From e6d1592492a3a379186bfb02bd0f4eda0669c0d5 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Tue, 20 Aug 2019 20:50:12 +0000
Subject: Vendor import of stripped llvm trunk r366426 (just before the
 release_90 branch point):

https://llvm.org/svn/llvm-project/llvm/trunk@366426
---
 LICENSE.TXT                                        |  261 +-
 include/llvm-c/Analysis.h                          |    8 +-
 include/llvm-c/BitReader.h                         |    8 +-
 include/llvm-c/BitWriter.h                         |    8 +-
 include/llvm-c/Comdat.h                            |    8 +-
 include/llvm-c/Core.h                              |  223 +-
 include/llvm-c/DataTypes.h                         |    8 +-
 include/llvm-c/DebugInfo.h                         |  135 +-
 include/llvm-c/Disassembler.h                      |    8 +-
 include/llvm-c/DisassemblerTypes.h                 |    8 +-
 include/llvm-c/Error.h                             |   10 +-
 include/llvm-c/ErrorHandling.h                     |    8 +-
 include/llvm-c/ExecutionEngine.h                   |    8 +-
 include/llvm-c/IRReader.h                          |    8 +-
 include/llvm-c/Initialization.h                    |    8 +-
 include/llvm-c/LinkTimeOptimizer.h                 |    7 +-
 include/llvm-c/Linker.h                            |    8 +-
 include/llvm-c/Object.h                            |  163 +-
 include/llvm-c/OptRemarks.h                        |  204 -
 include/llvm-c/OrcBindings.h                       |    8 +-
 include/llvm-c/Remarks.h                           |  329 +
 include/llvm-c/Support.h                           |    8 +-
 include/llvm-c/Target.h                            |   12 +-
 include/llvm-c/TargetMachine.h                     |    8 +-
 include/llvm-c/Transforms/AggressiveInstCombine.h  |    8 +-
 include/llvm-c/Transforms/Coroutines.h             |    8 +-
 include/llvm-c/Transforms/IPO.h                    |    8 +-
 include/llvm-c/Transforms/InstCombine.h            |    8 +-
 include/llvm-c/Transforms/PassManagerBuilder.h     |    8 +-
 include/llvm-c/Transforms/Scalar.h                 |    8 +-
 include/llvm-c/Transforms/Utils.h                  |   11 +-
 include/llvm-c/Transforms/Vectorize.h              |    8 +-
 include/llvm-c/Types.h                             |   13 +-
 include/llvm-c/lto.h                               |   52 +-
 include/llvm/ADT/APFloat.h                         |   18 +-
 include/llvm/ADT/APInt.h                           |   18 +-
 include/llvm/ADT/APSInt.h                          |   25 +-
 include/llvm/ADT/AllocatorList.h                   |    7 +-
 include/llvm/ADT/Any.h                             |    7 +-
 include/llvm/ADT/ArrayRef.h                        |   15 +-
 include/llvm/ADT/BitVector.h                       |    7 +-
 include/llvm/ADT/BitmaskEnum.h                     |    7 +-
 include/llvm/ADT/BreadthFirstIterator.h            |    9 +-
 include/llvm/ADT/CachedHashString.h                |    7 +-
 include/llvm/ADT/DAGDeltaAlgorithm.h               |    7 +-
 include/llvm/ADT/DeltaAlgorithm.h                  |    7 +-
 include/llvm/ADT/DenseMap.h                        |   15 +-
 include/llvm/ADT/DenseMapInfo.h                    |   23 +-
 include/llvm/ADT/DenseSet.h                        |    9 +-
 include/llvm/ADT/DepthFirstIterator.h              |    7 +-
 include/llvm/ADT/EpochTracker.h                    |    7 +-
 include/llvm/ADT/EquivalenceClasses.h              |    7 +-
 include/llvm/ADT/FoldingSet.h                      |    7 +-
 include/llvm/ADT/FunctionExtras.h                  |    7 +-
 include/llvm/ADT/GraphTraits.h                     |    7 +-
 include/llvm/ADT/Hashing.h                         |    9 +-
 include/llvm/ADT/ImmutableList.h                   |   11 +-
 include/llvm/ADT/ImmutableMap.h                    |    7 +-
 include/llvm/ADT/ImmutableSet.h                    |    7 +-
 include/llvm/ADT/IndexedMap.h                      |    7 +-
 include/llvm/ADT/IntEqClasses.h                    |    7 +-
 include/llvm/ADT/IntervalMap.h                     |    7 +-
 include/llvm/ADT/IntrusiveRefCntPtr.h              |    7 +-
 include/llvm/ADT/MapVector.h                       |    7 +-
 include/llvm/ADT/None.h                            |    7 +-
 include/llvm/ADT/Optional.h                        |  255 +-
 include/llvm/ADT/PackedVector.h                    |    7 +-
 include/llvm/ADT/PointerEmbeddedInt.h              |    7 +-
 include/llvm/ADT/PointerIntPair.h                  |   27 +-
 include/llvm/ADT/PointerSumType.h                  |    7 +-
 include/llvm/ADT/PointerUnion.h                    |  482 +-
 include/llvm/ADT/PostOrderIterator.h               |    7 +-
 include/llvm/ADT/PriorityQueue.h                   |    7 +-
 include/llvm/ADT/PriorityWorklist.h                |    7 +-
 include/llvm/ADT/SCCIterator.h                     |    7 +-
 include/llvm/ADT/STLExtras.h                       |  108 +-
 include/llvm/ADT/ScopeExit.h                       |    7 +-
 include/llvm/ADT/ScopedHashTable.h                 |    7 +-
 include/llvm/ADT/Sequence.h                        |    7 +-
 include/llvm/ADT/SetOperations.h                   |    7 +-
 include/llvm/ADT/SetVector.h                       |    7 +-
 include/llvm/ADT/SmallBitVector.h                  |    7 +-
 include/llvm/ADT/SmallPtrSet.h                     |    7 +-
 include/llvm/ADT/SmallSet.h                        |    7 +-
 include/llvm/ADT/SmallString.h                     |    7 +-
 include/llvm/ADT/SmallVector.h                     |   48 +-
 include/llvm/ADT/SparseBitVector.h                 |    7 +-
 include/llvm/ADT/SparseMultiSet.h                  |    7 +-
 include/llvm/ADT/SparseSet.h                       |    7 +-
 include/llvm/ADT/Statistic.h                       |    7 +-
 include/llvm/ADT/StringExtras.h                    |    7 +-
 include/llvm/ADT/StringMap.h                       |   12 +-
 include/llvm/ADT/StringRef.h                       |   43 +-
 include/llvm/ADT/StringSet.h                       |   14 +-
 include/llvm/ADT/StringSwitch.h                    |   29 +-
 include/llvm/ADT/TinyPtrVector.h                   |    7 +-
 include/llvm/ADT/Triple.h                          |   85 +-
 include/llvm/ADT/Twine.h                           |   10 +-
 include/llvm/ADT/UniqueVector.h                    |    7 +-
 include/llvm/ADT/VariadicFunction.h                |    9 +-
 include/llvm/ADT/bit.h                             |   17 +-
 include/llvm/ADT/edit_distance.h                   |    7 +-
 include/llvm/ADT/fallible_iterator.h               |  243 +
 include/llvm/ADT/ilist.h                           |   16 +-
 include/llvm/ADT/ilist_base.h                      |    7 +-
 include/llvm/ADT/ilist_iterator.h                  |    7 +-
 include/llvm/ADT/ilist_node.h                      |    7 +-
 include/llvm/ADT/ilist_node_base.h                 |    7 +-
 include/llvm/ADT/ilist_node_options.h              |    7 +-
 include/llvm/ADT/iterator.h                        |    7 +-
 include/llvm/ADT/iterator_range.h                  |    7 +-
 include/llvm/ADT/simple_ilist.h                    |    7 +-
 include/llvm/Analysis/AliasAnalysis.h              |  239 +-
 include/llvm/Analysis/AliasAnalysisEvaluator.h     |    7 +-
 include/llvm/Analysis/AliasSetTracker.h            |   28 +-
 include/llvm/Analysis/AssumptionCache.h            |   15 +-
 include/llvm/Analysis/BasicAliasAnalysis.h         |   34 +-
 include/llvm/Analysis/BlockFrequencyInfo.h         |   10 +-
 include/llvm/Analysis/BlockFrequencyInfoImpl.h     |   33 +-
 include/llvm/Analysis/BranchProbabilityInfo.h      |    7 +-
 include/llvm/Analysis/CFG.h                        |   32 +-
 include/llvm/Analysis/CFGPrinter.h                 |    7 +-
 include/llvm/Analysis/CFLAliasAnalysisUtils.h      |    7 +-
 include/llvm/Analysis/CFLAndersAliasAnalysis.h     |   10 +-
 include/llvm/Analysis/CFLSteensAliasAnalysis.h     |   14 +-
 include/llvm/Analysis/CGSCCPassManager.h           |  396 +-
 include/llvm/Analysis/CallGraph.h                  |   24 +-
 include/llvm/Analysis/CallGraphSCCPass.h           |    7 +-
 include/llvm/Analysis/CallPrinter.h                |    7 +-
 include/llvm/Analysis/CaptureTracking.h            |    7 +-
 include/llvm/Analysis/CmpInstAnalysis.h            |    7 +-
 include/llvm/Analysis/CodeMetrics.h                |   16 +-
 include/llvm/Analysis/ConstantFolding.h            |   22 +-
 include/llvm/Analysis/DOTGraphTraitsPass.h         |    7 +-
 include/llvm/Analysis/DemandedBits.h               |    7 +-
 include/llvm/Analysis/DependenceAnalysis.h         |   11 +-
 include/llvm/Analysis/DivergenceAnalysis.h         |    7 +-
 include/llvm/Analysis/DomPrinter.h                 |    7 +-
 include/llvm/Analysis/DomTreeUpdater.h             |  309 +
 include/llvm/Analysis/DominanceFrontier.h          |    7 +-
 include/llvm/Analysis/DominanceFrontierImpl.h      |    7 +-
 include/llvm/Analysis/EHPersonalities.h            |    7 +-
 include/llvm/Analysis/GlobalsModRef.h              |   15 +-
 include/llvm/Analysis/GuardUtils.h                 |   30 +-
 include/llvm/Analysis/IVDescriptors.h              |   28 +-
 include/llvm/Analysis/IVUsers.h                    |    7 +-
 .../llvm/Analysis/IndirectCallPromotionAnalysis.h  |    7 +-
 include/llvm/Analysis/IndirectCallVisitor.h        |    7 +-
 include/llvm/Analysis/InlineCost.h                 |   21 +-
 .../llvm/Analysis/InstructionPrecedenceTracking.h  |    9 +-
 include/llvm/Analysis/InstructionSimplify.h        |   33 +-
 include/llvm/Analysis/Interval.h                   |    7 +-
 include/llvm/Analysis/IntervalIterator.h           |    7 +-
 include/llvm/Analysis/IntervalPartition.h          |    7 +-
 include/llvm/Analysis/IteratedDominanceFrontier.h  |  154 +-
 include/llvm/Analysis/LazyBlockFrequencyInfo.h     |    7 +-
 include/llvm/Analysis/LazyBranchProbabilityInfo.h  |    7 +-
 include/llvm/Analysis/LazyCallGraph.h              |   32 +-
 include/llvm/Analysis/LazyValueInfo.h              |    7 +-
 include/llvm/Analysis/LegacyDivergenceAnalysis.h   |    7 +-
 include/llvm/Analysis/Lint.h                       |    7 +-
 include/llvm/Analysis/Loads.h                      |   29 +-
 include/llvm/Analysis/LoopAccessAnalysis.h         |   13 +-
 include/llvm/Analysis/LoopAnalysisManager.h        |   10 +-
 include/llvm/Analysis/LoopInfo.h                   |  249 +-
 include/llvm/Analysis/LoopInfoImpl.h               |   85 +-
 include/llvm/Analysis/LoopIterator.h               |    7 +-
 include/llvm/Analysis/LoopPass.h                   |    7 +-
 include/llvm/Analysis/LoopUnrollAnalyzer.h         |    7 +-
 include/llvm/Analysis/MemoryBuiltins.h             |   50 +-
 include/llvm/Analysis/MemoryDependenceAnalysis.h   |   26 +-
 include/llvm/Analysis/MemoryLocation.h             |    7 +-
 include/llvm/Analysis/MemorySSA.h                  |   49 +-
 include/llvm/Analysis/MemorySSAUpdater.h           |   42 +-
 include/llvm/Analysis/ModuleSummaryAnalysis.h      |    7 +-
 include/llvm/Analysis/MustExecute.h                |    7 +-
 include/llvm/Analysis/ObjCARCAliasAnalysis.h       |   16 +-
 include/llvm/Analysis/ObjCARCAnalysisUtils.h       |    7 +-
 include/llvm/Analysis/ObjCARCInstKind.h            |   11 +-
 include/llvm/Analysis/OptimizationRemarkEmitter.h  |   11 +-
 include/llvm/Analysis/OrderedBasicBlock.h          |   15 +-
 include/llvm/Analysis/OrderedInstructions.h        |    7 +-
 include/llvm/Analysis/PHITransAddr.h               |    7 +-
 include/llvm/Analysis/Passes.h                     |    7 +-
 include/llvm/Analysis/PhiValues.h                  |    7 +-
 include/llvm/Analysis/PostDominators.h             |    7 +-
 include/llvm/Analysis/ProfileSummaryInfo.h         |   16 +-
 include/llvm/Analysis/PtrUseVisitor.h              |   11 +-
 include/llvm/Analysis/RegionInfo.h                 |    7 +-
 include/llvm/Analysis/RegionInfoImpl.h             |    7 +-
 include/llvm/Analysis/RegionIterator.h             |    7 +-
 include/llvm/Analysis/RegionPass.h                 |    7 +-
 include/llvm/Analysis/RegionPrinter.h              |    7 +-
 include/llvm/Analysis/ScalarEvolution.h            |   72 +-
 .../llvm/Analysis/ScalarEvolutionAliasAnalysis.h   |   10 +-
 include/llvm/Analysis/ScalarEvolutionExpander.h    |   17 +-
 include/llvm/Analysis/ScalarEvolutionExpressions.h |  156 +-
 .../llvm/Analysis/ScalarEvolutionNormalization.h   |    7 +-
 include/llvm/Analysis/ScopedNoAliasAA.h            |   16 +-
 include/llvm/Analysis/SparsePropagation.h          |   15 +-
 include/llvm/Analysis/StackSafetyAnalysis.h        |    7 +-
 include/llvm/Analysis/SyncDependenceAnalysis.h     |    7 +-
 include/llvm/Analysis/SyntheticCountsUtils.h       |    7 +-
 include/llvm/Analysis/TargetFolder.h               |   11 +-
 include/llvm/Analysis/TargetLibraryInfo.def        |   65 +-
 include/llvm/Analysis/TargetLibraryInfo.h          |   14 +-
 include/llvm/Analysis/TargetTransformInfo.h        |  235 +-
 include/llvm/Analysis/TargetTransformInfoImpl.h    |  191 +-
 include/llvm/Analysis/Trace.h                      |    7 +-
 include/llvm/Analysis/TypeBasedAliasAnalysis.h     |   19 +-
 include/llvm/Analysis/TypeMetadataUtils.h          |    7 +-
 include/llvm/Analysis/Utils/Local.h                |    7 +-
 include/llvm/Analysis/ValueLattice.h               |    7 +-
 include/llvm/Analysis/ValueLatticeUtils.h          |    7 +-
 include/llvm/Analysis/ValueTracking.h              |   64 +-
 include/llvm/Analysis/VecFuncs.def                 |  250 +
 include/llvm/Analysis/VectorUtils.h                |   81 +-
 include/llvm/AsmParser/Parser.h                    |    7 +-
 include/llvm/AsmParser/SlotMapping.h               |    7 +-
 include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h |   37 +-
 include/llvm/BinaryFormat/COFF.h                   |   14 +-
 include/llvm/BinaryFormat/Dwarf.def                |   16 +-
 include/llvm/BinaryFormat/Dwarf.h                  |   11 +-
 include/llvm/BinaryFormat/DynamicTags.def          |   28 +
 include/llvm/BinaryFormat/ELF.h                    |   88 +-
 include/llvm/BinaryFormat/ELFRelocs/ARM.def        |    3 +
 include/llvm/BinaryFormat/ELFRelocs/PowerPC.def    |   33 +
 include/llvm/BinaryFormat/MachO.def                |    7 +-
 include/llvm/BinaryFormat/MachO.h                  |   25 +-
 include/llvm/BinaryFormat/Magic.h                  |   10 +-
 include/llvm/BinaryFormat/Minidump.h               |  203 +
 include/llvm/BinaryFormat/MinidumpConstants.def    |  107 +
 include/llvm/BinaryFormat/MsgPack.def              |    7 +-
 include/llvm/BinaryFormat/MsgPack.h                |    7 +-
 include/llvm/BinaryFormat/MsgPackDocument.h        |  385 +
 include/llvm/BinaryFormat/MsgPackReader.h          |    7 +-
 include/llvm/BinaryFormat/MsgPackTypes.h           |  372 -
 include/llvm/BinaryFormat/MsgPackWriter.h          |    7 +-
 include/llvm/BinaryFormat/Wasm.h                   |   65 +-
 include/llvm/BinaryFormat/WasmRelocs.def           |   24 +-
 include/llvm/BinaryFormat/XCOFF.h                  |  145 +
 include/llvm/Bitcode/BitCodes.h                    |  185 -
 include/llvm/Bitcode/BitcodeAnalyzer.h             |  103 +
 include/llvm/Bitcode/BitcodeReader.h               |    9 +-
 include/llvm/Bitcode/BitcodeWriter.h               |    7 +-
 include/llvm/Bitcode/BitcodeWriterPass.h           |    7 +-
 include/llvm/Bitcode/BitstreamReader.h             |  506 --
 include/llvm/Bitcode/BitstreamWriter.h             |  550 --
 include/llvm/Bitcode/LLVMBitCodes.h                |   42 +-
 include/llvm/Bitstream/BitCodes.h                  |  184 +
 include/llvm/Bitstream/BitstreamReader.h           |  557 ++
 include/llvm/Bitstream/BitstreamWriter.h           |  547 ++
 include/llvm/CodeGen/AccelTable.h                  |   31 +-
 include/llvm/CodeGen/Analysis.h                    |   27 +-
 include/llvm/CodeGen/AsmPrinter.h                  |   61 +-
 include/llvm/CodeGen/AsmPrinterHandler.h           |    7 +-
 include/llvm/CodeGen/AtomicExpandUtils.h           |    7 +-
 include/llvm/CodeGen/BasicTTIImpl.h                |  250 +-
 include/llvm/CodeGen/BuiltinGCs.h                  |    7 +-
 include/llvm/CodeGen/CSEConfigBase.h               |   28 +
 include/llvm/CodeGen/CalcSpillWeights.h            |    7 +-
 include/llvm/CodeGen/CallingConvLower.h            |   11 +-
 include/llvm/CodeGen/CommandFlags.inc              |   13 +-
 include/llvm/CodeGen/CostTable.h                   |    7 +-
 include/llvm/CodeGen/DAGCombine.h                  |    7 +-
 include/llvm/CodeGen/DFAPacketizer.h               |    7 +-
 include/llvm/CodeGen/DIE.h                         |   59 +-
 include/llvm/CodeGen/DIEValue.def                  |    8 +-
 include/llvm/CodeGen/DbgEntityHistoryCalculator.h  |   93 +-
 include/llvm/CodeGen/DebugHandlerBase.h            |    9 +-
 include/llvm/CodeGen/DwarfStringPoolEntry.h        |    7 +-
 include/llvm/CodeGen/EdgeBundles.h                 |    7 +-
 include/llvm/CodeGen/ExecutionDomainFix.h          |    7 +-
 include/llvm/CodeGen/ExpandReductions.h            |    7 +-
 include/llvm/CodeGen/FastISel.h                    |    9 +-
 include/llvm/CodeGen/FaultMaps.h                   |    7 +-
 include/llvm/CodeGen/FunctionLoweringInfo.h        |   65 +-
 include/llvm/CodeGen/GCMetadata.h                  |    7 +-
 include/llvm/CodeGen/GCMetadataPrinter.h           |    7 +-
 include/llvm/CodeGen/GCStrategy.h                  |    7 +-
 include/llvm/CodeGen/GlobalISel/CSEInfo.h          |   41 +-
 include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h    |    7 +-
 include/llvm/CodeGen/GlobalISel/CallLowering.h     |  137 +-
 include/llvm/CodeGen/GlobalISel/Combiner.h         |    9 +-
 include/llvm/CodeGen/GlobalISel/CombinerHelper.h   |   27 +-
 include/llvm/CodeGen/GlobalISel/CombinerInfo.h     |    7 +-
 .../CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h |    7 +-
 .../llvm/CodeGen/GlobalISel/GISelChangeObserver.h  |   18 +-
 include/llvm/CodeGen/GlobalISel/GISelWorkList.h    |   50 +-
 include/llvm/CodeGen/GlobalISel/IRTranslator.h     |  121 +-
 .../llvm/CodeGen/GlobalISel/InstructionSelect.h    |    7 +-
 .../llvm/CodeGen/GlobalISel/InstructionSelector.h  |   17 +-
 .../CodeGen/GlobalISel/InstructionSelectorImpl.h   |   60 +-
 .../GlobalISel/LegalizationArtifactCombiner.h      |  278 +-
 include/llvm/CodeGen/GlobalISel/Legalizer.h        |   14 +-
 include/llvm/CodeGen/GlobalISel/LegalizerHelper.h  |  120 +-
 include/llvm/CodeGen/GlobalISel/LegalizerInfo.h    |  211 +-
 include/llvm/CodeGen/GlobalISel/Localizer.h        |   22 +-
 include/llvm/CodeGen/GlobalISel/MIPatternMatch.h   |   14 +-
 include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h |  361 +-
 include/llvm/CodeGen/GlobalISel/RegBankSelect.h    |   16 +-
 include/llvm/CodeGen/GlobalISel/RegisterBank.h     |    7 +-
 include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h |   53 +-
 include/llvm/CodeGen/GlobalISel/Types.h            |    7 +-
 include/llvm/CodeGen/GlobalISel/Utils.h            |   74 +-
 include/llvm/CodeGen/ISDOpcodes.h                  |   54 +-
 include/llvm/CodeGen/IntrinsicLowering.h           |   11 +-
 include/llvm/CodeGen/LatencyPriorityQueue.h        |    7 +-
 .../llvm/CodeGen/LazyMachineBlockFrequencyInfo.h   |    7 +-
 include/llvm/CodeGen/LexicalScopes.h               |    7 +-
 include/llvm/CodeGen/LinkAllAsmWriterComponents.h  |    7 +-
 include/llvm/CodeGen/LinkAllCodegenComponents.h    |    7 +-
 include/llvm/CodeGen/LiveInterval.h                |   54 +-
 include/llvm/CodeGen/LiveIntervalUnion.h           |    7 +-
 include/llvm/CodeGen/LiveIntervals.h               |   16 +-
 include/llvm/CodeGen/LivePhysRegs.h                |    7 +-
 include/llvm/CodeGen/LiveRangeEdit.h               |    7 +-
 include/llvm/CodeGen/LiveRegMatrix.h               |    7 +-
 include/llvm/CodeGen/LiveRegUnits.h                |    7 +-
 include/llvm/CodeGen/LiveStacks.h                  |    7 +-
 include/llvm/CodeGen/LiveVariables.h               |    7 +-
 include/llvm/CodeGen/LoopTraversal.h               |    7 +-
 include/llvm/CodeGen/LowLevelType.h                |    7 +-
 include/llvm/CodeGen/MIRParser/MIParser.h          |  233 +
 include/llvm/CodeGen/MIRParser/MIRParser.h         |    7 +-
 include/llvm/CodeGen/MIRPrinter.h                  |    9 +-
 include/llvm/CodeGen/MIRYamlMapping.h              |  108 +-
 include/llvm/CodeGen/MachORelocation.h             |    7 +-
 include/llvm/CodeGen/MachineBasicBlock.h           |   28 +-
 include/llvm/CodeGen/MachineBlockFrequencyInfo.h   |    7 +-
 .../llvm/CodeGen/MachineBranchProbabilityInfo.h    |    7 +-
 include/llvm/CodeGen/MachineCombinerPattern.h      |    7 +-
 include/llvm/CodeGen/MachineConstantPool.h         |    7 +-
 include/llvm/CodeGen/MachineDominanceFrontier.h    |    7 +-
 include/llvm/CodeGen/MachineDominators.h           |    7 +-
 include/llvm/CodeGen/MachineFrameInfo.h            |   14 +-
 include/llvm/CodeGen/MachineFunction.h             |   76 +-
 include/llvm/CodeGen/MachineFunctionPass.h         |    7 +-
 include/llvm/CodeGen/MachineInstr.h                |   81 +-
 include/llvm/CodeGen/MachineInstrBuilder.h         |   15 +-
 include/llvm/CodeGen/MachineInstrBundle.h          |   15 +-
 include/llvm/CodeGen/MachineInstrBundleIterator.h  |    7 +-
 include/llvm/CodeGen/MachineJumpTableInfo.h        |    7 +-
 include/llvm/CodeGen/MachineLoopInfo.h             |    7 +-
 include/llvm/CodeGen/MachineMemOperand.h           |   26 +-
 include/llvm/CodeGen/MachineModuleInfo.h           |   32 +-
 include/llvm/CodeGen/MachineModuleInfoImpls.h      |    7 +-
 include/llvm/CodeGen/MachineOperand.h              |   21 +-
 .../CodeGen/MachineOptimizationRemarkEmitter.h     |   21 +-
 include/llvm/CodeGen/MachineOutliner.h             |   13 +-
 include/llvm/CodeGen/MachinePassRegistry.h         |    7 +-
 include/llvm/CodeGen/MachinePipeliner.h            |   85 +-
 include/llvm/CodeGen/MachinePostDominators.h       |    9 +-
 include/llvm/CodeGen/MachineRegionInfo.h           |    7 +-
 include/llvm/CodeGen/MachineRegisterInfo.h         |   20 +-
 include/llvm/CodeGen/MachineSSAUpdater.h           |    7 +-
 include/llvm/CodeGen/MachineScheduler.h            |   35 +-
 include/llvm/CodeGen/MachineTraceMetrics.h         |    7 +-
 include/llvm/CodeGen/MacroFusion.h                 |    7 +-
 include/llvm/CodeGen/PBQP/CostAllocator.h          |    7 +-
 include/llvm/CodeGen/PBQP/Graph.h                  |    7 +-
 include/llvm/CodeGen/PBQP/Math.h                   |    7 +-
 include/llvm/CodeGen/PBQP/ReductionRules.h         |    7 +-
 include/llvm/CodeGen/PBQP/Solution.h               |    7 +-
 include/llvm/CodeGen/PBQPRAConstraint.h            |    9 +-
 include/llvm/CodeGen/ParallelCG.h                  |    7 +-
 include/llvm/CodeGen/Passes.h                      |   15 +-
 include/llvm/CodeGen/PreISelIntrinsicLowering.h    |    7 +-
 include/llvm/CodeGen/PseudoSourceValue.h           |   10 +-
 include/llvm/CodeGen/ReachingDefAnalysis.h         |    9 +-
 include/llvm/CodeGen/RegAllocPBQP.h                |    7 +-
 include/llvm/CodeGen/RegAllocRegistry.h            |   35 +-
 include/llvm/CodeGen/Register.h                    |   60 +
 include/llvm/CodeGen/RegisterClassInfo.h           |    7 +-
 include/llvm/CodeGen/RegisterPressure.h            |   11 +-
 include/llvm/CodeGen/RegisterScavenging.h          |   24 +-
 include/llvm/CodeGen/RegisterUsageInfo.h           |    7 +-
 include/llvm/CodeGen/ResourcePriorityQueue.h       |    7 +-
 include/llvm/CodeGen/RuntimeLibcalls.h             |    7 +-
 include/llvm/CodeGen/SDNodeProperties.td           |    7 +-
 include/llvm/CodeGen/ScheduleDAG.h                 |   31 +-
 include/llvm/CodeGen/ScheduleDAGInstrs.h           |   23 +-
 include/llvm/CodeGen/ScheduleDAGMutation.h         |    7 +-
 include/llvm/CodeGen/ScheduleDFS.h                 |    9 +-
 include/llvm/CodeGen/ScheduleHazardRecognizer.h    |    7 +-
 include/llvm/CodeGen/SchedulerRegistry.h           |    7 +-
 include/llvm/CodeGen/ScoreboardHazardRecognizer.h  |    7 +-
 include/llvm/CodeGen/SelectionDAG.h                |  139 +-
 include/llvm/CodeGen/SelectionDAGAddressAnalysis.h |   46 +-
 include/llvm/CodeGen/SelectionDAGISel.h            |   15 +-
 include/llvm/CodeGen/SelectionDAGNodes.h           |  185 +-
 include/llvm/CodeGen/SelectionDAGTargetInfo.h      |   15 +-
 include/llvm/CodeGen/SlotIndexes.h                 |   87 +-
 include/llvm/CodeGen/StackMaps.h                   |    7 +-
 include/llvm/CodeGen/StackProtector.h              |   13 +-
 include/llvm/CodeGen/SwiftErrorValueTracking.h     |  110 +
 include/llvm/CodeGen/SwitchLoweringUtils.h         |  297 +
 include/llvm/CodeGen/TailDuplicator.h              |    7 +-
 include/llvm/CodeGen/TargetCallingConv.h           |   23 +-
 include/llvm/CodeGen/TargetFrameLowering.h         |   37 +-
 include/llvm/CodeGen/TargetInstrInfo.h             |   46 +-
 include/llvm/CodeGen/TargetLowering.h              |  375 +-
 .../llvm/CodeGen/TargetLoweringObjectFileImpl.h    |    7 +-
 include/llvm/CodeGen/TargetOpcodes.h               |    7 +-
 include/llvm/CodeGen/TargetPassConfig.h            |   37 +-
 include/llvm/CodeGen/TargetRegisterInfo.h          |   14 +-
 include/llvm/CodeGen/TargetSchedule.h              |    7 +-
 include/llvm/CodeGen/TargetSubtargetInfo.h         |   33 +-
 include/llvm/CodeGen/UnreachableBlockElim.h        |    7 +-
 include/llvm/CodeGen/ValueTypes.h                  |    7 +-
 include/llvm/CodeGen/ValueTypes.td                 |  200 +-
 include/llvm/CodeGen/VirtRegMap.h                  |   17 +-
 include/llvm/CodeGen/WasmEHFuncInfo.h              |   29 +-
 include/llvm/CodeGen/WinEHFuncInfo.h               |    7 +-
 .../DebugInfo/CodeView/AppendingTypeTableBuilder.h |    7 +-
 include/llvm/DebugInfo/CodeView/CVRecord.h         |   38 +-
 include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h  |    7 +-
 include/llvm/DebugInfo/CodeView/CVTypeVisitor.h    |   11 +-
 include/llvm/DebugInfo/CodeView/CodeView.h         |   29 +-
 include/llvm/DebugInfo/CodeView/CodeViewError.h    |    7 +-
 include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h |  140 +-
 .../llvm/DebugInfo/CodeView/CodeViewRegisters.def  |  210 +-
 .../llvm/DebugInfo/CodeView/CodeViewSymbols.def    |    9 +-
 include/llvm/DebugInfo/CodeView/CodeViewTypes.def  |    7 +-
 .../DebugInfo/CodeView/ContinuationRecordBuilder.h |    9 +-
 .../DebugInfo/CodeView/DebugChecksumsSubsection.h  |    7 +-
 .../DebugInfo/CodeView/DebugCrossExSubsection.h    |    7 +-
 .../DebugInfo/CodeView/DebugCrossImpSubsection.h   |    9 +-
 .../DebugInfo/CodeView/DebugFrameDataSubsection.h  |    7 +-
 .../CodeView/DebugInlineeLinesSubsection.h         |   14 +-
 .../llvm/DebugInfo/CodeView/DebugLinesSubsection.h |    7 +-
 .../CodeView/DebugStringTableSubsection.h          |    7 +-
 include/llvm/DebugInfo/CodeView/DebugSubsection.h  |    7 +-
 .../DebugInfo/CodeView/DebugSubsectionRecord.h     |    7 +-
 .../DebugInfo/CodeView/DebugSubsectionVisitor.h    |    7 +-
 .../DebugInfo/CodeView/DebugSymbolRVASubsection.h  |    7 +-
 .../DebugInfo/CodeView/DebugSymbolsSubsection.h    |    7 +-
 .../DebugInfo/CodeView/DebugUnknownSubsection.h    |    7 +-
 include/llvm/DebugInfo/CodeView/EnumTables.h       |    9 +-
 include/llvm/DebugInfo/CodeView/Formatters.h       |    7 +-
 include/llvm/DebugInfo/CodeView/FunctionId.h       |    7 +-
 include/llvm/DebugInfo/CodeView/GUID.h             |    7 +-
 .../DebugInfo/CodeView/GlobalTypeTableBuilder.h    |   29 +-
 .../DebugInfo/CodeView/LazyRandomTypeCollection.h  |    7 +-
 include/llvm/DebugInfo/CodeView/Line.h             |    7 +-
 .../DebugInfo/CodeView/MergingTypeTableBuilder.h   |    7 +-
 include/llvm/DebugInfo/CodeView/RecordName.h       |    7 +-
 .../llvm/DebugInfo/CodeView/RecordSerialization.h  |   10 +-
 .../llvm/DebugInfo/CodeView/SimpleTypeSerializer.h |    7 +-
 .../llvm/DebugInfo/CodeView/StringsAndChecksums.h  |    7 +-
 .../llvm/DebugInfo/CodeView/SymbolDeserializer.h   |    7 +-
 .../llvm/DebugInfo/CodeView/SymbolDumpDelegate.h   |    7 +-
 include/llvm/DebugInfo/CodeView/SymbolDumper.h     |    7 +-
 include/llvm/DebugInfo/CodeView/SymbolRecord.h     |   66 +-
 .../llvm/DebugInfo/CodeView/SymbolRecordHelpers.h  |    7 +-
 .../llvm/DebugInfo/CodeView/SymbolRecordMapping.h  |    7 +-
 include/llvm/DebugInfo/CodeView/SymbolSerializer.h |   11 +-
 .../CodeView/SymbolVisitorCallbackPipeline.h       |    7 +-
 .../DebugInfo/CodeView/SymbolVisitorCallbacks.h    |    7 +-
 .../DebugInfo/CodeView/SymbolVisitorDelegate.h     |    7 +-
 include/llvm/DebugInfo/CodeView/TypeCollection.h   |    7 +-
 include/llvm/DebugInfo/CodeView/TypeDeserializer.h |   17 +-
 include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h  |    7 +-
 include/llvm/DebugInfo/CodeView/TypeHashing.h      |   38 +-
 include/llvm/DebugInfo/CodeView/TypeIndex.h        |    7 +-
 .../llvm/DebugInfo/CodeView/TypeIndexDiscovery.h   |    7 +-
 include/llvm/DebugInfo/CodeView/TypeRecord.h       |    7 +-
 .../llvm/DebugInfo/CodeView/TypeRecordHelpers.h    |    7 +-
 .../llvm/DebugInfo/CodeView/TypeRecordMapping.h    |    9 +-
 include/llvm/DebugInfo/CodeView/TypeStreamMerger.h |    7 +-
 .../llvm/DebugInfo/CodeView/TypeSymbolEmitter.h    |    7 +-
 .../llvm/DebugInfo/CodeView/TypeTableCollection.h  |    7 +-
 .../CodeView/TypeVisitorCallbackPipeline.h         |   12 +-
 .../llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h |    7 +-
 include/llvm/DebugInfo/DIContext.h                 |   46 +-
 .../DebugInfo/DWARF/DWARFAbbreviationDeclaration.h |    7 +-
 .../llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h   |   23 +-
 include/llvm/DebugInfo/DWARF/DWARFAddressRange.h   |   13 +-
 include/llvm/DebugInfo/DWARF/DWARFAttribute.h      |   21 +-
 include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h    |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFContext.h        |   25 +-
 include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h  |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h    |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h      |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h   |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h     |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLine.h      |   74 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h       |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h     |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h  |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h |    9 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h  |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFDie.h            |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFExpression.h     |   26 +-
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h      |   54 +-
 include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h       |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFListTable.h      |   11 +-
 include/llvm/DebugInfo/DWARF/DWARFObject.h         |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFRelocMap.h       |   14 +-
 include/llvm/DebugInfo/DWARF/DWARFSection.h        |   12 +-
 include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h       |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h           |   50 +-
 include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h      |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFVerifier.h       |    7 +-
 include/llvm/DebugInfo/GSYM/FileEntry.h            |   68 +
 include/llvm/DebugInfo/GSYM/FunctionInfo.h         |  107 +
 include/llvm/DebugInfo/GSYM/InlineInfo.h           |   78 +
 include/llvm/DebugInfo/GSYM/LineEntry.h            |   48 +
 include/llvm/DebugInfo/GSYM/Range.h                |   87 +
 include/llvm/DebugInfo/GSYM/StringTable.h          |   54 +
 include/llvm/DebugInfo/MSF/IMSFFile.h              |    7 +-
 include/llvm/DebugInfo/MSF/MSFBuilder.h            |    7 +-
 include/llvm/DebugInfo/MSF/MSFCommon.h             |    7 +-
 include/llvm/DebugInfo/MSF/MSFError.h              |    7 +-
 include/llvm/DebugInfo/MSF/MappedBlockStream.h     |    7 +-
 .../llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h  |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIADataStream.h     |    7 +-
 .../llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h   |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h  |    7 +-
 .../DebugInfo/PDB/DIA/DIAEnumInjectedSources.h     |    7 +-
 .../llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h    |    7 +-
 .../DebugInfo/PDB/DIA/DIAEnumSectionContribs.h     |    7 +-
 .../llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h    |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h    |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h     |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAError.h          |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h      |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h |    9 +-
 include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h     |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h      |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIASession.h        |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h     |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIASupport.h        |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIATable.h          |    7 +-
 include/llvm/DebugInfo/PDB/DIA/DIAUtils.h          |    7 +-
 include/llvm/DebugInfo/PDB/GenericError.h          |    9 +-
 include/llvm/DebugInfo/PDB/IPDBDataStream.h        |    7 +-
 include/llvm/DebugInfo/PDB/IPDBEnumChildren.h      |    7 +-
 include/llvm/DebugInfo/PDB/IPDBFrameData.h         |    7 +-
 include/llvm/DebugInfo/PDB/IPDBInjectedSource.h    |   13 +-
 include/llvm/DebugInfo/PDB/IPDBLineNumber.h        |    7 +-
 include/llvm/DebugInfo/PDB/IPDBRawSymbol.h         |    7 +-
 include/llvm/DebugInfo/PDB/IPDBSectionContrib.h    |    7 +-
 include/llvm/DebugInfo/PDB/IPDBSession.h           |    7 +-
 include/llvm/DebugInfo/PDB/IPDBSourceFile.h        |    7 +-
 include/llvm/DebugInfo/PDB/IPDBTable.h             |    7 +-
 .../DebugInfo/PDB/Native/DbiModuleDescriptor.h     |    7 +-
 .../PDB/Native/DbiModuleDescriptorBuilder.h        |    7 +-
 include/llvm/DebugInfo/PDB/Native/DbiModuleList.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/DbiStream.h      |   26 +-
 .../llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h   |    7 +-
 include/llvm/DebugInfo/PDB/Native/EnumTables.h     |    7 +-
 include/llvm/DebugInfo/PDB/Native/Formatters.h     |    7 +-
 .../llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h   |    7 +-
 include/llvm/DebugInfo/PDB/Native/GlobalsStream.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/Hash.h           |    7 +-
 include/llvm/DebugInfo/PDB/Native/HashTable.h      |   92 +-
 .../DebugInfo/PDB/Native/ISectionContribVisitor.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/InfoStream.h     |    7 +-
 .../llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h  |    7 +-
 .../DebugInfo/PDB/Native/InjectedSourceStream.h    |   44 +
 .../llvm/DebugInfo/PDB/Native/ModuleDebugStream.h  |    9 +-
 include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h |    9 +-
 .../DebugInfo/PDB/Native/NativeCompilandSymbol.h   |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h  |    7 +-
 .../PDB/Native/NativeEnumInjectedSources.h         |   43 +
 .../llvm/DebugInfo/PDB/Native/NativeEnumModules.h  |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeEnumTypes.h    |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeExeSymbol.h    |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeRawSymbol.h    |    7 +-
 include/llvm/DebugInfo/PDB/Native/NativeSession.h  |    7 +-
 .../DebugInfo/PDB/Native/NativeSymbolEnumerator.h  |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeArray.h    |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h |    7 +-
 .../DebugInfo/PDB/Native/NativeTypeFunctionSig.h   |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypePointer.h  |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h  |    7 +-
 .../llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/PDBFile.h        |   23 +-
 include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h |    9 +-
 include/llvm/DebugInfo/PDB/Native/PDBStringTable.h |    7 +-
 .../DebugInfo/PDB/Native/PDBStringTableBuilder.h   |    7 +-
 include/llvm/DebugInfo/PDB/Native/PublicsStream.h  |    7 +-
 include/llvm/DebugInfo/PDB/Native/RawConstants.h   |    7 +-
 include/llvm/DebugInfo/PDB/Native/RawError.h       |    7 +-
 include/llvm/DebugInfo/PDB/Native/RawTypes.h       |   18 +-
 include/llvm/DebugInfo/PDB/Native/SymbolCache.h    |    7 +-
 include/llvm/DebugInfo/PDB/Native/SymbolStream.h   |    7 +-
 include/llvm/DebugInfo/PDB/Native/TpiHashing.h     |    7 +-
 include/llvm/DebugInfo/PDB/Native/TpiStream.h      |    7 +-
 .../llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h   |    7 +-
 include/llvm/DebugInfo/PDB/PDB.h                   |    7 +-
 include/llvm/DebugInfo/PDB/PDBContext.h            |   16 +-
 include/llvm/DebugInfo/PDB/PDBExtras.h             |   13 +-
 include/llvm/DebugInfo/PDB/PDBSymDumper.h          |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbol.h             |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h   |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolBlock.h        |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h    |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolCustom.h       |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolData.h         |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolExe.h          |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolFunc.h         |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h   |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolLabel.h        |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolThunk.h        |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h    |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h    |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h   |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h    |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h     |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h   |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h  |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h      |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h   |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h  |    7 +-
 include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h      |    7 +-
 .../llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h   |    7 +-
 include/llvm/DebugInfo/PDB/PDBTypes.h              |   76 +-
 include/llvm/DebugInfo/PDB/UDTLayout.h             |    7 +-
 include/llvm/DebugInfo/Symbolize/DIPrinter.h       |   20 +-
 .../llvm/DebugInfo/Symbolize/SymbolizableModule.h  |   20 +-
 include/llvm/DebugInfo/Symbolize/Symbolize.h       |   56 +-
 include/llvm/Demangle/Compiler.h                   |   93 -
 include/llvm/Demangle/Demangle.h                   |   15 +-
 include/llvm/Demangle/DemangleConfig.h             |   99 +
 include/llvm/Demangle/ItaniumDemangle.h            |  108 +-
 include/llvm/Demangle/MicrosoftDemangle.h          |   53 +-
 include/llvm/Demangle/MicrosoftDemangleNodes.h     |   33 +-
 include/llvm/Demangle/README.txt                   |   52 +
 include/llvm/Demangle/StringView.h                 |   21 +-
 include/llvm/Demangle/Utility.h                    |   18 +-
 include/llvm/ExecutionEngine/ExecutionEngine.h     |   20 +-
 include/llvm/ExecutionEngine/GenericValue.h        |    7 +-
 include/llvm/ExecutionEngine/Interpreter.h         |    7 +-
 include/llvm/ExecutionEngine/JITEventListener.h    |    7 +-
 .../llvm/ExecutionEngine/JITLink/EHFrameSupport.h  |   80 +
 include/llvm/ExecutionEngine/JITLink/JITLink.h     |  930 ++
 .../ExecutionEngine/JITLink/JITLinkMemoryManager.h |   99 +
 include/llvm/ExecutionEngine/JITLink/MachO.h       |   30 +
 .../llvm/ExecutionEngine/JITLink/MachO_x86_64.h    |   63 +
 include/llvm/ExecutionEngine/JITSymbol.h           |   34 +-
 include/llvm/ExecutionEngine/MCJIT.h               |    7 +-
 include/llvm/ExecutionEngine/OProfileWrapper.h     |    7 +-
 include/llvm/ExecutionEngine/ObjectCache.h         |    7 +-
 .../ExecutionEngine/Orc/CompileOnDemandLayer.h     |   52 +-
 include/llvm/ExecutionEngine/Orc/CompileUtils.h    |   95 +-
 include/llvm/ExecutionEngine/Orc/Core.h            |  228 +-
 include/llvm/ExecutionEngine/Orc/ExecutionUtils.h  |   52 +-
 .../llvm/ExecutionEngine/Orc/GlobalMappingLayer.h  |    7 +-
 include/llvm/ExecutionEngine/Orc/IRCompileLayer.h  |   27 +-
 .../llvm/ExecutionEngine/Orc/IRTransformLayer.h    |   26 +-
 .../llvm/ExecutionEngine/Orc/IndirectionUtils.h    |   11 +-
 .../ExecutionEngine/Orc/JITTargetMachineBuilder.h  |    7 +-
 include/llvm/ExecutionEngine/Orc/LLJIT.h           |  230 +-
 include/llvm/ExecutionEngine/Orc/LambdaResolver.h  |   34 +-
 include/llvm/ExecutionEngine/Orc/Layer.h           |    7 +-
 .../llvm/ExecutionEngine/Orc/LazyEmittingLayer.h   |   24 +-
 include/llvm/ExecutionEngine/Orc/LazyReexports.h   |    7 +-
 include/llvm/ExecutionEngine/Orc/Legacy.h          |   18 +-
 include/llvm/ExecutionEngine/Orc/NullResolver.h    |    7 +-
 .../llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h  |  165 +
 .../ExecutionEngine/Orc/ObjectTransformLayer.h     |   23 +-
 include/llvm/ExecutionEngine/Orc/OrcABISupport.h   |    7 +-
 include/llvm/ExecutionEngine/Orc/OrcError.h        |    7 +-
 .../ExecutionEngine/Orc/OrcRemoteTargetClient.h    |    7 +-
 .../ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h    |    7 +-
 .../ExecutionEngine/Orc/OrcRemoteTargetServer.h    |   13 +-
 .../llvm/ExecutionEngine/Orc/RPCSerialization.h    |   93 +-
 include/llvm/ExecutionEngine/Orc/RPCUtils.h        |   25 +-
 .../ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h |   50 +-
 include/llvm/ExecutionEngine/Orc/RawByteChannel.h  |    7 +-
 .../llvm/ExecutionEngine/Orc/RemoteObjectLayer.h   |   60 +-
 .../llvm/ExecutionEngine/Orc/SymbolStringPool.h    |   85 +-
 .../llvm/ExecutionEngine/Orc/ThreadSafeModule.h    |    7 +-
 include/llvm/ExecutionEngine/OrcMCJITReplacement.h |    7 +-
 include/llvm/ExecutionEngine/OrcV1Deprecation.h    |   22 +
 include/llvm/ExecutionEngine/RTDyldMemoryManager.h |    7 +-
 include/llvm/ExecutionEngine/RuntimeDyld.h         |   32 +-
 include/llvm/ExecutionEngine/RuntimeDyldChecker.h  |   98 +-
 .../llvm/ExecutionEngine/SectionMemoryManager.h    |    7 +-
 include/llvm/FuzzMutate/FuzzerCLI.h                |    7 +-
 include/llvm/FuzzMutate/IRMutator.h                |    7 +-
 include/llvm/FuzzMutate/OpDescriptor.h             |    7 +-
 include/llvm/FuzzMutate/Operations.h               |    7 +-
 include/llvm/FuzzMutate/Random.h                   |    7 +-
 include/llvm/FuzzMutate/RandomIRBuilder.h          |    9 +-
 include/llvm/IR/Argument.h                         |   15 +-
 include/llvm/IR/AssemblyAnnotationWriter.h         |    7 +-
 include/llvm/IR/Attributes.h                       |   27 +-
 include/llvm/IR/Attributes.td                      |   16 +
 include/llvm/IR/AutoUpgrade.h                      |   13 +-
 include/llvm/IR/BasicBlock.h                       |   17 +-
 include/llvm/IR/CFG.h                              |   11 +-
 include/llvm/IR/CFGDiff.h                          |    7 +-
 include/llvm/IR/CallSite.h                         |  311 +-
 include/llvm/IR/CallingConv.h                      |    7 +-
 include/llvm/IR/Comdat.h                           |    7 +-
 include/llvm/IR/Constant.h                         |   11 +-
 include/llvm/IR/ConstantFolder.h                   |   11 +-
 include/llvm/IR/ConstantRange.h                    |  191 +-
 include/llvm/IR/Constants.h                        |    7 +-
 include/llvm/IR/DIBuilder.h                        |   17 +-
 include/llvm/IR/DataLayout.h                       |   47 +-
 include/llvm/IR/DebugInfo.h                        |    7 +-
 include/llvm/IR/DebugInfoFlags.def                 |   17 +-
 include/llvm/IR/DebugInfoMetadata.h                |  386 +-
 include/llvm/IR/DebugLoc.h                         |    7 +-
 include/llvm/IR/DerivedTypes.h                     |  107 +-
 include/llvm/IR/DerivedUser.h                      |    7 +-
 include/llvm/IR/DiagnosticHandler.h                |    9 +-
 include/llvm/IR/DiagnosticInfo.h                   |   20 +-
 include/llvm/IR/DiagnosticPrinter.h                |    7 +-
 include/llvm/IR/DomTreeUpdater.h                   |  257 -
 include/llvm/IR/Dominators.h                       |    7 +-
 include/llvm/IR/Function.h                         |   48 +-
 include/llvm/IR/GVMaterializer.h                   |    7 +-
 include/llvm/IR/GetElementPtrTypeIterator.h        |    7 +-
 include/llvm/IR/GlobalAlias.h                      |    7 +-
 include/llvm/IR/GlobalIFunc.h                      |    7 +-
 include/llvm/IR/GlobalIndirectSymbol.h             |    7 +-
 include/llvm/IR/GlobalObject.h                     |    7 +-
 include/llvm/IR/GlobalValue.h                      |   25 +-
 include/llvm/IR/GlobalVariable.h                   |    7 +-
 include/llvm/IR/IRBuilder.h                        |  340 +-
 include/llvm/IR/IRPrintingPasses.h                 |    7 +-
 include/llvm/IR/InlineAsm.h                        |    7 +-
 include/llvm/IR/InstIterator.h                     |    7 +-
 include/llvm/IR/InstVisitor.h                      |   20 +-
 include/llvm/IR/InstrTypes.h                       |  166 +-
 include/llvm/IR/Instruction.def                    |  146 +-
 include/llvm/IR/Instruction.h                      |   28 +-
 include/llvm/IR/Instructions.h                     |  487 +-
 include/llvm/IR/IntrinsicInst.h                    |  136 +-
 include/llvm/IR/Intrinsics.h                       |   37 +-
 include/llvm/IR/Intrinsics.td                      |  227 +-
 include/llvm/IR/IntrinsicsAArch64.td               |   77 +-
 include/llvm/IR/IntrinsicsAMDGPU.td                |  524 +-
 include/llvm/IR/IntrinsicsARM.td                   |   57 +-
 include/llvm/IR/IntrinsicsBPF.td                   |    7 +-
 include/llvm/IR/IntrinsicsHexagon.td               |  513 +-
 include/llvm/IR/IntrinsicsMips.td                  |  277 +-
 include/llvm/IR/IntrinsicsNVVM.td                  |  465 +-
 include/llvm/IR/IntrinsicsPowerPC.td               |   35 +-
 include/llvm/IR/IntrinsicsRISCV.td                 |   38 +-
 include/llvm/IR/IntrinsicsSystemZ.td               |   66 +-
 include/llvm/IR/IntrinsicsWebAssembly.td           |   45 +-
 include/llvm/IR/IntrinsicsX86.td                   | 1266 ++-
 include/llvm/IR/IntrinsicsXCore.td                 |    7 +-
 include/llvm/IR/LLVMContext.h                      |   42 +-
 include/llvm/IR/LegacyPassManager.h                |    7 +-
 include/llvm/IR/LegacyPassManagers.h               |    7 +-
 include/llvm/IR/LegacyPassNameParser.h             |    7 +-
 include/llvm/IR/MDBuilder.h                        |   18 +-
 include/llvm/IR/Mangler.h                          |    7 +-
 include/llvm/IR/Metadata.def                       |    8 +-
 include/llvm/IR/Metadata.h                         |    7 +-
 include/llvm/IR/Module.h                           |   50 +-
 include/llvm/IR/ModuleSlotTracker.h                |    7 +-
 include/llvm/IR/ModuleSummaryIndex.h               |  253 +-
 include/llvm/IR/ModuleSummaryIndexYAML.h           |   15 +-
 include/llvm/IR/NoFolder.h                         |   11 +-
 include/llvm/IR/OperandTraits.h                    |    7 +-
 include/llvm/IR/Operator.h                         |   14 +-
 include/llvm/IR/OptBisect.h                        |   43 +-
 include/llvm/IR/PassInstrumentation.h              |    7 +-
 include/llvm/IR/PassManager.h                      |   14 +-
 include/llvm/IR/PassManagerInternal.h              |    7 +-
 include/llvm/IR/PassTimingInfo.h                   |   28 +-
 include/llvm/IR/PatternMatch.h                     |   91 +-
 include/llvm/IR/PredIteratorCache.h                |    7 +-
 include/llvm/IR/ProfileSummary.h                   |   10 +-
 include/llvm/IR/RemarkStreamer.h                   |   96 +
 include/llvm/IR/RuntimeLibcalls.def                |   30 +-
 include/llvm/IR/SafepointIRVerifier.h              |   19 +-
 include/llvm/IR/Statepoint.h                       |  149 +-
 include/llvm/IR/SymbolTableListTraits.h            |    7 +-
 include/llvm/IR/TrackingMDRef.h                    |    7 +-
 include/llvm/IR/Type.h                             |   30 +-
 include/llvm/IR/TypeFinder.h                       |    7 +-
 include/llvm/IR/Use.h                              |    9 +-
 include/llvm/IR/UseListOrder.h                     |    7 +-
 include/llvm/IR/User.h                             |    7 +-
 include/llvm/IR/Value.def                          |    7 +-
 include/llvm/IR/Value.h                            |   69 +-
 include/llvm/IR/ValueHandle.h                      |   24 +-
 include/llvm/IR/ValueMap.h                         |    7 +-
 include/llvm/IR/ValueSymbolTable.h                 |    7 +-
 include/llvm/IR/Verifier.h                         |    7 +-
 include/llvm/IRReader/IRReader.h                   |   18 +-
 include/llvm/InitializePasses.h                    |   25 +-
 include/llvm/LTO/Caching.h                         |   11 +-
 include/llvm/LTO/Config.h                          |   35 +-
 include/llvm/LTO/LTO.h                             |   47 +-
 include/llvm/LTO/LTOBackend.h                      |    7 +-
 include/llvm/LTO/SummaryBasedOptimizations.h       |    7 +-
 include/llvm/LTO/legacy/LTOCodeGenerator.h         |    8 +-
 include/llvm/LTO/legacy/LTOModule.h                |   18 +-
 include/llvm/LTO/legacy/ThinLTOCodeGenerator.h     |   49 +-
 include/llvm/LTO/legacy/UpdateCompilerUsed.h       |    7 +-
 include/llvm/LineEditor/LineEditor.h               |    7 +-
 include/llvm/LinkAllIR.h                           |    7 +-
 include/llvm/LinkAllPasses.h                       |   13 +-
 include/llvm/Linker/IRMover.h                      |    7 +-
 include/llvm/Linker/Linker.h                       |    7 +-
 include/llvm/MC/ConstantPools.h                    |    9 +-
 include/llvm/MC/LaneBitmask.h                      |    7 +-
 include/llvm/MC/MCAsmBackend.h                     |   23 +-
 include/llvm/MC/MCAsmInfo.h                        |   24 +-
 include/llvm/MC/MCAsmInfoCOFF.h                    |    7 +-
 include/llvm/MC/MCAsmInfoDarwin.h                  |    7 +-
 include/llvm/MC/MCAsmInfoELF.h                     |    7 +-
 include/llvm/MC/MCAsmInfoWasm.h                    |    7 +-
 include/llvm/MC/MCAsmInfoXCOFF.h                   |   25 +
 include/llvm/MC/MCAsmLayout.h                      |    7 +-
 include/llvm/MC/MCAsmMacro.h                       |    7 +-
 include/llvm/MC/MCAssembler.h                      |    7 +-
 include/llvm/MC/MCCodeEmitter.h                    |    7 +-
 include/llvm/MC/MCCodePadder.h                     |    9 +-
 include/llvm/MC/MCCodeView.h                       |    7 +-
 include/llvm/MC/MCContext.h                        |   54 +-
 include/llvm/MC/MCDirectives.h                     |    8 +-
 include/llvm/MC/MCDisassembler/MCDisassembler.h    |   25 +-
 .../llvm/MC/MCDisassembler/MCExternalSymbolizer.h  |    7 +-
 include/llvm/MC/MCDisassembler/MCRelocationInfo.h  |    7 +-
 include/llvm/MC/MCDisassembler/MCSymbolizer.h      |    7 +-
 include/llvm/MC/MCDwarf.h                          |   91 +-
 include/llvm/MC/MCELFObjectWriter.h                |   11 +-
 include/llvm/MC/MCELFStreamer.h                    |    7 +-
 include/llvm/MC/MCExpr.h                           |   28 +-
 include/llvm/MC/MCFixedLenDisassembler.h           |    7 +-
 include/llvm/MC/MCFixup.h                          |   10 +-
 include/llvm/MC/MCFixupKindInfo.h                  |    7 +-
 include/llvm/MC/MCFragment.h                       |    7 +-
 include/llvm/MC/MCInst.h                           |   10 +-
 include/llvm/MC/MCInstBuilder.h                    |    7 +-
 include/llvm/MC/MCInstPrinter.h                    |   11 +-
 include/llvm/MC/MCInstrAnalysis.h                  |    7 +-
 include/llvm/MC/MCInstrDesc.h                      |   13 +-
 include/llvm/MC/MCInstrInfo.h                      |    7 +-
 include/llvm/MC/MCInstrItineraries.h               |    7 +-
 include/llvm/MC/MCLabel.h                          |    7 +-
 include/llvm/MC/MCLinkerOptimizationHint.h         |    7 +-
 include/llvm/MC/MCMachObjectWriter.h               |    7 +-
 include/llvm/MC/MCObjectFileInfo.h                 |   14 +-
 include/llvm/MC/MCObjectStreamer.h                 |   10 +-
 include/llvm/MC/MCObjectWriter.h                   |    7 +-
 include/llvm/MC/MCParser/AsmCond.h                 |    7 +-
 include/llvm/MC/MCParser/AsmLexer.h                |    7 +-
 include/llvm/MC/MCParser/MCAsmLexer.h              |    7 +-
 include/llvm/MC/MCParser/MCAsmParser.h             |   13 +-
 include/llvm/MC/MCParser/MCAsmParserExtension.h    |    7 +-
 include/llvm/MC/MCParser/MCAsmParserUtils.h        |    7 +-
 include/llvm/MC/MCParser/MCParsedAsmOperand.h      |    7 +-
 include/llvm/MC/MCParser/MCTargetAsmParser.h       |   27 +-
 include/llvm/MC/MCRegisterInfo.h                   |    7 +-
 include/llvm/MC/MCSchedule.h                       |   13 +-
 include/llvm/MC/MCSection.h                        |    9 +-
 include/llvm/MC/MCSectionCOFF.h                    |    9 +-
 include/llvm/MC/MCSectionELF.h                     |    9 +-
 include/llvm/MC/MCSectionMachO.h                   |    7 +-
 include/llvm/MC/MCSectionWasm.h                    |   27 +-
 include/llvm/MC/MCSectionXCOFF.h                   |   56 +
 include/llvm/MC/MCStreamer.h                       |   34 +-
 include/llvm/MC/MCSubtargetInfo.h                  |   71 +-
 include/llvm/MC/MCSymbol.h                         |   36 +-
 include/llvm/MC/MCSymbolCOFF.h                     |    7 +-
 include/llvm/MC/MCSymbolELF.h                      |    7 +-
 include/llvm/MC/MCSymbolMachO.h                    |   12 +-
 include/llvm/MC/MCSymbolWasm.h                     |   40 +-
 include/llvm/MC/MCSymbolXCOFF.h                    |   26 +
 include/llvm/MC/MCTargetOptions.h                  |   28 +-
 include/llvm/MC/MCTargetOptionsCommandFlags.inc    |   18 +-
 include/llvm/MC/MCValue.h                          |    7 +-
 include/llvm/MC/MCWasmObjectWriter.h               |    7 +-
 include/llvm/MC/MCWasmStreamer.h                   |    7 +-
 include/llvm/MC/MCWin64EH.h                        |    7 +-
 include/llvm/MC/MCWinCOFFObjectWriter.h            |    7 +-
 include/llvm/MC/MCWinCOFFStreamer.h                |    7 +-
 include/llvm/MC/MCWinEH.h                          |    7 +-
 include/llvm/MC/MCXCOFFObjectWriter.h              |   41 +
 include/llvm/MC/MCXCOFFStreamer.h                  |   33 +
 include/llvm/MC/MachineLocation.h                  |    7 +-
 include/llvm/MC/SectionKind.h                      |    7 +-
 include/llvm/MC/StringTableBuilder.h               |    7 +-
 include/llvm/MC/SubtargetFeature.h                 |  100 +-
 include/llvm/MCA/Context.h                         |   21 +-
 include/llvm/MCA/HWEventListener.h                 |   38 +-
 include/llvm/MCA/HardwareUnits/HardwareUnit.h      |    7 +-
 include/llvm/MCA/HardwareUnits/LSUnit.h            |  393 +-
 include/llvm/MCA/HardwareUnits/RegisterFile.h      |   10 +-
 include/llvm/MCA/HardwareUnits/ResourceManager.h   |   31 +-
 include/llvm/MCA/HardwareUnits/RetireControlUnit.h |    7 +-
 include/llvm/MCA/HardwareUnits/Scheduler.h         |  138 +-
 include/llvm/MCA/InstrBuilder.h                    |    7 +-
 include/llvm/MCA/Instruction.h                     |  163 +-
 include/llvm/MCA/Pipeline.h                        |    7 +-
 include/llvm/MCA/SourceMgr.h                       |    7 +-
 include/llvm/MCA/Stages/DispatchStage.h            |   13 +-
 include/llvm/MCA/Stages/EntryStage.h               |    7 +-
 include/llvm/MCA/Stages/ExecuteStage.h             |   20 +-
 include/llvm/MCA/Stages/InstructionTables.h        |    7 +-
 include/llvm/MCA/Stages/MicroOpQueueStage.h        |   88 +
 include/llvm/MCA/Stages/RetireStage.h              |    7 +-
 include/llvm/MCA/Stages/Stage.h                    |    7 +-
 include/llvm/MCA/Support.h                         |   33 +-
 include/llvm/Object/Archive.h                      |   40 +-
 include/llvm/Object/ArchiveWriter.h                |   10 +-
 include/llvm/Object/Binary.h                       |   22 +-
 include/llvm/Object/COFF.h                         |   20 +-
 include/llvm/Object/COFFImportFile.h               |   24 +-
 include/llvm/Object/COFFModuleDefinition.h         |    7 +-
 include/llvm/Object/CVDebugRecord.h                |    7 +-
 include/llvm/Object/Decompressor.h                 |    7 +-
 include/llvm/Object/ELF.h                          |  158 +-
 include/llvm/Object/ELFObjectFile.h                |  133 +-
 include/llvm/Object/ELFTypes.h                     |    9 +-
 include/llvm/Object/Error.h                        |    7 +-
 include/llvm/Object/IRObjectFile.h                 |   10 +-
 include/llvm/Object/IRSymtab.h                     |   25 +-
 include/llvm/Object/MachO.h                        |   76 +-
 include/llvm/Object/MachOUniversal.h               |    7 +-
 include/llvm/Object/Minidump.h                     |  165 +
 include/llvm/Object/ModuleSymbolTable.h            |    7 +-
 include/llvm/Object/ObjectFile.h                   |   90 +-
 include/llvm/Object/RelocVisitor.h                 |  351 -
 include/llvm/Object/RelocationResolver.h           |   42 +
 include/llvm/Object/StackMapParser.h               |   50 +-
 include/llvm/Object/SymbolSize.h                   |    7 +-
 include/llvm/Object/SymbolicFile.h                 |   14 +-
 include/llvm/Object/Wasm.h                         |   80 +-
 include/llvm/Object/WasmTraits.h                   |    7 +-
 include/llvm/Object/WindowsMachineFlag.h           |   33 +
 include/llvm/Object/WindowsResource.h              |   47 +-
 include/llvm/Object/XCOFFObjectFile.h              |  268 +
 include/llvm/ObjectYAML/COFFYAML.h                 |    7 +-
 .../llvm/ObjectYAML/CodeViewYAMLDebugSections.h    |    7 +-
 include/llvm/ObjectYAML/CodeViewYAMLSymbols.h      |    7 +-
 include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h  |    7 +-
 include/llvm/ObjectYAML/CodeViewYAMLTypes.h        |    7 +-
 include/llvm/ObjectYAML/DWARFEmitter.h             |    7 +-
 include/llvm/ObjectYAML/DWARFYAML.h                |    7 +-
 include/llvm/ObjectYAML/ELFYAML.h                  |  152 +-
 include/llvm/ObjectYAML/MachOYAML.h                |    7 +-
 include/llvm/ObjectYAML/MinidumpYAML.h             |  239 +
 include/llvm/ObjectYAML/ObjectYAML.h               |    9 +-
 include/llvm/ObjectYAML/WasmYAML.h                 |   69 +-
 include/llvm/ObjectYAML/XCOFFYAML.h                |   71 +
 include/llvm/ObjectYAML/YAML.h                     |   10 +-
 include/llvm/Option/Arg.h                          |   31 +-
 include/llvm/Option/ArgList.h                      |   17 +-
 include/llvm/Option/OptParser.td                   |    7 +-
 include/llvm/Option/OptSpecifier.h                 |    7 +-
 include/llvm/Option/OptTable.h                     |    7 +-
 include/llvm/Option/Option.h                       |   12 +-
 include/llvm/Pass.h                                |    7 +-
 include/llvm/PassAnalysisSupport.h                 |    7 +-
 include/llvm/PassInfo.h                            |    7 +-
 include/llvm/PassRegistry.h                        |    7 +-
 include/llvm/PassSupport.h                         |    7 +-
 include/llvm/Passes/PassBuilder.h                  |  114 +-
 include/llvm/Passes/PassPlugin.h                   |    7 +-
 include/llvm/Passes/StandardInstrumentations.h     |    9 +-
 .../llvm/ProfileData/Coverage/CoverageMapping.h    |    7 +-
 .../ProfileData/Coverage/CoverageMappingReader.h   |   17 +-
 .../ProfileData/Coverage/CoverageMappingWriter.h   |    7 +-
 include/llvm/ProfileData/GCOV.h                    |   19 +-
 include/llvm/ProfileData/InstrProf.h               |  109 +-
 include/llvm/ProfileData/InstrProfData.inc         |   92 +-
 include/llvm/ProfileData/InstrProfReader.h         |   56 +-
 include/llvm/ProfileData/InstrProfWriter.h         |   38 +-
 include/llvm/ProfileData/ProfileCommon.h           |   10 +-
 include/llvm/ProfileData/SampleProf.h              |   50 +-
 include/llvm/ProfileData/SampleProfReader.h        |   16 +-
 include/llvm/ProfileData/SampleProfWriter.h        |    7 +-
 include/llvm/Remarks/Remark.h                      |  113 +
 include/llvm/Remarks/RemarkFormat.h                |   33 +
 include/llvm/Remarks/RemarkParser.h                |   77 +
 include/llvm/Remarks/RemarkSerializer.h            |   68 +
 include/llvm/Remarks/RemarkStringTable.h           |   59 +
 include/llvm/Support/AArch64TargetParser.def       |  109 +-
 include/llvm/Support/AArch64TargetParser.h         |   12 +-
 include/llvm/Support/AMDGPUMetadata.h              |   39 +-
 include/llvm/Support/AMDHSAKernelDescriptor.h      |   33 +-
 include/llvm/Support/ARMAttributeParser.h          |    9 +-
 include/llvm/Support/ARMBuildAttributes.h          |   13 +-
 include/llvm/Support/ARMEHABI.h                    |    7 +-
 include/llvm/Support/ARMTargetParser.def           |   22 +-
 include/llvm/Support/ARMTargetParser.h             |   19 +-
 include/llvm/Support/ARMWinEH.h                    |   11 +-
 include/llvm/Support/AlignOf.h                     |    7 +-
 include/llvm/Support/Allocator.h                   |    7 +-
 include/llvm/Support/ArrayRecycler.h               |    7 +-
 include/llvm/Support/Atomic.h                      |    7 +-
 include/llvm/Support/AtomicOrdering.h              |    7 +-
 include/llvm/Support/BinaryByteStream.h            |    7 +-
 include/llvm/Support/BinaryItemStream.h            |    7 +-
 include/llvm/Support/BinaryStream.h                |    7 +-
 include/llvm/Support/BinaryStreamArray.h           |    7 +-
 include/llvm/Support/BinaryStreamError.h           |    7 +-
 include/llvm/Support/BinaryStreamReader.h          |   19 +-
 include/llvm/Support/BinaryStreamRef.h             |    7 +-
 include/llvm/Support/BinaryStreamWriter.h          |   21 +-
 include/llvm/Support/BlockFrequency.h              |    7 +-
 include/llvm/Support/BranchProbability.h           |   35 +-
 include/llvm/Support/BuryPointer.h                 |    7 +-
 include/llvm/Support/CBindingWrapping.h            |    9 +-
 include/llvm/Support/CFGUpdate.h                   |    7 +-
 include/llvm/Support/COM.h                         |    7 +-
 include/llvm/Support/CRC.h                         |   25 +
 include/llvm/Support/CachePruning.h                |    7 +-
 include/llvm/Support/Capacity.h                    |    7 +-
 include/llvm/Support/Casting.h                     |   17 +-
 include/llvm/Support/CheckedArithmetic.h           |   16 +-
 include/llvm/Support/Chrono.h                      |   13 +-
 include/llvm/Support/CodeGen.h                     |   20 +-
 include/llvm/Support/CodeGenCoverage.h             |    7 +-
 include/llvm/Support/CommandLine.h                 |  137 +-
 include/llvm/Support/Compiler.h                    |   16 +-
 include/llvm/Support/Compression.h                 |    7 +-
 include/llvm/Support/ConvertUTF.h                  |    7 +-
 include/llvm/Support/CrashRecoveryContext.h        |    7 +-
 include/llvm/Support/DJB.h                         |    7 +-
 include/llvm/Support/DOTGraphTraits.h              |    9 +-
 include/llvm/Support/DataExtractor.h               |    7 +-
 include/llvm/Support/DataTypes.h                   |    7 +-
 include/llvm/Support/Debug.h                       |    7 +-
 include/llvm/Support/DebugCounter.h                |    7 +-
 include/llvm/Support/DynamicLibrary.h              |    7 +-
 include/llvm/Support/Endian.h                      |   28 +-
 include/llvm/Support/EndianStream.h                |    7 +-
 include/llvm/Support/Errc.h                        |    7 +-
 include/llvm/Support/Errno.h                       |    7 +-
 include/llvm/Support/Error.h                       |   60 +-
 include/llvm/Support/ErrorHandling.h               |    7 +-
 include/llvm/Support/ErrorOr.h                     |    7 +-
 include/llvm/Support/FileCheck.h                   |  557 +-
 include/llvm/Support/FileOutputBuffer.h            |   12 +-
 include/llvm/Support/FileSystem.h                  |  113 +-
 include/llvm/Support/FileUtilities.h               |    7 +-
 include/llvm/Support/Format.h                      |    7 +-
 include/llvm/Support/FormatAdapters.h              |    7 +-
 include/llvm/Support/FormatCommon.h                |    9 +-
 include/llvm/Support/FormatProviders.h             |    7 +-
 include/llvm/Support/FormatVariadic.h              |    7 +-
 include/llvm/Support/FormatVariadicDetails.h       |    7 +-
 include/llvm/Support/FormattedStream.h             |    7 +-
 include/llvm/Support/GenericDomTree.h              |   19 +-
 include/llvm/Support/GenericDomTreeConstruction.h  |  333 +-
 .../Support/GenericIteratedDominanceFrontier.h     |  209 +
 include/llvm/Support/GlobPattern.h                 |    7 +-
 include/llvm/Support/GraphWriter.h                 |    7 +-
 include/llvm/Support/Host.h                        |    7 +-
 include/llvm/Support/InitLLVM.h                    |   12 +-
 .../llvm/Support/ItaniumManglingCanonicalizer.h    |    7 +-
 include/llvm/Support/JSON.h                        |  180 +-
 include/llvm/Support/JamCRC.h                      |    7 +-
 include/llvm/Support/KnownBits.h                   |   40 +-
 include/llvm/Support/LEB128.h                      |   15 +-
 include/llvm/Support/LineIterator.h                |    7 +-
 include/llvm/Support/LockFileManager.h             |    7 +-
 include/llvm/Support/LowLevelTypeImpl.h            |   53 +-
 include/llvm/Support/MSVCErrorWorkarounds.h        |    7 +-
 include/llvm/Support/MachineValueType.h            |  287 +-
 include/llvm/Support/ManagedStatic.h               |   32 +-
 include/llvm/Support/MathExtras.h                  |   42 +-
 include/llvm/Support/MemAlloc.h                    |   31 +-
 include/llvm/Support/Memory.h                      |   46 +-
 include/llvm/Support/MemoryBuffer.h                |   13 +-
 include/llvm/Support/MipsABIFlags.h                |    7 +-
 include/llvm/Support/Mutex.h                       |    7 +-
 include/llvm/Support/MutexGuard.h                  |    7 +-
 include/llvm/Support/NativeFormatting.h            |    7 +-
 include/llvm/Support/OnDiskHashTable.h             |    7 +-
 include/llvm/Support/Options.h                     |    7 +-
 include/llvm/Support/Parallel.h                    |   11 +-
 include/llvm/Support/Path.h                        |    7 +-
 include/llvm/Support/PluginLoader.h                |    7 +-
 include/llvm/Support/PointerLikeTypeTraits.h       |    7 +-
 include/llvm/Support/PrettyStackTrace.h            |   21 +-
 include/llvm/Support/Printable.h                   |    7 +-
 include/llvm/Support/Process.h                     |   28 +-
 include/llvm/Support/Program.h                     |    7 +-
 include/llvm/Support/RWMutex.h                     |    7 +-
 include/llvm/Support/RandomNumberGenerator.h       |    7 +-
 include/llvm/Support/Recycler.h                    |    7 +-
 include/llvm/Support/RecyclingAllocator.h          |    7 +-
 include/llvm/Support/Regex.h                       |    7 +-
 include/llvm/Support/Registry.h                    |   13 +-
 include/llvm/Support/SHA1.h                        |    7 +-
 include/llvm/Support/SMLoc.h                       |    7 +-
 include/llvm/Support/SMTAPI.h                      |  447 +
 include/llvm/Support/SaveAndRestore.h              |    7 +-
 include/llvm/Support/ScalableSize.h                |   43 +
 include/llvm/Support/ScaledNumber.h                |   11 +-
 include/llvm/Support/ScopedPrinter.h               |    9 +-
 include/llvm/Support/Signals.h                     |   25 +-
 include/llvm/Support/Signposts.h                   |   43 +
 include/llvm/Support/SmallVectorMemoryBuffer.h     |    7 +-
 include/llvm/Support/Solaris/sys/regset.h          |    7 +-
 include/llvm/Support/SourceMgr.h                   |    9 +-
 include/llvm/Support/SpecialCaseList.h             |    7 +-
 include/llvm/Support/StringPool.h                  |    7 +-
 include/llvm/Support/StringSaver.h                 |    7 +-
 include/llvm/Support/SwapByteOrder.h               |   15 +-
 include/llvm/Support/SymbolRemappingReader.h       |    7 +-
 include/llvm/Support/SystemUtils.h                 |    7 +-
 include/llvm/Support/TarWriter.h                   |    7 +-
 include/llvm/Support/TargetOpcodes.def             |   70 +-
 include/llvm/Support/TargetParser.h                |   14 +-
 include/llvm/Support/TargetRegistry.h              |   18 +-
 include/llvm/Support/TargetSelect.h                |    7 +-
 include/llvm/Support/TaskQueue.h                   |    7 +-
 include/llvm/Support/ThreadLocal.h                 |    7 +-
 include/llvm/Support/ThreadPool.h                  |    7 +-
 include/llvm/Support/Threading.h                   |   23 +-
 include/llvm/Support/TimeProfiler.h                |   76 +
 include/llvm/Support/Timer.h                       |   14 +-
 include/llvm/Support/ToolOutputFile.h              |    7 +-
 include/llvm/Support/TrailingObjects.h             |    7 +-
 include/llvm/Support/TrigramIndex.h                |    7 +-
 include/llvm/Support/TypeName.h                    |    7 +-
 include/llvm/Support/Unicode.h                     |    7 +-
 include/llvm/Support/UnicodeCharRanges.h           |    7 +-
 include/llvm/Support/UniqueLock.h                  |    7 +-
 include/llvm/Support/Valgrind.h                    |    7 +-
 include/llvm/Support/VersionTuple.h                |    7 +-
 include/llvm/Support/VirtualFileSystem.h           |   34 +-
 include/llvm/Support/Watchdog.h                    |    7 +-
 include/llvm/Support/Win64EH.h                     |    7 +-
 include/llvm/Support/WindowsError.h                |    7 +-
 include/llvm/Support/WithColor.h                   |    7 +-
 .../llvm/Support/X86DisassemblerDecoderCommon.h    |   48 +-
 include/llvm/Support/X86TargetParser.def           |   16 +-
 include/llvm/Support/YAMLParser.h                  |    7 +-
 include/llvm/Support/YAMLTraits.h                  |   83 +-
 include/llvm/Support/circular_raw_ostream.h        |    7 +-
 include/llvm/Support/raw_os_ostream.h              |    7 +-
 include/llvm/Support/raw_ostream.h                 |    9 +-
 include/llvm/Support/raw_sha1_ostream.h            |    7 +-
 include/llvm/Support/thread.h                      |    7 +-
 include/llvm/Support/type_traits.h                 |  114 +-
 include/llvm/TableGen/Error.h                      |    7 +-
 include/llvm/TableGen/Main.h                       |    7 +-
 include/llvm/TableGen/Record.h                     |  100 +-
 include/llvm/TableGen/SearchableTable.td           |    7 +-
 include/llvm/TableGen/SetTheory.h                  |    7 +-
 include/llvm/TableGen/StringMatcher.h              |    7 +-
 include/llvm/TableGen/StringToOffsetTable.h        |    7 +-
 include/llvm/TableGen/TableGenBackend.h            |    9 +-
 include/llvm/Target/CodeGenCWrappers.h             |    7 +-
 include/llvm/Target/GenericOpcodes.td              |  195 +-
 include/llvm/Target/GlobalISel/RegisterBank.td     |    7 +-
 .../llvm/Target/GlobalISel/SelectionDAGCompat.td   |   23 +-
 include/llvm/Target/GlobalISel/Target.td           |    7 +-
 include/llvm/Target/Target.td                      |  109 +-
 include/llvm/Target/TargetCallingConv.td           |   21 +-
 include/llvm/Target/TargetInstrPredicate.td        |    7 +-
 include/llvm/Target/TargetIntrinsicInfo.h          |    7 +-
 include/llvm/Target/TargetItinerary.td             |    7 +-
 include/llvm/Target/TargetLoweringObjectFile.h     |   12 +-
 include/llvm/Target/TargetMachine.h                |   39 +-
 include/llvm/Target/TargetOptions.h                |   13 +-
 include/llvm/Target/TargetPfmCounters.td           |    7 +-
 include/llvm/Target/TargetSchedule.td              |    9 +-
 include/llvm/Target/TargetSelectionDAG.td          |  178 +-
 include/llvm/Testing/Support/Annotations.h         |   90 +
 include/llvm/Testing/Support/Error.h               |    7 +-
 include/llvm/Testing/Support/SupportHelpers.h      |   56 +-
 include/llvm/TextAPI/ELF/ELFStub.h                 |    7 +-
 include/llvm/TextAPI/ELF/TBEHandler.h              |    7 +-
 include/llvm/TextAPI/MachO/Architecture.def        |   38 +
 include/llvm/TextAPI/MachO/Architecture.h          |   47 +
 include/llvm/TextAPI/MachO/ArchitectureSet.h       |  159 +
 include/llvm/TextAPI/MachO/InterfaceFile.h         |  436 +
 include/llvm/TextAPI/MachO/PackedVersion.h         |   64 +
 include/llvm/TextAPI/MachO/Symbol.h                |   96 +
 include/llvm/TextAPI/MachO/TextAPIReader.h         |   34 +
 include/llvm/TextAPI/MachO/TextAPIWriter.h         |   29 +
 .../llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h  |    7 +-
 include/llvm/ToolDrivers/llvm-lib/LibDriver.h      |    8 +-
 .../AggressiveInstCombine/AggressiveInstCombine.h  |    7 +-
 include/llvm/Transforms/Coroutines.h               |    7 +-
 include/llvm/Transforms/IPO.h                      |   11 +-
 include/llvm/Transforms/IPO/AlwaysInliner.h        |    7 +-
 include/llvm/Transforms/IPO/ArgumentPromotion.h    |    7 +-
 include/llvm/Transforms/IPO/Attributor.h           |  789 ++
 .../llvm/Transforms/IPO/CalledValuePropagation.h   |    7 +-
 include/llvm/Transforms/IPO/ConstantMerge.h        |    7 +-
 include/llvm/Transforms/IPO/CrossDSOCFI.h          |    7 +-
 .../llvm/Transforms/IPO/DeadArgumentElimination.h  |    7 +-
 include/llvm/Transforms/IPO/ElimAvailExtern.h      |    7 +-
 include/llvm/Transforms/IPO/ForceFunctionAttrs.h   |    7 +-
 include/llvm/Transforms/IPO/FunctionAttrs.h        |    7 +-
 include/llvm/Transforms/IPO/FunctionImport.h       |    7 +-
 include/llvm/Transforms/IPO/GlobalDCE.h            |    7 +-
 include/llvm/Transforms/IPO/GlobalOpt.h            |    7 +-
 include/llvm/Transforms/IPO/GlobalSplit.h          |    7 +-
 include/llvm/Transforms/IPO/HotColdSplitting.h     |    7 +-
 include/llvm/Transforms/IPO/InferFunctionAttrs.h   |    7 +-
 include/llvm/Transforms/IPO/Inliner.h              |    7 +-
 include/llvm/Transforms/IPO/Internalize.h          |   13 +-
 include/llvm/Transforms/IPO/LowerTypeTests.h       |    7 +-
 include/llvm/Transforms/IPO/PartialInlining.h      |    7 +-
 include/llvm/Transforms/IPO/PassManagerBuilder.h   |   30 +-
 include/llvm/Transforms/IPO/SCCP.h                 |    7 +-
 include/llvm/Transforms/IPO/SampleProfile.h        |    7 +-
 include/llvm/Transforms/IPO/StripDeadPrototypes.h  |    7 +-
 include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h |    7 +-
 include/llvm/Transforms/IPO/WholeProgramDevirt.h   |    7 +-
 include/llvm/Transforms/InstCombine/InstCombine.h  |    7 +-
 .../Transforms/InstCombine/InstCombineWorklist.h   |    7 +-
 include/llvm/Transforms/Instrumentation.h          |   52 +-
 .../Transforms/Instrumentation/AddressSanitizer.h  |  143 +
 .../Transforms/Instrumentation/BoundsChecking.h    |    7 +-
 .../llvm/Transforms/Instrumentation/CGProfile.h    |    7 +-
 .../Instrumentation/ControlHeightReduction.h       |    7 +-
 .../llvm/Transforms/Instrumentation/GCOVProfiler.h |    7 +-
 .../Instrumentation/HWAddressSanitizer.h           |   41 +
 .../Transforms/Instrumentation/InstrOrderFile.h    |   28 +
 .../Transforms/Instrumentation/InstrProfiling.h    |   15 +-
 .../Transforms/Instrumentation/MemorySanitizer.h   |   30 +-
 .../Instrumentation/PGOInstrumentation.h           |   33 +-
 .../Transforms/Instrumentation/PoisonChecking.h    |   25 +
 .../Transforms/Instrumentation/ThreadSanitizer.h   |    9 +-
 include/llvm/Transforms/ObjCARC.h                  |    7 +-
 include/llvm/Transforms/Scalar.h                   |   38 +-
 include/llvm/Transforms/Scalar/ADCE.h              |    7 +-
 .../Transforms/Scalar/AlignmentFromAssumptions.h   |    7 +-
 include/llvm/Transforms/Scalar/BDCE.h              |    7 +-
 include/llvm/Transforms/Scalar/CallSiteSplitting.h |    7 +-
 include/llvm/Transforms/Scalar/ConstantHoisting.h  |   14 +-
 .../Transforms/Scalar/CorrelatedValuePropagation.h |    7 +-
 include/llvm/Transforms/Scalar/DCE.h               |    7 +-
 .../llvm/Transforms/Scalar/DeadStoreElimination.h  |    7 +-
 include/llvm/Transforms/Scalar/DivRemPairs.h       |    7 +-
 include/llvm/Transforms/Scalar/EarlyCSE.h          |    7 +-
 include/llvm/Transforms/Scalar/Float2Int.h         |    7 +-
 include/llvm/Transforms/Scalar/GVN.h               |    7 +-
 include/llvm/Transforms/Scalar/GVNExpression.h     |    7 +-
 include/llvm/Transforms/Scalar/GuardWidening.h     |   11 +-
 include/llvm/Transforms/Scalar/IVUsersPrinter.h    |    7 +-
 include/llvm/Transforms/Scalar/IndVarSimplify.h    |    7 +-
 .../Scalar/InductiveRangeCheckElimination.h        |    7 +-
 include/llvm/Transforms/Scalar/InstSimplifyPass.h  |    7 +-
 include/llvm/Transforms/Scalar/JumpThreading.h     |    9 +-
 include/llvm/Transforms/Scalar/LICM.h              |   19 +-
 .../Transforms/Scalar/LoopAccessAnalysisPrinter.h  |    7 +-
 include/llvm/Transforms/Scalar/LoopDataPrefetch.h  |    7 +-
 include/llvm/Transforms/Scalar/LoopDeletion.h      |    7 +-
 include/llvm/Transforms/Scalar/LoopDistribute.h    |    7 +-
 include/llvm/Transforms/Scalar/LoopFuse.h          |   30 +
 .../llvm/Transforms/Scalar/LoopIdiomRecognize.h    |    7 +-
 include/llvm/Transforms/Scalar/LoopInstSimplify.h  |    7 +-
 .../llvm/Transforms/Scalar/LoopLoadElimination.h   |    7 +-
 include/llvm/Transforms/Scalar/LoopPassManager.h   |    7 +-
 include/llvm/Transforms/Scalar/LoopPredication.h   |    7 +-
 include/llvm/Transforms/Scalar/LoopRotation.h      |    7 +-
 include/llvm/Transforms/Scalar/LoopSimplifyCFG.h   |    7 +-
 include/llvm/Transforms/Scalar/LoopSink.h          |    7 +-
 .../llvm/Transforms/Scalar/LoopStrengthReduce.h    |    7 +-
 .../llvm/Transforms/Scalar/LoopUnrollAndJamPass.h  |    7 +-
 include/llvm/Transforms/Scalar/LoopUnrollPass.h    |   31 +-
 include/llvm/Transforms/Scalar/LowerAtomic.h       |    7 +-
 .../llvm/Transforms/Scalar/LowerExpectIntrinsic.h  |    7 +-
 .../llvm/Transforms/Scalar/LowerGuardIntrinsic.h   |    7 +-
 .../Transforms/Scalar/LowerWidenableCondition.h    |   26 +
 .../llvm/Transforms/Scalar/MakeGuardsExplicit.h    |    7 +-
 include/llvm/Transforms/Scalar/MemCpyOptimizer.h   |    7 +-
 include/llvm/Transforms/Scalar/MergeICmps.h        |   25 +
 .../llvm/Transforms/Scalar/MergedLoadStoreMotion.h |    7 +-
 include/llvm/Transforms/Scalar/NaryReassociate.h   |    7 +-
 include/llvm/Transforms/Scalar/NewGVN.h            |    7 +-
 .../Transforms/Scalar/PartiallyInlineLibCalls.h    |    7 +-
 include/llvm/Transforms/Scalar/Reassociate.h       |   16 +-
 .../Transforms/Scalar/RewriteStatepointsForGC.h    |    7 +-
 include/llvm/Transforms/Scalar/SCCP.h              |    7 +-
 include/llvm/Transforms/Scalar/SROA.h              |    8 +-
 include/llvm/Transforms/Scalar/Scalarizer.h        |    7 +-
 .../llvm/Transforms/Scalar/SimpleLoopUnswitch.h    |    7 +-
 include/llvm/Transforms/Scalar/SimplifyCFG.h       |    7 +-
 include/llvm/Transforms/Scalar/Sink.h              |    7 +-
 .../llvm/Transforms/Scalar/SpeculateAroundPHIs.h   |    7 +-
 .../llvm/Transforms/Scalar/SpeculativeExecution.h  |    7 +-
 .../Transforms/Scalar/TailRecursionElimination.h   |    7 +-
 .../llvm/Transforms/Scalar/WarnMissedTransforms.h  |    7 +-
 include/llvm/Transforms/Utils.h                    |    7 +-
 .../llvm/Transforms/Utils/ASanStackFrameLayout.h   |    7 +-
 include/llvm/Transforms/Utils/AddDiscriminators.h  |    7 +-
 include/llvm/Transforms/Utils/BasicBlockUtils.h    |   58 +-
 include/llvm/Transforms/Utils/BreakCriticalEdges.h |    7 +-
 include/llvm/Transforms/Utils/BuildLibCalls.h      |   62 +-
 include/llvm/Transforms/Utils/BypassSlowDivision.h |    7 +-
 include/llvm/Transforms/Utils/CallPromotionUtils.h |    7 +-
 .../llvm/Transforms/Utils/CanonicalizeAliases.h    |    7 +-
 include/llvm/Transforms/Utils/Cloning.h            |   19 +-
 include/llvm/Transforms/Utils/CodeExtractor.h      |   21 +-
 include/llvm/Transforms/Utils/CtorUtils.h          |    7 +-
 .../llvm/Transforms/Utils/EntryExitInstrumenter.h  |    7 +-
 include/llvm/Transforms/Utils/EscapeEnumerator.h   |    7 +-
 include/llvm/Transforms/Utils/Evaluator.h          |    7 +-
 include/llvm/Transforms/Utils/FunctionComparator.h |    7 +-
 .../llvm/Transforms/Utils/FunctionImportUtils.h    |   12 +-
 include/llvm/Transforms/Utils/GlobalStatus.h       |    7 +-
 include/llvm/Transforms/Utils/GuardUtils.h         |    7 +-
 .../Utils/ImportedFunctionsInliningStatistics.h    |    9 +-
 include/llvm/Transforms/Utils/IntegerDivision.h    |    7 +-
 include/llvm/Transforms/Utils/LCSSA.h              |    7 +-
 include/llvm/Transforms/Utils/LibCallsShrinkWrap.h |    7 +-
 include/llvm/Transforms/Utils/Local.h              |   45 +-
 include/llvm/Transforms/Utils/LoopRotationUtils.h  |    7 +-
 include/llvm/Transforms/Utils/LoopSimplify.h       |   15 +-
 include/llvm/Transforms/Utils/LoopUtils.h          |   45 +-
 include/llvm/Transforms/Utils/LoopVersioning.h     |    7 +-
 include/llvm/Transforms/Utils/LowerInvoke.h        |    7 +-
 include/llvm/Transforms/Utils/LowerMemIntrinsics.h |    9 +-
 include/llvm/Transforms/Utils/Mem2Reg.h            |    7 +-
 include/llvm/Transforms/Utils/ModuleUtils.h        |   24 +-
 include/llvm/Transforms/Utils/NameAnonGlobals.h    |    7 +-
 include/llvm/Transforms/Utils/PredicateInfo.h      |    7 +-
 include/llvm/Transforms/Utils/PromoteMemToReg.h    |    7 +-
 include/llvm/Transforms/Utils/SSAUpdater.h         |   11 +-
 include/llvm/Transforms/Utils/SSAUpdaterBulk.h     |    7 +-
 include/llvm/Transforms/Utils/SSAUpdaterImpl.h     |    7 +-
 include/llvm/Transforms/Utils/SanitizerStats.h     |    7 +-
 include/llvm/Transforms/Utils/SimplifyIndVar.h     |    7 +-
 include/llvm/Transforms/Utils/SimplifyLibCalls.h   |   45 +-
 include/llvm/Transforms/Utils/SizeOpts.h           |   34 +
 include/llvm/Transforms/Utils/SplitModule.h        |    7 +-
 include/llvm/Transforms/Utils/SymbolRewriter.h     |    7 +-
 .../llvm/Transforms/Utils/UnifyFunctionExitNodes.h |    7 +-
 include/llvm/Transforms/Utils/UnrollLoop.h         |   48 +-
 include/llvm/Transforms/Utils/VNCoercion.h         |    7 +-
 include/llvm/Transforms/Utils/ValueMapper.h        |    7 +-
 include/llvm/Transforms/Vectorize.h                |   12 +-
 .../Transforms/Vectorize/LoadStoreVectorizer.h     |    7 +-
 .../Vectorize/LoopVectorizationLegality.h          |   60 +-
 include/llvm/Transforms/Vectorize/LoopVectorize.h  |   58 +-
 include/llvm/Transforms/Vectorize/SLPVectorizer.h  |    9 +-
 .../llvm/WindowsManifest/WindowsManifestMerger.h   |    7 +-
 include/llvm/WindowsResource/ResourceProcessor.h   |    7 +-
 include/llvm/WindowsResource/ResourceScriptToken.h |    7 +-
 .../llvm/WindowsResource/ResourceScriptTokenList.h |    7 +-
 include/llvm/XRay/BlockIndexer.h                   |    7 +-
 include/llvm/XRay/BlockPrinter.h                   |    7 +-
 include/llvm/XRay/BlockVerifier.h                  |    7 +-
 include/llvm/XRay/FDRLogBuilder.h                  |    7 +-
 include/llvm/XRay/FDRRecordConsumer.h              |    7 +-
 include/llvm/XRay/FDRRecordProducer.h              |    7 +-
 include/llvm/XRay/FDRRecords.h                     |    7 +-
 include/llvm/XRay/FDRTraceExpander.h               |    7 +-
 include/llvm/XRay/FDRTraceWriter.h                 |    7 +-
 include/llvm/XRay/FileHeaderReader.h               |    7 +-
 include/llvm/XRay/Graph.h                          |    7 +-
 include/llvm/XRay/InstrumentationMap.h             |    7 +-
 include/llvm/XRay/Profile.h                        |    7 +-
 include/llvm/XRay/RecordPrinter.h                  |    7 +-
 include/llvm/XRay/Trace.h                          |    7 +-
 include/llvm/XRay/XRayRecord.h                     |    7 +-
 include/llvm/XRay/YAMLXRayRecord.h                 |    7 +-
 include/llvm/module.modulemap                      |    5 +-
 lib/Analysis/AliasAnalysis.cpp                     |  136 +-
 lib/Analysis/AliasAnalysisEvaluator.cpp            |    7 +-
 lib/Analysis/AliasAnalysisSummary.cpp              |   18 +-
 lib/Analysis/AliasAnalysisSummary.h                |   22 +-
 lib/Analysis/AliasSetTracker.cpp                   |  131 +-
 lib/Analysis/Analysis.cpp                          |    7 +-
 lib/Analysis/AssumptionCache.cpp                   |   35 +-
 lib/Analysis/BasicAliasAnalysis.cpp                |  239 +-
 lib/Analysis/BlockFrequencyInfo.cpp                |   12 +-
 lib/Analysis/BlockFrequencyInfoImpl.cpp            |   18 +-
 lib/Analysis/BranchProbabilityInfo.cpp             |   15 +-
 lib/Analysis/CFG.cpp                               |   83 +-
 lib/Analysis/CFGPrinter.cpp                        |    7 +-
 lib/Analysis/CFLAndersAliasAnalysis.cpp            |   16 +-
 lib/Analysis/CFLGraph.h                            |   68 +-
 lib/Analysis/CFLSteensAliasAnalysis.cpp            |    7 +-
 lib/Analysis/CGSCCPassManager.cpp                  |   13 +-
 lib/Analysis/CallGraph.cpp                         |   32 +-
 lib/Analysis/CallGraphSCCPass.cpp                  |   94 +-
 lib/Analysis/CallPrinter.cpp                       |    7 +-
 lib/Analysis/CaptureTracking.cpp                   |   39 +-
 lib/Analysis/CmpInstAnalysis.cpp                   |    7 +-
 lib/Analysis/CodeMetrics.cpp                       |   18 +-
 lib/Analysis/ConstantFolding.cpp                   | 1099 +--
 lib/Analysis/CostModel.cpp                         |    7 +-
 lib/Analysis/Delinearization.cpp                   |    7 +-
 lib/Analysis/DemandedBits.cpp                      |   35 +-
 lib/Analysis/DependenceAnalysis.cpp                |   51 +-
 lib/Analysis/DivergenceAnalysis.cpp                |    7 +-
 lib/Analysis/DomPrinter.cpp                        |    7 +-
 lib/Analysis/DomTreeUpdater.cpp                    |  533 ++
 lib/Analysis/DominanceFrontier.cpp                 |    7 +-
 lib/Analysis/EHPersonalities.cpp                   |    7 +-
 lib/Analysis/GlobalsModRef.cpp                     |   39 +-
 lib/Analysis/GuardUtils.cpp                        |   36 +-
 lib/Analysis/IVDescriptors.cpp                     |   33 +-
 lib/Analysis/IVUsers.cpp                           |    7 +-
 lib/Analysis/IndirectCallPromotionAnalysis.cpp     |    7 +-
 lib/Analysis/InlineCost.cpp                        |  424 +-
 lib/Analysis/InstCount.cpp                         |    7 +-
 lib/Analysis/InstructionPrecedenceTracking.cpp     |   11 +-
 lib/Analysis/InstructionSimplify.cpp               |  713 +-
 lib/Analysis/Interval.cpp                          |    7 +-
 lib/Analysis/IntervalPartition.cpp                 |    7 +-
 lib/Analysis/IteratedDominanceFrontier.cpp         |  110 -
 lib/Analysis/LazyBlockFrequencyInfo.cpp            |    7 +-
 lib/Analysis/LazyBranchProbabilityInfo.cpp         |    7 +-
 lib/Analysis/LazyCallGraph.cpp                     |   20 +-
 lib/Analysis/LazyValueInfo.cpp                     |  192 +-
 lib/Analysis/LegacyDivergenceAnalysis.cpp          |    7 +-
 lib/Analysis/Lint.cpp                              |   15 +-
 lib/Analysis/Loads.cpp                             |   44 +-
 lib/Analysis/LoopAccessAnalysis.cpp                |   94 +-
 lib/Analysis/LoopAnalysisManager.cpp               |   14 +-
 lib/Analysis/LoopInfo.cpp                          |  353 +-
 lib/Analysis/LoopPass.cpp                          |   20 +-
 lib/Analysis/LoopUnrollAnalyzer.cpp                |    7 +-
 lib/Analysis/MemDepPrinter.cpp                     |    7 +-
 lib/Analysis/MemDerefPrinter.cpp                   |   12 +-
 lib/Analysis/MemoryBuiltins.cpp                    |  137 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp          |   42 +-
 lib/Analysis/MemoryLocation.cpp                    |    7 +-
 lib/Analysis/MemorySSA.cpp                         |  315 +-
 lib/Analysis/MemorySSAUpdater.cpp                  |  239 +-
 lib/Analysis/ModuleDebugInfoPrinter.cpp            |    7 +-
 lib/Analysis/ModuleSummaryAnalysis.cpp             |  276 +-
 lib/Analysis/MustExecute.cpp                       |   16 +-
 lib/Analysis/ObjCARCAliasAnalysis.cpp              |   32 +-
 lib/Analysis/ObjCARCAnalysisUtils.cpp              |    7 +-
 lib/Analysis/ObjCARCInstKind.cpp                   |   42 +-
 lib/Analysis/OptimizationRemarkEmitter.cpp         |    7 +-
 lib/Analysis/OrderedBasicBlock.cpp                 |   31 +-
 lib/Analysis/OrderedInstructions.cpp               |    7 +-
 lib/Analysis/PHITransAddr.cpp                      |    7 +-
 lib/Analysis/PhiValues.cpp                         |    7 +-
 lib/Analysis/PostDominators.cpp                    |    7 +-
 lib/Analysis/ProfileSummaryInfo.cpp                |   26 +-
 lib/Analysis/PtrUseVisitor.cpp                     |   15 +-
 lib/Analysis/RegionInfo.cpp                        |    7 +-
 lib/Analysis/RegionPass.cpp                        |   16 +-
 lib/Analysis/RegionPrinter.cpp                     |    7 +-
 lib/Analysis/ScalarEvolution.cpp                   |  794 +-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp      |   14 +-
 lib/Analysis/ScalarEvolutionExpander.cpp           |  267 +-
 lib/Analysis/ScalarEvolutionNormalization.cpp      |    7 +-
 lib/Analysis/ScopedNoAliasAA.cpp                   |   28 +-
 lib/Analysis/StackSafetyAnalysis.cpp               |   11 +-
 lib/Analysis/StratifiedSets.h                      |    7 +-
 lib/Analysis/SyncDependenceAnalysis.cpp            |   35 +-
 lib/Analysis/SyntheticCountsUtils.cpp              |    7 +-
 lib/Analysis/TargetLibraryInfo.cpp                 |  431 +-
 lib/Analysis/TargetTransformInfo.cpp               |  184 +-
 lib/Analysis/Trace.cpp                             |    7 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp            |   35 +-
 lib/Analysis/TypeMetadataUtils.cpp                 |    7 +-
 lib/Analysis/ValueLattice.cpp                      |    7 +-
 lib/Analysis/ValueLatticeUtils.cpp                 |    7 +-
 lib/Analysis/ValueTracking.cpp                     | 1204 ++-
 lib/Analysis/VectorUtils.cpp                       |  148 +-
 lib/AsmParser/LLLexer.cpp                          |   31 +-
 lib/AsmParser/LLLexer.h                            |    7 +-
 lib/AsmParser/LLParser.cpp                         |  711 +-
 lib/AsmParser/LLParser.h                           |   17 +-
 lib/AsmParser/LLToken.h                            |   20 +-
 lib/AsmParser/Parser.cpp                           |    7 +-
 lib/BinaryFormat/AMDGPUMetadataVerifier.cpp        |  160 +-
 lib/BinaryFormat/Dwarf.cpp                         |   13 +-
 lib/BinaryFormat/Magic.cpp                         |   21 +-
 lib/BinaryFormat/Minidump.cpp                      |   14 +
 lib/BinaryFormat/MsgPackDocument.cpp               |  245 +
 lib/BinaryFormat/MsgPackDocumentYAML.cpp           |  249 +
 lib/BinaryFormat/MsgPackReader.cpp                 |    7 +-
 lib/BinaryFormat/MsgPackTypes.cpp                  |  303 -
 lib/BinaryFormat/MsgPackWriter.cpp                 |    7 +-
 lib/BinaryFormat/Wasm.cpp                          |   29 +-
 lib/Bitcode/Reader/BitReader.cpp                   |    7 +-
 lib/Bitcode/Reader/BitcodeAnalyzer.cpp             |  980 ++
 lib/Bitcode/Reader/BitcodeReader.cpp               | 1261 ++-
 lib/Bitcode/Reader/BitstreamReader.cpp             |  390 -
 lib/Bitcode/Reader/MetadataLoader.cpp              |  269 +-
 lib/Bitcode/Reader/MetadataLoader.h                |    7 +-
 lib/Bitcode/Reader/ValueList.cpp                   |   31 +-
 lib/Bitcode/Reader/ValueList.h                     |   44 +-
 lib/Bitcode/Writer/BitWriter.cpp                   |    7 +-
 lib/Bitcode/Writer/BitcodeWriter.cpp               |  244 +-
 lib/Bitcode/Writer/BitcodeWriterPass.cpp           |    7 +-
 lib/Bitcode/Writer/ValueEnumerator.cpp             |   22 +-
 lib/Bitcode/Writer/ValueEnumerator.h               |    7 +-
 lib/Bitstream/Reader/BitstreamReader.cpp           |  510 ++
 lib/CodeGen/AggressiveAntiDepBreaker.cpp           |    7 +-
 lib/CodeGen/AggressiveAntiDepBreaker.h             |    7 +-
 lib/CodeGen/AllocationOrder.cpp                    |    7 +-
 lib/CodeGen/AllocationOrder.h                      |    7 +-
 lib/CodeGen/Analysis.cpp                           |   52 +-
 lib/CodeGen/AntiDepBreaker.h                       |    7 +-
 lib/CodeGen/AsmPrinter/ARMException.cpp            |    7 +-
 lib/CodeGen/AsmPrinter/AccelTable.cpp              |   46 +-
 lib/CodeGen/AsmPrinter/AddressPool.cpp             |   31 +-
 lib/CodeGen/AsmPrinter/AddressPool.h               |    9 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp              |  279 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp         |   31 +-
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp     |  104 +-
 lib/CodeGen/AsmPrinter/ByteStreamer.h              |   17 +-
 lib/CodeGen/AsmPrinter/CodeViewDebug.cpp           |  371 +-
 lib/CodeGen/AsmPrinter/CodeViewDebug.h             |   24 +-
 lib/CodeGen/AsmPrinter/DIE.cpp                     |   26 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp                 |   10 +-
 lib/CodeGen/AsmPrinter/DIEHash.h                   |    7 +-
 .../AsmPrinter/DbgEntityHistoryCalculator.cpp      |  354 +-
 lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp        |   74 +-
 lib/CodeGen/AsmPrinter/DebugLocEntry.h             |  205 +-
 lib/CodeGen/AsmPrinter/DebugLocStream.cpp          |    7 +-
 lib/CodeGen/AsmPrinter/DebugLocStream.h            |    7 +-
 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp       |    7 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp        |  184 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h          |   29 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp              |  668 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.h                |   92 +-
 lib/CodeGen/AsmPrinter/DwarfException.h            |    7 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.cpp         |  120 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.h           |   86 +-
 lib/CodeGen/AsmPrinter/DwarfFile.cpp               |   17 +-
 lib/CodeGen/AsmPrinter/DwarfFile.h                 |   10 +-
 lib/CodeGen/AsmPrinter/DwarfStringPool.cpp         |    7 +-
 lib/CodeGen/AsmPrinter/DwarfStringPool.h           |    7 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp               |  197 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.h                 |   38 +-
 lib/CodeGen/AsmPrinter/EHStreamer.cpp              |   20 +-
 lib/CodeGen/AsmPrinter/EHStreamer.h                |    7 +-
 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp         |    7 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp          |    7 +-
 lib/CodeGen/AsmPrinter/WasmException.cpp           |   11 +-
 lib/CodeGen/AsmPrinter/WasmException.h             |    7 +-
 lib/CodeGen/AsmPrinter/WinCFGuard.cpp              |    7 +-
 lib/CodeGen/AsmPrinter/WinCFGuard.h                |    7 +-
 lib/CodeGen/AsmPrinter/WinException.cpp            |   49 +-
 lib/CodeGen/AsmPrinter/WinException.h              |   10 +-
 lib/CodeGen/AtomicExpandPass.cpp                   |   70 +-
 lib/CodeGen/BasicTargetTransformInfo.cpp           |    7 +-
 lib/CodeGen/BranchFolding.cpp                      |   72 +-
 lib/CodeGen/BranchFolding.h                        |    7 +-
 lib/CodeGen/BranchRelaxation.cpp                   |    7 +-
 lib/CodeGen/BreakFalseDeps.cpp                     |    7 +-
 lib/CodeGen/BuiltinGCs.cpp                         |    7 +-
 lib/CodeGen/CFIInstrInserter.cpp                   |    7 +-
 lib/CodeGen/CalcSpillWeights.cpp                   |    7 +-
 lib/CodeGen/CallingConvLower.cpp                   |    7 +-
 lib/CodeGen/CodeGen.cpp                            |   10 +-
 lib/CodeGen/CodeGenPrepare.cpp                     |  523 +-
 lib/CodeGen/CriticalAntiDepBreaker.cpp             |    7 +-
 lib/CodeGen/CriticalAntiDepBreaker.h               |    7 +-
 lib/CodeGen/DFAPacketizer.cpp                      |    7 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp         |   15 +-
 lib/CodeGen/DetectDeadLanes.cpp                    |    7 +-
 lib/CodeGen/DwarfEHPrepare.cpp                     |   11 +-
 lib/CodeGen/EarlyIfConversion.cpp                  |    7 +-
 lib/CodeGen/EdgeBundles.cpp                        |    9 +-
 lib/CodeGen/ExecutionDomainFix.cpp                 |   16 +-
 lib/CodeGen/ExpandISelPseudos.cpp                  |   74 -
 lib/CodeGen/ExpandMemCmp.cpp                       |   68 +-
 lib/CodeGen/ExpandPostRAPseudos.cpp                |    7 +-
 lib/CodeGen/ExpandReductions.cpp                   |   59 +-
 lib/CodeGen/FEntryInserter.cpp                     |    7 +-
 lib/CodeGen/FaultMaps.cpp                          |    7 +-
 lib/CodeGen/FinalizeISel.cpp                       |   76 +
 lib/CodeGen/FuncletLayout.cpp                      |    7 +-
 lib/CodeGen/GCMetadata.cpp                         |    7 +-
 lib/CodeGen/GCMetadataPrinter.cpp                  |    7 +-
 lib/CodeGen/GCRootLowering.cpp                     |    9 +-
 lib/CodeGen/GCStrategy.cpp                         |    7 +-
 lib/CodeGen/GlobalISel/CSEInfo.cpp                 |   47 +-
 lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp           |   21 +-
 lib/CodeGen/GlobalISel/CallLowering.cpp            |  154 +-
 lib/CodeGen/GlobalISel/Combiner.cpp                |   12 +-
 lib/CodeGen/GlobalISel/CombinerHelper.cpp          |  220 +-
 lib/CodeGen/GlobalISel/GISelChangeObserver.cpp     |    8 +-
 lib/CodeGen/GlobalISel/GlobalISel.cpp              |    7 +-
 lib/CodeGen/GlobalISel/IRTranslator.cpp            | 1284 ++-
 lib/CodeGen/GlobalISel/InstructionSelect.cpp       |   19 +-
 lib/CodeGen/GlobalISel/InstructionSelector.cpp     |   19 +-
 lib/CodeGen/GlobalISel/LegalityPredicates.cpp      |   86 +-
 lib/CodeGen/GlobalISel/LegalizeMutations.cpp       |   54 +-
 lib/CodeGen/GlobalISel/Legalizer.cpp               |   54 +-
 lib/CodeGen/GlobalISel/LegalizerHelper.cpp         | 2936 +++++-
 lib/CodeGen/GlobalISel/LegalizerInfo.cpp           |  186 +-
 lib/CodeGen/GlobalISel/Localizer.cpp               |  233 +-
 lib/CodeGen/GlobalISel/MachineIRBuilder.cpp        |  429 +-
 lib/CodeGen/GlobalISel/RegBankSelect.cpp           |  139 +-
 lib/CodeGen/GlobalISel/RegisterBank.cpp            |    7 +-
 lib/CodeGen/GlobalISel/RegisterBankInfo.cpp        |  115 +-
 lib/CodeGen/GlobalISel/Utils.cpp                   |  159 +-
 lib/CodeGen/GlobalMerge.cpp                        |   29 +-
 lib/CodeGen/HardwareLoops.cpp                      |  463 +
 lib/CodeGen/IfConversion.cpp                       |    9 +-
 lib/CodeGen/ImplicitNullChecks.cpp                 |   25 +-
 lib/CodeGen/IndirectBrExpandPass.cpp               |   15 +-
 lib/CodeGen/InlineSpiller.cpp                      |   52 +-
 lib/CodeGen/InterferenceCache.cpp                  |    7 +-
 lib/CodeGen/InterferenceCache.h                    |    7 +-
 lib/CodeGen/InterleavedAccessPass.cpp              |   19 +-
 lib/CodeGen/InterleavedLoadCombinePass.cpp         |   10 +-
 lib/CodeGen/IntrinsicLowering.cpp                  |  115 +-
 lib/CodeGen/LLVMTargetMachine.cpp                  |   16 +-
 lib/CodeGen/LatencyPriorityQueue.cpp               |    7 +-
 lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp      |    7 +-
 lib/CodeGen/LexicalScopes.cpp                      |    7 +-
 lib/CodeGen/LiveDebugValues.cpp                    |  720 +-
 lib/CodeGen/LiveDebugVariables.cpp                 |  181 +-
 lib/CodeGen/LiveDebugVariables.h                   |    7 +-
 lib/CodeGen/LiveInterval.cpp                       |   64 +-
 lib/CodeGen/LiveIntervalUnion.cpp                  |    7 +-
 lib/CodeGen/LiveIntervals.cpp                      |   13 +-
 lib/CodeGen/LivePhysRegs.cpp                       |    7 +-
 lib/CodeGen/LiveRangeCalc.cpp                      |   16 +-
 lib/CodeGen/LiveRangeCalc.h                        |    7 +-
 lib/CodeGen/LiveRangeEdit.cpp                      |    9 +-
 lib/CodeGen/LiveRangeShrink.cpp                    |    7 +-
 lib/CodeGen/LiveRangeUtils.h                       |    7 +-
 lib/CodeGen/LiveRegMatrix.cpp                      |    7 +-
 lib/CodeGen/LiveRegUnits.cpp                       |   23 +-
 lib/CodeGen/LiveStacks.cpp                         |    7 +-
 lib/CodeGen/LiveVariables.cpp                      |    9 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp           |   23 +-
 lib/CodeGen/LoopTraversal.cpp                      |    7 +-
 lib/CodeGen/LowLevelType.cpp                       |    7 +-
 lib/CodeGen/LowerEmuTLS.cpp                        |    7 +-
 lib/CodeGen/MIRCanonicalizerPass.cpp               |   65 +-
 lib/CodeGen/MIRParser/MILexer.cpp                  |    8 +-
 lib/CodeGen/MIRParser/MILexer.h                    |    8 +-
 lib/CodeGen/MIRParser/MIParser.cpp                 |  574 +-
 lib/CodeGen/MIRParser/MIParser.h                   |  125 -
 lib/CodeGen/MIRParser/MIRParser.cpp                |  184 +-
 lib/CodeGen/MIRPrinter.cpp                         |   67 +-
 lib/CodeGen/MIRPrintingPass.cpp                    |    7 +-
 lib/CodeGen/MachineBasicBlock.cpp                  |   17 +-
 lib/CodeGen/MachineBlockFrequencyInfo.cpp          |    7 +-
 lib/CodeGen/MachineBlockPlacement.cpp              |  398 +-
 lib/CodeGen/MachineBranchProbabilityInfo.cpp       |    7 +-
 lib/CodeGen/MachineCSE.cpp                         |  181 +-
 lib/CodeGen/MachineCombiner.cpp                    |   26 +-
 lib/CodeGen/MachineCopyPropagation.cpp             |    7 +-
 lib/CodeGen/MachineDominanceFrontier.cpp           |    7 +-
 lib/CodeGen/MachineDominators.cpp                  |    7 +-
 lib/CodeGen/MachineFrameInfo.cpp                   |   18 +-
 lib/CodeGen/MachineFunction.cpp                    |   87 +-
 lib/CodeGen/MachineFunctionPass.cpp                |    7 +-
 lib/CodeGen/MachineFunctionPrinterPass.cpp         |    7 +-
 lib/CodeGen/MachineInstr.cpp                       |  128 +-
 lib/CodeGen/MachineInstrBundle.cpp                 |    7 +-
 lib/CodeGen/MachineLICM.cpp                        |    7 +-
 lib/CodeGen/MachineLoopInfo.cpp                    |    7 +-
 lib/CodeGen/MachineModuleInfo.cpp                  |   30 +-
 lib/CodeGen/MachineModuleInfoImpls.cpp             |    7 +-
 lib/CodeGen/MachineOperand.cpp                     |   29 +-
 lib/CodeGen/MachineOptimizationRemarkEmitter.cpp   |    7 +-
 lib/CodeGen/MachineOutliner.cpp                    |   42 +-
 lib/CodeGen/MachinePipeliner.cpp                   |  534 +-
 lib/CodeGen/MachinePostDominators.cpp              |    7 +-
 lib/CodeGen/MachineRegionInfo.cpp                  |    7 +-
 lib/CodeGen/MachineRegisterInfo.cpp                |   20 +-
 lib/CodeGen/MachineSSAUpdater.cpp                  |    7 +-
 lib/CodeGen/MachineScheduler.cpp                   |  144 +-
 lib/CodeGen/MachineSink.cpp                        |   17 +-
 lib/CodeGen/MachineTraceMetrics.cpp                |    7 +-
 lib/CodeGen/MachineVerifier.cpp                    |  510 +-
 lib/CodeGen/MacroFusion.cpp                        |   19 +-
 lib/CodeGen/OptimizePHIs.cpp                       |   14 +-
 lib/CodeGen/PHIElimination.cpp                     |    7 +-
 lib/CodeGen/PHIEliminationUtils.cpp                |    7 +-
 lib/CodeGen/PHIEliminationUtils.h                  |    7 +-
 lib/CodeGen/ParallelCG.cpp                         |    7 +-
 lib/CodeGen/PatchableFunction.cpp                  |    7 +-
 lib/CodeGen/PeepholeOptimizer.cpp                  |   24 +-
 lib/CodeGen/PostRAHazardRecognizer.cpp             |    7 +-
 lib/CodeGen/PostRASchedulerList.cpp                |    7 +-
 lib/CodeGen/PreISelIntrinsicLowering.cpp           |   13 +-
 lib/CodeGen/ProcessImplicitDefs.cpp                |    7 +-
 lib/CodeGen/PrologEpilogInserter.cpp               |  198 +-
 lib/CodeGen/PseudoSourceValue.cpp                  |    7 +-
 lib/CodeGen/ReachingDefAnalysis.cpp                |    7 +-
 lib/CodeGen/RegAllocBase.cpp                       |   23 +-
 lib/CodeGen/RegAllocBase.h                         |    7 +-
 lib/CodeGen/RegAllocBasic.cpp                      |    7 +-
 lib/CodeGen/RegAllocFast.cpp                       |  240 +-
 lib/CodeGen/RegAllocGreedy.cpp                     |   65 +-
 lib/CodeGen/RegAllocPBQP.cpp                       |    7 +-
 lib/CodeGen/RegUsageInfoCollector.cpp              |   90 +-
 lib/CodeGen/RegUsageInfoPropagate.cpp              |    7 +-
 lib/CodeGen/RegisterClassInfo.cpp                  |   11 +-
 lib/CodeGen/RegisterCoalescer.cpp                  |  140 +-
 lib/CodeGen/RegisterCoalescer.h                    |    7 +-
 lib/CodeGen/RegisterPressure.cpp                   |   15 +-
 lib/CodeGen/RegisterScavenging.cpp                 |   45 +-
 lib/CodeGen/RegisterUsageInfo.cpp                  |    7 +-
 lib/CodeGen/RenameIndependentSubregs.cpp           |    7 +-
 lib/CodeGen/ResetMachineFunctionPass.cpp           |    9 +-
 lib/CodeGen/SafeStack.cpp                          |   60 +-
 lib/CodeGen/SafeStackColoring.cpp                  |    7 +-
 lib/CodeGen/SafeStackColoring.h                    |    7 +-
 lib/CodeGen/SafeStackLayout.cpp                    |    7 +-
 lib/CodeGen/SafeStackLayout.h                      |    7 +-
 lib/CodeGen/ScalarizeMaskedMemIntrin.cpp           |  306 +-
 lib/CodeGen/ScheduleDAG.cpp                        |   47 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp                  |   66 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp                 |    7 +-
 lib/CodeGen/ScoreboardHazardRecognizer.cpp         |    7 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp           | 3287 +++++--
 lib/CodeGen/SelectionDAG/FastISel.cpp              |   81 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  |   83 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp          |   92 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.h            |   14 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp           |  472 +-
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp    |  168 +-
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |  447 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp         |    8 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h           |   50 +-
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  |    7 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp     |  181 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp   |  646 +-
 lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp |   11 +-
 lib/CodeGen/SelectionDAG/SDNodeDbgValue.h          |   10 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp       |   10 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp     |   94 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp    |  107 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h      |    7 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp       |    7 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp          | 1429 +--
 .../SelectionDAG/SelectionDAGAddressAnalysis.cpp   |  139 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   | 2305 ++---
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h     |  383 +-
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp    |  152 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp      |  446 +-
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp   |    7 +-
 .../SelectionDAG/SelectionDAGTargetInfo.cpp        |    7 +-
 lib/CodeGen/SelectionDAG/StatepointLowering.cpp    |  109 +-
 lib/CodeGen/SelectionDAG/StatepointLowering.h      |   14 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp        | 1723 +++-
 lib/CodeGen/ShadowStackGCLowering.cpp              |   13 +-
 lib/CodeGen/ShrinkWrap.cpp                         |   16 +-
 lib/CodeGen/SjLjEHPrepare.cpp                      |   31 +-
 lib/CodeGen/SlotIndexes.cpp                        |   24 +-
 lib/CodeGen/SpillPlacement.cpp                     |    7 +-
 lib/CodeGen/SpillPlacement.h                       |    7 +-
 lib/CodeGen/Spiller.h                              |    7 +-
 lib/CodeGen/SplitKit.cpp                           |   16 +-
 lib/CodeGen/SplitKit.h                             |    7 +-
 lib/CodeGen/StackColoring.cpp                      |   16 +-
 lib/CodeGen/StackMapLivenessAnalysis.cpp           |    7 +-
 lib/CodeGen/StackMaps.cpp                          |    7 +-
 lib/CodeGen/StackProtector.cpp                     |   70 +-
 lib/CodeGen/StackSlotColoring.cpp                  |   11 +-
 lib/CodeGen/SwiftErrorValueTracking.cpp            |  312 +
 lib/CodeGen/SwitchLoweringUtils.cpp                |  489 +
 lib/CodeGen/TailDuplication.cpp                    |    7 +-
 lib/CodeGen/TailDuplicator.cpp                     |   16 +-
 lib/CodeGen/TargetFrameLoweringImpl.cpp            |    7 +-
 lib/CodeGen/TargetInstrInfo.cpp                    |   41 +-
 lib/CodeGen/TargetLoweringBase.cpp                 |  137 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp       |   46 +-
 lib/CodeGen/TargetOptionsImpl.cpp                  |    7 +-
 lib/CodeGen/TargetPassConfig.cpp                   |  106 +-
 lib/CodeGen/TargetRegisterInfo.cpp                 |   13 +-
 lib/CodeGen/TargetSchedule.cpp                     |    7 +-
 lib/CodeGen/TargetSubtargetInfo.cpp                |   69 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp          |   12 +-
 lib/CodeGen/UnreachableBlockElim.cpp               |   43 +-
 lib/CodeGen/ValueTypes.cpp                         |   43 +-
 lib/CodeGen/VirtRegMap.cpp                         |    9 +-
 lib/CodeGen/WasmEHPrepare.cpp                      |  180 +-
 lib/CodeGen/WinEHPrepare.cpp                       |   20 +-
 lib/CodeGen/XRayInstrumentation.cpp                |    9 +-
 .../CodeView/AppendingTypeTableBuilder.cpp         |   16 +-
 lib/DebugInfo/CodeView/CVSymbolVisitor.cpp         |   11 +-
 lib/DebugInfo/CodeView/CVTypeVisitor.cpp           |   26 +-
 lib/DebugInfo/CodeView/CodeViewError.cpp           |    9 +-
 lib/DebugInfo/CodeView/CodeViewRecordIO.cpp        |  173 +-
 .../CodeView/ContinuationRecordBuilder.cpp         |   20 +-
 .../CodeView/DebugChecksumsSubsection.cpp          |    7 +-
 lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp  |    7 +-
 lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp |    7 +-
 .../CodeView/DebugFrameDataSubsection.cpp          |    7 +-
 .../CodeView/DebugInlineeLinesSubsection.cpp       |    7 +-
 lib/DebugInfo/CodeView/DebugLinesSubsection.cpp    |    7 +-
 .../CodeView/DebugStringTableSubsection.cpp        |    7 +-
 lib/DebugInfo/CodeView/DebugSubsection.cpp         |    7 +-
 lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp   |    7 +-
 lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp  |    7 +-
 .../CodeView/DebugSymbolRVASubsection.cpp          |    7 +-
 lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp  |    9 +-
 lib/DebugInfo/CodeView/EnumTables.cpp              |   28 +-
 lib/DebugInfo/CodeView/Formatters.cpp              |    7 +-
 lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp  |   16 +-
 .../CodeView/LazyRandomTypeCollection.cpp          |    7 +-
 lib/DebugInfo/CodeView/Line.cpp                    |    7 +-
 lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp |   13 +-
 lib/DebugInfo/CodeView/RecordName.cpp              |    7 +-
 lib/DebugInfo/CodeView/RecordSerialization.cpp     |    7 +-
 lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp    |   18 +-
 lib/DebugInfo/CodeView/StringsAndChecksums.cpp     |    7 +-
 lib/DebugInfo/CodeView/SymbolDumper.cpp            |   42 +-
 lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp     |    7 +-
 lib/DebugInfo/CodeView/SymbolRecordMapping.cpp     |   19 +-
 lib/DebugInfo/CodeView/SymbolSerializer.cpp        |    7 +-
 lib/DebugInfo/CodeView/TypeDumpVisitor.cpp         |   11 +-
 lib/DebugInfo/CodeView/TypeHashing.cpp             |   15 +-
 lib/DebugInfo/CodeView/TypeIndex.cpp               |    7 +-
 lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp      |   13 +-
 lib/DebugInfo/CodeView/TypeRecordHelpers.cpp       |    7 +-
 lib/DebugInfo/CodeView/TypeRecordMapping.cpp       |  266 +-
 lib/DebugInfo/CodeView/TypeStreamMerger.cpp        |    7 +-
 lib/DebugInfo/CodeView/TypeTableCollection.cpp     |   13 +-
 .../DWARF/DWARFAbbreviationDeclaration.cpp         |   15 +-
 lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp      |   21 +-
 lib/DebugInfo/DWARF/DWARFAddressRange.cpp          |    7 +-
 lib/DebugInfo/DWARF/DWARFCompileUnit.cpp           |    7 +-
 lib/DebugInfo/DWARF/DWARFContext.cpp               |  250 +-
 lib/DebugInfo/DWARF/DWARFDataExtractor.cpp         |   23 +-
 lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp           |    9 +-
 lib/DebugInfo/DWARF/DWARFDebugAddr.cpp             |   34 +-
 lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp        |    7 +-
 lib/DebugInfo/DWARF/DWARFDebugAranges.cpp          |   26 +-
 lib/DebugInfo/DWARF/DWARFDebugFrame.cpp            |   20 +-
 lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp        |    7 +-
 lib/DebugInfo/DWARF/DWARFDebugLine.cpp             |  266 +-
 lib/DebugInfo/DWARF/DWARFDebugLoc.cpp              |   34 +-
 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp            |    7 +-
 lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp         |    7 +-
 lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp        |    9 +-
 lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp         |   17 +-
 lib/DebugInfo/DWARF/DWARFDie.cpp                   |  134 +-
 lib/DebugInfo/DWARF/DWARFExpression.cpp            |   93 +-
 lib/DebugInfo/DWARF/DWARFFormValue.cpp             |   84 +-
 lib/DebugInfo/DWARF/DWARFGdbIndex.cpp              |   13 +-
 lib/DebugInfo/DWARF/DWARFListTable.cpp             |   11 +-
 lib/DebugInfo/DWARF/DWARFTypeUnit.cpp              |    7 +-
 lib/DebugInfo/DWARF/DWARFUnit.cpp                  |  147 +-
 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp             |   14 +-
 lib/DebugInfo/DWARF/DWARFVerifier.cpp              |  125 +-
 lib/DebugInfo/GSYM/FunctionInfo.cpp                |   22 +
 lib/DebugInfo/GSYM/InlineInfo.cpp                  |   59 +
 lib/DebugInfo/GSYM/Range.cpp                       |   55 +
 lib/DebugInfo/MSF/MSFBuilder.cpp                   |    7 +-
 lib/DebugInfo/MSF/MSFCommon.cpp                    |    7 +-
 lib/DebugInfo/MSF/MSFError.cpp                     |    9 +-
 lib/DebugInfo/MSF/MappedBlockStream.cpp            |    7 +-
 lib/DebugInfo/PDB/DIA/DIADataStream.cpp            |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp      |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp         |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp   |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp       |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp   |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp       |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp           |    7 +-
 lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp            |    7 +-
 lib/DebugInfo/PDB/DIA/DIAFrameData.cpp             |    7 +-
 lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp        |   11 +-
 lib/DebugInfo/PDB/DIA/DIALineNumber.cpp            |    7 +-
 lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp             |    7 +-
 lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp        |    7 +-
 lib/DebugInfo/PDB/DIA/DIASession.cpp               |    7 +-
 lib/DebugInfo/PDB/DIA/DIASourceFile.cpp            |    7 +-
 lib/DebugInfo/PDB/DIA/DIATable.cpp                 |    7 +-
 lib/DebugInfo/PDB/GenericError.cpp                 |    9 +-
 lib/DebugInfo/PDB/IPDBSourceFile.cpp               |    7 +-
 lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp   |    7 +-
 .../PDB/Native/DbiModuleDescriptorBuilder.cpp      |   13 +-
 lib/DebugInfo/PDB/Native/DbiModuleList.cpp         |    7 +-
 lib/DebugInfo/PDB/Native/DbiStream.cpp             |  108 +-
 lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp      |    7 +-
 lib/DebugInfo/PDB/Native/EnumTables.cpp            |    7 +-
 lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp      |   22 +-
 lib/DebugInfo/PDB/Native/GlobalsStream.cpp         |    7 +-
 lib/DebugInfo/PDB/Native/Hash.cpp                  |    7 +-
 lib/DebugInfo/PDB/Native/HashTable.cpp             |    7 +-
 lib/DebugInfo/PDB/Native/InfoStream.cpp            |    7 +-
 lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp     |    7 +-
 lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp  |   65 +
 lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp     |   23 +-
 lib/DebugInfo/PDB/Native/NamedStreamMap.cpp        |   15 +-
 lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp |    7 +-
 lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp     |    7 +-
 .../PDB/Native/NativeEnumInjectedSources.cpp       |  120 +
 lib/DebugInfo/PDB/Native/NativeEnumModules.cpp     |    7 +-
 lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp       |    7 +-
 lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp       |    7 +-
 lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp       |    7 +-
 lib/DebugInfo/PDB/Native/NativeSession.cpp         |   20 +-
 .../PDB/Native/NativeSymbolEnumerator.cpp          |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypeArray.cpp       |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp     |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp        |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypePointer.cpp     |    7 +-
 lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp         |    7 +-
 lib/DebugInfo/PDB/Native/PDBFile.cpp               |   95 +-
 lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp        |   12 +-
 lib/DebugInfo/PDB/Native/PDBStringTable.cpp        |    7 +-
 lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp |  139 +-
 lib/DebugInfo/PDB/Native/PublicsStream.cpp         |    7 +-
 lib/DebugInfo/PDB/Native/RawError.cpp              |    2 +
 lib/DebugInfo/PDB/Native/SymbolStream.cpp          |    7 +-
 lib/DebugInfo/PDB/Native/TpiHashing.cpp            |    7 +-
 lib/DebugInfo/PDB/Native/TpiStream.cpp             |   20 +-
 lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp      |   16 +-
 lib/DebugInfo/PDB/PDB.cpp                          |    7 +-
 lib/DebugInfo/PDB/PDBContext.cpp                   |   31 +-
 lib/DebugInfo/PDB/PDBExtras.cpp                    |   47 +-
 lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp          |    7 +-
 lib/DebugInfo/PDB/PDBSymDumper.cpp                 |    7 +-
 lib/DebugInfo/PDB/PDBSymbol.cpp                    |    7 +-
 lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp          |    7 +-
 lib/DebugInfo/PDB/PDBSymbolBlock.cpp               |    7 +-
 lib/DebugInfo/PDB/PDBSymbolCompiland.cpp           |   17 +-
 lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp    |    7 +-
 lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp        |    7 +-
 lib/DebugInfo/PDB/PDBSymbolCustom.cpp              |    7 +-
 lib/DebugInfo/PDB/PDBSymbolData.cpp                |    7 +-
 lib/DebugInfo/PDB/PDBSymbolExe.cpp                 |    7 +-
 lib/DebugInfo/PDB/PDBSymbolFunc.cpp                |    7 +-
 lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp        |    7 +-
 lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp      |    7 +-
 lib/DebugInfo/PDB/PDBSymbolLabel.cpp               |    7 +-
 lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp        |    7 +-
 lib/DebugInfo/PDB/PDBSymbolThunk.cpp               |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp           |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp       |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp         |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp          |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp       |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp            |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp          |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp     |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp     |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp         |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp         |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp         |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp             |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp          |    7 +-
 lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp     |    7 +-
 lib/DebugInfo/PDB/PDBSymbolUnknown.cpp             |    7 +-
 lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp      |    7 +-
 lib/DebugInfo/PDB/UDTLayout.cpp                    |    7 +-
 lib/DebugInfo/Symbolize/DIPrinter.cpp              |   38 +-
 lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp |  124 +-
 lib/DebugInfo/Symbolize/SymbolizableObjectFile.h   |   33 +-
 lib/DebugInfo/Symbolize/Symbolize.cpp              |  223 +-
 lib/Demangle/Demangle.cpp                          |   36 +
 lib/Demangle/ItaniumDemangle.cpp                   |    7 +-
 lib/Demangle/MicrosoftDemangle.cpp                 |  397 +-
 lib/Demangle/MicrosoftDemangleNodes.cpp            |   35 +-
 lib/ExecutionEngine/ExecutionEngine.cpp            |   60 +-
 lib/ExecutionEngine/ExecutionEngineBindings.cpp    |    7 +-
 lib/ExecutionEngine/GDBRegistrationListener.cpp    |    7 +-
 .../IntelJITEvents/IntelJITEventListener.cpp       |   21 +-
 .../IntelJITEvents/IntelJITEventsWrapper.h         |    7 +-
 .../IntelJITEvents/ittnotify_config.h              |    7 +-
 .../IntelJITEvents/ittnotify_types.h               |    7 +-
 lib/ExecutionEngine/IntelJITEvents/jitprofiling.c  |    7 +-
 lib/ExecutionEngine/IntelJITEvents/jitprofiling.h  |    7 +-
 lib/ExecutionEngine/Interpreter/Execution.cpp      |   63 +-
 .../Interpreter/ExternalFunctions.cpp              |    7 +-
 lib/ExecutionEngine/Interpreter/Interpreter.cpp    |    7 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h      |    8 +-
 .../JITLink/BasicGOTAndStubsBuilder.h              |   82 +
 lib/ExecutionEngine/JITLink/EHFrameSupport.cpp     |  544 ++
 lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h   |   72 +
 lib/ExecutionEngine/JITLink/JITLink.cpp            |  172 +
 lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp     |  481 +
 lib/ExecutionEngine/JITLink/JITLinkGeneric.h       |  256 +
 .../JITLink/JITLinkMemoryManager.cpp               |  105 +
 lib/ExecutionEngine/JITLink/MachO.cpp              |   78 +
 .../JITLink/MachOAtomGraphBuilder.cpp              |  411 +
 .../JITLink/MachOAtomGraphBuilder.h                |  138 +
 lib/ExecutionEngine/JITLink/MachO_x86_64.cpp       |  608 ++
 lib/ExecutionEngine/MCJIT/MCJIT.cpp                |    7 +-
 lib/ExecutionEngine/MCJIT/MCJIT.h                  |    7 +-
 .../OProfileJIT/OProfileJITEventListener.cpp       |    7 +-
 .../OProfileJIT/OProfileWrapper.cpp                |    7 +-
 lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp   |    7 +-
 lib/ExecutionEngine/Orc/CompileUtils.cpp           |   86 +
 lib/ExecutionEngine/Orc/Core.cpp                   | 1022 +--
 lib/ExecutionEngine/Orc/ExecutionUtils.cpp         |   32 +-
 lib/ExecutionEngine/Orc/IRCompileLayer.cpp         |    7 +-
 lib/ExecutionEngine/Orc/IRTransformLayer.cpp       |    7 +-
 lib/ExecutionEngine/Orc/IndirectionUtils.cpp       |   15 +-
 .../Orc/JITTargetMachineBuilder.cpp                |    7 +-
 lib/ExecutionEngine/Orc/LLJIT.cpp                  |  262 +-
 lib/ExecutionEngine/Orc/Layer.cpp                  |   17 +-
 lib/ExecutionEngine/Orc/LazyReexports.cpp          |   20 +-
 lib/ExecutionEngine/Orc/Legacy.cpp                 |   10 +-
 lib/ExecutionEngine/Orc/NullResolver.cpp           |    7 +-
 lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp     |  483 +
 lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp   |    7 +-
 lib/ExecutionEngine/Orc/OrcABISupport.cpp          |   17 +-
 lib/ExecutionEngine/Orc/OrcCBindings.cpp           |    7 +-
 lib/ExecutionEngine/Orc/OrcCBindingsStack.h        |   64 +-
 lib/ExecutionEngine/Orc/OrcError.cpp               |    7 +-
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp    |   10 +-
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.h      |   32 +-
 lib/ExecutionEngine/Orc/RPCUtils.cpp               |    7 +-
 .../Orc/RTDyldObjectLinkingLayer.cpp               |   66 +-
 lib/ExecutionEngine/Orc/ThreadSafeModule.cpp       |    7 +-
 .../PerfJITEvents/PerfJITEventListener.cpp         |   27 +-
 lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp      |    7 +-
 .../RuntimeDyld/RTDyldMemoryManager.cpp            |   16 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp    |  147 +-
 .../RuntimeDyld/RuntimeDyldCOFF.cpp                |    7 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h  |    7 +-
 .../RuntimeDyld/RuntimeDyldChecker.cpp             |  354 +-
 .../RuntimeDyld/RuntimeDyldCheckerImpl.h           |   59 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp |   10 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h   |    9 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h  |   55 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp               |    7 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h |    7 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h      |   15 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h     |   28 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h    |   23 +-
 .../RuntimeDyld/Targets/RuntimeDyldELFMips.cpp     |    7 +-
 .../RuntimeDyld/Targets/RuntimeDyldELFMips.h       |    7 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h  |    9 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h      |   11 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOI386.h     |    9 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h   |   11 +-
 lib/ExecutionEngine/SectionMemoryManager.cpp       |   34 +-
 lib/ExecutionEngine/TargetSelect.cpp               |    7 +-
 lib/FuzzMutate/FuzzerCLI.cpp                       |    7 +-
 lib/FuzzMutate/IRMutator.cpp                       |    7 +-
 lib/FuzzMutate/OpDescriptor.cpp                    |    7 +-
 lib/FuzzMutate/Operations.cpp                      |    7 +-
 lib/FuzzMutate/RandomIRBuilder.cpp                 |   10 +-
 lib/IR/AbstractCallSite.cpp                        |  134 +
 lib/IR/AsmWriter.cpp                               |  150 +-
 lib/IR/AttributeImpl.h                             |   51 +-
 lib/IR/Attributes.cpp                              |  150 +-
 lib/IR/AutoUpgrade.cpp                             |  406 +-
 lib/IR/BasicBlock.cpp                              |   63 +-
 lib/IR/Comdat.cpp                                  |    7 +-
 lib/IR/ConstantFold.cpp                            |  134 +-
 lib/IR/ConstantFold.h                              |    8 +-
 lib/IR/ConstantRange.cpp                           |  892 +-
 lib/IR/Constants.cpp                               |   61 +-
 lib/IR/ConstantsContext.h                          |    7 +-
 lib/IR/Core.cpp                                    |  158 +-
 lib/IR/DIBuilder.cpp                               |   18 +-
 lib/IR/DataLayout.cpp                              |   36 +-
 lib/IR/DebugInfo.cpp                               |  107 +-
 lib/IR/DebugInfoMetadata.cpp                       |  121 +-
 lib/IR/DebugLoc.cpp                                |    7 +-
 lib/IR/DiagnosticHandler.cpp                       |    7 +-
 lib/IR/DiagnosticInfo.cpp                          |   87 +-
 lib/IR/DiagnosticPrinter.cpp                       |    7 +-
 lib/IR/DomTreeUpdater.cpp                          |  529 --
 lib/IR/Dominators.cpp                              |    7 +-
 lib/IR/Function.cpp                                |  218 +-
 lib/IR/GVMaterializer.cpp                          |    7 +-
 lib/IR/Globals.cpp                                 |   35 +-
 lib/IR/IRBuilder.cpp                               |   55 +-
 lib/IR/IRPrintingPasses.cpp                        |    7 +-
 lib/IR/InlineAsm.cpp                               |    7 +-
 lib/IR/Instruction.cpp                             |   76 +-
 lib/IR/Instructions.cpp                            |  341 +-
 lib/IR/IntrinsicInst.cpp                           |  116 +-
 lib/IR/LLVMContext.cpp                             |   35 +-
 lib/IR/LLVMContextImpl.cpp                         |    7 +-
 lib/IR/LLVMContextImpl.h                           |   48 +-
 lib/IR/LegacyPassManager.cpp                       |   26 +-
 lib/IR/MDBuilder.cpp                               |   57 +-
 lib/IR/Mangler.cpp                                 |   13 +-
 lib/IR/Metadata.cpp                                |   12 +-
 lib/IR/MetadataImpl.h                              |    7 +-
 lib/IR/Module.cpp                                  |   32 +-
 lib/IR/ModuleSummaryIndex.cpp                      |  192 +-
 lib/IR/Operator.cpp                                |    7 +-
 lib/IR/OptBisect.cpp                               |   83 +-
 lib/IR/Pass.cpp                                    |   33 +-
 lib/IR/PassInstrumentation.cpp                     |    7 +-
 lib/IR/PassManager.cpp                             |    7 +-
 lib/IR/PassRegistry.cpp                            |    7 +-
 lib/IR/PassTimingInfo.cpp                          |   28 +-
 lib/IR/ProfileSummary.cpp                          |   13 +-
 lib/IR/RemarkStreamer.cpp                          |  154 +
 lib/IR/SafepointIRVerifier.cpp                     |   18 +-
 lib/IR/Statepoint.cpp                              |   37 +-
 lib/IR/SymbolTableListTraitsImpl.h                 |   10 +-
 lib/IR/Type.cpp                                    |   38 +-
 lib/IR/TypeFinder.cpp                              |    7 +-
 lib/IR/Use.cpp                                     |    7 +-
 lib/IR/User.cpp                                    |    7 +-
 lib/IR/Value.cpp                                   |   85 +-
 lib/IR/ValueSymbolTable.cpp                        |    7 +-
 lib/IR/Verifier.cpp                                |  451 +-
 lib/IRReader/IRReader.cpp                          |   13 +-
 lib/LTO/Caching.cpp                                |   31 +-
 lib/LTO/LTO.cpp                                    |  193 +-
 lib/LTO/LTOBackend.cpp                             |   41 +-
 lib/LTO/LTOCodeGenerator.cpp                       |   60 +-
 lib/LTO/LTOModule.cpp                              |   38 +-
 lib/LTO/SummaryBasedOptimizations.cpp              |    7 +-
 lib/LTO/ThinLTOCodeGenerator.cpp                   |  207 +-
 lib/LTO/UpdateCompilerUsed.cpp                     |    7 +-
 lib/LineEditor/LineEditor.cpp                      |    7 +-
 lib/Linker/IRMover.cpp                             |   74 +-
 lib/Linker/LinkDiagnosticInfo.h                    |    7 +-
 lib/Linker/LinkModules.cpp                         |    7 +-
 lib/MC/ConstantPools.cpp                           |    7 +-
 lib/MC/ELFObjectWriter.cpp                         |   59 +-
 lib/MC/MCAsmBackend.cpp                            |   12 +-
 lib/MC/MCAsmInfo.cpp                               |   11 +-
 lib/MC/MCAsmInfoCOFF.cpp                           |    7 +-
 lib/MC/MCAsmInfoDarwin.cpp                         |    7 +-
 lib/MC/MCAsmInfoELF.cpp                            |    7 +-
 lib/MC/MCAsmInfoWasm.cpp                           |    8 +-
 lib/MC/MCAsmInfoXCOFF.cpp                          |   18 +
 lib/MC/MCAsmMacro.cpp                              |    7 +-
 lib/MC/MCAsmStreamer.cpp                           |   61 +-
 lib/MC/MCAssembler.cpp                             |   24 +-
 lib/MC/MCCodeEmitter.cpp                           |    7 +-
 lib/MC/MCCodePadder.cpp                            |    7 +-
 lib/MC/MCCodeView.cpp                              |    7 +-
 lib/MC/MCContext.cpp                               |   97 +-
 lib/MC/MCDisassembler/Disassembler.cpp             |   35 +-
 lib/MC/MCDisassembler/Disassembler.h               |   41 +-
 lib/MC/MCDisassembler/MCDisassembler.cpp           |   16 +-
 lib/MC/MCDisassembler/MCExternalSymbolizer.cpp     |    7 +-
 lib/MC/MCDisassembler/MCRelocationInfo.cpp         |    7 +-
 lib/MC/MCDisassembler/MCSymbolizer.cpp             |    7 +-
 lib/MC/MCDwarf.cpp                                 |  108 +-
 lib/MC/MCELFObjectTargetWriter.cpp                 |   12 +-
 lib/MC/MCELFStreamer.cpp                           |   10 +-
 lib/MC/MCExpr.cpp                                  |   47 +-
 lib/MC/MCFragment.cpp                              |    7 +-
 lib/MC/MCInst.cpp                                  |    7 +-
 lib/MC/MCInstPrinter.cpp                           |   13 +-
 lib/MC/MCInstrAnalysis.cpp                         |    7 +-
 lib/MC/MCInstrDesc.cpp                             |    7 +-
 lib/MC/MCLabel.cpp                                 |    7 +-
 lib/MC/MCLinkerOptimizationHint.cpp                |    7 +-
 lib/MC/MCMachOStreamer.cpp                         |   11 +-
 lib/MC/MCMachObjectTargetWriter.cpp                |    7 +-
 lib/MC/MCNullStreamer.cpp                          |    7 +-
 lib/MC/MCObjectFileInfo.cpp                        |   28 +-
 lib/MC/MCObjectStreamer.cpp                        |    9 +-
 lib/MC/MCObjectWriter.cpp                          |    7 +-
 lib/MC/MCParser/AsmLexer.cpp                       |   32 +-
 lib/MC/MCParser/AsmParser.cpp                      |   69 +-
 lib/MC/MCParser/COFFAsmParser.cpp                  |    7 +-
 lib/MC/MCParser/DarwinAsmParser.cpp                |    9 +-
 lib/MC/MCParser/ELFAsmParser.cpp                   |   11 +-
 lib/MC/MCParser/MCAsmLexer.cpp                     |    7 +-
 lib/MC/MCParser/MCAsmParser.cpp                    |    7 +-
 lib/MC/MCParser/MCAsmParserExtension.cpp           |    7 +-
 lib/MC/MCParser/MCTargetAsmParser.cpp              |    7 +-
 lib/MC/MCParser/WasmAsmParser.cpp                  |  174 +-
 lib/MC/MCRegisterInfo.cpp                          |    7 +-
 lib/MC/MCSchedule.cpp                              |   23 +-
 lib/MC/MCSection.cpp                               |    7 +-
 lib/MC/MCSectionCOFF.cpp                           |    9 +-
 lib/MC/MCSectionELF.cpp                            |   13 +-
 lib/MC/MCSectionMachO.cpp                          |    7 +-
 lib/MC/MCSectionWasm.cpp                           |   16 +-
 lib/MC/MCSectionXCOFF.cpp                          |   33 +
 lib/MC/MCStreamer.cpp                              |   32 +-
 lib/MC/MCSubtargetInfo.cpp                         |  251 +-
 lib/MC/MCSymbol.cpp                                |    7 +-
 lib/MC/MCSymbolELF.cpp                             |   17 +-
 lib/MC/MCTargetOptions.cpp                         |   18 +-
 lib/MC/MCValue.cpp                                 |    7 +-
 lib/MC/MCWasmObjectTargetWriter.cpp                |   11 +-
 lib/MC/MCWasmStreamer.cpp                          |   26 +-
 lib/MC/MCWin64EH.cpp                               |  108 +-
 lib/MC/MCWinCOFFStreamer.cpp                       |   11 +-
 lib/MC/MCWinEH.cpp                                 |    7 +-
 lib/MC/MCXCOFFObjectTargetWriter.cpp               |   16 +
 lib/MC/MCXCOFFStreamer.cpp                         |   59 +
 lib/MC/MachObjectWriter.cpp                        |   22 +-
 lib/MC/StringTableBuilder.cpp                      |   14 +-
 lib/MC/SubtargetFeature.cpp                        |  206 +-
 lib/MC/WasmObjectWriter.cpp                        |  423 +-
 lib/MC/WinCOFFObjectWriter.cpp                     |    9 +-
 lib/MC/XCOFFObjectWriter.cpp                       |   94 +
 lib/MCA/Context.cpp                                |   14 +-
 lib/MCA/HWEventListener.cpp                        |    7 +-
 lib/MCA/HardwareUnits/HardwareUnit.cpp             |    7 +-
 lib/MCA/HardwareUnits/LSUnit.cpp                   |  256 +-
 lib/MCA/HardwareUnits/RegisterFile.cpp             |   53 +-
 lib/MCA/HardwareUnits/ResourceManager.cpp          |   98 +-
 lib/MCA/HardwareUnits/RetireControlUnit.cpp        |    7 +-
 lib/MCA/HardwareUnits/Scheduler.cpp                |  166 +-
 lib/MCA/InstrBuilder.cpp                           |   37 +-
 lib/MCA/Instruction.cpp                            |  117 +-
 lib/MCA/Pipeline.cpp                               |   12 +-
 lib/MCA/Stages/DispatchStage.cpp                   |   51 +-
 lib/MCA/Stages/EntryStage.cpp                      |   11 +-
 lib/MCA/Stages/ExecuteStage.cpp                    |   83 +-
 lib/MCA/Stages/InstructionTables.cpp               |    7 +-
 lib/MCA/Stages/MicroOpQueueStage.cpp               |   70 +
 lib/MCA/Stages/RetireStage.cpp                     |    7 +-
 lib/MCA/Stages/Stage.cpp                           |    7 +-
 lib/MCA/Support.cpp                                |   28 +-
 lib/Object/Archive.cpp                             |   18 +-
 lib/Object/ArchiveWriter.cpp                       |  149 +-
 lib/Object/Binary.cpp                              |   12 +-
 lib/Object/COFFImportFile.cpp                      |   14 +-
 lib/Object/COFFModuleDefinition.cpp                |    7 +-
 lib/Object/COFFObjectFile.cpp                      |   53 +-
 lib/Object/Decompressor.cpp                        |    7 +-
 lib/Object/ELF.cpp                                 |   39 +-
 lib/Object/ELFObjectFile.cpp                       |   58 +-
 lib/Object/Error.cpp                               |   30 +-
 lib/Object/IRObjectFile.cpp                        |   22 +-
 lib/Object/IRSymtab.cpp                            |   25 +-
 lib/Object/MachOObjectFile.cpp                     |  445 +-
 lib/Object/MachOUniversal.cpp                      |    7 +-
 lib/Object/Minidump.cpp                            |  137 +
 lib/Object/ModuleSymbolTable.cpp                   |    7 +-
 lib/Object/Object.cpp                              |  132 +-
 lib/Object/ObjectFile.cpp                          |   24 +-
 lib/Object/RecordStreamer.cpp                      |    9 +-
 lib/Object/RecordStreamer.h                        |   19 +-
 lib/Object/RelocationResolver.cpp                  |  550 ++
 lib/Object/SymbolSize.cpp                          |    7 +-
 lib/Object/SymbolicFile.cpp                        |   10 +-
 lib/Object/WasmObjectFile.cpp                      |  332 +-
 lib/Object/WindowsMachineFlag.cpp                  |   44 +
 lib/Object/WindowsResource.cpp                     |  203 +-
 lib/Object/XCOFFObjectFile.cpp                     |  584 ++
 lib/ObjectYAML/COFFYAML.cpp                        |   16 +-
 lib/ObjectYAML/CodeViewYAMLDebugSections.cpp       |    7 +-
 lib/ObjectYAML/CodeViewYAMLSymbols.cpp             |   17 +-
 lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp         |    7 +-
 lib/ObjectYAML/CodeViewYAMLTypes.cpp               |   11 +-
 lib/ObjectYAML/DWARFEmitter.cpp                    |    7 +-
 lib/ObjectYAML/DWARFVisitor.cpp                    |    7 +-
 lib/ObjectYAML/DWARFVisitor.h                      |    7 +-
 lib/ObjectYAML/DWARFYAML.cpp                       |    7 +-
 lib/ObjectYAML/ELFYAML.cpp                         |  244 +-
 lib/ObjectYAML/MachOYAML.cpp                       |    7 +-
 lib/ObjectYAML/MinidumpYAML.cpp                    |  673 ++
 lib/ObjectYAML/ObjectYAML.cpp                      |   22 +-
 lib/ObjectYAML/WasmYAML.cpp                        |   80 +-
 lib/ObjectYAML/XCOFFYAML.cpp                       |  109 +
 lib/ObjectYAML/YAML.cpp                            |   14 +-
 lib/OptRemarks/OptRemarksParser.cpp                |  368 -
 lib/Option/Arg.cpp                                 |   10 +-
 lib/Option/ArgList.cpp                             |   22 +-
 lib/Option/OptTable.cpp                            |   81 +-
 lib/Option/Option.cpp                              |  120 +-
 lib/Passes/PassBuilder.cpp                         |  375 +-
 lib/Passes/PassPlugin.cpp                          |    7 +-
 lib/Passes/PassRegistry.def                        |   59 +-
 lib/Passes/StandardInstrumentations.cpp            |    7 +-
 lib/ProfileData/Coverage/CoverageMapping.cpp       |   18 +-
 lib/ProfileData/Coverage/CoverageMappingReader.cpp |  235 +-
 lib/ProfileData/Coverage/CoverageMappingWriter.cpp |   24 +-
 lib/ProfileData/GCOV.cpp                           |   26 +-
 lib/ProfileData/InstrProf.cpp                      |  293 +-
 lib/ProfileData/InstrProfReader.cpp                |   47 +-
 lib/ProfileData/InstrProfWriter.cpp                |  107 +-
 lib/ProfileData/ProfileSummaryBuilder.cpp          |   20 +-
 lib/ProfileData/SampleProf.cpp                     |    7 +-
 lib/ProfileData/SampleProfReader.cpp               |   11 +-
 lib/ProfileData/SampleProfWriter.cpp               |   12 +-
 lib/Remarks/Remark.cpp                             |  132 +
 lib/Remarks/RemarkFormat.cpp                       |   30 +
 lib/Remarks/RemarkParser.cpp                       |  119 +
 lib/Remarks/RemarkStringTable.cpp                  |   48 +
 lib/Remarks/YAMLRemarkParser.cpp                   |  327 +
 lib/Remarks/YAMLRemarkParser.h                     |   96 +
 lib/Remarks/YAMLRemarkSerializer.cpp               |  167 +
 lib/Support/AArch64TargetParser.cpp                |   17 +-
 lib/Support/AMDGPUMetadata.cpp                     |   23 +-
 lib/Support/APFloat.cpp                            |   51 +-
 lib/Support/APInt.cpp                              |   84 +-
 lib/Support/APSInt.cpp                             |   13 +-
 lib/Support/ARMAttributeParser.cpp                 |   32 +-
 lib/Support/ARMBuildAttrs.cpp                      |    8 +-
 lib/Support/ARMTargetParser.cpp                    |  265 +-
 lib/Support/ARMWinEH.cpp                           |    7 +-
 lib/Support/Allocator.cpp                          |    7 +-
 lib/Support/Atomic.cpp                             |    7 +-
 lib/Support/BinaryStreamError.cpp                  |    7 +-
 lib/Support/BinaryStreamReader.cpp                 |   40 +-
 lib/Support/BinaryStreamRef.cpp                    |    7 +-
 lib/Support/BinaryStreamWriter.cpp                 |   20 +-
 lib/Support/BlockFrequency.cpp                     |    7 +-
 lib/Support/BranchProbability.cpp                  |   11 +-
 lib/Support/BuryPointer.cpp                        |    7 +-
 lib/Support/COM.cpp                                |    7 +-
 lib/Support/CRC.cpp                                |   68 +
 lib/Support/CachePruning.cpp                       |   18 +-
 lib/Support/Chrono.cpp                             |    7 +-
 lib/Support/CodeGenCoverage.cpp                    |    7 +-
 lib/Support/CommandLine.cpp                        |  445 +-
 lib/Support/Compression.cpp                        |    7 +-
 lib/Support/ConvertUTF.cpp                         |    7 +-
 lib/Support/ConvertUTFWrapper.cpp                  |    7 +-
 lib/Support/CrashRecoveryContext.cpp               |    7 +-
 lib/Support/DAGDeltaAlgorithm.cpp                  |    7 +-
 lib/Support/DJB.cpp                                |   42 +-
 lib/Support/DataExtractor.cpp                      |   62 +-
 lib/Support/Debug.cpp                              |    7 +-
 lib/Support/DeltaAlgorithm.cpp                     |    7 +-
 lib/Support/DynamicLibrary.cpp                     |    7 +-
 lib/Support/Errno.cpp                              |    9 +-
 lib/Support/Error.cpp                              |    7 +-
 lib/Support/ErrorHandling.cpp                      |   29 +-
 lib/Support/FileCheck.cpp                          | 1122 ++-
 lib/Support/FileOutputBuffer.cpp                   |   81 +-
 lib/Support/FileUtilities.cpp                      |    7 +-
 lib/Support/FoldingSet.cpp                         |    7 +-
 lib/Support/FormatVariadic.cpp                     |    7 +-
 lib/Support/FormattedStream.cpp                    |    7 +-
 lib/Support/GlobPattern.cpp                        |    7 +-
 lib/Support/GraphWriter.cpp                        |    7 +-
 lib/Support/Hashing.cpp                            |    7 +-
 lib/Support/Host.cpp                               |  102 +-
 lib/Support/InitLLVM.cpp                           |    8 +-
 lib/Support/IntEqClasses.cpp                       |    7 +-
 lib/Support/IntervalMap.cpp                        |    7 +-
 lib/Support/ItaniumManglingCanonicalizer.cpp       |    8 +-
 lib/Support/JSON.cpp                               |  221 +-
 lib/Support/JamCRC.cpp                             |    7 +-
 lib/Support/KnownBits.cpp                          |   50 +-
 lib/Support/LEB128.cpp                             |    7 +-
 lib/Support/LineIterator.cpp                       |    7 +-
 lib/Support/LockFileManager.cpp                    |    7 +-
 lib/Support/LowLevelType.cpp                       |   11 +-
 lib/Support/ManagedStatic.cpp                      |    7 +-
 lib/Support/MathExtras.cpp                         |    7 +-
 lib/Support/Memory.cpp                             |   36 +-
 lib/Support/MemoryBuffer.cpp                       |   96 +-
 lib/Support/Mutex.cpp                              |    7 +-
 lib/Support/NativeFormatting.cpp                   |    7 +-
 lib/Support/Optional.cpp                           |   14 +
 lib/Support/Options.cpp                            |    7 +-
 lib/Support/Parallel.cpp                           |   38 +-
 lib/Support/Path.cpp                               |   93 +-
 lib/Support/PluginLoader.cpp                       |    7 +-
 lib/Support/PrettyStackTrace.cpp                   |   92 +-
 lib/Support/Process.cpp                            |    7 +-
 lib/Support/Program.cpp                            |    7 +-
 lib/Support/RWMutex.cpp                            |    7 +-
 lib/Support/RandomNumberGenerator.cpp              |   17 +-
 lib/Support/Regex.cpp                              |    7 +-
 lib/Support/SHA1.cpp                               |    7 +-
 lib/Support/ScaledNumber.cpp                       |    7 +-
 lib/Support/Signals.cpp                            |   11 +-
 lib/Support/Signposts.cpp                          |  119 +
 lib/Support/SmallPtrSet.cpp                        |    7 +-
 lib/Support/SmallVector.cpp                        |    7 +-
 lib/Support/SourceMgr.cpp                          |   18 +-
 lib/Support/SpecialCaseList.cpp                    |    7 +-
 lib/Support/Statistic.cpp                          |   10 +-
 lib/Support/StringExtras.cpp                       |    7 +-
 lib/Support/StringMap.cpp                          |    7 +-
 lib/Support/StringPool.cpp                         |    7 +-
 lib/Support/StringRef.cpp                          |    7 +-
 lib/Support/StringSaver.cpp                        |    7 +-
 lib/Support/SymbolRemappingReader.cpp              |    7 +-
 lib/Support/SystemUtils.cpp                        |    7 +-
 lib/Support/TarWriter.cpp                          |    7 +-
 lib/Support/TargetParser.cpp                       |   61 +-
 lib/Support/TargetRegistry.cpp                     |    7 +-
 lib/Support/ThreadLocal.cpp                        |    7 +-
 lib/Support/ThreadPool.cpp                         |    7 +-
 lib/Support/Threading.cpp                          |    7 +-
 lib/Support/TimeProfiler.cpp                       |  199 +
 lib/Support/Timer.cpp                              |   30 +-
 lib/Support/ToolOutputFile.cpp                     |    7 +-
 lib/Support/TrigramIndex.cpp                       |    7 +-
 lib/Support/Triple.cpp                             |   36 +-
 lib/Support/Twine.cpp                              |    7 +-
 lib/Support/Unicode.cpp                            |    7 +-
 lib/Support/Unix/COM.inc                           |    7 +-
 lib/Support/Unix/DynamicLibrary.inc                |    7 +-
 lib/Support/Unix/Host.inc                          |   24 +-
 lib/Support/Unix/Memory.inc                        |   81 +-
 lib/Support/Unix/Mutex.inc                         |    7 +-
 lib/Support/Unix/Path.inc                          |  219 +-
 lib/Support/Unix/Process.inc                       |   20 +-
 lib/Support/Unix/Program.inc                       |   23 +-
 lib/Support/Unix/RWMutex.inc                       |    7 +-
 lib/Support/Unix/Signals.inc                       |   81 +-
 lib/Support/Unix/ThreadLocal.inc                   |    7 +-
 lib/Support/Unix/Threading.inc                     |   52 +-
 lib/Support/Unix/Unix.h                            |    9 +-
 lib/Support/Unix/Watchdog.inc                      |    7 +-
 lib/Support/Valgrind.cpp                           |    7 +-
 lib/Support/VersionTuple.cpp                       |    7 +-
 lib/Support/VirtualFileSystem.cpp                  |  159 +-
 lib/Support/Watchdog.cpp                           |    7 +-
 lib/Support/Windows/COM.inc                        |    7 +-
 lib/Support/Windows/DynamicLibrary.inc             |    7 +-
 lib/Support/Windows/Host.inc                       |    7 +-
 lib/Support/Windows/Memory.inc                     |   96 +-
 lib/Support/Windows/Mutex.inc                      |    7 +-
 lib/Support/Windows/Path.inc                       |   98 +-
 lib/Support/Windows/Process.inc                    |    9 +-
 lib/Support/Windows/Program.inc                    |    7 +-
 lib/Support/Windows/RWMutex.inc                    |    7 +-
 lib/Support/Windows/Signals.inc                    |   11 +-
 lib/Support/Windows/ThreadLocal.inc                |    7 +-
 lib/Support/Windows/Threading.inc                  |   23 +-
 lib/Support/Windows/Watchdog.inc                   |    7 +-
 lib/Support/Windows/WindowsSupport.h               |    7 +-
 lib/Support/WithColor.cpp                          |    7 +-
 lib/Support/YAMLParser.cpp                         |    7 +-
 lib/Support/YAMLTraits.cpp                         |   58 +-
 lib/Support/Z3Solver.cpp                           |  900 ++
 lib/Support/circular_raw_ostream.cpp               |    7 +-
 lib/Support/raw_os_ostream.cpp                     |    7 +-
 lib/Support/raw_ostream.cpp                        |    9 +-
 lib/TableGen/Error.cpp                             |    7 +-
 lib/TableGen/JSONBackend.cpp                       |    7 +-
 lib/TableGen/Main.cpp                              |    7 +-
 lib/TableGen/Record.cpp                            |  204 +-
 lib/TableGen/SetTheory.cpp                         |    7 +-
 lib/TableGen/StringMatcher.cpp                     |    7 +-
 lib/TableGen/TGLexer.cpp                           |   28 +-
 lib/TableGen/TGLexer.h                             |   15 +-
 lib/TableGen/TGParser.cpp                          |  280 +-
 lib/TableGen/TGParser.h                            |   11 +-
 lib/TableGen/TableGenBackend.cpp                   |    7 +-
 lib/Target/AArch64/AArch64.h                       |    9 +-
 lib/Target/AArch64/AArch64.td                      |   65 +-
 lib/Target/AArch64/AArch64A53Fix835769.cpp         |    7 +-
 lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp   |    7 +-
 lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp    |    7 +-
 lib/Target/AArch64/AArch64AsmPrinter.cpp           |  281 +-
 lib/Target/AArch64/AArch64BranchTargets.cpp        |    7 +-
 lib/Target/AArch64/AArch64CallLowering.cpp         |  205 +-
 lib/Target/AArch64/AArch64CallLowering.h           |   28 +-
 lib/Target/AArch64/AArch64CallingConvention.cpp    |  134 +
 lib/Target/AArch64/AArch64CallingConvention.h      |  156 +-
 lib/Target/AArch64/AArch64CallingConvention.td     |   33 +-
 .../AArch64/AArch64CleanupLocalDynamicTLSPass.cpp  |    7 +-
 lib/Target/AArch64/AArch64CollectLOH.cpp           |    7 +-
 lib/Target/AArch64/AArch64CompressJumpTables.cpp   |   10 +-
 lib/Target/AArch64/AArch64CondBrTuning.cpp         |    7 +-
 lib/Target/AArch64/AArch64ConditionOptimizer.cpp   |    7 +-
 lib/Target/AArch64/AArch64ConditionalCompares.cpp  |    9 +-
 .../AArch64/AArch64DeadRegisterDefinitionsPass.cpp |  108 +-
 lib/Target/AArch64/AArch64ExpandImm.cpp            |  411 +
 lib/Target/AArch64/AArch64ExpandImm.h              |   35 +
 lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp    |  619 +-
 lib/Target/AArch64/AArch64FalkorHWPFFix.cpp        |   13 +-
 lib/Target/AArch64/AArch64FastISel.cpp             |   34 +-
 lib/Target/AArch64/AArch64FrameLowering.cpp        |  215 +-
 lib/Target/AArch64/AArch64FrameLowering.h          |   17 +-
 lib/Target/AArch64/AArch64GenRegisterBankInfo.def  |   11 +-
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp         |  140 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp         |  583 +-
 lib/Target/AArch64/AArch64ISelLowering.h           |   42 +-
 lib/Target/AArch64/AArch64InstrAtomics.td          |    7 +-
 lib/Target/AArch64/AArch64InstrFormats.td          |   50 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp            |  472 +-
 lib/Target/AArch64/AArch64InstrInfo.h              |   51 +-
 lib/Target/AArch64/AArch64InstrInfo.td             |  172 +-
 lib/Target/AArch64/AArch64InstructionSelector.cpp  | 2803 +++++-
 lib/Target/AArch64/AArch64LegalizerInfo.cpp        |  388 +-
 lib/Target/AArch64/AArch64LegalizerInfo.h          |   13 +-
 lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp   |   13 +-
 lib/Target/AArch64/AArch64MCInstLower.cpp          |    7 +-
 lib/Target/AArch64/AArch64MCInstLower.h            |    7 +-
 lib/Target/AArch64/AArch64MachineFunctionInfo.h    |   28 +-
 lib/Target/AArch64/AArch64MacroFusion.cpp          |    7 +-
 lib/Target/AArch64/AArch64MacroFusion.h            |    7 +-
 lib/Target/AArch64/AArch64PBQPRegAlloc.cpp         |    7 +-
 lib/Target/AArch64/AArch64PBQPRegAlloc.h           |    7 +-
 lib/Target/AArch64/AArch64PerfectShuffle.h         |    7 +-
 lib/Target/AArch64/AArch64PfmCounters.td           |    7 +-
 lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp |   11 +-
 lib/Target/AArch64/AArch64PromoteConstant.cpp      |   10 +-
 .../AArch64/AArch64RedundantCopyElimination.cpp    |   11 +-
 lib/Target/AArch64/AArch64RegisterBankInfo.cpp     |  238 +-
 lib/Target/AArch64/AArch64RegisterBankInfo.h       |   20 +-
 lib/Target/AArch64/AArch64RegisterBanks.td         |    7 +-
 lib/Target/AArch64/AArch64RegisterInfo.cpp         |   49 +-
 lib/Target/AArch64/AArch64RegisterInfo.h           |   11 +-
 lib/Target/AArch64/AArch64RegisterInfo.td          |   26 +-
 lib/Target/AArch64/AArch64SIMDInstrOpt.cpp         |    7 +-
 lib/Target/AArch64/AArch64SVEInstrInfo.td          |  426 +-
 lib/Target/AArch64/AArch64SchedA53.td              |    9 +-
 lib/Target/AArch64/AArch64SchedA57.td              |    9 +-
 lib/Target/AArch64/AArch64SchedA57WriteRes.td      |    7 +-
 lib/Target/AArch64/AArch64SchedCyclone.td          |    9 +-
 lib/Target/AArch64/AArch64SchedExynosM1.td         |    9 +-
 lib/Target/AArch64/AArch64SchedExynosM3.td         |    9 +-
 lib/Target/AArch64/AArch64SchedExynosM4.td         |   45 +-
 lib/Target/AArch64/AArch64SchedFalkor.td           |    9 +-
 lib/Target/AArch64/AArch64SchedFalkorDetails.td    |    7 +-
 lib/Target/AArch64/AArch64SchedKryo.td             |    9 +-
 lib/Target/AArch64/AArch64SchedKryoDetails.td      |    7 +-
 lib/Target/AArch64/AArch64SchedPredExynos.td       |   18 +-
 lib/Target/AArch64/AArch64SchedPredicates.td       |   60 +-
 lib/Target/AArch64/AArch64SchedThunderX.td         |    9 +-
 lib/Target/AArch64/AArch64SchedThunderX2T99.td     |    9 +-
 lib/Target/AArch64/AArch64Schedule.td              |    7 +-
 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp     |   95 +-
 lib/Target/AArch64/AArch64SelectionDAGInfo.h       |   11 +-
 lib/Target/AArch64/AArch64SpeculationHardening.cpp |  182 +-
 lib/Target/AArch64/AArch64StackTagging.cpp         |  345 +
 lib/Target/AArch64/AArch64StorePairSuppress.cpp    |    9 +-
 lib/Target/AArch64/AArch64Subtarget.cpp            |    8 +-
 lib/Target/AArch64/AArch64Subtarget.h              |   40 +-
 lib/Target/AArch64/AArch64SystemOperands.td        |    8 +-
 lib/Target/AArch64/AArch64TargetMachine.cpp        |   37 +-
 lib/Target/AArch64/AArch64TargetMachine.h          |    7 +-
 lib/Target/AArch64/AArch64TargetObjectFile.cpp     |    7 +-
 lib/Target/AArch64/AArch64TargetObjectFile.h       |    7 +-
 lib/Target/AArch64/AArch64TargetTransformInfo.cpp  |   15 +-
 lib/Target/AArch64/AArch64TargetTransformInfo.h    |   11 +-
 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp  |  102 +-
 .../AArch64/Disassembler/AArch64Disassembler.cpp   |   49 +-
 .../AArch64/Disassembler/AArch64Disassembler.h     |    7 +-
 .../Disassembler/AArch64ExternalSymbolizer.cpp     |    7 +-
 .../Disassembler/AArch64ExternalSymbolizer.h       |    7 +-
 .../AArch64/InstPrinter/AArch64InstPrinter.cpp     | 1582 ----
 .../AArch64/InstPrinter/AArch64InstPrinter.h       |  223 -
 .../AArch64/MCTargetDesc/AArch64AddressingModes.h  |    7 +-
 .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp     |   54 +-
 .../MCTargetDesc/AArch64ELFObjectWriter.cpp        |    9 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp    |   11 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.h      |    7 +-
 .../AArch64/MCTargetDesc/AArch64FixupKinds.h       |    7 +-
 .../AArch64/MCTargetDesc/AArch64InstPrinter.cpp    | 1587 ++++
 .../AArch64/MCTargetDesc/AArch64InstPrinter.h      |  222 +
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp      |   11 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h |    7 +-
 .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp  |   14 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp  |   10 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h    |    9 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp   |  203 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h     |   14 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp       |   17 +-
 .../AArch64/MCTargetDesc/AArch64TargetStreamer.cpp |    8 +-
 .../AArch64/MCTargetDesc/AArch64TargetStreamer.h   |    7 +-
 .../MCTargetDesc/AArch64WinCOFFObjectWriter.cpp    |    7 +-
 .../MCTargetDesc/AArch64WinCOFFStreamer.cpp        |    7 +-
 .../AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h  |    7 +-
 lib/Target/AArch64/SVEInstrFormats.td              | 1340 ++-
 .../AArch64/TargetInfo/AArch64TargetInfo.cpp       |   33 +-
 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h  |   24 +
 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp       |    7 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.h         |   50 +-
 lib/Target/AMDGPU/AMDGPU.h                         |   52 +-
 lib/Target/AMDGPU/AMDGPU.td                        |  570 +-
 lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp          |   41 +-
 lib/Target/AMDGPU/AMDGPUAliasAnalysis.h            |   13 +-
 lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp       |    7 +-
 lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp |   75 +-
 lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp  |    8 +-
 lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp      |   19 +-
 lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h        |   43 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp             |  339 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h               |   17 +-
 lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp        |  314 +-
 lib/Target/AMDGPU/AMDGPUCallLowering.cpp           |  362 +-
 lib/Target/AMDGPU/AMDGPUCallLowering.h             |   20 +-
 lib/Target/AMDGPU/AMDGPUCallingConv.td             |   49 +-
 lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp         |  136 +-
 lib/Target/AMDGPU/AMDGPUFeatures.td                |   18 +-
 lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp    |    7 +-
 lib/Target/AMDGPU/AMDGPUFrameLowering.cpp          |    7 +-
 lib/Target/AMDGPU/AMDGPUFrameLowering.h            |    7 +-
 lib/Target/AMDGPU/AMDGPUGISel.td                   |   55 +-
 lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def    |  113 +-
 lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp    |  220 +-
 lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h      |   41 +-
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp           |  802 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp           |  363 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.h             |   73 +-
 lib/Target/AMDGPU/AMDGPUInline.cpp                 |   45 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.cpp              |    7 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.h                |    7 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.td               |   46 +-
 lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp    | 1469 ++-
 lib/Target/AMDGPU/AMDGPUInstructionSelector.h      |   55 +-
 lib/Target/AMDGPU/AMDGPUInstructions.td            |  267 +-
 lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp          |  103 -
 lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h            |   58 -
 lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp          | 1357 ++-
 lib/Target/AMDGPU/AMDGPULegalizerInfo.h            |   50 +-
 lib/Target/AMDGPU/AMDGPULibCalls.cpp               |  151 +-
 lib/Target/AMDGPU/AMDGPULibFunc.cpp                |   62 +-
 lib/Target/AMDGPU/AMDGPULibFunc.h                  |   11 +-
 lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp        |    7 +-
 lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp   |   38 +-
 lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp  |    7 +-
 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp            |   48 +-
 lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp |    7 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.cpp        |   21 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.h          |    7 +-
 lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp      |   17 +-
 lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h        |   80 +-
 lib/Target/AMDGPU/AMDGPUMacroFusion.cpp            |    7 +-
 lib/Target/AMDGPU/AMDGPUMacroFusion.h              |    7 +-
 .../AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp   |   11 +-
 lib/Target/AMDGPU/AMDGPUPTNote.h                   |    7 +-
 lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp       |   77 +-
 lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h         |   17 +-
 lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp          |   36 +-
 lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp    |  336 +
 lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp        |  353 -
 lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp       | 1782 +++-
 lib/Target/AMDGPU/AMDGPURegisterBankInfo.h         |   52 +-
 lib/Target/AMDGPU/AMDGPURegisterBanks.td           |    9 +-
 lib/Target/AMDGPU/AMDGPURegisterInfo.cpp           |   27 +-
 lib/Target/AMDGPU/AMDGPURegisterInfo.h             |    7 +-
 lib/Target/AMDGPU/AMDGPURegisterInfo.td            |    9 +-
 lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp    |    7 +-
 lib/Target/AMDGPU/AMDGPUSearchableTables.td        |   60 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp              |  263 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.h                |  311 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp          |  307 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.h            |   21 +-
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp       |    7 +-
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.h         |    7 +-
 lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp    |   38 +-
 lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h      |   21 +-
 .../AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp       |   18 +-
 lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp          |    7 +-
 lib/Target/AMDGPU/AMDILCFGStructurizer.cpp         |    7 +-
 lib/Target/AMDGPU/AMDKernelCodeT.h                 |   15 +-
 lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp    | 2828 ++++--
 lib/Target/AMDGPU/BUFInstructions.td               |  957 +-
 lib/Target/AMDGPU/CaymanInstructions.td            |    7 +-
 lib/Target/AMDGPU/DSInstructions.td                |  566 +-
 .../AMDGPU/Disassembler/AMDGPUDisassembler.cpp     |  485 +-
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h       |   32 +-
 lib/Target/AMDGPU/EvergreenInstructions.td         |    7 +-
 lib/Target/AMDGPU/FLATInstructions.td              |  527 +-
 lib/Target/AMDGPU/GCNDPPCombine.cpp                |  259 +-
 lib/Target/AMDGPU/GCNHazardRecognizer.cpp          |  826 +-
 lib/Target/AMDGPU/GCNHazardRecognizer.h            |   41 +-
 lib/Target/AMDGPU/GCNILPSched.cpp                  |    7 +-
 lib/Target/AMDGPU/GCNIterativeScheduler.cpp        |    7 +-
 lib/Target/AMDGPU/GCNIterativeScheduler.h          |    7 +-
 lib/Target/AMDGPU/GCNMinRegStrategy.cpp            |    7 +-
 lib/Target/AMDGPU/GCNNSAReassign.cpp               |  343 +
 lib/Target/AMDGPU/GCNProcessors.td                 |  114 +-
 lib/Target/AMDGPU/GCNRegBankReassign.cpp           |  800 ++
 lib/Target/AMDGPU/GCNRegPressure.cpp               |   22 +-
 lib/Target/AMDGPU/GCNRegPressure.h                 |   61 +-
 lib/Target/AMDGPU/GCNSchedStrategy.cpp             |   35 +-
 lib/Target/AMDGPU/GCNSchedStrategy.h               |   16 +-
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp       | 1413 ---
 lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h  |  250 -
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp       |   65 +-
 .../AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp  |   21 +-
 .../AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp      |    7 +-
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h |    7 +-
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h  |    7 +-
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp      | 1568 ++++
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h |  268 +
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp |   29 +-
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h   |    8 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp    |    7 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h      |   20 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp     |   41 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h       |   12 +-
 .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp   |  218 +-
 .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h     |   40 +-
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp      |   14 +-
 .../AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp       |    7 +-
 lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp |   84 +-
 lib/Target/AMDGPU/MIMGInstructions.td              |  484 +-
 lib/Target/AMDGPU/R600.td                          |    7 +-
 lib/Target/AMDGPU/R600AsmPrinter.cpp               |    7 +-
 lib/Target/AMDGPU/R600AsmPrinter.h                 |    7 +-
 lib/Target/AMDGPU/R600ClauseMergePass.cpp          |    7 +-
 lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp     |    7 +-
 lib/Target/AMDGPU/R600Defines.h                    |    7 +-
 lib/Target/AMDGPU/R600EmitClauseMarkers.cpp        |    7 +-
 lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp      |    7 +-
 lib/Target/AMDGPU/R600FrameLowering.cpp            |    7 +-
 lib/Target/AMDGPU/R600FrameLowering.h              |    7 +-
 lib/Target/AMDGPU/R600ISelLowering.cpp             |   37 +-
 lib/Target/AMDGPU/R600ISelLowering.h               |   14 +-
 lib/Target/AMDGPU/R600InstrFormats.td              |    7 +-
 lib/Target/AMDGPU/R600InstrInfo.cpp                |    8 +-
 lib/Target/AMDGPU/R600InstrInfo.h                  |    7 +-
 lib/Target/AMDGPU/R600Instructions.td              |   35 +-
 lib/Target/AMDGPU/R600MachineFunctionInfo.cpp      |    7 +-
 lib/Target/AMDGPU/R600MachineFunctionInfo.h        |    7 +-
 lib/Target/AMDGPU/R600MachineScheduler.cpp         |    7 +-
 lib/Target/AMDGPU/R600MachineScheduler.h           |    7 +-
 .../AMDGPU/R600OpenCLImageTypeLoweringPass.cpp     |    7 +-
 lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp  |   22 +-
 lib/Target/AMDGPU/R600Packetizer.cpp               |   11 +-
 lib/Target/AMDGPU/R600Processors.td                |   18 +-
 lib/Target/AMDGPU/R600RegisterInfo.cpp             |    9 +-
 lib/Target/AMDGPU/R600RegisterInfo.h               |    9 +-
 lib/Target/AMDGPU/R600Schedule.td                  |    7 +-
 lib/Target/AMDGPU/R700Instructions.td              |    7 +-
 lib/Target/AMDGPU/SIAddIMGInit.cpp                 |    7 +-
 lib/Target/AMDGPU/SIAnnotateControlFlow.cpp        |   64 +-
 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp         |   97 -
 lib/Target/AMDGPU/SIDefines.h                      |  178 +-
 lib/Target/AMDGPU/SIFixSGPRCopies.cpp              |   83 +-
 lib/Target/AMDGPU/SIFixVGPRCopies.cpp              |    7 +-
 lib/Target/AMDGPU/SIFixWWMLiveness.cpp             |  418 -
 lib/Target/AMDGPU/SIFixupVectorISel.cpp            |   12 +-
 lib/Target/AMDGPU/SIFoldOperands.cpp               |  363 +-
 lib/Target/AMDGPU/SIFormMemoryClauses.cpp          |   22 +-
 lib/Target/AMDGPU/SIFrameLowering.cpp              |  810 +-
 lib/Target/AMDGPU/SIFrameLowering.h                |   28 +-
 lib/Target/AMDGPU/SIISelLowering.cpp               | 1918 +++-
 lib/Target/AMDGPU/SIISelLowering.h                 |   49 +-
 lib/Target/AMDGPU/SIInsertSkips.cpp                |   76 +-
 lib/Target/AMDGPU/SIInsertWaitcnts.cpp             |  417 +-
 lib/Target/AMDGPU/SIInstrFormats.td                |   68 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp                  | 1415 ++-
 lib/Target/AMDGPU/SIInstrInfo.h                    |  125 +-
 lib/Target/AMDGPU/SIInstrInfo.td                   |  654 +-
 lib/Target/AMDGPU/SIInstructions.td                |  425 +-
 lib/Target/AMDGPU/SIIntrinsics.td                  |   19 -
 lib/Target/AMDGPU/SILoadStoreOptimizer.cpp         |   60 +-
 lib/Target/AMDGPU/SILowerControlFlow.cpp           |  104 +-
 lib/Target/AMDGPU/SILowerI1Copies.cpp              |  107 +-
 lib/Target/AMDGPU/SILowerSGPRSpills.cpp            |  323 +
 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp        |  271 +-
 lib/Target/AMDGPU/SIMachineFunctionInfo.h          |  377 +-
 lib/Target/AMDGPU/SIMachineScheduler.cpp           |   11 +-
 lib/Target/AMDGPU/SIMachineScheduler.h             |    7 +-
 lib/Target/AMDGPU/SIMemoryLegalizer.cpp            |  322 +-
 lib/Target/AMDGPU/SIModeRegister.cpp               |    9 +-
 lib/Target/AMDGPU/SIOptimizeExecMasking.cpp        |   98 +-
 lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp   |  155 +-
 lib/Target/AMDGPU/SIPeepholeSDWA.cpp               |   36 +-
 lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp         |  221 +
 lib/Target/AMDGPU/SIProgramInfo.h                  |   21 +-
 lib/Target/AMDGPU/SIRegisterInfo.cpp               |  660 +-
 lib/Target/AMDGPU/SIRegisterInfo.h                 |   78 +-
 lib/Target/AMDGPU/SIRegisterInfo.td                |  633 +-
 lib/Target/AMDGPU/SISchedule.td                    |   71 +-
 lib/Target/AMDGPU/SIShrinkInstructions.cpp         |  140 +-
 lib/Target/AMDGPU/SIWholeQuadMode.cpp              |   82 +-
 lib/Target/AMDGPU/SMInstructions.td                |  359 +-
 lib/Target/AMDGPU/SOPInstructions.td               |  666 +-
 lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp  |    9 +-
 lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h    |   29 +
 lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp         |   36 +-
 lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h           |   14 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp         |  410 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h           |  203 +-
 lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp      |  723 ++
 lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h        |  135 +
 lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h       |   11 +-
 lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp    |    7 +-
 lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h      |    7 +-
 lib/Target/AMDGPU/VIInstrFormats.td                |    7 +-
 lib/Target/AMDGPU/VIInstructions.td                |    7 +-
 lib/Target/AMDGPU/VOP1Instructions.td              |  487 +-
 lib/Target/AMDGPU/VOP2Instructions.td              |  889 +-
 lib/Target/AMDGPU/VOP3Instructions.td              |  501 +-
 lib/Target/AMDGPU/VOP3PInstructions.td             |  220 +-
 lib/Target/AMDGPU/VOPCInstructions.td              |  972 +-
 lib/Target/AMDGPU/VOPInstructions.td               |  182 +-
 lib/Target/ARC/ARC.h                               |    8 +-
 lib/Target/ARC/ARC.td                              |    7 +-
 lib/Target/ARC/ARCAsmPrinter.cpp                   |   26 +-
 lib/Target/ARC/ARCBranchFinalize.cpp               |    7 +-
 lib/Target/ARC/ARCCallingConv.td                   |    7 +-
 lib/Target/ARC/ARCExpandPseudos.cpp                |    7 +-
 lib/Target/ARC/ARCFrameLowering.cpp                |   59 +-
 lib/Target/ARC/ARCFrameLowering.h                  |    7 +-
 lib/Target/ARC/ARCISelDAGToDAG.cpp                 |    7 +-
 lib/Target/ARC/ARCISelLowering.cpp                 |    7 +-
 lib/Target/ARC/ARCISelLowering.h                   |    7 +-
 lib/Target/ARC/ARCInstrFormats.td                  |   71 +-
 lib/Target/ARC/ARCInstrInfo.cpp                    |   54 +-
 lib/Target/ARC/ARCInstrInfo.h                      |   17 +-
 lib/Target/ARC/ARCInstrInfo.td                     |  122 +-
 lib/Target/ARC/ARCMCInstLower.cpp                  |    7 +-
 lib/Target/ARC/ARCMCInstLower.h                    |    7 +-
 lib/Target/ARC/ARCMachineFunctionInfo.cpp          |    7 +-
 lib/Target/ARC/ARCMachineFunctionInfo.h            |    7 +-
 lib/Target/ARC/ARCOptAddrMode.cpp                  |  507 ++
 lib/Target/ARC/ARCRegisterInfo.cpp                 |   15 +-
 lib/Target/ARC/ARCRegisterInfo.h                   |    9 +-
 lib/Target/ARC/ARCRegisterInfo.td                  |    7 +-
 lib/Target/ARC/ARCSubtarget.cpp                    |    7 +-
 lib/Target/ARC/ARCSubtarget.h                      |    7 +-
 lib/Target/ARC/ARCTargetMachine.cpp                |   13 +-
 lib/Target/ARC/ARCTargetMachine.h                  |    7 +-
 lib/Target/ARC/ARCTargetStreamer.h                 |    7 +-
 lib/Target/ARC/ARCTargetTransformInfo.h            |    7 +-
 lib/Target/ARC/Disassembler/ARCDisassembler.cpp    |    8 +-
 lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp      |  180 -
 lib/Target/ARC/InstPrinter/ARCInstPrinter.h        |   46 -
 lib/Target/ARC/MCTargetDesc/ARCInfo.h              |    7 +-
 lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp     |  179 +
 lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h       |   45 +
 lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp       |    7 +-
 lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h         |    7 +-
 lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp    |   11 +-
 lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h      |    9 +-
 lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp        |    9 +-
 lib/Target/ARC/TargetInfo/ARCTargetInfo.h          |   20 +
 lib/Target/ARM/A15SDOptimizer.cpp                  |    7 +-
 lib/Target/ARM/ARM.h                               |   18 +-
 lib/Target/ARM/ARM.td                              |  185 +-
 lib/Target/ARM/ARMAsmPrinter.cpp                   |  153 +-
 lib/Target/ARM/ARMAsmPrinter.h                     |   14 +-
 lib/Target/ARM/ARMBaseInstrInfo.cpp                |  412 +-
 lib/Target/ARM/ARMBaseInstrInfo.h                  |   72 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp             |   51 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h               |    9 +-
 lib/Target/ARM/ARMBasicBlockInfo.cpp               |  146 +
 lib/Target/ARM/ARMBasicBlockInfo.h                 |   59 +-
 lib/Target/ARM/ARMCallLowering.cpp                 |  176 +-
 lib/Target/ARM/ARMCallLowering.h                   |   20 +-
 lib/Target/ARM/ARMCallingConv.cpp                  |  284 +
 lib/Target/ARM/ARMCallingConv.h                    |  308 +-
 lib/Target/ARM/ARMCallingConv.td                   |   52 +-
 lib/Target/ARM/ARMCodeGenPrepare.cpp               |  205 +-
 lib/Target/ARM/ARMComputeBlockSize.cpp             |   81 -
 lib/Target/ARM/ARMConstantIslandPass.cpp           |  246 +-
 lib/Target/ARM/ARMConstantPoolValue.cpp            |    7 +-
 lib/Target/ARM/ARMConstantPoolValue.h              |    7 +-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp            |   28 +-
 lib/Target/ARM/ARMFastISel.cpp                     |   53 +-
 lib/Target/ARM/ARMFeatures.h                       |    7 +-
 lib/Target/ARM/ARMFrameLowering.cpp                |  117 +-
 lib/Target/ARM/ARMFrameLowering.h                  |    7 +-
 lib/Target/ARM/ARMHazardRecognizer.cpp             |    7 +-
 lib/Target/ARM/ARMHazardRecognizer.h               |    7 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp                 |  213 +-
 lib/Target/ARM/ARMISelLowering.cpp                 | 1556 +++-
 lib/Target/ARM/ARMISelLowering.h                   |  101 +-
 lib/Target/ARM/ARMInstrFormats.td                  |  115 +-
 lib/Target/ARM/ARMInstrInfo.cpp                    |    9 +-
 lib/Target/ARM/ARMInstrInfo.h                      |    7 +-
 lib/Target/ARM/ARMInstrInfo.td                     |  380 +-
 lib/Target/ARM/ARMInstrMVE.td                      | 4591 ++++++++++
 lib/Target/ARM/ARMInstrNEON.td                     | 1093 ++-
 lib/Target/ARM/ARMInstrThumb.td                    |   75 +-
 lib/Target/ARM/ARMInstrThumb2.td                   |  487 +-
 lib/Target/ARM/ARMInstrVFP.td                      |  367 +-
 lib/Target/ARM/ARMInstructionSelector.cpp          |  268 +-
 lib/Target/ARM/ARMLegalizerInfo.cpp                |  161 +-
 lib/Target/ARM/ARMLegalizerInfo.h                  |    7 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp           |  149 +-
 lib/Target/ARM/ARMLowOverheadLoops.cpp             |  384 +
 lib/Target/ARM/ARMMCInstLower.cpp                  |    7 +-
 lib/Target/ARM/ARMMachineFunctionInfo.cpp          |    7 +-
 lib/Target/ARM/ARMMachineFunctionInfo.h            |   16 +-
 lib/Target/ARM/ARMMacroFusion.cpp                  |    7 +-
 lib/Target/ARM/ARMMacroFusion.h                    |    7 +-
 lib/Target/ARM/ARMOptimizeBarriersPass.cpp         |    7 +-
 lib/Target/ARM/ARMParallelDSP.cpp                  |  889 +-
 lib/Target/ARM/ARMPerfectShuffle.h                 |    7 +-
 lib/Target/ARM/ARMPredicates.td                    |  211 +
 lib/Target/ARM/ARMRegisterBankInfo.cpp             |   51 +-
 lib/Target/ARM/ARMRegisterBankInfo.h               |    7 +-
 lib/Target/ARM/ARMRegisterBanks.td                 |    7 +-
 lib/Target/ARM/ARMRegisterInfo.cpp                 |    7 +-
 lib/Target/ARM/ARMRegisterInfo.h                   |    7 +-
 lib/Target/ARM/ARMRegisterInfo.td                  |  132 +-
 lib/Target/ARM/ARMSchedule.td                      |    9 +-
 lib/Target/ARM/ARMScheduleA57.td                   |   13 +-
 lib/Target/ARM/ARMScheduleA57WriteRes.td           |    7 +-
 lib/Target/ARM/ARMScheduleA8.td                    |    7 +-
 lib/Target/ARM/ARMScheduleA9.td                    |    7 +-
 lib/Target/ARM/ARMScheduleM3.td                    |   21 -
 lib/Target/ARM/ARMScheduleM4.td                    |  119 +
 lib/Target/ARM/ARMScheduleR52.td                   |    7 +-
 lib/Target/ARM/ARMScheduleSwift.td                 |    7 +-
 lib/Target/ARM/ARMScheduleV6.td                    |    7 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp             |    9 +-
 lib/Target/ARM/ARMSelectionDAGInfo.h               |    7 +-
 lib/Target/ARM/ARMSubtarget.cpp                    |   73 +-
 lib/Target/ARM/ARMSubtarget.h                      |   78 +-
 lib/Target/ARM/ARMSystemRegister.td                |    7 +-
 lib/Target/ARM/ARMTargetMachine.cpp                |   43 +-
 lib/Target/ARM/ARMTargetMachine.h                  |    7 +-
 lib/Target/ARM/ARMTargetObjectFile.cpp             |    7 +-
 lib/Target/ARM/ARMTargetObjectFile.h               |    7 +-
 lib/Target/ARM/ARMTargetTransformInfo.cpp          |  275 +-
 lib/Target/ARM/ARMTargetTransformInfo.h            |   23 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp          | 1739 +++-
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp    | 1391 ++-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp      | 1571 ----
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h        |  243 -
 lib/Target/ARM/LICENSE.TXT                         |   47 -
 lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h   |   11 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp      |  142 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h        |    9 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h  |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h     |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h          |   18 +-
 lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp |   15 +-
 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp     |   11 +-
 lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h        |   16 +-
 lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp     | 1678 ++++
 lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h       |  272 +
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp       |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h         |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp   |  459 +-
 lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp          |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMMCExpr.h            |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp    |   35 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h      |   27 +-
 .../ARM/MCTargetDesc/ARMMachORelocationInfo.cpp    |    7 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp       |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp  |   62 +-
 lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp     |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h       |    7 +-
 .../ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp    |    7 +-
 lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp |    7 +-
 lib/Target/ARM/MLxExpansionPass.cpp                |    7 +-
 lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp        |    9 +-
 lib/Target/ARM/TargetInfo/ARMTargetInfo.h          |   23 +
 lib/Target/ARM/Thumb1FrameLowering.cpp             |  120 +-
 lib/Target/ARM/Thumb1FrameLowering.h               |    7 +-
 lib/Target/ARM/Thumb1InstrInfo.cpp                 |    7 +-
 lib/Target/ARM/Thumb1InstrInfo.h                   |    7 +-
 lib/Target/ARM/Thumb2ITBlockPass.cpp               |  221 +-
 lib/Target/ARM/Thumb2InstrInfo.cpp                 |   58 +-
 lib/Target/ARM/Thumb2InstrInfo.h                   |   13 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp             |   13 +-
 lib/Target/ARM/ThumbRegisterInfo.cpp               |   75 +-
 lib/Target/ARM/ThumbRegisterInfo.h                 |   13 +-
 lib/Target/ARM/Utils/ARMBaseInfo.cpp               |    7 +-
 lib/Target/ARM/Utils/ARMBaseInfo.h                 |   31 +-
 lib/Target/AVR/AVR.h                               |    7 +-
 lib/Target/AVR/AVR.td                              |    7 +-
 lib/Target/AVR/AVRAsmPrinter.cpp                   |   29 +-
 lib/Target/AVR/AVRCallingConv.td                   |    7 +-
 lib/Target/AVR/AVRExpandPseudoInsts.cpp            |   17 +-
 lib/Target/AVR/AVRFrameLowering.cpp                |   12 +-
 lib/Target/AVR/AVRFrameLowering.h                  |    7 +-
 lib/Target/AVR/AVRISelDAGToDAG.cpp                 |    7 +-
 lib/Target/AVR/AVRISelLowering.cpp                 |   55 +-
 lib/Target/AVR/AVRISelLowering.h                   |   20 +-
 lib/Target/AVR/AVRInstrFormats.td                  |    7 +-
 lib/Target/AVR/AVRInstrInfo.cpp                    |   10 +-
 lib/Target/AVR/AVRInstrInfo.h                      |    7 +-
 lib/Target/AVR/AVRInstrInfo.td                     |   53 +-
 lib/Target/AVR/AVRMCInstLower.cpp                  |    7 +-
 lib/Target/AVR/AVRMCInstLower.h                    |    7 +-
 lib/Target/AVR/AVRMachineFunctionInfo.h            |    7 +-
 lib/Target/AVR/AVRRegisterInfo.cpp                 |   30 +-
 lib/Target/AVR/AVRRegisterInfo.h                   |   16 +-
 lib/Target/AVR/AVRRegisterInfo.td                  |   11 +-
 lib/Target/AVR/AVRRelaxMemOperations.cpp           |    7 +-
 lib/Target/AVR/AVRSelectionDAGInfo.h               |    7 +-
 lib/Target/AVR/AVRSubtarget.cpp                    |   19 +-
 lib/Target/AVR/AVRSubtarget.h                      |   12 +-
 lib/Target/AVR/AVRTargetMachine.cpp                |    8 +-
 lib/Target/AVR/AVRTargetMachine.h                  |    7 +-
 lib/Target/AVR/AVRTargetObjectFile.cpp             |    7 +-
 lib/Target/AVR/AVRTargetObjectFile.h               |    7 +-
 lib/Target/AVR/AsmParser/AVRAsmParser.cpp          |   24 +-
 lib/Target/AVR/Disassembler/AVRDisassembler.cpp    |    8 +-
 lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp      |  171 -
 lib/Target/AVR/InstPrinter/AVRInstPrinter.h        |   54 -
 lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp      |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h        |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h       |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h        |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp     |  170 +
 lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h       |   53 +
 lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp       |    8 +-
 lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h         |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp   |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h     |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp   |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h     |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp          |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCExpr.h            |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp    |   10 +-
 lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h      |    9 +-
 lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp  |    7 +-
 lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h    |    7 +-
 lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp        |    9 +-
 lib/Target/AVR/TargetInfo/AVRTargetInfo.h          |   18 +
 lib/Target/BPF/AsmParser/BPFAsmParser.cpp          |   10 +-
 lib/Target/BPF/BPF.h                               |   12 +-
 lib/Target/BPF/BPF.td                              |    8 +-
 lib/Target/BPF/BPFAbstractMemberAccess.cpp         |  482 +
 lib/Target/BPF/BPFAsmPrinter.cpp                   |   42 +-
 lib/Target/BPF/BPFCORE.h                           |   24 +
 lib/Target/BPF/BPFCallingConv.td                   |    7 +-
 lib/Target/BPF/BPFFrameLowering.cpp                |    7 +-
 lib/Target/BPF/BPFFrameLowering.h                  |    7 +-
 lib/Target/BPF/BPFISelDAGToDAG.cpp                 |    7 +-
 lib/Target/BPF/BPFISelLowering.cpp                 |   64 +-
 lib/Target/BPF/BPFISelLowering.h                   |   11 +-
 lib/Target/BPF/BPFInstrFormats.td                  |    8 +-
 lib/Target/BPF/BPFInstrInfo.cpp                    |    7 +-
 lib/Target/BPF/BPFInstrInfo.h                      |    7 +-
 lib/Target/BPF/BPFInstrInfo.td                     |  111 +-
 lib/Target/BPF/BPFMCInstLower.cpp                  |    7 +-
 lib/Target/BPF/BPFMCInstLower.h                    |    7 +-
 lib/Target/BPF/BPFMIChecking.cpp                   |  104 +-
 lib/Target/BPF/BPFMIPeephole.cpp                   |    7 +-
 lib/Target/BPF/BPFMISimplifyPatchable.cpp          |  163 +
 lib/Target/BPF/BPFRegisterInfo.cpp                 |    9 +-
 lib/Target/BPF/BPFRegisterInfo.h                   |    9 +-
 lib/Target/BPF/BPFRegisterInfo.td                  |    7 +-
 lib/Target/BPF/BPFSelectionDAGInfo.cpp             |    7 +-
 lib/Target/BPF/BPFSelectionDAGInfo.h               |    7 +-
 lib/Target/BPF/BPFSubtarget.cpp                    |   13 +-
 lib/Target/BPF/BPFSubtarget.h                      |   12 +-
 lib/Target/BPF/BPFTargetMachine.cpp                |   20 +-
 lib/Target/BPF/BPFTargetMachine.h                  |    7 +-
 lib/Target/BPF/BTF.def                             |    9 +-
 lib/Target/BPF/BTF.h                               |   98 +-
 lib/Target/BPF/BTFDebug.cpp                        |  727 +-
 lib/Target/BPF/BTFDebug.h                          |  120 +-
 lib/Target/BPF/Disassembler/BPFDisassembler.cpp    |   13 +-
 lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp      |  108 -
 lib/Target/BPF/InstPrinter/BPFInstPrinter.h        |   41 -
 lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp      |   19 +-
 lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp |   39 +-
 lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp     |  107 +
 lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h       |   40 +
 lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h         |    7 +-
 lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp   |   14 +-
 lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp    |   11 +-
 lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h      |   11 +-
 lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp        |   18 +-
 lib/Target/BPF/TargetInfo/BPFTargetInfo.h          |   22 +
 lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp  |   29 +-
 lib/Target/Hexagon/BitTracker.cpp                  |    7 +-
 lib/Target/Hexagon/BitTracker.h                    |    7 +-
 .../Hexagon/Disassembler/HexagonDisassembler.cpp   |   10 +-
 lib/Target/Hexagon/Hexagon.h                       |    7 +-
 lib/Target/Hexagon/Hexagon.td                      |    7 +-
 lib/Target/Hexagon/HexagonAsmPrinter.cpp           |   20 +-
 lib/Target/Hexagon/HexagonAsmPrinter.h             |   14 +-
 lib/Target/Hexagon/HexagonBitSimplify.cpp          |    7 +-
 lib/Target/Hexagon/HexagonBitTracker.cpp           |    7 +-
 lib/Target/Hexagon/HexagonBitTracker.h             |    7 +-
 lib/Target/Hexagon/HexagonBlockRanges.cpp          |    7 +-
 lib/Target/Hexagon/HexagonBlockRanges.h            |    7 +-
 lib/Target/Hexagon/HexagonBranchRelaxation.cpp     |    7 +-
 lib/Target/Hexagon/HexagonCFGOptimizer.cpp         |    7 +-
 lib/Target/Hexagon/HexagonCallingConv.td           |    7 +-
 lib/Target/Hexagon/HexagonCommonGEP.cpp            |   24 +-
 lib/Target/Hexagon/HexagonConstExtenders.cpp       |    7 +-
 lib/Target/Hexagon/HexagonConstPropagation.cpp     |  186 +-
 lib/Target/Hexagon/HexagonCopyToCombine.cpp        |   11 +-
 lib/Target/Hexagon/HexagonDepArch.h                |    7 +-
 lib/Target/Hexagon/HexagonDepArch.td               |    7 +-
 lib/Target/Hexagon/HexagonDepDecoders.h            |   79 -
 lib/Target/Hexagon/HexagonDepDecoders.inc          |   78 +
 lib/Target/Hexagon/HexagonDepIICHVX.td             |    7 +-
 lib/Target/Hexagon/HexagonDepIICScalar.td          |    7 +-
 lib/Target/Hexagon/HexagonDepITypes.h              |    7 +-
 lib/Target/Hexagon/HexagonDepITypes.td             |    7 +-
 lib/Target/Hexagon/HexagonDepInstrFormats.td       |    7 +-
 lib/Target/Hexagon/HexagonDepInstrInfo.td          |    7 +-
 lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td      |    7 +-
 lib/Target/Hexagon/HexagonDepMappings.td           |    7 +-
 lib/Target/Hexagon/HexagonDepOperands.td           |    7 +-
 lib/Target/Hexagon/HexagonDepTimingClasses.h       |    7 +-
 lib/Target/Hexagon/HexagonEarlyIfConv.cpp          |    7 +-
 lib/Target/Hexagon/HexagonExpandCondsets.cpp       |    9 +-
 lib/Target/Hexagon/HexagonFixupHwLoops.cpp         |    7 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp        |   15 +-
 lib/Target/Hexagon/HexagonFrameLowering.h          |    7 +-
 lib/Target/Hexagon/HexagonGenExtract.cpp           |    9 +-
 lib/Target/Hexagon/HexagonGenInsert.cpp            |   11 +-
 lib/Target/Hexagon/HexagonGenMux.cpp               |   11 +-
 lib/Target/Hexagon/HexagonGenPredicate.cpp         |   73 +-
 lib/Target/Hexagon/HexagonHardwareLoops.cpp        |    7 +-
 lib/Target/Hexagon/HexagonHazardRecognizer.cpp     |    7 +-
 lib/Target/Hexagon/HexagonHazardRecognizer.h       |    7 +-
 lib/Target/Hexagon/HexagonIICHVX.td                |   19 +-
 lib/Target/Hexagon/HexagonIICScalar.td             |    7 +-
 lib/Target/Hexagon/HexagonISelDAGToDAG.cpp         |   12 +-
 lib/Target/Hexagon/HexagonISelDAGToDAG.h           |    7 +-
 lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp      |    7 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp         |  100 +-
 lib/Target/Hexagon/HexagonISelLowering.h           |   15 +-
 lib/Target/Hexagon/HexagonISelLoweringHVX.cpp      |    9 +-
 lib/Target/Hexagon/HexagonInstrFormats.td          |    7 +-
 lib/Target/Hexagon/HexagonInstrFormatsV5.td        |    7 +-
 lib/Target/Hexagon/HexagonInstrFormatsV60.td       |    7 +-
 lib/Target/Hexagon/HexagonInstrFormatsV65.td       |    7 +-
 lib/Target/Hexagon/HexagonInstrInfo.cpp            |   62 +-
 lib/Target/Hexagon/HexagonInstrInfo.h              |   21 +-
 lib/Target/Hexagon/HexagonIntrinsics.td            |    7 +-
 lib/Target/Hexagon/HexagonIntrinsicsV5.td          |    7 +-
 lib/Target/Hexagon/HexagonIntrinsicsV60.td         |    7 +-
 lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp |   19 +-
 lib/Target/Hexagon/HexagonMCInstLower.cpp          |    7 +-
 lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp  |    7 +-
 lib/Target/Hexagon/HexagonMachineFunctionInfo.h    |    7 +-
 lib/Target/Hexagon/HexagonMachineScheduler.cpp     |    9 +-
 lib/Target/Hexagon/HexagonMachineScheduler.h       |    7 +-
 lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td  |    7 +-
 lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td  |    7 +-
 lib/Target/Hexagon/HexagonNewValueJump.cpp         |    7 +-
 lib/Target/Hexagon/HexagonOperands.td              |    7 +-
 lib/Target/Hexagon/HexagonOptAddrMode.cpp          |    7 +-
 lib/Target/Hexagon/HexagonOptimizeSZextends.cpp    |    7 +-
 lib/Target/Hexagon/HexagonPatterns.td              |   11 +-
 lib/Target/Hexagon/HexagonPatternsV65.td           |    7 +-
 lib/Target/Hexagon/HexagonPeephole.cpp             |    7 +-
 lib/Target/Hexagon/HexagonPseudo.td                |   12 +-
 lib/Target/Hexagon/HexagonRDFOpt.cpp               |    7 +-
 lib/Target/Hexagon/HexagonRegisterInfo.cpp         |    9 +-
 lib/Target/Hexagon/HexagonRegisterInfo.h           |    9 +-
 lib/Target/Hexagon/HexagonRegisterInfo.td          |    7 +-
 lib/Target/Hexagon/HexagonSchedule.td              |    7 +-
 lib/Target/Hexagon/HexagonScheduleV5.td            |    7 +-
 lib/Target/Hexagon/HexagonScheduleV55.td           |    7 +-
 lib/Target/Hexagon/HexagonScheduleV60.td           |    7 +-
 lib/Target/Hexagon/HexagonScheduleV62.td           |    7 +-
 lib/Target/Hexagon/HexagonScheduleV65.td           |    7 +-
 lib/Target/Hexagon/HexagonScheduleV66.td           |    7 +-
 lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp     |    7 +-
 lib/Target/Hexagon/HexagonSelectionDAGInfo.h       |    7 +-
 .../Hexagon/HexagonSplitConst32AndConst64.cpp      |    7 +-
 lib/Target/Hexagon/HexagonSplitDouble.cpp          |   11 +-
 lib/Target/Hexagon/HexagonStoreWidening.cpp        |   15 +-
 lib/Target/Hexagon/HexagonSubtarget.cpp            |    7 +-
 lib/Target/Hexagon/HexagonSubtarget.h              |    7 +-
 lib/Target/Hexagon/HexagonTargetMachine.cpp        |    8 +-
 lib/Target/Hexagon/HexagonTargetMachine.h          |    7 +-
 lib/Target/Hexagon/HexagonTargetObjectFile.cpp     |   14 +-
 lib/Target/Hexagon/HexagonTargetObjectFile.h       |    7 +-
 lib/Target/Hexagon/HexagonTargetStreamer.h         |    7 +-
 lib/Target/Hexagon/HexagonTargetTransformInfo.cpp  |   12 +-
 lib/Target/Hexagon/HexagonTargetTransformInfo.h    |    7 +-
 lib/Target/Hexagon/HexagonVExtract.cpp             |    7 +-
 lib/Target/Hexagon/HexagonVLIWPacketizer.cpp       |    7 +-
 lib/Target/Hexagon/HexagonVLIWPacketizer.h         |    7 +-
 .../Hexagon/HexagonVectorLoopCarriedReuse.cpp      |  222 +-
 lib/Target/Hexagon/HexagonVectorPrint.cpp          |    7 +-
 .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp     |    8 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h  |    7 +-
 .../MCTargetDesc/HexagonELFObjectWriter.cpp        |    9 +-
 .../Hexagon/MCTargetDesc/HexagonFixupKinds.h       |    7 +-
 .../Hexagon/MCTargetDesc/HexagonInstPrinter.cpp    |    8 +-
 .../Hexagon/MCTargetDesc/HexagonInstPrinter.h      |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp      |    7 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp      |    8 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp  |   10 +-
 .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h    |   14 +-
 .../Hexagon/MCTargetDesc/HexagonMCCompound.cpp     |    8 +-
 .../Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp   |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp  |    9 +-
 .../Hexagon/MCTargetDesc/HexagonMCELFStreamer.h    |   10 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp  |    7 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h    |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp    |    8 +-
 .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h      |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCShuffler.cpp     |    8 +-
 .../Hexagon/MCTargetDesc/HexagonMCShuffler.h       |    7 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp   |    9 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.h     |    8 +-
 .../Hexagon/MCTargetDesc/HexagonShuffler.cpp       |    9 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h  |    9 +-
 lib/Target/Hexagon/RDFCopy.cpp                     |    7 +-
 lib/Target/Hexagon/RDFCopy.h                       |    7 +-
 lib/Target/Hexagon/RDFDeadCode.cpp                 |    7 +-
 lib/Target/Hexagon/RDFDeadCode.h                   |    7 +-
 lib/Target/Hexagon/RDFGraph.cpp                    |   29 +-
 lib/Target/Hexagon/RDFGraph.h                      |   34 +-
 lib/Target/Hexagon/RDFLiveness.cpp                 |    8 +-
 lib/Target/Hexagon/RDFLiveness.h                   |    9 +-
 lib/Target/Hexagon/RDFRegisters.cpp                |    7 +-
 lib/Target/Hexagon/RDFRegisters.h                  |    7 +-
 .../Hexagon/TargetInfo/HexagonTargetInfo.cpp       |   10 +-
 lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h  |   20 +
 lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp      |   10 +-
 .../Lanai/Disassembler/LanaiDisassembler.cpp       |   13 +-
 lib/Target/Lanai/Disassembler/LanaiDisassembler.h  |    7 +-
 lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp  |  305 -
 lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h    |   66 -
 lib/Target/Lanai/Lanai.h                           |   15 +-
 lib/Target/Lanai/Lanai.td                          |    7 +-
 lib/Target/Lanai/LanaiAluCode.h                    |    7 +-
 lib/Target/Lanai/LanaiAsmPrinter.cpp               |   19 +-
 lib/Target/Lanai/LanaiCallingConv.td               |    7 +-
 lib/Target/Lanai/LanaiDelaySlotFiller.cpp          |    7 +-
 lib/Target/Lanai/LanaiFrameLowering.cpp            |    9 +-
 lib/Target/Lanai/LanaiFrameLowering.h              |    8 +-
 lib/Target/Lanai/LanaiISelDAGToDAG.cpp             |    9 +-
 lib/Target/Lanai/LanaiISelLowering.cpp             |    7 +-
 lib/Target/Lanai/LanaiISelLowering.h               |    7 +-
 lib/Target/Lanai/LanaiInstrFormats.td              |    7 +-
 lib/Target/Lanai/LanaiInstrInfo.cpp                |   24 +-
 lib/Target/Lanai/LanaiInstrInfo.h                  |   16 +-
 lib/Target/Lanai/LanaiInstrInfo.td                 |    7 +-
 lib/Target/Lanai/LanaiMCInstLower.cpp              |    7 +-
 lib/Target/Lanai/LanaiMCInstLower.h                |    7 +-
 lib/Target/Lanai/LanaiMachineFunctionInfo.cpp      |    7 +-
 lib/Target/Lanai/LanaiMachineFunctionInfo.h        |    7 +-
 lib/Target/Lanai/LanaiMemAluCombiner.cpp           |   12 +-
 lib/Target/Lanai/LanaiRegisterInfo.cpp             |   17 +-
 lib/Target/Lanai/LanaiRegisterInfo.h               |   11 +-
 lib/Target/Lanai/LanaiRegisterInfo.td              |    7 +-
 lib/Target/Lanai/LanaiSchedule.td                  |    7 +-
 lib/Target/Lanai/LanaiSelectionDAGInfo.cpp         |    7 +-
 lib/Target/Lanai/LanaiSelectionDAGInfo.h           |    7 +-
 lib/Target/Lanai/LanaiSubtarget.cpp                |    7 +-
 lib/Target/Lanai/LanaiSubtarget.h                  |    7 +-
 lib/Target/Lanai/LanaiTargetMachine.cpp            |    8 +-
 lib/Target/Lanai/LanaiTargetMachine.h              |    7 +-
 lib/Target/Lanai/LanaiTargetObjectFile.cpp         |    7 +-
 lib/Target/Lanai/LanaiTargetObjectFile.h           |    7 +-
 lib/Target/Lanai/LanaiTargetTransformInfo.h        |    7 +-
 lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp  |    7 +-
 lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h      |    7 +-
 .../Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp    |    9 +-
 lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h    |    7 +-
 lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp |  307 +
 lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h   |   65 +
 lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp   |    7 +-
 lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h     |    7 +-
 .../Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp      |    9 +-
 lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp      |    7 +-
 lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h        |    7 +-
 .../Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp       |   10 +-
 lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h  |    9 +-
 lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp    |   13 +-
 lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h      |   20 +
 lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp    |    8 +-
 .../MSP430/Disassembler/MSP430Disassembler.cpp     |    8 +-
 .../MSP430/InstPrinter/MSP430InstPrinter.cpp       |  138 -
 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h  |   50 -
 .../MSP430/MCTargetDesc/MSP430AsmBackend.cpp       |    7 +-
 .../MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp  |    7 +-
 .../MSP430/MCTargetDesc/MSP430ELFStreamer.cpp      |    7 +-
 lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h  |    7 +-
 .../MSP430/MCTargetDesc/MSP430InstPrinter.cpp      |  137 +
 lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h |   49 +
 lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp |    8 +-
 lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h   |    7 +-
 .../MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp    |    7 +-
 .../MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp     |   10 +-
 .../MSP430/MCTargetDesc/MSP430MCTargetDesc.h       |    9 +-
 lib/Target/MSP430/MSP430.h                         |    7 +-
 lib/Target/MSP430/MSP430.td                        |    7 +-
 lib/Target/MSP430/MSP430AsmPrinter.cpp             |   85 +-
 lib/Target/MSP430/MSP430BranchSelector.cpp         |    7 +-
 lib/Target/MSP430/MSP430CallingConv.td             |    7 +-
 lib/Target/MSP430/MSP430FrameLowering.cpp          |    7 +-
 lib/Target/MSP430/MSP430FrameLowering.h            |    7 +-
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp           |    7 +-
 lib/Target/MSP430/MSP430ISelLowering.cpp           |    7 +-
 lib/Target/MSP430/MSP430ISelLowering.h             |    7 +-
 lib/Target/MSP430/MSP430InstrFormats.td            |    7 +-
 lib/Target/MSP430/MSP430InstrInfo.cpp              |   10 +-
 lib/Target/MSP430/MSP430InstrInfo.h                |    7 +-
 lib/Target/MSP430/MSP430InstrInfo.td               |    7 +-
 lib/Target/MSP430/MSP430MCInstLower.cpp            |    7 +-
 lib/Target/MSP430/MSP430MCInstLower.h              |    7 +-
 lib/Target/MSP430/MSP430MachineFunctionInfo.cpp    |    7 +-
 lib/Target/MSP430/MSP430MachineFunctionInfo.h      |    7 +-
 lib/Target/MSP430/MSP430RegisterInfo.cpp           |    9 +-
 lib/Target/MSP430/MSP430RegisterInfo.h             |    9 +-
 lib/Target/MSP430/MSP430RegisterInfo.td            |    7 +-
 lib/Target/MSP430/MSP430Subtarget.cpp              |    7 +-
 lib/Target/MSP430/MSP430Subtarget.h                |    7 +-
 lib/Target/MSP430/MSP430TargetMachine.cpp          |    8 +-
 lib/Target/MSP430/MSP430TargetMachine.h            |    7 +-
 lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp  |   10 +-
 lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h    |   20 +
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp        |  409 +-
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp  |   17 +-
 lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp    |  288 -
 lib/Target/Mips/InstPrinter/MipsInstPrinter.h      |  113 -
 .../Mips/MCTargetDesc/MipsABIFlagsSection.cpp      |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp       |   14 +-
 lib/Target/Mips/MCTargetDesc/MipsABIInfo.h         |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp    |   11 +-
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h      |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h        |   12 +-
 .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp      |    9 +-
 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp   |    9 +-
 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h     |   10 +-
 lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h      |   12 +-
 lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp   |  287 +
 lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h     |  112 +
 lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp     |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h       |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp |   15 +-
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h   |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp        |   21 +-
 lib/Target/Mips/MCTargetDesc/MipsMCExpr.h          |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h          |    7 +-
 lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp  |   12 +-
 lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h    |   12 +-
 .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp      |   11 +-
 lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp  |    7 +-
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp       |   83 +-
 lib/Target/Mips/MicroMips32r6InstrFormats.td       |    7 +-
 lib/Target/Mips/MicroMips32r6InstrInfo.td          |   32 +-
 lib/Target/Mips/MicroMipsDSPInstrFormats.td        |    7 +-
 lib/Target/Mips/MicroMipsDSPInstrInfo.td           |    7 +-
 lib/Target/Mips/MicroMipsInstrFPU.td               |   19 +-
 lib/Target/Mips/MicroMipsInstrFormats.td           |    7 +-
 lib/Target/Mips/MicroMipsInstrInfo.td              |   36 +-
 lib/Target/Mips/MicroMipsSizeReduction.cpp         |    7 +-
 lib/Target/Mips/Mips.h                             |    7 +-
 lib/Target/Mips/Mips.td                            |   13 +-
 lib/Target/Mips/Mips16FrameLowering.cpp            |    7 +-
 lib/Target/Mips/Mips16FrameLowering.h              |    7 +-
 lib/Target/Mips/Mips16HardFloat.cpp                |    9 +-
 lib/Target/Mips/Mips16HardFloatInfo.cpp            |    7 +-
 lib/Target/Mips/Mips16HardFloatInfo.h              |    7 +-
 lib/Target/Mips/Mips16ISelDAGToDAG.cpp             |    7 +-
 lib/Target/Mips/Mips16ISelDAGToDAG.h               |    7 +-
 lib/Target/Mips/Mips16ISelLowering.cpp             |   17 +-
 lib/Target/Mips/Mips16ISelLowering.h               |    8 +-
 lib/Target/Mips/Mips16InstrFormats.td              |    7 +-
 lib/Target/Mips/Mips16InstrInfo.cpp                |    7 +-
 lib/Target/Mips/Mips16InstrInfo.h                  |    7 +-
 lib/Target/Mips/Mips16InstrInfo.td                 |   15 +-
 lib/Target/Mips/Mips16RegisterInfo.cpp             |    7 +-
 lib/Target/Mips/Mips16RegisterInfo.h               |    7 +-
 lib/Target/Mips/Mips32r6InstrFormats.td            |    7 +-
 lib/Target/Mips/Mips32r6InstrInfo.td               |   12 +-
 lib/Target/Mips/Mips64InstrInfo.td                 |   92 +-
 lib/Target/Mips/Mips64r6InstrInfo.td               |   10 +-
 lib/Target/Mips/MipsAnalyzeImmediate.cpp           |    7 +-
 lib/Target/Mips/MipsAnalyzeImmediate.h             |    7 +-
 lib/Target/Mips/MipsAsmPrinter.cpp                 |   63 +-
 lib/Target/Mips/MipsAsmPrinter.h                   |   13 +-
 lib/Target/Mips/MipsBranchExpansion.cpp            |    7 +-
 lib/Target/Mips/MipsCCState.cpp                    |    7 +-
 lib/Target/Mips/MipsCCState.h                      |    7 +-
 lib/Target/Mips/MipsCallLowering.cpp               |  265 +-
 lib/Target/Mips/MipsCallLowering.h                 |   31 +-
 lib/Target/Mips/MipsCallingConv.td                 |    7 +-
 lib/Target/Mips/MipsCondMov.td                     |   29 +-
 lib/Target/Mips/MipsConstantIslandPass.cpp         |   15 +-
 lib/Target/Mips/MipsDSPInstrFormats.td             |    7 +-
 lib/Target/Mips/MipsDSPInstrInfo.td                |   12 +-
 lib/Target/Mips/MipsDelaySlotFiller.cpp            |   45 +-
 lib/Target/Mips/MipsEVAInstrFormats.td             |    7 +-
 lib/Target/Mips/MipsEVAInstrInfo.td                |    7 +-
 lib/Target/Mips/MipsExpandPseudo.cpp               |    7 +-
 lib/Target/Mips/MipsFastISel.cpp                   |   55 +-
 lib/Target/Mips/MipsFrameLowering.cpp              |    7 +-
 lib/Target/Mips/MipsFrameLowering.h                |    7 +-
 lib/Target/Mips/MipsISelDAGToDAG.cpp               |    7 +-
 lib/Target/Mips/MipsISelDAGToDAG.h                 |    7 +-
 lib/Target/Mips/MipsISelLowering.cpp               |  175 +-
 lib/Target/Mips/MipsISelLowering.h                 |   21 +-
 lib/Target/Mips/MipsInstrFPU.td                    |   26 +-
 lib/Target/Mips/MipsInstrFormats.td                |    8 +-
 lib/Target/Mips/MipsInstrInfo.cpp                  |   23 +-
 lib/Target/Mips/MipsInstrInfo.h                    |    7 +-
 lib/Target/Mips/MipsInstrInfo.td                   |  114 +-
 lib/Target/Mips/MipsInstructionSelector.cpp        |  447 +-
 lib/Target/Mips/MipsLegalizerInfo.cpp              |   93 +-
 lib/Target/Mips/MipsLegalizerInfo.h                |    7 +-
 lib/Target/Mips/MipsMCInstLower.cpp                |    9 +-
 lib/Target/Mips/MipsMCInstLower.h                  |    7 +-
 lib/Target/Mips/MipsMSAInstrFormats.td             |    7 +-
 lib/Target/Mips/MipsMSAInstrInfo.td                |   90 +-
 lib/Target/Mips/MipsMTInstrFormats.td              |    7 +-
 lib/Target/Mips/MipsMTInstrInfo.td                 |    7 +-
 lib/Target/Mips/MipsMachineFunction.cpp            |  105 +-
 lib/Target/Mips/MipsMachineFunction.h              |   14 +-
 lib/Target/Mips/MipsOptimizePICCall.cpp            |    7 +-
 lib/Target/Mips/MipsOptionRecord.h                 |    7 +-
 lib/Target/Mips/MipsOs16.cpp                       |    7 +-
 lib/Target/Mips/MipsPreLegalizerCombiner.cpp       |   18 +-
 lib/Target/Mips/MipsRegisterBankInfo.cpp           |  598 +-
 lib/Target/Mips/MipsRegisterBankInfo.h             |  132 +-
 lib/Target/Mips/MipsRegisterBanks.td               |    9 +-
 lib/Target/Mips/MipsRegisterInfo.cpp               |   40 +-
 lib/Target/Mips/MipsRegisterInfo.h                 |    9 +-
 lib/Target/Mips/MipsRegisterInfo.td                |   54 +-
 lib/Target/Mips/MipsSEFrameLowering.cpp            |    7 +-
 lib/Target/Mips/MipsSEFrameLowering.h              |    7 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp             |  113 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.h               |   11 +-
 lib/Target/Mips/MipsSEISelLowering.cpp             |  126 +-
 lib/Target/Mips/MipsSEISelLowering.h               |   15 +-
 lib/Target/Mips/MipsSEInstrInfo.cpp                |   12 +-
 lib/Target/Mips/MipsSEInstrInfo.h                  |    7 +-
 lib/Target/Mips/MipsSERegisterInfo.cpp             |    7 +-
 lib/Target/Mips/MipsSERegisterInfo.h               |    7 +-
 lib/Target/Mips/MipsSchedule.td                    |    7 +-
 lib/Target/Mips/MipsScheduleGeneric.td             |  934 +-
 lib/Target/Mips/MipsScheduleP5600.td               |   67 +-
 lib/Target/Mips/MipsSubtarget.cpp                  |   21 +-
 lib/Target/Mips/MipsSubtarget.h                    |   11 +-
 lib/Target/Mips/MipsTargetMachine.cpp              |   17 +-
 lib/Target/Mips/MipsTargetMachine.h                |   13 +-
 lib/Target/Mips/MipsTargetObjectFile.cpp           |    7 +-
 lib/Target/Mips/MipsTargetObjectFile.h             |    7 +-
 lib/Target/Mips/MipsTargetStreamer.h               |   11 +-
 lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp      |   10 +-
 lib/Target/Mips/TargetInfo/MipsTargetInfo.h        |   23 +
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp  |  296 -
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h    |   52 -
 lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h      |    7 +-
 lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp |  309 +
 lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h   |   53 +
 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp   |   16 +-
 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h     |    7 +-
 .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp       |   10 +-
 lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h  |   10 +-
 .../NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp     |   26 +-
 .../NVPTX/MCTargetDesc/NVPTXTargetStreamer.h       |   10 +-
 lib/Target/NVPTX/ManagedStringPool.h               |    7 +-
 lib/Target/NVPTX/NVPTX.h                           |   20 +-
 lib/Target/NVPTX/NVPTX.td                          |    9 +-
 lib/Target/NVPTX/NVPTXAllocaHoisting.cpp           |    7 +-
 lib/Target/NVPTX/NVPTXAllocaHoisting.h             |    7 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.cpp               |   83 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.h                 |   18 +-
 lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp   |    7 +-
 lib/Target/NVPTX/NVPTXFrameLowering.cpp            |    7 +-
 lib/Target/NVPTX/NVPTXFrameLowering.h              |    7 +-
 lib/Target/NVPTX/NVPTXGenericToNVVM.cpp            |    7 +-
 lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp             |   14 +-
 lib/Target/NVPTX/NVPTXISelDAGToDAG.h               |    8 +-
 lib/Target/NVPTX/NVPTXISelLowering.cpp             |  283 +-
 lib/Target/NVPTX/NVPTXISelLowering.h               |   11 +-
 lib/Target/NVPTX/NVPTXImageOptimizer.cpp           |    7 +-
 lib/Target/NVPTX/NVPTXInstrFormats.td              |    7 +-
 lib/Target/NVPTX/NVPTXInstrInfo.cpp                |    7 +-
 lib/Target/NVPTX/NVPTXInstrInfo.h                  |    7 +-
 lib/Target/NVPTX/NVPTXInstrInfo.td                 |   23 +-
 lib/Target/NVPTX/NVPTXIntrinsics.td                |  658 +-
 lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp          |    7 +-
 lib/Target/NVPTX/NVPTXLowerAggrCopies.h            |    7 +-
 lib/Target/NVPTX/NVPTXLowerAlloca.cpp              |    8 +-
 lib/Target/NVPTX/NVPTXLowerArgs.cpp                |   11 +-
 lib/Target/NVPTX/NVPTXMCExpr.cpp                   |    7 +-
 lib/Target/NVPTX/NVPTXMCExpr.h                     |    7 +-
 lib/Target/NVPTX/NVPTXMachineFunctionInfo.h        |    7 +-
 lib/Target/NVPTX/NVPTXPeephole.cpp                 |    7 +-
 lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp         |   11 +-
 lib/Target/NVPTX/NVPTXProxyRegErasure.cpp          |    7 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.cpp             |    9 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.h               |    9 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.td              |    7 +-
 lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp      |    8 +-
 lib/Target/NVPTX/NVPTXSubtarget.cpp                |    7 +-
 lib/Target/NVPTX/NVPTXSubtarget.h                  |    7 +-
 lib/Target/NVPTX/NVPTXTargetMachine.cpp            |   27 +-
 lib/Target/NVPTX/NVPTXTargetMachine.h              |    7 +-
 lib/Target/NVPTX/NVPTXTargetObjectFile.h           |    7 +-
 lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp      |    8 +-
 lib/Target/NVPTX/NVPTXTargetTransformInfo.h        |    9 +-
 lib/Target/NVPTX/NVPTXUtilities.cpp                |    8 +-
 lib/Target/NVPTX/NVPTXUtilities.h                  |    7 +-
 lib/Target/NVPTX/NVVMIntrRange.cpp                 |    7 +-
 lib/Target/NVPTX/NVVMReflect.cpp                   |    7 +-
 lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp    |   10 +-
 lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h      |   21 +
 lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp      |   15 +-
 .../PowerPC/Disassembler/PPCDisassembler.cpp       |   22 +-
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp  |  532 --
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h    |   77 -
 lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp  |  117 +-
 .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp    |   10 +-
 lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h    |    7 +-
 lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp |  543 ++
 lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h   |   76 +
 lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp   |   13 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h     |   17 +-
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp      |    9 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h |   14 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp      |    7 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h        |    7 +-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp       |   67 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h  |   14 +-
 .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp   |    7 +-
 lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp  |    7 +-
 lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h    |    7 +-
 .../PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp  |   29 +
 lib/Target/PowerPC/P9InstrResources.td             |  371 +-
 lib/Target/PowerPC/PPC.h                           |   22 +-
 lib/Target/PowerPC/PPC.td                          |   38 +-
 lib/Target/PowerPC/PPCAsmPrinter.cpp               |  223 +-
 lib/Target/PowerPC/PPCBoolRetToInt.cpp             |    7 +-
 lib/Target/PowerPC/PPCBranchCoalescing.cpp         |   11 +-
 lib/Target/PowerPC/PPCBranchSelector.cpp           |  262 +-
 lib/Target/PowerPC/PPCCCState.cpp                  |    7 +-
 lib/Target/PowerPC/PPCCCState.h                    |    7 +-
 lib/Target/PowerPC/PPCCTRLoops.cpp                 |  585 +-
 lib/Target/PowerPC/PPCCallingConv.cpp              |  162 +
 lib/Target/PowerPC/PPCCallingConv.h                |   36 +-
 lib/Target/PowerPC/PPCCallingConv.td               |   50 +-
 lib/Target/PowerPC/PPCEarlyReturn.cpp              |   19 +-
 lib/Target/PowerPC/PPCExpandISEL.cpp               |    7 +-
 lib/Target/PowerPC/PPCFastISel.cpp                 |  108 +-
 lib/Target/PowerPC/PPCFrameLowering.cpp            |  211 +-
 lib/Target/PowerPC/PPCFrameLowering.h              |   31 +-
 lib/Target/PowerPC/PPCHazardRecognizers.cpp        |   10 +-
 lib/Target/PowerPC/PPCHazardRecognizers.h          |    7 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp             |   94 +-
 lib/Target/PowerPC/PPCISelLowering.cpp             | 1087 ++-
 lib/Target/PowerPC/PPCISelLowering.h               |  117 +-
 lib/Target/PowerPC/PPCInstr64Bit.td                |   66 +-
 lib/Target/PowerPC/PPCInstrAltivec.td              |   37 +-
 lib/Target/PowerPC/PPCInstrBuilder.h               |    7 +-
 lib/Target/PowerPC/PPCInstrFormats.td              |   21 +-
 lib/Target/PowerPC/PPCInstrHTM.td                  |   49 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp                |  388 +-
 lib/Target/PowerPC/PPCInstrInfo.h                  |  100 +-
 lib/Target/PowerPC/PPCInstrInfo.td                 |   84 +-
 lib/Target/PowerPC/PPCInstrQPX.td                  |    7 +-
 lib/Target/PowerPC/PPCInstrSPE.td                  |   19 +-
 lib/Target/PowerPC/PPCInstrVSX.td                  |  531 +-
 lib/Target/PowerPC/PPCLoopPreIncPrep.cpp           |   15 +-
 lib/Target/PowerPC/PPCMCInstLower.cpp              |   17 +-
 lib/Target/PowerPC/PPCMIPeephole.cpp               |  186 +-
 lib/Target/PowerPC/PPCMachineFunctionInfo.cpp      |    7 +-
 lib/Target/PowerPC/PPCMachineFunctionInfo.h        |   16 +-
 lib/Target/PowerPC/PPCMachineScheduler.cpp         |   83 +
 lib/Target/PowerPC/PPCMachineScheduler.h           |   49 +
 lib/Target/PowerPC/PPCPerfectShuffle.h             |    7 +-
 lib/Target/PowerPC/PPCPfmCounters.td               |    7 +-
 lib/Target/PowerPC/PPCPreEmitPeephole.cpp          |    7 +-
 lib/Target/PowerPC/PPCQPXLoadSplat.cpp             |   11 +-
 lib/Target/PowerPC/PPCReduceCRLogicals.cpp         |   52 +-
 lib/Target/PowerPC/PPCRegisterInfo.cpp             |  217 +-
 lib/Target/PowerPC/PPCRegisterInfo.h               |   18 +-
 lib/Target/PowerPC/PPCRegisterInfo.td              |    9 +-
 lib/Target/PowerPC/PPCSchedule.td                  |    8 +-
 lib/Target/PowerPC/PPCSchedule440.td               |    7 +-
 lib/Target/PowerPC/PPCScheduleA2.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleE500.td              |    7 +-
 lib/Target/PowerPC/PPCScheduleE500mc.td            |    7 +-
 lib/Target/PowerPC/PPCScheduleE5500.td             |    7 +-
 lib/Target/PowerPC/PPCScheduleG3.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleG4.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleG4Plus.td            |    7 +-
 lib/Target/PowerPC/PPCScheduleG5.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleP7.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleP8.td                |    7 +-
 lib/Target/PowerPC/PPCScheduleP9.td                |   77 +-
 lib/Target/PowerPC/PPCSubtarget.cpp                |   29 +-
 lib/Target/PowerPC/PPCSubtarget.h                  |   28 +-
 lib/Target/PowerPC/PPCTLSDynamicCall.cpp           |   11 +-
 lib/Target/PowerPC/PPCTOCRegDeps.cpp               |   11 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp            |   74 +-
 lib/Target/PowerPC/PPCTargetMachine.h              |   11 +-
 lib/Target/PowerPC/PPCTargetObjectFile.cpp         |    7 +-
 lib/Target/PowerPC/PPCTargetObjectFile.h           |    7 +-
 lib/Target/PowerPC/PPCTargetStreamer.h             |    7 +-
 lib/Target/PowerPC/PPCTargetTransformInfo.cpp      |  449 +-
 lib/Target/PowerPC/PPCTargetTransformInfo.h        |   21 +-
 lib/Target/PowerPC/PPCVSXCopy.cpp                  |   11 +-
 lib/Target/PowerPC/PPCVSXFMAMutate.cpp             |    7 +-
 lib/Target/PowerPC/PPCVSXSwapRemoval.cpp           |   12 +-
 lib/Target/PowerPC/README_P9.txt                   |    8 +-
 .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp       |   10 +-
 lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h  |   22 +
 lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp      |  393 +-
 .../RISCV/Disassembler/RISCVDisassembler.cpp       |   20 +-
 lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp  |  115 -
 lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h    |   55 -
 lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp  |   93 +-
 lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h    |   54 +-
 .../RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp    |   70 +-
 lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp |   32 +-
 lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h   |    7 +-
 lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h    |   36 +-
 lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp |  114 +
 lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h   |   54 +
 lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp   |    8 +-
 lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h     |    7 +-
 .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp      |  150 +-
 lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp      |  120 +-
 lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h        |   23 +-
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp       |   18 +-
 lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h  |   10 +-
 .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp     |    7 +-
 .../RISCV/MCTargetDesc/RISCVTargetStreamer.h       |    7 +-
 lib/Target/RISCV/RISCV.h                           |    7 +-
 lib/Target/RISCV/RISCV.td                          |   25 +-
 lib/Target/RISCV/RISCVAsmPrinter.cpp               |   65 +-
 lib/Target/RISCV/RISCVCallingConv.td               |   18 +-
 lib/Target/RISCV/RISCVExpandPseudoInsts.cpp        |  196 +-
 lib/Target/RISCV/RISCVFrameLowering.cpp            |   80 +-
 lib/Target/RISCV/RISCVFrameLowering.h              |    7 +-
 lib/Target/RISCV/RISCVISelDAGToDAG.cpp             |   15 +-
 lib/Target/RISCV/RISCVISelLowering.cpp             | 1185 ++-
 lib/Target/RISCV/RISCVISelLowering.h               |   86 +-
 lib/Target/RISCV/RISCVInstrFormats.td              |   36 +-
 lib/Target/RISCV/RISCVInstrFormatsC.td             |    7 +-
 lib/Target/RISCV/RISCVInstrInfo.cpp                |   36 +-
 lib/Target/RISCV/RISCVInstrInfo.h                  |    9 +-
 lib/Target/RISCV/RISCVInstrInfo.td                 |  320 +-
 lib/Target/RISCV/RISCVInstrInfoA.td                |   89 +-
 lib/Target/RISCV/RISCVInstrInfoC.td                |   57 +-
 lib/Target/RISCV/RISCVInstrInfoD.td                |   41 +-
 lib/Target/RISCV/RISCVInstrInfoF.td                |   97 +-
 lib/Target/RISCV/RISCVInstrInfoM.td                |   46 +-
 lib/Target/RISCV/RISCVMCInstLower.cpp              |   37 +-
 lib/Target/RISCV/RISCVMachineFunctionInfo.h        |    9 +-
 lib/Target/RISCV/RISCVMergeBaseOffset.cpp          |    7 +-
 lib/Target/RISCV/RISCVRegisterInfo.cpp             |   53 +-
 lib/Target/RISCV/RISCVRegisterInfo.h               |    9 +-
 lib/Target/RISCV/RISCVRegisterInfo.td              |    9 +-
 lib/Target/RISCV/RISCVSubtarget.cpp                |   22 +-
 lib/Target/RISCV/RISCVSubtarget.h                  |   21 +-
 lib/Target/RISCV/RISCVSystemOperands.td            |   27 +-
 lib/Target/RISCV/RISCVTargetMachine.cpp            |   21 +-
 lib/Target/RISCV/RISCVTargetMachine.h              |    9 +-
 lib/Target/RISCV/RISCVTargetObjectFile.cpp         |  103 +-
 lib/Target/RISCV/RISCVTargetObjectFile.h           |   31 +-
 lib/Target/RISCV/RISCVTargetTransformInfo.cpp      |   92 +
 lib/Target/RISCV/RISCVTargetTransformInfo.h        |   52 +
 lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp    |   14 +-
 lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h      |   21 +
 lib/Target/RISCV/Utils/RISCVBaseInfo.cpp           |   71 +
 lib/Target/RISCV/Utils/RISCVBaseInfo.h             |   44 +-
 lib/Target/RISCV/Utils/RISCVMatInt.cpp             |   32 +-
 lib/Target/RISCV/Utils/RISCVMatInt.h               |   16 +-
 lib/Target/Sparc/AsmParser/SparcAsmParser.cpp      |   11 +-
 lib/Target/Sparc/DelaySlotFiller.cpp               |    7 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp       |   14 +-
 lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp  |  220 -
 lib/Target/Sparc/InstPrinter/SparcInstPrinter.h    |   57 -
 lib/Target/Sparc/LeonFeatures.td                   |    7 +-
 lib/Target/Sparc/LeonPasses.cpp                    |    7 +-
 lib/Target/Sparc/LeonPasses.h                      |    7 +-
 lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp  |    7 +-
 .../Sparc/MCTargetDesc/SparcELFObjectWriter.cpp    |    7 +-
 lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h    |    7 +-
 lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp |  219 +
 lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h   |   56 +
 lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp   |    7 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h     |    7 +-
 .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp      |   14 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp      |    7 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h        |    7 +-
 .../Sparc/MCTargetDesc/SparcMCTargetDesc.cpp       |   10 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h  |   11 +-
 .../Sparc/MCTargetDesc/SparcTargetStreamer.cpp     |    9 +-
 .../Sparc/MCTargetDesc/SparcTargetStreamer.h       |    7 +-
 lib/Target/Sparc/Sparc.h                           |    7 +-
 lib/Target/Sparc/Sparc.td                          |    7 +-
 lib/Target/Sparc/SparcAsmPrinter.cpp               |   23 +-
 lib/Target/Sparc/SparcCallingConv.td               |    7 +-
 lib/Target/Sparc/SparcFrameLowering.cpp            |    7 +-
 lib/Target/Sparc/SparcFrameLowering.h              |    7 +-
 lib/Target/Sparc/SparcISelDAGToDAG.cpp             |   12 +-
 lib/Target/Sparc/SparcISelLowering.cpp             |   10 +-
 lib/Target/Sparc/SparcISelLowering.h               |    7 +-
 lib/Target/Sparc/SparcInstr64Bit.td                |    7 +-
 lib/Target/Sparc/SparcInstrAliases.td              |    7 +-
 lib/Target/Sparc/SparcInstrFormats.td              |    7 +-
 lib/Target/Sparc/SparcInstrInfo.cpp                |    7 +-
 lib/Target/Sparc/SparcInstrInfo.h                  |    7 +-
 lib/Target/Sparc/SparcInstrInfo.td                 |    7 +-
 lib/Target/Sparc/SparcInstrVIS.td                  |    7 +-
 lib/Target/Sparc/SparcMCInstLower.cpp              |    7 +-
 lib/Target/Sparc/SparcMachineFunctionInfo.cpp      |    7 +-
 lib/Target/Sparc/SparcMachineFunctionInfo.h        |    7 +-
 lib/Target/Sparc/SparcRegisterInfo.cpp             |   15 +-
 lib/Target/Sparc/SparcRegisterInfo.h               |    9 +-
 lib/Target/Sparc/SparcRegisterInfo.td              |    7 +-
 lib/Target/Sparc/SparcSchedule.td                  |    7 +-
 lib/Target/Sparc/SparcSubtarget.cpp                |    7 +-
 lib/Target/Sparc/SparcSubtarget.h                  |    7 +-
 lib/Target/Sparc/SparcTargetMachine.cpp            |   12 +-
 lib/Target/Sparc/SparcTargetMachine.h              |    7 +-
 lib/Target/Sparc/SparcTargetObjectFile.cpp         |    7 +-
 lib/Target/Sparc/SparcTargetObjectFile.h           |    7 +-
 lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp    |   10 +-
 lib/Target/Sparc/TargetInfo/SparcTargetInfo.h      |   22 +
 lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp  |   35 +-
 .../SystemZ/Disassembler/SystemZDisassembler.cpp   |    8 +-
 .../SystemZ/InstPrinter/SystemZInstPrinter.cpp     |  234 -
 .../SystemZ/InstPrinter/SystemZInstPrinter.h       |   78 -
 .../SystemZ/MCTargetDesc/SystemZInstPrinter.cpp    |  233 +
 .../SystemZ/MCTargetDesc/SystemZInstPrinter.h      |   77 +
 .../SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp   |    7 +-
 .../SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp      |    7 +-
 lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h |    7 +-
 .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp  |   14 +-
 lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h  |    7 +-
 .../SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp |   11 +-
 .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp   |   11 +-
 .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h     |    9 +-
 lib/Target/SystemZ/SystemZ.h                       |    8 +-
 lib/Target/SystemZ/SystemZ.td                      |    7 +-
 lib/Target/SystemZ/SystemZAsmPrinter.cpp           |   70 +-
 lib/Target/SystemZ/SystemZAsmPrinter.h             |   13 +-
 lib/Target/SystemZ/SystemZCallingConv.cpp          |    7 +-
 lib/Target/SystemZ/SystemZCallingConv.h            |    7 +-
 lib/Target/SystemZ/SystemZCallingConv.td           |    7 +-
 lib/Target/SystemZ/SystemZConstantPoolValue.cpp    |    7 +-
 lib/Target/SystemZ/SystemZConstantPoolValue.h      |    7 +-
 lib/Target/SystemZ/SystemZElimCompare.cpp          |   16 +-
 lib/Target/SystemZ/SystemZExpandPseudo.cpp         |    7 +-
 lib/Target/SystemZ/SystemZFeatures.td              |   58 +-
 lib/Target/SystemZ/SystemZFrameLowering.cpp        |    7 +-
 lib/Target/SystemZ/SystemZFrameLowering.h          |    7 +-
 lib/Target/SystemZ/SystemZHazardRecognizer.cpp     |    7 +-
 lib/Target/SystemZ/SystemZHazardRecognizer.h       |    7 +-
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp         |  109 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp         |  816 +-
 lib/Target/SystemZ/SystemZISelLowering.h           |   44 +-
 lib/Target/SystemZ/SystemZInstrBuilder.h           |    7 +-
 lib/Target/SystemZ/SystemZInstrDFP.td              |   99 +-
 lib/Target/SystemZ/SystemZInstrFP.td               |  302 +-
 lib/Target/SystemZ/SystemZInstrFormats.td          |  378 +-
 lib/Target/SystemZ/SystemZInstrHFP.td              |    7 +-
 lib/Target/SystemZ/SystemZInstrInfo.cpp            |  306 +-
 lib/Target/SystemZ/SystemZInstrInfo.h              |   23 +-
 lib/Target/SystemZ/SystemZInstrInfo.td             |  150 +-
 lib/Target/SystemZ/SystemZInstrSystem.td           |    7 +-
 lib/Target/SystemZ/SystemZInstrVector.td           |  555 +-
 lib/Target/SystemZ/SystemZLDCleanup.cpp            |    7 +-
 lib/Target/SystemZ/SystemZLongBranch.cpp           |    7 +-
 lib/Target/SystemZ/SystemZMCInstLower.cpp          |    7 +-
 lib/Target/SystemZ/SystemZMCInstLower.h            |    7 +-
 lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp  |    7 +-
 lib/Target/SystemZ/SystemZMachineFunctionInfo.h    |    7 +-
 lib/Target/SystemZ/SystemZMachineScheduler.cpp     |    7 +-
 lib/Target/SystemZ/SystemZMachineScheduler.h       |    7 +-
 lib/Target/SystemZ/SystemZOperands.td              |   27 +-
 lib/Target/SystemZ/SystemZOperators.td             |  105 +-
 lib/Target/SystemZ/SystemZPatterns.td              |    7 +-
 lib/Target/SystemZ/SystemZPostRewrite.cpp          |  124 +
 lib/Target/SystemZ/SystemZProcessors.td            |    9 +-
 lib/Target/SystemZ/SystemZRegisterInfo.cpp         |  123 +-
 lib/Target/SystemZ/SystemZRegisterInfo.h           |    9 +-
 lib/Target/SystemZ/SystemZRegisterInfo.td          |   14 +-
 lib/Target/SystemZ/SystemZSchedule.td              |    8 +-
 lib/Target/SystemZ/SystemZScheduleArch13.td        | 1695 ++++
 lib/Target/SystemZ/SystemZScheduleZ13.td           |   18 +-
 lib/Target/SystemZ/SystemZScheduleZ14.td           |   18 +-
 lib/Target/SystemZ/SystemZScheduleZ196.td          |    7 +-
 lib/Target/SystemZ/SystemZScheduleZEC12.td         |    7 +-
 lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp     |   25 +-
 lib/Target/SystemZ/SystemZSelectionDAGInfo.h       |    7 +-
 lib/Target/SystemZ/SystemZShortenInst.cpp          |   62 +-
 lib/Target/SystemZ/SystemZSubtarget.cpp            |   10 +-
 lib/Target/SystemZ/SystemZSubtarget.h              |   37 +-
 lib/Target/SystemZ/SystemZTDC.cpp                  |   11 +-
 lib/Target/SystemZ/SystemZTargetMachine.cpp        |   22 +-
 lib/Target/SystemZ/SystemZTargetMachine.h          |    7 +-
 lib/Target/SystemZ/SystemZTargetTransformInfo.cpp  |   39 +-
 lib/Target/SystemZ/SystemZTargetTransformInfo.h    |    7 +-
 .../SystemZ/TargetInfo/SystemZTargetInfo.cpp       |    9 +-
 lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h  |   20 +
 lib/Target/Target.cpp                              |    7 +-
 lib/Target/TargetIntrinsicInfo.cpp                 |    7 +-
 lib/Target/TargetLoweringObjectFile.cpp            |    8 +-
 lib/Target/TargetMachine.cpp                       |   22 +-
 lib/Target/TargetMachineC.cpp                      |    7 +-
 .../WebAssembly/AsmParser/WebAssemblyAsmParser.cpp |  278 +-
 .../Disassembler/WebAssemblyDisassembler.cpp       |   58 +-
 .../InstPrinter/WebAssemblyInstPrinter.cpp         |  310 -
 .../InstPrinter/WebAssemblyInstPrinter.h           |   66 -
 .../MCTargetDesc/WebAssemblyAsmBackend.cpp         |   20 +-
 .../MCTargetDesc/WebAssemblyFixupKinds.h           |   13 +-
 .../MCTargetDesc/WebAssemblyInstPrinter.cpp        |  296 +
 .../MCTargetDesc/WebAssemblyInstPrinter.h          |   65 +
 .../MCTargetDesc/WebAssemblyMCAsmInfo.cpp          |    9 +-
 .../MCTargetDesc/WebAssemblyMCAsmInfo.h            |    7 +-
 .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp      |   35 +-
 .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp       |   24 +-
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h         |  302 +-
 .../MCTargetDesc/WebAssemblyTargetStreamer.cpp     |   24 +-
 .../MCTargetDesc/WebAssemblyTargetStreamer.h       |   20 +-
 .../MCTargetDesc/WebAssemblyWasmObjectWriter.cpp   |  109 +-
 lib/Target/WebAssembly/README.txt                  |    2 +-
 .../TargetInfo/WebAssemblyTargetInfo.cpp           |   10 +-
 .../WebAssembly/TargetInfo/WebAssemblyTargetInfo.h |   26 +
 lib/Target/WebAssembly/WebAssembly.h               |   13 +-
 lib/Target/WebAssembly/WebAssembly.td              |   29 +-
 .../WebAssemblyAddMissingPrototypes.cpp            |   89 +-
 lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp |   11 +-
 lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp   |  186 +-
 lib/Target/WebAssembly/WebAssemblyAsmPrinter.h     |   16 +-
 lib/Target/WebAssembly/WebAssemblyCFGSort.cpp      |   54 +-
 lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp  |  931 +-
 .../WebAssembly/WebAssemblyCallIndirectFixup.cpp   |   37 +-
 .../WebAssembly/WebAssemblyDebugValueManager.cpp   |    7 +-
 .../WebAssembly/WebAssemblyDebugValueManager.h     |    7 +-
 .../WebAssemblyEHRestoreStackPointer.cpp           |   87 -
 .../WebAssembly/WebAssemblyExceptionInfo.cpp       |   21 +-
 lib/Target/WebAssembly/WebAssemblyExceptionInfo.h  |    7 +-
 .../WebAssembly/WebAssemblyExplicitLocals.cpp      |   55 +-
 lib/Target/WebAssembly/WebAssemblyFastISel.cpp     |  183 +-
 .../WebAssembly/WebAssemblyFixFunctionBitcasts.cpp |   79 +-
 .../WebAssemblyFixIrreducibleControlFlow.cpp       |  616 +-
 .../WebAssembly/WebAssemblyFrameLowering.cpp       |   14 +-
 lib/Target/WebAssembly/WebAssemblyFrameLowering.h  |    7 +-
 lib/Target/WebAssembly/WebAssemblyISD.def          |   14 +-
 lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp |  168 +-
 lib/Target/WebAssembly/WebAssemblyISelLowering.cpp |  556 +-
 lib/Target/WebAssembly/WebAssemblyISelLowering.h   |   21 +-
 lib/Target/WebAssembly/WebAssemblyInstrAtomics.td  |  546 +-
 .../WebAssembly/WebAssemblyInstrBulkMemory.td      |   71 +
 lib/Target/WebAssembly/WebAssemblyInstrCall.td     |  202 +-
 lib/Target/WebAssembly/WebAssemblyInstrControl.td  |   93 +-
 lib/Target/WebAssembly/WebAssemblyInstrConv.td     |    7 +-
 .../WebAssembly/WebAssemblyInstrExceptRef.td       |   27 -
 lib/Target/WebAssembly/WebAssemblyInstrFloat.td    |    7 +-
 lib/Target/WebAssembly/WebAssemblyInstrFormats.td  |   10 +-
 lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp    |   62 +-
 lib/Target/WebAssembly/WebAssemblyInstrInfo.h      |   16 +-
 lib/Target/WebAssembly/WebAssemblyInstrInfo.td     |  129 +-
 lib/Target/WebAssembly/WebAssemblyInstrInteger.td  |   14 +-
 lib/Target/WebAssembly/WebAssemblyInstrMemory.td   |   95 +-
 lib/Target/WebAssembly/WebAssemblyInstrRef.td      |   25 +
 lib/Target/WebAssembly/WebAssemblyInstrSIMD.td     |  215 +-
 .../WebAssembly/WebAssemblyLateEHPrepare.cpp       |  467 +-
 .../WebAssembly/WebAssemblyLowerBrUnless.cpp       |    7 +-
 .../WebAssemblyLowerEmscriptenEHSjLj.cpp           |   95 +-
 .../WebAssembly/WebAssemblyLowerGlobalDtors.cpp    |   30 +-
 lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp  |  118 +-
 lib/Target/WebAssembly/WebAssemblyMCInstLower.h    |   12 +-
 .../WebAssembly/WebAssemblyMachineFunctionInfo.cpp |   40 +-
 .../WebAssembly/WebAssemblyMachineFunctionInfo.h   |   47 +-
 .../WebAssembly/WebAssemblyMemIntrinsicResults.cpp |   23 +-
 .../WebAssemblyOptimizeLiveIntervals.cpp           |   13 +-
 .../WebAssembly/WebAssemblyOptimizeReturned.cpp    |   17 +-
 lib/Target/WebAssembly/WebAssemblyPeephole.cpp     |   39 +-
 .../WebAssemblyPrepareForLiveIntervals.cpp         |   19 +-
 lib/Target/WebAssembly/WebAssemblyRegColoring.cpp  |   31 +-
 lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp |    9 +-
 lib/Target/WebAssembly/WebAssemblyRegStackify.cpp  |  173 +-
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp |   30 +-
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.h   |    9 +-
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.td  |   11 +-
 .../WebAssembly/WebAssemblyReplacePhysRegs.cpp     |    7 +-
 .../WebAssemblyRuntimeLibcallSignatures.cpp        |  143 +-
 .../WebAssemblyRuntimeLibcallSignatures.h          |   11 +-
 .../WebAssembly/WebAssemblySelectionDAGInfo.cpp    |   49 +-
 .../WebAssembly/WebAssemblySelectionDAGInfo.h      |   22 +-
 .../WebAssembly/WebAssemblySetP2AlignOperands.cpp  |  123 +-
 lib/Target/WebAssembly/WebAssemblySubtarget.cpp    |   12 +-
 lib/Target/WebAssembly/WebAssemblySubtarget.h      |   22 +-
 .../WebAssembly/WebAssemblyTargetMachine.cpp       |  250 +-
 lib/Target/WebAssembly/WebAssemblyTargetMachine.h  |   18 +-
 .../WebAssembly/WebAssemblyTargetObjectFile.cpp    |    7 +-
 .../WebAssembly/WebAssemblyTargetObjectFile.h      |    7 +-
 .../WebAssembly/WebAssemblyTargetTransformInfo.cpp |    9 +-
 .../WebAssembly/WebAssemblyTargetTransformInfo.h   |    7 +-
 lib/Target/WebAssembly/WebAssemblyUtilities.cpp    |  301 +-
 lib/Target/WebAssembly/WebAssemblyUtilities.h      |   27 +-
 lib/Target/WebAssembly/known_gcc_test_failures.txt |   27 +-
 lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 1089 ---
 lib/Target/X86/AsmParser/X86AsmInstrumentation.h   |   68 -
 lib/Target/X86/AsmParser/X86AsmParser.cpp          |  447 +-
 lib/Target/X86/AsmParser/X86AsmParserCommon.h      |    7 +-
 lib/Target/X86/AsmParser/X86Operand.h              |   58 +-
 lib/Target/X86/Disassembler/X86Disassembler.cpp    |  217 +-
 .../X86/Disassembler/X86DisassemblerDecoder.cpp    |   19 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h      |   14 +-
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp   |  202 -
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h     |  138 -
 lib/Target/X86/InstPrinter/X86InstComments.cpp     | 1310 ---
 lib/Target/X86/InstPrinter/X86InstComments.h       |   27 -
 .../X86/InstPrinter/X86InstPrinterCommon.cpp       |  142 -
 lib/Target/X86/InstPrinter/X86InstPrinterCommon.h  |   38 -
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp |  162 -
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h   |  157 -
 lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp  |  487 +
 lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h    |  124 +
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp      |   82 +-
 lib/Target/X86/MCTargetDesc/X86BaseInfo.h          |   94 +-
 lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp |   38 +-
 lib/Target/X86/MCTargetDesc/X86FixupKinds.h        |    7 +-
 lib/Target/X86/MCTargetDesc/X86InstComments.cpp    | 1322 +++
 lib/Target/X86/MCTargetDesc/X86InstComments.h      |   26 +
 .../X86/MCTargetDesc/X86InstPrinterCommon.cpp      |  362 +
 lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h |   41 +
 .../X86/MCTargetDesc/X86IntelInstPrinter.cpp       |  445 +
 lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h  |  144 +
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp       |    7 +-
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h         |    7 +-
 lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp   |   97 +-
 lib/Target/X86/MCTargetDesc/X86MCExpr.h            |    9 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp    |   22 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h      |   10 +-
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp       |    7 +-
 lib/Target/X86/MCTargetDesc/X86TargetStreamer.h    |    7 +-
 .../X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp    |    7 +-
 lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp |    7 +-
 .../X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp  |    7 +-
 lib/Target/X86/ShadowCallStack.cpp                 |  322 -
 lib/Target/X86/TargetInfo/X86TargetInfo.cpp        |    9 +-
 lib/Target/X86/TargetInfo/X86TargetInfo.h          |   21 +
 lib/Target/X86/Utils/X86ShuffleDecode.cpp          |   14 +-
 lib/Target/X86/Utils/X86ShuffleDecode.h            |    9 +-
 lib/Target/X86/X86.h                               |   15 +-
 lib/Target/X86/X86.td                              | 1226 ++-
 lib/Target/X86/X86AsmPrinter.cpp                   |  274 +-
 lib/Target/X86/X86AsmPrinter.h                     |   25 +-
 lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp   |   29 +-
 lib/Target/X86/X86CallFrameOptimization.cpp        |   12 +-
 lib/Target/X86/X86CallLowering.cpp                 |   78 +-
 lib/Target/X86/X86CallLowering.h                   |   13 +-
 lib/Target/X86/X86CallingConv.cpp                  |  162 +-
 lib/Target/X86/X86CallingConv.h                    |  104 +-
 lib/Target/X86/X86CallingConv.td                   |   28 +-
 lib/Target/X86/X86CmovConversion.cpp               |   35 +-
 lib/Target/X86/X86CondBrFolding.cpp                |   26 +-
 lib/Target/X86/X86DiscriminateMemOps.cpp           |   42 +-
 lib/Target/X86/X86DomainReassignment.cpp           |   12 +-
 lib/Target/X86/X86EvexToVex.cpp                    |   21 +-
 lib/Target/X86/X86ExpandPseudo.cpp                 |   41 +-
 lib/Target/X86/X86FastISel.cpp                     |  264 +-
 lib/Target/X86/X86FixupBWInsts.cpp                 |   13 +-
 lib/Target/X86/X86FixupLEAs.cpp                    |  393 +-
 lib/Target/X86/X86FixupSetCC.cpp                   |   37 +-
 lib/Target/X86/X86FlagsCopyLowering.cpp            |   56 +-
 lib/Target/X86/X86FloatingPoint.cpp                |   28 +-
 lib/Target/X86/X86FrameLowering.cpp                |   80 +-
 lib/Target/X86/X86FrameLowering.h                  |   11 +-
 lib/Target/X86/X86GenRegisterBankInfo.def          |    7 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp                 | 1590 +++-
 lib/Target/X86/X86ISelLowering.cpp                 | 9548 +++++++++++++-------
 lib/Target/X86/X86ISelLowering.h                   |  216 +-
 lib/Target/X86/X86IndirectBranchTracking.cpp       |   49 +-
 lib/Target/X86/X86InsertPrefetch.cpp               |   10 +-
 lib/Target/X86/X86Instr3DNow.td                    |   11 +-
 lib/Target/X86/X86InstrAVX512.td                   | 3488 +++----
 lib/Target/X86/X86InstrArithmetic.td               |  101 +-
 lib/Target/X86/X86InstrBuilder.h                   |    7 +-
 lib/Target/X86/X86InstrCMovSetCC.td                |  176 +-
 lib/Target/X86/X86InstrCompiler.td                 |  323 +-
 lib/Target/X86/X86InstrControl.td                  |   64 +-
 lib/Target/X86/X86InstrExtension.td                |   11 +-
 lib/Target/X86/X86InstrFMA.td                      |   13 +-
 lib/Target/X86/X86InstrFMA3Info.cpp                |   17 +-
 lib/Target/X86/X86InstrFMA3Info.h                  |    7 +-
 lib/Target/X86/X86InstrFPStack.td                  |  341 +-
 lib/Target/X86/X86InstrFoldTables.cpp              |  186 +-
 lib/Target/X86/X86InstrFoldTables.h                |    7 +-
 lib/Target/X86/X86InstrFormats.td                  |   33 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td            |  368 +-
 lib/Target/X86/X86InstrInfo.cpp                    | 1116 +--
 lib/Target/X86/X86InstrInfo.h                      |   79 +-
 lib/Target/X86/X86InstrInfo.td                     |  439 +-
 lib/Target/X86/X86InstrMMX.td                      |   13 +-
 lib/Target/X86/X86InstrMPX.td                      |    7 +-
 lib/Target/X86/X86InstrSGX.td                      |    7 +-
 lib/Target/X86/X86InstrSSE.td                      | 1917 ++--
 lib/Target/X86/X86InstrSVM.td                      |    7 +-
 lib/Target/X86/X86InstrShiftRotate.td              |   98 +-
 lib/Target/X86/X86InstrSystem.td                   |   26 +-
 lib/Target/X86/X86InstrTSX.td                      |    7 +-
 lib/Target/X86/X86InstrVMX.td                      |    7 +-
 lib/Target/X86/X86InstrVecCompiler.td              |  104 +-
 lib/Target/X86/X86InstrXOP.td                      |   33 +-
 lib/Target/X86/X86InstructionSelector.cpp          |   92 +-
 lib/Target/X86/X86InterleavedAccess.cpp            |   27 +-
 lib/Target/X86/X86IntrinsicsInfo.h                 |  781 +-
 lib/Target/X86/X86LegalizerInfo.cpp                |   30 +-
 lib/Target/X86/X86LegalizerInfo.h                  |    7 +-
 lib/Target/X86/X86MCInstLower.cpp                  |  274 +-
 lib/Target/X86/X86MachineFunctionInfo.cpp          |    7 +-
 lib/Target/X86/X86MachineFunctionInfo.h            |    7 +-
 lib/Target/X86/X86MacroFusion.cpp                  |  164 +-
 lib/Target/X86/X86MacroFusion.h                    |    7 +-
 lib/Target/X86/X86OptimizeLEAs.cpp                 |   14 +-
 lib/Target/X86/X86PadShortFunction.cpp             |   16 +-
 lib/Target/X86/X86PfmCounters.td                   |    7 +-
 lib/Target/X86/X86RegisterBankInfo.cpp             |   24 +-
 lib/Target/X86/X86RegisterBankInfo.h               |    7 +-
 lib/Target/X86/X86RegisterBanks.td                 |    7 +-
 lib/Target/X86/X86RegisterInfo.cpp                 |   37 +-
 lib/Target/X86/X86RegisterInfo.h                   |   23 +-
 lib/Target/X86/X86RegisterInfo.td                  |   44 +-
 lib/Target/X86/X86RetpolineThunks.cpp              |    7 +-
 lib/Target/X86/X86SchedBroadwell.td                |  169 +-
 lib/Target/X86/X86SchedHaswell.td                  |  195 +-
 lib/Target/X86/X86SchedPredicates.td               |   31 +-
 lib/Target/X86/X86SchedSandyBridge.td              |   96 +-
 lib/Target/X86/X86SchedSkylakeClient.td            |  193 +-
 lib/Target/X86/X86SchedSkylakeServer.td            |  212 +-
 lib/Target/X86/X86Schedule.td                      |   14 +-
 lib/Target/X86/X86ScheduleAtom.td                  |   12 +-
 lib/Target/X86/X86ScheduleBdVer2.td                |  599 +-
 lib/Target/X86/X86ScheduleBtVer2.td                |   45 +-
 lib/Target/X86/X86ScheduleSLM.td                   |   10 +-
 lib/Target/X86/X86ScheduleZnver1.td                |   10 +-
 lib/Target/X86/X86SelectionDAGInfo.cpp             |  222 +-
 lib/Target/X86/X86SelectionDAGInfo.h               |    7 +-
 lib/Target/X86/X86ShuffleDecodeConstantPool.cpp    |    7 +-
 lib/Target/X86/X86ShuffleDecodeConstantPool.h      |    7 +-
 lib/Target/X86/X86SpeculativeLoadHardening.cpp     |   41 +-
 lib/Target/X86/X86Subtarget.cpp                    |   22 +-
 lib/Target/X86/X86Subtarget.h                      |   47 +-
 lib/Target/X86/X86TargetMachine.cpp                |   33 +-
 lib/Target/X86/X86TargetMachine.h                  |    7 +-
 lib/Target/X86/X86TargetObjectFile.cpp             |    7 +-
 lib/Target/X86/X86TargetObjectFile.h               |    7 +-
 lib/Target/X86/X86TargetTransformInfo.cpp          |  529 +-
 lib/Target/X86/X86TargetTransformInfo.h            |   76 +-
 lib/Target/X86/X86VZeroUpper.cpp                   |    7 +-
 lib/Target/X86/X86WinAllocaExpander.cpp            |   46 +-
 lib/Target/X86/X86WinEHState.cpp                   |   45 +-
 .../XCore/Disassembler/XCoreDisassembler.cpp       |   12 +-
 lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp  |   90 -
 lib/Target/XCore/InstPrinter/XCoreInstPrinter.h    |   47 -
 lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp |   89 +
 lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h   |   46 +
 lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp   |    7 +-
 lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h     |    7 +-
 .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp       |   10 +-
 lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h  |    9 +-
 lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp    |   10 +-
 lib/Target/XCore/TargetInfo/XCoreTargetInfo.h      |   20 +
 lib/Target/XCore/XCore.h                           |    7 +-
 lib/Target/XCore/XCore.td                          |    7 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp               |   31 +-
 lib/Target/XCore/XCoreCallingConv.td               |    7 +-
 lib/Target/XCore/XCoreFrameLowering.cpp            |    7 +-
 lib/Target/XCore/XCoreFrameLowering.h              |    7 +-
 lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp    |    7 +-
 lib/Target/XCore/XCoreISelDAGToDAG.cpp             |    7 +-
 lib/Target/XCore/XCoreISelLowering.cpp             |   82 +-
 lib/Target/XCore/XCoreISelLowering.h               |    9 +-
 lib/Target/XCore/XCoreInstrFormats.td              |    7 +-
 lib/Target/XCore/XCoreInstrInfo.cpp                |    7 +-
 lib/Target/XCore/XCoreInstrInfo.h                  |    7 +-
 lib/Target/XCore/XCoreInstrInfo.td                 |    7 +-
 lib/Target/XCore/XCoreLowerThreadLocal.cpp         |    7 +-
 lib/Target/XCore/XCoreMCInstLower.cpp              |    7 +-
 lib/Target/XCore/XCoreMCInstLower.h                |    7 +-
 lib/Target/XCore/XCoreMachineFunctionInfo.cpp      |    7 +-
 lib/Target/XCore/XCoreMachineFunctionInfo.h        |    7 +-
 lib/Target/XCore/XCoreRegisterInfo.cpp             |   11 +-
 lib/Target/XCore/XCoreRegisterInfo.h               |    9 +-
 lib/Target/XCore/XCoreRegisterInfo.td              |    7 +-
 lib/Target/XCore/XCoreSelectionDAGInfo.cpp         |    7 +-
 lib/Target/XCore/XCoreSelectionDAGInfo.h           |    7 +-
 lib/Target/XCore/XCoreSubtarget.cpp                |    7 +-
 lib/Target/XCore/XCoreSubtarget.h                  |    7 +-
 lib/Target/XCore/XCoreTargetMachine.cpp            |    8 +-
 lib/Target/XCore/XCoreTargetMachine.h              |    7 +-
 lib/Target/XCore/XCoreTargetObjectFile.cpp         |    7 +-
 lib/Target/XCore/XCoreTargetObjectFile.h           |    7 +-
 lib/Target/XCore/XCoreTargetStreamer.h             |    7 +-
 lib/Target/XCore/XCoreTargetTransformInfo.h        |    7 +-
 lib/Testing/Support/Annotations.cpp                |   95 +
 lib/Testing/Support/Error.cpp                      |    7 +-
 lib/TextAPI/ELF/ELFStub.cpp                        |    7 +-
 lib/TextAPI/ELF/TBEHandler.cpp                     |    7 +-
 lib/TextAPI/MachO/Architecture.cpp                 |   77 +
 lib/TextAPI/MachO/ArchitectureSet.cpp              |   69 +
 lib/TextAPI/MachO/InterfaceFile.cpp                |   81 +
 lib/TextAPI/MachO/PackedVersion.cpp                |  113 +
 lib/TextAPI/MachO/Symbol.cpp                       |   49 +
 lib/TextAPI/MachO/TextAPIContext.h                 |   33 +
 lib/TextAPI/MachO/TextStub.cpp                     |  660 ++
 lib/TextAPI/MachO/TextStubCommon.cpp               |  178 +
 lib/TextAPI/MachO/TextStubCommon.h                 |   81 +
 lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp     |   10 +-
 lib/ToolDrivers/llvm-lib/LibDriver.cpp             |  156 +-
 lib/ToolDrivers/llvm-lib/Options.td                |   16 +-
 .../AggressiveInstCombine.cpp                      |    7 +-
 .../AggressiveInstCombineInternal.h                |    7 +-
 .../AggressiveInstCombine/TruncInstCombine.cpp     |    7 +-
 lib/Transforms/Coroutines/CoroCleanup.cpp          |    9 +-
 lib/Transforms/Coroutines/CoroEarly.cpp            |   11 +-
 lib/Transforms/Coroutines/CoroElide.cpp            |    7 +-
 lib/Transforms/Coroutines/CoroFrame.cpp            |   62 +-
 lib/Transforms/Coroutines/CoroInstr.h              |    7 +-
 lib/Transforms/Coroutines/CoroInternal.h           |    7 +-
 lib/Transforms/Coroutines/CoroSplit.cpp            |   30 +-
 lib/Transforms/Coroutines/Coroutines.cpp           |   15 +-
 lib/Transforms/IPO/AlwaysInliner.cpp               |   41 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp           |   93 +-
 lib/Transforms/IPO/Attributor.cpp                  | 1690 ++++
 lib/Transforms/IPO/BarrierNoopPass.cpp             |    7 +-
 lib/Transforms/IPO/BlockExtractor.cpp              |  122 +-
 lib/Transforms/IPO/CalledValuePropagation.cpp      |    7 +-
 lib/Transforms/IPO/ConstantMerge.cpp               |   29 +-
 lib/Transforms/IPO/CrossDSOCFI.cpp                 |   17 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp     |    9 +-
 lib/Transforms/IPO/ElimAvailExtern.cpp             |    7 +-
 lib/Transforms/IPO/ExtractGV.cpp                   |    7 +-
 lib/Transforms/IPO/ForceFunctionAttrs.cpp          |    8 +-
 lib/Transforms/IPO/FunctionAttrs.cpp               |   73 +-
 lib/Transforms/IPO/FunctionImport.cpp              |   57 +-
 lib/Transforms/IPO/GlobalDCE.cpp                   |    7 +-
 lib/Transforms/IPO/GlobalOpt.cpp                   |  144 +-
 lib/Transforms/IPO/GlobalSplit.cpp                 |    7 +-
 lib/Transforms/IPO/HotColdSplitting.cpp            |  424 +-
 lib/Transforms/IPO/IPConstantPropagation.cpp       |   50 +-
 lib/Transforms/IPO/IPO.cpp                         |    8 +-
 lib/Transforms/IPO/InferFunctionAttrs.cpp          |    9 +-
 lib/Transforms/IPO/InlineSimple.cpp                |   13 +-
 lib/Transforms/IPO/Inliner.cpp                     |   19 +-
 lib/Transforms/IPO/Internalize.cpp                 |   30 +-
 lib/Transforms/IPO/LoopExtractor.cpp               |   14 +-
 lib/Transforms/IPO/LowerTypeTests.cpp              |   41 +-
 lib/Transforms/IPO/MergeFunctions.cpp              |   70 +-
 lib/Transforms/IPO/PartialInlining.cpp             |   75 +-
 lib/Transforms/IPO/PassManagerBuilder.cpp          |  206 +-
 lib/Transforms/IPO/PruneEH.cpp                     |   18 +-
 lib/Transforms/IPO/SCCP.cpp                        |    1 +
 lib/Transforms/IPO/SampleProfile.cpp               |   66 +-
 lib/Transforms/IPO/StripDeadPrototypes.cpp         |    7 +-
 lib/Transforms/IPO/StripSymbols.cpp                |    7 +-
 lib/Transforms/IPO/SyntheticCountsPropagation.cpp  |    7 +-
 lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp        |   50 +-
 lib/Transforms/IPO/WholeProgramDevirt.cpp          |   53 +-
 lib/Transforms/InstCombine/InstCombineAddSub.cpp   |  193 +-
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp |  120 +-
 .../InstCombine/InstCombineAtomicRMW.cpp           |  159 +
 lib/Transforms/InstCombine/InstCombineCalls.cpp    | 1158 +--
 lib/Transforms/InstCombine/InstCombineCasts.cpp    |   90 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp |  643 +-
 lib/Transforms/InstCombine/InstCombineInternal.h   |  101 +-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp     |   62 +-
 .../InstCombine/InstCombineMulDivRem.cpp           |  103 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp      |   15 +-
 lib/Transforms/InstCombine/InstCombineSelect.cpp   |  288 +-
 lib/Transforms/InstCombine/InstCombineShifts.cpp   |   98 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp    |  112 +-
 .../InstCombine/InstCombineVectorOps.cpp           |  321 +-
 .../InstCombine/InstructionCombining.cpp           |  348 +-
 .../Instrumentation/AddressSanitizer.cpp           |  775 +-
 lib/Transforms/Instrumentation/BoundsChecking.cpp  |   12 +-
 lib/Transforms/Instrumentation/CFGMST.h            |   16 +-
 lib/Transforms/Instrumentation/CGProfile.cpp       |    7 +-
 .../Instrumentation/ControlHeightReduction.cpp     |   70 +-
 .../Instrumentation/DataFlowSanitizer.cpp          |  171 +-
 .../Instrumentation/EfficiencySanitizer.cpp        |  900 --
 lib/Transforms/Instrumentation/GCOVProfiling.cpp   |  154 +-
 .../Instrumentation/HWAddressSanitizer.cpp         |  594 +-
 .../Instrumentation/IndirectCallPromotion.cpp      |   21 +-
 lib/Transforms/Instrumentation/InstrOrderFile.cpp  |  211 +
 lib/Transforms/Instrumentation/InstrProfiling.cpp  |  213 +-
 lib/Transforms/Instrumentation/Instrumentation.cpp |   25 +-
 .../Instrumentation/MaximumSpanningTree.h          |   10 +-
 lib/Transforms/Instrumentation/MemorySanitizer.cpp |  395 +-
 .../Instrumentation/PGOInstrumentation.cpp         |  445 +-
 lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp |    9 +-
 lib/Transforms/Instrumentation/PoisonChecking.cpp  |  357 +
 .../Instrumentation/SanitizerCoverage.cpp          |  199 +-
 lib/Transforms/Instrumentation/ThreadSanitizer.cpp |  141 +-
 lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h     |   31 +-
 lib/Transforms/ObjCARC/BlotMapVector.h             |    7 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.cpp      |    7 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.h        |    7 +-
 lib/Transforms/ObjCARC/ObjCARC.cpp                 |    7 +-
 lib/Transforms/ObjCARC/ObjCARC.h                   |    7 +-
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp           |    7 +-
 lib/Transforms/ObjCARC/ObjCARCContract.cpp         |  251 +-
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp           |    7 +-
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp             |   80 +-
 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp      |    7 +-
 lib/Transforms/ObjCARC/ProvenanceAnalysis.h        |    7 +-
 .../ObjCARC/ProvenanceAnalysisEvaluator.cpp        |    7 +-
 lib/Transforms/ObjCARC/PtrState.cpp                |    7 +-
 lib/Transforms/ObjCARC/PtrState.h                  |    7 +-
 lib/Transforms/Scalar/ADCE.cpp                     |   20 +-
 lib/Transforms/Scalar/AlignmentFromAssumptions.cpp |    7 +-
 lib/Transforms/Scalar/BDCE.cpp                     |   17 +-
 lib/Transforms/Scalar/CallSiteSplitting.cpp        |   12 +-
 lib/Transforms/Scalar/ConstantHoisting.cpp         |   40 +-
 lib/Transforms/Scalar/ConstantProp.cpp             |    7 +-
 .../Scalar/CorrelatedValuePropagation.cpp          |  305 +-
 lib/Transforms/Scalar/DCE.cpp                      |    7 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp     |  102 +-
 lib/Transforms/Scalar/DivRemPairs.cpp              |    7 +-
 lib/Transforms/Scalar/EarlyCSE.cpp                 |  239 +-
 lib/Transforms/Scalar/FlattenCFGPass.cpp           |    7 +-
 lib/Transforms/Scalar/Float2Int.cpp                |   29 +-
 lib/Transforms/Scalar/GVN.cpp                      |  104 +-
 lib/Transforms/Scalar/GVNHoist.cpp                 |    9 +-
 lib/Transforms/Scalar/GVNSink.cpp                  |   22 +-
 lib/Transforms/Scalar/GuardWidening.cpp            |  212 +-
 lib/Transforms/Scalar/IVUsersPrinter.cpp           |    7 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp           |  716 +-
 .../Scalar/InductiveRangeCheckElimination.cpp      |  101 +-
 lib/Transforms/Scalar/InferAddressSpaces.cpp       |   53 +-
 lib/Transforms/Scalar/InstSimplifyPass.cpp         |    7 +-
 lib/Transforms/Scalar/JumpThreading.cpp            |   93 +-
 lib/Transforms/Scalar/LICM.cpp                     |  466 +-
 .../Scalar/LoopAccessAnalysisPrinter.cpp           |    7 +-
 lib/Transforms/Scalar/LoopDataPrefetch.cpp         |   11 +-
 lib/Transforms/Scalar/LoopDeletion.cpp             |    7 +-
 lib/Transforms/Scalar/LoopDistribute.cpp           |   22 +-
 lib/Transforms/Scalar/LoopFuse.cpp                 | 1215 +++
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |   94 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp         |    9 +-
 lib/Transforms/Scalar/LoopInterchange.cpp          |  130 +-
 lib/Transforms/Scalar/LoopLoadElimination.cpp      |   62 +-
 lib/Transforms/Scalar/LoopPassManager.cpp          |    7 +-
 lib/Transforms/Scalar/LoopPredication.cpp          |  524 +-
 lib/Transforms/Scalar/LoopRerollPass.cpp           |   17 +-
 lib/Transforms/Scalar/LoopRotation.cpp             |   12 +-
 lib/Transforms/Scalar/LoopSimplifyCFG.cpp          |  237 +-
 lib/Transforms/Scalar/LoopSink.cpp                 |   14 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       |  344 +-
 lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp     |   10 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp           |   97 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp             |   26 +-
 lib/Transforms/Scalar/LoopVersioningLICM.cpp       |   17 +-
 lib/Transforms/Scalar/LowerAtomic.cpp              |   17 +-
 lib/Transforms/Scalar/LowerExpectIntrinsic.cpp     |    7 +-
 lib/Transforms/Scalar/LowerGuardIntrinsic.cpp      |    7 +-
 lib/Transforms/Scalar/LowerWidenableCondition.cpp  |   85 +
 lib/Transforms/Scalar/MakeGuardsExplicit.cpp       |    7 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp          |   24 +-
 lib/Transforms/Scalar/MergeICmps.cpp               |  728 +-
 lib/Transforms/Scalar/MergedLoadStoreMotion.cpp    |    7 +-
 lib/Transforms/Scalar/NaryReassociate.cpp          |   11 +-
 lib/Transforms/Scalar/NewGVN.cpp                   |   61 +-
 lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp  |    7 +-
 lib/Transforms/Scalar/PlaceSafepoints.cpp          |   42 +-
 lib/Transforms/Scalar/Reassociate.cpp              |  103 +-
 lib/Transforms/Scalar/Reg2Mem.cpp                  |    7 +-
 lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  |  254 +-
 lib/Transforms/Scalar/SCCP.cpp                     |   76 +-
 lib/Transforms/Scalar/SROA.cpp                     |  215 +-
 lib/Transforms/Scalar/Scalar.cpp                   |   11 +-
 lib/Transforms/Scalar/Scalarizer.cpp               |   70 +-
 .../Scalar/SeparateConstOffsetFromGEP.cpp          |    7 +-
 lib/Transforms/Scalar/SimpleLoopUnswitch.cpp       |  130 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp          |    7 +-
 lib/Transforms/Scalar/Sink.cpp                     |    7 +-
 lib/Transforms/Scalar/SpeculateAroundPHIs.cpp      |   15 +-
 lib/Transforms/Scalar/SpeculativeExecution.cpp     |    8 +-
 .../Scalar/StraightLineStrengthReduce.cpp          |   15 +-
 lib/Transforms/Scalar/StructurizeCFG.cpp           |   47 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp |   13 +-
 lib/Transforms/Scalar/WarnMissedTransforms.cpp     |    9 +-
 lib/Transforms/Utils/ASanStackFrameLayout.cpp      |    9 +-
 lib/Transforms/Utils/AddDiscriminators.cpp         |   11 +-
 lib/Transforms/Utils/BasicBlockUtils.cpp           |  141 +-
 lib/Transforms/Utils/BreakCriticalEdges.cpp        |   34 +-
 lib/Transforms/Utils/BuildLibCalls.cpp             |  339 +-
 lib/Transforms/Utils/BypassSlowDivision.cpp        |    7 +-
 lib/Transforms/Utils/CallPromotionUtils.cpp        |   21 +-
 lib/Transforms/Utils/CanonicalizeAliases.cpp       |    7 +-
 lib/Transforms/Utils/CloneFunction.cpp             |   37 +-
 lib/Transforms/Utils/CloneModule.cpp               |    7 +-
 lib/Transforms/Utils/CodeExtractor.cpp             |  496 +-
 lib/Transforms/Utils/CtorUtils.cpp                 |    7 +-
 lib/Transforms/Utils/DemoteRegToStack.cpp          |   16 +-
 lib/Transforms/Utils/EntryExitInstrumenter.cpp     |   11 +-
 lib/Transforms/Utils/EscapeEnumerator.cpp          |   13 +-
 lib/Transforms/Utils/Evaluator.cpp                 |  113 +-
 lib/Transforms/Utils/FlattenCFG.cpp                |    7 +-
 lib/Transforms/Utils/FunctionComparator.cpp        |   53 +-
 lib/Transforms/Utils/FunctionImportUtils.cpp       |   44 +-
 lib/Transforms/Utils/GlobalStatus.cpp              |    7 +-
 lib/Transforms/Utils/GuardUtils.cpp                |    7 +-
 .../Utils/ImportedFunctionsInliningStatistics.cpp  |    7 +-
 lib/Transforms/Utils/InlineFunction.cpp            |  172 +-
 lib/Transforms/Utils/InstructionNamer.cpp          |    7 +-
 lib/Transforms/Utils/IntegerDivision.cpp           |    7 +-
 lib/Transforms/Utils/LCSSA.cpp                     |   47 +-
 lib/Transforms/Utils/LibCallsShrinkWrap.cpp        |    7 +-
 lib/Transforms/Utils/Local.cpp                     |  387 +-
 lib/Transforms/Utils/LoopRotationUtils.cpp         |   26 +-
 lib/Transforms/Utils/LoopSimplify.cpp              |  126 +-
 lib/Transforms/Utils/LoopUnroll.cpp                |  424 +-
 lib/Transforms/Utils/LoopUnrollAndJam.cpp          |   17 +-
 lib/Transforms/Utils/LoopUnrollPeel.cpp            |  210 +-
 lib/Transforms/Utils/LoopUnrollRuntime.cpp         |   61 +-
 lib/Transforms/Utils/LoopUtils.cpp                 |  106 +-
 lib/Transforms/Utils/LoopVersioning.cpp            |   12 +-
 lib/Transforms/Utils/LowerInvoke.cpp               |   10 +-
 lib/Transforms/Utils/LowerMemIntrinsics.cpp        |   29 +-
 lib/Transforms/Utils/LowerSwitch.cpp               |  218 +-
 lib/Transforms/Utils/Mem2Reg.cpp                   |    7 +-
 lib/Transforms/Utils/MetaRenamer.cpp               |    7 +-
 lib/Transforms/Utils/ModuleUtils.cpp               |   88 +-
 lib/Transforms/Utils/NameAnonGlobals.cpp           |    7 +-
 lib/Transforms/Utils/PredicateInfo.cpp             |   18 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp   |   66 +-
 lib/Transforms/Utils/SSAUpdater.cpp                |   10 +-
 lib/Transforms/Utils/SSAUpdaterBulk.cpp            |    7 +-
 lib/Transforms/Utils/SanitizerStats.cpp            |   15 +-
 lib/Transforms/Utils/SimplifyCFG.cpp               |  203 +-
 lib/Transforms/Utils/SimplifyIndVar.cpp            |  200 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp          |  510 +-
 lib/Transforms/Utils/SizeOpts.cpp                  |   37 +
 lib/Transforms/Utils/SplitModule.cpp               |    7 +-
 lib/Transforms/Utils/StripGCRelocates.cpp          |    7 +-
 .../Utils/StripNonLineTableDebugInfo.cpp           |    7 +-
 lib/Transforms/Utils/SymbolRewriter.cpp            |    7 +-
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp    |    7 +-
 lib/Transforms/Utils/Utils.cpp                     |   10 +-
 lib/Transforms/Utils/VNCoercion.cpp                |   66 +-
 lib/Transforms/Utils/ValueMapper.cpp               |   22 +-
 lib/Transforms/Vectorize/LoadStoreVectorizer.cpp   |   21 +-
 .../Vectorize/LoopVectorizationLegality.cpp        |  347 +-
 .../Vectorize/LoopVectorizationPlanner.h           |   23 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp         |  476 +-
 lib/Transforms/Vectorize/SLPVectorizer.cpp         | 1362 ++-
 lib/Transforms/Vectorize/VPRecipeBuilder.h         |   14 +-
 lib/Transforms/Vectorize/VPlan.cpp                 |   23 +-
 lib/Transforms/Vectorize/VPlan.h                   |   60 +-
 lib/Transforms/Vectorize/VPlanDominatorTree.h      |    7 +-
 lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp      |   11 +-
 lib/Transforms/Vectorize/VPlanHCFGBuilder.h        |    7 +-
 lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp   |    7 +-
 lib/Transforms/Vectorize/VPlanHCFGTransforms.h     |    7 +-
 lib/Transforms/Vectorize/VPlanLoopInfo.h           |    7 +-
 lib/Transforms/Vectorize/VPlanPredicator.cpp       |  248 +
 lib/Transforms/Vectorize/VPlanPredicator.h         |   74 +
 lib/Transforms/Vectorize/VPlanSLP.cpp              |    7 +-
 lib/Transforms/Vectorize/VPlanValue.h              |    7 +-
 lib/Transforms/Vectorize/VPlanVerifier.cpp         |    7 +-
 lib/Transforms/Vectorize/VPlanVerifier.h           |    7 +-
 lib/Transforms/Vectorize/Vectorize.cpp             |    7 +-
 lib/WindowsManifest/WindowsManifestMerger.cpp      |    7 +-
 lib/XRay/BlockIndexer.cpp                          |    7 +-
 lib/XRay/BlockPrinter.cpp                          |    7 +-
 lib/XRay/BlockVerifier.cpp                         |    7 +-
 lib/XRay/FDRRecordProducer.cpp                     |    7 +-
 lib/XRay/FDRRecords.cpp                            |    7 +-
 lib/XRay/FDRTraceExpander.cpp                      |    7 +-
 lib/XRay/FDRTraceWriter.cpp                        |    7 +-
 lib/XRay/FileHeaderReader.cpp                      |    7 +-
 lib/XRay/InstrumentationMap.cpp                    |   26 +-
 lib/XRay/LogBuilderConsumer.cpp                    |    7 +-
 lib/XRay/Profile.cpp                               |   18 +-
 lib/XRay/RecordInitializer.cpp                     |    7 +-
 lib/XRay/RecordPrinter.cpp                         |    7 +-
 lib/XRay/Trace.cpp                                 |   26 +-
 tools/bugpoint/BugDriver.cpp                       |    7 +-
 tools/bugpoint/BugDriver.h                         |    7 +-
 tools/bugpoint/CrashDebugger.cpp                   |    7 +-
 tools/bugpoint/ExecutionDriver.cpp                 |    7 +-
 tools/bugpoint/ExtractFunction.cpp                 |    7 +-
 tools/bugpoint/FindBugs.cpp                        |    7 +-
 tools/bugpoint/ListReducer.h                       |    7 +-
 tools/bugpoint/Miscompilation.cpp                  |   27 +-
 tools/bugpoint/OptimizerDriver.cpp                 |    7 +-
 tools/bugpoint/ToolRunner.cpp                      |    7 +-
 tools/bugpoint/ToolRunner.h                        |    7 +-
 tools/bugpoint/bugpoint.cpp                        |    7 +-
 tools/llc/llc.cpp                                  |   62 +-
 tools/lli/RemoteJITUtils.h                         |    9 +-
 tools/lli/lli.cpp                                  |   97 +-
 tools/llvm-ar/llvm-ar.cpp                          |  168 +-
 tools/llvm-as/llvm-as.cpp                          |   29 +-
 tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp          |  984 +-
 tools/llvm-cov/CodeCoverage.cpp                    |   20 +-
 tools/llvm-cov/CoverageExporter.h                  |    7 +-
 tools/llvm-cov/CoverageExporterJson.cpp            |   69 +-
 tools/llvm-cov/CoverageExporterJson.h              |    7 +-
 tools/llvm-cov/CoverageExporterLcov.cpp            |    9 +-
 tools/llvm-cov/CoverageExporterLcov.h              |    7 +-
 tools/llvm-cov/CoverageFilters.cpp                 |    7 +-
 tools/llvm-cov/CoverageFilters.h                   |    7 +-
 tools/llvm-cov/CoverageReport.cpp                  |    7 +-
 tools/llvm-cov/CoverageReport.h                    |    7 +-
 tools/llvm-cov/CoverageSummaryInfo.cpp             |    7 +-
 tools/llvm-cov/CoverageSummaryInfo.h               |    7 +-
 tools/llvm-cov/CoverageViewOptions.h               |    9 +-
 tools/llvm-cov/RenderingSupport.h                  |    7 +-
 tools/llvm-cov/SourceCoverageView.cpp              |   11 +-
 tools/llvm-cov/SourceCoverageView.h                |    7 +-
 tools/llvm-cov/SourceCoverageViewHTML.cpp          |    7 +-
 tools/llvm-cov/SourceCoverageViewHTML.h            |    7 +-
 tools/llvm-cov/SourceCoverageViewText.cpp          |    7 +-
 tools/llvm-cov/SourceCoverageViewText.h            |    7 +-
 tools/llvm-cov/TestingSupport.cpp                  |   20 +-
 tools/llvm-cov/gcov.cpp                            |   15 +-
 tools/llvm-cov/llvm-cov.cpp                        |    7 +-
 tools/llvm-cxxdump/Error.cpp                       |    7 +-
 tools/llvm-cxxdump/Error.h                         |    7 +-
 tools/llvm-cxxdump/llvm-cxxdump.cpp                |   24 +-
 tools/llvm-cxxdump/llvm-cxxdump.h                  |    7 +-
 tools/llvm-cxxfilt/llvm-cxxfilt.cpp                |   93 +-
 tools/llvm-cxxmap/llvm-cxxmap.cpp                  |    7 +-
 tools/llvm-diff/DiffConsumer.cpp                   |    9 +-
 tools/llvm-diff/DiffConsumer.h                     |    7 +-
 tools/llvm-diff/DiffLog.cpp                        |    7 +-
 tools/llvm-diff/DiffLog.h                          |    7 +-
 tools/llvm-diff/DifferenceEngine.cpp               |    9 +-
 tools/llvm-diff/DifferenceEngine.h                 |    7 +-
 tools/llvm-diff/llvm-diff.cpp                      |    7 +-
 tools/llvm-dis/llvm-dis.cpp                        |    7 +-
 tools/llvm-dwarfdump/Statistics.cpp                |  178 +-
 tools/llvm-dwarfdump/llvm-dwarfdump.cpp            |  111 +-
 tools/llvm-extract/llvm-extract.cpp                |  131 +-
 tools/llvm-link/llvm-link.cpp                      |    7 +-
 tools/llvm-lto/llvm-lto.cpp                        |  118 +-
 tools/llvm-lto2/llvm-lto2.cpp                      |   55 +-
 tools/llvm-mc/Disassembler.cpp                     |    7 +-
 tools/llvm-mc/Disassembler.h                       |    7 +-
 tools/llvm-mc/llvm-mc.cpp                          |   21 +-
 tools/llvm-mca/CodeRegion.cpp                      |  114 +-
 tools/llvm-mca/CodeRegion.h                        |   40 +-
 tools/llvm-mca/CodeRegionGenerator.cpp             |   19 +-
 tools/llvm-mca/CodeRegionGenerator.h               |    7 +-
 tools/llvm-mca/PipelinePrinter.cpp                 |    7 +-
 tools/llvm-mca/PipelinePrinter.h                   |    7 +-
 tools/llvm-mca/Views/BottleneckAnalysis.cpp        |  624 ++
 tools/llvm-mca/Views/BottleneckAnalysis.h          |  341 +
 tools/llvm-mca/Views/DispatchStatistics.cpp        |    7 +-
 tools/llvm-mca/Views/DispatchStatistics.h          |    7 +-
 tools/llvm-mca/Views/InstructionInfoView.cpp       |   10 +-
 tools/llvm-mca/Views/InstructionInfoView.h         |    7 +-
 tools/llvm-mca/Views/RegisterFileStatistics.cpp    |    7 +-
 tools/llvm-mca/Views/RegisterFileStatistics.h      |    7 +-
 tools/llvm-mca/Views/ResourcePressureView.cpp      |    7 +-
 tools/llvm-mca/Views/ResourcePressureView.h        |    7 +-
 .../llvm-mca/Views/RetireControlUnitStatistics.cpp |    7 +-
 tools/llvm-mca/Views/RetireControlUnitStatistics.h |    7 +-
 tools/llvm-mca/Views/SchedulerStatistics.cpp       |   37 +-
 tools/llvm-mca/Views/SchedulerStatistics.h         |   11 +-
 tools/llvm-mca/Views/SummaryView.cpp               |   25 +-
 tools/llvm-mca/Views/SummaryView.h                 |   13 +-
 tools/llvm-mca/Views/TimelineView.cpp              |    7 +-
 tools/llvm-mca/Views/TimelineView.h                |    7 +-
 tools/llvm-mca/Views/View.cpp                      |    7 +-
 tools/llvm-mca/Views/View.h                        |    7 +-
 tools/llvm-mca/llvm-mca.cpp                        |   56 +-
 tools/llvm-modextract/llvm-modextract.cpp          |    7 +-
 tools/llvm-nm/llvm-nm.cpp                          |  517 +-
 tools/llvm-objcopy/Buffer.cpp                      |   50 +-
 tools/llvm-objcopy/Buffer.h                        |   16 +-
 tools/llvm-objcopy/COFF/COFFObjcopy.cpp            |  158 +-
 tools/llvm-objcopy/COFF/COFFObjcopy.h              |   12 +-
 tools/llvm-objcopy/COFF/Object.cpp                 |   91 +-
 tools/llvm-objcopy/COFF/Object.h                   |   79 +-
 tools/llvm-objcopy/COFF/Reader.cpp                 |  112 +-
 tools/llvm-objcopy/COFF/Reader.h                   |    9 +-
 tools/llvm-objcopy/COFF/Writer.cpp                 |  167 +-
 tools/llvm-objcopy/COFF/Writer.h                   |   10 +-
 tools/llvm-objcopy/CopyConfig.cpp                  |  661 +-
 tools/llvm-objcopy/CopyConfig.h                    |  130 +-
 tools/llvm-objcopy/ELF/ELFObjcopy.cpp              |  684 +-
 tools/llvm-objcopy/ELF/ELFObjcopy.h                |   18 +-
 tools/llvm-objcopy/ELF/Object.cpp                  | 1198 ++-
 tools/llvm-objcopy/ELF/Object.h                    |  314 +-
 tools/llvm-objcopy/MachO/MachOObjcopy.cpp          |   68 +
 tools/llvm-objcopy/MachO/MachOObjcopy.h            |   31 +
 tools/llvm-objcopy/MachO/MachOReader.cpp           |  241 +
 tools/llvm-objcopy/MachO/MachOReader.h             |   48 +
 tools/llvm-objcopy/MachO/MachOWriter.cpp           |  590 ++
 tools/llvm-objcopy/MachO/MachOWriter.h             |   64 +
 tools/llvm-objcopy/MachO/Object.cpp                |   15 +
 tools/llvm-objcopy/MachO/Object.h                  |  232 +
 tools/llvm-objcopy/ObjcopyOpts.td                  |  166 +-
 tools/llvm-objcopy/StripOpts.td                    |   61 +-
 tools/llvm-objcopy/llvm-objcopy.cpp                |  237 +-
 tools/llvm-objcopy/llvm-objcopy.h                  |    8 +-
 tools/llvm-objdump/COFFDump.cpp                    |   82 +-
 tools/llvm-objdump/ELFDump.cpp                     |  252 +-
 tools/llvm-objdump/MachODump.cpp                   |  978 +-
 tools/llvm-objdump/WasmDump.cpp                    |   40 +-
 tools/llvm-objdump/llvm-objdump.cpp                | 2215 ++---
 tools/llvm-objdump/llvm-objdump.h                  |  153 +-
 tools/llvm-pdbutil/BytesOutputStyle.cpp            |   11 +-
 tools/llvm-pdbutil/BytesOutputStyle.h              |    7 +-
 tools/llvm-pdbutil/DumpOutputStyle.cpp             |  192 +-
 tools/llvm-pdbutil/DumpOutputStyle.h               |   16 +-
 tools/llvm-pdbutil/ExplainOutputStyle.cpp          |    7 +-
 tools/llvm-pdbutil/ExplainOutputStyle.h            |    7 +-
 tools/llvm-pdbutil/FormatUtil.cpp                  |    7 +-
 tools/llvm-pdbutil/FormatUtil.h                    |    7 +-
 tools/llvm-pdbutil/InputFile.cpp                   |   16 +-
 tools/llvm-pdbutil/InputFile.h                     |    7 +-
 tools/llvm-pdbutil/LinePrinter.cpp                 |   10 +-
 tools/llvm-pdbutil/LinePrinter.h                   |   10 +-
 tools/llvm-pdbutil/MinimalSymbolDumper.cpp         |  159 +-
 tools/llvm-pdbutil/MinimalSymbolDumper.h           |    7 +-
 tools/llvm-pdbutil/MinimalTypeDumper.cpp           |   29 +-
 tools/llvm-pdbutil/MinimalTypeDumper.h             |   14 +-
 tools/llvm-pdbutil/OutputStyle.h                   |    7 +-
 tools/llvm-pdbutil/PdbYaml.cpp                     |   10 +-
 tools/llvm-pdbutil/PdbYaml.h                       |    7 +-
 tools/llvm-pdbutil/PrettyBuiltinDumper.cpp         |    7 +-
 tools/llvm-pdbutil/PrettyBuiltinDumper.h           |    7 +-
 tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp |    7 +-
 tools/llvm-pdbutil/PrettyClassDefinitionDumper.h   |    7 +-
 .../PrettyClassLayoutGraphicalDumper.cpp           |    7 +-
 .../PrettyClassLayoutGraphicalDumper.h             |    7 +-
 tools/llvm-pdbutil/PrettyCompilandDumper.cpp       |    7 +-
 tools/llvm-pdbutil/PrettyCompilandDumper.h         |    7 +-
 tools/llvm-pdbutil/PrettyEnumDumper.cpp            |    7 +-
 tools/llvm-pdbutil/PrettyEnumDumper.h              |    7 +-
 tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp  |    7 +-
 tools/llvm-pdbutil/PrettyExternalSymbolDumper.h    |    7 +-
 tools/llvm-pdbutil/PrettyFunctionDumper.cpp        |   14 +-
 tools/llvm-pdbutil/PrettyFunctionDumper.h          |    7 +-
 tools/llvm-pdbutil/PrettyTypeDumper.cpp            |    7 +-
 tools/llvm-pdbutil/PrettyTypeDumper.h              |    7 +-
 tools/llvm-pdbutil/PrettyTypedefDumper.cpp         |    7 +-
 tools/llvm-pdbutil/PrettyTypedefDumper.h           |    7 +-
 tools/llvm-pdbutil/PrettyVariableDumper.cpp        |    7 +-
 tools/llvm-pdbutil/PrettyVariableDumper.h          |    7 +-
 tools/llvm-pdbutil/StreamUtil.cpp                  |    7 +-
 tools/llvm-pdbutil/StreamUtil.h                    |    7 +-
 tools/llvm-pdbutil/TypeReferenceTracker.cpp        |  160 +
 tools/llvm-pdbutil/TypeReferenceTracker.h          |   69 +
 tools/llvm-pdbutil/YAMLOutputStyle.cpp             |   12 +-
 tools/llvm-pdbutil/YAMLOutputStyle.h               |    7 +-
 tools/llvm-pdbutil/llvm-pdbutil.cpp                |   43 +-
 tools/llvm-pdbutil/llvm-pdbutil.h                  |    9 +-
 tools/llvm-profdata/llvm-profdata.cpp              |  153 +-
 tools/llvm-readobj/ARMEHABIPrinter.h               |   17 +-
 tools/llvm-readobj/ARMWinEHPrinter.cpp             |   21 +-
 tools/llvm-readobj/ARMWinEHPrinter.h               |    9 +-
 tools/llvm-readobj/COFFDumper.cpp                  |  187 +-
 tools/llvm-readobj/COFFImportDumper.cpp            |    9 +-
 tools/llvm-readobj/DwarfCFIEHPrinter.h             |    7 +-
 tools/llvm-readobj/ELFDumper.cpp                   | 2259 +++--
 tools/llvm-readobj/Error.cpp                       |    7 +-
 tools/llvm-readobj/Error.h                         |    7 +-
 tools/llvm-readobj/MachODumper.cpp                 |   43 +-
 tools/llvm-readobj/ObjDumper.cpp                   |  218 +-
 tools/llvm-readobj/ObjDumper.h                     |   56 +-
 tools/llvm-readobj/StackMapPrinter.h               |   19 +-
 tools/llvm-readobj/WasmDumper.cpp                  |   62 +-
 tools/llvm-readobj/Win64EHDumper.cpp               |    7 +-
 tools/llvm-readobj/Win64EHDumper.h                 |    7 +-
 tools/llvm-readobj/WindowsResourceDumper.cpp       |    7 +-
 tools/llvm-readobj/WindowsResourceDumper.h         |    7 +-
 tools/llvm-readobj/XCOFFDumper.cpp                 |  190 +
 tools/llvm-readobj/llvm-readobj.cpp                |  272 +-
 tools/llvm-readobj/llvm-readobj.h                  |   14 +-
 tools/llvm-rtdyld/llvm-rtdyld.cpp                  |  361 +-
 tools/llvm-stress/llvm-stress.cpp                  |    9 +-
 tools/llvm-symbolizer/llvm-symbolizer.cpp          |  161 +-
 tools/llvm-xray/func-id-helper.cpp                 |   21 +-
 tools/llvm-xray/func-id-helper.h                   |    7 +-
 tools/llvm-xray/llvm-xray.cpp                      |    7 +-
 tools/llvm-xray/trie-node.h                        |    7 +-
 tools/llvm-xray/xray-account.cpp                   |   11 +-
 tools/llvm-xray/xray-account.h                     |   19 +-
 tools/llvm-xray/xray-color-helper.cpp              |    7 +-
 tools/llvm-xray/xray-color-helper.h                |    7 +-
 tools/llvm-xray/xray-converter.cpp                 |  120 +-
 tools/llvm-xray/xray-converter.h                   |    7 +-
 tools/llvm-xray/xray-extract.cpp                   |   11 +-
 tools/llvm-xray/xray-fdr-dump.cpp                  |   18 +-
 tools/llvm-xray/xray-graph-diff.cpp                |    7 +-
 tools/llvm-xray/xray-graph-diff.h                  |    7 +-
 tools/llvm-xray/xray-graph.cpp                     |   11 +-
 tools/llvm-xray/xray-graph.h                       |   10 +-
 tools/llvm-xray/xray-registry.cpp                  |    7 +-
 tools/llvm-xray/xray-registry.h                    |    7 +-
 tools/llvm-xray/xray-stacks.cpp                    |   17 +-
 tools/opt/AnalysisWrappers.cpp                     |    7 +-
 tools/opt/BreakpointPrinter.cpp                    |   11 +-
 tools/opt/BreakpointPrinter.h                      |    7 +-
 tools/opt/Debugify.cpp                             |    7 +-
 tools/opt/Debugify.h                               |    7 +-
 tools/opt/GraphPrinters.cpp                        |    7 +-
 tools/opt/NewPMDriver.cpp                          |   58 +-
 tools/opt/NewPMDriver.h                            |   14 +-
 tools/opt/PassPrinters.cpp                         |    7 +-
 tools/opt/PassPrinters.h                           |    7 +-
 tools/opt/PrintSCC.cpp                             |    7 +-
 tools/opt/opt.cpp                                  |  151 +-
 utils/TableGen/AsmMatcherEmitter.cpp               |  246 +-
 utils/TableGen/AsmWriterEmitter.cpp                |   26 +-
 utils/TableGen/AsmWriterInst.cpp                   |   40 +-
 utils/TableGen/AsmWriterInst.h                     |    7 +-
 utils/TableGen/Attributes.cpp                      |    7 +-
 utils/TableGen/CTagsEmitter.cpp                    |    7 +-
 utils/TableGen/CallingConvEmitter.cpp              |   51 +-
 utils/TableGen/CodeEmitterGen.cpp                  |  106 +-
 utils/TableGen/CodeGenDAGPatterns.cpp              |  110 +-
 utils/TableGen/CodeGenDAGPatterns.h                |   25 +-
 utils/TableGen/CodeGenHwModes.cpp                  |    7 +-
 utils/TableGen/CodeGenHwModes.h                    |    7 +-
 utils/TableGen/CodeGenInstruction.cpp              |   79 +-
 utils/TableGen/CodeGenInstruction.h                |    8 +-
 utils/TableGen/CodeGenIntrinsics.h                 |   20 +-
 utils/TableGen/CodeGenMapTable.cpp                 |    7 +-
 utils/TableGen/CodeGenRegisters.cpp                |   15 +-
 utils/TableGen/CodeGenRegisters.h                  |    7 +-
 utils/TableGen/CodeGenSchedule.cpp                 |   52 +-
 utils/TableGen/CodeGenSchedule.h                   |    7 +-
 utils/TableGen/CodeGenTarget.cpp                   |  100 +-
 utils/TableGen/CodeGenTarget.h                     |    7 +-
 utils/TableGen/DAGISelEmitter.cpp                  |    7 +-
 utils/TableGen/DAGISelMatcher.cpp                  |   31 +-
 utils/TableGen/DAGISelMatcher.h                    |   66 +-
 utils/TableGen/DAGISelMatcherEmitter.cpp           |   27 +-
 utils/TableGen/DAGISelMatcherGen.cpp               |   60 +-
 utils/TableGen/DAGISelMatcherOpt.cpp               |   15 +-
 utils/TableGen/DFAPacketizerEmitter.cpp            |    7 +-
 utils/TableGen/DisassemblerEmitter.cpp             |    7 +-
 utils/TableGen/ExegesisEmitter.cpp                 |    7 +-
 utils/TableGen/FastISelEmitter.cpp                 |    7 +-
 utils/TableGen/FixedLenDecoderEmitter.cpp          |  147 +-
 utils/TableGen/GlobalISelEmitter.cpp               |  157 +-
 utils/TableGen/InfoByHwMode.cpp                    |   14 +-
 utils/TableGen/InfoByHwMode.h                      |   13 +-
 utils/TableGen/InstrDocsEmitter.cpp                |    7 +-
 utils/TableGen/InstrInfoEmitter.cpp                |   89 +-
 utils/TableGen/IntrinsicEmitter.cpp                |  120 +-
 utils/TableGen/OptParserEmitter.cpp                |    7 +-
 utils/TableGen/PredicateExpander.cpp               |    7 +-
 utils/TableGen/PredicateExpander.h                 |    7 +-
 utils/TableGen/PseudoLoweringEmitter.cpp           |    7 +-
 utils/TableGen/RISCVCompressInstEmitter.cpp        |   59 +-
 utils/TableGen/RegisterBankEmitter.cpp             |    7 +-
 utils/TableGen/RegisterInfoEmitter.cpp             |    7 +-
 utils/TableGen/SDNodeProperties.cpp                |   13 +-
 utils/TableGen/SDNodeProperties.h                  |    7 +-
 utils/TableGen/SearchableTableEmitter.cpp          |   35 +-
 utils/TableGen/SequenceToOffsetTable.h             |    7 +-
 utils/TableGen/SubtargetEmitter.cpp                |  140 +-
 utils/TableGen/SubtargetFeatureInfo.cpp            |   27 +-
 utils/TableGen/SubtargetFeatureInfo.h              |   16 +-
 utils/TableGen/TableGen.cpp                        |   18 +-
 utils/TableGen/TableGenBackends.h                  |    7 +-
 utils/TableGen/Types.cpp                           |    7 +-
 utils/TableGen/Types.h                             |    7 +-
 utils/TableGen/WebAssemblyDisassemblerEmitter.cpp  |   38 +-
 utils/TableGen/WebAssemblyDisassemblerEmitter.h    |    7 +-
 utils/TableGen/X86DisassemblerShared.h             |    7 +-
 utils/TableGen/X86DisassemblerTables.cpp           |   81 +-
 utils/TableGen/X86DisassemblerTables.h             |    7 +-
 utils/TableGen/X86EVEX2VEXTablesEmitter.cpp        |   64 +-
 utils/TableGen/X86FoldTablesEmitter.cpp            |   71 +-
 utils/TableGen/X86ModRMFilters.cpp                 |    7 +-
 utils/TableGen/X86ModRMFilters.h                   |    7 +-
 utils/TableGen/X86RecognizableInstr.cpp            |  128 +-
 utils/TableGen/X86RecognizableInstr.h              |   24 +-
 4643 files changed, 231107 insertions(+), 109687 deletions(-)
 delete mode 100644 include/llvm-c/OptRemarks.h
 create mode 100644 include/llvm-c/Remarks.h
 create mode 100644 include/llvm/ADT/fallible_iterator.h
 create mode 100644 include/llvm/Analysis/DomTreeUpdater.h
 create mode 100644 include/llvm/Analysis/VecFuncs.def
 create mode 100644 include/llvm/BinaryFormat/Minidump.h
 create mode 100644 include/llvm/BinaryFormat/MinidumpConstants.def
 create mode 100644 include/llvm/BinaryFormat/MsgPackDocument.h
 delete mode 100644 include/llvm/BinaryFormat/MsgPackTypes.h
 create mode 100644 include/llvm/BinaryFormat/XCOFF.h
 delete mode 100644 include/llvm/Bitcode/BitCodes.h
 create mode 100644 include/llvm/Bitcode/BitcodeAnalyzer.h
 delete mode 100644 include/llvm/Bitcode/BitstreamReader.h
 delete mode 100644 include/llvm/Bitcode/BitstreamWriter.h
 create mode 100644 include/llvm/Bitstream/BitCodes.h
 create mode 100644 include/llvm/Bitstream/BitstreamReader.h
 create mode 100644 include/llvm/Bitstream/BitstreamWriter.h
 create mode 100644 include/llvm/CodeGen/CSEConfigBase.h
 create mode 100644 include/llvm/CodeGen/MIRParser/MIParser.h
 create mode 100644 include/llvm/CodeGen/Register.h
 create mode 100644 include/llvm/CodeGen/SwiftErrorValueTracking.h
 create mode 100644 include/llvm/CodeGen/SwitchLoweringUtils.h
 create mode 100644 include/llvm/DebugInfo/GSYM/FileEntry.h
 create mode 100644 include/llvm/DebugInfo/GSYM/FunctionInfo.h
 create mode 100644 include/llvm/DebugInfo/GSYM/InlineInfo.h
 create mode 100644 include/llvm/DebugInfo/GSYM/LineEntry.h
 create mode 100644 include/llvm/DebugInfo/GSYM/Range.h
 create mode 100644 include/llvm/DebugInfo/GSYM/StringTable.h
 create mode 100644 include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
 create mode 100644 include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
 delete mode 100644 include/llvm/Demangle/Compiler.h
 create mode 100644 include/llvm/Demangle/DemangleConfig.h
 create mode 100644 include/llvm/Demangle/README.txt
 create mode 100644 include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
 create mode 100644 include/llvm/ExecutionEngine/JITLink/JITLink.h
 create mode 100644 include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
 create mode 100644 include/llvm/ExecutionEngine/JITLink/MachO.h
 create mode 100644 include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
 create mode 100644 include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
 create mode 100644 include/llvm/ExecutionEngine/OrcV1Deprecation.h
 delete mode 100644 include/llvm/IR/DomTreeUpdater.h
 create mode 100644 include/llvm/IR/RemarkStreamer.h
 create mode 100644 include/llvm/MC/MCAsmInfoXCOFF.h
 create mode 100644 include/llvm/MC/MCSectionXCOFF.h
 create mode 100644 include/llvm/MC/MCSymbolXCOFF.h
 create mode 100644 include/llvm/MC/MCXCOFFObjectWriter.h
 create mode 100644 include/llvm/MC/MCXCOFFStreamer.h
 create mode 100644 include/llvm/MCA/Stages/MicroOpQueueStage.h
 create mode 100644 include/llvm/Object/Minidump.h
 delete mode 100644 include/llvm/Object/RelocVisitor.h
 create mode 100644 include/llvm/Object/RelocationResolver.h
 create mode 100644 include/llvm/Object/WindowsMachineFlag.h
 create mode 100644 include/llvm/Object/XCOFFObjectFile.h
 create mode 100644 include/llvm/ObjectYAML/MinidumpYAML.h
 create mode 100644 include/llvm/ObjectYAML/XCOFFYAML.h
 create mode 100644 include/llvm/Remarks/Remark.h
 create mode 100644 include/llvm/Remarks/RemarkFormat.h
 create mode 100644 include/llvm/Remarks/RemarkParser.h
 create mode 100644 include/llvm/Remarks/RemarkSerializer.h
 create mode 100644 include/llvm/Remarks/RemarkStringTable.h
 create mode 100644 include/llvm/Support/CRC.h
 create mode 100644 include/llvm/Support/GenericIteratedDominanceFrontier.h
 create mode 100644 include/llvm/Support/SMTAPI.h
 create mode 100644 include/llvm/Support/ScalableSize.h
 create mode 100644 include/llvm/Support/Signposts.h
 create mode 100644 include/llvm/Support/TimeProfiler.h
 create mode 100644 include/llvm/Testing/Support/Annotations.h
 create mode 100644 include/llvm/TextAPI/MachO/Architecture.def
 create mode 100644 include/llvm/TextAPI/MachO/Architecture.h
 create mode 100644 include/llvm/TextAPI/MachO/ArchitectureSet.h
 create mode 100644 include/llvm/TextAPI/MachO/InterfaceFile.h
 create mode 100644 include/llvm/TextAPI/MachO/PackedVersion.h
 create mode 100644 include/llvm/TextAPI/MachO/Symbol.h
 create mode 100644 include/llvm/TextAPI/MachO/TextAPIReader.h
 create mode 100644 include/llvm/TextAPI/MachO/TextAPIWriter.h
 create mode 100644 include/llvm/Transforms/IPO/Attributor.h
 create mode 100644 include/llvm/Transforms/Instrumentation/AddressSanitizer.h
 create mode 100644 include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
 create mode 100644 include/llvm/Transforms/Instrumentation/InstrOrderFile.h
 create mode 100644 include/llvm/Transforms/Instrumentation/PoisonChecking.h
 create mode 100644 include/llvm/Transforms/Scalar/LoopFuse.h
 create mode 100644 include/llvm/Transforms/Scalar/LowerWidenableCondition.h
 create mode 100644 include/llvm/Transforms/Scalar/MergeICmps.h
 create mode 100644 include/llvm/Transforms/Utils/SizeOpts.h
 create mode 100644 lib/Analysis/DomTreeUpdater.cpp
 delete mode 100644 lib/Analysis/IteratedDominanceFrontier.cpp
 create mode 100644 lib/BinaryFormat/Minidump.cpp
 create mode 100644 lib/BinaryFormat/MsgPackDocument.cpp
 create mode 100644 lib/BinaryFormat/MsgPackDocumentYAML.cpp
 delete mode 100644 lib/BinaryFormat/MsgPackTypes.cpp
 create mode 100644 lib/Bitcode/Reader/BitcodeAnalyzer.cpp
 delete mode 100644 lib/Bitcode/Reader/BitstreamReader.cpp
 create mode 100644 lib/Bitstream/Reader/BitstreamReader.cpp
 delete mode 100644 lib/CodeGen/ExpandISelPseudos.cpp
 create mode 100644 lib/CodeGen/FinalizeISel.cpp
 create mode 100644 lib/CodeGen/HardwareLoops.cpp
 delete mode 100644 lib/CodeGen/MIRParser/MIParser.h
 create mode 100644 lib/CodeGen/SwiftErrorValueTracking.cpp
 create mode 100644 lib/CodeGen/SwitchLoweringUtils.cpp
 create mode 100644 lib/DebugInfo/GSYM/FunctionInfo.cpp
 create mode 100644 lib/DebugInfo/GSYM/InlineInfo.cpp
 create mode 100644 lib/DebugInfo/GSYM/Range.cpp
 create mode 100644 lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
 create mode 100644 lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
 create mode 100644 lib/Demangle/Demangle.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
 create mode 100644 lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
 create mode 100644 lib/ExecutionEngine/JITLink/JITLink.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/JITLinkGeneric.h
 create mode 100644 lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/MachO.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
 create mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
 create mode 100644 lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
 create mode 100644 lib/ExecutionEngine/Orc/CompileUtils.cpp
 create mode 100644 lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
 create mode 100644 lib/IR/AbstractCallSite.cpp
 delete mode 100644 lib/IR/DomTreeUpdater.cpp
 create mode 100644 lib/IR/RemarkStreamer.cpp
 create mode 100644 lib/MC/MCAsmInfoXCOFF.cpp
 create mode 100644 lib/MC/MCSectionXCOFF.cpp
 create mode 100644 lib/MC/MCXCOFFObjectTargetWriter.cpp
 create mode 100644 lib/MC/MCXCOFFStreamer.cpp
 create mode 100644 lib/MC/XCOFFObjectWriter.cpp
 create mode 100644 lib/MCA/Stages/MicroOpQueueStage.cpp
 create mode 100644 lib/Object/Minidump.cpp
 create mode 100644 lib/Object/RelocationResolver.cpp
 create mode 100644 lib/Object/WindowsMachineFlag.cpp
 create mode 100644 lib/Object/XCOFFObjectFile.cpp
 create mode 100644 lib/ObjectYAML/MinidumpYAML.cpp
 create mode 100644 lib/ObjectYAML/XCOFFYAML.cpp
 delete mode 100644 lib/OptRemarks/OptRemarksParser.cpp
 create mode 100644 lib/Remarks/Remark.cpp
 create mode 100644 lib/Remarks/RemarkFormat.cpp
 create mode 100644 lib/Remarks/RemarkParser.cpp
 create mode 100644 lib/Remarks/RemarkStringTable.cpp
 create mode 100644 lib/Remarks/YAMLRemarkParser.cpp
 create mode 100644 lib/Remarks/YAMLRemarkParser.h
 create mode 100644 lib/Remarks/YAMLRemarkSerializer.cpp
 create mode 100644 lib/Support/CRC.cpp
 create mode 100644 lib/Support/Optional.cpp
 create mode 100644 lib/Support/Signposts.cpp
 create mode 100644 lib/Support/TimeProfiler.cpp
 create mode 100644 lib/Support/Z3Solver.cpp
 create mode 100644 lib/Target/AArch64/AArch64CallingConvention.cpp
 create mode 100644 lib/Target/AArch64/AArch64ExpandImm.cpp
 create mode 100644 lib/Target/AArch64/AArch64ExpandImm.h
 create mode 100644 lib/Target/AArch64/AArch64StackTagging.cpp
 delete mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
 delete mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
 create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
 create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
 create mode 100644 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h
 delete mode 100644 lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
 delete mode 100644 lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
 create mode 100644 lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
 delete mode 100644 lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
 create mode 100644 lib/Target/AMDGPU/GCNNSAReassign.cpp
 create mode 100644 lib/Target/AMDGPU/GCNRegBankReassign.cpp
 delete mode 100644 lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
 delete mode 100644 lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
 create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
 create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
 delete mode 100644 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
 delete mode 100644 lib/Target/AMDGPU/SIFixWWMLiveness.cpp
 delete mode 100644 lib/Target/AMDGPU/SIIntrinsics.td
 create mode 100644 lib/Target/AMDGPU/SILowerSGPRSpills.cpp
 create mode 100644 lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
 create mode 100644 lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
 create mode 100644 lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
 create mode 100644 lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
 create mode 100644 lib/Target/ARC/ARCOptAddrMode.cpp
 delete mode 100644 lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
 delete mode 100644 lib/Target/ARC/InstPrinter/ARCInstPrinter.h
 create mode 100644 lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
 create mode 100644 lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
 create mode 100644 lib/Target/ARC/TargetInfo/ARCTargetInfo.h
 create mode 100644 lib/Target/ARM/ARMBasicBlockInfo.cpp
 create mode 100644 lib/Target/ARM/ARMCallingConv.cpp
 delete mode 100644 lib/Target/ARM/ARMComputeBlockSize.cpp
 create mode 100644 lib/Target/ARM/ARMInstrMVE.td
 create mode 100644 lib/Target/ARM/ARMLowOverheadLoops.cpp
 create mode 100644 lib/Target/ARM/ARMPredicates.td
 delete mode 100644 lib/Target/ARM/ARMScheduleM3.td
 create mode 100644 lib/Target/ARM/ARMScheduleM4.td
 delete mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
 delete mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.h
 delete mode 100755 lib/Target/ARM/LICENSE.TXT
 create mode 100644 lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
 create mode 100644 lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
 create mode 100644 lib/Target/ARM/TargetInfo/ARMTargetInfo.h
 delete mode 100644 lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
 delete mode 100644 lib/Target/AVR/InstPrinter/AVRInstPrinter.h
 create mode 100644 lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
 create mode 100644 lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
 create mode 100644 lib/Target/AVR/TargetInfo/AVRTargetInfo.h
 create mode 100644 lib/Target/BPF/BPFAbstractMemberAccess.cpp
 create mode 100644 lib/Target/BPF/BPFCORE.h
 create mode 100644 lib/Target/BPF/BPFMISimplifyPatchable.cpp
 delete mode 100644 lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
 delete mode 100644 lib/Target/BPF/InstPrinter/BPFInstPrinter.h
 create mode 100644 lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
 create mode 100644 lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
 create mode 100644 lib/Target/BPF/TargetInfo/BPFTargetInfo.h
 delete mode 100644 lib/Target/Hexagon/HexagonDepDecoders.h
 create mode 100644 lib/Target/Hexagon/HexagonDepDecoders.inc
 create mode 100644 lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h
 delete mode 100644 lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp
 delete mode 100644 lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
 create mode 100644 lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
 create mode 100644 lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
 create mode 100644 lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h
 delete mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
 delete mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
 create mode 100644 lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
 create mode 100644 lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
 create mode 100644 lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h
 delete mode 100644 lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
 delete mode 100644 lib/Target/Mips/InstPrinter/MipsInstPrinter.h
 create mode 100644 lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
 create mode 100644 lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
 create mode 100644 lib/Target/Mips/TargetInfo/MipsTargetInfo.h
 delete mode 100644 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
 delete mode 100644 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
 create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
 create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
 create mode 100644 lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h
 delete mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
 delete mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
 create mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
 create mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
 create mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
 create mode 100644 lib/Target/PowerPC/PPCCallingConv.cpp
 create mode 100644 lib/Target/PowerPC/PPCMachineScheduler.cpp
 create mode 100644 lib/Target/PowerPC/PPCMachineScheduler.h
 create mode 100644 lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
 delete mode 100644 lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
 delete mode 100644 lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
 create mode 100644 lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
 create mode 100644 lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
 create mode 100644 lib/Target/RISCV/RISCVTargetTransformInfo.cpp
 create mode 100644 lib/Target/RISCV/RISCVTargetTransformInfo.h
 create mode 100644 lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
 delete mode 100644 lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
 delete mode 100644 lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
 create mode 100644 lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
 create mode 100644 lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
 create mode 100644 lib/Target/Sparc/TargetInfo/SparcTargetInfo.h
 delete mode 100644 lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
 delete mode 100644 lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
 create mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
 create mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
 create mode 100644 lib/Target/SystemZ/SystemZPostRewrite.cpp
 create mode 100644 lib/Target/SystemZ/SystemZScheduleArch13.td
 create mode 100644 lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h
 delete mode 100644 lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
 delete mode 100644 lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
 create mode 100644 lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
 delete mode 100644 lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
 delete mode 100644 lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrRef.td
 delete mode 100644 lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
 delete mode 100644 lib/Target/X86/AsmParser/X86AsmInstrumentation.h
 delete mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
 delete mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
 delete mode 100644 lib/Target/X86/InstPrinter/X86InstComments.cpp
 delete mode 100644 lib/Target/X86/InstPrinter/X86InstComments.h
 delete mode 100644 lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
 delete mode 100644 lib/Target/X86/InstPrinter/X86InstPrinterCommon.h
 delete mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
 delete mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
 create mode 100644 lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
 create mode 100644 lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
 create mode 100644 lib/Target/X86/MCTargetDesc/X86InstComments.cpp
 create mode 100644 lib/Target/X86/MCTargetDesc/X86InstComments.h
 create mode 100644 lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
 create mode 100644 lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
 create mode 100644 lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
 create mode 100644 lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
 delete mode 100644 lib/Target/X86/ShadowCallStack.cpp
 create mode 100644 lib/Target/X86/TargetInfo/X86TargetInfo.h
 delete mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
 delete mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
 create mode 100644 lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
 create mode 100644 lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
 create mode 100644 lib/Target/XCore/TargetInfo/XCoreTargetInfo.h
 create mode 100644 lib/Testing/Support/Annotations.cpp
 create mode 100644 lib/TextAPI/MachO/Architecture.cpp
 create mode 100644 lib/TextAPI/MachO/ArchitectureSet.cpp
 create mode 100644 lib/TextAPI/MachO/InterfaceFile.cpp
 create mode 100644 lib/TextAPI/MachO/PackedVersion.cpp
 create mode 100644 lib/TextAPI/MachO/Symbol.cpp
 create mode 100644 lib/TextAPI/MachO/TextAPIContext.h
 create mode 100644 lib/TextAPI/MachO/TextStub.cpp
 create mode 100644 lib/TextAPI/MachO/TextStubCommon.cpp
 create mode 100644 lib/TextAPI/MachO/TextStubCommon.h
 create mode 100644 lib/Transforms/IPO/Attributor.cpp
 create mode 100644 lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
 delete mode 100644 lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
 create mode 100644 lib/Transforms/Instrumentation/InstrOrderFile.cpp
 create mode 100644 lib/Transforms/Instrumentation/PoisonChecking.cpp
 create mode 100644 lib/Transforms/Scalar/LoopFuse.cpp
 create mode 100644 lib/Transforms/Scalar/LowerWidenableCondition.cpp
 create mode 100644 lib/Transforms/Utils/SizeOpts.cpp
 create mode 100644 lib/Transforms/Vectorize/VPlanPredicator.cpp
 create mode 100644 lib/Transforms/Vectorize/VPlanPredicator.h
 create mode 100644 tools/llvm-mca/Views/BottleneckAnalysis.cpp
 create mode 100644 tools/llvm-mca/Views/BottleneckAnalysis.h
 create mode 100644 tools/llvm-objcopy/MachO/MachOObjcopy.cpp
 create mode 100644 tools/llvm-objcopy/MachO/MachOObjcopy.h
 create mode 100644 tools/llvm-objcopy/MachO/MachOReader.cpp
 create mode 100644 tools/llvm-objcopy/MachO/MachOReader.h
 create mode 100644 tools/llvm-objcopy/MachO/MachOWriter.cpp
 create mode 100644 tools/llvm-objcopy/MachO/MachOWriter.h
 create mode 100644 tools/llvm-objcopy/MachO/Object.cpp
 create mode 100644 tools/llvm-objcopy/MachO/Object.h
 create mode 100644 tools/llvm-pdbutil/TypeReferenceTracker.cpp
 create mode 100644 tools/llvm-pdbutil/TypeReferenceTracker.h
 create mode 100644 tools/llvm-readobj/XCOFFDumper.cpp

diff --git a/LICENSE.TXT b/LICENSE.TXT
index e4d67d16fea1..fa6ac5400070 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -1,5 +1,240 @@
 ==============================================================================
-LLVM Release License
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+   `LICENSE` file at the top containing the specific license and restrictions
+   which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+   file.
+
+==============================================================================
+Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
 ==============================================================================
 University of Illinois/NCSA
 Open Source License
@@ -42,27 +277,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
 SOFTWARE.
 
-==============================================================================
-Copyrights and Licenses for Third Party Software Distributed with LLVM:
-==============================================================================
-The LLVM software contains code written by third parties.  Such software will
-have its own individual LICENSE.TXT file in the directory in which it appears.
-This file will describe the copyrights, license, and restrictions which apply
-to that code.
-
-The disclaimer of warranty in the University of Illinois Open Source License
-applies to all code in the LLVM Distribution, and nothing in any of the
-other licenses gives permission to use the names of the LLVM Team or the
-University of Illinois to endorse or promote products derived from this
-Software.
-
-The following pieces of software have additional or alternate copyrights,
-licenses, and/or restrictions:
-
-Program             Directory
--------             ---------
-Google Test         llvm/utils/unittest/googletest
-OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}
-pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
-ARM contributions   llvm/lib/Target/ARM/LICENSE.TXT
-md5 contributions   llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h
index 36dcb89e0e08..cb9e8ece3c53 100644
--- a/include/llvm-c/Analysis.h
+++ b/include/llvm-c/Analysis.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Analysis.h - Analysis Library C Interface --------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index d1fc302767ba..b307ee979f8a 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/BitReader.h - BitReader Library C Interface ------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h
index 797d03179ab3..187051555b9a 100644
--- a/include/llvm-c/BitWriter.h
+++ b/include/llvm-c/BitWriter.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/BitWriter.h - BitWriter Library C Interface ------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Comdat.h b/include/llvm-c/Comdat.h
index 499996d68a53..81fee3fc9a6b 100644
--- a/include/llvm-c/Comdat.h
+++ b/include/llvm-c/Comdat.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Comdat.h - Module Comdat C Interface -------------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 06de058bdc58..cac2f297056d 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Core.h - Core Library C Interface ------------------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
@@ -65,6 +65,7 @@ typedef enum {
   LLVMInvoke         = 5,
   /* removed 6 due to API changes */
   LLVMUnreachable    = 7,
+  LLVMCallBr         = 67,
 
   /* Standard Unary Operators */
   LLVMFNeg           = 66,
@@ -2401,6 +2402,13 @@ LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn);
  */
 void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn);
 
+/**
+ * Obtain the intrinsic ID number which matches the given function name.
+ *
+ * @see llvm::Function::lookupIntrinsicID()
+ */
+unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen);
+
 /**
  * Obtain the ID number from a function instance.
  *
@@ -2612,52 +2620,138 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned Align);
  */
 
 /**
- * @}
+ * @defgroup LLVMCCoreValueGlobalIFunc IFuncs
+ *
+ * Functions in this group relate to indirect functions.
+ *
+ * Functions in this group expect LLVMValueRef instances that correspond
+ * to llvm::GlobalIFunc instances.
+ *
+ * @{
  */
 
 /**
- * @}
+ * Add a global indirect function to a module under a specified name.
+ *
+ * @see llvm::GlobalIFunc::create()
  */
+LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M,
+                                const char *Name, size_t NameLen,
+                                LLVMTypeRef Ty, unsigned AddrSpace,
+                                LLVMValueRef Resolver);
 
 /**
- * @}
+ * Obtain a GlobalIFunc value from a Module by its name.
+ *
+ * The returned value corresponds to a llvm::GlobalIFunc value.
+ *
+ * @see llvm::Module::getNamedIFunc()
  */
+LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M,
+                                     const char *Name, size_t NameLen);
 
 /**
- * @defgroup LLVMCCoreValueMetadata Metadata
+ * Obtain an iterator to the first GlobalIFunc in a Module.
  *
- * @{
+ * @see llvm::Module::ifunc_begin()
  */
+LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M);
 
 /**
- * Obtain a MDString value from a context.
+ * Obtain an iterator to the last GlobalIFunc in a Module.
  *
- * The returned instance corresponds to the llvm::MDString class.
+ * @see llvm::Module::ifunc_end()
+ */
+LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M);
+
+/**
+ * Advance a GlobalIFunc iterator to the next GlobalIFunc.
  *
- * The instance is specified by string data of a specified length. The
- * string content is copied, so the backing memory can be freed after
- * this function returns.
+ * Returns NULL if the iterator was already at the end and there are no more
+ * global indirect functions.
  */
-LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
-                                   unsigned SLen);
+LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc);
 
 /**
- * Obtain a MDString value from the global context.
+ * Decrement a GlobalIFunc iterator to the previous GlobalIFunc.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous global indirect functions.
  */
-LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc);
+  
+/**
+ * Retrieves the resolver function associated with this indirect function, or
+ * NULL if it does not exist.
+ *
+ * @see llvm::GlobalIFunc::getResolver()
+ */
+LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc);
 
 /**
- * Obtain a MDNode value from a context.
+ * Sets the resolver function associated with this indirect function.
  *
- * The returned value corresponds to the llvm::MDNode class.
+ * @see llvm::GlobalIFunc::setResolver()
  */
-LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
-                                 unsigned Count);
+void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver);
 
 /**
- * Obtain a MDNode value from the global context.
+ * Remove a global indirect function from its parent module and delete it.
+ *
+ * @see llvm::GlobalIFunc::eraseFromParent()
  */
-LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+void LLVMEraseGlobalIFunc(LLVMValueRef IFunc);
+
+/**
+ * Remove a global indirect function from its parent module.
+ *
+ * This unlinks the global indirect function from its containing module but
+ * keeps it alive.
+ *
+ * @see llvm::GlobalIFunc::removeFromParent()
+ */
+void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc);
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueMetadata Metadata
+ *
+ * @{
+ */
+
+/**
+ * Create an MDString value from a given string value.
+ *
+ * The MDString value does not take ownership of the given string; it remains
+ * the responsibility of the caller to free it.
+ *
+ * @see llvm::MDString::get()
+ */
+LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str,
+                                       size_t SLen);
+
+/**
+ * Create an MDNode value with the given array of operands.
+ *
+ * @see llvm::MDNode::get()
+ */
+LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs,
+                                     size_t Count);
 
 /**
  * Obtain a Metadata as a Value.
@@ -2699,6 +2793,17 @@ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V);
  */
 void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest);
 
+/** Deprecated: Use LLVMMDStringInContext2 instead. */
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+                                   unsigned SLen);
+/** Deprecated: Use LLVMMDStringInContext2 instead. */
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+/** Deprecated: Use LLVMMDNodeInContext2 instead. */
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+                                 unsigned Count);
+/** Deprecated: Use LLVMMDNodeInContext2 instead. */
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+
 /**
  * @}
  */
@@ -2811,6 +2916,24 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
  */
 LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
 
+/**
+ * Insert the given basic block after the insertion point of the given builder.
+ *
+ * The insertion point must be valid.
+ *
+ * @see llvm::Function::BasicBlockListType::insertAfter()
+ */
+void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder,
+                                                  LLVMBasicBlockRef BB);
+
+/**
+ * Append the given basic block to the basic block list of the given function.
+ *
+ * @see llvm::Function::BasicBlockListType::push_back()
+ */
+void LLVMAppendExistingBasicBlock(LLVMValueRef Fn,
+                                  LLVMBasicBlockRef BB);
+  
 /**
  * Create a new basic block without inserting it into a function.
  *
@@ -3387,9 +3510,59 @@ void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
 void LLVMDisposeBuilder(LLVMBuilderRef Builder);
 
 /* Metadata */
+
+/**
+ * Get location information used by debugging information.
+ *
+ * @see llvm::IRBuilder::getCurrentDebugLocation()
+ */
+LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder);
+
+/**
+ * Set location information used by debugging information.
+ *
+ * To clear the current debug location of the given builder, pass NULL to \p Loc.
+ *
+ * @see llvm::IRBuilder::SetCurrentDebugLocation()
+ */
+void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc);
+
+/**
+ * Attempts to set the debug location for the given instruction using the
+ * current debug location for the given builder.  If the builder has no current
+ * debug location, this function is a no-op.
+ *
+ * @see llvm::IRBuilder::SetInstDebugLocation()
+ */
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
+
+/**
+ * Get the default floating-point math metadata for a given builder.
+ *
+ * @see llvm::IRBuilder::getDefaultFPMathTag()
+ */
+LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder);
+
+/**
+ * Set the default floating-point math metadata for the given builder.
+ *
+ * To clear the metadata, pass NULL to \p FPMathTag.
+ *
+ * @see llvm::IRBuilder::setDefaultFPMathTag()
+ */
+void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
+                                    LLVMMetadataRef FPMathTag);
+
+/**
+ * Deprecated: Passing the NULL location will crash.
+ * Use LLVMSetCurrentDebugLocation2 instead.
+ */
 void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L);
+/**
+ * Deprecated: Returning the NULL location will crash.
+ * Use LLVMGetCurrentDebugLocation2 instead.
+ */
 LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder);
-void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
 
 /* Terminators */
 LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef);
diff --git a/include/llvm-c/DataTypes.h b/include/llvm-c/DataTypes.h
index 7081c83ffc2b..893b22b49ffc 100644
--- a/include/llvm-c/DataTypes.h
+++ b/include/llvm-c/DataTypes.h
@@ -1,9 +1,9 @@
 /*===-- include/llvm-c/DataTypes.h - Define fixed size types ------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h
index 87a72034b0e8..33c8110a863c 100644
--- a/include/llvm-c/DebugInfo.h
+++ b/include/llvm-c/DebugInfo.h
@@ -1,9 +1,8 @@
 //===------------ DebugInfo.h - LLVM C API Debug Info API -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -51,13 +50,12 @@ typedef enum {
   LLVMDIFlagIntroducedVirtual = 1 << 18,
   LLVMDIFlagBitField = 1 << 19,
   LLVMDIFlagNoReturn = 1 << 20,
-  LLVMDIFlagMainSubprogram = 1 << 21,
   LLVMDIFlagTypePassByValue = 1 << 22,
   LLVMDIFlagTypePassByReference = 1 << 23,
   LLVMDIFlagEnumClass = 1 << 24,
   LLVMDIFlagFixedEnum = LLVMDIFlagEnumClass, // Deprecated.
   LLVMDIFlagThunk = 1 << 25,
-  LLVMDIFlagTrivial = 1 << 26,
+  LLVMDIFlagNonTrivial = 1 << 26,
   LLVMDIFlagBigEndian = 1 << 27,
   LLVMDIFlagLittleEndian = 1 << 28,
   LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5),
@@ -161,7 +159,8 @@ enum {
   LLVMDIObjCPropertyMetadataKind,
   LLVMDIImportedEntityMetadataKind,
   LLVMDIMacroMetadataKind,
-  LLVMDIMacroFileMetadataKind
+  LLVMDIMacroFileMetadataKind,
+  LLVMDICommonBlockMetadataKind
 };
 typedef unsigned LLVMMetadataKind;
 
@@ -452,6 +451,49 @@ unsigned LLVMDILocationGetColumn(LLVMMetadataRef Location);
  */
 LLVMMetadataRef LLVMDILocationGetScope(LLVMMetadataRef Location);
 
+/**
+ * Get the "inline at" location associated with this debug location.
+ * \param Location     The debug location.
+ *
+ * @see DILocation::getInlinedAt()
+ */
+LLVMMetadataRef LLVMDILocationGetInlinedAt(LLVMMetadataRef Location);
+
+/**
+ * Get the metadata of the file associated with a given scope.
+ * \param Scope     The scope object.
+ *
+ * @see DIScope::getFile()
+ */
+LLVMMetadataRef LLVMDIScopeGetFile(LLVMMetadataRef Scope);
+
+/**
+ * Get the directory of a given file.
+ * \param File     The file object.
+ * \param Len      The length of the returned string.
+ *
+ * @see DIFile::getDirectory()
+ */
+const char *LLVMDIFileGetDirectory(LLVMMetadataRef File, unsigned *Len);
+
+/**
+ * Get the name of a given file.
+ * \param File     The file object.
+ * \param Len      The length of the returned string.
+ *
+ * @see DIFile::getFilename()
+ */
+const char *LLVMDIFileGetFilename(LLVMMetadataRef File, unsigned *Len);
+
+/**
+ * Get the source of a given file.
+ * \param File     The file object.
+ * \param Len      The length of the returned string.
+ *
+ * @see DIFile::getSource()
+ */
+const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len);
+
 /**
  * Create a type array.
  * \param Builder        The DIBuilder.
@@ -479,6 +521,19 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder,
                                   unsigned NumParameterTypes,
                                   LLVMDIFlags Flags);
 
+/**
+ * Create debugging information entry for an enumerator.
+ * @param Builder        The DIBuilder.
+ * @param Name           Enumerator name.
+ * @param NameLen        Length of enumerator name.
+ * @param Value          Enumerator value.
+ * @param IsUnsigned     True if the value is unsigned.
+ */
+LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder,
+                                              const char *Name, size_t NameLen,
+                                              int64_t Value,
+                                              LLVMBool IsUnsigned);
+
 /**
  * Create debugging information entry for an enumeration.
  * \param Builder        The DIBuilder.
@@ -1017,6 +1072,48 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
     size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
     unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
     LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits);
+
+/**
+ * Retrieves the \c DIVariable associated with this global variable expression.
+ * \param GVE    The global variable expression.
+ *
+ * @see llvm::DIGlobalVariableExpression::getVariable()
+ */
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE);
+
+/**
+ * Retrieves the \c DIExpression associated with this global variable expression.
+ * \param GVE    The global variable expression.
+ *
+ * @see llvm::DIGlobalVariableExpression::getExpression()
+ */
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetExpression(
+    LLVMMetadataRef GVE);
+
+/**
+ * Get the metadata of the file associated with a given variable.
+ * \param Var     The variable object.
+ *
+ * @see DIVariable::getFile()
+ */
+LLVMMetadataRef LLVMDIVariableGetFile(LLVMMetadataRef Var);
+
+/**
+ * Get the metadata of the scope associated with a given variable.
+ * \param Var     The variable object.
+ *
+ * @see DIVariable::getScope()
+ */
+LLVMMetadataRef LLVMDIVariableGetScope(LLVMMetadataRef Var);
+
+/**
+ * Get the source line where this \c DIVariable is declared.
+ * \param Var     The DIVariable.
+ *
+ * @see DIVariable::getLine()
+ */
+unsigned LLVMDIVariableGetLine(LLVMMetadataRef Var);
+
 /**
  * Create a new temporary \c MDNode.  Suitable for use in constructing cyclic
  * \c MDNode structures. A temporary \c MDNode is not uniqued, may be RAUW'd,
@@ -1180,6 +1277,30 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func);
  */
 void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP);
 
+/**
+ * Get the line associated with a given subprogram.
+ * \param Subprogram     The subprogram object.
+ *
+ * @see DISubprogram::getLine()
+ */
+unsigned LLVMDISubprogramGetLine(LLVMMetadataRef Subprogram);
+
+/**
+ * Get the debug location for the given instruction.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ */
+LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst);
+
+/**
+ * Set the debug location for the given instruction.
+ *
+ * To clear the location metadata of the given instruction, pass NULL to \p Loc.
+ *
+ * @see llvm::Instruction::setDebugLoc()
+ */
+void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
+
 /**
  * Obtain the enumerated type of a Metadata instance.
  *
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index 5e80b95848cf..3adcc3c47a3f 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/DisassemblerTypes.h b/include/llvm-c/DisassemblerTypes.h
index e8754ac77055..389e5ee454a8 100644
--- a/include/llvm-c/DisassemblerTypes.h
+++ b/include/llvm-c/DisassemblerTypes.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/DisassemblerTypedefs.h -----------------------------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*/
 
diff --git a/include/llvm-c/Error.h b/include/llvm-c/Error.h
index 71e84661222b..52943063c697 100644
--- a/include/llvm-c/Error.h
+++ b/include/llvm-c/Error.h
@@ -1,9 +1,9 @@
 /*===------- llvm-c/Error.h - llvm::Error class C Interface -------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
@@ -60,7 +60,7 @@ void LLVMDisposeErrorMessage(char *ErrMsg);
 /**
  * Returns the type id for llvm StringError.
  */
-LLVMErrorTypeId LLVMGetStringErrorTypeId();
+LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/ErrorHandling.h b/include/llvm-c/ErrorHandling.h
index 2059b3aeb158..4927349d8983 100644
--- a/include/llvm-c/ErrorHandling.h
+++ b/include/llvm-c/ErrorHandling.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/ErrorHandling.h - Error Handling C Interface -------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index e8ebef9ab15d..ef714cd06384 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/ExecutionEngine.h - ExecutionEngine Lib C Iface --*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/IRReader.h b/include/llvm-c/IRReader.h
index 5b58d9921fb0..4d0b696e9583 100644
--- a/include/llvm-c/IRReader.h
+++ b/include/llvm-c/IRReader.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/IRReader.h - IR Reader C Interface -----------------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index e45eafb139f2..36c41dbd8d31 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index 8bcf59969ccb..19b4f5cf7491 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -1,9 +1,8 @@
 //===-- llvm/LinkTimeOptimizer.h - LTO Public C Interface -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm-c/Linker.h b/include/llvm-c/Linker.h
index d02c37f94c86..908513041661 100644
--- a/include/llvm-c/Linker.h
+++ b/include/llvm-c/Linker.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Linker.h - Module Linker C Interface -------------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index a2980e89fe3d..1e9b703a68ff 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Object.h - Object Lib C Iface --------------------*- C++ -*-===*/
 /*                                                                            */
-/*                     The LLVM Compiler Infrastructure                       */
-/*                                                                            */
-/* This file is distributed under the University of Illinois Open Source      */
-/* License. See LICENSE.TXT for details.                                      */
+/* Part of the LLVM Project, under the Apache License v2.0 with LLVM          */
+/* Exceptions.                                                                */
+/* See https://llvm.org/LICENSE.txt for license information.                  */
+/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    */
 /*                                                                            */
 /*===----------------------------------------------------------------------===*/
 /*                                                                            */
@@ -34,29 +34,140 @@ extern "C" {
  */
 
 // Opaque type wrappers
-typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
 typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef;
 typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef;
 typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef;
 
-// ObjectFile creation
-LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
-void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+typedef enum {
+  LLVMBinaryTypeArchive,                /**< Archive file. */
+  LLVMBinaryTypeMachOUniversalBinary,   /**< Mach-O Universal Binary file. */
+  LLVMBinaryTypeCOFFImportFile,         /**< COFF Import file. */
+  LLVMBinaryTypeIR,                     /**< LLVM IR. */
+  LLVMBinaryTypeWinRes,                 /**< Windows resource (.res) file. */
+  LLVMBinaryTypeCOFF,                   /**< COFF Object file. */
+  LLVMBinaryTypeELF32L,                 /**< ELF 32-bit, little endian. */
+  LLVMBinaryTypeELF32B,                 /**< ELF 32-bit, big endian. */
+  LLVMBinaryTypeELF64L,                 /**< ELF 64-bit, little endian. */
+  LLVMBinaryTypeELF64B,                 /**< ELF 64-bit, big endian. */
+  LLVMBinaryTypeMachO32L,               /**< MachO 32-bit, little endian. */
+  LLVMBinaryTypeMachO32B,               /**< MachO 32-bit, big endian. */
+  LLVMBinaryTypeMachO64L,               /**< MachO 64-bit, little endian. */
+  LLVMBinaryTypeMachO64B,               /**< MachO 64-bit, big endian. */
+  LLVMBinaryTypeWasm,                   /**< Web Assembly. */
+} LLVMBinaryType;
+
+/**
+ * Create a binary file from the given memory buffer.
+ *
+ * The exact type of the binary file will be inferred automatically, and the
+ * appropriate implementation selected.  The context may be NULL except if
+ * the resulting file is an LLVM IR file.
+ *
+ * The memory buffer is not consumed by this function.  It is the responsibility
+ * of the caller to free it with \c LLVMDisposeMemoryBuffer.
+ *
+ * If NULL is returned, the \p ErrorMessage parameter is populated with the
+ * error's description.  It is then the caller's responsibility to free this
+ * message by calling \c LLVMDisposeMessage.
+ *
+ * @see llvm::object::createBinary
+ */
+LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf,
+                               LLVMContextRef Context,
+                               char **ErrorMessage);
+
+/**
+ * Dispose of a binary file.
+ *
+ * The binary file does not own its backing buffer.  It is the responsibility
+ * of the caller to free it with \c LLVMDisposeMemoryBuffer.
+ */
+void LLVMDisposeBinary(LLVMBinaryRef BR);
+
+/**
+ * Retrieves a copy of the memory buffer associated with this object file.
+ *
+ * The returned buffer is merely a shallow copy and does not own the actual
+ * backing buffer of the binary. Nevertheless, it is the responsibility of the
+ * caller to free it with \c LLVMDisposeMemoryBuffer.
+ *
+ * @see llvm::object::getMemoryBufferRef
+ */
+LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR);
+
+/**
+ * Retrieve the specific type of a binary.
+ *
+ * @see llvm::object::Binary::getType
+ */
+LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR);
+
+/**
+ * For a Mach-O universal binary file, retrieves the object file corresponding
+ * to the given architecture if it is present as a slice.
+ *
+ * If NULL is returned, the \p ErrorMessage parameter is populated with the
+ * error's description.  It is then the caller's responsibility to free this
+ * message by calling \c LLVMDisposeMessage.
+ *
+ * It is the responsibility of the caller to free the returned object file by
+ * calling \c LLVMDisposeBinary.
+ */
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+                                                        const char *Arch,
+                                                        size_t ArchLen,
+                                                        char **ErrorMessage);
+
+/**
+ * Retrieve a copy of the section iterator for this object file.
+ *
+ * If there are no sections, the result is NULL.
+ *
+ * The returned iterator is merely a shallow copy. Nevertheless, it is
+ * the responsibility of the caller to free it with
+ * \c LLVMDisposeSectionIterator.
+ *
+ * @see llvm::object::sections()
+ */
+LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR);
+
+/**
+ * Returns whether the given section iterator is at the end.
+ *
+ * @see llvm::object::section_end
+ */
+LLVMBool LLVMObjectFileIsSectionIteratorAtEnd(LLVMBinaryRef BR,
+                                              LLVMSectionIteratorRef SI);
+
+/**
+ * Retrieve a copy of the symbol iterator for this object file.
+ *
+ * If there are no symbols, the result is NULL.
+ *
+ * The returned iterator is merely a shallow copy. Nevertheless, it is
+ * the responsibility of the caller to free it with
+ * \c LLVMDisposeSymbolIterator.
+ *
+ * @see llvm::object::symbols()
+ */
+LLVMSymbolIteratorRef LLVMObjectFileCopySymbolIterator(LLVMBinaryRef BR);
+
+/**
+ * Returns whether the given symbol iterator is at the end.
+ *
+ * @see llvm::object::symbol_end
+ */
+LLVMBool LLVMObjectFileIsSymbolIteratorAtEnd(LLVMBinaryRef BR,
+                                             LLVMSymbolIteratorRef SI);
 
-// ObjectFile Section iterators
-LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
 void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI);
-LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
-                                LLVMSectionIteratorRef SI);
+
 void LLVMMoveToNextSection(LLVMSectionIteratorRef SI);
 void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
                                  LLVMSymbolIteratorRef Sym);
 
 // ObjectFile Symbol iterators
-LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile);
 void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI);
-LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
-                                LLVMSymbolIteratorRef SI);
 void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI);
 
 // SectionRef accessors
@@ -89,6 +200,28 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI);
 const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI);
 const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI);
 
+/** Deprecated: Use LLVMBinaryRef instead. */
+typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
+
+/** Deprecated: Use LLVMCreateBinary instead. */
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
+
+/** Deprecated: Use LLVMDisposeBinary instead. */
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileCopySectionIterator instead. */
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileIsSectionIteratorAtEnd instead. */
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+                                    LLVMSectionIteratorRef SI);
+
+/** Deprecated: Use LLVMObjectFileCopySymbolIterator instead. */
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileIsSymbolIteratorAtEnd instead. */
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+                                   LLVMSymbolIteratorRef SI);
 /**
  * @}
  */
diff --git a/include/llvm-c/OptRemarks.h b/include/llvm-c/OptRemarks.h
deleted file mode 100644
index 6a90394e711c..000000000000
--- a/include/llvm-c/OptRemarks.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*===-- llvm-c/OptRemarks.h - OptRemarks Public C Interface -------*- C -*-===*\
-|*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
-|*                                                                            *|
-|*===----------------------------------------------------------------------===*|
-|*                                                                            *|
-|* This header provides a public interface to an opt-remark library.          *|
-|* LLVM provides an implementation of this interface.                         *|
-|*                                                                            *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_OPT_REMARKS_H
-#define LLVM_C_OPT_REMARKS_H
-
-#include "llvm-c/Core.h"
-#include "llvm-c/Types.h"
-#ifdef __cplusplus
-#include <cstddef>
-extern "C" {
-#else
-#include <stddef.h>
-#endif /* !defined(__cplusplus) */
-
-/**
- * @defgroup LLVMCOPTREMARKS OptRemarks
- * @ingroup LLVMC
- *
- * @{
- */
-
-#define OPT_REMARKS_API_VERSION 0
-
-/**
- * String containing a buffer and a length. The buffer is not guaranteed to be
- * zero-terminated.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
-  const char *Str;
-  uint32_t Len;
-} LLVMOptRemarkStringRef;
-
-/**
- * DebugLoc containing File, Line and Column.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
-  // File:
-  LLVMOptRemarkStringRef SourceFile;
-  // Line:
-  uint32_t SourceLineNumber;
-  // Column:
-  uint32_t SourceColumnNumber;
-} LLVMOptRemarkDebugLoc;
-
-/**
- * Element of the "Args" list. The key might give more information about what
- * are the semantics of the value, e.g. "Callee" will tell you that the value
- * is a symbol that names a function.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
-  // e.g. "Callee"
-  LLVMOptRemarkStringRef Key;
-  // e.g. "malloc"
-  LLVMOptRemarkStringRef Value;
-
-  // "DebugLoc": Optional
-  LLVMOptRemarkDebugLoc DebugLoc;
-} LLVMOptRemarkArg;
-
-/**
- * One remark entry.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
-  // e.g. !Missed, !Passed
-  LLVMOptRemarkStringRef RemarkType;
-  // "Pass": Required
-  LLVMOptRemarkStringRef PassName;
-  // "Name": Required
-  LLVMOptRemarkStringRef RemarkName;
-  // "Function": Required
-  LLVMOptRemarkStringRef FunctionName;
-
-  // "DebugLoc": Optional
-  LLVMOptRemarkDebugLoc DebugLoc;
-  // "Hotness": Optional
-  uint32_t Hotness;
-  // "Args": Optional. It is an array of `num_args` elements.
-  uint32_t NumArgs;
-  LLVMOptRemarkArg *Args;
-} LLVMOptRemarkEntry;
-
-typedef struct LLVMOptRemarkOpaqueParser *LLVMOptRemarkParserRef;
-
-/**
- * Creates a remark parser that can be used to read and parse the buffer located
- * in \p Buf of size \p Size.
- *
- * \p Buf cannot be NULL.
- *
- * This function should be paired with LLVMOptRemarkParserDispose() to avoid
- * leaking resources.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
-                                                        uint64_t Size);
-
-/**
- * Returns the next remark in the file.
- *
- * The value pointed to by the return value is invalidated by the next call to
- * LLVMOptRemarkParserGetNext().
- *
- * If the parser reaches the end of the buffer, the return value will be NULL.
- *
- * In the case of an error, the return value will be NULL, and:
- *
- * 1) LLVMOptRemarkParserHasError() will return `1`.
- *
- * 2) LLVMOptRemarkParserGetErrorMessage() will return a descriptive error
- *    message.
- *
- * An error may occur if:
- *
- * 1) An argument is invalid.
- *
- * 2) There is a YAML parsing error. This type of error aborts parsing
- *    immediately and returns `1`. It can occur on malformed YAML.
- *
- * 3) Remark parsing error. If this type of error occurs, the parser won't call
- *    the handler and will continue to the next one. It can occur on malformed
- *    remarks, like missing or extra fields in the file.
- *
- * Here is a quick example of the usage:
- *
- * ```
- *  LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
- *  LLVMOptRemarkEntry *Remark = NULL;
- *  while ((Remark == LLVMOptRemarkParserGetNext(Parser))) {
- *    // use Remark
- *  }
- *  bool HasError = LLVMOptRemarkParserHasError(Parser);
- *  LLVMOptRemarkParserDispose(Parser);
- * ```
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMOptRemarkEntry *
-LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns `1` if the parser encountered an error while parsing the buffer.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns a null-terminated string containing an error message.
- *
- * In case of no error, the result is `NULL`.
- *
- * The memory of the string is bound to the lifetime of \p Parser. If
- * LLVMOptRemarkParserDispose() is called, the memory of the string will be
- * released.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern const char *
-LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser);
-
-/**
- * Releases all the resources used by \p Parser.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns the version of the opt-remarks dylib.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern uint32_t LLVMOptRemarkVersion(void);
-
-/**
- * @} // endgoup LLVMCOPTREMARKS
- */
-
-#ifdef __cplusplus
-}
-#endif /* !defined(__cplusplus) */
-
-#endif /* LLVM_C_OPT_REMARKS_H */
diff --git a/include/llvm-c/OrcBindings.h b/include/llvm-c/OrcBindings.h
index 570db87fee94..9e92371b5a3a 100644
--- a/include/llvm-c/OrcBindings.h
+++ b/include/llvm-c/OrcBindings.h
@@ -1,9 +1,9 @@
 /*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Remarks.h b/include/llvm-c/Remarks.h
new file mode 100644
index 000000000000..88eb5120c57c
--- /dev/null
+++ b/include/llvm-c/Remarks.h
@@ -0,0 +1,329 @@
+/*===-- llvm-c/Remarks.h - Remarks Public C Interface -------------*- C -*-===*\
+|*                                                                            *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header provides a public interface to a remark diagnostics library.   *|
+|* LLVM provides an implementation of this interface.                         *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_REMARKS_H
+#define LLVM_C_REMARKS_H
+
+#include "llvm-c/Types.h"
+#ifdef __cplusplus
+#include <cstddef>
+extern "C" {
+#else
+#include <stddef.h>
+#endif /* !defined(__cplusplus) */
+
+/**
+ * @defgroup LLVMCREMARKS Remarks
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+#define REMARKS_API_VERSION 0
+
+/**
+ * The type of the emitted remark.
+ */
+enum LLVMRemarkType {
+  LLVMRemarkTypeUnknown,
+  LLVMRemarkTypePassed,
+  LLVMRemarkTypeMissed,
+  LLVMRemarkTypeAnalysis,
+  LLVMRemarkTypeAnalysisFPCommute,
+  LLVMRemarkTypeAnalysisAliasing,
+  LLVMRemarkTypeFailure
+};
+
+/**
+ * String containing a buffer and a length. The buffer is not guaranteed to be
+ * zero-terminated.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueString *LLVMRemarkStringRef;
+
+/**
+ * Returns the buffer holding the string.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern const char *LLVMRemarkStringGetData(LLVMRemarkStringRef String);
+
+/**
+ * Returns the size of the string.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkStringGetLen(LLVMRemarkStringRef String);
+
+/**
+ * DebugLoc containing File, Line and Column.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueDebugLoc *LLVMRemarkDebugLocRef;
+
+/**
+ * Return the path to the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkDebugLocGetSourceFilePath(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Return the line in the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkDebugLocGetSourceLine(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Return the column in the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkDebugLocGetSourceColumn(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Element of the "Args" list. The key might give more information about what
+ * the semantics of the value are, e.g. "Callee" will tell you that the value
+ * is a symbol that names a function.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueArg *LLVMRemarkArgRef;
+
+/**
+ * Returns the key of an argument. The key defines what the value is, and the
+ * same key can appear multiple times in the list of arguments.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef LLVMRemarkArgGetKey(LLVMRemarkArgRef Arg);
+
+/**
+ * Returns the value of an argument. This is a string that can contain newlines.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef LLVMRemarkArgGetValue(LLVMRemarkArgRef Arg);
+
+/**
+ * Returns the debug location that is attached to the value of this argument.
+ *
+ * If there is no debug location, the return value will be `NULL`.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkDebugLocRef LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg);
+
+/**
+ * A remark emitted by the compiler.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueEntry *LLVMRemarkEntryRef;
+
+/**
+ * Free the resources used by the remark entry.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark);
+
+/**
+ * The type of the remark. For example, it can allow users to only keep the
+ * missed optimizations from the compiler.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern enum LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the name of the pass that emitted this remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetPassName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get an identifier of the remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the name of the function being processed when the remark was emitted.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetFunctionName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Returns the debug location that is attached to this remark.
+ *
+ * If there is no debug location, the return value will be `NULL`.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkDebugLocRef
+LLVMRemarkEntryGetDebugLoc(LLVMRemarkEntryRef Remark);
+
+/**
+ * Return the hotness of the remark.
+ *
+ * A hotness of `0` means this value is not set.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint64_t LLVMRemarkEntryGetHotness(LLVMRemarkEntryRef Remark);
+
+/**
+ * The number of arguments the remark holds.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get a new iterator to iterate over a remark's arguments.
+ *
+ * If there are no arguments in \p Remark, the return value will be `NULL`.
+ *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the next argument in \p Remark from the position of \p It.
+ *
+ * Returns `NULL` if there are no more arguments available.
+ *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkArgRef LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef It,
+                                                  LLVMRemarkEntryRef Remark);
+
+typedef struct LLVMRemarkOpaqueParser *LLVMRemarkParserRef;
+
+/**
+ * Creates a remark parser that can be used to parse the buffer located in \p
+ * Buf of size \p Size bytes.
+ *
+ * \p Buf cannot be `NULL`.
+ *
+ * This function should be paired with LLVMRemarkParserDispose() to avoid
+ * leaking resources.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
+                                                      uint64_t Size);
+
+/**
+ * Returns the next remark in the file.
+ *
+ * The value pointed to by the return value needs to be disposed using a call to
+ * LLVMRemarkEntryDispose().
+ *
+ * All the entries in the returned value that are of LLVMRemarkStringRef type
+ * will become invalidated once a call to LLVMRemarkParserDispose is made.
+ *
+ * If the parser reaches the end of the buffer, the return value will be `NULL`.
+ *
+ * In the case of an error, the return value will be `NULL`, and:
+ *
+ * 1) LLVMRemarkParserHasError() will return `1`.
+ *
+ * 2) LLVMRemarkParserGetErrorMessage() will return a descriptive error
+ *    message.
+ *
+ * An error may occur if:
+ *
+ * 1) An argument is invalid.
+ *
+ * 2) There is a parsing error. This can occur on things like malformed YAML.
+ *
+ * 3) There is a Remark semantic error. This can occur on well-formed files with
+ *    missing or extra fields.
+ *
+ * Here is a quick example of the usage:
+ *
+ * ```
+ * LLVMRemarkParserRef Parser = LLVMRemarkParserCreateYAML(Buf, Size);
+ * LLVMRemarkEntryRef Remark = NULL;
+ * while ((Remark = LLVMRemarkParserGetNext(Parser))) {
+ *    // use Remark
+ *    LLVMRemarkEntryDispose(Remark); // Release memory.
+ * }
+ * bool HasError = LLVMRemarkParserHasError(Parser);
+ * LLVMRemarkParserDispose(Parser);
+ * ```
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkEntryRef LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns `1` if the parser encountered an error while parsing the buffer.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns a null-terminated string containing an error message.
+ *
+ * In case of no error, the result is `NULL`.
+ *
+ * The memory of the string is bound to the lifetime of \p Parser. If
+ * LLVMRemarkParserDispose() is called, the memory of the string will be
+ * released.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern const char *LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser);
+
+/**
+ * Releases all the resources used by \p Parser.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns the version of the remarks library.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkVersion(void);
+
+/**
+ * @} // endgroup LLVMCREMARKS
+ */
+
+#ifdef __cplusplus
+}
+#endif /* !defined(__cplusplus) */
+
+#endif /* LLVM_C_REMARKS_H */
diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h
index 37d5d72ff5dc..097f784246c5 100644
--- a/include/llvm-c/Support.h
+++ b/include/llvm-c/Support.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Support.h - Support C Interface --------------------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 03004ba5eec0..4ef641eaf232 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Target.h - Target Lib C Iface --------------------*- C++ -*-===*/
 /*                                                                            */
-/*                     The LLVM Compiler Infrastructure                       */
-/*                                                                            */
-/* This file is distributed under the University of Illinois Open Source      */
-/* License. See LICENSE.TXT for details.                                      */
+/* Part of the LLVM Project, under the Apache License v2.0 with LLVM          */
+/* Exceptions.                                                                */
+/* See https://llvm.org/LICENSE.txt for license information.                  */
+/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    */
 /*                                                                            */
 /*===----------------------------------------------------------------------===*/
 /*                                                                            */
@@ -22,10 +22,6 @@
 #include "llvm-c/Types.h"
 #include "llvm/Config/llvm-config.h"
 
-#if defined(_MSC_VER) && !defined(inline)
-#define inline __inline
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index c06e9edc9aaf..28d7c096871e 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/AggressiveInstCombine.h b/include/llvm-c/Transforms/AggressiveInstCombine.h
index 8756a22e917a..c0b0141c3da1 100644
--- a/include/llvm-c/Transforms/AggressiveInstCombine.h
+++ b/include/llvm-c/Transforms/AggressiveInstCombine.h
@@ -1,9 +1,9 @@
 /*===-- AggressiveInstCombine.h ---------------------------------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/Coroutines.h b/include/llvm-c/Transforms/Coroutines.h
index 827e30fb2d7c..227e7cf0a360 100644
--- a/include/llvm-c/Transforms/Coroutines.h
+++ b/include/llvm-c/Transforms/Coroutines.h
@@ -1,9 +1,9 @@
 /*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 7705b1864dc3..7a82ed464141 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -1,9 +1,9 @@
 /*===-- IPO.h - Interprocedural Transformations C Interface -----*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/InstCombine.h b/include/llvm-c/Transforms/InstCombine.h
index e1c1572d53dc..166f278d9a69 100644
--- a/include/llvm-c/Transforms/InstCombine.h
+++ b/include/llvm-c/Transforms/InstCombine.h
@@ -1,9 +1,9 @@
 /*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index 69786b341ab4..d164c00d49c5 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Transform/PassManagerBuilder.h - PMB C Interface ---*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 3c3bb4eb9b82..031cf98b2df2 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -1,9 +1,9 @@
 /*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Transforms/Utils.h b/include/llvm-c/Transforms/Utils.h
index f171f7fbbe3e..63594abfa460 100644
--- a/include/llvm-c/Transforms/Utils.h
+++ b/include/llvm-c/Transforms/Utils.h
@@ -1,9 +1,9 @@
 /*===-- Utils.h - Transformation Utils Library C Interface ------*- C++ -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
@@ -38,6 +38,9 @@ void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM);
 /** See llvm::createPromoteMemoryToRegisterPass function. */
 void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM);
 
+/** See llvm::createAddDiscriminatorsPass function. */
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM);
+
 /**
  * @}
  */
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index e3f9961acfb1..e383481fe4f4 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -1,10 +1,10 @@
 /*===---------------------------Vectorize.h --------------------- -*- C -*-===*\
 |*===----------- Vectorization Transformation Library C Interface ---------===*|
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
diff --git a/include/llvm-c/Types.h b/include/llvm-c/Types.h
index ce1acf3e0421..612c7d3eff32 100644
--- a/include/llvm-c/Types.h
+++ b/include/llvm-c/Types.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/Support.h - C Interface Types declarations ---------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
@@ -163,6 +163,11 @@ typedef struct LLVMOpaqueModuleFlagEntry LLVMModuleFlagEntry;
  */
 typedef struct LLVMOpaqueJITEventListener *LLVMJITEventListenerRef;
 
+/**
+ * @see llvm::object::Binary
+ */
+typedef struct LLVMOpaqueBinary *LLVMBinaryRef;
+
 /**
  * @}
  */
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 090cd34af4e9..2467722b1954 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -1,9 +1,9 @@
 /*===-- llvm-c/lto.h - LTO Public C Interface ---------------------*- C -*-===*\
 |*                                                                            *|
-|*                     The LLVM Compiler Infrastructure                       *|
-|*                                                                            *|
-|* This file is distributed under the University of Illinois Open Source      *|
-|* License. See LICENSE.TXT for details.                                      *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM          *|
+|* Exceptions.                                                                *|
+|* See https://llvm.org/LICENSE.txt for license information.                  *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    *|
 |*                                                                            *|
 |*===----------------------------------------------------------------------===*|
 |*                                                                            *|
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
  * @{
  */
 
-#define LTO_API_VERSION 23
+#define LTO_API_VERSION 24
 
 /**
  * \since prior to LTO_API_VERSION=3
@@ -846,7 +846,47 @@ thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg,
 extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg,
                                                  unsigned max_size_files);
 
-
+/** Opaque reference to an LTO input file */
+typedef struct LLVMOpaqueLTOInput *lto_input_t;
+
+/**
+  * Creates an LTO input file from a buffer. The path
+  * argument is used for diagnostics as this function
+  * otherwise does not know which file the given buffer
+  * is associated with.
+  *
+  * \since LTO_API_VERSION=24
+  */
+extern lto_input_t lto_input_create(const void *buffer,
+                                    size_t buffer_size,
+                                    const char *path);
+
+/**
+  * Frees all memory internally allocated by the LTO input file.
+  * Upon return the lto_input_t is no longer valid.
+  *
+  * \since LTO_API_VERSION=24
+  */
+extern void lto_input_dispose(lto_input_t input);
+
+/**
+  * Returns the number of dependent library specifiers
+  * for the given LTO input file.
+  *
+  * \since LTO_API_VERSION=24
+  */
+extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input);
+
+/**
+  * Returns the ith dependent library specifier
+  * for the given LTO input file. The returned
+  * string is not null-terminated.
+  *
+  * \since LTO_API_VERSION=24
+  */
+extern const char * lto_input_get_dependent_library(lto_input_t input,
+                                                    size_t index,
+                                                    size_t *size);
 
 /**
  * @} // endgroup LLVMCTLTO_CACHING
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index c6fa5ad674f6..a9648d35cf5d 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -148,6 +147,17 @@ struct APFloatBase {
 
   /// \name Floating Point Semantics.
   /// @{
+  enum Semantics {
+    S_IEEEhalf,
+    S_IEEEsingle,
+    S_IEEEdouble,
+    S_x87DoubleExtended,
+    S_IEEEquad,
+    S_PPCDoubleDouble
+  };
+
+  static const llvm::fltSemantics &EnumToSemantics(Semantics S);
+  static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);
 
   static const fltSemantics &IEEEhalf() LLVM_READNONE;
   static const fltSemantics &IEEEsingle() LLVM_READNONE;
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 6e106ff8bf5d..2381b75e08b1 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -2213,6 +2212,15 @@ Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
 // See friend declaration above. This additional declaration is required in
 // order to compile LLVM with IBM xlC compiler.
 hash_code hash_value(const APInt &Arg);
-} // End of llvm namespace
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes);
+
+} // namespace llvm
 
 #endif
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 7ee2c4c62fce..0f991826c457 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/APSInt.h - Arbitrary Precision Signed Int -----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,24 @@ public:
   /// \param Str the string to be interpreted.
   explicit APSInt(StringRef Str);
 
+  /// Determine sign of this APSInt.
+  ///
+  /// \returns true if this APSInt is negative, false otherwise
+  bool isNegative() const { return isSigned() && APInt::isNegative(); }
+
+  /// Determine if this APSInt Value is non-negative (>= 0)
+  ///
+  /// \returns true if this APSInt is non-negative, false otherwise
+  bool isNonNegative() const { return !isNegative(); }
+
+  /// Determine if this APSInt Value is positive.
+  ///
+  /// This tests if the value of this APSInt is positive (> 0). Note
+  /// that 0 is not a positive value.
+  ///
+  /// \returns true if this APSInt is positive.
+  bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+
   APSInt &operator=(APInt RHS) {
     // Retain our current sign.
     APInt::operator=(std::move(RHS));
diff --git a/include/llvm/ADT/AllocatorList.h b/include/llvm/ADT/AllocatorList.h
index 178c6742a87b..405a2e4264df 100644
--- a/include/llvm/ADT/AllocatorList.h
+++ b/include/llvm/ADT/AllocatorList.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/AllocatorList.h - Custom allocator list ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/Any.h b/include/llvm/ADT/Any.h
index 7faa4c963d3d..5dcd6e73c54f 100644
--- a/include/llvm/ADT/Any.h
+++ b/include/llvm/ADT/Any.h
@@ -1,9 +1,8 @@
 //===- Any.h - Generic type erased holder of any type -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 9cb25b09c6cb..773c88f7c9f9 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -1,9 +1,8 @@
 //===- ArrayRef.h - Array Reference Wrapper ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -431,7 +430,7 @@ namespace llvm {
       std::copy(Data.begin(), Data.end(), this->begin());
     }
 
-    OwningArrayRef(OwningArrayRef &&Other) { *this = Other; }
+    OwningArrayRef(OwningArrayRef &&Other) { *this = std::move(Other); }
 
     OwningArrayRef &operator=(OwningArrayRef &&Other) {
       delete[] this->data();
@@ -526,12 +525,6 @@ namespace llvm {
 
   /// @}
 
-  // ArrayRefs can be treated like a POD type.
-  template <typename T> struct isPodLike;
-  template <typename T> struct isPodLike<ArrayRef<T>> {
-    static const bool value = true;
-  };
-
   template <typename T> hash_code hash_value(ArrayRef<T> S) {
     return hash_combine_range(S.begin(), S.end());
   }
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9ab1da7c6913..fabf5d9cd348 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/BitVector.h - Bit vectors -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/BitmaskEnum.h b/include/llvm/ADT/BitmaskEnum.h
index 18c6ba5a3eb8..1a18bc721b21 100644
--- a/include/llvm/ADT/BitmaskEnum.h
+++ b/include/llvm/ADT/BitmaskEnum.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/BitmaskEnum.h ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/BreadthFirstIterator.h b/include/llvm/ADT/BreadthFirstIterator.h
index 6bc63c283b09..e97d76680db8 100644
--- a/include/llvm/ADT/BreadthFirstIterator.h
+++ b/include/llvm/ADT/BreadthFirstIterator.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/BreadthFirstIterator.h - Breadth First iterator -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,7 +124,7 @@ public:
 
   const NodeRef &operator*() const { return VisitQueue.front()->first; }
 
-  // This is a nonstandard operator-> that dereferenfces the pointer an extra
+  // This is a nonstandard operator-> that dereferences the pointer an extra
   // time so that you can actually call methods on the node, because the
   // contained type is a pointer.
   NodeRef operator->() const { return **this; }
diff --git a/include/llvm/ADT/CachedHashString.h b/include/llvm/ADT/CachedHashString.h
index d8f0e7afdd49..80144fb87e0e 100644
--- a/include/llvm/ADT/CachedHashString.h
+++ b/include/llvm/ADT/CachedHashString.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 41fdd43efb8a..d4cdc3c86048 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -1,9 +1,8 @@
 //===- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_DAGDELTAALGORITHM_H
diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h
index 6becb2a60104..114b95499530 100644
--- a/include/llvm/ADT/DeltaAlgorithm.h
+++ b/include/llvm/ADT/DeltaAlgorithm.h
@@ -1,9 +1,8 @@
 //===- DeltaAlgorithm.h - A Set Minimization Algorithm ---------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ADT_DELTAALGORITHM_H
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 1f50502fff92..a05cf8130d3c 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/DenseMap.h - Dense probed hash table ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,7 +63,7 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
   template <typename AltPairT>
   DenseMapPair(AltPairT &&AltPair,
                typename std::enable_if<std::is_convertible<
-                   AltPairT, std::pair<KeyT, ValueT>>::value>::type * = 0)
+                   AltPairT, std::pair<KeyT, ValueT>>::value>::type * = nullptr)
       : std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
 
   KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
@@ -146,7 +145,8 @@ public:
     }
 
     const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
-    if (isPodLike<KeyT>::value && isPodLike<ValueT>::value) {
+    if (is_trivially_copyable<KeyT>::value &&
+        is_trivially_copyable<ValueT>::value) {
       // Use a simpler loop when these are trivial types.
       for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P)
         P->getFirst() = EmptyKey;
@@ -422,7 +422,8 @@ protected:
     setNumEntries(other.getNumEntries());
     setNumTombstones(other.getNumTombstones());
 
-    if (isPodLike<KeyT>::value && isPodLike<ValueT>::value)
+    if (is_trivially_copyable<KeyT>::value &&
+        is_trivially_copyable<ValueT>::value)
       memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
              getNumBuckets() * sizeof(BucketT));
     else
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 5d12b424fb37..5ef6f3ad1b04 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/ScalableSize.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -269,6 +269,21 @@ template <> struct DenseMapInfo<hash_code> {
   static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
 };
 
+template <> struct DenseMapInfo<ElementCount> {
+  static inline ElementCount getEmptyKey() { return {~0U, true}; }
+  static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; }
+  static unsigned getHashValue(const ElementCount& EltCnt) {
+    if (EltCnt.Scalable)
+      return (EltCnt.Min * 37U) - 1U;
+
+    return EltCnt.Min * 37U;
+  }
+
+  static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) {
+    return LHS == RHS;
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_ADT_DENSEMAPINFO_H
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index e85a38587e41..9afb715ae1db 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/DenseSet.h - Dense probed hash table ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -131,7 +130,7 @@ public:
 
   class ConstIterator {
     typename MapTy::const_iterator I;
-    friend class DenseSet;
+    friend class DenseSetImpl;
     friend class Iterator;
 
   public:
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 1f3766d3c9de..11967f5eefcc 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/DepthFirstIterator.h - Depth First iterator -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/EpochTracker.h b/include/llvm/ADT/EpochTracker.h
index 49ef192364e8..a782b4756898 100644
--- a/include/llvm/ADT/EpochTracker.h
+++ b/include/llvm/ADT/EpochTracker.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/EpochTracker.h - ADT epoch tracking --------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index e3f48433c69f..2cb7108c0794 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index e363e69d032a..d5837e51bcfc 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/FoldingSet.h - Uniquing Hash Set ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/FunctionExtras.h b/include/llvm/ADT/FunctionExtras.h
index 2b75dc6ac219..121aa527a5da 100644
--- a/include/llvm/ADT/FunctionExtras.h
+++ b/include/llvm/ADT/FunctionExtras.h
@@ -1,9 +1,8 @@
 //===- FunctionExtras.h - Function type erasure utilities -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index d39b50fdc488..3ce91225d80d 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/GraphTraits.h - Graph traits template -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index 9175c545b7c9..008188bfa210 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -192,7 +191,7 @@ inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
   uint8_t b = s[len >> 1];
   uint8_t c = s[len - 1];
   uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
-  uint32_t z = len + (static_cast<uint32_t>(c) << 2);
+  uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << 2);
   return shift_mix(y * k2 ^ z * k3 ^ seed) * k2;
 }
 
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 0541dc2566ed..c9ee494734e7 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -1,9 +1,8 @@
 //==--- ImmutableList.h - Immutable (functional) list interface --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -242,10 +241,6 @@ template<typename T> struct DenseMapInfo<ImmutableList<T>> {
   }
 };
 
-template <typename T> struct isPodLike;
-template <typename T>
-struct isPodLike<ImmutableList<T>> { static const bool value = true; };
-
 } // end namespace llvm
 
 #endif // LLVM_ADT_IMMUTABLELIST_H
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index cbc27ff17ccf..86fd7fefaec3 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -1,9 +1,8 @@
 //===--- ImmutableMap.h - Immutable (functional) map interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index b1d5f4ac42e4..587105431533 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -1,9 +1,8 @@
 //===--- ImmutableSet.h - Immutable (functional) set interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h
index 2ee80d2cde63..b44f16b91d76 100644
--- a/include/llvm/ADT/IndexedMap.h
+++ b/include/llvm/ADT/IndexedMap.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/IndexedMap.h - An index map implementation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/IntEqClasses.h b/include/llvm/ADT/IntEqClasses.h
index 0baee2f11a79..08f46a3079ef 100644
--- a/include/llvm/ADT/IntEqClasses.h
+++ b/include/llvm/ADT/IntEqClasses.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/IntEqClasses.h - Equiv. Classes of Integers ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 2af61049e5af..12828c4cfdab 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/IntervalMap.h - A sorted interval map -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 430ef86afbd9..6d97fe15db8b 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -1,9 +1,8 @@
 //==- llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h
index 47b4987f210a..1de1124f4ea2 100644
--- a/include/llvm/ADT/MapVector.h
+++ b/include/llvm/ADT/MapVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/MapVector.h - Map w/ deterministic value order --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/None.h b/include/llvm/ADT/None.h
index 4b6bc1e005b5..004ca0ac50ac 100644
--- a/include/llvm/ADT/None.h
+++ b/include/llvm/ADT/None.h
@@ -1,9 +1,8 @@
 //===-- None.h - Simple null value for implicit construction ------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index 76937d632ae1..b45a74002e10 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -1,9 +1,8 @@
 //===- Optional.h - Simple variant for passing optional values --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,94 +16,197 @@
 #define LLVM_ADT_OPTIONAL_H
 
 #include "llvm/ADT/None.h"
-#include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/type_traits.h"
-#include <algorithm>
 #include <cassert>
+#include <memory>
 #include <new>
 #include <utility>
 
 namespace llvm {
 
+class raw_ostream;
+
 namespace optional_detail {
+
+struct in_place_t {};
+
 /// Storage for any type.
-template <typename T, bool = isPodLike<T>::value> struct OptionalStorage {
-  AlignedCharArrayUnion<T> storage;
-  bool hasVal = false;
+template <typename T, bool = is_trivially_copyable<T>::value>
+class OptionalStorage {
+  union {
+    char empty;
+    T value;
+  };
+  bool hasVal;
 
-  OptionalStorage() = default;
+public:
+  ~OptionalStorage() { reset(); }
 
-  OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); }
-  OptionalStorage(const OptionalStorage &O) : hasVal(O.hasVal) {
-    if (hasVal)
-      new (storage.buffer) T(*O.getPointer());
+  OptionalStorage() noexcept : empty(), hasVal(false) {}
+
+  OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
+    if (other.hasValue()) {
+      emplace(other.value);
+    }
   }
-  OptionalStorage(T &&y) : hasVal(true) {
-    new (storage.buffer) T(std::forward<T>(y));
+  OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
+    if (other.hasValue()) {
+      emplace(std::move(other.value));
+    }
   }
-  OptionalStorage(OptionalStorage &&O) : hasVal(O.hasVal) {
-    if (O.hasVal) {
-      new (storage.buffer) T(std::move(*O.getPointer()));
+
+  template <class... Args>
+  explicit OptionalStorage(in_place_t, Args &&... args)
+      : value(std::forward<Args>(args)...), hasVal(true) {}
+
+  void reset() noexcept {
+    if (hasVal) {
+      value.~T();
+      hasVal = false;
     }
   }
 
-  OptionalStorage &operator=(T &&y) {
-    if (hasVal)
-      *getPointer() = std::move(y);
-    else {
-      new (storage.buffer) T(std::move(y));
+  bool hasValue() const noexcept { return hasVal; }
+
+  T &getValue() LLVM_LVALUE_FUNCTION noexcept {
+    assert(hasVal);
+    return value;
+  }
+  T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+    assert(hasVal);
+    return value;
+  }
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+  T &&getValue() && noexcept {
+    assert(hasVal);
+    return std::move(value);
+  }
+#endif
+
+  template <class... Args> void emplace(Args &&... args) {
+    reset();
+    ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+    hasVal = true;
+  }
+
+  OptionalStorage &operator=(T const &y) {
+    if (hasValue()) {
+      value = y;
+    } else {
+      ::new ((void *)std::addressof(value)) T(y);
       hasVal = true;
     }
     return *this;
   }
-  OptionalStorage &operator=(OptionalStorage &&O) {
-    if (!O.hasVal)
-      reset();
-    else {
-      *this = std::move(*O.getPointer());
+  OptionalStorage &operator=(T &&y) {
+    if (hasValue()) {
+      value = std::move(y);
+    } else {
+      ::new ((void *)std::addressof(value)) T(std::move(y));
+      hasVal = true;
     }
     return *this;
   }
 
-  // FIXME: these assignments (& the equivalent const T&/const Optional& ctors)
-  // could be made more efficient by passing by value, possibly unifying them
-  // with the rvalue versions above - but this could place a different set of
-  // requirements (notably: the existence of a default ctor) when implemented
-  // in that way. Careful SFINAE to avoid such pitfalls would be required.
-  OptionalStorage &operator=(const T &y) {
-    if (hasVal)
-      *getPointer() = y;
-    else {
-      new (storage.buffer) T(y);
-      hasVal = true;
+  OptionalStorage &operator=(OptionalStorage const &other) {
+    if (other.hasValue()) {
+      if (hasValue()) {
+        value = other.value;
+      } else {
+        ::new ((void *)std::addressof(value)) T(other.value);
+        hasVal = true;
+      }
+    } else {
+      reset();
     }
     return *this;
   }
-  OptionalStorage &operator=(const OptionalStorage &O) {
-    if (!O.hasVal)
+
+  OptionalStorage &operator=(OptionalStorage &&other) {
+    if (other.hasValue()) {
+      if (hasValue()) {
+        value = std::move(other.value);
+      } else {
+        ::new ((void *)std::addressof(value)) T(std::move(other.value));
+        hasVal = true;
+      }
+    } else {
       reset();
-    else
-      *this = *O.getPointer();
+    }
     return *this;
   }
+};
 
-  ~OptionalStorage() { reset(); }
+template <typename T> class OptionalStorage<T, true> {
+  union {
+    char empty;
+    T value;
+  };
+  bool hasVal = false;
+
+public:
+  ~OptionalStorage() = default;
+
+  OptionalStorage() noexcept : empty{} {}
+
+  OptionalStorage(OptionalStorage const &other) = default;
+  OptionalStorage(OptionalStorage &&other) = default;
+
+  OptionalStorage &operator=(OptionalStorage const &other) = default;
+  OptionalStorage &operator=(OptionalStorage &&other) = default;
+
+  template <class... Args>
+  explicit OptionalStorage(in_place_t, Args &&... args)
+      : value(std::forward<Args>(args)...), hasVal(true) {}
 
-  void reset() {
+  void reset() noexcept {
     if (hasVal) {
-      (*getPointer()).~T();
+      value.~T();
       hasVal = false;
     }
   }
 
-  T *getPointer() {
+  bool hasValue() const noexcept { return hasVal; }
+
+  T &getValue() LLVM_LVALUE_FUNCTION noexcept {
     assert(hasVal);
-    return reinterpret_cast<T *>(storage.buffer);
+    return value;
   }
-  const T *getPointer() const {
+  T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
     assert(hasVal);
-    return reinterpret_cast<const T *>(storage.buffer);
+    return value;
+  }
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+  T &&getValue() && noexcept {
+    assert(hasVal);
+    return std::move(value);
+  }
+#endif
+
+  template <class... Args> void emplace(Args &&... args) {
+    reset();
+    ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+    hasVal = true;
+  }
+
+  OptionalStorage &operator=(T const &y) {
+    if (hasValue()) {
+      value = y;
+    } else {
+      ::new ((void *)std::addressof(value)) T(y);
+      hasVal = true;
+    }
+    return *this;
+  }
+  OptionalStorage &operator=(T &&y) {
+    if (hasValue()) {
+      value = std::move(y);
+    } else {
+      ::new ((void *)std::addressof(value)) T(std::move(y));
+      hasVal = true;
+    }
+    return *this;
   }
 };
 
@@ -119,10 +221,10 @@ public:
   constexpr Optional() {}
   constexpr Optional(NoneType) {}
 
-  Optional(const T &y) : Storage(y) {}
+  Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {}
   Optional(const Optional &O) = default;
 
-  Optional(T &&y) : Storage(std::forward<T>(y)) {}
+  Optional(T &&y) : Storage(optional_detail::in_place_t{}, std::move(y)) {}
   Optional(Optional &&O) = default;
 
   Optional &operator=(T &&y) {
@@ -133,9 +235,7 @@ public:
 
   /// Create a new object by constructing it in place with the given arguments.
   template <typename... ArgTypes> void emplace(ArgTypes &&... Args) {
-    reset();
-    Storage.hasVal = true;
-    new (getPointer()) T(std::forward<ArgTypes>(Args)...);
+    Storage.emplace(std::forward<ArgTypes>(Args)...);
   }
 
   static inline Optional create(const T *y) {
@@ -150,23 +250,17 @@ public:
 
   void reset() { Storage.reset(); }
 
-  const T *getPointer() const {
-    assert(Storage.hasVal);
-    return reinterpret_cast<const T *>(Storage.storage.buffer);
-  }
-  T *getPointer() {
-    assert(Storage.hasVal);
-    return reinterpret_cast<T *>(Storage.storage.buffer);
-  }
-  const T &getValue() const LLVM_LVALUE_FUNCTION { return *getPointer(); }
-  T &getValue() LLVM_LVALUE_FUNCTION { return *getPointer(); }
+  const T *getPointer() const { return &Storage.getValue(); }
+  T *getPointer() { return &Storage.getValue(); }
+  const T &getValue() const LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
+  T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
 
-  explicit operator bool() const { return Storage.hasVal; }
-  bool hasValue() const { return Storage.hasVal; }
+  explicit operator bool() const { return hasValue(); }
+  bool hasValue() const { return Storage.hasValue(); }
   const T *operator->() const { return getPointer(); }
   T *operator->() { return getPointer(); }
-  const T &operator*() const LLVM_LVALUE_FUNCTION { return *getPointer(); }
-  T &operator*() LLVM_LVALUE_FUNCTION { return *getPointer(); }
+  const T &operator*() const LLVM_LVALUE_FUNCTION { return getValue(); }
+  T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); }
 
   template <typename U>
   constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION {
@@ -174,8 +268,8 @@ public:
   }
 
 #if LLVM_HAS_RVALUE_REFERENCE_THIS
-  T &&getValue() && { return std::move(*getPointer()); }
-  T &&operator*() && { return std::move(*getPointer()); }
+  T &&getValue() && { return std::move(Storage.getValue()); }
+  T &&operator*() && { return std::move(Storage.getValue()); }
 
   template <typename U>
   T getValueOr(U &&value) && {
@@ -184,11 +278,6 @@ public:
 #endif
 };
 
-template <typename T> struct isPodLike<Optional<T>> {
-  // An Optional<T> is pod-like if T is.
-  static const bool value = isPodLike<T>::value;
-};
-
 template <typename T, typename U>
 bool operator==(const Optional<T> &X, const Optional<U> &Y) {
   if (X && Y)
@@ -323,6 +412,18 @@ template <typename T> bool operator>=(const T &X, const Optional<T> &Y) {
   return !(X < Y);
 }
 
+raw_ostream &operator<<(raw_ostream &OS, NoneType);
+
+template <typename T, typename = decltype(std::declval<raw_ostream &>()
+                                          << std::declval<const T &>())>
+raw_ostream &operator<<(raw_ostream &OS, const Optional<T> &O) {
+  if (O)
+    OS << *O;
+  else
+    OS << None;
+  return OS;
+}
+
 } // end namespace llvm
 
 #endif // LLVM_ADT_OPTIONAL_H
diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h
index 3d53c49536d0..ae7f8cc85743 100644
--- a/include/llvm/ADT/PackedVector.h
+++ b/include/llvm/ADT/PackedVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PackedVector.h - Packed values vector -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/PointerEmbeddedInt.h b/include/llvm/ADT/PointerEmbeddedInt.h
index ab4e1048a5bc..3eb6edb03430 100644
--- a/include/llvm/ADT/PointerEmbeddedInt.h
+++ b/include/llvm/ADT/PointerEmbeddedInt.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PointerEmbeddedInt.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 6d1b53a90ad2..24a2bb67a36e 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #define LLVM_ADT_POINTERINTPAIR_H
 
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <cstdint>
 #include <limits>
@@ -126,6 +126,19 @@ public:
   }
 };
 
+// Specialize is_trivially_copyable to work around a limitation of
+// llvm::is_trivially_copyable when compiled with gcc 4.9.
+template <typename PointerTy, unsigned IntBits, typename IntType,
+          typename PtrTraits,
+          typename Info>
+struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
+#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
+  static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
+                "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
+#endif
+};
+
+
 template <typename PointerT, unsigned IntBits, typename PtrTraits>
 struct PointerIntPairInfo {
   static_assert(PtrTraits::NumLowBitsAvailable <
@@ -176,12 +189,6 @@ struct PointerIntPairInfo {
   }
 };
 
-template <typename T> struct isPodLike;
-template <typename PointerTy, unsigned IntBits, typename IntType>
-struct isPodLike<PointerIntPair<PointerTy, IntBits, IntType>> {
-  static const bool value = true;
-};
-
 // Provide specialization of DenseMapInfo for PointerIntPair.
 template <typename PointerTy, unsigned IntBits, typename IntType>
 struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
diff --git a/include/llvm/ADT/PointerSumType.h b/include/llvm/ADT/PointerSumType.h
index a19e45a46218..d467f83f58ac 100644
--- a/include/llvm/ADT/PointerSumType.h
+++ b/include/llvm/ADT/PointerSumType.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PointerSumType.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 315e58336cba..2bcdf546c6e4 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PointerUnion.h - Discriminated Union of 2 Ptrs --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -54,22 +53,98 @@ struct PointerUnionTypeSelectorReturn<
       typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return;
 };
 
-/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
-/// for the two template arguments.
-template <typename PT1, typename PT2> class PointerUnionUIntTraits {
-public:
-  static inline void *getAsVoidPointer(void *P) { return P; }
-  static inline void *getFromVoidPointer(void *P) { return P; }
+namespace pointer_union_detail {
+  constexpr int constexprMin(int a, int b) { return a < b ? a : b; }
+  /// Determine the number of bits required to store integers with values < n.
+  /// This is ceil(log2(n)).
+  constexpr int bitsRequired(unsigned n) {
+    return n > 1 ? 1 + bitsRequired((n + 1) / 2) : 0;
+  }
+
+  // FIXME: In C++14, replace this with
+  //   std::min({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...})
+  template <typename T> constexpr int lowBitsAvailable() {
+    return PointerLikeTypeTraits<T>::NumLowBitsAvailable;
+  }
+  template <typename T1, typename T2, typename... Ts>
+  constexpr int lowBitsAvailable() {
+    return constexprMin(lowBitsAvailable<T1>(), lowBitsAvailable<T2, Ts...>());
+  }
 
-  enum {
-    PT1BitsAv = (int)(PointerLikeTypeTraits<PT1>::NumLowBitsAvailable),
-    PT2BitsAv = (int)(PointerLikeTypeTraits<PT2>::NumLowBitsAvailable),
-    NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv
+  /// Find the index of a type in a list of types. TypeIndex<T, Us...>::Index
+  /// is the index of T in Us, or sizeof...(Us) if T does not appear in the
+  /// list.
+  template <typename T, typename ...Us> struct TypeIndex;
+  template <typename T, typename ...Us> struct TypeIndex<T, T, Us...> {
+    static constexpr int Index = 0;
   };
-};
+  template <typename T, typename U, typename... Us>
+  struct TypeIndex<T, U, Us...> {
+    static constexpr int Index = 1 + TypeIndex<T, Us...>::Index;
+  };
+  template <typename T> struct TypeIndex<T> {
+    static constexpr int Index = 0;
+  };
+
+  /// Find the first type in a list of types.
+  template <typename T, typename...> struct GetFirstType {
+    using type = T;
+  };
+
+  /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
+  /// for the template arguments.
+  template <typename ...PTs> class PointerUnionUIntTraits {
+  public:
+    static inline void *getAsVoidPointer(void *P) { return P; }
+    static inline void *getFromVoidPointer(void *P) { return P; }
+    static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>();
+  };
+
+  /// Implement assignment in terms of construction.
+  template <typename Derived, typename T> struct AssignableFrom {
+    Derived &operator=(T t) {
+      return static_cast<Derived &>(*this) = Derived(t);
+    }
+  };
+
+  template <typename Derived, typename ValTy, int I, typename ...Types>
+  class PointerUnionMembers;
 
-/// A discriminated union of two pointer types, with the discriminator in the
-/// low bit of the pointer.
+  template <typename Derived, typename ValTy, int I>
+  class PointerUnionMembers<Derived, ValTy, I> {
+  protected:
+    ValTy Val;
+    PointerUnionMembers() = default;
+    PointerUnionMembers(ValTy Val) : Val(Val) {}
+
+    friend struct PointerLikeTypeTraits<Derived>;
+  };
+
+  template <typename Derived, typename ValTy, int I, typename Type,
+            typename ...Types>
+  class PointerUnionMembers<Derived, ValTy, I, Type, Types...>
+      : public PointerUnionMembers<Derived, ValTy, I + 1, Types...> {
+    using Base = PointerUnionMembers<Derived, ValTy, I + 1, Types...>;
+  public:
+    using Base::Base;
+    PointerUnionMembers() = default;
+    PointerUnionMembers(Type V)
+        : Base(ValTy(const_cast<void *>(
+                         PointerLikeTypeTraits<Type>::getAsVoidPointer(V)),
+                     I)) {}
+
+    using Base::operator=;
+    Derived &operator=(Type V) {
+      this->Val = ValTy(
+          const_cast<void *>(PointerLikeTypeTraits<Type>::getAsVoidPointer(V)),
+          I);
+      return static_cast<Derived &>(*this);
+    };
+  };
+}
+
+/// A discriminated union of two or more pointer types, with the discriminator
+/// in the low bits of the pointer.
 ///
 /// This implementation is extremely efficient in space due to leveraging the
 /// low bits of the pointer, while exposing a natural and type-safe API.
@@ -84,49 +159,44 @@ public:
 ///    P = (float*)0;
 ///    Y = P.get<float*>();   // ok.
 ///    X = P.get<int*>();     // runtime assertion failure.
-template <typename PT1, typename PT2> class PointerUnion {
-public:
-  using ValTy =
-      PointerIntPair<void *, 1, bool, PointerUnionUIntTraits<PT1, PT2>>;
-
-private:
-  ValTy Val;
-
-  struct IsPT1 {
-    static const int Num = 0;
-  };
-  struct IsPT2 {
-    static const int Num = 1;
-  };
-  template <typename T> struct UNION_DOESNT_CONTAIN_TYPE {};
+template <typename... PTs>
+class PointerUnion
+    : public pointer_union_detail::PointerUnionMembers<
+          PointerUnion<PTs...>,
+          PointerIntPair<
+              void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int,
+              pointer_union_detail::PointerUnionUIntTraits<PTs...>>,
+          0, PTs...> {
+  // The first type is special in some ways, but we don't want PointerUnion to
+  // be a 'template <typename First, typename ...Rest>' because it's much more
+  // convenient to have a name for the whole pack. So split off the first type
+  // here.
+  using First = typename pointer_union_detail::GetFirstType<PTs...>::type;
+  using Base = typename PointerUnion::PointerUnionMembers;
 
 public:
   PointerUnion() = default;
-  PointerUnion(PT1 V)
-      : Val(const_cast<void *>(
-            PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {}
-  PointerUnion(PT2 V)
-      : Val(const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)),
-            1) {}
+
+  PointerUnion(std::nullptr_t) : PointerUnion() {}
+  using Base::Base;
 
   /// Test if the pointer held in the union is null, regardless of
   /// which type it is.
   bool isNull() const {
     // Convert from the void* to one of the pointer types, to make sure that
     // we recursively strip off low bits if we have a nested PointerUnion.
-    return !PointerLikeTypeTraits<PT1>::getFromVoidPointer(Val.getPointer());
+    return !PointerLikeTypeTraits<First>::getFromVoidPointer(
+        this->Val.getPointer());
   }
 
   explicit operator bool() const { return !isNull(); }
 
   /// Test if the Union currently holds the type matching T.
   template <typename T> int is() const {
-    using Ty = typename ::llvm::PointerUnionTypeSelector<
-        PT1, T, IsPT1,
-        ::llvm::PointerUnionTypeSelector<PT2, T, IsPT2,
-                                         UNION_DOESNT_CONTAIN_TYPE<T>>>::Return;
-    int TyNo = Ty::Num;
-    return static_cast<int>(Val.getInt()) == TyNo;
+    constexpr int Index = pointer_union_detail::TypeIndex<T, PTs...>::Index;
+    static_assert(Index < sizeof...(PTs),
+                  "PointerUnion::is<T> given type not in the union");
+    return this->Val.getInt() == Index;
   }
 
   /// Returns the value of the specified pointer type.
@@ -134,7 +204,7 @@ public:
   /// If the specified pointer type is incorrect, assert.
   template <typename T> T get() const {
     assert(is<T>() && "Invalid accessor called");
-    return PointerLikeTypeTraits<T>::getFromVoidPointer(Val.getPointer());
+    return PointerLikeTypeTraits<T>::getFromVoidPointer(this->Val.getPointer());
   }
 
   /// Returns the current pointer if it is of the specified pointer type,
@@ -147,342 +217,100 @@ public:
 
   /// If the union is set to the first pointer type get an address pointing to
   /// it.
-  PT1 const *getAddrOfPtr1() const {
+  First const *getAddrOfPtr1() const {
     return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
   }
 
   /// If the union is set to the first pointer type get an address pointing to
   /// it.
-  PT1 *getAddrOfPtr1() {
-    assert(is<PT1>() && "Val is not the first pointer");
+  First *getAddrOfPtr1() {
+    assert(is<First>() && "Val is not the first pointer");
     assert(
-        get<PT1>() == Val.getPointer() &&
+        get<First>() == this->Val.getPointer() &&
         "Can't get the address because PointerLikeTypeTraits changes the ptr");
-    return const_cast<PT1 *>(
-        reinterpret_cast<const PT1 *>(Val.getAddrOfPointer()));
+    return const_cast<First *>(
+        reinterpret_cast<const First *>(this->Val.getAddrOfPointer()));
   }
 
   /// Assignment from nullptr which just clears the union.
   const PointerUnion &operator=(std::nullptr_t) {
-    Val.initWithPointer(nullptr);
+    this->Val.initWithPointer(nullptr);
     return *this;
   }
 
-  /// Assignment operators - Allow assigning into this union from either
-  /// pointer type, setting the discriminator to remember what it came from.
-  const PointerUnion &operator=(const PT1 &RHS) {
-    Val.initWithPointer(
-        const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
-    return *this;
-  }
-  const PointerUnion &operator=(const PT2 &RHS) {
-    Val.setPointerAndInt(
-        const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)),
-        1);
-    return *this;
-  }
+  /// Assignment from elements of the union.
+  using Base::operator=;
 
-  void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+  void *getOpaqueValue() const { return this->Val.getOpaqueValue(); }
   static inline PointerUnion getFromOpaqueValue(void *VP) {
     PointerUnion V;
-    V.Val = ValTy::getFromOpaqueValue(VP);
+    V.Val = decltype(V.Val)::getFromOpaqueValue(VP);
     return V;
   }
 };
 
-template <typename PT1, typename PT2>
-bool operator==(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator==(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
   return lhs.getOpaqueValue() == rhs.getOpaqueValue();
 }
 
-template <typename PT1, typename PT2>
-bool operator!=(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator!=(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
   return lhs.getOpaqueValue() != rhs.getOpaqueValue();
 }
 
-template <typename PT1, typename PT2>
-bool operator<(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator<(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
   return lhs.getOpaqueValue() < rhs.getOpaqueValue();
 }
 
 // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
 // # low bits available = min(PT1bits,PT2bits)-1.
-template <typename PT1, typename PT2>
-struct PointerLikeTypeTraits<PointerUnion<PT1, PT2>> {
-  static inline void *getAsVoidPointer(const PointerUnion<PT1, PT2> &P) {
+template <typename ...PTs>
+struct PointerLikeTypeTraits<PointerUnion<PTs...>> {
+  static inline void *getAsVoidPointer(const PointerUnion<PTs...> &P) {
     return P.getOpaqueValue();
   }
 
-  static inline PointerUnion<PT1, PT2> getFromVoidPointer(void *P) {
-    return PointerUnion<PT1, PT2>::getFromOpaqueValue(P);
+  static inline PointerUnion<PTs...> getFromVoidPointer(void *P) {
+    return PointerUnion<PTs...>::getFromOpaqueValue(P);
   }
 
-  // The number of bits available are the min of the two pointer types.
-  enum {
-    NumLowBitsAvailable = PointerLikeTypeTraits<
-        typename PointerUnion<PT1, PT2>::ValTy>::NumLowBitsAvailable
-  };
+  // The number of bits available is the min of the pointer types minus the
+  // bits needed for the discriminator.
+  static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits<decltype(
+      PointerUnion<PTs...>::Val)>::NumLowBitsAvailable;
 };
 
 /// A pointer union of three pointer types. See documentation for PointerUnion
 /// for usage.
-template <typename PT1, typename PT2, typename PT3> class PointerUnion3 {
-public:
-  using InnerUnion = PointerUnion<PT1, PT2>;
-  using ValTy = PointerUnion<InnerUnion, PT3>;
-
-private:
-  ValTy Val;
-
-  struct IsInnerUnion {
-    ValTy Val;
-
-    IsInnerUnion(ValTy val) : Val(val) {}
-
-    template <typename T> int is() const {
-      return Val.template is<InnerUnion>() &&
-             Val.template get<InnerUnion>().template is<T>();
-    }
-
-    template <typename T> T get() const {
-      return Val.template get<InnerUnion>().template get<T>();
-    }
-  };
-
-  struct IsPT3 {
-    ValTy Val;
-
-    IsPT3(ValTy val) : Val(val) {}
-
-    template <typename T> int is() const { return Val.template is<T>(); }
-    template <typename T> T get() const { return Val.template get<T>(); }
-  };
-
-public:
-  PointerUnion3() = default;
-  PointerUnion3(PT1 V) { Val = InnerUnion(V); }
-  PointerUnion3(PT2 V) { Val = InnerUnion(V); }
-  PointerUnion3(PT3 V) { Val = V; }
-
-  /// Test if the pointer held in the union is null, regardless of
-  /// which type it is.
-  bool isNull() const { return Val.isNull(); }
-  explicit operator bool() const { return !isNull(); }
-
-  /// Test if the Union currently holds the type matching T.
-  template <typename T> int is() const {
-    // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
-    using Ty = typename ::llvm::PointerUnionTypeSelector<
-        PT1, T, IsInnerUnion,
-        ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return;
-    return Ty(Val).template is<T>();
-  }
-
-  /// Returns the value of the specified pointer type.
-  ///
-  /// If the specified pointer type is incorrect, assert.
-  template <typename T> T get() const {
-    assert(is<T>() && "Invalid accessor called");
-    // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
-    using Ty = typename ::llvm::PointerUnionTypeSelector<
-        PT1, T, IsInnerUnion,
-        ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return;
-    return Ty(Val).template get<T>();
-  }
-
-  /// Returns the current pointer if it is of the specified pointer type,
-  /// otherwises returns null.
-  template <typename T> T dyn_cast() const {
-    if (is<T>())
-      return get<T>();
-    return T();
-  }
-
-  /// Assignment from nullptr which just clears the union.
-  const PointerUnion3 &operator=(std::nullptr_t) {
-    Val = nullptr;
-    return *this;
-  }
-
-  /// Assignment operators - Allow assigning into this union from either
-  /// pointer type, setting the discriminator to remember what it came from.
-  const PointerUnion3 &operator=(const PT1 &RHS) {
-    Val = InnerUnion(RHS);
-    return *this;
-  }
-  const PointerUnion3 &operator=(const PT2 &RHS) {
-    Val = InnerUnion(RHS);
-    return *this;
-  }
-  const PointerUnion3 &operator=(const PT3 &RHS) {
-    Val = RHS;
-    return *this;
-  }
-
-  void *getOpaqueValue() const { return Val.getOpaqueValue(); }
-  static inline PointerUnion3 getFromOpaqueValue(void *VP) {
-    PointerUnion3 V;
-    V.Val = ValTy::getFromOpaqueValue(VP);
-    return V;
-  }
-};
-
-// Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has
-// # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
 template <typename PT1, typename PT2, typename PT3>
-struct PointerLikeTypeTraits<PointerUnion3<PT1, PT2, PT3>> {
-  static inline void *getAsVoidPointer(const PointerUnion3<PT1, PT2, PT3> &P) {
-    return P.getOpaqueValue();
-  }
-
-  static inline PointerUnion3<PT1, PT2, PT3> getFromVoidPointer(void *P) {
-    return PointerUnion3<PT1, PT2, PT3>::getFromOpaqueValue(P);
-  }
-
-  // The number of bits available are the min of the two pointer types.
-  enum {
-    NumLowBitsAvailable = PointerLikeTypeTraits<
-        typename PointerUnion3<PT1, PT2, PT3>::ValTy>::NumLowBitsAvailable
-  };
-};
-
-template <typename PT1, typename PT2, typename PT3>
-bool operator<(PointerUnion3<PT1, PT2, PT3> lhs,
-               PointerUnion3<PT1, PT2, PT3> rhs) {
-  return lhs.getOpaqueValue() < rhs.getOpaqueValue();
-}
+using PointerUnion3 = PointerUnion<PT1, PT2, PT3>;
 
 /// A pointer union of four pointer types. See documentation for PointerUnion
 /// for usage.
 template <typename PT1, typename PT2, typename PT3, typename PT4>
-class PointerUnion4 {
-public:
-  using InnerUnion1 = PointerUnion<PT1, PT2>;
-  using InnerUnion2 = PointerUnion<PT3, PT4>;
-  using ValTy = PointerUnion<InnerUnion1, InnerUnion2>;
-
-private:
-  ValTy Val;
-
-public:
-  PointerUnion4() = default;
-  PointerUnion4(PT1 V) { Val = InnerUnion1(V); }
-  PointerUnion4(PT2 V) { Val = InnerUnion1(V); }
-  PointerUnion4(PT3 V) { Val = InnerUnion2(V); }
-  PointerUnion4(PT4 V) { Val = InnerUnion2(V); }
-
-  /// Test if the pointer held in the union is null, regardless of
-  /// which type it is.
-  bool isNull() const { return Val.isNull(); }
-  explicit operator bool() const { return !isNull(); }
-
-  /// Test if the Union currently holds the type matching T.
-  template <typename T> int is() const {
-    // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
-    using Ty = typename ::llvm::PointerUnionTypeSelector<
-        PT1, T, InnerUnion1,
-        ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1,
-                                         InnerUnion2>>::Return;
-    return Val.template is<Ty>() && Val.template get<Ty>().template is<T>();
-  }
-
-  /// Returns the value of the specified pointer type.
-  ///
-  /// If the specified pointer type is incorrect, assert.
-  template <typename T> T get() const {
-    assert(is<T>() && "Invalid accessor called");
-    // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
-    using Ty = typename ::llvm::PointerUnionTypeSelector<
-        PT1, T, InnerUnion1,
-        ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1,
-                                         InnerUnion2>>::Return;
-    return Val.template get<Ty>().template get<T>();
-  }
-
-  /// Returns the current pointer if it is of the specified pointer type,
-  /// otherwises returns null.
-  template <typename T> T dyn_cast() const {
-    if (is<T>())
-      return get<T>();
-    return T();
-  }
-
-  /// Assignment from nullptr which just clears the union.
-  const PointerUnion4 &operator=(std::nullptr_t) {
-    Val = nullptr;
-    return *this;
-  }
-
-  /// Assignment operators - Allow assigning into this union from either
-  /// pointer type, setting the discriminator to remember what it came from.
-  const PointerUnion4 &operator=(const PT1 &RHS) {
-    Val = InnerUnion1(RHS);
-    return *this;
-  }
-  const PointerUnion4 &operator=(const PT2 &RHS) {
-    Val = InnerUnion1(RHS);
-    return *this;
-  }
-  const PointerUnion4 &operator=(const PT3 &RHS) {
-    Val = InnerUnion2(RHS);
-    return *this;
-  }
-  const PointerUnion4 &operator=(const PT4 &RHS) {
-    Val = InnerUnion2(RHS);
-    return *this;
-  }
-
-  void *getOpaqueValue() const { return Val.getOpaqueValue(); }
-  static inline PointerUnion4 getFromOpaqueValue(void *VP) {
-    PointerUnion4 V;
-    V.Val = ValTy::getFromOpaqueValue(VP);
-    return V;
-  }
-};
-
-// Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
-// # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
-template <typename PT1, typename PT2, typename PT3, typename PT4>
-struct PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4>> {
-  static inline void *
-  getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
-    return P.getOpaqueValue();
-  }
-
-  static inline PointerUnion4<PT1, PT2, PT3, PT4> getFromVoidPointer(void *P) {
-    return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
-  }
-
-  // The number of bits available are the min of the two pointer types.
-  enum {
-    NumLowBitsAvailable = PointerLikeTypeTraits<
-        typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>::NumLowBitsAvailable
-  };
-};
+using PointerUnion4 = PointerUnion<PT1, PT2, PT3, PT4>;
 
 // Teach DenseMap how to use PointerUnions as keys.
-template <typename T, typename U> struct DenseMapInfo<PointerUnion<T, U>> {
-  using Pair = PointerUnion<T, U>;
-  using FirstInfo = DenseMapInfo<T>;
-  using SecondInfo = DenseMapInfo<U>;
+template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> {
+  using Union = PointerUnion<PTs...>;
+  using FirstInfo =
+      DenseMapInfo<typename pointer_union_detail::GetFirstType<PTs...>::type>;
 
-  static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); }
+  static inline Union getEmptyKey() { return Union(FirstInfo::getEmptyKey()); }
 
-  static inline Pair getTombstoneKey() {
-    return Pair(FirstInfo::getTombstoneKey());
+  static inline Union getTombstoneKey() {
+    return Union(FirstInfo::getTombstoneKey());
   }
 
-  static unsigned getHashValue(const Pair &PairVal) {
-    intptr_t key = (intptr_t)PairVal.getOpaqueValue();
+  static unsigned getHashValue(const Union &UnionVal) {
+    intptr_t key = (intptr_t)UnionVal.getOpaqueValue();
     return DenseMapInfo<intptr_t>::getHashValue(key);
   }
 
-  static bool isEqual(const Pair &LHS, const Pair &RHS) {
-    return LHS.template is<T>() == RHS.template is<T>() &&
-           (LHS.template is<T>() ? FirstInfo::isEqual(LHS.template get<T>(),
-                                                      RHS.template get<T>())
-                                 : SecondInfo::isEqual(LHS.template get<U>(),
-                                                       RHS.template get<U>()));
+  static bool isEqual(const Union &LHS, const Union &RHS) {
+    return LHS == RHS;
   }
 };
 
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index d77b12228cb1..2fe7447a8e77 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PostOrderIterator.h - PostOrder iterator --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h
index 8ba871e25304..cf79ee10ba7f 100644
--- a/include/llvm/ADT/PriorityQueue.h
+++ b/include/llvm/ADT/PriorityQueue.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/PriorityQueue.h - Priority queues ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h
index aa531f3337d9..96d22c87557e 100644
--- a/include/llvm/ADT/PriorityWorklist.h
+++ b/include/llvm/ADT/PriorityWorklist.h
@@ -1,9 +1,8 @@
 //===- PriorityWorklist.h - Worklist with insertion priority ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index ab1dc4613be0..eb1a5d0938cf 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -1,9 +1,8 @@
 //===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index f66ca7c08a73..81dce0168c79 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/STLExtras.h - Useful STL related functions ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -241,6 +240,13 @@ inline mapped_iterator<ItTy, FuncTy> map_iterator(ItTy I, FuncTy F) {
   return mapped_iterator<ItTy, FuncTy>(std::move(I), std::move(F));
 }
 
+template <class ContainerTy, class FuncTy>
+auto map_range(ContainerTy &&C, FuncTy F)
+    -> decltype(make_range(map_iterator(C.begin(), F),
+                           map_iterator(C.end(), F))) {
+  return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
+}
+
 /// Helper to determine if type T has a member called rbegin().
 template <typename Ty> class has_rbegin_impl {
   using yes = char[1];
@@ -1278,29 +1284,52 @@ auto partition(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range)) {
 
 /// Provide wrappers to std::lower_bound which take ranges instead of having to
 /// pass begin/end explicitly.
-template <typename R, typename ForwardIt>
-auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
-  return std::lower_bound(adl_begin(Range), adl_end(Range), I);
+template <typename R, typename T>
+auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range)) {
+  return std::lower_bound(adl_begin(Range), adl_end(Range),
+                          std::forward<T>(Value));
 }
 
-template <typename R, typename ForwardIt, typename Compare>
-auto lower_bound(R &&Range, ForwardIt I, Compare C)
+template <typename R, typename T, typename Compare>
+auto lower_bound(R &&Range, T &&Value, Compare C)
     -> decltype(adl_begin(Range)) {
-  return std::lower_bound(adl_begin(Range), adl_end(Range), I, C);
+  return std::lower_bound(adl_begin(Range), adl_end(Range),
+                          std::forward<T>(Value), C);
 }
 
 /// Provide wrappers to std::upper_bound which take ranges instead of having to
 /// pass begin/end explicitly.
-template <typename R, typename ForwardIt>
-auto upper_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
-  return std::upper_bound(adl_begin(Range), adl_end(Range), I);
+template <typename R, typename T>
+auto upper_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range)) {
+  return std::upper_bound(adl_begin(Range), adl_end(Range),
+                          std::forward<T>(Value));
 }
 
-template <typename R, typename ForwardIt, typename Compare>
-auto upper_bound(R &&Range, ForwardIt I, Compare C)
+template <typename R, typename T, typename Compare>
+auto upper_bound(R &&Range, T &&Value, Compare C)
     -> decltype(adl_begin(Range)) {
-  return std::upper_bound(adl_begin(Range), adl_end(Range), I, C);
+  return std::upper_bound(adl_begin(Range), adl_end(Range),
+                          std::forward<T>(Value), C);
+}
+
+template <typename R>
+void stable_sort(R &&Range) {
+  std::stable_sort(adl_begin(Range), adl_end(Range));
+}
+
+template <typename R, typename Compare>
+void stable_sort(R &&Range, Compare C) {
+  std::stable_sort(adl_begin(Range), adl_end(Range), C);
+}
+
+/// Binary search for the first iterator in a range where a predicate is false.
+/// Requires that P is always true below some limit, and always false above it.
+template <typename R, typename Predicate,
+          typename Val = decltype(*adl_begin(std::declval<R>()))>
+auto partition_point(R &&Range, Predicate P) -> decltype(adl_begin(Range)) {
+  return std::partition_point(adl_begin(Range), adl_end(Range), P);
 }
+
 /// Wrapper function around std::equal to detect if all elements
 /// in a container are same.
 template <typename R>
@@ -1331,6 +1360,33 @@ void erase_if(Container &C, UnaryPredicate P) {
   C.erase(remove_if(C, P), C.end());
 }
 
+/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
+/// the range [ValIt, ValEnd) (which is not from the same container).
+template<typename Container, typename RandomAccessIterator>
+void replace(Container &Cont, typename Container::iterator ContIt,
+             typename Container::iterator ContEnd, RandomAccessIterator ValIt,
+             RandomAccessIterator ValEnd) {
+  while (true) {
+    if (ValIt == ValEnd) {
+      Cont.erase(ContIt, ContEnd);
+      return;
+    } else if (ContIt == ContEnd) {
+      Cont.insert(ContIt, ValIt, ValEnd);
+      return;
+    }
+    *ContIt++ = *ValIt++;
+  }
+}
+
+/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
+/// the range R.
+template<typename Container, typename Range = std::initializer_list<
+                                 typename Container::value_type>>
+void replace(Container &Cont, typename Container::iterator ContIt,
+             typename Container::iterator ContEnd, Range R) {
+  replace(Cont, ContIt, ContEnd, R.begin(), R.end());
+}
+
 //===----------------------------------------------------------------------===//
 //     Extra additions to <memory>
 //===----------------------------------------------------------------------===//
@@ -1418,6 +1474,9 @@ namespace detail {
 template <typename R> class enumerator_iter;
 
 template <typename R> struct result_pair {
+  using value_reference =
+      typename std::iterator_traits<IterOfRange<R>>::reference;
+
   friend class enumerator_iter<R>;
 
   result_pair() = default;
@@ -1431,8 +1490,8 @@ template <typename R> struct result_pair {
   }
 
   std::size_t index() const { return Index; }
-  const ValueOfRange<R> &value() const { return *Iter; }
-  ValueOfRange<R> &value() { return *Iter; }
+  const value_reference value() const { return *Iter; }
+  value_reference value() { return *Iter; }
 
 private:
   std::size_t Index = std::numeric_limits<std::size_t>::max();
@@ -1577,6 +1636,19 @@ bool hasNItemsOrMore(
   return true;
 }
 
+/// Returns a raw pointer that represents the same address as the argument.
+///
+/// The late bound return should be removed once we move to C++14 to better
+/// align with the C++20 declaration. Also, this implementation can be removed
+/// once we move to C++20 where it's defined as std::to_address().
+///
+/// The std::pointer_traits<>::to_address(p) variations of these overloads
+/// have not been implemented.
+template <class Ptr> auto to_address(const Ptr &P) -> decltype(P.operator->()) {
+  return P.operator->();
+}
+template <class T> constexpr T *to_address(T *P) { return P; }
+
 } // end namespace llvm
 
 #endif // LLVM_ADT_STLEXTRAS_H
diff --git a/include/llvm/ADT/ScopeExit.h b/include/llvm/ADT/ScopeExit.h
index bd13755fa999..712d91237739 100644
--- a/include/llvm/ADT/ScopeExit.h
+++ b/include/llvm/ADT/ScopeExit.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ScopeExit.h - Execute code at scope exit --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index 22b0c1bdaf4d..40c49ebc0be1 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -1,9 +1,8 @@
 //===- ScopedHashTable.h - A simple scoped hash table -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/Sequence.h b/include/llvm/ADT/Sequence.h
index 3d4a897bf9a9..8c505f2010dd 100644
--- a/include/llvm/ADT/Sequence.h
+++ b/include/llvm/ADT/Sequence.h
@@ -1,9 +1,8 @@
 //===- Sequence.h - Utility for producing sequences of values ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/ADT/SetOperations.h b/include/llvm/ADT/SetOperations.h
index 7c9f2fbe066e..037256a860b2 100644
--- a/include/llvm/ADT/SetOperations.h
+++ b/include/llvm/ADT/SetOperations.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/SetOperations.h - Generic Set Operations -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index 3d6781041320..d0a0d28d1c81 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SetVector.h - Set with insert order iteration ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 0a73dbd60671..742450e6a951 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallBitVector.h - 'Normally small' bit vectors -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index db08e40257ba..913518230d2d 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index 5d84627714bc..6b128c2e2992 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallSet.h - 'Normally small' sets --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index ff46e85ccb09..898be80d0324 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallString.h - 'Normally small' strings --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 0636abbb1fbf..17586904d212 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,8 +41,8 @@ protected:
   unsigned Size = 0, Capacity;
 
   SmallVectorBase() = delete;
-  SmallVectorBase(void *FirstEl, size_t Capacity)
-      : BeginX(FirstEl), Capacity(Capacity) {}
+  SmallVectorBase(void *FirstEl, size_t TotalCapacity)
+      : BeginX(FirstEl), Capacity(TotalCapacity) {}
 
   /// This is an implementation of the grow() method which only works
   /// on POD-like data types and is out of line to reduce code duplication.
@@ -64,9 +63,9 @@ public:
   /// of the buffer when they know that more elements are available, and only
   /// update the size later. This avoids the cost of value initializing elements
   /// which will only be overwritten.
-  void set_size(size_t Size) {
-    assert(Size <= capacity());
-    this->Size = Size;
+  void set_size(size_t N) {
+    assert(N <= capacity());
+    Size = N;
   }
 };
 
@@ -125,13 +124,9 @@ public:
   using const_pointer = const T *;
 
   // forward iterator creation methods.
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   iterator begin() { return (iterator)this->BeginX; }
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   const_iterator begin() const { return (const_iterator)this->BeginX; }
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   iterator end() { return begin() + size(); }
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   const_iterator end() const { return begin() + size(); }
 
   // reverse iterator creation methods.
@@ -150,12 +145,10 @@ public:
   /// Return a pointer to the vector's buffer, even if empty().
   const_pointer data() const { return const_pointer(begin()); }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   reference operator[](size_type idx) {
     assert(idx < size());
     return begin()[idx];
   }
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   const_reference operator[](size_type idx) const {
     assert(idx < size());
     return begin()[idx];
@@ -180,9 +173,9 @@ public:
   }
 };
 
-/// SmallVectorTemplateBase<isPodLike = false> - This is where we put method
+/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put method
 /// implementations that are designed to work with non-POD-like T's.
-template <typename T, bool = isPodLike<T>::value>
+template <typename T, bool = is_trivially_copyable<T>::value>
 class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
 protected:
   SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
@@ -236,8 +229,8 @@ public:
 };
 
 // Define this out-of-line to dissuade the C++ compiler from inlining it.
-template <typename T, bool isPodLike>
-void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
   if (MinSize > UINT32_MAX)
     report_bad_alloc_error("SmallVector capacity overflow during allocation");
 
@@ -260,9 +253,8 @@ void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
   this->Capacity = NewCapacity;
 }
 
-
-/// SmallVectorTemplateBase<isPodLike = true> - This is where we put method
-/// implementations that are designed to work with POD-like T's.
+/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
+/// method implementations designed to work with trivially copyable T's.
 template <typename T>
 class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
 protected:
@@ -326,12 +318,13 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> {
 public:
   using iterator = typename SuperClass::iterator;
   using const_iterator = typename SuperClass::const_iterator;
+  using reference = typename SuperClass::reference;
   using size_type = typename SuperClass::size_type;
 
 protected:
   // Default ctor - Initialize to empty.
   explicit SmallVectorImpl(unsigned N)
-      : SmallVectorTemplateBase<T, isPodLike<T>::value>(N) {}
+      : SmallVectorTemplateBase<T>(N) {}
 
 public:
   SmallVectorImpl(const SmallVectorImpl &) = delete;
@@ -393,22 +386,18 @@ public:
                 std::input_iterator_tag>::value>::type>
   void append(in_iter in_start, in_iter in_end) {
     size_type NumInputs = std::distance(in_start, in_end);
-    // Grow allocated space if needed.
     if (NumInputs > this->capacity() - this->size())
       this->grow(this->size()+NumInputs);
 
-    // Copy the new elements over.
     this->uninitialized_copy(in_start, in_end, this->end());
     this->set_size(this->size() + NumInputs);
   }
 
-  /// Add the specified range to the end of the SmallVector.
+  /// Append \p NumInputs copies of \p Elt to the end.
   void append(size_type NumInputs, const T &Elt) {
-    // Grow allocated space if needed.
     if (NumInputs > this->capacity() - this->size())
       this->grow(this->size()+NumInputs);
 
-    // Copy the new elements over.
     std::uninitialized_fill_n(this->end(), NumInputs, Elt);
     this->set_size(this->size() + NumInputs);
   }
@@ -649,11 +638,12 @@ public:
     insert(I, IL.begin(), IL.end());
   }
 
-  template <typename... ArgTypes> void emplace_back(ArgTypes &&... Args) {
+  template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
     if (LLVM_UNLIKELY(this->size() >= this->capacity()))
       this->grow();
     ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
     this->set_size(this->size() + 1);
+    return this->back();
   }
 
   SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 84e73bcbace8..12850e14f4ed 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h
index 3c8637621510..d9d3ff459267 100644
--- a/include/llvm/ADT/SparseMultiSet.h
+++ b/include/llvm/ADT/SparseMultiSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
index 74cc6dab8c74..a6eb9b942e80 100644
--- a/include/llvm/ADT/SparseSet.h
+++ b/include/llvm/ADT/SparseSet.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 90c2eefceb6c..2ac59da596ef 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/Statistic.h - Easy way to expose stats ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 60a03633a8a6..16ac90bd6c89 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index a9f83d3f5091..8a586fc26709 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -1,9 +1,8 @@
 //===- StringMap.h - String Hash table map interface ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -360,6 +359,11 @@ public:
     return find(Key) == end() ? 0 : 1;
   }
 
+  template <typename InputTy>
+  size_type count(const StringMapEntry<InputTy> &MapEntry) const {
+    return count(MapEntry.getKey());
+  }
+
   /// insert - Insert the specified key/value pair into the map.  If the key
   /// already exists in the map, return false and ignore the request, otherwise
   /// insert it and return true.
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index a5ba5b59b5a3..4661b1e68b2f 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -1,9 +1,8 @@
 //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -63,7 +62,6 @@ namespace llvm {
 
     // Workaround memcmp issue with null pointers (undefined behavior)
     // by providing a specialized version
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
       if (Length == 0) { return 0; }
       return ::memcmp(Lhs,Rhs,Length);
@@ -81,17 +79,14 @@ namespace llvm {
     StringRef(std::nullptr_t) = delete;
 
     /// Construct a string ref from a cstring.
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     /*implicit*/ StringRef(const char *Str)
         : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
 
     /// Construct a string ref from a pointer and length.
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     /*implicit*/ constexpr StringRef(const char *data, size_t length)
         : Data(data), Length(length) {}
 
     /// Construct a string ref from an std::string.
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     /*implicit*/ StringRef(const std::string &Str)
       : Data(Str.data()), Length(Str.length()) {}
 
@@ -124,17 +119,14 @@ namespace llvm {
     /// data - Get a pointer to the start of the string (which may not be null
     /// terminated).
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     const char *data() const { return Data; }
 
     /// empty - Check if the string is empty.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool empty() const { return Length == 0; }
 
     /// size - Get the string size.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t size() const { return Length; }
 
     /// front - Get the first character in the string.
@@ -165,7 +157,6 @@ namespace llvm {
     /// equals - Check for string equality, this is more efficient than
     /// compare() when the relative ordering of inequal strings isn't needed.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool equals(StringRef RHS) const {
       return (Length == RHS.Length &&
               compareMemory(Data, RHS.Data, RHS.Length) == 0);
@@ -180,7 +171,6 @@ namespace llvm {
     /// compare - Compare two strings; the result is -1, 0, or 1 if this string
     /// is lexicographically less than, equal to, or greater than the \p RHS.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     int compare(StringRef RHS) const {
       // Check the prefix for a mismatch.
       if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
@@ -263,7 +253,6 @@ namespace llvm {
 
     /// Check if this string starts with the given \p Prefix.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool startswith(StringRef Prefix) const {
       return Length >= Prefix.Length &&
              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
@@ -275,7 +264,6 @@ namespace llvm {
 
     /// Check if this string ends with the given \p Suffix.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool endswith(StringRef Suffix) const {
       return Length >= Suffix.Length &&
         compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
@@ -294,7 +282,6 @@ namespace llvm {
     /// \returns The index of the first occurrence of \p C, or npos if not
     /// found.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t find(char C, size_t From = 0) const {
       size_t FindBegin = std::min(From, Length);
       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
@@ -317,7 +304,6 @@ namespace llvm {
     /// \returns The index of the first character satisfying \p F starting from
     /// \p From, or npos if not found.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
       StringRef S = drop_front(From);
       while (!S.empty()) {
@@ -333,7 +319,6 @@ namespace llvm {
     /// \returns The index of the first character not satisfying \p F starting
     /// from \p From, or npos if not found.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
       return find_if([F](char c) { return !F(c); }, From);
     }
@@ -444,19 +429,16 @@ namespace llvm {
     /// Return true if the given string is a substring of *this, and false
     /// otherwise.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool contains(StringRef Other) const { return find(Other) != npos; }
 
     /// Return true if the given character is contained in *this, and false
     /// otherwise.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool contains(char C) const { return find_first_of(C) != npos; }
 
     /// Return true if the given string is a substring of *this, and false
     /// otherwise.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool contains_lower(StringRef Other) const {
       return find_lower(Other) != npos;
     }
@@ -464,7 +446,6 @@ namespace llvm {
     /// Return true if the given character is contained in *this, and false
     /// otherwise.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool contains_lower(char C) const { return find_lower(C) != npos; }
 
     /// @}
@@ -594,7 +575,6 @@ namespace llvm {
     /// exceeds the number of characters remaining in the string, the string
     /// suffix (starting with \p Start) will be returned.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef substr(size_t Start, size_t N = npos) const {
       Start = std::min(Start, Length);
       return StringRef(Data + Start, std::min(N, Length - Start));
@@ -604,7 +584,6 @@ namespace llvm {
     /// elements remaining.  If \p N is greater than the length of the
     /// string, the entire string is returned.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef take_front(size_t N = 1) const {
       if (N >= size())
         return *this;
@@ -615,7 +594,6 @@ namespace llvm {
     /// elements remaining.  If \p N is greater than the length of the
     /// string, the entire string is returned.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef take_back(size_t N = 1) const {
       if (N >= size())
         return *this;
@@ -625,7 +603,6 @@ namespace llvm {
     /// Return the longest prefix of 'this' such that every character
     /// in the prefix satisfies the given predicate.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef take_while(function_ref<bool(char)> F) const {
       return substr(0, find_if_not(F));
     }
@@ -633,7 +610,6 @@ namespace llvm {
     /// Return the longest prefix of 'this' such that no character in
     /// the prefix satisfies the given predicate.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef take_until(function_ref<bool(char)> F) const {
       return substr(0, find_if(F));
     }
@@ -641,7 +617,6 @@ namespace llvm {
     /// Return a StringRef equal to 'this' but with the first \p N elements
     /// dropped.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_front(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(N);
@@ -650,7 +625,6 @@ namespace llvm {
     /// Return a StringRef equal to 'this' but with the last \p N elements
     /// dropped.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_back(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(0, size()-N);
@@ -659,7 +633,6 @@ namespace llvm {
     /// Return a StringRef equal to 'this', but with all characters satisfying
     /// the given predicate dropped from the beginning of the string.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_while(function_ref<bool(char)> F) const {
       return substr(find_if_not(F));
     }
@@ -667,14 +640,12 @@ namespace llvm {
     /// Return a StringRef equal to 'this', but with all characters not
     /// satisfying the given predicate dropped from the beginning of the string.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_until(function_ref<bool(char)> F) const {
       return substr(find_if(F));
     }
 
     /// Returns true if this StringRef has the given prefix and removes that
     /// prefix.
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool consume_front(StringRef Prefix) {
       if (!startswith(Prefix))
         return false;
@@ -685,7 +656,6 @@ namespace llvm {
 
     /// Returns true if this StringRef has the given suffix and removes that
     /// suffix.
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool consume_back(StringRef Suffix) {
       if (!endswith(Suffix))
         return false;
@@ -706,7 +676,6 @@ namespace llvm {
     /// will be returned. If this is less than \p Start, an empty string will
     /// be returned.
     LLVM_NODISCARD
-    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef slice(size_t Start, size_t End) const {
       Start = std::min(Start, Length);
       End = std::min(std::max(Start, End), Length);
@@ -894,12 +863,10 @@ namespace llvm {
   /// @name StringRef Comparison Operators
   /// @{
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   inline bool operator==(StringRef LHS, StringRef RHS) {
     return LHS.equals(RHS);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
 
   inline bool operator<(StringRef LHS, StringRef RHS) {
@@ -928,10 +895,6 @@ namespace llvm {
   LLVM_NODISCARD
   hash_code hash_value(StringRef S);
 
-  // StringRefs can be treated like a POD type.
-  template <typename T> struct isPodLike;
-  template <> struct isPodLike<StringRef> { static const bool value = true; };
-
 } // end namespace llvm
 
 #endif // LLVM_ADT_STRINGREF_H
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index 9af44c07df79..af3a44a7b32c 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -1,9 +1,8 @@
 //===- StringSet.h - The LLVM Compiler Driver -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,7 @@ namespace llvm {
       for (StringRef X : S)
         insert(X);
     }
+    explicit StringSet(AllocatorTy A) : base(A) {}
 
     std::pair<typename base::iterator, bool> insert(StringRef Key) {
       assert(!Key.empty());
@@ -45,6 +45,12 @@ namespace llvm {
       for (auto It = Begin; It != End; ++It)
         base::insert(std::make_pair(*It, '\0'));
     }
+
+    template <typename ValueTy>
+    std::pair<typename base::iterator, bool>
+    insert(const StringMapEntry<ValueTy> &MapEntry) {
+      return insert(MapEntry.getKey());
+    }
   };
 
 } // end namespace llvm
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
index b7860b98ce5d..fea911f6928b 100644
--- a/include/llvm/ADT/StringSwitch.h
+++ b/include/llvm/ADT/StringSwitch.h
@@ -1,9 +1,8 @@
 //===--- StringSwitch.h - Switch-on-literal-string Construct --------------===/
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===/
 //
 //  This file implements the StringSwitch template, which mimics a switch()
@@ -49,7 +48,6 @@ class StringSwitch {
   Optional<T> Result;
 
 public:
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   explicit StringSwitch(StringRef S)
   : Str(S), Result() { }
 
@@ -66,7 +64,6 @@ public:
   ~StringSwitch() = default;
 
   // Case-sensitive case matchers
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Case(StringLiteral S, T Value) {
     if (!Result && Str == S) {
       Result = std::move(Value);
@@ -74,7 +71,6 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch& EndsWith(StringLiteral S, T Value) {
     if (!Result && Str.endswith(S)) {
       Result = std::move(Value);
@@ -82,7 +78,6 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch& StartsWith(StringLiteral S, T Value) {
     if (!Result && Str.startswith(S)) {
       Result = std::move(Value);
@@ -90,51 +85,43 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, T Value) {
     return Case(S0, Value).Case(S1, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       T Value) {
     return Case(S0, Value).Cases(S1, S2, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, T Value) {
     return Case(S0, Value).Cases(S1, S2, S3, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, T Value) {
     return Case(S0, Value).Cases(S1, S2, S3, S4, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       T Value) {
     return Case(S0, Value).Cases(S1, S2, S3, S4, S5, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, T Value) {
     return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, T Value) {
     return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, StringLiteral S8,
@@ -142,7 +129,6 @@ public:
     return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, S8, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, StringLiteral S8,
@@ -151,7 +137,6 @@ public:
   }
 
   // Case-insensitive case matchers.
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &CaseLower(StringLiteral S, T Value) {
     if (!Result && Str.equals_lower(S))
       Result = std::move(Value);
@@ -159,7 +144,6 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &EndsWithLower(StringLiteral S, T Value) {
     if (!Result && Str.endswith_lower(S))
       Result = Value;
@@ -167,7 +151,6 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &StartsWithLower(StringLiteral S, T Value) {
     if (!Result && Str.startswith_lower(S))
       Result = std::move(Value);
@@ -175,31 +158,26 @@ public:
     return *this;
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, T Value) {
     return CaseLower(S0, Value).CaseLower(S1, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            T Value) {
     return CaseLower(S0, Value).CasesLower(S1, S2, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            StringLiteral S3, T Value) {
     return CaseLower(S0, Value).CasesLower(S1, S2, S3, Value);
   }
 
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            StringLiteral S3, StringLiteral S4, T Value) {
     return CaseLower(S0, Value).CasesLower(S1, S2, S3, S4, Value);
   }
 
   LLVM_NODISCARD
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   R Default(T Value) {
     if (Result)
       return std::move(*Result);
@@ -207,7 +185,6 @@ public:
   }
 
   LLVM_NODISCARD
-  LLVM_ATTRIBUTE_ALWAYS_INLINE
   operator R() {
     assert(Result && "Fell off the end of a string-switch");
     return std::move(*Result);
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index 1b8e9aa658c3..ac82451a9b21 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/TinyPtrVector.h - 'Normally tiny' vectors -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index e06a68e27317..edeb31efab80 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/Triple.h - Target triple helper class ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -50,6 +49,7 @@ public:
     armeb,          // ARM (big endian): armeb
     aarch64,        // AArch64 (little endian): aarch64
     aarch64_be,     // AArch64 (big endian): aarch64_be
+    aarch64_32,     // AArch64 (little endian) ILP32: aarch64_32
     arc,            // ARC: Synopsys ARC
     avr,            // AVR: Atmel AVR microcontroller
     bpfel,          // eBPF or extended BPF or 64-bit BPF (little endian)
@@ -109,6 +109,7 @@ public:
     ARMSubArch_v8r,
     ARMSubArch_v8m_baseline,
     ARMSubArch_v8m_mainline,
+    ARMSubArch_v8_1m_mainline,
     ARMSubArch_v7,
     ARMSubArch_v7em,
     ARMSubArch_v7m,
@@ -187,7 +188,8 @@ public:
     HermitCore, // HermitCore Unikernel/Multikernel
     Hurd,       // GNU/Hurd
     WASI,       // Experimental WebAssembly OS
-    LastOSType = WASI
+    Emscripten,
+    LastOSType = Emscripten
   };
   enum EnvironmentType {
     UnknownEnvironment,
@@ -201,6 +203,8 @@ public:
     CODE16,
     EABI,
     EABIHF,
+    ELFv1,
+    ELFv2,
     Android,
     Musl,
     MuslEABI,
@@ -210,8 +214,9 @@ public:
     Itanium,
     Cygnus,
     CoreCLR,
-    Simulator,  // Simulator variants of other systems, e.g., Apple's iOS
-    LastEnvironmentType = Simulator
+    Simulator, // Simulator variants of other systems, e.g., Apple's iOS
+    MacABI, // Mac Catalyst variant of Apple's iOS deployment target.
+    LastEnvironmentType = MacABI
   };
   enum ObjectFormatType {
     UnknownObjectFormat,
@@ -220,6 +225,7 @@ public:
     ELF,
     MachO,
     Wasm,
+    XCOFF,
   };
 
 private:
@@ -415,7 +421,7 @@ public:
     if (LHS[1] != Minor)
       return LHS[1] < Minor;
     if (LHS[2] != Micro)
-      return LHS[1] < Micro;
+      return LHS[2] < Micro;
 
     return false;
   }
@@ -480,6 +486,10 @@ public:
     return getEnvironment() == Triple::Simulator;
   }
 
+  bool isMacCatalystEnvironment() const {
+    return getEnvironment() == Triple::MacABI;
+  }
+
   bool isOSNetBSD() const {
     return getOS() == Triple::NetBSD;
   }
@@ -524,32 +534,36 @@ public:
     return getOS() == Triple::Haiku;
   }
 
-  /// Checks if the environment could be MSVC.
-  bool isWindowsMSVCEnvironment() const {
-    return getOS() == Triple::Win32 &&
-           (getEnvironment() == Triple::UnknownEnvironment ||
-            getEnvironment() == Triple::MSVC);
+  /// Tests whether the OS is Windows.
+  bool isOSWindows() const {
+    return getOS() == Triple::Win32;
   }
 
   /// Checks if the environment is MSVC.
   bool isKnownWindowsMSVCEnvironment() const {
-    return getOS() == Triple::Win32 && getEnvironment() == Triple::MSVC;
+    return isOSWindows() && getEnvironment() == Triple::MSVC;
+  }
+
+  /// Checks if the environment could be MSVC.
+  bool isWindowsMSVCEnvironment() const {
+    return isKnownWindowsMSVCEnvironment() ||
+           (isOSWindows() && getEnvironment() == Triple::UnknownEnvironment);
   }
 
   bool isWindowsCoreCLREnvironment() const {
-    return getOS() == Triple::Win32 && getEnvironment() == Triple::CoreCLR;
+    return isOSWindows() && getEnvironment() == Triple::CoreCLR;
   }
 
   bool isWindowsItaniumEnvironment() const {
-    return getOS() == Triple::Win32 && getEnvironment() == Triple::Itanium;
+    return isOSWindows() && getEnvironment() == Triple::Itanium;
   }
 
   bool isWindowsCygwinEnvironment() const {
-    return getOS() == Triple::Win32 && getEnvironment() == Triple::Cygnus;
+    return isOSWindows() && getEnvironment() == Triple::Cygnus;
   }
 
   bool isWindowsGNUEnvironment() const {
-    return getOS() == Triple::Win32 && getEnvironment() == Triple::GNU;
+    return isOSWindows() && getEnvironment() == Triple::GNU;
   }
 
   /// Tests for either Cygwin or MinGW OS
@@ -563,11 +577,6 @@ public:
            isWindowsItaniumEnvironment();
   }
 
-  /// Tests whether the OS is Windows.
-  bool isOSWindows() const {
-    return getOS() == Triple::Win32;
-  }
-
   /// Tests whether the OS is NaCl (Native Client)
   bool isOSNaCl() const {
     return getOS() == Triple::NaCl;
@@ -593,6 +602,11 @@ public:
     return getOS() == Triple::WASI;
   }
 
+  /// Tests whether the OS is Emscripten.
+  bool isOSEmscripten() const {
+    return getOS() == Triple::Emscripten;
+  }
+
   /// Tests whether the OS uses glibc.
   bool isOSGlibc() const {
     return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD ||
@@ -600,6 +614,11 @@ public:
            !isAndroid();
   }
 
+  /// Tests whether the OS is AIX.
+  bool isOSAIX() const {
+    return getOS() == Triple::AIX;
+  }
+
   /// Tests whether the OS uses the ELF binary format.
   bool isOSBinFormatELF() const {
     return getObjectFormat() == Triple::ELF;
@@ -620,6 +639,11 @@ public:
     return getObjectFormat() == Triple::Wasm;
   }
 
+  /// Tests whether the OS uses the XCOFF binary format.
+  bool isOSBinFormatXCOFF() const {
+    return getObjectFormat() == Triple::XCOFF;
+  }
+
   /// Tests whether the target is the PS4 CPU
   bool isPS4CPU() const {
     return getArch() == Triple::x86_64 &&
@@ -656,6 +680,11 @@ public:
            getEnvironment() == Triple::MuslEABIHF;
   }
 
+  /// Tests whether the target is SPIR (32- or 64-bit).
+  bool isSPIR() const {
+    return getArch() == Triple::spir || getArch() == Triple::spir64;
+  }
+
   /// Tests whether the target is NVPTX (32- or 64-bit).
   bool isNVPTX() const {
     return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
@@ -691,6 +720,16 @@ public:
     return isMIPS32() || isMIPS64();
   }
 
+  /// Tests whether the target is 64-bit PowerPC (little and big endian).
+  bool isPPC64() const {
+    return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le;
+  }
+
+  /// Tests whether the target is RISC-V (32- and 64-bit).
+  bool isRISCV() const {
+    return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
+  }
+
   /// Tests whether the target supports comdat
   bool supportsCOMDAT() const {
     return !isOSBinFormatMachO();
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index b60fd0981398..4140c22aad3d 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -1,9 +1,8 @@
 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -274,6 +273,9 @@ namespace llvm {
 
       assert(isValid() && "Invalid twine!");
     }
+    /// Delete the implicit conversion from nullptr as Twine(const char *)
+    /// cannot take nullptr.
+    /*implicit*/ Twine(std::nullptr_t) = delete;
 
     /// Construct from an std::string.
     /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h
index c86bedd07687..bfea988f1702 100644
--- a/include/llvm/ADT/UniqueVector.h
+++ b/include/llvm/ADT/UniqueVector.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/UniqueVector.h ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
index 9028abe4c72c..5aefb05ecdda 100644
--- a/include/llvm/ADT/VariadicFunction.h
+++ b/include/llvm/ADT/VariadicFunction.h
@@ -1,9 +1,8 @@
-//===--- VariadicFunctions.h - Variadic Functions ---------------*- C++ -*-===//
+//===- VariadicFunction.h - Variadic Functions ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/bit.h b/include/llvm/ADT/bit.h
index a4aba7b6a9ee..a790d5ed2d21 100644
--- a/include/llvm/ADT/bit.h
+++ b/include/llvm/ADT/bit.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/bit.h - C++20 <bit> ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,11 +40,11 @@ template <typename To, typename From
           , typename = typename std::enable_if<__is_trivially_copyable(To)>::type
           , typename = typename std::enable_if<__is_trivially_copyable(From)>::type
 #else
-  // This case is GCC 4.x. clang with libc++ or libstdc++ never get here. Unlike
-  // llvm/Support/type_traits.h's isPodLike we don't want to provide a
-  // good-enough answer here: developers in that configuration will hit
-  // compilation failures on the bots instead of locally. That's acceptable
-  // because it's very few developers, and only until we move past C++11.
+// This case is GCC 4.x. clang with libc++ or libstdc++ never gets here. Unlike
+// llvm/Support/type_traits.h's is_trivially_copyable we don't want to
+// provide a good-enough answer here: developers in that configuration will hit
+// compilation failures on the bots instead of locally. That's acceptable
+// because it's very few developers, and only until we move past C++11.
 #endif
 >
 inline To bit_cast(const From &from) noexcept {
diff --git a/include/llvm/ADT/edit_distance.h b/include/llvm/ADT/edit_distance.h
index b2e8ec5c3f6d..4f5134008692 100644
--- a/include/llvm/ADT/edit_distance.h
+++ b/include/llvm/ADT/edit_distance.h
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/fallible_iterator.h b/include/llvm/ADT/fallible_iterator.h
new file mode 100644
index 000000000000..6501ad2233cd
--- /dev/null
+++ b/include/llvm/ADT/fallible_iterator.h
@@ -0,0 +1,243 @@
+//===--- fallible_iterator.h - Wrapper for fallible iterators ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_FALLIBLE_ITERATOR_H
+#define LLVM_ADT_FALLIBLE_ITERATOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Error.h"
+
+#include <type_traits>
+
+namespace llvm {
+
+/// A wrapper class for fallible iterators.
+///
+///   The fallible_iterator template wraps an underlying iterator-like class
+/// whose increment and decrement operations are replaced with fallible versions
+/// like:
+///
+///   @code{.cpp}
+///   Error inc();
+///   Error dec();
+///   @endcode
+///
+///   It produces an interface that is (mostly) compatible with a traditional
+/// c++ iterator, including ++ and -- operators that do not fail.
+///
+///   Instances of the wrapper are constructed with an instance of the
+/// underlying iterator and (for non-end iterators) a reference to an Error
+/// instance. If the underlying increment/decrement operations fail, the Error
+/// is returned via this reference, and the resulting iterator value set to an
+/// end-of-range sentinel value. This enables the following loop idiom:
+///
+///   @code{.cpp}
+///   class Archive { // E.g. Potentially malformed on-disk archive
+///   public:
+///     fallible_iterator<ArchiveChildItr> children_begin(Error &Err);
+///     fallible_iterator<ArchiveChildItr> children_end();
+///     iterator_range<fallible_iterator<ArchiveChildItr>>
+///     children(Error &Err) {
+///       return make_range(children_begin(Err), children_end());
+///     } // ...
+///   };
+///
+///   void walk(Archive &A) {
+///     Error Err = Error::success();
+///     for (auto &C : A.children(Err)) {
+///       // Loop body only entered when increment succeeds.
+///     }
+///     if (Err) {
+///       // handle error.
+///     }
+///   }
+///   @endcode
+///
+///   The wrapper marks the referenced Error as unchecked after each increment
+/// and/or decrement operation, and clears the unchecked flag when a non-end
+/// value is compared against end (since, by the increment invariant, not being
+/// an end value proves that there was no error, and is equivalent to checking
+/// that the Error is success). This allows early exits from the loop body
+/// without requiring redundant error checks.
+template <typename Underlying> class fallible_iterator {
+private:
+  template <typename T>
+  using enable_if_struct_deref_supported = std::enable_if<
+      !std::is_void<decltype(std::declval<T>().operator->())>::value,
+      decltype(std::declval<T>().operator->())>;
+
+public:
+  /// Construct a fallible iterator that *cannot* be used as an end-of-range
+  /// value.
+  ///
+  /// A value created by this method can be dereferenced, incremented,
+  /// decremented and compared, providing the underlying type supports it.
+  ///
+  /// The error that is passed in will be initially marked as checked, so if the
+  /// iterator is not used at all the Error need not be checked.
+  static fallible_iterator itr(Underlying I, Error &Err) {
+    (void)!!Err;
+    return fallible_iterator(std::move(I), &Err);
+  }
+
+  /// Construct a fallible iterator that can be used as an end-of-range value.
+  ///
+  /// A value created by this method can be dereferenced (if the underlying
+  /// value points at a valid value) and compared, but not incremented or
+  /// decremented.
+  static fallible_iterator end(Underlying I) {
+    return fallible_iterator(std::move(I), nullptr);
+  }
+
+  /// Forward dereference to the underlying iterator.
+  auto operator*() -> decltype(*std::declval<Underlying>()) { return *I; }
+
+  /// Forward const dereference to the underlying iterator.
+  auto operator*() const -> decltype(*std::declval<const Underlying>()) {
+    return *I;
+  }
+
+  /// Forward structure dereference to the underlying iterator (if the
+  /// underlying iterator supports it).
+  template <typename T = Underlying>
+  typename enable_if_struct_deref_supported<T>::type operator->() {
+    return I.operator->();
+  }
+
+  /// Forward const structure dereference to the underlying iterator (if the
+  /// underlying iterator supports it).
+  template <typename T = Underlying>
+  typename enable_if_struct_deref_supported<const T>::type operator->() const {
+    return I.operator->();
+  }
+
+  /// Increment the fallible iterator.
+  ///
+  /// If the underlying 'inc' operation fails, this will set the Error value
+  /// and update this iterator value to point to end-of-range.
+  ///
+  /// The Error value is marked as needing checking, regardless of whether the
+  /// 'inc' operation succeeds or fails.
+  fallible_iterator &operator++() {
+    assert(getErrPtr() && "Cannot increment end iterator");
+    if (auto Err = I.inc())
+      handleError(std::move(Err));
+    else
+      resetCheckedFlag();
+    return *this;
+  }
+
+  /// Decrement the fallible iterator.
+  ///
+  /// If the underlying 'dec' operation fails, this will set the Error value
+  /// and update this iterator value to point to end-of-range.
+  ///
+  /// The Error value is marked as needing checking, regardless of whether the
+  /// 'dec' operation succeeds or fails.
+  fallible_iterator &operator--() {
+    assert(getErrPtr() && "Cannot decrement end iterator");
+    if (auto Err = I.dec())
+      handleError(std::move(Err));
+    else
+      resetCheckedFlag();
+    return *this;
+  }
+
+  /// Compare fallible iterators for equality.
+  ///
+  /// Returns true if both LHS and RHS are end-of-range values, or if both are
+  /// non-end-of-range values whose underlying iterator values compare equal.
+  ///
+  /// If this is a comparison between an end-of-range iterator and a
+  /// non-end-of-range iterator, then the Error (referenced by the
+  /// non-end-of-range value) is marked as checked: Since all failed
+  /// increment/decrement operations result in an end-of-range value, comparing
+  /// false against end-of-range is equivalent to checking that the Error value
+  /// is success. This flag management enables early returns from loop bodies
+  /// without redundant Error checks.
+  friend bool operator==(const fallible_iterator &LHS,
+                         const fallible_iterator &RHS) {
+    // If both iterators are in the end state they compare
+    // equal, regardless of whether either is valid.
+    if (LHS.isEnd() && RHS.isEnd())
+      return true;
+
+    assert(LHS.isValid() && RHS.isValid() &&
+           "Invalid iterators can only be compared against end");
+
+    bool Equal = LHS.I == RHS.I;
+
+    // If the iterators differ and this is a comparison against end then mark
+    // the Error as checked.
+    if (!Equal) {
+      if (LHS.isEnd())
+        (void)!!*RHS.getErrPtr();
+      else
+        (void)!!*LHS.getErrPtr();
+    }
+
+    return Equal;
+  }
+
+  /// Compare fallible iterators for inequality.
+  ///
+  /// See notes for operator==.
+  friend bool operator!=(const fallible_iterator &LHS,
+                         const fallible_iterator &RHS) {
+    return !(LHS == RHS);
+  }
+
+private:
+  fallible_iterator(Underlying I, Error *Err)
+      : I(std::move(I)), ErrState(Err, false) {}
+
+  Error *getErrPtr() const { return ErrState.getPointer(); }
+
+  bool isEnd() const { return getErrPtr() == nullptr; }
+
+  bool isValid() const { return !ErrState.getInt(); }
+
+  void handleError(Error Err) {
+    *getErrPtr() = std::move(Err);
+    ErrState.setPointer(nullptr);
+    ErrState.setInt(true);
+  }
+
+  void resetCheckedFlag() {
+    *getErrPtr() = Error::success();
+  }
+
+  Underlying I;
+  mutable PointerIntPair<Error *, 1> ErrState;
+};
+
+/// Convenience wrapper to make a fallible_iterator value from an instance
+/// of an underlying iterator and an Error reference.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_itr(Underlying I, Error &Err) {
+  return fallible_iterator<Underlying>::itr(std::move(I), Err);
+}
+
+/// Convenience wrapper to make a fallible_iterator end value from an instance
+/// of an underlying iterator.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_end(Underlying E) {
+  return fallible_iterator<Underlying>::end(std::move(E));
+}
+
+template <typename Underlying>
+iterator_range<fallible_iterator<Underlying>>
+make_fallible_range(Underlying I, Underlying E, Error &Err) {
+  return make_range(make_fallible_itr(std::move(I), Err),
+                    make_fallible_end(std::move(E)));
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_FALLIBLE_ITERATOR_H
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 00bb6d528175..06c7abff965f 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -1,9 +1,8 @@
 //==-- llvm/ADT/ilist.h - Intrusive Linked List Template ---------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -66,9 +65,8 @@ template <typename NodeTy> struct ilist_callback_traits {
   void addNodeToList(NodeTy *) {}
   void removeNodeFromList(NodeTy *) {}
 
-  /// Callback before transferring nodes to this list.
-  ///
-  /// \pre \c this!=&OldList
+  /// Callback before transferring nodes to this list. The nodes may already be
+  /// in this same list.
   template <class Iterator>
   void transferNodesFromList(ilist_callback_traits &OldList, Iterator /*first*/,
                              Iterator /*last*/) {
@@ -287,8 +285,8 @@ private:
     if (position == last)
       return;
 
-    if (this != &L2) // Notify traits we moved the nodes...
-      this->transferNodesFromList(L2, first, last);
+    // Notify traits we moved the nodes...
+    this->transferNodesFromList(L2, first, last);
 
     base_list_type::splice(position, L2, first, last);
   }
diff --git a/include/llvm/ADT/ilist_base.h b/include/llvm/ADT/ilist_base.h
index 3d818a48d41d..b8c098b951ad 100644
--- a/include/llvm/ADT/ilist_base.h
+++ b/include/llvm/ADT/ilist_base.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ilist_base.h - Intrusive List Base --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/ilist_iterator.h b/include/llvm/ADT/ilist_iterator.h
index 671e644e0154..cbe5cefa96d1 100644
--- a/include/llvm/ADT/ilist_iterator.h
+++ b/include/llvm/ADT/ilist_iterator.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index dd0e6b4ec2b9..e040d9630a1e 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ilist_node.h - Intrusive Linked List Helper -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ADT/ilist_node_base.h b/include/llvm/ADT/ilist_node_base.h
index e5062ac4eaad..f6c518e6eed7 100644
--- a/include/llvm/ADT/ilist_node_base.h
+++ b/include/llvm/ADT/ilist_node_base.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ilist_node_base.h - Intrusive List Node Base -----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/ilist_node_options.h b/include/llvm/ADT/ilist_node_options.h
index 7ff4005f6757..9b95cdbe08c4 100644
--- a/include/llvm/ADT/ilist_node_options.h
+++ b/include/llvm/ADT/ilist_node_options.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/ilist_node_options.h - ilist_node Options -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h
index 40e490cf7864..467fd4c00ec5 100644
--- a/include/llvm/ADT/iterator.h
+++ b/include/llvm/ADT/iterator.h
@@ -1,9 +1,8 @@
 //===- iterator.h - Utilities for using and defining iterators --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h
index 2ba12866ecf3..774c7c4e3366 100644
--- a/include/llvm/ADT/iterator_range.h
+++ b/include/llvm/ADT/iterator_range.h
@@ -1,9 +1,8 @@
 //===- iterator_range.h - A range adaptor for iterators ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/ADT/simple_ilist.h b/include/llvm/ADT/simple_ilist.h
index 4c7598a1acb4..9257b47b9cf8 100644
--- a/include/llvm/ADT/simple_ilist.h
+++ b/include/llvm/ADT/simple_ilist.h
@@ -1,9 +1,8 @@
 //===- llvm/ADT/simple_ilist.h - Simple Intrusive List ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index e2a2ac0622e8..948341554f23 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,6 +37,7 @@
 #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
 #define LLVM_ANALYSIS_ALIASANALYSIS_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
@@ -286,6 +286,28 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
   return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
 }
 
+/// This class stores info we want to provide to or retain within an alias
+/// query. By default, the root query is stateless and starts with a freshly
+/// constructed info object. Specific alias analyses can use this query info to
+/// store per-query state that is important for recursive or nested queries to
+/// avoid recomputing. To enable preserving this state across multiple queries
+/// where safe (due to the IR not changing), use a `BatchAAResults` wrapper.
+/// The information stored in an `AAQueryInfo` is currently limited to the
+/// caches used by BasicAA, but can further be extended to fit other AA needs.
+class AAQueryInfo {
+public:
+  using LocPair = std::pair<MemoryLocation, MemoryLocation>;
+  using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>;
+  AliasCacheT AliasCache;
+
+  using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
+  IsCapturedCacheT IsCapturedCache;
+
+  AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+};
+
+class BatchAAResults;
+
 class AAResults {
 public:
   // Make these results default constructable and movable. We have to spell
@@ -600,32 +622,8 @@ public:
   /// helpers above.
   ModRefInfo getModRefInfo(const Instruction *I,
                            const Optional<MemoryLocation> &OptLoc) {
-    if (OptLoc == None) {
-      if (const auto *Call = dyn_cast<CallBase>(I)) {
-        return createModRefInfo(getModRefBehavior(Call));
-      }
-    }
-
-    const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
-
-    switch (I->getOpcode()) {
-    case Instruction::VAArg:  return getModRefInfo((const VAArgInst*)I, Loc);
-    case Instruction::Load:   return getModRefInfo((const LoadInst*)I,  Loc);
-    case Instruction::Store:  return getModRefInfo((const StoreInst*)I, Loc);
-    case Instruction::Fence:  return getModRefInfo((const FenceInst*)I, Loc);
-    case Instruction::AtomicCmpXchg:
-      return getModRefInfo((const AtomicCmpXchgInst*)I, Loc);
-    case Instruction::AtomicRMW:
-      return getModRefInfo((const AtomicRMWInst*)I, Loc);
-    case Instruction::Call:   return getModRefInfo((const CallInst*)I,  Loc);
-    case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
-    case Instruction::CatchPad:
-      return getModRefInfo((const CatchPadInst *)I, Loc);
-    case Instruction::CatchRet:
-      return getModRefInfo((const CatchReturnInst *)I, Loc);
-    default:
-      return ModRefInfo::NoModRef;
-    }
+    AAQueryInfo AAQIP;
+    return getModRefInfo(I, OptLoc, AAQIP);
   }
 
   /// A convenience wrapper for constructing the memory location.
@@ -692,6 +690,69 @@ public:
   }
 
 private:
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal = false);
+  ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2,
+                           AAQueryInfo &AAQIP);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
+                           const MemoryLocation &Loc, AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const Instruction *I,
+                           const Optional<MemoryLocation> &OptLoc,
+                           AAQueryInfo &AAQIP) {
+    if (OptLoc == None) {
+      if (const auto *Call = dyn_cast<CallBase>(I)) {
+        return createModRefInfo(getModRefBehavior(Call));
+      }
+    }
+
+    const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
+    switch (I->getOpcode()) {
+    case Instruction::VAArg:
+      return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
+    case Instruction::Load:
+      return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
+    case Instruction::Store:
+      return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
+    case Instruction::Fence:
+      return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
+    case Instruction::AtomicCmpXchg:
+      return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
+    case Instruction::AtomicRMW:
+      return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
+    case Instruction::Call:
+      return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+    case Instruction::Invoke:
+      return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+    case Instruction::CatchPad:
+      return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
+    case Instruction::CatchRet:
+      return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
+    default:
+      return ModRefInfo::NoModRef;
+    }
+  }
+
   class Concept;
 
   template <typename T> class Model;
@@ -703,6 +764,47 @@ private:
   std::vector<std::unique_ptr<Concept>> AAs;
 
   std::vector<AnalysisKey *> AADeps;
+
+  friend class BatchAAResults;
+};
+
+/// This class is a wrapper over an AAResults, and it is intended to be used
+/// only when there are no IR changes in between queries. BatchAAResults is
+/// reusing the same `AAQueryInfo` to preserve the state across queries,
+/// essentially making AA work in "batch mode". The internal state cannot be
+/// cleared, so to go "out-of-batch-mode", the user must either use AAResults,
+/// or create a new BatchAAResults.
+class BatchAAResults {
+  AAResults &AA;
+  AAQueryInfo AAQI;
+
+public:
+  BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+    return AA.alias(LocA, LocB, AAQI);
+  }
+  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
+    return AA.pointsToConstantMemory(Loc, AAQI, OrLocal);
+  }
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+    return AA.getModRefInfo(Call, Loc, AAQI);
+  }
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+    return AA.getModRefInfo(Call1, Call2, AAQI);
+  }
+  ModRefInfo getModRefInfo(const Instruction *I,
+                           const Optional<MemoryLocation> &OptLoc) {
+    return AA.getModRefInfo(I, OptLoc, AAQI);
+  }
+  ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) {
+    return AA.getModRefInfo(I, Call2, AAQI);
+  }
+  ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
+    return AA.getArgModRefInfo(Call, ArgIdx);
+  }
+  FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
+    return AA.getModRefBehavior(Call);
+  }
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
@@ -735,12 +837,12 @@ public:
   /// each other. This is the interface that must be implemented by specific
   /// alias analysis implementations.
   virtual AliasResult alias(const MemoryLocation &LocA,
-                            const MemoryLocation &LocB) = 0;
+                            const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0;
 
   /// Checks whether the given location points to constant memory, or if
   /// \p OrLocal is true whether it points to a local alloca.
   virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
-                                      bool OrLocal) = 0;
+                                      AAQueryInfo &AAQI, bool OrLocal) = 0;
 
   /// @}
   //===--------------------------------------------------------------------===//
@@ -764,13 +866,14 @@ public:
   /// getModRefInfo (for call sites) - Return information about whether
   /// a particular call site modifies or reads the specified memory location.
   virtual ModRefInfo getModRefInfo(const CallBase *Call,
-                                   const MemoryLocation &Loc) = 0;
+                                   const MemoryLocation &Loc,
+                                   AAQueryInfo &AAQI) = 0;
 
   /// Return information about whether two call sites may refer to the same set
   /// of memory locations. See the AA documentation for details:
   ///   http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
-  virtual ModRefInfo getModRefInfo(const CallBase *Call1,
-                                   const CallBase *Call2) = 0;
+  virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                                   AAQueryInfo &AAQI) = 0;
 
   /// @}
 };
@@ -792,14 +895,14 @@ public:
 
   void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
 
-  AliasResult alias(const MemoryLocation &LocA,
-                    const MemoryLocation &LocB) override {
-    return Result.alias(LocA, LocB);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI) override {
+    return Result.alias(LocA, LocB, AAQI);
   }
 
-  bool pointsToConstantMemory(const MemoryLocation &Loc,
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
                               bool OrLocal) override {
-    return Result.pointsToConstantMemory(Loc, OrLocal);
+    return Result.pointsToConstantMemory(Loc, AAQI, OrLocal);
   }
 
   ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
@@ -814,14 +917,14 @@ public:
     return Result.getModRefBehavior(F);
   }
 
-  ModRefInfo getModRefInfo(const CallBase *Call,
-                           const MemoryLocation &Loc) override {
-    return Result.getModRefInfo(Call, Loc);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI) override {
+    return Result.getModRefInfo(Call, Loc, AAQI);
   }
 
-  ModRefInfo getModRefInfo(const CallBase *Call1,
-                           const CallBase *Call2) override {
-    return Result.getModRefInfo(Call1, Call2);
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI) override {
+    return Result.getModRefInfo(Call1, Call2, AAQI);
   }
 };
 
@@ -867,13 +970,16 @@ protected:
     AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult)
         : AAR(AAR), CurrentResult(CurrentResult) {}
 
-    AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
-      return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB);
+    AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                      AAQueryInfo &AAQI) {
+      return AAR ? AAR->alias(LocA, LocB, AAQI)
+                 : CurrentResult.alias(LocA, LocB, AAQI);
     }
 
-    bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
-      return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal)
-                 : CurrentResult.pointsToConstantMemory(Loc, OrLocal);
+    bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                                bool OrLocal) {
+      return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal)
+                 : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal);
     }
 
     ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
@@ -890,14 +996,16 @@ protected:
       return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
     }
 
-    ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
-      return AAR ? AAR->getModRefInfo(Call, Loc)
-                 : CurrentResult.getModRefInfo(Call, Loc);
+    ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                             AAQueryInfo &AAQI) {
+      return AAR ? AAR->getModRefInfo(Call, Loc, AAQI)
+                 : CurrentResult.getModRefInfo(Call, Loc, AAQI);
     }
 
-    ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
-      return AAR ? AAR->getModRefInfo(Call1, Call2)
-                 : CurrentResult.getModRefInfo(Call1, Call2);
+    ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                             AAQueryInfo &AAQI) {
+      return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI)
+                 : CurrentResult.getModRefInfo(Call1, Call2, AAQI);
     }
   };
 
@@ -921,11 +1029,13 @@ protected:
   AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); }
 
 public:
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI) {
     return MayAlias;
   }
 
-  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal) {
     return false;
   }
 
@@ -941,11 +1051,13 @@ public:
     return FMRB_UnknownModRefBehavior;
   }
 
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI) {
     return ModRefInfo::ModRef;
   }
 
-  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI) {
     return ModRefInfo::ModRef;
   }
 };
@@ -984,6 +1096,11 @@ bool isIdentifiedFunctionLocal(const Value *V);
 /// This manager effectively wraps the AnalysisManager for registering alias
 /// analyses. When you register your alias analysis with this manager, it will
 /// ensure the analysis itself is registered with its AnalysisManager.
+///
+/// The result of this analysis is only invalidated if one of the particular
+/// aggregated AA results ends up being invalidated. This removes the need to
+/// explicitly preserve the results of `AAManager`. Note that analyses should no
+/// longer be registered once the `AAManager` is run.
 class AAManager : public AnalysisInfoMixin<AAManager> {
 public:
   using Result = AAResults;
diff --git a/include/llvm/Analysis/AliasAnalysisEvaluator.h b/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 0941814a56c3..972eceaa3ba9 100644
--- a/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -1,9 +1,8 @@
 //===- AliasAnalysisEvaluator.h - Alias Analysis Accuracy Evaluator -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 7ed5cd5c4734..34a509b7f4bb 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/AliasSetTracker.h - Build Alias Sets -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,6 +36,8 @@ namespace llvm {
 class AliasSetTracker;
 class BasicBlock;
 class LoadInst;
+class Loop;
+class MemorySSA;
 class AnyMemSetInst;
 class AnyMemTransferInst;
 class raw_ostream;
@@ -294,7 +295,8 @@ private:
   void removeFromTracker(AliasSetTracker &AST);
 
   void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size,
-                  const AAMDNodes &AAInfo, bool KnownMustAlias = false);
+                  const AAMDNodes &AAInfo, bool KnownMustAlias = false,
+                  bool SkipSizeUpdate = false);
   void addUnknownInst(Instruction *I, AliasAnalysis &AA);
 
   void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
@@ -310,10 +312,10 @@ private:
   }
 
 public:
-  /// Return true if the specified pointer "may" (or must) alias one of the
-  /// members in the set.
-  bool aliasesPointer(const Value *Ptr, LocationSize Size,
-                      const AAMDNodes &AAInfo, AliasAnalysis &AA) const;
+  /// If the specified pointer "may" (or must) alias one of the members in the
+  /// set, return the appropriate AliasResult. Otherwise return NoAlias.
+  AliasResult aliasesPointer(const Value *Ptr, LocationSize Size,
+                             const AAMDNodes &AAInfo, AliasAnalysis &AA) const;
   bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const;
 };
 
@@ -341,6 +343,8 @@ class AliasSetTracker {
   struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
 
   AliasAnalysis &AA;
+  MemorySSA *MSSA;
+  Loop *L;
   ilist<AliasSet> AliasSets;
 
   using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
@@ -353,6 +357,8 @@ public:
   /// Create an empty collection of AliasSets, and use the specified alias
   /// analysis object to disambiguate load and store addresses.
   explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
+  explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l)
+      : AA(aa), MSSA(mssa), L(l) {}
   ~AliasSetTracker() { clear(); }
 
   /// These methods are used to add different types of instructions to the alias
@@ -377,6 +383,7 @@ public:
   void add(BasicBlock &BB);       // Add all instructions in basic block
   void add(const AliasSetTracker &AST); // Add alias relations from another AST
   void addUnknown(Instruction *I);
+  void addAllInstructionsInLoopUsingMSSA();
 
   void clear();
 
@@ -439,7 +446,8 @@ private:
 
   AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E);
   AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size,
-                                     const AAMDNodes &AAInfo);
+                                     const AAMDNodes &AAInfo,
+                                     bool &MustAliasAll);
 
   /// Merge all alias sets into a single set that is considered to alias any
   /// pointer.
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 46538b1fa86f..b42846472f2e 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -104,6 +103,10 @@ public:
   /// not already be in the cache.
   void registerAssumption(CallInst *CI);
 
+  /// Remove an \@llvm.assume intrinsic from this function's cache if it has
+  /// been added to the cache earlier.
+  void unregisterAssumption(CallInst *CI);
+
   /// Update the cache of values being affected by this assumption (i.e.
   /// the values about which this assumption provides information).
   void updateAffectedValues(CallInst *CI);
@@ -209,6 +212,10 @@ public:
   /// existing cache will be returned.
   AssumptionCache &getAssumptionCache(Function &F);
 
+  /// Return the cached assumptions for a function if it has already been
+  /// scanned. Otherwise return nullptr.
+  AssumptionCache *lookupAssumptionCache(Function &F);
+
   AssumptionCacheTracker();
   ~AssumptionCacheTracker() override;
 
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index 820d7ac0935a..22e8c4b474cb 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -82,14 +81,18 @@ public:
   bool invalidate(Function &Fn, const PreservedAnalyses &PA,
                   FunctionAnalysisManager::Invalidator &Inv);
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
 
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
 
-  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI);
 
   /// Chases pointers until we find a (constant global) or not.
-  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal);
 
   /// Get the location associated with a pointer argument of a callsite.
   ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
@@ -141,11 +144,6 @@ private:
     SmallVector<VariableGEPIndex, 4> VarIndices;
   };
 
-  /// Track alias queries to guard against recursion.
-  using LocPair = std::pair<MemoryLocation, MemoryLocation>;
-  using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>;
-  AliasCacheTy AliasCache;
-
   /// Tracks phi nodes we have visited.
   ///
   /// When interpret "Value" pointer equality as value equality we need to make
@@ -200,22 +198,24 @@ private:
   AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
                        const AAMDNodes &V1AAInfo, const Value *V2,
                        LocationSize V2Size, const AAMDNodes &V2AAInfo,
-                       const Value *UnderlyingV1, const Value *UnderlyingV2);
+                       const Value *UnderlyingV1, const Value *UnderlyingV2,
+                       AAQueryInfo &AAQI);
 
   AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize,
                        const AAMDNodes &PNAAInfo, const Value *V2,
                        LocationSize V2Size, const AAMDNodes &V2AAInfo,
-                       const Value *UnderV2);
+                       const Value *UnderV2, AAQueryInfo &AAQI);
 
   AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize,
                           const AAMDNodes &SIAAInfo, const Value *V2,
                           LocationSize V2Size, const AAMDNodes &V2AAInfo,
-                          const Value *UnderV2);
+                          const Value *UnderV2, AAQueryInfo &AAQI);
 
   AliasResult aliasCheck(const Value *V1, LocationSize V1Size,
                          AAMDNodes V1AATag, const Value *V2,
                          LocationSize V2Size, AAMDNodes V2AATag,
-                         const Value *O1 = nullptr, const Value *O2 = nullptr);
+                         AAQueryInfo &AAQI, const Value *O1 = nullptr,
+                         const Value *O2 = nullptr);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 0b2618735697..8bcfd7ff8f58 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -1,9 +1,8 @@
 //===- BlockFrequencyInfo.h - Block Frequency Analysis ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,7 +67,8 @@ public:
   /// Returns the estimated profile count of \p BB.
   /// This computes the relative block frequency of \p BB and multiplies it by
   /// the enclosing function's count (if available) and returns the value.
-  Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const;
+  Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB,
+                                          bool AllowSynthetic = false) const;
 
   /// Returns the estimated profile count of \p Freq.
   /// This uses the frequency \p Freq and multiplies it by
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 25b2efd33c98..bfe4fb14a2b8 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1,9 +1,8 @@
 //==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -160,10 +159,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
 
 } // end namespace bfi_detail
 
-template <> struct isPodLike<bfi_detail::BlockMass> {
-  static const bool value = true;
-};
-
 /// Base class for BlockFrequencyInfoImpl
 ///
 /// BlockFrequencyInfoImplBase has supporting data structures and some
@@ -187,9 +182,9 @@ public:
   struct BlockNode {
     using IndexType = uint32_t;
 
-    IndexType Index = std::numeric_limits<uint32_t>::max();
+    IndexType Index;
 
-    BlockNode() = default;
+    BlockNode() : Index(std::numeric_limits<uint32_t>::max()) {}
     BlockNode(IndexType Index) : Index(Index) {}
 
     bool operator==(const BlockNode &X) const { return Index == X.Index; }
@@ -525,9 +520,11 @@ public:
 
   BlockFrequency getBlockFreq(const BlockNode &Node) const;
   Optional<uint64_t> getBlockProfileCount(const Function &F,
-                                          const BlockNode &Node) const;
+                                          const BlockNode &Node,
+                                          bool AllowSynthetic = false) const;
   Optional<uint64_t> getProfileCountFromFreq(const Function &F,
-                                             uint64_t Freq) const;
+                                             uint64_t Freq,
+                                             bool AllowSynthetic = false) const;
   bool isIrrLoopHeader(const BlockNode &Node);
 
   void setBlockFreq(const BlockNode &Node, uint64_t Freq);
@@ -973,13 +970,17 @@ public:
   }
 
   Optional<uint64_t> getBlockProfileCount(const Function &F,
-                                          const BlockT *BB) const {
-    return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB));
+                                          const BlockT *BB,
+                                          bool AllowSynthetic = false) const {
+    return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB),
+                                                            AllowSynthetic);
   }
 
   Optional<uint64_t> getProfileCountFromFreq(const Function &F,
-                                             uint64_t Freq) const {
-    return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
+                                             uint64_t Freq,
+                                             bool AllowSynthetic = false) const {
+    return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq,
+                                                               AllowSynthetic);
   }
 
   bool isIrrLoopHeader(const BlockT *BB) {
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 45277db46090..97cb730d16c7 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -1,9 +1,8 @@
 //===- BranchProbabilityInfo.h - Branch Probability Analysis ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index caae0b6e2a8f..bb55e76ac86a 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -1,9 +1,8 @@
 //===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,8 +47,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
 bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
                     bool AllowIdenticalEdges = false);
 
-/// Determine whether instruction 'To' is reachable from 'From',
-/// returning true if uncertain.
+/// Determine whether instruction 'To' is reachable from 'From', without passing
+/// through any blocks in ExclusionSet, returning true if uncertain.
 ///
 /// Determine whether there is a path from From to To within a single function.
 /// Returns false only if we can prove that once 'From' has been executed then
@@ -63,9 +62,10 @@ bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
 /// we find a block that dominates the block containing 'To'. DT is most useful
 /// on branchy code but not loops, and LI is most useful on code with loops but
 /// does not help on branchy code outside loops.
-bool isPotentiallyReachable(const Instruction *From, const Instruction *To,
-                            const DominatorTree *DT = nullptr,
-                            const LoopInfo *LI = nullptr);
+bool isPotentiallyReachable(
+    const Instruction *From, const Instruction *To,
+    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
+    const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
 
 /// Determine whether block 'To' is reachable from 'From', returning
 /// true if uncertain.
@@ -89,6 +89,20 @@ bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
                                     const DominatorTree *DT = nullptr,
                                     const LoopInfo *LI = nullptr);
 
+/// Determine whether there is at least one path from a block in
+/// 'Worklist' to 'StopBB' without passing through any blocks in
+/// 'ExclusionSet', returning true if uncertain.
+///
+/// Determine whether there is a path from at least one block in Worklist to
+/// StopBB within a single function without passing through any of the blocks
+/// in 'ExclusionSet'. Returns false only if we can prove that once any block
+/// in 'Worklist' has been reached then 'StopBB' can not be executed.
+/// Conservatively returns true.
+bool isPotentiallyReachableFromMany(
+    SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
+    const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
+
 /// Return true if the control flow in \p RPOTraversal is irreducible.
 ///
 /// This is a generic implementation to detect CFG irreducibility based on loop
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 5996dd90bcfd..aaefc11653dd 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -1,9 +1,8 @@
 //===-- CFGPrinter.h - CFG printer external interface -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
index 981a8ddc2289..02f999a5b913 100644
--- a/include/llvm/Analysis/CFLAliasAnalysisUtils.h
+++ b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
@@ -1,9 +1,8 @@
 //=- CFLAliasAnalysisUtils.h - Utilities for CFL Alias Analysis ----*- C++-*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // \file
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 8ae72553ab94..7c8b42b1d8d2 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -1,9 +1,8 @@
 //==- CFLAndersAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -61,7 +60,8 @@ public:
   const cflaa::AliasSummary *getAliasSummary(const Function &);
 
   AliasResult query(const MemoryLocation &, const MemoryLocation &);
-  AliasResult alias(const MemoryLocation &, const MemoryLocation &);
+  AliasResult alias(const MemoryLocation &, const MemoryLocation &,
+                    AAQueryInfo &);
 
 private:
   /// Ensures that the given function is available in the cache.
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 09e366f11e18..cc7a47cd9a5f 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -1,9 +1,8 @@
 //==- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -70,7 +69,8 @@ public:
 
   AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI) {
     if (LocA.Ptr == LocB.Ptr)
       return MustAlias;
 
@@ -80,11 +80,11 @@ public:
     // ConstantExpr, but every query needs to have at least one Value tied to a
     // Function, and neither GlobalValues nor ConstantExprs are.
     if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
-      return AAResultBase::alias(LocA, LocB);
+      return AAResultBase::alias(LocA, LocB, AAQI);
 
     AliasResult QueryResult = query(LocA, LocB);
     if (QueryResult == MayAlias)
-      return AAResultBase::alias(LocA, LocB);
+      return AAResultBase::alias(LocA, LocB, AAQI);
 
     return QueryResult;
   }
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 61b99f6c3e6b..8af5fb86995a 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -1,9 +1,8 @@
 //===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -292,6 +291,21 @@ struct CGSCCUpdateResult {
   /// post-order walk.
   LazyCallGraph::SCC *UpdatedC;
 
+  /// Preserved analyses across SCCs.
+  ///
+  /// We specifically want to allow CGSCC passes to mutate ancestor IR
+  /// (changing both the CG structure and the function IR itself). However,
+  /// this means we need to take special care to correctly mark what analyses
+  /// are preserved *across* SCCs. We have to track this out-of-band here
+  /// because within the main `PassManager` infrastructure we need to mark
+  /// everything within an SCC as preserved in order to avoid repeatedly
+  /// invalidating the same analyses as we unnest pass managers and adaptors.
+  /// So we track the cross-SCC version of the preserved analyses here from any
+  /// code that does direct invalidation of SCC analyses, and then use it
+  /// whenever we move forward in the post-order walk of SCCs before running
+  /// passes over the new SCC.
+  PreservedAnalyses CrossSCCPA;
+
   /// A hacky area where the inliner can retain history about inlining
   /// decisions that mutated the call graph's SCC structure in order to avoid
   /// infinite inlining. See the comments in the inliner's CG update logic.
@@ -339,175 +353,7 @@ public:
   }
 
   /// Runs the CGSCC pass across every SCC in the module.
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
-    // Setup the CGSCC analysis manager from its proxy.
-    CGSCCAnalysisManager &CGAM =
-        AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
-
-    // Get the call graph for this module.
-    LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
-
-    // We keep worklists to allow us to push more work onto the pass manager as
-    // the passes are run.
-    SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
-    SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
-
-    // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
-    // iterating off the worklists.
-    SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
-    SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
-
-    SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
-        InlinedInternalEdges;
-
-    CGSCCUpdateResult UR = {RCWorklist,          CWorklist, InvalidRefSCCSet,
-                            InvalidSCCSet,       nullptr,   nullptr,
-                            InlinedInternalEdges};
-
-    // Request PassInstrumentation from analysis manager, will use it to run
-    // instrumenting callbacks for the passes later.
-    PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
-
-    PreservedAnalyses PA = PreservedAnalyses::all();
-    CG.buildRefSCCs();
-    for (auto RCI = CG.postorder_ref_scc_begin(),
-              RCE = CG.postorder_ref_scc_end();
-         RCI != RCE;) {
-      assert(RCWorklist.empty() &&
-             "Should always start with an empty RefSCC worklist");
-      // The postorder_ref_sccs range we are walking is lazily constructed, so
-      // we only push the first one onto the worklist. The worklist allows us
-      // to capture *new* RefSCCs created during transformations.
-      //
-      // We really want to form RefSCCs lazily because that makes them cheaper
-      // to update as the program is simplified and allows us to have greater
-      // cache locality as forming a RefSCC touches all the parts of all the
-      // functions within that RefSCC.
-      //
-      // We also eagerly increment the iterator to the next position because
-      // the CGSCC passes below may delete the current RefSCC.
-      RCWorklist.insert(&*RCI++);
-
-      do {
-        LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
-        if (InvalidRefSCCSet.count(RC)) {
-          LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
-          continue;
-        }
-
-        assert(CWorklist.empty() &&
-               "Should always start with an empty SCC worklist");
-
-        LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
-                          << "\n");
-
-        // Push the initial SCCs in reverse post-order as we'll pop off the
-        // back and so see this in post-order.
-        for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
-          CWorklist.insert(&C);
-
-        do {
-          LazyCallGraph::SCC *C = CWorklist.pop_back_val();
-          // Due to call graph mutations, we may have invalid SCCs or SCCs from
-          // other RefSCCs in the worklist. The invalid ones are dead and the
-          // other RefSCCs should be queued above, so we just need to skip both
-          // scenarios here.
-          if (InvalidSCCSet.count(C)) {
-            LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
-            continue;
-          }
-          if (&C->getOuterRefSCC() != RC) {
-            LLVM_DEBUG(dbgs()
-                       << "Skipping an SCC that is now part of some other "
-                          "RefSCC...\n");
-            continue;
-          }
-
-          do {
-            // Check that we didn't miss any update scenario.
-            assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
-            assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-            assert(&C->getOuterRefSCC() == RC &&
-                   "Processing an SCC in a different RefSCC!");
-
-            UR.UpdatedRC = nullptr;
-            UR.UpdatedC = nullptr;
-
-            // Check the PassInstrumentation's BeforePass callbacks before
-            // running the pass, skip its execution completely if asked to
-            // (callback returns false).
-            if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
-              continue;
-
-            PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
-
-            if (UR.InvalidatedSCCs.count(C))
-              PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
-            else
-              PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
-
-            // Update the SCC and RefSCC if necessary.
-            C = UR.UpdatedC ? UR.UpdatedC : C;
-            RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
-
-            // If the CGSCC pass wasn't able to provide a valid updated SCC,
-            // the current SCC may simply need to be skipped if invalid.
-            if (UR.InvalidatedSCCs.count(C)) {
-              LLVM_DEBUG(dbgs()
-                         << "Skipping invalidated root or island SCC!\n");
-              break;
-            }
-            // Check that we didn't miss any update scenario.
-            assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-
-            // We handle invalidating the CGSCC analysis manager's information
-            // for the (potentially updated) SCC here. Note that any other SCCs
-            // whose structure has changed should have been invalidated by
-            // whatever was updating the call graph. This SCC gets invalidated
-            // late as it contains the nodes that were actively being
-            // processed.
-            CGAM.invalidate(*C, PassPA);
-
-            // Then intersect the preserved set so that invalidation of module
-            // analyses will eventually occur when the module pass completes.
-            PA.intersect(std::move(PassPA));
-
-            // The pass may have restructured the call graph and refined the
-            // current SCC and/or RefSCC. We need to update our current SCC and
-            // RefSCC pointers to follow these. Also, when the current SCC is
-            // refined, re-run the SCC pass over the newly refined SCC in order
-            // to observe the most precise SCC model available. This inherently
-            // cannot cycle excessively as it only happens when we split SCCs
-            // apart, at most converging on a DAG of single nodes.
-            // FIXME: If we ever start having RefSCC passes, we'll want to
-            // iterate there too.
-            if (UR.UpdatedC)
-              LLVM_DEBUG(dbgs()
-                         << "Re-running SCC passes after a refinement of the "
-                            "current SCC: "
-                         << *UR.UpdatedC << "\n");
-
-            // Note that both `C` and `RC` may at this point refer to deleted,
-            // invalid SCC and RefSCCs respectively. But we will short circuit
-            // the processing when we check them in the loop above.
-          } while (UR.UpdatedC);
-        } while (!CWorklist.empty());
-
-        // We only need to keep internal inlined edge information within
-        // a RefSCC, clear it to save on space and let the next time we visit
-        // any of these functions have a fresh start.
-        InlinedInternalEdges.clear();
-      } while (!RCWorklist.empty());
-    }
-
-    // By definition we preserve the call garph, all SCC analyses, and the
-    // analysis proxies by handling them above and in any nested pass managers.
-    PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
-    PA.preserve<LazyCallGraphAnalysis>();
-    PA.preserve<CGSCCAnalysisManagerModuleProxy>();
-    PA.preserve<FunctionAnalysisManagerModuleProxy>();
-    return PA;
-  }
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 
 private:
   CGSCCPassT Pass;
@@ -873,6 +719,210 @@ DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass,
   return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations);
 }
 
+// Out-of-line implementation details for templates below this point.
+
+template <typename CGSCCPassT>
+PreservedAnalyses
+ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M,
+                                                   ModuleAnalysisManager &AM) {
+  // Setup the CGSCC analysis manager from its proxy.
+  CGSCCAnalysisManager &CGAM =
+      AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
+
+  // Get the call graph for this module.
+  LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+  // We keep worklists to allow us to push more work onto the pass manager as
+  // the passes are run.
+  SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+  SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+  // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+  // iterating off the worklists.
+  SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+  SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+  SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
+      InlinedInternalEdges;
+
+  CGSCCUpdateResult UR = {
+      RCWorklist, CWorklist, InvalidRefSCCSet,         InvalidSCCSet,
+      nullptr,    nullptr,   PreservedAnalyses::all(), InlinedInternalEdges};
+
+  // Request PassInstrumentation from analysis manager, will use it to run
+  // instrumenting callbacks for the passes later.
+  PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
+  PreservedAnalyses PA = PreservedAnalyses::all();
+  CG.buildRefSCCs();
+  for (auto RCI = CG.postorder_ref_scc_begin(),
+            RCE = CG.postorder_ref_scc_end();
+       RCI != RCE;) {
+    assert(RCWorklist.empty() &&
+           "Should always start with an empty RefSCC worklist");
+    // The postorder_ref_sccs range we are walking is lazily constructed, so
+    // we only push the first one onto the worklist. The worklist allows us
+    // to capture *new* RefSCCs created during transformations.
+    //
+    // We really want to form RefSCCs lazily because that makes them cheaper
+    // to update as the program is simplified and allows us to have greater
+    // cache locality as forming a RefSCC touches all the parts of all the
+    // functions within that RefSCC.
+    //
+    // We also eagerly increment the iterator to the next position because
+    // the CGSCC passes below may delete the current RefSCC.
+    RCWorklist.insert(&*RCI++);
+
+    do {
+      LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+      if (InvalidRefSCCSet.count(RC)) {
+        LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
+        continue;
+      }
+
+      assert(CWorklist.empty() &&
+             "Should always start with an empty SCC worklist");
+
+      LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
+                        << "\n");
+
+      // Push the initial SCCs in reverse post-order as we'll pop off the
+      // back and so see this in post-order.
+      for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
+        CWorklist.insert(&C);
+
+      do {
+        LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+        // Due to call graph mutations, we may have invalid SCCs or SCCs from
+        // other RefSCCs in the worklist. The invalid ones are dead and the
+        // other RefSCCs should be queued above, so we just need to skip both
+        // scenarios here.
+        if (InvalidSCCSet.count(C)) {
+          LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
+          continue;
+        }
+        if (&C->getOuterRefSCC() != RC) {
+          LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
+                               "RefSCC...\n");
+          continue;
+        }
+
+        // Ensure we can proxy analysis updates from the CGSCC analysis
+        // manager into the Function analysis manager by getting a proxy here.
+        // FIXME: This seems like a bit of a hack. We should find a cleaner
+        // or more constructive way to ensure this happens.
+        (void)CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG);
+
+        // Each time we visit a new SCC pulled off the worklist,
+        // a transformation of a child SCC may have also modified this parent
+        // and invalidated analyses. So we invalidate using the update record's
+        // cross-SCC preserved set. This preserved set is intersected by any
+        // CGSCC pass that handles invalidation (primarily pass managers) prior
+        // to marking its SCC as preserved. That lets us track everything that
+        // might need invalidation across SCCs without excessive invalidations
+        // on a single SCC.
+        //
+        // This essentially allows SCC passes to freely invalidate analyses
+        // of any ancestor SCC. If this becomes detrimental to successfully
+        // caching analyses, we could force each SCC pass to manually
+        // invalidate the analyses for any SCCs other than themselves which
+        // are mutated. However, that seems to lose the robustness of the
+        // pass-manager driven invalidation scheme.
+        //
+        // FIXME: This is redundant in one case -- the top of the worklist may
+        // *also* be the same SCC we just ran over (and invalidated for). In
+        // that case, we'll end up doing a redundant invalidation here as
+        // a consequence.
+        CGAM.invalidate(*C, UR.CrossSCCPA);
+
+        do {
+          // Check that we didn't miss any update scenario.
+          assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+          assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+          assert(&C->getOuterRefSCC() == RC &&
+                 "Processing an SCC in a different RefSCC!");
+
+          UR.UpdatedRC = nullptr;
+          UR.UpdatedC = nullptr;
+
+          // Check the PassInstrumentation's BeforePass callbacks before
+          // running the pass, skip its execution completely if asked to
+          // (callback returns false).
+          if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+            continue;
+
+          PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+
+          if (UR.InvalidatedSCCs.count(C))
+            PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+          else
+            PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
+          // Update the SCC and RefSCC if necessary.
+          C = UR.UpdatedC ? UR.UpdatedC : C;
+          RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+
+          // If the CGSCC pass wasn't able to provide a valid updated SCC,
+          // the current SCC may simply need to be skipped if invalid.
+          if (UR.InvalidatedSCCs.count(C)) {
+            LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+            break;
+          }
+          // Check that we didn't miss any update scenario.
+          assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+          // We handle invalidating the CGSCC analysis manager's information
+          // for the (potentially updated) SCC here. Note that any other SCCs
+          // whose structure has changed should have been invalidated by
+          // whatever was updating the call graph. This SCC gets invalidated
+          // late as it contains the nodes that were actively being
+          // processed.
+          CGAM.invalidate(*C, PassPA);
+
+          // Then intersect the preserved set so that invalidation of module
+          // analyses will eventually occur when the module pass completes.
+          // Also intersect with the cross-SCC preserved set to capture any
+          // cross-SCC invalidation.
+          UR.CrossSCCPA.intersect(PassPA);
+          PA.intersect(std::move(PassPA));
+
+          // The pass may have restructured the call graph and refined the
+          // current SCC and/or RefSCC. We need to update our current SCC and
+          // RefSCC pointers to follow these. Also, when the current SCC is
+          // refined, re-run the SCC pass over the newly refined SCC in order
+          // to observe the most precise SCC model available. This inherently
+          // cannot cycle excessively as it only happens when we split SCCs
+          // apart, at most converging on a DAG of single nodes.
+          // FIXME: If we ever start having RefSCC passes, we'll want to
+          // iterate there too.
+          if (UR.UpdatedC)
+            LLVM_DEBUG(dbgs()
+                       << "Re-running SCC passes after a refinement of the "
+                          "current SCC: "
+                       << *UR.UpdatedC << "\n");
+
+          // Note that both `C` and `RC` may at this point refer to deleted,
+          // invalid SCC and RefSCCs respectively. But we will short circuit
+          // the processing when we check them in the loop above.
+        } while (UR.UpdatedC);
+      } while (!CWorklist.empty());
+
+      // We only need to keep internal inlined edge information within
+      // a RefSCC, clear it to save on space and let the next time we visit
+      // any of these functions have a fresh start.
+      InlinedInternalEdges.clear();
+    } while (!RCWorklist.empty());
+  }
+
+  // By definition we preserve the call graph, all SCC analyses, and the
+  // analysis proxies by handling them above and in any nested pass managers.
+  PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+  PA.preserve<LazyCallGraphAnalysis>();
+  PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+  PA.preserve<FunctionAnalysisManagerModuleProxy>();
+  return PA;
+}
+
 // Clear out the debug logging macro.
 #undef DEBUG_TYPE
 
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index f109cf2fac4d..7a10183c4d91 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -1,9 +1,8 @@
 //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -48,8 +47,8 @@
 
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/ValueHandle.h"
@@ -230,11 +229,11 @@ public:
   }
 
   /// Adds a function to the list of functions called by this one.
-  void addCalledFunction(CallSite CS, CallGraphNode *M) {
-    assert(!CS.getInstruction() || !CS.getCalledFunction() ||
-           !CS.getCalledFunction()->isIntrinsic() ||
-           !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID()));
-    CalledFunctions.emplace_back(CS.getInstruction(), M);
+  void addCalledFunction(CallBase *Call, CallGraphNode *M) {
+    assert(!Call || !Call->getCalledFunction() ||
+           !Call->getCalledFunction()->isIntrinsic() ||
+           !Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()));
+    CalledFunctions.emplace_back(Call, M);
     M->AddRef();
   }
 
@@ -247,7 +246,7 @@ public:
   /// Removes the edge in the node for the specified call site.
   ///
   /// Note that this method takes linear time, so it should be used sparingly.
-  void removeCallEdgeFor(CallSite CS);
+  void removeCallEdgeFor(CallBase &Call);
 
   /// Removes all call edges from this node to the specified callee
   /// function.
@@ -264,7 +263,8 @@ public:
   /// new one.
   ///
   /// Note that this method takes linear time, so it should be used sparingly.
-  void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+  void replaceCallEdge(CallBase &Call, CallBase &NewCall,
+                       CallGraphNode *NewNode);
 
 private:
   friend class CallGraph;
diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h
index ace54607634c..1b5b7e2f039e 100644
--- a/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/include/llvm/Analysis/CallGraphSCCPass.h
@@ -1,9 +1,8 @@
 //===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CallPrinter.h b/include/llvm/Analysis/CallPrinter.h
index 8b697d5aa149..8d4159f3ddc0 100644
--- a/include/llvm/Analysis/CallPrinter.h
+++ b/include/llvm/Analysis/CallPrinter.h
@@ -1,9 +1,8 @@
 //===-- CallPrinter.h - Call graph printer external interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index aaaaff9ae252..ca7abd34fea2 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -1,9 +1,8 @@
 //===----- llvm/Analysis/CaptureTracking.h - Pointer capture ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CmpInstAnalysis.h b/include/llvm/Analysis/CmpInstAnalysis.h
index 0e9c6a96b0f4..3d34cd12aea4 100644
--- a/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/include/llvm/Analysis/CmpInstAnalysis.h
@@ -1,9 +1,8 @@
 //===-- CmpInstAnalysis.h - Utils to help fold compare insts ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 752902238522..1482b66a3080 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -1,9 +1,8 @@
 //===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,7 +16,6 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/CallSite.h"
 
 namespace llvm {
 class AssumptionCache;
@@ -29,14 +27,6 @@ class DataLayout;
 class TargetTransformInfo;
 class Value;
 
-/// Check whether a call will lower to something small.
-///
-/// This tests checks whether this callsite will lower to something
-/// significantly cheaper than a traditional call, often a single
-/// instruction. Note that if isInstructionFree(CS.getInstruction()) would
-/// return true, so will this function.
-bool callIsSmall(ImmutableCallSite CS);
-
 /// Utility to calculate the size and a few similar metrics for a set
 /// of basic blocks.
 struct CodeMetrics {
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 192c1abddcd2..2385b6f09c40 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -1,9 +1,8 @@
 //===-- ConstantFolding.h - Fold instructions into constants ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,7 +22,7 @@
 namespace llvm {
 class APInt;
 template <typename T> class ArrayRef;
-class CallSite;
+class CallBase;
 class Constant;
 class ConstantExpr;
 class ConstantVector;
@@ -31,7 +30,6 @@ class DataLayout;
 class Function;
 class GlobalValue;
 class Instruction;
-class ImmutableCallSite;
 class TargetLibraryInfo;
 class Type;
 
@@ -73,6 +71,12 @@ ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS,
                                 Constant *RHS, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI = nullptr);
 
+/// Attempt to constant fold a unary operation with the specified
+/// operand. If it fails, it returns a constant expression of the specified
+/// operand.
+Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+                                     const DataLayout &DL);
+
 /// Attempt to constant fold a binary operation with the specified
 /// operands.  If it fails, it returns a constant expression of the specified
 /// operands.
@@ -139,11 +143,11 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
 
 /// canConstantFoldCallTo - Return true if its even possible to fold a call to
 /// the specified function.
-bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F);
+bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
 
 /// ConstantFoldCall - Attempt to constant fold a call to the specified function
 /// with the specified arguments, returning null if unsuccessful.
-Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F,
+Constant *ConstantFoldCall(const CallBase *Call, Function *F,
                            ArrayRef<Constant *> Operands,
                            const TargetLibraryInfo *TLI = nullptr);
 
@@ -155,7 +159,7 @@ Constant *ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
 
 /// Check whether the given call has no side-effects.
 /// Specifically checks for math routimes which sometimes set errno.
-bool isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI);
+bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI);
 }
 
 #endif
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index b7447a0547d5..0410a3314659 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -1,9 +1,8 @@
 //===-- DOTGraphTraitsPass.h - Print/View dotty graphs-----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h
index 4c4e3f6c99e7..04db3eb57c18 100644
--- a/include/llvm/Analysis/DemandedBits.h
+++ b/include/llvm/Analysis/DemandedBits.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/DemandedBits.h - Determine demanded bits ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 69d0e2c1513e..997013a5fc8e 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -1,9 +1,8 @@
 //===-- llvm/Analysis/DependenceAnalysis.h -------------------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -275,6 +274,10 @@ template <typename T> class ArrayRef;
                    LoopInfo *LI)
         : AA(AA), SE(SE), LI(LI), F(F) {}
 
+    /// Handle transitive invalidation when the cached analysis results go away.
+    bool invalidate(Function &F, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &Inv);
+
     /// depends - Tests for a dependence between the Src and Dst instructions.
     /// Returns NULL if no dependence; otherwise, returns a Dependence (or a
     /// FullDependence) with as much information as can be gleaned.
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h
index d834862db095..3cfb9d13df94 100644
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/DomPrinter.h b/include/llvm/Analysis/DomPrinter.h
index 0ed28994995a..a177f877b295 100644
--- a/include/llvm/Analysis/DomPrinter.h
+++ b/include/llvm/Analysis/DomPrinter.h
@@ -1,9 +1,8 @@
 //===-- DomPrinter.h - Dom printer external interface ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/DomTreeUpdater.h b/include/llvm/Analysis/DomTreeUpdater.h
new file mode 100644
index 000000000000..5ccce2e064cc
--- /dev/null
+++ b/include/llvm/Analysis/DomTreeUpdater.h
@@ -0,0 +1,309 @@
+//===- DomTreeUpdater.h - DomTree/Post DomTree Updater ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DomTreeUpdater class, which provides a uniform way to
+// update dominator tree related data structures.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOMTREEUPDATER_H
+#define LLVM_ANALYSIS_DOMTREEUPDATER_H
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <functional>
+#include <vector>
+
+namespace llvm {
+class DomTreeUpdater {
+public:
+  enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 };
+
+  explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_)
+      : DT(&DT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_)
+      : DT(DT_), Strategy(Strategy_) {}
+  DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_)
+      : PDT(&PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_)
+      : PDT(PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_,
+                 UpdateStrategy Strategy_)
+      : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {}
+  DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_,
+                 UpdateStrategy Strategy_)
+      : DT(DT_), PDT(PDT_), Strategy(Strategy_) {}
+
+  ~DomTreeUpdater() { flush(); }
+
+  /// Returns true if the current strategy is Lazy.
+  bool isLazy() const { return Strategy == UpdateStrategy::Lazy; };
+
+  /// Returns true if the current strategy is Eager.
+  bool isEager() const { return Strategy == UpdateStrategy::Eager; };
+
+  /// Returns true if it holds a DominatorTree.
+  bool hasDomTree() const { return DT != nullptr; }
+
+  /// Returns true if it holds a PostDominatorTree.
+  bool hasPostDomTree() const { return PDT != nullptr; }
+
+  /// Returns true if there is a BasicBlock awaiting deletion.
+  /// The deletion will not happen until a flush event occurs and
+  /// all available trees are up-to-date.
+  /// Returns false under Eager UpdateStrategy.
+  bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); }
+
+  /// Returns true if DelBB is awaiting deletion.
+  /// Returns false under Eager UpdateStrategy.
+  bool isBBPendingDeletion(BasicBlock *DelBB) const;
+
+  /// Returns true if either of DT or PDT is valid and the tree has at
+  /// least one update pending. If DT or PDT is nullptr it is treated
+  /// as having no pending updates. This function does not check
+  /// whether there is a BasicBlock awaiting deletion.
+  /// Returns false under Eager UpdateStrategy.
+  bool hasPendingUpdates() const;
+
+  /// Returns true if there are DominatorTree updates queued.
+  /// Returns false under Eager UpdateStrategy or DT is nullptr.
+  bool hasPendingDomTreeUpdates() const;
+
+  /// Returns true if there are PostDominatorTree updates queued.
+  /// Returns false under Eager UpdateStrategy or PDT is nullptr.
+  bool hasPendingPostDomTreeUpdates() const;
+
+  ///@{
+  /// \name Mutation APIs
+  ///
+  /// These methods provide APIs for submitting updates to the DominatorTree and
+  /// the PostDominatorTree.
+  ///
+  /// Note: There are two strategies to update the DominatorTree and the
+  /// PostDominatorTree:
+  /// 1. Eager UpdateStrategy: Updates are submitted and then flushed
+  /// immediately.
+  /// 2. Lazy UpdateStrategy: Updates are submitted but only flushed when you
+  /// explicitly call Flush APIs. It is recommended to use this update strategy
+  /// when you submit a bunch of updates multiple times which can then
+  /// add up to a large number of updates between two queries on the
+  /// DominatorTree. The incremental updater can reschedule the updates or
+  /// decide to recalculate the dominator tree in order to speedup the updating
+  /// process depending on the number of updates.
+  ///
+  /// Although GenericDomTree provides several update primitives,
+  /// it is not encouraged to use these APIs directly.
+
+  /// Submit updates to all available trees.
+  /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+  /// queues the updates.
+  ///
+  /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+  /// in sync with + all updates before that single update.
+  ///
+  /// CAUTION!
+  /// 1. It is required for the state of the LLVM IR to be updated
+  /// *before* submitting the updates because the internal update routine will
+  /// analyze the current state of the CFG to determine whether an update
+  /// is valid.
+  /// 2. It is illegal to submit any update that has already been submitted,
+  /// i.e., you must not insert an edge that already exists or delete a
+  /// nonexistent edge.
+  void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
+
+  /// Submit updates to all available trees. It will also
+  /// 1. discard duplicated updates,
+  /// 2. remove invalid updates. (An invalid update is the deletion of an edge
+  /// that still exists or the insertion of an edge that does not exist.)
+  /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+  /// queues the updates.
+  ///
+  /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+  /// in sync with + all updates before that single update.
+  ///
+  /// CAUTION!
+  /// 1. It is required for the state of the LLVM IR to be updated
+  /// *before* submitting the updates because the internal update routine will
+  /// analyze the current state of the CFG to determine whether an update
+  /// is valid.
+  /// 2. It is illegal to submit any update that has already been submitted,
+  /// i.e., you must not insert an edge that already exists or delete a
+  /// nonexistent edge.
+  /// 3. It is only legal to submit updates to an edge in the order CFG changes
+  /// are made. The order you submit updates on different edges is not
+  /// restricted.
+  void applyUpdatesPermissive(ArrayRef<DominatorTree::UpdateType> Updates);
+
+  /// Notify DTU that the entry block was replaced.
+  /// Recalculate all available trees and flush all BasicBlocks
+  /// awaiting deletion immediately.
+  void recalculate(Function &F);
+
+  /// \deprecated { Submit an edge insertion to all available trees. The Eager
+  /// Strategy flushes this update immediately while the Lazy Strategy queues
+  /// the update. An internal function checks if the edge exists in the CFG in
+  /// DEBUG mode. CAUTION! This function has to be called *after* making the
+  /// update on the actual CFG. It is illegal to submit any update that has
+  /// already been applied. }
+  LLVM_ATTRIBUTE_DEPRECATED(void insertEdge(BasicBlock *From, BasicBlock *To),
+                            "Use applyUpdates() instead.");
+
+  /// \deprecated {Submit an edge insertion to all available trees.
+  /// Under either Strategy, an invalid update will be discarded silently.
+  /// Invalid update means inserting an edge that does not exist in the CFG.
+  /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+  /// queues the update. It is only recommended to use this method when you
+  /// want to discard an invalid update.
+  /// CAUTION! It is illegal to submit any update that has already been
+  /// submitted. }
+  LLVM_ATTRIBUTE_DEPRECATED(void insertEdgeRelaxed(BasicBlock *From,
+                                                   BasicBlock *To),
+                            "Use applyUpdatesPermissive() instead.");
+
+  /// \deprecated { Submit an edge deletion to all available trees. The Eager
+  /// Strategy flushes this update immediately while the Lazy Strategy queues
+  /// the update. An internal function checks if the edge doesn't exist in the
+  /// CFG in DEBUG mode.
+  /// CAUTION! This function has to be called *after* making the update on the
+  /// actual CFG. It is illegal to submit any update that has already been
+  /// submitted. }
+  LLVM_ATTRIBUTE_DEPRECATED(void deleteEdge(BasicBlock *From, BasicBlock *To),
+                            "Use applyUpdates() instead.");
+
+  /// \deprecated { Submit an edge deletion to all available trees.
+  /// Under either Strategy, an invalid update will be discarded silently.
+  /// Invalid update means deleting an edge that exists in the CFG.
+  /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+  /// queues the update. It is only recommended to use this method when you
+  /// want to discard an invalid update.
+  /// CAUTION! It is illegal to submit any update that has already been
+  /// submitted. }
+  LLVM_ATTRIBUTE_DEPRECATED(void deleteEdgeRelaxed(BasicBlock *From,
+                                                   BasicBlock *To),
+                            "Use applyUpdatesPermissive() instead.");
+
+  /// Delete DelBB. DelBB will be removed from its Parent and
+  /// erased from available trees if it exists and finally get deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
+  /// all available trees are up-to-date. Assert if any instruction of DelBB is
+  /// modified while awaiting deletion. When both DT and PDT are nullptrs, DelBB
+  /// will be queued until flush() is called.
+  void deleteBB(BasicBlock *DelBB);
+
+  /// Delete DelBB. DelBB will be removed from its Parent and
+  /// erased from available trees if it exists. Then the callback will
+  /// be called. Finally, DelBB will be deleted.
+  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
+  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
+  /// all available trees are up-to-date. Assert if any instruction of DelBB is
+  /// modified while awaiting deletion. Multiple callbacks can be queued for one
+  /// DelBB under Lazy UpdateStrategy.
+  void callbackDeleteBB(BasicBlock *DelBB,
+                        std::function<void(BasicBlock *)> Callback);
+
+  ///@}
+
+  ///@{
+  /// \name Flush APIs
+  ///
+  /// CAUTION! By the time these flush APIs are called, the current CFG needs
+  /// to be the same as the CFG which DTU is in sync with + all updates
+  /// submitted.
+
+  /// Flush DomTree updates and return DomTree.
+  /// It flushes Deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a DomTree.
+  DominatorTree &getDomTree();
+
+  /// Flush PostDomTree updates and return PostDomTree.
+  /// It flushes Deleted BBs if both trees are up-to-date.
+  /// It must only be called when it has a PostDomTree.
+  PostDominatorTree &getPostDomTree();
+
+  /// Apply all pending updates to available trees and flush all BasicBlocks
+  /// awaiting deletion.
+
+  void flush();
+
+  ///@}
+
+  /// Debug method to help view the internal state of this class.
+  LLVM_DUMP_METHOD void dump() const;
+
+private:
+  class CallBackOnDeletion final : public CallbackVH {
+  public:
+    CallBackOnDeletion(BasicBlock *V,
+                       std::function<void(BasicBlock *)> Callback)
+        : CallbackVH(V), DelBB(V), Callback_(Callback) {}
+
+  private:
+    BasicBlock *DelBB = nullptr;
+    std::function<void(BasicBlock *)> Callback_;
+
+    void deleted() override {
+      Callback_(DelBB);
+      CallbackVH::deleted();
+    }
+  };
+
+  SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
+  size_t PendDTUpdateIndex = 0;
+  size_t PendPDTUpdateIndex = 0;
+  DominatorTree *DT = nullptr;
+  PostDominatorTree *PDT = nullptr;
+  const UpdateStrategy Strategy;
+  SmallPtrSet<BasicBlock *, 8> DeletedBBs;
+  std::vector<CallBackOnDeletion> Callbacks;
+  bool IsRecalculatingDomTree = false;
+  bool IsRecalculatingPostDomTree = false;
+
+  /// First remove all the instructions of DelBB and then make sure DelBB has a
+  /// valid terminator instruction which is necessary to have when DelBB still
+  /// has to be inside of its parent Function while awaiting deletion under Lazy
+  /// UpdateStrategy to prevent other routines from asserting the state of the
+  /// IR is inconsistent. Assert if DelBB is nullptr or has predecessors.
+  void validateDeleteBB(BasicBlock *DelBB);
+
+  /// Returns true if at least one BasicBlock is deleted.
+  bool forceFlushDeletedBB();
+
+  /// Helper function to apply all pending DomTree updates.
+  void applyDomTreeUpdates();
+
+  /// Helper function to apply all pending PostDomTree updates.
+  void applyPostDomTreeUpdates();
+
+  /// Helper function to flush deleted BasicBlocks if all available
+  /// trees are up-to-date.
+  void tryFlushDeletedBB();
+
+  /// Drop all updates applied by all available trees and delete BasicBlocks if
+  /// all available trees are up-to-date.
+  void dropOutOfDateUpdates();
+
+  /// Erase Basic Block node that has been unlinked from Function
+  /// in the DomTree and PostDomTree.
+  void eraseDelBBNode(BasicBlock *DelBB);
+
+  /// Returns true if the update appears in the LLVM IR.
+  /// It is used to check whether an update is valid in
+  /// insertEdge/deleteEdge or is unnecessary in the batch update.
+  bool isUpdateValid(DominatorTree::UpdateType Update) const;
+
+  /// Returns true if the update is self dominance.
+  bool isSelfDominance(DominatorTree::UpdateType Update) const;
+};
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DOMTREEUPDATER_H
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index d94c420d7177..c0bf30e162dd 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/DominanceFrontierImpl.h b/include/llvm/Analysis/DominanceFrontierImpl.h
index 99224c0bf131..aa764be93b91 100644
--- a/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/EHPersonalities.h b/include/llvm/Analysis/EHPersonalities.h
index fe0e65b828ca..d89aa11617b5 100644
--- a/include/llvm/Analysis/EHPersonalities.h
+++ b/include/llvm/Analysis/EHPersonalities.h
@@ -1,9 +1,8 @@
 //===- EHPersonalities.h - Compute EH-related information -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h
index 3a664ca6ef50..d3fcfc2d41ab 100644
--- a/include/llvm/Analysis/GlobalsModRef.h
+++ b/include/llvm/Analysis/GlobalsModRef.h
@@ -1,9 +1,8 @@
 //===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -85,10 +84,12 @@ public:
   //------------------------------------------------
   // Implement the AliasAnalysis API
   //
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
 
   using AAResultBase::getModRefInfo;
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
 
   /// getModRefBehavior - Return the behavior of the specified function if
   /// called from the specified call site.  The call site may be null in which
@@ -114,7 +115,7 @@ private:
 
   bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
   ModRefInfo getModRefInfoForArgument(const CallBase *Call,
-                                      const GlobalValue *GV);
+                                      const GlobalValue *GV, AAQueryInfo &AAQI);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/GuardUtils.h b/include/llvm/Analysis/GuardUtils.h
index 3b151eeafc81..41e7b7c06c75 100644
--- a/include/llvm/Analysis/GuardUtils.h
+++ b/include/llvm/Analysis/GuardUtils.h
@@ -1,9 +1,8 @@
 //===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Utils that are used to perform analyzes related to guards and their
@@ -15,12 +14,31 @@
 
 namespace llvm {
 
+class BasicBlock;
 class User;
+class Value;
 
-/// Returns true iff \p U has semantics of a guard.
+/// Returns true iff \p U has semantics of a guard expressed in the form of a
+/// call to the llvm.experimental.guard intrinsic.
 bool isGuard(const User *U);
 
+/// Returns true iff \p U has semantics of a guard expressed in the form of a
+/// widenable conditional branch to a deopt block.
+bool isGuardAsWidenableBranch(const User *U);
+
+/// If \p U is a widenable branch looking like:
+///   %cond = ...
+///   %wc = call i1 @llvm.experimental.widenable.condition()
+///   %branch_cond = and i1 %cond, %wc
+///   br i1 %branch_cond, label %if_true_bb, label %if_false_bb ; <--- U
+/// the function returns true, and the values %cond and %wc and the blocks
+/// %if_true_bb and %if_false_bb are returned in the parameters (Condition,
+/// WidenableCondition, IfTrueBB and IfFalseBB) respectively. If \p U does
+/// not match this pattern, the function returns false.
+bool parseWidenableBranch(const User *U, Value *&Condition,
+                          Value *&WidenableCondition, BasicBlock *&IfTrueBB,
+                          BasicBlock *&IfFalseBB);
+
 } // llvm
 
 #endif // LLVM_ANALYSIS_GUARDUTILS_H
-
diff --git a/include/llvm/Analysis/IVDescriptors.h b/include/llvm/Analysis/IVDescriptors.h
index 64b4ae23cc59..7be1fd3f5788 100644
--- a/include/llvm/Analysis/IVDescriptors.h
+++ b/include/llvm/Analysis/IVDescriptors.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/IVDescriptors.h - IndVar Descriptors -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -90,10 +89,12 @@ public:
   RecurrenceDescriptor() = default;
 
   RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
-                       MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
-                       bool Signed, SmallPtrSetImpl<Instruction *> &CI)
-      : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
-        UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+                       FastMathFlags FMF, MinMaxRecurrenceKind MK,
+                       Instruction *UAI, Type *RT, bool Signed,
+                       SmallPtrSetImpl<Instruction *> &CI)
+      : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
+        MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
+        IsSigned(Signed) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -199,6 +200,8 @@ public:
 
   MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
 
+  FastMathFlags getFastMathFlags() { return FMF; }
+
   TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
 
   Instruction *getLoopExitInstr() { return LoopExitInstr; }
@@ -238,6 +241,9 @@ private:
   Instruction *LoopExitInstr = nullptr;
   // The kind of the recurrence.
   RecurrenceKind Kind = RK_NoRecurrence;
+  // The fast-math flags on the recurrent instructions.  We propagate these
+  // fast-math flags into the vectorized FP instructions we generate.
+  FastMathFlags FMF;
   // If this a min/max recurrence the kind of recurrence.
   MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
   // First occurrence of unasfe algebra in the PHI's use-chain.
@@ -309,12 +315,16 @@ public:
   /// not have the "fast-math" property. Such operation requires a relaxed FP
   /// mode.
   bool hasUnsafeAlgebra() {
-    return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
+    return (IK == IK_FpInduction) && InductionBinOp &&
+           !cast<FPMathOperator>(InductionBinOp)->isFast();
   }
 
   /// Returns induction operator that does not have "fast-math" property
   /// and requires FP unsafe mode.
   Instruction *getUnsafeAlgebraInst() {
+    if (IK != IK_FpInduction)
+      return nullptr;
+
     if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
       return nullptr;
     return InductionBinOp;
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 035b974c5c1d..f8ea3bcca229 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index be3a28424cf5..8a05e913a910 100644
--- a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -1,9 +1,8 @@
 //===- IndirectCallPromotionAnalysis.h - Indirect call analysis -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/IndirectCallVisitor.h b/include/llvm/Analysis/IndirectCallVisitor.h
index d00cf63368f1..1d1f3f4cc5c0 100644
--- a/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/include/llvm/Analysis/IndirectCallVisitor.h
@@ -1,9 +1,8 @@
 //===-- IndirectCallVisitor.h - indirect call visitor ---------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 4c270354b0c4..611c9de24e47 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -1,9 +1,8 @@
 //===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,7 +22,7 @@
 namespace llvm {
 class AssumptionCacheTracker;
 class BlockFrequencyInfo;
-class CallSite;
+class CallBase;
 class DataLayout;
 class Function;
 class ProfileSummaryInfo;
@@ -68,10 +67,10 @@ class InlineCost {
   };
 
   /// The estimated cost of inlining this callsite.
-  const int Cost;
+  int Cost;
 
   /// The adjusted threshold against which this cost was computed.
-  const int Threshold;
+  int Threshold;
 
   /// Must be set for Always and Never instances.
   const char *Reason = nullptr;
@@ -200,7 +199,7 @@ InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
 
 /// Return the cost associated with a callsite, including parameter passing
 /// and the call/return instruction.
-int getCallsiteCost(CallSite CS, const DataLayout &DL);
+int getCallsiteCost(CallBase &Call, const DataLayout &DL);
 
 /// Get an InlineCost object representing the cost of inlining this
 /// callsite.
@@ -214,7 +213,7 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL);
 /// Also note that calling this function *dynamically* computes the cost of
 /// inlining the callsite. It is an expensive, heavyweight call.
 InlineCost getInlineCost(
-    CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
@@ -225,14 +224,14 @@ InlineCost getInlineCost(
 /// parameter in all other respects.
 //
 InlineCost
-getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
+getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
               TargetTransformInfo &CalleeTTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
 
 /// Minimal filter to detect invalid constructs for inlining.
-bool isInlineViable(Function &Callee);
+InlineResult isInlineViable(Function &Callee);
 }
 
 #endif
diff --git a/include/llvm/Analysis/InstructionPrecedenceTracking.h b/include/llvm/Analysis/InstructionPrecedenceTracking.h
index 073e6ec3b7f6..3c3981066a49 100644
--- a/include/llvm/Analysis/InstructionPrecedenceTracking.h
+++ b/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -1,9 +1,8 @@
 //===-- InstructionPrecedenceTracking.h -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Implements a class that is able to define some instructions as "special"
@@ -93,7 +92,7 @@ public:
 /// example, throwing calls and guards do not always do this. If we need to know
 /// for sure that some instruction is guaranteed to execute if the given block
 /// is reached, then we need to make sure that there is no implicit control flow
-/// instruction (ICFI) preceeding it. For example, this check is required if we
+/// instruction (ICFI) preceding it. For example, this check is required if we
 /// perform PRE moving non-speculable instruction to other place.
 class ImplicitControlFlowTracking : public InstructionPrecedenceTracking {
 public:
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 6662e91037e1..054ffca7215e 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -1,9 +1,8 @@
 //===-- InstructionSimplify.h - Fold instrs into simpler forms --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,8 +40,8 @@ class Function;
 template <typename T, typename... TArgs> class AnalysisManager;
 template <class T> class ArrayRef;
 class AssumptionCache;
+class CallBase;
 class DominatorTree;
-class ImmutableCallSite;
 class DataLayout;
 class FastMathFlags;
 struct LoopStandardAnalysisResults;
@@ -118,6 +117,10 @@ struct SimplifyQuery {
 // deprecated.
 // Please use the SimplifyQuery versions in new code.
 
+/// Given operand for an FNeg, fold the result or return null.
+Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+                        const SimplifyQuery &Q);
+
 /// Given operands for an Add, fold the result or return null.
 Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
                        const SimplifyQuery &Q);
@@ -228,6 +231,15 @@ Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
 Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                        const SimplifyQuery &Q);
 
+/// Given operand for a UnaryOperator, fold the result or return null.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
+
+/// Given operand for an FP UnaryOperator, fold the result or return null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlags when folding the
+/// result. If FastMathFlags are not needed, fall back to SimplifyUnOp.
+Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+                      const SimplifyQuery &Q);
+
 /// Given operands for a BinaryOperator, fold the result or return null.
 Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                      const SimplifyQuery &Q);
@@ -239,16 +251,7 @@ Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                        FastMathFlags FMF, const SimplifyQuery &Q);
 
 /// Given a callsite, fold the result or return null.
-Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q);
-
-/// Given a function and iterators over arguments, fold the result or return
-/// null.
-Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
-                    User::op_iterator ArgEnd, const SimplifyQuery &Q);
-
-/// Given a function and set of arguments, fold the result or return null.
-Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef<Value *> Args,
-                    const SimplifyQuery &Q);
+Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
 
 /// See if we can compute a simplified version of this instruction. If not,
 /// return null.
diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h
index f3714dddedd5..5c9a4535bc7f 100644
--- a/include/llvm/Analysis/Interval.h
+++ b/include/llvm/Analysis/Interval.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/Interval.h - Interval Class Declaration ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 6ffcae592e98..efaaf9715b3d 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -1,9 +1,8 @@
 //===- IntervalIterator.h - Interval Iterator Declaration -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index 50335165711f..5b127c25a2b8 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -1,9 +1,8 @@
 //===- IntervalPartition.h - Interval partition Calculation -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h
index 3083db75b81c..7c826780c318 100644
--- a/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -1,101 +1,89 @@
 //===- IteratedDominanceFrontier.h - Calculate IDF --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// Compute iterated dominance frontiers using a linear time algorithm.
-///
-/// The algorithm used here is based on:
-///
-///   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-///   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-///   Programming Languages
-///   POPL '95. ACM, New York, NY, 62-73.
-///
-/// It has been modified to not explicitly use the DJ graph data structure and
-/// to directly compute pruned SSA using per-variable liveness information.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ANALYSIS_IDF_H
 #define LLVM_ANALYSIS_IDF_H
 
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFGDiff.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
 
 namespace llvm {
 
-/// Determine the iterated dominance frontier, given a set of defining
-/// blocks, and optionally, a set of live-in blocks.
-///
-/// In turn, the results can be used to place phi nodes.
-///
-/// This algorithm is a linear time computation of Iterated Dominance Frontiers,
-/// pruned using the live-in set.
-/// By default, liveness is not used to prune the IDF computation.
-/// The template parameters should be either BasicBlock* or Inverse<BasicBlock
-/// *>, depending on if you want the forward or reverse IDF.
-template <class NodeTy, bool IsPostDom>
-class IDFCalculator {
- public:
-   IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
-       : DT(DT), GD(nullptr), useLiveIn(false) {}
-
-   IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
-                 const GraphDiff<BasicBlock *, IsPostDom> *GD)
-       : DT(DT), GD(GD), useLiveIn(false) {}
-
-   /// Give the IDF calculator the set of blocks in which the value is
-   /// defined.  This is equivalent to the set of starting blocks it should be
-   /// calculating the IDF for (though later gets pruned based on liveness).
-   ///
-   /// Note: This set *must* live for the entire lifetime of the IDF calculator.
-   void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
-     DefBlocks = &Blocks;
-   }
-
-  /// Give the IDF calculator the set of blocks in which the value is
-  /// live on entry to the block.   This is used to prune the IDF calculation to
-  /// not include blocks where any phi insertion would be dead.
-  ///
-  /// Note: This set *must* live for the entire lifetime of the IDF calculator.
-
-  void setLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
-    LiveInBlocks = &Blocks;
-    useLiveIn = true;
-  }
+class BasicBlock;
 
-  /// Reset the live-in block set to be empty, and tell the IDF
-  /// calculator to not use liveness anymore.
-  void resetLiveInBlocks() {
-    LiveInBlocks = nullptr;
-    useLiveIn = false;
+namespace IDFCalculatorDetail {
+
+/// Specialization for BasicBlock for the optional use of GraphDiff.
+template <bool IsPostDom> struct ChildrenGetterTy<BasicBlock, IsPostDom> {
+  using NodeRef = BasicBlock *;
+  using ChildrenTy = SmallVector<BasicBlock *, 8>;
+
+  ChildrenGetterTy() = default;
+  ChildrenGetterTy(const GraphDiff<BasicBlock *, IsPostDom> *GD) : GD(GD) {
+    assert(GD);
   }
 
-  /// Calculate iterated dominance frontiers
-  ///
-  /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in
-  /// the file-level comment.  It performs DF->IDF pruning using the live-in
-  /// set, to avoid computing the IDF for blocks where an inserted PHI node
-  /// would be dead.
-  void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks);
-
-private:
- DominatorTreeBase<BasicBlock, IsPostDom> &DT;
- const GraphDiff<BasicBlock *, IsPostDom> *GD;
- bool useLiveIn;
- const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
- const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
+  ChildrenTy get(const NodeRef &N);
+
+  const GraphDiff<BasicBlock *, IsPostDom> *GD = nullptr;
 };
-typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator;
-typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator;
+
+} // end of namespace IDFCalculatorDetail
+
+template <bool IsPostDom>
+class IDFCalculator final : public IDFCalculatorBase<BasicBlock, IsPostDom> {
+public:
+  using IDFCalculatorBase =
+      typename llvm::IDFCalculatorBase<BasicBlock, IsPostDom>;
+  using ChildrenGetterTy = typename IDFCalculatorBase::ChildrenGetterTy;
+
+  IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
+      : IDFCalculatorBase(DT) {}
+
+  IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
+                const GraphDiff<BasicBlock *, IsPostDom> *GD)
+      : IDFCalculatorBase(DT, ChildrenGetterTy(GD)) {
+    assert(GD);
+  }
+};
+
+using ForwardIDFCalculator = IDFCalculator<false>;
+using ReverseIDFCalculator = IDFCalculator<true>;
+
+//===----------------------------------------------------------------------===//
+// Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace IDFCalculatorDetail {
+
+template <bool IsPostDom>
+typename ChildrenGetterTy<BasicBlock, IsPostDom>::ChildrenTy
+ChildrenGetterTy<BasicBlock, IsPostDom>::get(const NodeRef &N) {
+
+  using OrderedNodeTy =
+      typename IDFCalculatorBase<BasicBlock, IsPostDom>::OrderedNodeTy;
+
+  if (!GD) {
+    auto Children = children<OrderedNodeTy>(N);
+    return {Children.begin(), Children.end()};
+  }
+
+  using SnapShotBBPairTy =
+      std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, OrderedNodeTy>;
+
+  ChildrenTy Ret;
+  for (const auto &SnapShotBBPair : children<SnapShotBBPairTy>({GD, N}))
+    Ret.emplace_back(SnapShotBBPair.second);
+  return Ret;
 }
+
+} // end of namespace IDFCalculatorDetail
+
+} // end of namespace llvm
+
 #endif
diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index d1afb63d7e08..0e7dc943bacf 100644
--- a/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
 //===- LazyBlockFrequencyInfo.h - Lazy Block Frequency Analysis -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index 9e6bcfedcbb9..cae0778cd16d 100644
--- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -1,9 +1,8 @@
 //===- LazyBranchProbabilityInfo.h - Lazy Branch Probability ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index d1ec6a9dcc55..2d83929211e2 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -1,9 +1,8 @@
 //===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -39,6 +38,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -1083,12 +1083,26 @@ public:
         continue;
       }
 
+      // The blockaddress constant expression is a weird special case, we can't
+      // generically walk its operands the way we do for all other constants.
       if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
-        // The blockaddress constant expression is a weird special case, we
-        // can't generically walk its operands the way we do for all other
-        // constants.
-        if (Visited.insert(BA->getFunction()).second)
-          Worklist.push_back(BA->getFunction());
+        // If we've already visited the function referred to by the block
+        // address, we don't need to revisit it.
+        if (Visited.count(BA->getFunction()))
+          continue;
+
+        // If all of the blockaddress' users are instructions within the
+        // referred to function, we don't need to insert a cycle.
+        if (llvm::all_of(BA->users(), [&](User *U) {
+              if (Instruction *I = dyn_cast<Instruction>(U))
+                return I->getFunction() == BA->getFunction();
+              return false;
+            }))
+          continue;
+
+        // Otherwise we should go visit the referred to function.
+        Visited.insert(BA->getFunction());
+        Worklist.push_back(BA->getFunction());
         continue;
       }
 
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 1a4fdb591427..570a5044f6f8 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -1,9 +1,8 @@
 //===- LazyValueInfo.h - Value constraint analysis --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index fc426ad7fb64..0a338b816640 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
index db5919fd91c7..0fea81e215c9 100644
--- a/include/llvm/Analysis/Lint.h
+++ b/include/llvm/Analysis/Lint.h
@@ -1,9 +1,8 @@
 //===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index f110c28bfc6d..5df6bb02308d 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -1,9 +1,8 @@
 //===- Loads.h - Local load analysis --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,7 +25,8 @@ class MDNode;
 /// Return true if this is always a dereferenceable pointer. If the context
 /// instruction is specified perform context-sensitive analysis and return true
 /// if the pointer is dereferenceable at the specified instruction.
-bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool isDereferenceablePointer(const Value *V, Type *Ty,
+                              const DataLayout &DL,
                               const Instruction *CtxI = nullptr,
                               const DominatorTree *DT = nullptr);
 
@@ -34,8 +34,8 @@ bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
 /// greater or equal than requested. If the context instruction is specified
 /// performs context-sensitive analysis and returns true if the pointer is
 /// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
-                                        const DataLayout &DL,
+bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+                                        unsigned Align, const DataLayout &DL,
                                         const Instruction *CtxI = nullptr,
                                         const DominatorTree *DT = nullptr);
 
@@ -56,7 +56,20 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
 /// If it is not obviously safe to load from the specified pointer, we do a
 /// quick local scan of the basic block containing ScanFrom, to determine if
 /// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+                                 const DataLayout &DL,
+                                 Instruction *ScanFrom = nullptr,
+                                 const DominatorTree *DT = nullptr);
+
+/// Return true if we know that executing a load from this value cannot trap.
+///
+/// If DT and ScanFrom are specified this method performs context-sensitive
+/// analysis and returns true if it is safe to load immediately before ScanFrom.
+///
+/// If it is not obviously safe to load from the specified pointer, we do a
+/// quick local scan of the basic block containing ScanFrom, to determine if
+/// the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
                                  const DataLayout &DL,
                                  Instruction *ScanFrom = nullptr,
                                  const DominatorTree *DT = nullptr);
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 4ed00e207753..9e9aaa32c64f 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -523,6 +522,11 @@ public:
   /// no memory dependence cycles.
   bool canVectorizeMemory() const { return CanVecMem; }
 
+  /// Return true if there is a convergent operation in the loop. There may
+  /// still be reported runtime pointer checks that would be required, but it is
+  /// not legal to insert them.
+  bool hasConvergentOp() const { return HasConvergentOp; }
+
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return PtrRtChecking.get();
   }
@@ -643,6 +647,7 @@ private:
 
   /// Cache the result of analyzeLoop.
   bool CanVecMem;
+  bool HasConvergentOp;
 
   /// Indicator that there are non vectorizable stores to a uniform address.
   bool HasDependenceInvolvingLoopInvariantAddress;
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 00e562c4f31f..368a810cfa67 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -1,9 +1,8 @@
 //===- LoopAnalysisManager.h - Loop analysis management ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -62,9 +61,6 @@ struct LoopStandardAnalysisResults {
   MemorySSA *MSSA;
 };
 
-/// Enables memory ssa as a dependency for loop passes.
-extern cl::opt<bool> EnableMSSALoopDependency;
-
 /// Extern template declaration for the analysis set for this IR unit.
 extern template class AllAnalysesOn<Loop>;
 
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 72873546a068..584eb3a8c854 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,8 +54,11 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
+class InductionDescriptor;
 class MDNode;
+class MemorySSAUpdater;
 class PHINode;
+class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -199,9 +201,10 @@ public:
   }
 
   /// True if terminator in the block can branch to another block that is
-  /// outside of the current loop.
+  /// outside of the current loop. \p BB must be inside the loop.
   bool isLoopExiting(const BlockT *BB) const {
     assert(!isInvalid() && "Loop not in a valid state!");
+    assert(contains(BB) && "Exiting block must be part of the loop");
     for (const auto &Succ : children<const BlockT *>(BB)) {
       if (!contains(Succ))
         return true;
@@ -267,16 +270,20 @@ public:
 
   /// Return all unique successor blocks of this loop.
   /// These are the blocks _outside of the current loop_ which are branched to.
-  /// This assumes that loop exits are in canonical form, i.e. all exits are
-  /// dedicated exits.
   void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
 
+  /// Return all unique successor blocks of this loop, ignoring successors of
+  /// the latch block. An exit block reached from the latch that also has a
+  /// non-latch predecessor in the loop is still added to ExitBlocks.
+  /// These are the blocks _outside of the current loop_ which are branched to.
+  void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
   /// If getUniqueExitBlocks would return exactly one block, return that block.
   /// Otherwise return null.
   BlockT *getUniqueExitBlock() const;
 
   /// Edge type.
-  typedef std::pair<const BlockT *, const BlockT *> Edge;
+  typedef std::pair<BlockT *, BlockT *> Edge;
 
   /// Return all pairs of (_inside_block_,_outside_block_).
   void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
@@ -309,6 +316,40 @@ public:
         LoopLatches.push_back(Pred);
   }
 
+  /// Return all inner loops in the loop nest rooted by the loop in preorder,
+  /// with siblings in forward program order.
+  template <class Type>
+  static void getInnerLoopsInPreorder(const LoopT &L,
+                                      SmallVectorImpl<Type> &PreOrderLoops) {
+    SmallVector<LoopT *, 4> PreOrderWorklist;
+    PreOrderWorklist.append(L.rbegin(), L.rend());
+
+    while (!PreOrderWorklist.empty()) {
+      LoopT *L = PreOrderWorklist.pop_back_val();
+      // Sub-loops are stored in forward program order, but we process the
+      // worklist backwards, so append them in reverse order.
+      PreOrderWorklist.append(L->rbegin(), L->rend());
+      PreOrderLoops.push_back(L);
+    }
+  }
+
+  /// Return all loops in the loop nest rooted by the loop in preorder, with
+  /// siblings in forward program order.
+  SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
+    SmallVector<const LoopT *, 4> PreOrderLoops;
+    const LoopT *CurLoop = static_cast<const LoopT *>(this);
+    PreOrderLoops.push_back(CurLoop);
+    getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+    return PreOrderLoops;
+  }
+  SmallVector<LoopT *, 4> getLoopsInPreorder() {
+    SmallVector<LoopT *, 4> PreOrderLoops;
+    LoopT *CurLoop = static_cast<LoopT *>(this);
+    PreOrderLoops.push_back(CurLoop);
+    getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+    return PreOrderLoops;
+  }
+
   //===--------------------------------------------------------------------===//
   // APIs for updating loop information after changing the CFG
   //
@@ -471,7 +512,7 @@ public:
 
   public:
     LocRange() {}
-    LocRange(DebugLoc Start) : Start(std::move(Start)), End(std::move(Start)) {}
+    LocRange(DebugLoc Start) : Start(Start), End(Start) {}
     LocRange(DebugLoc Start, DebugLoc End)
         : Start(std::move(Start)), End(std::move(End)) {}
 
@@ -499,7 +540,8 @@ public:
   /// If InsertPt is specified, it is the point to hoist instructions to.
   /// If null, the terminator of the loop preheader is used.
   bool makeLoopInvariant(Value *V, bool &Changed,
-                         Instruction *InsertPt = nullptr) const;
+                         Instruction *InsertPt = nullptr,
+                         MemorySSAUpdater *MSSAU = nullptr) const;
 
   /// If the given instruction is inside of the loop and it can be hoisted, do
   /// so to make it trivially loop-invariant.
@@ -511,7 +553,8 @@ public:
   /// If null, the terminator of the loop preheader is used.
   ///
   bool makeLoopInvariant(Instruction *I, bool &Changed,
-                         Instruction *InsertPt = nullptr) const;
+                         Instruction *InsertPt = nullptr,
+                         MemorySSAUpdater *MSSAU = nullptr) const;
 
   /// Check to see if the loop has a canonical induction variable: an integer
   /// recurrence that starts at 0 and increments by one each time through the
@@ -522,6 +565,170 @@ public:
   ///
   PHINode *getCanonicalInductionVariable() const;
 
+  /// Obtain the unique incoming and back edge. Return false if they are
+  /// non-unique or the loop is dead; otherwise, return true.
+  bool getIncomingAndBackEdge(BasicBlock *&Incoming,
+                              BasicBlock *&Backedge) const;
+
+  /// Below are some utilities to get loop bounds and induction variable, and
+  /// check if a given phinode is an auxiliary induction variable, as well as
+  /// checking if the loop is canonical.
+  ///
+  /// Here is an example:
+  /// \code
+  /// for (int i = lb; i < ub; i+=step)
+  ///   <loop body>
+  /// --- pseudo LLVMIR ---
+  /// beforeloop:
+  ///   guardcmp = (lb < ub)
+  ///   if (guardcmp) goto preheader; else goto afterloop
+  /// preheader:
+  /// loop:
+  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
+  ///   <loop body>
+  ///   i_2 = i_1 + step
+  /// latch:
+  ///   cmp = (i_2 < ub)
+  ///   if (cmp) goto loop
+  /// exit:
+  /// afterloop:
+  /// \endcode
+  ///
+  /// - getBounds
+  ///   - getInitialIVValue      --> lb
+  ///   - getStepInst            --> i_2 = i_1 + step
+  ///   - getStepValue           --> step
+  ///   - getFinalIVValue        --> ub
+  ///   - getCanonicalPredicate  --> '<'
+  ///   - getDirection           --> Increasing
+  ///
+  /// - getInductionVariable            --> i_1
+  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+  /// - isCanonical                     --> false
+  struct LoopBounds {
+    /// Return the LoopBounds object if
+    /// - the given \p IndVar is an induction variable
+    /// - the initial value of the induction variable can be found
+    /// - the step instruction of the induction variable can be found
+    /// - the final value of the induction variable can be found
+    ///
+    /// Else None.
+    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+                                                ScalarEvolution &SE);
+
+    /// Get the initial value of the loop induction variable.
+    Value &getInitialIVValue() const { return InitialIVValue; }
+
+    /// Get the instruction that updates the loop induction variable.
+    Instruction &getStepInst() const { return StepInst; }
+
+    /// Get the step that the loop induction variable gets updated by in each
+    /// loop iteration. Return nullptr if not found.
+    Value *getStepValue() const { return StepValue; }
+
+    /// Get the final value of the loop induction variable.
+    Value &getFinalIVValue() const { return FinalIVValue; }
+
+    /// Return the canonical predicate for the latch compare instruction, if
+    /// able to be calculated. Else BAD_ICMP_PREDICATE.
+    ///
+    /// A predicate is considered as canonical if requirements below are all
+    /// satisfied:
+    /// 1. The first successor of the latch branch is the loop header
+    ///    If not, invert the predicate.
+    /// 2. One of the operands of the latch comparison is StepInst
+    ///    If not, and
+    ///    - if the current calculated predicate is not ne or eq, flip the
+    ///      predicate.
+    ///    - else if the loop is increasing, return slt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///    - else if the loop is decreasing, return sgt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///
+    /// Here is an example when both (1) and (2) are not satisfied:
+    /// \code
+    /// loop.header:
+    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+    ///  %inc = add %iv, %step
+    ///  %cmp = slt %iv, %finaliv
+    ///  br %cmp, %loop.exit, %loop.header
+    /// loop.exit:
+    /// \endcode
+    /// - The second successor of the latch branch is the loop header instead
+    ///   of the first successor (slt -> sge)
+    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+    ///   instead of the StepInst (%inc) (sge -> sgt)
+    ///
+    /// The predicate would be sgt if both (1) and (2) are satisfied.
+    /// getCanonicalPredicate() returns sgt for this example.
+    /// Note: The IR is not changed.
+    ICmpInst::Predicate getCanonicalPredicate() const;
+
+    /// An enum for the direction of the loop
+    /// - for (int i = 0; i < ub; ++i)  --> Increasing
+    /// - for (int i = ub; i > 0; --i)  --> Decreasing
+    /// - for (int i = x; i != y; i+=z) --> Unknown
+    enum class Direction { Increasing, Decreasing, Unknown };
+
+    /// Get the direction of the loop.
+    Direction getDirection() const;
+
+  private:
+    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+               ScalarEvolution &SE)
+        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+          FinalIVValue(F), SE(SE) {}
+
+    const Loop &L;
+
+    // The initial value of the loop induction variable
+    Value &InitialIVValue;
+
+    // The instruction that updates the loop induction variable
+    Instruction &StepInst;
+
+    // The value that the loop induction variable gets updated by in each loop
+    // iteration
+    Value *StepValue;
+
+    // The final value of the loop induction variable
+    Value &FinalIVValue;
+
+    ScalarEvolution &SE;
+  };
+
+  /// Return the struct LoopBounds collected if all struct members are found,
+  /// else None.
+  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+  /// Return the loop induction variable if found, else return nullptr.
+  /// An instruction is considered as the loop induction variable if
+  /// - it is an induction variable of the loop; and
+  /// - it is used to determine the condition of the branch in the loop latch
+  ///
+  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
+  /// zero and increments by one each time through the loop (but it can be).
+  PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+  /// Get the loop induction descriptor for the loop induction variable. Return
+  /// true if the loop induction variable is found.
+  bool getInductionDescriptor(ScalarEvolution &SE,
+                              InductionDescriptor &IndDesc) const;
+
+  /// Return true if the given PHINode \p AuxIndVar is
+  /// - in the loop header
+  /// - not used outside of the loop
+  /// - incremented by a loop invariant step for each loop iteration
+  /// - step instruction opcode should be add or sub
+  /// Note: auxiliary induction variable is not required to be used in the
+  ///       conditional branch in the loop latch. (but it can be)
+  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                    ScalarEvolution &SE) const;
+
+  /// Return true if the loop induction variable starts at zero and increments
+  /// by one each time through the loop.
+  bool isCanonical(ScalarEvolution &SE) const;
+
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
@@ -1015,6 +1222,26 @@ MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
 /// is representing an access group.
 bool isValidAsAccessGroup(MDNode *AccGroup);
 
+/// Create a new LoopID after the loop has been transformed.
+///
+/// This can be used when no follow-up loop attributes are defined
+/// (llvm::makeFollowupLoopID returning None) to stop transformations from
+/// being applied again.
+///
+/// @param Context        The LLVMContext in which to create the new LoopID.
+/// @param OrigLoopID     The original LoopID; can be nullptr if the original
+///                       loop has no LoopID.
+/// @param RemovePrefixes Remove all loop attributes that have these prefixes.
+///                       Use to remove metadata of the transformation that has
+///                       been applied.
+/// @param AddAttrs       Add these loop attributes to the new LoopID.
+///
+/// @return A new LoopID that can be applied using Loop::setLoopID().
+llvm::MDNode *
+makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
+                               llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
+                               llvm::ArrayRef<llvm::MDNode *> AddAttrs);
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 2b807919fedf..4c33dac9e21e 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LoopInfoImpl.h - Natural Loop Calculator ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -96,49 +95,36 @@ bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
   return true;
 }
 
+// Helper function to get unique loop exits. Pred is a predicate selecting the
+// BasicBlocks in the loop whose successors are considered to find loop exits.
+template <class BlockT, class LoopT, typename PredicateT>
+void getUniqueExitBlocksHelper(const LoopT *L,
+                               SmallVectorImpl<BlockT *> &ExitBlocks,
+                               PredicateT Pred) {
+  assert(!L->isInvalid() && "Loop not in a valid state!");
+  SmallPtrSet<BlockT *, 32> Visited;
+  auto Filtered = make_filter_range(L->blocks(), Pred);
+  for (BlockT *BB : Filtered)
+    for (BlockT *Successor : children<BlockT *>(BB))
+      if (!L->contains(Successor))
+        if (Visited.insert(Successor).second)
+          ExitBlocks.push_back(Successor);
+}
+
 template <class BlockT, class LoopT>
 void LoopBase<BlockT, LoopT>::getUniqueExitBlocks(
     SmallVectorImpl<BlockT *> &ExitBlocks) const {
-  typedef GraphTraits<BlockT *> BlockTraits;
-  typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
-
-  assert(hasDedicatedExits() &&
-         "getUniqueExitBlocks assumes the loop has canonical form exits!");
-
-  SmallVector<BlockT *, 32> SwitchExitBlocks;
-  for (BlockT *Block : this->blocks()) {
-    SwitchExitBlocks.clear();
-    for (BlockT *Successor : children<BlockT *>(Block)) {
-      // If block is inside the loop then it is not an exit block.
-      if (contains(Successor))
-        continue;
-
-      BlockT *FirstPred = *InvBlockTraits::child_begin(Successor);
-
-      // If current basic block is this exit block's first predecessor then only
-      // insert exit block in to the output ExitBlocks vector. This ensures that
-      // same exit block is not inserted twice into ExitBlocks vector.
-      if (Block != FirstPred)
-        continue;
-
-      // If a terminator has more then two successors, for example SwitchInst,
-      // then it is possible that there are multiple edges from current block to
-      // one exit block.
-      if (std::distance(BlockTraits::child_begin(Block),
-                        BlockTraits::child_end(Block)) <= 2) {
-        ExitBlocks.push_back(Successor);
-        continue;
-      }
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [](const BlockT *BB) { return true; });
+}
 
-      // In case of multiple edges from current block to exit block, collect
-      // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
-      // duplicate edges.
-      if (!is_contained(SwitchExitBlocks, Successor)) {
-        SwitchExitBlocks.push_back(Successor);
-        ExitBlocks.push_back(Successor);
-      }
-    }
-  }
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::getUniqueNonLatchExitBlocks(
+    SmallVectorImpl<BlockT *> &ExitBlocks) const {
+  const BlockT *Latch = getLoopLatch();
+  assert(Latch && "Latch block must exists");
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [Latch](const BlockT *BB) { return BB != Latch; });
 }
 
 template <class BlockT, class LoopT>
@@ -588,16 +574,9 @@ SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
   // FIXME: If we change the order of LoopInfo we will want to remove the
   // reverse here.
   for (LoopT *RootL : reverse(*this)) {
-    assert(PreOrderWorklist.empty() &&
-           "Must start with an empty preorder walk worklist.");
-    PreOrderWorklist.push_back(RootL);
-    do {
-      LoopT *L = PreOrderWorklist.pop_back_val();
-      // Sub-loops are stored in forward program order, but will process the
-      // worklist backwards so append them in reverse order.
-      PreOrderWorklist.append(L->rbegin(), L->rend());
-      PreOrderLoops.push_back(L);
-    } while (!PreOrderWorklist.empty());
+    auto PreOrderLoopsInRootL = RootL->getLoopsInPreorder();
+    PreOrderLoops.append(PreOrderLoopsInRootL.begin(),
+                         PreOrderLoopsInRootL.end());
   }
 
   return PreOrderLoops;
diff --git a/include/llvm/Analysis/LoopIterator.h b/include/llvm/Analysis/LoopIterator.h
index 91c54b23029b..fa4da4283f55 100644
--- a/include/llvm/Analysis/LoopIterator.h
+++ b/include/llvm/Analysis/LoopIterator.h
@@ -1,9 +1,8 @@
 //===--------- LoopIterator.h - Iterate over loop blocks --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines iterators to visit the basic blocks within a loop.
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 86cfecd9df11..9215ab34ec6d 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -1,9 +1,8 @@
 //===- LoopPass.h - LoopPass class ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/LoopUnrollAnalyzer.h b/include/llvm/Analysis/LoopUnrollAnalyzer.h
index f45bf0b223b8..5f332e3cac16 100644
--- a/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LoopUnrollAnalyzer.h - Loop Unroll Analyzer-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 5418128f16ef..49f9e58ffad7 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -1,9 +1,8 @@
 //==- llvm/Analysis/MemoryBuiltins.h - Calls to memory builtins --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -84,6 +84,15 @@ bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
 bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
                    bool LookThroughBitCast = false);
 
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+                     bool LookThroughBitCast = false);
+
+/// Tests if a function is a library function that reallocates memory
+/// (e.g., realloc).
+bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+
 //===----------------------------------------------------------------------===//
 //  malloc Call Utility Functions.
 //
@@ -135,6 +144,9 @@ inline CallInst *extractCallocCall(Value *I, const TargetLibraryInfo *TLI) {
 //  free Call Utility Functions.
 //
 
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool isLibFreeFunction(const Function *F, const LibFunc TLIFn);
+
 /// isFreeCall - Returns non-null if the value is a call to the builtin free()
 const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI);
 
@@ -178,14 +190,13 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
                    const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
 
 /// Try to turn a call to \@llvm.objectsize into an integer value of the given
-/// Type. Returns null on failure.
-/// If MustSucceed is true, this function will not return null, and may return
-/// conservative values governed by the second argument of the call to
-/// objectsize.
-ConstantInt *lowerObjectSizeCall(IntrinsicInst *ObjectSize,
-                                 const DataLayout &DL,
-                                 const TargetLibraryInfo *TLI,
-                                 bool MustSucceed);
+/// Type. Returns null on failure. If MustSucceed is true, this function will
+/// not return null, and may return conservative values governed by the second
+/// argument of the call to objectsize.
+Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
+                           const TargetLibraryInfo *TLI, bool MustSucceed);
+
+
 
 using SizeOffsetType = std::pair<APInt, APInt>;
 
@@ -252,7 +263,7 @@ using SizeOffsetEvalType = std::pair<Value *, Value *>;
 /// May create code to compute the result at run-time.
 class ObjectSizeOffsetEvaluator
   : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> {
-  using BuilderTy = IRBuilder<TargetFolder>;
+  using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
   using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>;
   using CacheMapTy = DenseMap<const Value *, WeakEvalType>;
   using PtrSetTy = SmallPtrSet<const Value *, 8>;
@@ -265,17 +276,18 @@ class ObjectSizeOffsetEvaluator
   Value *Zero;
   CacheMapTy CacheMap;
   PtrSetTy SeenVals;
-  bool RoundToAlign;
-
-  SizeOffsetEvalType unknown() {
-    return std::make_pair(nullptr, nullptr);
-  }
+  ObjectSizeOpts EvalOpts;
+  SmallPtrSet<Instruction *, 8> InsertedInstructions;
 
   SizeOffsetEvalType compute_(Value *V);
 
 public:
+  static SizeOffsetEvalType unknown() {
+    return std::make_pair(nullptr, nullptr);
+  }
+
   ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI,
-                            LLVMContext &Context, bool RoundToAlign = false);
+                            LLVMContext &Context, ObjectSizeOpts EvalOpts = {});
 
   SizeOffsetEvalType compute(Value *V);
 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 958d4fe4b832..e2669c2fa601 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -382,7 +381,8 @@ public:
   ///
   /// See the class comment for more details. It is illegal to call this on
   /// non-memory instructions.
-  MemDepResult getDependency(Instruction *QueryInst);
+  MemDepResult getDependency(Instruction *QueryInst,
+                             OrderedBasicBlock *OBB = nullptr);
 
   /// Perform a full dependency query for the specified call, returning the set
   /// of blocks that the value is potentially live across.
@@ -448,14 +448,14 @@ public:
                                         BasicBlock::iterator ScanIt,
                                         BasicBlock *BB,
                                         Instruction *QueryInst = nullptr,
-                                        unsigned *Limit = nullptr);
-
-  MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
-                                              bool isLoad,
-                                              BasicBlock::iterator ScanIt,
-                                              BasicBlock *BB,
-                                              Instruction *QueryInst,
-                                              unsigned *Limit = nullptr);
+                                        unsigned *Limit = nullptr,
+                                        OrderedBasicBlock *OBB = nullptr);
+
+  MemDepResult
+  getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad,
+                                 BasicBlock::iterator ScanIt, BasicBlock *BB,
+                                 Instruction *QueryInst, unsigned *Limit,
+                                 OrderedBasicBlock *OBB);
 
   /// This analysis looks for other loads and stores with invariant.group
   /// metadata and the same pointer operand. Returns Unknown if it does not
diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h
index fca18c1b5999..7c26353e618b 100644
--- a/include/llvm/Analysis/MemoryLocation.h
+++ b/include/llvm/Analysis/MemoryLocation.h
@@ -1,9 +1,8 @@
 //===- MemoryLocation.h - Memory location descriptions ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index 17e2d0c73977..b7730be75354 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -1,9 +1,8 @@
 //===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -105,6 +104,9 @@
 
 namespace llvm {
 
+/// Enables memory ssa as a dependency for loop passes.
+extern cl::opt<bool> EnableMSSALoopDependency;
+
 class Function;
 class Instruction;
 class MemoryAccess;
@@ -701,6 +703,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)
 class MemorySSA {
 public:
   MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
+
+  // MemorySSA must remain where it's constructed; Walkers it creates store
+  // pointers to it.
+  MemorySSA(MemorySSA &&) = delete;
+
   ~MemorySSA();
 
   MemorySSAWalker *getWalker();
@@ -776,9 +783,6 @@ public:
   /// all uses, uses appear in the right places).  This is used by unit tests.
   void verifyMemorySSA() const;
 
-  /// Check clobber sanity for an access.
-  void checkClobberSanityAccess(const MemoryAccess *MA) const;
-
   /// Used in various insertion functions to specify whether we are talking
   /// about the beginning or end of a block.
   enum InsertionPlace { Beginning, End };
@@ -793,7 +797,6 @@ protected:
   void verifyDomination(Function &F) const;
   void verifyOrdering(Function &F) const;
   void verifyDominationNumbers(const Function &F) const;
-  void verifyClobberSanity(const Function &F) const;
 
   // This is used by the use optimizer and updater.
   AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
@@ -830,13 +833,13 @@ protected:
                                       const MemoryUseOrDef *Template = nullptr);
 
 private:
-  class ClobberWalkerBase;
-  class CachingWalker;
-  class SkipSelfWalker;
+  template <class AliasAnalysisType> class ClobberWalkerBase;
+  template <class AliasAnalysisType> class CachingWalker;
+  template <class AliasAnalysisType> class SkipSelfWalker;
   class OptimizeUses;
 
-  CachingWalker *getWalkerImpl();
-  void buildMemorySSA();
+  CachingWalker<AliasAnalysis> *getWalkerImpl();
+  void buildMemorySSA(BatchAAResults &BAA);
   void optimizeUses();
 
   void prepareForMoveTo(MemoryAccess *, BasicBlock *);
@@ -850,7 +853,8 @@ private:
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryPhi *createMemoryPhi(BasicBlock *BB);
-  MemoryUseOrDef *createNewAccess(Instruction *,
+  template <typename AliasAnalysisType>
+  MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
                                   const MemoryUseOrDef *Template = nullptr);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
@@ -886,9 +890,9 @@ private:
   mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
 
   // Memory SSA building info
-  std::unique_ptr<ClobberWalkerBase> WalkerBase;
-  std::unique_ptr<CachingWalker> Walker;
-  std::unique_ptr<SkipSelfWalker> SkipWalker;
+  std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
+  std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
+  std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
   unsigned NextID;
 };
 
@@ -932,6 +936,9 @@ public:
     MemorySSA &getMSSA() { return *MSSA.get(); }
 
     std::unique_ptr<MemorySSA> MSSA;
+
+    bool invalidate(Function &F, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &Inv);
   };
 
   Result run(Function &F, FunctionAnalysisManager &AM);
@@ -1044,8 +1051,6 @@ public:
   /// the walker it uses or returns.
   virtual void invalidateInfo(MemoryAccess *) {}
 
-  virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
-
 protected:
   friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
                           // constructor.
@@ -1101,15 +1106,15 @@ public:
     assert(Access && "Tried to access past the end of our iterator");
     // Go to the first argument for phis, and the defining access for everything
     // else.
-    if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
+    if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
       return MP->getIncomingValue(ArgNo);
     return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
   }
 
   using BaseT::operator++;
-  memoryaccess_def_iterator &operator++() {
+  memoryaccess_def_iterator_base &operator++() {
     assert(Access && "Hit end of iterator");
-    if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
+    if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
       if (++ArgNo >= MP->getNumIncomingValues()) {
         ArgNo = 0;
         Access = nullptr;
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index 169d5bd9fa8b..d4d8040c1ff6 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -1,9 +1,8 @@
 //===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,7 @@
 #ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
 #define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -106,7 +106,12 @@ public:
   /// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
   /// following a CFG change that replaced multiple edges (switch) with a direct
   /// branch.
-  void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+  void removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+                                      const BasicBlock *To);
+  /// Update MemorySSA when inserting a unique backedge block for a loop.
+  void updatePhisWhenInsertingUniqueBackedgeBlock(BasicBlock *LoopHeader,
+                                                  BasicBlock *LoopPreheader,
+                                                  BasicBlock *BackedgeBlock);
   /// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
   /// the exit blocks and a 1:1 mapping of all blocks and instructions
   /// cloned. This involves duplicating all defs and uses in the cloned blocks
@@ -222,14 +227,14 @@ public:
   /// associated with it is erased from the program.  For example, if a store or
   /// load is simply erased (not replaced), removeMemoryAccess should be called
   /// on the MemoryAccess for that store/load.
-  void removeMemoryAccess(MemoryAccess *);
+  void removeMemoryAccess(MemoryAccess *, bool OptimizePhis = false);
 
   /// Remove MemoryAccess for a given instruction, if a MemoryAccess exists.
   /// This should be called when an instruction (load/store) is deleted from
   /// the program.
-  void removeMemoryAccess(const Instruction *I) {
+  void removeMemoryAccess(const Instruction *I, bool OptimizePhis = false) {
     if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
-      removeMemoryAccess(MA);
+      removeMemoryAccess(MA, OptimizePhis);
   }
 
   /// Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
@@ -239,7 +244,17 @@ public:
   /// Deleted blocks still have successor info, but their predecessor edges and
   /// Phi nodes may already be updated. Instructions in DeadBlocks should be
   /// deleted after this call.
-  void removeBlocks(const SmallPtrSetImpl<BasicBlock *> &DeadBlocks);
+  void removeBlocks(const SmallSetVector<BasicBlock *, 8> &DeadBlocks);
+
+  /// Instruction I will be changed to an unreachable. Remove all accesses in
+  /// I's block that follow I (inclusive), and update the Phis in the block's
+  /// successors.
+  void changeToUnreachable(const Instruction *I);
+
+  /// Conditional branch BI is changed or replaced with an unconditional branch
+  /// to `To`. Update Phis in BI's successors to remove BI's BB.
+  void changeCondBranchToUnconditionalTo(const BranchInst *BI,
+                                         const BasicBlock *To);
 
   /// Get handle on MemorySSA.
   MemorySSA* getMemorySSA() const { return MSSA; }
@@ -262,6 +277,7 @@ private:
   MemoryAccess *recursePhi(MemoryAccess *Phi);
   template <class RangeType>
   MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
+  void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
   void fixupDefs(const SmallVectorImpl<WeakVH> &);
   // Clone all uses and defs from BB to NewBB given a 1:1 map of all
   // instructions and blocks cloned, and a map of MemoryPhi : Definition
@@ -272,8 +288,14 @@ private:
   // not necessarily be MemoryPhis themselves, they may be MemoryDefs. As such,
   // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
   // may be MemoryPhis or MemoryDefs and not MemoryUses.
+  // If CloneWasSimplified = false, the clone was exact. Otherwise, assume that
+  // the clone involved simplifications that may have: (1) turned a MemoryUse
+  // into an instruction that MemorySSA has no representation for, or (2) turned
+  // a MemoryDef into a MemoryUse or an instruction that MemorySSA has no
+  // representation for. No other cases are supported.
   void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
-                        const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+                        const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+                        bool CloneWasSimplified = false);
   template <typename Iter>
   void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
                                             Iter ValuesBegin, Iter ValuesEnd,
diff --git a/include/llvm/Analysis/ModuleSummaryAnalysis.h b/include/llvm/Analysis/ModuleSummaryAnalysis.h
index 9af7859cb4bf..1572a49e3384 100644
--- a/include/llvm/Analysis/ModuleSummaryAnalysis.h
+++ b/include/llvm/Analysis/ModuleSummaryAnalysis.h
@@ -1,9 +1,8 @@
 //===- ModuleSummaryAnalysis.h - Module summary index builder ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index ad3222c17e62..3ef539c89d97 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -1,9 +1,8 @@
 //===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index 58a67042ea2d..b4f4e5f29768 100644
--- a/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -53,14 +52,17 @@ public:
     return false;
   }
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
-  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal);
 
   using AAResultBase::getModRefBehavior;
   FunctionModRefBehavior getModRefBehavior(const Function *F);
 
   using AAResultBase::getModRefInfo;
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 1f497fab35da..522abd756c9f 100644
--- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -1,9 +1,8 @@
 //===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h
index 018ea1f851be..dc6093a7b86c 100644
--- a/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/include/llvm/Analysis/ObjCARCInstKind.h
@@ -1,9 +1,8 @@
 //===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -75,6 +74,10 @@ bool IsForwarding(ARCInstKind Class);
 /// passed a null pointer.
 bool IsNoopOnNull(ARCInstKind Class);
 
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool IsNoopOnGlobal(ARCInstKind Class);
+
 /// Test if the given class represents instructions which are always safe
 /// to mark with the "tail" keyword.
 bool IsAlwaysTail(ARCInstKind Class);
diff --git a/include/llvm/Analysis/OptimizationRemarkEmitter.h b/include/llvm/Analysis/OptimizationRemarkEmitter.h
index fa838696e2f8..7b8404404ce7 100644
--- a/include/llvm/Analysis/OptimizationRemarkEmitter.h
+++ b/include/llvm/Analysis/OptimizationRemarkEmitter.h
@@ -1,9 +1,8 @@
 //===- OptimizationRemarkEmitter.h - Optimization Diagnostic ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,7 +77,7 @@ public:
     // remarks enabled. We can't currently check whether remarks are requested
     // for the calling pass since that requires actually building the remark.
 
-    if (F->getContext().getDiagnosticsOutputFile() ||
+    if (F->getContext().getRemarkStreamer() ||
         F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
       auto R = RemarkBuilder();
       emit((DiagnosticInfoOptimizationBase &)R);
@@ -93,7 +92,7 @@ public:
   /// provide more context so that non-trivial false positives can be quickly
   /// detected by the user.
   bool allowExtraAnalysis(StringRef PassName) const {
-    return (F->getContext().getDiagnosticsOutputFile() ||
+    return (F->getContext().getRemarkStreamer() ||
             F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
   }
 
diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h
index 0776aa626005..ae64c0189f5e 100644
--- a/include/llvm/Analysis/OrderedBasicBlock.h
+++ b/include/llvm/Analysis/OrderedBasicBlock.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -60,6 +59,14 @@ public:
   /// only relevant to compare relative instructions positions inside \p BB.
   /// Returns false for A == B.
   bool dominates(const Instruction *A, const Instruction *B);
+
+  /// Remove \p I from the ordering, if it is present.
+  void eraseInstruction(const Instruction *I);
+
+  /// Replace \p Old with \p New in the ordering. \p New is assigned the
+  /// numbering of \p Old, so it must be inserted at the same position in the
+  /// IR.
+  void replaceInstruction(const Instruction *Old, const Instruction *New);
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Analysis/OrderedInstructions.h b/include/llvm/Analysis/OrderedInstructions.h
index 7e3850b87c57..967b146b52de 100644
--- a/include/llvm/Analysis/OrderedInstructions.h
+++ b/include/llvm/Analysis/OrderedInstructions.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 0a335b6be6c7..54a07f053478 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -1,9 +1,8 @@
 //===- PHITransAddr.h - PHI Translation for Addresses -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 081dd5000835..d9c97dff8c6e 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -1,9 +1,8 @@
 //===-- llvm/Analysis/Passes.h - Constructors for analyses ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/PhiValues.h b/include/llvm/Analysis/PhiValues.h
index 76204ac1bc6c..124fa2191694 100644
--- a/include/llvm/Analysis/PhiValues.h
+++ b/include/llvm/Analysis/PhiValues.h
@@ -1,9 +1,8 @@
 //===- PhiValues.h - Phi Value Analysis -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index f2dc8d135d71..87d2e0318d0a 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -1,9 +1,8 @@
 //=- llvm/Analysis/PostDominators.h - Post Dominator Calculation --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index 3aef4be72d71..f309d344b8d1 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,6 +73,12 @@ public:
            Summary->getKind() == ProfileSummary::PSK_Instr;
   }
 
+  /// Returns true if module \c M has context sensitive instrumentation profile.
+  bool hasCSInstrumentationProfile() {
+    return hasProfileSummary() &&
+           Summary->getKind() == ProfileSummary::PSK_CSInstr;
+  }
+
   /// Handle the invalidation of this information.
   ///
   /// When used as a result of \c ProfileSummaryAnalysis this method will be
@@ -87,7 +92,8 @@ public:
 
   /// Returns the profile count for \p CallInst.
   Optional<uint64_t> getProfileCount(const Instruction *CallInst,
-                                     BlockFrequencyInfo *BFI);
+                                     BlockFrequencyInfo *BFI,
+                                     bool AllowSynthetic = false);
   /// Returns true if the working set size of the code is considered huge.
   bool hasHugeWorkingSetSize();
   /// Returns true if \p F has hot function entry.
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
index b34b25c75040..fbf04c841d30 100644
--- a/include/llvm/Analysis/PtrUseVisitor.h
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -1,9 +1,8 @@
 //===- PtrUseVisitor.h - InstVisitors over a pointers uses ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -257,6 +256,10 @@ protected:
     enqueueUsers(BC);
   }
 
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    enqueueUsers(ASC);
+  }
+
   void visitPtrToIntInst(PtrToIntInst &I) {
     PI.setEscaped(&I);
   }
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 27f6cc197927..8bcc3e851200 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -1,9 +1,8 @@
 //===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index 5904214aa925..c59c09dd2095 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -1,9 +1,8 @@
 //===- RegionInfoImpl.h - SESE region detection analysis --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Detects single entry single exit regions in the control flow graph.
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index 4fd92fcde20b..72bc5bbcb506 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -1,9 +1,8 @@
 //===- RegionIterator.h - Iterators to iteratate over Regions ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines the iterators to iterate over the elements of a Region.
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index b3da91c89cbd..5b1864a37629 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -1,9 +1,8 @@
 //===- RegionPass.h - RegionPass class --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/RegionPrinter.h b/include/llvm/Analysis/RegionPrinter.h
index e132eaea5674..154ac35c486a 100644
--- a/include/llvm/Analysis/RegionPrinter.h
+++ b/include/llvm/Analysis/RegionPrinter.h
@@ -1,9 +1,8 @@
 //===-- RegionPrinter.h - Region printer external interface -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 8f4200b07e5c..0bd98ef37e7a 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/ScalarEvolution.h - Scalar Evolution -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,6 +84,9 @@ class SCEV : public FoldingSetNode {
   const unsigned short SCEVType;
 
 protected:
+  // Estimated complexity of this node's expression tree size.
+  const unsigned short ExpressionSize;
+
   /// This field is initialized to zero and may be used in subclasses to store
   /// miscellaneous information.
   unsigned short SubclassData = 0;
@@ -116,8 +118,9 @@ public:
     NoWrapMask = (1 << 3) - 1
   };
 
-  explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
-      : FastID(ID), SCEVType(SCEVTy) {}
+  explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
+                unsigned short ExpressionSize)
+      : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
   SCEV(const SCEV &) = delete;
   SCEV &operator=(const SCEV &) = delete;
 
@@ -138,6 +141,19 @@ public:
   /// Return true if the specified scev is negated, but not a constant.
   bool isNonConstantNegative() const;
 
+  // Returns estimated size of the mathematical expression represented by this
+  // SCEV. The rules of its calculation are as follows:
+  // 1) Size of a SCEV without operands (like constants and SCEVUnknown) is 1;
+  // 2) Size of a SCEV with operands Op1, Op2, ..., OpN is given by the formula:
+  //    (1 + Size(Op1) + ... + Size(OpN)).
+  // This value gives us an estimation of time we need to traverse through this
+  // SCEV and all its operands recursively. We may use it to avoid performing
+  // heavy transformations on SCEVs of excessive size for the sake of saving
+  // compilation time.
+  unsigned short getExpressionSize() const {
+    return ExpressionSize;
+  }
+
   /// Print out the internal representation of this scalar to the specified
   /// stream.  This should really only be used for debugging purposes.
   void print(raw_ostream &OS) const;
@@ -521,7 +537,7 @@ public:
   const SCEV *getConstant(ConstantInt *V);
   const SCEV *getConstant(const APInt &Val);
   const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
-  const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
+  const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
   const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
@@ -582,6 +598,8 @@ public:
   /// \p IndexExprs The expressions for the indices.
   const SCEV *getGEPExpr(GEPOperator *GEP,
                          const SmallVectorImpl<const SCEV *> &IndexExprs);
+  const SCEV *getMinMaxExpr(unsigned Kind,
+                            SmallVectorImpl<const SCEV *> &Operands);
   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
   const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
   const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
@@ -619,11 +637,13 @@ public:
 
   /// Return a SCEV corresponding to a conversion of the input value to the
   /// specified type.  If the type must be extended, it is zero extended.
-  const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
+  const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+                                      unsigned Depth = 0);
 
   /// Return a SCEV corresponding to a conversion of the input value to the
   /// specified type.  If the type must be extended, it is sign extended.
-  const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
+  const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+                                      unsigned Depth = 0);
 
   /// Return a SCEV corresponding to a conversion of the input value to the
   /// specified type.  If the type must be extended, it is zero extended.  The
@@ -726,9 +746,12 @@ public:
   unsigned getSmallConstantTripMultiple(const Loop *L,
                                         BasicBlock *ExitingBlock);
 
-  /// Get the expression for the number of loop iterations for which this loop
-  /// is guaranteed not to exit via ExitingBlock. Otherwise return
-  /// SCEVCouldNotCompute.
+  /// Return the number of times the backedge executes before the given exit
+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+  /// For a single exit loop, this value is equivalent to the result of
+  /// getBackedgeTakenCount.  The loop is guaranteed to exit (via *some* exit)
+  /// before the backedge is executed (ExitCount + 1) times.  Note that there
+  /// is no guarantee about *which* exit is taken on the exiting iteration.
   const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
 
   /// If the specified loop has a predictable backedge-taken count, return it,
@@ -764,6 +787,13 @@ public:
   /// backedge-taken count.
   bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
 
+  // This method should be called by the client when it has made any change
+  // that would invalidate SCEV's answers, and the client wants to remove all
+  // loop information held internally by ScalarEvolution. This is intended to
+  // be used when the alternative of forgetting a loop is too expensive
+  // (e.g. large loop bodies).
+  void forgetAllLoops();
+
   /// This method should be called by the client when it has changed a loop in
   /// a way that may effect ScalarEvolution's ability to compute a trip count,
   /// or if the loop is deleted.  This call is potentially expensive for large
@@ -1273,7 +1303,7 @@ private:
     using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>;
 
     /// Initialize BackedgeTakenInfo from a list of exact exit counts.
-    BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete,
+    BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool Complete,
                       const SCEV *MaxCount, bool MaxOrZero);
 
     /// Test whether this BackedgeTakenInfo contains any computed information,
@@ -1826,15 +1856,15 @@ private:
                           bool NoWrap);
 
   /// Get add expr already created or create a new one.
-  const SCEV *getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+  const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
                                  SCEV::NoWrapFlags Flags);
 
   /// Get mul expr already created or create a new one.
-  const SCEV *getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+  const SCEV *getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
                                  SCEV::NoWrapFlags Flags);
 
   // Get addrec expr already created or create a new one.
-  const SCEV *getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+  const SCEV *getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
                                     const Loop *L, SCEV::NoWrapFlags Flags);
 
   /// Return x if \p Val is f(x) where f is a 1-1 function.
@@ -1853,6 +1883,16 @@ private:
   /// Assign A and B to LHS and RHS, respectively.
   bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
 
+  /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
+  /// `UniqueSCEVs`.
+  ///
+  /// The first component of the returned tuple is the SCEV if found and null
+  /// otherwise.  The second component is the `FoldingSetNodeID` that was
+  /// constructed to look up the SCEV and the third component is the insertion
+  /// point.
+  std::tuple<const SCEV *, FoldingSetNodeID, void *>
+  findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops);
+
   FoldingSet<SCEV> UniqueSCEVs;
   FoldingSet<SCEVPredicate> UniquePreds;
   BumpPtrAllocator SCEVAllocator;
diff --git a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index 329be51e5eac..98d53237d4a0 100644
--- a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -31,7 +30,8 @@ public:
   explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {}
   SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {}
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
 
 private:
   Value *GetBaseValue(const SCEV *S);
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 58d42680d6bc..a519f93216b3 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -1,9 +1,8 @@
 //===---- llvm/Analysis/ScalarEvolutionExpander.h - SCEV Exprs --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -316,8 +315,10 @@ namespace llvm {
                                    SmallPtrSetImpl<const SCEV *> &Processed);
 
     /// Insert the specified binary operator, doing a small amount of work to
-    /// avoid inserting an obviously redundant operation.
-    Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
+    /// avoid inserting an obviously redundant operation, and hoisting to an
+    /// outer loop when the opportunity is there and it is safe.
+    Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+                       SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
 
     /// Arrange for there to be a cast of V to Ty at IP, reusing an existing
     /// cast if a suitable one exists, moving an existing cast if a suitable one
@@ -368,6 +369,10 @@ namespace llvm {
 
     Value *visitUMaxExpr(const SCEVUMaxExpr *S);
 
+    Value *visitSMinExpr(const SCEVSMinExpr *S);
+
+    Value *visitUMinExpr(const SCEVUMinExpr *S);
+
     Value *visitUnknown(const SCEVUnknown *S) {
       return S->getValue();
     }
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 42e76094eb2b..d008af7b7e6f 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/ScalarEvolutionExpressions.h - SCEV Exprs --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,7 +39,7 @@ class Type;
     // These should be ordered in terms of increasing complexity to make the
     // folders simpler.
     scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
-    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
+    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
     scUnknown, scCouldNotCompute
   };
 
@@ -51,7 +50,7 @@ class Type;
     ConstantInt *V;
 
     SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
-      SCEV(ID, scConstant), V(v) {}
+      SCEV(ID, scConstant, 1), V(v) {}
 
   public:
     ConstantInt *getValue() const { return V; }
@@ -65,6 +64,13 @@ class Type;
     }
   };
 
+  static unsigned short computeExpressionSize(ArrayRef<const SCEV *> Args) {
+    APInt Size(16, 1);
+    for (auto *Arg : Args)
+      Size = Size.uadd_sat(APInt(16, Arg->getExpressionSize()));
+    return (unsigned short)Size.getZExtValue();
+  }
+
   /// This is the base class for unary cast operator classes.
   class SCEVCastExpr : public SCEV {
   protected:
@@ -142,9 +148,10 @@ class Type;
     const SCEV *const *Operands;
     size_t NumOperands;
 
-    SCEVNAryExpr(const FoldingSetNodeIDRef ID,
-                 enum SCEVTypes T, const SCEV *const *O, size_t N)
-      : SCEV(ID, T), Operands(O), NumOperands(N) {}
+    SCEVNAryExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+                 const SCEV *const *O, size_t N)
+        : SCEV(ID, T, computeExpressionSize(makeArrayRef(O, N))), Operands(O),
+          NumOperands(N) {}
 
   public:
     size_t getNumOperands() const { return NumOperands; }
@@ -183,10 +190,9 @@ class Type;
 
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
-      return S->getSCEVType() == scAddExpr ||
-             S->getSCEVType() == scMulExpr ||
-             S->getSCEVType() == scSMaxExpr ||
-             S->getSCEVType() == scUMaxExpr ||
+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
              S->getSCEVType() == scAddRecExpr;
     }
   };
@@ -201,10 +207,9 @@ class Type;
   public:
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
-      return S->getSCEVType() == scAddExpr ||
-             S->getSCEVType() == scMulExpr ||
-             S->getSCEVType() == scSMaxExpr ||
-             S->getSCEVType() == scUMaxExpr;
+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
     }
 
     /// Set flags for a non-recurrence without clearing previously set flags.
@@ -258,7 +263,8 @@ class Type;
     const SCEV *RHS;
 
     SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs)
-      : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {}
+        : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})), LHS(lhs),
+          RHS(rhs) {}
 
   public:
     const SCEV *getLHS() const { return LHS; }
@@ -358,17 +364,53 @@ class Type;
     }
   };
 
-  /// This class represents a signed maximum selection.
-  class SCEVSMaxExpr : public SCEVCommutativeExpr {
+  /// This node is the base class for min/max selections.
+  class SCEVMinMaxExpr : public SCEVCommutativeExpr {
     friend class ScalarEvolution;
 
-    SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-      : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
-      // Max never overflows.
+    static bool isMinMaxType(enum SCEVTypes T) {
+      return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr ||
+             T == scUMinExpr;
+    }
+
+  protected:
+    /// Note: Constructing subclasses via this constructor is allowed
+    SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+                   const SCEV *const *O, size_t N)
+        : SCEVCommutativeExpr(ID, T, O, N) {
+      assert(isMinMaxType(T));
+      // Min and max never overflow
       setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
     }
 
+  public:
+    static bool classof(const SCEV *S) {
+      return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
+    }
+
+    static enum SCEVTypes negate(enum SCEVTypes T) {
+      switch (T) {
+      case scSMaxExpr:
+        return scSMinExpr;
+      case scSMinExpr:
+        return scSMaxExpr;
+      case scUMaxExpr:
+        return scUMinExpr;
+      case scUMinExpr:
+        return scUMaxExpr;
+      default:
+        llvm_unreachable("Not a min or max SCEV type!");
+      }
+    }
+  };
+
+  /// This class represents a signed maximum selection.
+  class SCEVSMaxExpr : public SCEVMinMaxExpr {
+    friend class ScalarEvolution;
+
+    SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+        : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {}
+
   public:
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const SCEV *S) {
@@ -377,15 +419,11 @@ class Type;
   };
 
   /// This class represents an unsigned maximum selection.
-  class SCEVUMaxExpr : public SCEVCommutativeExpr {
+  class SCEVUMaxExpr : public SCEVMinMaxExpr {
     friend class ScalarEvolution;
 
-    SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-      : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
-      // Max never overflows.
-      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
-    }
+    SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+        : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {}
 
   public:
     /// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -394,6 +432,34 @@ class Type;
     }
   };
 
+  /// This class represents a signed minimum selection.
+  class SCEVSMinExpr : public SCEVMinMaxExpr {
+    friend class ScalarEvolution;
+
+    SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+        : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {}
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const SCEV *S) {
+      return S->getSCEVType() == scSMinExpr;
+    }
+  };
+
+  /// This class represents an unsigned minimum selection.
+  class SCEVUMinExpr : public SCEVMinMaxExpr {
+    friend class ScalarEvolution;
+
+    SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+        : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {}
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const SCEV *S) {
+      return S->getSCEVType() == scUMinExpr;
+    }
+  };
+
   /// This means that we are dealing with an entirely unknown SCEV
   /// value, and only represent it as its LLVM Value.  This is the
   /// "bottom" value for the analysis.
@@ -411,7 +477,7 @@ class Type;
 
     SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
                 ScalarEvolution *se, SCEVUnknown *next) :
-      SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
+      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
 
     // Implement CallbackVH.
     void deleted() override;
@@ -466,6 +532,10 @@ class Type;
         return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
       case scUMaxExpr:
         return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
+      case scSMinExpr:
+        return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
+      case scUMinExpr:
+        return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
       case scUnknown:
         return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
       case scCouldNotCompute:
@@ -519,6 +589,8 @@ class Type;
         case scMulExpr:
         case scSMaxExpr:
         case scUMaxExpr:
+        case scSMinExpr:
+        case scUMinExpr:
         case scAddRecExpr:
           for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
             push(Op);
@@ -681,6 +753,26 @@ class Type;
       return !Changed ? Expr : SE.getUMaxExpr(Operands);
     }
 
+    const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
+      SmallVector<const SCEV *, 2> Operands;
+      bool Changed = false;
+      for (auto *Op : Expr->operands()) {
+        Operands.push_back(((SC *)this)->visit(Op));
+        Changed |= Op != Operands.back();
+      }
+      return !Changed ? Expr : SE.getSMinExpr(Operands);
+    }
+
+    const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
+      SmallVector<const SCEV *, 2> Operands;
+      bool Changed = false;
+      for (auto *Op : Expr->operands()) {
+        Operands.push_back(((SC *)this)->visit(Op));
+        Changed |= Op != Operands.back();
+      }
+      return !Changed ? Expr : SE.getUMinExpr(Operands);
+    }
+
     const SCEV *visitUnknown(const SCEVUnknown *Expr) {
       return Expr;
     }
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 51c92121c8f0..1a05594a46ec 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h
index 1356c6e9198a..dae733bd2015 100644
--- a/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -1,9 +1,8 @@
 //===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,9 +39,12 @@ public:
     return false;
   }
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
-  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI);
 
 private:
   bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index 02a2e64268b7..fac92e4a25a4 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -1,9 +1,8 @@
 //===- SparsePropagation.h - Sparse Conditional Property Propagation ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -330,12 +329,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
     return;
   }
 
-  if (TI.isExceptionalTerminator()) {
-    Succs.assign(Succs.size(), true);
-    return;
-  }
-
-  if (isa<IndirectBrInst>(TI)) {
+  if (TI.isExceptionalTerminator() ||
+      TI.isIndirectTerminator()) {
     Succs.assign(Succs.size(), true);
     return;
   }
diff --git a/include/llvm/Analysis/StackSafetyAnalysis.h b/include/llvm/Analysis/StackSafetyAnalysis.h
index 8a151650a34c..f9d8b08ac142 100644
--- a/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -1,9 +1,8 @@
 //===- StackSafetyAnalysis.h - Stack memory safety analysis -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/SyncDependenceAnalysis.h b/include/llvm/Analysis/SyncDependenceAnalysis.h
index df693d9d8e8c..099403b47757 100644
--- a/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -1,9 +1,8 @@
 //===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/SyntheticCountsUtils.h b/include/llvm/Analysis/SyntheticCountsUtils.h
index db80bef001e2..b9b4c98bfc35 100644
--- a/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -1,9 +1,8 @@
 //===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/TargetFolder.h b/include/llvm/Analysis/TargetFolder.h
index ae75d3773362..7ab6562be440 100644
--- a/include/llvm/Analysis/TargetFolder.h
+++ b/include/llvm/Analysis/TargetFolder.h
@@ -1,9 +1,8 @@
 //====- TargetFolder.h - Constant folding helper ---------------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,6 +124,10 @@ public:
     return Fold(ConstantExpr::getNot(C));
   }
 
+  Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return Fold(ConstantExpr::get(Opc, C));
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def
index 518a85ee1a01..afed404f04c0 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1,9 +1,8 @@
 //===-- TargetLibraryInfo.def - Library information -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,15 @@
 // Which is defined depends on whether TLI_DEFINE_ENUM is defined or
 // TLI_DEFINE_STRING is defined. Only one should be defined at a time.
 
+// NOTE: The nofree attribute is added to Libfuncs which are not
+// listed as free or realloc functions in MemoryBuiltins.cpp
+//
+// When adding a function which frees memory include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "isLibFreeFunction".
+//
+// When adding a LibFunc which reallocates memory include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "AllocationFnData[]".
+
 #if !(defined(TLI_DEFINE_ENUM) || defined(TLI_DEFINE_STRING))
 #error "Must define TLI_DEFINE_ENUM or TLI_DEFINE_STRING for TLI .def."
 #elif defined(TLI_DEFINE_ENUM) && defined(TLI_DEFINE_STRING)
@@ -330,6 +338,10 @@ TLI_DEFINE_STRING_INTERNAL("__logf_finite")
 /// long double __logl_finite(long double x);
 TLI_DEFINE_ENUM_INTERNAL(logl_finite)
 TLI_DEFINE_STRING_INTERNAL("__logl_finite")
+/// void *__memccpy_chk(void *dst, const void *src, int c, size_t n,
+///                     size_t dstsize)
+TLI_DEFINE_ENUM_INTERNAL(memccpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__memccpy_chk")
 /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(memcpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
@@ -373,6 +385,23 @@ TLI_DEFINE_STRING_INTERNAL("__sinpi")
 /// float __sinpif(float x);
 TLI_DEFINE_ENUM_INTERNAL(sinpif)
 TLI_DEFINE_STRING_INTERNAL("__sinpif")
+/// int __small_fprintf(FILE *stream, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_fprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_fprintf")
+/// int __small_printf(const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_printf)
+TLI_DEFINE_STRING_INTERNAL("__small_printf")
+/// int __small_sprintf(char *str, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_sprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_sprintf")
+/// int __snprintf_chk(char *s, size_t n, int flags, size_t slen,
+///                    const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(snprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__snprintf_chk")
+/// int __sprintf_chk(char *str, int flags, size_t str_len,
+///                   const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(sprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__sprintf_chk")
 /// double __sqrt_finite(double x);
 TLI_DEFINE_ENUM_INTERNAL(sqrt_finite)
 TLI_DEFINE_STRING_INTERNAL("__sqrt_finite")
@@ -388,12 +417,26 @@ TLI_DEFINE_STRING_INTERNAL("__stpcpy_chk")
 /// char *__stpncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(stpncpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__stpncpy_chk")
+/// char *__strcat_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strcat_chk")
 /// char *__strcpy_chk(char *s1, const char *s2, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(strcpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__strcpy_chk")
 /// char * __strdup(const char *s);
 TLI_DEFINE_ENUM_INTERNAL(dunder_strdup)
 TLI_DEFINE_STRING_INTERNAL("__strdup")
+/// size_t __strlcat_chk(char *dst, const char *src, size_t size,
+///                      size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcat_chk")
+/// size_t __strlcpy_chk(char *dst, const char *src, size_t size,
+///                      size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcpy_chk")
+/// char *__strncat_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strncat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strncat_chk")
 /// char *__strncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(strncpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__strncpy_chk")
@@ -403,6 +446,14 @@ TLI_DEFINE_STRING_INTERNAL("__strndup")
 /// char * __strtok_r(char *s, const char *delim, char **save_ptr);
 TLI_DEFINE_ENUM_INTERNAL(dunder_strtok_r)
 TLI_DEFINE_STRING_INTERNAL("__strtok_r")
+/// int __vsnprintf_chk(char *s, size_t n, int flags, size_t slen,
+///                     const char *format, va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsnprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsnprintf_chk")
+/// int __vsprintf_chk(char *s, int flags, size_t slen, const char *format,
+///                    va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsprintf_chk")
 /// int abs(int j);
 TLI_DEFINE_ENUM_INTERNAL(abs)
 TLI_DEFINE_STRING_INTERNAL("abs")
@@ -1192,6 +1243,12 @@ TLI_DEFINE_STRING_INTERNAL("strcspn")
 /// char *strdup(const char *s1);
 TLI_DEFINE_ENUM_INTERNAL(strdup)
 TLI_DEFINE_STRING_INTERNAL("strdup")
+/// size_t strlcat(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcat)
+TLI_DEFINE_STRING_INTERNAL("strlcat")
+/// size_t strlcpy(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy)
+TLI_DEFINE_STRING_INTERNAL("strlcpy")
 /// size_t strlen(const char *s);
 TLI_DEFINE_ENUM_INTERNAL(strlen)
 TLI_DEFINE_STRING_INTERNAL("strlen")
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index a3fe834022f7..4b5200f5a838 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -1,9 +1,8 @@
 //===-- TargetLibraryInfo.h - Library information ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -87,6 +86,7 @@ public:
   enum VectorLibrary {
     NoLibrary,  // Don't use any vector library.
     Accelerate, // Use Accelerate framework.
+    MASSV,      // IBM MASS vector library.
     SVML        // Intel short vector math library.
   };
 
@@ -281,9 +281,9 @@ public:
     case LibFunc_trunc:        case LibFunc_truncf:     case LibFunc_truncl:
     case LibFunc_log2:         case LibFunc_log2f:      case LibFunc_log2l:
     case LibFunc_exp2:         case LibFunc_exp2f:      case LibFunc_exp2l:
-    case LibFunc_memcmp:       case LibFunc_strcmp:     case LibFunc_strcpy:
-    case LibFunc_stpcpy:       case LibFunc_strlen:     case LibFunc_strnlen:
-    case LibFunc_memchr:       case LibFunc_mempcpy:
+    case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
+    case LibFunc_strcpy:       case LibFunc_stpcpy:     case LibFunc_strlen:
+    case LibFunc_strnlen:      case LibFunc_memchr:     case LibFunc_mempcpy:
       return true;
     }
     return false;
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 223175d17c2d..7574b811bc1c 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -28,6 +27,10 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include <functional>
 
 namespace llvm {
@@ -36,6 +39,8 @@ namespace Intrinsic {
 enum ID : unsigned;
 }
 
+class AssumptionCache;
+class BranchInst;
 class Function;
 class GlobalValue;
 class IntrinsicInst;
@@ -45,6 +50,7 @@ class SCEV;
 class ScalarEvolution;
 class StoreInst;
 class SwitchInst;
+class TargetLibraryInfo;
 class Type;
 class User;
 class Value;
@@ -73,6 +79,30 @@ struct MemIntrinsicInfo {
   }
 };
 
+/// Attributes of a target-dependent hardware loop.
+struct HardwareLoopInfo {
+  HardwareLoopInfo() = delete;
+  HardwareLoopInfo(Loop *L) : L(L) {}
+  Loop *L = nullptr;
+  BasicBlock *ExitBlock = nullptr;
+  BranchInst *ExitBranch = nullptr;
+  const SCEV *ExitCount = nullptr;
+  IntegerType *CountType = nullptr;
+  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
+                                  // value in every iteration.
+  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
+                                  // another hardware loop?
+  bool CounterInReg = false;      // Should loop counter be updated in
+                                  // the loop via a phi?
+  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
+                                  // icmp ne zero on the loop counter value and
+                                  // produces an i1 to guard the loop entry.
+  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
+                               DominatorTree &DT, bool ForceNestedLoop = false,
+                               bool ForceHardwareLoopPHI = false);
+  bool canAnalyze(LoopInfo &LI);
+};
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -81,7 +111,7 @@ public:
   /// API below.
   ///
   /// This is used by targets to construct a TTI wrapping their target-specific
-  /// implementaion that encodes appropriate costs for their target.
+  /// implementation that encodes appropriate costs for their target.
   template <typename T> TargetTransformInfo(T Impl);
 
   /// Construct a baseline TTI object using a minimal implementation of
@@ -209,18 +239,21 @@ public:
   /// This is the most basic query for estimating call cost: it only knows the
   /// function type and (potentially) the number of arguments at the call site.
   /// The latter is only interesting for varargs function types.
-  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+  int getCallCost(FunctionType *FTy, int NumArgs = -1,
+                  const User *U = nullptr) const;
 
   /// Estimate the cost of calling a specific function when lowered.
   ///
   /// This overload adds the ability to reason about the particular function
   /// being called in the event it is a library call with special lowering.
-  int getCallCost(const Function *F, int NumArgs = -1) const;
+  int getCallCost(const Function *F, int NumArgs = -1,
+                  const User *U = nullptr) const;
 
   /// Estimate the cost of calling a specific function when lowered.
   ///
   /// This overload allows specifying a set of candidate argument values.
-  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+                  const User *U = nullptr) const;
 
   /// \returns A value by which our inlining threshold should be multiplied.
   /// This is primarily used to bump up the inlining threshold wholesale on
@@ -230,17 +263,35 @@ public:
   /// individual classes of instructions would be better.
   unsigned getInliningThresholdMultiplier() const;
 
+  /// \returns Vector bonus in percent.
+  ///
+  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+  /// and apply this bonus based on the percentage of vector instructions. A
+  /// bonus is applied if the vector instructions exceed 50% and half that amount
+  /// is applied if it exceeds 10%. Note that these bonuses are somewhat
+  /// arbitrary and evolved over time by accident as much as because they are
+  /// principled bonuses.
+  /// FIXME: It would be nice to base the bonus values on something more
+  /// scientific. A target may have no bonus on vector instructions.
+  int getInlinerVectorBonusPercent() const;
+
   /// Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys) const;
+                       ArrayRef<Type *> ParamTys,
+                       const User *U = nullptr) const;
 
   /// Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments) const;
+                       ArrayRef<const Value *> Arguments,
+                       const User *U = nullptr) const;
+
+  /// \return the expected cost of a memcpy, which could e.g. depend on the
+  /// source/destination type and alignment and the number of bytes copied.
+  int getMemcpyCost(const Instruction *I) const;
 
   /// \return The estimated number of case clusters when lowering \p 'SI'.
   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
@@ -296,7 +347,7 @@ public:
 
   // Returns true for the target specific
   // set of operations which produce uniform result
-  // even taking non-unform arguments
+  // even taking non-uniform arguments
   bool isAlwaysUniform(const Value *V) const;
 
   /// Returns the address space ID for a target's 'flat' address space. Note
@@ -437,6 +488,13 @@ public:
   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                UnrollingPreferences &UP) const;
 
+  /// Query the target whether it would be profitable to convert the given loop
+  /// into a hardware loop.
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) const;
+
   /// @}
 
   /// \name Scalar Target Information
@@ -483,21 +541,40 @@ public:
   /// calculation for the instructions in a loop.
   bool canMacroFuseCmp() const;
 
+  /// Return true if the target can save a compare for the loop count; for
+  /// example, a hardware loop saves a compare.
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+                  DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo) const;
+
   /// \return True is LSR should make efforts to create/preserve post-inc
   /// addressing mode expressions.
   bool shouldFavorPostInc() const;
 
-  /// Return true if the target supports masked load/store
-  /// AVX2 and AVX-512 targets allow masks for consecutive load and store
+  /// Return true if LSR should make efforts to generate indexed addressing
+  /// modes that operate across loop iterations.
+  bool shouldFavorBackedgeIndex(const Loop *L) const;
+
+  /// Return true if the target supports masked store.
   bool isLegalMaskedStore(Type *DataType) const;
+  /// Return true if the target supports masked load.
   bool isLegalMaskedLoad(Type *DataType) const;
 
-  /// Return true if the target supports masked gather/scatter
-  /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
-  /// bits scalar type.
+  /// Return true if the target supports nontemporal store.
+  bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+  /// Return true if the target supports nontemporal load.
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+
+  /// Return true if the target supports masked scatter.
   bool isLegalMaskedScatter(Type *DataType) const;
+  /// Return true if the target supports masked gather.
   bool isLegalMaskedGather(Type *DataType) const;
 
+  /// Return true if the target supports masked compress store.
+  bool isLegalMaskedCompressStore(Type *DataType) const;
+  /// Return true if the target supports masked expand load.
+  bool isLegalMaskedExpandLoad(Type *DataType) const;
+
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
@@ -576,17 +653,35 @@ public:
   /// Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
-  /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
-  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+  /// Returns options for expansion of memcmp. IsZeroCmp is
+  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
   struct MemCmpExpansionOptions {
+    // Return true if memcmp expansion is enabled.
+    operator bool() const { return MaxNumLoads > 0; }
+
+    // Maximum number of load operations.
+    unsigned MaxNumLoads = 0;
+
     // The list of available load sizes (in bytes), sorted in decreasing order.
     SmallVector<unsigned, 8> LoadSizes;
+
+    // For memcmp expansion when the memcmp result is only compared equal or
+    // not-equal to 0, allow up to this number of load pairs per block. As an
+    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+    //   a0 = load2bytes &a[0]
+    //   b0 = load2bytes &b[0]
+    //   a2 = load1byte  &a[2]
+    //   b2 = load1byte  &b[2]
+    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+    unsigned NumLoadsPerBlock = 1;
+
     // Set to true to allow overlapping loads. For example, 7-byte compares can
     // be done with two 4-byte compares instead of 4+2+1-byte compares. This
     // requires all loads in LoadSizes to be doable in an unaligned way.
     bool AllowOverlappingLoads = false;
   };
-  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
+  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                               bool IsZeroCmp) const;
 
   /// Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
@@ -700,7 +795,7 @@ public:
   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 
   /// \return The minimum vectorization factor for types of given element
-  /// bit width, or 0 if there is no mimimum VF. The returned value only
+  /// bit width, or 0 if there is no minimum VF. The returned value only
   /// applies when shouldMaximizeVectorBandwidth returns true.
   unsigned getMinimumVF(unsigned ElemWidth) const;
 
@@ -1005,6 +1100,11 @@ public:
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
   bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+  /// \returns the size cost of rematerializing a GlobalValue address relative
+  /// to a stack reload.
+  unsigned getGISelRematGlobalCost() const;
+
   /// @}
 
 private:
@@ -1035,15 +1135,18 @@ public:
   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                          ArrayRef<const Value *> Operands) = 0;
   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
-  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
-  virtual int getCallCost(const Function *F, int NumArgs) = 0;
+  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
+  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
   virtual int getCallCost(const Function *F,
-                          ArrayRef<const Value *> Arguments) = 0;
+                          ArrayRef<const Value *> Arguments, const User *U) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
+  virtual int getInlinerVectorBonusPercent() = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<Type *> ParamTys) = 0;
+                               ArrayRef<Type *> ParamTys, const User *U) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<const Value *> Arguments) = 0;
+                               ArrayRef<const Value *> Arguments,
+                               const User *U) = 0;
+  virtual int getMemcpyCost(const Instruction *I) = 0;
   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                     unsigned &JTSize) = 0;
   virtual int
@@ -1055,6 +1158,10 @@ public:
   virtual bool isLoweredToCall(const Function *F) = 0;
   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                        UnrollingPreferences &UP) = 0;
+  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                        AssumptionCache &AC,
+                                        TargetLibraryInfo *LibInfo,
+                                        HardwareLoopInfo &HWLoopInfo) = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1065,11 +1172,19 @@ public:
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool canMacroFuseCmp() = 0;
+  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+                          TargetLibraryInfo *LibInfo) = 0;
   virtual bool shouldFavorPostInc() const = 0;
+  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
+  virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
+  virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
+  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
+  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
@@ -1092,8 +1207,8 @@ public:
                                                     unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const = 0;
+  virtual MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool enableMaskedInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -1210,6 +1325,7 @@ public:
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual unsigned getGISelRematGlobalCost() const = 0;
   virtual int getInstructionLatency(const Instruction *I) = 0;
 };
 
@@ -1235,26 +1351,33 @@ public:
   int getExtCost(const Instruction *I, const Value *Src) override {
     return Impl.getExtCost(I, Src);
   }
-  int getCallCost(FunctionType *FTy, int NumArgs) override {
-    return Impl.getCallCost(FTy, NumArgs);
+  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
+    return Impl.getCallCost(FTy, NumArgs, U);
   }
-  int getCallCost(const Function *F, int NumArgs) override {
-    return Impl.getCallCost(F, NumArgs);
+  int getCallCost(const Function *F, int NumArgs, const User *U) override {
+    return Impl.getCallCost(F, NumArgs, U);
   }
   int getCallCost(const Function *F,
-                  ArrayRef<const Value *> Arguments) override {
-    return Impl.getCallCost(F, Arguments);
+                  ArrayRef<const Value *> Arguments, const User *U) override {
+    return Impl.getCallCost(F, Arguments, U);
   }
   unsigned getInliningThresholdMultiplier() override {
     return Impl.getInliningThresholdMultiplier();
   }
+  int getInlinerVectorBonusPercent() override {
+    return Impl.getInlinerVectorBonusPercent();
+  }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys) override {
-    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
+                       ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
+    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
   }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments) override {
-    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
+                       ArrayRef<const Value *> Arguments,
+                       const User *U = nullptr) override {
+    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+  }
+  int getMemcpyCost(const Instruction *I) override {
+    return Impl.getMemcpyCost(I);
   }
   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
     return Impl.getUserCost(U, Operands);
@@ -1279,6 +1402,12 @@ public:
                                UnrollingPreferences &UP) override {
     return Impl.getUnrollingPreferences(L, SE, UP);
   }
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) override {
+    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+  }
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
@@ -1299,21 +1428,42 @@ public:
   bool canMacroFuseCmp() override {
     return Impl.canMacroFuseCmp();
   }
+  bool canSaveCmp(Loop *L, BranchInst **BI,
+                        ScalarEvolution *SE,
+                        LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+                        TargetLibraryInfo *LibInfo) override {
+    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+  }
   bool shouldFavorPostInc() const override {
     return Impl.shouldFavorPostInc();
   }
+  bool shouldFavorBackedgeIndex(const Loop *L) const override {
+    return Impl.shouldFavorBackedgeIndex(L);
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
   bool isLegalMaskedLoad(Type *DataType) override {
     return Impl.isLegalMaskedLoad(DataType);
   }
+  bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+    return Impl.isLegalNTStore(DataType, Alignment);
+  }
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+    return Impl.isLegalNTLoad(DataType, Alignment);
+  }
   bool isLegalMaskedScatter(Type *DataType) override {
     return Impl.isLegalMaskedScatter(DataType);
   }
   bool isLegalMaskedGather(Type *DataType) override {
     return Impl.isLegalMaskedGather(DataType);
   }
+  bool isLegalMaskedCompressStore(Type *DataType) override {
+    return Impl.isLegalMaskedCompressStore(DataType);
+  }
+  bool isLegalMaskedExpandLoad(Type *DataType) override {
+    return Impl.isLegalMaskedExpandLoad(DataType);
+  }
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
@@ -1368,9 +1518,9 @@ public:
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  const MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const override {
-    return Impl.enableMemCmpExpansion(IsZeroCmp);
+  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                               bool IsZeroCmp) const override {
+    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
@@ -1617,6 +1767,11 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+
+  unsigned getGISelRematGlobalCost() const override {
+    return Impl.getGISelRematGlobalCost();
+  }
+
   int getInstructionLatency(const Instruction *I) override {
     return Impl.getInstructionLatency(I);
   }
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index c9a234deeb7d..b99e1eb9adf0 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1,9 +1,8 @@
 //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -124,7 +123,7 @@ public:
     return TTI::TCC_Basic;
   }
 
-  unsigned getCallCost(FunctionType *FTy, int NumArgs) {
+  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
     assert(FTy && "FunctionType must be provided to this routine.");
 
     // The target-independent implementation just measures the size of the
@@ -141,45 +140,10 @@ public:
 
   unsigned getInliningThresholdMultiplier() { return 1; }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys) {
-    switch (IID) {
-    default:
-      // Intrinsics rarely (if ever) have normal argument setup constraints.
-      // Model them as having a basic instruction cost.
-      // FIXME: This is wrong for libc intrinsics.
-      return TTI::TCC_Basic;
+  int getInlinerVectorBonusPercent() { return 150; }
 
-    case Intrinsic::annotation:
-    case Intrinsic::assume:
-    case Intrinsic::sideeffect:
-    case Intrinsic::dbg_declare:
-    case Intrinsic::dbg_value:
-    case Intrinsic::dbg_label:
-    case Intrinsic::invariant_start:
-    case Intrinsic::invariant_end:
-    case Intrinsic::launder_invariant_group:
-    case Intrinsic::strip_invariant_group:
-    case Intrinsic::is_constant:
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-    case Intrinsic::objectsize:
-    case Intrinsic::ptr_annotation:
-    case Intrinsic::var_annotation:
-    case Intrinsic::experimental_gc_result:
-    case Intrinsic::experimental_gc_relocate:
-    case Intrinsic::coro_alloc:
-    case Intrinsic::coro_begin:
-    case Intrinsic::coro_free:
-    case Intrinsic::coro_end:
-    case Intrinsic::coro_frame:
-    case Intrinsic::coro_size:
-    case Intrinsic::coro_suspend:
-    case Intrinsic::coro_param:
-    case Intrinsic::coro_subfn_addr:
-      // These intrinsics don't actually represent code after lowering.
-      return TTI::TCC_Free;
-    }
+  unsigned getMemcpyCost(const Instruction *I) {
+    return TTI::TCC_Expensive;
   }
 
   bool hasBranchDivergence() { return false; }
@@ -228,6 +192,13 @@ public:
     return true;
   }
 
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) {
+    return false;
+  }
+
   void getUnrollingPreferences(Loop *, ScalarEvolution &,
                                TTI::UnrollingPreferences &) {}
 
@@ -252,16 +223,42 @@ public:
 
   bool canMacroFuseCmp() { return false; }
 
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+                  DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo) {
+    return false;
+  }
+
   bool shouldFavorPostInc() const { return false; }
 
+  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
 
+  bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+    // By default, assume nontemporal memory stores are available for stores
+    // that are aligned and have a size that is a power of 2.
+    unsigned DataSize = DL.getTypeStoreSize(DataType);
+    return Alignment >= DataSize && isPowerOf2_32(DataSize);
+  }
+
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+    // By default, assume nontemporal memory loads are available for loads that
+    // are aligned and have a size that is a power of 2.
+    unsigned DataSize = DL.getTypeStoreSize(DataType);
+    return Alignment >= DataSize && isPowerOf2_32(DataSize);
+  }
+
   bool isLegalMaskedScatter(Type *DataType) { return false; }
 
   bool isLegalMaskedGather(Type *DataType) { return false; }
 
+  bool isLegalMaskedCompressStore(Type *DataType) { return false; }
+
+  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
+
   bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
 
   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
@@ -307,9 +304,9 @@ public:
 
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
-  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const {
-    return nullptr;
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const {
+    return {};
   }
 
   bool enableInterleavedAccessVectorization() { return false; }
@@ -583,6 +580,10 @@ public:
     return true;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 1;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
@@ -679,7 +680,7 @@ protected:
 public:
   using BaseT::getCallCost;
 
-  unsigned getCallCost(const Function *F, int NumArgs) {
+  unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
     assert(F && "A concrete function must be provided to this routine.");
 
     if (NumArgs < 0)
@@ -691,35 +692,34 @@ public:
       FunctionType *FTy = F->getFunctionType();
       SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
       return static_cast<T *>(this)
-          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
     }
 
     if (!static_cast<T *>(this)->isLoweredToCall(F))
       return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                              // directly.
 
-    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
+    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
   }
 
-  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
+  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+                       const User *U) {
     // Simply delegate to generic handling of the call.
     // FIXME: We should use instsimplify or something else to catch calls which
     // will constant fold with these arguments.
-    return static_cast<T *>(this)->getCallCost(F, Arguments.size());
+    return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
   }
 
   using BaseT::getGEPCost;
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
                  ArrayRef<const Value *> Operands) {
-    const GlobalValue *BaseGV = nullptr;
-    if (Ptr != nullptr) {
-      // TODO: will remove this when pointers have an opaque type.
-      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
-                 PointeeType &&
-             "explicit pointee type doesn't match operand's pointee type");
-      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
-    }
+    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
+    // TODO: will remove this when pointers have an opaque type.
+    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+               PointeeType &&
+           "explicit pointee type doesn't match operand's pointee type");
+    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
     bool HasBaseReg = (BaseGV == nullptr);
 
     auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
@@ -762,21 +762,60 @@ public:
       }
     }
 
-    // Assumes the address space is 0 when Ptr is nullptr.
-    unsigned AS =
-        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
-
     if (static_cast<T *>(this)->isLegalAddressingMode(
             TargetType, const_cast<GlobalValue *>(BaseGV),
-            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
+            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
+            Ptr->getType()->getPointerAddressSpace()))
       return TTI::TCC_Free;
     return TTI::TCC_Basic;
   }
 
-  using BaseT::getIntrinsicCost;
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Type *> ParamTys, const User *U) {
+    switch (IID) {
+    default:
+      // Intrinsics rarely (if ever) have normal argument setup constraints.
+      // Model them as having a basic instruction cost.
+      return TTI::TCC_Basic;
+
+    // TODO: other libc intrinsics.
+    case Intrinsic::memcpy:
+      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
+
+    case Intrinsic::annotation:
+    case Intrinsic::assume:
+    case Intrinsic::sideeffect:
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::dbg_label:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::launder_invariant_group:
+    case Intrinsic::strip_invariant_group:
+    case Intrinsic::is_constant:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+    case Intrinsic::experimental_gc_result:
+    case Intrinsic::experimental_gc_relocate:
+    case Intrinsic::coro_alloc:
+    case Intrinsic::coro_begin:
+    case Intrinsic::coro_free:
+    case Intrinsic::coro_end:
+    case Intrinsic::coro_frame:
+    case Intrinsic::coro_size:
+    case Intrinsic::coro_suspend:
+    case Intrinsic::coro_param:
+    case Intrinsic::coro_subfn_addr:
+      // These intrinsics don't actually represent code after lowering.
+      return TTI::TCC_Free;
+    }
+  }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments) {
+                            ArrayRef<const Value *> Arguments, const User *U) {
     // Delegate to the generic intrinsic handling code. This mostly provides an
     // opportunity for targets to (for example) special case the cost of
     // certain intrinsics based on constants used as arguments.
@@ -784,7 +823,7 @@ public:
     ParamTys.reserve(Arguments.size());
     for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
       ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
+    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
   }
 
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
@@ -808,22 +847,18 @@ public:
         // Just use the called value type.
         Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
         return static_cast<T *>(this)
-            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
       }
 
       SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
-      return static_cast<T *>(this)->getCallCost(F, Arguments);
+      return static_cast<T *>(this)->getCallCost(F, Arguments, U);
     }
 
-    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
-      // Result of a cmp instruction is often extended (to be used by other
-      // cmp instructions, logical or return instructions). These are usually
-      // nop on most sane targets.
-      if (isa<CmpInst>(CI->getOperand(0)))
-        return TTI::TCC_Free;
-      if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
-        return static_cast<T *>(this)->getExtCost(CI, Operands.back());
-    }
+    if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
+      // The old behaviour of generally treating extensions of icmp to be free
+      // has been removed. A target that needs it should override getUserCost().
+      return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
+                                                Operands.back());
 
     return static_cast<T *>(this)->getOperationCost(
         Operator::getOpcode(U), U->getType(),
diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h
index b05d384ab1a3..a1ffd03c4053 100644
--- a/include/llvm/Analysis/Trace.h
+++ b/include/llvm/Analysis/Trace.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/Trace.h - Represent one trace of LLVM code -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index d2e6df22425e..344f26806618 100644
--- a/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,12 +40,16 @@ public:
     return false;
   }
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
-  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal);
   FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
   FunctionModRefBehavior getModRefBehavior(const Function *F);
-  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
-  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+  ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+                           AAQueryInfo &AAQI);
 
 private:
   bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index 3bf9c5d20741..82cf8efeea54 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -1,9 +1,8 @@
 //===- TypeMetadataUtils.h - Utilities related to type metadata --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h
index b4141bbff28d..acbdf5dca32c 100644
--- a/include/llvm/Analysis/Utils/Local.h
+++ b/include/llvm/Analysis/Utils/Local.h
@@ -1,9 +1,8 @@
 //===- Local.h - Functions to perform local transformations -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/ValueLattice.h b/include/llvm/Analysis/ValueLattice.h
index 0744ca617e48..56519d7d0857 100644
--- a/include/llvm/Analysis/ValueLattice.h
+++ b/include/llvm/Analysis/ValueLattice.h
@@ -1,9 +1,8 @@
 //===- ValueLattice.h - Value constraint analysis ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Analysis/ValueLatticeUtils.h b/include/llvm/Analysis/ValueLatticeUtils.h
index 02072672e56e..a3bbb96129bf 100644
--- a/include/llvm/Analysis/ValueLatticeUtils.h
+++ b/include/llvm/Analysis/ValueLatticeUtils.h
@@ -1,9 +1,8 @@
 //===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index f46fdfcb608e..fa7e0e0eef7e 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/ValueTracking.h - Walk computations --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,8 +16,10 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Intrinsics.h"
 #include <cassert>
@@ -29,10 +30,10 @@ namespace llvm {
 class AddOperator;
 class APInt;
 class AssumptionCache;
-class DataLayout;
 class DominatorTree;
 class GEPOperator;
 class IntrinsicInst;
+class WithOverflowInst;
 struct KnownBits;
 class Loop;
 class LoopInfo;
@@ -223,7 +224,7 @@ class Value;
   /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
   /// i16 0x1234), return null. If the value is entirely undef and padding,
   /// return undef.
-  Value *isBytewiseValue(Value *V);
+  Value *isBytewiseValue(Value *V, const DataLayout &DL);
 
   /// Given an aggregrate and an sequence of indices, see if the scalar value
   /// indexed is already around as a register, for example if it were inserted
@@ -237,8 +238,18 @@ class Value;
 
   /// Analyze the specified pointer to see if it can be expressed as a base
   /// pointer plus a constant offset. Return the base and offset to the caller.
-  Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                          const DataLayout &DL);
+  ///
+  /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
+  /// creates and later unpacks the required APInt.
+  inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                                 const DataLayout &DL) {
+    APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+    Value *Base =
+        Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
+                                               /* AllowNonInbounds */ true);
+    Offset = OffsetAPInt.getSExtValue();
+    return Base;
+  }
   inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
                                                        int64_t &Offset,
                                                        const DataLayout &DL) {
@@ -351,7 +362,8 @@ class Value;
   /// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects
   /// should not assume that Curr and Prev share the same underlying object thus
   /// it shouldn't look through the phi above.
-  void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+  void GetUnderlyingObjects(const Value *V,
+                            SmallVectorImpl<const Value *> &Objects,
                             const DataLayout &DL, LoopInfo *LI = nullptr,
                             unsigned MaxLookup = 6);
 
@@ -411,7 +423,16 @@ class Value;
   bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI,
                                const DominatorTree *DT = nullptr);
 
-  enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+  enum class OverflowResult {
+    /// Always overflows in the direction of signed/unsigned min value.
+    AlwaysOverflowsLow,
+    /// Always overflows in the direction of signed/unsigned max value.
+    AlwaysOverflowsHigh,
+    /// May or may not overflow.
+    MayOverflow,
+    /// Never overflows.
+    NeverOverflows,
+  };
 
   OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                                const Value *RHS,
@@ -455,12 +476,17 @@ class Value;
                                              const Instruction *CxtI,
                                              const DominatorTree *DT);
 
-  /// Returns true if the arithmetic part of the \p II 's result is
+  /// Returns true if the arithmetic part of the \p WO 's result is
   /// used only along the paths control dependent on the computation
-  /// not overflowing, \p II being an <op>.with.overflow intrinsic.
-  bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+  /// not overflowing, \p WO being an <op>.with.overflow intrinsic.
+  bool isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
                                  const DominatorTree &DT);
 
+
+  /// Determine the possible constant range of an integer or vector of integer
+  /// value. This is intended as a cheap, non-recursive check.
+  ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true);
+
   /// Return true if this function can prove that the instruction I will
   /// always transfer execution to one of its successors (including the next
   /// instruction that follows within a basic block). E.g. this is not
@@ -506,6 +532,12 @@ class Value;
   /// value (all bits poison).
   const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
 
+  /// Return true if the given instruction must trigger undefined behavior
+  /// when I is executed with any operands which appear in KnownPoison holding
+  /// a full-poison value at the point of execution.
+  bool mustTriggerUB(const Instruction *I,
+                     const SmallSet<const Value *, 16>& KnownPoison);
+
   /// Return true if this function can prove that if PoisonI is executed
   /// and yields a full-poison value (all bits poison), then that will
   /// trigger undefined behavior.
@@ -584,6 +616,12 @@ class Value;
     return Result;
   }
 
+  /// Determine the pattern that a select with the given compare as its
+  /// predicate and given values as its true/false operands would match.
+  SelectPatternResult matchDecomposedSelectPattern(
+      CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+      Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
+
   /// Return the canonical comparison predicate for the specified
   /// minimum/maximum flavor.
   CmpInst::Predicate getMinMaxPred(SelectPatternFlavor SPF,
diff --git a/include/llvm/Analysis/VecFuncs.def b/include/llvm/Analysis/VecFuncs.def
new file mode 100644
index 000000000000..4c9206266d9a
--- /dev/null
+++ b/include/llvm/Analysis/VecFuncs.def
@@ -0,0 +1,250 @@
+//===-- VecFuncs.def - Library information -------------*- C++ -*-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file creates mappings from scalar math functions to vector
+// functions along with their vectorization factor. Mappings are currently
+// provided for the Accelerate framework, the MASS vector library, and SVML.
+
+#if !(defined(TLI_DEFINE_VECFUNC))
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
+#endif 
+
+#if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
+// Accelerate framework's Vector Functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4)
+TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("expf", "vexpf", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4)
+TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4)
+TLI_DEFINE_VECFUNC("logf", "vlogf", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4)
+TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4)
+TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sinf", "vsinf", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4)
+TLI_DEFINE_VECFUNC("cosf", "vcosf", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4)
+TLI_DEFINE_VECFUNC("tanf", "vtanf", 4)
+TLI_DEFINE_VECFUNC("asinf", "vasinf", 4)
+TLI_DEFINE_VECFUNC("acosf", "vacosf", 4)
+TLI_DEFINE_VECFUNC("atanf", "vatanf", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4)
+TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4)
+TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4)
+TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4)
+TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
+TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
+
+
+#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
+// IBM MASS library's Vector Functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
+TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
+TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
+TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
+TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
+TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
+TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
+TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
+TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
+TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+
+
+#elif defined(TLI_DEFINE_SVML_VECFUNCS)
+// Intel SVML library's Vector Functions
+
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+
+
+#else
+#error "Must choose which vector library functions are to be defined."
+#endif
+
+#undef TLI_DEFINE_VECFUNC
+#undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_MASSV_VECFUNCS
+#undef TLI_DEFINE_SVML_VECFUNCS
+
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index be4d4f17b9ad..d93d2bc4570b 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CheckedArithmetic.h"
 
 namespace llvm {
 
@@ -36,13 +36,12 @@ enum ID : unsigned;
 }
 
 /// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// This method returns true if the intrinsic's argument types are all scalars
+/// for the scalar form of the intrinsic and all vectors (or scalars handled by
+/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
 bool isTriviallyVectorizable(Intrinsic::ID ID);
 
-/// Identifies if the intrinsic has a scalar operand. It checks for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
 bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
 
 /// Returns intrinsic ID for call.
@@ -78,6 +77,12 @@ Value *findScalarElement(Value *V, unsigned EltNo);
 /// a sequence of instructions that broadcast a single value into a vector.
 const Value *getSplatValue(const Value *V);
 
+/// Return true if the input value is known to be a vector with all identical
+/// elements (potentially including undefined elements).
+/// This may be more powerful than the related getSplatValue() because it is
+/// not limited by finding a scalar source value to a splatted vector.
+bool isSplatValue(const Value *V, unsigned Depth = 0);
+
 /// Compute a map of integer instructions to their minimum legal type
 /// size.
 ///
@@ -223,6 +228,20 @@ Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
 /// elements, it will be padded with undefs.
 Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
 
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are false or undef.  That is, return true
+/// if all lanes can be assumed inactive.
+bool maskIsAllZeroOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are true or undef.  That is, return true
+/// if all lanes can be assumed active.
+bool maskIsAllOneOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
+/// with a bit set for each lane which may be active.
+APInt possiblyDemandedEltsInMask(Value *Mask);
+  
 /// The group of interleaved loads/stores sharing the same stride and
 /// close to each other.
 ///
@@ -251,10 +270,10 @@ Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
 /// the interleaved store group doesn't allow gaps.
 template <typename InstTy> class InterleaveGroup {
 public:
-  InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
+  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
       : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
 
-  InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
+  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
       : Align(Align), InsertPos(Instr) {
     assert(Align && "The alignment should be non-zero");
 
@@ -266,19 +285,23 @@ public:
   }
 
   bool isReverse() const { return Reverse; }
-  unsigned getFactor() const { return Factor; }
-  unsigned getAlignment() const { return Align; }
-  unsigned getNumMembers() const { return Members.size(); }
+  uint32_t getFactor() const { return Factor; }
+  uint32_t getAlignment() const { return Align; }
+  uint32_t getNumMembers() const { return Members.size(); }
 
   /// Try to insert a new member \p Instr with index \p Index and
   /// alignment \p NewAlign. The index is related to the leader and it could be
   /// negative if it is the new leader.
   ///
   /// \returns false if the instruction doesn't belong to the group.
-  bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
+  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
     assert(NewAlign && "The new member's alignment should be non-zero");
 
-    int Key = Index + SmallestKey;
+    // Make sure the key fits in an int32_t.
+    Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
+    if (!MaybeKey)
+      return false;
+    int32_t Key = *MaybeKey;
 
     // Skip if there is already a member with the same index.
     if (Members.find(Key) != Members.end())
@@ -286,13 +309,19 @@ public:
 
     if (Key > LargestKey) {
       // The largest index is always less than the interleave factor.
-      if (Index >= static_cast<int>(Factor))
+      if (Index >= static_cast<int32_t>(Factor))
         return false;
 
       LargestKey = Key;
     } else if (Key < SmallestKey) {
+
+      // Make sure the largest index fits in an int32_t.
+      Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
+      if (!MaybeLargestIndex)
+        return false;
+
       // The largest index is always less than the interleave factor.
-      if (LargestKey - Key >= static_cast<int>(Factor))
+      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
         return false;
 
       SmallestKey = Key;
@@ -307,8 +336,8 @@ public:
   /// Get the member with the given index \p Index
   ///
   /// \returns nullptr if contains no such member.
-  InstTy *getMember(unsigned Index) const {
-    int Key = SmallestKey + Index;
+  InstTy *getMember(uint32_t Index) const {
+    int32_t Key = SmallestKey + Index;
     auto Member = Members.find(Key);
     if (Member == Members.end())
       return nullptr;
@@ -318,7 +347,7 @@ public:
 
   /// Get the index for the given member. Unlike the key in the member
   /// map, the index starts from 0.
-  unsigned getIndex(const InstTy *Instr) const {
+  uint32_t getIndex(const InstTy *Instr) const {
     for (auto I : Members) {
       if (I.second == Instr)
         return I.first - SmallestKey;
@@ -356,12 +385,12 @@ public:
   }
 
 private:
-  unsigned Factor; // Interleave Factor.
+  uint32_t Factor; // Interleave Factor.
   bool Reverse;
-  unsigned Align;
-  DenseMap<int, InstTy *> Members;
-  int SmallestKey = 0;
-  int LargestKey = 0;
+  uint32_t Align;
+  DenseMap<int32_t, InstTy *> Members;
+  int32_t SmallestKey = 0;
+  int32_t LargestKey = 0;
 
   // To avoid breaking dependences, vectorized instructions of an interleave
   // group should be inserted at either the first load or the last store in
diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h
index 285a7c022a24..b0c603497805 100644
--- a/include/llvm/AsmParser/Parser.h
+++ b/include/llvm/AsmParser/Parser.h
@@ -1,9 +1,8 @@
 //===-- Parser.h - Parser for LLVM IR text assembly files -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/AsmParser/SlotMapping.h b/include/llvm/AsmParser/SlotMapping.h
index bd7e8fcad8bc..0e95eb816b4c 100644
--- a/include/llvm/AsmParser/SlotMapping.h
+++ b/include/llvm/AsmParser/SlotMapping.h
@@ -1,9 +1,8 @@
 //===-- SlotMapping.h - Slot number mapping for unnamed values --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
index de44f41720ed..7332b2a7ea89 100644
--- a/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
+++ b/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
@@ -1,9 +1,8 @@
 //===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,7 +16,7 @@
 #ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
 #define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
 
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 
 namespace llvm {
 namespace AMDGPU {
@@ -34,22 +33,22 @@ namespace V3 {
 class MetadataVerifier {
   bool Strict;
 
-  bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
-                    function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
-  bool verifyInteger(msgpack::Node &Node);
-  bool verifyArray(msgpack::Node &Node,
-                   function_ref<bool(msgpack::Node &)> verifyNode,
+  bool verifyScalar(msgpack::DocNode &Node, msgpack::Type SKind,
+                    function_ref<bool(msgpack::DocNode &)> verifyValue = {});
+  bool verifyInteger(msgpack::DocNode &Node);
+  bool verifyArray(msgpack::DocNode &Node,
+                   function_ref<bool(msgpack::DocNode &)> verifyNode,
                    Optional<size_t> Size = None);
-  bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
-                   function_ref<bool(msgpack::Node &)> verifyNode);
+  bool verifyEntry(msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+                   function_ref<bool(msgpack::DocNode &)> verifyNode);
   bool
-  verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
-                    msgpack::ScalarNode::ScalarKind SKind,
-                    function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
-  bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
+  verifyScalarEntry(msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+                    msgpack::Type SKind,
+                    function_ref<bool(msgpack::DocNode &)> verifyValue = {});
+  bool verifyIntegerEntry(msgpack::MapDocNode &MapNode, StringRef Key,
                           bool Required);
-  bool verifyKernelArgs(msgpack::Node &Node);
-  bool verifyKernel(msgpack::Node &Node);
+  bool verifyKernelArgs(msgpack::DocNode &Node);
+  bool verifyKernel(msgpack::DocNode &Node);
 
 public:
   /// Construct a MetadataVerifier, specifying whether it will operate in \p
@@ -59,7 +58,7 @@ public:
   /// Verify given HSA metadata.
   ///
   /// \returns True when successful, false when metadata is invalid.
-  bool verify(msgpack::Node &HSAMetadataRoot);
+  bool verify(msgpack::DocNode &HSAMetadataRoot);
 };
 
 } // end namespace V3
diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h
index 7b973c03cc80..0fe38a437725 100644
--- a/include/llvm/BinaryFormat/COFF.h
+++ b/include/llvm/BinaryFormat/COFF.h
@@ -1,9 +1,8 @@
 //===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -371,13 +370,15 @@ enum RelocationTypesARM : unsigned {
   IMAGE_REL_ARM_TOKEN = 0x0005,
   IMAGE_REL_ARM_BLX24 = 0x0008,
   IMAGE_REL_ARM_BLX11 = 0x0009,
+  IMAGE_REL_ARM_REL32 = 0x000A,
   IMAGE_REL_ARM_SECTION = 0x000E,
   IMAGE_REL_ARM_SECREL = 0x000F,
   IMAGE_REL_ARM_MOV32A = 0x0010,
   IMAGE_REL_ARM_MOV32T = 0x0011,
   IMAGE_REL_ARM_BRANCH20T = 0x0012,
   IMAGE_REL_ARM_BRANCH24T = 0x0014,
-  IMAGE_REL_ARM_BLX23T = 0x0015
+  IMAGE_REL_ARM_BLX23T = 0x0015,
+  IMAGE_REL_ARM_PAIR = 0x0016,
 };
 
 enum RelocationTypesARM64 : unsigned {
@@ -398,9 +399,10 @@ enum RelocationTypesARM64 : unsigned {
   IMAGE_REL_ARM64_ADDR64 = 0x000E,
   IMAGE_REL_ARM64_BRANCH19 = 0x000F,
   IMAGE_REL_ARM64_BRANCH14 = 0x0010,
+  IMAGE_REL_ARM64_REL32 = 0x0011,
 };
 
-enum COMDATType : unsigned {
+enum COMDATType : uint8_t {
   IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
   IMAGE_COMDAT_SELECT_ANY,
   IMAGE_COMDAT_SELECT_SAME_SIZE,
diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def
index 6ad3cb57f62f..b0f78d0fd61f 100644
--- a/include/llvm/BinaryFormat/Dwarf.def
+++ b/include/llvm/BinaryFormat/Dwarf.def
@@ -1,9 +1,8 @@
 //===- llvm/Support/Dwarf.def - Dwarf definitions ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -355,7 +354,13 @@ HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU)
 HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU)
 HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU)
 HANDLE_DW_AT(0x2111, GNU_call_site_value, 0, GNU)
+HANDLE_DW_AT (0x2112, GNU_call_site_data_value, 0, GNU)
+HANDLE_DW_AT (0x2113, GNU_call_site_target, 0, GNU)
+HANDLE_DW_AT (0x2114, GNU_call_site_target_clobbered, 0, GNU)
+HANDLE_DW_AT (0x2115, GNU_tail_call, 0, GNU)
+HANDLE_DW_AT (0x2116, GNU_all_tail_call_sites, 0, GNU)
 HANDLE_DW_AT(0x2117, GNU_all_call_sites, 0, GNU)
+HANDLE_DW_AT (0x2118, GNU_all_source_call_sites, 0, GNU)
 HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU)
 // Extensions for Fission proposal.
 HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU)
@@ -387,6 +392,7 @@ HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND)
 HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM)
 HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM)
 HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM)
+HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM)
 // Apple extensions.
 HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE)
 HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE)
@@ -627,6 +633,8 @@ HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF)
 // Vendor extensions:
 // Extensions for GNU-style thread-local storage.
 HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU)
+// The GNU entry value extension.
+HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU)
 // Extensions for Fission proposal.
 HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
 HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
index 525a04d5e6cf..76d9c365c0a8 100644
--- a/include/llvm/BinaryFormat/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -1,9 +1,8 @@
 //===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -130,7 +129,9 @@ enum LocationAtom {
 #include "llvm/BinaryFormat/Dwarf.def"
   DW_OP_lo_user = 0xe0,
   DW_OP_hi_user = 0xff,
-  DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata.
+  DW_OP_LLVM_fragment = 0x1000,   ///< Only used in LLVM metadata.
+  DW_OP_LLVM_convert = 0x1001,    ///< Only used in LLVM metadata.
+  DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
 };
 
 enum TypeKind : uint8_t {
diff --git a/include/llvm/BinaryFormat/DynamicTags.def b/include/llvm/BinaryFormat/DynamicTags.def
index 2e15cc30fca7..aec408bd2d72 100644
--- a/include/llvm/BinaryFormat/DynamicTags.def
+++ b/include/llvm/BinaryFormat/DynamicTags.def
@@ -6,6 +6,11 @@
 // such as DT_HIOS, etc. to allow using this file to in other contexts.
 // For example we can use it to generate a stringification switch statement.
 
+#ifndef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define AARCH64_DYNAMIC_TAG_DEFINED
+#endif
+
 #ifndef HEXAGON_DYNAMIC_TAG
 #define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG_DEFINED
@@ -16,6 +21,11 @@
 #define MIPS_DYNAMIC_TAG_DEFINED
 #endif
 
+#ifndef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG_DEFINED
+#endif
+
 #ifndef PPC64_DYNAMIC_TAG
 #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG_DEFINED
@@ -107,6 +117,10 @@ DYNAMIC_TAG(VERNEED, 0X6FFFFFFE)    // The address of the version dependency
                                     // table.
 DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
 
+// AArch64 specific dynamic table entries
+AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
+AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
+
 // Hexagon specific dynamic table entries
 HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
 HEXAGON_DYNAMIC_TAG(HEXAGON_VER, 0x70000001)
@@ -190,17 +204,27 @@ MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034)        // Points to the base
 MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035)  // Relative offset of run time loader
                                                 // map, used for debugging.
 
+// PPC specific dynamic table entries.
+PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI.
+PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
+
 // PPC64 specific dynamic table entries.
 PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
                                            // first glink lazy resolver stub.
 
 // Sun machine-independent extensions.
 DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self
+DYNAMIC_TAG(USED, 0x7FFFFFFE)      // Same as DT_NEEDED
 DYNAMIC_TAG(FILTER, 0x7FFFFFFF)    // Shared object to get values from
 
 
 #ifdef DYNAMIC_TAG_MARKER_DEFINED
 #undef DYNAMIC_TAG_MARKER
+#undef DYNAMIC_TAG_MARKER_DEFINED
+#endif
+#ifdef AARCH64_DYNAMIC_TAG_DEFINED
+#undef AARCH64_DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG_DEFINED
 #endif
 #ifdef MIPS_DYNAMIC_TAG_DEFINED
 #undef MIPS_DYNAMIC_TAG
@@ -210,6 +234,10 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF)    // Shared object to get values from
 #undef HEXAGON_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG_DEFINED
 #endif
+#ifdef PPC_DYNAMIC_TAG_DEFINED
+#undef PPC_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG_DEFINED
+#endif
 #ifdef PPC64_DYNAMIC_TAG_DEFINED
 #undef PPC64_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG_DEFINED
diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h
index ce35d127d433..2bd711137845 100644
--- a/include/llvm/BinaryFormat/ELF.h
+++ b/include/llvm/BinaryFormat/ELF.h
@@ -1,9 +1,8 @@
 //===- llvm/BinaryFormat/ELF.h - ELF constants and structures ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -703,15 +702,20 @@ enum : unsigned {
   EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
   EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
   EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+  EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
   EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
+  // AMDGCN GFX10.
+  EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
+  EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
+  EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
 
   // Reserved for AMDGCN-based processors.
   EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
-  EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
+  EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x032,
 
   // First/last AMDGCN-based processors.
   EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
-  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
+  EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1012,
 
   // Indicates if the "xnack" target feature is enabled for all code contained
   // in the object.
@@ -839,6 +843,10 @@ enum : unsigned {
   SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c02, // LLVM Call Graph Profile.
   SHT_LLVM_ADDRSIG = 0x6fff4c03,        // List of address-significant symbols
                                         // for safe ICF.
+  SHT_LLVM_DEPENDENT_LIBRARIES = 0x6fff4c04, // LLVM Dependent Library Specifiers.
+  SHT_LLVM_SYMPART = 0x6fff4c05,        // Symbol partition specification.
+  SHT_LLVM_PART_EHDR = 0x6fff4c06,      // ELF header for loadable partition.
+  SHT_LLVM_PART_PHDR = 0x6fff4c07,      // Phdrs for loadable partition.
   // Android's experimental support for SHT_RELR sections.
   // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
   SHT_ANDROID_RELR = 0x6fffff00,        // Relocation entries; only offsets.
@@ -1340,6 +1348,14 @@ enum {
   NT_FREEBSD_PROCSTAT_AUXV = 16,
 };
 
+// Generic note types
+enum : unsigned {
+  NT_VERSION = 1,
+  NT_ARCH = 2,
+  NT_GNU_BUILD_ATTRIBUTE_OPEN = 0x100,
+  NT_GNU_BUILD_ATTRIBUTE_FUNC = 0x101,
+};
+
 enum {
   NT_GNU_ABI_TAG = 1,
   NT_GNU_HWCAP = 2,
@@ -1352,13 +1368,65 @@ enum {
 enum : unsigned {
   GNU_PROPERTY_STACK_SIZE = 1,
   GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2,
-  GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002
+  GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000,
+  GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002,
+  GNU_PROPERTY_X86_ISA_1_NEEDED = 0xc0008000,
+  GNU_PROPERTY_X86_FEATURE_2_NEEDED = 0xc0008001,
+  GNU_PROPERTY_X86_ISA_1_USED = 0xc0010000,
+  GNU_PROPERTY_X86_FEATURE_2_USED = 0xc0010001,
 };
 
-// CET properties
-enum {
+// AArch64 processor feature bits.
+enum : unsigned {
+  GNU_PROPERTY_AARCH64_FEATURE_1_BTI = 1 << 0,
+  GNU_PROPERTY_AARCH64_FEATURE_1_PAC = 1 << 1,
+};
+
+// x86 processor feature bits.
+enum : unsigned {
   GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0,
-  GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
+  GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1,
+
+  GNU_PROPERTY_X86_ISA_1_CMOV = 1 << 0,
+  GNU_PROPERTY_X86_ISA_1_SSE = 1 << 1,
+  GNU_PROPERTY_X86_ISA_1_SSE2 = 1 << 2,
+  GNU_PROPERTY_X86_ISA_1_SSE3 = 1 << 3,
+  GNU_PROPERTY_X86_ISA_1_SSSE3 = 1 << 4,
+  GNU_PROPERTY_X86_ISA_1_SSE4_1 = 1 << 5,
+  GNU_PROPERTY_X86_ISA_1_SSE4_2 = 1 << 6,
+  GNU_PROPERTY_X86_ISA_1_AVX = 1 << 7,
+  GNU_PROPERTY_X86_ISA_1_AVX2 = 1 << 8,
+  GNU_PROPERTY_X86_ISA_1_FMA = 1 << 9,
+  GNU_PROPERTY_X86_ISA_1_AVX512F = 1 << 10,
+  GNU_PROPERTY_X86_ISA_1_AVX512CD = 1 << 11,
+  GNU_PROPERTY_X86_ISA_1_AVX512ER = 1 << 12,
+  GNU_PROPERTY_X86_ISA_1_AVX512PF = 1 << 13,
+  GNU_PROPERTY_X86_ISA_1_AVX512VL = 1 << 14,
+  GNU_PROPERTY_X86_ISA_1_AVX512DQ = 1 << 15,
+  GNU_PROPERTY_X86_ISA_1_AVX512BW = 1 << 16,
+  GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS = 1 << 17,
+  GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW = 1 << 18,
+  GNU_PROPERTY_X86_ISA_1_AVX512_BITALG = 1 << 19,
+  GNU_PROPERTY_X86_ISA_1_AVX512_IFMA = 1 << 20,
+  GNU_PROPERTY_X86_ISA_1_AVX512_VBMI = 1 << 21,
+  GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2 = 1 << 22,
+  GNU_PROPERTY_X86_ISA_1_AVX512_VNNI = 1 << 23,
+
+  GNU_PROPERTY_X86_FEATURE_2_X86 = 1 << 0,
+  GNU_PROPERTY_X86_FEATURE_2_X87 = 1 << 1,
+  GNU_PROPERTY_X86_FEATURE_2_MMX = 1 << 2,
+  GNU_PROPERTY_X86_FEATURE_2_XMM = 1 << 3,
+  GNU_PROPERTY_X86_FEATURE_2_YMM = 1 << 4,
+  GNU_PROPERTY_X86_FEATURE_2_ZMM = 1 << 5,
+  GNU_PROPERTY_X86_FEATURE_2_FXSR = 1 << 6,
+  GNU_PROPERTY_X86_FEATURE_2_XSAVE = 1 << 7,
+  GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT = 1 << 8,
+  GNU_PROPERTY_X86_FEATURE_2_XSAVEC = 1 << 9,
+};
+
+// AMDGPU-specific section indices.
+enum {
+  SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
 };
 
 // AMD specific notes. (Code Object V2)
diff --git a/include/llvm/BinaryFormat/ELFRelocs/ARM.def b/include/llvm/BinaryFormat/ELFRelocs/ARM.def
index 730fc5b8836c..e0709fb81813 100644
--- a/include/llvm/BinaryFormat/ELFRelocs/ARM.def
+++ b/include/llvm/BinaryFormat/ELFRelocs/ARM.def
@@ -135,4 +135,7 @@ ELF_RELOC(R_ARM_PRIVATE_15,             0x7f)
 ELF_RELOC(R_ARM_ME_TOO,                 0x80)
 ELF_RELOC(R_ARM_THM_TLS_DESCSEQ16,      0x81)
 ELF_RELOC(R_ARM_THM_TLS_DESCSEQ32,      0x82)
+ELF_RELOC(R_ARM_THM_BF16,               0x88)
+ELF_RELOC(R_ARM_THM_BF12,               0x89)
+ELF_RELOC(R_ARM_THM_BF18,               0x8a)
 ELF_RELOC(R_ARM_IRELATIVE,              0xa0)
diff --git a/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
index e4f8ee0ebe2b..28036889cca6 100644
--- a/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
+++ b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
@@ -27,9 +27,25 @@
 #undef R_PPC_GOT16_HI
 #undef R_PPC_GOT16_HA
 #undef R_PPC_PLTREL24
+#undef R_PPC_COPY
+#undef R_PPC_GLOB_DAT
 #undef R_PPC_JMP_SLOT
+#undef R_PPC_RELATIVE
 #undef R_PPC_LOCAL24PC
+#undef R_PPC_UADDR32
+#undef R_PPC_UADDR16
 #undef R_PPC_REL32
+#undef R_PPC_PLT32
+#undef R_PPC_PLTREL32
+#undef R_PPC_PLT16_LO
+#undef R_PPC_PLT16_HI
+#undef R_PPC_PLT16_HA
+#undef R_PPC_SDAREL16
+#undef R_PPC_SECTOFF
+#undef R_PPC_SECTOFF_LO
+#undef R_PPC_SECTOFF_HI
+#undef R_PPC_SECTOFF_HA
+#undef R_PPC_ADDR30
 #undef R_PPC_TLS
 #undef R_PPC_DTPMOD32
 #undef R_PPC_TPREL16
@@ -84,9 +100,25 @@ ELF_RELOC(R_PPC_GOT16_LO,               15)
 ELF_RELOC(R_PPC_GOT16_HI,               16)
 ELF_RELOC(R_PPC_GOT16_HA,               17)
 ELF_RELOC(R_PPC_PLTREL24,               18)
+ELF_RELOC(R_PPC_COPY,                   19)
+ELF_RELOC(R_PPC_GLOB_DAT,               20)
 ELF_RELOC(R_PPC_JMP_SLOT,               21)
+ELF_RELOC(R_PPC_RELATIVE,               22)
 ELF_RELOC(R_PPC_LOCAL24PC,              23)
+ELF_RELOC(R_PPC_UADDR32,                24)
+ELF_RELOC(R_PPC_UADDR16,                25)
 ELF_RELOC(R_PPC_REL32,                  26)
+ELF_RELOC(R_PPC_PLT32,                  27)
+ELF_RELOC(R_PPC_PLTREL32,               28)
+ELF_RELOC(R_PPC_PLT16_LO,               29)
+ELF_RELOC(R_PPC_PLT16_HI,               30)
+ELF_RELOC(R_PPC_PLT16_HA,               31)
+ELF_RELOC(R_PPC_SDAREL16,               32)
+ELF_RELOC(R_PPC_SECTOFF,                33)
+ELF_RELOC(R_PPC_SECTOFF_LO,             34)
+ELF_RELOC(R_PPC_SECTOFF_HI,             35)
+ELF_RELOC(R_PPC_SECTOFF_HA,             36)
+ELF_RELOC(R_PPC_ADDR30,                 37)
 ELF_RELOC(R_PPC_TLS,                    67)
 ELF_RELOC(R_PPC_DTPMOD32,               68)
 ELF_RELOC(R_PPC_TPREL16,                69)
@@ -117,6 +149,7 @@ ELF_RELOC(R_PPC_GOT_DTPREL16_HI,        93)
 ELF_RELOC(R_PPC_GOT_DTPREL16_HA,        94)
 ELF_RELOC(R_PPC_TLSGD,                  95)
 ELF_RELOC(R_PPC_TLSLD,                  96)
+ELF_RELOC(R_PPC_IRELATIVE,              248)
 ELF_RELOC(R_PPC_REL16,                  249)
 ELF_RELOC(R_PPC_REL16_LO,               250)
 ELF_RELOC(R_PPC_REL16_HI,               251)
diff --git a/include/llvm/BinaryFormat/MachO.def b/include/llvm/BinaryFormat/MachO.def
index 95de48d2b19e..76dcc58ba048 100644
--- a/include/llvm/BinaryFormat/MachO.def
+++ b/include/llvm/BinaryFormat/MachO.def
@@ -1,9 +1,8 @@
 //,,,-- llvm/Support/MachO.def - The MachO file definitions -----*- C++ -*-,,,//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //,,,----------------------------------------------------------------------,,,//
 //
diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h
index b3d60984249f..a01393a3b303 100644
--- a/include/llvm/BinaryFormat/MachO.h
+++ b/include/llvm/BinaryFormat/MachO.h
@@ -1,9 +1,8 @@
 //===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++/-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -335,6 +334,7 @@ enum {
   N_WEAK_DEF = 0x0080u,
   N_SYMBOL_RESOLVER = 0x0100u,
   N_ALT_ENTRY = 0x0200u,
+  N_COLD_FUNC = 0x0400u,
   // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL()
   // as these are in the top 8 bits.
   SELF_LIBRARY_ORDINAL = 0x0,
@@ -487,6 +487,7 @@ enum PlatformType {
   PLATFORM_TVOS = 3,
   PLATFORM_WATCHOS = 4,
   PLATFORM_BRIDGEOS = 5,
+  PLATFORM_MACCATALYST = 6,
   PLATFORM_IOSSIMULATOR = 7,
   PLATFORM_TVOSSIMULATOR = 8,
   PLATFORM_WATCHOSSIMULATOR = 9
@@ -942,8 +943,13 @@ struct fat_arch_64 {
 // Structs from <mach-o/reloc.h>
 struct relocation_info {
   int32_t r_address;
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
+  uint32_t r_type : 4,  r_extern : 1, r_length : 2, r_pcrel : 1,
+      r_symbolnum : 24;
+#else
   uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1,
       r_type : 4;
+#endif
 };
 
 struct scattered_relocation_info {
@@ -1396,7 +1402,8 @@ inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) {
 enum : uint32_t {
   // Capability bits used in the definition of cpu_type.
   CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits
-  CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI
+  CPU_ARCH_ABI64 = 0x01000000, // 64 bit ABI
+  CPU_ARCH_ABI64_32 = 0x02000000, // ILP32 ABI on 64-bit hardware
 };
 
 // Constants for the cputype field.
@@ -1409,6 +1416,7 @@ enum CPUType {
   CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
   CPU_TYPE_ARM = 12,
   CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
+  CPU_TYPE_ARM64_32 = CPU_TYPE_ARM | CPU_ARCH_ABI64_32,
   CPU_TYPE_SPARC = 14,
   CPU_TYPE_POWERPC = 18,
   CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
@@ -1477,7 +1485,12 @@ enum CPUSubTypeARM {
   CPU_SUBTYPE_ARM_V7EM = 16
 };
 
-enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 };
+enum CPUSubTypeARM64 {
+  CPU_SUBTYPE_ARM64_ALL = 0,
+  CPU_SUBTYPE_ARM64E = 2,
+};
+
+enum CPUSubTypeARM64_32 { CPU_SUBTYPE_ARM64_32_V8 = 1 };
 
 enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 };
 
diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h
index 04801f810be3..cd9833ec4d22 100644
--- a/include/llvm/BinaryFormat/Magic.h
+++ b/include/llvm/BinaryFormat/Magic.h
@@ -1,9 +1,8 @@
 //===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -40,11 +39,14 @@ struct file_magic {
     macho_dsym_companion,                     ///< Mach-O dSYM companion file
     macho_kext_bundle,                        ///< Mach-O kext bundle file
     macho_universal_binary,                   ///< Mach-O universal binary
+    minidump,                                 ///< Windows minidump file
     coff_cl_gl_object,   ///< Microsoft cl.exe's intermediate code file
     coff_object,         ///< COFF object file
     coff_import_library, ///< COFF import library
     pecoff_executable,   ///< PECOFF executable file
     windows_resource,    ///< Windows compiled resource file (.res)
+    xcoff_object_32,     ///< 32-bit XCOFF object file
+    xcoff_object_64,     ///< 64-bit XCOFF object file
     wasm_object,         ///< WebAssembly Object file
     pdb,                 ///< Windows PDB debug info file
   };
diff --git a/include/llvm/BinaryFormat/Minidump.h b/include/llvm/BinaryFormat/Minidump.h
new file mode 100644
index 000000000000..65c17d1eb00c
--- /dev/null
+++ b/include/llvm/BinaryFormat/Minidump.h
@@ -0,0 +1,203 @@
+//===- Minidump.h - Minidump constants and structures -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains constants and data structures pertaining to the Windows
+// Minidump core file format.
+//
+// Reference:
+// https://msdn.microsoft.com/en-us/library/windows/desktop/ms679293(v=vs.85).aspx
+// https://chromium.googlesource.com/breakpad/breakpad/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MINIDUMP_H
+#define LLVM_BINARYFORMAT_MINIDUMP_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace minidump {
+
+/// The minidump header is the first part of a minidump file. It identifies the
+/// file as a minidump file, and gives the location of the stream directory.
+struct Header {
+  static constexpr uint32_t MagicSignature = 0x504d444d; // "MDMP" on disk
+  static constexpr uint16_t MagicVersion = 0xa793;
+
+  support::ulittle32_t Signature;
+  // The high 16 bits of version field are implementation specific. The low 16
+  // bits should be MagicVersion.
+  support::ulittle32_t Version;
+  support::ulittle32_t NumberOfStreams;
+  support::ulittle32_t StreamDirectoryRVA;
+  support::ulittle32_t Checksum;
+  support::ulittle32_t TimeDateStamp;
+  support::ulittle64_t Flags;
+};
+static_assert(sizeof(Header) == 32, "");
+
+/// The type of a minidump stream identifies its contents. Stream numbers after
+/// LastReserved are for application-defined data streams.
+enum class StreamType : uint32_t {
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+  Unused = 0,
+  LastReserved = 0x0000ffff,
+};
+
+/// Specifies the location (and size) of various objects in the minidump file.
+/// The location is relative to the start of the file.
+struct LocationDescriptor {
+  support::ulittle32_t DataSize;
+  support::ulittle32_t RVA;
+};
+static_assert(sizeof(LocationDescriptor) == 8, "");
+
+/// Describes a single memory range (both its VM address and where to find it in
+/// the file) of the process from which this minidump file was generated.
+struct MemoryDescriptor {
+  support::ulittle64_t StartOfMemoryRange;
+  LocationDescriptor Memory;
+};
+static_assert(sizeof(MemoryDescriptor) == 16, "");
+
+/// Specifies the location and type of a single stream in the minidump file. The
+/// minidump stream directory is an array of entries of this type, with its size
+/// given by Header.NumberOfStreams.
+struct Directory {
+  support::little_t<StreamType> Type;
+  LocationDescriptor Location;
+};
+static_assert(sizeof(Directory) == 12, "");
+
+/// The processor architecture of the system that generated this minidump. Used
+/// in the ProcessorArch field of the SystemInfo stream.
+enum class ProcessorArchitecture : uint16_t {
+#define HANDLE_MDMP_ARCH(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+};
+
+/// The OS Platform of the system that generated this minidump. Used in the
+/// PlatformId field of the SystemInfo stream.
+enum class OSPlatform : uint32_t {
+#define HANDLE_MDMP_PLATFORM(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+};
+
+/// Detailed information about the processor of the system that generated this
+/// minidump. Its interpretation depends on the ProcessorArchitecture enum.
+union CPUInfo {
+  struct X86Info {
+    char VendorID[12];                        // cpuid 0: ebx, edx, ecx
+    support::ulittle32_t VersionInfo;         // cpuid 1: eax
+    support::ulittle32_t FeatureInfo;         // cpuid 1: edx
+    support::ulittle32_t AMDExtendedFeatures; // cpuid 0x80000001, ebx
+  } X86;
+  struct ArmInfo {
+    support::ulittle32_t CPUID;
+    support::ulittle32_t ElfHWCaps; // linux specific, 0 otherwise
+  } Arm;
+  struct OtherInfo {
+    uint8_t ProcessorFeatures[16];
+  } Other;
+};
+static_assert(sizeof(CPUInfo) == 24, "");
+
+/// The SystemInfo stream, containing various information about the system where
+/// this minidump was generated.
+struct SystemInfo {
+  support::little_t<ProcessorArchitecture> ProcessorArch;
+  support::ulittle16_t ProcessorLevel;
+  support::ulittle16_t ProcessorRevision;
+
+  uint8_t NumberOfProcessors;
+  uint8_t ProductType;
+
+  support::ulittle32_t MajorVersion;
+  support::ulittle32_t MinorVersion;
+  support::ulittle32_t BuildNumber;
+  support::little_t<OSPlatform> PlatformId;
+  support::ulittle32_t CSDVersionRVA;
+
+  support::ulittle16_t SuiteMask;
+  support::ulittle16_t Reserved;
+
+  CPUInfo CPU;
+};
+static_assert(sizeof(SystemInfo) == 56, "");
+
+struct VSFixedFileInfo {
+  support::ulittle32_t Signature;
+  support::ulittle32_t StructVersion;
+  support::ulittle32_t FileVersionHigh;
+  support::ulittle32_t FileVersionLow;
+  support::ulittle32_t ProductVersionHigh;
+  support::ulittle32_t ProductVersionLow;
+  support::ulittle32_t FileFlagsMask;
+  support::ulittle32_t FileFlags;
+  support::ulittle32_t FileOS;
+  support::ulittle32_t FileType;
+  support::ulittle32_t FileSubtype;
+  support::ulittle32_t FileDateHigh;
+  support::ulittle32_t FileDateLow;
+};
+static_assert(sizeof(VSFixedFileInfo) == 52, "");
+
+inline bool operator==(const VSFixedFileInfo &LHS, const VSFixedFileInfo &RHS) {
+  return memcmp(&LHS, &RHS, sizeof(VSFixedFileInfo)) == 0;
+}
+
+struct Module {
+  support::ulittle64_t BaseOfImage;
+  support::ulittle32_t SizeOfImage;
+  support::ulittle32_t Checksum;
+  support::ulittle32_t TimeDateStamp;
+  support::ulittle32_t ModuleNameRVA;
+  VSFixedFileInfo VersionInfo;
+  LocationDescriptor CvRecord;
+  LocationDescriptor MiscRecord;
+  support::ulittle64_t Reserved0;
+  support::ulittle64_t Reserved1;
+};
+static_assert(sizeof(Module) == 108, "");
+
+/// Describes a single thread in the minidump file. Part of the ThreadList
+/// stream.
+struct Thread {
+  support::ulittle32_t ThreadId;
+  support::ulittle32_t SuspendCount;
+  support::ulittle32_t PriorityClass;
+  support::ulittle32_t Priority;
+  support::ulittle64_t EnvironmentBlock;
+  MemoryDescriptor Stack;
+  LocationDescriptor Context;
+};
+static_assert(sizeof(Thread) == 48, "");
+
+} // namespace minidump
+
+template <> struct DenseMapInfo<minidump::StreamType> {
+  static minidump::StreamType getEmptyKey() { return minidump::StreamType(-1); }
+
+  static minidump::StreamType getTombstoneKey() {
+    return minidump::StreamType(-2);
+  }
+
+  static unsigned getHashValue(minidump::StreamType Val) {
+    return DenseMapInfo<uint32_t>::getHashValue(static_cast<uint32_t>(Val));
+  }
+
+  static bool isEqual(minidump::StreamType LHS, minidump::StreamType RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MINIDUMP_H
diff --git a/include/llvm/BinaryFormat/MinidumpConstants.def b/include/llvm/BinaryFormat/MinidumpConstants.def
new file mode 100644
index 000000000000..d4f13dd99217
--- /dev/null
+++ b/include/llvm/BinaryFormat/MinidumpConstants.def
@@ -0,0 +1,107 @@
+//===- MinidumpConstants.def - Iteration over minidump constants-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH ||           \
+      defined HANDLE_MDMP_PLATFORM)
+#error "Missing HANDLE_MDMP definition"
+#endif
+
+#ifndef HANDLE_MDMP_STREAM_TYPE
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME)
+#endif
+
+#ifndef HANDLE_MDMP_ARCH
+#define HANDLE_MDMP_ARCH(CODE, NAME)
+#endif
+
+#ifndef HANDLE_MDMP_PLATFORM
+#define HANDLE_MDMP_PLATFORM(CODE, NAME)
+#endif
+
+HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList)
+HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList)
+HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList)
+HANDLE_MDMP_STREAM_TYPE(0x0006, Exception)
+HANDLE_MDMP_STREAM_TYPE(0x0007, SystemInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0008, ThreadExList)
+HANDLE_MDMP_STREAM_TYPE(0x0009, Memory64List)
+HANDLE_MDMP_STREAM_TYPE(0x000a, CommentA)
+HANDLE_MDMP_STREAM_TYPE(0x000b, CommentW)
+HANDLE_MDMP_STREAM_TYPE(0x000c, HandleData)
+HANDLE_MDMP_STREAM_TYPE(0x000d, FunctionTable)
+HANDLE_MDMP_STREAM_TYPE(0x000e, UnloadedModuleList)
+HANDLE_MDMP_STREAM_TYPE(0x000f, MiscInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0010, MemoryInfoList)
+HANDLE_MDMP_STREAM_TYPE(0x0011, ThreadInfoList)
+HANDLE_MDMP_STREAM_TYPE(0x0012, HandleOperationList)
+HANDLE_MDMP_STREAM_TYPE(0x0013, Token)
+HANDLE_MDMP_STREAM_TYPE(0x0014, JavascriptData)
+HANDLE_MDMP_STREAM_TYPE(0x0015, SystemMemoryInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0016, ProcessVMCounters)
+// Breakpad extension types.  0x4767 = "Gg"
+HANDLE_MDMP_STREAM_TYPE(0x47670001, BreakpadInfo)
+HANDLE_MDMP_STREAM_TYPE(0x47670002, AssertionInfo)
+// These are additional minidump stream values which are specific to the linux
+// breakpad implementation.
+HANDLE_MDMP_STREAM_TYPE(0x47670003, LinuxCPUInfo)    // /proc/cpuinfo
+HANDLE_MDMP_STREAM_TYPE(0x47670004, LinuxProcStatus) // /proc/$x/status
+HANDLE_MDMP_STREAM_TYPE(0x47670005, LinuxLSBRelease) // /etc/lsb-release
+HANDLE_MDMP_STREAM_TYPE(0x47670006, LinuxCMDLine)    // /proc/$x/cmdline
+HANDLE_MDMP_STREAM_TYPE(0x47670007, LinuxEnviron)    // /proc/$x/environ
+HANDLE_MDMP_STREAM_TYPE(0x47670008, LinuxAuxv)       // /proc/$x/auxv
+HANDLE_MDMP_STREAM_TYPE(0x47670009, LinuxMaps)       // /proc/$x/maps
+HANDLE_MDMP_STREAM_TYPE(0x4767000A, LinuxDSODebug)
+HANDLE_MDMP_STREAM_TYPE(0x4767000B, LinuxProcStat)   // /proc/$x/stat
+HANDLE_MDMP_STREAM_TYPE(0x4767000C, LinuxProcUptime) // uptime
+HANDLE_MDMP_STREAM_TYPE(0x4767000D, LinuxProcFD)     // /proc/$x/fd
+// Facebook-defined stream types
+HANDLE_MDMP_STREAM_TYPE(0xFACE1CA7, FacebookLogcat)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFA, FacebookAppCustomData)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFB, FacebookBuildID)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFC, FacebookAppVersionName)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFD, FacebookJavaStack)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFE, FacebookDalvikInfo)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFF, FacebookUnwindSymbols)
+HANDLE_MDMP_STREAM_TYPE(0xFACECB00, FacebookDumpErrorLog)
+HANDLE_MDMP_STREAM_TYPE(0xFACECCCC, FacebookAppStateLog)
+HANDLE_MDMP_STREAM_TYPE(0xFACEDEAD, FacebookAbortReason)
+HANDLE_MDMP_STREAM_TYPE(0xFACEE000, FacebookThreadName)
+
+HANDLE_MDMP_ARCH(0x0000, X86)      // PROCESSOR_ARCHITECTURE_INTEL
+HANDLE_MDMP_ARCH(0x0001, MIPS)     // PROCESSOR_ARCHITECTURE_MIPS
+HANDLE_MDMP_ARCH(0x0002, Alpha)    // PROCESSOR_ARCHITECTURE_ALPHA
+HANDLE_MDMP_ARCH(0x0003, PPC)      // PROCESSOR_ARCHITECTURE_PPC
+HANDLE_MDMP_ARCH(0x0004, SHX)      // PROCESSOR_ARCHITECTURE_SHX (Super-H)
+HANDLE_MDMP_ARCH(0x0005, ARM)      // PROCESSOR_ARCHITECTURE_ARM
+HANDLE_MDMP_ARCH(0x0006, IA64)     // PROCESSOR_ARCHITECTURE_IA64
+HANDLE_MDMP_ARCH(0x0007, Alpha64)  // PROCESSOR_ARCHITECTURE_ALPHA64
+HANDLE_MDMP_ARCH(0x0008, MSIL)     // PROCESSOR_ARCHITECTURE_MSIL
+HANDLE_MDMP_ARCH(0x0009, AMD64)    // PROCESSOR_ARCHITECTURE_AMD64
+HANDLE_MDMP_ARCH(0x000a, X86Win64) // PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
+HANDLE_MDMP_ARCH(0x8001, SPARC)    // Breakpad-defined value for SPARC
+HANDLE_MDMP_ARCH(0x8002, PPC64)    // Breakpad-defined value for PPC64
+HANDLE_MDMP_ARCH(0x8003, ARM64)    // Breakpad-defined value for ARM64
+HANDLE_MDMP_ARCH(0x8004, MIPS64)   // Breakpad-defined value for MIPS64
+
+HANDLE_MDMP_PLATFORM(0x0000, Win32S) // Win32 on Windows 3.1
+HANDLE_MDMP_PLATFORM(0x0001, Win32Windows) // Windows 95-98-Me
+HANDLE_MDMP_PLATFORM(0x0002, Win32NT) // Windows NT, 2000+
+HANDLE_MDMP_PLATFORM(0x0003, Win32CE) // Windows CE, Windows Mobile, "Handheld"
+// Breakpad-defined values.
+HANDLE_MDMP_PLATFORM(0x8000, Unix) // Generic Unix-ish
+HANDLE_MDMP_PLATFORM(0x8101, MacOSX) // Mac OS X/Darwin
+HANDLE_MDMP_PLATFORM(0x8102, IOS) // iOS
+HANDLE_MDMP_PLATFORM(0x8201, Linux) // Linux
+HANDLE_MDMP_PLATFORM(0x8202, Solaris) // Solaris
+HANDLE_MDMP_PLATFORM(0x8203, Android) // Android
+HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3
+HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl)
+
+#undef HANDLE_MDMP_STREAM_TYPE
+#undef HANDLE_MDMP_ARCH
+#undef HANDLE_MDMP_PLATFORM
diff --git a/include/llvm/BinaryFormat/MsgPack.def b/include/llvm/BinaryFormat/MsgPack.def
index 781b49f46aeb..7ad83ff21c42 100644
--- a/include/llvm/BinaryFormat/MsgPack.def
+++ b/include/llvm/BinaryFormat/MsgPack.def
@@ -1,9 +1,8 @@
 //===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/BinaryFormat/MsgPack.h b/include/llvm/BinaryFormat/MsgPack.h
index d431912a53e5..9fda14b21c71 100644
--- a/include/llvm/BinaryFormat/MsgPack.h
+++ b/include/llvm/BinaryFormat/MsgPack.h
@@ -1,9 +1,8 @@
 //===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/BinaryFormat/MsgPackDocument.h b/include/llvm/BinaryFormat/MsgPackDocument.h
new file mode 100644
index 000000000000..824ecc353207
--- /dev/null
+++ b/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -0,0 +1,385 @@
+//===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file declares a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from MsgPack, written to
+/// MsgPack, and inspected and modified in memory. This is intended to be a
+/// lighter-weight (in terms of memory allocations) replacement for
+/// MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+#define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include <map>
+
+namespace llvm {
+namespace msgpack {
+
+class ArrayDocNode;
+class Document;
+class MapDocNode;
+
+/// The kind of a DocNode and its owning Document.
+struct KindAndDocument {
+  Document *Doc;
+  Type Kind;
+};
+
+/// A node in a MsgPack Document. This is a simple copyable and
+/// passable-by-value type that does not own any memory.
+class DocNode {
+  friend Document;
+
+public:
+  typedef std::map<DocNode, DocNode> MapTy;
+  typedef std::vector<DocNode> ArrayTy;
+
+private:
+  // Using KindAndDocument allows us to squeeze Kind and a pointer to the
+  // owning Document into the same word. Having a pointer to the owning
+  // Document makes the API of DocNode more convenient, and allows its use in
+  // YAMLIO.
+  const KindAndDocument *KindAndDoc;
+
+protected:
+  // The union of different values.
+  union {
+    int64_t Int;
+    uint64_t UInt;
+    bool Bool;
+    double Float;
+    StringRef Raw;
+    ArrayTy *Array;
+    MapTy *Map;
+  };
+
+public:
+  DocNode() : KindAndDoc(nullptr) {}
+
+  // Type methods
+  bool isMap() const { return getKind() == Type::Map; }
+  bool isArray() const { return getKind() == Type::Array; }
+  bool isScalar() const { return !isMap() && !isArray(); }
+  bool isString() const { return getKind() == Type::String; }
+
+  // Accessors
+  bool isEmpty() const { return !KindAndDoc; }
+  Type getKind() const { return KindAndDoc->Kind; }
+  Document *getDocument() const { return KindAndDoc->Doc; }
+
+  int64_t &getInt() {
+    assert(getKind() == Type::Int);
+    return Int;
+  }
+
+  uint64_t &getUInt() {
+    assert(getKind() == Type::UInt);
+    return UInt;
+  }
+
+  bool &getBool() {
+    assert(getKind() == Type::Boolean);
+    return Bool;
+  }
+
+  double &getFloat() {
+    assert(getKind() == Type::Float);
+    return Float;
+  }
+
+  int64_t getInt() const {
+    assert(getKind() == Type::Int);
+    return Int;
+  }
+
+  uint64_t getUInt() const {
+    assert(getKind() == Type::UInt);
+    return UInt;
+  }
+
+  bool getBool() const {
+    assert(getKind() == Type::Boolean);
+    return Bool;
+  }
+
+  double getFloat() const {
+    assert(getKind() == Type::Float);
+    return Float;
+  }
+
+  StringRef getString() const {
+    assert(getKind() == Type::String);
+    return Raw;
+  }
+
+  /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
+  /// array node if necessary.
+  ArrayDocNode &getArray(bool Convert = false) {
+    if (getKind() != Type::Array) {
+      assert(Convert);
+      convertToArray();
+    }
+    // This could be a static_cast, except ArrayDocNode is a forward reference.
+    return *reinterpret_cast<ArrayDocNode *>(this);
+  }
+
+  /// Get a MapDocNode for a map node. If Convert, convert the node to a map
+  /// node if necessary.
+  MapDocNode &getMap(bool Convert = false) {
+    if (getKind() != Type::Map) {
+      assert(Convert);
+      convertToMap();
+    }
+    // This could be a static_cast, except MapDocNode is a forward reference.
+    return *reinterpret_cast<MapDocNode *>(this);
+  }
+
+  /// Comparison operator, used for map keys.
+  friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
+    // This has to cope with one or both of the nodes being default-constructed,
+    // such that KindAndDoc is not set. An empty node orders before all others.
+    if (Rhs.isEmpty())
+      return false;
+    if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
+      if (Lhs.isEmpty())
+        return true;
+      return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
+    }
+    switch (Lhs.getKind()) {
+    case Type::Int:
+      return Lhs.Int < Rhs.Int;
+    case Type::UInt:
+      return Lhs.UInt < Rhs.UInt;
+    case Type::Nil:
+      return false;
+    case Type::Boolean:
+      return Lhs.Bool < Rhs.Bool;
+    case Type::Float:
+      return Lhs.Float < Rhs.Float;
+    case Type::String:
+    case Type::Binary:
+      return Lhs.Raw < Rhs.Raw;
+    default:
+      llvm_unreachable("bad map key type");
+    }
+  }
+
+  /// Equality operator
+  friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
+    return !(Lhs < Rhs) && !(Rhs < Lhs);
+  }
+
+  /// Convert this node to a string, assuming it is scalar.
+  std::string toString() const;
+
+  /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
+  /// it is a string, copy the string into the Document's strings list so we do
+  /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
+  StringRef fromString(StringRef S, StringRef Tag = "");
+
+private:
+  // Private constructor setting KindAndDoc, used by methods in Document.
+  DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
+
+  void convertToArray();
+  void convertToMap();
+};
+
+/// A DocNode that is a map.
+class MapDocNode : public DocNode {
+public:
+  MapDocNode() {}
+  MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
+
+  // Map access methods.
+  size_t size() const { return Map->size(); }
+  bool empty() const { return !size(); }
+  MapTy::iterator begin() { return Map->begin(); }
+  MapTy::iterator end() { return Map->end(); }
+  MapTy::iterator find(DocNode Key) { return Map->find(Key); }
+  MapTy::iterator find(StringRef Key);
+  /// Member access. The string data must remain valid for the lifetime of the
+  /// Document.
+  DocNode &operator[](StringRef S);
+  /// Member access.
+  DocNode &operator[](DocNode Key);
+};
+
+/// A DocNode that is an array.
+class ArrayDocNode : public DocNode {
+public:
+  ArrayDocNode() {}
+  ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
+
+  // Array access methods.
+  size_t size() const { return Array->size(); }
+  bool empty() const { return !size(); }
+  ArrayTy::iterator begin() { return Array->begin(); }
+  ArrayTy::iterator end() { return Array->end(); }
+  void push_back(DocNode N) {
+    assert(N.getDocument() == getDocument());
+    Array->push_back(N);
+  }
+
+  /// Element access. This extends the array if necessary.
+  DocNode &operator[](size_t Index);
+};
+
+/// Simple in-memory representation of a document of msgpack objects with
+/// ability to find and create array and map elements.  Does not currently cope
+/// with any extension types.
+class Document {
+  // Maps, arrays and strings used by nodes in the document. No attempt is made
+  // to free unused ones.
+  std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
+  std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
+  std::vector<std::unique_ptr<char[]>> Strings;
+
+  // The root node of the document.
+  DocNode Root;
+
+  // The KindAndDocument structs pointed to by nodes in the document.
+  KindAndDocument KindAndDocs[size_t(Type::Extension) + 1];
+
+  // Whether YAML output uses hex for UInt.
+  bool HexMode = false;
+
+public:
+  Document() {
+    for (size_t Kind = 0; Kind <= size_t(Type::Extension); ++Kind)
+      KindAndDocs[Kind] = KindAndDocument{this, Type(Kind)};
+    clear(); // Root becomes a nil node backed by the table filled in above.
+  }
+
+  /// Get ref to the document's root element.
+  DocNode &getRoot() { return Root; }
+
+  /// Restore the Document to an empty state.
+  void clear() { getRoot() = getNode(); }
+
+  /// Create a nil node associated with this Document.
+  DocNode getNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
+    return N;
+  }
+
+  /// Create an Int node associated with this Document.
+  DocNode getNode(int64_t V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+    N.Int = V;
+    return N;
+  }
+
+  /// Create an Int node associated with this Document.
+  DocNode getNode(int V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+    N.Int = V;
+    return N;
+  }
+
+  /// Create a UInt node associated with this Document.
+  DocNode getNode(uint64_t V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+    N.UInt = V;
+    return N;
+  }
+
+  /// Create a UInt node associated with this Document.
+  DocNode getNode(unsigned V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+    N.UInt = V;
+    return N;
+  }
+
+  /// Create a Boolean node associated with this Document.
+  DocNode getNode(bool V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
+    N.Bool = V;
+    return N;
+  }
+
+  /// Create a Float node associated with this Document.
+  DocNode getNode(double V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
+    N.Float = V;
+    return N;
+  }
+
+  /// Create a String node associated with this Document. If !Copy, the passed
+  /// string must remain valid for the lifetime of the Document.
+  DocNode getNode(StringRef V, bool Copy = false) {
+    if (Copy)
+      V = addString(V);
+    auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
+    N.Raw = V;
+    return N;
+  }
+
+  /// Create a String node associated with this Document. If !Copy, the passed
+  /// string must remain valid for the lifetime of the Document.
+  DocNode getNode(const char *V, bool Copy = false) {
+    return getNode(StringRef(V), Copy);
+  }
+
+  /// Create an empty Map node associated with this Document.
+  MapDocNode getMapNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
+    Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
+    N.Map = Maps.back().get();
+    return N.getMap();
+  }
+
+  /// Create an empty Array node associated with this Document.
+  ArrayDocNode getArrayNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
+    Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
+    N.Array = Arrays.back().get();
+    return N.getArray();
+  }
+
+  /// Read a MsgPack document from a binary MsgPack blob.
+  /// The blob data must remain valid for the lifetime of this Document (because
+  /// a string object in the document contains a StringRef into the original
+  /// blob).
+  /// If Multi, then this sets root to an array and adds top-level objects to
+  /// it. If !Multi, then it only reads a single top-level object, even if there
+  /// are more, and sets root to that.
+  /// Returns false if failed due to illegal format.
+  bool readFromBlob(StringRef Blob, bool Multi);
+
+  /// Write a MsgPack document to a binary MsgPack blob.
+  void writeToBlob(std::string &Blob);
+
+  /// Copy a string into the Document's strings list, and return the copy that
+  /// is owned by the Document.
+  StringRef addString(StringRef S) {
+    Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
+    memcpy(&Strings.back()[0], S.data(), S.size());
+    return StringRef(&Strings.back()[0], S.size());
+  }
+
+  /// Set whether YAML output uses hex for UInt. Default off.
+  void setHexMode(bool Val = true) { HexMode = Val; }
+
+  /// Get Hexmode flag.
+  bool getHexMode() const { return HexMode; }
+
+  /// Convert MsgPack Document to YAML text.
+  void toYAML(raw_ostream &OS);
+
+  /// Read YAML text into the MsgPack document. Returns false on failure.
+  bool fromYAML(StringRef S);
+};
+
+} // namespace msgpack
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
diff --git a/include/llvm/BinaryFormat/MsgPackReader.h b/include/llvm/BinaryFormat/MsgPackReader.h
index 511c31407455..2d332f531b23 100644
--- a/include/llvm/BinaryFormat/MsgPackReader.h
+++ b/include/llvm/BinaryFormat/MsgPackReader.h
@@ -1,9 +1,8 @@
 //===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/BinaryFormat/MsgPackTypes.h b/include/llvm/BinaryFormat/MsgPackTypes.h
deleted file mode 100644
index f96cd4c338fd..000000000000
--- a/include/llvm/BinaryFormat/MsgPackTypes.h
+++ /dev/null
@@ -1,372 +0,0 @@
-//===- MsgPackTypes.h - MsgPack Types ---------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This is a data structure for representing MessagePack "documents", with
-/// methods to go to and from MessagePack. The types also specialize YAMLIO
-/// traits in order to go to and from YAML.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/BinaryFormat/MsgPackReader.h"
-#include "llvm/BinaryFormat/MsgPackWriter.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-
-#ifndef LLVM_BINARYFORMAT_MSGPACKTYPES_H
-#define LLVM_BINARYFORMAT_MSGPACKTYPES_H
-
-namespace llvm {
-namespace msgpack {
-
-class Node;
-
-/// Short-hand for a Node pointer.
-using NodePtr = std::shared_ptr<Node>;
-
-/// Short-hand for an Optional Node pointer.
-using OptNodePtr = Optional<NodePtr>;
-
-/// Abstract base-class which can be any MessagePack type.
-class Node {
-public:
-  enum NodeKind {
-    NK_Scalar,
-    NK_Array,
-    NK_Map,
-  };
-
-private:
-  virtual void anchor() = 0;
-  const NodeKind Kind;
-
-  static Expected<OptNodePtr> readArray(Reader &MPReader, size_t Length);
-  static Expected<OptNodePtr> readMap(Reader &MPReader, size_t Length);
-
-public:
-  NodeKind getKind() const { return Kind; }
-
-  /// Construct a Node. Used by derived classes to track kind information.
-  Node(NodeKind Kind) : Kind(Kind) {}
-
-  virtual ~Node() = default;
-
-  /// Read from a MessagePack reader \p MPReader, returning an error if one is
-  /// encountered, or None if \p MPReader is at the end of stream, or some Node
-  /// pointer if some type is read.
-  static Expected<OptNodePtr> read(Reader &MPReader);
-
-  /// Write to a MessagePack writer \p MPWriter.
-  virtual void write(Writer &MPWriter) = 0;
-};
-
-/// A MessagePack scalar.
-class ScalarNode : public Node {
-public:
-  enum ScalarKind {
-    SK_Int,
-    SK_UInt,
-    SK_Nil,
-    SK_Boolean,
-    SK_Float,
-    SK_String,
-    SK_Binary,
-  };
-
-private:
-  void anchor() override;
-
-  void destroy();
-
-  ScalarKind SKind;
-
-  union {
-    int64_t IntValue;
-    uint64_t UIntValue;
-    bool BoolValue;
-    double FloatValue;
-    std::string StringValue;
-  };
-
-public:
-  /// Construct an Int ScalarNode.
-  ScalarNode(int64_t IntValue);
-  /// Construct an Int ScalarNode.
-  ScalarNode(int32_t IntValue);
-  /// Construct an UInt ScalarNode.
-  ScalarNode(uint64_t UIntValue);
-  /// Construct an UInt ScalarNode.
-  ScalarNode(uint32_t UIntValue);
-  /// Construct a Nil ScalarNode.
-  ScalarNode();
-  /// Construct a Boolean ScalarNode.
-  ScalarNode(bool BoolValue);
-  /// Construct a Float ScalarNode.
-  ScalarNode(double FloatValue);
-  /// Construct a String ScalarNode.
-  ScalarNode(StringRef StringValue);
-  /// Construct a String ScalarNode.
-  ScalarNode(const char *StringValue);
-  /// Construct a String ScalarNode.
-  ScalarNode(std::string &&StringValue);
-  /// Construct a Binary ScalarNode.
-  ScalarNode(MemoryBufferRef BinaryValue);
-
-  ~ScalarNode();
-
-  ScalarNode &operator=(const ScalarNode &RHS) = delete;
-  /// A ScalarNode can only be move assigned.
-  ScalarNode &operator=(ScalarNode &&RHS);
-
-  /// Change the kind of this ScalarNode, zero initializing it to the new type.
-  void setScalarKind(ScalarKind SKind) {
-    switch (SKind) {
-    case SK_Int:
-      *this = int64_t(0);
-      break;
-    case SK_UInt:
-      *this = uint64_t(0);
-      break;
-    case SK_Boolean:
-      *this = false;
-      break;
-    case SK_Float:
-      *this = 0.0;
-      break;
-    case SK_String:
-      *this = StringRef();
-      break;
-    case SK_Binary:
-      *this = MemoryBufferRef("", "");
-      break;
-    case SK_Nil:
-      *this = ScalarNode();
-      break;
-    }
-  }
-
-  /// Get the current kind of ScalarNode.
-  ScalarKind getScalarKind() { return SKind; }
-
-  /// Get the value of an Int scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_Int
-  int64_t getInt() {
-    assert(SKind == SK_Int);
-    return IntValue;
-  }
-
-  /// Get the value of a UInt scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_UInt
-  uint64_t getUInt() {
-    assert(SKind == SK_UInt);
-    return UIntValue;
-  }
-
-  /// Get the value of an Boolean scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_Boolean
-  bool getBool() {
-    assert(SKind == SK_Boolean);
-    return BoolValue;
-  }
-
-  /// Get the value of an Float scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_Float
-  double getFloat() {
-    assert(SKind == SK_Float);
-    return FloatValue;
-  }
-
-  /// Get the value of a String scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_String
-  StringRef getString() {
-    assert(SKind == SK_String);
-    return StringValue;
-  }
-
-  /// Get the value of a Binary scalar.
-  ///
-  /// \warning Assumes getScalarKind() == SK_Binary
-  StringRef getBinary() {
-    assert(SKind == SK_Binary);
-    return StringValue;
-  }
-
-  static bool classof(const Node *N) { return N->getKind() == NK_Scalar; }
-
-  void write(Writer &MPWriter) override;
-
-  /// Parse a YAML scalar of the current ScalarKind from \p ScalarStr.
-  ///
-  /// \returns An empty string on success, otherwise an error message.
-  StringRef inputYAML(StringRef ScalarStr);
-
-  /// Output a YAML scalar of the current ScalarKind into \p OS.
-  void outputYAML(raw_ostream &OS) const;
-
-  /// Determine which YAML quoting type the current value would need when
-  /// output.
-  yaml::QuotingType mustQuoteYAML(StringRef ScalarStr) const;
-
-  /// Get the YAML tag for the current ScalarKind.
-  StringRef getYAMLTag() const;
-
-  /// Flag which affects how the type handles YAML tags when reading and
-  /// writing.
-  ///
-  /// When false, tags are used when reading and writing. When reading, the tag
-  /// is used to decide the ScalarKind before parsing. When writing, the tag is
-  /// output along with the value.
-  ///
-  /// When true, tags are ignored when reading and writing. When reading, the
-  /// ScalarKind is always assumed to be String. When writing, the tag is not
-  /// output.
-  bool IgnoreTag = false;
-
-  static const char *IntTag;
-  static const char *NilTag;
-  static const char *BooleanTag;
-  static const char *FloatTag;
-  static const char *StringTag;
-  static const char *BinaryTag;
-};
-
-class ArrayNode : public Node, public std::vector<NodePtr> {
-  void anchor() override;
-
-public:
-  ArrayNode() : Node(NK_Array) {}
-  static bool classof(const Node *N) { return N->getKind() == NK_Array; }
-
-  void write(Writer &MPWriter) override {
-    MPWriter.writeArraySize(this->size());
-    for (auto &N : *this)
-      N->write(MPWriter);
-  }
-};
-
-class MapNode : public Node, public StringMap<NodePtr> {
-  void anchor() override;
-
-public:
-  MapNode() : Node(NK_Map) {}
-  static bool classof(const Node *N) { return N->getKind() == NK_Map; }
-
-  void write(Writer &MPWriter) override {
-    MPWriter.writeMapSize(this->size());
-    for (auto &N : *this) {
-      MPWriter.write(N.first());
-      N.second->write(MPWriter);
-    }
-  }
-};
-
-} // end namespace msgpack
-
-namespace yaml {
-
-template <> struct PolymorphicTraits<msgpack::NodePtr> {
-  static NodeKind getKind(const msgpack::NodePtr &N) {
-    if (isa<msgpack::ScalarNode>(*N))
-      return NodeKind::Scalar;
-    if (isa<msgpack::MapNode>(*N))
-      return NodeKind::Map;
-    if (isa<msgpack::ArrayNode>(*N))
-      return NodeKind::Sequence;
-    llvm_unreachable("NodeKind not supported");
-  }
-  static msgpack::ScalarNode &getAsScalar(msgpack::NodePtr &N) {
-    if (!N || !isa<msgpack::ScalarNode>(*N))
-      N.reset(new msgpack::ScalarNode());
-    return *cast<msgpack::ScalarNode>(N.get());
-  }
-  static msgpack::MapNode &getAsMap(msgpack::NodePtr &N) {
-    if (!N || !isa<msgpack::MapNode>(*N))
-      N.reset(new msgpack::MapNode());
-    return *cast<msgpack::MapNode>(N.get());
-  }
-  static msgpack::ArrayNode &getAsSequence(msgpack::NodePtr &N) {
-    if (!N || !isa<msgpack::ArrayNode>(*N))
-      N.reset(new msgpack::ArrayNode());
-    return *cast<msgpack::ArrayNode>(N.get());
-  }
-};
-
-template <> struct TaggedScalarTraits<msgpack::ScalarNode> {
-  static void output(const msgpack::ScalarNode &S, void *Ctxt,
-                     raw_ostream &ScalarOS, raw_ostream &TagOS) {
-    if (!S.IgnoreTag)
-      TagOS << S.getYAMLTag();
-    S.outputYAML(ScalarOS);
-  }
-
-  static StringRef input(StringRef ScalarStr, StringRef Tag, void *Ctxt,
-                         msgpack::ScalarNode &S) {
-    if (Tag == msgpack::ScalarNode::IntTag) {
-      S.setScalarKind(msgpack::ScalarNode::SK_UInt);
-      if (S.inputYAML(ScalarStr) == StringRef())
-        return StringRef();
-      S.setScalarKind(msgpack::ScalarNode::SK_Int);
-      return S.inputYAML(ScalarStr);
-    }
-
-    if (S.IgnoreTag || Tag == msgpack::ScalarNode::StringTag ||
-        Tag == "tag:yaml.org,2002:str")
-      S.setScalarKind(msgpack::ScalarNode::SK_String);
-    else if (Tag == msgpack::ScalarNode::NilTag)
-      S.setScalarKind(msgpack::ScalarNode::SK_Nil);
-    else if (Tag == msgpack::ScalarNode::BooleanTag)
-      S.setScalarKind(msgpack::ScalarNode::SK_Boolean);
-    else if (Tag == msgpack::ScalarNode::FloatTag)
-      S.setScalarKind(msgpack::ScalarNode::SK_Float);
-    else if (Tag == msgpack::ScalarNode::StringTag)
-      S.setScalarKind(msgpack::ScalarNode::SK_String);
-    else if (Tag == msgpack::ScalarNode::BinaryTag)
-      S.setScalarKind(msgpack::ScalarNode::SK_Binary);
-    else
-      return "Unsupported messagepack tag";
-
-    return S.inputYAML(ScalarStr);
-  }
-
-  static QuotingType mustQuote(const msgpack::ScalarNode &S, StringRef Str) {
-    return S.mustQuoteYAML(Str);
-  }
-};
-
-template <> struct CustomMappingTraits<msgpack::MapNode> {
-  static void inputOne(IO &IO, StringRef Key, msgpack::MapNode &M) {
-    IO.mapRequired(Key.str().c_str(), M[Key]);
-  }
-  static void output(IO &IO, msgpack::MapNode &M) {
-    for (auto &N : M)
-      IO.mapRequired(N.getKey().str().c_str(), N.getValue());
-  }
-};
-
-template <> struct SequenceTraits<msgpack::ArrayNode> {
-  static size_t size(IO &IO, msgpack::ArrayNode &A) { return A.size(); }
-  static msgpack::NodePtr &element(IO &IO, msgpack::ArrayNode &A,
-                                   size_t Index) {
-    if (Index >= A.size())
-      A.resize(Index + 1);
-    return A[Index];
-  }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-#endif //  LLVM_BINARYFORMAT_MSGPACKTYPES_H
diff --git a/include/llvm/BinaryFormat/MsgPackWriter.h b/include/llvm/BinaryFormat/MsgPackWriter.h
index 98af422c9f19..3b610b774f77 100644
--- a/include/llvm/BinaryFormat/MsgPackWriter.h
+++ b/include/llvm/BinaryFormat/MsgPackWriter.h
@@ -1,9 +1,8 @@
 //===- MsgPackWriter.h - Simple MsgPack writer ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
index d9f0f94b298d..0f22bfe610c6 100644
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -1,9 +1,8 @@
 //===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,17 @@ struct WasmDylinkInfo {
   std::vector<StringRef> Needed; // Shared library depenedencies
 };
 
+struct WasmProducerInfo {
+  std::vector<std::pair<std::string, std::string>> Languages;
+  std::vector<std::pair<std::string, std::string>> Tools;
+  std::vector<std::pair<std::string, std::string>> SDKs;
+};
+
+struct WasmFeatureEntry {
+  uint8_t Prefix;
+  std::string Name;
+};
+
 struct WasmExport {
   StringRef Name;
   uint8_t Kind;
@@ -126,12 +136,13 @@ struct WasmFunction {
 };
 
 struct WasmDataSegment {
-  uint32_t MemoryIndex;
-  WasmInitExpr Offset;
+  uint32_t InitFlags;
+  uint32_t MemoryIndex; // present if InitFlags & WASM_SEGMENT_HAS_MEMINDEX
+  WasmInitExpr Offset; // present if (InitFlags & WASM_SEGMENT_IS_PASSIVE) == 0
   ArrayRef<uint8_t> Content;
   StringRef Name; // from the "segment info" section
   uint32_t Alignment;
-  uint32_t Flags;
+  uint32_t LinkerFlags;
   uint32_t Comdat; // from the "comdat info" section
 };
 
@@ -165,7 +176,8 @@ struct WasmSymbolInfo {
   StringRef Name;
   uint8_t Kind;
   uint32_t Flags;
-  StringRef Module; // For undefined symbols the module name of the import
+  StringRef ImportModule; // For undefined symbols the module of the import
+  StringRef ImportName;   // For undefined symbols the name of the import
   union {
     // For function or global symbols, the index in function or global index
     // space.
@@ -212,7 +224,7 @@ enum : unsigned {
   WASM_TYPE_F64 = 0x7C,
   WASM_TYPE_V128 = 0x7B,
   WASM_TYPE_FUNCREF = 0x70,
-  WASM_TYPE_EXCEPT_REF = 0x68,
+  WASM_TYPE_EXNREF = 0x68,
   WASM_TYPE_FUNC = 0x60,
   WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
 };
@@ -229,11 +241,19 @@ enum : unsigned {
 // Opcodes used in initializer expressions.
 enum : unsigned {
   WASM_OPCODE_END = 0x0b,
+  WASM_OPCODE_CALL = 0x10,
+  WASM_OPCODE_LOCAL_GET = 0x20,
   WASM_OPCODE_GLOBAL_GET = 0x23,
+  WASM_OPCODE_GLOBAL_SET = 0x24,
+  WASM_OPCODE_I32_STORE = 0x36,
   WASM_OPCODE_I32_CONST = 0x41,
   WASM_OPCODE_I64_CONST = 0x42,
   WASM_OPCODE_F32_CONST = 0x43,
   WASM_OPCODE_F64_CONST = 0x44,
+  WASM_OPCODE_I32_ADD = 0x6a,
+  WASM_OPCODE_MISC_PREFIX = 0xfc,
+  WASM_OPCODE_MEMORY_INIT = 0x08,
+  WASM_OPCODE_DATA_DROP = 0x09,
 };
 
 enum : unsigned {
@@ -241,6 +261,18 @@ enum : unsigned {
   WASM_LIMITS_FLAG_IS_SHARED = 0x2,
 };
 
+enum : unsigned {
+  WASM_SEGMENT_IS_PASSIVE = 0x01,
+  WASM_SEGMENT_HAS_MEMINDEX = 0x02,
+};
+
+// Feature policy prefixes used in the custom "target_features" section
+enum : uint8_t {
+  WASM_FEATURE_PREFIX_USED = '+',
+  WASM_FEATURE_PREFIX_REQUIRED = '=',
+  WASM_FEATURE_PREFIX_DISALLOWED = '-',
+};
+
 // Kind codes used in the custom "name" section
 enum : unsigned {
   WASM_NAMES_FUNCTION = 0x1,
@@ -284,6 +316,8 @@ const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
 const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
 const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
 const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
+const unsigned WASM_SYMBOL_EXPORTED = 0x20;
+const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
 
 #define WASM_RELOC(name, value) name = value,
 
@@ -300,17 +334,17 @@ enum class ValType {
   F32 = WASM_TYPE_F32,
   F64 = WASM_TYPE_F64,
   V128 = WASM_TYPE_V128,
-  EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+  EXNREF = WASM_TYPE_EXNREF,
 };
 
 struct WasmSignature {
-  SmallVector<wasm::ValType, 1> Returns;
-  SmallVector<wasm::ValType, 4> Params;
+  SmallVector<ValType, 1> Returns;
+  SmallVector<ValType, 4> Params;
   // Support empty and tombstone instances, needed by DenseMap.
   enum { Plain, Empty, Tombstone } State = Plain;
 
-  WasmSignature(SmallVector<wasm::ValType, 1> &&InReturns,
-                SmallVector<wasm::ValType, 4> &&InParams)
+  WasmSignature(SmallVector<ValType, 1> &&InReturns,
+                SmallVector<ValType, 4> &&InParams)
       : Returns(InReturns), Params(InParams) {}
   WasmSignature() = default;
 };
@@ -333,8 +367,9 @@ inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
   return !(LHS == RHS);
 }
 
-std::string toString(wasm::WasmSymbolType type);
+std::string toString(WasmSymbolType type);
 std::string relocTypetoString(uint32_t type);
+bool relocTypeHasAddend(uint32_t type);
 
 } // end namespace wasm
 } // end namespace llvm
diff --git a/include/llvm/BinaryFormat/WasmRelocs.def b/include/llvm/BinaryFormat/WasmRelocs.def
index b3a08e70c1d5..00dacf72abb0 100644
--- a/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/include/llvm/BinaryFormat/WasmRelocs.def
@@ -2,14 +2,16 @@
 #error "WASM_RELOC must be defined"
 #endif
 
-WASM_RELOC(R_WEBASSEMBLY_FUNCTION_INDEX_LEB,   0)
-WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_SLEB,     1)
-WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_I32,      2)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_LEB,      3)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_SLEB,     4)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_I32,      5)
-WASM_RELOC(R_WEBASSEMBLY_TYPE_INDEX_LEB,       6)
-WASM_RELOC(R_WEBASSEMBLY_GLOBAL_INDEX_LEB,     7)
-WASM_RELOC(R_WEBASSEMBLY_FUNCTION_OFFSET_I32,  8)
-WASM_RELOC(R_WEBASSEMBLY_SECTION_OFFSET_I32,   9)
-WASM_RELOC(R_WEBASSEMBLY_EVENT_INDEX_LEB,     10)
+WASM_RELOC(R_WASM_FUNCTION_INDEX_LEB,    0)
+WASM_RELOC(R_WASM_TABLE_INDEX_SLEB,      1)
+WASM_RELOC(R_WASM_TABLE_INDEX_I32,       2)
+WASM_RELOC(R_WASM_MEMORY_ADDR_LEB,       3)
+WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB,      4)
+WASM_RELOC(R_WASM_MEMORY_ADDR_I32,       5)
+WASM_RELOC(R_WASM_TYPE_INDEX_LEB,        6)
+WASM_RELOC(R_WASM_GLOBAL_INDEX_LEB,      7)
+WASM_RELOC(R_WASM_FUNCTION_OFFSET_I32,   8)
+WASM_RELOC(R_WASM_SECTION_OFFSET_I32,    9)
+WASM_RELOC(R_WASM_EVENT_INDEX_LEB,      10)
+WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB, 11)
+WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB, 12)
diff --git a/include/llvm/BinaryFormat/XCOFF.h b/include/llvm/BinaryFormat/XCOFF.h
new file mode 100644
index 000000000000..7774ab3ed24a
--- /dev/null
+++ b/include/llvm/BinaryFormat/XCOFF.h
@@ -0,0 +1,145 @@
+//===-- llvm/BinaryFormat/XCOFF.h - The XCOFF file format -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the XCOFF object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_XCOFF_H
+#define LLVM_BINARYFORMAT_XCOFF_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace XCOFF {
+
+// Constants used in the XCOFF definition.
+enum { SectionNameSize = 8, SymbolNameSize = 8 };
+enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
+
+// x_smclas field of x_csect from system header: /usr/include/syms.h
+/// Storage Mapping Class definitions.
+enum StorageMappingClass {
+  //     READ ONLY CLASSES
+  XMC_PR = 0,      ///< Program Code
+  XMC_RO = 1,      ///< Read Only Constant
+  XMC_DB = 2,      ///< Debug Dictionary Table
+  XMC_GL = 6,      ///< Global Linkage (Interfile Interface Code)
+  XMC_XO = 7,      ///< Extended Operation (Pseudo Machine Instruction)
+  XMC_SV = 8,      ///< Supervisor Call (32-bit process only)
+  XMC_SV64 = 17,   ///< Supervisor Call for 64-bit process
+  XMC_SV3264 = 18, ///< Supervisor Call for both 32- and 64-bit processes
+  XMC_TI = 12,     ///< Traceback Index csect
+  XMC_TB = 13,     ///< Traceback Table csect
+
+  //       READ WRITE CLASSES
+  XMC_RW = 5,   ///< Read Write Data
+  XMC_TC0 = 15, ///< TOC Anchor for TOC Addressability
+  XMC_TC = 3,   ///< General TOC item
+  XMC_TD = 16,  ///< Scalar data item in the TOC
+  XMC_DS = 10,  ///< Descriptor csect
+  XMC_UA = 4,   ///< Unclassified - Treated as Read Write
+  XMC_BS = 9,   ///< BSS class (uninitialized static internal)
+  XMC_UC = 11,  ///< Un-named Fortran Common
+
+  XMC_TL = 20, ///< Initialized thread-local variable
+  XMC_UL = 21, ///< Uninitialized thread-local variable
+  XMC_TE = 22  ///< Symbol mapped at the end of TOC
+};
+
+// Flags for defining the section type. Used for the s_flags field of
+// the section header structure. Defined in the system header `scnhdr.h`.
+enum SectionTypeFlags {
+  STYP_PAD = 0x0008,
+  STYP_DWARF = 0x0010,
+  STYP_TEXT = 0x0020,
+  STYP_DATA = 0x0040,
+  STYP_BSS = 0x0080,
+  STYP_EXCEPT = 0x0100,
+  STYP_INFO = 0x0200,
+  STYP_TDATA = 0x0400,
+  STYP_TBSS = 0x0800,
+  STYP_LOADER = 0x1000,
+  STYP_DEBUG = 0x2000,
+  STYP_TYPCHK = 0x4000,
+  STYP_OVRFLO = 0x8000
+};
+
+// STORAGE CLASSES, n_sclass field of syment.
+// The values come from `storclass.h` and `dbxstclass.h`.
+enum StorageClass : uint8_t {
+  // Storage classes used for symbolic debugging symbols.
+  C_FILE = 103,  // File name
+  C_BINCL = 108, // Beginning of include file
+  C_EINCL = 109, // Ending of include file
+  C_GSYM = 128,  // Global variable
+  C_STSYM = 133, // Statically allocated symbol
+  C_BCOMM = 135, // Beginning of common block
+  C_ECOMM = 137, // End of common block
+  C_ENTRY = 141, // Alternate entry
+  C_BSTAT = 143, // Beginning of static block
+  C_ESTAT = 144, // End of static block
+  C_GTLS = 145,  // Global thread-local variable
+  C_STTLS = 146, // Static thread-local variable
+
+  // Storage classes used for DWARF symbols.
+  C_DWARF = 112, // DWARF section symbol
+
+  // Storage classes used for absolute symbols.
+  C_LSYM = 129,  // Automatic variable allocated on stack
+  C_PSYM = 130,  // Argument to subroutine allocated on stack
+  C_RSYM = 131,  // Register variable
+  C_RPSYM = 132, // Argument to function or procedure stored in register
+  C_ECOML = 136, // Local member of common block
+  C_FUN = 142,   // Function or procedure
+
+  // Storage classes used for undefined external symbols or
+  // symbols of general sections.
+  C_EXT = 2,       // External symbol
+  C_WEAKEXT = 111, // Weak external symbol
+
+  // Storage classes used for symbols of general sections.
+  C_NULL = 0,
+  C_STAT = 3,     // Static
+  C_BLOCK = 100,  // ".bb" or ".eb"
+  C_FCN = 101,    // ".bf" or ".ef"
+  C_HIDEXT = 107, // Un-named external symbol
+  C_INFO = 110,   // Comment string in .info section
+  C_DECL = 140,   // Declaration of object (type)
+
+  // Storage classes - Obsolete/Undocumented.
+  C_AUTO = 1,     // Automatic variable
+  C_REG = 4,      // Register variable
+  C_EXTDEF = 5,   // External definition
+  C_LABEL = 6,    // Label
+  C_ULABEL = 7,   // Undefined label
+  C_MOS = 8,      // Member of structure
+  C_ARG = 9,      // Function argument
+  C_STRTAG = 10,  // Structure tag
+  C_MOU = 11,     // Member of union
+  C_UNTAG = 12,   // Union tag
+  C_TPDEF = 13,   // Type definition
+  C_USTATIC = 14, // Undefined static
+  C_ENTAG = 15,   // Enumeration tag
+  C_MOE = 16,     // Member of enumeration
+  C_REGPARM = 17, // Register parameter
+  C_FIELD = 18,   // Bit field
+  C_EOS = 102,    // End of structure
+  C_LINE = 104,
+  C_ALIAS = 105,  // Duplicate tag
+  C_HIDDEN = 106, // Special storage class for external
+  C_EFCN = 255,   // Physical end of function
+
+  // Storage classes - reserved
+  C_TCSYM = 134 // Reserved
+};
+
+} // end namespace XCOFF
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
deleted file mode 100644
index bf21e146e771..000000000000
--- a/include/llvm/Bitcode/BitCodes.h
+++ /dev/null
@@ -1,185 +0,0 @@
-//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header Bitcode enum values.
-//
-// The enum values defined in this file should be considered permanent.  If
-// new features are added, they should have values added at the end of the
-// respective lists.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_BITCODE_BITCODES_H
-#define LLVM_BITCODE_BITCODES_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
-
-namespace llvm {
-/// Offsets of the 32-bit fields of bitcode wrapper header.
-static const unsigned BWH_MagicField = 0 * 4;
-static const unsigned BWH_VersionField = 1 * 4;
-static const unsigned BWH_OffsetField = 2 * 4;
-static const unsigned BWH_SizeField = 3 * 4;
-static const unsigned BWH_CPUTypeField = 4 * 4;
-static const unsigned BWH_HeaderSize = 5 * 4;
-
-namespace bitc {
-  enum StandardWidths {
-    BlockIDWidth   = 8,  // We use VBR-8 for block IDs.
-    CodeLenWidth   = 4,  // Codelen are VBR-4.
-    BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
-  };
-
-  // The standard abbrev namespace always has a way to exit a block, enter a
-  // nested block, define abbrevs, and define an unabbreviated record.
-  enum FixedAbbrevIDs {
-    END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
-    ENTER_SUBBLOCK = 1,
-
-    /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
-    /// of a vbr5 for # operand infos.  Each operand info is emitted with a
-    /// single bit to indicate if it is a literal encoding.  If so, the value is
-    /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
-    /// by the info value as a vbr5 if needed.
-    DEFINE_ABBREV = 2,
-
-    // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
-    // a vbr6 for the # operands, followed by vbr6's for each operand.
-    UNABBREV_RECORD = 3,
-
-    // This is not a code, this is a marker for the first abbrev assignment.
-    FIRST_APPLICATION_ABBREV = 4
-  };
-
-  /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
-  /// block, which contains metadata about other blocks in the file.
-  enum StandardBlockIDs {
-    /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
-    /// standard abbrevs that should be available to all blocks of a specified
-    /// ID.
-    BLOCKINFO_BLOCK_ID = 0,
-
-    // Block IDs 1-7 are reserved for future expansion.
-    FIRST_APPLICATION_BLOCKID = 8
-  };
-
-  /// BlockInfoCodes - The blockinfo block contains metadata about user-defined
-  /// blocks.
-  enum BlockInfoCodes {
-    // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
-    // block, instead of the BlockInfo block.
-
-    BLOCKINFO_CODE_SETBID        = 1, // SETBID: [blockid#]
-    BLOCKINFO_CODE_BLOCKNAME     = 2, // BLOCKNAME: [name]
-    BLOCKINFO_CODE_SETRECORDNAME = 3  // BLOCKINFO_CODE_SETRECORDNAME:
-                                      //                             [id, name]
-  };
-
-} // End bitc namespace
-
-/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
-/// This is actually a union of two different things:
-///   1. It could be a literal integer value ("the operand is always 17").
-///   2. It could be an encoding specification ("this operand encoded like so").
-///
-class BitCodeAbbrevOp {
-  uint64_t Val;           // A literal value or data for an encoding.
-  bool IsLiteral : 1;     // Indicate whether this is a literal value or not.
-  unsigned Enc   : 3;     // The encoding to use.
-public:
-  enum Encoding {
-    Fixed = 1,  // A fixed width field, Val specifies number of bits.
-    VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
-    Array = 3,  // A sequence of fields, next field species elt encoding.
-    Char6 = 4,  // A 6-bit fixed field which maps to [a-zA-Z0-9._].
-    Blob  = 5   // 32-bit aligned array of 8-bit characters.
-  };
-
-  explicit BitCodeAbbrevOp(uint64_t V) :  Val(V), IsLiteral(true) {}
-  explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
-    : Val(Data), IsLiteral(false), Enc(E) {}
-
-  bool isLiteral() const  { return IsLiteral; }
-  bool isEncoding() const { return !IsLiteral; }
-
-  // Accessors for literals.
-  uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }
-
-  // Accessors for encoding info.
-  Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; }
-  uint64_t getEncodingData() const {
-    assert(isEncoding() && hasEncodingData());
-    return Val;
-  }
-
-  bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
-  static bool hasEncodingData(Encoding E) {
-    switch (E) {
-    case Fixed:
-    case VBR:
-      return true;
-    case Array:
-    case Char6:
-    case Blob:
-      return false;
-    }
-    report_fatal_error("Invalid encoding");
-  }
-
-  /// isChar6 - Return true if this character is legal in the Char6 encoding.
-  static bool isChar6(char C) {
-    if (C >= 'a' && C <= 'z') return true;
-    if (C >= 'A' && C <= 'Z') return true;
-    if (C >= '0' && C <= '9') return true;
-    if (C == '.' || C == '_') return true;
-    return false;
-  }
-  static unsigned EncodeChar6(char C) {
-    if (C >= 'a' && C <= 'z') return C-'a';
-    if (C >= 'A' && C <= 'Z') return C-'A'+26;
-    if (C >= '0' && C <= '9') return C-'0'+26+26;
-    if (C == '.')             return 62;
-    if (C == '_')             return 63;
-    llvm_unreachable("Not a value Char6 character!");
-  }
-
-  static char DecodeChar6(unsigned V) {
-    assert((V & ~63) == 0 && "Not a Char6 encoded character!");
-    return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"
-        [V];
-  }
-
-};
-
-template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
-
-/// BitCodeAbbrev - This class represents an abbreviation record.  An
-/// abbreviation allows a complex record that has redundancy to be stored in a
-/// specialized format instead of the fully-general, fully-vbr, format.
-class BitCodeAbbrev {
-  SmallVector<BitCodeAbbrevOp, 32> OperandList;
-
-public:
-  unsigned getNumOperandInfos() const {
-    return static_cast<unsigned>(OperandList.size());
-  }
-  const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
-    return OperandList[N];
-  }
-
-  void Add(const BitCodeAbbrevOp &OpInfo) {
-    OperandList.push_back(OpInfo);
-  }
-};
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Bitcode/BitcodeAnalyzer.h b/include/llvm/Bitcode/BitcodeAnalyzer.h
new file mode 100644
index 000000000000..cfdebd6fe6cb
--- /dev/null
+++ b/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -0,0 +1,103 @@
+//===- llvm/Bitcode/BitcodeAnalyzer.h - Bitcode analyzer --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to analyze LLVM bitcode files/streams.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODE_ANALYZER_H
+#define LLVM_BITCODE_BITCODE_ANALYZER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+/// CurStreamTypeType - A type for CurStreamType
+enum CurStreamTypeType {
+  UnknownBitstream,
+  LLVMIRBitstream,
+  ClangSerializedASTBitstream,
+  ClangSerializedDiagnosticsBitstream,
+};
+
+struct BCDumpOptions {
+  /// The stream.
+  raw_ostream &OS;
+  /// Print per-code histogram.
+  bool Histogram = false;
+  /// Don't emit numeric info in dump if symbolic info is available.
+  bool Symbolic = false;
+  /// Print binary blobs using hex escapes.
+  bool ShowBinaryBlobs = false;
+
+  BCDumpOptions(raw_ostream &OS) : OS(OS) {}
+};
+
+class BitcodeAnalyzer {
+  BitstreamCursor Stream;
+  BitstreamBlockInfo BlockInfo;
+  CurStreamTypeType CurStreamType;
+  Optional<BitstreamCursor> BlockInfoStream;
+  unsigned NumTopBlocks = 0;
+
+  struct PerRecordStats {
+    unsigned NumInstances;
+    unsigned NumAbbrev;
+    uint64_t TotalBits;
+    PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
+  };
+
+  struct PerBlockIDStats {
+    /// NumInstances - This is the number of times this block ID has been seen.
+    unsigned NumInstances;
+    /// NumBits - The total size in bits of all of these blocks.
+    uint64_t NumBits;
+    /// NumSubBlocks - The total number of blocks these blocks contain.
+    unsigned NumSubBlocks;
+    /// NumAbbrevs - The total number of abbreviations.
+    unsigned NumAbbrevs;
+    /// NumRecords - The total number of records these blocks contain, and the
+    /// number that are abbreviated.
+    unsigned NumRecords, NumAbbreviatedRecords;
+    /// CodeFreq - Keep track of the number of times we see each code.
+    std::vector<PerRecordStats> CodeFreq;
+    PerBlockIDStats()
+        : NumInstances(0), NumBits(0), NumSubBlocks(0), NumAbbrevs(0),
+          NumRecords(0), NumAbbreviatedRecords(0) {}
+  };
+
+  std::map<unsigned, PerBlockIDStats> BlockIDStats;
+
+public:
+  BitcodeAnalyzer(StringRef Buffer, Optional<StringRef> BlockInfoBuffer = None);
+  /// Analyze the bitcode file.
+  Error analyze(Optional<BCDumpOptions> O = None,
+                Optional<StringRef> CheckHash = None);
+  /// Print stats about the bitcode file.
+  void printStats(BCDumpOptions O, Optional<StringRef> Filename = None);
+
+private:
+  /// Read a block, updating statistics, etc.
+  Error parseBlock(unsigned BlockID, unsigned IndentLevel,
+                   Optional<BCDumpOptions> O = None,
+                   Optional<StringRef> CheckHash = None);
+
+  Error decodeMetadataStringsBlob(StringRef Indent, ArrayRef<uint64_t> Record,
+                                  StringRef Blob, raw_ostream &OS);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_BITCODE_BITCODE_ANALYZER_H
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 0d7cc141f2ce..ba61da733bea 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -1,9 +1,8 @@
 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h
index 0010cf6c0544..39061e09cda5 100644
--- a/include/llvm/Bitcode/BitcodeWriter.h
+++ b/include/llvm/Bitcode/BitcodeWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/Bitcode/BitcodeWriter.h - Bitcode writers -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h
index 05044c9ae11c..1773d1b9f11b 100644
--- a/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -1,9 +1,8 @@
 //===-- BitcodeWriterPass.h - Bitcode writing pass --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
deleted file mode 100644
index 72e7619d9e1c..000000000000
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ /dev/null
@@ -1,506 +0,0 @@
-//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header defines the BitstreamReader class.  This class can be used to
-// read an arbitrary bitstream, regardless of its contents.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_BITCODE_BITSTREAMREADER_H
-#define LLVM_BITCODE_BITSTREAMREADER_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Bitcode/BitCodes.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
-#include <cassert>
-#include <climits>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace llvm {
-
-/// This class maintains the abbreviations read from a block info block.
-class BitstreamBlockInfo {
-public:
-  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
-  /// describe abbreviations that all blocks of the specified ID inherit.
-  struct BlockInfo {
-    unsigned BlockID;
-    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
-    std::string Name;
-    std::vector<std::pair<unsigned, std::string>> RecordNames;
-  };
-
-private:
-  std::vector<BlockInfo> BlockInfoRecords;
-
-public:
-  /// If there is block info for the specified ID, return it, otherwise return
-  /// null.
-  const BlockInfo *getBlockInfo(unsigned BlockID) const {
-    // Common case, the most recent entry matches BlockID.
-    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
-      return &BlockInfoRecords.back();
-
-    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
-         i != e; ++i)
-      if (BlockInfoRecords[i].BlockID == BlockID)
-        return &BlockInfoRecords[i];
-    return nullptr;
-  }
-
-  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
-    if (const BlockInfo *BI = getBlockInfo(BlockID))
-      return *const_cast<BlockInfo*>(BI);
-
-    // Otherwise, add a new record.
-    BlockInfoRecords.emplace_back();
-    BlockInfoRecords.back().BlockID = BlockID;
-    return BlockInfoRecords.back();
-  }
-};
-
-/// This represents a position within a bitstream. There may be multiple
-/// independent cursors reading within one bitstream, each maintaining their
-/// own local state.
-class SimpleBitstreamCursor {
-  ArrayRef<uint8_t> BitcodeBytes;
-  size_t NextChar = 0;
-
-public:
-  /// This is the current data we have pulled from the stream but have not
-  /// returned to the client. This is specifically and intentionally defined to
-  /// follow the word size of the host machine for efficiency. We use word_t in
-  /// places that are aware of this to make it perfectly explicit what is going
-  /// on.
-  using word_t = size_t;
-
-private:
-  word_t CurWord = 0;
-
-  /// This is the number of bits in CurWord that are valid. This is always from
-  /// [0...bits_of(size_t)-1] inclusive.
-  unsigned BitsInCurWord = 0;
-
-public:
-  static const size_t MaxChunkSize = sizeof(word_t) * 8;
-
-  SimpleBitstreamCursor() = default;
-  explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
-      : BitcodeBytes(BitcodeBytes) {}
-  explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
-      : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
-                     BitcodeBytes.size()) {}
-  explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
-      : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
-
-  bool canSkipToPos(size_t pos) const {
-    // pos can be skipped to if it is a valid address or one byte past the end.
-    return pos <= BitcodeBytes.size();
-  }
-
-  bool AtEndOfStream() {
-    return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
-  }
-
-  /// Return the bit # of the bit we are reading.
-  uint64_t GetCurrentBitNo() const {
-    return NextChar*CHAR_BIT - BitsInCurWord;
-  }
-
-  // Return the byte # of the current bit.
-  uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
-
-  ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
-
-  /// Reset the stream to the specified bit number.
-  void JumpToBit(uint64_t BitNo) {
-    size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
-    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
-    assert(canSkipToPos(ByteNo) && "Invalid location");
-
-    // Move the cursor to the right word.
-    NextChar = ByteNo;
-    BitsInCurWord = 0;
-
-    // Skip over any bits that are already consumed.
-    if (WordBitNo)
-      Read(WordBitNo);
-  }
-
-  /// Get a pointer into the bitstream at the specified byte offset.
-  const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
-    return BitcodeBytes.data() + ByteNo;
-  }
-
-  /// Get a pointer into the bitstream at the specified bit offset.
-  ///
-  /// The bit offset must be on a byte boundary.
-  const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
-    assert(!(BitNo % 8) && "Expected bit on byte boundary");
-    return getPointerToByte(BitNo / 8, NumBytes);
-  }
-
-  void fillCurWord() {
-    if (NextChar >= BitcodeBytes.size())
-      report_fatal_error("Unexpected end of file");
-
-    // Read the next word from the stream.
-    const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
-    unsigned BytesRead;
-    if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
-      BytesRead = sizeof(word_t);
-      CurWord =
-          support::endian::read<word_t, support::little, support::unaligned>(
-              NextCharPtr);
-    } else {
-      // Short read.
-      BytesRead = BitcodeBytes.size() - NextChar;
-      CurWord = 0;
-      for (unsigned B = 0; B != BytesRead; ++B)
-        CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
-    }
-    NextChar += BytesRead;
-    BitsInCurWord = BytesRead * 8;
-  }
-
-  word_t Read(unsigned NumBits) {
-    static const unsigned BitsInWord = MaxChunkSize;
-
-    assert(NumBits && NumBits <= BitsInWord &&
-           "Cannot return zero or more than BitsInWord bits!");
-
-    static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
-
-    // If the field is fully contained by CurWord, return it quickly.
-    if (BitsInCurWord >= NumBits) {
-      word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
-
-      // Use a mask to avoid undefined behavior.
-      CurWord >>= (NumBits & Mask);
-
-      BitsInCurWord -= NumBits;
-      return R;
-    }
-
-    word_t R = BitsInCurWord ? CurWord : 0;
-    unsigned BitsLeft = NumBits - BitsInCurWord;
-
-    fillCurWord();
-
-    // If we run out of data, abort.
-    if (BitsLeft > BitsInCurWord)
-      report_fatal_error("Unexpected end of file");
-
-    word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
-
-    // Use a mask to avoid undefined behavior.
-    CurWord >>= (BitsLeft & Mask);
-
-    BitsInCurWord -= BitsLeft;
-
-    R |= R2 << (NumBits - BitsLeft);
-
-    return R;
-  }
-
-  uint32_t ReadVBR(unsigned NumBits) {
-    uint32_t Piece = Read(NumBits);
-    if ((Piece & (1U << (NumBits-1))) == 0)
-      return Piece;
-
-    uint32_t Result = 0;
-    unsigned NextBit = 0;
-    while (true) {
-      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
-
-      if ((Piece & (1U << (NumBits-1))) == 0)
-        return Result;
-
-      NextBit += NumBits-1;
-      Piece = Read(NumBits);
-    }
-  }
-
-  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
-  // the VBR must still be <= 32 bits though.
-  uint64_t ReadVBR64(unsigned NumBits) {
-    uint32_t Piece = Read(NumBits);
-    if ((Piece & (1U << (NumBits-1))) == 0)
-      return uint64_t(Piece);
-
-    uint64_t Result = 0;
-    unsigned NextBit = 0;
-    while (true) {
-      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
-
-      if ((Piece & (1U << (NumBits-1))) == 0)
-        return Result;
-
-      NextBit += NumBits-1;
-      Piece = Read(NumBits);
-    }
-  }
-
-  void SkipToFourByteBoundary() {
-    // If word_t is 64-bits and if we've read less than 32 bits, just dump
-    // the bits we have up to the next 32-bit boundary.
-    if (sizeof(word_t) > 4 &&
-        BitsInCurWord >= 32) {
-      CurWord >>= BitsInCurWord-32;
-      BitsInCurWord = 32;
-      return;
-    }
-
-    BitsInCurWord = 0;
-  }
-
-  /// Skip to the end of the file.
-  void skipToEnd() { NextChar = BitcodeBytes.size(); }
-};
-
-/// When advancing through a bitstream cursor, each advance can discover a few
-/// different kinds of entries:
-struct BitstreamEntry {
-  enum {
-    Error,    // Malformed bitcode was found.
-    EndBlock, // We've reached the end of the current block, (or the end of the
-              // file, which is treated like a series of EndBlock records.
-    SubBlock, // This is the start of a new subblock of a specific ID.
-    Record    // This is a record with a specific AbbrevID.
-  } Kind;
-
-  unsigned ID;
-
-  static BitstreamEntry getError() {
-    BitstreamEntry E; E.Kind = Error; return E;
-  }
-
-  static BitstreamEntry getEndBlock() {
-    BitstreamEntry E; E.Kind = EndBlock; return E;
-  }
-
-  static BitstreamEntry getSubBlock(unsigned ID) {
-    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
-  }
-
-  static BitstreamEntry getRecord(unsigned AbbrevID) {
-    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
-  }
-};
-
-/// This represents a position within a bitcode file, implemented on top of a
-/// SimpleBitstreamCursor.
-///
-/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
-/// be passed by value.
-class BitstreamCursor : SimpleBitstreamCursor {
-  // This is the declared size of code values used for the current block, in
-  // bits.
-  unsigned CurCodeSize = 2;
-
-  /// Abbrevs installed at in this block.
-  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
-
-  struct Block {
-    unsigned PrevCodeSize;
-    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
-
-    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
-  };
-
-  /// This tracks the codesize of parent blocks.
-  SmallVector<Block, 8> BlockScope;
-
-  BitstreamBlockInfo *BlockInfo = nullptr;
-
-public:
-  static const size_t MaxChunkSize = sizeof(word_t) * 8;
-
-  BitstreamCursor() = default;
-  explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
-      : SimpleBitstreamCursor(BitcodeBytes) {}
-  explicit BitstreamCursor(StringRef BitcodeBytes)
-      : SimpleBitstreamCursor(BitcodeBytes) {}
-  explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
-      : SimpleBitstreamCursor(BitcodeBytes) {}
-
-  using SimpleBitstreamCursor::canSkipToPos;
-  using SimpleBitstreamCursor::AtEndOfStream;
-  using SimpleBitstreamCursor::getBitcodeBytes;
-  using SimpleBitstreamCursor::GetCurrentBitNo;
-  using SimpleBitstreamCursor::getCurrentByteNo;
-  using SimpleBitstreamCursor::getPointerToByte;
-  using SimpleBitstreamCursor::JumpToBit;
-  using SimpleBitstreamCursor::fillCurWord;
-  using SimpleBitstreamCursor::Read;
-  using SimpleBitstreamCursor::ReadVBR;
-  using SimpleBitstreamCursor::ReadVBR64;
-
-  /// Return the number of bits used to encode an abbrev #.
-  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
-
-  /// Flags that modify the behavior of advance().
-  enum {
-    /// If this flag is used, the advance() method does not automatically pop
-    /// the block scope when the end of a block is reached.
-    AF_DontPopBlockAtEnd = 1,
-
-    /// If this flag is used, abbrev entries are returned just like normal
-    /// records.
-    AF_DontAutoprocessAbbrevs = 2
-  };
-
-  /// Advance the current bitstream, returning the next entry in the stream.
-  BitstreamEntry advance(unsigned Flags = 0) {
-    while (true) {
-      if (AtEndOfStream())
-        return BitstreamEntry::getError();
-
-      unsigned Code = ReadCode();
-      if (Code == bitc::END_BLOCK) {
-        // Pop the end of the block unless Flags tells us not to.
-        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
-          return BitstreamEntry::getError();
-        return BitstreamEntry::getEndBlock();
-      }
-
-      if (Code == bitc::ENTER_SUBBLOCK)
-        return BitstreamEntry::getSubBlock(ReadSubBlockID());
-
-      if (Code == bitc::DEFINE_ABBREV &&
-          !(Flags & AF_DontAutoprocessAbbrevs)) {
-        // We read and accumulate abbrev's, the client can't do anything with
-        // them anyway.
-        ReadAbbrevRecord();
-        continue;
-      }
-
-      return BitstreamEntry::getRecord(Code);
-    }
-  }
-
-  /// This is a convenience function for clients that don't expect any
-  /// subblocks. This just skips over them automatically.
-  BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
-    while (true) {
-      // If we found a normal entry, return it.
-      BitstreamEntry Entry = advance(Flags);
-      if (Entry.Kind != BitstreamEntry::SubBlock)
-        return Entry;
-
-      // If we found a sub-block, just skip over it and check the next entry.
-      if (SkipBlock())
-        return BitstreamEntry::getError();
-    }
-  }
-
-  unsigned ReadCode() {
-    return Read(CurCodeSize);
-  }
-
-  // Block header:
-  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
-
-  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
-  unsigned ReadSubBlockID() {
-    return ReadVBR(bitc::BlockIDWidth);
-  }
-
-  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
-  /// of this block. If the block record is malformed, return true.
-  bool SkipBlock() {
-    // Read and ignore the codelen value.  Since we are skipping this block, we
-    // don't care what code widths are used inside of it.
-    ReadVBR(bitc::CodeLenWidth);
-    SkipToFourByteBoundary();
-    size_t NumFourBytes = Read(bitc::BlockSizeWidth);
-
-    // Check that the block wasn't partially defined, and that the offset isn't
-    // bogus.
-    size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
-    if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
-      return true;
-
-    JumpToBit(SkipTo);
-    return false;
-  }
-
-  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
-  /// if the block has an error.
-  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
-
-  bool ReadBlockEnd() {
-    if (BlockScope.empty()) return true;
-
-    // Block tail:
-    //    [END_BLOCK, <align4bytes>]
-    SkipToFourByteBoundary();
-
-    popBlockScope();
-    return false;
-  }
-
-private:
-  void popBlockScope() {
-    CurCodeSize = BlockScope.back().PrevCodeSize;
-
-    CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
-    BlockScope.pop_back();
-  }
-
-  //===--------------------------------------------------------------------===//
-  // Record Processing
-  //===--------------------------------------------------------------------===//
-
-public:
-  /// Return the abbreviation for the specified AbbrevId.
-  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
-    unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
-    if (AbbrevNo >= CurAbbrevs.size())
-      report_fatal_error("Invalid abbrev number");
-    return CurAbbrevs[AbbrevNo].get();
-  }
-
-  /// Read the current record and discard it, returning the code for the record.
-  unsigned skipRecord(unsigned AbbrevID);
-
-  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
-                      StringRef *Blob = nullptr);
-
-  //===--------------------------------------------------------------------===//
-  // Abbrev Processing
-  //===--------------------------------------------------------------------===//
-  void ReadAbbrevRecord();
-
-  /// Read and return a block info block from the bitstream. If an error was
-  /// encountered, return None.
-  ///
-  /// \param ReadBlockInfoNames Whether to read block/record name information in
-  /// the BlockInfo block. Only llvm-bcanalyzer uses this.
-  Optional<BitstreamBlockInfo>
-  ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
-
-  /// Set the block info to be used by this BitstreamCursor to interpret
-  /// abbreviated records.
-  void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
-};
-
-} // end llvm namespace
-
-#endif // LLVM_BITCODE_BITSTREAMREADER_H
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
deleted file mode 100644
index c854769e0622..000000000000
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ /dev/null
@@ -1,550 +0,0 @@
-//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header defines the BitstreamWriter class.  This class can be used to
-// write an arbitrary bitstream, regardless of its contents.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_BITCODE_BITSTREAMWRITER_H
-#define LLVM_BITCODE_BITSTREAMWRITER_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitCodes.h"
-#include "llvm/Support/Endian.h"
-#include <vector>
-
-namespace llvm {
-
-class BitstreamWriter {
-  SmallVectorImpl<char> &Out;
-
-  /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
-  unsigned CurBit;
-
-  /// CurValue - The current value.  Only bits < CurBit are valid.
-  uint32_t CurValue;
-
-  /// CurCodeSize - This is the declared size of code values used for the
-  /// current block, in bits.
-  unsigned CurCodeSize;
-
-  /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
-  /// selected BLOCK ID.
-  unsigned BlockInfoCurBID;
-
-  /// CurAbbrevs - Abbrevs installed at in this block.
-  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
-
-  struct Block {
-    unsigned PrevCodeSize;
-    size_t StartSizeWord;
-    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
-    Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
-  };
-
-  /// BlockScope - This tracks the current blocks that we have entered.
-  std::vector<Block> BlockScope;
-
-  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
-  /// These describe abbreviations that all blocks of the specified ID inherit.
-  struct BlockInfo {
-    unsigned BlockID;
-    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
-  };
-  std::vector<BlockInfo> BlockInfoRecords;
-
-  void WriteByte(unsigned char Value) {
-    Out.push_back(Value);
-  }
-
-  void WriteWord(unsigned Value) {
-    Value = support::endian::byte_swap<uint32_t, support::little>(Value);
-    Out.append(reinterpret_cast<const char *>(&Value),
-               reinterpret_cast<const char *>(&Value + 1));
-  }
-
-  size_t GetBufferOffset() const { return Out.size(); }
-
-  size_t GetWordIndex() const {
-    size_t Offset = GetBufferOffset();
-    assert((Offset & 3) == 0 && "Not 32-bit aligned");
-    return Offset / 4;
-  }
-
-public:
-  explicit BitstreamWriter(SmallVectorImpl<char> &O)
-    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
-
-  ~BitstreamWriter() {
-    assert(CurBit == 0 && "Unflushed data remaining");
-    assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
-  }
-
-  /// Retrieve the current position in the stream, in bits.
-  uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
-
-  /// Retrieve the number of bits currently used to encode an abbrev ID.
-  unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
-
-  //===--------------------------------------------------------------------===//
-  // Basic Primitives for emitting bits to the stream.
-  //===--------------------------------------------------------------------===//
-
-  /// Backpatch a 32-bit word in the output at the given bit offset
-  /// with the specified value.
-  void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
-    using namespace llvm::support;
-    unsigned ByteNo = BitNo / 8;
-    assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
-               &Out[ByteNo], BitNo & 7)) &&
-           "Expected to be patching over 0-value placeholders");
-    endian::writeAtBitAlignment<uint32_t, little, unaligned>(
-        &Out[ByteNo], NewWord, BitNo & 7);
-  }
-
-  void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
-    BackpatchWord(BitNo, (uint32_t)Val);
-    BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32));
-  }
-
-  void Emit(uint32_t Val, unsigned NumBits) {
-    assert(NumBits && NumBits <= 32 && "Invalid value size!");
-    assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
-    CurValue |= Val << CurBit;
-    if (CurBit + NumBits < 32) {
-      CurBit += NumBits;
-      return;
-    }
-
-    // Add the current word.
-    WriteWord(CurValue);
-
-    if (CurBit)
-      CurValue = Val >> (32-CurBit);
-    else
-      CurValue = 0;
-    CurBit = (CurBit+NumBits) & 31;
-  }
-
-  void FlushToWord() {
-    if (CurBit) {
-      WriteWord(CurValue);
-      CurBit = 0;
-      CurValue = 0;
-    }
-  }
-
-  void EmitVBR(uint32_t Val, unsigned NumBits) {
-    assert(NumBits <= 32 && "Too many bits to emit!");
-    uint32_t Threshold = 1U << (NumBits-1);
-
-    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
-    while (Val >= Threshold) {
-      Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
-      Val >>= NumBits-1;
-    }
-
-    Emit(Val, NumBits);
-  }
-
-  void EmitVBR64(uint64_t Val, unsigned NumBits) {
-    assert(NumBits <= 32 && "Too many bits to emit!");
-    if ((uint32_t)Val == Val)
-      return EmitVBR((uint32_t)Val, NumBits);
-
-    uint32_t Threshold = 1U << (NumBits-1);
-
-    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
-    while (Val >= Threshold) {
-      Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
-           (1 << (NumBits-1)), NumBits);
-      Val >>= NumBits-1;
-    }
-
-    Emit((uint32_t)Val, NumBits);
-  }
-
-  /// EmitCode - Emit the specified code.
-  void EmitCode(unsigned Val) {
-    Emit(Val, CurCodeSize);
-  }
-
-  //===--------------------------------------------------------------------===//
-  // Block Manipulation
-  //===--------------------------------------------------------------------===//
-
-  /// getBlockInfo - If there is block info for the specified ID, return it,
-  /// otherwise return null.
-  BlockInfo *getBlockInfo(unsigned BlockID) {
-    // Common case, the most recent entry matches BlockID.
-    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
-      return &BlockInfoRecords.back();
-
-    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
-         i != e; ++i)
-      if (BlockInfoRecords[i].BlockID == BlockID)
-        return &BlockInfoRecords[i];
-    return nullptr;
-  }
-
-  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
-    // Block header:
-    //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
-    EmitCode(bitc::ENTER_SUBBLOCK);
-    EmitVBR(BlockID, bitc::BlockIDWidth);
-    EmitVBR(CodeLen, bitc::CodeLenWidth);
-    FlushToWord();
-
-    size_t BlockSizeWordIndex = GetWordIndex();
-    unsigned OldCodeSize = CurCodeSize;
-
-    // Emit a placeholder, which will be replaced when the block is popped.
-    Emit(0, bitc::BlockSizeWidth);
-
-    CurCodeSize = CodeLen;
-
-    // Push the outer block's abbrev set onto the stack, start out with an
-    // empty abbrev set.
-    BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex);
-    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
-
-    // If there is a blockinfo for this BlockID, add all the predefined abbrevs
-    // to the abbrev list.
-    if (BlockInfo *Info = getBlockInfo(BlockID)) {
-      CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
-                        Info->Abbrevs.end());
-    }
-  }
-
-  void ExitBlock() {
-    assert(!BlockScope.empty() && "Block scope imbalance!");
-    const Block &B = BlockScope.back();
-
-    // Block tail:
-    //    [END_BLOCK, <align4bytes>]
-    EmitCode(bitc::END_BLOCK);
-    FlushToWord();
-
-    // Compute the size of the block, in words, not counting the size field.
-    size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
-    uint64_t BitNo = uint64_t(B.StartSizeWord) * 32;
-
-    // Update the block size field in the header of this sub-block.
-    BackpatchWord(BitNo, SizeInWords);
-
-    // Restore the inner block's code size and abbrev table.
-    CurCodeSize = B.PrevCodeSize;
-    CurAbbrevs = std::move(B.PrevAbbrevs);
-    BlockScope.pop_back();
-  }
-
-  //===--------------------------------------------------------------------===//
-  // Record Emission
-  //===--------------------------------------------------------------------===//
-
-private:
-  /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
-  /// record.  This is a no-op, since the abbrev specifies the literal to use.
-  template<typename uintty>
-  void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
-    assert(Op.isLiteral() && "Not a literal");
-    // If the abbrev specifies the literal value to use, don't emit
-    // anything.
-    assert(V == Op.getLiteralValue() &&
-           "Invalid abbrev for record!");
-  }
-
-  /// EmitAbbreviatedField - Emit a single scalar field value with the specified
-  /// encoding.
-  template<typename uintty>
-  void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
-    assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
-
-    // Encode the value as we are commanded.
-    switch (Op.getEncoding()) {
-    default: llvm_unreachable("Unknown encoding!");
-    case BitCodeAbbrevOp::Fixed:
-      if (Op.getEncodingData())
-        Emit((unsigned)V, (unsigned)Op.getEncodingData());
-      break;
-    case BitCodeAbbrevOp::VBR:
-      if (Op.getEncodingData())
-        EmitVBR64(V, (unsigned)Op.getEncodingData());
-      break;
-    case BitCodeAbbrevOp::Char6:
-      Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
-      break;
-    }
-  }
-
-  /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
-  /// emission code.  If BlobData is non-null, then it specifies an array of
-  /// data that should be emitted as part of the Blob or Array operand that is
-  /// known to exist at the end of the record. If Code is specified, then
-  /// it is the record code to emit before the Vals, which must not contain
-  /// the code.
-  template <typename uintty>
-  void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef<uintty> Vals,
-                                StringRef Blob, Optional<unsigned> Code) {
-    const char *BlobData = Blob.data();
-    unsigned BlobLen = (unsigned) Blob.size();
-    unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
-    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
-    const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get();
-
-    EmitCode(Abbrev);
-
-    unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
-    if (Code) {
-      assert(e && "Expected non-empty abbreviation");
-      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++);
-
-      if (Op.isLiteral())
-        EmitAbbreviatedLiteral(Op, Code.getValue());
-      else {
-        assert(Op.getEncoding() != BitCodeAbbrevOp::Array &&
-               Op.getEncoding() != BitCodeAbbrevOp::Blob &&
-               "Expected literal or scalar");
-        EmitAbbreviatedField(Op, Code.getValue());
-      }
-    }
-
-    unsigned RecordIdx = 0;
-    for (; i != e; ++i) {
-      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
-      if (Op.isLiteral()) {
-        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
-        EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
-        ++RecordIdx;
-      } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
-        // Array case.
-        assert(i + 2 == e && "array op not second to last?");
-        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
-
-        // If this record has blob data, emit it, otherwise we must have record
-        // entries to encode this way.
-        if (BlobData) {
-          assert(RecordIdx == Vals.size() &&
-                 "Blob data and record entries specified for array!");
-          // Emit a vbr6 to indicate the number of elements present.
-          EmitVBR(static_cast<uint32_t>(BlobLen), 6);
-
-          // Emit each field.
-          for (unsigned i = 0; i != BlobLen; ++i)
-            EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
-
-          // Know that blob data is consumed for assertion below.
-          BlobData = nullptr;
-        } else {
-          // Emit a vbr6 to indicate the number of elements present.
-          EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
-
-          // Emit each field.
-          for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
-            EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
-        }
-      } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
-        // If this record has blob data, emit it, otherwise we must have record
-        // entries to encode this way.
-
-        if (BlobData) {
-          assert(RecordIdx == Vals.size() &&
-                 "Blob data and record entries specified for blob operand!");
-
-          assert(Blob.data() == BlobData && "BlobData got moved");
-          assert(Blob.size() == BlobLen && "BlobLen got changed");
-          emitBlob(Blob);
-          BlobData = nullptr;
-        } else {
-          emitBlob(Vals.slice(RecordIdx));
-        }
-      } else {  // Single scalar field.
-        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
-        EmitAbbreviatedField(Op, Vals[RecordIdx]);
-        ++RecordIdx;
-      }
-    }
-    assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
-    assert(BlobData == nullptr &&
-           "Blob data specified for record that doesn't use it!");
-  }
-
-public:
-  /// Emit a blob, including flushing before and tail-padding.
-  template <class UIntTy>
-  void emitBlob(ArrayRef<UIntTy> Bytes, bool ShouldEmitSize = true) {
-    // Emit a vbr6 to indicate the number of elements present.
-    if (ShouldEmitSize)
-      EmitVBR(static_cast<uint32_t>(Bytes.size()), 6);
-
-    // Flush to a 32-bit alignment boundary.
-    FlushToWord();
-
-    // Emit literal bytes.
-    for (const auto &B : Bytes) {
-      assert(isUInt<8>(B) && "Value too large to emit as byte");
-      WriteByte((unsigned char)B);
-    }
-
-    // Align end to 32-bits.
-    while (GetBufferOffset() & 3)
-      WriteByte(0);
-  }
-  void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) {
-    emitBlob(makeArrayRef((const uint8_t *)Bytes.data(), Bytes.size()),
-             ShouldEmitSize);
-  }
-
-  /// EmitRecord - Emit the specified record to the stream, using an abbrev if
-  /// we have one to compress the output.
-  template <typename Container>
-  void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) {
-    if (!Abbrev) {
-      // If we don't have an abbrev to use, emit this in its fully unabbreviated
-      // form.
-      auto Count = static_cast<uint32_t>(makeArrayRef(Vals).size());
-      EmitCode(bitc::UNABBREV_RECORD);
-      EmitVBR(Code, 6);
-      EmitVBR(Count, 6);
-      for (unsigned i = 0, e = Count; i != e; ++i)
-        EmitVBR64(Vals[i], 6);
-      return;
-    }
-
-    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code);
-  }
-
-  /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
-  /// Unlike EmitRecord, the code for the record should be included in Vals as
-  /// the first entry.
-  template <typename Container>
-  void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) {
-    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None);
-  }
-
-  /// EmitRecordWithBlob - Emit the specified record to the stream, using an
-  /// abbrev that includes a blob at the end.  The blob data to emit is
-  /// specified by the pointer and length specified at the end.  In contrast to
-  /// EmitRecord, this routine expects that the first entry in Vals is the code
-  /// of the record.
-  template <typename Container>
-  void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
-                          StringRef Blob) {
-    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None);
-  }
-  template <typename Container>
-  void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
-                          const char *BlobData, unsigned BlobLen) {
-    return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
-                                    StringRef(BlobData, BlobLen), None);
-  }
-
-  /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
-  /// that end with an array.
-  template <typename Container>
-  void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
-                           StringRef Array) {
-    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None);
-  }
-  template <typename Container>
-  void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
-                           const char *ArrayData, unsigned ArrayLen) {
-    return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
-                                    StringRef(ArrayData, ArrayLen), None);
-  }
-
-  //===--------------------------------------------------------------------===//
-  // Abbrev Emission
-  //===--------------------------------------------------------------------===//
-
-private:
-  // Emit the abbreviation as a DEFINE_ABBREV record.
-  void EncodeAbbrev(const BitCodeAbbrev &Abbv) {
-    EmitCode(bitc::DEFINE_ABBREV);
-    EmitVBR(Abbv.getNumOperandInfos(), 5);
-    for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos());
-         i != e; ++i) {
-      const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i);
-      Emit(Op.isLiteral(), 1);
-      if (Op.isLiteral()) {
-        EmitVBR64(Op.getLiteralValue(), 8);
-      } else {
-        Emit(Op.getEncoding(), 3);
-        if (Op.hasEncodingData())
-          EmitVBR64(Op.getEncodingData(), 5);
-      }
-    }
-  }
-public:
-
-  /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
-  /// method takes ownership of the specified abbrev.
-  unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
-    // Emit the abbreviation as a record.
-    EncodeAbbrev(*Abbv);
-    CurAbbrevs.push_back(std::move(Abbv));
-    return static_cast<unsigned>(CurAbbrevs.size())-1 +
-      bitc::FIRST_APPLICATION_ABBREV;
-  }
-
-  //===--------------------------------------------------------------------===//
-  // BlockInfo Block Emission
-  //===--------------------------------------------------------------------===//
-
-  /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
-  void EnterBlockInfoBlock() {
-    EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2);
-    BlockInfoCurBID = ~0U;
-    BlockInfoRecords.clear();
-  }
-private:
-  /// SwitchToBlockID - If we aren't already talking about the specified block
-  /// ID, emit a BLOCKINFO_CODE_SETBID record.
-  void SwitchToBlockID(unsigned BlockID) {
-    if (BlockInfoCurBID == BlockID) return;
-    SmallVector<unsigned, 2> V;
-    V.push_back(BlockID);
-    EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
-    BlockInfoCurBID = BlockID;
-  }
-
-  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
-    if (BlockInfo *BI = getBlockInfo(BlockID))
-      return *BI;
-
-    // Otherwise, add a new record.
-    BlockInfoRecords.emplace_back();
-    BlockInfoRecords.back().BlockID = BlockID;
-    return BlockInfoRecords.back();
-  }
-
-public:
-
-  /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
-  /// BlockID.
-  unsigned EmitBlockInfoAbbrev(unsigned BlockID, std::shared_ptr<BitCodeAbbrev> Abbv) {
-    SwitchToBlockID(BlockID);
-    EncodeAbbrev(*Abbv);
-
-    // Add the abbrev to the specified block record.
-    BlockInfo &Info = getOrCreateBlockInfo(BlockID);
-    Info.Abbrevs.push_back(std::move(Abbv));
-
-    return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
-  }
-};
-
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index f0d11e9c1689..decd4dd3a965 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -1,9 +1,8 @@
 //===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,7 +17,7 @@
 #ifndef LLVM_BITCODE_LLVMBITCODES_H
 #define LLVM_BITCODE_LLVMBITCODES_H
 
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
 
 namespace llvm {
 namespace bitc {
@@ -264,10 +263,31 @@ enum GlobalValueSummarySymtabCodes {
   // Index-wide flags
   FS_FLAGS = 20,
   // Maps type identifier to summary information for that type identifier.
+  // Produced by the thin link (only lives in combined index).
   // TYPE_ID: [typeid, kind, bitwidth, align, size, bitmask, inlinebits,
   //           n x (typeid, kind, name, numrba,
   //                numrba x (numarg, numarg x arg, kind, info, byte, bit))]
   FS_TYPE_ID = 21,
+  // For background see overview at https://llvm.org/docs/TypeMetadata.html.
+  // The type metadata includes both the type identifier and the offset of
+  // the address point of the type (the address held by objects of that type
+  // which may not be the beginning of the virtual table). Vtable definitions
+  // are decorated with type metadata for the types they are compatible with.
+  //
+  // Maps type identifier to summary information for that type identifier
+  // computed from type metadata: the valueid of each vtable definition
+  // decorated with a type metadata for that identifier, and the offset from
+  // the corresponding type metadata.
+  // Exists in the per-module summary to provide information to thin link
+  // for index-based whole program devirtualization.
+  // TYPE_ID_METADATA: [typeid, n x (valueid, offset)]
+  FS_TYPE_ID_METADATA = 22,
+  // Summarizes vtable definition for use in index-based whole program
+  // devirtualization during the thin link.
+  // PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags,
+  //                                        numrefs, numrefs x valueid,
+  //                                        n x (valueid, offset)]
+  FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS = 23,
 };
 
 enum MetadataCodes {
@@ -311,6 +331,7 @@ enum MetadataCodes {
   METADATA_INDEX_OFFSET = 38,           // [offset]
   METADATA_INDEX = 39,                  // [bitpos]
   METADATA_LABEL = 40,                  // [distinct, scope, name, file, line]
+  METADATA_COMMON_BLOCK = 44,     // [distinct, scope, name, variable,...]
 };
 
 // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -407,7 +428,9 @@ enum RMWOperations {
   RMW_MAX = 7,
   RMW_MIN = 8,
   RMW_UMAX = 9,
-  RMW_UMIN = 10
+  RMW_UMIN = 10,
+  RMW_FADD = 11,
+  RMW_FSUB = 12
 };
 
 /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
@@ -534,6 +557,8 @@ enum FunctionCodes {
   // 54 is unused.
   FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
   FUNC_CODE_INST_UNOP = 56,      // UNOP:       [opcode, ty, opval]
+  FUNC_CODE_INST_CALLBR = 57,    // CALLBR:     [attr, cc, norm, transfs,
+                                 //              fnty, fnid, args...]
 };
 
 enum UseListCodes {
@@ -602,6 +627,11 @@ enum AttributeKindCodes {
   ATTR_KIND_OPT_FOR_FUZZING = 57,
   ATTR_KIND_SHADOWCALLSTACK = 58,
   ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
+  ATTR_KIND_IMMARG = 60,
+  ATTR_KIND_WILLRETURN = 61,
+  ATTR_KIND_NOFREE = 62,
+  ATTR_KIND_NOSYNC = 63,
+  ATTR_KIND_SANITIZE_MEMTAG = 64,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/include/llvm/Bitstream/BitCodes.h b/include/llvm/Bitstream/BitCodes.h
new file mode 100644
index 000000000000..adf54ba96396
--- /dev/null
+++ b/include/llvm/Bitstream/BitCodes.h
@@ -0,0 +1,184 @@
+//===- BitCodes.h - Enum values for the bitstream format --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines bitstream enum values.
+//
+// The enum values defined in this file should be considered permanent.  If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITSTREAM_BITCODES_H
+#define LLVM_BITSTREAM_BITCODES_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+
+namespace llvm {
+/// Offsets of the 32-bit fields of bitstream wrapper header.
+enum BitstreamWrapperHeader : unsigned {
+  BWH_MagicField   = 0 * 4,
+  BWH_VersionField = 1 * 4,
+  BWH_OffsetField  = 2 * 4,
+  BWH_SizeField    = 3 * 4,
+  BWH_CPUTypeField = 4 * 4,
+  BWH_HeaderSize   = 5 * 4
+};
+
+namespace bitc {
+  enum StandardWidths {
+    BlockIDWidth   = 8,  // We use VBR-8 for block IDs.
+    CodeLenWidth   = 4,  // Codelen are VBR-4.
+    BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
+  };
+
+  // The standard abbrev namespace always has a way to exit a block, enter a
+  // nested block, define abbrevs, and define an unabbreviated record.
+  enum FixedAbbrevIDs {
+    END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
+    ENTER_SUBBLOCK = 1,
+
+    /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
+    /// of a vbr5 for # operand infos.  Each operand info is emitted with a
+    /// single bit to indicate if it is a literal encoding.  If so, the value is
+    /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
+    /// by the info value as a vbr5 if needed.
+    DEFINE_ABBREV = 2,
+
+    // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
+    // a vbr6 for the # operands, followed by vbr6's for each operand.
+    UNABBREV_RECORD = 3,
+
+    // This is not a code, this is a marker for the first abbrev assignment.
+    FIRST_APPLICATION_ABBREV = 4
+  };
+
+  /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
+  /// block, which contains metadata about other blocks in the file.
+  enum StandardBlockIDs {
+    /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
+    /// standard abbrevs that should be available to all blocks of a specified
+    /// ID.
+    BLOCKINFO_BLOCK_ID = 0,
+
+    // Block IDs 1-7 are reserved for future expansion.
+    FIRST_APPLICATION_BLOCKID = 8
+  };
+
+  /// BlockInfoCodes - The blockinfo block contains metadata about user-defined
+  /// blocks.
+  enum BlockInfoCodes {
+    // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
+    // block, instead of the BlockInfo block.
+
+    BLOCKINFO_CODE_SETBID        = 1, // SETBID: [blockid#]
+    BLOCKINFO_CODE_BLOCKNAME     = 2, // BLOCKNAME: [name]
+    BLOCKINFO_CODE_SETRECORDNAME = 3  // BLOCKINFO_CODE_SETRECORDNAME:
+                                      //                             [id, name]
+  };
+
+} // End bitc namespace
+
+/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
+/// This is actually a union of two different things:
+///   1. It could be a literal integer value ("the operand is always 17").
+///   2. It could be an encoding specification ("this operand encoded like so").
+///
+class BitCodeAbbrevOp {
+  uint64_t Val;           // A literal value or data for an encoding.
+  bool IsLiteral : 1;     // Indicate whether this is a literal value or not.
+  unsigned Enc   : 3;     // The encoding to use.
+public:
+  enum Encoding {
+    Fixed = 1,  // A fixed width field, Val specifies number of bits.
+    VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
+    Array = 3,  // A sequence of fields, next field specifies elt encoding.
+    Char6 = 4,  // A 6-bit fixed field which maps to [a-zA-Z0-9._].
+    Blob  = 5   // 32-bit aligned array of 8-bit characters.
+  };
+
+  explicit BitCodeAbbrevOp(uint64_t V) :  Val(V), IsLiteral(true) {}
+  explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
+    : Val(Data), IsLiteral(false), Enc(E) {}
+
+  bool isLiteral() const  { return IsLiteral; }
+  bool isEncoding() const { return !IsLiteral; }
+
+  // Accessors for literals.
+  uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }
+
+  // Accessors for encoding info.
+  Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; }
+  uint64_t getEncodingData() const {
+    assert(isEncoding() && hasEncodingData());
+    return Val;
+  }
+
+  bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
+  static bool hasEncodingData(Encoding E) {
+    switch (E) {
+    case Fixed:
+    case VBR:
+      return true;
+    case Array:
+    case Char6:
+    case Blob:
+      return false;
+    }
+    report_fatal_error("Invalid encoding");
+  }
+
+  /// isChar6 - Return true if this character is legal in the Char6 encoding.
+  static bool isChar6(char C) {
+    if (C >= 'a' && C <= 'z') return true;
+    if (C >= 'A' && C <= 'Z') return true;
+    if (C >= '0' && C <= '9') return true;
+    if (C == '.' || C == '_') return true;
+    return false;
+  }
+  /// Return the 6-bit code for a character that is legal in Char6 encoding.
+  static unsigned EncodeChar6(char C) {
+    if (C >= 'a' && C <= 'z') return C-'a';
+    if (C >= 'A' && C <= 'Z') return C-'A'+26;
+    if (C >= '0' && C <= '9') return C-'0'+26+26;
+    if (C == '.' || C == '_') return C == '.' ? 62 : 63;
+    llvm_unreachable("Not a valid Char6 character!");
+  }
+
+  static char DecodeChar6(unsigned V) {
+    assert((V & ~63) == 0 && "Not a Char6 encoded character!");
+    return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"
+        [V];
+  }
+
+};
+
+/// BitCodeAbbrev - This class represents an abbreviation record.  An
+/// abbreviation allows a complex record that has redundancy to be stored in a
+/// specialized format instead of the fully-general, fully-vbr, format.
+class BitCodeAbbrev {
+  SmallVector<BitCodeAbbrevOp, 32> OperandList;
+
+public:
+  unsigned getNumOperandInfos() const {
+    return static_cast<unsigned>(OperandList.size());
+  }
+  const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
+    return OperandList[N];
+  }
+
+  void Add(const BitCodeAbbrevOp &OpInfo) {
+    OperandList.push_back(OpInfo);
+  }
+};
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Bitstream/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h
new file mode 100644
index 000000000000..ee82e7ec1ba2
--- /dev/null
+++ b/include/llvm/Bitstream/BitstreamReader.h
@@ -0,0 +1,557 @@
+//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamReader class.  This class can be used to
+// read an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
+#define LLVM_BITSTREAM_BITSTREAMREADER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+/// This class maintains the abbreviations read from a block info block.
+class BitstreamBlockInfo {
+public:
+  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
+  /// describe abbreviations that all blocks of the specified ID inherit.
+  struct BlockInfo {
+    unsigned BlockID;
+    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
+    std::string Name;
+    std::vector<std::pair<unsigned, std::string>> RecordNames;
+  };
+
+private:
+  std::vector<BlockInfo> BlockInfoRecords;
+
+public:
+  /// If there is block info for the specified ID, return it, otherwise return
+  /// null.
+  const BlockInfo *getBlockInfo(unsigned BlockID) const {
+    // Common case, the most recent entry matches BlockID.
+    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
+      return &BlockInfoRecords.back();
+
+    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
+         i != e; ++i)
+      if (BlockInfoRecords[i].BlockID == BlockID)
+        return &BlockInfoRecords[i];
+    return nullptr;
+  }
+
+  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+    if (const BlockInfo *BI = getBlockInfo(BlockID))
+      return *const_cast<BlockInfo*>(BI);
+
+    // Otherwise, add a new record.
+    BlockInfoRecords.emplace_back();
+    BlockInfoRecords.back().BlockID = BlockID;
+    return BlockInfoRecords.back();
+  }
+};
+
+/// This represents a position within a bitstream. There may be multiple
+/// independent cursors reading within one bitstream, each maintaining their
+/// own local state.
+class SimpleBitstreamCursor {
+  ArrayRef<uint8_t> BitcodeBytes;
+  size_t NextChar = 0;
+
+public:
+  /// This is the current data we have pulled from the stream but have not
+  /// returned to the client. This is specifically and intentionally defined to
+  /// follow the word size of the host machine for efficiency. We use word_t in
+  /// places that are aware of this to make it perfectly explicit what is going
+  /// on.
+  using word_t = size_t;
+
+private:
+  word_t CurWord = 0;
+
+  /// This is the number of bits in CurWord that are valid. This is always from
+  /// [0...bits_of(size_t)-1] inclusive.
+  unsigned BitsInCurWord = 0;
+
+public:
+  static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8;
+
+  SimpleBitstreamCursor() = default;
+  explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
+      : BitcodeBytes(BitcodeBytes) {}
+  explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
+      : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
+  explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
+      : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
+
+  bool canSkipToPos(size_t pos) const {
+    // pos can be skipped to if it is a valid address or one byte past the end.
+    return pos <= BitcodeBytes.size();
+  }
+
+  bool AtEndOfStream() {
+    return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
+  }
+
+  /// Return the bit # of the bit we are reading.
+  uint64_t GetCurrentBitNo() const {
+    return NextChar*CHAR_BIT - BitsInCurWord;
+  }
+
+  // Return the byte # of the current bit.
+  uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
+
+  ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
+
+  /// Reset the stream to the specified bit number.
+  Error JumpToBit(uint64_t BitNo) {
+    size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
+    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
+    assert(canSkipToPos(ByteNo) && "Invalid location");
+
+    // Move the cursor to the right word.
+    NextChar = ByteNo;
+    BitsInCurWord = 0;
+
+    // Skip over any bits that are already consumed.
+    if (WordBitNo) {
+      if (Expected<word_t> Res = Read(WordBitNo))
+        return Error::success();
+      else
+        return Res.takeError();
+    }
+
+    return Error::success();
+  }
+
+  /// Get a pointer into the bitstream at the specified byte offset.
+  const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
+    return BitcodeBytes.data() + ByteNo;
+  }
+
+  /// Get a pointer into the bitstream at the specified bit offset.
+  ///
+  /// The bit offset must be on a byte boundary.
+  const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
+    assert(!(BitNo % 8) && "Expected bit on byte boundary");
+    return getPointerToByte(BitNo / 8, NumBytes);
+  }
+
+  /// Load up to sizeof(word_t) little-endian bytes at NextChar into CurWord
+  /// and advance NextChar past them; fails when no bytes remain.
+  Error fillCurWord() {
+    if (NextChar >= BitcodeBytes.size())
+      return createStringError(std::errc::io_error,
+                               "Unexpected end of file reading %zu of %zu "
+                               "bytes", NextChar, BitcodeBytes.size());
+
+    // Read the next word from the stream.
+    const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
+    unsigned BytesRead;
+    if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
+      BytesRead = sizeof(word_t);
+      CurWord = support::endian::read<word_t, support::little,
+                                      support::unaligned>(NextCharPtr);
+    } else { // Short read: assemble the remaining bytes one at a time.
+      BytesRead = BitcodeBytes.size() - NextChar;
+      CurWord = 0;
+      for (unsigned B = 0; B != BytesRead; ++B)
+        CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
+    }
+    NextChar += BytesRead;
+    BitsInCurWord = BytesRead * 8;
+    return Error::success();
+  }
+
+  Expected<word_t> Read(unsigned NumBits) {
+    static const unsigned BitsInWord = MaxChunkSize;
+
+    assert(NumBits && NumBits <= BitsInWord &&
+           "Cannot return zero or more than BitsInWord bits!");
+
+    static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
+
+    // If the field is fully contained by CurWord, return it quickly.
+    if (BitsInCurWord >= NumBits) {
+      word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
+
+      // Use a mask to avoid undefined behavior.
+      CurWord >>= (NumBits & Mask);
+
+      BitsInCurWord -= NumBits;
+      return R;
+    }
+
+    word_t R = BitsInCurWord ? CurWord : 0;
+    unsigned BitsLeft = NumBits - BitsInCurWord;
+
+    if (Error fillResult = fillCurWord())
+      return std::move(fillResult);
+
+    // If we run out of data, abort.
+    if (BitsLeft > BitsInCurWord)
+      return createStringError(std::errc::io_error,
+                               "Unexpected end of file reading %u of %u bits",
+                               BitsInCurWord, BitsLeft);
+
+    word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
+
+    // Use a mask to avoid undefined behavior.
+    CurWord >>= (BitsLeft & Mask);
+
+    BitsInCurWord -= BitsLeft;
+
+    R |= R2 << (NumBits - BitsLeft);
+
+    return R;
+  }
+
+  /// Read a VBR of at most 32 bits; fails if the chunk chain overruns that.
+  Expected<uint32_t> ReadVBR(unsigned NumBits) {
+    Expected<unsigned> MaybeRead = Read(NumBits);
+    if (!MaybeRead)
+      return MaybeRead;
+    uint32_t Piece = MaybeRead.get();
+    const uint32_t HiMask = 1U << (NumBits-1); // Continuation flag.
+    if ((Piece & HiMask) == 0)
+      return Piece;
+
+    uint32_t Result = 0;
+    for (unsigned NextBit = 0;; NextBit += NumBits-1) {
+      // Shifting a uint32_t by 32 or more is undefined; reject such input.
+      if (NextBit >= 32)
+        return createStringError(std::errc::illegal_byte_sequence,
+                                 "Unterminated VBR");
+      Result |= (Piece & (HiMask-1)) << NextBit;
+      if ((Piece & HiMask) == 0)
+        return Result;
+      if (!(MaybeRead = Read(NumBits)))
+        return MaybeRead;
+      Piece = MaybeRead.get();
+    }
+  }
+
+  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
+  // the VBR must still be <= 32 bits though. Fails with "Unterminated VBR"
+  // when the continuation chain would shift past bit 63.
+  Expected<uint64_t> ReadVBR64(unsigned NumBits) {
+    Expected<uint64_t> MaybeRead = Read(NumBits);
+    if (!MaybeRead)
+      return MaybeRead;
+    uint32_t Piece = MaybeRead.get();
+    const uint32_t HiMask = 1U << (NumBits-1); // Continuation flag.
+    if ((Piece & HiMask) == 0)
+      return uint64_t(Piece);
+
+    uint64_t Result = 0;
+    for (unsigned NextBit = 0;; NextBit += NumBits-1) {
+      // Shifting a uint64_t by 64 or more is undefined; reject such input.
+      if (NextBit >= 64)
+        return createStringError(std::errc::illegal_byte_sequence,
+                                 "Unterminated VBR");
+      Result |= uint64_t(Piece & (HiMask-1)) << NextBit;
+      if ((Piece & HiMask) == 0)
+        return Result;
+      if (!(MaybeRead = Read(NumBits)))
+        return MaybeRead;
+      Piece = MaybeRead.get();
+    }
+  }
+
+  void SkipToFourByteBoundary() {
+    // If word_t is 64-bits and if we've read less than 32 bits, just dump
+    // the bits we have up to the next 32-bit boundary.
+    if (sizeof(word_t) > 4 &&
+        BitsInCurWord >= 32) {
+      CurWord >>= BitsInCurWord-32;
+      BitsInCurWord = 32;
+      return;
+    }
+
+    BitsInCurWord = 0;
+  }
+
+  /// Return the size of the stream in bytes.
+  size_t SizeInBytes() const { return BitcodeBytes.size(); }
+
+  /// Skip to the end of the file.
+  void skipToEnd() { NextChar = BitcodeBytes.size(); }
+};
+
+/// When advancing through a bitstream cursor, each advance can discover a few
+/// different kinds of entries:
+struct BitstreamEntry {
+  enum {
+    Error,    // Malformed bitcode was found.
+    EndBlock, // We've reached the end of the current block (or the end of the
+              // file, which is treated like a series of EndBlock records).
+    SubBlock, // This is the start of a new subblock of a specific ID.
+    Record    // This is a record with a specific AbbrevID.
+  } Kind;
+
+  unsigned ID;
+
+  static BitstreamEntry getError() {
+    BitstreamEntry E; E.Kind = Error; return E;
+  }
+
+  static BitstreamEntry getEndBlock() {
+    BitstreamEntry E; E.Kind = EndBlock; return E;
+  }
+
+  static BitstreamEntry getSubBlock(unsigned ID) {
+    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
+  }
+
+  static BitstreamEntry getRecord(unsigned AbbrevID) {
+    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
+  }
+};
+
+/// This represents a position within a bitcode file, implemented on top of a
+/// SimpleBitstreamCursor.
+///
+/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
+/// be passed by value.
+class BitstreamCursor : SimpleBitstreamCursor {
+  // This is the declared size of code values used for the current block, in
+  // bits.
+  unsigned CurCodeSize = 2;
+
+  /// Abbrevs installed in this block.
+  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
+
+  struct Block {
+    unsigned PrevCodeSize;
+    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
+
+    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
+  };
+
+  /// This tracks the codesize of parent blocks.
+  SmallVector<Block, 8> BlockScope;
+
+  BitstreamBlockInfo *BlockInfo = nullptr;
+
+public:
+  static const size_t MaxChunkSize = sizeof(word_t) * 8;
+
+  BitstreamCursor() = default;
+  explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
+      : SimpleBitstreamCursor(BitcodeBytes) {}
+  explicit BitstreamCursor(StringRef BitcodeBytes)
+      : SimpleBitstreamCursor(BitcodeBytes) {}
+  explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
+      : SimpleBitstreamCursor(BitcodeBytes) {}
+
+  using SimpleBitstreamCursor::AtEndOfStream;
+  using SimpleBitstreamCursor::canSkipToPos;
+  using SimpleBitstreamCursor::fillCurWord;
+  using SimpleBitstreamCursor::getBitcodeBytes;
+  using SimpleBitstreamCursor::GetCurrentBitNo;
+  using SimpleBitstreamCursor::getCurrentByteNo;
+  using SimpleBitstreamCursor::getPointerToByte;
+  using SimpleBitstreamCursor::JumpToBit;
+  using SimpleBitstreamCursor::Read;
+  using SimpleBitstreamCursor::ReadVBR;
+  using SimpleBitstreamCursor::ReadVBR64;
+  using SimpleBitstreamCursor::SizeInBytes;
+
+  /// Return the number of bits used to encode an abbrev #.
+  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
+
+  /// Flags that modify the behavior of advance().
+  enum {
+    /// If this flag is used, the advance() method does not automatically pop
+    /// the block scope when the end of a block is reached.
+    AF_DontPopBlockAtEnd = 1,
+
+    /// If this flag is used, abbrev entries are returned just like normal
+    /// records.
+    AF_DontAutoprocessAbbrevs = 2
+  };
+
+  /// Advance the current bitstream, returning the next entry in the stream.
+  Expected<BitstreamEntry> advance(unsigned Flags = 0) {
+    while (true) {
+      if (AtEndOfStream())
+        return BitstreamEntry::getError();
+
+      Expected<unsigned> MaybeCode = ReadCode();
+      if (!MaybeCode)
+        return MaybeCode.takeError();
+      unsigned Code = MaybeCode.get();
+
+      if (Code == bitc::END_BLOCK) {
+        // Pop the end of the block unless Flags tells us not to.
+        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
+          return BitstreamEntry::getError();
+        return BitstreamEntry::getEndBlock();
+      }
+
+      if (Code == bitc::ENTER_SUBBLOCK) {
+        if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
+          return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
+        else
+          return MaybeSubBlock.takeError();
+      }
+
+      if (Code == bitc::DEFINE_ABBREV &&
+          !(Flags & AF_DontAutoprocessAbbrevs)) {
+        // We read and accumulate abbrev's, the client can't do anything with
+        // them anyway.
+        if (Error Err = ReadAbbrevRecord())
+          return std::move(Err);
+        continue;
+      }
+
+      return BitstreamEntry::getRecord(Code);
+    }
+  }
+
+  /// This is a convenience function for clients that don't expect any
+  /// subblocks. This just skips over them automatically.
+  Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
+    while (true) {
+      // If we found a normal entry, return it.
+      Expected<BitstreamEntry> MaybeEntry = advance(Flags);
+      if (!MaybeEntry)
+        return MaybeEntry;
+      BitstreamEntry Entry = MaybeEntry.get();
+
+      if (Entry.Kind != BitstreamEntry::SubBlock)
+        return Entry;
+
+      // If we found a sub-block, just skip over it and check the next entry.
+      if (Error Err = SkipBlock())
+        return std::move(Err);
+    }
+  }
+
+  Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
+
+  // Block header:
+  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+
+  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
+  Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
+
+  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
+  /// of this block.
+  /// Fails if the header cannot be read, if the stream ends right after it, or
+  /// if the declared length points past the end of the stream.
+  Error SkipBlock() {
+    // Read and ignore the codelen value.
+    if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
+      ; // Since we are skipping this block, we don't care what code widths are
+        // used inside of it.
+    else
+      return Res.takeError();
+
+    SkipToFourByteBoundary();
+    Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
+    if (!MaybeNum)
+      return MaybeNum.takeError();
+    size_t NumFourBytes = MaybeNum.get();
+
+    // Check that the block wasn't partially defined, and that the offset isn't
+    // bogus. The target is computed in 64 bits so that a huge declared length
+    // cannot wrap a 32-bit size_t and slip past the bounds check.
+    uint64_t SkipTo = GetCurrentBitNo() + uint64_t(NumFourBytes) * 4 * 8;
+    if (AtEndOfStream())
+      return createStringError(std::errc::illegal_byte_sequence,
+                               "can't skip block: already at end of stream");
+    if (SkipTo / 8 > SizeInBytes())
+      return createStringError(std::errc::illegal_byte_sequence,
+                               "can't skip to bit %" PRIu64 " from %" PRIu64,
+                               SkipTo, GetCurrentBitNo());
+
+    return JumpToBit(SkipTo);
+  }
+
+  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
+  Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
+
+  bool ReadBlockEnd() {
+    if (BlockScope.empty()) return true;
+
+    // Block tail:
+    //    [END_BLOCK, <align4bytes>]
+    SkipToFourByteBoundary();
+
+    popBlockScope();
+    return false;
+  }
+
+private:
+  void popBlockScope() {
+    CurCodeSize = BlockScope.back().PrevCodeSize;
+
+    CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
+    BlockScope.pop_back();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Processing
+  //===--------------------------------------------------------------------===//
+
+public:
+  /// Return the abbreviation for the specified AbbrevId.
+  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
+    unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
+    if (AbbrevNo >= CurAbbrevs.size())
+      report_fatal_error("Invalid abbrev number");
+    return CurAbbrevs[AbbrevNo].get();
+  }
+
+  /// Read the current record and discard it, returning the code for the record.
+  Expected<unsigned> skipRecord(unsigned AbbrevID);
+
+  Expected<unsigned> readRecord(unsigned AbbrevID,
+                                SmallVectorImpl<uint64_t> &Vals,
+                                StringRef *Blob = nullptr);
+
+  //===--------------------------------------------------------------------===//
+  // Abbrev Processing
+  //===--------------------------------------------------------------------===//
+  Error ReadAbbrevRecord();
+
+  /// Read and return a block info block from the bitstream. If an error was
+  /// encountered, return None.
+  ///
+  /// \param ReadBlockInfoNames Whether to read block/record name information in
+  /// the BlockInfo block. Only llvm-bcanalyzer uses this.
+  Expected<Optional<BitstreamBlockInfo>>
+  ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
+
+  /// Set the block info to be used by this BitstreamCursor to interpret
+  /// abbreviated records.
+  void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
+};
+
+} // end llvm namespace
+
+#endif // LLVM_BITSTREAM_BITSTREAMREADER_H
diff --git a/include/llvm/Bitstream/BitstreamWriter.h b/include/llvm/Bitstream/BitstreamWriter.h
new file mode 100644
index 000000000000..c0ead19dc71d
--- /dev/null
+++ b/include/llvm/Bitstream/BitstreamWriter.h
@@ -0,0 +1,547 @@
+//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamWriter class.  This class can be used to
+// write an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITSTREAM_BITSTREAMWRITER_H
+#define LLVM_BITSTREAM_BITSTREAMWRITER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Support/Endian.h"
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter {
+  SmallVectorImpl<char> &Out;
+
+  /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
+  unsigned CurBit;
+
+  /// CurValue - The current value.  Only bits < CurBit are valid.
+  uint32_t CurValue;
+
+  /// CurCodeSize - This is the declared size of code values used for the
+  /// current block, in bits.
+  unsigned CurCodeSize;
+
+  /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
+  /// selected BLOCK ID.
+  unsigned BlockInfoCurBID;
+
+  /// CurAbbrevs - Abbrevs installed in this block.
+  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
+
+  struct Block { // Saved state of the enclosing block, restored by ExitBlock.
+    unsigned PrevCodeSize; // Code width that was in effect outside the block.
+    size_t StartSizeWord;  // Word index of this block's size placeholder.
+    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; // Outer abbrevs.
+    Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+  };
+
+  /// BlockScope - This tracks the current blocks that we have entered.
+  std::vector<Block> BlockScope;
+
+  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
+  /// These describe abbreviations that all blocks of the specified ID inherit.
+  struct BlockInfo {
+    unsigned BlockID; // ID of the blocks these abbreviations apply to.
+    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; // In emission order.
+  };
+  std::vector<BlockInfo> BlockInfoRecords;
+
+  void WriteByte(unsigned char Value) { // Append one raw byte to Out.
+    Out.push_back(Value);
+  }
+
+  void WriteWord(unsigned Value) { // Append Value to Out, little-endian.
+    Value = support::endian::byte_swap<uint32_t, support::little>(Value);
+    Out.append(reinterpret_cast<const char *>(&Value),
+               reinterpret_cast<const char *>(&Value + 1));
+  }
+
+  size_t GetBufferOffset() const { return Out.size(); } // Bytes held in Out.
+
+  size_t GetWordIndex() const { // Buffer offset expressed in 32-bit words.
+    const size_t Offset = GetBufferOffset();
+    assert((Offset & 3) == 0 && "Not 32-bit aligned");
+    return Offset >> 2;
+  }
+
+public:
+  explicit BitstreamWriter(SmallVectorImpl<char> &O) // ~0U: no block ID set.
+    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2), BlockInfoCurBID(~0U) {}
+
+  ~BitstreamWriter() { // Only asserts; nothing is flushed or closed here.
+    assert(CurBit == 0 && "Unflushed data remaining");
+    assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
+  }
+
+  /// Retrieve the current position in the stream, in bits (bytes in Out + CurBit).
+  uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
+
+  /// Retrieve the number of bits currently used to encode an abbrev ID (CurCodeSize).
+  unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
+
+  //===--------------------------------------------------------------------===//
+  // Basic Primitives for emitting bits to the stream.
+  //===--------------------------------------------------------------------===//
+
+  /// Backpatch a 32-bit word in the output at the given bit offset with the
+  /// specified value. An assert checks the target still holds a zero placeholder.
+  void BackpatchWord(uint64_t BitNo, unsigned NewWord) {
+    using namespace llvm::support;
+    const size_t ByteNo = static_cast<size_t>(BitNo / 8); // No 32-bit narrowing.
+    assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>(
+               &Out[ByteNo], BitNo & 7)) &&
+           "Expected to be patching over 0-value placeholders");
+    endian::writeAtBitAlignment<uint32_t, little, unaligned>(
+        &Out[ByteNo], NewWord, BitNo & 7);
+  }
+
+  void BackpatchWord64(uint64_t BitNo, uint64_t Val) { // Low half first.
+    BackpatchWord(BitNo, static_cast<uint32_t>(Val));
+    BackpatchWord(BitNo + 32, static_cast<uint32_t>(Val >> 32));
+  }
+
+  void Emit(uint32_t Val, unsigned NumBits) { // Append the NumBits-wide Val.
+    assert(NumBits && NumBits <= 32 && "Invalid value size!");
+    assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
+    CurValue |= Val << CurBit;
+    if (CurBit + NumBits < 32) { // Pending word still has room: just advance.
+      CurBit += NumBits;
+      return;
+    }
+
+    // The pending word is now full: write it out, then keep any spilled bits.
+    WriteWord(CurValue);
+
+    if (CurBit) // With CurBit == 0 the shift below would be by 32.
+      CurValue = Val >> (32-CurBit);
+    else
+      CurValue = 0;
+    CurBit = (CurBit+NumBits) & 31;
+  }
+
+  void FlushToWord() { // Write out any partially filled pending word.
+    if (CurBit == 0)
+      return; // Nothing pending.
+    WriteWord(CurValue);
+    CurBit = 0;
+    CurValue = 0;
+  }
+
+  void EmitVBR(uint32_t Val, unsigned NumBits) { // NumBits-wide VBR chunks.
+    assert(NumBits <= 32 && "Too many bits to emit!");
+    const uint32_t Threshold = 1U << (NumBits-1); // Top bit of each chunk.
+
+    // Emit NumBits-1 payload bits per chunk with the top bit set to signal
+    // that more follows. Unsigned mask math avoids signed overflow at 32 bits.
+    while (Val >= Threshold) {
+      Emit((Val & (Threshold-1)) | Threshold, NumBits);
+      Val >>= NumBits-1;
+    }
+    Emit(Val, NumBits); // Last chunk: top bit clear.
+  }
+
+  void EmitVBR64(uint64_t Val, unsigned NumBits) { // 64-bit form of EmitVBR.
+    assert(NumBits <= 32 && "Too many bits to emit!");
+    if ((uint32_t)Val == Val) // Fits in 32 bits: defer to the 32-bit path.
+      return EmitVBR((uint32_t)Val, NumBits);
+
+    const uint32_t Threshold = 1U << (NumBits-1); // Top bit of each chunk.
+
+    // Emit NumBits-1 payload bits per chunk with the top bit set to signal
+    // that more follows. Unsigned mask math avoids signed overflow at 32 bits.
+    while (Val >= Threshold) {
+      Emit(((uint32_t)Val & (Threshold-1)) | Threshold, NumBits);
+      Val >>= NumBits-1;
+    }
+
+    Emit((uint32_t)Val, NumBits); // Last chunk: top bit clear.
+  }
+
+  /// EmitCode - Emit the specified code using CurCodeSize bits.
+  void EmitCode(unsigned Val) {
+    Emit(Val, CurCodeSize);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  /// getBlockInfo - Return the BlockInfo record for \p BlockID, or null when
+  /// no such record exists.
+  BlockInfo *getBlockInfo(unsigned BlockID) {
+    // Fast path: the most recently added record is the usual match.
+    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
+      return &BlockInfoRecords.back();
+
+    // Otherwise scan in insertion order and hand back the first match.
+    for (BlockInfo &Record : BlockInfoRecords)
+      if (Record.BlockID == BlockID)
+        return &Record;
+    return nullptr;
+  }
+
+  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+    // Block header:
+    //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
+    EmitCode(bitc::ENTER_SUBBLOCK); // Still uses the enclosing code width.
+    EmitVBR(BlockID, bitc::BlockIDWidth);
+    EmitVBR(CodeLen, bitc::CodeLenWidth);
+    FlushToWord();
+
+    size_t BlockSizeWordIndex = GetWordIndex(); // Word that ExitBlock patches.
+    unsigned OldCodeSize = CurCodeSize;
+
+    // Emit a zero placeholder for blocklen; ExitBlock backpatches the real size.
+    Emit(0, bitc::BlockSizeWidth);
+
+    CurCodeSize = CodeLen; // Codes inside the new block use CodeLen bits.
+
+    // Push the outer block's abbrev set onto the stack, start out with an
+    // empty abbrev set (the swap leaves CurAbbrevs empty).
+    BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex);
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+    // If there is a blockinfo for this BlockID, add all the predefined abbrevs
+    // to the abbrev list, in the order they were registered.
+    if (BlockInfo *Info = getBlockInfo(BlockID)) {
+      CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
+                        Info->Abbrevs.end());
+    }
+  }
+
+  void ExitBlock() { // Close the innermost block and patch its size field.
+    assert(!BlockScope.empty() && "Block scope imbalance!");
+    Block &B = BlockScope.back(); // Non-const so PrevAbbrevs can be moved from.
+
+    // Block tail:
+    //    [END_BLOCK, <align4bytes>]
+    EmitCode(bitc::END_BLOCK);
+    FlushToWord();
+
+    // Compute the size of the block, in words, not counting the size field.
+    size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
+    uint64_t BitNo = uint64_t(B.StartSizeWord) * 32;
+
+    // Update the block size field in the header of this sub-block.
+    BackpatchWord(BitNo, SizeInWords);
+
+    // Restore the enclosing block's code size and (by move) its abbrev table.
+    CurCodeSize = B.PrevCodeSize;
+    CurAbbrevs = std::move(B.PrevAbbrevs);
+    BlockScope.pop_back();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Emission
+  //===--------------------------------------------------------------------===//
+
+private:
+  /// EmitAbbreviatedLiteral - "Emit" a literal operand of an abbreviated
+  /// record.  Nothing is written; asserts check that \p V matches the abbrev.
+  template<typename uintty>
+  void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
+    assert(Op.isLiteral() && "Not a literal");
+    // The abbrev already specifies the literal value, so there is nothing to
+    // emit; only verify that the record agrees with it.
+    assert(V == Op.getLiteralValue() &&
+           "Invalid abbrev for record!");
+  }
+
+  /// EmitAbbreviatedField - Emit one scalar operand \p V using the encoding
+  /// that \p Op prescribes (Fixed, VBR or Char6).
+  template<typename uintty>
+  void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
+    assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
+    // Dispatch on the encoding; a Fixed/VBR width of zero emits no bits.
+    switch (Op.getEncoding()) {
+    case BitCodeAbbrevOp::Char6:
+      Emit(BitCodeAbbrevOp::EncodeChar6(static_cast<char>(V)), 6);
+      break;
+    case BitCodeAbbrevOp::VBR:
+      if (Op.getEncodingData() != 0)
+        EmitVBR64(V, static_cast<unsigned>(Op.getEncodingData()));
+      break;
+    case BitCodeAbbrevOp::Fixed:
+      if (Op.getEncodingData() != 0)
+        Emit(static_cast<unsigned>(V),
+             static_cast<unsigned>(Op.getEncodingData()));
+      break;
+    default: llvm_unreachable("Unknown encoding!");
+    }
+  }
+
+  /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
+  /// emission code.  If \p Blob has non-null data, it is the payload emitted
+  /// for the Blob or Array operand that is known to exist at the end of the
+  /// record.  If \p Code is specified, then it is the record code to emit
+  /// before the Vals, which must not contain the code; otherwise the code is
+  /// taken from Vals like any other operand.
+  template <typename uintty>
+  void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef<uintty> Vals,
+                                StringRef Blob, Optional<unsigned> Code) {
+    const char *BlobData = Blob.data(); // Reset to null once consumed.
+    unsigned BlobLen = (unsigned) Blob.size();
+    unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
+    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
+    const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get();
+
+    EmitCode(Abbrev);
+
+    unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+    if (Code) { // The first abbrev operand describes the record code.
+      assert(e && "Expected non-empty abbreviation");
+      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++);
+
+      if (Op.isLiteral())
+        EmitAbbreviatedLiteral(Op, Code.getValue());
+      else {
+        assert(Op.getEncoding() != BitCodeAbbrevOp::Array &&
+               Op.getEncoding() != BitCodeAbbrevOp::Blob &&
+               "Expected literal or scalar");
+        EmitAbbreviatedField(Op, Code.getValue());
+      }
+    }
+
+    unsigned RecordIdx = 0; // Next entry of Vals to consume.
+    for (; i != e; ++i) {
+      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+      if (Op.isLiteral()) {
+        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
+        EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
+        ++RecordIdx;
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+        // Array case: the operand after Array gives the element encoding.
+        assert(i + 2 == e && "array op not second to last?");
+        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+        // If this record has blob data, emit it, otherwise we must have record
+        // entries to encode this way.
+        if (BlobData) {
+          assert(RecordIdx == Vals.size() &&
+                 "Blob data and record entries specified for array!");
+          // Emit a vbr6 to indicate the number of elements present.
+          EmitVBR(static_cast<uint32_t>(BlobLen), 6);
+
+          // Emit each byte as one element (this i shadows the operand index).
+          for (unsigned i = 0; i != BlobLen; ++i)
+            EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
+
+          // Know that blob data is consumed for assertion below.
+          BlobData = nullptr;
+        } else {
+          // Emit a vbr6 to indicate the number of elements present.
+          EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
+
+          // Emit every remaining entry of Vals as one array element.
+          for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
+            EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
+        }
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
+        // If this record has blob data, emit it, otherwise we must have record
+        // entries to encode this way.
+
+        if (BlobData) {
+          assert(RecordIdx == Vals.size() &&
+                 "Blob data and record entries specified for blob operand!");
+
+          assert(Blob.data() == BlobData && "BlobData got moved");
+          assert(Blob.size() == BlobLen && "BlobLen got changed");
+          emitBlob(Blob);
+          BlobData = nullptr;
+        } else {
+          emitBlob(Vals.slice(RecordIdx)); // RecordIdx itself is not advanced.
+        }
+      } else {  // Single scalar field.
+        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
+        EmitAbbreviatedField(Op, Vals[RecordIdx]);
+        ++RecordIdx;
+      }
+    }
+    assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
+    assert(BlobData == nullptr &&
+           "Blob data specified for record that doesn't use it!");
+  }
+
+public:
+  /// Emit a blob: optional vbr6 size, word-aligned bytes, zero tail padding.
+  template <class UIntTy>
+  void emitBlob(ArrayRef<UIntTy> Bytes, bool ShouldEmitSize = true) {
+    // Optional vbr6 element count.
+    if (ShouldEmitSize)
+      EmitVBR(static_cast<uint32_t>(Bytes.size()), 6);
+
+    // The payload starts on a 32-bit boundary.
+    FlushToWord();
+
+    // Write one output byte per element.
+    for (const UIntTy &Elt : Bytes) {
+      assert(isUInt<8>(Elt) && "Value too large to emit as byte");
+      WriteByte(static_cast<unsigned char>(Elt));
+    }
+
+    // Zero-fill up to the next 32-bit boundary.
+    while ((GetBufferOffset() & 3) != 0)
+      WriteByte(0);
+  }
+  void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) { // As raw bytes.
+    const auto *Data = reinterpret_cast<const uint8_t *>(Bytes.data());
+    emitBlob(makeArrayRef(Data, Bytes.size()), ShouldEmitSize);
+  }
+
+  /// EmitRecord - Emit the record \p Code with operands \p Vals.  A nonzero
+  /// \p Abbrev selects the abbreviation used to compress the output.
+  template <typename Container>
+  void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) {
+    if (Abbrev) {
+      // Abbreviated form: the record code is passed separately from Vals.
+      EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code);
+      return;
+    }
+    // No abbrev: emit [UNABBREV_RECORD, code, numops, ops...] with the code,
+    // the count and every operand vbr6-encoded.
+    const auto NumOps = static_cast<uint32_t>(makeArrayRef(Vals).size());
+    EmitCode(bitc::UNABBREV_RECORD);
+    EmitVBR(Code, 6);
+    EmitVBR(NumOps, 6);
+    for (uint32_t Idx = 0; Idx != NumOps; ++Idx)
+      EmitVBR64(Vals[Idx], 6);
+  }
+
+  /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
+  /// Unlike EmitRecord, the code for the record should be included in Vals as
+  /// the first entry.  No blob data is supplied.
+  template <typename Container>
+  void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) {
+    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None);
+  }
+
+  /// EmitRecordWithBlob - Emit the specified record to the stream, using an
+  /// abbrev that includes a blob at the end.  The blob data to emit is given
+  /// by \p Blob, or by the pointer/length pair in the second overload.  In
+  /// contrast to EmitRecord, this routine expects that the first entry in
+  /// Vals is the code of the record.
+  template <typename Container>
+  void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
+                          StringRef Blob) {
+    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None);
+  }
+  template <typename Container>
+  void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals,
+                          const char *BlobData, unsigned BlobLen) {
+    return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
+                                    StringRef(BlobData, BlobLen), None);
+  }
+
+  /// EmitRecordWithArray - Just like EmitRecordWithBlob, but for records whose
+  /// abbrev ends with an array; the array bytes come from \p Array or the pair.
+  template <typename Container>
+  void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
+                           StringRef Array) {
+    EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None);
+  }
+  template <typename Container>
+  void EmitRecordWithArray(unsigned Abbrev, const Container &Vals,
+                           const char *ArrayData, unsigned ArrayLen) {
+    return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals),
+                                    StringRef(ArrayData, ArrayLen), None);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Abbrev Emission
+  //===--------------------------------------------------------------------===//
+
+private:
+  // Emit the abbreviation as a DEFINE_ABBREV record, one operand at a time.
+  void EncodeAbbrev(const BitCodeAbbrev &Abbv) {
+    EmitCode(bitc::DEFINE_ABBREV);
+    const unsigned NumOps = static_cast<unsigned>(Abbv.getNumOperandInfos());
+    EmitVBR(NumOps, 5);
+    for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+      const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(OpIdx);
+      Emit(Op.isLiteral(), 1); // Flag bit: 1 = literal, 0 = encoded operand.
+      if (Op.isLiteral()) {
+        EmitVBR64(Op.getLiteralValue(), 8);
+        continue;
+      }
+      Emit(Op.getEncoding(), 3);
+      if (Op.hasEncodingData())
+        EmitVBR64(Op.getEncodingData(), 5);
+    }
+  }
+public:
+
+  /// Emits \p Abbv into the current block and returns its new abbrev ID.
+  unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
+    EncodeAbbrev(*Abbv);
+    const unsigned Slot = static_cast<unsigned>(CurAbbrevs.size());
+    CurAbbrevs.push_back(std::move(Abbv));
+    return Slot + bitc::FIRST_APPLICATION_ABBREV;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // BlockInfo Block Emission
+  //===--------------------------------------------------------------------===//
+
+  /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
+  void EnterBlockInfoBlock() {
+    EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2); // Codes inside use 2 bits.
+    BlockInfoCurBID = ~0U;                      // No block ID selected yet.
+    BlockInfoRecords.clear();                   // Forget earlier abbrevs.
+  }
+private:
+  /// SwitchToBlockID - Select \p BlockID as the current BLOCKINFO block ID,
+  /// emitting a BLOCKINFO_CODE_SETBID record only when it actually changes.
+  void SwitchToBlockID(unsigned BlockID) {
+    if (BlockInfoCurBID != BlockID) {
+      SmallVector<unsigned, 2> Operands(1, BlockID);
+      EmitRecord(bitc::BLOCKINFO_CODE_SETBID, Operands);
+      BlockInfoCurBID = BlockID;
+    }
+  }
+
+  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+    if (BlockInfo *Existing = getBlockInfo(BlockID))
+      return *Existing;
+    // No record yet: append an empty one and tag it with this block ID.
+    BlockInfoRecords.emplace_back();
+    BlockInfo &Fresh = BlockInfoRecords.back();
+    Fresh.BlockID = BlockID;
+    return Fresh;
+  }
+
+public:
+
+  /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
+  /// BlockID and return the abbrev ID assigned to it for that block ID.
+  unsigned EmitBlockInfoAbbrev(unsigned BlockID,
+                               std::shared_ptr<BitCodeAbbrev> Abbv) {
+    SwitchToBlockID(BlockID);
+    EncodeAbbrev(*Abbv);
+    // Record the abbrev under this block ID's BlockInfo entry.
+    auto &Registered = getOrCreateBlockInfo(BlockID).Abbrevs;
+    const unsigned Slot = static_cast<unsigned>(Registered.size());
+    Registered.push_back(std::move(Abbv));
+    return Slot + bitc::FIRST_APPLICATION_ABBREV;
+  }
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h
index 13928582f2dd..734531a65d50 100644
--- a/include/llvm/CodeGen/AccelTable.h
+++ b/include/llvm/CodeGen/AccelTable.h
@@ -1,9 +1,8 @@
 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -327,14 +326,8 @@ public:
 
   void emit(AsmPrinter *Asm) const override;
 
-#ifndef _MSC_VER
-  // The line below is rejected by older versions (TBD) of MSVC.
   static constexpr Atom Atoms[] = {
       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-#else
-  // FIXME: Erase this path once the minimum MSCV version has been bumped.
-  static const SmallVector<Atom, 4> Atoms;
-#endif
 
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override;
@@ -352,16 +345,10 @@ public:
 
   void emit(AsmPrinter *Asm) const override;
 
-#ifndef _MSC_VER
-  // The line below is rejected by older versions (TBD) of MSVC.
   static constexpr Atom Atoms[] = {
       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
       Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
-#else
-  // FIXME: Erase this path once the minimum MSCV version has been bumped.
-  static const SmallVector<Atom, 4> Atoms;
-#endif
 
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override;
@@ -376,14 +363,8 @@ public:
 
   void emit(AsmPrinter *Asm) const override;
 
-#ifndef _MSC_VER
-  // The line below is rejected by older versions (TBD) of MSVC.
   static constexpr Atom Atoms[] = {
       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-#else
-  // FIXME: Erase this path once the minimum MSCV version has been bumped.
-  static const SmallVector<Atom, 4> Atoms;
-#endif
 
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override;
@@ -407,16 +388,10 @@ public:
 
   void emit(AsmPrinter *Asm) const override;
 
-#ifndef _MSC_VER
-  // The line below is rejected by older versions (TBD) of MSVC.
   static constexpr Atom Atoms[] = {
       Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
       Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
       Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
-#else
-  // FIXME: Erase this path once the minimum MSCV version has been bumped.
-  static const SmallVector<Atom, 4> Atoms;
-#endif
 
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override;
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index d77aee66ed76..0be0ac22a74d 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -1,9 +1,8 @@
 //===- CodeGen/Analysis.h - CodeGen LLVM IR Analysis Utilities --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,6 +25,7 @@
 
 namespace llvm {
 class GlobalValue;
+class LLT;
 class MachineBasicBlock;
 class MachineFunction;
 class TargetLoweringBase;
@@ -74,6 +74,25 @@ void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
                      SmallVectorImpl<uint64_t> *Offsets = nullptr,
                      uint64_t StartingOffset = 0);
 
+/// Variant of ComputeValueVTs that also produces the memory VTs.
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<EVT> *MemVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                     uint64_t StartingOffset = 0);
+
+/// computeValueLLTs - Given an LLVM IR type, compute a sequence of
+/// LLTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void computeValueLLTs(const DataLayout &DL, Type &Ty,
+                      SmallVectorImpl<LLT> &ValueTys,
+                      SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                      uint64_t StartingOffset = 0);
+
 /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
 GlobalValue *ExtractTypeInfo(Value *V);
 
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 413901d218f9..d110f8b01cb5 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/AsmPrinter.h - AsmPrinter Framework ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/IR/InlineAsm.h"
@@ -33,7 +33,6 @@
 
 namespace llvm {
 
-class AsmPrinterHandler;
 class BasicBlock;
 class BlockAddress;
 class Constant;
@@ -122,9 +121,6 @@ public:
   using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
   MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
 
-  /// Enable print [latency:throughput] in output.
-  bool EnablePrintSchedInfo = false;
-
 private:
   MCSymbol *CurrentFnBegin = nullptr;
   MCSymbol *CurrentFnEnd = nullptr;
@@ -142,16 +138,16 @@ protected:
   /// Protected struct HandlerInfo and Handlers permit target extended
   /// AsmPrinter adds their own handlers.
   struct HandlerInfo {
-    AsmPrinterHandler *Handler;
+    std::unique_ptr<AsmPrinterHandler> Handler;
     const char *TimerName;
     const char *TimerDescription;
     const char *TimerGroupName;
     const char *TimerGroupDescription;
 
-    HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName,
-                const char *TimerDescription, const char *TimerGroupName,
-                const char *TimerGroupDescription)
-        : Handler(Handler), TimerName(TimerName),
+    HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler,
+                const char *TimerName, const char *TimerDescription,
+                const char *TimerGroupName, const char *TimerGroupDescription)
+        : Handler(std::move(Handler)), TimerName(TimerName),
           TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
           TimerGroupDescription(TimerGroupDescription) {}
   };
@@ -227,6 +223,9 @@ public:
 
   void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
 
+  /// Emits initial debug location directive.
+  void emitInitialRawDwarfLocDirective(const MachineFunction &MF);
+
   /// Return the current section we are emitting to.
   const MCSection *getCurrentSection() const;
 
@@ -316,6 +315,8 @@ public:
 
   void emitStackSizeSection(const MachineFunction &MF);
 
+  void emitRemarksSection(Module &M);
+
   enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
   CFIMoveType needsCFIMoves() const;
 
@@ -511,7 +512,7 @@ public:
   void EmitSLEB128(int64_t Value, const char *Desc = nullptr) const;
 
   /// Emit the specified unsigned leb128 value.
-  void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const;
+  void EmitULEB128(uint64_t Value, const char *Desc = nullptr, unsigned PadTo = 0) const;
 
   /// Emit a .byte 42 directive that corresponds to an encoding.  If verbose
   /// assembly output is enabled, we output comments describing the encoding.
@@ -542,6 +543,12 @@ public:
     emitDwarfStringOffset(S.getEntry());
   }
 
+  /// Emit reference to a call site with a specified encoding
+  void EmitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+                          unsigned Encoding) const;
+  /// Emit an integer value corresponding to the call site encoding
+  void EmitCallSiteValue(uint64_t Value, unsigned Encoding) const;
+
   /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
   virtual unsigned getISAEncoding() { return 0; }
 
@@ -589,20 +596,22 @@ public:
   virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
                             const char *Code) const;
 
+  /// Print the MachineOperand as a symbol. Targets with complex handling of
+  /// symbol references should override the base implementation.
+  virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS);
+
   /// Print the specified operand of MI, an INLINEASM instruction, using the
   /// specified assembler variant.  Targets should override this to format as
   /// appropriate.  This method can return true if the operand is erroneous.
   virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &OS);
+                               const char *ExtraCode, raw_ostream &OS);
 
   /// Print the specified operand of MI, an INLINEASM instruction, using the
   /// specified assembler variant as an address. Targets should override this to
   /// format as appropriate.  This method can return true if the operand is
   /// erroneous.
   virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                     unsigned AsmVariant, const char *ExtraCode,
-                                     raw_ostream &OS);
+                                     const char *ExtraCode, raw_ostream &OS);
 
   /// Let the target do anything it needs to do before emitting inlineasm.
   /// \p StartInfo - the subtarget info before parsing inline asm
@@ -617,6 +626,15 @@ public:
   virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
                                 const MCSubtargetInfo *EndInfo) const;
 
+  /// This emits visibility information about symbol, if this is supported by
+  /// the target.
+  void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+                      bool IsDefinition = true) const;
+
+  /// This emits linkage information about \p GVSym based on \p GV, if this is
+  /// supported by the target.
+  void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+
 private:
   /// Private state for PrintSpecial()
   // Assign a unique ID to this machine instruction.
@@ -647,13 +665,6 @@ private:
   // Internal Implementation Details
   //===------------------------------------------------------------------===//
 
-  /// This emits visibility information about symbol, if this is supported by
-  /// the target.
-  void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
-                      bool IsDefinition = true) const;
-
-  void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
-
   void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                           const MachineBasicBlock *MBB, unsigned uid) const;
   void EmitLLVMUsedList(const ConstantArray *InitList);
diff --git a/include/llvm/CodeGen/AsmPrinterHandler.h b/include/llvm/CodeGen/AsmPrinterHandler.h
index a8b13200dd4e..affb558f2fa6 100644
--- a/include/llvm/CodeGen/AsmPrinterHandler.h
+++ b/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/AsmPrinterHandler.h -----------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h
index b1adf66e7ff4..8a46c6e00d22 100644
--- a/include/llvm/CodeGen/AtomicExpandUtils.h
+++ b/include/llvm/CodeGen/AtomicExpandUtils.h
@@ -1,9 +1,8 @@
 //===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index f105d887c397..70bf670fdf0b 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1,9 +1,8 @@
 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -197,11 +196,12 @@ protected:
 public:
   /// \name Scalar TTI Implementations
   /// @{
-  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
-                                      unsigned BitWidth, unsigned AddressSpace,
-                                      unsigned Alignment, bool *Fast) const {
+  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+                                      unsigned AddressSpace, unsigned Alignment,
+                                      bool *Fast) const {
     EVT E = EVT::getIntegerVT(Context, BitWidth);
-    return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
+    return getTLI()->allowsMisalignedMemoryAccesses(
+        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
   }
 
   bool hasBranchDivergence() { return false; }
@@ -293,12 +293,12 @@ public:
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments) {
-    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
+                            ArrayRef<const Value *> Arguments, const User *U) {
+    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys) {
+                            ArrayRef<Type *> ParamTys, const User *U) {
     if (IID == Intrinsic::cttz) {
       if (getTLI()->isCheapToSpeculateCttz())
         return TargetTransformInfo::TCC_Basic;
@@ -311,7 +311,7 @@ public:
       return TargetTransformInfo::TCC_Expensive;
     }
 
-    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
+    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
   }
 
   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -414,6 +414,12 @@ public:
       if (TLI->isZExtFree(OpTy, Ty))
         return TargetTransformInfo::TCC_Free;
       return TargetTransformInfo::TCC_Basic;
+
+    case Instruction::AddrSpaceCast:
+      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
+                                   Ty->getPointerAddressSpace()))
+        return TargetTransformInfo::TCC_Free;
+      return TargetTransformInfo::TCC_Basic;
     }
 
     return BaseT::getOperationCost(Opcode, Ty, OpTy);
@@ -421,6 +427,8 @@ public:
 
   unsigned getInliningThresholdMultiplier() { return 1; }
 
+  int getInlinerVectorBonusPercent() { return 150; }
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP) {
     // This unrolling functionality is target independent, but to provide some
@@ -486,6 +494,13 @@ public:
     UP.BEInsns = 2;
   }
 
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo) {
+    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+  }
+
   int getInstructionLatency(const Instruction *I) {
     if (isa<LoadInst>(I))
       return getST()->getSchedModel().DefaultLoadLatency;
@@ -657,7 +672,7 @@ public:
       return 0;
 
     if (Opcode == Instruction::AddrSpaceCast &&
-        TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
+        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                  Dst->getPointerAddressSpace()))
       return 0;
 
@@ -997,7 +1012,7 @@ public:
     // inside the loop.
     if (UseMaskForGaps)
       Cost += static_cast<T *>(this)->getArithmeticInstrCost(
-          BinaryOperator::And, MaskVT); 
+          BinaryOperator::And, MaskVT);
 
     return Cost;
   }
@@ -1058,8 +1073,8 @@ public:
     case Intrinsic::experimental_vector_reduce_and:
     case Intrinsic::experimental_vector_reduce_or:
     case Intrinsic::experimental_vector_reduce_xor:
-    case Intrinsic::experimental_vector_reduce_fadd:
-    case Intrinsic::experimental_vector_reduce_fmul:
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul:
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_fmax:
@@ -1116,6 +1131,9 @@ public:
   unsigned getIntrinsicInstrCost(
       Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
       unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
+    unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
+    auto *ConcreteTTI = static_cast<T *>(this);
+
     SmallVector<unsigned, 2> ISDs;
     unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
     switch (IID) {
@@ -1144,8 +1162,8 @@ public:
       if (ScalarCalls == 1)
         return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
 
-      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
-          IID, ScalarRetTy, ScalarTys, FMF);
+      unsigned ScalarCost =
+          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
 
       return ScalarCalls * ScalarCost + ScalarizationCost;
     }
@@ -1227,44 +1245,181 @@ public:
     case Intrinsic::sideeffect:
       return 0;
     case Intrinsic::masked_store:
-      return static_cast<T *>(this)
-          ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
+      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
+                                                0);
     case Intrinsic::masked_load:
-      return static_cast<T *>(this)
-          ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
     case Intrinsic::experimental_vector_reduce_add:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
+      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
+                                                     /*IsPairwiseForm=*/false);
     case Intrinsic::experimental_vector_reduce_mul:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
+      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
+                                                     /*IsPairwiseForm=*/false);
     case Intrinsic::experimental_vector_reduce_and:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
+      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
+                                                     /*IsPairwiseForm=*/false);
     case Intrinsic::experimental_vector_reduce_or:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
+      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
+                                                     /*IsPairwiseForm=*/false);
     case Intrinsic::experimental_vector_reduce_xor:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
-    case Intrinsic::experimental_vector_reduce_fadd:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
-    case Intrinsic::experimental_vector_reduce_fmul:
-      return static_cast<T *>(this)->getArithmeticReductionCost(
-          Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
+      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
+                                                     /*IsPairwiseForm=*/false);
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+      return ConcreteTTI->getArithmeticReductionCost(
+          Instruction::FAdd, Tys[0],
+          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+                                     // reductions.
+    case Intrinsic::experimental_vector_reduce_v2_fmul:
+      return ConcreteTTI->getArithmeticReductionCost(
+          Instruction::FMul, Tys[0],
+          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+                                     // reductions.
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_fmax:
     case Intrinsic::experimental_vector_reduce_fmin:
-      return static_cast<T *>(this)->getMinMaxReductionCost(
+      return ConcreteTTI->getMinMaxReductionCost(
           Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
-          /*IsSigned=*/true);
+          /*IsUnsigned=*/false); // signed and FP min/max reductions
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
-      return static_cast<T *>(this)->getMinMaxReductionCost(
+      return ConcreteTTI->getMinMaxReductionCost(
           Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
-          /*IsSigned=*/false);
+          /*IsUnsigned=*/true); // unsigned min/max reductions
+    case Intrinsic::sadd_sat:
+    case Intrinsic::ssub_sat: {
+      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+      if (RetVF > 1)
+        CondTy = VectorType::get(CondTy, RetVF);
+
+      Type *OpTy = StructType::create({RetTy, CondTy});
+      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
+                                     ? Intrinsic::sadd_with_overflow
+                                     : Intrinsic::ssub_with_overflow;
+
+      // SatMax -> Overflow && SumDiff < 0
+      // SatMin -> Overflow && SumDiff >= 0
+      unsigned Cost = 0;
+      Cost += ConcreteTTI->getIntrinsicInstrCost(
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+                                              CondTy, nullptr);
+      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+                                                  CondTy, nullptr);
+      return Cost;
+    }
+    case Intrinsic::uadd_sat:
+    case Intrinsic::usub_sat: {
+      Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+      if (RetVF > 1)
+        CondTy = VectorType::get(CondTy, RetVF);
+
+      Type *OpTy = StructType::create({RetTy, CondTy});
+      Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
+                                     ? Intrinsic::uadd_with_overflow
+                                     : Intrinsic::usub_with_overflow;
+
+      unsigned Cost = 0;
+      Cost += ConcreteTTI->getIntrinsicInstrCost(
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+                                              CondTy, nullptr);
+      return Cost;
+    }
+    case Intrinsic::smul_fix:
+    case Intrinsic::umul_fix: {
+      unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+      if (RetVF > 1)
+        ExtTy = VectorType::get(ExtTy, RetVF);
+
+      unsigned ExtOp =
+          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+
+      unsigned Cost = 0;
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost +=
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+                                                  TTI::OK_AnyValue,
+                                                  TTI::OK_UniformConstantValue);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+                                                  TTI::OK_AnyValue,
+                                                  TTI::OK_UniformConstantValue);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+      return Cost;
+    }
+    case Intrinsic::sadd_with_overflow:
+    case Intrinsic::ssub_with_overflow: {
+      Type *SumTy = RetTy->getContainedType(0);
+      Type *OverflowTy = RetTy->getContainedType(1);
+      unsigned Opcode = IID == Intrinsic::sadd_with_overflow
+                            ? BinaryOperator::Add
+                            : BinaryOperator::Sub;
+
+      //   LHSSign -> LHS >= 0
+      //   RHSSign -> RHS >= 0
+      //   SumSign -> Sum >= 0
+      //
+      //   Add:
+      //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+      //   Sub:
+      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+      unsigned Cost = 0;
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+                                                  OverflowTy, nullptr);
+      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
+                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+      Cost +=
+          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+      return Cost;
+    }
+    case Intrinsic::uadd_with_overflow:
+    case Intrinsic::usub_with_overflow: {
+      Type *SumTy = RetTy->getContainedType(0);
+      Type *OverflowTy = RetTy->getContainedType(1);
+      unsigned Opcode = IID == Intrinsic::uadd_with_overflow
+                            ? BinaryOperator::Add
+                            : BinaryOperator::Sub;
+
+      unsigned Cost = 0;
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+                                              OverflowTy, nullptr);
+      return Cost;
+    }
+    case Intrinsic::smul_with_overflow:
+    case Intrinsic::umul_with_overflow: {
+      Type *MulTy = RetTy->getContainedType(0);
+      Type *OverflowTy = RetTy->getContainedType(1);
+      unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+      Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+      if (MulTy->isVectorTy())
+        ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
+      // Widen to twice the width: sext for the signed form, zext otherwise.
+      unsigned ExtOp = IID == Intrinsic::smul_with_overflow ? Instruction::SExt
+                                                            : Instruction::ZExt;
+      // Cost: 2 exts, wide mul, 2 truncs, lshr (+ ashr if signed), icmp.
+      unsigned Cost = 0;
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost +=
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+                                                  TTI::OK_AnyValue,
+                                                  TTI::OK_UniformConstantValue);
+
+      if (IID == Intrinsic::smul_with_overflow)
+        Cost += ConcreteTTI->getArithmeticInstrCost(
+            Instruction::AShr, MulTy, TTI::OK_AnyValue,
+            TTI::OK_UniformConstantValue);
+
+      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
+                                              OverflowTy, nullptr);
+      return Cost;
+    }
     case Intrinsic::ctpop:
       ISDs.push_back(ISD::CTPOP);
       // In case of legalization use TCC_Expensive. This is cheaper than a
@@ -1305,17 +1460,16 @@ public:
     if (MinLegalCostI != LegalCost.end())
       return *MinLegalCostI;
 
-    auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
+    auto MinCustomCostI =
+        std::min_element(CustomCost.begin(), CustomCost.end());
     if (MinCustomCostI != CustomCost.end())
       return *MinCustomCostI;
 
     // If we can't lower fmuladd into an FMA estimate the cost as a floating
     // point mul followed by an add.
     if (IID == Intrinsic::fmuladd)
-      return static_cast<T *>(this)
-                 ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
-             static_cast<T *>(this)
-                 ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
+             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
 
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1333,7 +1487,7 @@ public:
           Ty = Ty->getScalarType();
         ScalarTys.push_back(Ty);
       }
-      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
+      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
           IID, RetTy->getScalarType(), ScalarTys, FMF);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         if (Tys[i]->isVectorTy()) {
diff --git a/include/llvm/CodeGen/BuiltinGCs.h b/include/llvm/CodeGen/BuiltinGCs.h
index 1767922fb5ac..d44183dab0f7 100644
--- a/include/llvm/CodeGen/BuiltinGCs.h
+++ b/include/llvm/CodeGen/BuiltinGCs.h
@@ -1,9 +1,8 @@
 //===-- BuiltinGCs.h - Garbage collector linkage hacks --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/CSEConfigBase.h b/include/llvm/CodeGen/CSEConfigBase.h
new file mode 100644
index 000000000000..70b5e5c17eb1
--- /dev/null
+++ b/include/llvm/CodeGen/CSEConfigBase.h
@@ -0,0 +1,28 @@
+//===- CSEConfigBase.h - A CSEConfig interface ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CSECONFIG_BASE_H
+#define LLVM_CODEGEN_CSECONFIG_BASE_H
+
+namespace llvm {
+// Class representing some configuration that can be done during GlobalISel's
+// CSEInfo analysis. We define it here because TargetPassConfig can't depend on
+// the GlobalISel library, and so we use this in the interface between them
+// so that the derived classes in GISel can reference generic opcodes.
+class CSEConfigBase {
+public:
+  virtual ~CSEConfigBase() = default;
+  // Hook for defining which Generic instructions should be CSEd.
+  // GISelCSEInfo currently only calls this hook when dealing with generic
+  // opcodes.
+  virtual bool shouldCSEOpc(unsigned Opc) { return false; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CSECONFIG_BASE_H
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index f85767f1fc11..9b8b7324f30a 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index efcf80ba0b4e..aa339e1cc913 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -1,9 +1,8 @@
 //===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -146,7 +145,7 @@ public:
 
   bool needsCustom() const { return isCustom; }
 
-  unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
+  Register getLocReg() const { assert(isRegLoc()); return Loc; }
   unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
   unsigned getExtraInfo() const { return Loc; }
   MVT getLocVT() const { return LocVT; }
@@ -557,7 +556,7 @@ public:
 
     // Sort the locations of the arguments according to their original position.
     SmallVector<CCValAssign, 16> TmpArgLocs;
-    std::swap(TmpArgLocs, Locs);
+    TmpArgLocs.swap(Locs);
     auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
     std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
                std::back_inserter(Locs),
diff --git a/include/llvm/CodeGen/CommandFlags.inc b/include/llvm/CodeGen/CommandFlags.inc
index 568d329a5e8c..cb69e9f61405 100644
--- a/include/llvm/CodeGen/CommandFlags.inc
+++ b/include/llvm/CodeGen/CommandFlags.inc
@@ -1,9 +1,8 @@
 //===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -272,6 +271,11 @@ static cl::opt<bool>
     EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
                   cl::init(false));
 
+static cl::opt<bool>
+    EnableDebugEntryValues("debug-entry-values",
+                           cl::desc("Emit debug info about parameter's entry values"),
+                           cl::init(false));
+
 // Common utility function tightly tied to the options listed here. Initializes
 // a TargetOptions object with CodeGen flags and returns it.
 static TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -301,6 +305,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
   Options.ExceptionModel = ExceptionModel;
   Options.EmitStackSizeSection = EnableStackSizeSection;
   Options.EmitAddrsig = EnableAddrsig;
+  Options.EnableDebugEntryValues = EnableDebugEntryValues;
 
   Options.MCOptions = InitMCTargetOptionsFromFlags();
 
diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h
index 48ad76971520..52f3bfaea180 100644
--- a/include/llvm/CodeGen/CostTable.h
+++ b/include/llvm/CodeGen/CostTable.h
@@ -1,9 +1,8 @@
 //===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/CodeGen/DAGCombine.h b/include/llvm/CodeGen/DAGCombine.h
index 8b5919005451..944187341455 100644
--- a/include/llvm/CodeGen/DAGCombine.h
+++ b/include/llvm/CodeGen/DAGCombine.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DAGCombine.h  ------- SelectionDAG Nodes ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index d3aabe22f216..cf58ee0cabea 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This class implements a deterministic finite automaton (DFA) based
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 7d486b1df56d..684f9e40ca5a 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,6 +38,7 @@ namespace llvm {
 class AsmPrinter;
 class DIE;
 class DIEUnit;
+class DwarfCompileUnit;
 class MCExpr;
 class MCSection;
 class MCSymbol;
@@ -230,6 +230,25 @@ public:
   void print(raw_ostream &O) const;
 };
 
+//===--------------------------------------------------------------------===//
+/// A BaseTypeRef DIE.
+class DIEBaseTypeRef {
+  const DwarfCompileUnit *CU;
+  const uint64_t Index;
+  static constexpr unsigned ULEB128PadSize = 4;
+
+public:
+  explicit DIEBaseTypeRef(const DwarfCompileUnit *TheCU, uint64_t Idx)
+    : CU(TheCU), Index(Idx) {}
+
+  /// EmitValue - Emit base type reference.
+  void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  /// SizeOf - Determine size of the base type reference in bytes.
+  unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
+
+  void print(raw_ostream &O) const;
+};
+
 //===--------------------------------------------------------------------===//
 /// A simple label difference DIE.
 ///
@@ -350,7 +369,7 @@ private:
   /// should be stored by reference instead of by value.
   using ValTy = AlignedCharArrayUnion<DIEInteger, DIEString, DIEExpr, DIELabel,
                                       DIEDelta *, DIEEntry, DIEBlock *,
-                                      DIELoc *, DIELocList>;
+                                      DIELoc *, DIELocList, DIEBaseTypeRef *>;
 
   static_assert(sizeof(ValTy) <= sizeof(uint64_t) ||
                     sizeof(ValTy) <= sizeof(void *),
@@ -502,6 +521,18 @@ struct IntrusiveBackListBase {
     }
     Last = &N;
   }
+
+  void push_front(Node &N) {
+    assert(N.Next.getPointer() == &N && "Expected unlinked node");
+    assert(N.Next.getInt() == true && "Expected unlinked node");
+
+    if (Last) {
+      N.Next.setPointerAndInt(Last->Next.getPointer(), false);
+      Last->Next.setPointerAndInt(&N, true);
+    } else {
+      Last = &N;
+    }
+  }
 };
 
 template <class T> class IntrusiveBackList : IntrusiveBackListBase {
@@ -509,8 +540,15 @@ public:
   using IntrusiveBackListBase::empty;
 
   void push_back(T &N) { IntrusiveBackListBase::push_back(N); }
+  void push_front(T &N) { IntrusiveBackListBase::push_front(N); }
   T &back() { return *static_cast<T *>(Last); }
   const T &back() const { return *static_cast<T *>(Last); }
+  T &front() {
+    return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+  }
+  const T &front() const {
+    return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+  }
 
   class const_iterator;
   class iterator
@@ -760,7 +798,7 @@ public:
   ///
   /// \returns the DIEUnit that represents the compile or type unit that owns
   /// this DIE, or NULL if this DIE hasn't been added to a unit DIE.
-  const DIEUnit *getUnit() const;
+  DIEUnit *getUnit() const;
 
   void setOffset(unsigned O) { Offset = O; }
   void setSize(unsigned S) { Size = S; }
@@ -773,6 +811,13 @@ public:
     return Children.back();
   }
 
+  DIE &addChildFront(DIE *Child) {
+    assert(!Child->getParent() && "Child should be orphaned");
+    Child->Owner = this;
+    Children.push_front(*Child);
+    return Children.front();
+  }
+
   /// Find a value in the DIE with the attribute given.
   ///
   /// Returns a default-constructed DIEValue (where \a DIEValue::getType()
@@ -800,7 +845,7 @@ class DIEUnit {
   const uint16_t Version; /// The Dwarf version number for this unit.
   const uint8_t AddrSize; /// The size in bytes of an address for this unit.
 protected:
-  ~DIEUnit() = default;
+  virtual ~DIEUnit() = default;
 
 public:
   DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag);
diff --git a/include/llvm/CodeGen/DIEValue.def b/include/llvm/CodeGen/DIEValue.def
index a3fce9b1d20c..92afeb3868b4 100644
--- a/include/llvm/CodeGen/DIEValue.def
+++ b/include/llvm/CodeGen/DIEValue.def
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DIEValue.def - DIEValue types ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,6 +34,7 @@ HANDLE_DIEVALUE_SMALL(Integer)
 HANDLE_DIEVALUE_SMALL(String)
 HANDLE_DIEVALUE_SMALL(Expr)
 HANDLE_DIEVALUE_SMALL(Label)
+HANDLE_DIEVALUE_LARGE(BaseTypeRef)
 HANDLE_DIEVALUE_LARGE(Delta)
 HANDLE_DIEVALUE_SMALL(Entry)
 HANDLE_DIEVALUE_LARGE(Block)
diff --git a/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index befc28f084e7..7eec75bc81bf 100644
--- a/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DbgEntityHistoryCalculator.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
 
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include <utility>
@@ -22,35 +22,76 @@ class MachineFunction;
 class MachineInstr;
 class TargetRegisterInfo;
 
-// For each user variable, keep a list of instruction ranges where this variable
-// is accessible. The variables are listed in order of appearance.
+/// For each user variable, keep a list of instruction ranges where this
+/// variable is accessible. The variables are listed in order of appearance.
 class DbgValueHistoryMap {
-  // Each instruction range starts with a DBG_VALUE instruction, specifying the
-  // location of a variable, which is assumed to be valid until the end of the
-  // range. If end is not specified, location is valid until the start
-  // instruction of the next instruction range, or until the end of the
-  // function.
 public:
-  using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
-  using InstrRanges = SmallVector<InstrRange, 4>;
+  /// Index in the entry vector.
+  typedef size_t EntryIndex;
+
+  /// Special value to indicate that an entry is valid until the end of the
+  /// function.
+  static const EntryIndex NoEntry = std::numeric_limits<EntryIndex>::max();
+
+  /// Specifies a change in a variable's debug value history.
+  ///
+  /// There exist two types of entries:
+  ///
+  /// * Debug value entry:
+  ///
+  ///   A new debug value becomes live. If the entry's \p EndIndex is \p NoEntry,
+  ///   the value is valid until the end of the function. For other values, the
+  ///   index points to the entry in the entry vector that ends this debug
+  ///   value. The ending entry can either be an overlapping debug value, or
+  ///   an instruction that clobbers the value.
+  ///
+  /// * Clobbering entry:
+  ///
+  ///   This entry's instruction clobbers one or more preceding
+  ///   register-described debug values that have their end index
+  ///   set to this entry's position in the entry vector.
+  class Entry {
+  public:
+    enum EntryKind { DbgValue, Clobber };
+
+    Entry(const MachineInstr *Instr, EntryKind Kind)
+        : Instr(Instr, Kind), EndIndex(NoEntry) {}
+
+    const MachineInstr *getInstr() const { return Instr.getPointer(); }
+    EntryIndex getEndIndex() const { return EndIndex; }
+    EntryKind getEntryKind() const { return Instr.getInt(); }
+
+    bool isClobber() const { return getEntryKind() == Clobber; }
+    bool isDbgValue() const { return getEntryKind() == DbgValue; }
+    bool isClosed() const { return EndIndex != NoEntry; }
+
+    void endEntry(EntryIndex EndIndex);
+
+  private:
+    PointerIntPair<const MachineInstr *, 1, EntryKind> Instr;
+    EntryIndex EndIndex;
+  };
+  using Entries = SmallVector<Entry, 4>;
   using InlinedEntity = std::pair<const DINode *, const DILocation *>;
-  using InstrRangesMap = MapVector<InlinedEntity, InstrRanges>;
+  using EntriesMap = MapVector<InlinedEntity, Entries>;
 
 private:
-  InstrRangesMap VarInstrRanges;
+  EntriesMap VarEntries;
 
 public:
-  void startInstrRange(InlinedEntity Var, const MachineInstr &MI);
-  void endInstrRange(InlinedEntity Var, const MachineInstr &MI);
-
-  // Returns register currently describing @Var. If @Var is currently
-  // unaccessible or is not described by a register, returns 0.
-  unsigned getRegisterForVar(InlinedEntity Var) const;
-
-  bool empty() const { return VarInstrRanges.empty(); }
-  void clear() { VarInstrRanges.clear(); }
-  InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
-  InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+  bool startDbgValue(InlinedEntity Var, const MachineInstr &MI,
+                     EntryIndex &NewIndex);
+  EntryIndex startClobber(InlinedEntity Var, const MachineInstr &MI);
+
+  Entry &getEntry(InlinedEntity Var, EntryIndex Index) {
+    auto &Entries = VarEntries[Var];
+    return Entries[Index];
+  }
+
+  bool empty() const { return VarEntries.empty(); }
+  void clear() { VarEntries.clear(); }
+  EntriesMap::const_iterator begin() const { return VarEntries.begin(); }
+  EntriesMap::const_iterator end() const { return VarEntries.end(); }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   LLVM_DUMP_METHOD void dump() const;
diff --git a/include/llvm/CodeGen/DebugHandlerBase.h b/include/llvm/CodeGen/DebugHandlerBase.h
index 4f0d14d317f2..4008d597395e 100644
--- a/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/include/llvm/CodeGen/DebugHandlerBase.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DebugHandlerBase.h -----------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -130,7 +129,7 @@ public:
   const MCExpr *getFunctionLocalOffsetAfterInsn(const MachineInstr *MI);
 
   /// If this type is derived from a base type then return base type size.
-  static uint64_t getBaseTypeSize(const DITypeRef TyRef);
+  static uint64_t getBaseTypeSize(const DIType *Ty);
 };
 
 }
diff --git a/include/llvm/CodeGen/DwarfStringPoolEntry.h b/include/llvm/CodeGen/DwarfStringPoolEntry.h
index 8b1a7af17bbf..e189352a7b2d 100644
--- a/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfStringPoolEntry.h - String pool entry --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h
index c31fad246c96..28cdf54e0575 100644
--- a/include/llvm/CodeGen/EdgeBundles.h
+++ b/include/llvm/CodeGen/EdgeBundles.h
@@ -1,9 +1,8 @@
 //===-------- EdgeBundles.h - Bundles of CFG edges --------------*- c++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ExecutionDomainFix.h b/include/llvm/CodeGen/ExecutionDomainFix.h
index 338c214dd073..6836678e2101 100644
--- a/include/llvm/CodeGen/ExecutionDomainFix.h
+++ b/include/llvm/CodeGen/ExecutionDomainFix.h
@@ -1,9 +1,8 @@
 //==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ExpandReductions.h b/include/llvm/CodeGen/ExpandReductions.h
index c6aaaad967b3..5dbed07873c1 100644
--- a/include/llvm/CodeGen/ExpandReductions.h
+++ b/include/llvm/CodeGen/ExpandReductions.h
@@ -1,9 +1,8 @@
 //===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 865d8a88b8cc..f09b59daf4dd 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -1,9 +1,8 @@
 //===- FastISel.h - Definition of the FastISel class ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -528,7 +527,7 @@ protected:
   /// Select and emit code for a binary operator instruction, which has
   /// an opcode which directly corresponds to the given ISD opcode.
   bool selectBinaryOp(const User *I, unsigned ISDOpcode);
-  bool selectFNeg(const User *I);
+  bool selectFNeg(const User *I, const Value *In);
   bool selectGetElementPtr(const User *I);
   bool selectStackmap(const CallInst *I);
   bool selectPatchpoint(const CallInst *I);
diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h
index 55e25c9823b1..a1e2349c413e 100644
--- a/include/llvm/CodeGen/FaultMaps.h
+++ b/include/llvm/CodeGen/FaultMaps.h
@@ -1,9 +1,8 @@
 //===- FaultMaps.h - The "FaultMaps" section --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 7c658515de09..fb60191abd3a 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -1,9 +1,8 @@
 //===- FunctionLoweringInfo.h - Lower functions from LLVM IR ---*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,13 +13,14 @@
 
 #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,6 +57,7 @@ public:
   const TargetLowering *TLI;
   MachineRegisterInfo *RegInfo;
   BranchProbabilityInfo *BPI;
+  const LegacyDivergenceAnalysis *DA;
   /// CanLowerReturn - true iff the function's return value can be lowered to
   /// registers.
   bool CanLowerReturn;
@@ -71,48 +72,6 @@ public:
   /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
   DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
 
-  /// A map from swifterror value in a basic block to the virtual register it is
-  /// currently represented by.
-  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
-      SwiftErrorVRegDefMap;
-
-  /// A list of upward exposed vreg uses that need to be satisfied by either a
-  /// copy def or a phi node at the beginning of the basic block representing
-  /// the predecessor(s) swifterror value.
-  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
-      SwiftErrorVRegUpwardsUse;
-
-  /// A map from instructions that define/use a swifterror value to the virtual
-  /// register that represents that def/use.
-  llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, unsigned>
-      SwiftErrorVRegDefUses;
-
-  /// The swifterror argument of the current function.
-  const Value *SwiftErrorArg;
-
-  using SwiftErrorValues = SmallVector<const Value*, 1>;
-  /// A function can only have a single swifterror argument. And if it does
-  /// have a swifterror argument, it must be the first entry in
-  /// SwiftErrorVals.
-  SwiftErrorValues SwiftErrorVals;
-
-  /// Get or create the swifterror value virtual register in
-  /// SwiftErrorVRegDefMap for this basic block.
-  unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *,
-                                     const Value *);
-
-  /// Set the swifterror virtual register in the SwiftErrorVRegDefMap for this
-  /// basic block.
-  void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *,
-                                unsigned);
-
-  /// Get or create the swifterror value virtual register for a def of a
-  /// swifterror by an instruction.
-  std::pair<unsigned, bool> getOrCreateSwiftErrorVRegDefAt(const Instruction *);
-  std::pair<unsigned, bool>
-  getOrCreateSwiftErrorVRegUseAt(const Instruction *, const MachineBasicBlock *,
-                                 const Value *);
-
   /// ValueMap - Since we emit code for the function a basic block at a time,
   /// we must remember which virtual registers hold the values for
   /// cross-basic-block values.
@@ -175,6 +134,10 @@ public:
   /// function arguments that are inserted after scheduling is completed.
   SmallVector<MachineInstr*, 8> ArgDbgValues;
 
+  /// Bitvector with a bit set if corresponding argument is described in
+  /// ArgDbgValues. Using arg numbers according to Argument numbering.
+  BitVector DescribedArgs;
+
   /// RegFixups - Registers which need to be replaced after isel is done.
   DenseMap<unsigned, unsigned> RegFixups;
 
@@ -236,9 +199,11 @@ public:
     return ValueMap.count(V);
   }
 
-  unsigned CreateReg(MVT VT);
+  unsigned CreateReg(MVT VT, bool isDivergent = false);
+
+  unsigned CreateRegs(const Value *V);
 
-  unsigned CreateRegs(Type *Ty);
+  unsigned CreateRegs(Type *Ty, bool isDivergent = false);
 
   unsigned InitializeRegForValue(const Value *V) {
     // Tokens never live in vregs.
@@ -247,7 +212,7 @@ public:
     unsigned &R = ValueMap[V];
     assert(R == 0 && "Already initialized this value register!");
     assert(VirtReg2Value.empty());
-    return R = CreateRegs(V->getType());
+    return R = CreateRegs(V);
   }
 
   /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 7fb27202c122..77cd356c49dd 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -1,9 +1,8 @@
 //===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index 5f1efb2ce02c..f9527c9f8752 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 5a60cd7cb823..c5731528da4e 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GCStrategy.h - Garbage collection -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index ce2d285a99e5..5a44e67992ad 100644
--- a/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/CSEInfo.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #define LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
 
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -37,25 +37,27 @@ public:
   void Profile(FoldingSetNodeID &ID);
 };
 
-// Class representing some configuration that can be done during CSE analysis.
-// Currently it only supports shouldCSE method that each pass can set.
-class CSEConfig {
+// A CSE config for fully optimized builds.
+class CSEConfigFull : public CSEConfigBase {
 public:
-  virtual ~CSEConfig() = default;
-  // Hook for defining which Generic instructions should be CSEd.
-  // GISelCSEInfo currently only calls this hook when dealing with generic
-  // opcodes.
-  virtual bool shouldCSEOpc(unsigned Opc);
+  virtual ~CSEConfigFull() = default;
+  virtual bool shouldCSEOpc(unsigned Opc) override;
 };
 
-// TODO: Find a better place for this.
 // Commonly used for O0 config.
-class CSEConfigConstantOnly : public CSEConfig {
+class CSEConfigConstantOnly : public CSEConfigBase {
 public:
   virtual ~CSEConfigConstantOnly() = default;
   virtual bool shouldCSEOpc(unsigned Opc) override;
 };
 
+// Returns the standard expected CSEConfig for the given optimization level.
+// We have this logic here so targets can make use of it from their derived
+// TargetPassConfig, but can't put this logic into TargetPassConfig directly
+// because the CodeGen library can't depend on GlobalISel.
+std::unique_ptr<CSEConfigBase>
+getStandardCSEConfigForOpt(CodeGenOpt::Level Level);
+
 /// The CSE Analysis object.
 /// This installs itself as a delegate to the MachineFunction to track
 /// new instructions as well as deletions. It however will not be able to
@@ -74,7 +76,7 @@ class GISelCSEInfo : public GISelChangeObserver {
   FoldingSet<UniqueMachineInstr> CSEMap;
   MachineRegisterInfo *MRI = nullptr;
   MachineFunction *MF = nullptr;
-  std::unique_ptr<CSEConfig> CSEOpt;
+  std::unique_ptr<CSEConfigBase> CSEOpt;
   /// Keep a cache of UniqueInstrs for each MachineInstr. In GISel,
   /// often instructions are mutated (while their ID has completely changed).
   /// Whenever mutation happens, invalidate the UniqueMachineInstr for the
@@ -139,7 +141,9 @@ public:
 
   void releaseMemory();
 
-  void setCSEConfig(std::unique_ptr<CSEConfig> Opt) { CSEOpt = std::move(Opt); }
+  void setCSEConfig(std::unique_ptr<CSEConfigBase> Opt) {
+    CSEOpt = std::move(Opt);
+  }
 
   bool shouldCSE(unsigned Opc) const;
 
@@ -199,11 +203,12 @@ class GISelCSEAnalysisWrapper {
   bool AlreadyComputed = false;
 
 public:
-  /// Takes a CSEConfig object that defines what opcodes get CSEd.
+  /// Takes a CSEConfigBase object that defines what opcodes get CSEd.
   /// If CSEConfig is already set, and the CSE Analysis has been preserved,
   /// it will not use the new CSEOpt(use Recompute to force using the new
   /// CSEOpt).
-  GISelCSEInfo &get(std::unique_ptr<CSEConfig> CSEOpt, bool ReCompute = false);
+  GISelCSEInfo &get(std::unique_ptr<CSEConfigBase> CSEOpt,
+                    bool ReCompute = false);
   void setMF(MachineFunction &MFunc) { MF = &MFunc; }
   void setComputed(bool Computed) { AlreadyComputed = Computed; }
   void releaseMemory() { Info.releaseMemory(); }
diff --git a/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
index a8fb736ebbb5..4f95335db74b 100644
--- a/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.h  --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h
index ab498e8f070b..d717121ad78e 100644
--- a/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,6 +15,7 @@
 #define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/TargetCallingConv.h"
 #include "llvm/IR/CallSite.h"
@@ -27,6 +27,7 @@
 
 namespace llvm {
 
+class CCState;
 class DataLayout;
 class Function;
 class MachineIRBuilder;
@@ -43,14 +44,19 @@ class CallLowering {
   virtual void anchor();
 public:
   struct ArgInfo {
-    unsigned Reg;
+    SmallVector<Register, 4> Regs;
     Type *Ty;
     ISD::ArgFlagsTy Flags;
     bool IsFixed;
 
-    ArgInfo(unsigned Reg, Type *Ty, ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{},
-            bool IsFixed = true)
-        : Reg(Reg), Ty(Ty), Flags(Flags), IsFixed(IsFixed) {}
+    ArgInfo(ArrayRef<Register> Regs, Type *Ty,
+            ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
+        : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
+          IsFixed(IsFixed) {
+      // FIXME: We should have just one way of saying "no register".
+      assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
+             "only void types should have no register");
+    }
   };
 
   /// Argument handling is mostly uniform between the four places that
@@ -66,24 +72,28 @@ public:
 
     virtual ~ValueHandler() = default;
 
+    /// Returns true if the handler is dealing with formal arguments,
+    /// not with return values etc.
+    virtual bool isArgumentHandler() const { return false; }
+
     /// Materialize a VReg containing the address of the specified
     /// stack-based object. This is either based on a FrameIndex or
     /// direct SP manipulation, depending on the context. \p MPO
     /// should be initialized to an appropriate description of the
     /// address created.
-    virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
+    virtual Register getStackAddress(uint64_t Size, int64_t Offset,
                                      MachinePointerInfo &MPO) = 0;
 
     /// The specified value has been assigned to a physical register,
     /// handle the appropriate COPY (either to or from) and mark any
     /// relevant uses/defines as needed.
-    virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+    virtual void assignValueToReg(Register ValVReg, Register PhysReg,
                                   CCValAssign &VA) = 0;
 
     /// The specified value has been assigned to a stack
     /// location. Load or store it there, with appropriate extension
     /// if necessary.
-    virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
+    virtual void assignValueToAddress(Register ValVReg, Register Addr,
                                       uint64_t Size, MachinePointerInfo &MPO,
                                       CCValAssign &VA) = 0;
 
@@ -98,7 +108,7 @@ public:
       llvm_unreachable("Custom values not supported");
     }
 
-    unsigned extendRegister(unsigned ValReg, CCValAssign &VA);
+    Register extendRegister(Register ValReg, CCValAssign &VA);
 
     virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo, const ArgInfo &Info,
@@ -130,39 +140,83 @@ protected:
   void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
                    const FuncInfoTy &FuncInfo) const;
 
+  /// Generate instructions for packing \p SrcRegs into one big register
+  /// corresponding to the aggregate type \p PackedTy.
+  ///
+  /// \param SrcRegs should contain one virtual register for each base type in
+  ///                \p PackedTy, as returned by computeValueLLTs.
+  ///
+  /// \return The packed register.
+  Register packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+                    MachineIRBuilder &MIRBuilder) const;
+
+  /// Generate instructions for unpacking \p SrcReg into the \p DstRegs
+  /// corresponding to the aggregate type \p PackedTy.
+  ///
+  /// \param DstRegs should contain one virtual register for each base type in
+  ///        \p PackedTy, as returned by computeValueLLTs.
+  void unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, Type *PackedTy,
+                  MachineIRBuilder &MIRBuilder) const;
+
   /// Invoke Handler::assignArg on each of the given \p Args and then use
   /// \p Callback to move them to the assigned locations.
   ///
   /// \return True if everything has succeeded, false otherwise.
   bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
                          ValueHandler &Handler) const;
-
+  bool handleAssignments(CCState &CCState,
+                         SmallVectorImpl<CCValAssign> &ArgLocs,
+                         MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+                         ValueHandler &Handler) const;
 public:
   CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
   virtual ~CallLowering() = default;
 
+  /// \return true if the target is capable of handling swifterror values that
+  /// have been promoted to a specified register. The extended versions of
+  /// lowerReturn and lowerCall should be implemented.
+  virtual bool supportSwiftError() const {
+    return false;
+  }
+
   /// This hook must be implemented to lower outgoing return values, described
   /// by \p Val, into the specified virtual registers \p VRegs.
   /// This hook is used by GlobalISel.
   ///
+  /// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter
+  /// that needs to be implicitly returned.
+  ///
   /// \return True if the lowering succeeds, false otherwise.
   virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                           ArrayRef<unsigned> VRegs) const {
+                           ArrayRef<Register> VRegs,
+                           Register SwiftErrorVReg) const {
+    if (!supportSwiftError()) {
+      assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror");
+      return lowerReturn(MIRBuilder, Val, VRegs);
+    }
+    return false;
+  }
+
+  /// This hook behaves as the extended lowerReturn function, but for targets
+  /// that do not support swifterror value promotion.
+  virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                           ArrayRef<Register> VRegs) const {
     return false;
   }
 
   /// This hook must be implemented to lower the incoming (formal)
-  /// arguments, described by \p Args, for GlobalISel. Each argument
-  /// must end up in the related virtual register described by VRegs.
-  /// In other words, the first argument should end up in VRegs[0],
-  /// the second in VRegs[1], and so on.
+  /// arguments, described by \p VRegs, for GlobalISel. Each argument
+  /// must end up in the related virtual registers described by \p VRegs.
+  /// In other words, the first argument should end up in \c VRegs[0],
+  /// the second in \c VRegs[1], and so on. For each argument, there will be one
+  /// register for each non-aggregate type, as returned by \c computeValueLLTs.
   /// \p MIRBuilder is set to the proper insertion for the argument
   /// lowering.
   ///
   /// \return True if the lowering succeeded, false otherwise.
   virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                     const Function &F,
-                                    ArrayRef<unsigned> VRegs) const {
+                                    ArrayRef<ArrayRef<Register>> VRegs) const {
     return false;
   }
 
@@ -174,18 +228,29 @@ public:
   /// \p Callee is the destination of the call. It should be either a register,
   /// globaladdress, or externalsymbol.
   ///
-  /// \p ResTy is the type returned by the function
-  ///
-  /// \p ResReg is the generic virtual register that the returned
-  /// value should be lowered into.
+  /// \p OrigRet is a descriptor for the return type of the function.
   ///
-  /// \p ArgTys is a list of the types each member of \p ArgRegs has; used by
-  /// the target to decide which register/stack slot should be allocated.
+  /// \p OrigArgs is a list of descriptors of the arguments passed to the
+  /// function.
   ///
-  /// \p ArgRegs is a list of virtual registers containing each argument that
-  /// needs to be passed.
+  /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+  /// parameter, and contains the vreg that the swifterror should be copied into
+  /// after the call.
   ///
   /// \return true if the lowering succeeded, false otherwise.
+  virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+                         const MachineOperand &Callee, const ArgInfo &OrigRet,
+                         ArrayRef<ArgInfo> OrigArgs,
+                         Register SwiftErrorVReg) const {
+    if (!supportSwiftError()) {
+      assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror");
+      return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs);
+    }
+    return false;
+  }
+
+  /// This hook behaves as the extended lowerCall function, but for targets that
+  /// do not support swifterror value promotion.
   virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                          const MachineOperand &Callee, const ArgInfo &OrigRet,
                          ArrayRef<ArgInfo> OrigArgs) const {
@@ -197,11 +262,18 @@ public:
   ///
   /// \p CI is the call/invoke instruction.
   ///
-  /// \p ResReg is a register where the call's return value should be stored (or
-  /// 0 if there is no return value).
+  /// \p ResRegs are the registers where the call's return value should be
+  /// stored (or 0 if there is no return value). There will be one register for
+  /// each non-aggregate type, as returned by \c computeValueLLTs.
+  ///
+  /// \p ArgRegs is a list of lists of virtual registers containing each
+  /// argument that needs to be passed (argument \c i should be placed in \c
+  /// ArgRegs[i]). For each argument, there will be one register for each
+  /// non-aggregate type, as returned by \c computeValueLLTs.
   ///
-  /// \p ArgRegs is a list of virtual registers containing each argument that
-  /// needs to be passed.
+  /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+  /// parameter, and contains the vreg that the swifterror should be copied into
+  /// after the call.
   ///
   /// \p GetCalleeReg is a callback to materialize a register for the callee if
   /// the target determines it cannot jump to the destination based purely on \p
@@ -210,7 +282,8 @@ public:
   ///
   /// \return true if the lowering succeeded, false otherwise.
   bool lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
-                 unsigned ResReg, ArrayRef<unsigned> ArgRegs,
+                 ArrayRef<Register> ResRegs,
+                 ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
                  std::function<unsigned()> GetCalleeReg) const;
 };
 
diff --git a/include/llvm/CodeGen/GlobalISel/Combiner.h b/include/llvm/CodeGen/GlobalISel/Combiner.h
index b097c7817762..efe8bdf93664 100644
--- a/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -1,9 +1,8 @@
-//== ----- llvm/CodeGen/GlobalISel/Combiner.h --------------------- == //
+//== ----- llvm/CodeGen/GlobalISel/Combiner.h -------------------*- C++ -*-== //
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 6e9ac01c1ee2..0c50c9c5e0cf 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/CombinerHelper.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===--------------------------------------------------------------------===//
 //
@@ -18,6 +17,9 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
 #define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
 
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/Register.h"
+
 namespace llvm {
 
 class GISelChangeObserver;
@@ -26,6 +28,12 @@ class MachineRegisterInfo;
 class MachineInstr;
 class MachineOperand;
 
+struct PreferredTuple {
+  LLT Ty;                // The result type of the extend.
+  unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+  MachineInstr *MI;
+};
+
 class CombinerHelper {
   MachineIRBuilder &Builder;
   MachineRegisterInfo &MRI;
@@ -35,20 +43,27 @@ public:
   CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
 
   /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
-  void replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, unsigned ToReg) const;
+  void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
 
   /// Replace a single register operand with a new register and inform the
   /// observer of the changes.
   void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
-                        unsigned ToReg) const;
+                        Register ToReg) const;
 
   /// If \p MI is COPY, try to combine it.
   /// Returns true if MI changed.
   bool tryCombineCopy(MachineInstr &MI);
+  bool matchCombineCopy(MachineInstr &MI);
+  void applyCombineCopy(MachineInstr &MI);
 
   /// If \p MI is extend that consumes the result of a load, try to combine it.
   /// Returns true if MI changed.
   bool tryCombineExtendingLoads(MachineInstr &MI);
+  bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+  void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+
+  bool matchCombineBr(MachineInstr &MI);
+  bool tryCombineBr(MachineInstr &MI);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index d21aa3f725d9..3b09a8e2b479 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/CombinerInfo.h ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index 220a571b21db..e817d9b4550e 100644
--- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h  --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index c8e8a7a5a7cb..e5691cb35174 100644
--- a/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -1,9 +1,8 @@
-//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h ------------------===//
+//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,10 +33,17 @@ public:
 
   /// An instruction is about to be erased.
   virtual void erasingInstr(MachineInstr &MI) = 0;
-  /// An instruction was created and inserted into the function.
+
+  /// An instruction has been created and inserted into the function.
+  /// Note that the instruction might not be a fully fledged instruction at this
+  /// point and won't be if the MachineFunction::Delegate is calling it. This is
+  /// because the delegate only sees the construction of the MachineInstr before
+  /// operands have been added.
   virtual void createdInstr(MachineInstr &MI) = 0;
+
   /// This instruction is about to be mutated in some way.
   virtual void changingInstr(MachineInstr &MI) = 0;
+
   /// This instruction was mutated in some way.
   virtual void changedInstr(MachineInstr &MI) = 0;
 
diff --git a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index 1571841a208d..b0bb519283b1 100644
--- a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -1,9 +1,8 @@
 //===- GISelWorkList.h - Worklist for GISel passes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -33,23 +32,61 @@ class GISelWorkList {
   SmallVector<MachineInstr *, N> Worklist;
   DenseMap<MachineInstr *, unsigned> WorklistMap;
 
+#ifndef NDEBUG
+  bool Finalized = true;
+#endif
+
 public:
-  GISelWorkList() {}
+  GISelWorkList() : WorklistMap(N) {}
 
   bool empty() const { return WorklistMap.empty(); }
 
   unsigned size() const { return WorklistMap.size(); }
 
+  // Since we don't know ahead of time how many instructions we're going to add
+  // to the worklist, and migrating densemap's elements is quite expensive
+  // every time we resize, only insert to the smallvector (typically during the
+  // initial phase of populating lists). Before the worklist can be used,
+  // finalize should be called. Builds without NDEBUG also assert if the list is
+  // ever used without finalizing. Note that unlike insert, we won't check for
+  // duplicates - so the ideal place to use this is during the initial
+  // prepopulating phase of most passes.
+  void deferred_insert(MachineInstr *I) {
+    Worklist.push_back(I);
+#ifndef NDEBUG
+    Finalized = false;
+#endif
+  }
+
+  // This should only be called when using deferred_insert.
+  // This asserts that the WorklistMap is empty, and then
+  // inserts all the elements in the Worklist into the map.
+  // It also asserts if there are any duplicate elements found.
+  void finalize() {
+    assert(WorklistMap.empty() && "Expecting empty worklistmap");
+    if (Worklist.size() > N)
+      WorklistMap.reserve(Worklist.size());
+    for (unsigned i = 0; i < Worklist.size(); ++i)
+      if (!WorklistMap.try_emplace(Worklist[i], i).second)
+        llvm_unreachable("Duplicate elements in the list");
+#ifndef NDEBUG
+    Finalized = true;
+#endif
+  }
+
   /// Add the specified instruction to the worklist if it isn't already in it.
   void insert(MachineInstr *I) {
+    assert(Finalized && "GISelWorkList used without finalizing");
     if (WorklistMap.try_emplace(I, Worklist.size()).second)
       Worklist.push_back(I);
   }
 
   /// Remove I from the worklist if it exists.
   void remove(const MachineInstr *I) {
+    assert((Finalized || WorklistMap.empty()) && "Neither finalized nor empty");
     auto It = WorklistMap.find(I);
-    if (It == WorklistMap.end()) return; // Not in worklist.
+    if (It == WorklistMap.end())
+      return; // Not in worklist.
 
     // Don't bother moving everything down, just null out the slot.
     Worklist[It->second] = nullptr;
@@ -63,6 +100,7 @@ public:
   }
 
   MachineInstr *pop_back_val() {
+    assert(Finalized && "GISelWorkList used without finalizing");
     MachineInstr *I;
     do {
       I = Worklist.pop_back_val();
diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index d1770bf6e4ce..8654ba83f08d 100644
--- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -23,7 +22,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Types.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Allocator.h"
 #include <memory>
@@ -37,6 +38,7 @@ class CallInst;
 class CallLowering;
 class Constant;
 class DataLayout;
+class FunctionLoweringInfo;
 class Instruction;
 class MachineBasicBlock;
 class MachineFunction;
@@ -69,7 +71,7 @@ private:
   public:
     ValueToVRegInfo() = default;
 
-    using VRegListT = SmallVector<unsigned, 1>;
+    using VRegListT = SmallVector<Register, 1>;
     using OffsetListT = SmallVector<uint64_t, 1>;
 
     using const_vreg_iterator =
@@ -164,6 +166,8 @@ private:
   /// this function.
   DenseMap<const AllocaInst *, int> FrameIndices;
 
+  SwiftErrorValueTracking SwiftError;
+
   /// \name Methods for translating form LLVM IR to MachineInstr.
   /// \see ::translate for general information on the translate methods.
   /// @{
@@ -196,7 +200,7 @@ private:
   /// the function.
   ///
   /// \return true if the materialization succeeded.
-  bool translate(const Constant &C, unsigned Reg);
+  bool translate(const Constant &C, Register Reg);
 
   /// Translate an LLVM bitcast into generic IR. Either a COPY or a G_BITCAST is
   /// emitted.
@@ -212,24 +216,27 @@ private:
   bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
                         unsigned ID);
 
-  void getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder);
+  void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
 
   bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                   MachineIRBuilder &MIRBuilder);
 
+  /// Helper function for translateSimpleIntrinsic.
+  /// \return The generic opcode for \p ID if \p ID is a
+  /// simple intrinsic (ceil, fabs, etc.). Otherwise, returns
+  /// Intrinsic::not_intrinsic.
+  unsigned getSimpleIntrinsicOpcode(Intrinsic::ID ID);
+
+  /// Translates the intrinsics defined in getSimpleIntrinsicOpcode.
+  /// \return true if the translation succeeded.
+  bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+                                MachineIRBuilder &MIRBuilder);
+
   bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                MachineIRBuilder &MIRBuilder);
 
   bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder);
 
-  // FIXME: temporary function to expose previous interface to call lowering
-  // until it is refactored.
-  /// Combines all component registers of \p V into a single scalar with size
-  /// "max(Offsets) + last size".
-  unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder);
-
-  void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder);
-
   /// Returns true if the value should be split into multiple LLTs.
   /// If \p Offsets is given then the split type's offsets will be stored in it.
   /// If \p Offsets is not empty it will be cleared first.
@@ -242,6 +249,8 @@ private:
 
   bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
 
+  bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
+
   bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
 
   /// Translate one of LLVM's cast instructions into MachineInstrs, with the
@@ -278,7 +287,42 @@ private:
   /// \pre \p U is a branch instruction.
   bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
 
+  // Begin switch lowering functions.
+  bool emitJumpTableHeader(SwitchCG::JumpTable &JT,
+                           SwitchCG::JumpTableHeader &JTH,
+                           MachineBasicBlock *HeaderBB);
+  void emitJumpTable(SwitchCG::JumpTable &JT, MachineBasicBlock *MBB);
+
+  void emitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB,
+                      MachineIRBuilder &MIB);
+
+  bool lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+                              MachineBasicBlock *SwitchMBB,
+                              MachineBasicBlock *CurMBB,
+                              MachineBasicBlock *DefaultMBB,
+                              MachineIRBuilder &MIB,
+                              MachineFunction::iterator BBI,
+                              BranchProbability UnhandledProbs,
+                              SwitchCG::CaseClusterIt I,
+                              MachineBasicBlock *Fallthrough,
+                              bool FallthroughUnreachable);
+
+  bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+                                Value *Cond,
+                                MachineBasicBlock *Fallthrough,
+                                bool FallthroughUnreachable,
+                                BranchProbability UnhandledProbs,
+                                MachineBasicBlock *CurMBB,
+                                MachineIRBuilder &MIB,
+                                MachineBasicBlock *SwitchMBB);
+
+  bool lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+                           MachineBasicBlock *SwitchMBB,
+                           MachineBasicBlock *DefaultMBB,
+                           MachineIRBuilder &MIB);
+
   bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder);
+  // End switch lowering section.
 
   bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder);
 
@@ -404,6 +448,7 @@ private:
 
   bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder);
   bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder);
+  bool translateFence(const User &U, MachineIRBuilder &MIRBuilder);
 
   // Stubs to keep the compiler happy while we implement the rest of the
   // translation.
@@ -419,9 +464,6 @@ private:
   bool translateCatchSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
     return false;
   }
-  bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
-    return false;
-  }
   bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) {
     return translateCast(TargetOpcode::G_ADDRSPACE_CAST, U, MIRBuilder);
   }
@@ -466,19 +508,50 @@ private:
   /// Current optimization remark emitter. Used to report failures.
   std::unique_ptr<OptimizationRemarkEmitter> ORE;
 
+  FunctionLoweringInfo FuncInfo;
+
+  // NOTE(review): presumably false when the target machine specifies no
+  // optimizations or the function has the optnone attribute - TODO confirm.
+  bool EnableOpts = false;
+
+  /// Switch analysis and optimization.
+  class GISelSwitchLowering : public SwitchCG::SwitchLowering {
+  public:
+    GISelSwitchLowering(IRTranslator *irt, FunctionLoweringInfo &funcinfo)
+        : SwitchLowering(funcinfo), IRT(irt) {
+      assert(irt && "irt is null!");
+    }
+
+    virtual void addSuccessorWithProb(
+        MachineBasicBlock *Src, MachineBasicBlock *Dst,
+        BranchProbability Prob = BranchProbability::getUnknown()) override {
+      IRT->addSuccessorWithProb(Src, Dst, Prob);
+    }
+
+    virtual ~GISelSwitchLowering() = default;
+
+  private:
+    IRTranslator *IRT;
+  };
+
+  std::unique_ptr<GISelSwitchLowering> SL;
+
   // * Insert all the code needed to materialize the constants
   // at the proper place. E.g., Entry block or dominator block
   // of each constant depending on how fancy we want to be.
   // * Clear the different maps.
   void finalizeFunction();
 
+  // Handle emitting jump tables for each basic block.
+  void finalizeBasicBlock();
+
   /// Get the VRegs that represent \p Val.
   /// Non-aggregate types have just one corresponding VReg and the list can be
   /// used as a single "unsigned". Aggregates get flattened. If such VRegs do
   /// not exist, they are created.
-  ArrayRef<unsigned> getOrCreateVRegs(const Value &Val);
+  ArrayRef<Register> getOrCreateVRegs(const Value &Val);
 
-  unsigned getOrCreateVReg(const Value &Val) {
+  Register getOrCreateVReg(const Value &Val) {
     auto Regs = getOrCreateVRegs(Val);
     if (Regs.empty())
       return 0;
@@ -522,6 +595,14 @@ private:
     return SmallVector<MachineBasicBlock *, 4>(1, &getMBB(*Edge.first));
   }
 
+  /// Return branch probability calculated by BranchProbabilityInfo for IR
+  /// blocks.
+  BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+                                       const MachineBasicBlock *Dst) const;
+
+  void addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+                            BranchProbability Prob);
+
 public:
   // Ctor, nothing fancy.
   IRTranslator();
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
index 01521c46ab6a..1af46e0a9e76 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
@@ -1,9 +1,8 @@
 //== llvm/CodeGen/GlobalISel/InstructionSelect.h -----------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file This file describes the interface of the MachineFunctionPass
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 471def7f45a3..e9b93be76754 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/InstructionSelector.h ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -139,6 +138,16 @@ enum {
   /// - MMOIdx - MMO index
   /// - Size - The size in bytes of the memory access
   GIM_CheckMemorySizeEqualTo,
+
+  /// Check the address space of the memory access for the given machine memory
+  /// operand.
+  /// - InsnID - Instruction ID
+  /// - MMOIdx - MMO index
+  /// - NumAddrSpace - Number of valid address spaces
+  /// - AddrSpaceN - An allowed space of the memory access
+  /// - AddrSpaceN+1 ...
+  GIM_CheckMemoryAddressSpace,
+
   /// Check the size of the memory access for the given machine memory operand
   /// against the size of an operand.
   /// - InsnID - Instruction ID
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 2003a79f6b20..e8ee4af0cb0b 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -371,6 +370,45 @@ bool InstructionSelector::executeMatchTable(
             return false;
       break;
     }
+    case GIM_CheckMemoryAddressSpace: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t MMOIdx = MatchTable[CurrentIdx++];
+      // This accepts a list of possible address spaces.
+      const int NumAddrSpace = MatchTable[CurrentIdx++];
+
+      if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+        if (handleReject() == RejectAndGiveUp)
+          return false;
+        break;
+      }
+
+      // Need to still jump to the end of the list of address spaces if we find
+      // a match earlier.
+      const uint64_t LastIdx = CurrentIdx + NumAddrSpace;
+
+      const MachineMemOperand *MMO
+        = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+      const unsigned MMOAddrSpace = MMO->getAddrSpace();
+
+      bool Success = false;
+      for (int I = 0; I != NumAddrSpace; ++I) {
+        unsigned AddrSpace = MatchTable[CurrentIdx++];
+        DEBUG_WITH_TYPE(
+          TgtInstructionSelector::getName(),
+          dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
+                 << AddrSpace << '\n');
+
+        if (AddrSpace == MMOAddrSpace) {
+          Success = true;
+          break;
+        }
+      }
+
+      CurrentIdx = LastIdx;
+      if (!Success && handleReject() == RejectAndGiveUp)
+        return false;
+      break;
+    }
     case GIM_CheckMemorySizeEqualTo: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t MMOIdx = MatchTable[CurrentIdx++];
@@ -438,15 +476,15 @@ bool InstructionSelector::executeMatchTable(
 
       unsigned Size = MRI.getType(MO.getReg()).getSizeInBits();
       if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT &&
-          MMO->getSize() * 8 != Size) {
+          MMO->getSizeInBits() != Size) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT &&
-                 MMO->getSize() * 8 >= Size) {
+                 MMO->getSizeInBits() >= Size) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT &&
-                 MMO->getSize() * 8 <= Size)
+                 MMO->getSizeInBits() <= Size)
         if (handleReject() == RejectAndGiveUp)
           return false;
 
@@ -479,17 +517,19 @@ bool InstructionSelector::executeMatchTable(
                              << InsnID << "]->getOperand(" << OpIdx
                              << "), SizeInBits=" << SizeInBits << ")\n");
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+      MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+      const LLT Ty = MRI.getType(MO.getReg());
+
       // iPTR must be looked up in the target.
       if (SizeInBits == 0) {
         MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent();
-        SizeInBits = MF->getDataLayout().getPointerSizeInBits(0);
+        const unsigned AddrSpace = Ty.getAddressSpace();
+        SizeInBits = MF->getDataLayout().getPointerSizeInBits(AddrSpace);
       }
 
       assert(SizeInBits != 0 && "Pointer size must be known");
 
-      MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
       if (MO.isReg()) {
-        const LLT &Ty = MRI.getType(MO.getReg());
         if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits)
           if (handleReject() == RejectAndGiveUp)
             return false;
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 20bec7650179..a22778b8848c 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file contains some helper functions which try to cleanup artifacts
@@ -29,6 +28,18 @@ class LegalizationArtifactCombiner {
   MachineRegisterInfo &MRI;
   const LegalizerInfo &LI;
 
+  static bool isArtifactCast(unsigned Opc) {
+    switch (Opc) {
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+      return true;
+    default:
+      return false;
+    }
+  }
+
 public:
   LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                     const LegalizerInfo &LI)
@@ -40,11 +51,11 @@ public:
       return false;
 
     Builder.setInstr(MI);
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
     // aext(trunc x) - > aext/copy/trunc x
-    unsigned TruncSrc;
+    Register TruncSrc;
     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
       LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
       Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
@@ -53,7 +64,7 @@ public:
     }
 
     // aext([asz]ext x) -> [asz]ext x
-    unsigned ExtSrc;
+    Register ExtSrc;
     MachineInstr *ExtMI;
     if (mi_match(SrcReg, MRI,
                  m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
@@ -63,6 +74,20 @@ public:
       markInstAndDefDead(MI, *ExtMI, DeadInsts);
       return true;
     }
+
+    // Try to fold aext(g_constant) when the larger constant type is legal.
+    // Can't use MIPattern because we don't have a specific constant in mind.
+    auto *SrcMI = MRI.getVRegDef(SrcReg);
+    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+      const LLT &DstTy = MRI.getType(DstReg);
+      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+        auto &CstVal = SrcMI->getOperand(1);
+        Builder.buildConstant(
+            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
+        markInstAndDefDead(MI, *SrcMI, DeadInsts);
+        return true;
+      }
+    }
     return tryFoldImplicitDef(MI, DeadInsts);
   }
 
@@ -73,25 +98,39 @@ public:
       return false;
 
     Builder.setInstr(MI);
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
     // zext(trunc x) - > and (aext/copy/trunc x), mask
-    unsigned TruncSrc;
+    Register TruncSrc;
     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
       LLT DstTy = MRI.getType(DstReg);
       if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
-          isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+          isConstantUnsupported(DstTy))
         return false;
       LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
       LLT SrcTy = MRI.getType(SrcReg);
-      APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits());
+      APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
       auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
       Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
                        MIBMask);
       markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
       return true;
     }
+
+    // Try to fold zext(g_constant) when the larger constant type is legal.
+    // Can't use MIPattern because we don't have a specific constant in mind.
+    auto *SrcMI = MRI.getVRegDef(SrcReg);
+    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+      const LLT &DstTy = MRI.getType(DstReg);
+      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+        auto &CstVal = SrcMI->getOperand(1);
+        Builder.buildConstant(
+            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
+        markInstAndDefDead(MI, *SrcMI, DeadInsts);
+        return true;
+      }
+    }
     return tryFoldImplicitDef(MI, DeadInsts);
   }
 
@@ -102,20 +141,22 @@ public:
       return false;
 
     Builder.setInstr(MI);
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
     // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c
-    unsigned TruncSrc;
+    Register TruncSrc;
     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
       LLT DstTy = MRI.getType(DstReg);
-      if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) ||
-          isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) ||
-          isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+      // Guess on the RHS shift amount type, which should be re-legalized if
+      // applicable.
+      if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) ||
+          isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) ||
+          isConstantUnsupported(DstTy))
         return false;
       LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
       LLT SrcTy = MRI.getType(SrcReg);
-      unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits();
+      unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
       auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
       auto MIBShl = Builder.buildInstr(
           TargetOpcode::G_SHL, {DstTy},
@@ -138,7 +179,7 @@ public:
     if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                            MI.getOperand(1).getReg(), MRI)) {
       Builder.setInstr(MI);
-      unsigned DstReg = MI.getOperand(0).getReg();
+      Register DstReg = MI.getOperand(0).getReg();
       LLT DstTy = MRI.getType(DstReg);
 
       if (Opcode == TargetOpcode::G_ANYEXT) {
@@ -150,7 +191,7 @@ public:
       } else {
         // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
         // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
-        if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+        if (isConstantUnsupported(DstTy))
           return false;
         LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
         Builder.buildConstant(DstReg, 0);
@@ -162,6 +203,16 @@ public:
     return false;
   }
 
+  static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) {
+    if (OpTy.isVector() && DestTy.isVector())
+      return TargetOpcode::G_CONCAT_VECTORS;
+
+    if (OpTy.isVector() && !DestTy.isVector())
+      return TargetOpcode::G_BUILD_VECTOR;
+
+    return TargetOpcode::G_MERGE_VALUES;
+  }
+
   bool tryCombineMerges(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts) {
 
@@ -169,27 +220,33 @@ public:
       return false;
 
     unsigned NumDefs = MI.getNumOperands() - 1;
+    MachineInstr *SrcDef =
+        getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI);
+    if (!SrcDef)
+      return false;
 
-    unsigned MergingOpcode;
     LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
     LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
-    if (OpTy.isVector() && DestTy.isVector())
-      MergingOpcode = TargetOpcode::G_CONCAT_VECTORS;
-    else if (OpTy.isVector() && !DestTy.isVector())
-      MergingOpcode = TargetOpcode::G_BUILD_VECTOR;
-    else
-      MergingOpcode = TargetOpcode::G_MERGE_VALUES;
-
-    MachineInstr *MergeI =
-        getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI);
+    MachineInstr *MergeI = SrcDef;
+    unsigned ConvertOp = 0;
+
+    // Handle intermediate conversions
+    unsigned SrcOp = SrcDef->getOpcode();
+    if (isArtifactCast(SrcOp)) {
+      ConvertOp = SrcOp;
+      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
+    }
 
-    if (!MergeI)
+    // FIXME: Handle scalarizing concat_vectors (scalar result type with vector
+    // source)
+    unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy);
+    if (!MergeI || MergeI->getOpcode() != MergingOpcode)
       return false;
 
     const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
 
     if (NumMergeRegs < NumDefs) {
-      if (NumDefs % NumMergeRegs != 0)
+      if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0)
         return false;
 
       Builder.setInstr(MI);
@@ -202,7 +259,7 @@ public:
 
       const unsigned NewNumDefs = NumDefs / NumMergeRegs;
       for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
-        SmallVector<unsigned, 2> DstRegs;
+        SmallVector<Register, 2> DstRegs;
         for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
              ++j, ++DefIdx)
           DstRegs.push_back(MI.getOperand(DefIdx).getReg());
@@ -211,7 +268,7 @@ public:
       }
 
     } else if (NumMergeRegs > NumDefs) {
-      if (NumMergeRegs % NumDefs != 0)
+      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
         return false;
 
       Builder.setInstr(MI);
@@ -224,7 +281,7 @@ public:
 
       const unsigned NumRegs = NumMergeRegs / NumDefs;
       for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
-        SmallVector<unsigned, 2> Regs;
+        SmallVector<Register, 2> Regs;
         for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
              ++j, ++Idx)
           Regs.push_back(MergeI->getOperand(Idx).getReg());
@@ -233,10 +290,22 @@ public:
       }
 
     } else {
+      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+      if (ConvertOp) {
+        Builder.setInstr(MI);
+
+        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
+          Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
+                             {MergeSrc});
+        }
+
+        markInstAndDefDead(MI, *MergeI, DeadInsts);
+        return true;
+      }
       // FIXME: is a COPY appropriate if the types mismatch? We know both
       // registers are allocatable by now.
-      if (MRI.getType(MI.getOperand(0).getReg()) !=
-          MRI.getType(MergeI->getOperand(1).getReg()))
+      if (DestTy != MergeSrcTy)
         return false;
 
       for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
@@ -248,12 +317,77 @@ public:
     return true;
   }
 
+  static bool isMergeLikeOpcode(unsigned Opc) {
+    switch (Opc) {
+    case TargetOpcode::G_MERGE_VALUES:
+    case TargetOpcode::G_BUILD_VECTOR:
+    case TargetOpcode::G_CONCAT_VECTORS:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  bool tryCombineExtract(MachineInstr &MI,
+                         SmallVectorImpl<MachineInstr *> &DeadInsts) {
+    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
+
+    // Try to use the source registers from a G_MERGE_VALUES
+    //
+    // %2 = G_MERGE_VALUES %0, %1
+    // %3 = G_EXTRACT %2, N
+    // =>
+    //
+    // for N < %2.getSizeInBits() / 2
+    //     %3 = G_EXTRACT %0, N
+    //
+    // for N >= %2.getSizeInBits() / 2
+    //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits())
+
+    unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    MachineInstr *MergeI = MRI.getVRegDef(Src);
+    if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
+      return false;
+
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    LLT SrcTy = MRI.getType(Src);
+
+    // TODO: Do we need to check if the resulting extract is supported?
+    unsigned ExtractDstSize = DstTy.getSizeInBits();
+    unsigned Offset = MI.getOperand(2).getImm();
+    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
+    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
+    unsigned MergeSrcIdx = Offset / MergeSrcSize;
+
+    // Compute the offset of the last bit the extract needs.
+    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
+
+    // Can't handle the case where the extract spans multiple inputs.
+    if (MergeSrcIdx != EndMergeSrcIdx)
+      return false;
+
+    // TODO: We could modify MI in place in most cases.
+    Builder.setInstr(MI);
+    Builder.buildExtract(
+      MI.getOperand(0).getReg(),
+      MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+      Offset - MergeSrcIdx * MergeSrcSize);
+    markInstAndDefDead(MI, *MergeI, DeadInsts);
+    return true;
+  }
+
   /// Try to combine away MI.
   /// Returns true if it combined away the MI.
   /// Adds instructions that are dead as a result of the combine
   /// into DeadInsts, which can include MI.
   bool tryCombineInstruction(MachineInstr &MI,
-                             SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                             SmallVectorImpl<MachineInstr *> &DeadInsts,
+                             GISelObserverWrapper &WrapperObserver) {
+    // This might be a recursive call, and we might have DeadInsts already
+    // populated. To avoid bad things happening later with multiple vreg defs
+    // etc, process the dead instructions now if any.
+    if (!DeadInsts.empty())
+      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
     switch (MI.getOpcode()) {
     default:
       return false;
@@ -265,16 +399,35 @@ public:
       return tryCombineSExt(MI, DeadInsts);
     case TargetOpcode::G_UNMERGE_VALUES:
       return tryCombineMerges(MI, DeadInsts);
+    case TargetOpcode::G_EXTRACT:
+      return tryCombineExtract(MI, DeadInsts);
     case TargetOpcode::G_TRUNC: {
       bool Changed = false;
       for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
-        Changed |= tryCombineInstruction(Use, DeadInsts);
+        Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
       return Changed;
     }
     }
   }
 
 private:
+
+  static unsigned getArtifactSrcReg(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case TargetOpcode::COPY:
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_UNMERGE_VALUES:
+      return MI.getOperand(MI.getNumOperands() - 1).getReg();
+    case TargetOpcode::G_EXTRACT:
+      return MI.getOperand(1).getReg();
+    default:
+      llvm_unreachable("Not a legalization artifact happen");
+    }
+  }
+
   /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
   /// dead due to MI being killed, then mark DefMI as dead too.
   /// Some of the combines (extends(trunc)), try to walk through redundant
@@ -295,13 +448,15 @@ private:
     // and as a result, %3, %2, %1 are dead.
     MachineInstr *PrevMI = &MI;
     while (PrevMI != &DefMI) {
-      unsigned PrevRegSrc =
-          PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg();
+      unsigned PrevRegSrc = getArtifactSrcReg(*PrevMI);
+
       MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
       if (MRI.hasOneUse(PrevRegSrc)) {
         if (TmpDef != &DefMI) {
-          assert(TmpDef->getOpcode() == TargetOpcode::COPY &&
-                 "Expecting copy here");
+          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
+                  isArtifactCast(TmpDef->getOpcode())) &&
+                 "Expecting copy or artifact cast here");
+
           DeadInsts.push_back(TmpDef);
         }
       } else
@@ -312,6 +467,22 @@ private:
       DeadInsts.push_back(&DefMI);
   }
 
+  /// Erase the dead instructions in the list and call the observer hooks.
+  /// Normally the Legalizer will deal with erasing instructions that have been
+  /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
+  /// process instructions which have been marked dead, but otherwise break the
+  /// MIR by introducing multiple vreg defs. For those cases, allow the combines
+  /// to explicitly delete the instructions before we run into trouble.
+  void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
+                             GISelObserverWrapper &WrapperObserver) {
+    for (auto *DeadMI : DeadInsts) {
+      LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
+      WrapperObserver.erasingInstr(*DeadMI);
+      DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
+    }
+    DeadInsts.clear();
+  }
+
   /// Checks if the target legalizer info has specified anything about the
   /// instruction, or if unsupported.
   bool isInstUnsupported(const LegalityQuery &Query) const {
@@ -320,10 +491,23 @@ private:
     return Step.Action == Unsupported || Step.Action == NotFound;
   }
 
+  bool isInstLegal(const LegalityQuery &Query) const {
+    return LI.getAction(Query).Action == LegalizeActions::Legal;
+  }
+
+  bool isConstantUnsupported(LLT Ty) const {
+    if (!Ty.isVector())
+      return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
+
+    LLT EltTy = Ty.getElementType();
+    return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
+           isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
+  }
+
   /// Looks through copy instructions and returns the actual
   /// source register.
-  unsigned lookThroughCopyInstrs(unsigned Reg) {
-    unsigned TmpReg;
+  unsigned lookThroughCopyInstrs(Register Reg) {
+    Register TmpReg;
     while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
       if (MRI.getType(TmpReg).isValid())
         Reg = TmpReg;
diff --git a/include/llvm/CodeGen/GlobalISel/Legalizer.h b/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 8284ab6dac65..13cf3f7e694d 100644
--- a/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -1,9 +1,8 @@
-//== llvm/CodeGen/GlobalISel/LegalizePass.h ------------- -*- C++ -*-==//
+//== llvm/CodeGen/GlobalISel/Legalizer.h ---------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,11 @@ public:
         MachineFunctionProperties::Property::Legalized);
   }
 
+  MachineFunctionProperties getClearedProperties() const override {
+    return MachineFunctionProperties()
+      .set(MachineFunctionProperties::Property::NoPHIs);
+  }
+
   bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
                        const TargetInstrInfo &TII);
 
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 9b4ecf9284e3..a0f21e8b19d7 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -1,9 +1,8 @@
 //== llvm/CodeGen/GlobalISel/LegalizerHelper.h ---------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -87,7 +86,7 @@ public:
   /// Legalize a vector instruction by increasing the number of vector elements
   /// involved and ignoring the added elements later.
   LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
-                                    LLT WideTy);
+                                    LLT MoreTy);
 
   /// Expose MIRBuilder so clients can set their own RecordInsertInstruction
   /// functions
@@ -104,20 +103,127 @@ private:
   void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx,
                       unsigned ExtOpcode);
 
+  /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+  /// Use by truncating the operand's type to \p NarrowTy using G_TRUNC, and
+  /// replacing the vreg of the operand in place.
+  void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx);
+
   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
   /// Def by extending the operand's type to \p WideTy and truncating it back
   /// with the \p TruncOpcode, and replacing the vreg of the operand in place.
   void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0,
                       unsigned TruncOpcode = TargetOpcode::G_TRUNC);
 
+  /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+  /// Def by truncating the operand's type to \p NarrowTy, replacing in place
+  /// and extending back with \p ExtOpcode.
+  void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx,
+                       unsigned ExtOpcode);
+  /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+  /// Def by performing it with additional vector elements and extracting the
+  /// result elements, and replacing the vreg of the operand in place.
+  void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
+
+  /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+  /// Use by producing a vector with undefined high elements, extracting the
+  /// original vector type, and replacing the vreg of the operand in place.
+  void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
+
+  LegalizeResult
+  widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+  LegalizeResult
+  widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+  LegalizeResult
+  widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+  LegalizeResult
+  widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+
   /// Helper function to split a wide generic register into bitwise blocks with
   /// the given Type (which implies the number of blocks needed). The generic
   /// registers created are appended to Ops, starting at bit 0 of Reg.
-  void extractParts(unsigned Reg, LLT Ty, int NumParts,
-                    SmallVectorImpl<unsigned> &VRegs);
+  void extractParts(Register Reg, LLT Ty, int NumParts,
+                    SmallVectorImpl<Register> &VRegs);
+
+  /// Version which handles irregular splits.
+  bool extractParts(Register Reg, LLT RegTy, LLT MainTy,
+                    LLT &LeftoverTy,
+                    SmallVectorImpl<Register> &VRegs,
+                    SmallVectorImpl<Register> &LeftoverVRegs);
+
+  /// Helper function to build a wide generic register \p DstReg of type \p
+  /// RegTy from smaller parts. This will produce a G_MERGE_VALUES,
+  /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
+  /// for the types.
+  ///
+  /// \p PartRegs must be registers of type \p PartTy.
+  ///
+  /// If \p ResultTy does not evenly break into \p PartTy sized pieces, the
+  /// remainder must be specified with \p LeftoverRegs of type \p LeftoverTy.
+  void insertParts(Register DstReg, LLT ResultTy,
+                   LLT PartTy, ArrayRef<Register> PartRegs,
+                   LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
+
+  /// Perform generic multiplication of values held in multiple registers.
+  /// Generated instructions use only types NarrowTy and i1.
+  /// Destination can be same or two times size of the source.
+  void multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+                         ArrayRef<Register> Src1Regs,
+                         ArrayRef<Register> Src2Regs, LLT NarrowTy);
+
+public:
+  LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
+                                                unsigned TypeIdx, LLT NarrowTy);
+
+  /// Legalize a simple vector instruction where all operands are the same type
+  /// by splitting into multiple components.
+  LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT NarrowTy);
+
+  /// Legalize an instruction with a vector type where each operand may have a
+  /// different element type. All type indexes must have the same number of
+  /// elements.
+  LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI,
+                                                 unsigned TypeIdx, LLT NarrowTy);
+
+  LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT NarrowTy);
+
+  LegalizeResult
+  fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+  LegalizeResult
+  fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+  LegalizeResult fewerElementsVectorPhi(MachineInstr &MI,
+                                        unsigned TypeIdx, LLT NarrowTy);
+
+  LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+                                       LLT MoreTy);
+
+  LegalizeResult
+  reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+  LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+                                             LLT HalfTy, LLT ShiftAmtTy);
+
+  LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
+  LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
+  LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
   LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
+  LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
+  LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
+
+private:
   MachineRegisterInfo &MRI;
   const LegalizerInfo &LI;
   /// To keep track of changes made by the LegalizerHelper.
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 13776dd3e87d..513c98f2d23f 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/LegalizerInfo.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,6 +92,7 @@ enum LegalizeAction : std::uint8_t {
   UseLegacyRules,
 };
 } // end namespace LegalizeActions
+raw_ostream &operator<<(raw_ostream &OS, LegalizeActions::LegalizeAction Action);
 
 using LegalizeActions::LegalizeAction;
 
@@ -123,6 +123,7 @@ struct LegalityQuery {
 
   struct MemDesc {
     uint64_t SizeInBits;
+    uint64_t AlignInBits;
     AtomicOrdering Ordering;
   };
 
@@ -165,13 +166,23 @@ using LegalizeMutation =
     std::function<std::pair<unsigned, LLT>(const LegalityQuery &)>;
 
 namespace LegalityPredicates {
-struct TypePairAndMemSize {
+struct TypePairAndMemDesc {
   LLT Type0;
   LLT Type1;
   uint64_t MemSize;
+  uint64_t Align;
+
+  bool operator==(const TypePairAndMemDesc &Other) const {
+    return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+           Align == Other.Align &&
+           MemSize == Other.MemSize;
+  }
 
-  bool operator==(const TypePairAndMemSize &Other) const {
+  /// \returns true if this memory access is legal for the access described
+  /// by \p Other (The alignment is sufficient for the size and result type).
+  bool isCompatible(const TypePairAndMemDesc &Other) const {
     return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+           Align >= Other.Align &&
            MemSize == Other.MemSize;
   }
 };
@@ -200,20 +211,45 @@ typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
               std::initializer_list<std::pair<LLT, LLT>> TypesInit);
 /// True iff the given types for the given pair of type indexes is one of the
 /// specified type pairs.
-LegalityPredicate typePairAndMemSizeInSet(
+LegalityPredicate typePairAndMemDescInSet(
     unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
-    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit);
+    std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit);
 /// True iff the specified type index is a scalar.
 LegalityPredicate isScalar(unsigned TypeIdx);
+/// True iff the specified type index is a vector.
+LegalityPredicate isVector(unsigned TypeIdx);
+/// True iff the specified type index is a pointer (with any address space).
+LegalityPredicate isPointer(unsigned TypeIdx);
+/// True iff the specified type index is a pointer with the specified address
+/// space.
+LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
+
 /// True iff the specified type index is a scalar that's narrower than the given
 /// size.
 LegalityPredicate narrowerThan(unsigned TypeIdx, unsigned Size);
+
 /// True iff the specified type index is a scalar that's wider than the given
 /// size.
 LegalityPredicate widerThan(unsigned TypeIdx, unsigned Size);
+
+/// True iff the specified type index is a scalar or vector with an element type
+/// that's narrower than the given size.
+LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size);
+
+/// True iff the specified type index is a scalar or a vector with an element
+/// type that's wider than the given size.
+LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size);
+
 /// True iff the specified type index is a scalar whose size is not a power of
 /// 2.
 LegalityPredicate sizeNotPow2(unsigned TypeIdx);
+
+/// True iff the specified type index is a scalar or vector whose element size
+/// is not a power of 2.
+LegalityPredicate scalarOrEltSizeNotPow2(unsigned TypeIdx);
+
+/// True iff the specified type indices are both the same bit size.
+LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1);
 /// True iff the specified MMO index has a size that is not a power of 2
 LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
 /// True iff the specified type index is a vector whose element count is not a
@@ -228,13 +264,25 @@ LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx,
 namespace LegalizeMutations {
 /// Select this specific type for the given type index.
 LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty);
+
 /// Keep the same type as the given type index.
 LegalizeMutation changeTo(unsigned TypeIdx, unsigned FromTypeIdx);
-/// Widen the type for the given type index to the next power of 2.
-LegalizeMutation widenScalarToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+
+/// Keep the same scalar or element type as the given type index.
+LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx);
+
+/// Keep the same scalar or element type as the given type.
+LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
+
+/// Widen the scalar type or vector element type for the given type index to the
+/// next power of 2.
+LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+
 /// Add more elements to the type for the given type index to the next power of
 /// 2.
 LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Break up the vector type for the given type index into the element type.
+LegalizeMutation scalarize(unsigned TypeIdx);
 } // end namespace LegalizeMutations
 
 /// A single rule in a legalizer info ruleset.
@@ -419,13 +467,13 @@ public:
     return actionFor(LegalizeAction::Legal, Types);
   }
   /// The instruction is legal when type indexes 0 and 1 along with the memory
-  /// size is any type and size tuple in the given list.
-  LegalizeRuleSet &legalForTypesWithMemSize(
-      std::initializer_list<LegalityPredicates::TypePairAndMemSize>
-          TypesAndMemSize) {
+  /// size and minimum alignment is any type and size tuple in the given list.
+  LegalizeRuleSet &legalForTypesWithMemDesc(
+      std::initializer_list<LegalityPredicates::TypePairAndMemDesc>
+          TypesAndMemDesc) {
     return actionIf(LegalizeAction::Legal,
-                    LegalityPredicates::typePairAndMemSizeInSet(
-                        typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemSize));
+                    LegalityPredicates::typePairAndMemDescInSet(
+                        typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemDesc));
   }
   /// The instruction is legal when type indexes 0 and 1 are both in the given
   /// list. That is, the type pair is in the cartesian product of the list.
@@ -438,6 +486,20 @@ public:
                                             std::initializer_list<LLT> Types1) {
     return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1);
   }
+  /// The instruction is legal when type indexes 0, 1, and 2 are each in their
+  /// respective lists.
+  LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1,
+                                            std::initializer_list<LLT> Types2) {
+    return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1,
+                                     Types2);
+  }
+
+  LegalizeRuleSet &alwaysLegal() {
+    using namespace LegalizeMutations;
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Legal, always);
+  }
 
   /// The instruction is lowered.
   LegalizeRuleSet &lower() {
@@ -588,6 +650,13 @@ public:
   LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) {
     return actionFor(LegalizeAction::Custom, Types);
   }
+
+  /// The instruction is custom when type indexes 0 and 1 form any type pair in
+  /// the given list.
+  LegalizeRuleSet &customFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+    return actionFor(LegalizeAction::Custom, Types);
+  }
+
   LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
     return actionForCartesianProduct(LegalizeAction::Custom, Types);
   }
@@ -597,13 +666,29 @@ public:
     return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
   }
 
+  /// Unconditionally custom lower.
+  LegalizeRuleSet &custom() {
+    return customIf(always);
+  }
+
   /// Widen the scalar to the next power of two that is at least MinSize.
   /// No effect if the type is not a scalar or is a power of two.
   LegalizeRuleSet &widenScalarToNextPow2(unsigned TypeIdx,
                                          unsigned MinSize = 0) {
     using namespace LegalityPredicates;
-    return actionIf(LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)),
-                    LegalizeMutations::widenScalarToNextPow2(TypeIdx, MinSize));
+    return actionIf(
+        LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)),
+        LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
+  }
+
+  /// Widen the scalar or vector element type to the next power of two that is
+  /// at least MinSize.  No effect if the scalar size is a power of two.
+  LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx,
+                                              unsigned MinSize = 0) {
+    using namespace LegalityPredicates;
+    return actionIf(
+        LegalizeAction::WidenScalar, scalarOrEltSizeNotPow2(typeIdx(TypeIdx)),
+        LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
   }
 
   LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) {
@@ -612,6 +697,32 @@ public:
                     Mutation);
   }
 
+  LegalizeRuleSet &scalarize(unsigned TypeIdx) {
+    using namespace LegalityPredicates;
+    return actionIf(LegalizeAction::FewerElements, isVector(typeIdx(TypeIdx)),
+                    LegalizeMutations::scalarize(TypeIdx));
+  }
+
+  /// Ensure the scalar or element is at least as wide as Ty.
+  LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+    using namespace LegalityPredicates;
+    using namespace LegalizeMutations;
+    return actionIf(LegalizeAction::WidenScalar,
+                    scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits()),
+                    changeElementTo(typeIdx(TypeIdx), Ty));
+  }
+
+  /// Ensure the scalar or element is at least as wide as Ty.
+  LegalizeRuleSet &minScalarOrEltIf(LegalityPredicate Predicate,
+                                    unsigned TypeIdx, const LLT &Ty) {
+    using namespace LegalityPredicates;
+    using namespace LegalizeMutations;
+    return actionIf(LegalizeAction::WidenScalar,
+                    all(Predicate, scalarOrEltNarrowerThan(
+                                       TypeIdx, Ty.getScalarSizeInBits())),
+                    changeElementTo(typeIdx(TypeIdx), Ty));
+  }
+
   /// Ensure the scalar is at least as wide as Ty.
   LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT &Ty) {
     using namespace LegalityPredicates;
@@ -621,6 +732,15 @@ public:
                     changeTo(typeIdx(TypeIdx), Ty));
   }
 
+  /// Ensure the scalar or element is at most as wide as Ty.
+  LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+    using namespace LegalityPredicates;
+    using namespace LegalizeMutations;
+    return actionIf(LegalizeAction::NarrowScalar,
+                    scalarOrEltWiderThan(TypeIdx, Ty.getScalarSizeInBits()),
+                    changeElementTo(typeIdx(TypeIdx), Ty));
+  }
+
   /// Ensure the scalar is at most as wide as Ty.
   LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT &Ty) {
     using namespace LegalityPredicates;
@@ -637,12 +757,12 @@ public:
                                const LLT &Ty) {
     using namespace LegalityPredicates;
     using namespace LegalizeMutations;
-    return actionIf(LegalizeAction::NarrowScalar,
-                    [=](const LegalityQuery &Query) {
-                      return widerThan(TypeIdx, Ty.getSizeInBits()) &&
-                             Predicate(Query);
-                    },
-                    changeTo(typeIdx(TypeIdx), Ty));
+    return actionIf(
+        LegalizeAction::NarrowScalar,
+        [=](const LegalityQuery &Query) {
+          return widerThan(TypeIdx, Ty.getSizeInBits()) && Predicate(Query);
+        },
+        changeElementTo(typeIdx(TypeIdx), Ty));
   }
 
   /// Limit the range of scalar sizes to MinTy and MaxTy.
@@ -652,6 +772,12 @@ public:
     return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
   }
 
+  /// Limit the range of scalar or element sizes to MinTy and MaxTy.
+  LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT &MinTy,
+                                    const LLT &MaxTy) {
+    return minScalarOrElt(TypeIdx, MinTy).maxScalarOrElt(TypeIdx, MaxTy);
+  }
+
   /// Widen the scalar to match the size of another.
   LegalizeRuleSet &minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx) {
     typeIdx(TypeIdx);
@@ -661,8 +787,25 @@ public:
                  Query.Types[TypeIdx].getSizeInBits();
         },
         [=](const LegalityQuery &Query) {
+          LLT T = Query.Types[LargeTypeIdx];
           return std::make_pair(TypeIdx,
-                                Query.Types[LargeTypeIdx].getElementType());
+                                T.isVector() ? T.getElementType() : T);
+        });
+  }
+
+  /// Conditionally widen the scalar or elt to match the size of another.
+  LegalizeRuleSet &minScalarEltSameAsIf(LegalityPredicate Predicate,
+                                   unsigned TypeIdx, unsigned LargeTypeIdx) {
+    typeIdx(TypeIdx);
+    return widenScalarIf(
+        [=](const LegalityQuery &Query) {
+          return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
+                     Query.Types[TypeIdx].getScalarSizeInBits() &&
+                 Predicate(Query);
+        },
+        [=](const LegalityQuery &Query) {
+          LLT T = Query.Types[LargeTypeIdx];
+          return std::make_pair(TypeIdx, T);
         });
   }
 
@@ -691,7 +834,7 @@ public:
         [=](const LegalityQuery &Query) {
           LLT VecTy = Query.Types[TypeIdx];
           return std::make_pair(
-              TypeIdx, LLT::vector(MinElements, VecTy.getScalarSizeInBits()));
+              TypeIdx, LLT::vector(MinElements, VecTy.getElementType()));
         });
   }
   /// Limit the number of elements in EltTy vectors to at most MaxElements.
@@ -708,10 +851,8 @@ public:
         },
         [=](const LegalityQuery &Query) {
           LLT VecTy = Query.Types[TypeIdx];
-          if (MaxElements == 1)
-            return std::make_pair(TypeIdx, VecTy.getElementType());
-          return std::make_pair(
-              TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits()));
+          LLT NewTy = LLT::scalarOrVector(MaxElements, VecTy.getElementType());
+          return std::make_pair(TypeIdx, NewTy);
         });
   }
   /// Limit the number of elements for the given vectors to at least MinTy's
@@ -962,12 +1103,22 @@ public:
   LegalizeActionStep getAction(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) const;
 
+  bool isLegal(const LegalityQuery &Query) const {
+    return getAction(Query).Action == LegalizeAction::Legal;
+  }
   bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+  bool isLegalOrCustom(const MachineInstr &MI,
+                       const MachineRegisterInfo &MRI) const;
 
   virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
                               MachineIRBuilder &MIRBuilder,
                               GISelChangeObserver &Observer) const;
 
+  /// Return true if MI is either legal or has been legalized and false
+  /// if not legal.
+  virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                 MachineIRBuilder &MIRBuilder) const;
+
 private:
   /// Determine what action should be taken to legalize the given generic
   /// instruction opcode, type-index and type. Requires computeTables to have
diff --git a/include/llvm/CodeGen/GlobalISel/Localizer.h b/include/llvm/CodeGen/GlobalISel/Localizer.h
index 1e2d4763e5e1..06de5800b8b7 100644
--- a/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -1,9 +1,8 @@
 //== llvm/CodeGen/GlobalISel/Localizer.h - Localizer -------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,12 +21,14 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
 #define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
 // Forward declarations.
 class MachineRegisterInfo;
+class TargetTransformInfo;
 
 /// This pass implements the localization mechanism described at the
 /// top of this file. One specificity of the implementation is that
@@ -44,9 +45,11 @@ private:
   /// MRI contains all the register class/bank information that this
   /// pass uses and updates.
   MachineRegisterInfo *MRI;
+  /// TTI used for getting remat costs for instructions.
+  TargetTransformInfo *TTI;
 
   /// Check whether or not \p MI needs to be moved close to its uses.
-  static bool shouldLocalize(const MachineInstr &MI);
+  bool shouldLocalize(const MachineInstr &MI);
 
   /// Check if \p MOUse is used in the same basic block as \p Def.
   /// If the use is in the same block, we say it is local.
@@ -58,6 +61,15 @@ private:
   /// Initialize the field members using \p MF.
   void init(MachineFunction &MF);
 
+  typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT;
+
+  /// Do inter-block localization from the entry block.
+  bool localizeInterBlock(MachineFunction &MF,
+                          LocalizedSetVecT &LocalizedInstrs);
+
+  /// Do intra-block localization of already localized instructions.
+  bool localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs);
+
 public:
   Localizer();
 
diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index f77f9a8df7ee..13eddd9539fa 100644
--- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -1,9 +1,8 @@
-//== ----- llvm/CodeGen/GlobalISel/MIPatternMatch.h --------------------- == //
+//==------ llvm/CodeGen/GlobalISel/MIPatternMatch.h -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,8 +30,7 @@ template <typename SubPatternT> struct OneUse_match {
   SubPatternT SubPat;
   OneUse_match(const SubPatternT &SP) : SubPat(SP) {}
 
-  template <typename OpTy>
-  bool match(const MachineRegisterInfo &MRI, unsigned Reg) {
+  bool match(MachineRegisterInfo &MRI, unsigned Reg) {
     return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg);
   }
 };
@@ -162,7 +160,7 @@ template <typename Class> struct bind_ty {
   }
 };
 
-inline bind_ty<unsigned> m_Reg(unsigned &R) { return R; }
+inline bind_ty<Register> m_Reg(Register &R) { return R; }
 inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; }
 inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; }
 
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 37de8f030410..10d712176b1b 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.h - MIBuilder --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -60,13 +59,15 @@ struct MachineIRBuilderState {
 class DstOp {
   union {
     LLT LLTTy;
-    unsigned Reg;
+    Register Reg;
     const TargetRegisterClass *RC;
   };
 
 public:
   enum class DstType { Ty_LLT, Ty_Reg, Ty_RC };
   DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {}
+  DstOp(Register R) : Reg(R), Ty(DstType::Ty_Reg) {}
+  DstOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(DstType::Ty_Reg) {}
   DstOp(const LLT &T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
   DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {}
 
@@ -96,7 +97,7 @@ public:
     llvm_unreachable("Unrecognised DstOp::DstType enum");
   }
 
-  unsigned getReg() const {
+  Register getReg() const {
     assert(Ty == DstType::Ty_Reg && "Not a register");
     return Reg;
   }
@@ -119,13 +120,14 @@ private:
 class SrcOp {
   union {
     MachineInstrBuilder SrcMIB;
-    unsigned Reg;
+    Register Reg;
     CmpInst::Predicate Pred;
   };
 
 public:
   enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
-  SrcOp(unsigned R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+  SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+  SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {}
   SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
   SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
 
@@ -155,7 +157,7 @@ public:
     llvm_unreachable("Unrecognised SrcOp::SrcType enum");
   }
 
-  unsigned getReg() const {
+  Register getReg() const {
     switch (Ty) {
     case SrcType::Ty_Predicate:
       llvm_unreachable("Not a register operand");
@@ -202,6 +204,7 @@ protected:
   void validateTruncExt(const LLT &Dst, const LLT &Src, bool IsExtend);
 
   void validateBinaryOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
+  void validateShiftOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
 
   void validateSelectOp(const LLT &ResTy, const LLT &TstTy, const LLT &Op0Ty,
                         const LLT &Op1Ty);
@@ -230,6 +233,15 @@ public:
     return *State.MF;
   }
 
+  const MachineFunction &getMF() const {
+    assert(State.MF && "MachineFunction is not set");
+    return *State.MF;
+  }
+
+  const DataLayout &getDataLayout() const {
+    return getMF().getFunction().getParent()->getDataLayout();
+  }
+
   /// Getter for DebugLoc
   const DebugLoc &getDL() { return State.DL; }
 
@@ -310,13 +322,13 @@ public:
 
   /// Build and insert a DBG_VALUE instruction expressing the fact that the
   /// associated \p Variable lives in \p Reg (suitably modified by \p Expr).
-  MachineInstrBuilder buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+  MachineInstrBuilder buildDirectDbgValue(Register Reg, const MDNode *Variable,
                                           const MDNode *Expr);
 
   /// Build and insert a DBG_VALUE instruction expressing the fact that the
   /// associated \p Variable lives in memory at \p Reg (suitably modified by \p
   /// Expr).
-  MachineInstrBuilder buildIndirectDbgValue(unsigned Reg,
+  MachineInstrBuilder buildIndirectDbgValue(Register Reg,
                                             const MDNode *Variable,
                                             const MDNode *Expr);
 
@@ -345,7 +357,7 @@ public:
   /// \pre \p Res must be a generic virtual register with pointer type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx);
+  MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx);
 
   /// Build and insert \p Res = G_GLOBAL_VALUE \p GV
   ///
@@ -357,8 +369,7 @@ public:
   ///      in the same address space as \p GV.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV);
-
+  MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV);
 
   /// Build and insert \p Res = G_GEP \p Op0, \p Op1
   ///
@@ -371,8 +382,8 @@ public:
   /// \pre \p Op1 must be a generic virtual register with scalar type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
-                               unsigned Op1);
+  MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0,
+                               const SrcOp &Op1);
 
   /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
   ///
@@ -390,7 +401,7 @@ public:
   ///       type as \p Op0 or \p Op0 itself.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  Optional<MachineInstrBuilder> materializeGEP(unsigned &Res, unsigned Op0,
+  Optional<MachineInstrBuilder> materializeGEP(Register &Res, Register Op0,
                                                const LLT &ValueTy,
                                                uint64_t Value);
 
@@ -407,9 +418,24 @@ public:
   ///      be cleared in \p Op0.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
+  MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0,
                                    uint32_t NumBits);
 
+  /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
+  ///
+  /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and
+  /// sets \p CarryOut to 1 if the result overflowed in unsigned arithmetic.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers with the
+  /// same scalar type.
+  /// \pre \p CarryOut must be a generic virtual register with scalar type
+  /// (typically s1).
+  ///
+  /// \return The newly created instruction.
+  MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut,
+                                 const SrcOp &Op0, const SrcOp &Op1);
+
   /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
   /// \p Op1, \p CarryIn
   ///
@@ -458,6 +484,25 @@ public:
   /// \return The newly created instruction.
   MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
 
+  /// Build and insert a G_PTRTOINT instruction.
+  MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
+  }
+
+  /// Build and insert \p Dst = G_BITCAST \p Src
+  MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) {
+    return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src});
+  }
+
+  /// \return The opcode of the extension the target wants to use for boolean
+  /// values.
+  unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
+
+  /// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_SEXT \p Op, or
+  /// \p Res = G_ZEXT \p Op, depending on how the target extends boolean values.
+  MachineInstrBuilder buildBoolExt(const DstOp &Res, const SrcOp &Op,
+                                   bool IsFP);
+
   /// Build and insert \p Res = G_ZEXT \p Op
   ///
   /// G_ZEXT produces a register of the specified width, with bits 0 to
@@ -538,7 +583,7 @@ public:
   ///      depend on bit 0 (for now).
   ///
   /// \return The newly created instruction.
-  MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &Dest);
+  MachineInstrBuilder buildBrCond(Register Tst, MachineBasicBlock &Dest);
 
   /// Build and insert G_BRINDIRECT \p Tgt
   ///
@@ -548,7 +593,21 @@ public:
   /// \pre \p Tgt must be a generic virtual register with pointer type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildBrIndirect(unsigned Tgt);
+  MachineInstrBuilder buildBrIndirect(Register Tgt);
+
+  /// Build and insert G_BRJT \p TablePtr, \p JTI, \p IndexReg
+  ///
+  /// G_BRJT is a jump table branch using a table base pointer \p TablePtr,
+  /// jump table index \p JTI and index \p IndexReg
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p TablePtr must be a generic virtual register with pointer type.
+  /// \pre \p JTI must be a jump table index.
+  /// \pre \p IndexReg must be a generic virtual register with pointer type.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildBrJT(Register TablePtr, unsigned JTI,
+                                Register IndexReg);
 
   /// Build and insert \p Res = G_CONSTANT \p Val
   ///
@@ -572,6 +631,7 @@ public:
   ///
   /// \return The newly created instruction.
   MachineInstrBuilder buildConstant(const DstOp &Res, int64_t Val);
+  MachineInstrBuilder buildConstant(const DstOp &Res, const APInt &Val);
 
   /// Build and insert \p Res = G_FCONSTANT \p Val
   ///
@@ -586,6 +646,7 @@ public:
                                              const ConstantFP &Val);
 
   MachineInstrBuilder buildFConstant(const DstOp &Res, double Val);
+  MachineInstrBuilder buildFConstant(const DstOp &Res, const APFloat &Val);
 
   /// Build and insert \p Res = COPY Op
   ///
@@ -605,7 +666,7 @@ public:
   /// \pre \p Addr must be a generic virtual register with pointer type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr,
+  MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr,
                                 MachineMemOperand &MMO);
 
   /// Build and insert `Res = <opcode> Addr, MMO`.
@@ -617,8 +678,8 @@ public:
   /// \pre \p Addr must be a generic virtual register with pointer type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildLoadInstr(unsigned Opcode, unsigned Res,
-                                     unsigned Addr, MachineMemOperand &MMO);
+  MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res,
+                                     const SrcOp &Addr, MachineMemOperand &MMO);
 
   /// Build and insert `G_STORE Val, Addr, MMO`.
   ///
@@ -629,7 +690,7 @@ public:
   /// \pre \p Addr must be a generic virtual register with pointer type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildStore(unsigned Val, unsigned Addr,
+  MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr,
                                  MachineMemOperand &MMO);
 
   /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
@@ -638,7 +699,7 @@ public:
   /// \pre \p Res and \p Src must be generic virtual registers.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index);
+  MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index);
 
   /// Build and insert \p Res = IMPLICIT_DEF.
   MachineInstrBuilder buildUndef(const DstOp &Res);
@@ -656,7 +717,7 @@ public:
   /// \pre The bits defined by each Op (derived from index and scalar size) must
   ///      not overlap.
   /// \pre \p Indices must be in ascending order of bit position.
-  void buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+  void buildSequence(Register Res, ArrayRef<Register> Ops,
                      ArrayRef<uint64_t> Indices);
 
   /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ...
@@ -670,7 +731,7 @@ public:
   /// \pre The type of all \p Ops registers must be identical.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<unsigned> Ops);
+  MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<Register> Ops);
 
   /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
   ///
@@ -683,7 +744,10 @@ public:
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildUnmerge(ArrayRef<LLT> Res, const SrcOp &Op);
-  MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, const SrcOp &Op);
+  MachineInstrBuilder buildUnmerge(ArrayRef<Register> Res, const SrcOp &Op);
+
+  /// Build and insert an unmerge of \p Res sized pieces to cover \p Op
+  MachineInstrBuilder buildUnmerge(LLT Res, const SrcOp &Op);
 
   /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ...
   ///
@@ -695,7 +759,12 @@ public:
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildBuildVector(const DstOp &Res,
-                                       ArrayRef<unsigned> Ops);
+                                       ArrayRef<Register> Ops);
+
+  /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill
+  /// the number of elements
+  MachineInstrBuilder buildSplatVector(const DstOp &Res,
+                                       const SrcOp &Src);
 
   /// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ...
   ///
@@ -711,7 +780,7 @@ public:
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
-                                            ArrayRef<unsigned> Ops);
+                                            ArrayRef<Register> Ops);
 
   /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
   ///
@@ -725,10 +794,10 @@ public:
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildConcatVectors(const DstOp &Res,
-                                         ArrayRef<unsigned> Ops);
+                                         ArrayRef<Register> Ops);
 
-  MachineInstrBuilder buildInsert(unsigned Res, unsigned Src,
-                                  unsigned Op, unsigned Index);
+  MachineInstrBuilder buildInsert(Register Res, Register Src,
+                                  Register Op, unsigned Index);
 
   /// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
   /// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
@@ -740,7 +809,9 @@ public:
   /// \pre setBasicBlock or setMI must have been called.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res,
+  MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res,
+                                     bool HasSideEffects);
+  MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res,
                                      bool HasSideEffects);
 
   /// Build and insert \p Res = G_FPTRUNC \p Op
@@ -855,8 +926,8 @@ public:
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder
-  buildAtomicCmpXchgWithSuccess(unsigned OldValRes, unsigned SuccessRes,
-                                unsigned Addr, unsigned CmpVal, unsigned NewVal,
+  buildAtomicCmpXchgWithSuccess(Register OldValRes, Register SuccessRes,
+                                Register Addr, Register CmpVal, Register NewVal,
                                 MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal,
@@ -873,8 +944,8 @@ public:
   ///      registers of the same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
-                                         unsigned CmpVal, unsigned NewVal,
+  MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr,
+                                         Register CmpVal, Register NewVal,
                                          MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_<Opcode> Addr, Val, MMO`.
@@ -890,8 +961,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
-                                     unsigned Addr, unsigned Val,
+  MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes,
+                                     Register Addr, Register Val,
                                      MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
@@ -906,8 +977,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWXchg(Register OldValRes, Register Addr,
+                                         Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_ADD Addr, Val, MMO`.
   ///
@@ -921,8 +992,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWAdd(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_SUB Addr, Val, MMO`.
   ///
@@ -936,8 +1007,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWSub(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_AND Addr, Val, MMO`.
   ///
@@ -951,8 +1022,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWAnd(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_NAND Addr, Val, MMO`.
   ///
@@ -967,8 +1038,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWNand(Register OldValRes, Register Addr,
+                                         Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_OR Addr, Val, MMO`.
   ///
@@ -982,8 +1053,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
-                                       unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWOr(Register OldValRes, Register Addr,
+                                       Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_XOR Addr, Val, MMO`.
   ///
@@ -997,8 +1068,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
-                                        unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWXor(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_MAX Addr, Val, MMO`.
   ///
@@ -1013,8 +1084,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
-                                        unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWMax(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_MIN Addr, Val, MMO`.
   ///
@@ -1029,8 +1100,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
-                                        unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWMin(Register OldValRes, Register Addr,
+                                        Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMAX Addr, Val, MMO`.
   ///
@@ -1045,8 +1116,8 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWUmax(Register OldValRes, Register Addr,
+                                         Register Val, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMIN Addr, Val, MMO`.
   ///
@@ -1061,8 +1132,11 @@ public:
   ///      same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
-                                         unsigned Val, MachineMemOperand &MMO);
+  MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr,
+                                         Register Val, MachineMemOperand &MMO);
+
+  /// Build and insert `G_FENCE Ordering, Scope`.
+  MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
 
   /// Build and insert \p Res = G_BLOCK_ADDR \p BA
   ///
@@ -1072,7 +1146,7 @@ public:
   /// \pre \p Res must be a generic virtual register of a pointer type.
   ///
   /// \return The newly created instruction.
-  MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA);
+  MachineInstrBuilder buildBlockAddress(Register Res, const BlockAddress *BA);
 
   /// Build and insert \p Res = G_ADD \p Op0, \p Op1
   ///
@@ -1124,6 +1198,36 @@ public:
     return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags);
   }
 
+  MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0,
+                                 const SrcOp &Src1,
+                                 Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_UMULH, {Dst}, {Src0, Src1}, Flags);
+  }
+
+  MachineInstrBuilder buildSMulH(const DstOp &Dst, const SrcOp &Src0,
+                                 const SrcOp &Src1,
+                                 Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
+  }
+
+  MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
+                               const SrcOp &Src1,
+                               Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_SHL, {Dst}, {Src0, Src1}, Flags);
+  }
+
+  MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1,
+                                Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_LSHR, {Dst}, {Src0, Src1}, Flags);
+  }
+
+  MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1,
+                                Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_ASHR, {Dst}, {Src0, Src1}, Flags);
+  }
+
   /// Build and insert \p Res = G_AND \p Op0, \p Op1
   ///
   /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p
@@ -1155,6 +1259,137 @@ public:
     return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1});
   }
 
+  /// Build and insert \p Res = G_XOR \p Op0, \p Op1
+  MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0,
+                               const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert a bitwise not,
+  /// \p NegOne = G_CONSTANT -1
+  /// \p Res = G_XOR \p Op0, NegOne
+  MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0) {
+    auto NegOne = buildConstant(Dst.getLLTTy(*getMRI()), -1);
+    return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
+  }
+
+  /// Build and insert \p Res = G_CTPOP \p Src0
+  MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_CTLZ \p Src0
+  MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTLZ, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_CTLZ_ZERO_UNDEF \p Src0
+  MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_CTTZ \p Src0
+  MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTTZ, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_CTTZ_ZERO_UNDEF \p Src0
+  MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_FADD \p Op0, \p Op1
+  MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_FSUB \p Op0, \p Op1
+  MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
+  MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
+                               const SrcOp &Src1, const SrcOp &Src2) {
+    return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2});
+  }
+
+  /// Build and insert \p Res = G_FNEG \p Op0
+  MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_FABS \p Op0
+  MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Dst = G_FCANONICALIZE \p Src0
+  MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0,
+                                         Optional<unsigned> Flags = None) {
+    return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags);
+  }
+
+  /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
+  MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
+                                     const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_FCOPYSIGN, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_UITOFP \p Src0
+  MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_UITOFP, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_SITOFP \p Src0
+  MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_SITOFP, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_FPTOUI \p Src0
+  MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOUI, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_FPTOSI \p Src0
+  MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
+  }
+
+  /// Build and insert \p Res = G_SMIN \p Op0, \p Op1
+  MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_SMIN, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_SMAX \p Op0, \p Op1
+  MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_SMAX, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_UMIN \p Op0, \p Op1
+  MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_UMIN, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_UMAX \p Op0, \p Op1
+  MachineInstrBuilder buildUMax(const DstOp &Dst, const SrcOp &Src0,
+                                const SrcOp &Src1) {
+    return buildInstr(TargetOpcode::G_UMAX, {Dst}, {Src0, Src1});
+  }
+
+  /// Build and insert \p Res = G_JUMP_TABLE \p JTI
+  ///
+  /// G_JUMP_TABLE sets \p Res to the address of the jump table specified by
+  /// the jump table index \p JTI.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
+
   virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
                                          ArrayRef<SrcOp> SrcOps,
                                          Optional<unsigned> Flags = None);
diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index c53ae416e60b..d9d076ba312c 100644
--- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -1,9 +1,8 @@
 //=- llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -524,7 +523,7 @@ private:
   /// \p OnlyAssign == true means that \p Reg just needs to be assigned a
   /// register bank.  I.e., no repairing is necessary to have the
   /// assignment match.
-  bool assignmentMatch(unsigned Reg,
+  bool assignmentMatch(Register Reg,
                        const RegisterBankInfo::ValueMapping &ValMapping,
                        bool &OnlyAssign) const;
 
@@ -563,7 +562,7 @@ private:
   bool repairReg(MachineOperand &MO,
                  const RegisterBankInfo::ValueMapping &ValMapping,
                  RegBankSelect::RepairingPlacement &RepairPt,
-                 const iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+                 const iterator_range<SmallVectorImpl<Register>::const_iterator>
                      &NewVRegs);
 
   /// Return the cost of the instruction needed to map \p MO to \p ValMapping.
@@ -634,6 +633,11 @@ public:
         MachineFunctionProperties::Property::RegBankSelected);
   }
 
+  MachineFunctionProperties getClearedProperties() const override {
+    return MachineFunctionProperties()
+      .set(MachineFunctionProperties::Property::NoPHIs);
+  }
+
   /// Walk through \p MF and assign a register bank to every virtual register
   /// that are still mapped to nothing.
   /// The target needs to provide a RegisterBankInfo and in particular
diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/include/llvm/CodeGen/GlobalISel/RegisterBank.h
index d5612e17393c..f528d1a46012 100644
--- a/include/llvm/CodeGen/GlobalISel/RegisterBank.h
+++ b/include/llvm/CodeGen/GlobalISel/RegisterBank.h
@@ -1,9 +1,8 @@
 //==-- llvm/CodeGen/GlobalISel/RegisterBank.h - Register Bank ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index c33b32b2db40..e84b1c3ea8b1 100644
--- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <initializer_list>
@@ -161,6 +161,10 @@ public:
     const PartialMapping *begin() const { return BreakDown; }
     const PartialMapping *end() const { return BreakDown + NumBreakDowns; }
 
+    /// \return true if all partial mappings are the same size and register
+    /// bank.
+    bool partsAllUniform() const;
+
     /// Check if this ValueMapping is valid.
     bool isValid() const { return BreakDown && NumBreakDowns; }
 
@@ -190,7 +194,7 @@ public:
     unsigned Cost = 0;
 
     /// Mapping of all the operands.
-    const ValueMapping *OperandsMapping;
+    const ValueMapping *OperandsMapping = nullptr;
 
     /// Number of operands.
     unsigned NumOperands = 0;
@@ -207,15 +211,11 @@ public:
     /// The rationale is that it is more efficient for the optimizers
     /// to be able to assume that the mapping of the ith operand is
     /// at the index i.
-    ///
-    /// \pre ID != InvalidMappingID
     InstructionMapping(unsigned ID, unsigned Cost,
                        const ValueMapping *OperandsMapping,
                        unsigned NumOperands)
         : ID(ID), Cost(Cost), OperandsMapping(OperandsMapping),
           NumOperands(NumOperands) {
-      assert(getID() != InvalidMappingID &&
-             "Use the default constructor for invalid mapping");
     }
 
     /// Default constructor.
@@ -282,7 +282,7 @@ public:
     SmallVector<int, 8> OpToNewVRegIdx;
 
     /// Hold the registers that will be used to map MI with InstrMapping.
-    SmallVector<unsigned, 8> NewVRegs;
+    SmallVector<Register, 8> NewVRegs;
 
     /// Current MachineRegisterInfo, used to create new virtual registers.
     MachineRegisterInfo &MRI;
@@ -303,15 +303,15 @@ public:
     /// \return The iterator range for the space created.
     //
     /// \pre getMI().getOperand(OpIdx).isReg()
-    iterator_range<SmallVectorImpl<unsigned>::iterator>
+    iterator_range<SmallVectorImpl<Register>::iterator>
     getVRegsMem(unsigned OpIdx);
 
     /// Get the end iterator for a range starting at \p StartIdx and
     /// spannig \p NumVal in NewVRegs.
     /// \pre StartIdx + NumVal <= NewVRegs.size()
-    SmallVectorImpl<unsigned>::const_iterator
+    SmallVectorImpl<Register>::const_iterator
     getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const;
-    SmallVectorImpl<unsigned>::iterator getNewVRegsEnd(unsigned StartIdx,
+    SmallVectorImpl<Register>::iterator getNewVRegsEnd(unsigned StartIdx,
                                                        unsigned NumVal);
 
   public:
@@ -357,7 +357,7 @@ public:
     ///
     /// \post the \p PartialMapIdx-th register of the value mapping of the \p
     /// OpIdx-th operand has been set.
-    void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, unsigned NewVReg);
+    void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, Register NewVReg);
 
     /// Get all the virtual registers required to map the \p OpIdx-th operand of
     /// the instruction.
@@ -371,7 +371,7 @@ public:
     ///
     /// \pre getMI().getOperand(OpIdx).isReg()
     /// \pre ForDebug || All partial mappings have been set a register
-    iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+    iterator_range<SmallVectorImpl<Register>::const_iterator>
     getVRegs(unsigned OpIdx, bool ForDebug = false) const;
 
     /// Print this operands mapper on dbgs() stream.
@@ -435,7 +435,7 @@ protected:
   /// Get the MinimalPhysRegClass for Reg.
   /// \pre Reg is a physical register.
   const TargetRegisterClass &
-  getMinimalPhysRegClass(unsigned Reg, const TargetRegisterInfo &TRI) const;
+  getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
 
   /// Try to get the mapping of \p MI.
   /// See getInstrMapping for more details on what a mapping represents.
@@ -580,7 +580,7 @@ public:
   /// or a register bank, then this returns nullptr.
   ///
   /// \pre Reg != 0 (NoRegister)
-  const RegisterBank *getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+  const RegisterBank *getRegBank(Register Reg, const MachineRegisterInfo &MRI,
                                  const TargetRegisterInfo &TRI) const;
 
   /// Get the total number of register banks.
@@ -618,6 +618,21 @@ public:
     return &A != &B;
   }
 
+  /// \returns true if emitting a copy from \p Src to \p Dst is impossible.
+  bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src,
+                  unsigned Size) const {
+    return copyCost(Dst, Src, Size) == std::numeric_limits<unsigned>::max();
+  }
+
+  /// Get the cost of using \p ValMapping to decompose a register. This is
+  /// similar to ::copyCost, except for cases where multiple copy-like
+  /// operations need to be inserted. If the register is used as a source
+  /// operand and already has a bank assigned, \p CurBank is non-null.
+  virtual unsigned getBreakDownCost(const ValueMapping &ValMapping,
+                                    const RegisterBank *CurBank = nullptr) const {
+    return std::numeric_limits<unsigned>::max();
+  }
+
   /// Constrain the (possibly generic) virtual register \p Reg to \p RC.
   ///
   /// \pre \p Reg is a virtual register that either has a bank or a class.
@@ -626,7 +641,7 @@ public:
   /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel
   /// purpose, including non-select passes of GlobalISel
   static const TargetRegisterClass *
-  constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC,
+  constrainGenericRegister(Register Reg, const TargetRegisterClass &RC,
                            MachineRegisterInfo &MRI);
 
   /// Identifier used when the related instruction mapping instance
@@ -711,7 +726,7 @@ public:
   /// virtual register.
   ///
   /// \pre \p Reg != 0 (NoRegister).
-  unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI,
+  unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI,
                          const TargetRegisterInfo &TRI) const;
 
   /// Check that information hold by this instance make sense for the
diff --git a/include/llvm/CodeGen/GlobalISel/Types.h b/include/llvm/CodeGen/GlobalISel/Types.h
index 7b22e343a7f8..4fd7043ba02d 100644
--- a/include/llvm/CodeGen/GlobalISel/Types.h
+++ b/include/llvm/CodeGen/GlobalISel/Types.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h
index 82b791d35b2b..4cdaa48fb689 100644
--- a/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -1,9 +1,8 @@
 //==-- llvm/CodeGen/GlobalISel/Utils.h ---------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #define LLVM_CODEGEN_GLOBALISEL_UTILS_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Register.h"
 
 namespace llvm {
 
@@ -37,21 +37,37 @@ class ConstantFP;
 class APFloat;
 
 /// Try to constrain Reg to the specified register class. If this fails,
-/// create a new virtual register in the correct class and insert a COPY before
-/// \p InsertPt. The debug location of \p InsertPt is used for the new copy.
+/// create a new virtual register in the correct class.
 ///
 /// \return The virtual register constrained to the right register class.
 unsigned constrainRegToClass(MachineRegisterInfo &MRI,
                              const TargetInstrInfo &TII,
-                             const RegisterBankInfo &RBI,
-                             MachineInstr &InsertPt, unsigned Reg,
+                             const RegisterBankInfo &RBI, unsigned Reg,
                              const TargetRegisterClass &RegClass);
 
+/// Constrain the Register operand OpIdx, so that it is now constrained to the
+/// TargetRegisterClass passed as an argument (RegClass).
+/// If this fails, create a new virtual register in the correct class and
+/// insert a COPY before \p InsertPt if it is a use or after if it is a
+/// definition. The debug location of \p InsertPt is used for the new copy.
+///
+/// \return The virtual register constrained to the right register class.
+unsigned constrainOperandRegClass(const MachineFunction &MF,
+                                  const TargetRegisterInfo &TRI,
+                                  MachineRegisterInfo &MRI,
+                                  const TargetInstrInfo &TII,
+                                  const RegisterBankInfo &RBI,
+                                  MachineInstr &InsertPt,
+                                  const TargetRegisterClass &RegClass,
+                                  const MachineOperand &RegMO, unsigned OpIdx);
+
 /// Try to constrain Reg so that it is usable by argument OpIdx of the
 /// provided MCInstrDesc \p II. If this fails, create a new virtual
-/// register in the correct class and insert a COPY before \p InsertPt.
-/// This is equivalent to constrainRegToClass() with RegClass obtained from the
-/// MCInstrDesc. The debug location of \p InsertPt is used for the new copy.
+/// register in the correct class and insert a COPY before \p InsertPt
+/// if it is a use or after if it is a definition.
+/// This is equivalent to constrainOperandRegClass(..., RegClass, ...)
+/// with RegClass obtained from the MCInstrDesc. The debug location of \p
+/// InsertPt is used for the new copy.
 ///
 /// \return The virtual register constrained to the right register class.
 unsigned constrainOperandRegClass(const MachineFunction &MF,
@@ -90,17 +106,40 @@ void reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
                         const char *PassName, StringRef Msg,
                         const MachineInstr &MI);
 
+/// If \p VReg is defined by a G_CONSTANT whose value fits in int64_t,
+/// returns it.
 Optional<int64_t> getConstantVRegVal(unsigned VReg,
                                      const MachineRegisterInfo &MRI);
+/// Simple struct used to hold a constant integer value and a virtual
+/// register.
+struct ValueAndVReg {
+  int64_t Value;
+  unsigned VReg;
+};
+/// If \p VReg is defined by a statically evaluable chain of
+/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true)
+/// and that constant fits in int64_t, returns its value as well as
+/// the virtual register defined by this G_CONSTANT.
+/// When \p LookThroughInstrs == false, this function behaves like
+/// getConstantVRegVal.
+Optional<ValueAndVReg>
+getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI,
+                                  bool LookThroughInstrs = true);
 const ConstantFP* getConstantFPVRegVal(unsigned VReg,
                                        const MachineRegisterInfo &MRI);
 
 /// See if Reg is defined by an single def instruction that is
 /// Opcode. Also try to do trivial folding if it's a COPY with
 /// same types. Returns null otherwise.
-MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg,
+MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg,
                            const MachineRegisterInfo &MRI);
 
+/// Find the def instruction for \p Reg, folding away any trivial copies. Note
+/// it may still return a COPY, if it changes the type. May return nullptr if \p
+/// Reg is not a generic virtual register.
+MachineInstr *getDefIgnoringCopies(Register Reg,
+                                   const MachineRegisterInfo &MRI);
+
 /// Returns an APFloat from Val converted to the appropriate size.
 APFloat getAPFloatFromSize(double Val, unsigned Size);
 
@@ -111,5 +150,16 @@ void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
 Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
                                   const unsigned Op2,
                                   const MachineRegisterInfo &MRI);
+
+/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
+/// this returns whether \p Val can be assumed to never be a signaling NaN.
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+                     bool SNaN = false);
+
+/// Returns true if \p Val can be assumed to never be a signaling NaN.
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
+  return isKnownNeverNaN(Val, MRI, true);
+}
+
 } // End namespace llvm.
 #endif
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 9c918ae1104f..acf27dcc5fab 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/ISDOpcodes.h - CodeGen opcodes -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -272,12 +271,17 @@ namespace ISD {
     /// resulting value is this minimum value.
     SSUBSAT, USUBSAT,
 
-    /// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
+    /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
     /// 2 integers with the same width and scale. SCALE represents the scale of
     /// both operands as fixed point numbers. This SCALE parameter must be a
     /// constant integer. A scale of zero is effectively performing
     /// multiplication on 2 integers.
-    SMULFIX,
+    SMULFIX, UMULFIX,
+
+    /// Same as the corresponding unsaturated fixed point instructions, but the
+    /// result is clamped between the min and max values representable by the
+    /// bits of the first 2 operands.
+    SMULFIXSAT,
 
     /// Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,
@@ -298,6 +302,26 @@ namespace ISD {
     STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
     STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
 
+    /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating 
+    /// point type down to the precision of the destination VT.  TRUNC is a 
+    /// flag, which is always an integer that is zero or one.  If TRUNC is 0,
+    /// this is a normal rounding, if it is 1, this FP_ROUND is known to not
+    /// change the value of Y.
+    ///
+    /// The TRUNC = 1 case is used in cases where we know that the value will
+    /// not be modified by the node, because Y is not using any of the extra
+    /// precision of source type.  This allows certain transformations like
+    /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
+    /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
+    /// removed.
+    /// It is used to limit optimizations while the DAG is being optimized.
+    STRICT_FP_ROUND,
+
+    /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
+    /// type.
+    /// It is used to limit optimizations while the DAG is being optimized.
+    STRICT_FP_EXTEND,
+
     /// FMA - Perform a * b + c with no intermediate rounding step.
     FMA,
 
@@ -580,10 +604,14 @@ namespace ISD {
     /// is often a storage-only type but has native conversions.
     FP16_TO_FP, FP_TO_FP16,
 
-    /// Perform various unary floating-point operations inspired by libm.
+    /// Perform various unary floating-point operations inspired by libm. For
+    /// FPOWI, the result is undefined if the integer operand doesn't fit
+    /// into 32 bits.
     FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
     FLOG, FLOG2, FLOG10, FEXP, FEXP2,
     FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
+    LROUND, LLROUND, LRINT, LLRINT,
+
     /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
     /// values.
     //
@@ -666,6 +694,9 @@ namespace ISD {
     /// SDOperands.
     INLINEASM,
 
+    /// INLINEASM_BR - Terminator version of inline asm. Used by asm-goto.
+    INLINEASM_BR,
+
     /// EH_LABEL - Represents a label in mid basic block used to track
     /// locations needed for debug and exception handling tables.  These nodes
     /// take a chain as input and return a chain.
@@ -819,6 +850,8 @@ namespace ISD {
     ATOMIC_LOAD_MAX,
     ATOMIC_LOAD_UMIN,
     ATOMIC_LOAD_UMAX,
+    ATOMIC_LOAD_FADD,
+    ATOMIC_LOAD_FSUB,
 
     // Masked load and store - consecutive vector load and store operations
     // with additional mask operand that prevents memory accesses to the
@@ -866,11 +899,14 @@ namespace ISD {
     VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL,
     /// These reductions are non-strict, and have a single vector operand.
     VECREDUCE_FADD, VECREDUCE_FMUL,
+    /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
+    VECREDUCE_FMAX, VECREDUCE_FMIN,
+    /// Integer reductions may have a result type larger than the vector element
+    /// type. However, the reduction is performed using the vector element type
+    /// and the value in the top bits is unspecified.
     VECREDUCE_ADD, VECREDUCE_MUL,
     VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
     VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN,
-    /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
-    VECREDUCE_FMAX, VECREDUCE_FMIN,
 
     /// BUILTIN_OP_END - This must be the last enum value in this list.
     /// The target-specific pre-isel opcode values start here.
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index 597d684909c1..daf2d9a47801 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -1,9 +1,8 @@
 //===-- IntrinsicLowering.h - Intrinsic Function Lowering -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,10 +30,6 @@ class IntrinsicLowering {
 public:
   explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {}
 
-  /// Add all of the prototypes that might be needed by an intrinsic lowering
-  /// implementation to be inserted into the module specified.
-  void AddPrototypes(Module &M);
-
   /// Replace a call to the specified intrinsic function.
   /// If an intrinsic function must be implemented by the code generator
   /// (such as va_start), this function should print a message and abort.
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 9b8d83ce77ca..95f4c6473542 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -1,9 +1,8 @@
 //===---- LatencyPriorityQueue.h - A latency-oriented priority queue ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
index 221f16a03f16..ca99c6c89b19 100644
--- a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
 ///===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*--===//
 ///
-///                     The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ///
 ///===---------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 3ba503487823..253d4734995b 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -1,9 +1,8 @@
 //===- LexicalScopes.cpp - Collecting lexical scope info --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 38fcb37b1e69..75a5c359630e 100644
--- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -1,9 +1,8 @@
 //===- llvm/Codegen/LinkAllAsmWriterComponents.h ----------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 18c13ca8f598..56c93b24147e 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -1,9 +1,8 @@
 //===- llvm/Codegen/LinkAllCodegenComponents.h ------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index cdf9ad2588cf..8bb88165d3e1 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/LiveInterval.h - Interval representation ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -606,6 +605,44 @@ namespace llvm {
     /// activated in the constructor of the live range.
     void flushSegmentSet();
 
+    /// Copies to the output iterator O every index from the input sequence R
+    /// at which this LiveRange is live.
+    /// R must be an _ascending sorted_ range with _random_ access iterators.
+    /// The indexes written to O are also in ascending order, so the output
+    /// can be used directly in a subsequent search (for example for
+    /// subranges). Returns true if at least one index was written.
+    template <typename Range, typename OutputIt>
+    bool findIndexesLiveAt(Range &&R, OutputIt O) const {
+      assert(std::is_sorted(R.begin(), R.end()));
+      auto Idx = R.begin(), EndIdx = R.end();
+      auto Seg = segments.begin(), EndSeg = segments.end();
+      bool Found = false;
+      while (Idx != EndIdx && Seg != EndSeg) {
+        // If Seg ends at or before *Idx, binary-search the later segments for
+        // the first one whose end is above *Idx.
+        if (Seg->end <= *Idx) {
+          Seg = std::upper_bound(++Seg, EndSeg, *Idx,
+            [=](typename std::remove_reference<decltype(*Idx)>::type V,
+                const typename std::remove_reference<decltype(*Seg)>::type &S) {
+              return V < S.end;
+            });
+          if (Seg == EndSeg)
+            break;
+        }
+        auto NotLessStart = std::lower_bound(Idx, EndIdx, Seg->start);
+        if (NotLessStart == EndIdx) // Every remaining index is < Seg->start.
+          break;
+        auto NotLessEnd = std::lower_bound(NotLessStart, EndIdx, Seg->end);
+        if (NotLessEnd != NotLessStart) { // Indexes in [Seg->start, Seg->end).
+          Found = true;
+          O = std::copy(NotLessStart, NotLessEnd, O);
+        }
+        Idx = NotLessEnd;
+        ++Seg;
+      }
+      return Found;
+    }
+
     void print(raw_ostream &OS) const;
     void dump() const;
 
@@ -790,8 +827,15 @@ namespace llvm {
     ///    L000F, refining for mask L0018. Will split the L00F0 lane into
     ///    L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod
     ///    function will be applied to the L0010 and L0008 subranges.
+    ///
+    /// \p Indexes and \p TRI are required to clean up the VNIs that
+    /// don't define the related lane masks after they get shrunk. E.g.,
+    /// when L000F gets split into L0007 and L0008 maybe only a subset
+    /// of the VNIs that defined L000F defines L0007.
     void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
-                         std::function<void(LiveInterval::SubRange&)> Apply);
+                         std::function<void(LiveInterval::SubRange &)> Apply,
+                         const SlotIndexes &Indexes,
+                         const TargetRegisterInfo &TRI);
 
     bool operator<(const LiveInterval& other) const {
       const SlotIndex &thisIndex = beginIndex();
diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h
index 9e2799bd4414..05506d2c3bc6 100644
--- a/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -1,9 +1,8 @@
 //===- LiveIntervalUnion.h - Live interval union data struct ---*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h
index 16ab1dc475c4..588b0f9cf39c 100644
--- a/include/llvm/CodeGen/LiveIntervals.h
+++ b/include/llvm/CodeGen/LiveIntervals.h
@@ -1,9 +1,8 @@
 //===- LiveIntervals.h - Live Interval Analysis -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -418,6 +417,15 @@ class VirtRegMap;
       RegUnitRanges[Unit] = nullptr;
     }
 
+    /// Remove associated live ranges for the register units associated with \p
+    /// Reg. Subsequent uses should rely on on-demand recomputation.  \note This
+    /// method can result in inconsistent liveness tracking if multiple physical
+    /// registers share a regunit, and should be used cautiously.
+    void removeAllRegUnitsForPhysReg(unsigned Reg) {
+      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+        removeRegUnit(*Units);
+    }
+
     /// Remove value numbers and related live segments starting at position
     /// \p Pos that are part of any liverange of physical register \p Reg or one
     /// of its subregisters.
diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h
index 7312902e21b7..50da0b3d5c48 100644
--- a/include/llvm/CodeGen/LivePhysRegs.h
+++ b/include/llvm/CodeGen/LivePhysRegs.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/LivePhysRegs.h - Live Physical Register Set -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index 53830297c525..6519937ec071 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -1,9 +1,8 @@
 //===- LiveRangeEdit.h - Basic tools for split and spill --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h
index f62a55c73085..ab4d44f9a611 100644
--- a/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/include/llvm/CodeGen/LiveRegMatrix.h
@@ -1,9 +1,8 @@
 //===- LiveRegMatrix.h - Track register interference ----------*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h
index 5e9dd8b3cdf6..7dbb2feab8bf 100644
--- a/include/llvm/CodeGen/LiveRegUnits.h
+++ b/include/llvm/CodeGen/LiveRegUnits.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/LiveRegUnits.h - Register Unit Set ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveStacks.h b/include/llvm/CodeGen/LiveStacks.h
index 44ed785f7b53..7c4c64d515df 100644
--- a/include/llvm/CodeGen/LiveStacks.h
+++ b/include/llvm/CodeGen/LiveStacks.h
@@ -1,9 +1,8 @@
 //===- LiveStacks.h - Live Stack Slot Analysis ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index ed8da8662106..71de306e2942 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/LiveVariables.h - Live Variable Analysis ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LoopTraversal.h b/include/llvm/CodeGen/LoopTraversal.h
index 750da0143c0d..e5810ef1ef26 100644
--- a/include/llvm/CodeGen/LoopTraversal.h
+++ b/include/llvm/CodeGen/LoopTraversal.h
@@ -1,9 +1,8 @@
 //==------ llvm/CodeGen/LoopTraversal.h - Loop Traversal -*- C++ -*---------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/LowLevelType.h b/include/llvm/CodeGen/LowLevelType.h
index a3c5c9329f53..687233e4e168 100644
--- a/include/llvm/CodeGen/LowLevelType.h
+++ b/include/llvm/CodeGen/LowLevelType.h
@@ -1,9 +1,8 @@
 //== llvm/CodeGen/LowLevelType.h ------------------------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MIRParser/MIParser.h b/include/llvm/CodeGen/MIRParser/MIParser.h
new file mode 100644
index 000000000000..4e32a04551c1
--- /dev/null
+++ b/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -0,0 +1,233 @@
+//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that parses the machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MDNode;
+class RegisterBank;
+struct SlotMapping;
+class SMDiagnostic;
+class SourceMgr;
+class StringRef;
+class TargetRegisterClass;
+class TargetSubtargetInfo;
+
+/// Describes one virtual register as tracked in PerFunctionMIParsingState.
+struct VRegInfo {
+  /// Kind tag stored in 8 bits (`enum : uint8_t`, not an enum named uint8_t).
+  enum : uint8_t { UNKNOWN, NORMAL, GENERIC, REGBANK } Kind = UNKNOWN;
+  bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
+  union {
+    const TargetRegisterClass *RC;
+    const RegisterBank *RegBank;
+  } D;
+  unsigned VReg;
+  unsigned PreferredReg = 0;
+};
+
+using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
+using Name2RegBankMap = StringMap<const RegisterBank *>;
+
+struct PerTargetMIParsingState {
+private:
+  const TargetSubtargetInfo &Subtarget;
+
+  /// Maps from instruction names to op codes.
+  StringMap<unsigned> Names2InstrOpCodes;
+
+  /// Maps from register names to registers.
+  StringMap<unsigned> Names2Regs;
+
+  /// Maps from register mask names to register masks.
+  StringMap<const uint32_t *> Names2RegMasks;
+
+  /// Maps from subregister names to subregister indices.
+  StringMap<unsigned> Names2SubRegIndices;
+
+  /// Maps from target index names to target indices.
+  StringMap<int> Names2TargetIndices;
+
+  /// Maps from direct target flag names to the direct target flag values.
+  StringMap<unsigned> Names2DirectTargetFlags;
+
+  /// Maps from bitmask target flag names to the bitmask target flag values.
+  StringMap<unsigned> Names2BitmaskTargetFlags;
+
+  /// Maps from MMO target flag names to MMO target flag values.
+  StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
+
+  /// Maps from register class names to register classes.
+  Name2RegClassMap Names2RegClasses;
+
+  /// Maps from register bank names to register banks.
+  Name2RegBankMap Names2RegBanks;
+
+  void initNames2InstrOpCodes();
+  void initNames2Regs();
+  void initNames2RegMasks();
+  void initNames2SubRegIndices();
+  void initNames2TargetIndices();
+  void initNames2DirectTargetFlags();
+  void initNames2BitmaskTargetFlags();
+  void initNames2MMOTargetFlags();
+
+  void initNames2RegClasses();
+  void initNames2RegBanks();
+
+public:
+  /// Try to convert an instruction name to an opcode. Return true if the
+  /// instruction name is invalid.
+  bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+  /// Try to convert a register name to a register number. Return true if the
+  /// register name is invalid.
+  bool getRegisterByName(StringRef RegName, unsigned &Reg);
+
+  /// Check if the given identifier is a name of a register mask.
+  ///
+  /// Return null if the identifier isn't a register mask.
+  const uint32_t *getRegMask(StringRef Identifier);
+
+  /// Check if the given identifier is a name of a subregister index.
+  ///
+  /// Return 0 if the name isn't a subregister index.
+  unsigned getSubRegIndex(StringRef Name);
+
+  /// Try to convert a name of target index to the corresponding target index.
+  ///
+  /// Return true if the name isn't a name of a target index.
+  bool getTargetIndex(StringRef Name, int &Index);
+
+  /// Try to convert a name of a direct target flag to the corresponding
+  /// target flag.
+  ///
+  /// Return true if the name isn't a name of a direct flag.
+  bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
+
+  /// Try to convert a name of a bitmask target flag to the corresponding
+  /// target flag.
+  ///
+  /// Return true if the name isn't a name of a bitmask target flag.
+  bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
+
+  /// Try to convert a name of a MachineMemOperand target flag to the
+  /// corresponding target flag.
+  ///
+  /// Return true if the name isn't a name of a target MMO flag.
+  bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+
+  /// Check if the given identifier is a name of a register class.
+  ///
+  /// Return null if the name isn't a register class.
+  const TargetRegisterClass *getRegClass(StringRef Name);
+
+  /// Check if the given identifier is a name of a register bank.
+  ///
+  /// Return null if the name isn't a register bank.
+  const RegisterBank *getRegBank(StringRef Name);
+
+  PerTargetMIParsingState(const TargetSubtargetInfo &STI)
+    : Subtarget(STI) { // Eagerly fills only the class and bank name tables.
+    initNames2RegClasses();
+    initNames2RegBanks();
+  }
+
+  ~PerTargetMIParsingState() = default;
+
+  void setTarget(const TargetSubtargetInfo &NewSubtarget);
+};
+
+struct PerFunctionMIParsingState {
+  BumpPtrAllocator Allocator;
+  MachineFunction &MF;
+  SourceMgr *SM; // Held as a pointer; the constructor receives a reference.
+  const SlotMapping &IRSlots;
+  PerTargetMIParsingState &Target; // Per-target lookup state (not owned).
+
+  DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
+  DenseMap<unsigned, VRegInfo *> VRegInfos;
+  StringMap<VRegInfo *> VRegInfosNamed;
+  DenseMap<unsigned, int> FixedStackObjectSlots;
+  DenseMap<unsigned, int> StackObjectSlots;
+  DenseMap<unsigned, unsigned> ConstantPoolSlots;
+  DenseMap<unsigned, unsigned> JumpTableSlots;
+
+  PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
+                            const SlotMapping &IRSlots,
+                            PerTargetMIParsingState &Target);
+
+  VRegInfo &getVRegInfo(unsigned Num);
+  VRegInfo &getVRegInfoNamed(StringRef RegName);
+};
+
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because all
+/// the machine basic blocks aren't defined yet - this makes it impossible to
+/// resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+                                       StringRef Src, SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
+                              SMDiagnostic &Error);
+
+bool parseMBBReference(PerFunctionMIParsingState &PFS,
+                       MachineBasicBlock *&MBB, StringRef Src,
+                       SMDiagnostic &Error);
+
+bool parseRegisterReference(PerFunctionMIParsingState &PFS,
+                            unsigned &Reg, StringRef Src,
+                            SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
+                                 StringRef Src, SMDiagnostic &Error);
+
+bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+                                   VRegInfo *&Info, StringRef Src,
+                                   SMDiagnostic &Error);
+
+bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
+                               StringRef Src, SMDiagnostic &Error);
+
+bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
+                 SMDiagnostic &Error);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h
index e199a1f69ad7..6a04e48e533c 100644
--- a/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -1,9 +1,8 @@
 //===- MIRParser.h - MIR serialization format parser ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MIRPrinter.h b/include/llvm/CodeGen/MIRPrinter.h
index 078c4b2f6072..a4b03a7fb765 100644
--- a/include/llvm/CodeGen/MIRPrinter.h
+++ b/include/llvm/CodeGen/MIRPrinter.h
@@ -1,9 +1,8 @@
-//===- MIRPrinter.h - MIR serialization format printer --------------------===//
+//===- MIRPrinter.h - MIR serialization format printer ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index 98ac81915dc0..94e76a75e8da 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -1,9 +1,8 @@
-//===- MIRYAMLMapping.h - Describes the mapping between MIR and YAML ------===//
+//===- MIRYamlMapping.h - Describe mapping between MIR and YAML--*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
@@ -37,6 +37,7 @@ struct StringValue {
 
   StringValue() = default;
   StringValue(std::string Value) : Value(std::move(Value)) {}
+  StringValue(const char Val[]) : Value(Val) {}
 
   bool operator==(const StringValue &Other) const {
     return Value == Other.Value;
@@ -212,7 +213,7 @@ struct MachineStackObject {
   int64_t Offset = 0;
   uint64_t Size = 0;
   unsigned Alignment = 0;
-  uint8_t StackID = 0;
+  TargetStackID::Value StackID;
   StringValue CalleeSavedRegister;
   bool CalleeSavedRestored = true;
   Optional<int64_t> LocalOffset;
@@ -252,7 +253,7 @@ template <> struct MappingTraits<MachineStackObject> {
     if (Object.Type != MachineStackObject::VariableSized)
       YamlIO.mapRequired("size", Object.Size);
     YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
-    YamlIO.mapOptional("stack-id", Object.StackID);
+    YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
     YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
                        StringValue()); // Don't print it out when it's empty.
     YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored,
@@ -278,7 +279,7 @@ struct FixedMachineStackObject {
   int64_t Offset = 0;
   uint64_t Size = 0;
   unsigned Alignment = 0;
-  uint8_t StackID = 0;
+  TargetStackID::Value StackID;
   bool IsImmutable = false;
   bool IsAliased = false;
   StringValue CalleeSavedRegister;
@@ -308,6 +309,15 @@ struct ScalarEnumerationTraits<FixedMachineStackObject::ObjectType> {
   }
 };
 
+template <> // YAML scalar spellings of the TargetStackID::Value enumerators.
+struct ScalarEnumerationTraits<TargetStackID::Value> {
+  static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
+    IO.enumCase(ID, "default", TargetStackID::Default);
+    IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+    IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
+  }
+};
+
 template <> struct MappingTraits<FixedMachineStackObject> {
   static void mapping(yaml::IO &YamlIO, FixedMachineStackObject &Object) {
     YamlIO.mapRequired("id", Object.ID);
@@ -317,7 +327,7 @@ template <> struct MappingTraits<FixedMachineStackObject> {
     YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
     YamlIO.mapOptional("size", Object.Size, (uint64_t)0);
     YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
-    YamlIO.mapOptional("stack-id", Object.StackID);
+    YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
     if (Object.Type != FixedMachineStackObject::SpillSlot) {
       YamlIO.mapOptional("isImmutable", Object.IsImmutable, false);
       YamlIO.mapOptional("isAliased", Object.IsAliased, false);
@@ -337,6 +347,66 @@ template <> struct MappingTraits<FixedMachineStackObject> {
   static const bool flow = true;
 };
 
+
+/// Serializable representation of CallSiteInfo.
+struct CallSiteInfo {
+  // A call argument number paired with the register which is used to
+  // transfer that argument.
+  struct ArgRegPair {
+    StringValue Reg;
+    uint16_t ArgNo;
+
+    bool operator==(const ArgRegPair &Other) const {
+      return Reg == Other.Reg && ArgNo == Other.ArgNo;
+    }
+  };
+
+  /// Identifies call instruction location in machine function.
+  struct MachineInstrLoc {
+    unsigned BlockNum;
+    unsigned Offset;
+
+    bool operator==(const MachineInstrLoc &Other) const {
+      return BlockNum == Other.BlockNum && Offset == Other.Offset;
+    }
+  };
+
+  MachineInstrLoc CallLocation;
+  std::vector<ArgRegPair> ArgForwardingRegs;
+
+  bool operator==(const CallSiteInfo &Other) const {
+    return CallLocation == Other.CallLocation && // same call instruction
+           ArgForwardingRegs == Other.ArgForwardingRegs; // same arg registers
+  }
+};
+
+template <> struct MappingTraits<CallSiteInfo::ArgRegPair> {
+  static void mapping(IO &YamlIO, CallSiteInfo::ArgRegPair &ArgReg) {
+    YamlIO.mapRequired("arg", ArgReg.ArgNo); // argument number
+    YamlIO.mapRequired("reg", ArgReg.Reg);   // register transferring it
+  }
+
+  static const bool flow = true; // Emit as a YAML flow mapping.
+};
+}
+}
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo::ArgRegPair)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<CallSiteInfo> {
+  static void mapping(IO &YamlIO, CallSiteInfo &CSInfo) {
+    YamlIO.mapRequired("bb", CSInfo.CallLocation.BlockNum);
+    YamlIO.mapRequired("offset", CSInfo.CallLocation.Offset);
+    YamlIO.mapOptional("fwdArgRegs", CSInfo.ArgForwardingRegs,
+                       std::vector<CallSiteInfo::ArgRegPair>()); // empty default
+  }
+
+  static const bool flow = true; // Emit as a YAML flow mapping.
+};
+
 struct MachineConstantPoolValue {
   UnsignedValue ID;
   StringValue Value;
@@ -391,6 +461,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry)
 
@@ -483,6 +554,20 @@ template <> struct MappingTraits<MachineFrameInfo> {
   }
 };
 
+/// Polymorphic base that targets derive from, mirroring their own
+/// llvm::MachineFunctionInfo implementation.
+struct MachineFunctionInfo {
+  virtual ~MachineFunctionInfo() = default;
+  virtual void mappingImpl(IO &) {} // Default: nothing is mapped.
+};
+
+template <> struct MappingTraits<std::unique_ptr<MachineFunctionInfo>> {
+  static void mapping(IO &YamlIO, std::unique_ptr<MachineFunctionInfo> &MFI) {
+    if (MachineFunctionInfo *Info = MFI.get()) // Null means nothing to map.
+      Info->mappingImpl(YamlIO);
+  }
+};
+
 struct MachineFunction {
   StringRef Name;
   unsigned Alignment = 0;
@@ -504,6 +589,8 @@ struct MachineFunction {
   std::vector<FixedMachineStackObject> FixedStackObjects;
   std::vector<MachineStackObject> StackObjects;
   std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
+  std::unique_ptr<MachineFunctionInfo> MachineFuncInfo;
+  std::vector<CallSiteInfo> CallSitesInfo;
   MachineJumpTable JumpTableInfo;
   BlockStringValue Body;
 };
@@ -530,8 +617,11 @@ template <> struct MappingTraits<MachineFunction> {
                        std::vector<FixedMachineStackObject>());
     YamlIO.mapOptional("stack", MF.StackObjects,
                        std::vector<MachineStackObject>());
+    YamlIO.mapOptional("callSites", MF.CallSitesInfo,
+                       std::vector<CallSiteInfo>());
     YamlIO.mapOptional("constants", MF.Constants,
                        std::vector<MachineConstantPoolValue>());
+    YamlIO.mapOptional("machineFunctionInfo", MF.MachineFuncInfo);
     if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty())
       YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable());
     YamlIO.mapOptional("body", MF.Body, BlockStringValue());
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index cbb49695af75..0185c7cbe018 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -1,9 +1,8 @@
 //=== MachORelocation.h - Mach-O Relocation Info ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index ec2f270fcb3f..333d0a78618c 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineBasicBlock.h -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -115,6 +114,10 @@ private:
   /// branch.
   bool AddressTaken = false;
 
+  /// Indicate that this basic block needs its symbol to be emitted regardless
+  /// of whether the flow just falls through to it.
+  bool LabelMustBeEmitted = false;
+
   /// Indicate that this basic block is the entry block of an EH scope, i.e.,
   /// the block that used to have a catchpad or cleanuppad instruction in the
   /// LLVM IR.
@@ -159,6 +162,13 @@ public:
   /// branch.
   void setHasAddressTaken() { AddressTaken = true; }
 
+  /// Test whether this block must have its label emitted.
+  bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; }
+
+  /// Mark this block so that, regardless of how control flows to it, its
+  /// label is required to be emitted.
+  void setLabelMustBeEmitted() { LabelMustBeEmitted = true; }
+
   /// Return the MachineFunction containing this basic block.
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
@@ -900,11 +910,11 @@ class MachineInstrSpan {
   MachineBasicBlock::iterator I, B, E;
 
 public:
-  MachineInstrSpan(MachineBasicBlock::iterator I)
-    : MBB(*I->getParent()),
-      I(I),
-      B(I == MBB.begin() ? MBB.end() : std::prev(I)),
-      E(std::next(I)) {}
+  MachineInstrSpan(MachineBasicBlock::iterator I, MachineBasicBlock *BB)
+      : MBB(*BB), I(I), B(I == MBB.begin() ? MBB.end() : std::prev(I)),
+        E(std::next(I)) { // B: element before I (end() if none); E: after I.
+    assert(I == BB->end() || I->getParent() == BB); // I is end() or in BB.
+  }
 
   MachineBasicBlock::iterator begin() {
     return B == MBB.end() ? MBB.begin() : std::next(B);
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 5b4b99ca0a5d..a438ecfcc25e 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
 //===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 81b0524cf0a4..2b9b2030eb97 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -1,9 +1,8 @@
 //=- MachineBranchProbabilityInfo.h - Branch Probability Analysis -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
index 586535f771c2..4f4034baf801 100644
--- a/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -1,10 +1,9 @@
 //===-- llvm/CodeGen/MachineCombinerPattern.h - Instruction pattern supported by
 // combiner  ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index b0b5420a884b..4d07b620a4b4 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -1,9 +1,8 @@
 //===- CodeGen/MachineConstantPool.h - Abstract Constant Pool ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h
index 75d75bc3669a..f7bbd07a63ab 100644
--- a/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineDominanceFrontier.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index e3d3d169db97..d2200080b897 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index c2706a21a177..761735120a64 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -1,9 +1,8 @@
 //===-- CodeGen/MachineFrameInfo.h - Abstract Stack Frame Rep. --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -471,7 +470,10 @@ public:
     assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
            "Invalid Object Idx!");
     Objects[ObjectIdx+NumFixedObjects].Alignment = Align;
-    ensureMaxAlignment(Align);
+
+    // Only ensure max alignment for the default stack.
+    if (getStackID(ObjectIdx) == 0)
+      ensureMaxAlignment(Align);
   }
 
   /// Return the underlying Alloca of the specified
@@ -698,6 +700,8 @@ public:
     assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
            "Invalid Object Idx!");
     Objects[ObjectIdx+NumFixedObjects].StackID = ID;
+    // If ID > 0, MaxAlignment may now be overly conservative.
+    // If ID == 0, MaxAlignment will need to be updated separately.
   }
 
   /// Returns true if the specified index corresponds to a dead object.
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 25edf5bcce51..201c126ee52e 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,11 +30,6 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ArrayRecycler.h"
 #include "llvm/Support/AtomicOrdering.h"
@@ -53,6 +47,7 @@ namespace llvm {
 class BasicBlock;
 class BlockAddress;
 class DataLayout;
+class DebugLoc;
 class DIExpression;
 class DILocalVariable;
 class DILocation;
@@ -67,6 +62,7 @@ class MachineModuleInfo;
 class MachineRegisterInfo;
 class MCContext;
 class MCInstrDesc;
+class MCSymbol;
 class Pass;
 class PseudoSourceValueManager;
 class raw_ostream;
@@ -86,7 +82,7 @@ template <> struct ilist_callback_traits<MachineBasicBlock> {
 
   template <class Iterator>
   void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) {
-    llvm_unreachable("Never transfer between lists");
+    assert(this == &OldList && "never transfer MBBs between functions");
   }
 };
 
@@ -325,6 +321,10 @@ class MachineFunction {
   /// CodeView label annotations.
   std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
 
+  /// CodeView heapallocsites.
+  std::vector<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
+      CodeViewHeapAllocSites;
+
   bool CallsEHReturn = false;
   bool CallsUnwindInit = false;
   bool HasEHScopes = false;
@@ -378,9 +378,28 @@ public:
     virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
   };
 
+  /// Pairs the number of a call argument (as assigned after call lowering)
+  /// with the register used to transfer that argument.
+  /// For now, only arguments that are transferred through a single register
+  /// are supported.
+  struct ArgRegPair {
+    unsigned Reg;
+    uint16_t ArgNo;
+    ArgRegPair(unsigned R, unsigned Arg) : Reg(R), ArgNo(Arg) {
+      assert(Arg < (1 << 16) && "Arg out of range"); // ArgNo is 16 bits wide.
+    }
+  };
+  /// Vector of call argument and its forwarding register.
+  using CallSiteInfo = SmallVector<ArgRegPair, 1>;
+  using CallSiteInfoImpl = SmallVectorImpl<ArgRegPair>;
+
 private:
   Delegate *TheDelegate = nullptr;
 
+  using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
+  /// Map a call instruction to call site arguments forwarding info.
+  CallSiteInfoMap CallSitesInfo;
+
   // Callbacks for insertion and removal.
   void handleInsertion(MachineInstr &MI);
   void handleRemoval(MachineInstr &MI);
@@ -443,7 +462,6 @@ public:
   /// getSubtarget - Return the subtarget for which this machine code is being
   /// compiled.
   const TargetSubtargetInfo &getSubtarget() const { return *STI; }
-  void setSubtarget(const TargetSubtargetInfo *ST) { STI = ST; }
 
   /// getSubtarget - This method returns a pointer to the specified type of
   /// TargetSubtargetInfo.  In debug builds, it verifies that the object being
@@ -741,6 +759,12 @@ public:
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
                                           const AAMDNodes &AAInfo);
 
+  /// Allocate a new MachineMemOperand by copying an existing one,
+  /// replacing the flags. MachineMemOperands are owned
+  /// by the MachineFunction and need not be explicitly deallocated.
+  MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+                                          MachineMemOperand::Flags Flags);
+
   using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
 
   /// Allocate an array of MachineOperands. This is only intended for use by
@@ -791,10 +815,7 @@ public:
     return FrameInstructions;
   }
 
-  LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst) {
-    FrameInstructions.push_back(Inst);
-    return FrameInstructions.size() - 1;
-  }
+  LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
 
   /// \name Exception Handling
   /// \{
@@ -913,6 +934,14 @@ public:
     return CodeViewAnnotations;
   }
 
+  /// Record heapallocsites
+  void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD);
+
+  ArrayRef<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
+      getCodeViewHeapAllocSites() const {
+    return CodeViewHeapAllocSites;
+  }
+
   /// Return a reference to the C++ typeinfo for the current function.
   const std::vector<const GlobalValue *> &getTypeInfos() const {
     return TypeInfos;
@@ -936,6 +965,23 @@ public:
   const VariableDbgInfoMapTy &getVariableDbgInfo() const {
     return VariableDbgInfos;
   }
+
+  void addCallArgsForwardingRegs(const MachineInstr *CallI,
+                                 CallSiteInfoImpl &&CallInfo) {
+    assert(CallI->isCall()); // Forwarding info is only recorded for calls.
+    CallSitesInfo[CallI] = std::move(CallInfo); // Replaces any prior entry.
+  }
+
+  const CallSiteInfoMap &getCallSitesInfo() const {
+    return CallSitesInfo;
+  }
+
+  /// Update call sites info by deleting entry for \p Old call instruction.
+  /// If \p New is present then transfer \p Old call info to it. This function
+  /// should be called before removing call instruction or before replacing
+  /// call instruction with new one.
+  void updateCallSiteInfo(const MachineInstr *Old,
+                          const MachineInstr *New = nullptr);
 };
 
 //===--------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 6d978daa2018..caaf22c2139e 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -1,9 +1,8 @@
 //===-- MachineFunctionPass.h - Pass for MachineFunctions --------*-C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index ea1a2a536fc7..c82c5b137507 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineInstr.h - MachineInstr class ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,6 +24,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -103,8 +103,10 @@ public:
                                         // no unsigned wrap.
     NoSWrap      = 1 << 12,             // Instruction supports binary operator
                                         // no signed wrap.
-    IsExact      = 1 << 13              // Instruction supports division is
+    IsExact      = 1 << 13,             // Instruction supports division is
                                         // known to be exact.
+    FPExcept     = 1 << 14,             // Instruction may raise floating-point
+                                        // exceptions.
   };
 
 private:
@@ -831,6 +833,17 @@ public:
     return mayLoad(Type) || mayStore(Type);
   }
 
+  /// Return true if this instruction could possibly raise a floating-point
+  /// exception.  This is the case if the instruction is a floating-point
+  /// instruction that can in principle raise an exception, as indicated
+  /// by the MCID::MayRaiseFPException property, *and* at the same time,
+  /// the instruction is used in a context where we expect floating-point
+  /// exceptions might be enabled, as indicated by the FPExcept MI flag.
+  bool mayRaiseFPException() const {
+    return hasProperty(MCID::MayRaiseFPException) &&
+           getFlag(MachineInstr::MIFlag::FPExcept);
+  }
+
   //===--------------------------------------------------------------------===//
   // Flags that indicate whether an instruction can be modified by a method.
   //===--------------------------------------------------------------------===//
@@ -1006,16 +1019,33 @@ public:
       && getOperand(1).isImm();
   }
 
+  /// A DBG_VALUE is an entry value iff its debug expression contains the
+  /// DW_OP_entry_value DWARF operation.
+  bool isDebugEntryValue() const {
+    return isDebugValue() && getDebugExpression()->isEntryValue();
+  }
+
+  /// Return true if the instruction is a debug value which describes a part of
+  /// a variable as unavailable.
+  bool isUndefDebugValue() const {
+    return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg();
+  }
+
   bool isPHI() const {
     return getOpcode() == TargetOpcode::PHI ||
            getOpcode() == TargetOpcode::G_PHI;
   }
   bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
   bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
-  bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
+  bool isInlineAsm() const {
+    return getOpcode() == TargetOpcode::INLINEASM ||
+           getOpcode() == TargetOpcode::INLINEASM_BR;
+  }
 
+  /// FIXME: Seems like a layering violation that the AsmDialect, which is X86
+  /// specific, be attached to a generic MachineInstr.
   bool isMSInlineAsm() const {
-    return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
+    return isInlineAsm() && getInlineAsmDialect() == InlineAsm::AD_Intel;
   }
 
   bool isStackAligningInlineAsm() const;
@@ -1197,12 +1227,22 @@ public:
 
   /// Wrapper for findRegisterDefOperandIdx, it returns
   /// a pointer to the MachineOperand rather than an index.
-  MachineOperand *findRegisterDefOperand(unsigned Reg, bool isDead = false,
-                                      const TargetRegisterInfo *TRI = nullptr) {
-    int Idx = findRegisterDefOperandIdx(Reg, isDead, false, TRI);
+  MachineOperand *
+  findRegisterDefOperand(unsigned Reg, bool isDead = false,
+                         bool Overlap = false,
+                         const TargetRegisterInfo *TRI = nullptr) {
+    int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI);
     return (Idx == -1) ? nullptr : &getOperand(Idx);
   }
 
+  const MachineOperand *
+  findRegisterDefOperand(unsigned Reg, bool isDead = false,
+                         bool Overlap = false,
+                         const TargetRegisterInfo *TRI = nullptr) const {
+    return const_cast<MachineInstr *>(this)->findRegisterDefOperand(
+        Reg, isDead, Overlap, TRI); // Delegates to the non-const overload.
+  }
+
   /// Find the index of the first operand in the
   /// operand list that is used to represent the predicate. It returns -1 if
   /// none is found.
@@ -1364,7 +1404,7 @@ public:
   /// @param AA Optional alias analysis, used to compare memory operands.
   /// @param Other MachineInstr to check aliasing against.
   /// @param UseTBAA Whether to pass TBAA information to alias analysis.
-  bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA);
+  bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const;
 
   /// Return true if this instruction may have an ordered
   /// or volatile memory reference, or if the information describing the memory
@@ -1400,6 +1440,19 @@ public:
   /// Return true if all the defs of this instruction are dead.
   bool allDefsAreDead() const;
 
+  /// Return a valid size if the instruction is a spill instruction.
+  Optional<unsigned> getSpillSize(const TargetInstrInfo *TII) const;
+
+  /// Return a valid size if the instruction is a folded spill instruction.
+  Optional<unsigned> getFoldedSpillSize(const TargetInstrInfo *TII) const;
+
+  /// Return a valid size if the instruction is a restore instruction.
+  Optional<unsigned> getRestoreSize(const TargetInstrInfo *TII) const;
+
+  /// Return a valid size if the instruction is a folded restore instruction.
+  Optional<unsigned>
+  getFoldedRestoreSize(const TargetInstrInfo *TII) const;
+
   /// Copy implicit register operands from specified
   /// instruction to this instruction.
   void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI);
@@ -1521,11 +1574,17 @@ public:
   /// FIXME: This is not fully implemented yet.
   void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
 
+  /// Clone another MachineInstr's pre- and post- instruction symbols and
+  /// replace ours with it.
+  void cloneInstrSymbols(MachineFunction &MF, const MachineInstr &MI);
+
   /// Return the MIFlags which represent both MachineInstrs. This
   /// should be used when merging two MachineInstrs into one. This routine does
   /// not modify the MIFlags of this MachineInstr.
   uint16_t mergeFlagsWith(const MachineInstr& Other) const;
 
+  static uint16_t copyFlagsFromInstruction(const Instruction &I);
+
   /// Copy all flags to MachineInst MIFlags
   void copyIRFlags(const Instruction &I);
 
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index b5e523f655e7..6d7fb72b6bd1 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -1,9 +1,8 @@
 //===- CodeGen/MachineInstrBuilder.h - Simplify creation of MIs --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -80,6 +79,11 @@ public:
   /// explicitly.
   MachineInstr *getInstr() const { return MI; }
 
+  /// Get the register for the operand index.
+  /// The operand at the index should be a register (asserted by
+  /// MachineOperand).
+  Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); }
+
   /// Add a new virtual register operand.
   const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
                                     unsigned SubReg = 0) const {
@@ -283,6 +287,9 @@ public:
       case MachineOperand::MO_GlobalAddress:
         return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
                                 TargetFlags);
+      case MachineOperand::MO_BlockAddress:
+        return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off,
+                               TargetFlags);
     }
   }
 
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
index b5341fd1ae49..1810d23072d0 100644
--- a/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -1,9 +1,8 @@
-//===-- CodeGen/MachineInstBundle.h - MI bundle utilities -------*- C++ -*-===//
+//===- llvm/CodeGen/MachineInstrBundle.h - MI bundle utilities --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,7 +61,8 @@ inline MachineBasicBlock::instr_iterator getBundleEnd(
     MachineBasicBlock::instr_iterator I) {
   while (I->isBundledWithSucc())
     ++I;
-  return ++I;
+  ++I;
+  return I;
 }
 
 /// Returns an iterator pointing beyond the bundle containing \p I.
@@ -70,7 +70,8 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd(
     MachineBasicBlock::const_instr_iterator I) {
   while (I->isBundledWithSucc())
     ++I;
-  return ++I;
+  ++I;
+  return I;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineInstrBundleIterator.h b/include/llvm/CodeGen/MachineInstrBundleIterator.h
index 5fe4964ff116..0f59563e7e1b 100644
--- a/include/llvm/CodeGen/MachineInstrBundleIterator.h
+++ b/include/llvm/CodeGen/MachineInstrBundleIterator.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineInstrBundleIterator.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 25a3e6b556a3..11781145b378 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -1,9 +1,8 @@
 //===-- CodeGen/MachineJumpTableInfo.h - Abstract Jump Tables  --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 917fb90380f5..da6df59c739c 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineLoopInfo.h - Natural Loop Calculator -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 078ef7ca510c..65f706302bc2 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/MachineMemOperand.h - MachineMemOperand class -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,8 +18,6 @@
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
@@ -223,6 +220,9 @@ public:
   /// Return the size in bytes of the memory reference.
   uint64_t getSize() const { return Size; }
 
+  /// Return the size in bits of the memory reference.
+  uint64_t getSizeInBits() const { return Size * 8; }
+
   /// Return the minimum known alignment in bytes of the actual memory
   /// reference.
   uint64_t getAlignment() const;
@@ -267,13 +267,13 @@ public:
   bool isAtomic() const { return getOrdering() != AtomicOrdering::NotAtomic; }
 
   /// Returns true if this memory operation doesn't have any ordering
-  /// constraints other than normal aliasing. Volatile and atomic memory
-  /// operations can't be reordered.
-  ///
-  /// Currently, we don't model the difference between volatile and atomic
-  /// operations. They should retain their ordering relative to all memory
-  /// operations.
-  bool isUnordered() const { return !isVolatile(); }
+  /// constraints other than normal aliasing. Volatile and (ordered) atomic
+  /// memory operations can't be reordered. 
+  bool isUnordered() const {
+    return (getOrdering() == AtomicOrdering::NotAtomic ||
+            getOrdering() == AtomicOrdering::Unordered) &&
+           !isVolatile();
+  }
 
   /// Update this MachineMemOperand to reflect the alignment of MMO, if it has a
   /// greater alignment. This must only be used when the new alignment applies
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 4371420bc7a2..4ff5c7fd013a 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/MachineModuleInfo.h ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,10 +113,9 @@ class MachineModuleInfo : public ImmutablePass {
   /// True if debugging information is available in this module.
   bool DbgInfoAvailable;
 
-  /// True if this module calls VarArg function with floating-point arguments.
-  /// This is used to emit an undefined reference to _fltused on Windows
-  /// targets.
-  bool UsesVAFloatArgument;
+  /// True if this module is being built for windows/msvc, and uses floating
+  /// point.  This is used to emit an undefined reference to _fltused.
+  bool UsesMSVCFloatingPoint;
 
   /// True if the module calls the __morestack function indirectly, as is
   /// required under the large code model on x86. This is used to emit
@@ -152,6 +150,8 @@ public:
   bool doInitialization(Module &) override;
   bool doFinalization(Module &) override;
 
+  const LLVMTargetMachine &getTarget() const { return TM; }
+
   const MCContext &getContext() const { return Context; }
   MCContext &getContext() { return Context; }
 
@@ -187,13 +187,9 @@ public:
   bool hasDebugInfo() const { return DbgInfoAvailable; }
   void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
 
-  bool usesVAFloatArgument() const {
-    return UsesVAFloatArgument;
-  }
+  bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }
 
-  void setUsesVAFloatArgument(bool b) {
-    UsesVAFloatArgument = b;
-  }
+  void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
 
   bool usesMorestackAddr() const {
     return UsesMorestackAddr;
@@ -258,14 +254,6 @@ public:
   /// \}
 }; // End class MachineModuleInfo
 
-//===- MMI building helpers -----------------------------------------------===//
-
-/// Determine if any floating-point values are being passed to this variadic
-/// function, and set the MachineModuleInfo's usesVAFloatArgument flag if so.
-/// This flag is used to emit an undefined reference to _fltused on Windows,
-/// which will link in MSVCRT's floating-point support.
-void computeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo &MMI);
-
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEMODULEINFO_H
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 17df1fa792b7..746e92239613 100644
--- a/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineModuleInfoImpls.h --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 53e8889d118a..2152c7582e5a 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/MachineOperand.h - MachineOperand class ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #define LLVM_CODEGEN_MACHINEOPERAND_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/LowLevelTypeImpl.h"
@@ -346,9 +346,9 @@ public:
   //===--------------------------------------------------------------------===//
 
   /// getReg - Returns the register number.
-  unsigned getReg() const {
+  Register getReg() const {
     assert(isReg() && "This is not a register operand!");
-    return SmallContents.RegNo;
+    return Register(SmallContents.RegNo);
   }
 
   unsigned getSubReg() const {
@@ -684,6 +684,11 @@ public:
     Contents.RegMask = RegMaskPtr;
   }
 
+  void setPredicate(unsigned Predicate) {
+    assert(isPredicate() && "Wrong MachineOperand mutator");
+    Contents.Pred = Predicate;
+  }
+
   //===--------------------------------------------------------------------===//
   // Other methods.
   //===--------------------------------------------------------------------===//
@@ -714,6 +719,10 @@ public:
   /// ChangeToES - Replace this operand with a new external symbol operand.
   void ChangeToES(const char *SymName, unsigned char TargetFlags = 0);
 
+  /// ChangeToGA - Replace this operand with a new global address operand.
+  void ChangeToGA(const GlobalValue *GV, int64_t Offset,
+                  unsigned char TargetFlags = 0);
+
   /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
   void ChangeToMCSymbol(MCSymbol *Sym);
 
diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index a7ce870400c2..a461a299917c 100644
--- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -1,9 +1,8 @@
 ///===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*----===//
 ///
-///                     The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ///
 ///===---------------------------------------------------------------------===//
 /// \file
@@ -159,9 +158,10 @@ public:
   /// (1) to filter trivial false positives or (2) to provide more context so
   /// that non-trivial false positives can be quickly detected by the user.
   bool allowExtraAnalysis(StringRef PassName) const {
-    return (MF.getFunction().getContext().getDiagnosticsOutputFile() ||
-            MF.getFunction().getContext()
-            .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
+    return (
+        MF.getFunction().getContext().getRemarkStreamer() ||
+        MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(
+            PassName));
   }
 
   /// Take a lambda that returns a remark which will be emitted.  Second
@@ -172,8 +172,11 @@ public:
     // remarks enabled. We can't currently check whether remarks are requested
     // for the calling pass since that requires actually building the remark.
 
-    if (MF.getFunction().getContext().getDiagnosticsOutputFile() ||
-        MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
+    if (MF.getFunction().getContext().getRemarkStreamer() ||
+        MF.getFunction()
+            .getContext()
+            .getDiagHandlerPtr()
+            ->isAnyRemarkEnabled()) {
       auto R = RemarkBuilder();
       emit((DiagnosticInfoOptimizationBase &)R);
     }
diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h
index bfd1e994053a..3868fa415579 100644
--- a/include/llvm/CodeGen/MachineOutliner.h
+++ b/include/llvm/CodeGen/MachineOutliner.h
@@ -1,9 +1,8 @@
 //===---- MachineOutliner.h - Outliner data structures ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -172,13 +171,13 @@ public:
 
   /// Represents the size of a sequence in bytes. (Some instructions vary
   /// widely in size, so just counting the instructions isn't very useful.)
-  unsigned SequenceSize;
+  unsigned SequenceSize = 0;
 
   /// Target-defined overhead of constructing a frame for this function.
-  unsigned FrameOverhead;
+  unsigned FrameOverhead = 0;
 
   /// Target-defined identifier for constructing a frame for this function.
-  unsigned FrameConstructionID;
+  unsigned FrameConstructionID = 0;
 
   /// Return the number of candidates for this \p OutlinedFunction.
   unsigned getOccurrenceCount() const { return Candidates.size(); }
diff --git a/include/llvm/CodeGen/MachinePassRegistry.h b/include/llvm/CodeGen/MachinePassRegistry.h
index a031c92d914f..f5b3723db0aa 100644
--- a/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/include/llvm/CodeGen/MachinePassRegistry.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachinePassRegistry.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h
index 38cb33e90e63..03ca53072685 100644
--- a/include/llvm/CodeGen/MachinePipeliner.h
+++ b/include/llvm/CodeGen/MachinePipeliner.h
@@ -1,9 +1,8 @@
 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,6 +62,8 @@ public:
   const InstrItineraryData *InstrItins;
   const TargetInstrInfo *TII = nullptr;
   RegisterClassInfo RegClassInfo;
+  bool disabledByPragma = false;
+  unsigned II_setByPragma = 0;
 
 #ifndef NDEBUG
   static int NumTries;
@@ -100,6 +101,7 @@ private:
   bool canPipelineLoop(MachineLoop &L);
   bool scheduleLoop(MachineLoop &L);
   bool swingModuloScheduler(MachineLoop &L);
+  void setPragmaPipelineOptions(MachineLoop &L);
 };
 
 /// This class builds the dependence graph for the instructions in a loop,
@@ -108,11 +110,14 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
   MachinePipeliner &Pass;
   /// The minimum initiation interval between iterations for this schedule.
   unsigned MII = 0;
+  /// The maximum initiation interval between iterations for this schedule.
+  unsigned MAX_II = 0;
   /// Set to true if a valid pipelined schedule is found for the loop.
   bool Scheduled = false;
   MachineLoop &Loop;
   LiveIntervals &LIS;
   const RegisterClassInfo &RegClassInfo;
+  unsigned II_setByPragma = 0;
 
   /// A toplogical ordering of the SUnits, which is needed for changing
   /// dependences and iterating over the SUnits.
@@ -190,9 +195,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
 
 public:
   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
-                    const RegisterClassInfo &rci)
+                    const RegisterClassInfo &rci, unsigned II)
       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
-        RegClassInfo(rci), Topo(SUnits, &ExitSU) {
+        RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
     P.MF->getSubtarget().getSMSMutations(Mutations);
     if (SwpEnableCopyToPhi)
       Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
@@ -253,9 +258,6 @@ public:
     return 0;
   }
 
-  /// Set the Minimum Initiation Interval for this schedule attempt.
-  void setMII(unsigned mii) { MII = mii; }
-
   void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
 
   void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
@@ -316,9 +318,9 @@ private:
                               MBBVectorTy &EpilogBBs);
   void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
                       SMSchedule &Schedule);
-  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
-                   MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
-                   ValueMapTy *VRMap);
+  void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
+                   MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+                   SMSchedule &Schedule, ValueMapTy *VRMap);
   bool computeDelta(MachineInstr &MI, unsigned &Delta);
   void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
                          unsigned Num);
@@ -346,6 +348,10 @@ private:
                              unsigned &OffsetPos, unsigned &NewBase,
                              int64_t &NewOffset);
   void postprocessDAG();
+  /// Set the Minimum Initiation Interval for this schedule attempt.
+  void setMII(unsigned ResMII, unsigned RecMII);
+  /// Set the Maximum Initiation Interval for this schedule attempt.
+  void setMAX_II();
 };
 
 /// A NodeSet contains a set of SUnit DAG nodes with additional information
@@ -457,6 +463,56 @@ public:
 #endif
 };
 
+// 16 was selected based on the number of ProcResource kinds for all
+// existing Subtargets, so that SmallVector doesn't need to resize too often.
+static const int DefaultProcResSize = 16;
+
+class ResourceManager {
+private:
+  const MCSubtargetInfo *STI;
+  const MCSchedModel &SM;
+  const bool UseDFA;
+  std::unique_ptr<DFAPacketizer> DFAResources;
+  /// Each processor resource is associated with a so-called processor resource
+  /// mask. This vector allows correlating processor resource IDs with
+  /// processor resource masks. There is exactly one element per processor
+  /// resource declared by the scheduling model.
+  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
+
+  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount;
+
+public:
+  ResourceManager(const TargetSubtargetInfo *ST)
+      : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()),
+        ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
+        ProcResourceCount(SM.getNumProcResourceKinds(), 0) {
+    if (UseDFA)
+      DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+    initProcResourceVectors(SM, ProcResourceMasks);
+  }
+
+  void initProcResourceVectors(const MCSchedModel &SM,
+                               SmallVectorImpl<uint64_t> &Masks);
+  /// Check if the resources occupied by a MCInstrDesc are available in
+  /// the current state.
+  bool canReserveResources(const MCInstrDesc *MID) const;
+
+  /// Reserve the resources occupied by a MCInstrDesc and change the current
+  /// state to reflect that change.
+  void reserveResources(const MCInstrDesc *MID);
+
+  /// Check if the resources occupied by a machine instruction are available
+  /// in the current state.
+  bool canReserveResources(const MachineInstr &MI) const;
+
+  /// Reserve the resources occupied by a machine instruction and change the
+  /// current state to reflect that change.
+  void reserveResources(const MachineInstr &MI);
+
+  /// Reset the state
+  void clearResources();
+};
+
 /// This class represents the scheduled code.  The main data structure is a
 /// map from scheduled cycle to instructions.  During scheduling, the
 /// data structure explicitly represents all stages/iterations.   When
@@ -495,12 +551,11 @@ private:
   /// Virtual register information.
   MachineRegisterInfo &MRI;
 
-  std::unique_ptr<DFAPacketizer> Resources;
+  ResourceManager ProcItinResources;
 
 public:
   SMSchedule(MachineFunction *mf)
-      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
-        Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {}
 
   void reset() {
     ScheduledInstrs.clear();
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index c6a41598ce32..b67e6b52ac8f 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -1,9 +1,8 @@
-//=- llvm/CodeGen/MachineDominators.h ----------------------------*- C++ -*-==//
+//===- llvm/CodeGen/MachinePostDominators.h ----------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 8394b58d0a16..6d9fb9b9100a 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineRegionInfo.h -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index fef010a23ef9..b5deed1f5010 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineRegisterInfo.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -562,9 +561,14 @@ public:
   }
 
   /// hasOneNonDBGUse - Return true if there is exactly one non-Debug
-  /// instruction using the specified register.
+  /// use of the specified register.
   bool hasOneNonDBGUse(unsigned RegNo) const;
 
+  /// hasOneNonDBGUser - Return true if there is exactly one non-Debug
+  /// instruction using the specified register. Said instruction may have
+  /// multiple uses.
+  bool hasOneNonDBGUser(unsigned RegNo) const;
+  
   /// replaceRegWith - Replace all instances of FromReg with ToReg in the
   /// machine function.  This is like llvm-level X->replaceAllUsesWith(Y),
   /// except that it also changes any definitions of the register as well.
@@ -713,12 +717,12 @@ public:
 
   /// createVirtualRegister - Create and return a new virtual register in the
   /// function with the specified register class.
-  unsigned createVirtualRegister(const TargetRegisterClass *RegClass,
+  Register createVirtualRegister(const TargetRegisterClass *RegClass,
                                  StringRef Name = "");
 
   /// Create and return a new virtual register in the function with the same
   /// attributes as the given register.
-  unsigned cloneVirtualRegister(unsigned VReg, StringRef Name = "");
+  Register cloneVirtualRegister(Register VReg, StringRef Name = "");
 
   /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
   /// (target independent) virtual register.
@@ -733,7 +737,7 @@ public:
 
   /// Create and return a new generic virtual register with low-level
   /// type \p Ty.
-  unsigned createGenericVirtualRegister(LLT Ty, StringRef Name = "");
+  Register createGenericVirtualRegister(LLT Ty, StringRef Name = "");
 
   /// Remove all types associated to virtual registers (after instruction
   /// selection and constraining of all generic virtual registers).
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
index 5e91246b402c..0319ec774671 100644
--- a/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -1,9 +1,8 @@
 //===- MachineSSAUpdater.h - Unstructured SSA Update Tool -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 4bc31ae7c61a..75a334f61ad0 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -1,9 +1,8 @@
 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -265,10 +264,6 @@ protected:
   LiveIntervals *LIS;
   std::unique_ptr<MachineSchedStrategy> SchedImpl;
 
-  /// Topo - A topological ordering for SUnits which permits fast IsReachable
-  /// and similar queries.
-  ScheduleDAGTopologicalSort Topo;
-
   /// Ordered list of DAG postprocessing steps.
   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
 
@@ -292,7 +287,7 @@ public:
   ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                 bool RemoveKillFlags)
       : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
-        LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU) {}
+        LIS(C->LIS), SchedImpl(std::move(S)) {}
 
   // Provide a vtable anchor
   ~ScheduleDAGMI() override;
@@ -320,17 +315,6 @@ public:
       Mutations.push_back(std::move(Mutation));
   }
 
-  /// True if an edge can be added from PredSU to SuccSU without creating
-  /// a cycle.
-  bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
-
-  /// Add a DAG edge to the given SU with the given predecessor
-  /// dependence data.
-  ///
-  /// \returns true if the edge may be added without creating a cycle OR if an
-  /// equivalent edge already existed (false indicates failure).
-  bool addEdge(SUnit *SuccSU, const SDep &PredDep);
-
   MachineBasicBlock::iterator top() const { return CurrentTop; }
   MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
 
@@ -682,6 +666,10 @@ private:
   // scheduled instruction.
   SmallVector<unsigned, 16> ReservedCycles;
 
+  // For each PIdx, stores the first index into ReservedCycles that corresponds
+  // to it.
+  SmallVector<unsigned, 16> ReservedCyclesIndex;
+
 #ifndef NDEBUG
   // Remember the greatest possible stall as an upper bound on the number of
   // times we should retry the pending queue because of a hazard.
@@ -756,7 +744,11 @@ public:
   /// cycle.
   unsigned getLatencyStallCycles(SUnit *SU);
 
-  unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
+                                          unsigned Cycles);
+
+  std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
+                                                     unsigned Cycles);
 
   bool checkHazard(SUnit *SU);
 
@@ -1015,6 +1007,7 @@ protected:
 /// Callbacks from ScheduleDAGMI:
 ///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
 class PostGenericScheduler : public GenericSchedulerBase {
+protected:
   ScheduleDAGMI *DAG;
   SchedBoundary Top;
   SmallVector<SUnit*, 8> BotRoots;
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
index 9d8db393ca92..025989504177 100644
--- a/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/MacroFusion.h b/include/llvm/CodeGen/MacroFusion.h
index a77226ddaf33..3a140fe63fde 100644
--- a/include/llvm/CodeGen/MacroFusion.h
+++ b/include/llvm/CodeGen/MacroFusion.h
@@ -1,9 +1,8 @@
 //===- MacroFusion.h - Macro Fusion -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h
index bde451ae1fcc..0d6d8a31317b 100644
--- a/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -1,9 +1,8 @@
 //===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index a6d88b057dcb..c2cd6dadae5f 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -1,9 +1,8 @@
 //===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index d1432a3053c4..8b014ccbb07b 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -1,9 +1,8 @@
 //===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h
index 21b99027970d..51822d082bad 100644
--- a/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -1,9 +1,8 @@
 //===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index 4d4379fbc2c2..d5b1474f0f4c 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -1,9 +1,8 @@
 //===- Solution.h - PBQP Solution -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
index 995467dc56d8..876ab97a669f 100644
--- a/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -1,9 +1,8 @@
-//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
+//===- llvm/CodeGen/PBQPRAConstraint.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h
index dbf09ea31e20..a44715d4fc4f 100644
--- a/include/llvm/CodeGen/ParallelCG.h
+++ b/include/llvm/CodeGen/ParallelCG.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index acf1ebb5bc83..d92ee93268e7 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -1,9 +1,8 @@
 //===-- Passes.h - Target independent code generation passes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -346,8 +345,9 @@ namespace llvm {
   /// pointer or stack pointer index addressing.
   extern char &LocalStackSlotAllocationID;
 
-  /// ExpandISelPseudos - This pass expands pseudo-instructions.
-  extern char &ExpandISelPseudosID;
+  /// This pass expands pseudo-instructions, reserves registers and adjusts
+  /// machine frame information.
+  extern char &FinalizeISelID;
 
   /// UnpackMachineBundles - This pass unpack machine instruction bundles.
   extern char &UnpackMachineBundlesID;
@@ -447,6 +447,9 @@ namespace llvm {
   /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
   FunctionPass *createCFIInstrInserter();
 
+  /// Create Hardware Loop pass. \see HardwareLoops.cpp
+  FunctionPass *createHardwareLoopsPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/include/llvm/CodeGen/PreISelIntrinsicLowering.h
index b7f83e515b7e..73d7d779e55b 100644
--- a/include/llvm/CodeGen/PreISelIntrinsicLowering.h
+++ b/include/llvm/CodeGen/PreISelIntrinsicLowering.h
@@ -1,9 +1,8 @@
 //===- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index f66191bc9fb4..4b3cc9145a13 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/PseudoSourceValue.h ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,6 @@
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueMap.h"
 #include <map>
 
@@ -124,7 +122,7 @@ public:
   bool mayAlias(const MachineFrameInfo *) const override;
 };
 
-/// A specialized pseudo soruce value for holding GlobalValue values.
+/// A specialized pseudo source value for holding GlobalValue values.
 class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
   const GlobalValue *GV;
 
diff --git a/include/llvm/CodeGen/ReachingDefAnalysis.h b/include/llvm/CodeGen/ReachingDefAnalysis.h
index b21b745c8fd1..a599fb62f5e2 100644
--- a/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -1,15 +1,14 @@
 //==--- llvm/CodeGen/ReachingDefAnalysis.h - Reaching Def Analysis -*- C++ -*---==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file Reaching Defs Analysis pass.
 ///
-/// This pass tracks for each instruction what is the �closest� reaching def of
+/// This pass tracks for each instruction what is the "closest" reaching def of
 /// a given register. It is used by BreakFalseDeps (for clearance calculation)
 /// and ExecutionDomainFix (for arbitrating conflicting domains).
 ///
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index ba9763077d09..f7f92248f4ce 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -1,9 +1,8 @@
 //===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h
index b518fbb9c9da..9a63674689b3 100644
--- a/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/include/llvm/CodeGen/RegAllocRegistry.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,29 +22,30 @@ class FunctionPass;
 
 //===----------------------------------------------------------------------===//
 ///
-/// RegisterRegAlloc class - Track the registration of register allocators.
+/// RegisterRegAllocBase class - Track the registration of register allocators.
 ///
 //===----------------------------------------------------------------------===//
-class RegisterRegAlloc : public MachinePassRegistryNode<FunctionPass *(*)()> {
+template <class SubClass>
+class RegisterRegAllocBase : public MachinePassRegistryNode<FunctionPass *(*)()> {
 public:
   using FunctionPassCtor = FunctionPass *(*)();
 
   static MachinePassRegistry<FunctionPassCtor> Registry;
 
-  RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+  RegisterRegAllocBase(const char *N, const char *D, FunctionPassCtor C)
       : MachinePassRegistryNode(N, D, C) {
     Registry.Add(this);
   }
 
-  ~RegisterRegAlloc() { Registry.Remove(this); }
+  ~RegisterRegAllocBase() { Registry.Remove(this); }
 
   // Accessors.
-  RegisterRegAlloc *getNext() const {
-    return (RegisterRegAlloc *)MachinePassRegistryNode::getNext();
+  SubClass *getNext() const {
+    return static_cast<SubClass *>(MachinePassRegistryNode::getNext());
   }
 
-  static RegisterRegAlloc *getList() {
-    return (RegisterRegAlloc *)Registry.getList();
+  static SubClass *getList() {
+    return static_cast<SubClass *>(Registry.getList());
   }
 
   static FunctionPassCtor getDefault() { return Registry.getDefault(); }
@@ -57,6 +57,17 @@ public:
   }
 };
 
+class RegisterRegAlloc : public RegisterRegAllocBase<RegisterRegAlloc> {
+public:
+  RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+    : RegisterRegAllocBase(N, D, C) {}
+};
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
+template <class T>
+MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+RegisterRegAllocBase<T>::Registry;
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_REGALLOCREGISTRY_H
diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h
new file mode 100644
index 000000000000..907c1a99e56f
--- /dev/null
+++ b/include/llvm/CodeGen/Register.h
@@ -0,0 +1,60 @@
+//===-- llvm/CodeGen/Register.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_H
+#define LLVM_CODEGEN_REGISTER_H
+
+#include <cassert>
+
+namespace llvm {
+
+/// Wrapper class representing virtual and physical registers. Should be passed
+/// by value.
+class Register {
+  unsigned Reg; // Raw register number; 0 is the default-constructed value.
+
+public:
+  Register(unsigned Val = 0): Reg(Val) {}
+
+  /// Return true if this register number is in the virtual register
+  /// namespace, i.e. it is negative when converted to int.
+  bool isVirtual() const {
+    return int(Reg) < 0;
+  }
+
+  /// Return true if this register number is in the physical register
+  /// namespace, i.e. it is positive when converted to int.
+  bool isPhysical() const {
+    return int(Reg) > 0;
+  }
+
+  /// Convert a virtual register number to a 0-based index by clearing bit 31.
+  /// The first virtual register in a function will get the index 0.
+  unsigned virtRegIndex() const {
+    assert(isVirtual() && "Not a virtual register");
+    return Reg & ~(1u << 31);
+  }
+
+  /// Convert a 0-based index to a virtual register number by setting bit 31.
+  /// This is the inverse operation of virtRegIndex().
+  static Register index2VirtReg(unsigned Index) {
+    return Register(Index | (1u << 31));
+  }
+
+  operator unsigned() const { // Implicit conversion to the raw number.
+    return Reg;
+  }
+
+  bool isValid() const { // False only for register number 0.
+    return Reg != 0;
+  }
+};
+
+}
+
+#endif
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 97113c575815..14af5c4d090d 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -1,9 +1,8 @@
 //===- RegisterClassInfo.h - Dynamic Register Class Info --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 79054b9e33b7..5bbaa03fd751 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -1,9 +1,8 @@
 //===- RegisterPressure.h - Dynamic Register Pressure -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -132,10 +131,6 @@ public:
   }
 };
 
-template <> struct isPodLike<PressureChange> {
-   static const bool value = true;
-};
-
 /// List of PressureChanges in order of increasing, unique PSetID.
 ///
 /// Use a small fixed number, because we can fit more PressureChanges in an
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index b6bd028a8cac..9c48df82f07d 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -1,9 +1,8 @@
 //===- RegisterScavenging.h - Machine register scavenging -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -158,10 +157,15 @@ public:
   /// Returns the scavenged register.
   /// This is deprecated as it depends on the quality of the kill flags being
   /// present; Use scavengeRegisterBackwards() instead!
+  ///
+  /// If \p AllowSpill is false, fail if a spill is required to make the
+  /// register available, and return NoRegister.
   unsigned scavengeRegister(const TargetRegisterClass *RC,
-                            MachineBasicBlock::iterator I, int SPAdj);
-  unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) {
-    return scavengeRegister(RegClass, MBBI, SPAdj);
+                            MachineBasicBlock::iterator I, int SPAdj,
+                            bool AllowSpill = true);
+  unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
+                            bool AllowSpill = true) {
+    return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill);
   }
 
   /// Make a register of the specific register class available from the current
@@ -170,9 +174,13 @@ public:
   /// SPAdj is the stack adjustment due to call frame, it's passed along to
   /// eliminateFrameIndex().
   /// Returns the scavenged register.
+  ///
+  /// If \p AllowSpill is false, fail if a spill is required to make the
+  /// register available, and return NoRegister.
   unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC,
                                      MachineBasicBlock::iterator To,
-                                     bool RestoreAfter, int SPAdj);
+                                     bool RestoreAfter, int SPAdj,
+                                     bool AllowSpill = true);
 
   /// Tell the scavenger a register is used.
   void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h
index efecc61d9c30..33554550b9dc 100644
--- a/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -1,9 +1,8 @@
 //==- RegisterUsageInfo.h - Register Usage Informartion Storage --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index 8d582ee298b6..81587a3170ce 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -1,9 +1,8 @@
 //===----- ResourcePriorityQueue.h - A DFA-oriented priority queue -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 28567a1ce437..f71f39e5bf03 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -1,9 +1,8 @@
 //===-- CodeGen/RuntimeLibcalls.h - Runtime Library Calls -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/SDNodeProperties.td b/include/llvm/CodeGen/SDNodeProperties.td
index 83bbab2fdc8d..d25e0bda26a9 100644
--- a/include/llvm/CodeGen/SDNodeProperties.td
+++ b/include/llvm/CodeGen/SDNodeProperties.td
@@ -1,9 +1,8 @@
 //===- SDNodeProperties.td - Common code for DAG isels ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 0870d67db390..e004f3bf2cc1 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/ScheduleDAG.h - Common Base Class -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -239,9 +238,6 @@ class TargetRegisterInfo;
     void dump(const TargetRegisterInfo *TRI = nullptr) const;
   };
 
-  template <>
-  struct isPodLike<SDep> { static const bool value = true; };
-
   /// Scheduling unit. This is a node in the scheduling DAG.
   class SUnit {
   private:
@@ -418,7 +414,7 @@ class TargetRegisterInfo;
     /// dirty.
     void setDepthToAtLeast(unsigned NewDepth);
 
-    /// If NewDepth is greater than this node's depth value, set it to be
+    /// If NewHeight is greater than this node's height value, set it to be
     /// the new height value. This also recursively marks predecessor nodes
     /// dirty.
     void setHeightToAtLeast(unsigned NewHeight);
@@ -695,6 +691,12 @@ class TargetRegisterInfo;
     std::vector<SUnit> &SUnits;
     SUnit *ExitSU;
 
+    // Have any new nodes been added?
+    bool Dirty = false;
+
+    // Outstanding added edges, that have not been applied to the ordering.
+    SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates;
+
     /// Maps topological index to the node number.
     std::vector<int> Index2Node;
     /// Maps the node number to its topological index.
@@ -714,6 +716,11 @@ class TargetRegisterInfo;
     /// Assigns the topological index to the node n.
     void Allocate(int n, int index);
 
+    /// Fix the ordering, by either recomputing from scratch or by applying
+    /// any outstanding updates. Uses a heuristic to estimate what will be
+    /// cheaper.
+    void FixOrder();
+
   public:
     ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
 
@@ -738,11 +745,19 @@ class TargetRegisterInfo;
     /// added from SUnit \p X to SUnit \p Y.
     void AddPred(SUnit *Y, SUnit *X);
 
+    /// Queues an update to the topological ordering to accommodate an edge to
+    /// be added from SUnit \p X to SUnit \p Y.
+    void AddPredQueued(SUnit *Y, SUnit *X);
+
     /// Updates the topological ordering to accommodate an an edge to be
     /// removed from the specified node \p N from the predecessors of the
     /// current node \p M.
     void RemovePred(SUnit *M, SUnit *N);
 
+    /// Mark the ordering as temporarily broken, after a new node has been
+    /// added.
+    void MarkDirty() { Dirty = true; }
+
     typedef std::vector<int>::iterator iterator;
     typedef std::vector<int>::const_iterator const_iterator;
     iterator begin() { return Index2Node.begin(); }
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index daad18125db9..3e3b604acbac 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -1,9 +1,8 @@
 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -235,6 +234,11 @@ namespace llvm {
     /// For an unanalyzable memory access, this Value is used in maps.
     UndefValue *UnknownValue;
 
+
+    /// Topo - A topological ordering for SUnits which permits fast IsReachable
+    /// and similar queries.
+    ScheduleDAGTopologicalSort Topo;
+
     using DbgValueVector =
         std::vector<std::pair<MachineInstr *, MachineInstr *>>;
     /// Remember instruction that precedes DBG_VALUE.
@@ -339,6 +343,17 @@ namespace llvm {
     /// Fixes register kill flags that scheduling has made invalid.
     void fixupKills(MachineBasicBlock &MBB);
 
+    /// True if an edge can be added from PredSU to SuccSU without creating
+    /// a cycle.
+    bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
+
+    /// Add a DAG edge to the given SU with the given predecessor
+    /// dependence data.
+    ///
+    /// \returns true if the edge may be added without creating a cycle OR if an
+    /// equivalent edge already existed (false indicates failure).
+    bool addEdge(SUnit *SuccSU, const SDep &PredDep);
+
   protected:
     void initSUnits();
     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
diff --git a/include/llvm/CodeGen/ScheduleDAGMutation.h b/include/llvm/CodeGen/ScheduleDAGMutation.h
index 5c236427e0b8..d1dd72859a38 100644
--- a/include/llvm/CodeGen/ScheduleDAGMutation.h
+++ b/include/llvm/CodeGen/ScheduleDAGMutation.h
@@ -1,9 +1,8 @@
 //===- ScheduleDAGMutation.h - MachineInstr Scheduling ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
index 3ecc033ac35a..d60deab95f5d 100644
--- a/include/llvm/CodeGen/ScheduleDFS.h
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -1,9 +1,8 @@
-//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
+//===- ScheduleDFS.h - ILP metric for ScheduleDAGInstrs ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index ace4a2d836ca..37590f496ca2 100644
--- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -1,9 +1,8 @@
 //=- llvm/CodeGen/ScheduleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index fbe559f25556..0ccfaafd9e50 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 3f75d108f282..ac67f3008fa7 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -1,9 +1,8 @@
 //=- llvm/CodeGen/ScoreboardHazardRecognizer.h - Schedule Support -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 67fe87fc96af..12a970847021 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -268,6 +267,10 @@ class SelectionDAG {
   /// Tracks dbg_value and dbg_label information through SDISel.
   SDDbgInfo *DbgInfo;
 
+  using CallSiteInfo = MachineFunction::CallSiteInfo;
+  using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
+  DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
+
   uint16_t NextPersistentId = 0;
 
 public:
@@ -298,6 +301,9 @@ public:
 
     /// The node N that was updated.
     virtual void NodeUpdated(SDNode *N);
+
+    /// The node N that was inserted.
+    virtual void NodeInserted(SDNode *N);
   };
 
   struct DAGNodeDeletedListener : public DAGUpdateListener {
@@ -404,6 +410,7 @@ public:
   const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
   const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
   const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
+  const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
   LLVMContext *getContext() const {return Context; }
   OptimizationRemarkEmitter &getORE() const { return *ORE; }
 
@@ -573,6 +580,9 @@ public:
                       bool isTarget = false, bool isOpaque = false);
   SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL,
                             bool isTarget = false);
+  SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL,
+                                 bool LegalTypes = true);
+
   SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT,
                             bool isOpaque = false) {
     return getConstant(Val, DL, VT, true, isOpaque);
@@ -789,6 +799,16 @@ public:
   /// value assuming it was the smaller SrcTy value.
   SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
 
+  /// Convert Op, which must be of integer type, to the integer type VT, by
+  /// either truncating it or performing a zero or sign extension, whichever
+  /// is appropriate for the pointer's semantics.
+  SDValue getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
+
+  /// Return the expression required to extend the Op as a pointer value
+  /// assuming it was the smaller SrcTy value. This may be either a zero extend
+  /// or a sign extend.
+  SDValue getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
+
   /// Convert Op, which must be of integer type, to the integer type VT,
   /// by using an extension appropriate for the target's
   /// BooleanContent for type OpVT or truncating it.
@@ -971,6 +991,10 @@ public:
   /// Try to simplify a shift into 1 of its operands or a constant.
   SDValue simplifyShift(SDValue X, SDValue Y);
 
+  /// Try to simplify a floating-point binary operation into 1 of its operands
+  /// or a constant.
+  SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y);
+
   /// VAArg produces a result and token chain, and takes a pointer
   /// and a source value as input.
   SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
@@ -980,22 +1004,12 @@ public:
   /// valid Opcodes. ISD::ATOMIC_CMO_SWAP produces the value loaded and a
   /// chain result. ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS produces the value loaded,
   /// a success flag (initially i1), and a chain.
-  SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
-                           SDVTList VTs, SDValue Chain, SDValue Ptr,
-                           SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
-                           unsigned Alignment, AtomicOrdering SuccessOrdering,
-                           AtomicOrdering FailureOrdering,
-                           SyncScope::ID SSID);
   SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
                            SDVTList VTs, SDValue Chain, SDValue Ptr,
                            SDValue Cmp, SDValue Swp, MachineMemOperand *MMO);
 
   /// Gets a node for an atomic op, produces result (if relevant)
   /// and chain and takes 2 operands.
-  SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
-                    SDValue Ptr, SDValue Val, const Value *PtrVal,
-                    unsigned Alignment, AtomicOrdering Ordering,
-                    SyncScope::ID SSID);
   SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
                     SDValue Ptr, SDValue Val, MachineMemOperand *MMO);
 
@@ -1021,12 +1035,19 @@ public:
     unsigned Align = 0,
     MachineMemOperand::Flags Flags
     = MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
-    unsigned Size = 0);
+    unsigned Size = 0,
+    const AAMDNodes &AAInfo = AAMDNodes());
 
   SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList,
                               ArrayRef<SDValue> Ops, EVT MemVT,
                               MachineMemOperand *MMO);
 
+  /// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends
+  /// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between
+  /// offsets `Offset` and `Offset + Size`.
+  SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
+                          int FrameIndex, int64_t Size, int64_t Offset = -1);
+
   /// Create a MERGE_VALUES node from the given operands.
   SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
 
@@ -1154,6 +1175,11 @@ public:
                                SDValue Op3, SDValue Op4, SDValue Op5);
   SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
 
+  /// Creates a new TokenFactor containing \p Vals. If \p Vals contains 64k
+  /// values or more, move values into new TokenFactors in 64k-1 blocks, until
+  /// the final TokenFactor has fewer than 64k operands.
+  SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl<SDValue> &Vals);
+
   /// *Mutate* the specified machine node's memory references to the provided
   /// list.
   void setNodeMemRefs(MachineSDNode *N,
@@ -1358,21 +1384,20 @@ public:
   /// with this SelectionDAG.
   bool hasDebugValues() const { return !DbgInfo->empty(); }
 
-  SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
-  SDDbgInfo::DbgIterator DbgEnd()   { return DbgInfo->DbgEnd(); }
+  SDDbgInfo::DbgIterator DbgBegin() const { return DbgInfo->DbgBegin(); }
+  SDDbgInfo::DbgIterator DbgEnd() const  { return DbgInfo->DbgEnd(); }
 
-  SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
+  SDDbgInfo::DbgIterator ByvalParmDbgBegin() const {
     return DbgInfo->ByvalParmDbgBegin();
   }
-
-  SDDbgInfo::DbgIterator ByvalParmDbgEnd()   {
+  SDDbgInfo::DbgIterator ByvalParmDbgEnd() const {
     return DbgInfo->ByvalParmDbgEnd();
   }
 
-  SDDbgInfo::DbgLabelIterator DbgLabelBegin() {
+  SDDbgInfo::DbgLabelIterator DbgLabelBegin() const {
     return DbgInfo->DbgLabelBegin();
   }
-  SDDbgInfo::DbgLabelIterator DbgLabelEnd() {
+  SDDbgInfo::DbgLabelIterator DbgLabelEnd() const {
     return DbgInfo->DbgLabelEnd();
   }
 
@@ -1395,27 +1420,42 @@ public:
                            const SDNode *N2);
 
   SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
-                                 SDNode *Cst1, SDNode *Cst2);
+                                 SDNode *N1, SDNode *N2);
 
   SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
-                                 const ConstantSDNode *Cst1,
-                                 const ConstantSDNode *Cst2);
+                                 const ConstantSDNode *C1,
+                                 const ConstantSDNode *C2);
 
   SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
                                        ArrayRef<SDValue> Ops,
                                        const SDNodeFlags Flags = SDNodeFlags());
 
+  /// Fold floating-point operations with 2 operands when both operands are
+  /// constants and/or undefined.
+  SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
+                             SDValue N1, SDValue N2);
+
   /// Constant fold a setcc to true or false.
   SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond,
                     const SDLoc &dl);
 
-  /// See if the specified operand can be simplified with the knowledge that only
-  /// the bits specified by Mask are used.  If so, return the simpler operand,
-  /// otherwise return a null SDValue.
+  /// See if the specified operand can be simplified with the knowledge that
+  /// only the bits specified by DemandedBits are used.  If so, return the
+  /// simpler operand, otherwise return a null SDValue.
+  ///
+  /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+  /// simplify nodes with multiple uses more aggressively.)
+  SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits);
+
+  /// See if the specified operand can be simplified with the knowledge that
+  /// only the bits specified by DemandedBits are used in the elements specified
+  /// by DemandedElts.  If so, return the simpler operand, otherwise return a
+  /// null SDValue.
   ///
   /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
   /// simplify nodes with multiple uses more aggressively.)
-  SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+  SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits,
+                          const APInt &DemandedElts);
 
   /// Return true if the sign bit of Op is known to be zero.
   /// We use this predicate to simplify operations downstream.
@@ -1424,8 +1464,19 @@ public:
   /// Return true if 'Op & Mask' is known to be zero.  We
   /// use this predicate to simplify operations downstream.  Op and Mask are
   /// known to be the same type.
-  bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth = 0)
-    const;
+  bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                         unsigned Depth = 0) const;
+
+  /// Return true if 'Op & Mask' is known to be zero in DemandedElts.  We
+  /// use this predicate to simplify operations downstream.  Op and Mask are
+  /// known to be the same type.
+  bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                         const APInt &DemandedElts, unsigned Depth = 0) const;
+
+  /// Return true if '(Op & Mask) == Mask'.
+  /// Op and Mask are known to be the same type.
+  bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask,
+                            unsigned Depth = 0) const;
 
   /// Determine which bits of Op are known to be either zero or one and return
   /// them in Known. For vectors, the known bits are those that are shared by
@@ -1525,6 +1576,13 @@ public:
   /// Test whether \p V has a splatted value.
   bool isSplatValue(SDValue V, bool AllowUndefs = false);
 
+  /// If V is a splatted value, return the source vector and its splat index.
+  SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
+
+  /// If V is a splat vector, return its scalar source operand by extracting
+  /// that element from the source vector.
+  SDValue getSplatValue(SDValue V);
+
   /// Match a binop + shuffle pyramid that represents a horizontal reduction
   /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node /p
   /// Extract. The reduction must use one of the opcodes listed in /p
@@ -1542,6 +1600,11 @@ public:
   /// vector op and fill the end of the resulting vector with UNDEFS.
   SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
 
+  /// Like UnrollVectorOp(), but for the [US](ADD|SUB|MUL)O family of opcodes.
+  /// This is a separate function because those opcodes have two results.
+  std::pair<SDValue, SDValue> UnrollVectorOverflowOp(SDNode *N,
+                                                     unsigned ResNE = 0);
+
   /// Return true if loads are next to each other and can be
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
@@ -1576,6 +1639,9 @@ public:
     return SplitVector(N->getOperand(OpNo), SDLoc(N));
   }
 
+  /// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+  SDValue WidenVector(const SDValue &N, const SDLoc &DL);
+
   /// Append the extracted elements from Start to Count out of the vector Op
   /// in Args. If Count is 0, all of the elements will be extracted.
   void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args,
@@ -1597,6 +1663,17 @@ public:
            isConstantFPBuildVectorOrConstantFP(N);
   }
 
+  void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
+    SDCallSiteInfo[CallNode] = std::move(CallInfo);
+  }
+
+  CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
+    auto I = SDCallSiteInfo.find(CallNode);
+    if (I != SDCallSiteInfo.end())
+      return std::move(I->second);
+    return CallSiteInfo();
+  }
+
 private:
   void InsertNode(SDNode *N);
   bool RemoveNodeFromCSEMaps(SDNode *N);
diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 2b2c48d57bc0..4ee58333495b 100644
--- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -1,9 +1,8 @@
 //===- SelectionDAGAddressAnalysis.h - DAG Address Analysis -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,11 +33,13 @@ class BaseIndexOffset {
 private:
   SDValue Base;
   SDValue Index;
-  int64_t Offset = 0;
+  Optional<int64_t> Offset;
   bool IsIndexSignExt = false;
 
 public:
   BaseIndexOffset() = default;
+  BaseIndexOffset(SDValue Base, SDValue Index, bool IsIndexSignExt)
+      : Base(Base), Index(Index), Offset(), IsIndexSignExt(IsIndexSignExt) {}
   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                   bool IsIndexSignExt)
       : Base(Base), Index(Index), Offset(Offset),
@@ -48,6 +49,13 @@ public:
   SDValue getBase() const { return Base; }
   SDValue getIndex() { return Index; }
   SDValue getIndex() const { return Index; }
+  bool hasValidOffset() const { return Offset.hasValue(); }
+
+  // Returns true if `Other` and `*this` are both some offset from the same base
+  // pointer. In that case, `Off` is set to the offset between `*this` and
+  // `Other` (negative if `Other` is before `*this`).
+  bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
+                      int64_t &Off) const;
 
   bool equalBaseIndex(const BaseIndexOffset &Other,
                       const SelectionDAG &DAG) const {
@@ -55,11 +63,31 @@ public:
     return equalBaseIndex(Other, DAG, Off);
   }
 
-  bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
-                      int64_t &Off) const;
+  // Returns true if `Other` (with size `OtherBitSize`) can be proven to be
+  // fully contained in `*this` (with size `BitSize`).
+  bool contains(const SelectionDAG &DAG, int64_t BitSize,
+                const BaseIndexOffset &Other, int64_t OtherBitSize,
+                int64_t &BitOffset) const;
+
+  bool contains(const SelectionDAG &DAG, int64_t BitSize,
+                const BaseIndexOffset &Other, int64_t OtherBitSize) const {
+    int64_t BitOffset;
+    return contains(DAG, BitSize, Other, OtherBitSize, BitOffset);
+  }
+
+  // Returns true if `Op0` and `Op1` can be proven to alias/not alias, in
+  // which case `IsAlias` is set to true/false.
+  static bool computeAliasing(const SDNode *Op0,
+                              const Optional<int64_t> NumBytes0,
+                              const SDNode *Op1,
+                              const Optional<int64_t> NumBytes1,
+                              const SelectionDAG &DAG, bool &IsAlias);
+
+  /// Parses tree in N for base, index, offset addresses.
+  static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG);
 
-  /// Parses tree in Ptr for base, index, offset addresses.
-  static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG);
+  void print(raw_ostream& OS) const;
+  void dump() const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 6758c55c696a..147c325342fc 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/SelectionDAGISel.h - Common Base Class------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,6 +34,7 @@ namespace llvm {
   class TargetLibraryInfo;
   class FunctionLoweringInfo;
   class ScheduleHazardRecognizer;
+  class SwiftErrorValueTracking;
   class GCFunctionInfo;
   class ScheduleDAGSDNodes;
   class LoadInst;
@@ -46,6 +46,7 @@ public:
   TargetMachine &TM;
   const TargetLibraryInfo *LibInfo;
   FunctionLoweringInfo *FuncInfo;
+  SwiftErrorValueTracking *SwiftError;
   MachineFunction *MF;
   MachineRegisterInfo *RegInfo;
   SelectionDAG *CurDAG;
@@ -144,10 +145,12 @@ public:
     OPC_CheckInteger,
     OPC_CheckChild0Integer, OPC_CheckChild1Integer, OPC_CheckChild2Integer,
     OPC_CheckChild3Integer, OPC_CheckChild4Integer,
-    OPC_CheckCondCode,
+    OPC_CheckCondCode, OPC_CheckChild2CondCode,
     OPC_CheckValueType,
     OPC_CheckComplexPat,
     OPC_CheckAndImm, OPC_CheckOrImm,
+    OPC_CheckImmAllOnesV,
+    OPC_CheckImmAllZerosV,
     OPC_CheckFoldableChainNode,
 
     OPC_EmitInteger,
@@ -303,7 +306,7 @@ public:
 private:
 
   // Calls to these functions are generated by tblgen.
-  void Select_INLINEASM(SDNode *N);
+  void Select_INLINEASM(SDNode *N, bool Branch);
   void Select_READ_REGISTER(SDNode *Op);
   void Select_WRITE_REGISTER(SDNode *Op);
   void Select_UNDEF(SDNode *N);
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 10f284179084..5aab9643e09d 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -184,6 +183,7 @@ public:
   inline unsigned getNumOperands() const;
   inline const SDValue &getOperand(unsigned i) const;
   inline uint64_t getConstantOperandVal(unsigned i) const;
+  inline const APInt &getConstantOperandAPInt(unsigned i) const;
   inline bool isTargetMemoryOpcode() const;
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
@@ -232,7 +232,6 @@ template<> struct DenseMapInfo<SDValue> {
     return LHS == RHS;
   }
 };
-template <> struct isPodLike<SDValue> { static const bool value = true; };
 
 /// Allow casting operators to work directly on
 /// SDValues as if they were SDNode*'s.
@@ -369,6 +368,13 @@ private:
   bool ApproximateFuncs : 1;
   bool AllowReassociation : 1;
 
+  // We assume instructions do not raise floating-point exceptions by default,
+  // and only those marked explicitly may do so.  We could choose to represent
+  // this via a positive "FPExcept" flag like on the MI level, but having a
+  // negative "NoFPExcept" flag here (that defaults to true) makes the flag
+  // intersection logic more straightforward.
+  bool NoFPExcept : 1;
+
 public:
   /// Default constructor turns off all optimization flags.
   SDNodeFlags()
@@ -376,7 +382,7 @@ public:
         Exact(false), NoNaNs(false), NoInfs(false),
         NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
         AllowContract(false), ApproximateFuncs(false),
-        AllowReassociation(false) {}
+        AllowReassociation(false), NoFPExcept(true) {}
 
   /// Propagate the fast-math-flags from an IR FPMathOperator.
   void copyFMF(const FPMathOperator &FPMO) {
@@ -439,6 +445,10 @@ public:
     setDefined();
     AllowReassociation = b;
   }
+  void setFPExcept(bool b) {
+    setDefined();
+    NoFPExcept = !b;
+  }
 
   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -452,9 +462,10 @@ public:
   bool hasAllowContract() const { return AllowContract; }
   bool hasApproximateFuncs() const { return ApproximateFuncs; }
   bool hasAllowReassociation() const { return AllowReassociation; }
+  bool hasFPExcept() const { return !NoFPExcept; }
 
   bool isFast() const {
-    return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
+    return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
            AllowContract && ApproximateFuncs && AllowReassociation;
   }
 
@@ -474,6 +485,7 @@ public:
     AllowContract &= Flags.AllowContract;
     ApproximateFuncs &= Flags.ApproximateFuncs;
     AllowReassociation &= Flags.AllowReassociation;
+    NoFPExcept &= Flags.NoFPExcept;
   }
 };
 
@@ -489,6 +501,17 @@ protected:
   // SubclassData.  These are designed to fit within a uint16_t so they pack
   // with NodeType.
 
+#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
+// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
+// and give the `pack` pragma push semantics.
+#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
+#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
+#else
+#define BEGIN_TWO_BYTE_PACK()
+#define END_TWO_BYTE_PACK()
+#endif
+
+BEGIN_TWO_BYTE_PACK()
   class SDNodeBitfields {
     friend class SDNode;
     friend class MemIntrinsicSDNode;
@@ -561,6 +584,9 @@ protected:
     LoadSDNodeBitfields LoadSDNodeBits;
     StoreSDNodeBitfields StoreSDNodeBits;
   };
+END_TWO_BYTE_PACK()
+#undef BEGIN_TWO_BYTE_PACK
+#undef END_TWO_BYTE_PACK
 
   // RawSDNodeBits must cover the entirety of the union.  This means that all of
   // the union's members must have size <= RawSDNodeBits.  We write the RHS as
@@ -678,6 +704,8 @@ public:
       case ISD::STRICT_FFLOOR:
       case ISD::STRICT_FROUND:
       case ISD::STRICT_FTRUNC:
+      case ISD::STRICT_FP_ROUND:
+      case ISD::STRICT_FP_EXTEND:
         return true;
     }
   }
@@ -898,9 +926,17 @@ public:
   /// Return the number of values used by this operation.
   unsigned getNumOperands() const { return NumOperands; }
 
+  /// Return the maximum number of operands that a SDNode can hold.
+  static constexpr size_t getMaxNumOperands() {
+    return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
+  }
+
   /// Helper method returns the integer value of a ConstantSDNode operand.
   inline uint64_t getConstantOperandVal(unsigned Num) const;
 
+  /// Helper method returns the APInt of a ConstantSDNode operand.
+  inline const APInt &getConstantOperandAPInt(unsigned Num) const;
+
   const SDValue &getOperand(unsigned Num) const {
     assert(Num < NumOperands && "Invalid child # of SDNode!");
     return OperandList[Num];
@@ -1128,6 +1164,10 @@ inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
   return Node->getConstantOperandVal(i);
 }
 
+inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
+  return Node->getConstantOperandAPInt(i);
+}
+
 inline bool SDValue::isTargetOpcode() const {
   return Node->isTargetOpcode();
 }
@@ -1356,6 +1396,8 @@ public:
            N->getOpcode() == ISD::ATOMIC_LOAD_MAX     ||
            N->getOpcode() == ISD::ATOMIC_LOAD_UMIN    ||
            N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FADD    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB    ||
            N->getOpcode() == ISD::ATOMIC_LOAD         ||
            N->getOpcode() == ISD::ATOMIC_STORE        ||
            N->getOpcode() == ISD::MLOAD               ||
@@ -1372,7 +1414,10 @@ class AtomicSDNode : public MemSDNode {
 public:
   AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
                EVT MemVT, MachineMemOperand *MMO)
-      : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {}
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
+            MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
+  }
 
   const SDValue &getBasePtr() const { return getOperand(1); }
   const SDValue &getVal() const { return getOperand(2); }
@@ -1408,6 +1453,8 @@ public:
            N->getOpcode() == ISD::ATOMIC_LOAD_MAX     ||
            N->getOpcode() == ISD::ATOMIC_LOAD_UMIN    ||
            N->getOpcode() == ISD::ATOMIC_LOAD_UMAX    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FADD    ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_FSUB    ||
            N->getOpcode() == ISD::ATOMIC_LOAD         ||
            N->getOpcode() == ISD::ATOMIC_STORE;
   }
@@ -1467,14 +1514,16 @@ public:
 
   bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
 
-  int  getSplatIndex() const {
+  int getSplatIndex() const {
     assert(isSplat() && "Cannot get splat index for non-splat!");
     EVT VT = getValueType(0);
-    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
       if (Mask[i] >= 0)
         return Mask[i];
-    }
-    llvm_unreachable("Splat with all undef indices?");
+
+    // We can choose any index value here and be correct because all elements
+    // are undefined. Return 0 for better potential for callers to simplify.
+    return 0;
   }
 
   static bool isSplatMask(const int *Mask, EVT VT);
@@ -1536,6 +1585,10 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
   return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
 }
 
+const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
+  return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
+}
+
 class ConstantFPSDNode : public SDNode {
   friend class SelectionDAG;
 
@@ -1603,20 +1656,36 @@ SDValue peekThroughBitcasts(SDValue V);
 /// If \p V is not a bitcasted one-use value, it is returned as-is.
 SDValue peekThroughOneUseBitcasts(SDValue V);
 
+/// Return the non-extracted vector source operand of \p V if it exists.
+/// If \p V is not an extracted subvector, it is returned as-is.
+SDValue peekThroughExtractSubvectors(SDValue V);
+
 /// Returns true if \p V is a bitwise not operation. Assumes that an all ones
 /// constant is canonicalized to be operand 1.
-bool isBitwiseNot(SDValue V);
+bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
+                                    bool AllowTruncation = false);
+
+/// Returns the SDNode if it is a demanded constant splat BuildVector or
+/// constant int.
+ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+                                    bool AllowUndefs = false,
+                                    bool AllowTruncation = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant float.
 ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
 
+/// Returns the SDNode if it is a demanded constant splat BuildVector or
+/// constant float.
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
+                                        bool AllowUndefs = false);
+
 /// Return true if the value is a constant 0 integer or a splatted vector of
-/// a constant 0 integer (with no undefs).
+/// a constant 0 integer (with no undefs by default).
 /// Build vector implicit truncation is not an issue for null values.
-bool isNullOrNullSplat(SDValue V);
+bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
 
 /// Return true if the value is a constant 1 integer or a splatted vector of a
 /// constant 1 integer (with no undefs).
@@ -1673,6 +1742,38 @@ public:
   }
 };
 
+/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
+/// the offset and size that are started/ended in the underlying FrameIndex.
+class LifetimeSDNode : public SDNode {
+  friend class SelectionDAG;
+  int64_t Size;
+  int64_t Offset; // -1 if offset is unknown.
+
+  LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+                 SDVTList VTs, int64_t Size, int64_t Offset)
+      : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
+public:
+  int64_t getFrameIndex() const {
+    return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
+  }
+
+  bool hasOffset() const { return Offset >= 0; }
+  int64_t getOffset() const {
+    assert(hasOffset() && "offset is unknown");
+    return Offset;
+  }
+  int64_t getSize() const {
+    assert(hasOffset() && "offset is unknown");
+    return Size;
+  }
+
+  // Methods to support isa and dyn_cast
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::LIFETIME_START ||
+           N->getOpcode() == ISD::LIFETIME_END;
+  }
+};
+
 class JumpTableSDNode : public SDNode {
   friend class SelectionDAG;
 
@@ -1818,12 +1919,31 @@ public:
                        unsigned MinSplatBits = 0,
                        bool isBigEndian = false) const;
 
+  /// Returns the demanded splatted value or a null value if this is not a
+  /// splat.
+  ///
+  /// The DemandedElts mask indicates the elements that must be in the splat.
+  /// If passed a non-null UndefElements bitvector, it will resize it to match
+  /// the vector width and set the bits where elements are undef.
+  SDValue getSplatValue(const APInt &DemandedElts,
+                        BitVector *UndefElements = nullptr) const;
+
   /// Returns the splatted value or a null value if this is not a splat.
   ///
   /// If passed a non-null UndefElements bitvector, it will resize it to match
   /// the vector width and set the bits where elements are undef.
   SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
 
+  /// Returns the demanded splatted constant or null if this is not a constant
+  /// splat.
+  ///
+  /// The DemandedElts mask indicates the elements that must be in the splat.
+  /// If passed a non-null UndefElements bitvector, it will resize it to match
+  /// the vector width and set the bits where elements are undef.
+  ConstantSDNode *
+  getConstantSplatNode(const APInt &DemandedElts,
+                       BitVector *UndefElements = nullptr) const;
+
   /// Returns the splatted constant or null if this is not a constant
   /// splat.
   ///
@@ -1832,6 +1952,16 @@ public:
   ConstantSDNode *
   getConstantSplatNode(BitVector *UndefElements = nullptr) const;
 
+  /// Returns the demanded splatted constant FP or null if this is not a
+  /// constant FP splat.
+  ///
+  /// The DemandedElts mask indicates the elements that must be in the splat.
+  /// If passed a non-null UndefElements bitvector, it will resize it to match
+  /// the vector width and set the bits where elements are undef.
+  ConstantFPSDNode *
+  getConstantFPSplatNode(const APInt &DemandedElts,
+                         BitVector *UndefElements = nullptr) const;
+
   /// Returns the splatted constant FP or null if this is not a constant
   /// FP splat.
   ///
@@ -1956,8 +2086,10 @@ class LabelSDNode : public SDNode {
 
   MCSymbol *Label;
 
-  LabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L)
-      : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {}
+  LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
+      : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
+    assert(LabelSDNode::classof(this) && "not a label opcode");
+  }
 
 public:
   MCSymbol *getLabel() const { return Label; }
@@ -2049,6 +2181,8 @@ public:
       : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
     LSBaseSDNodeBits.AddressingMode = AM;
     assert(getAddressingMode() == AM && "Value truncated");
+    assert((!MMO->isAtomic() || MMO->isVolatile()) &&
+           "use an AtomicSDNode instead for non-volatile atomics");
   }
 
   const SDValue &getOffset() const {
@@ -2473,18 +2607,6 @@ namespace ISD {
       cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
   }
 
-  /// Return true if the node is a math/logic binary operator. This corresponds
-  /// to the IR function of the same name.
-  inline bool isBinaryOp(const SDNode *N) {
-    auto Op = N->getOpcode();
-    return (Op == ISD::ADD || Op == ISD::SUB || Op == ISD::MUL ||
-            Op == ISD::AND || Op == ISD::OR || Op == ISD::XOR ||
-            Op == ISD::SHL || Op == ISD::SRL || Op == ISD::SRA ||
-            Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM ||
-            Op == ISD::UREM || Op == ISD::FADD || Op == ISD::FSUB ||
-            Op == ISD::FMUL || Op == ISD::FDIV || Op == ISD::FREM);
-  }
-
   /// Attempt to match a unary predicate against a scalar/splat constant or
   /// every element of a constant BUILD_VECTOR.
   /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
@@ -2495,10 +2617,11 @@ namespace ISD {
   /// Attempt to match a binary predicate against a pair of scalar/splat
   /// constants or every element of a pair of constant BUILD_VECTORs.
   /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
+  /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
   bool matchBinaryPredicate(
       SDValue LHS, SDValue RHS,
       std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
-      bool AllowUndefs = false);
+      bool AllowUndefs = false, bool AllowTypeMismatch = false);
 } // end namespace ISD
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 45c1df48a5e6..6f6a9a5ae269 100644
--- a/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,6 +147,14 @@ public:
     return std::make_pair(SDValue(), SDValue());
   }
 
+  virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                          SDValue Chain, SDValue Addr,
+                                          SDValue Size,
+                                          MachinePointerInfo DstPtrInfo,
+                                          bool ZeroData) const {
+    return SDValue();
+  }
+
   // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
   // than FMUL and ADD is delegated to the machine combiner.
   virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 8c8a7be459fd..2b32a4d30dff 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/SlotIndexes.h - Slot indexes representation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -302,8 +301,6 @@ class raw_ostream;
     }
   };
 
-  template <> struct isPodLike<SlotIndex> { static const bool value = true; };
-
   inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
     li.print(os);
     return os;
@@ -311,20 +308,6 @@ class raw_ostream;
 
   using IdxMBBPair = std::pair<SlotIndex, MachineBasicBlock *>;
 
-  inline bool operator<(SlotIndex V, const IdxMBBPair &IM) {
-    return V < IM.first;
-  }
-
-  inline bool operator<(const IdxMBBPair &IM, SlotIndex V) {
-    return IM.first < V;
-  }
-
-  struct Idx2MBBCompare {
-    bool operator()(const IdxMBBPair &LHS, const IdxMBBPair &RHS) const {
-      return LHS.first < RHS.first;
-    }
-  };
-
   /// SlotIndexes pass.
   ///
   /// This pass assigns indexes to each instruction.
@@ -336,10 +319,6 @@ class raw_ostream;
     using IndexList = ilist<IndexListEntry>;
     IndexList indexList;
 
-#ifdef EXPENSIVE_CHECKS
-    IndexList graveyardList;
-#endif // EXPENSIVE_CHECKS
-
     MachineFunction *mf;
 
     using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>;
@@ -368,7 +347,7 @@ class raw_ostream;
   public:
     static char ID;
 
-    SlotIndexes() : MachineFunctionPass(ID) {
+    SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
       initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     }
 
@@ -385,9 +364,6 @@ class raw_ostream;
     /// Dump the indexes.
     void dump() const;
 
-    /// Renumber the index list, providing space for new instructions.
-    void renumberIndexes();
-
     /// Repair indexes after adding and removing instructions.
     void repairIndexesInRange(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator Begin,
@@ -516,7 +492,9 @@ class raw_ostream;
     /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
     /// equal to \p To.
     MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
-      return std::lower_bound(I, idx2MBBMap.end(), To);
+      return std::partition_point(
+          I, idx2MBBMap.end(),
+          [=](const IdxMBBPair &IM) { return IM.first < To; });
     }
 
     /// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex
@@ -552,29 +530,6 @@ class raw_ostream;
       return J->second;
     }
 
-    /// Returns the MBB covering the given range, or null if the range covers
-    /// more than one basic block.
-    MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
-
-      assert(start < end && "Backwards ranges not allowed.");
-      MBBIndexIterator itr = findMBBIndex(start);
-      if (itr == MBBIndexEnd()) {
-        itr = std::prev(itr);
-        return itr->second;
-      }
-
-      // Check that we don't cross the boundary into this block.
-      if (itr->first < end)
-        return nullptr;
-
-      itr = std::prev(itr);
-
-      if (itr->first <= start)
-        return itr->second;
-
-      return nullptr;
-    }
-
     /// Insert the given machine instruction into the mapping. Returns the
     /// assigned index.
     /// If Late is set and there are null indexes between mi's neighboring
@@ -680,33 +635,7 @@ class raw_ostream;
       idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
 
       renumberIndexes(newItr);
-      llvm::sort(idx2MBBMap, Idx2MBBCompare());
-    }
-
-    /// Free the resources that were required to maintain a SlotIndex.
-    ///
-    /// Once an index is no longer needed (for instance because the instruction
-    /// at that index has been moved), the resources required to maintain the
-    /// index can be relinquished to reduce memory use and improve renumbering
-    /// performance. Any remaining SlotIndex objects that point to the same
-    /// index are left 'dangling' (much the same as a dangling pointer to a
-    /// freed object) and should not be accessed, except to destruct them.
-    ///
-    /// Like dangling pointers, access to dangling SlotIndexes can cause
-    /// painful-to-track-down bugs, especially if the memory for the index
-    /// previously pointed to has been re-used. To detect dangling SlotIndex
-    /// bugs, build with EXPENSIVE_CHECKS=1. This will cause "erased" indexes to
-    /// be retained in a graveyard instead of being freed. Operations on indexes
-    /// in the graveyard will trigger an assertion.
-    void eraseIndex(SlotIndex index) {
-      IndexListEntry *entry = index.listEntry();
-#ifdef EXPENSIVE_CHECKS
-      indexList.remove(entry);
-      graveyardList.push_back(entry);
-      entry->setPoison();
-#else
-      indexList.erase(entry);
-#endif
+      llvm::sort(idx2MBBMap, less_first());
     }
   };
 
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index 8be9ae378557..d7d88de6f682 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -1,9 +1,8 @@
 //===- StackMaps.h - StackMaps ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h
index a506ac636a17..2bdf4425e24a 100644
--- a/include/llvm/CodeGen/StackProtector.h
+++ b/include/llvm/CodeGen/StackProtector.h
@@ -1,9 +1,8 @@
 //===- StackProtector.h - Stack Protector Insertion -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,12 +61,6 @@ private:
   /// protection when -fstack-protection is used.
   unsigned SSPBufferSize = 0;
 
-  /// VisitedPHIs - The set of PHI nodes visited when determining
-  /// if a variable's reference has been taken.  This set
-  /// is maintained to ensure we don't visit the same PHI node multiple
-  /// times.
-  SmallPtrSet<const PHINode *, 16> VisitedPHIs;
-
   // A prologue is generated.
   bool HasPrologue = false;
 
diff --git a/include/llvm/CodeGen/SwiftErrorValueTracking.h b/include/llvm/CodeGen/SwiftErrorValueTracking.h
new file mode 100644
index 000000000000..fb7a12853c09
--- /dev/null
+++ b/include/llvm/CodeGen/SwiftErrorValueTracking.h
@@ -0,0 +1,110 @@
+//===- SwiftErrorValueTracking.h - Track swifterror VReg vals --*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SWIFTERRORVALUETRACKING_H
+#define SWIFTERRORVALUETRACKING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+
+namespace llvm {
+  class Function;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class TargetInstrInfo;
+  class TargetLowering;
+
+class SwiftErrorValueTracking {
+  // Some useful objects to reduce the number of function arguments needed.
+  MachineFunction *MF;
+  const Function *Fn;
+  const TargetLowering *TLI;
+  const TargetInstrInfo *TII;
+
+  /// A map from swifterror value in a basic block to the virtual register it is
+  /// currently represented by.
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, Register>
+      VRegDefMap;
+
+  /// A list of upward exposed vreg uses that need to be satisfied by either a
+  /// copy def or a phi node at the beginning of the basic block representing
+  /// the predecessor(s) swifterror value.
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, Register>
+      VRegUpwardsUse;
+
+  /// A map from instructions that define/use a swifterror value to the virtual
+  /// register that represents that def/use.
+  llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, Register>
+      VRegDefUses;
+
+  /// The swifterror argument of the current function.
+  const Value *SwiftErrorArg;
+
+  using SwiftErrorValues = SmallVector<const Value*, 1>;
+  /// A function can only have a single swifterror argument. And if it does
+  /// have a swifterror argument, it must be the first entry in
+  /// SwiftErrorVals.
+  SwiftErrorValues SwiftErrorVals;
+
+public:
+  /// Initialize data structures for specified new function.
+  void setFunction(MachineFunction &MF);
+
+  /// Get the (unique) function argument that was marked swifterror, or nullptr
+  /// if this function has no swifterror args.
+  const Value *getFunctionArg() const {
+    return SwiftErrorArg;
+  }
+
+  /// Get or create the swifterror value virtual register in
+  /// VRegDefMap for this basic block.
+  Register getOrCreateVReg(const MachineBasicBlock *, const Value *);
+
+  /// Set the swifterror virtual register in the VRegDefMap for this
+  /// basic block.
+  void setCurrentVReg(const MachineBasicBlock *MBB, const Value *, Register);
+
+  /// Get or create the swifterror value virtual register for a def of a
+  /// swifterror by an instruction.
+  Register getOrCreateVRegDefAt(const Instruction *, const MachineBasicBlock *,
+                                const Value *);
+
+  /// Get or create the swifterror value virtual register for a use of a
+  /// swifterror by an instruction.
+  Register getOrCreateVRegUseAt(const Instruction *, const MachineBasicBlock *,
+                                const Value *);
+
+  /// Create initial definitions of swifterror values in the entry block of the
+  /// current function.
+  bool createEntriesInEntryBlock(DebugLoc DbgLoc);
+
+  /// Propagate assigned swifterror vregs through a function, synthesizing PHI
+  /// nodes when needed to maintain consistency.
+  void propagateVRegs();
+
+  void preassignVRegs(MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+                      BasicBlock::const_iterator End);
+};
+
+}
+
+#endif
diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h
new file mode 100644
index 000000000000..62134dc792f7
--- /dev/null
+++ b/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -0,0 +1,297 @@
+//===- SwitchLoweringUtils.h - Switch Lowering ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+#define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/BranchProbability.h"
+
+namespace llvm {
+
+class FunctionLoweringInfo;
+class MachineBasicBlock;
+
+namespace SwitchCG {
+
+enum CaseClusterKind {
+  /// A cluster of adjacent case labels with the same destination, or just one
+  /// case.
+  CC_Range,
+  /// A cluster of cases suitable for jump table lowering.
+  CC_JumpTable,
+  /// A cluster of cases suitable for bit test lowering.
+  CC_BitTests
+};
+
+/// A cluster of case labels.
+struct CaseCluster {
+  CaseClusterKind Kind;
+  const ConstantInt *Low, *High;
+  union {
+    MachineBasicBlock *MBB;
+    unsigned JTCasesIndex;
+    unsigned BTCasesIndex;
+  };
+  BranchProbability Prob;
+
+  static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
+                           MachineBasicBlock *MBB, BranchProbability Prob) {
+    CaseCluster C;
+    C.Kind = CC_Range;
+    C.Low = Low;
+    C.High = High;
+    C.MBB = MBB;
+    C.Prob = Prob;
+    return C;
+  }
+
+  static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High,
+                               unsigned JTCasesIndex, BranchProbability Prob) {
+    CaseCluster C;
+    C.Kind = CC_JumpTable;
+    C.Low = Low;
+    C.High = High;
+    C.JTCasesIndex = JTCasesIndex;
+    C.Prob = Prob;
+    return C;
+  }
+
+  static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
+                              unsigned BTCasesIndex, BranchProbability Prob) {
+    CaseCluster C;
+    C.Kind = CC_BitTests;
+    C.Low = Low;
+    C.High = High;
+    C.BTCasesIndex = BTCasesIndex;
+    C.Prob = Prob;
+    return C;
+  }
+};
+
+using CaseClusterVector = std::vector<CaseCluster>;
+using CaseClusterIt = CaseClusterVector::iterator;
+
+/// Sort Clusters and merge adjacent cases.
+void sortAndRangeify(CaseClusterVector &Clusters);
+
+struct CaseBits {
+  uint64_t Mask = 0;
+  MachineBasicBlock *BB = nullptr;
+  unsigned Bits = 0;
+  BranchProbability ExtraProb;
+
+  CaseBits() = default;
+  CaseBits(uint64_t mask, MachineBasicBlock *bb, unsigned bits,
+           BranchProbability Prob)
+      : Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
+};
+
+using CaseBitsVector = std::vector<CaseBits>;
+
+/// This structure is used to communicate between SelectionDAGBuilder and
+/// SDISel for the code generation of additional basic blocks needed by
+/// multi-case switch statements.
+struct CaseBlock {
+  // For the GISel interface.
+  struct PredInfoPair {
+    CmpInst::Predicate Pred;
+    // Set when no comparison should be emitted.
+    bool NoCmp;
+  };
+  union {
+    // The condition code to use for the case block's setcc node.
+    // Besides the integer condition codes, this can also be SETTRUE, in which
+    // case no comparison gets emitted.
+    ISD::CondCode CC;
+    struct PredInfoPair PredInfo;
+  };
+
+  // The LHS/MHS/RHS of the comparison to emit.
+  // Emit by default LHS op RHS. MHS is used for range comparisons:
+  // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+  const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+  // The block to branch to if the setcc is true/false.
+  MachineBasicBlock *TrueBB, *FalseBB;
+
+  // The block into which to emit the code for the setcc and branches.
+  MachineBasicBlock *ThisBB;
+
+  /// The debug location of the instruction this CaseBlock was
+  /// produced from.
+  SDLoc DL;
+  DebugLoc DbgLoc;
+
+  // Branch weights.
+  BranchProbability TrueProb, FalseProb;
+
+  // Constructor for SelectionDAG.
+  CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+            const Value *cmpmiddle, MachineBasicBlock *truebb,
+            MachineBasicBlock *falsebb, MachineBasicBlock *me, SDLoc dl,
+            BranchProbability trueprob = BranchProbability::getUnknown(),
+            BranchProbability falseprob = BranchProbability::getUnknown())
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+        TrueProb(trueprob), FalseProb(falseprob) {}
+
+  // Constructor for GISel.
+  CaseBlock(CmpInst::Predicate pred, bool nocmp, const Value *cmplhs,
+            const Value *cmprhs, const Value *cmpmiddle,
+            MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+            MachineBasicBlock *me, DebugLoc dl,
+            BranchProbability trueprob = BranchProbability::getUnknown(),
+            BranchProbability falseprob = BranchProbability::getUnknown())
+      : PredInfo({pred, nocmp}), CmpLHS(cmplhs), CmpMHS(cmpmiddle),
+        CmpRHS(cmprhs), TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+        DbgLoc(dl), TrueProb(trueprob), FalseProb(falseprob) {}
+};
+
+struct JumpTable {
+  /// The virtual register containing the index of the jump table entry
+  /// to jump to.
+  unsigned Reg;
+  /// The JumpTableIndex for this jump table in the function.
+  unsigned JTI;
+  /// The MBB into which to emit the code for the indirect jump.
+  MachineBasicBlock *MBB;
+  /// The MBB of the default bb, which is a successor of the range
+  /// check MBB.  This is used when updating PHI nodes in successors.
+  MachineBasicBlock *Default;
+
+  JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D)
+      : Reg(R), JTI(J), MBB(M), Default(D) {}
+};
+struct JumpTableHeader {
+  APInt First;
+  APInt Last;
+  const Value *SValue;
+  MachineBasicBlock *HeaderBB;
+  bool Emitted;
+  bool OmitRangeCheck;
+
+  JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                  bool E = false)
+      : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+        Emitted(E), OmitRangeCheck(false) {}
+};
+using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
+
+struct BitTestCase {
+  uint64_t Mask;
+  MachineBasicBlock *ThisBB;
+  MachineBasicBlock *TargetBB;
+  BranchProbability ExtraProb;
+
+  BitTestCase(uint64_t M, MachineBasicBlock *T, MachineBasicBlock *Tr,
+              BranchProbability Prob)
+      : Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
+};
+
+using BitTestInfo = SmallVector<BitTestCase, 3>;
+
+struct BitTestBlock {
+  APInt First;
+  APInt Range;
+  const Value *SValue;
+  unsigned Reg;
+  MVT RegVT;
+  bool Emitted;
+  bool ContiguousRange;
+  MachineBasicBlock *Parent;
+  MachineBasicBlock *Default;
+  BitTestInfo Cases;
+  BranchProbability Prob;
+  BranchProbability DefaultProb;
+
+  BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
+               bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+               BitTestInfo C, BranchProbability Pr)
+      : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+        RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+        Cases(std::move(C)), Prob(Pr) {}
+};
+
+/// Return the range of values within a range.
+uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
+                           unsigned Last);
+
+/// Return the number of cases within a range.
+uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+                              unsigned First, unsigned Last);
+
+struct SwitchWorkListItem {
+  MachineBasicBlock *MBB;
+  CaseClusterIt FirstCluster;
+  CaseClusterIt LastCluster;
+  const ConstantInt *GE;
+  const ConstantInt *LT;
+  BranchProbability DefaultProb;
+};
+using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
+
+class SwitchLowering {
+public:
+  SwitchLowering(FunctionLoweringInfo &funcinfo) : FuncInfo(funcinfo) {}
+
+  void init(const TargetLowering &tli, const TargetMachine &tm,
+            const DataLayout &dl) {
+    TLI = &tli;
+    TM = &tm;
+    DL = &dl;
+  }
+
+  /// Vector of CaseBlock structures used to communicate SwitchInst code
+  /// generation information.
+  std::vector<CaseBlock> SwitchCases;
+
+  /// Vector of JumpTable structures used to communicate SwitchInst code
+  /// generation information.
+  std::vector<JumpTableBlock> JTCases;
+
+  /// Vector of BitTestBlock structures used to communicate SwitchInst code
+  /// generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
+                      MachineBasicBlock *DefaultMBB);
+
+  bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
+                      unsigned Last, const SwitchInst *SI,
+                      MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
+
+
+  void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
+
+  /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
+  /// decides it's not a good idea.
+  bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
+                     const SwitchInst *SI, CaseCluster &BTCluster);
+
+  virtual void addSuccessorWithProb(
+      MachineBasicBlock *Src, MachineBasicBlock *Dst,
+      BranchProbability Prob = BranchProbability::getUnknown()) = 0;
+
+  virtual ~SwitchLowering() = default;
+
+private:
+  const TargetLowering *TLI;
+  const TargetMachine *TM;
+  const DataLayout *DL;
+  FunctionLoweringInfo &FuncInfo;
+};
+
+} // namespace SwitchCG
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+
diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h
index be6562c85f2e..358798d5ed60 100644
--- a/include/llvm/CodeGen/TailDuplicator.h
+++ b/include/llvm/CodeGen/TailDuplicator.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h
index 7d138f585171..aebeeecbe506 100644
--- a/include/llvm/CodeGen/TargetCallingConv.h
+++ b/include/llvm/CodeGen/TargetCallingConv.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/TargetCallingConv.h - Calling Convention ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,9 +45,12 @@ namespace ISD {
     unsigned IsInConsecutiveRegsLast : 1;
     unsigned IsInConsecutiveRegs : 1;
     unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
+    unsigned IsPointer : 1;
 
     unsigned ByValSize; ///< Byval struct size
 
+    unsigned PointerAddrSpace; ///< Address space of pointer argument
+
   public:
     ArgFlagsTy()
         : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
@@ -56,8 +58,9 @@ namespace ISD {
           IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
           IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
-          IsCopyElisionCandidate(0), ByValSize(0) {
-      static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big");
+          IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
+          PointerAddrSpace(0) {
+      static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
     }
 
     bool isZExt() const { return IsZExt; }
@@ -114,6 +117,9 @@ namespace ISD {
     bool isCopyElisionCandidate()  const { return IsCopyElisionCandidate; }
     void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; }
 
+    bool isPointer()  const { return IsPointer; }
+    void setPointer() { IsPointer = 1; }
+
     unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
     void setByValAlign(unsigned A) {
       ByValAlign = Log2_32(A) + 1;
@@ -128,7 +134,10 @@ namespace ISD {
 
     unsigned getByValSize() const { return ByValSize; }
     void setByValSize(unsigned S) { ByValSize = S; }
-  };
+
+    unsigned getPointerAddrSpace() const { return PointerAddrSpace; }
+    void setPointerAddrSpace(unsigned AS) { PointerAddrSpace = AS; }
+};
 
   /// InputArg - This struct carries flags and type information about a
   /// single incoming (formal) argument or incoming (from the perspective
diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h
index b4d1da941433..878c9ffd2b51 100644
--- a/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/include/llvm/CodeGen/TargetFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/TargetFrameLowering.h ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #define LLVM_CODEGEN_TARGETFRAMELOWERING_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/StringSwitch.h"
 #include <utility>
 #include <vector>
 
@@ -24,6 +24,14 @@ namespace llvm {
   class MachineFunction;
   class RegScavenger;
 
+namespace TargetStackID {
+  enum Value {
+    Default = 0,
+    SGPRSpill = 1,
+    NoAlloc = 255
+  };
+}
+
 /// Information about stack frame layout on the target.  It holds the direction
 /// of stack growth, the known stack alignment on entry to each function, and
 /// the offset to the locals area.
@@ -262,6 +270,17 @@ public:
     return getFrameIndexReference(MF, FI, FrameReg);
   }
 
+  /// Return the offset used to reference frame index \p FI of \p MF. The
+  /// offset can be relative to FP, BP or SP, depending on which base register
+  /// llvm.localaddress returns.
+  virtual int getNonLocalFrameIndexReference(const MachineFunction &MF,
+                                             int FI) const {
+    // By default, forward to getFrameIndexReference; the frame register passed
+    // to it is not used afterwards. Interested targets can override this.
+    unsigned UnusedFrameReg;
+    return getFrameIndexReference(MF, FI, UnusedFrameReg);
+  }
+
   /// This method determines which of the registers reported by
   /// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
   /// The default implementation checks populates the \p SavedRegs bitset with
@@ -335,6 +354,16 @@ public:
     return true;
   }
 
+  /// Return true if the stack ID \p ID is supported.
+  ///
+  /// The default implementation accepts only TargetStackID::Default and
+  /// TargetStackID::NoAlloc and rejects every other ID.
+  virtual bool isSupportedStackID(TargetStackID::Value ID) const {
+    const bool IsDefault = ID == TargetStackID::Default;
+    const bool IsNoAlloc = ID == TargetStackID::NoAlloc;
+    return IsDefault || IsNoAlloc;
+  }
+
   /// Check if given function is safe for not having callee saved registers.
   /// This is used when interprocedural register allocation is enabled.
   static bool isSafeForNoCSROpt(const Function &F) {
diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h
index 961b90e9bc12..25b04f8c019a 100644
--- a/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOutliner.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -81,6 +81,7 @@ public:
 
   /// Given a machine instruction descriptor, returns the register
   /// class constraint for OpNum, or NULL.
+  virtual
   const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
                                          const TargetRegisterInfo *TRI,
                                          const MachineFunction &MF) const;
@@ -429,6 +430,13 @@ public:
 
     RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0)
         : Reg(Reg), SubReg(SubReg) {}
+
+    bool operator==(const RegSubRegPair &Other) const {
+      return Reg == Other.Reg && SubReg == Other.SubReg;
+    }
+    bool operator!=(const RegSubRegPair &Other) const {
+      return !operator==(Other);
+    }
   };
 
   /// A pair composed of a pair of a register and a sub-register index,
@@ -663,8 +671,9 @@ public:
   /// is finished.  Return the value/register of the new loop count.  We need
   /// this function when peeling off one or more iterations of a loop. This
   /// function assumes the nth iteration is peeled first.
-  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
-                                   MachineInstr &Cmp,
+  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB,
+                                   MachineBasicBlock &PreHeader,
+                                   MachineInstr *IndVar, MachineInstr &Cmp,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    SmallVectorImpl<MachineInstr *> &PrevInsts,
                                    unsigned Iter, unsigned MaxIter) const {
@@ -926,9 +935,12 @@ public:
   /// operand folded, otherwise NULL is returned.
   /// The new instruction is inserted before MI, and the client is responsible
   /// for removing the old instruction.
+  /// If VRM is passed, the assigned physregs can be inspected by target to
+  /// decide on using an opcode (note that those assignments can still change).
   MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops,
                                   int FI,
-                                  LiveIntervals *LIS = nullptr) const;
+                                  LiveIntervals *LIS = nullptr,
+                                  VirtRegMap *VRM = nullptr) const;
 
   /// Same as the previous version except it allows folding of any load and
   /// store from / to any address, not just from a specific stack slot.
@@ -1018,7 +1030,8 @@ protected:
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                         ArrayRef<unsigned> Ops,
                         MachineBasicBlock::iterator InsertPt, int FrameIndex,
-                        LiveIntervals *LIS = nullptr) const {
+                        LiveIntervals *LIS = nullptr,
+                        VirtRegMap *VRM = nullptr) const {
     return nullptr;
   }
 
@@ -1138,8 +1151,9 @@ public:
 
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory.
-  virtual bool getMemOperandWithOffset(MachineInstr &MI,
-                                       MachineOperand *&BaseOp, int64_t &Offset,
+  virtual bool getMemOperandWithOffset(const MachineInstr &MI,
+                                       const MachineOperand *&BaseOp,
+                                       int64_t &Offset,
                                        const TargetRegisterInfo *TRI) const {
     return false;
   }
@@ -1164,8 +1178,8 @@ public:
   /// or
   ///   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   /// to TargetPassConfig::createMachineScheduler() to have an effect.
-  virtual bool shouldClusterMemOps(MachineOperand &BaseOp1,
-                                   MachineOperand &BaseOp2,
+  virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+                                   const MachineOperand &BaseOp2,
                                    unsigned NumLoads) const {
     llvm_unreachable("target did not implement shouldClusterMemOps()");
   }
@@ -1253,8 +1267,9 @@ public:
 
   /// Measure the specified inline asm to determine an approximation of its
   /// length.
-  virtual unsigned getInlineAsmLength(const char *Str,
-                                      const MCAsmInfo &MAI) const;
+  virtual unsigned getInlineAsmLength(
+    const char *Str, const MCAsmInfo &MAI,
+    const TargetSubtargetInfo *STI = nullptr) const;
 
   /// Allocate and return a hazard recognizer to use for this target when
   /// scheduling the machine instructions before register allocation.
@@ -1542,7 +1557,8 @@ public:
   /// See also MachineInstr::mayAlias, which is implemented on top of this
   /// function.
   virtual bool
-  areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const {
     assert((MIa.mayLoad() || MIa.mayStore()) &&
            "MIa must load from or modify a memory location");
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 23dbaac03ebe..d5cca60bb1b2 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -189,13 +188,18 @@ public:
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     uint16_t Alignment = 0;
+    Type *ByValType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
           IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
           IsSwiftSelf(false), IsSwiftError(false) {}
 
-    void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx);
+    void setAttributes(const CallBase *Call, unsigned ArgIdx);
+
+    void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
+      return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
+    }
   };
   using ArgListTy = std::vector<ArgListEntry>;
 
@@ -235,7 +239,14 @@ public:
   /// Return the pointer type for the given address space, defaults to
   /// the pointer type from the data layout.
   /// FIXME: The default needs to be removed once all the code is updated.
-  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
+  virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
+    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
+  }
+
+  /// Return the in-memory pointer type for the given address space, defaults to
+  /// the pointer type from the data layout.  FIXME: The default needs to be
+  /// removed once all the code is updated.
+  MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
     return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
   }
 
@@ -291,6 +302,9 @@ public:
     // The default action for one element vectors is to scalarize
     if (VT.getVectorNumElements() == 1)
       return TypeScalarizeVector;
+    // The default action for an odd-width vector is to widen.
+    if (!VT.isPow2VectorType())
+      return TypeWidenVector;
     // The default action for other vectors is to promote
     return TypePromoteInteger;
   }
@@ -387,8 +401,9 @@ public:
   /// efficiently, casting the load to a smaller vector of larger types and
   /// loading is more efficient, however, this can be undone by optimizations in
   /// dag combiner.
-  virtual bool isLoadBitCastBeneficial(EVT LoadVT,
-                                       EVT BitcastVT) const {
+  virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+                                       const SelectionDAG &DAG,
+                                       const MachineMemOperand &MMO) const {
     // Don't do if we could do an indexed load on the original type, but not on
     // the new one.
     if (!LoadVT.isSimple() || !BitcastVT.isSimple())
@@ -402,14 +417,18 @@ public:
         getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
       return false;
 
-    return true;
+    bool Fast = false;
+    return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
+                              MMO, &Fast) && Fast;
   }
 
   /// Return true if the following transform is beneficial:
   /// (store (y (conv x)), y*)) -> (store x, (x*))
-  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
+                                        const SelectionDAG &DAG,
+                                        const MachineMemOperand &MMO) const {
     // Default to the same logic as loads.
-    return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
   }
 
   /// Return true if it is expected to be cheaper to do a store of a non-zero
@@ -421,10 +440,12 @@ public:
     return false;
   }
 
-  /// Allow store merging after legalization in addition to before legalization.
-  /// This may catch stores that do not exist earlier (eg, stores created from
-  /// intrinsics).
-  virtual bool mergeStoresAfterLegalization() const { return true; }
+  /// Allow store merging for the specified type after legalization in addition
+  /// to before legalization. This may transform stores that do not exist
+  /// earlier (for example, stores created from intrinsics).
+  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
+    return true;
+  }
 
   /// Returns if it's reasonable to merge stores to MemVT size.
   virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
@@ -521,13 +542,22 @@ public:
   /// There are two ways to clear extreme bits (either low or high):
   /// Mask:    x &  (-1 << y)  (the instcombine canonical form)
   /// Shifts:  x >> y << y
-  /// Return true if the variant with 2 shifts is preferred.
+  /// Return true if the variant with 2 variable shifts is preferred.
   /// Return false if there is no preference.
-  virtual bool preferShiftsToClearExtremeBits(SDValue X) const {
+  virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
     // By default, let's assume that no one prefers shifts.
     return false;
   }
 
+  /// Return true if it is profitable to fold a pair of shifts into a mask.
+  /// This is usually true on most targets. But some targets, like Thumb1,
+  /// have immediate shift instructions, but no immediate "and" instruction;
+  /// this makes the fold unprofitable.
+  virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+                                                 CombineLevel Level) const {
+    return true;
+  }
+
   /// Should we tranform the IR-optimal check for whether given truncation
   /// down into KeptBits would be truncating or not:
   ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
@@ -541,6 +571,16 @@ public:
     return false;
   }
 
+  /// These two forms are equivalent:
+  ///   sub %y, (xor %x, -1)
+  ///   add (add %x, 1), %y
+  /// The variant with two add's is IR-canonical.
+  /// Some targets may prefer one to the other.
+  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
+    // By default, let's assume that everyone prefers the form with two add's.
+    return true;
+  }
+
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
@@ -560,11 +600,6 @@ public:
     return false;
   }
 
-  /// Return true if target supports floating point exceptions.
-  bool hasFloatingPointExceptions() const {
-    return HasFloatingPointExceptions;
-  }
-
   /// Return true if target always beneficiates from combining into FMA for a
   /// given value type. This must typically return false on targets where FMA
   /// takes more cycles to execute than FADD.
@@ -619,12 +654,21 @@ public:
 
   /// Return the register class that should be used for the specified value
   /// type.
-  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
+    (void)isDivergent;
     const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
     assert(RC && "This value type is not natively supported!");
     return RC;
   }
 
+  /// Allows target to decide about the register class of the
+  /// specific value that is live outside the defining block.
+  /// Returns true if the value needs uniform register class.
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *) const {
+    return false;
+  }
+
   /// Return the 'representative' register class for the specified value
   /// type.
   ///
@@ -643,6 +687,13 @@ public:
     return RepRegClassCostForVT[VT.SimpleTy];
   }
 
+  /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
+  /// instructions, and false if a library call is preferred (e.g. for code-size
+  /// reasons).
+  virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+    return true;
+  }
+
   /// Return true if the target has native support for the specified value type.
   /// This means that it has a register that directly holds it without
   /// promotions or expansions.
@@ -768,7 +819,8 @@ public:
   /// Returns true if the target can instruction select the specified FP
   /// immediate natively. If false, the legalizer will materialize the FP
   /// immediate as a load from a constant pool.
-  virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
+  virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
+                            bool ForCodeSize = false) const {
     return false;
   }
 
@@ -830,6 +882,8 @@ public:
     default:
       llvm_unreachable("Unexpected fixed point operation.");
     case ISD::SMULFIX:
+    case ISD::SMULFIXSAT:
+    case ISD::UMULFIX:
       Supported = isSupportedFixedPointOperation(Op, VT, Scale);
       break;
     }
@@ -865,6 +919,8 @@ public:
       case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
       case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
       case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
+      case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
+      case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
     }
 
     auto Action = getOperationAction(EqOpc, VT);
@@ -931,21 +987,20 @@ public:
 
   /// Return true if lowering to a jump table is suitable for a set of case
   /// clusters which may contain \p NumCases cases, \p Range range of values.
-  /// FIXME: This function check the maximum table size and density, but the
-  /// minimum size is not checked. It would be nice if the minimum size is
-  /// also combined within this function. Currently, the minimum size check is
-  /// performed in findJumpTable() in SelectionDAGBuiler and
-  /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
   virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
                                       uint64_t Range) const {
-    const bool OptForSize = SI->getParent()->getParent()->optForSize();
+    // FIXME: This function checks the maximum table size and density, but the
+    // minimum size is not checked. It would be nice if the minimum size were
+    // also checked within this function. Currently, the minimum size check is
+    // performed in findJumpTable() in SelectionDAGBuilder and
+    // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+    const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
     const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
-    const unsigned MaxJumpTableSize =
-        OptForSize || getMaximumJumpTableSize() == 0
-            ? UINT_MAX
-            : getMaximumJumpTableSize();
-    // Check whether a range of clusters is dense enough for a jump table.
-    if (Range <= MaxJumpTableSize &&
+    const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
+    
+    // Check whether the number of cases is small enough and
+    // the range is dense enough for a jump table.
+    if ((OptForSize || Range <= MaxJumpTableSize) &&
         (NumCases * 100 >= Range * MinDensity)) {
       return true;
     }
@@ -1140,24 +1195,42 @@ public:
   EVT getValueType(const DataLayout &DL, Type *Ty,
                    bool AllowUnknown = false) const {
     // Lower scalar pointers to native pointer types.
-    if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+    if (auto *PTy = dyn_cast<PointerType>(Ty))
       return getPointerTy(DL, PTy->getAddressSpace());
 
-    if (Ty->isVectorTy()) {
-      VectorType *VTy = cast<VectorType>(Ty);
-      Type *Elm = VTy->getElementType();
+    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+      Type *EltTy = VTy->getElementType();
       // Lower vectors of pointers to native pointer types.
+      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
+        EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
+        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
+      }
+      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
+                              VTy->getNumElements());
+    }
+
+    return EVT::getEVT(Ty, AllowUnknown);
+  }
+
+  EVT getMemValueType(const DataLayout &DL, Type *Ty,
+                      bool AllowUnknown = false) const {
+    // Lower scalar pointers to native pointer types.
+    if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+      return getPointerMemTy(DL, PTy->getAddressSpace());
+    else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+      Type *Elm = VTy->getElementType();
       if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
-        EVT PointerTy(getPointerTy(DL, PT->getAddressSpace()));
+        EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
         Elm = PointerTy.getTypeForEVT(Ty->getContext());
       }
-
       return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
                        VTy->getNumElements());
     }
-    return EVT::getEVT(Ty, AllowUnknown);
+
+    return getValueType(DL, Ty, AllowUnknown);
   }
 
+
   /// Return the MVT corresponding to this LLVM type. See getValueType.
   MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
                          bool AllowUnknown = false) const {
@@ -1327,18 +1400,6 @@ public:
     return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
   }
 
-  /// For memcmp expansion when the memcmp result is only compared equal or
-  /// not-equal to 0, allow up to this number of load pairs per block. As an
-  /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
-  ///   a0 = load2bytes &a[0]
-  ///   b0 = load2bytes &b[0]
-  ///   a2 = load1byte  &a[2]
-  ///   b2 = load1byte  &b[2]
-  ///   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
-  virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
-    return 1;
-  }
-
   /// Get maximum # of store operations permitted for llvm.memmove
   ///
   /// This function returns the maximum number of store operations permitted
@@ -1358,10 +1419,10 @@ public:
   /// copy/move/set is converted to a sequence of store operations. Its use
   /// helps to ensure that such replacements don't generate code that causes an
   /// alignment error (trap) on the target machine.
-  virtual bool allowsMisalignedMemoryAccesses(EVT,
-                                              unsigned AddrSpace = 0,
-                                              unsigned Align = 1,
-                                              bool * /*Fast*/ = nullptr) const {
+  virtual bool allowsMisalignedMemoryAccesses(
+      EVT, unsigned AddrSpace = 0, unsigned Align = 1,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool * /*Fast*/ = nullptr) const {
     return false;
   }
 
@@ -1369,8 +1430,18 @@ public:
   /// given address space and alignment. If the access is allowed, the optional
   /// final parameter returns if the access is also fast (as defined by the
   /// target).
+  bool
+  allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+                     unsigned AddrSpace = 0, unsigned Alignment = 1,
+                     MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+                     bool *Fast = nullptr) const;
+
+  /// Return true if the target supports a memory access of this type for the
+  /// given MachineMemOperand. If the access is allowed, the optional
+  /// final parameter returns if the access is also fast (as defined by the
+  /// target).
   bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
-                          unsigned AddrSpace = 0, unsigned Alignment = 1,
+                          const MachineMemOperand &MMO,
                           bool *Fast = nullptr) const;
 
   /// Returns the target specific optimal type for load and store operations as
@@ -1384,12 +1455,11 @@ public:
   /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
   /// does not need to be loaded.  It returns EVT::Other if the type should be
   /// determined using generic target-independent logic.
-  virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
-                                  unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
-                                  bool /*IsMemset*/,
-                                  bool /*ZeroMemset*/,
-                                  bool /*MemcpyStrSrc*/,
-                                  MachineFunction &/*MF*/) const {
+  virtual EVT
+  getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
+                      unsigned /*SrcAlign*/, bool /*IsMemset*/,
+                      bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+                      const AttributeList & /*FuncAttributes*/) const {
     return MVT::Other;
   }
 
@@ -1515,7 +1585,7 @@ public:
   /// performs validation and error handling, returns the function. Otherwise,
   /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
   /// Should be used only when getIRStackGuard returns nullptr.
-  virtual Value *getSSPStackGuardCheck(const Module &M) const;
+  virtual Function *getSSPStackGuardCheck(const Module &M) const;
 
 protected:
   Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
@@ -1537,8 +1607,9 @@ public:
   }
 
   /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
-  /// are happy to sink it into basic blocks.
-  virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+  /// are happy to sink it into basic blocks. A cast may be free, but not
+  /// necessarily a no-op, e.g. a free truncate from a 64-bit to 32-bit pointer.
+  virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
     return isNoopAddrSpaceCast(SrcAS, DestAS);
   }
 
@@ -1716,8 +1787,9 @@ public:
 
   /// Returns how the IR-level AtomicExpand pass should expand the given
   /// AtomicRMW, if at all. Default is to never expand.
-  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
-    return AtomicExpansionKind::None;
+  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+    return RMW->isFloatingPointOperation() ?
+      AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
   }
 
   /// On some platforms, an AtomicRMW that never actually modifies the value
@@ -1762,6 +1834,8 @@ public:
       Action != TypeSplitVector;
   }
 
+  virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
+
   /// Return true if a select of constants (select Cond, C1, C2) should be
   /// transformed into simple math ops with the condition value. For example:
   /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
@@ -1865,12 +1939,6 @@ protected:
   /// control.
   void setJumpIsExpensive(bool isExpensive = true);
 
-  /// Tells the code generator that this target supports floating point
-  /// exceptions and cares about preserving floating point exception behavior.
-  void setHasFloatingPointExceptions(bool FPExceptions = true) {
-    HasFloatingPointExceptions = FPExceptions;
-  }
-
   /// Tells the code generator which bitwidths to bypass.
   void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
     BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
@@ -2159,6 +2227,8 @@ public:
     case ISD::UADDSAT:
     case ISD::FMINNUM:
     case ISD::FMAXNUM:
+    case ISD::FMINNUM_IEEE:
+    case ISD::FMAXNUM_IEEE:
     case ISD::FMINIMUM:
     case ISD::FMAXIMUM:
       return true;
@@ -2166,6 +2236,30 @@ public:
     }
   }
 
+  /// Return true if \p Opcode is a math/logic binary operator.
+  virtual bool isBinOp(unsigned Opcode) const {
+    // A commutative binop must be a binop.
+    if (isCommutativeBinOp(Opcode))
+      return true;
+    // These are non-commutative binops.
+    switch (Opcode) {
+    case ISD::SUB:
+    case ISD::SHL:
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::SDIV:
+    case ISD::UDIV:
+    case ISD::SREM:
+    case ISD::UREM:
+    case ISD::FSUB:
+    case ISD::FDIV:
+    case ISD::FREM:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   /// Return true if it's free to truncate a value of type FromTy to type
   /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
@@ -2270,6 +2364,16 @@ public:
     return false;
   }
 
+  /// Return true if sinking I's operands to the same basic block as I is
+  /// profitable, e.g. because the operands can be folded into a target
+  /// instruction during instruction selection. After calling the function
+  /// \p Ops contains the Uses to sink ordered by dominance (dominating users
+  /// come first).
+  virtual bool shouldSinkOperands(Instruction *I,
+                                  SmallVectorImpl<Use *> &Ops) const {
+    return false;
+  }
+
   /// Return true if the target supplies and combines to a paired load
   /// two loaded values of type LoadedType next to each other in memory.
   /// RequiredAlignment gives the minimal alignment constraints that must be met
@@ -2415,6 +2519,31 @@ public:
     return false;
   }
 
+  /// Return true if extraction of a scalar element from the given vector type
+  /// at the given index is cheap. For example, if scalar operations occur on
+  /// the same register file as vector operations, then an extract element may
+  /// be a sub-register rename rather than an actual instruction.
+  virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
+    return false;
+  }
+
+  /// Try to convert math with an overflow comparison into the corresponding DAG
+  /// node operation. Targets may want to override this independently of whether
+  /// the operation is legal/custom for the given type because it may obscure
+  /// matching of other patterns.
+  virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+    // TODO: The default logic is inherited from code in CodeGenPrepare.
+    // The opcode should not make a difference by default?
+    if (Opcode != ISD::UADDO)
+      return false;
+
+    // Allow the transform as long as we have an integer type that is not
+    // obviously illegal and unsupported.
+    if (VT.isVector())
+      return false;
+    return VT.isSimple() || !isOperationExpand(Opcode, VT);
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
@@ -2495,10 +2624,6 @@ private:
   /// predication.
   bool JumpIsExpensive;
 
-  /// Whether the target supports or cares about preserving floating point
-  /// exception behavior.
-  bool HasFloatingPointExceptions;
-
   /// This target prefers to use _setjmp to implement llvm.setjmp.
   ///
   /// Defaults to false.
@@ -2834,11 +2959,10 @@ public:
 
   /// Returns a pair of (return value, chain).
   /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
-  std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
-                                          EVT RetVT, ArrayRef<SDValue> Ops,
-                                          bool isSigned, const SDLoc &dl,
-                                          bool doesNotReturn = false,
-                                          bool isReturnValueUsed = true) const;
+  std::pair<SDValue, SDValue> makeLibCall(
+      SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops,
+      bool isSigned, const SDLoc &dl, bool doesNotReturn = false,
+      bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const;
 
   /// Check whether parameters to a call that are passed in callee saved
   /// registers are the same as from the calling function.  This needs to be
@@ -2876,6 +3000,20 @@ public:
     }
   };
 
+  /// Determines the optimal series of memory ops to replace the memset / memcpy.
+  /// Return true if the number of memory ops is below the threshold (Limit).
+  /// It returns the types of the sequence of memory ops to perform
+  /// memset / memcpy by reference.
+  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                unsigned Limit, uint64_t Size,
+                                unsigned DstAlign, unsigned SrcAlign,
+                                bool IsMemset,
+                                bool ZeroMemset,
+                                bool MemcpyStrSrc,
+                                bool AllowOverlap,
+                                unsigned DstAS, unsigned SrcAS,
+                                const AttributeList &FuncAttributes) const;
+
   /// Check to see if the specified operand of the specified instruction is a
   /// constant integer.  If so, check to see if there are any bits set in the
   /// constant that are not demanded.  If so, shrink the constant and return
@@ -3001,6 +3139,10 @@ public:
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth = 0) const;
 
+  /// This method returns the constant pool value that will be loaded by LD.
+  /// NOTE: You must check for implicit extensions of the constant by LD.
+  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
+
   /// If \p SNaN is false, \returns true if \p Op is known to never be any
   /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
   /// NaN.
@@ -3088,15 +3230,6 @@ public:
     return true;
   }
 
-  /// Return true if it is profitable to fold a pair of shifts into a mask.
-  /// This is usually true on most targets. But some targets, like Thumb1,
-  /// have immediate shift instructions, but no immediate "and" instruction;
-  /// this makes the fold unprofitable.
-  virtual bool shouldFoldShiftPairToMask(const SDNode *N,
-                                         CombineLevel Level) const {
-    return true;
-  }
-
   // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
   // to a shuffle and a truncate.
   // Example of such a combine:
@@ -3430,6 +3563,15 @@ public:
     return false;
   }
 
+  /// For most targets, an LLVM type must be broken down into multiple
+  /// smaller types. Usually the halves are ordered according to the endianness
+  /// but for some platforms that would break. So this method will default to
+  /// matching the endianness but can be overridden.
+  virtual bool
+  shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
+    return DL.isLittleEndian();
+  }
+
   /// Returns a 0 terminated array of registers that can be safely used as
   /// scratch registers.
   virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -3638,6 +3780,12 @@ public:
                                             std::vector<SDValue> &Ops,
                                             SelectionDAG &DAG) const;
 
+  // Lower custom output constraints. If invalid, return SDValue().
+  virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                              SDLoc DL,
+                                              const AsmOperandInfo &OpInfo,
+                                              SelectionDAG &DAG) const;
+
   //===--------------------------------------------------------------------===//
   // Div utility functions
   //
@@ -3840,8 +3988,26 @@ public:
 
   /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
   /// integers as its arguments.
-  SDValue getExpandedFixedPointMultiplication(SDNode *Node,
-                                              SelectionDAG &DAG) const;
+  SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
+
+  /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
+  /// always succeeds and populates the Result and Overflow arguments.
+  void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+                      SelectionDAG &DAG) const;
+
+  /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
+  /// always succeeds and populates the Result and Overflow arguments.
+  void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+                      SelectionDAG &DAG) const;
+
+  /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
+  /// expansion was successful and populates the Result and Overflow arguments.
+  bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+                  SelectionDAG &DAG) const;
+
+  /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
+  /// only the first Count elements of the vector are used.
+  SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
 
   //===--------------------------------------------------------------------===//
   // Instruction Emitting Hooks
@@ -3894,14 +4060,23 @@ public:
   SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
 
 private:
-  SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
-                               ISD::CondCode Cond, DAGCombinerInfo &DCI,
-                               const SDLoc &DL) const;
+  SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+                           const SDLoc &DL, DAGCombinerInfo &DCI) const;
+  SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+                             const SDLoc &DL, DAGCombinerInfo &DCI) const;
 
   SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                                SDValue N1, ISD::CondCode Cond,
                                                DAGCombinerInfo &DCI,
                                                const SDLoc &DL) const;
+
+  SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+                            SDValue CompTargetNode, ISD::CondCode Cond,
+                            DAGCombinerInfo &DCI, const SDLoc &DL,
+                            SmallVectorImpl<SDNode *> &Created) const;
+  SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
+                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
+                          const SDLoc &DL) const;
 };
 
 /// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 052d1f8bc686..a1fb81cb009d 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/TargetLoweringObjectFileImpl.h - Object Info --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h
index d0d959c4ae11..080a244f6f69 100644
--- a/include/llvm/CodeGen/TargetOpcodes.h
+++ b/include/llvm/CodeGen/TargetOpcodes.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/TargetOpcodes.h - Target Indep Opcodes -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index 3288711a335d..0bd82aafac37 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -1,9 +1,8 @@
 //===- TargetPassConfig.h - Code Generation pass options --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,6 +24,7 @@ class LLVMTargetMachine;
 struct MachineSchedContext;
 class PassConfigImpl;
 class ScheduleDAGInstrs;
+class CSEConfigBase;
 
 // The old pass manager infrastructure is hidden in a legacy namespace now.
 namespace legacy {
@@ -75,9 +75,6 @@ public:
   }
 };
 
-template <> struct isPodLike<IdentifyingPassPtr> {
-  static const bool value = true;
-};
 
 /// Target-Independent Code Generator Pass Configuration Options.
 ///
@@ -319,6 +316,13 @@ public:
   /// when GlobalISel failed and isGlobalISelAbortEnabled is false.
   virtual bool reportDiagnosticWhenGlobalISelFallback() const;
 
+  /// Check whether continuous CSE should be enabled in GISel passes.
+  /// By default, it's enabled for non O0 levels.
+  virtual bool isGISelCSEEnabled() const;
+
+  /// Returns the CSEConfig object to use for the current optimization level.
+  virtual std::unique_ptr<CSEConfigBase> getCSEConfig() const;
+
 protected:
   // Helper to verify the analysis is really immutable.
   void setOpt(bool &Opt, bool Val);
@@ -360,11 +364,11 @@ protected:
 
   /// addFastRegAlloc - Add the minimum set of target-independent passes that
   /// are required for fast register allocation.
-  virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
+  virtual void addFastRegAlloc();
 
   /// addOptimizedRegAlloc - Add passes related to register allocation.
   /// LLVMTargetMachine provides standard regalloc passes for most targets.
-  virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+  virtual void addOptimizedRegAlloc();
 
   /// addPreRewrite - Add passes to the optimized register allocation pipeline
   /// after register allocation is complete, but before virtual registers are
@@ -374,10 +378,18 @@ protected:
   /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix.
   /// When these passes run, VirtRegMap contains legal physreg assignments for
   /// all virtual registers.
+  ///
+  /// Note if the target overloads addRegAssignAndRewriteOptimized, this may not
+  /// be honored. This is also not generally used for the fast variant,
+  /// where the allocation and rewriting are done in one pass.
   virtual bool addPreRewrite() {
     return false;
   }
 
+  /// Add passes to be run immediately after virtual registers are rewritten
+  /// to physical registers.
+  virtual void addPostRewrite() { }
+
   /// This method may be implemented by targets that want to run passes after
   /// register allocation pass pipeline but before prolog-epilog insertion.
   virtual void addPostRegAlloc() { }
@@ -431,7 +443,12 @@ protected:
 
   /// addMachinePasses helper to create the target-selected or overriden
   /// regalloc pass.
-  FunctionPass *createRegAllocPass(bool Optimized);
+  virtual FunctionPass *createRegAllocPass(bool Optimized);
+
+  /// Add core register allocator passes which do the actual register assignment
+  /// and rewriting. \returns true if any passes were added.
+  virtual bool addRegAssignmentFast();
+  virtual bool addRegAssignmentOptimized();
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h
index 0fbff3137653..ddbd677b3eaa 100644
--- a/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1,9 +1,8 @@
 //==- CodeGen/TargetRegisterInfo.h - Target Register Information -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -521,6 +520,11 @@ public:
   /// function.  Used by MachineRegisterInfo::isConstantPhysReg().
   virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
 
+  /// Returns true if the register class is considered divergent.
+  virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
+    return false;
+  }
+
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
@@ -986,7 +990,7 @@ public:
 
   /// getFrameRegister - This method should return the register used as a base
   /// for values allocated in the current stack frame.
-  virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+  virtual Register getFrameRegister(const MachineFunction &MF) const = 0;
 
   /// Mark a register and all its aliases as reserved in the given set.
   void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 6173925e23a1..cce85c8d7b0d 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TargetSchedule.h - Sched Machine Model ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h
index 968e4c4b8102..037fc3ed3243 100644
--- a/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TargetSubtargetInfo.h - Target Information --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,7 @@ class RegisterBankInfo;
 class SDep;
 class SelectionDAGTargetInfo;
 struct SubtargetFeatureKV;
+struct SubtargetSubTypeKV;
 struct SubtargetInfoKV;
 class SUnit;
 class TargetFrameLowering;
@@ -63,8 +63,7 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
 protected: // Can only create subclasses...
   TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
                       ArrayRef<SubtargetFeatureKV> PF,
-                      ArrayRef<SubtargetFeatureKV> PD,
-                      const SubtargetInfoKV *ProcSched,
+                      ArrayRef<SubtargetSubTypeKV> PD,
                       const MCWriteProcResEntry *WPR,
                       const MCWriteLatencyEntry *WL,
                       const MCReadAdvanceEntry *RA, const InstrStage *IS,
@@ -190,13 +189,13 @@ public:
   /// TargetLowering preference). It does not yet disable the postRA scheduler.
   virtual bool enableMachineScheduler() const;
 
-  /// Support printing of [latency:throughput] comment in output .S file.
-  virtual bool supportPrintSchedInfo() const { return false; }
-
   /// True if the machine scheduler should disable the TLI preference
   /// for preRA scheduling with the source level scheduler.
   virtual bool enableMachineSchedDefaultSched() const { return true; }
 
+  /// True if the subtarget should run MachinePipeliner
+  virtual bool enableMachinePipeliner() const { return true; };
+
   /// True if the subtarget should enable joining global copies.
   ///
   /// By default this is enabled if the machine scheduler is enabled, but
@@ -250,6 +249,10 @@ public:
       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
   }
 
+  /// Default to DFA for resource management, return false when target will use
+  /// ProcResource in InstrSchedModel instead.
+  virtual bool useDFAforSMS() const { return true; }
+
   // For use with PostRAScheduling: get the minimum optimization level needed
   // to enable post-RA scheduling.
   virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
@@ -286,12 +289,16 @@ public:
   /// possible.
   virtual bool enableSubRegLiveness() const { return false; }
 
-  /// Returns string representation of scheduler comment
-  std::string getSchedInfoStr(const MachineInstr &MI) const;
-  std::string getSchedInfoStr(MCInst const &MCI) const override;
-
   /// This is called after a .mir file was loaded.
   virtual void mirFileLoaded(MachineFunction &MF) const;
+
+  /// True if the register allocator should use the allocation orders exactly as
+  /// written in the tablegen descriptions, false if it should allocate
+  /// the specified physical register later if it is callee-saved.
+  virtual bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+                                           unsigned PhysReg) const {
+    return false;
+  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/UnreachableBlockElim.h b/include/llvm/CodeGen/UnreachableBlockElim.h
index 3e7afd4cd433..d52d7c3c5b49 100644
--- a/include/llvm/CodeGen/UnreachableBlockElim.h
+++ b/include/llvm/CodeGen/UnreachableBlockElim.h
@@ -1,9 +1,8 @@
 //===-- UnreachableBlockElim.h - Remove unreachable blocks for codegen --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index d2ef4a94f8e2..c540c94f79d9 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -1,9 +1,8 @@
 //===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 0abb4ece1d14..5818ac183fcc 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -1,9 +1,8 @@
 //===- ValueTypes.td - ValueType definitions ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,89 +62,105 @@ def v32i16 : ValueType<512, 38>;   // 32 x i16 vector value
 def v64i16 : ValueType<1024,39>;   // 64 x i16 vector value
 def v128i16: ValueType<2048,40>;   //128 x i16 vector value
 
-def v1i32  : ValueType<32 , 41>;   //  1 x i32 vector value
-def v2i32  : ValueType<64 , 42>;   //  2 x i32 vector value
-def v4i32  : ValueType<128, 43>;   //  4 x i32 vector value
-def v8i32  : ValueType<256, 44>;   //  8 x i32 vector value
-def v16i32 : ValueType<512, 45>;   // 16 x i32 vector value
-def v32i32 : ValueType<1024,46>;   // 32 x i32 vector value
-def v64i32 : ValueType<2048,47>;   // 64 x i32 vector value
-
-def v1i64  : ValueType<64 , 48>;   //  1 x i64 vector value
-def v2i64  : ValueType<128, 49>;   //  2 x i64 vector value
-def v4i64  : ValueType<256, 50>;   //  4 x i64 vector value
-def v8i64  : ValueType<512, 51>;   //  8 x i64 vector value
-def v16i64 : ValueType<1024,52>;   // 16 x i64 vector value
-def v32i64 : ValueType<2048,53>;   // 32 x i64 vector value
-
-def v1i128 : ValueType<128, 54>;   //  1 x i128 vector value
-
-def nxv1i1  : ValueType<1,   55>;  // n x  1 x i1  vector value
-def nxv2i1  : ValueType<2,   56>;  // n x  2 x i1  vector value
-def nxv4i1  : ValueType<4,   57>;  // n x  4 x i1  vector value
-def nxv8i1  : ValueType<8,   58>;  // n x  8 x i1  vector value
-def nxv16i1 : ValueType<16,  59>;  // n x 16 x i1  vector value
-def nxv32i1 : ValueType<32,  60>;  // n x 32 x i1  vector value
-
-def nxv1i8  : ValueType<8,   61>;  // n x  1 x i8  vector value
-def nxv2i8  : ValueType<16,  62>;  // n x  2 x i8  vector value
-def nxv4i8  : ValueType<32,  63>;  // n x  4 x i8  vector value
-def nxv8i8  : ValueType<64,  64>;  // n x  8 x i8  vector value
-def nxv16i8 : ValueType<128, 65>;  // n x 16 x i8  vector value
-def nxv32i8 : ValueType<256, 66>;  // n x 32 x i8  vector value
-
-def nxv1i16 : ValueType<16,  67>;  // n x  1 x i16 vector value
-def nxv2i16 : ValueType<32,  68>;  // n x  2 x i16 vector value
-def nxv4i16 : ValueType<64,  69>;  // n x  4 x i16 vector value
-def nxv8i16 : ValueType<128, 70>;  // n x  8 x i16 vector value
-def nxv16i16: ValueType<256, 71>;  // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 72>;  // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32,  73>;  // n x  1 x i32 vector value
-def nxv2i32 : ValueType<64,  74>;  // n x  2 x i32 vector value
-def nxv4i32 : ValueType<128, 75>;  // n x  4 x i32 vector value
-def nxv8i32 : ValueType<256, 76>;  // n x  8 x i32 vector value
-def nxv16i32: ValueType<512, 77>;  // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,78>;  // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64,  79>;  // n x  1 x i64 vector value
-def nxv2i64 : ValueType<128, 80>;  // n x  2 x i64 vector value
-def nxv4i64 : ValueType<256, 81>;  // n x  4 x i64 vector value
-def nxv8i64 : ValueType<512, 82>;  // n x  8 x i64 vector value
-def nxv16i64: ValueType<1024,83>;  // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,84>;  // n x 32 x i64 vector value
-
-def v2f16  : ValueType<32 , 85>;   //  2 x f16 vector value
-def v4f16  : ValueType<64 , 86>;   //  4 x f16 vector value
-def v8f16  : ValueType<128, 87>;   //  8 x f16 vector value
-def v1f32  : ValueType<32 , 88>;   //  1 x f32 vector value
-def v2f32  : ValueType<64 , 89>;   //  2 x f32 vector value
-def v4f32  : ValueType<128, 90>;   //  4 x f32 vector value
-def v8f32  : ValueType<256, 91>;   //  8 x f32 vector value
-def v16f32 : ValueType<512, 92>;   // 16 x f32 vector value
-def v1f64  : ValueType<64,  93>;   //  1 x f64 vector value
-def v2f64  : ValueType<128, 94>;   //  2 x f64 vector value
-def v4f64  : ValueType<256, 95>;   //  4 x f64 vector value
-def v8f64  : ValueType<512, 96>;   //  8 x f64 vector value
-
-def nxv2f16  : ValueType<32 ,  97>; // n x  2 x f16 vector value
-def nxv4f16  : ValueType<64 ,  98>; // n x  4 x f16 vector value
-def nxv8f16  : ValueType<128,  99>; // n x  8 x f16 vector value
-def nxv1f32  : ValueType<32 , 100>; // n x  1 x f32 vector value
-def nxv2f32  : ValueType<64 , 101>; // n x  2 x f32 vector value
-def nxv4f32  : ValueType<128, 102>; // n x  4 x f32 vector value
-def nxv8f32  : ValueType<256, 103>; // n x  8 x f32 vector value
-def nxv16f32 : ValueType<512, 104>; // n x 16 x f32 vector value
-def nxv1f64  : ValueType<64,  105>; // n x  1 x f64 vector value
-def nxv2f64  : ValueType<128, 106>; // n x  2 x f64 vector value
-def nxv4f64  : ValueType<256, 107>; // n x  4 x f64 vector value
-def nxv8f64  : ValueType<512, 108>; // n x  8 x f64 vector value
-
-def x86mmx : ValueType<64 , 109>;   // X86 MMX value
-def FlagVT : ValueType<0  , 110>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 111>;   // Produces no value
-def untyped: ValueType<8  , 112>;   // Produces an untyped value
-def ExceptRef: ValueType<0, 113>;   // WebAssembly's except_ref type
+def v1i32    : ValueType<32 , 41>;   //  1 x i32 vector value
+def v2i32    : ValueType<64 , 42>;   //  2 x i32 vector value
+def v3i32    : ValueType<96 , 43>;   //  3 x i32 vector value
+def v4i32    : ValueType<128, 44>;   //  4 x i32 vector value
+def v5i32    : ValueType<160, 45>;   //  5 x i32 vector value
+def v8i32    : ValueType<256, 46>;   //  8 x i32 vector value
+def v16i32   : ValueType<512, 47>;   // 16 x i32 vector value
+def v32i32   : ValueType<1024,48>;   // 32 x i32 vector value
+def v64i32   : ValueType<2048,49>;   // 64 x i32 vector value
+def v128i32  : ValueType<4096,50>;   // 128 x i32 vector value
+def v256i32  : ValueType<8192,51>;   // 256 x i32 vector value
+def v512i32  : ValueType<16384,52>;  // 512 x i32 vector value
+def v1024i32 : ValueType<32768,53>;  // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,54>;  // 2048 x i32 vector value
+
+def v1i64  : ValueType<64 , 55>;   //  1 x i64 vector value
+def v2i64  : ValueType<128, 56>;   //  2 x i64 vector value
+def v4i64  : ValueType<256, 57>;   //  4 x i64 vector value
+def v8i64  : ValueType<512, 58>;   //  8 x i64 vector value
+def v16i64 : ValueType<1024,59>;   // 16 x i64 vector value
+def v32i64 : ValueType<2048,60>;   // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 61>;   //  1 x i128 vector value
+
+def nxv1i1  : ValueType<1,   62>;  // n x  1 x i1  vector value
+def nxv2i1  : ValueType<2,   63>;  // n x  2 x i1  vector value
+def nxv4i1  : ValueType<4,   64>;  // n x  4 x i1  vector value
+def nxv8i1  : ValueType<8,   65>;  // n x  8 x i1  vector value
+def nxv16i1 : ValueType<16,  66>;  // n x 16 x i1  vector value
+def nxv32i1 : ValueType<32,  67>;  // n x 32 x i1  vector value
+
+def nxv1i8  : ValueType<8,   68>;  // n x  1 x i8  vector value
+def nxv2i8  : ValueType<16,  69>;  // n x  2 x i8  vector value
+def nxv4i8  : ValueType<32,  70>;  // n x  4 x i8  vector value
+def nxv8i8  : ValueType<64,  71>;  // n x  8 x i8  vector value
+def nxv16i8 : ValueType<128, 72>;  // n x 16 x i8  vector value
+def nxv32i8 : ValueType<256, 73>;  // n x 32 x i8  vector value
+
+def nxv1i16 : ValueType<16,  74>;  // n x  1 x i16 vector value
+def nxv2i16 : ValueType<32,  75>;  // n x  2 x i16 vector value
+def nxv4i16 : ValueType<64,  76>;  // n x  4 x i16 vector value
+def nxv8i16 : ValueType<128, 77>;  // n x  8 x i16 vector value
+def nxv16i16: ValueType<256, 78>;  // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 79>;  // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32,  80>;  // n x  1 x i32 vector value
+def nxv2i32 : ValueType<64,  81>;  // n x  2 x i32 vector value
+def nxv4i32 : ValueType<128, 82>;  // n x  4 x i32 vector value
+def nxv8i32 : ValueType<256, 83>;  // n x  8 x i32 vector value
+def nxv16i32: ValueType<512, 84>;  // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,85>;  // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64,  86>;  // n x  1 x i64 vector value
+def nxv2i64 : ValueType<128, 87>;  // n x  2 x i64 vector value
+def nxv4i64 : ValueType<256, 88>;  // n x  4 x i64 vector value
+def nxv8i64 : ValueType<512, 89>;  // n x  8 x i64 vector value
+def nxv16i64: ValueType<1024,90>;  // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,91>;  // n x 32 x i64 vector value
+
+def v2f16    : ValueType<32 , 92>;    //    2 x f16 vector value
+def v4f16    : ValueType<64 , 93>;    //    4 x f16 vector value
+def v8f16    : ValueType<128, 94>;    //    8 x f16 vector value
+def v1f32    : ValueType<32 , 95>;    //    1 x f32 vector value
+def v2f32    : ValueType<64 , 96>;    //    2 x f32 vector value
+def v3f32    : ValueType<96 , 97>;    //    3 x f32 vector value
+def v4f32    : ValueType<128, 98>;    //    4 x f32 vector value
+def v5f32    : ValueType<160, 99>;    //    5 x f32 vector value
+def v8f32    : ValueType<256, 100>;   //    8 x f32 vector value
+def v16f32   : ValueType<512,  101>;  //   16 x f32 vector value
+def v32f32   : ValueType<1024, 102>;  //   32 x f32 vector value
+def v64f32   : ValueType<2048, 103>;  //   64 x f32 vector value
+def v128f32  : ValueType<4096, 104>;  //  128 x f32 vector value
+def v256f32  : ValueType<8192, 105>;  //  256 x f32 vector value
+def v512f32  : ValueType<16384, 106>; //  512 x f32 vector value
+def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value
+def v1f64    : ValueType<64, 109>;    //    1 x f64 vector value
+def v2f64    : ValueType<128, 110>;   //    2 x f64 vector value
+def v4f64    : ValueType<256, 111>;   //    4 x f64 vector value
+def v8f64    : ValueType<512, 112>;   //    8 x f64 vector value
+
+def nxv2f16  : ValueType<32 , 113>; // n x  2 x f16 vector value
+def nxv4f16  : ValueType<64 , 114>; // n x  4 x f16 vector value
+def nxv8f16  : ValueType<128, 115>; // n x  8 x f16 vector value
+def nxv1f32  : ValueType<32 , 116>; // n x  1 x f32 vector value
+def nxv2f32  : ValueType<64 , 117>; // n x  2 x f32 vector value
+def nxv4f32  : ValueType<128, 118>; // n x  4 x f32 vector value
+def nxv8f32  : ValueType<256, 119>; // n x  8 x f32 vector value
+def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value
+def nxv1f64  : ValueType<64,  121>; // n x  1 x f64 vector value
+def nxv2f64  : ValueType<128, 122>; // n x  2 x f64 vector value
+def nxv4f64  : ValueType<256, 123>; // n x  4 x f64 vector value
+def nxv8f64  : ValueType<512, 124>; // n x  8 x f64 vector value
+
+def x86mmx : ValueType<64 , 125>;   // X86 MMX value
+def FlagVT : ValueType<0  , 126>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 127>;   // Produces no value
+def untyped: ValueType<8  , 128>;   // Produces an untyped value
+def exnref: ValueType<0, 129>;      // WebAssembly's exnref type
 def token  : ValueType<0  , 248>;   // TokenTy
 def MetadataVT: ValueType<0, 249>;  // Metadata
 
@@ -167,3 +182,14 @@ def iPTR   : ValueType<0  , 254>;
 
 // Pseudo valuetype to represent "any type of any size".
 def Any    : ValueType<0  , 255>;
+
+/// This class is for targets that want to use pointer types in patterns
+/// with the GlobalISelEmitter.  Targets must define their own pointer
+/// derived from this class.  The scalar argument should be an
+/// integer type with the same bit size as the pointer.
+/// e.g. def p0 : PtrValueType <i64, 0>;
+
+class PtrValueType <ValueType scalar, int addrspace> :
+    ValueType<scalar.Size, scalar.Value> {
+  int AddrSpace = addrspace;
+}
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 6a8e50a7e5f5..70eb048f05eb 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/VirtRegMap.h - Virtual Register Map ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,8 +67,10 @@ class TargetInstrInfo;
   public:
     static char ID;
 
-    VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
-                   Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
+    VirtRegMap()
+        : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
+          MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+          Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
     VirtRegMap(const VirtRegMap &) = delete;
     VirtRegMap &operator=(const VirtRegMap &) = delete;
 
@@ -98,8 +99,8 @@ class TargetInstrInfo;
 
     /// returns the physical register mapped to the specified
     /// virtual register
-    unsigned getPhys(unsigned virtReg) const {
-      assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+    Register getPhys(Register virtReg) const {
+      assert(virtReg.isVirtual());
       return Virt2PhysMap[virtReg];
     }
 
diff --git a/include/llvm/CodeGen/WasmEHFuncInfo.h b/include/llvm/CodeGen/WasmEHFuncInfo.h
index 219fff988f6e..887a1467b3e4 100644
--- a/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -1,9 +1,8 @@
 //===--- llvm/CodeGen/WasmEHFuncInfo.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,10 +28,6 @@ struct WasmEHFuncInfo {
   // When there is an entry <A, B>, if an exception is not caught by A, it
   // should next unwind to the EH pad B.
   DenseMap<BBOrMBB, BBOrMBB> EHPadUnwindMap;
-  // For entry <A, B>, A is a BB with an instruction that may throw
-  // (invoke/cleanupret in LLVM IR, call/rethrow in the backend) and B is an EH
-  // pad that A unwinds to.
-  DenseMap<BBOrMBB, BBOrMBB> ThrowUnwindMap;
 
   // Helper functions
   const BasicBlock *getEHPadUnwindDest(const BasicBlock *BB) const {
@@ -41,18 +36,9 @@ struct WasmEHFuncInfo {
   void setEHPadUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
     EHPadUnwindMap[BB] = Dest;
   }
-  const BasicBlock *getThrowUnwindDest(BasicBlock *BB) const {
-    return ThrowUnwindMap.lookup(BB).get<const BasicBlock *>();
-  }
-  void setThrowUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
-    ThrowUnwindMap[BB] = Dest;
-  }
   bool hasEHPadUnwindDest(const BasicBlock *BB) const {
     return EHPadUnwindMap.count(BB);
   }
-  bool hasThrowUnwindDest(const BasicBlock *BB) const {
-    return ThrowUnwindMap.count(BB);
-  }
 
   MachineBasicBlock *getEHPadUnwindDest(MachineBasicBlock *MBB) const {
     return EHPadUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
@@ -60,18 +46,9 @@ struct WasmEHFuncInfo {
   void setEHPadUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
     EHPadUnwindMap[MBB] = Dest;
   }
-  MachineBasicBlock *getThrowUnwindDest(MachineBasicBlock *MBB) const {
-    return ThrowUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
-  }
-  void setThrowUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
-    ThrowUnwindMap[MBB] = Dest;
-  }
   bool hasEHPadUnwindDest(MachineBasicBlock *MBB) const {
     return EHPadUnwindMap.count(MBB);
   }
-  bool hasThrowUnwindDest(MachineBasicBlock *MBB) const {
-    return ThrowUnwindMap.count(MBB);
-  }
 };
 
 // Analyze the IR in the given function to build WasmEHFuncInfo.
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index 8043024626a0..f098316de793 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
index bd1743511ed4..0ac8b651939d 100644
--- a/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
@@ -1,9 +1,8 @@
 //===- AppendingTypeTableBuilder.h -------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h
index 11ca9ff108de..784c47e3bf5d 100644
--- a/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -1,9 +1,8 @@
-//===- RecordIterator.h -----------------------------------------*- C++ -*-===//
+//===- CVRecord.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -25,17 +24,31 @@ namespace llvm {
 
 namespace codeview {
 
+/// CVRecord is a fat pointer (base + size pair) to a symbol or type record.
+/// Carrying the size separately instead of trusting the size stored in the
+/// record prefix provides some extra safety and flexibility.
 template <typename Kind> class CVRecord {
 public:
-  CVRecord() : Type(static_cast<Kind>(0)) {}
+  CVRecord() = default;
+
+  CVRecord(ArrayRef<uint8_t> Data) : RecordData(Data) {}
 
-  CVRecord(Kind K, ArrayRef<uint8_t> Data) : Type(K), RecordData(Data) {}
+  CVRecord(const RecordPrefix *P, size_t Size)
+      : RecordData(reinterpret_cast<const uint8_t *>(P), Size) {}
 
-  bool valid() const { return Type != static_cast<Kind>(0); }
+  bool valid() const { return kind() != Kind(0); }
 
   uint32_t length() const { return RecordData.size(); }
-  Kind kind() const { return Type; }
+
+  Kind kind() const {
+    if (RecordData.size() < sizeof(RecordPrefix))
+      return Kind(0);
+    return static_cast<Kind>(static_cast<uint16_t>(
+        reinterpret_cast<const RecordPrefix *>(RecordData.data())->RecordKind));
+  }
+
   ArrayRef<uint8_t> data() const { return RecordData; }
+
   StringRef str_data() const {
     return StringRef(reinterpret_cast<const char *>(RecordData.data()),
                      RecordData.size());
@@ -45,7 +58,6 @@ public:
     return RecordData.drop_front(sizeof(RecordPrefix));
   }
 
-  Kind Type;
   ArrayRef<uint8_t> RecordData;
 };
 
@@ -72,8 +84,7 @@ Error forEachCodeViewRecord(ArrayRef<uint8_t> StreamBuffer, Func F) {
     ArrayRef<uint8_t> Data = StreamBuffer.take_front(RealLen);
     StreamBuffer = StreamBuffer.drop_front(RealLen);
 
-    Record R(static_cast<decltype(Record::Type)>((uint16_t)Prefix->RecordKind),
-             Data);
+    Record R(Data);
     if (auto EC = F(R))
       return EC;
   }
@@ -92,13 +103,12 @@ inline Expected<CVRecord<Kind>> readCVRecordFromStream(BinaryStreamRef Stream,
     return std::move(EC);
   if (Prefix->RecordLen < 2)
     return make_error<CodeViewError>(cv_error_code::corrupt_record);
-  Kind K = static_cast<Kind>(uint16_t(Prefix->RecordKind));
 
   Reader.setOffset(Offset);
   ArrayRef<uint8_t> RawData;
   if (auto EC = Reader.readBytes(RawData, Prefix->RecordLen + sizeof(uint16_t)))
     return std::move(EC);
-  return codeview::CVRecord<Kind>(K, RawData);
+  return codeview::CVRecord<Kind>(RawData);
 }
 
 } // end namespace codeview
diff --git a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 7c8cd121751a..1615ff41df12 100644
--- a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -1,9 +1,8 @@
 //===- CVSymbolVisitor.h ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index b765ba1abb4d..7d20bb0a7bde 100644
--- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -1,9 +1,8 @@
 //===- CVTypeVisitor.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
@@ -31,6 +31,9 @@ enum VisitorDataSource {
 Error visitTypeRecord(CVType &Record, TypeIndex Index,
                       TypeVisitorCallbacks &Callbacks,
                       VisitorDataSource Source = VDS_BytesPresent);
+Error visitTypeRecord(CVType &Record, TypeIndex Index,
+                      TypeVisitorCallbackPipeline &Callbacks,
+                      VisitorDataSource Source = VDS_BytesPresent);
 Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks,
                       VisitorDataSource Source = VDS_BytesPresent);
 
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index 8e0d9f608e93..c3acb05ea8b1 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -1,9 +1,8 @@
 //===- CodeView.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -160,9 +159,10 @@ enum SourceLanguage : uint8_t {
   MSIL = 0x0f,
   HLSL = 0x10,
 
-  /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't
-  /// have an enumerator for it yet.
+  /// The DMD & Swift compilers emit 'D' and 'S', respectively, for the CV
+  /// source language. Microsoft does not have enumerators for them yet.
   D = 'D',
+  Swift = 'S',
 };
 
 /// These values correspond to the CV_call_e enumeration, and are documented
@@ -304,6 +304,9 @@ enum class ModifierOptions : uint16_t {
 };
 CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions)
 
+// If the subsection kind has this bit set, then the linker should ignore it.
+enum : uint32_t { SubsectionIgnoreFlag = 0x80000000 };
+
 enum class DebugSubsectionKind : uint32_t {
   None = 0,
   Symbols = 0xf1,
@@ -509,9 +512,23 @@ enum class FrameCookieKind : uint8_t {
 
 // Corresponds to CV_HREG_e enum.
 enum class RegisterId : uint16_t {
+#define CV_REGISTERS_ALL
 #define CV_REGISTER(name, value) name = value,
 #include "CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_ALL
+};
+
+// Register Ids are shared between architectures in CodeView. CPUType is needed
+// to map register Id to name.
+struct CPURegister {
+  CPURegister() = delete;
+  /// Pairs the register id \p Reg with the CPU type \p Cpu used to name it.
+  CPURegister(CPUType Cpu, codeview::RegisterId Reg)
+      : Cpu(Cpu), Reg(Reg) {}
+
+  CPUType Cpu;
+  RegisterId Reg;
 };
 
 /// Two-bit value indicating which register is the designated frame pointer
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewError.h b/include/llvm/DebugInfo/CodeView/CodeViewError.h
index d4615d02220d..9990c8d05d1c 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewError.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewError.h
@@ -1,9 +1,8 @@
 //===- CodeViewError.h - Error extensions for CodeView ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index 94f104ff772c..00fb0cf4cc90 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -1,9 +1,8 @@
 //===- CodeViewRecordIO.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -25,28 +24,65 @@
 #include <type_traits>
 
 namespace llvm {
+
 namespace codeview {
 
+class CodeViewRecordStreamer {
+public:
+  virtual void EmitBytes(StringRef Data) = 0;
+  virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0;
+  virtual void EmitBinaryData(StringRef Data) = 0;
+  virtual void AddComment(const Twine &T) = 0;
+  virtual ~CodeViewRecordStreamer() = default;
+};
+
 class CodeViewRecordIO {
   uint32_t getCurrentOffset() const {
-    return (isWriting()) ? Writer->getOffset() : Reader->getOffset();
+    if (isWriting())
+      return Writer->getOffset();
+    else if (isReading())
+      return Reader->getOffset();
+    else
+      return 0;
   }
 
 public:
+  // deserializes records to structures
   explicit CodeViewRecordIO(BinaryStreamReader &Reader) : Reader(&Reader) {}
+
+  // serializes records to buffer
   explicit CodeViewRecordIO(BinaryStreamWriter &Writer) : Writer(&Writer) {}
 
+  // writes records to assembly file using MC library interface
+  explicit CodeViewRecordIO(CodeViewRecordStreamer &Streamer)
+      : Streamer(&Streamer) {}
+
   Error beginRecord(Optional<uint32_t> MaxLength);
   Error endRecord();
 
-  Error mapInteger(TypeIndex &TypeInd);
+  Error mapInteger(TypeIndex &TypeInd, const Twine &Comment = "");
 
-  bool isReading() const { return Reader != nullptr; }
-  bool isWriting() const { return !isReading(); }
+  bool isStreaming() const {
+    return (Streamer != nullptr) && (Reader == nullptr) && (Writer == nullptr);
+  }
+  bool isReading() const {
+    return (Reader != nullptr) && (Streamer == nullptr) && (Writer == nullptr);
+  }
+  bool isWriting() const {
+    return (Writer != nullptr) && (Streamer == nullptr) && (Reader == nullptr);
+  }
 
   uint32_t maxFieldLength() const;
 
   template <typename T> Error mapObject(T &Value) {
+    if (isStreaming()) {
+      StringRef BytesSR =
+          StringRef((reinterpret_cast<const char *>(&Value)), sizeof(Value));
+      Streamer->EmitBytes(BytesSR);
+      incrStreamedLen(sizeof(T));
+      return Error::success();
+    }
+
     if (isWriting())
       return Writer->writeObject(Value);
 
@@ -57,41 +93,63 @@ public:
     return Error::success();
   }
 
-  template <typename T> Error mapInteger(T &Value) {
+  template <typename T> Error mapInteger(T &Value, const Twine &Comment = "") {
+    if (isStreaming()) {
+      emitComment(Comment);
+      Streamer->EmitIntValue(static_cast<uint64_t>(Value), sizeof(T));
+      incrStreamedLen(sizeof(T));
+      return Error::success();
+    }
+
     if (isWriting())
       return Writer->writeInteger(Value);
 
     return Reader->readInteger(Value);
   }
 
-  template <typename T> Error mapEnum(T &Value) {
-    if (sizeof(Value) > maxFieldLength())
+  template <typename T> Error mapEnum(T &Value, const Twine &Comment = "") {
+    if (!isStreaming() && sizeof(Value) > maxFieldLength())
       return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
 
     using U = typename std::underlying_type<T>::type;
     U X;
-    if (isWriting())
+
+    if (isWriting() || isStreaming())
       X = static_cast<U>(Value);
 
-    if (auto EC = mapInteger(X))
+    if (auto EC = mapInteger(X, Comment))
       return EC;
+
     if (isReading())
       Value = static_cast<T>(X);
+
     return Error::success();
   }
 
-  Error mapEncodedInteger(int64_t &Value);
-  Error mapEncodedInteger(uint64_t &Value);
-  Error mapEncodedInteger(APSInt &Value);
-  Error mapStringZ(StringRef &Value);
-  Error mapGuid(GUID &Guid);
+  Error mapEncodedInteger(int64_t &Value, const Twine &Comment = "");
+  Error mapEncodedInteger(uint64_t &Value, const Twine &Comment = "");
+  Error mapEncodedInteger(APSInt &Value, const Twine &Comment = "");
+  Error mapStringZ(StringRef &Value, const Twine &Comment = "");
+  Error mapGuid(GUID &Guid, const Twine &Comment = "");
 
-  Error mapStringZVectorZ(std::vector<StringRef> &Value);
+  Error mapStringZVectorZ(std::vector<StringRef> &Value,
+                          const Twine &Comment = "");
 
   template <typename SizeType, typename T, typename ElementMapper>
-  Error mapVectorN(T &Items, const ElementMapper &Mapper) {
+  Error mapVectorN(T &Items, const ElementMapper &Mapper,
+                   const Twine &Comment = "") {
     SizeType Size;
-    if (isWriting()) {
+    if (isStreaming()) {
+      Size = static_cast<SizeType>(Items.size());
+      emitComment(Comment);
+      Streamer->EmitIntValue(Size, sizeof(Size));
+      incrStreamedLen(sizeof(Size)); // account for the count just emitted
+
+      for (auto &X : Items) {
+        if (auto EC = Mapper(*this, X))
+          return EC;
+      }
+    } else if (isWriting()) {
       Size = static_cast<SizeType>(Items.size());
       if (auto EC = Writer->writeInteger(Size))
         return EC;
@@ -115,8 +173,10 @@ public:
   }
 
   template <typename T, typename ElementMapper>
-  Error mapVectorTail(T &Items, const ElementMapper &Mapper) {
-    if (isWriting()) {
+  Error mapVectorTail(T &Items, const ElementMapper &Mapper,
+                      const Twine &Comment = "") {
+    emitComment(Comment);
+    if (isStreaming() || isWriting()) {
       for (auto &Item : Items) {
         if (auto EC = Mapper(*this, Item))
           return EC;
@@ -133,16 +193,44 @@ public:
     return Error::success();
   }
 
-  Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes);
-  Error mapByteVectorTail(std::vector<uint8_t> &Bytes);
+  Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes, const Twine &Comment = "");
+  Error mapByteVectorTail(std::vector<uint8_t> &Bytes,
+                          const Twine &Comment = "");
 
   Error padToAlignment(uint32_t Align);
   Error skipPadding();
 
+  /// Returns the running StreamedLen byte count, or 0 when this object is not
+  /// in streaming mode.
+  uint64_t getStreamedLen() const {
+    return isStreaming() ? StreamedLen : 0;
+  }
+
 private:
+  void emitEncodedSignedInteger(const int64_t &Value,
+                                const Twine &Comment = "");
+  void emitEncodedUnsignedInteger(const uint64_t &Value,
+                                  const Twine &Comment = "");
   Error writeEncodedSignedInteger(const int64_t &Value);
   Error writeEncodedUnsignedInteger(const uint64_t &Value);
 
+  void incrStreamedLen(const uint64_t &Len) {
+    if (isStreaming())
+      StreamedLen += Len;
+  }
+
+  void resetStreamedLen() {
+    if (isStreaming())
+      StreamedLen = 4; // The record prefix is 4 bytes long
+  }
+
+  /// Forwards \p Comment to the streamer. Does nothing unless isStreaming(),
+  /// since only that mode has a Streamer to receive it.
+  void emitComment(const Twine &Comment) {
+    if (isStreaming())
+      Streamer->AddComment(Comment);
+  }
+
   struct RecordLimit {
     uint32_t BeginOffset;
     Optional<uint32_t> MaxLength;
@@ -163,6 +251,8 @@ private:
 
   BinaryStreamReader *Reader = nullptr;
   BinaryStreamWriter *Writer = nullptr;
+  CodeViewRecordStreamer *Streamer = nullptr;
+  uint64_t StreamedLen = 0;
 };
 
 } // end namespace codeview
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index fdfcf4d53a23..9767e49c44f5 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -1,9 +1,8 @@
 //===-- CodeViewRegisters.def - CodeView registers --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,8 +14,15 @@
 #define CV_REGISTER(name, value)
 #endif
 
+#if !defined(CV_REGISTERS_ALL) && !defined(CV_REGISTERS_X86) &&                \
+    !defined(CV_REGISTERS_ARM64)
+#error Need include at least one register set.
+#endif
+
 // This currently only contains the "register subset shared by all processor
-// types" (ERR etc.) and the x86 registers.
+// types" (ERR etc.) and the x86/arm64 registers.
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
 
 // Some system headers define macros that conflict with our enums. Every
 // compiler supported by LLVM has the push_macro and pop_macro pragmas, so use
@@ -357,3 +363,197 @@ CV_REGISTER(AMD64_K7, 765)
 #pragma pop_macro("CR2")
 #pragma pop_macro("CR3")
 #pragma pop_macro("CR4")
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
+
+// ARM64 registers
+
+CV_REGISTER(ARM64_NOREG, 0)
+
+// General purpose 32-bit integer registers
+
+CV_REGISTER(ARM64_W0, 10)
+CV_REGISTER(ARM64_W1, 11)
+CV_REGISTER(ARM64_W2, 12)
+CV_REGISTER(ARM64_W3, 13)
+CV_REGISTER(ARM64_W4, 14)
+CV_REGISTER(ARM64_W5, 15)
+CV_REGISTER(ARM64_W6, 16)
+CV_REGISTER(ARM64_W7, 17)
+CV_REGISTER(ARM64_W8, 18)
+CV_REGISTER(ARM64_W9, 19)
+CV_REGISTER(ARM64_W10, 20)
+CV_REGISTER(ARM64_W11, 21)
+CV_REGISTER(ARM64_W12, 22)
+CV_REGISTER(ARM64_W13, 23)
+CV_REGISTER(ARM64_W14, 24)
+CV_REGISTER(ARM64_W15, 25)
+CV_REGISTER(ARM64_W16, 26)
+CV_REGISTER(ARM64_W17, 27)
+CV_REGISTER(ARM64_W18, 28)
+CV_REGISTER(ARM64_W19, 29)
+CV_REGISTER(ARM64_W20, 30)
+CV_REGISTER(ARM64_W21, 31)
+CV_REGISTER(ARM64_W22, 32)
+CV_REGISTER(ARM64_W23, 33)
+CV_REGISTER(ARM64_W24, 34)
+CV_REGISTER(ARM64_W25, 35)
+CV_REGISTER(ARM64_W26, 36)
+CV_REGISTER(ARM64_W27, 37)
+CV_REGISTER(ARM64_W28, 38)
+CV_REGISTER(ARM64_W29, 39)
+CV_REGISTER(ARM64_W30, 40)
+CV_REGISTER(ARM64_WZR, 41)
+
+// General purpose 64-bit integer registers
+
+CV_REGISTER(ARM64_X0, 50)
+CV_REGISTER(ARM64_X1, 51)
+CV_REGISTER(ARM64_X2, 52)
+CV_REGISTER(ARM64_X3, 53)
+CV_REGISTER(ARM64_X4, 54)
+CV_REGISTER(ARM64_X5, 55)
+CV_REGISTER(ARM64_X6, 56)
+CV_REGISTER(ARM64_X7, 57)
+CV_REGISTER(ARM64_X8, 58)
+CV_REGISTER(ARM64_X9, 59)
+CV_REGISTER(ARM64_X10, 60)
+CV_REGISTER(ARM64_X11, 61)
+CV_REGISTER(ARM64_X12, 62)
+CV_REGISTER(ARM64_X13, 63)
+CV_REGISTER(ARM64_X14, 64)
+CV_REGISTER(ARM64_X15, 65)
+CV_REGISTER(ARM64_X16, 66)
+CV_REGISTER(ARM64_X17, 67)
+CV_REGISTER(ARM64_X18, 68)
+CV_REGISTER(ARM64_X19, 69)
+CV_REGISTER(ARM64_X20, 70)
+CV_REGISTER(ARM64_X21, 71)
+CV_REGISTER(ARM64_X22, 72)
+CV_REGISTER(ARM64_X23, 73)
+CV_REGISTER(ARM64_X24, 74)
+CV_REGISTER(ARM64_X25, 75)
+CV_REGISTER(ARM64_X26, 76)
+CV_REGISTER(ARM64_X27, 77)
+CV_REGISTER(ARM64_X28, 78)
+CV_REGISTER(ARM64_FP, 79)
+CV_REGISTER(ARM64_LR, 80)
+CV_REGISTER(ARM64_SP, 81)
+CV_REGISTER(ARM64_ZR, 82)
+
+// status register
+
+CV_REGISTER(ARM64_NZCV, 90)
+
+// 32-bit floating point registers
+
+CV_REGISTER(ARM64_S0, 100)
+CV_REGISTER(ARM64_S1, 101)
+CV_REGISTER(ARM64_S2, 102)
+CV_REGISTER(ARM64_S3, 103)
+CV_REGISTER(ARM64_S4, 104)
+CV_REGISTER(ARM64_S5, 105)
+CV_REGISTER(ARM64_S6, 106)
+CV_REGISTER(ARM64_S7, 107)
+CV_REGISTER(ARM64_S8, 108)
+CV_REGISTER(ARM64_S9, 109)
+CV_REGISTER(ARM64_S10, 110)
+CV_REGISTER(ARM64_S11, 111)
+CV_REGISTER(ARM64_S12, 112)
+CV_REGISTER(ARM64_S13, 113)
+CV_REGISTER(ARM64_S14, 114)
+CV_REGISTER(ARM64_S15, 115)
+CV_REGISTER(ARM64_S16, 116)
+CV_REGISTER(ARM64_S17, 117)
+CV_REGISTER(ARM64_S18, 118)
+CV_REGISTER(ARM64_S19, 119)
+CV_REGISTER(ARM64_S20, 120)
+CV_REGISTER(ARM64_S21, 121)
+CV_REGISTER(ARM64_S22, 122)
+CV_REGISTER(ARM64_S23, 123)
+CV_REGISTER(ARM64_S24, 124)
+CV_REGISTER(ARM64_S25, 125)
+CV_REGISTER(ARM64_S26, 126)
+CV_REGISTER(ARM64_S27, 127)
+CV_REGISTER(ARM64_S28, 128)
+CV_REGISTER(ARM64_S29, 129)
+CV_REGISTER(ARM64_S30, 130)
+CV_REGISTER(ARM64_S31, 131)
+
+// 64-bit floating point registers
+
+CV_REGISTER(ARM64_D0, 140)
+CV_REGISTER(ARM64_D1, 141)
+CV_REGISTER(ARM64_D2, 142)
+CV_REGISTER(ARM64_D3, 143)
+CV_REGISTER(ARM64_D4, 144)
+CV_REGISTER(ARM64_D5, 145)
+CV_REGISTER(ARM64_D6, 146)
+CV_REGISTER(ARM64_D7, 147)
+CV_REGISTER(ARM64_D8, 148)
+CV_REGISTER(ARM64_D9, 149)
+CV_REGISTER(ARM64_D10, 150)
+CV_REGISTER(ARM64_D11, 151)
+CV_REGISTER(ARM64_D12, 152)
+CV_REGISTER(ARM64_D13, 153)
+CV_REGISTER(ARM64_D14, 154)
+CV_REGISTER(ARM64_D15, 155)
+CV_REGISTER(ARM64_D16, 156)
+CV_REGISTER(ARM64_D17, 157)
+CV_REGISTER(ARM64_D18, 158)
+CV_REGISTER(ARM64_D19, 159)
+CV_REGISTER(ARM64_D20, 160)
+CV_REGISTER(ARM64_D21, 161)
+CV_REGISTER(ARM64_D22, 162)
+CV_REGISTER(ARM64_D23, 163)
+CV_REGISTER(ARM64_D24, 164)
+CV_REGISTER(ARM64_D25, 165)
+CV_REGISTER(ARM64_D26, 166)
+CV_REGISTER(ARM64_D27, 167)
+CV_REGISTER(ARM64_D28, 168)
+CV_REGISTER(ARM64_D29, 169)
+CV_REGISTER(ARM64_D30, 170)
+CV_REGISTER(ARM64_D31, 171)
+
+// 128-bit SIMD registers
+
+CV_REGISTER(ARM64_Q0, 180)
+CV_REGISTER(ARM64_Q1, 181)
+CV_REGISTER(ARM64_Q2, 182)
+CV_REGISTER(ARM64_Q3, 183)
+CV_REGISTER(ARM64_Q4, 184)
+CV_REGISTER(ARM64_Q5, 185)
+CV_REGISTER(ARM64_Q6, 186)
+CV_REGISTER(ARM64_Q7, 187)
+CV_REGISTER(ARM64_Q8, 188)
+CV_REGISTER(ARM64_Q9, 189)
+CV_REGISTER(ARM64_Q10, 190)
+CV_REGISTER(ARM64_Q11, 191)
+CV_REGISTER(ARM64_Q12, 192)
+CV_REGISTER(ARM64_Q13, 193)
+CV_REGISTER(ARM64_Q14, 194)
+CV_REGISTER(ARM64_Q15, 195)
+CV_REGISTER(ARM64_Q16, 196)
+CV_REGISTER(ARM64_Q17, 197)
+CV_REGISTER(ARM64_Q18, 198)
+CV_REGISTER(ARM64_Q19, 199)
+CV_REGISTER(ARM64_Q20, 200)
+CV_REGISTER(ARM64_Q21, 201)
+CV_REGISTER(ARM64_Q22, 202)
+CV_REGISTER(ARM64_Q23, 203)
+CV_REGISTER(ARM64_Q24, 204)
+CV_REGISTER(ARM64_Q25, 205)
+CV_REGISTER(ARM64_Q26, 206)
+CV_REGISTER(ARM64_Q27, 207)
+CV_REGISTER(ARM64_Q28, 208)
+CV_REGISTER(ARM64_Q29, 209)
+CV_REGISTER(ARM64_Q30, 210)
+CV_REGISTER(ARM64_Q31, 211)
+
+// Floating point status register
+
+CV_REGISTER(ARM64_FPSR, 220)
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
index b5f1cc0198dc..4f8ccfdd16af 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
@@ -1,9 +1,8 @@
 //===-- CodeViewSymbols.def - All CodeView leaf types -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -103,7 +102,6 @@ CV_SYMBOL(S_LPROCIA64_ST  , 0x1015)
 CV_SYMBOL(S_GPROCIA64_ST  , 0x1016)
 CV_SYMBOL(S_LOCALSLOT_ST  , 0x1017)
 CV_SYMBOL(S_PARAMSLOT_ST  , 0x1018)
-CV_SYMBOL(S_ANNOTATION    , 0x1019)
 CV_SYMBOL(S_GMANPROC_ST   , 0x101a)
 CV_SYMBOL(S_LMANPROC_ST   , 0x101b)
 CV_SYMBOL(S_RESERVED1     , 0x101c)
@@ -255,6 +253,7 @@ SYMBOL_RECORD(S_LTHREAD32     , 0x1112, ThreadLocalDataSym)
 SYMBOL_RECORD_ALIAS(S_GTHREAD32     , 0x1113, GlobalTLS, ThreadLocalDataSym)
 
 SYMBOL_RECORD(S_UNAMESPACE    , 0x1124, UsingNamespaceSym)
+SYMBOL_RECORD(S_ANNOTATION    , 0x1019, AnnotationSym)
 
 #undef CV_SYMBOL
 #undef SYMBOL_RECORD
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
index e9a479dba496..a31111eb80a4 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
@@ -1,9 +1,8 @@
 //===-- CodeViewTypes.def - All CodeView leaf types -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
index 7f851a2595dc..53ab2dd04aa7 100644
--- a/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
@@ -1,9 +1,8 @@
 //===- ContinuationRecordBuilder.h ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -62,4 +61,4 @@ public:
 } // namespace codeview
 } // namespace llvm
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index 78b284563afd..01f83676afdf 100644
--- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugChecksumsSubsection.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
index 2f9e9814d998..64a78a7cef21 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
index 8be7ef265c82..e7683cb2a9c4 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -1,9 +1,8 @@
-//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===//
+//===- DebugCrossImpSubsection.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
index 847d93f0e985..d5cd640231f9 100644
--- a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugFrameDataSubsection.h ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index b88c0eae1de2..9fd88a64873a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugInlineeLinesSubsection.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -71,6 +70,11 @@ public:
   }
 
   Error initialize(BinaryStreamReader Reader);
+  Error initialize(BinaryStreamRef Section) {
+    return initialize(BinaryStreamReader(Section));
+  }
+
+  bool valid() const { return Lines.valid(); }
   bool hasExtraFiles() const;
 
   Iterator begin() const { return Lines.begin(); }
@@ -78,7 +82,7 @@ public:
 
 private:
   InlineeLinesSignature Signature;
-  VarStreamArray<InlineeSourceLine> Lines;
+  LinesArray Lines;
 };
 
 class DebugInlineeLinesSubsection final : public DebugSubsection {
diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index 53044b6c3dc8..1f8e56c5311f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugLinesSubsection.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
index bebc960223cc..6e5b8adddd4a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugStringTableSubsection.h - CodeView String Table -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
index e427e0006a55..66272870efda 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugSubsection.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index fc0cf0d1d90e..bcb379f00d68 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -1,9 +1,8 @@
 //===- DebugSubsectionRecord.h ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 75f749dfa933..720b1b49581f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -1,9 +1,8 @@
 //===- DebugSubsectionVisitor.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
index a4c04b55eb4c..91b740ce6b9a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
@@ -1,9 +1,8 @@
 //===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
index dfda7deb6cb4..784fc59484b9 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugSymbolsSubsection.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
index ea9a96ca8d68..fa7df325499f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
@@ -1,9 +1,8 @@
 //===- DebugUnknownSubsection.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h
index ee0f0f7c6023..ed126ed9e2ff 100644
--- a/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -1,9 +1,8 @@
 //===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,7 +20,7 @@ namespace codeview {
 
 ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
 ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames();
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames();
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu);
 ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames();
 ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames();
 ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames();
diff --git a/include/llvm/DebugInfo/CodeView/Formatters.h b/include/llvm/DebugInfo/CodeView/Formatters.h
index 278ad02a39cd..7d04a6a89bef 100644
--- a/include/llvm/DebugInfo/CodeView/Formatters.h
+++ b/include/llvm/DebugInfo/CodeView/Formatters.h
@@ -1,9 +1,8 @@
 //===- Formatters.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/FunctionId.h b/include/llvm/DebugInfo/CodeView/FunctionId.h
index 1af3da810b5a..bc102278819c 100644
--- a/include/llvm/DebugInfo/CodeView/FunctionId.h
+++ b/include/llvm/DebugInfo/CodeView/FunctionId.h
@@ -1,9 +1,8 @@
 //===- FunctionId.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/GUID.h b/include/llvm/DebugInfo/CodeView/GUID.h
index a055ce9e2e45..5f807e6f7eeb 100644
--- a/include/llvm/DebugInfo/CodeView/GUID.h
+++ b/include/llvm/DebugInfo/CodeView/GUID.h
@@ -1,9 +1,8 @@
 //===- GUID.h ---------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index c4704168ed34..a43ce20edde6 100644
--- a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -1,9 +1,8 @@
 //===- GlobalTypeTableBuilder.h ----------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -74,14 +73,30 @@ public:
                            CreateFunc Create) {
     auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex());
 
-    if (LLVM_UNLIKELY(Result.second)) {
+    if (LLVM_UNLIKELY(Result.second /*inserted*/ ||
+                      Result.first->second.isSimple())) {
       uint8_t *Stable = RecordStorage.Allocate<uint8_t>(RecordSize);
       MutableArrayRef<uint8_t> Data(Stable, RecordSize);
-      SeenRecords.push_back(Create(Data));
+      ArrayRef<uint8_t> StableRecord = Create(Data);
+      if (StableRecord.empty()) {
+        // Records with forward references into the Type stream will be deferred
+        // for insertion at a later time, on the second pass.
+        Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated);
+        return TypeIndex(SimpleTypeKind::NotTranslated);
+      }
+      if (Result.first->second.isSimple()) {
+        assert(Result.first->second.getIndex() ==
+               (uint32_t)SimpleTypeKind::NotTranslated);
+        // On the second pass, update with index to remapped record. The
+        // (initially misbehaved) record will now come *after* other records
+        // resolved in the first pass, with proper *back* references in the
+        // stream.
+        Result.first->second = nextTypeIndex();
+      }
+      SeenRecords.push_back(StableRecord);
       SeenHashes.push_back(Hash);
     }
 
-    // Update the caller's copy of Record to point a stable copy.
     return Result.first->second;
   }
 
diff --git a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 383f7dd9fb6a..4e03627e9580 100644
--- a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -1,9 +1,8 @@
 //===- LazyRandomTypeCollection.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h
index ac229c337513..eb2aa154df1b 100644
--- a/include/llvm/DebugInfo/CodeView/Line.h
+++ b/include/llvm/DebugInfo/CodeView/Line.h
@@ -1,9 +1,8 @@
 //===- Line.h ---------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
index 9030918ebbb3..1b2f6d29a9b6 100644
--- a/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
@@ -1,9 +1,8 @@
 //===- MergingTypeTableBuilder.h ---------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/RecordName.h b/include/llvm/DebugInfo/CodeView/RecordName.h
index b022108df3d6..cc09db8933bd 100644
--- a/include/llvm/DebugInfo/CodeView/RecordName.h
+++ b/include/llvm/DebugInfo/CodeView/RecordName.h
@@ -1,9 +1,8 @@
 //===- RecordName.h ------------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 36237e1a4d9e..36c0f2fbd8fa 100644
--- a/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -1,9 +1,8 @@
 //===- RecordSerialization.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,6 +31,9 @@ using llvm::support::ulittle32_t;
 enum : unsigned { MaxRecordLength = 0xFF00 };
 
 struct RecordPrefix {
+  RecordPrefix() = default;
+  explicit RecordPrefix(uint16_t Kind) : RecordLen(2), RecordKind(Kind) {}
+
   ulittle16_t RecordLen;  // Record length, starting from &RecordKind.
   ulittle16_t RecordKind; // Record kind enum (SymRecordKind or TypeRecordKind)
 };
diff --git a/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
index a85d9270186b..3ca09b445a30 100644
--- a/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
@@ -1,9 +1,8 @@
 //===- SimpleTypeSerializer.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
index 22a333e631a0..22a283e785e1 100644
--- a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
+++ b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
@@ -1,9 +1,8 @@
 //===- StringsAndChecksums.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index 6b5dd2d20d17..62761cb87c81 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -1,9 +1,8 @@
 //===- SymbolDeserializer.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
index 823636c398de..12f45dcb21ff 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
@@ -1,9 +1,8 @@
 //===-- SymbolDumpDelegate.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index 215da2e2b522..d832a48b1265 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -1,9 +1,8 @@
 //===-- SymbolDumper.h - CodeView symbol info dumper ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index b58825c4a788..5e9a7432b9b6 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -1,9 +1,8 @@
 //===- SymbolRecord.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -156,15 +156,19 @@ public:
   uint32_t RecordOffset;
 };
 
-struct BinaryAnnotationIterator {
-  struct AnnotationData {
-    BinaryAnnotationsOpCode OpCode;
-    StringRef Name;
-    uint32_t U1;
-    uint32_t U2;
-    int32_t S1;
-  };
+struct DecodedAnnotation {
+  StringRef Name;
+  ArrayRef<uint8_t> Bytes; // Encoded bytes consumed while parsing this entry.
+  BinaryAnnotationsOpCode OpCode = BinaryAnnotationsOpCode::Invalid;
+  uint32_t U1 = 0;
+  uint32_t U2 = 0;
+  int32_t S1 = 0;
+};
 
+struct BinaryAnnotationIterator
+    : public iterator_facade_base<BinaryAnnotationIterator,
+                                  std::forward_iterator_tag,
+                                  DecodedAnnotation> {
   BinaryAnnotationIterator() = default;
   BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
   BinaryAnnotationIterator(const BinaryAnnotationIterator &Other)
@@ -174,10 +178,6 @@ struct BinaryAnnotationIterator {
     return Data == Other.Data;
   }
 
-  bool operator!=(const BinaryAnnotationIterator &Other) const {
-    return !(*this == Other);
-  }
-
   BinaryAnnotationIterator &operator=(const BinaryAnnotationIterator Other) {
     Data = Other.Data;
     return *this;
@@ -194,13 +194,7 @@ struct BinaryAnnotationIterator {
     return *this;
   }
 
-  BinaryAnnotationIterator operator++(int) {
-    BinaryAnnotationIterator Orig(*this);
-    ++(*this);
-    return Orig;
-  }
-
-  const AnnotationData &operator*() {
+  const DecodedAnnotation &operator*() {
     ParseCurrentAnnotation();
     return Current.getValue();
   }
@@ -242,17 +236,17 @@ private:
              (ThirdByte << 8) | FourthByte;
 
     return -1;
-  };
+  }
 
   static int32_t DecodeSignedOperand(uint32_t Operand) {
     if (Operand & 1)
       return -(Operand >> 1);
     return Operand >> 1;
-  };
+  }
 
   static int32_t DecodeSignedOperand(ArrayRef<uint8_t> &Annotations) {
     return DecodeSignedOperand(GetCompressedAnnotation(Annotations));
-  };
+  }
 
   bool ParseCurrentAnnotation() {
     if (Current.hasValue())
@@ -260,7 +254,7 @@ private:
 
     Next = Data;
     uint32_t Op = GetCompressedAnnotation(Next);
-    AnnotationData Result;
+    DecodedAnnotation Result;
     Result.OpCode = static_cast<BinaryAnnotationsOpCode>(Op);
     switch (Result.OpCode) {
     case BinaryAnnotationsOpCode::Invalid:
@@ -325,11 +319,12 @@ private:
       break;
     }
     }
+    Result.Bytes = Data.take_front(Data.size() - Next.size());
     Current = Result;
     return true;
   }
 
-  Optional<AnnotationData> Current;
+  Optional<DecodedAnnotation> Current;
   ArrayRef<uint8_t> Data;
   ArrayRef<uint8_t> Next;
 };
@@ -974,7 +969,7 @@ class UsingNamespaceSym : public SymbolRecord {
 public:
   explicit UsingNamespaceSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
   explicit UsingNamespaceSym(uint32_t RecordOffset)
-      : SymbolRecord(SymbolRecordKind::RegRelativeSym),
+      : SymbolRecord(SymbolRecordKind::UsingNamespaceSym),
         RecordOffset(RecordOffset) {}
 
   StringRef Name;
@@ -983,6 +978,19 @@ public:
 };
 
 // S_ANNOTATION
+class AnnotationSym : public SymbolRecord {
+public:
+  explicit AnnotationSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+  explicit AnnotationSym(uint32_t RecordOffset)
+      : SymbolRecord(SymbolRecordKind::AnnotationSym),
+        RecordOffset(RecordOffset) {}
+
+  uint32_t CodeOffset = 0;
+  uint16_t Segment = 0;
+  std::vector<StringRef> Strings;
+
+  uint32_t RecordOffset = 0; // Stays 0 unless the offset constructor is used.
+};
 
 using CVSymbol = CVRecord<SymbolKind>;
 using CVSymbolArray = VarStreamArray<CVSymbol>;
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
index 3713fe118eaa..57dbc56c0769 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
@@ -1,9 +1,8 @@
 //===- SymbolRecordHelpers.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
index 391e8f127665..34368b6185d6 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
@@ -1,9 +1,8 @@
 //===- SymbolRecordMapping.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
index f4d8ab0c3c2e..b805b6595e80 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -1,9 +1,8 @@
 //===- SymbolSerializer.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -52,8 +51,8 @@ public:
   template <typename SymType>
   static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage,
                                  CodeViewContainer Container) {
-    CVSymbol Result;
-    Result.Type = static_cast<SymbolKind>(Sym.Kind);
+    RecordPrefix Prefix{uint16_t(Sym.Kind)};
+    CVSymbol Result(&Prefix, sizeof(Prefix));
     SymbolSerializer Serializer(Storage, Container);
     consumeError(Serializer.visitSymbolBegin(Result));
     consumeError(Serializer.visitKnownRecord(Result, Sym));
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
index e29511a67b7f..145d63a6fe61 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
@@ -1,9 +1,8 @@
 //===- SymbolVisitorCallbackPipeline.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
index 0816f7c62656..1a4d5b9d31df 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
@@ -1,9 +1,8 @@
 //===- SymbolVisitorCallbacks.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
index a2a3c6f18fba..368d8b288315 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
@@ -1,9 +1,8 @@
 //===-- SymbolVisitorDelegate.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeCollection.h b/include/llvm/DebugInfo/CodeView/TypeCollection.h
index e9fc9b0de8ef..58b1dd058c1a 100644
--- a/include/llvm/DebugInfo/CodeView/TypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/TypeCollection.h
@@ -1,9 +1,8 @@
 //===- TypeCollection.h - A collection of CodeView type records -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
index 9887d901773a..081de32dd02c 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
@@ -1,9 +1,8 @@
 //===- TypeDeserializer.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -59,7 +58,7 @@ public:
     TypeRecordKind K =
         static_cast<TypeRecordKind>(uint16_t(Prefix->RecordKind));
     T Record(K);
-    CVType CVT(static_cast<TypeLeafKind>(K), Data);
+    CVType CVT(Data);
     if (auto EC = deserializeAs<T>(CVT, Record))
       return std::move(EC);
     return Record;
@@ -112,14 +111,14 @@ class FieldListDeserializer : public TypeVisitorCallbacks {
 
 public:
   explicit FieldListDeserializer(BinaryStreamReader &Reader) : Mapping(Reader) {
-    CVType FieldList;
-    FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+    RecordPrefix Pre(static_cast<uint16_t>(TypeLeafKind::LF_FIELDLIST));
+    CVType FieldList(&Pre, sizeof(Pre));
     consumeError(Mapping.Mapping.visitTypeBegin(FieldList));
   }
 
   ~FieldListDeserializer() override {
-    CVType FieldList;
-    FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+    RecordPrefix Pre(static_cast<uint16_t>(TypeLeafKind::LF_FIELDLIST));
+    CVType FieldList(&Pre, sizeof(Pre));
     consumeError(Mapping.Mapping.visitTypeEnd(FieldList));
   }
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
index afb8b3636361..41a219ae5a7b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
@@ -1,9 +1,8 @@
 //===-- TypeDumpVisitor.h - CodeView type info dumper -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeHashing.h b/include/llvm/DebugInfo/CodeView/TypeHashing.h
index 1f732d29a538..b0a16cccbff3 100644
--- a/include/llvm/DebugInfo/CodeView/TypeHashing.h
+++ b/include/llvm/DebugInfo/CodeView/TypeHashing.h
@@ -1,9 +1,8 @@
 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -85,6 +84,8 @@ struct GloballyHashedType {
   }
   std::array<uint8_t, 8> Hash;
 
+  bool empty() const { return Hash == std::array<uint8_t, 8>{}; }
+
   /// Given a sequence of bytes representing a record, compute a global hash for
   /// this record.  Due to the nature of global hashes incorporating the hashes
   /// of referenced records, this function requires a list of types and ids
@@ -108,8 +109,33 @@ struct GloballyHashedType {
   template <typename Range>
   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
     std::vector<GloballyHashedType> Hashes;
-    for (const auto &R : Records)
-      Hashes.push_back(hashType(R, Hashes, Hashes));
+    bool UnresolvedRecords = false;
+    for (const auto &R : Records) {
+      GloballyHashedType H = hashType(R, Hashes, Hashes);
+      UnresolvedRecords |= H.empty();
+      Hashes.push_back(H);
+    }
+
+    // Records with forward references hash to empty on the first pass (rare;
+    // seen on tiny MASM OBJs), so re-hash the empty slots until none remain.
+    // Stop if a whole pass resolves nothing: retrying could never terminate.
+    bool MadeProgress = true;
+    while (UnresolvedRecords && MadeProgress) {
+      UnresolvedRecords = MadeProgress = false;
+      auto HashIt = Hashes.begin();
+      for (const auto &R : Records) {
+        if (HashIt->empty()) {
+          GloballyHashedType H = hashType(R, Hashes, Hashes);
+          if (H.empty()) {
+            UnresolvedRecords = true;
+          } else {
+            *HashIt = H;
+            MadeProgress = true;
+          }
+        }
+        ++HashIt;
+      }
+    }
 
     return Hashes;
   }
diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h
index 58463a6b13df..b9e2562bfc2b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -1,9 +1,8 @@
 //===- TypeIndex.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
index c424a09ece89..469768787274 100644
--- a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
+++ b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
@@ -1,9 +1,8 @@
 //===- TypeIndexDiscovery.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 7b4a30ee622d..b147dd6c3d05 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -1,9 +1,8 @@
 //===- TypeRecord.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
index 389472ed1aea..e84704d99ddc 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -1,9 +1,8 @@
 //===- TypeRecordHelpers.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
index cbe8d6066bb9..4c309c10ff0c 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
@@ -1,9 +1,8 @@
 //===- TypeRecordMapping.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,9 +23,11 @@ class TypeRecordMapping : public TypeVisitorCallbacks {
 public:
   explicit TypeRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {}
   explicit TypeRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {}
+  explicit TypeRecordMapping(CodeViewRecordStreamer &Streamer) : IO(Streamer) {}
 
   using TypeVisitorCallbacks::visitTypeBegin;
   Error visitTypeBegin(CVType &Record) override;
+  Error visitTypeBegin(CVType &Record, TypeIndex Index) override;
   Error visitTypeEnd(CVType &Record) override;
 
   Error visitMemberBegin(CVMemberRecord &Record) override;
diff --git a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index 0b9f54ec60bf..d0506cce8176 100644
--- a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -1,9 +1,8 @@
 //===- TypeStreamMerger.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
index dfba83d62fce..4f2e5deb10b4 100644
--- a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
+++ b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
@@ -1,9 +1,8 @@
 //===- TypeSymbolEmitter.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
index 80326a0ffd39..5cbe3400e029 100644
--- a/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
+++ b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
@@ -1,9 +1,8 @@
 //===- TypeTableCollection.h ---------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
index 126fb8abb0da..169715be2d52 100644
--- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
@@ -1,9 +1,8 @@
 //===- TypeVisitorCallbackPipeline.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -83,6 +82,11 @@ public:
     Pipeline.push_back(&Callbacks);
   }
 
+  /// Insert \p Callbacks at the front of the pipeline, ahead of all entries.
+  void addCallbackToPipelineFront(TypeVisitorCallbacks &Callbacks) {
+    Pipeline.insert(Pipeline.begin(), &Callbacks);
+  }
+
 #define TYPE_RECORD(EnumName, EnumVal, Name)                                   \
   Error visitKnownRecord(CVType &CVR, Name##Record &Record) override {         \
     return visitKnownRecordImpl(CVR, Record);                                  \
diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
index d7a473306bc2..33f8b1f24b1b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
+++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
@@ -1,9 +1,8 @@
 //===- TypeVisitorCallbacks.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 85e96402a246..d2a5318179eb 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -1,9 +1,8 @@
 //===- DIContext.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -98,11 +97,10 @@ public:
   void addFrame(const DILineInfo &Frame) {
     Frames.push_back(Frame);
   }
-  
+
   void resize(unsigned i) {
     Frames.resize(i);
   }
-  
 };
 
 /// Container for description of a global variable.
@@ -114,6 +112,16 @@ struct DIGlobal {
   DIGlobal() : Name("<invalid>") {}
 };
 
+struct DILocal { // Element type of the vector from getLocalsForAddress().
+  std::string FunctionName; // presumably the enclosing function - confirm
+  std::string Name;
+  std::string DeclFile;
+  uint64_t DeclLine = 0; // defaults to 0 until assigned
+  Optional<int64_t> FrameOffset; // Optional: has no value until assigned
+  Optional<uint64_t> Size;       // Optional: has no value until assigned
+  Optional<uint64_t> TagOffset;  // Optional: has no value until assigned
+};
+
 /// A DINameKind is passed to name search methods to specify a
 /// preference regarding the type of name resolution the caller wants.
 enum class DINameKind { None, ShortName, LinkageName };
@@ -158,7 +166,8 @@ enum DIDumpType : unsigned {
 /// dumped.
 struct DIDumpOptions {
   unsigned DumpType = DIDT_All;
-  unsigned RecurseDepth = -1U;
+  unsigned ChildRecurseDepth = -1U;
+  unsigned ParentRecurseDepth = -1U;
   uint16_t Version = 0; // DWARF version to assume when extracting.
   uint8_t AddrSize = 4; // Address byte size to assume when extracting.
   bool ShowAddresses = true;
@@ -172,15 +181,18 @@ struct DIDumpOptions {
   /// Return default option set for printing a single DIE without children.
   static DIDumpOptions getForSingleDIE() {
     DIDumpOptions Opts;
-    Opts.RecurseDepth = 0;
+    Opts.ChildRecurseDepth = 0;
+    Opts.ParentRecurseDepth = 0;
     return Opts;
   }
 
-  /// Return the options with RecurseDepth set to 0 unless explicitly required.
+  /// Return options with the recurse depths zeroed unless explicitly required.
   DIDumpOptions noImplicitRecursion() const {
     DIDumpOptions Opts = *this;
-    if (RecurseDepth == -1U && !ShowChildren)
-      Opts.RecurseDepth = 0;
+    if (ChildRecurseDepth == -1U && !ShowChildren)
+      Opts.ChildRecurseDepth = 0;
+    if (ParentRecurseDepth == -1U && !ShowParents)
+      Opts.ParentRecurseDepth = 0;
     return Opts;
   }
 };
@@ -204,12 +216,18 @@ public:
     return true;
   }
 
-  virtual DILineInfo getLineInfoForAddress(uint64_t Address,
+  virtual DILineInfo getLineInfoForAddress(
+      object::SectionedAddress Address,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
-  virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
-      uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
-  virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+  virtual DILineInfoTable getLineInfoForAddressRange(
+      object::SectionedAddress Address, uint64_t Size,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+  virtual DIInliningInfo getInliningInfoForAddress(
+      object::SectionedAddress Address,
+      DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+
+  virtual std::vector<DILocal>
+  getLocalsForAddress(object::SectionedAddress Address) = 0;
 
 private:
   const DIContextKind Kind;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 84b23398b8cc..ccf2891c2e21 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -1,9 +1,8 @@
 //===- DWARFAbbreviationDeclaration.h ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 1d448728338f..303375703d2e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -1,9 +1,8 @@
 //===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -72,7 +71,7 @@ public:
       : AccelSection(AccelSection), StringSection(StringSection) {}
   virtual ~DWARFAcceleratorTable();
 
-  virtual llvm::Error extract() = 0;
+  virtual Error extract() = 0;
   virtual void dump(raw_ostream &OS) const = 0;
 
   DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
@@ -175,7 +174,7 @@ public:
                         DataExtractor StringSection)
       : DWARFAcceleratorTable(AccelSection, StringSection) {}
 
-  llvm::Error extract() override;
+  Error extract() override;
   uint32_t getNumBuckets();
   uint32_t getNumHashes();
   uint32_t getSizeHdr();
@@ -223,7 +222,7 @@ public:
 /// referenced by the name table and interpreted with the help of the
 /// abbreviation table.
 class DWARFDebugNames : public DWARFAcceleratorTable {
-  /// The fixed-size part of a Dwarf 5 Name Index header
+  /// The fixed-size part of a DWARF v5 Name Index header
   struct HeaderPOD {
     uint32_t UnitLength;
     uint16_t Version;
@@ -242,7 +241,7 @@ public:
   class NameIterator;
   class ValueIterator;
 
-  /// Dwarf 5 Name Index header.
+  /// DWARF v5 Name Index header.
   struct Header : public HeaderPOD {
     SmallString<8> AugmentationString;
 
@@ -349,7 +348,7 @@ private:
   };
 
 public:
-  /// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name
+  /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
   /// Index.
   class NameTableEntry {
     DataExtractor StrData;
@@ -381,7 +380,7 @@ public:
     uint32_t getEntryOffset() const { return EntryOffset; }
   };
 
-  /// Represents a single accelerator table within the Dwarf 5 .debug_names
+  /// Represents a single accelerator table within the DWARF v5 .debug_names
   /// section.
   class NameIndex {
     DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
@@ -460,7 +459,7 @@ public:
     NameIterator begin() const { return NameIterator(this, 1); }
     NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
 
-    llvm::Error extract();
+    Error extract();
     uint32_t getUnitOffset() const { return Base; }
     uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
     void dump(ScopedPrinter &W) const;
@@ -580,7 +579,7 @@ public:
                   DataExtractor StringSection)
       : DWARFAcceleratorTable(AccelSection, StringSection) {}
 
-  llvm::Error extract() override;
+  Error extract() override;
   void dump(raw_ostream &OS) const override;
 
   /// Look up all entries in the accelerator table matching \c Key.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 5a7df5c353e8..2d5f9f3c7658 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -1,9 +1,8 @@
 //===- DWARFAddressRange.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -43,12 +42,6 @@ struct DWARFAddressRange {
     return LowPC < RHS.HighPC && RHS.LowPC < HighPC;
   }
 
-  /// Returns true if [LowPC, HighPC) fully contains [RHS.LowPC, RHS.HighPC).
-  bool contains(const DWARFAddressRange &RHS) const {
-    assert(valid() && RHS.valid());
-    return LowPC <= RHS.LowPC && RHS.HighPC <= HighPC;
-  }
-
   void dump(raw_ostream &OS, uint32_t AddressSize,
             DIDumpOptions DumpOpts = {}) const;
 };
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index f0672bb0ca75..c8ad19ad6bf6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -1,9 +1,8 @@
 //===- DWARFAttribute.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,13 +27,10 @@ struct DWARFAttribute {
   /// The debug info/types section byte size of the data for this attribute.
   uint32_t ByteSize = 0;
   /// The attribute enumeration of this attribute.
-  dwarf::Attribute Attr;
+  dwarf::Attribute Attr = dwarf::Attribute(0);
   /// The form and value for this attribute.
   DWARFFormValue Value;
 
-  DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0),
-                 dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {}
-
   bool isValid() const {
     return Offset != 0 && Attr != dwarf::Attribute(0);
   }
@@ -43,12 +39,9 @@ struct DWARFAttribute {
     return isValid();
   }
 
-  void clear() {
-    Offset = 0;
-    ByteSize = 0;
-    Attr = dwarf::Attribute(0);
-    Value = DWARFFormValue();
-  }
+  /// Identifies DWARF attributes that may contain a reference to a
+  /// DWARF expression.
+  static bool mayHaveLocationDescription(dwarf::Attribute Attr);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 33797419a7b8..16b9bfb5de56 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -1,9 +1,8 @@
 //===- DWARFCompileUnit.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index dbb6be04544b..23cf21c3523f 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -1,9 +1,8 @@
 //===- DWARFContext.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===/
 
@@ -318,15 +317,23 @@ public:
 
   /// Get the compilation unit, the function DIE and lexical block DIE for the
   /// given address where applicable.
+  /// TODO: change input parameter from "uint64_t Address"
+  ///       into "SectionedAddress Address"
   DIEsForAddress getDIEsForAddress(uint64_t Address);
 
-  DILineInfo getLineInfoForAddress(uint64_t Address,
+  DILineInfo getLineInfoForAddress(
+      object::SectionedAddress Address,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
-  DILineInfoTable getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+  DILineInfoTable getLineInfoForAddressRange(
+      object::SectionedAddress Address, uint64_t Size,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
-  DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+  DIInliningInfo getInliningInfoForAddress(
+      object::SectionedAddress Address,
       DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
 
+  std::vector<DILocal>
+  getLocalsForAddress(object::SectionedAddress Address) override;
+
   bool isLittleEndian() const { return DObj->isLittleEndian(); }
   static bool isSupportedVersion(unsigned version) {
     return version == 2 || version == 3 || version == 4 || version == 5;
@@ -367,7 +374,11 @@ public:
 private:
   /// Return the compile unit which contains instruction with provided
   /// address.
+  /// TODO: change input parameter from "uint64_t Address"
+  ///       into "SectionedAddress Address"
   DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
+  void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
+                       std::vector<DILocal> &Result);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
index 1ed087520b30..7c2a159b71fa 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -1,9 +1,8 @@
 //===- DWARFDataExtractor.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index d277ec382ba5..28fd8484b4a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugAbbrev.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index ffbd1b06d1e2..a98bf282fe7c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugAddr.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index ab46fac39f7c..5b6c578bc3bf 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugArangeSet.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index ea71a50f3270..03223fbc80a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugAranges.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -50,10 +49,6 @@ private:
       return -1ULL;
     }
 
-    bool containsAddress(uint64_t Address) const {
-      return LowPC <= Address && Address < HighPC();
-    }
-
     bool operator<(const Range &other) const {
       return LowPC < other.LowPC;
     }
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 7dc07d774aba..d960f4bc9b1c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugFrame.h - Parsing of .debug_frame --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 88c8f57bc33c..f50063b24370 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugInfoEntry.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index d50af5a057f1..e7425c192373 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -122,6 +121,17 @@ public:
       return LineBase + (int8_t)LineRange - 1;
     }
 
+    /// Get DWARF-version aware access to the file name entry at the provided
+    /// index.
+    const llvm::DWARFDebugLine::FileNameEntry &
+    getFileNameEntry(uint64_t Index) const;
+
+    bool hasFileAtIndex(uint64_t FileIndex) const;
+
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+                            DILineInfoSpecifier::FileLineInfoKind Kind,
+                            std::string &Result) const;
+
     void clear();
     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
     Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
@@ -140,12 +150,16 @@ public:
     static void dumpTableHeader(raw_ostream &OS);
 
     static bool orderByAddress(const Row &LHS, const Row &RHS) {
-      return LHS.Address < RHS.Address;
+      return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
+             std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
     }
 
     /// The program-counter value corresponding to a machine instruction
-    /// generated by the compiler.
-    uint64_t Address;
+    /// generated by the compiler and section index pointing to the section
+    /// containing this PC. If relocation information is present then section
+    /// index is the index of the section which contains the above address.
+    /// Otherwise this is the object::SectionedAddress::Undef value.
+    object::SectionedAddress Address;
     /// An unsigned integer indicating a source line number. Lines are numbered
     /// beginning at 1. The compiler may emit the value 0 in cases where an
     /// instruction cannot be attributed to any source line.
@@ -193,21 +207,29 @@ public:
     /// and is described by line table rows [FirstRowIndex, LastRowIndex).
     uint64_t LowPC;
     uint64_t HighPC;
+    /// If relocation information is present then this is the index of the
+    /// section which contains the above addresses. Otherwise this is the
+    /// object::SectionedAddress::Undef value.
+    uint64_t SectionIndex;
     unsigned FirstRowIndex;
     unsigned LastRowIndex;
     bool Empty;
 
     void reset();
 
-    static bool orderByLowPC(const Sequence &LHS, const Sequence &RHS) {
-      return LHS.LowPC < RHS.LowPC;
+    static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
+      return std::tie(LHS.SectionIndex, LHS.HighPC) <
+             std::tie(RHS.SectionIndex, RHS.HighPC);
     }
 
     bool isValid() const {
       return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
     }
 
-    bool containsPC(uint64_t PC) const { return (LowPC <= PC && PC < HighPC); }
+    bool containsPC(object::SectionedAddress PC) const {
+      return SectionIndex == PC.SectionIndex &&
+             (LowPC <= PC.Address && PC.Address < HighPC);
+    }
   };
 
   struct LineTable {
@@ -224,22 +246,30 @@ public:
 
     /// Returns the index of the row with file/line info for a given address,
     /// or UnknownRowIndex if there is no such row.
-    uint32_t lookupAddress(uint64_t Address) const;
+    uint32_t lookupAddress(object::SectionedAddress Address) const;
 
-    bool lookupAddressRange(uint64_t Address, uint64_t Size,
+    bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
                             std::vector<uint32_t> &Result) const;
 
-    bool hasFileAtIndex(uint64_t FileIndex) const;
+    bool hasFileAtIndex(uint64_t FileIndex) const {
+      return Prologue.hasFileAtIndex(FileIndex);
+    }
 
     /// Extracts filename by its index in filename table in prologue.
+    /// In DWARF v4, the files are 1-indexed and the current compilation file
+    /// name is not represented in the list. In DWARF v5, the files are
+    /// 0-indexed and the primary source file has the index 0.
     /// Returns true on success.
-    bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
-                            std::string &Result) const;
+                            std::string &Result) const {
+      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
+    }
 
     /// Fills the Result argument with the file and line information
     /// corresponding to Address. Returns true on success.
-    bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
+    bool getFileLineInfoForAddress(object::SectionedAddress Address,
+                                   const char *CompDir,
                                    DILineInfoSpecifier::FileLineInfoKind Kind,
                                    DILineInfo &Result) const;
 
@@ -264,10 +294,15 @@ public:
 
   private:
     uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
-                          uint64_t Address) const;
+                          object::SectionedAddress Address) const;
     Optional<StringRef>
     getSourceByIndex(uint64_t FileIndex,
                      DILineInfoSpecifier::FileLineInfoKind Kind) const;
+
+    uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
+
+    bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+                                std::vector<uint32_t> &Result) const;
   };
 
   const LineTable *getLineTable(uint32_t Offset) const;
@@ -334,13 +369,10 @@ private:
     ParsingState(struct LineTable *LT);
 
     void resetRowAndSequence();
-    void appendRowToMatrix(uint32_t Offset);
+    void appendRowToMatrix();
 
     /// Line table we're currently parsing.
     struct LineTable *LineTable;
-    /// The row number that starts at zero for the prologue, and increases for
-    /// each row added to the matrix.
-    unsigned RowNumber = 0;
     struct Row Row;
     struct Sequence Sequence;
   };
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index da2098e15402..cced6048e811 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugLoc.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,7 +41,7 @@ public:
     SmallVector<Entry, 2> Entries;
     /// Dump this list on OS.
     void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize,
-              const MCRegisterInfo *MRI, uint64_t BaseAddress,
+              const MCRegisterInfo *MRI, DWARFUnit *U, uint64_t BaseAddress,
               unsigned Indent) const;
   };
 
@@ -87,7 +86,7 @@ public:
     SmallVector<Entry, 2> Entries;
     void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
               unsigned AddressSize, const MCRegisterInfo *RegInfo,
-              unsigned Indent) const;
+              DWARFUnit *U, unsigned Indent) const;
   };
 
 private:
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index bfe2fc3ac02d..a6c125990ca7 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugMacro.h ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index 9e1656eb1615..99e91ca90319 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugPubTable.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index bc26edf00647..a66f60292343 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugRangeList.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -77,7 +76,7 @@ public:
   /// list. Has to be passed base address of the compile unit referencing this
   /// range list.
   DWARFAddressRangesVector
-  getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr) const;
+  getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr) const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 5cc8d789e598..167ddde3ec3d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -1,9 +1,8 @@
 //===- DWARFDebugRnglists.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,7 +37,7 @@ struct RangeListEntry : public DWARFListEntryBase {
   Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr);
   void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
             uint64_t &CurrentBase, DIDumpOptions DumpOpts,
-            llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+            llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
                 LookupPooledAddress) const;
   bool isSentinel() const { return EntryKind == dwarf::DW_RLE_end_of_list; }
 };
@@ -48,7 +47,7 @@ class DWARFDebugRnglist : public DWARFListType<RangeListEntry> {
 public:
   /// Build a DWARFAddressRangesVector from a rangelist.
   DWARFAddressRangesVector
-  getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+  getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr,
                     DWARFUnit &U) const;
 };
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 56d46cd739a2..21e68f983bb3 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -1,9 +1,8 @@
 //===- DWARFDie.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 3fad68a9b48b..f066dd58d606 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -1,9 +1,8 @@
 //===--- DWARFExpression.h - DWARF Expression handling ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,7 +41,8 @@ public:
       SizeAddr = 5,
       SizeRefAddr = 6,
       SizeBlock = 7, ///< Preceding operand contains block size
-      SignBit = 0x8,
+      BaseTypeRef = 8,
+      SignBit = 0x80,
       SignedSize1 = SignBit | Size1,
       SignedSize2 = SignBit | Size2,
       SignedSize4 = SignBit | Size4,
@@ -55,7 +55,8 @@ public:
       DwarfNA, ///< Serves as a marker for unused entries
       Dwarf2 = 2,
       Dwarf3,
-      Dwarf4
+      Dwarf4,
+      Dwarf5
     };
 
     /// Description of the encoding of one expression Op.
@@ -78,17 +79,20 @@ public:
     bool Error;
     uint32_t EndOffset;
     uint64_t Operands[2];
+    uint32_t OperandEndOffsets[2];
 
   public:
     Description &getDescription() { return Desc; }
     uint8_t getCode() { return Opcode; }
     uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; }
+    uint32_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
     uint32_t getEndOffset() { return EndOffset; }
     bool extract(DataExtractor Data, uint16_t Version, uint8_t AddressSize,
                  uint32_t Offset);
     bool isError() { return Error; }
-    bool print(raw_ostream &OS, const DWARFExpression *U,
-               const MCRegisterInfo *RegInfo, bool isEH);
+    bool print(raw_ostream &OS, const DWARFExpression *Expr,
+               const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH);
+    bool verify(DWARFUnit *U);
   };
 
   /// An iterator to go through the expression operations.
@@ -125,15 +129,17 @@ public:
 
   DWARFExpression(DataExtractor Data, uint16_t Version, uint8_t AddressSize)
       : Data(Data), Version(Version), AddressSize(AddressSize) {
-    assert(AddressSize == 8 || AddressSize == 4);
+    assert(AddressSize == 8 || AddressSize == 4 || AddressSize == 2);
   }
 
   iterator begin() const { return iterator(this, 0); }
   iterator end() const { return iterator(this, Data.getData().size()); }
 
-  void print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+  void print(raw_ostream &OS, const MCRegisterInfo *RegInfo, DWARFUnit *U,
              bool IsEH = false) const;
 
+  bool verify(DWARFUnit *U);
+
 private:
   DataExtractor Data;
   uint16_t Version;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 727e853c09fb..731e71ed9eae 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -1,9 +1,8 @@
 //===- DWARFFormValue.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,6 +41,9 @@ public:
 private:
   struct ValueType {
     ValueType() { uval = 0; }
+    ValueType(int64_t V) : sval(V) {}
+    ValueType(uint64_t V) : uval(V) {}
+    ValueType(const char *V) : cstr(V) {}
 
     union {
       uint64_t uval;
@@ -56,26 +58,28 @@ private:
   ValueType Value;              /// Contains all data for the form.
   const DWARFUnit *U = nullptr; /// Remember the DWARFUnit at extract time.
   const DWARFContext *C = nullptr; /// Context for extract time.
+
+  DWARFFormValue(dwarf::Form F, ValueType V) : Form(F), Value(V) {}
+
 public:
   DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {}
 
+  static DWARFFormValue createFromSValue(dwarf::Form F, int64_t V);
+  static DWARFFormValue createFromUValue(dwarf::Form F, uint64_t V);
+  static DWARFFormValue createFromPValue(dwarf::Form F, const char *V);
+  static DWARFFormValue createFromBlockValue(dwarf::Form F,
+                                             ArrayRef<uint8_t> D);
+  static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit,
+                                       uint32_t *OffsetPtr);
+
   dwarf::Form getForm() const { return Form; }
   uint64_t getRawUValue() const { return Value.uval; }
-  void setForm(dwarf::Form F) { Form = F; }
-  void setUValue(uint64_t V) { Value.uval = V; }
-  void setSValue(int64_t V) { Value.sval = V; }
-  void setPValue(const char *V) { Value.cstr = V; }
-
-  void setBlockValue(const ArrayRef<uint8_t> &Data) {
-    Value.data = Data.data();
-    setUValue(Data.size());
-  }
 
   bool isFormClass(FormClass FC) const;
   const DWARFUnit *getUnit() const { return U; }
   void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const;
   void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts,
-                            SectionedAddress SA) const;
+                            object::SectionedAddress SA) const;
   static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
                                  DIDumpOptions DumpOpts, uint64_t SectionIndex);
 
@@ -100,11 +104,16 @@ public:
   /// getAsFoo functions below return the extracted value as Foo if only
   /// DWARFFormValue has form class is suitable for representing Foo.
   Optional<uint64_t> getAsReference() const;
+  struct UnitOffset {
+    DWARFUnit *Unit;
+    uint64_t Offset;
+  };
+  Optional<UnitOffset> getAsRelativeReference() const;
   Optional<uint64_t> getAsUnsignedConstant() const;
   Optional<int64_t> getAsSignedConstant() const;
   Optional<const char *> getAsCString() const;
   Optional<uint64_t> getAsAddress() const;
-  Optional<SectionedAddress> getAsSectionedAddress() const;
+  Optional<object::SectionedAddress> getAsSectionedAddress() const;
   Optional<uint64_t> getAsSectionOffset() const;
   Optional<ArrayRef<uint8_t>> getAsBlock() const;
   Optional<uint64_t> getAsCStringOffset() const;
@@ -155,6 +164,19 @@ inline Optional<const char *> toString(const Optional<DWARFFormValue> &V) {
   return None;
 }
 
+/// Take an optional DWARFFormValue and try to extract a string value from it.
+///
+/// \param V an optional DWARFFormValue to attempt to extract the value from.
+/// \returns the extracted string if the form value was valid and was a
+/// string, otherwise \p Default.
+inline StringRef toStringRef(const Optional<DWARFFormValue> &V,
+                             StringRef Default = {}) {
+  if (V)
+    if (auto S = V->getAsCString())
+      return *S;
+  return Default;
+}
+
 /// Take an optional DWARFFormValue and extract a string value from it.
 ///
 /// \param V and optional DWARFFormValue to attempt to extract the value from.
@@ -242,7 +264,7 @@ inline Optional<uint64_t> toAddress(const Optional<DWARFFormValue> &V) {
   return None;
 }
 
-inline Optional<SectionedAddress>
+inline Optional<object::SectionedAddress>
 toSectionedAddress(const Optional<DWARFFormValue> &V) {
   if (V)
     return V->getAsSectionedAddress();
diff --git a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 073e02903c39..38cd42ddb883 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -1,9 +1,8 @@
 //===- DWARFGdbIndex.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index 9b987314f209..a1ea69b040f0 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -1,9 +1,8 @@
 //===- DWARFListTable.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -158,7 +157,7 @@ public:
   uint8_t getAddrSize() const { return Header.getAddrSize(); }
 
   void dump(raw_ostream &OS,
-            llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+            llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
                 LookupPooledAddress,
             DIDumpOptions DumpOpts = {}) const;
 
@@ -235,7 +234,7 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
 template <typename DWARFListType>
 void DWARFListTableBase<DWARFListType>::dump(
     raw_ostream &OS,
-    llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+    llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
         LookupPooledAddress,
     DIDumpOptions DumpOpts) const {
   Header.dump(OS, DumpOpts);
diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h
index d611b5d075c8..1bba74a25d0e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -1,9 +1,8 @@
 //===- DWARFObject.h --------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===-----------------------------------------------------------------------===/
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
index f51838424614..3add711943d0 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
@@ -1,9 +1,8 @@
 //===- DWARFRelocMap.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_DEBUGINFO_DWARF_DWARFRELOCMAP_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Object/RelocationResolver.h"
 #include <cstdint>
 
 namespace llvm {
@@ -19,7 +19,11 @@ namespace llvm {
 /// Section index is -1LL if relocation points to absolute symbol.
 struct RelocAddrEntry {
   uint64_t SectionIndex;
-  uint64_t Value;
+  object::RelocationRef Reloc;
+  uint64_t SymbolValue;
+  Optional<object::RelocationRef> Reloc2;
+  uint64_t SymbolValue2;
+  object::RelocationResolver Resolver;
 };
 
 /// In place of applying the relocations to the data we've read from disk we use
diff --git a/include/llvm/DebugInfo/DWARF/DWARFSection.h b/include/llvm/DebugInfo/DWARF/DWARFSection.h
index 7f8235965297..054524d368ed 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFSection.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFSection.h
@@ -1,9 +1,8 @@
 //===- DWARFSection.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,11 +22,6 @@ struct SectionName {
   bool IsNameUnique;
 };
 
-struct SectionedAddress {
-  uint64_t Address;
-  uint64_t SectionIndex;
-};
-
 } // end namespace llvm
 
 #endif // LLVM_DEBUGINFO_DWARF_DWARFSECTION_H
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index 8ca5ba13fc23..90d89375fd35 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -1,9 +1,8 @@
 //===- DWARFTypeUnit.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 79c3ce1106d5..f9f90db31890 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -1,9 +1,8 @@
 //===- DWARFUnit.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -49,7 +48,7 @@ class DWARFUnitHeader {
   uint32_t Offset = 0;
   // Version, address size, and DWARF format.
   dwarf::FormParams FormParams;
-  uint32_t Length = 0;
+  uint64_t Length = 0;
   uint64_t AbbrOffset = 0;
 
   // For DWO units only.
@@ -83,7 +82,7 @@ public:
   uint8_t getDwarfOffsetByteSize() const {
     return FormParams.getDwarfOffsetByteSize();
   }
-  uint32_t getLength() const { return Length; }
+  uint64_t getLength() const { return Length; }
   uint64_t getAbbrOffset() const { return AbbrOffset; }
   Optional<uint64_t> getDWOId() const { return DWOId; }
   void setDWOId(uint64_t Id) {
@@ -98,8 +97,11 @@ public:
     return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
   }
   uint8_t getSize() const { return Size; }
-  // FIXME: Support DWARF64.
-  uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
+  uint32_t getNextUnitOffset() const {
+    const unsigned DWARF64Extra =
+        FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0;
+    return Offset + Length + DWARF64Extra + FormParams.getDwarfOffsetByteSize();
+  }
 };
 
 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
@@ -173,6 +175,7 @@ struct StrOffsetsContributionDescriptor {
   StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
                                    uint8_t Version, dwarf::DwarfFormat Format)
       : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
+  StrOffsetsContributionDescriptor() = default;
 
   uint8_t getVersion() const { return FormParams.Version; }
   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
@@ -182,7 +185,7 @@ struct StrOffsetsContributionDescriptor {
   /// Determine whether a contribution to the string offsets table is
   /// consistent with the relevant section size and that its length is
   /// a multiple of the size of one of its entries.
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<StrOffsetsContributionDescriptor>
   validateContributionSize(DWARFDataExtractor &DA);
 };
 
@@ -218,7 +221,7 @@ class DWARFUnit {
   Optional<DWARFDebugRnglistTable> RngListTable;
 
   mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
-  llvm::Optional<SectionedAddress> BaseAddr;
+  llvm::Optional<object::SectionedAddress> BaseAddr;
   /// The compile unit debug information entry items.
   std::vector<DWARFDebugInfoEntry> DieArray;
 
@@ -247,14 +250,14 @@ protected:
   /// Find the unit's contribution to the string offsets table and determine its
   /// length and form. The given offset is expected to be derived from the unit
   /// DIE's DW_AT_str_offsets_base attribute.
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<Optional<StrOffsetsContributionDescriptor>>
   determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
 
   /// Find the unit's contribution to the string offsets table and determine its
   /// length and form. The given offset is expected to be 0 in a dwo file or,
   /// in a dwp file, the start of the unit's contribution to the string offsets
   /// table section (as determined by the index table).
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<Optional<StrOffsetsContributionDescriptor>>
   determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
 
 public:
@@ -305,7 +308,8 @@ public:
     RangeSectionBase = Base;
   }
 
-  Optional<SectionedAddress> getAddrOffsetSectionItem(uint32_t Index) const;
+  Optional<object::SectionedAddress>
+  getAddrOffsetSectionItem(uint32_t Index) const;
   Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
 
   DWARFDataExtractor getDebugInfoExtractor() const;
@@ -376,7 +380,7 @@ public:
     llvm_unreachable("Invalid UnitType.");
   }
 
-  llvm::Optional<SectionedAddress> getBaseAddress();
+  llvm::Optional<object::SectionedAddress> getBaseAddress();
 
   DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
     extractDIEsIfNeeded(ExtractUnitDIEOnly);
@@ -385,6 +389,13 @@ public:
     return DWARFDie(this, &DieArray[0]);
   }
 
+  DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) {
+    parseDWO();
+    if (DWO) // Prefer the unit DIE of the DWO unit whenever DWO is set.
+      return DWO->getUnitDIE(ExtractUnitDIEOnly);
+    return getUnitDIE(ExtractUnitDIEOnly); // Otherwise use this unit's own DIE.
+  }
+
   const char *getCompilationDir();
   Optional<uint64_t> getDWOId() {
     extractDIEsIfNeeded(/*CUDieOnly*/ true);
@@ -462,13 +473,12 @@ public:
   DWARFDie getDIEForOffset(uint32_t Offset) {
     extractDIEsIfNeeded(false);
     assert(!DieArray.empty());
-    auto it = std::lower_bound(
-        DieArray.begin(), DieArray.end(), Offset,
-        [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) {
-          return LHS.getOffset() < Offset;
+    auto It =
+        llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
+          return DIE.getOffset() < Offset;
         });
-    if (it != DieArray.end() && it->getOffset() == Offset)
-      return DWARFDie(this, &*it);
+    if (It != DieArray.end() && It->getOffset() == Offset)
+      return DWARFDie(this, &*It);
     return DWARFDie();
   }
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index 16be5f9401c0..fc8c707c512e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -1,9 +1,8 @@
 //===- DWARFUnitIndex.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index e47fbea5646e..f1268f220272 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -1,9 +1,8 @@
 //===- DWARFVerifier.h ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h
new file mode 100644
index 000000000000..228b4efa0656
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FileEntry.h
@@ -0,0 +1,68 @@
+//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include <functional>
+#include <stdint.h>
+#include <utility>
+
+namespace llvm {
+namespace gsym {
+
+/// Files in GSYM are contained in FileEntry structs where we split the
+/// directory and basename into two different strings in the string
+/// table. This allows paths to share common directory and filename
+/// strings and saves space.
+struct FileEntry {
+
+  /// Offsets in the string table.
+  /// @{
+  uint32_t Dir = 0;  ///< Offset of the directory string.
+  uint32_t Base = 0; ///< Offset of the basename string.
+  /// @}
+
+  FileEntry() = default;
+  FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {}
+
+  // Implement operator== so that FileEntry can be used as key in
+  // unordered containers. Two entries are equal when both offsets match.
+  bool operator==(const FileEntry &RHS) const {
+    return Base == RHS.Base && Dir == RHS.Dir;
+  }
+  bool operator!=(const FileEntry &RHS) const {
+    return !(*this == RHS);
+  }
+};
+
+} // namespace gsym
+
+template <> struct DenseMapInfo<gsym::FileEntry> {
+  static gsym::FileEntry getEmptyKey() {
+    const uint32_t Empty = DenseMapInfo<uint32_t>::getEmptyKey();
+    return gsym::FileEntry(Empty, Empty);
+  }
+  static gsym::FileEntry getTombstoneKey() {
+    const uint32_t Tombstone = DenseMapInfo<uint32_t>::getTombstoneKey();
+    return gsym::FileEntry(Tombstone, Tombstone);
+  }
+  static unsigned getHashValue(const gsym::FileEntry &Val) {
+    return llvm::hash_combine(DenseMapInfo<uint32_t>::getHashValue(Val.Dir),
+                              DenseMapInfo<uint32_t>::getHashValue(Val.Base));
+  }
+  static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
new file mode 100644
index 000000000000..eedb1e638fd1
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -0,0 +1,107 @@
+//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include <tuple>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+
+/// Function information in GSYM files encodes information for one
+/// contiguous address range. The name of the function is encoded as
+/// a string table offset and allows multiple functions with the same
+/// name to share the name string in the string table. Line tables are
+/// stored in a sorted vector of gsym::LineEntry objects and are split
+/// into line tables for each function. If a function has a discontiguous
+/// range, it will be split into two gsym::FunctionInfo objects. If the
+/// function has inline functions, the information will be encoded in
+/// the "Inline" member, see gsym::InlineInfo for more information.
+struct FunctionInfo {
+  AddressRange Range; ///< Address range covered by this function.
+  uint32_t Name; ///< Offset of the function name in the string table.
+  std::vector<gsym::LineEntry> Lines; ///< Line table entries for this range.
+  InlineInfo Inline; ///< Inline function information, if any.
+
+  FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
+      : Range(Addr, Addr + Size), Name(N) {}
+
+  /// Returns whether we have something else than range and name. When
+  /// converting information from a symbol table and from debug info, we
+  /// might end up with multiple FunctionInfo objects for the same range
+  /// and we need to be able to tell which one is the better object to use.
+  bool hasRichInfo() const {
+    return !Lines.empty() || Inline.isValid();
+  }
+
+  /// Address and size can be zero and there can be no line entries for a
+  /// symbol so the only indication this entry is valid is if the name is
+  /// not zero. This can happen when extracting information from symbol
+  /// tables that do not encode symbol sizes. In that case only the
+  /// address and name will be filled in.
+  bool isValid() const {
+    return Name != 0;
+  }
+
+  uint64_t startAddress() const { return Range.Start; }
+  uint64_t endAddress() const { return Range.End; }
+  uint64_t size() const { return Range.size(); }
+  void setStartAddress(uint64_t Addr) { Range.Start = Addr; }
+  void setEndAddress(uint64_t Addr) { Range.End = Addr; }
+  void setSize(uint64_t Size) { Range.End = Range.Start + Size; }
+
+  void clear() {
+    Range = {0, 0};
+    Name = 0;
+    Lines.clear();
+    Inline.clear();
+  }
+};
+
+inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  return std::tie(LHS.Range, LHS.Name, LHS.Lines, LHS.Inline) ==
+         std::tie(RHS.Range, RHS.Name, RHS.Lines, RHS.Inline);
+}
+inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  return !(LHS == RHS);
+}
+/// This sorting will order things consistently by address range first, but then
+/// followed by inlining being valid and line tables. We might end up with a
+/// FunctionInfo from debug info that will have the same range as one from the
+/// symbol table, but we want to quickly be able to sort and use the best version
+/// when creating the final GSYM file.
+inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+  // Distinct address ranges decide the order on their own.
+  if (LHS.Range != RHS.Range)
+    return LHS.Range < RHS.Range;
+  // Same range: an entry without valid inline info orders first.
+  const bool LHSInlined = LHS.Inline.isValid();
+  const bool RHSInlined = RHS.Inline.isValid();
+  if (LHSInlined != RHSInlined)
+    return RHSInlined;
+
+  // Fewer line entries order first; equal counts compare entry by entry.
+  const size_t LHSCount = LHS.Lines.size();
+  const size_t RHSCount = RHS.Lines.size();
+  return LHSCount != RHSCount ? LHSCount < RHSCount : LHS.Lines < RHS.Lines;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h
new file mode 100644
index 000000000000..222430622932
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -0,0 +1,78 @@
+//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h>
+#include <vector>
+
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// Inline information stores the name of the inline function along with
+/// an array of address ranges. It also stores the call file and call line
+/// that called this inline function. This allows us to unwind inline call
+/// stacks back to the inline or concrete function that called this
+/// function. Inlined functions contained in this function are stored in the
+/// "Children" variable. All address ranges must be sorted and all address
+/// ranges of all children must be contained in the ranges of this function.
+/// Any clients that encode information will need to ensure the ranges are
+/// all contained correctly or lookups could fail. All ranges in these objects
+/// must be contained in the top level FunctionInfo address ranges as well.
+struct InlineInfo {
+
+  uint32_t Name; ///< Offset of the inline function name in the string table.
+  uint32_t CallFile; ///< 1 based file index in the file table.
+  uint32_t CallLine; ///< Source line number of the call site.
+  AddressRanges Ranges; ///< Address ranges of this inline function.
+  std::vector<InlineInfo> Children; ///< Inlined functions contained in this one.
+  InlineInfo() : Name(0), CallFile(0), CallLine(0) {}
+  void clear() {
+    Name = 0;
+    CallFile = 0;
+    CallLine = 0;
+    Ranges.clear();
+    Children.clear();
+  }
+  bool isValid() const { return !Ranges.empty(); } ///< True if any range is set.
+
+  using InlineArray = std::vector<const InlineInfo *>;
+
+  /// Lookup an address in the InlineInfo object
+  ///
+  /// This function is used to symbolicate an inline call stack and can
+  /// turn one address in the program into one or more inline call stacks
+  /// and have the stack trace show the original call site from
+  /// non-inlined code.
+  ///
+  /// \param Addr the address to lookup
+  ///
+  /// \returns optional vector of InlineInfo objects that describe the
+  /// inline call stack for a given address, llvm::None otherwise.
+  llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
+};
+
+inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
+  const bool SameCall = LHS.Name == RHS.Name && LHS.CallFile == RHS.CallFile &&
+                        LHS.CallLine == RHS.CallLine;
+  return SameCall && LHS.Ranges == RHS.Ranges && LHS.Children == RHS.Children;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const InlineInfo &FI);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h
new file mode 100644
index 000000000000..6b9380940bd3
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -0,0 +1,48 @@
+//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+
+namespace llvm {
+namespace gsym {
+
+/// Line entries are used to encode the line tables in FunctionInfo objects.
+/// They are stored as a sorted vector of these objects and store the
+/// address, file and line of the line table row for a given address. The
+/// size of a line table entry is calculated by looking at the next entry
+/// in the FunctionInfo's vector of entries.
+struct LineEntry {
+  uint64_t Addr; ///< Start address of this line entry.
+  uint32_t File; ///< 1 based index of file in FileTable.
+  uint32_t Line; ///< Source line number.
+  LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0)
+      : Addr(A), File(F), Line(L) {}
+  bool isValid() const { return File != 0; } ///< True for a nonzero file index.
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const LineEntry &LE) {
+  OS << "addr=" << HEX64(LE.Addr) << ", file=" << format("%3u", LE.File);
+  return OS << ", line=" << format("%3u", LE.Line);
+}
+
+inline bool operator==(const LineEntry &LHS, const LineEntry &RHS) {
+  return LHS.Addr == RHS.Addr && LHS.File == RHS.File && LHS.Line == RHS.Line;
+}
+inline bool operator!=(const LineEntry &LHS, const LineEntry &RHS) {
+  return !(LHS == RHS);
+}
+inline bool operator<(const LineEntry &LHS, const LineEntry &RHS) {
+  return LHS.Addr < RHS.Addr; // Orders by Addr only; File and Line are ignored.
+}
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h
new file mode 100644
index 000000000000..772ff244c5b7
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/Range.h
@@ -0,0 +1,87 @@
+//===- Range.h --------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
+#define LLVM_DEBUGINFO_GSYM_RANGE_H
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdint.h>
+#include <vector>
+
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// A class that represents an address range. The range is specified using
+/// a start and an end address.
+struct AddressRange {
+  uint64_t Start; ///< First address in the range.
+  uint64_t End;   ///< One past the last address in the range.
+  AddressRange() : Start(0), End(0) {}
+  AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {}
+  uint64_t size() const { return End - Start; }
+  bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
+  bool intersects(const AddressRange &R) const {
+    return Start < R.End && R.Start < End;
+  }
+
+  bool operator==(const AddressRange &R) const {
+    return Start == R.Start && End == R.End;
+  }
+  bool operator!=(const AddressRange &R) const {
+    return !(*this == R);
+  }
+  bool operator<(const AddressRange &R) const {
+    return Start < R.Start || (Start == R.Start && End < R.End);
+  }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
+
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. The address ranges are always sorted
+/// and never contain any invalid or empty address ranges. This allows us to
+/// emit address ranges into the GSYM file efficiently. Intersecting address
+/// ranges are combined during insertion so that we can emit the most compact
+/// representation for address ranges when writing to disk.
+class AddressRanges {
+protected:
+  using Collection = std::vector<AddressRange>;
+  Collection Ranges; ///< Underlying storage for the ranges.
+public:
+  void clear() { Ranges.clear(); }
+  bool empty() const { return Ranges.empty(); }
+  bool contains(uint64_t Addr) const;
+  void insert(AddressRange Range);
+  size_t size() const { return Ranges.size(); } ///< Number of stored ranges.
+  bool operator==(const AddressRanges &RHS) const {
+    return Ranges == RHS.Ranges;
+  }
+  const AddressRange &operator[](size_t i) const {
+    assert(i < Ranges.size()); // Index must refer to an existing range.
+    return Ranges[i];
+  }
+  Collection::const_iterator begin() const { return Ranges.begin(); }
+  Collection::const_iterator end() const { return Ranges.end(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h
new file mode 100644
index 000000000000..0001b8b82743
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -0,0 +1,54 @@
+//===- StringTable.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h>
+#include <string>
+
+
+namespace llvm {
+namespace gsym {
+
+/// String tables in GSYM files are required to start with an empty
+/// string at offset zero. Strings must be UTF8 NULL terminated strings.
+struct StringTable {
+  StringRef Data; ///< Raw bytes of the string table.
+  StringTable() : Data() {}
+  StringTable(StringRef D) : Data(D) {}
+  /// Returns the string starting at \p Offset, or an empty StringRef when
+  /// \p Offset is out of range. Works on the full width of size_t.
+  StringRef operator[](size_t Offset) const {
+    if (Offset >= Data.size())
+      return StringRef();
+    return Data.substr(Offset, Data.find('\0', Offset) - Offset);
+  }
+  StringRef getString(uint32_t Offset) const { return (*this)[Offset]; }
+  void clear() { Data = StringRef(); }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const StringTable &S) {
+  OS << "String table:\n";
+  const size_t TableSize = S.Data.size();
+  // Each iteration prints one string and steps over its terminator.
+  for (uint32_t Pos = 0; Pos < TableSize;) {
+    const StringRef Entry = S.getString(Pos);
+    OS << HEX32(Pos) << ": \"" << Entry << "\"\n";
+    Pos += Entry.size() + 1;
+  }
+  return OS;
+}
+
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
diff --git a/include/llvm/DebugInfo/MSF/IMSFFile.h b/include/llvm/DebugInfo/MSF/IMSFFile.h
index f98e715e6b15..7e80f96b89ae 100644
--- a/include/llvm/DebugInfo/MSF/IMSFFile.h
+++ b/include/llvm/DebugInfo/MSF/IMSFFile.h
@@ -1,9 +1,8 @@
 //===- IMSFFile.h - Abstract base class for an MSF file ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/MSF/MSFBuilder.h b/include/llvm/DebugInfo/MSF/MSFBuilder.h
index 3de98c4ecba8..282870f5b3f1 100644
--- a/include/llvm/DebugInfo/MSF/MSFBuilder.h
+++ b/include/llvm/DebugInfo/MSF/MSFBuilder.h
@@ -1,9 +1,8 @@
 //===- MSFBuilder.h - MSF Directory & Metadata Builder ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/MSF/MSFCommon.h b/include/llvm/DebugInfo/MSF/MSFCommon.h
index 2db2b71df4a7..83331b14b8af 100644
--- a/include/llvm/DebugInfo/MSF/MSFCommon.h
+++ b/include/llvm/DebugInfo/MSF/MSFCommon.h
@@ -1,9 +1,8 @@
 //===- MSFCommon.h - Common types and functions for MSF files ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/MSF/MSFError.h b/include/llvm/DebugInfo/MSF/MSFError.h
index 5c043a7837b3..fbc4e6928536 100644
--- a/include/llvm/DebugInfo/MSF/MSFError.h
+++ b/include/llvm/DebugInfo/MSF/MSFError.h
@@ -1,9 +1,8 @@
 //===- MSFError.h - Error extensions for MSF Files --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index f65e52922da7..593d781b990e 100644
--- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -1,9 +1,8 @@
 //==- MappedBlockStream.h - Discontiguous stream data in an MSF --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index ac7f19637ab1..49ba20af7263 100644
--- a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -1,9 +1,8 @@
 //===- ConcreteSymbolEnumerator.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h b/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
index 881d7329ab66..f05b58c55507 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
@@ -1,9 +1,8 @@
 //===- DIADataStream.h - DIA implementation of IPDBDataStream ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
index 1f129052d034..8a00ad45291a 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
@@ -1,9 +1,8 @@
 //==- DIAEnumDebugStreams.h - DIA Debug Stream Enumerator impl ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
index f3b02f07e648..bd417c0746b1 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
@@ -1,9 +1,8 @@
 //==- DIAEnumFrameData.h --------------------------------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
index 4669a8d31196..1f75ca27c4f8 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
@@ -1,9 +1,8 @@
 //==- DIAEnumInjectedSources.h - DIA Injected Sources Enumerator -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
index f1cb6268a26d..8800baac105d 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
@@ -1,9 +1,8 @@
 //==- DIAEnumLineNumbers.h - DIA Line Number Enumerator impl -----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
index ac2ae317d263..be8613bfba9d 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
@@ -1,9 +1,8 @@
 //==- DIAEnumSectionContribs.h --------------------------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
index dac3df06a178..61278994ed36 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
@@ -1,9 +1,8 @@
 //==- DIAEnumSourceFiles.h - DIA Source File Enumerator impl -----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
index 9689859ae0f8..f55342cea2e5 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
@@ -1,9 +1,8 @@
 //==- DIAEnumSymbols.h - DIA Symbol Enumerator impl --------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
index f4f856ebb6fd..057cb06fc8ca 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
@@ -1,9 +1,8 @@
 //===- DIAEnumTables.h - DIA Tables Enumerator Impl -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAError.h b/include/llvm/DebugInfo/PDB/DIA/DIAError.h
index 2b33a65a0a14..96d960599f7e 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAError.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAError.h
@@ -1,9 +1,8 @@
 //===- DIAError.h - Error extensions for PDB DIA implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
index 0ce6cfc93030..c04f7cd00836 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
@@ -1,9 +1,8 @@
 //===- DIAFrameData.h - DIA Impl. of IPDBFrameData ---------------- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h b/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
index 635508da84ea..67963a06d939 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
@@ -1,9 +1,8 @@
 //===- DIAInjectedSource.h - DIA impl for IPDBInjectedSource ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -26,7 +25,7 @@ public:
   std::string getFileName() const override;
   std::string getObjectFileName() const override;
   std::string getVirtualFileName() const override;
-  PDB_SourceCompression getCompression() const override;
+  uint32_t getCompression() const override;
   std::string getCode() const override;
 
 private:
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h b/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
index a59e3a19c8c2..d8bb27220763 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
@@ -1,9 +1,8 @@
 //===- DIALineNumber.h - DIA implementation of IPDBLineNumber ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
index 5d4f855c63ca..7f201d3a4e36 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
@@ -1,9 +1,8 @@
 //===- DIARawSymbol.h - DIA implementation of IPDBRawSymbol ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h b/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
index 4688f1f91a89..0972831e8b16 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
@@ -1,9 +1,8 @@
 //===- DIASectionContrib.h - DIA Impl. of IPDBSectionContrib ------ C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/include/llvm/DebugInfo/PDB/DIA/DIASession.h
index 592e061a8d83..6f62e6061f56 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASession.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASession.h
@@ -1,9 +1,8 @@
 //===- DIASession.h - DIA implementation of IPDBSession ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h b/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
index 1088ea54981c..96edfc9f9e29 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
@@ -1,9 +1,8 @@
 //===- DIASourceFile.h - DIA implementation of IPDBSourceFile ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASupport.h b/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
index 92ebc04ae5a4..1a7c2f3aeeab 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
@@ -1,9 +1,8 @@
 //===- DIASupport.h - Common header includes for DIA ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Common defines and header includes for all LLVMDebugInfoPDBDIA.  The
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIATable.h b/include/llvm/DebugInfo/PDB/DIA/DIATable.h
index ce93fa0b86c3..65396a042f06 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIATable.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIATable.h
@@ -1,9 +1,8 @@
 //===- DIATable.h - DIA implementation of IPDBTable -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h b/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
index aa843e05de70..5e01d8f10a6e 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
@@ -1,9 +1,8 @@
 //===- DIAUtils.h - Utility functions for working with DIA ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h
index 997f13f5f30e..ec85d92d2a92 100644
--- a/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/include/llvm/DebugInfo/PDB/GenericError.h
@@ -1,9 +1,8 @@
-//===- Error.h - system_error extensions for PDB ----------------*- C++ -*-===//
+//===- GenericError.h - system_error extensions for PDB ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 0d7a286a11a6..4d0589a87915 100644
--- a/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -1,9 +1,8 @@
 //===- IPDBDataStream.h - base interface for child enumerator ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index 7017f2600e9b..bfa67d39bc76 100644
--- a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -1,9 +1,8 @@
 //===- IPDBEnumChildren.h - base interface for child enumerator -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBFrameData.h b/include/llvm/DebugInfo/PDB/IPDBFrameData.h
index 74679215b880..24138b380db4 100644
--- a/include/llvm/DebugInfo/PDB/IPDBFrameData.h
+++ b/include/llvm/DebugInfo/PDB/IPDBFrameData.h
@@ -1,9 +1,8 @@
 //===- IPDBFrameData.h - base interface for frame data ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h b/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
index e75d64af92bb..d5b36f9846b5 100644
--- a/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
+++ b/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
@@ -1,16 +1,14 @@
 //===- IPDBInjectedSource.h - base class for PDB injected file --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
 #define LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
 
-#include "PDBTypes.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
 #include <string>
@@ -33,7 +31,10 @@ public:
   virtual std::string getFileName() const = 0;
   virtual std::string getObjectFileName() const = 0;
   virtual std::string getVirtualFileName() const = 0;
-  virtual PDB_SourceCompression getCompression() const = 0;
+  // The returned value depends on the PDB producer,
+  // but 0 is guaranteed to mean "no compression".
+  // The enum PDB_SourceCompression lists known return values.
+  virtual uint32_t getCompression() const = 0;
   virtual std::string getCode() const = 0;
 };
 } // namespace pdb
diff --git a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
index e20080f2fbfc..77e88999497e 100644
--- a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
+++ b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
@@ -1,9 +1,8 @@
 //===- IPDBLineNumber.h - base interface for PDB line no. info ---*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
index 7c818d7cadeb..b24e712e3b78 100644
--- a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
@@ -1,9 +1,8 @@
 //===- IPDBRawSymbol.h - base interface for PDB symbol types ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h b/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
index 4fda62404672..c5cf4bbe5560 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
@@ -1,9 +1,8 @@
 //==- IPDBSectionContrib.h - Interfaces for PDB SectionContribs --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h
index 88fd02c0a345..aa8d9c76d63e 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -1,9 +1,8 @@
 //===- IPDBSession.h - base interface for a PDB symbol context --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
index 3676c4030b13..d7e49fb70580 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
@@ -1,9 +1,8 @@
 //===- IPDBSourceFile.h - base interface for a PDB source file --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/IPDBTable.h b/include/llvm/DebugInfo/PDB/IPDBTable.h
index 4561c4e847b2..55ca230d58c4 100644
--- a/include/llvm/DebugInfo/PDB/IPDBTable.h
+++ b/include/llvm/DebugInfo/PDB/IPDBTable.h
@@ -1,9 +1,8 @@
 //===- IPDBTable.h - Base Interface for a PDB Symbol Context ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 9eef4041d0a1..568f0c98c559 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -1,9 +1,8 @@
 //===- DbiModuleDescriptor.h - PDB module information -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index ac7f741afefa..4f5d28bbd05a 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -1,9 +1,8 @@
 //===- DbiModuleDescriptorBuilder.h - PDB module information ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 5f6e7ab92a96..14223273c898 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -1,9 +1,8 @@
 //===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index a3ca607efbef..7d75c159b7ae 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -1,9 +1,8 @@
 //===- DbiStream.h - PDB Dbi Stream (Stream 3) Access -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
 
 #include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
@@ -80,7 +80,10 @@ public:
 
   FixedStreamArray<object::coff_section> getSectionHeaders() const;
 
-  FixedStreamArray<object::FpoData> getFpoRecords();
+  bool hasOldFpoRecords() const;
+  FixedStreamArray<object::FpoData> getOldFpoRecords() const;
+  bool hasNewFpoRecords() const;
+  const codeview::DebugFrameDataSubsectionRef &getNewFpoRecords() const;
 
   FixedStreamArray<SecMapEntry> getSectionMap() const;
   void visitSectionContributions(ISectionContribVisitor &Visitor) const;
@@ -91,7 +94,11 @@ private:
   Error initializeSectionContributionData();
   Error initializeSectionHeadersData(PDBFile *Pdb);
   Error initializeSectionMapData();
-  Error initializeFpoRecords(PDBFile *Pdb);
+  Error initializeOldFpoRecords(PDBFile *Pdb);
+  Error initializeNewFpoRecords(PDBFile *Pdb);
+
+  Expected<std::unique_ptr<msf::MappedBlockStream>>
+  createIndexedStreamForHeaderType(PDBFile *Pdb, DbgHeaderType Type) const;
 
   std::unique_ptr<BinaryStream> Stream;
 
@@ -117,8 +124,11 @@ private:
   std::unique_ptr<msf::MappedBlockStream> SectionHeaderStream;
   FixedStreamArray<object::coff_section> SectionHeaders;
 
-  std::unique_ptr<msf::MappedBlockStream> FpoStream;
-  FixedStreamArray<object::FpoData> FpoRecords;
+  std::unique_ptr<msf::MappedBlockStream> OldFpoStream;
+  FixedStreamArray<object::FpoData> OldFpoRecords;
+  
+  std::unique_ptr<msf::MappedBlockStream> NewFpoStream;
+  codeview::DebugFrameDataSubsectionRef NewFpoRecords;
 
   const DbiStreamHeader *Header;
 };
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index b538de576677..d9be238af07b 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -1,9 +1,8 @@
 //===- DbiStreamBuilder.h - PDB Dbi Stream Creation -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/EnumTables.h b/include/llvm/DebugInfo/PDB/Native/EnumTables.h
index c018445630fe..70161fadf7d2 100644
--- a/include/llvm/DebugInfo/PDB/Native/EnumTables.h
+++ b/include/llvm/DebugInfo/PDB/Native/EnumTables.h
@@ -1,9 +1,8 @@
 //===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/Formatters.h b/include/llvm/DebugInfo/PDB/Native/Formatters.h
index 7d5eab2e2a09..29c957eeb5e0 100644
--- a/include/llvm/DebugInfo/PDB/Native/Formatters.h
+++ b/include/llvm/DebugInfo/PDB/Native/Formatters.h
@@ -1,9 +1,8 @@
 //===- Formatters.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
index 4c39ca762b5b..a49795600028 100644
--- a/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
@@ -1,9 +1,8 @@
 //===- GSIStreamBuilder.h - PDB Publics/Globals Stream Creation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
index 7f84564ee988..404baaa25077 100644
--- a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
@@ -1,9 +1,8 @@
 //===- GlobalsStream.h - PDB Index of Symbols by Name -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/Hash.h b/include/llvm/DebugInfo/PDB/Native/Hash.h
index 1f11d43ecdd4..b048d878a12c 100644
--- a/include/llvm/DebugInfo/PDB/Native/Hash.h
+++ b/include/llvm/DebugInfo/PDB/Native/Hash.h
@@ -1,9 +1,8 @@
 //===- Hash.h - PDB hash functions ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/HashTable.h b/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 34cc6179688b..aa38417bcf4c 100644
--- a/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -1,9 +1,8 @@
 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,21 +31,21 @@ namespace pdb {
 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
 
-template <typename ValueT, typename TraitsT> class HashTable;
+template <typename ValueT> class HashTable;
 
-template <typename ValueT, typename TraitsT>
+template <typename ValueT>
 class HashTableIterator
-    : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
+    : public iterator_facade_base<HashTableIterator<ValueT>,
                                   std::forward_iterator_tag,
-                                  std::pair<uint32_t, ValueT>> {
-  friend HashTable<ValueT, TraitsT>;
+                                  const std::pair<uint32_t, ValueT>> {
+  friend HashTable<ValueT>;
 
-  HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
+  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
                     bool IsEnd)
       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
 
 public:
-  HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
+  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
     int I = Map.Present.find_first();
     if (I == -1) {
       Index = 0;
@@ -73,6 +72,12 @@ public:
     assert(Map->Present.test(Index));
     return Map->Buckets[Index];
   }
+
+  // Implement postfix op++ in terms of prefix op++ by using the superclass
+  // implementation.
+  using iterator_facade_base<HashTableIterator<ValueT>,
+                             std::forward_iterator_tag,
+                             const std::pair<uint32_t, ValueT>>::operator++;
   HashTableIterator &operator++() {
     while (Index < Map->Buckets.size()) {
       ++Index;
@@ -88,24 +93,13 @@ private:
   bool isEnd() const { return IsEnd; }
   uint32_t index() const { return Index; }
 
-  const HashTable<ValueT, TraitsT> *Map;
+  const HashTable<ValueT> *Map;
   uint32_t Index;
   bool IsEnd;
 };
 
-template <typename T> struct PdbHashTraits {};
-
-template <> struct PdbHashTraits<uint32_t> {
-  uint32_t hashLookupKey(uint32_t N) const { return N; }
-  uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
-  uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
-};
-
-template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
+template <typename ValueT>
 class HashTable {
-  using iterator = HashTableIterator<ValueT, TraitsT>;
-  friend iterator;
-
   struct Header {
     support::ulittle32_t Size;
     support::ulittle32_t Capacity;
@@ -114,10 +108,11 @@ class HashTable {
   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
 
 public:
-  HashTable() { Buckets.resize(8); }
+  using const_iterator = HashTableIterator<ValueT>;
+  friend const_iterator;
 
-  explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
-  HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
+  HashTable() { Buckets.resize(8); }
+  explicit HashTable(uint32_t Capacity) {
     Buckets.resize(Capacity);
   }
 
@@ -144,7 +139,7 @@ public:
       return EC;
     if (Present.intersects(Deleted))
       return make_error<RawError>(raw_error_code::corrupt_file,
-                                  "Present bit vector interesects deleted!");
+                                  "Present bit vector intersects deleted!");
 
     for (uint32_t P : Present) {
       if (auto EC = Stream.readInteger(Buckets[P].first))
@@ -217,19 +212,20 @@ public:
   uint32_t capacity() const { return Buckets.size(); }
   uint32_t size() const { return Present.count(); }
 
-  iterator begin() const { return iterator(*this); }
-  iterator end() const { return iterator(*this, 0, true); }
+  const_iterator begin() const { return const_iterator(*this); }
+  const_iterator end() const { return const_iterator(*this, 0, true); }
 
   /// Find the entry whose key has the specified hash value, using the specified
   /// traits defining hash function and equality.
-  template <typename Key> iterator find_as(const Key &K) const {
+  template <typename Key, typename TraitsT>
+  const_iterator find_as(const Key &K, TraitsT &Traits) const {
     uint32_t H = Traits.hashLookupKey(K) % capacity();
     uint32_t I = H;
     Optional<uint32_t> FirstUnused;
     do {
       if (isPresent(I)) {
         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
-          return iterator(*this, I, false);
+          return const_iterator(*this, I, false);
       } else {
         if (!FirstUnused)
           FirstUnused = I;
@@ -248,17 +244,19 @@ public:
     // table were Present.  But this would violate the load factor constraints
     // that we impose, so it should never happen.
     assert(FirstUnused);
-    return iterator(*this, *FirstUnused, true);
+    return const_iterator(*this, *FirstUnused, true);
   }
 
   /// Set the entry using a key type that the specified Traits can convert
   /// from a real key to an internal key.
-  template <typename Key> bool set_as(const Key &K, ValueT V) {
-    return set_as_internal(K, std::move(V), None);
+  template <typename Key, typename TraitsT>
+  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
+    return set_as_internal(K, std::move(V), Traits, None);
   }
 
-  template <typename Key> ValueT get(const Key &K) const {
-    auto Iter = find_as(K);
+  template <typename Key, typename TraitsT>
+  ValueT get(const Key &K, TraitsT &Traits) const {
+    auto Iter = find_as(K, Traits);
     assert(Iter != end());
     return (*Iter).second;
   }
@@ -267,7 +265,6 @@ protected:
   bool isPresent(uint32_t K) const { return Present.test(K); }
   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
 
-  TraitsT Traits;
   BucketList Buckets;
   mutable SparseBitVector<> Present;
   mutable SparseBitVector<> Deleted;
@@ -275,9 +272,10 @@ protected:
 private:
   /// Set the entry using a key type that the specified Traits can convert
   /// from a real key to an internal key.
-  template <typename Key>
-  bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
-    auto Entry = find_as(K);
+  template <typename Key, typename TraitsT>
+  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
+                       Optional<uint32_t> InternalKey) {
+    auto Entry = find_as(K, Traits);
     if (Entry != end()) {
       assert(isPresent(Entry.index()));
       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
@@ -294,15 +292,16 @@ private:
     Present.set(Entry.index());
     Deleted.reset(Entry.index());
 
-    grow();
+    grow(Traits);
 
-    assert((find_as(K)) != end());
+    assert((find_as(K, Traits)) != end());
     return true;
   }
 
   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
 
-  void grow() {
+  template <typename TraitsT>
+  void grow(TraitsT &Traits) {
     uint32_t S = size();
     uint32_t MaxLoad = maxLoad(capacity());
     if (S < maxLoad(capacity()))
@@ -314,10 +313,11 @@ private:
     // Growing requires rebuilding the table and re-hashing every item.  Make a
     // copy with a larger capacity, insert everything into the copy, then swap
     // it in.
-    HashTable NewMap(NewCapacity, Traits);
+    HashTable NewMap(NewCapacity);
     for (auto I : Present) {
       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
-      NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
+      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
+                             Buckets[I].first);
     }
 
     Buckets.swap(NewMap.Buckets);
diff --git a/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h b/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
index fb00d6ad4bc7..717dce2f2737 100644
--- a/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
+++ b/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
@@ -1,9 +1,8 @@
 //===- ISectionContribVisitor.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/include/llvm/DebugInfo/PDB/Native/InfoStream.h
index 8c52b042f289..315b09356ae3 100644
--- a/include/llvm/DebugInfo/PDB/Native/InfoStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/InfoStream.h
@@ -1,9 +1,8 @@
 //===- InfoStream.h - PDB Info Stream (Stream 1) Access ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
index 101127a355f5..208a37c45d49 100644
--- a/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
@@ -1,9 +1,8 @@
 //===- InfoStreamBuilder.h - PDB Info Stream Creation -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
new file mode 100644
index 000000000000..d0cac3749bca
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
@@ -0,0 +1,44 @@
+//===- InjectedSourceStream.h - PDB Headerblock Stream Access ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
+namespace pdb {
+class PDBFile;
+class PDBStringTable;
+
+class InjectedSourceStream {
+public:
+  InjectedSourceStream(std::unique_ptr<msf::MappedBlockStream> Stream);
+  Error reload(const PDBStringTable &Strings);
+
+  using const_iterator = HashTable<SrcHeaderBlockEntry>::const_iterator;
+  const_iterator begin() const { return InjectedSourceTable.begin(); }
+  const_iterator end() const { return InjectedSourceTable.end(); }
+
+  uint32_t size() const { return InjectedSourceTable.size(); }
+
+private:
+  std::unique_ptr<msf::MappedBlockStream> Stream;
+
+  const SrcHeaderBlockHeader* Header;
+  HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index 8d590df288f3..cb1ffc729512 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -1,9 +1,8 @@
 //===- ModuleDebugStream.h - PDB Module Info Stream Access ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -69,6 +68,8 @@ public:
   findChecksumsSubsection() const;
 
 private:
+  Error reloadSerialize(BinaryStreamReader &Reader);
+
   DbiModuleDescriptor Mod;
 
   uint32_t Signature;
diff --git a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index 01b8f1b5da56..1df059ffa9fd 100644
--- a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -1,9 +1,8 @@
 //===- NamedStreamMap.h - PDB Named Stream Map ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -60,7 +59,7 @@ private:
   NamedStreamMapTraits HashTraits;
   /// Closed hash table from Offset -> StreamNumber, where Offset is the offset
   /// of the stream name in NamesBuffer.
-  HashTable<support::ulittle32_t, NamedStreamMapTraits> OffsetIndexMap;
+  HashTable<support::ulittle32_t> OffsetIndexMap;
 
   /// Buffer of string data.
   std::vector<char> NamesBuffer;
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
index 3cd465503044..50d437642d0f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
@@ -1,9 +1,8 @@
 //===- NativeCompilandSymbol.h - native impl for compiland syms -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
index 4442a1ec41fb..073878afd129 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -1,9 +1,8 @@
 //==- NativeEnumGlobals.h - Native Global Enumerator impl --------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
new file mode 100644
index 000000000000..ca1e22bd82a2
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
@@ -0,0 +1,43 @@
+//==- NativeEnumInjectedSources.h - Native Injected Source Enumerator ----*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+namespace llvm {
+namespace pdb {
+
+class InjectedSourceStream;
+class PDBStringTable;
+
+class NativeEnumInjectedSources : public IPDBEnumChildren<IPDBInjectedSource> {
+public:
+  NativeEnumInjectedSources(PDBFile &File, const InjectedSourceStream &IJS,
+                            const PDBStringTable &Strings);
+
+  uint32_t getChildCount() const override;
+  std::unique_ptr<IPDBInjectedSource>
+  getChildAtIndex(uint32_t Index) const override;
+  std::unique_ptr<IPDBInjectedSource> getNext() override;
+  void reset() override;
+
+private:
+  PDBFile &File;
+  const InjectedSourceStream &Stream;
+  const PDBStringTable &Strings;
+  InjectedSourceStream::const_iterator Cur;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
index c268641a1008..94f1ee18ed9f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
@@ -1,9 +1,8 @@
 //==- NativeEnumModules.h - Native Module Enumerator impl --------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index f8ac1655dc61..25c56567384f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -1,9 +1,8 @@
 //==- NativeEnumTypes.h - Native Type Enumerator impl ------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index f4030da1d026..280358d02305 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -1,9 +1,8 @@
 //===- NativeExeSymbol.h - native impl for PDBSymbolExe ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
index 6505a7d39573..4133be220713 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
@@ -1,9 +1,8 @@
 //==- NativeRawSymbol.h - Native implementation of IPDBRawSymbol -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index 4878e47d3121..ee7d8cdec93b 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -1,9 +1,8 @@
 //===- NativeSession.h - Native implementation of IPDBSession ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
index acc5eb8ff2c2..063585097899 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -1,9 +1,8 @@
 //===- NativeSymbolEnumerator.h - info about enumerator values --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
index 10e68e6df450..262864fd709f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
@@ -1,9 +1,8 @@
 //===- NativeTypeArray.h ------------------------------------------ C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
index 725dfb89222f..8bb09f05d0bc 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
@@ -1,9 +1,8 @@
 //===- NativeTypeBuiltin.h ---------------------------------------- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
index a5cbefc18111..2068c88fc74a 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -1,9 +1,8 @@
 //===- NativeTypeEnum.h - info about enum type ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 1b1b87f6581f..a7ea287dffc8 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -1,9 +1,8 @@
 //===- NativeTypeFunctionSig.h - info about function signature ---*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
index bcb7431fecf1..446f77db0f6c 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
@@ -1,9 +1,8 @@
 //===- NativeTypePointer.h - info about pointer type -------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
index 06eb6fcf3764..fe8a6f7f2bda 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
@@ -1,9 +1,8 @@
 //===- NativeTypeTypedef.h - info about typedef ------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
index 84821d8731be..8f4dee3e658c 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -1,9 +1,8 @@
 //===- NativeTypeUDT.h - info about class/struct type ------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index a996f34ef859..4ec0f9bf6b3d 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -1,9 +1,8 @@
 //===- NativeTypeVTShape.h - info about virtual table shape ------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index 5e39ac3e37b7..56de4030167d 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -1,9 +1,8 @@
 //===- PDBFile.h - Low level interface to a PDB file ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -33,6 +32,7 @@ namespace pdb {
 class DbiStream;
 class GlobalsStream;
 class InfoStream;
+class InjectedSourceStream;
 class PDBStringTable;
 class PDBFileBuilder;
 class PublicsStream;
@@ -84,7 +84,12 @@ public:
 
   ArrayRef<support::ulittle32_t> getDirectoryBlockArray() const;
 
-  std::unique_ptr<msf::MappedBlockStream> createIndexedStream(uint16_t SN);
+  std::unique_ptr<msf::MappedBlockStream>
+  createIndexedStream(uint16_t SN) const;
+  Expected<std::unique_ptr<msf::MappedBlockStream>>
+  safelyCreateIndexedStream(uint32_t StreamIndex) const;
+  Expected<std::unique_ptr<msf::MappedBlockStream>>
+  safelyCreateNamedStream(StringRef Name);
 
   msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const;
   msf::MSFStreamLayout getFpmStreamLayout() const;
@@ -100,6 +105,7 @@ public:
   Expected<PublicsStream &> getPDBPublicsStream();
   Expected<SymbolStream &> getPDBSymbolStream();
   Expected<PDBStringTable &> getStringTable();
+  Expected<InjectedSourceStream &> getInjectedSourceStream();
 
   BumpPtrAllocator &getAllocator() { return Allocator; }
 
@@ -111,15 +117,11 @@ public:
   bool hasPDBSymbolStream();
   bool hasPDBTpiStream() const;
   bool hasPDBStringTable();
+  bool hasPDBInjectedSourceStream();
 
   uint32_t getPointerSize();
 
 private:
-  Expected<std::unique_ptr<msf::MappedBlockStream>>
-  safelyCreateIndexedStream(const msf::MSFLayout &Layout,
-                            BinaryStreamRef MsfData,
-                            uint32_t StreamIndex) const;
-
   std::string FilePath;
   BumpPtrAllocator &Allocator;
 
@@ -136,6 +138,7 @@ private:
   std::unique_ptr<SymbolStream> Symbols;
   std::unique_ptr<msf::MappedBlockStream> DirectoryStream;
   std::unique_ptr<msf::MappedBlockStream> StringTableStream;
+  std::unique_ptr<InjectedSourceStream> InjectedSources;
   std::unique_ptr<PDBStringTable> Strings;
 };
 }
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 37458749a8d8..2abaa5f4cdc4 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -1,9 +1,8 @@
 //===- PDBFileBuilder.h - PDB File Creation ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -98,7 +97,7 @@ private:
 
   PDBStringTableBuilder Strings;
   StringTableHashTraits InjectedSourceHashTraits;
-  HashTable<SrcHeaderBlockEntry, StringTableHashTraits> InjectedSourceTable;
+  HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
 
   SmallVector<InjectedSourceDescriptor, 2> InjectedSources;
 
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 29167c966d42..57f0b64a32a6 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -1,9 +1,8 @@
 //===- PDBStringTable.h - PDB String Table -----------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
index 0f81c18eafe6..57267ef5c6c5 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
@@ -1,9 +1,8 @@
 //===- PDBStringTableBuilder.h - PDB String Table Builder -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index 2d0222a9071a..ee28d108df8b 100644
--- a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -1,9 +1,8 @@
 //===- PublicsStream.h - PDB Public Symbol Stream -------- ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/RawConstants.h b/include/llvm/DebugInfo/PDB/Native/RawConstants.h
index fbbd3318d958..0dde5ef66932 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawConstants.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawConstants.h
@@ -1,9 +1,8 @@
 //===- RawConstants.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/RawError.h b/include/llvm/DebugInfo/PDB/Native/RawError.h
index 97d11b4f20d1..aadb64c2e3f1 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawError.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawError.h
@@ -1,9 +1,8 @@
 //===- RawError.h - Error extensions for raw PDB implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 8f6d6611c032..6119e6e5db26 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -1,9 +1,8 @@
 //===- RawTypes.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -177,7 +176,7 @@ struct DbiStreamHeader {
 };
 static_assert(sizeof(DbiStreamHeader) == 64, "Invalid DbiStreamHeader size!");
 
-/// The header preceeding the File Info Substream of the DBI stream.
+/// The header preceding the File Info Substream of the DBI stream.
 struct FileInfoSubstreamHeader {
   /// Total # of modules, should match number of records in the ModuleInfo
   /// substream.
@@ -208,7 +207,7 @@ struct ModInfoFlags {
   static const uint16_t TypeServerIndexShift = 8;
 };
 
-/// The header preceeding each entry in the Module Info substream of the DBI
+/// The header preceding each entry in the Module Info substream of the DBI
 /// stream.  Corresponds to the type MODI in the reference implementation.
 struct ModuleInfoHeader {
   /// Currently opened module. This field is a pointer in the reference
@@ -273,7 +272,7 @@ struct PublicsStreamHeader {
   support::ulittle32_t NumSections;
 };
 
-// The header preceeding the global TPI stream.
+// The header preceding the global TPI stream.
 // This corresponds to `HDR` in PDB/dbi/tpi.h.
 struct TpiStreamHeader {
   struct EmbeddedBuf {
@@ -301,7 +300,7 @@ struct TpiStreamHeader {
 const uint32_t MinTpiHashBuckets = 0x1000;
 const uint32_t MaxTpiHashBuckets = 0x40000;
 
-/// The header preceeding the global PDB Stream (Stream 1)
+/// The header preceding the global PDB Stream (Stream 1)
 struct InfoStreamHeader {
   support::ulittle32_t Version;
   support::ulittle32_t Signature;
@@ -309,7 +308,7 @@ struct InfoStreamHeader {
   codeview::GUID Guid;
 };
 
-/// The header preceeding the /names stream.
+/// The header preceding the /names stream.
 struct PDBStringTableHeader {
   support::ulittle32_t Signature;   // PDBStringTableSignature
   support::ulittle32_t HashVersion; // 1 or 2
@@ -342,7 +341,6 @@ struct SrcHeaderBlockEntry {
   short Padding;                 // Pad to 4 bytes.
   char Reserved[8];
 };
-
 static_assert(sizeof(SrcHeaderBlockEntry) == 40, "Incorrect struct size!");
 
 } // namespace pdb
diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 08e1d41e6ee9..0b15ab474f71 100644
--- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -1,9 +1,8 @@
 //==- SymbolCache.h - Cache of native symbols and ids ------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
index ae9f7d657b70..4fe1bd9734e4 100644
--- a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
@@ -1,9 +1,8 @@
 //===- SymbolStream.cpp - PDB Symbol Stream Access --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
index c2996ccf1825..4ac60a80e701 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
@@ -1,9 +1,8 @@
 //===- TpiHashing.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index b76576a7a263..1b7fd2d54cb2 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -1,9 +1,8 @@
 //===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
index 411720d6f56b..72d98e9c2c4d 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -1,9 +1,8 @@
 //===- TpiStreamBuilder.h - PDB Tpi Stream Creation -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDB.h b/include/llvm/DebugInfo/PDB/PDB.h
index 9f9da39ca6cc..6d734dc2f243 100644
--- a/include/llvm/DebugInfo/PDB/PDB.h
+++ b/include/llvm/DebugInfo/PDB/PDB.h
@@ -1,9 +1,8 @@
 //===- PDB.h - base header file for creating a PDB reader -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h
index 0ce49f5ef922..7b6793f0a639 100644
--- a/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -1,9 +1,8 @@
 //===-- PDBContext.h --------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===/
 
@@ -44,15 +43,18 @@ namespace pdb {
     void dump(raw_ostream &OS, DIDumpOptions DIDumpOpts) override;
 
     DILineInfo getLineInfoForAddress(
-        uint64_t Address,
+        object::SectionedAddress Address,
         DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
     DILineInfoTable getLineInfoForAddressRange(
-        uint64_t Address, uint64_t Size,
+        object::SectionedAddress Address, uint64_t Size,
         DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
     DIInliningInfo getInliningInfoForAddress(
-        uint64_t Address,
+        object::SectionedAddress Address,
         DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
 
+    std::vector<DILocal>
+    getLocalsForAddress(object::SectionedAddress Address) override;
+
   private:
     std::string getFunctionName(uint64_t Address, DINameKind NameKind) const;
     std::unique_ptr<IPDBSession> Session;
diff --git a/include/llvm/DebugInfo/PDB/PDBExtras.h b/include/llvm/DebugInfo/PDB/PDBExtras.h
index aaec71aa8c90..45aba013e7c8 100644
--- a/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -1,9 +1,8 @@
 //===- PDBExtras.h - helper functions and classes for PDBs ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,7 +27,8 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_BuiltinType &Type);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_DataKind &Data);
-raw_ostream &operator<<(raw_ostream &OS, const codeview::RegisterId &Reg);
+raw_ostream &operator<<(raw_ostream &OS,
+                        const llvm::codeview::CPURegister &CpuReg);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_LocType &Loc);
 raw_ostream &operator<<(raw_ostream &OS, const codeview::ThunkOrdinal &Thunk);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum);
@@ -37,13 +37,12 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);
-raw_ostream &operator<<(raw_ostream &OS,
-                        const PDB_SourceCompression &Compression);
 
 raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
 raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
 raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
 
+raw_ostream& dumpPDBSourceCompression(raw_ostream& OS, uint32_t Compression);
 
 template <typename T>
 void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
diff --git a/include/llvm/DebugInfo/PDB/PDBSymDumper.h b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
index c976935c48e0..f81b15f2353d 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymDumper.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
@@ -1,9 +1,8 @@
 //===- PDBSymDumper.h - base interface for PDB symbol dumper *- C++ -----*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 3a74f7c3aace..d9004a8894d9 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -1,9 +1,8 @@
 //===- PDBSymbol.h - base class for user-facing symbol types -----*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index ef00df15cb0a..c76466a97b66 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolAnnotation.h - Accessors for querying PDB annotations ---*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLANNOTATION_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index 2cf9c72a8886..cf471450d989 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolBlock.h - Accessors for querying PDB blocks -------------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLBLOCK_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index 04dbd962ebd4..ca8b39d03f86 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompiland.h - Accessors for querying PDB compilands -----*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLCOMPILAND_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index 3d651a464d94..b82bb6c0a352 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompilandDetails.h - PDB compiland details ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index ffc408314d9a..61607a03593d 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompilandEnv.h - compiland environment variables *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index c29e4c31d3f3..75a86411643a 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolCustom.h - compiler-specific types --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 217e1e976e6b..7e9b69d7cf4b 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolData.h - PDB data (e.g. variable) accessors -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
index 366d0cf4777f..1a9fb240a248 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolExe.h - Accessors for querying executables in a PDB ----*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index 129e557c7f25..6be27c8d3bc7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolFunc.h - class representing a function instance -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 18db8a50fd1b..7152249cbd03 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolFuncDebugEnd.h - function end bounds info -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 83d82f0cbcc5..3125c271d2e8 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolFuncDebugStart.h - function start bounds info ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index 8b2617fcd757..3625e23f014f 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolLabel.h - label info ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index 9def3edb469a..e2b2545d78ec 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolPublicSymbol.h - public symbol info -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index 7bb0555362db..274de8b0b16f 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolThunk.h - Support for querying PDB thunks ---------------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index 488f668bdc10..c0215c9ee4b1 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeArray.h - array type information ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index 550deedd7504..bab292ee0d46 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeBaseClass.h - base class type information ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index e07e88802b8f..7d94c3c97a2b 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeBuiltin.h - builtin type information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 0d8979c9c5c5..dc647aff48d3 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeCustom.h - custom compiler type information -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index 58292a63501f..7a9e43785d67 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeDimension.h - array dimension type info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index f463047bb5b5..3ac72801b202 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeEnum.h - enum type info ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index 5b940b0737af..c4d9dd6308a3 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFriend.h - friend type info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 074cb418fc82..22d3623496f2 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFunctionArg.h - function arg type info ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
index dfdf436197c3..a1491ca2e415 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFunctionSig.h - function signature type info *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index d716abd640c6..6bc70bca82e7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeManaged.h - managed type info ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index 300d6722fc4d..b36f459e880c 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypePointer.h - pointer type info ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index d6e2a36486d5..2712d0617e0e 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeTypedef.h - typedef type info ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index 937dd6c87221..3e73ad7ac85a 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeUDT.h - UDT type info -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index 6efce4bbd686..e8161d311ea7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeVTable.h - VTable type info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index 8949052b0c0f..614060867042 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeVTableShape.h - VTable shape info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index e935ac6ce0dc..cc29d38c2578 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolUnknown.h - unknown symbol type -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 4e8c99fc8d89..fd812cb2f793 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -1,9 +1,8 @@
 //===- PDBSymbolUsingNamespace.h - using namespace info ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h
index 917f3ed73910..c26d8d1ed10c 100644
--- a/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -1,9 +1,8 @@
 //===- PDBTypes.h - Defines enums for various fields contained in PDB ----====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -127,6 +126,7 @@ enum class PDB_Machine {
   Am33 = 0x13,
   Amd64 = 0x8664,
   Arm = 0x1C0,
+  Arm64 = 0xaa64,
   ArmNT = 0x1C4,
   Ebc = 0xEBC,
   x86 = 0x14C,
@@ -146,11 +146,69 @@ enum class PDB_Machine {
   WceMipsV2 = 0x169
 };
 
-enum class PDB_SourceCompression {
-  None,
-  RunLengthEncoded,
-  Huffman,
-  LZ,
+// A struct with an inner unnamed enum with explicit underlying type results
+// in an enum class that can implicitly convert to the underlying type, which
+// is convenient for this enum.
+struct PDB_SourceCompression {
+  enum : uint32_t {
+    // No compression. Produced e.g. by `link.exe /natvis:foo.natvis`.
+    None,
+    // Not known what produces this.
+    RunLengthEncoded,
+    // Not known what produces this.
+    Huffman,
+    // Not known what produces this.
+    LZ,
+    // Produced e.g. by `csc /debug`. The encoded data is its own mini-stream
+    // with the following layout (in little endian):
+    //   GUID LanguageTypeGuid;
+    //   GUID LanguageVendorGuid;
+    //   GUID DocumentTypeGuid;
+    //   GUID HashFunctionGuid;
+    //   uint32_t HashDataSize;
+    //   uint32_t CompressedDataSize;
+    // Followed by HashDataSize bytes containing a hash checksum,
+    // followed by CompressedDataSize bytes containing source contents.
+    //
+    // CompressedDataSize can be 0, in this case only the hash data is present.
+    // (CompressedDataSize is != 0 e.g. if `/embed` is passed to csc.exe.)
+    // The compressed data format is:
+    //   uint32_t UncompressedDataSize;
+    // If UncompressedDataSize is 0, the data is stored uncompressed and
+    // CompressedDataSize stores the uncompressed size.
+    // If UncompressedDataSize is != 0, then the data is in raw deflate
+    // encoding as described in rfc1951.
+    //
+    // A GUID is 16 bytes, stored in the usual
+    //   uint32_t
+    //   uint16_t
+    //   uint16_t
+    //   uint8_t[8]
+    // layout.
+    //
+    // Well-known GUIDs for LanguageTypeGuid are:
+    //   63a08714-fc37-11d2-904c-00c04fa302a1 C
+    //   3a12d0b7-c26c-11d0-b442-00a0244a1dd2 C++
+    //   3f5162f8-07c6-11d3-9053-00c04fa302a1 C#
+    //   af046cd1-d0e1-11d2-977c-00a0c9b4d50c Cobol
+    //   ab4f38c9-b6e6-43ba-be3b-58080b2ccce3 F#
+    //   3a12d0b4-c26c-11d0-b442-00a0244a1dd2 Java
+    //   3a12d0b6-c26c-11d0-b442-00a0244a1dd2 JScript
+    //   af046cd2-d0e1-11d2-977c-00a0c9b4d50c Pascal
+    //   3a12d0b8-c26c-11d0-b442-00a0244a1dd2 Visual Basic
+    //
+    // Well-known GUIDs for LanguageVendorGuid are:
+    //   994b45c4-e6e9-11d2-903f-00c04fa302a1 Microsoft
+    //
+    // Well-known GUIDs for DocumentTypeGuid are:
+    //   5a869d0b-6611-11d3-bd2a-0000f80849bd Text
+    //
+    // Well-known GUIDs for HashFunctionGuid are:
+    //   406ea660-64cf-4c82-b6f0-42d48172a799 MD5    (HashDataSize is 16)
+    //   ff1816ec-aa5e-4d10-87f7-6f4963833460 SHA1   (HashDataSize is 20)
+    //   8829d00f-11b8-4213-878b-770e8597ac16 SHA256 (HashDataSize is 32)
+    DotNet = 101,
+  };
 };
 
 /// These values correspond to the CV_call_e enumeration, and are documented
diff --git a/include/llvm/DebugInfo/PDB/UDTLayout.h b/include/llvm/DebugInfo/PDB/UDTLayout.h
index c4234c191e21..c67b093b63c0 100644
--- a/include/llvm/DebugInfo/PDB/UDTLayout.h
+++ b/include/llvm/DebugInfo/PDB/UDTLayout.h
@@ -1,9 +1,8 @@
 //===- UDTLayout.h - UDT layout info ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index ab82be3706d8..db7a61a8f160 100644
--- a/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -1,9 +1,8 @@
 //===- llvm/DebugInfo/Symbolize/DIPrinter.h ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,15 +20,22 @@ namespace llvm {
 struct DILineInfo;
 class DIInliningInfo;
 struct DIGlobal;
+struct DILocal;
 
 namespace symbolize {
 
 class DIPrinter {
+public:
+  enum class OutputStyle { LLVM, GNU };
+
+private:
   raw_ostream &OS;
   bool PrintFunctionNames;
   bool PrintPretty;
   int PrintSourceContext;
   bool Verbose;
+  bool Basenames;
+  OutputStyle Style;
 
   void print(const DILineInfo &Info, bool Inlined);
   void printContext(const std::string &FileName, int64_t Line);
@@ -37,14 +43,16 @@ class DIPrinter {
 public:
   DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true,
             bool PrintPretty = false, int PrintSourceContext = 0,
-            bool Verbose = false)
+            bool Verbose = false, bool Basenames = false,
+            OutputStyle Style = OutputStyle::LLVM)
       : OS(OS), PrintFunctionNames(PrintFunctionNames),
         PrintPretty(PrintPretty), PrintSourceContext(PrintSourceContext),
-        Verbose(Verbose) {}
+        Verbose(Verbose), Basenames(Basenames), Style(Style) {}
 
   DIPrinter &operator<<(const DILineInfo &Info);
   DIPrinter &operator<<(const DIInliningInfo &Info);
   DIPrinter &operator<<(const DIGlobal &Global);
+  DIPrinter &operator<<(const DILocal &Local);
 };
 }
 }
diff --git a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
index e576a91e887c..506ecc424b4c 100644
--- a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
+++ b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
@@ -1,9 +1,8 @@
 //===- SymbolizableModule.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,13 +24,16 @@ class SymbolizableModule {
 public:
   virtual ~SymbolizableModule() = default;
 
-  virtual DILineInfo symbolizeCode(uint64_t ModuleOffset,
+  virtual DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
                                    FunctionNameKind FNKind,
                                    bool UseSymbolTable) const = 0;
-  virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
-                                              FunctionNameKind FNKind,
-                                              bool UseSymbolTable) const = 0;
-  virtual DIGlobal symbolizeData(uint64_t ModuleOffset) const = 0;
+  virtual DIInliningInfo
+  symbolizeInlinedCode(object::SectionedAddress ModuleOffset,
+                       FunctionNameKind FNKind, bool UseSymbolTable) const = 0;
+  virtual DIGlobal
+  symbolizeData(object::SectionedAddress ModuleOffset) const = 0;
+  virtual std::vector<DILocal>
+  symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;
 
   // Return true if this is a 32-bit x86 PE COFF module.
   virtual bool isWin32Module() const = 0;
diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 289148f569db..d3da28ca0b7b 100644
--- a/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -1,9 +1,8 @@
 //===- Symbolize.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,35 +35,35 @@ using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
 class LLVMSymbolizer {
 public:
   struct Options {
-    FunctionNameKind PrintFunctions;
-    bool UseSymbolTable : 1;
-    bool Demangle : 1;
-    bool RelativeAddresses : 1;
+    FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName;
+    bool UseSymbolTable = true;
+    bool Demangle = true;
+    bool RelativeAddresses = false;
     std::string DefaultArch;
     std::vector<std::string> DsymHints;
-
-    Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName,
-            bool UseSymbolTable = true, bool Demangle = true,
-            bool RelativeAddresses = false, std::string DefaultArch = "")
-        : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable),
-          Demangle(Demangle), RelativeAddresses(RelativeAddresses),
-          DefaultArch(std::move(DefaultArch)) {}
+    std::string FallbackDebugPath;
+    std::string DWPName;
   };
 
-  LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
+  LLVMSymbolizer() = default;
+  LLVMSymbolizer(const Options &Opts) : Opts(Opts) {}
 
   ~LLVMSymbolizer() {
     flush();
   }
 
+  Expected<DILineInfo> symbolizeCode(const ObjectFile &Obj,
+                                     object::SectionedAddress ModuleOffset);
   Expected<DILineInfo> symbolizeCode(const std::string &ModuleName,
-                                     uint64_t ModuleOffset,
-                                     StringRef DWPName = "");
-  Expected<DIInliningInfo> symbolizeInlinedCode(const std::string &ModuleName,
-                                                uint64_t ModuleOffset,
-                                                StringRef DWPName = "");
+                                     object::SectionedAddress ModuleOffset);
+  Expected<DIInliningInfo>
+  symbolizeInlinedCode(const std::string &ModuleName,
+                       object::SectionedAddress ModuleOffset);
   Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
-                                   uint64_t ModuleOffset);
+                                   object::SectionedAddress ModuleOffset);
+  Expected<std::vector<DILocal>>
+  symbolizeFrame(const std::string &ModuleName,
+                 object::SectionedAddress ModuleOffset);
   void flush();
 
   static std::string
@@ -74,14 +73,23 @@ public:
 private:
   // Bundles together object file with code/data and object file with
   // corresponding debug info. These objects can be the same.
-  using ObjectPair = std::pair<ObjectFile *, ObjectFile *>;
+  using ObjectPair = std::pair<const ObjectFile *, const ObjectFile *>;
+
+  Expected<DILineInfo>
+  symbolizeCodeCommon(SymbolizableModule *Info,
+                      object::SectionedAddress ModuleOffset);
 
   /// Returns a SymbolizableModule or an error if loading debug info failed.
   /// Only one attempt is made to load a module, and errors during loading are
   /// only reported once. Subsequent calls to get module info for a module that
   /// failed to load will return nullptr.
   Expected<SymbolizableModule *>
-  getOrCreateModuleInfo(const std::string &ModuleName, StringRef DWPName = "");
+  getOrCreateModuleInfo(const std::string &ModuleName);
+
+  Expected<SymbolizableModule *>
+  createModuleInfo(const ObjectFile *Obj,
+                   std::unique_ptr<DIContext> Context,
+                   StringRef ModuleName);
 
   ObjectFile *lookUpDsymFile(const std::string &Path,
                              const MachOObjectFile *ExeObj,
diff --git a/include/llvm/Demangle/Compiler.h b/include/llvm/Demangle/Compiler.h
deleted file mode 100644
index 248d6e3a7faa..000000000000
--- a/include/llvm/Demangle/Compiler.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===--- Compiler.h ---------------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-// This file contains a variety of feature test macros copied from
-// include/llvm/Support/Compiler.h so that LLVMDemangle does not need to take
-// a dependency on LLVMSupport.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEMANGLE_COMPILER_H
-#define LLVM_DEMANGLE_COMPILER_H
-
-#ifdef _MSC_VER
-// snprintf is implemented in VS 2015
-#if _MSC_VER < 1900
-#define snprintf _snprintf_s
-#endif
-#endif
-
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
-#ifndef __has_cpp_attribute
-#define __has_cpp_attribute(x) 0
-#endif
-
-#ifndef __has_attribute
-#define __has_attribute(x) 0
-#endif
-
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#ifndef LLVM_GNUC_PREREQ
-#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
-#define LLVM_GNUC_PREREQ(maj, min, patch)                                      \
-  ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >=          \
-   ((maj) << 20) + ((min) << 10) + (patch))
-#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
-#define LLVM_GNUC_PREREQ(maj, min, patch)                                      \
-  ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
-#else
-#define LLVM_GNUC_PREREQ(maj, min, patch) 0
-#endif
-#endif
-
-#if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0)
-#define LLVM_ATTRIBUTE_USED __attribute__((__used__))
-#else
-#define LLVM_ATTRIBUTE_USED
-#endif
-
-#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0)
-#define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
-#elif defined(_MSC_VER)
-#define LLVM_BUILTIN_UNREACHABLE __assume(false)
-#endif
-
-#if __has_attribute(noinline) || LLVM_GNUC_PREREQ(3, 4, 0)
-#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
-#elif defined(_MSC_VER)
-#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline)
-#else
-#define LLVM_ATTRIBUTE_NOINLINE
-#endif
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-#define LLVM_DUMP_METHOD LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED
-#else
-#define LLVM_DUMP_METHOD LLVM_ATTRIBUTE_NOINLINE
-#endif
-
-#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
-#define LLVM_FALLTHROUGH [[fallthrough]]
-#elif __has_cpp_attribute(gnu::fallthrough)
-#define LLVM_FALLTHROUGH [[gnu::fallthrough]]
-#elif !__cplusplus
-// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
-// error when __has_cpp_attribute is given a scoped attribute in C mode.
-#define LLVM_FALLTHROUGH
-#elif __has_cpp_attribute(clang::fallthrough)
-#define LLVM_FALLTHROUGH [[clang::fallthrough]]
-#else
-#define LLVM_FALLTHROUGH
-#endif
-
-#endif
diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h
index 4c9dc9569e18..6fea7ef13f11 100644
--- a/include/llvm/Demangle/Demangle.h
+++ b/include/llvm/Demangle/Demangle.h
@@ -1,9 +1,8 @@
 //===--- Demangle.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_DEMANGLE_DEMANGLE_H
 
 #include <cstddef>
+#include <string>
 
 namespace llvm {
 /// This is a llvm local version of __cxa_demangle. Other than the name and
@@ -36,6 +36,13 @@ enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 };
 char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n,
                         int *status, MSDemangleFlags Flags = MSDF_None);
 
+/// Attempt to demangle a string using different demangling schemes.
+/// The function uses heuristics to determine which demangling scheme to use.
+/// \param MangledName - reference to string to demangle.
+/// \returns - the demangled string, or a copy of the input string if no
+/// demangling occurred.
+std::string demangle(const std::string &MangledName);
+
 /// "Partial" demangler. This supports demangling a string into an AST
 /// (typically an intermediate stage in itaniumDemangle) and querying certain
 /// properties or partially printing the demangled name.
diff --git a/include/llvm/Demangle/DemangleConfig.h b/include/llvm/Demangle/DemangleConfig.h
new file mode 100644
index 000000000000..73f89d357c85
--- /dev/null
+++ b/include/llvm/Demangle/DemangleConfig.h
@@ -0,0 +1,99 @@
+//===--- DemangleConfig.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a variety of feature test macros copied from
+// include/llvm/Support/Compiler.h so that LLVMDemangle does not need to take
+// a dependency on LLVMSupport.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_COMPILER_H
+#define LLVM_DEMANGLE_COMPILER_H
+
+#ifdef _MSC_VER
+// snprintf is implemented in VS 2015
+#if _MSC_VER < 1900
+#define snprintf _snprintf_s
+#endif
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#ifndef __has_cpp_attribute
+#define __has_cpp_attribute(x) 0
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#ifndef DEMANGLE_GNUC_PREREQ
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch)                           \
+  ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >=          \
+   ((maj) << 20) + ((min) << 10) + (patch))
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch)                           \
+  ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
+#else
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch) 0
+#endif
+#endif
+
+#if __has_attribute(used) || DEMANGLE_GNUC_PREREQ(3, 1, 0)
+#define DEMANGLE_ATTRIBUTE_USED __attribute__((__used__))
+#else
+#define DEMANGLE_ATTRIBUTE_USED
+#endif
+
+#if __has_builtin(__builtin_unreachable) || DEMANGLE_GNUC_PREREQ(4, 5, 0)
+#define DEMANGLE_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define DEMANGLE_UNREACHABLE __assume(false)
+#else
+#define DEMANGLE_UNREACHABLE
+#endif
+
+#if __has_attribute(noinline) || DEMANGLE_GNUC_PREREQ(3, 4, 0)
+#define DEMANGLE_ATTRIBUTE_NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER)
+#define DEMANGLE_ATTRIBUTE_NOINLINE __declspec(noinline)
+#else
+#define DEMANGLE_ATTRIBUTE_NOINLINE
+#endif
+
+#if !defined(NDEBUG)
+#define DEMANGLE_DUMP_METHOD DEMANGLE_ATTRIBUTE_NOINLINE DEMANGLE_ATTRIBUTE_USED
+#else
+#define DEMANGLE_DUMP_METHOD DEMANGLE_ATTRIBUTE_NOINLINE
+#endif
+
+#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
+#define DEMANGLE_FALLTHROUGH [[fallthrough]]
+#elif __has_cpp_attribute(gnu::fallthrough)
+#define DEMANGLE_FALLTHROUGH [[gnu::fallthrough]]
+#elif !__cplusplus
+// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
+// error when __has_cpp_attribute is given a scoped attribute in C mode.
+#define DEMANGLE_FALLTHROUGH
+#elif __has_cpp_attribute(clang::fallthrough)
+#define DEMANGLE_FALLTHROUGH [[clang::fallthrough]]
+#else
+#define DEMANGLE_FALLTHROUGH
+#endif
+
+#define DEMANGLE_NAMESPACE_BEGIN namespace llvm { namespace itanium_demangle {
+#define DEMANGLE_NAMESPACE_END } }
+
+#endif
diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h
index 0b9187f30a5a..aaccb27e17a3 100644
--- a/include/llvm/Demangle/ItaniumDemangle.h
+++ b/include/llvm/Demangle/ItaniumDemangle.h
@@ -1,23 +1,26 @@
 //===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// Generic itanium demangler library. This file has two byte-for-byte identical
+// copies in the source tree, one in libcxxabi, and the other in llvm.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
-#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
 //   - C++ modules TS
 
-#include "llvm/Demangle/Compiler.h"
-#include "llvm/Demangle/StringView.h"
-#include "llvm/Demangle/Utility.h"
-
+#include "DemangleConfig.h"
+#include "StringView.h"
+#include "Utility.h"
 #include <cassert>
 #include <cctype>
 #include <cstdio>
@@ -86,6 +89,7 @@
     X(InitListExpr) \
     X(FoldExpr) \
     X(ThrowExpr) \
+    X(UUIDOfExpr) \
     X(BoolExpr) \
     X(IntegerCastExpr) \
     X(IntegerLiteral) \
@@ -95,8 +99,8 @@
     X(BracedExpr) \
     X(BracedRangeExpr)
 
-namespace llvm {
-namespace itanium_demangle {
+DEMANGLE_NAMESPACE_BEGIN
+
 // Base class of all AST nodes. The AST is built by the parser, then is
 // traversed by the printLeft/Right functions to produce a demangled string.
 class Node {
@@ -194,7 +198,7 @@ public:
   virtual ~Node() = default;
 
 #ifndef NDEBUG
-  LLVM_DUMP_METHOD void dump() const;
+  DEMANGLE_DUMP_METHOD void dump() const;
 #endif
 };
 
@@ -1278,7 +1282,7 @@ public:
     case SpecialSubKind::iostream:
       return StringView("basic_iostream");
     }
-    LLVM_BUILTIN_UNREACHABLE;
+    DEMANGLE_UNREACHABLE;
   }
 
   void printLeft(OutputStream &S) const override {
@@ -1330,7 +1334,7 @@ public:
     case SpecialSubKind::iostream:
       return StringView("iostream");
     }
-    LLVM_BUILTIN_UNREACHABLE;
+    DEMANGLE_UNREACHABLE;
   }
 
   void printLeft(OutputStream &S) const override {
@@ -1870,6 +1874,21 @@ public:
   }
 };
 
+// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
+class UUIDOfExpr : public Node {
+  Node *Operand;
+public:
+  UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
+
+  template<typename Fn> void match(Fn F) const { F(Operand); }
+
+  void printLeft(OutputStream &S) const override {
+    S << "__uuidof(";
+    Operand->print(S);
+    S << ")";
+  }
+};
+
 class BoolExpr : public Node {
   bool Value;
 
@@ -2476,6 +2495,12 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
       return nullptr;
     return make<ClosureTypeName>(Params, Count);
   }
+  if (consumeIf("Ub")) {
+    (void)parseNumber();
+    if (!consumeIf('_'))
+      return nullptr;
+    return make<NameType>("'block-literal'");
+  }
   return nullptr;
 }
 
@@ -2785,11 +2810,13 @@ AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
 // <ctor-dtor-name> ::= C1  # complete object constructor
 //                  ::= C2  # base object constructor
 //                  ::= C3  # complete object allocating constructor
-//   extension      ::= C5    # ?
+//   extension      ::= C4  # gcc old-style "[unified]" constructor
+//   extension      ::= C5  # the COMDAT used for ctors
 //                  ::= D0  # deleting destructor
 //                  ::= D1  # complete object destructor
 //                  ::= D2  # base object destructor
-//   extension      ::= D5    # ?
+//   extension      ::= D4  # gcc old-style "[unified]" destructor
+//   extension      ::= D5  # the COMDAT used for dtors
 template <typename Derived, typename Alloc>
 Node *
 AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
@@ -2812,7 +2839,8 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
 
   if (consumeIf('C')) {
     bool IsInherited = consumeIf('I');
-    if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
+    if (look() != '1' && look() != '2' && look() != '3' && look() != '4' &&
+        look() != '5')
       return nullptr;
     int Variant = look() - '0';
     ++First;
@@ -2821,15 +2849,15 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
       if (getDerived().parseName(State) == nullptr)
         return nullptr;
     }
-    return make<CtorDtorName>(SoFar, false, Variant);
+    return make<CtorDtorName>(SoFar, /*IsDtor=*/false, Variant);
   }
 
-  if (look() == 'D' &&
-      (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
+  if (look() == 'D' && (look(1) == '0' || look(1) == '1' || look(1) == '2' ||
+                        look(1) == '4' || look(1) == '5')) {
     int Variant = look(1) - '0';
     First += 2;
     if (State) State->CtorDtorConversion = true;
-    return make<CtorDtorName>(SoFar, true, Variant);
+    return make<CtorDtorName>(SoFar, /*IsDtor=*/true, Variant);
   }
 
   return nullptr;
@@ -3467,7 +3495,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
       Result = getDerived().parseFunctionType();
       break;
     }
-    LLVM_FALLTHROUGH;
+    DEMANGLE_FALLTHROUGH;
   }
   case 'U': {
     Result = getDerived().parseQualifiedType();
@@ -3564,7 +3592,11 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
     StringView Res = parseBareSourceName();
     if (Res.empty())
       return nullptr;
-    return make<NameType>(Res);
+    // Typically, <builtin-type>s are not considered substitution candidates,
+    // but the exception to that rule is vendor extended types (Itanium C++
+    // ABI 5.9.1).
+    Result = make<NameType>(Res);
+    break;
   }
   case 'D':
     switch (look(1)) {
@@ -3592,6 +3624,10 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
     case 's':
       First += 2;
       return make<NameType>("char16_t");
+    //                ::= Du   # char8_t (C++2a, not yet in the Itanium spec)
+    case 'u':
+      First += 2;
+      return make<NameType>("char8_t");
     //                ::= Da   # auto (in dependent new-expressions)
     case 'a':
       First += 2;
@@ -3754,7 +3790,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
       // substitution table.
       return Sub;
     }
-    LLVM_FALLTHROUGH;
+    DEMANGLE_FALLTHROUGH;
   }
   //        ::= <class-enum-type>
   default: {
@@ -4633,6 +4669,21 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
   case '9':
     return getDerived().parseUnresolvedName();
   }
+
+  if (consumeIf("u8__uuidoft")) {
+    Node *Ty = getDerived().parseType();
+    if (!Ty)
+      return nullptr;
+    return make<UUIDOfExpr>(Ty);
+  }
+
+  if (consumeIf("u8__uuidofz")) {
+    Node *Ex = getDerived().parseExpr();
+    if (!Ex)
+      return nullptr;
+    return make<UUIDOfExpr>(Ex);
+  }
+
   return nullptr;
 }
 
@@ -5139,7 +5190,7 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
 // extension      ::= ___Z <encoding> _block_invoke_<decimal-digit>+
 template <typename Derived, typename Alloc>
 Node *AbstractManglingParser<Derived, Alloc>::parse() {
-  if (consumeIf("_Z")) {
+  if (consumeIf("_Z") || consumeIf("__Z")) {
     Node *Encoding = getDerived().parseEncoding();
     if (Encoding == nullptr)
       return nullptr;
@@ -5152,7 +5203,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parse() {
     return Encoding;
   }
 
-  if (consumeIf("___Z")) {
+  if (consumeIf("___Z") || consumeIf("____Z")) {
     Node *Encoding = getDerived().parseEncoding();
     if (Encoding == nullptr || !consumeIf("_block_invoke"))
       return nullptr;
@@ -5178,7 +5229,6 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
                                Alloc>::AbstractManglingParser;
 };
 
-}  // namespace itanium_demangle
-}  // namespace llvm
+DEMANGLE_NAMESPACE_END
 
-#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/include/llvm/Demangle/MicrosoftDemangle.h b/include/llvm/Demangle/MicrosoftDemangle.h
index 97b918fc9459..382e79401c43 100644
--- a/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/include/llvm/Demangle/MicrosoftDemangle.h
@@ -1,16 +1,15 @@
 //===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
 #define LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
 
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
 #include "llvm/Demangle/StringView.h"
 #include "llvm/Demangle/Utility.h"
@@ -56,24 +55,21 @@ public:
     }
   }
 
-  char *allocUnalignedBuffer(size_t Length) {
-    uint8_t *Buf = Head->Buf + Head->Used;
-
-    Head->Used += Length;
-    if (Head->Used > Head->Capacity) {
-      // It's possible we need a buffer which is larger than our default unit
-      // size, so we need to be careful to add a node with capacity that is at
-      // least as large as what we need.
-      addNode(std::max(AllocUnit, Length));
-      Head->Used = Length;
-      Buf = Head->Buf;
-    }
+  char *allocUnalignedBuffer(size_t Size) {
+    assert(Head && Head->Buf);
+
+    uint8_t *P = Head->Buf + Head->Used;
+
+    Head->Used += Size;
+    if (Head->Used <= Head->Capacity)
+      return reinterpret_cast<char *>(P);
 
-    return reinterpret_cast<char *>(Buf);
+    addNode(std::max(AllocUnit, Size));
+    Head->Used = Size;
+    return reinterpret_cast<char *>(Head->Buf);
   }
 
   template <typename T, typename... Args> T *allocArray(size_t Count) {
-
     size_t Size = Count * sizeof(T);
     assert(Head && Head->Buf);
 
@@ -84,17 +80,16 @@ public:
     size_t Adjustment = AlignedP - P;
 
     Head->Used += Size + Adjustment;
-    if (Head->Used < Head->Capacity)
+    if (Head->Used <= Head->Capacity)
       return new (PP) T[Count]();
 
-    addNode(AllocUnit);
+    addNode(std::max(AllocUnit, Size));
     Head->Used = Size;
     return new (Head->Buf) T[Count]();
   }
 
   template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
-
-    size_t Size = sizeof(T);
+    constexpr size_t Size = sizeof(T);
     assert(Head && Head->Buf);
 
     size_t P = (size_t)Head->Buf + Head->Used;
@@ -104,9 +99,10 @@ public:
     size_t Adjustment = AlignedP - P;
 
     Head->Used += Size + Adjustment;
-    if (Head->Used < Head->Capacity)
+    if (Head->Used <= Head->Capacity)
       return new (PP) T(std::forward<Args>(ConstructorArgs)...);
 
+    static_assert(Size < AllocUnit, "");
     addNode(AllocUnit);
     Head->Used = Size;
     return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
@@ -160,6 +156,8 @@ public:
 private:
   SymbolNode *demangleEncodedSymbol(StringView &MangledName,
                                     QualifiedNameNode *QN);
+  SymbolNode *demangleDeclarator(StringView &MangledName);
+  SymbolNode *demangleMD5Name(StringView &MangledName);
 
   VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
                                                StorageClass SC);
@@ -179,8 +177,9 @@ private:
 
   ArrayTypeNode *demangleArrayType(StringView &MangledName);
 
+  NodeArrayNode *demangleFunctionParameterList(StringView &MangledName,
+                                               bool &IsVariadic);
   NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
-  NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);
 
   std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
   uint64_t demangleUnsigned(StringView &MangledName);
@@ -207,6 +206,8 @@ private:
   NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
   IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
                                                     NameBackrefBehavior NBB);
+  IntrinsicFunctionKind
+  translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group);
   IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
   IdentifierNode *
   demangleFunctionIdentifierCode(StringView &MangledName,
@@ -223,7 +224,7 @@ private:
   demangleSpecialTableSymbolNode(StringView &MangledName,
                                  SpecialIntrinsicKind SIK);
   LocalStaticGuardVariableNode *
-  demangleLocalStaticGuard(StringView &MangledName);
+  demangleLocalStaticGuard(StringView &MangledName, bool IsThread);
   VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
                                               StringView &MangledName,
                                               StringView VariableName);
diff --git a/include/llvm/Demangle/MicrosoftDemangleNodes.h b/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 9e3478e9fd29..da9d9d5bfdc0 100644
--- a/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -1,11 +1,30 @@
+//===- MicrosoftDemangleNodes.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AST nodes used in the MSVC demangler.
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
 #define LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
 
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/StringView.h"
 #include <array>
 
+namespace llvm {
+namespace itanium_demangle {
 class OutputStream;
+}
+}
+
+using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::StringView;
 
 namespace llvm {
 namespace ms_demangle {
@@ -63,6 +82,7 @@ enum class PrimitiveKind {
   Char,
   Schar,
   Uchar,
+  Char8,
   Char16,
   Char32,
   Short,
@@ -151,8 +171,8 @@ enum class IntrinsicFunctionKind : uint8_t {
   VectorCopyCtorIter,         // ?__G vector copy constructor iterator
   VectorVbaseCopyCtorIter,    // ?__H vector vbase copy constructor iterator
   ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy constructor
-  CoAwait,                    // ?__L co_await
-  Spaceship,                  // operator<=>
+  CoAwait,                    // ?__L operator co_await
+  Spaceship,                  // ?__M operator<=>
   MaxIntrinsic
 };
 
@@ -324,7 +344,7 @@ struct FunctionSignatureNode : public TypeNode {
   // Function parameters
   NodeArrayNode *Params = nullptr;
 
-  // True if the function type is noexcept
+  // True if the function type is noexcept.
   bool IsNoexcept = false;
 };
 
@@ -389,6 +409,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode {
 
   void output(OutputStream &OS, OutputFlags Flags) const override;
 
+  bool IsThread = false;
   uint32_t ScopeIndex = 0;
 };
 
@@ -494,7 +515,7 @@ struct NodeArrayNode : public Node {
 
   void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
 
-  Node **Nodes = 0;
+  Node **Nodes = nullptr;
   size_t Count = 0;
 };
 
@@ -602,4 +623,4 @@ struct FunctionSymbolNode : public SymbolNode {
 } // namespace ms_demangle
 } // namespace llvm
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/llvm/Demangle/README.txt b/include/llvm/Demangle/README.txt
new file mode 100644
index 000000000000..514ff6dd16f2
--- /dev/null
+++ b/include/llvm/Demangle/README.txt
@@ -0,0 +1,52 @@
+Itanium Name Demangler Library
+==============================
+
+Introduction
+------------
+
+This directory contains the generic itanium name demangler library. The main
+purpose of the library is to demangle C++ symbols, i.e. convert the string
+"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform
+some simple analysis on the mangled name, or (in LLVM) use the opaque
+ItaniumPartialDemangler to query the demangled AST.
+
+Why are there multiple copies of this library in the source tree?
+-----------------------------------------------------------------
+
+This directory is mirrored between libcxxabi/demangle and
+llvm/include/llvm/Demangle. The simple reason for this is that both projects
+need to demangle symbols, but neither can depend on each other. libcxxabi needs
+the demangler to implement __cxa_demangle, which is part of the itanium ABI
+spec. LLVM needs a copy for a bunch of places, but doesn't want to use the
+system's __cxa_demangle because it a) might not be available (i.e., on Windows),
+and b) probably isn't that up-to-date on the latest language features.
+
+The copy of the demangler in LLVM has some extra pieces that aren't needed in
+libcxxabi (i.e., the MSVC demangler, ItaniumPartialDemangler), which depend on
+the shared generic components. Despite these differences, we want to keep the
+"core" generic demangling library identical between both copies to simplify
+development and testing.
+
+If you're working on the generic library, then do the work first in libcxxabi,
+then run the cp-to-llvm.sh script in src/demangle. This script takes as an
+argument the path to llvm, and re-copies the changes you made to libcxxabi over.
+Note that this script just blindly overwrites all changes to the generic library
+in llvm, so be careful.
+
+Because the core demangler needs to work in libcxxabi, everything needs to be
+declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't
+introduce any code that depends on the libcxx dylib.
+
+Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have
+both LLVM and libcxxabi depend on a shared demangler library.
+
+Testing
+-------
+
+The tests are split up between libcxxabi/test/{unit,}test_demangle.cpp, and
+llvm/unittest/Demangle. The llvm directory should only get tests for stuff not
+included in the core library. In the future though, we should probably move all
+the tests to LLVM.
+
+It is also a really good idea to run libFuzzer after non-trivial changes, see
+libcxxabi/fuzz/cxa_demangle_fuzzer.cpp and https://llvm.org/docs/LibFuzzer.html.
diff --git a/include/llvm/Demangle/StringView.h b/include/llvm/Demangle/StringView.h
index a89deda694c2..ceb6c7958066 100644
--- a/include/llvm/Demangle/StringView.h
+++ b/include/llvm/Demangle/StringView.h
@@ -1,22 +1,25 @@
 //===--- StringView.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
 //
+// FIXME: Use std::string_view instead when we support C++17.
 //
-// This file contains a limited version of LLVM's StringView class.  It is
-// copied here so that LLVMDemangle need not take a dependency on LLVMSupport.
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_STRINGVIEW_H
-#define LLVM_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
 
+#include "DemangleConfig.h"
 #include <algorithm>
 #include <cassert>
 #include <cstring>
 
+DEMANGLE_NAMESPACE_BEGIN
+
 class StringView {
   const char *First;
   const char *Last;
@@ -43,7 +46,7 @@ public:
     if (FindBegin < size()) {
       // Just forward to memchr, which is faster than a hand-rolled loop.
       if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
-        return static_cast<const char *>(P) - First;
+        return size_t(static_cast<const char *>(P) - First);
     }
     return npos;
   }
@@ -118,4 +121,6 @@ inline bool operator==(const StringView &LHS, const StringView &RHS) {
          std::equal(LHS.begin(), LHS.end(), RHS.begin());
 }
 
+DEMANGLE_NAMESPACE_END
+
 #endif
diff --git a/include/llvm/Demangle/Utility.h b/include/llvm/Demangle/Utility.h
index 1d1601c81635..ec23859af46a 100644
--- a/include/llvm/Demangle/Utility.h
+++ b/include/llvm/Demangle/Utility.h
@@ -1,25 +1,27 @@
 //===--- Utility.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
 //
+// Provide some utility classes for use in the demangler(s).
 //
-// This file contains several utility classes used by the demangle library.
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_UTILITY_H
-#define LLVM_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
-
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <iterator>
 #include <limits>
 
+DEMANGLE_NAMESPACE_BEGIN
+
 // Stream that AST nodes write their string representation into after the AST
 // has been parsed.
 class OutputStream {
@@ -184,4 +186,6 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
   return true;
 }
 
+DEMANGLE_NAMESPACE_END
+
 #endif
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index b61cb24fa5fb..4fb6dad96387 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -1,9 +1,8 @@
 //===- ExecutionEngine.h - Abstract Execution Engine Interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/Binary.h"
@@ -635,7 +635,13 @@ public:
   }
 
   // Use OrcMCJITReplacement instead of MCJIT. Off by default.
-  void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+  LLVM_ATTRIBUTE_DEPRECATED(
+      inline void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement),
+      "ORCv1 utilities (including OrcMCJITReplacement) are deprecated. Please "
+      "use ORCv2/LLJIT instead (see docs/ORCv2.rst)");
+
+  void setUseOrcMCJITReplacement(ORCv1DeprecationAcknowledgement,
+                                 bool UseOrcMCJITReplacement) {
     this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
   }
 
@@ -659,6 +665,10 @@ public:
   ExecutionEngine *create(TargetMachine *TM);
 };
 
+void EngineBuilder::setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+  this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
+}
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
 
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index 504e30a018b6..1ca989da1b7e 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -1,9 +1,8 @@
 //===- GenericValue.h - Represent any type of LLVM value --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h
index a14707840ad8..0749409766e3 100644
--- a/include/llvm/ExecutionEngine/Interpreter.h
+++ b/include/llvm/ExecutionEngine/Interpreter.h
@@ -1,9 +1,8 @@
 //===-- Interpreter.h - Abstract Execution Engine Interface -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index 1b08379b8c3b..606b6f7cc128 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -1,9 +1,8 @@
 //===- JITEventListener.h - Exposes events from JIT compilation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
new file mode 100644
index 000000000000..8d2f641254b3
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
@@ -0,0 +1,80 @@
+//===--------- EHFrameSupport.h - JITLink eh-frame utils --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EHFrame registration support for JITLink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
+#define LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Registers all FDEs in the given eh-frame section with the current process.
+Error registerEHFrameSection(const void *EHFrameSectionAddr);
+
+/// Deregisters all FDEs in the given eh-frame section with the current process.
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr);
+
+/// Supports registration/deregistration of EH-frames in a target process.
+class EHFrameRegistrar {
+public:
+  virtual ~EHFrameRegistrar();
+  virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
+  virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
+};
+
+/// Registers / Deregisters EH-frames in the current process.
+class InProcessEHFrameRegistrar final : public EHFrameRegistrar {
+public:
+  /// Get a reference to the InProcessEHFrameRegistrar singleton.
+  static InProcessEHFrameRegistrar &getInstance();
+
+  InProcessEHFrameRegistrar(const InProcessEHFrameRegistrar &) = delete;
+  InProcessEHFrameRegistrar &
+  operator=(const InProcessEHFrameRegistrar &) = delete;
+
+  InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete;
+  InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete;
+
+  Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+    return registerEHFrameSection(
+        jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+  }
+
+  Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+    return deregisterEHFrameSection(
+        jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+  }
+
+private:
+  InProcessEHFrameRegistrar();
+};
+
+using StoreFrameAddressFunction = std::function<void(JITTargetAddress)>;
+
+/// Creates a pass that records the address of the EH frame section. If no
+/// eh-frame section is found, it will set EHFrameAddr to zero.
+///
+/// Authors of JITLinkContexts can use this function to register a post-fixup
+/// pass that records the address of the eh-frame section. This address can
+/// be used after finalization to register and deregister the frame.
+AtomGraphPassFunction
+createEHFrameRecorderPass(const Triple &TT,
+                          StoreFrameAddressFunction StoreFrameAddress);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLink.h b/include/llvm/ExecutionEngine/JITLink/JITLink.h
new file mode 100644
index 000000000000..be80d44ccf51
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -0,0 +1,930 @@
+//===------------ JITLink.h - JIT linker functionality ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains generic JIT-linker types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+
+#include "JITLinkMemoryManager.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <map>
+#include <string>
+#include <system_error>
+
+namespace llvm {
+namespace jitlink {
+
+/// Base class for errors originating in JIT linker, e.g. missing relocation
+/// support.
+class JITLinkError : public ErrorInfo<JITLinkError> {
+public:
+  static char ID;
+
+  JITLinkError(Twine ErrMsg) : ErrMsg(ErrMsg.str()) {}
+
+  void log(raw_ostream &OS) const override;
+  const std::string &getErrorMessage() const { return ErrMsg; }
+  std::error_code convertToErrorCode() const override;
+
+private:
+  std::string ErrMsg;
+};
+
+// Forward declare the Atom class.
+class Atom;
+
+/// Edge class. Represents both object file relocations, as well as layout and
+/// keep-alive constraints.
+class Edge {
+public:
+  using Kind = uint8_t;
+
+  using GenericEdgeKind = enum : Kind {
+    Invalid,                    // Invalid edge value.
+    FirstKeepAlive,             // Keeps target alive. Offset/addend zero.
+    KeepAlive = FirstKeepAlive, // Tag first edge kind that preserves liveness.
+    LayoutNext,                 // Layout constraint. Offset/Addend zero.
+    FirstRelocation             // First architecture specific relocation.
+  };
+
+  using OffsetT = uint32_t;
+  using AddendT = int64_t;
+
+  Edge(Kind K, OffsetT Offset, Atom &Target, AddendT Addend)
+      : Target(&Target), Offset(Offset), Addend(Addend), K(K) {}
+
+  OffsetT getOffset() const { return Offset; }
+  Kind getKind() const { return K; }
+  void setKind(Kind K) { this->K = K; }
+  bool isRelocation() const { return K >= FirstRelocation; }
+  Kind getRelocation() const {
+    assert(isRelocation() && "Not a relocation edge");
+    return K - FirstRelocation;
+  }
+  bool isKeepAlive() const { return K >= FirstKeepAlive; }
+  Atom &getTarget() const { return *Target; }
+  void setTarget(Atom &Target) { this->Target = &Target; }
+  AddendT getAddend() const { return Addend; }
+  void setAddend(AddendT Addend) { this->Addend = Addend; }
+
+private:
+  Atom *Target;
+  OffsetT Offset;
+  AddendT Addend;
+  Kind K = 0;
+};
+
+using EdgeVector = std::vector<Edge>;
+
+const StringRef getGenericEdgeKindName(Edge::Kind K);
+
+/// Base Atom class. Used by absolute and undefined atoms.
+class Atom {
+  friend class AtomGraph;
+
+protected:
+  /// Create a named (as yet unresolved) atom.
+  Atom(StringRef Name)
+      : Name(Name), IsDefined(false), IsLive(false), ShouldDiscard(false),
+        IsGlobal(false), IsAbsolute(false), IsCallable(false),
+        IsExported(false), IsWeak(false), HasLayoutNext(false),
+        IsCommon(false) {}
+
+  /// Create an addressed atom (absolute/defined). Note: IsAbsolute is not set.
+  Atom(StringRef Name, JITTargetAddress Address)
+      : Name(Name), Address(Address), IsDefined(true), IsLive(false),
+        ShouldDiscard(false), IsGlobal(false), IsAbsolute(false),
+        IsCallable(false), IsExported(false), IsWeak(false),
+        HasLayoutNext(false), IsCommon(false) {}
+
+public:
+  /// Returns true if this atom has a name.
+  bool hasName() const { return Name != StringRef(); }
+
+  /// Returns the name of this atom.
+  StringRef getName() const { return Name; }
+
+  /// Returns the current target address of this atom.
+  /// The initial target address (for atoms that have one) will be taken from
+  /// the input object file's virtual address space. During the layout phase
+  /// of JIT linking the atom's address will be updated to point to its final
+  /// address in the JIT'd process.
+  JITTargetAddress getAddress() const { return Address; }
+
+  /// Set the current target address of this atom.
+  void setAddress(JITTargetAddress Address) { this->Address = Address; }
+
+  /// Returns true if this is a defined atom.
+  bool isDefined() const { return IsDefined; }
+
+  /// Returns true if this atom is marked as live.
+  bool isLive() const { return IsLive; }
+
+  /// Mark this atom as live.
+  ///
+  /// Note: Only defined and absolute atoms can be marked live.
+  void setLive(bool IsLive) {
+    assert((IsDefined || IsAbsolute || !IsLive) &&
+           "Only defined and absolute atoms can be marked live");
+    this->IsLive = IsLive;
+  }
+
+  /// Returns true if this atom should be discarded during pruning.
+  bool shouldDiscard() const { return ShouldDiscard; }
+
+  /// Mark this atom to be discarded.
+  ///
+  /// Note: Only defined and absolute atoms can be marked should-discard.
+  void setShouldDiscard(bool ShouldDiscard) {
+    assert((IsDefined || IsAbsolute || !ShouldDiscard) &&
+           "Only defined and absolute atoms can be marked should-discard");
+    this->ShouldDiscard = ShouldDiscard;
+  }
+
+  /// Returns true if this definition is global (i.e. visible outside this
+  /// linkage unit).
+  ///
+  /// Note: This is distinct from Exported, which means visible outside the
+  /// JITDylib that this graph is being linked in to.
+  bool isGlobal() const { return IsGlobal; }
+
+  /// Mark this atom as global.
+  void setGlobal(bool IsGlobal) { this->IsGlobal = IsGlobal; }
+
+  /// Returns true if this atom represents an absolute symbol.
+  bool isAbsolute() const { return IsAbsolute; }
+
+  /// Returns true if this atom is known to be callable.
+  ///
+  /// Primarily provided for easy interoperability with ORC, which uses the
+  /// JITSymbolFlags::Callable flag to identify symbols that can be interposed
+  /// with stubs.
+  bool isCallable() const { return IsCallable; }
+
+  /// Mark this atom as callable.
+  void setCallable(bool IsCallable) {
+    assert((IsDefined || IsAbsolute || !IsCallable) &&
+           "Callable atoms must be defined or absolute");
+    this->IsCallable = IsCallable;
+  }
+
+  /// Returns true if this atom should appear in the symbol table of a final
+  /// linked image.
+  bool isExported() const { return IsExported; }
+
+  /// Mark this atom as exported.
+  void setExported(bool IsExported) {
+    assert((!IsExported || ((IsDefined || IsAbsolute) && hasName())) &&
+           "Exported atoms must have names");
+    this->IsExported = IsExported;
+  }
+
+  /// Returns true if this is a weak symbol.
+  bool isWeak() const { return IsWeak; }
+
+  /// Mark this atom as weak.
+  void setWeak(bool IsWeak) { this->IsWeak = IsWeak; }
+
+private:
+  StringRef Name;
+  JITTargetAddress Address = 0;
+
+  bool IsDefined : 1;
+  bool IsLive : 1;
+  bool ShouldDiscard : 1;
+
+  bool IsGlobal : 1;
+  bool IsAbsolute : 1;
+  bool IsCallable : 1;
+  bool IsExported : 1;
+  bool IsWeak : 1;
+
+protected:
+  // These flags only make sense for DefinedAtom, but we can minimize the size
+  // of DefinedAtom by defining them here.
+  bool HasLayoutNext : 1;
+  bool IsCommon : 1;
+};
+
+// Forward declare DefinedAtom.
+class DefinedAtom;
+
+raw_ostream &operator<<(raw_ostream &OS, const Atom &A);
+void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
+               StringRef EdgeKindName);
+
+/// Represents a section address range via a pair of DefinedAtom pointers to
+/// the first and last atoms in the section.
+class SectionRange {
+public:
+  SectionRange() = default;
+  SectionRange(DefinedAtom *First, DefinedAtom *Last)
+      : First(First), Last(Last) {}
+  DefinedAtom *getFirstAtom() const {
+    assert((!Last || First) && "First can not be null if Last is non-null");
+    return First;
+  }
+  DefinedAtom *getLastAtom() const {
+    assert((First || !Last) && "First can not be null if Last is non-null");
+    return Last;
+  }
+  bool isEmpty() const {
+    assert((First || !Last) && "First can not be null if Last is non-null");
+    return !First;
+  }
+  JITTargetAddress getStart() const;
+  JITTargetAddress getEnd() const;
+  uint64_t getSize() const;
+
+private:
+  DefinedAtom *First = nullptr;
+  DefinedAtom *Last = nullptr;
+};
+
+/// Represents an object file section.
+class Section {
+  friend class AtomGraph;
+
+private:
+  Section(StringRef Name, uint32_t Alignment, sys::Memory::ProtectionFlags Prot,
+          unsigned Ordinal, bool IsZeroFill)
+      : Name(Name), Alignment(Alignment), Prot(Prot), Ordinal(Ordinal),
+        IsZeroFill(IsZeroFill) {
+    assert(isPowerOf2_32(Alignment) && "Alignments must be a power of 2");
+  }
+
+  using DefinedAtomSet = DenseSet<DefinedAtom *>;
+
+public:
+  using atom_iterator = DefinedAtomSet::iterator;
+  using const_atom_iterator = DefinedAtomSet::const_iterator;
+
+  ~Section();
+  StringRef getName() const { return Name; }
+  uint32_t getAlignment() const { return Alignment; }
+  sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
+  unsigned getSectionOrdinal() const { return Ordinal; }
+  size_t getNextAtomOrdinal() { return ++NextAtomOrdinal; }
+
+  bool isZeroFill() const { return IsZeroFill; }
+
+  /// Returns an iterator over the atoms in the section (in no particular
+  /// order).
+  iterator_range<atom_iterator> atoms() {
+    return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+  }
+
+  /// Returns an iterator over the atoms in the section (in no particular
+  /// order).
+  iterator_range<const_atom_iterator> atoms() const {
+    return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+  }
+
+  /// Return the number of atoms in this section.
+  DefinedAtomSet::size_type atoms_size() const { return DefinedAtoms.size(); }
+
+  /// Return true if this section contains no atoms.
+  bool atoms_empty() const { return DefinedAtoms.empty(); }
+
+  /// Returns the range of this section as the pair of atoms with the lowest
+  /// and highest target address. This operation is expensive, as it
+  /// must traverse all atoms in the section.
+  ///
+  /// Note: If the section is empty, both values will be null. The section
+  /// address will evaluate to null, and the size to zero. If the section
+  /// contains a single atom both values will point to it, the address will
+  /// evaluate to the address of that atom, and the size will be the size of
+  /// that atom.
+  SectionRange getRange() const;
+
+private:
+  void addAtom(DefinedAtom &DA) {
+    assert(!DefinedAtoms.count(&DA) && "Atom is already in this section");
+    DefinedAtoms.insert(&DA);
+  }
+
+  void removeAtom(DefinedAtom &DA) {
+    assert(DefinedAtoms.count(&DA) && "Atom is not in this section");
+    DefinedAtoms.erase(&DA);
+  }
+
+  StringRef Name;
+  uint32_t Alignment = 0;
+  sys::Memory::ProtectionFlags Prot;
+  unsigned Ordinal = 0;
+  unsigned NextAtomOrdinal = 0;
+  bool IsZeroFill = false;
+  DefinedAtomSet DefinedAtoms;
+};
+
+/// Defined atom class. Suitable for use by defined named and anonymous
+/// atoms.
+class DefinedAtom : public Atom {
+  friend class AtomGraph;
+
+private:
+  DefinedAtom(Section &Parent, JITTargetAddress Address, uint32_t Alignment)
+      : Atom("", Address), Parent(Parent), Ordinal(Parent.getNextAtomOrdinal()),
+        Alignment(Alignment) {
+    assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
+  }
+
+  DefinedAtom(Section &Parent, StringRef Name, JITTargetAddress Address,
+              uint32_t Alignment)
+      : Atom(Name, Address), Parent(Parent),
+        Ordinal(Parent.getNextAtomOrdinal()), Alignment(Alignment) {
+    assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
+  }
+
+public:
+  using edge_iterator = EdgeVector::iterator;
+
+  Section &getSection() const { return Parent; }
+
+  uint64_t getSize() const { return Size; }
+
+  StringRef getContent() const {
+    assert(!Parent.isZeroFill() && "Trying to get content for zero-fill atom");
+    assert(Size <= std::numeric_limits<size_t>::max() &&
+           "Content size too large");
+    return {ContentPtr, static_cast<size_t>(Size)};
+  }
+  void setContent(StringRef Content) {
+    assert(!Parent.isZeroFill() && "Calling setContent on zero-fill atom?");
+    ContentPtr = Content.data();
+    Size = Content.size();
+  }
+
+  bool isZeroFill() const { return Parent.isZeroFill(); }
+
+  void setZeroFill(uint64_t Size) {
+    assert(Parent.isZeroFill() && !ContentPtr &&
+           "Can't set zero-fill length of a non zero-fill atom");
+    this->Size = Size;
+  }
+
+  uint64_t getZeroFillSize() const {
+    assert(Parent.isZeroFill() &&
+           "Can't get zero-fill length of a non zero-fill atom");
+    return Size;
+  }
+
+  uint32_t getAlignment() const { return Alignment; }
+
+  bool hasLayoutNext() const { return HasLayoutNext; }
+  void setLayoutNext(DefinedAtom &Next) {
+    assert(!HasLayoutNext && "Atom already has layout-next constraint");
+    HasLayoutNext = true;
+    Edges.push_back(Edge(Edge::LayoutNext, 0, Next, 0));
+  }
+  DefinedAtom &getLayoutNext() {
+    assert(HasLayoutNext && "Atom does not have a layout-next constraint");
+    DefinedAtom *Next = nullptr;
+    for (auto &E : edges())
+      if (E.getKind() == Edge::LayoutNext) {
+        assert(E.getTarget().isDefined() &&
+               "layout-next target atom must be a defined atom");
+        Next = static_cast<DefinedAtom *>(&E.getTarget());
+        break;
+      }
+    assert(Next && "Missing LayoutNext edge");
+    return *Next;
+  }
+
+  bool isCommon() const { return IsCommon; }
+
+  void addEdge(Edge::Kind K, Edge::OffsetT Offset, Atom &Target,
+               Edge::AddendT Addend) {
+    assert(K != Edge::LayoutNext &&
+           "Layout edges should be added via setLayoutNext");
+    Edges.push_back(Edge(K, Offset, Target, Addend));
+  }
+
+  iterator_range<edge_iterator> edges() {
+    return make_range(Edges.begin(), Edges.end());
+  }
+  size_t edges_size() const { return Edges.size(); }
+  bool edges_empty() const { return Edges.empty(); }
+
+  unsigned getOrdinal() const { return Ordinal; }
+
+private:
+  void setCommon(uint64_t Size) {
+    assert(ContentPtr == 0 && "Atom already has content?");
+    IsCommon = true;
+    setZeroFill(Size);
+  }
+
+  EdgeVector Edges;
+  uint64_t Size = 0;
+  Section &Parent;
+  const char *ContentPtr = nullptr;
+  unsigned Ordinal = 0;
+  uint32_t Alignment = 0;
+};
+
+inline JITTargetAddress SectionRange::getStart() const {
+  return First ? First->getAddress() : 0;
+}
+
+inline JITTargetAddress SectionRange::getEnd() const {
+  return Last ? Last->getAddress() + Last->getSize() : 0;
+}
+
+inline uint64_t SectionRange::getSize() const { return getEnd() - getStart(); }
+
+inline SectionRange Section::getRange() const {
+  if (atoms_empty())
+    return SectionRange();
+  DefinedAtom *First = *DefinedAtoms.begin(), *Last = *DefinedAtoms.begin();
+  for (auto *DA : atoms()) {
+    if (DA->getAddress() < First->getAddress())
+      First = DA;
+    if (DA->getAddress() > Last->getAddress())
+      Last = DA;
+  }
+  return SectionRange(First, Last);
+}
+
+class AtomGraph {
+private:
+  using SectionList = std::vector<std::unique_ptr<Section>>;
+  using AddressToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+  using NamedAtomMap = DenseMap<StringRef, Atom *>;
+  using ExternalAtomSet = DenseSet<Atom *>;
+
+public:
+  using external_atom_iterator = ExternalAtomSet::iterator;
+
+  using section_iterator = pointee_iterator<SectionList::iterator>;
+  using const_section_iterator = pointee_iterator<SectionList::const_iterator>;
+
+  template <typename SecItrT, typename AtomItrT, typename T>
+  class defined_atom_iterator_impl
+      : public iterator_facade_base<
+            defined_atom_iterator_impl<SecItrT, AtomItrT, T>,
+            std::forward_iterator_tag, T> {
+  public:
+    defined_atom_iterator_impl() = default;
+
+    defined_atom_iterator_impl(SecItrT SI, SecItrT SE)
+        : SI(SI), SE(SE),
+          AI(SI != SE ? SI->atoms().begin() : Section::atom_iterator()) {
+      moveToNextAtomOrEnd();
+    }
+
+    bool operator==(const defined_atom_iterator_impl &RHS) const {
+      return (SI == RHS.SI) && (AI == RHS.AI);
+    }
+
+    T operator*() const {
+      assert(AI != SI->atoms().end() && "Dereferencing end?");
+      return *AI;
+    }
+
+    defined_atom_iterator_impl &operator++() {
+      ++AI;
+      moveToNextAtomOrEnd();
+      return *this;
+    }
+
+  private:
+    void moveToNextAtomOrEnd() {
+      while (SI != SE && AI == SI->atoms().end()) {
+        ++SI;
+        if (SI == SE)
+          AI = Section::atom_iterator();
+        else
+          AI = SI->atoms().begin();
+      }
+    }
+
+    SecItrT SI, SE;
+    AtomItrT AI;
+  };
+
+  using defined_atom_iterator =
+      defined_atom_iterator_impl<section_iterator, Section::atom_iterator,
+                                 DefinedAtom *>;
+
+  using const_defined_atom_iterator =
+      defined_atom_iterator_impl<const_section_iterator,
+                                 Section::const_atom_iterator,
+                                 const DefinedAtom *>;
+
+  AtomGraph(std::string Name, unsigned PointerSize,
+            support::endianness Endianness)
+      : Name(std::move(Name)), PointerSize(PointerSize),
+        Endianness(Endianness) {}
+
+  /// Returns the name of this graph (usually the name of the original
+  /// underlying MemoryBuffer).
+  const std::string &getName() const { return Name; }
+
+  /// Returns the pointer size for use in this graph.
+  unsigned getPointerSize() const { return PointerSize; }
+
+  /// Returns the endianness of atom-content in this graph.
+  support::endianness getEndianness() const { return Endianness; }
+
+  /// Create a section with the given name, protection flags, and alignment.
+  Section &createSection(StringRef Name, uint32_t Alignment,
+                         sys::Memory::ProtectionFlags Prot, bool IsZeroFill) {
+    std::unique_ptr<Section> Sec(
+        new Section(Name, Alignment, Prot, Sections.size(), IsZeroFill));
+    Sections.push_back(std::move(Sec));
+    return *Sections.back();
+  }
+
+  /// Add an external atom representing an undefined symbol in this graph.
+  Atom &addExternalAtom(StringRef Name) {
+    assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+    Atom *A = reinterpret_cast<Atom *>(
+        AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
+    new (A) Atom(Name);
+    ExternalAtoms.insert(A);
+    NamedAtoms[Name] = A;
+    return *A;
+  }
+
+  /// Add an atom representing an absolute symbol.
+  Atom &addAbsoluteAtom(StringRef Name, JITTargetAddress Addr) {
+    assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+    Atom *A = reinterpret_cast<Atom *>(
+        AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
+    new (A) Atom(Name, Addr);
+    AbsoluteAtoms.insert(A);
+    NamedAtoms[Name] = A;
+    return *A;
+  }
+
+  /// Add an anonymous defined atom to the graph.
+  ///
+  /// Anonymous atoms have content but no name. They must have an address.
+  DefinedAtom &addAnonymousAtom(Section &Parent, JITTargetAddress Address,
+                                uint32_t Alignment) {
+    DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+        AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+    new (A) DefinedAtom(Parent, Address, Alignment);
+    Parent.addAtom(*A);
+    getAddrToAtomMap()[A->getAddress()] = A;
+    return *A;
+  }
+
+  /// Add a defined atom to the graph.
+  ///
+  /// Allocates and constructs a DefinedAtom instance with the given parent,
+  /// name, address, and alignment.
+  DefinedAtom &addDefinedAtom(Section &Parent, StringRef Name,
+                              JITTargetAddress Address, uint32_t Alignment) {
+    assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+    DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+        AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+    new (A) DefinedAtom(Parent, Name, Address, Alignment);
+    Parent.addAtom(*A);
+    getAddrToAtomMap()[A->getAddress()] = A;
+    NamedAtoms[Name] = A;
+    return *A;
+  }
+
+  /// Add a common symbol atom to the graph.
+  ///
+  /// Adds a common-symbol atom to the graph with the given parent, name,
+  /// address, alignment and size.
+  DefinedAtom &addCommonAtom(Section &Parent, StringRef Name,
+                             JITTargetAddress Address, uint32_t Alignment,
+                             uint64_t Size) {
+    assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+    DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+        AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+    new (A) DefinedAtom(Parent, Name, Address, Alignment);
+    A->setCommon(Size);
+    Parent.addAtom(*A);
+    NamedAtoms[Name] = A;
+    return *A;
+  }
+
+  iterator_range<section_iterator> sections() {
+    return make_range(section_iterator(Sections.begin()),
+                      section_iterator(Sections.end()));
+  }
+
+  /// Returns the section with the given name if it exists, otherwise returns
+  /// null.
+  Section *findSectionByName(StringRef Name) {
+    for (auto &S : sections())
+      if (S.getName() == Name)
+        return &S;
+    return nullptr;
+  }
+
+  iterator_range<external_atom_iterator> external_atoms() {
+    return make_range(ExternalAtoms.begin(), ExternalAtoms.end());
+  }
+
+  iterator_range<external_atom_iterator> absolute_atoms() {
+    return make_range(AbsoluteAtoms.begin(), AbsoluteAtoms.end());
+  }
+
+  iterator_range<defined_atom_iterator> defined_atoms() {
+    return make_range(defined_atom_iterator(Sections.begin(), Sections.end()),
+                      defined_atom_iterator(Sections.end(), Sections.end()));
+  }
+
+  iterator_range<const_defined_atom_iterator> defined_atoms() const {
+    return make_range(
+        const_defined_atom_iterator(Sections.begin(), Sections.end()),
+        const_defined_atom_iterator(Sections.end(), Sections.end()));
+  }
+
+  /// Returns the atom with the given name, which must exist in this graph.
+  Atom &getAtomByName(StringRef Name) {
+    auto I = NamedAtoms.find(Name);
+    assert(I != NamedAtoms.end() && "Name not in NamedAtoms map");
+    return *I->second;
+  }
+
+  /// Returns the atom with the given name, which must exist in this graph and
+  /// be a DefinedAtom.
+  DefinedAtom &getDefinedAtomByName(StringRef Name) {
+    auto &A = getAtomByName(Name);
+    assert(A.isDefined() && "Atom is not a defined atom");
+    return static_cast<DefinedAtom &>(A);
+  }
+
+  /// Search for the given atom by name.
+  /// Returns the atom (if found) or an error (if no atom with this name
+  /// exists).
+  Expected<Atom &> findAtomByName(StringRef Name) {
+    auto I = NamedAtoms.find(Name);
+    if (I == NamedAtoms.end())
+      return make_error<JITLinkError>("No atom named " + Name);
+    return *I->second;
+  }
+
+  /// Search for the given defined atom by name.
+  /// Returns the defined atom (if found) or an error (if no atom with this
+  /// name exists, or if one exists but is not a defined atom).
+  Expected<DefinedAtom &> findDefinedAtomByName(StringRef Name) {
+    auto I = NamedAtoms.find(Name);
+    if (I == NamedAtoms.end())
+      return make_error<JITLinkError>("No atom named " + Name);
+    if (!I->second->isDefined())
+      return make_error<JITLinkError>("Atom " + Name +
+                                      " exists but is not a "
+                                      "defined atom");
+    return static_cast<DefinedAtom &>(*I->second);
+  }
+
+  /// Returns the defined atom covering the given address.
+  ///
+  /// Returns null if there are no defined atoms, if the address precedes every
+  /// known atom, or if it lies past the end of the nearest preceding atom.
+  DefinedAtom *getAtomByAddress(JITTargetAddress Address) {
+    refreshAddrToAtomCache();
+
+    // If there are no defined atoms, bail out early.
+    if (AddrToAtomCache->empty())
+      return nullptr;
+
+    // Find the atom *after* the given address.
+    auto I = AddrToAtomCache->upper_bound(Address);
+
+    // If this address falls before any known atom, bail out.
+    if (I == AddrToAtomCache->begin())
+      return nullptr;
+
+    // The atom we're looking for is the one before the atom we found.
+    --I;
+
+    // Otherwise range check the atom that was found.
+    assert(!I->second->getContent().empty() && "Atom content not set");
+    if (Address >= I->second->getAddress() + I->second->getContent().size())
+      return nullptr;
+
+    return I->second;
+  }
+
+  /// Like getAtomByAddress, but returns an Error if the given address is not
+  /// covered by an atom, rather than a null pointer.
+  Expected<DefinedAtom &> findAtomByAddress(JITTargetAddress Address) {
+    if (auto *DA = getAtomByAddress(Address))
+      return *DA;
+    return make_error<JITLinkError>("No atom at address " +
+                                    formatv("{0:x16}", Address));
+  }
+
+  /// Remove the given external atom. NOTE(review): NamedAtoms is not updated.
+  void removeExternalAtom(Atom &A) {
+    assert(!A.isDefined() && !A.isAbsolute() && "A is not an external atom");
+    assert(ExternalAtoms.count(&A) && "A is not in the external atoms set");
+    ExternalAtoms.erase(&A);
+    A.~Atom();
+  }
+
+  /// Remove the given absolute atom. NOTE(review): NamedAtoms is not updated.
+  void removeAbsoluteAtom(Atom &A) {
+    assert(A.isAbsolute() && "A is not an absolute atom");
+    assert(AbsoluteAtoms.count(&A) && "A is not in the absolute atoms set");
+    AbsoluteAtoms.erase(&A);
+    A.~Atom();
+  }
+
+  /// Remove the given defined atom from the graph.
+  void removeDefinedAtom(DefinedAtom &DA) {
+    if (AddrToAtomCache) {
+      assert(AddrToAtomCache->count(DA.getAddress()) &&
+             "Cache exists, but does not contain atom");
+      AddrToAtomCache->erase(DA.getAddress());
+    }
+    if (DA.hasName()) {
+      assert(NamedAtoms.count(DA.getName()) && "Named atom not in map");
+      NamedAtoms.erase(DA.getName());
+    }
+    DA.getSection().removeAtom(DA);
+    DA.~DefinedAtom();
+  }
+
+  /// Invalidate the address-to-atom map.
+  void invalidateAddrToAtomMap() { AddrToAtomCache = None; }
+
+  /// Dump the graph.
+  ///
+  /// If supplied, the EdgeKindToName function will be used to name edge
+  /// kinds in the debug output. Otherwise raw edge kind numbers will be
+  /// displayed.
+  void dump(raw_ostream &OS,
+            std::function<StringRef(Edge::Kind)> EdegKindToName =
+                std::function<StringRef(Edge::Kind)>());
+
+private:
+  AddressToAtomMap &getAddrToAtomMap() {
+    refreshAddrToAtomCache();
+    return *AddrToAtomCache;
+  }
+
+  const AddressToAtomMap &getAddrToAtomMap() const {
+    refreshAddrToAtomCache();
+    return *AddrToAtomCache;
+  }
+
+  void refreshAddrToAtomCache() const {
+    if (!AddrToAtomCache) {
+      AddrToAtomCache = AddressToAtomMap();
+      for (auto *DA : defined_atoms())
+        (*AddrToAtomCache)[DA->getAddress()] = const_cast<DefinedAtom *>(DA);
+    }
+  }
+
+  // Put the BumpPtrAllocator first so that we don't free any of the atoms in
+  // it until all of their destructors have been run.
+  BumpPtrAllocator AtomAllocator;
+
+  std::string Name;
+  unsigned PointerSize;
+  support::endianness Endianness;
+  SectionList Sections;
+  NamedAtomMap NamedAtoms;
+  ExternalAtomSet ExternalAtoms;
+  ExternalAtomSet AbsoluteAtoms;
+  mutable Optional<AddressToAtomMap> AddrToAtomCache;
+};
+
+/// A function for mutating AtomGraphs.
+using AtomGraphPassFunction = std::function<Error(AtomGraph &)>;
+
+/// A list of atom graph passes.
+using AtomGraphPassList = std::vector<AtomGraphPassFunction>;
+
+/// An atom graph pass configuration, consisting of a list of pre-prune,
+/// post-prune, and post-fixup passes.
+struct PassConfiguration {
+
+  /// Pre-prune passes.
+  ///
+  /// These passes are called on the graph after it is built, and before any
+  /// atoms have been pruned.
+  ///
+  /// Notable use cases: Marking atoms live or should-discard.
+  AtomGraphPassList PrePrunePasses;
+
+  /// Post-prune passes.
+  ///
+  /// These passes are called on the graph after dead and should-discard atoms
+  /// have been removed, but before fixups are applied.
+  ///
+  /// Notable use cases: Building GOT, stub, and TLV atoms.
+  AtomGraphPassList PostPrunePasses;
+
+  /// Post-fixup passes.
+  ///
+  /// These passes are called on the graph after atom content has been copied
+  /// to working memory, and fixups applied.
+  ///
+  /// Notable use cases: Testing and validation.
+  AtomGraphPassList PostFixupPasses;
+};
+
+/// A map of symbol names to resolved addresses.
+using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
+
+/// A function to call with a resolved symbol map (See AsyncLookupResult) or an
+/// error if resolution failed.
+using JITLinkAsyncLookupContinuation =
+    std::function<void(Expected<AsyncLookupResult> LR)>;
+
+/// An asynchronous symbol lookup. Performs a search (possibly asynchronously)
+/// for the given symbols, calling the given continuation with either the result
+/// (if the lookup succeeds), or an error (if the lookup fails).
+using JITLinkAsyncLookupFunction =
+    std::function<void(const DenseSet<StringRef> &Symbols,
+                       JITLinkAsyncLookupContinuation LookupContinuation)>;
+
+/// Holds context for a single jitLink invocation.
+class JITLinkContext {
+public:
+  /// Destroy a JITLinkContext.
+  virtual ~JITLinkContext();
+
+  /// Return the MemoryManager to be used for this link.
+  virtual JITLinkMemoryManager &getMemoryManager() = 0;
+
+  /// Returns a MemoryBufferRef for the object buffer.
+  /// This method can not be called once takeObjectBuffer has been called.
+  virtual MemoryBufferRef getObjectBuffer() const = 0;
+
+  /// Notify this context that linking failed.
+  /// Called by JITLink if linking cannot be completed.
+  virtual void notifyFailed(Error Err) = 0;
+
+  /// Called by JITLink to resolve external symbols. This method is passed a
+  /// lookup continuation which it must call with a result to continue the
+  /// linking process.
+  virtual void lookup(const DenseSet<StringRef> &Symbols,
+                      JITLinkAsyncLookupContinuation LookupContinuation) = 0;
+
+  /// Called by JITLink once all defined atoms in the graph have been assigned
+  /// their final memory locations in the target process. At this point the
+  /// atom graph can be inspected to build a symbol table, however the atom
+  /// content will not generally have been copied to the target location yet.
+  virtual void notifyResolved(AtomGraph &G) = 0;
+
+  /// Called by JITLink to notify the context that the object has been
+  /// finalized (i.e. emitted to memory and memory permissions set). If all of
+  /// this object's dependencies have also been finalized then the code is ready
+  /// to run.
+  virtual void
+  notifyFinalized(std::unique_ptr<JITLinkMemoryManager::Allocation> A) = 0;
+
+  /// Called by JITLink prior to linking to determine whether default passes for
+  /// the target should be added. The default implementation returns true.
+  /// If subclasses override this method to return false for any target then
+  /// they are required to fully configure the pass pipeline for that target.
+  virtual bool shouldAddDefaultTargetPasses(const Triple &TT) const;
+
+  /// Returns the mark-live pass to be used for this link. If no pass is
+  /// returned (the default) then the target-specific linker implementation will
+  /// choose a conservative default (usually marking all atoms live).
+  /// This function is only called if shouldAddDefaultTargetPasses returns true,
+  /// otherwise the JITLinkContext is responsible for adding a mark-live pass in
+  /// modifyPassConfig.
+  virtual AtomGraphPassFunction getMarkLivePass(const Triple &TT) const;
+
+  /// Called by JITLink to modify the pass pipeline prior to linking.
+  /// The default version performs no modification.
+  virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
+};
+
+/// Marks all atoms in a graph live. This can be used as a default, conservative
+/// mark-live implementation.
+Error markAllAtomsLive(AtomGraph &G);
+
+/// Basic JITLink implementation.
+///
+/// This function will use sensible defaults for GOT and Stub handling.
+void jitLink(std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
new file mode 100644
index 000000000000..9d0b37fe4a4d
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -0,0 +1,99 @@
+//===-- JITLinkMemoryManager.h - JITLink mem manager interface --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains the JITLinkMemoryManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Memory.h"
+#include <cstdint>
+
+namespace llvm {
+namespace jitlink {
+
+/// Manages allocations of JIT memory.
+///
+/// Instances of this class may be accessed concurrently from multiple threads
+/// and their implementations should include any necessary synchronization.
+class JITLinkMemoryManager {
+public:
+  using ProtectionFlags = sys::Memory::ProtectionFlags;
+
+  class SegmentRequest {
+  public:
+    SegmentRequest() = default;
+    SegmentRequest(size_t ContentSize, unsigned ContentAlign,
+                   uint64_t ZeroFillSize, unsigned ZeroFillAlign)
+        : ContentSize(ContentSize), ZeroFillSize(ZeroFillSize),
+          ContentAlign(ContentAlign), ZeroFillAlign(ZeroFillAlign) {}
+    size_t getContentSize() const { return ContentSize; }
+    unsigned getContentAlignment() const { return ContentAlign; }
+    uint64_t getZeroFillSize() const { return ZeroFillSize; }
+    unsigned getZeroFillAlignment() const { return ZeroFillAlign; }
+
+  private:
+    size_t ContentSize = 0;
+    uint64_t ZeroFillSize = 0;
+    unsigned ContentAlign = 0;
+    unsigned ZeroFillAlign = 0;
+  };
+
+  using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>;
+
+  /// Represents an allocation created by the memory manager.
+  ///
+  /// An allocation object is responsible for allocating and owning jit-linker
+  /// working and target memory, and for transferring from working to target
+  /// memory.
+  ///
+  class Allocation {
+  public:
+    using FinalizeContinuation = std::function<void(Error)>;
+
+    virtual ~Allocation();
+
+    /// Should return the address of linker working memory for the segment with
+    /// the given protection flags.
+    virtual MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) = 0;
+
+    /// Should return the final address in the target process where the segment
+    /// will reside.
+    virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0;
+
+    /// Should transfer from working memory to target memory, and release
+    /// working memory.
+    virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0;
+
+    /// Should deallocate target memory.
+    virtual Error deallocate() = 0;
+  };
+
+  virtual ~JITLinkMemoryManager();
+
+  /// Create an Allocation object.
+  virtual Expected<std::unique_ptr<Allocation>>
+  allocate(const SegmentsRequestMap &Request) = 0;
+};
+
+/// A JITLinkMemoryManager that allocates in-process memory.
+class InProcessMemoryManager : public JITLinkMemoryManager {
+public:
+  Expected<std::unique_ptr<Allocation>>
+  allocate(const SegmentsRequestMap &Request) override;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO.h b/include/llvm/ExecutionEngine/JITLink/MachO.h
new file mode 100644
index 000000000000..7facb657a51c
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/MachO.h
@@ -0,0 +1,30 @@
+//===------- MachO.h - Generic JIT link function for MachO ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic jit-link functions for MachO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// jit-link the given ObjBuffer, which must be a MachO object file.
+///
+/// Uses conservative defaults for GOT and stub handling based on the target
+/// platform.
+void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
new file mode 100644
index 000000000000..1d5b586afc32
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
@@ -0,0 +1,63 @@
+//===--- MachO_x86_64.h - JIT link functions for MachO/x86-64 ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for MachO/x86-64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+namespace MachO_x86_64_Edges {
+
+enum MachOX86RelocationKind : Edge::Kind {
+  Branch32 = Edge::FirstRelocation,
+  Pointer64,
+  Pointer64Anon,
+  PCRel32,
+  PCRel32Minus1,
+  PCRel32Minus2,
+  PCRel32Minus4,
+  PCRel32Anon,
+  PCRel32Minus1Anon,
+  PCRel32Minus2Anon,
+  PCRel32Minus4Anon,
+  PCRel32GOTLoad,
+  PCRel32GOT,
+  PCRel32TLV,
+  Delta32,
+  Delta64,
+  NegDelta32,
+  NegDelta64,
+};
+
+} // namespace MachO_x86_64_Edges
+
+/// jit-link the given object buffer, which must be a MachO x86-64 object file.
+///
+/// If PrePrunePasses is empty then a default mark-live pass will be inserted
+/// that will mark all exported atoms live. If PrePrunePasses is not empty, the
+/// caller is responsible for including a pass to mark atoms as live.
+///
+/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will
+/// be inserted. If PostPrunePasses is not empty then the caller is responsible
+/// for including a pass to insert GOT and stub edges.
+void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx);
+
+/// Return the string name of the given MachO x86-64 edge kind.
+StringRef getMachOX86RelocationKindName(Edge::Kind R);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h
index 05c9590726df..b14154c5b5e8 100644
--- a/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/include/llvm/ExecutionEngine/JITSymbol.h
@@ -1,9 +1,8 @@
 //===- JITSymbol.h - JIT symbol abstraction ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,7 +55,7 @@ template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) {
 class JITSymbolFlags {
 public:
   using UnderlyingType = uint8_t;
-  using TargetFlagsType = uint64_t;
+  using TargetFlagsType = uint8_t;
 
   enum FlagNames : UnderlyingType {
     None = 0,
@@ -66,15 +65,9 @@ public:
     Absolute = 1U << 3,
     Exported = 1U << 4,
     Callable = 1U << 5,
-    Lazy = 1U << 6,
-    Materializing = 1U << 7,
-    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Materializing)
+    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Callable)
   };
 
-  static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) {
-    return static_cast<FlagNames>(Orig.Flags & ~Lazy & ~Materializing);
-  }
-
   /// Default-construct a JITSymbolFlags instance.
   JITSymbolFlags() = default;
 
@@ -84,7 +77,7 @@ public:
   /// Construct a JITSymbolFlags instance from the given flags and target
   ///        flags.
   JITSymbolFlags(FlagNames Flags, TargetFlagsType TargetFlags)
-    : Flags(Flags), TargetFlags(TargetFlags) {}
+      : TargetFlags(TargetFlags), Flags(Flags) {}
 
   /// Implicitly convert to bool. Returs true if any flag is set.
   explicit operator bool() const { return Flags != None || TargetFlags != 0; }
@@ -111,19 +104,6 @@ public:
     return (Flags & HasError) == HasError;
   }
 
-  /// Returns true if this is a lazy symbol.
-  ///        This flag is used internally by the JIT APIs to track
-  ///        materialization states.
-  bool isLazy() const { return Flags & Lazy; }
-
-  /// Returns true if this symbol is in the process of being
-  ///        materialized.
-  bool isMaterializing() const { return Flags & Materializing; }
-
-  /// Returns true if this symbol is fully materialized.
-  ///        (i.e. neither lazy, nor materializing).
-  bool isMaterialized() const { return !(Flags & (Lazy | Materializing)); }
-
   /// Returns true if the Weak flag is set.
   bool isWeak() const {
     return (Flags & Weak) == Weak;
@@ -168,8 +148,8 @@ public:
   fromObjectSymbol(const object::SymbolRef &Symbol);
 
 private:
-  FlagNames Flags = None;
   TargetFlagsType TargetFlags = 0;
+  FlagNames Flags = None;
 };
 
 inline JITSymbolFlags operator&(const JITSymbolFlags &LHS,
diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h
index 66ddb7cdb875..8253bf98963b 100644
--- a/include/llvm/ExecutionEngine/MCJIT.h
+++ b/include/llvm/ExecutionEngine/MCJIT.h
@@ -1,9 +1,8 @@
 //===-- MCJIT.h - MC-Based Just-In-Time Execution Engine --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
index 05da594a94a8..b13d7f6e245b 100644
--- a/include/llvm/ExecutionEngine/OProfileWrapper.h
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -1,9 +1,8 @@
 //===-- OProfileWrapper.h - OProfile JIT API Wrapper ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines a OProfileWrapper object that detects if the oprofile
diff --git a/include/llvm/ExecutionEngine/ObjectCache.h b/include/llvm/ExecutionEngine/ObjectCache.h
index 077044408e09..47e94f18a1c7 100644
--- a/include/llvm/ExecutionEngine/ObjectCache.h
+++ b/include/llvm/ExecutionEngine/ObjectCache.h
@@ -1,9 +1,8 @@
 //===-- ObjectCache.h - Class definition for the ObjectCache ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 884878925cde..5f593a27cad6 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -1,9 +1,8 @@
 //===- CompileOnDemandLayer.h - Compile each function on demand -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -265,13 +264,26 @@ public:
       std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
 
   /// Construct a compile-on-demand layer instance.
-  LegacyCompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
-                             SymbolResolverGetter GetSymbolResolver,
-                             SymbolResolverSetter SetSymbolResolver,
-                             PartitioningFtor Partition,
-                             CompileCallbackMgrT &CallbackMgr,
-                             IndirectStubsManagerBuilderT CreateIndirectStubsManager,
-                             bool CloneStubsIntoPartitions = true)
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyCompileOnDemandLayer(
+          ExecutionSession &ES, BaseLayerT &BaseLayer,
+          SymbolResolverGetter GetSymbolResolver,
+          SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+          CompileCallbackMgrT &CallbackMgr,
+          IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+          bool CloneStubsIntoPartitions = true),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 LegacyCompileOnDemandLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyCompileOnDemandLayer(
+      ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+      BaseLayerT &BaseLayer, SymbolResolverGetter GetSymbolResolver,
+      SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+      CompileCallbackMgrT &CallbackMgr,
+      IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+      bool CloneStubsIntoPartitions = true)
       : ES(ES), BaseLayer(BaseLayer),
         GetSymbolResolver(std::move(GetSymbolResolver)),
         SetSymbolResolver(std::move(SetSymbolResolver)),
@@ -730,8 +742,24 @@ private:
   bool CloneStubsIntoPartitions;
 };
 
-} // end namespace orc
+template <typename BaseLayerT, typename CompileCallbackMgrT,
+          typename IndirectStubsMgrT>
+LegacyCompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT, IndirectStubsMgrT>::
+    LegacyCompileOnDemandLayer(
+        ExecutionSession &ES, BaseLayerT &BaseLayer,
+        SymbolResolverGetter GetSymbolResolver,
+        SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+        CompileCallbackMgrT &CallbackMgr,
+        IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+        bool CloneStubsIntoPartitions)
+    : ES(ES), BaseLayer(BaseLayer),
+      GetSymbolResolver(std::move(GetSymbolResolver)),
+      SetSymbolResolver(std::move(SetSymbolResolver)),
+      Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr),
+      CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
+      CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
 
+} // end namespace orc
 } // end namespace llvm
 
 #endif // LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
diff --git a/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index f34f88311ba5..eb6d84e8cbb4 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -1,9 +1,8 @@
 //===- CompileUtils.h - Utilities for compiling IR in the JIT ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,28 +13,21 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
 #define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
 
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
 #include <memory>
 
 namespace llvm {
 
 class MCContext;
+class MemoryBuffer;
 class Module;
+class ObjectCache;
+class TargetMachine;
 
 namespace orc {
 
+class JITTargetMachineBuilder;
+
 /// Simple compile functor: Takes a single IR module and returns an ObjectFile.
 /// This compiler supports a single compilation thread and LLVMContext only.
 /// For multithreaded compilation, use ConcurrentIRCompiler below.
@@ -51,56 +43,32 @@ public:
   void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; }
 
   /// Compile a Module to an ObjectFile.
-  CompileResult operator()(Module &M) {
-    CompileResult CachedObject = tryToLoadFromObjectCache(M);
-    if (CachedObject)
-      return CachedObject;
-
-    SmallVector<char, 0> ObjBufferSV;
-
-    {
-      raw_svector_ostream ObjStream(ObjBufferSV);
-
-      legacy::PassManager PM;
-      MCContext *Ctx;
-      if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
-        llvm_unreachable("Target does not support MC emission.");
-      PM.run(M);
-    }
-
-    auto ObjBuffer =
-        llvm::make_unique<SmallVectorMemoryBuffer>(std::move(ObjBufferSV));
-    auto Obj =
-        object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
-
-    if (Obj) {
-      notifyObjectCompiled(M, *ObjBuffer);
-      return std::move(ObjBuffer);
-    }
-
-    // TODO: Actually report errors helpfully.
-    consumeError(Obj.takeError());
-    return nullptr;
-  }
+  CompileResult operator()(Module &M);
 
 private:
-
-  CompileResult tryToLoadFromObjectCache(const Module &M) {
-    if (!ObjCache)
-      return CompileResult();
-
-    return ObjCache->getObject(&M);
-  }
-
-  void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer) {
-    if (ObjCache)
-      ObjCache->notifyObjectCompiled(&M, ObjBuffer.getMemBufferRef());
-  }
+  CompileResult tryToLoadFromObjectCache(const Module &M);
+  void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer);
 
   TargetMachine &TM;
   ObjectCache *ObjCache = nullptr;
 };
 
+/// A SimpleCompiler that owns its TargetMachine.
+///
+/// This is convenient for clients who don't want to own their TargetMachines,
+/// e.g. LLJIT.
+class TMOwningSimpleCompiler : public SimpleCompiler {
+public:
+  TMOwningSimpleCompiler(std::unique_ptr<TargetMachine> TM,
+                         ObjectCache *ObjCache = nullptr)
+      : SimpleCompiler(*TM, ObjCache), TM(std::move(TM)) {}
+
+private:
+  // FIXME: shared because std::functions (and consequently
+  // IRCompileLayer::CompileFunction) are not moveable.
+  std::shared_ptr<llvm::TargetMachine> TM;
+};
+
 /// A thread-safe version of SimpleCompiler.
 ///
 /// This class creates a new TargetMachine and SimpleCompiler instance for each
@@ -108,16 +76,11 @@ private:
 class ConcurrentIRCompiler {
 public:
   ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
-                       ObjectCache *ObjCache = nullptr)
-      : JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
+                       ObjectCache *ObjCache = nullptr);
 
   void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
 
-  std::unique_ptr<MemoryBuffer> operator()(Module &M) {
-    auto TM = cantFail(JTMB.createTargetMachine());
-    SimpleCompiler C(*TM, ObjCache);
-    return C(M);
-  }
+  std::unique_ptr<MemoryBuffer> operator()(Module &M);
 
 private:
   JITTargetMachineBuilder JTMB;
diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h
index 39d306e0bd4c..94a5618233e4 100644
--- a/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/include/llvm/ExecutionEngine/Orc/Core.h
@@ -1,9 +1,8 @@
 //===------ Core.h -- Core ORC APIs (Layer, JITDylib, etc.) -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 
@@ -34,6 +34,7 @@ class ExecutionSession;
 class MaterializationUnit;
 class MaterializationResponsibility;
 class JITDylib;
+enum class SymbolState : uint8_t;
 
 /// VModuleKey provides a unique identifier (allocated and managed by
 /// ExecutionSessions) for a module added to the JIT.
@@ -57,6 +58,18 @@ using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
 /// A list of (JITDylib*, bool) pairs.
 using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>;
 
+struct SymbolAliasMapEntry {
+  SymbolAliasMapEntry() = default;
+  SymbolAliasMapEntry(SymbolStringPtr Aliasee, JITSymbolFlags AliasFlags)
+      : Aliasee(std::move(Aliasee)), AliasFlags(AliasFlags) {}
+
+  SymbolStringPtr Aliasee;
+  JITSymbolFlags AliasFlags;
+};
+
+/// A map of Symbols to (Symbol, Flags) pairs.
+using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
+
 /// Render a SymbolStringPtr.
 raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym);
 
@@ -88,12 +101,15 @@ raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU);
 /// Render a JITDylibSearchList.
 raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs);
 
+/// Render a SymbolAliasMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases);
+
+/// Render a SymbolState.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
+
 /// Callback to notify client that symbols have been resolved.
 using SymbolsResolvedCallback = std::function<void(Expected<SymbolMap>)>;
 
-/// Callback to notify client that symbols are ready for execution.
-using SymbolsReadyCallback = std::function<void(Error)>;
-
 /// Callback to register the dependencies for a given query.
 using RegisterDependenciesFunction =
     std::function<void(const SymbolDependenceMap &)>;
@@ -175,7 +191,7 @@ public:
   /// Note: The returned flags may have transient flags (Lazy, Materializing)
   /// set. These should be stripped with JITSymbolFlags::stripTransientFlags
   /// before using.
-  const SymbolFlagsMap &getSymbols() { return SymbolFlags; }
+  const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }
 
   /// Returns the names of any symbols covered by this
   /// MaterializationResponsibility object that have queries pending. This
@@ -189,12 +205,12 @@ public:
   /// symbols must be ones covered by this MaterializationResponsibility
   /// instance. Individual calls to this method may resolve a subset of the
   /// symbols, but all symbols must have been resolved prior to calling emit.
-  void resolve(const SymbolMap &Symbols);
+  void notifyResolved(const SymbolMap &Symbols);
 
   /// Notifies the target JITDylib (and any pending queries on that JITDylib)
   /// that all symbols covered by this MaterializationResponsibility instance
   /// have been emitted.
-  void emit();
+  void notifyEmitted();
 
   /// Adds new symbols to the JITDylib and this responsibility instance.
   ///        JITDylib entries start out in the materializing state.
@@ -334,18 +350,6 @@ absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
       std::move(Symbols), std::move(K));
 }
 
-struct SymbolAliasMapEntry {
-  SymbolAliasMapEntry() = default;
-  SymbolAliasMapEntry(SymbolStringPtr Aliasee, JITSymbolFlags AliasFlags)
-      : Aliasee(std::move(Aliasee)), AliasFlags(AliasFlags) {}
-
-  SymbolStringPtr Aliasee;
-  JITSymbolFlags AliasFlags;
-};
-
-/// A map of Symbols to (Symbol, Flags) pairs.
-using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
-
 /// A materialization unit for symbol aliases. Allows existing symbols to be
 /// aliased with alternate flags.
 class ReExportsMaterializationUnit : public MaterializationUnit {
@@ -419,7 +423,7 @@ public:
   ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
                      SymbolPredicate Allow = SymbolPredicate());
 
-  SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
+  Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
 
 private:
   JITDylib &SourceJD;
@@ -427,6 +431,15 @@ private:
   SymbolPredicate Allow;
 };
 
+/// Represents the state that a symbol has reached during materialization.
+enum class SymbolState : uint8_t {
+  Invalid,       ///< No symbol should be in this state.
+  NeverSearched, ///< Added to the symbol table, never queried.
+  Materializing, ///< Queried, materialization begun.
+  Resolved,      ///< Assigned address, still materializing.
+  Ready = 0x3f   ///< Ready and safe for clients to access.
+};
+
 /// A symbol query that returns results via a callback when results are
 ///        ready.
 ///
@@ -437,38 +450,30 @@ class AsynchronousSymbolQuery {
   friend class JITSymbolResolverAdapter;
 
 public:
-
-  /// Create a query for the given symbols, notify-resolved and
-  ///        notify-ready callbacks.
+  /// Create a query for the given symbols. The NotifyComplete
+  /// callback will be called once all queried symbols reach the given
+  /// minimum state.
   AsynchronousSymbolQuery(const SymbolNameSet &Symbols,
-                          SymbolsResolvedCallback NotifySymbolsResolved,
-                          SymbolsReadyCallback NotifySymbolsReady);
+                          SymbolState RequiredState,
+                          SymbolsResolvedCallback NotifyComplete);
 
-  /// Set the resolved symbol information for the given symbol name.
-  void resolve(const SymbolStringPtr &Name, JITEvaluatedSymbol Sym);
+  /// Notify the query that a requested symbol has reached the required state.
+  void notifySymbolMetRequiredState(const SymbolStringPtr &Name,
+                                    JITEvaluatedSymbol Sym);
 
   /// Returns true if all symbols covered by this query have been
   ///        resolved.
-  bool isFullyResolved() const { return NotYetResolvedCount == 0; }
+  bool isComplete() const { return OutstandingSymbolsCount == 0; }
 
-  /// Call the NotifySymbolsResolved callback.
+  /// Call the NotifyComplete callback.
   ///
-  /// This should only be called if all symbols covered by the query have been
-  /// resolved.
-  void handleFullyResolved();
-
-  /// Notify the query that a requested symbol is ready for execution.
-  void notifySymbolReady();
-
-  /// Returns true if all symbols covered by this query are ready.
-  bool isFullyReady() const { return NotYetReadyCount == 0; }
-
-  /// Calls the NotifySymbolsReady callback.
-  ///
-  /// This should only be called if all symbols covered by this query are ready.
-  void handleFullyReady();
+  /// This should only be called if all symbols covered by the query have
+  /// reached the specified state.
+  void handleComplete();
 
 private:
+  SymbolState getRequiredState() { return RequiredState; }
+
   void addQueryDependence(JITDylib &JD, SymbolStringPtr Name);
 
   void removeQueryDependence(JITDylib &JD, const SymbolStringPtr &Name);
@@ -479,12 +484,11 @@ private:
 
   void detach();
 
-  SymbolsResolvedCallback NotifySymbolsResolved;
-  SymbolsReadyCallback NotifySymbolsReady;
+  SymbolsResolvedCallback NotifyComplete;
   SymbolDependenceMap QueryRegistrations;
   SymbolMap ResolvedSymbols;
-  size_t NotYetResolvedCount;
-  size_t NotYetReadyCount;
+  size_t OutstandingSymbolsCount;
+  SymbolState RequiredState;
 };
 
 /// A symbol table that supports asynchoronous symbol queries.
@@ -498,7 +502,7 @@ class JITDylib {
   friend class ExecutionSession;
   friend class MaterializationResponsibility;
 public:
-  using GeneratorFunction = std::function<SymbolNameSet(
+  using GeneratorFunction = std::function<Expected<SymbolNameSet>(
       JITDylib &Parent, const SymbolNameSet &Names)>;
 
   using AsynchronousSymbolQuerySet =
@@ -596,7 +600,7 @@ public:
 
   /// Search the given JITDylib for the symbols in Symbols. If found, store
   ///        the flags for each symbol in Flags. Returns any unresolved symbols.
-  SymbolFlagsMap lookupFlags(const SymbolNameSet &Names);
+  Expected<SymbolFlagsMap> lookupFlags(const SymbolNameSet &Names);
 
   /// Dump current JITDylib state to OS.
   void dump(raw_ostream &OS);
@@ -609,8 +613,8 @@ public:
   /// and the query will not be applied. The Query is not failed and can be
   /// re-used in a subsequent lookup once the symbols have been added, or
   /// manually failed.
-  SymbolNameSet legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
-                             SymbolNameSet Names);
+  Expected<SymbolNameSet>
+  legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names);
 
 private:
   using AsynchronousSymbolQueryList =
@@ -627,40 +631,92 @@ private:
       DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
 
   struct MaterializingInfo {
-    AsynchronousSymbolQueryList PendingQueries;
     SymbolDependenceMap Dependants;
     SymbolDependenceMap UnemittedDependencies;
     bool IsEmitted = false;
+
+    void addQuery(std::shared_ptr<AsynchronousSymbolQuery> Q);
+    void removeQuery(const AsynchronousSymbolQuery &Q);
+    AsynchronousSymbolQueryList takeQueriesMeeting(SymbolState RequiredState);
+    AsynchronousSymbolQueryList takeAllQueries();
+    bool hasQueriesPending() const { return !PendingQueries.empty(); }
+    const AsynchronousSymbolQueryList &pendingQueries() const {
+      return PendingQueries;
+    }
+
+  private:
+    AsynchronousSymbolQueryList PendingQueries;
   };
 
   using MaterializingInfosMap = DenseMap<SymbolStringPtr, MaterializingInfo>;
 
-  using LookupImplActionFlags = enum {
-    None = 0,
-    NotifyFullyResolved = 1 << 0U,
-    NotifyFullyReady = 1 << 1U,
-    LLVM_MARK_AS_BITMASK_ENUM(NotifyFullyReady)
+  class SymbolTableEntry {
+  public:
+    SymbolTableEntry() = default; // NOTE: does not set the bit-fields below.
+    SymbolTableEntry(JITSymbolFlags Flags)
+        : Flags(Flags), State(static_cast<uint8_t>(SymbolState::NeverSearched)),
+          MaterializerAttached(false), PendingRemoval(false) {}
+
+    JITTargetAddress getAddress() const { return Addr; }
+    JITSymbolFlags getFlags() const { return Flags; }
+    SymbolState getState() const { return static_cast<SymbolState>(State); }
+
+    bool isInMaterializationPhase() const {
+      return getState() == SymbolState::Materializing ||
+             getState() == SymbolState::Resolved;
+    }
+
+    bool hasMaterializerAttached() const { return MaterializerAttached; }
+    bool isPendingRemoval() const { return PendingRemoval; }
+
+    void setAddress(JITTargetAddress Addr) { this->Addr = Addr; }
+    void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
+    void setState(SymbolState State) {
+      assert(static_cast<uint8_t>(State) < (1 << 6) &&
+             "State does not fit in bitfield");
+      this->State = static_cast<uint8_t>(State);
+    }
+
+    void setMaterializerAttached(bool MaterializerAttached) {
+      this->MaterializerAttached = MaterializerAttached;
+    }
+
+    void setPendingRemoval(bool PendingRemoval) {
+      this->PendingRemoval = PendingRemoval;
+    }
+
+    JITEvaluatedSymbol getSymbol() const {
+      return JITEvaluatedSymbol(Addr, Flags);
+    }
+
+  private:
+    JITTargetAddress Addr = 0;
+    JITSymbolFlags Flags;
+    uint8_t State : 6; // Holds a SymbolState (see getState/setState).
+    uint8_t MaterializerAttached : 1;
+    uint8_t PendingRemoval : 1;
   };
 
+  using SymbolTable = DenseMap<SymbolStringPtr, SymbolTableEntry>;
+
   JITDylib(ExecutionSession &ES, std::string Name);
 
   Error defineImpl(MaterializationUnit &MU);
 
-  SymbolNameSet lookupFlagsImpl(SymbolFlagsMap &Flags,
-                                const SymbolNameSet &Names);
+  Expected<SymbolNameSet> lookupFlagsImpl(SymbolFlagsMap &Flags,
+                                          const SymbolNameSet &Names);
 
-  void lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
-                  SymbolNameSet &Unresolved, bool MatchNonExported,
-                  MaterializationUnitList &MUs);
+  Error lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+                   SymbolNameSet &Unresolved, bool MatchNonExported,
+                   MaterializationUnitList &MUs);
 
   void lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
                       SymbolNameSet &Unresolved, bool MatchNonExported,
                       MaterializationUnitList &MUs);
 
-  LookupImplActionFlags
-  lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
-             std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
-             SymbolNameSet &Unresolved);
+  bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+                  std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+                  SymbolNameSet &Unresolved);
 
   void detachQueryHelper(AsynchronousSymbolQuery &Q,
                          const SymbolNameSet &QuerySymbols);
@@ -686,7 +742,7 @@ private:
 
   ExecutionSession &ES;
   std::string JITDylibName;
-  SymbolMap Symbols;
+  SymbolTable Symbols;
   UnmaterializedInfosMap UnmaterializedInfos;
   MaterializingInfosMap MaterializingInfos;
   GeneratorFunction DefGenerator;
@@ -727,7 +783,15 @@ public:
   /// the ExecutionSession.
   JITDylib &getMainJITDylib();
 
+  /// Return a pointer to the JITDylib with the given name.
+  /// Ownership of the JITDylib remains with the ExecutionSession.
+  JITDylib *getJITDylibByName(StringRef Name);
+
   /// Add a new JITDylib to this ExecutionSession.
+  ///
+  /// The JITDylib Name is required to be unique. Clients should verify that
+  /// names are not being re-used (e.g. by calling getJITDylibByName) if names
+  /// are based on user input.
   JITDylib &createJITDylib(std::string Name,
                            bool AddToMainDylibSearchOrder = true);
 
@@ -769,7 +833,7 @@ public:
   /// Do not use -- this will be removed soon.
   Expected<SymbolMap>
   legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
-               bool WaiUntilReady,
+               SymbolState RequiredState,
                RegisterDependenciesFunction RegisterDependencies);
 
   /// Search the given JITDylib list for the given symbols.
@@ -779,11 +843,8 @@ public:
   /// (hidden visibility) symbols in that dylib (true means match against
   /// non-exported symbols, false means do not match).
   ///
-  /// The OnResolve callback will be called once all requested symbols are
-  /// resolved, or if an error occurs prior to resolution.
-  ///
-  /// The OnReady callback will be called once all requested symbols are ready,
-  /// or if an error occurs after resolution but before all symbols are ready.
+  /// The NotifyComplete callback will be called once all requested symbols
+  /// reach the required state.
   ///
   /// If all symbols are found, the RegisterDependencies function will be called
   /// while the session lock is held. This gives clients a chance to register
@@ -795,7 +856,7 @@ public:
   /// client to get an address to call) then the value NoDependenciesToRegister
   /// can be used.
   void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
-              SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+              SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
               RegisterDependenciesFunction RegisterDependencies);
 
   /// Blocking version of lookup above. Returns the resolved symbol map.
@@ -807,9 +868,9 @@ public:
   /// error will be reported via reportErrors.
   Expected<SymbolMap> lookup(const JITDylibSearchList &SearchOrder,
                              const SymbolNameSet &Symbols,
+                             SymbolState RequiredState = SymbolState::Ready,
                              RegisterDependenciesFunction RegisterDependencies =
-                                 NoDependenciesToRegister,
-                             bool WaitUntilReady = true);
+                                 NoDependenciesToRegister);
 
   /// Convenience version of blocking lookup.
   /// Searches each of the JITDylibs in the search order in turn for the given
@@ -832,10 +893,11 @@ public:
   /// Materialize the given unit.
   void dispatchMaterialization(JITDylib &JD,
                                std::unique_ptr<MaterializationUnit> MU) {
-    LLVM_DEBUG(runSessionLocked([&]() {
-                 dbgs() << "Compiling, for " << JD.getName() << ", " << *MU
-                        << "\n";
-               }););
+    LLVM_DEBUG({
+      runSessionLocked([&]() {
+        dbgs() << "Dispatching " << *MU << " for " << JD.getName() << "\n";
+      });
+    });
     DispatchMaterialization(JD, std::move(MU));
   }
 
diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index 88559f822e5d..75865920c741 100644
--- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -1,9 +1,8 @@
 //===- ExecutionUtils.h - Utilities for executing code in Orc ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -98,7 +97,14 @@ class LegacyCtorDtorRunner {
 public:
   /// Construct a CtorDtorRunner for the given range using the given
   ///        name mangling function.
-  LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames,
+                           VModuleKey K),
+      "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+      "Please use the ORCv2 CtorDtorRunner utility instead");
+
+  LegacyCtorDtorRunner(ORCv1DeprecationAcknowledgement,
+                       std::vector<std::string> CtorDtorNames, VModuleKey K)
       : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
 
   /// Run the recorded constructors/destructors through the given JIT
@@ -129,6 +135,11 @@ private:
   orc::VModuleKey K;
 };
 
+template <typename JITLayerT>
+LegacyCtorDtorRunner<JITLayerT>::LegacyCtorDtorRunner(
+    std::vector<std::string> CtorDtorNames, VModuleKey K)
+    : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
+
 class CtorDtorRunner {
 public:
   CtorDtorRunner(JITDylib &JD) : JD(JD) {}
@@ -181,7 +192,14 @@ class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
 public:
   /// Create a runtime-overrides class.
   template <typename MangleFtorT>
-  LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle),
+      "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+      "Please use the ORCv2 LocalCXXRuntimeOverrides utility instead");
+
+  template <typename MangleFtorT>
+  LegacyLocalCXXRuntimeOverrides(ORCv1DeprecationAcknowledgement,
+                                 const MangleFtorT &Mangle) {
     addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
     addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
   }
@@ -202,6 +220,13 @@ private:
   StringMap<JITTargetAddress> CXXRuntimeOverrides;
 };
 
+template <typename MangleFtorT>
+LegacyLocalCXXRuntimeOverrides::LegacyLocalCXXRuntimeOverrides(
+    const MangleFtorT &Mangle) {
+  addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
+  addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
+}
+
 class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
 public:
   Error enable(JITDylib &JD, MangleAndInterner &Mangler);
@@ -218,28 +243,29 @@ public:
 
   /// Create a DynamicLibrarySearchGenerator that searches for symbols in the
   /// given sys::DynamicLibrary.
+  ///
   /// If the Allow predicate is given then only symbols matching the predicate
-  /// will be searched for in the DynamicLibrary. If the predicate is not given
-  /// then all symbols will be searched for.
-  DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, const DataLayout &DL,
+  /// will be searched for. If the predicate is not given then all symbols will
+  /// be searched for.
+  DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, char GlobalPrefix,
                                 SymbolPredicate Allow = SymbolPredicate());
 
   /// Permanently loads the library at the given path and, on success, returns
   /// a DynamicLibrarySearchGenerator that will search it for symbol definitions
   /// in the library. On failure returns the reason the library failed to load.
   static Expected<DynamicLibrarySearchGenerator>
-  Load(const char *FileName, const DataLayout &DL,
+  Load(const char *FileName, char GlobalPrefix,
        SymbolPredicate Allow = SymbolPredicate());
 
   /// Creates a DynamicLibrarySearchGenerator that searches for symbols in
   /// the current process.
   static Expected<DynamicLibrarySearchGenerator>
-  GetForCurrentProcess(const DataLayout &DL,
+  GetForCurrentProcess(char GlobalPrefix,
                        SymbolPredicate Allow = SymbolPredicate()) {
-    return Load(nullptr, DL, std::move(Allow));
+    return Load(nullptr, GlobalPrefix, std::move(Allow));
   }
 
-  SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
+  Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
 
 private:
   sys::DynamicLibrary Dylib;
diff --git a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
index a8a88d7cb2d2..a4e43d4e1c9c 100644
--- a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
@@ -1,9 +1,8 @@
 //===- GlobalMappingLayer.h - Run all IR through a functor ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 30d71e69cd70..52223a83ad42 100644
--- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -1,9 +1,8 @@
 //===- IRCompileLayer.h -- Eagerly compile IR for JIT -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,8 +63,18 @@ public:
 
   /// Construct an LegacyIRCompileLayer with the given BaseLayer, which must
   ///        implement the ObjectLayer concept.
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyIRCompileLayer(
+          BaseLayerT &BaseLayer, CompileFtor Compile,
+          NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 IRCompileLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
   LegacyIRCompileLayer(
-      BaseLayerT &BaseLayer, CompileFtor Compile,
+      ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+      CompileFtor Compile,
       NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
       : BaseLayer(BaseLayer), Compile(std::move(Compile)),
         NotifyCompiled(std::move(NotifyCompiled)) {}
@@ -123,8 +132,14 @@ private:
   NotifyCompiledCallback NotifyCompiled;
 };
 
-} // end namespace orc
+template <typename BaseLayerT, typename CompileFtor>
+LegacyIRCompileLayer<BaseLayerT, CompileFtor>::LegacyIRCompileLayer(
+    BaseLayerT &BaseLayer, CompileFtor Compile,
+    NotifyCompiledCallback NotifyCompiled)
+    : BaseLayer(BaseLayer), Compile(std::move(Compile)),
+      NotifyCompiled(std::move(NotifyCompiled)) {}
 
+} // end namespace orc
 } // end namespace llvm
 
 #endif // LLVM_EXECUTIONENGINE_ORC_IRCOMPILINGLAYER_H
diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 49e65b9f2a80..1b4c8b6cd95f 100644
--- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -1,9 +1,8 @@
 //===- IRTransformLayer.h - Run all IR through a functor --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,9 +56,17 @@ class LegacyIRTransformLayer {
 public:
 
   /// Construct an LegacyIRTransformLayer with the given BaseLayer
-  LegacyIRTransformLayer(BaseLayerT &BaseLayer,
-                   TransformFtor Transform = TransformFtor())
-    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyIRTransformLayer(BaseLayerT &BaseLayer,
+                             TransformFtor Transform = TransformFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 IRTransformLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyIRTransformLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+                         TransformFtor Transform = TransformFtor())
+      : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
 
   /// Apply the transform functor to the module, then add the module to
   ///        the layer below, along with the memory manager and symbol resolver.
@@ -109,6 +116,11 @@ private:
   TransformFtor Transform;
 };
 
+template <typename BaseLayerT, typename TransformFtor>
+LegacyIRTransformLayer<BaseLayerT, TransformFtor>::LegacyIRTransformLayer(
+    BaseLayerT &BaseLayer, TransformFtor Transform)
+    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index c2527802f6a7..a7ed5372d1e4 100644
--- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -1,9 +1,8 @@
 //===- IndirectionUtils.h - Utilities for adding indirections ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -147,13 +146,13 @@ private:
     std::error_code EC;
     auto TrampolineBlock =
         sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
-            sys::Process::getPageSize(), nullptr,
+            sys::Process::getPageSizeEstimate(), nullptr,
             sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
     if (EC)
       return errorCodeToError(EC);
 
     unsigned NumTrampolines =
-        (sys::Process::getPageSize() - ORCABI::PointerSize) /
+        (sys::Process::getPageSizeEstimate() - ORCABI::PointerSize) /
         ORCABI::TrampolineSize;
 
     uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
diff --git a/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
index eb9b6bf2dea6..bcbd72e68f15 100644
--- a/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
+++ b/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -1,9 +1,8 @@
 //===- JITTargetMachineBuilder.h - Build TargetMachines for JIT -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/LLJIT.h b/include/llvm/ExecutionEngine/Orc/LLJIT.h
index ce3e5d519c73..0aac1916423f 100644
--- a/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -1,9 +1,8 @@
 //===----- LLJIT.h -- An ORC-based JIT for compiling LLVM IR ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,35 +20,49 @@
 #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
 #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
 #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
 #include "llvm/Support/ThreadPool.h"
 
 namespace llvm {
 namespace orc {
 
+class LLJITBuilderState;
+class LLLazyJITBuilderState;
+
 /// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
+///
+/// Create instances using LLJITBuilder.
 class LLJIT {
+  template <typename, typename, typename> friend class LLJITBuilderSetters;
+
 public:
+  static Expected<std::unique_ptr<LLJIT>> Create(LLJITBuilderState &S);
 
   /// Destruct this instance. If a multi-threaded instance, waits for all
   /// compile threads to complete.
   ~LLJIT();
 
-  /// Create an LLJIT instance.
-  /// If NumCompileThreads is not equal to zero, creates a multi-threaded
-  /// LLJIT with the given number of compile threads.
-  static Expected<std::unique_ptr<LLJIT>>
-  Create(JITTargetMachineBuilder JTMB, DataLayout DL,
-         unsigned NumCompileThreads = 0);
-
   /// Returns the ExecutionSession for this instance.
   ExecutionSession &getExecutionSession() { return *ES; }
 
+  /// Returns a reference to the DataLayout for this instance.
+  const DataLayout &getDataLayout() const { return DL; }
+
   /// Returns a reference to the JITDylib representing the JIT'd main program.
   JITDylib &getMainJITDylib() { return Main; }
 
+  /// Returns the JITDylib with the given name, or nullptr if no JITDylib with
+  /// that name exists.
+  JITDylib *getJITDylibByName(StringRef Name) {
+    return ES->getJITDylibByName(Name);
+  }
+
   /// Create a new JITDylib with the given name and return a reference to it.
+  ///
+  /// JITDylib names must be unique. If the given name is derived from user
+  /// input or elsewhere in the environment then the client should check
+  /// (e.g. by calling getJITDylibByName) that the given name is not already in
+  /// use.
   JITDylib &createJITDylib(std::string Name) {
     return ES->createJITDylib(std::move(Name));
   }
@@ -57,8 +70,6 @@ public:
   /// Convenience method for defining an absolute symbol.
   Error defineAbsolute(StringRef Name, JITEvaluatedSymbol Address);
 
-  /// Convenience method for defining an
-
   /// Adds an IR module to the given JITDylib.
   Error addIRModule(JITDylib &JD, ThreadSafeModule TSM);
 
@@ -104,17 +115,17 @@ public:
   Error runDestructors() { return DtorRunner.run(); }
 
   /// Returns a reference to the ObjLinkingLayer
-  RTDyldObjectLinkingLayer &getObjLinkingLayer() { return ObjLinkingLayer; }
+  ObjectLayer &getObjLinkingLayer() { return *ObjLinkingLayer; }
 
 protected:
+  static std::unique_ptr<ObjectLayer>
+  createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
 
-  /// Create an LLJIT instance with a single compile thread.
-  LLJIT(std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
-        DataLayout DL);
+  static Expected<IRCompileLayer::CompileFunction>
+  createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB);
 
-  /// Create an LLJIT instance with multiple compile threads.
-  LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
-        DataLayout DL, unsigned NumCompileThreads);
+  /// Create an LLJIT instance from builder state S, reporting failure via Err.
+  LLJIT(LLJITBuilderState &S, Error &Err);
 
   std::string mangle(StringRef UnmangledName);
 
@@ -128,8 +139,8 @@ protected:
   DataLayout DL;
   std::unique_ptr<ThreadPool> CompileThreads;
 
-  RTDyldObjectLinkingLayer ObjLinkingLayer;
-  IRCompileLayer CompileLayer;
+  std::unique_ptr<ObjectLayer> ObjLinkingLayer;
+  std::unique_ptr<IRCompileLayer> CompileLayer;
 
   CtorDtorRunner CtorRunner, DtorRunner;
 };
@@ -137,25 +148,20 @@ protected:
 /// An extended version of LLJIT that supports lazy function-at-a-time
 /// compilation of LLVM IR.
 class LLLazyJIT : public LLJIT {
-public:
+  template <typename, typename, typename> friend class LLJITBuilderSetters;
 
-  /// Create an LLLazyJIT instance.
-  /// If NumCompileThreads is not equal to zero, creates a multi-threaded
-  /// LLLazyJIT with the given number of compile threads.
-  static Expected<std::unique_ptr<LLLazyJIT>>
-  Create(JITTargetMachineBuilder JTMB, DataLayout DL,
-         JITTargetAddress ErrorAddr, unsigned NumCompileThreads = 0);
+public:
 
   /// Set an IR transform (e.g. pass manager pipeline) to run on each function
   /// when it is compiled.
   void setLazyCompileTransform(IRTransformLayer::TransformFunction Transform) {
-    TransformLayer.setTransform(std::move(Transform));
+    TransformLayer->setTransform(std::move(Transform));
   }
 
   /// Sets the partition function.
   void
   setPartitionFunction(CompileOnDemandLayer::PartitionFunction Partition) {
-    CODLayer.setPartitionFunction(std::move(Partition));
+    CODLayer->setPartitionFunction(std::move(Partition));
   }
 
   /// Add a module to be lazily compiled to JITDylib JD.
@@ -169,24 +175,160 @@ public:
 private:
 
   // Create a single-threaded LLLazyJIT instance.
-  LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
-            std::unique_ptr<TargetMachine> TM, DataLayout DL,
-            std::unique_ptr<LazyCallThroughManager> LCTMgr,
-            std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+  LLLazyJIT(LLLazyJITBuilderState &S, Error &Err);
+
+  std::unique_ptr<LazyCallThroughManager> LCTMgr;
+  std::unique_ptr<IRTransformLayer> TransformLayer;
+  std::unique_ptr<CompileOnDemandLayer> CODLayer;
+};
+
+class LLJITBuilderState {
+public:
+  using ObjectLinkingLayerCreator =
+      std::function<std::unique_ptr<ObjectLayer>(ExecutionSession &)>;
+
+  using CompileFunctionCreator =
+      std::function<Expected<IRCompileLayer::CompileFunction>(
+          JITTargetMachineBuilder JTMB)>;
+
+  std::unique_ptr<ExecutionSession> ES;
+  Optional<JITTargetMachineBuilder> JTMB;
+  ObjectLinkingLayerCreator CreateObjectLinkingLayer;
+  CompileFunctionCreator CreateCompileFunction;
+  unsigned NumCompileThreads = 0;
 
-  // Create a multi-threaded LLLazyJIT instance.
-  LLLazyJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
-            DataLayout DL, unsigned NumCompileThreads,
-            std::unique_ptr<LazyCallThroughManager> LCTMgr,
-            std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+  /// Called prior to JIT class construction to fix up defaults.
+  Error prepareForConstruction();
+};
+
+template <typename JITType, typename SetterImpl, typename State>
+class LLJITBuilderSetters {
+public:
+  /// Set the JITTargetMachineBuilder for this instance.
+  ///
+  /// If this method is not called, JITTargetMachineBuilder::detectHost will be
+  /// used to construct a default target machine builder for the host platform.
+  SetterImpl &setJITTargetMachineBuilder(JITTargetMachineBuilder JTMB) {
+    impl().JTMB = std::move(JTMB);
+    return impl();
+  }
+
+  /// Return a reference to the JITTargetMachineBuilder.
+  /// The returned Optional is empty if no builder has been set yet.
+  Optional<JITTargetMachineBuilder> &getJITTargetMachineBuilder() {
+    return impl().JTMB;
+  }
 
+  /// Set an ObjectLinkingLayer creation function.
+  ///
+  /// If this method is not called, a default creation function will be used
+  /// that will construct an RTDyldObjectLinkingLayer.
+  SetterImpl &setObjectLinkingLayerCreator(
+      LLJITBuilderState::ObjectLinkingLayerCreator CreateObjectLinkingLayer) {
+    impl().CreateObjectLinkingLayer = std::move(CreateObjectLinkingLayer);
+    return impl();
+  }
+
+  /// Set a CompileFunctionCreator.
+  ///
+  /// If this method is not called, a default creation function will be used
+  /// that will construct a basic IR compile function that is compatible with
+  /// the selected number of threads (SimpleCompiler for '0' compile threads,
+  /// ConcurrentIRCompiler otherwise).
+  SetterImpl &setCompileFunctionCreator(
+      LLJITBuilderState::CompileFunctionCreator CreateCompileFunction) {
+    impl().CreateCompileFunction = std::move(CreateCompileFunction);
+    return impl();
+  }
+
+  /// Set the number of compile threads to use.
+  ///
+  /// If set to zero, compilation will be performed on the execution thread when
+  /// JITing in-process. If set to any other number N, a thread pool of N
+  /// threads will be created for compilation.
+  ///
+  /// If this method is not called, behavior will be as if it were called with
+  /// a zero argument.
+  SetterImpl &setNumCompileThreads(unsigned NumCompileThreads) {
+    impl().NumCompileThreads = NumCompileThreads;
+    return impl();
+  }
+
+  /// Create an instance of the JIT.
+  Expected<std::unique_ptr<JITType>> create() {
+    if (auto Err = impl().prepareForConstruction())
+      return std::move(Err);
+
+    Error Err = Error::success();
+    std::unique_ptr<JITType> J(new JITType(impl(), Err));
+    if (Err)
+      return std::move(Err);
+    return std::move(J);
+  }
+
+protected:
+  SetterImpl &impl() { return static_cast<SetterImpl &>(*this); }
+};
+
+/// Constructs LLJIT instances.
+class LLJITBuilder
+    : public LLJITBuilderState,
+      public LLJITBuilderSetters<LLJIT, LLJITBuilder, LLJITBuilderState> {};
+
+class LLLazyJITBuilderState : public LLJITBuilderState {
+  friend class LLLazyJIT;
+
+public:
+  using IndirectStubsManagerBuilderFunction =
+      std::function<std::unique_ptr<IndirectStubsManager>()>;
+
+  Triple TT;
+  JITTargetAddress LazyCompileFailureAddr = 0;
   std::unique_ptr<LazyCallThroughManager> LCTMgr;
-  std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder;
+  IndirectStubsManagerBuilderFunction ISMBuilder;
+
+  Error prepareForConstruction();
+};
+
+template <typename JITType, typename SetterImpl, typename State>
+class LLLazyJITBuilderSetters
+    : public LLJITBuilderSetters<JITType, SetterImpl, State> {
+public:
+  /// Set the target address to call if a lazy compile fails.
+  ///
+  /// If this method is not called then the value will default to 0.
+  SetterImpl &setLazyCompileFailureAddr(JITTargetAddress Addr) {
+    this->impl().LazyCompileFailureAddr = Addr;
+    return this->impl();
+  }
+
+  /// Set the lazy-callthrough manager.
+  ///
+  /// If this method is not called then a default, in-process lazy callthrough
+  /// manager for the host platform will be used.
+  SetterImpl &
+  setLazyCallthroughManager(std::unique_ptr<LazyCallThroughManager> LCTMgr) {
+    this->impl().LCTMgr = std::move(LCTMgr);
+    return this->impl();
+  }
 
-  IRTransformLayer TransformLayer;
-  CompileOnDemandLayer CODLayer;
+  /// Set the IndirectStubsManager builder function.
+  ///
+  /// If this method is not called then a default, in-process
+  /// IndirectStubsManager builder for the host platform will be used.
+  SetterImpl &setIndirectStubsManagerBuilder(
+      LLLazyJITBuilderState::IndirectStubsManagerBuilderFunction ISMBuilder) {
+    this->impl().ISMBuilder = std::move(ISMBuilder);
+    return this->impl();
+  }
 };
 
+/// Constructs LLLazyJIT instances.
+class LLLazyJITBuilder
+    : public LLLazyJITBuilderState,
+      public LLLazyJITBuilderSetters<LLLazyJIT, LLLazyJITBuilder,
+                                     LLLazyJITBuilderState> {};
+
 } // End namespace orc
 } // End namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index 7b6f3d2f92ab..855e31b33549 100644
--- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -1,9 +1,8 @@
 //===- LambdaResolverMM - Redirect symbol lookup via a functor --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,7 +24,15 @@ namespace orc {
 template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
 class LambdaResolver : public LegacyJITSymbolResolver {
 public:
-  LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+                     ExternalLookupFtorT ExternalLookupFtor),
+      "ORCv1 utilities (including resolvers) are deprecated and will be "
+      "removed "
+      "in the next release. Please use ORCv2 (see docs/ORCv2.rst)");
+
+  LambdaResolver(ORCv1DeprecationAcknowledgement,
+                 DylibLookupFtorT DylibLookupFtor,
                  ExternalLookupFtorT ExternalLookupFtor)
       : DylibLookupFtor(DylibLookupFtor),
         ExternalLookupFtor(ExternalLookupFtor) {}
@@ -43,6 +50,12 @@ private:
   ExternalLookupFtorT ExternalLookupFtor;
 };
 
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>::LambdaResolver(
+    DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor)
+    : DylibLookupFtor(DylibLookupFtor), ExternalLookupFtor(ExternalLookupFtor) {
+}
+
 template <typename DylibLookupFtorT,
           typename ExternalLookupFtorT>
 std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
@@ -53,6 +66,17 @@ createLambdaResolver(DylibLookupFtorT DylibLookupFtor,
                          std::move(ExternalLookupFtor));
 }
 
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
+createLambdaResolver(ORCv1DeprecationAcknowledgement,
+                     DylibLookupFtorT DylibLookupFtor,
+                     ExternalLookupFtorT ExternalLookupFtor) {
+  using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
+  return make_unique<LR>(AcknowledgeORCv1Deprecation,
+                         std::move(DylibLookupFtor),
+                         std::move(ExternalLookupFtor));
+}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/Layer.h b/include/llvm/ExecutionEngine/Orc/Layer.h
index cd797445a2e6..8f9bd704395e 100644
--- a/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -1,9 +1,8 @@
 //===---------------- Layer.h -- Layer interfaces --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index 46761b0ca7e1..16202d89f861 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -1,9 +1,8 @@
 //===- LazyEmittingLayer.h - Lazily emit IR to lower JIT layers -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,8 +34,8 @@ namespace orc {
 
 /// Lazy-emitting IR layer.
 ///
-///   This layer accepts LLVM IR Modules (via addModule), but does not
-/// immediately emit them the layer below. Instead, emissing to the base layer
+///   This layer accepts LLVM IR Modules (via addModule) but does not
+/// immediately emit them to the layer below; emission to the base layer
 /// is deferred until the first time the client requests the address (via
 /// JITSymbol::getAddress) for a symbol contained in this layer.
 template <typename BaseLayerT> class LazyEmittingLayer {
@@ -197,7 +196,14 @@ private:
 public:
 
   /// Construct a lazy emitting layer.
-  LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LazyEmittingLayer(BaseLayerT &BaseLayer),
+      "ORCv1 layers (including LazyEmittingLayer) are deprecated. Please use "
+      "ORCv2, where lazy emission is the default");
+
+  /// Construct a lazy emitting layer, acknowledging the ORCv1 deprecation.
+  LazyEmittingLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer)
+      : BaseLayer(BaseLayer) {}
 
   /// Add the given module to the lazy emitting layer.
   Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
@@ -255,6 +261,10 @@ public:
   }
 };
 
+template <typename BaseLayerT>
+LazyEmittingLayer<BaseLayerT>::LazyEmittingLayer(BaseLayerT &BaseLayer)
+    : BaseLayer(BaseLayer) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index b5041325bce2..9fdd1d15f782 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -1,9 +1,8 @@
 //===------ LazyReexports.h -- Utilities for lazy reexports -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h
index 4c6162ac4b8b..f9cbbf6ff180 100644
--- a/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ b/include/llvm/ExecutionEngine/Orc/Legacy.h
@@ -1,9 +1,8 @@
 //===--- Legacy.h -- Adapters for ExecutionEngine API interop ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -149,8 +148,8 @@ lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query,
   for (auto &S : Symbols) {
     if (JITSymbol Sym = FindSymbol(*S)) {
       if (auto Addr = Sym.getAddress()) {
-        Query.resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
-        Query.notifySymbolReady();
+        Query.notifySymbolMetRequiredState(
+            S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
         NewSymbolsResolved = true;
       } else {
         ES.legacyFailQuery(Query, Addr.takeError());
@@ -163,11 +162,8 @@ lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query,
       SymbolsNotFound.insert(S);
   }
 
-  if (NewSymbolsResolved && Query.isFullyResolved())
-    Query.handleFullyResolved();
-
-  if (NewSymbolsResolved && Query.isFullyReady())
-    Query.handleFullyReady();
+  if (NewSymbolsResolved && Query.isComplete())
+    Query.handleComplete();
 
   return SymbolsNotFound;
 }
diff --git a/include/llvm/ExecutionEngine/Orc/NullResolver.h b/include/llvm/ExecutionEngine/Orc/NullResolver.h
index 03fefb69a928..ffa37a13d064 100644
--- a/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -1,9 +1,8 @@
 //===------ NullResolver.h - Reject symbol lookup requests ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
new file mode 100644
index 000000000000..c1e7d27f446e
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -0,0 +1,165 @@
+//===-- ObjectLinkingLayer.h - JITLink-based jit linking layer --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains the definition for a JITLink-based, in-process object linking
+// layer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
+#define LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <list>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+namespace jitlink {
+class EHFrameRegistrar;
+} // namespace jitlink
+
+namespace object {
+class ObjectFile;
+} // namespace object
+
+namespace orc {
+
+class ObjectLinkingLayerJITLinkContext;
+
+/// An ObjectLayer implementation built on JITLink.
+///
+/// Clients can use this class to add relocatable object files to an
+/// ExecutionSession, and it typically serves as the base layer (underneath
+/// a compiling layer like IRCompileLayer) for the rest of the JIT.
+class ObjectLinkingLayer : public ObjectLayer {
+  friend class ObjectLinkingLayerJITLinkContext;
+
+public:
+  /// Plugin instances can be added to the ObjectLinkingLayer to receive
+  /// callbacks when code is loaded or emitted, and when JITLink is being
+  /// configured.
+  class Plugin {
+  public:
+    virtual ~Plugin();
+    virtual void modifyPassConfig(MaterializationResponsibility &MR,
+                                  const Triple &TT,
+                                  jitlink::PassConfiguration &Config) {}
+    virtual void notifyLoaded(MaterializationResponsibility &MR) {}
+    virtual Error notifyEmitted(MaterializationResponsibility &MR) {
+      return Error::success();
+    }
+    virtual Error notifyRemovingModule(VModuleKey K) {
+      return Error::success();
+    }
+    virtual Error notifyRemovingAllModules() { return Error::success(); }
+  };
+
+  /// Construct an ObjectLinkingLayer with the given ExecutionSession
+  /// and JITLinkMemoryManager.
+  ObjectLinkingLayer(ExecutionSession &ES,
+                     jitlink::JITLinkMemoryManager &MemMgr);
+
+  /// Destruct an ObjectLinkingLayer.
+  ~ObjectLinkingLayer();
+
+  /// Add a plugin to this layer. Returns *this so calls can be chained.
+  ObjectLinkingLayer &addPlugin(std::unique_ptr<Plugin> P) {
+    std::lock_guard<std::mutex> Lock(LayerMutex);
+    Plugins.push_back(std::move(P));
+    return *this;
+  }
+
+  /// Emit the object.
+  void emit(MaterializationResponsibility R,
+            std::unique_ptr<MemoryBuffer> O) override;
+
+  /// Instructs this ObjectLinkingLayer instance to override the symbol flags
+  /// found in the AtomGraph with the flags supplied by the
+  /// MaterializationResponsibility instance. This is a workaround to support
+  /// symbol visibility in COFF, which does not use the libObject's
+  /// SF_Exported flag. Use only when generating / adding COFF object files.
+  ///
+  /// FIXME: We should be able to remove this if/when COFF properly tracks
+  /// exported symbols.
+  ObjectLinkingLayer &
+  setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) {
+    this->OverrideObjectFlags = OverrideObjectFlags;
+    return *this;
+  }
+
+  /// If set, this ObjectLinkingLayer instance will claim responsibility
+  /// for any symbols provided by a given object file that were not already in
+  /// the MaterializationResponsibility instance. Setting this flag allows
+  /// higher-level program representations (e.g. LLVM IR) to be added based on
+  /// only a subset of the symbols they provide, without having to write
+  /// intervening layers to scan and add the additional symbols. This trades
+  /// diagnostic quality for convenience however: If all symbols are enumerated
+  /// up-front then clashes can be detected and reported early (and usually
+  /// deterministically). If this option is set, clashes for the additional
+  /// symbols may not be detected until late, and detection may depend on
+  /// the flow of control through JIT'd code. Use with care.
+  ObjectLinkingLayer &
+  setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) {
+    this->AutoClaimObjectSymbols = AutoClaimObjectSymbols;
+    return *this;
+  }
+
+private:
+  using AllocPtr = std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation>;
+
+  void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT,
+                        jitlink::PassConfiguration &PassConfig);
+  void notifyLoaded(MaterializationResponsibility &MR);
+  Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc);
+
+  Error removeModule(VModuleKey K);
+  Error removeAllModules();
+
+  mutable std::mutex LayerMutex;
+  jitlink::JITLinkMemoryManager &MemMgr;
+  bool OverrideObjectFlags = false;
+  bool AutoClaimObjectSymbols = false;
+  DenseMap<VModuleKey, AllocPtr> TrackedAllocs;
+  std::vector<AllocPtr> UntrackedAllocs;
+  std::vector<std::unique_ptr<Plugin>> Plugins;
+};
+
+class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin {
+public:
+  EHFrameRegistrationPlugin(jitlink::EHFrameRegistrar &Registrar);
+  Error notifyEmitted(MaterializationResponsibility &MR) override;
+  void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT,
+                        jitlink::PassConfiguration &PassConfig) override;
+  Error notifyRemovingModule(VModuleKey K) override;
+  Error notifyRemovingAllModules() override;
+
+private:
+  jitlink::EHFrameRegistrar &Registrar;
+  DenseMap<MaterializationResponsibility *, JITTargetAddress> InProcessLinks;
+  DenseMap<VModuleKey, JITTargetAddress> TrackedEHFrameAddrs;
+  std::vector<JITTargetAddress> UntrackedEHFrameAddrs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index 44d6b490e19d..eac1cc3e097a 100644
--- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -1,9 +1,8 @@
 //===- ObjectTransformLayer.h - Run all objects through functor -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,7 +48,16 @@ template <typename BaseLayerT, typename TransformFtor>
 class LegacyObjectTransformLayer {
 public:
   /// Construct an ObjectTransformLayer with the given BaseLayer
-  LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+                                 TransformFtor Transform = TransformFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 ObjectTransformLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyObjectTransformLayer(ORCv1DeprecationAcknowledgement,
+                             BaseLayerT &BaseLayer,
                              TransformFtor Transform = TransformFtor())
       : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
 
@@ -108,6 +116,11 @@ private:
   TransformFtor Transform;
 };
 
+template <typename BaseLayerT, typename TransformFtor>
+LegacyObjectTransformLayer<BaseLayerT, TransformFtor>::
+    LegacyObjectTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform)
+    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index a70fc373713d..38246bc480b6 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -1,9 +1,8 @@
 //===- OrcABISupport.h - ABI support code -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h
index dc60e8d74e97..e5d6a3eca85f 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcError.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcError.h
@@ -1,9 +1,8 @@
 //===------ OrcError.h - Reject symbol lookup requests ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 3e07f5cf3742..8b875b7906e1 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -1,9 +1,8 @@
 //===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index 8db9e317a18a..e7b598d8f812 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -1,9 +1,8 @@
 //===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
index acbc1682fa5d..4c8e2ea1a7be 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
@@ -1,9 +1,8 @@
 //===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -300,13 +299,13 @@ private:
     std::error_code EC;
     auto TrampolineBlock =
         sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
-            sys::Process::getPageSize(), nullptr,
+            sys::Process::getPageSizeEstimate(), nullptr,
             sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
     if (EC)
       return errorCodeToError(EC);
 
     uint32_t NumTrampolines =
-        (sys::Process::getPageSize() - TargetT::PointerSize) /
+        (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) /
         TargetT::TrampolineSize;
 
     uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
@@ -336,7 +335,7 @@ private:
   handleGetRemoteInfo() {
     std::string ProcessTriple = sys::getProcessTriple();
     uint32_t PointerSize = TargetT::PointerSize;
-    uint32_t PageSize = sys::Process::getPageSize();
+    uint32_t PageSize = sys::Process::getPageSizeEstimate();
     uint32_t TrampolineSize = TargetT::TrampolineSize;
     uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize;
     LLVM_DEBUG(dbgs() << "  Remote info:\n"
diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
index 1e5f6ced597a..07c7471afc6a 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -1,9 +1,8 @@
 //===- llvm/ExecutionEngine/Orc/RPCSerialization.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -128,123 +127,85 @@ template <typename T>
 class RPCTypeName<Expected<T>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "Expected<"
                                << RPCTypeNameSequence<T>()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T>
-std::mutex RPCTypeName<Expected<T>>::NameMutex;
-
-template <typename T>
-std::string RPCTypeName<Expected<T>>::Name;
-
 template <typename T1, typename T2>
 class RPCTypeName<std::pair<T1, T2>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T1, typename T2>
-std::mutex RPCTypeName<std::pair<T1, T2>>::NameMutex;
-template <typename T1, typename T2>
-std::string RPCTypeName<std::pair<T1, T2>>::Name;
-
 template <typename... ArgTs>
 class RPCTypeName<std::tuple<ArgTs...>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::tuple<"
                                << RPCTypeNameSequence<ArgTs...>() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename... ArgTs>
-std::mutex RPCTypeName<std::tuple<ArgTs...>>::NameMutex;
-template <typename... ArgTs>
-std::string RPCTypeName<std::tuple<ArgTs...>>::Name;
-
 template <typename T>
 class RPCTypeName<std::vector<T>> {
 public:
   static const char*getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T>
-std::mutex RPCTypeName<std::vector<T>>::NameMutex;
-template <typename T>
-std::string RPCTypeName<std::vector<T>>::Name;
-
 template <typename T> class RPCTypeName<std::set<T>> {
 public:
   static const char *getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << "std::set<" << RPCTypeName<T>::getName() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T> std::mutex RPCTypeName<std::set<T>>::NameMutex;
-template <typename T> std::string RPCTypeName<std::set<T>>::Name;
-
 template <typename K, typename V> class RPCTypeName<std::map<K, V>> {
 public:
   static const char *getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << "std::map<" << RPCTypeNameSequence<K, V>() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename K, typename V>
-std::mutex RPCTypeName<std::map<K, V>>::NameMutex;
-template <typename K, typename V> std::string RPCTypeName<std::map<K, V>>::Name;
-
 /// The SerializationTraits<ChannelT, T> class describes how to serialize and
 /// deserialize an instance of type T to/from an abstract channel of type
 /// ChannelT. It also provides a representation of the type's name via the
diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 953b73e10e43..3b11e1b283de 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -1,9 +1,8 @@
-//===------- RPCUTils.h - Utilities for building RPC APIs -------*- C++ -*-===//
+//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -152,25 +151,17 @@ public:
 
   /// Returns the full function prototype as a string.
   static const char *getPrototype() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName()
           << "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::mutex Function<DerivedFunc, RetT(ArgTs...)>::NameMutex;
-
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::string Function<DerivedFunc, RetT(ArgTs...)>::Name;
-
 /// Allocates RPC function ids during autonegotiation.
 /// Specializations of this class must provide four members:
 ///
diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 6f90f0380d95..d9535ce5f21f 100644
--- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -1,9 +1,8 @@
 //===- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking  ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,22 +43,34 @@ public:
                          const RuntimeDyld::LoadedObjectInfo &)>;
 
   /// Functor for receiving finalization notifications.
-  using NotifyEmittedFunction = std::function<void(VModuleKey)>;
+  using NotifyEmittedFunction =
+      std::function<void(VModuleKey, std::unique_ptr<MemoryBuffer>)>;
 
   using GetMemoryManagerFunction =
       std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>;
 
   /// Construct an ObjectLinkingLayer with the given NotifyLoaded,
   ///        and NotifyEmitted functors.
-  RTDyldObjectLinkingLayer(
-      ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
-      NotifyLoadedFunction NotifyLoaded = NotifyLoadedFunction(),
-      NotifyEmittedFunction NotifyEmitted = NotifyEmittedFunction());
+  RTDyldObjectLinkingLayer(ExecutionSession &ES,
+                           GetMemoryManagerFunction GetMemoryManager);
 
   /// Emit the object.
   void emit(MaterializationResponsibility R,
             std::unique_ptr<MemoryBuffer> O) override;
 
+  /// Set the NotifyLoaded callback.
+  RTDyldObjectLinkingLayer &setNotifyLoaded(NotifyLoadedFunction NotifyLoaded) {
+    this->NotifyLoaded = std::move(NotifyLoaded);
+    return *this;
+  }
+
+  /// Set the NotifyEmitted callback.
+  RTDyldObjectLinkingLayer &
+  setNotifyEmitted(NotifyEmittedFunction NotifyEmitted) {
+    this->NotifyEmitted = std::move(NotifyEmitted);
+    return *this;
+  }
+
   /// Set the 'ProcessAllSections' flag.
   ///
   /// If set to true, all sections in each object file will be allocated using
@@ -109,7 +120,8 @@ private:
                   std::map<StringRef, JITEvaluatedSymbol> Resolved,
                   std::set<StringRef> &InternalSymbols);
 
-  void onObjEmit(VModuleKey K, MaterializationResponsibility &R, Error Err);
+  void onObjEmit(VModuleKey K, std::unique_ptr<MemoryBuffer> ObjBuffer,
+                 MaterializationResponsibility &R, Error Err);
 
   mutable std::mutex RTDyldLayerMutex;
   GetMemoryManagerFunction GetMemoryManager;
@@ -341,17 +353,27 @@ public:
 
   /// Construct an ObjectLinkingLayer with the given NotifyLoaded,
   ///        and NotifyFinalized functors.
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyRTDyldObjectLinkingLayer(
+          ExecutionSession &ES, ResourcesGetter GetResources,
+          NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
+          NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
+          NotifyFreedFtor NotifyFreed = NotifyFreedFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
   LegacyRTDyldObjectLinkingLayer(
-      ExecutionSession &ES, ResourcesGetter GetResources,
+      ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+      ResourcesGetter GetResources,
       NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
       NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
       NotifyFreedFtor NotifyFreed = NotifyFreedFtor())
       : ES(ES), GetResources(std::move(GetResources)),
         NotifyLoaded(std::move(NotifyLoaded)),
         NotifyFinalized(std::move(NotifyFinalized)),
-        NotifyFreed(std::move(NotifyFreed)),
-        ProcessAllSections(false) {
-  }
+        NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
 
   /// Set the 'ProcessAllSections' flag.
   ///
diff --git a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
index db810f4ef2e5..46b7c59450e6 100644
--- a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
+++ b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
@@ -1,9 +1,8 @@
 //===- llvm/ExecutionEngine/Orc/RawByteChannel.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
index 955e77607a18..b87cf697a81e 100644
--- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
@@ -1,9 +1,8 @@
 //===------ RemoteObjectLayer.h - Forwards objs to a remote -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,9 +13,10 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
 #define LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
 
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include <map>
 
 namespace llvm {
@@ -313,7 +313,14 @@ public:
   ///
   /// The ReportError functor can be used locally log errors that are intended
   /// to be sent  sent
-  RemoteObjectClientLayer(RPCEndpoint &Remote,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      RemoteObjectClientLayer(RPCEndpoint &Remote,
+                              std::function<void(Error)> ReportError),
+      "ORCv1 layers (including RemoteObjectClientLayer) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  RemoteObjectClientLayer(ORCv1DeprecationAcknowledgement, RPCEndpoint &Remote,
                           std::function<void(Error)> ReportError)
       : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
     using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
@@ -418,11 +425,18 @@ public:
 
   /// Create a RemoteObjectServerLayer with the given base layer (which must be
   /// an object layer), RPC endpoint, and error reporter function.
-  RemoteObjectServerLayer(BaseLayerT &BaseLayer,
-                          RPCEndpoint &Remote,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      RemoteObjectServerLayer(BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+                              std::function<void(Error)> ReportError),
+      "ORCv1 layers (including RemoteObjectServerLayer) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  RemoteObjectServerLayer(ORCv1DeprecationAcknowledgement,
+                          BaseLayerT &BaseLayer, RPCEndpoint &Remote,
                           std::function<void(Error)> ReportError)
-    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
-      BaseLayer(BaseLayer), HandleIdMgr(1) {
+      : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+        BaseLayer(BaseLayer), HandleIdMgr(1) {
     using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
 
     Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
@@ -463,6 +477,7 @@ private:
     assert(!BaseLayerHandles.count(Id) && "Id already in use?");
 
     auto Resolver = createLambdaResolver(
+        AcknowledgeORCv1Deprecation,
         [this, Id](const std::string &Name) { return lookup(Id, Name); },
         [this, Id](const std::string &Name) {
           return lookupInLogicalDylib(Id, Name);
@@ -523,6 +538,31 @@ private:
   std::map<ObjHandleT, typename BaseLayerT::ObjHandleT> BaseLayerHandles;
 };
 
+template <typename RPCEndpoint>
+RemoteObjectClientLayer<RPCEndpoint>::RemoteObjectClientLayer(
+    RPCEndpoint &Remote, std::function<void(Error)> ReportError)
+    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
+  using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
+  Remote.template addHandler<Lookup>(*this, &ThisT::lookup);
+  Remote.template addHandler<LookupInLogicalDylib>(
+      *this, &ThisT::lookupInLogicalDylib);
+}
+
+template <typename BaseLayerT, typename RPCEndpoint>
+RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>::RemoteObjectServerLayer(
+    BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+    std::function<void(Error)> ReportError)
+    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+      BaseLayer(BaseLayer), HandleIdMgr(1) {
+  using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
+
+  Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
+  Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject);
+  Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol);
+  Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn);
+  Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize);
+}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index 717076e25609..c354f6c3559c 100644
--- a/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -1,9 +1,8 @@
 //===- SymbolStringPool.h - Multi-threaded pool for JIT symbols -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,25 +50,20 @@ private:
 class SymbolStringPtr {
   friend class SymbolStringPool;
   friend struct DenseMapInfo<SymbolStringPtr>;
-  friend bool operator==(const SymbolStringPtr &LHS,
-                         const SymbolStringPtr &RHS);
-  friend bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS);
-
-  static SymbolStringPool::PoolMapEntry Tombstone;
 
 public:
   SymbolStringPtr() = default;
   SymbolStringPtr(const SymbolStringPtr &Other)
     : S(Other.S) {
-    if (S)
+    if (isRealPoolEntry(S))
       ++S->getValue();
   }
 
   SymbolStringPtr& operator=(const SymbolStringPtr &Other) {
-    if (S)
+    if (isRealPoolEntry(S))
       --S->getValue();
     S = Other.S;
-    if (S)
+    if (isRealPoolEntry(S))
       ++S->getValue();
     return *this;
   }
@@ -79,7 +73,7 @@ public:
   }
 
   SymbolStringPtr& operator=(SymbolStringPtr &&Other) {
-    if (S)
+    if (isRealPoolEntry(S))
       --S->getValue();
     S = nullptr;
     std::swap(S, Other.S);
@@ -87,34 +81,64 @@ public:
   }
 
   ~SymbolStringPtr() {
-    if (S)
+    if (isRealPoolEntry(S))
       --S->getValue();
   }
 
   StringRef operator*() const { return S->first(); }
 
+  friend bool operator==(const SymbolStringPtr &LHS,
+                         const SymbolStringPtr &RHS) {
+    return LHS.S == RHS.S;
+  }
+
+  friend bool operator!=(const SymbolStringPtr &LHS,
+                         const SymbolStringPtr &RHS) {
+    return !(LHS == RHS);
+  }
+
+  friend bool operator<(const SymbolStringPtr &LHS,
+                        const SymbolStringPtr &RHS) {
+    return LHS.S < RHS.S;
+  }
+
 private:
+  using PoolEntryPtr = SymbolStringPool::PoolMapEntry *;
 
   SymbolStringPtr(SymbolStringPool::PoolMapEntry *S)
       : S(S) {
-    if (S)
+    if (isRealPoolEntry(S))
       ++S->getValue();
   }
 
-  SymbolStringPool::PoolMapEntry *S = nullptr;
-};
+  // Returns false for null, empty, and tombstone values, true otherwise.
+  bool isRealPoolEntry(PoolEntryPtr P) {
+    return ((reinterpret_cast<uintptr_t>(P) - 1) & InvalidPtrMask) !=
+           InvalidPtrMask;
+  }
 
-inline bool operator==(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
-  return LHS.S == RHS.S;
-}
+  static SymbolStringPtr getEmptyVal() {
+    return SymbolStringPtr(reinterpret_cast<PoolEntryPtr>(EmptyBitPattern));
+  }
 
-inline bool operator!=(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
-  return !(LHS == RHS);
-}
+  static SymbolStringPtr getTombstoneVal() {
+    return SymbolStringPtr(reinterpret_cast<PoolEntryPtr>(TombstoneBitPattern));
+  }
 
-inline bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
-  return LHS.S < RHS.S;
-}
+  constexpr static uintptr_t EmptyBitPattern =
+      std::numeric_limits<uintptr_t>::max()
+      << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+  constexpr static uintptr_t TombstoneBitPattern =
+      (std::numeric_limits<uintptr_t>::max() - 1)
+      << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+  constexpr static uintptr_t InvalidPtrMask =
+      (std::numeric_limits<uintptr_t>::max() - 3)
+      << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+  PoolEntryPtr S = nullptr;
+};
 
 inline SymbolStringPool::~SymbolStringPool() {
 #ifndef NDEBUG
@@ -151,16 +175,15 @@ template <>
 struct DenseMapInfo<orc::SymbolStringPtr> {
 
   static orc::SymbolStringPtr getEmptyKey() {
-    return orc::SymbolStringPtr();
+    return orc::SymbolStringPtr::getEmptyVal();
   }
 
   static orc::SymbolStringPtr getTombstoneKey() {
-    return orc::SymbolStringPtr(&orc::SymbolStringPtr::Tombstone);
+    return orc::SymbolStringPtr::getTombstoneVal();
   }
 
-  static unsigned getHashValue(orc::SymbolStringPtr V) {
-    uintptr_t IV = reinterpret_cast<uintptr_t>(V.S);
-    return unsigned(IV) ^ unsigned(IV >> 9);
+  static unsigned getHashValue(const orc::SymbolStringPtr &V) {
+    return DenseMapInfo<orc::SymbolStringPtr::PoolEntryPtr>::getHashValue(V.S);
   }
 
   static bool isEqual(const orc::SymbolStringPtr &LHS,
diff --git a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
index bf946de532d3..5787500387c4 100644
--- a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
+++ b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -1,9 +1,8 @@
 //===----------- ThreadSafeModule.h -- Layer interfaces ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/OrcMCJITReplacement.h b/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
index 4cd5648b2fc2..6cca1933f39f 100644
--- a/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
+++ b/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
@@ -1,9 +1,8 @@
 //===---- OrcMCJITReplacement.h - Orc-based MCJIT replacement ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/include/llvm/ExecutionEngine/OrcV1Deprecation.h
new file mode 100644
index 000000000000..7ed254b3ee04
--- /dev/null
+++ b/include/llvm/ExecutionEngine/OrcV1Deprecation.h
@@ -0,0 +1,22 @@
+//===------ OrcV1Deprecation.h - ORCv1 deprecation tag ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Tag for suppressing ORCv1 deprecation warnings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+
+namespace llvm {
+
+enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation };
+
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 23d651f6d1b6..c7c87ecdfa09 100644
--- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -1,9 +1,8 @@
 //===-- RTDyldMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index e419ee05e566..b2b4eba47074 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -1,9 +1,8 @@
 //===- RuntimeDyld.h - Run-time dynamic linker for MC-JIT -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,18 +52,19 @@ private:
   std::string ErrMsg;
 };
 
-class RuntimeDyldCheckerImpl;
 class RuntimeDyldImpl;
 
 class RuntimeDyld {
-  friend class RuntimeDyldCheckerImpl;
-
 protected:
   // Change the address associated with a section when resolving relocations.
   // Any relocations already associated with the symbol will be re-resolved.
   void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
 
 public:
+  using NotifyStubEmittedFunction = std::function<void(
+      StringRef FileName, StringRef SectionName, StringRef SymbolName,
+      unsigned SectionID, uint32_t StubOffset)>;
+
   /// Information about the loaded object.
   class LoadedObjectInfo : public llvm::LoadedObjectInfo {
     friend class RuntimeDyldImpl;
@@ -185,6 +185,9 @@ public:
   /// and resolve relocatons based on where they put it).
   void *getSymbolLocalAddress(StringRef Name) const;
 
+  /// Get the section ID for the section containing the given symbol.
+  unsigned getSymbolSectionID(StringRef Name) const;
+
   /// Get the target address and flags for the named symbol.
   /// This address is the one used for relocation.
   JITEvaluatedSymbol getSymbol(StringRef Name) const;
@@ -205,6 +208,19 @@ public:
   /// This is the address which will be used for relocation resolution.
   void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
 
+  /// Returns the section's working memory.
+  StringRef getSectionContent(unsigned SectionID) const;
+
+  /// If the section was loaded, return the section's load address. The result
+  /// is a plain uint64_t, so no distinct 'None' value can be returned.
+  uint64_t getSectionLoadAddress(unsigned SectionID) const;
+
+  /// Set the NotifyStubEmitted callback. This is used for debugging
+  /// purposes. A callback is made for each stub that is generated.
+  void setNotifyStubEmitted(NotifyStubEmittedFunction NotifyStubEmitted) {
+    this->NotifyStubEmitted = std::move(NotifyStubEmitted);
+  }
+
   /// Register any EH frame sections that have been loaded but not previously
   /// registered with the memory manager.  Note, RuntimeDyld is responsible
   /// for identifying the EH frame and calling the memory manager with the
@@ -266,7 +282,7 @@ private:
   MemoryManager &MemMgr;
   JITSymbolResolver &Resolver;
   bool ProcessAllSections;
-  RuntimeDyldCheckerImpl *Checker;
+  NotifyStubEmittedFunction NotifyStubEmitted;
 };
 
 // Asynchronous JIT link for ORC.
diff --git a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
index 13fc5fd5a3e7..93ea09107bd1 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
@@ -1,16 +1,18 @@
 //===---- RuntimeDyldChecker.h - RuntimeDyld tester framework -----*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H
 #define LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Endian.h"
 
 #include <cstdint>
 #include <memory>
@@ -58,7 +60,8 @@ class raw_ostream;
 ///
 /// ident_expr = 'decode_operand' '(' symbol ',' operand-index ')'
 ///            | 'next_pc'        '(' symbol ')'
-///            | 'stub_addr' '(' file-name ',' section-name ',' symbol ')'
+///            | 'stub_addr' '(' stub-container-name ',' symbol ')'
+///            | 'got_addr' '(' stub-container-name ',' symbol ')'
 ///            | symbol
 ///
 /// binary_expr = expr '+' expr
@@ -70,15 +73,84 @@ class raw_ostream;
 ///
 class RuntimeDyldChecker {
 public:
-  RuntimeDyldChecker(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
-                     MCInstPrinter *InstPrinter, raw_ostream &ErrStream);
-  ~RuntimeDyldChecker();
+  class MemoryRegionInfo {
+  public:
+    MemoryRegionInfo() = default;
+
+    /// Constructor for symbols/sections with content.
+    MemoryRegionInfo(StringRef Content, JITTargetAddress TargetAddress)
+        : ContentPtr(Content.data()), Size(Content.size()),
+          TargetAddress(TargetAddress) {}
+
+    /// Constructor for zero-fill symbols/sections.
+    MemoryRegionInfo(uint64_t Size, JITTargetAddress TargetAddress)
+        : Size(Size), TargetAddress(TargetAddress) {}
+
+    /// Returns true if this is a zero-fill symbol/section.
+    bool isZeroFill() const {
+      assert(Size && "setContent/setZeroFill must be called first");
+      return !ContentPtr;
+    }
+
+    /// Set the content for this memory region.
+    void setContent(StringRef Content) {
+      assert(!ContentPtr && !Size && "Content/zero-fill already set");
+      ContentPtr = Content.data();
+      Size = Content.size();
+    }
+
+    /// Set a zero-fill length for this memory region.
+    void setZeroFill(uint64_t Size) {
+      assert(!ContentPtr && !this->Size && "Content/zero-fill already set");
+      this->Size = Size;
+    }
 
-  // Get the associated RTDyld instance.
-  RuntimeDyld& getRTDyld();
+    /// Returns the content for this section if there is any.
+    StringRef getContent() const {
+      assert(!isZeroFill() && "Can't get content for a zero-fill section");
+      return StringRef(ContentPtr, static_cast<size_t>(Size));
+    }
 
-  // Get the associated RTDyld instance.
-  const RuntimeDyld& getRTDyld() const;
+    /// Returns the zero-fill length for this section.
+    uint64_t getZeroFillLength() const {
+      assert(isZeroFill() && "Can't get zero-fill length for content section");
+      return Size;
+    }
+
+    /// Set the target address for this region.
+    void setTargetAddress(JITTargetAddress TargetAddress) {
+      assert(!this->TargetAddress && "TargetAddress already set");
+      this->TargetAddress = TargetAddress;
+    }
+
+    /// Return the target address for this region.
+    JITTargetAddress getTargetAddress() const { return TargetAddress; }
+
+  private:
+    const char *ContentPtr = 0;
+    uint64_t Size = 0;
+    JITTargetAddress TargetAddress = 0;
+  };
+
+  using IsSymbolValidFunction = std::function<bool(StringRef Symbol)>;
+  using GetSymbolInfoFunction =
+      std::function<Expected<MemoryRegionInfo>(StringRef SymbolName)>;
+  using GetSectionInfoFunction = std::function<Expected<MemoryRegionInfo>(
+      StringRef FileName, StringRef SectionName)>;
+  using GetStubInfoFunction = std::function<Expected<MemoryRegionInfo>(
+      StringRef StubContainer, StringRef TargetName)>;
+  using GetGOTInfoFunction = std::function<Expected<MemoryRegionInfo>(
+      StringRef GOTContainer, StringRef TargetName)>;
+
+  RuntimeDyldChecker(IsSymbolValidFunction IsSymbolValid,
+                     GetSymbolInfoFunction GetSymbolInfo,
+                     GetSectionInfoFunction GetSectionInfo,
+                     GetStubInfoFunction GetStubInfo,
+                     GetGOTInfoFunction GetGOTInfo,
+                     support::endianness Endianness,
+                     MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+                     raw_ostream &ErrStream);
+  ~RuntimeDyldChecker();
 
   /// Check a single expression against the attached RuntimeDyld
   ///        instance.
@@ -100,7 +172,7 @@ public:
                                                   bool LocalAddress);
 
   /// If there is a section at the given local address, return its load
-  ///        address, otherwise return none.
+  /// address, otherwise return none.
   Optional<uint64_t> getSectionLoadAddress(void *LocalAddress) const;
 
 private:
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 3cf131c27778..d7316425da2f 100644
--- a/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -1,9 +1,8 @@
 //===- SectionMemoryManager.h - Memory manager for MCJIT/RtDyld -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/FuzzerCLI.h b/include/llvm/FuzzMutate/FuzzerCLI.h
index 3333e96db166..2a16e43a6ab3 100644
--- a/include/llvm/FuzzMutate/FuzzerCLI.h
+++ b/include/llvm/FuzzMutate/FuzzerCLI.h
@@ -1,9 +1,8 @@
 //===-- FuzzerCLI.h - Common logic for CLIs of fuzzers ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/IRMutator.h b/include/llvm/FuzzMutate/IRMutator.h
index 9aa9d6d6a4bc..40a1ce8aeec9 100644
--- a/include/llvm/FuzzMutate/IRMutator.h
+++ b/include/llvm/FuzzMutate/IRMutator.h
@@ -1,9 +1,8 @@
 //===-- IRMutator.h - Mutation engine for fuzzing IR ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/OpDescriptor.h b/include/llvm/FuzzMutate/OpDescriptor.h
index dd30fda99bea..d6c98cd949a2 100644
--- a/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/include/llvm/FuzzMutate/OpDescriptor.h
@@ -1,9 +1,8 @@
 //===-- OpDescriptor.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/Operations.h b/include/llvm/FuzzMutate/Operations.h
index 668bd952ebb2..2eb4c38c2aeb 100644
--- a/include/llvm/FuzzMutate/Operations.h
+++ b/include/llvm/FuzzMutate/Operations.h
@@ -1,9 +1,8 @@
 //===-- Operations.h - ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/Random.h b/include/llvm/FuzzMutate/Random.h
index 3a5f46a07554..615b15f04ceb 100644
--- a/include/llvm/FuzzMutate/Random.h
+++ b/include/llvm/FuzzMutate/Random.h
@@ -1,9 +1,8 @@
 //===--- Random.h - Utilities for random sampling -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/FuzzMutate/RandomIRBuilder.h b/include/llvm/FuzzMutate/RandomIRBuilder.h
index 5cf3f0b22709..f3b609702e9d 100644
--- a/include/llvm/FuzzMutate/RandomIRBuilder.h
+++ b/include/llvm/FuzzMutate/RandomIRBuilder.h
@@ -1,9 +1,8 @@
-//===-- Mutator.h - Utils for randomly mutation IR --------------*- C++ -*-===//
+//===- RandomIRBuilder.h - Utils for randomly mutating IR -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index 497dca44547c..5f514b9c47d2 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -1,9 +1,8 @@
 //===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -79,6 +78,9 @@ public:
   /// If this is a byval or inalloca argument, return its alignment.
   unsigned getParamAlignment() const;
 
+  /// If this is a byval argument, return its type.
+  Type *getParamByValType() const;
+
   /// Return true if this argument has the nest attribute.
   bool hasNestAttr() const;
 
@@ -91,6 +93,9 @@ public:
   /// Return true if this argument has the sret attribute.
   bool hasStructRetAttr() const;
 
+  /// Return true if this argument has the inreg attribute.
+  bool hasInRegAttr() const;
+
   /// Return true if this argument has the returned attribute.
   bool hasReturnedAttr() const;
 
@@ -119,6 +124,8 @@ public:
   /// Check if an argument has a given attribute.
   bool hasAttribute(Attribute::AttrKind Kind) const;
 
+  Attribute getAttribute(Attribute::AttrKind Kind) const;
+
   /// Method for support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() == ArgumentVal;
diff --git a/include/llvm/IR/AssemblyAnnotationWriter.h b/include/llvm/IR/AssemblyAnnotationWriter.h
index 6e1f5c43e12e..3fd3c57a6796 100644
--- a/include/llvm/IR/AssemblyAnnotationWriter.h
+++ b/include/llvm/IR/AssemblyAnnotationWriter.h
@@ -1,9 +1,8 @@
 //===-- AssemblyAnnotationWriter.h - Annotation .ll files -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 9fc4614af010..06cc09e1cfc7 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -1,9 +1,8 @@
 //===- llvm/Attributes.h - Container for Attributes -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,6 +90,7 @@ public:
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
+  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
 
   /// Return a uniquified Attribute object that has the specific
   /// alignment set.
@@ -103,6 +103,7 @@ public:
   static Attribute getWithAllocSizeArgs(LLVMContext &Context,
                                         unsigned ElemSizeArg,
                                         const Optional<unsigned> &NumElemsArg);
+  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
 
   //===--------------------------------------------------------------------===//
   // Attribute Accessors
@@ -118,6 +119,9 @@ public:
   /// attribute.
   bool isStringAttribute() const;
 
+  /// Return true if the attribute is a type attribute.
+  bool isTypeAttribute() const;
+
   /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
@@ -140,6 +144,10 @@ public:
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
+  /// Return the attribute's value as a Type. This requires the attribute to be
+  /// a type attribute.
+  Type *getValueAsType() const;
+
   /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
@@ -280,6 +288,7 @@ public:
   unsigned getStackAlignment() const;
   uint64_t getDereferenceableBytes() const;
   uint64_t getDereferenceableOrNullBytes() const;
+  Type *getByValType() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp = false) const;
 
@@ -599,6 +608,9 @@ public:
   /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
+  /// Return the byval type for the specified function parameter.
+  Type *getParamByValType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
@@ -698,6 +710,7 @@ class AttrBuilder {
   uint64_t DerefBytes = 0;
   uint64_t DerefOrNullBytes = 0;
   uint64_t AllocSizeArgs = 0;
+  Type *ByValType = nullptr;
 
 public:
   AttrBuilder() = default;
@@ -773,6 +786,9 @@ public:
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
+  /// Retrieve the byval type.
+  Type *getByValType() const { return ByValType; }
+
   /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -797,6 +813,9 @@ public:
   AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg);
 
+  /// This turns a byval type into the form used internally in Attribute.
+  AttrBuilder &addByValAttr(Type *Ty);
+
   /// Add an allocsize attribute, using the representation returned by
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index e786d85d05a8..153046d2311c 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -85,6 +85,9 @@ def NoCapture : EnumAttr<"nocapture">;
 /// Call cannot be duplicated.
 def NoDuplicate : EnumAttr<"noduplicate">;
 
+/// Function does not deallocate memory.
+def NoFree : EnumAttr<"nofree">;
+
 /// Disable implicit floating point insts.
 def NoImplicitFloat : EnumAttr<"noimplicitfloat">;
 
@@ -106,6 +109,9 @@ def NoRedZone : EnumAttr<"noredzone">;
 /// Mark the function as not returning.
 def NoReturn : EnumAttr<"noreturn">;
 
+/// Function does not synchronize.
+def NoSync : EnumAttr<"nosync">;
+
 /// Disable Indirect Branch Tracking.
 def NoCfCheck : EnumAttr<"nocf_check">;
 
@@ -130,6 +136,9 @@ def ReadOnly : EnumAttr<"readonly">;
 /// Return value is always equal to this argument.
 def Returned : EnumAttr<"returned">;
 
+/// Parameter is required to be a trivial constant.
+def ImmArg : EnumAttr<"immarg">;
+
 /// Function can return twice.
 def ReturnsTwice : EnumAttr<"returns_twice">;
 
@@ -176,6 +185,9 @@ def SanitizeMemory : EnumAttr<"sanitize_memory">;
 /// HWAddressSanitizer is on.
 def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">;
 
+/// MemTagSanitizer is on.
+def SanitizeMemTag : EnumAttr<"sanitize_memtag">;
+
 /// Speculative Load Hardening is enabled.
 ///
 /// Note that this uses the default compatibility (always compatible during
@@ -193,6 +205,9 @@ def SwiftSelf : EnumAttr<"swiftself">;
 /// Function must be in a unwind table.
 def UWTable : EnumAttr<"uwtable">;
 
+/// Function always comes back to callsite.
+def WillReturn : EnumAttr<"willreturn">;
+
 /// Function only writes to memory.
 def WriteOnly : EnumAttr<"writeonly">;
 
@@ -221,6 +236,7 @@ def : CompatRule<"isEqual<SanitizeAddressAttr>">;
 def : CompatRule<"isEqual<SanitizeThreadAttr>">;
 def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
 def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
+def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
 def : CompatRule<"isEqual<SafeStackAttr>">;
 def : CompatRule<"isEqual<ShadowCallStackAttr>">;
 
diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h
index 8cf574c6a138..017ad93d8a2a 100644
--- a/include/llvm/IR/AutoUpgrade.h
+++ b/include/llvm/IR/AutoUpgrade.h
@@ -1,9 +1,8 @@
 //===- AutoUpgrade.h - AutoUpgrade Helpers ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,9 +46,9 @@ namespace llvm {
   /// so that it can update all calls to the old function.
   void UpgradeCallsToIntrinsic(Function* F);
 
-  /// This checks for global variables which should be upgraded. It returns true
-  /// if it requires upgrading.
-  bool UpgradeGlobalVariable(GlobalVariable *GV);
+  /// This checks for global variables which should be upgraded. If it requires
+  /// upgrading, returns a pointer to the upgraded variable.
+  GlobalVariable *UpgradeGlobalVariable(GlobalVariable *GV);
 
   /// This checks for module flags which should be upgraded. It returns true if
   /// module is modified.
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 99eac33f742e..69555af50e1f 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -1,9 +1,8 @@
 //===- llvm/BasicBlock.h - Represent a basic block in the VM ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -363,7 +362,7 @@ public:
   /// This is actually not used to update the Predecessor list, but is actually
   /// used to update the PHI nodes that reside in the block.  Note that this
   /// should be called while the predecessor still refers to this block.
-  void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
+  void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false);
 
   bool canSplitPredecessors() const;
 
@@ -391,6 +390,14 @@ public:
   /// direct branches, switches, etc. to it.
   bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
 
+  /// Update all phi nodes in this basic block to refer to basic block \p New
+  /// instead of basic block \p Old.
+  void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New);
+
+  /// Update all phi nodes in this basic block's successors to refer to basic
+  /// block \p New instead of basic block \p Old.
+  void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New);
+
   /// Update all phi nodes in this basic block's successors to refer to basic
   /// block \p New instead of to it.
   void replaceSuccessorsPhiUsesWith(BasicBlock *New);
diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h
index 8385c4647e12..55aff7137e86 100644
--- a/include/llvm/IR/CFG.h
+++ b/include/llvm/IR/CFG.h
@@ -1,9 +1,8 @@
 //===- CFG.h ----------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -238,10 +237,6 @@ public:
   }
 };
 
-template <typename T, typename U> struct isPodLike<SuccIterator<T, U>> {
-  static const bool value = isPodLike<T>::value;
-};
-
 using succ_iterator = SuccIterator<Instruction, BasicBlock>;
 using succ_const_iterator = SuccIterator<const Instruction, const BasicBlock>;
 using succ_range = iterator_range<succ_iterator>;
diff --git a/include/llvm/IR/CFGDiff.h b/include/llvm/IR/CFGDiff.h
index da4373f7bce2..57b62dd66a47 100644
--- a/include/llvm/IR/CFGDiff.h
+++ b/include/llvm/IR/CFGDiff.h
@@ -1,9 +1,8 @@
 //===- CFGDiff.h - Define a CFG snapshot. -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index a3e78049f4be..b47a96c5d5fa 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -1,15 +1,14 @@
 //===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the CallSite class, which is a handy wrapper for code that
-// wants to treat Call and Invoke instructions in a generic way. When in non-
-// mutation context (e.g. an analysis) ImmutableCallSite should be used.
+// wants to treat Call, Invoke and CallBr instructions in a generic way. When
+// in non-mutation context (e.g. an analysis) ImmutableCallSite should be used.
 // Finally, when some degree of customization is necessary between these two
 // extremes, CallSiteBase<> can be supplied with fine-tuned parameters.
 //
@@ -18,7 +17,7 @@
 // They are efficiently copyable, assignable and constructable, with cost
 // equivalent to copying a pointer (notice that they have only a single data
 // member). The internal representation carries a flag which indicates which of
-// the two variants is enclosed. This allows for cheaper checks when various
+// the three variants is enclosed. This allows for cheaper checks when various
 // accessors of CallSite are employed.
 //
 //===----------------------------------------------------------------------===//
@@ -49,45 +48,50 @@ namespace Intrinsic {
 enum ID : unsigned;
 }
 
-template <typename FunTy = const Function,
-          typename BBTy = const BasicBlock,
-          typename ValTy = const Value,
-          typename UserTy = const User,
-          typename UseTy = const Use,
-          typename InstrTy = const Instruction,
+template <typename FunTy = const Function, typename BBTy = const BasicBlock,
+          typename ValTy = const Value, typename UserTy = const User,
+          typename UseTy = const Use, typename InstrTy = const Instruction,
           typename CallTy = const CallInst,
           typename InvokeTy = const InvokeInst,
+          typename CallBrTy = const CallBrInst,
           typename IterTy = User::const_op_iterator>
 class CallSiteBase {
 protected:
-  PointerIntPair<InstrTy*, 1, bool> I;
+  PointerIntPair<InstrTy *, 2, int> I;
 
   CallSiteBase() = default;
-  CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
-  CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
+  CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
+  CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
+  CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
   explicit CallSiteBase(ValTy *II) { *this = get(II); }
 
 private:
   /// This static method is like a constructor. It will create an appropriate
-  /// call site for a Call or Invoke instruction, but it can also create a null
-  /// initialized CallSiteBase object for something which is NOT a call site.
+  /// call site for a Call, Invoke or CallBr instruction, but it can also create
+  /// a null initialized CallSiteBase object for something which is NOT a call
+  /// site.
   static CallSiteBase get(ValTy *V) {
     if (InstrTy *II = dyn_cast<InstrTy>(V)) {
       if (II->getOpcode() == Instruction::Call)
         return CallSiteBase(static_cast<CallTy*>(II));
-      else if (II->getOpcode() == Instruction::Invoke)
+      if (II->getOpcode() == Instruction::Invoke)
         return CallSiteBase(static_cast<InvokeTy*>(II));
+      if (II->getOpcode() == Instruction::CallBr)
+        return CallSiteBase(static_cast<CallBrTy *>(II));
     }
     return CallSiteBase();
   }
 
 public:
-  /// Return true if a CallInst is enclosed. Note that !isCall() does not mean
-  /// an InvokeInst is enclosed. It may also signify a NULL instruction pointer.
-  bool isCall() const { return I.getInt(); }
+  /// Return true if a CallInst is enclosed.
+  bool isCall() const { return I.getInt() == 1; }
+
+  /// Return true if an InvokeInst is enclosed. !I.getInt() may also signify a
+  /// NULL instruction pointer, so check that.
+  bool isInvoke() const { return getInstruction() && I.getInt() == 0; }
 
-  /// Return true if a InvokeInst is enclosed.
-  bool isInvoke() const { return getInstruction() && !I.getInt(); }
+  /// Return true if a CallBrInst is enclosed.
+  bool isCallBr() const { return I.getInt() == 2; }
 
   InstrTy *getInstruction() const { return I.getPointer(); }
   InstrTy *operator->() const { return I.getPointer(); }
@@ -98,7 +102,7 @@ public:
 
   /// Return the pointer to function that is being called.
   ValTy *getCalledValue() const {
-    assert(getInstruction() && "Not a call or invoke instruction!");
+    assert(getInstruction() && "Not a call, invoke or callbr instruction!");
     return *getCallee();
   }
 
@@ -115,16 +119,19 @@ public:
       return false;
     if (isa<FunTy>(V) || isa<Constant>(V))
       return false;
-    if (const CallInst *CI = dyn_cast<CallInst>(getInstruction())) {
-      if (CI->isInlineAsm())
+    if (const CallBase *CB = dyn_cast<CallBase>(getInstruction()))
+      if (CB->isInlineAsm())
         return false;
-    }
     return true;
   }
 
-  /// Set the callee to the specified value.
+  /// Set the callee to the specified value.  Unlike the function of the same
+  /// name on CallBase, does not modify the type!
   void setCalledFunction(Value *V) {
-    assert(getInstruction() && "Not a call or invoke instruction!");
+    assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
+    assert(cast<PointerType>(V->getType())->getElementType() ==
+               cast<CallBase>(getInstruction())->getFunctionType() &&
+           "New callee type does not match FunctionType on call");
     *getCallee() = V;
   }
 
@@ -189,7 +196,7 @@ public:
   }
 
   void setArgument(unsigned ArgNo, Value* newVal) {
-    assert(getInstruction() && "Not a call or invoke instruction!");
+    assert(getInstruction() && "Not a call, invoke or callbr instruction!");
     assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
     getInstruction()->setOperand(ArgNo, newVal);
   }
@@ -203,7 +210,7 @@ public:
   /// Given a use for an argument, get the argument number that corresponds to
   /// it.
   unsigned getArgumentNo(const Use *U) const {
-    assert(getInstruction() && "Not a call or invoke instruction!");
+    assert(getInstruction() && "Not a call, invoke or callbr instruction!");
     assert(isArgOperand(U) && "Argument # out of range!");
     return U - arg_begin();
   }
@@ -227,7 +234,7 @@ public:
   /// Given a use for a data operand, get the data operand number that
   /// corresponds to it.
   unsigned getDataOperandNo(const Use *U) const {
-    assert(getInstruction() && "Not a call or invoke instruction!");
+    assert(getInstruction() && "Not a call, invoke or callbr instruction!");
     assert(isDataOperand(U) && "Data operand # out of range!");
     return U - data_operands_begin();
   }
@@ -237,18 +244,19 @@ public:
   using data_operand_iterator = IterTy;
 
   /// data_operands_begin/data_operands_end - Return iterators iterating over
-  /// the call / invoke argument list and bundle operands.  For invokes, this is
-  /// the set of instruction operands except the invoke target and the two
-  /// successor blocks; and for calls this is the set of instruction operands
-  /// except the call target.
+  /// the call / invoke / callbr argument list and bundle operands. For invokes,
+  /// this is the set of instruction operands except the invoke target and the
+  /// two successor blocks; for calls this is the set of instruction operands
+  /// except the call target; for callbrs the number of labels to skip must be
+  /// determined first.
 
   IterTy data_operands_begin() const {
     assert(getInstruction() && "Not a call or invoke instruction!");
-    return (*this)->op_begin();
+    return cast<CallBase>(getInstruction())->data_operands_begin();
   }
   IterTy data_operands_end() const {
     assert(getInstruction() && "Not a call or invoke instruction!");
-    return (*this)->op_end() - (isCall() ? 1 : 3);
+    return cast<CallBase>(getInstruction())->data_operands_end();
   }
   iterator_range<IterTy> data_ops() const {
     return make_range(data_operands_begin(), data_operands_end());
@@ -277,17 +285,19 @@ public:
     return isCall() && cast<CallInst>(getInstruction())->isTailCall();
   }
 
-#define CALLSITE_DELEGATE_GETTER(METHOD) \
-  InstrTy *II = getInstruction();    \
-  return isCall()                        \
-    ? cast<CallInst>(II)->METHOD         \
-    : cast<InvokeInst>(II)->METHOD
+#define CALLSITE_DELEGATE_GETTER(METHOD)                                       \
+  InstrTy *II = getInstruction();                                              \
+  return isCall() ? cast<CallInst>(II)->METHOD                                 \
+                  : isCallBr() ? cast<CallBrInst>(II)->METHOD                  \
+                                : cast<InvokeInst>(II)->METHOD
 
-#define CALLSITE_DELEGATE_SETTER(METHOD) \
-  InstrTy *II = getInstruction();    \
-  if (isCall())                          \
-    cast<CallInst>(II)->METHOD;          \
-  else                                   \
+#define CALLSITE_DELEGATE_SETTER(METHOD)                                       \
+  InstrTy *II = getInstruction();                                              \
+  if (isCall())                                                                \
+    cast<CallInst>(II)->METHOD;                                                \
+  else if (isCallBr())                                                         \
+    cast<CallBrInst>(II)->METHOD;                                              \
+  else                                                                         \
     cast<InvokeInst>(II)->METHOD
 
   unsigned getNumArgOperands() const {
@@ -303,9 +313,7 @@ public:
   }
 
   bool isInlineAsm() const {
-    if (isCall())
-      return cast<CallInst>(getInstruction())->isInlineAsm();
-    return false;
+    return cast<CallBase>(getInstruction())->isInlineAsm();
   }
 
   /// Get the calling convention of the call.
@@ -389,10 +397,10 @@ public:
   /// Return true if the data operand at index \p i directly or indirectly has
   /// the attribute \p A.
   ///
-  /// Normal call or invoke arguments have per operand attributes, as specified
-  /// in the attribute set attached to this instruction, while operand bundle
-  /// operands may have some attributes implied by the type of its containing
-  /// operand bundle.
+  /// Normal call, invoke or callbr arguments have per operand attributes, as
+  /// specified in the attribute set attached to this instruction, while operand
+  /// bundle operands may have some attributes implied by the type of its
+  /// containing operand bundle.
   bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
     CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
   }
@@ -407,6 +415,11 @@ public:
     CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
+  }
+
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
@@ -580,13 +593,9 @@ public:
 #undef CALLSITE_DELEGATE_SETTER
 
   void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
-    const Instruction *II = getInstruction();
     // Since this is actually a getter that "looks like" a setter, don't use the
     // above macros to avoid confusion.
-    if (isCall())
-      cast<CallInst>(II)->getOperandBundlesAsDefs(Defs);
-    else
-      cast<InvokeInst>(II)->getOperandBundlesAsDefs(Defs);
+    cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs);
   }
 
   /// Determine whether this data operand is not captured.
@@ -662,12 +671,13 @@ private:
 
 class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
                                      Instruction, CallInst, InvokeInst,
-                                     User::op_iterator> {
+                                     CallBrInst, User::op_iterator> {
 public:
   CallSite() = default;
   CallSite(CallSiteBase B) : CallSiteBase(B) {}
   CallSite(CallInst *CI) : CallSiteBase(CI) {}
   CallSite(InvokeInst *II) : CallSiteBase(II) {}
+  CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
   explicit CallSite(Instruction *II) : CallSiteBase(II) {}
   explicit CallSite(Value *V) : CallSiteBase(V) {}
 
@@ -683,6 +693,182 @@ private:
   User::op_iterator getCallee() const;
 };
 
+/// AbstractCallSite
+///
+/// An abstract call site is a wrapper that allows direct, indirect, and
+/// callback calls to be treated the same. If an abstract call site
+/// represents a direct or indirect call site it behaves like a stripped
+/// down version of a normal call site object. The abstract call site can
+/// also represent a callback call, thus the fact that the initially
+/// called function (=broker) may invoke a third one (=callback callee).
+/// In this case, the abstract call site hides the middle man, hence the
+/// broker function. The result is a representation of the callback call,
+/// inside the broker, but in the context of the original call to the broker.
+///
+/// There are up to three functions involved when we talk about callback call
+/// sites. The caller (1), which invokes the broker function. The broker
+/// function (2), that will invoke the callee zero or more times. And finally
+/// the callee (3), which is the target of the callback call.
+///
+/// The abstract call site will handle the mapping from parameters to arguments
+/// depending on the semantic of the broker function. However, it is important
+/// to note that the mapping is often partial. Thus, some arguments of the
+/// call/invoke instruction are mapped to parameters of the callee while others
+/// are not.
+class AbstractCallSite {
+public:
+
+  /// The encoding of a callback with regards to the underlying instruction.
+  struct CallbackInfo {
+
+    /// For direct/indirect calls the parameter encoding is empty. If it is not,
+    /// the abstract call site represents a callback. In that case, the first
+    /// element of the encoding vector represents which argument of the call
+    /// site CS is the callback callee. The remaining elements map parameters
+    /// (identified by their position) to the arguments that will be passed
+    /// through (also identified by position but in the call site instruction).
+    ///
+    /// NOTE that we use LLVM argument numbers (starting at 0) and not
+    /// clang/source argument numbers (starting at 1). The -1 entries represent
+    /// unknown values that are passed to the callee.
+    using ParameterEncodingTy = SmallVector<int, 0>;
+    ParameterEncodingTy ParameterEncoding;
+
+  };
+
+private:
+
+  /// The underlying call site:
+  ///   caller -> callee,             if this is a direct or indirect call site
+  ///   caller -> broker function,    if this is a callback call site
+  CallSite CS;
+
+  /// The encoding of a callback with regards to the underlying instruction.
+  CallbackInfo CI;
+
+public:
+  /// Sole constructor for abstract call sites (ACS).
+  ///
+  /// An abstract call site can only be constructed through a llvm::Use because
+  /// each operand (=use) of an instruction could potentially be a different
+  /// abstract call site. Furthermore, even if the value of the llvm::Use is the
+  /// same, and the user is as well, the abstract call sites might not be.
+  ///
+  /// If a use is not associated with an abstract call site the constructed ACS
+  /// will evaluate to false if converted to a boolean.
+  ///
+  /// If the use is the callee use of a call or invoke instruction, the
+  /// constructed abstract call site will behave as a llvm::CallSite would.
+  ///
+  /// If the use is not a callee use of a call or invoke instruction, the
+  /// callback metadata is used to determine the argument <-> parameter mapping
+  /// as well as the callee of the abstract call site.
+  AbstractCallSite(const Use *U);
+
+  /// Conversion operator to conveniently check for a valid/initialized ACS.
+  explicit operator bool() const { return static_cast<bool>(CS); }
+
+  /// Return the underlying instruction.
+  Instruction *getInstruction() const { return CS.getInstruction(); }
+
+  /// Return the call site abstraction for the underlying instruction.
+  CallSite getCallSite() const { return CS; }
+
+  /// Return true if this ACS represents a direct call.
+  bool isDirectCall() const {
+    return !isCallbackCall() && !CS.isIndirectCall();
+  }
+
+  /// Return true if this ACS represents an indirect call.
+  bool isIndirectCall() const {
+    return !isCallbackCall() && CS.isIndirectCall();
+  }
+
+  /// Return true if this ACS represents a callback call.
+  bool isCallbackCall() const {
+    // For a callback call site the callee is ALWAYS stored first in the
+    // parameter encoding. Thus, a non-empty encoding indicates a callback.
+    return !CI.ParameterEncoding.empty();
+  }
+
+  /// Return true if @p UI is the use that defines the callee of this ACS.
+  bool isCallee(Value::const_user_iterator UI) const {
+    return isCallee(&UI.getUse());
+  }
+
+  /// Return true if @p U is the use that defines the callee of this ACS.
+  bool isCallee(const Use *U) const {
+    if (!isCallbackCall())
+      return CS.isCallee(U);
+
+    assert(!CI.ParameterEncoding.empty() &&
+           "Callback without parameter encoding!");
+
+    return static_cast<int>(CS.getArgumentNo(U)) == CI.ParameterEncoding[0];
+  }
+
+  /// Return the number of parameters of the callee.
+  unsigned getNumArgOperands() const {
+    if (!isCallbackCall())
+      return CS.getNumArgOperands();
+    // Subtract 1 for the callee encoding.
+    return CI.ParameterEncoding.size() - 1;
+  }
+
+  /// Return the operand index of the underlying instruction associated with @p
+  /// Arg.
+  int getCallArgOperandNo(Argument &Arg) const {
+    return getCallArgOperandNo(Arg.getArgNo());
+  }
+
+  /// Return the operand index of the underlying instruction associated with
+  /// the function parameter number @p ArgNo or -1 if there is none.
+  int getCallArgOperandNo(unsigned ArgNo) const {
+    if (!isCallbackCall())
+      return ArgNo;
+    // Add 1 for the callee encoding.
+    return CI.ParameterEncoding[ArgNo + 1];
+  }
+
+  /// Return the operand of the underlying instruction associated with @p Arg.
+  Value *getCallArgOperand(Argument &Arg) const {
+    return getCallArgOperand(Arg.getArgNo());
+  }
+
+  /// Return the operand of the underlying instruction associated with the
+  /// function parameter number @p ArgNo or nullptr if there is none.
+  Value *getCallArgOperand(unsigned ArgNo) const {
+    if (!isCallbackCall())
+      return CS.getArgOperand(ArgNo);
+    // Add 1 for the callee encoding.
+    return CI.ParameterEncoding[ArgNo + 1] >= 0
+               ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
+               : nullptr;
+  }
+
+  /// Return the operand index of the underlying instruction associated with the
+  /// callee of this ACS (may be 0). Only valid for callback calls!
+  int getCallArgOperandNoForCallee() const {
+    assert(isCallbackCall());
+    assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] >= 0);
+    return CI.ParameterEncoding[0];
+  }
+
+  /// Return the pointer to function that is being called.
+  Value *getCalledValue() const {
+    if (!isCallbackCall())
+      return CS.getCalledValue();
+    return CS.getArgOperand(getCallArgOperandNoForCallee());
+  }
+
+  /// Return the function being called if this is a direct call, otherwise
+  /// return null (if it's an indirect call).
+  Function *getCalledFunction() const {
+    Value *V = getCalledValue();
+    return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
+  }
+};
+
 template <> struct DenseMapInfo<CallSite> {
   using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
 
@@ -713,6 +899,7 @@ public:
   ImmutableCallSite() = default;
   ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
   ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
+  ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
   explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
   explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
   ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 49c3be960373..399c6ad521fa 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -1,9 +1,8 @@
 //===- llvm/CallingConv.h - LLVM Calling Conventions ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h
index 555121e928f7..f712a16dd318 100644
--- a/include/llvm/IR/Comdat.h
+++ b/include/llvm/IR/Comdat.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/Comdat.h - Comdat definitions --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 98437f8eff1f..931576651224 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -1,9 +1,8 @@
 //===-- llvm/Constant.h - Constant class definition -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,6 +90,10 @@ public:
   /// elements.
   bool containsUndefElement() const;
 
+  /// Return true if this is a vector constant that includes any constant
+  /// expressions.
+  bool containsConstantExpression() const;
+
   /// Return true if evaluation of this constant could trap. This is true for
   /// things like constant expressions that could divide by zero.
   bool canTrap() const;
diff --git a/include/llvm/IR/ConstantFolder.h b/include/llvm/IR/ConstantFolder.h
index da5bba7ba141..5a5cabfd0206 100644
--- a/include/llvm/IR/ConstantFolder.h
+++ b/include/llvm/IR/ConstantFolder.h
@@ -1,9 +1,8 @@
 //===- ConstantFolder.h - Constant folding helper ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -135,6 +134,10 @@ public:
     return ConstantExpr::getNot(C);
   }
 
+  Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return ConstantExpr::get(Opc, C);
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h
index 1adda3269abc..91f3f31abe17 100644
--- a/include/llvm/IR/ConstantRange.h
+++ b/include/llvm/IR/ConstantRange.h
@@ -1,9 +1,8 @@
 //===- ConstantRange.h - Represent a range ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,14 +41,25 @@ namespace llvm {
 
 class MDNode;
 class raw_ostream;
+struct KnownBits;
 
 /// This class represents a range of values.
 class LLVM_NODISCARD ConstantRange {
   APInt Lower, Upper;
 
+  /// Create empty constant range with same bitwidth.
+  ConstantRange getEmpty() const {
+    return ConstantRange(getBitWidth(), false);
+  }
+
+  /// Create full constant range with same bitwidth.
+  ConstantRange getFull() const {
+    return ConstantRange(getBitWidth(), true);
+  }
+
 public:
-  /// Initialize a full (the default) or empty set for the specified bit width.
-  explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true);
+  /// Initialize a full or empty set for the specified bit width.
+  explicit ConstantRange(uint32_t BitWidth, bool isFullSet);
 
   /// Initialize a range to hold the single specified value.
   ConstantRange(APInt Value);
@@ -59,6 +69,29 @@ public:
   /// assert out if the two APInt's are not the same bit width.
   ConstantRange(APInt Lower, APInt Upper);
 
+  /// Create empty constant range with the given bit width.
+  static ConstantRange getEmpty(uint32_t BitWidth) {
+    return ConstantRange(BitWidth, false);
+  }
+
+  /// Create full constant range with the given bit width.
+  static ConstantRange getFull(uint32_t BitWidth) {
+    return ConstantRange(BitWidth, true);
+  }
+
+  /// Create non-empty constant range with the given bounds. If Lower and
+  /// Upper are the same, a full range is returned.
+  static ConstantRange getNonEmpty(APInt Lower, APInt Upper) {
+    if (Lower == Upper)
+      return getFull(Lower.getBitWidth());
+    return ConstantRange(std::move(Lower), std::move(Upper));
+  }
+
+  /// Initialize a range based on a known bits constraint. The IsSigned flag
+  /// indicates whether the constant range should not wrap in the signed or
+  /// unsigned domain.
+  static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned);
+
   /// Produce the smallest range such that all values that may satisfy the given
   /// predicate with any value contained within Other is contained in the
   /// returned range.  Formally, this returns a superset of
@@ -91,14 +124,12 @@ public:
   static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred,
                                            const APInt &Other);
 
-  /// Return the largest range containing all X such that "X BinOpC Y" is
-  /// guaranteed not to wrap (overflow) for all Y in Other.
+  /// Produce the largest range containing all X such that "X BinOp Y" is
+  /// guaranteed not to wrap (overflow) for *all* Y in Other. However, there may
+  /// be *some* Y in Other for which additional X not contained in the result
+  /// also do not overflow.
   ///
-  /// NB! The returned set does *not* contain **all** possible values of X for
-  /// which "X BinOpC Y" does not wrap -- some viable values of X may be
-  /// missing, so you cannot use this to constrain X's range.  E.g. in the
-  /// fourth example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2),
-  /// but (-2) is not in the set returned.
+  /// NoWrapKind must be one of OBO::NoUnsignedWrap or OBO::NoSignedWrap.
   ///
   /// Examples:
   ///  typedef OverflowingBinaryOperator OBO;
@@ -106,17 +137,19 @@ public:
   ///  MGNR(Add, [i8 1, 2), OBO::NoSignedWrap) == [-128, 127)
   ///  MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap) == [0, -1)
   ///  MGNR(Add, [i8 0, 1), OBO::NoUnsignedWrap) == Full Set
-  ///  MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap)
-  ///    == [0,INT_MAX)
   ///  MGNR(Add, [i8 -1, 6), OBO::NoSignedWrap) == [INT_MIN+1, INT_MAX-4)
   ///  MGNR(Sub, [i8 1, 2), OBO::NoSignedWrap) == [-127, 128)
   ///  MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap) == [1, 0)
-  ///  MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap)
-  ///    == [1,INT_MAX)
   static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
                                                   const ConstantRange &Other,
                                                   unsigned NoWrapKind);
 
+  /// Produce the range that contains X if and only if "X BinOp Other" does
+  /// not wrap.
+  static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp,
+                                             const APInt &Other,
+                                             unsigned NoWrapKind);
+
   /// Set up \p Pred and \p RHS such that
   /// ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.  Return true if
   /// successful.
@@ -138,14 +171,32 @@ public:
   /// Return true if this set contains no members.
   bool isEmptySet() const;
 
-  /// Return true if this set wraps around the top of the range.
-  /// For example: [100, 8).
+  /// Return true if this set wraps around the unsigned domain. Special cases:
+  ///  * Empty set: Not wrapped.
+  ///  * Full set: Not wrapped.
+  ///  * [X, 0) == [X, Max]: Not wrapped.
   bool isWrappedSet() const;
 
-  /// Return true if this set wraps around the INT_MIN of
-  /// its bitwidth. For example: i8 [120, 140).
+  /// Return true if the exclusive upper bound wraps around the unsigned
+  /// domain. Special cases:
+  ///  * Empty set: Not wrapped.
+  ///  * Full set: Not wrapped.
+  ///  * [X, 0): Wrapped.
+  bool isUpperWrapped() const;
+
+  /// Return true if this set wraps around the signed domain. Special cases:
+  ///  * Empty set: Not wrapped.
+  ///  * Full set: Not wrapped.
+  ///  * [X, SignedMin) == [X, SignedMax]: Not wrapped.
   bool isSignWrappedSet() const;
 
+  /// Return true if the (exclusive) upper bound wraps around the signed
+  /// domain. Special cases:
+  ///  * Empty set: Not wrapped.
+  ///  * Full set: Not wrapped.
+  ///  * [X, SignedMin): Wrapped.
+  bool isUpperSignWrapped() const;
+
   /// Return true if the specified value is in the set.
   bool contains(const APInt &Val) const;
 
@@ -170,15 +221,18 @@ public:
   /// Return true if this set contains exactly one member.
   bool isSingleElement() const { return getSingleElement() != nullptr; }
 
-  /// Return the number of elements in this set.
-  APInt getSetSize() const;
-
   /// Compare set size of this range with the range CR.
   bool isSizeStrictlySmallerThan(const ConstantRange &CR) const;
 
-  // Compare set size of this range with Value.
+  /// Compare set size of this range with Value.
   bool isSizeLargerThan(uint64_t MaxSize) const;
 
+  /// Return true if all values in this range are negative.
+  bool isAllNegative() const;
+
+  /// Return true if all values in this range are non-negative.
+  bool isAllNonNegative() const;
+
   /// Return the largest unsigned value contained in the ConstantRange.
   APInt getUnsignedMax() const;
 
@@ -206,20 +260,30 @@ public:
   /// the sets).
   ConstantRange difference(const ConstantRange &CR) const;
 
-  /// Return the range that results from the intersection of
-  /// this range with another range.  The resultant range is guaranteed to
-  /// include all elements contained in both input ranges, and to have the
-  /// smallest possible set size that does so.  Because there may be two
-  /// intersections with the same set size, A.intersectWith(B) might not
-  /// be equal to B.intersectWith(A).
-  ConstantRange intersectWith(const ConstantRange &CR) const;
+  /// If represented precisely, the result of some range operations may consist
+  /// of multiple disjoint ranges. As only a single range may be returned, any
+  /// range covering these disjoint ranges constitutes a valid result, but some
+  /// may be more useful than others depending on context. The preferred range
+  /// type specifies whether a range that is non-wrapping in the unsigned or
+  /// signed domain, or has the smallest size, is preferred. If a signedness is
+  /// preferred but all ranges are non-wrapping or all wrapping, then the
+  /// smallest set size is preferred. If there are multiple smallest sets, any
+  /// one of them may be returned.
+  enum PreferredRangeType { Smallest, Unsigned, Signed };
+
+  /// Return the range that results from the intersection of this range with
+  /// another range. If the intersection is disjoint, such that two results
+  /// are possible, the preferred range is determined by the PreferredRangeType.
+  ConstantRange intersectWith(const ConstantRange &CR,
+                              PreferredRangeType Type = Smallest) const;
 
   /// Return the range that results from the union of this range
   /// with another range.  The resultant range is guaranteed to include the
   /// elements of both sets, but may contain more.  For example, [3, 9) union
   /// [12,15) is [3, 15), which includes 9, 10, and 11, which were not included
   /// in either set before.
-  ConstantRange unionWith(const ConstantRange &CR) const;
+  ConstantRange unionWith(const ConstantRange &CR,
+                          PreferredRangeType Type = Smallest) const;
 
   /// Return a new range representing the possible values resulting
   /// from an application of the specified cast operator to this range. \p
@@ -300,6 +364,23 @@ public:
   /// \p Other.
   ConstantRange udiv(const ConstantRange &Other) const;
 
+  /// Return a new range representing the possible values resulting
+  /// from a signed division of a value in this range and a value in
+  /// \p Other. Division by zero and division of SignedMin by -1 are considered
+  /// undefined behavior, in line with IR, and do not contribute towards the
+  /// result.
+  ConstantRange sdiv(const ConstantRange &Other) const;
+
+  /// Return a new range representing the possible values resulting
+  /// from an unsigned remainder operation of a value in this range and a
+  /// value in \p Other.
+  ConstantRange urem(const ConstantRange &Other) const;
+
+  /// Return a new range representing the possible values resulting
+  /// from a signed remainder operation of a value in this range and a
+  /// value in \p Other.
+  ConstantRange srem(const ConstantRange &Other) const;
+
   /// Return a new range representing the possible values resulting
   /// from a binary-and of a value in this range by a value in \p Other.
   ConstantRange binaryAnd(const ConstantRange &Other) const;
@@ -321,9 +402,53 @@ public:
   /// arithmetic right shift of a value in this range and a value in \p Other.
   ConstantRange ashr(const ConstantRange &Other) const;
 
+  /// Perform an unsigned saturating addition of two constant ranges.
+  ConstantRange uadd_sat(const ConstantRange &Other) const;
+
+  /// Perform a signed saturating addition of two constant ranges.
+  ConstantRange sadd_sat(const ConstantRange &Other) const;
+
+  /// Perform an unsigned saturating subtraction of two constant ranges.
+  ConstantRange usub_sat(const ConstantRange &Other) const;
+
+  /// Perform a signed saturating subtraction of two constant ranges.
+  ConstantRange ssub_sat(const ConstantRange &Other) const;
+
   /// Return a new range that is the logical not of the current set.
   ConstantRange inverse() const;
 
+  /// Calculate absolute value range. If the original range contains signed
+  /// min, then the resulting range will also contain signed min.
+  ConstantRange abs() const;
+
+  /// Represents whether an operation on the given constant range is known to
+  /// always or never overflow.
+  enum class OverflowResult {
+    /// Always overflows in the direction of signed/unsigned min value.
+    AlwaysOverflowsLow,
+    /// Always overflows in the direction of signed/unsigned max value.
+    AlwaysOverflowsHigh,
+    /// May or may not overflow.
+    MayOverflow,
+    /// Never overflows.
+    NeverOverflows,
+  };
+
+  /// Return whether unsigned add of the two ranges always/never overflows.
+  OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const;
+
+  /// Return whether signed add of the two ranges always/never overflows.
+  OverflowResult signedAddMayOverflow(const ConstantRange &Other) const;
+
+  /// Return whether unsigned sub of the two ranges always/never overflows.
+  OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const;
+
+  /// Return whether signed sub of the two ranges always/never overflows.
+  OverflowResult signedSubMayOverflow(const ConstantRange &Other) const;
+
+  /// Return whether unsigned mul of the two ranges always/never overflows.
+  OverflowResult unsignedMulMayOverflow(const ConstantRange &Other) const;
+
   /// Print out the bounds to a stream.
   void print(raw_ostream &OS) const;
 
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index afc93cd61d47..ca56e8b9328c 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -1,9 +1,8 @@
 //===-- llvm/Constants.h - Constant class subclass definitions --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 443332b1b23c..ad9a35b55414 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -1,9 +1,8 @@
 //===- DIBuilder.h - Debug Information Builder ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -707,6 +706,16 @@ namespace llvm {
                  DITemplateParameterArray TParams = nullptr,
                  DITypeArray ThrownTypes = nullptr);
 
+    /// Create common block entry for a Fortran common block.
+    /// \param Scope       Scope of this common block.
+    /// \param decl        Global variable declaration.
+    /// \param Name        The name of this common block.
+    /// \param File        The file in which this common block is defined.
+    /// \param LineNo      Line number.
+    DICommonBlock *createCommonBlock(DIScope *Scope, DIGlobalVariable *decl,
+                                     StringRef Name, DIFile *File,
+                                     unsigned LineNo);
+
     /// This creates new descriptor for a namespace with the specified
     /// parent scope.
     /// \param Scope       Namespace scope
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index c144d1c13c34..ac9770a15120 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -1,9 +1,8 @@
 //===- llvm/DataLayout.h - Data size & alignment info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -109,6 +108,13 @@ struct PointerAlignElem {
 /// generating LLVM IR is required to generate the right target data for the
 /// target being codegen'd to.
 class DataLayout {
+public:
+  enum class FunctionPtrAlignType {
+    /// The function pointer alignment is independent of the function alignment.
+    Independent,
+    /// The function pointer alignment is a multiple of the function alignment.
+    MultipleOfFunctionAlign,
+  };
 private:
   /// Defaults to false.
   bool BigEndian;
@@ -117,6 +123,9 @@ private:
   unsigned StackNaturalAlign;
   unsigned ProgramAddrSpace;
 
+  unsigned FunctionPtrAlign;
+  FunctionPtrAlignType TheFunctionPtrAlignType;
+
   enum ManglingModeT {
     MM_None,
     MM_ELF,
@@ -200,6 +209,8 @@ public:
     BigEndian = DL.isBigEndian();
     AllocaAddrSpace = DL.AllocaAddrSpace;
     StackNaturalAlign = DL.StackNaturalAlign;
+    FunctionPtrAlign = DL.FunctionPtrAlign;
+    TheFunctionPtrAlignType = DL.TheFunctionPtrAlignType;
     ProgramAddrSpace = DL.ProgramAddrSpace;
     ManglingMode = DL.ManglingMode;
     LegalIntWidths = DL.LegalIntWidths;
@@ -257,6 +268,17 @@ public:
   unsigned getStackAlignment() const { return StackNaturalAlign; }
   unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; }
 
+  /// Returns the alignment of function pointers, which may or may not be
+  /// related to the alignment of functions.
+  /// \see getFunctionPtrAlignType
+  unsigned getFunctionPtrAlign() const { return FunctionPtrAlign; }
+
+  /// Return the type of function pointer alignment.
+  /// \see getFunctionPtrAlign
+  FunctionPtrAlignType getFunctionPtrAlignType() const {
+    return TheFunctionPtrAlignType;
+  }
+
   unsigned getProgramAddressSpace() const { return ProgramAddrSpace; }
 
   bool hasMicrosoftFastStdCallMangling() const {
@@ -346,10 +368,13 @@ public:
     return NonIntegralAddressSpaces;
   }
 
-  bool isNonIntegralPointerType(PointerType *PT) const {
+  bool isNonIntegralAddressSpace(unsigned AddrSpace) const {
     ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
-    return find(NonIntegralSpaces, PT->getAddressSpace()) !=
-           NonIntegralSpaces.end();
+    return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end();
+  }
+
+  bool isNonIntegralPointerType(PointerType *PT) const {
+    return isNonIntegralAddressSpace(PT->getAddressSpace());
   }
 
   bool isNonIntegralPointerType(Type *Ty) const {
@@ -428,6 +453,14 @@ public:
     return 8 * getTypeStoreSize(Ty);
   }
 
+  /// Returns true if no extra padding bits are needed when storing the
+  /// specified type.
+  ///
+  /// For example, returns false for i19 that has a 24-bit store size.
+  bool typeSizeEqualsStoreSize(Type *Ty) const {
+    return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
+  }
+
   /// Returns the offset in bytes between successive objects of the
   /// specified type, including alignment padding.
   ///
diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h
index 01178af3c9ff..171e1621889f 100644
--- a/include/llvm/IR/DebugInfo.h
+++ b/include/llvm/IR/DebugInfo.h
@@ -1,9 +1,8 @@
 //===- DebugInfo.h - Debug Information Helpers ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def
index ce117aa452aa..07e3d6bdc9e5 100644
--- a/include/llvm/IR/DebugInfoFlags.def
+++ b/include/llvm/IR/DebugInfoFlags.def
@@ -1,9 +1,8 @@
 //===- llvm/IR/DebugInfoFlags.def - Debug info flag definitions -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,12 +50,12 @@ HANDLE_DI_FLAG((3 << 16), VirtualInheritance)
 HANDLE_DI_FLAG((1 << 18), IntroducedVirtual)
 HANDLE_DI_FLAG((1 << 19), BitField)
 HANDLE_DI_FLAG((1 << 20), NoReturn)
-HANDLE_DI_FLAG((1 << 21), MainSubprogram)
+HANDLE_DI_FLAG((1 << 21), ArgumentNotModified)
 HANDLE_DI_FLAG((1 << 22), TypePassByValue)
 HANDLE_DI_FLAG((1 << 23), TypePassByReference)
 HANDLE_DI_FLAG((1 << 24), EnumClass)
 HANDLE_DI_FLAG((1 << 25), Thunk)
-HANDLE_DI_FLAG((1 << 26), Trivial)
+HANDLE_DI_FLAG((1 << 26), NonTrivial)
 HANDLE_DI_FLAG((1 << 27), BigEndian)
 HANDLE_DI_FLAG((1 << 28), LittleEndian)
 HANDLE_DI_FLAG((1 << 29), AllCallsDescribed)
@@ -85,11 +84,15 @@ HANDLE_DISP_FLAG(2u, PureVirtual)
 HANDLE_DISP_FLAG((1u << 2), LocalToUnit)
 HANDLE_DISP_FLAG((1u << 3), Definition)
 HANDLE_DISP_FLAG((1u << 4), Optimized)
+HANDLE_DISP_FLAG((1u << 5), Pure)
+HANDLE_DISP_FLAG((1u << 6), Elemental)
+HANDLE_DISP_FLAG((1u << 7), Recursive)
+HANDLE_DISP_FLAG((1u << 8), MainSubprogram)
 
 #ifdef DISP_FLAG_LARGEST_NEEDED
 // Intended to be used with ADT/BitmaskEnum.h.
 // NOTE: Always must be equal to largest flag, check this when adding new flags.
-HANDLE_DISP_FLAG((1 << 4), Largest)
+HANDLE_DISP_FLAG((1 << 8), Largest)
 #undef DISP_FLAG_LARGEST_NEEDED
 #endif
 
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index a461d1bd4fe8..9dc6dfbb0f68 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/DebugInfoMetadata.h - Debug info metadata --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,44 +60,6 @@
 
 namespace llvm {
 
-/// Holds a subclass of DINode.
-///
-/// FIXME: This class doesn't currently make much sense.  Previously it was a
-/// union beteen MDString (for ODR-uniqued types) and things like DIType.  To
-/// support CodeView work, it wasn't deleted outright when MDString-based type
-/// references were deleted; we'll soon need a similar concept for CodeView
-/// DITypeIndex.
-template <class T> class TypedDINodeRef {
-  const Metadata *MD = nullptr;
-
-public:
-  TypedDINodeRef() = default;
-  TypedDINodeRef(std::nullptr_t) {}
-  TypedDINodeRef(const T *MD) : MD(MD) {}
-
-  explicit TypedDINodeRef(const Metadata *MD) : MD(MD) {
-    assert((!MD || isa<T>(MD)) && "Expected valid type ref");
-  }
-
-  template <class U>
-  TypedDINodeRef(
-      const TypedDINodeRef<U> &X,
-      typename std::enable_if<std::is_convertible<U *, T *>::value>::type * =
-          nullptr)
-      : MD(X) {}
-
-  operator Metadata *() const { return const_cast<Metadata *>(MD); }
-
-  T *resolve() const { return const_cast<T *>(cast_or_null<T>(MD)); }
-
-  bool operator==(const TypedDINodeRef<T> &X) const { return MD == X.MD; }
-  bool operator!=(const TypedDINodeRef<T> &X) const { return MD != X.MD; }
-};
-
-using DINodeRef = TypedDINodeRef<DINode>;
-using DIScopeRef = TypedDINodeRef<DIScope>;
-using DITypeRef = TypedDINodeRef<DIType>;
-
 class DITypeRefArray {
   const MDTuple *N = nullptr;
 
@@ -115,17 +76,19 @@ public:
 
   // FIXME: Fix callers and remove condition on N.
   unsigned size() const { return N ? N->getNumOperands() : 0u; }
-  DITypeRef operator[](unsigned I) const { return DITypeRef(N->getOperand(I)); }
+  DIType *operator[](unsigned I) const {
+    return cast_or_null<DIType>(N->getOperand(I));
+  }
 
-  class iterator : std::iterator<std::input_iterator_tag, DITypeRef,
-                                 std::ptrdiff_t, void, DITypeRef> {
+  class iterator : std::iterator<std::input_iterator_tag, DIType *,
+                                 std::ptrdiff_t, void, DIType *> {
     MDNode::op_iterator I = nullptr;
 
   public:
     iterator() = default;
     explicit iterator(MDNode::op_iterator I) : I(I) {}
 
-    DITypeRef operator*() const { return DITypeRef(*I); }
+    DIType *operator*() const { return cast_or_null<DIType>(*I); }
 
     iterator &operator++() {
       ++I;
@@ -228,6 +191,7 @@ public:
     case DILexicalBlockKind:
     case DILexicalBlockFileKind:
     case DINamespaceKind:
+    case DICommonBlockKind:
     case DITemplateTypeParameterKind:
     case DITemplateValueParameterKind:
     case DIGlobalVariableKind:
@@ -241,18 +205,6 @@ public:
   }
 };
 
-template <class T> struct simplify_type<const TypedDINodeRef<T>> {
-  using SimpleType = Metadata *;
-
-  static SimpleType getSimplifiedValue(const TypedDINodeRef<T> &MD) {
-    return MD;
-  }
-};
-
-template <class T>
-struct simplify_type<TypedDINodeRef<T>>
-    : simplify_type<const TypedDINodeRef<T>> {};
-
 /// Generic tagged DWARF-like metadata node.
 ///
 /// An un-specialized DWARF-like metadata node.  The first operand is a
@@ -459,7 +411,7 @@ public:
   inline Optional<StringRef> getSource() const;
 
   StringRef getName() const;
-  DIScopeRef getScope() const;
+  DIScope *getScope() const;
 
   /// Return the raw underlying file.
   ///
@@ -486,6 +438,7 @@ public:
     case DILexicalBlockKind:
     case DILexicalBlockFileKind:
     case DINamespaceKind:
+    case DICommonBlockKind:
     case DIModuleKind:
       return true;
     }
@@ -672,7 +625,7 @@ public:
   uint64_t getOffsetInBits() const { return OffsetInBits; }
   DIFlags getFlags() const { return Flags; }
 
-  DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
+  DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
   StringRef getName() const { return getStringOperand(2); }
 
 
@@ -817,14 +770,12 @@ class DIDerivedType : public DIType {
         DWARFAddressSpace(DWARFAddressSpace) {}
   ~DIDerivedType() = default;
 
-  static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
-                                StringRef Name, DIFile *File, unsigned Line,
-                                DIScopeRef Scope, DITypeRef BaseType,
-                                uint64_t SizeInBits, uint32_t AlignInBits,
-                                uint64_t OffsetInBits,
-                                Optional<unsigned> DWARFAddressSpace,
-                                DIFlags Flags, Metadata *ExtraData,
-                                StorageType Storage, bool ShouldCreate = true) {
+  static DIDerivedType *
+  getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, DIFile *File,
+          unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+          uint32_t AlignInBits, uint64_t OffsetInBits,
+          Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+          Metadata *ExtraData, StorageType Storage, bool ShouldCreate = true) {
     return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
                    Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
                    DWARFAddressSpace, Flags, ExtraData, Storage, ShouldCreate);
@@ -858,7 +809,7 @@ public:
                      ExtraData))
   DEFINE_MDNODE_GET(DIDerivedType,
                     (unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
-                     DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
+                     DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
                      uint32_t AlignInBits, uint64_t OffsetInBits,
                      Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
                      Metadata *ExtraData = nullptr),
@@ -869,7 +820,7 @@ public:
   TempDIDerivedType clone() const { return cloneImpl(); }
 
   /// Get the base type this is derived from.
-  DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+  DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
   Metadata *getRawBaseType() const { return getOperand(3); }
 
   /// \returns The DWARF address space of the memory pointed to or referenced by
@@ -889,9 +840,9 @@ public:
 
   /// Get casted version of extra data.
   /// @{
-  DITypeRef getClassType() const {
+  DIType *getClassType() const {
     assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
-    return DITypeRef(getExtraData());
+    return cast_or_null<DIType>(getExtraData());
   }
 
   DIObjCProperty *getObjCProperty() const {
@@ -963,12 +914,12 @@ class DICompositeType : public DIType {
 
   static DICompositeType *
   getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, Metadata *File,
-          unsigned Line, DIScopeRef Scope, DITypeRef BaseType,
-          uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
-          DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
-          DITypeRef VTableHolder, DITemplateParameterArray TemplateParams,
-          StringRef Identifier, DIDerivedType *Discriminator,
-          StorageType Storage, bool ShouldCreate = true) {
+          unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+          uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
+          DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder,
+          DITemplateParameterArray TemplateParams, StringRef Identifier,
+          DIDerivedType *Discriminator, StorageType Storage,
+          bool ShouldCreate = true) {
     return getImpl(
         Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
         BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
@@ -995,12 +946,13 @@ class DICompositeType : public DIType {
 public:
   DEFINE_MDNODE_GET(DICompositeType,
                     (unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
-                     DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
-                     uint32_t AlignInBits, uint64_t OffsetInBits,
-                     DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
-                     DITypeRef VTableHolder,
+                     DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+                     uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
+                     DINodeArray Elements, unsigned RuntimeLang,
+                     DIType *VTableHolder,
                      DITemplateParameterArray TemplateParams = nullptr,
-                     StringRef Identifier = "", DIDerivedType *Discriminator = nullptr),
+                     StringRef Identifier = "",
+                     DIDerivedType *Discriminator = nullptr),
                     (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
                      AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
                      VTableHolder, TemplateParams, Identifier, Discriminator))
@@ -1053,11 +1005,13 @@ public:
                unsigned RuntimeLang, Metadata *VTableHolder,
                Metadata *TemplateParams, Metadata *Discriminator);
 
-  DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+  DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
   DINodeArray getElements() const {
     return cast_or_null<MDTuple>(getRawElements());
   }
-  DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); }
+  DIType *getVTableHolder() const {
+    return cast_or_null<DIType>(getRawVTableHolder());
+  }
   DITemplateParameterArray getTemplateParams() const {
     return cast_or_null<MDTuple>(getRawTemplateParams());
   }
@@ -1087,7 +1041,7 @@ public:
     replaceOperandWith(4, Elements.get());
   }
 
-  void replaceVTableHolder(DITypeRef VTableHolder) {
+  void replaceVTableHolder(DIType *VTableHolder) {
     replaceOperandWith(5, VTableHolder);
   }
 
@@ -1541,9 +1495,6 @@ public:
   ///
   /// For precise control over the data being encoded in the discriminator,
   /// use encodeDiscriminator/decodeDiscriminator.
-  ///
-  /// Use {get|set}BaseDiscriminator and cloneWithDuplicationFactor after reading
-  /// their documentation, as their behavior has side-effects.
 
   inline unsigned getDiscriminator() const;
 
@@ -1554,7 +1505,7 @@ public:
   /// base discriminator is set in the new DILocation, the other encoded values
   /// are elided.
   /// If the discriminator cannot be encoded, the function returns None.
-  inline Optional<const DILocation *> setBaseDiscriminator(unsigned BD) const;
+  inline Optional<const DILocation *> cloneWithBaseDiscriminator(unsigned BD) const;
 
   /// Returns the duplication factor stored in the discriminator, or 1 if no
   /// duplication factor (or 0) is encoded.
@@ -1570,7 +1521,7 @@ public:
   /// duplication factor encoded in the discriminator. The current duplication
   /// factor is as defined by getDuplicationFactor().
   /// Returns None if encoding failed.
-  inline Optional<const DILocation *> cloneWithDuplicationFactor(unsigned DF) const;
+  inline Optional<const DILocation *> cloneByMultiplyingDuplicationFactor(unsigned DF) const;
 
   /// When two instructions are combined into a single instruction we also
   /// need to combine the original locations into a single location.
@@ -1594,10 +1545,11 @@ public:
     return getUnsignedFromPrefixEncoding(D);
   }
 
-  /// Raw encoding of the discriminator. APIs such as setBaseDiscriminator or
-  /// cloneWithDuplicationFactor have certain side-effects. This API, in
-  /// conjunction with cloneWithDiscriminator, may be used to encode precisely
-  /// the values provided. \p BD: base discriminator \p DF: duplication factor
+  /// Raw encoding of the discriminator. APIs such as
+  /// cloneByMultiplyingDuplicationFactor have certain special case behavior
+  /// (e.g. treating empty duplication factor as the value '1').
+  /// This API, in conjunction with cloneWithDiscriminator, may be used to encode
+  /// the raw values provided. \p BD: base discriminator \p DF: duplication factor
   /// \p CI: copy index
   /// The return is None if the values cannot be encoded in 32 bits - for
   /// example, values for BD or DF larger than 12 bits. Otherwise, the return
@@ -1638,9 +1590,6 @@ public:
 };
 
 /// Subprogram description.
-///
-/// TODO: Remove DisplayName.  It's always equal to Name.
-/// TODO: Split up flags.
 class DISubprogram : public DILocalScope {
   friend class LLVMContextImpl;
   friend class MDNode;
@@ -1678,7 +1627,8 @@ public:
   // Helper for converting old bitfields to new flags word.
   static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition,
                              bool IsOptimized,
-                             unsigned Virtuality = SPFlagNonvirtual) {
+                             unsigned Virtuality = SPFlagNonvirtual,
+                             bool IsMainSubprogram = false) {
     // We're assuming virtuality is the low-order field.
     static_assert(
         int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
@@ -1688,7 +1638,8 @@ public:
         (Virtuality & SPFlagVirtuality) |
         (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
         (IsDefinition ? SPFlagDefinition : SPFlagZero) |
-        (IsOptimized ? SPFlagOptimized : SPFlagZero));
+        (IsOptimized ? SPFlagOptimized : SPFlagZero) |
+        (IsMainSubprogram ? SPFlagMainSubprogram : SPFlagZero));
   }
 
 private:
@@ -1707,9 +1658,9 @@ private:
   ~DISubprogram() = default;
 
   static DISubprogram *
-  getImpl(LLVMContext &Context, DIScopeRef Scope, StringRef Name,
+  getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
           StringRef LinkageName, DIFile *File, unsigned Line,
-          DISubroutineType *Type, unsigned ScopeLine, DITypeRef ContainingType,
+          DISubroutineType *Type, unsigned ScopeLine, DIType *ContainingType,
           unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
           DISPFlags SPFlags, DICompileUnit *Unit,
           DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
@@ -1744,9 +1695,9 @@ private:
 public:
   DEFINE_MDNODE_GET(
       DISubprogram,
-      (DIScopeRef Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+      (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File,
        unsigned Line, DISubroutineType *Type, unsigned ScopeLine,
-       DITypeRef ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+       DIType *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
        DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
        DITemplateParameterArray TemplateParams = nullptr,
        DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
@@ -1787,6 +1738,7 @@ public:
   bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
   bool isDefinition() const { return getSPFlags() & SPFlagDefinition; }
   bool isOptimized() const { return getSPFlags() & SPFlagOptimized; }
+  bool isMainSubprogram() const { return getSPFlags() & SPFlagMainSubprogram; }
 
   bool isArtificial() const { return getFlags() & FlagArtificial; }
   bool isPrivate() const {
@@ -1803,7 +1755,9 @@ public:
   bool areAllCallsDescribed() const {
     return getFlags() & FlagAllCallsDescribed;
   }
-  bool isMainSubprogram() const { return getFlags() & FlagMainSubprogram; }
+  bool isPure() const { return getSPFlags() & SPFlagPure; }
+  bool isElemental() const { return getSPFlags() & SPFlagElemental; }
+  bool isRecursive() const { return getSPFlags() & SPFlagRecursive; }
 
   /// Check if this is reference-qualified.
   ///
@@ -1827,7 +1781,7 @@ public:
   // Returns true if this subprogram is a thunk generated by the compiler.
   bool isThunk() const { return getFlags() & FlagThunk; }
 
-  DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
+  DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
 
   StringRef getName() const { return getStringOperand(2); }
   StringRef getLinkageName() const { return getStringOperand(3); }
@@ -1835,8 +1789,8 @@ public:
   DISubroutineType *getType() const {
     return cast_or_null<DISubroutineType>(getRawType());
   }
-  DITypeRef getContainingType() const {
-    return DITypeRef(getRawContainingType());
+  DIType *getContainingType() const {
+    return cast_or_null<DIType>(getRawContainingType());
   }
 
   DICompileUnit *getUnit() const {
@@ -2039,15 +1993,17 @@ unsigned DILocation::getCopyIdentifier() const {
   return getCopyIdentifierFromDiscriminator(getDiscriminator());
 }
 
-Optional<const DILocation *> DILocation::setBaseDiscriminator(unsigned D) const {
-  if (D == 0)
+Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+  unsigned BD, DF, CI;
+  decodeDiscriminator(getDiscriminator(), BD, DF, CI);
+  if (D == BD)
     return this;
-  if (D > 0xfff)
-    return None;
-  return cloneWithDiscriminator(encodeComponent(D));
+  if (Optional<unsigned> Encoded = encodeDiscriminator(D, DF, CI))
+    return cloneWithDiscriminator(*Encoded);
+  return None;
 }
 
-Optional<const DILocation *> DILocation::cloneWithDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
   DF *= getDuplicationFactor();
   if (DF <= 1)
     return this;
@@ -2179,7 +2135,7 @@ protected:
 
 public:
   StringRef getName() const { return getStringOperand(0); }
-  DITypeRef getType() const { return DITypeRef(getRawType()); }
+  DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
 
   MDString *getRawName() const { return getOperandAs<MDString>(0); }
   Metadata *getRawType() const { return getOperand(1); }
@@ -2201,7 +2157,7 @@ class DITemplateTypeParameter : public DITemplateParameter {
   ~DITemplateTypeParameter() = default;
 
   static DITemplateTypeParameter *getImpl(LLVMContext &Context, StringRef Name,
-                                          DITypeRef Type, StorageType Storage,
+                                          DIType *Type, StorageType Storage,
                                           bool ShouldCreate = true) {
     return getImpl(Context, getCanonicalMDString(Context, Name), Type, Storage,
                    ShouldCreate);
@@ -2215,7 +2171,7 @@ class DITemplateTypeParameter : public DITemplateParameter {
   }
 
 public:
-  DEFINE_MDNODE_GET(DITemplateTypeParameter, (StringRef Name, DITypeRef Type),
+  DEFINE_MDNODE_GET(DITemplateTypeParameter, (StringRef Name, DIType *Type),
                     (Name, Type))
   DEFINE_MDNODE_GET(DITemplateTypeParameter, (MDString * Name, Metadata *Type),
                     (Name, Type))
@@ -2238,7 +2194,7 @@ class DITemplateValueParameter : public DITemplateParameter {
   ~DITemplateValueParameter() = default;
 
   static DITemplateValueParameter *getImpl(LLVMContext &Context, unsigned Tag,
-                                           StringRef Name, DITypeRef Type,
+                                           StringRef Name, DIType *Type,
                                            Metadata *Value, StorageType Storage,
                                            bool ShouldCreate = true) {
     return getImpl(Context, Tag, getCanonicalMDString(Context, Name), Type,
@@ -2255,8 +2211,9 @@ class DITemplateValueParameter : public DITemplateParameter {
   }
 
 public:
-  DEFINE_MDNODE_GET(DITemplateValueParameter, (unsigned Tag, StringRef Name,
-                                               DITypeRef Type, Metadata *Value),
+  DEFINE_MDNODE_GET(DITemplateValueParameter,
+                    (unsigned Tag, StringRef Name, DIType *Type,
+                     Metadata *Value),
                     (Tag, Name, Type, Value))
   DEFINE_MDNODE_GET(DITemplateValueParameter, (unsigned Tag, MDString *Name,
                                                Metadata *Type, Metadata *Value),
@@ -2288,7 +2245,7 @@ public:
   DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
   StringRef getName() const { return getStringOperand(1); }
   DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
-  DITypeRef getType() const { return DITypeRef(getRawType()); }
+  DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
   uint32_t getAlignInBits() const { return AlignInBits; }
   uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
   /// Determines the size of the variable's type.
@@ -2297,7 +2254,7 @@ public:
   /// Return the signedness of this variable's type, or None if this type is
   /// neither signed nor unsigned.
   Optional<DIBasicType::Signedness> getSignedness() const {
-    if (auto *BT = dyn_cast<DIBasicType>(getType().resolve()))
+    if (auto *BT = dyn_cast<DIBasicType>(getType()))
       return BT->getSignedness();
     return None;
   }
@@ -2504,6 +2461,13 @@ public:
   /// Return whether this is a piece of an aggregate variable.
   bool isFragment() const { return getFragmentInfo().hasValue(); }
 
+  /// Return whether this is an implicit location description.
+  bool isImplicit() const;
+
+  /// Return whether the location is computed on the expression stack, meaning
+  /// it cannot be a simple register location.
+  bool isComplex() const;
+
   /// Append \p Ops with operations to apply the \p Offset.
   static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset);
 
@@ -2511,20 +2475,32 @@ public:
   /// return true with an offset of zero.
   bool extractIfOffset(int64_t &Offset) const;
 
-  /// Constants for DIExpression::prepend.
-  enum { NoDeref = false, WithDeref = true, WithStackValue = true };
+  /// Checks if the last 4 elements of the expression are DW_OP_constu <DWARF
+  /// Address Space> DW_OP_swap DW_OP_xderef and extracts the <DWARF Address
+  /// Space>.
+  static const DIExpression *extractAddressClass(const DIExpression *Expr,
+                                                 unsigned &AddrClass);
+
+  /// Used for DIExpression::prepend.
+  enum PrependOps : uint8_t {
+    ApplyOffset = 0,
+    DerefBefore = 1 << 0,
+    DerefAfter = 1 << 1,
+    StackValue = 1 << 2,
+    EntryValue = 1 << 3
+  };
 
   /// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+  /// into a stack value and/or an entry value.
-  static DIExpression *prepend(const DIExpression *Expr, bool DerefBefore,
-                               int64_t Offset = 0, bool DerefAfter = false,
-                               bool StackValue = false);
+  /// into a stack value or/and an entry value.
+  static DIExpression *prepend(const DIExpression *Expr, uint8_t Flags,
+                               int64_t Offset = 0);
 
   /// Prepend \p DIExpr with the given opcodes and optionally turn it into a
   /// stack value.
   static DIExpression *prependOpcodes(const DIExpression *Expr,
                                       SmallVectorImpl<uint64_t> &Ops,
-                                      bool StackValue = false);
+                                      bool StackValue = false,
+                                      bool EntryValue = false);
 
   /// Append the opcodes \p Ops to \p DIExpr. Unlike \ref appendToStack, the
   /// returned expression is a stack value only if \p DIExpr is a stack value.
@@ -2553,17 +2529,14 @@ public:
   createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits,
                            unsigned SizeInBits);
 
-  /// Determine the relative position of the fragments described by this
-  /// DIExpression and \p Other.
+  /// Determine the relative position of the fragments passed in.
   /// Returns -1 if this is entirely before Other, 0 if this and Other overlap,
   /// 1 if this is entirely after Other.
-  int fragmentCmp(const DIExpression *Other) const {
-    auto Fragment1 = *getFragmentInfo();
-    auto Fragment2 = *Other->getFragmentInfo();
-    unsigned l1 = Fragment1.OffsetInBits;
-    unsigned l2 = Fragment2.OffsetInBits;
-    unsigned r1 = l1 + Fragment1.SizeInBits;
-    unsigned r2 = l2 + Fragment2.SizeInBits;
+  static int fragmentCmp(const FragmentInfo &A, const FragmentInfo &B) {
+    uint64_t l1 = A.OffsetInBits;
+    uint64_t l2 = B.OffsetInBits;
+    uint64_t r1 = l1 + A.SizeInBits;
+    uint64_t r2 = l2 + B.SizeInBits;
     if (r1 <= l2)
       return -1;
     else if (r2 <= l1)
@@ -2572,12 +2545,59 @@ public:
       return 0;
   }
 
+  /// Check if fragments overlap between a pair of FragmentInfos.
+  static bool fragmentsOverlap(const FragmentInfo &A, const FragmentInfo &B) {
+    return fragmentCmp(A, B) == 0;
+  }
+
+  /// Determine the relative position of the fragments described by this
+  /// DIExpression and \p Other; getFragmentInfo() is dereferenced unchecked.
+  int fragmentCmp(const DIExpression *Other) const {
+    auto Fragment1 = *getFragmentInfo();
+    auto Fragment2 = *Other->getFragmentInfo();
+    return fragmentCmp(Fragment1, Fragment2);
+  }
+
   /// Check if fragments overlap between this DIExpression and \p Other.
   bool fragmentsOverlap(const DIExpression *Other) const {
     if (!isFragment() || !Other->isFragment())
       return true;
     return fragmentCmp(Other) == 0;
   }
+
+  /// Check if the first element is DW_OP_entry_value; later elements are not
+  /// examined. (One entry value operand is the only supported configuration.)
+  bool isEntryValue() const {
+    return getNumElements() > 0 &&
+           getElement(0) == dwarf::DW_OP_entry_value;
+  }
+};
+
+inline bool operator==(const DIExpression::FragmentInfo &A,
+                       const DIExpression::FragmentInfo &B) {
+  // Fragments are equal when both their size and their offset match.
+  return A.SizeInBits == B.SizeInBits && A.OffsetInBits == B.OffsetInBits;
+}
+
+inline bool operator<(const DIExpression::FragmentInfo &A,
+                      const DIExpression::FragmentInfo &B) {
+  return A.SizeInBits != B.SizeInBits ? A.SizeInBits < B.SizeInBits
+                                      : A.OffsetInBits < B.OffsetInBits;
+}
+
+template <> struct DenseMapInfo<DIExpression::FragmentInfo> {
+  using FragInfo = DIExpression::FragmentInfo;
+  static const uint64_t MaxVal = std::numeric_limits<uint64_t>::max();
+  // Sentinel keys are built from MaxVal (empty) and MaxVal - 1 (tombstone).
+  static inline FragInfo getEmptyKey() { return {MaxVal, MaxVal}; }
+
+  static inline FragInfo getTombstoneKey() { return {MaxVal - 1, MaxVal - 1}; }
+  // Hash: low 16 bits of the size in bits 16-31, low 16 bits of the offset below.
+  static unsigned getHashValue(const FragInfo &Frag) {
+    return (Frag.SizeInBits & 0xffff) << 16 | (Frag.OffsetInBits & 0xffff);
+  }
+
+  static bool isEqual(const FragInfo &A, const FragInfo &B) { return A == B; }
 };
 
 /// Global variables.
@@ -2599,7 +2619,7 @@ class DIGlobalVariable : public DIVariable {
 
   static DIGlobalVariable *
   getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
-          StringRef LinkageName, DIFile *File, unsigned Line, DITypeRef Type,
+          StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type,
           bool IsLocalToUnit, bool IsDefinition,
           DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
           uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
@@ -2626,7 +2646,7 @@ class DIGlobalVariable : public DIVariable {
 public:
   DEFINE_MDNODE_GET(DIGlobalVariable,
                     (DIScope * Scope, StringRef Name, StringRef LinkageName,
-                     DIFile *File, unsigned Line, DITypeRef Type,
+                     DIFile *File, unsigned Line, DIType *Type,
                      bool IsLocalToUnit, bool IsDefinition,
                      DIDerivedType *StaticDataMemberDeclaration,
                      MDTuple *TemplateParams, uint32_t AlignInBits),
@@ -2663,6 +2683,65 @@ public:
   }
 };
 
+class DICommonBlock : public DIScope {
+  unsigned LineNo;
+  // Operands: scope, decl, name, file. The line number is kept in LineNo.
+  friend class LLVMContextImpl;
+  friend class MDNode;
+
+  DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo,
+                ArrayRef<Metadata *> Ops)
+      : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
+                Ops), LineNo(LineNo) {}
+  // Typed overload: passes Name through getCanonicalMDString and forwards.
+  static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope,
+                                DIGlobalVariable *Decl, StringRef Name,
+                                DIFile *File, unsigned LineNo,
+                                StorageType Storage,
+                                bool ShouldCreate = true) {
+    return getImpl(Context, Scope, Decl, getCanonicalMDString(Context, Name),
+                   File, LineNo, Storage, ShouldCreate);
+  }
+  static DICommonBlock *getImpl(LLVMContext &Context, Metadata *Scope,
+                                Metadata *Decl, MDString *Name, Metadata *File,
+                                unsigned LineNo, 
+                                StorageType Storage, bool ShouldCreate = true);
+  // Clone as a temporary node with the same scope, decl, name, file and line.
+  TempDICommonBlock cloneImpl() const {
+    return getTemporary(getContext(), getScope(), getDecl(), getName(),
+                        getFile(), getLineNo());
+  }
+
+public:
+  DEFINE_MDNODE_GET(DICommonBlock,
+                    (DIScope *Scope, DIGlobalVariable *Decl, StringRef Name,
+                     DIFile *File, unsigned LineNo),
+                    (Scope, Decl, Name, File, LineNo))
+  DEFINE_MDNODE_GET(DICommonBlock,
+                    (Metadata *Scope, Metadata *Decl, MDString *Name,
+                     Metadata *File, unsigned LineNo),
+                    (Scope, Decl, Name, File, LineNo))
+
+  TempDICommonBlock clone() const { return cloneImpl(); }
+  // Typed accessors; the pointer-returning ones use cast_or_null.
+  DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
+  DIGlobalVariable *getDecl() const {
+    return cast_or_null<DIGlobalVariable>(getRawDecl());
+  }
+  StringRef getName() const { return getStringOperand(2); }
+  DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+  unsigned getLineNo() const { return LineNo; }
+  // Raw operand accessors: 0 = scope, 1 = decl, 2 = name, 3 = file.
+  Metadata *getRawScope() const { return getOperand(0); }
+  Metadata *getRawDecl() const { return getOperand(1); }
+  MDString *getRawName() const { return getOperandAs<MDString>(2); }
+  Metadata *getRawFile() const { return getOperand(3); }
+
+  static bool classof(const Metadata *MD) {
+    return MD->getMetadataID() == DICommonBlockKind;
+  }
+};
+
 /// Local variable.
 ///
 /// TODO: Split up flags.
@@ -2684,7 +2763,7 @@ class DILocalVariable : public DIVariable {
 
   static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
                                   StringRef Name, DIFile *File, unsigned Line,
-                                  DITypeRef Type, unsigned Arg, DIFlags Flags,
+                                  DIType *Type, unsigned Arg, DIFlags Flags,
                                   uint32_t AlignInBits, StorageType Storage,
                                   bool ShouldCreate = true) {
     return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
@@ -2705,8 +2784,8 @@ class DILocalVariable : public DIVariable {
 public:
   DEFINE_MDNODE_GET(DILocalVariable,
                     (DILocalScope * Scope, StringRef Name, DIFile *File,
-                     unsigned Line, DITypeRef Type, unsigned Arg,
-                     DIFlags Flags, uint32_t AlignInBits),
+                     unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags,
+                     uint32_t AlignInBits),
                     (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
   DEFINE_MDNODE_GET(DILocalVariable,
                     (Metadata * Scope, MDString *Name, Metadata *File,
@@ -2730,6 +2809,11 @@ public:
   bool isArtificial() const { return getFlags() & FlagArtificial; }
   bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
 
+  /// Check that an argument is unmodified.
+  bool isNotModified() const { return getFlags() & FlagArgumentNotModified; }
+  /// Set the flag if an argument is unmodified.
+  void setIsNotModified() { Flags |= FlagArgumentNotModified; }
+
   /// Check that a location is valid for this variable.
   ///
   /// Check that \c DL exists, is in the same subprogram, and has the same
@@ -2831,7 +2915,7 @@ class DIObjCProperty : public DINode {
   static DIObjCProperty *
   getImpl(LLVMContext &Context, StringRef Name, DIFile *File, unsigned Line,
           StringRef GetterName, StringRef SetterName, unsigned Attributes,
-          DITypeRef Type, StorageType Storage, bool ShouldCreate = true) {
+          DIType *Type, StorageType Storage, bool ShouldCreate = true) {
     return getImpl(Context, getCanonicalMDString(Context, Name), File, Line,
                    getCanonicalMDString(Context, GetterName),
                    getCanonicalMDString(Context, SetterName), Attributes, Type,
@@ -2853,7 +2937,7 @@ public:
   DEFINE_MDNODE_GET(DIObjCProperty,
                     (StringRef Name, DIFile *File, unsigned Line,
                      StringRef GetterName, StringRef SetterName,
-                     unsigned Attributes, DITypeRef Type),
+                     unsigned Attributes, DIType *Type),
                     (Name, File, Line, GetterName, SetterName, Attributes,
                      Type))
   DEFINE_MDNODE_GET(DIObjCProperty,
@@ -2871,7 +2955,7 @@ public:
   DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
   StringRef getGetterName() const { return getStringOperand(2); }
   StringRef getSetterName() const { return getStringOperand(3); }
-  DITypeRef getType() const { return DITypeRef(getRawType()); }
+  DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
 
   StringRef getFilename() const {
     if (auto *F = getFile())
@@ -2915,8 +2999,8 @@ class DIImportedEntity : public DINode {
   ~DIImportedEntity() = default;
 
   static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
-                                   DIScope *Scope, DINodeRef Entity,
-                                   DIFile *File, unsigned Line, StringRef Name,
+                                   DIScope *Scope, DINode *Entity, DIFile *File,
+                                   unsigned Line, StringRef Name,
                                    StorageType Storage,
                                    bool ShouldCreate = true) {
     return getImpl(Context, Tag, Scope, Entity, File, Line,
@@ -2935,8 +3019,8 @@ class DIImportedEntity : public DINode {
 
 public:
   DEFINE_MDNODE_GET(DIImportedEntity,
-                    (unsigned Tag, DIScope *Scope, DINodeRef Entity,
-                     DIFile *File, unsigned Line, StringRef Name = ""),
+                    (unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File,
+                     unsigned Line, StringRef Name = ""),
                     (Tag, Scope, Entity, File, Line, Name))
   DEFINE_MDNODE_GET(DIImportedEntity,
                     (unsigned Tag, Metadata *Scope, Metadata *Entity,
@@ -2947,7 +3031,7 @@ public:
 
   unsigned getLine() const { return Line; }
   DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
-  DINodeRef getEntity() const { return DINodeRef(getRawEntity()); }
+  DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); }
   StringRef getName() const { return getStringOperand(2); }
   DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
 
diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h
index 4f0d7f51b5f9..780d17a33661 100644
--- a/include/llvm/IR/DebugLoc.h
+++ b/include/llvm/IR/DebugLoc.h
@@ -1,9 +1,8 @@
 //===- DebugLoc.h - Debug Location Information ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 9526d6287d2f..3c1d4278905f 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -1,9 +1,8 @@
 //===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ScalableSize.h"
 #include <cassert>
 #include <cstdint>
 
@@ -158,6 +158,38 @@ unsigned Type::getFunctionNumParams() const {
   return cast<FunctionType>(this)->getNumParams();
 }
 
+/// A handy container for a FunctionType+Callee-pointer pair, which can be
+/// passed around as a single entity. This assists in replacing the use of
+/// PointerType::getElementType() to access the function's type, since that's
+/// slated for removal as part of the [opaque pointer types] project.
+class FunctionCallee {
+public:
+  // Allow implicit conversion from types which have a getFunctionType member
+  // (e.g. Function and InlineAsm). A null Fn leaves the function type null.
+  template <typename T, typename U = decltype(&T::getFunctionType)>
+  FunctionCallee(T *Fn)
+      : FnTy(Fn ? Fn->getFunctionType() : nullptr), Callee(Fn) {}
+
+  // The function type and the callee must be both null or both non-null.
+  FunctionCallee(FunctionType *FnTy, Value *Callee)
+      : FnTy(FnTy), Callee(Callee) {
+    assert((FnTy == nullptr) == (Callee == nullptr));
+  }
+
+  FunctionCallee(std::nullptr_t) {}
+  FunctionCallee() = default;
+
+  // Accessors are const-qualified so they can be used on const objects.
+  FunctionType *getFunctionType() const { return FnTy; }
+  Value *getCallee() const { return Callee; }
+  // True when a callee is present.
+  explicit operator bool() const { return Callee != nullptr; }
+
+private:
+  FunctionType *FnTy = nullptr;
+  Value *Callee = nullptr;
+};
+
 /// Common super class of ArrayType, StructType and VectorType.
 class CompositeType : public Type {
 protected:
@@ -356,6 +388,8 @@ public:
   SequentialType(const SequentialType &) = delete;
   SequentialType &operator=(const SequentialType &) = delete;
 
+  /// For scalable vectors, this will return the minimum number of elements
+  /// in the vector.
   uint64_t getNumElements() const { return NumElements; }
   Type *getElementType() const { return ContainedType; }
 
@@ -391,14 +425,37 @@ uint64_t Type::getArrayNumElements() const {
 
 /// Class to represent vector types.
 class VectorType : public SequentialType {
-  VectorType(Type *ElType, unsigned NumEl);
+  /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the
+  /// minimum number of elements of type Ty contained within the vector, and
+  /// 'vscale x' indicates that the total element count is an integer multiple
+  /// of 'n', where the multiple is either guaranteed to be one, or is
+  /// statically unknown at compile time.
+  ///
+  /// If the multiple is known to be 1, then the extra term is discarded in
+  /// textual IR:
+  ///
+  /// <4 x i32>          - a vector containing 4 i32s
+  /// <vscale x 4 x i32> - a vector containing an unknown integer multiple
+  ///                      of 4 i32s
+
+  VectorType(Type *ElType, unsigned NumEl, bool Scalable = false);
+  VectorType(Type *ElType, ElementCount EC);
+
+  // If true, the total number of elements is an unknown multiple of the
+  // minimum 'NumElements' from SequentialType. Otherwise the total number
+  // of elements is exactly equal to 'NumElements'.
+  bool Scalable;
 
 public:
   VectorType(const VectorType &) = delete;
   VectorType &operator=(const VectorType &) = delete;
 
   /// This static method is the primary way to construct an VectorType.
-  static VectorType *get(Type *ElementType, unsigned NumElements);
+  static VectorType *get(Type *ElementType, ElementCount EC);
+  static VectorType *get(Type *ElementType, unsigned NumElements,
+                         bool Scalable = false) {
+    return VectorType::get(ElementType, {NumElements, Scalable});
+  }
 
   /// This static method gets a VectorType with the same number of elements as
   /// the input type, and the element type is an integer type of the same width
@@ -407,7 +464,7 @@ public:
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
     assert(EltBits && "Element size must be of a non-zero size");
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method is like getInteger except that the element types are
@@ -415,7 +472,7 @@ public:
   static VectorType *getExtendedElementVectorType(VectorType *VTy) {
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method is like getInteger except that the element types are
@@ -425,29 +482,45 @@ public:
     assert((EltBits & 1) == 0 &&
            "Cannot truncate vector element with odd bit-width");
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method returns a VectorType with half as many elements as the
   /// input type and the same element type.
   static VectorType *getHalfElementsVectorType(VectorType *VTy) {
-    unsigned NumElts = VTy->getNumElements();
-    assert ((NumElts & 1) == 0 &&
+    auto EltCnt = VTy->getElementCount();
+    assert ((EltCnt.Min & 1) == 0 &&
             "Cannot halve vector with odd number of elements.");
-    return VectorType::get(VTy->getElementType(), NumElts/2);
+    return VectorType::get(VTy->getElementType(), EltCnt/2);
   }
 
   /// This static method returns a VectorType with twice as many elements as the
   /// input type and the same element type.
   static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
-    unsigned NumElts = VTy->getNumElements();
-    return VectorType::get(VTy->getElementType(), NumElts*2);
+    auto EltCnt = VTy->getElementCount();
+    assert((VTy->getNumElements() * 2ull) <= UINT_MAX &&
+           "Too many elements in vector");
+    return VectorType::get(VTy->getElementType(), EltCnt*2);
   }
 
   /// Return true if the specified type is valid as a element type.
   static bool isValidElementType(Type *ElemTy);
 
-  /// Return the number of bits in the Vector type.
+  /// Build the ElementCount for this vector from its minimum element count
+  /// and its scalable flag.
+  ElementCount getElementCount() const {
+    const uint64_t MinimumEltCnt = getNumElements();
+    assert(MinimumEltCnt <= UINT_MAX && "Too many elements in vector");
+    return {static_cast<unsigned>(MinimumEltCnt), Scalable};
+  }
+
+  /// Returns whether or not this is a scalable vector (meaning the total
+  /// element count is a multiple of the minimum).
+  bool isScalable() const {
+    return Scalable;
+  }
+
+  /// Return the minimum number of bits in the Vector type.
   /// Returns zero when the vector is a vector of pointers.
   unsigned getBitWidth() const {
     return getNumElements() * getElementType()->getPrimitiveSizeInBits();
@@ -463,6 +536,10 @@ unsigned Type::getVectorNumElements() const {
   return cast<VectorType>(this)->getNumElements();
 }
 
+bool Type::getVectorIsScalable() const {
+  return cast<VectorType>(this)->isScalable();
+}
+
 /// Class to represent pointers.
 class PointerType : public Type {
   explicit PointerType(Type *ElType, unsigned AddrSpace);
diff --git a/include/llvm/IR/DerivedUser.h b/include/llvm/IR/DerivedUser.h
index 67c483d3c497..a25d316c2d60 100644
--- a/include/llvm/IR/DerivedUser.h
+++ b/include/llvm/IR/DerivedUser.h
@@ -1,9 +1,8 @@
 //===- DerivedUser.h - Base for non-IR Users --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/IR/DiagnosticHandler.h b/include/llvm/IR/DiagnosticHandler.h
index 51873bea3d41..55e5e5975808 100644
--- a/include/llvm/IR/DiagnosticHandler.h
+++ b/include/llvm/IR/DiagnosticHandler.h
@@ -1,9 +1,8 @@
-//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM -*- C++ ---*-===//
+//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Base DiagnosticHandler class declaration. Derive from this class to provide
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 3a55a7dca7f4..373663289dbd 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/DiagnosticInfo.h - Diagnostic Declaration --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -466,12 +465,15 @@ public:
   virtual bool isEnabled() const = 0;
 
   StringRef getPassName() const { return PassName; }
+  StringRef getRemarkName() const { return RemarkName; }
   std::string getMsg() const;
   Optional<uint64_t> getHotness() const { return Hotness; }
   void setHotness(Optional<uint64_t> H) { Hotness = H; }
 
   bool isVerbose() const { return IsVerbose; }
 
+  ArrayRef<Argument> getArgs() const { return Args; }
+
   static bool classof(const DiagnosticInfo *DI) {
     return (DI->getKind() >= DK_FirstRemark &&
             DI->getKind() <= DK_LastRemark) ||
@@ -501,7 +503,7 @@ protected:
   const char *PassName;
 
   /// Textual identifier for the remark (single-word, camel-case). Can be used
-  /// by external tools reading the YAML output file for optimization remarks to
+  /// by external tools reading the output file for optimization remarks to
   /// identify the remark.
   StringRef RemarkName;
 
@@ -519,8 +521,6 @@ protected:
   /// the optimization records and not in the remark printed in the compiler
   /// output.
   int FirstExtraArgIndex = -1;
-
-  friend struct yaml::MappingTraits<DiagnosticInfoOptimizationBase *>;
 };
 
 /// Allow the insertion operator to return the actual remark type rather than a
@@ -1002,12 +1002,6 @@ public:
   void print(DiagnosticPrinter &DP) const override;
 };
 
-namespace yaml {
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
-  static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
-};
-} // namespace yaml
-
 } // end namespace llvm
 
 #endif // LLVM_IR_DIAGNOSTICINFO_H
diff --git a/include/llvm/IR/DiagnosticPrinter.h b/include/llvm/IR/DiagnosticPrinter.h
index 25c47cdd1a12..102932ceefa5 100644
--- a/include/llvm/IR/DiagnosticPrinter.h
+++ b/include/llvm/IR/DiagnosticPrinter.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/DiagnosticPrinter.h - Diagnostic Printer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/DomTreeUpdater.h b/include/llvm/IR/DomTreeUpdater.h
deleted file mode 100644
index e5bb092d21ca..000000000000
--- a/include/llvm/IR/DomTreeUpdater.h
+++ /dev/null
@@ -1,257 +0,0 @@
-//===- DomTreeUpdater.h - DomTree/Post DomTree Updater ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the DomTreeUpdater class, which provides a uniform way to
-// update dominator tree related data structures.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DOMTREEUPDATER_H
-#define LLVM_DOMTREEUPDATER_H
-
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/GenericDomTree.h"
-#include <functional>
-#include <vector>
-
-namespace llvm {
-class DomTreeUpdater {
-public:
-  enum class UpdateStrategy : unsigned char { Eager = 0, Lazy = 1 };
-
-  explicit DomTreeUpdater(UpdateStrategy Strategy_) : Strategy(Strategy_) {}
-  DomTreeUpdater(DominatorTree &DT_, UpdateStrategy Strategy_)
-      : DT(&DT_), Strategy(Strategy_) {}
-  DomTreeUpdater(DominatorTree *DT_, UpdateStrategy Strategy_)
-      : DT(DT_), Strategy(Strategy_) {}
-  DomTreeUpdater(PostDominatorTree &PDT_, UpdateStrategy Strategy_)
-      : PDT(&PDT_), Strategy(Strategy_) {}
-  DomTreeUpdater(PostDominatorTree *PDT_, UpdateStrategy Strategy_)
-      : PDT(PDT_), Strategy(Strategy_) {}
-  DomTreeUpdater(DominatorTree &DT_, PostDominatorTree &PDT_,
-                 UpdateStrategy Strategy_)
-      : DT(&DT_), PDT(&PDT_), Strategy(Strategy_) {}
-  DomTreeUpdater(DominatorTree *DT_, PostDominatorTree *PDT_,
-                 UpdateStrategy Strategy_)
-      : DT(DT_), PDT(PDT_), Strategy(Strategy_) {}
-
-  ~DomTreeUpdater() { flush(); }
-
-  /// Returns true if the current strategy is Lazy.
-  bool isLazy() const { return Strategy == UpdateStrategy::Lazy; };
-
-  /// Returns true if the current strategy is Eager.
-  bool isEager() const { return Strategy == UpdateStrategy::Eager; };
-
-  /// Returns true if it holds a DominatorTree.
-  bool hasDomTree() const { return DT != nullptr; }
-
-  /// Returns true if it holds a PostDominatorTree.
-  bool hasPostDomTree() const { return PDT != nullptr; }
-
-  /// Returns true if there is BasicBlock awaiting deletion.
-  /// The deletion will only happen until a flush event and
-  /// all available trees are up-to-date.
-  /// Returns false under Eager UpdateStrategy.
-  bool hasPendingDeletedBB() const { return !DeletedBBs.empty(); }
-
-  /// Returns true if DelBB is awaiting deletion.
-  /// Returns false under Eager UpdateStrategy.
-  bool isBBPendingDeletion(BasicBlock *DelBB) const;
-
-  /// Returns true if either of DT or PDT is valid and the tree has at
-  /// least one update pending. If DT or PDT is nullptr it is treated
-  /// as having no pending updates. This function does not check
-  /// whether there is BasicBlock awaiting deletion.
-  /// Returns false under Eager UpdateStrategy.
-  bool hasPendingUpdates() const;
-
-  /// Returns true if there are DominatorTree updates queued.
-  /// Returns false under Eager UpdateStrategy or DT is nullptr.
-  bool hasPendingDomTreeUpdates() const;
-
-  /// Returns true if there are PostDominatorTree updates queued.
-  /// Returns false under Eager UpdateStrategy or PDT is nullptr.
-  bool hasPendingPostDomTreeUpdates() const;
-
-  /// Apply updates on all available trees. Under Eager UpdateStrategy with
-  /// ForceRemoveDuplicates enabled or under Lazy UpdateStrategy, it will
-  /// discard duplicated updates and self-dominance updates. If both DT and PDT
-  /// are nullptrs, this function discards all updates. The Eager Strategy
-  /// applies the updates immediately while the Lazy Strategy queues the
-  /// updates. It is required for the state of the LLVM IR to be updated
-  /// *before* applying the Updates because the internal update routine will
-  /// analyze the current state of the relationship between a pair of (From, To)
-  /// BasicBlocks to determine whether a single update needs to be discarded.
-  void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
-                    bool ForceRemoveDuplicates = false);
-
-  /// Notify all available trees on an edge insertion. If both DT and PDT are
-  /// nullptrs, this function discards the update. Under either Strategy,
-  /// self-dominance update will be removed. The Eager Strategy applies
-  /// the update immediately while the Lazy Strategy queues the update.
-  /// It is recommended to only use this method when you have exactly one
-  /// insertion (and no deletions). It is recommended to use applyUpdates() in
-  /// all other cases. This function has to be called *after* making the update
-  /// on the actual CFG. An internal functions checks if the edge exists in the
-  /// CFG in DEBUG mode.
-  void insertEdge(BasicBlock *From, BasicBlock *To);
-
-  /// Notify all available trees on an edge insertion.
-  /// Under either Strategy, the following updates will be discard silently
-  /// 1. Invalid - Inserting an edge that does not exist in the CFG.
-  /// 2. Self-dominance update.
-  /// 3. Both DT and PDT are nullptrs.
-  /// The Eager Strategy applies the update immediately while the Lazy Strategy
-  /// queues the update. It is recommended to only use this method when you have
-  /// exactly one insertion (and no deletions) and want to discard an invalid
-  /// update.
-  void insertEdgeRelaxed(BasicBlock *From, BasicBlock *To);
-
-  /// Notify all available trees on an edge deletion. If both DT and PDT are
-  /// nullptrs, this function discards the update. Under either Strategy,
-  /// self-dominance update will be removed. The Eager Strategy applies
-  /// the update immediately while the Lazy Strategy queues the update.
-  /// It is recommended to only use this method when you have exactly one
-  /// deletion (and no insertions). It is recommended to use applyUpdates() in
-  /// all other cases. This function has to be called *after* making the update
-  /// on the actual CFG. An internal functions checks if the edge doesn't exist
-  /// in the CFG in DEBUG mode.
-  void deleteEdge(BasicBlock *From, BasicBlock *To);
-
-  /// Notify all available trees on an edge deletion.
-  /// Under either Strategy, the following updates will be discard silently
-  /// 1. Invalid - Deleting an edge that still exists in the CFG.
-  /// 2. Self-dominance update.
-  /// 3. Both DT and PDT are nullptrs.
-  /// The Eager Strategy applies the update immediately while the Lazy Strategy
-  /// queues the update. It is recommended to only use this method when you have
-  /// exactly one deletion (and no insertions) and want to discard an invalid
-  /// update.
-  void deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To);
-
-  /// Delete DelBB. DelBB will be removed from its Parent and
-  /// erased from available trees if it exists and finally get deleted.
-  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
-  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
-  /// all available trees are up-to-date. Assert if any instruction of DelBB is
-  /// modified while awaiting deletion. When both DT and PDT are nullptrs, DelBB
-  /// will be queued until flush() is called.
-  void deleteBB(BasicBlock *DelBB);
-
-  /// Delete DelBB. DelBB will be removed from its Parent and
-  /// erased from available trees if it exists. Then the callback will
-  /// be called. Finally, DelBB will be deleted.
-  /// Under Eager UpdateStrategy, DelBB will be processed immediately.
-  /// Under Lazy UpdateStrategy, DelBB will be queued until a flush event and
-  /// all available trees are up-to-date. Assert if any instruction of DelBB is
-  /// modified while awaiting deletion. Multiple callbacks can be queued for one
-  /// DelBB under Lazy UpdateStrategy.
-  void callbackDeleteBB(BasicBlock *DelBB,
-                        std::function<void(BasicBlock *)> Callback);
-
-  /// Recalculate all available trees and flush all BasicBlocks
-  /// awaiting deletion immediately.
-  void recalculate(Function &F);
-
-  /// Flush DomTree updates and return DomTree.
-  /// It also flush out of date updates applied by all available trees
-  /// and flush Deleted BBs if both trees are up-to-date.
-  /// It must only be called when it has a DomTree.
-  DominatorTree &getDomTree();
-
-  /// Flush PostDomTree updates and return PostDomTree.
-  /// It also flush out of date updates applied by all available trees
-  /// and flush Deleted BBs if both trees are up-to-date.
-  /// It must only be called when it has a PostDomTree.
-  PostDominatorTree &getPostDomTree();
-
-  /// Apply all pending updates to available trees and flush all BasicBlocks
-  /// awaiting deletion.
-  /// Does nothing under Eager UpdateStrategy.
-  void flush();
-
-  /// Debug method to help view the internal state of this class.
-  LLVM_DUMP_METHOD void dump() const;
-
-private:
-  class CallBackOnDeletion final : public CallbackVH {
-  public:
-    CallBackOnDeletion(BasicBlock *V,
-                       std::function<void(BasicBlock *)> Callback)
-        : CallbackVH(V), DelBB(V), Callback_(Callback) {}
-
-  private:
-    BasicBlock *DelBB = nullptr;
-    std::function<void(BasicBlock *)> Callback_;
-
-    void deleted() override {
-      Callback_(DelBB);
-      CallbackVH::deleted();
-    }
-  };
-
-  SmallVector<DominatorTree::UpdateType, 16> PendUpdates;
-  size_t PendDTUpdateIndex = 0;
-  size_t PendPDTUpdateIndex = 0;
-  DominatorTree *DT = nullptr;
-  PostDominatorTree *PDT = nullptr;
-  const UpdateStrategy Strategy;
-  SmallPtrSet<BasicBlock *, 8> DeletedBBs;
-  std::vector<CallBackOnDeletion> Callbacks;
-  bool IsRecalculatingDomTree = false;
-  bool IsRecalculatingPostDomTree = false;
-
-  /// First remove all the instructions of DelBB and then make sure DelBB has a
-  /// valid terminator instruction which is necessary to have when DelBB still
-  /// has to be inside of its parent Function while awaiting deletion under Lazy
-  /// UpdateStrategy to prevent other routines from asserting the state of the
-  /// IR is inconsistent. Assert if DelBB is nullptr or has predecessors.
-  void validateDeleteBB(BasicBlock *DelBB);
-
-  /// Returns true if at least one BasicBlock is deleted.
-  bool forceFlushDeletedBB();
-
-  /// Deduplicate and remove unnecessary updates (no-ops) when using Lazy
-  /// UpdateStrategy. Returns true if the update is queued for update.
-  bool applyLazyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
-                       BasicBlock *To);
-
-  /// Helper function to apply all pending DomTree updates.
-  void applyDomTreeUpdates();
-
-  /// Helper function to apply all pending PostDomTree updates.
-  void applyPostDomTreeUpdates();
-
-  /// Helper function to flush deleted BasicBlocks if all available
-  /// trees are up-to-date.
-  void tryFlushDeletedBB();
-
-  /// Drop all updates applied by all available trees and delete BasicBlocks if
-  /// all available trees are up-to-date.
-  void dropOutOfDateUpdates();
-
-  /// Erase Basic Block node that has been unlinked from Function
-  /// in the DomTree and PostDomTree.
-  void eraseDelBBNode(BasicBlock *DelBB);
-
-  /// Returns true if the update appears in the LLVM IR.
-  /// It is used to check whether an update is valid in
-  /// insertEdge/deleteEdge or is unnecessary in the batch update.
-  bool isUpdateValid(DominatorTree::UpdateType Update) const;
-
-  /// Returns true if the update is self dominance.
-  bool isSelfDominance(DominatorTree::UpdateType Update) const;
-};
-} // namespace llvm
-
-#endif // LLVM_DOMTREEUPDATER_H
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index f7da47d07663..fef1c6abf8c2 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -1,9 +1,8 @@
 //===- Dominators.h - Dominator Info Calculation ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 630f47e8bb57..7fa61e12f431 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -1,9 +1,8 @@
 //===- llvm/Function.h - Class to represent a single function ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -297,15 +296,18 @@ public:
 
   /// Get the entry count for this function.
   ///
-  /// Entry count is the number of times the function was executed based on
-  /// pgo data.
-  ProfileCount getEntryCount() const;
+  /// Entry count is the number of times the function was executed.
+  /// When AllowSynthetic is false, only real PGO data will be returned.
+  ProfileCount getEntryCount(bool AllowSynthetic = false) const;
 
   /// Return true if the function is annotated with profile data.
   ///
   /// Presence of entry counts from a profile run implies the function has
-  /// profile annotations.
-  bool hasProfileData() const { return getEntryCount().hasValue(); }
+  /// profile annotations. If IncludeSynthetic is false, only return true
+  /// when the profile data is real.
+  bool hasProfileData(bool IncludeSynthetic = false) const {
+    return getEntryCount(IncludeSynthetic).hasValue();
+  }
 
   /// Returns the set of GUIDs that needs to be imported to the function for
   /// sample PGO, to enable the same inlines as the profiled optimized binary.
@@ -399,6 +401,11 @@ public:
     return getAttributes().hasParamAttribute(ArgNo, Kind);
   }
 
+  /// gets the specified attribute from the list of attributes.
+  Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return getAttributes().getParamAttr(ArgNo, Kind);
+  }
+
   /// gets the attribute from the list of attributes.
   Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
     return AttributeSets.getAttribute(i, Kind);
@@ -429,6 +436,12 @@ public:
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a parameter.
+  Type *getParamByValType(unsigned ArgNo) const {
+    Type *Ty = AttributeSets.getParamByValType(ArgNo);
+    return Ty ? Ty : (arg_begin() + ArgNo)->getType()->getPointerElementType();
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
@@ -551,6 +564,14 @@ public:
     addFnAttr(Attribute::Speculatable);
   }
 
+  /// Determine if the function is known not to free (deallocate) memory.
+  bool doesNotFreeMemory() const {
+    return onlyReadsMemory() || hasFnAttribute(Attribute::NoFree);
+  }
+  void setDoesNotFreeMemory() {
+    addFnAttr(Attribute::NoFree);
+  }
+
   /// Determine if the function is known not to recurse, directly or
   /// indirectly.
   bool doesNotRecurse() const {
@@ -591,12 +612,15 @@ public:
     addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
   }
 
+  /// Do not optimize this function (-O0).
+  bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
+
   /// Optimize this function for minimum size (-Oz).
-  bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }
+  bool hasMinSize() const { return hasFnAttribute(Attribute::MinSize); }
 
   /// Optimize this function for size (-Os) or minimum size (-Oz).
-  bool optForSize() const {
-    return hasFnAttribute(Attribute::OptimizeForSize) || optForMinSize();
+  bool hasOptSize() const {
+    return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize();
   }
 
   /// copyAttributesFrom - copy all additional attributes (those not needed to
diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h
index 675abeb6ec3a..d62da41ebc29 100644
--- a/include/llvm/IR/GVMaterializer.h
+++ b/include/llvm/IR/GVMaterializer.h
@@ -1,9 +1,8 @@
 //===- GVMaterializer.h - Interface for GV materializers --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h
index 3c143ea5f703..9b257abc7c1f 100644
--- a/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -1,9 +1,8 @@
 //===- GetElementPtrTypeIterator.h ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
index 450583baaa3c..3cd405701300 100644
--- a/include/llvm/IR/GlobalAlias.h
+++ b/include/llvm/IR/GlobalAlias.h
@@ -1,9 +1,8 @@
 //===-------- llvm/GlobalAlias.h - GlobalAlias class ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h
index ef51315a6f5d..bc0d3c053cce 100644
--- a/include/llvm/IR/GlobalIFunc.h
+++ b/include/llvm/IR/GlobalIFunc.h
@@ -1,9 +1,8 @@
 //===-------- llvm/GlobalIFunc.h - GlobalIFunc class ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h
index 22c00686c549..8bc3f90b94aa 100644
--- a/include/llvm/IR/GlobalIndirectSymbol.h
+++ b/include/llvm/IR/GlobalIndirectSymbol.h
@@ -1,9 +1,8 @@
 //===- llvm/GlobalIndirectSymbol.h - GlobalIndirectSymbol class -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index 1fd3568100c2..b8ab6140ebe7 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -1,9 +1,8 @@
 //===-- llvm/GlobalObject.h - Class to represent global objects -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index c07d4051c803..2209881dbda6 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -1,9 +1,8 @@
 //===-- llvm/GlobalValue.h - Class to represent a global value --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -80,15 +79,15 @@ protected:
         ValueType(Ty), Visibility(DefaultVisibility),
         UnnamedAddrVal(unsigned(UnnamedAddr::None)),
         DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
-        HasLLVMReservedName(false), IsDSOLocal(false), IntID((Intrinsic::ID)0U),
-        Parent(nullptr) {
+        HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false),
+        IntID((Intrinsic::ID)0U), Parent(nullptr) {
     setLinkage(Linkage);
     setName(Name);
   }
 
   Type *ValueType;
 
-  static const unsigned GlobalValueSubClassDataBits = 17;
+  static const unsigned GlobalValueSubClassDataBits = 16;
 
   // All bitfields use unsigned as the underlying type so that MSVC will pack
   // them.
@@ -109,9 +108,13 @@ protected:
   /// definition cannot be runtime preempted.
   unsigned IsDSOLocal : 1;
 
+  /// True if this symbol has a partition name assigned (see
+  /// https://lld.llvm.org/Partitions.html).
+  unsigned HasPartition : 1;
+
 private:
   // Give subclasses access to what otherwise would be wasted padding.
-  // (17 + 4 + 2 + 2 + 2 + 3 + 1 + 1) == 32.
+  // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32.
   unsigned SubClassData : GlobalValueSubClassDataBits;
 
   friend class Constant;
@@ -281,6 +284,12 @@ public:
     return IsDSOLocal;
   }
 
+  bool hasPartition() const {
+    return HasPartition;
+  }
+  StringRef getPartition() const;
+  void setPartition(StringRef Part);
+
   static LinkageTypes getLinkOnceLinkage(bool ODR) {
     return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
   }
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 03b9ec46ebb4..2e2c8c477913 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -1,9 +1,8 @@
 //===-- llvm/GlobalVariable.h - GlobalVariable class ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index fac2ff46c453..a74364dffb2e 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1,9 +1,8 @@
 //===- llvm/IRBuilder.h - Builder for LLVM Instructions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
@@ -97,12 +96,18 @@ protected:
   MDNode *DefaultFPMathTag;
   FastMathFlags FMF;
 
+  bool IsFPConstrained;
+  ConstrainedFPIntrinsic::ExceptionBehavior DefaultConstrainedExcept;
+  ConstrainedFPIntrinsic::RoundingMode DefaultConstrainedRounding;
+
   ArrayRef<OperandBundleDef> DefaultOperandBundles;
 
 public:
   IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
                 ArrayRef<OperandBundleDef> OpBundles = None)
-      : Context(context), DefaultFPMathTag(FPMathTag),
+      : Context(context), DefaultFPMathTag(FPMathTag), IsFPConstrained(false),
+        DefaultConstrainedExcept(ConstrainedFPIntrinsic::ebStrict),
+        DefaultConstrainedRounding(ConstrainedFPIntrinsic::rmDynamic),
         DefaultOperandBundles(OpBundles) {
     ClearInsertionPoint();
   }
@@ -219,6 +224,37 @@ public:
   /// Set the fast-math flags to be used with generated fp-math operators
   void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
 
+  /// Enable/Disable use of constrained floating point math. When
+  /// enabled the CreateF<op>() calls instead create constrained
+  /// floating point intrinsic calls. Fast math flags are unaffected
+  /// by this setting.
+  void setIsFPConstrained(bool IsCon) { IsFPConstrained = IsCon; }
+
+  /// Query for the use of constrained floating point math
+  bool getIsFPConstrained() { return IsFPConstrained; }
+
+  /// Set the exception handling to be used with constrained floating point
+  void setDefaultConstrainedExcept(
+      ConstrainedFPIntrinsic::ExceptionBehavior NewExcept) {
+    DefaultConstrainedExcept = NewExcept;
+  }
+
+  /// Set the rounding mode handling to be used with constrained floating point
+  void setDefaultConstrainedRounding(
+      ConstrainedFPIntrinsic::RoundingMode NewRounding) {
+    DefaultConstrainedRounding = NewRounding;
+  }
+
+  /// Get the exception handling used with constrained floating point
+  ConstrainedFPIntrinsic::ExceptionBehavior getDefaultConstrainedExcept() {
+    return DefaultConstrainedExcept;
+  }
+
+  /// Get the rounding mode handling used with constrained floating point
+  ConstrainedFPIntrinsic::RoundingMode getDefaultConstrainedRounding() {
+    return DefaultConstrainedRounding;
+  }
+
   //===--------------------------------------------------------------------===//
   // RAII helpers.
   //===--------------------------------------------------------------------===//
@@ -906,20 +942,20 @@ public:
                   Name);
   }
 
-  InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+  InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest,
                            BasicBlock *UnwindDest, ArrayRef<Value *> Args,
                            ArrayRef<OperandBundleDef> OpBundles,
                            const Twine &Name = "") {
-    return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
-                        UnwindDest, Args, OpBundles, Name);
+    return CreateInvoke(Callee.getFunctionType(), Callee.getCallee(),
+                        NormalDest, UnwindDest, Args, OpBundles, Name);
   }
 
-  InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+  InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest,
                            BasicBlock *UnwindDest,
                            ArrayRef<Value *> Args = None,
                            const Twine &Name = "") {
-    return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
-                        UnwindDest, Args, Name);
+    return CreateInvoke(Callee.getFunctionType(), Callee.getCallee(),
+                        NormalDest, UnwindDest, Args, Name);
   }
 
   // Deprecated [opaque pointer types]
@@ -944,6 +980,42 @@ public:
         Callee, NormalDest, UnwindDest, Args, Name);
   }
 
+  /// \brief Create a callbr instruction.
+  CallBrInst *CreateCallBr(FunctionType *Ty, Value *Callee,
+                           BasicBlock *DefaultDest,
+                           ArrayRef<BasicBlock *> IndirectDests,
+                           ArrayRef<Value *> Args = None,
+                           const Twine &Name = "") {
+    return Insert(CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests,
+                                     Args), Name);
+  }
+  CallBrInst *CreateCallBr(FunctionType *Ty, Value *Callee,
+                           BasicBlock *DefaultDest,
+                           ArrayRef<BasicBlock *> IndirectDests,
+                           ArrayRef<Value *> Args,
+                           ArrayRef<OperandBundleDef> OpBundles,
+                           const Twine &Name = "") {
+    return Insert(
+        CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests, Args,
+                           OpBundles), Name);
+  }
+
+  CallBrInst *CreateCallBr(FunctionCallee Callee, BasicBlock *DefaultDest,
+                           ArrayRef<BasicBlock *> IndirectDests,
+                           ArrayRef<Value *> Args = None,
+                           const Twine &Name = "") {
+    return CreateCallBr(Callee.getFunctionType(), Callee.getCallee(),
+                        DefaultDest, IndirectDests, Args, Name);
+  }
+  CallBrInst *CreateCallBr(FunctionCallee Callee, BasicBlock *DefaultDest,
+                           ArrayRef<BasicBlock *> IndirectDests,
+                           ArrayRef<Value *> Args,
+                           ArrayRef<OperandBundleDef> OpBundles,
+                           const Twine &Name = "") {
+    return CreateCallBr(Callee.getFunctionType(), Callee.getCallee(),
+                        DefaultDest, IndirectDests, Args, OpBundles, Name);
+  }
+
   ResumeInst *CreateResume(Value *Exn) {
     return Insert(ResumeInst::Create(Exn));
   }
@@ -1004,12 +1076,44 @@ private:
   }
 
   Value *foldConstant(Instruction::BinaryOps Opc, Value *L,
-                      Value *R, const Twine &Name = nullptr) const {
+                      Value *R, const Twine &Name) const {
     auto *LC = dyn_cast<Constant>(L);
     auto *RC = dyn_cast<Constant>(R);
     return (LC && RC) ? Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr;
   }
 
+  Value *getConstrainedFPRounding(
+      Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding) {
+    ConstrainedFPIntrinsic::RoundingMode UseRounding =
+        DefaultConstrainedRounding;
+
+    if (Rounding.hasValue())
+      UseRounding = Rounding.getValue();
+
+    Optional<StringRef> RoundingStr =
+        ConstrainedFPIntrinsic::RoundingModeToStr(UseRounding);
+    assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+    auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
+
+    return MetadataAsValue::get(Context, RoundingMDS);
+  }
+
+  Value *getConstrainedFPExcept(
+      Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except) {
+    ConstrainedFPIntrinsic::ExceptionBehavior UseExcept =
+        DefaultConstrainedExcept;
+
+    if (Except.hasValue())
+      UseExcept = Except.getValue();
+
+    Optional<StringRef> ExceptStr =
+        ConstrainedFPIntrinsic::ExceptionBehaviorToStr(UseExcept);
+    assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+    auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
+
+    return MetadataAsValue::get(Context, ExceptMDS);
+  }
+
 public:
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
@@ -1179,6 +1283,14 @@ public:
     return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
+  Value *CreateAnd(ArrayRef<Value*> Ops) {
+    assert(!Ops.empty());
+    Value *Accum = Ops[0];
+    for (unsigned i = 1; i < Ops.size(); i++)
+      Accum = CreateAnd(Accum, Ops[i]);
+    return Accum;
+  }
+
   Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (auto *RC = dyn_cast<Constant>(RHS)) {
       if (RC->isNullValue())
@@ -1197,6 +1309,14 @@ public:
     return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
   }
 
+  Value *CreateOr(ArrayRef<Value*> Ops) {
+    assert(!Ops.empty());
+    Value *Accum = Ops[0];
+    for (unsigned i = 1; i < Ops.size(); i++)
+      Accum = CreateOr(Accum, Ops[i]);
+    return Accum;
+  }
+
   Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Value *V = foldConstant(Instruction::Xor, LHS, RHS, Name)) return V;
     return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
@@ -1212,6 +1332,10 @@ public:
 
   Value *CreateFAdd(Value *L, Value *R, const Twine &Name = "",
                     MDNode *FPMD = nullptr) {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
+                                      L, R, nullptr, Name, FPMD);
+
     if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF);
     return Insert(I, Name);
@@ -1221,6 +1345,10 @@ public:
   /// default FMF.
   Value *CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource,
                        const Twine &Name = "") {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
+                                      L, R, FMFSource, Name);
+
     if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr,
                                 FMFSource->getFastMathFlags());
@@ -1229,6 +1357,10 @@ public:
 
   Value *CreateFSub(Value *L, Value *R, const Twine &Name = "",
                     MDNode *FPMD = nullptr) {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
+                                      L, R, nullptr, Name, FPMD);
+
     if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), FPMD, FMF);
     return Insert(I, Name);
@@ -1238,6 +1370,10 @@ public:
   /// default FMF.
   Value *CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource,
                        const Twine &Name = "") {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
+                                      L, R, FMFSource, Name);
+
     if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr,
                                 FMFSource->getFastMathFlags());
@@ -1246,6 +1382,10 @@ public:
 
   Value *CreateFMul(Value *L, Value *R, const Twine &Name = "",
                     MDNode *FPMD = nullptr) {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
+                                      L, R, nullptr, Name, FPMD);
+
     if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), FPMD, FMF);
     return Insert(I, Name);
@@ -1255,6 +1395,10 @@ public:
   /// default FMF.
   Value *CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource,
                        const Twine &Name = "") {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
+                                      L, R, FMFSource, Name);
+
     if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr,
                                 FMFSource->getFastMathFlags());
@@ -1263,6 +1407,10 @@ public:
 
   Value *CreateFDiv(Value *L, Value *R, const Twine &Name = "",
                     MDNode *FPMD = nullptr) {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
+                                      L, R, nullptr, Name, FPMD);
+
     if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), FPMD, FMF);
     return Insert(I, Name);
@@ -1272,6 +1420,10 @@ public:
   /// default FMF.
   Value *CreateFDivFMF(Value *L, Value *R, Instruction *FMFSource,
                        const Twine &Name = "") {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
+                                      L, R, FMFSource, Name);
+
     if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr,
                                 FMFSource->getFastMathFlags());
@@ -1280,6 +1432,10 @@ public:
 
   Value *CreateFRem(Value *L, Value *R, const Twine &Name = "",
                     MDNode *FPMD = nullptr) {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
+                                      L, R, nullptr, Name, FPMD);
+
     if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), FPMD, FMF);
     return Insert(I, Name);
@@ -1289,6 +1445,10 @@ public:
   /// default FMF.
   Value *CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource,
                        const Twine &Name = "") {
+    if (IsFPConstrained)
+      return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
+                                      L, R, FMFSource, Name);
+
     if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr,
                                 FMFSource->getFastMathFlags());
@@ -1305,6 +1465,23 @@ public:
     return Insert(BinOp, Name);
   }
 
+  CallInst *CreateConstrainedFPBinOp(
+      Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr,
+      const Twine &Name = "", MDNode *FPMathTag = nullptr,
+      Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding = None,
+      Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except = None) {
+    Value *RoundingV = getConstrainedFPRounding(Rounding);
+    Value *ExceptV = getConstrainedFPExcept(Except);
+
+    FastMathFlags UseFMF = FMF;
+    if (FMFSource)
+      UseFMF = FMFSource->getFastMathFlags();
+
+    CallInst *C = CreateIntrinsic(ID, {L->getType()},
+                                  {L, R, RoundingV, ExceptV}, nullptr, Name);
+    return cast<CallInst>(setFPAttrs(C, FPMathTag, UseFMF));
+  }
+
   Value *CreateNeg(Value *V, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
     if (auto *VC = dyn_cast<Constant>(V))
@@ -1331,12 +1508,54 @@ public:
                   Name);
   }
 
+  /// Copy fast-math-flags from an instruction rather than using the builder's
+  /// default FMF.
+  Value *CreateFNegFMF(Value *V, Instruction *FMFSource,
+                       const Twine &Name = "") {
+   if (auto *VC = dyn_cast<Constant>(V))
+     return Insert(Folder.CreateFNeg(VC), Name);
+   // TODO: This should return UnaryOperator::CreateFNeg(...) once we are
+   // confident that they are optimized sufficiently.
+   return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), nullptr,
+                            FMFSource->getFastMathFlags()),
+                 Name);
+  }
+
   Value *CreateNot(Value *V, const Twine &Name = "") {
     if (auto *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateNot(VC), Name);
     return Insert(BinaryOperator::CreateNot(V), Name);
   }
 
+  Value *CreateUnOp(Instruction::UnaryOps Opc,
+                    Value *V, const Twine &Name = "",
+                    MDNode *FPMathTag = nullptr) {
+    if (auto *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateUnOp(Opc, VC), Name);
+    Instruction *UnOp = UnaryOperator::Create(Opc, V);
+    if (isa<FPMathOperator>(UnOp))
+      UnOp = setFPAttrs(UnOp, FPMathTag, FMF);
+    return Insert(UnOp, Name);
+  }
+
+  /// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
+  /// Correct number of operands must be passed accordingly.
+  Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
+                      const Twine &Name = "",
+                      MDNode *FPMathTag = nullptr) {
+    if (Instruction::isBinaryOp(Opc)) {
+      assert(Ops.size() == 2 && "Invalid number of operands!");
+      return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
+                         Ops[0], Ops[1], Name, FPMathTag);
+    }
+    if (Instruction::isUnaryOp(Opc)) {
+      assert(Ops.size() == 1 && "Invalid number of operands!");
+      return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
+                        Ops[0], Name, FPMathTag);
+    }
+    llvm_unreachable("Unexpected opcode!");
+  }
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Memory Instructions
   //===--------------------------------------------------------------------===//
@@ -1989,16 +2208,17 @@ public:
     return Insert(CI, Name);
   }
 
-  CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args = None,
+  CallInst *CreateCall(FunctionCallee Callee, ArrayRef<Value *> Args = None,
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
-    return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
+    return CreateCall(Callee.getFunctionType(), Callee.getCallee(), Args, Name,
+                      FPMathTag);
   }
 
-  CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+  CallInst *CreateCall(FunctionCallee Callee, ArrayRef<Value *> Args,
                        ArrayRef<OperandBundleDef> OpBundles,
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
-    return CreateCall(Callee->getFunctionType(), Callee, Args, OpBundles, Name,
-                      FPMathTag);
+    return CreateCall(Callee.getFunctionType(), Callee.getCallee(), Args,
+                      OpBundles, Name, FPMathTag);
   }
 
   // Deprecated [opaque pointer types]
@@ -2031,6 +2251,8 @@ public:
       MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable);
       Sel = addBranchMetadata(Sel, Prof, Unpred);
     }
+    if (isa<FPMathOperator>(Sel))
+      Sel = cast<SelectInst>(setFPAttrs(Sel, nullptr /* MDNode* */, FMF));
     return Insert(Sel, Name);
   }
 
@@ -2231,6 +2453,74 @@ public:
     return V;
   }
 
+  Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension,
+                                        unsigned LastIndex) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.array.access.index.");
+    auto *BaseType = Base->getType();
+
+    Value *LastIndexV = getInt32(LastIndex);
+    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    SmallVector<Value *, 4> IdxList;
+    for (unsigned I = 0; I < Dimension; ++I)
+      IdxList.push_back(Zero);
+    IdxList.push_back(LastIndexV);
+
+    Type *ResultType =
+        GetElementPtrInst::getGEPReturnType(Base, IdxList);
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveArrayAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_array_access_index, {ResultType, BaseType});
+
+    Value *DimV = getInt32(Dimension);
+    CallInst *Fn =
+        CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV});
+
+    return Fn;
+  }
+
+  Value *CreatePreserveUnionAccessIndex(Value *Base, unsigned FieldIndex,
+                                        MDNode *DbgInfo) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.union.access.index.");
+    auto *BaseType = Base->getType();
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveUnionAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_union_access_index, {BaseType, BaseType});
+
+    Value *DIIndex = getInt32(FieldIndex);
+    CallInst *Fn =
+        CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex});
+    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+    return Fn;
+  }
+
+  Value *CreatePreserveStructAccessIndex(Value *Base, unsigned Index,
+                                         unsigned FieldIndex, MDNode *DbgInfo) {
+    assert(isa<PointerType>(Base->getType()) &&
+           "Invalid Base ptr type for preserve.struct.access.index.");
+    auto *BaseType = Base->getType();
+
+    Value *GEPIndex = getInt32(Index);
+    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    Type *ResultType =
+        GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
+
+    Module *M = BB->getParent()->getParent();
+    Function *FnPreserveStructAccessIndex = Intrinsic::getDeclaration(
+        M, Intrinsic::preserve_struct_access_index, {ResultType, BaseType});
+
+    Value *DIIndex = getInt32(FieldIndex);
+    CallInst *Fn = CreateCall(FnPreserveStructAccessIndex,
+                              {Base, GEPIndex, DIIndex});
+    Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+    return Fn;
+  }
+
 private:
   /// Helper function that creates an assume intrinsic call that
   /// represents an alignment assumption on the provided Ptr, Mask, Type
@@ -2280,10 +2570,11 @@ public:
                                       Value **TheCheck = nullptr) {
     assert(isa<PointerType>(PtrValue->getType()) &&
            "trying to create an alignment assumption on a non-pointer?");
+    assert(Alignment != 0 && "Invalid Alignment");
     auto *PtrTy = cast<PointerType>(PtrValue->getType());
     Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
 
-    Value *Mask = ConstantInt::get(IntPtrTy, Alignment > 0 ? Alignment - 1 : 0);
+    Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1);
     return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
                                            OffsetValue, TheCheck);
   }
@@ -2310,15 +2601,10 @@ public:
     Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
 
     if (Alignment->getType() != IntPtrTy)
-      Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ true,
+      Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false,
                                 "alignmentcast");
-    Value *IsPositive =
-        CreateICmp(CmpInst::ICMP_SGT, Alignment,
-                   ConstantInt::get(Alignment->getType(), 0), "ispositive");
-    Value *PositiveMask =
-        CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "positivemask");
-    Value *Mask = CreateSelect(IsPositive, PositiveMask,
-                               ConstantInt::get(IntPtrTy, 0), "mask");
+
+    Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask");
 
     return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
                                            OffsetValue, TheCheck);
diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h
index 75f80567dbd5..3be9449c1a93 100644
--- a/include/llvm/IR/IRPrintingPasses.h
+++ b/include/llvm/IR/IRPrintingPasses.h
@@ -1,9 +1,8 @@
 //===- IRPrintingPasses.h - Passes to print out IR constructs ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 1519a45d59e9..2aac807623a9 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -1,9 +1,8 @@
 //===- llvm/InlineAsm.h - Class to represent inline asm strings -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h
index 2988fc935dd5..054fe4e9cbe9 100644
--- a/include/llvm/IR/InstIterator.h
+++ b/include/llvm/IR/InstIterator.h
@@ -1,9 +1,8 @@
 //===- InstIterator.h - Classes for inst iteration --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h
index c5b4c6f71d7d..fbeb2caf14e6 100644
--- a/include/llvm/IR/InstVisitor.h
+++ b/include/llvm/IR/InstVisitor.h
@@ -1,9 +1,8 @@
 //===- InstVisitor.h - Instruction visitor templates ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -218,14 +217,17 @@ public:
   RetTy visitVACopyInst(VACopyInst &I)            { DELEGATE(IntrinsicInst); }
   RetTy visitIntrinsicInst(IntrinsicInst &I)      { DELEGATE(CallInst); }
 
-  // Call and Invoke are slightly different as they delegate first through
-  // a generic CallSite visitor.
+  // Call, Invoke and CallBr are slightly different as they delegate first
+  // through a generic CallSite visitor.
   RetTy visitCallInst(CallInst &I) {
     return static_cast<SubClass*>(this)->visitCallSite(&I);
   }
   RetTy visitInvokeInst(InvokeInst &I) {
     return static_cast<SubClass*>(this)->visitCallSite(&I);
   }
+  RetTy visitCallBrInst(CallBrInst &I) {
+    return static_cast<SubClass *>(this)->visitCallSite(&I);
+  }
 
   // While terminators don't have a distinct type modeling them, we support
   // intercepting them with dedicated a visitor callback.
@@ -271,14 +273,14 @@ public:
   // The next level delegation for `CallBase` is slightly more complex in order
   // to support visiting cases where the call is also a terminator.
   RetTy visitCallBase(CallBase &I) {
-    if (isa<InvokeInst>(I))
+    if (isa<InvokeInst>(I) || isa<CallBrInst>(I))
       return static_cast<SubClass *>(this)->visitTerminator(I);
 
     DELEGATE(Instruction);
   }
 
-  // Provide a legacy visitor for a 'callsite' that visits both calls and
-  // invokes.
+  // Provide a legacy visitor for a 'callsite' that visits calls, invokes,
+  // and callbrs.
   //
   // Prefer overriding the type system based `CallBase` instead.
   RetTy visitCallSite(CallSite CS) {
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 3f384a6ee40c..ca419b50da6b 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -1,9 +1,8 @@
 //===- llvm/InstrTypes.h - Important Instruction subclasses -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/OperandTraits.h"
@@ -77,7 +77,8 @@ public:
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Alloca ||
+    return I->isUnaryOp() ||
+           I->getOpcode() == Instruction::Alloca ||
            I->getOpcode() == Instruction::Load ||
            I->getOpcode() == Instruction::VAArg ||
            I->getOpcode() == Instruction::ExtractValue ||
@@ -95,6 +96,91 @@ struct OperandTraits<UnaryInstruction> :
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
 
+//===----------------------------------------------------------------------===//
+//                                UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+class UnaryOperator : public UnaryInstruction {
+  void AssertOK();
+
+protected:
+  UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+                const Twine &Name, Instruction *InsertBefore);
+  UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+                const Twine &Name, BasicBlock *InsertAtEnd);
+
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+
+  UnaryOperator *cloneImpl() const;
+
+public:
+
+  /// Construct a unary instruction, given the opcode and an operand.
+  /// Optionally (if InsertBefore is specified) insert the instruction
+  /// into a BasicBlock right before the specified instruction.  The specified
+  /// Instruction is allowed to be a dereferenced end iterator.
+  ///
+  static UnaryOperator *Create(UnaryOps Op, Value *S,
+                               const Twine &Name = Twine(),
+                               Instruction *InsertBefore = nullptr);
+
+  /// Construct a unary instruction, given the opcode and an operand.
+  /// Also automatically insert this instruction to the end of the
+  /// BasicBlock specified.
+  ///
+  static UnaryOperator *Create(UnaryOps Op, Value *S,
+                               const Twine &Name,
+                               BasicBlock *InsertAtEnd);
+
+  /// These methods just forward to Create, and are useful when you
+  /// statically know what type of instruction you're going to create.  These
+  /// helpers just save some typing.
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+  static UnaryOperator *Create##OPC(Value *V, const Twine &Name = "") {\
+    return Create(Instruction::OPC, V, Name);\
+  }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+  static UnaryOperator *Create##OPC(Value *V, const Twine &Name, \
+                                    BasicBlock *BB) {\
+    return Create(Instruction::OPC, V, Name, BB);\
+  }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+  static UnaryOperator *Create##OPC(Value *V, const Twine &Name, \
+                                    Instruction *I) {\
+    return Create(Instruction::OPC, V, Name, I);\
+  }
+#include "llvm/IR/Instruction.def"
+
+  static UnaryOperator *CreateWithCopiedFlags(UnaryOps Opc,
+                                              Value *V,
+                                              Instruction *CopyO,
+                                              const Twine &Name = "") {
+    UnaryOperator *UO = Create(Opc, V, Name);
+    UO->copyIRFlags(CopyO);
+    return UO;
+  }
+
+  static UnaryOperator *CreateFNegFMF(Value *Op, Instruction *FMFSource,
+                                      const Twine &Name = "") {
+    return CreateWithCopiedFlags(Instruction::FNeg, Op, FMFSource, Name);
+  }
+
+  UnaryOps getOpcode() const {
+    return static_cast<UnaryOps>(Instruction::getOpcode());
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const Instruction *I) {
+    return I->isUnaryOp();
+  }
+  static bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
 //===----------------------------------------------------------------------===//
 //                           BinaryOperator Class
 //===----------------------------------------------------------------------===//
@@ -162,42 +248,42 @@ public:
 
   static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
                                                Value *V1, Value *V2,
-                                               BinaryOperator *CopyBO,
+                                               Instruction *CopyO,
                                                const Twine &Name = "") {
     BinaryOperator *BO = Create(Opc, V1, V2, Name);
-    BO->copyIRFlags(CopyBO);
+    BO->copyIRFlags(CopyO);
     return BO;
   }
 
   static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2,
-                                       BinaryOperator *FMFSource,
+                                       Instruction *FMFSource,
                                        const Twine &Name = "") {
     return CreateWithCopiedFlags(Instruction::FAdd, V1, V2, FMFSource, Name);
   }
   static BinaryOperator *CreateFSubFMF(Value *V1, Value *V2,
-                                       BinaryOperator *FMFSource,
+                                       Instruction *FMFSource,
                                        const Twine &Name = "") {
     return CreateWithCopiedFlags(Instruction::FSub, V1, V2, FMFSource, Name);
   }
   static BinaryOperator *CreateFMulFMF(Value *V1, Value *V2,
-                                       BinaryOperator *FMFSource,
+                                       Instruction *FMFSource,
                                        const Twine &Name = "") {
     return CreateWithCopiedFlags(Instruction::FMul, V1, V2, FMFSource, Name);
   }
   static BinaryOperator *CreateFDivFMF(Value *V1, Value *V2,
-                                       BinaryOperator *FMFSource,
+                                       Instruction *FMFSource,
                                        const Twine &Name = "") {
     return CreateWithCopiedFlags(Instruction::FDiv, V1, V2, FMFSource, Name);
   }
   static BinaryOperator *CreateFRemFMF(Value *V1, Value *V2,
-                                       BinaryOperator *FMFSource,
+                                       Instruction *FMFSource,
                                        const Twine &Name = "") {
     return CreateWithCopiedFlags(Instruction::FRem, V1, V2, FMFSource, Name);
   }
-  static BinaryOperator *CreateFNegFMF(Value *Op, BinaryOperator *FMFSource,
+  static BinaryOperator *CreateFNegFMF(Value *Op, Instruction *FMFSource,
                                        const Twine &Name = "") {
     Value *Zero = ConstantFP::getNegativeZero(Op->getType());
-    return CreateWithCopiedFlags(Instruction::FSub, Zero, Op, FMFSource);
+    return CreateWithCopiedFlags(Instruction::FSub, Zero, Op, FMFSource, Name);
   }
 
   static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
@@ -1033,16 +1119,23 @@ protected:
       return 0;
     case Instruction::Invoke:
       return 2;
+    case Instruction::CallBr:
+      return getNumSubclassExtraOperandsDynamic();
     }
     llvm_unreachable("Invalid opcode!");
   }
 
+  /// Get the number of extra operands for instructions that don't have a fixed
+  /// number of extra operands.
+  unsigned getNumSubclassExtraOperandsDynamic() const;
+
 public:
   using Instruction::getContext;
 
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::Call ||
-           I->getOpcode() == Instruction::Invoke;
+           I->getOpcode() == Instruction::Invoke ||
+           I->getOpcode() == Instruction::CallBr;
   }
   static bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
@@ -1096,6 +1189,19 @@ public:
     return isDataOperand(&UI.getUse());
   }
 
+  /// Given a value use iterator, return the data operand corresponding to it.
+  /// Iterator must actually correspond to a data operand.
+  unsigned getDataOperandNo(Value::const_user_iterator UI) const {
+    return getDataOperandNo(&UI.getUse());
+  }
+
+  /// Given a use for a data operand, get the data operand number that
+  /// corresponds to it.
+  unsigned getDataOperandNo(const Use *U) const {
+    assert(isDataOperand(U) && "Data operand # out of range!");
+    return U - data_operands_begin();
+  }
+
   /// Return the iterator pointing to the beginning of the argument list.
   User::op_iterator arg_begin() { return op_begin(); }
   User::const_op_iterator arg_begin() const {
@@ -1199,6 +1305,13 @@ public:
     return const_cast<CallBase *>(this)->getCaller();
   }
 
+  /// Tests if this call site must be tail call optimized. Only a CallInst can
+  /// be tail call optimized.
+  bool isMustTailCall() const;
+
+  /// Tests if this call site is marked as a tail call.
+  bool isTailCall() const;
+
   /// Returns the intrinsic ID of the intrinsic called or
   /// Intrinsic::not_intrinsic if the called function is not an intrinsic, or if
   /// this is an indirect call.
@@ -1207,10 +1320,13 @@ public:
   void setCalledOperand(Value *V) { Op<CalledOperandOpEndIdx>() = V; }
 
   /// Sets the function called, including updating the function type.
-  void setCalledFunction(Value *Fn) {
-    setCalledFunction(
-        cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
-        Fn);
+  void setCalledFunction(Function *Fn) {
+    setCalledFunction(Fn->getFunctionType(), Fn);
+  }
+
+  /// Sets the function called, including updating the function type.
+  void setCalledFunction(FunctionCallee Fn) {
+    setCalledFunction(Fn.getFunctionType(), Fn.getCallee());
   }
 
   /// Sets the function called, including updating to the specified function
@@ -1219,6 +1335,9 @@ public:
     this->FTy = FTy;
     assert(FTy == cast<FunctionType>(
                       cast<PointerType>(Fn->getType())->getElementType()));
+    // This function doesn't mutate the return type, only the function
+    // type. That seems broken, but for now just assert that they agree.
+    assert(getType() == FTy->getReturnType());
     setCalledOperand(Fn);
   }
 
@@ -1233,6 +1352,9 @@ public:
                                (ID << 2));
   }
 
+  /// Check if this call is an inline asm statement.
+  bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
+
   /// \name Attribute API
   ///
   /// These methods access and modify attributes on this call (including
@@ -1452,6 +1574,12 @@ public:
     return Attrs.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a call or parameter.
+  Type *getParamByValType(unsigned ArgNo) const {
+    Type *Ty = Attrs.getParamByValType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/include/llvm/IR/Instruction.def b/include/llvm/IR/Instruction.def
index 58e4e2e1d6cc..41cdf613ad64 100644
--- a/include/llvm/IR/Instruction.def
+++ b/include/llvm/IR/Instruction.def
@@ -1,9 +1,8 @@
 //===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -135,89 +134,90 @@ HANDLE_TERM_INST  ( 7, Unreachable   , UnreachableInst)
 HANDLE_TERM_INST  ( 8, CleanupRet    , CleanupReturnInst)
 HANDLE_TERM_INST  ( 9, CatchRet      , CatchReturnInst)
 HANDLE_TERM_INST  (10, CatchSwitch   , CatchSwitchInst)
-  LAST_TERM_INST  (10)
+HANDLE_TERM_INST  (11, CallBr        , CallBrInst) // A call-site terminator
+  LAST_TERM_INST  (11)
 
 // Standard unary operators...
- FIRST_UNARY_INST(11)
-HANDLE_UNARY_INST(11, FNeg  , UnaryOperator)
-  LAST_UNARY_INST(11)
+ FIRST_UNARY_INST(12)
+HANDLE_UNARY_INST(12, FNeg  , UnaryOperator)
+  LAST_UNARY_INST(12)
 
 // Standard binary operators...
- FIRST_BINARY_INST(12)
-HANDLE_BINARY_INST(12, Add  , BinaryOperator)
-HANDLE_BINARY_INST(13, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(14, Sub  , BinaryOperator)
-HANDLE_BINARY_INST(15, FSub , BinaryOperator)
-HANDLE_BINARY_INST(16, Mul  , BinaryOperator)
-HANDLE_BINARY_INST(17, FMul , BinaryOperator)
-HANDLE_BINARY_INST(18, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(19, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(20, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(21, URem , BinaryOperator)
-HANDLE_BINARY_INST(22, SRem , BinaryOperator)
-HANDLE_BINARY_INST(23, FRem , BinaryOperator)
+ FIRST_BINARY_INST(13)
+HANDLE_BINARY_INST(13, Add  , BinaryOperator)
+HANDLE_BINARY_INST(14, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(15, Sub  , BinaryOperator)
+HANDLE_BINARY_INST(16, FSub , BinaryOperator)
+HANDLE_BINARY_INST(17, Mul  , BinaryOperator)
+HANDLE_BINARY_INST(18, FMul , BinaryOperator)
+HANDLE_BINARY_INST(19, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(20, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(21, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(22, URem , BinaryOperator)
+HANDLE_BINARY_INST(23, SRem , BinaryOperator)
+HANDLE_BINARY_INST(24, FRem , BinaryOperator)
 
 // Logical operators (integer operands)
-HANDLE_BINARY_INST(24, Shl  , BinaryOperator) // Shift left  (logical)
-HANDLE_BINARY_INST(25, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(26, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(27, And  , BinaryOperator)
-HANDLE_BINARY_INST(28, Or   , BinaryOperator)
-HANDLE_BINARY_INST(29, Xor  , BinaryOperator)
-  LAST_BINARY_INST(29)
+HANDLE_BINARY_INST(25, Shl  , BinaryOperator) // Shift left  (logical)
+HANDLE_BINARY_INST(26, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(27, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(28, And  , BinaryOperator)
+HANDLE_BINARY_INST(29, Or   , BinaryOperator)
+HANDLE_BINARY_INST(30, Xor  , BinaryOperator)
+  LAST_BINARY_INST(30)
 
 // Memory operators...
- FIRST_MEMORY_INST(30)
-HANDLE_MEMORY_INST(30, Alloca, AllocaInst)  // Stack management
-HANDLE_MEMORY_INST(31, Load  , LoadInst  )  // Memory manipulation instrs
-HANDLE_MEMORY_INST(32, Store , StoreInst )
-HANDLE_MEMORY_INST(33, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(34, Fence , FenceInst )
-HANDLE_MEMORY_INST(35, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(36, AtomicRMW , AtomicRMWInst )
-  LAST_MEMORY_INST(36)
+ FIRST_MEMORY_INST(31)
+HANDLE_MEMORY_INST(31, Alloca, AllocaInst)  // Stack management
+HANDLE_MEMORY_INST(32, Load  , LoadInst  )  // Memory manipulation instrs
+HANDLE_MEMORY_INST(33, Store , StoreInst )
+HANDLE_MEMORY_INST(34, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(35, Fence , FenceInst )
+HANDLE_MEMORY_INST(36, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(37, AtomicRMW , AtomicRMWInst )
+  LAST_MEMORY_INST(37)
 
 // Cast operators ...
 // NOTE: The order matters here because CastInst::isEliminableCastPair
 // NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(37)
-HANDLE_CAST_INST(37, Trunc   , TruncInst   )  // Truncate integers
-HANDLE_CAST_INST(38, ZExt    , ZExtInst    )  // Zero extend integers
-HANDLE_CAST_INST(39, SExt    , SExtInst    )  // Sign extend integers
-HANDLE_CAST_INST(40, FPToUI  , FPToUIInst  )  // floating point -> UInt
-HANDLE_CAST_INST(41, FPToSI  , FPToSIInst  )  // floating point -> SInt
-HANDLE_CAST_INST(42, UIToFP  , UIToFPInst  )  // UInt -> floating point
-HANDLE_CAST_INST(43, SIToFP  , SIToFPInst  )  // SInt -> floating point
-HANDLE_CAST_INST(44, FPTrunc , FPTruncInst )  // Truncate floating point
-HANDLE_CAST_INST(45, FPExt   , FPExtInst   )  // Extend floating point
-HANDLE_CAST_INST(46, PtrToInt, PtrToIntInst)  // Pointer -> Integer
-HANDLE_CAST_INST(47, IntToPtr, IntToPtrInst)  // Integer -> Pointer
-HANDLE_CAST_INST(48, BitCast , BitCastInst )  // Type cast
-HANDLE_CAST_INST(49, AddrSpaceCast, AddrSpaceCastInst)  // addrspace cast
-  LAST_CAST_INST(49)
-
- FIRST_FUNCLETPAD_INST(50)
-HANDLE_FUNCLETPAD_INST(50, CleanupPad, CleanupPadInst)
-HANDLE_FUNCLETPAD_INST(51, CatchPad  , CatchPadInst)
-  LAST_FUNCLETPAD_INST(51)
+ FIRST_CAST_INST(38)
+HANDLE_CAST_INST(38, Trunc   , TruncInst   )  // Truncate integers
+HANDLE_CAST_INST(39, ZExt    , ZExtInst    )  // Zero extend integers
+HANDLE_CAST_INST(40, SExt    , SExtInst    )  // Sign extend integers
+HANDLE_CAST_INST(41, FPToUI  , FPToUIInst  )  // floating point -> UInt
+HANDLE_CAST_INST(42, FPToSI  , FPToSIInst  )  // floating point -> SInt
+HANDLE_CAST_INST(43, UIToFP  , UIToFPInst  )  // UInt -> floating point
+HANDLE_CAST_INST(44, SIToFP  , SIToFPInst  )  // SInt -> floating point
+HANDLE_CAST_INST(45, FPTrunc , FPTruncInst )  // Truncate floating point
+HANDLE_CAST_INST(46, FPExt   , FPExtInst   )  // Extend floating point
+HANDLE_CAST_INST(47, PtrToInt, PtrToIntInst)  // Pointer -> Integer
+HANDLE_CAST_INST(48, IntToPtr, IntToPtrInst)  // Integer -> Pointer
+HANDLE_CAST_INST(49, BitCast , BitCastInst )  // Type cast
+HANDLE_CAST_INST(50, AddrSpaceCast, AddrSpaceCastInst)  // addrspace cast
+  LAST_CAST_INST(50)
+
+ FIRST_FUNCLETPAD_INST(51)
+HANDLE_FUNCLETPAD_INST(51, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(52, CatchPad  , CatchPadInst)
+  LAST_FUNCLETPAD_INST(52)
 
 // Other operators...
- FIRST_OTHER_INST(52)
-HANDLE_OTHER_INST(52, ICmp   , ICmpInst   )  // Integer comparison instruction
-HANDLE_OTHER_INST(53, FCmp   , FCmpInst   )  // Floating point comparison instr.
-HANDLE_OTHER_INST(54, PHI    , PHINode    )  // PHI node instruction
-HANDLE_OTHER_INST(55, Call   , CallInst   )  // Call a function
-HANDLE_OTHER_INST(56, Select , SelectInst )  // select instruction
-HANDLE_USER_INST (57, UserOp1, Instruction)  // May be used internally in a pass
-HANDLE_USER_INST (58, UserOp2, Instruction)  // Internal to passes only
-HANDLE_OTHER_INST(59, VAArg  , VAArgInst  )  // vaarg instruction
-HANDLE_OTHER_INST(60, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(61, InsertElement, InsertElementInst)  // insert into vector
-HANDLE_OTHER_INST(62, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
-HANDLE_OTHER_INST(63, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(64, InsertValue, InsertValueInst)  // insert into aggregate
-HANDLE_OTHER_INST(65, LandingPad, LandingPadInst)  // Landing pad instruction.
-  LAST_OTHER_INST(65)
+ FIRST_OTHER_INST(53)
+HANDLE_OTHER_INST(53, ICmp   , ICmpInst   )  // Integer comparison instruction
+HANDLE_OTHER_INST(54, FCmp   , FCmpInst   )  // Floating point comparison instr.
+HANDLE_OTHER_INST(55, PHI    , PHINode    )  // PHI node instruction
+HANDLE_OTHER_INST(56, Call   , CallInst   )  // Call a function
+HANDLE_OTHER_INST(57, Select , SelectInst )  // select instruction
+HANDLE_USER_INST (58, UserOp1, Instruction)  // May be used internally in a pass
+HANDLE_USER_INST (59, UserOp2, Instruction)  // Internal to passes only
+HANDLE_OTHER_INST(60, VAArg  , VAArgInst  )  // vaarg instruction
+HANDLE_OTHER_INST(61, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(62, InsertElement, InsertElementInst)  // insert into vector
+HANDLE_OTHER_INST(63, ShuffleVector, ShuffleVectorInst)  // shuffle two vectors.
+HANDLE_OTHER_INST(64, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(65, InsertValue, InsertValueInst)  // insert into aggregate
+HANDLE_OTHER_INST(66, LandingPad, LandingPadInst)  // Landing pad instruction.
+  LAST_OTHER_INST(66)
 
 #undef  FIRST_TERM_INST
 #undef HANDLE_TERM_INST
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 5e78cb1edf02..6a9a74bd16f0 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -1,9 +1,8 @@
 //===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,6 +135,9 @@ public:
   bool isExceptionalTerminator() const {
     return isExceptionalTerminator(getOpcode());
   }
+  bool isIndirectTerminator() const {
+    return isIndirectTerminator(getOpcode());
+  }
 
   static const char* getOpcodeName(unsigned OpCode);
 
@@ -203,6 +205,17 @@ public:
     }
   }
 
+  /// Returns true if the OpCode is a terminator with indirect targets.
+  static inline bool isIndirectTerminator(unsigned OpCode) {
+    switch (OpCode) {
+    case Instruction::IndirectBr:
+    case Instruction::CallBr:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   //===--------------------------------------------------------------------===//
   // Metadata manipulation.
   //===--------------------------------------------------------------------===//
@@ -298,9 +311,6 @@ public:
   /// Returns false if no metadata was found.
   bool extractProfTotalWeight(uint64_t &TotalVal) const;
 
-  /// Updates branch_weights metadata by scaling it by \p S / \p T.
-  void updateProfWeight(uint64_t S, uint64_t T);
-
   /// Sets the branch_weights metadata to \p W for CallInst.
   void setProfWeight(uint64_t W);
 
@@ -655,6 +665,10 @@ public:
   /// instruction must be a terminator.
   void setSuccessor(unsigned Idx, BasicBlock *BB);
 
+  /// Replace the specified successor \p OldBB with the provided block
+  /// \p NewBB. This instruction must be a terminator.
+  void replaceSuccessorWith(BasicBlock *OldBB, BasicBlock *NewBB);
+
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Value *V) {
     return V->getValueID() >= Value::InstructionVal;
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 0ff8f56f213a..215ce45c7b75 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -1,9 +1,8 @@
 //===- llvm/Instructions.h - Instruction subclass definitions ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -522,9 +521,11 @@ private:
 //                                AtomicCmpXchgInst Class
 //===----------------------------------------------------------------------===//
 
-/// an instruction that atomically checks whether a
+/// An instruction that atomically checks whether a
 /// specified value is in a memory location, and, if it is, stores a new value
-/// there.  Returns the value that was loaded.
+/// there. The value returned by this instruction is a pair containing the
+/// original value as first element, and an i1 indicating success (true) or
+/// failure (false) as second element.
 ///
 class AtomicCmpXchgInst : public Instruction {
   void Init(Value *Ptr, Value *Cmp, Value *NewVal,
@@ -725,8 +726,14 @@ public:
     /// *p = old <unsigned v ? old : v
     UMin,
 
+    /// *p = old + v
+    FAdd,
+
+    /// *p = old - v
+    FSub,
+
     FIRST_BINOP = Xchg,
-    LAST_BINOP = UMin,
+    LAST_BINOP = FSub,
     BAD_BINOP
   };
 
@@ -748,6 +755,16 @@ public:
 
   static StringRef getOperationName(BinOp Op);
 
+  static bool isFPOperation(BinOp Op) {
+    switch (Op) {
+    case AtomicRMWInst::FAdd:
+    case AtomicRMWInst::FSub:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   void setOperation(BinOp Operation) {
     unsigned short SubclassData = getSubclassDataFromInstruction();
     setInstructionSubclassData((SubclassData & 31) |
@@ -805,6 +822,10 @@ public:
     return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
+  bool isFloatingPointOperation() const {
+    return isFPOperation(getOperation());
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::AtomicRMW;
@@ -1114,71 +1135,6 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
 
-//===----------------------------------------------------------------------===//
-//                                UnaryOperator Class
-//===----------------------------------------------------------------------===//
-
-/// a unary instruction 
-class UnaryOperator : public UnaryInstruction {
-  void AssertOK();
-
-protected:
-  UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
-                const Twine &Name, Instruction *InsertBefore);
-  UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
-                const Twine &Name, BasicBlock *InsertAtEnd);
-
-  // Note: Instruction needs to be a friend here to call cloneImpl.
-  friend class Instruction;
-
-  UnaryOperator *cloneImpl() const;
-
-public:
-
-  /// Construct a unary instruction, given the opcode and an operand.
-  /// Optionally (if InstBefore is specified) insert the instruction
-  /// into a BasicBlock right before the specified instruction.  The specified
-  /// Instruction is allowed to be a dereferenced end iterator.
-  ///
-  static UnaryOperator *Create(UnaryOps Op, Value *S,
-                               const Twine &Name = Twine(),
-                               Instruction *InsertBefore = nullptr);
-
-  /// Construct a unary instruction, given the opcode and an operand.
-  /// Also automatically insert this instruction to the end of the
-  /// BasicBlock specified.
-  ///
-  static UnaryOperator *Create(UnaryOps Op, Value *S,
-                               const Twine &Name,
-                               BasicBlock *InsertAtEnd);
-
-  /// These methods just forward to Create, and are useful when you
-  /// statically know what type of instruction you're going to create.  These
-  /// helpers just save some typing.
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
-  static UnaryInstruction *Create##OPC(Value *V, \
-                                       const Twine &Name = "") {\
-    return Create(Instruction::OPC, V, Name);\
-  }
-#include "llvm/IR/Instruction.def"
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
-  static UnaryInstruction *Create##OPC(Value *V, \
-                                       const Twine &Name, BasicBlock *BB) {\
-    return Create(Instruction::OPC, V, Name, BB);\
-  }
-#include "llvm/IR/Instruction.def"
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
-  static UnaryInstruction *Create##OPC(Value *V, \
-                                       const Twine &Name, Instruction *I) {\
-    return Create(Instruction::OPC, V, Name, I);\
-  }
-#include "llvm/IR/Instruction.def"
-
-  UnaryOps getOpcode() const {
-    return static_cast<UnaryOps>(Instruction::getOpcode());
-  }
-};
-
 //===----------------------------------------------------------------------===//
 //                               ICmpInst Class
 //===----------------------------------------------------------------------===//
@@ -1524,25 +1480,44 @@ public:
         CallInst(Ty, Func, Args, Bundles, NameStr, InsertAtEnd);
   }
 
-  static CallInst *Create(Function *Func, const Twine &NameStr = "",
+  static CallInst *Create(FunctionCallee Func, const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
-    return Create(Func->getFunctionType(), Func, NameStr, InsertBefore);
+    return Create(Func.getFunctionType(), Func.getCallee(), NameStr,
+                  InsertBefore);
   }
 
-  static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+  static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
                           const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
-    return Create(Func->getFunctionType(), Func, Args, NameStr, InsertBefore);
+    return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles,
+                  NameStr, InsertBefore);
   }
 
-  static CallInst *Create(Function *Func, const Twine &NameStr,
+  static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr,
+                          Instruction *InsertBefore = nullptr) {
+    return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+                  InsertBefore);
+  }
+
+  static CallInst *Create(FunctionCallee Func, const Twine &NameStr,
                           BasicBlock *InsertAtEnd) {
-    return Create(Func->getFunctionType(), Func, NameStr, InsertAtEnd);
+    return Create(Func.getFunctionType(), Func.getCallee(), NameStr,
+                  InsertAtEnd);
   }
 
-  static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+  static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
                           const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return Create(Func->getFunctionType(), Func, Args, NameStr, InsertAtEnd);
+    return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+                  InsertAtEnd);
+  }
+
+  static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles,
+                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles,
+                  NameStr, InsertAtEnd);
   }
 
   // Deprecated [opaque pointer types]
@@ -1684,9 +1659,6 @@ public:
     addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
   }
 
-  /// Check if this call is an inline asm statement.
-  bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
-
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::Call;
@@ -1695,6 +1667,9 @@ public:
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
 
+  /// Updates profile metadata by scaling it by \p S / \p T.
+  void updateProfWeight(uint64_t S, uint64_t T);
+
 private:
   // Shadow Instruction::setInstructionSubclassData with a private forwarding
   // method so that subclasses cannot accidentally use it.
@@ -2008,6 +1983,10 @@ public:
     return User::operator new(s, 3);
   }
 
+  /// Swap the first 2 operands and adjust the mask to preserve the semantics
+  /// of the instruction.
+  void commute();
+
   /// Return true if a shufflevector instruction can be
   /// formed with the specified operands.
   static bool isValidOperands(const Value *V1, const Value *V2,
@@ -2696,6 +2675,14 @@ public:
     block_begin()[i] = BB;
   }
 
+  /// Replace every incoming basic block \p Old with basic block \p New.
+  void replaceIncomingBlockWith(const BasicBlock *Old, BasicBlock *New) {
+    assert(New && Old && "PHI node got a null basic block!");
+    for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op)
+      if (getIncomingBlock(Op) == Old)
+        setIncomingBlock(Op, New);
+  }
+
   /// Add an incoming value to the end of the PHI list
   ///
   void addIncoming(Value *V, BasicBlock *BB) {
@@ -2739,6 +2726,19 @@ public:
     return getIncomingValue(Idx);
   }
 
+  /// Set every incoming value for block \p BB to \p V.
+  void setIncomingValueForBlock(const BasicBlock *BB, Value *V) {
+    assert(BB && "PHI node got a null basic block!");
+    bool Found = false;
+    for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op)
+      if (getIncomingBlock(Op) == BB) {
+        Found = true;
+        setIncomingValue(Op, V);
+      }
+    (void)Found; // Found is otherwise only read by the assert below.
+    assert(Found && "Invalid basic block argument to set!");
+  }
+
   /// If the specified PHI node always merges together the
   /// same value, return the value, otherwise return null.
   Value *hasConstantValue() const;
@@ -3450,6 +3450,60 @@ public:
   }
 };
 
+/// A wrapper class to simplify modification of SwitchInst cases along with
+/// their prof branch_weights metadata.
+class SwitchInstProfUpdateWrapper {
+  SwitchInst &SI;
+  Optional<SmallVector<uint32_t, 8> > Weights = None;
+
+  // A sticky invalid state is needed to safely ignore operations on prof data
+  // in cases where SwitchInstProfUpdateWrapper is created from a SwitchInst
+  // with inconsistent prof data. TODO: once we fix all prof data
+  // inconsistencies we can turn the invalid state into assertions.
+  enum {
+    Invalid,
+    Initialized,
+    Changed
+  } State = Invalid;
+
+protected:
+  static MDNode *getProfBranchWeightsMD(const SwitchInst &SI);
+
+  MDNode *buildProfBranchWeightsMD();
+
+  void init();
+
+public:
+  using CaseWeightOpt = Optional<uint32_t>;
+  SwitchInst *operator->() { return &SI; }
+  SwitchInst &operator*() { return SI; }
+  operator SwitchInst *() { return &SI; }
+
+  SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); }
+
+  ~SwitchInstProfUpdateWrapper() {
+    if (State == Changed)
+      SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD());
+  }
+
+  /// Delegate the call to the underlying SwitchInst::removeCase() and remove
+  /// the corresponding branch weight.
+  SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I);
+
+  /// Delegate the call to the underlying SwitchInst::addCase() and set the
+  /// specified branch weight for the added case.
+  void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W);
+
+  /// Delegate the call to the underlying SwitchInst::eraseFromParent() and mark
+  /// this object to not touch the underlying SwitchInst in the destructor.
+  SymbolTableList<Instruction>::iterator eraseFromParent();
+
+  void setSuccessorWeight(unsigned idx, CaseWeightOpt W);
+  CaseWeightOpt getSuccessorWeight(unsigned idx);
+
+  static CaseWeightOpt getSuccessorWeight(const SwitchInst &SI, unsigned idx);
+};
+
 template <>
 struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
 };
@@ -3688,36 +3742,36 @@ public:
                    NameStr, InsertAtEnd);
   }
 
-  static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+  static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
                             BasicBlock *IfException, ArrayRef<Value *> Args,
                             const Twine &NameStr,
                             Instruction *InsertBefore = nullptr) {
-    return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
-                  None, NameStr, InsertBefore);
+    return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+                  IfException, Args, None, NameStr, InsertBefore);
   }
 
-  static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+  static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
                             BasicBlock *IfException, ArrayRef<Value *> Args,
                             ArrayRef<OperandBundleDef> Bundles = None,
                             const Twine &NameStr = "",
                             Instruction *InsertBefore = nullptr) {
-    return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
-                  Bundles, NameStr, InsertBefore);
+    return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+                  IfException, Args, Bundles, NameStr, InsertBefore);
   }
 
-  static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+  static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
                             BasicBlock *IfException, ArrayRef<Value *> Args,
                             const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
-                  NameStr, InsertAtEnd);
+    return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+                  IfException, Args, NameStr, InsertAtEnd);
   }
 
-  static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+  static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
                             BasicBlock *IfException, ArrayRef<Value *> Args,
                             ArrayRef<OperandBundleDef> Bundles,
                             const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
-                  Bundles, NameStr, InsertAtEnd);
+    return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+                  IfException, Args, Bundles, NameStr, InsertAtEnd);
   }
 
   // Deprecated [opaque pointer types]
@@ -3851,6 +3905,249 @@ InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
   init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
 }
 
+//===----------------------------------------------------------------------===//
+//                              CallBrInst Class
+//===----------------------------------------------------------------------===//
+
+/// CallBr instruction, tracking function calls that may not return control but
+/// instead transfer it to a third location. The SubclassData field is used to
+/// hold the calling convention of the call.
+///
+class CallBrInst : public CallBase {
+
+  unsigned NumIndirectDests;
+
+  CallBrInst(const CallBrInst &BI);
+
+  /// Construct a CallBrInst given a range of arguments.
+  ///
+  /// This overload takes the instruction to insert before (may be null).
+  inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+                    ArrayRef<BasicBlock *> IndirectDests,
+                    ArrayRef<Value *> Args,
+                    ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+                    const Twine &NameStr, Instruction *InsertBefore);
+
+  inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+                    ArrayRef<BasicBlock *> IndirectDests,
+                    ArrayRef<Value *> Args,
+                    ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+                    const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+  void init(FunctionType *FTy, Value *Func, BasicBlock *DefaultDest,
+            ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args,
+            ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+
+  /// Compute the number of operands to allocate.
+  static int ComputeNumOperands(int NumArgs, int NumIndirectDests,
+                                int NumBundleInputs = 0) {
+    // We need one operand each for the called function and the default
+    // destination, plus the indirect destinations, arguments and bundle inputs.
+    return 2 + NumIndirectDests + NumArgs + NumBundleInputs;
+  }
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+
+  CallBrInst *cloneImpl() const;
+
+public:
+  static CallBrInst *Create(FunctionType *Ty, Value *Func,
+                            BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args, const Twine &NameStr,
+                            Instruction *InsertBefore = nullptr) {
+    int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size());
+    return new (NumOperands)
+        CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None,
+                   NumOperands, NameStr, InsertBefore);
+  }
+
+  static CallBrInst *Create(FunctionType *Ty, Value *Func,
+                            BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles = None,
+                            const Twine &NameStr = "",
+                            Instruction *InsertBefore = nullptr) {
+    int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(),
+                                         CountBundleInputs(Bundles));
+    unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (NumOperands, DescriptorBytes)
+        CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles,
+                   NumOperands, NameStr, InsertBefore);
+  }
+
+  static CallBrInst *Create(FunctionType *Ty, Value *Func,
+                            BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args, const Twine &NameStr,
+                            BasicBlock *InsertAtEnd) {
+    int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size());
+    return new (NumOperands)
+        CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None,
+                   NumOperands, NameStr, InsertAtEnd);
+  }
+
+  static CallBrInst *Create(FunctionType *Ty, Value *Func,
+                            BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles,
+                            const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(),
+                                         CountBundleInputs(Bundles));
+    unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (NumOperands, DescriptorBytes)
+        CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles,
+                   NumOperands, NameStr, InsertAtEnd);
+  }
+
+  static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args, const Twine &NameStr,
+                            Instruction *InsertBefore = nullptr) {
+    return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+                  IndirectDests, Args, NameStr, InsertBefore);
+  }
+
+  static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles = None,
+                            const Twine &NameStr = "",
+                            Instruction *InsertBefore = nullptr) {
+    return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+                  IndirectDests, Args, Bundles, NameStr, InsertBefore);
+  }
+
+  static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args, const Twine &NameStr,
+                            BasicBlock *InsertAtEnd) {
+    return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+                  IndirectDests, Args, NameStr, InsertAtEnd);
+  }
+
+  static CallBrInst *Create(FunctionCallee Func,
+                            BasicBlock *DefaultDest,
+                            ArrayRef<BasicBlock *> IndirectDests,
+                            ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles,
+                            const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+                  IndirectDests, Args, Bundles, NameStr, InsertAtEnd);
+  }
+
+  /// Create a clone of \p CBI with a different set of operand bundles and
+  /// insert it before \p InsertPt.
+  ///
+  /// The returned callbr instruction is identical to \p CBI in every way
+  /// except that the operand bundles for the new instruction are set to the
+  /// operand bundles in \p Bundles.
+  static CallBrInst *Create(CallBrInst *CBI,
+                            ArrayRef<OperandBundleDef> Bundles,
+                            Instruction *InsertPt = nullptr);
+
+  /// Return the number of callbr indirect dest labels.
+  ///
+  unsigned getNumIndirectDests() const { return NumIndirectDests; }
+
+  /// getIndirectDestLabel - Return the i-th indirect dest label.
+  ///
+  Value *getIndirectDestLabel(unsigned i) const {
+    assert(i < getNumIndirectDests() && "Out of bounds!");
+    return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() +
+                      1);
+  }
+
+  Value *getIndirectDestLabelUse(unsigned i) const {
+    assert(i < getNumIndirectDests() && "Out of bounds!");
+    return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() +
+                         1);
+  }
+
+  // Return the destination basic blocks...
+  BasicBlock *getDefaultDest() const {
+    return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() - 1));
+  }
+  BasicBlock *getIndirectDest(unsigned i) const {
+    return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() + i));
+  }
+  SmallVector<BasicBlock *, 16> getIndirectDests() const {
+    SmallVector<BasicBlock *, 16> IndirectDests;
+    for (unsigned i = 0, e = getNumIndirectDests(); i < e; ++i)
+      IndirectDests.push_back(getIndirectDest(i));
+    return IndirectDests;
+  }
+  void setDefaultDest(BasicBlock *B) {
+    *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B);
+  }
+  void setIndirectDest(unsigned i, BasicBlock *B) {
+    *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B);
+  }
+
+  /// Return successor \p i: 0 is the default dest, 1.. are indirect dests.
+  BasicBlock *getSuccessor(unsigned i) const {
+    assert(i < getNumSuccessors() && "Successor # out of range for callbr!");
+    return i == 0 ? getDefaultDest() : getIndirectDest(i - 1);
+  }
+
+  void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+    assert(idx < getNumIndirectDests() + 1 &&
+           "Successor # out of range for callbr!");
+    *(&Op<-1>() - getNumIndirectDests() -1 + idx) =
+        reinterpret_cast<Value *>(NewSucc);
+  }
+
+  unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::CallBr);
+  }
+  static bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+                       ArrayRef<BasicBlock *> IndirectDests,
+                       ArrayRef<Value *> Args,
+                       ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+                       const Twine &NameStr, Instruction *InsertBefore)
+    : CallBase(Ty->getReturnType(), Instruction::CallBr,
+               OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+               InsertBefore) {
+  init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr);
+}
+
+// Construct a CallBrInst that is handed InsertAtEnd as its insertion block.
+// The result type comes from the explicit function type Ty rather than from
+// the callee's pointer element type, matching the InsertBefore constructor
+// and not depending on what type the callee pointer happens to carry.
+CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+                       ArrayRef<BasicBlock *> IndirectDests,
+                       ArrayRef<Value *> Args,
+                       ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+                       const Twine &NameStr, BasicBlock *InsertAtEnd)
+    : CallBase(Ty->getReturnType(), Instruction::CallBr,
+               OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+               InsertAtEnd) {
+  init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr);
+}
+
 //===----------------------------------------------------------------------===//
 //                              ResumeInst Class
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 80a7a7052574..438bdb29b706 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -1,9 +1,8 @@
 //===-- llvm/IntrinsicInst.h - Intrinsic Instruction Wrappers ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -209,26 +208,47 @@ namespace llvm {
   /// This is the common base class for constrained floating point intrinsics.
   class ConstrainedFPIntrinsic : public IntrinsicInst {
   public:
-    enum RoundingMode {
-      rmInvalid,
-      rmDynamic,
-      rmToNearest,
-      rmDownward,
-      rmUpward,
-      rmTowardZero
+    /// Specifies the rounding mode to be assumed. This is only used when
+    /// constrained floating point is enabled. See the LLVM Language
+    /// Reference Manual for details.
+    enum RoundingMode : uint8_t {
+      rmDynamic,         ///< This corresponds to "fpround.dynamic".
+      rmToNearest,       ///< This corresponds to "fpround.tonearest".
+      rmDownward,        ///< This corresponds to "fpround.downward".
+      rmUpward,          ///< This corresponds to "fpround.upward".
+      rmTowardZero       ///< This corresponds to "fpround.tozero".
     };
 
-    enum ExceptionBehavior {
-      ebInvalid,
-      ebIgnore,
-      ebMayTrap,
-      ebStrict
+    /// Specifies the required exception behavior. This is only used when
+    /// constrained floating point is used. See the LLVM Language
+    /// Reference Manual for details.
+    enum ExceptionBehavior : uint8_t {
+      ebIgnore,          ///< This corresponds to "fpexcept.ignore".
+      ebMayTrap,         ///< This corresponds to "fpexcept.maytrap".
+      ebStrict           ///< This corresponds to "fpexcept.strict".
     };
 
     bool isUnaryOp() const;
     bool isTernaryOp() const;
-    RoundingMode getRoundingMode() const;
-    ExceptionBehavior getExceptionBehavior() const;
+    Optional<RoundingMode> getRoundingMode() const;
+    Optional<ExceptionBehavior> getExceptionBehavior() const;
+
+    /// Returns a valid RoundingMode enumerator when given a string
+    /// that is valid as input in constrained intrinsic rounding mode
+    /// metadata.
+    static Optional<RoundingMode> StrToRoundingMode(StringRef);
+
+    /// For any RoundingMode enumerator, returns a string valid as input in
+    /// constrained intrinsic rounding mode metadata.
+    static Optional<StringRef> RoundingModeToStr(RoundingMode);
+
+    /// Returns a valid ExceptionBehavior enumerator when given a string
+    /// valid as input in constrained intrinsic exception behavior metadata.
+    static Optional<ExceptionBehavior> StrToExceptionBehavior(StringRef);
+
+    /// For any ExceptionBehavior enumerator, returns a string valid as 
+    /// input in constrained intrinsic exception behavior metadata.
+    static Optional<StringRef> ExceptionBehaviorToStr(ExceptionBehavior);
 
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
@@ -239,6 +259,8 @@ namespace llvm {
       case Intrinsic::experimental_constrained_fdiv:
       case Intrinsic::experimental_constrained_frem:
       case Intrinsic::experimental_constrained_fma:
+      case Intrinsic::experimental_constrained_fptrunc:
+      case Intrinsic::experimental_constrained_fpext:
       case Intrinsic::experimental_constrained_sqrt:
       case Intrinsic::experimental_constrained_pow:
       case Intrinsic::experimental_constrained_powi:
@@ -266,6 +288,84 @@ namespace llvm {
     }
   };
 
+  /// This class represents an intrinsic that is based on a binary operation.
+  /// This includes op.with.overflow and saturating add/sub intrinsics.
+  class BinaryOpIntrinsic : public IntrinsicInst {
+  public:
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::usub_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::umul_with_overflow:
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::uadd_sat:
+      case Intrinsic::sadd_sat:
+      case Intrinsic::usub_sat:
+      case Intrinsic::ssub_sat:
+        return true;
+      default:
+        return false;
+      }
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+
+    Value *getLHS() const { return const_cast<Value*>(getArgOperand(0)); }
+    Value *getRHS() const { return const_cast<Value*>(getArgOperand(1)); }
+
+    /// Returns the binary operation underlying the intrinsic.
+    Instruction::BinaryOps getBinaryOp() const;
+
+    /// Whether the intrinsic is signed or unsigned.
+    bool isSigned() const;
+
+    /// Returns one of OBO::NoSignedWrap or OBO::NoUnsignedWrap.
+    unsigned getNoWrapKind() const;
+  };
+
+  /// Represents an op.with.overflow intrinsic.
+  class WithOverflowInst : public BinaryOpIntrinsic {
+  public:
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::usub_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::umul_with_overflow:
+      case Intrinsic::smul_with_overflow:
+        return true;
+      default:
+        return false;
+      }
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// Represents a saturating add/sub intrinsic.
+  class SaturatingInst : public BinaryOpIntrinsic {
+  public:
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::uadd_sat:
+      case Intrinsic::sadd_sat:
+      case Intrinsic::usub_sat:
+      case Intrinsic::ssub_sat:
+        return true;
+      default:
+        return false;
+      }
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// Common base class for all memory intrinsics. Simply provides
   /// common methods.
   /// Written as CRTP to avoid a common base class amongst the
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index e1e17f983ff8..f38f92022d21 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -1,9 +1,8 @@
-//===-- llvm/Instrinsics.h - LLVM Intrinsic Function Handling ---*- C++ -*-===//
+//===- Intrinsics.h - LLVM Intrinsic Function Handling ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -100,7 +99,8 @@ namespace Intrinsic {
       Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Quad,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
-      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
+      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
+      VecElementArgument
     } Kind;
 
     union {
@@ -117,20 +117,22 @@ namespace Intrinsic {
       AK_AnyInteger,
       AK_AnyFloat,
       AK_AnyVector,
-      AK_AnyPointer
+      AK_AnyPointer,
+      AK_MatchType = 7
     };
 
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
              Kind == SameVecWidthArgument || Kind == PtrToArgument ||
-             Kind == PtrToElt);
+             Kind == PtrToElt || Kind == VecElementArgument);
       return Argument_Info >> 3;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument || Kind == PtrToArgument);
+             Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+             Kind == VecElementArgument);
       return (ArgKind)(Argument_Info & 7);
     }
 
@@ -162,14 +164,21 @@ namespace Intrinsic {
   /// of IITDescriptors.
   void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
 
-  /// Match the specified type (which comes from an intrinsic argument or return
-  /// value) with the type constraints specified by the .td file. If the given
-  /// type is an overloaded type it is pushed to the ArgTys vector.
+  enum MatchIntrinsicTypesResult {
+    MatchIntrinsicTypes_Match = 0,
+    MatchIntrinsicTypes_NoMatchRet = 1,
+    MatchIntrinsicTypes_NoMatchArg = 2,
+  };
+
+  /// Match the specified function type with the type constraints specified by
+  /// the .td file. If the given type is an overloaded type it is pushed to the
+  /// ArgTys vector.
   ///
   /// Returns false if the given type matches with the constraints, true
   /// otherwise.
-  bool matchIntrinsicType(Type *Ty, ArrayRef<IITDescriptor> &Infos,
-                          SmallVectorImpl<Type*> &ArgTys);
+  MatchIntrinsicTypesResult
+  matchIntrinsicSignature(FunctionType *FTy, ArrayRef<IITDescriptor> &Infos,
+                          SmallVectorImpl<Type *> &ArgTys);
 
   /// Verify if the intrinsic has variable arguments. This method is intended to
   /// be called after all the fixed arguments have been matched first.
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 64603d8ea030..d660f8278437 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -1,9 +1,8 @@
 //===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -70,6 +69,11 @@ class Returned<int argNo> : IntrinsicProperty {
   int ArgNo = argNo;
 }
 
+// ImmArg - The specified argument must be an immediate.
+class ImmArg<int argNo> : IntrinsicProperty {
+  int ArgNo = argNo;
+}
+
 // ReadOnly - The specified argument pointer is not written to through the
 // pointer by the intrinsic.
 class ReadOnly<int argNo> : IntrinsicProperty {
@@ -90,6 +94,8 @@ class ReadNone<int argNo> : IntrinsicProperty {
 
 def IntrNoReturn : IntrinsicProperty;
 
+def IntrWillReturn : IntrinsicProperty;
+
 // IntrCold - Calls to this intrinsic are cold.
 // Parallels the cold attribute on LLVM IR functions.
 def IntrCold : IntrinsicProperty;
@@ -157,13 +163,19 @@ class LLVMMatchType<int num>
 // the intrinsic is overloaded, so the matched type should be declared as iAny.
 class LLVMExtendedType<int num> : LLVMMatchType<num>;
 class LLVMTruncatedType<int num> : LLVMMatchType<num>;
-class LLVMVectorSameWidth<int num, LLVMType elty>
-  : LLVMMatchType<num> {
+
+// Match the scalar/vector of another intrinsic parameter but with a different
+// element type. Either both are scalars or both are vectors with the same
+// number of elements.
+class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
+  : LLVMMatchType<idx> {
   ValueType ElTy = elty.VT;
 }
+
 class LLVMPointerTo<int num> : LLVMMatchType<num>;
 class LLVMPointerToElt<int num> : LLVMMatchType<num>;
 class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
+class LLVMVectorElementType<int num> : LLVMMatchType<num>;
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -251,6 +263,7 @@ def llvm_v2f32_ty      : LLVMType<v2f32>;    //  2 x float
 def llvm_v4f32_ty      : LLVMType<v4f32>;    //  4 x float
 def llvm_v8f32_ty      : LLVMType<v8f32>;    //  8 x float
 def llvm_v16f32_ty     : LLVMType<v16f32>;   // 16 x float
+def llvm_v32f32_ty     : LLVMType<v32f32>;   // 32 x float
 def llvm_v1f64_ty      : LLVMType<v1f64>;    //  1 x double
 def llvm_v2f64_ty      : LLVMType<v2f64>;    //  2 x double
 def llvm_v4f64_ty      : LLVMType<v4f64>;    //  4 x double
@@ -393,9 +406,9 @@ def int_objc_arc_annotation_bottomup_bbend  : Intrinsic<[],
 
 //===--------------------- Code Generator Intrinsics ----------------------===//
 //
-def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_frameaddress  : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_sponentry  : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
 def int_read_register  : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
                                    [IntrReadMem], "llvm.read_register">;
@@ -413,7 +426,7 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
 // to an escaped allocation indicated by the index.
 def int_localrecover : Intrinsic<[llvm_ptr_ty],
                                  [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
-                                 [IntrNoMem]>;
+                                 [IntrNoMem, ImmArg<2>]>;
 
 // Given the frame pointer passed into an SEH filter function, returns a
 // pointer to the local variable area suitable for use with llvm.localrecover.
@@ -439,7 +452,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
 // memory while not impeding optimization.
 def int_prefetch
     : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
-                [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0> ]>;
+                [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+                  ImmArg<1>, ImmArg<2>]>;
 def int_pcmarker      : Intrinsic<[], [llvm_i32_ty]>;
 
 def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
@@ -480,16 +494,17 @@ def int_memcpy  : Intrinsic<[],
                              [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
                               llvm_i1_ty],
                             [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
-                             WriteOnly<0>, ReadOnly<1>]>;
+                             WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
 def int_memmove : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
                              llvm_i1_ty],
                             [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
-                             ReadOnly<1>]>;
+                             ReadOnly<1>, ImmArg<3>]>;
 def int_memset  : Intrinsic<[],
                             [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
                              llvm_i1_ty],
-                            [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+                            [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>,
+                            ImmArg<3>]>;
 
 // FIXME: Add version of these floating point intrinsics which allow non-default
 // rounding modes and FP exception handling.
@@ -527,6 +542,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
                                    [IntrNoMem]>;
+
+  def int_lround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+  def int_llround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+  def int_lrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+  def int_llrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
 }
 
 def int_minnum : Intrinsic<[llvm_anyfloat_ty],
@@ -554,8 +574,9 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
 
 // Internal interface for object size checking
 def int_objectsize : Intrinsic<[llvm_anyint_ty],
-                               [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty],
-                               [IntrNoMem, IntrSpeculatable]>,
+                               [llvm_anyptr_ty, llvm_i1_ty,
+                                llvm_i1_ty, llvm_i1_ty],
+                               [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
                                GCCBuiltin<"__builtin_object_size">;
 
 //===--------------- Constrained Floating Point Intrinsics ----------------===//
@@ -595,6 +616,15 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                       llvm_metadata_ty,
                                                       llvm_metadata_ty ]>;
 
+  def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
+                                                       [ llvm_anyfloat_ty,
+                                                         llvm_metadata_ty,
+                                                         llvm_metadata_ty ]>;
+
+  def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ],
+                                                     [ llvm_anyfloat_ty,
+                                                       llvm_metadata_ty ]>;
+
   // These intrinsics are sensitive to the rounding mode so we need constrained
   // versions of each of them.  When strict rounding and exception control are
   // not required the non-constrained versions of these intrinsics should be
@@ -676,14 +706,12 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                        llvm_metadata_ty,
                                                        llvm_metadata_ty ]>;
 }
-// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
-// FIXME: Add intrinsics for fabs and copysign? 
-
+// FIXME: Add intrinsics for fcmp, fptoui and fptosi.
 
 //===------------------------- Expect Intrinsics --------------------------===//
 //
-def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
-                                              LLVMMatchType<0>], [IntrNoMem]>;
+def int_expect : Intrinsic<[llvm_anyint_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
 
 //===-------------------- Bit Manipulation Intrinsics ---------------------===//
 //
@@ -692,8 +720,6 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
 let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
-  def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
-  def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
   def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_fshl : Intrinsic<[llvm_anyint_ty],
       [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
@@ -701,6 +727,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
       [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 }
 
+let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
+  def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+  def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+}
+
 //===------------------------ Debugger Intrinsics -------------------------===//
 //
 
@@ -797,24 +828,30 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
 //
 
 // Expose the carry flag from add operations on two integrals.
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
-def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
 
-def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
-def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
 
-def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
-def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
+                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
                                        [IntrNoMem, IntrSpeculatable]>;
 
@@ -837,23 +874,33 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty],
 //
 def int_smul_fix : Intrinsic<[llvm_anyint_ty],
                              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
-                             [IntrNoMem, IntrSpeculatable, Commutative]>;
+                             [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+
+def int_umul_fix : Intrinsic<[llvm_anyint_ty],
+                             [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+                             [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+
+//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
+//
+def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
+                                 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+                                 [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
 
 //===------------------------- Memory Use Markers -------------------------===//
 //
 def int_lifetime_start  : Intrinsic<[],
                                     [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
 def int_lifetime_end    : Intrinsic<[],
                                     [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
 def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
                                     [llvm_i64_ty, llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<1>]>;
+                                    [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
 def int_invariant_end   : Intrinsic<[],
                                     [llvm_descriptor_ty, llvm_i64_ty,
                                      llvm_anyptr_ty],
-                                    [IntrArgMemOnly, NoCapture<2>]>;
+                                    [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>;
 
 // launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
 // because it would cause CSE of two barriers with the same argument.
@@ -900,13 +947,13 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
                                [llvm_i64_ty, llvm_i32_ty,
                                 llvm_anyptr_ty, llvm_i32_ty,
                                 llvm_i32_ty, llvm_vararg_ty],
-                                [Throws]>;
+                                [Throws, ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>]>;
 
 def int_experimental_gc_result   : Intrinsic<[llvm_any_ty], [llvm_token_ty],
                                              [IntrReadMem]>;
 def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
                                 [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
-                                [IntrReadMem]>;
+                                [IntrReadMem, ImmArg<1>, ImmArg<2>]>;
 
 //===------------------------ Coroutine Intrinsics ---------------===//
 // These are documented in docs/Coroutines.rst
@@ -996,41 +1043,41 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
 // Intrinsic to detect whether its argument is a constant.
 def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
 
-
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
                                       LLVMAnyPointerType<LLVMMatchType<0>>,
                                       llvm_i32_ty,
-                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
-                                 [IntrArgMemOnly]>;
+                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                                 [IntrArgMemOnly, ImmArg<2>]>;
 
 def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
                                  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
-                                  LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
-                                 [IntrReadMem, IntrArgMemOnly]>;
+                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+                                 [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 
 def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
                                  [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
-                                  LLVMVectorSameWidth<0, llvm_i1_ty>,
+                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                   LLVMMatchType<0>],
-                                 [IntrReadMem]>;
+                                 [IntrReadMem, ImmArg<1>]>;
 
 def int_masked_scatter: Intrinsic<[],
                                   [llvm_anyvector_ty,
                                    LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
-                                   LLVMVectorSameWidth<0, llvm_i1_ty>]>;
+                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                                   [ImmArg<2>]>;
 
 def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
                                      [LLVMPointerToElt<0>,
-                                      LLVMVectorSameWidth<0, llvm_i1_ty>,
+                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                       LLVMMatchType<0>],
                                      [IntrReadMem]>;
 
 def int_masked_compressstore: Intrinsic<[],
                                      [llvm_anyvector_ty,
                                       LLVMPointerToElt<0>,
-                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
+                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                                      [IntrArgMemOnly]>;
 
 // Test whether a pointer is associated with a type metadata identifier.
@@ -1049,6 +1096,9 @@ def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
+def int_hwasan_check_memaccess :
+  Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
+
 // Xray intrinsics
 //===----------------------------------------------------------------------===//
 // Custom event logging for x-ray.
@@ -1072,7 +1122,7 @@ def int_memcpy_element_unordered_atomic
                 ],
                 [
                   IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
-                  ReadOnly<1>
+                  ReadOnly<1>, ImmArg<3>
                 ]>;
 
 // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize)
@@ -1083,62 +1133,105 @@ def int_memmove_element_unordered_atomic
                 ],
                 [
                   IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
-                  ReadOnly<1>
+                  ReadOnly<1>, ImmArg<3>
                 ]>;
 
 // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
 def int_memset_element_unordered_atomic
     : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
-                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
+                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
 
 //===------------------------ Reduction Intrinsics ------------------------===//
 //
-def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty],
-                                                    [llvm_anyfloat_ty,
-                                                     llvm_anyvector_ty],
-                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty],
-                                                    [llvm_anyfloat_ty,
-                                                     llvm_anyvector_ty],
-                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty],
+                                                       [LLVMMatchType<0>,
+                                                        llvm_anyvector_ty],
+                                                       [IntrNoMem]>;
+def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
+                                                       [LLVMMatchType<0>,
+                                                        llvm_anyvector_ty],
+                                                       [IntrNoMem]>;
+def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
                                                    [llvm_anyvector_ty],
                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
                                                    [llvm_anyvector_ty],
                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
                                                    [llvm_anyvector_ty],
                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
                                                   [llvm_anyvector_ty],
                                                   [IntrNoMem]>;
-def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
                                                    [llvm_anyvector_ty],
                                                    [IntrNoMem]>;
-def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
-def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
-def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
-def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
-def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
-def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
                                                     [llvm_anyvector_ty],
                                                     [IntrNoMem]>;
 
+//===---------- Intrinsics to control hardware supported loops ----------===//
+
+// Specify that the value given is the number of iterations that the next loop
+// will execute.
+def int_set_loop_iterations :
+  Intrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Specify that the value given is the number of iterations that the next loop
+// will execute. Also test that the given count is not zero, allowing it to
+// control entry to a 'while' loop.
+def int_test_set_loop_iterations :
+  Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Decrement loop counter by the given argument. Return false if the loop
+// should exit.
+def int_loop_decrement :
+  Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Decrement the first operand (the loop counter) by the second operand (the
+// maximum number of elements processed in an iteration). Return the remaining
+// number of iterations still to be executed. This is effectively a sub which
+// can be used with a phi, icmp and br to control the number of iterations
+// executed, as usual.
+def int_loop_decrement_reg :
+  Intrinsic<[llvm_anyint_ty],
+            [llvm_anyint_ty, llvm_anyint_ty], [IntrNoDuplicate]>;
+
 //===----- Intrinsics that are used to provide predicate information -----===//
 
 def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
                              [IntrNoMem, Returned<0>]>;
+
+//===------- Intrinsics that are used to preserve debug information -------===//
+
+def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                [llvm_anyptr_ty, llvm_i32_ty,
+                                                 llvm_i32_ty],
+                                                [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
+def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                [llvm_anyptr_ty, llvm_i32_ty],
+                                                [IntrNoMem, ImmArg<1>]>;
+def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty],
+                                                 [llvm_anyptr_ty, llvm_i32_ty,
+                                                  llvm_i32_ty],
+                                                 [IntrNoMem, ImmArg<1>,
+                                                  ImmArg<2>]>;
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index ff25750fe399..832aca4fd30f 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,8 @@ def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
 def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                 LLVMMatchType<0>], [IntrNoMem]>;
 
+def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // HINT
 
@@ -290,6 +291,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
 
   // Pairwise Add
   def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
+  def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
 
   // Long Pairwise Add
   // FIXME: In theory, we shouldn't need intrinsics for saddlp or
@@ -462,12 +464,12 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [IntrArgMemOnly, NoCapture<2>]>;
 
   class AdvSIMD_2Vec_Load_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+    : Intrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
                 [LLVMAnyPointerType<LLVMMatchType<0>>],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_2Vec_Load_Lane_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
-                [LLVMMatchType<0>, LLVMMatchType<0>,
+    : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+                [LLVMMatchType<0>, llvm_anyvector_ty,
                  llvm_i64_ty, llvm_anyptr_ty],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_2Vec_Store_Intrinsic
@@ -480,12 +482,12 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [IntrArgMemOnly, NoCapture<3>]>;
 
   class AdvSIMD_3Vec_Load_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+    : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                 [LLVMAnyPointerType<LLVMMatchType<0>>],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_3Vec_Load_Lane_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
-                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
+    : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty,
                  llvm_i64_ty, llvm_anyptr_ty],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_3Vec_Store_Intrinsic
@@ -499,15 +501,15 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [IntrArgMemOnly, NoCapture<4>]>;
 
   class AdvSIMD_4Vec_Load_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
-                 LLVMMatchType<0>, LLVMMatchType<0>],
+    : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
+                 LLVMMatchType<0>, llvm_anyvector_ty],
                 [LLVMAnyPointerType<LLVMMatchType<0>>],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_4Vec_Load_Lane_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+    : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>],
                 [LLVMMatchType<0>, LLVMMatchType<0>,
-                 LLVMMatchType<0>, LLVMMatchType<0>,
+                 LLVMMatchType<0>, llvm_anyvector_ty,
                  llvm_i64_ty, llvm_anyptr_ty],
                 [IntrReadMem, IntrArgMemOnly]>;
   class AdvSIMD_4Vec_Store_Intrinsic
@@ -684,3 +686,50 @@ def int_aarch64_crc32x  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
 def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
     [IntrNoMem]>;
 }
+
+//===----------------------------------------------------------------------===//
+// Memory Tagging Extensions (MTE) Intrinsics
+let TargetPrefix = "aarch64" in {
+def int_aarch64_irg   : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrInaccessibleMemOnly]>;
+def int_aarch64_addg  : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrNoMem]>;
+def int_aarch64_gmi   : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrNoMem]>;
+def int_aarch64_ldg   : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty],
+    [IntrReadMem]>;
+def int_aarch64_stg   : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
+    [IntrWriteMem]>;
+def int_aarch64_subp :  Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
+    [IntrNoMem]>;
+
+// The following are codegen-only intrinsics for stack instrumentation.
+
+// Generate a randomly tagged stack base pointer.
+def int_aarch64_irg_sp   : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+    [IntrInaccessibleMemOnly]>;
+
+// Transfer pointer tag with offset.
+// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
+// * address is the address in ptr0
+// * tag is a function of (tag in baseptr, tag_offset).
+// Address bits in baseptr and tag bits in ptr0 are ignored.
+// When the offset between ptr0 and baseptr is a compile-time constant, this can be emitted as
+//   ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
+// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
+def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+    [IntrNoMem, ImmArg<2>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument.
+def int_aarch64_settag  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument,
+// and set memory contents to zero.
+def int_aarch64_settag_zero  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for a 16-aligned, 16-sized memory region, and store a pair of 8-byte values.
+def int_aarch64_stgp  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+}
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 7913ce828fbc..3982444b5401 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -178,7 +177,7 @@ def int_amdgcn_implicit_buffer_ptr :
 // This is always moved to the beginning of the basic block.
 def int_amdgcn_init_exec : Intrinsic<[],
   [llvm_i64_ty],      // 64-bit literal constant
-  [IntrConvergent]>;
+  [IntrConvergent, ImmArg<0>]>;
 
 // Set EXEC according to a thread count packed in an SGPR input:
 //    thread_count = (input >> bitoffset) & 0x7f;
@@ -188,6 +187,10 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
    llvm_i32_ty],      // bit offset of the thread count
   [IntrConvergent]>;
 
+def int_amdgcn_wavefrontsize :
+  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
+  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
 
 //===----------------------------------------------------------------------===//
 // Instruction Intrinsics
@@ -196,9 +199,9 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
 // The first parameter is s_sendmsg immediate (i16),
 // the second one is copied to m0
 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
 def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
 
 def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
   Intrinsic<[], [], [IntrConvergent]>;
@@ -207,7 +210,7 @@ def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
   Intrinsic<[], [], [IntrConvergent]>;
 
 def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
-  Intrinsic<[], [llvm_i32_ty], []>;
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
 
 def int_amdgcn_div_scale : Intrinsic<
   // 1st parameter: Numerator
@@ -216,7 +219,7 @@ def int_amdgcn_div_scale : Intrinsic<
   //                second. (0 = first, 1 = second).
   [llvm_anyfloat_ty, llvm_i1_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
-  [IntrNoMem, IntrSpeculatable]
+  [IntrNoMem, IntrSpeculatable, ImmArg<2>]
 >;
 
 def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
@@ -293,29 +296,33 @@ def int_amdgcn_fract : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pkrtz : Intrinsic<
-  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
+  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_i16 :
+  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_u16 :
+  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pk_i16 : Intrinsic<
+def int_amdgcn_cvt_pk_i16 :
+    GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
+    Intrinsic<
   [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pk_u16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_class : Intrinsic<
@@ -374,7 +381,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
   llvm_i32_ty, // ordering
   llvm_i32_ty, // scope
   llvm_i1_ty], // isVolatile
-  [IntrArgMemOnly, NoCapture<0>], "",
+  [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "",
   [SDNPMemOperand]
 >;
 
@@ -389,9 +396,45 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
     llvm_i32_ty, // ordering
     llvm_i32_ty, // scope
     llvm_i1_ty], // isVolatile
-    [IntrArgMemOnly, NoCapture<0>]
+    [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
+>;
+
+// FIXME: The m0 argument should be moved after the normal arguments
+class AMDGPUDSOrderedIntrinsic : Intrinsic<
+  [llvm_i32_ty],
+  // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
+  // the bit packing can be optimized at the IR level.
+  [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
+   llvm_i32_ty, // value to add or swap
+   llvm_i32_ty, // ordering
+   llvm_i32_ty, // scope
+   llvm_i1_ty,  // isVolatile
+   llvm_i32_ty, // ordered count index (OA index), also added to the address
+                // gfx10: bits 24-27 indicate the number of active threads/dwords
+   llvm_i1_ty,  // wave release, usually set to 1
+   llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
+  [NoCapture<0>,
+   ImmArg<2>, ImmArg<3>, ImmArg<4>,
+   ImmArg<5>, ImmArg<6>, ImmArg<7>
+  ]
+>;
+
+class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
+  [llvm_i32_ty],
+  [llvm_anyptr_ty, // LDS or GDS ptr
+   llvm_i1_ty], // isVolatile
+   [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>],
+   "",
+   [SDNPMemOperand]
 >;
 
+def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
+def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
+
+// The pointer argument is assumed to be dynamically uniform if a VGPR.
+def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
+def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
+
 def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
 def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
 def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
@@ -442,9 +485,12 @@ class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
 }
 
 // Represent texture/image types / dimensionality.
-class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
+class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
+                     list<string> coord_names, list<string> slice_names> {
   AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
   string Name = name; // e.g. "2darraymsaa"
+  string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
+  bits<3> Encoding = enc;
   bit DA = 0; // DA bit in MIMG encoding
 
   list<AMDGPUArg> CoordSliceArgs =
@@ -460,17 +506,17 @@ class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_n
   bits<8> NumGradients = !size(GradientArgs);
 }
 
-def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
-def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>;
-def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>;
+def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
+def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
+def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
 let DA = 1 in {
-  def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>;
-  def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>;
-  def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>;
+  def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
+  def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
+  def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
 }
-def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>;
+def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
 let DA = 1 in {
-  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>;
+  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
 }
 
 def AMDGPUDims {
@@ -621,6 +667,19 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES
   let LodClampMip = "mip";
 }
 
+// Helper class for figuring out image intrinsic argument indexes (zero-based, as consumed by ImmArg<>).
+class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
+  int NumDataArgs = !size(P_.DataArgs);
+  int NumDmaskArgs = !if(P_.IsAtomic, 0, 1); // atomic profiles contribute no dmask operand
+  int NumVAddrArgs = !size(P_.AddrArgs);
+  int NumRSrcArgs = 1;
+  int NumSampArgs = !if(P_.IsSample, 2, 0); // samp + unorm, present only for sample profiles
+  int DmaskArgIndex = NumDataArgs; // dmask comes right after the data operands
+  int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1); // trailing 1 skips samp
+  int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+  int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1); // cachepolicy directly follows texfailctrl
+}
+
 // All dimension-aware intrinsics are derived from this class.
 class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                               list<IntrinsicProperty> props,
@@ -634,8 +693,13 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
       !if(P_.IsSample, [llvm_v4i32_ty,           // samp(SGPR)
                         llvm_i1_ty], []),        // unorm(imm)
       [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
-       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-      props, "", sdnodeprops>,
+       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
+     !listconcat(props,
+          !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
+          !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
+          [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>,
+           ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]),
+      "", sdnodeprops>,
   AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
                            !if(P_.IsAtomic, 0, 1)), 1> {
   AMDGPUDimProfile P = P_;
@@ -791,13 +855,13 @@ let TargetPrefix = "amdgcn" in {
 defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
 
 class AMDGPUBufferLoad : Intrinsic <
-  [llvm_anyfloat_ty],
+  [llvm_any_ty],
   [llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+  [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
 def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@@ -805,20 +869,20 @@ def int_amdgcn_buffer_load : AMDGPUBufferLoad;
 def int_amdgcn_s_buffer_load : Intrinsic <
   [llvm_any_ty],
   [llvm_v4i32_ty,     // rsrc(SGPR)
-   llvm_i32_ty,       // byte offset(SGPR/VGPR/imm)
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc)
-  [IntrNoMem]>,
+   llvm_i32_ty,       // byte offset(SGPR/imm)
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
+  [IntrNoMem, ImmArg<2>]>,
   AMDGPURsrcIntrinsic<0>;
 
 class AMDGPUBufferStore : Intrinsic <
   [],
-  [llvm_anyfloat_ty,  // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
+  [llvm_any_ty,       // vdata(VGPR)
    llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty,        // glc(imm)
    llvm_i1_ty],       // slc(imm)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+  [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
 def int_amdgcn_buffer_store : AMDGPUBufferStore;
@@ -835,8 +899,8 @@ class AMDGPURawBufferLoad : Intrinsic <
   [llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+  [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
 def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
@@ -847,8 +911,8 @@ class AMDGPUStructBufferLoad : Intrinsic <
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrReadMem], "", [SDNPMemOperand]>,
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+  [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
 def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
@@ -859,8 +923,8 @@ class AMDGPURawBufferStore : Intrinsic <
    llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+  [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
 def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
@@ -872,8 +936,8 @@ class AMDGPUStructBufferStore : Intrinsic <
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
-   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-  [IntrWriteMem], "", [SDNPMemOperand]>,
+   llvm_i32_ty],      // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+  [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
 def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
@@ -885,7 +949,7 @@ class AMDGPURawBufferAtomic : Intrinsic <
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1, 0>;
 def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
 def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
@@ -905,7 +969,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<2, 0>;
 
 class AMDGPUStructBufferAtomic : Intrinsic <
@@ -916,7 +980,7 @@ class AMDGPUStructBufferAtomic : Intrinsic <
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1, 0>;
 def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
 def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
@@ -937,7 +1001,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
    llvm_i32_ty,       // offset(VGPR/imm, included in bounds checking and swizzling)
    llvm_i32_ty,       // soffset(SGPR/imm, excluded from bounds checking and swizzling)
    llvm_i32_ty],      // cachepolicy(imm; bit 1 = slc)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<6>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<2, 0>;
 
 // Obsolescent tbuffer intrinsics.
@@ -952,7 +1016,8 @@ def int_amdgcn_tbuffer_load : Intrinsic <
      llvm_i32_ty,     // nfmt(imm)
      llvm_i1_ty,     // glc(imm)
      llvm_i1_ty],    // slc(imm)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+    [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
+     ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 
 def int_amdgcn_tbuffer_store : Intrinsic <
@@ -967,7 +1032,8 @@ def int_amdgcn_tbuffer_store : Intrinsic <
      llvm_i32_ty,    // nfmt(imm)
      llvm_i1_ty,     // glc(imm)
      llvm_i1_ty],    // slc(imm)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+    [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
+     ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 
 // New tbuffer intrinsics, with:
@@ -980,8 +1046,8 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic <
      llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
      llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
-     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+    [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 
 def int_amdgcn_raw_tbuffer_store : Intrinsic <
@@ -991,8 +1057,8 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic <
      llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
      llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
-     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+    [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 
 def int_amdgcn_struct_tbuffer_load : Intrinsic <
@@ -1002,8 +1068,8 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic <
      llvm_i32_ty,     // offset(VGPR/imm, included in bounds checking and swizzling)
      llvm_i32_ty,     // soffset(SGPR/imm, excluded from bounds checking and swizzling)
      llvm_i32_ty,     // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
-     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrReadMem], "", [SDNPMemOperand]>,
+     llvm_i32_ty],    // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+    [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<0>;
 
 def int_amdgcn_struct_tbuffer_store : Intrinsic <
@@ -1014,18 +1080,18 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
      llvm_i32_ty,    // offset(VGPR/imm, included in bounds checking and swizzling)
      llvm_i32_ty,    // soffset(SGPR/imm, excluded from bounds checking and swizzling)
      llvm_i32_ty,    // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
-     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
-    [IntrWriteMem], "", [SDNPMemOperand]>,
+     llvm_i32_ty],   // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+    [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1>;
 
 class AMDGPUBufferAtomic : Intrinsic <
-  [llvm_i32_ty],
-  [llvm_i32_ty,       // vdata(VGPR)
+  [llvm_anyint_ty],
+  [LLVMMatchType<0>,       // vdata(VGPR)
    llvm_v4i32_ty,     // rsrc(SGPR)
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<4>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<1, 0>;
 def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
 def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
@@ -1045,7 +1111,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
    llvm_i32_ty,       // vindex(VGPR)
    llvm_i32_ty,       // offset(SGPR/VGPR/imm)
    llvm_i1_ty],       // slc(imm)
-  [], "", [SDNPMemOperand]>,
+  [ImmArg<5>], "", [SDNPMemOperand]>,
   AMDGPURsrcIntrinsic<2, 0>;
 
 } // defset AMDGPUBufferIntrinsics
@@ -1062,7 +1128,7 @@ def int_amdgcn_exp : Intrinsic <[], [
   llvm_i1_ty,        // done
   llvm_i1_ty         // vm
   ],
-  []
+  [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
 >;
 
 // exp with compr bit set.
@@ -1073,7 +1139,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
   LLVMMatchType<0>,  // src1
   llvm_i1_ty,        // done
   llvm_i1_ty],       // vm
-  []
+  [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
 >;
 
 def int_amdgcn_buffer_wbinvl1_sc :
@@ -1090,27 +1156,27 @@ def int_amdgcn_s_dcache_inv :
 
 def int_amdgcn_s_memtime :
   GCCBuiltin<"__builtin_amdgcn_s_memtime">,
-  Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i64_ty], []>;
 
 def int_amdgcn_s_sleep :
   GCCBuiltin<"__builtin_amdgcn_s_sleep">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
 }
 
 def int_amdgcn_s_incperflevel :
   GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
 }
 
 def int_amdgcn_s_decperflevel :
   GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
-  Intrinsic<[], [llvm_i32_ty], []> {
+  Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
 }
 
 def int_amdgcn_s_getreg :
   GCCBuiltin<"__builtin_amdgcn_s_getreg">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
-  [IntrReadMem, IntrSpeculatable]
+  [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>]
 >;
 
 // int_amdgcn_s_getpc is provided to allow a specific style of position
@@ -1129,7 +1195,7 @@ def int_amdgcn_interp_mov :
   GCCBuiltin<"__builtin_amdgcn_interp_mov">,
   Intrinsic<[llvm_float_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
 
 // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
 // This intrinsic reads from lds, but the memory values are constant,
@@ -1138,16 +1204,30 @@ def int_amdgcn_interp_p1 :
   GCCBuiltin<"__builtin_amdgcn_interp_p1">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
 
 // __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
 def int_amdgcn_interp_p2 :
   GCCBuiltin<"__builtin_amdgcn_interp_p2">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>;
           // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
 
+// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
+def int_amdgcn_interp_p1_f16 :
+  GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
+  Intrinsic<[llvm_float_ty],
+            [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>;
+
+// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
+def int_amdgcn_interp_p2_f16 :
+  GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
+  Intrinsic<[llvm_half_ty],
+            [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
+            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
+
 // Pixel shaders only: whether the current pixel is live (i.e. not a helper
 // invocation for derivative computation).
 def int_amdgcn_ps_live : Intrinsic <
@@ -1166,16 +1246,17 @@ def int_amdgcn_mbcnt_hi :
 // llvm.amdgcn.ds.swizzle src offset
 def int_amdgcn_ds_swizzle :
   GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<1>]>;
 
 def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_lerp :
@@ -1233,12 +1314,12 @@ def int_amdgcn_cvt_pk_u8_f32 :
 >;
 
 def int_amdgcn_icmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
-            [IntrNoMem, IntrConvergent]>;
+  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<2>]>;
 
 def int_amdgcn_fcmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
-            [IntrNoMem, IntrConvergent]>;
+  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<2>]>;
 
 def int_amdgcn_readfirstlane :
   GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
@@ -1263,16 +1344,86 @@ def int_amdgcn_writelane :
   [IntrNoMem, IntrConvergent]
 >;
 
-def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
+def int_amdgcn_alignbit :
+  GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
+def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
+//
+// bar_val is the total number of waves that will wait on this
+// barrier, minus 1.
+def int_amdgcn_ds_gws_init :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
+  Intrinsic<[],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrConvergent, IntrWriteMem, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
+// vsrc0 (bar_val) is the total number of waves that will wait on this
+// barrier, minus 1.
+def int_amdgcn_ds_gws_barrier :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
+  Intrinsic<[],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrConvergent, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
+def int_amdgcn_ds_gws_sema_v :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
+  Intrinsic<[],
+  [llvm_i32_ty],
+  [IntrConvergent, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
+def int_amdgcn_ds_gws_sema_br :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
+  Intrinsic<[],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrConvergent, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
+def int_amdgcn_ds_gws_sema_p :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
+  Intrinsic<[],
+  [llvm_i32_ty],
+  [IntrConvergent, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
+def int_amdgcn_ds_gws_sema_release_all :
+  GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
+  Intrinsic<[],
+  [llvm_i32_ty],
+  [IntrConvergent, IntrInaccessibleMemOnly], "",
+  [SDNPMemOperand]
+>;
+
 
 // Copies the source value to the destination value, with the guarantee that
 // the source value is computed as if the entire program were executed in WQM.
@@ -1295,7 +1446,7 @@ def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
 // enabled, with a few exceptions: - Phi nodes with require WWM return an
 // undefined value.
 def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
-  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+  [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrConvergent]
 >;
 
 // Given a value, copies it while setting all the inactive lanes to a given
@@ -1328,7 +1479,8 @@ def int_amdgcn_buffer_wbinvl1_vol :
 def int_amdgcn_mov_dpp :
   Intrinsic<[llvm_anyint_ty],
             [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-             llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+             llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>,
+                           ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
 
 // llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
 // Should be equivalent to:
@@ -1336,8 +1488,10 @@ def int_amdgcn_mov_dpp :
 // v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
 def int_amdgcn_update_dpp :
   Intrinsic<[llvm_anyint_ty],
-            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
-             llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
+            llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
+             [IntrNoMem, IntrConvergent,
+              ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_s_dcache_wb :
   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
@@ -1349,7 +1503,7 @@ def int_amdgcn_s_dcache_wb_vol :
 
 def int_amdgcn_s_memrealtime :
   GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
-  Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+  Intrinsic<[llvm_i64_ty]>;
 
 // llvm.amdgcn.ds.permute <index> <src>
 def int_amdgcn_ds_permute :
@@ -1361,6 +1515,34 @@ def int_amdgcn_ds_bpermute :
   GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
+//===----------------------------------------------------------------------===//
+// GFX10 Intrinsics
+//===----------------------------------------------------------------------===//
+
+// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
+
+// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
+  Intrinsic<[llvm_i32_ty],
+            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
+
+// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
+// <sel> is a 32-bit constant that selects the lanes to read from; its high
+// 8 bits must be zero.
+def int_amdgcn_mov_dpp8 :
+  Intrinsic<[llvm_anyint_ty],
+            [LLVMMatchType<0>, llvm_i32_ty],
+            [IntrNoMem, IntrConvergent, ImmArg<1>]>;
+
+def int_amdgcn_s_get_waveid_in_workgroup :
+  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+  Intrinsic<[llvm_i32_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;
+
 //===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
 //===----------------------------------------------------------------------===//
@@ -1377,7 +1559,7 @@ def int_amdgcn_fdot2 :
       llvm_float_ty, // %c
       llvm_i1_ty     // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
@@ -1392,7 +1574,7 @@ def int_amdgcn_sdot2 :
       llvm_i32_ty,   // %c
       llvm_i1_ty     // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
@@ -1407,7 +1589,7 @@ def int_amdgcn_udot2 :
       llvm_i32_ty,   // %c
       llvm_i1_ty     // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
@@ -1422,7 +1604,7 @@ def int_amdgcn_sdot4 :
       llvm_i32_ty, // %c
       llvm_i1_ty   // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
@@ -1437,7 +1619,7 @@ def int_amdgcn_udot4 :
       llvm_i32_ty, // %c
       llvm_i1_ty   // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
@@ -1453,7 +1635,7 @@ def int_amdgcn_sdot8 :
       llvm_i32_ty, // %c
       llvm_i1_ty   // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
 // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
@@ -1469,30 +1651,154 @@ def int_amdgcn_udot8 :
       llvm_i32_ty, // %c
       llvm_i1_ty   // %clamp
     ],
-    [IntrNoMem, IntrSpeculatable]
+    [IntrNoMem, IntrSpeculatable, ImmArg<3>]
   >;
 
+//===----------------------------------------------------------------------===//
+// gfx908 intrinsics
+//===----------------------------------------------------------------------===//
+
+class AMDGPUBufferAtomicNoRtn : Intrinsic < // buffer atomic with an empty result list (no returned value)
+  [],
+  [llvm_anyfloat_ty,  // vdata(VGPR)
+   llvm_v4i32_ty,     // rsrc(SGPR)
+   llvm_i32_ty,       // vindex(VGPR)
+   llvm_i32_ty,       // offset(SGPR/VGPR/imm)
+   llvm_i1_ty],       // slc(imm)
+  [ImmArg<4>], "", [SDNPMemOperand]>, // slc (operand 4) must be an immediate, matching AMDGPUBufferAtomic
+  AMDGPURsrcIntrinsic<1, 0>;
+
+class AMDGPUGlobalAtomicNoRtn : Intrinsic <
+  [],
+  [llvm_anyptr_ty,    // vaddr
+   llvm_anyfloat_ty],               // vdata(VGPR)
+  [IntrArgMemOnly, NoCapture<0>], "", [SDNPMemOperand]>;
+
+def int_amdgcn_buffer_atomic_fadd    : AMDGPUBufferAtomicNoRtn;
+def int_amdgcn_global_atomic_fadd    : AMDGPUGlobalAtomicNoRtn;
+
+// llvm.amdgcn.mfma.{f32,i32}.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
+def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend
 // should emit calls to these.
 // ===----------------------------------------------------------------------===//
-def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
   [llvm_i1_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
-  [llvm_i64_ty], [IntrConvergent]
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
+  [llvm_anyint_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
-  [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
+  [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent]
 >;
 
 def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
-  [llvm_i64_ty], [IntrConvergent]
+  [llvm_anyint_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>;
 
 // Represent unreachable in a divergent region.
 def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 4e11f9c29dd0..4792af097d95 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,7 +19,7 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
 // A space-consuming intrinsic primarily for testing ARMConstantIslands. The
 // first argument is the number of bytes this "instruction" takes up, the second
 // and return value are essentially chains, used to force ordering during ISel.
-def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
 
 // 16-bit multiplications
 def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
@@ -263,59 +262,59 @@ def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
 // Coprocessor
 
 def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 
 def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
-   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
 
 // Move to coprocessor
 def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 // Move from coprocessor
 def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
                   MSBuiltin<"_MoveFromCoprocessor">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                             llvm_i32_ty, llvm_i32_ty], []>;
+                             llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
 def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
                    MSBuiltin<"_MoveFromCoprocessor2">,
    Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                             llvm_i32_ty, llvm_i32_ty], []>;
+                             llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
 
 // Coprocessor data processing
 def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 // Move from two registers to coprocessor
 def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                                  llvm_i32_ty, llvm_i32_ty], []>;
+                                  llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>;
 def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
-                                   llvm_i32_ty, llvm_i32_ty], []>;
+                                   llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>;
 
 def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
-                              llvm_i32_ty, llvm_i32_ty], []>;
+                              llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>;
 def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
-                               llvm_i32_ty, llvm_i32_ty], []>;
+                               llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>;
 
 //===----------------------------------------------------------------------===//
 // CRC32
@@ -333,6 +332,18 @@ def int_arm_crc32w  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
 def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
     [IntrNoMem]>;
 
+//===----------------------------------------------------------------------===//
+// CMSE
+
+def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // HINT
 
diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td
index 94eca8e40332..d7595a2a7700 100644
--- a/include/llvm/IR/IntrinsicsBPF.td
+++ b/include/llvm/IR/IntrinsicsBPF.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsBPF.td - Defines BPF intrinsics -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td
index ecc69a679553..2abc1dc07ebd 100644
--- a/include/llvm/IR/IntrinsicsHexagon.td
+++ b/include/llvm/IR/IntrinsicsHexagon.td
@@ -1,8 +1,7 @@
 //===- IntrinsicsHexagon.td - Defines Hexagon intrinsics ---*- tablegen -*-===//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -52,19 +51,19 @@ class Hexagon_mem_memmemsisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
                            llvm_i32_ty, llvm_i32_ty],
-                          [IntrArgMemOnly]>;
+                          [IntrArgMemOnly, ImmArg<3>]>;
 
 class Hexagon_mem_memsisisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
                            llvm_i32_ty, llvm_i32_ty],
-                          [IntrWriteMem]>;
+                          [IntrWriteMem, ImmArg<3>]>;
 
 class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
                           [llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
                            llvm_i32_ty, llvm_i32_ty],
-                          [IntrWriteMem]>;
+                          [IntrWriteMem, ImmArg<3>]>;
 
 //
 // BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
@@ -554,16 +553,18 @@ class Hexagon_v32i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vaslw_acc
-class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+                                               list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vaslw_acc
-class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+                                               list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vmux
 class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -581,7 +582,7 @@ class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
 class Hexagon_i32_i32i32i32i32_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
 
 // tag : V6_vandnqrt_acc
 class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix>
@@ -596,58 +597,62 @@ class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vrmpybusi
-class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vrmpybusi
-class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix,
+                                            list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vsubb_dv
-class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : M2_mpysu_up
-class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix,
+                                   list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : M2_mpyud_acc_ll_s0
-class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : S2_lsr_i_r_nac
-class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix,
+                                             list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : M2_cmpysc_s0
-class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_lo
-class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v16i32_ty], [llvm_v32i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_lo
-class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v64i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : S2_shuffoh
 class Hexagon_i64_i64i64_Intrinsic<string GCCIntSuffix>
@@ -698,10 +703,10 @@ class Hexagon_v32i32_v32i32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : A4_vcmphgti
-class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i64_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag :
 class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
@@ -710,10 +715,11 @@ class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : S6_rol_i_p_or
-class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix,
+                                      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vgtuh_and
 class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -728,16 +734,18 @@ class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : A2_abssat
-class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix,
+                                list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : A2_vcmpwgtu
-class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix,
+                                  list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vtmpybus_acc
 class Hexagon_v64i32_v64i32v64i32i32_Intrinsic<string GCCIntSuffix>
@@ -764,16 +772,18 @@ class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : S2_asr_i_p_rnd_goodsyntax
-class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : F2_conv_w2df
-class Hexagon_double_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_double_i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_double_ty], [llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vunpackuh
 class Hexagon_v32i32_v16i32_Intrinsic<string GCCIntSuffix>
@@ -866,16 +876,18 @@ class Hexagon_i32_v32i32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vlutvwhi
-class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vlutvwhi
-class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vgtuh
 class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -902,10 +914,11 @@ class Hexagon_double_i64_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : S2_vzxthw
-class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vtmpyhb
 class Hexagon_v64i32_v64i32i32_Intrinsic<string GCCIntSuffix>
@@ -944,10 +957,11 @@ class Hexagon_v16i32_v16i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : F2_conv_uw2sf
-class Hexagon_float_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_float_i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_float_ty], [llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vswap
 class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -1022,16 +1036,17 @@ class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vlutvvb_oracc
-class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+                                                     list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vlutvvb_oracc
-class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vrmpybub_rtt
 class Hexagon_v32i32_v16i32i64_Intrinsic<string GCCIntSuffix>
@@ -1052,16 +1067,18 @@ class Hexagon_i64i32_i64i64i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vrsadubi_acc
-class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix,
+                                                  list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vrsadubi_acc
-class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : F2_conv_df2sf
 class Hexagon_float_double_Intrinsic<string GCCIntSuffix>
@@ -1166,10 +1183,11 @@ class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : S2_insertp
-class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix,
+                                         list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : F2_sfinvsqrta
 class Hexagon_floati32_float_Intrinsic<string GCCIntSuffix>
@@ -1190,16 +1208,18 @@ class Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem]>;
 
 // tag : V6_vlutvwh_oracc
-class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : V6_vlutvwh_oracc
-class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
-       [IntrNoMem]>;
+       !listconcat([IntrNoMem], intr_properties)>;
 
 // tag : F2_dfcmpge
 class Hexagon_i32_doubledouble_Intrinsic<string GCCIntSuffix>
@@ -1223,7 +1243,7 @@ class Hexagon_i32_float_Intrinsic<string GCCIntSuffix>
 class Hexagon_i32_floati32_Intrinsic<string GCCIntSuffix>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_float_ty,llvm_i32_ty],
-       [IntrNoMem, Throws]>;
+       [IntrNoMem, Throws, ImmArg<1>]>;
 
 // tag : F2_conv_sf2ud_chop
 class Hexagon_i64_float_Intrinsic<string GCCIntSuffix>
@@ -1292,10 +1312,11 @@ class Hexagon_float_floatfloatfloati32_Intrinsic<string GCCIntSuffix>
        [IntrNoMem, Throws]>;
 
 // tag : F2_dfclass
-class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix,
+                                      list<IntrinsicProperty> intr_properties = []>
   : Hexagon_Intrinsic<GCCIntSuffix,
        [llvm_i32_ty], [llvm_double_ty,llvm_i32_ty],
-       [IntrNoMem, Throws]>;
+       !listconcat([IntrNoMem, Throws], intr_properties)>;
 
 // tag : V6_vd0
 class Hexagon_v16i32__Intrinsic<string GCCIntSuffix>
@@ -1393,13 +1414,13 @@ def int_hexagon_A2_vabswsat :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabswsat">;
 
 def int_hexagon_S2_asr_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [ImmArg<1>]>;
 
 def int_hexagon_S2_asr_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [ImmArg<1>]>;
 
 def int_hexagon_A4_combineri :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpy_nac_sat_hl_s1 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
@@ -1450,7 +1471,7 @@ def int_hexagon_A2_maxup :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxup">;
 
 def int_hexagon_A4_vcmphgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti", [ImmArg<1>]>;
 
 def int_hexagon_S2_interleave :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_interleave">;
@@ -1471,10 +1492,10 @@ def int_hexagon_C2_cmpgtp :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtp">;
 
 def int_hexagon_A4_cmphgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui", [ImmArg<1>]>;
 
 def int_hexagon_C2_cmpgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpyi :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">;
@@ -1492,16 +1513,16 @@ def int_hexagon_M2_mpy_lh_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
 
 def int_hexagon_S2_lsr_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc", [ImmArg<2>]>;
 
 def int_hexagon_S2_vrcnegh :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vrcnegh">;
 
 def int_hexagon_S2_extractup :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup", [ImmArg<1>, ImmArg<2>]>;
 
 def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [ImmArg<1>]>;
 
 def int_hexagon_S4_ntstbit_r :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_r">;
@@ -1528,10 +1549,10 @@ def int_hexagon_S2_asr_r_r_and :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
 
 def int_hexagon_A4_rcmpneqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac", [ImmArg<2>]>;
 
 def int_hexagon_M2_subacc :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_subacc">;
@@ -1546,10 +1567,10 @@ def int_hexagon_M2_mpy_acc_sat_lh_s1 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
 
 def int_hexagon_S2_asr_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh", [ImmArg<1>]>;
 
 def int_hexagon_S2_asr_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw", [ImmArg<1>]>;
 
 def int_hexagon_A4_cmpbgtu :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtu">;
@@ -1558,7 +1579,7 @@ def int_hexagon_A4_vcmpbeq_any :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
 
 def int_hexagon_A4_cmpbgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpyd_lh_s1 :
 Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
@@ -1567,7 +1588,7 @@ def int_hexagon_S2_asl_r_p_nac :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
 
 def int_hexagon_S2_lsr_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac", [ImmArg<2>]>;
 
 def int_hexagon_A2_addsp :
 Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">;
@@ -1576,7 +1597,7 @@ def int_hexagon_S4_vxsubaddw :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddw">;
 
 def int_hexagon_A4_vcmpheqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi", [ImmArg<1>]>;
 
 def int_hexagon_S4_vxsubaddh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddh">;
@@ -1603,16 +1624,16 @@ def int_hexagon_A2_pxorf :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_A2_pxorf">;
 
 def int_hexagon_C2_cmpgei :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei", [ImmArg<1>]>;
 
 def int_hexagon_A2_vsubub :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubub">;
 
 def int_hexagon_S2_asl_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [ImmArg<1>]>;
 
 def int_hexagon_A4_vrminuw :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuw">;
@@ -1642,10 +1663,10 @@ def int_hexagon_C2_bitsset :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsset">;
 
 def int_hexagon_M2_mpysip :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpysin :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin", [ImmArg<1>]>;
 
 def int_hexagon_A4_boundscheck :
 Hexagon_i32_i32i64_Intrinsic<"HEXAGON_A4_boundscheck">;
@@ -1684,10 +1705,10 @@ def int_hexagon_A2_vnavgw :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgw">;
 
 def int_hexagon_S2_asl_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [ImmArg<2>]>;
 
 def int_hexagon_S4_subi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_S2_vzxthw :
 Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxthw">;
@@ -1714,7 +1735,7 @@ def int_hexagon_S2_packhl :
 Hexagon_i64_i32i32_Intrinsic<"HEXAGON_S2_packhl">;
 
 def int_hexagon_A4_vcmpwgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti", [ImmArg<1>]>;
 
 def int_hexagon_A2_vavguwr :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguwr">;
@@ -1735,7 +1756,7 @@ def int_hexagon_F2_conv_d2df :
 Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_d2df">;
 
 def int_hexagon_C2_cmpgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui", [ImmArg<1>]>;
 
 def int_hexagon_A2_vconj :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vconj">;
@@ -1765,7 +1786,7 @@ def int_hexagon_S2_togglebit_r :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_r">;
 
 def int_hexagon_S2_togglebit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i", [ImmArg<1>]>;
 
 def int_hexagon_F2_conv_uw2sf :
 Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
@@ -1801,10 +1822,10 @@ def int_hexagon_S2_asl_r_r_nac :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
 
 def int_hexagon_S2_asl_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [ImmArg<2>]>;
 
 def int_hexagon_A4_vcmpwgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui", [ImmArg<1>]>;
 
 def int_hexagon_M4_vrmpyoh_acc_s0 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
@@ -1831,7 +1852,7 @@ def int_hexagon_A2_vavgwcr :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwcr">;
 
 def int_hexagon_S2_asl_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [ImmArg<2>]>;
 
 def int_hexagon_A4_vrmaxw :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxw">;
@@ -1843,22 +1864,22 @@ def int_hexagon_M4_cmpyi_wh :
 Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
 
 def int_hexagon_A2_tfrsi :
-Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi">;
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi", [ImmArg<0>]>;
 
 def int_hexagon_S2_asr_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [ImmArg<2>]>;
 
 def int_hexagon_A2_svnavgh :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svnavgh">;
 
 def int_hexagon_S2_lsr_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [ImmArg<1>]>;
 
 def int_hexagon_M2_vmac2 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2">;
 
 def int_hexagon_A4_vcmphgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui", [ImmArg<1>]>;
 
 def int_hexagon_A2_svavgh :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavgh">;
@@ -1870,7 +1891,7 @@ def int_hexagon_M4_vrmpyeh_acc_s1 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
 
 def int_hexagon_S2_lsr_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [ImmArg<1>]>;
 
 def int_hexagon_A2_combine_hl :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hl">;
@@ -1909,7 +1930,7 @@ def int_hexagon_M2_mmpyul_rs0 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
 
 def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_r_p_nac :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
@@ -1924,10 +1945,10 @@ def int_hexagon_M4_or_and :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_and">;
 
 def int_hexagon_M4_mpyrr_addi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi", [ImmArg<0>]>;
 
 def int_hexagon_S4_or_andi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpy_sat_hl_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
@@ -2032,7 +2053,7 @@ def int_hexagon_F2_sffms_lib :
 Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms_lib">;
 
 def int_hexagon_C4_cmpneqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi", [ImmArg<1>]>;
 
 def int_hexagon_M4_and_xor :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_xor">;
@@ -2056,7 +2077,7 @@ def int_hexagon_A2_vrsadub_acc :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
 
 def int_hexagon_C2_bitsclri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri", [ImmArg<1>]>;
 
 def int_hexagon_A2_subh_h16_sat_hh :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
@@ -2158,10 +2179,10 @@ def int_hexagon_S2_parityp :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_S2_parityp">;
 
 def int_hexagon_S2_lsr_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and", [ImmArg<2>]>;
 
 def int_hexagon_S2_asr_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpyu_nac_ll_s0 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
@@ -2191,7 +2212,7 @@ def int_hexagon_M2_cnacsc_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
 
 def int_hexagon_S4_subaddi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpyud_nac_hl_s1 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
@@ -2200,13 +2221,13 @@ def int_hexagon_M2_mpyud_nac_hl_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
 
 def int_hexagon_S5_vasrhrnd_goodsyntax :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax", [ImmArg<1>]>;
 
 def int_hexagon_S2_tstbit_r :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_r">;
 
 def int_hexagon_S4_vrcrotate :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate", [ImmArg<2>]>;
 
 def int_hexagon_M2_mmachs_s1 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s1">;
@@ -2215,7 +2236,7 @@ def int_hexagon_M2_mmachs_s0 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s0">;
 
 def int_hexagon_S2_tstbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpy_up_s1 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
@@ -2227,7 +2248,7 @@ def int_hexagon_M2_mmpyuh_rs0 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
 
 def int_hexagon_S2_lsr_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpy_rnd_ll_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
@@ -2266,16 +2287,16 @@ def int_hexagon_A2_subh_l16_sat_hl :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
 
 def int_hexagon_C2_cmpeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and", [ImmArg<2>]>;
 
 def int_hexagon_S2_vcnegh :
 Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcnegh">;
 
 def int_hexagon_A4_vcmpweqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi", [ImmArg<1>]>;
 
 def int_hexagon_M2_vdmpyrs_s0 :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
@@ -2308,7 +2329,7 @@ def int_hexagon_S2_cl0p :
 Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl0p">;
 
 def int_hexagon_S2_valignib :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib", [ImmArg<2>]>;
 
 def int_hexagon_F2_sffixupd :
 Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupd">;
@@ -2338,7 +2359,7 @@ def int_hexagon_M2_mmpyul_rs1 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
 
 def int_hexagon_S4_ntstbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i", [ImmArg<1>]>;
 
 def int_hexagon_F2_sffixupr :
 Hexagon_float_float_Intrinsic<"HEXAGON_F2_sffixupr">;
@@ -2362,7 +2383,7 @@ def int_hexagon_M2_vmpy2s_s0pack :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
 
 def int_hexagon_S4_addaddi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpyd_acc_ll_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
@@ -2371,13 +2392,13 @@ def int_hexagon_M2_mpy_acc_sat_hl_s1 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
 
 def int_hexagon_A4_rcmpeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi", [ImmArg<1>]>;
 
 def int_hexagon_M4_xor_and :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_and">;
 
 def int_hexagon_S2_asl_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and", [ImmArg<2>]>;
 
 def int_hexagon_M2_mmpyuh_rs1 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
@@ -2386,7 +2407,7 @@ def int_hexagon_S2_asr_r_r_or :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
 
 def int_hexagon_A4_round_ri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri", [ImmArg<1>]>;
 
 def int_hexagon_A2_max :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_max">;
@@ -2395,10 +2416,10 @@ def int_hexagon_A4_round_rr :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr">;
 
 def int_hexagon_A4_combineii :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii", [ImmArg<0>, ImmArg<1>]>;
 
 def int_hexagon_A4_combineir :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir", [ImmArg<0>]>;
 
 def int_hexagon_C4_and_orn :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_orn">;
@@ -2413,7 +2434,7 @@ def int_hexagon_M4_cmpyr_whc :
 Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
 
 def int_hexagon_S2_lsr_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [ImmArg<2>]>;
 
 def int_hexagon_S2_vzxtbh :
 Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxtbh">;
@@ -2440,7 +2461,7 @@ def int_hexagon_S2_asl_r_p_or :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
 
 def int_hexagon_S4_ori_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_C4_nbitsset :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsset">;
@@ -2476,10 +2497,10 @@ def int_hexagon_M2_mpyd_acc_hh_s1 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
 
 def int_hexagon_F2_sfimm_p :
-Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p">;
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p", [ImmArg<0>]>;
 
 def int_hexagon_F2_sfimm_n :
-Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n">;
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n", [ImmArg<0>]>;
 
 def int_hexagon_M4_cmpyr_wh :
 Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
@@ -2497,7 +2518,7 @@ def int_hexagon_A2_vavguh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguh">;
 
 def int_hexagon_A4_cmpbeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi", [ImmArg<1>]>;
 
 def int_hexagon_F2_sfcmpuo :
 Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpuo">;
@@ -2506,7 +2527,7 @@ def int_hexagon_A2_vavguw :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguw">;
 
 def int_hexagon_S2_asr_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac", [ImmArg<2>]>;
 
 def int_hexagon_S2_vsatwh_nopack :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
@@ -2533,7 +2554,7 @@ def int_hexagon_A2_minp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minp">;
 
 def int_hexagon_S4_or_andix :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpy_rnd_lh_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
@@ -2584,19 +2605,19 @@ def int_hexagon_S2_lsl_r_r_or :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
 
 def int_hexagon_C4_cmplteui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui", [ImmArg<1>]>;
 
 def int_hexagon_S4_addi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_A4_tfrcpp :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrcpp">;
 
 def int_hexagon_S2_asr_i_svw_trun :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun", [ImmArg<1>]>;
 
 def int_hexagon_A4_cmphgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti", [ImmArg<1>]>;
 
 def int_hexagon_A4_vrminh :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminh">;
@@ -2614,7 +2635,7 @@ def int_hexagon_A2_vnavghcr :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghcr">;
 
 def int_hexagon_S4_subi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_S2_lsl_r_vh :
 Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
@@ -2638,7 +2659,7 @@ def int_hexagon_C2_cmpltu :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpltu">;
 
 def int_hexagon_S2_insertp :
-Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp">;
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp", [ImmArg<2>, ImmArg<3>]>;
 
 def int_hexagon_M2_mpyd_rnd_ll_s1 :
 Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
@@ -2647,7 +2668,7 @@ def int_hexagon_M2_mpyd_rnd_ll_s0 :
 Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
 
 def int_hexagon_S2_lsr_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac", [ImmArg<2>]>;
 
 def int_hexagon_S2_extractup_rp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_extractup_rp">;
@@ -2749,7 +2770,7 @@ def int_hexagon_M2_dpmpyss_rnd_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
 
 def int_hexagon_C2_muxri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri", [ImmArg<1>]>;
 
 def int_hexagon_M2_vmac2es_s0 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
@@ -2767,7 +2788,7 @@ def int_hexagon_M2_mpyu_lh_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
 
 def int_hexagon_S2_asl_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpyd_acc_hl_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
@@ -2782,7 +2803,7 @@ def int_hexagon_A2_vaddw :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddw">;
 
 def int_hexagon_S2_asr_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and", [ImmArg<2>]>;
 
 def int_hexagon_A2_vaddh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddh">;
@@ -2797,22 +2818,22 @@ def int_hexagon_C2_cmpeqp :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpeqp">;
 
 def int_hexagon_M4_mpyri_addi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_A2_not :
 Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">;
 
 def int_hexagon_S4_andi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_M2_macsip :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip", [ImmArg<2>]>;
 
 def int_hexagon_A2_tfrcrr :
 Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrcrr">;
 
 def int_hexagon_M2_macsin :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin", [ImmArg<2>]>;
 
 def int_hexagon_C2_orn :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_orn">;
@@ -2875,7 +2896,7 @@ def int_hexagon_F2_dfcmpge :
 Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpge">;
 
 def int_hexagon_M2_accii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii", [ImmArg<2>]>;
 
 def int_hexagon_A5_vaddhubs :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A5_vaddhubs">;
@@ -2893,10 +2914,10 @@ def int_hexagon_S2_vsxthw :
 Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxthw">;
 
 def int_hexagon_S4_andi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_S2_asl_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac", [ImmArg<2>]>;
 
 def int_hexagon_S2_lsl_r_p_xor :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
@@ -2929,7 +2950,7 @@ def int_hexagon_M4_xor_andn :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_andn">;
 
 def int_hexagon_S2_addasl_rrri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [ImmArg<2>]>;
 
 def int_hexagon_M5_vdmpybsu :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vdmpybsu">;
@@ -2941,7 +2962,7 @@ def int_hexagon_M2_mpyu_nac_hh_s1 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
 
 def int_hexagon_A2_addi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [ImmArg<1>]>;
 
 def int_hexagon_A2_addp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">;
@@ -2962,7 +2983,7 @@ def int_hexagon_S2_shuffeh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeh">;
 
 def int_hexagon_S2_lsr_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and", [ImmArg<2>]>;
 
 def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
@@ -3064,13 +3085,13 @@ def int_hexagon_S5_popcountp :
 Hexagon_i32_i64_Intrinsic<"HEXAGON_S5_popcountp">;
 
 def int_hexagon_S4_extractp :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp", [ImmArg<1>, ImmArg<2>]>;
 
 def int_hexagon_S2_cl0 :
 Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl0">;
 
 def int_hexagon_A4_vcmpbgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti", [ImmArg<1>]>;
 
 def int_hexagon_M2_mmacls_s1 :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s1">;
@@ -3118,7 +3139,7 @@ def int_hexagon_A2_vmaxuh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuh">;
 
 def int_hexagon_A4_bitspliti :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti", [ImmArg<1>]>;
 
 def int_hexagon_A2_vmaxub :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxub">;
@@ -3145,13 +3166,13 @@ def int_hexagon_S2_asr_r_r_nac :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
 
 def int_hexagon_F2_dfimm_n :
-Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n">;
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n", [ImmArg<0>]>;
 
 def int_hexagon_A4_cmphgt :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgt">;
 
 def int_hexagon_F2_dfimm_p :
-Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p">;
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p", [ImmArg<0>]>;
 
 def int_hexagon_M2_mpyud_acc_lh_s1 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
@@ -3160,7 +3181,7 @@ def int_hexagon_M2_vcmpy_s1_sat_r :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
 
 def int_hexagon_M4_mpyri_addr_u2 :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2", [ImmArg<1>]>;
 
 def int_hexagon_M2_vcmpy_s1_sat_i :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
@@ -3172,10 +3193,10 @@ def int_hexagon_M5_vrmacbuu :
 Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbuu">;
 
 def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax", [ImmArg<1>]>;
 
 def int_hexagon_S2_vspliceib :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib", [ImmArg<2>]>;
 
 def int_hexagon_M2_dpmpyss_acc_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
@@ -3193,25 +3214,25 @@ def int_hexagon_A2_maxp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxp">;
 
 def int_hexagon_A2_andir :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [ImmArg<1>]>;
 
 def int_hexagon_F2_sfrecipa :
 Hexagon_floati32_floatfloat_Intrinsic<"HEXAGON_F2_sfrecipa">;
 
 def int_hexagon_A2_combineii :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii", [ImmArg<0>, ImmArg<1>]>;
 
 def int_hexagon_A4_orn :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_orn">;
 
 def int_hexagon_A4_cmpbgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_r_r_or :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
 
 def int_hexagon_A4_vcmpbeqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsl_r_r :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r">;
@@ -3247,19 +3268,19 @@ def int_hexagon_M2_vrcmpys_s1 :
 Hexagon_i64_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
 
 def int_hexagon_S4_or_ori :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori", [ImmArg<2>]>;
 
 def int_hexagon_C4_fastcorner9_not :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
 
 def int_hexagon_A2_tfrih :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih", [ImmArg<1>]>;
 
 def int_hexagon_A2_tfril :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril", [ImmArg<1>]>;
 
 def int_hexagon_M4_mpyri_addr :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr", [ImmArg<2>]>;
 
 def int_hexagon_S2_vtrunehb :
 Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunehb">;
@@ -3274,16 +3295,16 @@ def int_hexagon_F2_sfsub :
 Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfsub">;
 
 def int_hexagon_C2_muxii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii", [ImmArg<1>, ImmArg<2>]>;
 
 def int_hexagon_C2_muxir :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir", [ImmArg<2>]>;
 
 def int_hexagon_A2_swiz :
 Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">;
 
 def int_hexagon_S2_asr_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and", [ImmArg<2>]>;
 
 def int_hexagon_M2_cmpyrsc_s0 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
@@ -3313,7 +3334,7 @@ def int_hexagon_M2_mpy_nac_sat_ll_s0 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
 
 def int_hexagon_S4_extract :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract", [ImmArg<1>, ImmArg<2>]>;
 
 def int_hexagon_A2_vcmpweq :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpweq">;
@@ -3322,10 +3343,10 @@ def int_hexagon_M2_acci :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_acci">;
 
 def int_hexagon_S2_lsr_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [ImmArg<2>]>;
 
 def int_hexagon_S2_lsr_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or", [ImmArg<2>]>;
 
 def int_hexagon_F2_conv_ud2sf :
 Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
@@ -3334,10 +3355,10 @@ def int_hexagon_A2_tfr :
 Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfr">;
 
 def int_hexagon_S2_asr_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or", [ImmArg<2>]>;
 
 def int_hexagon_A2_subri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [ImmArg<0>]>;
 
 def int_hexagon_A4_vrmaxuw :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuw">;
@@ -3349,7 +3370,7 @@ def int_hexagon_A4_vrmaxuh :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuh">;
 
 def int_hexagon_S2_asl_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw", [ImmArg<1>]>;
 
 def int_hexagon_A2_vavgw :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgw">;
@@ -3361,13 +3382,13 @@ def int_hexagon_A2_vavgh :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgh">;
 
 def int_hexagon_S2_clrbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or", [ImmArg<2>]>;
 
 def int_hexagon_S2_lsl_r_r_nac :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
@@ -3385,7 +3406,7 @@ def int_hexagon_M2_mmpyl_s1 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
 
 def int_hexagon_M2_naccii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii", [ImmArg<2>]>;
 
 def int_hexagon_S2_vrndpackwhs :
 Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
@@ -3406,7 +3427,7 @@ def int_hexagon_M4_mac_up_s1_sat :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
 
 def int_hexagon_S4_vrcrotate_acc :
-Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc", [ImmArg<3>]>;
 
 def int_hexagon_F2_conv_uw2df :
 Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_uw2df">;
@@ -3418,7 +3439,7 @@ def int_hexagon_S2_asr_r_r_acc :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
 
 def int_hexagon_A2_orir :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [ImmArg<1>]>;
 
 def int_hexagon_A2_andp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">;
@@ -3430,7 +3451,7 @@ def int_hexagon_A2_min :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_min">;
 
 def int_hexagon_M2_mpysmi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [ImmArg<1>]>;
 
 def int_hexagon_M2_vcmpy_s0_sat_r :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
@@ -3466,10 +3487,10 @@ def int_hexagon_F2_conv_df2w :
 Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w">;
 
 def int_hexagon_S5_asrhub_sat :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc", [ImmArg<2>]>;
 
 def int_hexagon_F2_conv_df2d :
 Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d">;
@@ -3505,7 +3526,7 @@ def int_hexagon_F2_sffma_sc :
 Hexagon_float_floatfloatfloati32_Intrinsic<"HEXAGON_F2_sffma_sc">;
 
 def int_hexagon_F2_dfclass :
-Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass">;
+Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass", [ImmArg<1>]>;
 
 def int_hexagon_F2_conv_df2ud :
 Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud">;
@@ -3520,7 +3541,7 @@ def int_hexagon_M2_cmpyrs_s1 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
 
 def int_hexagon_C4_cmpltei :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei", [ImmArg<1>]>;
 
 def int_hexagon_C4_cmplteu :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteu">;
@@ -3532,7 +3553,7 @@ def int_hexagon_A2_subh_l16_ll :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
 
 def int_hexagon_S2_asr_i_r_rnd :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd", [ImmArg<1>]>;
 
 def int_hexagon_M2_vrmpy_s0 :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
@@ -3577,7 +3598,7 @@ def int_hexagon_M2_vrcmpyi_s0c :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
 
 def int_hexagon_S2_asr_i_p_rnd :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd", [ImmArg<1>]>;
 
 def int_hexagon_A2_addpsat :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">;
@@ -3586,7 +3607,7 @@ def int_hexagon_A2_svaddhs :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddhs">;
 
 def int_hexagon_S4_ori_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
@@ -3619,7 +3640,7 @@ def int_hexagon_S2_asl_r_r_or :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
 
 def int_hexagon_S4_lsli :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli", [ImmArg<0>]>;
 
 def int_hexagon_S2_lsl_r_vw :
 Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
@@ -3664,7 +3685,7 @@ def int_hexagon_A2_negp :
 Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">;
 
 def int_hexagon_S2_asl_i_r_sat :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat", [ImmArg<1>]>;
 
 def int_hexagon_A2_addh_l16_sat_hl :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
@@ -3682,10 +3703,10 @@ def int_hexagon_C2_cmpgtup :
 Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtup">;
 
 def int_hexagon_A4_cround_ri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri", [ImmArg<1>]>;
 
 def int_hexagon_S4_clbpaddi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi", [ImmArg<1>]>;
 
 def int_hexagon_A4_cround_rr :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_rr">;
@@ -3715,13 +3736,13 @@ def int_hexagon_A2_vminub :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminub">;
 
 def int_hexagon_S2_extractu :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu", [ImmArg<1>, ImmArg<2>]>;
 
 def int_hexagon_A2_svsubh :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubh">;
 
 def int_hexagon_S4_clbaddi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi", [ImmArg<1>]>;
 
 def int_hexagon_F2_sffms :
 Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms">;
@@ -3754,7 +3775,7 @@ def int_hexagon_M2_mpy_acc_hh_s0 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
 
 def int_hexagon_S4_addi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
 
 def int_hexagon_M2_mpyd_nac_hh_s1 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
@@ -3763,10 +3784,10 @@ def int_hexagon_M2_mpyd_nac_hh_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
 
 def int_hexagon_S2_asr_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac", [ImmArg<2>]>;
 
 def int_hexagon_A4_cmpheqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_r_p_xor :
 Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
@@ -3781,7 +3802,7 @@ def int_hexagon_F2_conv_sf2ud_chop :
 Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
 
 def int_hexagon_C2_cmpgeui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpy_acc_sat_hh_s0 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
@@ -3808,7 +3829,7 @@ def int_hexagon_M2_mpyud_nac_lh_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
 
 def int_hexagon_A4_round_ri_sat :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat", [ImmArg<1>]>;
 
 def int_hexagon_M2_mpy_nac_hl_s0 :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
@@ -3829,10 +3850,10 @@ def int_hexagon_M2_cmaci_s0 :
 Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmaci_s0">;
 
 def int_hexagon_S2_setbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i", [ImmArg<1>]>;
 
 def int_hexagon_S2_asl_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or", [ImmArg<2>]>;
 
 def int_hexagon_A4_andn :
 Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">;
@@ -3856,13 +3877,13 @@ def int_hexagon_M2_xor_xacc :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_xor_xacc">;
 
 def int_hexagon_A4_vcmpbgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui", [ImmArg<1>]>;
 
 def int_hexagon_A4_ornp :
 Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_ornp">;
 
 def int_hexagon_A2_tfrpi :
-Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi">;
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi", [ImmArg<0>]>;
 
 def int_hexagon_C4_and_or :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_or">;
@@ -3886,16 +3907,16 @@ def int_hexagon_M2_vmpy2su_s0 :
 Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
 
 def int_hexagon_S2_asr_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [ImmArg<2>]>;
 
 def int_hexagon_C4_nbitsclri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh", [ImmArg<1>]>;
 
 def int_hexagon_S2_lsr_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [ImmArg<2>]>;
 
 // V55 Scalar Instructions.
 
@@ -3905,40 +3926,40 @@ Hexagon_i64i32_i64i64i64_Intrinsic<"HEXAGON_A5_ACS">;
 // V60 Scalar Instructions.
 
 def int_hexagon_S6_rol_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p", [ImmArg<1>]>;
 
 def int_hexagon_S6_rol_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r", [ImmArg<1>]>;
 
 def int_hexagon_S6_rol_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac", [ImmArg<2>]>;
 
 def int_hexagon_S6_rol_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or", [ImmArg<2>]>;
 
 // V62 Scalar Instructions.
 
@@ -3980,7 +4001,7 @@ def int_hexagon_M2_mnaci :
 Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mnaci">;
 
 def int_hexagon_S2_mask :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask", [ImmArg<0>, ImmArg<1>]>;
 
 // V60 HVX Instructions.
 
@@ -4021,10 +4042,10 @@ def int_hexagon_V6_vaddh_dv_128B :
 Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
 
 def int_hexagon_V6_vrmpybusi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vrmpybusi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vshufoh :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoh">;
@@ -4045,10 +4066,10 @@ def int_hexagon_V6_vdmpyhsuisat_128B :
 Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
 
 def int_hexagon_V6_vrsadubi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc", [ImmArg<3>]>;
 
 def int_hexagon_V6_vrsadubi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B", [ImmArg<3>]>;
 
 def int_hexagon_V6_vnavgw :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgw">;
@@ -4915,10 +4936,10 @@ def int_hexagon_V6_vsubhsat_128B :
 Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
 
 def int_hexagon_V6_vrmpyubi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc", [ImmArg<3>]>;
 
 def int_hexagon_V6_vrmpyubi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B", [ImmArg<3>]>;
 
 def int_hexagon_V6_vabsw :
 Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw">;
@@ -5095,10 +5116,10 @@ def int_hexagon_V6_vmpybv_acc_128B :
 Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
 
 def int_hexagon_V6_vrsadubi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vrsadubi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vdmpyhb_dv_acc :
 Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
@@ -5377,10 +5398,10 @@ def int_hexagon_V6_vaddbnq_128B :
 Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
 
 def int_hexagon_V6_vlalignbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vlalignbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vsatwh :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatwh">;
@@ -5443,10 +5464,10 @@ def int_hexagon_V6_veqh_and_128B :
 Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
 
 def int_hexagon_V6_valignbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi", [ImmArg<2>]>;
 
 def int_hexagon_V6_valignbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vaddwsat :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">;
@@ -5689,10 +5710,10 @@ def int_hexagon_V6_vsubh_128B :
 Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_128B">;
 
 def int_hexagon_V6_vrmpyubi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vrmpyubi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vminw :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminw">;
@@ -5755,10 +5776,10 @@ def int_hexagon_V6_vsubuhw_128B :
 Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
 
 def int_hexagon_V6_vrmpybusi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc", [ImmArg<3>]>;
 
 def int_hexagon_V6_vrmpybusi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B", [ImmArg<3>]>;
 
 def int_hexagon_V6_vasrw :
 Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrw">;
@@ -5883,10 +5904,10 @@ def int_hexagon_V6_vlsrb_128B :
 Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
 
 def int_hexagon_V6_vlutvwhi :
-Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi">;
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vlutvwhi_128B :
-Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vaddububb_sat :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
@@ -5907,10 +5928,10 @@ def int_hexagon_V6_ldtp0_128B :
 Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0_128B">;
 
 def int_hexagon_V6_vlutvvb_oracci :
-Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci", [ImmArg<3>]>;
 
 def int_hexagon_V6_vlutvvb_oracci_128B :
-Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B", [ImmArg<3>]>;
 
 def int_hexagon_V6_vsubuwsat_dv :
 Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
@@ -6045,10 +6066,10 @@ def int_hexagon_V6_vasrwuhrndsat_128B :
 Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
 
 def int_hexagon_V6_vlutvvbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi", [ImmArg<2>]>;
 
 def int_hexagon_V6_vlutvvbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B", [ImmArg<2>]>;
 
 def int_hexagon_V6_vsubuwsat :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuwsat">;
@@ -6141,10 +6162,10 @@ def int_hexagon_V6_ldcnp0_128B :
 Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0_128B">;
 
 def int_hexagon_V6_vlutvwh_oracci :
-Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci", [ImmArg<3>]>;
 
 def int_hexagon_V6_vlutvwh_oracci_128B :
-Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B", [ImmArg<3>]>;
 
 def int_hexagon_V6_vsubbsat :
 Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbsat">;
diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index 421a79be4ebc..6393a9ca35d5 100644
--- a/include/llvm/IR/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsMips.td - Defines Mips intrinsics ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -235,9 +234,9 @@ def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">,
 // Misc
 
 def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">,
-  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<1>]>;
 def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<0>]>;
 
 def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
@@ -303,10 +302,10 @@ def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">,
 
 def int_mips_append: GCCBuiltin<"__builtin_mips_append">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem]>;
+  [IntrNoMem, ImmArg<2>]>;
 def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem]>;
+  [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
@@ -356,14 +355,14 @@ def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">,
   Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>;
 def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">,
   Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_precr_sra_r_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">,
   Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem]>;
+  [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">,
   Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -464,22 +463,22 @@ def int_mips_addv_d : GCCBuiltin<"__builtin_msa_addv_d">,
 
 def int_mips_addvi_b : GCCBuiltin<"__builtin_msa_addvi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
-  [Commutative, IntrNoMem]>;
+  [Commutative, IntrNoMem, ImmArg<1>]>;
 def int_mips_addvi_h : GCCBuiltin<"__builtin_msa_addvi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
-  [Commutative, IntrNoMem]>;
+  [Commutative, IntrNoMem, ImmArg<1>]>;
 def int_mips_addvi_w : GCCBuiltin<"__builtin_msa_addvi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
-  [Commutative, IntrNoMem]>;
+  [Commutative, IntrNoMem, ImmArg<1>]>;
 def int_mips_addvi_d : GCCBuiltin<"__builtin_msa_addvi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
-  [Commutative, IntrNoMem]>;
+  [Commutative, IntrNoMem, ImmArg<1>]>;
 
 def int_mips_and_v : GCCBuiltin<"__builtin_msa_and_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
 def int_mips_andi_b : GCCBuiltin<"__builtin_msa_andi_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_asub_s_b : GCCBuiltin<"__builtin_msa_asub_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -561,13 +560,13 @@ def int_mips_bclr_d : GCCBuiltin<"__builtin_msa_bclr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_bclri_b : GCCBuiltin<"__builtin_msa_bclri_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bclri_h : GCCBuiltin<"__builtin_msa_bclri_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bclri_w : GCCBuiltin<"__builtin_msa_bclri_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bclri_d : GCCBuiltin<"__builtin_msa_bclri_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_binsl_b : GCCBuiltin<"__builtin_msa_binsl_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -584,16 +583,16 @@ def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">,
 
 def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -610,16 +609,16 @@ def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">,
 
 def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -627,7 +626,7 @@ def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
 
 def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -635,7 +634,7 @@ def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
 
 def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -647,13 +646,13 @@ def int_mips_bneg_d : GCCBuiltin<"__builtin_msa_bneg_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_bnegi_b : GCCBuiltin<"__builtin_msa_bnegi_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bnegi_h : GCCBuiltin<"__builtin_msa_bnegi_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bnegi_w : GCCBuiltin<"__builtin_msa_bnegi_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bnegi_d : GCCBuiltin<"__builtin_msa_bnegi_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_bnz_b : GCCBuiltin<"__builtin_msa_bnz_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -673,7 +672,7 @@ def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">,
 
 def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -685,13 +684,13 @@ def int_mips_bset_d : GCCBuiltin<"__builtin_msa_bset_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_bseti_b : GCCBuiltin<"__builtin_msa_bseti_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bseti_h : GCCBuiltin<"__builtin_msa_bseti_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bseti_w : GCCBuiltin<"__builtin_msa_bseti_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_bseti_d : GCCBuiltin<"__builtin_msa_bseti_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_bz_b : GCCBuiltin<"__builtin_msa_bz_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -715,16 +714,16 @@ def int_mips_ceq_d : GCCBuiltin<"__builtin_msa_ceq_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_ceqi_b : GCCBuiltin<"__builtin_msa_ceqi_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_ceqi_h : GCCBuiltin<"__builtin_msa_ceqi_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_ceqi_w : GCCBuiltin<"__builtin_msa_ceqi_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_ceqi_d : GCCBuiltin<"__builtin_msa_ceqi_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_cfcmsa : GCCBuiltin<"__builtin_msa_cfcmsa">,
-  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
 
 def int_mips_cle_s_b : GCCBuiltin<"__builtin_msa_cle_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -745,22 +744,22 @@ def int_mips_cle_u_d : GCCBuiltin<"__builtin_msa_cle_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_clei_s_b : GCCBuiltin<"__builtin_msa_clei_s_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_s_h : GCCBuiltin<"__builtin_msa_clei_s_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_s_w : GCCBuiltin<"__builtin_msa_clei_s_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_s_d : GCCBuiltin<"__builtin_msa_clei_s_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_clei_u_b : GCCBuiltin<"__builtin_msa_clei_u_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_u_h : GCCBuiltin<"__builtin_msa_clei_u_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_u_w : GCCBuiltin<"__builtin_msa_clei_u_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clei_u_d : GCCBuiltin<"__builtin_msa_clei_u_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_clt_s_b : GCCBuiltin<"__builtin_msa_clt_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -781,22 +780,22 @@ def int_mips_clt_u_d : GCCBuiltin<"__builtin_msa_clt_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_clti_s_b : GCCBuiltin<"__builtin_msa_clti_s_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_s_h : GCCBuiltin<"__builtin_msa_clti_s_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_s_w : GCCBuiltin<"__builtin_msa_clti_s_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_s_d : GCCBuiltin<"__builtin_msa_clti_s_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_clti_u_b : GCCBuiltin<"__builtin_msa_clti_u_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_u_h : GCCBuiltin<"__builtin_msa_clti_u_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_u_w : GCCBuiltin<"__builtin_msa_clti_u_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">,
   Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -817,7 +816,7 @@ def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">,
   Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">,
-  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
 
 def int_mips_div_s_b : GCCBuiltin<"__builtin_msa_div_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1245,41 +1244,41 @@ def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">,
 def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">,
   Intrinsic<[llvm_v16i8_ty],
             [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<1>]>;
 def int_mips_insve_h : GCCBuiltin<"__builtin_msa_insve_h">,
   Intrinsic<[llvm_v8i16_ty],
             [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<1>]>;
 def int_mips_insve_w : GCCBuiltin<"__builtin_msa_insve_w">,
   Intrinsic<[llvm_v4i32_ty],
             [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<1>]>;
 def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">,
   Intrinsic<[llvm_v2i64_ty],
             [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly]>;
+  [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 
 def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_mips_ldi_w : GCCBuiltin<"__builtin_msa_ldi_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_mips_ldi_d : GCCBuiltin<"__builtin_msa_ldi_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 
 // This instruction is part of the MSA spec but it does not share the
 // __builtin_msa prefix because it operates on the GPR registers.
@@ -1342,22 +1341,22 @@ def int_mips_max_u_d : GCCBuiltin<"__builtin_msa_max_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_maxi_s_b : GCCBuiltin<"__builtin_msa_maxi_s_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_s_h : GCCBuiltin<"__builtin_msa_maxi_s_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_s_w : GCCBuiltin<"__builtin_msa_maxi_s_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_s_d : GCCBuiltin<"__builtin_msa_maxi_s_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_maxi_u_b : GCCBuiltin<"__builtin_msa_maxi_u_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_u_h : GCCBuiltin<"__builtin_msa_maxi_u_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_u_w : GCCBuiltin<"__builtin_msa_maxi_u_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_maxi_u_d : GCCBuiltin<"__builtin_msa_maxi_u_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_min_a_b : GCCBuiltin<"__builtin_msa_min_a_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1387,22 +1386,22 @@ def int_mips_min_u_d : GCCBuiltin<"__builtin_msa_min_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_mini_s_b : GCCBuiltin<"__builtin_msa_mini_s_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_s_h : GCCBuiltin<"__builtin_msa_mini_s_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_s_w : GCCBuiltin<"__builtin_msa_mini_s_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_s_d : GCCBuiltin<"__builtin_msa_mini_s_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_mini_u_b : GCCBuiltin<"__builtin_msa_mini_u_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_u_h : GCCBuiltin<"__builtin_msa_mini_u_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_u_w : GCCBuiltin<"__builtin_msa_mini_u_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_mini_u_d : GCCBuiltin<"__builtin_msa_mini_u_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_mod_s_b : GCCBuiltin<"__builtin_msa_mod_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1493,13 +1492,13 @@ def int_mips_nor_v : GCCBuiltin<"__builtin_msa_nor_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
 def int_mips_nori_b : GCCBuiltin<"__builtin_msa_nori_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_or_v : GCCBuiltin<"__builtin_msa_or_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
 def int_mips_ori_b : GCCBuiltin<"__builtin_msa_ori_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_pckev_b : GCCBuiltin<"__builtin_msa_pckev_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1529,29 +1528,29 @@ def int_mips_pcnt_d : GCCBuiltin<"__builtin_msa_pcnt_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_sat_s_b : GCCBuiltin<"__builtin_msa_sat_s_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_s_h : GCCBuiltin<"__builtin_msa_sat_s_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_s_w : GCCBuiltin<"__builtin_msa_sat_s_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_s_d : GCCBuiltin<"__builtin_msa_sat_s_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_sat_u_b : GCCBuiltin<"__builtin_msa_sat_u_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_u_h : GCCBuiltin<"__builtin_msa_sat_u_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_u_w : GCCBuiltin<"__builtin_msa_sat_u_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_sat_u_d : GCCBuiltin<"__builtin_msa_sat_u_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_shf_b : GCCBuiltin<"__builtin_msa_shf_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_shf_h : GCCBuiltin<"__builtin_msa_shf_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -1564,16 +1563,16 @@ def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">,
 
 def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-            [IntrNoMem]>;
+            [IntrNoMem, ImmArg<2>]>;
 
 def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1585,13 +1584,13 @@ def int_mips_sll_d : GCCBuiltin<"__builtin_msa_sll_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_slli_b : GCCBuiltin<"__builtin_msa_slli_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_slli_h : GCCBuiltin<"__builtin_msa_slli_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_slli_w : GCCBuiltin<"__builtin_msa_slli_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_slli_d : GCCBuiltin<"__builtin_msa_slli_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_splat_b : GCCBuiltin<"__builtin_msa_splat_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -1603,13 +1602,13 @@ def int_mips_splat_d : GCCBuiltin<"__builtin_msa_splat_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_splati_b : GCCBuiltin<"__builtin_msa_splati_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_splati_h : GCCBuiltin<"__builtin_msa_splati_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_splati_w : GCCBuiltin<"__builtin_msa_splati_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_splati_d : GCCBuiltin<"__builtin_msa_splati_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_sra_b : GCCBuiltin<"__builtin_msa_sra_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1621,13 +1620,13 @@ def int_mips_sra_d : GCCBuiltin<"__builtin_msa_sra_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_srai_b : GCCBuiltin<"__builtin_msa_srai_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srai_h : GCCBuiltin<"__builtin_msa_srai_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srai_w : GCCBuiltin<"__builtin_msa_srai_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srai_d : GCCBuiltin<"__builtin_msa_srai_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_srar_b : GCCBuiltin<"__builtin_msa_srar_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1639,13 +1638,13 @@ def int_mips_srar_d : GCCBuiltin<"__builtin_msa_srar_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_srari_b : GCCBuiltin<"__builtin_msa_srari_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srari_h : GCCBuiltin<"__builtin_msa_srari_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srari_w : GCCBuiltin<"__builtin_msa_srari_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srari_d : GCCBuiltin<"__builtin_msa_srari_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_srl_b : GCCBuiltin<"__builtin_msa_srl_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1657,13 +1656,13 @@ def int_mips_srl_d : GCCBuiltin<"__builtin_msa_srl_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_srli_b : GCCBuiltin<"__builtin_msa_srli_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srli_h : GCCBuiltin<"__builtin_msa_srli_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srli_w : GCCBuiltin<"__builtin_msa_srli_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srli_d : GCCBuiltin<"__builtin_msa_srli_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_srlr_b : GCCBuiltin<"__builtin_msa_srlr_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1675,26 +1674,26 @@ def int_mips_srlr_d : GCCBuiltin<"__builtin_msa_srlr_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_srlri_b : GCCBuiltin<"__builtin_msa_srlri_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srlri_h : GCCBuiltin<"__builtin_msa_srlri_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">,
   Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
-  [IntrArgMemOnly]>;
+  [IntrArgMemOnly, ImmArg<2>]>;
 def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">,
   Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty],
-  [IntrArgMemOnly]>;
+  [IntrArgMemOnly, ImmArg<2>]>;
 def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">,
   Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
-  [IntrArgMemOnly]>;
+  [IntrArgMemOnly, ImmArg<2>]>;
 def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">,
   Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
-  [IntrArgMemOnly]>;
+  [IntrArgMemOnly, ImmArg<2>]>;
 
 def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1742,13 +1741,13 @@ def int_mips_subv_d : GCCBuiltin<"__builtin_msa_subv_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 
 def int_mips_subvi_b : GCCBuiltin<"__builtin_msa_subvi_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_subvi_h : GCCBuiltin<"__builtin_msa_subvi_h">,
-  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_subvi_w : GCCBuiltin<"__builtin_msa_subvi_w">,
-  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_mips_subvi_d : GCCBuiltin<"__builtin_msa_subvi_d">,
-  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_vshf_b : GCCBuiltin<"__builtin_msa_vshf_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -1767,5 +1766,5 @@ def int_mips_xor_v : GCCBuiltin<"__builtin_msa_xor_v">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
 def int_mips_xori_b : GCCBuiltin<"__builtin_msa_xori_b">,
-  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 }
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index 7f694f68969e..dba7dd76c4ff 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,6 +37,245 @@ def llvm_anyi64ptr_ty     : LLVMAnyPointerType<llvm_i64_ty>;     // (space)i64*
 // MISC
 //
 
+// Helper class that builds an N-element list<LLVMType> [T,T,...,T] by recursion on N.
+class RepLLVMType<int N, LLVMType T> {
+  list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []);
+}
+
+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
+// Geom: geometry string of the form m<M>n<N>k<K>, e.g. m8n32k16.
+// Frag: fragment name, one of "a", "b", "c" or "d".
+// PtxEltType: PTX type of the fragment's elements, e.g. "f16" or "s32".
+class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
+  string geom = Geom;
+  string frag = Frag;
+  string ptx_elt_type = PtxEltType;
+  string gft = Geom#":"#Frag#":"#ptx_elt_type;  // e.g. "m16n16k16:a:u8"
+  string ft = frag#":"#ptx_elt_type;  // e.g. "a:f16"
+  list<LLVMType> regs = !cond(
+    // fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
+    // All currently supported geometries use the same fragment format,
+    // so we only need to consider {fragment, type}.
+    !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
+    !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
+    !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
+    !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
+    !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret,
+    !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret,
+
+    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
+    !eq(gft,"m16n16k16:a:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m16n16k16:a:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m16n16k16:b:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m16n16k16:b:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m16n16k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+    !eq(gft,"m16n16k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+    !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
+    !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
+    !eq(gft,"m8n32k16:b:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
+    !eq(gft,"m8n32k16:b:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
+    !eq(gft,"m8n32k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+    !eq(gft,"m8n32k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+    !eq(gft,"m32n8k16:a:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
+    !eq(gft,"m32n8k16:a:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
+    !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
+    !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
+    !eq(gft,"m32n8k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+    !eq(gft,"m32n8k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+    // u4/s4 -> s32 @ m8n8k32, b1 -> s32 @ m8n8k128
+    !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
+    !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty],
+    !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty],
+    !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
+    !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
+    !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
+    !eq(gft,"m8n8k128:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m8n8k128:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m8n8k32:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+    !eq(gft,"m8n8k32:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+  );
+}
+
+class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
+  string intr = "llvm.nvvm.wmma."
+                # Frag.geom
+                # "." # Op
+                # "." # Frag.frag
+                # "." # Layout
+                # !if(WithStride, ".stride", "")
+                # "." # Frag.ptx_elt_type
+                ;  // i.e. llvm.nvvm.wmma.<geom>.<op>.<frag>.<layout>[.stride].<type>
+  // TODO(tra): record name should ideally use the same field order as the intrinsic.
+  // E.g. string record = !subst("llvm", "int",
+  //                      !subst(".", "_", intr));
+  string record = "int_nvvm_wmma_"
+                # Frag.geom
+                # "_" # Op
+                # "_" # Frag.frag
+                # "_" # Frag.ptx_elt_type
+                # "_" # Layout
+                # !if(WithStride, "_stride", "");  // Type precedes layout here, unlike in intr.
+}
+
+class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
+  list<WMMA_REGS> id_frags = !cond(
+     // int and sub-int ops are identified by the element type of fragment A.
+     !eq(A.ptx_elt_type, "s8") : [A],
+     !eq(A.ptx_elt_type, "u8") : [A],
+     !eq(A.ptx_elt_type, "s4") : [A],
+     !eq(A.ptx_elt_type, "u4") : [A],
+     !eq(A.ptx_elt_type, "b1") : [A],
+     // the rest are FP ops identified by the D and C fragment types, in that order.
+     1: [D, C]
+     );  // ret below joins the id_frags types, each prefixed with ".", e.g. ".s8" or ".f32.f16".
+   string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
+}
+
+class WMMA_NAME_MMA<string ALayout, string BLayout, int Satfinite,
+                    WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
+  string signature = MMA_SIGNATURE<A, B, C, D>.ret;  // Already starts with ".".
+  string llvm = "llvm.nvvm.wmma."
+                # A.geom  // The geometry is taken from fragment A only.
+                # ".mma"
+                # "." # ALayout
+                # "." # BLayout
+                # signature  // No "." separator needed, see above.
+                # !if(Satfinite, ".satfinite", "");
+
+  string record = !subst(".", "_",
+                  !subst("llvm.", "int_", llvm));  // "llvm." -> "int_", then every "." -> "_".
+}
+
+// Generates list of 4-tuples of WMMA_REGS [a, b, c, d] representing a valid MMA op.
+//   Geom: list of supported geometries.
+//   TypeN: list of PTX element types allowed for the corresponding fragment.
+//   TypeB/TypeD may be empty; B then uses the type of A, and D the type of C.
+class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
+            list<string> TypeC, list<string> TypeD> {
+  list<list<WMMA_REGS>> ret =
+     !foldl([]<list<WMMA_REGS>>, Geom, t1, geom, !listconcat(t1,
+     !foldl([]<list<WMMA_REGS>>, TypeA, t2, type_a, !listconcat(t2,
+     !foldl([]<list<WMMA_REGS>>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3,
+     !foldl([]<list<WMMA_REGS>>, TypeC, t4, type_c, !listconcat(t4,
+     !foldl([]<list<WMMA_REGS>>, !if(!size(TypeD), TypeD, [type_c]), t5, type_d, !listconcat(t5,
+            [[WMMA_REGS<geom, "a", type_a>,
+              WMMA_REGS<geom, "b", type_b>,
+              WMMA_REGS<geom, "c", type_c>,
+              WMMA_REGS<geom, "d", type_d>]]))))))))));
+   // Debugging aid: readable "geom:frag:type" strings for each tuple in the list above.
+   list<list<string>> ops = !foreach(x, ret, [x[0].gft, x[1].gft, x[2].gft, x[3].gft]);
+}
+
+class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+  list<WMMA_REGS> ret =  // One WMMA_REGS per {geom, frag, type} combination.
+     !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+     !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+     !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+            [WMMA_REGS<geom, frag, type>]))))));
+   // Debugging aid: readable "geom:frag:type" strings for the list above.
+   list<string> ops = !foreach(x, ret, x.gft);
+}
+
+
+
+// Creates the lists of valid fragment combinations (MMA ops) and of valid load/store
+// fragments. These master lists drive generation of the intrinsics and instructions.
+class NVVM_MMA_OPS<int _ = 0> {
+  list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS<
+            ["m16n16k16", "m32n8k16", "m8n32k16"],
+            ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;  // [] TypeB: B uses A's type.
+  list<list<WMMA_REGS>> int_mma_ops = MMA_OPS<
+            ["m16n16k16", "m32n8k16", "m8n32k16"],
+            ["s8", "u8"], [], ["s32"], []>.ret;
+  list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS<
+            ["m8n8k32"],
+            ["s4", "u4"], [], ["s32"], []>.ret;
+  list<list<WMMA_REGS>> bit_mma_ops = MMA_OPS<
+            ["m8n8k128"],
+            ["b1"], [], ["s32"], []>.ret;
+  list<list<WMMA_REGS>> all_mma_ops = !listconcat(fp_mma_ops, int_mma_ops,
+                                                  subint_mma_ops, bit_mma_ops);
+
+  list<WMMA_REGS> ldst_ab_ops = MMA_LDST_OPS<
+            ["m16n16k16", "m32n8k16", "m8n32k16"],
+            ["a", "b"], ["f16", "u8", "s8"]>.ret;
+  list<WMMA_REGS> ldst_cd_ops = MMA_LDST_OPS<
+            ["m16n16k16", "m32n8k16", "m8n32k16"],
+            ["c", "d"], ["f16", "f32", "s32"]>.ret;
+  list<WMMA_REGS> ldst_subint_ab_ops = MMA_LDST_OPS<
+            ["m8n8k32"], ["a", "b"], ["s4","u4"]>.ret;
+  list<WMMA_REGS> ldst_bit_ab_ops = MMA_LDST_OPS<
+            ["m8n8k128"], ["a", "b"], ["b1"]>.ret;
+  list<WMMA_REGS> ldst_subint_cd_ops = MMA_LDST_OPS<  // Covers both m8n8k32 and m8n8k128.
+            ["m8n8k32", "m8n8k128"],  ["c", "d"], ["s32"]>.ret;
+  list<WMMA_REGS> all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops,
+                                             ldst_subint_ab_ops,
+                                             ldst_bit_ab_ops,
+                                             ldst_subint_cd_ops);
+  // Split all_ldst_ops by fragment: A/B/C fragments are loads, D fragments are stores.
+  list<WMMA_REGS> all_ld_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
+                                      !listconcat(a, !if(!eq(b.frag,"d"), [],[b])));
+  list<WMMA_REGS> all_st_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
+                                      !listconcat(a, !if(!eq(b.frag,"d"), [b],[])));
+}
+
+def NVVM_MMA_OPS : NVVM_MMA_OPS;  // Record instance of the class above.
+
+// Returns [1] if this combination of layout/satf is supported, [] otherwise.
+// MMA ops must provide all parameters. Loads and stores -- only frags and layout_a.
+// The class is used to prevent generation of records for the unsupported variants.
+// E.g.
+// foreach _ = NVVM_MMA_SUPPORTED<...>.ret in
+//   def : FOO<>; // The record will only be defined for supported ops.
+//
+class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> {
+  // MMA ops check both layouts.
+  string mma = frags[0].ptx_elt_type
+               # ":" # layout_a
+               # ":" # layout_b;
+  // Load ops only need type/fragment/layout.
+  string ld = frags[0].ptx_elt_type
+               # ":" # frags[0].frag
+               # ":" # layout_a
+               ;
+  string ldf = frags[0].ptx_elt_type
+               # ":" # frags[0].frag
+               ;  // Type/fragment only: used for C and D, which are accepted with any layout.
+  string t = frags[0].ptx_elt_type;
+  list<int> ret = !cond(
+    // Sub-int MMA only supports fixed A/B layout: layout_a must be "row" and
+    // layout_b must be "col".
+    // b1 does not support .satf, so only satf == 0 is accepted for it; s4/u4
+    // are accepted with any satf value.
+    !eq(mma#":"#satf, "b1:row:col:0") : [1],
+    !eq(mma, "s4:row:col") : [1],
+    !eq(mma, "u4:row:col") : [1],
+    // Sub-int load/stores have fixed layout for A and B.
+    !and(!eq(layout_b, "-"), // It's a Load or Store op
+         !or(!eq(ld,  "b1:a:row"),
+             !eq(ld,  "b1:b:col"),
+             !eq(ldf, "b1:c"),
+             !eq(ldf, "b1:d"),
+             !eq(ld, "s4:a:row"),
+             !eq(ld, "s4:b:col"),
+             !eq(ldf, "s4:c"),
+             !eq(ldf, "s4:d"),
+             !eq(ld, "u4:a:row"),
+             !eq(ld, "u4:b:col"),
+             !eq(ldf, "u4:c"),
+             !eq(ldf, "u4:d"))) : [1],
+    // All other sub-int ops are not supported.
+    !eq(t, "b1") : [],
+    !eq(t, "s4") : [],
+    !eq(t, "u4") : [],
+    // All other (non sub-int) are OK.
+    1: [1]
+  );
+}
+
 let TargetPrefix = "nvvm" in {
   def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
       Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -689,14 +927,6 @@ let TargetPrefix = "nvvm" in {
                 [IntrNoMem]>;
 
 // Atomics not available as llvm intrinsics.
-  def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
-          [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
-                                      [IntrArgMemOnly, NoCapture<0>]>;
-  // Atomic add of f64 requires sm_60.
-  def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
-          [LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty],
-                                      [IntrArgMemOnly, NoCapture<0>]>;
-
   def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
           [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
                                       [IntrArgMemOnly, NoCapture<0>]>;
@@ -3674,11 +3904,19 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
 class PTXReadSRegIntrinsic_r32<string name>
   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-
 class PTXReadSRegIntrinsic_r64<string name>
   : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
 
+// Intrinsics to read registers with non-constant values, i.e. values that may
+// change over the kernel lifetime. Such reads should not be CSE'd.
+class PTXReadNCSRegIntrinsic_r32<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+class PTXReadNCSRegIntrinsic_r64<string name>
+  : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
 defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
 
@@ -3704,13 +3942,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge :
 def int_nvvm_read_ptx_sreg_lanemask_gt :
     PTXReadSRegIntrinsic_r32<"lanemask_gt">;
 
-def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
-def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
 
-def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
-def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
-def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
-def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
 
 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 
@@ -3882,166 +4120,59 @@ def int_nvvm_match_all_sync_i64p :
 //
 // WMMA instructions
 //
-
 // WMMA.LOAD
-class NVVM_WMMA_LD_GALSTS<string Geometry, string Abc, string Layout,
-                          string Type, LLVMType regty, int WithStride>
-  : Intrinsic<!if(!eq(Abc#Type,"cf16"),
-                  [regty, regty, regty, regty],
-                  [regty, regty, regty, regty,
-                   regty, regty, regty, regty]),
+class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
+  : Intrinsic<Frag.regs,
               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
               [IntrReadMem, IntrArgMemOnly, ReadOnly<0>, NoCapture<0>],
-              "llvm.nvvm.wmma."
-                # Geometry
-                # ".load"
-                # "." # Abc
-                # "." # Layout
-                # !if(WithStride, ".stride", "")
-                # "." # Type>;
-
-multiclass NVVM_WMMA_LD_GALT<string Geometry, string Abc, string Layout,
-                             string Type, LLVMType regty> {
-  def _stride: NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 1>;
-  def NAME   : NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 0>;
-}
-
-multiclass NVVM_WMMA_LD_GAT<string Geometry, string Abc,
-                           string Type, LLVMType regty> {
-  defm _row: NVVM_WMMA_LD_GALT<Geometry, Abc, "row", Type, regty>;
-  defm _col: NVVM_WMMA_LD_GALT<Geometry, Abc, "col", Type, regty>;
-}
-
-multiclass NVVM_WMMA_LD_G<string Geometry> {
-  defm _a_f16: NVVM_WMMA_LD_GAT<Geometry, "a", "f16", llvm_v2f16_ty>;
-  defm _b_f16: NVVM_WMMA_LD_GAT<Geometry, "b", "f16", llvm_v2f16_ty>;
-  defm _c_f16: NVVM_WMMA_LD_GAT<Geometry, "c", "f16", llvm_v2f16_ty>;
-  defm _c_f32: NVVM_WMMA_LD_GAT<Geometry, "c", "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_LD {
-  defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">;
-  defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">;
-  defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">;
-}
-
-defm int_nvvm_wmma: NVVM_WMMA_LD;
+              WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
 
 // WMMA.STORE.D
-class NVVM_WMMA_STD_GLSTS<string Geometry, string Layout,
-                          string Type, LLVMType regty, int WithStride,
-                          // This is only used to create a typed empty array we
-                          // need to pass to !if below.
-                          list<LLVMType>Empty=[]>
+class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
   : Intrinsic<[],
               !listconcat(
                 [llvm_anyptr_ty],
-                !if(!eq(Type,"f16"),
-                    [regty, regty, regty, regty],
-                    [regty, regty, regty, regty,
-                     regty, regty, regty, regty]),
-                !if(WithStride, [llvm_i32_ty], Empty)),
+                Frag.regs,
+                !if(WithStride, [llvm_i32_ty], [])),
               [IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
-              "llvm.nvvm.wmma."
-                   # Geometry
-                   # ".store.d"
-                   # "." # Layout
-                   # !if(WithStride, ".stride", "")
-                   # "." # Type>;
-
-multiclass NVVM_WMMA_STD_GLT<string Geometry, string Layout,
-                             string Type, LLVMType regty> {
-  def _stride: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 1>;
-  def NAME:    NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 0>;
-}
-
-multiclass NVVM_WMMA_STD_GT<string Geometry, string Type, LLVMType regty> {
-  defm _row: NVVM_WMMA_STD_GLT<Geometry, "row", Type, regty>;
-  defm _col: NVVM_WMMA_STD_GLT<Geometry, "col", Type, regty>;
-}
-multiclass NVVM_WMMA_STD_G<string Geometry> {
-  defm _d_f16: NVVM_WMMA_STD_GT<Geometry, "f16", llvm_v2f16_ty>;
-  defm _d_f32: NVVM_WMMA_STD_GT<Geometry, "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_STD {
-  defm _m32n8k16_store:  NVVM_WMMA_STD_G<"m32n8k16">;
-  defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">;
-  defm _m8n32k16_store:  NVVM_WMMA_STD_G<"m8n32k16">;
+              WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
+
+// Create all load/store variants.
+foreach layout = ["row", "col"] in {
+  foreach stride = [0, 1] in {
+    foreach frag = NVVM_MMA_OPS.all_ld_ops in
+      foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+        def WMMA_NAME_LDST<"load", frag, layout, stride>.record
+             : NVVM_WMMA_LD<frag, layout, stride>;
+    foreach frag = NVVM_MMA_OPS.all_st_ops in
+      foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+        def WMMA_NAME_LDST<"store", frag, layout, stride>.record
+             : NVVM_WMMA_ST<frag, layout, stride>;
+  }
 }
 
-defm int_nvvm_wmma: NVVM_WMMA_STD;
-
 // WMMA.MMA
-class NVVM_WMMA_MMA_GABDCS<string Geometry,
-                           string ALayout, string BLayout,
-                           string DType, LLVMType d_regty,
-                           string CType, LLVMType c_regty,
-                           string Satfinite = "">
-  : Intrinsic<!if(!eq(DType,"f16"),
-                      [d_regty, d_regty, d_regty, d_regty],
-                      [d_regty, d_regty, d_regty, d_regty,
-                       d_regty, d_regty, d_regty, d_regty]),
-              !listconcat(
-                [// A
-                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
-                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
-                // B
-                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
-                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
-                !if(!eq(CType,"f16"),
-                      [c_regty, c_regty, c_regty, c_regty],
-                      [c_regty, c_regty, c_regty, c_regty,
-                       c_regty, c_regty, c_regty, c_regty])),
+class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite,
+                    WMMA_REGS A, WMMA_REGS B,
+                    WMMA_REGS C, WMMA_REGS D>
+  : Intrinsic<D.regs,
+              !listconcat(A.regs, B.regs, C.regs),
               [IntrNoMem],
-              "llvm.nvvm.wmma."
-                # Geometry
-                # ".mma"
-                # "." # ALayout
-                # "." # BLayout
-                # "." # DType
-                # "." # CType
-                # Satfinite> {
-}
-
-multiclass NVVM_WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
-                               string DType, LLVMType d_regty,
-                               string CType, LLVMType c_regty> {
-  def NAME : NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
-                                  DType, d_regty, CType, c_regty>;
-  def _satfinite: NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
-                                       DType, d_regty, CType, c_regty,".satfinite">;
-}
-
-multiclass NVVM_WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
-                              string DType, LLVMType d_regty> {
-  defm _f16: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
-                                "f16", llvm_v2f16_ty>;
-  defm _f32: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
-                                "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
-  defm _f16: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", llvm_v2f16_ty>;
-  defm _f32: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_MMA_GA<string Geometry, string ALayout> {
-  defm _col: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "col">;
-  defm _row: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "row">;
-}
-
-multiclass NVVM_WMMA_MMA_G<string Geometry> {
-  defm _col: NVVM_WMMA_MMA_GA<Geometry, "col">;
-  defm _row: NVVM_WMMA_MMA_GA<Geometry, "row">;
-}
-
-multiclass NVVM_WMMA_MMA {
-  defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">;
-  defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">;
-  defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">;
-}
-
-defm int_nvvm_wmma : NVVM_WMMA_MMA;
+              WMMA_NAME_MMA<ALayout, BLayout, Satfinite, A, B, C, D>.llvm>;
+
+foreach layout_a = ["row", "col"] in {
+  foreach layout_b = ["row", "col"] in {
+    foreach satf = [0, 1] in {
+      foreach op = NVVM_MMA_OPS.all_mma_ops in {
+        foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+          def WMMA_NAME_MMA<layout_a, layout_b, satf,
+                            op[0], op[1], op[2], op[3]>.record
+            : NVVM_WMMA_MMA<layout_a, layout_b, satf,
+                            op[0], op[1], op[2], op[3]>;
+        }
+      }
+    } // satf
+  } // layout_b
+} // layout_a
 
 } // let TargetPrefix = "nvvm"
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 62b2e8f77e7d..f87317445753 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,8 @@
 let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   // dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
   def int_ppc_dcba  : Intrinsic<[], [llvm_ptr_ty], []>;
-  def int_ppc_dcbf  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbf  : GCCBuiltin<"__builtin_dcbf">,
+                      Intrinsic<[], [llvm_ptr_ty], []>;
   def int_ppc_dcbi  : Intrinsic<[], [llvm_ptr_ty], []>;
   def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
   def int_ppc_dcbt  : Intrinsic<[], [llvm_ptr_ty],
@@ -610,16 +610,16 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
   // FP <-> integer conversion.
   def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
   def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
   def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
   def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
 
   def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
@@ -716,11 +716,11 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
 def int_ppc_altivec_crypto_vshasigmad :
             GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
             Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 def int_ppc_altivec_crypto_vshasigmaw :
             GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
             Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
-                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                       llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 }
 def int_ppc_altivec_crypto_vcipher :
             PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">;
@@ -915,10 +915,10 @@ def int_ppc_vsx_xvxsigsp :
                             [llvm_v4f32_ty], [IntrNoMem]>;
 def int_ppc_vsx_xvtstdcdp :
       PowerPC_VSX_Intrinsic<"xvtstdcdp", [llvm_v2i64_ty],
-                            [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+                            [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_ppc_vsx_xvtstdcsp :
       PowerPC_VSX_Intrinsic<"xvtstdcsp", [llvm_v4i32_ty],
-                            [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem]>;
+                            [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 def int_ppc_vsx_xvcvhpsp :
       PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
                             [llvm_v8i16_ty],[IntrNoMem]>;
@@ -1113,9 +1113,9 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
 let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
 
 def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">,
-      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
 def int_ppc_tend : GCCBuiltin<"__builtin_tend">,
-      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
 
 def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
@@ -1167,4 +1167,9 @@ def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
       Intrinsic<[llvm_i64_ty], [], []>;
 
 def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
+
+// PowerPC set FPSCR Intrinsic Definitions.
+def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
+      Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
+
 }
diff --git a/include/llvm/IR/IntrinsicsRISCV.td b/include/llvm/IR/IntrinsicsRISCV.td
index 0ac7348b56db..60393189b830 100644
--- a/include/llvm/IR/IntrinsicsRISCV.td
+++ b/include/llvm/IR/IntrinsicsRISCV.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsRISCV.td - Defines RISCV intrinsics -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,13 +18,13 @@ let TargetPrefix = "riscv" in {
 class MaskedAtomicRMW32Intrinsic
     : Intrinsic<[llvm_i32_ty],
                 [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-                [IntrArgMemOnly, NoCapture<0>]>;
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
 
 class MaskedAtomicRMW32WithSextIntrinsic
     : Intrinsic<[llvm_i32_ty],
                 [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                  llvm_i32_ty],
-                [IntrArgMemOnly, NoCapture<0>]>;
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
 
 def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
 def int_riscv_masked_atomicrmw_add_i32  : MaskedAtomicRMW32Intrinsic;
@@ -39,6 +38,31 @@ def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
 def int_riscv_masked_cmpxchg_i32
     : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty,
                                 llvm_i32_ty, llvm_i32_ty],
-                [IntrArgMemOnly, NoCapture<0>]>;
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+
+class MaskedAtomicRMW64Intrinsic
+    : Intrinsic<[llvm_i64_ty],
+                [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
+
+class MaskedAtomicRMW64WithSextIntrinsic
+    : Intrinsic<[llvm_i64_ty],
+                [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty,
+                 llvm_i64_ty],
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+
+def int_riscv_masked_atomicrmw_xchg_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_add_i64  : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_sub_i64  : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_nand_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_max_i64  : MaskedAtomicRMW64WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_min_i64  : MaskedAtomicRMW64WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_umax_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_umin_i64 : MaskedAtomicRMW64Intrinsic;
+
+def int_riscv_masked_cmpxchg_i64
+    : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty,
+                                llvm_i64_ty, llvm_i64_ty],
+                [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
 
 } // TargetPrefix = "riscv"
diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td
index caa2ec209a31..40d6ba17eaf1 100644
--- a/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/include/llvm/IR/IntrinsicsSystemZ.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,7 +38,8 @@ class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
   : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
 
 class SystemZBinaryConvIntCC<LLVMType result, LLVMType arg>
-  : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>;
+  : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty],
+              [IntrNoMem, ImmArg<1>]>;
 
 class SystemZBinaryCC<LLVMType type>
   : SystemZBinaryConvCC<type, type>;
@@ -48,23 +48,28 @@ class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
   : GCCBuiltin<"__builtin_s390_" ## name>,
     Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
 
+class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
+  : Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>;
+
 class SystemZTernary<string name, LLVMType type>
   : SystemZTernaryConv<name, type, type>;
 
 class SystemZTernaryInt<string name, LLVMType type>
   : GCCBuiltin<"__builtin_s390_" ## name>,
-    Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>;
+    Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 
 class SystemZTernaryIntCC<LLVMType type>
-  : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>;
+  : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty],
+              [IntrNoMem, ImmArg<2>]>;
 
 class SystemZQuaternaryInt<string name, LLVMType type>
   : GCCBuiltin<"__builtin_s390_" ## name>,
-    Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>;
+    Intrinsic<[type], [type, type, type, llvm_i32_ty],
+    [IntrNoMem, ImmArg<3>]>;
 
 class SystemZQuaternaryIntCC<LLVMType type>
   : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty],
-              [IntrNoMem]>;
+              [IntrNoMem, ImmArg<3>]>;
 
 multiclass SystemZUnaryExtBHF<string name> {
   def b : SystemZUnaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
@@ -180,7 +185,8 @@ multiclass SystemZQuaternaryIntBHF<string name> {
   def f : SystemZQuaternaryInt<name##"f", llvm_v4i32_ty>;
 }
 
-multiclass SystemZQuaternaryIntBHFG<string name> : SystemZQuaternaryIntBHF<name> {
+multiclass SystemZQuaternaryIntBHFG<string name> :
+  SystemZQuaternaryIntBHF<name> {
   def g : SystemZQuaternaryInt<name##"g", llvm_v2i64_ty>;
 }
 
@@ -232,11 +238,11 @@ let TargetPrefix = "s390" in {
 let TargetPrefix = "s390" in {
   def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">,
                       Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-                                [IntrNoMem]>;
+                                [IntrNoMem, ImmArg<1>]>;
 
   def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">,
                       Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-                                [IntrReadMem, IntrArgMemOnly]>;
+                                [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
 
   def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">,
                      Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
@@ -245,7 +251,7 @@ let TargetPrefix = "s390" in {
   def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">,
                       Intrinsic<[llvm_v2i64_ty],
                                 [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-                                [IntrNoMem]>;
+                                [IntrNoMem, ImmArg<2>]>;
 
   def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">,
                        Intrinsic<[llvm_v16i8_ty],
@@ -311,7 +317,7 @@ let TargetPrefix = "s390" in {
   def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">,
                        Intrinsic<[llvm_v16i8_ty],
                                  [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-                                 [IntrNoMem]>;
+                                 [IntrNoMem, ImmArg<2>]>;
 
   defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">;
 
@@ -370,7 +376,7 @@ let TargetPrefix = "s390" in {
 
   def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty],
                                  [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
-                                 [IntrNoMem]>;
+                                 [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 
   // Instructions from the Vector Enhancements Facility 1
   def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty,
@@ -379,20 +385,20 @@ let TargetPrefix = "s390" in {
   def int_s390_vmslg  : GCCBuiltin<"__builtin_s390_vmslg">,
                         Intrinsic<[llvm_v16i8_ty],
                                   [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty,
-                                   llvm_i32_ty], [IntrNoMem]>;
+                                   llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
 
   def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty],
                                    [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
-                                   [IntrNoMem]>;
+                                   [IntrNoMem, ImmArg<2>]>;
   def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty],
                                    [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
-                                   [IntrNoMem]>;
+                                   [IntrNoMem, ImmArg<2>]>;
   def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty],
                                    [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
-                                   [IntrNoMem]>;
+                                   [IntrNoMem, ImmArg<2>]>;
   def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty],
                                    [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
-                                   [IntrNoMem]>;
+                                   [IntrNoMem, ImmArg<2>]>;
 
   def int_s390_vfcesbs  : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
   def int_s390_vfchsbs  : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
@@ -402,7 +408,7 @@ let TargetPrefix = "s390" in {
 
   def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty],
                                  [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
-                                 [IntrNoMem]>;
+                                 [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 
   // Instructions from the Vector Packed Decimal Facility
   def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
@@ -412,6 +418,24 @@ let TargetPrefix = "s390" in {
   def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
                        Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
                                  [IntrArgMemOnly, IntrWriteMem]>;
+
+  // Instructions from the Vector Enhancements Facility 2
+  def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
+                      Intrinsic<[llvm_v16i8_ty],
+                                [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                                [IntrNoMem, ImmArg<2>]>;
+
+  def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
+                      Intrinsic<[llvm_v16i8_ty],
+                                [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                                [IntrNoMem, ImmArg<2>]>;
+
+  def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+  def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+  def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
+  def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+  def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+  def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td
index b015650906e0..1b892727547d 100644
--- a/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsWebAssembly.td - Defines wasm intrinsics --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -41,8 +40,8 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
 
 // throw / rethrow
 def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
-                               [Throws, IntrNoReturn]>;
-def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
+                               [Throws, IntrNoReturn, ImmArg<0>]>;
+def int_wasm_rethrow_in_catch : Intrinsic<[], [], [Throws, IntrNoReturn]>;
 
 // Since wasm does not use landingpad instructions, these instructions return
 // exception pointer and selector values until we lower them in WasmEHPrepare.
@@ -50,17 +49,16 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty],
                                        [IntrHasSideEffects]>;
 def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
                                         [IntrHasSideEffects]>;
-
-// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
-// instruction.
-def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
-                               [IntrHasSideEffects]>;
+// This is the same as llvm.wasm.get.exception except that it does not take a
+// token operand. This is only for instruction selection purposes.
+def int_wasm_extract_exception : Intrinsic<[llvm_ptr_ty], [],
+                                           [IntrHasSideEffects]>;
 
 // WebAssembly EH must maintain the landingpads in the order assigned to them
 // by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
 // used in order to give them the indices in WasmEHPrepare.
 def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty],
-                                         [IntrNoMem]>;
+                                         [IntrNoMem, ImmArg<1>]>;
 
 // Returns LSDA address of the current function.
 def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
@@ -112,4 +110,27 @@ def int_wasm_alltrue :
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
+//===----------------------------------------------------------------------===//
+// Bulk memory intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_memory_init :
+  Intrinsic<[],
+            [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrWriteMem, IntrInaccessibleMemOrArgMemOnly, WriteOnly<2>,
+             IntrHasSideEffects, ImmArg<0>, ImmArg<1>]>;
+def int_wasm_data_drop :
+  Intrinsic<[],
+            [llvm_i32_ty],
+            [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>;
+
+//===----------------------------------------------------------------------===//
+// Thread-local storage intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_tls_size :
+  Intrinsic<[llvm_anyint_ty],
+            [],
+            [IntrNoMem, IntrSpeculatable]>;
+
 } // TargetPrefix = "wasm"
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 8d8cc8e97678..236d312d7d78 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1,9 +1,8 @@
 //===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 // Interrupt traps
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
+  def int_x86_int : Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -204,12 +203,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
   // NOTE: This comparison intrinsic is not used by clang as long as the
   //       distinction in signaling behaviour is not implemented.
   def int_x86_sse_cmp_ps :
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
                          llvm_v4f32_ty], [IntrNoMem]>;
@@ -278,9 +277,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Control register.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse_stmxcsr :
-              Intrinsic<[], [llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_ptr_ty],
+                         [IntrWriteMem, IntrArgMemOnly,
+                         // This prevents reordering with ldmxcsr
+                         IntrHasSideEffects]>;
   def int_x86_sse_ldmxcsr :
-              Intrinsic<[], [llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_ptr_ty],
+                        [IntrReadMem, IntrArgMemOnly, IntrHasSideEffects,
+                         // FIXME: LDMXCSR does not actually write to memory,
+                         // but Fast and DAG Isel both use writing to memory
+                         // as a proxy for having side effects.
+                         IntrWriteMem]>;
 }
 
 // Misc.
@@ -312,12 +319,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
   // NOTE: This comparison intrinsic is not used by clang as long as the
   //       distinction in signaling behaviour is not implemented.
   def int_x86_sse2_cmp_pd :
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+                         llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
                          llvm_v2f64_ty], [IntrNoMem]>;
@@ -367,6 +374,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
                          llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+  def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
                          llvm_v16i8_ty], [IntrNoMem, Commutative]>;
@@ -399,6 +412,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
 
+  // Oddly these don't require an immediate due to a gcc compatibility issue.
   def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -604,7 +618,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_v16i8_ty], [IntrNoMem]>;
   def int_x86_sse_pshuf_w           : GCCBuiltin<"__builtin_ia32_pshufw">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
-                         [IntrNoMem]>;
+                         [IntrNoMem, ImmArg<1>]>;
 }
 
 // Sign ops
@@ -650,16 +664,16 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_round_ss        : GCCBuiltin<"__builtin_ia32_roundss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse41_round_ps        : GCCBuiltin<"__builtin_ia32_roundps">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
   def int_x86_sse41_round_sd        : GCCBuiltin<"__builtin_ia32_roundsd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse41_round_pd        : GCCBuiltin<"__builtin_ia32_roundpd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 }
 
 // Vector min element
@@ -722,20 +736,20 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_aesni_aeskeygenassist :
               GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
 }
 
 // PCLMUL instructions
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
           Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<2>]>;
   def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">,
           Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<2>]>;
   def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">,
           Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<2>]>;
 }
 
 // Vector pack
@@ -749,7 +763,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_insertps       : GCCBuiltin<"__builtin_ia32_insertps128">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<2>]>;
 }
 
 // Vector blend
@@ -769,17 +783,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_dppd            : GCCBuiltin<"__builtin_ia32_dppd">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
-                    [IntrNoMem, Commutative]>;
+                    [IntrNoMem, Commutative, ImmArg<2>]>;
   def int_x86_sse41_dpps            : GCCBuiltin<"__builtin_ia32_dpps">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-                    [IntrNoMem, Commutative]>;
+                    [IntrNoMem, Commutative, ImmArg<2>]>;
 }
 
 // Vector sum of absolute differences
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse41_mpsadbw         : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
           Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
-                    [IntrNoMem, Commutative]>;
+                    [IntrNoMem, Commutative, ImmArg<2>]>;
 }
 
 // Test instruction with bitwise comparison.
@@ -820,66 +834,66 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_sse42_pcmpistrm128  : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
     Intrinsic<[llvm_v16i8_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistri128  : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<2>]>;
   def int_x86_sse42_pcmpestrm128  : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
     Intrinsic<[llvm_v16i8_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestri128  : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
   def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
     Intrinsic<[llvm_i32_ty],
         [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
          llvm_i8_ty],
-        [IntrNoMem]>;
+        [IntrNoMem, ImmArg<4>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -888,13 +902,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
     Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
-              [IntrNoMem]>;
+              [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
   def int_x86_sse4a_extrq  : GCCBuiltin<"__builtin_ia32_extrq">,
     Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
 
   def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
     Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                                llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
+                                llvm_i8_ty, llvm_i8_ty],
+              [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
   def int_x86_sse4a_insertq  : GCCBuiltin<"__builtin_ia32_insertq">,
     Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 }
@@ -931,10 +946,10 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
         Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
-                  llvm_i32_ty], [IntrNoMem]>;
+                  llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
-                  llvm_i32_ty], [IntrNoMem]>;
+                  llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 }
 
 // Horizontal ops
@@ -1086,33 +1101,33 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
          GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">,
           Intrinsic<[llvm_v16i8_ty],
           [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
   def int_x86_vgf2p8affineinvqb_256 :
          GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">,
           Intrinsic<[llvm_v32i8_ty],
           [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
   def int_x86_vgf2p8affineinvqb_512 :
          GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">,
           Intrinsic<[llvm_v64i8_ty],
           [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_vgf2p8affineqb_128 :
          GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">,
           Intrinsic<[llvm_v16i8_ty],
           [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
   def int_x86_vgf2p8affineqb_256 :
          GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">,
           Intrinsic<[llvm_v32i8_ty],
           [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
   def int_x86_vgf2p8affineqb_512 :
          GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">,
           Intrinsic<[llvm_v64i8_ty],
           [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_vgf2p8mulb_128     :
          GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">,
@@ -1145,17 +1160,18 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
-                  llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
+                  llvm_v8f32_ty, llvm_i8_ty],
+                  [IntrNoMem, Commutative, ImmArg<2>]>;
 }
 
 // Vector compare
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx_cmp_pd_256 :
         Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
-                  llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+                  llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx_cmp_ps_256 :
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
-                  llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+                  llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
 }
 
 // Vector convert
@@ -1222,30 +1238,30 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_avx512_fpclass_pd_128 :
           Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_fpclass_pd_256 :
           Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_fpclass_pd_512 :
           Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_fpclass_ps_128 :
           Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_fpclass_ps_256 :
           Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_fpclass_ps_512 :
           Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_fpclass_sd :
          GCCBuiltin<"__builtin_ia32_fpclasssd_mask">,
           Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_fpclass_ss :
          GCCBuiltin<"__builtin_ia32_fpclassss_mask">,
           Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
 }
 
 // Vector extract sign mask
@@ -1328,6 +1344,12 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
                          llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
+              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+                         llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+                         llvm_v16i16_ty], [IntrNoMem, Commutative]>;
   def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
               Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
                          llvm_v32i8_ty], [IntrNoMem, Commutative]>;
@@ -1360,6 +1382,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
                          llvm_v4i32_ty], [IntrNoMem]>;
 
+  // Oddly these don't require an immediate due to a gcc compatibility issue.
   def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -1392,6 +1415,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
                          llvm_v2i64_ty], [IntrNoMem]>;
 
+  // Oddly these don't require an immediate due to a gcc compatibility issue.
   def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -1427,6 +1451,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
                          llvm_v2i64_ty], [IntrNoMem]>;
 
+  // Oddly these don't require an immediate due to a gcc compatibility issue.
   def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">,
               Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -1677,71 +1702,73 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Gather ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
   def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
       Intrinsic<[llvm_v2f64_ty],
         [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
       Intrinsic<[llvm_v4f64_ty],
         [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
       Intrinsic<[llvm_v2f64_ty],
         [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
       Intrinsic<[llvm_v4f64_ty],
         [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
       Intrinsic<[llvm_v4f32_ty],
         [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
       Intrinsic<[llvm_v8f32_ty],
         [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
       Intrinsic<[llvm_v4f32_ty],
         [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
       Intrinsic<[llvm_v4f32_ty],
         [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
       Intrinsic<[llvm_v2i64_ty],
         [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
       Intrinsic<[llvm_v4i64_ty],
         [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
       Intrinsic<[llvm_v2i64_ty],
         [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
       Intrinsic<[llvm_v4i64_ty],
         [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
       Intrinsic<[llvm_v4i32_ty],
         [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
       Intrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
       Intrinsic<[llvm_v4i32_ty],
         [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
       Intrinsic<[llvm_v4i32_ty],
         [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
-        [IntrReadMem, IntrArgMemOnly]>;
+        [IntrReadMem, ImmArg<4>]>;
 }
 
 // Misc.
@@ -1753,7 +1780,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                          llvm_v32i8_ty], [IntrNoMem]>;
   def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
-                         llvm_i8_ty], [IntrNoMem, Commutative]>;
+                         llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<2>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1763,32 +1790,32 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_vfmadd_pd_512 :
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_vfmadd_ps_512 :
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   // TODO: Can we use 2 vfmadds+shufflevector?
   def int_x86_avx512_vfmaddsub_pd_512 :
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_vfmaddsub_ps_512 :
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_vfmadd_f64 :
           Intrinsic<[llvm_double_ty],
                     [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_vfmadd_f32 :
           Intrinsic<[llvm_float_ty],
                     [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_vpmadd52h_uq_128 :
               GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
@@ -1878,23 +1905,23 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                                           llvm_v2i64_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_xop_vpermil2pd_256 :
               GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
               Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
                                           llvm_v4i64_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                                           llvm_v4i32_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<3>]>;
   def int_x86_xop_vpermil2ps_256 :
               GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
               Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
                                           llvm_v8i32_ty, llvm_i8_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
@@ -1909,31 +1936,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
               Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
 
-  def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
-              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                         llvm_i8_ty], [IntrNoMem]>;
-
   def int_x86_xop_vphaddbd :
               GCCBuiltin<"__builtin_ia32_vphaddbd">,
               Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -2261,6 +2263,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_x86mmx_ty], [IntrNoMem]>;
 
+  // Oddly these don't require an immediate due to a gcc compatibility issue.
   def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
                          llvm_i32_ty], [IntrNoMem]>;
@@ -2398,15 +2401,15 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
-                        llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
+                        llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
               Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
               Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
-                        llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                        llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2527,13 +2530,14 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
   def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
               Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
-                                           llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                           llvm_i16_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
               Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
                                            llvm_i8_ty], [IntrNoMem]>;
@@ -2542,13 +2546,16 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                                            llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
               Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
-                                           llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+                                           llvm_v16i16_ty, llvm_i16_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty,
-                                           llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+                                           llvm_v8i16_ty, llvm_i8_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
               Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty,
-                                           llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+                                           llvm_v8i16_ty, llvm_i8_ty],
+                        [IntrNoMem, ImmArg<1>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2556,9 +2563,11 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
-        Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+        Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
-        Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+                  [IntrNoMem, ImmArg<1>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2604,7 +2613,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
               Intrinsic<[], [], []>;
   def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
-              Intrinsic<[], [llvm_i8_ty], []>;
+              Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>;
   def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
               Intrinsic<[llvm_i32_ty], [], []>;
 }
@@ -2645,55 +2654,71 @@ let TargetPrefix = "x86" in {
 // Conversion ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">,
-              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">,
-              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+              Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 }
 
 // Pack ops.
@@ -2714,11 +2739,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Vector convert
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx512_mask_cvtdq2ps_512 :
-        GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16i32_ty, llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+  def int_x86_avx512_sitofp_round :
+          Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<1>]>;
+
+  def int_x86_avx512_uitofp_round :
+          Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_avx512_mask_cvtpd2dq_128 :
         GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
@@ -2730,25 +2757,25 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8f64_ty, llvm_v8i32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtpd2ps_512 :
         GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f64_ty, llvm_v8f32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtsd2ss_round :
         GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_cvtss2sd_round :
         GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_cvtpd2ps :
         GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
@@ -2772,7 +2799,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f64_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtpd2udq_128 :
         GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
@@ -2790,7 +2817,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8f64_ty, llvm_v8i32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtpd2uqq_128 :
         GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">,
@@ -2808,7 +2835,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f64_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtps2dq_128 :
         GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">,
@@ -2826,13 +2853,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
           Intrinsic<[llvm_v16i32_ty],
           [llvm_v16f32_ty, llvm_v16i32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtps2pd_512 :
         GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f32_ty, llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtps2qq_128 :
         GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">,
@@ -2850,7 +2877,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtps2udq_128 :
         GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">,
@@ -2868,7 +2895,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
           Intrinsic<[llvm_v16i32_ty],
           [llvm_v16f32_ty, llvm_v16i32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtps2uqq_128 :
         GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">,
@@ -2886,13 +2913,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtqq2pd_512 :
-        GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8i64_ty, llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtqq2ps_128 :
         GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
@@ -2900,18 +2921,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v2i64_ty, llvm_v4f32_ty,  llvm_i8_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtqq2ps_256 :
-        GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i64_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtqq2ps_512 :
-        GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i64_ty, llvm_v8f32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_cvttpd2dq_128 :
         GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
           Intrinsic<[llvm_v4i32_ty],
@@ -2922,7 +2931,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8f64_ty, llvm_v8i32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttpd2qq_128 :
         GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">,
@@ -2940,7 +2949,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f64_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttpd2udq_128 :
         GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">,
@@ -2958,7 +2967,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8f64_ty, llvm_v8i32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttpd2uqq_128 :
         GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">,
@@ -2976,13 +2985,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f64_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttps2dq_512 :
         GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
           Intrinsic<[llvm_v16i32_ty],
           [llvm_v16f32_ty, llvm_v16i32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttps2qq_128 :
         GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">,
@@ -3000,7 +3009,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttps2udq_128 :
         GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">,
@@ -3018,7 +3027,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
           Intrinsic<[llvm_v16i32_ty],
           [llvm_v16f32_ty, llvm_v16i32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvttps2uqq_128 :
         GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">,
@@ -3036,19 +3045,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">,
           Intrinsic<[llvm_v8i64_ty],
           [llvm_v8f32_ty, llvm_v8i64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtudq2ps_512 :
-        GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-          [llvm_v16i32_ty, llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtuqq2pd_512 :
-        GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-          [llvm_v8i64_ty, llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_cvtuqq2ps_128 :
         GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
@@ -3056,72 +3053,78 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v2i64_ty, llvm_v4f32_ty,  llvm_i8_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_cvtuqq2ps_256 :
-        GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-          [llvm_v4i64_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
-
-  def int_x86_avx512_mask_cvtuqq2ps_512 :
-        GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-          [llvm_v8i64_ty, llvm_v8f32_ty,  llvm_i8_ty,  llvm_i32_ty],
-          [IntrNoMem]>;
-
   def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
-                                     llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v2f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
         Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
-                                     llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v4f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
-                                     llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
   def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
-                                     llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v4f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
-                                     llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v8f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
-                                     llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_i16_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
   def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
-                                     llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v2f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
         Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
-                                     llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v4f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
-                                     llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
   def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
-                                     llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v4f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
-                                     llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                     llvm_v8f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
-                                     llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_i16_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
 def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
-                                    llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                    llvm_v2f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<2>]>;
 def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
         Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
-                                    llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                    llvm_v4f64_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<2>]>;
 def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
-                                    llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty],
+                  [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
 def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
-                                    llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                    llvm_v4f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<2>]>;
 def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
         Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty,
-                                    llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+                                    llvm_v8f32_ty,  llvm_i8_ty],
+                  [IntrNoMem, ImmArg<2>]>;
 def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
-                                     llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty],
+                  [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
 }
 
 // Vector load with broadcast
@@ -3151,109 +3154,111 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                     llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                                      llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                       llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                                      llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                       llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                                      llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                       llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
-                                     [IntrNoMem]>;
+                                     [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
   def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                                      llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                      llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
+                                     [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                                      llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                      llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
+                                     [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -3262,7 +3267,8 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                     llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>;
   def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">,
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                    llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                      llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">,
           Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                     llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -3271,99 +3277,104 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                     llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                    llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                       llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_sqrt_ss :
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_sqrt_sd :
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_sqrt_pd_512 :
-        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem]>;
+        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_sqrt_ps_512 :
-        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem]>;
+        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<1>]>;
   def int_x86_avx512_mask_fixupimm_pd_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_maskz_fixupimm_pd_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_fixupimm_pd_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_maskz_fixupimm_pd_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_fixupimm_pd_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_maskz_fixupimm_pd_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_mask_fixupimm_ps_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_maskz_fixupimm_ps_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_fixupimm_ps_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">,
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_maskz_fixupimm_ps_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_fixupimm_ps_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">,
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
-          llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+          llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_maskz_fixupimm_ps_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
-          llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+          llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_mask_fixupimm_sd :
          GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_maskz_fixupimm_sd :
          GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_mask_fixupimm_ss :
          GCCBuiltin<"__builtin_ia32_fixupimmss_mask">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_maskz_fixupimm_ss :
          GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
-          llvm_i32_ty], [IntrNoMem]>;
+          llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
   def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                                     llvm_i8_ty], [IntrNoMem]>;
@@ -3372,7 +3383,8 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                                     llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                                      llvm_i8_ty], [IntrNoMem]>;
@@ -3381,62 +3393,65 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                                      llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                                     llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                     llvm_i16_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">,
         Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                                    llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                    llvm_i8_ty, llvm_i32_ty],
+                  [IntrNoMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_getmant_pd_128 :
          GCCBuiltin<"__builtin_ia32_getmantpd128_mask">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_avx512_mask_getmant_pd_256 :
          GCCBuiltin<"__builtin_ia32_getmantpd256_mask">,
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_avx512_mask_getmant_pd_512 :
          GCCBuiltin<"__builtin_ia32_getmantpd512_mask">,
           Intrinsic<[llvm_v8f64_ty],
           [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty,  llvm_i8_ty,llvm_i32_ty ],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
 
   def int_x86_avx512_mask_getmant_ps_128 :
          GCCBuiltin<"__builtin_ia32_getmantps128_mask">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_avx512_mask_getmant_ps_256 :
          GCCBuiltin<"__builtin_ia32_getmantps256_mask">,
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty,  llvm_i8_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>]>;
 
   def int_x86_avx512_mask_getmant_ps_512 :
          GCCBuiltin<"__builtin_ia32_getmantps512_mask">,
           Intrinsic<[llvm_v16f32_ty],
           [llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty],
-          [IntrNoMem]>;
+          [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
 
   def int_x86_avx512_mask_getmant_ss :
          GCCBuiltin<"__builtin_ia32_getmantss_round_mask">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty,
-           llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+           llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
 
   def int_x86_avx512_mask_getmant_sd :
          GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty,
-           llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+           llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
 
   def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
@@ -3491,41 +3506,41 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">,
             Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">,
             Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                                        llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                        llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">,
             Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+                                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">,
             Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                                        llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                                        llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
             Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                                         llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
             Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                                         llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
             Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
                                          llvm_i16_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
             Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
                                         llvm_i8_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<3>]>;
   def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
             Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
                                         llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
             Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                                         llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
-                      [IntrNoMem]>;
+                      [IntrNoMem, ImmArg<4>]>;
   def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
                         [IntrNoMem, Commutative]>;
@@ -3538,6 +3553,12 @@ let TargetPrefix = "x86" in {
   def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">,
               Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
                          llvm_v32i16_ty], [IntrNoMem, Commutative]>;
+  def int_x86_avx512_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512">,
+          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512">,
+          Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
+                    [IntrNoMem]>;
   def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">,
               Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty,
                          llvm_v32i16_ty], [IntrNoMem, Commutative]>;
@@ -3548,582 +3569,553 @@ let TargetPrefix = "x86" in {
   def int_x86_avx512_dbpsadbw_128 :
          GCCBuiltin<"__builtin_ia32_dbpsadbw128">,
           Intrinsic<[llvm_v8i16_ty],
-                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                    [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_avx512_dbpsadbw_256 :
          GCCBuiltin<"__builtin_ia32_dbpsadbw256">,
           Intrinsic<[llvm_v16i16_ty],
-                    [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                    [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_avx512_dbpsadbw_512 :
          GCCBuiltin<"__builtin_ia32_dbpsadbw512">,
           Intrinsic<[llvm_v32i16_ty],
-                    [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                    [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<2>]>;
 }
 
 // Gather and Scatter ops
 let TargetPrefix = "x86" in {
   // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
-  def int_x86_avx512_gather_dpd_512  : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
+  def int_x86_avx512_gather_dpd_512  :
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
                      llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_dps_512  : GCCBuiltin<"__builtin_ia32_gathersiv16sf">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_dps_512  :
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
                      llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_qpd_512  : GCCBuiltin<"__builtin_ia32_gatherdiv8df">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_qpd_512  :
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_qps_512  : GCCBuiltin<"__builtin_ia32_gatherdiv16sf">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_qps_512  :
           Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
 
 
-  def int_x86_avx512_gather_dpq_512  : GCCBuiltin<"__builtin_ia32_gathersiv8di">,
+  def int_x86_avx512_gather_dpq_512  :
           Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
                      llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_dpi_512  : GCCBuiltin<"__builtin_ia32_gathersiv16si">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_dpi_512  :
           Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
                      llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_qpq_512  : GCCBuiltin<"__builtin_ia32_gatherdiv8di">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_qpq_512  :
           Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
-  def int_x86_avx512_gather_qpi_512  : GCCBuiltin<"__builtin_ia32_gatherdiv16si">,
+                    [IntrReadMem, ImmArg<4>]>;
+  def int_x86_avx512_gather_qpi_512  :
           Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div2_df :
-        GCCBuiltin<"__builtin_ia32_gather3div2df">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div2_di :
-        GCCBuiltin<"__builtin_ia32_gather3div2di">,
           Intrinsic<[llvm_v2i64_ty],
           [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div4_df :
-        GCCBuiltin<"__builtin_ia32_gather3div4df">,
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div4_di :
-        GCCBuiltin<"__builtin_ia32_gather3div4di">,
           Intrinsic<[llvm_v4i64_ty],
           [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div4_sf :
-        GCCBuiltin<"__builtin_ia32_gather3div4sf">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div4_si :
-        GCCBuiltin<"__builtin_ia32_gather3div4si">,
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div8_sf :
-        GCCBuiltin<"__builtin_ia32_gather3div8sf">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3div8_si :
-        GCCBuiltin<"__builtin_ia32_gather3div8si">,
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv2_df :
-        GCCBuiltin<"__builtin_ia32_gather3siv2df">,
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv2_di :
-        GCCBuiltin<"__builtin_ia32_gather3siv2di">,
           Intrinsic<[llvm_v2i64_ty],
           [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv4_df :
-        GCCBuiltin<"__builtin_ia32_gather3siv4df">,
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv4_di :
-        GCCBuiltin<"__builtin_ia32_gather3siv4di">,
           Intrinsic<[llvm_v4i64_ty],
           [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv4_sf :
-        GCCBuiltin<"__builtin_ia32_gather3siv4sf">,
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv4_si :
-        GCCBuiltin<"__builtin_ia32_gather3siv4si">,
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv8_sf :
-        GCCBuiltin<"__builtin_ia32_gather3siv8sf">,
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_gather3siv8_si :
-        GCCBuiltin<"__builtin_ia32_gather3siv8si">,
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
 // scatter
   // NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
-  def int_x86_avx512_scatter_dpd_512  : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
+  def int_x86_avx512_scatter_dpd_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
                         llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_dps_512  : GCCBuiltin<"__builtin_ia32_scattersiv16sf">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_dps_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
                        llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_qpd_512  : GCCBuiltin<"__builtin_ia32_scatterdiv8df">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_qpd_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
                      llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_qps_512  : GCCBuiltin<"__builtin_ia32_scatterdiv16sf">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_qps_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
                      llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
 
 
-  def int_x86_avx512_scatter_dpq_512  : GCCBuiltin<"__builtin_ia32_scattersiv8di">,
+  def int_x86_avx512_scatter_dpq_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
                          llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_dpi_512  : GCCBuiltin<"__builtin_ia32_scattersiv16si">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_dpi_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
                      llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_qpq_512  : GCCBuiltin<"__builtin_ia32_scatterdiv8di">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_qpq_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty,
                          llvm_i32_ty],
-                    [IntrArgMemOnly]>;
-  def int_x86_avx512_scatter_qpi_512  : GCCBuiltin<"__builtin_ia32_scatterdiv16si">,
+                    [ImmArg<4>]>;
+  def int_x86_avx512_scatter_qpi_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty,
                          llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv2_df :
-       GCCBuiltin<"__builtin_ia32_scatterdiv2df">,
         Intrinsic<[],
         [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
-        [IntrArgMemOnly]>;
+        [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv2_di :
-        GCCBuiltin<"__builtin_ia32_scatterdiv2di">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv4_df :
-        GCCBuiltin<"__builtin_ia32_scatterdiv4df">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv4_di :
-        GCCBuiltin<"__builtin_ia32_scatterdiv4di">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv4_sf :
-        GCCBuiltin<"__builtin_ia32_scatterdiv4sf">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv4_si :
-        GCCBuiltin<"__builtin_ia32_scatterdiv4si">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv8_sf :
-        GCCBuiltin<"__builtin_ia32_scatterdiv8sf">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scatterdiv8_si :
-        GCCBuiltin<"__builtin_ia32_scatterdiv8si">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv2_df :
-        GCCBuiltin<"__builtin_ia32_scattersiv2df">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv2_di :
-        GCCBuiltin<"__builtin_ia32_scattersiv2di">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv4_df :
-        GCCBuiltin<"__builtin_ia32_scattersiv4df">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv4_di :
-        GCCBuiltin<"__builtin_ia32_scattersiv4di">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv4_sf :
-        GCCBuiltin<"__builtin_ia32_scattersiv4sf">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv4_si :
-        GCCBuiltin<"__builtin_ia32_scattersiv4si">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv8_sf :
-        GCCBuiltin<"__builtin_ia32_scattersiv8sf">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_scattersiv8_si :
-        GCCBuiltin<"__builtin_ia32_scattersiv8si">,
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   // gather prefetch
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
   def int_x86_avx512_gatherpf_dpd_512  : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_gatherpf_dps_512  : GCCBuiltin<"__builtin_ia32_gatherpfdps">,
           Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_gatherpf_qpd_512  : GCCBuiltin<"__builtin_ia32_gatherpfqpd">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_gatherpf_qps_512  : GCCBuiltin<"__builtin_ia32_gatherpfqps">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
 
   // scatter prefetch
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
   def int_x86_avx512_scatterpf_dpd_512  : GCCBuiltin<"__builtin_ia32_scatterpfdpd">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_scatterpf_dps_512  : GCCBuiltin<"__builtin_ia32_scatterpfdps">,
           Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_scatterpf_qpd_512  : GCCBuiltin<"__builtin_ia32_scatterpfqpd">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
   def int_x86_avx512_scatterpf_qps_512  : GCCBuiltin<"__builtin_ia32_scatterpfqps">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+                     llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
 }
 
 // AVX512 gather/scatter intrinsics that use vXi1 masks.
 let TargetPrefix = "x86" in {
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
   def int_x86_avx512_mask_gather_dpd_512  :
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
                      llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_dps_512  :
           Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
                      llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_qpd_512  :
           Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_qps_512  :
           Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
 
 
   def int_x86_avx512_mask_gather_dpq_512  :
           Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
                      llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_dpi_512  :
           Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
                      llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_qpq_512  :
           Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
   def int_x86_avx512_mask_gather_qpi_512  :
           Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
                      llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
-                    [IntrReadMem, IntrArgMemOnly]>;
+                    [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div2_df :
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div2_di :
           Intrinsic<[llvm_v2i64_ty],
           [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div4_df :
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div4_di :
           Intrinsic<[llvm_v4i64_ty],
           [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div4_sf :
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div4_si :
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div8_sf :
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3div8_si :
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv2_df :
           Intrinsic<[llvm_v2f64_ty],
           [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv2_di :
           Intrinsic<[llvm_v2i64_ty],
           [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv4_df :
           Intrinsic<[llvm_v4f64_ty],
           [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv4_di :
           Intrinsic<[llvm_v4i64_ty],
           [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv4_sf :
           Intrinsic<[llvm_v4f32_ty],
           [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv4_si :
           Intrinsic<[llvm_v4i32_ty],
           [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv8_sf :
           Intrinsic<[llvm_v8f32_ty],
           [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_gather3siv8_si :
           Intrinsic<[llvm_v8i32_ty],
           [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
-          [IntrReadMem, IntrArgMemOnly]>;
+          [IntrReadMem, ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatter_dpd_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
                         llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_dps_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
                        llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_qpd_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
                      llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_qps_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
                      llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
 
 
+  // NOTE: These can't be ArgMemOnly because you can put the address completely
+  // in the index register.
   def int_x86_avx512_mask_scatter_dpq_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
                          llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_dpi_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
                      llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_qpq_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,llvm_v8i64_ty, llvm_v8i64_ty,
                          llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
   def int_x86_avx512_mask_scatter_qpi_512  :
           Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty,
                          llvm_i32_ty],
-                    [IntrArgMemOnly]>;
+                    [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv2_df :
         Intrinsic<[],
         [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
-        [IntrArgMemOnly]>;
+        [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv2_di :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv4_df :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv4_di :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv4_sf :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv4_si :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv8_sf :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scatterdiv8_si :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv2_df :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv2_di :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv4_df :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv4_di :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv4_sf :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv4_si :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv8_sf :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 
   def int_x86_avx512_mask_scattersiv8_si :
           Intrinsic<[],
           [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
-          [IntrArgMemOnly]>;
+          [ImmArg<4>]>;
 }
 
 // AVX-512 conflict detection instruction
 // Instructions that count the number of leading zero bits
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_conflict_d_128 :
-          GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">,
-          Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_d_256 :
-          GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">,
-          Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_d_512 :
-          GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
-          Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_conflict_q_128 :
-          GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">,
-          Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_q_256 :
-          GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">,
-          Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_q_512 :
-          GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
-          Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
+  def int_x86_avx512_conflict_d_128 :
+          GCCBuiltin<"__builtin_ia32_vpconflictsi_128">,
+          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_conflict_d_256 :
+          GCCBuiltin<"__builtin_ia32_vpconflictsi_256">,
+          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_conflict_d_512 :
+          GCCBuiltin<"__builtin_ia32_vpconflictsi_512">,
+          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_conflict_q_128 :
+          GCCBuiltin<"__builtin_ia32_vpconflictdi_128">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_conflict_q_256 :
+          GCCBuiltin<"__builtin_ia32_vpconflictdi_256">,
+          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_conflict_q_512 :
+          GCCBuiltin<"__builtin_ia32_vpconflictdi_512">,
+          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>;
 }
 
 // Compares
@@ -4131,164 +4123,26 @@ let TargetPrefix = "x86" in {
   // 512-bit
   def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">,
               Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
-                         llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
   def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
               Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
-                         llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
 }
 
 // Compress, Expand
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_compress_ps_512 :
-                             GCCBuiltin<"__builtin_ia32_compresssf512_mask">,
-        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_pd_512 :
-                             GCCBuiltin<"__builtin_ia32_compressdf512_mask">,
-        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_ps_256 :
-                             GCCBuiltin<"__builtin_ia32_compresssf256_mask">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_pd_256 :
-                             GCCBuiltin<"__builtin_ia32_compressdf256_mask">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_ps_128 :
-                             GCCBuiltin<"__builtin_ia32_compresssf128_mask">,
-        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_pd_128 :
-                             GCCBuiltin<"__builtin_ia32_compressdf128_mask">,
-        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_compress_d_512 :
-                             GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_q_512 :
-                             GCCBuiltin<"__builtin_ia32_compressdi512_mask">,
-        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_d_256 :
-                             GCCBuiltin<"__builtin_ia32_compresssi256_mask">,
-        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_q_256 :
-                             GCCBuiltin<"__builtin_ia32_compressdi256_mask">,
-        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_d_128 :
-                             GCCBuiltin<"__builtin_ia32_compresssi128_mask">,
-        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_q_128 :
-                             GCCBuiltin<"__builtin_ia32_compressdi128_mask">,
-        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_compress_b_512 :
-                             GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
-        Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
-                   llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_w_512 :
-                             GCCBuiltin<"__builtin_ia32_compresshi512_mask">,
-        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_b_256 :
-                             GCCBuiltin<"__builtin_ia32_compressqi256_mask">,
-        Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_w_256 :
-                             GCCBuiltin<"__builtin_ia32_compresshi256_mask">,
-        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_b_128 :
-                             GCCBuiltin<"__builtin_ia32_compressqi128_mask">,
-        Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_compress_w_128 :
-                             GCCBuiltin<"__builtin_ia32_compresshi128_mask">,
-        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-
-// expand
-  def int_x86_avx512_mask_expand_ps_512 :
-                             GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
-        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_pd_512 :
-                             GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
-        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_ps_256 :
-                             GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
-        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_pd_256 :
-                             GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
-        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_ps_128 :
-                             GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
-        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_pd_128 :
-                             GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
-        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_expand_d_512 :
-                             GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_q_512 :
-                             GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
-        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_d_256 :
-                             GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
-        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_q_256 :
-                             GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
-        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_d_128 :
-                             GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
-        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_q_128 :
-                             GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
-        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_expand_b_512 :
-                            GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
-        Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
-                   llvm_i64_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_w_512 :
-                            GCCBuiltin<"__builtin_ia32_expandhi512_mask">,
-        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_b_256 :
-                            GCCBuiltin<"__builtin_ia32_expandqi256_mask">,
-        Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
-                   llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_w_256 :
-                            GCCBuiltin<"__builtin_ia32_expandhi256_mask">,
-        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_b_128 :
-                            GCCBuiltin<"__builtin_ia32_expandqi128_mask">,
-        Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
-                   llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_expand_w_128 :
-                            GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
-        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
-                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress :
+        Intrinsic<[llvm_anyvector_ty],
+                  [LLVMMatchType<0>, LLVMMatchType<0>,
+                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                  [IntrNoMem]>;
+  def int_x86_avx512_mask_expand :
+        Intrinsic<[llvm_anyvector_ty],
+                  [LLVMMatchType<0>, LLVMMatchType<0>,
+                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+                  [IntrNoMem]>;
 }
 
 // truncate
@@ -4502,10 +4356,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_qd_256 : // FIXME: Replace with trunc+select.
-          Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
   def int_x86_avx512_mask_pmov_qd_mem_256 :
           GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
           Intrinsic<[],
@@ -4531,10 +4381,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_qd_512 : // FIXME: Replace with trunc+select.
-          Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
   def int_x86_avx512_mask_pmov_qd_mem_512 :
           GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
           Intrinsic<[],
@@ -4768,10 +4614,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_wb_256 : // FIXME: Replace with trunc+select.
-          Intrinsic<[llvm_v16i8_ty],
-                    [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
   def int_x86_avx512_mask_pmov_wb_mem_256 :
           GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
           Intrinsic<[],
@@ -4797,10 +4639,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
                     [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
                     [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_pmov_wb_512 : // FIXME: Replace with trunc+select.
-          Intrinsic<[llvm_v32i8_ty],
-                    [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
   def int_x86_avx512_mask_pmov_wb_mem_512 :
           GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
           Intrinsic<[],
@@ -4834,36 +4672,64 @@ let TargetPrefix = "x86" in {
           GCCBuiltin<"__builtin_ia32_pternlogd128">,
           Intrinsic<[llvm_v4i32_ty],
                     [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_pternlog_d_256 :
           GCCBuiltin<"__builtin_ia32_pternlogd256">,
           Intrinsic<[llvm_v8i32_ty],
                     [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_pternlog_d_512 :
           GCCBuiltin<"__builtin_ia32_pternlogd512">,
           Intrinsic<[llvm_v16i32_ty],
                     [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
-                     llvm_i32_ty], [IntrNoMem]>;
+                     llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_pternlog_q_128 :
           GCCBuiltin<"__builtin_ia32_pternlogq128">,
           Intrinsic<[llvm_v2i64_ty],
                     [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_pternlog_q_256 :
           GCCBuiltin<"__builtin_ia32_pternlogq256">,
           Intrinsic<[llvm_v4i64_ty],
                     [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
+                    [IntrNoMem, ImmArg<3>]>;
 
   def int_x86_avx512_pternlog_q_512 :
           GCCBuiltin<"__builtin_ia32_pternlogq512">,
           Intrinsic<[llvm_v8i64_ty],
                     [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
+                    [IntrNoMem, ImmArg<3>]>;
+}
+
+// vp2intersect
+let TargetPrefix = "x86" in {
+  def int_x86_avx512_vp2intersect_q_512 :
+          Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_q_256 :
+          Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_q_128 :
+          Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_512 :
+          Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_256 :
+          Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_128 :
+          Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty],
                     [IntrNoMem]>;
 }
 
@@ -4873,31 +4739,35 @@ let TargetPrefix = "x86" in {
   //       distinction in signaling behaviour is not implemented.
   def int_x86_avx512_cmp_ps_512 :
               Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
   def int_x86_avx512_cmp_pd_512 :
               Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
   def int_x86_avx512_cmp_ps_256 :
               Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cmp_pd_256 :
               Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cmp_ps_128 :
             Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                       llvm_i32_ty], [IntrNoMem]>;
+                       llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
   def int_x86_avx512_cmp_pd_128 :
             Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                       llvm_i32_ty], [IntrNoMem]>;
+                       llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
 
   def int_x86_avx512_mask_cmp_ss :
         GCCBuiltin<"__builtin_ia32_cmpss_mask">,
               Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<4>]>;
   def int_x86_avx512_mask_cmp_sd :
         GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
               Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+                         llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<2>, ImmArg<4>]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4905,7 +4775,7 @@ let TargetPrefix = "x86" in {
 let TargetPrefix = "x86" in {
   def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">,
         Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-                  [IntrNoMem]>;
+                  [IntrNoMem, ImmArg<2>]>;
   def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">,
       Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
   def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">,
@@ -5000,3 +4870,51 @@ let TargetPrefix = "x86" in {
   def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
               Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
 }
+
+let TargetPrefix = "x86" in {
+  def int_x86_avx512bf16_cvtne2ps2bf16_128:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+              [IntrNoMem]>;
+  // Intrinsic must be masked due to it producing less than 128 bits of results.
+  def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+              Intrinsic<[llvm_v8i16_ty],
+                        [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_128:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+              Intrinsic<[llvm_v4f32_ty],
+              [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_256:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+              Intrinsic<[llvm_v8f32_ty],
+              [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_512:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+              Intrinsic<[llvm_v16f32_ty],
+              [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// ENQCMD - Enqueue Stores Instructions
+
+let TargetPrefix = "x86" in {
+  def int_x86_enqcmd : GCCBuiltin<"__builtin_ia32_enqcmd">,
+              Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+  def int_x86_enqcmds : GCCBuiltin<"__builtin_ia32_enqcmds">,
+              Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+}
diff --git a/include/llvm/IR/IntrinsicsXCore.td b/include/llvm/IR/IntrinsicsXCore.td
index b614e1ed6ec0..7fe8bdfd3bd0 100644
--- a/include/llvm/IR/IntrinsicsXCore.td
+++ b/include/llvm/IR/IntrinsicsXCore.td
@@ -1,9 +1,8 @@
 //==- IntrinsicsXCore.td - XCore intrinsics                 -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index bd7097b39a3e..c80504500418 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -1,9 +1,8 @@
 //===- llvm/LLVMContext.h - Class for managing "global" state ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,12 +35,8 @@ template <typename T> class SmallVectorImpl;
 class SMDiagnostic;
 class StringRef;
 class Twine;
-
-namespace yaml {
-
-class Output;
-
-} // end namespace yaml
+class RemarkStreamer;
+class raw_ostream;
 
 namespace SyncScope {
 
@@ -103,6 +98,8 @@ public:
     MD_callees = 23,                  // "callees"
     MD_irr_loop = 24,                 // "irr_loop"
     MD_access_group = 25,             // "llvm.access.group"
+    MD_callback = 26,                 // "callback"
+    MD_preserve_access_index = 27,    // "llvm.preserve.*.access.index"
   };
 
   /// Known operand bundle tag IDs, which always have the same value.  All
@@ -246,16 +243,23 @@ public:
   /// included in optimization diagnostics.
   void setDiagnosticsHotnessThreshold(uint64_t Threshold);
 
-  /// Return the YAML file used by the backend to save optimization
-  /// diagnostics.  If null, diagnostics are not saved in a file but only
-  /// emitted via the diagnostic handler.
-  yaml::Output *getDiagnosticsOutputFile();
-  /// Set the diagnostics output file used for optimization diagnostics.
+  /// Return the streamer used by the backend to save remark diagnostics. If it
+  /// does not exist, diagnostics are not saved in a file but only emitted via
+  /// the diagnostic handler.
+  RemarkStreamer *getRemarkStreamer();
+  const RemarkStreamer *getRemarkStreamer() const;
+
+  /// Set the diagnostics output used for optimization diagnostics.
+  /// This filename may be embedded in a section for tools to find the
+  /// diagnostics whenever they're needed.
+  ///
+  /// If a remark streamer is already set, it will be replaced with
+  /// \p RemarkStreamer.
   ///
-  /// By default or if invoked with null, diagnostics are not saved in a file
-  /// but only emitted via the diagnostic handler.  Even if an output file is
-  /// set, the handler is invoked for each diagnostic message.
-  void setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F);
+  /// By default, diagnostics are not saved in a file but only emitted via the
+  /// diagnostic handler.  Even if an output file is set, the handler is invoked
+  /// for each diagnostic message.
+  void setRemarkStreamer(std::unique_ptr<RemarkStreamer> RemarkStreamer);
 
   /// Get the prefix that should be printed in front of a diagnostic of
   ///        the given \p Severity
diff --git a/include/llvm/IR/LegacyPassManager.h b/include/llvm/IR/LegacyPassManager.h
index 5257a0eed488..d6bb79ab6019 100644
--- a/include/llvm/IR/LegacyPassManager.h
+++ b/include/llvm/IR/LegacyPassManager.h
@@ -1,9 +1,8 @@
 //===- LegacyPassManager.h - Legacy Container for Passes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index 51a2eb2a146d..72bc80fb5381 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -1,9 +1,8 @@
 //===- LegacyPassManagers.h - Legacy Pass Infrastructure --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/LegacyPassNameParser.h b/include/llvm/IR/LegacyPassNameParser.h
index 4cec08196408..30820e750350 100644
--- a/include/llvm/IR/LegacyPassNameParser.h
+++ b/include/llvm/IR/LegacyPassNameParser.h
@@ -1,9 +1,8 @@
 //===- LegacyPassNameParser.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index 174616c7ab1d..3a2b1bddf45d 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -1,9 +1,8 @@
 //===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -94,6 +93,17 @@ public:
   /// calls.
   MDNode *createCallees(ArrayRef<Function *> Callees);
 
+  //===------------------------------------------------------------------===//
+  // Callback metadata.
+  //===------------------------------------------------------------------===//
+
+  /// Return metadata describing a callback (see llvm::AbstractCallSite).
+  MDNode *createCallbackEncoding(unsigned CalleeArgNo, ArrayRef<int> Arguments,
+                                 bool VarArgsArePassed);
+
+  /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
+  MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB);
+
   //===------------------------------------------------------------------===//
   // AA metadata.
   //===------------------------------------------------------------------===//
diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h
index 0261c00f524c..e4a05ab46a65 100644
--- a/include/llvm/IR/Mangler.h
+++ b/include/llvm/IR/Mangler.h
@@ -1,9 +1,8 @@
 //===-- llvm/IR/Mangler.h - Self-contained name mangler ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def
index 70a03f28b488..1df60cadac08 100644
--- a/include/llvm/IR/Metadata.def
+++ b/include/llvm/IR/Metadata.def
@@ -1,9 +1,8 @@
 //===- llvm/IR/Metadata.def - Metadata definitions --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,6 +113,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity)
 HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro)
 HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock)
 
 #undef HANDLE_METADATA
 #undef HANDLE_METADATA_LEAF
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index be82c4efc115..7ca2540181ba 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/Metadata.h - Metadata definitions ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 9ef35f1f73cd..f458680cfe15 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -1,9 +1,8 @@
 //===- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,7 @@
 #include "llvm/IR/GlobalIFunc.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/ProfileSummary.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/CodeGen.h"
@@ -333,16 +333,18 @@ public:
   /// Look up the specified function in the module symbol table. Four
   /// possibilities:
   ///   1. If it does not exist, add a prototype for the function and return it.
-  ///   2. If it exists, and has a local linkage, the existing function is
-  ///      renamed and a new one is inserted.
-  ///   3. Otherwise, if the existing function has the correct prototype, return
+  ///   2. Otherwise, if the existing function has the correct prototype, return
   ///      the existing function.
-  ///   4. Finally, the function exists but has the wrong prototype: return the
+  ///   3. Finally, the function exists but has the wrong prototype: return the
   ///      function with a constantexpr cast to the right prototype.
-  Constant *getOrInsertFunction(StringRef Name, FunctionType *T,
-                                AttributeList AttributeList);
+  ///
+  /// In all cases, the returned value is a FunctionCallee wrapper around the
+  /// 'FunctionType *T' passed in, as well as a 'Value*' either of the Function or
+  /// the bitcast to the function.
+  FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
+                                     AttributeList AttributeList);
 
-  Constant *getOrInsertFunction(StringRef Name, FunctionType *T);
+  FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T);
 
   /// Look up the specified function in the module symbol table. If it does not
   /// exist, add a prototype for the function and return it. This function
@@ -350,11 +352,10 @@ public:
   /// or a ConstantExpr BitCast of that type if the named function has a
   /// different type. This version of the method takes a list of
   /// function arguments, which makes it easier for clients to use.
-  template<typename... ArgsTy>
-  Constant *getOrInsertFunction(StringRef Name,
-                                AttributeList AttributeList,
-                                Type *RetTy, ArgsTy... Args)
-  {
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertFunction(StringRef Name,
+                                     AttributeList AttributeList, Type *RetTy,
+                                     ArgsTy... Args) {
     SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
     return getOrInsertFunction(Name,
                                FunctionType::get(RetTy, ArgTys, false),
@@ -362,15 +363,17 @@ public:
   }
 
   /// Same as above, but without the attributes.
-  template<typename... ArgsTy>
-  Constant *getOrInsertFunction(StringRef Name, Type *RetTy, ArgsTy... Args) {
+  template <typename... ArgsTy>
+  FunctionCallee getOrInsertFunction(StringRef Name, Type *RetTy,
+                                     ArgsTy... Args) {
     return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
   }
 
   // Avoid an incorrect ordering that'd otherwise compile incorrectly.
   template <typename... ArgsTy>
-  Constant *getOrInsertFunction(StringRef Name, AttributeList AttributeList,
-                                FunctionType *Invalid, ArgsTy... Args) = delete;
+  FunctionCallee
+  getOrInsertFunction(StringRef Name, AttributeList AttributeList,
+                      FunctionType *Invalid, ArgsTy... Args) = delete;
 
   /// Look up the specified function in the module symbol table. If it does not
   /// exist, return null.
@@ -866,10 +869,11 @@ public:
   /// @{
 
   /// Attach profile summary metadata to this module.
-  void setProfileSummary(Metadata *M);
+  void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind);
 
-  /// Returns profile summary metadata
-  Metadata *getProfileSummary();
+  /// Returns profile summary metadata. When IsCS is true, use the context
+  /// sensitive profile summary.
+  Metadata *getProfileSummary(bool IsCS);
   /// @}
 
   /// Returns true if PLT should be avoided for RTLib calls.
diff --git a/include/llvm/IR/ModuleSlotTracker.h b/include/llvm/IR/ModuleSlotTracker.h
index eb26fba906ea..85f8ff938366 100644
--- a/include/llvm/IR/ModuleSlotTracker.h
+++ b/include/llvm/IR/ModuleSlotTracker.h
@@ -1,9 +1,8 @@
 //===-- llvm/IR/ModuleSlotTracker.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index a1acee494475..aacf8cfc089f 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -1,9 +1,8 @@
 //===- llvm/ModuleSummaryIndex.h - Module Summary Index ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -120,7 +119,7 @@ class GlobalValueSummary;
 
 using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
 
-struct GlobalValueSummaryInfo {
+struct LLVM_ALIGNAS(8) GlobalValueSummaryInfo {
   union NameOrGV {
     NameOrGV(bool HaveGVs) {
       if (HaveGVs)
@@ -163,7 +162,8 @@ using GlobalValueSummaryMapTy =
 /// Struct that holds a reference to a particular GUID in a global value
 /// summary.
 struct ValueInfo {
-  PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 2, int>
+  enum Flags { HaveGV = 1, ReadOnly = 2, WriteOnly = 4 };
+  PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 3, int>
       RefAndFlags;
 
   ValueInfo() = default;
@@ -189,15 +189,42 @@ struct ValueInfo {
                      : getRef()->second.U.Name;
   }
 
-  bool haveGVs() const { return RefAndFlags.getInt() & 0x1; }
-  bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; }
-  void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); }
+  bool haveGVs() const { return RefAndFlags.getInt() & HaveGV; }
+  bool isReadOnly() const {
+    assert(isValidAccessSpecifier());
+    return RefAndFlags.getInt() & ReadOnly;
+  }
+  bool isWriteOnly() const {
+    assert(isValidAccessSpecifier());
+    return RefAndFlags.getInt() & WriteOnly;
+  }
+  unsigned getAccessSpecifier() const {
+    assert(isValidAccessSpecifier());
+    return RefAndFlags.getInt() & (ReadOnly | WriteOnly);
+  }
+  bool isValidAccessSpecifier() const {
+    unsigned BadAccessMask = ReadOnly | WriteOnly;
+    return (RefAndFlags.getInt() & BadAccessMask) != BadAccessMask;
+  }
+  void setReadOnly() {
+    // We expect ro/wo attribute to be set only once during
+    // ValueInfo lifetime.
+    assert(getAccessSpecifier() == 0);
+    RefAndFlags.setInt(RefAndFlags.getInt() | ReadOnly);
+  }
+  void setWriteOnly() {
+    assert(getAccessSpecifier() == 0);
+    RefAndFlags.setInt(RefAndFlags.getInt() | WriteOnly);
+  }
 
   const GlobalValueSummaryMapTy::value_type *getRef() const {
     return RefAndFlags.getPointer();
   }
 
   bool isDSOLocal() const;
+
+  /// Checks if all copies are eligible for auto-hiding (have flag set).
+  bool canAutoHide() const;
 };
 
 inline raw_ostream &operator<<(raw_ostream &OS, const ValueInfo &VI) {
@@ -280,11 +307,23 @@ public:
     /// within the same linkage unit.
     unsigned DSOLocal : 1;
 
+    /// In the per-module summary, indicates that the global value is
+    /// linkonce_odr and global unnamed addr (so eligible for auto-hiding
+    /// via hidden visibility). In the combined summary, indicates that the
+    /// prevailing linkonce_odr copy can be auto-hidden via hidden visibility
+    /// when it is upgraded to weak_odr in the backend. This is legal when
+    /// all copies are eligible for auto-hiding (i.e. all copies were
+    /// linkonce_odr global unnamed addr). If any copy is not (e.g. it was
+    /// originally weak_odr), we cannot auto-hide the prevailing copy as it
+    /// means the symbol was externally visible.
+    unsigned CanAutoHide : 1;
+
     /// Convenience Constructors
     explicit GVFlags(GlobalValue::LinkageTypes Linkage,
-                     bool NotEligibleToImport, bool Live, bool IsLocal)
+                     bool NotEligibleToImport, bool Live, bool IsLocal,
+                     bool CanAutoHide)
         : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
-          Live(Live), DSOLocal(IsLocal) {}
+          Live(Live), DSOLocal(IsLocal), CanAutoHide(CanAutoHide) {}
   };
 
 private:
@@ -365,6 +404,10 @@ public:
 
   bool isDSOLocal() const { return Flags.DSOLocal; }
 
+  void setCanAutoHide(bool CanAutoHide) { Flags.CanAutoHide = CanAutoHide; }
+
+  bool canAutoHide() const { return Flags.CanAutoHide; }
+
   /// Flag that this global value cannot be imported.
   void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
 
@@ -381,25 +424,35 @@ public:
 
 /// Alias summary information.
 class AliasSummary : public GlobalValueSummary {
+  ValueInfo AliaseeValueInfo;
+
+  /// This is the Aliasee in the same module as alias (could get from VI, trades
+  /// memory for time). Note that this pointer may be null (and the value info
+  /// empty) when we have a distributed index where the alias is being imported
+  /// (as a copy of the aliasee), but the aliasee is not.
   GlobalValueSummary *AliaseeSummary;
-  // AliaseeGUID is only set and accessed when we are building a combined index
-  // via the BitcodeReader.
-  GlobalValue::GUID AliaseeGUID;
 
 public:
   AliasSummary(GVFlags Flags)
       : GlobalValueSummary(AliasKind, Flags, ArrayRef<ValueInfo>{}),
-        AliaseeSummary(nullptr), AliaseeGUID(0) {}
+        AliaseeSummary(nullptr) {}
 
   /// Check if this is an alias summary.
   static bool classof(const GlobalValueSummary *GVS) {
     return GVS->getSummaryKind() == AliasKind;
   }
 
-  void setAliasee(GlobalValueSummary *Aliasee) { AliaseeSummary = Aliasee; }
-  void setAliaseeGUID(GlobalValue::GUID GUID) { AliaseeGUID = GUID; }
+  void setAliasee(ValueInfo &AliaseeVI, GlobalValueSummary *Aliasee) {
+    AliaseeValueInfo = AliaseeVI;
+    AliaseeSummary = Aliasee;
+  }
 
-  bool hasAliasee() const { return !!AliaseeSummary; }
+  bool hasAliasee() const {
+    assert(!!AliaseeSummary == (AliaseeValueInfo &&
+                                !AliaseeValueInfo.getSummaryList().empty()) &&
+           "Expect to have both aliasee summary and summary list or neither");
+    return !!AliaseeSummary;
+  }
 
   const GlobalValueSummary &getAliasee() const {
     assert(AliaseeSummary && "Unexpected missing aliasee summary");
@@ -410,10 +463,13 @@ public:
     return const_cast<GlobalValueSummary &>(
                          static_cast<const AliasSummary *>(this)->getAliasee());
   }
-  bool hasAliaseeGUID() const { return AliaseeGUID != 0; }
-  const GlobalValue::GUID &getAliaseeGUID() const {
-    assert(AliaseeGUID && "Unexpected missing aliasee GUID");
-    return AliaseeGUID;
+  ValueInfo getAliaseeVI() const {
+    assert(AliaseeValueInfo && "Unexpected missing aliasee");
+    return AliaseeValueInfo;
+  }
+  GlobalValue::GUID getAliaseeGUID() const {
+    assert(AliaseeValueInfo && "Unexpected missing aliasee");
+    return AliaseeValueInfo.getGUID();
   }
 };
 
@@ -500,7 +556,8 @@ public:
     return FunctionSummary(
         FunctionSummary::GVFlags(
             GlobalValue::LinkageTypes::AvailableExternallyLinkage,
-            /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
+            /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false,
+            /*CanAutoHide=*/false),
         /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
         std::vector<ValueInfo>(), std::move(Edges),
         std::vector<GlobalValue::GUID>(),
@@ -552,8 +609,8 @@ public:
           std::move(TypeTestAssumeConstVCalls),
           std::move(TypeCheckedLoadConstVCalls)});
   }
-  // Gets the number of immutable refs in RefEdgeList
-  unsigned immutableRefCount() const;
+  // Gets the number of readonly and writeonly refs in RefEdgeList
+  std::pair<unsigned, unsigned> specialRefCounts() const;
 
   /// Check if this is a function summary.
   static bool classof(const GlobalValueSummary *GVS) {
@@ -666,18 +723,43 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
   }
 };
 
+/// The ValueInfo and offset for a function within a vtable definition
+/// initializer array.
+struct VirtFuncOffset {
+  VirtFuncOffset(ValueInfo VI, uint64_t Offset)
+      : FuncVI(VI), VTableOffset(Offset) {}
+
+  ValueInfo FuncVI;
+  uint64_t VTableOffset;
+};
+/// List of functions referenced by a particular vtable definition.
+using VTableFuncList = std::vector<VirtFuncOffset>;
+
 /// Global variable summary information to aid decisions and
 /// implementation of importing.
 ///
-/// Global variable summary has extra flag, telling if it is
-/// modified during the program run or not. This affects ThinLTO
-/// internalization
+/// Global variable summary has two extra flags, telling if it is
+/// readonly or writeonly. Both readonly and writeonly variables
+/// can be optimized in the backend: readonly variables can be
+/// const-folded, while writeonly vars can be completely eliminated
+/// together with corresponding stores. We let both things happen
+/// by means of internalizing such variables after ThinLTO import.
 class GlobalVarSummary : public GlobalValueSummary {
+private:
+  /// For vtable definitions this holds the list of functions and
+  /// their corresponding offsets within the initializer array.
+  std::unique_ptr<VTableFuncList> VTableFuncs;
+
 public:
   struct GVarFlags {
-    GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
-
-    unsigned ReadOnly : 1;
+    GVarFlags(bool ReadOnly, bool WriteOnly)
+        : MaybeReadOnly(ReadOnly), MaybeWriteOnly(WriteOnly) {}
+
+    // In per-module summaries both MaybeReadOnly and MaybeWriteOnly
+    // bits are set, because attribute propagation occurs later, during
+    // the thin link phase.
+    unsigned MaybeReadOnly : 1;
+    unsigned MaybeWriteOnly : 1;
   } VarFlags;
 
   GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
@@ -691,8 +773,21 @@ public:
   }
 
   GVarFlags varflags() const { return VarFlags; }
-  void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
-  bool isReadOnly() const { return VarFlags.ReadOnly; }
+  void setReadOnly(bool RO) { VarFlags.MaybeReadOnly = RO; }
+  void setWriteOnly(bool WO) { VarFlags.MaybeWriteOnly = WO; }
+  bool maybeReadOnly() const { return VarFlags.MaybeReadOnly; }
+  bool maybeWriteOnly() const { return VarFlags.MaybeWriteOnly; }
+
+  void setVTableFuncs(VTableFuncList Funcs) {
+    assert(!VTableFuncs);
+    VTableFuncs = llvm::make_unique<VTableFuncList>(std::move(Funcs));
+  }
+
+  ArrayRef<VirtFuncOffset> vTableFuncs() const {
+    if (VTableFuncs)
+      return *VTableFuncs;
+    return {};
+  }
 };
 
 struct TypeTestResolution {
@@ -791,6 +886,29 @@ using GVSummaryMapTy = DenseMap<GlobalValue::GUID, GlobalValueSummary *>;
 using TypeIdSummaryMapTy =
     std::multimap<GlobalValue::GUID, std::pair<std::string, TypeIdSummary>>;
 
+/// The following data structures summarize type metadata information.
+/// For type metadata overview see https://llvm.org/docs/TypeMetadata.html.
+/// Each type metadata includes both the type identifier and the offset of
+/// the address point of the type (the address held by objects of that type
+/// which may not be the beginning of the virtual table). Vtable definitions
+/// are decorated with type metadata for the types they are compatible with.
+///
+/// Holds information about vtable definitions decorated with type metadata:
+/// the vtable definition value and its address point offset in a type
+/// identifier metadata it is decorated (compatible) with.
+struct TypeIdOffsetVtableInfo {
+  TypeIdOffsetVtableInfo(uint64_t Offset, ValueInfo VI)
+      : AddressPointOffset(Offset), VTableVI(VI) {}
+
+  uint64_t AddressPointOffset;
+  ValueInfo VTableVI;
+};
+/// List of vtable definitions decorated by a particular type identifier,
+/// and their corresponding offsets in that type identifier's metadata.
+/// Note that each type identifier may be compatible with multiple vtables, due
+/// to inheritance, which is why this is a vector.
+using TypeIdCompatibleVtableInfo = std::vector<TypeIdOffsetVtableInfo>;
+
 /// Class to hold module path string table and global value map,
 /// and encapsulate methods for operating on them.
 class ModuleSummaryIndex {
@@ -803,9 +921,15 @@ private:
   ModulePathStringTableTy ModulePathStringTable;
 
   /// Mapping from type identifier GUIDs to type identifier and its summary
-  /// information.
+  /// information. Produced by thin link.
   TypeIdSummaryMapTy TypeIdMap;
 
+  /// Mapping from type identifier to information about vtables decorated
+  /// with that type identifier's metadata. Produced by per module summary
+  /// analysis and consumed by thin link. For more information, see description
+  /// above where TypeIdCompatibleVtableInfo is defined.
+  std::map<std::string, TypeIdCompatibleVtableInfo> TypeIdCompatibleVtableMap;
+
   /// Mapping from original ID to GUID. If original ID can map to multiple
   /// GUIDs, it will be mapped to 0.
   std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap;
@@ -1044,24 +1168,30 @@ public:
       OidGuidMap[OrigGUID] = ValueGUID;
   }
 
-  /// Find the summary for global \p GUID in module \p ModuleId, or nullptr if
+  /// Find the summary for ValueInfo \p VI in module \p ModuleId, or nullptr if
   /// not found.
-  GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID,
-                                          StringRef ModuleId) const {
-    auto CalleeInfo = getValueInfo(ValueGUID);
-    if (!CalleeInfo) {
-      return nullptr; // This function does not have a summary
-    }
+  GlobalValueSummary *findSummaryInModule(ValueInfo VI, StringRef ModuleId) const {
+    auto SummaryList = VI.getSummaryList();
     auto Summary =
-        llvm::find_if(CalleeInfo.getSummaryList(),
+        llvm::find_if(SummaryList,
                       [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
                         return Summary->modulePath() == ModuleId;
                       });
-    if (Summary == CalleeInfo.getSummaryList().end())
+    if (Summary == SummaryList.end())
       return nullptr;
     return Summary->get();
   }
 
+  /// Find the summary for global \p GUID in module \p ModuleId, or nullptr if
+  /// not found.
+  GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID,
+                                          StringRef ModuleId) const {
+    auto CalleeInfo = getValueInfo(ValueGUID);
+    if (!CalleeInfo)
+      return nullptr; // This function does not have a summary
+    return findSummaryInModule(CalleeInfo, ModuleId);
+  }
+
   /// Returns the first GlobalValueSummary for \p GV, asserting that there
   /// is only one if \p PerModuleIndex.
   GlobalValueSummary *getGlobalValueSummary(const GlobalValue &GV,
@@ -1163,6 +1293,29 @@ public:
     return nullptr;
   }
 
+  const std::map<std::string, TypeIdCompatibleVtableInfo> &
+  typeIdCompatibleVtableMap() const {
+    return TypeIdCompatibleVtableMap;
+  }
+
+  /// Return an existing or new TypeIdCompatibleVtableMap entry for \p TypeId.
+  /// This accessor can mutate the map and therefore should not be used in
+  /// the ThinLTO backends.
+  TypeIdCompatibleVtableInfo &
+  getOrInsertTypeIdCompatibleVtableSummary(StringRef TypeId) {
+    return TypeIdCompatibleVtableMap[TypeId];
+  }
+
+  /// For the given \p TypeId, this returns the TypeIdCompatibleVtableMap
+  /// entry if present in the summary map. This may be used when importing.
+  Optional<TypeIdCompatibleVtableInfo>
+  getTypeIdCompatibleVtableSummary(StringRef TypeId) const {
+    auto I = TypeIdCompatibleVtableMap.find(TypeId);
+    if (I == TypeIdCompatibleVtableMap.end())
+      return None;
+    return I->second;
+  }
+
   /// Collect for the given module the list of functions it defines
   /// (GUID -> Summary).
   void collectDefinedFunctionsForModule(StringRef ModulePath,
@@ -1170,8 +1323,16 @@ public:
 
   /// Collect for each module the list of Summaries it defines (GUID ->
   /// Summary).
-  void collectDefinedGVSummariesPerModule(
-      StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) const;
+  template <class Map>
+  void
+  collectDefinedGVSummariesPerModule(Map &ModuleToDefinedGVSummaries) const {
+    for (auto &GlobalList : *this) {
+      auto GUID = GlobalList.first;
+      for (auto &Summary : GlobalList.second.SummaryList) {
+        ModuleToDefinedGVSummaries[Summary->modulePath()][GUID] = Summary.get();
+      }
+    }
+  }
 
   /// Print to an output stream.
   void print(raw_ostream &OS, bool IsForDebug = false) const;
@@ -1186,7 +1347,7 @@ public:
   void dumpSCCs(raw_ostream &OS);
 
   /// Analyze index and detect unmodified globals
-  void propagateConstants(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
+  void propagateAttributes(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
 };
 
 /// GraphTraits definition to build SCC for the index
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index a88ee26b51c3..26d9c43fabf1 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -1,9 +1,8 @@
 //===-- llvm/ModuleSummaryIndexYAML.h - YAML I/O for summary ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -137,7 +136,7 @@ template <> struct MappingTraits<TypeIdSummary> {
 
 struct FunctionSummaryYaml {
   unsigned Linkage;
-  bool NotEligibleToImport, Live, IsLocal;
+  bool NotEligibleToImport, Live, IsLocal, CanAutoHide;
   std::vector<uint64_t> Refs;
   std::vector<uint64_t> TypeTests;
   std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
@@ -181,6 +180,7 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
     io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport);
     io.mapOptional("Live", summary.Live);
     io.mapOptional("Local", summary.IsLocal);
+    io.mapOptional("CanAutoHide", summary.CanAutoHide);
     io.mapOptional("Refs", summary.Refs);
     io.mapOptional("TypeTests", summary.TypeTests);
     io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls);
@@ -223,7 +223,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
       Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>(
           GlobalValueSummary::GVFlags(
               static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
-              FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
+              FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, FSum.CanAutoHide),
           /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
           ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
           std::move(FSum.TypeTestAssumeVCalls),
@@ -244,7 +244,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
               FSum->flags().Linkage,
               static_cast<bool>(FSum->flags().NotEligibleToImport),
               static_cast<bool>(FSum->flags().Live),
-              static_cast<bool>(FSum->flags().DSOLocal), Refs,
+              static_cast<bool>(FSum->flags().DSOLocal),
+              static_cast<bool>(FSum->flags().CanAutoHide), Refs,
               FSum->type_tests(), FSum->type_test_assume_vcalls(),
               FSum->type_checked_load_vcalls(),
               FSum->type_test_assume_const_vcalls(),
diff --git a/include/llvm/IR/NoFolder.h b/include/llvm/IR/NoFolder.h
index def07ffe2ff6..0e3c19f4947f 100644
--- a/include/llvm/IR/NoFolder.h
+++ b/include/llvm/IR/NoFolder.h
@@ -1,9 +1,8 @@
 //===- NoFolder.h - Constant folding helper ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -204,6 +203,10 @@ public:
     return BinaryOperator::CreateNot(C);
   }
 
+  Instruction *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return UnaryOperator::Create(Opc, C);
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h
index c618aff3df9a..979ad35019f8 100644
--- a/include/llvm/IR/OperandTraits.h
+++ b/include/llvm/IR/OperandTraits.h
@@ -1,9 +1,8 @@
 //===-- llvm/OperandTraits.h - OperandTraits class definition ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index 6b387bbcccb1..8199c65ca8a0 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -1,9 +1,8 @@
 //===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -188,6 +187,12 @@ public:
 
   FastMathFlags() = default;
 
+  static FastMathFlags getFast() {
+    FastMathFlags FMF;
+    FMF.setFast();
+    return FMF;
+  }
+
   bool any() const { return Flags != 0; }
   bool none() const { return Flags == 0; }
   bool all() const { return Flags == ~0U; }
@@ -380,6 +385,7 @@ public:
     case Instruction::ExtractElement:
     case Instruction::ShuffleVector:
     case Instruction::InsertElement:
+    case Instruction::PHI:
       return false;
     default:
       return V->getType()->isFPOrFPVectorTy();
diff --git a/include/llvm/IR/OptBisect.h b/include/llvm/IR/OptBisect.h
index aa24c94c0130..1b2b0bd7acaa 100644
--- a/include/llvm/IR/OptBisect.h
+++ b/include/llvm/IR/OptBisect.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/OptBisect.h - LLVM Bisect support ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -20,12 +19,6 @@
 namespace llvm {
 
 class Pass;
-class Module;
-class Function;
-class BasicBlock;
-class Region;
-class Loop;
-class CallGraphSCC;
 
 /// Extensions to this class implement mechanisms to disable passes and
 /// individual optimizations at compile time.
@@ -33,12 +26,14 @@ class OptPassGate {
 public:
   virtual ~OptPassGate() = default;
 
-  virtual bool shouldRunPass(const Pass *P, const Module &U) { return true; }
-  virtual bool shouldRunPass(const Pass *P, const Function &U)  {return true; }
-  virtual bool shouldRunPass(const Pass *P, const BasicBlock &U)  { return true; }
-  virtual bool shouldRunPass(const Pass *P, const Region &U)  { return true; }
-  virtual bool shouldRunPass(const Pass *P, const Loop &U)  { return true; }
-  virtual bool shouldRunPass(const Pass *P, const CallGraphSCC &U)  { return true; }
+  /// IRDescription is a textual description of the IR unit the pass is running
+  /// over.
+  virtual bool shouldRunPass(const Pass *P, StringRef IRDescription) {
+    return true;
+  }
+
+  /// isEnabled should return true before calling shouldRunPass
+  virtual bool isEnabled() const { return false; }
 };
 
 /// This class implements a mechanism to disable passes and individual
@@ -60,23 +55,19 @@ public:
 
   /// Checks the bisect limit to determine if the specified pass should run.
   ///
-  /// These functions immediately return true if bisection is disabled. If the
-  /// bisect limit is set to -1, the functions print a message describing
+  /// If the bisect limit is set to -1, the function prints a message describing
   /// the pass and the bisect number assigned to it and return true.  Otherwise,
-  /// the functions print a message with the bisect number assigned to the
+  /// the function prints a message with the bisect number assigned to the
   /// pass and indicating whether or not the pass will be run and return true if
   /// the bisect limit has not yet been exceeded or false if it has.
   ///
-  /// Most passes should not call these routines directly. Instead, they are
+  /// Most passes should not call this routine directly. Instead, they are
   /// called through helper routines provided by the pass base classes.  For
   /// instance, function passes should call FunctionPass::skipFunction().
-  bool shouldRunPass(const Pass *P, const Module &U) override;
-  bool shouldRunPass(const Pass *P, const Function &U) override;
-  bool shouldRunPass(const Pass *P, const BasicBlock &U) override;
-  bool shouldRunPass(const Pass *P, const Region &U) override;
-  bool shouldRunPass(const Pass *P, const Loop &U) override;
-  bool shouldRunPass(const Pass *P, const CallGraphSCC &U) override;
+  bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
 
+  /// isEnabled should return true before calling shouldRunPass
+  bool isEnabled() const override { return BisectEnabled; }
 private:
   bool checkPass(const StringRef PassName, const StringRef TargetDesc);
 
diff --git a/include/llvm/IR/PassInstrumentation.h b/include/llvm/IR/PassInstrumentation.h
index 08dac1c4a274..f8a1196871cf 100644
--- a/include/llvm/IR/PassInstrumentation.h
+++ b/include/llvm/IR/PassInstrumentation.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/PassInstrumentation.h ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 738a2242eea0..37fe2a5b01ad 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -1,9 +1,8 @@
 //===- PassManager.h - Pass management infrastructure -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -287,6 +286,13 @@ public:
                               PA.PreservedIDs.count(ID));
     }
 
+    /// Return true if the checker's analysis was not abandoned, i.e. it was not
+    /// explicitly invalidated. Even if the analysis is not explicitly
+    /// preserved, if the analysis is known stateless, then it is preserved.
+    bool preservedWhenStateless() {
+      return !IsAbandoned;
+    }
+
     /// Returns true if the checker's analysis was not abandoned and either
     ///  - \p AnalysisSetT is explicitly preserved or
     ///  - all analyses are preserved.
diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h
index 5ad68be62742..58198bf67b11 100644
--- a/include/llvm/IR/PassManagerInternal.h
+++ b/include/llvm/IR/PassManagerInternal.h
@@ -1,9 +1,8 @@
 //===- PassManager internal APIs and implementation details -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/IR/PassTimingInfo.h b/include/llvm/IR/PassTimingInfo.h
index e9945f997f43..b8d8f117f73d 100644
--- a/include/llvm/IR/PassTimingInfo.h
+++ b/include/llvm/IR/PassTimingInfo.h
@@ -1,9 +1,8 @@
 //===- PassTimingInfo.h - pass execution timing -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -27,10 +26,12 @@ namespace llvm {
 
 class Pass;
 class PassInstrumentationCallbacks;
+class raw_ostream;
 
 /// If -time-passes has been specified, report the timings immediately and then
-/// reset the timers to zero.
-void reportAndResetTimings();
+/// reset the timers to zero. By default it uses the stream created by
+/// CreateInfoOutputFile().
+void reportAndResetTimings(raw_ostream *OutStream = nullptr);
 
 /// Request the timer for this legacy-pass-manager's pass instance.
 Timer *getPassTimer(Pass *);
@@ -63,18 +64,18 @@ class TimePassesHandler {
   /// Stack of currently active timers.
   SmallVector<Timer *, 8> TimerStack;
 
+  /// Custom output stream to print timing information into.
+  /// By default (== nullptr) we emit time report into the stream created by
+  /// CreateInfoOutputFile().
+  raw_ostream *OutStream = nullptr;
+
   bool Enabled;
 
 public:
   TimePassesHandler(bool Enabled = TimePassesIsEnabled);
 
   /// Destructor handles the print action if it has not been handled before.
-  ~TimePassesHandler() {
-    // First destroying the timers from TimingData, which deploys all their
-    // collected data into the TG time group member, which later prints itself
-    // when being destroyed.
-    TimingData.clear();
-  }
+  ~TimePassesHandler() { print(); }
 
   /// Prints out timing information and then resets the timers.
   void print();
@@ -85,6 +86,9 @@ public:
 
   void registerCallbacks(PassInstrumentationCallbacks &PIC);
 
+  /// Set a custom output stream for subsequent reporting.
+  void setOutStream(raw_ostream &OutStream);
+
 private:
   /// Dumps information for running/triggered timers, useful for debugging
   LLVM_DUMP_METHOD void dump() const;
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 120fc253b908..0f03d7cc56b8 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -1,9 +1,8 @@
 //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -419,6 +418,46 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
   return cst_pred_ty<is_lowbit_mask>();
 }
 
+struct icmp_pred_with_threshold {
+  ICmpInst::Predicate Pred;
+  const APInt *Thr;
+  bool isValue(const APInt &C) {
+    switch (Pred) {
+    case ICmpInst::Predicate::ICMP_EQ:
+      return C.eq(*Thr);
+    case ICmpInst::Predicate::ICMP_NE:
+      return C.ne(*Thr);
+    case ICmpInst::Predicate::ICMP_UGT:
+      return C.ugt(*Thr);
+    case ICmpInst::Predicate::ICMP_UGE:
+      return C.uge(*Thr);
+    case ICmpInst::Predicate::ICMP_ULT:
+      return C.ult(*Thr);
+    case ICmpInst::Predicate::ICMP_ULE:
+      return C.ule(*Thr);
+    case ICmpInst::Predicate::ICMP_SGT:
+      return C.sgt(*Thr);
+    case ICmpInst::Predicate::ICMP_SGE:
+      return C.sge(*Thr);
+    case ICmpInst::Predicate::ICMP_SLT:
+      return C.slt(*Thr);
+    case ICmpInst::Predicate::ICMP_SLE:
+      return C.sle(*Thr);
+    default:
+      llvm_unreachable("Unhandled ICmp predicate");
+    }
+  }
+};
+/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
+/// to Threshold. For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<icmp_pred_with_threshold>
+m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
+  cst_pred_ty<icmp_pred_with_threshold> P;
+  P.Pred = Predicate;
+  P.Thr = &Threshold;
+  return P;
+}
+
 struct is_nan {
   bool isValue(const APFloat &C) { return C.isNaN(); }
 };
@@ -668,18 +707,26 @@ template <typename Op_t> struct FNeg_match {
   FNeg_match(const Op_t &Op) : X(Op) {}
   template <typename OpTy> bool match(OpTy *V) {
     auto *FPMO = dyn_cast<FPMathOperator>(V);
-    if (!FPMO || FPMO->getOpcode() != Instruction::FSub)
-      return false;
-    if (FPMO->hasNoSignedZeros()) {
-      // With 'nsz', any zero goes.
-      if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
-        return false;
-    } else {
-      // Without 'nsz', we need fsub -0.0, X exactly.
-      if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
-        return false;
+    if (!FPMO) return false;
+
+    if (FPMO->getOpcode() == Instruction::FNeg)
+      return X.match(FPMO->getOperand(0));
+
+    if (FPMO->getOpcode() == Instruction::FSub) {
+      if (FPMO->hasNoSignedZeros()) {
+        // With 'nsz', any zero goes.
+        if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
+          return false;
+      } else {
+        // Without 'nsz', we need fsub -0.0, X exactly.
+        if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
+          return false;
+      }
+
+      return X.match(FPMO->getOperand(1));
     }
-    return X.match(FPMO->getOperand(1));
+
+    return false;
   }
 };
 
@@ -1464,6 +1511,20 @@ struct UAddWithOverflow_match {
       if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
         return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
 
+    // Match special-case for increment-by-1.
+    if (Pred == ICmpInst::ICMP_EQ) {
+      // (a + 1) == 0
+      // (1 + a) == 0
+      if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
+          (m_One().match(AddLHS) || m_One().match(AddRHS)))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+      // 0 == (a + 1)
+      // 0 == (1 + a)
+      if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
+          (m_One().match(AddLHS) || m_One().match(AddRHS)))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+    }
+
     return false;
   }
 };
diff --git a/include/llvm/IR/PredIteratorCache.h b/include/llvm/IR/PredIteratorCache.h
index 81f535311431..cc835277910b 100644
--- a/include/llvm/IR/PredIteratorCache.h
+++ b/include/llvm/IR/PredIteratorCache.h
@@ -1,9 +1,8 @@
 //===- PredIteratorCache.h - pred_iterator Cache ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/ProfileSummary.h b/include/llvm/IR/ProfileSummary.h
index e38663770a13..78635ec4386c 100644
--- a/include/llvm/IR/ProfileSummary.h
+++ b/include/llvm/IR/ProfileSummary.h
@@ -1,9 +1,8 @@
 //===- ProfileSummary.h - Profile summary data structure. -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,11 +42,10 @@ using SummaryEntryVector = std::vector<ProfileSummaryEntry>;
 
 class ProfileSummary {
 public:
-  enum Kind { PSK_Instr, PSK_Sample };
+  enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample };
 
 private:
   const Kind PSK;
-  static const char *KindStr[2];
   SummaryEntryVector DetailedSummary;
   uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
   uint32_t NumCounts, NumFunctions;
diff --git a/include/llvm/IR/RemarkStreamer.h b/include/llvm/IR/RemarkStreamer.h
new file mode 100644
index 000000000000..f34cc660b2fb
--- /dev/null
+++ b/include/llvm/IR/RemarkStreamer.h
@@ -0,0 +1,96 @@
+//===- llvm/IR/RemarkStreamer.h - Remark Streamer ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the main interface for outputting remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_REMARKSTREAMER_H
+#define LLVM_IR_REMARKSTREAMER_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+/// Streamer for remarks.
+class RemarkStreamer {
+  /// The filename that the remark diagnostics are emitted to.
+  const std::string Filename;
+  /// The regex used to filter remarks based on the passes that emit them.
+  Optional<Regex> PassFilter;
+  /// The object used to serialize the remarks to a specific format.
+  std::unique_ptr<remarks::Serializer> Serializer;
+
+  /// Convert diagnostics into remark objects.
+  /// The lifetime of the members of the result is bound to the lifetime of
+  /// the LLVM diagnostics.
+  remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag);
+
+public:
+  RemarkStreamer(StringRef Filename,
+                 std::unique_ptr<remarks::Serializer> Serializer);
+  /// Return the filename that the remark diagnostics are emitted to.
+  StringRef getFilename() const { return Filename; }
+  /// Return the stream that the remark diagnostics are emitted to.
+  raw_ostream &getStream() { return Serializer->OS; }
+  /// Return the serializer used for this stream.
+  remarks::Serializer &getSerializer() { return *Serializer; }
+  /// Set a pass filter based on a regex \p Filter.
+  /// Returns an error if the regex is invalid.
+  Error setFilter(StringRef Filter);
+  /// Emit a diagnostic through the streamer.
+  void emit(const DiagnosticInfoOptimizationBase &Diag);
+};
+
+template <typename ThisError>
+struct RemarkSetupErrorInfo : public ErrorInfo<ThisError> {
+  std::string Msg;
+  std::error_code EC;
+
+  RemarkSetupErrorInfo(Error E) {
+    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) {
+      Msg = EIB.message();
+      EC = EIB.convertToErrorCode();
+    });
+  }
+
+  void log(raw_ostream &OS) const override { OS << Msg; }
+  std::error_code convertToErrorCode() const override { return EC; }
+};
+
+struct RemarkSetupFileError : RemarkSetupErrorInfo<RemarkSetupFileError> {
+  static char ID;
+  using RemarkSetupErrorInfo<RemarkSetupFileError>::RemarkSetupErrorInfo;
+};
+
+struct RemarkSetupPatternError : RemarkSetupErrorInfo<RemarkSetupPatternError> {
+  static char ID;
+  using RemarkSetupErrorInfo<RemarkSetupPatternError>::RemarkSetupErrorInfo;
+};
+
+struct RemarkSetupFormatError : RemarkSetupErrorInfo<RemarkSetupFormatError> {
+  static char ID;
+  using RemarkSetupErrorInfo<RemarkSetupFormatError>::RemarkSetupErrorInfo;
+};
+
+/// Set up optimization remarks.
+Expected<std::unique_ptr<ToolOutputFile>>
+setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+                         StringRef RemarksPasses, StringRef RemarksFormat,
+                         bool RemarksWithHotness,
+                         unsigned RemarksHotnessThreshold = 0);
+
+} // end namespace llvm
+
+#endif // LLVM_IR_REMARKSTREAMER_H
diff --git a/include/llvm/IR/RuntimeLibcalls.def b/include/llvm/IR/RuntimeLibcalls.def
index 89005120cdc1..f6c74d497b18 100644
--- a/include/llvm/IR/RuntimeLibcalls.def
+++ b/include/llvm/IR/RuntimeLibcalls.def
@@ -1,9 +1,8 @@
 //===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -255,6 +254,26 @@ HANDLE_LIBCALL(FMAX_F64, "fmax")
 HANDLE_LIBCALL(FMAX_F80, "fmaxl")
 HANDLE_LIBCALL(FMAX_F128, "fmaxl")
 HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+HANDLE_LIBCALL(LROUND_F32, "lroundf")
+HANDLE_LIBCALL(LROUND_F64, "lround")
+HANDLE_LIBCALL(LROUND_F80, "lroundl")
+HANDLE_LIBCALL(LROUND_F128, "lroundl")
+HANDLE_LIBCALL(LROUND_PPCF128, "lroundl")
+HANDLE_LIBCALL(LLROUND_F32, "llroundf")
+HANDLE_LIBCALL(LLROUND_F64, "llround")
+HANDLE_LIBCALL(LLROUND_F80, "llroundl")
+HANDLE_LIBCALL(LLROUND_F128, "llroundl")
+HANDLE_LIBCALL(LLROUND_PPCF128, "llroundl")
+HANDLE_LIBCALL(LRINT_F32, "lrintf")
+HANDLE_LIBCALL(LRINT_F64, "lrint")
+HANDLE_LIBCALL(LRINT_F80, "lrintl")
+HANDLE_LIBCALL(LRINT_F128, "lrintl")
+HANDLE_LIBCALL(LRINT_PPCF128, "lrintl")
+HANDLE_LIBCALL(LLRINT_F32, "llrintf")
+HANDLE_LIBCALL(LLRINT_F64, "llrint")
+HANDLE_LIBCALL(LLRINT_F80, "llrintl")
+HANDLE_LIBCALL(LLRINT_F128, "llrintl")
+HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl")
 
 // Conversion
 HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
@@ -530,6 +549,9 @@ HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
 // Deoptimization
 HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize")
 
+// Return address
+HANDLE_LIBCALL(RETURN_ADDRESS, nullptr)
+
 HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr)
 
 #undef HANDLE_LIBCALL
diff --git a/include/llvm/IR/SafepointIRVerifier.h b/include/llvm/IR/SafepointIRVerifier.h
index 092050d1d207..ec5527954adc 100644
--- a/include/llvm/IR/SafepointIRVerifier.h
+++ b/include/llvm/IR/SafepointIRVerifier.h
@@ -1,9 +1,8 @@
 //===- SafepointIRVerifier.h - Checks for GC relocation problems *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,8 @@
 #ifndef LLVM_IR_SAFEPOINT_IR_VERIFIER
 #define LLVM_IR_SAFEPOINT_IR_VERIFIER
 
+#include "llvm/IR/PassManager.h"
+
 namespace llvm {
 
 class Function;
@@ -30,6 +31,16 @@ void verifySafepointIR(Function &F);
 /// Create an instance of the safepoint verifier pass which can be added to
 /// a pass pipeline to check for relocation bugs.
 FunctionPass *createSafepointIRVerifierPass();
+
+/// New pass manager version of the safepoint verifier pass, which can be added
+/// to a pass pipeline to check for relocation bugs.
+class SafepointIRVerifierPass : public PassInfoMixin<SafepointIRVerifierPass> {
+
+public:
+  explicit SafepointIRVerifierPass() {}
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
 }
 
 #endif // LLVM_IR_SAFEPOINT_IR_VERIFIER
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index 8908e1b0d090..89f130bc3351 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -1,14 +1,13 @@
 //===- llvm/IR/Statepoint.h - gc.statepoint utilities -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains utility functions and a wrapper class analogous to
-// CallSite for accessing the fields of gc.statepoint, gc.relocate,
+// CallBase for accessing the fields of gc.statepoint, gc.relocate,
 // gc.result intrinsics; and some general utilities helpful when dealing with
 // gc.statepoint.
 //
@@ -21,7 +20,6 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
@@ -57,42 +55,36 @@ enum class StatepointFlags {
 class GCRelocateInst;
 class GCResultInst;
 
-bool isStatepoint(ImmutableCallSite CS);
+bool isStatepoint(const CallBase *Call);
 bool isStatepoint(const Value *V);
 bool isStatepoint(const Value &V);
 
-bool isGCRelocate(ImmutableCallSite CS);
+bool isGCRelocate(const CallBase *Call);
 bool isGCRelocate(const Value *V);
 
-bool isGCResult(ImmutableCallSite CS);
+bool isGCResult(const CallBase *Call);
 bool isGCResult(const Value *V);
 
-/// Analogous to CallSiteBase, this provides most of the actual
+/// A wrapper around a GC intrinsic call, this provides most of the actual
 /// functionality for Statepoint and ImmutableStatepoint.  It is
 /// templatized to allow easily specializing of const and non-const
-/// concrete subtypes.  This is structured analogous to CallSite
-/// rather than the IntrinsicInst.h helpers since we need to support
-/// invokable statepoints.
+/// concrete subtypes.
 template <typename FunTy, typename InstructionTy, typename ValueTy,
-          typename CallSiteTy>
+          typename CallBaseTy>
 class StatepointBase {
-  CallSiteTy StatepointCS;
+  CallBaseTy *StatepointCall;
 
 protected:
   explicit StatepointBase(InstructionTy *I) {
-    if (isStatepoint(I)) {
-      StatepointCS = CallSiteTy(I);
-      assert(StatepointCS && "isStatepoint implies CallSite");
-    }
+    StatepointCall = isStatepoint(I) ? cast<CallBaseTy>(I) : nullptr;
   }
 
-  explicit StatepointBase(CallSiteTy CS) {
-    if (isStatepoint(CS))
-      StatepointCS = CS;
+  explicit StatepointBase(CallBaseTy *Call) {
+    StatepointCall = isStatepoint(Call) ? Call : nullptr;
   }
 
 public:
-  using arg_iterator = typename CallSiteTy::arg_iterator;
+  using arg_iterator = typename CallBaseTy::const_op_iterator;
 
   enum {
     IDPos = 0,
@@ -107,30 +99,30 @@ public:
   void *operator new(size_t s) = delete;
 
   explicit operator bool() const {
-    // We do not assign non-statepoint CallSites to StatepointCS.
-    return (bool)StatepointCS;
+    // We do not assign non-statepoint call instructions to StatepointCall.
+    return (bool)StatepointCall;
   }
 
-  /// Return the underlying CallSite.
-  CallSiteTy getCallSite() const {
+  /// Return the underlying call instruction.
+  CallBaseTy *getCall() const {
     assert(*this && "check validity first!");
-    return StatepointCS;
+    return StatepointCall;
   }
 
   uint64_t getFlags() const {
-    return cast<ConstantInt>(getCallSite().getArgument(FlagsPos))
+    return cast<ConstantInt>(getCall()->getArgOperand(FlagsPos))
         ->getZExtValue();
   }
 
   /// Return the ID associated with this statepoint.
   uint64_t getID() const {
-    const Value *IDVal = getCallSite().getArgument(IDPos);
+    const Value *IDVal = getCall()->getArgOperand(IDPos);
     return cast<ConstantInt>(IDVal)->getZExtValue();
   }
 
   /// Return the number of patchable bytes associated with this statepoint.
   uint32_t getNumPatchBytes() const {
-    const Value *NumPatchBytesVal = getCallSite().getArgument(NumPatchBytesPos);
+    const Value *NumPatchBytesVal = getCall()->getArgOperand(NumPatchBytesPos);
     uint64_t NumPatchBytes =
       cast<ConstantInt>(NumPatchBytesVal)->getZExtValue();
     assert(isInt<32>(NumPatchBytes) && "should fit in 32 bits!");
@@ -139,12 +131,11 @@ public:
 
   /// Return the value actually being called or invoked.
   ValueTy *getCalledValue() const {
-    return getCallSite().getArgument(CalledFunctionPos);
+    return getCall()->getArgOperand(CalledFunctionPos);
   }
 
-  InstructionTy *getInstruction() const {
-    return getCallSite().getInstruction();
-  }
+  // FIXME: Migrate users of this to `getCall` and remove it.
+  InstructionTy *getInstruction() const { return getCall(); }
 
   /// Return the function being called if this is a direct call, otherwise
   /// return null (if it's an indirect call).
@@ -153,12 +144,12 @@ public:
   }
 
   /// Return the caller function for this statepoint.
-  FunTy *getCaller() const { return getCallSite().getCaller(); }
+  FunTy *getCaller() const { return getCall()->getCaller(); }
 
   /// Determine if the statepoint cannot unwind.
   bool doesNotThrow() const {
     Function *F = getCalledFunction();
-    return getCallSite().doesNotThrow() || (F ? F->doesNotThrow() : false);
+    return getCall()->doesNotThrow() || (F ? F->doesNotThrow() : false);
   }
 
   /// Return the type of the value returned by the call underlying the
@@ -171,18 +162,18 @@ public:
 
   /// Number of arguments to be passed to the actual callee.
   int getNumCallArgs() const {
-    const Value *NumCallArgsVal = getCallSite().getArgument(NumCallArgsPos);
+    const Value *NumCallArgsVal = getCall()->getArgOperand(NumCallArgsPos);
     return cast<ConstantInt>(NumCallArgsVal)->getZExtValue();
   }
 
   size_t arg_size() const { return getNumCallArgs(); }
-  typename CallSiteTy::arg_iterator arg_begin() const {
-    assert(CallArgsBeginPos <= (int)getCallSite().arg_size());
-    return getCallSite().arg_begin() + CallArgsBeginPos;
+  arg_iterator arg_begin() const {
+    assert(CallArgsBeginPos <= (int)getCall()->arg_size());
+    return getCall()->arg_begin() + CallArgsBeginPos;
   }
-  typename CallSiteTy::arg_iterator arg_end() const {
+  arg_iterator arg_end() const {
     auto I = arg_begin() + arg_size();
-    assert((getCallSite().arg_end() - I) >= 0);
+    assert((getCall()->arg_end() - I) >= 0);
     return I;
   }
 
@@ -199,8 +190,8 @@ public:
   /// Return true if the call or the callee has the given attribute.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
     Function *F = getCalledFunction();
-    return getCallSite().paramHasAttr(i + CallArgsBeginPos, A) ||
-          (F ? F->getAttributes().hasAttribute(i, A) : false);
+    return getCall()->paramHasAttr(i + CallArgsBeginPos, A) ||
+           (F ? F->getAttributes().hasAttribute(i, A) : false);
   }
 
   /// Number of GC transition args.
@@ -208,14 +199,14 @@ public:
     const Value *NumGCTransitionArgs = *arg_end();
     return cast<ConstantInt>(NumGCTransitionArgs)->getZExtValue();
   }
-  typename CallSiteTy::arg_iterator gc_transition_args_begin() const {
+  arg_iterator gc_transition_args_begin() const {
     auto I = arg_end() + 1;
-    assert((getCallSite().arg_end() - I) >= 0);
+    assert((getCall()->arg_end() - I) >= 0);
     return I;
   }
-  typename CallSiteTy::arg_iterator gc_transition_args_end() const {
+  arg_iterator gc_transition_args_end() const {
     auto I = gc_transition_args_begin() + getNumTotalGCTransitionArgs();
-    assert((getCallSite().arg_end() - I) >= 0);
+    assert((getCall()->arg_end() - I) >= 0);
     return I;
   }
 
@@ -231,14 +222,14 @@ public:
     return cast<ConstantInt>(NumVMSArgs)->getZExtValue();
   }
 
-  typename CallSiteTy::arg_iterator deopt_begin() const {
+  arg_iterator deopt_begin() const {
     auto I = gc_transition_args_end() + 1;
-    assert((getCallSite().arg_end() - I) >= 0);
+    assert((getCall()->arg_end() - I) >= 0);
     return I;
   }
-  typename CallSiteTy::arg_iterator deopt_end() const {
+  arg_iterator deopt_end() const {
     auto I = deopt_begin() + getNumTotalVMSArgs();
-    assert((getCallSite().arg_end() - I) >= 0);
+    assert((getCall()->arg_end() - I) >= 0);
     return I;
   }
 
@@ -247,15 +238,11 @@ public:
     return make_range(deopt_begin(), deopt_end());
   }
 
-  typename CallSiteTy::arg_iterator gc_args_begin() const {
-    return deopt_end();
-  }
-  typename CallSiteTy::arg_iterator gc_args_end() const {
-    return getCallSite().arg_end();
-  }
+  arg_iterator gc_args_begin() const { return deopt_end(); }
+  arg_iterator gc_args_end() const { return getCall()->arg_end(); }
 
   unsigned gcArgsStartIdx() const {
-    return gc_args_begin() - getInstruction()->op_begin();
+    return gc_args_begin() - getCall()->op_begin();
   }
 
   /// range adapter for gc arguments
@@ -304,25 +291,24 @@ public:
 /// to a gc.statepoint.
 class ImmutableStatepoint
     : public StatepointBase<const Function, const Instruction, const Value,
-                            ImmutableCallSite> {
-  using Base =
-      StatepointBase<const Function, const Instruction, const Value,
-                     ImmutableCallSite>;
+                            const CallBase> {
+  using Base = StatepointBase<const Function, const Instruction, const Value,
+                              const CallBase>;
 
 public:
   explicit ImmutableStatepoint(const Instruction *I) : Base(I) {}
-  explicit ImmutableStatepoint(ImmutableCallSite CS) : Base(CS) {}
+  explicit ImmutableStatepoint(const CallBase *Call) : Base(Call) {}
 };
 
 /// A specialization of it's base class for read-write access
 /// to a gc.statepoint.
 class Statepoint
-    : public StatepointBase<Function, Instruction, Value, CallSite> {
-  using Base = StatepointBase<Function, Instruction, Value, CallSite>;
+    : public StatepointBase<Function, Instruction, Value, CallBase> {
+  using Base = StatepointBase<Function, Instruction, Value, CallBase>;
 
 public:
   explicit Statepoint(Instruction *I) : Base(I) {}
-  explicit Statepoint(CallSite CS) : Base(CS) {}
+  explicit Statepoint(CallBase *Call) : Base(Call) {}
 };
 
 /// Common base class for representing values projected from a statepoint.
@@ -347,14 +333,14 @@ public:
   }
 
   /// The statepoint with which this gc.relocate is associated.
-  const Instruction *getStatepoint() const {
+  const CallBase *getStatepoint() const {
     const Value *Token = getArgOperand(0);
 
     // This takes care both of relocates for call statepoints and relocates
     // on normal path of invoke statepoint.
     if (!isa<LandingPadInst>(Token)) {
       assert(isStatepoint(Token));
-      return cast<Instruction>(Token);
+      return cast<CallBase>(Token);
     }
 
     // This relocate is on exceptional path of an invoke statepoint
@@ -366,7 +352,7 @@ public:
            "safepoint block should be well formed");
     assert(isStatepoint(InvokeBB->getTerminator()));
 
-    return InvokeBB->getTerminator();
+    return cast<CallBase>(InvokeBB->getTerminator());
   }
 };
 
@@ -395,13 +381,11 @@ public:
   }
 
   Value *getBasePtr() const {
-    ImmutableCallSite CS(getStatepoint());
-    return *(CS.arg_begin() + getBasePtrIndex());
+    return *(getStatepoint()->arg_begin() + getBasePtrIndex());
   }
 
   Value *getDerivedPtr() const {
-    ImmutableCallSite CS(getStatepoint());
-    return *(CS.arg_begin() + getDerivedPtrIndex());
+    return *(getStatepoint()->arg_begin() + getDerivedPtrIndex());
   }
 };
 
@@ -418,28 +402,25 @@ public:
 };
 
 template <typename FunTy, typename InstructionTy, typename ValueTy,
-          typename CallSiteTy>
+          typename CallBaseTy>
 std::vector<const GCRelocateInst *>
-StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
+StatepointBase<FunTy, InstructionTy, ValueTy, CallBaseTy>::getRelocates()
     const {
-
   std::vector<const GCRelocateInst *> Result;
 
-  CallSiteTy StatepointCS = getCallSite();
-
   // Search for relocated pointers.  Note that working backwards from the
   // gc_relocates ensures that we only get pairs which are actually relocated
   // and used after the statepoint.
-  for (const User *U : getInstruction()->users())
+  for (const User *U : StatepointCall->users())
     if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
       Result.push_back(Relocate);
 
-  if (!StatepointCS.isInvoke())
+  auto *StatepointInvoke = dyn_cast<InvokeInst>(StatepointCall);
+  if (!StatepointInvoke)
     return Result;
 
   // We need to scan thorough exceptional relocations if it is invoke statepoint
-  LandingPadInst *LandingPad =
-      cast<InvokeInst>(getInstruction())->getLandingPadInst();
+  LandingPadInst *LandingPad = StatepointInvoke->getLandingPadInst();
 
   // Search for gc relocates that are attached to this landingpad.
   for (const User *LandingPadUser : LandingPad->users()) {
diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h
index 87ce902c2811..5b793e5dbf28 100644
--- a/include/llvm/IR/SymbolTableListTraits.h
+++ b/include/llvm/IR/SymbolTableListTraits.h
@@ -1,9 +1,8 @@
 //===- llvm/SymbolTableListTraits.h - Traits for iplist ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h
index 084efada221f..d7377398b91b 100644
--- a/include/llvm/IR/TrackingMDRef.h
+++ b/include/llvm/IR/TrackingMDRef.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/TrackingMDRef.h - Tracking Metadata references ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index 9c1f99d1b3a2..f2aa49030aaa 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -1,9 +1,8 @@
 //===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -367,6 +366,7 @@ public:
     return ContainedTys[0];
   }
 
+  inline bool getVectorIsScalable() const;
   inline unsigned getVectorNumElements() const;
   Type *getVectorElementType() const {
     assert(getTypeID() == VectorTyID);
@@ -467,28 +467,6 @@ template <> struct isa_impl<PointerType, Type> {
   }
 };
 
-//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a type as a
-// graph of sub types.
-
-template <> struct GraphTraits<Type *> {
-  using NodeRef = Type *;
-  using ChildIteratorType = Type::subtype_iterator;
-
-  static NodeRef getEntryNode(Type *T) { return T; }
-  static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
-  static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
-};
-
-template <> struct GraphTraits<const Type*> {
-  using NodeRef = const Type *;
-  using ChildIteratorType = Type::subtype_iterator;
-
-  static NodeRef getEntryNode(NodeRef T) { return T; }
-  static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
-  static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
-};
-
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
 
diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h
index c050c388d398..a83f85ea84c3 100644
--- a/include/llvm/IR/TypeFinder.h
+++ b/include/llvm/IR/TypeFinder.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/TypeFinder.h - Class to find used struct types ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 25c44e0871a9..034ca2c8ac23 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -1,9 +1,8 @@
 //===- llvm/Use.h - Definition of the Use class -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -140,7 +139,7 @@ private:
   const Use *getImpliedUser() const LLVM_READONLY;
 
   Value *Val = nullptr;
-  Use *Next;
+  Use *Next = nullptr;
   PointerIntPair<Use **, 2, PrevPtrTag, PrevPointerTraits> Prev;
 
   void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); }
diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h
index b6bb0f19a0aa..a1f313e269b2 100644
--- a/include/llvm/IR/UseListOrder.h
+++ b/include/llvm/IR/UseListOrder.h
@@ -1,9 +1,8 @@
 //===- llvm/IR/UseListOrder.h - LLVM Use List Order -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index aea31467f2fa..19d87c5c621d 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -1,9 +1,8 @@
 //===- llvm/User.h - User class definition ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def
index e2ddba0aa159..aaf1651979a9 100644
--- a/include/llvm/IR/Value.def
+++ b/include/llvm/IR/Value.def
@@ -1,9 +1,8 @@
 //===-------- llvm/IR/Value.def - File that describes Values ---v-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index 4f3a45c684fc..b2d8e7ac4741 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -1,9 +1,8 @@
 //===- llvm/Value.h - Definition of the Value class -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -494,7 +493,7 @@ public:
   /// swifterror attribute.
   bool isSwiftError() const;
 
-  /// Strip off pointer casts, all-zero GEPs, and aliases.
+  /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
   ///
   /// Returns the original uncasted value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -504,6 +503,17 @@ public:
                          static_cast<const Value *>(this)->stripPointerCasts());
   }
 
+  /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases
+  /// but ensures the representation of the result stays the same.
+  ///
+  /// Returns the original uncasted value with the same representation. If this
+  /// is called on a non-pointer value, it returns 'this'.
+  const Value *stripPointerCastsSameRepresentation() const;
+  Value *stripPointerCastsSameRepresentation() {
+    return const_cast<Value *>(static_cast<const Value *>(this)
+                                   ->stripPointerCastsSameRepresentation());
+  }
+
   /// Strip off pointer casts, all-zero GEPs, aliases and invariant group
   /// info.
   ///
@@ -536,19 +546,48 @@ public:
               static_cast<const Value *>(this)->stripInBoundsConstantOffsets());
   }
 
-  /// Accumulate offsets from \a stripInBoundsConstantOffsets().
-  ///
-  /// Stores the resulting constant offset stripped into the APInt provided.
-  /// The provided APInt will be extended or truncated as needed to be the
-  /// correct bitwidth for an offset of this pointer type.
-  ///
-  /// If this is called on a non-pointer value, it returns 'this'.
+  /// Accumulate the constant offset this value has compared to a base pointer.
+  /// Only 'getelementptr' instructions (GEPs) with constant indices are
+  /// accumulated but other instructions, e.g., casts, are stripped away as
+  /// well. The accumulated constant offset is added to \p Offset and the base
+  /// pointer is returned.
+  ///
+  /// The APInt \p Offset has to have a bit-width equal to the IntPtr type for
+  /// the address space of 'this' pointer value, e.g., use
+  /// DataLayout::getIndexTypeSizeInBits(Ty).
+  ///
+  /// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and
+  /// accumulated even if the GEP is not "inbounds".
+  ///
+  /// If this is called on a non-pointer value, it returns 'this' and the
+  /// \p Offset is not modified.
+  ///
+  /// Note that this function will never return a nullptr. It will also never
+  /// manipulate the \p Offset in a way that would not match the difference
+  /// between the underlying value and the returned one. Thus, if no constant
+  /// offset was found, the returned value is the underlying one and \p Offset
+  /// is unchanged.
+  const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL,
+                                                 APInt &Offset,
+                                                 bool AllowNonInbounds) const;
+  Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+                                           bool AllowNonInbounds) {
+    return const_cast<Value *>(
+        static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets(
+            DL, Offset, AllowNonInbounds));
+  }
+
+  /// This is a wrapper around stripAndAccumulateConstantOffsets that only
+  /// strips inbounds GEPs, i.e., with AllowNonInbounds set to false.
   const Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
-                                                         APInt &Offset) const;
+                                                         APInt &Offset) const {
+    return stripAndAccumulateConstantOffsets(DL, Offset,
+                                             /* AllowNonInbounds */ false);
+  }
   Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
                                                    APInt &Offset) {
-    return const_cast<Value *>(static_cast<const Value *>(this)
-        ->stripAndAccumulateInBoundsConstantOffsets(DL, Offset));
+    return stripAndAccumulateConstantOffsets(DL, Offset,
+                                             /* AllowNonInbounds */ false);
   }
 
   /// Strip off pointer casts and inbounds GEPs.
diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h
index d94472ce1be1..1135d796f7ed 100644
--- a/include/llvm/IR/ValueHandle.h
+++ b/include/llvm/IR/ValueHandle.h
@@ -1,9 +1,8 @@
 //===- ValueHandle.h - Value Smart Pointer classes --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -309,15 +308,6 @@ struct DenseMapInfo<AssertingVH<T>> {
   }
 };
 
-template <typename T>
-struct isPodLike<AssertingVH<T>> {
-#ifdef NDEBUG
-  static const bool value = true;
-#else
-  static const bool value = false;
-#endif
-};
-
 /// Value handle that tracks a Value across RAUW.
 ///
 /// TrackingVH is designed for situations where a client needs to hold a handle
@@ -549,14 +539,6 @@ template <typename T> struct DenseMapInfo<PoisoningVH<T>> {
   }
 };
 
-template <typename T> struct isPodLike<PoisoningVH<T>> {
-#ifdef NDEBUG
-  static const bool value = true;
-#else
-  static const bool value = false;
-#endif
-};
-
 } // end namespace llvm
 
 #endif // LLVM_IR_VALUEHANDLE_H
diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h
index e7e33918a613..6a79b1d387f3 100644
--- a/include/llvm/IR/ValueMap.h
+++ b/include/llvm/IR/ValueMap.h
@@ -1,9 +1,8 @@
 //===- ValueMap.h - Safe map from Values to data ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h
index 012e717c7470..105ea73857af 100644
--- a/include/llvm/IR/ValueSymbolTable.h
+++ b/include/llvm/IR/ValueSymbolTable.h
@@ -1,9 +1,8 @@
 //===- llvm/ValueSymbolTable.h - Implement a Value Symtab -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h
index 7255132e1e65..62c33c8325eb 100644
--- a/include/llvm/IR/Verifier.h
+++ b/include/llvm/IR/Verifier.h
@@ -1,9 +1,8 @@
 //===- Verifier.h - LLVM IR Verifier ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
index bedde8954fbb..05171300b602 100644
--- a/include/llvm/IRReader/IRReader.h
+++ b/include/llvm/IRReader/IRReader.h
@@ -1,9 +1,8 @@
 //===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,11 +20,22 @@
 namespace llvm {
 
 class StringRef;
+class MemoryBuffer;
 class MemoryBufferRef;
 class Module;
 class SMDiagnostic;
 class LLVMContext;
 
+/// If the given MemoryBuffer holds a bitcode image, return a Module
+/// for it which does lazy deserialization of function bodies.  Otherwise,
+/// attempt to parse it as LLVM Assembly and return a fully populated
+/// Module. The ShouldLazyLoadMetadata flag is passed down to the bitcode
+/// reader to optionally enable lazy metadata loading. This takes ownership
+/// of \p Buffer.
+std::unique_ptr<Module> getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer,
+                                        SMDiagnostic &Err, LLVMContext &Context,
+                                        bool ShouldLazyLoadMetadata = false);
+
 /// If the given file holds a bitcode image, return a Module
 /// for it which does lazy deserialization of function bodies.  Otherwise,
 /// attempt to parse it as LLVM Assembly and return a fully populated
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 037c0dbb56ec..164d0be2855a 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -1,9 +1,8 @@
 //===- llvm/InitializePasses.h - Initialize All Passes ----------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,8 +64,9 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
 void initializeAAResultsWrapperPassPass(PassRegistry&);
 void initializeADCELegacyPassPass(PassRegistry&);
 void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
-void initializeAddressSanitizerModulePass(PassRegistry&);
-void initializeAddressSanitizerPass(PassRegistry&);
+void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
+void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
+void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
 void initializeAliasSetPrinterPass(PassRegistry&);
 void initializeAlignmentFromAssumptionsPass(PassRegistry&);
@@ -74,6 +74,7 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
 void initializeArgPromotionPass(PassRegistry&);
 void initializeAssumptionCacheTrackerPass(PassRegistry&);
 void initializeAtomicExpandPass(PassRegistry&);
+void initializeAttributorLegacyPassPass(PassRegistry&);
 void initializeBDCELegacyPassPass(PassRegistry&);
 void initializeBarrierNoopPass(PassRegistry&);
 void initializeBasicAAWrapperPassPass(PassRegistry&);
@@ -134,16 +135,15 @@ void initializeEarlyIfConverterPass(PassRegistry&);
 void initializeEarlyMachineLICMPass(PassRegistry&);
 void initializeEarlyTailDuplicatePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
-void initializeEfficiencySanitizerPass(PassRegistry&);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
 void initializeEntryExitInstrumenterPass(PassRegistry&);
-void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
 void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
 void initializeFEntryInserterPass(PassRegistry&);
+void initializeFinalizeISelPass(PassRegistry&);
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
 void initializeFlattenCFGPassPass(PassRegistry&);
 void initializeFloat2IntLegacyPassPass(PassRegistry&);
@@ -163,8 +163,9 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
 void initializeGlobalSplitPass(PassRegistry&);
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
+void initializeHardwareLoopsPass(PassRegistry&);
 void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
-void initializeHWAddressSanitizerPass(PassRegistry&);
+void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeIPCPPass(PassRegistry&);
 void initializeIPSCCPLegacyPassPass(PassRegistry&);
 void initializeIRCELegacyPassPass(PassRegistry&);
@@ -181,6 +182,7 @@ void initializeInstCountPass(PassRegistry&);
 void initializeInstNamerPass(PassRegistry&);
 void initializeInstSimplifyLegacyPassPass(PassRegistry &);
 void initializeInstrProfilingLegacyPassPass(PassRegistry&);
+void initializeInstrOrderFileLegacyPassPass(PassRegistry&);
 void initializeInstructionCombiningPassPass(PassRegistry&);
 void initializeInstructionSelectPass(PassRegistry&);
 void initializeInterleavedAccessPass(PassRegistry&);
@@ -219,6 +221,7 @@ void initializeLoopDeletionLegacyPassPass(PassRegistry&);
 void initializeLoopDistributeLegacyPass(PassRegistry&);
 void initializeLoopExtractorPass(PassRegistry&);
 void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
+void initializeLoopFuseLegacyPass(PassRegistry&);
 void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
 void initializeLoopInfoWrapperPassPass(PassRegistry&);
 void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
@@ -241,6 +244,7 @@ void initializeLowerAtomicLegacyPassPass(PassRegistry&);
 void initializeLowerEmuTLSPass(PassRegistry&);
 void initializeLowerExpectIntrinsicPass(PassRegistry&);
 void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
+void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokeLegacyPassPass(PassRegistry&);
 void initializeLowerSwitchPass(PassRegistry&);
@@ -277,7 +281,7 @@ void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
 void initializeMemorySSAWrapperPassPass(PassRegistry&);
 void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
 void initializeMergeFunctionsPass(PassRegistry&);
-void initializeMergeICmpsPass(PassRegistry&);
+void initializeMergeICmpsLegacyPassPass(PassRegistry &);
 void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
 void initializeMetaRenamerPass(PassRegistry&);
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
@@ -298,6 +302,7 @@ void initializePEIPass(PassRegistry&);
 void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&);
 void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&);
 void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&);
+void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&);
 void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&);
 void initializePHIEliminationPass(PassRegistry&);
 void initializePartialInlinerLegacyPassPass(PassRegistry&);
diff --git a/include/llvm/LTO/Caching.h b/include/llvm/LTO/Caching.h
index 7201ab31f5b0..4c4a708d24e9 100644
--- a/include/llvm/LTO/Caching.h
+++ b/include/llvm/LTO/Caching.h
@@ -1,9 +1,8 @@
 //===- Caching.h - LLVM Link Time Optimizer Configuration -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,8 +24,8 @@ namespace lto {
 /// (e.g. in a cache).
 ///
 /// Buffer callbacks must be thread safe.
-typedef std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>
-    AddBufferFn;
+using AddBufferFn =
+    std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
 
 /// Create a local file system cache which uses the given cache directory and
 /// file callback. This function also creates the cache directory if it does not
diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h
index 7058602c3ee2..fb107e3fbe02 100644
--- a/include/llvm/LTO/Config.h
+++ b/include/llvm/LTO/Config.h
@@ -1,9 +1,8 @@
 //===-Config.h - LLVM Link Time Optimizer Configuration -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,6 +55,9 @@ struct Config {
   /// Disable entirely the optimizer, including importing for ThinLTO
   bool CodeGenOnly = false;
 
+  /// Run PGO context sensitive IR instrumentation.
+  bool RunCSIRInstr = false;
+
   /// If this field is set, the set of passes run in the middle-end optimizer
   /// will be the one specified by the string. Only works with the new pass
   /// manager as the old one doesn't have this ability.
@@ -74,6 +76,9 @@ struct Config {
   /// with this triple.
   std::string DefaultTriple;
 
+  /// Context Sensitive PGO profile path.
+  std::string CSIRProfile;
+
   /// Sample PGO profile path.
   std::string SampleProfile;
 
@@ -83,17 +88,29 @@ struct Config {
   /// The directory to store .dwo files.
   std::string DwoDir;
 
+  /// The name for the split debug info file used for the DW_AT_[GNU_]dwo_name
+  /// attribute in the skeleton CU. This should generally only be used when
+  /// running an individual backend directly via thinBackend(), as otherwise
+  /// all objects would use the same .dwo file. Not used as output path.
+  std::string SplitDwarfFile;
+
   /// The path to write a .dwo file to. This should generally only be used when
   /// running an individual backend directly via thinBackend(), as otherwise
-  /// all .dwo files will be written to the same path.
-  std::string DwoPath;
+  /// all .dwo files will be written to the same path. Not used in skeleton CU.
+  std::string SplitDwarfOutput;
 
   /// Optimization remarks file path.
   std::string RemarksFilename = "";
 
+  /// Optimization remarks pass filter.
+  std::string RemarksPasses = "";
+
   /// Whether to emit optimization remarks with hotness informations.
   bool RemarksWithHotness = false;
 
+  /// The format used for serializing remarks (default: YAML).
+  std::string RemarksFormat = "";
+
   /// Whether to emit the pass manager debuggging informations.
   bool DebugPassManager = false;
 
@@ -133,7 +150,7 @@ struct Config {
   ///
   /// Note that in out-of-process backend scenarios, none of the hooks will be
   /// called for ThinLTO tasks.
-  typedef std::function<bool(unsigned Task, const Module &)> ModuleHookFn;
+  using ModuleHookFn = std::function<bool(unsigned Task, const Module &)>;
 
   /// This module hook is called after linking (regular LTO) or loading
   /// (ThinLTO) the module, before modifying it.
@@ -166,8 +183,8 @@ struct Config {
   ///
   /// It is called regardless of whether the backend is in-process, although it
   /// is not called from individual backend processes.
-  typedef std::function<bool(const ModuleSummaryIndex &Index)>
-      CombinedIndexHookFn;
+  using CombinedIndexHookFn =
+      std::function<bool(const ModuleSummaryIndex &Index)>;
   CombinedIndexHookFn CombinedIndexHook;
 
   /// This is a convenience function that configures this Config object to write
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index 534d9b6f3f2a..ca0a8b64523a 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -1,9 +1,8 @@
 //===-LTO.h - LLVM Link Time Optimizer ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,7 @@
 #include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/LTO/Config.h"
 #include "llvm/Linker/IRMover.h"
 #include "llvm/Object/IRSymtab.h"
@@ -51,7 +51,8 @@ void thinLTOResolvePrevailingInIndex(
     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing,
     function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
-        recordNewLinkage);
+        recordNewLinkage,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
 
 /// Update the linkages in the given \p Index to mark exported values
 /// as external and non-exported values as internal. The ThinLTO backends
@@ -84,8 +85,13 @@ std::string getThinLTOOutputFile(const std::string &Path,
 
 /// Setup optimization remarks.
 Expected<std::unique_ptr<ToolOutputFile>>
-setupOptimizationRemarks(LLVMContext &Context, StringRef LTORemarksFilename,
-                         bool LTOPassRemarksWithHotness, int Count = -1);
+setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+                         StringRef RemarksPasses, StringRef RemarksFormat,
+                         bool RemarksWithHotness, int Count = -1);
+
+/// Sets up the output file for saving statistics.
+Expected<std::unique_ptr<ToolOutputFile>>
+setupStatsFile(StringRef StatsFilename);
 
 class LTO;
 struct SymbolResolution;
@@ -110,6 +116,7 @@ private:
   std::vector<std::pair<size_t, size_t>> ModuleSymIndices;
 
   StringRef TargetTriple, SourceFileName, COFFLinkerOpts;
+  std::vector<StringRef> DependentLibraries;
   std::vector<StringRef> ComdatTable;
 
 public:
@@ -131,6 +138,7 @@ public:
     using irsymtab::Symbol::isWeak;
     using irsymtab::Symbol::isIndirect;
     using irsymtab::Symbol::getName;
+    using irsymtab::Symbol::getIRName;
     using irsymtab::Symbol::getVisibility;
     using irsymtab::Symbol::canBeOmittedFromSymbolTable;
     using irsymtab::Symbol::isTLS;
@@ -140,6 +148,7 @@ public:
     using irsymtab::Symbol::getCOFFWeakExternalFallback;
     using irsymtab::Symbol::getSectionName;
     using irsymtab::Symbol::isExecutable;
+    using irsymtab::Symbol::isUsed;
   };
 
   /// A range over the symbols in this InputFile.
@@ -148,6 +157,9 @@ public:
   /// Returns linker options specified in the input file.
   StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; }
 
+  /// Returns dependent library specifiers from the input file.
+  ArrayRef<StringRef> getDependentLibraries() const { return DependentLibraries; }
+
   /// Returns the path to the InputFile.
   StringRef getName() const;
 
@@ -160,6 +172,9 @@ public:
   // Returns a table with all the comdats used by this file.
   ArrayRef<StringRef> getComdatTable() const { return ComdatTable; }
 
+  // Returns the only BitcodeModule from InputFile.
+  BitcodeModule &getSingleBitcodeModule();
+
 private:
   ArrayRef<Symbol> module_symbols(unsigned I) const {
     const auto &Indices = ModuleSymIndices[I];
@@ -183,8 +198,8 @@ public:
 /// the fly.
 ///
 /// Stream callbacks must be thread safe.
-typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
-    AddStreamFn;
+using AddStreamFn =
+    std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;
 
 /// This is the type of a native object cache. To request an item from the
 /// cache, pass a unique string as the Key. For hits, the cached file will be
@@ -198,17 +213,16 @@ typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
 ///
 /// if (AddStreamFn AddStream = Cache(Task, Key))
 ///   ProduceContent(AddStream);
-typedef std::function<AddStreamFn(unsigned Task, StringRef Key)>
-    NativeObjectCache;
+using NativeObjectCache =
+    std::function<AddStreamFn(unsigned Task, StringRef Key)>;
 
 /// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
 /// The details of this type definition aren't important; clients can only
 /// create a ThinBackend using one of the create*ThinBackend() functions below.
-typedef std::function<std::unique_ptr<ThinBackendProc>(
+using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
     Config &C, ModuleSummaryIndex &CombinedIndex,
     StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-    AddStreamFn AddStream, NativeObjectCache Cache)>
-    ThinBackend;
+    AddStreamFn AddStream, NativeObjectCache Cache)>;
 
 /// This ThinBackend runs the individual backend jobs in-process.
 ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
@@ -397,7 +411,10 @@ private:
                    const SymbolResolution *&ResI, const SymbolResolution *ResE);
 
   Error runRegularLTO(AddStreamFn AddStream);
-  Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
+  Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+                   const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
+
+  Error checkPartiallySplit();
 
   mutable bool CalledGetMaxTasks = false;
 
diff --git a/include/llvm/LTO/LTOBackend.h b/include/llvm/LTO/LTOBackend.h
index d4743f6940ff..4ff8a1993d49 100644
--- a/include/llvm/LTO/LTOBackend.h
+++ b/include/llvm/LTO/LTOBackend.h
@@ -1,9 +1,8 @@
 //===-LTOBackend.h - LLVM Link Time Optimizer Backend ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/LTO/SummaryBasedOptimizations.h b/include/llvm/LTO/SummaryBasedOptimizations.h
index ad3a8e7dc77b..6697c821a5ea 100644
--- a/include/llvm/LTO/SummaryBasedOptimizations.h
+++ b/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -1,9 +1,8 @@
 //=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/LTO/legacy/LTOCodeGenerator.h b/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 8f23b7cb4574..d3cb4c8b79a0 100644
--- a/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -1,9 +1,8 @@
 //===-LTOCodeGenerator.h - LLVM Link Time Optimizer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -242,6 +241,7 @@ private:
   TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
   std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
   bool Freestanding = false;
+  std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
 };
 }
 #endif
diff --git a/include/llvm/LTO/legacy/LTOModule.h b/include/llvm/LTO/legacy/LTOModule.h
index 017e223ed8a6..84b9b8c02942 100644
--- a/include/llvm/LTO/legacy/LTOModule.h
+++ b/include/llvm/LTO/legacy/LTOModule.h
@@ -1,9 +1,8 @@
 //===-LTOModule.h - LLVM Link Time Optimizer ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/IR/Module.h"
+#include "llvm/LTO/LTO.h"
 #include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Target/TargetMachine.h"
@@ -48,6 +48,8 @@ private:
 
   std::string LinkerOpts;
 
+  std::string DependentLibraries;
+
   std::unique_ptr<Module> Mod;
   MemoryBufferRef MBRef;
   ModuleSymbolTable SymTab;
@@ -156,9 +158,17 @@ public:
 
   const std::vector<StringRef> &getAsmUndefinedRefs() { return _asm_undefines; }
 
+  static lto::InputFile *createInputFile(const void *buffer, size_t buffer_size,
+                                         const char *path, std::string &out_error);
+
+  static size_t getDependentLibraryCount(lto::InputFile *input);
+
+  static const char *getDependentLibrary(lto::InputFile *input, size_t index, size_t *size);
+
 private:
   /// Parse metadata from the module
   // FIXME: it only parses "llvm.linker.options" metadata at the moment
+  // FIXME: can't access metadata in lazily loaded modules
   void parseMetadata();
 
   /// Parse the symbols from the module and model-level ASM and add them to
diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index d4c69a1ce260..210a2ce00bdf 100644
--- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -1,9 +1,8 @@
 //===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/LTO/LTO.h"
 #include "llvm/Support/CachePruning.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -32,23 +32,6 @@ class StringRef;
 class LLVMContext;
 class TargetMachine;
 
-/// Wrapper around MemoryBufferRef, owning the identifier
-class ThinLTOBuffer {
-  std::string OwnedIdentifier;
-  StringRef Buffer;
-
-public:
-  ThinLTOBuffer(StringRef Buffer, StringRef Identifier)
-      : OwnedIdentifier(Identifier), Buffer(Buffer) {}
-
-  MemoryBufferRef getMemBuffer() const {
-    return MemoryBufferRef(Buffer,
-                           {OwnedIdentifier.c_str(), OwnedIdentifier.size()});
-  }
-  StringRef getBuffer() const { return Buffer; }
-  StringRef getBufferIdentifier() const { return OwnedIdentifier; }
-};
-
 /// Helper to gather options relevant to the target machine creation
 struct TargetMachineBuilder {
   Triple TheTriple;
@@ -268,37 +251,49 @@ public:
    * and additionally resolve weak and linkonce symbols.
    * Index is updated to reflect linkage changes from weak resolution.
    */
-  void promote(Module &Module, ModuleSummaryIndex &Index);
+  void promote(Module &Module, ModuleSummaryIndex &Index,
+               const lto::InputFile &File);
 
   /**
    * Compute and emit the imported files for module at \p ModulePath.
    */
   void emitImports(Module &Module, StringRef OutputName,
-                   ModuleSummaryIndex &Index);
+                   ModuleSummaryIndex &Index,
+                   const lto::InputFile &File);
 
   /**
    * Perform cross-module importing for the module identified by
    * ModuleIdentifier.
    */
-  void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
+  void crossModuleImport(Module &Module, ModuleSummaryIndex &Index,
+                         const lto::InputFile &File);
 
   /**
    * Compute the list of summaries needed for importing into module.
    */
   void gatherImportedSummariesForModule(
       Module &Module, ModuleSummaryIndex &Index,
-      std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
+      std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+      const lto::InputFile &File);
 
   /**
    * Perform internalization. Index is updated to reflect linkage changes.
    */
-  void internalize(Module &Module, ModuleSummaryIndex &Index);
+  void internalize(Module &Module, ModuleSummaryIndex &Index,
+                   const lto::InputFile &File);
 
   /**
    * Perform post-importing ThinLTO optimizations.
    */
   void optimize(Module &Module);
 
+  /**
+   * Write temporary object file to SavedObjectsDirectoryPath, write symlink
+   * to Cache directory if needed. Returns the path to the generated file in
+   * SavedObjectsDirectoryPath.
+   */
+  std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
+                                   const MemoryBuffer &OutputBuffer);
   /**@}*/
 
 private:
@@ -314,7 +309,7 @@ private:
 
   /// Vector holding the input buffers containing the bitcode modules to
   /// process.
-  std::vector<ThinLTOBuffer> Modules;
+  std::vector<std::unique_ptr<lto::InputFile>> Modules;
 
   /// Set of symbols that need to be preserved outside of the set of bitcode
   /// files.
diff --git a/include/llvm/LTO/legacy/UpdateCompilerUsed.h b/include/llvm/LTO/legacy/UpdateCompilerUsed.h
index 4be0027e97d7..af9d62b4af30 100644
--- a/include/llvm/LTO/legacy/UpdateCompilerUsed.h
+++ b/include/llvm/LTO/legacy/UpdateCompilerUsed.h
@@ -1,9 +1,8 @@
 //==------ UpdateCompilerUsed.h - LLVM Link Time Optimizer Utility --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/LineEditor/LineEditor.h b/include/llvm/LineEditor/LineEditor.h
index 68995d0633ad..0beaf1bb23a9 100644
--- a/include/llvm/LineEditor/LineEditor.h
+++ b/include/llvm/LineEditor/LineEditor.h
@@ -1,9 +1,8 @@
 //===-- llvm/LineEditor/LineEditor.h - line editor --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h
index 4f4af7187be4..4b0aabeee701 100644
--- a/include/llvm/LinkAllIR.h
+++ b/include/llvm/LinkAllIR.h
@@ -1,9 +1,8 @@
 //===----- LinkAllIR.h - Reference All VMCore Code --------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 0851c2f8d265..675d179eb22a 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -1,9 +1,8 @@
 //===- llvm/LinkAllPasses.h ------------ Reference All Passes ---*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,7 @@
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation.h"
@@ -103,6 +103,7 @@ namespace {
       (void) llvm::createGCOVProfilerPass();
       (void) llvm::createPGOInstrumentationGenLegacyPass();
       (void) llvm::createPGOInstrumentationUseLegacyPass();
+      (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass();
       (void) llvm::createPGOIndirectCallPromotionLegacyPass();
       (void) llvm::createPGOMemOPSizeOptLegacyPass();
       (void) llvm::createInstrProfilingLegacyPass();
@@ -188,10 +189,11 @@ namespace {
       (void) llvm::createPostDomTree();
       (void) llvm::createInstructionNamerPass();
       (void) llvm::createMetaRenamerPass();
+      (void) llvm::createAttributorLegacyPass();
       (void) llvm::createPostOrderFunctionAttrsLegacyPass();
       (void) llvm::createReversePostOrderFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
-      (void) llvm::createMergeICmpsPass();
+      (void) llvm::createMergeICmpsLegacyPass();
       (void) llvm::createExpandMemCmpPass();
       std::string buf;
       llvm::raw_string_ostream os(buf);
@@ -221,6 +223,7 @@ namespace {
       (void) llvm::createEliminateAvailableExternallyPass();
       (void) llvm::createScalarizeMaskedMemIntrinPass();
       (void) llvm::createWarnMissedTransformationsPass();
+      (void) llvm::createHardwareLoopsPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::ScalarEvolutionWrapperPass();
diff --git a/include/llvm/Linker/IRMover.h b/include/llvm/Linker/IRMover.h
index 235ada47cef4..e5df83f01fe3 100644
--- a/include/llvm/Linker/IRMover.h
+++ b/include/llvm/Linker/IRMover.h
@@ -1,9 +1,8 @@
 //===- IRMover.h ------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index 7776c720ec53..c9b1d42b3903 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -1,9 +1,8 @@
 //===- Linker.h - Module Linker Interface -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index ef33250204ec..2fe5ce252c94 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -1,9 +1,8 @@
-//===- ConstantPool.h - Keep track of assembler-generated  ------*- C++ -*-===//
+//===- ConstantPools.h - Keep track of assembler-generated ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h
index 8c0b4ecb8fd4..d5f69287a265 100644
--- a/include/llvm/MC/LaneBitmask.h
+++ b/include/llvm/MC/LaneBitmask.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 07835c21fced..1f3ad6c1e547 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmBackend.h - MC Asm Backend ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -88,6 +87,22 @@ public:
     return false;
   }
 
+  /// Hook to check if extra nop bytes must be inserted for alignment directive.
+  /// For some targets this may be necessary in order to support linker
+  /// relaxation. The number of bytes to insert is returned in Size.
+  virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
+                                                     unsigned &Size) {
+    return false;
+  }
+
+  /// Hook which indicates if the target requires a fixup to be generated when
+  /// handling an align directive in an executable section.
+  virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+                                             const MCAsmLayout &Layout,
+                                             MCAlignFragment &AF) {
+    return false;
+  }
+
   /// Apply the \p Value for given \p Fixup into the provided data fragment, at
   /// the offset specified by the fixup and following the fixup kind as
   /// appropriate. Errors (such as an out of range fixup value) should be
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 120fb8fa7492..971e9354da8c 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,16 +17,17 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include <vector>
 
 namespace llvm {
 
 class MCContext;
+class MCCFIInstruction;
 class MCExpr;
 class MCSection;
 class MCStreamer;
+class MCSubtargetInfo;
 class MCSymbol;
 
 namespace WinEH {
@@ -474,7 +474,13 @@ public:
   bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
   bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; }
   bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; }
-  unsigned getMaxInstLength() const { return MaxInstLength; }
+
+  /// Returns the maximum possible encoded instruction size in bytes. If \p STI
+  /// is null, this should be the maximum size for any subtarget.
+  virtual unsigned getMaxInstLength(const MCSubtargetInfo *STI = nullptr) const {
+    return MaxInstLength;
+  }
+
   unsigned getMinInstAlignment() const { return MinInstAlignment; }
   bool getDollarIsPC() const { return DollarIsPC; }
   const char *getSeparatorString() const { return SeparatorString; }
@@ -492,7 +498,7 @@ public:
   StringRef getPrivateLabelPrefix() const { return PrivateLabelPrefix; }
 
   bool hasLinkerPrivateGlobalPrefix() const {
-    return LinkerPrivateGlobalPrefix[0] != '\0';
+    return !LinkerPrivateGlobalPrefix.empty();
   }
 
   StringRef getLinkerPrivateGlobalPrefix() const {
@@ -598,9 +604,7 @@ public:
     return SupportsExtendedDwarfLocDirective;
   }
 
-  void addInitialFrameState(const MCCFIInstruction &Inst) {
-    InitialFrameState.push_back(Inst);
-  }
+  void addInitialFrameState(const MCCFIInstruction &Inst);
 
   const std::vector<MCCFIInstruction> &getInitialFrameState() const {
     return InitialFrameState;
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index 01c8ae49a6fc..1dfb4750af66 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -1,9 +1,8 @@
 //===- MCAsmInfoCOFF.h - COFF asm properties --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
index a533d604a89e..c889ce99cebe 100644
--- a/include/llvm/MC/MCAsmInfoDarwin.h
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -1,9 +1,8 @@
 //===- MCAsmInfoDarwin.h - Darwin asm properties ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCAsmInfoELF.h b/include/llvm/MC/MCAsmInfoELF.h
index f113afc9885e..aa2e5873e2c6 100644
--- a/include/llvm/MC/MCAsmInfoELF.h
+++ b/include/llvm/MC/MCAsmInfoELF.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmInfoELF.h - ELF Asm info --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCAsmInfoWasm.h b/include/llvm/MC/MCAsmInfoWasm.h
index 71c6ee28df70..3afc610b8b07 100644
--- a/include/llvm/MC/MCAsmInfoWasm.h
+++ b/include/llvm/MC/MCAsmInfoWasm.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCAsmInfoWasm.h - Wasm Asm info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCAsmInfoXCOFF.h b/include/llvm/MC/MCAsmInfoXCOFF.h
new file mode 100644
index 000000000000..2a72ba7398a7
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfoXCOFF.h
@@ -0,0 +1,25 @@
+//===- MCAsmInfoXCOFF.h - XCOFF asm properties ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMINFOXCOFF_H
+#define LLVM_MC_MCASMINFOXCOFF_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class MCAsmInfoXCOFF : public MCAsmInfo {
+  virtual void anchor();
+
+protected:
+  MCAsmInfoXCOFF();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCASMINFOXCOFF_H
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index b711db319302..45ac96f0b81e 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -1,9 +1,8 @@
 //===- MCAsmLayout.h - Assembly Layout Object -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCAsmMacro.h b/include/llvm/MC/MCAsmMacro.h
index 135fa4f2e33d..364d3b5f3666 100644
--- a/include/llvm/MC/MCAsmMacro.h
+++ b/include/llvm/MC/MCAsmMacro.h
@@ -1,9 +1,8 @@
 //===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 986c6e17548f..4543018901a4 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -1,9 +1,8 @@
 //===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index f1b0b784a2df..04b4367ada7b 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCCodeEmitter.h - Instruction Encoding -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCCodePadder.h b/include/llvm/MC/MCCodePadder.h
index 4dde6bf59272..f7b1a2113a9a 100644
--- a/include/llvm/MC/MCCodePadder.h
+++ b/include/llvm/MC/MCCodePadder.h
@@ -1,9 +1,8 @@
-//===- llvm/MC/CodePadder.h - MC Code Padder --------------------*- C++ -*-===//
+//===- llvm/MC/MCCodePadder.h - MC Code Padder ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h
index cef03a409f95..2126354cded6 100644
--- a/include/llvm/MC/MCCodeView.h
+++ b/include/llvm/MC/MCCodeView.h
@@ -1,9 +1,8 @@
 //===- MCCodeView.h - Machine Code CodeView support -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 3b8ac8b79e21..5c2124cc0d15 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -1,9 +1,8 @@
 //===- MCContext.h - Machine Code Context -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/MC/MCAsmMacro.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -50,6 +50,7 @@ namespace llvm {
   class MCSectionELF;
   class MCSectionMachO;
   class MCSectionWasm;
+  class MCSectionXCOFF;
   class MCStreamer;
   class MCSymbol;
   class MCSymbolELF;
@@ -92,6 +93,7 @@ namespace llvm {
     SpecificBumpPtrAllocator<MCSectionELF> ELFAllocator;
     SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
     SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
+    SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
 
     /// Bindings of names to symbols.
     SymbolTable Symbols;
@@ -247,10 +249,25 @@ namespace llvm {
       }
     };
 
+    struct XCOFFSectionKey {
+      std::string SectionName;
+      XCOFF::StorageMappingClass MappingClass;
+
+      XCOFFSectionKey(StringRef SectionName,
+                      XCOFF::StorageMappingClass MappingClass)
+          : SectionName(SectionName), MappingClass(MappingClass) {}
+
+      bool operator<(const XCOFFSectionKey &Other) const {
+        return std::tie(SectionName, MappingClass) <
+               std::tie(Other.SectionName, Other.MappingClass);
+      }
+    };
+
     StringMap<MCSectionMachO *> MachOUniquingMap;
     std::map<ELFSectionKey, MCSectionELF *> ELFUniquingMap;
     std::map<COFFSectionKey, MCSectionCOFF *> COFFUniquingMap;
     std::map<WasmSectionKey, MCSectionWasm *> WasmUniquingMap;
+    std::map<XCOFFSectionKey, MCSectionXCOFF *> XCOFFUniquingMap;
     StringMap<bool> RelSecNames;
 
     SpecificBumpPtrAllocator<MCSubtargetInfo> MCSubtargetAllocator;
@@ -441,8 +458,6 @@ namespace llvm {
                                   SectionKind Kind,
                                   const char *BeginSymName = nullptr);
 
-    MCSectionCOFF *getCOFFSection(StringRef Section);
-
     /// Gets or creates a section equivalent to Sec that is associated with the
     /// section containing KeySym. For example, to create a debug info section
     /// associated with an inline function, pass the normal debug info section
@@ -473,6 +488,11 @@ namespace llvm {
                                   const MCSymbolWasm *Group, unsigned UniqueID,
                                   const char *BeginSymName);
 
+    MCSectionXCOFF *getXCOFFSection(StringRef Section,
+                                    XCOFF::StorageMappingClass MappingClass,
+                                    SectionKind K,
+                                    const char *BeginSymName = nullptr);
+
     // Create and save a copy of STI and return a reference to the copy.
     MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI);
 
@@ -489,12 +509,6 @@ namespace llvm {
     /// Set the compilation directory for DW_AT_comp_dir
     void setCompilationDir(StringRef S) { CompilationDir = S.str(); }
 
-    /// Get the debug prefix map.
-    const std::map<const std::string, const std::string> &
-    getDebugPrefixMap() const {
-      return DebugPrefixMap;
-    }
-
     /// Add an entry to the debug prefix map.
     void addDebugPrefixMapEntry(const std::string &From, const std::string &To);
 
@@ -512,7 +526,7 @@ namespace llvm {
     /// Creates an entry in the dwarf file and directory tables.
     Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
                                     unsigned FileNumber,
-                                    MD5::MD5Result *Checksum,
+                                    Optional<MD5::MD5Result> Checksum,
                                     Optional<StringRef> Source, unsigned CUID);
 
     bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0);
@@ -539,13 +553,6 @@ namespace llvm {
       return getMCDwarfLineTable(CUID).getMCDwarfDirs();
     }
 
-    bool hasMCLineSections() const {
-      for (const auto &Table : MCDwarfLineTablesCUMap)
-        if (!Table.second.getMCDwarfFiles().empty() || Table.second.getLabel())
-          return true;
-      return false;
-    }
-
     unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; }
 
     void setDwarfCompileUnitID(unsigned CUIndex) {
@@ -555,7 +562,8 @@ namespace llvm {
     /// Specifies the "root" file and directory of the compilation unit.
     /// These are "file 0" and "directory 0" in DWARF v5.
     void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
-                                StringRef Filename, MD5::MD5Result *Checksum,
+                                StringRef Filename,
+                                Optional<MD5::MD5Result> Checksum,
                                 Optional<StringRef> Source) {
       getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
                                             Source);
@@ -595,6 +603,10 @@ namespace llvm {
       GenDwarfFileNumber = FileNumber;
     }
 
+    /// Specifies information about the "root file" for assembler clients
+    /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up.
+    void setGenDwarfRootFile(StringRef FileName, StringRef Buffer);
+
     const SetVector<MCSection *> &getGenDwarfSectionSyms() {
       return SectionsForRanges;
     }
diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h
index 8c74b169135b..4029264c2026 100644
--- a/include/llvm/MC/MCDirectives.h
+++ b/include/llvm/MC/MCDirectives.h
@@ -1,9 +1,8 @@
 //===- MCDirectives.h - Enums for directives on various targets -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@ enum MCSymbolAttr {
   MCSA_Invalid = 0,    ///< Not a valid directive.
 
   // Various directives in alphabetical order.
+  MCSA_Cold,                ///< .cold (MachO)
   MCSA_ELF_TypeFunction,    ///< .type _foo, STT_FUNC  # aka @function
   MCSA_ELF_TypeIndFunction, ///< .type _foo, STT_GNU_IFUNC
   MCSA_ELF_TypeObject,      ///< .type _foo, STT_OBJECT  # aka @object
diff --git a/include/llvm/MC/MCDisassembler/MCDisassembler.h b/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 7f09c05ccf2a..268f3ccad889 100644
--- a/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +16,7 @@
 namespace llvm {
 
 template <typename T> class ArrayRef;
+class StringRef;
 class MCContext;
 class MCInst;
 class MCSubtargetInfo;
@@ -80,6 +80,23 @@ public:
                                       raw_ostream &VStream,
                                       raw_ostream &CStream) const = 0;
 
+  /// May parse any prelude that precedes instructions after the start of a
+  /// symbol. Needed for some targets, e.g. WebAssembly.
+  ///
+  /// \param Name     - The name of the symbol.
+  /// \param Size     - The number of bytes consumed.
+  /// \param Address  - The address, in the memory space of region, of the first
+  ///                   byte of the symbol.
+  /// \param Bytes    - A reference to the actual bytes at the symbol location.
+  /// \param VStream  - The stream to print warnings and diagnostic messages on.
+  /// \param CStream  - The stream to print comments and annotations on.
+  /// \return         - MCDisassembler::Success if the bytes are valid,
+  ///                   MCDisassembler::Fail if the bytes are invalid.
+  virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+                                     ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                     raw_ostream &VStream,
+                                     raw_ostream &CStream) const;
+
 private:
   MCContext &Ctx;
 
diff --git a/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
index df909a0dccd3..ffac5ee5cb1f 100644
--- a/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
+++ b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCExternalSymbolizer.h - ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCDisassembler/MCRelocationInfo.h b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
index 6030ae660d38..efc59da19335 100644
--- a/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
+++ b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCRelocationInfo.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index 0bfa569474ec..b7ca83a5f16c 100644
--- a/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCSymbolizer.h - MCSymbolizer class --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 7b96e9aaca89..1a37aafd0654 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -1,9 +1,8 @@
 //===- MCDwarf.h - Machine Code Dwarf support -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,11 +41,14 @@ class raw_ostream;
 class SMLoc;
 class SourceMgr;
 
-/// Instances of this class represent the name of the dwarf
-/// .file directive and its associated dwarf file number in the MC file,
-/// and MCDwarfFile's are created and uniqued by the MCContext class where
-/// the file number for each is its index into the vector of DwarfFiles (note
-/// index 0 is not used and not a valid dwarf file number).
+/// Instances of this class represent the name of the dwarf .file directive and
+/// its associated dwarf file number in the MC file. MCDwarfFile's are created
+/// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1;
+/// i.e. the entry with file number 1 is the first element in the vector of
+/// DwarfFiles and there is no MCDwarfFile with file number 0. In Dwarf 5 file
+/// numbers start from 0, with the MCDwarfFile with file number 0 being the
+/// primary source file, and file numbers correspond to their index in the
+/// vector.
 struct MCDwarfFile {
   // The base name of the file without its directory path.
   std::string Name;
@@ -56,7 +58,7 @@ struct MCDwarfFile {
 
   /// The MD5 checksum, if there is one. Non-owning pointer to data allocated
   /// in MCContext.
-  MD5::MD5Result *Checksum = nullptr;
+  Optional<MD5::MD5Result> Checksum;
 
   /// The source code of the file. Non-owning reference to data allocated in
   /// MCContext.
@@ -224,8 +226,9 @@ public:
   MCDwarfLineTableHeader() = default;
 
   Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
-                                MD5::MD5Result *Checksum,
-                                Optional<StringRef> &Source,
+                                Optional<MD5::MD5Result> Checksum,
+                                Optional<StringRef> Source,
+                                uint16_t DwarfVersion,
                                 unsigned FileNumber = 0);
   std::pair<MCSymbol *, MCSymbol *>
   Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
@@ -246,32 +249,50 @@ public:
     return MCDwarfFiles.empty() || (HasAllMD5 == HasAnyMD5);
   }
 
+  void setRootFile(StringRef Directory, StringRef FileName,
+                   Optional<MD5::MD5Result> Checksum,
+                   Optional<StringRef> Source) {
+    CompilationDir = Directory;
+    RootFile.Name = FileName;
+    RootFile.DirIndex = 0;
+    RootFile.Checksum = Checksum;
+    RootFile.Source = Source;
+    trackMD5Usage(Checksum.hasValue());
+    HasSource = Source.hasValue();
+  }
+
+  void resetFileTable() {
+    MCDwarfDirs.clear();
+    MCDwarfFiles.clear();
+    RootFile.Name.clear();
+    resetMD5Usage();
+    HasSource = false;
+  }
+
 private:
   void emitV2FileDirTables(MCStreamer *MCOS) const;
-  void emitV5FileDirTables(MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr,
-                           StringRef CtxCompilationDir) const;
+  void emitV5FileDirTables(MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr) const;
 };
 
 class MCDwarfDwoLineTable {
   MCDwarfLineTableHeader Header;
+  bool HasSplitLineTable = false;
 
 public:
   void maybeSetRootFile(StringRef Directory, StringRef FileName,
-                        MD5::MD5Result *Checksum, Optional<StringRef> Source) {
+                        Optional<MD5::MD5Result> Checksum,
+                        Optional<StringRef> Source) {
     if (!Header.RootFile.Name.empty())
       return;
-    Header.CompilationDir = Directory;
-    Header.RootFile.Name = FileName;
-    Header.RootFile.DirIndex = 0;
-    Header.RootFile.Checksum = Checksum;
-    Header.RootFile.Source = Source;
-    Header.trackMD5Usage(Checksum);
-    Header.HasSource = Source.hasValue();
+    Header.setRootFile(Directory, FileName, Checksum, Source);
   }
 
   unsigned getFile(StringRef Directory, StringRef FileName,
-                   MD5::MD5Result *Checksum, Optional<StringRef> Source) {
-    return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source));
+                   Optional<MD5::MD5Result> Checksum, uint16_t DwarfVersion,
+                   Optional<StringRef> Source) {
+    HasSplitLineTable = true;
+    return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source,
+                                      DwarfVersion));
   }
 
   void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
@@ -291,36 +312,34 @@ public:
               Optional<MCDwarfLineStr> &LineStr) const;
 
   Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
-                                MD5::MD5Result *Checksum,
+                                Optional<MD5::MD5Result> Checksum,
                                 Optional<StringRef> Source,
+                                uint16_t DwarfVersion,
                                 unsigned FileNumber = 0);
   unsigned getFile(StringRef &Directory, StringRef &FileName,
-                   MD5::MD5Result *Checksum, Optional<StringRef> &Source,
-                   unsigned FileNumber = 0) {
+                   Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
+                   uint16_t DwarfVersion, unsigned FileNumber = 0) {
     return cantFail(tryGetFile(Directory, FileName, Checksum, Source,
-                               FileNumber));
+                               DwarfVersion, FileNumber));
   }
 
   void setRootFile(StringRef Directory, StringRef FileName,
-                   MD5::MD5Result *Checksum, Optional<StringRef> Source) {
+                   Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source) {
     Header.CompilationDir = Directory;
     Header.RootFile.Name = FileName;
     Header.RootFile.DirIndex = 0;
     Header.RootFile.Checksum = Checksum;
     Header.RootFile.Source = Source;
-    Header.trackMD5Usage(Checksum);
+    Header.trackMD5Usage(Checksum.hasValue());
     Header.HasSource = Source.hasValue();
   }
 
-  void resetRootFile() {
-    assert(Header.MCDwarfFiles.empty());
-    Header.RootFile.Name.clear();
-    Header.resetMD5Usage();
-    Header.HasSource = false;
-  }
+  void resetFileTable() { Header.resetFileTable(); }
 
   bool hasRootFile() const { return !Header.RootFile.Name.empty(); }
 
+  const MCDwarfFile &getRootFile() const { return Header.RootFile; }
+
   // Report whether MD5 usage has been consistent (all-or-none).
   bool isMD5UsageConsistent() const { return Header.isMD5UsageConsistent(); }
 
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index f226d6a45a5a..2d441fdeee28 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCELFObjectWriter.h - ELF Object Writer ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,13 +53,14 @@ struct ELFRelocationEntry {
 
 class MCELFObjectTargetWriter : public MCObjectTargetWriter {
   const uint8_t OSABI;
+  const uint8_t ABIVersion;
   const uint16_t EMachine;
   const unsigned HasRelocationAddend : 1;
   const unsigned Is64Bit : 1;
 
 protected:
   MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, uint16_t EMachine_,
-                          bool HasRelocationAddend);
+                          bool HasRelocationAddend_, uint8_t ABIVersion_ = 0);
 
 public:
   virtual ~MCELFObjectTargetWriter() = default;
@@ -98,6 +98,7 @@ public:
   /// \name Accessors
   /// @{
   uint8_t getOSABI() const { return OSABI; }
+  uint8_t getABIVersion() const { return ABIVersion; }
   uint16_t getEMachine() const { return EMachine; }
   bool hasRelocationAddend() const { return HasRelocationAddend; }
   bool is64Bit() const { return Is64Bit; }
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index 3797079661e4..8838d53d75b5 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -1,9 +1,8 @@
 //===- MCELFStreamer.h - MCStreamer ELF Object File Interface ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 8cb6b86fd672..fb23c0114c76 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -1,9 +1,8 @@
 //===- MCExpr.h - Assembly Level Expressions --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -135,15 +134,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
 ////  Represent a constant integer expression.
 class MCConstantExpr : public MCExpr {
   int64_t Value;
+  bool PrintInHex = false;
 
-  explicit MCConstantExpr(int64_t Value)
+  MCConstantExpr(int64_t Value)
       : MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {}
 
+  MCConstantExpr(int64_t Value, bool PrintInHex)
+      : MCExpr(MCExpr::Constant, SMLoc()), Value(Value),
+        PrintInHex(PrintInHex) {}
+
 public:
   /// \name Construction
   /// @{
 
-  static const MCConstantExpr *create(int64_t Value, MCContext &Ctx);
+  static const MCConstantExpr *create(int64_t Value, MCContext &Ctx,
+                                      bool PrintInHex = false);
 
   /// @}
   /// \name Accessors
@@ -151,6 +156,8 @@ public:
 
   int64_t getValue() const { return Value; }
 
+  bool useHexFormat() const { return PrintInHex; }
+
   /// @}
 
   static bool classof(const MCExpr *E) {
@@ -285,16 +292,17 @@ public:
     VK_Hexagon_IE,
     VK_Hexagon_IE_GOT,
 
-    VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
-    VK_WebAssembly_GLOBAL,   // Global object index
-    VK_WebAssembly_TYPEINDEX,// Type table index
-    VK_WebAssembly_EVENT,    // Event index
+    VK_WASM_TYPEINDEX, // Reference to a symbol's type (signature)
+    VK_WASM_MBREL,     // Memory address relative to memory base
+    VK_WASM_TBREL,     // Table index relative to table base
 
     VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
     VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
     VK_AMDGPU_REL32_LO,      // symbol@rel32@lo
     VK_AMDGPU_REL32_HI,      // symbol@rel32@hi
     VK_AMDGPU_REL64,         // symbol@rel64
+    VK_AMDGPU_ABS32_LO,      // symbol@abs32@lo
+    VK_AMDGPU_ABS32_HI,      // symbol@abs32@hi
 
     VK_TPREL,
     VK_DTPREL
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index ad34d9494bb9..218ae0d13189 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Fixed length disassembler decoder state machine driver.
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index 5f301eafc556..accffb7f2247 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCFixup.h - Instruction Relocation and Patching -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,7 +20,8 @@ class MCExpr;
 
 /// Extensible enumeration to represent the type of a fixup.
 enum MCFixupKind {
-  FK_Data_1 = 0, ///< A one-byte fixup.
+  FK_NONE = 0,   ///< A no-op fixup.
+  FK_Data_1,     ///< A one-byte fixup.
   FK_Data_2,     ///< A two-byte fixup.
   FK_Data_4,     ///< A four-byte fixup.
   FK_Data_8,     ///< A eight-byte fixup.
diff --git a/include/llvm/MC/MCFixupKindInfo.h b/include/llvm/MC/MCFixupKindInfo.h
index 483abb39403f..0ea34866db6a 100644
--- a/include/llvm/MC/MCFixupKindInfo.h
+++ b/include/llvm/MC/MCFixupKindInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCFixupKindInfo.h - Fixup Descriptors -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index c999c9fc4f17..aadf2ce725ea 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -1,9 +1,8 @@
 //===- MCFragment.h - Fragment type hierarchy -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index d501b686bb2e..8df8096bba94 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCInst.h - MCInst class --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -154,8 +153,6 @@ public:
   bool evaluateAsConstantImm(int64_t &Imm) const;
 };
 
-template <> struct isPodLike<MCOperand> { static const bool value = true; };
-
 /// Instances of this class represent a single low-level machine
 /// instruction.
 class MCInst {
@@ -190,6 +187,7 @@ public:
 
   void clear() { Operands.clear(); }
   void erase(iterator I) { Operands.erase(I); }
+  void erase(iterator First, iterator Last) { Operands.erase(First, Last); }
   size_t size() const { return Operands.size(); }
   iterator begin() { return Operands.begin(); }
   const_iterator begin() const { return Operands.begin(); }
diff --git a/include/llvm/MC/MCInstBuilder.h b/include/llvm/MC/MCInstBuilder.h
index c5c4f481e7df..0c8e01fdc412 100644
--- a/include/llvm/MC/MCInstBuilder.h
+++ b/include/llvm/MC/MCInstBuilder.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCInstBuilder.h - Simplify creation of MCInsts --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index df221e1db0e7..6bbc4bc2903b 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -1,9 +1,8 @@
 //===- MCInstPrinter.h - MCInst to target assembly syntax -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -65,6 +64,10 @@ public:
 
   virtual ~MCInstPrinter();
 
+  /// Customize the printer according to a command line option.
+  /// @return true if the option is recognized and applied.
+  virtual bool applyTargetSpecificCLOption(StringRef Opt) { return false; }
+
   /// Specify a stream to emit comments to.
   void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
 
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index 200f10f7d64b..dfefd7e72777 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCInstrAnalysis.h - InstrDesc target hooks -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 61e7d09afbcb..0aa586dfc901 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -135,6 +134,7 @@ enum Flag {
   FoldableAsLoad,
   MayLoad,
   MayStore,
+  MayRaiseFPException,
   Predicable,
   NotDuplicable,
   UnmodeledSideEffects,
@@ -404,6 +404,11 @@ public:
   /// may not actually modify anything, for example.
   bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
 
+  /// Return true if this instruction may raise a floating-point exception.
+  bool mayRaiseFPException() const {
+    return Flags & (1ULL << MCID::MayRaiseFPException);
+  }
+
   /// Return true if this instruction has side
   /// effects that are not modeled by other flags.  This does not return true
   /// for instructions whose effects are captured by:
diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h
index 18da87cf8929..874b1e46795b 100644
--- a/include/llvm/MC/MCInstrInfo.h
+++ b/include/llvm/MC/MCInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCInstrInfo.h - Target Instruction Info ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index fe81376e0db7..485aa663272e 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCInstrItineraries.h - Scheduling ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCLabel.h b/include/llvm/MC/MCLabel.h
index aaf70691fc01..0b8afac8f754 100644
--- a/include/llvm/MC/MCLabel.h
+++ b/include/llvm/MC/MCLabel.h
@@ -1,9 +1,8 @@
 //===- MCLabel.h - Machine Code Directional Local Labels --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index f0fd07f43cf3..f2a1364ad884 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -1,10 +1,9 @@
 //===- MCLinkerOptimizationHint.h - LOH interface ---------------*- C++ -*-===//
 //
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index 22fbeb72a4ec..278aebee99ac 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCMachObjectWriter.h - Mach Object Writer --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index f8142ccd8ac5..abc87bf27748 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCObjectFileInfo.h - Object File Info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -160,6 +159,9 @@ protected:
   /// FaultMap section.
   MCSection *FaultMapSection;
 
+  /// Remarks section.
+  MCSection *RemarksSection;
+
   /// EH frame section.
   ///
   /// It is initialized on demand so it can be overwritten (with uniquing).
@@ -315,6 +317,7 @@ public:
 
   MCSection *getStackMapSection() const { return StackMapSection; }
   MCSection *getFaultMapSection() const { return FaultMapSection; }
+  MCSection *getRemarksSection() const { return RemarksSection; }
 
   MCSection *getStackSizesSection(const MCSection &TextSec) const;
 
@@ -381,7 +384,7 @@ public:
     return EHFrameSection;
   }
 
-  enum Environment { IsMachO, IsELF, IsCOFF, IsWasm };
+  enum Environment { IsMachO, IsELF, IsCOFF, IsWasm, IsXCOFF };
   Environment getObjectFileType() const { return Env; }
 
   bool isPositionIndependent() const { return PositionIndependent; }
@@ -397,6 +400,7 @@ private:
   void initELFMCObjectFileInfo(const Triple &T, bool Large);
   void initCOFFMCObjectFileInfo(const Triple &T);
   void initWasmMCObjectFileInfo(const Triple &T);
+  void initXCOFFMCObjectFileInfo(const Triple &T);
   MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const;
 
 public:
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 892909656c15..8affca49490f 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -1,9 +1,8 @@
 //===- MCObjectStreamer.h - MCStreamer Object File Interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -116,8 +115,7 @@ public:
   void EmitSLEB128Value(const MCExpr *Value) override;
   void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
   void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool = false) override;
+  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
 
   /// Emit an instruction to a special fragment, because this instruction
   /// can change its size during relaxation.
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 8bae2bf20083..2547b2b7c9c1 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCObjectWriter.h - Object File Writer Interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h
index a6e0fbd7f337..ea2155010081 100644
--- a/include/llvm/MC/MCParser/AsmCond.h
+++ b/include/llvm/MC/MCParser/AsmCond.h
@@ -1,9 +1,8 @@
 //===- AsmCond.h - Assembly file conditional assembly  ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 2e9b8dfa3b26..b7294493b2f8 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -1,9 +1,8 @@
 //===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index ea13d1cdc09f..e89abeaac94c 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index b80289878e6e..da5653ee71d3 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmParser.h - Abstract Asm Parser Interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -130,9 +129,6 @@ protected: // Can only create subclasses.
   /// Flag tracking whether any errors have been encountered.
   bool HadError = false;
 
-  /// Enable print [latency:throughput] in output file.
-  bool EnablePrintSchedInfo = false;
-
   bool ShowParsedOperands = false;
 
 public:
@@ -166,9 +162,6 @@ public:
   bool getShowParsedOperands() const { return ShowParsedOperands; }
   void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
 
-  void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
-  bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
-
   /// Run the parser on the input source buffer.
   virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
 
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 1a132bceddc5..5d2afe81a54b 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmParserExtension.h - Asm Parser Hooks --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/MCAsmParserUtils.h b/include/llvm/MC/MCParser/MCAsmParserUtils.h
index 84173bb9cb8e..d692da7402fe 100644
--- a/include/llvm/MC/MCParser/MCAsmParserUtils.h
+++ b/include/llvm/MC/MCParser/MCAsmParserUtils.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCAsmParserUtils.h - Asm Parser Utilities --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 4af76ac2a858..2b6e2aa48b8f 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCParsedAsmOperand.h - Asm Parser Operand --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h
index ccf13a6a4fb4..849dbd57f1aa 100644
--- a/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +16,7 @@
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
 #include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/SMLoc.h"
 #include <cstdint>
 #include <memory>
@@ -203,7 +203,7 @@ public:
   // The instruction encoding is not valid because it requires some target
   // features that are not currently enabled. MissingFeatures has a bit set for
   // each feature that the encoding needs but which is not enabled.
-  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
+  static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
     NearMissInfo Result;
     Result.Kind = NearMissFeature;
     Result.Features = MissingFeatures;
@@ -255,7 +255,7 @@ public:
 
   // Feature flags required by the instruction, that the current target does
   // not have.
-  uint64_t getFeatures() const {
+  const FeatureBitset& getFeatures() const {
     assert(Kind == NearMissFeature);
     return Features;
   }
@@ -305,7 +305,7 @@ private:
   };
 
   union {
-    uint64_t Features;
+    FeatureBitset Features;
     unsigned PredicateError;
     MissedOpInfo MissedOperand;
     TooFewOperandsInfo TooFewOperands;
@@ -335,7 +335,7 @@ protected: // Can only create subclasses.
   MCSubtargetInfo &copySTI();
 
   /// AvailableFeatures - The current set of available features.
-  uint64_t AvailableFeatures = 0;
+  FeatureBitset AvailableFeatures;
 
   /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
   bool ParsingInlineAsm = false;
@@ -360,8 +360,12 @@ public:
 
   const MCSubtargetInfo &getSTI() const;
 
-  uint64_t getAvailableFeatures() const { return AvailableFeatures; }
-  void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
+  const FeatureBitset& getAvailableFeatures() const {
+    return AvailableFeatures;
+  }
+  void setAvailableFeatures(const FeatureBitset& Value) {
+    AvailableFeatures = Value;
+  }
 
   bool isParsingInlineAsm () { return ParsingInlineAsm; }
   void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
@@ -380,9 +384,6 @@ public:
   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                              SMLoc &EndLoc) = 0;
 
-  /// Sets frame register corresponding to the current MachineFunction.
-  virtual void SetFrameRegister(unsigned RegNo) {}
-
   /// ParseInstruction - Parse one assembly instruction.
   ///
   /// The parser is positioned following the instruction name. The target
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 8d8c677c77ea..92d39c3fcfb7 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- MC/MCRegisterInfo.h - Target Register Description --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 689ac73cbdd1..df3248ee6e86 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #ifndef LLVM_MC_MCSCHEDULE_H
 #define LLVM_MC_MCSCHEDULE_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DataTypes.h"
@@ -370,6 +370,11 @@ struct MCSchedModel {
   getReciprocalThroughput(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
                           const MCInst &Inst) const;
 
+  /// Returns the maximum forwarding delay for register reads dependent on
+  /// writes of scheduling class WriteResourceIdx.
+  static unsigned getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
+                                           unsigned WriteResourceIdx = 0);
+
   /// Returns the default initialized model.
   static const MCSchedModel &GetDefaultSchedModel() { return Default; }
   static const MCSchedModel Default;
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index eb210b4e9dfa..6fad1ec2069c 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -1,9 +1,8 @@
 //===- MCSection.h - Machine Code Sections ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,7 +37,7 @@ template <> struct ilist_alloc_traits<MCFragment> {
 /// current translation unit.  The MCContext class uniques and creates these.
 class MCSection {
 public:
-  enum SectionVariant { SV_COFF = 0, SV_ELF, SV_MachO, SV_Wasm };
+  enum SectionVariant { SV_COFF = 0, SV_ELF, SV_MachO, SV_Wasm, SV_XCOFF };
 
   /// Express the state of bundle locked groups while emitting code.
   enum BundleLockStateType {
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index 24b9f8898ebb..8be95e0f1de5 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -1,9 +1,8 @@
 //===- MCSectionCOFF.h - COFF Machine Code Sections -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,8 +62,6 @@ private:
   }
 
 public:
-  ~MCSectionCOFF();
-
   /// Decides whether a '.section' directive should be printed before the
   /// section name
   bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 00c289c6bd6e..fe6b2d7afc79 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -1,9 +1,8 @@
 //===- MCSectionELF.h - ELF Machine Code Sections ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,8 +63,6 @@ private:
   void setSectionName(StringRef Name) { SectionName = Name; }
 
 public:
-  ~MCSectionELF();
-
   /// Decides whether a '.section' directive should be printed before the
   /// section name
   bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 89db09cbdbdc..2c73661fb1fd 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -1,9 +1,8 @@
 //===- MCSectionMachO.h - MachO Machine Code Sections -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCSectionWasm.h b/include/llvm/MC/MCSectionWasm.h
index ab4cd7b007ec..2941a40f3b8c 100644
--- a/include/llvm/MC/MCSectionWasm.h
+++ b/include/llvm/MC/MCSectionWasm.h
@@ -1,9 +1,8 @@
 //===- MCSectionWasm.h - Wasm Machine Code Sections -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,20 +42,19 @@ class MCSectionWasm final : public MCSection {
   // segment
   uint32_t SegmentIndex = 0;
 
+  // Whether this data segment is passive
+  bool IsPassive = false;
+
   friend class MCContext;
   MCSectionWasm(StringRef Section, SectionKind K, const MCSymbolWasm *group,
                 unsigned UniqueID, MCSymbol *Begin)
       : MCSection(SV_Wasm, K, Begin), SectionName(Section), UniqueID(UniqueID),
         Group(group) {}
 
-  void setSectionName(StringRef Name) { SectionName = Name; }
-
 public:
-  ~MCSectionWasm();
-
   /// Decides whether a '.section' directive should be printed before the
   /// section name
-  bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+  bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
 
   StringRef getSectionName() const { return SectionName; }
   const MCSymbolWasm *getGroup() const { return Group; }
@@ -68,7 +66,8 @@ public:
   bool isVirtualSection() const override;
 
   bool isWasmData() const {
-    return Kind.isGlobalWriteableData() || Kind.isReadOnly();
+    return Kind.isGlobalWriteableData() || Kind.isReadOnly() ||
+           Kind.isThreadLocal();
   }
 
   bool isUnique() const { return UniqueID != ~0U; }
@@ -80,6 +79,14 @@ public:
   uint32_t getSegmentIndex() const { return SegmentIndex; }
   void setSegmentIndex(uint32_t Index) { SegmentIndex = Index; }
 
+  bool getPassive() const {
+    assert(isWasmData()); // Callers may only query data sections.
+    return IsPassive;
+  }
+  void setPassive(bool V = true) {
+    assert(isWasmData()); // Callers may only mark data sections.
+    IsPassive = V;
+  }
   static bool classof(const MCSection *S) { return S->getVariant() == SV_Wasm; }
 };
 
diff --git a/include/llvm/MC/MCSectionXCOFF.h b/include/llvm/MC/MCSectionXCOFF.h
new file mode 100644
index 000000000000..2a3f391fd3e2
--- /dev/null
+++ b/include/llvm/MC/MCSectionXCOFF.h
@@ -0,0 +1,56 @@
+//===- MCSectionXCOFF.h - XCOFF Machine Code Sections -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionXCOFF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONXCOFF_H
+#define LLVM_MC_MCSECTIONXCOFF_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+// This class represents an XCOFF `Control Section`, more commonly referred to
+// as a csect. A csect represents the smallest possible unit of data/code which
+// will be relocated as a single block.
+class MCSectionXCOFF final : public MCSection {
+  friend class MCContext; // The constructor is private; MCContext creates us.
+
+  StringRef Name; // Csect name, as returned by getSectionName().
+  XCOFF::StorageMappingClass MappingClass; // Storage mapping class (SMC).
+
+  MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC,
+                 SectionKind K, MCSymbol *Begin)
+      : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC) {}
+
+public:
+  ~MCSectionXCOFF();
+
+  static bool classof(const MCSection *S) {
+    return S->getVariant() == SV_XCOFF;
+  }
+
+  StringRef getSectionName() const { return Name; }
+  XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; }
+
+  void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+                            raw_ostream &OS,
+                            const MCExpr *Subsection) const override;
+  bool UseCodeAlign() const override;
+  bool isVirtualSection() const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index f613d3a1943f..731e7515448c 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -1,9 +1,8 @@
 //===- MCStreamer.h - High-level Streaming Machine Code Output --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,7 +19,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWinEH.h"
@@ -44,6 +42,7 @@ class MCAsmBackend;
 class MCCodeEmitter;
 struct MCCodePaddingContext;
 class MCContext;
+struct MCDwarfFrameInfo;
 class MCExpr;
 class MCInst;
 class MCInstPrinter;
@@ -267,10 +266,8 @@ public:
   /// closed. Otherwise, issue an error and return null.
   WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
 
-  unsigned getNumFrameInfos() { return DwarfFrameInfos.size(); }
-  ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const {
-    return DwarfFrameInfos;
-  }
+  unsigned getNumFrameInfos();
+  ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const;
 
   bool hasUnfinishedDwarfFrameInfo();
 
@@ -629,13 +626,20 @@ public:
   /// to pass in a MCExpr for constant integers.
   virtual void EmitIntValue(uint64_t Value, unsigned Size);
 
+  /// Special case of EmitValue that avoids the client having to pass
+  /// in a MCExpr for constant integers; overriders may print the value in hex
+  /// for certain modes. This base version just calls EmitIntValue.
+  virtual void EmitIntValueInHex(uint64_t Value, unsigned Size) {
+    EmitIntValue(Value, Size);
+  }
+
   virtual void EmitULEB128Value(const MCExpr *Value);
 
   virtual void EmitSLEB128Value(const MCExpr *Value);
 
   /// Special case of EmitULEB128Value that avoids the client having to
   /// pass in a MCExpr for constant integers.
-  void EmitULEB128IntValue(uint64_t Value);
+  void EmitULEB128IntValue(uint64_t Value, unsigned PadTo = 0);
 
   /// Special case of EmitSLEB128Value that avoids the client having to
   /// pass in a MCExpr for constant integers.
@@ -782,7 +786,7 @@ public:
   /// implements the DWARF2 '.file 4 "foo.c"' assembler directive.
   unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
                                   StringRef Filename,
-                                  MD5::MD5Result *Checksum = nullptr,
+                                  Optional<MD5::MD5Result> Checksum = None,
                                   Optional<StringRef> Source = None,
                                   unsigned CUID = 0) {
     return cantFail(
@@ -797,12 +801,12 @@ public:
   /// '.file 4 "dir/foo.c" md5 "..." source "..."' assembler directive.
   virtual Expected<unsigned> tryEmitDwarfFileDirective(
       unsigned FileNo, StringRef Directory, StringRef Filename,
-      MD5::MD5Result *Checksum = nullptr, Optional<StringRef> Source = None,
+      Optional<MD5::MD5Result> Checksum = None, Optional<StringRef> Source = None,
       unsigned CUID = 0);
 
   /// Specify the "root" file of the compilation, using the ".file 0" extension.
   virtual void emitDwarfFile0Directive(StringRef Directory, StringRef Filename,
-                                       MD5::MD5Result *Checksum,
+                                       Optional<MD5::MD5Result> Checksum,
                                        Optional<StringRef> Source,
                                        unsigned CUID = 0);
 
@@ -953,9 +957,7 @@ public:
   virtual void EmitAddrsigSym(const MCSymbol *Sym) {}
 
   /// Emit the given \p Instruction into the current section.
-  /// PrintSchedInfo == true then schedul comment should be added to output
-  virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                               bool PrintSchedInfo = false);
+  virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
 
   /// Set the bundle alignment mode from now on in the section.
   /// The argument is the power of 2 to which the alignment is set. The
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index b3ce523d9c0c..9490a6ecedad 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCSubtargetInfo.h - Subtarget Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,45 @@ namespace llvm {
 
 class MCInst;
 
+//===----------------------------------------------------------------------===//
+
+/// Used to provide key value pairs for feature and CPU bit flags.
+struct SubtargetFeatureKV {
+  const char *Key;                      ///< K-V key string
+  const char *Desc;                     ///< Help descriptor
+  unsigned Value;                       ///< K-V integer value
+  FeatureBitArray Implies;              ///< K-V bit mask
+
+  /// Compares Key with the bare string \p S; used by std::lower_bound.
+  bool operator<(StringRef S) const {
+    return StringRef(Key) < S;
+  }
+
+  /// Compares two entries by Key; used by std::is_sorted.
+  bool operator<(const SubtargetFeatureKV &Other) const {
+    return StringRef(Key) < StringRef(Other.Key);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+
+/// Used to provide key value pairs for CPU names and their scheduling models.
+struct SubtargetSubTypeKV {
+  const char *Key;                      ///< K-V key string
+  FeatureBitArray Implies;              ///< K-V bit mask
+  const MCSchedModel *SchedModel;       ///< Scheduler model for this entry
+
+  /// Compares Key with the bare string \p S; used by std::lower_bound.
+  bool operator<(StringRef S) const {
+    return StringRef(Key) < S;
+  }
+
+  /// Compares two entries by Key; used by std::is_sorted.
+  bool operator<(const SubtargetSubTypeKV &Other) const {
+    return StringRef(Key) < StringRef(Other.Key);
+  }
+};
+
 //===----------------------------------------------------------------------===//
 ///
 /// Generic base class for all target subtargets.
@@ -37,10 +75,9 @@ class MCSubtargetInfo {
   Triple TargetTriple;
   std::string CPU; // CPU being targeted.
   ArrayRef<SubtargetFeatureKV> ProcFeatures;  // Processor feature list
-  ArrayRef<SubtargetFeatureKV> ProcDesc;  // Processor descriptions
+  ArrayRef<SubtargetSubTypeKV> ProcDesc;  // Processor descriptions
 
   // Scheduler machine model
-  const SubtargetInfoKV *ProcSchedModels;
   const MCWriteProcResEntry *WriteProcResTable;
   const MCWriteLatencyEntry *WriteLatencyTable;
   const MCReadAdvanceEntry *ReadAdvanceTable;
@@ -55,8 +92,7 @@ public:
   MCSubtargetInfo(const MCSubtargetInfo &) = default;
   MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
                   ArrayRef<SubtargetFeatureKV> PF,
-                  ArrayRef<SubtargetFeatureKV> PD,
-                  const SubtargetInfoKV *ProcSched,
+                  ArrayRef<SubtargetSubTypeKV> PD,
                   const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
                   const MCReadAdvanceEntry *RA, const InstrStage *IS,
                   const unsigned *OC, const unsigned *FP);
@@ -105,6 +141,10 @@ public:
   /// all feature bits implied by the flag.
   FeatureBitset ApplyFeatureFlag(StringRef FS);
 
+  /// Set/clear additional feature bits, including all other bits they imply.
+  FeatureBitset SetFeatureBitsTransitively(const FeatureBitset& FB);
+  FeatureBitset ClearFeatureBitsTransitively(const FeatureBitset &FB);
+
   /// Check whether the subtarget features are enabled/disabled as per
   /// the provided string, ignoring all other features.
   bool checkFeatures(StringRef FS) const;
@@ -153,6 +193,16 @@ public:
     return 0;
   }
 
+  /// Return the ReadAdvance entries of scheduling class \p SC as a view into
+  /// ReadAdvanceTable, or an empty ArrayRef if the class declares none.
+  ArrayRef<MCReadAdvanceEntry>
+  getReadAdvanceEntries(const MCSchedClassDesc &SC) const {
+    if (!SC.NumReadAdvanceEntries)
+      return ArrayRef<MCReadAdvanceEntry>();
+    return ArrayRef<MCReadAdvanceEntry>(&ReadAdvanceTable[SC.ReadAdvanceIdx],
+                                        SC.NumReadAdvanceEntries);
+  }
+
   /// Get scheduling itinerary of a CPU.
   InstrItineraryData getInstrItineraryForCPU(StringRef CPU) const;
 
@@ -171,11 +221,6 @@ public:
     auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
     return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
   }
-
-  /// Returns string representation of scheduler comment
-  virtual std::string getSchedInfoStr(MCInst const &MCI) const {
-    return {};
-  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 4681a1be60c4..189484deac7e 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -1,9 +1,8 @@
 //===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,6 +48,7 @@ protected:
     SymbolKindELF,
     SymbolKindMachO,
     SymbolKindWasm,
+    SymbolKindXCOFF,
   };
 
   /// A symbol can contain an Offset, or Value, or be Common, but never more
@@ -58,6 +58,7 @@ protected:
     SymContentsOffset,
     SymContentsVariable,
     SymContentsCommon,
+    SymContentsTargetCommon, // Index stores the section index
   };
 
   // Special sentinal value for the absolute pseudo fragment.
@@ -108,7 +109,7 @@ protected:
 
   /// This is actually a Contents enumerator, but is unsigned to avoid sign
   /// extension and achieve better bitpacking with MSVC.
-  unsigned SymbolContents : 2;
+  unsigned SymbolContents : 3;
 
   /// The alignment of the symbol, if it is 'common', or -1.
   ///
@@ -286,6 +287,8 @@ public:
 
   bool isWasm() const { return Kind == SymbolKindWasm; }
 
+  bool isXCOFF() const { return Kind == SymbolKindXCOFF; }
+
   /// @}
   /// \name Variable Symbols
   /// @{
@@ -342,10 +345,11 @@ public:
   ///
   /// \param Size - The size of the symbol.
   /// \param Align - The alignment of the symbol.
-  void setCommon(uint64_t Size, unsigned Align) {
+  /// \param Target - Is the symbol a target-specific common-like symbol.
+  void setCommon(uint64_t Size, unsigned Align, bool Target = false) {
     assert(getOffset() == 0);
     CommonSize = Size;
-    SymbolContents = SymContentsCommon;
+    SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon;
 
     assert((!Align || isPowerOf2_32(Align)) &&
            "Alignment must be a power of 2");
@@ -365,20 +369,28 @@ public:
   ///
   /// \param Size - The size of the symbol.
   /// \param Align - The alignment of the symbol.
+  /// \param Target - Is the symbol a target-specific common-like symbol.
   /// \return True if symbol was already declared as a different type
-  bool declareCommon(uint64_t Size, unsigned Align) {
+  bool declareCommon(uint64_t Size, unsigned Align, bool Target = false) {
     assert(isCommon() || getOffset() == 0);
     if(isCommon()) {
-      if(CommonSize != Size || getCommonAlignment() != Align)
-       return true;
+      if (CommonSize != Size || getCommonAlignment() != Align ||
+          isTargetCommon() != Target)
+        return true;
     } else
-      setCommon(Size, Align);
+      setCommon(Size, Align, Target);
     return false;
   }
 
   /// Is this a 'common' symbol.
   bool isCommon() const {
-    return SymbolContents == SymContentsCommon;
+    return SymbolContents == SymContentsCommon ||
+           SymbolContents == SymContentsTargetCommon;
+  }
+
+  /// Is this a target-specific common-like symbol (see setCommon's Target).
+  bool isTargetCommon() const {
+    return SymbolContents == SymContentsTargetCommon;
+  }
 
   MCFragment *getFragment(bool SetUsed = true) const {
diff --git a/include/llvm/MC/MCSymbolCOFF.h b/include/llvm/MC/MCSymbolCOFF.h
index 7918c353dc15..94087ce871ae 100644
--- a/include/llvm/MC/MCSymbolCOFF.h
+++ b/include/llvm/MC/MCSymbolCOFF.h
@@ -1,9 +1,8 @@
 //===- MCSymbolCOFF.h -  ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCSymbolELF.h b/include/llvm/MC/MCSymbolELF.h
index bbcd22e8e7db..34e5c4344aff 100644
--- a/include/llvm/MC/MCSymbolELF.h
+++ b/include/llvm/MC/MCSymbolELF.h
@@ -1,9 +1,8 @@
 //===- MCSymbolELF.h -  -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_MC_MCSYMBOLELF_H
diff --git a/include/llvm/MC/MCSymbolMachO.h b/include/llvm/MC/MCSymbolMachO.h
index 6125c2050976..8f9ff56470a7 100644
--- a/include/llvm/MC/MCSymbolMachO.h
+++ b/include/llvm/MC/MCSymbolMachO.h
@@ -1,9 +1,8 @@
 //===- MCSymbolMachO.h -  ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_MC_MCSYMBOLMACHO_H
@@ -35,6 +34,7 @@ class MCSymbolMachO : public MCSymbol {
     SF_WeakDefinition                       = 0x0080,
     SF_SymbolResolver                       = 0x0100,
     SF_AltEntry                             = 0x0200,
+    SF_Cold                                 = 0x0400,
 
     // Common alignment
     SF_CommonAlignmentMask                  = 0xF0FF,
@@ -98,6 +98,10 @@ public:
     return getFlags() & SF_AltEntry;
   }
 
+  void setCold() const { modifyFlags(SF_Cold, SF_Cold); }
+
+  bool isCold() const { return getFlags() & SF_Cold; }
+
   void setDesc(unsigned Value) const {
     assert(Value == (Value & SF_DescFlagsMask) &&
            "Invalid .desc value!");
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h
index 8e66dc881d0f..c50cd0ee4709 100644
--- a/include/llvm/MC/MCSymbolWasm.h
+++ b/include/llvm/MC/MCSymbolWasm.h
@@ -1,9 +1,8 @@
 //===- MCSymbolWasm.h -  ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_MC_MCSYMBOLWASM_H
@@ -19,7 +18,9 @@ class MCSymbolWasm : public MCSymbol {
   bool IsWeak = false;
   bool IsHidden = false;
   bool IsComdat = false;
-  std::string ModuleName;
+  mutable bool IsUsedInGOT = false;
+  Optional<std::string> ImportModule;
+  Optional<std::string> ImportName;
   wasm::WasmSignature *Signature = nullptr;
   Optional<wasm::WasmGlobalType> GlobalType;
   Optional<wasm::WasmEventType> EventType;
@@ -32,7 +33,7 @@ public:
   // Use a module name of "env" for now, for compatibility with existing tools.
   // This is temporary, and may change, as the ABI is not yet stable.
   MCSymbolWasm(const StringMapEntry<bool> *Name, bool isTemporary)
-      : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {}
+      : MCSymbol(SymbolKindWasm, Name, isTemporary) {}
   static bool classof(const MCSymbol *S) { return S->isWasm(); }
 
   const MCExpr *getSize() const { return SymbolSize; }
@@ -46,6 +47,13 @@ public:
   wasm::WasmSymbolType getType() const { return Type; }
   void setType(wasm::WasmSymbolType type) { Type = type; }
 
+  bool isExported() const {
+    return getFlags() & wasm::WASM_SYMBOL_EXPORTED;
+  }
+  void setExported() const {
+    modifyFlags(wasm::WASM_SYMBOL_EXPORTED, wasm::WASM_SYMBOL_EXPORTED);
+  }
+
   bool isWeak() const { return IsWeak; }
   void setWeak(bool isWeak) { IsWeak = isWeak; }
 
@@ -55,8 +63,24 @@ public:
   bool isComdat() const { return IsComdat; }
   void setComdat(bool isComdat) { IsComdat = isComdat; }
 
-  const StringRef getModuleName() const { return ModuleName; }
-  void setModuleName(StringRef Name) { ModuleName = Name; }
+  const StringRef getImportModule() const {
+      if (ImportModule.hasValue()) {
+          return ImportModule.getValue();
+      }
+      return "env";
+  }
+  void setImportModule(StringRef Name) { ImportModule = Name; }
+
+  const StringRef getImportName() const {
+      if (ImportName.hasValue()) {
+          return ImportName.getValue();
+      }
+      return getName();
+  }
+  void setImportName(StringRef Name) { ImportName = Name; }
+
+  void setUsedInGOT() const { IsUsedInGOT = true; }
+  bool isUsedInGOT() const { return IsUsedInGOT; }
 
   const wasm::WasmSignature *getSignature() const { return Signature; }
   void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
diff --git a/include/llvm/MC/MCSymbolXCOFF.h b/include/llvm/MC/MCSymbolXCOFF.h
new file mode 100644
index 000000000000..0a1fe1475138
--- /dev/null
+++ b/include/llvm/MC/MCSymbolXCOFF.h
@@ -0,0 +1,26 @@
+//===- MCSymbolXCOFF.h -  ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_MC_MCSYMBOLXCOFF_H
+#define LLVM_MC_MCSYMBOLXCOFF_H
+
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+
+namespace llvm {
+
+class MCSymbolXCOFF : public MCSymbol {
+public:
+  MCSymbolXCOFF(const StringMapEntry<bool> *Name, bool isTemporary)
+      : MCSymbol(SymbolKindXCOFF, Name, isTemporary) {}
+
+  static bool classof(const MCSymbol *S) { return S->isXCOFF(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCSYMBOLXCOFF_H
diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h
index f5d330fbeb22..f184620ff047 100644
--- a/include/llvm/MC/MCTargetOptions.h
+++ b/include/llvm/MC/MCTargetOptions.h
@@ -1,9 +1,8 @@
 //===- MCTargetOptions.h - MC Target Options --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,18 +15,18 @@
 namespace llvm {
 
 enum class ExceptionHandling {
-  None,     /// No exception support
-  DwarfCFI, /// DWARF-like instruction based exceptions
-  SjLj,     /// setjmp/longjmp based exceptions
-  ARM,      /// ARM EHABI
-  WinEH,    /// Windows Exception Handling
-  Wasm,     /// WebAssembly Exception Handling
+  None,     ///< No exception support
+  DwarfCFI, ///< DWARF-like instruction based exceptions
+  SjLj,     ///< setjmp/longjmp based exceptions
+  ARM,      ///< ARM EHABI
+  WinEH,    ///< Windows Exception Handling
+  Wasm,     ///< WebAssembly Exception Handling
 };
 
 enum class DebugCompressionType {
-  None, /// No compression
-  GNU,  /// zlib-gnu style compression
-  Z,    /// zlib style complession
+  None, ///< No compression
+  GNU,  ///< zlib-gnu style compression
+  Z,    ///< zlib style compression
 };
 
 class StringRef;
@@ -39,9 +38,6 @@ public:
     AsmInstrumentationAddress
   };
 
-  /// Enables AddressSanitizer instrumentation at machine level.
-  bool SanitizeAddress : 1;
-
   bool MCRelaxAll : 1;
   bool MCNoExecStack : 1;
   bool MCFatalWarnings : 1;
diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/include/llvm/MC/MCTargetOptionsCommandFlags.inc
index 5172fa44511f..9f1177f470b9 100644
--- a/include/llvm/MC/MCTargetOptionsCommandFlags.inc
+++ b/include/llvm/MC/MCTargetOptionsCommandFlags.inc
@@ -1,9 +1,8 @@
 //===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,15 +18,6 @@
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
-static cl::opt<MCTargetOptions::AsmInstrumentation> AsmInstrumentation(
-    "asm-instrumentation", cl::desc("Instrumentation of inline assembly and "
-                                    "assembly source files"),
-    cl::init(MCTargetOptions::AsmInstrumentationNone),
-    cl::values(clEnumValN(MCTargetOptions::AsmInstrumentationNone, "none",
-                          "no instrumentation at all"),
-               clEnumValN(MCTargetOptions::AsmInstrumentationAddress, "address",
-                          "instrument instructions with memory arguments")));
-
 static cl::opt<bool> RelaxAll("mc-relax-all",
                        cl::desc("When used with filetype=obj, "
                                 "relax all fixups in the emitted object file"));
@@ -63,8 +53,6 @@ ABIName("target-abi", cl::Hidden,
 
 static MCTargetOptions InitMCTargetOptionsFromFlags() {
   MCTargetOptions Options;
-  Options.SanitizeAddress =
-      (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress);
   Options.MCRelaxAll = RelaxAll;
   Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
   Options.MCPIECopyRelocations = PIECopyRelocations;
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index 11f5082ed3f4..0be7ce7055c5 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCValue.h - MCValue class -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h
index 6b788cfe96b9..4adbca28f116 100644
--- a/include/llvm/MC/MCWasmObjectWriter.h
+++ b/include/llvm/MC/MCWasmObjectWriter.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCWasmObjectWriter.h - Wasm Object Writer -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCWasmStreamer.h b/include/llvm/MC/MCWasmStreamer.h
index 01e6a4379287..2d7f2b9975c9 100644
--- a/include/llvm/MC/MCWasmStreamer.h
+++ b/include/llvm/MC/MCWasmStreamer.h
@@ -1,9 +1,8 @@
 //===- MCWasmStreamer.h - MCStreamer Wasm Object File Interface -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCWin64EH.h b/include/llvm/MC/MCWin64EH.h
index 1a9f6f403d7c..60ec06e61b7c 100644
--- a/include/llvm/MC/MCWin64EH.h
+++ b/include/llvm/MC/MCWin64EH.h
@@ -1,9 +1,8 @@
 //===- MCWin64EH.h - Machine Code Win64 EH support --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index c1d35ea1f6ba..3fe124fd7f1c 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCWinCOFFObjectWriter.h - Win COFF Object Writer -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h
index 0049d04b4b3f..c1c1ec56cb48 100644
--- a/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/include/llvm/MC/MCWinCOFFStreamer.h
@@ -1,9 +1,8 @@
 //===- MCWinCOFFStreamer.h - COFF Object File Interface ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCWinEH.h b/include/llvm/MC/MCWinEH.h
index 98ef0367a11d..b1c28c0ecae7 100644
--- a/include/llvm/MC/MCWinEH.h
+++ b/include/llvm/MC/MCWinEH.h
@@ -1,9 +1,8 @@
 //===- MCWinEH.h - Windows Unwinding Support --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/MCXCOFFObjectWriter.h b/include/llvm/MC/MCXCOFFObjectWriter.h
new file mode 100644
index 000000000000..fe4087f70614
--- /dev/null
+++ b/include/llvm/MC/MCXCOFFObjectWriter.h
@@ -0,0 +1,41 @@
+//===-- llvm/MC/MCXCOFFObjectWriter.h - XCOFF Object Writer ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCXCOFFOBJECTWRITER_H
+#define LLVM_MC_MCXCOFFOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+class raw_pwrite_stream;
+
+class MCXCOFFObjectTargetWriter : public MCObjectTargetWriter {
+protected:
+  MCXCOFFObjectTargetWriter(bool Is64Bit);
+
+public:
+  ~MCXCOFFObjectTargetWriter() override;
+
+  Triple::ObjectFormatType getFormat() const override { return Triple::XCOFF; }
+  static bool classof(const MCObjectTargetWriter *W) {
+    return W->getFormat() == Triple::XCOFF;
+  }
+  bool is64Bit() const { return Is64Bit; }
+
+private:
+  bool Is64Bit;
+};
+
+std::unique_ptr<MCObjectWriter>
+createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+                        raw_pwrite_stream &OS);
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCXCOFFOBJECTWRITER_H
diff --git a/include/llvm/MC/MCXCOFFStreamer.h b/include/llvm/MC/MCXCOFFStreamer.h
new file mode 100644
index 000000000000..159ae4818749
--- /dev/null
+++ b/include/llvm/MC/MCXCOFFStreamer.h
@@ -0,0 +1,33 @@
+//===- MCXCOFFStreamer.h - MCStreamer XCOFF Object File Interface ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCXCOFFSTREAMER_H
+#define LLVM_MC_MCXCOFFSTREAMER_H
+
+#include "llvm/MC/MCObjectStreamer.h"
+
+namespace llvm {
+
+class MCXCOFFStreamer : public MCObjectStreamer {
+public:
+  MCXCOFFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+                  std::unique_ptr<MCObjectWriter> OW,
+                  std::unique_ptr<MCCodeEmitter> Emitter);
+
+  bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
+  void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                        unsigned ByteAlignment) override;
+  void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+                    uint64_t Size = 0, unsigned ByteAlignment = 0,
+                    SMLoc Loc = SMLoc()) override;
+  void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCXCOFFSTREAMER_H
diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h
index 91ed661ebeab..5872540e6104 100644
--- a/include/llvm/MC/MachineLocation.h
+++ b/include/llvm/MC/MachineLocation.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/MachineLocation.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // The MachineLocation class is used to represent a simple location in a machine
diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h
index 66eb9ec56d14..0342c4cfbbde 100644
--- a/include/llvm/MC/SectionKind.h
+++ b/include/llvm/MC/SectionKind.h
@@ -1,9 +1,8 @@
 //===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h
index 265260fcee4d..c83eca4e512d 100644
--- a/include/llvm/MC/StringTableBuilder.h
+++ b/include/llvm/MC/StringTableBuilder.h
@@ -1,9 +1,8 @@
 //===- StringTableBuilder.h - String table building utility -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 76c7dd560800..fc9565ceafad 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -1,9 +1,8 @@
 //===- llvm/MC/SubtargetFeature.h - CPU characteristics ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #define LLVM_MC_SUBTARGETFEATURE_H
 
 #include "llvm/ADT/StringRef.h"
+#include <array>
 #include <bitset>
 #include <initializer_list>
 #include <string>
@@ -26,11 +26,12 @@
 
 namespace llvm {
 
-template <typename T> class ArrayRef;
 class raw_ostream;
 class Triple;
 
-const unsigned MAX_SUBTARGET_FEATURES = 192;
+const unsigned MAX_SUBTARGET_WORDS = 3;
+const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
+
 /// Container class for subtarget features.
 /// This is convenient because std::bitset does not have a constructor
 /// with an initializer list of set bits.
@@ -45,38 +46,34 @@ public:
     for (auto I : Init)
       set(I);
   }
-};
-
-//===----------------------------------------------------------------------===//
-
-/// Used to provide key value pairs for feature and CPU bit flags.
-struct SubtargetFeatureKV {
-  const char *Key;                      ///< K-V key string
-  const char *Desc;                     ///< Help descriptor
-  FeatureBitset Value;                  ///< K-V integer value
-  FeatureBitset Implies;                ///< K-V bit mask
 
-  /// Compare routine for std::lower_bound
-  bool operator<(StringRef S) const {
-    return StringRef(Key) < S;
-  }
-
-  /// Compare routine for std::is_sorted.
-  bool operator<(const SubtargetFeatureKV &Other) const {
-    return StringRef(Key) < StringRef(Other.Key);
+  bool operator < (const FeatureBitset &Other) const {
+    for (unsigned I = 0, E = size(); I != E; ++I) {
+      bool LHS = test(I), RHS = Other.test(I);
+      if (LHS != RHS)
+        return LHS < RHS;
+    }
+    return false;
   }
 };
 
-//===----------------------------------------------------------------------===//
+/// Class used to store the subtarget bits in the tables created by tablegen.
+/// The std::initializer_list constructor of FeatureBitset can't be done at
+/// compile time and requires a static constructor to run at startup.
+class FeatureBitArray {
+  std::array<uint64_t, MAX_SUBTARGET_WORDS> Bits;
+
+public:
+  constexpr FeatureBitArray(const std::array<uint64_t, MAX_SUBTARGET_WORDS> &B)
+      : Bits(B) {}
 
-/// Used to provide key value pairs for CPU and arbitrary pointers.
-struct SubtargetInfoKV {
-  const char *Key;                      ///< K-V key string
-  const void *Value;                    ///< K-V pointer value
+  FeatureBitset getAsBitset() const {
+    FeatureBitset Result;
 
-  /// Compare routine for std::lower_bound
-  bool operator<(StringRef S) const {
-    return StringRef(Key) < S;
+    for (unsigned i = 0, e = Bits.size(); i != e; ++i)
+      Result |= FeatureBitset(Bits[i]) << (64 * i);
+
+    return Result;
   }
 };
 
@@ -102,19 +99,6 @@ public:
   /// Adds Features.
   void AddFeature(StringRef String, bool Enable = true);
 
-  /// Toggles a feature and update the feature bits.
-  static void ToggleFeature(FeatureBitset &Bits, StringRef String,
-                            ArrayRef<SubtargetFeatureKV> FeatureTable);
-
-  /// Applies the feature flag and update the feature bits.
-  static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
-                               ArrayRef<SubtargetFeatureKV> FeatureTable);
-
-  /// Returns feature bits of a CPU.
-  FeatureBitset getFeatureBits(StringRef CPU,
-                               ArrayRef<SubtargetFeatureKV> CPUTable,
-                               ArrayRef<SubtargetFeatureKV> FeatureTable);
-
   /// Returns the vector of individual subtarget features.
   const std::vector<std::string> &getFeatures() const { return Features; }
 
@@ -126,6 +110,32 @@ public:
 
   /// Adds the default features for the specified target triple.
   void getDefaultSubtargetFeatures(const Triple& Triple);
+
+  /// Determine if a feature has a flag; '+' or '-'
+  static bool hasFlag(StringRef Feature) {
+    assert(!Feature.empty() && "Empty string");
+    // Get first character
+    char Ch = Feature[0];
+    // Check if first character is '+' or '-' flag
+    return Ch == '+' || Ch =='-';
+  }
+
+  /// Return string stripped of flag.
+  static std::string StripFlag(StringRef Feature) {
+    return hasFlag(Feature) ? Feature.substr(1) : Feature;
+  }
+
+  /// Return true if enable flag; '+'.
+  static inline bool isEnabled(StringRef Feature) {
+    assert(!Feature.empty() && "Empty string");
+    // Get first character
+    char Ch = Feature[0];
+    // Check if first character is '+' for enabled
+    return Ch == '+';
+  }
+
+  /// Splits a string of comma-separated items into a vector of strings.
+  static void Split(std::vector<std::string> &V, StringRef S);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MCA/Context.h b/include/llvm/MCA/Context.h
index 6b2bee0fdc42..503d780d4947 100644
--- a/include/llvm/MCA/Context.h
+++ b/include/llvm/MCA/Context.h
@@ -1,9 +1,8 @@
 //===---------------------------- Context.h ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -32,15 +31,21 @@ namespace mca {
 /// This is a convenience struct to hold the parameters necessary for creating
 /// the pre-built "default" out-of-order pipeline.
 struct PipelineOptions {
-  PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS,
-                  bool NoAlias)
-      : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
-        StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {}
+  PipelineOptions(unsigned UOPQSize, unsigned DecThr, unsigned DW, unsigned RFS,
+                  unsigned LQS, unsigned SQS, bool NoAlias,
+                  bool ShouldEnableBottleneckAnalysis = false)
+      : MicroOpQueueSize(UOPQSize), DecodersThroughput(DecThr),
+        DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
+        StoreQueueSize(SQS), AssumeNoAlias(NoAlias),
+        EnableBottleneckAnalysis(ShouldEnableBottleneckAnalysis) {}
+  unsigned MicroOpQueueSize;
+  unsigned DecodersThroughput; // Instructions per cycle.
   unsigned DispatchWidth;
   unsigned RegisterFileSize;
   unsigned LoadQueueSize;
   unsigned StoreQueueSize;
   bool AssumeNoAlias;
+  bool EnableBottleneckAnalysis;
 };
 
 class Context {
diff --git a/include/llvm/MCA/HWEventListener.h b/include/llvm/MCA/HWEventListener.h
index 3b32b2cd6577..e11d06de2b2e 100644
--- a/include/llvm/MCA/HWEventListener.h
+++ b/include/llvm/MCA/HWEventListener.h
@@ -1,9 +1,8 @@
 //===----------------------- HWEventListener.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -40,6 +39,7 @@ public:
     // Events generated by the Retire Control Unit.
     Retired,
     // Events generated by the Scheduler.
+    Pending,
     Ready,
     Issued,
     Executed,
@@ -126,6 +126,35 @@ public:
   const InstRef &IR;
 };
 
+// A HWPressureEvent describes an increase in backend pressure caused by
+// the presence of data dependencies or unavailability of pipeline resources.
+class HWPressureEvent {
+public:
+  enum GenericReason {
+    INVALID = 0,
+    // Scheduler was unable to issue all the ready instructions because some
+    // pipeline resources were unavailable.
+    RESOURCES,
+    // Instructions could not be issued because of register data dependencies.
+    REGISTER_DEPS,
+    // Instructions could not be issued because of memory dependencies.
+    MEMORY_DEPS
+  };
+
+  HWPressureEvent(GenericReason reason, ArrayRef<InstRef> Insts,
+                  uint64_t Mask = 0)
+      : Reason(reason), AffectedInstructions(Insts), ResourceMask(Mask) {}
+
+  // Reason for this increase in backend pressure.
+  GenericReason Reason;
+
+  // Instructions affected (i.e. delayed) by this increase in backend pressure.
+  ArrayRef<InstRef> AffectedInstructions;
+
+  // A mask of unavailable processor resources.
+  const uint64_t ResourceMask;
+};
+
 class HWEventListener {
 public:
   // Generic events generated by the pipeline.
@@ -134,6 +163,7 @@ public:
 
   virtual void onEvent(const HWInstructionEvent &Event) {}
   virtual void onEvent(const HWStallEvent &Event) {}
+  virtual void onEvent(const HWPressureEvent &Event) {}
 
   using ResourceRef = std::pair<uint64_t, uint64_t>;
   virtual void onResourceAvailable(const ResourceRef &RRef) {}
diff --git a/include/llvm/MCA/HardwareUnits/HardwareUnit.h b/include/llvm/MCA/HardwareUnits/HardwareUnit.h
index 104a2009f219..f6e178bcff10 100644
--- a/include/llvm/MCA/HardwareUnits/HardwareUnit.h
+++ b/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -1,9 +1,8 @@
 //===-------------------------- HardwareUnit.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/HardwareUnits/LSUnit.h b/include/llvm/MCA/HardwareUnits/LSUnit.h
index e217fc50f780..ae9a49c64855 100644
--- a/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -1,9 +1,8 @@
 //===------------------------- LSUnit.h --------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -16,21 +15,298 @@
 #ifndef LLVM_MCA_LSUNIT_H
 #define LLVM_MCA_LSUNIT_H
 
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
 
 namespace llvm {
 namespace mca {
 
-class InstRef;
 class Scheduler;
 
-/// A Load/Store Unit implementing a load and store queues.
+/// A node of a memory dependency graph. A MemoryGroup describes a set of
+/// instructions with the same memory dependencies.
 ///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
+/// By construction, instructions of a MemoryGroup don't depend on each other.
+/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
+/// A Memory group identifier is then stored as a "token" in field
+/// Instruction::LSUTokenID of each dispatched instruction. That token is used
+/// internally by the LSUnit to track memory dependencies.
+class MemoryGroup {
+  unsigned NumPredecessors;
+  unsigned NumExecutingPredecessors;
+  unsigned NumExecutedPredecessors;
+
+  unsigned NumInstructions;
+  unsigned NumExecuting;
+  unsigned NumExecuted;
+  SmallVector<MemoryGroup *, 4> Succ;
+
+  CriticalDependency CriticalPredecessor;
+  InstRef CriticalMemoryInstruction;
+
+  MemoryGroup(const MemoryGroup &) = delete;
+  MemoryGroup &operator=(const MemoryGroup &) = delete;
+
+public:
+  MemoryGroup()
+      : NumPredecessors(0), NumExecutingPredecessors(0),
+        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
+        NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
+  MemoryGroup(MemoryGroup &&) = default;
+
+  ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
+  unsigned getNumSuccessors() const { return Succ.size(); }
+  unsigned getNumPredecessors() const { return NumPredecessors; }
+  unsigned getNumExecutingPredecessors() const {
+    return NumExecutingPredecessors;
+  }
+  unsigned getNumExecutedPredecessors() const {
+    return NumExecutedPredecessors;
+  }
+  unsigned getNumInstructions() const { return NumInstructions; }
+  unsigned getNumExecuting() const { return NumExecuting; }
+  unsigned getNumExecuted() const { return NumExecuted; }
+
+  const InstRef &getCriticalMemoryInstruction() const {
+    return CriticalMemoryInstruction;
+  }
+  const CriticalDependency &getCriticalPredecessor() const {
+    return CriticalPredecessor;
+  }
+
+  void addSuccessor(MemoryGroup *Group) {
+    Group->NumPredecessors++;
+    assert(!isExecuted() && "Should have been removed!");
+    if (isExecuting())
+      Group->onGroupIssued(CriticalMemoryInstruction);
+    Succ.emplace_back(Group);
+  }
+
+  bool isWaiting() const {
+    return NumPredecessors >
+           (NumExecutingPredecessors + NumExecutedPredecessors);
+  }
+  bool isPending() const {
+    return NumExecutingPredecessors &&
+           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
+            NumPredecessors);
+  }
+  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
+  bool isExecuting() const {
+    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
+  }
+  bool isExecuted() const { return NumInstructions == NumExecuted; }
+
+  void onGroupIssued(const InstRef &IR) {
+    assert(!isReady() && "Unexpected group-start event!");
+    NumExecutingPredecessors++;
+
+    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
+    if (CriticalPredecessor.Cycles < Cycles) {
+      CriticalPredecessor.IID = IR.getSourceIndex();
+      CriticalPredecessor.Cycles = Cycles;
+    }
+  }
+
+  void onGroupExecuted() {
+    assert(!isReady() && "Inconsistent state found!");
+    NumExecutingPredecessors--;
+    NumExecutedPredecessors++;
+  }
+
+  void onInstructionIssued(const InstRef &IR) {
+    assert(!isExecuting() && "Invalid internal state!");
+    ++NumExecuting;
+
+    // Update the critical memory instruction of this group.
+    const Instruction &IS = *IR.getInstruction();
+    if ((bool)CriticalMemoryInstruction) {
+      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
+      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
+        CriticalMemoryInstruction = IR;
+    } else {
+      CriticalMemoryInstruction = IR;
+    }
+
+    if (!isExecuting())
+      return;
+
+    // Notify successors that this group started execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupIssued(CriticalMemoryInstruction);
+  }
+
+  void onInstructionExecuted() {
+    assert(isReady() && !isExecuted() && "Invalid internal state!");
+    --NumExecuting;
+    ++NumExecuted;
+
+    if (!isExecuted())
+      return;
+
+    // Notify successors that this group has finished execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupExecuted();
+  }
+
+  void addInstruction() {
+    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
+    ++NumInstructions;
+  }
+
+  void cycleEvent() {
+    if (isWaiting() && CriticalPredecessor.Cycles)
+      CriticalPredecessor.Cycles--;
+  }
+};
+
+/// Abstract base interface for LS (load/store) units in llvm-mca.
+class LSUnitBase : public HardwareUnit {
+  /// Load queue size.
+  ///
+  /// A value of zero for this field means that the load queue is unbounded.
+  /// Processor models can declare the size of a load queue via tablegen (see
+  /// the definition of tablegen class LoadQueue in
+  /// llvm/Target/TargetSchedule.td).
+  unsigned LQSize;
+
+  /// Store queue size.
+  ///
+  /// A value of zero for this field means that the store queue is unbounded.
+  /// Processor models can declare the size of a store queue via tablegen (see
+  /// the definition of tablegen class StoreQueue in
+  /// llvm/Target/TargetSchedule.td).
+  unsigned SQSize;
+
+  unsigned UsedLQEntries;
+  unsigned UsedSQEntries;
+
+  /// True if loads don't alias with stores.
+  ///
+  /// By default, the LS unit assumes that loads and stores don't alias with
+  /// each other. If this field is set to false, then loads are always assumed
+  /// to alias with stores.
+  const bool NoAlias;
+
+  /// Used to map group identifiers to MemoryGroups.
+  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
+  unsigned NextGroupID;
+
+public:
+  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
+             unsigned StoreQueueSize, bool AssumeNoAlias);
+
+  virtual ~LSUnitBase();
+
+  /// Returns the total number of entries in the load queue.
+  unsigned getLoadQueueSize() const { return LQSize; }
+
+  /// Returns the total number of entries in the store queue.
+  unsigned getStoreQueueSize() const { return SQSize; }
+
+  unsigned getUsedLQEntries() const { return UsedLQEntries; }
+  unsigned getUsedSQEntries() const { return UsedSQEntries; }
+  unsigned assignLQSlot() { return UsedLQEntries++; }
+  unsigned assignSQSlot() { return UsedSQEntries++; }
+
+  bool assumeNoAlias() const { return NoAlias; }
+
+  enum Status {
+    LSU_AVAILABLE = 0,
+    LSU_LQUEUE_FULL, // Load Queue unavailable
+    LSU_SQUEUE_FULL  // Store Queue unavailable
+  };
+
+  /// This method checks the availability of the load/store buffers.
+  ///
+  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR is
+  /// not a memory operation.
+  virtual Status isAvailable(const InstRef &IR) const = 0;
+
+  /// Allocates LS resources for instruction IR.
+  ///
+  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
+  /// Returns the GroupID associated with this instruction. That value will be
+  /// used to set the LSUTokenID field in class Instruction.
+  virtual unsigned dispatch(const InstRef &IR) = 0;
+
+  bool isSQEmpty() const { return !UsedSQEntries; }
+  bool isLQEmpty() const { return !UsedLQEntries; }
+  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
+  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
+
+  bool isValidGroupID(unsigned Index) const {
+    return Index && (Groups.find(Index) != Groups.end());
+  }
+
+  /// Check if previously dispatched instruction IR is now ready for execution.
+  bool isReady(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return Group.isReady();
+  }
+
+  /// Check if instruction IR only depends on memory instructions that are
+  /// currently executing.
+  bool isPending(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return Group.isPending();
+  }
+
+  /// Check if instruction IR is still waiting on memory operations, and the
+  /// wait time is still unknown.
+  bool isWaiting(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return Group.isWaiting();
+  }
+
+  bool hasDependentUsers(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return !Group.isExecuted() && Group.getNumSuccessors();
+  }
+
+  const MemoryGroup &getGroup(unsigned Index) const {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  MemoryGroup &getGroup(unsigned Index) {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  unsigned createMemoryGroup() {
+    Groups.insert(
+        std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+    return NextGroupID++;
+  }
+
+  // Instruction executed event handlers.
+  virtual void onInstructionExecuted(const InstRef &IR);
+
+  virtual void onInstructionIssued(const InstRef &IR) {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    Groups[GroupID]->onInstructionIssued(IR);
+  }
+
+  virtual void cycleEvent();
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+/// Default Load/Store Unit (LS Unit) for simulated processors.
+///
+/// Each load (or store) consumes one entry in the load (or store) queue.
 ///
 /// Rules are:
 /// 1) A younger load is allowed to pass an older load only if there are no
@@ -89,26 +365,7 @@ class Scheduler;
 /// A load/store barrier is "executed" when it becomes the oldest entry in
 /// the load/store queue(s). That also means, all the older loads/stores have
 /// already been executed.
-class LSUnit : public HardwareUnit {
-  // Load queue size.
-  // LQ_Size == 0 means that there are infinite slots in the load queue.
-  unsigned LQ_Size;
-
-  // Store queue size.
-  // SQ_Size == 0 means that there are infinite slots in the store queue.
-  unsigned SQ_Size;
-
-  // If true, loads will never alias with stores. This is the default.
-  bool NoAlias;
-
-  // When a `MayLoad` instruction is dispatched to the schedulers for execution,
-  // the LSUnit reserves an entry in the `LoadQueue` for it.
-  //
-  // LoadQueue keeps track of all the loads that are in-flight. A load
-  // instruction is eventually removed from the LoadQueue when it reaches
-  // completion stage. That means, a load leaves the queue whe it is 'executed',
-  // and its value can be forwarded on the data path to outside units.
-  //
+class LSUnit : public LSUnitBase {
   // This class doesn't know about the latency of a load instruction. So, it
   // conservatively/pessimistically assumes that the latency of a load opcode
   // matches the instruction latency.
@@ -139,66 +396,50 @@ class LSUnit : public HardwareUnit {
   // alternative approaches that let instructions specify the number of
   // load/store queue entries which they consume at dispatch stage (See
   // PR39830).
-  SmallSet<unsigned, 16> LoadQueue;
-  SmallSet<unsigned, 16> StoreQueue;
-
-  void assignLQSlot(unsigned Index);
-  void assignSQSlot(unsigned Index);
-  bool isReadyNoAlias(unsigned Index) const;
-
+  //
   // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a store barrier. It forces older store to be
   // executed before newer stores are issued.
-  SmallSet<unsigned, 8> StoreBarriers;
-
+  //
   // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a load barrier. It forces older loads to execute
   // before newer loads are issued.
-  SmallSet<unsigned, 8> LoadBarriers;
-
-  bool isSQEmpty() const { return StoreQueue.empty(); }
-  bool isLQEmpty() const { return LoadQueue.empty(); }
-  bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
-  bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+  unsigned CurrentLoadGroupID;
+  unsigned CurrentLoadBarrierGroupID;
+  unsigned CurrentStoreGroupID;
 
 public:
-  LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
-         bool AssumeNoAlias = false);
+  LSUnit(const MCSchedModel &SM)
+      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
+  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
+      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
+  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
+      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
+        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
 
-#ifndef NDEBUG
-  void dump() const;
-#endif
+  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+  /// accommodate instruction IR.
+  Status isAvailable(const InstRef &IR) const override;
 
-  enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+  /// Allocates LS resources for instruction IR.
+  ///
+  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+  /// returning LSU_AVAILABLE.
+  ///
+  /// Memory operations are subject to ordering rules.
+  /// By default, rules are:
+  /// 1. A store may not pass a previous store.
+  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+  /// 3. A load may pass a previous load.
+  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+  /// 5. A load has to wait until an older load barrier is fully executed.
+  /// 6. A store has to wait until an older store barrier is fully executed.
+  unsigned dispatch(const InstRef &IR) override;
 
-  // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
-  // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
-  Status isAvailable(const InstRef &IR) const;
-
-  // Allocates load/store queue resources for IR.
-  //
-  // This method assumes that a previous call to `isAvailable(IR)` returned
-  // LSU_AVAILABLE, and that IR is a memory operation.
-  void dispatch(const InstRef &IR);
-
-  // By default, rules are:
-  // 1. A store may not pass a previous store.
-  // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
-  // 3. A load may pass a previous load.
-  // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
-  // 5. A load has to wait until an older load barrier is fully executed.
-  // 6. A store has to wait until an older store barrier is fully executed.
-  virtual bool isReady(const InstRef &IR) const;
-
-  // Load and store instructions are tracked by their corresponding queues from
-  // dispatch until the "instruction executed" event.
-  // Only when a load instruction reaches the 'Executed' stage, its value
-  // becomes available to the users. At that point, the load no longer needs to
-  // be tracked by the load queue.
   // FIXME: For simplicity, we optimistically assume a similar behavior for
   // store instructions. In practice, store operations don't tend to leave the
   // store queue until they reach the 'Retired' stage (See PR39830).
-  void onInstructionExecuted(const InstRef &IR);
+  void onInstructionExecuted(const InstRef &IR) override;
 };
 
 } // namespace mca
diff --git a/include/llvm/MCA/HardwareUnits/RegisterFile.h b/include/llvm/MCA/HardwareUnits/RegisterFile.h
index c23ab0389234..36506327bd29 100644
--- a/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -1,9 +1,8 @@
 //===--------------------- RegisterFile.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -21,6 +20,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
 #include "llvm/Support/Error.h"
 
@@ -196,7 +196,7 @@ public:
 
   // Collect writes that are in a data dependency with RS, and update RS
   // internal state.
-  void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
+  void addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const;
 
   // Removes write \param WS from the register mappings.
   // Physical registers may be released to reflect this update.
diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 549a46c247fe..2f91185516fb 100644
--- a/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -1,9 +1,8 @@
 //===--------------------- ResourceManager.h --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -335,13 +334,26 @@ class ResourceManager {
   // Used to quickly identify groups that own a particular resource unit.
   std::vector<uint64_t> Resource2Groups;
 
-  // A table to map processor resource IDs to processor resource masks.
+  // A table that maps processor resource IDs to processor resource masks.
   SmallVector<uint64_t, 8> ProcResID2Mask;
 
+  // A table that maps resource indices to actual processor resource IDs in the
+  // scheduling model.
+  SmallVector<unsigned, 8> ResIndex2ProcResID;
+
   // Keeps track of which resources are busy, and how many cycles are left
   // before those become usable again.
   SmallDenseMap<ResourceRef, unsigned> BusyResources;
 
+  // Set of processor resource units available on the target.
+  uint64_t ProcResUnitMask;
+
+  // Set of processor resource units that are available during this cycle.
+  uint64_t AvailableProcResUnits;
+
+  // Set of processor resource groups that are currently reserved.
+  uint64_t ReservedResourceGroups;
+
   // Returns the actual resource unit that will be used.
   ResourceRef selectPipe(uint64_t ResourceID);
 
@@ -389,7 +401,14 @@ public:
   // Release a previously reserved processor resource.
   void releaseResource(uint64_t ResourceID);
 
-  bool canBeIssued(const InstrDesc &Desc) const;
+  // Returns a zero mask if resources requested by Desc are all available during
+  // this cycle. It returns a non-zero mask value only if there are unavailable
+  // processor resources; each bit set in the mask represents a busy processor
+  // resource unit or a reserved processor resource group.
+  uint64_t checkAvailability(const InstrDesc &Desc) const;
+
+  uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
+  uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
 
   void issueInstruction(
       const InstrDesc &Desc,
diff --git a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 71360e984ade..06290141739e 100644
--- a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
+++ b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -1,9 +1,8 @@
 //===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/HardwareUnits/Scheduler.h b/include/llvm/MCA/HardwareUnits/Scheduler.h
index 351ea4827df9..27beb842dfd2 100644
--- a/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -1,9 +1,8 @@
 //===--------------------- Scheduler.h ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -68,22 +67,6 @@ public:
 /// resources. This class is also responsible for tracking the progress of
 /// instructions from the dispatch stage, until the write-back stage.
 ///
-/// An instruction dispatched to the Scheduler is initially placed into either
-/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
-/// operands.
-///
-/// An instruction is moved from the WaitSet to the ReadySet when register
-/// operands become available, and all memory dependencies are met.
-/// Instructions that are moved from the WaitSet to the ReadySet transition
-/// in state from 'IS_AVAILABLE' to 'IS_READY'.
-///
-/// On every cycle, the Scheduler checks if it can promote instructions from the
-/// WaitSet to the ReadySet.
-///
-/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued
-/// to a (one or more) pipeline(s). This event also causes an instruction state
-/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
-/// leaves the IssuedSet when it reaches the write-back stage.
 class Scheduler : public HardwareUnit {
   LSUnit &LSU;
 
@@ -93,10 +76,58 @@ class Scheduler : public HardwareUnit {
   // Hardware resources that are managed by this scheduler.
   std::unique_ptr<ResourceManager> Resources;
 
+  // Instructions dispatched to the Scheduler are internally classified based on
+  // the instruction stage (see Instruction::InstrStage).
+  //
+  // An Instruction dispatched to the Scheduler is added to the WaitSet if not
+  // all its register operands are available, and at least one latency is
+  // unknown.  By construction, the WaitSet only contains instructions that are
+  // in the IS_DISPATCHED stage.
+  //
+  // An Instruction transitions from the WaitSet to the PendingSet if the
+  // instruction is not ready yet, but the latency of every register read is
+  // known.  Instructions in the PendingSet can only be in the IS_PENDING or
+  // IS_READY stage.  Only IS_READY instructions that are waiting on memory
+  // dependencies can be added to the PendingSet.
+  //
+  // Instructions in the PendingSet are immediately dominated only by
+  // instructions that have already been issued to the underlying pipelines.  In
+  // the presence of bottlenecks caused by data dependencies, the PendingSet can
+  // be inspected to identify problematic data dependencies between
+  // instructions.
+  //
+  // An instruction is moved to the ReadySet when all register operands become
+  // available, and all memory dependencies are met.  Instructions that are
+  // moved from the PendingSet to the ReadySet must transition to the 'IS_READY'
+  // stage.
+  //
+  // On every cycle, the Scheduler checks if it can promote instructions from the
+  // PendingSet to the ReadySet.
+  //
+  // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts
+  // execution. This event also causes an instruction state transition (i.e.
+  // from state IS_READY, to state IS_EXECUTING). An Instruction leaves the
+  // IssuedSet only when it reaches the write-back stage.
   std::vector<InstRef> WaitSet;
+  std::vector<InstRef> PendingSet;
   std::vector<InstRef> ReadySet;
   std::vector<InstRef> IssuedSet;
 
+  // A mask of busy resource units. It defaults to the empty set (i.e. a zero
+  // mask), and it is cleared at the beginning of every cycle.
+  // It is updated every time the scheduler fails to issue an instruction from
+  // the ready set due to unavailable pipeline resources.
+  // Each bit of the mask represents an unavailable resource.
+  uint64_t BusyResourceUnits;
+
+  // Counts the number of instructions in the pending set that were dispatched
+  // during this cycle.
+  unsigned NumDispatchedToThePendingSet;
+
+  // True if the previous pipeline Stage was unable to dispatch a full group of
+  // opcodes because scheduler buffers (or LS queues) were unavailable.
+  bool HadTokenStall;
+
   /// Verify the given selection strategy and set the Strategy member
   /// accordingly.  If no strategy is provided, the DefaultSchedulerStrategy is
   /// used.
@@ -112,9 +143,15 @@ class Scheduler : public HardwareUnit {
   // vector 'Executed'.
   void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
 
-  // Try to promote instructions from WaitSet to ReadySet.
+  // Try to promote instructions from the PendingSet to the ReadySet.
   // Add promoted instructions to the 'Ready' vector in input.
-  void promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+  // Returns true if at least one instruction was promoted.
+  bool promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+  // Try to promote instructions from the WaitSet to the PendingSet.
+  // Add promoted instructions to the 'Pending' vector in input.
+  // Returns true if at least one instruction was promoted.
+  bool promoteToPendingSet(SmallVectorImpl<InstRef> &Pending);
 
 public:
   Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
@@ -127,7 +164,8 @@ public:
 
   Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
             std::unique_ptr<SchedulerStrategy> SelectStrategy)
-      : LSU(Lsu), Resources(std::move(RM)) {
+      : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0),
+        NumDispatchedToThePendingSet(0), HadTokenStall(false) {
     initializeStrategy(std::move(SelectStrategy));
   }
 
@@ -140,15 +178,12 @@ public:
     SC_DISPATCH_GROUP_STALL,
   };
 
-  /// Check if the instruction in 'IR' can be dispatched and returns an answer
-  /// in the form of a Status value.
+  /// Check if the instruction in 'IR' can be dispatched during this cycle.
+  /// Return SC_AVAILABLE if both scheduler and LS resources are available.
   ///
-  /// The DispatchStage is responsible for querying the Scheduler before
-  /// dispatching new instructions. This routine is used for performing such
-  /// a query.  If the instruction 'IR' can be dispatched, then true is
-  /// returned, otherwise false is returned with Event set to the stall type.
-  /// Internally, it also checks if the load/store unit is available.
-  Status isAvailable(const InstRef &IR) const;
+  /// This method is also responsible for setting field HadTokenStall if
+  /// IR cannot be dispatched to the Scheduler due to unavailable resources.
+  Status isAvailable(const InstRef &IR);
 
   /// Reserves buffer and LSUnit queue resources that are necessary to issue
   /// this instruction.
@@ -156,11 +191,11 @@ public:
   /// Returns true if instruction IR is ready to be issued to the underlying
   /// pipelines. Note that this operation cannot fail; it assumes that a
   /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
-  void dispatch(const InstRef &IR);
-
-  /// Returns true if IR is ready to be executed by the underlying pipelines.
-  /// This method assumes that IR has been previously dispatched.
-  bool isReady(const InstRef &IR) const;
+  ///
+  /// If IR is a memory operation, then the Scheduler queries the LS unit to
+  /// obtain a LS token. An LS token is used internally to track memory
+  /// dependencies.
+  bool dispatch(InstRef &IR);
 
   /// Issue an instruction and populates a vector of used pipeline resources,
   /// and a vector of instructions that transitioned to the ready state as a
@@ -168,6 +203,7 @@ public:
   void issueInstruction(
       InstRef &IR,
       SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+      SmallVectorImpl<InstRef> &Pending,
       SmallVectorImpl<InstRef> &Ready);
 
   /// Returns true if IR has to be issued immediately, or if IR is a zero
@@ -181,9 +217,15 @@ public:
   /// have changed in state, and that are now available to new instructions.
   /// Instructions executed are added to vector Executed, while vector Ready is
   /// populated with instructions that have become ready in this new cycle.
+  /// Vector Pending is populated by instructions that have transitioned through
+  /// the pending state during this cycle. The Pending and Ready sets may not be
+  /// disjoint. An instruction is allowed to transition from the WAIT state to
+  /// the READY state (going through the PENDING state) within a single cycle.
+  /// That means, instructions may appear in both the Pending and Ready set.
   void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
-                  SmallVectorImpl<InstRef> &Ready,
-                  SmallVectorImpl<InstRef> &Executed);
+                  SmallVectorImpl<InstRef> &Executed,
+                  SmallVectorImpl<InstRef> &Pending,
+                  SmallVectorImpl<InstRef> &Ready);
 
   /// Convert a resource mask into a valid llvm processor resource identifier.
   unsigned getResourceID(uint64_t Mask) const {
@@ -195,6 +237,26 @@ public:
   /// resources are not available.
   InstRef select();
 
+  bool isReadySetEmpty() const { return ReadySet.empty(); }
+  bool isWaitSetEmpty() const { return WaitSet.empty(); }
+
+  /// This method is called by the ExecuteStage at the end of each cycle to
+  /// identify bottlenecks caused by data dependencies. Vector RegDeps is
+  /// populated by instructions that were not issued because of unsolved
+  /// register dependencies.  Vector MemDeps is populated by instructions that
+  /// were not issued because of unsolved memory dependencies.
+  void analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
+                               SmallVectorImpl<InstRef> &MemDeps);
+
+  /// Returns a mask of busy resources, and populates vector Insts with
+  /// instructions that could not be issued to the underlying pipelines because
+  /// not all pipeline resources were available.
+  uint64_t analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts);
+
+  // Returns true if the dispatch logic couldn't dispatch a full group due to
+  // unavailable scheduler and/or LS resources.
+  bool hadTokenStall() const { return HadTokenStall; }
+
 #ifndef NDEBUG
   // Update the ready queues.
   void dump() const;
diff --git a/include/llvm/MCA/InstrBuilder.h b/include/llvm/MCA/InstrBuilder.h
index 5f998db5e4ce..690016354f7a 100644
--- a/include/llvm/MCA/InstrBuilder.h
+++ b/include/llvm/MCA/InstrBuilder.h
@@ -1,9 +1,8 @@
 //===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h
index b91610c64d85..d4d3f22797f7 100644
--- a/include/llvm/MCA/Instruction.h
+++ b/include/llvm/MCA/Instruction.h
@@ -1,9 +1,8 @@
 //===--------------------- Instruction.h ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -81,6 +80,15 @@ struct ReadDescriptor {
 
 class ReadState;
 
+/// A critical data dependency descriptor.
+///
+/// Field RegID is set to the invalid register for memory dependencies.
+struct CriticalDependency {
+  unsigned IID;
+  unsigned RegID;
+  unsigned Cycles;
+};
+
 /// Tracks uses of a register definition (e.g. register write).
 ///
 /// Each implicit/explicit register write is associated with an instance of
@@ -124,9 +132,11 @@ class WriteState {
 
   // A partial write that is in a false dependency with this write.
   WriteState *PartialWrite;
-
   unsigned DependentWriteCyclesLeft;
 
+  // Critical register dependency for this write.
+  CriticalDependency CRD;
+
   // A list of dependent reads. Users is a set of dependent
   // reads. A dependent read is added to the set only if CyclesLeft
   // is "unknown". As soon as CyclesLeft is 'known', each user in the set
@@ -141,7 +151,7 @@ public:
       : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
         ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
         IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
-        DependentWriteCyclesLeft(0) {}
+        DependentWriteCyclesLeft(0), CRD() {}
 
   WriteState(const WriteState &Other) = default;
   WriteState &operator=(const WriteState &Other) = default;
@@ -151,13 +161,21 @@ public:
   unsigned getRegisterID() const { return RegisterID; }
   unsigned getRegisterFileID() const { return PRFID; }
   unsigned getLatency() const { return WD->Latency; }
-
-  void addUser(ReadState *Use, int ReadAdvance);
-  void addUser(WriteState *Use);
-
   unsigned getDependentWriteCyclesLeft() const {
     return DependentWriteCyclesLeft;
   }
+  const WriteState *getDependentWrite() const { return DependentWrite; }
+  const CriticalDependency &getCriticalRegDep() const { return CRD; }
+
+  // This method adds Use to the set of data dependent reads. IID is the
+  // instruction identifier associated with this write. ReadAdvance is the
+  // number of cycles to subtract from the latency of this data dependency.
+  // Use is in a RAW dependency with this write.
+  void addUser(unsigned IID, ReadState *Use, int ReadAdvance);
+
+  // Use is a younger register write that is in a false dependency with this
+  // write. IID is the instruction identifier associated with this write.
+  void addUser(unsigned IID, WriteState *Use);
 
   unsigned getNumUsers() const {
     unsigned NumUsers = Users.size();
@@ -169,17 +187,20 @@ public:
   bool clearsSuperRegisters() const { return ClearsSuperRegs; }
   bool isWriteZero() const { return WritesZero; }
   bool isEliminated() const { return IsEliminated; }
-  bool isExecuted() const {
-    return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+
+  bool isReady() const {
+    if (DependentWrite)
+      return false;
+    unsigned CyclesLeft = getDependentWriteCyclesLeft();
+    return !CyclesLeft || CyclesLeft < getLatency();
   }
 
-  const WriteState *getDependentWrite() const { return DependentWrite; }
-  void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
-  void writeStartEvent(unsigned Cycles) {
-    DependentWriteCyclesLeft = Cycles;
-    DependentWrite = nullptr;
+  bool isExecuted() const {
+    return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
   }
 
+  void setDependentWrite(const WriteState *Other) { DependentWrite = Other; }
+  void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
   void setWriteZero() { WritesZero = true; }
   void setEliminated() {
     assert(Users.empty() && "Write is in an inconsistent state.");
@@ -191,7 +212,7 @@ public:
 
   // On every cycle, update CyclesLeft and notify dependent users.
   void cycleEvent();
-  void onInstructionIssued();
+  void onInstructionIssued(unsigned IID);
 
 #ifndef NDEBUG
   void dump() const;
@@ -221,6 +242,8 @@ class ReadState {
   // dependent writes (i.e. field DependentWrite) is zero, this value is
   // propagated to field CyclesLeft.
   unsigned TotalCycles;
+  // Longest register dependency.
+  CriticalDependency CRD;
   // This field is set to true only if there are no dependent writes, and
   // there are no `CyclesLeft' to wait.
   bool IsReady;
@@ -232,14 +255,16 @@ class ReadState {
 public:
   ReadState(const ReadDescriptor &Desc, unsigned RegID)
       : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
-        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true),
         IsZero(false), IndependentFromDef(false) {}
 
   const ReadDescriptor &getDescriptor() const { return *RD; }
   unsigned getSchedClass() const { return RD->SchedClassID; }
   unsigned getRegisterID() const { return RegisterID; }
   unsigned getRegisterFileID() const { return PRFID; }
+  const CriticalDependency &getCriticalRegDep() const { return CRD; }
 
+  bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
   bool isReady() const { return IsReady; }
   bool isImplicitRead() const { return RD->isImplicitRead(); }
 
@@ -247,7 +272,7 @@ public:
   void setIndependentFromDef() { IndependentFromDef = true; }
 
   void cycleEvent();
-  void writeStartEvent(unsigned Cycles);
+  void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
   void setDependentWrites(unsigned Writes) {
     DependentWrites = Writes;
     IsReady = !Writes;
@@ -330,9 +355,16 @@ struct InstrDesc {
   // A list of buffered resources consumed by this instruction.
   SmallVector<uint64_t, 4> Buffers;
 
+  unsigned UsedProcResUnits;
+  unsigned UsedProcResGroups;
+
   unsigned MaxLatency;
   // Number of MicroOps for this instruction.
   unsigned NumMicroOps;
+  // SchedClassID used to construct this InstrDesc.
+  // This information is currently used by views to do fast queries on the
+  // subtarget when computing the reciprocal throughput.
+  unsigned SchedClassID;
 
   bool MayLoad;
   bool MayStore;
@@ -398,6 +430,7 @@ public:
   // Returns true if this instruction is a candidate for move elimination.
   bool isOptimizableMove() const { return IsOptimizableMove; }
   void setOptimizableMove() { IsOptimizableMove = true; }
+  bool isMemOp() const { return Desc.MayLoad || Desc.MayStore; }
 };
 
 /// An instruction propagated through the simulated instruction pipeline.
@@ -406,12 +439,13 @@ public:
 /// that are sent to the various components of the simulated hardware pipeline.
 class Instruction : public InstructionBase {
   enum InstrStage {
-    IS_INVALID,   // Instruction in an invalid state.
-    IS_AVAILABLE, // Instruction dispatched but operands are not ready.
-    IS_READY,     // Instruction dispatched and operands ready.
-    IS_EXECUTING, // Instruction issued.
-    IS_EXECUTED,  // Instruction executed. Values are written back.
-    IS_RETIRED    // Instruction retired.
+    IS_INVALID,    // Instruction in an invalid state.
+    IS_DISPATCHED, // Instruction dispatched but operands are not ready.
+    IS_PENDING,    // Instruction is not ready, but operand latency is known.
+    IS_READY,      // Instruction dispatched and operands ready.
+    IS_EXECUTING,  // Instruction issued.
+    IS_EXECUTED,   // Instruction executed. Values are written back.
+    IS_RETIRED     // Instruction retired.
   };
 
   // The current instruction stage.
@@ -424,12 +458,34 @@ class Instruction : public InstructionBase {
   // Retire Unit token ID for this instruction.
   unsigned RCUTokenID;
 
+  // LS token ID for this instruction.
+  // This field is set to the invalid null token if this is not a memory
+  // operation.
+  unsigned LSUTokenID;
+
+  // Critical register dependency.
+  CriticalDependency CriticalRegDep;
+
+  // Critical memory dependency.
+  CriticalDependency CriticalMemDep;
+
+  // A bitmask of busy processor resource units.
+  // This field is set to zero only if execution is not delayed during this
+  // cycle because of unavailable pipeline resources.
+  uint64_t CriticalResourceMask;
+
+  // True if this instruction has been optimized at register renaming stage.
+  bool IsEliminated;
+
 public:
   Instruction(const InstrDesc &D)
       : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
-        RCUTokenID(0) {}
+        RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
+        CriticalResourceMask(0), IsEliminated(false) {}
 
   unsigned getRCUTokenID() const { return RCUTokenID; }
+  unsigned getLSUTokenID() const { return LSUTokenID; }
+  void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
   int getCyclesLeft() const { return CyclesLeft; }
 
   // Transition to the dispatch stage, and assign a RCUToken to this
@@ -438,37 +494,48 @@ public:
   void dispatch(unsigned RCUTokenID);
 
   // Instruction issued. Transition to the IS_EXECUTING state, and update
-  // all the definitions.
-  void execute();
-
-  // Force a transition from the IS_AVAILABLE state to the IS_READY state if
-  // input operands are all ready. State transitions normally occur at the
-  // beginning of a new cycle (see method cycleEvent()). However, the scheduler
-  // may decide to promote instructions from the wait queue to the ready queue
-  // as the result of another issue event.  This method is called every time the
-  // instruction might have changed in state.
+  // all the register definitions.
+  void execute(unsigned IID);
+
+  // Force a transition from the IS_DISPATCHED state to the IS_READY or
+  // IS_PENDING state. State transitions normally occur either at the beginning
+  // of a new cycle (see method cycleEvent()), or as a result of another issue
+  // event. This method is called every time the instruction might have changed
+  // in state. It internally delegates to methods updateDispatched() and
+  // updatePending().
   void update();
+  bool updateDispatched();
+  bool updatePending();
 
-  bool isDispatched() const { return Stage == IS_AVAILABLE; }
+  bool isDispatched() const { return Stage == IS_DISPATCHED; }
+  bool isPending() const { return Stage == IS_PENDING; }
   bool isReady() const { return Stage == IS_READY; }
   bool isExecuting() const { return Stage == IS_EXECUTING; }
   bool isExecuted() const { return Stage == IS_EXECUTED; }
   bool isRetired() const { return Stage == IS_RETIRED; }
+  bool isEliminated() const { return IsEliminated; }
 
-  bool isEliminated() const {
-    return isReady() && getDefs().size() &&
-           all_of(getDefs(),
-                  [](const WriteState &W) { return W.isEliminated(); });
-  }
-
-  // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
+  // Forces a transition from state IS_DISPATCHED to state IS_EXECUTED.
   void forceExecuted();
+  void setEliminated() { IsEliminated = true; }
 
   void retire() {
     assert(isExecuted() && "Instruction is in an invalid state!");
     Stage = IS_RETIRED;
   }
 
+  const CriticalDependency &getCriticalRegDep() const { return CriticalRegDep; }
+  const CriticalDependency &getCriticalMemDep() const { return CriticalMemDep; }
+  const CriticalDependency &computeCriticalRegDep();
+  void setCriticalMemDep(const CriticalDependency &MemDep) {
+    CriticalMemDep = MemDep;
+  }
+
+  uint64_t getCriticalResourceMask() const { return CriticalResourceMask; }
+  void setCriticalResourceMask(uint64_t ResourceMask) {
+    CriticalResourceMask = ResourceMask;
+  }
+
   void cycleEvent();
 };
 
@@ -483,13 +550,17 @@ public:
   InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
 
   bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+  bool operator!=(const InstRef &Other) const { return Data != Other.Data; }
+  bool operator<(const InstRef &Other) const {
+    return Data.first < Other.Data.first;
+  }
 
   unsigned getSourceIndex() const { return Data.first; }
   Instruction *getInstruction() { return Data.second; }
   const Instruction *getInstruction() const { return Data.second; }
 
   /// Returns true if this references a valid instruction.
-  operator bool() const { return Data.second != nullptr; }
+  explicit operator bool() const { return Data.second != nullptr; }
 
   /// Invalidate this reference.
   void invalidate() { Data.second = nullptr; }
@@ -537,7 +608,7 @@ public:
     return !WS || WS->isExecuted();
   }
 
-  bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+  bool isValid() const { return Data.second && Data.first != INVALID_IID; }
   bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
 
 #ifndef NDEBUG
diff --git a/include/llvm/MCA/Pipeline.h b/include/llvm/MCA/Pipeline.h
index acd256060bdd..935033f67f8b 100644
--- a/include/llvm/MCA/Pipeline.h
+++ b/include/llvm/MCA/Pipeline.h
@@ -1,9 +1,8 @@
 //===--------------------- Pipeline.h ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/SourceMgr.h b/include/llvm/MCA/SourceMgr.h
index 5e0ca6419f5d..dbe31db1b1dd 100644
--- a/include/llvm/MCA/SourceMgr.h
+++ b/include/llvm/MCA/SourceMgr.h
@@ -1,9 +1,8 @@
 //===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Stages/DispatchStage.h b/include/llvm/MCA/Stages/DispatchStage.h
index f015cd7522eb..d80ededeaca1 100644
--- a/include/llvm/MCA/Stages/DispatchStage.h
+++ b/include/llvm/MCA/Stages/DispatchStage.h
@@ -1,9 +1,8 @@
 //===----------------------- DispatchStage.h --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -62,8 +61,6 @@ class DispatchStage final : public Stage {
   bool canDispatch(const InstRef &IR) const;
   Error dispatch(InstRef IR);
 
-  void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI);
-
   void notifyInstructionDispatched(const InstRef &IR,
                                    ArrayRef<unsigned> UsedPhysRegs,
                                    unsigned uOps) const;
@@ -71,9 +68,7 @@ class DispatchStage final : public Stage {
 public:
   DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
                 unsigned MaxDispatchWidth, RetireControlUnit &R,
-                RegisterFile &F)
-      : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
-        CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+                RegisterFile &F);
 
   bool isAvailable(const InstRef &IR) const override;
 
diff --git a/include/llvm/MCA/Stages/EntryStage.h b/include/llvm/MCA/Stages/EntryStage.h
index cd9a65b8cc2b..59a2daff886e 100644
--- a/include/llvm/MCA/Stages/EntryStage.h
+++ b/include/llvm/MCA/Stages/EntryStage.h
@@ -1,9 +1,8 @@
 //===---------------------- EntryStage.h ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Stages/ExecuteStage.h b/include/llvm/MCA/Stages/ExecuteStage.h
index 8cb287e06d9f..03737e0220eb 100644
--- a/include/llvm/MCA/Stages/ExecuteStage.h
+++ b/include/llvm/MCA/Stages/ExecuteStage.h
@@ -1,9 +1,8 @@
 //===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -29,6 +28,12 @@ namespace mca {
 class ExecuteStage final : public Stage {
   Scheduler &HWS;
 
+  unsigned NumDispatchedOpcodes;
+  unsigned NumIssuedOpcodes;
+
+  // True if this stage should notify listeners of HWPressureEvents.
+  bool EnablePressureEvents;
+
   Error issueInstruction(InstRef &IR);
 
   // Called at the beginning of each cycle to issue already dispatched
@@ -42,7 +47,10 @@ class ExecuteStage final : public Stage {
   ExecuteStage &operator=(const ExecuteStage &Other) = delete;
 
 public:
-  ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+  ExecuteStage(Scheduler &S) : ExecuteStage(S, false) {}
+  ExecuteStage(Scheduler &S, bool ShouldPerformBottleneckAnalysis)
+      : Stage(), HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0),
+        EnablePressureEvents(ShouldPerformBottleneckAnalysis) {}
 
   // This stage works under the assumption that the Pipeline will eventually
   // execute a retire stage. We don't need to check if pipelines and/or
@@ -61,12 +69,14 @@ public:
   // Instructions that transitioned to the 'Executed' state are automatically
   // moved to the next stage (i.e. RetireStage).
   Error cycleStart() override;
+  Error cycleEnd() override;
   Error execute(InstRef &IR) override;
 
   void notifyInstructionIssued(
       const InstRef &IR,
       MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
   void notifyInstructionExecuted(const InstRef &IR) const;
+  void notifyInstructionPending(const InstRef &IR) const;
   void notifyInstructionReady(const InstRef &IR) const;
   void notifyResourceAvailable(const ResourceRef &RR) const;
 
diff --git a/include/llvm/MCA/Stages/InstructionTables.h b/include/llvm/MCA/Stages/InstructionTables.h
index 34e338f0ce6b..4b463c9b51c1 100644
--- a/include/llvm/MCA/Stages/InstructionTables.h
+++ b/include/llvm/MCA/Stages/InstructionTables.h
@@ -1,9 +1,8 @@
 //===--------------------- InstructionTables.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Stages/MicroOpQueueStage.h b/include/llvm/MCA/Stages/MicroOpQueueStage.h
new file mode 100644
index 000000000000..50a5ef87b2d2
--- /dev/null
+++ b/include/llvm/MCA/Stages/MicroOpQueueStage.h
@@ -0,0 +1,88 @@
+//===---------------------- MicroOpQueueStage.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage that implements a queue of micro opcodes.
+/// It can be used to simulate a hardware micro-op queue that serves opcodes to
+/// the out of order backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
+#define LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+/// A stage that simulates a queue of instruction opcodes.
+class MicroOpQueueStage : public Stage {
+  SmallVector<InstRef, 8> Buffer;
+  unsigned NextAvailableSlotIdx;
+  unsigned CurrentInstructionSlotIdx;
+
+  // Limits the number of instructions that can be written to this buffer every
+  // cycle. A value of zero means that there is no limit to the instruction
+  // throughput in input.
+  const unsigned MaxIPC;
+  unsigned CurrentIPC;
+
+  // Number of entries that are available during this cycle.
+  unsigned AvailableEntries;
+
+  // True if instructions dispatched to this stage don't need to wait for the
+  // next cycle before moving to the next stage.
+  // False if this buffer acts as a one cycle delay in the execution pipeline.
+  bool IsZeroLatencyStage;
+
+  MicroOpQueueStage(const MicroOpQueueStage &Other) = delete;
+  MicroOpQueueStage &operator=(const MicroOpQueueStage &Other) = delete;
+
+  // By default, an instruction consumes a number of buffer entries equal to its
+  // number of micro opcodes (see field `InstrDesc::NumMicroOps`).  The
+  // number of entries consumed by an instruction is normalized to the
+  // minimum value between NumMicroOps and the buffer size. This is to avoid
+  // problems with (microcoded) instructions that generate a number of micro
+  // opcodes that doesn't fit in the buffer.
+  unsigned getNormalizedOpcodes(const InstRef &IR) const {
+    unsigned NormalizedOpcodes =
+        std::min(static_cast<unsigned>(Buffer.size()),
+                 IR.getInstruction()->getDesc().NumMicroOps);
+    return NormalizedOpcodes ? NormalizedOpcodes : 1U;
+  }
+
+  Error moveInstructions();
+
+public:
+  MicroOpQueueStage(unsigned Size, unsigned IPC = 0,
+                    bool ZeroLatencyStage = true);
+
+  bool isAvailable(const InstRef &IR) const override {
+    if (MaxIPC && CurrentIPC == MaxIPC)
+      return false;
+    unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+    if (NormalizedOpcodes > AvailableEntries)
+      return false;
+    return true;
+  }
+
+  bool hasWorkToComplete() const override {
+    return AvailableEntries != Buffer.size();
+  }
+
+  Error execute(InstRef &IR) override;
+  Error cycleStart() override;
+  Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
diff --git a/include/llvm/MCA/Stages/RetireStage.h b/include/llvm/MCA/Stages/RetireStage.h
index 2051ce5c86ad..08c216ac7bf4 100644
--- a/include/llvm/MCA/Stages/RetireStage.h
+++ b/include/llvm/MCA/Stages/RetireStage.h
@@ -1,9 +1,8 @@
 //===---------------------- RetireStage.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Stages/Stage.h b/include/llvm/MCA/Stages/Stage.h
index fc7ab569bb0f..46b242caa6cf 100644
--- a/include/llvm/MCA/Stages/Stage.h
+++ b/include/llvm/MCA/Stages/Stage.h
@@ -1,9 +1,8 @@
 //===---------------------- Stage.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/MCA/Support.h b/include/llvm/MCA/Support.h
index 7b0c5bf3a486..1da097c90922 100644
--- a/include/llvm/MCA/Support.h
+++ b/include/llvm/MCA/Support.h
@@ -1,9 +1,8 @@
 //===--------------------- Support.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -61,24 +60,13 @@ public:
     return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
   }
 
+  unsigned getNumerator() const { return Numerator; }
+  unsigned getDenominator() const { return Denominator; }
+
   // Add the components of RHS to this instance.  Instead of calculating
   // the final value here, we keep track of the numerator and denominator
   // separately, to reduce floating point error.
-  ResourceCycles &operator+=(const ResourceCycles &RHS) {
-    if (Denominator == RHS.Denominator)
-      Numerator += RHS.Numerator;
-    else {
-      // Create a common denominator for LHS and RHS by calculating the least
-      // common multiple from the GCD.
-      unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
-      unsigned LCM = (Denominator * RHS.Denominator) / GCD;
-      unsigned LHSNumerator = Numerator * (LCM / Denominator);
-      unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
-      Numerator = LHSNumerator + RHSNumerator;
-      Denominator = LCM;
-    }
-    return *this;
-  }
+  ResourceCycles &operator+=(const ResourceCycles &RHS);
 };
 
 /// Populates vector Masks with processor resource masks.
@@ -106,6 +94,13 @@ public:
 void computeProcResourceMasks(const MCSchedModel &SM,
                               MutableArrayRef<uint64_t> Masks);
 
+// Returns the index of the highest bit set. For resource masks, the position of
+// the highest bit set can be used to construct a resource mask identifier.
+inline unsigned getResourceStateIndex(uint64_t Mask) {
+  assert(Mask && "Processor Resource Mask cannot be zero!");
+  return (std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask)) - 1;
+}
+
 /// Compute the reciprocal block throughput from a set of processor resource
 /// cycles. The reciprocal block throughput is computed as the MAX between:
 ///  - NumMicroOps / DispatchWidth
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 9ef1e4875191..c40278a4f923 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -1,9 +1,8 @@
 //===- Archive.h - ar archive file format -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/fallible_iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Support/Chrono.h"
@@ -143,44 +143,38 @@ public:
     getAsBinary(LLVMContext *Context = nullptr) const;
   };
 
-  class child_iterator {
+  class ChildFallibleIterator {
     Child C;
-    Error *E = nullptr;
 
   public:
-    child_iterator() : C(Child(nullptr, nullptr, nullptr)) {}
-    child_iterator(const Child &C, Error *E) : C(C), E(E) {}
+    ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
+    ChildFallibleIterator(const Child &C) : C(C) {}
 
     const Child *operator->() const { return &C; }
     const Child &operator*() const { return C; }
 
-    bool operator==(const child_iterator &other) const {
+    bool operator==(const ChildFallibleIterator &other) const {
       // Ignore errors here: If an error occurred during increment then getNext
       // will have been set to child_end(), and the following comparison should
       // do the right thing.
       return C == other.C;
     }
 
-    bool operator!=(const child_iterator &other) const {
+    bool operator!=(const ChildFallibleIterator &other) const {
       return !(*this == other);
     }
 
-    // Code in loops with child_iterators must check for errors on each loop
-    // iteration.  And if there is an error break out of the loop.
-    child_iterator &operator++() { // Preincrement
-      assert(E && "Can't increment iterator with no Error attached");
-      ErrorAsOutParameter ErrAsOutParam(E);
-      if (auto ChildOrErr = C.getNext())
-        C = *ChildOrErr;
-      else {
-        C = C.getParent()->child_end().C;
-        *E = ChildOrErr.takeError();
-        E = nullptr;
-      }
-      return *this;
+    Error inc() {
+      auto NextChild = C.getNext();
+      if (!NextChild)
+        return NextChild.takeError();
+      C = std::move(*NextChild);
+      return Error::success();
     }
   };
 
+  using child_iterator = fallible_iterator<ChildFallibleIterator>;
+
   class Symbol {
     const Archive *Parent;
     uint32_t SymbolIndex;
diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h
index 495b943d04c0..9e6daf2da36e 100644
--- a/include/llvm/Object/ArchiveWriter.h
+++ b/include/llvm/Object/ArchiveWriter.h
@@ -1,9 +1,8 @@
 //===- ArchiveWriter.h - ar archive file format writer ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,6 @@ struct NewArchiveMember {
   sys::TimePoint<std::chrono::seconds> ModTime;
   unsigned UID = 0, GID = 0, Perms = 0644;
 
-  bool IsNew = false;
   NewArchiveMember() = default;
   NewArchiveMember(MemoryBufferRef BufRef);
 
@@ -38,6 +36,8 @@ struct NewArchiveMember {
                                             bool Deterministic);
 };
 
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
+
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 99745e24b8c8..3c3e977baff4 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -1,9 +1,8 @@
 //===- Binary.h - A generic binary file -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #ifndef LLVM_OBJECT_BINARY_H
 #define LLVM_OBJECT_BINARY_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Support/Error.h"
@@ -42,7 +42,9 @@ protected:
     ID_Archive,
     ID_MachOUniversalBinary,
     ID_COFFImportFile,
-    ID_IR,                 // LLVM IR
+    ID_IR, // LLVM IR
+
+    ID_Minidump,
 
     ID_WinRes, // Windows resource (.res) file.
 
@@ -50,6 +52,9 @@ protected:
     ID_StartObjects,
     ID_COFF,
 
+    ID_XCOFF32, // AIX XCOFF 32-bit
+    ID_XCOFF64, // AIX XCOFF 64-bit
+
     ID_ELF32L, // ELF 32-bit, little endian
     ID_ELF32B, // ELF 32-bit, big endian
     ID_ELF64L, // ELF 64-bit, little endian
@@ -118,6 +123,8 @@ public:
     return TypeID == ID_COFF;
   }
 
+  bool isXCOFF() const { return TypeID == ID_XCOFF32 || TypeID == ID_XCOFF64; }
+
   bool isWasm() const { return TypeID == ID_Wasm; }
 
   bool isCOFFImportFile() const {
@@ -128,6 +135,8 @@ public:
     return TypeID == ID_IR;
   }
 
+  bool isMinidump() const { return TypeID == ID_Minidump; }
+
   bool isLittleEndian() const {
     return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
              TypeID == ID_MachO32B || TypeID == ID_MachO64B);
@@ -156,6 +165,9 @@ public:
   }
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(Binary, LLVMBinaryRef)
+
 /// Create a Binary from Source, autodetecting the file type.
 ///
 /// @param Source The data to create the Binary from.
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index b753d261a0fc..c53cbc46c747 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -1,9 +1,8 @@
 //===- COFF.h - COFF object file implementation -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -898,13 +897,12 @@ protected:
   Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
   Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
   void moveSectionNext(DataRefImpl &Sec) const override;
-  std::error_code getSectionName(DataRefImpl Sec,
-                                 StringRef &Res) const override;
+  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
   uint64_t getSectionAddress(DataRefImpl Sec) const override;
   uint64_t getSectionIndex(DataRefImpl Sec) const override;
   uint64_t getSectionSize(DataRefImpl Sec) const override;
-  std::error_code getSectionContents(DataRefImpl Sec,
-                                     StringRef &Res) const override;
+  Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const override;
   uint64_t getSectionAlignment(DataRefImpl Sec) const override;
   bool isSectionCompressed(DataRefImpl Sec) const override;
   bool isSectionText(DataRefImpl Sec) const override;
@@ -1034,10 +1032,10 @@ public:
 
   ArrayRef<coff_relocation> getRelocations(const coff_section *Sec) const;
 
-  std::error_code getSectionName(const coff_section *Sec, StringRef &Res) const;
+  Expected<StringRef> getSectionName(const coff_section *Sec) const;
   uint64_t getSectionSize(const coff_section *Sec) const;
-  std::error_code getSectionContents(const coff_section *Sec,
-                                     ArrayRef<uint8_t> &Res) const;
+  Error getSectionContents(const coff_section *Sec,
+                           ArrayRef<uint8_t> &Res) const;
 
   uint64_t getImageBase() const;
   std::error_code getVaPtr(uint64_t VA, uintptr_t &Res) const;
diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h
index 0a4556ad8884..5aa836411118 100644
--- a/include/llvm/Object/COFFImportFile.h
+++ b/include/llvm/Object/COFFImportFile.h
@@ -1,9 +1,8 @@
 //===- COFFImportFile.h - COFF short import file implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,12 +36,11 @@ public:
 
   void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; }
 
-  std::error_code printSymbolName(raw_ostream &OS,
-                                  DataRefImpl Symb) const override {
+  Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override {
     if (Symb.p == 0)
       OS << "__imp_";
     OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header));
-    return std::error_code();
+    return Error::success();
   }
 
   uint32_t getSymbolFlags(DataRefImpl Symb) const override {
@@ -71,9 +69,21 @@ private:
 };
 
 struct COFFShortExport {
+  /// The name of the export as specified in the .def file or on the command
+  /// line, i.e. "foo" in "/EXPORT:foo", and "bar" in "/EXPORT:foo=bar". This
+  /// may lack mangling, such as underscore prefixing and stdcall suffixing.
   std::string Name;
+
+  /// The external, exported name. Only non-empty when export renaming is in
+  /// effect, i.e. "foo" in "/EXPORT:foo=bar".
   std::string ExtName;
+
+  /// The real, mangled symbol name from the object file. Given
+  /// "/export:foo=bar", this could be "_bar@8" if bar is stdcall.
   std::string SymbolName;
+
+  /// Creates a weak alias. This is the name of the weak aliasee. In a .def
+  /// file, this is "baz" in "EXPORTS\nfoo = bar == baz".
   std::string AliasTarget;
 
   uint16_t Ordinal = 0;
diff --git a/include/llvm/Object/COFFModuleDefinition.h b/include/llvm/Object/COFFModuleDefinition.h
index be139a2833b0..ab52259fea1a 100644
--- a/include/llvm/Object/COFFModuleDefinition.h
+++ b/include/llvm/Object/COFFModuleDefinition.h
@@ -1,9 +1,8 @@
 //===--- COFFModuleDefinition.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Object/CVDebugRecord.h b/include/llvm/Object/CVDebugRecord.h
index faad72c0df29..d41c7391f701 100644
--- a/include/llvm/Object/CVDebugRecord.h
+++ b/include/llvm/Object/CVDebugRecord.h
@@ -1,9 +1,8 @@
 //===- CVDebugRecord.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Object/Decompressor.h b/include/llvm/Object/Decompressor.h
index 2a77d2ffbf68..cc918481b308 100644
--- a/include/llvm/Object/Decompressor.h
+++ b/include/llvm/Object/Decompressor.h
@@ -1,9 +1,8 @@
 //===-- Decompressor.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===/
 
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index bcdc190cc7dc..cf8e4529bad9 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -1,9 +1,8 @@
 //===- ELF.h - ELF object file implementation -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,10 +44,26 @@ getElfArchType(StringRef Object) {
                         (uint8_t)Object[ELF::EI_DATA]);
 }
 
-static inline Error createError(StringRef Err) {
+static inline Error createError(const Twine &Err) {
   return make_error<StringError>(Err, object_error::parse_failed);
 }
 
+template <class ELFT> class ELFFile;
+
+/// Returns "[index N]" for \p Sec, or "[unknown index]" if sections() fails.
+template <class ELFT>
+std::string getSecIndexForError(const ELFFile<ELFT> *Obj,
+                                const typename ELFT::Shdr *Sec) {
+  auto SectionsOrErr = Obj->sections();
+  if (!SectionsOrErr) {
+    // Drop the error to keep this helper convenient for error reporting;
+    // callers should already have reported a sections() failure by now.
+    llvm::consumeError(SectionsOrErr.takeError());
+    return "[unknown index]";
+  }
+  return "[index " + std::to_string(Sec - &SectionsOrErr->front()) + "]";
+}
+
 template <class ELFT>
 class ELFFile {
 public:
@@ -80,9 +95,7 @@ public:
   using Elf_Relr_Range = typename ELFT::RelrRange;
   using Elf_Phdr_Range = typename ELFT::PhdrRange;
 
-  const uint8_t *base() const {
-    return reinterpret_cast<const uint8_t *>(Buf.data());
-  }
+  const uint8_t *base() const { return Buf.bytes_begin(); }
 
   size_t getBufSize() const { return Buf.size(); }
 
@@ -115,8 +128,8 @@ public:
                              SmallVectorImpl<char> &Result) const;
   uint32_t getRelativeRelocationType() const;
 
-  const char *getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
-  const char *getDynamicTagAsString(uint64_t Type) const;
+  std::string getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
+  std::string getDynamicTagAsString(uint64_t Type) const;
 
   /// Get the symbol for a given relocation.
   Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel,
@@ -165,11 +178,16 @@ public:
   /// Iterate over program header table.
   Expected<Elf_Phdr_Range> program_headers() const {
     if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr))
-      return createError("invalid e_phentsize");
+      return createError("invalid e_phentsize: " +
+                         Twine(getHeader()->e_phentsize));
     if (getHeader()->e_phoff +
             (getHeader()->e_phnum * getHeader()->e_phentsize) >
         getBufSize())
-      return createError("program headers longer than binary");
+      return createError("program headers are longer than binary of size " +
+                         Twine(getBufSize()) + ": e_phoff = 0x" +
+                         Twine::utohexstr(getHeader()->e_phoff) +
+                         ", e_phnum = " + Twine(getHeader()->e_phnum) +
+                         ", e_phentsize = " + Twine(getHeader()->e_phentsize));
     auto *Begin =
         reinterpret_cast<const Elf_Phdr *>(base() + getHeader()->e_phoff);
     return makeArrayRef(Begin, Begin + getHeader()->e_phnum);
@@ -183,12 +201,12 @@ public:
   /// \param Err [out] an error to support fallible iteration, which should
   ///  be checked after iteration ends.
   Elf_Note_Iterator notes_begin(const Elf_Phdr &Phdr, Error &Err) const {
-    if (Phdr.p_type != ELF::PT_NOTE) {
-      Err = createError("attempt to iterate notes of non-note program header");
-      return Elf_Note_Iterator(Err);
-    }
+    assert(Phdr.p_type == ELF::PT_NOTE && "Phdr is not of type PT_NOTE");
+    ErrorAsOutParameter ErrAsOutParam(&Err);
     if (Phdr.p_offset + Phdr.p_filesz > getBufSize()) {
-      Err = createError("invalid program header offset/size");
+      Err = createError("PT_NOTE header has invalid offset (0x" +
+                        Twine::utohexstr(Phdr.p_offset) + ") or size (0x" +
+                        Twine::utohexstr(Phdr.p_filesz) + ")");
       return Elf_Note_Iterator(Err);
     }
     return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz, Err);
@@ -202,12 +220,13 @@ public:
   /// \param Err [out] an error to support fallible iteration, which should
   ///  be checked after iteration ends.
   Elf_Note_Iterator notes_begin(const Elf_Shdr &Shdr, Error &Err) const {
-    if (Shdr.sh_type != ELF::SHT_NOTE) {
-      Err = createError("attempt to iterate notes of non-note section");
-      return Elf_Note_Iterator(Err);
-    }
+    assert(Shdr.sh_type == ELF::SHT_NOTE && "Shdr is not of type SHT_NOTE");
+    ErrorAsOutParameter ErrAsOutParam(&Err);
     if (Shdr.sh_offset + Shdr.sh_size > getBufSize()) {
-      Err = createError("invalid section offset/size");
+      Err = createError("SHT_NOTE section " + getSecIndexForError(this, &Shdr) +
+                        " has invalid offset (0x" +
+                        Twine::utohexstr(Shdr.sh_offset) + ") or size (0x" +
+                        Twine::utohexstr(Shdr.sh_size) + ")");
       return Elf_Note_Iterator(Err);
     }
     return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size, Err);
@@ -274,7 +293,7 @@ template <class ELFT>
 inline Expected<const typename ELFT::Shdr *>
 getSection(typename ELFT::ShdrRange Sections, uint32_t Index) {
   if (Index >= Sections.size())
-    return createError("invalid section index");
+    return createError("invalid section index: " + Twine(Index));
   return &Sections[Index];
 }
 
@@ -286,7 +305,10 @@ getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym,
   assert(Sym->st_shndx == ELF::SHN_XINDEX);
   unsigned Index = Sym - FirstSym;
   if (Index >= ShndxTable.size())
-    return createError("index past the end of the symbol table");
+    return createError(
+        "extended symbol index (" + Twine(Index) +
+        ") is past the end of the SHT_SYMTAB_SHNDX section of size " +
+        Twine(ShndxTable.size()));
 
   // The size of the table was checked in getSHNDXTable.
   return ShndxTable[Index];
@@ -332,21 +354,19 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
   return getSection(Index);
 }
 
-template <class ELFT>
-inline Expected<const typename ELFT::Sym *>
-getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
-  if (Index >= Symbols.size())
-    return createError("invalid symbol index");
-  return &Symbols[Index];
-}
-
 template <class ELFT>
 Expected<const typename ELFT::Sym *>
 ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
-  auto SymtabOrErr = symbols(Sec);
-  if (!SymtabOrErr)
-    return SymtabOrErr.takeError();
-  return object::getSymbol<ELFT>(*SymtabOrErr, Index);
+  auto SymsOrErr = symbols(Sec);
+  if (!SymsOrErr)
+    return SymsOrErr.takeError();
+
+  Elf_Sym_Range Symbols = *SymsOrErr;
+  if (Index >= Symbols.size())
+    return createError("unable to get symbol from section " +
+                       getSecIndexForError(this, Sec) +
+                       ": invalid symbol index (" + Twine(Index) + ")");
+  return &Symbols[Index];
 }
 
 template <class ELFT>
@@ -354,18 +374,26 @@ template <typename T>
 Expected<ArrayRef<T>>
 ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
   if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1)
-    return createError("invalid sh_entsize");
+    return createError("section " + getSecIndexForError(this, Sec) +
+                       " has an invalid sh_entsize: " + Twine(Sec->sh_entsize));
 
   uintX_t Offset = Sec->sh_offset;
   uintX_t Size = Sec->sh_size;
 
   if (Size % sizeof(T))
-    return createError("size is not a multiple of sh_entsize");
+    return createError("section " + getSecIndexForError(this, Sec) +
+                       " has an invalid sh_size (" + Twine(Size) +
+                       ") which is not a multiple of its sh_entsize (" +
+                       Twine(Sec->sh_entsize) + ")");
   if ((std::numeric_limits<uintX_t>::max() - Offset < Size) ||
       Offset + Size > Buf.size())
-    return createError("invalid section offset");
+    return createError("section " + getSecIndexForError(this, Sec) +
+                       " has a sh_offset (0x" + Twine::utohexstr(Offset) +
+                       ") + sh_size (0x" + Twine(Size) +
+                       ") that cannot be represented");
 
   if (Offset % alignof(T))
+    // TODO: this error is untested.
     return createError("unaligned data");
 
   const T *Start = reinterpret_cast<const T *>(base() + Offset);
@@ -438,8 +466,10 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const {
 
   if (!Index) // no section string table.
     return "";
+  // TODO: Test a case when the sh_link of the section with index 0 is broken.
   if (Index >= Sections.size())
-    return createError("invalid section index");
+    return createError("section header string table index " + Twine(Index) +
+                       " does not exist");
   return getStringTable(&Sections[Index]);
 }
 
@@ -448,7 +478,9 @@ template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
 template <class ELFT>
 Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
   if (sizeof(Elf_Ehdr) > Object.size())
-    return createError("Invalid buffer");
+    return createError("invalid buffer: the size (" + Twine(Object.size()) +
+                       ") is smaller than an ELF header (" +
+                       Twine(sizeof(Elf_Ehdr)) + ")");
   return ELFFile(Object);
 }
 
@@ -459,16 +491,18 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
     return ArrayRef<Elf_Shdr>();
 
   if (getHeader()->e_shentsize != sizeof(Elf_Shdr))
-    return createError(
-        "invalid section header entry size (e_shentsize) in ELF header");
+    return createError("invalid e_shentsize in ELF header: " +
+                       Twine(getHeader()->e_shentsize));
 
   const uint64_t FileSize = Buf.size();
-
   if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
-    return createError("section header table goes past the end of the file");
+    return createError(
+        "section header table goes past the end of the file: e_shoff = 0x" +
+        Twine::utohexstr(SectionTableOffset));
 
   // Invalid address alignment of section headers
   if (SectionTableOffset & (alignof(Elf_Shdr) - 1))
+    // TODO: this error is untested.
     return createError("invalid alignment of section headers");
 
   const Elf_Shdr *First =
@@ -479,6 +513,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
     NumSections = First->sh_size;
 
   if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))
+    // TODO: this error is untested.
     return createError("section table goes past the end of file");
 
   const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr);
@@ -505,10 +540,14 @@ template <typename T>
 Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
                                             uint32_t Entry) const {
   if (sizeof(T) != Section->sh_entsize)
+    // TODO: this error is untested.
     return createError("invalid sh_entsize");
   size_t Pos = Section->sh_offset + Entry * sizeof(T);
   if (Pos + sizeof(T) > Buf.size())
-    return createError("invalid section offset");
+    return createError("unable to access section " +
+                       getSecIndexForError(this, Section) + " data at 0x" +
+                       Twine::utohexstr(Pos) +
+                       ": offset goes past the end of file");
   return reinterpret_cast<const T *>(base() + Pos);
 }
 
@@ -534,6 +573,7 @@ ELFFile<ELFT>::getSection(const StringRef SectionName) const {
     if (*SecNameOrErr == SectionName)
       return &Sec;
   }
+  // TODO: this error is untested.
   return createError("invalid section name");
 }
 
@@ -541,15 +581,24 @@ template <class ELFT>
 Expected<StringRef>
 ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
   if (Section->sh_type != ELF::SHT_STRTAB)
-    return createError("invalid sh_type for string table, expected SHT_STRTAB");
+    return createError("invalid sh_type for string table section " +
+                       getSecIndexForError(this, Section) +
+                       ": expected SHT_STRTAB, but got " +
+                       object::getELFSectionTypeName(getHeader()->e_machine,
+                                                     Section->sh_type));
   auto V = getSectionContentsAsArray<char>(Section);
   if (!V)
     return V.takeError();
   ArrayRef<char> Data = *V;
   if (Data.empty())
+    // TODO: this error is untested.
     return createError("empty string table");
   if (Data.back() != '\0')
-    return createError("string table non-null terminated");
+    return createError(object::getELFSectionTypeName(getHeader()->e_machine,
+                                                     Section->sh_type) +
+                       " string table section " +
+                       getSecIndexForError(this, Section) +
+                       " is non-null terminated");
   return StringRef(Data.begin(), Data.size());
 }
 
@@ -577,9 +626,13 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
   const Elf_Shdr &SymTable = **SymTableOrErr;
   if (SymTable.sh_type != ELF::SHT_SYMTAB &&
       SymTable.sh_type != ELF::SHT_DYNSYM)
+    // TODO: this error is untested.
     return createError("invalid sh_type");
   if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym)))
-    return createError("invalid section contents size");
+    return createError("SHT_SYMTAB_SHNDX section has sh_size (" +
+                       Twine(SymTable.sh_size) +
+                       ") which is not equal to the number of symbols (" +
+                       Twine(V.size()) + ")");
   return V;
 }
 
@@ -598,6 +651,7 @@ ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec,
                                        Elf_Shdr_Range Sections) const {
 
   if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
+    // TODO: this error is untested.
     return createError(
         "invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM");
   auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link);
@@ -625,7 +679,11 @@ Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
   if (Offset == 0)
     return StringRef();
   if (Offset >= DotShstrtab.size())
-    return createError("invalid string offset");
+    return createError("a section " + getSecIndexForError(this, Section) +
+                       " has an invalid sh_name (0x" +
+                       Twine::utohexstr(Offset) +
+                       ") offset which goes past the end of the "
+                       "section name string table");
   return StringRef(DotShstrtab.data() + Offset);
 }
 
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 0f620681cd99..86c015efd704 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -1,9 +1,8 @@
 //===- ELFObjectFile.h - ELF object file implementation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,9 @@
 namespace llvm {
 namespace object {
 
+constexpr int NumElfSymbolTypes = 8;
+extern const llvm::EnumEntry<unsigned> ElfSymbolTypes[NumElfSymbolTypes];
+
 class elf_symbol_iterator;
 
 class ELFObjectFileBase : public ObjectFile {
@@ -52,8 +54,8 @@ class ELFObjectFileBase : public ObjectFile {
 protected:
   ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
 
-  virtual uint16_t getEMachine() const = 0;
   virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
+  virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolELFType(DataRefImpl Symb) const = 0;
 
@@ -62,6 +64,7 @@ protected:
   virtual uint64_t getSectionOffset(DataRefImpl Sec) const = 0;
 
   virtual Expected<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
+  virtual Error getBuildAttributes(ARMAttributeParser &Attributes) const = 0;
 
 public:
   using elf_symbol_iterator_range = iterator_range<elf_symbol_iterator>;
@@ -87,6 +90,8 @@ public:
 
   virtual uint16_t getEType() const = 0;
 
+  virtual uint16_t getEMachine() const = 0;
+
   std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
 };
 
@@ -142,6 +147,10 @@ public:
     return getObject()->getSymbolSize(getRawDataRefImpl());
   }
 
+  uint8_t getBinding() const {
+    return getObject()->getSymbolBinding(getRawDataRefImpl());
+  }
+
   uint8_t getOther() const {
     return getObject()->getSymbolOther(getRawDataRefImpl());
   }
@@ -149,6 +158,16 @@ public:
   uint8_t getELFType() const {
     return getObject()->getSymbolELFType(getRawDataRefImpl());
   }
+
+  /// Returns the alternative name registered in ElfSymbolTypes for this
+  /// symbol's ELF type, or an empty string when the type is not listed.
+  StringRef getELFTypeName() const {
+    const uint8_t SymType = getELFType();
+    for (const auto &Entry : ElfSymbolTypes)
+      if (Entry.Value == SymType)
+        return Entry.AltName;
+    return "";
+  }
 };
 
 class elf_symbol_iterator : public symbol_iterator {
@@ -239,6 +258,7 @@ protected:
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
   uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+  uint8_t getSymbolBinding(DataRefImpl Symb) const override;
   uint8_t getSymbolOther(DataRefImpl Symb) const override;
   uint8_t getSymbolELFType(DataRefImpl Symb) const override;
   Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
@@ -247,13 +267,12 @@ protected:
   Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
 
   void moveSectionNext(DataRefImpl &Sec) const override;
-  std::error_code getSectionName(DataRefImpl Sec,
-                                 StringRef &Res) const override;
+  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
   uint64_t getSectionAddress(DataRefImpl Sec) const override;
   uint64_t getSectionIndex(DataRefImpl Sec) const override;
   uint64_t getSectionSize(DataRefImpl Sec) const override;
-  std::error_code getSectionContents(DataRefImpl Sec,
-                                     StringRef &Res) const override;
+  Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const override;
   uint64_t getSectionAlignment(DataRefImpl Sec) const override;
   bool isSectionCompressed(DataRefImpl Sec) const override;
   bool isSectionText(DataRefImpl Sec) const override;
@@ -341,6 +360,28 @@ protected:
         (Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_PROTECTED));
   }
 
+  /// Parses the first SHT_ARM_ATTRIBUTES section, if any, into \p Attributes.
+  Error getBuildAttributes(ARMAttributeParser &Attributes) const override {
+    auto SectionsOrErr = EF.sections();
+    if (!SectionsOrErr)
+      return SectionsOrErr.takeError();
+    for (const Elf_Shdr &Sec : *SectionsOrErr) {
+      if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES) {
+        auto ErrorOrContents = EF.getSectionContents(&Sec);
+        if (!ErrorOrContents)
+          return ErrorOrContents.takeError();
+        auto Contents = ErrorOrContents.get();
+        // Test the size first: indexing an empty section is out of bounds.
+        if (Contents.size() <= 1 ||
+            Contents[0] != ARMBuildAttrs::Format_Version)
+          return Error::success();
+        Attributes.Parse(Contents, ELFT::TargetEndianness == support::little);
+        break;
+      }
+    }
+    return Error::success();
+  }
+
   // This flag is used for classof, to distinguish ELFObjectFile from
   // its subclass. If more subclasses will be created, this flag will
   // have to become an enum.
@@ -382,28 +423,6 @@ public:
 
   unsigned getPlatformFlags() const override { return EF.getHeader()->e_flags; }
 
-  std::error_code getBuildAttributes(ARMAttributeParser &Attributes) const override {
-    auto SectionsOrErr = EF.sections();
-    if (!SectionsOrErr)
-      return errorToErrorCode(SectionsOrErr.takeError());
-
-    for (const Elf_Shdr &Sec : *SectionsOrErr) {
-      if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES) {
-        auto ErrorOrContents = EF.getSectionContents(&Sec);
-        if (!ErrorOrContents)
-          return errorToErrorCode(ErrorOrContents.takeError());
-
-        auto Contents = ErrorOrContents.get();
-        if (Contents[0] != ARMBuildAttrs::Format_Version || Contents.size() == 1)
-          return std::error_code();
-
-        Attributes.Parse(Contents, ELFT::TargetEndianness == support::little);
-        break;
-      }
-    }
-    return std::error_code();
-  }
-
   const ELFFile<ELFT> *getELFFile() const { return &EF; }
 
   bool isDyldType() const { return isDyldELFObject; }
@@ -441,7 +460,16 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
   auto SymStrTabOrErr = EF.getStringTable(StringTableSec);
   if (!SymStrTabOrErr)
     return SymStrTabOrErr.takeError();
-  return ESym->getName(*SymStrTabOrErr);
+  Expected<StringRef> Name = ESym->getName(*SymStrTabOrErr);
+
+  // If the symbol name is empty use the section name.
+  if ((!Name || Name->empty()) && ESym->getType() == ELF::STT_SECTION) {
+    StringRef SecName;
+    Expected<section_iterator> Sec = getSymbolSection(Sym);
+    if (Sec && !(*Sec)->getName(SecName))
+      return SecName;
+  }
+  return Name;
 }
 
 template <class ELFT>
@@ -532,6 +560,11 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
   return getSymbol(Symb)->st_size;
 }
 
+template <class ELFT>
+uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
+  return getSymbol(Symb)->getBinding();
+}
+
 template <class ELFT>
 uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
   return getSymbol(Symb)->st_other;
@@ -654,13 +687,8 @@ void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
-                                                    StringRef &Result) const {
-  auto Name = EF.getSectionName(&*getSection(Sec));
-  if (!Name)
-    return errorToErrorCode(Name.takeError());
-  Result = *Name;
-  return std::error_code();
+Expected<StringRef> ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec) const {
+  return EF.getSectionName(&*getSection(Sec));
 }
 
 template <class ELFT>
@@ -685,16 +713,15 @@ uint64_t ELFObjectFile<ELFT>::getSectionSize(DataRefImpl Sec) const {
 }
 
 template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
-                                        StringRef &Result) const {
+Expected<ArrayRef<uint8_t>>
+ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec) const {
   const Elf_Shdr *EShdr = getSection(Sec);
   if (std::error_code EC =
           checkOffset(getMemoryBufferRef(),
                       (uintptr_t)base() + EShdr->sh_offset, EShdr->sh_size))
-    return EC;
-  Result = StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size);
-  return std::error_code();
+    return errorCodeToError(EC);
+  return makeArrayRef((const uint8_t *)base() + EShdr->sh_offset,
+                      EShdr->sh_size);
 }
 
 template <class ELFT>
@@ -750,7 +777,7 @@ ELFObjectFile<ELFT>::dynamic_relocation_sections() const {
     }
   }
   for (const Elf_Shdr &Sec : *SectionsOrErr) {
-    if (is_contained(Offsets, Sec.sh_offset))
+    if (is_contained(Offsets, Sec.sh_addr))
       Res.emplace_back(toDRI(&Sec), this);
   }
   return Res;
@@ -925,15 +952,13 @@ ELFObjectFile<ELFT>::create(MemoryBufferRef Object) {
   for (const Elf_Shdr &Sec : *SectionsOrErr) {
     switch (Sec.sh_type) {
     case ELF::SHT_DYNSYM: {
-      if (DotDynSymSec)
-        return createError("More than one dynamic symbol table!");
-      DotDynSymSec = &Sec;
+      if (!DotDynSymSec)
+        DotDynSymSec = &Sec;
       break;
     }
     case ELF::SHT_SYMTAB: {
-      if (DotSymtabSec)
-        return createError("More than one static symbol table!");
-      DotSymtabSec = &Sec;
+      if (!DotSymtabSec)
+        DotSymtabSec = &Sec;
       break;
     }
     case ELF::SHT_SYMTAB_SHNDX: {
@@ -967,7 +992,9 @@ ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other)
 
 template <class ELFT>
 basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const {
-  DataRefImpl Sym = toDRI(DotSymtabSec, 0);
+  DataRefImpl Sym =
+      toDRI(DotSymtabSec,
+            DotSymtabSec && DotSymtabSec->sh_size >= sizeof(Elf_Sym) ? 1 : 0);
   return basic_symbol_iterator(SymbolRef(Sym, this));
 }
 
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index ec3c8e7bae46..5552208b1f8a 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -1,9 +1,8 @@
 //===- ELFTypes.h - Endian specific types for ELF ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -593,9 +592,9 @@ class Elf_Note_Impl {
 
   template <class NoteIteratorELFT> friend class Elf_Note_Iterator_Impl;
 
+public:
   Elf_Note_Impl(const Elf_Nhdr_Impl<ELFT> &Nhdr) : Nhdr(Nhdr) {}
 
-public:
   /// Get the note's name, excluding the terminating null byte.
   StringRef getName() const {
     if (!Nhdr.n_namesz)
diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h
index a15f8b9236eb..b7bbf06fc86d 100644
--- a/include/llvm/Object/Error.h
+++ b/include/llvm/Object/Error.h
@@ -1,9 +1,8 @@
 //===- Error.h - system_error extensions for Object -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index 993359b766a1..08b92f1bae50 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -1,9 +1,8 @@
 //===- IRObjectFile.h - LLVM IR object file implementation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,8 +37,7 @@ class IRObjectFile : public SymbolicFile {
 public:
   ~IRObjectFile() override;
   void moveSymbolNext(DataRefImpl &Symb) const override;
-  std::error_code printSymbolName(raw_ostream &OS,
-                                  DataRefImpl Symb) const override;
+  Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
   basic_symbol_iterator symbol_begin() const override;
   basic_symbol_iterator symbol_end() const override;
diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h
index 5f6a024cd132..0bbfc932493c 100644
--- a/include/llvm/Object/IRSymtab.h
+++ b/include/llvm/Object/IRSymtab.h
@@ -1,9 +1,8 @@
 //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -126,12 +125,13 @@ struct Uncommon {
   Str SectionName;
 };
 
+
 struct Header {
   /// Version number of the symtab format. This number should be incremented
   /// when the format changes, but it does not need to be incremented if a
   /// change to LLVM would cause it to create a different symbol table.
   Word Version;
-  enum { kCurrentVersion = 1 };
+  enum { kCurrentVersion = 2 };
 
   /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
   /// Consumers should rebuild the symbol table from IR if the producer's
@@ -148,6 +148,9 @@ struct Header {
 
   /// COFF-specific: linker directives.
   Str COFFLinkerOpts;
+
+  /// Dependent Library Specifiers
+  Range<Str> DependentLibraries;
 };
 
 } // end namespace storage
@@ -232,6 +235,7 @@ class Reader {
   ArrayRef<storage::Comdat> Comdats;
   ArrayRef<storage::Symbol> Symbols;
   ArrayRef<storage::Uncommon> Uncommons;
+  ArrayRef<storage::Str> DependentLibraries;
 
   StringRef str(storage::Str S) const { return S.get(Strtab); }
 
@@ -252,6 +256,7 @@ public:
     Comdats = range(header().Comdats);
     Symbols = range(header().Symbols);
     Uncommons = range(header().Uncommons);
+    DependentLibraries = range(header().DependentLibraries);
   }
 
   using symbol_range = iterator_range<object::content_iterator<SymbolRef>>;
@@ -284,6 +289,16 @@ public:
 
   /// COFF-specific: returns linker options specified in the input file.
   StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
+
+  /// Resolves every dependent-library entry against the string table and
+  /// returns the resulting specifier strings, in stored order.
+  std::vector<StringRef> getDependentLibraries() const {
+    std::vector<StringRef> Result;
+    Result.reserve(DependentLibraries.size());
+    for (const storage::Str &Entry : DependentLibraries)
+      Result.push_back(str(Entry));
+    return Result;
+  }
 };
 
 /// Ephemeral symbols produced by Reader::symbols() and
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index c2f4f4062934..ca9512f21706 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -1,9 +1,8 @@
 //===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -134,11 +133,9 @@ public:
   BindRebaseSegInfo(const MachOObjectFile *Obj);
 
   // Used to check a Mach-O Bind or Rebase entry for errors when iterating.
-  const char *checkSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
-                                bool endInvalid);
-  const char *checkCountAndSkip(uint32_t Count, uint32_t Skip,
-                                uint8_t PointerSize, int32_t SegIndex,
-                                uint64_t SegOffset);
+  const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
+                                 uint8_t PointerSize, uint32_t Count=1,
+                                 uint32_t Skip=0);
   // Used with valid SegIndex/SegOffset values from checked entries.
   StringRef segmentName(int32_t SegIndex);
   StringRef sectionName(int32_t SegIndex, uint64_t SegOffset);
@@ -296,13 +293,12 @@ public:
   unsigned getSectionID(SectionRef Sec) const;
 
   void moveSectionNext(DataRefImpl &Sec) const override;
-  std::error_code getSectionName(DataRefImpl Sec,
-                                 StringRef &Res) const override;
+  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
   uint64_t getSectionAddress(DataRefImpl Sec) const override;
   uint64_t getSectionIndex(DataRefImpl Sec) const override;
   uint64_t getSectionSize(DataRefImpl Sec) const override;
-  std::error_code getSectionContents(DataRefImpl Sec,
-                                     StringRef &Res) const override;
+  Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const override;
   uint64_t getSectionAlignment(DataRefImpl Sec) const override;
   Expected<SectionRef> getSection(unsigned SectionIndex) const;
   Expected<SectionRef> getSection(StringRef SectionName) const;
@@ -413,36 +409,32 @@ public:
                                                  bool is64,
                                                  MachOBindEntry::Kind);
 
-  /// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to
-  /// validate a MachOBindEntry.
-  const char *BindEntryCheckSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
-                                         bool endInvalid) const {
-    return BindRebaseSectionTable->checkSegAndOffset(SegIndex, SegOffset,
-                                                     endInvalid);
-  }
-  /// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for
-  /// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode.
-  const char *BindEntryCheckCountAndSkip(uint32_t Count, uint32_t Skip,
-                                         uint8_t PointerSize, int32_t SegIndex,
-                                         uint64_t SegOffset) const {
-    return BindRebaseSectionTable->checkCountAndSkip(Count, Skip, PointerSize,
-                                                     SegIndex, SegOffset);
+  /// Validates a MachOBindEntry for MachOBindEntry::moveNext(): checks that a
+  /// valid section fully contains a pointer of \p PointerSize at \p SegIndex /
+  /// \p SegOffset. \p Count and \p Skip extend the check to multiple fixups in
+  /// one bind (e.g. the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode).
+  /// The work is delegated to BindRebaseSectionTable.
+  const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
+                                          uint8_t PointerSize,
+                                          uint32_t Count = 1,
+                                          uint32_t Skip = 0) const {
+    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
+                                                      PointerSize, Count, Skip);
   }
 
-  /// For use with a SegIndex,SegOffset pair in MachORebaseEntry::moveNext() to
-  /// validate a MachORebaseEntry.
-  const char *RebaseEntryCheckSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
-                                           bool endInvalid) const {
-    return BindRebaseSectionTable->checkSegAndOffset(SegIndex, SegOffset,
-                                                     endInvalid);
-  }
-  /// For use in MachORebaseEntry::moveNext() to validate a MachORebaseEntry for
-  /// the REBASE_OPCODE_DO_*_TIMES* opcodes.
-  const char *RebaseEntryCheckCountAndSkip(uint32_t Count, uint32_t Skip,
-                                         uint8_t PointerSize, int32_t SegIndex,
-                                         uint64_t SegOffset) const {
-    return BindRebaseSectionTable->checkCountAndSkip(Count, Skip, PointerSize,
-                                                     SegIndex, SegOffset);
+  /// Validates a MachORebaseEntry for MachORebaseEntry::moveNext().
+  ///
+  /// Checks that a valid section fully contains a pointer of \p PointerSize at
+  /// \p SegIndex / \p SegOffset; \p Count and \p Skip extend the check to
+  /// multiple fixups in one rebase (e.g. the REBASE_OPCODE_DO_*_TIMES*
+  /// opcodes). The work is delegated to BindRebaseSectionTable.
+  const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex,
+                                            uint64_t SegOffset,
+                                            uint8_t PointerSize,
+                                            uint32_t Count = 1,
+                                            uint32_t Skip = 0) const {
+    return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
+                                                      PointerSize, Count, Skip);
   }
 
   /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to
@@ -579,6 +571,7 @@ public:
                               const char **McpuDefault = nullptr,
                               const char **ArchFlag = nullptr);
   static bool isValidArch(StringRef ArchFlag);
+  static ArrayRef<StringRef> getValidArchs();
   static Triple getHostArch();
 
   bool isRelocatableObject() const override;
@@ -616,6 +609,7 @@ public:
     case MachO::PLATFORM_TVOS: return "tvos";
     case MachO::PLATFORM_WATCHOS: return "watchos";
     case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+    case MachO::PLATFORM_MACCATALYST: return "macCatalyst";
     case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
     case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
     case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index 9e70b0bc30c0..5bf724f2c8b2 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -1,9 +1,8 @@
 //===- MachOUniversal.h - Mach-O universal binaries -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h
new file mode 100644
index 000000000000..470008d552e7
--- /dev/null
+++ b/include/llvm/Object/Minidump.h
@@ -0,0 +1,165 @@
+//===- Minidump.h - Minidump object file implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MINIDUMP_H
+#define LLVM_OBJECT_MINIDUMP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Minidump.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace object {
+
+/// A class providing access to the contents of a minidump file.
+class MinidumpFile : public Binary {
+public:
+  /// Construct a new MinidumpFile object from the given memory buffer. Returns
+  /// an error if this file cannot be identified as a minidump file, or if its
+  /// contents are badly corrupted (i.e. we cannot read the stream directory).
+  static Expected<std::unique_ptr<MinidumpFile>> create(MemoryBufferRef Source);
+
+  static bool classof(const Binary *B) { return B->isMinidump(); }
+
+  /// Returns the contents of the minidump header.
+  const minidump::Header &header() const { return Header; }
+
+  /// Returns the list of streams (stream directory entries) in this file.
+  ArrayRef<minidump::Directory> streams() const { return Streams; }
+
+  /// Returns the raw contents of the stream given by the directory entry.
+  ArrayRef<uint8_t> getRawStream(const minidump::Directory &Stream) const {
+    return getData().slice(Stream.Location.RVA, Stream.Location.DataSize);
+  }
+
+  /// Returns the raw contents of the stream of the given type, or None if the
+  /// file does not contain a stream of this type.
+  Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
+
+  /// Returns the raw contents of an object given by the LocationDescriptor. An
+  /// error is returned if the descriptor points outside of the minidump file.
+  Expected<ArrayRef<uint8_t>>
+  getRawData(minidump::LocationDescriptor Desc) const {
+    return getDataSlice(getData(), Desc.RVA, Desc.DataSize);
+  }
+
+  /// Returns the minidump string at the given offset. An error is returned if
+  /// we fail to parse the string, or the string is invalid UTF16.
+  Expected<std::string> getString(size_t Offset) const;
+
+  /// Returns the contents of the SystemInfo stream, cast to the appropriate
+  /// type. An error is returned if the file does not contain this stream, or
+  /// the stream is smaller than the size of the SystemInfo structure. The
+  /// internal consistency of the stream is not checked in any way.
+  Expected<const minidump::SystemInfo &> getSystemInfo() const {
+    return getStream<minidump::SystemInfo>(minidump::StreamType::SystemInfo);
+  }
+
+  /// Returns the module list embedded in the ModuleList stream. An error is
+  /// returned if the file does not contain this stream, or if the stream is
+  /// not large enough to contain the number of modules declared in the stream
+  /// header. The consistency of the Module entries themselves is not checked in
+  /// any way.
+  Expected<ArrayRef<minidump::Module>> getModuleList() const {
+    return getListStream<minidump::Module>(minidump::StreamType::ModuleList);
+  }
+
+  /// Returns the thread list embedded in the ThreadList stream. An error is
+  /// returned if the file does not contain this stream, or if the stream is
+  /// not large enough to contain the number of threads declared in the stream
+  /// header. The consistency of the Thread entries themselves is not checked in
+  /// any way.
+  Expected<ArrayRef<minidump::Thread>> getThreadList() const {
+    return getListStream<minidump::Thread>(minidump::StreamType::ThreadList);
+  }
+
+  /// Returns the list of memory ranges embedded in the MemoryList stream. An
+  /// error is returned if the file does not contain this stream, or if the
+  /// stream is not large enough to contain the number of memory descriptors
+  /// declared in the stream header. The consistency of the MemoryDescriptor
+  /// entries themselves is not checked in any way.
+  Expected<ArrayRef<minidump::MemoryDescriptor>> getMemoryList() const {
+    return getListStream<minidump::MemoryDescriptor>(
+        minidump::StreamType::MemoryList);
+  }
+
+private:
+  static Error createError(StringRef Str) {
+    return make_error<GenericBinaryError>(Str, object_error::parse_failed);
+  }
+
+  static Error createEOFError() {
+    return make_error<GenericBinaryError>("Unexpected EOF",
+                                          object_error::unexpected_eof);
+  }
+
+  /// Return a slice of the given data array, with bounds checking.
+  static Expected<ArrayRef<uint8_t>> getDataSlice(ArrayRef<uint8_t> Data,
+                                                  size_t Offset, size_t Size);
+
+  /// Return the slice of the given data array as an array of objects of the
+  /// given type. The function checks that the input array is large enough to
+  /// contain the correct number of objects of the given type.
+  template <typename T>
+  static Expected<ArrayRef<T>> getDataSliceAs(ArrayRef<uint8_t> Data,
+                                              size_t Offset, size_t Count);
+
+  MinidumpFile(MemoryBufferRef Source, const minidump::Header &Header,
+               ArrayRef<minidump::Directory> Streams,
+               DenseMap<minidump::StreamType, std::size_t> StreamMap)
+      : Binary(ID_Minidump, Source), Header(Header), Streams(Streams),
+        StreamMap(std::move(StreamMap)) {}
+
+  ArrayRef<uint8_t> getData() const {
+    return arrayRefFromStringRef(Data.getBuffer());
+  }
+
+  /// Return the stream of the given type, cast to the appropriate type. Checks
+  /// that the stream is large enough to hold an object of this type.
+  template <typename T>
+  Expected<const T &> getStream(minidump::StreamType Stream) const;
+
+  /// Return the contents of a stream which contains a list of fixed-size items,
+  /// prefixed by the list size.
+  template <typename T>
+  Expected<ArrayRef<T>> getListStream(minidump::StreamType Stream) const;
+
+  const minidump::Header &Header;
+  ArrayRef<minidump::Directory> Streams;
+  DenseMap<minidump::StreamType, std::size_t> StreamMap;
+};
+
+template <typename T>
+Expected<const T &> MinidumpFile::getStream(minidump::StreamType Stream) const {
+  Optional<ArrayRef<uint8_t>> Raw = getRawStream(Stream);
+  if (!Raw)
+    return createError("No such stream");
+  if (Raw->size() < sizeof(T)) // Too short to back a whole T.
+    return createEOFError();
+  return *reinterpret_cast<const T *>(Raw->data());
+}
+
+template <typename T>
+Expected<ArrayRef<T>> MinidumpFile::getDataSliceAs(ArrayRef<uint8_t> Data,
+                                                   size_t Offset,
+                                                   size_t Count) {
+  // Reject counts whose total byte size would not fit in size_t.
+  if (std::numeric_limits<size_t>::max() / sizeof(T) < Count)
+    return createEOFError();
+  auto Bytes = getDataSlice(Data, Offset, Count * sizeof(T));
+  if (Bytes)
+    return ArrayRef<T>(reinterpret_cast<const T *>(Bytes->data()), Count);
+  return Bytes.takeError();
+}
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_MINIDUMP_H
diff --git a/include/llvm/Object/ModuleSymbolTable.h b/include/llvm/Object/ModuleSymbolTable.h
index c3cbc27998e5..4c582fbcda81 100644
--- a/include/llvm/Object/ModuleSymbolTable.h
+++ b/include/llvm/Object/ModuleSymbolTable.h
@@ -1,9 +1,8 @@
 //===- ModuleSymbolTable.h - symbol table for in-memory IR ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 036c99cb6baf..483a3486bd72 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -1,9 +1,8 @@
 //===- ObjectFile.h - File format independent object file -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #ifndef LLVM_OBJECT_OBJECTFILE_H
 #define LLVM_OBJECT_OBJECTFILE_H
 
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
@@ -98,7 +98,7 @@ public:
   uint64_t getAddress() const;
   uint64_t getIndex() const;
   uint64_t getSize() const;
-  std::error_code getContents(StringRef &Result) const;
+  Expected<StringRef> getContents() const;
 
   /// Get the alignment of this section as the actual value (not log 2).
   uint64_t getAlignment() const;
@@ -136,6 +136,30 @@ public:
   const ObjectFile *getObject() const;
 };
 
+struct SectionedAddress {
+  // TODO: constructors could be removed when C++14 would be adopted.
+  SectionedAddress() {}
+  SectionedAddress(uint64_t Addr, uint64_t SectIdx)
+      : Address(Addr), SectionIndex(SectIdx) {}
+
+  const static uint64_t UndefSection = UINT64_MAX;
+
+  uint64_t Address = 0;
+  uint64_t SectionIndex = UndefSection;
+};
+
+inline bool operator<(const SectionedAddress &LHS,
+                      const SectionedAddress &RHS) {
+  return std::tie(LHS.SectionIndex, LHS.Address) <
+         std::tie(RHS.SectionIndex, RHS.Address);
+}
+
+inline bool operator==(const SectionedAddress &LHS,
+                       const SectionedAddress &RHS) {
+  return std::tie(LHS.SectionIndex, LHS.Address) ==
+         std::tie(RHS.SectionIndex, RHS.Address);
+}
+
 /// This is a value type class that represents a single symbol in the list of
 /// symbols in the object file.
 class SymbolRef : public BasicSymbolRef {
@@ -220,7 +244,7 @@ protected:
   friend class SymbolRef;
 
   virtual Expected<StringRef> getSymbolName(DataRefImpl Symb) const = 0;
-  std::error_code printSymbolName(raw_ostream &OS,
+  Error printSymbolName(raw_ostream &OS,
                                   DataRefImpl Symb) const override;
   virtual Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const = 0;
   virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
@@ -234,13 +258,12 @@ protected:
   friend class SectionRef;
 
   virtual void moveSectionNext(DataRefImpl &Sec) const = 0;
-  virtual std::error_code getSectionName(DataRefImpl Sec,
-                                         StringRef &Res) const = 0;
+  virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const = 0;
   virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0;
   virtual uint64_t getSectionIndex(DataRefImpl Sec) const = 0;
   virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0;
-  virtual std::error_code getSectionContents(DataRefImpl Sec,
-                                             StringRef &Res) const = 0;
+  virtual Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const = 0;
   virtual uint64_t getSectionAlignment(DataRefImpl Sec) const = 0;
   virtual bool isSectionCompressed(DataRefImpl Sec) const = 0;
   virtual bool isSectionText(DataRefImpl Sec) const = 0;
@@ -308,11 +331,6 @@ public:
   /// Create a triple from the data in this object file.
   Triple makeTriple() const;
 
-  virtual std::error_code
-    getBuildAttributes(ARMAttributeParser &Attributes) const {
-      return std::error_code();
-    }
-
   /// Maps a debug section name to a standard DWARF section name.
   virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; }
 
@@ -340,6 +358,9 @@ public:
   static Expected<std::unique_ptr<COFFObjectFile>>
   createCOFFObjectFile(MemoryBufferRef Object);
 
+  static Expected<std::unique_ptr<ObjectFile>>
+  createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
+
   static Expected<std::unique_ptr<ObjectFile>>
   createELFObjectFile(MemoryBufferRef Object);
 
@@ -396,14 +417,16 @@ inline SectionRef::SectionRef(DataRefImpl SectionP,
   , OwningObject(Owner) {}
 
 inline bool SectionRef::operator==(const SectionRef &Other) const {
-  return SectionPimpl == Other.SectionPimpl;
+  return OwningObject == Other.OwningObject &&
+         SectionPimpl == Other.SectionPimpl;
 }
 
 inline bool SectionRef::operator!=(const SectionRef &Other) const {
-  return SectionPimpl != Other.SectionPimpl;
+  return !(*this == Other);
 }
 
 inline bool SectionRef::operator<(const SectionRef &Other) const {
+  assert(OwningObject == Other.OwningObject);
   return SectionPimpl < Other.SectionPimpl;
 }
 
@@ -412,7 +435,11 @@ inline void SectionRef::moveNext() {
 }
 
 inline std::error_code SectionRef::getName(StringRef &Result) const {
-  return OwningObject->getSectionName(SectionPimpl, Result);
+  Expected<StringRef> NameOrErr = OwningObject->getSectionName(SectionPimpl);
+  if (!NameOrErr)
+    return errorToErrorCode(NameOrErr.takeError());
+  Result = *NameOrErr;
+  return std::error_code();
 }
 
 inline uint64_t SectionRef::getAddress() const {
@@ -427,8 +454,12 @@ inline uint64_t SectionRef::getSize() const {
   return OwningObject->getSectionSize(SectionPimpl);
 }
 
-inline std::error_code SectionRef::getContents(StringRef &Result) const {
-  return OwningObject->getSectionContents(SectionPimpl, Result);
+inline Expected<StringRef> SectionRef::getContents() const {
+  Expected<ArrayRef<uint8_t>> Res =
+      OwningObject->getSectionContents(SectionPimpl);
+  if (!Res)
+    return Res.takeError();
+  return StringRef(reinterpret_cast<const char *>(Res->data()), Res->size());
 }
 
 inline uint64_t SectionRef::getAlignment() const {
@@ -531,6 +562,25 @@ inline const ObjectFile *RelocationRef::getObject() const {
 
 } // end namespace object
 
+template <> struct DenseMapInfo<object::SectionRef> {
+  static bool isEqual(const object::SectionRef &A,
+                      const object::SectionRef &B) {
+    return A == B;
+  }
+  static object::SectionRef getEmptyKey() {
+    return object::SectionRef({}, nullptr);
+  }
+  static object::SectionRef getTombstoneKey() {
+    object::DataRefImpl TS;
+    TS.p = (uintptr_t)-1;
+    return object::SectionRef(TS, nullptr);
+  }
+  static unsigned getHashValue(const object::SectionRef &Sec) {
+    object::DataRefImpl Raw = Sec.getRawDataRefImpl();
+    return hash_combine(Raw.p, Raw.d.a, Raw.d.b);
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_OBJECT_OBJECTFILE_H
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
deleted file mode 100644
index 9a978de2e599..000000000000
--- a/include/llvm/Object/RelocVisitor.h
+++ /dev/null
@@ -1,351 +0,0 @@
-//===- RelocVisitor.h - Visitor for object file relocations -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides a wrapper around all the different types of relocations
-// in different file formats, such that a client can handle them in a unified
-// manner by only implementing a minimal number of functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECT_RELOCVISITOR_H
-#define LLVM_OBJECT_RELOCVISITOR_H
-
-#include "llvm/ADT/Triple.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/Wasm.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstdint>
-#include <system_error>
-
-namespace llvm {
-namespace object {
-
-/// Base class for object file relocation visitors.
-class RelocVisitor {
-public:
-  explicit RelocVisitor(const ObjectFile &Obj) : ObjToVisit(Obj) {}
-
-  // TODO: Should handle multiple applied relocations via either passing in the
-  // previously computed value or just count paired relocations as a single
-  // visit.
-  uint64_t visit(uint32_t Rel, RelocationRef R, uint64_t Value = 0) {
-    if (isa<ELFObjectFileBase>(ObjToVisit))
-      return visitELF(Rel, R, Value);
-    if (isa<COFFObjectFile>(ObjToVisit))
-      return visitCOFF(Rel, R, Value);
-    if (isa<MachOObjectFile>(ObjToVisit))
-      return visitMachO(Rel, R, Value);
-    if (isa<WasmObjectFile>(ObjToVisit))
-      return visitWasm(Rel, R, Value);
-
-    HasError = true;
-    return 0;
-  }
-
-  bool error() { return HasError; }
-
-private:
-  const ObjectFile &ObjToVisit;
-  bool HasError = false;
-
-  uint64_t visitELF(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file
-      switch (ObjToVisit.getArch()) {
-      case Triple::x86_64:
-        return visitX86_64(Rel, R, Value);
-      case Triple::aarch64:
-      case Triple::aarch64_be:
-        return visitAarch64(Rel, R, Value);
-      case Triple::bpfel:
-      case Triple::bpfeb:
-        return visitBpf(Rel, R, Value);
-      case Triple::mips64el:
-      case Triple::mips64:
-        return visitMips64(Rel, R, Value);
-      case Triple::ppc64le:
-      case Triple::ppc64:
-        return visitPPC64(Rel, R, Value);
-      case Triple::systemz:
-        return visitSystemz(Rel, R, Value);
-      case Triple::sparcv9:
-        return visitSparc64(Rel, R, Value);
-      case Triple::amdgcn:
-        return visitAmdgpu(Rel, R, Value);
-      default:
-        HasError = true;
-        return 0;
-      }
-    }
-
-    // 32-bit object file
-    assert(ObjToVisit.getBytesInAddress() == 4 &&
-           "Invalid word size in object file");
-
-    switch (ObjToVisit.getArch()) {
-    case Triple::x86:
-      return visitX86(Rel, R, Value);
-    case Triple::ppc:
-      return visitPPC32(Rel, R, Value);
-    case Triple::arm:
-    case Triple::armeb:
-      return visitARM(Rel, R, Value);
-    case Triple::lanai:
-      return visitLanai(Rel, R, Value);
-    case Triple::mipsel:
-    case Triple::mips:
-      return visitMips32(Rel, R, Value);
-    case Triple::sparc:
-      return visitSparc32(Rel, R, Value);
-    case Triple::hexagon:
-      return visitHexagon(Rel, R, Value);
-    default:
-      HasError = true;
-      return 0;
-    }
-  }
-
-  int64_t getELFAddend(RelocationRef R) {
-    Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
-    handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) {
-      report_fatal_error(EI.message());
-    });
-    return *AddendOrErr;
-  }
-
-  uint64_t visitX86_64(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_X86_64_NONE:
-      return 0;
-    case ELF::R_X86_64_64:
-    case ELF::R_X86_64_DTPOFF32:
-    case ELF::R_X86_64_DTPOFF64:
-      return Value + getELFAddend(R);
-    case ELF::R_X86_64_PC32:
-      return Value + getELFAddend(R) - R.getOffset();
-    case ELF::R_X86_64_32:
-    case ELF::R_X86_64_32S:
-      return (Value + getELFAddend(R)) & 0xFFFFFFFF;
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitAarch64(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_AARCH64_ABS32: {
-      int64_t Res = Value + getELFAddend(R);
-      if (Res < INT32_MIN || Res > UINT32_MAX)
-        HasError = true;
-      return static_cast<uint32_t>(Res);
-    }
-    case ELF::R_AARCH64_ABS64:
-      return Value + getELFAddend(R);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitBpf(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_BPF_64_32:
-      return Value & 0xFFFFFFFF;
-    case ELF::R_BPF_64_64:
-      return Value;
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitMips64(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_MIPS_32:
-      return (Value + getELFAddend(R)) & 0xFFFFFFFF;
-    case ELF::R_MIPS_64:
-      return Value + getELFAddend(R);
-    case ELF::R_MIPS_TLS_DTPREL64:
-      return Value + getELFAddend(R) - 0x8000;
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitPPC64(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_PPC64_ADDR32:
-      return (Value + getELFAddend(R)) & 0xFFFFFFFF;
-    case ELF::R_PPC64_ADDR64:
-      return Value + getELFAddend(R);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitSystemz(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_390_32: {
-      int64_t Res = Value + getELFAddend(R);
-      if (Res < INT32_MIN || Res > UINT32_MAX)
-        HasError = true;
-      return static_cast<uint32_t>(Res);
-    }
-    case ELF::R_390_64:
-      return Value + getELFAddend(R);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitSparc64(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_SPARC_32:
-    case ELF::R_SPARC_64:
-    case ELF::R_SPARC_UA32:
-    case ELF::R_SPARC_UA64:
-      return Value + getELFAddend(R);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitAmdgpu(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_AMDGPU_ABS32:
-    case ELF::R_AMDGPU_ABS64:
-      return Value + getELFAddend(R);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitX86(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (Rel) {
-    case ELF::R_386_NONE:
-      return 0;
-    case ELF::R_386_32:
-      return Value;
-    case ELF::R_386_PC32:
-      return Value - R.getOffset();
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitPPC32(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (Rel == ELF::R_PPC_ADDR32)
-      return (Value + getELFAddend(R)) & 0xFFFFFFFF;
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitARM(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (Rel == ELF::R_ARM_ABS32) {
-      if ((int64_t)Value < INT32_MIN || (int64_t)Value > UINT32_MAX)
-        HasError = true;
-      return static_cast<uint32_t>(Value);
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitLanai(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (Rel == ELF::R_LANAI_32)
-      return (Value + getELFAddend(R)) & 0xFFFFFFFF;
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitMips32(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    // FIXME: Take in account implicit addends to get correct results.
-    if (Rel == ELF::R_MIPS_32)
-      return Value & 0xFFFFFFFF;
-    if (Rel == ELF::R_MIPS_TLS_DTPREL32)
-      return Value & 0xFFFFFFFF;
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitSparc32(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32)
-      return Value + getELFAddend(R);
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitHexagon(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (Rel == ELF::R_HEX_32)
-      return Value + getELFAddend(R);
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitCOFF(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    switch (ObjToVisit.getArch()) {
-    case Triple::x86:
-      switch (Rel) {
-      case COFF::IMAGE_REL_I386_SECREL:
-      case COFF::IMAGE_REL_I386_DIR32:
-        return static_cast<uint32_t>(Value);
-      }
-      break;
-    case Triple::x86_64:
-      switch (Rel) {
-      case COFF::IMAGE_REL_AMD64_SECREL:
-        return static_cast<uint32_t>(Value);
-      case COFF::IMAGE_REL_AMD64_ADDR64:
-        return Value;
-      }
-      break;
-    default:
-      break;
-    }
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitMachO(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (ObjToVisit.getArch() == Triple::x86_64 &&
-        Rel == MachO::X86_64_RELOC_UNSIGNED)
-      return Value;
-    HasError = true;
-    return 0;
-  }
-
-  uint64_t visitWasm(uint32_t Rel, RelocationRef R, uint64_t Value) {
-    if (ObjToVisit.getArch() == Triple::wasm32) {
-      switch (Rel) {
-      case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
-      case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
-      case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
-      case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-      case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
-      case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
-      case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
-      case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
-      case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
-      case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
-      case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
-        // For wasm section, its offset at 0 -- ignoring Value
-        return 0;
-      }
-    }
-    HasError = true;
-    return 0;
-  }
-};
-
-} // end namespace object
-} // end namespace llvm
-
-#endif // LLVM_OBJECT_RELOCVISITOR_H
diff --git a/include/llvm/Object/RelocationResolver.h b/include/llvm/Object/RelocationResolver.h
new file mode 100644
index 000000000000..1246dcc5ec73
--- /dev/null
+++ b/include/llvm/Object/RelocationResolver.h
@@ -0,0 +1,42 @@
+//===- RelocationResolver.h - Resolver for relocations ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares getRelocationResolver(), which returns function pointers
+// for handling the different types of relocations in different file formats,
+// such that a client can handle them in a unified manner.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_RELOCATIONRESOLVER_H
+#define LLVM_OBJECT_RELOCATIONRESOLVER_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Wasm.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+#include <system_error>
+
+namespace llvm {
+namespace object {
+
+using RelocationResolver = uint64_t (*)(RelocationRef R, uint64_t S, uint64_t A);
+
+std::pair<bool (*)(uint64_t), RelocationResolver>
+getRelocationResolver(const ObjectFile &Obj);
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_RELOCATIONRESOLVER_H
diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h
index 557db5afa825..ed44efbf80b9 100644
--- a/include/llvm/Object/StackMapParser.h
+++ b/include/llvm/Object/StackMapParser.h
@@ -1,9 +1,8 @@
 //===- StackMapParser.h - StackMap Parsing Support --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -20,8 +19,9 @@
 
 namespace llvm {
 
+/// A parser for the latest stackmap format.  At the moment, latest=V3.
 template <support::endianness Endianness>
-class StackMapV2Parser {
+class StackMapParser {
 public:
   template <typename AccessorT>
   class AccessorIterator {
@@ -50,7 +50,7 @@ public:
 
   /// Accessor for function records.
   class FunctionAccessor {
-    friend class StackMapV2Parser;
+    friend class StackMapParser;
 
   public:
     /// Get the function address.
@@ -82,7 +82,7 @@ public:
 
   /// Accessor for constants.
   class ConstantAccessor {
-    friend class StackMapV2Parser;
+    friend class StackMapParser;
 
   public:
     /// Return the value of this constant.
@@ -106,7 +106,7 @@ public:
 
   /// Accessor for location records.
   class LocationAccessor {
-    friend class StackMapV2Parser;
+    friend class StackMapParser;
     friend class RecordAccessor;
 
   public:
@@ -115,6 +115,12 @@ public:
       return LocationKind(P[KindOffset]);
     }
 
+    /// Get the Size field of this location, in bytes (read as a 16-bit value).
+    unsigned getSizeInBytes() const {
+        return read<uint16_t>(P + SizeOffset);
+
+    }
+
     /// Get the Dwarf register number for this location.
     uint16_t getDwarfRegNum() const {
       return read<uint16_t>(P + DwarfRegNumOffset);
@@ -149,16 +155,17 @@ public:
     }
 
     static const int KindOffset = 0;
-    static const int DwarfRegNumOffset = KindOffset + sizeof(uint16_t);
-    static const int SmallConstantOffset = DwarfRegNumOffset + sizeof(uint16_t);
-    static const int LocationAccessorSize = sizeof(uint64_t);
+    static const int SizeOffset = KindOffset + sizeof(uint16_t);
+    static const int DwarfRegNumOffset = SizeOffset + sizeof(uint16_t);
+    static const int SmallConstantOffset = DwarfRegNumOffset + sizeof(uint32_t);
+    static const int LocationAccessorSize = sizeof(uint64_t) + sizeof(uint32_t);
 
     const uint8_t *P;
   };
 
   /// Accessor for stackmap live-out fields.
   class LiveOutAccessor {
-    friend class StackMapV2Parser;
+    friend class StackMapParser;
     friend class RecordAccessor;
 
   public:
@@ -189,7 +196,7 @@ public:
 
   /// Accessor for stackmap records.
   class RecordAccessor {
-    friend class StackMapV2Parser;
+    friend class StackMapParser;
 
   public:
     using location_iterator = AccessorIterator<LocationAccessor>;
@@ -264,8 +271,9 @@ public:
     RecordAccessor(const uint8_t *P) : P(P) {}
 
     unsigned getNumLiveOutsOffset() const {
-      return LocationListOffset + LocationSize * getNumLocations() +
-             sizeof(uint16_t);
+      unsigned LocOffset = 
+          ((LocationListOffset + LocationSize * getNumLocations()) + 7) & ~0x7; 
+      return LocOffset + sizeof(uint16_t);
     }
 
     unsigned getSizeInBytes() const {
@@ -285,7 +293,7 @@ public:
       InstructionOffsetOffset + sizeof(uint32_t) + sizeof(uint16_t);
     static const unsigned LocationListOffset =
       NumLocationsOffset + sizeof(uint16_t);
-    static const unsigned LocationSize = sizeof(uint64_t);
+    static const unsigned LocationSize = sizeof(uint64_t) + sizeof(uint32_t);
     static const unsigned LiveOutSize = sizeof(uint32_t);
 
     const uint8_t *P;
@@ -293,12 +301,12 @@ public:
 
   /// Construct a parser for a version-2 stackmap. StackMap data will be read
   /// from the given array.
-  StackMapV2Parser(ArrayRef<uint8_t> StackMapSection)
+  StackMapParser(ArrayRef<uint8_t> StackMapSection)
       : StackMapSection(StackMapSection) {
     ConstantsListOffset = FunctionListOffset + getNumFunctions() * FunctionSize;
 
-    assert(StackMapSection[0] == 2 &&
-           "StackMapV2Parser can only parse version 2 stackmaps");
+    assert(StackMapSection[0] == 3 &&
+           "StackMapParser can only parse version 3 stackmaps");
 
     unsigned CurrentRecordOffset =
       ConstantsListOffset + getNumConstants() * ConstantSize;
@@ -314,8 +322,8 @@ public:
   using constant_iterator = AccessorIterator<ConstantAccessor>;
   using record_iterator = AccessorIterator<RecordAccessor>;
 
-  /// Get the version number of this stackmap. (Always returns 2).
-  unsigned getVersion() const { return 2; }
+  /// Get the version number of this stackmap. (Always returns 3).
+  unsigned getVersion() const { return 3; }
 
   /// Get the number of functions in the stack map.
   uint32_t getNumFunctions() const {
diff --git a/include/llvm/Object/SymbolSize.h b/include/llvm/Object/SymbolSize.h
index 1a1dc8752943..085623e35907 100644
--- a/include/llvm/Object/SymbolSize.h
+++ b/include/llvm/Object/SymbolSize.h
@@ -1,9 +1,8 @@
 //===- SymbolSize.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index 5b9549bc3449..1398fa134c81 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -1,9 +1,8 @@
 //===- SymbolicFile.h - Interface that only provides symbols ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -127,7 +126,7 @@ public:
 
   void moveNext();
 
-  std::error_code printName(raw_ostream &OS) const;
+  Error printName(raw_ostream &OS) const;
 
   /// Get symbol flags (bitwise OR of SymbolRef::Flags)
   uint32_t getFlags() const;
@@ -146,8 +145,7 @@ public:
   // virtual interface.
   virtual void moveSymbolNext(DataRefImpl &Symb) const = 0;
 
-  virtual std::error_code printSymbolName(raw_ostream &OS,
-                                          DataRefImpl Symb) const = 0;
+  virtual Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const = 0;
 
   virtual uint32_t getSymbolFlags(DataRefImpl Symb) const = 0;
 
@@ -194,7 +192,7 @@ inline void BasicSymbolRef::moveNext() {
   return OwningObject->moveSymbolNext(SymbolPimpl);
 }
 
-inline std::error_code BasicSymbolRef::printName(raw_ostream &OS) const {
+inline Error BasicSymbolRef::printName(raw_ostream &OS) const {
   return OwningObject->printSymbolName(OS, SymbolPimpl);
 }
 
diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h
index ed857652a048..e130ea32ed21 100644
--- a/include/llvm/Object/Wasm.h
+++ b/include/llvm/Object/Wasm.h
@@ -1,9 +1,8 @@
-//===- WasmObjectFile.h - Wasm object file implementation -------*- C++ -*-===//
+//===- Wasm.h - Wasm object file implementation -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -130,6 +129,10 @@ public:
   static bool classof(const Binary *v) { return v->isWasm(); }
 
   const wasm::WasmDylinkInfo &dylinkInfo() const { return DylinkInfo; }
+  const wasm::WasmProducerInfo &getProducerInfo() const { return ProducerInfo; }
+  ArrayRef<wasm::WasmFeatureEntry> getTargetFeatures() const {
+    return TargetFeatures;
+  }
   ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
   ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
   ArrayRef<wasm::WasmImport> imports() const { return Imports; }
@@ -149,7 +152,6 @@ public:
   uint32_t getNumImportedGlobals() const { return NumImportedGlobals; }
   uint32_t getNumImportedFunctions() const { return NumImportedFunctions; }
   uint32_t getNumImportedEvents() const { return NumImportedEvents; }
-
   void moveSymbolNext(DataRefImpl &Symb) const override;
 
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
@@ -169,13 +171,12 @@ public:
 
   // Overrides from SectionRef.
   void moveSectionNext(DataRefImpl &Sec) const override;
-  std::error_code getSectionName(DataRefImpl Sec,
-                                 StringRef &Res) const override;
+  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
   uint64_t getSectionAddress(DataRefImpl Sec) const override;
   uint64_t getSectionIndex(DataRefImpl Sec) const override;
   uint64_t getSectionSize(DataRefImpl Sec) const override;
-  std::error_code getSectionContents(DataRefImpl Sec,
-                                     StringRef &Res) const override;
+  Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const override;
   uint64_t getSectionAlignment(DataRefImpl Sec) const override;
   bool isSectionCompressed(DataRefImpl Sec) const override;
   bool isSectionText(DataRefImpl Sec) const override;
@@ -222,13 +223,13 @@ private:
   bool isValidDataSymbol(uint32_t Index) const;
   bool isValidSectionSymbol(uint32_t Index) const;
   wasm::WasmFunction &getDefinedFunction(uint32_t Index);
+  const wasm::WasmFunction &getDefinedFunction(uint32_t Index) const;
   wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
   wasm::WasmEvent &getDefinedEvent(uint32_t Index);
 
   const WasmSection &getWasmSection(DataRefImpl Ref) const;
   const wasm::WasmRelocation &getWasmRelocation(DataRefImpl Ref) const;
 
-  const uint8_t *getPtr(size_t Offset) const;
   Error parseSection(WasmSection &Sec);
   Error parseCustomSection(WasmSection &Sec, ReadContext &Ctx);
 
@@ -245,6 +246,7 @@ private:
   Error parseElemSection(ReadContext &Ctx);
   Error parseCodeSection(ReadContext &Ctx);
   Error parseDataSection(ReadContext &Ctx);
+  Error parseDataCountSection(ReadContext &Ctx);
 
   // Custom section types
   Error parseDylinkSection(ReadContext &Ctx);
@@ -252,11 +254,15 @@ private:
   Error parseLinkingSection(ReadContext &Ctx);
   Error parseLinkingSectionSymtab(ReadContext &Ctx);
   Error parseLinkingSectionComdat(ReadContext &Ctx);
+  Error parseProducersSection(ReadContext &Ctx);
+  Error parseTargetFeaturesSection(ReadContext &Ctx);
   Error parseRelocSection(StringRef Name, ReadContext &Ctx);
 
   wasm::WasmObjectHeader Header;
   std::vector<WasmSection> Sections;
   wasm::WasmDylinkInfo DylinkInfo;
+  wasm::WasmProducerInfo ProducerInfo;
+  std::vector<wasm::WasmFeatureEntry> TargetFeatures;
   std::vector<wasm::WasmSignature> Signatures;
   std::vector<uint32_t> FunctionTypes;
   std::vector<wasm::WasmTable> Tables;
@@ -267,6 +273,7 @@ private:
   std::vector<wasm::WasmExport> Exports;
   std::vector<wasm::WasmElemSegment> ElemSegments;
   std::vector<WasmSegment> DataSegments;
+  llvm::Optional<size_t> DataCount;
   std::vector<wasm::WasmFunction> Functions;
   std::vector<WasmSymbol> Symbols;
   std::vector<wasm::WasmFunctionName> DebugNames;
@@ -287,40 +294,51 @@ class WasmSectionOrderChecker {
 public:
   // We define orders for all core wasm sections and known custom sections.
   enum : int {
+    // Sentinel, must be zero
+    WASM_SEC_ORDER_NONE = 0,
+
     // Core sections
-    // The order of standard sections is precisely given by the spec.
-    WASM_SEC_ORDER_TYPE = 1,
-    WASM_SEC_ORDER_IMPORT = 2,
-    WASM_SEC_ORDER_FUNCTION = 3,
-    WASM_SEC_ORDER_TABLE = 4,
-    WASM_SEC_ORDER_MEMORY = 5,
-    WASM_SEC_ORDER_GLOBAL = 6,
-    WASM_SEC_ORDER_EVENT = 7,
-    WASM_SEC_ORDER_EXPORT = 8,
-    WASM_SEC_ORDER_START = 9,
-    WASM_SEC_ORDER_ELEM = 10,
-    WASM_SEC_ORDER_DATACOUNT = 11,
-    WASM_SEC_ORDER_CODE = 12,
-    WASM_SEC_ORDER_DATA = 13,
+    WASM_SEC_ORDER_TYPE,
+    WASM_SEC_ORDER_IMPORT,
+    WASM_SEC_ORDER_FUNCTION,
+    WASM_SEC_ORDER_TABLE,
+    WASM_SEC_ORDER_MEMORY,
+    WASM_SEC_ORDER_GLOBAL,
+    WASM_SEC_ORDER_EVENT,
+    WASM_SEC_ORDER_EXPORT,
+    WASM_SEC_ORDER_START,
+    WASM_SEC_ORDER_ELEM,
+    WASM_SEC_ORDER_DATACOUNT,
+    WASM_SEC_ORDER_CODE,
+    WASM_SEC_ORDER_DATA,
 
     // Custom sections
     // "dylink" should be the very first section in the module
-    WASM_SEC_ORDER_DYLINK = 0,
+    WASM_SEC_ORDER_DYLINK,
     // "linking" section requires DATA section in order to validate data symbols
-    WASM_SEC_ORDER_LINKING = 100,
+    WASM_SEC_ORDER_LINKING,
     // Must come after "linking" section in order to validate reloc indexes.
-    WASM_SEC_ORDER_RELOC = 101,
+    WASM_SEC_ORDER_RELOC,
     // "name" section must appear after DATA. Comes after "linking" to allow
     // symbol table to set default function name.
-    WASM_SEC_ORDER_NAME = 102,
+    WASM_SEC_ORDER_NAME,
     // "producers" section must appear after "name" section.
-    WASM_SEC_ORDER_PRODUCERS = 103
+    WASM_SEC_ORDER_PRODUCERS,
+    // "target_features" section must appear after producers section
+    WASM_SEC_ORDER_TARGET_FEATURES,
+
+    // Must be last
+    WASM_NUM_SEC_ORDERS
+
   };
 
+  // Sections that may or may not be present, but cannot be predecessors
+  static int DisallowedPredecessors[WASM_NUM_SEC_ORDERS][WASM_NUM_SEC_ORDERS];
+
   bool isValidSectionOrder(unsigned ID, StringRef CustomSectionName = "");
 
 private:
-  int LastOrder = -1; // Lastly seen known section's order
+  bool Seen[WASM_NUM_SEC_ORDERS] = {}; // Sections that have been seen already
 
   // Returns -1 for unknown sections.
   int getSectionOrder(unsigned ID, StringRef CustomSectionName = "");
diff --git a/include/llvm/Object/WasmTraits.h b/include/llvm/Object/WasmTraits.h
index 049d72f79e41..3eee8e71b187 100644
--- a/include/llvm/Object/WasmTraits.h
+++ b/include/llvm/Object/WasmTraits.h
@@ -1,9 +1,8 @@
 //===- WasmTraits.h - DenseMap traits for the Wasm structures ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Object/WindowsMachineFlag.h b/include/llvm/Object/WindowsMachineFlag.h
new file mode 100644
index 000000000000..acc6afc0329c
--- /dev/null
+++ b/include/llvm/Object/WindowsMachineFlag.h
@@ -0,0 +1,33 @@
+//===- WindowsMachineFlag.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Functions for implementing the /machine: flag.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_WINDOWSMACHINEFLAG_H
+#define LLVM_OBJECT_WINDOWSMACHINEFLAG_H
+
+namespace llvm {
+
+class StringRef;
+namespace COFF {
+enum MachineTypes : unsigned;
+} // end namespace COFF
+
+/// Returns a user-readable string for ARMNT, ARM64, AMD64, I386.
+/// Other MachineTypes values must not be passed in.
+StringRef machineToStr(COFF::MachineTypes MT);
+
+/// Maps /machine: arguments to a MachineTypes value.
+/// Only returns ARMNT, ARM64, AMD64, I386, or IMAGE_FILE_MACHINE_UNKNOWN.
+COFF::MachineTypes getMachineType(StringRef S);
+
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_WINDOWSMACHINEFLAG_H
diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h
index a077c82871bf..356dcb03abba 100644
--- a/include/llvm/Object/WindowsResource.h
+++ b/include/llvm/Object/WindowsResource.h
@@ -1,9 +1,8 @@
 //===-- WindowsResource.h ---------------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -38,11 +37,14 @@
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/ScopedPrinter.h"
 
 #include <map>
 
 namespace llvm {
+
+class raw_ostream;
+class ScopedPrinter;
+
 namespace object {
 
 class WindowsResource;
@@ -118,6 +120,7 @@ private:
                                            const WindowsResource *Owner);
 
   BinaryStreamReader Reader;
+  const WindowsResource *Owner;
   bool IsStringType;
   ArrayRef<UTF16> Type;
   uint16_t TypeID;
@@ -149,7 +152,7 @@ class WindowsResourceParser {
 public:
   class TreeNode;
   WindowsResourceParser();
-  Error parse(WindowsResource *WR);
+  Error parse(WindowsResource *WR, std::vector<std::string> &Duplicates);
   void printTree(raw_ostream &OS) const;
   const TreeNode &getTree() const { return Root; }
   const ArrayRef<std::vector<uint8_t>> getData() const { return Data; }
@@ -185,21 +188,25 @@ public:
     static std::unique_ptr<TreeNode> createIDNode();
     static std::unique_ptr<TreeNode> createDataNode(uint16_t MajorVersion,
                                                     uint16_t MinorVersion,
-                                                    uint32_t Characteristics);
+                                                    uint32_t Characteristics,
+                                                    uint32_t Origin);
 
     explicit TreeNode(bool IsStringNode);
     TreeNode(uint16_t MajorVersion, uint16_t MinorVersion,
-             uint32_t Characteristics);
+             uint32_t Characteristics, uint32_t Origin);
 
-    void addEntry(const ResourceEntryRef &Entry, bool &IsNewTypeString,
-                  bool &IsNewNameString);
+    bool addEntry(const ResourceEntryRef &Entry, uint32_t Origin,
+                  bool &IsNewTypeString, bool &IsNewNameString,
+                  TreeNode *&Result);
     TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString);
     TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString);
-    TreeNode &addLanguageNode(const ResourceEntryRef &Entry);
-    TreeNode &addChild(uint32_t ID, bool IsDataNode = false,
-                       uint16_t MajorVersion = 0, uint16_t MinorVersion = 0,
-                       uint32_t Characteristics = 0);
-    TreeNode &addChild(ArrayRef<UTF16> NameRef, bool &IsNewString);
+    bool addLanguageNode(const ResourceEntryRef &Entry, uint32_t Origin,
+                         TreeNode *&Result);
+    bool addDataChild(uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
+                      uint32_t Characteristics, uint32_t Origin,
+                      TreeNode *&Result);
+    TreeNode &addIDChild(uint32_t ID);
+    TreeNode &addNameChild(ArrayRef<UTF16> NameRef, bool &IsNewString);
 
     bool IsDataNode = false;
     uint32_t StringIndex;
@@ -209,18 +216,26 @@ public:
     uint16_t MajorVersion = 0;
     uint16_t MinorVersion = 0;
     uint32_t Characteristics = 0;
+
+    // The .res file that defined this TreeNode, for diagnostics.
+    // Index into InputFilenames.
+    uint32_t Origin;
   };
 
 private:
   TreeNode Root;
   std::vector<std::vector<uint8_t>> Data;
   std::vector<std::vector<UTF16>> StringTable;
+
+  std::vector<std::string> InputFilenames;
 };
 
 Expected<std::unique_ptr<MemoryBuffer>>
 writeWindowsResourceCOFF(llvm::COFF::MachineTypes MachineType,
-                         const WindowsResourceParser &Parser);
+                         const WindowsResourceParser &Parser,
+                         uint32_t TimeDateStamp);
 
+void printResourceTypeName(uint16_t TypeID, raw_ostream &OS);
 } // namespace object
 } // namespace llvm
 
diff --git a/include/llvm/Object/XCOFFObjectFile.h b/include/llvm/Object/XCOFFObjectFile.h
new file mode 100644
index 000000000000..cdee7129a2ab
--- /dev/null
+++ b/include/llvm/Object/XCOFFObjectFile.h
@@ -0,0 +1,268 @@
+//===- XCOFFObjectFile.h - XCOFF object file implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCOFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H
+#define LLVM_OBJECT_XCOFFOBJECTFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <system_error>
+
+namespace llvm {
+namespace object {
+
+struct XCOFFFileHeader32 { // File header layout for 32-bit XCOFF objects.
+  support::ubig16_t Magic; // Big-endian, like every multi-byte field here.
+  support::ubig16_t NumberOfSections;
+
+  // Unix time value, value of 0 indicates no timestamp.
+  // Negative values are reserved.
+  support::big32_t TimeStamp;
+
+  support::ubig32_t SymbolTableOffset; // File offset to symbol table.
+  support::big32_t NumberOfSymTableEntries; // Signed; negatives are reserved.
+  support::ubig16_t AuxHeaderSize;
+  support::ubig16_t Flags;
+};
+
+struct XCOFFFileHeader64 { // File header layout for 64-bit XCOFF objects.
+  support::ubig16_t Magic;
+  support::ubig16_t NumberOfSections;
+
+  // Unix time value, value of 0 indicates no timestamp.
+  // Negative values are reserved.
+  support::big32_t TimeStamp;
+
+  support::ubig64_t SymbolTableOffset; // File offset to symbol table.
+  support::ubig16_t AuxHeaderSize; // Precedes the entry count, unlike 32-bit.
+  support::ubig16_t Flags;
+  support::ubig32_t NumberOfSymTableEntries; // Unsigned, unlike 32-bit.
+};
+
+struct XCOFFSectionHeader32 { // Section header layout for 32-bit XCOFF.
+  char Name[XCOFF::SectionNameSize]; // Not necessarily null-terminated.
+  support::ubig32_t PhysicalAddress;
+  support::ubig32_t VirtualAddress;
+  support::ubig32_t SectionSize;
+  support::ubig32_t FileOffsetToRawData;
+  support::ubig32_t FileOffsetToRelocationInfo;
+  support::ubig32_t FileOffsetToLineNumberInfo;
+  support::ubig16_t NumberOfRelocations;
+  support::ubig16_t NumberOfLineNumbers;
+  support::big32_t Flags; // Signed, like getSectionFlags()'s int32_t result.
+
+  StringRef getName() const; // Declared here; defined out of line.
+};
+
+struct XCOFFSectionHeader64 { // Section header layout for 64-bit XCOFF.
+  char Name[XCOFF::SectionNameSize]; // Not necessarily null-terminated.
+  support::ubig64_t PhysicalAddress;
+  support::ubig64_t VirtualAddress;
+  support::ubig64_t SectionSize;
+  support::big64_t FileOffsetToRawData; // NOTE(review): signed, unlike 32-bit.
+  support::big64_t FileOffsetToRelocationInfo; // Signed as well.
+  support::big64_t FileOffsetToLineNumberInfo; // Signed as well.
+  support::ubig32_t NumberOfRelocations; // 32-bit counts (16-bit in 32-bit).
+  support::ubig32_t NumberOfLineNumbers;
+  support::big32_t Flags;
+  char Padding[4]; // Explicit padding bytes after Flags.
+
+  StringRef getName() const; // Declared here; defined out of line.
+};
+
+struct XCOFFSymbolEntry { // Layout of one symbol table entry.
+  enum { NAME_IN_STR_TBL_MAGIC = 0x0 }; // Magic value: name is in string table.
+  typedef struct {
+    support::big32_t Magic; // Zero indicates name in string table.
+    support::ubig32_t Offset; // Presumably a string table offset -- confirm.
+  } NameInStrTblType;
+
+  typedef struct {
+    uint8_t LanguageId;
+    uint8_t CpuTypeId;
+  } CFileLanguageIdAndTypeIdType;
+
+  union { // Inline name, or a (Magic, Offset) string table reference.
+    char SymbolName[XCOFF::SymbolNameSize];
+    NameInStrTblType NameInStrTbl;
+  };
+
+  support::ubig32_t Value; // Symbol value; storage class-dependent.
+  support::big16_t SectionNumber; // Signed; see isReservedSectionNumber().
+
+  union { // Two views of the same two bytes.
+    support::ubig16_t SymbolType;
+    CFileLanguageIdAndTypeIdType CFileLanguageIdAndTypeId;
+  };
+
+  XCOFF::StorageClass StorageClass;
+  uint8_t NumberOfAuxEntries;
+};
+
+struct XCOFFStringTable { // Result type of XCOFFObjectFile::parseStringTable().
+  uint32_t Size;
+  const char *Data; // NOTE(review): looks non-owning -- confirm lifetime.
+};
+
+class XCOFFObjectFile : public ObjectFile { // ObjectFile subclass for XCOFF.
+private:
+  const void *FileHeader = nullptr; // Untyped; see fileHeader32()/64().
+  const void *SectionHeaderTable = nullptr; // See sectionHeaderTable32()/64().
+
+  const XCOFFSymbolEntry *SymbolTblPtr = nullptr;
+  XCOFFStringTable StringTable = {0, nullptr};
+
+  const XCOFFFileHeader32 *fileHeader32() const;
+  const XCOFFFileHeader64 *fileHeader64() const;
+
+  const XCOFFSectionHeader32 *sectionHeaderTable32() const;
+  const XCOFFSectionHeader64 *sectionHeaderTable64() const;
+
+  size_t getFileHeaderSize() const;
+  size_t getSectionHeaderSize() const;
+
+  const XCOFFSectionHeader32 *toSection32(DataRefImpl Ref) const;
+  const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const;
+  void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const;
+  uintptr_t getSectionHeaderTableAddress() const;
+
+  // This returns a pointer to the start of the storage for the name field of
+  // the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily
+  // null-terminated.
+  const char *getSectionNameInternal(DataRefImpl Sec) const;
+
+  int32_t getSectionFlags(DataRefImpl Sec) const;
+
+  static bool isReservedSectionNumber(int16_t SectionNumber);
+  Expected<DataRefImpl> getSectionByNum(int16_t Num) const;
+
+  // Constructor and "create" factory function. The constructor is only a thin
+  // wrapper around the base constructor. The "create" function fills out the
+  // XCOFF-specific information and performs the error checking along the way.
+  XCOFFObjectFile(unsigned Type, MemoryBufferRef Object);
+  static Expected<std::unique_ptr<XCOFFObjectFile>> create(unsigned Type,
+                                                           MemoryBufferRef MBR);
+
+  // Helper for parsing the StringTable. Returns an 'Error' if parsing failed
+  // and an XCOFFStringTable if parsing succeeded.
+  static Expected<XCOFFStringTable> parseStringTable(const XCOFFObjectFile *Obj,
+                                                     uint64_t Offset);
+
+  // Make a friend so it can call the private 'create' function.
+  friend Expected<std::unique_ptr<ObjectFile>>
+  ObjectFile::createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
+
+public:
+  // Interface inherited from base classes.
+  void moveSymbolNext(DataRefImpl &Symb) const override;
+  uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+  basic_symbol_iterator symbol_begin() const override;
+  basic_symbol_iterator symbol_end() const override;
+
+  Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
+  Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
+  uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+  Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
+  Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+
+  void moveSectionNext(DataRefImpl &Sec) const override;
+  Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
+  uint64_t getSectionAddress(DataRefImpl Sec) const override;
+  uint64_t getSectionIndex(DataRefImpl Sec) const override;
+  uint64_t getSectionSize(DataRefImpl Sec) const override;
+  Expected<ArrayRef<uint8_t>>
+  getSectionContents(DataRefImpl Sec) const override;
+  uint64_t getSectionAlignment(DataRefImpl Sec) const override;
+  bool isSectionCompressed(DataRefImpl Sec) const override;
+  bool isSectionText(DataRefImpl Sec) const override;
+  bool isSectionData(DataRefImpl Sec) const override;
+  bool isSectionBSS(DataRefImpl Sec) const override;
+
+  bool isSectionVirtual(DataRefImpl Sec) const override;
+  relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
+  relocation_iterator section_rel_end(DataRefImpl Sec) const override;
+
+  void moveRelocationNext(DataRefImpl &Rel) const override;
+  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
+  symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
+  uint64_t getRelocationType(DataRefImpl Rel) const override;
+  void getRelocationTypeName(DataRefImpl Rel,
+                             SmallVectorImpl<char> &Result) const override;
+
+  section_iterator section_begin() const override;
+  section_iterator section_end() const override;
+  uint8_t getBytesInAddress() const override;
+  StringRef getFileFormatName() const override;
+  Triple::ArchType getArch() const override;
+  SubtargetFeatures getFeatures() const override;
+  Expected<uint64_t> getStartAddress() const override;
+  bool isRelocatableObject() const override;
+
+  // Below here is the non-inherited interface.
+  bool is64Bit() const;
+
+  const XCOFFSymbolEntry *getPointerToSymbolTable() const { // 32-bit only.
+    assert(!is64Bit() && "Symbol table handling not supported yet.");
+    return SymbolTblPtr;
+  }
+
+  Expected<StringRef>
+  getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const;
+
+  const XCOFFSymbolEntry *toSymbolEntry(DataRefImpl Ref) const;
+
+  // File header related interfaces.
+  uint16_t getMagic() const;
+  uint16_t getNumberOfSections() const;
+  int32_t getTimeStamp() const;
+
+  // Symbol table offset and entry count are handled differently between
+  // XCOFF32 and XCOFF64.
+  uint32_t getSymbolTableOffset32() const;
+  uint64_t getSymbolTableOffset64() const;
+
+  // Note that this value is signed and might return a negative value. Negative
+  // values are reserved for future use.
+  int32_t getRawNumberOfSymbolTableEntries32() const;
+
+  // The sanitized value appropriate to use as an index into the symbol table.
+  uint32_t getLogicalNumberOfSymbolTableEntries32() const;
+
+  uint32_t getNumberOfSymbolTableEntries64() const;
+
+  uint16_t getOptionalHeaderSize() const;
+  uint16_t getFlags() const;
+
+  // Section header table related interfaces.
+  ArrayRef<XCOFFSectionHeader32> sections32() const;
+  ArrayRef<XCOFFSectionHeader64> sections64() const;
+}; // XCOFFObjectFile
+
+} // namespace object
+} // namespace llvm
+
+#endif // LLVM_OBJECT_XCOFFOBJECTFILE_H
diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h
index 253c627dd683..eec5af928f6d 100644
--- a/include/llvm/ObjectYAML/COFFYAML.h
+++ b/include/llvm/ObjectYAML/COFFYAML.h
@@ -1,9 +1,8 @@
 //===- COFFYAML.h - COFF YAMLIO implementation ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index d620008e22d2..9cbacb88b518 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -1,9 +1,8 @@
 //=- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
index 791193c78f19..7c05c9eea05e 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
@@ -1,9 +1,8 @@
 //===- CodeViewYAMLSymbols.h - CodeView YAMLIO Symbol implementation ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
index 344966fe6891..d6cec8d310eb 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
@@ -1,9 +1,8 @@
 //==- CodeViewYAMLTypeHashing.h - CodeView YAMLIO Type hashing ----*- C++-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
index 1b1306df4f53..04b5e0ba3aa1 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
@@ -1,9 +1,8 @@
 //==- CodeViewYAMLTypes.h - CodeView YAMLIO Type implementation --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ObjectYAML/DWARFEmitter.h b/include/llvm/ObjectYAML/DWARFEmitter.h
index ce3227421930..2ccc876d5023 100644
--- a/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -1,9 +1,8 @@
 //===--- DWARFEmitter.h - ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h
index 705c88778945..78d736c3ef05 100644
--- a/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/include/llvm/ObjectYAML/DWARFYAML.h
@@ -1,9 +1,8 @@
 //===- DWARFYAML.h - DWARF YAMLIO implementation ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h
index f2b0c35521f0..f4212516f486 100644
--- a/include/llvm/ObjectYAML/ELFYAML.h
+++ b/include/llvm/ObjectYAML/ELFYAML.h
@@ -1,9 +1,8 @@
 //===- ELFYAML.h - ELF YAMLIO implementation --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -44,6 +43,8 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_ELFDATA)
 LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_ELFOSABI)
 // Just use 64, since it can hold 32-bit values too.
 LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_EF)
+// Just use 64, since it can hold 32-bit values too.
+LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_DYNTAG)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_PF)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_SHT)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_REL)
@@ -51,6 +52,7 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_RSS)
 // Just use 64, since it can hold 32-bit values too.
 LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_SHF)
 LLVM_YAML_STRONG_TYPEDEF(uint16_t, ELF_SHN)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STB)
 LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STT)
 LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STV)
 LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STO)
@@ -73,6 +75,11 @@ struct FileHeader {
   ELF_EM Machine;
   ELF_EF Flags;
   llvm::yaml::Hex64 Entry;
+
+  Optional<llvm::yaml::Hex16> SHEntSize;
+  Optional<llvm::yaml::Hex16> SHOffset;
+  Optional<llvm::yaml::Hex16> SHNum;
+  Optional<llvm::yaml::Hex16> SHStrNdx;
 };
 
 struct SectionName {
@@ -85,53 +92,81 @@ struct ProgramHeader {
   llvm::yaml::Hex64 VAddr;
   llvm::yaml::Hex64 PAddr;
   Optional<llvm::yaml::Hex64> Align;
+  Optional<llvm::yaml::Hex64> FileSize;
+  Optional<llvm::yaml::Hex64> MemSize;
+  Optional<llvm::yaml::Hex64> Offset;
   std::vector<SectionName> Sections;
 };
 
 struct Symbol {
   StringRef Name;
+  Optional<uint32_t> NameIndex;
   ELF_STT Type;
   StringRef Section;
   Optional<ELF_SHN> Index;
+  ELF_STB Binding;
   llvm::yaml::Hex64 Value;
   llvm::yaml::Hex64 Size;
   uint8_t Other;
 };
 
-struct LocalGlobalWeakSymbols {
-  std::vector<Symbol> Local;
-  std::vector<Symbol> Global;
-  std::vector<Symbol> Weak;
-};
-
 struct SectionOrType {
   StringRef sectionNameOrType;
 };
 
+struct DynamicEntry { // Element type of DynamicSection::Entries.
+  ELF_DYNTAG Tag; // 64-bit strong typedef, so it can hold 32-bit values too.
+  llvm::yaml::Hex64 Val;
+};
+
 struct Section {
   enum class SectionKind {
+    Dynamic,
     Group,
     RawContent,
     Relocation,
     NoBits,
+    Verdef,
+    Verneed,
+    Symver,
     MipsABIFlags
   };
   SectionKind Kind;
   StringRef Name;
   ELF_SHT Type;
-  ELF_SHF Flags;
+  Optional<ELF_SHF> Flags;
   llvm::yaml::Hex64 Address;
   StringRef Link;
-  StringRef Info;
   llvm::yaml::Hex64 AddressAlign;
   Optional<llvm::yaml::Hex64> EntSize;
 
+  // This can be used to override the sh_offset field. It does not place the
+  // section data at the offset specified. Useful for creating invalid objects.
+  Optional<llvm::yaml::Hex64> ShOffset;
+
+  // This can be used to override the sh_size field. It does not affect the
+  // content written.
+  Optional<llvm::yaml::Hex64> ShSize;
+
   Section(SectionKind Kind) : Kind(Kind) {}
   virtual ~Section();
 };
+
+struct DynamicSection : Section { // Section with Kind SectionKind::Dynamic.
+  std::vector<DynamicEntry> Entries;
+  Optional<yaml::BinaryRef> Content;
+
+  DynamicSection() : Section(SectionKind::Dynamic) {}
+
+  static bool classof(const Section *S) { // Kind-based type test.
+    return S->Kind == SectionKind::Dynamic;
+  }
+};
+
 struct RawContentSection : Section {
-  yaml::BinaryRef Content;
-  llvm::yaml::Hex64 Size;
+  Optional<yaml::BinaryRef> Content;
+  Optional<llvm::yaml::Hex64> Size;
+  Optional<llvm::yaml::Hex64> Info;
 
   RawContentSection() : Section(SectionKind::RawContent) {}
 
@@ -150,10 +185,64 @@ struct NoBitsSection : Section {
   }
 };
 
+struct VernauxEntry { // Element type of VerneedEntry::AuxV.
+  uint32_t Hash;
+  uint16_t Flags;
+  uint16_t Other;
+  StringRef Name;
+};
+
+struct VerneedEntry { // Element type of VerneedSection::VerneedV.
+  uint16_t Version;
+  StringRef File;
+  std::vector<VernauxEntry> AuxV;
+};
+
+struct VerneedSection : Section { // Section with Kind SectionKind::Verneed.
+  std::vector<VerneedEntry> VerneedV;
+  llvm::yaml::Hex64 Info;
+
+  VerneedSection() : Section(SectionKind::Verneed) {}
+
+  static bool classof(const Section *S) { // Kind-based type test.
+    return S->Kind == SectionKind::Verneed;
+  }
+};
+
+struct SymverSection : Section { // Section with Kind SectionKind::Symver.
+  std::vector<uint16_t> Entries;
+
+  SymverSection() : Section(SectionKind::Symver) {}
+
+  static bool classof(const Section *S) { // Kind-based type test.
+    return S->Kind == SectionKind::Symver;
+  }
+};
+
+struct VerdefEntry { // Element type of VerdefSection::Entries.
+  uint16_t Version;
+  uint16_t Flags;
+  uint16_t VersionNdx;
+  uint32_t Hash;
+  std::vector<StringRef> VerNames;
+};
+
+struct VerdefSection : Section { // Section with Kind SectionKind::Verdef.
+  std::vector<VerdefEntry> Entries;
+  llvm::yaml::Hex64 Info;
+
+  VerdefSection() : Section(SectionKind::Verdef) {}
+
+  static bool classof(const Section *S) { // Kind-based type test.
+    return S->Kind == SectionKind::Verdef;
+  }
+};
+
 struct Group : Section {
   // Members of a group contain a flag and a list of section indices
   // that are part of the group.
   std::vector<SectionOrType> Members;
+  StringRef Signature; /* Info */
 
   Group() : Section(SectionKind::Group) {}
 
@@ -171,6 +260,7 @@ struct Relocation {
 
 struct RelocationSection : Section {
   std::vector<Relocation> Relocations;
+  StringRef RelocatableSec; /* Info */
 
   RelocationSection() : Section(SectionKind::Relocation) {}
 
@@ -208,16 +298,20 @@ struct Object {
   // cleaner and nicer if we read them from the YAML as a separate
   // top-level key, which automatically ensures that invariants like there
   // being a single SHT_SYMTAB section are upheld.
-  LocalGlobalWeakSymbols Symbols;
-  LocalGlobalWeakSymbols DynamicSymbols;
+  std::vector<Symbol> Symbols;
+  std::vector<Symbol> DynamicSymbols;
 };
 
 } // end namespace ELFYAML
 } // end namespace llvm
 
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader)
 LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::ELFYAML::Section>)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Symbol)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerdefEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VernauxEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerneedEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Relocation)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionOrType)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionName)
@@ -277,6 +371,10 @@ template <> struct ScalarEnumerationTraits<ELFYAML::ELF_SHN> {
   static void enumeration(IO &IO, ELFYAML::ELF_SHN &Value);
 };
 
+template <> struct ScalarEnumerationTraits<ELFYAML::ELF_STB> {
+  static void enumeration(IO &IO, ELFYAML::ELF_STB &Value);
+}; // Enumeration hook for ELF_STB, the type of ELFYAML::Symbol::Binding.
+
 template <>
 struct ScalarEnumerationTraits<ELFYAML::ELF_STT> {
   static void enumeration(IO &IO, ELFYAML::ELF_STT &Value);
@@ -297,6 +395,11 @@ struct ScalarEnumerationTraits<ELFYAML::ELF_REL> {
   static void enumeration(IO &IO, ELFYAML::ELF_REL &Value);
 };
 
+template <>
+struct ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG> {
+  static void enumeration(IO &IO, ELFYAML::ELF_DYNTAG &Value);
+}; // Enumeration hook for ELF_DYNTAG, the type of ELFYAML::DynamicEntry::Tag.
+
 template <>
 struct ScalarEnumerationTraits<ELFYAML::ELF_RSS> {
   static void enumeration(IO &IO, ELFYAML::ELF_RSS &Value);
@@ -347,9 +450,20 @@ struct MappingTraits<ELFYAML::Symbol> {
   static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol);
 };
 
-template <>
-struct MappingTraits<ELFYAML::LocalGlobalWeakSymbols> {
-  static void mapping(IO &IO, ELFYAML::LocalGlobalWeakSymbols &Symbols);
+template <> struct MappingTraits<ELFYAML::DynamicEntry> {
+  static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel);
+}; // NOTE(review): the DynamicEntry parameter is named 'Rel' -- confirm intent.
+
+template <> struct MappingTraits<ELFYAML::VerdefEntry> {
+  static void mapping(IO &IO, ELFYAML::VerdefEntry &E);
+}; // Mapping hook for VerdefEntry; only the declaration lives here.
+
+template <> struct MappingTraits<ELFYAML::VerneedEntry> {
+  static void mapping(IO &IO, ELFYAML::VerneedEntry &E);
+}; // Mapping hook for VerneedEntry; only the declaration lives here.
+
+template <> struct MappingTraits<ELFYAML::VernauxEntry> { // Declaration only.
+  static void mapping(IO &IO, ELFYAML::VernauxEntry &E);
 };
 
 template <> struct MappingTraits<ELFYAML::Relocation> {
diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h
index cec4f86185f0..d7e1c033f43b 100644
--- a/include/llvm/ObjectYAML/MachOYAML.h
+++ b/include/llvm/ObjectYAML/MachOYAML.h
@@ -1,9 +1,8 @@
 //===- MachOYAML.h - Mach-O YAMLIO implementation ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h
new file mode 100644
index 000000000000..39fdd62e017b
--- /dev/null
+++ b/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -0,0 +1,239 @@
+//===- MinidumpYAML.h - Minidump YAMLIO implementation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_MINIDUMPYAML_H
+#define LLVM_OBJECTYAML_MINIDUMPYAML_H
+
+#include "llvm/BinaryFormat/Minidump.h"
+#include "llvm/Object/Minidump.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace MinidumpYAML {
+
+/// The base class for all minidump streams. The "Type" of the stream
+/// corresponds to the Stream Type field in the minidump file. The "Kind" field
+/// specifies how we are going to treat it. For highly specialized streams (e.g.
+/// SystemInfo), there is a 1:1 mapping between Types and Kinds, but in general
+/// one stream Kind can be used to represent multiple stream Types (e.g. any
+/// unrecognised stream Type will be handled via RawContentStream). The mapping
+/// from Types to Kinds is fixed and given by the static getKind function.
+struct Stream {
+  enum class StreamKind {
+    MemoryList,
+    ModuleList,
+    RawContent,
+    SystemInfo,
+    TextContent,
+    ThreadList,
+  };
+
+  Stream(StreamKind Kind, minidump::StreamType Type) : Kind(Kind), Type(Type) {}
+  virtual ~Stream(); // anchor
+
+  const StreamKind Kind;
+  const minidump::StreamType Type;
+
+  /// Get the stream Kind used for representing streams of a given Type.
+  static StreamKind getKind(minidump::StreamType Type);
+
+  /// Create an empty stream of the given Type.
+  static std::unique_ptr<Stream> create(minidump::StreamType Type);
+
+  /// Create a stream from the given stream directory entry.
+  static Expected<std::unique_ptr<Stream>>
+  create(const minidump::Directory &StreamDesc,
+         const object::MinidumpFile &File);
+};
+
+namespace detail {
+/// A stream representing a list of abstract entries in a minidump stream. Its
+/// instantiations can be used to represent the ModuleList stream and other
+/// streams with a similar structure.
+template <typename EntryT> struct ListStream : public Stream {
+  using entry_type = EntryT;
+
+  std::vector<entry_type> Entries;
+
+  explicit ListStream(std::vector<entry_type> Entries = {})
+      : Stream(EntryT::Kind, EntryT::Type), Entries(std::move(Entries)) {}
+
+  static bool classof(const Stream *S) { return S->Kind == EntryT::Kind; }
+};
+
+/// A structure containing all data belonging to a single minidump module.
+struct ParsedModule {
+  static constexpr Stream::StreamKind Kind = Stream::StreamKind::ModuleList;
+  static constexpr minidump::StreamType Type = minidump::StreamType::ModuleList;
+
+  minidump::Module Entry;
+  std::string Name;
+  yaml::BinaryRef CvRecord;
+  yaml::BinaryRef MiscRecord;
+};
+
+/// A structure containing all data belonging to a single minidump thread.
+struct ParsedThread {
+  static constexpr Stream::StreamKind Kind = Stream::StreamKind::ThreadList;
+  static constexpr minidump::StreamType Type = minidump::StreamType::ThreadList;
+
+  minidump::Thread Entry;
+  yaml::BinaryRef Stack;
+  yaml::BinaryRef Context;
+};
+
+/// A structure containing all data describing a single memory region.
+struct ParsedMemoryDescriptor {
+  static constexpr Stream::StreamKind Kind = Stream::StreamKind::MemoryList;
+  static constexpr minidump::StreamType Type = minidump::StreamType::MemoryList;
+
+  minidump::MemoryDescriptor Entry;
+  yaml::BinaryRef Content;
+};
+} // namespace detail
+
+using ModuleListStream = detail::ListStream<detail::ParsedModule>;
+using ThreadListStream = detail::ListStream<detail::ParsedThread>;
+using MemoryListStream = detail::ListStream<detail::ParsedMemoryDescriptor>;
+
+/// A minidump stream represented as a sequence of hex bytes. This is used as a
+/// fallback when no other stream kind is suitable.
+struct RawContentStream : public Stream {
+  yaml::BinaryRef Content;
+  yaml::Hex32 Size;
+
+  RawContentStream(minidump::StreamType Type, ArrayRef<uint8_t> Content = {})
+      : Stream(StreamKind::RawContent, Type), Content(Content),
+        Size(Content.size()) {}
+
+  static bool classof(const Stream *S) {
+    return S->Kind == StreamKind::RawContent;
+  }
+};
+
+/// SystemInfo minidump stream.
+struct SystemInfoStream : public Stream {
+  minidump::SystemInfo Info;
+  std::string CSDVersion;
+
+  explicit SystemInfoStream(const minidump::SystemInfo &Info,
+                            std::string CSDVersion)
+      : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
+        Info(Info), CSDVersion(std::move(CSDVersion)) {}
+
+  SystemInfoStream()
+      : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {
+    memset(&Info, 0, sizeof(Info));
+  }
+
+  static bool classof(const Stream *S) {
+    return S->Kind == StreamKind::SystemInfo;
+  }
+};
+
+/// A StringRef, which is printed using YAML block notation.
+LLVM_YAML_STRONG_TYPEDEF(StringRef, BlockStringRef)
+
+/// A minidump stream containing textual data (typically, the contents of a
+/// /proc/<pid> file on linux).
+struct TextContentStream : public Stream {
+  BlockStringRef Text;
+
+  TextContentStream(minidump::StreamType Type, StringRef Text = {})
+      : Stream(StreamKind::TextContent, Type), Text(Text) {}
+
+  static bool classof(const Stream *S) {
+    return S->Kind == StreamKind::TextContent;
+  }
+};
+
+/// The top level structure representing a minidump object, consisting of a
+/// minidump header, and zero or more streams. To construct an Object from a
+/// minidump file, use the static create function. To serialize to/from yaml,
+/// use the appropriate streaming operator on a yaml stream.
+struct Object {
+  Object() = default;
+  Object(const Object &) = delete;
+  Object &operator=(const Object &) = delete;
+  Object(Object &&) = default;
+  Object &operator=(Object &&) = default;
+
+  Object(const minidump::Header &Header,
+         std::vector<std::unique_ptr<Stream>> Streams)
+      : Header(Header), Streams(std::move(Streams)) {}
+
+  /// The minidump header.
+  minidump::Header Header;
+
+  /// The list of streams in this minidump object.
+  std::vector<std::unique_ptr<Stream>> Streams;
+
+  static Expected<Object> create(const object::MinidumpFile &File);
+};
+
+/// Serialize the minidump file represented by Obj to OS in binary form.
+void writeAsBinary(Object &Obj, raw_ostream &OS);
+
+/// Serialize the yaml string as a minidump file to OS in binary form.
+Error writeAsBinary(StringRef Yaml, raw_ostream &OS);
+
+} // namespace MinidumpYAML
+
+namespace yaml {
+template <> struct BlockScalarTraits<MinidumpYAML::BlockStringRef> {
+  static void output(const MinidumpYAML::BlockStringRef &Text, void *,
+                     raw_ostream &OS) {
+    OS << Text;
+  }
+
+  static StringRef input(StringRef Scalar, void *,
+                         MinidumpYAML::BlockStringRef &Text) {
+    Text = Scalar;
+    return "";
+  }
+};
+
+template <> struct MappingTraits<std::unique_ptr<MinidumpYAML::Stream>> {
+  static void mapping(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
+  static StringRef validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
+};
+
+template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> {
+  static void mapping(IO &IO, minidump::MemoryDescriptor &Memory,
+                      BinaryRef &Content);
+};
+
+} // namespace yaml
+
+} // namespace llvm
+
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+    llvm::MinidumpYAML::MemoryListStream::entry_type)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+    llvm::MinidumpYAML::ModuleListStream::entry_type)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+    llvm::MinidumpYAML::ThreadListStream::entry_type)
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)
+
+#endif // LLVM_OBJECTYAML_MINIDUMPYAML_H
diff --git a/include/llvm/ObjectYAML/ObjectYAML.h b/include/llvm/ObjectYAML/ObjectYAML.h
index 00ce86430fca..0015fd3dc501 100644
--- a/include/llvm/ObjectYAML/ObjectYAML.h
+++ b/include/llvm/ObjectYAML/ObjectYAML.h
@@ -1,9 +1,8 @@
 //===- ObjectYAML.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "llvm/ObjectYAML/COFFYAML.h"
 #include "llvm/ObjectYAML/ELFYAML.h"
 #include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/ObjectYAML/MinidumpYAML.h"
 #include "llvm/ObjectYAML/WasmYAML.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <memory>
@@ -27,6 +27,7 @@ struct YamlObjectFile {
   std::unique_ptr<COFFYAML::Object> Coff;
   std::unique_ptr<MachOYAML::Object> MachO;
   std::unique_ptr<MachOYAML::UniversalBinary> FatMachO;
+  std::unique_ptr<MinidumpYAML::Object> Minidump;
   std::unique_ptr<WasmYAML::Object> Wasm;
 };
 
diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h
index 406dd7cb515f..2411dc7ac17d 100644
--- a/include/llvm/ObjectYAML/WasmYAML.h
+++ b/include/llvm/ObjectYAML/WasmYAML.h
@@ -1,9 +1,8 @@
 //===- WasmYAML.h - Wasm YAMLIO implementation ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -39,6 +38,7 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, SymbolKind)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, SegmentFlags)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, LimitFlags)
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ComdatKind)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, FeaturePolicyPrefix)
 
 struct FileHeader {
   yaml::Hex32 Version;
@@ -112,8 +112,9 @@ struct Relocation {
 };
 
 struct DataSegment {
-  uint32_t MemoryIndex;
   uint32_t SectionOffset;
+  uint32_t InitFlags;
+  uint32_t MemoryIndex;
   wasm::WasmInitExpr Offset;
   yaml::BinaryRef Content;
 };
@@ -123,6 +124,16 @@ struct NameEntry {
   StringRef Name;
 };
 
+struct ProducerEntry {
+  std::string Name;
+  std::string Version;
+};
+
+struct FeatureEntry {
+  FeaturePolicyPrefix Prefix;
+  std::string Name;
+};
+
 struct SegmentInfo {
   uint32_t Index;
   StringRef Name;
@@ -224,6 +235,30 @@ struct LinkingSection : CustomSection {
   std::vector<Comdat> Comdats;
 };
 
+struct ProducersSection : CustomSection {
+  ProducersSection() : CustomSection("producers") {}
+
+  static bool classof(const Section *S) {
+    auto C = dyn_cast<CustomSection>(S);
+    return C && C->Name == "producers";
+  }
+
+  std::vector<ProducerEntry> Languages;
+  std::vector<ProducerEntry> Tools;
+  std::vector<ProducerEntry> SDKs;
+};
+
+struct TargetFeaturesSection : CustomSection {
+  TargetFeaturesSection() : CustomSection("target_features") {}
+
+  static bool classof(const Section *S) {
+    auto C = dyn_cast<CustomSection>(S);
+    return C && C->Name == "target_features";
+  }
+
+  std::vector<FeatureEntry> Features;
+};
+
 struct TypeSection : Section {
   TypeSection() : Section(wasm::WASM_SEC_TYPE) {}
 
@@ -344,6 +379,16 @@ struct DataSection : Section {
   std::vector<DataSegment> Segments;
 };
 
+struct DataCountSection : Section {
+  DataCountSection() : Section(wasm::WASM_SEC_DATACOUNT) {}
+
+  static bool classof(const Section *S) {
+    return S->Type == wasm::WASM_SEC_DATACOUNT;
+  }
+
+  uint32_t Count;
+};
+
 struct Object {
   FileHeader Header;
   std::vector<std::unique_ptr<Section>> Sections;
@@ -366,6 +411,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Function)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ProducerEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::FeatureEntry)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SegmentInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
@@ -444,6 +491,18 @@ template <> struct MappingTraits<WasmYAML::NameEntry> {
   static void mapping(IO &IO, WasmYAML::NameEntry &NameEntry);
 };
 
+template <> struct MappingTraits<WasmYAML::ProducerEntry> {
+  static void mapping(IO &IO, WasmYAML::ProducerEntry &ProducerEntry);
+};
+
+template <> struct ScalarEnumerationTraits<WasmYAML::FeaturePolicyPrefix> {
+  static void enumeration(IO &IO, WasmYAML::FeaturePolicyPrefix &Prefix);
+};
+
+template <> struct MappingTraits<WasmYAML::FeatureEntry> {
+  static void mapping(IO &IO, WasmYAML::FeatureEntry &FeatureEntry);
+};
+
 template <> struct MappingTraits<WasmYAML::SegmentInfo> {
   static void mapping(IO &IO, WasmYAML::SegmentInfo &SegmentInfo);
 };
diff --git a/include/llvm/ObjectYAML/XCOFFYAML.h b/include/llvm/ObjectYAML/XCOFFYAML.h
new file mode 100644
index 000000000000..f99004e69762
--- /dev/null
+++ b/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -0,0 +1,71 @@
+//===----- XCOFFYAML.h - XCOFF YAMLIO implementation ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares classes for handling the YAML representation of XCOFF.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_OBJECTYAML_XCOFFYAML_H
+#define LLVM_OBJECTYAML_XCOFFYAML_H
+
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include <vector>
+
+namespace llvm {
+namespace XCOFFYAML {
+
+struct FileHeader {
+  llvm::yaml::Hex16 Magic;
+  uint16_t NumberOfSections;
+  int32_t TimeStamp;
+  llvm::yaml::Hex32 SymbolTableOffset; // File offset to symbol table.
+  int32_t NumberOfSymTableEntries;
+  uint16_t AuxHeaderSize;
+  llvm::yaml::Hex16 Flags;
+};
+
+struct Symbol {
+  StringRef SymbolName;
+  llvm::yaml::Hex32 Value; // Symbol value; storage class-dependent.
+  StringRef SectionName;
+  llvm::yaml::Hex16 Type;
+  XCOFF::StorageClass StorageClass;
+  uint8_t NumberOfAuxEntries; // Number of auxiliary entries
+};
+
+struct Object {
+  FileHeader Header;
+  std::vector<Symbol> Symbols;
+  Object();
+};
+} // namespace XCOFFYAML
+} // namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(XCOFFYAML::Symbol)
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<XCOFF::StorageClass> {
+  static void enumeration(IO &IO, XCOFF::StorageClass &Value);
+};
+
+template <> struct MappingTraits<XCOFFYAML::FileHeader> {
+  static void mapping(IO &IO, XCOFFYAML::FileHeader &H);
+};
+
+template <> struct MappingTraits<XCOFFYAML::Object> {
+  static void mapping(IO &IO, XCOFFYAML::Object &Obj);
+};
+
+template <> struct MappingTraits<XCOFFYAML::Symbol> {
+  static void mapping(IO &IO, XCOFFYAML::Symbol &S);
+};
+
+} // namespace yaml
+} // namespace llvm
+
+#endif // LLVM_OBJECTYAML_XCOFFYAML_H
diff --git a/include/llvm/ObjectYAML/YAML.h b/include/llvm/ObjectYAML/YAML.h
index 163cd8dfcf08..37014109a615 100644
--- a/include/llvm/ObjectYAML/YAML.h
+++ b/include/llvm/ObjectYAML/YAML.h
@@ -1,9 +1,8 @@
 //===- YAML.h ---------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -74,8 +73,7 @@ class BinaryRef {
 public:
   BinaryRef() = default;
   BinaryRef(ArrayRef<uint8_t> Data) : Data(Data), DataIsHexString(false) {}
-  BinaryRef(StringRef Data)
-      : Data(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()) {}
+  BinaryRef(StringRef Data) : Data(arrayRefFromStringRef(Data)) {}
 
   /// The number of bytes that are represented by this BinaryRef.
   /// This is the number of bytes that writeAsBinary() will write.
diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h
index d0086bb6d611..22e2bcf06a6e 100644
--- a/include/llvm/Option/Arg.h
+++ b/include/llvm/Option/Arg.h
@@ -1,9 +1,8 @@
 //===- Arg.h - Parsed Argument Classes --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -59,6 +58,11 @@ private:
   /// The argument values, as C strings.
   SmallVector<const char *, 2> Values;
 
+  /// If this arg was created through an alias, this is the original alias arg.
+  /// For example, *this might be "-finput-charset=utf-8" and Alias might
+  /// point to an arg representing "/source-charset:utf-8".
+  std::unique_ptr<Arg> Alias;
+
 public:
   Arg(const Option Opt, StringRef Spelling, unsigned Index,
       const Arg *BaseArg = nullptr);
@@ -71,7 +75,15 @@ public:
   ~Arg();
 
   const Option &getOption() const { return Opt; }
+
+  /// Returns the used prefix and name of the option:
+  /// For `--foo=bar`, returns `--foo=`.
+  /// This is often the wrong function to call:
+  /// * Use `getValue()` to get `bar`.
+  /// * Use `getAsString()` to get a string suitable for printing an Arg in
+  ///   a diagnostic.
   StringRef getSpelling() const { return Spelling; }
+
   unsigned getIndex() const { return Index; }
 
   /// Return the base argument which generated this arg.
@@ -83,6 +95,11 @@ public:
   }
   void setBaseArg(const Arg *BaseArg) { this->BaseArg = BaseArg; }
 
+  /// Args are converted to their unaliased form.  For args that originally
+  /// came from an alias, this returns the alias the arg was produced from.
+  const Arg* getAlias() const { return Alias.get(); }
+  void setAlias(std::unique_ptr<Arg> Alias) { this->Alias = std::move(Alias); }
+
   bool getOwnsValues() const { return OwnsValues; }
   void setOwnsValues(bool Value) const { OwnsValues = Value; }
 
@@ -120,8 +137,10 @@ public:
   void print(raw_ostream &O) const;
   void dump() const;
 
-  /// Return a formatted version of the argument and
-  /// its values, for debugging and diagnostics.
+  /// Return a formatted version of the argument and its values, for
+  /// diagnostics. Since this is for diagnostics, if this Arg was produced
+  /// through an alias, this returns the string representation of the alias
+  /// that the user wrote.
   std::string getAsString(const ArgList &Args) const;
 };
 
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
index 687c8cbb02f9..74bfadcba726 100644
--- a/include/llvm/Option/ArgList.h
+++ b/include/llvm/Option/ArgList.h
@@ -1,9 +1,8 @@
 //===- ArgList.h - Argument List Management ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -302,10 +301,12 @@ public:
   bool hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
                bool Default = true) const;
 
-  /// AddLastArg - Render only the last argument match \p Id0, if present.
-  void AddLastArg(ArgStringList &Output, OptSpecifier Id0) const;
-  void AddLastArg(ArgStringList &Output, OptSpecifier Id0,
-                  OptSpecifier Id1) const;
+  /// Render only the last argument matching any of \p Ids, if present.
+  template<typename ...OptSpecifiers>
+  void AddLastArg(ArgStringList &Output, OptSpecifiers ...Ids) const {
+    if (Arg *A = getLastArg(Ids...)) // Calls claim() on all Ids's Args.
+      A->render(*this, Output);
+  }
 
   /// AddAllArgsExcept - Render all arguments matching any of the given ids
   /// and not matching any of the excluded ids.
diff --git a/include/llvm/Option/OptParser.td b/include/llvm/Option/OptParser.td
index 9c373741770b..a68f17a8b10b 100644
--- a/include/llvm/Option/OptParser.td
+++ b/include/llvm/Option/OptParser.td
@@ -1,9 +1,8 @@
 //===--- OptParser.td - Common Option Parsing Interfaces ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h
index 84c3cf8ad534..7a5fcfb18b38 100644
--- a/include/llvm/Option/OptSpecifier.h
+++ b/include/llvm/Option/OptSpecifier.h
@@ -1,9 +1,8 @@
 //===- OptSpecifier.h - Option Specifiers -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h
index fdb05d8a15af..5db30436069d 100644
--- a/include/llvm/Option/OptTable.h
+++ b/include/llvm/Option/OptTable.h
@@ -1,9 +1,8 @@
 //===- OptTable.h - Option Table --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h
index b09f6043b7a9..33813d28d274 100644
--- a/include/llvm/Option/Option.h
+++ b/include/llvm/Option/Option.h
@@ -1,9 +1,8 @@
 //===- Option.h - Abstract Driver Options -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -207,6 +206,11 @@ public:
   ///                start.
   Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
 
+private:
+  Arg *acceptInternal(const ArgList &Args, unsigned &Index,
+                      unsigned ArgSize) const;
+
+public:
   void print(raw_ostream &O) const;
   void dump() const;
 };
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 5935a0853d32..329f7eaba73d 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -1,9 +1,8 @@
 //===- llvm/Pass.h - Base class for Passes ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index a075eb557472..1228534deb95 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -1,9 +1,8 @@
 //===- llvm/PassAnalysisSupport.h - Analysis Pass Support code --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h
index 2f1ab4d43377..686fc044ebcb 100644
--- a/include/llvm/PassInfo.h
+++ b/include/llvm/PassInfo.h
@@ -1,9 +1,8 @@
 //===- llvm/PassInfo.h - Pass Info class ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 57462138c5ae..b9a015430c10 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -1,9 +1,8 @@
 //===- llvm/PassRegistry.h - Pass Information Registry ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index 1bf23dcba50b..ab90217ce4a8 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -1,9 +1,8 @@
 //===- llvm/PassSupport.h - Pass Support code -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index fa59345a02cf..5e6660599f93 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -1,9 +1,8 @@
 //===- Parsing, selection, and construction of pass pipelines --*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -32,36 +31,85 @@ class ModuleSummaryIndex;
 
 /// A struct capturing PGO tunables.
 struct PGOOptions {
-  PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "",
-             std::string SampleProfileFile = "",
-             std::string ProfileRemappingFile = "",
-             bool RunProfileGen = false, bool SamplePGOSupport = false)
-      : ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile),
-        SampleProfileFile(SampleProfileFile),
-        ProfileRemappingFile(ProfileRemappingFile),
-        RunProfileGen(RunProfileGen),
-        SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) {
-    assert((RunProfileGen ||
-            !SampleProfileFile.empty() ||
-            !ProfileUseFile.empty() ||
-            SamplePGOSupport) && "Illegal PGOOptions.");
+  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+             CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false)
+      : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+        ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+        CSAction(CSAction),
+        SamplePGOSupport(SamplePGOSupport || Action == SampleUse) {
+    // Note, we do allow ProfileFile.empty() for Action=IRUse, as LTO can
+    // call back with IRUse action without ProfileFile.
+
+    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+    assert(this->CSAction == NoCSAction ||
+           (this->Action != IRInstr && this->Action != SampleUse));
+
+    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+    // a profile.
+    assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+    // If neither Action nor CSAction, SamplePGOSupport needs to be true.
+    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+           this->SamplePGOSupport);
   }
-  std::string ProfileGenFile;
-  std::string ProfileUseFile;
-  std::string SampleProfileFile;
+  std::string ProfileFile;
+  std::string CSProfileGenFile;
   std::string ProfileRemappingFile;
-  bool RunProfileGen;
+  PGOAction Action;
+  CSPGOAction CSAction;
   bool SamplePGOSupport;
 };
 
+/// Tunable parameters for passes in the default pipelines.
+class PipelineTuningOptions {
+public:
+  /// Constructor sets pipeline tuning defaults based on cl::opts. Each option
+  /// can be set in the PassBuilder when using LLVM as a library.
+  PipelineTuningOptions();
+
+  /// Tuning option to set loop interleaving on/off. Its default value is that
+  /// of the flag: `-interleave-loops`.
+  bool LoopInterleaving;
+
+  /// Tuning option to enable/disable loop vectorization. Its default value is
+  /// that of the flag: `-vectorize-loops`.
+  bool LoopVectorization;
+
+  /// Tuning option to enable/disable SLP vectorization. Its default value
+  /// is that of the flag: `-vectorize-slp`.
+  bool SLPVectorization;
+
+  /// Tuning option to enable/disable loop unrolling. Its default value is true.
+  bool LoopUnrolling;
+
+  /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
+  /// is that of the flag: `-forget-scev-loop-unroll`.
+  bool ForgetAllSCEVInLoopUnroll;
+
+  /// Tuning option to cap the number of calls to retrieve clobbering accesses
+  /// in MemorySSA, in LICM.
+  unsigned LicmMssaOptCap;
+
+  /// Tuning option to disable promotion to scalars in LICM with MemorySSA, if
+  /// the number of accesses is too large.
+  unsigned LicmMssaNoAccForPromotionCap;
+};
+
 /// This class provides access to building LLVM's passes.
 ///
-/// It's members provide the baseline state available to passes during their
+/// Its members provide the baseline state available to passes during their
 /// construction. The \c PassRegistry.def file specifies how to construct all
 /// of the built-in passes, and those may reference these members during
 /// construction.
 class PassBuilder {
   TargetMachine *TM;
+  PipelineTuningOptions PTO;
   Optional<PGOOptions> PGOOpt;
   PassInstrumentationCallbacks *PIC;
 
@@ -85,9 +133,9 @@ public:
   enum class ThinLTOPhase {
     /// No ThinLTO behavior needed.
     None,
-    // ThinLTO prelink (summary) phase.
+    /// ThinLTO prelink (summary) phase.
     PreLink,
-    // ThinLTO postlink (backend compile) phase.
+    /// ThinLTO postlink (backend compile) phase.
     PostLink
   };
 
@@ -178,14 +226,15 @@ public:
   };
 
   explicit PassBuilder(TargetMachine *TM = nullptr,
+                       PipelineTuningOptions PTO = PipelineTuningOptions(),
                        Optional<PGOOptions> PGOOpt = None,
                        PassInstrumentationCallbacks *PIC = nullptr)
-      : TM(TM), PGOOpt(PGOOpt), PIC(PIC) {}
+      : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {}
 
   /// Cross register the analysis managers through their proxies.
   ///
   /// This is an interface that can be used to cross register each
-  // AnalysisManager with all the others analysis managers.
+  /// AnalysisManager with all the other analysis managers.
   void crossRegisterProxies(LoopAnalysisManager &LAM,
                             FunctionAnalysisManager &FAM,
                             CGSCCAnalysisManager &CGAM,
@@ -275,7 +324,8 @@ public:
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
   ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
-                                                    bool DebugLogging = false);
+                                                    bool DebugLogging = false,
+                                                    bool LTOPreLink = false);
 
   /// Build a per-module default optimization pipeline.
   ///
@@ -289,7 +339,8 @@ public:
   /// require some transformations for semantic reasons, they should explicitly
   /// build them.
   ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                                  bool DebugLogging = false);
+                                                  bool DebugLogging = false,
+                                                  bool LTOPreLink = false);
 
   /// Build a pre-link, ThinLTO-targeting default optimization pipeline to
   /// a pass manager.
@@ -392,7 +443,7 @@ public:
   /// {{@ Parse a textual pass pipeline description into a specific PassManager
   ///
   /// Automatic deduction of an appropriate pass manager stack is not supported.
-  /// For example, to insert a loop pass 'lpass' into a FunctinoPassManager,
+  /// For example, to insert a loop pass 'lpass' into a FunctionPassManager,
   /// this is the valid pipeline text:
   ///
   ///   function(lpass)
@@ -606,9 +657,8 @@ private:
                                 bool VerifyEachPass, bool DebugLogging);
 
   void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
-                         OptimizationLevel Level, bool RunProfileGen,
-                         std::string ProfileGenFile,
-                         std::string ProfileUseFile,
+                         OptimizationLevel Level, bool RunProfileGen, bool IsCS,
+                         std::string ProfileFile,
                          std::string ProfileRemappingFile);
 
   void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
diff --git a/include/llvm/Passes/PassPlugin.h b/include/llvm/Passes/PassPlugin.h
index af8f11a7a352..013b7a827c47 100644
--- a/include/llvm/Passes/PassPlugin.h
+++ b/include/llvm/Passes/PassPlugin.h
@@ -1,9 +1,8 @@
 //===- llvm/Passes/PassPlugin.h - Public Plugin API -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Passes/StandardInstrumentations.h b/include/llvm/Passes/StandardInstrumentations.h
index 8c6f5e1e22f7..3d3002eecce9 100644
--- a/include/llvm/Passes/StandardInstrumentations.h
+++ b/include/llvm/Passes/StandardInstrumentations.h
@@ -1,9 +1,8 @@
 //===- StandardInstrumentations.h ------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -64,6 +63,8 @@ public:
   StandardInstrumentations() = default;
 
   void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+  TimePassesHandler &getTimePasses() { return TimePasses; }
 };
 } // namespace llvm
 
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index beaa36553287..11758ac4cf2f 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -1,9 +1,8 @@
 //===- CoverageMapping.h - Code coverage mapping support --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index c88c71a6d6f4..57a2aaefd660 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -1,9 +1,8 @@
 //===- CoverageMappingReader.h - Code coverage mapping reader ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -204,9 +203,15 @@ public:
   BinaryCoverageReader(const BinaryCoverageReader &) = delete;
   BinaryCoverageReader &operator=(const BinaryCoverageReader &) = delete;
 
+  static Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
+  create(MemoryBufferRef ObjectBuffer, StringRef Arch,
+         SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers);
+
   static Expected<std::unique_ptr<BinaryCoverageReader>>
-  create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
-         StringRef Arch);
+  createCoverageReaderFromBuffer(StringRef Coverage,
+                                 InstrProfSymtab &&ProfileNames,
+                                 uint8_t BytesInAddress,
+                                 support::endianness Endian);
 
   Error readNextRecord(CoverageMappingRecord &Record) override;
 };
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
index 86fb1bdf1773..5f88cacdfcbb 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -1,9 +1,8 @@
 //===- CoverageMappingWriter.h - Code coverage mapping writer ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ProfileData/GCOV.h b/include/llvm/ProfileData/GCOV.h
index a088f63a6915..004ff3f4a2e2 100644
--- a/include/llvm/ProfileData/GCOV.h
+++ b/include/llvm/ProfileData/GCOV.h
@@ -1,9 +1,8 @@
 //===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,9 +44,10 @@ enum GCOVVersion { V402, V404, V704 };
 
 /// A struct for passing gcov options between functions.
 struct Options {
-  Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N)
+  Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N, bool X)
       : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F),
-        PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {}
+        PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N),
+        HashFilenames(X) {}
 
   bool AllBlocks;
   bool BranchInfo;
@@ -57,6 +57,7 @@ struct Options {
   bool UncondBranch;
   bool LongFileNames;
   bool NoOutput;
+  bool HashFilenames;
 };
 
 } // end namespace GCOV
@@ -317,12 +318,6 @@ class GCOVBlock {
     uint64_t Count = 0;
   };
 
-  struct SortDstEdgesFunctor {
-    bool operator()(const GCOVEdge *E1, const GCOVEdge *E2) {
-      return E1->Dst.Number < E2->Dst.Number;
-    }
-  };
-
 public:
   using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator;
   using BlockVector = SmallVector<const GCOVBlock *, 4>;
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index dc45021fc47d..c7d764ade30d 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -1,9 +1,8 @@
 //===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -235,7 +234,7 @@ bool isIRPGOFlagSet(const Module *M);
 bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
 
 enum InstrProfValueKind : uint32_t {
-#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
 #include "llvm/ProfileData/InstrProfData.inc"
 };
 
@@ -591,6 +590,70 @@ StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
   return PGOName.drop_front(S + 1);
 }
 
+// To store the sums of profile count values, or the percentage of
+// the sums of the total count values.
+struct CountSumOrPercent {
+  uint64_t NumEntries;
+  double CountSum;
+  double ValueCounts[IPVK_Last - IPVK_First + 1];
+  CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {}
+  void reset() {
+    NumEntries = 0;
+    CountSum = 0.0f;
+    for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++)
+      ValueCounts[I] = 0.0f;
+  }
+};
+
+// Function level or program level overlap information.
+struct OverlapStats {
+  enum OverlapStatsLevel { ProgramLevel, FunctionLevel };
+  // Sum of the total count values for the base profile.
+  CountSumOrPercent Base;
+  // Sum of the total count values for the test profile.
+  CountSumOrPercent Test;
+  // Overlap score. Should be in the range [0.0f, 1.0f].
+  CountSumOrPercent Overlap;
+  CountSumOrPercent Mismatch;
+  CountSumOrPercent Unique;
+  OverlapStatsLevel Level;
+  const std::string *BaseFilename;
+  const std::string *TestFilename;
+  StringRef FuncName;
+  uint64_t FuncHash;
+  bool Valid;
+
+  OverlapStats(OverlapStatsLevel L = ProgramLevel)
+      : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0),
+        Valid(false) {}
+
+  void dump(raw_fd_ostream &OS) const;
+
+  void setFuncInfo(StringRef Name, uint64_t Hash) {
+    FuncName = Name;
+    FuncHash = Hash;
+  }
+
+  Error accumuateCounts(const std::string &BaseFilename,
+                        const std::string &TestFilename, bool IsCS);
+  void addOneMismatch(const CountSumOrPercent &MismatchFunc);
+  void addOneUnique(const CountSumOrPercent &UniqueFunc);
+
+  static inline double score(uint64_t Val1, uint64_t Val2, double Sum1,
+                             double Sum2) {
+    if (Sum1 < 1.0f || Sum2 < 1.0f)
+      return 0.0f;
+    return std::min(Val1 / Sum1, Val2 / Sum2);
+  }
+};
+
+// This is used to filter the functions whose overlap information
+// is to be output.
+struct OverlapFuncFilters {
+  uint64_t ValueCutoff;
+  const std::string NameFilter;
+};
+
 struct InstrProfValueSiteRecord {
   /// Value profiling data pairs at a given value site.
   std::list<InstrProfValueData> ValueData;
@@ -616,6 +679,10 @@ struct InstrProfValueSiteRecord {
              function_ref<void(instrprof_error)> Warn);
   /// Scale up value profile data counts.
   void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+
+  /// Compute the overlap b/w this record and Input record.
+  void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
+               OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
 };
 
 /// Profiling information for a single function.
@@ -704,6 +771,18 @@ struct InstrProfRecord {
   /// Clear value data entries
   void clearValueData() { ValueData = nullptr; }
 
+  /// Compute the sums of all counts and store in Sum.
+  void accumuateCounts(CountSumOrPercent &Sum) const;
+
+  /// Compute the overlap b/w this InstrProfRecord and Other.
+  void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
+               OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff);
+
+  /// Compute the overlap of value profile counts.
+  void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+                            OverlapStats &Overlap,
+                            OverlapStats &FuncLevelOverlap);
+
 private:
   struct ValueProfData {
     std::vector<InstrProfValueSiteRecord> IndirectCallSites;
@@ -768,10 +847,20 @@ struct NamedInstrProfRecord : InstrProfRecord {
   StringRef Name;
   uint64_t Hash;
 
+  // We reserve this bit as the flag for context sensitive profile record.
+  static const int CS_FLAG_IN_FUNC_HASH = 60;
+
   NamedInstrProfRecord() = default;
   NamedInstrProfRecord(StringRef Name, uint64_t Hash,
                        std::vector<uint64_t> Counts)
       : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
+
+  static bool hasCSFlagInHash(uint64_t FuncHash) {
+    return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
+  }
+  static void setCSFlagInHash(uint64_t &FuncHash) {
+    FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
+  }
 };
 
 uint32_t InstrProfRecord::getNumValueKinds() const {
@@ -1005,6 +1094,8 @@ namespace RawInstrProf {
 // from control data struct is changed from raw pointer to Name's MD5 value.
 // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
 // raw header.
+// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
+// sensitive records.
 const uint64_t Version = INSTR_PROF_RAW_VERSION;
 
 template <class IntPtrT> inline uint64_t getMagic();
@@ -1041,6 +1132,12 @@ struct Header {
 void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                  int64_t &RangeLast);
 
-} // end namespace llvm
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+void createIRLevelProfileFlagVar(Module &M, bool IsCS);
 
+// Create the variable for the profile file name.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
+
+} // end namespace llvm
 #endif // LLVM_PROFILEDATA_INSTRPROF_H
diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc
index 454620ed997a..749781b9ac2d 100644
--- a/include/llvm/ProfileData/InstrProfData.inc
+++ b/include/llvm/ProfileData/InstrProfData.inc
@@ -1,9 +1,8 @@
 /*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
 |*
-|*                     The LLVM Compiler Infrastructure
-|*
-|* This file is distributed under the University of Illinois Open Source
-|* License. See LICENSE.TXT for details.
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 |*
 \*===----------------------------------------------------------------------===*/
 /*
@@ -170,7 +169,7 @@ VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
 
 /* VALUE_PROF_KIND start */
 #ifndef VALUE_PROF_KIND
-#define VALUE_PROF_KIND(Enumerator, Value)
+#define VALUE_PROF_KIND(Enumerator, Value, Descr)
 #else
 #define INSTR_PROF_DATA_DEFINED
 #endif
@@ -183,16 +182,16 @@ VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
  * For this remapping the ProfData is used.  ProfData contains both the function
  * name hash and the function address.
  */
-VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0)
+VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
-VALUE_PROF_KIND(IPVK_MemOPSize, 1)
+VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
-VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget)
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize)
+VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
+VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -250,22 +249,25 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
 #define INSTR_PROF_DATA_DEFINED
 INSTR_PROF_SECT_ENTRY(IPSK_data, \
                       INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_DATA_COFF), "__DATA,")
+                      INSTR_PROF_DATA_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
                       INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COFF), "__DATA,")
+                      INSTR_PROF_CNTS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_name, \
                       INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_NAME_COFF), "__DATA,")
+                      INSTR_PROF_NAME_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vals, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_VALS_COFF), "__DATA,")
+                      INSTR_PROF_VALS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COFF), "__DATA,")
+                      INSTR_PROF_VNODES_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
                       INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
-                      INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COFF), "__LLVM_COV,")
+                      INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
+INSTR_PROF_SECT_ENTRY(IPSK_orderfile, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COFF), "__DATA,")
 
 #undef INSTR_PROF_SECT_ENTRY
 #endif
@@ -636,10 +638,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  * version for other variants of profile. We set the lowest bit of the upper 8
  * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton
  * generated profile, and 0 if this is a Clang FE generated profile.
+ * 1 in bit 57 indicates there are context-sensitive records in the profile.
  */
 #define VARIANT_MASKS_ALL 0xff00000000000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
 #define VARIANT_MASK_IR_PROF (0x1ULL << 56)
+#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
 #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
 #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
 
@@ -655,13 +659,17 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
 #define INSTR_PROF_COVMAP_COMMON __llvm_covmap
-/* Win32 */
-#define INSTR_PROF_DATA_COFF .lprfd
-#define INSTR_PROF_NAME_COFF .lprfn
-#define INSTR_PROF_CNTS_COFF .lprfc
-#define INSTR_PROF_VALS_COFF .lprfv
-#define INSTR_PROF_VNODES_COFF .lprfnd
-#define INSTR_PROF_COVMAP_COFF .lcovmap
+#define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile
+/* Windows section names. Because these section names contain dollar characters,
+ * they must be quoted.
+ */
+#define INSTR_PROF_DATA_COFF ".lprfd$M"
+#define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_CNTS_COFF ".lprfc$M"
+#define INSTR_PROF_VALS_COFF ".lprfv$M"
+#define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
+#define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M"
 
 #ifdef _WIN32
 /* Runtime section names and name strings.  */
@@ -675,32 +683,30 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Value profile nodes section. */
 #define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF
 #define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF
+#define INSTR_PROF_ORDERFILE_SECT_NAME INSTR_PROF_ORDERFILE_COFF
 #else
 /* Runtime section names and name strings.  */
-#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COMMON
-#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COMMON
-#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COMMON
+#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON)
+#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON)
+#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON)
 /* Array of pointers. Each pointer points to a list
  * of value nodes associated with one value site.
  */
-#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COMMON
+#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON)
 /* Value profile nodes section. */
-#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COMMON
-#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COMMON
+#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON)
+#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON)
+/* Order file instrumentation. */
+#define INSTR_PROF_ORDERFILE_SECT_NAME                                         \
+  INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON)
 #endif
 
-#define INSTR_PROF_DATA_SECT_NAME_STR                                          \
-  INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
-#define INSTR_PROF_NAME_SECT_NAME_STR                                          \
-  INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
-#define INSTR_PROF_CNTS_SECT_NAME_STR                                          \
-  INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
-#define INSTR_PROF_COVMAP_SECT_NAME_STR                                        \
-  INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME)
-#define INSTR_PROF_VALS_SECT_NAME_STR                                          \
-  INSTR_PROF_QUOTE(INSTR_PROF_VALS_SECT_NAME)
-#define INSTR_PROF_VNODES_SECT_NAME_STR                                        \
-  INSTR_PROF_QUOTE(INSTR_PROF_VNODES_SECT_NAME)
+#define INSTR_PROF_ORDERFILE_BUFFER_NAME _llvm_order_file_buffer
+#define INSTR_PROF_ORDERFILE_BUFFER_NAME_STR                                   \
+  INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_NAME)
+#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME _llvm_order_file_buffer_idx
+#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR                               \
+  INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME)
 
 /* Macros to define start/stop section symbol for a given
  * section on Linux. For instance
@@ -735,6 +741,12 @@ typedef struct InstrProfValueData {
 
 #endif /* INSTR_PROF_DATA_INC */
 
+#ifndef INSTR_ORDER_FILE_INC
+// The maximal # of functions: 128*1024 (the buffer size will be 128*4 KB).
+#define INSTR_ORDER_FILE_BUFFER_SIZE 131072
+#define INSTR_ORDER_FILE_BUFFER_BITS 17
+#define INSTR_ORDER_FILE_BUFFER_MASK 0x1ffff
+#endif /* INSTR_ORDER_FILE_INC */
 #else
 #undef INSTR_PROF_DATA_DEFINED
 #endif
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index 08d782276117..73751faab88e 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -1,9 +1,8 @@
 //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,6 +77,8 @@ public:
 
   virtual bool isIRLevelProfile() const = 0;
 
+  virtual bool hasCSIRLevelProfile() const = 0;
+
   /// Return the PGO symtab. There are three different readers:
   /// Raw, Text, and Indexed profile readers. The first two types
   /// of readers are used only by llvm-profdata tool, while the indexed
@@ -90,6 +91,9 @@ public:
   /// compiler.
   virtual InstrProfSymtab &getSymtab() = 0;
 
+  /// Compute the sum of counts and return in Sum.
+  void accumuateCounts(CountSumOrPercent &Sum, bool IsCS);
+
 protected:
   std::unique_ptr<InstrProfSymtab> Symtab;
 
@@ -143,6 +147,7 @@ private:
   /// Iterator over the profile data.
   line_iterator Line;
   bool IsIRLevelProfile = false;
+  bool HasCSIRLevelProfile = false;
 
   Error readValueProfileData(InstrProfRecord &Record);
 
@@ -157,6 +162,8 @@ public:
 
   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
 
+  bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
+
   /// Read the header.
   Error readHeader() override;
 
@@ -213,6 +220,10 @@ public:
     return (Version & VARIANT_MASK_IR_PROF) != 0;
   }
 
+  bool hasCSIRLevelProfile() const override {
+    return (Version & VARIANT_MASK_CSIR_PROF) != 0;
+  }
+
   InstrProfSymtab &getSymtab() override {
     assert(Symtab.get());
     return *Symtab.get();
@@ -342,6 +353,7 @@ struct InstrProfReaderIndexBase {
   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
   virtual uint64_t getVersion() const = 0;
   virtual bool isIRLevelProfile() const = 0;
+  virtual bool hasCSIRLevelProfile() const = 0;
   virtual Error populateSymtab(InstrProfSymtab &) = 0;
 };
 
@@ -386,6 +398,10 @@ public:
     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
   }
 
+  bool hasCSIRLevelProfile() const override {
+    return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
+  }
+
   Error populateSymtab(InstrProfSymtab &Symtab) override {
     return Symtab.create(HashTable->keys());
   }
@@ -413,13 +429,16 @@ private:
   std::unique_ptr<InstrProfReaderRemapper> Remapper;
   /// Profile summary data.
   std::unique_ptr<ProfileSummary> Summary;
+  /// Context sensitive profile summary data.
+  std::unique_ptr<ProfileSummary> CS_Summary;
   // Index to the current record in the record array.
   unsigned RecordIndex;
 
   // Read the profile summary. Return a pointer pointing to one byte past the
   // end of the summary data if it exists or the input \c Cur.
+  // \c UseCS indicates whether to use the context-sensitive profile summary.
   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
-                                   const unsigned char *Cur);
+                                   const unsigned char *Cur, bool UseCS);
 
 public:
   IndexedInstrProfReader(
@@ -433,6 +452,9 @@ public:
   /// Return the profile version.
   uint64_t getVersion() const { return Index->getVersion(); }
   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
+  bool hasCSIRLevelProfile() const override {
+    return Index->hasCSIRLevelProfile();
+  }
 
   /// Return true if the given buffer is in an indexed instrprof format.
   static bool hasFormat(const MemoryBuffer &DataBuffer);
@@ -451,7 +473,16 @@ public:
                           std::vector<uint64_t> &Counts);
 
   /// Return the maximum of all known function counts.
-  uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
+  /// \c UseCS indicates whether to use the context-sensitive count.
+  uint64_t getMaximumFunctionCount(bool UseCS) {
+    if (UseCS) {
+      assert(CS_Summary && "No context sensitive profile summary");
+      return CS_Summary->getMaxFunctionCount();
+    } else {
+      assert(Summary && "No profile summary");
+      return Summary->getMaxFunctionCount();
+    }
+  }
 
   /// Factory method to create an indexed reader.
   static Expected<std::unique_ptr<IndexedInstrProfReader>>
@@ -470,7 +501,18 @@ public:
   // to be used by llvm-profdata (for dumping). Avoid using this when
   // the client is the compiler.
   InstrProfSymtab &getSymtab() override;
-  ProfileSummary &getSummary() { return *(Summary.get()); }
+
+  /// Return the profile summary.
+  /// \c UseCS indicates whether to use the context-sensitive summary.
+  ProfileSummary &getSummary(bool UseCS) {
+    if (UseCS) {
+      assert(CS_Summary && "No context sensitive summary");
+      return *(CS_Summary.get());
+    } else {
+      assert(Summary && "No profile summary");
+      return *(Summary.get());
+    }
+  }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h
index 8107ab386fe2..5882fa2781e2 100644
--- a/include/llvm/ProfileData/InstrProfWriter.h
+++ b/include/llvm/ProfileData/InstrProfWriter.h
@@ -1,9 +1,8 @@
 //===- InstrProfWriter.h - Instrumented profiling writer --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,7 +33,8 @@ class raw_fd_ostream;
 class InstrProfWriter {
 public:
   using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>;
-  enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel };
+  // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation.
+  enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS };
 
 private:
   bool Sparse;
@@ -75,20 +75,36 @@ public:
   std::unique_ptr<MemoryBuffer> writeBuffer();
 
   /// Set the ProfileKind. Report error if mixing FE and IR level profiles.
-  Error setIsIRLevelProfile(bool IsIRLevel) {
+  /// \c WithCS indicates if this is for context sensitive instrumentation.
+  Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) {
     if (ProfileKind == PF_Unknown) {
-      ProfileKind = IsIRLevel ? PF_IRLevel: PF_FE;
+      if (IsIRLevel)
+        ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel;
+      else
+        ProfileKind = PF_FE;
       return Error::success();
     }
-    return (IsIRLevel == (ProfileKind == PF_IRLevel))
-               ? Error::success()
-               : make_error<InstrProfError>(
-                     instrprof_error::unsupported_version);
+
+    if (((ProfileKind != PF_FE) && !IsIRLevel) ||
+        ((ProfileKind == PF_FE) && IsIRLevel))
+      return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+    // When merging a context-sensitive profile (WithCS == true) with an IRLevel
+    // profile, set the kind to PF_IRLevelWithCS.
+    if (ProfileKind == PF_IRLevel && WithCS)
+      ProfileKind = PF_IRLevelWithCS;
+
+    return Error::success();
   }
 
   // Internal interface for testing purpose only.
   void setValueProfDataEndianness(support::endianness Endianness);
   void setOutputSparse(bool Sparse);
+  // Compute the overlap b/w this object and Other. Program level result is
+  // stored in Overlap and function level result is stored in FuncLevelOverlap.
+  void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
+                     OverlapStats &FuncLevelOverlap,
+                     const OverlapFuncFilters &FuncFilter);
 
 private:
   void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h
index 087588f06340..f98a34387fdf 100644
--- a/include/llvm/ProfileData/ProfileCommon.h
+++ b/include/llvm/ProfileData/ProfileCommon.h
@@ -1,9 +1,8 @@
 //===- ProfileCommon.h - Common profiling APIs. -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -84,7 +83,8 @@ public:
   SampleProfileSummaryBuilder(std::vector<uint32_t> Cutoffs)
       : ProfileSummaryBuilder(std::move(Cutoffs)) {}
 
-  void addRecord(const sampleprof::FunctionSamples &FS);
+  void addRecord(const sampleprof::FunctionSamples &FS,
+                 bool isCallsiteSample = false);
   std::unique_ptr<ProfileSummary> getSummary();
 };
 
diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h
index 927dfd246878..7fbc857b7230 100644
--- a/include/llvm/ProfileData/SampleProf.h
+++ b/include/llvm/ProfileData/SampleProf.h
@@ -1,9 +1,8 @@
 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -411,6 +410,34 @@ public:
     return getNameInModule(Name, M);
   }
 
+  /// Return the canonical name for \p F per its suffix elision policy
+  /// attribute: "" / "all" cut the name at its first '.', "selected" strips
+  /// only a trailing known suffix (".llvm.", ".part."), "none" keeps it all.
+  static StringRef getCanonicalFnName(const Function &F) {
+    static const char *knownSuffixes[] = { ".llvm.", ".part." };
+    auto AttrName = "sample-profile-suffix-elision-policy";
+    auto Attr = F.getFnAttribute(AttrName).getValueAsString();
+    if (Attr == "" || Attr == "all") {
+      return F.getName().split('.').first;
+    } else if (Attr == "selected") {
+      StringRef Cand(F.getName());
+      for (const auto &Suf : knownSuffixes) {
+        StringRef Suffix(Suf);
+        auto It = Cand.rfind(Suffix);
+        // A missing suffix must not end the scan: a later one may still match.
+        if (It == StringRef::npos)
+          continue;
+        // Strip only when the suffix's own trailing '.' is the last '.' left.
+        auto Dit = Cand.rfind('.');
+        if (Dit == It + Suffix.size() - 1)
+          Cand = Cand.substr(0, It);
+      }
+      return Cand;
+    }
+    assert(Attr == "none" && "internal error: unknown suffix elision policy");
+    return F.getName();
+  }
+
   /// Translate \p Name into its original name in Module.
   /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
   /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
@@ -466,11 +493,9 @@ public:
         /// built in post-thin-link phase and var promotion has been done,
         /// we need to add the substring of function name without the suffix
         /// into the GUIDToFuncNameMap.
-        auto pos = OrigName.find('.');
-        if (pos != StringRef::npos) {
-          StringRef NewName = OrigName.substr(0, pos);
-          GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
-        }
+        StringRef CanonName = getCanonicalFnName(F);
+        if (CanonName != OrigName)
+          GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName});
       }
       CurrentModule = &M;
     }
@@ -547,10 +572,9 @@ public:
   SampleSorter(const std::map<LocationT, SampleT> &Samples) {
     for (const auto &I : Samples)
       V.push_back(&I);
-    std::stable_sort(V.begin(), V.end(),
-                     [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
-                       return A->first < B->first;
-                     });
+    llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
+      return A->first < B->first;
+    });
   }
 
   const SamplesWithLocList &get() const { return V; }
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index 5cc729e42cc8..969cdea859c9 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -1,9 +1,8 @@
 //===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -287,10 +286,11 @@ public:
 
   /// Return the samples collected for function \p F.
   FunctionSamples *getSamplesFor(const Function &F) {
-    // The function name may have been updated by adding suffix. In sample
-    // profile, the function names are all stripped, so we need to strip
-    // the function name suffix before matching with profile.
-    return getSamplesFor(F.getName().split('.').first);
+    // The function name may have been updated by adding suffix. Call
+    // a helper to (optionally) strip off suffixes so that we can
+    // match against the original function name in the profile.
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    return getSamplesFor(CanonName);
   }
 
   /// Return the samples collected for function \p F.
diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h
index d5ac6e53e4f7..81e6e3ab0b4a 100644
--- a/include/llvm/ProfileData/SampleProfWriter.h
+++ b/include/llvm/ProfileData/SampleProfWriter.h
@@ -1,9 +1,8 @@
 //===- SampleProfWriter.h - Write LLVM sample profile data ------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Remarks/Remark.h b/include/llvm/Remarks/Remark.h
new file mode 100644
index 000000000000..05d0ea60accd
--- /dev/null
+++ b/include/llvm/Remarks/Remark.h
@@ -0,0 +1,113 @@
+//===-- llvm/Remarks/Remark.h - The remark type -----------------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an abstraction for handling remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_H
+#define LLVM_REMARKS_REMARK_H
+
+#include "llvm-c/Remarks.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CBindingWrapping.h"
+#include <string>
+
+namespace llvm {
+namespace remarks {
+
+constexpr uint64_t Version = 0;
+
+/// The debug location used to track a remark back to the source file.
+struct RemarkLocation {
+  /// Absolute path of the source file corresponding to this remark.
+  StringRef SourceFilePath;
+  unsigned SourceLine;
+  unsigned SourceColumn;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkLocation, LLVMRemarkDebugLocRef)
+
+/// A key-value pair with a debug location that is used to display the remarks
+/// at the right place in the source.
+struct Argument {
+  StringRef Key;
+  // FIXME: We might want to be able to store other types than strings here.
+  StringRef Val;
+  // If set, the debug location corresponding to the value.
+  Optional<RemarkLocation> Loc;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Argument, LLVMRemarkArgRef)
+
+/// The type of the remark.
+enum class Type {
+  Unknown,
+  Passed,
+  Missed,
+  Analysis,
+  AnalysisFPCommute,
+  AnalysisAliasing,
+  Failure,
+  LastTypeValue = Failure
+};
+
+/// A remark type used for both emission and parsing.
+struct Remark {
+  /// The type of the remark.
+  Type RemarkType = Type::Unknown;
+
+  /// Name of the pass that triggers the emission of this remark.
+  StringRef PassName;
+
+  /// Textual identifier for the remark (single-word, camel-case). Can be used
+  /// by external tools reading the output file for remarks to identify the
+  /// remark.
+  StringRef RemarkName;
+
+  /// Mangled name of the function that triggers the emission of this remark.
+  StringRef FunctionName;
+
+  /// The location in the source file of the remark.
+  Optional<RemarkLocation> Loc;
+
+  /// If profile information is available, this is the number of times the
+  /// corresponding code was executed in a profile instrumentation run.
+  Optional<uint64_t> Hotness;
+
+  /// Arguments collected via the streaming interface.
+  SmallVector<Argument, 5> Args;
+
+  Remark() = default;
+  Remark(Remark &&) = default;
+  Remark &operator=(Remark &&) = default;
+
+  /// Return a message composed from the arguments as a string.
+  std::string getArgsAsMsg() const;
+
+  /// Clone this remark to explicitly ask for a copy.
+  Remark clone() const { return *this; }
+
+private:
+  /// In order to avoid unwanted copies, the copy constructor and the copy
+  /// assignment are private. Use `Remark::clone()` when a copy is needed.
+  Remark(const Remark &) = default;
+  Remark& operator=(const Remark &) = default;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef)
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_H */
diff --git a/include/llvm/Remarks/RemarkFormat.h b/include/llvm/Remarks/RemarkFormat.h
new file mode 100644
index 000000000000..e167d99d2517
--- /dev/null
+++ b/include/llvm/Remarks/RemarkFormat.h
@@ -0,0 +1,33 @@
+//===-- llvm/Remarks/RemarkFormat.h - The format of remarks -----*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities to deal with the format of remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_FORMAT_H
+#define LLVM_REMARKS_REMARK_FORMAT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace remarks {
+
+constexpr StringRef Magic("REMARKS", 7);
+
+/// The format used for serializing/deserializing remarks.
+enum class Format { Unknown, YAML };
+
+/// Parse and validate a string for the remark format.
+Expected<Format> parseFormat(StringRef FormatStr);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_FORMAT_H */
diff --git a/include/llvm/Remarks/RemarkParser.h b/include/llvm/Remarks/RemarkParser.h
new file mode 100644
index 000000000000..671e1abe5ec7
--- /dev/null
+++ b/include/llvm/Remarks/RemarkParser.h
@@ -0,0 +1,77 @@
+//===-- llvm/Remarks/RemarkParser.h - Remark parser -------------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for parsing remarks in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_PARSER_H
+#define LLVM_REMARKS_REMARK_PARSER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+
+namespace llvm {
+namespace remarks {
+
+struct ParserImpl;
+struct ParsedStringTable;
+
+class EndOfFileError : public ErrorInfo<EndOfFileError> {
+public:
+  static char ID;
+
+  EndOfFileError() {}
+
+  void log(raw_ostream &OS) const override { OS << "End of file reached."; }
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+};
+
+/// Parser used to parse a raw buffer to remarks::Remark objects.
+struct Parser {
+  /// The format of the parser.
+  Format ParserFormat;
+
+  Parser(Format ParserFormat) : ParserFormat(ParserFormat) {}
+
+  /// If no error occurs, this returns a valid Remark object.
+  /// If an error of type EndOfFileError occurs, it is safe to recover from it
+  /// by stopping the parsing.
+  /// If any other error occurs, it should be propagated to the user.
+  /// The pointer should never be null.
+  virtual Expected<std::unique_ptr<Remark>> next() = 0;
+
+  virtual ~Parser() = default;
+};
+
+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+struct ParsedStringTable {
+  /// The buffer mapped from the section contents.
+  StringRef Buffer;
+  /// Collection of offsets in the buffer for each string entry.
+  SmallVector<size_t, 8> Offsets;
+
+  Expected<StringRef> operator[](size_t Index) const;
+  ParsedStringTable(StringRef Buffer);
+};
+
+Expected<std::unique_ptr<Parser>>
+createRemarkParser(Format ParserFormat, StringRef Buf,
+                   Optional<const ParsedStringTable *> StrTab = None);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_PARSER_H */
diff --git a/include/llvm/Remarks/RemarkSerializer.h b/include/llvm/Remarks/RemarkSerializer.h
new file mode 100644
index 000000000000..def5c2e16620
--- /dev/null
+++ b/include/llvm/Remarks/RemarkSerializer.h
@@ -0,0 +1,68 @@
+//===-- RemarkSerializer.h - Remark serialization interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for serializing remarks to different formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_SERIALIZER_H
+#define LLVM_REMARKS_REMARK_SERIALIZER_H
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace remarks {
+
+/// This is the base class for a remark serializer.
+/// It includes support for using a string table while emitting.
+struct Serializer {
+  /// The open raw_ostream that the remark diagnostics are emitted to.
+  raw_ostream &OS;
+  /// The string table containing all the unique strings used in the output.
+  /// The table can be serialized to be consumed after the compilation.
+  Optional<StringTable> StrTab;
+
+  Serializer(raw_ostream &OS) : OS(OS), StrTab() {}
+
+  /// This is just an interface.
+  virtual ~Serializer() = default;
+  virtual void emit(const Remark &Remark) = 0;
+};
+
+/// Whether the serializer should use a string table while emitting.
+enum class UseStringTable { No, Yes };
+
+/// Serialize the remarks to YAML. One remark entry looks like this:
+/// --- !<TYPE>
+/// Pass:            <PASSNAME>
+/// Name:            <REMARKNAME>
+/// DebugLoc:        { File: <SOURCEFILENAME>, Line: <SOURCELINE>,
+///                    Column: <SOURCECOLUMN> }
+/// Function:        <FUNCTIONNAME>
+/// Args:
+///   - <KEY>: <VALUE>
+///     DebugLoc:        { File: <FILE>, Line: <LINE>, Column: <COL> }
+/// ...
+struct YAMLSerializer : public Serializer {
+  /// The YAML streamer.
+  yaml::Output YAMLOutput;
+
+  YAMLSerializer(raw_ostream &OS,
+                 UseStringTable UseStringTable = remarks::UseStringTable::No);
+
+  /// Emit a remark to the stream.
+  void emit(const Remark &Remark) override;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_SERIALIZER_H */
diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h
new file mode 100644
index 000000000000..f9b4fdbbfb8d
--- /dev/null
+++ b/include/llvm/Remarks/RemarkStringTable.h
@@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can be for example serialized in a section to be consumed after
+/// the compilation.
+struct StringTable {
+  /// Allocator holding all the memory used by the map.
+  BumpPtrAllocator Allocator;
+  /// The string table containing all the unique strings used in the output.
+  /// It maps a string to a unique ID.
+  StringMap<unsigned, BumpPtrAllocator &> StrTab;
+  /// Total size of the string table when serialized.
+  size_t SerializedSize = 0;
+
+  StringTable() : Allocator(), StrTab(Allocator) {}
+  /// Add a string to the table. It returns a unique ID of the string.
+  std::pair<unsigned, StringRef> add(StringRef Str);
+  /// Serialize the string table to a stream. It is serialized as a little
+  /// endian uint64 (the size of the table in bytes) followed by a sequence of
+  /// NULL-terminated strings, where the N-th string is the string with the ID N
+  /// in the StrTab map.
+  void serialize(raw_ostream &OS) const;
+  /// Serialize the string table to a vector. This allows users to do the actual
+  /// writing to file/memory/other.
+  /// The string with the ID == N should be the N-th element in the vector.
+  std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index e03297b7c3c3..e152f383b3ec 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -1,9 +1,8 @@
 //===- AARCH64TargetParser.def - AARCH64 target parsing defines ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,78 +50,92 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
 #define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
 #endif
 // FIXME: This would be nicer were it tablegen
-AARCH64_ARCH_EXT_NAME("invalid",  AArch64::AEK_INVALID,  nullptr,  nullptr)
-AARCH64_ARCH_EXT_NAME("none",     AArch64::AEK_NONE,     nullptr,  nullptr)
-AARCH64_ARCH_EXT_NAME("crc",      AArch64::AEK_CRC,      "+crc",   "-crc")
-AARCH64_ARCH_EXT_NAME("lse",      AArch64::AEK_LSE,      "+lse",   "-lse")
-AARCH64_ARCH_EXT_NAME("rdm",      AArch64::AEK_RDM,      "+rdm",   "-rdm")
-AARCH64_ARCH_EXT_NAME("crypto",   AArch64::AEK_CRYPTO,   "+crypto","-crypto")
-AARCH64_ARCH_EXT_NAME("sm4",      AArch64::AEK_SM4,      "+sm4",   "-sm4")
-AARCH64_ARCH_EXT_NAME("sha3",     AArch64::AEK_SHA3,     "+sha3",  "-sha3")
-AARCH64_ARCH_EXT_NAME("sha2",     AArch64::AEK_SHA2,     "+sha2",  "-sha2")
-AARCH64_ARCH_EXT_NAME("aes",      AArch64::AEK_AES,      "+aes",   "-aes")
-AARCH64_ARCH_EXT_NAME("dotprod",  AArch64::AEK_DOTPROD,  "+dotprod","-dotprod")
-AARCH64_ARCH_EXT_NAME("fp",       AArch64::AEK_FP,       "+fp-armv8",  "-fp-armv8")
-AARCH64_ARCH_EXT_NAME("simd",     AArch64::AEK_SIMD,     "+neon",  "-neon")
-AARCH64_ARCH_EXT_NAME("fp16",     AArch64::AEK_FP16,     "+fullfp16",  "-fullfp16")
-AARCH64_ARCH_EXT_NAME("fp16fml",  AArch64::AEK_FP16FML,  "+fp16fml", "-fp16fml")
-AARCH64_ARCH_EXT_NAME("profile",  AArch64::AEK_PROFILE,  "+spe",  "-spe")
-AARCH64_ARCH_EXT_NAME("ras",      AArch64::AEK_RAS,      "+ras",  "-ras")
-AARCH64_ARCH_EXT_NAME("sve",      AArch64::AEK_SVE,      "+sve",  "-sve")
-AARCH64_ARCH_EXT_NAME("rcpc",     AArch64::AEK_RCPC,     "+rcpc", "-rcpc")
-AARCH64_ARCH_EXT_NAME("rng",      AArch64::AEK_RAND,     "+rand",  "-rand")
-AARCH64_ARCH_EXT_NAME("memtag",   AArch64::AEK_MTE,      "+mte",   "-mte")
-AARCH64_ARCH_EXT_NAME("ssbs",     AArch64::AEK_SSBS,     "+ssbs",  "-ssbs")
-AARCH64_ARCH_EXT_NAME("sb",       AArch64::AEK_SB,       "+sb",    "-sb")
-AARCH64_ARCH_EXT_NAME("predres",  AArch64::AEK_PREDRES,  "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("invalid",   AArch64::AEK_INVALID,  nullptr,  nullptr)
+AARCH64_ARCH_EXT_NAME("none",      AArch64::AEK_NONE,     nullptr,  nullptr)
+AARCH64_ARCH_EXT_NAME("crc",       AArch64::AEK_CRC,      "+crc",   "-crc")
+AARCH64_ARCH_EXT_NAME("lse",       AArch64::AEK_LSE,      "+lse",   "-lse")
+AARCH64_ARCH_EXT_NAME("rdm",       AArch64::AEK_RDM,      "+rdm",   "-rdm")
+AARCH64_ARCH_EXT_NAME("crypto",    AArch64::AEK_CRYPTO,   "+crypto","-crypto")
+AARCH64_ARCH_EXT_NAME("sm4",       AArch64::AEK_SM4,      "+sm4",   "-sm4")
+AARCH64_ARCH_EXT_NAME("sha3",      AArch64::AEK_SHA3,     "+sha3",  "-sha3")
+AARCH64_ARCH_EXT_NAME("sha2",      AArch64::AEK_SHA2,     "+sha2",  "-sha2")
+AARCH64_ARCH_EXT_NAME("aes",       AArch64::AEK_AES,      "+aes",   "-aes")
+AARCH64_ARCH_EXT_NAME("dotprod",   AArch64::AEK_DOTPROD,  "+dotprod","-dotprod")
+AARCH64_ARCH_EXT_NAME("fp",        AArch64::AEK_FP,       "+fp-armv8",  "-fp-armv8")
+AARCH64_ARCH_EXT_NAME("simd",      AArch64::AEK_SIMD,     "+neon",  "-neon")
+AARCH64_ARCH_EXT_NAME("fp16",      AArch64::AEK_FP16,     "+fullfp16",  "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml",   AArch64::AEK_FP16FML,  "+fp16fml", "-fp16fml")
+AARCH64_ARCH_EXT_NAME("profile",   AArch64::AEK_PROFILE,  "+spe",  "-spe")
+AARCH64_ARCH_EXT_NAME("ras",       AArch64::AEK_RAS,      "+ras",  "-ras")
+AARCH64_ARCH_EXT_NAME("sve",       AArch64::AEK_SVE,      "+sve",  "-sve")
+AARCH64_ARCH_EXT_NAME("sve2",      AArch64::AEK_SVE2,     "+sve2", "-sve2")
+AARCH64_ARCH_EXT_NAME("sve2-aes",  AArch64::AEK_SVE2AES,  "+sve2-aes", "-sve2-aes")
+AARCH64_ARCH_EXT_NAME("sve2-sm4",  AArch64::AEK_SVE2SM4,  "+sve2-sm4", "-sve2-sm4")
+AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
+AARCH64_ARCH_EXT_NAME("bitperm",   AArch64::AEK_BITPERM,  "+bitperm", "-bitperm")
+AARCH64_ARCH_EXT_NAME("rcpc",      AArch64::AEK_RCPC,     "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng",       AArch64::AEK_RAND,     "+rand",  "-rand")
+AARCH64_ARCH_EXT_NAME("memtag",    AArch64::AEK_MTE,      "+mte",   "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs",      AArch64::AEK_SSBS,     "+ssbs",  "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb",        AArch64::AEK_SB,       "+sb",    "-sb")
+AARCH64_ARCH_EXT_NAME("predres",   AArch64::AEK_PREDRES,  "+predres", "-predres")
 #undef AARCH64_ARCH_EXT_NAME
 
 #ifndef AARCH64_CPU_NAME
 #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)
 #endif
 AARCH64_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
 AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a75", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
+AARCH64_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                 (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+                  AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                 (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+                  AArch64::AEK_SSBS))
 AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_NONE))
+                 (AArch64::AEK_NONE))
 AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD))
+                 (AArch64::AEK_DOTPROD | AArch64::AEK_FP16))
+AARCH64_CPU_NAME("exynos-m5", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+                 (AArch64::AEK_DOTPROD | AArch64::AEK_FP16))
 AARCH64_CPU_NAME("falkor", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC | AArch64::AEK_RDM))
+                 (AArch64::AEK_CRC | AArch64::AEK_RDM))
 AARCH64_CPU_NAME("saphira", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_PROFILE))
+                 (AArch64::AEK_PROFILE))
 AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC))
+                 (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_NONE))
+                 (AArch64::AEK_NONE))
 AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+                 (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
 AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+                 (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
 AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+                 (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
 AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+                 (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
 AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
-                 (AArch64::AEK_PROFILE | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
-                  AArch64::AEK_DOTPROD))
+                 (AArch64::AEK_DOTPROD |
+                  AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
+                  AArch64::AEK_PROFILE))
 // Invalid CPU
 AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
 #undef AARCH64_CPU_NAME
diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h
index 76b77d474428..965d38535e74 100644
--- a/include/llvm/Support/AArch64TargetParser.h
+++ b/include/llvm/Support/AArch64TargetParser.h
@@ -1,9 +1,8 @@
 //===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,6 +49,11 @@ enum ArchExtKind : unsigned {
   AEK_SSBS =        1 << 20,
   AEK_SB =          1 << 21,
   AEK_PREDRES =     1 << 22,
+  AEK_SVE2 =        1 << 23,
+  AEK_SVE2AES =     1 << 24,
+  AEK_SVE2SM4 =     1 << 25,
+  AEK_SVE2SHA3 =    1 << 26,
+  AEK_BITPERM =     1 << 27,
 };
 
 enum class ArchKind {
diff --git a/include/llvm/Support/AMDGPUMetadata.h b/include/llvm/Support/AMDGPUMetadata.h
index 84851c07499d..f7f1ec40dde9 100644
--- a/include/llvm/Support/AMDGPUMetadata.h
+++ b/include/llvm/Support/AMDGPUMetadata.h
@@ -1,9 +1,8 @@
 //===--- AMDGPUMetadata.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -75,6 +74,7 @@ enum class ValueKind : uint8_t {
   HiddenPrintfBuffer     = 11,
   HiddenDefaultQueue     = 12,
   HiddenCompletionAction = 13,
+  HiddenMultiGridSyncArg = 14,
   Unknown                = 0xff
 };
 
@@ -157,6 +157,8 @@ constexpr char Name[] = "Name";
 constexpr char TypeName[] = "TypeName";
 /// Key for Kernel::Arg::Metadata::mSize.
 constexpr char Size[] = "Size";
+/// Key for Kernel::Arg::Metadata::mOffset.
+constexpr char Offset[] = "Offset";
 /// Key for Kernel::Arg::Metadata::mAlign.
 constexpr char Align[] = "Align";
 /// Key for Kernel::Arg::Metadata::mValueKind.
@@ -189,6 +191,8 @@ struct Metadata final {
   std::string mTypeName = std::string();
   /// Size in bytes. Required.
   uint32_t mSize = 0;
+  /// Offset in bytes. Required for code object v3, unused for code object v2.
+  uint32_t mOffset = 0;
   /// Alignment in bytes. Required.
   uint32_t mAlign = 0;
   /// Value kind. Required.
@@ -453,11 +457,30 @@ constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
 //===----------------------------------------------------------------------===//
 namespace PALMD {
 
-/// PAL metadata assembler directive.
+/// PAL metadata (old linear format) assembler directive.
 constexpr char AssemblerDirective[] = ".amd_amdgpu_pal_metadata";
 
+/// PAL metadata (new MsgPack format) beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_pal_metadata";
+
+/// PAL metadata (new MsgPack format) ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_pal_metadata";
+
 /// PAL metadata keys.
 enum Key : uint32_t {
+  R_2E12_COMPUTE_PGM_RSRC1 = 0x2e12,
+  R_2D4A_SPI_SHADER_PGM_RSRC1_LS = 0x2d4a,
+  R_2D0A_SPI_SHADER_PGM_RSRC1_HS = 0x2d0a,
+  R_2CCA_SPI_SHADER_PGM_RSRC1_ES = 0x2cca,
+  R_2C8A_SPI_SHADER_PGM_RSRC1_GS = 0x2c8a,
+  R_2C4A_SPI_SHADER_PGM_RSRC1_VS = 0x2c4a,
+  R_2C0A_SPI_SHADER_PGM_RSRC1_PS = 0x2c0a,
+  R_2E00_COMPUTE_DISPATCH_INITIATOR = 0x2e00,
+  R_A1B3_SPI_PS_INPUT_ENA = 0xa1b3,
+  R_A1B4_SPI_PS_INPUT_ADDR = 0xa1b4,
+  R_A1B6_SPI_PS_IN_CONTROL = 0xa1b6,
+  R_A2D5_VGT_SHADER_STAGES_EN = 0xa2d5,
+
   LS_NUM_USED_VGPRS = 0x10000021,
   HS_NUM_USED_VGPRS = 0x10000022,
   ES_NUM_USED_VGPRS = 0x10000023,
@@ -483,12 +506,6 @@ enum Key : uint32_t {
   CS_SCRATCH_SIZE = 0x1000004a
 };
 
-/// PAL metadata represented as a vector.
-typedef std::vector<uint32_t> Metadata;
-
-/// Converts \p PALMetadata to \p String.
-std::error_code toString(const Metadata &PALMetadata, std::string &String);
-
 } // end namespace PALMD
 } // end namespace AMDGPU
 } // end namespace llvm
diff --git a/include/llvm/Support/AMDHSAKernelDescriptor.h b/include/llvm/Support/AMDHSAKernelDescriptor.h
index 751699e3a19f..d1c2147536a7 100644
--- a/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -1,9 +1,8 @@
 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,8 +88,11 @@ enum : int32_t {
   COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
-  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
-  COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
+  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
+  COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
+  COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
+  COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
+  COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
 };
 #undef COMPUTE_PGM_RSRC1
 
@@ -120,6 +122,15 @@ enum : int32_t {
 };
 #undef COMPUTE_PGM_RSRC2
 
+// Compute program resource register 3. Must match hardware definition.
+#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
+enum : int32_t {
+  COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
+  COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
+};
+#undef COMPUTE_PGM_RSRC3
+
 // Kernel code properties. Must be kept backwards compatible.
 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
   AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
@@ -131,7 +142,9 @@ enum : int32_t {
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
-  KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
+  KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
+  KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
+  KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
 };
 #undef KERNEL_CODE_PROPERTY
 
@@ -141,7 +154,8 @@ struct kernel_descriptor_t {
   uint32_t private_segment_fixed_size;
   uint8_t reserved0[8];
   int64_t kernel_code_entry_byte_offset;
-  uint8_t reserved1[24];
+  uint8_t reserved1[20];
+  uint32_t compute_pgm_rsrc3; // GFX10+
   uint32_t compute_pgm_rsrc1;
   uint32_t compute_pgm_rsrc2;
   uint16_t kernel_code_properties;
@@ -166,6 +180,9 @@ static_assert(
 static_assert(
     offsetof(kernel_descriptor_t, reserved1) == 24,
     "invalid offset for reserved1");
+static_assert(
+    offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
+    "invalid offset for compute_pgm_rsrc3");
 static_assert(
     offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
     "invalid offset for compute_pgm_rsrc1");
diff --git a/include/llvm/Support/ARMAttributeParser.h b/include/llvm/Support/ARMAttributeParser.h
index 919f39721f86..f6c39abb4f21 100644
--- a/include/llvm/Support/ARMAttributeParser.h
+++ b/include/llvm/Support/ARMAttributeParser.h
@@ -1,9 +1,8 @@
 //===--- ARMAttributeParser.h - ARM Attribute Information Printer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,6 +53,8 @@ class ARMAttributeParser {
                  uint32_t &Offset);
   void Advanced_SIMD_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
                           uint32_t &Offset);
+  void MVE_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
+                uint32_t &Offset);
   void PCS_config(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
                   uint32_t &Offset);
   void ABI_PCS_R9_use(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h
index b8a03765a7c0..90481eaa1677 100644
--- a/include/llvm/Support/ARMBuildAttributes.h
+++ b/include/llvm/Support/ARMBuildAttributes.h
@@ -1,9 +1,8 @@
 //===-- ARMBuildAttributes.h - ARM Build Attributes -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,6 +67,7 @@ enum AttrType {
   MPextension_use           = 42, // recoded from 70 (ABI r2.08)
   DIV_use                   = 44,
   DSP_extension             = 46,
+  MVE_arch                  = 48,
   also_compatible_with      = 65,
   conformance               = 67,
   Virtualization_use        = 68,
@@ -111,6 +111,7 @@ enum CPUArch {
   v8_R     = 15,  // e.g. Cortex R52
   v8_M_Base= 16,  // v8_M_Base AArch32
   v8_M_Main= 17,  // v8_M_Main AArch32
+  v8_1_M_Main=21, // v8_1_M_Main AArch32
 };
 
 enum CPUArchProfile {               // (=7), uleb128
@@ -152,6 +153,10 @@ enum {
   AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
   AllowNeonARMv8_1a = 4,// ARM v8.1-A SIMD was permitted (RDMA)
 
+  // Tag_MVE_arch, (=48), uleb128
+  AllowMVEInteger = 1, // integer-only MVE was permitted
+  AllowMVEIntegerAndFloat = 2, // both integer and floating point MVE were permitted
+
   // Tag_ABI_PCS_R9_use, (=14), uleb128
   R9IsGPR = 0,        // R9 used as v6 (just another callee-saved register)
   R9IsSB = 1,         // R9 used as a global static base rgister
diff --git a/include/llvm/Support/ARMEHABI.h b/include/llvm/Support/ARMEHABI.h
index 9b052df0a908..3fbb56d65eb8 100644
--- a/include/llvm/Support/ARMEHABI.h
+++ b/include/llvm/Support/ARMEHABI.h
@@ -1,9 +1,8 @@
 //===--- ARMEHABI.h - ARM Exception Handling ABI ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def
index 9e844e2b464d..f466b3252748 100644
--- a/include/llvm/Support/ARMTargetParser.def
+++ b/include/llvm/Support/ARMTargetParser.def
@@ -1,9 +1,8 @@
 //===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,8 @@ ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FPUVersion::VFPV4, NeonSupportLevel::None
 ARM_FPU("fpv5-d16", FK_FPV5_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::D16)
 ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::SP_D16)
 ARM_FPU("fp-armv8", FK_FP_ARMV8, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::None)
+ARM_FPU("fp-armv8-fullfp16-d16", FK_FP_ARMV8_FULLFP16_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::D16)
+ARM_FPU("fp-armv8-fullfp16-sp-d16", FK_FP_ARMV8_FULLFP16_SP_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::SP_D16)
 ARM_FPU("neon", FK_NEON, FPUVersion::VFPV3, NeonSupportLevel::Neon, FPURestriction::None)
 ARM_FPU("neon-fp16", FK_NEON_FP16, FPUVersion::VFPV3_FP16, NeonSupportLevel::Neon, FPURestriction::None)
 ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FPUVersion::VFPV4, NeonSupportLevel::Neon, FPURestriction::None)
@@ -119,6 +120,8 @@ ARM_ARCH("armv8-m.base", ARMV8MBaseline, "8-M.Baseline", "v8m.base",
           ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIVTHUMB)
 ARM_ARCH("armv8-m.main", ARMV8MMainline, "8-M.Mainline", "v8m.main",
           ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIVTHUMB)
+ARM_ARCH("armv8.1-m.main", ARMV8_1MMainline, "8.1-M.Mainline", "v8.1m.main",
+          ARMBuildAttrs::CPUArch::v8_1_M_Main, FK_FP_ARMV8_FULLFP16_SP_D16, ARM::AEK_HWDIVTHUMB | ARM::AEK_RAS | ARM::AEK_LOB)
 // Non-standard Arch names.
 ARM_ARCH("iwmmxt", IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
           FK_NONE, ARM::AEK_NONE)
@@ -145,6 +148,9 @@ ARM_ARCH_EXT_NAME("aes",      ARM::AEK_AES,      "+aes",   "-aes")
 ARM_ARCH_EXT_NAME("dotprod",  ARM::AEK_DOTPROD,  "+dotprod","-dotprod")
 ARM_ARCH_EXT_NAME("dsp",      ARM::AEK_DSP,      "+dsp",   "-dsp")
 ARM_ARCH_EXT_NAME("fp",       ARM::AEK_FP,       nullptr,  nullptr)
+ARM_ARCH_EXT_NAME("fp.dp",    ARM::AEK_FP_DP,    nullptr,  nullptr)
+ARM_ARCH_EXT_NAME("mve",     (ARM::AEK_DSP | ARM::AEK_SIMD), "+mve", "-mve")
+ARM_ARCH_EXT_NAME("mve.fp",  (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
 ARM_ARCH_EXT_NAME("idiv",     (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
 ARM_ARCH_EXT_NAME("mp",       ARM::AEK_MP,       nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("simd",     ARM::AEK_SIMD,     nullptr,  nullptr)
@@ -159,6 +165,7 @@ ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("xscale",   ARM::AEK_XSCALE,   nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("fp16fml",  ARM::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 ARM_ARCH_EXT_NAME("sb",       ARM::AEK_SB,       "+sb",      "-sb")
+ARM_ARCH_EXT_NAME("lob",      ARM::AEK_LOB,      "+lob",   "-lob")
 #undef ARM_ARCH_EXT_NAME
 
 #ifndef ARM_HW_DIV_NAME
@@ -252,6 +259,7 @@ ARM_CPU_NAME("cortex-m4", ARMV7EM, FK_FPV4_SP_D16, true, ARM::AEK_NONE)
 ARM_CPU_NAME("cortex-m7", ARMV7EM, FK_FPV5_D16, false, ARM::AEK_NONE)
 ARM_CPU_NAME("cortex-m23", ARMV8MBaseline, FK_NONE, false, ARM::AEK_NONE)
 ARM_CPU_NAME("cortex-m33", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
+ARM_CPU_NAME("cortex-m35p", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
 ARM_CPU_NAME("cortex-a32", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
@@ -262,12 +270,18 @@ ARM_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("cortex-a75", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
             (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+            (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+            (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
 ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
              (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("exynos-m5", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+             (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
 ARM_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
 // Non-standard Arch names.
 ARM_CPU_NAME("iwmmxt", IWMMXT, FK_NONE, true, ARM::AEK_NONE)
diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h
index 71acc0dc72d0..4b9070dea596 100644
--- a/include/llvm/Support/ARMTargetParser.h
+++ b/include/llvm/Support/ARMTargetParser.h
@@ -1,9 +1,8 @@
 //===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,13 @@ enum ArchExtKind : unsigned {
   AEK_AES     =     1 << 16,
   AEK_FP16FML =     1 << 17,
   AEK_SB      =     1 << 18,
+  AEK_SVE2 =        1 << 19,
+  AEK_SVE2AES =     1 << 20,
+  AEK_SVE2SM4 =     1 << 21,
+  AEK_SVE2SHA3 =    1 << 22,
+  AEK_BITPERM =     1 << 23,
+  AEK_FP_DP   =     1 << 24,
+  AEK_LOB     =     1 << 25,
   // Unsupported extensions.
   AEK_OS = 0x8000000,
   AEK_IWMMXT = 0x10000000,
@@ -127,7 +133,8 @@ enum class FPUVersion {
   VFPV3,
   VFPV3_FP16,
   VFPV4,
-  VFPV5
+  VFPV5,
+  VFPV5_FULLFP16,
 };
 
 // An FPU name restricts the FPU in one of three ways:
@@ -234,6 +241,8 @@ StringRef getCPUAttr(ArchKind AK);
 StringRef getSubArch(ArchKind AK);
 StringRef getArchExtName(unsigned ArchExtKind);
 StringRef getArchExtFeature(StringRef ArchExt);
+bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+                           std::vector<StringRef> &Features);
 StringRef getHWDivName(unsigned HWDivKind);
 
 // Information by Name
diff --git a/include/llvm/Support/ARMWinEH.h b/include/llvm/Support/ARMWinEH.h
index 60174503ad49..857a0d3814a8 100644
--- a/include/llvm/Support/ARMWinEH.h
+++ b/include/llvm/Support/ARMWinEH.h
@@ -1,9 +1,8 @@
-//===-- llvm/Support/WinARMEH.h - Windows on ARM EH Constants ---*- C++ -*-===//
+//===-- llvm/Support/ARMWinEH.h - Windows on ARM EH Constants ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -383,7 +382,7 @@ struct ExceptionDataRecord {
     return ((Data[0] & 0x00400000) >> 22);
   }
 
-  uint8_t EpilogueCount() const {
+  uint16_t EpilogueCount() const {
     if (HeaderWords(*this) == 1) {
       if (isAArch64)
         return (Data[0] & 0x07C00000) >> 22;
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index 9e7a62b85e34..d12401f0eb49 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -1,9 +1,8 @@
 //===--- AlignOf.h - Portable calculation of type alignment -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 42d08378a677..09e967b98abc 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -1,9 +1,8 @@
 //===- Allocator.h - Simple memory allocation abstraction -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Support/ArrayRecycler.h b/include/llvm/Support/ArrayRecycler.h
index 68696be6bf3d..5256ce80c028 100644
--- a/include/llvm/Support/ArrayRecycler.h
+++ b/include/llvm/Support/ArrayRecycler.h
@@ -1,9 +1,8 @@
 //==- llvm/Support/ArrayRecycler.h - Recycling of Arrays ---------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Atomic.h b/include/llvm/Support/Atomic.h
index 552313f0c241..a8445fddc1a8 100644
--- a/include/llvm/Support/Atomic.h
+++ b/include/llvm/Support/Atomic.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Atomic.h - Atomic Operations -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/AtomicOrdering.h b/include/llvm/Support/AtomicOrdering.h
index a679ab30243e..763bc3ea7b28 100644
--- a/include/llvm/Support/AtomicOrdering.h
+++ b/include/llvm/Support/AtomicOrdering.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/AtomicOrdering.h ---Atomic Ordering---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/BinaryByteStream.h b/include/llvm/Support/BinaryByteStream.h
index 9808d3b72157..7acce9a03888 100644
--- a/include/llvm/Support/BinaryByteStream.h
+++ b/include/llvm/Support/BinaryByteStream.h
@@ -1,9 +1,8 @@
 //===- BinaryByteStream.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 // A BinaryStream which stores data in a single continguous memory buffer.
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryItemStream.h b/include/llvm/Support/BinaryItemStream.h
index 278723ddf8da..4cd66adcc01a 100644
--- a/include/llvm/Support/BinaryItemStream.h
+++ b/include/llvm/Support/BinaryItemStream.h
@@ -1,9 +1,8 @@
 //===- BinaryItemStream.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/BinaryStream.h b/include/llvm/Support/BinaryStream.h
index 7677214e48ee..fcf4398550ee 100644
--- a/include/llvm/Support/BinaryStream.h
+++ b/include/llvm/Support/BinaryStream.h
@@ -1,9 +1,8 @@
 //===- BinaryStream.h - Base interface for a stream of data -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 7c110fcb6a4b..96d09db69ae5 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -1,9 +1,8 @@
 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/BinaryStreamError.h b/include/llvm/Support/BinaryStreamError.h
index 7d9699d53639..cf6e034ffd2c 100644
--- a/include/llvm/Support/BinaryStreamError.h
+++ b/include/llvm/Support/BinaryStreamError.h
@@ -1,9 +1,8 @@
 //===- BinaryStreamError.h - Error extensions for Binary Streams *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index 392958de30d5..d8fddde66bfa 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -1,9 +1,8 @@
 //===- BinaryStreamReader.h - Reads objects from a binary stream *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -97,6 +96,18 @@ public:
     return Error::success();
   }
 
+  /// Read an unsigned LEB128 encoded value.
+  ///
+  /// \returns a success error code if the data was successfully read, otherwise
+  /// returns an appropriate error code.
+  Error readULEB128(uint64_t &Dest);
+
+  /// Read a signed LEB128 encoded value.
+  ///
+  /// \returns a success error code if the data was successfully read, otherwise
+  /// returns an appropriate error code.
+  Error readSLEB128(int64_t &Dest);
+
   /// Read a null terminated string from \p Dest.  Whether a copy occurs depends
   /// on the implementation of the underlying stream.  Updates the stream's
   /// offset to point after the newly read data.
diff --git a/include/llvm/Support/BinaryStreamRef.h b/include/llvm/Support/BinaryStreamRef.h
index d8dc1392c01c..7427b8da5b43 100644
--- a/include/llvm/Support/BinaryStreamRef.h
+++ b/include/llvm/Support/BinaryStreamRef.h
@@ -1,9 +1,8 @@
 //===- BinaryStreamRef.h - A copyable reference to a stream -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h
index 6e8a68a30474..86d2389d9182 100644
--- a/include/llvm/Support/BinaryStreamWriter.h
+++ b/include/llvm/Support/BinaryStreamWriter.h
@@ -1,9 +1,8 @@
 //===- BinaryStreamWriter.h - Writes objects to a BinaryStream ---*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -80,6 +79,20 @@ public:
     return writeInteger<U>(static_cast<U>(Num));
   }
 
+  /// Write the unsigned integer Value to the underlying stream using ULEB128
+  /// encoding.
+  ///
+  /// \returns a success error code if the data was successfully written,
+  /// otherwise returns an appropriate error code.
+  Error writeULEB128(uint64_t Value);
+
+  /// Write the signed integer Value to the underlying stream using SLEB128
+  /// encoding.
+  ///
+  /// \returns a success error code if the data was successfully written,
+  /// otherwise returns an appropriate error code.
+  Error writeSLEB128(int64_t Value);
+
   /// Write the string \p Str to the underlying stream followed by a null
   /// terminator.  On success, updates the offset so that subsequent writes
   /// occur at the next unwritten position.  \p Str need not be null terminated
diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h
index 4b468f7acb32..18fb60e1904b 100644
--- a/include/llvm/Support/BlockFrequency.h
+++ b/include/llvm/Support/BlockFrequency.h
@@ -1,9 +1,8 @@
 //===-------- BlockFrequency.h - Block Frequency Wrapper --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h
index 3a88e71c2480..cd9d369b4f4e 100644
--- a/include/llvm/Support/BranchProbability.h
+++ b/include/llvm/Support/BranchProbability.h
@@ -1,9 +1,8 @@
 //===- BranchProbability.h - Branch Probability Wrapper ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -119,6 +118,13 @@ public:
     return *this;
   }
 
+  BranchProbability &operator/=(BranchProbability RHS) {
+    assert(N != UnknownN && RHS.N != UnknownN &&
+           "Unknown probability cannot participate in arithmetics.");
+    N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
+    return *this;
+  }
+
   BranchProbability &operator/=(uint32_t RHS) {
     assert(N != UnknownN &&
            "Unknown probability cannot participate in arithmetics.");
@@ -129,27 +135,38 @@ public:
 
   BranchProbability operator+(BranchProbability RHS) const {
     BranchProbability Prob(*this);
-    return Prob += RHS;
+    Prob += RHS;
+    return Prob;
   }
 
   BranchProbability operator-(BranchProbability RHS) const {
     BranchProbability Prob(*this);
-    return Prob -= RHS;
+    Prob -= RHS;
+    return Prob;
   }
 
   BranchProbability operator*(BranchProbability RHS) const {
     BranchProbability Prob(*this);
-    return Prob *= RHS;
+    Prob *= RHS;
+    return Prob;
   }
 
   BranchProbability operator*(uint32_t RHS) const {
     BranchProbability Prob(*this);
-    return Prob *= RHS;
+    Prob *= RHS;
+    return Prob;
+  }
+
+  BranchProbability operator/(BranchProbability RHS) const {
+    BranchProbability Prob(*this);
+    Prob /= RHS;
+    return Prob;
   }
 
   BranchProbability operator/(uint32_t RHS) const {
     BranchProbability Prob(*this);
-    return Prob /= RHS;
+    Prob /= RHS;
+    return Prob;
   }
 
   bool operator==(BranchProbability RHS) const { return N == RHS.N; }
diff --git a/include/llvm/Support/BuryPointer.h b/include/llvm/Support/BuryPointer.h
index 53f1f395b922..276a5b7089c3 100644
--- a/include/llvm/Support/BuryPointer.h
+++ b/include/llvm/Support/BuryPointer.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/BuryPointer.h - Memory Manipulation/Leak ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h
index f60f99d376ad..46d6b4e3fa7d 100644
--- a/include/llvm/Support/CBindingWrapping.h
+++ b/include/llvm/Support/CBindingWrapping.h
@@ -1,9 +1,8 @@
-//===- llvm/Support/CBindingWrapph.h - C Interface Wrapping -----*- C++ -*-===//
+//===- llvm/Support/CBindingWrapping.h - C Interface Wrapping ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/CFGUpdate.h b/include/llvm/Support/CFGUpdate.h
index 63c24a3d2a20..eeaf5d0a21ac 100644
--- a/include/llvm/Support/CFGUpdate.h
+++ b/include/llvm/Support/CFGUpdate.h
@@ -1,9 +1,8 @@
 //===- CFGUpdate.h - Encode a CFG Edge Update. ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/COM.h b/include/llvm/Support/COM.h
index a2d5a7a68ba9..d59966f849b4 100644
--- a/include/llvm/Support/COM.h
+++ b/include/llvm/Support/COM.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/COM.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Support/CRC.h b/include/llvm/Support/CRC.h
new file mode 100644
index 000000000000..6ea8e3edcea4
--- /dev/null
+++ b/include/llvm/Support/CRC.h
@@ -0,0 +1,25 @@
+//===-- llvm/Support/CRC.h - Cyclic Redundancy Check-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic functions for calculating Cyclic Redundancy Check
+// or CRC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CRC_H
+#define LLVM_SUPPORT_CRC_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+/// zlib independent CRC32 calculation.
+uint32_t crc32(uint32_t CRC, StringRef S);
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/CachePruning.h b/include/llvm/Support/CachePruning.h
index cf3f8ec67a52..a72a86439f6a 100644
--- a/include/llvm/Support/CachePruning.h
+++ b/include/llvm/Support/CachePruning.h
@@ -1,9 +1,8 @@
 //=- CachePruning.h - Helper to manage the pruning of a cache dir -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Capacity.h b/include/llvm/Support/Capacity.h
index 7460f9825bd3..6b99e0aaa488 100644
--- a/include/llvm/Support/Capacity.h
+++ b/include/llvm/Support/Capacity.h
@@ -1,9 +1,8 @@
 //===--- Capacity.h - Generic computation of ADT memory use -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 3f21e0f9ebc3..46bdedb04cfe 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -144,6 +143,16 @@ template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
                        typename simplify_type<const Y>::SimpleType>::doit(Val);
 }
 
+// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
+// is accepted.
+//
+template <class X, class Y>
+LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
+  if (!Val)
+    return false;
+  return isa<X>(Val);
+}
+
 //===----------------------------------------------------------------------===//
 //                          cast<x> Support Templates
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/CheckedArithmetic.h b/include/llvm/Support/CheckedArithmetic.h
index 039c374136ff..8a50e3d5ddf6 100644
--- a/include/llvm/Support/CheckedArithmetic.h
+++ b/include/llvm/Support/CheckedArithmetic.h
@@ -1,9 +1,8 @@
 //==-- llvm/Support/CheckedArithmetic.h - Safe arithmetical operations *- C++ //
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,6 +49,15 @@ checkedAdd(T LHS, T RHS) {
   return checkedOp(LHS, RHS, &llvm::APInt::sadd_ov);
 }
 
+/// Subtract two signed integers \p LHS and \p RHS.
+/// \return Optional of difference if no signed overflow occurred,
+/// \c None otherwise.
+template <typename T>
+typename std::enable_if<std::is_signed<T>::value, llvm::Optional<T>>::type
+checkedSub(T LHS, T RHS) {
+  return checkedOp(LHS, RHS, &llvm::APInt::ssub_ov);
+}
+
 /// Multiply two signed integers \p LHS and \p RHS.
 /// \return Optional of product if no signed overflow occurred,
 /// \c None otherwise.
diff --git a/include/llvm/Support/Chrono.h b/include/llvm/Support/Chrono.h
index 57677e8d5cf1..334ab60835a4 100644
--- a/include/llvm/Support/Chrono.h
+++ b/include/llvm/Support/Chrono.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Chrono.h - Utilities for Timing Manipulation-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,21 +33,21 @@ template <typename D = std::chrono::nanoseconds>
 using TimePoint = std::chrono::time_point<std::chrono::system_clock, D>;
 
 /// Convert a TimePoint to std::time_t
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline std::time_t toTimeT(TimePoint<> TP) {
+inline std::time_t toTimeT(TimePoint<> TP) {
   using namespace std::chrono;
   return system_clock::to_time_t(
       time_point_cast<system_clock::time_point::duration>(TP));
 }
 
 /// Convert a std::time_t to a TimePoint
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<std::chrono::seconds>
+inline TimePoint<std::chrono::seconds>
 toTimePoint(std::time_t T) {
   using namespace std::chrono;
   return time_point_cast<seconds>(system_clock::from_time_t(T));
 }
 
 /// Convert a std::time_t + nanoseconds to a TimePoint
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<>
+inline TimePoint<>
 toTimePoint(std::time_t T, uint32_t nsec) {
   using namespace std::chrono;
   return time_point_cast<nanoseconds>(system_clock::from_time_t(T))
diff --git a/include/llvm/Support/CodeGen.h b/include/llvm/Support/CodeGen.h
index 22e74167266c..a3f423e558cf 100644
--- a/include/llvm/Support/CodeGen.h
+++ b/include/llvm/Support/CodeGen.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/CodeGen.h - CodeGen Concepts ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,13 +18,14 @@ namespace llvm {
 
   // Relocation model types.
   namespace Reloc {
-  enum Model { Static, PIC_, DynamicNoPIC, ROPI, RWPI, ROPI_RWPI };
+    // Cannot be named PIC due to collision with -DPIC
+    enum Model { Static, PIC_, DynamicNoPIC, ROPI, RWPI, ROPI_RWPI };
   }
 
   // Code model types.
   namespace CodeModel {
     // Sync changes with CodeGenCWrappers.h.
-  enum Model { Tiny, Small, Kernel, Medium, Large };
+    enum Model { Tiny, Small, Kernel, Medium, Large };
   }
 
   namespace PICLevel {
@@ -50,10 +50,10 @@ namespace llvm {
   // Code generation optimization level.
   namespace CodeGenOpt {
     enum Level {
-      None,        // -O0
-      Less,        // -O1
-      Default,     // -O2, -Os
-      Aggressive   // -O3
+      None = 0,      // -O0
+      Less = 1,      // -O1
+      Default = 2,   // -O2, -Os
+      Aggressive = 3 // -O3
     };
   }
 
diff --git a/include/llvm/Support/CodeGenCoverage.h b/include/llvm/Support/CodeGenCoverage.h
index c863be35b822..0b1af779ffb0 100644
--- a/include/llvm/Support/CodeGenCoverage.h
+++ b/include/llvm/Support/CodeGenCoverage.h
@@ -1,9 +1,8 @@
 //== llvm/Support/CodeGenCoverage.h ------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file This file provides rule coverage tracking for tablegen-erated CodeGen.
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index a8ad89384d17..3cc2c3c0121b 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/CommandLine.h - Command line handler --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -67,7 +66,8 @@ namespace cl {
 bool ParseCommandLineOptions(int argc, const char *const *argv,
                              StringRef Overview = "",
                              raw_ostream *Errs = nullptr,
-                             const char *EnvVar = nullptr);
+                             const char *EnvVar = nullptr,
+                             bool LongOptionsUseDoubleDash = false);
 
 //===----------------------------------------------------------------------===//
 // ParseEnvironmentOptions - Environment variable option processing alternate
@@ -159,23 +159,27 @@ enum OptionHidden {   // Control whether -help shows this option
 // AlwaysPrefix - Only allow the behavior enabled by the Prefix flag and reject
 // the Option=Value form.
 //
-// Grouping - With this option enabled, multiple letter options are allowed to
-// bunch together with only a single hyphen for the whole group.  This allows
-// emulation of the behavior that ls uses for example: ls -la === ls -l -a
-//
 
 enum FormattingFlags {
   NormalFormatting = 0x00, // Nothing special
   Positional = 0x01,       // Is a positional argument, no '-' required
   Prefix = 0x02,           // Can this option directly prefix its value?
-  AlwaysPrefix = 0x03,     // Can this option only directly prefix its value?
-  Grouping = 0x04          // Can this option group with other options?
+  AlwaysPrefix = 0x03      // Can this option only directly prefix its value?
 };
 
 enum MiscFlags {             // Miscellaneous flags to adjust argument
   CommaSeparated = 0x01,     // Should this cl::list split between commas?
   PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args?
-  Sink = 0x04                // Should this cl::list eat all unknown options?
+  Sink = 0x04,               // Should this cl::list eat all unknown options?
+
+  // Grouping - Can this option group with other options?
+  // If this is enabled, multiple letter options are allowed to bunch together
+  // with only a single hyphen for the whole group.  This allows emulation
+  // of the behavior that ls uses for example: ls -la === ls -l -a
+  Grouping = 0x08,
+
+  // Default option
+  DefaultOption = 0x10
 };
 
 //===----------------------------------------------------------------------===//
@@ -261,26 +265,27 @@ class Option {
   // Out of line virtual function to provide home for the class.
   virtual void anchor();
 
-  int NumOccurrences = 0; // The number of times specified
+  uint16_t NumOccurrences; // The number of times specified
   // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid
   // problems with signed enums in bitfields.
-  unsigned Occurrences : 3; // enum NumOccurrencesFlag
+  uint16_t Occurrences : 3; // enum NumOccurrencesFlag
   // not using the enum type for 'Value' because zero is an implementation
   // detail representing the non-value
-  unsigned Value : 2;
-  unsigned HiddenFlag : 2; // enum OptionHidden
-  unsigned Formatting : 3; // enum FormattingFlags
-  unsigned Misc : 3;
-  unsigned Position = 0;       // Position of last occurrence of the option
-  unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option.
+  uint16_t Value : 2;
+  uint16_t HiddenFlag : 2; // enum OptionHidden
+  uint16_t Formatting : 2; // enum FormattingFlags
+  uint16_t Misc : 5;
+  uint16_t FullyInitialized : 1; // Has addArgument been called?
+  uint16_t Position;             // Position of last occurrence of the option
+  uint16_t AdditionalVals;       // Greater than 0 for multi-valued option.
 
 public:
   StringRef ArgStr;   // The argument string itself (ex: "help", "o")
   StringRef HelpStr;  // The descriptive text message for -help
   StringRef ValueStr; // String describing what the value of this option is
-  OptionCategory *Category; // The Category this option belongs to
-  SmallPtrSet<SubCommand *, 4> Subs; // The subcommands this option belongs to.
-  bool FullyInitialized = false; // Has addArgument been called?
+  SmallVector<OptionCategory *, 1>
+      Categories;                    // The Categories this option belongs to
+  SmallPtrSet<SubCommand *, 1> Subs; // The subcommands this option belongs to.
 
   inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
     return (enum NumOccurrencesFlag)Occurrences;
@@ -306,6 +311,7 @@ public:
   bool hasArgStr() const { return !ArgStr.empty(); }
   bool isPositional() const { return getFormattingFlag() == cl::Positional; }
   bool isSink() const { return getMiscFlags() & cl::Sink; }
+  bool isDefaultOption() const { return getMiscFlags() & cl::DefaultOption; }
 
   bool isConsumeAfter() const {
     return getNumOccurrencesFlag() == cl::ConsumeAfter;
@@ -329,14 +335,17 @@ public:
   void setFormattingFlag(enum FormattingFlags V) { Formatting = V; }
   void setMiscFlag(enum MiscFlags M) { Misc |= M; }
   void setPosition(unsigned pos) { Position = pos; }
-  void setCategory(OptionCategory &C) { Category = &C; }
+  void addCategory(OptionCategory &C);
   void addSubCommand(SubCommand &S) { Subs.insert(&S); }
 
 protected:
   explicit Option(enum NumOccurrencesFlag OccurrencesFlag,
                   enum OptionHidden Hidden)
-      : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden),
-        Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {}
+      : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
+        HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0),
+        FullyInitialized(false), Position(0), AdditionalVals(0) {
+    Categories.push_back(&GeneralCategory);
+  }
 
   inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
 
@@ -382,7 +391,7 @@ public:
   }
 
   inline int getNumOccurrences() const { return NumOccurrences; }
-  inline void reset() { NumOccurrences = 0; }
+  void reset();
 };
 
 //===----------------------------------------------------------------------===//
@@ -447,7 +456,7 @@ struct cat {
 
   cat(OptionCategory &c) : Category(c) {}
 
-  template <class Opt> void apply(Opt &O) const { O.setCategory(Category); }
+  template <class Opt> void apply(Opt &O) const { O.addCategory(Category); }
 };
 
 // sub - Specify the subcommand that this option belongs to.
@@ -823,6 +832,8 @@ class basic_parser_impl { // non-template implementation of basic_parser<t>
 public:
   basic_parser_impl(Option &) {}
 
+  virtual ~basic_parser_impl() {}
+
   enum ValueExpected getValueExpectedFlagDefault() const {
     return ValueRequired;
   }
@@ -850,8 +861,6 @@ public:
   virtual void anchor();
 
 protected:
-  ~basic_parser_impl() = default;
-
   // A helper for basic_parser::printOptionDiff.
   void printOptionName(const Option &O, size_t GlobalWidth) const;
 };
@@ -865,15 +874,12 @@ public:
   using OptVal = OptionValue<DataType>;
 
   basic_parser(Option &O) : basic_parser_impl(O) {}
-
-protected:
-  ~basic_parser() = default;
 };
 
 //--------------------------------------------------
 // parser<bool>
 //
-template <> class parser<bool> final : public basic_parser<bool> {
+template <> class parser<bool> : public basic_parser<bool> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -900,8 +906,7 @@ extern template class basic_parser<bool>;
 
 //--------------------------------------------------
 // parser<boolOrDefault>
-template <>
-class parser<boolOrDefault> final : public basic_parser<boolOrDefault> {
+template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -927,7 +932,7 @@ extern template class basic_parser<boolOrDefault>;
 //--------------------------------------------------
 // parser<int>
 //
-template <> class parser<int> final : public basic_parser<int> {
+template <> class parser<int> : public basic_parser<int> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -949,7 +954,7 @@ extern template class basic_parser<int>;
 //--------------------------------------------------
 // parser<unsigned>
 //
-template <> class parser<unsigned> final : public basic_parser<unsigned> {
+template <> class parser<unsigned> : public basic_parser<unsigned> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -968,12 +973,34 @@ public:
 
 extern template class basic_parser<unsigned>;
 
+//--------------------------------------------------
+// parser<unsigned long>
+//
+template <>
+class parser<unsigned long> final : public basic_parser<unsigned long> {
+public:
+  parser(Option &O) : basic_parser(O) {}
+
+  // parse - Return true on error.
+  bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned long &Val);
+
+  // getValueName - Overload in subclass to provide a better default value.
+  StringRef getValueName() const override { return "ulong"; }
+
+  void printOptionDiff(const Option &O, unsigned long V, OptVal Default,
+                       size_t GlobalWidth) const;
+
+  // An out-of-line virtual method to provide a 'home' for this class.
+  void anchor() override;
+};
+
+extern template class basic_parser<unsigned long>;
+
 //--------------------------------------------------
 // parser<unsigned long long>
 //
 template <>
-class parser<unsigned long long> final
-    : public basic_parser<unsigned long long> {
+class parser<unsigned long long> : public basic_parser<unsigned long long> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -982,7 +1009,7 @@ public:
              unsigned long long &Val);
 
   // getValueName - Overload in subclass to provide a better default value.
-  StringRef getValueName() const override { return "uint"; }
+  StringRef getValueName() const override { return "ulong"; }
 
   void printOptionDiff(const Option &O, unsigned long long V, OptVal Default,
                        size_t GlobalWidth) const;
@@ -996,7 +1023,7 @@ extern template class basic_parser<unsigned long long>;
 //--------------------------------------------------
 // parser<double>
 //
-template <> class parser<double> final : public basic_parser<double> {
+template <> class parser<double> : public basic_parser<double> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -1018,7 +1045,7 @@ extern template class basic_parser<double>;
 //--------------------------------------------------
 // parser<float>
 //
-template <> class parser<float> final : public basic_parser<float> {
+template <> class parser<float> : public basic_parser<float> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -1040,7 +1067,7 @@ extern template class basic_parser<float>;
 //--------------------------------------------------
 // parser<std::string>
 //
-template <> class parser<std::string> final : public basic_parser<std::string> {
+template <> class parser<std::string> : public basic_parser<std::string> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -1065,7 +1092,7 @@ extern template class basic_parser<std::string>;
 //--------------------------------------------------
 // parser<char>
 //
-template <> class parser<char> final : public basic_parser<char> {
+template <> class parser<char> : public basic_parser<char> {
 public:
   parser(Option &O) : basic_parser(O) {}
 
@@ -1178,7 +1205,11 @@ template <> struct applicator<FormattingFlags> {
 };
 
 template <> struct applicator<MiscFlags> {
-  static void opt(MiscFlags MF, Option &O) { O.setMiscFlag(MF); }
+  static void opt(MiscFlags MF, Option &O) {
+    assert((MF != Grouping || O.ArgStr.size() == 1) &&
+           "cl::Grouping can only apply to single character Options.");
+    O.setMiscFlag(MF);
+  }
 };
 
 // apply method - Apply modifiers to an option in a type safe way.
@@ -1398,6 +1429,8 @@ template <class DataType, class StorageClass> class list_storage {
 public:
   list_storage() = default;
 
+  void clear() {}
+
   bool setLocation(Option &O, StorageClass &L) {
     if (Location)
       return O.error("cl::location(x) specified more than once!");
@@ -1449,6 +1482,10 @@ public:
   reference operator[](size_type pos) { return Storage[pos]; }
   const_reference operator[](size_type pos) const { return Storage[pos]; }
 
+  void clear() {
+    Storage.clear();
+  }
+
   iterator erase(const_iterator pos) { return Storage.erase(pos); }
   iterator erase(const_iterator first, const_iterator last) {
     return Storage.erase(first, last);
@@ -1526,7 +1563,10 @@ class list : public Option, public list_storage<DataType, StorageClass> {
   void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const override {
   }
 
-  void setDefault() override {}
+  void setDefault() override {
+    Positions.clear();
+    list_storage<DataType, StorageClass>::clear();
+  }
 
   void done() {
     addArgument();
@@ -1732,7 +1772,10 @@ class alias : public Option {
       error("cl::alias must have argument name specified!");
     if (!AliasFor)
       error("cl::alias must have an cl::aliasopt(option) specified!");
+    if (!Subs.empty())
+      error("cl::alias must not have cl::sub(), aliased option's cl::sub() will be used!");
     Subs = AliasFor->Subs;
+    Categories = AliasFor->Categories;
     addArgument();
   }
 
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 14e4d6e97140..3f4f465f3960 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Compiler.h - Compiler abstraction support --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -255,6 +254,15 @@
 #define LLVM_FALLTHROUGH
 #endif
 
+/// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that
+/// they are constant initialized.
+#if __has_cpp_attribute(clang::require_constant_initialization)
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION                                   \
+  [[clang::require_constant_initialization]]
+#else
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION
+#endif
+
 /// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
 /// pedantic diagnostics.
 #ifdef __GNUC__
diff --git a/include/llvm/Support/Compression.h b/include/llvm/Support/Compression.h
index f7258f4bf8f8..5bc0e56913fe 100644
--- a/include/llvm/Support/Compression.h
+++ b/include/llvm/Support/Compression.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Compression.h ---Compression----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
index 6ae56c2470bb..1add185330fa 100644
--- a/include/llvm/Support/ConvertUTF.h
+++ b/include/llvm/Support/ConvertUTF.h
@@ -1,9 +1,8 @@
 /*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *==------------------------------------------------------------------------==*/
 /*
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
index 7b3fd4f882e4..feb449e2899c 100644
--- a/include/llvm/Support/CrashRecoveryContext.h
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -1,9 +1,8 @@
 //===--- CrashRecoveryContext.h - Crash Recovery ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/DJB.h b/include/llvm/Support/DJB.h
index e03111473362..8a04a324a5dc 100644
--- a/include/llvm/Support/DJB.h
+++ b/include/llvm/Support/DJB.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/DJB.h ---DJB Hash --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 4381b5bf1633..ec01b7d9576a 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -1,9 +1,8 @@
-//===-- llvm/Support/DotGraphTraits.h - Customize .dot output ---*- C++ -*-===//
+//===-- llvm/Support/DOTGraphTraits.h - Customize .dot output ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index 2b1639856e79..6b08a2a2a445 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -1,9 +1,8 @@
 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/DataTypes.h b/include/llvm/Support/DataTypes.h
index ad60a5b3f300..a3fcc82531b7 100644
--- a/include/llvm/Support/DataTypes.h
+++ b/include/llvm/Support/DataTypes.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/DataTypes.h - Define fixed size types ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index df86dbb82414..64b730951bda 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Debug.h - Easy way to add debug output ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/DebugCounter.h b/include/llvm/Support/DebugCounter.h
index 6eadd5c6aeff..e7d1fa68f21a 100644
--- a/include/llvm/Support/DebugCounter.h
+++ b/include/llvm/Support/DebugCounter.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/DebugCounter.h - Debug counter support ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index 9563b483f6d5..95d5ba281e22 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/DynamicLibrary.h - Portable Dynamic Library -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index a4d3f4ff793d..d8be94427d7e 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -1,9 +1,8 @@
 //===- Endian.h - Utilities for IO with endian specific data ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -204,10 +203,14 @@ inline void writeAtBitAlignment(void *memory, value_type value,
 
 namespace detail {
 
-template<typename value_type,
-         endianness endian,
-         std::size_t alignment>
+template<typename ValueType,
+         endianness Endian,
+         std::size_t Alignment>
 struct packed_endian_specific_integral {
+  using value_type = ValueType;
+  static constexpr endianness endian = Endian;
+  static constexpr std::size_t alignment = Alignment;
+
   packed_endian_specific_integral() = default;
 
   explicit packed_endian_specific_integral(value_type val) { *this = val; }
@@ -335,6 +338,17 @@ using unaligned_int32_t =
 using unaligned_int64_t =
     detail::packed_endian_specific_integral<int64_t, native, unaligned>;
 
+template <typename T>
+using little_t = detail::packed_endian_specific_integral<T, little, unaligned>;
+template <typename T>
+using big_t = detail::packed_endian_specific_integral<T, big, unaligned>;
+
+template <typename T>
+using aligned_little_t =
+    detail::packed_endian_specific_integral<T, little, aligned>;
+template <typename T>
+using aligned_big_t = detail::packed_endian_specific_integral<T, big, aligned>;
+
 namespace endian {
 
 template <typename T> inline T read(const void *P, endianness E) {
diff --git a/include/llvm/Support/EndianStream.h b/include/llvm/Support/EndianStream.h
index 9742e253ad3e..87898038d216 100644
--- a/include/llvm/Support/EndianStream.h
+++ b/include/llvm/Support/EndianStream.h
@@ -1,9 +1,8 @@
 //===- EndianStream.h - Stream ops with endian specific data ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Errc.h b/include/llvm/Support/Errc.h
index dce42782a0d3..9be8e5705a54 100644
--- a/include/llvm/Support/Errc.h
+++ b/include/llvm/Support/Errc.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Errc.h - Defines the llvm::errc enum --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Errno.h b/include/llvm/Support/Errno.h
index 8069c3639df3..aedb5fb292b8 100644
--- a/include/llvm/Support/Errno.h
+++ b/include/llvm/Support/Errno.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Errno.h - Portable+convenient errno handling -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h
index ee2cbeec97a8..299fce7a1368 100644
--- a/include/llvm/Support/Error.h
+++ b/include/llvm/Support/Error.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1161,8 +1160,8 @@ private:
 
 /// Create formatted StringError object.
 template <typename... Ts>
-Error createStringError(std::error_code EC, char const *Fmt,
-                        const Ts &... Vals) {
+inline Error createStringError(std::error_code EC, char const *Fmt,
+                               const Ts &... Vals) {
   std::string Buffer;
   raw_string_ostream Stream(Buffer);
   Stream << format(Fmt, Vals...);
@@ -1171,18 +1170,27 @@ Error createStringError(std::error_code EC, char const *Fmt,
 
 Error createStringError(std::error_code EC, char const *Msg);
 
+template <typename... Ts>
+inline Error createStringError(std::errc EC, char const *Fmt,
+                               const Ts &... Vals) {
+  return createStringError(std::make_error_code(EC), Fmt, Vals...);
+}
+
 /// This class wraps a filename and another Error.
 ///
 /// In some cases, an error needs to live along a 'source' name, in order to
 /// show more detailed information to the user.
 class FileError final : public ErrorInfo<FileError> {
 
-  friend Error createFileError(std::string, Error);
+  friend Error createFileError(const Twine &, Error);
+  friend Error createFileError(const Twine &, size_t, Error);
 
 public:
   void log(raw_ostream &OS) const override {
     assert(Err && !FileName.empty() && "Trying to log after takeError().");
     OS << "'" << FileName << "': ";
+    if (Line.hasValue())
+      OS << "line " << Line.getValue() << ": ";
     Err->log(OS);
   }
 
@@ -1194,29 +1202,51 @@ public:
   static char ID;
 
 private:
-  FileError(std::string F, std::unique_ptr<ErrorInfoBase> E) {
+  FileError(const Twine &F, Optional<size_t> LineNum,
+            std::unique_ptr<ErrorInfoBase> E) {
     assert(E && "Cannot create FileError from Error success value.");
-    assert(!F.empty() &&
+    assert(!F.isTriviallyEmpty() &&
            "The file name provided to FileError must not be empty.");
-    FileName = F;
+    FileName = F.str();
     Err = std::move(E);
+    Line = std::move(LineNum);
   }
 
-  static Error build(std::string F, Error E) {
-    return Error(std::unique_ptr<FileError>(new FileError(F, E.takePayload())));
+  static Error build(const Twine &F, Optional<size_t> Line, Error E) {
+    return Error(
+        std::unique_ptr<FileError>(new FileError(F, Line, E.takePayload())));
   }
 
   std::string FileName;
+  Optional<size_t> Line;
   std::unique_ptr<ErrorInfoBase> Err;
 };
 
 /// Concatenate a source file path and/or name with an Error. The resulting
 /// Error is unchecked.
-inline Error createFileError(std::string F, Error E) {
-  return FileError::build(F, std::move(E));
+inline Error createFileError(const Twine &F, Error E) {
+  return FileError::build(F, Optional<size_t>(), std::move(E));
+}
+
+/// Concatenate a source file path and/or name with line number and an Error.
+/// The resulting Error is unchecked.
+inline Error createFileError(const Twine &F, size_t Line, Error E) {
+  return FileError::build(F, Optional<size_t>(Line), std::move(E));
+}
+
+/// Concatenate a source file path and/or name with a std::error_code
+/// to form an Error object.
+inline Error createFileError(const Twine &F, std::error_code EC) {
+  return createFileError(F, errorCodeToError(EC));
+}
+
+/// Concatenate a source file path and/or name with line number and
+/// std::error_code to form an Error object.
+inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) {
+  return createFileError(F, Line, errorCodeToError(EC));
 }
 
-Error createFileError(std::string F, ErrorSuccess) = delete;
+Error createFileError(const Twine &F, ErrorSuccess) = delete;
 
 /// Helper for check-and-exit error handling.
 ///
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index fec39e59a717..f75c2984a9ff 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/ErrorHandling.h - Fatal error handling ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h
index e6ce764ad822..8211f4d8a098 100644
--- a/include/llvm/Support/ErrorOr.h
+++ b/include/llvm/Support/ErrorOr.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/ErrorOr.h - Error Smart Pointer -------------*- C++ -*-===//
 //
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h
index 4061a26e22c5..0cd25a71a3b3 100644
--- a/include/llvm/Support/FileCheck.h
+++ b/include/llvm/Support/FileCheck.h
@@ -1,9 +1,8 @@
 //==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,9 +36,218 @@ struct FileCheckRequest {
   bool VerboseVerbose = false;
 };
 
+//===----------------------------------------------------------------------===//
+// Numeric substitution handling code.
+//===----------------------------------------------------------------------===//
+
+/// Base class representing the AST of a given expression.
+class FileCheckExpressionAST {
+public:
+  virtual ~FileCheckExpressionAST() = default;
+
+  /// Evaluates and \returns the value of the expression represented by this
+  /// AST or an error if evaluation fails.
+  virtual Expected<uint64_t> eval() const = 0;
+};
+
+/// Class representing an unsigned literal in the AST of an expression.
+class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+private:
+  /// Actual value of the literal.
+  uint64_t Value;
+
+public:
+  /// Constructs a literal with the specified value.
+  FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+
+  /// \returns the literal's value. Overrides FileCheckExpressionAST::eval().
+  Expected<uint64_t> eval() const override { return Value; }
+};
+
+/// Class to represent an undefined variable error, which quotes that
+/// variable's name when printed.
+class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+private:
+  StringRef VarName;
+
+public:
+  static char ID;
+
+  FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+
+  StringRef getVarName() const { return VarName; }
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print name of variable associated with this error.
+  void log(raw_ostream &OS) const override {
+    OS << "\"";
+    OS.write_escaped(VarName) << "\"";
+  }
+};
+
+/// Class representing a numeric variable and its associated current value.
+class FileCheckNumericVariable {
+private:
+  /// Name of the numeric variable.
+  StringRef Name;
+
+  /// Value of numeric variable, if defined, or None otherwise.
+  Optional<uint64_t> Value;
+
+  /// Line number where this variable is defined, or None if defined before
+  /// input is parsed. Used to determine whether a variable is defined on the
+  /// same line as a given use.
+  Optional<size_t> DefLineNumber;
+
+public:
+  /// Constructor for a variable \p Name defined at line \p DefLineNumber or
+  /// defined before input is parsed if DefLineNumber is None.
+  FileCheckNumericVariable(StringRef Name,
+                           Optional<size_t> DefLineNumber = None)
+      : Name(Name), DefLineNumber(DefLineNumber) {}
+
+  /// \returns name of this numeric variable.
+  StringRef getName() const { return Name; }
+
+  /// \returns this variable's value.
+  Optional<uint64_t> getValue() const { return Value; }
+
+  /// Sets value of this numeric variable, if undefined. Triggers an assertion
+  /// failure if the variable is actually defined.
+  void setValue(uint64_t Value);
+
+  /// Clears value of this numeric variable, regardless of whether it is
+  /// currently defined or not.
+  void clearValue();
+
+  /// \returns the line number where this variable is defined, if any, or None
+  /// if defined before input is parsed.
+  Optional<size_t> getDefLineNumber() const { return DefLineNumber; }
+};
+
+/// Class representing the use of a numeric variable in the AST of an
+/// expression.
+class FileCheckNumericVariableUse : public FileCheckExpressionAST {
+private:
+  /// Name of the numeric variable.
+  StringRef Name;
+
+  /// Pointer to the class instance for the variable this use is about.
+  FileCheckNumericVariable *NumericVariable;
+
+public:
+  FileCheckNumericVariableUse(StringRef Name,
+                              FileCheckNumericVariable *NumericVariable)
+      : Name(Name), NumericVariable(NumericVariable) {}
+
+  /// \returns the value of the variable referenced by this instance.
+  Expected<uint64_t> eval() const override;
+};
+
+/// Type of functions evaluating a given binary operation.
+using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
+
+/// Class representing a single binary operation in the AST of an expression.
+class FileCheckASTBinop : public FileCheckExpressionAST {
+private:
+  /// Left operand.
+  std::unique_ptr<FileCheckExpressionAST> LeftOperand;
+
+  /// Right operand.
+  std::unique_ptr<FileCheckExpressionAST> RightOperand;
+
+  /// Pointer to function that can evaluate this binary operation.
+  binop_eval_t EvalBinop;
+
+public:
+  /// Constructs a binary operation evaluated by \p EvalBinop, taking ownership
+  /// of the operand ASTs \p LeftOp and \p RightOp.
+  FileCheckASTBinop(binop_eval_t EvalBinop,
+                    std::unique_ptr<FileCheckExpressionAST> LeftOp,
+                    std::unique_ptr<FileCheckExpressionAST> RightOp)
+      : LeftOperand(std::move(LeftOp)), RightOperand(std::move(RightOp)),
+        EvalBinop(EvalBinop) {}
+
+  /// Evaluates the value of the binary operation represented by this AST,
+  /// using EvalBinop on the result of recursively evaluating the operands.
+  /// \returns the expression value or an error if an undefined numeric
+  /// variable is used in one of the operands.
+  Expected<uint64_t> eval() const override;
+};
+
+class FileCheckPatternContext;
+
+/// Class representing a substitution to perform in the RegExStr string.
+class FileCheckSubstitution {
+protected:
+  /// Pointer to a class instance holding, among other things, the table with
+  /// the values of live string variables at the start of any given CHECK line.
+  /// Used for substituting string variables with the text they were defined
+  /// as. Expressions are linked to the numeric variables they use at
+  /// parse time and directly access the value of the numeric variable to
+  /// evaluate their value.
+  FileCheckPatternContext *Context;
+
+  /// The string that needs to be substituted for something else. For a
+  /// string variable this is its name, otherwise this is the whole expression.
+  StringRef FromStr;
+
+  /// Index in RegExStr of where to do the substitution.
+  size_t InsertIdx;
+
+public:
+  FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
+                        size_t InsertIdx)
+      : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
+
+  virtual ~FileCheckSubstitution() = default;
+
+  /// \returns the string to be substituted for something else.
+  StringRef getFromString() const { return FromStr; }
+
+  /// \returns the index where the substitution is to be performed in RegExStr.
+  size_t getIndex() const { return InsertIdx; }
+
+  /// \returns a string containing the result of the substitution represented
+  /// by this class instance or an error if substitution failed.
+  virtual Expected<std::string> getResult() const = 0;
+};
+
+class FileCheckStringSubstitution : public FileCheckSubstitution {
+public:
+  FileCheckStringSubstitution(FileCheckPatternContext *Context,
+                              StringRef VarName, size_t InsertIdx)
+      : FileCheckSubstitution(Context, VarName, InsertIdx) {}
+
+  /// \returns the text that the string variable in this substitution matched
+  /// when defined, or an error if the variable is undefined.
+  Expected<std::string> getResult() const override;
+};
+
+/// Class representing a substitution of the value of an expression.
+class FileCheckNumericSubstitution : public FileCheckSubstitution {
+private:
+  /// Pointer to the class representing the expression whose value is to be
+  /// substituted.
+  std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+
+public:
+  FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+                               std::unique_ptr<FileCheckExpressionAST> ExprAST,
+                               size_t InsertIdx)
+      : FileCheckSubstitution(Context, Expr, InsertIdx),
+        ExpressionAST(std::move(ExprAST)) {}
+
+  /// \returns a string containing the result of evaluating the expression in
+  /// this substitution, or an error if evaluation failed.
+  Expected<std::string> getResult() const override;
+};
 
 //===----------------------------------------------------------------------===//
-// Pattern Handling Code.
+// Pattern handling code.
 //===----------------------------------------------------------------------===//
 
 namespace Check {
@@ -78,12 +286,133 @@ public:
   int getCount() const { return Count; }
   FileCheckType &setCount(int C);
 
+  /// \returns a description of \p Prefix.
   std::string getDescription(StringRef Prefix) const;
 };
-}
+} // namespace Check
 
 struct FileCheckDiag;
 
+/// Class holding the FileCheckPattern global state, shared by all patterns:
+/// tables holding values of variables and whether they are defined or not at
+/// any given time in the matching process.
+class FileCheckPatternContext {
+  friend class FileCheckPattern;
+
+private:
+  /// When matching a given pattern, this holds the value of all the string
+  /// variables defined in previous patterns. In a pattern, only the last
+  /// definition for a given variable is recorded in this table.
+  /// Back-references are used for uses after any other definition.
+  StringMap<StringRef> GlobalVariableTable;
+
+  /// Map of all string variables defined so far. Used at parse time to detect
+  /// a name conflict between a numeric variable and a string variable when
+  /// the former is defined on a later line than the latter.
+  StringMap<bool> DefinedVariableTable;
+
+  /// When matching a given pattern, this holds the pointers to the classes
+  /// representing the numeric variables defined in previous patterns. When
+  /// matching a pattern all definitions for that pattern are recorded in the
+  /// NumericVariableDefs table in the FileCheckPattern instance of that
+  /// pattern.
+  StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
+
+  /// Pointer to the class instance representing the @LINE pseudo variable for
+  /// easily updating its value.
+  FileCheckNumericVariable *LineVariable = nullptr;
+
+  /// Vector holding pointers to all parsed numeric variables. Used to
+  /// automatically free them once they are guaranteed to no longer be used.
+  std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
+
+  /// Vector holding pointers to all substitutions. Used to automatically free
+  /// them once they are guaranteed to no longer be used.
+  std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
+
+public:
+  /// \returns the value of string variable \p VarName or an error if no such
+  /// variable has been defined.
+  Expected<StringRef> getPatternVarValue(StringRef VarName);
+
+  /// Defines string and numeric variables from definitions given on the
+  /// command line, passed as a vector of [#]VAR=VAL strings in
+  /// \p CmdlineDefines. \returns an error list containing diagnostics against
+  /// \p SM for all definition parsing failures, if any, or Success otherwise.
+  Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
+                               SourceMgr &SM);
+
+  /// Create @LINE pseudo variable. Value is set when patterns are being
+  /// matched.
+  void createLineVariable();
+
+  /// Undefines local variables (variables whose name does not start with a '$'
+  /// sign), i.e. removes them from GlobalVariableTable and from
+  /// GlobalNumericVariableTable and also clears the value of numeric
+  /// variables.
+  void clearLocalVars();
+
+private:
+  /// Makes a new numeric variable and registers it for destruction when the
+  /// context is destroyed.
+  template <class... Types>
+  FileCheckNumericVariable *makeNumericVariable(Types... args);
+
+  /// Makes a new string substitution and registers it for destruction when the
+  /// context is destroyed.
+  FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
+                                                size_t InsertIdx);
+
+  /// Makes a new numeric substitution and registers it for destruction when
+  /// the context is destroyed.
+  FileCheckSubstitution *
+  makeNumericSubstitution(StringRef ExpressionStr,
+                          std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+                          size_t InsertIdx);
+};
+
+/// Class to represent an error holding a diagnostic with location information
+/// used when printing it.
+class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
+private:
+  SMDiagnostic Diagnostic;
+
+public:
+  static char ID;
+
+  FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(std::move(Diag)) {}
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print diagnostic associated with this error when printing the error.
+  void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
+
+  static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
+    return make_error<FileCheckErrorDiagnostic>(
+        SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
+  }
+
+  static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
+    return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
+  }
+};
+
+class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
+public:
+  static char ID;
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print a fixed "not found" message to \p OS when printing the error.
+  void log(raw_ostream &OS) const override {
+    OS << "String not found in input";
+  }
+};
+
 class FileCheckPattern {
   SMLoc PatternLoc;
 
@@ -95,43 +424,143 @@ class FileCheckPattern {
   /// a fixed string to match.
   std::string RegExStr;
 
-  /// Entries in this vector map to uses of a variable in the pattern, e.g.
-  /// "foo[[bar]]baz".  In this case, the RegExStr will contain "foobaz" and
-  /// we'll get an entry in this vector that tells us to insert the value of
-  /// bar at offset 3.
-  std::vector<std::pair<StringRef, unsigned>> VariableUses;
+  /// Entries in this vector represent a substitution of a string variable or
+  /// an expression in the RegExStr regex at match time. For example, in the
+  /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
+  /// RegExStr will contain "foobaz" and we'll get two entries in this vector
+  /// that tell us to insert the value of string variable "bar" at offset 3
+  /// and the value of expression "N+1" at offset 6.
+  std::vector<FileCheckSubstitution *> Substitutions;
 
-  /// Maps definitions of variables to their parenthesized capture numbers.
+  /// Maps names of string variables defined in a pattern to the number of
+  /// their parenthesis group in RegExStr capturing their last definition.
+  ///
+  /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
+  /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
+  /// the value captured for QUUX on the earlier line where it was defined, and
+  /// VariableDefs will map "bar" to the third parenthesis group which captures
+  /// the second definition of "bar".
   ///
-  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
-  /// 1.
+  /// Note: uses std::map rather than StringMap to be able to get the key when
+  /// iterating over values.
   std::map<StringRef, unsigned> VariableDefs;
 
+  /// Structure representing the definition of a numeric variable in a pattern.
+  /// It holds the pointer to the class representing the numeric variable whose
+  /// value is being defined and the number of the parenthesis group in
+  /// RegExStr to capture that value.
+  struct FileCheckNumericVariableMatch {
+    /// Pointer to class representing the numeric variable whose value is being
+    /// defined.
+    FileCheckNumericVariable *DefinedNumericVariable;
+
+    /// Number of the parenthesis group in RegExStr that captures the value of
+    /// this numeric variable definition.
+    unsigned CaptureParenGroup;
+  };
+
+  /// Holds the number of the parenthesis group in RegExStr and pointer to the
+  /// corresponding FileCheckNumericVariable class instance of all numeric
+  /// variable definitions. Used to set the matched value of all those
+  /// variables.
+  StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
+
+  /// Pointer to a class instance holding the global state shared by all
+  /// patterns:
+  /// - separate tables with the values of live string and numeric variables
+  ///   respectively at the start of any given CHECK line;
+  /// - table holding whether a string variable has been defined at any given
+  ///   point during the parsing phase.
+  FileCheckPatternContext *Context;
+
   Check::FileCheckType CheckTy;
 
-  /// Contains the number of line this pattern is in.
-  unsigned LineNumber;
+  /// Line number for this CHECK pattern or None if it is an implicit pattern.
+  /// Used to determine whether a variable definition is made on an earlier
+  /// line to the one with this CHECK.
+  Optional<size_t> LineNumber;
 
 public:
-  explicit FileCheckPattern(Check::FileCheckType Ty)
-      : CheckTy(Ty) {}
+  FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
+                   Optional<size_t> Line = None)
+      : Context(Context), CheckTy(Ty), LineNumber(Line) {}
 
-  /// Returns the location in source code.
+  /// \returns the location in source code.
   SMLoc getLoc() const { return PatternLoc; }
 
-  bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
-                    unsigned LineNumber, const FileCheckRequest &Req);
-  size_t Match(StringRef Buffer, size_t &MatchLen,
-               StringMap<StringRef> &VariableTable) const;
-  void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
-                         const StringMap<StringRef> &VariableTable,
-                         SMRange MatchRange = None) const;
-  void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
-                       const StringMap<StringRef> &VariableTable,
+  /// \returns the pointer to the global state for all patterns in this
+  /// FileCheck instance.
+  FileCheckPatternContext *getContext() const { return Context; }
+
+  /// \returns whether \p C is a valid first character for a variable name.
+  static bool isValidVarNameStart(char C);
+
+  /// Parsing information about a variable.
+  struct VariableProperties {
+    StringRef Name;
+    bool IsPseudo;
+  };
+
+  /// Parses the string at the start of \p Str for a variable name. \returns
+  /// a VariableProperties structure holding the variable name and whether it
+  /// is the name of a pseudo variable, or an error holding a diagnostic
+  /// against \p SM if parsing fails. If parsing was successful, also strips
+  /// the variable name from the start of \p Str.
+  static Expected<VariableProperties> parseVariable(StringRef &Str,
+                                                    const SourceMgr &SM);
+  /// Parses \p Expr for the name of a numeric variable to be defined at line
+  /// \p LineNumber or before input is parsed if \p LineNumber is None.
+  /// \returns a pointer to the class instance representing that variable,
+  /// creating it if needed, or an error holding a diagnostic against \p SM
+  /// should defining such a variable be invalid.
+  static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
+      StringRef &Expr, FileCheckPatternContext *Context,
+      Optional<size_t> LineNumber, const SourceMgr &SM);
+  /// Parses \p Expr for a numeric substitution block. Parameter
+  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
+  /// expression. \returns a pointer to the class instance representing the AST
+  /// of the expression whose value must be substituted, or an error holding a
+  /// diagnostic against \p SM if parsing fails. If substitution was
+  /// successful, sets \p DefinedNumericVariable to point to the class
+  /// representing the numeric variable being defined in this numeric
+  /// substitution block, or None if this block does not define any variable.
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseNumericSubstitutionBlock(
+      StringRef Expr,
+      Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+      bool IsLegacyLineExpr, const SourceMgr &SM) const;
+  /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
+  /// instance accordingly.
+  ///
+  /// \p Prefix provides which prefix is being matched, \p Req describes the
+  /// global options that influence the parsing such as whitespace
+  /// canonicalization, \p SM provides the SourceMgr used for error reports.
+  /// \returns true in case of an error, false otherwise.
+  bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
+                    const FileCheckRequest &Req);
+  /// Matches the pattern string against the input buffer \p Buffer
+  ///
+  /// \returns the position that is matched or an error indicating why matching
+  /// failed. If there is a match, updates \p MatchLen with the size of the
+  /// matched string.
+  ///
+  /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
+  /// instance provides the current values of FileCheck string variables and
+  /// is updated if this match defines new values. Likewise, the
+  /// GlobalNumericVariableTable StringMap in the same class provides the
+  /// current values of FileCheck numeric variables and is updated if this
+  /// match defines new numeric values.
+  Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
+                         const SourceMgr &SM) const;
+  /// Prints the value of successful substitutions or the name of the undefined
+  /// string or numeric variables preventing a successful substitution.
+  void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+                          SMRange MatchRange = None) const;
+  void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
                        std::vector<FileCheckDiag> *Diags) const;
 
   bool hasVariable() const {
-    return !(VariableUses.empty() && VariableDefs.empty());
+    return !(Substitutions.empty() && VariableDefs.empty());
   }
 
   Check::FileCheckType getCheckTy() const { return CheckTy; }
@@ -141,11 +570,40 @@ public:
 private:
   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
   void AddBackrefToRegEx(unsigned BackrefNum);
-  unsigned
-  ComputeMatchDistance(StringRef Buffer,
-                       const StringMap<StringRef> &VariableTable) const;
-  bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+  /// Computes an arbitrary estimate for the quality of matching this pattern
+  /// at the start of \p Buffer; a distance of zero should correspond to a
+  /// perfect match.
+  unsigned computeMatchDistance(StringRef Buffer) const;
+  /// Finds the closing sequence of a regex variable usage or definition.
+  ///
+  /// \p Str has to point in the beginning of the definition (right after the
+  /// opening sequence). \p SM holds the SourceMgr used for error reporting.
+  /// \returns the offset of the closing sequence within Str, or npos if it
+  /// was not found.
   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+
+  /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use.
+  /// \returns the pointer to the class instance representing that variable if
+  /// successful, or an error holding a diagnostic against \p SM otherwise.
+  Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+  parseNumericVariableUse(StringRef Name, bool IsPseudo,
+                          const SourceMgr &SM) const;
+  enum class AllowedOperand { LineVar, Literal, Any };
+  /// Parses \p Expr for use of a numeric operand. Accepts both literal values
+  /// and numeric variables, depending on the value of \p AO. \returns the
+  /// class representing that operand in the AST of the expression or an error
+  /// holding a diagnostic against \p SM otherwise.
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+                      const SourceMgr &SM) const;
+  /// Parses \p Expr for a binary operation. The left operand of this binary
+  /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
+  /// we are parsing a legacy @LINE expression. \returns the class representing
+  /// the binary operation in the AST of the expression, or an error holding a
+  /// diagnostic against \p SM otherwise.
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+             bool IsLegacyLineExpr, const SourceMgr &SM) const;
 };
 
 //===----------------------------------------------------------------------===//
@@ -223,20 +681,27 @@ struct FileCheckString {
   FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
       : Pat(P), Prefix(S), Loc(L) {}
 
+  /// Matches check string and its "not strings" and/or "dag strings".
   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
-               size_t &MatchLen, StringMap<StringRef> &VariableTable,
-               FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const;
+               size_t &MatchLen, FileCheckRequest &Req,
+               std::vector<FileCheckDiag> *Diags) const;
 
+  /// Verifies that there is a single line in the given \p Buffer. Errors are
+  /// reported against \p SM.
   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
+  /// Verifies that there is no newline in the given \p Buffer. Errors are
+  /// reported against \p SM.
   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
+  /// Verifies that none of the strings in \p NotStrings are found in the given
+  /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
+  /// \p Diags according to the verbosity level set in \p Req.
   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                 const std::vector<const FileCheckPattern *> &NotStrings,
-                StringMap<StringRef> &VariableTable,
                 const FileCheckRequest &Req,
                 std::vector<FileCheckDiag> *Diags) const;
+  /// Matches "dag strings" and their mixed "not strings".
   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                   std::vector<const FileCheckPattern *> &NotStrings,
-                  StringMap<StringRef> &VariableTable,
                   const FileCheckRequest &Req,
                   std::vector<FileCheckDiag> *Diags) const;
 };
@@ -245,6 +710,7 @@ struct FileCheckString {
 /// use information from the request.
 class FileCheck {
   FileCheckRequest Req;
+  FileCheckPatternContext PatternContext;
 
 public:
   FileCheck(FileCheckRequest Req) : Req(Req) {}
@@ -256,24 +722,27 @@ public:
   // library.
   Regex buildCheckPrefixRegex();
 
-  /// Read the check file, which specifies the sequence of expected strings.
+  /// Reads the check file from \p Buffer and records the expected strings it
+  /// contains in the \p CheckStrings vector. Errors are reported against
+  /// \p SM.
   ///
-  /// The strings are added to the CheckStrings vector. Returns true in case of
-  /// an error, false otherwise.
+  /// Only expected strings whose prefix is one of those listed in \p PrefixRE
+  /// are recorded. \returns true in case of an error, false otherwise.
   bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
                      std::vector<FileCheckString> &CheckStrings);
 
   bool ValidateCheckPrefixes();
 
-  /// Canonicalize whitespaces in the file. Line endings are replaced with
+  /// Canonicalizes whitespaces in the file. Line endings are replaced with
   /// UNIX-style '\n'.
   StringRef CanonicalizeFile(MemoryBuffer &MB,
                              SmallVectorImpl<char> &OutputBuffer);
 
-  /// Check the input to FileCheck provided in the \p Buffer against the \p
-  /// CheckStrings read from the check file.
+  /// Checks the input to FileCheck provided in the \p Buffer against the
+  /// \p CheckStrings read from the check file and records diagnostics emitted
+  /// in \p Diags. Errors are recorded against \p SM.
   ///
-  /// Returns false if the input fails to satisfy the checks.
+  /// \returns false if the input fails to satisfy the checks.
   bool CheckInput(SourceMgr &SM, StringRef Buffer,
                   ArrayRef<FileCheckString> CheckStrings,
                   std::vector<FileCheckDiag> *Diags = nullptr);
diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h
index 68226ca55502..999f551ebf2d 100644
--- a/include/llvm/Support/FileOutputBuffer.h
+++ b/include/llvm/Support/FileOutputBuffer.h
@@ -1,9 +1,8 @@
 //=== FileOutputBuffer.h - File Output Buffer -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,11 +32,6 @@ public:
   enum {
     /// set the 'x' bit on the resulting file
     F_executable = 1,
-
-    /// the contents of the new file are initialized from the file that exists
-    /// at the location (if present).  This allows in-place modification of an
-    /// existing file.
-    F_modify = 2
   };
 
   /// Factory method to create an OutputBuffer object which manages a read/write
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index d2042f51d8c1..1bec27bddad9 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/FileSystem.h - File System OS Concept -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -649,6 +648,19 @@ std::error_code status(const Twine &path, file_status &result,
 /// A version for when a file descriptor is already available.
 std::error_code status(int FD, file_status &Result);
 
+#ifdef _WIN32
+/// A version for when a file descriptor is already available.
+std::error_code status(file_t FD, file_status &Result);
+#endif
+
+/// Get file creation mode mask of the process.
+///
+/// @returns Mask reported by umask(2)
+/// @note There is no umask on Windows. This function returns 0 always
+///       on Windows. This function does not return an error_code because
+///       umask(2) never fails. It is not thread safe.
+unsigned getUmask();
+
 /// Set file permissions.
 ///
 /// @param Path File to set permissions on.
@@ -660,6 +672,11 @@ std::error_code status(int FD, file_status &Result);
 ///       Otherwise, the file will be marked as read-only.
 std::error_code setPermissions(const Twine &Path, perms Permissions);
 
+/// Version of setPermissions accepting a file descriptor.
+/// TODO Delete the path based overload once we implement the FD based overload
+/// on Windows.
+std::error_code setPermissions(int FD, perms Permissions);
+
 /// Get file permissions.
 ///
 /// @param Path File to get permissions from.
@@ -765,11 +782,32 @@ enum OpenFlags : unsigned {
   OF_UpdateAtime = 16,
 };
 
+/// Create a potentially unique file name but do not create the file.
+///
+/// Generates a unique path suitable for a temporary file but does not
+/// open or create the file. The name is based on \a Model with '%'
+/// replaced by a random char in [0-9a-f]. If \a MakeAbsolute is true
+/// then the system's temp directory is prepended first. If \a MakeAbsolute
+/// is false the current directory will be used instead.
+///
+/// This function does not check if the file exists. If you want to be sure
+/// that the file does not yet exist, you should use enough '%' characters
+/// in your model to ensure this. Each '%' gives 4-bits of entropy so you can
+/// use 32 of them to get 128 bits of entropy.
+///
+/// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s
+///
+/// @param Model Name to base unique path off of.
+/// @param ResultPath Set to the file's path.
+/// @param MakeAbsolute Whether to use the system temp directory.
+void createUniquePath(const Twine &Model, SmallVectorImpl<char> &ResultPath,
+                      bool MakeAbsolute);
+
 /// Create a uniquely named file.
 ///
 /// Generates a unique path suitable for a temporary file and then opens it as a
-/// file. The name is based on \a model with '%' replaced by a random char in
-/// [0-9a-f]. If \a model is not an absolute path, the temporary file will be
+/// file. The name is based on \a Model with '%' replaced by a random char in
+/// [0-9a-f]. If \a Model is not an absolute path, the temporary file will be
 /// created in the current directory.
 ///
 /// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s
@@ -932,6 +970,51 @@ Expected<file_t> openNativeFile(const Twine &Name, CreationDisposition Disp,
                                 FileAccess Access, OpenFlags Flags,
                                 unsigned Mode = 0666);
 
+/// Converts from a Posix file descriptor number to a native file handle.
+/// On Windows, this retrieves the underlying handle. On non-Windows, this is a
+/// no-op.
+file_t convertFDToNativeFile(int FD);
+
+#ifndef _WIN32
+inline file_t convertFDToNativeFile(int FD) { return FD; }
+#endif
+
+/// Return an open handle to standard in. On Unix, this is typically FD 0.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStdinHandle();
+
+/// Return an open handle to standard out. On Unix, this is typically FD 1.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStdoutHandle();
+
+/// Return an open handle to standard error. On Unix, this is typically FD 2.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStderrHandle();
+
+/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. The number of
+/// bytes actually read is returned in \p BytesRead. On Unix, this is equivalent
+/// to `*BytesRead = ::read(FD, Buf.data(), Buf.size())`, with error reporting.
+/// BytesRead will contain zero when reaching EOF.
+///
+/// @param FileHandle File to read from.
+/// @param Buf Buffer to read into.
+/// @param BytesRead Output parameter of the number of bytes read.
+/// @returns The error, if any, or errc::success.
+std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
+                               size_t *BytesRead);
+
+/// Reads \p Buf.size() bytes from \p FileHandle at offset \p Offset into \p
+/// Buf. If 'pread' is available, this will use that, otherwise it will use
+/// 'lseek'. Bytes requested beyond the end of the file will be zero
+/// initialized.
+///
+/// @param FileHandle File to read from.
+/// @param Buf Buffer to read into.
+/// @param Offset Offset into the file at which the read should occur.
+/// @returns The error, if any, or errc::success.
+std::error_code readNativeFileSlice(file_t FileHandle,
+                                    MutableArrayRef<char> Buf, size_t Offset);
+
 /// @brief Opens the file with the given name in a write-only or read-write
 /// mode, returning its open file descriptor. If the file does not exist, it
 /// is created.
@@ -1051,11 +1134,15 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None,
                       SmallVectorImpl<char> *RealPath = nullptr);
 
 /// @brief Close the file object.  This should be used instead of ::close for
-/// portability.
+/// portability. On error, the caller should assume the file is closed, as is
+/// the case for Process::SafelyCloseFileDescriptor
 ///
 /// @param F On input, this is the file to close.  On output, the file is
 /// set to kInvalidFile.
-void closeFile(file_t &F);
+///
+/// @returns An error code if closing the file failed. Typically, an error here
+/// means that the filesystem may have failed to perform some buffered writes.
+std::error_code closeFile(file_t &F);
 
 std::error_code getUniqueID(const Twine Path, UniqueID &Result);
 
@@ -1085,21 +1172,19 @@ private:
   size_t Size;
   void *Mapping;
 #ifdef _WIN32
-  void *FileHandle;
+  sys::fs::file_t FileHandle;
 #endif
   mapmode Mode;
 
-  std::error_code init(int FD, uint64_t Offset, mapmode Mode);
+  std::error_code init(sys::fs::file_t FD, uint64_t Offset, mapmode Mode);
 
 public:
   mapped_file_region() = delete;
   mapped_file_region(mapped_file_region&) = delete;
   mapped_file_region &operator =(mapped_file_region&) = delete;
 
-  /// \param fd An open file descriptor to map. mapped_file_region takes
-  ///   ownership if closefd is true. It must have been opended in the correct
-  ///   mode.
-  mapped_file_region(int fd, mapmode mode, size_t length, uint64_t offset,
+  /// \param fd An open file descriptor to map. Does not take ownership of fd.
+  mapped_file_region(sys::fs::file_t fd, mapmode mode, size_t length, uint64_t offset,
                      std::error_code &ec);
 
   ~mapped_file_region();
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index 2ee2c60b9964..16b2206924c3 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/FileUtilities.h - File System Utilities -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h
index bcbd2bec5722..77dcbaebf1a3 100644
--- a/include/llvm/Support/Format.h
+++ b/include/llvm/Support/Format.h
@@ -1,9 +1,8 @@
 //===- Format.h - Efficient printf-style formatting for streams -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/FormatAdapters.h b/include/llvm/Support/FormatAdapters.h
index 8320eaad39a9..a0e8cc439191 100644
--- a/include/llvm/Support/FormatAdapters.h
+++ b/include/llvm/Support/FormatAdapters.h
@@ -1,9 +1,8 @@
 //===- FormatAdapters.h - Formatters for common LLVM types -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/FormatCommon.h b/include/llvm/Support/FormatCommon.h
index 36fbad296c3f..3c119d12529a 100644
--- a/include/llvm/Support/FormatCommon.h
+++ b/include/llvm/Support/FormatCommon.h
@@ -1,9 +1,8 @@
-//===- FormatAdapters.h - Formatters for common LLVM types -----*- C++ -*-===//
+//===- FormatCommon.h - Formatters for common LLVM types --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/FormatProviders.h b/include/llvm/Support/FormatProviders.h
index 4e57034ff98e..629a4845716a 100644
--- a/include/llvm/Support/FormatProviders.h
+++ b/include/llvm/Support/FormatProviders.h
@@ -1,9 +1,8 @@
 //===- FormatProviders.h - Formatters for common LLVM types -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h
index b0f582513e07..5bbda9dd626e 100644
--- a/include/llvm/Support/FormatVariadic.h
+++ b/include/llvm/Support/FormatVariadic.h
@@ -1,9 +1,8 @@
 //===- FormatVariadic.h - Efficient type-safe string formatting --*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/FormatVariadicDetails.h b/include/llvm/Support/FormatVariadicDetails.h
index e8bd90f50941..e3c185134daa 100644
--- a/include/llvm/Support/FormatVariadicDetails.h
+++ b/include/llvm/Support/FormatVariadicDetails.h
@@ -1,9 +1,8 @@
 //===- FormatVariadicDetails.h - Helpers for FormatVariadic.h ----*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 4a135cd23174..b49c8d86531d 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/FormattedStream.h - Formatted streams ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index b3018bac310a..99620802505b 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -1,9 +1,8 @@
 //===- GenericDomTree.h - Generic dominator trees for graphs ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -670,14 +669,12 @@ protected:
 
     // The postdom tree can have a null root if there are no returns.
     if (getRootNode()) PrintDomTree<NodeT>(getRootNode(), O, 1);
-    if (IsPostDominator) {
-      O << "Roots: ";
-      for (const NodePtr Block : Roots) {
-        Block->printAsOperand(O, false);
-        O << " ";
-      }
-      O << "\n";
+    O << "Roots: ";
+    for (const NodePtr Block : Roots) {
+      Block->printAsOperand(O, false);
+      O << " ";
     }
+    O << "\n";
   }
 
 public:
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index 971e8305a112..ccceba881718 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -1,9 +1,8 @@
 //===- GenericDomTreeConstruction.h - Dominator Calculation ------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -16,9 +15,12 @@
 ///   Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
 ///   ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
 ///
-/// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
-/// out that the theoretically slower O(n*log(n)) implementation is actually
-/// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
+/// The Semi-NCA algorithm runs in O(n^2) worst-case time but is usually
+/// slightly faster than Simple Lengauer-Tarjan in practice.
+///
+/// O(n^2) worst cases happen when the computation of nearest common ancestors
+/// requires O(n) average time, which is very unlikely in the real world. If
+/// this ever becomes an issue, consider implementing a hybrid algorithm.
 ///
 /// The file uses the Depth Based Search algorithm to perform incremental
 /// updates (insertion and deletions). The implemented algorithm is based on
@@ -255,42 +257,47 @@ struct SemiNCAInfo {
     return LastNum;
   }
 
-  NodePtr eval(NodePtr VIn, unsigned LastLinked) {
-    auto &VInInfo = NodeToInfo[VIn];
-    if (VInInfo.DFSNum < LastLinked)
-      return VIn;
-
-    SmallVector<NodePtr, 32> Work;
-    SmallPtrSet<NodePtr, 32> Visited;
-
-    if (VInInfo.Parent >= LastLinked)
-      Work.push_back(VIn);
-
-    while (!Work.empty()) {
-      NodePtr V = Work.back();
-      auto &VInfo = NodeToInfo[V];
-      NodePtr VAncestor = NumToNode[VInfo.Parent];
-
-      // Process Ancestor first
-      if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
-        Work.push_back(VAncestor);
-        continue;
-      }
-      Work.pop_back();
-
-      // Update VInfo based on Ancestor info
-      if (VInfo.Parent < LastLinked)
-        continue;
-
-      auto &VAInfo = NodeToInfo[VAncestor];
-      NodePtr VAncestorLabel = VAInfo.Label;
-      NodePtr VLabel = VInfo.Label;
-      if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi)
-        VInfo.Label = VAncestorLabel;
-      VInfo.Parent = VAInfo.Parent;
-    }
-
-    return VInInfo.Label;
+  // V is a predecessor of W. eval() returns V if V < W, otherwise the minimum
+  // of sdom(U), where U > W and there is a virtual forest path from U to V. The
+  // virtual forest consists of linked edges of processed vertices.
+  //
+  // We can follow Parent pointers (virtual forest edges) to determine the
+  // ancestor U with minimum sdom(U). But it is slow and thus we employ the path
+  // compression technique to speed up to O(m*log(n)). Theoretically the virtual
+  // forest can be organized as balanced trees to achieve almost linear
+  // O(m*alpha(m,n)) running time. But it requires two auxiliary arrays (Size
+  // and Child) and is unlikely to be faster than the simple implementation.
+  //
+  // For each vertex V, its Label points to the vertex with the minimal sdom(U)
+  // (Semi) in its path from V (included) to NodeToInfo[V].Parent (excluded).
+  NodePtr eval(NodePtr V, unsigned LastLinked,
+               SmallVectorImpl<InfoRec *> &Stack) {
+    InfoRec *VInfo = &NodeToInfo[V];
+    if (VInfo->Parent < LastLinked)
+      return VInfo->Label;
+
+    // Store ancestors except the last (root of a virtual tree) into a stack.
+    assert(Stack.empty());
+    do {
+      Stack.push_back(VInfo);
+      VInfo = &NodeToInfo[NumToNode[VInfo->Parent]];
+    } while (VInfo->Parent >= LastLinked);
+
+    // Path compression. Point each vertex's Parent to the root and update its
+    // Label if any of its ancestors (PInfo->Label) has a smaller Semi.
+    const InfoRec *PInfo = VInfo;
+    const InfoRec *PLabelInfo = &NodeToInfo[PInfo->Label];
+    do {
+      VInfo = Stack.pop_back_val();
+      VInfo->Parent = PInfo->Parent;
+      const InfoRec *VLabelInfo = &NodeToInfo[VInfo->Label];
+      if (PLabelInfo->Semi < VLabelInfo->Semi)
+        VInfo->Label = PInfo->Label;
+      else
+        PLabelInfo = VLabelInfo;
+      PInfo = VInfo;
+    } while (!Stack.empty());
+    return VInfo->Label;
   }
 
   // This function requires DFS to be run before calling it.
@@ -304,6 +311,7 @@ struct SemiNCAInfo {
     }
 
     // Step #1: Calculate the semidominators of all vertices.
+    SmallVector<InfoRec *, 32> EvalStack;
     for (unsigned i = NextDFSNum - 1; i >= 2; --i) {
       NodePtr W = NumToNode[i];
       auto &WInfo = NodeToInfo[W];
@@ -319,7 +327,7 @@ struct SemiNCAInfo {
         if (TN && TN->getLevel() < MinLevel)
           continue;
 
-        unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
+        unsigned SemiU = NodeToInfo[eval(N, i + 1, EvalStack)].Semi;
         if (SemiU < WInfo.Semi) WInfo.Semi = SemiU;
       }
     }
@@ -620,21 +628,22 @@ struct SemiNCAInfo {
 
   // Helper struct used during edge insertions.
   struct InsertionInfo {
-    using BucketElementTy = std::pair<unsigned, TreeNodePtr>;
-    struct DecreasingLevel {
-      bool operator()(const BucketElementTy &First,
-                      const BucketElementTy &Second) const {
-        return First.first > Second.first;
+    struct Compare {
+      bool operator()(TreeNodePtr LHS, TreeNodePtr RHS) const {
+        return LHS->getLevel() < RHS->getLevel();
       }
     };
 
-    std::priority_queue<BucketElementTy, SmallVector<BucketElementTy, 8>,
-        DecreasingLevel>
-        Bucket;  // Queue of tree nodes sorted by level in descending order.
-    SmallDenseSet<TreeNodePtr, 8> Affected;
-    SmallDenseMap<TreeNodePtr, unsigned, 8> Visited;
-    SmallVector<TreeNodePtr, 8> AffectedQueue;
-    SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
+    // Bucket queue of tree nodes ordered by descending level. For simplicity,
+    // we use a priority_queue here.
+    std::priority_queue<TreeNodePtr, SmallVector<TreeNodePtr, 8>,
+                        Compare>
+        Bucket;
+    SmallDenseSet<TreeNodePtr, 8> Visited;
+    SmallVector<TreeNodePtr, 8> Affected;
+#ifndef NDEBUG
+    SmallVector<TreeNodePtr, 8> VisitedUnaffected;
+#endif
   };
 
   static void InsertEdge(DomTreeT &DT, const BatchUpdatePtr BUI,
@@ -689,6 +698,17 @@ struct SemiNCAInfo {
     return true;
   }
 
+  static bool isPermutation(const SmallVectorImpl<NodePtr> &A,
+                            const SmallVectorImpl<NodePtr> &B) {
+    if (A.size() != B.size())
+      return false;
+    SmallPtrSet<NodePtr, 4> Set(A.begin(), A.end());
+    for (NodePtr N : B)
+      if (Set.count(N) == 0)
+        return false;
+    return true;
+  }
+
   // Updates the set of roots after insertion or deletion. This ensures that
   // roots are the same when after a series of updates and when the tree would
   // be built from scratch.
@@ -702,9 +722,8 @@ struct SemiNCAInfo {
       return;
 
     // Recalculate the set of roots.
-    auto Roots = FindRoots(DT, BUI);
-    if (DT.Roots.size() != Roots.size() ||
-        !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) {
+    RootsT Roots = FindRoots(DT, BUI);
+    if (!isPermutation(DT.Roots, Roots)) {
       // The roots chosen in the CFG have changed. This is because the
       // incremental algorithm does not really know or use the set of roots and
       // can make a different (implicit) decision about which node within an
@@ -715,7 +734,6 @@ struct SemiNCAInfo {
       // It may be possible to update the tree without recalculating it, but
       // we do not know yet how to do it, and it happens rarely in practise.
       CalculateFromScratch(DT, BUI);
-      return;
     }
   }
 
@@ -737,128 +755,113 @@ struct SemiNCAInfo {
     assert(NCD);
 
     LLVM_DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n");
-    const TreeNodePtr ToIDom = To->getIDom();
+    const unsigned NCDLevel = NCD->getLevel();
 
-    // Nothing affected -- NCA property holds.
-    // (Based on the lemma 2.5 from the second paper.)
-    if (NCD == To || NCD == ToIDom) return;
+    // Based on Lemma 2.5 from the second paper, after insertion of (From,To), v
+    // is affected iff depth(NCD)+1 < depth(v) && a path P from To to v exists
+    // where every w on P satisfies depth(v) <= depth(w).
+    //
+    // This reduces to a widest path problem (maximizing the depth of the
+    // minimum vertex in the path) which can be solved by a modified version of
+    // Dijkstra with a bucket queue (named depth-based search in the paper).
+
+    // To is in the path, so depth(NCD)+1 < depth(v) <= depth(To). Nothing
+    // affected if this does not hold.
+    if (NCDLevel + 1 >= To->getLevel())
+      return;
 
-    // Identify and collect affected nodes.
     InsertionInfo II;
-    LLVM_DEBUG(dbgs() << "Marking " << BlockNamePrinter(To)
-                      << " as affected\n");
-    II.Affected.insert(To);
-    const unsigned ToLevel = To->getLevel();
-    LLVM_DEBUG(dbgs() << "Putting " << BlockNamePrinter(To)
-                      << " into a Bucket\n");
-    II.Bucket.push({ToLevel, To});
+    SmallVector<TreeNodePtr, 8> UnaffectedOnCurrentLevel;
+    II.Bucket.push(To);
+    II.Visited.insert(To);
 
     while (!II.Bucket.empty()) {
-      const TreeNodePtr CurrentNode = II.Bucket.top().second;
-      const unsigned  CurrentLevel = CurrentNode->getLevel();
+      TreeNodePtr TN = II.Bucket.top();
       II.Bucket.pop();
-      LLVM_DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
-                        << BlockNamePrinter(CurrentNode) << "\n");
-
-      II.Visited.insert({CurrentNode, CurrentLevel});
-      II.AffectedQueue.push_back(CurrentNode);
+      II.Affected.push_back(TN);
+
+      const unsigned CurrentLevel = TN->getLevel();
+      LLVM_DEBUG(dbgs() << "Mark " << BlockNamePrinter(TN) <<
+                 "as affected, CurrentLevel " << CurrentLevel << "\n");
+
+      assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
+
+      while (true) {
+        // Unlike regular Dijkstra, we have an inner loop to expand more
+        // vertices. The first iteration is for the (affected) vertex popped
+        // from II.Bucket and the rest are for vertices in
+        // UnaffectedOnCurrentLevel, which may eventually expand to affected
+        // vertices.
+        //
+        // Invariant: there is an optimal path from `To` to TN with the minimum
+        // depth being CurrentLevel.
+        for (const NodePtr Succ :
+             ChildrenGetter<IsPostDom>::Get(TN->getBlock(), BUI)) {
+          const TreeNodePtr SuccTN = DT.getNode(Succ);
+          assert(SuccTN &&
+                 "Unreachable successor found at reachable insertion");
+          const unsigned SuccLevel = SuccTN->getLevel();
+
+          LLVM_DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
+                            << ", level = " << SuccLevel << "\n");
+
+          // There is an optimal path from `To` to Succ with the minimum depth
+          // being min(CurrentLevel, SuccLevel).
+          //
+          // If depth(NCD)+1 < depth(Succ) is not satisfied, Succ is unaffected
+          // and no affected vertex may be reached by a path passing through it.
+          // Stop here. Also, Succ may be visited by other predecessors but the
+          // first visit has the optimal path. Stop if Succ has been visited.
+          if (SuccLevel <= NCDLevel + 1 || !II.Visited.insert(SuccTN).second)
+            continue;
+
+          if (SuccLevel > CurrentLevel) {
+            // Succ is unaffected but it may (transitively) expand to affected
+            // vertices. Store it in UnaffectedOnCurrentLevel.
+            LLVM_DEBUG(dbgs() << "\t\tMarking visited not affected "
+                              << BlockNamePrinter(Succ) << "\n");
+            UnaffectedOnCurrentLevel.push_back(SuccTN);
+#ifndef NDEBUG
+            II.VisitedUnaffected.push_back(SuccTN);
+#endif
+          } else {
+            // The condition is satisfied (Succ is affected). Add Succ to the
+            // bucket queue.
+            LLVM_DEBUG(dbgs() << "\t\tAdd " << BlockNamePrinter(Succ)
+                              << " to a Bucket\n");
+            II.Bucket.push(SuccTN);
+          }
+        }
 
-      // Discover and collect affected successors of the current node.
-      VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II);
+        if (UnaffectedOnCurrentLevel.empty())
+          break;
+        TN = UnaffectedOnCurrentLevel.pop_back_val();
+        LLVM_DEBUG(dbgs() << " Next: " << BlockNamePrinter(TN) << "\n");
+      }
     }
 
     // Finish by updating immediate dominators and levels.
     UpdateInsertion(DT, BUI, NCD, II);
   }
 
-  // Visits an affected node and collect its affected successors.
-  static void VisitInsertion(DomTreeT &DT, const BatchUpdatePtr BUI,
-                             const TreeNodePtr TN, const unsigned RootLevel,
-                             const TreeNodePtr NCD, InsertionInfo &II) {
-    const unsigned NCDLevel = NCD->getLevel();
-    LLVM_DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ",  RootLevel "
-                      << RootLevel << "\n");
-
-    SmallVector<TreeNodePtr, 8> Stack = {TN};
-    assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
-
-    SmallPtrSet<TreeNodePtr, 8> Processed;
-
-    do {
-      TreeNodePtr Next = Stack.pop_back_val();
-      LLVM_DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n");
-
-      for (const NodePtr Succ :
-           ChildrenGetter<IsPostDom>::Get(Next->getBlock(), BUI)) {
-        const TreeNodePtr SuccTN = DT.getNode(Succ);
-        assert(SuccTN && "Unreachable successor found at reachable insertion");
-        const unsigned SuccLevel = SuccTN->getLevel();
-
-        LLVM_DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
-                          << ", level = " << SuccLevel << "\n");
-
-        // Do not process the same node multiple times.
-        if (Processed.count(Next) > 0)
-          continue;
-
-        // Succ dominated by subtree From -- not affected.
-        // (Based on the lemma 2.5 from the second paper.)
-        if (SuccLevel > RootLevel) {
-          LLVM_DEBUG(dbgs() << "\t\tDominated by subtree From\n");
-          if (II.Visited.count(SuccTN) != 0) {
-            LLVM_DEBUG(dbgs() << "\t\t\talready visited at level "
-                              << II.Visited[SuccTN] << "\n\t\t\tcurrent level "
-                              << RootLevel << ")\n");
-
-            // A node can be necessary to visit again if we see it again at
-            // a lower level than before.
-            if (II.Visited[SuccTN] >= RootLevel)
-              continue;
-          }
-
-          LLVM_DEBUG(dbgs() << "\t\tMarking visited not affected "
-                            << BlockNamePrinter(Succ) << "\n");
-          II.Visited.insert({SuccTN, RootLevel});
-          II.VisitedNotAffectedQueue.push_back(SuccTN);
-          Stack.push_back(SuccTN);
-        } else if ((SuccLevel > NCDLevel + 1) &&
-            II.Affected.count(SuccTN) == 0) {
-          LLVM_DEBUG(dbgs() << "\t\tMarking affected and adding "
-                            << BlockNamePrinter(Succ) << " to a Bucket\n");
-          II.Affected.insert(SuccTN);
-          II.Bucket.push({SuccLevel, SuccTN});
-        }
-      }
-
-      Processed.insert(Next);
-    } while (!Stack.empty());
-  }
-
   // Updates immediate dominators and levels after insertion.
   static void UpdateInsertion(DomTreeT &DT, const BatchUpdatePtr BUI,
                               const TreeNodePtr NCD, InsertionInfo &II) {
     LLVM_DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n");
 
-    for (const TreeNodePtr TN : II.AffectedQueue) {
+    for (const TreeNodePtr TN : II.Affected) {
       LLVM_DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN)
                         << ") = " << BlockNamePrinter(NCD) << "\n");
       TN->setIDom(NCD);
     }
 
-    UpdateLevelsAfterInsertion(II);
-    if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
-  }
-
-  static void UpdateLevelsAfterInsertion(InsertionInfo &II) {
-    LLVM_DEBUG(
-        dbgs() << "Updating levels for visited but not affected nodes\n");
+#ifndef NDEBUG
+    for (const TreeNodePtr TN : II.VisitedUnaffected)
+      assert(TN->getLevel() == TN->getIDom()->getLevel() + 1 &&
+             "TN should have been updated by an affected ancestor");
+#endif
 
-    for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) {
-      LLVM_DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = ("
-                        << BlockNamePrinter(TN->getIDom()) << ") "
-                        << TN->getIDom()->getLevel() << " + 1\n");
-      TN->UpdateLevel();
-    }
+    if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
   }
 
   // Handles insertion to previously unreachable nodes.
@@ -1182,6 +1185,10 @@ struct SemiNCAInfo {
       BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
     }
 
+#if 0
+    // FIXME: The LLVM_DEBUG macro only plays well with a modular
+    // build of LLVM when the header is marked as textual, but doing
+    // so causes redefinition errors.
     LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n");
     LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U
                                            : reverse(BUI.Updates)) {
@@ -1190,6 +1197,7 @@ struct SemiNCAInfo {
       dbgs() << "\n";
     });
     LLVM_DEBUG(dbgs() << "\n");
+#endif
 
     // Recalculate the DominatorTree when the number of updates
     // exceeds a threshold, which usually makes direct updating slower than
@@ -1215,8 +1223,13 @@ struct SemiNCAInfo {
   static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) {
     assert(!BUI.Updates.empty() && "No updates to apply!");
     UpdateT CurrentUpdate = BUI.Updates.pop_back_val();
+#if 0
+    // FIXME: The LLVM_DEBUG macro only plays well with a modular
+    // build of LLVM when the header is marked as textual, but doing
+    // so causes redefinition errors.
     LLVM_DEBUG(dbgs() << "Applying update: ");
     LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n");
+#endif
 
     // Move to the next snapshot of the CFG by removing the reverse-applied
     // current update. Since updates are performed in the same order they are
@@ -1270,9 +1283,7 @@ struct SemiNCAInfo {
     }
 
     RootsT ComputedRoots = FindRoots(DT, nullptr);
-    if (DT.Roots.size() != ComputedRoots.size() ||
-        !std::is_permutation(DT.Roots.begin(), DT.Roots.end(),
-                             ComputedRoots.begin())) {
+    if (!isPermutation(DT.Roots, ComputedRoots)) {
       errs() << "Tree has different roots than freshly computed ones!\n";
       errs() << "\tPDT roots: ";
       for (const NodePtr N : DT.Roots) errs() << BlockNamePrinter(N) << ", ";
diff --git a/include/llvm/Support/GenericIteratedDominanceFrontier.h b/include/llvm/Support/GenericIteratedDominanceFrontier.h
new file mode 100644
index 000000000000..25eb7cd7b6d5
--- /dev/null
+++ b/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -0,0 +1,209 @@
+//===- GenericIteratedDominanceFrontier.h - Calculate IDF -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Compute iterated dominance frontiers using a linear time algorithm.
+///
+/// The algorithm used here is based on:
+///
+///   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+///   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+///   Programming Languages
+///   POPL '95. ACM, New York, NY, 62-73.
+///
+/// It has been modified to not explicitly use the DJ graph data structure and
+/// to directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GENERIC_IDF_H
+#define LLVM_SUPPORT_GENERIC_IDF_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <queue>
+
+namespace llvm {
+
+namespace IDFCalculatorDetail {
+
+/// Generic utility class used for getting the children of a basic block.
+/// May be specialized if, for example, one wouldn't like to return null pointer
+/// successors.
+template <class NodeTy, bool IsPostDom> struct ChildrenGetterTy {
+  using NodeRef = typename GraphTraits<NodeTy>::NodeRef;
+  using ChildrenTy = SmallVector<NodeRef, 8>;
+
+  ChildrenTy get(const NodeRef &N);
+};
+
+} // end of namespace IDFCalculatorDetail
+
+/// Determine the iterated dominance frontier, given a set of defining
+/// blocks, and optionally, a set of live-in blocks.
+///
+/// In turn, the results can be used to place phi nodes.
+///
+/// This algorithm is a linear time computation of Iterated Dominance Frontiers,
+/// pruned using the live-in set.
+/// By default, liveness is not used to prune the IDF computation.
+/// The template parameters should be of a CFG block type.
+template <class NodeTy, bool IsPostDom> class IDFCalculatorBase {
+public:
+  using OrderedNodeTy =
+      typename std::conditional<IsPostDom, Inverse<NodeTy *>, NodeTy *>::type;
+  using ChildrenGetterTy =
+      IDFCalculatorDetail::ChildrenGetterTy<NodeTy, IsPostDom>;
+
+  IDFCalculatorBase(DominatorTreeBase<NodeTy, IsPostDom> &DT) : DT(DT) {}
+
+  IDFCalculatorBase(DominatorTreeBase<NodeTy, IsPostDom> &DT,
+                    const ChildrenGetterTy &C)
+      : DT(DT), ChildrenGetter(C) {}
+
+  /// Give the IDF calculator the set of blocks in which the value is
+  /// defined.  This is equivalent to the set of starting blocks it should be
+  /// calculating the IDF for (though later gets pruned based on liveness).
+  ///
+  /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+  void setDefiningBlocks(const SmallPtrSetImpl<NodeTy *> &Blocks) {
+    DefBlocks = &Blocks;
+  }
+
+  /// Give the IDF calculator the set of blocks in which the value is
+  /// live on entry to the block.   This is used to prune the IDF calculation to
+  /// not include blocks where any phi insertion would be dead.
+  ///
+  /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+  void setLiveInBlocks(const SmallPtrSetImpl<NodeTy *> &Blocks) {
+    LiveInBlocks = &Blocks;
+    useLiveIn = true;
+  }
+
+  /// Reset the live-in block set to be empty, and tell the IDF
+  /// calculator to not use liveness anymore.
+  void resetLiveInBlocks() {
+    LiveInBlocks = nullptr;
+    useLiveIn = false;
+  }
+
+  /// Calculate iterated dominance frontiers
+  ///
+  /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in
+  /// the file-level comment.  It performs DF->IDF pruning using the live-in
+  /// set, to avoid computing the IDF for blocks where an inserted PHI node
+  /// would be dead.
+  void calculate(SmallVectorImpl<NodeTy *> &IDFBlocks);
+
+private:
+  DominatorTreeBase<NodeTy, IsPostDom> &DT;
+  ChildrenGetterTy ChildrenGetter;
+  bool useLiveIn = false;
+  const SmallPtrSetImpl<NodeTy *> *LiveInBlocks;
+  const SmallPtrSetImpl<NodeTy *> *DefBlocks;
+};
+
+//===----------------------------------------------------------------------===//
+// Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace IDFCalculatorDetail {
+
+template <class NodeTy, bool IsPostDom>
+typename ChildrenGetterTy<NodeTy, IsPostDom>::ChildrenTy
+ChildrenGetterTy<NodeTy, IsPostDom>::get(const NodeRef &N) {
+  using OrderedNodeTy =
+      typename IDFCalculatorBase<NodeTy, IsPostDom>::OrderedNodeTy;
+
+  auto Children = children<OrderedNodeTy>(N);
+  return {Children.begin(), Children.end()};
+}
+
+} // end of namespace IDFCalculatorDetail
+
+template <class NodeTy, bool IsPostDom>
+void IDFCalculatorBase<NodeTy, IsPostDom>::calculate(
+    SmallVectorImpl<NodeTy *> &PHIBlocks) {
+  // Use a priority queue keyed on dominator tree level so that inserted nodes
+  // are handled from the bottom of the dominator tree upwards. We also augment
+  // the level with a DFS number to ensure that the blocks are ordered in a
+  // deterministic way.
+  using DomTreeNodePair =
+      std::pair<DomTreeNodeBase<NodeTy> *, std::pair<unsigned, unsigned>>;
+  using IDFPriorityQueue =
+      std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+                          less_second>;
+
+  IDFPriorityQueue PQ;
+
+  DT.updateDFSNumbers();
+
+  for (NodeTy *BB : *DefBlocks) {
+    if (DomTreeNodeBase<NodeTy> *Node = DT.getNode(BB))
+      PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
+  }
+
+  SmallVector<DomTreeNodeBase<NodeTy> *, 32> Worklist;
+  SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedPQ;
+  SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedWorklist;
+
+  while (!PQ.empty()) {
+    DomTreeNodePair RootPair = PQ.top();
+    PQ.pop();
+    DomTreeNodeBase<NodeTy> *Root = RootPair.first;
+    unsigned RootLevel = RootPair.second.first;
+
+    // Walk Root and all of its dominator tree descendants, inspecting their CFG
+    // edges with targets elsewhere on the dominator tree. Only targets whose
+    // level is at most Root's level are added to the iterated dominance
+    // frontier of the definition set.
+
+    Worklist.clear();
+    Worklist.push_back(Root);
+    VisitedWorklist.insert(Root);
+
+    while (!Worklist.empty()) {
+      DomTreeNodeBase<NodeTy> *Node = Worklist.pop_back_val();
+      NodeTy *BB = Node->getBlock();
+      // Succ is the successor in the direction we are calculating IDF, so it is
+      // successor for IDF, and predecessor for Reverse IDF.
+      auto DoWork = [&](NodeTy *Succ) {
+        DomTreeNodeBase<NodeTy> *SuccNode = DT.getNode(Succ);
+
+        const unsigned SuccLevel = SuccNode->getLevel();
+        if (SuccLevel > RootLevel)
+          return;
+
+        if (!VisitedPQ.insert(SuccNode).second)
+          return;
+
+        NodeTy *SuccBB = SuccNode->getBlock();
+        if (useLiveIn && !LiveInBlocks->count(SuccBB))
+          return;
+
+        PHIBlocks.emplace_back(SuccBB);
+        if (!DefBlocks->count(SuccBB))
+          PQ.push(std::make_pair(
+              SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
+      };
+
+      for (auto Succ : ChildrenGetter.get(BB))
+        DoWork(Succ);
+
+      for (auto DomChild : *Node) {
+        if (VisitedWorklist.insert(DomChild).second)
+          Worklist.push_back(DomChild);
+      }
+    }
+  }
+}
+
+} // end of namespace llvm
+
+#endif
diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h
index c9436a13c1a3..66a4cd94c12a 100644
--- a/include/llvm/Support/GlobPattern.h
+++ b/include/llvm/Support/GlobPattern.h
@@ -1,9 +1,8 @@
 //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 02d98bec16e2..466a0449e257 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/GraphWriter.h - Write graph to a .dot file --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index 57c79c0b9fdf..b37cc514c92e 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Host.h - Host machine characteristics --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/InitLLVM.h b/include/llvm/Support/InitLLVM.h
index 0f629c9ac92d..8069859a3e0b 100644
--- a/include/llvm/Support/InitLLVM.h
+++ b/include/llvm/Support/InitLLVM.h
@@ -1,9 +1,8 @@
 //===- InitLLVM.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -20,7 +19,10 @@
 //  1. Setting up a signal handler so that pretty stack trace is printed out
 //     if a process crashes.
 //
-//  2. If running on Windows, obtain command line arguments using a
+//  2. Set up the global new-handler which is called when a memory allocation
+//     attempt fails.
+//
+//  3. If running on Windows, obtain command line arguments using a
 //     multibyte character-aware API and convert arguments into UTF-8
 //     encoding, so that you can assume that command line arguments are
 //     always encoded in UTF-8 on any platform.
diff --git a/include/llvm/Support/ItaniumManglingCanonicalizer.h b/include/llvm/Support/ItaniumManglingCanonicalizer.h
index 34eb9f7deaaf..6920000340d4 100644
--- a/include/llvm/Support/ItaniumManglingCanonicalizer.h
+++ b/include/llvm/Support/ItaniumManglingCanonicalizer.h
@@ -1,9 +1,8 @@
 //===--- ItaniumManglingCanonicalizer.h -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/JSON.h b/include/llvm/Support/JSON.h
index 7a04fd52bc50..0ca41097dddd 100644
--- a/include/llvm/Support/JSON.h
+++ b/include/llvm/Support/JSON.h
@@ -1,9 +1,8 @@
 //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 ///
@@ -22,6 +21,9 @@
 /// - a convention and helpers for mapping between json::Value and user-defined
 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
 ///
+/// - an output API json::OStream which can emit JSON without materializing
+///   all structures as json::Value.
+///
 /// Typically, JSON data would be read from an external source, parsed into
 /// a Value, and then converted into some native data structure before doing
 /// real work on it. (And vice versa when writing).
@@ -37,7 +39,7 @@
 ///
 /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
 ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
-///   Low-level reader/writer libraries are in Bitcode/Bitstream*.h
+///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
 ///
 //===---------------------------------------------------------------------===//
 
@@ -96,7 +98,7 @@ public:
   using iterator = Storage::iterator;
   using const_iterator = Storage::const_iterator;
 
-  explicit Object() = default;
+  Object() = default;
   // KV is a trivial key-value struct for list-initialization.
   // (using std::pair forces extra copies).
   struct KV;
@@ -157,7 +159,7 @@ public:
   using iterator = std::vector<Value>::iterator;
   using const_iterator = std::vector<Value>::const_iterator;
 
-  explicit Array() = default;
+  Array() = default;
   explicit Array(std::initializer_list<Value> Elements);
   template <typename Collection> explicit Array(const Collection &C) {
     for (const auto &V : C)
@@ -180,6 +182,7 @@ public:
 
   bool empty() const { return V.empty(); }
   size_t size() const { return V.size(); }
+  void reserve(size_t S) { V.reserve(S); }
 
   void clear() { V.clear(); }
   void push_back(const Value &E) { V.push_back(E); }
@@ -310,8 +313,8 @@ public:
     create<std::string>(std::move(V));
   }
   Value(const llvm::SmallVectorImpl<char> &V)
-      : Value(std::string(V.begin(), V.end())){};
-  Value(const llvm::formatv_object_base &V) : Value(V.str()){};
+      : Value(std::string(V.begin(), V.end())) {}
+  Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
   // Strings: types with reference semantics. Must be valid UTF-8.
   Value(StringRef V) : Type(T_StringRef) {
     create<llvm::StringRef>(V);
@@ -437,11 +440,6 @@ public:
     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
   }
 
-  /// Serializes this Value to JSON, writing it to the provided stream.
-  /// The formatting is compact (no extra whitespace) and deterministic.
-  /// For pretty-printing, use the formatv() format_provider below.
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
-
 private:
   void destroy();
   void copyFrom(const Value &M);
@@ -462,9 +460,7 @@ private:
     return *static_cast<T *>(Storage);
   }
 
-  template <typename Indenter>
-  void print(llvm::raw_ostream &, const Indenter &) const;
-  friend struct llvm::format_provider<llvm::json::Value>;
+  friend class OStream;
 
   enum ValueType : char {
     T_Null,
@@ -481,11 +477,11 @@ private:
   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
                                       std::string, json::Array, json::Object>
       Union;
+  friend bool operator==(const Value &, const Value &);
 };
 
 bool operator==(const Value &, const Value &);
 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
 
 /// ObjectKey is a used to capture keys in Object. Like Value but:
 ///   - only strings are allowed
@@ -698,6 +694,154 @@ public:
     return llvm::inconvertibleErrorCode();
   }
 };
+
+/// json::OStream allows writing well-formed JSON without materializing
+/// all structures as json::Value ahead of time.
+/// It's faster, lower-level, and less safe than OS << json::Value.
+///
+/// Only one "top-level" object can be written to a stream.
+/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
+///
+///   json::OStream J(OS);
+///   J.array([&]{
+///     for (const Event &E : Events)
+///       J.object([&] {
+///         J.attribute("timestamp", int64_t(E.Time));
+///         J.attributeArray("participants", [&] {
+///           for (const Participant &P : E.Participants)
+///             J.value(P.toString());
+///         });
+///       });
+///   });
+///
+/// This would produce JSON like:
+///
+///   [
+///     {
+///       "timestamp": 19287398741,
+///       "participants": [
+///         "King Kong",
+///         "Miley Cyrus",
+///         "Cleopatra"
+///       ]
+///     },
+///     ...
+///   ]
+///
+/// The lower level begin/end methods (arrayBegin()) are more flexible but
+/// care must be taken to pair them correctly:
+///
+///   json::OStream J(OS);
+///   J.arrayBegin();
+///   for (const Event &E : Events) {
+///     J.objectBegin();
+///     J.attribute("timestamp", int64_t(E.Time));
+///     J.attributeBegin("participants"); J.arrayBegin();
+///     for (const Participant &P : E.Participants)
+///       J.value(P.toString());
+///     J.arrayEnd(); J.attributeEnd();
+///     J.objectEnd();
+///   }
+///   J.arrayEnd();
+///
+/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
+/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
+/// an array, and so on.
+/// With asserts disabled, this is undefined behavior.
+class OStream {
+ public:
+  using Block = llvm::function_ref<void()>;
+  // If IndentSize is nonzero, output is pretty-printed.
+  explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
+      : OS(OS), IndentSize(IndentSize) {
+    Stack.emplace_back();
+  }
+  ~OStream() {
+    assert(Stack.size() == 1 && "Unmatched begin()/end()");
+    assert(Stack.back().Ctx == Singleton);
+    assert(Stack.back().HasValue && "Did not write top-level value");
+  }
+
+  /// Flushes the underlying ostream. OStream does not buffer internally.
+  void flush() { OS.flush(); }
+
+  // High level functions to output a value.
+  // Valid at top-level (exactly once), in an attribute value (exactly once),
+  // or in an array (any number of times).
+
+  /// Emit a self-contained value (number, string, vector<string> etc).
+  void value(const Value &V);
+  /// Emit an array whose elements are emitted in the provided Block.
+  void array(Block Contents) {
+    arrayBegin();
+    Contents();
+    arrayEnd();
+  }
+  /// Emit an object whose attributes are emitted in the provided Block.
+  void object(Block Contents) {
+    objectBegin();
+    Contents();
+    objectEnd();
+  }
+
+  // High level functions to output object attributes.
+  // Valid only within an object (any number of times).
+
+  /// Emit an attribute whose value is self-contained (number, vector<int> etc).
+  void attribute(llvm::StringRef Key, const Value& Contents) {
+    attributeImpl(Key, [&] { value(Contents); });
+  }
+  /// Emit an attribute whose value is an array with elements from the Block.
+  void attributeArray(llvm::StringRef Key, Block Contents) {
+    attributeImpl(Key, [&] { array(Contents); });
+  }
+  /// Emit an attribute whose value is an object with attributes from the Block.
+  void attributeObject(llvm::StringRef Key, Block Contents) {
+    attributeImpl(Key, [&] { object(Contents); });
+  }
+
+  // Low-level begin/end functions to output arrays, objects, and attributes.
+  // Must be correctly paired. Allowed contexts are as above.
+
+  void arrayBegin();
+  void arrayEnd();
+  void objectBegin();
+  void objectEnd();
+  void attributeBegin(llvm::StringRef Key);
+  void attributeEnd();
+
+ private:
+  void attributeImpl(llvm::StringRef Key, Block Contents) {
+    attributeBegin(Key);
+    Contents();
+    attributeEnd();
+  }
+
+  void valueBegin();
+  void newline();
+
+  enum Context {
+    Singleton, // Top level, or object attribute.
+    Array,
+    Object,
+  };
+  struct State {
+    Context Ctx = Singleton;
+    bool HasValue = false;
+  };
+  llvm::SmallVector<State, 16> Stack; // Never empty.
+  llvm::raw_ostream &OS;
+  unsigned IndentSize;
+  unsigned Indent = 0;
+};
+
+/// Serializes this Value to JSON, writing it to the provided stream.
+/// The formatting is compact (no extra whitespace) and deterministic.
+/// For pretty-printing, use the formatv() format_provider below.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
+  OStream(OS).value(V);
+  return OS;
+}
 } // namespace json
 
 /// Allow printing json::Value with formatv().
diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h
index 846d6cea9828..b6fc4e7b9b03 100644
--- a/include/llvm/Support/JamCRC.h
+++ b/include/llvm/Support/JamCRC.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h
index 259df9546c57..07fd94e29a1f 100644
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/KnownBits.h - Stores known zeros/ones -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,25 +109,36 @@ public:
 
   /// Truncate the underlying known Zero and One bits. This is equivalent
   /// to truncating the value we're tracking.
-  KnownBits trunc(unsigned BitWidth) {
+  KnownBits trunc(unsigned BitWidth) const {
     return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth));
   }
 
-  /// Zero extends the underlying known Zero and One bits. This is equivalent
-  /// to zero extending the value we're tracking.
-  KnownBits zext(unsigned BitWidth) {
-    return KnownBits(Zero.zext(BitWidth), One.zext(BitWidth));
+  /// Extends the underlying known Zero and One bits.
+  /// By setting ExtendedBitsAreKnownZero=true this will be equivalent to
+  /// zero extending the value we're tracking.
+  /// With ExtendedBitsAreKnownZero=false the extended bits are set to unknown.
+  KnownBits zext(unsigned BitWidth, bool ExtendedBitsAreKnownZero) const {
+    unsigned OldBitWidth = getBitWidth();
+    APInt NewZero = Zero.zext(BitWidth);
+    if (ExtendedBitsAreKnownZero)
+      NewZero.setBitsFrom(OldBitWidth);
+    return KnownBits(NewZero, One.zext(BitWidth));
   }
 
   /// Sign extends the underlying known Zero and One bits. This is equivalent
   /// to sign extending the value we're tracking.
-  KnownBits sext(unsigned BitWidth) {
+  KnownBits sext(unsigned BitWidth) const {
     return KnownBits(Zero.sext(BitWidth), One.sext(BitWidth));
   }
 
-  /// Zero extends or truncates the underlying known Zero and One bits. This is
-  /// equivalent to zero extending or truncating the value we're tracking.
-  KnownBits zextOrTrunc(unsigned BitWidth) {
+  /// Extends or truncates the underlying known Zero and One bits. When
+  /// extending, the new high bits are either marked as known zero (if
+  /// ExtendedBitsAreKnownZero=true) or left unknown (if
+  /// ExtendedBitsAreKnownZero=false).
+  KnownBits zextOrTrunc(unsigned BitWidth,
+                        bool ExtendedBitsAreKnownZero) const {
+    if (BitWidth > getBitWidth())
+      return zext(BitWidth, ExtendedBitsAreKnownZero);
     return KnownBits(Zero.zextOrTrunc(BitWidth), One.zextOrTrunc(BitWidth));
   }
 
@@ -192,6 +202,10 @@ public:
     return getBitWidth() - Zero.countPopulation();
   }
 
+  /// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry.
+  static KnownBits computeForAddCarry(
+      const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry);
+
   /// Compute known bits resulting from adding LHS and RHS.
   static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS,
                                     KnownBits RHS);
diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h
index 9feb07229225..a02b83ca9597 100644
--- a/include/llvm/Support/LEB128.h
+++ b/include/llvm/Support/LEB128.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/LEB128.h - [SU]LEB128 utility functions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -166,6 +165,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   int64_t Value = 0;
   unsigned Shift = 0;
   uint8_t Byte;
+  if (error)
+    *error = nullptr;
   do {
     if (end && p == end) {
       if (error)
@@ -175,11 +176,11 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
       return 0;
     }
     Byte = *p++;
-    Value |= (int64_t(Byte & 0x7f) << Shift);
+    Value |= (uint64_t(Byte & 0x7f) << Shift);
     Shift += 7;
   } while (Byte >= 128);
-  // Sign extend negative numbers.
-  if (Byte & 0x40)
+  // Sign extend negative numbers if needed.
+  if (Shift < 64 && (Byte & 0x40))
     Value |= (-1ULL) << Shift;
   if (n)
     *n = (unsigned)(p - orig_p);
diff --git a/include/llvm/Support/LineIterator.h b/include/llvm/Support/LineIterator.h
index 892d289976cb..c9f10ca975ae 100644
--- a/include/llvm/Support/LineIterator.h
+++ b/include/llvm/Support/LineIterator.h
@@ -1,9 +1,8 @@
 //===- LineIterator.h - Iterator to read a text buffer's lines --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/LockFileManager.h b/include/llvm/Support/LockFileManager.h
index 86db0b2b1020..57e4fbd84cd9 100644
--- a/include/llvm/Support/LockFileManager.h
+++ b/include/llvm/Support/LockFileManager.h
@@ -1,9 +1,8 @@
 //===--- LockFileManager.h - File-level locking utility ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_SUPPORT_LOCKFILEMANAGER_H
diff --git a/include/llvm/Support/LowLevelTypeImpl.h b/include/llvm/Support/LowLevelTypeImpl.h
index 2a1075c9a48d..0e02b6e7d750 100644
--- a/include/llvm/Support/LowLevelTypeImpl.h
+++ b/include/llvm/Support/LowLevelTypeImpl.h
@@ -1,9 +1,8 @@
 //== llvm/Support/LowLevelTypeImpl.h --------------------------- -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,8 +45,8 @@ public:
                SizeInBits, /*AddressSpace=*/0};
   }
 
-  /// Get a low-level pointer in the given address space (defaulting to 0).
-  static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits) {
+  /// Get a low-level pointer in the given address space.
+  static LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
     assert(SizeInBits > 0 && "invalid pointer size");
     return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0,
                SizeInBits, AddressSpace};
@@ -71,6 +70,14 @@ public:
                ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
   }
 
+  static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy) {
+    return NumElements == 1 ? ScalarTy : LLT::vector(NumElements, ScalarTy);
+  }
+
+  static LLT scalarOrVector(uint16_t NumElements, unsigned ScalarSize) {
+    return scalarOrVector(NumElements, LLT::scalar(ScalarSize));
+  }
+
   explicit LLT(bool isPointer, bool isVector, uint16_t NumElements,
                unsigned SizeInBits, unsigned AddressSpace) {
     init(isPointer, isVector, NumElements, SizeInBits, AddressSpace);
@@ -104,6 +111,32 @@ public:
     return getScalarSizeInBits() * getNumElements();
   }
 
+  /// Returns the total size of the type in bytes, i.e. number of whole bytes
+  /// needed to represent the size in bits. Must only be called on sized types.
+  unsigned getSizeInBytes() const {
+    return (getSizeInBits() + 7) / 8;
+  }
+
+  LLT getScalarType() const {
+    return isVector() ? getElementType() : *this;
+  }
+
+  /// If this type is a vector, return a vector with the same number of elements
+  /// but the new element type. Otherwise, return the new element type.
+  LLT changeElementType(LLT NewEltTy) const {
+    return isVector() ? LLT::vector(getNumElements(), NewEltTy) : NewEltTy;
+  }
+
+  /// If this type is a vector, return a vector with the same number of elements
+  /// but the new element size. Otherwise, return a scalar of the new size.
+  /// Invalid for pointer types. For pointer types, use changeElementType.
+  LLT changeElementSize(unsigned NewEltSize) const {
+    assert(!getScalarType().isPointer() &&
+           "invalid to directly change element size for pointers");
+    return isVector() ? LLT::vector(getNumElements(), NewEltSize)
+                      : LLT::scalar(NewEltSize);
+  }
+
   unsigned getScalarSizeInBits() const {
     assert(RawData != 0 && "Invalid Type");
     if (!IsVector) {
@@ -170,10 +203,10 @@ private:
   static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
   /// * Pointer (isPointer == 1 && isVector == 0):
   ///   SizeInBits: 16;
-  ///   AddressSpace: 23;
+  ///   AddressSpace: 24;
   static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
   static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
-      23, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
+      24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
   /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
   ///   NumElements: 16;
   ///   SizeOfElement: 32;
@@ -183,13 +216,13 @@ private:
   /// * Vector-of-pointer (isPointer == 1 && isVector == 1):
   ///   NumElements: 16;
   ///   SizeOfElement: 16;
-  ///   AddressSpace: 23;
+  ///   AddressSpace: 24;
   static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
   static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
       16,
       PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
   static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
-      23, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
+      24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
 
   uint64_t IsPointer : 1;
   uint64_t IsVector : 1;
diff --git a/include/llvm/Support/MSVCErrorWorkarounds.h b/include/llvm/Support/MSVCErrorWorkarounds.h
index 053ecf64d1e9..30e8febae20b 100644
--- a/include/llvm/Support/MSVCErrorWorkarounds.h
+++ b/include/llvm/Support/MSVCErrorWorkarounds.h
@@ -1,9 +1,8 @@
 //===--- MSVCErrorWorkarounds.h - Enable future<Error> in MSVC --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/MachineValueType.h b/include/llvm/Support/MachineValueType.h
index 552dea05029c..b94d2c4836cc 100644
--- a/include/llvm/Support/MachineValueType.h
+++ b/include/llvm/Support/MachineValueType.h
@@ -1,9 +1,8 @@
 //===- Support/MachineValueType.h - Machine-Level types ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -87,58 +86,65 @@ namespace llvm {
       v64i16         =  39,   // 64 x i16
       v128i16        =  40,   //128 x i16
 
-      v1i32          =  41,   //  1 x i32
-      v2i32          =  42,   //  2 x i32
-      v4i32          =  43,   //  4 x i32
-      v8i32          =  44,   //  8 x i32
-      v16i32         =  45,   // 16 x i32
-      v32i32         =  46,   // 32 x i32
-      v64i32         =  47,   // 64 x i32
-
-      v1i64          =  48,   //  1 x i64
-      v2i64          =  49,   //  2 x i64
-      v4i64          =  50,   //  4 x i64
-      v8i64          =  51,   //  8 x i64
-      v16i64         =  52,   // 16 x i64
-      v32i64         =  53,   // 32 x i64
-
-      v1i128         =  54,   //  1 x i128
+      v1i32          =  41,   //    1 x i32
+      v2i32          =  42,   //    2 x i32
+      v3i32          =  43,   //    3 x i32
+      v4i32          =  44,   //    4 x i32
+      v5i32          =  45,   //    5 x i32
+      v8i32          =  46,   //    8 x i32
+      v16i32         =  47,   //   16 x i32
+      v32i32         =  48,   //   32 x i32
+      v64i32         =  49,   //   64 x i32
+      v128i32        =  50,   //  128 x i32
+      v256i32        =  51,   //  256 x i32
+      v512i32        =  52,   //  512 x i32
+      v1024i32       =  53,   // 1024 x i32
+      v2048i32       =  54,   // 2048 x i32
+
+      v1i64          =  55,   //  1 x i64
+      v2i64          =  56,   //  2 x i64
+      v4i64          =  57,   //  4 x i64
+      v8i64          =  58,   //  8 x i64
+      v16i64         =  59,   // 16 x i64
+      v32i64         =  60,   // 32 x i64
+
+      v1i128         =  61,   //  1 x i128
 
       // Scalable integer types
-      nxv1i1         =  55,   // n x  1 x i1
-      nxv2i1         =  56,   // n x  2 x i1
-      nxv4i1         =  57,   // n x  4 x i1
-      nxv8i1         =  58,   // n x  8 x i1
-      nxv16i1        =  59,   // n x 16 x i1
-      nxv32i1        =  60,   // n x 32 x i1
-
-      nxv1i8         =  61,   // n x  1 x i8
-      nxv2i8         =  62,   // n x  2 x i8
-      nxv4i8         =  63,   // n x  4 x i8
-      nxv8i8         =  64,   // n x  8 x i8
-      nxv16i8        =  65,   // n x 16 x i8
-      nxv32i8        =  66,   // n x 32 x i8
-
-      nxv1i16        =  67,   // n x  1 x i16
-      nxv2i16        =  68,   // n x  2 x i16
-      nxv4i16        =  69,   // n x  4 x i16
-      nxv8i16        =  70,   // n x  8 x i16
-      nxv16i16       =  71,   // n x 16 x i16
-      nxv32i16       =  72,   // n x 32 x i16
-
-      nxv1i32        =  73,   // n x  1 x i32
-      nxv2i32        =  74,   // n x  2 x i32
-      nxv4i32        =  75,   // n x  4 x i32
-      nxv8i32        =  76,   // n x  8 x i32
-      nxv16i32       =  77,   // n x 16 x i32
-      nxv32i32       =  78,   // n x 32 x i32
-
-      nxv1i64        =  79,   // n x  1 x i64
-      nxv2i64        =  80,   // n x  2 x i64
-      nxv4i64        =  81,   // n x  4 x i64
-      nxv8i64        =  82,   // n x  8 x i64
-      nxv16i64       =  83,   // n x 16 x i64
-      nxv32i64       =  84,   // n x 32 x i64
+      nxv1i1         =  62,   // n x  1 x i1
+      nxv2i1         =  63,   // n x  2 x i1
+      nxv4i1         =  64,   // n x  4 x i1
+      nxv8i1         =  65,   // n x  8 x i1
+      nxv16i1        =  66,   // n x 16 x i1
+      nxv32i1        =  67,   // n x 32 x i1
+
+      nxv1i8         =  68,   // n x  1 x i8
+      nxv2i8         =  69,   // n x  2 x i8
+      nxv4i8         =  70,   // n x  4 x i8
+      nxv8i8         =  71,   // n x  8 x i8
+      nxv16i8        =  72,   // n x 16 x i8
+      nxv32i8        =  73,   // n x 32 x i8
+
+      nxv1i16        =  74,   // n x  1 x i16
+      nxv2i16        =  75,   // n x  2 x i16
+      nxv4i16        =  76,   // n x  4 x i16
+      nxv8i16        =  77,   // n x  8 x i16
+      nxv16i16       =  78,   // n x 16 x i16
+      nxv32i16       =  79,   // n x 32 x i16
+
+      nxv1i32        =  80,   // n x  1 x i32
+      nxv2i32        =  81,   // n x  2 x i32
+      nxv4i32        =  82,   // n x  4 x i32
+      nxv8i32        =  83,   // n x  8 x i32
+      nxv16i32       =  84,   // n x 16 x i32
+      nxv32i32       =  85,   // n x 32 x i32
+
+      nxv1i64        =  86,   // n x  1 x i64
+      nxv2i64        =  87,   // n x  2 x i64
+      nxv4i64        =  88,   // n x  4 x i64
+      nxv8i64        =  89,   // n x  8 x i64
+      nxv16i64       =  90,   // n x 16 x i64
+      nxv32i64       =  91,   // n x 32 x i64
 
       FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
       LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,
@@ -146,31 +152,40 @@ namespace llvm {
       FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
       LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,
 
-      v2f16          =  85,   //  2 x f16
-      v4f16          =  86,   //  4 x f16
-      v8f16          =  87,   //  8 x f16
-      v1f32          =  88,   //  1 x f32
-      v2f32          =  89,   //  2 x f32
-      v4f32          =  90,   //  4 x f32
-      v8f32          =  91,   //  8 x f32
-      v16f32         =  92,   // 16 x f32
-      v1f64          =  93,   //  1 x f64
-      v2f64          =  94,   //  2 x f64
-      v4f64          =  95,   //  4 x f64
-      v8f64          =  96,   //  8 x f64
-
-      nxv2f16        =  97,   // n x  2 x f16
-      nxv4f16        =  98,   // n x  4 x f16
-      nxv8f16        =  99,   // n x  8 x f16
-      nxv1f32        = 100,   // n x  1 x f32
-      nxv2f32        = 101,   // n x  2 x f32
-      nxv4f32        = 102,   // n x  4 x f32
-      nxv8f32        = 103,   // n x  8 x f32
-      nxv16f32       = 104,   // n x 16 x f32
-      nxv1f64        = 105,   // n x  1 x f64
-      nxv2f64        = 106,   // n x  2 x f64
-      nxv4f64        = 107,   // n x  4 x f64
-      nxv8f64        = 108,   // n x  8 x f64
+      v2f16          =  92,   //    2 x f16
+      v4f16          =  93,   //    4 x f16
+      v8f16          =  94,   //    8 x f16
+      v1f32          =  95,   //    1 x f32
+      v2f32          =  96,   //    2 x f32
+      v3f32          =  97,   //    3 x f32
+      v4f32          =  98,   //    4 x f32
+      v5f32          =  99,   //    5 x f32
+      v8f32          =  100,  //    8 x f32
+      v16f32         =  101,  //   16 x f32
+      v32f32         =  102,  //   32 x f32
+      v64f32         =  103,  //   64 x f32
+      v128f32        =  104,  //  128 x f32
+      v256f32        =  105,  //  256 x f32
+      v512f32        =  106,  //  512 x f32
+      v1024f32       =  107,  // 1024 x f32
+      v2048f32       =  108,  // 2048 x f32
+      v1f64          =  109,  //    1 x f64
+      v2f64          =  110,  //    2 x f64
+      v4f64          =  111,  //    4 x f64
+      v8f64          =  112,  //    8 x f64
+
+      nxv2f16        =  113,  // n x  2 x f16
+      nxv4f16        =  114,  // n x  4 x f16
+      nxv8f16        =  115,  // n x  8 x f16
+      nxv1f32        =  116,  // n x  1 x f32
+      nxv2f32        =  117,  // n x  2 x f32
+      nxv4f32        =  118,  // n x  4 x f32
+      nxv8f32        =  119,  // n x  8 x f32
+      nxv16f32       =  120,  // n x 16 x f32
+      nxv1f64        =  121,  // n x  1 x f64
+      nxv2f64        =  122,  // n x  2 x f64
+      nxv4f64        =  123,  // n x  4 x f64
+      nxv8f64        =  124,  // n x  8 x f64
 
       FIRST_FP_VECTOR_VALUETYPE = v2f16,
       LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@@ -181,25 +196,25 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v1i1,
       LAST_VECTOR_VALUETYPE  = nxv8f64,
 
-      x86mmx         =  109,   // This is an X86 MMX value
+      x86mmx         =  125,   // This is an X86 MMX value
 
-      Glue           =  110,   // This glues nodes together during pre-RA sched
+      Glue           =  126,   // This glues nodes together during pre-RA sched
 
-      isVoid         =  111,   // This has no value
+      isVoid         =  127,   // This has no value
 
-      Untyped        =  112,   // This value takes a register, but has
+      Untyped        =  128,   // This value takes a register, but has
                                // unspecified type.  The register class
                                // will be determined by the opcode.
 
-      ExceptRef      = 113,    // WebAssembly's except_ref type
+      exnref         =  129,   // WebAssembly's exnref type
 
       FIRST_VALUETYPE = 1,     // This is always the beginning of the list.
-      LAST_VALUETYPE =  114,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  130,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
       // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
       // This value must be a multiple of 32.
-      MAX_ALLOWED_VALUETYPE = 128,
+      MAX_ALLOWED_VALUETYPE = 160,
 
       // A value of type llvm::TokenTy
       token          = 248,
@@ -464,11 +479,18 @@ namespace llvm {
       case nxv32i16: return i16;
       case v1i32:
       case v2i32:
+      case v3i32:
       case v4i32:
+      case v5i32:
       case v8i32:
       case v16i32:
       case v32i32:
       case v64i32:
+      case v128i32:
+      case v256i32:
+      case v512i32:
+      case v1024i32:
+      case v2048i32:
       case nxv1i32:
       case nxv2i32:
       case nxv4i32:
@@ -496,9 +518,18 @@ namespace llvm {
       case nxv8f16: return f16;
       case v1f32:
       case v2f32:
+      case v3f32:
       case v4f32:
+      case v5f32:
       case v8f32:
       case v16f32:
+      case v32f32:
+      case v64f32:
+      case v128f32:
+      case v256f32:
+      case v512f32:
+      case v1024f32:
+      case v2048f32:
       case nxv1f32:
       case nxv2f32:
       case nxv4f32:
@@ -519,21 +550,33 @@ namespace llvm {
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
-      case v1024i1: return 1024;
-      case v512i1: return 512;
-      case v256i8: return 256;
+      case v2048i32:
+      case v2048f32: return 2048;
+      case v1024i1:
+      case v1024i32:
+      case v1024f32: return 1024;
+      case v512i1:
+      case v512i32:
+      case v512f32: return 512;
+      case v256i8:
+      case v256i32:
+      case v256f32: return 256;
       case v128i1:
       case v128i8:
-      case v128i16: return 128;
+      case v128i16:
+      case v128i32:
+      case v128f32: return 128;
       case v64i1:
       case v64i8:
       case v64i16:
-      case v64i32: return 64;
+      case v64i32:
+      case v64f32: return 64;
       case v32i1:
       case v32i8:
       case v32i16:
       case v32i32:
       case v32i64:
+      case v32f32:
       case nxv32i1:
       case nxv32i8:
       case nxv32i16:
@@ -567,6 +610,8 @@ namespace llvm {
       case nxv8f16:
       case nxv8f32:
       case nxv8f64: return 8;
+      case v5i32:
+      case v5f32: return 5;
       case v4i1:
       case v4i8:
       case v4i16:
@@ -583,6 +628,8 @@ namespace llvm {
       case nxv4f16:
       case nxv4f32:
       case nxv4f64: return 4;
+      case v3i32:
+      case v3f32: return 3;
       case v2i1:
       case v2i8:
       case v2i16:
@@ -693,6 +740,8 @@ namespace llvm {
       case nxv2f32:
       case nxv1f64: return 64;
       case f80 :  return 80;
+      case v3i32:
+      case v3f32: return 96;
       case f128:
       case ppcf128:
       case i128:
@@ -712,6 +761,8 @@ namespace llvm {
       case nxv8f16:
       case nxv4f32:
       case nxv2f64: return 128;
+      case v5i32:
+      case v5f32: return 160;
       case v32i8:
       case v16i16:
       case v8i32:
@@ -741,14 +792,26 @@ namespace llvm {
       case v64i16:
       case v32i32:
       case v16i64:
+      case v32f32:
       case nxv32i32:
       case nxv16i64: return 1024;
       case v256i8:
       case v128i16:
       case v64i32:
       case v32i64:
+      case v64f32:
       case nxv32i64: return 2048;
-      case ExceptRef: return 0; // opaque type
+      case v128i32:
+      case v128f32:  return 4096;
+      case v256i32:
+      case v256f32:  return 8192;
+      case v512i32:
+      case v512f32:  return 16384;
+      case v1024i32:
+      case v1024f32:  return 32768;
+      case v2048i32:
+      case v2048f32:  return 65536;
+      case exnref: return 0; // opaque type
       }
     }
 
@@ -862,13 +925,20 @@ namespace llvm {
         if (NumElements == 128) return MVT::v128i16;
         break;
       case MVT::i32:
-        if (NumElements == 1)  return MVT::v1i32;
-        if (NumElements == 2)  return MVT::v2i32;
-        if (NumElements == 4)  return MVT::v4i32;
-        if (NumElements == 8)  return MVT::v8i32;
-        if (NumElements == 16) return MVT::v16i32;
-        if (NumElements == 32) return MVT::v32i32;
-        if (NumElements == 64) return MVT::v64i32;
+        if (NumElements == 1)    return MVT::v1i32;
+        if (NumElements == 2)    return MVT::v2i32;
+        if (NumElements == 3)    return MVT::v3i32;
+        if (NumElements == 4)    return MVT::v4i32;
+        if (NumElements == 5)    return MVT::v5i32;
+        if (NumElements == 8)    return MVT::v8i32;
+        if (NumElements == 16)   return MVT::v16i32;
+        if (NumElements == 32)   return MVT::v32i32;
+        if (NumElements == 64)   return MVT::v64i32;
+        if (NumElements == 128)  return MVT::v128i32;
+        if (NumElements == 256)  return MVT::v256i32;
+        if (NumElements == 512)  return MVT::v512i32;
+        if (NumElements == 1024) return MVT::v1024i32;
+        if (NumElements == 2048) return MVT::v2048i32;
         break;
       case MVT::i64:
         if (NumElements == 1)  return MVT::v1i64;
@@ -887,11 +957,20 @@ namespace llvm {
         if (NumElements == 8)  return MVT::v8f16;
         break;
       case MVT::f32:
-        if (NumElements == 1)  return MVT::v1f32;
-        if (NumElements == 2)  return MVT::v2f32;
-        if (NumElements == 4)  return MVT::v4f32;
-        if (NumElements == 8)  return MVT::v8f32;
-        if (NumElements == 16) return MVT::v16f32;
+        if (NumElements == 1)    return MVT::v1f32;
+        if (NumElements == 2)    return MVT::v2f32;
+        if (NumElements == 3)    return MVT::v3f32;
+        if (NumElements == 4)    return MVT::v4f32;
+        if (NumElements == 5)    return MVT::v5f32;
+        if (NumElements == 8)    return MVT::v8f32;
+        if (NumElements == 16)   return MVT::v16f32;
+        if (NumElements == 32)   return MVT::v32f32;
+        if (NumElements == 64)   return MVT::v64f32;
+        if (NumElements == 128)  return MVT::v128f32;
+        if (NumElements == 256)  return MVT::v256f32;
+        if (NumElements == 512)  return MVT::v512f32;
+        if (NumElements == 1024) return MVT::v1024f32;
+        if (NumElements == 2048) return MVT::v2048f32;
         break;
       case MVT::f64:
         if (NumElements == 1)  return MVT::v1f64;
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index b4bf3210cc73..e65bb051f181 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/ManagedStatic.h - Static Global wrapper ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,18 +32,41 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
   static void call(void *Ptr) { delete[](T *)Ptr; }
 };
 
+// ManagedStatic must be initialized to zero, and it must *not* have a dynamic
+// initializer because managed statics are often created while running other
+// dynamic initializers. In standard C++11, the best way to accomplish this is
+// with a constexpr default constructor. However, different versions of the
+// Visual C++ compiler have had bugs where, even though the constructor may be
+// constexpr, a dynamic initializer may be emitted depending on optimization
+// settings. For the affected versions of MSVC, use the old linker
+// initialization pattern of not providing a constructor and leaving the fields
+// uninitialized.
+#if !defined(_MSC_VER) || defined(__clang__)
+#define LLVM_USE_CONSTEXPR_CTOR
+#endif
+
 /// ManagedStaticBase - Common base class for ManagedStatic instances.
 class ManagedStaticBase {
 protected:
+#ifdef LLVM_USE_CONSTEXPR_CTOR
+  mutable std::atomic<void *> Ptr{};
+  mutable void (*DeleterFn)(void *) = nullptr;
+  mutable const ManagedStaticBase *Next = nullptr;
+#else
   // This should only be used as a static variable, which guarantees that this
   // will be zero initialized.
   mutable std::atomic<void *> Ptr;
-  mutable void (*DeleterFn)(void*);
+  mutable void (*DeleterFn)(void *);
   mutable const ManagedStaticBase *Next;
+#endif
 
   void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const;
 
 public:
+#ifdef LLVM_USE_CONSTEXPR_CTOR
+  constexpr ManagedStaticBase() = default;
+#endif
+
   /// isConstructed - Return true if this object has not been created yet.
   bool isConstructed() const { return Ptr != nullptr; }
 
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index b59f21b4998e..249139e824b5 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -52,14 +51,14 @@ enum ZeroBehavior {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
       return 0;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     T Shift = std::numeric_limits<T>::digits >> 1;
     T Mask = std::numeric_limits<T>::max() >> Shift;
     while (Shift) {
@@ -76,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -92,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -117,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -126,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
       T Tmp = Val >> Shift;
       if (Tmp)
@@ -145,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -161,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -186,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -459,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -475,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -560,15 +559,20 @@ inline unsigned Log2_64_Ceil(uint64_t Value) {
 }
 
 /// Return the greatest common divisor of the values using Euclid's algorithm.
-inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+template <typename T>
+inline T greatestCommonDivisor(T A, T B) {
   while (B) {
-    uint64_t T = B;
+    T Tmp = B;
     B = A % B;
-    A = T;
+    A = Tmp;
   }
   return A;
 }
 
+inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+  return greatestCommonDivisor<uint64_t>(A, B);
+}
+
 /// This function takes a 64-bit integer and returns the bit equivalent double.
 inline double BitsToDouble(uint64_t Bits) {
   double D;
diff --git a/include/llvm/Support/MemAlloc.h b/include/llvm/Support/MemAlloc.h
index d06c659cfba6..0e5869141fd3 100644
--- a/include/llvm/Support/MemAlloc.h
+++ b/include/llvm/Support/MemAlloc.h
@@ -1,9 +1,8 @@
 //===- MemAlloc.h - Memory allocation functions -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -25,23 +24,41 @@ namespace llvm {
 
 LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_malloc(size_t Sz) {
   void *Result = std::malloc(Sz);
-  if (Result == nullptr)
+  if (Result == nullptr) {
+    // It is implementation-defined whether allocation occurs if the space
+    // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+    // non-zero, if the space requested was zero.
+    if (Sz == 0)
+      return safe_malloc(1);
     report_bad_alloc_error("Allocation failed");
+  }
   return Result;
 }
 
 LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_calloc(size_t Count,
                                                         size_t Sz) {
   void *Result = std::calloc(Count, Sz);
-  if (Result == nullptr)
+  if (Result == nullptr) {
+    // It is implementation-defined whether allocation occurs if the space
+    // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+    // non-zero, if the space requested was zero.
+    if (Count == 0 || Sz == 0)
+      return safe_malloc(1);
     report_bad_alloc_error("Allocation failed");
+  }
   return Result;
 }
 
 LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_realloc(void *Ptr, size_t Sz) {
   void *Result = std::realloc(Ptr, Sz);
-  if (Result == nullptr)
+  if (Result == nullptr) {
+    // It is implementation-defined whether allocation occurs if the space
+    // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+    // non-zero, if the space requested was zero.
+    if (Sz == 0)
+      return safe_malloc(1);
     report_bad_alloc_error("Allocation failed");
+  }
   return Result;
 }
 
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index fa026d49a61b..6f22dd7080cd 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Memory.h - Memory Support -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,10 @@
 #include <system_error>
 
 namespace llvm {
+
+// Forward declare raw_ostream: it is used for debug dumping below.
+class raw_ostream;
+
 namespace sys {
 
   /// This class encapsulates the notion of a memory block which has an address
@@ -28,14 +31,18 @@ namespace sys {
   /// Memory block abstraction.
   class MemoryBlock {
   public:
-    MemoryBlock() : Address(nullptr), Size(0) { }
-    MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
+    MemoryBlock() : Address(nullptr), AllocatedSize(0) {}
+    MemoryBlock(void *addr, size_t allocatedSize)
+        : Address(addr), AllocatedSize(allocatedSize) {}
     void *base() const { return Address; }
-    size_t size() const { return Size; }
-
+    /// The size as it was allocated. This is always greater than or equal to
+    /// the size that was originally requested.
+    size_t allocatedSize() const { return AllocatedSize; }
+  
   private:
     void *Address;    ///< Address of first byte of memory area
-    size_t Size;      ///< Size, in bytes of the memory area
+    size_t AllocatedSize; ///< Size, in bytes, of the memory area
+    unsigned Flags = 0;
     friend class Memory;
   };
 
@@ -46,9 +53,11 @@ namespace sys {
   class Memory {
   public:
     enum ProtectionFlags {
-      MF_READ  = 0x1000000,
+      MF_READ = 0x1000000,
       MF_WRITE = 0x2000000,
-      MF_EXEC  = 0x4000000
+      MF_EXEC = 0x4000000,
+      MF_RWE_MASK = 0x7000000,
+      MF_HUGE_HINT = 0x0000001
     };
 
     /// This method allocates a block of memory that is suitable for loading
@@ -133,13 +142,22 @@ namespace sys {
       Memory::releaseMappedMemory(M);
     }
     void *base() const { return M.base(); }
-    size_t size() const { return M.size(); }
+    /// The size as it was allocated. This is always greater than or equal to
+    /// the size that was originally requested.
+    size_t allocatedSize() const { return M.allocatedSize(); }
     MemoryBlock getMemoryBlock() const { return M; }
   private:
     MemoryBlock M;
   };
 
-}
-}
+#ifndef NDEBUG
+  /// Debugging output for Memory::ProtectionFlags.
+  raw_ostream &operator<<(raw_ostream &OS, const Memory::ProtectionFlags &PF);
+
+  /// Debugging output for MemoryBlock.
+  raw_ostream &operator<<(raw_ostream &OS, const MemoryBlock &MB);
+#endif // ifndef NDEBUG
+  }    // end namespace sys
+  }    // end namespace llvm
 
 #endif
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 8933295d4ea4..b5196cd84cb4 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -1,9 +1,8 @@
 //===--- MemoryBuffer.h - Memory Buffer Interface ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,7 +90,7 @@ public:
   /// MemoryBuffer. The slice is specified by an \p Offset and \p MapSize.
   /// Since this is in the middle of a file, the buffer is not null terminated.
   static ErrorOr<std::unique_ptr<MemoryBuffer>>
-  getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
+  getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
                    int64_t Offset, bool IsVolatile = false);
 
   /// Given an already-open file descriptor, read the file and return a
@@ -101,7 +100,7 @@ public:
   /// can change outside the user's control, e.g. when libclang tries to parse
   /// while the user is editing/updating the file or if the file is on an NFS.
   static ErrorOr<std::unique_ptr<MemoryBuffer>>
-  getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
+  getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
               bool RequiresNullTerminator = true, bool IsVolatile = false);
 
   /// Open the specified memory range as a MemoryBuffer. Note that InputData
@@ -265,7 +264,7 @@ class MemoryBufferRef {
 
 public:
   MemoryBufferRef() = default;
-  MemoryBufferRef(MemoryBuffer& Buffer)
+  MemoryBufferRef(const MemoryBuffer& Buffer)
       : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
   MemoryBufferRef(StringRef Buffer, StringRef Identifier)
       : Buffer(Buffer), Identifier(Identifier) {}
diff --git a/include/llvm/Support/MipsABIFlags.h b/include/llvm/Support/MipsABIFlags.h
index 12c350015b21..d3233f645fb9 100644
--- a/include/llvm/Support/MipsABIFlags.h
+++ b/include/llvm/Support/MipsABIFlags.h
@@ -1,9 +1,8 @@
 //===--- MipsABIFlags.h - MIPS ABI flags ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
index 680d94b24ef5..c3abfc7a7806 100644
--- a/include/llvm/Support/Mutex.h
+++ b/include/llvm/Support/Mutex.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Mutex.h - Mutex Operating System Concept -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h
index 641d64d94988..d86ced145816 100644
--- a/include/llvm/Support/MutexGuard.h
+++ b/include/llvm/Support/MutexGuard.h
@@ -1,9 +1,8 @@
 //===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/NativeFormatting.h b/include/llvm/Support/NativeFormatting.h
index 6d1dd7b422fe..825a44c77c00 100644
--- a/include/llvm/Support/NativeFormatting.h
+++ b/include/llvm/Support/NativeFormatting.h
@@ -1,9 +1,8 @@
 //===- NativeFormatting.h - Low level formatting helpers ---------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h
index 912e2700d1a0..d84da92aab9b 100644
--- a/include/llvm/Support/OnDiskHashTable.h
+++ b/include/llvm/Support/OnDiskHashTable.h
@@ -1,9 +1,8 @@
 //===--- OnDiskHashTable.h - On-Disk Hash Table Implementation --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/Options.h b/include/llvm/Support/Options.h
index dd321c6a1984..d02ef85a75bf 100644
--- a/include/llvm/Support/Options.h
+++ b/include/llvm/Support/Options.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Options.h - Debug options support -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h
index 1462265343be..eab9b492c4a5 100644
--- a/include/llvm/Support/Parallel.h
+++ b/include/llvm/Support/Parallel.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Parallel.h - Parallel algorithms ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -74,8 +73,12 @@ public:
 
 class TaskGroup {
   Latch L;
+  bool Parallel;
 
 public:
+  TaskGroup();
+  ~TaskGroup();
+
   void spawn(std::function<void()> f);
 
   void sync() const { L.sync(); }
diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h
index 76de887b7cb4..5c0bee58f188 100644
--- a/include/llvm/Support/Path.h
+++ b/include/llvm/Support/Path.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Path.h - Path Operating System Concept ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/PluginLoader.h b/include/llvm/Support/PluginLoader.h
index bdbb134b28eb..c0c516bdae03 100644
--- a/include/llvm/Support/PluginLoader.h
+++ b/include/llvm/Support/PluginLoader.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/PluginLoader.h - Plugin Loader for Tools ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h
index 1710b57131d1..1e7e5b53ca65 100644
--- a/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/include/llvm/Support/PointerLikeTypeTraits.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h
index 4d64fe4ef727..6eb070b2297e 100644
--- a/include/llvm/Support/PrettyStackTrace.h
+++ b/include/llvm/Support/PrettyStackTrace.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/PrettyStackTrace.h - Pretty Crash Handling --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,8 +21,22 @@
 namespace llvm {
   class raw_ostream;
 
+  /// Enables dumping a "pretty" stack trace when the program crashes.
+  ///
+  /// \see PrettyStackTraceEntry
   void EnablePrettyStackTrace();
 
+  /// Enables (or disables) dumping a "pretty" stack trace when the user sends
+  /// SIGINFO or SIGUSR1 to the current process.
+  ///
+  /// This is a per-thread decision so that a program can choose to print stack
+  /// traces only on a primary thread, or on all threads that use
+  /// PrettyStackTraceEntry.
+  ///
+  /// \see EnablePrettyStackTrace
+  /// \see PrettyStackTraceEntry
+  void EnablePrettyStackTraceOnSigInfoForThisThread(bool ShouldEnable = true);
+
   /// PrettyStackTraceEntry - This class is used to represent a frame of the
   /// "pretty" stack trace that is dumped when a program crashes. You can define
   /// subclasses of this and declare them on the program stack: when they are
diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h
index cb55d41316e3..0f8670d0419c 100644
--- a/include/llvm/Support/Printable.h
+++ b/include/llvm/Support/Printable.h
@@ -1,9 +1,8 @@
 //===--- Printable.h - Print function helpers -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index f9f1cac86278..67e37912519b 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Process.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -29,6 +28,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Error.h"
 #include <system_error>
 
 namespace llvm {
@@ -42,7 +42,25 @@ namespace sys {
 /// current executing process.
 class Process {
 public:
-  static unsigned getPageSize();
+  /// Get the process's page size.
+  /// This may fail if the underlying syscall returns an error. In most cases,
+  /// page size information is used for optimization, and this error can be
+  /// safely discarded by calling consumeError, and an estimated page size
+  /// substituted instead.
+  static Expected<unsigned> getPageSize();
+
+  /// Get the process's estimated page size.
+  /// This function always succeeds, but if the underlying syscall to determine
+  /// the page size fails then this will silently return an estimated page size.
+  /// The estimated page size is guaranteed to be a power of 2.
+  static unsigned getPageSizeEstimate() {
+    Expected<unsigned> PageSizeOrErr = getPageSize();
+    if (!PageSizeOrErr) {
+      consumeError(PageSizeOrErr.takeError());
+      return 4096; // Fallback used when the real page size is unavailable.
+    }
+    return *PageSizeOrErr;
+  }
 
   /// Return process memory usage.
   /// This static function will return the total amount of memory allocated
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index 1f4dbdce3323..6b2315c5da8d 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Program.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h
index 5ac3e558999b..9cd57cbd65a1 100644
--- a/include/llvm/Support/RWMutex.h
+++ b/include/llvm/Support/RWMutex.h
@@ -1,9 +1,8 @@
 //===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/RandomNumberGenerator.h b/include/llvm/Support/RandomNumberGenerator.h
index 1399dab815f8..55d6876cc5e4 100644
--- a/include/llvm/Support/RandomNumberGenerator.h
+++ b/include/llvm/Support/RandomNumberGenerator.h
@@ -1,9 +1,8 @@
 //==- llvm/Support/RandomNumberGenerator.h - RNG for diversity ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index 53db2e86d12d..bbd9ae321ae3 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -1,9 +1,8 @@
 //==- llvm/Support/Recycler.h - Recycling Allocator --------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h
index 32b033b17946..2c29dacfe212 100644
--- a/include/llvm/Support/RecyclingAllocator.h
+++ b/include/llvm/Support/RecyclingAllocator.h
@@ -1,9 +1,8 @@
 //==- llvm/Support/RecyclingAllocator.h - Recycling Allocator ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index d901eb1e3ffb..2d19b10fd890 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -1,9 +1,8 @@
 //===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 02fd5b9354a1..4d8aa5f1470d 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -1,9 +1,8 @@
 //=== Registry.h - Linker-supported plugin registries -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -81,17 +80,17 @@ namespace llvm {
 
     /// Iterators for registry entries.
     ///
-    class iterator {
+    class iterator
+        : public llvm::iterator_facade_base<iterator, std::forward_iterator_tag,
+                                            const entry> {
       const node *Cur;
 
     public:
       explicit iterator(const node *N) : Cur(N) {}
 
       bool operator==(const iterator &That) const { return Cur == That.Cur; }
-      bool operator!=(const iterator &That) const { return Cur != That.Cur; }
       iterator &operator++() { Cur = Cur->Next; return *this; }
       const entry &operator*() const { return Cur->Val; }
-      const entry *operator->() const { return &Cur->Val; }
     };
 
     // begin is not defined here in order to avoid usage of an undefined static
diff --git a/include/llvm/Support/SHA1.h b/include/llvm/Support/SHA1.h
index 1fc60a878f94..87fe94bbd5cd 100644
--- a/include/llvm/Support/SHA1.h
+++ b/include/llvm/Support/SHA1.h
@@ -1,9 +1,8 @@
 //==- SHA1.h - SHA1 implementation for LLVM                     --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This code is taken from public domain
diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h
index c74feff378d6..d8607034ee86 100644
--- a/include/llvm/Support/SMLoc.h
+++ b/include/llvm/Support/SMLoc.h
@@ -1,9 +1,8 @@
 //===- SMLoc.h - Source location for use with diagnostics -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/SMTAPI.h b/include/llvm/Support/SMTAPI.h
new file mode 100644
index 000000000000..24dcd124593e
--- /dev/null
+++ b/include/llvm/Support/SMTAPI.h
@@ -0,0 +1,447 @@
+//===- SMTAPI.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines a generic SMT solver API, which will be the base class
+//  for every SMT solver specific class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SMTAPI_H
+#define LLVM_SUPPORT_SMTAPI_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+namespace llvm {
+
+/// Generic base class for SMT sorts, implemented by solver-specific classes.
+class SMTSort {
+public:
+  SMTSort() = default;
+  virtual ~SMTSort() = default;
+
+  /// Returns true if the sort is a bitvector, calls isBitvectorSortImpl().
+  virtual bool isBitvectorSort() const { return isBitvectorSortImpl(); }
+
+  /// Returns true if the sort is a floating-point, calls isFloatSortImpl().
+  virtual bool isFloatSort() const { return isFloatSortImpl(); }
+
+  /// Returns true if the sort is a boolean, calls isBooleanSortImpl().
+  virtual bool isBooleanSort() const { return isBooleanSortImpl(); }
+
+  /// Returns the bitvector size. Asserts that the sort is a bitvector and that
+  /// the size is non-zero. Calls getBitvectorSortSizeImpl().
+  virtual unsigned getBitvectorSortSize() const {
+    assert(isBitvectorSort() && "Not a bitvector sort!");
+    unsigned Size = getBitvectorSortSizeImpl();
+    assert(Size && "Size is zero!");
+    return Size;
+  }
+
+  /// Returns the floating-point size. Asserts that the sort is floating-point
+  /// and that the size is non-zero. Calls getFloatSortSizeImpl().
+  virtual unsigned getFloatSortSize() const {
+    assert(isFloatSort() && "Not a floating-point sort!");
+    unsigned Size = getFloatSortSizeImpl();
+    assert(Size && "Size is zero!");
+    return Size;
+  }
+
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
+
+  bool operator<(const SMTSort &Other) const {
+    llvm::FoldingSetNodeID ID1, ID2;
+    Profile(ID1);
+    Other.Profile(ID2);
+    return ID1 < ID2; // Orders sorts by their profiled FoldingSetNodeID.
+  }
+
+  friend bool operator==(SMTSort const &LHS, SMTSort const &RHS) {
+    return LHS.equal_to(RHS);
+  }
+
+  virtual void print(raw_ostream &OS) const = 0;
+
+  LLVM_DUMP_METHOD void dump() const;
+
+protected:
+  /// Query the SMT solver and returns true if two sorts are equal (same kind
+  /// and bit width). This does not check if the two sorts are the same objects.
+  virtual bool equal_to(SMTSort const &other) const = 0;
+
+  /// Query the SMT solver and checks if a sort is bitvector.
+  virtual bool isBitvectorSortImpl() const = 0;
+
+  /// Query the SMT solver and checks if a sort is floating-point.
+  virtual bool isFloatSortImpl() const = 0;
+
+  /// Query the SMT solver and checks if a sort is boolean.
+  virtual bool isBooleanSortImpl() const = 0;
+
+  /// Query the SMT solver and returns the bitvector sort bit width.
+  virtual unsigned getBitvectorSortSizeImpl() const = 0;
+
+  /// Query the SMT solver and returns the floating-point sort bit width.
+  virtual unsigned getFloatSortSizeImpl() const = 0;
+};
+
+/// Raw const pointer to an SMTSort, used by the SMTSolver API.
+using SMTSortRef = const SMTSort *;
+
+/// Generic base class for SMT expressions.
+class SMTExpr {
+public:
+  SMTExpr() = default;
+  virtual ~SMTExpr() = default;
+
+  bool operator<(const SMTExpr &Other) const {
+    llvm::FoldingSetNodeID ID1, ID2;
+    Profile(ID1);
+    Other.Profile(ID2);
+    return ID1 < ID2; // Orders expressions by their profiled FoldingSetNodeID.
+  }
+
+  virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
+
+  friend bool operator==(SMTExpr const &LHS, SMTExpr const &RHS) {
+    return LHS.equal_to(RHS);
+  }
+
+  virtual void print(raw_ostream &OS) const = 0;
+
+  LLVM_DUMP_METHOD void dump() const;
+
+protected:
+  /// Returns true if this expression is equal to \p other; the comparison is
+  /// defined by the solver-specific subclass.
+  virtual bool equal_to(SMTExpr const &other) const = 0;
+};
+
+/// Raw const pointer to an SMTExpr, used by the SMTSolver API.
+using SMTExprRef = const SMTExpr *;
+
+/// Generic base class for SMT Solvers
+///
+/// This class is responsible for wrapping all sorts and expression generation,
+/// through the mk* methods. It also exposes the solver state (addConstraint,
+/// check, push, pop, reset) and model queries (getBitvector, getBoolean, ...).
+class SMTSolver {
+public:
+  SMTSolver() = default;
+  virtual ~SMTSolver() = default;
+
+  LLVM_DUMP_METHOD void dump() const;
+
+  // Returns an appropriate floating-point sort for the given bitwidth.
+  SMTSortRef getFloatSort(unsigned BitWidth) {
+    switch (BitWidth) {
+    case 16:
+      return getFloat16Sort();
+    case 32:
+      return getFloat32Sort();
+    case 64:
+      return getFloat64Sort();
+    case 128:
+      return getFloat128Sort();
+    default:;
+    }
+    llvm_unreachable("Unsupported floating-point bitwidth!");
+  }
+
+  // Returns a boolean sort.
+  virtual SMTSortRef getBoolSort() = 0;
+
+  // Returns an appropriate bitvector sort for the given bitwidth.
+  virtual SMTSortRef getBitvectorSort(const unsigned BitWidth) = 0;
+
+  // Returns a floating-point sort of width 16
+  virtual SMTSortRef getFloat16Sort() = 0;
+
+  // Returns a floating-point sort of width 32
+  virtual SMTSortRef getFloat32Sort() = 0;
+
+  // Returns a floating-point sort of width 64
+  virtual SMTSortRef getFloat64Sort() = 0;
+
+  // Returns a floating-point sort of width 128
+  virtual SMTSortRef getFloat128Sort() = 0;
+
+  // Returns an appropriate sort for the given AST.
+  virtual SMTSortRef getSort(const SMTExprRef &AST) = 0;
+
+  /// Given a constraint, adds it to the solver
+  virtual void addConstraint(const SMTExprRef &Exp) const = 0;
+
+  /// Creates a bitvector addition operation
+  virtual SMTExprRef mkBVAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector subtraction operation
+  virtual SMTExprRef mkBVSub(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector multiplication operation
+  virtual SMTExprRef mkBVMul(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed modulus operation
+  virtual SMTExprRef mkBVSRem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned modulus operation
+  virtual SMTExprRef mkBVURem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed division operation
+  virtual SMTExprRef mkBVSDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned division operation
+  virtual SMTExprRef mkBVUDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector logical shift left operation
+  virtual SMTExprRef mkBVShl(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector arithmetic shift right operation
+  virtual SMTExprRef mkBVAshr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector logical shift right operation
+  virtual SMTExprRef mkBVLshr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector negation operation
+  virtual SMTExprRef mkBVNeg(const SMTExprRef &Exp) = 0;
+
+  /// Creates a bitvector not operation
+  virtual SMTExprRef mkBVNot(const SMTExprRef &Exp) = 0;
+
+  /// Creates a bitvector xor operation
+  virtual SMTExprRef mkBVXor(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector or operation
+  virtual SMTExprRef mkBVOr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector and operation
+  virtual SMTExprRef mkBVAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned less-than operation
+  virtual SMTExprRef mkBVUlt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed less-than operation
+  virtual SMTExprRef mkBVSlt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned greater-than operation
+  virtual SMTExprRef mkBVUgt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed greater-than operation
+  virtual SMTExprRef mkBVSgt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned less-equal-than operation
+  virtual SMTExprRef mkBVUle(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed less-equal-than operation
+  virtual SMTExprRef mkBVSle(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector unsigned greater-equal-than operation
+  virtual SMTExprRef mkBVUge(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a bitvector signed greater-equal-than operation
+  virtual SMTExprRef mkBVSge(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a boolean not operation
+  virtual SMTExprRef mkNot(const SMTExprRef &Exp) = 0;
+
+  /// Creates a boolean equality operation
+  virtual SMTExprRef mkEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a boolean and operation
+  virtual SMTExprRef mkAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a boolean or operation
+  virtual SMTExprRef mkOr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a boolean ite operation
+  virtual SMTExprRef mkIte(const SMTExprRef &Cond, const SMTExprRef &T,
+                           const SMTExprRef &F) = 0;
+
+  /// Creates a bitvector sign extension operation
+  virtual SMTExprRef mkBVSignExt(unsigned i, const SMTExprRef &Exp) = 0;
+
+  /// Creates a bitvector zero extension operation
+  virtual SMTExprRef mkBVZeroExt(unsigned i, const SMTExprRef &Exp) = 0;
+
+  /// Creates a bitvector extract operation
+  virtual SMTExprRef mkBVExtract(unsigned High, unsigned Low,
+                                 const SMTExprRef &Exp) = 0;
+
+  /// Creates a bitvector concat operation
+  virtual SMTExprRef mkBVConcat(const SMTExprRef &LHS,
+                                const SMTExprRef &RHS) = 0;
+
+  /// Creates a predicate that checks for overflow in a bitvector addition
+  /// operation
+  virtual SMTExprRef mkBVAddNoOverflow(const SMTExprRef &LHS,
+                                       const SMTExprRef &RHS,
+                                       bool isSigned) = 0;
+
+  /// Creates a predicate that checks for underflow in a signed bitvector
+  /// addition operation
+  virtual SMTExprRef mkBVAddNoUnderflow(const SMTExprRef &LHS,
+                                        const SMTExprRef &RHS) = 0;
+
+  /// Creates a predicate that checks for overflow in a signed bitvector
+  /// subtraction operation
+  virtual SMTExprRef mkBVSubNoOverflow(const SMTExprRef &LHS,
+                                       const SMTExprRef &RHS) = 0;
+
+  /// Creates a predicate that checks for underflow in a bitvector subtraction
+  /// operation
+  virtual SMTExprRef mkBVSubNoUnderflow(const SMTExprRef &LHS,
+                                        const SMTExprRef &RHS,
+                                        bool isSigned) = 0;
+
+  /// Creates a predicate that checks for overflow in a signed bitvector
+  /// division/modulus operation
+  virtual SMTExprRef mkBVSDivNoOverflow(const SMTExprRef &LHS,
+                                        const SMTExprRef &RHS) = 0;
+
+  /// Creates a predicate that checks for overflow in a bitvector negation
+  /// operation
+  virtual SMTExprRef mkBVNegNoOverflow(const SMTExprRef &Exp) = 0;
+
+  /// Creates a predicate that checks for overflow in a bitvector multiplication
+  /// operation
+  virtual SMTExprRef mkBVMulNoOverflow(const SMTExprRef &LHS,
+                                       const SMTExprRef &RHS,
+                                       bool isSigned) = 0;
+
+  /// Creates a predicate that checks for underflow in a signed bitvector
+  /// multiplication operation
+  virtual SMTExprRef mkBVMulNoUnderflow(const SMTExprRef &LHS,
+                                        const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point negation operation
+  virtual SMTExprRef mkFPNeg(const SMTExprRef &Exp) = 0;
+
+  /// Creates a floating-point isInfinite operation
+  virtual SMTExprRef mkFPIsInfinite(const SMTExprRef &Exp) = 0;
+
+  /// Creates a floating-point isNaN operation
+  virtual SMTExprRef mkFPIsNaN(const SMTExprRef &Exp) = 0;
+
+  /// Creates a floating-point isNormal operation
+  virtual SMTExprRef mkFPIsNormal(const SMTExprRef &Exp) = 0;
+
+  /// Creates a floating-point isZero operation
+  virtual SMTExprRef mkFPIsZero(const SMTExprRef &Exp) = 0;
+
+  /// Creates a floating-point multiplication operation
+  virtual SMTExprRef mkFPMul(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point division operation
+  virtual SMTExprRef mkFPDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point remainder operation
+  virtual SMTExprRef mkFPRem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point addition operation
+  virtual SMTExprRef mkFPAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point subtraction operation
+  virtual SMTExprRef mkFPSub(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point less-than operation
+  virtual SMTExprRef mkFPLt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point greater-than operation
+  virtual SMTExprRef mkFPGt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point less-than-or-equal operation
+  virtual SMTExprRef mkFPLe(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point greater-than-or-equal operation
+  virtual SMTExprRef mkFPGe(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point equality operation
+  virtual SMTExprRef mkFPEqual(const SMTExprRef &LHS,
+                               const SMTExprRef &RHS) = 0;
+
+  /// Creates a floating-point conversion from floating-point to floating-point
+  /// operation
+  virtual SMTExprRef mkFPtoFP(const SMTExprRef &From, const SMTSortRef &To) = 0;
+
+  /// Creates a floating-point conversion from signed bitvector to
+  /// floating-point operation
+  virtual SMTExprRef mkSBVtoFP(const SMTExprRef &From,
+                               const SMTSortRef &To) = 0;
+
+  /// Creates a floating-point conversion from unsigned bitvector to
+  /// floating-point operation
+  virtual SMTExprRef mkUBVtoFP(const SMTExprRef &From,
+                               const SMTSortRef &To) = 0;
+
+  /// Creates a floating-point conversion from floating-point to signed
+  /// bitvector operation
+  virtual SMTExprRef mkFPtoSBV(const SMTExprRef &From, unsigned ToWidth) = 0;
+
+  /// Creates a floating-point conversion from floating-point to unsigned
+  /// bitvector operation
+  virtual SMTExprRef mkFPtoUBV(const SMTExprRef &From, unsigned ToWidth) = 0;
+
+  /// Creates a new symbol, given a name and a sort
+  virtual SMTExprRef mkSymbol(const char *Name, SMTSortRef Sort) = 0;
+
+  // Returns an appropriate floating-point rounding mode.
+  virtual SMTExprRef getFloatRoundingMode() = 0;
+
+  // If a model is available, returns the value of a given bitvector symbol
+  virtual llvm::APSInt getBitvector(const SMTExprRef &Exp, unsigned BitWidth,
+                                    bool isUnsigned) = 0;
+
+  // If a model is available, returns the value of a given boolean symbol
+  virtual bool getBoolean(const SMTExprRef &Exp) = 0;
+
+  /// Constructs an SMTExprRef from a boolean.
+  virtual SMTExprRef mkBoolean(const bool b) = 0;
+
+  /// Constructs an SMTExprRef from a finite APFloat.
+  virtual SMTExprRef mkFloat(const llvm::APFloat Float) = 0;
+
+  /// Constructs an SMTExprRef from an APSInt and its bit width
+  virtual SMTExprRef mkBitvector(const llvm::APSInt Int, unsigned BitWidth) = 0;
+
+  /// Given an expression, extract the value of this operand in the model.
+  virtual bool getInterpretation(const SMTExprRef &Exp, llvm::APSInt &Int) = 0;
+
+  /// Given an expression extract the value of this operand in the model.
+  virtual bool getInterpretation(const SMTExprRef &Exp,
+                                 llvm::APFloat &Float) = 0;
+
+  /// Check if the constraints are satisfiable
+  virtual Optional<bool> check() const = 0;
+
+  /// Push the current solver state
+  virtual void push() = 0;
+
+  /// Pop the previous solver state
+  virtual void pop(unsigned NumStates = 1) = 0;
+
+  /// Reset the solver and remove all constraints.
+  virtual void reset() = 0;
+
+  /// Checks if the solver supports floating-points.
+  virtual bool isFPSupported() = 0;
+
+  virtual void print(raw_ostream &OS) const = 0;
+};
+
+/// Shared pointer for SMTSolvers.
+using SMTSolverRef = std::shared_ptr<SMTSolver>;
+
+/// Convenience method to create a Z3Solver object
+SMTSolverRef CreateZ3Solver();
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Support/SaveAndRestore.h b/include/llvm/Support/SaveAndRestore.h
index 8e11789907ad..3c0333b7119a 100644
--- a/include/llvm/Support/SaveAndRestore.h
+++ b/include/llvm/Support/SaveAndRestore.h
@@ -1,9 +1,8 @@
 //===-- SaveAndRestore.h - Utility  -------------------------------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/ScalableSize.h b/include/llvm/Support/ScalableSize.h
new file mode 100644
index 000000000000..96bf043773a0
--- /dev/null
+++ b/include/llvm/Support/ScalableSize.h
@@ -0,0 +1,43 @@
+//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a struct that can be used to query the size of IR types
+// which may be scalable vectors. It provides convenience operators so that
+// it can be used in much the same way as a single scalar value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SCALABLESIZE_H
+#define LLVM_SUPPORT_SCALABLESIZE_H
+
+namespace llvm {
+
+class ElementCount {
+public:
+  unsigned Min;  // Minimum number of vector elements.
+  bool Scalable; // If true, the element count is a multiple of 'Min' that is
+                 // determined at runtime rather than compile time.
+
+  ElementCount(unsigned Min, bool Scalable)
+  : Min(Min), Scalable(Scalable) {}
+
+  // Returns a count whose 'Min' is multiplied by RHS; 'Scalable' is preserved.
+  // Const-qualified so it can be applied to const ElementCount values.
+  ElementCount operator*(unsigned RHS) const { return {Min * RHS, Scalable}; }
+  // Returns a count whose 'Min' is divided by RHS (integer division).
+  ElementCount operator/(unsigned RHS) const { return {Min / RHS, Scalable}; }
+
+  // Two counts compare equal only if both 'Min' and 'Scalable' match.
+  bool operator==(const ElementCount& RHS) const {
+    return Min == RHS.Min && Scalable == RHS.Scalable;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_SCALABLESIZE_H
diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h
index 3bd3ccedc42c..552da34f357b 100644
--- a/include/llvm/Support/ScaledNumber.h
+++ b/include/llvm/Support/ScaledNumber.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -887,10 +886,6 @@ template <class DigitsT> void ScaledNumber<DigitsT>::shiftRight(int32_t Shift) {
   Digits >>= Shift;
 }
 
-template <typename T> struct isPodLike;
-template <typename T> struct isPodLike<ScaledNumber<T>> {
-  static const bool value = true;
-};
 
 } // end namespace llvm
 
diff --git a/include/llvm/Support/ScopedPrinter.h b/include/llvm/Support/ScopedPrinter.h
index 34c1a287ee10..88daedc8713b 100644
--- a/include/llvm/Support/ScopedPrinter.h
+++ b/include/llvm/Support/ScopedPrinter.h
@@ -1,9 +1,8 @@
-//===-- ScopedPrinter.h ---------------------------------------------------===//
+//===-- ScopedPrinter.h ----------------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index f25a04969904..a6b215a24311 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Signals.h - Signal Handling support ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -66,13 +65,25 @@ namespace sys {
   /// This function registers a function to be called when the user "interrupts"
   /// the program (typically by pressing ctrl-c).  When the user interrupts the
   /// program, the specified interrupt function is called instead of the program
-  /// being killed, and the interrupt function automatically disabled.  Note
-  /// that interrupt functions are not allowed to call any non-reentrant
+  /// being killed, and the interrupt function automatically disabled.
+  ///
+  /// Note that interrupt functions are not allowed to call any non-reentrant
   /// functions.  An null interrupt function pointer disables the current
   /// installed function.  Note also that the handler may be executed on a
   /// different thread on some platforms.
-  /// Register a function to be called when ctrl-c is pressed.
   void SetInterruptFunction(void (*IF)());
+
+  /// Registers a function to be called when an "info" signal is delivered to
+  /// the process.
+  ///
+  /// On POSIX systems, this will be SIGUSR1; on systems that have it, SIGINFO
+  /// will also be used (typically ctrl-t).
+  ///
+  /// Note that signal handlers are not allowed to call any non-reentrant
+  /// functions.  A null function pointer disables the currently installed
+  /// function.  Note also that the handler may be executed on a different
+  /// thread on some platforms.
+  void SetInfoSignalFunction(void (*Handler)());
 } // End sys namespace
 } // End llvm namespace
 
diff --git a/include/llvm/Support/Signposts.h b/include/llvm/Support/Signposts.h
new file mode 100644
index 000000000000..b5a8c3d61e3e
--- /dev/null
+++ b/include/llvm/Support/Signposts.h
@@ -0,0 +1,43 @@
+//===-- llvm/Support/Signposts.h - Interval debug annotations ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Some OSes provide profilers that allow applications to provide custom
+/// annotations to the profiler. For example, on Xcode 10 and later 'signposts'
+/// can be emitted by the application and these will be rendered to the Points
+/// of Interest track on the Instruments timeline.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SIGNPOSTS_H
+#define LLVM_SUPPORT_SIGNPOSTS_H
+
+namespace llvm {
+class SignpostEmitterImpl;
+class Timer;
+
+/// Manages the emission of signposts into the recording method supported by
+/// the OS.
+class SignpostEmitter {
+  SignpostEmitterImpl *Impl;
+
+public:
+  SignpostEmitter();
+  ~SignpostEmitter();
+
+  bool isEnabled() const;
+
+  /// Begin a signposted interval for the given timer.
+  void startTimerInterval(Timer *T);
+  /// End a signposted interval for the given timer.
+  void endTimerInterval(Timer *T);
+};
+
+} // end namespace llvm
+
+#endif // ifndef LLVM_SUPPORT_SIGNPOSTS_H
diff --git a/include/llvm/Support/SmallVectorMemoryBuffer.h b/include/llvm/Support/SmallVectorMemoryBuffer.h
index c4a600e7f37d..b63b58e3a8ba 100644
--- a/include/llvm/Support/SmallVectorMemoryBuffer.h
+++ b/include/llvm/Support/SmallVectorMemoryBuffer.h
@@ -1,9 +1,8 @@
 //===- SmallVectorMemoryBuffer.h --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Solaris/sys/regset.h b/include/llvm/Support/Solaris/sys/regset.h
index 6a69ebe718a1..6bd98fa826a6 100644
--- a/include/llvm/Support/Solaris/sys/regset.h
+++ b/include/llvm/Support/Solaris/sys/regset.h
@@ -1,9 +1,8 @@
 /*===- llvm/Support/Solaris/sys/regset.h ------------------------*- C++ -*-===*
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===----------------------------------------------------------------------===*
  *
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 63ac893239d1..aa6026c23d07 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -1,9 +1,8 @@
 //===- SourceMgr.h - Manager for Source Buffers & Diagnostics ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -107,6 +106,8 @@ public:
   SourceMgr() = default;
   SourceMgr(const SourceMgr &) = delete;
   SourceMgr &operator=(const SourceMgr &) = delete;
+  SourceMgr(SourceMgr &&) = default;
+  SourceMgr &operator=(SourceMgr &&) = default;
   ~SourceMgr() = default;
 
   void setIncludeDirs(const std::vector<std::string> &Dirs) {
diff --git a/include/llvm/Support/SpecialCaseList.h b/include/llvm/Support/SpecialCaseList.h
index fd62fc48047b..b7400266f4df 100644
--- a/include/llvm/Support/SpecialCaseList.h
+++ b/include/llvm/Support/SpecialCaseList.h
@@ -1,9 +1,8 @@
 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 //
 // This is a utility class used to parse user-provided text files with
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index bb5fd07f0d00..a4f45916f53d 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -1,9 +1,8 @@
 //===- StringPool.h - Interned string pool ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h
index 6b77d487333b..c54044e3986c 100644
--- a/include/llvm/Support/StringSaver.h
+++ b/include/llvm/Support/StringSaver.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/StringSaver.h -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h
index 71d3724950ab..06a447a27c2a 100644
--- a/include/llvm/Support/SwapByteOrder.h
+++ b/include/llvm/Support/SwapByteOrder.h
@@ -1,9 +1,8 @@
 //===- SwapByteOrder.h - Generic and optimized byte swaps -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
 #include <cstddef>
+#include <type_traits>
 #if defined(_MSC_VER) && !defined(_DEBUG)
 #include <stdlib.h>
 #endif
@@ -116,6 +116,13 @@ inline double getSwappedBytes(double C) {
   return out.d;
 }
 
+template <typename T>
+inline typename std::enable_if<std::is_enum<T>::value, T>::type
+getSwappedBytes(T C) {
+  return static_cast<T>(
+      getSwappedBytes(static_cast<typename std::underlying_type<T>::type>(C)));
+}
+
 template<typename T>
 inline void swapByteOrder(T &Value) {
   Value = getSwappedBytes(Value);
diff --git a/include/llvm/Support/SymbolRemappingReader.h b/include/llvm/Support/SymbolRemappingReader.h
index b457b9e817e4..2b9ab570eb8b 100644
--- a/include/llvm/Support/SymbolRemappingReader.h
+++ b/include/llvm/Support/SymbolRemappingReader.h
@@ -1,9 +1,8 @@
 //===- SymbolRemappingReader.h - Read symbol remapping file -----*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index bd60793d1554..77deddb9ee1c 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -1,9 +1,8 @@
 //===- SystemUtils.h - Utilities to do low-level system stuff ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/TarWriter.h b/include/llvm/Support/TarWriter.h
index 639f61b53892..71164e2ef961 100644
--- a/include/llvm/Support/TarWriter.h
+++ b/include/llvm/Support/TarWriter.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/TarWriter.h - Tar archive file creator -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def
index 3e8193a5cdcf..598c1064efd0 100644
--- a/include/llvm/Support/TargetOpcodes.def
+++ b/include/llvm/Support/TargetOpcodes.def
@@ -1,9 +1,8 @@
 //===-- llvm/Support/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,7 @@
 ///
 HANDLE_TARGET_OPCODE(PHI)
 HANDLE_TARGET_OPCODE(INLINEASM)
+HANDLE_TARGET_OPCODE(INLINEASM_BR)
 HANDLE_TARGET_OPCODE(CFI_INSTRUCTION)
 HANDLE_TARGET_OPCODE(EH_LABEL)
 HANDLE_TARGET_OPCODE(GC_LABEL)
@@ -316,6 +316,9 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN)
 HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX)
 HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN)
 
+// Generic atomic fence
+HANDLE_TARGET_OPCODE(G_FENCE)
+
 /// Generic conditional branch instruction.
 HANDLE_TARGET_OPCODE(G_BRCOND)
 
@@ -481,6 +484,27 @@ HANDLE_TARGET_OPCODE(G_UITOFP)
 /// Generic FP absolute value.
 HANDLE_TARGET_OPCODE(G_FABS)
 
+/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.  NOTE: This does
+/// not require that X and Y have the same type, just that they are both
+/// floating point. X and the result must have the same type.  FCOPYSIGN(f32,
+/// f64) is allowed.
+HANDLE_TARGET_OPCODE(G_FCOPYSIGN)
+
+/// Generic FP canonicalize value.
+HANDLE_TARGET_OPCODE(G_FCANONICALIZE)
+
+/// FP min/max matching libm's fmin/fmax
+HANDLE_TARGET_OPCODE(G_FMINNUM)
+HANDLE_TARGET_OPCODE(G_FMAXNUM)
+
+/// FP min/max matching IEEE-754 2008's minnum/maxnum semantics.
+HANDLE_TARGET_OPCODE(G_FMINNUM_IEEE)
+HANDLE_TARGET_OPCODE(G_FMAXNUM_IEEE)
+
+/// FP min/max matching IEEE-754 2018 draft semantics.
+HANDLE_TARGET_OPCODE(G_FMINIMUM)
+HANDLE_TARGET_OPCODE(G_FMAXIMUM)
+
 /// Generic pointer offset
 HANDLE_TARGET_OPCODE(G_GEP)
 
@@ -488,9 +512,24 @@ HANDLE_TARGET_OPCODE(G_GEP)
 /// *down* to the given alignment.
 HANDLE_TARGET_OPCODE(G_PTR_MASK)
 
+/// Generic signed integer minimum.
+HANDLE_TARGET_OPCODE(G_SMIN)
+
+/// Generic signed integer maximum.
+HANDLE_TARGET_OPCODE(G_SMAX)
+
+/// Generic unsigned integer minimum.
+HANDLE_TARGET_OPCODE(G_UMIN)
+
+/// Generic unsigned integer maximum.
+HANDLE_TARGET_OPCODE(G_UMAX)
+
 /// Generic BRANCH instruction. This is an unconditional branch.
 HANDLE_TARGET_OPCODE(G_BR)
 
+/// Generic branch to jump table entry.
+HANDLE_TARGET_OPCODE(G_BRJT)
+
 /// Generic insertelement.
 HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT)
 
@@ -521,18 +560,39 @@ HANDLE_TARGET_OPCODE(G_BSWAP)
 /// Floating point ceil.
 HANDLE_TARGET_OPCODE(G_FCEIL)
 
+/// Floating point cosine.
+HANDLE_TARGET_OPCODE(G_FCOS)
+
+/// Floating point sine.
+HANDLE_TARGET_OPCODE(G_FSIN)
+
+/// Floating point square root.
+HANDLE_TARGET_OPCODE(G_FSQRT)
+
+/// Floating point floor.
+HANDLE_TARGET_OPCODE(G_FFLOOR)
+
+/// Floating point round to next integer.
+HANDLE_TARGET_OPCODE(G_FRINT)
+
+/// Floating point round to nearest integer.
+HANDLE_TARGET_OPCODE(G_FNEARBYINT)
+
 /// Generic AddressSpaceCast.
 HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
 
 /// Generic block address
 HANDLE_TARGET_OPCODE(G_BLOCK_ADDR)
 
+/// Generic jump table address
+HANDLE_TARGET_OPCODE(G_JUMP_TABLE)
+
 // TODO: Add more generic opcodes as we move along.
 
 /// Marker for the end of the generic opcode.
 /// This is used to check if an opcode is in the range of the
 /// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BLOCK_ADDR)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_JUMP_TABLE)
 
 /// BUILTIN_OP_END - This must be the last enum value in this list.
 /// The target-specific post-isel opcode values start here.
diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index ace11ed410a3..a7e1a752d081 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -1,9 +1,8 @@
 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -122,10 +121,15 @@ enum GPUKind : uint32_t {
   GK_GFX902 = 61,
   GK_GFX904 = 62,
   GK_GFX906 = 63,
+  GK_GFX908 = 64,
   GK_GFX909 = 65,
 
+  GK_GFX1010 = 71,
+  GK_GFX1011 = 72,
+  GK_GFX1012 = 73,
+
   GK_AMDGCN_FIRST = GK_GFX600,
-  GK_AMDGCN_LAST = GK_GFX909,
+  GK_AMDGCN_LAST = GK_GFX1012,
 };
 
 /// Instruction set architecture version.
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 1bafc4e687da..bf75650760d0 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -1,9 +1,8 @@
 //===- Support/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -101,6 +100,11 @@ MCStreamer *createWasmStreamer(MCContext &Ctx,
                                std::unique_ptr<MCObjectWriter> &&OW,
                                std::unique_ptr<MCCodeEmitter> &&CE,
                                bool RelaxAll);
+MCStreamer *createXCOFFStreamer(MCContext &Ctx,
+                                std::unique_ptr<MCAsmBackend> &&TAB,
+                                std::unique_ptr<MCObjectWriter> &&OW,
+                                std::unique_ptr<MCCodeEmitter> &&CE,
+                                bool RelaxAll);
 
 MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx);
 
@@ -471,7 +475,7 @@ public:
                                      bool DWARFMustBeAtTheEnd) const {
     MCStreamer *S;
     switch (T.getObjectFormat()) {
-    default:
+    case Triple::UnknownObjectFormat:
       llvm_unreachable("Unknown object format");
     case Triple::COFF:
       assert(T.isOSWindows() && "only Windows COFF is supported");
@@ -505,6 +509,10 @@ public:
         S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW),
                                std::move(Emitter), RelaxAll);
       break;
+    case Triple::XCOFF:
+        S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
+                                std::move(Emitter), RelaxAll);
+      break;
     }
     if (ObjectTargetStreamerCtorFn)
       ObjectTargetStreamerCtorFn(*S, STI);
diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h
index 582785cb69a5..9ffb84c4a570 100644
--- a/include/llvm/Support/TargetSelect.h
+++ b/include/llvm/Support/TargetSelect.h
@@ -1,9 +1,8 @@
 //===- TargetSelect.h - Target Selection & Registration ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/TaskQueue.h b/include/llvm/Support/TaskQueue.h
index 49981adb763d..df2ffdee2cc2 100644
--- a/include/llvm/Support/TaskQueue.h
+++ b/include/llvm/Support/TaskQueue.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/TaskQueue.h - A TaskQueue implementation ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
index 885bd18e8356..d6838c15fc34 100644
--- a/include/llvm/Support/ThreadLocal.h
+++ b/include/llvm/Support/ThreadLocal.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h
index 4fdbd528b212..4bcbaa3142fd 100644
--- a/include/llvm/Support/ThreadPool.h
+++ b/include/llvm/Support/ThreadPool.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/ThreadPool.h - A ThreadPool implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index ba7ece5e72ba..46d413dc487b 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,6 +32,9 @@
 // implementations like libstdc++ are known to have problems on NetBSD,
 // OpenBSD and PowerPC.
 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
+#elif defined(LLVM_ON_UNIX) &&                                                 \
+    ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
+#define LLVM_THREADING_USE_STD_CALL_ONCE 1
 #else
 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
 #endif
@@ -165,6 +167,19 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
   /// purposes, and as with setting a thread's name no indication of whether
   /// the operation succeeded or failed is returned.
   void get_thread_name(SmallVectorImpl<char> &Name);
+
+  enum class ThreadPriority {
+    Background = 0,
+    Default = 1,
+  };
+  /// If priority is Background, tries to lower the current thread's priority
+  /// such that it does not affect foreground tasks significantly. Can be used
+  /// for long-running, latency-insensitive tasks to make sure the CPU is not
+  /// hogged by this task.
+  /// If priority is Default, tries to restore the current thread's priority to
+  /// the default scheduling priority.
+  enum class SetThreadPriorityResult { FAILURE, SUCCESS };
+  SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
 }
 
 #endif
diff --git a/include/llvm/Support/TimeProfiler.h b/include/llvm/Support/TimeProfiler.h
new file mode 100644
index 000000000000..72b6f7180bde
--- /dev/null
+++ b/include/llvm/Support/TimeProfiler.h
@@ -0,0 +1,76 @@
+//===- llvm/Support/TimeProfiler.h - Hierarchical Time Profiler -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TIME_PROFILER_H
+#define LLVM_SUPPORT_TIME_PROFILER_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct TimeTraceProfiler;
+extern TimeTraceProfiler *TimeTraceProfilerInstance;
+
+/// Initialize the time trace profiler.
+/// This sets up the global \p TimeTraceProfilerInstance
+/// variable to be the profiler instance.
+void timeTraceProfilerInitialize();
+
+/// Cleanup the time trace profiler, if it was initialized.
+void timeTraceProfilerCleanup();
+
+/// Is the time trace profiler enabled, i.e. initialized?
+inline bool timeTraceProfilerEnabled() {
+  return TimeTraceProfilerInstance != nullptr;
+}
+
+/// Write profiling data to output file.
+/// Data produced is JSON, in Chrome "Trace Event" format, see
+/// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+void timeTraceProfilerWrite(raw_pwrite_stream &OS);
+
+/// Manually begin a time section, with the given \p Name and \p Detail.
+/// The profiler copies the string data, so the arguments may point into
+/// temporaries. Time sections can be hierarchical; every Begin must have a
+/// matching End, but they can nest.
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail);
+void timeTraceProfilerBegin(StringRef Name,
+                            llvm::function_ref<std::string()> Detail);
+
+/// Manually end the last time section.
+void timeTraceProfilerEnd();
+
+/// The TimeTraceScope is a helper class to call the begin and end functions
+/// of the time trace profiler.  When the object is constructed, it begins
+/// the section; and when it is destroyed, it stops it. If the time profiler
+/// is not initialized, the overhead is a single branch.
+struct TimeTraceScope {
+
+  TimeTraceScope() = delete;
+  TimeTraceScope(const TimeTraceScope &) = delete;
+  TimeTraceScope &operator=(const TimeTraceScope &) = delete;
+  TimeTraceScope(TimeTraceScope &&) = delete;
+  TimeTraceScope &operator=(TimeTraceScope &&) = delete;
+
+  TimeTraceScope(StringRef Name, StringRef Detail) {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerBegin(Name, Detail);
+  }
+  TimeTraceScope(StringRef Name, llvm::function_ref<std::string()> Detail) {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerBegin(Name, Detail);
+  }
+  ~TimeTraceScope() {
+    if (TimeTraceProfilerInstance != nullptr)
+      timeTraceProfilerEnd();
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index a11c3ce3ff22..76c9bc7b6863 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Timer.h - Interval Timing Support ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -206,8 +205,9 @@ public:
     Description.assign(NewDescription.begin(), NewDescription.end());
   }
 
-  /// Print any started timers in this group.
-  void print(raw_ostream &OS);
+  /// Print any started timers in this group, optionally resetting timers after
+  /// printing them.
+  void print(raw_ostream &OS, bool ResetAfterPrint = false);
 
   /// Clear all timers in this group.
   void clear();
@@ -234,7 +234,7 @@ private:
   friend void PrintStatisticsJSON(raw_ostream &OS);
   void addTimer(Timer &T);
   void removeTimer(Timer &T);
-  void prepareToPrintList();
+  void prepareToPrintList(bool reset_time = false);
   void PrintQueuedTimers(raw_ostream &OS);
   void printJSONValue(raw_ostream &OS, const PrintRecord &R,
                       const char *suffix, double Value);
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
index cf3bc2fb0171..a99e327f8db7 100644
--- a/include/llvm/Support/ToolOutputFile.h
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -1,9 +1,8 @@
 //===- ToolOutputFile.h - Output files for compiler-like tools -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h
index 490bd94f4cd5..8cf4f7aed7f8 100644
--- a/include/llvm/Support/TrailingObjects.h
+++ b/include/llvm/Support/TrailingObjects.h
@@ -1,9 +1,8 @@
 //===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/TrigramIndex.h b/include/llvm/Support/TrigramIndex.h
index da0b6daf47ed..9351c2db169a 100644
--- a/include/llvm/Support/TrigramIndex.h
+++ b/include/llvm/Support/TrigramIndex.h
@@ -1,9 +1,8 @@
 //===-- TrigramIndex.h - a heuristic for SpecialCaseList --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 //
 // TrigramIndex implements a heuristic for SpecialCaseList that allows to
diff --git a/include/llvm/Support/TypeName.h b/include/llvm/Support/TypeName.h
index 0eb7ead98b21..236490a25011 100644
--- a/include/llvm/Support/TypeName.h
+++ b/include/llvm/Support/TypeName.h
@@ -1,9 +1,8 @@
 //===- TypeName.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/Unicode.h b/include/llvm/Support/Unicode.h
index 983acaf03635..ca17bba2fbb4 100644
--- a/include/llvm/Support/Unicode.h
+++ b/include/llvm/Support/Unicode.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unicode.h - Unicode character properties  -*- C++ -*-=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h
index 3cf4a6d96602..4b59f8a92b76 100644
--- a/include/llvm/Support/UnicodeCharRanges.h
+++ b/include/llvm/Support/UnicodeCharRanges.h
@@ -1,9 +1,8 @@
 //===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h
index 91dc911036d5..0a887ad5965d 100644
--- a/include/llvm/Support/UniqueLock.h
+++ b/include/llvm/Support/UniqueLock.h
@@ -1,9 +1,8 @@
 //===- Support/UniqueLock.h - Acquire/Release Mutex In Scope ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index 084b901b326c..1e14dfec9a61 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Valgrind.h - Communication with Valgrind ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/VersionTuple.h b/include/llvm/Support/VersionTuple.h
index e85a188e54b4..14736d6b28f0 100644
--- a/include/llvm/Support/VersionTuple.h
+++ b/include/llvm/Support/VersionTuple.h
@@ -1,9 +1,8 @@
 //===- VersionTuple.h - Version Number Handling -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h
index 61c3d2f46e9c..31c9e851daed 100644
--- a/include/llvm/Support/VirtualFileSystem.h
+++ b/include/llvm/Support/VirtualFileSystem.h
@@ -1,9 +1,8 @@
 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -59,15 +58,15 @@ public:
 
   Status() = default;
   Status(const llvm::sys::fs::file_status &Status);
-  Status(StringRef Name, llvm::sys::fs::UniqueID UID,
+  Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
          llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
          uint64_t Size, llvm::sys::fs::file_type Type,
          llvm::sys::fs::perms Perms);
 
   /// Get a copy of a Status with a different name.
-  static Status copyWithNewName(const Status &In, StringRef NewName);
+  static Status copyWithNewName(const Status &In, const Twine &NewName);
   static Status copyWithNewName(const llvm::sys::fs::file_status &In,
-                                StringRef NewName);
+                                const Twine &NewName);
 
   /// Returns the name that should be used for this file or directory.
   StringRef getName() const { return Name; }
@@ -299,8 +298,16 @@ public:
 
 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
 /// the operating system.
+/// The working directory is linked to the process's working directory.
+/// (This is usually thread-hostile).
 IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
 
+/// Create an \p vfs::FileSystem for the 'real' file system, as seen by
+/// the operating system.
+/// It has its own working directory, independent of (but initially equal to)
+/// that of the process.
+std::unique_ptr<FileSystem> createPhysicalFileSystem();
+
 /// A file system that allows overlaying one \p AbstractFileSystem on top
 /// of another.
 ///
@@ -336,15 +343,24 @@ public:
 
   using iterator = FileSystemList::reverse_iterator;
   using const_iterator = FileSystemList::const_reverse_iterator;
+  using reverse_iterator = FileSystemList::iterator;
+  using const_reverse_iterator = FileSystemList::const_iterator;
 
   /// Get an iterator pointing to the most recently added file system.
   iterator overlays_begin() { return FSList.rbegin(); }
   const_iterator overlays_begin() const { return FSList.rbegin(); }
 
-  /// Get an iterator pointing one-past the least recently added file
-  /// system.
+  /// Get an iterator pointing one-past the least recently added file system.
   iterator overlays_end() { return FSList.rend(); }
   const_iterator overlays_end() const { return FSList.rend(); }
+
+  /// Get an iterator pointing to the least recently added file system.
+  reverse_iterator overlays_rbegin() { return FSList.begin(); }
+  const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
+
+  /// Get an iterator pointing one-past the most recently added file system.
+  reverse_iterator overlays_rend() { return FSList.end(); }
+  const_reverse_iterator overlays_rend() const { return FSList.end(); }
 };
 
 /// By default, this delegates all calls to the underlying file system. This
diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h
index 01e1d926eb95..281595e8f272 100644
--- a/include/llvm/Support/Watchdog.h
+++ b/include/llvm/Support/Watchdog.h
@@ -1,9 +1,8 @@
 //===--- Watchdog.h - Watchdog timer ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index e27bf1b3a1a5..bdd23b41594e 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Win64EH.h ---Win64 EH Constants-------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/WindowsError.h b/include/llvm/Support/WindowsError.h
index 63bfe5976546..195405224124 100644
--- a/include/llvm/Support/WindowsError.h
+++ b/include/llvm/Support/WindowsError.h
@@ -1,9 +1,8 @@
 //===-- WindowsError.h - Support for mapping windows errors to posix-------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/WithColor.h b/include/llvm/Support/WithColor.h
index 76842d1c3dc8..f4e107581179 100644
--- a/include/llvm/Support/WithColor.h
+++ b/include/llvm/Support/WithColor.h
@@ -1,9 +1,8 @@
 //===- WithColor.h ----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Support/X86DisassemblerDecoderCommon.h b/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 466dd309909a..baf842b12a27 100644
--- a/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -1,9 +1,8 @@
 //===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,29 +46,23 @@ namespace X86Disassembler {
 // Attributes of an instruction that must be known before the opcode can be
 // processed correctly.  Most of these indicate the presence of particular
 // prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
-#define ATTRIBUTE_BITS                  \
-  ENUM_ENTRY(ATTR_NONE,   0x00)         \
-  ENUM_ENTRY(ATTR_64BIT,  (0x1 << 0))   \
-  ENUM_ENTRY(ATTR_XS,     (0x1 << 1))   \
-  ENUM_ENTRY(ATTR_XD,     (0x1 << 2))   \
-  ENUM_ENTRY(ATTR_REXW,   (0x1 << 3))   \
-  ENUM_ENTRY(ATTR_OPSIZE, (0x1 << 4))   \
-  ENUM_ENTRY(ATTR_ADSIZE, (0x1 << 5))   \
-  ENUM_ENTRY(ATTR_VEX,    (0x1 << 6))   \
-  ENUM_ENTRY(ATTR_VEXL,   (0x1 << 7))   \
-  ENUM_ENTRY(ATTR_EVEX,   (0x1 << 8))   \
-  ENUM_ENTRY(ATTR_EVEXL,  (0x1 << 9))   \
-  ENUM_ENTRY(ATTR_EVEXL2, (0x1 << 10))  \
-  ENUM_ENTRY(ATTR_EVEXK,  (0x1 << 11))  \
-  ENUM_ENTRY(ATTR_EVEXKZ, (0x1 << 12))  \
-  ENUM_ENTRY(ATTR_EVEXB,  (0x1 << 13))
-
-#define ENUM_ENTRY(n, v) n = v,
 enum attributeBits {
-  ATTRIBUTE_BITS
-  ATTR_max
+  ATTR_NONE   = 0x00,
+  ATTR_64BIT  = 0x1 << 0,
+  ATTR_XS     = 0x1 << 1,
+  ATTR_XD     = 0x1 << 2,
+  ATTR_REXW   = 0x1 << 3,
+  ATTR_OPSIZE = 0x1 << 4,
+  ATTR_ADSIZE = 0x1 << 5,
+  ATTR_VEX    = 0x1 << 6,
+  ATTR_VEXL   = 0x1 << 7,
+  ATTR_EVEX   = 0x1 << 8,
+  ATTR_EVEXL2 = 0x1 << 9,
+  ATTR_EVEXK  = 0x1 << 10,
+  ATTR_EVEXKZ = 0x1 << 11,
+  ATTR_EVEXB  = 0x1 << 12,
+  ATTR_max    = 0x1 << 13,
 };
-#undef ENUM_ENTRY
 
 // Combinations of the above attributes that are relevant to instruction
 // decode. Although other combinations are possible, they can be reduced to
@@ -394,6 +387,7 @@ enum ModRMDecisionType {
   ENUM_ENTRY(ENCODING_IRC,    "Immediate for static rounding control")         \
   ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                               "opcode byte")                                   \
+  ENUM_ENTRY(ENCODING_CC,     "Condition code encoded in opcode")              \
   ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                               "in type")                                       \
   ENUM_ENTRY(ENCODING_SI,     "Source index; encoded in OpSize/Adsize prefix") \
@@ -415,9 +409,6 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
   ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
   ENUM_ENTRY(TYPE_IMM,        "immediate operand")                             \
-  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
-  ENUM_ENTRY(TYPE_IMM5,       "1-byte immediate operand between 0 and 31")     \
-  ENUM_ENTRY(TYPE_AVX512ICC,  "1-byte immediate operand for AVX512 icmp")      \
   ENUM_ENTRY(TYPE_UIMM8,      "1-byte unsigned immediate operand")             \
   ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
   ENUM_ENTRY(TYPE_MVSIBX,     "Memory operand using XMM index")                \
@@ -432,6 +423,7 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_YMM,        "32-byte")                                       \
   ENUM_ENTRY(TYPE_ZMM,        "64-byte")                                       \
   ENUM_ENTRY(TYPE_VK,         "mask register")                                 \
+  ENUM_ENTRY(TYPE_VK_PAIR,    "mask register pair")                            \
   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
   ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def
index e9bede545d3f..1749be3b3ae2 100644
--- a/include/llvm/Support/X86TargetParser.def
+++ b/include/llvm/Support/X86TargetParser.def
@@ -1,9 +1,8 @@
 //===- X86TargetParser.def - X86 target parsing defines ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,8 @@ X86_VENDOR(VENDOR_AMD,   "amd")
 #ifndef X86_CPU_TYPE
 #define X86_CPU_TYPE(ARCHNAME, ENUM)
 #endif
+// The first part of this list must match what is implemented in libgcc and
+// compiler-rt. Clang uses this to know how to implement __builtin_cpu_is.
 X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell",       INTEL_BONNELL,       "bonnell", "atom")
 X86_CPU_TYPE_COMPAT           ("core2",         INTEL_CORE2,         "core2")
 X86_CPU_TYPE_COMPAT           ("nehalem",       INTEL_COREI7,        "corei7")
@@ -80,6 +81,8 @@ X86_CPU_TYPE                  ("k8-sse3",     AMD_K8SSE3)
 #define X86_CPU_SUBTYPE(ARCHNAME, ENUM)
 #endif
 
+// The first part of this list must match what is implemented in libgcc and
+// compiler-rt. Clang uses this to know how to implement __builtin_cpu_is.
 X86_CPU_SUBTYPE_COMPAT("nehalem",        INTEL_COREI7_NEHALEM,        "nehalem")
 X86_CPU_SUBTYPE_COMPAT("westmere",       INTEL_COREI7_WESTMERE,       "westmere")
 X86_CPU_SUBTYPE_COMPAT("sandybridge",    INTEL_COREI7_SANDYBRIDGE,    "sandybridge")
@@ -99,14 +102,16 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
 X86_CPU_SUBTYPE_COMPAT("cannonlake",     INTEL_COREI7_CANNONLAKE,     "cannonlake")
 X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
 X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
+X86_CPU_SUBTYPE_COMPAT("znver2",         AMDFAM17H_ZNVER2,            "znver2")
+X86_CPU_SUBTYPE_COMPAT("cascadelake",    INTEL_COREI7_CASCADELAKE,    "cascadelake")
 // Entries below this are not in libgcc/compiler-rt.
 X86_CPU_SUBTYPE       ("core2",          INTEL_CORE2_65)
 X86_CPU_SUBTYPE       ("penryn",         INTEL_CORE2_45)
-X86_CPU_SUBTYPE       ("cascadelake",    INTEL_COREI7_CASCADELAKE)
 X86_CPU_SUBTYPE       ("k6",             AMDPENTIUM_K6)
 X86_CPU_SUBTYPE       ("k6-2",           AMDPENTIUM_K62)
 X86_CPU_SUBTYPE       ("k6-3",           AMDPENTIUM_K63)
 X86_CPU_SUBTYPE       ("geode",          AMDPENTIUM_GEODE)
+X86_CPU_SUBTYPE       ("cooperlake",     INTEL_COREI7_COOPERLAKE)
 #undef X86_CPU_SUBTYPE_COMPAT
 #undef X86_CPU_SUBTYPE
 
@@ -161,5 +166,6 @@ X86_FEATURE       (65, FEATURE_ADX)
 X86_FEATURE       (66, FEATURE_EM64T)
 X86_FEATURE       (67, FEATURE_CLFLUSHOPT)
 X86_FEATURE       (68, FEATURE_SHA)
+X86_FEATURE       (69, FEATURE_AVX512BF16)
 #undef X86_FEATURE_COMPAT
 #undef X86_FEATURE
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index 5b031a9a4270..3570119a3bfd 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -1,9 +1,8 @@
 //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 3d790e96fff7..5181dc56d81d 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/YAMLTraits.h --------------------------------*- C++ -*-===//
 //
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -102,8 +101,7 @@ template <class T, class Context> struct MappingContextTraits {
 ///           io.enumCase(value, "green", cGreen);
 ///         }
 ///       };
-template<typename T>
-struct ScalarEnumerationTraits {
+template <typename T, typename Enable = void> struct ScalarEnumerationTraits {
   // Must provide:
   // static void enumeration(IO &io, T &value);
 };
@@ -119,8 +117,7 @@ struct ScalarEnumerationTraits {
 ///          io.bitSetCase(value, "round", flagRound);
 ///        }
 ///      };
-template<typename T>
-struct ScalarBitSetTraits {
+template <typename T, typename Enable = void> struct ScalarBitSetTraits {
   // Must provide:
   // static void bitset(IO &io, T &value);
 };
@@ -146,8 +143,7 @@ enum class QuotingType { None, Single, Double };
 ///      }
 ///      static QuotingType mustQuote(StringRef) { return QuotingType::Single; }
 ///    };
-template<typename T>
-struct ScalarTraits {
+template <typename T, typename Enable = void> struct ScalarTraits {
   // Must provide:
   //
   // Function to write the value as a string:
@@ -864,8 +860,8 @@ public:
     mapOptionalWithContext(Key, Val, Ctx);
   }
 
-  template <typename T>
-  void mapOptional(const char *Key, T &Val, const T &Default) {
+  template <typename T, typename DefaultT>
+  void mapOptional(const char *Key, T &Val, const DefaultT &Default) {
     EmptyContext Ctx;
     mapOptionalWithContext(Key, Val, Default, Ctx);
   }
@@ -891,10 +887,13 @@ public:
     this->processKey(Key, Val, false, Ctx);
   }
 
-  template <typename T, typename Context>
-  void mapOptionalWithContext(const char *Key, T &Val, const T &Default,
+  template <typename T, typename Context, typename DefaultT>
+  void mapOptionalWithContext(const char *Key, T &Val, const DefaultT &Default,
                               Context &Ctx) {
-    this->processKeyWithDefault(Key, Val, Default, false, Ctx);
+    static_assert(std::is_convertible<DefaultT, T>::value,
+                  "Default type must be implicitly convertible to value type!");
+    this->processKeyWithDefault(Key, Val, static_cast<const T &>(Default),
+                                false, Ctx);
   }
 
 private:
@@ -978,7 +977,7 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
   bool DoClear;
   if ( io.beginBitSetScalar(DoClear) ) {
     if ( DoClear )
-      Val = static_cast<T>(0);
+      Val = T();
     ScalarBitSetTraits<T>::bitset(io, Val);
     io.endBitSetScalar();
   }
@@ -1243,12 +1242,14 @@ struct ScalarTraits<double> {
   static QuotingType mustQuote(StringRef) { return QuotingType::None; }
 };
 
-// For endian types, we just use the existing ScalarTraits for the underlying
-// type.  This way endian aware types are supported whenever a ScalarTraits
-// is defined for the underlying type.
+// For endian types, we use the existing scalar traits classes of the
+// underlying type.  This way endian-aware types are supported whenever the
+// traits are defined for the underlying type.
 template <typename value_type, support::endianness endian, size_t alignment>
-struct ScalarTraits<support::detail::packed_endian_specific_integral<
-    value_type, endian, alignment>> {
+struct ScalarTraits<
+    support::detail::packed_endian_specific_integral<value_type, endian,
+                                                     alignment>,
+    typename std::enable_if<has_ScalarTraits<value_type>::value>::type> {
   using endian_type =
       support::detail::packed_endian_specific_integral<value_type, endian,
                                                        alignment>;
@@ -1269,6 +1270,38 @@ struct ScalarTraits<support::detail::packed_endian_specific_integral<
   }
 };
 
+template <typename value_type, support::endianness endian, size_t alignment>
+struct ScalarEnumerationTraits<
+    support::detail::packed_endian_specific_integral<value_type, endian,
+                                                     alignment>,
+    typename std::enable_if<
+        has_ScalarEnumerationTraits<value_type>::value>::type> {
+  using endian_type =
+      support::detail::packed_endian_specific_integral<value_type, endian,
+                                                       alignment>;
+
+  static void enumeration(IO &io, endian_type &E) {
+    value_type V = E;
+    ScalarEnumerationTraits<value_type>::enumeration(io, V);
+    E = V;
+  }
+};
+
+template <typename value_type, support::endianness endian, size_t alignment>
+struct ScalarBitSetTraits<
+    support::detail::packed_endian_specific_integral<value_type, endian,
+                                                     alignment>,
+    typename std::enable_if<has_ScalarBitSetTraits<value_type>::value>::type> {
+  using endian_type =
+      support::detail::packed_endian_specific_integral<value_type, endian,
+                                                       alignment>;
+  static void bitset(IO &io, endian_type &E) {
+    value_type V = E;
+    ScalarBitSetTraits<value_type>::bitset(io, V);
+    E = V;
+  }
+};
+
 // Utility for use within MappingTraits<>::mapping() method
 // to [de]normalize an object for use with YAML conversion.
 template <typename TNorm, typename TFinal>
@@ -1587,8 +1620,9 @@ private:
   bool NeedBitValueComma = false;
   bool NeedFlowSequenceComma = false;
   bool EnumerationMatchFound = false;
-  bool NeedsNewLine = false;
   bool WriteDefaultValues = false;
+  StringRef Padding;
+  StringRef PaddingBeforeContainer;
 };
 
 /// YAML I/O does conversion based on types. But often native data types
@@ -1872,6 +1906,11 @@ struct SequenceTraits<SmallVector<T, N>,
                       typename std::enable_if<CheckIsBool<
                           SequenceElementTraits<T>::flow>::value>::type>
     : SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+                      typename std::enable_if<CheckIsBool<
+                          SequenceElementTraits<T>::flow>::value>::type>
+    : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};
 
 // Sequences of fundamental types use flow formatting.
 template <typename T>
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index b46fd7f730c9..4ecdb17376f1 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/circular_raw_ostream.h - Buffered streams --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h
index a983aeb90879..c51a94da3a28 100644
--- a/include/llvm/Support/raw_os_ostream.h
+++ b/include/llvm/Support/raw_os_ostream.h
@@ -1,9 +1,8 @@
 //===- raw_os_ostream.h - std::ostream adaptor for raw_ostream --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index d062e716209d..48bb623b0638 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -1,9 +1,8 @@
 //===--- raw_ostream.h - Raw output stream ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -346,7 +345,7 @@ public:
   explicit raw_pwrite_stream(bool Unbuffered = false)
       : raw_ostream(Unbuffered) {}
   void pwrite(const char *Ptr, size_t Size, uint64_t Offset) {
-#ifndef NDBEBUG
+#ifndef NDEBUG
     uint64_t Pos = tell();
     // /dev/null always reports a pos of 0, so we cannot perform this check
     // in that case.
diff --git a/include/llvm/Support/raw_sha1_ostream.h b/include/llvm/Support/raw_sha1_ostream.h
index bd55d98b7c1d..3991691796b5 100644
--- a/include/llvm/Support/raw_sha1_ostream.h
+++ b/include/llvm/Support/raw_sha1_ostream.h
@@ -1,9 +1,8 @@
 //==- raw_sha1_ostream.h - raw_ostream that compute SHA1        --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/thread.h b/include/llvm/Support/thread.h
index 787a513d6017..084ed16166fe 100644
--- a/include/llvm/Support/thread.h
+++ b/include/llvm/Support/thread.h
@@ -1,9 +1,8 @@
 //===-- llvm/Support/thread.h - Wrapper for <thread> ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index e7b8f2517b8a..c8c6a76a90f1 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/type_traits.h - Simplfied type traits -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,35 +24,6 @@
 
 namespace llvm {
 
-/// isPodLike - This is a type trait that is used to determine whether a given
-/// type can be copied around with memcpy instead of running ctors etc.
-template <typename T>
-struct isPodLike {
-  // std::is_trivially_copyable is available in libc++ with clang, libstdc++
-  // that comes with GCC 5.  MSVC 2015 and newer also have
-  // std::is_trivially_copyable.
-#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) ||      \
-    (defined(__GNUC__) && __GNUC__ >= 5) || defined(_MSC_VER)
-  // If the compiler supports the is_trivially_copyable trait use it, as it
-  // matches the definition of isPodLike closely.
-  static const bool value = std::is_trivially_copyable<T>::value;
-#elif __has_feature(is_trivially_copyable)
-  // Use the internal name if the compiler supports is_trivially_copyable but we
-  // don't know if the standard library does. This is the case for clang in
-  // conjunction with libstdc++ from GCC 4.x.
-  static const bool value = __is_trivially_copyable(T);
-#else
-  // If we don't know anything else, we can (at least) assume that all non-class
-  // types are PODs.
-  static const bool value = !std::is_class<T>::value;
-#endif
-};
-
-// std::pair's are pod-like if their elements are.
-template<typename T, typename U>
-struct isPodLike<std::pair<T, U>> {
-  static const bool value = isPodLike<T>::value && isPodLike<U>::value;
-};
 
 /// Metafunction that determines whether the given type is either an
 /// integral type or an enumeration type, including enum classes.
@@ -120,6 +90,12 @@ template<typename T> union move_construction_triviality_helper {
     move_construction_triviality_helper(move_construction_triviality_helper&&) = default;
     ~move_construction_triviality_helper() = default;
 };
+
+template<class T>
+union trivial_helper { // Probe type used by llvm::is_trivially_copyable.
+    T t; // Non-trivial special members of T become deleted in the union.
+};
+
 } // end namespace detail
 
 /// An implementation of `std::is_trivially_copy_constructible` since we have
@@ -144,6 +120,78 @@ struct is_trivially_move_constructible<T &> : std::true_type {};
 template <typename T>
 struct is_trivially_move_constructible<T &&> : std::true_type {};
 
+
+template <typename T>
+struct is_copy_assignable { // value: is `T & = const T &` well-formed?
+  template<class F>
+    static auto get(F*) -> decltype(std::declval<F &>() = std::declval<const F &>(), std::true_type{});
+    static std::false_type get(...); // Fallback when the overload above drops out.
+    static constexpr bool value = decltype(get((T*)nullptr))::value; // Never called.
+};
+
+template <typename T>
+struct is_move_assignable { // value: is `T & = T &&` well-formed?
+  template<class F>
+    static auto get(F*) -> decltype(std::declval<F &>() = std::declval<F &&>(), std::true_type{});
+    static std::false_type get(...); // Fallback when the overload above drops out.
+    static constexpr bool value = decltype(get((T*)nullptr))::value; // Never called.
+};
+
+
+// An implementation of `std::is_trivially_copyable`, provided because the
+// STL version is not equally supported by all compilers, especially GCC 4.9.
+// A uniform implementation of this trait is important for ABI compatibility,
+// as it has an impact on SmallVector's ABI (among others).
+template <typename T>
+class is_trivially_copyable {
+
+  // Copy constructor: "trivial" when trivial_helper<T> is copy constructible.
+  static constexpr bool has_trivial_copy_constructor =
+      std::is_copy_constructible<detail::trivial_helper<T>>::value;
+  static constexpr bool has_deleted_copy_constructor =
+      !std::is_copy_constructible<T>::value; // T is not copy constructible.
+
+  // Move constructor: "trivial" when trivial_helper<T> is move constructible.
+  static constexpr bool has_trivial_move_constructor =
+      std::is_move_constructible<detail::trivial_helper<T>>::value;
+  static constexpr bool has_deleted_move_constructor =
+      !std::is_move_constructible<T>::value; // T is not move constructible.
+
+  // Copy assignment: "trivial" when trivial_helper<T> is copy assignable.
+  static constexpr bool has_trivial_copy_assign =
+      is_copy_assignable<detail::trivial_helper<T>>::value;
+  static constexpr bool has_deleted_copy_assign =
+      !is_copy_assignable<T>::value; // T is not copy assignable.
+
+  // Move assignment: "trivial" when trivial_helper<T> is move assignable.
+  static constexpr bool has_trivial_move_assign =
+      is_move_assignable<detail::trivial_helper<T>>::value;
+  static constexpr bool has_deleted_move_assign =
+      !is_move_assignable<T>::value; // T is not move assignable.
+
+  // Destructor: "trivial" when trivial_helper<T> is destructible.
+  static constexpr bool has_trivial_destructor =
+      std::is_destructible<detail::trivial_helper<T>>::value;
+
+  public:
+
+  static constexpr bool value = // Each copy/move op is trivial or unavailable.
+      has_trivial_destructor &&
+      (has_deleted_move_assign || has_trivial_move_assign) &&
+      (has_deleted_move_constructor || has_trivial_move_constructor) &&
+      (has_deleted_copy_assign || has_trivial_copy_assign) &&
+      (has_deleted_copy_constructor || has_trivial_copy_constructor);
+
+#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE // Cross-check against the std:: trait.
+  static_assert(value == std::is_trivially_copyable<T>::value,
+                "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
+#endif
+};
+template <typename T>
+class is_trivially_copyable<T*> : public std::true_type { // Any pointer: true.
+};
+
+
 } // end namespace llvm
 
 // If the compiler supports detecting whether a class is final, define
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index de4d3bf54782..7c83b6298620 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -1,9 +1,8 @@
 //===- llvm/TableGen/Error.h - tblgen error handling helpers ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/TableGen/Main.h b/include/llvm/TableGen/Main.h
index 670572dc8103..e464cd4d4fb5 100644
--- a/include/llvm/TableGen/Main.h
+++ b/include/llvm/TableGen/Main.h
@@ -1,9 +1,8 @@
 //===- llvm/TableGen/Main.h - tblgen entry point ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index e022bc82b4e4..bf7f02208c28 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1,9 +1,8 @@
 //===- llvm/TableGen/Record.h - Classes for Table Records -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -316,6 +315,7 @@ protected:
     IK_TernOpInit,
     IK_UnOpInit,
     IK_LastOpInit,
+    IK_CondOpInit,
     IK_FoldOpInit,
     IK_IsAOpInit,
     IK_StringInit,
@@ -623,10 +623,11 @@ public:
 
 class CodeInit : public TypedInit {
   StringRef Value;
+  SMLoc Loc;
 
-  explicit CodeInit(StringRef V)
+  explicit CodeInit(StringRef V, const SMLoc &Loc)
       : TypedInit(IK_CodeInit, static_cast<RecTy *>(CodeRecTy::get())),
-        Value(V) {}
+        Value(V), Loc(Loc) {}
 
 public:
   CodeInit(const StringInit &) = delete;
@@ -636,9 +637,10 @@ public:
     return I->getKind() == IK_CodeInit;
   }
 
-  static CodeInit *get(StringRef);
+  static CodeInit *get(StringRef, const SMLoc &Loc);
 
   StringRef getValue() const { return Value; }
+  const SMLoc &getLoc() const { return Loc; }
 
   Init *convertInitializerTo(RecTy *Ty) const override;
 
@@ -798,8 +800,9 @@ public:
 /// !op (X, Y) - Combine two inits.
 class BinOpInit : public OpInit, public FoldingSetNode {
 public:
-  enum BinaryOp : uint8_t { ADD, AND, OR, SHL, SRA, SRL, LISTCONCAT,
-                            STRCONCAT, CONCAT, EQ, NE, LE, LT, GE, GT };
+  enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT,
+                            LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE,
+                            GT };
 
 private:
   Init *LHS, *RHS;
@@ -818,6 +821,8 @@ public:
   static BinOpInit *get(BinaryOp opc, Init *lhs, Init *rhs,
                         RecTy *Type);
   static Init *getStrConcat(Init *lhs, Init *rhs);
+  static Init *getListConcat(TypedInit *lhs, Init *rhs);
+  static Init *getListSplat(TypedInit *lhs, Init *rhs);
 
   void Profile(FoldingSetNodeID &ID) const;
 
@@ -912,6 +917,83 @@ public:
   std::string getAsString() const override;
 };
 
+/// !cond(condition_1: value_1, ..., condition_n: value_n)
+/// Selects the first value whose condition is true; otherwise reports an
+/// error. Operands are trailing objects: NumConds conditions, then the values.
+class CondOpInit final : public TypedInit, public FoldingSetNode,
+                      public TrailingObjects<CondOpInit, Init *> {
+  unsigned NumConds; // Number of (condition, value) pairs.
+  RecTy *ValType; // Initialized from the same Type as the TypedInit base.
+
+  CondOpInit(unsigned NC, RecTy *Type)
+    : TypedInit(IK_CondOpInit, Type),
+      NumConds(NC), ValType(Type) {}
+
+  size_t numTrailingObjects(OverloadToken<Init *>) const {
+    return 2*NumConds; // NumConds conditions plus NumConds values.
+  }
+
+public:
+  CondOpInit(const CondOpInit &) = delete;
+  CondOpInit &operator=(const CondOpInit &) = delete;
+
+  static bool classof(const Init *I) {
+    return I->getKind() == IK_CondOpInit;
+  }
+
+  static CondOpInit *get(ArrayRef<Init*> C, ArrayRef<Init*> V,
+                        RecTy *Type);
+
+  void Profile(FoldingSetNodeID &ID) const;
+
+  RecTy *getValType() const { return ValType; }
+
+  unsigned getNumConds() const { return NumConds; }
+
+  Init *getCond(unsigned Num) const {
+    assert(Num < NumConds && "Condition number out of range!");
+    return getTrailingObjects<Init *>()[Num]; // Conditions are stored first.
+  }
+
+  Init *getVal(unsigned Num) const {
+    assert(Num < NumConds && "Val number out of range!");
+    return getTrailingObjects<Init *>()[Num+NumConds]; // Values follow them.
+  }
+
+  ArrayRef<Init *> getConds() const {
+    return makeArrayRef(getTrailingObjects<Init *>(), NumConds);
+  }
+
+  ArrayRef<Init *> getVals() const {
+    return makeArrayRef(getTrailingObjects<Init *>()+NumConds, NumConds);
+  }
+
+  Init *Fold(Record *CurRec) const;
+
+  Init *resolveReferences(Resolver &R) const override;
+
+  bool isConcrete() const override;
+  bool isComplete() const override;
+  std::string getAsString() const override;
+
+  using const_case_iterator = SmallVectorImpl<Init*>::const_iterator;
+  using const_val_iterator = SmallVectorImpl<Init*>::const_iterator;
+  // Note: arg_* iterate over the conditions; name_* iterate over the values.
+  inline const_case_iterator  arg_begin() const { return getConds().begin(); }
+  inline const_case_iterator  arg_end  () const { return getConds().end(); }
+
+  inline size_t              case_size () const { return NumConds; }
+  inline bool                case_empty() const { return NumConds == 0; }
+
+  inline const_val_iterator name_begin() const { return getVals().begin();}
+  inline const_val_iterator name_end  () const { return getVals().end(); }
+
+  inline size_t              val_size () const { return NumConds; }
+  inline bool                val_empty() const { return NumConds == 0; }
+
+  Init *getBit(unsigned Bit) const override;
+};
+
 /// !foldl (a, b, expr, start, lst) - Fold over a list.
 class FoldOpInit : public TypedInit, public FoldingSetNode {
 private:
diff --git a/include/llvm/TableGen/SearchableTable.td b/include/llvm/TableGen/SearchableTable.td
index 1089d363eb6f..2680c71218ea 100644
--- a/include/llvm/TableGen/SearchableTable.td
+++ b/include/llvm/TableGen/SearchableTable.td
@@ -1,9 +1,8 @@
 //===- SearchableTable.td ----------------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h
index 4b32f9e3da8f..35156424b0d3 100644
--- a/include/llvm/TableGen/SetTheory.h
+++ b/include/llvm/TableGen/SetTheory.h
@@ -1,9 +1,8 @@
 //===- SetTheory.h - Generate ordered sets from DAG expressions -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h
index 3aa3540d616d..795b7a6d41dc 100644
--- a/include/llvm/TableGen/StringMatcher.h
+++ b/include/llvm/TableGen/StringMatcher.h
@@ -1,9 +1,8 @@
 //===- StringMatcher.h - Generate a matcher for input strings ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/TableGen/StringToOffsetTable.h b/include/llvm/TableGen/StringToOffsetTable.h
index 4b11e889ea6c..76ce51893907 100644
--- a/include/llvm/TableGen/StringToOffsetTable.h
+++ b/include/llvm/TableGen/StringToOffsetTable.h
@@ -1,9 +1,8 @@
 //===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/TableGen/TableGenBackend.h b/include/llvm/TableGen/TableGenBackend.h
index d226f1f1af7b..a426e4217578 100644
--- a/include/llvm/TableGen/TableGenBackend.h
+++ b/include/llvm/TableGen/TableGenBackend.h
@@ -1,9 +1,8 @@
 //===- llvm/TableGen/TableGenBackend.h - Backend utilities ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,8 @@ class raw_ostream;
 /// raw_ostream.
 void emitSourceFileHeader(StringRef Desc, raw_ostream &OS);
 
+extern bool TimeRegions;
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/CodeGenCWrappers.h b/include/llvm/Target/CodeGenCWrappers.h
index 3ad77c5d5e00..a99546357053 100644
--- a/include/llvm/Target/CodeGenCWrappers.h
+++ b/include/llvm/Target/CodeGenCWrappers.h
@@ -1,9 +1,8 @@
 //===- llvm/Target/CodeGenCWrappers.h - CodeGen C Wrappers ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td
index 045fe2520047..45718327b4a7 100644
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -1,9 +1,8 @@
 //===-- GenericOpcodes.td - Opcodes used with GlobalISel ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,12 +92,14 @@ def G_BITCAST : GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Only supports scalar result types
 def G_CONSTANT : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins unknown:$imm);
   let hasSideEffects = 0;
 }
 
+// Only supports scalar result types
 def G_FCONSTANT : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins unknown:$imm);
@@ -122,31 +123,31 @@ def G_VAARG : GenericInstruction {
 
 def G_CTLZ : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
+  let InOperandList = (ins type1:$src);
   let hasSideEffects = 0;
 }
 
 def G_CTLZ_ZERO_UNDEF : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
+  let InOperandList = (ins type1:$src);
   let hasSideEffects = 0;
 }
 
 def G_CTTZ : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
+  let InOperandList = (ins type1:$src);
   let hasSideEffects = 0;
 }
 
 def G_CTTZ_ZERO_UNDEF : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
+  let InOperandList = (ins type1:$src);
   let hasSideEffects = 0;
 }
 
 def G_CTPOP : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src);
+  let InOperandList = (ins type1:$src);
   let hasSideEffects = 0;
 }
 
@@ -168,6 +169,12 @@ def G_BLOCK_ADDR : GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_JUMP_TABLE : GenericInstruction { // One result, one untyped input.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins unknown:$jti); // Presumably a jump table index.
+  let hasSideEffects = 0;
+}
+
 //------------------------------------------------------------------------------
 // Binary ops.
 //------------------------------------------------------------------------------
@@ -255,21 +262,21 @@ def G_XOR : GenericInstruction {
 // Generic left-shift.
 def G_SHL : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src1, type0:$src2);
+  let InOperandList = (ins type0:$src1, type1:$src2);
   let hasSideEffects = 0;
 }
 
 // Generic logical right-shift.
 def G_LSHR : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src1, type0:$src2);
+  let InOperandList = (ins type0:$src1, type1:$src2);
   let hasSideEffects = 0;
 }
 
 // Generic arithmetic right-shift.
 def G_ASHR : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src1, type0:$src2);
+  let InOperandList = (ins type0:$src1, type1:$src2);
   let hasSideEffects = 0;
 }
 
@@ -307,6 +314,38 @@ def G_PTR_MASK : GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Generic signed integer minimum.
+def G_SMIN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+// Generic signed integer maximum.
+def G_SMAX : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+// Generic unsigned integer minimum.
+def G_UMIN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+// Generic unsigned integer maximum.
+def G_UMAX : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
 //------------------------------------------------------------------------------
 // Overflow ops
 //------------------------------------------------------------------------------
@@ -454,6 +493,74 @@ def G_FABS : GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_FCOPYSIGN : GenericInstruction { // $dst and $src0 share type0.
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src0, type1:$src1); // $src1 is typed separately.
+  let hasSideEffects = 0;
+}
+
+def G_FCANONICALIZE : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src);
+  let hasSideEffects = 0;
+}
+
+// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
+// values.
+//
+// In the case where a single input is a NaN (either signaling or quiet),
+// the non-NaN input is returned.
+//
+// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
+def G_FMINNUM : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+def G_FMAXNUM : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
+// two values, following the IEEE-754 2008 definition. This differs from
+// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
+// signaling NaN, returns a quiet NaN.
+def G_FMINNUM_IEEE : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+def G_FMAXNUM_IEEE : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
+// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
+// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
+def G_FMINIMUM : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
+def G_FMAXIMUM : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+  let isCommutable = 1;
+}
+
 //------------------------------------------------------------------------------
 // Floating Point Binary ops.
 //------------------------------------------------------------------------------
@@ -554,6 +661,51 @@ def G_FCEIL : GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Floating point cosine of a value.
+def G_FCOS : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+// Floating point sine of a value.
+def G_FSIN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+// Floating point square root of a value.
+// This returns NaN for negative nonzero values.
+// NOTE: Unlike libm sqrt(), this never sets errno. In all other respects it's
+// libm-conformant.
+def G_FSQRT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+// Floating point floor of a value.
+def G_FFLOOR : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+// Floating point round to next integer.
+def G_FRINT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+// Floating point round to the nearest integer.
+def G_FNEARBYINT : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
 //------------------------------------------------------------------------------
 // Opcodes for LLVM Intrinsics
 //------------------------------------------------------------------------------
@@ -647,6 +799,12 @@ def G_ATOMICRMW_MIN : G_ATOMICRMW_OP;
 def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP;
 def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP;
 
+def G_FENCE : GenericInstruction { // No results; two immediate operands.
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$ordering, i32imm:$scope);
+  let hasSideEffects = 1; // Unlike most records in this file, marked side-effecting.
+}
+
 //------------------------------------------------------------------------------
 // Variadic ops
 //------------------------------------------------------------------------------
@@ -689,7 +847,9 @@ def G_MERGE_VALUES : GenericInstruction {
   let hasSideEffects = 0;
 }
 
-/// Create a vector from multiple scalar registers.
+/// Create a vector from multiple scalar registers. No implicit
+/// conversion is performed (i.e. the result element type must be the
+/// same as all source operands).
 def G_BUILD_VECTOR : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type1:$src0, variable_ops);
@@ -759,6 +919,15 @@ def G_BRINDIRECT : GenericInstruction {
   let isTerminator = 1;
 }
 
+// Generic branch to a jump table entry. Defines no results.
+def G_BRJT : GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptype0:$tbl, unknown:$jti, type1:$idx); // NOTE(review): presumably table address, table index, entry index; confirm.
+  let hasSideEffects = 0;
+  let isBranch = 1;
+  let isTerminator = 1;
+}
+
 //------------------------------------------------------------------------------
 // Vector ops
 //------------------------------------------------------------------------------
diff --git a/include/llvm/Target/GlobalISel/RegisterBank.td b/include/llvm/Target/GlobalISel/RegisterBank.td
index 4dfd139e9fb6..51578b66b160 100644
--- a/include/llvm/Target/GlobalISel/RegisterBank.td
+++ b/include/llvm/Target/GlobalISel/RegisterBank.td
@@ -1,9 +1,8 @@
 //===- RegisterBank.td - Register bank definitions ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 31d26361260d..6cc58d6521da 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -1,9 +1,8 @@
 //===- TargetGlobalISel.td - Common code for GlobalISel ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,6 +49,8 @@ def : GINodeEquiv<G_FCONSTANT, fpimm>;
 def : GINodeEquiv<G_ADD, add>;
 def : GINodeEquiv<G_SUB, sub>;
 def : GINodeEquiv<G_MUL, mul>;
+def : GINodeEquiv<G_UMULH, mulhu>;
+def : GINodeEquiv<G_SMULH, mulhs>;
 def : GINodeEquiv<G_SDIV, sdiv>;
 def : GINodeEquiv<G_UDIV, udiv>;
 def : GINodeEquiv<G_SREM, srem>;
@@ -77,6 +78,7 @@ def : GINodeEquiv<G_FREM, frem>;
 def : GINodeEquiv<G_FPOW, fpow>;
 def : GINodeEquiv<G_FEXP2, fexp2>;
 def : GINodeEquiv<G_FLOG2, flog2>;
+def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
 def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
 // ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS.
 def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
@@ -89,7 +91,19 @@ def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
 def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
 def : GINodeEquiv<G_CTPOP, ctpop>;
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
 def : GINodeEquiv<G_FCEIL, fceil>;
+def : GINodeEquiv<G_FCOS, fcos>;
+def : GINodeEquiv<G_FSIN, fsin>;
+def : GINodeEquiv<G_FABS, fabs>;
+def : GINodeEquiv<G_FSQRT, fsqrt>;
+def : GINodeEquiv<G_FFLOOR, ffloor>;
+def : GINodeEquiv<G_FRINT, frint>;
+def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
+def : GINodeEquiv<G_SMIN, smin>;
+def : GINodeEquiv<G_SMAX, smax>;
+def : GINodeEquiv<G_UMIN, umin>;
+def : GINodeEquiv<G_UMAX, umax>;
 
 // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
 // complications that tablegen must take care of. For example, Predicates such
@@ -124,6 +138,7 @@ def : GINodeEquiv<G_ATOMICRMW_MIN, atomic_load_min>;
 def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max>;
 def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>;
 def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>;
+def : GINodeEquiv<G_FENCE, atomic_fence>;
 
 // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
 // Should be used on defs that subclass GIComplexOperandMatcher<>.
diff --git a/include/llvm/Target/GlobalISel/Target.td b/include/llvm/Target/GlobalISel/Target.td
index 6740f404a9d3..538ca65e1162 100644
--- a/include/llvm/Target/GlobalISel/Target.td
+++ b/include/llvm/Target/GlobalISel/Target.td
@@ -1,9 +1,8 @@
 //===- Target.td - Define GlobalISel rules -----------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index e4b827babb92..d58662e128e0 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -1,9 +1,8 @@
 //===- Target.td - Target Independent TableGen interface ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -122,6 +121,10 @@ class ComposedSubRegIndex<SubRegIndex A, SubRegIndex B>
 // this register class when printing.
 class RegAltNameIndex {
   string Namespace = "";
+
+  // A set to be used if the name for a register is not defined in this set.
+  // This allows creating name sets with only a few alternative names.
+  RegAltNameIndex FallbackRegAltNameIndex = ?;
 }
 def NoRegAltName : RegAltNameIndex;
 
@@ -395,11 +398,49 @@ include "llvm/Target/TargetSchedule.td"
 
 class Predicate; // Forward def
 
+class InstructionEncoding {
+  // Size of encoded instruction.
+  int Size;
+
+  // The "namespace" in which this instruction exists, on targets like ARM
+  // for which multiple ISA namespaces exist.
+  string DecoderNamespace = "";
+
+  // List of predicates which will be turned into isel matching code.
+  list<Predicate> Predicates = [];
+
+  string DecoderMethod = "";
+
+  // Is the instruction decoder method able to completely determine if the
+  // given instruction is valid or not. If the TableGen definition of the
+  // instruction specifies bitpattern A??B where A and B are static bits, the
+  // hasCompleteDecoder flag says whether the decoder method fully handles the
+  // ?? space, i.e. if it is a final arbiter for the instruction validity.
+  // If not then the decoder attempts to continue decoding when the decoder
+  // method fails.
+  //
+  // This allows handling situations where the encoding is not fully
+  // orthogonal. Example:
+  // * InstA with bitpattern 0b0000????,
+  // * InstB with bitpattern 0b000000?? but the associated decoder method
+  //   DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
+  //
+  // The decoder tries to decode a bitpattern that matches both InstA and
+  // InstB bitpatterns first as InstB (because it is the most specific
+  // encoding). In the default case (hasCompleteDecoder = 1), when
+  // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
+  // hasCompleteDecoder = 0 in InstB, the decoder is informed that
+  // DecodeInstB() is not able to determine if all possible values of ?? are
+  // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
+  // decode the bitpattern as InstA too.
+  bit hasCompleteDecoder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction set description - These classes correspond to the C++ classes in
 // the Target/TargetInstrInfo.h file.
 //
-class Instruction {
+class Instruction : InstructionEncoding {
   string Namespace = "";
 
   dag OutOperandList;       // An dag containing the MI def operand list.
@@ -424,10 +465,6 @@ class Instruction {
   // from the opcode.
   int Size = 0;
 
-  // DecoderNamespace - The "namespace" in which this instruction exists, on
-  // targets like ARM which multiple ISA namespaces exist.
-  string DecoderNamespace = "";
-
   // Code size, for instruction selection.
   // FIXME: What does this actually mean?
   int CodeSize = 0;
@@ -453,11 +490,16 @@ class Instruction {
   bit canFoldAsLoad = 0;    // Can this be folded as a simple memory operand?
   bit mayLoad      = ?;     // Is it possible for this inst to read memory?
   bit mayStore     = ?;     // Is it possible for this inst to write memory?
+  bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
   bit isConvertibleToThreeAddress = 0;  // Can this 2-addr instruction promote?
   bit isCommutable = 0;     // Is this 3 operand instruction commutable?
   bit isTerminator = 0;     // Is this part of the terminator for a basic block?
   bit isReMaterializable = 0; // Is this instruction re-materializable?
-  bit isPredicable = 0;     // Is this instruction predicable?
+  bit isPredicable = 0;     // 1 means this instruction is predicable
+                            // even if it does not have any operand
+                            // tablegen can identify as a predicate
+  bit isUnpredicable = 0;   // 1 means this instruction is not predicable
+                            // even if it _does_ have a predicate operand
   bit hasDelaySlot = 0;     // Does this instruction have an delay slot?
   bit usesCustomInserter = 0; // Pseudo instr needing special help.
   bit hasPostISelHook = 0;  // To be *adjusted* after isel by target hook.
@@ -524,31 +566,6 @@ class Instruction {
   string DisableEncoding = "";
 
   string PostEncoderMethod = "";
-  string DecoderMethod = "";
-
-  // Is the instruction decoder method able to completely determine if the
-  // given instruction is valid or not. If the TableGen definition of the
-  // instruction specifies bitpattern A??B where A and B are static bits, the
-  // hasCompleteDecoder flag says whether the decoder method fully handles the
-  // ?? space, i.e. if it is a final arbiter for the instruction validity.
-  // If not then the decoder attempts to continue decoding when the decoder
-  // method fails.
-  //
-  // This allows to handle situations where the encoding is not fully
-  // orthogonal. Example:
-  // * InstA with bitpattern 0b0000????,
-  // * InstB with bitpattern 0b000000?? but the associated decoder method
-  //   DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
-  //
-  // The decoder tries to decode a bitpattern that matches both InstA and
-  // InstB bitpatterns first as InstB (because it is the most specific
-  // encoding). In the default case (hasCompleteDecoder = 1), when
-  // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
-  // hasCompleteDecoder = 0 in InstB, the decoder is informed that
-  // DecodeInstB() is not able to determine if all possible values of ?? are
-  // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
-  // decode the bitpattern as InstA too.
-  bit hasCompleteDecoder = 1;
 
   /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
   bits<64> TSFlags = 0;
@@ -585,6 +602,13 @@ class Instruction {
   bit FastISelShouldIgnore = 0;
 }
 
+/// Defines an additional encoding that disassembles to the given instruction.
+/// Like Instruction, the Inst and SoftFail fields are omitted to allow targets
+/// to specify their size.
+class AdditionalEncoding<Instruction I> : InstructionEncoding {
+  Instruction AliasOf = I;
+}
+
 /// PseudoInstExpansion - Expansion information for a pseudo-instruction.
 /// Which instruction it expands to and how the operands map from the
 /// pseudo.
@@ -909,7 +933,7 @@ class InstrInfo {
 }
 
 // Standard Pseudo Instructions.
-// This list must match TargetOpcodes.h and CodeGenTarget.cpp.
+// This list must match TargetOpcodes.def.
 // Only these instructions are allowed in the TargetOpcode namespace.
 // Ensure mayLoad and mayStore have a default value, so as not to break
 // targets that set guessInstructionProperties=0. Any local definition of
@@ -934,6 +958,15 @@ def INLINEASM : StandardPseudoInstruction {
   let AsmString = "";
   let hasSideEffects = 0;  // Note side effect is encoded in an operand.
 }
+def INLINEASM_BR : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins variable_ops);
+  let AsmString = "";
+  let hasSideEffects = 0;  // Note side effect is encoded in an operand.
+  let isTerminator = 1;
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+}
 def CFI_INSTRUCTION : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins i32imm:$id);
@@ -1037,7 +1070,7 @@ def BUNDLE : StandardPseudoInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
   let AsmString = "BUNDLE";
-  let hasSideEffects = 1;
+  let hasSideEffects = 0;
 }
 def LIFETIME_START : StandardPseudoInstruction {
   let OutOperandList = (outs);
@@ -1174,7 +1207,7 @@ def FENTRY_CALL : StandardPseudoInstruction {
   let hasSideEffects = 1;
 }
 def ICALL_BRANCH_FUNNEL : StandardPseudoInstruction {
-  let OutOperandList = (outs unknown:$dst);
+  let OutOperandList = (outs);
   let InOperandList = (ins variable_ops);
   let AsmString = "";
   let hasSideEffects = 1;
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 95d2b4226294..1bc03cf8a49d 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -1,9 +1,8 @@
 //===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,15 @@ class CCIfVarArg<CCAction A> : CCIf<"State.isVarArg()", A> {}
 /// CCIfNotVarArg - If the current function is not vararg - apply the action
 class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
 
+/// CCIfPtrAddrSpace - If the top-level parent of the current argument has
+/// pointer type in the specified address-space.
+class CCIfPtrAddrSpace<int AS, CCAction A>
+    : CCIf<"(ArgFlags.isPointer() && ArgFlags.getPointerAddrSpace() == " # AS # ")", A> {}
+
+/// CCIfPtr - If the top-level parent of the current argument had
+/// pointer type in some address-space.
+class CCIfPtr<CCAction A> : CCIf<"ArgFlags.isPointer()", A> {}
+
 /// CCAssignToReg - This action matches if there is a register in the specified
 /// list that is still available.  If so, it assigns the value to the first
 /// available register and succeeds.
@@ -160,6 +168,11 @@ class CCDelegateTo<CallingConv cc> : CCAction {
 /// that the target supports.
 class CallingConv<list<CCAction> actions> {
   list<CCAction> Actions = actions;
+
+  /// If true, this calling convention will be emitted as externally visible in
+  /// the llvm namespaces instead of as a static function.
+  bit Entry = 0;
+
   bit Custom = 0;
 }
 
diff --git a/include/llvm/Target/TargetInstrPredicate.td b/include/llvm/Target/TargetInstrPredicate.td
index 4b2c57b34c2e..5623461c648d 100644
--- a/include/llvm/Target/TargetInstrPredicate.td
+++ b/include/llvm/Target/TargetInstrPredicate.td
@@ -1,9 +1,8 @@
 //===- TargetInstrPredicate.td - ---------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/TargetIntrinsicInfo.h b/include/llvm/Target/TargetIntrinsicInfo.h
index 6a92bdee747e..ef571b15153e 100644
--- a/include/llvm/Target/TargetIntrinsicInfo.h
+++ b/include/llvm/Target/TargetIntrinsicInfo.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/TargetIntrinsicInfo.h - Instruction Info ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td
index 182054d8444e..b68ed045520c 100644
--- a/include/llvm/Target/TargetItinerary.td
+++ b/include/llvm/Target/TargetItinerary.td
@@ -1,9 +1,8 @@
 //===- TargetItinerary.td - Target Itinierary Description --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index e80f2bf82f26..3a2497bff11e 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -52,6 +51,7 @@ protected:
   unsigned PersonalityEncoding = 0;
   unsigned LSDAEncoding = 0;
   unsigned TTypeEncoding = 0;
+  unsigned CallSiteEncoding = 0;
 
   /// This section contains the static constructor pointer list.
   MCSection *StaticCtorSection = nullptr;
@@ -80,6 +80,9 @@ public:
   /// Emit the module-level metadata that the platform cares about.
   virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {}
 
+  /// Get the module-level metadata that the platform cares about.
+  virtual void getModuleMetadata(Module &M) {}
+
   /// Given a constant with the SectionKind, return a section that it should be
   /// placed in.
   virtual MCSection *getSectionForConstant(const DataLayout &DL,
@@ -145,6 +148,7 @@ public:
   unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
   unsigned getLSDAEncoding() const { return LSDAEncoding; }
   unsigned getTTypeEncoding() const { return TTypeEncoding; }
+  unsigned getCallSiteEncoding() const { return CallSiteEncoding; }
 
   const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
                                   MCStreamer &Streamer) const;
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 3eafcc25583a..cdf9f8bfd5ea 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/TargetMachine.h - Target Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,6 +35,9 @@ class MCSubtargetInfo;
 class MCSymbol;
 class raw_pwrite_stream;
 class PassManagerBuilder;
+struct PerFunctionMIParsingState;
+class SMDiagnostic;
+class SMRange;
 class Target;
 class TargetIntrinsicInfo;
 class TargetIRAnalysis;
@@ -50,6 +52,10 @@ class PassManagerBase;
 }
 using legacy::PassManagerBase;
 
+namespace yaml {
+struct MachineFunctionInfo;
+}
+
 //===----------------------------------------------------------------------===//
 ///
 /// Primary interface to the complete machine description for the target
@@ -115,6 +121,27 @@ public:
     return nullptr;
   }
 
+  /// Allocate and return a default initialized instance of the YAML
+  /// representation for the MachineFunctionInfo.
+  virtual yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const {
+    return nullptr;
+  }
+
+  /// Allocate and initialize an instance of the YAML representation of the
+  /// MachineFunctionInfo.
+  virtual yaml::MachineFunctionInfo *
+  convertFuncInfoToYAML(const MachineFunction &MF) const {
+    return nullptr;
+  }
+
+  /// Parse out the target's MachineFunctionInfo from the YAML representation.
+  virtual bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+                                        PerFunctionMIParsingState &PFS,
+                                        SMDiagnostic &Error,
+                                        SMRange &SourceRange) const {
+    return false;
+  }
+
   /// This method returns a pointer to the specified type of
   /// TargetSubtargetInfo.  In debug builds, it verifies that the object being
   /// returned is of the correct type.
@@ -363,9 +390,9 @@ inline CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
   if (CM) {
     // By default, targets do not support the tiny and kernel models.
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     if (*CM == CodeModel::Kernel)
-      report_fatal_error("Target does not support the kernel CodeModel");
+      report_fatal_error("Target does not support the kernel CodeModel", false);
     return *CM;
   }
   return Default;
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index b18101d92833..8cc2a6010879 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/TargetOptions.h - Target Options ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -119,7 +118,8 @@ namespace llvm {
           NoTrapAfterNoreturn(false), EmulatedTLS(false),
           ExplicitEmulatedTLS(false), EnableIPRA(false),
           EmitStackSizeSection(false), EnableMachineOutliner(false),
-          SupportsDefaultOutlining(false), EmitAddrsig(false) {}
+          SupportsDefaultOutlining(false), EmitAddrsig(false),
+          EnableDebugEntryValues(false) {}
 
     /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
     /// option is specified on the command line, and should enable debugging
@@ -253,6 +253,9 @@ namespace llvm {
     /// Emit address-significance table.
     unsigned EmitAddrsig : 1;
 
+    /// Emit debug info about parameter's entry values.
+    unsigned EnableDebugEntryValues : 1;
+
     /// FloatABIType - This setting is set by -float-abi=xxx option is specfied
     /// on the command line. This setting may either be Default, Soft, or Hard.
     /// Default selects the target's default behavior. Soft selects the ABI for
diff --git a/include/llvm/Target/TargetPfmCounters.td b/include/llvm/Target/TargetPfmCounters.td
index dac150f03445..e1d5013c1291 100644
--- a/include/llvm/Target/TargetPfmCounters.td
+++ b/include/llvm/Target/TargetPfmCounters.td
@@ -1,9 +1,8 @@
 //===- TargetPfmCounters.td - Target Pfm Counters -*- tablegen ----------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 808e183f5a5f..a36d259df831 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -1,9 +1,8 @@
 //===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,7 +135,7 @@ class ProcResourceKind;
 //
 // An optional Super resource may be given to model these resources as
 // a subset of the more general super resources. Using one of these
-// resources implies using one of the super resoruces.
+// resources implies using one of the super resources.
 //
 // ProcResourceUnits normally model a few buffered resources within an
 // out-of-order engine. Buffered resources may be held for multiple
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index eb5a14bd21b8..b913a054ac2c 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -1,9 +1,8 @@
 //===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,7 +124,7 @@ def SDTIntSatNoShOp : SDTypeProfile<1, 2, [   // ssat with no shift
 def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
 ]>;
-def SDTIntScaledBinOp : SDTypeProfile<1, 3, [  // smulfix
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [  // smulfix, umulfix
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
 ]>;
 
@@ -265,6 +264,11 @@ def SDTAtomic3 : SDTypeProfile<1, 3, [
 def SDTAtomic2 : SDTypeProfile<1, 2, [
   SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
 ]>;
+
+def SDTFPAtomic2 : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
+]>;
+
 def SDTAtomicStore : SDTypeProfile<0, 2, [
   SDTCisPtrTy<0>, SDTCisInt<1>
 ]>;
@@ -385,7 +389,10 @@ def saddsat    : SDNode<"ISD::SADDSAT"   , SDTIntBinOp, [SDNPCommutative]>;
 def uaddsat    : SDNode<"ISD::UADDSAT"   , SDTIntBinOp, [SDNPCommutative]>;
 def ssubsat    : SDNode<"ISD::SSUBSAT"   , SDTIntBinOp>;
 def usubsat    : SDNode<"ISD::USUBSAT"   , SDTIntBinOp>;
+
 def smulfix    : SDNode<"ISD::SMULFIX"   , SDTIntScaledBinOp, [SDNPCommutative]>;
+def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
+def umulfix    : SDNode<"ISD::UMULFIX"   , SDTIntScaledBinOp, [SDNPCommutative]>;
 
 def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
 def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -444,6 +451,11 @@ def ffloor     : SDNode<"ISD::FFLOOR"     , SDTFPUnaryOp>;
 def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
 def fround     : SDNode<"ISD::FROUND"     , SDTFPUnaryOp>;
 
+def lround     : SDNode<"ISD::LROUND"     , SDTFPToIntOp>;
+def llround    : SDNode<"ISD::LLROUND"    , SDTFPToIntOp>;
+def lrint      : SDNode<"ISD::LRINT"      , SDTFPToIntOp>;
+def llrint     : SDNode<"ISD::LLRINT"     , SDTFPToIntOp>;
+
 def fpround    : SDNode<"ISD::FP_ROUND"   , SDTFPRoundOp>;
 def fpextend   : SDNode<"ISD::FP_EXTEND"  , SDTFPExtendOp>;
 def fcopysign  : SDNode<"ISD::FCOPYSIGN"  , SDTFPSignOp>;
@@ -455,6 +467,53 @@ def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
 def f16_to_fp  : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
 def fp_to_f16  : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
 
+def strict_fadd       : SDNode<"ISD::STRICT_FADD",
+                               SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fsub       : SDNode<"ISD::STRICT_FSUB",
+                               SDTFPBinOp, [SDNPHasChain]>;
+def strict_fmul       : SDNode<"ISD::STRICT_FMUL",
+                               SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fdiv       : SDNode<"ISD::STRICT_FDIV",
+                               SDTFPBinOp, [SDNPHasChain]>;
+def strict_frem       : SDNode<"ISD::STRICT_FREM",
+                               SDTFPBinOp, [SDNPHasChain]>;
+def strict_fma        : SDNode<"ISD::STRICT_FMA",
+                               SDTFPTernaryOp, [SDNPHasChain]>;
+def strict_fsqrt      : SDNode<"ISD::STRICT_FSQRT",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fsin       : SDNode<"ISD::STRICT_FSIN",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
+                               SDTFPBinOp, [SDNPHasChain]>;
+def strict_flog2      : SDNode<"ISD::STRICT_FLOG2",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_frint      : SDNode<"ISD::STRICT_FRINT",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fceil      : SDNode<"ISD::STRICT_FCEIL",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ffloor     : SDNode<"ISD::STRICT_FFLOOR",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fround     : SDNode<"ISD::STRICT_FROUND",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftrunc     : SDNode<"ISD::STRICT_FTRUNC",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fminnum    : SDNode<"ISD::STRICT_FMINNUM",
+                               SDTFPBinOp, [SDNPHasChain,
+                                            SDNPCommutative, SDNPAssociative]>;
+def strict_fmaxnum    : SDNode<"ISD::STRICT_FMAXNUM",
+                               SDTFPBinOp, [SDNPHasChain,
+                                            SDNPCommutative, SDNPAssociative]>;
+def strict_fpround    : SDNode<"ISD::STRICT_FP_ROUND",
+                               SDTFPRoundOp, [SDNPHasChain]>;
+def strict_fpextend   : SDNode<"ISD::STRICT_FP_EXTEND",
+                               SDTFPExtendOp, [SDNPHasChain]>;
+
 def setcc      : SDNode<"ISD::SETCC"      , SDTSetCC>;
 def select     : SDNode<"ISD::SELECT"     , SDTSelect>;
 def vselect    : SDNode<"ISD::VSELECT"    , SDTVSelect>;
@@ -511,14 +570,19 @@ def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fadd : SDNode<"ISD::ATOMIC_LOAD_FADD" , SDTFPAtomic2,
+                    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2,
+                    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+
 def atomic_load      : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
                     [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def atomic_store     : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
-def masked_store : SDNode<"ISD::MSTORE",  SDTMaskedStore,
+def masked_st    : SDNode<"ISD::MSTORE",  SDTMaskedStore,
                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_load  : SDNode<"ISD::MLOAD",  SDTMaskedLoad,
+def masked_ld    : SDNode<"ISD::MLOAD",  SDTMaskedLoad,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
@@ -673,6 +737,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
   // cast<StoreSDNode>(N)->isTruncatingStore();
   bit IsTruncStore = ?;
 
+  // cast<MemSDNode>(N)->getAddressSpace() ==
+  // If this is empty, accept any address space.
+  list<int> AddressSpaces = ?;
+
   // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic
   bit IsAtomicOrderingMonotonic = ?;
   // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire
@@ -698,6 +766,8 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
   // cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
   // cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
   ValueType ScalarMemoryVT = ?;
+
+  // TODO: Add alignment
 }
 
 // PatFrag - A version of PatFrags matching only a single fragment.
@@ -771,14 +841,11 @@ class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
 def vtInt      : PatLeaf<(vt),  [{ return N->getVT().isInteger(); }]>;
 def vtFP       : PatLeaf<(vt),  [{ return N->getVT().isFloatingPoint(); }]>;
 
-def immAllOnesV: PatLeaf<(build_vector), [{
-  return ISD::isBuildVectorAllOnes(N);
-}]>;
-def immAllZerosV: PatLeaf<(build_vector), [{
-  return ISD::isBuildVectorAllZeros(N);
-}]>;
-
-
+// Use ISD::isBuildVectorAllOnes or ISD::isBuildVectorAllZeros to look for
+// the corresponding build_vector. Will look through bitcasts except when used
+// as a pattern root.
+def immAllOnesV; // ISD::isBuildVectorAllOnes
+def immAllZerosV; // ISD::isBuildVectorAllZeros
 
 // Other helper fragments.
 def not  : PatFrag<(ops node:$in), (xor node:$in, -1)>;
@@ -1163,6 +1230,87 @@ def setle  : PatFrag<(ops node:$lhs, node:$rhs),
 def setne  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETNE)>;
 
+// We don't have strict FP extended loads as single DAG nodes, but we can
+// still provide convenience fragments to match those operations.
+def strict_extloadf32 : PatFrag<(ops node:$ptr),
+                                (strict_fpextend (f32 (load node:$ptr)))>;
+def strict_extloadf64 : PatFrag<(ops node:$ptr),
+                                (strict_fpextend (f64 (load node:$ptr)))>;
+
+// Convenience fragments to match both strict and non-strict fp operations
+def any_fadd       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fadd node:$lhs, node:$rhs),
+                               (fadd node:$lhs, node:$rhs)]>;
+def any_fsub       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fsub node:$lhs, node:$rhs),
+                               (fsub node:$lhs, node:$rhs)]>;
+def any_fmul       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fmul node:$lhs, node:$rhs),
+                               (fmul node:$lhs, node:$rhs)]>;
+def any_fdiv       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fdiv node:$lhs, node:$rhs),
+                               (fdiv node:$lhs, node:$rhs)]>;
+def any_frem       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_frem node:$lhs, node:$rhs),
+                               (frem node:$lhs, node:$rhs)]>;
+def any_fma        : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                              [(strict_fma node:$src1, node:$src2, node:$src3),
+                               (fma node:$src1, node:$src2, node:$src3)]>;
+def any_fsqrt      : PatFrags<(ops node:$src),
+                              [(strict_fsqrt node:$src),
+                               (fsqrt node:$src)]>;
+def any_fsin       : PatFrags<(ops node:$src),
+                              [(strict_fsin node:$src),
+                               (fsin node:$src)]>;
+def any_fcos       : PatFrags<(ops node:$src),
+                              [(strict_fcos node:$src),
+                               (fcos node:$src)]>;
+def any_fexp2      : PatFrags<(ops node:$src),
+                              [(strict_fexp2 node:$src),
+                               (fexp2 node:$src)]>;
+def any_fpow       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fpow node:$lhs, node:$rhs),
+                               (fpow node:$lhs, node:$rhs)]>;
+def any_flog2      : PatFrags<(ops node:$src),
+                              [(strict_flog2 node:$src),
+                               (flog2 node:$src)]>;
+def any_frint      : PatFrags<(ops node:$src),
+                              [(strict_frint node:$src),
+                               (frint node:$src)]>;
+def any_fnearbyint : PatFrags<(ops node:$src),
+                              [(strict_fnearbyint node:$src),
+                               (fnearbyint node:$src)]>;
+def any_fceil      : PatFrags<(ops node:$src),
+                              [(strict_fceil node:$src),
+                               (fceil node:$src)]>;
+def any_ffloor     : PatFrags<(ops node:$src),
+                              [(strict_ffloor node:$src),
+                               (ffloor node:$src)]>;
+def any_fround     : PatFrags<(ops node:$src),
+                              [(strict_fround node:$src),
+                               (fround node:$src)]>;
+def any_ftrunc     : PatFrags<(ops node:$src),
+                              [(strict_ftrunc node:$src),
+                               (ftrunc node:$src)]>;
+def any_fmaxnum    : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fmaxnum node:$lhs, node:$rhs),
+                               (fmaxnum node:$lhs, node:$rhs)]>;
+def any_fminnum    : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fminnum node:$lhs, node:$rhs),
+                               (fminnum node:$lhs, node:$rhs)]>;
+def any_fpround    : PatFrags<(ops node:$src),
+                              [(strict_fpround node:$src),
+                               (fpround node:$src)]>;
+def any_fpextend   : PatFrags<(ops node:$src),
+                              [(strict_fpextend node:$src),
+                               (fpextend node:$src)]>;
+def any_extloadf32 : PatFrags<(ops node:$ptr),
+                              [(strict_extloadf32 node:$ptr),
+                               (extloadf32 node:$ptr)]>;
+def any_extloadf64 : PatFrags<(ops node:$ptr),
+                              [(strict_extloadf64 node:$ptr),
+                               (extloadf64 node:$ptr)]>;
+
 multiclass binary_atomic_op_ord<SDNode atomic_op> {
   def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
       (!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {
diff --git a/include/llvm/Testing/Support/Annotations.h b/include/llvm/Testing/Support/Annotations.h
new file mode 100644
index 000000000000..aad1a44f4ec9
--- /dev/null
+++ b/include/llvm/Testing/Support/Annotations.h
@@ -0,0 +1,90 @@
+//===--- Annotations.h - Annotated source code for tests ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TESTING_SUPPORT_ANNOTATIONS_H
+#define LLVM_TESTING_SUPPORT_ANNOTATIONS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <tuple>
+#include <vector>
+
+namespace llvm {
+
+/// Annotations lets you mark points and ranges inside source code, for tests:
+///
+///    Annotations Example(R"cpp(
+///       int complete() { x.pri^ }         // ^ indicates a point
+///       void err() { [["hello" == 42]]; } // [[this is a range]]
+///       $definition^class Foo{};          // points can be named: "definition"
+///       $fail[[static_assert(false, "")]] // ranges can be named too: "fail"
+///    )cpp");
+///
+///    StringRef Code = Example.code();             // annotations stripped.
+///    std::vector<size_t> PP = Example.points();   // all unnamed points
+///    size_t P = Example.point();                  // there must be exactly one
+///    Annotations::Range R = Example.range("fail"); // find named ranges
+///
+/// Points/ranges are coordinates into `code()` which is stripped of
+/// annotations.
+///
+/// Ranges may be nested (and points can be inside ranges), but there's no way
+/// to define general overlapping ranges.
+///
+/// FIXME: the choice of the marking syntax makes it impossible to represent
+///        some of the C++ and Objective C constructs (including common ones
+///        like C++ attributes). We can fix this by:
+///          1. introducing an escaping mechanism for the special characters,
+///          2. making characters for marking points and ranges configurable,
+///          3. changing the syntax to something less commonly used,
+///          4. ...
+class Annotations {
+public:
+  /// Two offsets pointing to a continuous substring. End is not included, i.e.
+  /// represents a half-open range.
+  struct Range {
+    size_t Begin = 0;
+    size_t End = 0;
+
+    friend bool operator==(const Range &L, const Range &R) {
+      return std::tie(L.Begin, L.End) == std::tie(R.Begin, R.End);
+    }
+    friend bool operator!=(const Range &L, const Range &R) { return !(L == R); }
+  };
+
+  /// Parses the annotations from Text. Crashes if it's malformed.
+  Annotations(llvm::StringRef Text);
+
+  /// The input text with all annotations stripped.
+  /// All points and ranges are relative to this stripped text.
+  llvm::StringRef code() const { return Code; }
+
+  /// Returns the position of the point marked by ^ (or $name^) in the text.
+  /// Crashes if there isn't exactly one.
+  size_t point(llvm::StringRef Name = "") const;
+  /// Returns the position of all points marked by ^ (or $name^) in the text.
+  std::vector<size_t> points(llvm::StringRef Name = "") const;
+
+  /// Returns the location of the range marked by [[ ]] (or $name[[ ]]).
+  /// Crashes if there isn't exactly one.
+  Range range(llvm::StringRef Name = "") const;
+  /// Returns the location of all ranges marked by [[ ]] (or $name[[ ]]).
+  std::vector<Range> ranges(llvm::StringRef Name = "") const;
+
+private:
+  std::string Code;
+  llvm::StringMap<llvm::SmallVector<size_t, 1>> Points;
+  llvm::StringMap<llvm::SmallVector<Range, 1>> Ranges;
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &O,
+                              const llvm::Annotations::Range &R);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Testing/Support/Error.h b/include/llvm/Testing/Support/Error.h
index 0e5b5403ce87..85328f26440b 100644
--- a/include/llvm/Testing/Support/Error.h
+++ b/include/llvm/Testing/Support/Error.h
@@ -1,9 +1,8 @@
 //===- llvm/Testing/Support/Error.h ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Testing/Support/SupportHelpers.h b/include/llvm/Testing/Support/SupportHelpers.h
index b2975ec395d5..38726b1cfaf7 100644
--- a/include/llvm/Testing/Support/SupportHelpers.h
+++ b/include/llvm/Testing/Support/SupportHelpers.h
@@ -1,18 +1,19 @@
 //===- Testing/Support/SupportHelpers.h -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
 #define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/raw_os_ostream.h"
+#include "gmock/gmock-matchers.h"
 #include "gtest/gtest-printers.h"
 
 #include <string>
@@ -54,11 +55,56 @@ void PrintTo(const ExpectedHolder<T> &Item, std::ostream *Out) {
     PrintTo(static_cast<const ErrorHolder &>(Item), Out);
   }
 }
+
+template <class InnerMatcher> class ValueIsMatcher {
+public:
+  explicit ValueIsMatcher(InnerMatcher ValueMatcher)
+      : ValueMatcher(ValueMatcher) {}
+
+  template <class T>
+  operator ::testing::Matcher<const llvm::Optional<T> &>() const {
+    return ::testing::MakeMatcher(
+        new Impl<T>(::testing::SafeMatcherCast<T>(ValueMatcher)));
+  }
+
+  template <class T>
+  class Impl : public ::testing::MatcherInterface<const llvm::Optional<T> &> {
+  public:
+    explicit Impl(const ::testing::Matcher<T> &ValueMatcher)
+        : ValueMatcher(ValueMatcher) {}
+
+    bool MatchAndExplain(const llvm::Optional<T> &Input,
+                         testing::MatchResultListener *L) const override {
+      return Input && ValueMatcher.MatchAndExplain(Input.getValue(), L);
+    }
+
+    void DescribeTo(std::ostream *OS) const override {
+      *OS << "has a value that ";
+      ValueMatcher.DescribeTo(OS);
+    }
+    void DescribeNegationTo(std::ostream *OS) const override {
+      *OS << "does not have a value that ";
+      ValueMatcher.DescribeTo(OS);
+    }
+
+  private:
+    testing::Matcher<T> ValueMatcher;
+  };
+
+private:
+  InnerMatcher ValueMatcher;
+};
 } // namespace detail
 
+/// Matches an llvm::Optional<T> with a value that conforms to an inner matcher.
+/// To match llvm::None you could use Eq(llvm::None).
+template <class InnerMatcher>
+detail::ValueIsMatcher<InnerMatcher> ValueIs(const InnerMatcher &ValueMatcher) {
+  return detail::ValueIsMatcher<InnerMatcher>(ValueMatcher);
+}
 namespace unittest {
 SmallString<128> getInputFileDirectory(const char *Argv0);
-}
+} // namespace unittest
 } // namespace llvm
 
 #endif
diff --git a/include/llvm/TextAPI/ELF/ELFStub.h b/include/llvm/TextAPI/ELF/ELFStub.h
index fa54e6f8b711..76b2af121662 100644
--- a/include/llvm/TextAPI/ELF/ELFStub.h
+++ b/include/llvm/TextAPI/ELF/ELFStub.h
@@ -1,9 +1,8 @@
 //===- ELFStub.h ------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===-----------------------------------------------------------------------===/
 ///
diff --git a/include/llvm/TextAPI/ELF/TBEHandler.h b/include/llvm/TextAPI/ELF/TBEHandler.h
index 91521c656fa2..1748fd13f3dc 100644
--- a/include/llvm/TextAPI/ELF/TBEHandler.h
+++ b/include/llvm/TextAPI/ELF/TBEHandler.h
@@ -1,9 +1,8 @@
 //===- TBEHandler.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===-----------------------------------------------------------------------===/
 ///
diff --git a/include/llvm/TextAPI/MachO/Architecture.def b/include/llvm/TextAPI/MachO/Architecture.def
new file mode 100644
index 000000000000..4c695fe18eec
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Architecture.def
@@ -0,0 +1,38 @@
+//===- llvm/TextAPI/MachO/Architecture.def - Architecture -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARCHINFO
+#define ARCHINFO(arch)
+#endif
+
+///
+/// X86 architectures sorted by cpu type and sub type id.
+///
+ARCHINFO(i386, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL)
+ARCHINFO(x86_64, MachO::CPU_TYPE_X86_64, MachO::CPU_SUBTYPE_X86_64_ALL)
+ARCHINFO(x86_64h, MachO::CPU_TYPE_X86_64, MachO::CPU_SUBTYPE_X86_64_H)
+
+
+///
+/// ARM architectures sorted by cpu sub type id.
+///
+ARCHINFO(armv4t, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V4T)
+ARCHINFO(armv6, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V6)
+ARCHINFO(armv5, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ARCHINFO(armv7, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7)
+ARCHINFO(armv7s, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7S)
+ARCHINFO(armv7k, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7K)
+ARCHINFO(armv6m, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V6M)
+ARCHINFO(armv7m, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7M)
+ARCHINFO(armv7em, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7EM)
+
+
+///
+/// ARM64 architectures sorted by cpu sub type id.
+///
+ARCHINFO(arm64, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL)
diff --git a/include/llvm/TextAPI/MachO/Architecture.h b/include/llvm/TextAPI/MachO/Architecture.h
new file mode 100644
index 000000000000..055baeb0c0f0
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Architecture.h
@@ -0,0 +1,47 @@
+//===- llvm/TextAPI/MachO/Architecture.h - Architecture ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the architecture enum and helper methods.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+/// Defines the architecture slices that are supported by Text-based Stub files.
+enum Architecture : uint8_t {
+#define ARCHINFO(Arch, Type, SubType) AK_##Arch,
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+  AK_unknown, // this has to go last.
+};
+
+/// Convert a CPU Type and Subtype pair to an architecture slice.
+Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType);
+
+/// Convert a name to an architecture slice.
+Architecture getArchitectureFromName(StringRef Name);
+
+/// Convert an architecture slice to a string.
+StringRef getArchitectureName(Architecture Arch);
+
+/// Convert an architecture slice to a CPU Type and Subtype pair.
+std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch);
+
+raw_ostream &operator<<(raw_ostream &OS, Architecture Arch);
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
diff --git a/include/llvm/TextAPI/MachO/ArchitectureSet.h b/include/llvm/TextAPI/MachO/ArchitectureSet.h
new file mode 100644
index 000000000000..d8dfc7f1af21
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/ArchitectureSet.h
@@ -0,0 +1,159 @@
+//===- llvm/TextAPI/MachO/ArchitectureSet.h - ArchitectureSet ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the architecture set.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
+#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <vector>
+
+namespace llvm {
+namespace MachO {
+
+class ArchitectureSet {
+private:
+  using ArchSetType = uint32_t;
+
+  const static ArchSetType EndIndexVal =
+      std::numeric_limits<ArchSetType>::max();
+  ArchSetType ArchSet{0};
+
+public:
+  constexpr ArchitectureSet() = default;
+  constexpr ArchitectureSet(ArchSetType Raw) : ArchSet(Raw) {}
+  ArchitectureSet(Architecture Arch) : ArchitectureSet() { set(Arch); }
+  ArchitectureSet(const std::vector<Architecture> &Archs);
+
+  void set(Architecture Arch) {
+    if (Arch == AK_unknown)
+      return;
+    ArchSet |= 1U << static_cast<int>(Arch);
+  }
+
+  void clear(Architecture Arch) { ArchSet &= ~(1U << static_cast<int>(Arch)); }
+
+  bool has(Architecture Arch) const {
+    return ArchSet & (1U << static_cast<int>(Arch));
+  }
+
+  bool contains(ArchitectureSet Archs) const {
+    return (ArchSet & Archs.ArchSet) == Archs.ArchSet;
+  }
+
+  size_t count() const;
+
+  bool empty() const { return ArchSet == 0; }
+
+  ArchSetType rawValue() const { return ArchSet; }
+
+  template <typename Ty>
+  class arch_iterator
+      : public std::iterator<std::forward_iterator_tag, Architecture, size_t> {
+  private:
+    ArchSetType Index;
+    Ty *ArchSet;
+
+    void findNextSetBit() {
+      if (Index == EndIndexVal)
+        return;
+      while (++Index < sizeof(Ty) * 8) {
+        if (*ArchSet & (1UL << Index))
+          return;
+      }
+
+      Index = EndIndexVal;
+    }
+
+  public:
+    arch_iterator(Ty *ArchSet, ArchSetType Index = 0)
+        : Index(Index), ArchSet(ArchSet) {
+      if (Index != EndIndexVal && !(*ArchSet & (1UL << Index)))
+        findNextSetBit();
+    }
+
+    Architecture operator*() const { return static_cast<Architecture>(Index); }
+
+    arch_iterator &operator++() {
+      findNextSetBit();
+      return *this;
+    }
+
+    arch_iterator operator++(int) {
+      auto tmp = *this;
+      findNextSetBit();
+      return tmp;
+    }
+
+    bool operator==(const arch_iterator &o) const {
+      return Index == o.Index && ArchSet == o.ArchSet; // same bit, same set
+    }
+
+    bool operator!=(const arch_iterator &o) const { return !(*this == o); }
+  };
+
+  ArchitectureSet operator&(const ArchitectureSet &o) const {
+    return {ArchSet & o.ArchSet}; // set intersection
+  }
+
+  ArchitectureSet operator|(const ArchitectureSet &o) const {
+    return {ArchSet | o.ArchSet}; // set union
+  }
+
+  ArchitectureSet &operator|=(const ArchitectureSet &o) {
+    ArchSet |= o.ArchSet;
+    return *this;
+  }
+
+  ArchitectureSet &operator|=(const Architecture &Arch) {
+    set(Arch);
+    return *this;
+  }
+
+  bool operator==(const ArchitectureSet &o) const {
+    return ArchSet == o.ArchSet;
+  }
+
+  bool operator!=(const ArchitectureSet &o) const {
+    return ArchSet != o.ArchSet;
+  }
+
+  bool operator<(const ArchitectureSet &o) const { return ArchSet < o.ArchSet; }
+
+  using iterator = arch_iterator<ArchSetType>;
+  using const_iterator = arch_iterator<const ArchSetType>;
+
+  iterator begin() { return {&ArchSet}; }
+  iterator end() { return {&ArchSet, EndIndexVal}; }
+
+  const_iterator begin() const { return {&ArchSet}; }
+  const_iterator end() const { return {&ArchSet, EndIndexVal}; }
+
+  operator std::string() const;
+  operator std::vector<Architecture>() const;
+  void print(raw_ostream &OS) const;
+};
+
+inline ArchitectureSet operator|(const Architecture &lhs,
+                                 const Architecture &rhs) {
+  return ArchitectureSet(lhs) | ArchitectureSet(rhs);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, ArchitectureSet Set);
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
diff --git a/include/llvm/TextAPI/MachO/InterfaceFile.h b/include/llvm/TextAPI/MachO/InterfaceFile.h
new file mode 100644
index 000000000000..e722449d52f1
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/InterfaceFile.h
@@ -0,0 +1,436 @@
+//===- llvm/TextAPI/MachO/InterfaceFile.h - TAPI Interface File -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A generic and abstract interface representation for linkable objects. This
+// could be a MachO executable, bundle, dylib, or text-based stub file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
+#define LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/TextAPI/MachO/Symbol.h"
+
+namespace llvm {
+namespace MachO {
+
+/// Defines the list of MachO platforms.
+enum class PlatformKind : unsigned {
+  unknown,
+  macOS = MachO::PLATFORM_MACOS,
+  iOS = MachO::PLATFORM_IOS,
+  tvOS = MachO::PLATFORM_TVOS,
+  watchOS = MachO::PLATFORM_WATCHOS,
+  bridgeOS = MachO::PLATFORM_BRIDGEOS,
+};
+
+/// Defines a list of Objective-C constraints.
+enum class ObjCConstraintType : unsigned {
+  /// No constraint.
+  None = 0,
+
+  /// Retain/Release.
+  Retain_Release = 1,
+
+  /// Retain/Release for Simulator.
+  Retain_Release_For_Simulator = 2,
+
+  /// Retain/Release or Garbage Collection.
+  Retain_Release_Or_GC = 3,
+
+  /// Garbage Collection.
+  GC = 4,
+};
+
+// clang-format off
+
+/// Defines the file type this file represents.
+enum FileType : unsigned {
+  /// Invalid file type.
+  Invalid = 0U,
+
+  /// Text-based stub file (.tbd) version 1.0
+  TBD_V1  = 1U <<  0,
+
+  /// Text-based stub file (.tbd) version 2.0
+  TBD_V2  = 1U <<  1,
+
+  /// Text-based stub file (.tbd) version 3.0
+  TBD_V3  = 1U <<  2,
+
+  All     = ~0U,
+
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All),
+};
+
+// clang-format on
+
+/// Reference to an interface file.
+class InterfaceFileRef {
+public:
+  InterfaceFileRef() = default;
+
+  InterfaceFileRef(StringRef InstallName) : InstallName(InstallName) {}
+
+  InterfaceFileRef(StringRef InstallName, ArchitectureSet Archs)
+      : InstallName(InstallName), Architectures(Archs) {}
+
+  StringRef getInstallName() const { return InstallName; }
+  void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
+  ArchitectureSet getArchitectures() const { return Architectures; }
+  bool hasArchitecture(Architecture Arch) const {
+    return Architectures.has(Arch);
+  }
+
+  bool operator==(const InterfaceFileRef &O) const {
+    return std::tie(InstallName, Architectures) ==
+           std::tie(O.InstallName, O.Architectures);
+  }
+
+  bool operator<(const InterfaceFileRef &O) const {
+    return std::tie(InstallName, Architectures) <
+           std::tie(O.InstallName, O.Architectures);
+  }
+
+private:
+  std::string InstallName;
+  ArchitectureSet Architectures;
+};
+
+} // end namespace MachO.
+
+struct SymbolsMapKey {
+  MachO::SymbolKind Kind;
+  StringRef Name;
+
+  SymbolsMapKey(MachO::SymbolKind Kind, StringRef Name)
+      : Kind(Kind), Name(Name) {}
+};
+template <> struct DenseMapInfo<SymbolsMapKey> {
+  static inline SymbolsMapKey getEmptyKey() {
+    return SymbolsMapKey(MachO::SymbolKind::GlobalSymbol, StringRef{});
+  }
+
+  static inline SymbolsMapKey getTombstoneKey() {
+    return SymbolsMapKey(MachO::SymbolKind::ObjectiveCInstanceVariable,
+                         StringRef{});
+  }
+
+  static unsigned getHashValue(const SymbolsMapKey &Key) {
+    return hash_combine(hash_value(Key.Kind), hash_value(Key.Name));
+  }
+
+  static bool isEqual(const SymbolsMapKey &LHS, const SymbolsMapKey &RHS) {
+    return std::tie(LHS.Kind, LHS.Name) == std::tie(RHS.Kind, RHS.Name);
+  }
+};
+
+namespace MachO {
+
+/// Defines the interface file.
+class InterfaceFile {
+public:
+  /// Set the path from which this file was generated (if applicable).
+  ///
+  /// \param Path_ The path to the source file.
+  void setPath(StringRef Path_) { Path = Path_; }
+
+  /// Get the path from which this file was generated (if applicable).
+  ///
+  /// \return The path to the source file or empty.
+  StringRef getPath() const { return Path; }
+
+  /// Set the file type.
+  ///
+  /// This is used by the YAML writer to identify the specification it should
+  /// use for writing the file.
+  ///
+  /// \param Kind The file type.
+  void setFileType(FileType Kind) { FileKind = Kind; }
+
+  /// Get the file type.
+  ///
+  /// \return The file type.
+  FileType getFileType() const { return FileKind; }
+
+  /// Set the platform.
+  void setPlatform(PlatformKind Platform_) { Platform = Platform_; }
+
+  /// Get the platform.
+  PlatformKind getPlatform() const { return Platform; }
+
+  /// Specify the set of supported architectures by this file.
+  void setArchitectures(ArchitectureSet Architectures_) {
+    Architectures = Architectures_;
+  }
+
+  /// Add the set of supported architectures by this file.
+  void addArchitectures(ArchitectureSet Architectures_) {
+    Architectures |= Architectures_;
+  }
+
+  /// Add an architecture supported by this file.
+  void addArch(Architecture Arch) { Architectures.set(Arch); }
+
+  /// Get the set of supported architectures.
+  ArchitectureSet getArchitectures() const { return Architectures; }
+
+  /// Set the install name of the library.
+  void setInstallName(StringRef InstallName_) { InstallName = InstallName_; }
+
+  /// Get the install name of the library.
+  StringRef getInstallName() const { return InstallName; }
+
+  /// Set the current version of the library.
+  void setCurrentVersion(PackedVersion Version) { CurrentVersion = Version; }
+
+  /// Get the current version of the library.
+  PackedVersion getCurrentVersion() const { return CurrentVersion; }
+
+  /// Set the compatibility version of the library.
+  void setCompatibilityVersion(PackedVersion Version) {
+    CompatibilityVersion = Version;
+  }
+
+  /// Get the compatibility version of the library.
+  PackedVersion getCompatibilityVersion() const { return CompatibilityVersion; }
+
+  /// Set the Swift ABI version of the library.
+  void setSwiftABIVersion(uint8_t Version) { SwiftABIVersion = Version; }
+
+  /// Get the Swift ABI version of the library.
+  uint8_t getSwiftABIVersion() const { return SwiftABIVersion; }
+
+  /// Specify if the library uses two-level namespace (or flat namespace).
+  void setTwoLevelNamespace(bool V = true) { IsTwoLevelNamespace = V; }
+
+  /// Check if the library uses two-level namespace.
+  bool isTwoLevelNamespace() const { return IsTwoLevelNamespace; }
+
+  /// Specify if the library is application extension safe (or not).
+  void setApplicationExtensionSafe(bool V = true) { IsAppExtensionSafe = V; }
+
+  /// Check if the library is application extension safe.
+  bool isApplicationExtensionSafe() const { return IsAppExtensionSafe; }
+
+  /// Set the Objective-C constraint.
+  void setObjCConstraint(ObjCConstraintType Constraint) {
+    ObjcConstraint = Constraint;
+  }
+
+  /// Get the Objective-C constraint.
+  ObjCConstraintType getObjCConstraint() const { return ObjcConstraint; }
+
+  /// Specify if this file was generated during InstallAPI (or not).
+  void setInstallAPI(bool V = true) { IsInstallAPI = V; }
+
+  /// Check if this file was generated during InstallAPI.
+  bool isInstallAPI() const { return IsInstallAPI; }
+
+  /// Set the parent umbrella framework.
+  void setParentUmbrella(StringRef Parent) { ParentUmbrella = Parent; }
+
+  /// Get the parent umbrella framework.
+  StringRef getParentUmbrella() const { return ParentUmbrella; }
+
+  /// Add an allowable client.
+  ///
+  /// Mach-O Dynamic libraries have the concept of allowable clients that are
+  /// checked during static link time. The name of the application or library
+  /// that is being generated needs to match one of the allowable clients or the
+  /// linker refuses to link this library.
+  ///
+  /// \param Name The name of the client that is allowed to link this library.
+  /// \param Architectures The set of architectures for which this applies.
+  void addAllowableClient(StringRef Name, ArchitectureSet Architectures);
+
+  /// Get the list of allowable clients.
+  ///
+  /// \return Returns a list of allowable clients.
+  const std::vector<InterfaceFileRef> &allowableClients() const {
+    return AllowableClients;
+  }
+
+  /// Add a re-exported library.
+  ///
+  /// \param InstallName The name of the library to re-export.
+  /// \param Architectures The set of architectures for which this applies.
+  void addReexportedLibrary(StringRef InstallName,
+                            ArchitectureSet Architectures);
+
+  /// Get the list of re-exported libraries.
+  ///
+  /// \return Returns a list of re-exported libraries.
+  const std::vector<InterfaceFileRef> &reexportedLibraries() const {
+    return ReexportedLibraries;
+  }
+
+  /// Add an architecture/UUID pair.
+  ///
+  /// \param Arch The architecture for which this applies.
+  /// \param UUID The UUID of the library for the specified architecture.
+  void addUUID(Architecture Arch, StringRef UUID);
+
+  /// Add an architecture/UUID pair.
+  ///
+  /// \param Arch The architecture for which this applies.
+  /// \param UUID The UUID of the library for the specified architecture.
+  void addUUID(Architecture Arch, uint8_t UUID[16]);
+
+  /// Get the list of architecture/UUID pairs.
+  ///
+  /// \return Returns a list of architecture/UUID pairs.
+  const std::vector<std::pair<Architecture, std::string>> &uuids() const {
+    return UUIDs;
+  }
+
+  /// Add a symbol to the symbols list or extend an existing one.
+  void addSymbol(SymbolKind Kind, StringRef Name, ArchitectureSet Architectures,
+                 SymbolFlags Flags = SymbolFlags::None);
+
+  using SymbolMapType = DenseMap<SymbolsMapKey, Symbol *>;
+  struct const_symbol_iterator
+      : public iterator_adaptor_base<
+            const_symbol_iterator, SymbolMapType::const_iterator,
+            std::forward_iterator_tag, const Symbol *, ptrdiff_t,
+            const Symbol *, const Symbol *> {
+    const_symbol_iterator() = default;
+
+    template <typename U>
+    const_symbol_iterator(U &&u)
+        : iterator_adaptor_base(std::forward<U &&>(u)) {}
+
+    reference operator*() const { return I->second; }
+    pointer operator->() const { return I->second; }
+  };
+  using const_symbol_range = iterator_range<const_symbol_iterator>;
+
+  // Custom iterator that yields only exported (not undefined) symbols.
+  struct const_export_iterator
+      : public iterator_adaptor_base<
+            const_export_iterator, const_symbol_iterator,
+            std::forward_iterator_tag, const Symbol *> {
+    const_symbol_iterator _end;
+
+    void skipToNextSymbol() {
+      while (I != _end && I->isUndefined())
+        ++I;
+    }
+
+    const_export_iterator() = default;
+    template <typename U>
+    const_export_iterator(U &&it, U &&end)
+        : iterator_adaptor_base(std::forward<U &&>(it)),
+          _end(std::forward<U &&>(end)) {
+      skipToNextSymbol();
+    }
+
+    const_export_iterator &operator++() {
+      ++I;
+      skipToNextSymbol();
+      return *this;
+    }
+
+    const_export_iterator operator++(int) {
+      const_export_iterator tmp(*this);
+      ++(*this);
+      return tmp;
+    }
+  };
+  using const_export_range = llvm::iterator_range<const_export_iterator>;
+
+  // Custom iterator to return only undefined symbols.
+  struct const_undefined_iterator
+      : public iterator_adaptor_base<
+            const_undefined_iterator, const_symbol_iterator,
+            std::forward_iterator_tag, const Symbol *> {
+    const_symbol_iterator _end;
+
+    void skipToNextSymbol() {
+      while (I != _end && !I->isUndefined())
+        ++I;
+    }
+
+    const_undefined_iterator() = default;
+    template <typename U>
+    const_undefined_iterator(U &&it, U &&end)
+        : iterator_adaptor_base(std::forward<U &&>(it)),
+          _end(std::forward<U &&>(end)) {
+      skipToNextSymbol();
+    }
+
+    const_undefined_iterator &operator++() {
+      ++I;
+      skipToNextSymbol();
+      return *this;
+    }
+
+    const_undefined_iterator operator++(int) {
+      const_undefined_iterator tmp(*this);
+      ++(*this);
+      return tmp;
+    }
+  };
+  using const_undefined_range = llvm::iterator_range<const_undefined_iterator>;
+
+  const_symbol_range symbols() const {
+    return {Symbols.begin(), Symbols.end()};
+  }
+  const_export_range exports() const {
+    return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+  }
+  const_undefined_range undefineds() const {
+    return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+  }
+
+private:
+  llvm::BumpPtrAllocator Allocator;
+  StringRef copyString(StringRef String) {
+    if (String.empty())
+      return {};
+
+    void *Ptr = Allocator.Allocate(String.size(), 1);
+    memcpy(Ptr, String.data(), String.size());
+    return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+  }
+
+  std::string Path;
+  FileType FileKind;
+  PlatformKind Platform;
+  ArchitectureSet Architectures;
+  std::string InstallName;
+  PackedVersion CurrentVersion;
+  PackedVersion CompatibilityVersion;
+  uint8_t SwiftABIVersion{0};
+  bool IsTwoLevelNamespace{false};
+  bool IsAppExtensionSafe{false};
+  bool IsInstallAPI{false};
+  ObjCConstraintType ObjcConstraint = ObjCConstraintType::None;
+  std::string ParentUmbrella;
+  std::vector<InterfaceFileRef> AllowableClients;
+  std::vector<InterfaceFileRef> ReexportedLibraries;
+  std::vector<std::pair<Architecture, std::string>> UUIDs;
+  SymbolMapType Symbols;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
diff --git a/include/llvm/TextAPI/MachO/PackedVersion.h b/include/llvm/TextAPI/MachO/PackedVersion.h
new file mode 100644
index 000000000000..2d0138097dd9
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/PackedVersion.h
@@ -0,0 +1,64 @@
+//===- llvm/TextAPI/MachO/PackedVersion.h - PackedVersion -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the Mach-O packed version format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
+#define LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+class PackedVersion {
+  uint32_t Version{0};
+
+public:
+  constexpr PackedVersion() = default;
+  explicit constexpr PackedVersion(uint32_t RawVersion) : Version(RawVersion) {}
+  PackedVersion(unsigned Major, unsigned Minor, unsigned Subminor)
+      : Version((Major << 16) | ((Minor & 0xff) << 8) | (Subminor & 0xff)) {}
+
+  bool empty() const { return Version == 0; }
+
+  /// Retrieve the major version number.
+  unsigned getMajor() const { return Version >> 16; }
+
+  /// Retrieve the minor version number, if provided.
+  unsigned getMinor() const { return (Version >> 8) & 0xff; }
+
+  /// Retrieve the subminor version number, if provided.
+  unsigned getSubminor() const { return Version & 0xff; }
+
+  bool parse32(StringRef Str);
+  std::pair<bool, bool> parse64(StringRef Str);
+
+  bool operator<(const PackedVersion &O) const { return Version < O.Version; }
+
+  bool operator==(const PackedVersion &O) const { return Version == O.Version; }
+
+  bool operator!=(const PackedVersion &O) const { return Version != O.Version; }
+
+  uint32_t rawValue() const { return Version; }
+
+  void print(raw_ostream &OS) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const PackedVersion &Version) {
+  Version.print(OS);
+  return OS;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
diff --git a/include/llvm/TextAPI/MachO/Symbol.h b/include/llvm/TextAPI/MachO/Symbol.h
new file mode 100644
index 000000000000..3c7ff5e0f4ea
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Symbol.h
@@ -0,0 +1,96 @@
+//===- llvm/TextAPI/MachO/Symbol.h - TAPI Symbol ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_SYMBOL_H
+#define LLVM_TEXTAPI_MACHO_SYMBOL_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+
+namespace llvm {
+namespace MachO {
+
+// clang-format off
+
+/// Symbol flags.
+enum class SymbolFlags : uint8_t {
+  /// No flags
+  None             = 0,
+
+  /// Thread-local value symbol
+  ThreadLocalValue = 1U << 0,
+
+  /// Weak defined symbol
+  WeakDefined      = 1U << 1,
+
+  /// Weak referenced symbol
+  WeakReferenced   = 1U << 2,
+
+  /// Undefined symbol
+  Undefined        = 1U << 3,
+
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined),
+};
+
+// clang-format on
+
+enum class SymbolKind : uint8_t {
+  GlobalSymbol,
+  ObjectiveCClass,
+  ObjectiveCClassEHType,
+  ObjectiveCInstanceVariable,
+};
+
+class Symbol {
+public:
+  constexpr Symbol(SymbolKind Kind, StringRef Name,
+                   ArchitectureSet Architectures, SymbolFlags Flags)
+      : Name(Name), Architectures(Architectures), Kind(Kind), Flags(Flags) {}
+
+  SymbolKind getKind() const { return Kind; }
+  StringRef getName() const { return Name; }
+  ArchitectureSet getArchitectures() const { return Architectures; }
+  void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
+  SymbolFlags getFlags() const { return Flags; }
+
+  bool isWeakDefined() const {
+    return (Flags & SymbolFlags::WeakDefined) == SymbolFlags::WeakDefined;
+  }
+
+  bool isWeakReferenced() const {
+    return (Flags & SymbolFlags::WeakReferenced) == SymbolFlags::WeakReferenced;
+  }
+
+  bool isThreadLocalValue() const {
+    return (Flags & SymbolFlags::ThreadLocalValue) ==
+           SymbolFlags::ThreadLocalValue;
+  }
+
+  bool isUndefined() const {
+    return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump(raw_ostream &OS) const;
+  void dump() const { dump(llvm::errs()); }
+#endif
+
+private:
+  StringRef Name;
+  ArchitectureSet Architectures;
+  SymbolKind Kind;
+  SymbolFlags Flags;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_SYMBOL_H
diff --git a/include/llvm/TextAPI/MachO/TextAPIReader.h b/include/llvm/TextAPI/MachO/TextAPIReader.h
new file mode 100644
index 000000000000..6d9c09de5294
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/TextAPIReader.h
@@ -0,0 +1,34 @@
+//===--- TextAPIReader.h - Text API Reader ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_READER_H
+#define LLVM_TEXTAPI_MACHO_READER_H
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace MachO {
+
+class InterfaceFile;
+
+class TextAPIReader {
+public:
+  static Expected<std::unique_ptr<InterfaceFile>>
+  get(std::unique_ptr<MemoryBuffer> InputBuffer);
+
+  static Expected<std::unique_ptr<InterfaceFile>>
+  getUnmanaged(llvm::MemoryBuffer *InputBuffer);
+
+  TextAPIReader() = delete;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_READER_H
diff --git a/include/llvm/TextAPI/MachO/TextAPIWriter.h b/include/llvm/TextAPI/MachO/TextAPIWriter.h
new file mode 100644
index 000000000000..2a45bb86a332
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/TextAPIWriter.h
@@ -0,0 +1,29 @@
+//===--- TextAPIWriter.h - Text API Writer ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_WRITER_H
+#define LLVM_TEXTAPI_MACHO_WRITER_H
+
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace MachO {
+
+class InterfaceFile;
+
+class TextAPIWriter {
+public:
+  TextAPIWriter() = delete;
+
+  static Error writeToStream(raw_ostream &os, const InterfaceFile &);
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_WRITER_H
diff --git a/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h b/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
index 964b0f7620a2..d144f62f1cc1 100644
--- a/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
+++ b/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
@@ -1,9 +1,8 @@
 //===- DlltoolDriver.h - dlltool.exe-compatible driver ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/ToolDrivers/llvm-lib/LibDriver.h b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
index a4806ac4ad69..23a2fc348a89 100644
--- a/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
+++ b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
@@ -1,9 +1,8 @@
 //===- llvm-lib/LibDriver.h - lib.exe-compatible driver ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@ namespace llvm {
 template <typename T> class ArrayRef;
 
 int libDriverMain(ArrayRef<const char *> ARgs);
+
 }
 
 #endif
diff --git a/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index f970acdc741f..887c8807904e 100644
--- a/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -1,9 +1,8 @@
 //===- AggressiveInstCombine.h - AggressiveInstCombine pass -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Coroutines.h b/include/llvm/Transforms/Coroutines.h
index 51beb44fdc56..9df3ec0f3ef4 100644
--- a/include/llvm/Transforms/Coroutines.h
+++ b/include/llvm/Transforms/Coroutines.h
@@ -1,9 +1,8 @@
 //===-- Coroutines.h - Coroutine Transformations ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Declare accessor functions for coroutine lowering passes.
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 11d363b1200b..de0c80f5b19a 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/IPO.h - Interprocedural Transformations --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -183,6 +182,10 @@ ModulePass *createBlockExtractorPass();
 ModulePass *
 createBlockExtractorPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
                          bool EraseFunctions);
+ModulePass *
+createBlockExtractorPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+                             &GroupsOfBlocksToExtract,
+                         bool EraseFunctions);
 
 /// createStripDeadPrototypesPass - This pass removes any function declarations
 /// (prototypes) that are not used.
diff --git a/include/llvm/Transforms/IPO/AlwaysInliner.h b/include/llvm/Transforms/IPO/AlwaysInliner.h
index b52c0fdbd2c9..64e25230f6da 100644
--- a/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -1,9 +1,8 @@
 //===-- AlwaysInliner.h - Pass to inline "always_inline" functions --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Transforms/IPO/ArgumentPromotion.h b/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 49ca6cc73393..c8afb7bdcd65 100644
--- a/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -1,9 +1,8 @@
 //===- ArgumentPromotion.h - Promote by-reference arguments -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h
new file mode 100644
index 000000000000..5dbe21ac5e4e
--- /dev/null
+++ b/include/llvm/Transforms/IPO/Attributor.h
@@ -0,0 +1,789 @@
+//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Attributor: An inter procedural (abstract) "attribute" deduction framework.
+//
+// The Attributor framework is an inter procedural abstract analysis (fixpoint
+// iteration analysis). The goal is to allow easy deduction of new attributes as
+// well as information exchange between abstract attributes in-flight.
+//
+// The Attributor class is the driver and the link between the various abstract
+// attributes. The Attributor will iterate until a fixpoint state is reached by
+// all abstract attributes in-flight, or until it will enforce a pessimistic fix
+// point because an iteration limit is reached.
+//
+// Abstract attributes, derived from the AbstractAttribute class, actually
+// describe properties of the code. They can correspond to actual LLVM-IR
+// attributes, or they can be more general, ultimately unrelated to LLVM-IR
+// attributes. The latter is useful when an abstract attribute provides
+// information to other abstract attributes in-flight but we might not want to
+// manifest the information. The Attributor allows to query in-flight abstract
+// attributes through the `Attributor::getAAFor` method (see the method
+// description for an example). If the method is used by an abstract attribute
+// P, and it results in an abstract attribute Q, the Attributor will
+// automatically capture a potential dependence from Q to P. This dependence
+// will cause P to be reevaluated whenever Q changes in the future.
+//
+// The Attributor will only reevaluate abstract attributes that might have
+// changed since the last iteration. That means that the Attributor will not
+// revisit all instructions/blocks/functions in the module but only query
+// an update from a subset of the abstract attributes.
+//
+// The update method `AbstractAttribute::updateImpl` is implemented by the
+// specific "abstract attribute" subclasses. The method is invoked whenever the
+// currently assumed state (see the AbstractState class) might not be valid
+// anymore. This can, for example, happen if the state was dependent on another
+// abstract attribute that changed. In every invocation, the update method has
+// to adjust the internal state of an abstract attribute to a point that is
+// justifiable by the underlying IR and the current state of abstract attributes
+// in-flight. Since the IR is given and assumed to be valid, the information
+// derived from it can be assumed to hold. However, information derived from
+// other abstract attributes is conditional on various things. If the justifying
+// state changed, the `updateImpl` has to revisit the situation and potentially
+// find another justification or limit the optimistic assumptions made.
+//
+// Change is the key in this framework. Until a state of no-change, thus a
+// fixpoint, is reached, the Attributor will query the abstract attributes
+// in-flight to re-evaluate their state. If the (current) state is too
+// optimistic, hence it cannot be justified anymore through other abstract
+// attributes or the state of the IR, the state of the abstract attribute will
+// have to change. Generally, we assume abstract attribute state to be a finite
+// height lattice and the update function to be monotone. However, these
+// conditions are not enforced because the iteration limit will guarantee
+// termination. If an optimistic fixpoint is reached, or a pessimistic fix
+// point is enforced after a timeout, the abstract attributes are tasked to
+// manifest their result in the IR for passes to come.
+//
+// Attribute manifestation is not mandatory. If desired, there is support to
+// generate a single LLVM-IR attribute already in the AbstractAttribute base
+// class. In the simplest case, a subclass overloads
+// `AbstractAttribute::getManifestPosition()` and
+// `AbstractAttribute::getAttrKind()` to return the appropriate values. The
+// Attributor manifestation framework will then create and place a new attribute
+// if it is allowed to do so (based on the abstract state). Other use cases can
+// be achieved by overloading other abstract attribute methods.
+//
+//
+// The "mechanics" of adding a new "abstract attribute":
+// - Define a class (transitively) inheriting from AbstractAttribute and one
+//   (which could be the same) that (transitively) inherits from AbstractState.
+//   For the latter, consider the already available BooleanState and
+//   IntegerState if they fit your needs, e.g., you require only a bit-encoding.
+// - Implement all pure methods. Also use overloading if the attribute is not
+//   conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
+//   an argument, call site argument, function return value, or function. See
+//   the class and method descriptions for more information on the two
+//   "Abstract" classes and their respective methods.
+// - Register opportunities for the new abstract attribute in the
+//   `Attributor::identifyDefaultAbstractAttributes` method if it should be
+//   counted as a 'default' attribute.
+// - Add sufficient tests.
+// - Add a Statistics object for bookkeeping. If it is a simple (set of)
+//   attribute(s) manifested through the Attributor manifestation framework, see
+//   the bookkeeping function in Attributor.cpp.
+// - If instructions with a certain opcode are interesting to the attribute, add
+//   that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
+//   will make it possible to query all those instructions through the
+//   `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
+//   need to traverse the IR repeatedly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct AbstractAttribute;
+struct InformationCache;
+
+class Function;
+
+/// Simple enum class that forces the status to be spelled out explicitly.
+///
+///{
+enum class ChangeStatus {
+  CHANGED,
+  UNCHANGED,
+};
+
+ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
+ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
+///}
+
+/// The fixpoint analysis framework that orchestrates the attribute deduction.
+///
+/// The Attributor provides a general abstract analysis framework (guided
+/// fixpoint iteration) as well as helper functions for the deduction of
+/// (LLVM-IR) attributes. However, also other code properties can be deduced,
+/// propagated, and ultimately manifested through the Attributor framework. This
+/// is particularly useful if these properties interact with attributes and a
+/// co-scheduled deduction allows to improve the solution. Even if not, thus if
+/// attributes/properties are completely isolated, they should use the
+/// Attributor framework to reduce the number of fixpoint iteration frameworks
+/// in the code base. Note that the Attributor design makes sure that isolated
+/// attributes are not impacted, in any way, by others derived at the same time
+/// if there is no cross-reasoning performed.
+///
+/// The public facing interface of the Attributor is kept simple and basically
+/// allows abstract attributes to do one thing: query abstract attributes
+/// in-flight. There are two reasons to do this:
+///    a) The optimistic state of one abstract attribute can justify an
+///       optimistic state of another, allowing the framework to end up with an
+///       optimistic (=best possible) fixpoint instead of one based solely on
+///       information in the IR.
+///    b) This avoids reimplementing various kinds of lookups, e.g., to check
+///       for existing IR attributes, in favor of a single lookup interface
+///       provided by an abstract attribute subclass.
+///
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+///       described in the file comment.
+struct Attributor {
+  ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); }
+
+  /// Run the analyses until a fixpoint is reached or enforced (timeout).
+  ///
+  /// The attributes registered with this Attributor can be used after as long
+  /// as the Attributor is not destroyed (it owns the attributes now).
+  ///
+  /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
+  ChangeStatus run();
+
+  /// Lookup an abstract attribute of type \p AAType anchored at value \p V and
+  /// argument number \p ArgNo. If no attribute is found and \p V is a call base
+  /// instruction, the called function is tried as a value next. Thus, the
+  /// returned abstract attribute might be anchored at the callee of \p V.
+  ///
+  /// This method is the only (supported) way an abstract attribute can retrieve
+  /// information from another abstract attribute. As an example, take an
+  /// abstract attribute that determines the memory access behavior for an
+  /// argument (readnone, readonly, ...). It should use `getAAFor` to get the
+  /// most optimistic information for other abstract attributes in-flight, e.g.
+  /// the one reasoning about the "captured" state for the argument or the one
+  /// reasoning on the memory access behavior of the function as a whole.
+  template <typename AAType>
+  const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V,
+                         int ArgNo = -1) {
+    static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+                  "Cannot query an attribute with a type not derived from "
+                  "'AbstractAttribute'!");
+    assert(AAType::ID != Attribute::None &&
+           "Cannot lookup generic abstract attributes!");
+
+    // Determine the argument number automatically for llvm::Arguments if none
+    // is set. Do not override a given one as it could be a use of the argument
+    // in a call site.
+    if (ArgNo == -1)
+      if (auto *Arg = dyn_cast<Argument>(&V))
+        ArgNo = Arg->getArgNo();
+
+    // If a function was given together with an argument number, perform the
+    // lookup for the actual argument instead. Don't do it for variadic
+    // arguments.
+    if (ArgNo >= 0 && isa<Function>(&V) &&
+        cast<Function>(&V)->arg_size() > (size_t)ArgNo)
+      return getAAFor<AAType>(
+          QueryingAA, *(cast<Function>(&V)->arg_begin() + ArgNo), ArgNo);
+
+    // Lookup the abstract attribute of type AAType. If found, return it after
+    // registering a dependence of QueryingAA on the one returned attribute.
+    const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo});
+    if (AAType *AA = static_cast<AAType *>(
+            KindToAbstractAttributeMap.lookup(AAType::ID))) {
+      // Do not return an attribute with an invalid state. This minimizes checks
+      // at the call sites and allows the fallback below to kick in.
+      if (AA->getState().isValidState()) {
+        QueryMap[AA].insert(&QueryingAA);
+        return AA;
+      }
+    }
+
+    // If no valid abstract attribute was found and V is a call site, retry the
+    // lookup with the called value as the anchor instead.
+    ImmutableCallSite ICS(&V);
+    if (ICS && ICS.getCalledValue())
+      return getAAFor<AAType>(QueryingAA, *ICS.getCalledValue(), ArgNo);
+
+    // No matching attribute found
+    return nullptr;
+  }
+
+  /// Introduce a new abstract attribute into the fixpoint analysis.
+  ///
+  /// Note that ownership of the attribute is given to the Attributor. It will
+  /// invoke delete on the attribute when the Attributor is destroyed.
+  ///
+  /// Attributes are identified by
+  ///  (1) their anchored value (see AA.getAnchoredValue()),
+  ///  (2) their argument number (\p ArgNo, or Argument::getArgNo()), and
+  ///  (3) their default attribute kind (see AAType::ID).
+  template <typename AAType> AAType &registerAA(AAType &AA, int ArgNo = -1) {
+    static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+                  "Cannot register an attribute with a type not derived from "
+                  "'AbstractAttribute'!");
+
+    // The attribute is keyed by its anchor value, an argument number, and
+    // AAType::ID. An explicitly passed \p ArgNo always wins since it may name
+    // a call site operand; only when none was given and the anchor itself is
+    // an Argument do we fall back to that argument's own position.
+    Value *Anchor = &AA.getAnchoredValue();
+    if (ArgNo == -1)
+      if (auto *FormalArg = dyn_cast<Argument>(Anchor))
+        ArgNo = FormalArg->getArgNo();
+
+    // Record the attribute both in the container tracking all attributes and
+    // in the nested lookup map.
+    AllAbstractAttributes.push_back(&AA);
+    AAMap[{Anchor, ArgNo}][AAType::ID] = &AA;
+    return AA;
+  }
+
+  /// Determine opportunities to derive 'default' attributes in \p F and create
+  /// abstract attribute objects for them.
+  ///
+  /// \param F The function that is checked for attribute opportunities.
+  /// \param InfoCache A cache for information queryable by the new attributes.
+  /// \param Whitelist If not null, a set limiting the attribute opportunities.
+  ///
+  /// Note that abstract attribute instances are generally created even if the
+  /// IR already contains the information they would deduce. The most important
+  /// reason for this is the single interface, the one of the abstract attribute
+  /// instance, which can be queried without the need to look at the IR in
+  /// various places.
+  void identifyDefaultAbstractAttributes(
+      Function &F, InformationCache &InfoCache,
+      DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist = nullptr);
+
+  /// Check \p Pred on all function call sites.
+  ///
+  /// This method will evaluate \p Pred on call sites and return
+  /// true if \p Pred holds for every call site. However, this is only possible
+  /// if all call sites are known, hence the function has internal linkage.
+  bool checkForAllCallSites(Function &F, std::function<bool(CallSite)> &Pred,
+                            bool RequireAllCallSites);
+
+private:
+  /// The set of all abstract attributes.
+  ///{
+  using AAVector = SmallVector<AbstractAttribute *, 64>;
+  AAVector AllAbstractAttributes;
+  ///}
+
+  /// A nested map to lookup abstract attributes based on the anchored value and
+  /// an argument position (or -1) on the outer level, and attribute kinds
+  /// (Attribute::AttrKind) on the inner level.
+  ///{
+  using KindToAbstractAttributeMap = DenseMap<unsigned, AbstractAttribute *>;
+  DenseMap<std::pair<const Value *, int>, KindToAbstractAttributeMap> AAMap;
+  ///}
+
+  /// A map from abstract attributes to the ones that queried them through calls
+  /// to the getAAFor<...>(...) method.
+  ///{
+  using QueryMapTy =
+      DenseMap<AbstractAttribute *, SetVector<AbstractAttribute *>>;
+  QueryMapTy QueryMap;
+  ///}
+};
+
+/// Data structure to hold cached (LLVM-IR) information.
+///
+/// All attributes are given an InformationCache object at creation time to
+/// avoid inspection of the IR by all of them individually. This default
+/// InformationCache will hold information required by 'default' attributes,
+/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
+/// is called.
+///
+/// If custom abstract attributes, registered manually through
+/// Attributor::registerAA(...), need more information, especially if it is not
+/// reusable, it is advised to inherit from the InformationCache and cast the
+/// instance down in the abstract attributes.
+struct InformationCache {
+  /// Map type from an opcode to the instructions that have this opcode.
+  using OpcodeInstMapTy = DenseMap<unsigned, SmallVector<Instruction *, 32>>;
+
+  /// Return a mutable reference to the cached map relating "interesting"
+  /// opcodes to all instructions with that opcode in \p F.
+  OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) {
+    return FuncInstOpcodeMap[&F];
+  }
+
+  /// A vector type to hold instructions.
+  using InstructionVectorTy = std::vector<Instruction *>;
+
+  /// Return the cached instructions in \p F that may read or write memory.
+  InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) {
+    return FuncRWInstsMap[&F];
+  }
+
+private:
+  /// A map type from functions to their opcode-to-instructions maps.
+  using FuncInstOpcodeMapTy = DenseMap<Function *, OpcodeInstMapTy>;
+
+  /// A map type from functions to their read or write instructions.
+  using FuncRWInstsMapTy = DenseMap<Function *, InstructionVectorTy>;
+
+  /// A nested map that remembers all instructions in a function with a certain
+  /// instruction opcode (Instruction::getOpcode()).
+  FuncInstOpcodeMapTy FuncInstOpcodeMap;
+
+  /// A map from functions to their instructions that may read or write memory.
+  FuncRWInstsMapTy FuncRWInstsMap;
+
+  /// Give the Attributor access to the members so
+  /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
+  friend struct Attributor;
+};
+
+/// An interface to query the internal state of an abstract attribute.
+///
+/// The abstract state is a minimal interface that allows the Attributor to
+/// communicate with the abstract attributes about their internal state without
+/// enforcing or exposing implementation details, e.g., the (existence of an)
+/// underlying lattice.
+///
+/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
+/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
+/// was reached or (4) a pessimistic fixpoint was enforced.
+///
+/// All methods need to be implemented by the subclass. For the common use case,
+/// a single boolean state or a bit-encoded state, the BooleanState and
+/// IntegerState classes are already provided. An abstract attribute can inherit
+/// from them to get the abstract state interface and additional methods to
+/// directly modify the state if needed. See the class comments for help.
+struct AbstractState {
+  virtual ~AbstractState() {}
+
+  /// Return true if this abstract state is in a valid state. If false, no
+  /// information provided should be used.
+  virtual bool isValidState() const = 0;
+
+  /// Return true if this abstract state is fixed, thus does not need to be
+  /// updated if information changes as it cannot change itself.
+  virtual bool isAtFixpoint() const = 0;
+
+  /// Indicate that the abstract state should converge to the optimistic state.
+  ///
+  /// This will usually promote the optimistically assumed state to the
+  /// known-to-be-true state.
+  virtual void indicateOptimisticFixpoint() = 0;
+
+  /// Indicate that the abstract state should converge to the pessimistic state.
+  ///
+  /// This will usually revert the optimistically assumed state to the
+  /// known-to-be-true state.
+  virtual void indicatePessimisticFixpoint() = 0;
+};
+
+/// Simple state with integers encoding.
+///
+/// The interface ensures that the known bits are always a subset of the assumed
+/// bits. Users can only add known bits and, except through adding known bits,
+/// they can only remove assumed bits. This should guarantee monotonicity and
+/// thereby the existence of a fixpoint (if used correctly). The fixpoint is
+/// reached when the assumed and known state/bits are equal. Users can
+/// force/indicate a fixpoint. If an optimistic one is indicated, the known
+/// state will catch up with the assumed one, for a pessimistic fixpoint it is
+/// the other way around.
+struct IntegerState : public AbstractState {
+  /// Underlying integer type, we assume 32 bits to be enough.
+  using base_t = uint32_t;
+
+  /// Initialize the assumed state with \p BestState; no bits are known yet.
+  IntegerState(base_t BestState = ~0) : Assumed(BestState) {}
+
+  /// Return the worst possible representable state.
+  static constexpr base_t getWorstState() { return 0; }
+
+  /// See AbstractState::isValidState().
+  /// NOTE: For now we simply pretend that the worst possible state is invalid.
+  bool isValidState() const override { return Assumed != getWorstState(); }
+
+  /// See AbstractState::isAtFixpoint().
+  bool isAtFixpoint() const override { return Assumed == Known; }
+
+  /// See AbstractState::indicateOptimisticFixpoint(...).
+  void indicateOptimisticFixpoint() override { Known = Assumed; }
+
+  /// See AbstractState::indicatePessimisticFixpoint(...).
+  void indicatePessimisticFixpoint() override { Assumed = Known; }
+
+  /// Return the known state encoding.
+  base_t getKnown() const { return Known; }
+
+  /// Return the assumed state encoding.
+  base_t getAssumed() const { return Assumed; }
+
+  /// Return true if all bits set in \p BitsEncoding are "known bits".
+  bool isKnown(base_t BitsEncoding) const {
+    return (Known & BitsEncoding) == BitsEncoding;
+  }
+
+  /// Return true if all bits set in \p BitsEncoding are "assumed bits".
+  bool isAssumed(base_t BitsEncoding) const {
+    return (Assumed & BitsEncoding) == BitsEncoding;
+  }
+
+  /// Add the bits in \p Bits to the "known bits".
+  IntegerState &addKnownBits(base_t Bits) {
+    // Known bits are assumed as well, so both encodings are updated.
+    Assumed |= Bits;
+    Known |= Bits;
+    return *this;
+  }
+
+  /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
+  IntegerState &removeAssumedBits(base_t BitsEncoding) {
+    // Make sure we never lose any "known bits".
+    Assumed = (Assumed & ~BitsEncoding) | Known;
+    return *this;
+  }
+
+  /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
+  IntegerState &intersectAssumedBits(base_t BitsEncoding) {
+    // Make sure we never lose any "known bits".
+    Assumed = (Assumed & BitsEncoding) | Known;
+    return *this;
+  }
+
+private:
+  /// The known state encoding in an integer of type base_t.
+  base_t Known = getWorstState();
+
+  /// The assumed state encoding in an integer of type base_t.
+  base_t Assumed;
+};
+
+/// Simple wrapper for a single bit (boolean) state.
+struct BooleanState : public IntegerState {
+  BooleanState() : IntegerState(1) {} // Best state: the single bit is set.
+};
+
+/// Base struct for all "concrete attribute" deductions.
+///
+/// The abstract attribute is a minimal interface that allows the Attributor to
+/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
+/// implementation choices made for the subclasses but also to structure their
+/// implementation and simplify the use of other abstract attributes in-flight.
+///
+/// To allow easy creation of new attributes, most methods have default
+/// implementations. The ones that do not are generally straight forward, except
+/// `AbstractAttribute::updateImpl` which is the location of most reasoning
+/// associated with the abstract attribute. The update is invoked by the
+/// Attributor in case the situation used to justify the current optimistic
+/// state might have changed. The Attributor determines this automatically
+/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
+///
+/// The `updateImpl` method should inspect the IR and other abstract attributes
+/// in-flight to justify the best possible (=optimistic) state. The actual
+/// implementation is, similar to the underlying abstract state encoding, not
+/// exposed. In the most common case, the `updateImpl` will go through a list of
+/// reasons why its optimistic state is valid given the current information. If
+/// any combination of them holds and is sufficient to justify the current
+/// optimistic state, the method shall return UNCHANGED. If not, the optimistic
+/// state is adjusted to the situation and the method shall return CHANGED.
+///
+/// If the manifestation of the "concrete attribute" deduced by the subclass
+/// differs from the "default" behavior, which is a (set of) LLVM-IR
+/// attribute(s) for an argument, call site argument, function return value, or
+/// function, the `AbstractAttribute::manifest` method should be overridden.
+///
+/// NOTE: If the state obtained via getState() is INVALID, thus if
+///       AbstractAttribute::getState().isValidState() returns false, no
+///       information provided by the methods of this class should be used.
+/// NOTE: The Attributor currently has certain limitations to what we can do.
+///       As a general rule of thumb, "concrete" abstract attributes should *for
+///       now* only perform "backward" information propagation. That means
+///       optimistic information obtained through abstract attributes should
+///       only be used at positions that precede the origin of the information
+///       with regards to the program flow. More practically, information can
+///       *now* be propagated from instructions to their enclosing function, but
+///       *not* from call sites to the called function. The mechanisms to allow
+///       both directions will be added in the future.
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+///       described in the file comment.
+struct AbstractAttribute {
+
+  /// The positions attributes can be manifested in.
+  enum ManifestPosition {
+    MP_ARGUMENT,           ///< An attribute for a function argument.
+    MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
+    MP_FUNCTION,           ///< An attribute for a function as a whole.
+    MP_RETURNED,           ///< An attribute for the function return value.
+  };
+
+  /// An abstract attribute associated with \p AssociatedVal and anchored at
+  /// \p AnchoredVal.
+  ///
+  /// \param AssociatedVal The value this abstract attribute is associated with.
+  /// \param AnchoredVal The value this abstract attribute is anchored at.
+  /// \param InfoCache Cached information accessible to the abstract attribute.
+  AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal,
+                    InformationCache &InfoCache)
+      : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal),
+        InfoCache(InfoCache) {}
+
+  /// An abstract attribute associated with and anchored at \p V.
+  AbstractAttribute(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(&V, V, InfoCache) {}
+
+  /// Virtual destructor.
+  virtual ~AbstractAttribute() {}
+
+  /// Initialize the state with the information in the Attributor \p A.
+  ///
+  /// This function is called by the Attributor once all abstract attributes
+  /// have been identified. It can and shall be used for tasks like:
+  ///  - identify existing knowledge in the IR and use it for the "known state"
+  ///  - perform any work that is not going to change over time, e.g., determine
+  ///    a subset of the IR, or attributes in-flight, that have to be looked at
+  ///    in the `updateImpl` method.
+  virtual void initialize(Attributor &A) {}
+
+  /// Return the internal abstract state for inspection.
+  virtual const AbstractState &getState() const = 0;
+
+  /// Return the value this abstract attribute is anchored at.
+  ///
+  /// The anchored value might not be the associated value if the latter is not
+  /// sufficient to determine where arguments will be manifested. This is mostly
+  /// the case for call site arguments as the value is not sufficient to
+  /// pinpoint them. Instead, we can use the call site as an anchor.
+  ///
+  ///{
+  Value &getAnchoredValue() { return AnchoredVal; }
+  const Value &getAnchoredValue() const { return AnchoredVal; }
+  ///}
+
+  /// Return the llvm::Function surrounding the anchored value.
+  ///
+  ///{
+  Function &getAnchorScope();
+  const Function &getAnchorScope() const;
+  ///}
+
+  /// Return the value this abstract attribute is associated with.
+  ///
+  /// The abstract state usually represents this value.
+  ///
+  ///{
+  virtual Value *getAssociatedValue() { return AssociatedVal; }
+  virtual const Value *getAssociatedValue() const { return AssociatedVal; }
+  ///}
+
+  /// Return the position this abstract state is manifested in.
+  virtual ManifestPosition getManifestPosition() const = 0;
+
+  /// Return the kind that identifies the abstract attribute implementation.
+  virtual Attribute::AttrKind getAttrKind() const = 0;
+
+  /// Append the deduced attribute(s) to \p Attrs; by default one of getAttrKind().
+  virtual void getDeducedAttributes(SmallVectorImpl<Attribute> &Attrs) const {
+    LLVMContext &Ctx = AnchoredVal.getContext();
+    Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
+  }
+
+  /// Helper functions, for debug purposes only.
+  ///{
+  virtual void print(raw_ostream &OS) const;
+  void dump() const { print(dbgs()); }
+
+  /// This function should return the "summarized" assumed state as string.
+  virtual const std::string getAsStr() const = 0;
+  ///}
+
+  /// Allow the Attributor access to the protected methods.
+  friend struct Attributor;
+
+protected:
+  /// Hook for the Attributor to trigger an update of the internal state.
+  ///
+  /// If this attribute is already fixed, this method will return UNCHANGED,
+  /// otherwise it delegates to `AbstractAttribute::updateImpl`.
+  ///
+  /// \returns CHANGED if the internal state changed, otherwise UNCHANGED.
+  ChangeStatus update(Attributor &A);
+
+  /// Hook for the Attributor to trigger the manifestation of the information
+  /// represented by the abstract attribute in the LLVM-IR.
+  ///
+  /// \returns CHANGED if the IR was altered, otherwise UNCHANGED.
+  virtual ChangeStatus manifest(Attributor &A);
+
+  /// Return the internal abstract state for careful modification.
+  virtual AbstractState &getState() = 0;
+
+  /// The actual update/transfer function which has to be implemented by the
+  /// derived classes.
+  ///
+  /// If it is called, the environment has changed and we have to determine if
+  /// the current information is still valid or adjust it otherwise.
+  ///
+  /// \returns CHANGED if the internal state changed, otherwise UNCHANGED.
+  virtual ChangeStatus updateImpl(Attributor &A) = 0;
+
+  /// The value this abstract attribute is associated with.
+  Value *AssociatedVal;
+
+  /// The value this abstract attribute is anchored at.
+  Value &AnchoredVal;
+
+  /// The information cache accessible to this abstract attribute.
+  InformationCache &InfoCache;
+};
+
+/// Forward declarations of output streams for debug purposes.
+///
+///{
+raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
+raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
+raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition);
+raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
+///}
+
+struct AttributorPass : public PassInfoMixin<AttributorPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+}; // Module-level pass; run() is only declared here.
+
+Pass *createAttributorLegacyPass();
+
+/// ----------------------------------------------------------------------------
+///                       Abstract Attribute Classes
+/// ----------------------------------------------------------------------------
+
+/// An abstract attribute for the returned values of a function.
+struct AAReturnedValues : public AbstractAttribute {
+  /// Create the attribute associated with and anchored at the function \p F.
+  AAReturnedValues(Function &F, InformationCache &InfoCache)
+      : AbstractAttribute(F, InfoCache) {}
+
+  /// Check \p Pred on all returned values.
+  ///
+  /// This method will evaluate \p Pred on returned values and return
+  /// true if (1) all returned values are known, and (2) \p Pred returned true
+  /// for all returned values.
+  virtual bool
+  checkForallReturnedValues(std::function<bool(Value &)> &Pred) const = 0;
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::Returned;
+};
+
+struct AANoUnwind : public AbstractAttribute {
+  /// An abstract interface for all nounwind attributes.
+  AANoUnwind(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  static constexpr Attribute::AttrKind ID = Attribute::NoUnwind;
+
+  /// Return true if "nounwind" is assumed.
+  virtual bool isAssumedNoUnwind() const = 0;
+
+  /// Return true if "nounwind" is known.
+  virtual bool isKnownNoUnwind() const = 0;
+};
+
+struct AANoSync : public AbstractAttribute {
+  /// An abstract interface for all nosync attributes.
+  AANoSync(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  static constexpr Attribute::AttrKind ID =
+      Attribute::AttrKind(Attribute::NoSync);
+
+  /// Return true if "nosync" is assumed.
+  virtual bool isAssumedNoSync() const = 0;
+
+  /// Return true if "nosync" is known.
+  virtual bool isKnownNoSync() const = 0;
+};
+
+/// An abstract interface for all nonnull attributes.
+struct AANonNull : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANonNull(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANonNull(Value *AssociatedVal, Value &AnchoredValue,
+            InformationCache &InfoCache)
+      : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {}
+
+  /// Return true if we assume that the underlying value is nonnull.
+  virtual bool isAssumedNonNull() const = 0;
+
+  /// Return true if we know that the underlying value is nonnull.
+  virtual bool isKnownNonNull() const = 0;
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::NonNull;
+};
+
+/// An abstract attribute for norecurse.
+struct AANoRecurse : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANoRecurse(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind(). Returns ID like the sibling classes.
+  Attribute::AttrKind getAttrKind() const override {
+    return ID;
+  }
+
+  /// Return true if "norecurse" is known.
+  virtual bool isKnownNoRecurse() const = 0;
+
+  /// Return true if "norecurse" is assumed.
+  virtual bool isAssumedNoRecurse() const = 0;
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::NoRecurse;
+};
+
+/// An abstract attribute for willreturn.
+struct AAWillReturn : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturn(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind(). Returns ID like the sibling classes.
+  Attribute::AttrKind getAttrKind() const override {
+    return ID;
+  }
+
+  /// Return true if "willreturn" is known.
+  virtual bool isKnownWillReturn() const = 0;
+
+  /// Return true if "willreturn" is assumed.
+  virtual bool isAssumedWillReturn() const = 0;
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::WillReturn;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
diff --git a/include/llvm/Transforms/IPO/CalledValuePropagation.h b/include/llvm/Transforms/IPO/CalledValuePropagation.h
index 352bdc7ac17f..c2626d0867b4 100644
--- a/include/llvm/Transforms/IPO/CalledValuePropagation.h
+++ b/include/llvm/Transforms/IPO/CalledValuePropagation.h
@@ -1,9 +1,8 @@
 //===- CalledValuePropagation.h - Propagate called values -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/ConstantMerge.h b/include/llvm/Transforms/IPO/ConstantMerge.h
index e04d3ae1a40e..12d38b5f58fa 100644
--- a/include/llvm/Transforms/IPO/ConstantMerge.h
+++ b/include/llvm/Transforms/IPO/ConstantMerge.h
@@ -1,9 +1,8 @@
 //===- ConstantMerge.h - Merge duplicate global constants -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/CrossDSOCFI.h b/include/llvm/Transforms/IPO/CrossDSOCFI.h
index 0979f5b79e86..8440df639729 100644
--- a/include/llvm/Transforms/IPO/CrossDSOCFI.h
+++ b/include/llvm/Transforms/IPO/CrossDSOCFI.h
@@ -1,9 +1,8 @@
 //===-- CrossDSOCFI.cpp - Externalize this module's CFI checks --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/include/llvm/Transforms/IPO/DeadArgumentElimination.h
index ba5666f20a9b..73797bc10017 100644
--- a/include/llvm/Transforms/IPO/DeadArgumentElimination.h
+++ b/include/llvm/Transforms/IPO/DeadArgumentElimination.h
@@ -1,9 +1,8 @@
 //===- DeadArgumentElimination.h - Eliminate Dead Args ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/ElimAvailExtern.h b/include/llvm/Transforms/IPO/ElimAvailExtern.h
index 94cb954fd2d5..92c319b3cce3 100644
--- a/include/llvm/Transforms/IPO/ElimAvailExtern.h
+++ b/include/llvm/Transforms/IPO/ElimAvailExtern.h
@@ -1,9 +1,8 @@
 //===- ElimAvailExtern.h - Optimize Global Variables ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index ff8a6546f059..7379009b2592 100644
--- a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -1,9 +1,8 @@
 //===-- ForceFunctionAttrs.h - Force function attrs for debugging ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h
index 901fed7a0fa4..ce61eea05c79 100644
--- a/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -1,9 +1,8 @@
 //===- FunctionAttrs.h - Compute function attributes ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h
index c2103b637266..bbf270c400af 100644
--- a/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/include/llvm/Transforms/IPO/FunctionImport.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h
index 7ca241f4645a..c434484d1ae3 100644
--- a/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -1,9 +1,8 @@
 //===-- GlobalDCE.h - DCE unreachable internal functions ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/GlobalOpt.h b/include/llvm/Transforms/IPO/GlobalOpt.h
index 5b4878604eab..48a861ff2cf8 100644
--- a/include/llvm/Transforms/IPO/GlobalOpt.h
+++ b/include/llvm/Transforms/IPO/GlobalOpt.h
@@ -1,9 +1,8 @@
 //===- GlobalOpt.h - Optimize Global Variables ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/GlobalSplit.h b/include/llvm/Transforms/IPO/GlobalSplit.h
index 56cefb7886fe..690b23a2d785 100644
--- a/include/llvm/Transforms/IPO/GlobalSplit.h
+++ b/include/llvm/Transforms/IPO/GlobalSplit.h
@@ -1,9 +1,8 @@
 //===- GlobalSplit.h - global variable splitter -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h
index 57e9a9e69187..73668844590d 100644
--- a/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -1,9 +1,8 @@
 //===- HotColdSplitting.h ---- Outline Cold Regions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 //
 // This pass outlines cold regions to a separate function.
diff --git a/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index 54e1c243ae27..bb7907fb8ac8 100644
--- a/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -1,9 +1,8 @@
 //===-- InferFunctionAttrs.h - Infer implicit function attributes ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Transforms/IPO/Inliner.h b/include/llvm/Transforms/IPO/Inliner.h
index 610e4500e4b1..8202b94d5a93 100644
--- a/include/llvm/Transforms/IPO/Inliner.h
+++ b/include/llvm/Transforms/IPO/Inliner.h
@@ -1,9 +1,8 @@
 //===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/IPO/Internalize.h b/include/llvm/Transforms/IPO/Internalize.h
index 45d676d9f77b..6c1e19ef9fe4 100644
--- a/include/llvm/Transforms/IPO/Internalize.h
+++ b/include/llvm/Transforms/IPO/Internalize.h
@@ -1,9 +1,8 @@
 //====- Internalize.h - Internalization API ---------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,11 +21,11 @@
 #ifndef LLVM_TRANSFORMS_IPO_INTERNALIZE_H
 #define LLVM_TRANSFORMS_IPO_INTERNALIZE_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/PassManager.h"
 #include <functional>
-#include <set>
 
 namespace llvm {
 class Module;
@@ -45,11 +44,11 @@ class InternalizePass : public PassInfoMixin<InternalizePass> {
   /// Internalize GV if it is possible to do so, i.e. it is not externally
   /// visible and is not a member of an externally visible comdat.
   bool maybeInternalize(GlobalValue &GV,
-                        const std::set<const Comdat *> &ExternalComdats);
+                        const DenseSet<const Comdat *> &ExternalComdats);
   /// If GV is part of a comdat and is externally visible, keep track of its
   /// comdat so that we don't internalize any of its members.
   void checkComdatVisibility(GlobalValue &GV,
-                             std::set<const Comdat *> &ExternalComdats);
+                             DenseSet<const Comdat *> &ExternalComdats);
 
 public:
   InternalizePass();
diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h
index bc448386b63d..39b23f5957db 100644
--- a/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -1,9 +1,8 @@
 //===- LowerTypeTests.h - type metadata lowering pass -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/PartialInlining.h b/include/llvm/Transforms/IPO/PartialInlining.h
index ec6dd36dae06..3b8297d65987 100644
--- a/include/llvm/Transforms/IPO/PartialInlining.h
+++ b/include/llvm/Transforms/IPO/PartialInlining.h
@@ -1,9 +1,8 @@
 //===- PartialInlining.h - Inline parts of functions ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 276306f686ff..63ff00afc2ae 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -1,9 +1,8 @@
 // llvm/Transforms/IPO/PassManagerBuilder.h - Build Standard Pass -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,7 +57,7 @@ class PassManagerBase;
 ///   ...
 class PassManagerBuilder {
 public:
-  /// Extensions are passed the builder itself (so they can see how it is
+  /// Extensions receive the builder itself (so they can see how it is
   /// configured) as well as the pass manager to add stuff to.
   typedef std::function<void(const PassManagerBuilder &Builder,
                              legacy::PassManagerBase &PM)>
@@ -113,6 +112,16 @@ public:
     /// passes at the end of the main CallGraphSCC passes and before any
     /// function simplification passes run by CGPassManager.
     EP_CGSCCOptimizerLate,
+
+    /// EP_FullLinkTimeOptimizationEarly - This extension point allows adding
+    /// passes that
+    /// run at Link Time, before Full Link Time Optimization.
+    EP_FullLinkTimeOptimizationEarly,
+
+    /// EP_FullLinkTimeOptimizationLast - This extension point allows adding
+    /// passes that
+    /// run at Link Time, after Full Link Time Optimization.
+    EP_FullLinkTimeOptimizationLast,
   };
 
   /// The Optimization Level - Specify the basic optimization level.
@@ -143,13 +152,14 @@ public:
   const ModuleSummaryIndex *ImportSummary = nullptr;
 
   bool DisableTailCalls;
-  bool DisableUnitAtATime;
   bool DisableUnrollLoops;
   bool SLPVectorize;
   bool LoopVectorize;
+  bool LoopsInterleaved;
   bool RerollLoops;
   bool NewGVN;
   bool DisableGVNLoadPRE;
+  bool ForgetAllSCEVInLoopUnroll;
   bool VerifyInput;
   bool VerifyOutput;
   bool MergeFunctions;
@@ -157,9 +167,15 @@ public:
   bool PrepareForThinLTO;
   bool PerformThinLTO;
   bool DivergentTarget;
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
 
   /// Enable profile instrumentation pass.
   bool EnablePGOInstrGen;
+  /// Enable profile context sensitive instrumentation pass.
+  bool EnablePGOCSInstrGen;
+  /// Enable profile context sensitive profile use pass.
+  bool EnablePGOCSInstrUse;
   /// Profile data file name that the instrumentation will be written to.
   std::string PGOInstrGen;
   /// Path of the profile data file.
@@ -186,7 +202,7 @@ private:
   void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
   void addLTOOptimizationPasses(legacy::PassManagerBase &PM);
   void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
-  void addPGOInstrPasses(legacy::PassManagerBase &MPM);
+  void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
   void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
   void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const;
 
diff --git a/include/llvm/Transforms/IPO/SCCP.h b/include/llvm/Transforms/IPO/SCCP.h
index fdb7865fbac3..3c40d44ca9de 100644
--- a/include/llvm/Transforms/IPO/SCCP.h
+++ b/include/llvm/Transforms/IPO/SCCP.h
@@ -1,9 +1,8 @@
 //===- SCCP.h - Sparse Conditional Constant Propagation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/SampleProfile.h b/include/llvm/Transforms/IPO/SampleProfile.h
index af4a933ec1f6..a5ad44551bf6 100644
--- a/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/include/llvm/Transforms/IPO/SampleProfile.h
@@ -1,9 +1,8 @@
 //===- SampleProfile.h - SamplePGO pass ---------- --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/include/llvm/Transforms/IPO/StripDeadPrototypes.h
index 5a05cd75c9d5..f4a15c36afc9 100644
--- a/include/llvm/Transforms/IPO/StripDeadPrototypes.h
+++ b/include/llvm/Transforms/IPO/StripDeadPrototypes.h
@@ -1,9 +1,8 @@
 //===-- StripDeadPrototypes.h - Remove unused function declarations -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
index bf04bbfe92d8..7acb922b37e1 100644
--- a/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
+++ b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
@@ -1,9 +1,8 @@
 //===- ThinLTOBitcodeWriter.h - Bitcode writing pass for ThinLTO ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index bf2c79b0751e..509fcc867060 100644
--- a/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -1,9 +1,8 @@
 //===- WholeProgramDevirt.h - Whole-program devirt pass ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/InstCombine/InstCombine.h b/include/llvm/Transforms/InstCombine/InstCombine.h
index ab25fe08553a..8894d96e591f 100644
--- a/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -1,9 +1,8 @@
 //===- InstCombine.h - InstCombine pass -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index f860b4b86555..6c33bdbafbd2 100644
--- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -1,9 +1,8 @@
 //===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 017cab0a7750..8b70d2926ae9 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation.h - Instrumentation passes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -88,10 +87,14 @@ struct GCOVOptions {
 ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
                                    GCOVOptions::getDefault());
 
-// PGO Instrumention
-ModulePass *createPGOInstrumentationGenLegacyPass();
+// PGO Instrumentation. Parameter IsCS indicates if this is the context sensitive
+// instrumentation.
+ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
 ModulePass *
-createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""));
+createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""),
+                                      bool IsCS = false);
+ModulePass *createPGOInstrumentationGenCreateVarLegacyPass(
+    StringRef CSInstrName = StringRef(""));
 ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false,
                                                      bool SamplePGO = false);
 FunctionPass *createPGOMemOPSizeOptLegacyPass();
@@ -133,48 +136,27 @@ struct InstrProfOptions {
   // Use atomic profile counter increments.
   bool Atomic = false;
 
+  // Use BFI to guide register promotion
+  bool UseBFIInPromotion = false;
+
   // Name of the profile file to use as output
   std::string InstrProfileOutput;
 
   InstrProfOptions() = default;
 };
 
-/// Insert frontend instrumentation based profiling.
+/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if
+/// this is the context sensitive instrumentation.
 ModulePass *createInstrProfilingLegacyPass(
-    const InstrProfOptions &Options = InstrProfOptions());
+    const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
 
-// Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
-                                                 bool Recover = false,
-                                                 bool UseAfterScope = false);
-ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
-                                             bool Recover = false,
-                                             bool UseGlobalsGC = true,
-                                             bool UseOdrIndicator = true);
-
-FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
-                                           bool Recover = false);
+ModulePass *createInstrOrderFilePass();
 
 // Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
 ModulePass *createDataFlowSanitizerPass(
     const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
     void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
 
-// Options for EfficiencySanitizer sub-tools.
-struct EfficiencySanitizerOptions {
-  enum Type {
-    ESAN_None = 0,
-    ESAN_CacheFrag,
-    ESAN_WorkingSet,
-  } ToolType = ESAN_None;
-
-  EfficiencySanitizerOptions() = default;
-};
-
-// Insert EfficiencySanitizer instrumentation.
-ModulePass *createEfficiencySanitizerPass(
-    const EfficiencySanitizerOptions &Options = EfficiencySanitizerOptions());
-
 // Options for sanitizer coverage instrumentation.
 struct SanitizerCoverageOptions {
   enum Type {
diff --git a/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
new file mode 100644
index 000000000000..40007a9b8c53
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -0,0 +1,143 @@
+//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AddressSanitizer class which is a port of the legacy
+// AddressSanitizer pass to use the new PassManager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERPASS_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERPASS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Frontend-provided metadata for source location.
+struct LocationMetadata {
+  StringRef Filename;
+  int LineNo = 0;
+  int ColumnNo = 0;
+
+  LocationMetadata() = default;
+
+  bool empty() const { return Filename.empty(); }
+  void parse(MDNode *MDN);
+};
+
+/// Frontend-provided metadata for global variables.
+class GlobalsMetadata {
+public:
+  struct Entry {
+    LocationMetadata SourceLoc;
+    StringRef Name;
+    bool IsDynInit = false;
+    bool IsBlacklisted = false;
+
+    Entry() = default;
+  };
+
+  /// Create a default uninitialized GlobalsMetadata instance.
+  GlobalsMetadata() = default;
+
+  /// Create an initialized GlobalsMetadata instance.
+  GlobalsMetadata(Module &M);
+
+  /// Returns metadata entry for a given global.
+  Entry get(GlobalVariable *G) const {
+    auto Pos = Entries.find(G);
+    return (Pos != Entries.end()) ? Pos->second : Entry();
+  }
+
+  /// Handle invalidation from the pass manager.
+  /// These results are never invalidated.
+  bool invalidate(Module &, const PreservedAnalyses &,
+                  ModuleAnalysisManager::Invalidator &) {
+    return false;
+  }
+  bool invalidate(Function &, const PreservedAnalyses &,
+                  FunctionAnalysisManager::Invalidator &) {
+    return false;
+  }
+
+private:
+  DenseMap<GlobalVariable *, Entry> Entries;
+};
+
+/// The ASanGlobalsMetadataAnalysis initializes and returns a GlobalsMetadata
+/// object. More specifically, ASan requires looking at all globals registered
+/// in 'llvm.asan.globals' before running, which only depends on reading module
+/// level metadata. This analysis is required to run before running the
+/// AddressSanitizerPass since it collects that metadata.
+/// The legacy pass manager equivalent of this is ASanGlobalsMetadataLegacyPass.
+class ASanGlobalsMetadataAnalysis
+    : public AnalysisInfoMixin<ASanGlobalsMetadataAnalysis> {
+public:
+  using Result = GlobalsMetadata;
+
+  Result run(Module &, ModuleAnalysisManager &);
+
+private:
+  friend AnalysisInfoMixin<ASanGlobalsMetadataAnalysis>;
+  static AnalysisKey Key;
+};
+
+/// Public interface to the address sanitizer pass for instrumenting code to
+/// check for various memory errors at runtime.
+///
+/// The sanitizer itself is a function pass that works by inserting various
+/// calls to the ASan runtime library functions. The runtime library essentially
+/// replaces malloc() and free() with custom implementations that allow regions
+/// surrounding requested memory to be checked for invalid accesses.
+class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
+public:
+  explicit AddressSanitizerPass(bool CompileKernel = false,
+                                bool Recover = false,
+                                bool UseAfterScope = false);
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseAfterScope;
+};
+
+/// Public interface to the address sanitizer module pass for instrumenting code
+/// to check for various memory errors.
+///
+/// This adds 'asan.module_ctor' to 'llvm.global_ctors'. This pass may also
+/// run independently of the function address sanitizer.
+class ModuleAddressSanitizerPass
+    : public PassInfoMixin<ModuleAddressSanitizerPass> {
+public:
+  explicit ModuleAddressSanitizerPass(bool CompileKernel = false,
+                                      bool Recover = false,
+                                      bool UseGlobalGC = true,
+                                      bool UseOdrIndicator = false);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseGlobalGC;
+  bool UseOdrIndicator;
+};
+
+// Insert AddressSanitizer (address sanity checking) instrumentation
+FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
+                                                 bool Recover = false,
+                                                 bool UseAfterScope = false);
+ModulePass *createModuleAddressSanitizerLegacyPassPass(
+    bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true,
+    bool UseOdrIndicator = true);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 3d4f62c121c2..120c6a8fb09f 100644
--- a/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -1,9 +1,8 @@
 //===- BoundsChecking.h - Bounds checking instrumentation -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Instrumentation/CGProfile.h b/include/llvm/Transforms/Instrumentation/CGProfile.h
index c06c1a28715e..28fd3804dec9 100644
--- a/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ b/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation/CGProfile.h -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
index 460342d1631b..18b428582046 100644
--- a/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
+++ b/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
@@ -1,9 +1,8 @@
 //===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
index dd55fbe29eed..b3971e49754e 100644
--- a/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
+++ b/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation/GCOVProfiler.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
new file mode 100644
index 000000000000..e3104eeb1d36
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -0,0 +1,41 @@
+//===--------- Definition of the HWAddressSanitizer class -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hardware AddressSanitizer class which is a port of the
+// legacy HWAddressSanitizer pass to use the new PassManager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZERPASS_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZERPASS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// This is a public interface to the hardware address sanitizer pass for
+/// instrumenting code to check for various memory errors at runtime, similar to
+/// AddressSanitizer but based on partial hardware assistance.
+class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> {
+public:
+  explicit HWAddressSanitizerPass(bool CompileKernel = false,
+                                  bool Recover = false);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+private:
+  bool CompileKernel;
+  bool Recover;
+};
+
+FunctionPass *createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false,
+                                                     bool Recover = false);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Transforms/Instrumentation/InstrOrderFile.h b/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
new file mode 100644
index 000000000000..f1245d8fd785
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
@@ -0,0 +1,28 @@
+//===- InstrOrderFile.h ---- Late IR instrumentation for order file ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRORDERFILE_H
+#define LLVM_TRANSFORMS_INSTRORDERFILE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+
+/// The instrumentation pass for recording function order.
+class InstrOrderFilePass : public PassInfoMixin<InstrOrderFilePass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRORDERFILE_H
diff --git a/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 13fb3db4ae6f..8f76d4a1ce55 100644
--- a/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation/InstrProfiling.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -35,8 +34,9 @@ using LoadStorePair = std::pair<Instruction *, Instruction *>;
 /// instrumentation pass.
 class InstrProfiling : public PassInfoMixin<InstrProfiling> {
 public:
-  InstrProfiling() = default;
-  InstrProfiling(const InstrProfOptions &Options) : Options(Options) {}
+  InstrProfiling() : IsCS(false) {}
+  InstrProfiling(const InstrProfOptions &Options, bool IsCS = false)
+      : Options(Options), IsCS(IsCS) {}
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
   bool run(Module &M, const TargetLibraryInfo &TLI);
@@ -61,6 +61,9 @@ private:
   GlobalVariable *NamesVar;
   size_t NamesSize;
 
+  // Whether this lowering is for the context-sensitive instrumentation.
+  bool IsCS;
+
   // vector of counter load/store pairs to be register promoted.
   std::vector<LoadStorePair> PromotionCandidates;
 
diff --git a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index 54f0e2f78230..0739d9e58a61 100644
--- a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation/MemorySanitizer.h - MSan Pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,10 +18,18 @@
 
 namespace llvm {
 
+struct MemorySanitizerOptions {
+  MemorySanitizerOptions() = default;
+  MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel)
+      : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {}
+  int TrackOrigins = 0;
+  bool Recover = false;
+  bool Kernel = false;
+};
+
 // Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
-                                        bool Recover = false,
-                                        bool EnableKmsan = false);
+FunctionPass *
+createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options = {});
 
 /// A function pass for msan instrumentation.
 ///
@@ -31,17 +38,12 @@ FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
 /// yet, the pass inserts the declarations. Otherwise the existing globals are
 /// used.
 struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
-  MemorySanitizerPass(int TrackOrigins = 0, bool Recover = false,
-                      bool EnableKmsan = false)
-      : TrackOrigins(TrackOrigins), Recover(Recover), EnableKmsan(EnableKmsan) {
-  }
+  MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
 
 private:
-  int TrackOrigins;
-  bool Recover;
-  bool EnableKmsan;
+  MemorySanitizerOptions Options;
 };
 }
 
diff --git a/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index fdc5df68a669..21cf291d82d1 100644
--- a/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -1,9 +1,8 @@
 //===- Transforms/Instrumentation/PGOInstrumentation.h ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,23 +26,47 @@ class Function;
 class Instruction;
 class Module;
 
+/// The variable-creation pass for context-sensitive IR based PGO (CSPGO).
+/// We use this pass to create the COMDAT profile variables for CSPGO.
+/// The reason to have a separate pass for this is that CSPGO can be run
+/// after LTO/ThinLTO linking, but the lld linker needs to see all the
+/// COMDAT variables before linking. So we have this pass always run
+/// before linking for CSPGO.
+class PGOInstrumentationGenCreateVar
+    : public PassInfoMixin<PGOInstrumentationGenCreateVar> {
+public:
+  PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
+      : CSInstrName(CSInstrName) {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  std::string CSInstrName;
+};
+
 /// The instrumentation (profile-instr-gen) pass for IR based PGO.
 class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> {
 public:
+  PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  // Whether this is a context-sensitive instrumentation.
+  bool IsCS;
 };
 
 /// The profile annotation (profile-instr-use) pass for IR based PGO.
 class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
 public:
   PGOInstrumentationUse(std::string Filename = "",
-                        std::string RemappingFilename = "");
+                        std::string RemappingFilename = "", bool IsCS = false);
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 
 private:
   std::string ProfileFileName;
   std::string ProfileRemappingFileName;
+  // Whether this is a context-sensitive instrumentation.
+  bool IsCS;
 };
 
 /// The indirect function call promotion pass.
diff --git a/include/llvm/Transforms/Instrumentation/PoisonChecking.h b/include/llvm/Transforms/Instrumentation/PoisonChecking.h
new file mode 100644
index 000000000000..606d3c255359
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/PoisonChecking.h
@@ -0,0 +1,25 @@
+//===- PoisonChecking.h - ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct PoisonCheckingPass : public PassInfoMixin<PoisonCheckingPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+
+#endif  // LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
diff --git a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index 701e2e6ec89e..b4e7d9924ff6 100644
--- a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -1,9 +1,8 @@
-//===- Transforms/Instrumentation/MemorySanitizer.h - TSan Pass -----------===//
+//===- Transforms/Instrumentation/ThreadSanitizer.h - TSan Pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/ObjCARC.h b/include/llvm/Transforms/ObjCARC.h
index 1897adc2ffbf..2f114c75e2e2 100644
--- a/include/llvm/Transforms/ObjCARC.h
+++ b/include/llvm/Transforms/ObjCARC.h
@@ -1,9 +1,8 @@
 //===-- ObjCARC.h - ObjCARC Scalar Transformations --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 8fcf9296ba47..f9360b5ee2c8 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -1,9 +1,8 @@
 //===-- Scalar.h - Scalar Transformations -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -138,6 +137,8 @@ Pass *createIndVarSimplifyPass();
 // LICM - This pass is a loop invariant code motion and memory promotion pass.
 //
 Pass *createLICMPass();
+Pass *createLICMPass(unsigned LicmMssaOptCap,
+                     unsigned LicmMssaNoAccForPromotionCap);
 
 //===----------------------------------------------------------------------===//
 //
@@ -184,11 +185,13 @@ Pass *createLoopInstSimplifyPass();
 // LoopUnroll - This pass is a simple loop unrolling pass.
 //
 Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
-                           int Threshold = -1, int Count = -1,
-                           int AllowPartial = -1, int Runtime = -1,
-                           int UpperBound = -1, int AllowPeeling = -1);
+                           bool ForgetAllSCEV = false, int Threshold = -1,
+                           int Count = -1, int AllowPartial = -1,
+                           int Runtime = -1, int UpperBound = -1,
+                           int AllowPeeling = -1);
 // Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false);
+Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+                                 bool ForgetAllSCEV = false);
 
 //===----------------------------------------------------------------------===//
 //
@@ -358,11 +361,17 @@ Pass *createLowerAtomicPass();
 //
 Pass *createLowerGuardIntrinsicPass();
 
+//===----------------------------------------------------------------------===//
+//
+// LowerWidenableCondition - Lower widenable condition to i1 true.
+//
+Pass *createLowerWidenableConditionPass();
+
 //===----------------------------------------------------------------------===//
 //
 // MergeICmps - Merge integer comparison chains into a memcmp
 //
-Pass *createMergeICmpsPass();
+Pass *createMergeICmpsLegacyPass();
 
 //===----------------------------------------------------------------------===//
 //
@@ -374,9 +383,10 @@ Pass *createCorrelatedValuePropagationPass();
 //
 // InferAddressSpaces - Modify users of addrspacecast instructions with values
 // in the source address space if using the destination address space is slower
-// on the target.
+// on the target. If AddressSpace is left to its default value, it will be
+// obtained from the TargetTransformInfo.
 //
-FunctionPass *createInferAddressSpacesPass();
+FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
 extern char &InferAddressSpacesID;
 
 //===----------------------------------------------------------------------===//
@@ -451,6 +461,12 @@ FunctionPass *createNaryReassociatePass();
 //
 FunctionPass *createLoopDistributePass();
 
+//===----------------------------------------------------------------------===//
+//
+// LoopFuse - Fuse loops.
+//
+FunctionPass *createLoopFusePass();
+
 //===----------------------------------------------------------------------===//
 //
 // LoopLoadElimination - Perform loop-aware load elimination.
diff --git a/include/llvm/Transforms/Scalar/ADCE.h b/include/llvm/Transforms/Scalar/ADCE.h
index f98af62c1a76..7d8b7ae68c00 100644
--- a/include/llvm/Transforms/Scalar/ADCE.h
+++ b/include/llvm/Transforms/Scalar/ADCE.h
@@ -1,9 +1,8 @@
 //===- ADCE.h - Aggressive dead code elimination ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 61975036e9ff..fb1687e1ac5d 100644
--- a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -1,9 +1,8 @@
 //===---- AlignmentFromAssumptions.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/BDCE.h b/include/llvm/Transforms/Scalar/BDCE.h
index d7d2730a8033..996622bccdba 100644
--- a/include/llvm/Transforms/Scalar/BDCE.h
+++ b/include/llvm/Transforms/Scalar/BDCE.h
@@ -1,9 +1,8 @@
 //===---- BDCE.cpp - Bit-tracking dead code elimination ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
index b2ca2a1c09ae..b6055639e8a8 100644
--- a/include/llvm/Transforms/Scalar/CallSiteSplitting.h
+++ b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
@@ -1,9 +1,8 @@
 //===- CallSiteSplitting..h - Callsite Splitting ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h
index ba32e122fa10..6b0fc9c1dd07 100644
--- a/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -1,9 +1,8 @@
 //==- ConstantHoisting.h - Prepare code for expensive constants --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,6 +55,7 @@ class DominatorTree;
 class Function;
 class GlobalVariable;
 class Instruction;
+class ProfileSummaryInfo;
 class TargetTransformInfo;
 
 /// A private "module" namespace for types and utilities used by
@@ -125,9 +125,10 @@ public:
 
   // Glue for old PM.
   bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
-               BlockFrequencyInfo *BFI, BasicBlock &Entry);
+               BlockFrequencyInfo *BFI, BasicBlock &Entry,
+               ProfileSummaryInfo *PSI);
 
-  void releaseMemory() {
+  void cleanup() {
     ClonedCastMap.clear();
     ConstIntCandVec.clear();
     for (auto MapEntry : ConstGEPCandMap)
@@ -149,6 +150,7 @@ private:
   LLVMContext *Ctx;
   const DataLayout *DL;
   BasicBlock *Entry;
+  ProfileSummaryInfo *PSI;
 
   /// Keeps track of constant candidates found in the function.
   using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
diff --git a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
index 20930699b557..25795de5d951 100644
--- a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
+++ b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
@@ -1,9 +1,8 @@
 //===- CorrelatedValuePropagation.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/DCE.h b/include/llvm/Transforms/Scalar/DCE.h
index 273346cf81d9..974e4b20d152 100644
--- a/include/llvm/Transforms/Scalar/DCE.h
+++ b/include/llvm/Transforms/Scalar/DCE.h
@@ -1,9 +1,8 @@
 //===- DCE.h - Dead code elimination ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/DeadStoreElimination.h b/include/llvm/Transforms/Scalar/DeadStoreElimination.h
index cfeb21814232..b66b0de90c79 100644
--- a/include/llvm/Transforms/Scalar/DeadStoreElimination.h
+++ b/include/llvm/Transforms/Scalar/DeadStoreElimination.h
@@ -1,9 +1,8 @@
 //===- DeadStoreElimination.h - Fast Dead Store Elimination -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/DivRemPairs.h b/include/llvm/Transforms/Scalar/DivRemPairs.h
index 0a4346f33b12..7401e02cb4ab 100644
--- a/include/llvm/Transforms/Scalar/DivRemPairs.h
+++ b/include/llvm/Transforms/Scalar/DivRemPairs.h
@@ -1,9 +1,8 @@
 //===- DivRemPairs.h - Hoist/decompose integer division and remainder -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/EarlyCSE.h b/include/llvm/Transforms/Scalar/EarlyCSE.h
index faf03a4ec489..1e7fd71dcbf4 100644
--- a/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -1,9 +1,8 @@
 //===- EarlyCSE.h - Simple and fast CSE pass --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h
index 206ee980109b..06aeb8322527 100644
--- a/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/include/llvm/Transforms/Scalar/Float2Int.h
@@ -1,9 +1,8 @@
 //===-- Float2Int.h - Demote floating point ops to work on integers -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h
index 9827678b89f2..9fe00a9e7f2d 100644
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@@ -1,9 +1,8 @@
 //===- GVN.h - Eliminate redundant values and loads -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h
index 8b346969b1e9..3dc4515f85a1 100644
--- a/include/llvm/Transforms/Scalar/GVNExpression.h
+++ b/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -1,9 +1,8 @@
 //===- GVNExpression.h - GVN Expression classes -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/GuardWidening.h b/include/llvm/Transforms/Scalar/GuardWidening.h
index 2bc0940ac715..06dc9ac97bec 100644
--- a/include/llvm/Transforms/Scalar/GuardWidening.h
+++ b/include/llvm/Transforms/Scalar/GuardWidening.h
@@ -1,9 +1,8 @@
 //===- GuardWidening.h - ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,7 +16,9 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
 #define LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
 
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
 
 namespace llvm {
 
@@ -25,6 +26,8 @@ class Function;
 
 struct GuardWideningPass : public PassInfoMixin<GuardWideningPass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
 }
 
diff --git a/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/include/llvm/Transforms/Scalar/IVUsersPrinter.h
index fad00d86a95f..a1f20d9ca983 100644
--- a/include/llvm/Transforms/Scalar/IVUsersPrinter.h
+++ b/include/llvm/Transforms/Scalar/IVUsersPrinter.h
@@ -1,9 +1,8 @@
 //===- IVUsersPrinter.h - Induction Variable Users Printing -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/IndVarSimplify.h b/include/llvm/Transforms/Scalar/IndVarSimplify.h
index e321c8fc6e9c..3c20537ab76a 100644
--- a/include/llvm/Transforms/Scalar/IndVarSimplify.h
+++ b/include/llvm/Transforms/Scalar/IndVarSimplify.h
@@ -1,9 +1,8 @@
 //===- IndVarSimplify.h - Induction Variable Simplification -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h b/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
index 311c549b8326..b1e700714e51 100644
--- a/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
+++ b/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
@@ -1,9 +1,8 @@
 //===- InductiveRangeCheckElimination.h - IRCE ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/include/llvm/Transforms/Scalar/InstSimplifyPass.h
index da79a13eb7cf..0c30b6260536 100644
--- a/include/llvm/Transforms/Scalar/InstSimplifyPass.h
+++ b/include/llvm/Transforms/Scalar/InstSimplifyPass.h
@@ -1,9 +1,8 @@
 //===- InstSimplifyPass.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/JumpThreading.h b/include/llvm/Transforms/Scalar/JumpThreading.h
index 9894345645a1..0464d40c45e6 100644
--- a/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -1,9 +1,8 @@
 //===- JumpThreading.h - thread control through conditional BBs -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,7 +22,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/IR/ValueHandle.h"
 #include <memory>
 #include <utility>
diff --git a/include/llvm/Transforms/Scalar/LICM.h b/include/llvm/Transforms/Scalar/LICM.h
index 68ad190c7647..f0ea928abd49 100644
--- a/include/llvm/Transforms/Scalar/LICM.h
+++ b/include/llvm/Transforms/Scalar/LICM.h
@@ -1,9 +1,8 @@
 //===- LICM.h - Loop Invariant Code Motion Pass -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,9 +38,21 @@
 
 namespace llvm {
 
+extern cl::opt<unsigned> SetLicmMssaOptCap;
+extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
+
 /// Performs Loop Invariant Code Motion Pass.
 class LICMPass : public PassInfoMixin<LICMPass> {
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
+
 public:
+  LICMPass()
+      : LicmMssaOptCap(SetLicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+      : LicmMssaOptCap(LicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
diff --git a/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
index e1b33799578b..3f250fc1ce8c 100644
--- a/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
+++ b/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/LoopAccessAnalysisPrinter.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/LoopDataPrefetch.h b/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
index e1ad67ac6fff..9ebd5984cea9 100644
--- a/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
+++ b/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
@@ -1,10 +1,9 @@
 //===-------- LoopDataPrefetch.h - Loop Data Prefetching Pass ---*- C++ -*-===//
 //
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/LoopDeletion.h b/include/llvm/Transforms/Scalar/LoopDeletion.h
index 7b8cb1e115c9..557616e2e6ba 100644
--- a/include/llvm/Transforms/Scalar/LoopDeletion.h
+++ b/include/llvm/Transforms/Scalar/LoopDeletion.h
@@ -1,9 +1,8 @@
 //===- LoopDeletion.h - Loop Deletion ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopDistribute.h b/include/llvm/Transforms/Scalar/LoopDistribute.h
index 2bf1c9d696d5..1a82176490c5 100644
--- a/include/llvm/Transforms/Scalar/LoopDistribute.h
+++ b/include/llvm/Transforms/Scalar/LoopDistribute.h
@@ -1,9 +1,8 @@
 //===- LoopDistribute.cpp - Loop Distribution Pass --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopFuse.h b/include/llvm/Transforms/Scalar/LoopFuse.h
new file mode 100644
index 000000000000..d3a02db6bd28
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LoopFuse.h
@@ -0,0 +1,30 @@
+//===- LoopFuse.h - Loop Fusion Pass ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the interface of the Loop Fusion pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+class LoopFusePass : public PassInfoMixin<LoopFusePass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
diff --git a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 7added8d2c61..d2fff8bb5743 100644
--- a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -1,9 +1,8 @@
 //===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopInstSimplify.h b/include/llvm/Transforms/Scalar/LoopInstSimplify.h
index 04dc79c3fa57..f6e86d11ed95 100644
--- a/include/llvm/Transforms/Scalar/LoopInstSimplify.h
+++ b/include/llvm/Transforms/Scalar/LoopInstSimplify.h
@@ -1,9 +1,8 @@
 //===- LoopInstSimplify.h - Loop Inst Simplify Pass -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopLoadElimination.h b/include/llvm/Transforms/Scalar/LoopLoadElimination.h
index b0514a4a7c98..65b9aabb8f51 100644
--- a/include/llvm/Transforms/Scalar/LoopLoadElimination.h
+++ b/include/llvm/Transforms/Scalar/LoopLoadElimination.h
@@ -1,9 +1,8 @@
 //===- LoopLoadElimination.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h
index 46ebb74c413c..61ec58585fd0 100644
--- a/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -1,9 +1,8 @@
 //===- LoopPassManager.h - Loop pass management -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/LoopPredication.h b/include/llvm/Transforms/Scalar/LoopPredication.h
index 57398bdb6bd1..252daafab7a3 100644
--- a/include/llvm/Transforms/Scalar/LoopPredication.h
+++ b/include/llvm/Transforms/Scalar/LoopPredication.h
@@ -1,9 +1,8 @@
 //===- LoopPredication.h - Guard based loop predication pass ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopRotation.h b/include/llvm/Transforms/Scalar/LoopRotation.h
index ea8d5618e6f7..254e6072906a 100644
--- a/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -1,9 +1,8 @@
 //===- LoopRotation.h - Loop Rotation -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h b/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
index 7628c7413eac..2d718592aef5 100644
--- a/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
@@ -1,9 +1,8 @@
 //===- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopSink.h b/include/llvm/Transforms/Scalar/LoopSink.h
index 371a7c8d2c44..234c48cbebc5 100644
--- a/include/llvm/Transforms/Scalar/LoopSink.h
+++ b/include/llvm/Transforms/Scalar/LoopSink.h
@@ -1,9 +1,8 @@
 //===- LoopSink.h - Loop Sink Pass ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
index 62c038a3857d..5cf805bc4939 100644
--- a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
+++ b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
@@ -1,9 +1,8 @@
 //===- LoopStrengthReduce.h - Loop Strength Reduce Pass ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
index fc69aa361059..7920269b0fb2 100644
--- a/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
+++ b/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
@@ -1,9 +1,8 @@
 //===- LoopUnrollAndJamPass.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index e38e983cc9eb..a84d889a83ad 100644
--- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -1,9 +1,8 @@
 //===- LoopUnrollPass.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,6 +15,8 @@
 
 namespace llvm {
 
+extern cl::opt<bool> ForgetSCEVInLoopUnroll;
+
 class Function;
 class Loop;
 class LPMUpdater;
@@ -29,9 +30,16 @@ class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
   /// metadata are considered. All other loops are skipped.
   const bool OnlyWhenForced;
 
+  /// If true, forget all loops when unrolling. If false, forget top-most loop
+  /// of the currently processed loops, which removes one entry at a time from
+  /// the internal SCEV records. For large loops, the former is faster.
+  const bool ForgetSCEV;
+
 public:
-  explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
-      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+  explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+                              bool ForgetSCEV = false)
+      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+        ForgetSCEV(ForgetSCEV) {}
 
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
@@ -61,8 +69,15 @@ struct LoopUnrollOptions {
   /// metadata are considered. All other loops are skipped.
   bool OnlyWhenForced;
 
-  LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
-      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+  /// If true, forget all loops when unrolling. If false, forget top-most loop
+  /// of the currently processed loops, which removes one entry at a time from
+  /// the internal SCEV records. For large loops, the former is faster.
+  const bool ForgetSCEV;
+
+  LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false,
+                    bool ForgetSCEV = false)
+      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+        ForgetSCEV(ForgetSCEV) {}
 
   /// Enables or disables partial unrolling. When disabled only full unrolling
   /// is allowed.
diff --git a/include/llvm/Transforms/Scalar/LowerAtomic.h b/include/llvm/Transforms/Scalar/LowerAtomic.h
index a4a2e7aafe44..40f8ca571f19 100644
--- a/include/llvm/Transforms/Scalar/LowerAtomic.h
+++ b/include/llvm/Transforms/Scalar/LowerAtomic.h
@@ -1,9 +1,8 @@
 //===- LowerAtomic.cpp - Lower atomic intrinsics ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index b6ee6523697c..4e47ff70d557 100644
--- a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -1,9 +1,8 @@
 //===- LowerExpectIntrinsic.h - LowerExpectIntrinsic pass -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h b/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
index a9f19f6b84b4..ce97b9e4c386 100644
--- a/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
+++ b/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
@@ -1,9 +1,8 @@
 //===--- LowerGuardIntrinsic.h - Lower the guard intrinsic ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/LowerWidenableCondition.h b/include/llvm/Transforms/Scalar/LowerWidenableCondition.h
new file mode 100644
index 000000000000..7c1e64b8f3a9
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LowerWidenableCondition.h
@@ -0,0 +1,26 @@
+//===--- LowerWidenableCondition.h - Lower widenable condition ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.widenable.condition intrinsic to its default
+// value, which is i1 true.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerWidenableConditionPass : PassInfoMixin<LowerWidenableConditionPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
diff --git a/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h b/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
index 41b4aada2baa..525174734303 100644
--- a/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
+++ b/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
@@ -1,9 +1,8 @@
 //===-- MakeGuardsExplicit.h - Turn guard intrinsics into guard branches --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 046c808bd051..5386f58b2b82 100644
--- a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -1,9 +1,8 @@
 //===- MemCpyOptimizer.h - memcpy optimization ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/MergeICmps.h b/include/llvm/Transforms/Scalar/MergeICmps.h
new file mode 100644
index 000000000000..63bdbf8f4d09
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/MergeICmps.h
@@ -0,0 +1,25 @@
+//===- MergeICmps.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+#define LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct MergeICmpsPass
+    : PassInfoMixin<MergeICmpsPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
diff --git a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index 48df09cdec9e..9071a56532f8 100644
--- a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -1,9 +1,8 @@
 //===- MergedLoadStoreMotion.h - merge and hoist/sink load/stores ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h
index e835bd5f0761..26f5fe185dd5 100644
--- a/include/llvm/Transforms/Scalar/NaryReassociate.h
+++ b/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -1,9 +1,8 @@
 //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/NewGVN.h b/include/llvm/Transforms/Scalar/NewGVN.h
index 3f7541863a19..1f3680fec79c 100644
--- a/include/llvm/Transforms/Scalar/NewGVN.h
+++ b/include/llvm/Transforms/Scalar/NewGVN.h
@@ -1,9 +1,8 @@
 //===- NewGVN.h - Global Value Numbering Pass -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h b/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
index 7f73831e0eb3..fd5a06c5051d 100644
--- a/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
+++ b/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
@@ -1,9 +1,8 @@
 //===--- PartiallyInlineLibCalls.h - Partially inline libcalls --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h
index ba7586dffd9d..2db8d8ce309c 100644
--- a/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/include/llvm/Transforms/Scalar/Reassociate.h
@@ -1,9 +1,8 @@
 //===- Reassociate.h - Reassociate binary expressions -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,7 +82,14 @@ protected:
   static const unsigned GlobalReassociateLimit = 10;
   static const unsigned NumBinaryOps =
       Instruction::BinaryOpsEnd - Instruction::BinaryOpsBegin;
-  DenseMap<std::pair<Value *, Value *>, unsigned> PairMap[NumBinaryOps];
+
+  struct PairMapValue {
+    WeakVH Value1;
+    WeakVH Value2;
+    unsigned Score;
+    bool isValid() const { return Value1 && Value2; }
+  };
+  DenseMap<std::pair<Value *, Value *>, PairMapValue> PairMap[NumBinaryOps];
 
   bool MadeChange;
 
diff --git a/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
index 128f176f4420..12773c16dcc2 100644
--- a/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
+++ b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
@@ -1,9 +1,8 @@
 //===- RewriteStatepointsForGC.h - ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h
index 0abbb32fde6a..0ffd983eb3e0 100644
--- a/include/llvm/Transforms/Scalar/SCCP.h
+++ b/include/llvm/Transforms/Scalar/SCCP.h
@@ -1,9 +1,8 @@
 //===- SCCP.cpp - Sparse Conditional Constant Propagation -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h
index b36c6f492be1..864a0cbd9db1 100644
--- a/include/llvm/Transforms/Scalar/SROA.h
+++ b/include/llvm/Transforms/Scalar/SROA.h
@@ -1,9 +1,8 @@
 //===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -19,7 +18,6 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/Support/Compiler.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/Transforms/Scalar/Scalarizer.h b/include/llvm/Transforms/Scalar/Scalarizer.h
index 1a0b9a2b638c..81363130e2e3 100644
--- a/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -1,9 +1,8 @@
 //===- Scalarizer.h --- Scalarize vector operations -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index eed50ec96161..33c1faaeee0b 100644
--- a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -1,9 +1,8 @@
 //===- SimpleLoopUnswitch.h - Hoist loop-invariant control flow -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h
index ce0a35fc06bd..f9792d38bbe6 100644
--- a/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -1,9 +1,8 @@
 //===- SimplifyCFG.h - Simplify and canonicalize the CFG --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/include/llvm/Transforms/Scalar/Sink.h b/include/llvm/Transforms/Scalar/Sink.h
index f9b3cb0fae39..6cbe964d1580 100644
--- a/include/llvm/Transforms/Scalar/Sink.h
+++ b/include/llvm/Transforms/Scalar/Sink.h
@@ -1,9 +1,8 @@
 //===-- Sink.h - Code Sinking -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h b/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
index 4a0bfd754723..3c7dafe71e8e 100644
--- a/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
+++ b/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
@@ -1,9 +1,8 @@
 //===- SpeculateAroundPHIs.h - Speculate around PHIs ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
index d00e950222a0..14da86483213 100644
--- a/include/llvm/Transforms/Scalar/SpeculativeExecution.h
+++ b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
@@ -1,9 +1,8 @@
 //===- SpeculativeExecution.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/TailRecursionElimination.h b/include/llvm/Transforms/Scalar/TailRecursionElimination.h
index 793f9bc152ed..906867644504 100644
--- a/include/llvm/Transforms/Scalar/TailRecursionElimination.h
+++ b/include/llvm/Transforms/Scalar/TailRecursionElimination.h
@@ -1,9 +1,8 @@
 //===---- TailRecursionElimination.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 018b22a932e6..2d5942a3f569 100644
--- a/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -1,9 +1,8 @@
 //===- WarnMissedTransforms.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils.h b/include/llvm/Transforms/Utils.h
index 378552775c77..6e03453babf1 100644
--- a/include/llvm/Transforms/Utils.h
+++ b/include/llvm/Transforms/Utils.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils.h - Utility Transformations --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index eaad06a10819..0b570c0d1342 100644
--- a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -1,9 +1,8 @@
 //===- ASanStackFrameLayout.h - ComputeASanStackFrameLayout -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/AddDiscriminators.h b/include/llvm/Transforms/Utils/AddDiscriminators.h
index 4dad06e6c125..f512c6c06331 100644
--- a/include/llvm/Transforms/Utils/AddDiscriminators.h
+++ b/include/llvm/Transforms/Utils/AddDiscriminators.h
@@ -1,9 +1,8 @@
 //===- AddDiscriminators.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 5b16a2c0d0b1..4d861ffe9a31 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -1,9 +1,8 @@
 //===- Transform/Utils/BasicBlockUtils.h - BasicBlock Utils -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,9 +17,9 @@
 // FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/InstrTypes.h"
 #include <cassert>
 
@@ -36,19 +35,38 @@ class LoopInfo;
 class MDNode;
 class MemoryDependenceResults;
 class MemorySSAUpdater;
+class PostDominatorTree;
 class ReturnInst;
 class TargetLibraryInfo;
 class Value;
 
+/// Replace contents of every block in \p BBs with a single unreachable
+/// instruction. If \p Updates is specified, collect all necessary DT updates
+/// into this vector. If \p KeepOneInputPHIs is true, one-input Phis in
+/// successors of blocks being deleted will be preserved.
+void DetatchDeadBlocks(ArrayRef <BasicBlock *> BBs,
+                       SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+                       bool KeepOneInputPHIs = false);
+
 /// Delete the specified block, which must have no predecessors.
-void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
+void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
+                     bool KeepOneInputPHIs = false);
 
 /// Delete the specified blocks from \p BB. The set of deleted blocks must have
 /// no predecessors that are not being deleted themselves. \p BBs must have no
 /// duplicating blocks. If there are loops among this set of blocks, all
 /// relevant loop info updates should be done before this function is called.
-void DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
-                      DomTreeUpdater *DTU = nullptr);
+/// If \p KeepOneInputPHIs is true, one-input Phis in successors of blocks
+/// being deleted will be preserved.
+void DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs,
+                      DomTreeUpdater *DTU = nullptr,
+                      bool KeepOneInputPHIs = false);
+
+/// Delete all basic blocks from \p F that are not reachable from its entry
+/// node. If \p KeepOneInputPHIs is true, one-input Phis in successors of
+/// blocks being deleted will be preserved.
+bool EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
+                                bool KeepOneInputPHIs = false);
 
 /// We know that BB has one predecessor. If there are any single-entry PHI nodes
 /// in it, fold them away. This handles the case when all entries to the PHI
@@ -92,24 +110,27 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
 /// during critical edge splitting.
 struct CriticalEdgeSplittingOptions {
   DominatorTree *DT;
+  PostDominatorTree *PDT;
   LoopInfo *LI;
   MemorySSAUpdater *MSSAU;
   bool MergeIdenticalEdges = false;
-  bool DontDeleteUselessPHIs = false;
+  bool KeepOneInputPHIs = false;
   bool PreserveLCSSA = false;
+  bool IgnoreUnreachableDests = false;
 
   CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr,
                                LoopInfo *LI = nullptr,
-                               MemorySSAUpdater *MSSAU = nullptr)
-      : DT(DT), LI(LI), MSSAU(MSSAU) {}
+                               MemorySSAUpdater *MSSAU = nullptr,
+                               PostDominatorTree *PDT = nullptr)
+      : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU) {}
 
   CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
     MergeIdenticalEdges = true;
     return *this;
   }
 
-  CriticalEdgeSplittingOptions &setDontDeleteUselessPHIs() {
-    DontDeleteUselessPHIs = true;
+  CriticalEdgeSplittingOptions &setKeepOneInputPHIs() {
+    KeepOneInputPHIs = true;
     return *this;
   }
 
@@ -117,6 +138,11 @@ struct CriticalEdgeSplittingOptions {
     PreserveLCSSA = true;
     return *this;
   }
+
+  CriticalEdgeSplittingOptions &setIgnoreUnreachableDests() {
+    IgnoreUnreachableDests = true;
+    return *this;
+  }
 };
 
 /// If this edge is a critical edge, insert a new node to split the critical
@@ -259,7 +285,8 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
 ///   SplitBefore
 ///   Tail
 ///
-/// If Unreachable is true, then ThenBlock ends with
+/// If \p ThenBlock is not specified, a new block will be created for it.
+/// If \p Unreachable is true, the newly created block will end with
 /// UnreachableInst, otherwise it branches to Tail.
 /// Returns the NewBasicBlock's terminator.
 ///
@@ -268,7 +295,8 @@ Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
                                        bool Unreachable,
                                        MDNode *BranchWeights = nullptr,
                                        DominatorTree *DT = nullptr,
-                                       LoopInfo *LI = nullptr);
+                                       LoopInfo *LI = nullptr,
+                                       BasicBlock *ThenBlock = nullptr);
 
 /// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
 /// but also creates the ElseBlock.
diff --git a/include/llvm/Transforms/Utils/BreakCriticalEdges.h b/include/llvm/Transforms/Utils/BreakCriticalEdges.h
index 9cc81a176cb6..3644f1ed7a13 100644
--- a/include/llvm/Transforms/Utils/BreakCriticalEdges.h
+++ b/include/llvm/Transforms/Utils/BreakCriticalEdges.h
@@ -1,9 +1,8 @@
 //===- BreakCriticalEdges.h - Critical Edge Elimination Pass --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 28efce6ac3fb..8421c31a36da 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -1,9 +1,8 @@
 //===- BuildLibCalls.h - Utility builder for libcalls -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -71,12 +70,22 @@ namespace llvm {
   /// Emit a call to the strcpy function to the builder, for the specified
   /// pointer arguments.
   Value *emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                    const TargetLibraryInfo *TLI, StringRef Name = "strcpy");
+                    const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the stpcpy function to the builder, for the specified
+  /// pointer arguments.
+  Value *emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                    const TargetLibraryInfo *TLI);
 
   /// Emit a call to the strncpy function to the builder, for the specified
   /// pointer arguments and length.
   Value *emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                     const TargetLibraryInfo *TLI, StringRef Name = "strncpy");
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the stpncpy function to the builder, for the specified
+  /// pointer arguments and length.
+  Value *emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
 
   /// Emit a call to the __memcpy_chk function to the builder. This expects that
   /// the Len and ObjSize have type 'intptr_t' and Dst/Src are pointers.
@@ -93,6 +102,47 @@ namespace llvm {
   Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);
 
+  /// Emit a call to the bcmp function.
+  Value *emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                  const DataLayout &DL, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the memccpy function.
+  Value *emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+                     IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the snprintf function.
+  Value *emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+                      ArrayRef<Value *> Args, IRBuilder<> &B,
+                      const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the sprintf function.
+  Value *emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs,
+                     IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strcat function.
+  Value *emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+                    const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strlcpy function.
+  Value *emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strlcat function.
+  Value *emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strncat function.
+  Value *emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the vsnprintf function.
+  Value *emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+                       IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the vsprintf function.
+  Value *emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilder<> &B,
+                      const TargetLibraryInfo *TLI);
+
   /// Emit a call to the unary function named 'Name' (e.g.  'floor'). This
   /// function is known to take a single of type matching 'Op' and returns one
   /// value with the same type. If 'Op' is a long double, 'l' is added as the
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index 6eca5ed2154e..471055921fa8 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils/BypassSlowDivision.h ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/CallPromotionUtils.h b/include/llvm/Transforms/Utils/CallPromotionUtils.h
index 6e8ece723638..d9d171c6d8bd 100644
--- a/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -1,9 +1,8 @@
 //===- CallPromotionUtils.h - Utilities for call promotion ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/include/llvm/Transforms/Utils/CanonicalizeAliases.h
index f23263783fec..8f23a041a24e 100644
--- a/include/llvm/Transforms/Utils/CanonicalizeAliases.h
+++ b/include/llvm/Transforms/Utils/CanonicalizeAliases.h
@@ -1,9 +1,8 @@
 //===-- CanonicalizeAliases.h - Alias Canonicalization Pass -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index f5e997324fc8..872ab9cab85c 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -1,9 +1,8 @@
 //===- Cloning.h - Clone various parts of LLVM programs ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -230,10 +229,7 @@ public:
 /// and all varargs at the callsite will be passed to any calls to
 /// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
 /// are only used by ForwardVarArgsTo.
-InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
-                            AAResults *CalleeAAR = nullptr,
-                            bool InsertLifetime = true);
-InlineResult InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                             AAResults *CalleeAAR = nullptr,
                             bool InsertLifetime = true);
 InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
@@ -269,6 +265,13 @@ BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
                                                 ValueToValueMapTy &ValueMapping,
                                                 DomTreeUpdater &DTU);
 
+/// Updates profile information by adjusting the entry count by adding
+/// entryDelta then scaling callsite information by the new count divided by the
+/// old count. VMap is used during inlining to also update the new clone.
+void updateProfileCallee(
+    Function *Callee, int64_t entryDelta,
+    const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index fee79fdc3bff..9d79ee1633f6 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -1,9 +1,8 @@
 //===- Transform/Utils/CodeExtractor.h - Code extraction util ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@ class BasicBlock;
 class BlockFrequency;
 class BlockFrequencyInfo;
 class BranchProbabilityInfo;
+class AssumptionCache;
 class CallInst;
 class DominatorTree;
 class Function;
@@ -57,6 +57,7 @@ class Value;
     const bool AggregateArgs;
     BlockFrequencyInfo *BFI;
     BranchProbabilityInfo *BPI;
+    AssumptionCache *AC;
 
     // If true, varargs functions can be extracted.
     bool AllowVarArgs;
@@ -85,6 +86,7 @@ class Value;
     CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
                   bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
                   BranchProbabilityInfo *BPI = nullptr,
+                  AssumptionCache *AC = nullptr,
                   bool AllowVarArgs = false, bool AllowAlloca = false,
                   std::string Suffix = "");
 
@@ -95,6 +97,7 @@ class Value;
     CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
                   BlockFrequencyInfo *BFI = nullptr,
                   BranchProbabilityInfo *BPI = nullptr,
+                  AssumptionCache *AC = nullptr,
                   std::string Suffix = "");
 
     /// Perform the extraction, returning the new function.
@@ -148,6 +151,16 @@ class Value;
     BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
 
   private:
+    struct LifetimeMarkerInfo {
+      bool SinkLifeStart = false;
+      bool HoistLifeEnd = false;
+      Instruction *LifeStart = nullptr;
+      Instruction *LifeEnd = nullptr;
+    };
+
+    LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr,
+                                          BasicBlock *ExitBlock) const;
+
     void severSplitPHINodesOfEntry(BasicBlock *&Header);
     void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
     void splitReturnBlocks();
diff --git a/include/llvm/Transforms/Utils/CtorUtils.h b/include/llvm/Transforms/Utils/CtorUtils.h
index 63e564dcb87a..3625ee662b1c 100644
--- a/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/include/llvm/Transforms/Utils/CtorUtils.h
@@ -1,9 +1,8 @@
 //===- CtorUtils.h - Helpers for working with global_ctors ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/EntryExitInstrumenter.h b/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
index f50c5c922081..3913693af359 100644
--- a/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
+++ b/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
@@ -1,9 +1,8 @@
 //===- EntryExitInstrumenter.h - Function Entry/Exit Instrumentation ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/EscapeEnumerator.h b/include/llvm/Transforms/Utils/EscapeEnumerator.h
index 1256dfdaca17..e667796c841b 100644
--- a/include/llvm/Transforms/Utils/EscapeEnumerator.h
+++ b/include/llvm/Transforms/Utils/EscapeEnumerator.h
@@ -1,9 +1,8 @@
 //===-- EscapeEnumerator.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/Evaluator.h b/include/llvm/Transforms/Utils/Evaluator.h
index 9908ae6fd393..bffd65f71b2e 100644
--- a/include/llvm/Transforms/Utils/Evaluator.h
+++ b/include/llvm/Transforms/Utils/Evaluator.h
@@ -1,9 +1,8 @@
 //===- Evaluator.h - LLVM IR evaluator --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h
index 35ba0950343c..4e2571b1d0b6 100644
--- a/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -1,9 +1,8 @@
 //===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/FunctionImportUtils.h b/include/llvm/Transforms/Utils/FunctionImportUtils.h
index e24398b90012..9c2a9ea531ea 100644
--- a/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -1,9 +1,8 @@
 //===- FunctionImportUtils.h - Importing support utilities -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,6 +43,11 @@ class FunctionImportGlobalProcessing {
   /// to promote any non-renamable values.
   SmallPtrSet<GlobalValue *, 8> Used;
 
+  /// Keep track of any COMDATs that require renaming (because COMDAT
+  /// leader was promoted and renamed). Maps from original COMDAT to one
+  /// with new name.
+  DenseMap<const Comdat *, Comdat *> RenamedComdats;
+
   /// Check if we should promote the given local value to global scope.
   bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
 
diff --git a/include/llvm/Transforms/Utils/GlobalStatus.h b/include/llvm/Transforms/Utils/GlobalStatus.h
index 8cc265bdf81d..519593c96766 100644
--- a/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -1,9 +1,8 @@
 //===- GlobalStatus.h - Compute status info for globals ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Utils/GuardUtils.h b/include/llvm/Transforms/Utils/GuardUtils.h
index 537045edafe4..3b365c56a5c0 100644
--- a/include/llvm/Transforms/Utils/GuardUtils.h
+++ b/include/llvm/Transforms/Utils/GuardUtils.h
@@ -1,9 +1,8 @@
 //===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Utils that are used to perform transformations related to guards and their
diff --git a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
index b55a9893bcf7..033ea05b77fa 100644
--- a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
+++ b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
@@ -1,9 +1,8 @@
-//===-- ImportedFunctionsInliningStats.h ------------------------*- C++ -*-===//
+//===-- ImportedFunctionsInliningStatistics.h -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Generating inliner statistics for imported functions, mostly useful for
diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h
index 5d9927eb51b2..35cae9aa2269 100644
--- a/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils/IntegerDivision.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/LCSSA.h b/include/llvm/Transforms/Utils/LCSSA.h
index fe717e5f6635..b01c8022a65b 100644
--- a/include/llvm/Transforms/Utils/LCSSA.h
+++ b/include/llvm/Transforms/Utils/LCSSA.h
@@ -1,9 +1,8 @@
 //===- LCSSA.h - Loop-closed SSA transform Pass -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h b/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
index c9df532e5794..ff1537ace329 100644
--- a/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
+++ b/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
@@ -1,9 +1,8 @@
 //===- LibCallsShrinkWrap.h - Shrink Wrap Library Calls -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index ec8b0eda3641..ff516f230979 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -1,9 +1,8 @@
 //===- Local.h - Functions to perform local transformations -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,12 +20,11 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Operator.h"
@@ -233,7 +231,8 @@ bool FlattenCFG(BasicBlock *BB, AliasAnalysis *AA = nullptr);
 /// If this basic block is ONLY a setcc and a branch, and if a predecessor
 /// branches to us and one of our successors, fold the setcc into the
 /// predecessor and use logical operations to pick the right destination.
-bool FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold = 1);
+bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
+                            unsigned BonusInstThreshold = 1);
 
 /// This function takes a virtual register computed by an Instruction and
 /// replaces it with a slot in the stack frame, allocated via alloca.
@@ -317,7 +316,7 @@ void findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V);
 /// (between the optional Deref operations). Offset can be negative.
 bool replaceDbgDeclare(Value *Address, Value *NewAddress,
                        Instruction *InsertBefore, DIBuilder &Builder,
-                       bool DerefBefore, int Offset, bool DerefAfter);
+                       uint8_t DIExprFlags, int Offset);
 
 /// Replaces llvm.dbg.declare instruction when the alloca it describes
 /// is replaced with a new value. If Deref is true, an additional
@@ -326,8 +325,8 @@ bool replaceDbgDeclare(Value *Address, Value *NewAddress,
 /// optional Deref operations). Offset can be negative. The new
 /// llvm.dbg.declare is inserted immediately after AI.
 bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
-                                DIBuilder &Builder, bool DerefBefore,
-                                int Offset, bool DerefAfter);
+                                DIBuilder &Builder, uint8_t DIExprFlags,
+                                int Offset);
 
 /// Replaces multiple llvm.dbg.value instructions when the alloca it describes
 /// is replaced with a new value. If Offset is non-zero, a constant displacement
@@ -337,11 +336,27 @@ bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
 void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
                               DIBuilder &Builder, int Offset = 0);
 
+/// Finds alloca where the value comes from.
+AllocaInst *findAllocaForValue(Value *V,
+                               DenseMap<Value *, AllocaInst *> &AllocaForValue);
+
 /// Assuming the instruction \p I is going to be deleted, attempt to salvage
 /// debug users of \p I by writing the effect of \p I in a DIExpression.
 /// Returns true if any debug users were updated.
 bool salvageDebugInfo(Instruction &I);
 
+/// Implementation of salvageDebugInfo, applying only to instructions in
+/// \p Insns, rather than all debug users of \p I.
+bool salvageDebugInfoForDbgValues(Instruction &I,
+                                  ArrayRef<DbgVariableIntrinsic *> Insns);
+
+/// Given an instruction \p I and DIExpression \p DIExpr operating on it, write
+/// the effects of \p I into the returned DIExpression, or return nullptr if
+/// it cannot be salvaged. \p StackVal: whether DW_OP_stack_value should be
+/// appended to the expression.
+DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr,
+                                   bool StackVal);
+
 /// Point debug users of \p From to \p To or salvage them. Use this function
 /// only when replacing all uses of \p From with \p To, with a guarantee that
 /// \p From is going to be deleted.
@@ -367,7 +382,8 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
 /// instruction, making it and the rest of the code in the block dead.
 unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap,
                              bool PreserveLCSSA = false,
-                             DomTreeUpdater *DTU = nullptr);
+                             DomTreeUpdater *DTU = nullptr,
+                             MemorySSAUpdater *MSSAU = nullptr);
 
 /// Convert the CallInst to InvokeInst with the specified unwind edge basic
 /// block.  This also splits the basic block where CI is located, because
@@ -426,7 +442,7 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
                                   const BasicBlock *BB);
 
-/// Return true if the CallSite CS calls a gc leaf function.
+/// Return true if this call calls a gc leaf function.
 ///
 /// A leaf function is a function that does not safepoint the thread during its
 /// execution.  During a call or invoke to such a function, the callers stack
@@ -434,7 +450,7 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
 ///
 /// Most passes can and should ignore this information, and it is only used
 /// during lowering by the GC infrastructure.
-bool callsGCLeafFunction(ImmutableCallSite CS, const TargetLibraryInfo &TLI);
+bool callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI);
 
 /// Copy a nonnull metadata node to a new load instruction.
 ///
@@ -456,8 +472,7 @@ void dropDebugUsers(Instruction &I);
 /// \p DomBlock, by moving its instructions to the insertion point \p InsertPt.
 ///
 /// The moved instructions receive the insertion point debug location values
-/// (DILocations) and their debug intrinsic instructions (dbg.values) are
-/// removed.
+/// (DILocations) and their debug intrinsic instructions are removed.
 void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
                               BasicBlock *BB);
 
diff --git a/include/llvm/Transforms/Utils/LoopRotationUtils.h b/include/llvm/Transforms/Utils/LoopRotationUtils.h
index cd5bc4301018..1e80722ed8b8 100644
--- a/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -1,9 +1,8 @@
 //===- LoopRotationUtils.h - Utilities to perform loop rotation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/LoopSimplify.h b/include/llvm/Transforms/Utils/LoopSimplify.h
index 166da2738ffd..2c1df7942f63 100644
--- a/include/llvm/Transforms/Utils/LoopSimplify.h
+++ b/include/llvm/Transforms/Utils/LoopSimplify.h
@@ -1,9 +1,8 @@
 //===- LoopSimplify.h - Loop Canonicalization Pass --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,8 @@
 
 namespace llvm {
 
+class MemorySSAUpdater;
+
 /// This pass is responsible for loop canonicalization.
 class LoopSimplifyPass : public PassInfoMixin<LoopSimplifyPass> {
 public:
@@ -56,9 +57,11 @@ public:
 ///
 /// This takes a potentially un-simplified loop L (and its children) and turns
 /// it into a simplified loop nest with preheaders and single backedges. It will
-/// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null.
+/// update \c DominatorTree, \c LoopInfo, \c ScalarEvolution and \c MemorySSA
+/// analyses if they're non-null, and LCSSA if \c PreserveLCSSA is true.
 bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
-                  AssumptionCache *AC, bool PreserveLCSSA);
+                  AssumptionCache *AC, MemorySSAUpdater *MSSAU,
+                  bool PreserveLCSSA);
 
 } // end namespace llvm
 
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 8c2527b6ae68..68bdded5cf93 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,6 +40,7 @@ class BasicBlock;
 class DataLayout;
 class Loop;
 class LoopInfo;
+class MemoryAccess;
 class MemorySSAUpdater;
 class OptimizationRemarkEmitter;
 class PredicatedScalarEvolution;
@@ -51,7 +51,7 @@ class TargetLibraryInfo;
 class TargetTransformInfo;
 
 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
-                                   bool PreserveLCSSA);
+                                   MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
 
 /// Ensure that all exit blocks of the loop are dedicated exits.
 ///
@@ -59,7 +59,7 @@ BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
 /// predecessors to use a dedicated loop exit block. We update the dominator
 /// tree and loop info if provided, and will preserve LCSSA if requested.
 bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
-                             bool PreserveLCSSA);
+                             MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
 
 /// Ensures LCSSA form for every instruction from the Worklist in the scope of
 /// innermost containing loop.
@@ -79,7 +79,8 @@ bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
 ///
 /// Looks at all instructions in the loop which have uses outside of the
 /// current loop. For each, an LCSSA PHI node is inserted and the uses outside
-/// the loop are rewritten to use this node.
+/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
+/// already.
 ///
 /// LoopInfo and DominatorTree are required and preserved.
 ///
@@ -100,6 +101,14 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE);
 bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
                           ScalarEvolution *SE);
 
+struct SinkAndHoistLICMFlags {
+  bool NoOfMemAccTooLarge;
+  unsigned LicmMssaOptCounter;
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
+  bool IsSink;
+};
+
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in
 /// reverse depth first order w.r.t the DominatorTree. This allows us to visit
@@ -111,7 +120,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                 TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                 AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                OptimizationRemarkEmitter *ORE);
+                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
 
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
@@ -124,7 +133,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
                  MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                 OptimizationRemarkEmitter *ORE);
+                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
@@ -148,14 +157,12 @@ void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
 /// of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
-bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &,
-                                  SmallVectorImpl<BasicBlock *> &,
-                                  SmallVectorImpl<Instruction *> &,
-                                  PredIteratorCache &, LoopInfo *,
-                                  DominatorTree *, const TargetLibraryInfo *,
-                                  Loop *, AliasSetTracker *,
-                                  ICFLoopSafetyInfo *,
-                                  OptimizationRemarkEmitter *);
+bool promoteLoopAccessesToScalars(
+    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
+    SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
+    PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
+    Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+    OptimizationRemarkEmitter *);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
@@ -277,6 +284,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
 bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                         Loop *CurLoop, AliasSetTracker *CurAST,
                         MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+                        SinkAndHoistLICMFlags *LICMFlags = nullptr,
                         OptimizationRemarkEmitter *ORE = nullptr);
 
 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
@@ -292,6 +300,7 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
                     ArrayRef<Value *> RedOps = None);
 
 /// Generates a vector reduction using shufflevectors to reduce the value.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
 Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                            RecurrenceDescriptor::MinMaxRecurrenceKind
                                MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
@@ -302,6 +311,7 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
 /// additional information supplied in \p Flags.
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
 Value *createSimpleTargetReduction(IRBuilder<> &B,
                                    const TargetTransformInfo *TTI,
                                    unsigned Opcode, Value *Src,
@@ -312,6 +322,7 @@ Value *createSimpleTargetReduction(IRBuilder<> &B,
 /// Create a generic target reduction using a recurrence descriptor \p Desc
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
+/// Fast-math-flags are propagated using the RecurrenceDescriptor.
 Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
                              RecurrenceDescriptor &Desc, Value *Src,
                              bool NoNaN = false);
diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h
index fcd734b37a1f..355c4d7dc6d8 100644
--- a/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -1,9 +1,8 @@
 //===- LoopVersioning.h - Utility to version a loop -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/LowerInvoke.h b/include/llvm/Transforms/Utils/LowerInvoke.h
index 12774c7fd1f7..c1198b08d3de 100644
--- a/include/llvm/Transforms/Utils/LowerInvoke.h
+++ b/include/llvm/Transforms/Utils/LowerInvoke.h
@@ -1,9 +1,8 @@
 //===- LowerInvoke.h - Eliminate Invoke instructions ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index 2b7d0f67a324..8e9d7b522c78 100644
--- a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -1,9 +1,8 @@
-//===- llvm/Transforms/Utils/LowerMemintrinsics.h ---------------*- C++ -*-===//
+//===- llvm/Transforms/Utils/LowerMemIntrinsics.h ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/Mem2Reg.h b/include/llvm/Transforms/Utils/Mem2Reg.h
index 407684338a3b..76c1c2c5bffe 100644
--- a/include/llvm/Transforms/Utils/Mem2Reg.h
+++ b/include/llvm/Transforms/Utils/Mem2Reg.h
@@ -1,9 +1,8 @@
 //===- Mem2Reg.h - The -mem2reg pass, a wrapper around the Utils lib ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index fee492be2a90..c69af5588741 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -1,9 +1,8 @@
 //===-- ModuleUtils.h - Functions to manipulate Modules ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@ namespace llvm {
 template <typename T> class ArrayRef;
 class Module;
 class Function;
+class FunctionCallee;
 class GlobalValue;
 class GlobalVariable;
 class Constant;
@@ -40,20 +40,14 @@ void appendToGlobalCtors(Module &M, Function *F, int Priority,
 void appendToGlobalDtors(Module &M, Function *F, int Priority,
                          Constant *Data = nullptr);
 
-// Validate the result of Module::getOrInsertFunction called for an interface
-// function of given sanitizer. If the instrumented module defines a function
-// with the same name, their prototypes must match, otherwise
-// getOrInsertFunction returns a bitcast.
-Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
-
-Function *declareSanitizerInitFunction(Module &M, StringRef InitName,
-                                       ArrayRef<Type *> InitArgTypes);
+FunctionCallee declareSanitizerInitFunction(Module &M, StringRef InitName,
+                                            ArrayRef<Type *> InitArgTypes);
 
 /// Creates sanitizer constructor function, and calls sanitizer's init
 /// function from it.
 /// \return Returns pair of pointers to constructor, and init functions
 /// respectively.
-std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> createSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
     StringRef VersionCheckName = StringRef());
@@ -65,10 +59,10 @@ std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
 ///
 /// \return Returns pair of pointers to constructor, and init functions
 /// respectively.
-std::pair<Function *, Function *> getOrCreateSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> getOrCreateSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
-    function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+    function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
     StringRef VersionCheckName = StringRef());
 
 // Creates and returns a sanitizer init function without argument if it doesn't
diff --git a/include/llvm/Transforms/Utils/NameAnonGlobals.h b/include/llvm/Transforms/Utils/NameAnonGlobals.h
index 17fc902eebf8..659ebe33ffa6 100644
--- a/include/llvm/Transforms/Utils/NameAnonGlobals.h
+++ b/include/llvm/Transforms/Utils/NameAnonGlobals.h
@@ -1,9 +1,8 @@
 //===-- NameAnonGlobals.h - Anonymous Global Naming Pass --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h
index 2fc38089f3f1..da4a5dcc28c0 100644
--- a/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -1,9 +1,8 @@
 //===- PredicateInfo.h - Build PredicateInfo ----------------------*-C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 5ddfbe2bf058..b2b4507bbc74 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -1,9 +1,8 @@
 //===- PromoteMemToReg.h - Promote Allocas to Scalars -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index d02607acbbb5..22b2295cc9d7 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -1,9 +1,8 @@
 //===- SSAUpdater.h - Unstructured SSA Update Tool --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,7 +147,7 @@ public:
   /// Insts is a list of loads and stores to promote, and Name is the basename
   /// for the PHIs to insert. After this is complete, the loads and stores are
   /// removed from the code.
-  void run(const SmallVectorImpl<Instruction *> &Insts) const;
+  void run(const SmallVectorImpl<Instruction *> &Insts);
 
   /// Return true if the specified instruction is in the Inst list.
   ///
@@ -159,7 +158,7 @@ public:
 
   /// This hook is invoked after all the stores are found and inserted as
   /// available values.
-  virtual void doExtraRewritesBeforeFinalDeletion() const {}
+  virtual void doExtraRewritesBeforeFinalDeletion() {}
 
   /// Clients can choose to implement this to get notified right before
   /// a load is RAUW'd another value.
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 53a608f01804..5d17d6f3d285 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -1,9 +1,8 @@
 //===- SSAUpdaterBulk.h - Unstructured SSA Update Tool ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index cab0f3e71575..ee06893ca660 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -1,9 +1,8 @@
 //===- SSAUpdaterImpl.h - SSA Updater Implementation ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SanitizerStats.h b/include/llvm/Transforms/Utils/SanitizerStats.h
index d36e34258a3f..14e8ae045cdd 100644
--- a/include/llvm/Transforms/Utils/SanitizerStats.h
+++ b/include/llvm/Transforms/Utils/SanitizerStats.h
@@ -1,9 +1,8 @@
 //===- SanitizerStats.h - Sanitizer statistics gathering  -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index a1dfed29a22d..dec73ef057e8 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -1,9 +1,8 @@
 //===-- llvm/Transforms/Utils/SimplifyIndVar.h - Indvar Utils ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 025bcd44e310..2572094ddac8 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -1,9 +1,8 @@
 //===- SimplifyLibCalls.h - Library call simplifier -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,8 @@ class TargetLibraryInfo;
 class BasicBlock;
 class Function;
 class OptimizationRemarkEmitter;
+class BlockFrequencyInfo;
+class ProfileSummaryInfo;
 
 /// This class implements simplifications for calls to fortified library
 /// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
@@ -56,14 +57,41 @@ private:
   Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B);
 
-  // Str/Stp cpy are similar enough to be handled in the same functions.
+  /// Str/Stp cpy are similar enough to be handled in the same functions.
   Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
   Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
+  Value *optimizeMemCCpyChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeSPrintfChk(CallInst *CI,IRBuilder<> &B);
+  Value *optimizeStrCatChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrLCat(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrNCatChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrLCpyChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeVSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeVSPrintfChk(CallInst *CI, IRBuilder<> &B);
 
   /// Checks whether the call \p CI to a fortified libcall is foldable
   /// to the non-fortified version.
+  ///
+  /// \param CI the call to the fortified libcall.
+  ///
+  /// \param ObjSizeOp the index of the object size parameter of this chk
+  /// function. Not optional since this is mandatory.
+  ///
+  /// \param SizeOp optionally set to the parameter index of an explicit buffer
+  /// size argument. For instance, set to '2' for __strncpy_chk.
+  ///
+  /// \param StrOp optionally set to the parameter index of the source string
+  /// parameter to strcpy-like functions, where only the strlen of the source
+  /// will be written into the destination.
+  ///
+  /// \param FlagsOp optionally set to the parameter index of a 'flags'
+  /// parameter. These are used by an implementation to opt-into stricter
+  /// checking.
   bool isFortifiedCallFoldable(CallInst *CI, unsigned ObjSizeOp,
-                               unsigned SizeOp, bool isString);
+                               Optional<unsigned> SizeOp = None,
+                               Optional<unsigned> StrOp = None,
+                               Optional<unsigned> FlagsOp = None);
 };
 
 /// LibCallSimplifier - This class implements a collection of optimizations
@@ -75,6 +103,8 @@ private:
   const DataLayout &DL;
   const TargetLibraryInfo *TLI;
   OptimizationRemarkEmitter &ORE;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
   bool UnsafeFPShrink;
   function_ref<void(Instruction *, Value *)> Replacer;
   function_ref<void(Instruction *)> Eraser;
@@ -102,6 +132,7 @@ public:
   LibCallSimplifier(
       const DataLayout &DL, const TargetLibraryInfo *TLI,
       OptimizationRemarkEmitter &ORE,
+      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
       function_ref<void(Instruction *, Value *)> Replacer =
           &replaceAllUsesWithDefault,
       function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
@@ -134,6 +165,8 @@ private:
   Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
diff --git a/include/llvm/Transforms/Utils/SizeOpts.h b/include/llvm/Transforms/Utils/SizeOpts.h
new file mode 100644
index 000000000000..1a052c694e6d
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SizeOpts.h
@@ -0,0 +1,34 @@
+//===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+
+namespace llvm {
+
+class BasicBlock;
+class BlockFrequencyInfo;
+class Function;
+class ProfileSummaryInfo;
+
+/// Returns true if function \p F is suggested to be size-optimized based on the
+/// profile.
+bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+                           BlockFrequencyInfo *BFI);
+/// Returns true if basic block \p BB is suggested to be size-optimized based
+/// on the profile.
+bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+                           BlockFrequencyInfo *BFI);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h
index d2c31f2701ac..7839c5d9a589 100644
--- a/include/llvm/Transforms/Utils/SplitModule.h
+++ b/include/llvm/Transforms/Utils/SplitModule.h
@@ -1,9 +1,8 @@
 //===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h
index 5f6488e08b5a..ce9dcaf2b74f 100644
--- a/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -1,9 +1,8 @@
 //===- SymbolRewriter.h - Symbol Rewriting Pass -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index 222c601ad608..f68534ecd2eb 100644
--- a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -1,9 +1,8 @@
 //===-- UnifyFunctionExitNodes.h - Ensure fn's have one return --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 70e936d75008..593ca26feb98 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Utils/UnrollLoop.h - Unrolling utilities -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,11 +24,13 @@ namespace llvm {
 
 class AssumptionCache;
 class BasicBlock;
+class BlockFrequencyInfo;
 class DependenceInfo;
 class DominatorTree;
 class Loop;
 class LoopInfo;
 class MDNode;
+class ProfileSummaryInfo;
 class OptimizationRemarkEmitter;
 class ScalarEvolution;
 
@@ -63,22 +64,31 @@ enum class LoopUnrollResult {
   FullyUnrolled
 };
 
-LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
-                            bool Force, bool AllowRuntime,
-                            bool AllowExpensiveTripCount, bool PreserveCondBr,
-                            bool PreserveOnlyFirst, unsigned TripMultiple,
-                            unsigned PeelCount, bool UnrollRemainder,
-                            LoopInfo *LI, ScalarEvolution *SE,
-                            DominatorTree *DT, AssumptionCache *AC,
-                            OptimizationRemarkEmitter *ORE, bool PreserveLCSSA,
-                            Loop **RemainderLoop = nullptr);
+struct UnrollLoopOptions {
+  unsigned Count;
+  unsigned TripCount;
+  bool Force;
+  bool AllowRuntime;
+  bool AllowExpensiveTripCount;
+  bool PreserveCondBr;
+  bool PreserveOnlyFirst;
+  unsigned TripMultiple;
+  unsigned PeelCount;
+  bool UnrollRemainder;
+  bool ForgetAllSCEV;
+};
+
+LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+                            ScalarEvolution *SE, DominatorTree *DT,
+                            AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+                            bool PreserveLCSSA, Loop **RemainderLoop = nullptr);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                 bool AllowExpensiveTripCount,
                                 bool UseEpilogRemainder, bool UnrollRemainder,
-                                LoopInfo *LI, ScalarEvolution *SE,
-                                DominatorTree *DT, AssumptionCache *AC,
-                                bool PreserveLCSSA,
+                                bool ForgetAllSCEV, LoopInfo *LI,
+                                ScalarEvolution *SE, DominatorTree *DT,
+                                AssumptionCache *AC, bool PreserveLCSSA,
                                 Loop **ResultLoop = nullptr);
 
 void computePeelCount(Loop *L, unsigned LoopSize,
@@ -109,9 +119,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
                         TargetTransformInfo::UnrollingPreferences &UP,
                         bool &UseUpperBound);
 
-BasicBlock *foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
-                                     ScalarEvolution *SE, DominatorTree *DT);
-
 void remapInstruction(Instruction *I, ValueToValueMapTy &VMap);
 
 void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
@@ -121,7 +128,8 @@ void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
 MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
 
 TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
-    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
     Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
diff --git a/include/llvm/Transforms/Utils/VNCoercion.h b/include/llvm/Transforms/Utils/VNCoercion.h
index 1baa9b66e491..f67b9ed0afdd 100644
--- a/include/llvm/Transforms/Utils/VNCoercion.h
+++ b/include/llvm/Transforms/Utils/VNCoercion.h
@@ -1,9 +1,8 @@
 //===- VNCoercion.h - Value Numbering Coercion Utilities --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file / This file provides routines used by LLVM's value numbering passes to
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 4ecb23ea1951..1952a210291e 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -1,9 +1,8 @@
 //===- ValueMapper.h - Remapping for constants and metadata -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 70f9a2e0741b..88a0e49d0fae 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -1,9 +1,8 @@
 //===-- Vectorize.h - Vectorization Transformations -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,8 +109,9 @@ struct VectorizeConfig {
 //
 // LoopVectorize - Create a loop vectorization pass.
 //
-Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced = false,
-                              bool VectorizeOnlyWhenForced = false);
+Pass *createLoopVectorizePass();
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
+                              bool VectorizeOnlyWhenForced);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
index 6b37d7093c44..f72c76c6f0f2 100644
--- a/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -1,9 +1,8 @@
 //===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 5c7bba048607..b144006e2628 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -1,9 +1,8 @@
 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,11 +98,7 @@ public:
                      OptimizationRemarkEmitter &ORE);
 
   /// Mark the loop L as already vectorized by setting the width to 1.
-  void setAlreadyVectorized() {
-    IsVectorized.Value = 1;
-    Hint Hints[] = {IsVectorized};
-    writeHintsToMetadata(Hints);
-  }
+  void setAlreadyVectorized();
 
   bool allowVectorization(Function *F, Loop *L,
                           bool VectorizeOnlyWhenForced) const;
@@ -152,15 +147,6 @@ private:
   /// Checks string hint with one operand and set value if valid.
   void setHint(StringRef Name, Metadata *Arg);
 
-  /// Create a new hint from name / value pair.
-  MDNode *createHintMetadata(StringRef Name, unsigned V) const;
-
-  /// Matches metadata with hint name.
-  bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
-
-  /// Sets current hints into loop metadata, keeping other values intact.
-  void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
-
   /// The loop these hints belong to.
   const Loop *TheLoop;
 
@@ -219,12 +205,13 @@ class LoopVectorizationLegality {
 public:
   LoopVectorizationLegality(
       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
-      TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
-      std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
-      OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
-      LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
-      : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
-        ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
+      TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
+      Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
+      LoopInfo *LI, OptimizationRemarkEmitter *ORE,
+      LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
+      AssumptionCache *AC)
+      : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
+        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
 
   /// ReductionList contains the reduction descriptors for all
   /// of the reductions that were found in the loop.
@@ -385,18 +372,6 @@ private:
   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
                        SmallPtrSetImpl<Value *> &AllowedExit);
 
-  /// Create an analysis remark that explains why vectorization failed
-  ///
-  /// \p RemarkName is the identifier for the remark.  If \p I is passed it is
-  /// an instruction that prevents vectorization.  Otherwise the loop is used
-  /// for the location of the remark.  \return the remark object that can be
-  /// streamed to.
-  OptimizationRemarkAnalysis
-  createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
-    return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
-                                  RemarkName, TheLoop, I);
-  }
-
   /// If an access has a symbolic strides, this maps the pointer value to
   /// the stride symbol.
   const ValueToValueMap *getSymbolicStrides() {
@@ -407,6 +382,14 @@ private:
     return LAI ? &LAI->getSymbolicStrides() : nullptr;
   }
 
+  /// Reports a vectorization illegality: print \p DebugMsg for debugging
+  /// purposes along with the optimization remark (\p OREMsg, \p ORETag).
+  /// If \p I is passed it is an instruction that prevents vectorization.
+  /// Otherwise the loop is used for the location of the remark.
+  void reportVectorizationFailure(const StringRef DebugMsg,
+      const StringRef OREMsg, const StringRef ORETag,
+      Instruction *I = nullptr) const;
+
   /// The loop that we evaluate.
   Loop *TheLoop;
 
@@ -420,6 +403,9 @@ private:
   /// unrolling.
   PredicatedScalarEvolution &PSE;
 
+  /// Target Transform Info.
+  TargetTransformInfo *TTI;
+
   /// Target Library Info.
   TargetLibraryInfo *TLI;
 
@@ -479,7 +465,7 @@ private:
   /// Used to emit an analysis of any legality issues.
   LoopVectorizeHints *Hints;
 
-  /// The demanded bits analsyis is used to compute the minimum type size in
+  /// The demanded bits analysis is used to compute the minimum type size in
   /// which a reduction can be computed.
   DemandedBits *DB;
 
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d9c4f7b023c1..d1ec06afb02a 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -1,9 +1,8 @@
 //===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -72,19 +71,63 @@ class Loop;
 class LoopAccessInfo;
 class LoopInfo;
 class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
 class ScalarEvolution;
 class TargetLibraryInfo;
 class TargetTransformInfo;
 
+extern cl::opt<bool> EnableLoopInterleaving;
+extern cl::opt<bool> EnableLoopVectorization;
+
+struct LoopVectorizeOptions {
+  /// If false, consider all loops for interleaving.
+  /// If true, only loops that explicitly request interleaving are considered.
+  bool InterleaveOnlyWhenForced;
+
+  /// If false, consider all loops for vectorization.
+  /// If true, only loops that explicitly request vectorization are considered.
+  bool VectorizeOnlyWhenForced;
+
+  /// The current defaults when creating the pass with no arguments are:
+  /// EnableLoopInterleaving = true and EnableLoopVectorization = true. This
+  /// means that interleaving default is consistent with the cl::opt flag, while
+  /// vectorization is not.
+  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
+  /// set to true, and the corresponding change to account for this be made in
+  /// opt.cpp. The initializations below will become:
+  /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
+  /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
+  LoopVectorizeOptions()
+      : InterleaveOnlyWhenForced(false), VectorizeOnlyWhenForced(false) {}
+  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
+                       bool VectorizeOnlyWhenForced)
+      : InterleaveOnlyWhenForced(InterleaveOnlyWhenForced),
+        VectorizeOnlyWhenForced(VectorizeOnlyWhenForced) {}
+
+  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
+    InterleaveOnlyWhenForced = Value;
+    return *this;
+  }
+
+  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
+    VectorizeOnlyWhenForced = Value;
+    return *this;
+  }
+};
+
 /// The LoopVectorize Pass.
 struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
   /// If false, consider all loops for interleaving.
   /// If true, only loops that explicitly request interleaving are considered.
-  bool InterleaveOnlyWhenForced = false;
+  bool InterleaveOnlyWhenForced;
 
   /// If false, consider all loops for vectorization.
   /// If true, only loops that explicitly request vectorization are considered.
-  bool VectorizeOnlyWhenForced = false;
+  bool VectorizeOnlyWhenForced;
+
+  LoopVectorizePass(LoopVectorizeOptions Opts = {})
+      : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced),
+        VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced) {}
 
   ScalarEvolution *SE;
   LoopInfo *LI;
@@ -97,6 +140,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
   AssumptionCache *AC;
   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
   OptimizationRemarkEmitter *ORE;
+  ProfileSummaryInfo *PSI;
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 
@@ -106,7 +150,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
                BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
                DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
                std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
-               OptimizationRemarkEmitter &ORE);
+               OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
 
   bool processLoop(Loop *L);
 };
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 3152e8192fc5..ac6afb761d4d 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -1,9 +1,8 @@
 //===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass implements the Bottom Up SLP vectorizer. It detects consecutive
@@ -56,6 +55,8 @@ class BoUpSLP;
 
 } // end namespace slpvectorizer
 
+extern cl::opt<bool> RunSLPVectorization;
+
 struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   using StoreList = SmallVector<StoreInst *, 8>;
   using StoreListMap = MapVector<Value *, StoreList>;
diff --git a/include/llvm/WindowsManifest/WindowsManifestMerger.h b/include/llvm/WindowsManifest/WindowsManifestMerger.h
index 302d3705887b..935c930ad91d 100644
--- a/include/llvm/WindowsManifest/WindowsManifestMerger.h
+++ b/include/llvm/WindowsManifest/WindowsManifestMerger.h
@@ -1,9 +1,8 @@
 //===-- WindowsManifestMerger.h ---------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
diff --git a/include/llvm/WindowsResource/ResourceProcessor.h b/include/llvm/WindowsResource/ResourceProcessor.h
index 4ca0a4b05bd0..4e99c05f4cd9 100644
--- a/include/llvm/WindowsResource/ResourceProcessor.h
+++ b/include/llvm/WindowsResource/ResourceProcessor.h
@@ -1,9 +1,8 @@
 //===-- ResourceProcessor.h -------------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 
diff --git a/include/llvm/WindowsResource/ResourceScriptToken.h b/include/llvm/WindowsResource/ResourceScriptToken.h
index 494ae3222a4b..254121cd318a 100644
--- a/include/llvm/WindowsResource/ResourceScriptToken.h
+++ b/include/llvm/WindowsResource/ResourceScriptToken.h
@@ -1,9 +1,8 @@
 //===-- ResourceScriptToken.h -----------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
diff --git a/include/llvm/WindowsResource/ResourceScriptTokenList.h b/include/llvm/WindowsResource/ResourceScriptTokenList.h
index 0beed117c3e7..6b44dccaa35f 100644
--- a/include/llvm/WindowsResource/ResourceScriptTokenList.h
+++ b/include/llvm/WindowsResource/ResourceScriptTokenList.h
@@ -1,9 +1,8 @@
 //===-- ResourceScriptTokenList.h -------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/BlockIndexer.h b/include/llvm/XRay/BlockIndexer.h
index b42fa17f3fb7..dafd2b5a5230 100644
--- a/include/llvm/XRay/BlockIndexer.h
+++ b/include/llvm/XRay/BlockIndexer.h
@@ -1,9 +1,8 @@
 //===- BlockIndexer.h - FDR Block Indexing Visitor ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/BlockPrinter.h b/include/llvm/XRay/BlockPrinter.h
index bfb21e239517..949258085332 100644
--- a/include/llvm/XRay/BlockPrinter.h
+++ b/include/llvm/XRay/BlockPrinter.h
@@ -1,9 +1,8 @@
 //===- BlockPrinter.h - FDR Block Pretty Printer -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/BlockVerifier.h b/include/llvm/XRay/BlockVerifier.h
index 46371c13891a..c848fdf084bc 100644
--- a/include/llvm/XRay/BlockVerifier.h
+++ b/include/llvm/XRay/BlockVerifier.h
@@ -1,9 +1,8 @@
 //===- BlockVerifier.h - FDR Block Verifier -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/FDRLogBuilder.h b/include/llvm/XRay/FDRLogBuilder.h
index b5e9ed5c406b..3f49dc6dcb9d 100644
--- a/include/llvm/XRay/FDRLogBuilder.h
+++ b/include/llvm/XRay/FDRLogBuilder.h
@@ -1,9 +1,8 @@
 //===- FDRLogBuilder.h - XRay FDR Log Building Utility --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
diff --git a/include/llvm/XRay/FDRRecordConsumer.h b/include/llvm/XRay/FDRRecordConsumer.h
index e856e1540558..4fbb1d41d0da 100644
--- a/include/llvm/XRay/FDRRecordConsumer.h
+++ b/include/llvm/XRay/FDRRecordConsumer.h
@@ -1,9 +1,8 @@
 //===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
diff --git a/include/llvm/XRay/FDRRecordProducer.h b/include/llvm/XRay/FDRRecordProducer.h
index efdba2a67b7b..b530a85bc7e1 100644
--- a/include/llvm/XRay/FDRRecordProducer.h
+++ b/include/llvm/XRay/FDRRecordProducer.h
@@ -1,9 +1,8 @@
 //===- FDRRecordProducer.h - XRay FDR Mode Record Producer ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
diff --git a/include/llvm/XRay/FDRRecords.h b/include/llvm/XRay/FDRRecords.h
index 8a84f4d0c1fb..a8ce74bd88fb 100644
--- a/include/llvm/XRay/FDRRecords.h
+++ b/include/llvm/XRay/FDRRecords.h
@@ -1,9 +1,8 @@
 //===- FDRRecords.h - XRay Flight Data Recorder Mode Records --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/FDRTraceExpander.h b/include/llvm/XRay/FDRTraceExpander.h
index 02a21bed5ce9..f3c36cf4cf66 100644
--- a/include/llvm/XRay/FDRTraceExpander.h
+++ b/include/llvm/XRay/FDRTraceExpander.h
@@ -1,9 +1,8 @@
 //===- FDRTraceExpander.h - XRay FDR Mode Log Expander --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/FDRTraceWriter.h b/include/llvm/XRay/FDRTraceWriter.h
index 7b3b5fa25eff..2498877e27c1 100644
--- a/include/llvm/XRay/FDRTraceWriter.h
+++ b/include/llvm/XRay/FDRTraceWriter.h
@@ -1,9 +1,8 @@
 //===- FDRTraceWriter.h - XRay FDR Trace Writer -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/FileHeaderReader.h b/include/llvm/XRay/FileHeaderReader.h
index 3b8809bdbb34..1c9681cfd9af 100644
--- a/include/llvm/XRay/FileHeaderReader.h
+++ b/include/llvm/XRay/FileHeaderReader.h
@@ -1,9 +1,8 @@
 //===- FileHeaderReader.h - XRay Trace File Header Reading Function -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/Graph.h b/include/llvm/XRay/Graph.h
index a4d34a8a4be3..004681512800 100644
--- a/include/llvm/XRay/Graph.h
+++ b/include/llvm/XRay/Graph.h
@@ -1,9 +1,8 @@
 //===-- Graph.h - XRay Graph Class ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/InstrumentationMap.h b/include/llvm/XRay/InstrumentationMap.h
index 42bfca36a20b..5cbe5c44893b 100644
--- a/include/llvm/XRay/InstrumentationMap.h
+++ b/include/llvm/XRay/InstrumentationMap.h
@@ -1,9 +1,8 @@
 //===- InstrumentationMap.h - XRay Instrumentation Map ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/Profile.h b/include/llvm/XRay/Profile.h
index 9365630358e8..79d9b53387f3 100644
--- a/include/llvm/XRay/Profile.h
+++ b/include/llvm/XRay/Profile.h
@@ -1,9 +1,8 @@
 //===- Profile.h - XRay Profile Abstraction -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/RecordPrinter.h b/include/llvm/XRay/RecordPrinter.h
index 649c64ab6f5c..7c7b7a32c56d 100644
--- a/include/llvm/XRay/RecordPrinter.h
+++ b/include/llvm/XRay/RecordPrinter.h
@@ -1,9 +1,8 @@
 //===- RecordPrinter.h - FDR Record Printer -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/Trace.h b/include/llvm/XRay/Trace.h
index 924addd1560d..eb1f03b2a0d4 100644
--- a/include/llvm/XRay/Trace.h
+++ b/include/llvm/XRay/Trace.h
@@ -1,9 +1,8 @@
 //===- Trace.h - XRay Trace Abstraction -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/XRayRecord.h b/include/llvm/XRay/XRayRecord.h
index 7685ec95838a..546b02303b6a 100644
--- a/include/llvm/XRay/XRayRecord.h
+++ b/include/llvm/XRay/XRayRecord.h
@@ -1,9 +1,8 @@
 //===- XRayRecord.h - XRay Trace Record -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/XRay/YAMLXRayRecord.h b/include/llvm/XRay/YAMLXRayRecord.h
index 6150196ed98d..bc8b03548d6e 100644
--- a/include/llvm/XRay/YAMLXRayRecord.h
+++ b/include/llvm/XRay/YAMLXRayRecord.h
@@ -1,9 +1,8 @@
 //===- YAMLXRayRecord.h - XRay Record YAML Support Definitions ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index bcc12534ec85..9c4668e1473c 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -5,6 +5,7 @@ module LLVM_Analysis {
 
   // This is intended for (repeated) textual inclusion.
   textual header "Analysis/TargetLibraryInfo.def"
+  textual header "Analysis/VecFuncs.def"
 }
 
 module LLVM_AsmParser {
@@ -53,6 +54,7 @@ module LLVM_BinaryFormat {
     textual header "BinaryFormat/Dwarf.def"
     textual header "BinaryFormat/DynamicTags.def"
     textual header "BinaryFormat/MachO.def"
+    textual header "BinaryFormat/MinidumpConstants.def"
     textual header "BinaryFormat/ELFRelocs/AArch64.def"
     textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
     textual header "BinaryFormat/ELFRelocs/ARM.def"
@@ -220,7 +222,7 @@ module LLVM_intrinsic_gen {
   module IR_ConstantRange { header "IR/ConstantRange.h" export * }
   module IR_Dominators { header "IR/Dominators.h" export * }
   module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
-  module IR_DomTreeUpdater { header "IR/DomTreeUpdater.h" export * }
+  module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
   module IR_IRBuilder { header "IR/IRBuilder.h" export * }
   module IR_PassManager { header "IR/PassManager.h" export * }
   module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * }
@@ -235,6 +237,7 @@ module LLVM_intrinsic_gen {
   }
   module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * }
   module IR_PatternMatch { header "IR/PatternMatch.h" export * }
+  module IR_SafepointIRVerifier { header "IR/SafepointIRVerifier.h" export * }
   module IR_Statepoint { header "IR/Statepoint.h" export * }
 
   export *
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 3446aef39938..32241e355eb8 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -80,12 +79,16 @@ AAResults::~AAResults() {
 
 bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
                            FunctionAnalysisManager::Invalidator &Inv) {
-  // Check if the AA manager itself has been invalidated.
+  // AAResults preserves the AAManager by default, due to the stateless nature
+  // of AliasAnalysis. There is no need to check whether it has been preserved
+  // explicitly. Check if any module dependency was invalidated and caused the
+  // AAManager to be invalidated. Invalidate ourselves in that case.
   auto PAC = PA.getChecker<AAManager>();
-  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
-    return true; // The manager needs to be blown away, clear everything.
+  if (!PAC.preservedWhenStateless())
+    return true;
 
-  // Check all of the dependencies registered.
+  // Check if any of the function dependencies were invalidated, and invalidate
+  // ourselves in that case.
   for (AnalysisKey *ID : AADeps)
     if (Inv.invalidate(ID, F, PA))
       return true;
@@ -100,8 +103,14 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
 
 AliasResult AAResults::alias(const MemoryLocation &LocA,
                              const MemoryLocation &LocB) {
+  AAQueryInfo AAQIP;
+  return alias(LocA, LocB, AAQIP);
+}
+
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+                             const MemoryLocation &LocB, AAQueryInfo &AAQI) {
   for (const auto &AA : AAs) {
-    auto Result = AA->alias(LocA, LocB);
+    auto Result = AA->alias(LocA, LocB, AAQI);
     if (Result != MayAlias)
       return Result;
   }
@@ -110,8 +119,14 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
 
 bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
                                        bool OrLocal) {
+  AAQueryInfo AAQIP;
+  return pointsToConstantMemory(Loc, AAQIP, OrLocal);
+}
+
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+                                       AAQueryInfo &AAQI, bool OrLocal) {
   for (const auto &AA : AAs)
-    if (AA->pointsToConstantMemory(Loc, OrLocal))
+    if (AA->pointsToConstantMemory(Loc, AAQI, OrLocal))
       return true;
 
   return false;
@@ -132,10 +147,16 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
 }
 
 ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(I, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
+                                    AAQueryInfo &AAQI) {
   // We may have two calls.
   if (const auto *Call1 = dyn_cast<CallBase>(I)) {
     // Check if the two calls modify the same memory.
-    return getModRefInfo(Call1, Call2);
+    return getModRefInfo(Call1, Call2, AAQI);
   } else if (I->isFenceLike()) {
     // If this is a fence, just return ModRef.
     return ModRefInfo::ModRef;
@@ -145,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
     // is that if the call references what this instruction
     // defines, it must be clobbered by this location.
     const MemoryLocation DefLoc = MemoryLocation::get(I);
-    ModRefInfo MR = getModRefInfo(Call2, DefLoc);
+    ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
     if (isModOrRefSet(MR))
       return setModAndRef(MR);
   }
@@ -154,10 +175,17 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
 
 ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(Call, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   ModRefInfo Result = ModRefInfo::ModRef;
 
   for (const auto &AA : AAs) {
-    Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
+    Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc, AAQI));
 
     // Early-exit the moment we reach the bottom of the lattice.
     if (isNoModRef(Result))
@@ -215,10 +243,16 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
 
 ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
                                     const CallBase *Call2) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(Call1, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+                                    const CallBase *Call2, AAQueryInfo &AAQI) {
   ModRefInfo Result = ModRefInfo::ModRef;
 
   for (const auto &AA : AAs) {
-    Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
+    Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2, AAQI));
 
     // Early-exit the moment we reach the bottom of the lattice.
     if (isNoModRef(Result))
@@ -397,6 +431,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
 
 ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(L, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   // Be conservative in the face of atomic.
   if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
     return ModRefInfo::ModRef;
@@ -404,7 +444,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(L), Loc);
+    AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI);
     if (AR == NoAlias)
       return ModRefInfo::NoModRef;
     if (AR == MustAlias)
@@ -416,12 +456,18 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
 
 ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(S, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   // Be conservative in the face of atomic.
   if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
     return ModRefInfo::ModRef;
 
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(S), Loc);
+    AliasResult AR = alias(MemoryLocation::get(S), Loc, AAQI);
     // If the store address cannot alias the pointer in question, then the
     // specified memory cannot be modified by the store.
     if (AR == NoAlias)
@@ -429,7 +475,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
 
     // If the pointer is a pointer to constant memory, then it could not have
     // been modified by this store.
-    if (pointsToConstantMemory(Loc))
+    if (pointsToConstantMemory(Loc, AAQI))
       return ModRefInfo::NoModRef;
 
     // If the store address aliases the pointer as must alias, set Must.
@@ -442,17 +488,31 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
 }
 
 ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(S, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   // If we know that the location is a constant memory location, the fence
   // cannot modify this location.
-  if (Loc.Ptr && pointsToConstantMemory(Loc))
+  if (Loc.Ptr && pointsToConstantMemory(Loc, AAQI))
     return ModRefInfo::Ref;
   return ModRefInfo::ModRef;
 }
 
 ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(V, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(V), Loc);
+    AliasResult AR = alias(MemoryLocation::get(V), Loc, AAQI);
     // If the va_arg address cannot alias the pointer in question, then the
     // specified memory cannot be accessed by the va_arg.
     if (AR == NoAlias)
@@ -460,7 +520,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
 
     // If the pointer is a pointer to constant memory, then it could not have
     // been modified by this va_arg.
-    if (pointsToConstantMemory(Loc))
+    if (pointsToConstantMemory(Loc, AAQI))
       return ModRefInfo::NoModRef;
 
     // If the va_arg aliases the pointer as must alias, set Must.
@@ -474,10 +534,17 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
 
 ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(CatchPad, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   if (Loc.Ptr) {
     // If the pointer is a pointer to constant memory,
     // then it could not have been modified by this catchpad.
-    if (pointsToConstantMemory(Loc))
+    if (pointsToConstantMemory(Loc, AAQI))
       return ModRefInfo::NoModRef;
   }
 
@@ -487,10 +554,17 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
 
 ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(CatchRet, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   if (Loc.Ptr) {
     // If the pointer is a pointer to constant memory,
     // then it could not have been modified by this catchpad.
-    if (pointsToConstantMemory(Loc))
+    if (pointsToConstantMemory(Loc, AAQI))
       return ModRefInfo::NoModRef;
   }
 
@@ -500,12 +574,19 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
 
 ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(CX, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
   if (isStrongerThanMonotonic(CX->getSuccessOrdering()))
     return ModRefInfo::ModRef;
 
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(CX), Loc);
+    AliasResult AR = alias(MemoryLocation::get(CX), Loc, AAQI);
     // If the cmpxchg address does not alias the location, it does not access
     // it.
     if (AR == NoAlias)
@@ -521,12 +602,19 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
 
 ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
                                     const MemoryLocation &Loc) {
+  AAQueryInfo AAQIP;
+  return getModRefInfo(RMW, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
   // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
   if (isStrongerThanMonotonic(RMW->getOrdering()))
     return ModRefInfo::ModRef;
 
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(RMW), Loc);
+    AliasResult AR = alias(MemoryLocation::get(RMW), Loc, AAQI);
     // If the atomicrmw address does not alias the location, it does not access
     // it.
     if (AR == NoAlias)
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 85dd4fe95b33..e83703867e09 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
 //===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp
index 2b4879453beb..2f3396a44117 100644
--- a/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/lib/Analysis/AliasAnalysisSummary.cpp
@@ -73,28 +73,28 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs Attr) {
 }
 
 Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
-                                                      CallSite CS) {
+                                                      CallBase &Call) {
   auto Index = IValue.Index;
-  auto Value = (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1);
-  if (Value->getType()->isPointerTy())
-    return InstantiatedValue{Value, IValue.DerefLevel};
+  auto *V = (Index == 0) ? &Call : Call.getArgOperand(Index - 1);
+  if (V->getType()->isPointerTy())
+    return InstantiatedValue{V, IValue.DerefLevel};
   return None;
 }
 
 Optional<InstantiatedRelation>
-instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
-  auto From = instantiateInterfaceValue(ERelation.From, CS);
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) {
+  auto From = instantiateInterfaceValue(ERelation.From, Call);
   if (!From)
     return None;
-  auto To = instantiateInterfaceValue(ERelation.To, CS);
+  auto To = instantiateInterfaceValue(ERelation.To, Call);
   if (!To)
     return None;
   return InstantiatedRelation{*From, *To, ERelation.Offset};
 }
 
 Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
-                                                        CallSite CS) {
-  auto Value = instantiateInterfaceValue(EAttr.IValue, CS);
+                                                        CallBase &Call) {
+  auto Value = instantiateInterfaceValue(EAttr.IValue, Call);
   if (!Value)
     return None;
   return InstantiatedAttr{*Value, EAttr.Attr};
diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h
index fb93a12420f8..fe75b03cedef 100644
--- a/lib/Analysis/AliasAnalysisSummary.h
+++ b/lib/Analysis/AliasAnalysisSummary.h
@@ -1,9 +1,8 @@
 //=====- CFLSummary.h - Abstract stratified sets implementation. --------=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -38,7 +37,7 @@
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
 #include <bitset>
 
 namespace llvm {
@@ -196,12 +195,13 @@ struct AliasSummary {
   SmallVector<ExternalAttribute, 8> RetParamAttributes;
 };
 
-/// This is the result of instantiating InterfaceValue at a particular callsite
+/// This is the result of instantiating InterfaceValue at a particular call
 struct InstantiatedValue {
   Value *Val;
   unsigned DerefLevel;
 };
-Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue, CallSite);
+Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
+                                                      CallBase &Call);
 
 inline bool operator==(InstantiatedValue LHS, InstantiatedValue RHS) {
   return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel;
@@ -229,8 +229,8 @@ struct InstantiatedRelation {
   InstantiatedValue From, To;
   int64_t Offset;
 };
-Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
-                                                           CallSite);
+Optional<InstantiatedRelation>
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call);
 
 /// This is the result of instantiating ExternalAttribute at a particular
 /// callsite
@@ -238,8 +238,8 @@ struct InstantiatedAttr {
   InstantiatedValue IValue;
   AliasAttrs Attr;
 };
-Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute,
-                                                        CallSite);
+Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
+                                                        CallBase &Call);
 }
 
 template <> struct DenseMapInfo<cflaa::InstantiatedValue> {
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index f6ad704cc914..a6e5b9fab558 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -1,9 +1,8 @@
 //===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,9 @@
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -127,24 +128,24 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
 
 void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
                           LocationSize Size, const AAMDNodes &AAInfo,
-                          bool KnownMustAlias) {
+                          bool KnownMustAlias, bool SkipSizeUpdate) {
   assert(!Entry.hasAliasSet() && "Entry already in set!");
 
   // Check to see if we have to downgrade to _may_ alias.
-  if (isMustAlias() && !KnownMustAlias)
+  if (isMustAlias())
     if (PointerRec *P = getSomePointer()) {
-      AliasAnalysis &AA = AST.getAliasAnalysis();
-      AliasResult Result =
-          AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
-                   MemoryLocation(Entry.getValue(), Size, AAInfo));
-      if (Result != MustAlias) {
-        Alias = SetMayAlias;
-        AST.TotalMayAliasSetSize += size();
-      } else {
-        // First entry of must alias must have maximum size!
+      if (!KnownMustAlias) {
+        AliasAnalysis &AA = AST.getAliasAnalysis();
+        AliasResult Result = AA.alias(
+            MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
+            MemoryLocation(Entry.getValue(), Size, AAInfo));
+        if (Result != MustAlias) {
+          Alias = SetMayAlias;
+          AST.TotalMayAliasSetSize += size();
+        }
+        assert(Result != NoAlias && "Cannot be part of must set!");
+      } else if (!SkipSizeUpdate)
         P->updateSizeAndAAInfo(Size, AAInfo);
-      }
-      assert(Result != NoAlias && "Cannot be part of must set!");
     }
 
   Entry.setAliasSet(this);
@@ -184,14 +185,15 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
   Access = ModRefAccess;
 }
 
-/// aliasesPointer - Return true if the specified pointer "may" (or must)
-/// alias one of the members in the set.
+/// aliasesPointer - If the specified pointer "may" (or must) alias one of the
+/// members in the set return the appropriate AliasResult. Otherwise return
+/// NoAlias.
 ///
-bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
-                              const AAMDNodes &AAInfo,
-                              AliasAnalysis &AA) const {
+AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
+                                     const AAMDNodes &AAInfo,
+                                     AliasAnalysis &AA) const {
   if (AliasAny)
-    return true;
+    return MayAlias;
 
   if (Alias == SetMustAlias) {
     assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -208,9 +210,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
   // If this is a may-alias set, we have to check all of the pointers in the set
   // to be sure it doesn't alias the set...
   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (AA.alias(MemoryLocation(Ptr, Size, AAInfo),
-                 MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
-      return true;
+    if (AliasResult AR = AA.alias(
+            MemoryLocation(Ptr, Size, AAInfo),
+            MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
+      return AR;
 
   // Check the unknown instructions...
   if (!UnknownInsts.empty()) {
@@ -218,10 +221,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
       if (auto *Inst = getUnknownInst(i))
         if (isModOrRefSet(
                 AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo))))
-          return true;
+          return MayAlias;
   }
 
-  return false;
+  return NoAlias;
 }
 
 bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
@@ -288,25 +291,38 @@ void AliasSetTracker::clear() {
   AliasSets.clear();
 }
 
-
 /// mergeAliasSetsForPointer - Given a pointer, merge all alias sets that may
 /// alias the pointer. Return the unified set, or nullptr if no set that aliases
-/// the pointer was found.
+/// the pointer was found. MustAliasAll is set to true iff every merged set
+/// reported MustAlias for the pointer; it is also true if no set was found.
 AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
                                                     LocationSize Size,
-                                                    const AAMDNodes &AAInfo) {
+                                                    const AAMDNodes &AAInfo,
+                                                    bool &MustAliasAll) {
   AliasSet *FoundSet = nullptr;
+  AliasResult AllAR = MustAlias; // Stays MustAlias if no set aliases Ptr.
   for (iterator I = begin(), E = end(); I != E;) {
     iterator Cur = I++;
-    if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
+    if (Cur->Forward)
+      continue;
+
+    AliasResult AR = Cur->aliasesPointer(Ptr, Size, AAInfo, AA);
+    if (AR == NoAlias)
+      continue;
+
+    AllAR =
+        AliasResult(AllAR & AR); // Possible downgrade to May/Partial, even No
 
-    if (!FoundSet) {      // If this is the first alias set ptr can go into.
-      FoundSet = &*Cur;   // Remember it.
-    } else {              // Otherwise, we must merge the sets.
-      FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.
+    if (!FoundSet) {
+      // If this is the first alias set ptr can go into, remember it.
+      FoundSet = &*Cur;
+    } else {
+      // Otherwise, we must merge the sets.
+      FoundSet->mergeSetIn(*Cur, *this);
     }
   }
 
+  MustAliasAll = (AllAR == MustAlias);
   return FoundSet;
 }
 
@@ -316,10 +332,13 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
     iterator Cur = I++;
     if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
       continue;
-    if (!FoundSet)            // If this is the first alias set ptr can go into.
-      FoundSet = &*Cur;       // Remember it.
-    else   // Otherwise, we must merge the sets.
-      FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.
+    if (!FoundSet) {
+      // If this is the first alias set ptr can go into, remember it.
+      FoundSet = &*Cur;
+    } else {
+      // Otherwise, we must merge the sets.
+      FoundSet->mergeSetIn(*Cur, *this);
+    }
   }
   return FoundSet;
 }
@@ -329,7 +348,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
   Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
   const LocationSize Size = MemLoc.Size;
   const AAMDNodes &AAInfo = MemLoc.AATags;
-  
+
   AliasSet::PointerRec &Entry = getEntryFor(Pointer);
 
   if (AliasAnyAS) {
@@ -348,6 +367,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
     return *AliasAnyAS;
   }
 
+  bool MustAliasAll = false;
   // Check to see if the pointer is already known.
   if (Entry.hasAliasSet()) {
     // If the size changed, we may need to merge several alias sets.
@@ -356,20 +376,21 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
     // is NoAlias, mergeAliasSetsForPointer(undef, ...) will not find the
     // the right set for undef, even if it exists.
     if (Entry.updateSizeAndAAInfo(Size, AAInfo))
-      mergeAliasSetsForPointer(Pointer, Size, AAInfo);
+      mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll);
     // Return the set!
     return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
   }
 
-  if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) {
+  if (AliasSet *AS =
+          mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll)) {
     // Add it to the alias set it aliases.
-    AS->addPointer(*this, Entry, Size, AAInfo);
+    AS->addPointer(*this, Entry, Size, AAInfo, MustAliasAll);
     return *AS;
   }
 
   // Otherwise create a new alias set to hold the loaded pointer.
   AliasSets.push_back(new AliasSet());
-  AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
+  AliasSets.back().addPointer(*this, Entry, Size, AAInfo, true);
   return AliasSets.back();
 }
 
@@ -422,14 +443,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
   if (!Inst->mayReadOrWriteMemory())
     return; // doesn't alias anything
 
-  AliasSet *AS = findAliasSetForUnknownInst(Inst);
-  if (AS) {
+  if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) {
     AS->addUnknownInst(Inst, AA);
     return;
   }
   AliasSets.push_back(new AliasSet());
-  AS = &AliasSets.back();
-  AS->addUnknownInst(Inst, AA);
+  AliasSets.back().addUnknownInst(Inst, AA);
 }
 
 void AliasSetTracker::add(Instruction *I) {
@@ -516,6 +535,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
   }
 }
 
+void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() {
+  assert(MSSA && L && "MSSA and L must be available");
+  for (const BasicBlock *Block : L->blocks()) // Walk every block of the loop.
+    if (auto *BlockAccesses = MSSA->getBlockAccesses(Block))
+      for (auto &MA : *BlockAccesses)
+        if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(&MA))
+          add(UseOrDef->getMemoryInst()); // Other access kinds are skipped.
+}
+
 // deleteValue method - This method is used to remove a pointer value from the
 // AliasSetTracker entirely.  It should be used when an instruction is deleted
 // from the program to update the AST.  If you don't use this, you would have
@@ -563,9 +591,8 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
   I = PointerMap.find_as(From);
   // Add it to the alias set it aliases...
   AliasSet *AS = I->second->getAliasSet(*this);
-  AS->addPointer(*this, Entry, I->second->getSize(),
-                 I->second->getAAInfo(),
-                 true);
+  AS->addPointer(*this, Entry, I->second->getSize(), I->second->getAAInfo(),
+                 true, true);
 }
 
 AliasSet &AliasSetTracker::mergeAllAliasSets() {
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index bb8742123a0f..d46a8d8e306c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -1,9 +1,8 @@
 //===-- Analysis.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 8bfd24ccf77b..cf2f845dee0a 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -1,9 +1,8 @@
 //===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -54,11 +53,11 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
   return AVIP.first->second;
 }
 
-void AssumptionCache::updateAffectedValues(CallInst *CI) {
+static void findAffectedValues(CallInst *CI,
+                               SmallVectorImpl<Value *> &Affected) {
   // Note: This code must be kept in-sync with the code in
   // computeKnownBitsFromAssume in ValueTracking.
 
-  SmallVector<Value *, 16> Affected;
   auto AddAffected = [&Affected](Value *V) {
     if (isa<Argument>(V)) {
       Affected.push_back(V);
@@ -109,6 +108,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
       AddAffectedFromEq(B);
     }
   }
+}
+
+void AssumptionCache::updateAffectedValues(CallInst *CI) {
+  SmallVector<Value *, 16> Affected;
+  findAffectedValues(CI, Affected);
 
   for (auto &AV : Affected) {
     auto &AVV = getOrInsertAffectedValues(AV);
@@ -117,6 +121,18 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
   }
 }
 
+void AssumptionCache::unregisterAssumption(CallInst *CI) {
+  SmallVector<Value *, 16> Affected;
+  findAffectedValues(CI, Affected);
+  for (auto &AV : Affected) { // Drop the entry of each value CI affects.
+    auto AVI = AffectedValues.find_as(AV);
+    if (AVI != AffectedValues.end())
+      AffectedValues.erase(AVI);
+  }
+  // erase_if, not remove_if: remove_if only shifts elements, erasing nothing.
+  erase_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; });
+}
+
 void AssumptionCache::AffectedValueCallbackVH::deleted() {
   auto AVI = AC->AffectedValues.find(getValPtr());
   if (AVI != AC->AffectedValues.end())
@@ -241,6 +257,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
   return *IP.first->second;
 }
 
+AssumptionCache *AssumptionCacheTracker::lookupAssumptionCache(Function &F) {
+  // Lookup only: yields the cache already recorded for F, else nullptr.
+  auto Found = AssumptionCaches.find_as(&F);
+  bool IsKnown = Found != AssumptionCaches.end();
+  return IsKnown ? Found->second.get() : nullptr;
+}
+
 void AssumptionCacheTracker::verifyAnalysis() const {
   // FIXME: In the long term the verifier should not be controllable with a
   // flag. We should either fix all passes to correctly update the assumption
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 332eeaa00e73..3721c99883b8 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -117,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
 
 /// Returns true if the pointer is to a function-local object that never
 /// escapes from the function.
-static bool isNonEscapingLocalObject(const Value *V) {
+static bool isNonEscapingLocalObject(
+    const Value *V,
+    SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) {
+  SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
+  if (IsCapturedCache) { // Optional memo of this function's result per value.
+    bool Inserted; // insert() seeds 'false', the result for non-local values.
+    std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
+    if (!Inserted)
+      // Cache hit: reuse the stored result (true == V is NOT captured).
+      return CacheIt->second;
+  }
 
   // If this is a local allocation, check to see if it escapes.
-  if (isa<AllocaInst>(V) || isNoAliasCall(V))
+  if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
     // Set StoreCaptures to True so that we can assume in our callers that the
     // pointer is not the result of a load instruction. Currently
     // PointerMayBeCaptured doesn't have any special analysis for the
     // StoreCaptures=false case; if it did, our callers could be refined to be
     // more precise.
-    return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+    auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+    if (IsCapturedCache)
+      CacheIt->second = Ret; // Overwrite the 'false' placeholder.
+    return Ret;
+  }
 
   // If this is an argument that corresponds to a byval or noalias argument,
   // then it has not escaped before entering the function.  Check if it escapes
   // inside the function.
   if (const Argument *A = dyn_cast<Argument>(V))
-    if (A->hasByValAttr() || A->hasNoAliasAttr())
+    if (A->hasByValAttr() || A->hasNoAliasAttr()) {
       // Note even if the argument is marked nocapture, we still need to check
       // for copies made inside the function. The nocapture attribute only
       // specifies that there are no copies made that outlive the function.
-      return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+      auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+      if (IsCapturedCache)
+        CacheIt->second = Ret; // Overwrite the 'false' placeholder.
+      return Ret;
+    }
 
   return false;
 }
@@ -613,7 +631,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
 /// the function, with global constants being considered local to all
 /// functions.
 bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
-                                           bool OrLocal) {
+                                           AAQueryInfo &AAQI, bool OrLocal) {
   assert(Visited.empty() && "Visited must be cleared after use!");
 
   unsigned MaxLookup = 8;
@@ -623,7 +641,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
     const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
-      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+      return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
     }
 
     // An alloca instruction defines local memory.
@@ -637,7 +655,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
       // others.  GV may even be a declaration, not a definition.
       if (!GV->isConstant()) {
         Visited.clear();
-        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
       }
       continue;
     }
@@ -655,7 +673,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
       // Don't bother inspecting phi nodes with many operands.
       if (PN->getNumIncomingValues() > MaxLookup) {
         Visited.clear();
-        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
       }
       for (Value *IncValue : PN->incoming_values())
         Worklist.push_back(IncValue);
@@ -664,7 +682,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
 
     // Otherwise be conservative.
     Visited.clear();
-    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
   } while (!Worklist.empty() && --MaxLookup);
 
   Visited.clear();
@@ -799,24 +817,25 @@ static bool notDifferentParent(const Value *O1, const Value *O2) {
 #endif
 
 AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) {
+                                 const MemoryLocation &LocB,
+                                 AAQueryInfo &AAQI) {
   assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
          "BasicAliasAnalysis doesn't support interprocedural queries.");
 
   // If we have a directly cached entry for these locations, we have recursed
   // through this once, so just return the cached results. Notably, when this
   // happens, we don't clear the cache.
-  auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
-  if (CacheIt != AliasCache.end())
+  auto CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocA, LocB));
+  if (CacheIt != AAQI.AliasCache.end())
+    return CacheIt->second;
+
+  CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocB, LocA));
+  if (CacheIt != AAQI.AliasCache.end())
     return CacheIt->second;
 
   AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
-                                 LocB.Size, LocB.AATags);
-  // AliasCache rarely has more than 1 or 2 elements, always use
-  // shrink_and_clear so it quickly returns to the inline capacity of the
-  // SmallDenseMap if it ever grows larger.
-  // FIXME: This should really be shrink_to_inline_capacity_and_clear().
-  AliasCache.shrink_and_clear();
+                                 LocB.Size, LocB.AATags, AAQI);
+
   VisitedPhiBBs.clear();
   return Alias;
 }
@@ -828,7 +847,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
 /// say much about this query.  We do, however, use simple "address taken"
 /// analysis on local objects.
 ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
-                                        const MemoryLocation &Loc) {
+                                        const MemoryLocation &Loc,
+                                        AAQueryInfo &AAQI) {
   assert(notDifferentParent(Call, Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
@@ -855,7 +875,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
   // then the call can not mod/ref the pointer unless the call takes the pointer
   // as an argument, and itself doesn't capture it.
   if (!isa<Constant>(Object) && Call != Object &&
-      isNonEscapingLocalObject(Object)) {
+      isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) {
 
     // Optimistically assume that call doesn't touch Object and check this
     // assumption in the following loop.
@@ -881,11 +901,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
 
       // If this is a no-capture pointer argument, see if we can tell that it
       // is impossible to alias the pointer we're checking.
-      AliasResult AR =
-          getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+      AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI),
+                                                MemoryLocation(Object), AAQI);
       if (AR != MustAlias)
         IsMustAlias = false;
-      // Operand doesnt alias 'Object', continue looking for other aliases
+      // Operand doesn't alias 'Object', continue looking for other aliases
       if (AR == NoAlias)
         continue;
       // Operand aliases 'Object', but call doesn't modify it. Strengthen
@@ -928,7 +948,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
   if (isMallocOrCallocLikeFn(Call, &TLI)) {
     // Be conservative if the accessed pointer may alias the allocation -
     // fallback to the generic handling below.
-    if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
+    if (getBestAAResults().alias(MemoryLocation(Call), Loc, AAQI) == NoAlias)
       return ModRefInfo::NoModRef;
   }
 
@@ -940,11 +960,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
     AliasResult SrcAA, DestAA;
 
     if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
-                                          Loc)) == MustAlias)
+                                          Loc, AAQI)) == MustAlias)
       // Loc is exactly the memcpy source thus disjoint from memcpy dest.
       return ModRefInfo::Ref;
     if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
-                                           Loc)) == MustAlias)
+                                           Loc, AAQI)) == MustAlias)
       // The converse case.
       return ModRefInfo::Mod;
 
@@ -1000,11 +1020,12 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
     return ModRefInfo::Ref;
 
   // The AAResultBase base class has some smarts, lets use them.
-  return AAResultBase::getModRefInfo(Call, Loc);
+  return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 }
 
 ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
-                                        const CallBase *Call2) {
+                                        const CallBase *Call2,
+                                        AAQueryInfo &AAQI) {
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
@@ -1020,7 +1041,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
   // heap state at the point the guard is issued needs to be consistent in case
   // the guard invokes the "deopt" continuation.
 
-  // NB! This function is *not* commutative, so we specical case two
+  // NB! This function is *not* commutative, so we special case two
   // possibilities for guard intrinsics.
 
   if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
@@ -1034,7 +1055,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
                : ModRefInfo::NoModRef;
 
   // The AAResultBase base class has some smarts, lets use them.
-  return AAResultBase::getModRefInfo(Call1, Call2);
+  return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 }
 
 /// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
@@ -1266,11 +1287,10 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
 /// We know that V1 is a GEP, but we don't know anything about V2.
 /// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
 /// V2.
-AliasResult
-BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
-                        const AAMDNodes &V1AAInfo, const Value *V2,
-                        LocationSize V2Size, const AAMDNodes &V2AAInfo,
-                        const Value *UnderlyingV1, const Value *UnderlyingV2) {
+AliasResult BasicAAResult::aliasGEP(
+    const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
+    const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
+    const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
   DecomposedGEP DecompGEP1, DecompGEP2;
   unsigned MaxPointerSize = getMaxPointerSize(DL);
   DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
@@ -1306,14 +1326,14 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
     // Do the base pointers alias?
     AliasResult BaseAlias =
         aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
-                   UnderlyingV2, LocationSize::unknown(), AAMDNodes());
+                   UnderlyingV2, LocationSize::unknown(), AAMDNodes(), AAQI);
 
     // Check for geps of non-aliasing underlying pointers where the offsets are
     // identical.
     if ((BaseAlias == MayAlias) && V1Size == V2Size) {
       // Do the base pointers alias assuming type and size.
-      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
-                                                UnderlyingV2, V2Size, V2AAInfo);
+      AliasResult PreciseBaseAlias = aliasCheck(
+          UnderlyingV1, V1Size, V1AAInfo, UnderlyingV2, V2Size, V2AAInfo, AAQI);
       if (PreciseBaseAlias == NoAlias) {
         // See if the computed offset from the common pointer tells us about the
         // relation of the resulting pointer.
@@ -1368,9 +1388,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
     if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
       return MayAlias;
 
-    AliasResult R =
-        aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
-                   LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
+    AliasResult R = aliasCheck(UnderlyingV1, LocationSize::unknown(),
+                               AAMDNodes(), V2, LocationSize::unknown(),
+                               V2AAInfo, AAQI, nullptr, UnderlyingV2);
     if (R != MustAlias) {
       // If V2 may alias GEP base pointer, conservatively returns MayAlias.
       // If V2 is known not to alias GEP base pointer, then the two values
@@ -1504,37 +1524,35 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
 
 /// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
 /// against another.
-AliasResult BasicAAResult::aliasSelect(const SelectInst *SI,
-                                       LocationSize SISize,
-                                       const AAMDNodes &SIAAInfo,
-                                       const Value *V2, LocationSize V2Size,
-                                       const AAMDNodes &V2AAInfo,
-                                       const Value *UnderV2) {
+AliasResult
+BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize,
+                           const AAMDNodes &SIAAInfo, const Value *V2,
+                           LocationSize V2Size, const AAMDNodes &V2AAInfo,
+                           const Value *UnderV2, AAQueryInfo &AAQI) {
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
     if (SI->getCondition() == SI2->getCondition()) {
-      AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
-                                     SI2->getTrueValue(), V2Size, V2AAInfo);
+      AliasResult Alias =
+          aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, SI2->getTrueValue(),
+                     V2Size, V2AAInfo, AAQI);
       if (Alias == MayAlias)
         return MayAlias;
       AliasResult ThisAlias =
           aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
-                     SI2->getFalseValue(), V2Size, V2AAInfo);
+                     SI2->getFalseValue(), V2Size, V2AAInfo, AAQI);
       return MergeAliasResults(ThisAlias, Alias);
     }
 
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
-  AliasResult Alias =
-      aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
-                 SISize, SIAAInfo, UnderV2);
+  AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+                                 SISize, SIAAInfo, AAQI, UnderV2);
   if (Alias == MayAlias)
     return MayAlias;
 
-  AliasResult ThisAlias =
-      aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
-                 UnderV2);
+  AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(),
+                                     SISize, SIAAInfo, AAQI, UnderV2);
   return MergeAliasResults(ThisAlias, Alias);
 }
 
@@ -1544,7 +1562,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
                                     const AAMDNodes &PNAAInfo, const Value *V2,
                                     LocationSize V2Size,
                                     const AAMDNodes &V2AAInfo,
-                                    const Value *UnderV2) {
+                                    const Value *UnderV2, AAQueryInfo &AAQI) {
   // Track phi nodes we have visited. We use this information when we determine
   // value equivalence.
   VisitedPhiBBs.insert(PN->getParent());
@@ -1554,8 +1572,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
   // on corresponding edges.
   if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
     if (PN2->getParent() == PN->getParent()) {
-      LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
-                   MemoryLocation(V2, V2Size, V2AAInfo));
+      AAQueryInfo::LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
+                                MemoryLocation(V2, V2Size, V2AAInfo));
       if (PN > V2)
         std::swap(Locs.first, Locs.second);
       // Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1566,25 +1584,33 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
       // that causes a MayAlias.
       // Pretend the phis do not alias.
       AliasResult Alias = NoAlias;
-      assert(AliasCache.count(Locs) &&
-             "There must exist an entry for the phi node");
-      AliasResult OrigAliasResult = AliasCache[Locs];
-      AliasCache[Locs] = NoAlias;
+      AliasResult OrigAliasResult;
+      {
+        // Limited lifetime iterator invalidated by the aliasCheck call below.
+        auto CacheIt = AAQI.AliasCache.find(Locs);
+        assert((CacheIt != AAQI.AliasCache.end()) &&
+               "There must exist an entry for the phi node");
+        OrigAliasResult = CacheIt->second;
+        CacheIt->second = NoAlias;
+      }
 
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
         AliasResult ThisAlias =
             aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
                        PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
-                       V2Size, V2AAInfo);
+                       V2Size, V2AAInfo, AAQI);
         Alias = MergeAliasResults(ThisAlias, Alias);
         if (Alias == MayAlias)
           break;
       }
 
       // Reset if speculation failed.
-      if (Alias != NoAlias)
-        AliasCache[Locs] = OrigAliasResult;
-
+      if (Alias != NoAlias) {
+        auto Pair =
+            AAQI.AliasCache.insert(std::make_pair(Locs, OrigAliasResult));
+        assert(!Pair.second && "Entry must have existed");
+        Pair.first->second = OrigAliasResult;
+      }
       return Alias;
     }
 
@@ -1658,9 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
   if (isRecursive)
     PNSize = LocationSize::unknown();
 
-  AliasResult Alias =
-      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
-                 PNSize, PNAAInfo, UnderV2);
+  AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize,
+                                 PNAAInfo, AAQI, UnderV2);
 
   // Early exit if the check of the first PHI source against V2 is MayAlias.
   // Other results are not possible.
@@ -1673,7 +1698,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
     Value *V = V1Srcs[i];
 
     AliasResult ThisAlias =
-        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
+        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, AAQI, UnderV2);
     Alias = MergeAliasResults(ThisAlias, Alias);
     if (Alias == MayAlias)
       break;
@@ -1687,7 +1712,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
 AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
                                       AAMDNodes V1AAInfo, const Value *V2,
                                       LocationSize V2Size, AAMDNodes V2AAInfo,
-                                      const Value *O1, const Value *O2) {
+                                      AAQueryInfo &AAQI, const Value *O1,
+                                      const Value *O2) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are.
   if (V1Size.isZero() || V2Size.isZero())
@@ -1755,9 +1781,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
     // temporary store the nocapture argument's value in a temporary memory
     // location if that memory location doesn't escape. Or it may pass a
     // nocapture value to other functions as long as they don't capture it.
-    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+    if (isEscapeSource(O1) &&
+        isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache))
       return NoAlias;
-    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+    if (isEscapeSource(O2) &&
+        isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache))
       return NoAlias;
   }
 
@@ -1772,12 +1800,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
 
   // Check the cache before climbing up use-def chains. This also terminates
   // otherwise infinitely recursive queries.
-  LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
-               MemoryLocation(V2, V2Size, V2AAInfo));
+  AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
+                            MemoryLocation(V2, V2Size, V2AAInfo));
   if (V1 > V2)
     std::swap(Locs.first, Locs.second);
-  std::pair<AliasCacheTy::iterator, bool> Pair =
-      AliasCache.insert(std::make_pair(Locs, MayAlias));
+  std::pair<AAQueryInfo::AliasCacheT::iterator, bool> Pair =
+      AAQI.AliasCache.try_emplace(Locs, MayAlias);
   if (!Pair.second)
     return Pair.first->second;
 
@@ -1791,9 +1819,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
     AliasResult Result =
-        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
-    if (Result != MayAlias)
-      return AliasCache[Locs] = Result;
+        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI);
+    if (Result != MayAlias) {
+      auto ItInsPair = AAQI.AliasCache.insert(std::make_pair(Locs, Result));
+      assert(!ItInsPair.second && "Entry must have existed");
+      ItInsPair.first->second = Result;
+      return Result;
+    }
   }
 
   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1803,10 +1835,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
-    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
-                                  V2, V2Size, V2AAInfo, O2);
-    if (Result != MayAlias)
-      return AliasCache[Locs] = Result;
+    AliasResult Result =
+        aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+    if (Result != MayAlias) {
+      Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+      assert(!Pair.second && "Entry must have existed");
+      return Pair.first->second = Result;
+    }
   }
 
   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1817,9 +1852,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   }
   if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
     AliasResult Result =
-        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
-    if (Result != MayAlias)
-      return AliasCache[Locs] = Result;
+        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+    if (Result != MayAlias) {
+      Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+      assert(!Pair.second && "Entry must have existed");
+      return Pair.first->second = Result;
+    }
   }
 
   // If both pointers are pointing into the same object and one of them
@@ -1827,14 +1865,19 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   if (O1 == O2)
     if (V1Size.isPrecise() && V2Size.isPrecise() &&
         (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
-         isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
-      return AliasCache[Locs] = PartialAlias;
+         isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) {
+      Pair = AAQI.AliasCache.try_emplace(Locs, PartialAlias);
+      assert(!Pair.second && "Entry must have existed");
+      return Pair.first->second = PartialAlias;
+    }
 
   // Recurse back into the best AA results we have, potentially with refined
   // memory locations. We have already ensured that BasicAA has a MayAlias
   // cache result for these, so any recursion back into BasicAA won't loop.
-  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
-  return AliasCache[Locs] = Result;
+  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second, AAQI);
+  Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+  assert(!Pair.second && "Entry must have existed");
+  return Pair.first->second = Result;
 }
 
 /// Check whether two Values can be considered equivalent.
@@ -1863,7 +1906,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
   // the Values cannot come from different iterations of a potential cycle the
   // phi nodes could be involved in.
   for (auto *P : VisitedPhiBBs)
-    if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
+    if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT, LI))
       return false;
 
   return true;
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index ef27c36517ea..de183bbde173 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
 //===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -204,11 +203,12 @@ BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
 }
 
 Optional<uint64_t>
-BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
+BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB,
+                                         bool AllowSynthetic) const {
   if (!BFI)
     return None;
 
-  return BFI->getBlockProfileCount(*getFunction(), BB);
+  return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic);
 }
 
 Optional<uint64_t>
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 08ebcc47a807..0db6dd04a7e8 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -1,9 +1,8 @@
 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -558,14 +557,17 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
 
 Optional<uint64_t>
 BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
-                                                 const BlockNode &Node) const {
-  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+                                                 const BlockNode &Node,
+                                                 bool AllowSynthetic) const {
+  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
+                                 AllowSynthetic);
 }
 
 Optional<uint64_t>
 BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
-                                                    uint64_t Freq) const {
-  auto EntryCount = F.getEntryCount();
+                                                    uint64_t Freq,
+                                                    bool AllowSynthetic) const {
+  auto EntryCount = F.getEntryCount(AllowSynthetic);
   if (!EntryCount)
     return None;
   // Use 128 bit APInt to do the arithmetic to avoid overflow.
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7f544b27fe9d..5eb95003f5d8 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
 //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -661,8 +660,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
   if (!CI)
     return false;
 
+  auto GetConstantInt = [](Value *V) {
+    if (auto *I = dyn_cast<BitCastInst>(V))
+      return dyn_cast<ConstantInt>(I->getOperand(0));
+    return dyn_cast<ConstantInt>(V);
+  };
+
   Value *RHS = CI->getOperand(1);
-  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+  ConstantInt *CV = GetConstantInt(RHS);
   if (!CV)
     return false;
 
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index aa880a62b754..18b83d6838cc 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -1,9 +1,8 @@
 //===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CFG.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -120,22 +120,33 @@ static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
   return L;
 }
 
-// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(const LoopInfo *LI,
-                             const BasicBlock *BB1, const BasicBlock *BB2) {
-  const Loop *L1 = getOutermostLoop(LI, BB1);
-  const Loop *L2 = getOutermostLoop(LI, BB2);
-  return L1 != nullptr && L1 == L2;
-}
-
 bool llvm::isPotentiallyReachableFromMany(
     SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
-    const DominatorTree *DT, const LoopInfo *LI) {
+    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+    const LoopInfo *LI) {
   // When the stop block is unreachable, it's dominated from everywhere,
   // regardless of whether there's a path between the two blocks.
   if (DT && !DT->isReachableFromEntry(StopBB))
     DT = nullptr;
 
+  // We can't skip directly from a block that dominates the stop block if the
+  // exclusion block is potentially in between.
+  if (ExclusionSet && !ExclusionSet->empty())
+    DT = nullptr;
+
+  // Normally any block in a loop is reachable from any other block in a loop,
+  // however excluded blocks might partition the body of a loop to make that
+  // untrue.
+  SmallPtrSet<const Loop *, 8> LoopsWithHoles;
+  if (LI && ExclusionSet) {
+    for (auto BB : *ExclusionSet) {
+      if (const Loop *L = getOutermostLoop(LI, BB))
+        LoopsWithHoles.insert(L);
+    }
+  }
+
+  const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
+
   // Limit the number of blocks we visit. The goal is to avoid run-away compile
   // times on large CFGs without hampering sensible code. Arbitrarily chosen.
   unsigned Limit = 32;
@@ -146,10 +157,23 @@ bool llvm::isPotentiallyReachableFromMany(
       continue;
     if (BB == StopBB)
       return true;
+    if (ExclusionSet && ExclusionSet->count(BB))
+      continue;
     if (DT && DT->dominates(BB, StopBB))
       return true;
-    if (LI && loopContainsBoth(LI, BB, StopBB))
-      return true;
+
+    const Loop *Outer = nullptr;
+    if (LI) {
+      Outer = getOutermostLoop(LI, BB);
+      // If we're in a loop with a hole, not all blocks in the loop are
+      // reachable from all other blocks. That implies we can't simply jump to
+      // the loop's exit blocks, as that exit might need to pass through an
+      // excluded block. Clear Outer so we process BB's successors.
+      if (LoopsWithHoles.count(Outer))
+        Outer = nullptr;
+      if (StopLoop && Outer == StopLoop)
+        return true;
+    }
 
     if (!--Limit) {
       // We haven't been able to prove it one way or the other. Conservatively
@@ -157,7 +181,7 @@ bool llvm::isPotentiallyReachableFromMany(
       return true;
     }
 
-    if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
+    if (Outer) {
       // All blocks in a single loop are reachable from all other blocks. From
       // any of these blocks, we can skip directly to the exits of the loop,
       // ignoring any other blocks inside the loop body.
@@ -181,11 +205,13 @@ bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
   Worklist.push_back(const_cast<BasicBlock*>(A));
 
   return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B),
-                                        DT, LI);
+                                        nullptr, DT, LI);
 }
 
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
-                                  const DominatorTree *DT, const LoopInfo *LI) {
+bool llvm::isPotentiallyReachable(
+    const Instruction *A, const Instruction *B,
+    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+    const LoopInfo *LI) {
   assert(A->getParent()->getParent() == B->getParent()->getParent() &&
          "This analysis is function-local!");
 
@@ -227,11 +253,20 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
     Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
   }
 
-  if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
-    return true;
-  if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
-    return false;
+  if (DT) {
+    if (DT->isReachableFromEntry(A->getParent()) &&
+        !DT->isReachableFromEntry(B->getParent()))
+      return false;
+    if (!ExclusionSet || ExclusionSet->empty()) {
+      if (A->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+          DT->isReachableFromEntry(B->getParent()))
+        return true;
+      if (B->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+          DT->isReachableFromEntry(A->getParent()))
+        return false;
+    }
+  }
 
   return isPotentiallyReachableFromMany(
-      Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI);
+      Worklist, const_cast<BasicBlock *>(B->getParent()), ExclusionSet, DT, LI);
 }
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 6d01e9d5d447..619b675b58d8 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -1,9 +1,8 @@
 //===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 1c61dd369a05..690e514d4f5c 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -613,7 +612,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
       auto Src = InstantiatedValue{Val, I};
       // If there's an assignment edge from X to Y, it means Y is reachable from
-      // X at S2 and X is reachable from Y at S1
+      // X at S3 and X is reachable from Y at S1
       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
         propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
                   WorkList);
@@ -876,7 +875,8 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
 }
 
 AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
-                                     const MemoryLocation &LocB) {
+                                     const MemoryLocation &LocB,
+                                     AAQueryInfo &AAQI) {
   if (LocA.Ptr == LocB.Ptr)
     return MustAlias;
 
@@ -886,11 +886,11 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
   // ConstantExpr, but every query needs to have at least one Value tied to a
   // Function, and neither GlobalValues nor ConstantExprs are.
   if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   AliasResult QueryResult = query(LocA, LocB);
   if (QueryResult == MayAlias)
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   return QueryResult;
 }
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 12121d717433..21842ed36487 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -1,9 +1,8 @@
 //===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,7 +24,6 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
@@ -154,7 +152,7 @@ public:
   }
 };
 
-///A builder class used to create CFLGraph instance from a given function
+/// A builder class used to create CFLGraph instance from a given function
 /// The CFL-AA that uses this builder must provide its own type as a template
 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
 /// needs a way of obtaining the summary of other functions when callinsts are
@@ -183,24 +181,23 @@ template <typename CFLAA> class CFLGraphBuilder {
 
     static bool hasUsefulEdges(ConstantExpr *CE) {
       // ConstantExpr doesn't have terminators, invokes, or fences, so only
-      // needs
-      // to check for compares.
+      // needs to check for compares.
       return CE->getOpcode() != Instruction::ICmp &&
              CE->getOpcode() != Instruction::FCmp;
     }
 
     // Returns possible functions called by CS into the given SmallVectorImpl.
     // Returns true if targets found, false otherwise.
-    static bool getPossibleTargets(CallSite CS,
+    static bool getPossibleTargets(CallBase &Call,
                                    SmallVectorImpl<Function *> &Output) {
-      if (auto *Fn = CS.getCalledFunction()) {
+      if (auto *Fn = Call.getCalledFunction()) {
         Output.push_back(Fn);
         return true;
       }
 
       // TODO: If the call is indirect, we might be able to enumerate all
-      // potential
-      // targets of the call and return them, rather than just failing.
+      // potential targets of the call and return them, rather than just
+      // failing.
       return false;
     }
 
@@ -294,6 +291,11 @@ template <typename CFLAA> class CFLGraphBuilder {
       addAssignEdge(Op2, &Inst);
     }
 
+    void visitUnaryOperator(UnaryOperator &Inst) {
+      auto *Src = Inst.getOperand(0);
+      addAssignEdge(Src, &Inst);
+    }
+
     void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
       auto *Ptr = Inst.getPointerOperand();
       auto *Val = Inst.getNewValOperand();
@@ -370,11 +372,11 @@ template <typename CFLAA> class CFLGraphBuilder {
       return !Fn->hasExactDefinition();
     }
 
-    bool tryInterproceduralAnalysis(CallSite CS,
+    bool tryInterproceduralAnalysis(CallBase &Call,
                                     const SmallVectorImpl<Function *> &Fns) {
       assert(Fns.size() > 0);
 
-      if (CS.arg_size() > MaxSupportedArgsInSummary)
+      if (Call.arg_size() > MaxSupportedArgsInSummary)
         return false;
 
       // Exit early if we'll fail anyway
@@ -382,7 +384,7 @@ template <typename CFLAA> class CFLGraphBuilder {
         if (isFunctionExternal(Fn) || Fn->isVarArg())
           return false;
         // Fail if the caller does not provide enough arguments
-        assert(Fn->arg_size() <= CS.arg_size());
+        assert(Fn->arg_size() <= Call.arg_size());
         if (!AA.getAliasSummary(*Fn))
           return false;
       }
@@ -393,7 +395,7 @@ template <typename CFLAA> class CFLGraphBuilder {
 
         auto &RetParamRelations = Summary->RetParamRelations;
         for (auto &Relation : RetParamRelations) {
-          auto IRelation = instantiateExternalRelation(Relation, CS);
+          auto IRelation = instantiateExternalRelation(Relation, Call);
           if (IRelation.hasValue()) {
             Graph.addNode(IRelation->From);
             Graph.addNode(IRelation->To);
@@ -403,7 +405,7 @@ template <typename CFLAA> class CFLGraphBuilder {
 
         auto &RetParamAttributes = Summary->RetParamAttributes;
         for (auto &Attribute : RetParamAttributes) {
-          auto IAttr = instantiateExternalAttribute(Attribute, CS);
+          auto IAttr = instantiateExternalAttribute(Attribute, Call);
           if (IAttr.hasValue())
             Graph.addNode(IAttr->IValue, IAttr->Attr);
         }
@@ -412,37 +414,35 @@ template <typename CFLAA> class CFLGraphBuilder {
       return true;
     }
 
-    void visitCallSite(CallSite CS) {
-      auto Inst = CS.getInstruction();
-
+    void visitCallBase(CallBase &Call) {
       // Make sure all arguments and return value are added to the graph first
-      for (Value *V : CS.args())
+      for (Value *V : Call.args())
         if (V->getType()->isPointerTy())
           addNode(V);
-      if (Inst->getType()->isPointerTy())
-        addNode(Inst);
+      if (Call.getType()->isPointerTy())
+        addNode(&Call);
 
       // Check if Inst is a call to a library function that
       // allocates/deallocates on the heap. Those kinds of functions do not
       // introduce any aliases.
       // TODO: address other common library functions such as realloc(),
       // strdup(), etc.
-      if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
+      if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
         return;
 
       // TODO: Add support for noalias args/all the other fun function
       // attributes that we can tack on.
       SmallVector<Function *, 4> Targets;
-      if (getPossibleTargets(CS, Targets))
-        if (tryInterproceduralAnalysis(CS, Targets))
+      if (getPossibleTargets(Call, Targets))
+        if (tryInterproceduralAnalysis(Call, Targets))
           return;
 
       // Because the function is opaque, we need to note that anything
       // could have happened to the arguments (unless the function is marked
       // readonly or readnone), and that the result could alias just about
       // anything, too (unless the result is marked noalias).
-      if (!CS.onlyReadsMemory())
-        for (Value *V : CS.args()) {
+      if (!Call.onlyReadsMemory())
+        for (Value *V : Call.args()) {
           if (V->getType()->isPointerTy()) {
             // The argument itself escapes.
             Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
@@ -453,12 +453,12 @@ template <typename CFLAA> class CFLGraphBuilder {
           }
         }
 
-      if (Inst->getType()->isPointerTy()) {
-        auto *Fn = CS.getCalledFunction();
+      if (Call.getType()->isPointerTy()) {
+        auto *Fn = Call.getCalledFunction();
         if (Fn == nullptr || !Fn->returnDoesNotAlias())
           // No need to call addNode() since we've added Inst at the
           // beginning of this function and we know it is not a global.
-          Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
+          Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown());
       }
     }
 
@@ -559,6 +559,7 @@ template <typename CFLAA> class CFLGraphBuilder {
       }
 
       case Instruction::Add:
+      case Instruction::FAdd:
       case Instruction::Sub:
       case Instruction::FSub:
       case Instruction::Mul:
@@ -583,6 +584,11 @@ template <typename CFLAA> class CFLGraphBuilder {
         break;
       }
 
+      case Instruction::FNeg: {
+        addAssignEdge(CE->getOperand(0), CE);
+        break;
+      }
+
       default:
         llvm_unreachable("Unknown instruction type encountered!");
       }
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 30ce13578e54..44b1834f70bf 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index fd2292ced017..a0b3f83cca6a 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -1,9 +1,8 @@
 //===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -111,6 +110,12 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
     // ...getContext().yield();
   }
 
+  // Before we mark all of *this* SCC's analyses as preserved below, intersect
+  // this with the cross-SCC preserved analysis set. This is used to allow
+  // CGSCC passes to mutate ancestor SCCs and still trigger proper invalidation
+  // for them.
+  UR.CrossSCCPA.intersect(PA);
+
   // Invalidation was handled after each pass in the above loop for the current
   // SCC. Therefore, the remaining analysis results in the AnalysisManager are
   // preserved. We mark this with a set so that we don't need to inspect each
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 0da678e1611b..ec5e94d499be 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -1,9 +1,8 @@
 //===- CallGraph.cpp - Build a Module's call graph ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,7 +10,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
@@ -64,25 +62,25 @@ void CallGraph::addToCallGraph(Function *F) {
   // If this function has external linkage or has its address taken, anything
   // could call it.
   if (!F->hasLocalLinkage() || F->hasAddressTaken())
-    ExternalCallingNode->addCalledFunction(CallSite(), Node);
+    ExternalCallingNode->addCalledFunction(nullptr, Node);
 
   // If this function is not defined in this translation unit, it could call
   // anything.
   if (F->isDeclaration() && !F->isIntrinsic())
-    Node->addCalledFunction(CallSite(), CallsExternalNode.get());
+    Node->addCalledFunction(nullptr, CallsExternalNode.get());
 
   // Look for calls by this function.
   for (BasicBlock &BB : *F)
     for (Instruction &I : BB) {
-      if (auto CS = CallSite(&I)) {
-        const Function *Callee = CS.getCalledFunction();
+      if (auto *Call = dyn_cast<CallBase>(&I)) {
+        const Function *Callee = Call->getCalledFunction();
         if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
           // Indirect calls of intrinsics are not allowed so no need to check.
           // We can be more precise here by using TargetArg returned by
           // Intrinsic::isLeaf.
-          Node->addCalledFunction(CS, CallsExternalNode.get());
+          Node->addCalledFunction(Call, CallsExternalNode.get());
         else if (!Callee->isIntrinsic())
-          Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+          Node->addCalledFunction(Call, getOrInsertFunction(Callee));
       }
     }
 }
@@ -185,10 +183,10 @@ LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); }
 /// removeCallEdgeFor - This method removes the edge in the node for the
 /// specified call site.  Note that this method takes linear time, so it
 /// should be used sparingly.
-void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+void CallGraphNode::removeCallEdgeFor(CallBase &Call) {
   for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
     assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
-    if (I->first == CS.getInstruction()) {
+    if (I->first == &Call) {
       I->second->DropRef();
       *I = CalledFunctions.back();
       CalledFunctions.pop_back();
@@ -228,13 +226,13 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
 /// replaceCallEdge - This method replaces the edge in the node for the
 /// specified call site with a new one.  Note that this method takes linear
 /// time, so it should be used sparingly.
-void CallGraphNode::replaceCallEdge(CallSite CS,
-                                    CallSite NewCS, CallGraphNode *NewNode){
+void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall,
+                                    CallGraphNode *NewNode) {
   for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
     assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
-    if (I->first == CS.getInstruction()) {
+    if (I->first == &Call) {
       I->second->DropRef();
-      I->first = NewCS.getInstruction();
+      I->first = &NewCall;
       I->second = NewNode;
       NewNode->AddRef();
       return;
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 0aed57a39387..196ef400bc4e 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -1,9 +1,8 @@
 //===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,7 +19,6 @@
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Intrinsics.h"
@@ -202,7 +200,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
 /// This never happens in checking mode.
 bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
                                      bool CheckingMode) {
-  DenseMap<Value*, CallGraphNode*> CallSites;
+  DenseMap<Value *, CallGraphNode *> Calls;
 
   LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
                     << " nodes:\n";
@@ -231,21 +229,21 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
     for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
       // If this call site is null, then the function pass deleted the call
       // entirely and the WeakTrackingVH nulled it out.
+      auto *Call = dyn_cast_or_null<CallBase>(I->first);
       if (!I->first ||
           // If we've already seen this call site, then the FunctionPass RAUW'd
           // one call with another, which resulted in two "uses" in the edge
           // list of the same call.
-          CallSites.count(I->first) ||
+          Calls.count(I->first) ||
 
           // If the call edge is not from a call or invoke, or it is a
           // instrinsic call, then the function pass RAUW'd a call with
           // another value. This can happen when constant folding happens
           // of well known functions etc.
-          !CallSite(I->first) ||
-          (CallSite(I->first).getCalledFunction() &&
-           CallSite(I->first).getCalledFunction()->isIntrinsic() &&
-           Intrinsic::isLeaf(
-               CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
+          !Call ||
+          (Call->getCalledFunction() &&
+           Call->getCalledFunction()->isIntrinsic() &&
+           Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()))) {
         assert(!CheckingMode &&
                "CallGraphSCCPass did not update the CallGraph correctly!");
 
@@ -269,15 +267,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
         continue;
       }
 
-      assert(!CallSites.count(I->first) &&
+      assert(!Calls.count(I->first) &&
              "Call site occurs in node multiple times");
 
-      CallSite CS(I->first);
-      if (CS) {
-        Function *Callee = CS.getCalledFunction();
+      if (Call) {
+        Function *Callee = Call->getCalledFunction();
         // Ignore intrinsics because they're not really function calls.
         if (!Callee || !(Callee->isIntrinsic()))
-          CallSites.insert(std::make_pair(I->first, I->second));
+          Calls.insert(std::make_pair(I->first, I->second));
       }
       ++I;
     }
@@ -288,23 +285,25 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
 
     for (BasicBlock &BB : *F)
       for (Instruction &I : BB) {
-        CallSite CS(&I);
-        if (!CS) continue;
-        Function *Callee = CS.getCalledFunction();
-        if (Callee && Callee->isIntrinsic()) continue;
+        auto *Call = dyn_cast<CallBase>(&I);
+        if (!Call)
+          continue;
+        Function *Callee = Call->getCalledFunction();
+        if (Callee && Callee->isIntrinsic())
+          continue;
 
         // If this call site already existed in the callgraph, just verify it
-        // matches up to expectations and remove it from CallSites.
-        DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
-          CallSites.find(CS.getInstruction());
-        if (ExistingIt != CallSites.end()) {
+        // matches up to expectations and remove it from Calls.
+        DenseMap<Value *, CallGraphNode *>::iterator ExistingIt =
+            Calls.find(Call);
+        if (ExistingIt != Calls.end()) {
           CallGraphNode *ExistingNode = ExistingIt->second;
 
-          // Remove from CallSites since we have now seen it.
-          CallSites.erase(ExistingIt);
+          // Remove from Calls since we have now seen it.
+          Calls.erase(ExistingIt);
 
           // Verify that the callee is right.
-          if (ExistingNode->getFunction() == CS.getCalledFunction())
+          if (ExistingNode->getFunction() == Call->getCalledFunction())
             continue;
 
           // If we are in checking mode, we are not allowed to actually mutate
@@ -312,7 +311,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
           // callgraph is less precise than it could be (e.g. an indirect call
           // site could be turned direct), don't reject it in checking mode, and
           // don't tweak it to be more precise.
-          if (CheckingMode && CS.getCalledFunction() &&
+          if (CheckingMode && Call->getCalledFunction() &&
               ExistingNode->getFunction() == nullptr)
             continue;
 
@@ -322,7 +321,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
           // If not, we either went from a direct call to indirect, indirect to
           // direct, or direct to different direct.
           CallGraphNode *CalleeNode;
-          if (Function *Callee = CS.getCalledFunction()) {
+          if (Function *Callee = Call->getCalledFunction()) {
             CalleeNode = CG.getOrInsertFunction(Callee);
             // Keep track of whether we turned an indirect call into a direct
             // one.
@@ -336,7 +335,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
           }
 
           // Update the edge target in CGN.
-          CGN->replaceCallEdge(CS, CS, CalleeNode);
+          CGN->replaceCallEdge(*Call, *Call, CalleeNode);
           MadeChange = true;
           continue;
         }
@@ -346,7 +345,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
 
         // If the call site didn't exist in the CGN yet, add it.
         CallGraphNode *CalleeNode;
-        if (Function *Callee = CS.getCalledFunction()) {
+        if (Function *Callee = Call->getCalledFunction()) {
           CalleeNode = CG.getOrInsertFunction(Callee);
           ++NumDirectAdded;
         } else {
@@ -354,7 +353,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
           ++NumIndirectAdded;
         }
 
-        CGN->addCalledFunction(CS, CalleeNode);
+        CGN->addCalledFunction(Call, CalleeNode);
         MadeChange = true;
       }
 
@@ -376,12 +375,12 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
     // they are dangling pointers.  WeakTrackingVH should save us for this, so
     // abort if
     // this happens.
-    assert(CallSites.empty() && "Dangling pointers found in call sites map");
+    assert(Calls.empty() && "Dangling pointers found in call sites map");
 
     // Periodically do an explicit clear to remove tombstones when processing
     // large scc's.
     if ((FunctionNo & 15) == 15)
-      CallSites.clear();
+      Calls.clear();
   }
 
   LLVM_DEBUG(if (MadeChange) {
@@ -682,11 +681,28 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS,
   return new PrintCallGraphPass(Banner, OS);
 }
 
+/// Render \p SCC as "SCC (a, b, ...)", listing each node's function name in
+/// iteration order; a node without a function prints as "<<null function>>".
+static std::string getDescription(const CallGraphSCC &SCC) {
+  std::string Text = "SCC (";
+  // Empty before the first entry, ", " before every later one.
+  const char *Separator = "";
+  for (CallGraphNode *Node : SCC) {
+    Text += Separator;
+    Separator = ", ";
+    if (Function *Fn = Node->getFunction())
+      Text += Fn->getName();
+    else
+      Text += "<<null function>>";
+  }
+  Text += ")";
+  return Text;
+}
+
 bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
-  return !SCC.getCallGraph().getModule()
-              .getContext()
-              .getOptPassGate()
-              .shouldRunPass(this, SCC);
+  Module &M = SCC.getCallGraph().getModule();
+  OptPassGate &PassGate = M.getContext().getOptPassGate();
+  return PassGate.isEnabled() && !PassGate.shouldRunPass(this, getDescription(SCC));
 }
 
 char DummyCGSCCPass::ID = 0;
diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp
index e7017e77652a..d24cbd104bf6 100644
--- a/lib/Analysis/CallPrinter.cpp
+++ b/lib/Analysis/CallPrinter.cpp
@@ -1,9 +1,8 @@
 //===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 669f4f2835fa..adaa83a6c443 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -1,9 +1,8 @@
 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,14 +101,14 @@ namespace {
 
         SmallVector<BasicBlock*, 32> Worklist;
         Worklist.append(succ_begin(BB), succ_end(BB));
-        return !isPotentiallyReachableFromMany(Worklist, BB, DT);
+        return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT);
       }
 
       // If the value is defined in the same basic block as use and BeforeHere,
       // there is no need to explore the use if BeforeHere dominates use.
       // Check whether there is a path from I to BeforeHere.
       if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
-          !isPotentiallyReachable(I, BeforeHere, DT))
+          !isPotentiallyReachable(I, BeforeHere, nullptr, DT))
         return true;
 
       return false;
@@ -331,14 +330,32 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
       AddUses(I);
       break;
     case Instruction::ICmp: {
-      // Don't count comparisons of a no-alias return value against null as
-      // captures. This allows us to ignore comparisons of malloc results
-      // with null, for example.
-      if (ConstantPointerNull *CPN =
-          dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+      if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) {
+        // Don't count comparisons of a no-alias return value against null as
+        // captures. This allows us to ignore comparisons of malloc results
+        // with null, for example.
         if (CPN->getType()->getAddressSpace() == 0)
           if (isNoAliasCall(V->stripPointerCasts()))
             break;
+        if (!I->getFunction()->nullPointerIsDefined()) {
+          auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation();
+          // An inbounds GEP can either be a valid pointer (pointing into
+          // or to the end of an allocation), or be null in the default
+          // address space. So for an inbounds GEP there is no way to let
+          // the pointer escape using clever GEP hacking because doing so
+          // would make the pointer point outside of the allocated object
+          // and thus make the GEP result a poison value.
+          if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
+            if (GEP->isInBounds())
+              break;
+          // Comparing a dereferenceable_or_null argument against null
+          // cannot lead to pointer escapes, because if it is not null it
+          // must be a valid (in-bounds) pointer.
+          bool CanBeNull; // Passed to the query below; not read afterwards.
+          if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull))
+            break;
+        }
+      }
       // Comparison against value stored in global variable. Given the pointer
       // does not escape, its value cannot be guessed and stored separately in a
       // global variable.
diff --git a/lib/Analysis/CmpInstAnalysis.cpp b/lib/Analysis/CmpInstAnalysis.cpp
index 27071babec5c..a5757be2c4f4 100644
--- a/lib/Analysis/CmpInstAnalysis.cpp
+++ b/lib/Analysis/CmpInstAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 46cc87d2b178..627d955c865f 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -1,9 +1,8 @@
 //===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,6 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
@@ -126,14 +124,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
       continue;
 
     // Special handling for calls.
-    if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
-      ImmutableCallSite CS(&I);
-
-      if (const Function *F = CS.getCalledFunction()) {
+    if (const auto *Call = dyn_cast<CallBase>(&I)) {
+      if (const Function *F = Call->getCalledFunction()) {
         // If a function is both internal and has a single use, then it is
         // extremely likely to get inlined in the future (it was probably
         // exposed by an interleaved devirtualization pass).
-        if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+        if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
           ++NumInlineCandidates;
 
         // If this call is to function itself, then the function is recursive.
@@ -148,7 +144,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
       } else {
         // We don't want inline asm to count as a call - that would prevent loop
         // unrolling. The argument setup cost is still real, though.
-        if (!isa<InlineAsm>(CS.getCalledValue()))
+        if (!Call->isInlineAsm())
           ++NumCalls;
       }
     }
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5da29d6d2372..20231ca78b45 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1,9 +1,8 @@
 //===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,6 +25,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Config/config.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -516,7 +516,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
       MapTy = Type::getInt64Ty(C->getContext());
     else if (LoadTy->isVectorTy()) {
       MapTy = PointerType::getIntNTy(C->getContext(),
-                                     DL.getTypeAllocSizeInBits(LoadTy));
+                                     DL.getTypeSizeInBits(LoadTy));
     } else
       return nullptr;
 
@@ -1000,7 +1000,9 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                        const TargetLibraryInfo *TLI) {
   Type *DestTy = InstOrCE->getType();
 
-  // Handle easy binops first.
+  if (Instruction::isUnaryOp(Opcode))
+    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
+
   if (Instruction::isBinaryOp(Opcode))
     return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
 
@@ -1025,15 +1027,18 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
   case Instruction::FCmp: llvm_unreachable("Invalid for compares");
   case Instruction::Call:
     if (auto *F = dyn_cast<Function>(Ops.back())) {
-      ImmutableCallSite CS(cast<CallInst>(InstOrCE));
-      if (canConstantFoldCallTo(CS, F))
-        return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI);
+      const auto *Call = cast<CallBase>(InstOrCE);
+      if (canConstantFoldCallTo(Call, F))
+        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
     }
     return nullptr;
   case Instruction::Select:
     return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
   case Instruction::ExtractElement:
     return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+  case Instruction::ExtractValue:
+    return ConstantExpr::getExtractValue(
+        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
   case Instruction::InsertElement:
     return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
   case Instruction::ShuffleVector:
@@ -1263,6 +1268,13 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
   return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
 }
 
+Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+                                           const DataLayout &DL) {
+  assert(Instruction::isUnaryOp(Opcode) && "Expected a unary opcode.");
+  // Folding is delegated to ConstantExpr::get; DL is not consulted here.
+  return ConstantExpr::get(Opcode, Op);
+}
+
 Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
                                              Constant *RHS,
                                              const DataLayout &DL) {
@@ -1367,8 +1379,8 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
 //  Constant Folding for Calls
 //
 
-bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
-  if (CS.isNoBuiltin() || CS.isStrictFP())
+bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
+  if (Call->isNoBuiltin() || Call->isStrictFP())
     return false;
   switch (F->getIntrinsicID()) {
   case Intrinsic::fabs:
@@ -1414,6 +1426,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
   case Intrinsic::uadd_sat:
   case Intrinsic::ssub_sat:
   case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::bitreverse:
@@ -1518,14 +1532,12 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
 namespace {
 
 Constant *GetConstantFoldFPValue(double V, Type *Ty) {
-  if (Ty->isHalfTy()) {
+  if (Ty->isHalfTy() || Ty->isFloatTy()) {
     APFloat APF(V);
     bool unused;
-    APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
+    APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
     return ConstantFP::get(Ty->getContext(), APF);
   }
-  if (Ty->isFloatTy())
-    return ConstantFP::get(Ty->getContext(), APFloat((float)V));
   if (Ty->isDoubleTy())
     return ConstantFP::get(Ty->getContext(), APFloat(V));
   llvm_unreachable("Can only constant fold half/float/double");
@@ -1641,522 +1653,538 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
   return false;
 }
 
-Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
-                                 ArrayRef<Constant *> Operands,
-                                 const TargetLibraryInfo *TLI,
-                                 ImmutableCallSite CS) {
-  if (Operands.size() == 1) {
-    if (IntrinsicID == Intrinsic::is_constant) {
-      // We know we have a "Constant" argument. But we want to only
-      // return true for manifest constants, not those that depend on
-      // constants with unknowable values, e.g. GlobalValue or BlockAddress.
-      if (isManifestConstant(Operands[0]))
-        return ConstantInt::getTrue(Ty->getContext());
-      return nullptr;
-    }
-    if (isa<UndefValue>(Operands[0])) {
-      // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
-      // ctpop() is between 0 and bitwidth, pick 0 for undef.
-      if (IntrinsicID == Intrinsic::cos ||
-          IntrinsicID == Intrinsic::ctpop)
-        return Constant::getNullValue(Ty);
-      if (IntrinsicID == Intrinsic::bswap ||
-          IntrinsicID == Intrinsic::bitreverse ||
-          IntrinsicID == Intrinsic::launder_invariant_group ||
-          IntrinsicID == Intrinsic::strip_invariant_group)
-        return Operands[0];
-    }
+static Constant *ConstantFoldScalarCall1(StringRef Name,
+                                         Intrinsic::ID IntrinsicID,
+                                         Type *Ty,
+                                         ArrayRef<Constant *> Operands,
+                                         const TargetLibraryInfo *TLI,
+                                         const CallBase *Call) {
+  assert(Operands.size() == 1 && "Wrong number of operands.");
+
+  if (IntrinsicID == Intrinsic::is_constant) {
+    // We know we have a "Constant" argument. But we want to only
+    // return true for manifest constants, not those that depend on
+    // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+    if (isManifestConstant(Operands[0]))
+      return ConstantInt::getTrue(Ty->getContext());
+    return nullptr;
+  }
+  if (isa<UndefValue>(Operands[0])) {
+    // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+    // ctpop() is between 0 and bitwidth, pick 0 for undef.
+    if (IntrinsicID == Intrinsic::cos ||
+        IntrinsicID == Intrinsic::ctpop)
+      return Constant::getNullValue(Ty);
+    if (IntrinsicID == Intrinsic::bswap ||
+        IntrinsicID == Intrinsic::bitreverse ||
+        IntrinsicID == Intrinsic::launder_invariant_group ||
+        IntrinsicID == Intrinsic::strip_invariant_group)
+      return Operands[0];
+  }
 
-    if (isa<ConstantPointerNull>(Operands[0])) {
-      // launder(null) == null == strip(null) iff in addrspace 0
-      if (IntrinsicID == Intrinsic::launder_invariant_group ||
-          IntrinsicID == Intrinsic::strip_invariant_group) {
-        // If instruction is not yet put in a basic block (e.g. when cloning
-        // a function during inlining), CS caller may not be available.
-        // So check CS's BB first before querying CS.getCaller.
-        const Function *Caller = CS.getParent() ? CS.getCaller() : nullptr;
-        if (Caller &&
-            !NullPointerIsDefined(
-                Caller, Operands[0]->getType()->getPointerAddressSpace())) {
-          return Operands[0];
-        }
-        return nullptr;
+  if (isa<ConstantPointerNull>(Operands[0])) {
+    // launder(null) == null == strip(null) iff in addrspace 0
+    if (IntrinsicID == Intrinsic::launder_invariant_group ||
+        IntrinsicID == Intrinsic::strip_invariant_group) {
+      // If instruction is not yet put in a basic block (e.g. when cloning
+      // a function during inlining), Call's caller may not be available.
+      // So check Call's BB first before querying Call->getCaller.
+      const Function *Caller =
+          Call->getParent() ? Call->getCaller() : nullptr;
+      if (Caller &&
+          !NullPointerIsDefined(
+              Caller, Operands[0]->getType()->getPointerAddressSpace())) {
+        return Operands[0];
       }
+      return nullptr;
     }
+  }
 
-    if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
-      if (IntrinsicID == Intrinsic::convert_to_fp16) {
-        APFloat Val(Op->getValueAPF());
-
-        bool lost = false;
-        Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
+  if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
+    if (IntrinsicID == Intrinsic::convert_to_fp16) {
+      APFloat Val(Op->getValueAPF());
 
-        return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
-      }
+      bool lost = false;
+      Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
 
-      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
-        return nullptr;
+      return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
+    }
 
-      if (IntrinsicID == Intrinsic::round) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmNearestTiesToAway);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+      return nullptr;
 
-      if (IntrinsicID == Intrinsic::floor) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmTowardNegative);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (IntrinsicID == Intrinsic::round) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmNearestTiesToAway);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      if (IntrinsicID == Intrinsic::ceil) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmTowardPositive);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (IntrinsicID == Intrinsic::floor) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmTowardNegative);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      if (IntrinsicID == Intrinsic::trunc) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmTowardZero);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (IntrinsicID == Intrinsic::ceil) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmTowardPositive);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      if (IntrinsicID == Intrinsic::rint) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmNearestTiesToEven);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (IntrinsicID == Intrinsic::trunc) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmTowardZero);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      if (IntrinsicID == Intrinsic::nearbyint) {
-        APFloat V = Op->getValueAPF();
-        V.roundToIntegral(APFloat::rmNearestTiesToEven);
-        return ConstantFP::get(Ty->getContext(), V);
-      }
+    if (IntrinsicID == Intrinsic::rint) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmNearestTiesToEven);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      /// We only fold functions with finite arguments. Folding NaN and inf is
-      /// likely to be aborted with an exception anyway, and some host libms
-      /// have known errors raising exceptions.
-      if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
-        return nullptr;
+    if (IntrinsicID == Intrinsic::nearbyint) {
+      APFloat V = Op->getValueAPF();
+      V.roundToIntegral(APFloat::rmNearestTiesToEven);
+      return ConstantFP::get(Ty->getContext(), V);
+    }
 
-      /// Currently APFloat versions of these functions do not exist, so we use
-      /// the host native double versions.  Float versions are not called
-      /// directly but for all these it is true (float)(f((double)arg)) ==
-      /// f(arg).  Long double not supported yet.
-      double V = getValueAsDouble(Op);
+    /// We only fold functions with finite arguments. Folding NaN and inf is
+    /// likely to be aborted with an exception anyway, and some host libms
+    /// have known errors raising exceptions.
+    if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+      return nullptr;
 
-      switch (IntrinsicID) {
-        default: break;
-        case Intrinsic::fabs:
-          return ConstantFoldFP(fabs, V, Ty);
-        case Intrinsic::log2:
-          return ConstantFoldFP(Log2, V, Ty);
-        case Intrinsic::log:
-          return ConstantFoldFP(log, V, Ty);
-        case Intrinsic::log10:
-          return ConstantFoldFP(log10, V, Ty);
-        case Intrinsic::exp:
-          return ConstantFoldFP(exp, V, Ty);
-        case Intrinsic::exp2:
-          return ConstantFoldFP(exp2, V, Ty);
-        case Intrinsic::sin:
-          return ConstantFoldFP(sin, V, Ty);
-        case Intrinsic::cos:
-          return ConstantFoldFP(cos, V, Ty);
-        case Intrinsic::sqrt:
-          return ConstantFoldFP(sqrt, V, Ty);
-      }
+    /// Currently APFloat versions of these functions do not exist, so we use
+    /// the host native double versions.  Float versions are not called
+    /// directly but for all these it is true (float)(f((double)arg)) ==
+    /// f(arg).  Long double not supported yet.
+    double V = getValueAsDouble(Op);
 
-      if (!TLI)
-        return nullptr;
+    switch (IntrinsicID) {
+      default: break;
+      case Intrinsic::fabs:
+        return ConstantFoldFP(fabs, V, Ty);
+      case Intrinsic::log2:
+        return ConstantFoldFP(Log2, V, Ty);
+      case Intrinsic::log:
+        return ConstantFoldFP(log, V, Ty);
+      case Intrinsic::log10:
+        return ConstantFoldFP(log10, V, Ty);
+      case Intrinsic::exp:
+        return ConstantFoldFP(exp, V, Ty);
+      case Intrinsic::exp2:
+        return ConstantFoldFP(exp2, V, Ty);
+      case Intrinsic::sin:
+        return ConstantFoldFP(sin, V, Ty);
+      case Intrinsic::cos:
+        return ConstantFoldFP(cos, V, Ty);
+      case Intrinsic::sqrt:
+        return ConstantFoldFP(sqrt, V, Ty);
+    }
 
-      char NameKeyChar = Name[0];
-      if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
-        NameKeyChar = Name[2];
-
-      switch (NameKeyChar) {
-      case 'a':
-        if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
-            (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
-            (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
-            (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
-          return ConstantFoldFP(acos, V, Ty);
-        else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
-                 (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
-                 (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
-                 (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
-          return ConstantFoldFP(asin, V, Ty);
-        else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
-                 (Name == "atanf" && TLI->has(LibFunc_atanf)))
-          return ConstantFoldFP(atan, V, Ty);
-        break;
-      case 'c':
-        if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
-            (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
-          return ConstantFoldFP(ceil, V, Ty);
-        else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
-                 (Name == "cosf" && TLI->has(LibFunc_cosf)))
-          return ConstantFoldFP(cos, V, Ty);
-        else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
-                 (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
-                 (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
-                 (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
-          return ConstantFoldFP(cosh, V, Ty);
-        break;
-      case 'e':
-        if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
-            (Name == "expf" && TLI->has(LibFunc_expf)) ||
-            (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
-            (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
-          return ConstantFoldFP(exp, V, Ty);
-        if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
-            (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
-            (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
-            (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
-          // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
-          // C99 library.
-          return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
-        break;
-      case 'f':
-        if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
-            (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
-          return ConstantFoldFP(fabs, V, Ty);
-        else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
-                 (Name == "floorf" && TLI->has(LibFunc_floorf)))
-          return ConstantFoldFP(floor, V, Ty);
-        break;
-      case 'l':
-        if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
-            (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
-            (Name == "__log_finite" && V > 0 &&
-              TLI->has(LibFunc_log_finite)) ||
-            (Name == "__logf_finite" && V > 0 &&
-              TLI->has(LibFunc_logf_finite)))
-          return ConstantFoldFP(log, V, Ty);
-        else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
-                 (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
-                 (Name == "__log10_finite" && V > 0 &&
-                   TLI->has(LibFunc_log10_finite)) ||
-                 (Name == "__log10f_finite" && V > 0 &&
-                   TLI->has(LibFunc_log10f_finite)))
-          return ConstantFoldFP(log10, V, Ty);
-        break;
-      case 'r':
-        if ((Name == "round" && TLI->has(LibFunc_round)) ||
-            (Name == "roundf" && TLI->has(LibFunc_roundf)))
-          return ConstantFoldFP(round, V, Ty);
-        break;
-      case 's':
-        if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
-            (Name == "sinf" && TLI->has(LibFunc_sinf)))
-          return ConstantFoldFP(sin, V, Ty);
-        else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
-                 (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
-                 (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
-                 (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
-          return ConstantFoldFP(sinh, V, Ty);
-        else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
-                 (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
-          return ConstantFoldFP(sqrt, V, Ty);
-        break;
-      case 't':
-        if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
-            (Name == "tanf" && TLI->has(LibFunc_tanf)))
-          return ConstantFoldFP(tan, V, Ty);
-        else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
-                 (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
-          return ConstantFoldFP(tanh, V, Ty);
-        break;
-      default:
-        break;
-      }
+    if (!TLI)
       return nullptr;
-    }
 
-    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
-      switch (IntrinsicID) {
-      case Intrinsic::bswap:
-        return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
-      case Intrinsic::ctpop:
-        return ConstantInt::get(Ty, Op->getValue().countPopulation());
-      case Intrinsic::bitreverse:
-        return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
-      case Intrinsic::convert_from_fp16: {
-        APFloat Val(APFloat::IEEEhalf(), Op->getValue());
-
-        bool lost = false;
-        APFloat::opStatus status = Val.convert(
-            Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
-
-        // Conversion is always precise.
-        (void)status;
-        assert(status == APFloat::opOK && !lost &&
-               "Precision lost during fp16 constfolding");
-
-        return ConstantFP::get(Ty->getContext(), Val);
-      }
-      default:
-        return nullptr;
-      }
-    }
+    char NameKeyChar = Name[0];
+    if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
+      NameKeyChar = Name[2];
 
-    // Support ConstantVector in case we have an Undef in the top.
-    if (isa<ConstantVector>(Operands[0]) ||
-        isa<ConstantDataVector>(Operands[0])) {
-      auto *Op = cast<Constant>(Operands[0]);
-      switch (IntrinsicID) {
-      default: break;
-      case Intrinsic::x86_sse_cvtss2si:
-      case Intrinsic::x86_sse_cvtss2si64:
-      case Intrinsic::x86_sse2_cvtsd2si:
-      case Intrinsic::x86_sse2_cvtsd2si64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/false, Ty,
-                                             /*IsSigned*/true);
-        break;
-      case Intrinsic::x86_sse_cvttss2si:
-      case Intrinsic::x86_sse_cvttss2si64:
-      case Intrinsic::x86_sse2_cvttsd2si:
-      case Intrinsic::x86_sse2_cvttsd2si64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/true, Ty,
-                                             /*IsSigned*/true);
-        break;
-      }
+    switch (NameKeyChar) {
+    case 'a':
+      if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
+          (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
+          (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
+          (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
+        return ConstantFoldFP(acos, V, Ty);
+      else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
+               (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
+               (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
+               (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
+        return ConstantFoldFP(asin, V, Ty);
+      else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
+               (Name == "atanf" && TLI->has(LibFunc_atanf)))
+        return ConstantFoldFP(atan, V, Ty);
+      break;
+    case 'c':
+      if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
+          (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
+        return ConstantFoldFP(ceil, V, Ty);
+      else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
+               (Name == "cosf" && TLI->has(LibFunc_cosf)))
+        return ConstantFoldFP(cos, V, Ty);
+      else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
+               (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
+               (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
+               (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
+        return ConstantFoldFP(cosh, V, Ty);
+      break;
+    case 'e':
+      if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
+          (Name == "expf" && TLI->has(LibFunc_expf)) ||
+          (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
+          (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
+        return ConstantFoldFP(exp, V, Ty);
+      if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
+          (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
+          (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
+          (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
+        // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+        // C99 library.
+        return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+      break;
+    case 'f':
+      if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
+          (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
+        return ConstantFoldFP(fabs, V, Ty);
+      else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
+               (Name == "floorf" && TLI->has(LibFunc_floorf)))
+        return ConstantFoldFP(floor, V, Ty);
+      break;
+    case 'l':
+      if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
+          (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
+          (Name == "__log_finite" && V > 0 &&
+            TLI->has(LibFunc_log_finite)) ||
+          (Name == "__logf_finite" && V > 0 &&
+            TLI->has(LibFunc_logf_finite)))
+        return ConstantFoldFP(log, V, Ty);
+      else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
+               (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
+               (Name == "__log10_finite" && V > 0 &&
+                 TLI->has(LibFunc_log10_finite)) ||
+               (Name == "__log10f_finite" && V > 0 &&
+                 TLI->has(LibFunc_log10f_finite)))
+        return ConstantFoldFP(log10, V, Ty);
+      break;
+    case 'r':
+      if ((Name == "round" && TLI->has(LibFunc_round)) ||
+          (Name == "roundf" && TLI->has(LibFunc_roundf)))
+        return ConstantFoldFP(round, V, Ty);
+      break;
+    case 's':
+      if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
+          (Name == "sinf" && TLI->has(LibFunc_sinf)))
+        return ConstantFoldFP(sin, V, Ty);
+      else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
+               (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
+               (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
+               (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
+        return ConstantFoldFP(sinh, V, Ty);
+      else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
+               (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
+        return ConstantFoldFP(sqrt, V, Ty);
+      break;
+    case 't':
+      if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
+          (Name == "tanf" && TLI->has(LibFunc_tanf)))
+        return ConstantFoldFP(tan, V, Ty);
+      else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
+               (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
+        return ConstantFoldFP(tanh, V, Ty);
+      break;
+    default:
+      break;
     }
-
     return nullptr;
   }
 
-  if (Operands.size() == 2) {
-    if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
-      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
-        return nullptr;
-      double Op1V = getValueAsDouble(Op1);
-
-      if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
-        if (Op2->getType() != Op1->getType())
-          return nullptr;
+  if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+    switch (IntrinsicID) {
+    case Intrinsic::bswap:
+      return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
+    case Intrinsic::ctpop:
+      return ConstantInt::get(Ty, Op->getValue().countPopulation());
+    case Intrinsic::bitreverse:
+      return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
+    case Intrinsic::convert_from_fp16: {
+      APFloat Val(APFloat::IEEEhalf(), Op->getValue());
+
+      bool lost = false;
+      APFloat::opStatus status = Val.convert(
+          Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
+
+      // Conversion is always precise.
+      (void)status;
+      assert(status == APFloat::opOK && !lost &&
+             "Precision lost during fp16 constfolding");
+
+      return ConstantFP::get(Ty->getContext(), Val);
+    }
+    default:
+      return nullptr;
+    }
+  }
 
-        double Op2V = getValueAsDouble(Op2);
-        if (IntrinsicID == Intrinsic::pow) {
-          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
-        }
-        if (IntrinsicID == Intrinsic::copysign) {
-          APFloat V1 = Op1->getValueAPF();
-          const APFloat &V2 = Op2->getValueAPF();
-          V1.copySign(V2);
-          return ConstantFP::get(Ty->getContext(), V1);
-        }
+  // Support ConstantVector in case we have an Undef in the top.
+  if (isa<ConstantVector>(Operands[0]) ||
+      isa<ConstantDataVector>(Operands[0])) {
+    auto *Op = cast<Constant>(Operands[0]);
+    switch (IntrinsicID) {
+    default: break;
+    case Intrinsic::x86_sse_cvtss2si:
+    case Intrinsic::x86_sse_cvtss2si64:
+    case Intrinsic::x86_sse2_cvtsd2si:
+    case Intrinsic::x86_sse2_cvtsd2si64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/false, Ty,
+                                           /*IsSigned*/true);
+      break;
+    case Intrinsic::x86_sse_cvttss2si:
+    case Intrinsic::x86_sse_cvttss2si64:
+    case Intrinsic::x86_sse2_cvttsd2si:
+    case Intrinsic::x86_sse2_cvttsd2si64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/true, Ty,
+                                           /*IsSigned*/true);
+      break;
+    }
+  }
 
-        if (IntrinsicID == Intrinsic::minnum) {
-          const APFloat &C1 = Op1->getValueAPF();
-          const APFloat &C2 = Op2->getValueAPF();
-          return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
-        }
+  return nullptr;
+}
 
-        if (IntrinsicID == Intrinsic::maxnum) {
-          const APFloat &C1 = Op1->getValueAPF();
-          const APFloat &C2 = Op2->getValueAPF();
-          return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
-        }
+static Constant *ConstantFoldScalarCall2(StringRef Name,
+                                         Intrinsic::ID IntrinsicID,
+                                         Type *Ty,
+                                         ArrayRef<Constant *> Operands,
+                                         const TargetLibraryInfo *TLI,
+                                         const CallBase *Call) {
+  assert(Operands.size() == 2 && "Wrong number of operands.");
 
-        if (IntrinsicID == Intrinsic::minimum) {
-          const APFloat &C1 = Op1->getValueAPF();
-          const APFloat &C2 = Op2->getValueAPF();
-          return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
-        }
+  if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+    if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+      return nullptr;
+    double Op1V = getValueAsDouble(Op1);
 
-        if (IntrinsicID == Intrinsic::maximum) {
-          const APFloat &C1 = Op1->getValueAPF();
-          const APFloat &C2 = Op2->getValueAPF();
-          return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
-        }
+    if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+      if (Op2->getType() != Op1->getType())
+        return nullptr;
 
-        if (!TLI)
-          return nullptr;
-        if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
-            (Name == "powf" && TLI->has(LibFunc_powf)) ||
-            (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
-            (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
-          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
-        if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
-            (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
-          return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
-        if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
-            (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
-            (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
-            (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
-          return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
-      } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
-        if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
-          return ConstantFP::get(Ty->getContext(),
-                                 APFloat((float)std::pow((float)Op1V,
-                                                 (int)Op2C->getZExtValue())));
-        if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
-          return ConstantFP::get(Ty->getContext(),
-                                 APFloat((float)std::pow((float)Op1V,
-                                                 (int)Op2C->getZExtValue())));
-        if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
-          return ConstantFP::get(Ty->getContext(),
-                                 APFloat((double)std::pow((double)Op1V,
-                                                   (int)Op2C->getZExtValue())));
+      double Op2V = getValueAsDouble(Op2);
+      if (IntrinsicID == Intrinsic::pow) {
+        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+      }
+      if (IntrinsicID == Intrinsic::copysign) {
+        APFloat V1 = Op1->getValueAPF();
+        const APFloat &V2 = Op2->getValueAPF();
+        V1.copySign(V2);
+        return ConstantFP::get(Ty->getContext(), V1);
       }
-      return nullptr;
-    }
 
-    if (Operands[0]->getType()->isIntegerTy() &&
-        Operands[1]->getType()->isIntegerTy()) {
-      const APInt *C0, *C1;
-      if (!getConstIntOrUndef(Operands[0], C0) ||
-          !getConstIntOrUndef(Operands[1], C1))
-        return nullptr;
+      if (IntrinsicID == Intrinsic::minnum) {
+        const APFloat &C1 = Op1->getValueAPF();
+        const APFloat &C2 = Op2->getValueAPF();
+        return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+      }
 
-      switch (IntrinsicID) {
-      default: break;
-      case Intrinsic::smul_with_overflow:
-      case Intrinsic::umul_with_overflow:
-        // Even if both operands are undef, we cannot fold muls to undef
-        // in the general case. For example, on i2 there are no inputs
-        // that would produce { i2 -1, i1 true } as the result.
-        if (!C0 || !C1)
-          return Constant::getNullValue(Ty);
-        LLVM_FALLTHROUGH;
-      case Intrinsic::sadd_with_overflow:
-      case Intrinsic::uadd_with_overflow:
-      case Intrinsic::ssub_with_overflow:
-      case Intrinsic::usub_with_overflow: {
-        if (!C0 || !C1)
-          return UndefValue::get(Ty);
+      if (IntrinsicID == Intrinsic::maxnum) {
+        const APFloat &C1 = Op1->getValueAPF();
+        const APFloat &C2 = Op2->getValueAPF();
+        return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+      }
 
-        APInt Res;
-        bool Overflow;
-        switch (IntrinsicID) {
-        default: llvm_unreachable("Invalid case");
-        case Intrinsic::sadd_with_overflow:
-          Res = C0->sadd_ov(*C1, Overflow);
-          break;
-        case Intrinsic::uadd_with_overflow:
-          Res = C0->uadd_ov(*C1, Overflow);
-          break;
-        case Intrinsic::ssub_with_overflow:
-          Res = C0->ssub_ov(*C1, Overflow);
-          break;
-        case Intrinsic::usub_with_overflow:
-          Res = C0->usub_ov(*C1, Overflow);
-          break;
-        case Intrinsic::smul_with_overflow:
-          Res = C0->smul_ov(*C1, Overflow);
-          break;
-        case Intrinsic::umul_with_overflow:
-          Res = C0->umul_ov(*C1, Overflow);
-          break;
-        }
-        Constant *Ops[] = {
-          ConstantInt::get(Ty->getContext(), Res),
-          ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
-        };
-        return ConstantStruct::get(cast<StructType>(Ty), Ops);
+      if (IntrinsicID == Intrinsic::minimum) {
+        const APFloat &C1 = Op1->getValueAPF();
+        const APFloat &C2 = Op2->getValueAPF();
+        return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
       }
-      case Intrinsic::uadd_sat:
-      case Intrinsic::sadd_sat:
-        if (!C0 && !C1)
-          return UndefValue::get(Ty);
-        if (!C0 || !C1)
-          return Constant::getAllOnesValue(Ty);
-        if (IntrinsicID == Intrinsic::uadd_sat)
-          return ConstantInt::get(Ty, C0->uadd_sat(*C1));
-        else
-          return ConstantInt::get(Ty, C0->sadd_sat(*C1));
-      case Intrinsic::usub_sat:
-      case Intrinsic::ssub_sat:
-        if (!C0 && !C1)
-          return UndefValue::get(Ty);
-        if (!C0 || !C1)
-          return Constant::getNullValue(Ty);
-        if (IntrinsicID == Intrinsic::usub_sat)
-          return ConstantInt::get(Ty, C0->usub_sat(*C1));
-        else
-          return ConstantInt::get(Ty, C0->ssub_sat(*C1));
-      case Intrinsic::cttz:
-      case Intrinsic::ctlz:
-        assert(C1 && "Must be constant int");
-
-        // cttz(0, 1) and ctlz(0, 1) are undef.
-        if (C1->isOneValue() && (!C0 || C0->isNullValue()))
-          return UndefValue::get(Ty);
-        if (!C0)
-          return Constant::getNullValue(Ty);
-        if (IntrinsicID == Intrinsic::cttz)
-          return ConstantInt::get(Ty, C0->countTrailingZeros());
-        else
-          return ConstantInt::get(Ty, C0->countLeadingZeros());
+
+      if (IntrinsicID == Intrinsic::maximum) {
+        const APFloat &C1 = Op1->getValueAPF();
+        const APFloat &C2 = Op2->getValueAPF();
+        return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
       }
 
-      return nullptr;
+      if (!TLI)
+        return nullptr;
+      if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
+          (Name == "powf" && TLI->has(LibFunc_powf)) ||
+          (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
+          (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
+        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+      if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
+          (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
+        return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+      if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
+          (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
+          (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
+          (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
+        return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+    } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+      if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
+        return ConstantFP::get(Ty->getContext(),
+                               APFloat((float)std::pow((float)Op1V,
+                                               (int)Op2C->getZExtValue())));
+      if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
+        return ConstantFP::get(Ty->getContext(),
+                               APFloat((float)std::pow((float)Op1V,
+                                               (int)Op2C->getZExtValue())));
+      if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
+        return ConstantFP::get(Ty->getContext(),
+                               APFloat((double)std::pow((double)Op1V,
+                                                 (int)Op2C->getZExtValue())));
     }
+    return nullptr;
+  }
 
-    // Support ConstantVector in case we have an Undef in the top.
-    if ((isa<ConstantVector>(Operands[0]) ||
-         isa<ConstantDataVector>(Operands[0])) &&
-        // Check for default rounding mode.
-        // FIXME: Support other rounding modes?
-        isa<ConstantInt>(Operands[1]) &&
-        cast<ConstantInt>(Operands[1])->getValue() == 4) {
-      auto *Op = cast<Constant>(Operands[0]);
+  if (Operands[0]->getType()->isIntegerTy() &&
+      Operands[1]->getType()->isIntegerTy()) {
+    const APInt *C0, *C1;
+    if (!getConstIntOrUndef(Operands[0], C0) ||
+        !getConstIntOrUndef(Operands[1], C1))
+      return nullptr;
+
+    switch (IntrinsicID) {
+    default: break;
+    case Intrinsic::smul_with_overflow:
+    case Intrinsic::umul_with_overflow:
+      // Even if both operands are undef, we cannot fold muls to undef
+      // in the general case. For example, on i2 there are no inputs
+      // that would produce { i2 -1, i1 true } as the result.
+      if (!C0 || !C1)
+        return Constant::getNullValue(Ty);
+      LLVM_FALLTHROUGH;
+    case Intrinsic::sadd_with_overflow:
+    case Intrinsic::uadd_with_overflow:
+    case Intrinsic::ssub_with_overflow:
+    case Intrinsic::usub_with_overflow: {
+      if (!C0 || !C1)
+        return UndefValue::get(Ty);
+
+      APInt Res;
+      bool Overflow;
       switch (IntrinsicID) {
-      default: break;
-      case Intrinsic::x86_avx512_vcvtss2si32:
-      case Intrinsic::x86_avx512_vcvtss2si64:
-      case Intrinsic::x86_avx512_vcvtsd2si32:
-      case Intrinsic::x86_avx512_vcvtsd2si64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/false, Ty,
-                                             /*IsSigned*/true);
+      default: llvm_unreachable("Invalid case");
+      case Intrinsic::sadd_with_overflow:
+        Res = C0->sadd_ov(*C1, Overflow);
+        break;
+      case Intrinsic::uadd_with_overflow:
+        Res = C0->uadd_ov(*C1, Overflow);
         break;
-      case Intrinsic::x86_avx512_vcvtss2usi32:
-      case Intrinsic::x86_avx512_vcvtss2usi64:
-      case Intrinsic::x86_avx512_vcvtsd2usi32:
-      case Intrinsic::x86_avx512_vcvtsd2usi64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/false, Ty,
-                                             /*IsSigned*/false);
+      case Intrinsic::ssub_with_overflow:
+        Res = C0->ssub_ov(*C1, Overflow);
+        break;
+      case Intrinsic::usub_with_overflow:
+        Res = C0->usub_ov(*C1, Overflow);
         break;
-      case Intrinsic::x86_avx512_cvttss2si:
-      case Intrinsic::x86_avx512_cvttss2si64:
-      case Intrinsic::x86_avx512_cvttsd2si:
-      case Intrinsic::x86_avx512_cvttsd2si64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/true, Ty,
-                                             /*IsSigned*/true);
+      case Intrinsic::smul_with_overflow:
+        Res = C0->smul_ov(*C1, Overflow);
         break;
-      case Intrinsic::x86_avx512_cvttss2usi:
-      case Intrinsic::x86_avx512_cvttss2usi64:
-      case Intrinsic::x86_avx512_cvttsd2usi:
-      case Intrinsic::x86_avx512_cvttsd2usi64:
-        if (ConstantFP *FPOp =
-                dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
-          return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
-                                             /*roundTowardZero=*/true, Ty,
-                                             /*IsSigned*/false);
+      case Intrinsic::umul_with_overflow:
+        Res = C0->umul_ov(*C1, Overflow);
         break;
       }
+      Constant *Ops[] = {
+        ConstantInt::get(Ty->getContext(), Res),
+        ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+      };
+      return ConstantStruct::get(cast<StructType>(Ty), Ops);
+    }
+    case Intrinsic::uadd_sat:
+    case Intrinsic::sadd_sat:
+      if (!C0 && !C1)
+        return UndefValue::get(Ty);
+      if (!C0 || !C1)
+        return Constant::getAllOnesValue(Ty);
+      if (IntrinsicID == Intrinsic::uadd_sat)
+        return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+      else
+        return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+    case Intrinsic::usub_sat:
+    case Intrinsic::ssub_sat:
+      if (!C0 && !C1)
+        return UndefValue::get(Ty);
+      if (!C0 || !C1)
+        return Constant::getNullValue(Ty);
+      if (IntrinsicID == Intrinsic::usub_sat)
+        return ConstantInt::get(Ty, C0->usub_sat(*C1));
+      else
+        return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+    case Intrinsic::cttz:
+    case Intrinsic::ctlz:
+      assert(C1 && "Must be constant int");
+
+      // cttz(0, 1) and ctlz(0, 1) are undef.
+      if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+        return UndefValue::get(Ty);
+      if (!C0)
+        return Constant::getNullValue(Ty);
+      if (IntrinsicID == Intrinsic::cttz)
+        return ConstantInt::get(Ty, C0->countTrailingZeros());
+      else
+        return ConstantInt::get(Ty, C0->countLeadingZeros());
     }
+
     return nullptr;
   }
 
-  if (Operands.size() != 3)
-    return nullptr;
+  // Support ConstantVector in case we have an Undef in the top.
+  if ((isa<ConstantVector>(Operands[0]) ||
+       isa<ConstantDataVector>(Operands[0])) &&
+      // Check for default rounding mode.
+      // FIXME: Support other rounding modes?
+      isa<ConstantInt>(Operands[1]) &&
+      cast<ConstantInt>(Operands[1])->getValue() == 4) {
+    auto *Op = cast<Constant>(Operands[0]);
+    switch (IntrinsicID) {
+    default: break;
+    case Intrinsic::x86_avx512_vcvtss2si32:
+    case Intrinsic::x86_avx512_vcvtss2si64:
+    case Intrinsic::x86_avx512_vcvtsd2si32:
+    case Intrinsic::x86_avx512_vcvtsd2si64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/false, Ty,
+                                           /*IsSigned*/true);
+      break;
+    case Intrinsic::x86_avx512_vcvtss2usi32:
+    case Intrinsic::x86_avx512_vcvtss2usi64:
+    case Intrinsic::x86_avx512_vcvtsd2usi32:
+    case Intrinsic::x86_avx512_vcvtsd2usi64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/false, Ty,
+                                           /*IsSigned*/false);
+      break;
+    case Intrinsic::x86_avx512_cvttss2si:
+    case Intrinsic::x86_avx512_cvttss2si64:
+    case Intrinsic::x86_avx512_cvttsd2si:
+    case Intrinsic::x86_avx512_cvttsd2si64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/true, Ty,
+                                           /*IsSigned*/true);
+      break;
+    case Intrinsic::x86_avx512_cvttss2usi:
+    case Intrinsic::x86_avx512_cvttss2usi64:
+    case Intrinsic::x86_avx512_cvttsd2usi:
+    case Intrinsic::x86_avx512_cvttsd2usi64:
+      if (ConstantFP *FPOp =
+              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+                                           /*roundTowardZero=*/true, Ty,
+                                           /*IsSigned*/false);
+      break;
+    }
+  }
+  return nullptr;
+}
+
+static Constant *ConstantFoldScalarCall3(StringRef Name,
+                                         Intrinsic::ID IntrinsicID,
+                                         Type *Ty,
+                                         ArrayRef<Constant *> Operands,
+                                         const TargetLibraryInfo *TLI,
+                                         const CallBase *Call) {
+  assert(Operands.size() == 3 && "Wrong number of operands.");
 
   if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
     if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -2179,6 +2207,43 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
     }
   }
 
+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for specifying
+          // how rounding should be done, and provide their own folding to be
+          // consistent with rounding. This is the same approach as used by
+          // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+              APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+              APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
   if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
     const APInt *C0, *C1, *C2;
     if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2212,11 +2277,31 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
   return nullptr;
 }
 
-Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
-                                 VectorType *VTy, ArrayRef<Constant *> Operands,
-                                 const DataLayout &DL,
-                                 const TargetLibraryInfo *TLI,
-                                 ImmutableCallSite CS) {
+static Constant *ConstantFoldScalarCall(StringRef Name,
+                                        Intrinsic::ID IntrinsicID,
+                                        Type *Ty,
+                                        ArrayRef<Constant *> Operands,
+                                        const TargetLibraryInfo *TLI,
+                                        const CallBase *Call) {
+  if (Operands.size() == 1) // Dispatch to the folder for this operand count.
+    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+  if (Operands.size() == 2)
+    return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+  if (Operands.size() == 3)
+    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+  return nullptr; // Other operand counts are not folded here.
+}
+
+static Constant *ConstantFoldVectorCall(StringRef Name,
+                                        Intrinsic::ID IntrinsicID,
+                                        VectorType *VTy,
+                                        ArrayRef<Constant *> Operands,
+                                        const DataLayout &DL,
+                                        const TargetLibraryInfo *TLI,
+                                        const CallBase *Call) {
   SmallVector<Constant *, 4> Result(VTy->getNumElements());
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = VTy->getElementType();
@@ -2263,10 +2348,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
   for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
-      // These intrinsics use a scalar type for their second argument.
-      if (J == 1 &&
-          (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz ||
-           IntrinsicID == Intrinsic::powi)) {
+      // Some intrinsics use a scalar type for certain arguments.
+      if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
         Lane[J] = Operands[J];
         continue;
       }
@@ -2279,7 +2362,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
     }
 
     // Use the regular scalar folding to simplify this column.
-    Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS);
+    Constant *Folded =
+        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
     if (!Folded)
       return nullptr;
     Result[I] = Folded;
@@ -2290,11 +2374,10 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
 
 } // end anonymous namespace
 
-Constant *
-llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
-                       ArrayRef<Constant *> Operands,
-                       const TargetLibraryInfo *TLI) {
-  if (CS.isNoBuiltin() || CS.isStrictFP())
+Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
+                                 ArrayRef<Constant *> Operands,
+                                 const TargetLibraryInfo *TLI) {
+  if (Call->isNoBuiltin() || Call->isStrictFP())
     return nullptr;
   if (!F->hasName())
     return nullptr;
@@ -2304,17 +2387,19 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
 
   if (auto *VTy = dyn_cast<VectorType>(Ty))
     return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
-                                  F->getParent()->getDataLayout(), TLI, CS);
+                                  F->getParent()->getDataLayout(), TLI, Call);
 
-  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS);
+  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
+                                Call);
 }
 
-bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+bool llvm::isMathLibCallNoop(const CallBase *Call,
+                             const TargetLibraryInfo *TLI) {
   // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
   // (and to some extent ConstantFoldScalarCall).
-  if (CS.isNoBuiltin() || CS.isStrictFP())
+  if (Call->isNoBuiltin() || Call->isStrictFP())
     return false;
-  Function *F = CS.getCalledFunction();
+  Function *F = Call->getCalledFunction();
   if (!F)
     return false;
 
@@ -2322,8 +2407,8 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
   if (!TLI || !TLI->getLibFunc(*F, Func))
     return false;
 
-  if (CS.getNumArgOperands() == 1) {
-    if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+  if (Call->getNumArgOperands() == 1) {
+    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
       const APFloat &Op = OpC->getValueAPF();
       switch (Func) {
       case LibFunc_logl:
@@ -2421,9 +2506,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
     }
   }
 
-  if (CS.getNumArgOperands() == 2) {
-    ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
-    ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+  if (Call->getNumArgOperands() == 2) {
+    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
+    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
     if (Op0C && Op1C) {
       const APFloat &Op0 = Op0C->getValueAPF();
       const APFloat &Op1 = Op1C->getValueAPF();
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 3d55bf20bb40..bf0cdbfd0c8b 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -1,9 +1,8 @@
 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 4cafb7da16d3..c1043e446beb 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -1,9 +1,8 @@
 //===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 34f785fb02be..01b8ff10d355 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -1,9 +1,8 @@
 //===- DemandedBits.cpp - Determine demanded bits -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -340,6 +339,8 @@ void DemandedBits::performAnalysis() {
         Type *T = J->getType();
         if (T->isIntOrIntVectorTy())
           AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+        else
+          Visited.insert(J);
         Worklist.insert(J);
       }
     }
@@ -355,16 +356,18 @@ void DemandedBits::performAnalysis() {
 
     LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
     APInt AOut;
+    bool InputIsKnownDead = false;
     if (UserI->getType()->isIntOrIntVectorTy()) {
       AOut = AliveBits[UserI];
       LLVM_DEBUG(dbgs() << " Alive Out: 0x"
                         << Twine::utohexstr(AOut.getLimitedValue()));
+
+      // If all bits of the output are dead, then all bits of the input
+      // are also dead.
+      InputIsKnownDead = !AOut && !isAlwaysLive(UserI);
     }
     LLVM_DEBUG(dbgs() << "\n");
 
-    if (!UserI->getType()->isIntOrIntVectorTy())
-      Visited.insert(UserI);
-
     KnownBits Known, Known2;
     bool KnownBitsComputed = false;
     // Compute the set of alive bits for each operand. These are anded into the
@@ -381,10 +384,7 @@ void DemandedBits::performAnalysis() {
       if (T->isIntOrIntVectorTy()) {
         unsigned BitWidth = T->getScalarSizeInBits();
         APInt AB = APInt::getAllOnesValue(BitWidth);
-        if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
-            !isAlwaysLive(UserI)) {
-          // If all bits of the output are dead, then all bits of the input
-          // are also dead.
+        if (InputIsKnownDead) {
           AB = APInt(BitWidth, 0);
         } else {
           // Bits of each operand that are used to compute alive bits of the
@@ -403,18 +403,13 @@ void DemandedBits::performAnalysis() {
           // If we've added to the set of alive bits (or the operand has not
           // been previously visited), then re-queue the operand to be visited
           // again.
-          APInt ABPrev(BitWidth, 0);
-          auto ABI = AliveBits.find(I);
-          if (ABI != AliveBits.end())
-            ABPrev = ABI->second;
-
-          APInt ABNew = AB | ABPrev;
-          if (ABNew != ABPrev || ABI == AliveBits.end()) {
-            AliveBits[I] = std::move(ABNew);
+          auto Res = AliveBits.try_emplace(I);
+          if (Res.second || (AB |= Res.first->second) != Res.first->second) {
+            Res.first->second = std::move(AB);
             Worklist.insert(I);
           }
         }
-      } else if (I && !Visited.count(I)) {
+      } else if (I && Visited.insert(I).second) {
         Worklist.insert(I);
       }
     }
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 3f4dfa52e1da..75f269e84f9d 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -1,9 +1,8 @@
 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,6 +109,14 @@ STATISTIC(BanerjeeSuccesses, "Banerjee successes");
 static cl::opt<bool>
     Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
                 cl::desc("Try to delinearize array references."));
+static cl::opt<bool> DisableDelinearizationChecks(
+    "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
+    cl::ZeroOrMore,
+    cl::desc(
+        "Disable checks that try to statically verify validity of "
+        "delinearized subscripts. Enabling this option may result in incorrect "
+        "dependence vectors for languages that allow the subscript of one "
+        "dimension to underflow or overflow into another dimension."));
 
 //===----------------------------------------------------------------------===//
 // basics
@@ -3317,19 +3324,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
   // and dst.
   // FIXME: It may be better to record these sizes and add them as constraints
   // to the dependency checks.
-  for (int i = 1; i < size; ++i) {
-    if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
-      return false;
+  if (!DisableDelinearizationChecks)
+    for (int i = 1; i < size; ++i) {
+      if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+        return false;
 
-    if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
-      return false;
+      if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+        return false;
 
-    if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
-      return false;
+      if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+        return false;
 
-    if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
-      return false;
-  }
+      if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+        return false;
+    }
 
   LLVM_DEBUG({
     dbgs() << "\nSrcSubscripts: ";
@@ -3369,6 +3377,19 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
 }
 #endif
 
+bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+                                FunctionAnalysisManager::Invalidator &Inv) {
+  // Invalidated unless this analysis, or all function analyses, is preserved.
+  auto PAC = PA.getChecker<DependenceAnalysis>();
+  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+    return true;
+
+  // Otherwise defer to the analyses this result transitively depends on.
+  return Inv.invalidate<AAManager>(F, PA) ||
+         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+         Inv.invalidate<LoopAnalysis>(F, PA);
+}
+
 // depends -
 // Returns NULL if there is no dependence.
 // Otherwise, return a Dependence with as many details as possible.
@@ -3510,7 +3531,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   // to either Separable or Coupled).
   //
   // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
-  // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
+  // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
   // so Pair[3].Group = {0, 1, 3} and Done = false.
   //
   // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 7ba23854a3cc..0ccd59ef2bfd 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 8abc0e7d0df9..d9f43dd746ef 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -1,9 +1,8 @@
 //===- DomPrinter.cpp - DOT printer for the dominance trees    ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp
new file mode 100644
index 000000000000..49215889cfd6
--- /dev/null
+++ b/lib/Analysis/DomTreeUpdater.cpp
@@ -0,0 +1,533 @@
+//===- DomTreeUpdater.cpp - DomTree/Post DomTree Updater --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DomTreeUpdater class, which provides a uniform way
+// to update dominator tree related data structures.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <algorithm>
+#include <functional>
+#include <utility>
+
+namespace llvm {
+
+bool DomTreeUpdater::isUpdateValid(
+    const DominatorTree::UpdateType Update) const {
+  const auto *From = Update.getFrom();
+  const auto *To = Update.getTo();
+  const auto Kind = Update.getKind();
+
+  // Discard updates by inspecting the current state of successors of From.
+  // Since isUpdateValid() must be called *after* the Terminator of From is
+  // altered we can determine if the update is unnecessary for batch updates
+  // or invalid for a single update.
+  const bool HasEdge = llvm::any_of(
+      successors(From), [To](const BasicBlock *B) { return B == To; });
+
+  // If the IR does not match the update,
+  // 1. In batch updates, this update is unnecessary.
+  // 2. When called by insertEdge*()/deleteEdge*(), this update is invalid.
+  // Edge does not exist in IR.
+  if (Kind == DominatorTree::Insert && !HasEdge)
+    return false;
+
+  // Edge exists in IR.
+  if (Kind == DominatorTree::Delete && HasEdge)
+    return false;
+
+  return true;
+}
+
+bool DomTreeUpdater::isSelfDominance(
+    const DominatorTree::UpdateType Update) const {
+  // Won't affect DomTree and PostDomTree.
+  return Update.getFrom() == Update.getTo();
+}
+
+void DomTreeUpdater::applyDomTreeUpdates() {
+  // No pending DomTreeUpdates.
+  if (Strategy != UpdateStrategy::Lazy || !DT)
+    return;
+
+  // Only apply updates that have not yet been applied by DomTree.
+  if (hasPendingDomTreeUpdates()) {
+    const auto I = PendUpdates.begin() + PendDTUpdateIndex;
+    const auto E = PendUpdates.end();
+    assert(I < E && "Iterator range invalid; there should be DomTree updates.");
+    DT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
+    PendDTUpdateIndex = PendUpdates.size();
+  }
+}
+
+void DomTreeUpdater::flush() {
+  applyDomTreeUpdates();
+  applyPostDomTreeUpdates();
+  dropOutOfDateUpdates();
+}
+
+void DomTreeUpdater::applyPostDomTreeUpdates() {
+  // No pending PostDomTreeUpdates.
+  if (Strategy != UpdateStrategy::Lazy || !PDT)
+    return;
+
+  // Only apply updates that have not yet been applied by PostDomTree.
+  if (hasPendingPostDomTreeUpdates()) {
+    const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
+    const auto E = PendUpdates.end();
+    assert(I < E &&
+           "Iterator range invalid; there should be PostDomTree updates.");
+    PDT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
+    PendPDTUpdateIndex = PendUpdates.size();
+  }
+}
+
+void DomTreeUpdater::tryFlushDeletedBB() {
+  if (!hasPendingUpdates())
+    forceFlushDeletedBB();
+}
+
+bool DomTreeUpdater::forceFlushDeletedBB() {
+  if (DeletedBBs.empty())
+    return false;
+
+  for (auto *BB : DeletedBBs) {
+    // After calling deleteBB or callbackDeleteBB under Lazy UpdateStrategy,
+    // validateDeleteBB() removes all instructions of DelBB and adds an
+    // UnreachableInst as its terminator. So we check whether the BasicBlock to
+    // delete only has an UnreachableInst inside.
+    assert(BB->getInstList().size() == 1 &&
+           isa<UnreachableInst>(BB->getTerminator()) &&
+           "DelBB has been modified while awaiting deletion.");
+    BB->removeFromParent();
+    eraseDelBBNode(BB);
+    delete BB;
+  }
+  DeletedBBs.clear();
+  Callbacks.clear();
+  return true;
+}
+
+void DomTreeUpdater::recalculate(Function &F) {
+
+  if (Strategy == UpdateStrategy::Eager) {
+    if (DT)
+      DT->recalculate(F);
+    if (PDT)
+      PDT->recalculate(F);
+    return;
+  }
+
+  // There is little performance gain if we pend the recalculation under
+  // Lazy UpdateStrategy so we recalculate available trees immediately.
+
+  // Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes.
+  IsRecalculatingDomTree = IsRecalculatingPostDomTree = true;
+
+  // Because all trees are going to be up-to-date after recalculation,
+  // flush awaiting deleted BasicBlocks.
+  forceFlushDeletedBB();
+  if (DT)
+    DT->recalculate(F);
+  if (PDT)
+    PDT->recalculate(F);
+
+  // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
+  IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
+  PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
+  dropOutOfDateUpdates();
+}
+
+bool DomTreeUpdater::hasPendingUpdates() const {
+  return hasPendingDomTreeUpdates() || hasPendingPostDomTreeUpdates();
+}
+
+bool DomTreeUpdater::hasPendingDomTreeUpdates() const {
+  if (!DT)
+    return false;
+  return PendUpdates.size() != PendDTUpdateIndex;
+}
+
+bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const {
+  if (!PDT)
+    return false;
+  return PendUpdates.size() != PendPDTUpdateIndex;
+}
+
+bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const {
+  if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty())
+    return false;
+  return DeletedBBs.count(DelBB) != 0;
+}
+
+// The DT and PDT require that the nodes related to updates
+// are not deleted when update functions are called.
+// So BasicBlock deletions must be pended when the
+// UpdateStrategy is Lazy. When the UpdateStrategy is
+// Eager, the BasicBlock will be deleted immediately.
+void DomTreeUpdater::deleteBB(BasicBlock *DelBB) {
+  validateDeleteBB(DelBB);
+  if (Strategy == UpdateStrategy::Lazy) {
+    DeletedBBs.insert(DelBB);
+    return;
+  }
+
+  DelBB->removeFromParent();
+  eraseDelBBNode(DelBB);
+  delete DelBB;
+}
+
+void DomTreeUpdater::callbackDeleteBB(
+    BasicBlock *DelBB, std::function<void(BasicBlock *)> Callback) {
+  validateDeleteBB(DelBB);
+  if (Strategy == UpdateStrategy::Lazy) {
+    Callbacks.push_back(CallBackOnDeletion(DelBB, Callback));
+    DeletedBBs.insert(DelBB);
+    return;
+  }
+
+  DelBB->removeFromParent();
+  eraseDelBBNode(DelBB);
+  Callback(DelBB);
+  delete DelBB;
+}
+
+void DomTreeUpdater::eraseDelBBNode(BasicBlock *DelBB) {
+  if (DT && !IsRecalculatingDomTree)
+    if (DT->getNode(DelBB))
+      DT->eraseNode(DelBB);
+
+  if (PDT && !IsRecalculatingPostDomTree)
+    if (PDT->getNode(DelBB))
+      PDT->eraseNode(DelBB);
+}
+
+void DomTreeUpdater::validateDeleteBB(BasicBlock *DelBB) {
+  assert(DelBB && "Invalid push_back of nullptr DelBB.");
+  assert(pred_empty(DelBB) && "DelBB has one or more predecessors.");
+  // DelBB is unreachable and all its instructions are dead.
+  while (!DelBB->empty()) {
+    Instruction &I = DelBB->back();
+    // Replace used instructions with an arbitrary value (undef).
+    if (!I.use_empty())
+      I.replaceAllUsesWith(llvm::UndefValue::get(I.getType()));
+    DelBB->getInstList().pop_back();
+  }
+  // Make sure DelBB has a valid terminator instruction. As long as DelBB is a
+  // Child of Function F it must contain valid IR.
+  new UnreachableInst(DelBB->getContext(), DelBB);
+}
+
+void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) {
+  if (!DT && !PDT)
+    return;
+
+  if (Strategy == UpdateStrategy::Lazy) {
+    for (const auto U : Updates)
+      if (!isSelfDominance(U))
+        PendUpdates.push_back(U);
+
+    return;
+  }
+
+  if (DT)
+    DT->applyUpdates(Updates);
+  if (PDT)
+    PDT->applyUpdates(Updates);
+}
+
+void DomTreeUpdater::applyUpdatesPermissive(
+    ArrayRef<DominatorTree::UpdateType> Updates) {
+  if (!DT && !PDT)
+    return;
+
+  SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen;
+  SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates;
+  for (const auto U : Updates) {
+    auto Edge = std::make_pair(U.getFrom(), U.getTo());
+    // Because it is illegal to submit updates that have already been applied
+    // and updates to an edge need to be strictly ordered,
+    // it is safe to infer the existence of an edge from the first update
+    // to this edge.
+    // If the first update to an edge is "Delete", it means that the edge
+    // existed before. If the first update to an edge is "Insert", it means
+    // that the edge didn't exist before.
+    //
+    // For example, if the user submits {{Delete, A, B}, {Insert, A, B}},
+    // because
+    // 1. it is illegal to submit updates that have already been applied,
+    // i.e., user cannot delete a nonexistent edge,
+    // 2. updates to an edge need to be strictly ordered,
+    // So, initially edge A -> B existed.
+    // We can then safely ignore future updates to this edge and directly
+    // inspect the current CFG:
+    // a. If the edge still exists, then because the user cannot insert an
+    // existent edge, both {Delete, A, B} and {Insert, A, B} actually happened
+    // and resulted in a no-op. DTU won't submit any update in this case.
+    // b. If the edge doesn't exist, we can then infer that {Delete, A, B}
+    // actually happened but {Insert, A, B} was an invalid update which never
+    // happened. DTU will submit {Delete, A, B} in this case.
+    if (!isSelfDominance(U) && Seen.count(Edge) == 0) {
+      Seen.insert(Edge);
+      // If the update doesn't appear in the CFG, it means that
+      // either the change isn't made or relevant operations
+      // result in a no-op.
+      if (isUpdateValid(U)) {
+        if (isLazy())
+          PendUpdates.push_back(U);
+        else
+          DeduplicatedUpdates.push_back(U);
+      }
+    }
+  }
+
+  if (Strategy == UpdateStrategy::Lazy)
+    return;
+
+  if (DT)
+    DT->applyUpdates(DeduplicatedUpdates);
+  if (PDT)
+    PDT->applyUpdates(DeduplicatedUpdates);
+}
+
+DominatorTree &DomTreeUpdater::getDomTree() {
+  assert(DT && "Invalid acquisition of a null DomTree");
+  applyDomTreeUpdates();
+  dropOutOfDateUpdates();
+  return *DT;
+}
+
+PostDominatorTree &DomTreeUpdater::getPostDomTree() {
+  assert(PDT && "Invalid acquisition of a null PostDomTree");
+  applyPostDomTreeUpdates();
+  dropOutOfDateUpdates();
+  return *PDT;
+}
+
+void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
+
+#ifndef NDEBUG
+  assert(isUpdateValid({DominatorTree::Insert, From, To}) &&
+         "Inserted edge does not appear in the CFG");
+#endif
+
+  if (!DT && !PDT)
+    return;
+
+  // Won't affect DomTree and PostDomTree; discard update.
+  if (From == To)
+    return;
+
+  if (Strategy == UpdateStrategy::Eager) {
+    if (DT)
+      DT->insertEdge(From, To);
+    if (PDT)
+      PDT->insertEdge(From, To);
+    return;
+  }
+
+  PendUpdates.push_back({DominatorTree::Insert, From, To});
+}
+
+void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
+  if (From == To)
+    return;
+
+  if (!DT && !PDT)
+    return;
+
+  if (!isUpdateValid({DominatorTree::Insert, From, To}))
+    return;
+
+  if (Strategy == UpdateStrategy::Eager) {
+    if (DT)
+      DT->insertEdge(From, To);
+    if (PDT)
+      PDT->insertEdge(From, To);
+    return;
+  }
+
+  PendUpdates.push_back({DominatorTree::Insert, From, To});
+}
+
+void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
+
+#ifndef NDEBUG
+  assert(isUpdateValid({DominatorTree::Delete, From, To}) &&
+         "Deleted edge still exists in the CFG!");
+#endif
+
+  if (!DT && !PDT)
+    return;
+
+  // Won't affect DomTree and PostDomTree; discard update.
+  if (From == To)
+    return;
+
+  if (Strategy == UpdateStrategy::Eager) {
+    if (DT)
+      DT->deleteEdge(From, To);
+    if (PDT)
+      PDT->deleteEdge(From, To);
+    return;
+  }
+
+  PendUpdates.push_back({DominatorTree::Delete, From, To});
+}
+
+void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
+  if (From == To)
+    return;
+
+  if (!DT && !PDT)
+    return;
+
+  if (!isUpdateValid({DominatorTree::Delete, From, To}))
+    return;
+
+  if (Strategy == UpdateStrategy::Eager) {
+    if (DT)
+      DT->deleteEdge(From, To);
+    if (PDT)
+      PDT->deleteEdge(From, To);
+    return;
+  }
+
+  PendUpdates.push_back({DominatorTree::Delete, From, To});
+}
+
+void DomTreeUpdater::dropOutOfDateUpdates() {
+  if (Strategy == DomTreeUpdater::UpdateStrategy::Eager)
+    return;
+
+  tryFlushDeletedBB();
+
+  // Drop all updates applied by both trees.
+  if (!DT)
+    PendDTUpdateIndex = PendUpdates.size();
+  if (!PDT)
+    PendPDTUpdateIndex = PendUpdates.size();
+
+  const size_t dropIndex = std::min(PendDTUpdateIndex, PendPDTUpdateIndex);
+  const auto B = PendUpdates.begin();
+  const auto E = PendUpdates.begin() + dropIndex;
+  assert(B <= E && "Iterator out of range.");
+  PendUpdates.erase(B, E);
+  // Calculate current index.
+  PendDTUpdateIndex -= dropIndex;
+  PendPDTUpdateIndex -= dropIndex;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DomTreeUpdater::dump() const {
+  raw_ostream &OS = llvm::dbgs();
+
+  OS << "Available Trees: ";
+  if (DT || PDT) {
+    if (DT)
+      OS << "DomTree ";
+    if (PDT)
+      OS << "PostDomTree ";
+    OS << "\n";
+  } else
+    OS << "None\n";
+
+  OS << "UpdateStrategy: ";
+  if (Strategy == UpdateStrategy::Eager) {
+    OS << "Eager\n";
+    return;
+  } else
+    OS << "Lazy\n";
+  int Index = 0;
+
+  auto printUpdates =
+      [&](ArrayRef<DominatorTree::UpdateType>::const_iterator begin,
+          ArrayRef<DominatorTree::UpdateType>::const_iterator end) {
+        if (begin == end)
+          OS << "  None\n";
+        Index = 0;
+        for (auto It = begin, ItEnd = end; It != ItEnd; ++It) {
+          auto U = *It;
+          OS << "  " << Index << " : ";
+          ++Index;
+          if (U.getKind() == DominatorTree::Insert)
+            OS << "Insert, ";
+          else
+            OS << "Delete, ";
+          BasicBlock *From = U.getFrom();
+          if (From) {
+            auto S = From->getName();
+            if (!From->hasName())
+              S = "(no name)";
+            OS << S << "(" << From << "), ";
+          } else {
+            OS << "(badref), ";
+          }
+          BasicBlock *To = U.getTo();
+          if (To) {
+            auto S = To->getName();
+            if (!To->hasName())
+              S = "(no_name)";
+            OS << S << "(" << To << ")\n";
+          } else {
+            OS << "(badref)\n";
+          }
+        }
+      };
+
+  if (DT) {
+    const auto I = PendUpdates.begin() + PendDTUpdateIndex;
+    assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
+           "Iterator out of range.");
+    OS << "Applied but not cleared DomTreeUpdates:\n";
+    printUpdates(PendUpdates.begin(), I);
+    OS << "Pending DomTreeUpdates:\n";
+    printUpdates(I, PendUpdates.end());
+  }
+
+  if (PDT) {
+    const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
+    assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
+           "Iterator out of range.");
+    OS << "Applied but not cleared PostDomTreeUpdates:\n";
+    printUpdates(PendUpdates.begin(), I);
+    OS << "Pending PostDomTreeUpdates:\n";
+    printUpdates(I, PendUpdates.end());
+  }
+
+  OS << "Pending DeletedBBs:\n";
+  Index = 0;
+  for (auto BB : DeletedBBs) {
+    OS << "  " << Index << " : ";
+    ++Index;
+    if (BB->hasName())
+      OS << BB->getName() << "(";
+    else
+      OS << "(no_name)(";
+    OS << BB << ")\n";
+  }
+
+  OS << "Pending Callbacks:\n";
+  Index = 0;
+  for (auto BB : Callbacks) {
+    OS << "  " << Index << " : ";
+    ++Index;
+    if (BB->hasName())
+      OS << BB->getName() << "(";
+    else
+      OS << "(no_name)(";
+    OS << BB << ")\n";
+  }
+}
+#endif
+} // namespace llvm
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index de7f62cf4ecd..f9a554acb7ea 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -1,9 +1,8 @@
 //===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 0df73aeebbdc..2242541696a4 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -1,9 +1,8 @@
 //===- EHPersonalities.cpp - Compute EH-related information ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index b28abcadca4a..0d6c0ffb18a8 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -1,9 +1,8 @@
 //===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -514,7 +513,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
         break;
       }
 
-      if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      if (F->isDeclaration() || F->hasOptNone()) {
         // Try to get mod/ref behaviour from function attributes.
         if (F->doesNotAccessMemory()) {
           // Can't do better than that!
@@ -567,7 +566,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
       // Don't prove any properties based on the implementation of an optnone
       // function. Function attributes were already used as a best approximation
       // above.
-      if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone))
+      if (Node->getFunction()->hasOptNone())
         continue;
 
       for (Instruction &I : instructions(Node->getFunction())) {
@@ -597,7 +596,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
         }
 
         // All non-call instructions we use the primary predicates for whether
-        // thay read or write memory.
+        // they read or write memory.
         if (I.mayReadFromMemory())
           FI.addModRefInfo(ModRefInfo::Ref);
         if (I.mayWriteToMemory())
@@ -791,10 +790,10 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
     }
 
     // FIXME: It would be good to handle other obvious no-alias cases here, but
-    // it isn't clear how to do so reasonbly without building a small version
+    // it isn't clear how to do so reasonably without building a small version
     // of BasicAA into this code. We could recurse into AAResultBase::alias
     // here but that seems likely to go poorly as we're inside the
-    // implementation of such a query. Until then, just conservatievly retun
+    // implementation of such a query. Until then, just conservatively return
     // false.
     return false;
   } while (!Inputs.empty());
@@ -807,7 +806,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
 /// other is some random pointer, we know there cannot be an alias, because the
 /// address of the global isn't taken.
 AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
-                                   const MemoryLocation &LocB) {
+                                   const MemoryLocation &LocB,
+                                   AAQueryInfo &AAQI) {
   // Get the base object these pointers point to.
   const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
   const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
@@ -882,11 +882,12 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
     if ((GV1 || GV2) && GV1 != GV2)
       return NoAlias;
 
-  return AAResultBase::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB, AAQI);
 }
 
 ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
-                                                     const GlobalValue *GV) {
+                                                     const GlobalValue *GV,
+                                                     AAQueryInfo &AAQI) {
   if (Call->doesNotAccessMemory())
     return ModRefInfo::NoModRef;
   ModRefInfo ConservativeResult =
@@ -895,14 +896,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
   // Iterate through all the arguments to the called function. If any argument
   // is based on GV, return the conservative result.
   for (auto &A : Call->args()) {
-    SmallVector<Value*, 4> Objects;
+    SmallVector<const Value*, 4> Objects;
     GetUnderlyingObjects(A, Objects, DL);
 
     // All objects must be identified.
     if (!all_of(Objects, isIdentifiedObject) &&
         // Try ::alias to see if all objects are known not to alias GV.
-        !all_of(Objects, [&](Value *V) {
-          return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
+        !all_of(Objects, [&](const Value *V) {
+          return this->alias(MemoryLocation(V), MemoryLocation(GV), AAQI) ==
+                 NoAlias;
         }))
       return ConservativeResult;
 
@@ -915,7 +917,8 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
 }
 
 ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
-                                          const MemoryLocation &Loc) {
+                                          const MemoryLocation &Loc,
+                                          AAQueryInfo &AAQI) {
   ModRefInfo Known = ModRefInfo::ModRef;
 
   // If we are asking for mod/ref info of a direct call with a pointer to a
@@ -927,11 +930,11 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
         if (NonAddressTakenGlobals.count(GV))
           if (const FunctionInfo *FI = getFunctionInfo(F))
             Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
-                                getModRefInfoForArgument(Call, GV));
+                                getModRefInfoForArgument(Call, GV, AAQI));
 
   if (!isModOrRefSet(Known))
     return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
-  return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
+  return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI));
 }
 
 GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
diff --git a/lib/Analysis/GuardUtils.cpp b/lib/Analysis/GuardUtils.cpp
index 08fa6abeafb5..cad92f6e56bb 100644
--- a/lib/Analysis/GuardUtils.cpp
+++ b/lib/Analysis/GuardUtils.cpp
@@ -1,9 +1,8 @@
 //===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Utils that are used to perform analyzes related to guards and their
@@ -19,3 +18,32 @@ bool llvm::isGuard(const User *U) {
   using namespace llvm::PatternMatch;
   return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
 }
+
+bool llvm::isGuardAsWidenableBranch(const User *U) {
+  Value *Condition, *WidenableCondition;
+  BasicBlock *GuardedBB, *DeoptBB;
+  if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
+                            DeoptBB))
+    return false;
+  using namespace llvm::PatternMatch;
+  for (auto &Insn : *DeoptBB) {
+    if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
+      return true;
+    if (Insn.mayHaveSideEffects())
+      return false;
+  }
+  return false;
+}
+
+bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
+                                Value *&WidenableCondition,
+                                BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
+  using namespace llvm::PatternMatch;
+  if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
+                     IfTrueBB, IfFalseBB)))
+    return false;
+  // TODO: At the moment, we only recognize the branch if the WC call is in
+  // this specific position.  We should generalize!
+  return match(WidenableCondition,
+               m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+}
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index aaebc4a481ec..ce285f82f720 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -26,7 +26,6 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -252,6 +251,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   Worklist.push_back(Start);
   VisitedInsts.insert(Start);
 
+  // Start with all flags set because we will intersect this with the reduction
+  // flags from all the reduction operations.
+  FastMathFlags FMF = FastMathFlags::getFast();
+
   // A value in the reduction can be used:
   //  - By the reduction:
   //      - Reduction operation:
@@ -297,6 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
       ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
       if (!ReduxDesc.isRecurrence())
         return false;
+      if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
+        FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
@@ -442,7 +447,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
 
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(
-      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+      RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
       ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
   RedDes = RD;
 
@@ -550,9 +555,8 @@ RecurrenceDescriptor::isConditionalRdxPattern(
 RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
-  bool FP = I->getType()->isFloatingPointTy();
   Instruction *UAI = Prev.getUnsafeAlgebraInst();
-  if (!UAI && FP && !I->isFast())
+  if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
     UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
 
   switch (I->getOpcode()) {
@@ -1010,7 +1014,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
   // If we started from an UnknownSCEV, and managed to build an addRecurrence
   // only after enabling Assume with PSCEV, this means we may have encountered
   // cast instructions that required adding a runtime check in order to
-  // guarantee the correctness of the AddRecurence respresentation of the
+  // guarantee the correctness of the AddRecurrence representation of the
   // induction.
   if (PhiScev != AR && SymbolicPhi) {
     SmallVector<Instruction *, 2> Casts;
@@ -1049,6 +1053,13 @@ bool InductionDescriptor::isInductionPHI(
 
   Value *StartValue =
       Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+
+  BasicBlock *Latch = AR->getLoop()->getLoopLatch();
+  if (!Latch)
+    return false;
+  BinaryOperator *BOp =
+      dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
+
   const SCEV *Step = AR->getStepRecurrence(*SE);
   // Calculate the pointer stride and check if it is consecutive.
   // The stride may be a constant or a loop invariant integer value.
@@ -1057,7 +1068,7 @@ bool InductionDescriptor::isInductionPHI(
     return false;
 
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
                             CastsToIgnore);
     return true;
   }
@@ -1084,6 +1095,6 @@ bool InductionDescriptor::isInductionPHI(
     return false;
   auto *StepValue =
       SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
-  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+  D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
   return true;
 }
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 609e5e3a1448..681a0cf7e981 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -1,9 +1,8 @@
 //===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index d6e6e76af03c..6ff840efcb64 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -1,9 +1,8 @@
 //===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6ddb3cbc01a3..0dec146e0465 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -1,9 +1,8 @@
 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,7 +27,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
@@ -37,6 +35,7 @@
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -51,19 +50,19 @@ static cl::opt<int> InlineThreshold(
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325),
+    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, 
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
-                          cl::init(45),
+                          cl::init(45), cl::ZeroOrMore,
                           cl::desc("Threshold for inlining cold callsites"));
 
 // We introduce this threshold to help performance of instrumentation based
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45),
+    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, 
     cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
@@ -77,7 +76,7 @@ static cl::opt<int> LocallyHotCallSiteThreshold(
 
 static cl::opt<int> ColdCallSiteRelFreq(
     "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
-    cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
              "entry frequency, for a callsite to be cold in the absence of "
              "profile information."));
 
@@ -88,7 +87,7 @@ static cl::opt<int> HotCallSiteRelFreq(
              "profile information."));
 
 static cl::opt<bool> OptComputeFullInlineCost(
-    "inline-cost-full", cl::Hidden, cl::init(false),
+    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
     cl::desc("Compute the full inline cost of a call site even when the cost "
              "exceeds the threshold."));
 
@@ -122,31 +121,43 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// The candidate callsite being analyzed. Please do not use this to do
   /// analysis in the caller function; we want the inline cost query to be
   /// easily cacheable. Instead, use the cover function paramHasAttr.
-  CallSite CandidateCS;
+  CallBase &CandidateCall;
 
   /// Tunable parameters that control the analysis.
   const InlineParams &Params;
 
+  /// Upper bound for the inlining cost. Bonuses are being applied to account
+  /// for speculative "expected profit" of the inlining decision.
   int Threshold;
-  int Cost;
+
+  /// Inlining cost measured in abstract units, accounts for all the
+  /// instructions expected to be executed for a given function invocation.
+  /// Instructions that are statically proven to be dead based on call-site
+  /// arguments are not counted here.
+  int Cost = 0;
+
   bool ComputeFullInlineCost;
 
-  bool IsCallerRecursive;
-  bool IsRecursiveCall;
-  bool ExposesReturnsTwice;
-  bool HasDynamicAlloca;
-  bool ContainsNoDuplicateCall;
-  bool HasReturn;
-  bool HasIndirectBr;
-  bool HasUninlineableIntrinsic;
-  bool InitsVargArgs;
+  bool IsCallerRecursive = false;
+  bool IsRecursiveCall = false;
+  bool ExposesReturnsTwice = false;
+  bool HasDynamicAlloca = false;
+  bool ContainsNoDuplicateCall = false;
+  bool HasReturn = false;
+  bool HasIndirectBr = false;
+  bool HasUninlineableIntrinsic = false;
+  bool InitsVargArgs = false;
 
   /// Number of bytes allocated statically by the callee.
-  uint64_t AllocatedSize;
-  unsigned NumInstructions, NumVectorInstructions;
-  int VectorBonus, TenPercentVectorBonus;
-  // Bonus to be applied when the callee has only one reachable basic block.
-  int SingleBBBonus;
+  uint64_t AllocatedSize = 0;
+  unsigned NumInstructions = 0;
+  unsigned NumVectorInstructions = 0;
+
+  /// Bonus to be applied when percentage of vector instructions in callee is
+  /// high (see more details in updateThreshold).
+  int VectorBonus = 0;
+  /// Bonus to be applied when the callee has only one reachable basic block.
+  int SingleBBBonus = 0;
 
   /// While we walk the potentially-inlined instructions, we build up and
   /// maintain a mapping of simplified values specific to this callsite. The
@@ -181,7 +192,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// loads.
   bool EnableLoadElimination;
   SmallPtrSet<Value *, 16> LoadAddrSet;
-  int LoadEliminationCost;
+  int LoadEliminationCost = 0;
 
   // Custom simplification helper routines.
   bool isAllocaDerivedArg(Value *V);
@@ -196,7 +207,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool isGEPFree(GetElementPtrInst &GEP);
   bool canFoldInboundsGEP(GetElementPtrInst &I);
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
-  bool simplifyCallSite(Function *F, CallSite CS);
+  bool simplifyCallSite(Function *F, CallBase &Call);
   template <typename Callable>
   bool simplifyInstruction(Instruction &I, Callable Evaluate);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@@ -216,22 +227,28 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// attributes and callee hotness for PGO builds. The Callee is explicitly
   /// passed to support analyzing indirect calls whose target is inferred by
   /// analysis.
-  void updateThreshold(CallSite CS, Function &Callee);
+  void updateThreshold(CallBase &Call, Function &Callee);
 
-  /// Return true if size growth is allowed when inlining the callee at CS.
-  bool allowSizeGrowth(CallSite CS);
+  /// Return true if size growth is allowed when inlining the callee at \p Call.
+  bool allowSizeGrowth(CallBase &Call);
 
-  /// Return true if \p CS is a cold callsite.
-  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+  /// Return true if \p Call is a cold callsite.
+  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
 
-  /// Return a higher threshold if \p CS is a hot callsite.
-  Optional<int> getHotCallSiteThreshold(CallSite CS,
+  /// Return a higher threshold if \p Call is a hot callsite.
+  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                         BlockFrequencyInfo *CallerBFI);
 
   // Custom analysis routines.
   InlineResult analyzeBlock(BasicBlock *BB,
                             SmallPtrSetImpl<const Value *> &EphValues);
 
+  /// Handle a capped 'int' increment for Cost.
+  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
+    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
+    Cost = (int)std::min(UpperBound, Cost + Inc);
+  }
+
   // Disable several entry points to the visitor so we don't accidentally use
   // them by declaring but not defining them here.
   void visit(Module *);
@@ -256,11 +273,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool visitCmpInst(CmpInst &I);
   bool visitSub(BinaryOperator &I);
   bool visitBinaryOperator(BinaryOperator &I);
+  bool visitFNeg(UnaryOperator &I);
   bool visitLoad(LoadInst &I);
   bool visitStore(StoreInst &I);
   bool visitExtractValue(ExtractValueInst &I);
   bool visitInsertValue(InsertValueInst &I);
-  bool visitCallSite(CallSite CS);
+  bool visitCallBase(CallBase &Call);
   bool visitReturnInst(ReturnInst &RI);
   bool visitBranchInst(BranchInst &BI);
   bool visitSelectInst(SelectInst &SI);
@@ -276,38 +294,29 @@ public:
                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
                ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
-               Function &Callee, CallSite CSArg, const InlineParams &Params)
+               Function &Callee, CallBase &Call, const InlineParams &Params)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
-        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
-        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
-                                       Params.ComputeFullInlineCost || ORE),
-        IsCallerRecursive(false), IsRecursiveCall(false),
-        ExposesReturnsTwice(false), HasDynamicAlloca(false),
-        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
-        HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
-        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
-        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
-        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
-        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
-        NumInstructionsSimplified(0), SROACostSavings(0),
-        SROACostSavingsLost(0) {}
-
-  InlineResult analyzeCall(CallSite CS);
+        CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
+        ComputeFullInlineCost(OptComputeFullInlineCost ||
+                              Params.ComputeFullInlineCost || ORE),
+        EnableLoadElimination(true) {}
+
+  InlineResult analyzeCall(CallBase &Call);
 
   int getThreshold() { return Threshold; }
   int getCost() { return Cost; }
 
   // Keep a bunch of stats about the cost savings found so we can print them
   // out when debugging.
-  unsigned NumConstantArgs;
-  unsigned NumConstantOffsetPtrArgs;
-  unsigned NumAllocaArgs;
-  unsigned NumConstantPtrCmps;
-  unsigned NumConstantPtrDiffs;
-  unsigned NumInstructionsSimplified;
-  unsigned SROACostSavings;
-  unsigned SROACostSavingsLost;
+  unsigned NumConstantArgs = 0;
+  unsigned NumConstantOffsetPtrArgs = 0;
+  unsigned NumAllocaArgs = 0;
+  unsigned NumConstantPtrCmps = 0;
+  unsigned NumConstantPtrDiffs = 0;
+  unsigned NumInstructionsSimplified = 0;
+  unsigned SROACostSavings = 0;
+  unsigned SROACostSavingsLost = 0;
 
   void dump();
 };
@@ -342,7 +351,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
   // If we're no longer able to perform SROA we need to undo its cost savings
   // and prevent subsequent analysis.
-  Cost += CostIt->second;
+  addCost(CostIt->second);
   SROACostSavings -= CostIt->second;
   SROACostSavingsLost += CostIt->second;
   SROAArgCosts.erase(CostIt);
@@ -366,7 +375,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
 
 void CallAnalyzer::disableLoadElimination() {
   if (EnableLoadElimination) {
-    Cost += LoadEliminationCost;
+    addCost(LoadEliminationCost);
     LoadEliminationCost = 0;
     EnableLoadElimination = false;
   }
@@ -701,7 +710,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
 }
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
-  // Propagate constants through ptrtoint.
+  // Propagate constants through casts.
   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
       }))
@@ -721,7 +730,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
   case Instruction::FPToUI:
   case Instruction::FPToSI:
     if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
-      Cost += InlineConstants::CallPenalty;
+      addCost(InlineConstants::CallPenalty);
     break;
   default:
     break;
@@ -737,14 +746,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
       }))
     return true;
 
-  // Disable any SROA on the argument to arbitrary unary operators.
+  // Disable any SROA on the argument to arbitrary unary instructions.
   disableSROA(Operand);
 
   return false;
 }
 
 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
-  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
+  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
 }
 
 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -769,7 +778,7 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
   return false;
 }
 
-bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
+bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
   // If the normal destination of the invoke or the parent block of the call
   // site is unreachable-terminated, there is little point in inlining this
   // unless there is literally zero cost.
@@ -785,21 +794,21 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
   // For now, we are not handling this corner case here as it is rare in real
   // code. In future, we should elaborate this based on BPI and BFI in more
   // general threshold adjusting heuristics in updateThreshold().
-  Instruction *Instr = CS.getInstruction();
-  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
       return false;
-  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
+  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
     return false;
 
   return true;
 }
 
-bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+bool CallAnalyzer::isColdCallSite(CallBase &Call,
+                                  BlockFrequencyInfo *CallerBFI) {
   // If global profile summary is available, then callsite's coldness is
   // determined based on that.
   if (PSI && PSI->hasProfileSummary())
-    return PSI->isColdCallSite(CS, CallerBFI);
+    return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
 
   // Otherwise we need BFI to be available.
   if (!CallerBFI)
@@ -810,20 +819,21 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
   // complexity is not worth it unless this scaling shows up high in the
   // profiles.
   const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
-  auto CallSiteBB = CS.getInstruction()->getParent();
+  auto CallSiteBB = Call.getParent();
   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
   auto CallerEntryFreq =
-      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
   return CallSiteFreq < CallerEntryFreq * ColdProb;
 }
 
 Optional<int>
-CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
+CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                       BlockFrequencyInfo *CallerBFI) {
 
   // If global profile summary is available, then callsite's hotness is
   // determined based on that.
-  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
+  if (PSI && PSI->hasProfileSummary() &&
+      PSI->isHotCallSite(CallSite(&Call), CallerBFI))
     return Params.HotCallSiteThreshold;
 
   // Otherwise we need BFI to be available and to have a locally hot callsite
@@ -835,7 +845,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
   // potentially cache the computation of scaled entry frequency, but the added
   // complexity is not worth it unless this scaling shows up high in the
   // profiles.
-  auto CallSiteBB = CS.getInstruction()->getParent();
+  auto CallSiteBB = Call.getParent();
   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
   auto CallerEntryFreq = CallerBFI->getEntryFreq();
   if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
@@ -845,14 +855,14 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
   return None;
 }
 
-void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
+void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
   // If no size growth is allowed for this inlining, set Threshold to 0.
-  if (!allowSizeGrowth(CS)) {
+  if (!allowSizeGrowth(Call)) {
     Threshold = 0;
     return;
   }
 
-  Function *Caller = CS.getCaller();
+  Function *Caller = Call.getCaller();
 
   // return min(A, B) if B is valid.
   auto MinIfValid = [](int A, Optional<int> B) {
@@ -870,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   // basic block at the given callsite context. This is speculatively applied
   // and withdrawn if more than one basic block is seen.
   //
-  // Vector bonuses: We want to more aggressively inline vector-dense kernels
-  // and apply this bonus based on the percentage of vector instructions. A
-  // bonus is applied if the vector instructions exceed 50% and half that amount
-  // is applied if it exceeds 10%. Note that these bonuses are some what
-  // arbitrary and evolved over time by accident as much as because they are
-  // principled bonuses.
-  // FIXME: It would be nice to base the bonus values on something more
-  // scientific.
-  //
   // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
   // of the last call to a static function as inlining such functions is
   // guaranteed to reduce code size.
@@ -886,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   // These bonus percentages may be set to 0 based on properties of the caller
   // and the callsite.
   int SingleBBBonusPercent = 50;
-  int VectorBonusPercent = 150;
+  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
   int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
 
   // Lambda to set all the above bonus and bonus percentages to 0.
@@ -898,7 +899,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
 
   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
   // and reduce the threshold if the caller has the necessary attribute.
-  if (Caller->optForMinSize()) {
+  if (Caller->hasMinSize()) {
     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
     // For minsize, we want to disable the single BB bonus and the vector
     // bonuses, but not the last-call-to-static bonus. Inlining the last call to
@@ -906,12 +907,12 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
     // call/return instructions.
     SingleBBBonusPercent = 0;
     VectorBonusPercent = 0;
-  } else if (Caller->optForSize())
+  } else if (Caller->hasOptSize())
     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
 
   // Adjust the threshold based on inlinehint attribute and profile based
   // hotness information if the caller does not have MinSize attribute.
-  if (!Caller->optForMinSize()) {
+  if (!Caller->hasMinSize()) {
     if (Callee.hasFnAttribute(Attribute::InlineHint))
       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
 
@@ -923,15 +924,15 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
     // used (which adds hotness metadata to calls) or if caller's
     // BlockFrequencyInfo is available.
     BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
-    auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
-    if (!Caller->optForSize() && HotCallSiteThreshold) {
+    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
+    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
       LLVM_DEBUG(dbgs() << "Hot callsite.\n");
       // FIXME: This should update the threshold only if it exceeds the
       // current threshold, but AutoFDO + ThinLTO currently relies on this
       // behavior to prevent inlining of hot callsites during ThinLTO
       // compile phase.
       Threshold = HotCallSiteThreshold.getValue();
-    } else if (isColdCallSite(CS, CallerBFI)) {
+    } else if (isColdCallSite(Call, CallerBFI)) {
       LLVM_DEBUG(dbgs() << "Cold callsite.\n");
       // Do not apply bonuses for a cold callsite including the
       // LastCallToStatic bonus. While this bonus might result in code size
@@ -968,7 +969,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   VectorBonus = Threshold * VectorBonusPercent / 100;
 
   bool OnlyOneCallAndLocalLinkage =
-      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+      F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
   // If there is only one call of the function, and it has internal linkage,
   // the cost of inlining it drops dramatically. It may seem odd to update
   // Cost in updateThreshold, but the bonus depends on the logic in this method.
@@ -1087,10 +1088,34 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
 
   // If the instruction is floating point, and the target says this operation
   // is expensive, this may eventually become a library call. Treat the cost
-  // as such.
+  // as such. Unless it's fneg which can be implemented with an xor.
+  using namespace llvm::PatternMatch;
   if (I.getType()->isFloatingPointTy() &&
-      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
-    Cost += InlineConstants::CallPenalty;
+      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
+      !match(&I, m_FNeg(m_Value())))
+    addCost(InlineConstants::CallPenalty);
+
+  return false;
+}
+
+bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
+  Value *Op = I.getOperand(0);
+  Constant *COp = dyn_cast<Constant>(Op);
+  if (!COp)
+    COp = SimplifiedValues.lookup(Op);
+
+  Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
+                                    cast<FPMathOperator>(I).getFastMathFlags(),
+                                    DL);
+
+  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+    SimplifiedValues[&I] = C;
+
+  if (SimpleV)
+    return true;
+
+  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
+  disableSROA(Op);
 
   return false;
 }
@@ -1173,62 +1198,61 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
 /// analyzing the arguments and call itself with instsimplify. Returns true if
 /// it has simplified the callsite to some other entity (a constant), making it
 /// free.
-bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
   // FIXME: Using the instsimplify logic directly for this is inefficient
   // because we have to continually rebuild the argument list even when no
   // simplifications can be performed. Until that is fixed with remapping
   // inside of instsimplify, directly constant fold calls here.
-  if (!canConstantFoldCallTo(CS, F))
+  if (!canConstantFoldCallTo(&Call, F))
     return false;
 
   // Try to re-map the arguments to constants.
   SmallVector<Constant *, 4> ConstantArgs;
-  ConstantArgs.reserve(CS.arg_size());
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
-       ++I) {
-    Constant *C = dyn_cast<Constant>(*I);
+  ConstantArgs.reserve(Call.arg_size());
+  for (Value *I : Call.args()) {
+    Constant *C = dyn_cast<Constant>(I);
     if (!C)
-      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
     if (!C)
       return false; // This argument doesn't map to a constant.
 
     ConstantArgs.push_back(C);
   }
-  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
-    SimplifiedValues[CS.getInstruction()] = C;
+  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
+    SimplifiedValues[&Call] = C;
     return true;
   }
 
   return false;
 }
 
-bool CallAnalyzer::visitCallSite(CallSite CS) {
-  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
+bool CallAnalyzer::visitCallBase(CallBase &Call) {
+  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
     // This aborts the entire analysis.
     ExposesReturnsTwice = true;
     return false;
   }
-  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
+  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  if (Function *F = CS.getCalledFunction()) {
+  if (Function *F = Call.getCalledFunction()) {
     // When we have a concrete function, first try to simplify it directly.
-    if (simplifyCallSite(F, CS))
+    if (simplifyCallSite(F, Call))
       return true;
 
     // Next check if it is an intrinsic we know about.
     // FIXME: Lift this into part of the InstVisitor.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
       switch (II->getIntrinsicID()) {
       default:
-        if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
           disableLoadElimination();
-        return Base::visitCallSite(CS);
+        return Base::visitCallBase(Call);
 
       case Intrinsic::load_relative:
         // This is normally lowered to 4 LLVM instructions.
-        Cost += 3 * InlineConstants::InstrCost;
+        addCost(3 * InlineConstants::InstrCost);
         return false;
 
       case Intrinsic::memset:
@@ -1247,7 +1271,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
       }
     }
 
-    if (F == CS.getInstruction()->getFunction()) {
+    if (F == Call.getFunction()) {
       // This flag will fully abort the analysis, so don't bother with anything
       // else.
       IsRecursiveCall = true;
@@ -1257,34 +1281,34 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
     if (TTI.isLoweredToCall(F)) {
       // We account for the average 1 instruction per call argument setup
       // here.
-      Cost += CS.arg_size() * InlineConstants::InstrCost;
+      addCost(Call.arg_size() * InlineConstants::InstrCost);
 
       // Everything other than inline ASM will also have a significant cost
       // merely from making the call.
-      if (!isa<InlineAsm>(CS.getCalledValue()))
-        Cost += InlineConstants::CallPenalty;
+      if (!isa<InlineAsm>(Call.getCalledValue()))
+        addCost(InlineConstants::CallPenalty);
     }
 
-    if (!CS.onlyReadsMemory())
+    if (!Call.onlyReadsMemory())
       disableLoadElimination();
-    return Base::visitCallSite(CS);
+    return Base::visitCallBase(Call);
   }
 
   // Otherwise we're in a very special case -- an indirect function call. See
   // if we can be particularly clever about this.
-  Value *Callee = CS.getCalledValue();
+  Value *Callee = Call.getCalledValue();
 
   // First, pay the price of the argument setup. We account for the average
   // 1 instruction per call argument setup here.
-  Cost += CS.arg_size() * InlineConstants::InstrCost;
+  addCost(Call.arg_size() * InlineConstants::InstrCost);
 
   // Next, check if this happens to be an indirect function call to a known
   // function in this inline context. If not, we've done all we can.
   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
   if (!F) {
-    if (!CS.onlyReadsMemory())
+    if (!Call.onlyReadsMemory())
       disableLoadElimination();
-    return Base::visitCallSite(CS);
+    return Base::visitCallBase(Call);
   }
 
   // If we have a constant that we are calling as a function, we can peer
@@ -1294,9 +1318,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
   // out. Pretend to inline the function, with a custom threshold.
   auto IndirectCallParams = Params;
   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
-  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
+  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
                   IndirectCallParams);
-  if (CA.analyzeCall(CS)) {
+  if (CA.analyzeCall(Call)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // threshold to get the bonus we want to apply, but don't go below zero.
     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
@@ -1304,7 +1328,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
 
   if (!F->onlyReadsMemory())
     disableLoadElimination();
-  return Base::visitCallSite(CS);
+  return Base::visitCallBase(Call);
 }
 
 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
@@ -1438,7 +1462,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
                (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
 
   if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
-    Cost = CostLowerBound;
+    addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
     return false;
   }
 
@@ -1452,7 +1476,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
     int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                      4 * InlineConstants::InstrCost;
 
-    Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
+    addCost(JTCost, (int64_t)CostUpperBound);
     return false;
   }
 
@@ -1473,7 +1497,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   //   n + n / 2 - 1 = n * 3 / 2 - 1
   if (NumCaseCluster <= 3) {
     // Suppose a comparison includes one compare and one conditional branch.
-    Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+    addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
     return false;
   }
 
@@ -1481,7 +1505,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   int64_t SwitchCost =
       ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
 
-  Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
+  addCost(SwitchCost, (int64_t)CostUpperBound);
   return false;
 }
 
@@ -1574,7 +1598,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
     if (Base::visit(&*I))
       ++NumInstructionsSimplified;
     else
-      Cost += InlineConstants::InstrCost;
+      addCost(InlineConstants::InstrCost);
 
     using namespace ore;
     // If the visit this instruction detected an uninlinable pattern, abort.
@@ -1595,7 +1619,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
       if (ORE)
         ORE->emit([&]() {
           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
-                                          CandidateCS.getInstruction())
+                                          &CandidateCall)
                  << NV("Callee", &F) << " has uninlinable pattern ("
                  << NV("InlineResult", IR.message)
                  << ") and cost is not fully computed";
@@ -1612,14 +1636,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
       if (ORE)
         ORE->emit([&]() {
           return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
-                                          CandidateCS.getInstruction())
+                                          &CandidateCall)
                  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
                  << ". Cost is not fully computed";
         });
       return IR;
     }
 
-    // Check if we've past the maximum possible threshold so we don't spin in
+    // Check if we've passed the maximum possible threshold so we don't spin in
     // huge basic blocks that will never inline.
     if (Cost >= Threshold && !ComputeFullInlineCost)
       return false;
@@ -1676,7 +1700,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
 /// blocks to see if all their incoming edges are dead or not.
 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
   auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
-    // A CFG edge is dead if the predecessor is dead or the predessor has a
+    // A CFG edge is dead if the predecessor is dead or the predecessor has a
     // known successor which is not the one under exam.
     return (DeadBlocks.count(Pred) ||
             (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
@@ -1712,7 +1736,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
 /// factors and heuristics. If this method returns false but the computed cost
 /// is below the computed threshold, then inlining was forcibly disabled by
 /// some artifact of the routine.
-InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
   ++NumCallsAnalyzed;
 
   // Perform some tweaks to the cost and threshold based on the direct
@@ -1729,7 +1753,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
   assert(NumVectorInstructions == 0);
 
   // Update the threshold based on callsite properties
-  updateThreshold(CS, F);
+  updateThreshold(Call, F);
 
   // While Threshold depends on commandline options that can take negative
   // values, we want to enforce the invariant that the computed threshold and
@@ -1745,7 +1769,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
 
   // Give out bonuses for the callsite, as the instructions setting them up
   // will be gone after inlining.
-  Cost -= getCallsiteCost(CS, DL);
+  addCost(-getCallsiteCost(Call, DL));
 
   // If this function uses the coldcc calling convention, prefer not to inline
   // it.
@@ -1759,14 +1783,11 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
   if (F.empty())
     return true;
 
-  Function *Caller = CS.getInstruction()->getFunction();
+  Function *Caller = Call.getFunction();
   // Check if the caller function is recursive itself.
   for (User *U : Caller->users()) {
-    CallSite Site(U);
-    if (!Site)
-      continue;
-    Instruction *I = Site.getInstruction();
-    if (I->getFunction() == Caller) {
+    CallBase *Call = dyn_cast<CallBase>(U);
+    if (Call && Call->getFunction() == Caller) {
       IsCallerRecursive = true;
       break;
     }
@@ -1774,10 +1795,10 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
 
   // Populate our simplified values by mapping from function arguments to call
   // arguments with known important simplifications.
-  CallSite::arg_iterator CAI = CS.arg_begin();
+  auto CAI = Call.arg_begin();
   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
        FAI != FAE; ++FAI, ++CAI) {
-    assert(CAI != CS.arg_end());
+    assert(CAI != Call.arg_end());
     if (Constant *C = dyn_cast<Constant>(CAI))
       SimplifiedValues[&*FAI] = C;
 
@@ -1826,14 +1847,18 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
     if (BB->empty())
       continue;
 
-    // Disallow inlining a blockaddress. A blockaddress only has defined
-    // behavior for an indirect branch in the same function, and we do not
-    // currently support inlining indirect branches. But, the inliner may not
-    // see an indirect branch that ends up being dead code at a particular call
-    // site. If the blockaddress escapes the function, e.g., via a global
-    // variable, inlining may lead to an invalid cross-function reference.
+    // Disallow inlining a blockaddress that has any user other than a callbr.
+    // A blockaddress only has defined behavior for an indirect branch in the
+    // same function, and we do not currently support inlining indirect
+    // branches.  But, the inliner may not see an indirect branch that ends up
+    // being dead code at a particular call site. If the blockaddress escapes
+    // the function, e.g., via a global variable, inlining may lead to an
+    // invalid cross-function reference.
+    // FIXME: pr/39560: continue relaxing this overt restriction.
     if (BB->hasAddressTaken())
-      return "blockaddress";
+      for (User *U : BlockAddress::get(&*BB)->users())
+        if (!isa<CallBrInst>(*U))
+          return "blockaddress used outside of callbr";
 
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail on out.
@@ -1887,7 +1912,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
   }
 
   bool OnlyOneCallAndLocalLinkage =
-      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+      F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
   // If this is a noduplicate call, we can still inline as long as
   // inlining this would cause the removal of the caller (so the instruction
   // is not actually duplicated, just moved).
@@ -1899,7 +1924,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
   // size, we penalise any call sites that perform loops. We do this after all
   // other costs here, so will likely only be dealing with relatively small
   // functions (and hence DT and LI will hopefully be cheap).
-  if (Caller->optForMinSize()) {
+  if (Caller->hasMinSize()) {
     DominatorTree DT(F);
     LoopInfo LI(DT);
     int NumLoops = 0;
@@ -1909,7 +1934,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
         continue;
       NumLoops++;
     }
-    Cost += NumLoops * InlineConstants::CallPenalty;
+    addCost(NumLoops * InlineConstants::CallPenalty);
   }
 
   // We applied the maximum possible vector bonus at the beginning. Now,
@@ -1953,13 +1978,13 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
 }
 
-int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
+int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
   int Cost = 0;
-  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-    if (CS.isByValArgument(I)) {
+  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
+    if (Call.isByValArgument(I)) {
       // We approximate the number of loads and stores needed by dividing the
       // size of the byval type by the target's pointer size.
-      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
       unsigned AS = PTy->getAddressSpace();
       unsigned PointerSize = DL.getPointerSizeInBits(AS);
@@ -1987,16 +2012,16 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
 }
 
 InlineCost llvm::getInlineCost(
-    CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+    CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
-  return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
                        GetAssumptionCache, GetBFI, PSI, ORE);
 }
 
 InlineCost llvm::getInlineCost(
-    CallSite CS, Function *Callee, const InlineParams &Params,
+    CallBase &Call, Function *Callee, const InlineParams &Params,
     TargetTransformInfo &CalleeTTI,
     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
@@ -2012,9 +2037,9 @@ InlineCost llvm::getInlineCost(
   // argument is in the alloca address space (so it is a little bit complicated
   // to solve).
   unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
-  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
-    if (CS.isByValArgument(I)) {
-      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
+    if (Call.isByValArgument(I)) {
+      PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
       if (PTy->getAddressSpace() != AllocaAS)
         return llvm::InlineCost::getNever("byval arguments without alloca"
                                           " address space");
@@ -2022,20 +2047,21 @@ InlineCost llvm::getInlineCost(
 
   // Calls to functions with always-inline attributes should be inlined
   // whenever possible.
-  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
-    if (isInlineViable(*Callee))
+  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
+    auto IsViable = isInlineViable(*Callee);
+    if (IsViable)
       return llvm::InlineCost::getAlways("always inline attribute");
-    return llvm::InlineCost::getNever("inapplicable always inline attribute");
+    return llvm::InlineCost::getNever(IsViable.message);
   }
 
   // Never inline functions with conflicting attributes (unless callee has
   // always-inline attribute).
-  Function *Caller = CS.getCaller();
+  Function *Caller = Call.getCaller();
   if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
     return llvm::InlineCost::getNever("conflicting attributes");
 
   // Don't inline this call if the caller has the optnone attribute.
-  if (Caller->hasFnAttribute(Attribute::OptimizeNone))
+  if (Caller->hasOptNone())
     return llvm::InlineCost::getNever("optnone attribute");
 
   // Don't inline a function that treats null pointer as valid into a caller
@@ -2052,15 +2078,15 @@ InlineCost llvm::getInlineCost(
     return llvm::InlineCost::getNever("noinline function attribute");
 
   // Don't inline call sites marked noinline.
-  if (CS.isNoInline())
+  if (Call.isNoInline())
     return llvm::InlineCost::getNever("noinline call site attribute");
 
   LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
                           << "... (caller:" << Caller->getName() << ")\n");
 
-  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
-                  Params);
-  InlineResult ShouldInline = CA.analyzeCall(CS);
+  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
+                  Call, Params);
+  InlineResult ShouldInline = CA.analyzeCall(Call);
 
   LLVM_DEBUG(CA.dump());
 
@@ -2073,42 +2099,50 @@ InlineCost llvm::getInlineCost(
   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
 }
 
-bool llvm::isInlineViable(Function &F) {
+InlineResult llvm::isInlineViable(Function &F) {
   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain indirect branches or
-    // blockaddresses.
-    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
-      return false;
+    // Disallow inlining of functions which contain indirect branches.
+    if (isa<IndirectBrInst>(BI->getTerminator()))
+      return "contains indirect branches";
+
+    // Disallow inlining of blockaddresses which are used by non-callbr
+    // instructions.
+    if (BI->hasAddressTaken())
+      for (User *U : BlockAddress::get(&*BI)->users())
+        if (!isa<CallBrInst>(*U))
+          return "blockaddress used outside of callbr";
 
     for (auto &II : *BI) {
-      CallSite CS(&II);
-      if (!CS)
+      CallBase *Call = dyn_cast<CallBase>(&II);
+      if (!Call)
         continue;
 
       // Disallow recursive calls.
-      if (&F == CS.getCalledFunction())
-        return false;
+      if (&F == Call->getCalledFunction())
+        return "recursive call";
 
       // Disallow calls which expose returns-twice to a function not previously
       // attributed as such.
-      if (!ReturnsTwice && CS.isCall() &&
-          cast<CallInst>(CS.getInstruction())->canReturnTwice())
-        return false;
+      if (!ReturnsTwice && isa<CallInst>(Call) &&
+          cast<CallInst>(Call)->canReturnTwice())
+        return "exposes returns-twice attribute";
 
-      if (CS.getCalledFunction())
-        switch (CS.getCalledFunction()->getIntrinsicID()) {
+      if (Call->getCalledFunction())
+        switch (Call->getCalledFunction()->getIntrinsicID()) {
         default:
           break;
         // Disallow inlining of @llvm.icall.branch.funnel because current
         // backend can't separate call targets from call arguments.
         case llvm::Intrinsic::icall_branch_funnel:
+          return "disallowed inlining of @llvm.icall.branch.funnel";
         // Disallow inlining functions that call @llvm.localescape. Doing this
         // correctly would require major changes to the inliner.
         case llvm::Intrinsic::localescape:
+          return "disallowed inlining of @llvm.localescape";
         // Disallow inlining of functions that initialize VarArgs with va_start.
         case llvm::Intrinsic::vastart:
-          return false;
+          return "contains VarArgs initialized with va_start";
         }
     }
   }
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 95ab6ee3db5b..943a99a5f46d 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -1,9 +1,8 @@
 //===-- InstCount.cpp - Collects the count of all instructions ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/InstructionPrecedenceTracking.cpp b/lib/Analysis/InstructionPrecedenceTracking.cpp
index 816126f407ca..35190ce3e11a 100644
--- a/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -1,9 +1,8 @@
 //===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Implements a class that is able to define some instructions as "special"
@@ -20,6 +19,7 @@
 
 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
 
 using namespace llvm;
 
@@ -153,5 +153,8 @@ bool ImplicitControlFlowTracking::isSpecialInstruction(
 
 bool MemoryWriteTracking::isSpecialInstruction(
     const Instruction *Insn) const {
+  using namespace PatternMatch;
+  if (match(Insn, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+    return false;
   return Insn->mayWriteToMemory();
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index ccf907c144f0..e34bf6f4e43f 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1,9 +1,8 @@
 //===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,8 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
@@ -50,6 +51,9 @@ STATISTIC(NumExpand,  "Number of expansions");
 STATISTIC(NumReassoc, "Number of reassociations");
 
 static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
+                             const SimplifyQuery &, unsigned);
 static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
                             unsigned);
 static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
@@ -655,32 +659,11 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
   Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
   APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
 
-  // Even though we don't look through PHI nodes, we could be called on an
-  // instruction in an unreachable block, which may be on a cycle.
-  SmallPtrSet<Value *, 4> Visited;
-  Visited.insert(V);
-  do {
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if ((!AllowNonInbounds && !GEP->isInBounds()) ||
-          !GEP->accumulateConstantOffset(DL, Offset))
-        break;
-      V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
-      V = cast<Operator>(V)->getOperand(0);
-    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-      if (GA->isInterposable())
-        break;
-      V = GA->getAliasee();
-    } else {
-      if (auto CS = CallSite(V))
-        if (Value *RV = CS.getReturnedArgOperand()) {
-          V = RV;
-          continue;
-        }
-      break;
-    }
-    assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
-  } while (Visited.insert(V).second);
+  V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
+  // The strip above may look through an `addrspacecast`, so sign-extend or
+  // truncate the accumulated offset to the stripped pointer's intptr width.
+  IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
+  Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
 
   Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
   if (V->getType()->isVectorTy())
@@ -1841,6 +1824,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       return Op1;
   }
 
+  // This is a similar pattern used for checking if a value is a power-of-2:
+  // (A - 1) & A --> 0 (if A is a power-of-2 or 0)
+  // A & (A - 1) --> 0 (if A is a power-of-2 or 0)
+  if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
+      isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+    return Constant::getNullValue(Op1->getType());
+  if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) &&
+      isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+    return Constant::getNullValue(Op0->getType());
+
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
     return V;
 
@@ -2280,12 +2273,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
     // come from a pointer that cannot overlap with dynamically-allocated
     // memory within the lifetime of the current function (allocas, byval
     // arguments, globals), then determine the comparison result here.
-    SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+    SmallVector<const Value *, 8> LHSUObjs, RHSUObjs;
     GetUnderlyingObjects(LHS, LHSUObjs, DL);
     GetUnderlyingObjects(RHS, RHSUObjs, DL);
 
     // Is the set of underlying objects all noalias calls?
-    auto IsNAC = [](ArrayRef<Value *> Objects) {
+    auto IsNAC = [](ArrayRef<const Value *> Objects) {
       return all_of(Objects, isNoAliasCall);
     };
 
@@ -2295,8 +2288,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
     // live with the compared-to allocation). For globals, we exclude symbols
     // that might be resolve lazily to symbols in another dynamically-loaded
     // library (and, thus, could be malloc'ed by the implementation).
-    auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
-      return all_of(Objects, [](Value *V) {
+    auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
+      return all_of(Objects, [](const Value *V) {
         if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
           return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
         if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2472,228 +2465,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
   return nullptr;
 }
 
-/// Many binary operators with a constant operand have an easy-to-compute
-/// range of outputs. This can be used to fold a comparison to always true or
-/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
-                              const InstrInfoQuery &IIQ) {
-  unsigned Width = Lower.getBitWidth();
-  const APInt *C;
-  switch (BO.getOpcode()) {
-  case Instruction::Add:
-    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
-      // FIXME: If we have both nuw and nsw, we should reduce the range further.
-      if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
-        // 'add nuw x, C' produces [C, UINT_MAX].
-        Lower = *C;
-      } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
-        if (C->isNegative()) {
-          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
-          Lower = APInt::getSignedMinValue(Width);
-          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
-        } else {
-          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
-          Lower = APInt::getSignedMinValue(Width) + *C;
-          Upper = APInt::getSignedMaxValue(Width) + 1;
-        }
-      }
-    }
-    break;
-
-  case Instruction::And:
-    if (match(BO.getOperand(1), m_APInt(C)))
-      // 'and x, C' produces [0, C].
-      Upper = *C + 1;
-    break;
-
-  case Instruction::Or:
-    if (match(BO.getOperand(1), m_APInt(C)))
-      // 'or x, C' produces [C, UINT_MAX].
-      Lower = *C;
-    break;
-
-  case Instruction::AShr:
-    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
-      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
-      Lower = APInt::getSignedMinValue(Width).ashr(*C);
-      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
-    } else if (match(BO.getOperand(0), m_APInt(C))) {
-      unsigned ShiftAmount = Width - 1;
-      if (!C->isNullValue() && IIQ.isExact(&BO))
-        ShiftAmount = C->countTrailingZeros();
-      if (C->isNegative()) {
-        // 'ashr C, x' produces [C, C >> (Width-1)]
-        Lower = *C;
-        Upper = C->ashr(ShiftAmount) + 1;
-      } else {
-        // 'ashr C, x' produces [C >> (Width-1), C]
-        Lower = C->ashr(ShiftAmount);
-        Upper = *C + 1;
-      }
-    }
-    break;
-
-  case Instruction::LShr:
-    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
-      // 'lshr x, C' produces [0, UINT_MAX >> C].
-      Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
-    } else if (match(BO.getOperand(0), m_APInt(C))) {
-      // 'lshr C, x' produces [C >> (Width-1), C].
-      unsigned ShiftAmount = Width - 1;
-      if (!C->isNullValue() && IIQ.isExact(&BO))
-        ShiftAmount = C->countTrailingZeros();
-      Lower = C->lshr(ShiftAmount);
-      Upper = *C + 1;
-    }
-    break;
-
-  case Instruction::Shl:
-    if (match(BO.getOperand(0), m_APInt(C))) {
-      if (IIQ.hasNoUnsignedWrap(&BO)) {
-        // 'shl nuw C, x' produces [C, C << CLZ(C)]
-        Lower = *C;
-        Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
-      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
-        if (C->isNegative()) {
-          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
-          unsigned ShiftAmount = C->countLeadingOnes() - 1;
-          Lower = C->shl(ShiftAmount);
-          Upper = *C + 1;
-        } else {
-          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
-          unsigned ShiftAmount = C->countLeadingZeros() - 1;
-          Lower = *C;
-          Upper = C->shl(ShiftAmount) + 1;
-        }
-      }
-    }
-    break;
-
-  case Instruction::SDiv:
-    if (match(BO.getOperand(1), m_APInt(C))) {
-      APInt IntMin = APInt::getSignedMinValue(Width);
-      APInt IntMax = APInt::getSignedMaxValue(Width);
-      if (C->isAllOnesValue()) {
-        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
-        //    where C != -1 and C != 0 and C != 1
-        Lower = IntMin + 1;
-        Upper = IntMax + 1;
-      } else if (C->countLeadingZeros() < Width - 1) {
-        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
-        //    where C != -1 and C != 0 and C != 1
-        Lower = IntMin.sdiv(*C);
-        Upper = IntMax.sdiv(*C);
-        if (Lower.sgt(Upper))
-          std::swap(Lower, Upper);
-        Upper = Upper + 1;
-        assert(Upper != Lower && "Upper part of range has wrapped!");
-      }
-    } else if (match(BO.getOperand(0), m_APInt(C))) {
-      if (C->isMinSignedValue()) {
-        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
-        Lower = *C;
-        Upper = Lower.lshr(1) + 1;
-      } else {
-        // 'sdiv C, x' produces [-|C|, |C|].
-        Upper = C->abs() + 1;
-        Lower = (-Upper) + 1;
-      }
-    }
-    break;
-
-  case Instruction::UDiv:
-    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
-      // 'udiv x, C' produces [0, UINT_MAX / C].
-      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
-    } else if (match(BO.getOperand(0), m_APInt(C))) {
-      // 'udiv C, x' produces [0, C].
-      Upper = *C + 1;
-    }
-    break;
-
-  case Instruction::SRem:
-    if (match(BO.getOperand(1), m_APInt(C))) {
-      // 'srem x, C' produces (-|C|, |C|).
-      Upper = C->abs();
-      Lower = (-Upper) + 1;
-    }
-    break;
-
-  case Instruction::URem:
-    if (match(BO.getOperand(1), m_APInt(C)))
-      // 'urem x, C' produces [0, C).
-      Upper = *C;
-    break;
-
-  default:
-    break;
-  }
-}
-
-/// Some intrinsics with a constant operand have an easy-to-compute range of
-/// outputs. This can be used to fold a comparison to always true or always
-/// false.
-static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
-                                  APInt &Upper) {
-  unsigned Width = Lower.getBitWidth();
-  const APInt *C;
-  switch (II.getIntrinsicID()) {
-  case Intrinsic::uadd_sat:
-    // uadd.sat(x, C) produces [C, UINT_MAX].
-    if (match(II.getOperand(0), m_APInt(C)) ||
-        match(II.getOperand(1), m_APInt(C)))
-      Lower = *C;
-    break;
-  case Intrinsic::sadd_sat:
-    if (match(II.getOperand(0), m_APInt(C)) ||
-        match(II.getOperand(1), m_APInt(C))) {
-      if (C->isNegative()) {
-        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
-        Lower = APInt::getSignedMinValue(Width);
-        Upper = APInt::getSignedMaxValue(Width) + *C + 1;
-      } else {
-        // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
-        Lower = APInt::getSignedMinValue(Width) + *C;
-        Upper = APInt::getSignedMaxValue(Width) + 1;
-      }
-    }
-    break;
-  case Intrinsic::usub_sat:
-    // usub.sat(C, x) produces [0, C].
-    if (match(II.getOperand(0), m_APInt(C)))
-      Upper = *C + 1;
-    // usub.sat(x, C) produces [0, UINT_MAX - C].
-    else if (match(II.getOperand(1), m_APInt(C)))
-      Upper = APInt::getMaxValue(Width) - *C + 1;
-    break;
-  case Intrinsic::ssub_sat:
-    if (match(II.getOperand(0), m_APInt(C))) {
-      if (C->isNegative()) {
-        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
-        Lower = APInt::getSignedMinValue(Width);
-        Upper = *C - APInt::getSignedMinValue(Width) + 1;
-      } else {
-        // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
-        Lower = *C - APInt::getSignedMaxValue(Width);
-        Upper = APInt::getSignedMaxValue(Width) + 1;
-      }
-    } else if (match(II.getOperand(1), m_APInt(C))) {
-      if (C->isNegative()) {
-        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
-        Lower = APInt::getSignedMinValue(Width) - *C;
-        Upper = APInt::getSignedMaxValue(Width) + 1;
-      } else {
-        // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
-        Lower = APInt::getSignedMinValue(Width);
-        Upper = APInt::getSignedMaxValue(Width) - *C + 1;
-      }
-    }
-    break;
-  default:
-    break;
-  }
-}
-
 static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
                                        Value *RHS, const InstrInfoQuery &IIQ) {
   Type *ITy = GetCompareTy(RHS); // The return type.
@@ -2721,22 +2492,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
   if (RHS_CR.isFullSet())
     return ConstantInt::getTrue(ITy);
 
-  // Find the range of possible values for binary operators.
-  unsigned Width = C->getBitWidth();
-  APInt Lower = APInt(Width, 0);
-  APInt Upper = APInt(Width, 0);
-  if (auto *BO = dyn_cast<BinaryOperator>(LHS))
-    setLimitsForBinOp(*BO, Lower, Upper, IIQ);
-  else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
-    setLimitsForIntrinsic(*II, Lower, Upper);
-
-  ConstantRange LHS_CR =
-      Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
-
-  if (auto *I = dyn_cast<Instruction>(LHS))
-    if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
-      LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
-
+  ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo);
   if (!LHS_CR.isFullSet()) {
     if (RHS_CR.contains(LHS_CR))
       return ConstantInt::getTrue(ITy);
@@ -3062,44 +2818,6 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
   return nullptr;
 }
 
-static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
-                                      Value *Op1) {
-  // We need a comparison with a constant.
-  const APInt *C;
-  if (!match(Op1, m_APInt(C)))
-    return nullptr;
-
-  // matchSelectPattern returns the negation part of an abs pattern in SP1.
-  // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
-  // constraint, we can't make a contiguous range for the result of abs.
-  ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
-  Value *SP0, *SP1;
-  SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
-  if (SPF == SelectPatternFlavor::SPF_ABS &&
-      cast<Instruction>(SP1)->hasNoSignedWrap())
-    // The result of abs(X) is >= 0 (with nsw).
-    AbsPred = ICmpInst::ICMP_SGE;
-  if (SPF == SelectPatternFlavor::SPF_NABS)
-    // The result of -abs(X) is <= 0.
-    AbsPred = ICmpInst::ICMP_SLE;
-
-  if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
-    return nullptr;
-
-  // If there is no intersection between abs/nabs and the range of this icmp,
-  // the icmp must be false. If the abs/nabs range is a subset of the icmp
-  // range, the icmp must be true.
-  APInt Zero = APInt::getNullValue(C->getBitWidth());
-  ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
-  ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
-  if (AbsRange.intersectWith(CmpRange).isEmptySet())
-    return getFalse(GetCompareTy(Op0));
-  if (CmpRange.contains(AbsRange))
-    return getTrue(GetCompareTy(Op0));
-
-  return nullptr;
-}
-
 /// Simplify integer comparisons where at least one operand of the compare
 /// matches an integer min/max idiom.
 static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3319,9 +3037,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     std::swap(LHS, RHS);
     Pred = CmpInst::getSwappedPredicate(Pred);
   }
+  assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
 
   Type *ITy = GetCompareTy(LHS); // The return type.
 
+  // For EQ and NE, we can always pick a value for the undef to make the
+  // predicate pass or fail, so we can return undef.
+  // Matches behavior in llvm::ConstantFoldCompareInstruction.
+  if (isa<UndefValue>(RHS) && ICmpInst::isEquality(Pred))
+    return UndefValue::get(ITy);
+
   // icmp X, X -> true/false
   // icmp X, undef -> true/false because undef could be X.
   if (LHS == RHS || isa<UndefValue>(RHS))
@@ -3531,9 +3256,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
     return V;
 
-  if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
-    return V;
-
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
@@ -3647,6 +3369,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   }
 
   // Handle fcmp with constant RHS.
+  // TODO: Use match with a specific FP value, so these work with vectors with
+  // undef lanes.
   const APFloat *C;
   if (match(RHS, m_APFloat(C))) {
     // Check whether the constant is an infinity.
@@ -3675,28 +3399,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         }
       }
     }
-    if (C->isZero()) {
-      switch (Pred) {
-      case FCmpInst::FCMP_OGE:
-        if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
-          return getTrue(RetTy);
-        break;
-      case FCmpInst::FCMP_UGE:
-        if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
-          return getTrue(RetTy);
-        break;
-      case FCmpInst::FCMP_ULT:
-        if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
-          return getFalse(RetTy);
-        break;
-      case FCmpInst::FCMP_OLT:
-        if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
-          return getFalse(RetTy);
-        break;
-      default:
-        break;
-      }
-    } else if (C->isNegative()) {
+    if (C->isNegative() && !C->isNegZero()) {
       assert(!C->isNaN() && "Unexpected NaN constant!");
       // TODO: We can catch more cases by using a range check rather than
       //       relying on CannotBeOrderedLessThanZero.
@@ -3719,6 +3422,67 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         break;
       }
     }
+
+    // Check comparison of [minnum/maxnum with constant] with other constant.
+    const APFloat *C2;
+    if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) &&
+         C2->compare(*C) == APFloat::cmpLessThan) ||
+        (match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) &&
+         C2->compare(*C) == APFloat::cmpGreaterThan)) {
+      bool IsMaxNum =
+          cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum;
+      // The ordered relationship and minnum/maxnum guarantee that we do not
+      // have NaN constants, so ordered/unordered preds are handled the same.
+      switch (Pred) {
+      case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
+        // minnum(X, LesserC)  == C --> false
+        // maxnum(X, GreaterC) == C --> false
+        return getFalse(RetTy);
+      case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
+        // minnum(X, LesserC)  != C --> true
+        // maxnum(X, GreaterC) != C --> true
+        return getTrue(RetTy);
+      case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
+      case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
+        // minnum(X, LesserC)  >= C --> false
+        // minnum(X, LesserC)  >  C --> false
+        // maxnum(X, GreaterC) >= C --> true
+        // maxnum(X, GreaterC) >  C --> true
+        return ConstantInt::get(RetTy, IsMaxNum);
+      case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
+      case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
+        // minnum(X, LesserC)  <= C --> true
+        // minnum(X, LesserC)  <  C --> true
+        // maxnum(X, GreaterC) <= C --> false
+        // maxnum(X, GreaterC) <  C --> false
+        return ConstantInt::get(RetTy, !IsMaxNum);
+      default:
+        // TRUE/FALSE/ORD/UNO should be handled before this.
+        llvm_unreachable("Unexpected fcmp predicate");
+      }
+    }
+  }
+
+  if (match(RHS, m_AnyZeroFP())) {
+    switch (Pred) {
+    case FCmpInst::FCMP_OGE:
+    case FCmpInst::FCMP_ULT:
+      // Positive or zero X >= 0.0 --> true
+      // Positive or zero X <  0.0 --> false
+      if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) &&
+          CannotBeOrderedLessThanZero(LHS, Q.TLI))
+        return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy);
+      break;
+    case FCmpInst::FCMP_UGE:
+    case FCmpInst::FCMP_OLT:
+      // Positive or zero or nan X >= 0.0 --> true
+      // Positive or zero or nan X <  0.0 --> false
+      if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+        return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
+      break;
+    default:
+      break;
+    }
   }
 
   // If the comparison is with the result of a select instruction, check whether
@@ -3904,27 +3668,44 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
                                            Pred == ICmpInst::ICMP_EQ))
         return V;
 
-    // Test for zero-shift-guard-ops around funnel shifts. These are used to
-    // avoid UB from oversized shifts in raw IR rotate patterns, but the
-    // intrinsics do not have that problem.
+    // Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
     Value *ShAmt;
     auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
                                                           m_Value(ShAmt)),
                              m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
                                                           m_Value(ShAmt)));
-    // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
-    // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
     // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
     // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
-    if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
-      return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
-
-    // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
-    // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+    if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt &&
+        Pred == ICmpInst::ICMP_EQ)
+      return X;
     // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
     // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
-    if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
-      return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
+    if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt &&
+        Pred == ICmpInst::ICMP_NE)
+      return X;
+
+    // Test for a zero-shift-guard-op around rotates. These are used to
+    // avoid UB from oversized shifts in raw IR rotate patterns, but the
+    // intrinsics do not have that problem.
+    // We do not allow this transform for the general funnel shift case because
+    // that would not preserve the poison safety of the original code.
+    auto isRotate = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X),
+                                                             m_Deferred(X),
+                                                             m_Value(ShAmt)),
+                                m_Intrinsic<Intrinsic::fshr>(m_Value(X),
+                                                             m_Deferred(X),
+                                                             m_Value(ShAmt)));
+    // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt)
+    // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt)
+    if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt &&
+        Pred == ICmpInst::ICMP_NE)
+      return TrueVal;
+    // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
+    // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
+    if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
+        Pred == ICmpInst::ICMP_EQ)
+      return FalseVal;
   }
 
   // Check for other compares that behave like bit test.
@@ -4218,6 +3999,17 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
   if (isa<UndefValue>(Idx))
     return UndefValue::get(Vec->getType());
 
+  // Inserting an undef scalar? Assume it is the same value as the existing
+  // vector element.
+  if (isa<UndefValue>(Val))
+    return Vec;
+
+  // If we are extracting a value from a vector, then inserting it into the same
+  // place, that's the input vector:
+  // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec
+  if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx))))
+    return Vec;
+
   return nullptr;
 }
 
@@ -4495,6 +4287,33 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
 }
 
+static Constant *foldConstant(Instruction::UnaryOps Opcode,
+                              Value *&Op, const SimplifyQuery &Q) {
+  if (auto *C = dyn_cast<Constant>(Op))
+    return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
+  return nullptr;
+}
+
+/// Given the operand for an FNeg, see if we can fold the result.  If not, this
+/// returns null.
+static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF,
+                               const SimplifyQuery &Q, unsigned MaxRecurse) {
+  if (Constant *C = foldConstant(Instruction::FNeg, Op, Q))
+    return C;
+
+  Value *X;
+  // fneg (fneg X) ==> X
+  if (match(Op, m_FNeg(m_Value(X))))
+    return X;
+
+  return nullptr;
+}
+
+Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+                              const SimplifyQuery &Q) {
+  return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
+}
+
 static Constant *propagateNaN(Constant *In) {
   // If the input is a vector with undef elements, just return a default NaN.
   if (!In->isNaN())
@@ -4536,16 +4355,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
       (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
     return Op0;
 
-  // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant)
+  // With nnan: -X + X --> 0.0 (and commuted variant)
   // We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
   // Negative zeros are allowed because we always end up with positive zero:
   // X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
   // X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
   // X =  0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
   // X =  0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
-  if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
-                       match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
-    return ConstantFP::getNullValue(Op0->getType());
+  if (FMF.noNaNs()) {
+    if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
+        match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
+      return ConstantFP::getNullValue(Op0->getType());
+
+    if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+        match(Op1, m_FNeg(m_Specific(Op0))))
+      return ConstantFP::getNullValue(Op0->getType());
+  }
 
   // (X - Y) + Y --> X
   // Y + (X - Y) --> X
@@ -4578,14 +4403,17 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
     return Op0;
 
   // fsub -0.0, (fsub -0.0, X) ==> X
+  // fsub -0.0, (fneg X) ==> X
   Value *X;
   if (match(Op0, m_NegZeroFP()) &&
-      match(Op1, m_FSub(m_NegZeroFP(), m_Value(X))))
+      match(Op1, m_FNeg(m_Value(X))))
     return X;
 
   // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
+  // fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
   if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) &&
-      match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))))
+      (match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))) ||
+       match(Op1, m_FNeg(m_Value(X)))))
     return X;
 
   // fsub nnan x, x ==> 0.0
@@ -4722,6 +4550,42 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
 
 //=== Helper functions for higher up the class hierarchy.
 
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
+                           unsigned MaxRecurse) {
+  switch (Opcode) {
+  case Instruction::FNeg:
+    return simplifyFNegInst(Op, FastMathFlags(), Q, MaxRecurse);
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
+/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
+static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
+                             const FastMathFlags &FMF,
+                             const SimplifyQuery &Q, unsigned MaxRecurse) {
+  switch (Opcode) {
+  case Instruction::FNeg:
+    return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
+  default:
+    return simplifyUnOp(Opcode, Op, Q, MaxRecurse);
+  }
+}
+
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+  return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
+}
+
+Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+                            const SimplifyQuery &Q) {
+  return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
+}
+
 /// Given operands for a BinaryOperator, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -4885,22 +4749,6 @@ static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
   return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
 }
 
-static bool maskIsAllZeroOrUndef(Value *Mask) {
-  auto *ConstMask = dyn_cast<Constant>(Mask);
-  if (!ConstMask)
-    return false;
-  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
-    return true;
-  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
-       ++I) {
-    if (auto *MaskElt = ConstMask->getAggregateElement(I))
-      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
-        continue;
-    return false;
-  }
-  return true;
-}
-
 static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
                                      const SimplifyQuery &Q) {
   // Idempotent functions return the same result when called repeatedly.
@@ -4941,8 +4789,32 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
   case Intrinsic::log2:
     // log2(exp2(x)) -> x
     if (Q.CxtI->hasAllowReassoc() &&
-        match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X;
+        (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
+         match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
+                                                m_Value(X))))) return X;
+    break;
+  case Intrinsic::log10:
+    // log10(pow(10.0, x)) -> x
+    if (Q.CxtI->hasAllowReassoc() &&
+        match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
+                                               m_Value(X)))) return X;
     break;
+  case Intrinsic::floor:
+  case Intrinsic::trunc:
+  case Intrinsic::ceil:
+  case Intrinsic::round:
+  case Intrinsic::nearbyint:
+  case Intrinsic::rint: {
+    // floor (sitofp x) -> sitofp x
+    // floor (uitofp x) -> uitofp x
+    //
+    // Converting from int always results in a finite integral number or
+    // infinity. For either of those inputs, these rounding functions always
+    // return the same value, so the rounding can be eliminated.
+    if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
+      return Op0;
+    break;
+  }
   default:
     break;
   }
@@ -4960,16 +4832,19 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
     // X - X -> { 0, false }
     if (Op0 == Op1)
       return Constant::getNullValue(ReturnType);
-    // X - undef -> undef
-    // undef - X -> undef
-    if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-      return UndefValue::get(ReturnType);
-    break;
+    LLVM_FALLTHROUGH;
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::sadd_with_overflow:
-    // X + undef -> undef
-    if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-      return UndefValue::get(ReturnType);
+    // X - undef -> { undef, false }
+    // undef - X -> { undef, false }
+    // X + undef -> { undef, false }
+    // undef + x -> { undef, false }
+    if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) {
+      return ConstantStruct::get(
+          cast<StructType>(ReturnType),
+          {UndefValue::get(ReturnType->getStructElementType(0)),
+           Constant::getNullValue(ReturnType->getStructElementType(1))});
+    }
     break;
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
@@ -5085,26 +4960,28 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
   return nullptr;
 }
 
-template <typename IterTy>
-static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
-                                const SimplifyQuery &Q) {
+static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
+
   // Intrinsics with no operands have some kind of side effect. Don't simplify.
-  unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
-  if (NumOperands == 0)
+  unsigned NumOperands = Call->getNumArgOperands();
+  if (!NumOperands)
     return nullptr;
 
+  Function *F = cast<Function>(Call->getCalledFunction());
   Intrinsic::ID IID = F->getIntrinsicID();
   if (NumOperands == 1)
-    return simplifyUnaryIntrinsic(F, ArgBegin[0], Q);
+    return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
 
   if (NumOperands == 2)
-    return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q);
+    return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
+                                   Call->getArgOperand(1), Q);
 
   // Handle intrinsics with 3 or more arguments.
   switch (IID) {
-  case Intrinsic::masked_load: {
-    Value *MaskArg = ArgBegin[2];
-    Value *PassthruArg = ArgBegin[3];
+  case Intrinsic::masked_load:
+  case Intrinsic::masked_gather: {
+    Value *MaskArg = Call->getArgOperand(2);
+    Value *PassthruArg = Call->getArgOperand(3);
     // If the mask is all zeros or undef, the "passthru" argument is the result.
     if (maskIsAllZeroOrUndef(MaskArg))
       return PassthruArg;
@@ -5112,7 +4989,8 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
   }
   case Intrinsic::fshl:
   case Intrinsic::fshr: {
-    Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+    Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1),
+          *ShAmtArg = Call->getArgOperand(2);
 
     // If both operands are undef, the result is undef.
     if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
@@ -5120,15 +4998,14 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
 
     // If shift amount is undef, assume it is zero.
     if (match(ShAmtArg, m_Undef()))
-      return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+      return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
 
     const APInt *ShAmtC;
     if (match(ShAmtArg, m_APInt(ShAmtC))) {
       // If there's effectively no shift, return the 1st arg or 2nd arg.
-      // TODO: For vectors, we could check each element of a non-splat constant.
       APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
       if (ShAmtC->urem(BitWidth).isNullValue())
-        return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+        return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
     }
     return nullptr;
   }
@@ -5137,58 +5014,36 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
   }
 }
 
-template <typename IterTy>
-static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
-                           IterTy ArgEnd, const SimplifyQuery &Q,
-                           unsigned MaxRecurse) {
-  Type *Ty = V->getType();
-  if (PointerType *PTy = dyn_cast<PointerType>(Ty))
-    Ty = PTy->getElementType();
-  FunctionType *FTy = cast<FunctionType>(Ty);
+Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+  Value *Callee = Call->getCalledValue();
 
   // call undef -> undef
   // call null -> undef
-  if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
-    return UndefValue::get(FTy->getReturnType());
+  if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
+    return UndefValue::get(Call->getType());
 
-  Function *F = dyn_cast<Function>(V);
+  Function *F = dyn_cast<Function>(Callee);
   if (!F)
     return nullptr;
 
   if (F->isIntrinsic())
-    if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q))
+    if (Value *Ret = simplifyIntrinsic(Call, Q))
       return Ret;
 
-  if (!canConstantFoldCallTo(CS, F))
+  if (!canConstantFoldCallTo(Call, F))
     return nullptr;
 
   SmallVector<Constant *, 4> ConstantArgs;
-  ConstantArgs.reserve(ArgEnd - ArgBegin);
-  for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
-    Constant *C = dyn_cast<Constant>(*I);
+  unsigned NumArgs = Call->getNumArgOperands();
+  ConstantArgs.reserve(NumArgs);
+  for (auto &Arg : Call->args()) {
+    Constant *C = dyn_cast<Constant>(&Arg);
     if (!C)
       return nullptr;
     ConstantArgs.push_back(C);
   }
 
-  return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
-                          User::op_iterator ArgBegin, User::op_iterator ArgEnd,
-                          const SimplifyQuery &Q) {
-  return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
-                          ArrayRef<Value *> Args, const SimplifyQuery &Q) {
-  return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
-  CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
-  return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                        Q, RecursionLimit);
+  return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
 }
 
 /// See if we can compute a simplified version of this instruction.
@@ -5203,6 +5058,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
   default:
     Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
     break;
+  case Instruction::FNeg:
+    Result = SimplifyFNegInst(I->getOperand(0), I->getFastMathFlags(), Q);
+    break;
   case Instruction::FAdd:
     Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
                               I->getFastMathFlags(), Q);
@@ -5327,8 +5185,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     Result = SimplifyPHINode(cast<PHINode>(I), Q);
     break;
   case Instruction::Call: {
-    CallSite CS(cast<CallInst>(I));
-    Result = SimplifyCall(CS, Q);
+    Result = SimplifyCall(cast<CallInst>(I), Q);
     break;
   }
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 6d5de22cb93f..07d6e27c13be 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -1,9 +1,8 @@
 //===- Interval.cpp - Interval class code ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index c777d91b67c6..d12db010db6a 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -1,9 +1,8 @@
 //===- IntervalPartition.cpp - Interval Partition module code -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
deleted file mode 100644
index 000fe5ddad54..000000000000
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Compute iterated dominance frontiers using a linear time algorithm.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include <queue>
-
-namespace llvm {
-
-template <class NodeTy, bool IsPostDom>
-void IDFCalculator<NodeTy, IsPostDom>::calculate(
-    SmallVectorImpl<BasicBlock *> &PHIBlocks) {
-  // Use a priority queue keyed on dominator tree level so that inserted nodes
-  // are handled from the bottom of the dominator tree upwards. We also augment
-  // the level with a DFS number to ensure that the blocks are ordered in a
-  // deterministic way.
-  typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>>
-      DomTreeNodePair;
-  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
-                              less_second> IDFPriorityQueue;
-  IDFPriorityQueue PQ;
-
-  DT.updateDFSNumbers();
-
-  for (BasicBlock *BB : *DefBlocks) {
-    if (DomTreeNode *Node = DT.getNode(BB))
-      PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
-  }
-
-  SmallVector<DomTreeNode *, 32> Worklist;
-  SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
-  SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
-
-  while (!PQ.empty()) {
-    DomTreeNodePair RootPair = PQ.top();
-    PQ.pop();
-    DomTreeNode *Root = RootPair.first;
-    unsigned RootLevel = RootPair.second.first;
-
-    // Walk all dominator tree children of Root, inspecting their CFG edges with
-    // targets elsewhere on the dominator tree. Only targets whose level is at
-    // most Root's level are added to the iterated dominance frontier of the
-    // definition set.
-
-    Worklist.clear();
-    Worklist.push_back(Root);
-    VisitedWorklist.insert(Root);
-
-    while (!Worklist.empty()) {
-      DomTreeNode *Node = Worklist.pop_back_val();
-      BasicBlock *BB = Node->getBlock();
-      // Succ is the successor in the direction we are calculating IDF, so it is
-      // successor for IDF, and predecessor for Reverse IDF.
-      auto DoWork = [&](BasicBlock *Succ) {
-        DomTreeNode *SuccNode = DT.getNode(Succ);
-
-        // Quickly skip all CFG edges that are also dominator tree edges instead
-        // of catching them below.
-        if (SuccNode->getIDom() == Node)
-          return;
-
-        const unsigned SuccLevel = SuccNode->getLevel();
-        if (SuccLevel > RootLevel)
-          return;
-
-        if (!VisitedPQ.insert(SuccNode).second)
-          return;
-
-        BasicBlock *SuccBB = SuccNode->getBlock();
-        if (useLiveIn && !LiveInBlocks->count(SuccBB))
-          return;
-
-        PHIBlocks.emplace_back(SuccBB);
-        if (!DefBlocks->count(SuccBB))
-          PQ.push(std::make_pair(
-              SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
-      };
-
-      if (GD) {
-        for (auto Pair : children<
-                 std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
-                 {GD, BB}))
-          DoWork(Pair.second);
-      } else {
-        for (auto *Succ : children<NodeTy>(BB))
-          DoWork(Succ);
-      }
-
-      for (auto DomChild : *Node) {
-        if (VisitedWorklist.insert(DomChild).second)
-          Worklist.push_back(DomChild);
-      }
-    }
-  }
-}
-
-template class IDFCalculator<BasicBlock *, false>;
-template class IDFCalculator<Inverse<BasicBlock *>, true>;
-}
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 93c23bca96af..439758560284 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
 //===- LazyBlockFrequencyInfo.cpp - Lazy Block Frequency Analysis ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index 429b78c3a47e..f2592c26b373 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
 //===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 3f22ada803c9..797fcf516429 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -1,9 +1,8 @@
 //===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -173,6 +172,19 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
     addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
   }
 
+  // Externally visible aliases of internal functions are also viable entry
+  // edges to the module.
+  for (auto &A : M.aliases()) {
+    if (A.hasLocalLinkage())
+      continue;
+    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+      LLVM_DEBUG(dbgs() << "  Adding '" << F->getName()
+                        << "' with alias '" << A.getName()
+                        << "' to entry set of the graph.\n");
+      addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(*F), Edge::Ref);
+    }
+  }
+
   // Now add entry nodes for functions reachable via initializers to globals.
   SmallVector<Constant *, 16> Worklist;
   SmallPtrSet<Constant *, 16> Visited;
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 110c085d3f35..542ff709d475 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1,9 +1,8 @@
 //===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -423,10 +422,18 @@ namespace {
                              BasicBlock *BB);
   Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
                                              BasicBlock *BB);
+  bool solveBlockValueBinaryOpImpl(
+      ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+      std::function<ConstantRange(const ConstantRange &,
+                                  const ConstantRange &)> OpFn);
   bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
                                BasicBlock *BB);
   bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
                            BasicBlock *BB);
+  bool solveBlockValueOverflowIntrinsic(
+      ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
+  bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
+                                BasicBlock *BB);
   void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
                                                      ValueLatticeElement &BBLV,
                                                      Instruction *BBI);
@@ -625,7 +632,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
   // and the like to prove non-nullness, but it's not clear that's worth it
   // compile time wise.  The context-insensitive value walk done inside
   // isKnownNonZero gets most of the profitable cases at much less expense.
-  // This does mean that we have a sensativity to where the defining
+  // This does mean that we have a sensitivity to where the defining
   // instruction is placed, even if it could legally be hoisted much higher.
   // That is unfortunate.
   PointerType *PT = dyn_cast<PointerType>(BBI->getType());
@@ -639,6 +646,14 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
 
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
       return solveBlockValueBinaryOp(Res, BO, BB);
+
+    if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
+      if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+        if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+          return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+
+    if (auto *II = dyn_cast<IntrinsicInst>(BBI))
+      return solveBlockValueIntrinsic(Res, II, BB);
   }
 
   LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -824,7 +839,9 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
   if (!GuardDecl || GuardDecl->use_empty())
     return;
 
-  for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+  if (BBI->getIterator() == BBI->getParent()->begin())
+    return;
+  for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
                                    BBI->getParent()->rend())) {
     Value *Cond = nullptr;
     if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
@@ -892,7 +909,28 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
       return true;
     }
 
-    // TODO: ABS, NABS from the SelectPatternResult
+    if (SPR.Flavor == SPF_ABS) {
+      if (LHS == SI->getTrueValue()) {
+        BBLV = ValueLatticeElement::getRange(TrueCR.abs());
+        return true;
+      }
+      if (LHS == SI->getFalseValue()) {
+        BBLV = ValueLatticeElement::getRange(FalseCR.abs());
+        return true;
+      }
+    }
+
+    if (SPR.Flavor == SPF_NABS) {
+      ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth()));
+      if (LHS == SI->getTrueValue()) {
+        BBLV = ValueLatticeElement::getRange(Zero.sub(TrueCR.abs()));
+        return true;
+      }
+      if (LHS == SI->getFalseValue()) {
+        BBLV = ValueLatticeElement::getRange(Zero.sub(FalseCR.abs()));
+        return true;
+      }
+    }
   }
 
   // Can we constrain the facts about the true and false values by using the
@@ -962,7 +1000,7 @@ Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
 
   const unsigned OperandBitWidth =
     DL.getTypeSizeInBits(I->getOperand(Op)->getType());
-  ConstantRange Range = ConstantRange(OperandBitWidth);
+  ConstantRange Range = ConstantRange::getFull(OperandBitWidth);
   if (hasBlockValue(I->getOperand(Op), BB)) {
     ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
     intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
@@ -1018,56 +1056,83 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
   return true;
 }
 
+bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
+    ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+    std::function<ConstantRange(const ConstantRange &,
+                                const ConstantRange &)> OpFn) {
+  // Figure out the ranges of the operands.  If that fails, use a
+  // conservative range, but apply the transfer rule anyways.  This
+  // lets us pick up facts from expressions like "and i32 (call i32
+  // @foo()), 32"
+  Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB);
+  Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB);
+  if (!LHSRes.hasValue() || !RHSRes.hasValue())
+    // More work to do before applying this transfer rule.
+    return false;
+
+  ConstantRange LHSRange = LHSRes.getValue();
+  ConstantRange RHSRange = RHSRes.getValue();
+  BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+  return true;
+}
+
 bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
                                                 BinaryOperator *BO,
                                                 BasicBlock *BB) {
 
   assert(BO->getOperand(0)->getType()->isSized() &&
          "all operands to binary operators are sized");
-
-  // Filter out operators we don't know how to reason about before attempting to
-  // recurse on our operand(s).  This can cut a long search short if we know
-  // we're not going to be able to get any useful information anyways.
-  switch (BO->getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-  case Instruction::UDiv:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
-  case Instruction::And:
-  case Instruction::Or:
-    // continue into the code below
-    break;
-  default:
-    // Unhandled instructions are overdefined.
+  if (BO->getOpcode() == Instruction::Xor) {
+    // Xor is the only operation not supported by ConstantRange::binaryOp().
     LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
                       << "' - overdefined (unknown binary operator).\n");
     BBLV = ValueLatticeElement::getOverdefined();
     return true;
-  };
-
-  // Figure out the ranges of the operands.  If that fails, use a
-  // conservative range, but apply the transfer rule anyways.  This
-  // lets us pick up facts from expressions like "and i32 (call i32
-  // @foo()), 32"
-  Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
-  Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
+  }
 
-  if (!LHSRes.hasValue() || !RHSRes.hasValue())
-    // More work to do before applying this transfer rule.
-    return false;
+  return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
+      [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+        return CR1.binaryOp(BO->getOpcode(), CR2);
+      });
+}
 
-  ConstantRange LHSRange = LHSRes.getValue();
-  ConstantRange RHSRange = RHSRes.getValue();
+bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
+    ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) {
+  return solveBlockValueBinaryOpImpl(BBLV, WO, BB,
+      [WO](const ConstantRange &CR1, const ConstantRange &CR2) {
+        return CR1.binaryOp(WO->getBinaryOp(), CR2);
+      });
+}
 
-  // NOTE: We're currently limited by the set of operations that ConstantRange
-  // can evaluate symbolically.  Enhancing that set will allows us to analyze
-  // more definitions.
-  Instruction::BinaryOps BinOp = BO->getOpcode();
-  BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange));
-  return true;
+bool LazyValueInfoImpl::solveBlockValueIntrinsic(
+    ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::uadd_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.uadd_sat(CR2);
+        });
+  case Intrinsic::usub_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.usub_sat(CR2);
+        });
+  case Intrinsic::sadd_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.sadd_sat(CR2);
+        });
+  case Intrinsic::ssub_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.ssub_sat(CR2);
+        });
+  default:
+    LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+                      << "' - overdefined (unknown intrinsic).\n");
+    BBLV = ValueLatticeElement::getOverdefined();
+    return true;
+  }
 }
 
 static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
@@ -1133,6 +1198,28 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
   return ValueLatticeElement::getOverdefined();
 }
 
+// Handle conditions of the form
+// extractvalue(op.with.overflow(%x, C), 1).
+static ValueLatticeElement getValueFromOverflowCondition(
+    Value *Val, WithOverflowInst *WO, bool IsTrueDest) {
+  // TODO: This only works with a constant RHS for now. We could also compute
+  // the range of the RHS, but this doesn't fit into the current structure of
+  // the edge value calculation.
+  const APInt *C;
+  if (WO->getLHS() != Val || !match(WO->getRHS(), m_APInt(C)))
+    return ValueLatticeElement::getOverdefined();
+
+  // Calculate the possible values of %x for which no overflow occurs.
+  ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
+      WO->getBinaryOp(), *C, WO->getNoWrapKind());
+
+  // If overflow is false, %x is constrained to NWR. If overflow is true, %x is
+  // constrained to its inverse (all values that might cause overflow).
+  if (IsTrueDest)
+    NWR = NWR.inverse();
+  return ValueLatticeElement::getRange(NWR);
+}
+
 static ValueLatticeElement
 getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
                       DenseMap<Value*, ValueLatticeElement> &Visited);
@@ -1143,6 +1230,11 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
   if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
     return getValueFromICmpCondition(Val, ICI, isTrueDest);
 
+  if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
+    if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+      if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
+        return getValueFromOverflowCondition(Val, WO, isTrueDest);
+
   // Handle conditions in the form of (cond1 && cond2), we know that on the
   // true dest path both of the conditions hold. Similarly for conditions of
   // the form (cond1 || cond2), we know that on the false dest path neither
@@ -1575,14 +1667,14 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
   ValueLatticeElement Result =
       getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
   if (Result.isUndefined())
-    return ConstantRange(Width, /*isFullSet=*/false);
+    return ConstantRange::getEmpty(Width);
   if (Result.isConstantRange())
     return Result.getConstantRange();
   // We represent ConstantInt constants as constant ranges but other kinds
   // of integer constants, i.e. ConstantExpr will be tagged as constants
   assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
          "ConstantInt value must be represented as constantrange");
-  return ConstantRange(Width, /*isFullSet=*/true);
+  return ConstantRange::getFull(Width);
 }
 
 /// Determine whether the specified value is known to be a
@@ -1614,14 +1706,14 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
       getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
 
   if (Result.isUndefined())
-    return ConstantRange(Width, /*isFullSet=*/false);
+    return ConstantRange::getEmpty(Width);
   if (Result.isConstantRange())
     return Result.getConstantRange();
   // We represent ConstantInt constants as constant ranges but other kinds
   // of integer constants, i.e. ConstantExpr will be tagged as constants
   assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
          "ConstantInt value must be represented as constantrange");
-  return ConstantRange(Width, /*isFullSet=*/true);
+  return ConstantRange::getFull(Width);
 }
 
 static LazyValueInfo::Tristate
@@ -1711,7 +1803,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
   // through would still be correct.
   const DataLayout &DL = CxtI->getModule()->getDataLayout();
   if (V->getType()->isPointerTy() && C->isNullValue() &&
-      isKnownNonZero(V->stripPointerCasts(), DL)) {
+      isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) {
     if (Pred == ICmpInst::ICMP_EQ)
       return LazyValueInfo::False;
     else if (Pred == ICmpInst::ICMP_NE)
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 5540859ebdda..52212e1c42aa 100644
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -1,10 +1,9 @@
 //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
 //Implementation -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 5d0a627f8426..d28b8a189d4b 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -1,9 +1,8 @@
 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -268,10 +267,14 @@ void Lint::visitCallSite(CallSite CS) {
         if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
           AttributeList PAL = CS.getAttributes();
           unsigned ArgNo = 0;
-          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE;
+               ++BI, ++ArgNo) {
             // Skip ByVal arguments since they will be memcpy'd to the callee's
             // stack so we're not really passing the pointer anyway.
-            if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+            if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal))
+              continue;
+            // If both arguments are readonly, they have no dependence.
+            if (Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo))
               continue;
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
               AliasResult Result = AA->alias(*AI, *BI);
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 8129795bc0c1..31da4e9ec783 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -1,9 +1,8 @@
 //===- Loads.cpp - Local load analysis ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -126,7 +125,8 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
                                               Visited);
 }
 
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+                                              unsigned Align,
                                               const DataLayout &DL,
                                               const Instruction *CtxI,
                                               const DominatorTree *DT) {
@@ -134,8 +134,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
   // attribute, we know exactly how many bytes are dereferenceable. If we can
   // determine the exact offset to the attributed variable, we can use that
   // information here.
-  Type *VTy = V->getType();
-  Type *Ty = VTy->getPointerElementType();
 
   // Require ABI alignment for loads without alignment specification
   if (Align == 0)
@@ -146,14 +144,16 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
 
   SmallPtrSet<const Value *, 32> Visited;
   return ::isDereferenceableAndAlignedPointer(
-      V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
-      CtxI, DT, Visited);
+      V, Align,
+      APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
+      DL, CtxI, DT, Visited);
 }
 
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
+                                    const DataLayout &DL,
                                     const Instruction *CtxI,
                                     const DominatorTree *DT) {
-  return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT);
+  return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
 }
 
 /// Test if A and B will obviously have the same value.
@@ -198,7 +198,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 ///
 /// This uses the pointee type to determine how many bytes need to be safe to
 /// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
                                        const DataLayout &DL,
                                        Instruction *ScanFrom,
                                        const DominatorTree *DT) {
@@ -209,7 +209,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
 
   // If DT is not specified we can't make context-sensitive query
   const Instruction* CtxI = DT ? ScanFrom : nullptr;
-  if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT))
+  if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
     return true;
 
   int64_t ByteOffset = 0;
@@ -281,9 +281,17 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
     Value *AccessedPtr;
     unsigned AccessedAlign;
     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+      // Ignore volatile loads. The execution of a volatile load cannot
+      // be used to prove an address is backed by regular memory; it can,
+      // for example, point to an MMIO register.
+      if (LI->isVolatile())
+        continue;
       AccessedPtr = LI->getPointerOperand();
       AccessedAlign = LI->getAlignment();
     } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+      // Ignore volatile stores (see comment for loads).
+      if (SI->isVolatile())
+        continue;
       AccessedPtr = SI->getPointerOperand();
       AccessedAlign = SI->getAlignment();
     } else
@@ -306,7 +314,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
   return false;
 }
 
-/// DefMaxInstsToScan - the default number of maximum instructions
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+                                       const DataLayout &DL,
+                                       Instruction *ScanFrom,
+                                       const DominatorTree *DT) {
+  APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+  return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+}
+
+  /// DefMaxInstsToScan - the default number of maximum instructions
 /// to scan in the block, used by FindAvailableLoadedValue().
 /// FindAvailableLoadedValue() was introduced in r60148, to improve jump
 /// threading in part by eliminating partially redundant loads.
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 7f3480f512ab..36bd9a8b7ea7 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -843,7 +842,7 @@ void AccessAnalysis::processMemAccesses() {
     bool SetHasWrite = false;
 
     // Map of pointers to last access encountered.
-    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
     UnderlyingObjToAccessMap ObjToLastAccess;
 
     // Set of access to check after all writes have been processed.
@@ -904,13 +903,13 @@ void AccessAnalysis::processMemAccesses() {
 
           // Create sets of pointers connected by a shared alias set and
           // underlying object.
-          typedef SmallVector<Value *, 16> ValueVector;
+          typedef SmallVector<const Value *, 16> ValueVector;
           ValueVector TempObjects;
 
           GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
           LLVM_DEBUG(dbgs()
                      << "Underlying objects for pointer " << *Ptr << "\n");
-          for (Value *UnderlyingObj : TempObjects) {
+          for (const Value *UnderlyingObj : TempObjects) {
             // nullptr never alias, don't join sets for pointer that have "null"
             // in their UnderlyingObjects list.
             if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1014,7 +1013,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
     return 0;
   }
 
-  // The accesss function must stride over the innermost loop.
+  // The access function must stride over the innermost loop.
   if (Lp != AR->getLoop()) {
     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                       << *Ptr << " SCEV: " << *AR << "\n");
@@ -1086,7 +1085,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
     if (Assume) {
       // We can avoid this case by adding a run-time check.
       LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
-                        << "inbouds or in address space 0 may wrap:\n"
+                        << "inbounds or in address space 0 may wrap:\n"
                         << "LAA:   Pointer: " << *Ptr << "\n"
                         << "LAA:   SCEV: " << *AR << "\n"
                         << "LAA:   Added an overflow assumption\n");
@@ -1145,10 +1144,9 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
   std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
 
   // Sort the memory accesses and keep the order of their uses in UseOrder.
-  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
-                   [&OffValPairs](unsigned Left, unsigned Right) {
-                     return OffValPairs[Left].first < OffValPairs[Right].first;
-                   });
+  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+    return OffValPairs[Left].first < OffValPairs[Right].first;
+  });
 
   // Check if the order is consecutive already.
   if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
@@ -1346,7 +1344,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   // where Step is the absolute stride of the memory accesses in bytes,
   // then there is no dependence.
   //
-  // Ratioanle:
+  // Rationale:
   // We basically want to check if the absolute distance (|Dist/Step|)
   // is >= the loop iteration count (or > BackedgeTakenCount).
   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
@@ -1369,7 +1367,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
-  // backdgeTakenCount is non-negative, so we zero extend Product.
+  // backedgeTakenCount is non-negative, so we zero extend Product.
   if (DistTypeSize > ProductTypeSize)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
@@ -1780,6 +1778,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
   unsigned NumReads = 0;
   unsigned NumReadWrites = 0;
 
+  bool HasComplexMemInst = false;
+
+  // A runtime check is only legal to insert if there are no convergent calls.
+  HasConvergentOp = false;
+
   PtrRtChecking->Pointers.clear();
   PtrRtChecking->Need = false;
 
@@ -1787,8 +1790,25 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
 
   // For each block.
   for (BasicBlock *BB : TheLoop->blocks()) {
-    // Scan the BB and collect legal loads and stores.
+    // Scan the BB and collect legal loads and stores. Also detect any
+    // convergent instructions.
     for (Instruction &I : *BB) {
+      if (auto *Call = dyn_cast<CallBase>(&I)) {
+        if (Call->isConvergent())
+          HasConvergentOp = true;
+      }
+
+      // With both a non-vectorizable memory instruction and a convergent
+      // operation, found in this loop, no reason to continue the search.
+      if (HasComplexMemInst && HasConvergentOp) {
+        CanVecMem = false;
+        return;
+      }
+
+      // Avoid hitting recordAnalysis multiple times.
+      if (HasComplexMemInst)
+        continue;
+
       // If this is a load, save it. If this instruction can read from memory
       // but is not a load, then we quit. Notice that we don't handle function
       // calls that read or write.
@@ -1807,12 +1827,18 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
           continue;
 
         auto *Ld = dyn_cast<LoadInst>(&I);
-        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+        if (!Ld) {
+          recordAnalysis("CantVectorizeInstruction", Ld)
+            << "instruction cannot be vectorized";
+          HasComplexMemInst = true;
+          continue;
+        }
+        if (!Ld->isSimple() && !IsAnnotatedParallel) {
           recordAnalysis("NonSimpleLoad", Ld)
               << "read with atomic ordering or volatile read";
           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         NumLoads++;
         Loads.push_back(Ld);
@@ -1828,15 +1854,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
         if (!St) {
           recordAnalysis("CantVectorizeInstruction", St)
               << "instruction cannot be vectorized";
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         if (!St->isSimple() && !IsAnnotatedParallel) {
           recordAnalysis("NonSimpleStore", St)
               << "write with atomic ordering or volatile write";
           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
-          CanVecMem = false;
-          return;
+          HasComplexMemInst = true;
+          continue;
         }
         NumStores++;
         Stores.push_back(St);
@@ -1847,6 +1873,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
     } // Next instr.
   } // Next block.
 
+  if (HasComplexMemInst) {
+    CanVecMem = false;
+    return;
+  }
+
   // Now we have two lists that hold the loads and the stores.
   // Next, we find the pointers that they use.
 
@@ -1964,7 +1995,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
   }
 
   LLVM_DEBUG(
-      dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+    dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
 
   CanVecMem = true;
   if (Accesses.isDependencyCheckNeeded()) {
@@ -1999,6 +2030,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
     }
   }
 
+  if (HasConvergentOp) {
+    recordAnalysis("CantInsertRuntimeCheckWithConvergent")
+      << "cannot add control dependency to convergent operation";
+    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
+                         "would be needed with a convergent operation\n");
+    CanVecMem = false;
+    return;
+  }
+
   if (CanVecMem)
     LLVM_DEBUG(
         dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
@@ -2252,7 +2292,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
 
   // Match the types so we can compare the stride and the BETakenCount.
   // The Stride can be positive/negative, so we sign extend Stride;
-  // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
+  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
   const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
   uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
   uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
@@ -2287,6 +2327,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
       PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
       DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
       NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
+      HasConvergentOp(false),
       HasDependenceInvolvingLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
     analyzeLoop(AA, LI, TLI, DT);
@@ -2303,6 +2344,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
     OS << "\n";
   }
 
+  if (HasConvergentOp)
+    OS.indent(Depth) << "Has convergent operation in loop\n";
+
   if (Report)
     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
 
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index 2a3b29d7fbca..a10a87ce113b 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -1,9 +1,8 @@
 //===- LoopAnalysisManager.cpp - Loop analysis management -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,11 +18,6 @@
 using namespace llvm;
 
 namespace llvm {
-/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
-cl::opt<bool> EnableMSSALoopDependency(
-    "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
-    cl::desc("Enable MemorySSA dependency for loop pass manager"));
-
 // Explicit template instantiations and specialization definitions for core
 // template typedefs.
 template class AllAnalysesOn<Loop>;
@@ -147,8 +141,6 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
   PA.preserve<LoopAnalysis>();
   PA.preserve<LoopAnalysisManagerFunctionProxy>();
   PA.preserve<ScalarEvolutionAnalysis>();
-  if (EnableMSSALoopDependency)
-    PA.preserve<MemorySSAAnalysis>();
   // FIXME: What we really want to do here is preserve an AA category, but that
   // concept doesn't exist yet.
   PA.preserve<AAManager>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index ef2b1257015c..aa5da0859805 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -1,9 +1,8 @@
 //===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,8 +17,12 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -65,15 +68,16 @@ bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
   return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
 }
 
-bool Loop::makeLoopInvariant(Value *V, bool &Changed,
-                             Instruction *InsertPt) const {
+bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt,
+                             MemorySSAUpdater *MSSAU) const {
   if (Instruction *I = dyn_cast<Instruction>(V))
-    return makeLoopInvariant(I, Changed, InsertPt);
+    return makeLoopInvariant(I, Changed, InsertPt, MSSAU);
   return true; // All non-instructions are loop-invariant.
 }
 
 bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
-                             Instruction *InsertPt) const {
+                             Instruction *InsertPt,
+                             MemorySSAUpdater *MSSAU) const {
   // Test if the value is already loop-invariant.
   if (isLoopInvariant(I))
     return true;
@@ -94,11 +98,14 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
   }
   // Don't hoist instructions with loop-variant operands.
   for (Value *Operand : I->operands())
-    if (!makeLoopInvariant(Operand, Changed, InsertPt))
+    if (!makeLoopInvariant(Operand, Changed, InsertPt, MSSAU))
       return false;
 
   // Hoist.
   I->moveBefore(InsertPt);
+  if (MSSAU)
+    if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I))
+      MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End);
 
   // There is possibility of hoisting this instruction above some arbitrary
   // condition. Any metadata defined on it can be control dependent on this
@@ -110,24 +117,37 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
   return true;
 }
 
-PHINode *Loop::getCanonicalInductionVariable() const {
+bool Loop::getIncomingAndBackEdge(BasicBlock *&Incoming,
+                                  BasicBlock *&Backedge) const {
   BasicBlock *H = getHeader();
 
-  BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+  Incoming = nullptr;
+  Backedge = nullptr;
   pred_iterator PI = pred_begin(H);
   assert(PI != pred_end(H) && "Loop must have at least one backedge!");
   Backedge = *PI++;
   if (PI == pred_end(H))
-    return nullptr; // dead loop
+    return false; // dead loop
   Incoming = *PI++;
   if (PI != pred_end(H))
-    return nullptr; // multiple backedges?
+    return false; // multiple backedges?
 
   if (contains(Incoming)) {
     if (contains(Backedge))
-      return nullptr;
+      return false;
     std::swap(Incoming, Backedge);
   } else if (!contains(Backedge))
+    return false;
+
+  assert(Incoming && Backedge && "expected non-null incoming and backedges");
+  return true;
+}
+
+PHINode *Loop::getCanonicalInductionVariable() const {
+  BasicBlock *H = getHeader();
+
+  BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+  if (!getIncomingAndBackEdge(Incoming, Backedge))
     return nullptr;
 
   // Loop over all of the PHI nodes, looking for a canonical indvar.
@@ -146,6 +166,218 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+  if (BasicBlock *Latch = L.getLoopLatch())
+    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+      if (BI->isConditional())
+        return dyn_cast<ICmpInst>(BI->getCondition());
+
+  return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+                               const Instruction &StepInst) {
+  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+  if (!LatchCmpInst)
+    return nullptr;
+
+  Value *Op0 = LatchCmpInst->getOperand(0);
+  Value *Op1 = LatchCmpInst->getOperand(1);
+  if (Op0 == &IndVar || Op0 == &StepInst)
+    return Op1;
+
+  if (Op1 == &IndVar || Op1 == &StepInst)
+    return Op0;
+
+  return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+                                                       PHINode &IndVar,
+                                                       ScalarEvolution &SE) {
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+    return None;
+
+  Value *InitialIVValue = IndDesc.getStartValue();
+  Instruction *StepInst = IndDesc.getInductionBinOp();
+  if (!InitialIVValue || !StepInst)
+    return None;
+
+  const SCEV *Step = IndDesc.getStep();
+  Value *StepInstOp1 = StepInst->getOperand(1);
+  Value *StepInstOp0 = StepInst->getOperand(0);
+  Value *StepValue = nullptr;
+  if (SE.getSCEV(StepInstOp1) == Step)
+    StepValue = StepInstOp1;
+  else if (SE.getSCEV(StepInstOp0) == Step)
+    StepValue = StepInstOp0;
+
+  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+  if (!FinalIVValue)
+    return None;
+
+  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+                    SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+  BasicBlock *Latch = L.getLoopLatch();
+  assert(Latch && "Expecting valid latch");
+
+  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+  assert(LatchCmpInst &&
+         "Expecting the latch compare instruction to be a CmpInst");
+
+  // Need to invert the predicate when the first successor is not the loop
+  // header.
+  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+                                 ? LatchCmpInst->getPredicate()
+                                 : LatchCmpInst->getInversePredicate();
+
+  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+
+  // Need to flip strictness of the predicate when the latch compare instruction
+  // is not using StepInst
+  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+      LatchCmpInst->getOperand(1) == &getStepInst())
+    return Pred;
+
+  // Cannot flip strictness of NE and EQ
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+  Direction D = getDirection();
+  if (D == Direction::Increasing)
+    return ICmpInst::ICMP_SLT;
+
+  if (D == Direction::Decreasing)
+    return ICmpInst::ICMP_SGT;
+
+  // If the direction cannot be determined, the canonical predicate cannot be
+  // found.
+  return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+  if (const SCEVAddRecExpr *StepAddRecExpr =
+          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+      if (SE.isKnownPositive(StepRecur))
+        return Direction::Increasing;
+      if (SE.isKnownNegative(StepRecur))
+        return Direction::Decreasing;
+    }
+
+  return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return LoopBounds::getBounds(*this, *IndVar, SE);
+
+  return None;
+}
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+  if (!isLoopSimplifyForm())
+    return nullptr;
+
+  BasicBlock *Header = getHeader();
+  assert(Header && "Expected a valid loop header");
+  ICmpInst *CmpInst = getLatchCmpInst(*this);
+  if (!CmpInst)
+    return nullptr;
+
+  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+
+  for (PHINode &IndVar : Header->phis()) {
+    InductionDescriptor IndDesc;
+    if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
+      continue;
+
+    Instruction *StepInst = IndDesc.getInductionBinOp();
+
+    // case 1:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = StepInst < FinalValue
+    if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
+      return &IndVar;
+
+    // case 2:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = IndVar < FinalValue
+    if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
+      return &IndVar;
+  }
+
+  return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+                                  InductionDescriptor &IndDesc) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+  return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                        ScalarEvolution &SE) const {
+  // Located in the loop header
+  BasicBlock *Header = getHeader();
+  if (AuxIndVar.getParent() != Header)
+    return false;
+
+  // No uses outside of the loop
+  for (User *U : AuxIndVar.users())
+    if (const Instruction *I = dyn_cast<Instruction>(U))
+      if (!contains(I))
+        return false;
+
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+    return false;
+
+  // The step instruction opcode should be add or sub.
+  if (IndDesc.getInductionOpcode() != Instruction::Add &&
+      IndDesc.getInductionOpcode() != Instruction::Sub)
+    return false;
+
+  // Incremented by a loop invariant step for each loop iteration
+  return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+  InductionDescriptor IndDesc;
+  if (!getInductionDescriptor(SE, IndDesc))
+    return false;
+
+  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+  if (!Init || !Init->isZero())
+    return false;
+
+  if (IndDesc.getInductionOpcode() != Instruction::Add)
+    return false;
+
+  ConstantInt *Step = IndDesc.getConstIntStepValue();
+  if (!Step || !Step->isOne())
+    return false;
+
+  return true;
+}
+
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
@@ -200,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const {
 bool Loop::isSafeToClone() const {
   // Return false if any loop blocks contain indirectbrs, or there are any calls
   // to noduplicate functions.
+  // FIXME: it should be ok to clone CallBrInst's if we correctly update the
+  // operand list to reflect the newly cloned labels.
   for (BasicBlock *BB : this->blocks()) {
-    if (isa<IndirectBrInst>(BB->getTerminator()))
+    if (isa<IndirectBrInst>(BB->getTerminator()) ||
+        isa<CallBrInst>(BB->getTerminator()))
       return false;
 
     for (Instruction &I : *BB)
@@ -242,48 +477,20 @@ void Loop::setLoopID(MDNode *LoopID) const {
   assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
          "Loop ID should refer to itself");
 
-  BasicBlock *H = getHeader();
-  for (BasicBlock *BB : this->blocks()) {
-    Instruction *TI = BB->getTerminator();
-    for (BasicBlock *Successor : successors(TI)) {
-      if (Successor == H) {
-        TI->setMetadata(LLVMContext::MD_loop, LoopID);
-        break;
-      }
-    }
-  }
+  SmallVector<BasicBlock *, 4> LoopLatches;
+  getLoopLatches(LoopLatches);
+  for (BasicBlock *BB : LoopLatches)
+    BB->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
 }
 
 void Loop::setLoopAlreadyUnrolled() {
-  MDNode *LoopID = getLoopID();
-  // First remove any existing loop unrolling metadata.
-  SmallVector<Metadata *, 4> MDs;
-  // Reserve first location for self reference to the LoopID metadata node.
-  MDs.push_back(nullptr);
-
-  if (LoopID) {
-    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
-      bool IsUnrollMetadata = false;
-      MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
-      if (MD) {
-        const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
-        IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
-      }
-      if (!IsUnrollMetadata)
-        MDs.push_back(LoopID->getOperand(i));
-    }
-  }
-
-  // Add unroll(disable) metadata to disable future unrolling.
   LLVMContext &Context = getHeader()->getContext();
-  SmallVector<Metadata *, 1> DisableOperands;
-  DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
-  MDNode *DisableNode = MDNode::get(Context, DisableOperands);
-  MDs.push_back(DisableNode);
 
-  MDNode *NewLoopID = MDNode::get(Context, MDs);
-  // Set operand 0 to refer to the loop id itself.
-  NewLoopID->replaceOperandWith(0, NewLoopID);
+  MDNode *DisableUnrollMD =
+      MDNode::get(Context, MDString::get(Context, "llvm.loop.unroll.disable"));
+  MDNode *LoopID = getLoopID();
+  MDNode *NewLoopID = makePostTransformationMetadata(
+      Context, LoopID, {"llvm.loop.unroll."}, {DisableUnrollMD});
   setLoopID(NewLoopID);
 }
 
@@ -761,6 +968,46 @@ bool llvm::isValidAsAccessGroup(MDNode *Node) {
   return Node->getNumOperands() == 0 && Node->isDistinct();
 }
 
+MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
+                                             MDNode *OrigLoopID,
+                                             ArrayRef<StringRef> RemovePrefixes,
+                                             ArrayRef<MDNode *> AddAttrs) {
+  // First remove any existing loop metadata related to this transformation.
+  SmallVector<Metadata *, 4> MDs;
+
+  // Reserve first location for self reference to the LoopID metadata node.
+  TempMDTuple TempNode = MDNode::getTemporary(Context, None);
+  MDs.push_back(TempNode.get());
+
+  // Remove metadata for the transformation that has been applied or that became
+  // outdated.
+  if (OrigLoopID) {
+    for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) {
+      bool IsVectorMetadata = false;
+      Metadata *Op = OrigLoopID->getOperand(i);
+      if (MDNode *MD = dyn_cast<MDNode>(Op)) {
+        const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+        if (S)
+          IsVectorMetadata =
+              llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool {
+                return S->getString().startswith(Prefix);
+              });
+      }
+      if (!IsVectorMetadata)
+        MDs.push_back(Op);
+    }
+  }
+
+  // Add metadata to avoid reapplying a transformation, such as
+  // llvm.loop.unroll.disable and llvm.loop.isvectorized.
+  MDs.append(AddAttrs.begin(), AddAttrs.end());
+
+  MDNode *NewLoopID = MDNode::getDistinct(Context, MDs);
+  // Replace the temporary node with a self-reference.
+  NewLoopID->replaceOperandWith(0, NewLoopID);
+  return NewLoopID;
+}
+
 //===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
@@ -792,7 +1039,7 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
 
 void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
 }
 
 void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index a68f114b83a0..4ab3798039d8 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -1,9 +1,8 @@
 //===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -210,6 +210,8 @@ bool LPPassManager::runOnFunction(Function &F) {
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
 
+      llvm::TimeTraceScope LoopPassScope("RunLoopPass", P->getPassName());
+
       dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
                    CurrentLoop->getHeader()->getName());
       dumpRequiredSet(P);
@@ -384,16 +386,20 @@ void LoopPass::assignPassManager(PMStack &PMS,
   LPPM->add(this);
 }
 
+static std::string getDescription(const Loop &L) {
+  return "loop";
+}
+
 bool LoopPass::skipLoop(const Loop *L) const {
   const Function *F = L->getHeader()->getParent();
   if (!F)
     return false;
   // Check the opt bisect limit.
-  LLVMContext &Context = F->getContext();
-  if (!Context.getOptPassGate().shouldRunPass(this, *L))
+  OptPassGate &Gate = F->getContext().getOptPassGate();
+  if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(*L)))
     return true;
   // Check for the OptimizeNone attribute.
-  if (F->hasFnAttribute(Attribute::OptimizeNone)) {
+  if (F->hasOptNone()) {
     // FIXME: Report this to dbgs() only once per function.
     LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function "
                       << F->getName() << "\n");
diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp
index c8b91a7a1a51..1728b5e9f6d2 100644
--- a/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -1,9 +1,8 @@
 //===- LoopUnrollAnalyzer.cpp - Unrolling Effect Estimation -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 907b321b231a..6e1bb50e8893 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -1,9 +1,8 @@
 //===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 4a136c5a0c6d..77ebf89d9a08 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -1,9 +1,8 @@
 //===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,9 +53,10 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
   for (auto &I: instructions(F)) {
     if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       Value *PO = LI->getPointerOperand();
-      if (isDereferenceablePointer(PO, DL))
+      if (isDereferenceablePointer(PO, LI->getType(), DL))
         Deref.push_back(PO);
-      if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+      if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+                                             LI->getAlignment(), DL))
         DerefAndAligned.insert(PO);
     }
   }
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 686ad294378c..729dad463657 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -1,9 +1,8 @@
 //===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -264,6 +263,19 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
   return getAllocationData(V, AllocLike, TLI, LookThroughBitCast).hasValue();
 }
 
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+                     bool LookThroughBitCast) {
+  return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast).hasValue();
+}
+
+/// Tests if a function is a library function that reallocates memory
+/// (e.g., realloc).
+bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
+  return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
+}
+
 /// extractMallocCall - Returns the corresponding CallInst if the instruction
 /// is a malloc call.  Since CallInst::CreateMalloc() only creates calls, we
 /// ignore InvokeInst here.
@@ -359,19 +371,8 @@ const CallInst *llvm::extractCallocCall(const Value *I,
   return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
 }
 
-/// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
-  bool IsNoBuiltinCall;
-  const Function *Callee =
-      getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
-  if (Callee == nullptr || IsNoBuiltinCall)
-    return nullptr;
-
-  StringRef FnName = Callee->getName();
-  LibFunc TLIFn;
-  if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
-    return nullptr;
-
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
   unsigned ExpectedNumParams;
   if (TLIFn == LibFunc_free ||
       TLIFn == LibFunc_ZdlPv || // operator delete(void*)
@@ -402,22 +403,39 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
            TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow)
     ExpectedNumParams = 3;
   else
-    return nullptr;
+    return false;
 
   // Check free prototype.
   // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
   // attribute will exist.
-  FunctionType *FTy = Callee->getFunctionType();
+  FunctionType *FTy = F->getFunctionType();
   if (!FTy->getReturnType()->isVoidTy())
-    return nullptr;
+    return false;
   if (FTy->getNumParams() != ExpectedNumParams)
+    return false;
+  if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
+    return false;
+
+  return true;
+}
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+  bool IsNoBuiltinCall;
+  const Function *Callee =
+      getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+  if (Callee == nullptr || IsNoBuiltinCall)
     return nullptr;
-  if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+
+  StringRef FnName = Callee->getName();
+  LibFunc TLIFn;
+  if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
     return nullptr;
 
-  return dyn_cast<CallInst>(I);
+  return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
 }
 
+
 //===----------------------------------------------------------------------===//
 //  Utility functions to compute size of objects.
 //
@@ -442,10 +460,10 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
   return true;
 }
 
-ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
-                                       const DataLayout &DL,
-                                       const TargetLibraryInfo *TLI,
-                                       bool MustSucceed) {
+Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+                                 const DataLayout &DL,
+                                 const TargetLibraryInfo *TLI,
+                                 bool MustSucceed) {
   assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
          "ObjectSize must be a call to llvm.objectsize!");
 
@@ -462,13 +480,35 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
   EvalOptions.NullIsUnknownSize =
       cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
 
-  // FIXME: Does it make sense to just return a failure value if the size won't
-  // fit in the output and `!MustSucceed`?
-  uint64_t Size;
   auto *ResultType = cast<IntegerType>(ObjectSize->getType());
-  if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
-      isUIntN(ResultType->getBitWidth(), Size))
-    return ConstantInt::get(ResultType, Size);
+  bool StaticOnly = cast<ConstantInt>(ObjectSize->getArgOperand(3))->isZero();
+  if (StaticOnly) {
+    // FIXME: Does it make sense to just return a failure value if the size won't
+    // fit in the output and `!MustSucceed`?
+    uint64_t Size;
+    if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
+        isUIntN(ResultType->getBitWidth(), Size))
+      return ConstantInt::get(ResultType, Size);
+  } else {
+    LLVMContext &Ctx = ObjectSize->getFunction()->getContext();
+    ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions);
+    SizeOffsetEvalType SizeOffsetPair =
+        Eval.compute(ObjectSize->getArgOperand(0));
+
+    if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) {
+      IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL));
+      Builder.SetInsertPoint(ObjectSize);
+
+      // If we're outside the end of the object, then we can always access
+      // exactly 0 bytes.
+      Value *ResultSize =
+          Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second);
+      Value *UseZero =
+          Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
+      return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0),
+                                  ResultSize);
+    }
+  }
 
   if (!MustSucceed)
     return nullptr;
@@ -684,7 +724,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
   if (!GV.hasDefinitiveInitializer())
     return unknown();
 
-  APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
+  APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
   return std::make_pair(align(Size, GV.getAlignment()), Zero);
 }
 
@@ -743,9 +783,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
 
 ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
     const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
-    bool RoundToAlign)
-    : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
-      RoundToAlign(RoundToAlign) {
+    ObjectSizeOpts EvalOpts)
+    : DL(DL), TLI(TLI), Context(Context),
+      Builder(Context, TargetFolder(DL),
+              IRBuilderCallbackInserter(
+                  [&](Instruction *I) { InsertedInstructions.insert(I); })),
+      EvalOpts(EvalOpts) {
   // IntTy and Zero must be set for each compute() since the address space may
   // be different for later objects.
 }
@@ -767,17 +810,21 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
       if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
         CacheMap.erase(CacheIt);
     }
+
+    // Erase any instructions we inserted as part of the traversal.
+    for (Instruction *I : InsertedInstructions) {
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
+      I->eraseFromParent();
+    }
   }
 
   SeenVals.clear();
+  InsertedInstructions.clear();
   return Result;
 }
 
 SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
-  ObjectSizeOpts ObjSizeOptions;
-  ObjSizeOptions.RoundToAlign = RoundToAlign;
-
-  ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions);
+  ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts);
   SizeOffsetType Const = Visitor.compute(V);
   if (Visitor.bothKnown(Const))
     return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -916,24 +963,28 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
     if (!bothKnown(EdgeData)) {
       OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
       OffsetPHI->eraseFromParent();
+      InsertedInstructions.erase(OffsetPHI);
       SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
       SizePHI->eraseFromParent();
+      InsertedInstructions.erase(SizePHI);
       return unknown();
     }
     SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
     OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
   }
 
-  Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp;
-  if ((Tmp = SizePHI->hasConstantValue())) {
+  Value *Size = SizePHI, *Offset = OffsetPHI;
+  if (Value *Tmp = SizePHI->hasConstantValue()) {
     Size = Tmp;
     SizePHI->replaceAllUsesWith(Size);
     SizePHI->eraseFromParent();
+    InsertedInstructions.erase(SizePHI);
   }
-  if ((Tmp = OffsetPHI->hasConstantValue())) {
+  if (Value *Tmp = OffsetPHI->hasConstantValue()) {
     Offset = Tmp;
     OffsetPHI->replaceAllUsesWith(Offset);
     OffsetPHI->eraseFromParent();
+    InsertedInstructions.erase(OffsetPHI);
   }
   return std::make_pair(Size, Offset);
 }
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index e22182b99e11..b25b655165d7 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -327,7 +326,8 @@ static bool isVolatile(Instruction *Inst) {
 
 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
-    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+    OrderedBasicBlock *OBB) {
   MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
   if (QueryInst != nullptr) {
     if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,7 +338,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
     }
   }
   MemDepResult SimpleDep = getSimplePointerDependencyFrom(
-      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB);
   if (SimpleDep.isDef())
     return SimpleDep;
   // Non-local invariant group dependency indicates there is non local Def
@@ -439,14 +439,13 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
 
 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
-    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+    OrderedBasicBlock *OBB) {
   bool isInvariantLoad = false;
 
-  if (!Limit) {
-    unsigned DefaultLimit = BlockScanLimit;
-    return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
-                                          &DefaultLimit);
-  }
+  unsigned DefaultLimit = BlockScanLimit;
+  if (!Limit)
+    Limit = &DefaultLimit;
 
   // We must be careful with atomic accesses, as they may allow another thread
   //   to touch this location, clobbering it. We are conservative: if the
@@ -488,11 +487,14 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
 
   const DataLayout &DL = BB->getModule()->getDataLayout();
 
-  // Create a numbered basic block to lazily compute and cache instruction
+  // If the caller did not provide an ordered basic block,
+  // create one to lazily compute and cache instruction
   // positions inside a BB. This is used to provide fast queries for relative
   // position between two instructions in a BB and can be used by
   // AliasAnalysis::callCapturesBefore.
-  OrderedBasicBlock OBB(BB);
+  OrderedBasicBlock OBBTmp(BB);
+  if (!OBB)
+    OBB = &OBBTmp;
 
   // Return "true" if and only if the instruction I is either a non-simple
   // load or a non-simple store.
@@ -673,7 +675,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     // A release fence requires that all stores complete before it, but does
     // not prevent the reordering of following loads or stores 'before' the
     // fence.  As a result, we look past it when finding a dependency for
-    // loads.  DSE uses this to find preceeding stores to delete and thus we
+    // loads.  DSE uses this to find preceding stores to delete and thus we
     // can't bypass the fence if the query instruction is a store.
     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
       if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
@@ -683,7 +685,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
     // If necessary, perform additional analysis.
     if (isModAndRefSet(MR))
-      MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
+      MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB);
     switch (clearMust(MR)) {
     case ModRefInfo::NoModRef:
       // If the call has no effect on the queried pointer, just ignore it.
@@ -709,7 +711,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
   return MemDepResult::getNonFuncLocal();
 }
 
-MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
+MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst,
+                                                    OrderedBasicBlock *OBB) {
   Instruction *ScanPos = QueryInst;
 
   // Check for a cached result
@@ -747,8 +750,9 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
       if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
         isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
 
-      LocalCache = getPointerDependencyFrom(
-          MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
+      LocalCache =
+          getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
+                                   QueryParent, QueryInst, nullptr, OBB);
     } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
       bool isReadOnly = AA.onlyReadsMemory(QueryCall);
       LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
index 27e8d72b8e89..163830eee797 100644
--- a/lib/Analysis/MemoryLocation.cpp
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -1,9 +1,8 @@
 //===- MemoryLocation.cpp - Memory location descriptions -------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 6a5567ed765b..17f5d9b9f0ad 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -1,9 +1,8 @@
 //===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,6 +81,11 @@ bool llvm::VerifyMemorySSA = true;
 #else
 bool llvm::VerifyMemorySSA = false;
 #endif
+/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
+cl::opt<bool> llvm::EnableMSSALoopDependency(
+    "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+    cl::desc("Enable MemorySSA dependency for loop pass manager"));
+
 static cl::opt<bool, true>
     VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
                      cl::Hidden, cl::desc("Enable verification of MemorySSA."));
@@ -252,10 +256,10 @@ struct ClobberAlias {
 
 // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
 // ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
-                                             const MemoryLocation &UseLoc,
-                                             const Instruction *UseInst,
-                                             AliasAnalysis &AA) {
+template <typename AliasAnalysisType>
+static ClobberAlias
+instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
+                         const Instruction *UseInst, AliasAnalysisType &AA) {
   Instruction *DefInst = MD->getMemoryInst();
   assert(DefInst && "Defining instruction not actually an instruction");
   const auto *UseCall = dyn_cast<CallBase>(UseInst);
@@ -300,10 +304,11 @@ static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
   return {isModSet(I), AR};
 }
 
+template <typename AliasAnalysisType>
 static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
                                              const MemoryUseOrDef *MU,
                                              const MemoryLocOrCall &UseMLOC,
-                                             AliasAnalysis &AA) {
+                                             AliasAnalysisType &AA) {
   // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
   // to exist while MemoryLocOrCall is pushed through places.
   if (UseMLOC.IsCall)
@@ -346,12 +351,12 @@ struct UpwardsMemoryQuery {
 } // end anonymous namespace
 
 static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
-                           AliasAnalysis &AA) {
+                           BatchAAResults &AA) {
   Instruction *Inst = MD->getMemoryInst();
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::lifetime_end:
-      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+      return AA.alias(MemoryLocation(II->getArgOperand(1)), Loc) == MustAlias;
     default:
       return false;
     }
@@ -359,13 +364,14 @@ static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
   return false;
 }
 
-static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+template <typename AliasAnalysisType>
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
                                                    const Instruction *I) {
   // If the memory can't be changed, then loads of the memory can't be
   // clobbered.
   return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
-                              AA.pointsToConstantMemory(cast<LoadInst>(I)->
-                                                          getPointerOperand()));
+                              AA.pointsToConstantMemory(MemoryLocation(
+                                  cast<LoadInst>(I)->getPointerOperand())));
 }
 
 /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
@@ -381,10 +387,12 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
 /// \param Query     The UpwardsMemoryQuery we used for our search.
 /// \param AA        The AliasAnalysis we used for our search.
 /// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-static void
+
+template <typename AliasAnalysisType>
+LLVM_ATTRIBUTE_UNUSED static void
 checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
                    const MemoryLocation &StartLoc, const MemorySSA &MSSA,
-                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+                   const UpwardsMemoryQuery &Query, AliasAnalysisType &AA,
                    bool AllowImpreciseClobber = false) {
   assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
 
@@ -474,7 +482,7 @@ namespace {
 
 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
 /// in one class.
-class ClobberWalker {
+template <class AliasAnalysisType> class ClobberWalker {
   /// Save a few bytes by using unsigned instead of size_t.
   using ListIndex = unsigned;
 
@@ -498,9 +506,10 @@ class ClobberWalker {
   };
 
   const MemorySSA &MSSA;
-  AliasAnalysis &AA;
+  AliasAnalysisType &AA;
   DominatorTree &DT;
   UpwardsMemoryQuery *Query;
+  unsigned *UpwardWalkLimit;
 
   // Phi optimization bookkeeping
   SmallVector<DefPath, 32> Paths;
@@ -539,6 +548,16 @@ class ClobberWalker {
   walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
                      const MemoryAccess *SkipStopAt = nullptr) const {
     assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
+    assert(UpwardWalkLimit && "Need a valid walk limit");
+    bool LimitAlreadyReached = false;
+    // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
+    // it to 1. This will not do any alias() calls. It either returns in the
+    // first iteration in the loop below, or is set back to 0 if all def chains
+    // are free of MemoryDefs.
+    if (!*UpwardWalkLimit) {
+      *UpwardWalkLimit = 1;
+      LimitAlreadyReached = true;
+    }
 
     for (MemoryAccess *Current : def_chain(Desc.Last)) {
       Desc.Last = Current;
@@ -548,6 +567,10 @@ class ClobberWalker {
       if (auto *MD = dyn_cast<MemoryDef>(Current)) {
         if (MSSA.isLiveOnEntryDef(MD))
           return {MD, true, MustAlias};
+
+        if (!--*UpwardWalkLimit)
+          return {Current, true, MayAlias};
+
         ClobberAlias CA =
             instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
         if (CA.IsClobber)
@@ -555,6 +578,9 @@ class ClobberWalker {
       }
     }
 
+    if (LimitAlreadyReached)
+      *UpwardWalkLimit = 0;
+
     assert(isa<MemoryPhi>(Desc.Last) &&
            "Ended at a non-clobber that's not a phi?");
     return {Desc.Last, false, MayAlias};
@@ -626,10 +652,12 @@ class ClobberWalker {
         SkipStopWhere = Query->OriginalAccess;
       }
 
-      UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+      UpwardsWalkResult Res = walkToPhiOrClobber(Node,
+                                                 /*StopAt=*/StopWhere,
                                                  /*SkipStopAt=*/SkipStopWhere);
       if (Res.IsKnownClobber) {
         assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
+
         // If this wasn't a cache hit, we hit a clobber when walking. That's a
         // failure.
         TerminatedPath Term{Res.Result, PathIndex};
@@ -662,7 +690,7 @@ class ClobberWalker {
   struct generic_def_path_iterator
       : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
                                     std::forward_iterator_tag, T *> {
-    generic_def_path_iterator() = default;
+    generic_def_path_iterator() {}
     generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
 
     T &operator*() const { return curNode(); }
@@ -887,13 +915,19 @@ class ClobberWalker {
   }
 
 public:
-  ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
+  ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
       : MSSA(MSSA), AA(AA), DT(DT) {}
 
+  AliasAnalysisType *getAA() { return &AA; }
   /// Finds the nearest clobber for the given query, optimizing phis if
   /// possible.
-  MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
+  MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
+                            unsigned &UpWalkLimit) {
     Query = &Q;
+    UpwardWalkLimit = &UpWalkLimit;
+    // Starting limit must be > 0.
+    if (!UpWalkLimit)
+      UpWalkLimit++;
 
     MemoryAccess *Current = Start;
     // This walker pretends uses don't exist. If we're handed one, silently grab
@@ -918,13 +952,11 @@ public:
     }
 
 #ifdef EXPENSIVE_CHECKS
-    if (!Q.SkipSelfAccess)
+    if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
       checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
 #endif
     return Result;
   }
-
-  void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
 };
 
 struct RenamePassData {
@@ -947,77 +979,99 @@ struct RenamePassData {
 
 namespace llvm {
 
-class MemorySSA::ClobberWalkerBase {
-  ClobberWalker Walker;
+template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
+  ClobberWalker<AliasAnalysisType> Walker;
   MemorySSA *MSSA;
 
 public:
-  ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+  ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D)
       : Walker(*M, *A, *D), MSSA(M) {}
 
   MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
-                                              const MemoryLocation &);
-  // Second argument (bool), defines whether the clobber search should skip the
+                                              const MemoryLocation &,
+                                              unsigned &);
+  // Third argument (bool) defines whether the clobber search should skip the
   // original queried access. If true, there will be a follow-up query searching
   // for a clobber access past "self". Note that the Optimized access is not
   // updated if a new clobber is found by this SkipSelf search. If this
   // additional query becomes heavily used we may decide to cache the result.
   // Walker instantiations will decide how to set the SkipSelf bool.
-  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
-  void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
 };
 
 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
 /// longer does caching on its own, but the name has been retained for the
 /// moment.
+template <class AliasAnalysisType>
 class MemorySSA::CachingWalker final : public MemorySSAWalker {
-  ClobberWalkerBase *Walker;
+  ClobberWalkerBase<AliasAnalysisType> *Walker;
 
 public:
-  CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+  CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
       : MemorySSAWalker(M), Walker(W) {}
   ~CachingWalker() override = default;
 
   using MemorySSAWalker::getClobberingMemoryAccess;
 
-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, UWL, false);
+  }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
-                                          const MemoryLocation &Loc) override;
+                                          const MemoryLocation &Loc,
+                                          unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+  }
+
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+    unsigned UpwardWalkLimit = MaxCheckLimit;
+    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+  }
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+                                          const MemoryLocation &Loc) override {
+    unsigned UpwardWalkLimit = MaxCheckLimit;
+    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+  }
 
   void invalidateInfo(MemoryAccess *MA) override {
     if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
       MUD->resetOptimized();
   }
-
-  void verify(const MemorySSA *MSSA) override {
-    MemorySSAWalker::verify(MSSA);
-    Walker->verify(MSSA);
-  }
 };
 
+template <class AliasAnalysisType>
 class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
-  ClobberWalkerBase *Walker;
+  ClobberWalkerBase<AliasAnalysisType> *Walker;
 
 public:
-  SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+  SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
       : MemorySSAWalker(M), Walker(W) {}
   ~SkipSelfWalker() override = default;
 
   using MemorySSAWalker::getClobberingMemoryAccess;
 
-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, UWL, true);
+  }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
-                                          const MemoryLocation &Loc) override;
+                                          const MemoryLocation &Loc,
+                                          unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+  }
+
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+    unsigned UpwardWalkLimit = MaxCheckLimit;
+    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+  }
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+                                          const MemoryLocation &Loc) override {
+    unsigned UpwardWalkLimit = MaxCheckLimit;
+    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+  }
 
   void invalidateInfo(MemoryAccess *MA) override {
     if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
       MUD->resetOptimized();
   }
-
-  void verify(const MemorySSA *MSSA) override {
-    MemorySSAWalker::verify(MSSA);
-    Walker->verify(MSSA);
-  }
 };
 
 } // end namespace llvm
@@ -1071,6 +1125,8 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
 void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
                            SmallPtrSetImpl<BasicBlock *> &Visited,
                            bool SkipVisited, bool RenameAllUses) {
+  assert(Root && "Trying to rename accesses in an unreachable block");
+
   SmallVector<RenamePassData, 32> WorkStack;
   // Skip everything if we already renamed this block and we are skipping.
   // Note: You can't sink this into the if, because we need it to occur
@@ -1154,9 +1210,20 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
 }
 
 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
-    : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+    : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
       SkipWalker(nullptr), NextID(0) {
-  buildMemorySSA();
+  // Build MemorySSA using a batch alias analysis. This reuses the internal
+  // state that AA collects during an alias()/getModRefInfo() call. This is
+  // safe because there are no CFG changes while building MemorySSA and can
+  // significantly reduce the time spent by the compiler in AA, because we will
+  // make queries about all the instructions in the Function.
+  BatchAAResults BatchAA(*AA);
+  buildMemorySSA(BatchAA);
+  // Intentionally leave AA as nullptr while building so we don't accidentally
+  // use non-batch AliasAnalysis.
+  this->AA = AA;
+  // Also create the walker here.
+  getWalker();
 }
 
 MemorySSA::~MemorySSA() {
@@ -1193,11 +1260,9 @@ namespace llvm {
 /// which is walking bottom-up.
 class MemorySSA::OptimizeUses {
 public:
-  OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
-               DominatorTree *DT)
-      : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
-    Walker = MSSA->getWalker();
-  }
+  OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker,
+               BatchAAResults *BAA, DominatorTree *DT)
+      : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}
 
   void optimizeUses();
 
@@ -1225,8 +1290,8 @@ private:
                            DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
 
   MemorySSA *MSSA;
-  MemorySSAWalker *Walker;
-  AliasAnalysis *AA;
+  CachingWalker<BatchAAResults> *Walker;
+  BatchAAResults *AA;
   DominatorTree *DT;
 };
 
@@ -1343,11 +1408,12 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
       continue;
     }
     bool FoundClobberResult = false;
+    unsigned UpwardWalkLimit = MaxCheckLimit;
     while (UpperBound > LocInfo.LowerBound) {
       if (isa<MemoryPhi>(VersionStack[UpperBound])) {
         // For phis, use the walker, see where we ended up, go there
-        Instruction *UseInst = MU->getMemoryInst();
-        MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
+        MemoryAccess *Result =
+            Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
         // We are guaranteed to find it or something is wrong
         while (VersionStack[UpperBound] != Result) {
           assert(UpperBound != 0);
@@ -1423,7 +1489,7 @@ void MemorySSA::placePHINodes(
     createMemoryPhi(BB);
 }
 
-void MemorySSA::buildMemorySSA() {
+void MemorySSA::buildMemorySSA(BatchAAResults &BAA) {
   // We create an access to represent "live on entry", for things like
   // arguments or users of globals, where the memory they use is defined before
   // the beginning of the function. We do not actually insert it into the IR.
@@ -1445,7 +1511,7 @@ void MemorySSA::buildMemorySSA() {
     AccessList *Accesses = nullptr;
     DefsList *Defs = nullptr;
     for (Instruction &I : B) {
-      MemoryUseOrDef *MUD = createNewAccess(&I);
+      MemoryUseOrDef *MUD = createNewAccess(&I, &BAA);
       if (!MUD)
         continue;
 
@@ -1469,9 +1535,9 @@ void MemorySSA::buildMemorySSA() {
   SmallPtrSet<BasicBlock *, 16> Visited;
   renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
 
-  CachingWalker *Walker = getWalkerImpl();
-
-  OptimizeUses(this, Walker, AA, DT).optimizeUses();
+  ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
+  CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+  OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();
 
   // Mark the uses in unreachable blocks as live on entry, so that they go
   // somewhere.
@@ -1482,14 +1548,16 @@ void MemorySSA::buildMemorySSA() {
 
 MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
 
-MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
+MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
   if (Walker)
     return Walker.get();
 
   if (!WalkerBase)
-    WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+    WalkerBase =
+        llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
 
-  Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
+  Walker =
+      llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
   return Walker.get();
 }
 
@@ -1498,9 +1566,11 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
     return SkipWalker.get();
 
   if (!WalkerBase)
-    WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+    WalkerBase =
+        llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
 
-  SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+  SkipWalker =
+      llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
   return SkipWalker.get();
  }
 
@@ -1619,7 +1689,7 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                                MemoryAccess *Definition,
                                                const MemoryUseOrDef *Template) {
   assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
-  MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
+  MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
   assert(
       NewAccess != nullptr &&
       "Tried to create a memory access for a non-memory touching instruction");
@@ -1642,7 +1712,9 @@ static inline bool isOrdered(const Instruction *I) {
 }
 
 /// Helper function to create new memory accesses
+template <typename AliasAnalysisType>
 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+                                           AliasAnalysisType *AAP,
                                            const MemoryUseOrDef *Template) {
   // The assume intrinsic has a control dependency which we model by claiming
   // that it writes arbitrarily. Ignore that fake memory dependency here.
@@ -1657,7 +1729,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
     Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
     Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
 #if !defined(NDEBUG)
-    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    ModRefInfo ModRef = AAP->getModRefInfo(I, None);
     bool DefCheck, UseCheck;
     DefCheck = isModSet(ModRef) || isOrdered(I);
     UseCheck = isRefSet(ModRef);
@@ -1665,7 +1737,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
 #endif
   } else {
     // Find out what affect this instruction has on memory.
-    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    ModRefInfo ModRef = AAP->getModRefInfo(I, None);
     // The isOrdered check is used to ensure that volatiles end up as defs
     // (atomics end up as ModRef right now anyway).  Until we separate the
     // ordering chain from the memory chain, this enables people to see at least
@@ -1718,7 +1790,7 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) {
     MUD->setDefiningAccess(nullptr);
   // Invalidate our walker's cache if necessary
   if (!isa<MemoryUse>(MA))
-    Walker->invalidateInfo(MA);
+    getWalker()->invalidateInfo(MA);
 
   Value *MemoryInst;
   if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
@@ -1778,35 +1850,16 @@ void MemorySSA::verifyMemorySSA() const {
   verifyDomination(F);
   verifyOrdering(F);
   verifyDominationNumbers(F);
-  Walker->verify(this);
-  verifyClobberSanity(F);
-}
-
-/// Check sanity of the clobbering instruction for access MA.
-void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
-  if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
-    if (!MUD->isOptimized())
-      return;
-    auto *I = MUD->getMemoryInst();
-    auto Loc = MemoryLocation::getOrNone(I);
-    if (Loc == None)
-      return;
-    auto *Clobber = MUD->getOptimized();
-    UpwardsMemoryQuery Q(I, MUD);
-    checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
-  }
-}
-
-void MemorySSA::verifyClobberSanity(const Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
-  for (const BasicBlock &BB : F) {
-    const AccessList *Accesses = getBlockAccesses(&BB);
-    if (!Accesses)
-      continue;
-    for (const MemoryAccess &MA : *Accesses)
-      checkClobberSanityAccess(&MA);
-  }
-#endif
+  // The verification used to also check that the clobberingAccess cached by
+  // MemorySSA is the same as the clobberingAccess found by a later query to
+  // AA. This does not hold in general, due to the current fragility of
+  // BasicAA, which has arbitrary caps on the things it analyzes before giving
+  // up. As a result, transformations that are correct can lead to BasicAA
+  // returning different alias answers before and after the transformation.
+  // Invalidating MemorySSA is not an option: the results from BasicAA can be
+  // so random that, in the worst case, we'd need to rebuild MemorySSA from
+  // scratch after every transformation, which defeats the purpose of using
+  // it. For an example, see test4 added in D51960.
 }
 
 /// Verify that all of the blocks we believe to have valid domination numbers
@@ -2162,6 +2215,15 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
   return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
 }
 
+bool MemorySSAAnalysis::Result::invalidate(
+    Function &F, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &Inv) {
+  auto PAC = PA.getChecker<MemorySSAAnalysis>(); // was MemorySSA preserved?
+  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+         Inv.invalidate<AAManager>(F, PA) || // ...or is the AA result stale?
+         Inv.invalidate<DominatorTreeAnalysis>(F, PA); // ...or the DomTree?
+}
+
 PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
   OS << "MemorySSA for function: " << F.getName() << "\n";
@@ -2210,8 +2272,11 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
 /// the MemoryAccess that actually clobbers Loc.
 ///
 /// \returns our clobbering memory access
-MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
-    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
+template <typename AliasAnalysisType>
+MemoryAccess *
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+    MemoryAccess *StartingAccess, const MemoryLocation &Loc,
+    unsigned &UpwardWalkLimit) {
   if (isa<MemoryPhi>(StartingAccess))
     return StartingAccess;
 
@@ -2239,7 +2304,8 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
                                      ? StartingUseOrDef->getDefiningAccess()
                                      : StartingUseOrDef;
 
-  MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
+  MemoryAccess *Clobber =
+      Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
   LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
   LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
   LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2247,9 +2313,10 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
   return Clobber;
 }
 
+template <typename AliasAnalysisType>
 MemoryAccess *
-MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
-                                                            bool SkipSelf) {
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
   auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
   // If this is a MemoryPhi, we can't do anything.
   if (!StartingAccess)
@@ -2275,7 +2342,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
 
   UpwardsMemoryQuery Q(I, StartingAccess);
 
-  if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
+  if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) {
     MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
     StartingAccess->setOptimized(LiveOnEntry);
     StartingAccess->setOptimizedAccessType(None);
@@ -2295,7 +2362,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
       return DefiningAccess;
     }
 
-    OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+    OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
     StartingAccess->setOptimized(OptimizedAccess);
     if (MSSA->isLiveOnEntryDef(OptimizedAccess))
       StartingAccess->setOptimizedAccessType(None);
@@ -2311,10 +2378,10 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
 
   MemoryAccess *Result;
   if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
-      isa<MemoryDef>(StartingAccess)) {
+      isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) {
     assert(isa<MemoryDef>(Q.OriginalAccess));
     Q.SkipSelfAccess = true;
-    Result = Walker.findClobber(OptimizedAccess, Q);
+    Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit);
   } else
     Result = OptimizedAccess;
 
@@ -2324,28 +2391,6 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
   return Result;
 }
 
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
-  return Walker->getClobberingMemoryAccessBase(MA, false);
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
-                                                    const MemoryLocation &Loc) {
-  return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
-  return Walker->getClobberingMemoryAccessBase(MA, true);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
-                                                    const MemoryLocation &Loc) {
-  return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
 MemoryAccess *
 DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
   if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 6c817d203684..4c1feee7fd9a 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -1,9 +1,8 @@
 //===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------===//
 //
@@ -73,7 +72,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
     // potential phi node. This will insert phi nodes if we cycle in order to
     // break the cycle and have an operand.
     for (auto *Pred : predecessors(BB))
-      PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+      if (MSSA->DT->isReachableFromEntry(Pred))
+        PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+      else
+        PhiOps.push_back(MSSA->getLiveOnEntryDef());
 
     // Now try to simplify the ops to avoid placing a phi.
     // This may return null if we never created a phi yet, that's okay
@@ -157,8 +159,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
     DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
   auto *Defs = MSSA->getWritableBlockDefs(BB);
 
-  if (Defs)
+  if (Defs) {
+    CachedPreviousDef.insert({BB, &*Defs->rbegin()});
     return &*Defs->rbegin();
+  }
 
   return getPreviousDefRecursive(BB, CachedPreviousDef);
 }
@@ -270,6 +274,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
       // Also make sure we skip ourselves to avoid self references.
       if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
         continue;
+      // Defs are automatically unoptimized when the user is set to MD below,
+      // because the isOptimized() call will fail to find the same ID.
       U.set(MD);
     }
   }
@@ -277,6 +283,9 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
   // and that def is now our defining access.
   MD->setDefiningAccess(DefBefore);
 
+  // Remember the index where we may insert new phis below.
+  unsigned NewPhiIndex = InsertedPHIs.size();
+
   SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
   if (!DefBeforeSameBlock) {
     // If there was a local def before us, we must have the same effect it
@@ -290,9 +299,56 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
     // backwards to find the def.  To make that work, we'd have to track whether
     // getDefRecursive only ever used the single predecessor case.  These types
     // of paths also only exist in between CFG simplifications.
+
+    // If this is the first def in the block and this insert is in an arbitrary
+    // place, compute IDF and place phis.
+    auto Iter = MD->getDefsIterator();
+    ++Iter;
+    auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
+    if (Iter == IterEnd) {
+      ForwardIDFCalculator IDFs(*MSSA->DT);
+      SmallVector<BasicBlock *, 32> IDFBlocks;
+      SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+      DefiningBlocks.insert(MD->getBlock());
+      IDFs.setDefiningBlocks(DefiningBlocks);
+      IDFs.calculate(IDFBlocks);
+      SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
+      for (auto *BBIDF : IDFBlocks)
+        if (!MSSA->getMemoryAccess(BBIDF)) {
+          auto *MPhi = MSSA->createMemoryPhi(BBIDF);
+          NewInsertedPHIs.push_back(MPhi);
+          // Add the phis created into the IDF blocks to NonOptPhis, so they are
+          // not optimized out as trivial by the call to getPreviousDefFromEnd
+          // below. Once they are complete, all these Phis are added to the
+          // FixupList, and removed from NonOptPhis inside fixupDefs().
+          NonOptPhis.insert(MPhi);
+        }
+
+      for (auto &MPhi : NewInsertedPHIs) {
+        auto *BBIDF = MPhi->getBlock();
+        for (auto *Pred : predecessors(BBIDF)) {
+          DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+          MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
+                            Pred);
+        }
+      }
+
+      // Re-take the index where we're adding the new phis, because the above
+      // calls to getPreviousDefFromEnd may have inserted into InsertedPHIs.
+      NewPhiIndex = InsertedPHIs.size();
+      for (auto &MPhi : NewInsertedPHIs) {
+        InsertedPHIs.push_back(&*MPhi);
+        FixupList.push_back(&*MPhi);
+      }
+    }
+
     FixupList.push_back(MD);
   }
 
+  // Remember the index where we stopped inserting new phis above, since the
+  // fixupDefs call in the loop below may insert more that are already minimal.
+  unsigned NewPhiIndexEnd = InsertedPHIs.size();
+
   while (!FixupList.empty()) {
     unsigned StartingPHISize = InsertedPHIs.size();
     fixupDefs(FixupList);
@@ -300,6 +356,12 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
     // Put any new phis on the fixup list, and process them
     FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end());
   }
+
+  // Optimize potentially non-minimal phis added in this method.
+  unsigned NewPhiSize = NewPhiIndexEnd - NewPhiIndex;
+  if (NewPhiSize)
+    tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
+
   // Now that all fixups are done, rename all uses if we are asked.
   if (RenameUses) {
     SmallPtrSet<BasicBlock *, 16> Visited;
@@ -401,8 +463,8 @@ void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
   }
 }
 
-void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
-                                                      BasicBlock *To) {
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+                                                      const BasicBlock *To) {
   if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
     bool Found = false;
     MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
@@ -420,7 +482,8 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
 
 void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
                                         const ValueToValueMapTy &VMap,
-                                        PhiToDefMap &MPhiMap) {
+                                        PhiToDefMap &MPhiMap,
+                                        bool CloneWasSimplified) {
   auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
     MemoryAccess *InsnDefining = MA;
     if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
@@ -450,16 +513,60 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
       // instructions. This occurs in LoopRotate when cloning instructions
       // from the old header to the old preheader. The cloned instruction may
       // also be a simplified Value, not an Instruction (see LoopRotate).
+      // Also in LoopRotate, even when it's an instruction, due to it being
+      // simplified, it may be a Use rather than a Def, so we cannot use MUD as
+      // a template. Calls from updateForClonedBlockIntoPred ensure this.
       if (Instruction *NewInsn =
               dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
         MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
-            NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+            NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()),
+            CloneWasSimplified ? nullptr : MUD);
         MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
       }
     }
   }
 }
 
+void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock(
+    BasicBlock *Header, BasicBlock *Preheader, BasicBlock *BEBlock) {
+  auto *MPhi = MSSA->getMemoryAccess(Header);
+  if (!MPhi)
+    return; // Header has no MemoryPhi, so there is nothing to rewire.
+
+  // Create a phi node in the backedge block and populate it with the same
+  // incoming values as MPhi, skipping the value coming from Preheader.
+  auto *NewMPhi = MSSA->createMemoryPhi(BEBlock);
+  bool HasUniqueIncomingValue = true; // cleared once two values differ
+  MemoryAccess *UniqueValue = nullptr; // first non-Preheader value seen
+  for (unsigned I = 0, E = MPhi->getNumIncomingValues(); I != E; ++I) {
+    BasicBlock *IBB = MPhi->getIncomingBlock(I);
+    MemoryAccess *IV = MPhi->getIncomingValue(I);
+    if (IBB != Preheader) {
+      NewMPhi->addIncoming(IV, IBB);
+      if (HasUniqueIncomingValue) {
+        if (!UniqueValue)
+          UniqueValue = IV;
+        else if (UniqueValue != IV)
+          HasUniqueIncomingValue = false;
+      }
+    }
+  }
+
+  // Update the incoming edges of MPhi: keep only the edge from Preheader
+  // (moved into slot 0 below) and then add an edge from NewMPhi.
+  auto *AccFromPreheader = MPhi->getIncomingValueForBlock(Preheader);
+  MPhi->setIncomingValue(0, AccFromPreheader);
+  MPhi->setIncomingBlock(0, Preheader);
+  for (unsigned I = MPhi->getNumIncomingValues() - 1; I >= 1; --I)
+    MPhi->unorderedDeleteIncoming(I); // walk backwards; slot 0 is kept
+  MPhi->addIncoming(NewMPhi, BEBlock);
+
+  // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be
+  // replaced with the unique value.
+  if (HasUniqueIncomingValue)
+    removeMemoryAccess(NewMPhi);
+}
+
 void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
                                            ArrayRef<BasicBlock *> ExitBlocks,
                                            const ValueToValueMapTy &VMap,
@@ -543,10 +650,13 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred(
   // Defs from BB being used in BB will be replaced with the cloned defs from
   // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
   // incoming def into the Phi from P1.
+  // Instructions cloned into the predecessor are in practice sometimes
+  // simplified, so disable the use of the template, and create an access from
+  // scratch.
   PhiToDefMap MPhiMap;
   if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
     MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
-  cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+  cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true);
 }
 
 template <typename Iter>
@@ -599,7 +709,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
 
   if (!RevDeleteUpdates.empty()) {
     // Update for inserted edges: use newDT and snapshot CFG as if deletes had
-    // not occured.
+    // not occurred.
     // FIXME: This creates a new DT, so it's more expensive to do mix
     // delete/inserts vs just inserts. We can do an incremental update on the DT
     // to revert deletes, than re-delete the edges. Teaching DT to do this, is
@@ -697,7 +807,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
 
   // Map a BB to its predecessors: added + previously existing. To get a
   // deterministic order, store predecessors as SetVectors. The order in each
-  // will be defined by teh order in Updates (fixed) and the order given by
+  // will be defined by the order in Updates (fixed) and the order given by
   // children<> (also fixed). Since we further iterate over these ordered sets,
   // we lose the information of multiple edges possibly existing between two
   // blocks, so we'll keep and EdgeCount map for that.
@@ -756,15 +866,15 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
   for (auto *BB : NewBlocks)
     PredMap.erase(BB);
 
-  SmallVector<BasicBlock *, 8> BlocksToProcess;
   SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+  SmallVector<WeakVH, 8> InsertedPhis;
 
   // First create MemoryPhis in all blocks that don't have one. Create in the
   // order found in Updates, not in PredMap, to get deterministic numbering.
   for (auto &Edge : Updates) {
     BasicBlock *BB = Edge.getTo();
     if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
-      MSSA->createMemoryPhi(BB);
+      InsertedPhis.push_back(MSSA->createMemoryPhi(BB));
   }
 
   // Now we'll fill in the MemoryPhis with the right incoming values.
@@ -831,10 +941,6 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
       for (auto *Pred : PrevBlockSet)
         for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
           NewPhi->addIncoming(DefP1, Pred);
-
-      // Insert BB in the set of blocks that now have definition. We'll use this
-      // to compute IDF and add Phis there next.
-      BlocksToProcess.push_back(BB);
     }
 
     // Get all blocks that used to dominate BB and no longer do after adding
@@ -849,22 +955,41 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
     GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
   }
 
+  tryRemoveTrivialPhis(InsertedPhis);
+  // Create the set of blocks that now have a definition. We'll use this to
+  // compute IDF and add Phis there next.
+  SmallVector<BasicBlock *, 8> BlocksToProcess;
+  for (auto &VH : InsertedPhis)
+    if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
+      BlocksToProcess.push_back(MPhi->getBlock());
+
   // Compute IDF and add Phis in all IDF blocks that do not have one.
   SmallVector<BasicBlock *, 32> IDFBlocks;
   if (!BlocksToProcess.empty()) {
-    ForwardIDFCalculator IDFs(DT);
+    ForwardIDFCalculator IDFs(DT, GD);
     SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
                                                  BlocksToProcess.end());
     IDFs.setDefiningBlocks(DefiningBlocks);
     IDFs.calculate(IDFBlocks);
+
+    SmallSetVector<MemoryPhi *, 4> PhisToFill;
+    // First create all needed Phis.
+    for (auto *BBIDF : IDFBlocks)
+      if (!MSSA->getMemoryAccess(BBIDF)) {
+        auto *IDFPhi = MSSA->createMemoryPhi(BBIDF);
+        InsertedPhis.push_back(IDFPhi);
+        PhisToFill.insert(IDFPhi);
+      }
+    // Then update or insert their correct incoming values.
     for (auto *BBIDF : IDFBlocks) {
-      if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+      auto *IDFPhi = MSSA->getMemoryAccess(BBIDF);
+      assert(IDFPhi && "Phi must exist");
+      if (!PhisToFill.count(IDFPhi)) {
         // Update existing Phi.
         // FIXME: some updates may be redundant, try to optimize and skip some.
         for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
           IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
       } else {
-        IDFPhi = MSSA->createMemoryPhi(BBIDF);
         for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
           BasicBlock *Pi = Pair.second;
           IDFPhi->addIncoming(GetLastDef(Pi), Pi);
@@ -907,6 +1032,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
       }
     }
   }
+  tryRemoveTrivialPhis(InsertedPhis);
 }
 
 // Move What before Where in the MemorySSA IR.
@@ -1052,7 +1178,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
   }
 }
 
-void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
+void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
   assert(!MSSA->isLiveOnEntryDef(MA) &&
          "Trying to remove the live on entry def");
   // We can only delete phi nodes if they have no uses, or we can replace all
@@ -1071,6 +1197,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
     NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
   }
 
+  SmallSetVector<MemoryPhi *, 4> PhisToCheck;
+
   // Re-point the uses at our defining access
   if (!isa<MemoryUse>(MA) && !MA->use_empty()) {
     // Reset optimized on users of this store, and reset the uses.
@@ -1090,6 +1218,9 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
       Use &U = *MA->use_begin();
       if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
         MUD->resetOptimized();
+      if (OptimizePhis)
+        if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U.getUser()))
+          PhisToCheck.insert(MP);
       U.set(NewDefTarget);
     }
   }
@@ -1098,10 +1229,25 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
   // are doing things here
   MSSA->removeFromLookups(MA);
   MSSA->removeFromLists(MA);
+
+  // Optionally optimize Phi uses. This will recursively remove trivial phis.
+  if (!PhisToCheck.empty()) {
+    SmallVector<WeakVH, 16> PhisToOptimize{PhisToCheck.begin(),
+                                           PhisToCheck.end()};
+    PhisToCheck.clear();
+
+    unsigned PhisSize = PhisToOptimize.size();
+    while (PhisSize-- > 0)
+      if (MemoryPhi *MP =
+              cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) {
+        auto OperRange = MP->operands();
+        tryRemoveTrivialPhi(MP, OperRange);
+      }
+  }
 }
 
 void MemorySSAUpdater::removeBlocks(
-    const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
+    const SmallSetVector<BasicBlock *, 8> &DeadBlocks) {
   // First delete all uses of BB in MemoryPhis.
   for (BasicBlock *BB : DeadBlocks) {
     Instruction *TI = BB->getTerminator();
@@ -1133,6 +1279,51 @@ void MemorySSAUpdater::removeBlocks(
   }
 }
 
+void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
+  for (auto &VH : UpdatedPHIs)
+    if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) { // skip null handles
+      auto OperRange = MPhi->operands();
+      tryRemoveTrivialPhi(MPhi, OperRange);
+    }
+}
+
+void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
+  const BasicBlock *BB = I->getParent();
+  // Remove memory accesses in BB for I and all following instructions.
+  auto BBI = I->getIterator(), BBE = BB->end();
+  // FIXME: If this becomes too expensive, iterate until the first instruction
+  // with a memory access, then iterate over MemoryAccesses.
+  while (BBI != BBE)
+    removeMemoryAccess(&*(BBI++)); // post-increment: advance before the call
+  // Drop BB as an incoming block from the MemoryPhis of its successors.
+  SmallVector<WeakVH, 16> UpdatedPHIs; // phis that lost their edge from BB
+  for (const BasicBlock *Successor : successors(BB)) {
+    removeDuplicatePhiEdgesBetween(BB, Successor);
+    if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Successor)) {
+      MPhi->unorderedDeleteIncomingBlock(BB);
+      UpdatedPHIs.push_back(MPhi);
+    }
+  }
+  // The phis that lost an edge may now be trivial; try to remove them.
+  tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
+void MemorySSAUpdater::changeCondBranchToUnconditionalTo(const BranchInst *BI,
+                                                         const BasicBlock *To) {
+  const BasicBlock *BB = BI->getParent();
+  SmallVector<WeakVH, 16> UpdatedPHIs; // phis that lost their edge from BB
+  for (const BasicBlock *Succ : successors(BB)) {
+    removeDuplicatePhiEdgesBetween(BB, Succ);
+    if (Succ != To) // the phi in To keeps its incoming edge from BB
+      if (auto *MPhi = MSSA->getMemoryAccess(Succ)) {
+        MPhi->unorderedDeleteIncomingBlock(BB);
+        UpdatedPHIs.push_back(MPhi);
+      }
+  }
+  // The phis that lost an edge may now be trivial; try to remove them.
+  tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
 MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
     Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
     MemorySSA::InsertionPlace Point) {
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 1e321f17d59f..519242759824 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 87f76d43bb1e..e25eb290a665 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -71,6 +70,11 @@ cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
                           "all-non-critical", "All non-critical edges."),
                clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));
 
+cl::opt<std::string> ModuleSummaryDotFile(
+    "module-summary-dot-file", cl::init(""), cl::Hidden,
+    cl::value_desc("filename"),
+    cl::desc("File to emit dot graph of new summary into."));
+
 // Walk through the operands of a given User via worklist iteration and populate
 // the set of GlobalValue references encountered. Invoked either on an
 // Instruction or a GlobalVariable (which walks its initializer).
@@ -227,6 +231,13 @@ static bool isNonVolatileLoad(const Instruction *I) {
   return false;
 }
 
+static bool isNonVolatileStore(const Instruction *I) {
+  if (const auto *SI = dyn_cast<StoreInst>(I))
+    return !SI->isVolatile(); // a store: accept it unless it is volatile
+
+  return false; // not a store instruction
+}
+
 static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
                                    const Function &F, BlockFrequencyInfo *BFI,
                                    ProfileSummaryInfo *PSI, DominatorTree &DT,
@@ -241,7 +252,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
   // Map from callee ValueId to profile count. Used to accumulate profile
   // counts for all static calls to a given callee.
   MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
-  SetVector<ValueInfo> RefEdges;
+  SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
   SetVector<GlobalValue::GUID> TypeTests;
   SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
       TypeCheckedLoadVCalls;
@@ -254,6 +265,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
   // list.
   findRefEdges(Index, &F, RefEdges, Visited);
   std::vector<const Instruction *> NonVolatileLoads;
+  std::vector<const Instruction *> NonVolatileStores;
 
   bool HasInlineAsmMaybeReferencingInternal = false;
   for (const BasicBlock &BB : F)
@@ -261,12 +273,34 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       ++NumInsts;
-      if (isNonVolatileLoad(&I)) {
-        // Postpone processing of non-volatile load instructions
-        // See comments below
-        Visited.insert(&I);
-        NonVolatileLoads.push_back(&I);
-        continue;
+      // A regular LTO module doesn't participate in ThinLTO import,
+      // so no reference from it can be read-only or write-only, since
+      // this would require importing the variable as a local copy.
+      if (IsThinLTO) {
+        if (isNonVolatileLoad(&I)) {
+          // Postpone processing of non-volatile load instructions
+          // See comments below
+          Visited.insert(&I);
+          NonVolatileLoads.push_back(&I);
+          continue;
+        } else if (isNonVolatileStore(&I)) {
+          Visited.insert(&I);
+          NonVolatileStores.push_back(&I);
+          // All references from second operand of store (destination address)
+          // can be considered write-only if they're not referenced by any
+          // non-store instruction. References from first operand of store
+          // (stored value) can't be treated either as read- or as write-only
+          // so we add them to RefEdges as we do with all other instructions
+          // except non-volatile load.
+          Value *Stored = I.getOperand(0);
+          if (auto *GV = dyn_cast<GlobalValue>(Stored))
+            // findRefEdges will try to examine GV operands, so instead
+            // of calling it we should add GV to RefEdges directly.
+            RefEdges.insert(Index.getOrInsertValueInfo(GV));
+          else if (auto *U = dyn_cast<User>(Stored))
+            findRefEdges(Index, U, RefEdges, Visited);
+          continue;
+        }
       }
       findRefEdges(Index, &I, RefEdges, Visited);
       auto CS = ImmutableCallSite(&I);
@@ -357,24 +391,61 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       }
     }
 
-  // By now we processed all instructions in a function, except
-  // non-volatile loads. All new refs we add in a loop below
-  // are obviously constant. All constant refs are grouped in the
-  // end of RefEdges vector, so we can use a single integer value
-  // to identify them.
-  unsigned RefCnt = RefEdges.size();
-  for (const Instruction *I : NonVolatileLoads) {
-    Visited.erase(I);
-    findRefEdges(Index, I, RefEdges, Visited);
-  }
-  std::vector<ValueInfo> Refs = RefEdges.takeVector();
-  // Regular LTO module doesn't participate in ThinLTO import,
-  // so no reference from it can be readonly, since this would
-  // require importing variable as local copy
-  if (IsThinLTO)
-    for (; RefCnt < Refs.size(); ++RefCnt)
+  std::vector<ValueInfo> Refs;
+  if (IsThinLTO) {
+    auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
+                           SetVector<ValueInfo> &Edges,
+                           SmallPtrSet<const User *, 8> &Cache) {
+      for (const auto *I : Instrs) {
+        Cache.erase(I);
+        findRefEdges(Index, I, Edges, Cache);
+      }
+    };
+
+    // By now we have processed all instructions in the function, except
+    // non-volatile loads and non-volatile value stores. Let's find
+    // ref edges for both instruction sets.
+    AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
+    // We can add some values to the Visited set when processing load
+    // instructions which are also used by stores in NonVolatileStores.
+    // For example this can happen if we have following code:
+    //
+    // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
+    // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
+    //
+    // After processing loads we'll add bitcast to the Visited set, and if
+    // we use the same set while processing stores, we'll never see store
+    // to @bar and @bar will be mistakenly treated as readonly.
+    SmallPtrSet<const llvm::User *, 8> StoreCache;
+    AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
+
+    // If both load and store instruction reference the same variable
+    // we won't be able to optimize it. Add all such reference edges
+    // to RefEdges set.
+    for (auto &VI : StoreRefEdges)
+      if (LoadRefEdges.remove(VI))
+        RefEdges.insert(VI);
+
+    unsigned RefCnt = RefEdges.size();
+    // All new reference edges inserted in the two loops below are either
+    // read-only or write-only. They will be grouped at the end of the
+    // RefEdges vector, so we can use a single integer value to identify them.
+    for (auto &VI : LoadRefEdges)
+      RefEdges.insert(VI);
+
+    unsigned FirstWORef = RefEdges.size();
+    for (auto &VI : StoreRefEdges)
+      RefEdges.insert(VI);
+
+    Refs = RefEdges.takeVector();
+    for (; RefCnt < FirstWORef; ++RefCnt)
       Refs[RefCnt].setReadOnly();
 
+    for (; RefCnt < Refs.size(); ++RefCnt)
+      Refs[RefCnt].setWriteOnly();
+  } else {
+    Refs = RefEdges.takeVector();
+  }
   // Explicit add hot edges to enforce importing for designated GUIDs for
   // sample PGO, to enable the same inlines as the profiled optimized binary.
   for (auto &I : F.getImportGUIDs())
@@ -387,7 +458,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
   bool NotEligibleForImport =
       NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
-                                    /* Live = */ false, F.isDSOLocal());
+                                    /* Live = */ false, F.isDSOLocal(),
+                                    F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
   FunctionSummary::FFlags FunFlags{
       F.hasFnAttribute(Attribute::ReadNone),
       F.hasFnAttribute(Attribute::ReadOnly),
@@ -406,26 +478,134 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
   Index.addGlobalValueSummary(F, std::move(FuncSummary));
 }
 
+/// Find function pointers referenced within the given vtable initializer
+/// (or subset of an initializer) \p I. The starting offset of \p I within
+/// the vtable initializer is \p StartingOffset. Any discovered function
+/// pointers are added to \p VTableFuncs along with their cumulative offset
+/// within the initializer.
+static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
+                             const Module &M, ModuleSummaryIndex &Index,
+                             VTableFuncList &VTableFuncs) {
+  // First check if this is a function pointer.
+  if (I->getType()->isPointerTy()) {
+    auto Fn = dyn_cast<Function>(I->stripPointerCasts());
+    // We can disregard __cxa_pure_virtual as a possible call target, as
+    // calls to pure virtuals are UB.
+    if (Fn && Fn->getName() != "__cxa_pure_virtual")
+      VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
+    return;
+  }
+
+  // Walk through the elements in the constant struct or array and recursively
+  // look for virtual function pointers.
+  const DataLayout &DL = M.getDataLayout();
+  if (auto *C = dyn_cast<ConstantStruct>(I)) {
+    StructType *STy = dyn_cast<StructType>(C->getType());
+    assert(STy);
+    const StructLayout *SL = DL.getStructLayout(C->getType());
+
+    for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI) {
+      auto Offset = SL->getElementOffset(EI - EB);
+      unsigned Op = SL->getElementContainingOffset(Offset);
+      findFuncPointers(cast<Constant>(I->getOperand(Op)),
+                       StartingOffset + Offset, M, Index, VTableFuncs);
+    }
+  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
+    ArrayType *ATy = C->getType();
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      findFuncPointers(cast<Constant>(I->getOperand(i)),
+                       StartingOffset + i * EltSize, M, Index, VTableFuncs);
+    }
+  }
+}
+
+// Identify the function pointers referenced by vtable definition \p V.
+static void computeVTableFuncs(ModuleSummaryIndex &Index,
+                               const GlobalVariable &V, const Module &M,
+                               VTableFuncList &VTableFuncs) {
+  if (!V.isConstant())
+    return;
+
+  findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
+                   VTableFuncs);
+
+#ifndef NDEBUG
+  // Validate that the VTableFuncs list is ordered by offset.
+  uint64_t PrevOffset = 0;
+  for (auto &P : VTableFuncs) {
+    // The findFuncPointers traversal should have encountered the
+    // functions in offset order. We need to use ">=" since PrevOffset
+    // starts at 0.
+    assert(P.VTableOffset >= PrevOffset);
+    PrevOffset = P.VTableOffset;
+  }
+#endif
+}
+
+/// Record vtable definition \p V for each type metadata it references.
 static void
-computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
-                       DenseSet<GlobalValue::GUID> &CantBePromoted) {
+recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
+                                       const GlobalVariable &V,
+                                       SmallVectorImpl<MDNode *> &Types) {
+  for (MDNode *Type : Types) {
+    auto TypeID = Type->getOperand(1).get();
+
+    uint64_t Offset =
+        cast<ConstantInt>(
+            cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+            ->getZExtValue();
+
+    if (auto *TypeId = dyn_cast<MDString>(TypeID))
+      Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
+          .push_back({Offset, Index.getOrInsertValueInfo(&V)});
+  }
+}
+
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+                                   const GlobalVariable &V,
+                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
+                                   const Module &M,
+                                   SmallVectorImpl<MDNode *> &Types) {
   SetVector<ValueInfo> RefEdges;
   SmallPtrSet<const User *, 8> Visited;
   bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
-                                    /* Live = */ false, V.isDSOLocal());
+                                    /* Live = */ false, V.isDSOLocal(),
+                                    V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
+
+  VTableFuncList VTableFuncs;
+  // If splitting is not enabled, then we compute the summary information
+  // necessary for index-based whole program devirtualization.
+  if (!Index.enableSplitLTOUnit()) {
+    Types.clear();
+    V.getMetadata(LLVMContext::MD_type, Types);
+    if (!Types.empty()) {
+      // Identify the function pointers referenced by this vtable definition.
+      computeVTableFuncs(Index, V, M, VTableFuncs);
+
+      // Record this vtable definition for each type metadata it references.
+      recordTypeIdCompatibleVtableReferences(Index, V, Types);
+    }
+  }
 
-  // Don't mark variables we won't be able to internalize as read-only.
-  GlobalVarSummary::GVarFlags VarFlags(
+  // Don't mark variables we won't be able to internalize as read/write-only.
+  bool CanBeInternalized =
       !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
-      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
+  GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
   auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
                                                          RefEdges.takeVector());
   if (NonRenamableLocal)
     CantBePromoted.insert(V.getGUID());
   if (HasBlockAddress)
     GVarSummary->setNotEligibleToImport();
+  if (!VTableFuncs.empty())
+    GVarSummary->setVTableFuncs(VTableFuncs);
   Index.addGlobalValueSummary(V, std::move(GVarSummary));
 }
 
@@ -434,12 +614,15 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
                     DenseSet<GlobalValue::GUID> &CantBePromoted) {
   bool NonRenamableLocal = isNonRenamableLocal(A);
   GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
-                                    /* Live = */ false, A.isDSOLocal());
+                                    /* Live = */ false, A.isDSOLocal(),
+                                    A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
   auto AS = llvm::make_unique<AliasSummary>(Flags);
   auto *Aliasee = A.getBaseObject();
-  auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
-  assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
-  AS->setAliasee(AliaseeSummary);
+  auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
+  assert(AliaseeVI && "Alias expects aliasee summary to be available");
+  assert(AliaseeVI.getSummaryList().size() == 1 &&
+         "Expected a single entry per aliasee in per-module index");
+  AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
   if (NonRenamableLocal)
     CantBePromoted.insert(A.getGUID());
   Index.addGlobalValueSummary(A, std::move(AS));
@@ -507,7 +690,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
           GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
                                               /* NotEligibleToImport = */ true,
                                               /* Live = */ true,
-                                              /* Local */ GV->isDSOLocal());
+                                              /* Local */ GV->isDSOLocal(),
+                                              GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
           CantBePromoted.insert(GV->getGUID());
           // Create the appropriate summary type.
           if (Function *F = dyn_cast<Function>(GV)) {
@@ -531,7 +715,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
           } else {
             std::unique_ptr<GlobalVarSummary> Summary =
                 llvm::make_unique<GlobalVarSummary>(
-                    GVFlags, GlobalVarSummary::GVarFlags(),
+                    GVFlags, GlobalVarSummary::GVarFlags(false, false),
                     ArrayRef<ValueInfo>{});
             Index.addGlobalValueSummary(*GV, std::move(Summary));
           }
@@ -568,10 +752,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
 
   // Compute summaries for all variables defined in module, and save in the
   // index.
+  SmallVector<MDNode *, 2> Types;
   for (const GlobalVariable &G : M.globals()) {
     if (G.isDeclaration())
       continue;
-    computeVariableSummary(Index, G, CantBePromoted);
+    computeVariableSummary(Index, G, CantBePromoted, M, Types);
   }
 
   // Compute summaries for all aliases defined in module, and save in the
@@ -626,6 +811,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
     }
   }
 
+  if (!ModuleSummaryDotFile.empty()) {
+    std::error_code EC;
+    raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
+    if (EC)
+      report_fatal_error(Twine("Failed to open dot file ") +
+                         ModuleSummaryDotFile + ": " + EC.message() + "\n");
+    Index.exportToDot(OSDot);
+  }
+
   return Index;
 }
 
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index 180c38ddacc2..b616cd6f762b 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -1,9 +1,8 @@
 //===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -194,7 +193,8 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
   SmallPtrSet<const BasicBlock *, 4> Predecessors;
   collectTransitivePredecessors(CurLoop, BB, Predecessors);
 
-  // Make sure that all successors of all predecessors of BB are either:
+  // Make sure that all successors of all predecessors of BB that are not
+  // dominated by BB are either:
   // 1) BB,
   // 2) Also predecessors of BB,
   // 3) Exit blocks which are not taken on 1st iteration.
@@ -204,6 +204,12 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
     // Predecessor block may throw, so it has a side exit.
     if (blockMayThrow(Pred))
       return false;
+
+    // BB dominates Pred, so if Pred runs, BB must run.
+    // This is true when Pred is a loop latch.
+    if (DT->dominates(BB, Pred))
+      continue;
+
     for (auto *Succ : successors(Pred))
       if (CheckedSuccessors.insert(Succ).second &&
           Succ != BB && !Predecessors.count(Succ))
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 95ae1a6e744f..811033e73147 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -38,9 +37,10 @@ using namespace llvm;
 using namespace llvm::objcarc;
 
 AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
-                                   const MemoryLocation &LocB) {
+                                   const MemoryLocation &LocB,
+                                   AAQueryInfo &AAQI) {
   if (!EnableARCOpts)
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   // First, strip off no-ops, including ObjC-specific no-ops, and try making a
   // precise alias query.
@@ -48,7 +48,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
   const Value *SB = GetRCIdentityRoot(LocB.Ptr);
   AliasResult Result =
       AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
-                          MemoryLocation(SB, LocB.Size, LocB.AATags));
+                          MemoryLocation(SB, LocB.Size, LocB.AATags), AAQI);
   if (Result != MayAlias)
     return Result;
 
@@ -57,7 +57,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
   const Value *UA = GetUnderlyingObjCPtr(SA, DL);
   const Value *UB = GetUnderlyingObjCPtr(SB, DL);
   if (UA != SA || UB != SB) {
-    Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
+    Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB), AAQI);
     // We can't use MustAlias or PartialAlias results here because
     // GetUnderlyingObjCPtr may return an offsetted pointer value.
     if (Result == NoAlias)
@@ -70,22 +70,23 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
 }
 
 bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
-                                             bool OrLocal) {
+                                             AAQueryInfo &AAQI, bool OrLocal) {
   if (!EnableARCOpts)
-    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
 
   // First, strip off no-ops, including ObjC-specific no-ops, and try making
   // a precise alias query.
   const Value *S = GetRCIdentityRoot(Loc.Ptr);
   if (AAResultBase::pointsToConstantMemory(
-          MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
+          MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, OrLocal))
     return true;
 
   // If that failed, climb to the underlying object, including climbing through
   // ObjC-specific no-ops, and try making an imprecise alias query.
   const Value *U = GetUnderlyingObjCPtr(S, DL);
   if (U != S)
-    return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+    return AAResultBase::pointsToConstantMemory(MemoryLocation(U), AAQI,
+                                                OrLocal);
 
   // If that failed, fail. We don't need to chain here, since that's covered
   // by the earlier precise query.
@@ -107,9 +108,10 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
 }
 
 ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
-                                          const MemoryLocation &Loc) {
+                                          const MemoryLocation &Loc,
+                                          AAQueryInfo &AAQI) {
   if (!EnableARCOpts)
-    return AAResultBase::getModRefInfo(Call, Loc);
+    return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 
   switch (GetBasicARCInstKind(Call)) {
   case ARCInstKind::Retain:
@@ -128,7 +130,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
     break;
   }
 
-  return AAResultBase::getModRefInfo(Call, Loc);
+  return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 }
 
 ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp
index d6db6386c38b..56d1cb421225 100644
--- a/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 31c432711834..0e96c6e975c9 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -1,9 +1,8 @@
 //===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -482,6 +481,41 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
   llvm_unreachable("covered switch isn't covered?");
 }
 
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
+  switch (Class) {
+  case ARCInstKind::Retain:
+  case ARCInstKind::RetainRV:
+  case ARCInstKind::ClaimRV:
+  case ARCInstKind::Release:
+  case ARCInstKind::Autorelease:
+  case ARCInstKind::AutoreleaseRV:
+  case ARCInstKind::RetainBlock:
+  case ARCInstKind::FusedRetainAutorelease:
+  case ARCInstKind::FusedRetainAutoreleaseRV:
+    return true;
+  case ARCInstKind::AutoreleasepoolPush:
+  case ARCInstKind::AutoreleasepoolPop:
+  case ARCInstKind::LoadWeakRetained:
+  case ARCInstKind::StoreWeak:
+  case ARCInstKind::InitWeak:
+  case ARCInstKind::LoadWeak:
+  case ARCInstKind::MoveWeak:
+  case ARCInstKind::CopyWeak:
+  case ARCInstKind::DestroyWeak:
+  case ARCInstKind::StoreStrong:
+  case ARCInstKind::IntrinsicUser:
+  case ARCInstKind::CallOrUser:
+  case ARCInstKind::Call:
+  case ARCInstKind::User:
+  case ARCInstKind::None:
+  case ARCInstKind::NoopCast:
+    return false;
+  }
+  llvm_unreachable("covered switch isn't covered?");
+}
+
 /// Test if the given class represents instructions which are always safe
 /// to mark with the "tail" keyword.
 bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp
index 8ece0a2a3ed3..72c40a0be232 100644
--- a/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
 //===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 5f4fe0f7dda2..48f2a4020c66 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -1,9 +1,8 @@
 //===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -86,3 +85,27 @@ bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
 
   return comesBefore(A, B);
 }
+
+void OrderedBasicBlock::eraseInstruction(const Instruction *I) {
+  if (LastInstFound != BB->end() && I == &*LastInstFound) {
+    if (LastInstFound == BB->begin()) {
+      LastInstFound = BB->end();
+      NextInstPos = 0;
+    } else
+      LastInstFound--;
+  }
+
+  NumberedInsts.erase(I);
+}
+
+void OrderedBasicBlock::replaceInstruction(const Instruction *Old,
+                                           const Instruction *New) {
+  auto OI = NumberedInsts.find(Old);
+  if (OI == NumberedInsts.end())
+    return;
+
+  NumberedInsts.insert({New, OI->second});
+  if (LastInstFound != BB->end() && Old == &*LastInstFound)
+    LastInstFound = New->getIterator();
+  NumberedInsts.erase(Old);
+}
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
index 7b155208c02e..458c0a7de6c2 100644
--- a/lib/Analysis/OrderedInstructions.cpp
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -1,9 +1,8 @@
 //===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 858f08f6537a..7f77ab146c4c 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -1,9 +1,8 @@
 //===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/PhiValues.cpp b/lib/Analysis/PhiValues.cpp
index 729227c86697..49749bc44746 100644
--- a/lib/Analysis/PhiValues.cpp
+++ b/lib/Analysis/PhiValues.cpp
@@ -1,9 +1,8 @@
 //===- PhiValues.cpp - Phi Value Analysis ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index e6b660fe26d7..4afe22bd5342 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -1,9 +1,8 @@
 //===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 1d70c75f2e1c..dce19d6d546e 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -1,9 +1,8 @@
 //===- ProfileSummaryInfo.cpp - Global profile summary information --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,10 +60,9 @@ static cl::opt<int> ProfileSummaryColdCount(
 // Find the summary entry for a desired percentile of counts.
 static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
                                                         uint64_t Percentile) {
-  auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) {
+  auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
     return Entry.Cutoff < Percentile;
-  };
-  auto It = std::lower_bound(DS.begin(), DS.end(), Percentile, Compare);
+  });
   // The required percentile has to be <= one of the percentiles in the
   // detailed summary.
   if (It == DS.end())
@@ -80,7 +78,14 @@ static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
 bool ProfileSummaryInfo::computeSummary() {
   if (Summary)
     return true;
-  auto *SummaryMD = M.getProfileSummary();
+  // First try to get context sensitive ProfileSummary.
+  auto *SummaryMD = M.getProfileSummary(/* IsCS */ true);
+  if (SummaryMD) {
+    Summary.reset(ProfileSummary::getFromMD(SummaryMD));
+    return true;
+  }
+  // This will actually return PSK_Instr or PSK_Sample summary.
+  SummaryMD = M.getProfileSummary(/* IsCS */ false);
   if (!SummaryMD)
     return false;
   Summary.reset(ProfileSummary::getFromMD(SummaryMD));
@@ -89,7 +94,8 @@ bool ProfileSummaryInfo::computeSummary() {
 
 Optional<uint64_t>
 ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
-                                    BlockFrequencyInfo *BFI) {
+                                    BlockFrequencyInfo *BFI,
+                                    bool AllowSynthetic) {
   if (!Inst)
     return None;
   assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
@@ -105,7 +111,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
     return None;
   }
   if (BFI)
-    return BFI->getBlockProfileCount(Inst->getParent());
+    return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic);
   return None;
 }
 
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 1fdaf4d55b59..9a834ba4866a 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -1,9 +1,8 @@
 //===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,5 +34,11 @@ bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
   if (!IsOffsetKnown)
     return false;
 
-  return GEPI.accumulateConstantOffset(DL, Offset);
+  APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0);
+  if (GEPI.accumulateConstantOffset(DL, TmpOffset)) {
+    Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth());
+    return true;
+  }
+
+  return false;
 }
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 2bd611350f46..8ba38adfb0d2 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -1,9 +1,8 @@
 //===- RegionInfo.cpp - SESE region detection analysis --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Detects single entry single exit regions in the control flow graph.
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index a101ff109199..6c0d17b45c62 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -1,9 +1,8 @@
 //===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -279,12 +278,17 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
   return new PrintRegionPass(Banner, O);
 }
 
+static std::string getDescription(const Region &R) {
+  return "region";
+}
+
 bool RegionPass::skipRegion(Region &R) const {
   Function &F = *R.getEntry()->getParent();
-  if (!F.getContext().getOptPassGate().shouldRunPass(this, R))
+  OptPassGate &Gate = F.getContext().getOptPassGate();
+  if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(R)))
     return true;
 
-  if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+  if (F.hasOptNone()) {
     // Report this only once per function.
     if (R.getEntry() == &F.getEntryBlock())
       LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 5986b8c4e0c3..5bdcb31fbe99 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -1,9 +1,8 @@
 //===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Print out the region tree of a function using dotty/graphviz.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e5134f2eeda9..bc2cfd6fcc42 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,9 +1,8 @@
 //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -203,15 +202,20 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth(
     cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
 
 static cl::opt<unsigned>
-    MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
-                cl::desc("Maximum depth of recursive SExt/ZExt"),
-                cl::init(8));
+    MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
+                 cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
+                 cl::init(8));
 
 static cl::opt<unsigned>
     MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
                   cl::desc("Max coefficients in AddRec during evolving"),
                   cl::init(8));
 
+static cl::opt<unsigned>
+    HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
+                  cl::desc("Size of the expression which is considered huge"),
+                  cl::init(4096));
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -273,7 +277,9 @@ void SCEV::print(raw_ostream &OS) const {
   case scAddExpr:
   case scMulExpr:
   case scUMaxExpr:
-  case scSMaxExpr: {
+  case scSMaxExpr:
+  case scUMinExpr:
+  case scSMinExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
     const char *OpStr = nullptr;
     switch (NAry->getSCEVType()) {
@@ -281,6 +287,12 @@ void SCEV::print(raw_ostream &OS) const {
     case scMulExpr: OpStr = " * "; break;
     case scUMaxExpr: OpStr = " umax "; break;
     case scSMaxExpr: OpStr = " smax "; break;
+    case scUMinExpr:
+      OpStr = " umin ";
+      break;
+    case scSMinExpr:
+      OpStr = " smin ";
+      break;
     }
     OS << "(";
     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
@@ -349,6 +361,8 @@ Type *SCEV::getType() const {
   case scMulExpr:
   case scUMaxExpr:
   case scSMaxExpr:
+  case scUMinExpr:
+  case scSMinExpr:
     return cast<SCEVNAryExpr>(this)->getType();
   case scAddExpr:
     return cast<SCEVAddExpr>(this)->getType();
@@ -393,7 +407,7 @@ bool SCEV::isNonConstantNegative() const {
 }
 
 SCEVCouldNotCompute::SCEVCouldNotCompute() :
-  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
 
 bool SCEVCouldNotCompute::classof(const SCEV *S) {
   return S->getSCEVType() == scCouldNotCompute;
@@ -422,7 +436,7 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
 
 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                            unsigned SCEVTy, const SCEV *op, Type *ty)
-  : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+  : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
 
 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                    const SCEV *op, Type *ty)
@@ -713,7 +727,9 @@ static int CompareSCEVComplexity(
   case scAddExpr:
   case scMulExpr:
   case scSMaxExpr:
-  case scUMaxExpr: {
+  case scUMaxExpr:
+  case scSMinExpr:
+  case scUMinExpr: {
     const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
     const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
 
@@ -795,11 +811,10 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
   }
 
   // Do the rough sort by complexity.
-  std::stable_sort(Ops.begin(), Ops.end(),
-                   [&](const SCEV *LHS, const SCEV *RHS) {
-                     return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
-                                                  LHS, RHS, DT) < 0;
-                   });
+  llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
+    return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) <
+           0;
+  });
 
   // Now that we are sorted by complexity, group elements of the same
   // complexity.  Note that this is, at worst, N^2, but the vector is likely to
@@ -846,6 +861,17 @@ static inline int sizeOfSCEV(const SCEV *S) {
   return F.Size;
 }
 
+/// Returns true if the subtree of \p S contains at least HugeExprThreshold
+/// nodes.
+static bool isHugeExpression(const SCEV *S) {
+  return S->getExpressionSize() >= HugeExprThreshold;
+}
+
+/// Returns true if \p Ops contains a huge SCEV (see definition above).
+static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
+  return any_of(Ops, isHugeExpression);
+}
+
 namespace {
 
 struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
@@ -913,6 +939,8 @@ public:
   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+  void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
+  void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
   void visitUnknown(const SCEVUnknown *Numerator) {}
   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
 
@@ -1219,8 +1247,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
 //                    SCEV Expression folder implementations
 //===----------------------------------------------------------------------===//
 
-const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
-                                             Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
+                                             unsigned Depth) {
   assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
          "This is not a truncating conversion!");
   assert(isSCEVable(Ty) &&
@@ -1241,15 +1269,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
 
   // trunc(trunc(x)) --> trunc(x)
   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
-    return getTruncateExpr(ST->getOperand(), Ty);
+    return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
 
   // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
-    return getTruncateOrSignExtend(SS->getOperand(), Ty);
+    return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
 
   // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
-    return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+    return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
+
+  if (Depth > MaxCastDepth) {
+    SCEV *S =
+        new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
+    UniqueSCEVs.InsertNode(S, IP);
+    addToLoopUseLists(S);
+    return S;
+  }
 
   // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
   // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
@@ -1261,7 +1297,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     unsigned numTruncs = 0;
     for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
          ++i) {
-      const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty);
+      const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
       if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
         numTruncs++;
       Operands.push_back(S);
@@ -1285,7 +1321,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
     SmallVector<const SCEV *, 4> Operands;
     for (const SCEV *Op : AddRec->operands())
-      Operands.push_back(getTruncateExpr(Op, Ty));
+      Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
     return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
   }
 
@@ -1619,7 +1655,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
   ID.AddPointer(Ty);
   void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  if (Depth > MaxExtDepth) {
+  if (Depth > MaxCastDepth) {
     SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                      Op, Ty);
     UniqueSCEVs.InsertNode(S, IP);
@@ -1637,7 +1673,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
     unsigned NewBits = getTypeSizeInBits(Ty);
     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
             CR.zextOrTrunc(NewBits)))
-      return getTruncateOrZeroExtend(X, Ty);
+      return getTruncateOrZeroExtend(X, Ty, Depth);
   }
 
   // If the input value is a chrec scev, and we can prove that the value
@@ -1679,9 +1715,9 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
         // Check whether the backedge-taken count can be losslessly casted to
         // the addrec's type. The count is always unsigned.
         const SCEV *CastedMaxBECount =
-          getTruncateOrZeroExtend(MaxBECount, Start->getType());
-        const SCEV *RecastedMaxBECount =
-          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+            getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+        const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+            CastedMaxBECount, MaxBECount->getType(), Depth);
         if (MaxBECount == RecastedMaxBECount) {
           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
           // Check whether Start+Step*MaxBECount has no unsigned overflow.
@@ -1930,7 +1966,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
   void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   // Limit recursion depth.
-  if (Depth > MaxExtDepth) {
+  if (Depth > MaxCastDepth) {
     SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                      Op, Ty);
     UniqueSCEVs.InsertNode(S, IP);
@@ -1948,7 +1984,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
     unsigned NewBits = getTypeSizeInBits(Ty);
     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
             CR.sextOrTrunc(NewBits)))
-      return getTruncateOrSignExtend(X, Ty);
+      return getTruncateOrSignExtend(X, Ty, Depth);
   }
 
   if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
@@ -2023,9 +2059,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
         // Check whether the backedge-taken count can be losslessly casted to
         // the addrec's type. The count is always unsigned.
         const SCEV *CastedMaxBECount =
-          getTruncateOrZeroExtend(MaxBECount, Start->getType());
-        const SCEV *RecastedMaxBECount =
-          getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+            getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+        const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+            CastedMaxBECount, MaxBECount->getType(), Depth);
         if (MaxBECount == RecastedMaxBECount) {
           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
           // Check whether Start+Step*MaxBECount has no signed overflow.
@@ -2295,7 +2331,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
 // can't-overflow flags for the operation if possible.
 static SCEV::NoWrapFlags
 StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
-                      const SmallVectorImpl<const SCEV *> &Ops,
+                      const ArrayRef<const SCEV *> Ops,
                       SCEV::NoWrapFlags Flags) {
   using namespace std::placeholders;
 
@@ -2405,7 +2441,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   }
 
   // Limit recursion calls depth.
-  if (Depth > MaxArithDepth)
+  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
     return getOrCreateAddExpr(Ops, Flags);
 
   // Okay, check to see if the same value occurs in the operand list more than
@@ -2743,7 +2779,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 }
 
 const SCEV *
-ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
                                     SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddExpr);
@@ -2765,7 +2801,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 }
 
 const SCEV *
-ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
                                        const Loop *L, SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scAddRecExpr);
@@ -2788,7 +2824,7 @@ ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
 }
 
 const SCEV *
-ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
                                     SCEV::NoWrapFlags Flags) {
   FoldingSetNodeID ID;
   ID.AddInteger(scMulExpr);
@@ -2884,7 +2920,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
 
   // Limit recursion calls depth.
-  if (Depth > MaxArithDepth)
+  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
     return getOrCreateMulExpr(Ops, Flags);
 
   // If there are any constants, fold them together.
@@ -3057,7 +3093,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
       // Limit max number of arguments to avoid creation of unreasonably big
       // SCEVAddRecs with very complex operands.
       if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
-          MaxAddRecSize)
+          MaxAddRecSize || isHugeExpression(AddRec) ||
+          isHugeExpression(OtherAddRec))
         continue;
 
       bool Overflow = false;
@@ -3090,7 +3127,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
         AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
       }
       if (!Overflow) {
-        const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+        const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
                                               SCEV::FlagAnyWrap);
         if (Ops.size() == 2) return NewAddRec;
         Ops[Idx] = NewAddRec;
@@ -3493,209 +3530,166 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
   return getAddExpr(BaseExpr, TotalOffset, Wrap);
 }
 
-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
-                                         const SCEV *RHS) {
-  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
-  return getSMaxExpr(Ops);
+std::tuple<const SCEV *, FoldingSetNodeID, void *>
+ScalarEvolution::findExistingSCEVInCache(int SCEVType,
+                                         ArrayRef<const SCEV *> Ops) {
+  FoldingSetNodeID ID;
+  void *IP = nullptr;
+  ID.AddInteger(SCEVType);
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    ID.AddPointer(Ops[i]);
+  return std::tuple<const SCEV *, FoldingSetNodeID, void *>(
+      UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
 }
 
-const SCEV *
-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-  assert(!Ops.empty() && "Cannot get empty smax!");
+const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
+                                           SmallVectorImpl<const SCEV *> &Ops) {
+  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
-           "SCEVSMaxExpr operand types don't match!");
+           "Operand types don't match!");
 #endif
 
+  bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
+  bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, &LI, DT);
 
+  // Check if we have created the same expression before.
+  if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
+    return S;
+  }
+
   // If there are any constants, fold them together.
   unsigned Idx = 0;
   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
     ++Idx;
     assert(Idx < Ops.size());
+    auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
+      if (Kind == scSMaxExpr)
+        return APIntOps::smax(LHS, RHS);
+      else if (Kind == scSMinExpr)
+        return APIntOps::smin(LHS, RHS);
+      else if (Kind == scUMaxExpr)
+        return APIntOps::umax(LHS, RHS);
+      else if (Kind == scUMinExpr)
+        return APIntOps::umin(LHS, RHS);
+      llvm_unreachable("Unknown SCEV min/max opcode");
+    };
+
     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
       // We found two constants, fold them together!
       ConstantInt *Fold = ConstantInt::get(
-          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
+          getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
       Ops[0] = getConstant(Fold);
       Ops.erase(Ops.begin()+1);  // Erase the folded element
       if (Ops.size() == 1) return Ops[0];
       LHSC = cast<SCEVConstant>(Ops[0]);
     }
 
-    // If we are left with a constant minimum-int, strip it off.
-    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+    bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
+    bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
+
+    if (IsMax ? IsMinV : IsMaxV) {
+      // If we are left with a constant minimum(/maximum)-int, strip it off.
       Ops.erase(Ops.begin());
       --Idx;
-    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
-      // If we have an smax with a constant maximum-int, it will always be
-      // maximum-int.
-      return Ops[0];
+    } else if (IsMax ? IsMaxV : IsMinV) {
+      // If we have a max(/min) with a constant maximum(/minimum)-int,
+      // it will always be the extremum.
+      return LHSC;
     }
 
     if (Ops.size() == 1) return Ops[0];
   }
 
-  // Find the first SMax
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+  // Find the first operation of the same kind
+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
     ++Idx;
 
-  // Check to see if one of the operands is an SMax. If so, expand its operands
-  // onto our operand list, and recurse to simplify.
+  // Check to see if one of the operands is of the same kind. If so, expand its
+  // operands onto our operand list, and recurse to simplify.
   if (Idx < Ops.size()) {
-    bool DeletedSMax = false;
-    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+    bool DeletedAny = false;
+    while (Ops[Idx]->getSCEVType() == Kind) {
+      const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
       Ops.erase(Ops.begin()+Idx);
-      Ops.append(SMax->op_begin(), SMax->op_end());
-      DeletedSMax = true;
+      Ops.append(SMME->op_begin(), SMME->op_end());
+      DeletedAny = true;
     }
 
-    if (DeletedSMax)
-      return getSMaxExpr(Ops);
+    if (DeletedAny)
+      return getMinMaxExpr(Kind, Ops);
   }
 
   // Okay, check to see if the same value occurs in the operand list twice.  If
   // so, delete one.  Since we sorted the list, these values are required to
   // be adjacent.
-  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
-    //  X smax Y smax Y  -->  X smax Y
-    //  X smax Y         -->  X, if X is always greater than Y
-    if (Ops[i] == Ops[i+1] ||
-        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
-      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
-      --i; --e;
-    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
-      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
-      --i; --e;
+  llvm::CmpInst::Predicate GEPred =
+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+  llvm::CmpInst::Predicate LEPred =
+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
+  llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
+  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
+    if (Ops[i] == Ops[i + 1] ||
+        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
+      //  X op Y op Y  -->  X op Y
+      //  X op Y       -->  X, if we know X, Y are ordered appropriately
+      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
+      --i;
+      --e;
+    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
+                                               Ops[i + 1])) {
+      //  X op Y       -->  Y, if we know X, Y are ordered appropriately
+      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
+      --i;
+      --e;
     }
+  }
 
   if (Ops.size() == 1) return Ops[0];
 
   assert(!Ops.empty() && "Reduced smax down to nothing!");
 
-  // Okay, it looks like we really DO need an smax expr.  Check to see if we
+  // Okay, it looks like we really DO need a min/max expr.  Check to see if we
   // already have one, otherwise create a new one.
+  const SCEV *ExistingSCEV;
   FoldingSetNodeID ID;
-  ID.AddInteger(scSMaxExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    ID.AddPointer(Ops[i]);
-  void *IP = nullptr;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+  void *IP;
+  std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
+  if (ExistingSCEV)
+    return ExistingSCEV;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
-                                             O, Ops.size());
+  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
+      ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+
   UniqueSCEVs.InsertNode(S, IP);
   addToLoopUseLists(S);
   return S;
 }
 
-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
-                                         const SCEV *RHS) {
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
   SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
-  return getUMaxExpr(Ops);
+  return getSMaxExpr(Ops);
 }
 
-const SCEV *
-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-  assert(!Ops.empty() && "Cannot get empty umax!");
-  if (Ops.size() == 1) return Ops[0];
-#ifndef NDEBUG
-  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
-  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
-           "SCEVUMaxExpr operand types don't match!");
-#endif
-
-  // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, &LI, DT);
-
-  // If there are any constants, fold them together.
-  unsigned Idx = 0;
-  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
-    ++Idx;
-    assert(Idx < Ops.size());
-    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
-      // We found two constants, fold them together!
-      ConstantInt *Fold = ConstantInt::get(
-          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
-      Ops[0] = getConstant(Fold);
-      Ops.erase(Ops.begin()+1);  // Erase the folded element
-      if (Ops.size() == 1) return Ops[0];
-      LHSC = cast<SCEVConstant>(Ops[0]);
-    }
-
-    // If we are left with a constant minimum-int, strip it off.
-    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
-      Ops.erase(Ops.begin());
-      --Idx;
-    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
-      // If we have an umax with a constant maximum-int, it will always be
-      // maximum-int.
-      return Ops[0];
-    }
-
-    if (Ops.size() == 1) return Ops[0];
-  }
-
-  // Find the first UMax
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
-    ++Idx;
-
-  // Check to see if one of the operands is a UMax. If so, expand its operands
-  // onto our operand list, and recurse to simplify.
-  if (Idx < Ops.size()) {
-    bool DeletedUMax = false;
-    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
-      Ops.erase(Ops.begin()+Idx);
-      Ops.append(UMax->op_begin(), UMax->op_end());
-      DeletedUMax = true;
-    }
-
-    if (DeletedUMax)
-      return getUMaxExpr(Ops);
-  }
-
-  // Okay, check to see if the same value occurs in the operand list twice.  If
-  // so, delete one.  Since we sorted the list, these values are required to
-  // be adjacent.
-  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
-    //  X umax Y umax Y  -->  X umax Y
-    //  X umax Y         -->  X, if X is always greater than Y
-    if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
-                                    ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
-      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
-      --i; --e;
-    } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
-                                               Ops[i + 1])) {
-      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
-      --i; --e;
-    }
-
-  if (Ops.size() == 1) return Ops[0];
+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+  return getMinMaxExpr(scSMaxExpr, Ops);
+}
 
-  assert(!Ops.empty() && "Reduced umax down to nothing!");
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
+  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+  return getUMaxExpr(Ops);
+}
 
-  // Okay, it looks like we really DO need a umax expr.  Check to see if we
-  // already have one, otherwise create a new one.
-  FoldingSetNodeID ID;
-  ID.AddInteger(scUMaxExpr);
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    ID.AddPointer(Ops[i]);
-  void *IP = nullptr;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
-  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
-                                             O, Ops.size());
-  UniqueSCEVs.InsertNode(S, IP);
-  addToLoopUseLists(S);
-  return S;
+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+  return getMinMaxExpr(scUMaxExpr, Ops);
 }
 
 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
@@ -3705,11 +3699,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
 }
 
 const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
-  // ~smax(~x, ~y, ~z) == smin(x, y, z).
-  SmallVector<const SCEV *, 2> NotOps;
-  for (auto *S : Ops)
-    NotOps.push_back(getNotSCEV(S));
-  return getNotSCEV(getSMaxExpr(NotOps));
+  return getMinMaxExpr(scSMinExpr, Ops);
 }
 
 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
@@ -3719,16 +3709,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
 }
 
 const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
-  assert(!Ops.empty() && "At least one operand must be!");
-  // Trivial case.
-  if (Ops.size() == 1)
-    return Ops[0];
-
-  // ~umax(~x, ~y, ~z) == umin(x, y, z).
-  SmallVector<const SCEV *, 2> NotOps;
-  for (auto *S : Ops)
-    NotOps.push_back(getNotSCEV(S));
-  return getNotSCEV(getUMaxExpr(NotOps));
+  return getMinMaxExpr(scUMinExpr, Ops);
 }
 
 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
@@ -3892,7 +3873,7 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
 }
 
 /// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursevely
+/// TODO: In reality it is better to check the poison recursively
 /// but this is better than nothing.
 static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
   if (auto *I = dyn_cast<Instruction>(V)) {
@@ -3970,12 +3951,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
       V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
 }
 
+/// If Expr computes ~A, return A; otherwise return nullptr.
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
+  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+  if (!Add || Add->getNumOperands() != 2 ||
+      !Add->getOperand(0)->isAllOnesValue())
+    return nullptr;
+
+  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+      !AddRHS->getOperand(0)->isAllOnesValue())
+    return nullptr;
+
+  return AddRHS->getOperand(1);
+}
+
 /// Return a SCEV corresponding to ~V = -1-V
 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
     return getConstant(
                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
 
+  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
+  if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
+    auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
+      SmallVector<const SCEV *, 2> MatchedOperands;
+      for (const SCEV *Operand : MME->operands()) {
+        const SCEV *Matched = MatchNotExpr(Operand);
+        if (!Matched)
+          return (const SCEV *)nullptr;
+        MatchedOperands.push_back(Matched);
+      }
+      return getMinMaxExpr(
+          SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
+          MatchedOperands);
+    };
+    if (const SCEV *Replaced = MatchMinMaxNegation(MME))
+      return Replaced;
+  }
+
   Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
   const SCEV *AllOnes =
@@ -4022,29 +4036,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
   return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
 }
 
-const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+                                                     unsigned Depth) {
   Type *SrcTy = V->getType();
   assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
-    return getTruncateExpr(V, Ty);
-  return getZeroExtendExpr(V, Ty);
+    return getTruncateExpr(V, Ty, Depth);
+  return getZeroExtendExpr(V, Ty, Depth);
 }
 
-const SCEV *
-ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
-                                         Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+                                                     unsigned Depth) {
   Type *SrcTy = V->getType();
   assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
-    return getTruncateExpr(V, Ty);
-  return getSignExtendExpr(V, Ty);
+    return getTruncateExpr(V, Ty, Depth);
+  return getSignExtendExpr(V, Ty, Depth);
 }
 
 const SCEV *
@@ -4530,52 +4543,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
     if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
       break;
 
-    auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
-    if (!CI)
+    auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
+    if (!WO)
       break;
 
-    if (auto *F = CI->getCalledFunction())
-      switch (F->getIntrinsicID()) {
-      case Intrinsic::sadd_with_overflow:
-      case Intrinsic::uadd_with_overflow:
-        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
-          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
-                          CI->getArgOperand(1));
-
-        // Now that we know that all uses of the arithmetic-result component of
-        // CI are guarded by the overflow check, we can go ahead and pretend
-        // that the arithmetic is non-overflowing.
-        if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
-          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
-                          CI->getArgOperand(1), /* IsNSW = */ true,
-                          /* IsNUW = */ false);
-        else
-          return BinaryOp(Instruction::Add, CI->getArgOperand(0),
-                          CI->getArgOperand(1), /* IsNSW = */ false,
-                          /* IsNUW*/ true);
-      case Intrinsic::ssub_with_overflow:
-      case Intrinsic::usub_with_overflow:
-        if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
-          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
-                          CI->getArgOperand(1));
-
-        // The same reasoning as sadd/uadd above.
-        if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
-          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
-                          CI->getArgOperand(1), /* IsNSW = */ true,
-                          /* IsNUW = */ false);
-        else
-          return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
-                          CI->getArgOperand(1), /* IsNSW = */ false,
-                          /* IsNUW = */ true);
-      case Intrinsic::smul_with_overflow:
-      case Intrinsic::umul_with_overflow:
-        return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
-                        CI->getArgOperand(1));
-      default:
-        break;
-      }
-    break;
+    Instruction::BinaryOps BinOp = WO->getBinaryOp();
+    bool Signed = WO->isSigned();
+    // TODO: Should add nuw/nsw flags for mul as well.
+    if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
+      return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());
+
+    // Now that we know that all uses of the arithmetic-result component of
+    // WO are guarded by the overflow check, we can go ahead and pretend
+    // that the arithmetic is non-overflowing.
+    return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
+                    /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
   }
 
   default:
@@ -5009,7 +4991,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
   // overflow.
   if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
     if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
-      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+      (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
 
   return PHISCEV;
 }
@@ -5196,6 +5178,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
       switch (S->getSCEVType()) {
       case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
       case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+      case scUMinExpr:
+      case scSMinExpr:
         // These expressions are available if their operand(s) is/are.
         return true;
 
@@ -5551,6 +5535,9 @@ ScalarEvolution::getRangeRef(const SCEV *S,
   DenseMap<const SCEV *, ConstantRange> &Cache =
       SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
                                                        : SignedRanges;
+  ConstantRange::PreferredRangeType RangeType =
+      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
+          ? ConstantRange::Unsigned : ConstantRange::Signed;
 
   // See if we've computed this range already.
   DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
@@ -5581,53 +5568,60 @@ ScalarEvolution::getRangeRef(const SCEV *S,
     ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
       X = X.add(getRangeRef(Add->getOperand(i), SignHint));
-    return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
+    return setRange(Add, SignHint,
+                    ConservativeResult.intersectWith(X, RangeType));
   }
 
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
     ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
       X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
-    return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
+    return setRange(Mul, SignHint,
+                    ConservativeResult.intersectWith(X, RangeType));
   }
 
   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
     ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
       X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
-    return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
+    return setRange(SMax, SignHint,
+                    ConservativeResult.intersectWith(X, RangeType));
   }
 
   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
     ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
       X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
-    return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
+    return setRange(UMax, SignHint,
+                    ConservativeResult.intersectWith(X, RangeType));
   }
 
   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
     ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
     ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
     return setRange(UDiv, SignHint,
-                    ConservativeResult.intersectWith(X.udiv(Y)));
+                    ConservativeResult.intersectWith(X.udiv(Y), RangeType));
   }
 
   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
     ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
     return setRange(ZExt, SignHint,
-                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+                    ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
+                                                     RangeType));
   }
 
   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
     ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
     return setRange(SExt, SignHint,
-                    ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+                    ConservativeResult.intersectWith(X.signExtend(BitWidth),
+                                                     RangeType));
   }
 
   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
     ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
     return setRange(Trunc, SignHint,
-                    ConservativeResult.intersectWith(X.truncate(BitWidth)));
+                    ConservativeResult.intersectWith(X.truncate(BitWidth),
+                                                     RangeType));
   }
 
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -5637,7 +5631,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
         if (!C->getValue()->isZero())
           ConservativeResult = ConservativeResult.intersectWith(
-              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
+              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType);
 
     // If there's no signed wrap, and all the operands have the same sign or
     // zero, the value won't ever change sign.
@@ -5651,11 +5645,11 @@ ScalarEvolution::getRangeRef(const SCEV *S,
       if (AllNonNeg)
         ConservativeResult = ConservativeResult.intersectWith(
           ConstantRange(APInt(BitWidth, 0),
-                        APInt::getSignedMinValue(BitWidth)));
+                        APInt::getSignedMinValue(BitWidth)), RangeType);
       else if (AllNonPos)
         ConservativeResult = ConservativeResult.intersectWith(
           ConstantRange(APInt::getSignedMinValue(BitWidth),
-                        APInt(BitWidth, 1)));
+                        APInt(BitWidth, 1)), RangeType);
     }
 
     // TODO: non-affine addrec
@@ -5668,14 +5662,14 @@ ScalarEvolution::getRangeRef(const SCEV *S,
             BitWidth);
         if (!RangeFromAffine.isFullSet())
           ConservativeResult =
-              ConservativeResult.intersectWith(RangeFromAffine);
+              ConservativeResult.intersectWith(RangeFromAffine, RangeType);
 
         auto RangeFromFactoring = getRangeViaFactoring(
             AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
             BitWidth);
         if (!RangeFromFactoring.isFullSet())
           ConservativeResult =
-              ConservativeResult.intersectWith(RangeFromFactoring);
+              ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
       }
     }
 
@@ -5686,7 +5680,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
     // Check if the IR explicitly contains !range metadata.
     Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
     if (MDRange.hasValue())
-      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+      ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
+                                                            RangeType);
 
     // Split here to avoid paying the compile-time cost of calling both
     // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
@@ -5697,8 +5692,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
       KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
       if (Known.One != ~Known.Zero + 1)
         ConservativeResult =
-            ConservativeResult.intersectWith(ConstantRange(Known.One,
-                                                           ~Known.Zero + 1));
+            ConservativeResult.intersectWith(
+                ConstantRange(Known.One, ~Known.Zero + 1), RangeType);
     } else {
       assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
              "generalize as needed!");
@@ -5706,7 +5701,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
       if (NS > 1)
         ConservativeResult = ConservativeResult.intersectWith(
             ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
-                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+                          APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
+            RangeType);
     }
 
     // A range of Phi is a subset of union of all ranges of its input.
@@ -5721,7 +5717,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
           if (RangeFromOps.isFullSet())
             break;
         }
-        ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
+        ConservativeResult =
+            ConservativeResult.intersectWith(RangeFromOps, RangeType);
         bool Erased = PendingPhiRanges.erase(Phi);
         assert(Erased && "Failed to erase Phi properly?");
         (void) Erased;
@@ -5751,7 +5748,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   // FullRange), then we don't know anything about the final range either.
   // Return FullRange.
   if (StartRange.isFullSet())
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
 
   // If Step is signed and negative, then we use its absolute value, but we also
   // note that we're moving in the opposite direction.
@@ -5767,7 +5764,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   // Check if Offset is more than full span of BitWidth. If it is, the
   // expression is guaranteed to overflow.
   if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
 
   // Offset is by how much the expression can change. Checks above guarantee no
   // overflow here.
@@ -5786,7 +5783,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   // range (due to wrap around). This means that the expression can take any
   // value in this bitwidth, and we have to return full range.
   if (StartRange.contains(MovedBoundary))
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
 
   APInt NewLower =
       Descending ? std::move(MovedBoundary) : std::move(StartLower);
@@ -5794,12 +5791,8 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
       Descending ? std::move(StartUpper) : std::move(MovedBoundary);
   NewUpper += 1;
 
-  // If we end up with full range, return a proper full range.
-  if (NewLower == NewUpper)
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
-
   // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
-  return ConstantRange(std::move(NewLower), std::move(NewUpper));
+  return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
 }
 
 ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
@@ -5832,7 +5825,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
       MaxBECountValue, BitWidth, /* Signed = */ false);
 
   // Finally, intersect signed and unsigned ranges.
-  return SR.intersectWith(UR);
+  return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
@@ -5916,17 +5909,17 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
 
   SelectPattern StartPattern(*this, BitWidth, Start);
   if (!StartPattern.isRecognized())
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
 
   SelectPattern StepPattern(*this, BitWidth, Step);
   if (!StepPattern.isRecognized())
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
 
   if (StartPattern.Condition != StepPattern.Condition) {
     // We don't handle this case today; but we could, by considering four
     // possibilities below instead of two. I'm not sure if there are cases where
     // that will help over what getRange already does, though.
-    return ConstantRange(BitWidth, /* isFullSet = */ true);
+    return ConstantRange::getFull(BitWidth);
   }
 
   // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
@@ -6128,7 +6121,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
     // to obey basic rules for definitions dominating uses which this
     // analysis depends on.
     if (!DT.isReachableFromEntry(I->getParent()))
-      return getUnknown(V);
+      return getUnknown(UndefValue::get(V->getType()));
   } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
     return getConstant(CI);
   else if (isa<ConstantPointerNull>(V))
@@ -6744,6 +6737,28 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   return BackedgeTakenCounts.find(L)->second = std::move(Result);
 }
 
+void ScalarEvolution::forgetAllLoops() {
+  // This method is intended to forget all info about loops. It should
+  // invalidate caches as if the following happened:
+  // - The trip counts of all loops have changed arbitrarily
+  // - Every llvm::Value has been updated in place to produce a different
+  // result.
+  BackedgeTakenCounts.clear();
+  PredicatedBackedgeTakenCounts.clear();
+  LoopPropertiesCache.clear();
+  ConstantEvolutionLoopExitValue.clear();
+  ValueExprMap.clear();
+  ValuesAtScopes.clear();
+  LoopDispositions.clear();
+  BlockDispositions.clear();
+  UnsignedRanges.clear();
+  SignedRanges.clear();
+  ExprValueMap.clear();
+  HasRecMap.clear();
+  MinTrailingZerosCache.clear();
+  PredicatedSCEVRewrites.clear();
+}
+
 void ScalarEvolution::forgetLoop(const Loop *L) {
   // Drop any stored trip count value.
   auto RemoveLoopFromBackedgeMap =
@@ -6972,8 +6987,8 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
 /// computable exit into a persistent ExitNotTakenInfo array.
 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
-    SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
-        &&ExitCounts,
+    ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
+        ExitCounts,
     bool Complete, const SCEV *MaxCount, bool MaxOrZero)
     : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
   using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
@@ -7256,6 +7271,14 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
         if (EL0.ExactNotTaken == EL1.ExactNotTaken)
           BECount = EL0.ExactNotTaken;
       }
+      // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
+      // to be more aggressive when computing BECount than when computing
+      // MaxBECount.  In these cases it is possible for EL0.ExactNotTaken and
+      // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+      // to not.
+      if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+          !isa<SCEVCouldNotCompute>(BECount))
+        MaxBECount = getConstant(getUnsignedRangeMax(BECount));
 
       return ExitLimit(BECount, MaxBECount, false,
                        {&EL0.Predicates, &EL1.Predicates});
@@ -7651,7 +7674,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
 static bool CanConstantFold(const Instruction *I) {
   if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
       isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
-      isa<LoadInst>(I))
+      isa<LoadInst>(I) || isa<ExtractValueInst>(I))
     return true;
 
   if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -8075,7 +8098,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
     }
     case scSMaxExpr:
     case scUMaxExpr:
-      break; // TODO: smax, umax.
+    case scSMinExpr:
+    case scUMinExpr:
+      break; // TODO: smax, umax, smin, umin.
   }
   return nullptr;
 }
@@ -8087,44 +8112,64 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
   // exit value from the loop without using SCEVs.
   if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
     if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
-      const Loop *LI = this->LI[I->getParent()];
-      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
-        if (PHINode *PN = dyn_cast<PHINode>(I))
-          if (PN->getParent() == LI->getHeader()) {
-            // Okay, there is no closed form solution for the PHI node.  Check
-            // to see if the loop that contains it has a known backedge-taken
-            // count.  If so, we may be able to force computation of the exit
-            // value.
-            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
-            if (const SCEVConstant *BTCC =
-                  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
-
-              // This trivial case can show up in some degenerate cases where
-              // the incoming IR has not yet been fully simplified.
-              if (BTCC->getValue()->isZero()) {
-                Value *InitValue = nullptr;
-                bool MultipleInitValues = false;
-                for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
-                  if (!LI->contains(PN->getIncomingBlock(i))) {
-                    if (!InitValue)
-                      InitValue = PN->getIncomingValue(i);
-                    else if (InitValue != PN->getIncomingValue(i)) {
-                      MultipleInitValues = true;
-                      break;
-                    }
-                  }
-                  if (!MultipleInitValues && InitValue)
-                    return getSCEV(InitValue);
+      if (PHINode *PN = dyn_cast<PHINode>(I)) {
+        const Loop *LI = this->LI[I->getParent()];
+        // Looking for loop exit value.
+        if (LI && LI->getParentLoop() == L &&
+            PN->getParent() == LI->getHeader()) {
+          // Okay, there is no closed form solution for the PHI node.  Check
+          // to see if the loop that contains it has a known backedge-taken
+          // count.  If so, we may be able to force computation of the exit
+          // value.
+          const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+          // This trivial case can show up in some degenerate cases where
+          // the incoming IR has not yet been fully simplified.
+          if (BackedgeTakenCount->isZero()) {
+            Value *InitValue = nullptr;
+            bool MultipleInitValues = false;
+            for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+              if (!LI->contains(PN->getIncomingBlock(i))) {
+                if (!InitValue)
+                  InitValue = PN->getIncomingValue(i);
+                else if (InitValue != PN->getIncomingValue(i)) {
+                  MultipleInitValues = true;
+                  break;
                 }
               }
-              // Okay, we know how many times the containing loop executes.  If
-              // this is a constant evolving PHI node, get the final value at
-              // the specified iteration number.
-              Constant *RV =
-                  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
-              if (RV) return getSCEV(RV);
             }
+            if (!MultipleInitValues && InitValue)
+              return getSCEV(InitValue);
           }
+          // Do we have a loop invariant value flowing around the backedge
+          // for a loop which must execute the backedge?
+          if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+              isKnownPositive(BackedgeTakenCount) &&
+              PN->getNumIncomingValues() == 2) {
+            unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
+            const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred));
+            if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent()))
+              return OnBackedge;
+          }
+          if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+            // Okay, we know how many times the containing loop executes.  If
+            // this is a constant evolving PHI node, get the final value at
+            // the specified iteration number.
+            Constant *RV =
+                getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+            if (RV) return getSCEV(RV);
+          }
+        }
+
+        // If there is a single-input Phi, evaluate it at our scope. If we can
+        // prove that this replacement does not break LCSSA form, use new value.
+        if (PN->getNumOperands() == 1) {
+          const SCEV *Input = getSCEV(PN->getOperand(0));
+          const SCEV *InputAtScope = getSCEVAtScope(Input, L);
+          // TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
+          // for the simplest case just support constants.
+          if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
+        }
+      }
 
       // Okay, this is an expression that we cannot symbolically evaluate
       // into a SCEV.  Check to see if it's possible to symbolically evaluate
@@ -8198,13 +8243,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
           NewOps.push_back(OpAtScope);
         }
         if (isa<SCEVAddExpr>(Comm))
-          return getAddExpr(NewOps);
+          return getAddExpr(NewOps, Comm->getNoWrapFlags());
         if (isa<SCEVMulExpr>(Comm))
-          return getMulExpr(NewOps);
-        if (isa<SCEVSMaxExpr>(Comm))
-          return getSMaxExpr(NewOps);
-        if (isa<SCEVUMaxExpr>(Comm))
-          return getUMaxExpr(NewOps);
+          return getMulExpr(NewOps, Comm->getNoWrapFlags());
+        if (isa<SCEVMinMaxExpr>(Comm))
+          return getMinMaxExpr(Comm->getSCEVType(), NewOps);
         llvm_unreachable("Unknown commutative SCEV type!");
       }
     }
@@ -10045,41 +10088,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                      getNotSCEV(FoundLHS));
 }
 
-/// If Expr computes ~A, return A else return nullptr
-static const SCEV *MatchNotExpr(const SCEV *Expr) {
-  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-  if (!Add || Add->getNumOperands() != 2 ||
-      !Add->getOperand(0)->isAllOnesValue())
-    return nullptr;
-
-  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
-      !AddRHS->getOperand(0)->isAllOnesValue())
-    return nullptr;
-
-  return AddRHS->getOperand(1);
-}
-
-/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
-                              const SCEV *Candidate) {
-  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
-  if (!MaxExpr) return false;
-
-  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
-}
-
-/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMinConsistingOf(ScalarEvolution &SE,
-                              const SCEV *MaybeMinExpr,
-                              const SCEV *Candidate) {
-  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
-  if (!MaybeMaxExpr)
+/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
+template <typename MinMaxExprType>
+static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
+                                 const SCEV *Candidate) {
+  const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
+  if (!MinMaxExpr)
     return false;
 
-  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+  return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
 }
 
 static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
@@ -10128,20 +10145,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
     LLVM_FALLTHROUGH;
   case ICmpInst::ICMP_SLE:
     return
-      // min(A, ...) <= A
-      IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
-      // A <= max(A, ...)
-      IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+        // min(A, ...) <= A
+        IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
+        // A <= max(A, ...)
+        IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
 
   case ICmpInst::ICMP_UGE:
     std::swap(LHS, RHS);
     LLVM_FALLTHROUGH;
   case ICmpInst::ICMP_ULE:
     return
-      // min(A, ...) <= A
-      IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
-      // A <= max(A, ...)
-      IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+        // min(A, ...) <= A
+        IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
+        // A <= max(A, ...)
+        IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
   }
 
   llvm_unreachable("covered switch fell through?!");
@@ -10691,13 +10708,10 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
     IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
              : APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
 
-
-  const SCEV *MaxBECount = getCouldNotCompute();
-  if (isa<SCEVConstant>(BECount))
-    MaxBECount = BECount;
-  else
-    MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
-                                getConstant(MinStride), false);
+  const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
+                               ? BECount
+                               : computeBECount(getConstant(MaxStart - MinEnd),
+                                                getConstant(MinStride), false);
 
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
@@ -10806,8 +10820,6 @@ static inline bool containsUndefs(const SCEV *S) {
   return SCEVExprContains(S, [](const SCEV *S) {
     if (const auto *SU = dyn_cast<SCEVUnknown>(S))
       return isa<UndefValue>(SU->getValue());
-    else if (const auto *SC = dyn_cast<SCEVConstant>(S))
-      return isa<UndefValue>(SC->getValue());
     return false;
   });
 }
@@ -11402,19 +11414,23 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
   OS << ": ";
 
-  SmallVector<BasicBlock *, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1)
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  if (ExitingBlocks.size() != 1)
     OS << "<multiple exits> ";
 
-  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
-    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
-  } else {
-    OS << "Unpredictable backedge-taken count. ";
-  }
+  if (SE->hasLoopInvariantBackedgeTakenCount(L))
+    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
+  else
+    OS << "Unpredictable backedge-taken count.\n";
 
-  OS << "\n"
-        "Loop ";
+  if (ExitingBlocks.size() > 1)
+    for (BasicBlock *ExitingBlock : ExitingBlocks) {
+      OS << "  exit count for " << ExitingBlock->getName() << ": "
+         << *SE->getExitCount(L, ExitingBlock) << "\n";
+    }
+
+  OS << "Loop ";
   L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
   OS << ": ";
 
@@ -11611,7 +11627,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
   case scAddExpr:
   case scMulExpr:
   case scUMaxExpr:
-  case scSMaxExpr: {
+  case scSMaxExpr:
+  case scUMinExpr:
+  case scSMinExpr: {
     bool HasVarying = false;
     for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
       LoopDisposition D = getLoopDisposition(Op, L);
@@ -11698,7 +11716,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   case scAddExpr:
   case scMulExpr:
   case scUMaxExpr:
-  case scSMaxExpr: {
+  case scSMaxExpr:
+  case scUMinExpr:
+  case scSMinExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool Proper = true;
     for (const SCEV *NAryOp : NAry->operands()) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 289d4f8ae49a..96da0a24cddd 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,7 +22,7 @@
 using namespace llvm;
 
 AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
-                                const MemoryLocation &LocB) {
+                                const MemoryLocation &LocB, AAQueryInfo &AAQI) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
@@ -86,11 +85,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
                              AO ? AAMDNodes() : LocA.AATags),
               MemoryLocation(BO ? BO : LocB.Ptr,
                              BO ? LocationSize::unknown() : LocB.Size,
-                             BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
+                             BO ? AAMDNodes() : LocB.AATags),
+              AAQI) == NoAlias)
       return NoAlias;
 
   // Forward the query to the next analysis.
-  return AAResultBase::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB, AAQI);
 }
 
 /// Given an expression, try to find a base value.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index ca5cf1663b83..e8a95d35482c 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1,9 +1,8 @@
 //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,12 +60,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
           // instructions that might be inserted before BIP.
           if (BasicBlock::iterator(CI) != IP || BIP == IP) {
             // Create a new cast, and leave the old cast in place in case
-            // it is being used as an insert point. Clear its operand
-            // so that it doesn't hold anything live.
+            // it is being used as an insert point.
             Ret = CastInst::Create(Op, V, Ty, "", &*IP);
             Ret->takeName(CI);
             CI->replaceAllUsesWith(Ret);
-            CI->setOperand(0, UndefValue::get(V->getType()));
             break;
           }
           Ret = CI;
@@ -167,9 +164,11 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
 }
 
 /// InsertBinop - Insert the specified binary operator, doing a small amount
-/// of work to avoid inserting an obviously redundant operation.
+/// of work to avoid inserting an obviously redundant operation, and hoisting
+/// to an outer loop when the opportunity is there and it is safe.
 Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
-                                 Value *LHS, Value *RHS) {
+                                 Value *LHS, Value *RHS,
+                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
   // Fold a binop with constant operands.
   if (Constant *CLHS = dyn_cast<Constant>(LHS))
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
@@ -188,20 +187,22 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
       if (isa<DbgInfoIntrinsic>(IP))
         ScanLimit++;
 
-      // Conservatively, do not use any instruction which has any of wrap/exact
-      // flags installed.
-      // TODO: Instead of simply disable poison instructions we can be clever
-      //       here and match SCEV to this instruction.
-      auto canGeneratePoison = [](Instruction *I) {
-        if (isa<OverflowingBinaryOperator>(I) &&
-            (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
-          return true;
+      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
+        // Ensure that no-wrap flags match.
+        if (isa<OverflowingBinaryOperator>(I)) {
+          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
+            return true;
+          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
+            return true;
+        }
+        // Conservatively, do not reuse any instruction that has the exact
+        // flag set.
         if (isa<PossiblyExactOperator>(I) && I->isExact())
           return true;
         return false;
       };
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
-          IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
+          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
         return &*IP;
       if (IP == BlockBegin) break;
     }
@@ -211,19 +212,25 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
   SCEVInsertPointGuard Guard(Builder, this);
 
-  // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
-    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
-    BasicBlock *Preheader = L->getLoopPreheader();
-    if (!Preheader) break;
+  if (IsSafeToHoist) {
+    // Move the insertion point out of as many loops as we can.
+    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+      BasicBlock *Preheader = L->getLoopPreheader();
+      if (!Preheader) break;
 
-    // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader->getTerminator());
+      // Ok, move up a level.
+      Builder.SetInsertPoint(Preheader->getTerminator());
+    }
   }
 
   // If we haven't found this binop, insert it.
   Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
   BO->setDebugLoc(Loc);
+  if (Flags & SCEV::FlagNUW)
+    BO->setHasNoUnsignedWrap();
+  if (Flags & SCEV::FlagNSW)
+    BO->setHasNoSignedWrap();
   rememberInstruction(BO);
 
   return BO;
@@ -695,7 +702,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Sort by loop. Use a stable sort so that constants follow non-constants and
   // pointer operands precede non-pointer operands.
-  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
 
   // Emit instructions to add all the operands. Hoist as much as possible
   // out of loops, and form meaningful getelementptrs where possible.
@@ -735,7 +742,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
       // Instead of doing a negate and add, just do a subtract.
       Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
       Sum = InsertNoopCastOfTo(Sum, Ty);
-      Sum = InsertBinop(Instruction::Sub, Sum, W);
+      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
+                        /*IsSafeToHoist*/ true);
       ++I;
     } else {
       // A simple add.
@@ -743,7 +751,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
       Sum = InsertNoopCastOfTo(Sum, Ty);
       // Canonicalize a constant to the RHS.
       if (isa<Constant>(Sum)) std::swap(Sum, W);
-      Sum = InsertBinop(Instruction::Add, Sum, W);
+      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
+                        /*IsSafeToHoist*/ true);
       ++I;
     }
   }
@@ -762,7 +771,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
     OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
 
   // Sort by loop. Use a stable sort so that constants follow non-constants.
-  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
 
   // Emit instructions to mul all the operands. Hoist as much as possible
   // out of loops.
@@ -795,9 +804,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
     if (Exponent & 1)
       Result = P;
     for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
-      P = InsertBinop(Instruction::Mul, P, P);
+      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
+                      /*IsSafeToHoist*/ true);
       if (Exponent & BinExp)
-        Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
+        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
+                                      SCEV::FlagAnyWrap,
+                                      /*IsSafeToHoist*/ true)
+                        : P;
     }
 
     I = E;
@@ -812,7 +825,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
     } else if (I->second->isAllOnesValue()) {
       // Instead of doing a multiply by negative one, just do a negate.
       Prod = InsertNoopCastOfTo(Prod, Ty);
-      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
+                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
       ++I;
     } else {
       // A simple mul.
@@ -824,10 +838,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
       if (match(W, m_Power2(RHS))) {
         // Canonicalize Prod*(1<<C) to Prod<<C.
         assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+        auto NWFlags = S->getNoWrapFlags();
+        // Clear the nsw flag if the shl would produce a poison value.
+        if (RHS->logBase2() == RHS->getBitWidth() - 1)
+          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
         Prod = InsertBinop(Instruction::Shl, Prod,
-                           ConstantInt::get(Ty, RHS->logBase2()));
+                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
+                           /*IsSafeToHoist*/ true);
       } else {
-        Prod = InsertBinop(Instruction::Mul, Prod, W);
+        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
+                           /*IsSafeToHoist*/ true);
       }
     }
   }
@@ -843,11 +863,13 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
     const APInt &RHS = SC->getAPInt();
     if (RHS.isPowerOf2())
       return InsertBinop(Instruction::LShr, LHS,
-                         ConstantInt::get(Ty, RHS.logBase2()));
+                         ConstantInt::get(Ty, RHS.logBase2()),
+                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
   }
 
   Value *RHS = expandCodeFor(S->getRHS(), Ty);
-  return InsertBinop(Instruction::UDiv, LHS, RHS);
+  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
+                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
 }
 
 /// Move parts of Base into Rest to leave Base with the minimal
@@ -1634,7 +1656,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
   for (int i = S->getNumOperands()-2; i >= 0; --i) {
     // In the case of mixed integer and pointer types, do the
     // rest of the comparisons as integer.
-    if (S->getOperand(i)->getType() != Ty) {
+    Type *OpTy = S->getOperand(i)->getType();
+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
       Ty = SE.getEffectiveSCEVType(Ty);
       LHS = InsertNoopCastOfTo(LHS, Ty);
     }
@@ -1658,7 +1681,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   for (int i = S->getNumOperands()-2; i >= 0; --i) {
     // In the case of mixed integer and pointer types, do the
     // rest of the comparisons as integer.
-    if (S->getOperand(i)->getType() != Ty) {
+    Type *OpTy = S->getOperand(i)->getType();
+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
       Ty = SE.getEffectiveSCEVType(Ty);
       LHS = InsertNoopCastOfTo(LHS, Ty);
     }
@@ -1676,6 +1700,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   return LHS;
 }
 
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+  Type *Ty = LHS->getType(); // Type the comparisons are currently done in.
+  for (int i = S->getNumOperands() - 2; i >= 0; --i) { // Last to first.
+    // When this operand and the running result disagree on being an integer
+    // (mixed integer/pointer operands), continue the comparisons as integer.
+    Type *OpTy = S->getOperand(i)->getType();
+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+      Ty = SE.getEffectiveSCEVType(Ty);
+      LHS = InsertNoopCastOfTo(LHS, Ty);
+    }
+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+    Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); // Signed: LHS < RHS.
+    rememberInstruction(ICmp);
+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+    rememberInstruction(Sel);
+    LHS = Sel; // The select becomes the running minimum.
+  }
+  // If the running result's type no longer matches the SCEV's type (the
+  // mixed integer/pointer case), cast the final result back to that type.
+  if (LHS->getType() != S->getType())
+    LHS = InsertNoopCastOfTo(LHS, S->getType());
+  return LHS;
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+  Type *Ty = LHS->getType(); // Type the comparisons are currently done in.
+  for (int i = S->getNumOperands() - 2; i >= 0; --i) { // Last to first.
+    // When this operand and the running result disagree on being an integer
+    // (mixed integer/pointer operands), continue the comparisons as integer.
+    Type *OpTy = S->getOperand(i)->getType();
+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+      Ty = SE.getEffectiveSCEVType(Ty);
+      LHS = InsertNoopCastOfTo(LHS, Ty);
+    }
+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+    Value *ICmp = Builder.CreateICmpULT(LHS, RHS); // Unsigned: LHS < RHS.
+    rememberInstruction(ICmp);
+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+    rememberInstruction(Sel);
+    LHS = Sel; // The select becomes the running minimum.
+  }
+  // If the running result's type no longer matches the SCEV's type (the
+  // mixed integer/pointer case), cast the final result back to that type.
+  if (LHS->getType() != S->getType())
+    LHS = InsertNoopCastOfTo(LHS, S->getType());
+  return LHS;
+}
+
 Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                    Instruction *IP) {
   setInsertPoint(IP);
@@ -1732,49 +1806,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
   // Compute an insertion point for this SCEV object. Hoist the instructions
   // as far out in the loop nest as possible.
   Instruction *InsertPt = &*Builder.GetInsertPoint();
-  for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
-       L = L->getParentLoop())
-    if (SE.isLoopInvariant(S, L)) {
-      if (!L) break;
-      if (BasicBlock *Preheader = L->getLoopPreheader())
-        InsertPt = Preheader->getTerminator();
-      else {
-        // LSR sets the insertion point for AddRec start/step values to the
-        // block start to simplify value reuse, even though it's an invalid
-        // position. SCEVExpander must correct for this in all cases.
-        InsertPt = &*L->getHeader()->getFirstInsertionPt();
-      }
-    } else {
-      // We can move insertion point only if there is no div or rem operations
-      // otherwise we are risky to move it over the check for zero denominator.
-      auto SafeToHoist = [](const SCEV *S) {
-        return !SCEVExprContains(S, [](const SCEV *S) {
-                  if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
-                    if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
-                      // Division by non-zero constants can be hoisted.
-                      return SC->getValue()->isZero();
-                    // All other divisions should not be moved as they may be
-                    // divisions by zero and should be kept within the
-                    // conditions of the surrounding loops that guard their
-                    // execution (see PR35406).
-                    return true;
-                  }
-                  return false;
-                });
-      };
-      // If the SCEV is computable at this level, insert it into the header
-      // after the PHIs (and after any other instructions that we've inserted
-      // there) so that it is guaranteed to dominate any user inside the loop.
-      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
-          SafeToHoist(S))
-        InsertPt = &*L->getHeader()->getFirstInsertionPt();
-      while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
-             (isInsertedInstruction(InsertPt) ||
-              isa<DbgInfoIntrinsic>(InsertPt))) {
-        InsertPt = &*std::next(InsertPt->getIterator());
+
+  // We can move the insertion point only if there are no div or rem operations;
+  // otherwise we risk moving it past the check for a zero denominator.
+  auto SafeToHoist = [](const SCEV *S) {
+    return !SCEVExprContains(S, [](const SCEV *S) {
+              if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+                if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+                  // Division by non-zero constants can be hoisted.
+                  return SC->getValue()->isZero();
+                // All other divisions should not be moved as they may be
+                // divisions by zero and should be kept within the
+                // conditions of the surrounding loops that guard their
+                // execution (see PR35406).
+                return true;
+              }
+              return false;
+            });
+  };
+  if (SafeToHoist(S)) {
+    for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+         L = L->getParentLoop()) {
+      if (SE.isLoopInvariant(S, L)) {
+        if (!L) break;
+        if (BasicBlock *Preheader = L->getLoopPreheader())
+          InsertPt = Preheader->getTerminator();
+        else
+          // LSR sets the insertion point for AddRec start/step values to the
+          // block start to simplify value reuse, even though it's an invalid
+          // position. SCEVExpander must correct for this in all cases.
+          InsertPt = &*L->getHeader()->getFirstInsertionPt();
+      } else {
+        // If the SCEV is computable at this level, insert it into the header
+        // after the PHIs (and after any other instructions that we've inserted
+        // there) so that it is guaranteed to dominate any user inside the loop.
+        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+          InsertPt = &*L->getHeader()->getFirstInsertionPt();
+        while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+               (isInsertedInstruction(InsertPt) ||
+                isa<DbgInfoIntrinsic>(InsertPt)))
+          InsertPt = &*std::next(InsertPt->getIterator());
+        break;
       }
-      break;
     }
+  }
+
+  // IndVarSimplify sometimes sets the insertion point at the block start, even
+  // when there are PHIs at that point.  We must correct for this.
+  if (isa<PHINode>(*InsertPt))
+    InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
 
   // Check to see if we already expanded this here.
   auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
@@ -2071,10 +2151,13 @@ bool SCEVExpander::isHighCostExpansionHelper(
 
   if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
     // If the divisor is a power of two and the SCEV type fits in a native
-    // integer, consider the division cheap irrespective of whether it occurs in
-    // the user code since it can be lowered into a right shift.
+    // integer (and the LHS not expensive), consider the division cheap
+    // irrespective of whether it occurs in the user code since it can be
+    // lowered into a right shift.
     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
       if (SC->getAPInt().isPowerOf2()) {
+        if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed))
+          return true;
         const DataLayout &DL =
             L->getHeader()->getParent()->getParent()->getDataLayout();
         unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -2102,7 +2185,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
 
   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
   // the exit condition.
-  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+  if (isa<SCEVMinMaxExpr>(S))
     return true;
 
   // Recurse past nary expressions, which commonly occur in the
@@ -2339,6 +2422,24 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
 
 bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
                       ScalarEvolution &SE) {
-  return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
+  if (!isSafeToExpand(S, SE))
+    return false;
+  // We have to prove that the expanded site of S dominates InsertionPoint.
+  // This is easy when not in the same block, but hard when S is an instruction
+  // to be expanded somewhere inside the same block as our insertion point.
+  // What we really need here is something analogous to an OrderedBasicBlock,
+  // but for the moment, we paper over the problem by handling two common and
+  // cheap to check cases.
+  if (SE.properlyDominates(S, InsertionPoint->getParent()))
+    return true;
+  if (SE.dominates(S, InsertionPoint->getParent())) {
+    if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
+      return true;
+    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+      for (const Value *V : InsertionPoint->operand_values())
+        if (V == U->getValue())
+          return true;
+  }
+  return false;
 }
 }
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 3740039b8f86..209ae66ca53e 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -1,9 +1,8 @@
 //===- ScalarEvolutionNormalization.cpp - See below -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 9a581fe46afc..094e4a3d5dc8 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -1,9 +1,8 @@
 //===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,9 +75,10 @@ public:
 } // end anonymous namespace
 
 AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
-                                         const MemoryLocation &LocB) {
+                                         const MemoryLocation &LocB,
+                                         AAQueryInfo &AAQI) {
   if (!EnableScopedNoAlias)
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   // Get the attached MDNodes.
   const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
@@ -92,13 +92,14 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
     return NoAlias;
 
   // If they may alias, chain to the next AliasAnalysis.
-  return AAResultBase::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB, AAQI);
 }
 
 ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
-                                                const MemoryLocation &Loc) {
+                                                const MemoryLocation &Loc,
+                                                AAQueryInfo &AAQI) {
   if (!EnableScopedNoAlias)
-    return AAResultBase::getModRefInfo(Call, Loc);
+    return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 
   if (!mayAliasInScopes(Loc.AATags.Scope,
                         Call->getMetadata(LLVMContext::MD_noalias)))
@@ -108,13 +109,14 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
                         Loc.AATags.NoAlias))
     return ModRefInfo::NoModRef;
 
-  return AAResultBase::getModRefInfo(Call, Loc);
+  return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 }
 
 ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
-                                                const CallBase *Call2) {
+                                                const CallBase *Call2,
+                                                AAQueryInfo &AAQI) {
   if (!EnableScopedNoAlias)
-    return AAResultBase::getModRefInfo(Call1, Call2);
+    return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 
   if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
                         Call2->getMetadata(LLVMContext::MD_noalias)))
@@ -124,7 +126,7 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
                         Call1->getMetadata(LLVMContext::MD_noalias)))
     return ModRefInfo::NoModRef;
 
-  return AAResultBase::getModRefInfo(Call1, Call2);
+  return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 }
 
 static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
index 66b03845864f..4cf235db86eb 100644
--- a/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -416,7 +415,9 @@ class StackSafetyDataFlowAnalysis {
       updateOneNode(F.first, F.second);
   }
   void runDataFlow();
+#ifndef NDEBUG
   void verifyFixedPoint();
+#endif
 
 public:
   StackSafetyDataFlowAnalysis(
@@ -527,11 +528,13 @@ void StackSafetyDataFlowAnalysis::runDataFlow() {
   }
 }
 
+#ifndef NDEBUG
 void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
   WorkList.clear();
   updateAllNodes();
   assert(WorkList.empty());
 }
+#endif
 
 StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
   runDataFlow();
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index 2f20cd12506c..60ea2451b0ef 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -1,9 +1,8 @@
 //===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
index e1a7e4476d12..3cf248a31142 100644
--- a/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -1,10 +1,9 @@
 //===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
 //--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -219,14 +218,9 @@ struct DivergencePropagator {
   template <typename SuccessorIterable>
   std::unique_ptr<ConstBlockSet>
   computeJoinPoints(const BasicBlock &RootBlock,
-                    SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+                    SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
     assert(JoinBlocks);
 
-    // immediate post dominator (no join block beyond that block)
-    const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
-    const auto *IpdNode = PdNode->getIDom();
-    const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
     // bootstrap with branch targets
     for (const auto *SuccBlock : NodeSuccessors) {
       DefMap.emplace(SuccBlock, SuccBlock);
@@ -341,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
 
   // already available in cache?
   auto ItCached = CachedLoopExitJoins.find(&Loop);
-  if (ItCached != CachedLoopExitJoins.end())
+  if (ItCached != CachedLoopExitJoins.end()) {
     return *ItCached->second;
+  }
+
+  // don't propagate beyond the immediate post-dominator of the loop
+  const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
+  const auto *IpdNode = PdNode->getIDom();
+  const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+  while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
+    IpdNode = IpdNode->getIDom();
+    PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+  }
 
   // compute all join points
   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
   auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
-      *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+      *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
 
   auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
   assert(ItInserted.second);
@@ -366,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
   if (ItCached != CachedBranchJoins.end())
     return *ItCached->second;
 
+  // don't propagate beyond the immediate post-dominator of the branch
+  const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
+  const auto *IpdNode = PdNode->getIDom();
+  const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
   // compute all join points
   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
   const auto &TermBlock = *Term.getParent();
   auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
-      TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+      TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
 
   auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
   assert(ItInserted.second);
diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp
index c2d7bb11a4cf..22766e5f07f5 100644
--- a/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/lib/Analysis/SyntheticCountsUtils.cpp
@@ -1,9 +1,8 @@
 //===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 4643f75da42d..ef139d3257d2 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -1,9 +1,8 @@
 //===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
                           "No vector functions library"),
                clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
                           "Accelerate framework"),
+               clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
+                          "IBM MASS vector library"),
                clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
                           "Intel SVML library")));
 
@@ -50,6 +51,16 @@ static bool hasSinCosPiStret(const Triple &T) {
   return true;
 }
 
+static bool hasBcmp(const Triple &TT) {
+  // POSIX removed support for bcmp() in 2001, but glibc and several other
+  // implementations of libc still have it.
+  if (TT.isOSLinux())
+    return TT.isGNUEnvironment() || TT.isMusl();
+  // Both NetBSD and OpenBSD are planning to remove the function. Windows does
+  // not have it.
+  return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+}
+
 /// Initialize the set of available library functions based on the specified
 /// target triple. This should be carefully written so that a missing target
 /// triple gets a sane set of defaults.
@@ -78,8 +89,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
        ShouldSignExtI32Param = false;
   // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
   // returns corresponding to C-level ints and unsigned ints.
-  if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
-      T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+  if (T.isPPC64() || T.getArch() == Triple::sparcv9 ||
+      T.getArch() == Triple::systemz) {
     ShouldExtI32Param = true;
     ShouldExtI32Return = true;
   }
@@ -142,6 +153,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_sincospif_stret);
   }
 
+  if (!hasBcmp(T))
+    TLI.setUnavailable(LibFunc_bcmp);
+
   if (T.isMacOSX() && T.getArch() == Triple::x86 &&
       !T.isMacOSXVersionLT(10, 7)) {
     // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -153,33 +167,82 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003");
   }
 
-  // iprintf and friends are only available on XCore and TCE.
-  if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+  // iprintf and friends are only available on XCore, TCE, and Emscripten.
+  if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce &&
+      T.getOS() != Triple::Emscripten) {
     TLI.setUnavailable(LibFunc_iprintf);
     TLI.setUnavailable(LibFunc_siprintf);
     TLI.setUnavailable(LibFunc_fiprintf);
   }
 
+  // __small_printf and friends are only available on Emscripten.
+  if (T.getOS() != Triple::Emscripten) {
+    TLI.setUnavailable(LibFunc_small_printf);
+    TLI.setUnavailable(LibFunc_small_sprintf);
+    TLI.setUnavailable(LibFunc_small_fprintf);
+  }
+
   if (T.isOSWindows() && !T.isOSCygMing()) {
-    // Win32 does not support long double
+    // XXX: The earliest documentation available at the moment is for VS2015/VC19:
+    // https://docs.microsoft.com/en-us/cpp/c-runtime-library/floating-point-support?view=vs-2015
+    // XXX: In order to use an MSVCRT older than VC19,
+    // the specific library version must be explicit in the target triple,
+    // e.g., x86_64-pc-windows-msvc18.
+    bool hasPartialC99 = true;
+    if (T.isKnownWindowsMSVCEnvironment()) {
+      unsigned Major, Minor, Micro;
+      T.getEnvironmentVersion(Major, Minor, Micro);
+      hasPartialC99 = (Major == 0 || Major >= 19);
+    }
+
+    // Latest targets support C89 math functions, in part.
+    bool isARM = (T.getArch() == Triple::aarch64 ||
+                  T.getArch() == Triple::arm);
+    bool hasPartialFloat = (isARM ||
+                            T.getArch() == Triple::x86_64);
+
+    // Win32 does not support float C89 math functions, in general.
+    if (!hasPartialFloat) {
+      TLI.setUnavailable(LibFunc_acosf);
+      TLI.setUnavailable(LibFunc_asinf);
+      TLI.setUnavailable(LibFunc_atan2f);
+      TLI.setUnavailable(LibFunc_atanf);
+      TLI.setUnavailable(LibFunc_ceilf);
+      TLI.setUnavailable(LibFunc_cosf);
+      TLI.setUnavailable(LibFunc_coshf);
+      TLI.setUnavailable(LibFunc_expf);
+      TLI.setUnavailable(LibFunc_floorf);
+      TLI.setUnavailable(LibFunc_fmodf);
+      TLI.setUnavailable(LibFunc_log10f);
+      TLI.setUnavailable(LibFunc_logf);
+      TLI.setUnavailable(LibFunc_modff);
+      TLI.setUnavailable(LibFunc_powf);
+      TLI.setUnavailable(LibFunc_sinf);
+      TLI.setUnavailable(LibFunc_sinhf);
+      TLI.setUnavailable(LibFunc_sqrtf);
+      TLI.setUnavailable(LibFunc_tanf);
+      TLI.setUnavailable(LibFunc_tanhf);
+    }
+    if (!isARM)
+      TLI.setUnavailable(LibFunc_fabsf);
+    TLI.setUnavailable(LibFunc_frexpf);
+    TLI.setUnavailable(LibFunc_ldexpf);
+
+    // Win32 does not support long double C89 math functions.
     TLI.setUnavailable(LibFunc_acosl);
     TLI.setUnavailable(LibFunc_asinl);
-    TLI.setUnavailable(LibFunc_atanl);
     TLI.setUnavailable(LibFunc_atan2l);
+    TLI.setUnavailable(LibFunc_atanl);
     TLI.setUnavailable(LibFunc_ceill);
-    TLI.setUnavailable(LibFunc_copysignl);
     TLI.setUnavailable(LibFunc_cosl);
     TLI.setUnavailable(LibFunc_coshl);
     TLI.setUnavailable(LibFunc_expl);
-    TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
     TLI.setUnavailable(LibFunc_fabsl);
     TLI.setUnavailable(LibFunc_floorl);
-    TLI.setUnavailable(LibFunc_fmaxl);
-    TLI.setUnavailable(LibFunc_fminl);
     TLI.setUnavailable(LibFunc_fmodl);
     TLI.setUnavailable(LibFunc_frexpl);
-    TLI.setUnavailable(LibFunc_ldexpf);
     TLI.setUnavailable(LibFunc_ldexpl);
+    TLI.setUnavailable(LibFunc_log10l);
     TLI.setUnavailable(LibFunc_logl);
     TLI.setUnavailable(LibFunc_modfl);
     TLI.setUnavailable(LibFunc_powl);
@@ -189,81 +252,66 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_tanl);
     TLI.setUnavailable(LibFunc_tanhl);
 
-    // Win32 only has C89 math
-    TLI.setUnavailable(LibFunc_acosh);
-    TLI.setUnavailable(LibFunc_acoshf);
+    // Win32 does not fully support C99 math functions.
+    if (!hasPartialC99) {
+      TLI.setUnavailable(LibFunc_acosh);
+      TLI.setUnavailable(LibFunc_acoshf);
+      TLI.setUnavailable(LibFunc_asinh);
+      TLI.setUnavailable(LibFunc_asinhf);
+      TLI.setUnavailable(LibFunc_atanh);
+      TLI.setUnavailable(LibFunc_atanhf);
+      TLI.setAvailableWithName(LibFunc_cabs, "_cabs");
+      TLI.setUnavailable(LibFunc_cabsf);
+      TLI.setUnavailable(LibFunc_cbrt);
+      TLI.setUnavailable(LibFunc_cbrtf);
+      TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
+      TLI.setAvailableWithName(LibFunc_copysignf, "_copysignf");
+      TLI.setUnavailable(LibFunc_exp2);
+      TLI.setUnavailable(LibFunc_exp2f);
+      TLI.setUnavailable(LibFunc_expm1);
+      TLI.setUnavailable(LibFunc_expm1f);
+      TLI.setUnavailable(LibFunc_fmax);
+      TLI.setUnavailable(LibFunc_fmaxf);
+      TLI.setUnavailable(LibFunc_fmin);
+      TLI.setUnavailable(LibFunc_fminf);
+      TLI.setUnavailable(LibFunc_log1p);
+      TLI.setUnavailable(LibFunc_log1pf);
+      TLI.setUnavailable(LibFunc_log2);
+      TLI.setUnavailable(LibFunc_log2f);
+      TLI.setAvailableWithName(LibFunc_logb, "_logb");
+      if (hasPartialFloat)
+        TLI.setAvailableWithName(LibFunc_logbf, "_logbf");
+      else
+        TLI.setUnavailable(LibFunc_logbf);
+      TLI.setUnavailable(LibFunc_rint);
+      TLI.setUnavailable(LibFunc_rintf);
+      TLI.setUnavailable(LibFunc_round);
+      TLI.setUnavailable(LibFunc_roundf);
+      TLI.setUnavailable(LibFunc_trunc);
+      TLI.setUnavailable(LibFunc_truncf);
+    }
+
+    // Win32 does not support long double C99 math functions.
     TLI.setUnavailable(LibFunc_acoshl);
-    TLI.setUnavailable(LibFunc_asinh);
-    TLI.setUnavailable(LibFunc_asinhf);
     TLI.setUnavailable(LibFunc_asinhl);
-    TLI.setUnavailable(LibFunc_atanh);
-    TLI.setUnavailable(LibFunc_atanhf);
     TLI.setUnavailable(LibFunc_atanhl);
-    TLI.setUnavailable(LibFunc_cabs);
-    TLI.setUnavailable(LibFunc_cabsf);
     TLI.setUnavailable(LibFunc_cabsl);
-    TLI.setUnavailable(LibFunc_cbrt);
-    TLI.setUnavailable(LibFunc_cbrtf);
     TLI.setUnavailable(LibFunc_cbrtl);
-    TLI.setUnavailable(LibFunc_exp2);
-    TLI.setUnavailable(LibFunc_exp2f);
+    TLI.setUnavailable(LibFunc_copysignl);
     TLI.setUnavailable(LibFunc_exp2l);
-    TLI.setUnavailable(LibFunc_expm1);
-    TLI.setUnavailable(LibFunc_expm1f);
     TLI.setUnavailable(LibFunc_expm1l);
-    TLI.setUnavailable(LibFunc_log2);
-    TLI.setUnavailable(LibFunc_log2f);
-    TLI.setUnavailable(LibFunc_log2l);
-    TLI.setUnavailable(LibFunc_log1p);
-    TLI.setUnavailable(LibFunc_log1pf);
+    TLI.setUnavailable(LibFunc_fmaxl);
+    TLI.setUnavailable(LibFunc_fminl);
     TLI.setUnavailable(LibFunc_log1pl);
-    TLI.setUnavailable(LibFunc_logb);
-    TLI.setUnavailable(LibFunc_logbf);
+    TLI.setUnavailable(LibFunc_log2l);
     TLI.setUnavailable(LibFunc_logbl);
-    TLI.setUnavailable(LibFunc_nearbyint);
-    TLI.setUnavailable(LibFunc_nearbyintf);
     TLI.setUnavailable(LibFunc_nearbyintl);
-    TLI.setUnavailable(LibFunc_rint);
-    TLI.setUnavailable(LibFunc_rintf);
     TLI.setUnavailable(LibFunc_rintl);
-    TLI.setUnavailable(LibFunc_round);
-    TLI.setUnavailable(LibFunc_roundf);
     TLI.setUnavailable(LibFunc_roundl);
-    TLI.setUnavailable(LibFunc_trunc);
-    TLI.setUnavailable(LibFunc_truncf);
     TLI.setUnavailable(LibFunc_truncl);
 
-    // Win32 provides some C99 math with mangled names
-    TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
-
-    if (T.getArch() == Triple::x86) {
-      // Win32 on x86 implements single-precision math functions as macros
-      TLI.setUnavailable(LibFunc_acosf);
-      TLI.setUnavailable(LibFunc_asinf);
-      TLI.setUnavailable(LibFunc_atanf);
-      TLI.setUnavailable(LibFunc_atan2f);
-      TLI.setUnavailable(LibFunc_ceilf);
-      TLI.setUnavailable(LibFunc_copysignf);
-      TLI.setUnavailable(LibFunc_cosf);
-      TLI.setUnavailable(LibFunc_coshf);
-      TLI.setUnavailable(LibFunc_expf);
-      TLI.setUnavailable(LibFunc_floorf);
-      TLI.setUnavailable(LibFunc_fminf);
-      TLI.setUnavailable(LibFunc_fmaxf);
-      TLI.setUnavailable(LibFunc_fmodf);
-      TLI.setUnavailable(LibFunc_logf);
-      TLI.setUnavailable(LibFunc_log10f);
-      TLI.setUnavailable(LibFunc_modff);
-      TLI.setUnavailable(LibFunc_powf);
-      TLI.setUnavailable(LibFunc_sinf);
-      TLI.setUnavailable(LibFunc_sinhf);
-      TLI.setUnavailable(LibFunc_sqrtf);
-      TLI.setUnavailable(LibFunc_tanf);
-      TLI.setUnavailable(LibFunc_tanhf);
-    }
-
-    // Win32 does *not* provide these functions, but they are
-    // generally available on POSIX-compliant systems:
+    // Win32 does not support these functions, but
+    // they are generally available on POSIX-compliant systems.
     TLI.setUnavailable(LibFunc_access);
     TLI.setUnavailable(LibFunc_bcmp);
     TLI.setUnavailable(LibFunc_bcopy);
@@ -318,12 +366,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_utime);
     TLI.setUnavailable(LibFunc_utimes);
     TLI.setUnavailable(LibFunc_write);
-
-    // Win32 does *not* provide provide these functions, but they are
-    // specified by C99:
-    TLI.setUnavailable(LibFunc_atoll);
-    TLI.setUnavailable(LibFunc_frexpf);
-    TLI.setUnavailable(LibFunc_llabs);
   }
 
   switch (T.getOS()) {
@@ -651,11 +693,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
     return ((NumParams == 2 || NumParams == 3) &&
             FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy());
+  case LibFunc_strcat_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_strcat:
     return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
             FTy.getParamType(0) == FTy.getReturnType() &&
             FTy.getParamType(1) == FTy.getReturnType());
 
+  case LibFunc_strncat_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_strncat:
     return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
             FTy.getParamType(0) == FTy.getReturnType() &&
@@ -674,6 +726,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
             FTy.getParamType(0) == FTy.getParamType(1) &&
             FTy.getParamType(0) == PCharTy);
 
+  case LibFunc_strlcat_chk:
+  case LibFunc_strlcpy_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
+  case LibFunc_strlcat:
+  case LibFunc_strlcpy:
+    return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+           FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isPointerTy() &&
+           IsSizeTTy(FTy.getParamType(2));
+
   case LibFunc_strncpy_chk:
   case LibFunc_stpncpy_chk:
     --NumParams;
@@ -739,14 +804,32 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
   case LibFunc_stat:
   case LibFunc_statvfs:
   case LibFunc_siprintf:
+  case LibFunc_small_sprintf:
   case LibFunc_sprintf:
     return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy() &&
             FTy.getReturnType()->isIntegerTy(32));
+
+  case LibFunc_sprintf_chk:
+    return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(2)) &&
+           FTy.getParamType(3)->isPointerTy() &&
+           FTy.getReturnType()->isIntegerTy(32);
+
   case LibFunc_snprintf:
     return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy() &&
             FTy.getReturnType()->isIntegerTy(32));
+
+  case LibFunc_snprintf_chk:
+    return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+           IsSizeTTy(FTy.getParamType(1)) &&
+           FTy.getParamType(2)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(3)) &&
+           FTy.getParamType(4)->isPointerTy() &&
+           FTy.getReturnType()->isIntegerTy(32);
+
   case LibFunc_setitimer:
     return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy());
@@ -795,6 +878,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
             FTy.getParamType(1)->isIntegerTy() &&
             IsSizeTTy(FTy.getParamType(2)));
 
+  case LibFunc_memccpy_chk:
+      --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_memccpy:
     return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
   case LibFunc_memalign:
@@ -836,6 +924,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
   case LibFunc_getenv:
   case LibFunc_getpwnam:
   case LibFunc_iprintf:
+  case LibFunc_small_printf:
   case LibFunc_pclose:
   case LibFunc_perror:
   case LibFunc_printf:
@@ -915,6 +1004,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
             FTy.getParamType(1)->isPointerTy());
   case LibFunc_fscanf:
   case LibFunc_fiprintf:
+  case LibFunc_small_fprintf:
   case LibFunc_fprintf:
     return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() &&
             FTy.getParamType(0)->isPointerTy() &&
@@ -961,9 +1051,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
   case LibFunc_vsprintf:
     return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy());
+  case LibFunc_vsprintf_chk:
+    return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
   case LibFunc_vsnprintf:
     return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy());
+  case LibFunc_vsnprintf_chk:
+    return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(2)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
   case LibFunc_open:
     return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
   case LibFunc_opendir:
@@ -1391,6 +1489,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
 
 bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
                                        LibFunc &F) const {
+  // Intrinsics don't overlap with libcalls; if our module has a large number
+  // of intrinsics, this ends up being an interesting compile time win since we
+  // avoid string normalization and comparison.
+  if (FDecl.isIntrinsic()) return false;
+  
   const DataLayout *DL =
       FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
   return getLibFunc(FDecl.getName(), F) &&
@@ -1430,151 +1533,24 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
   switch (VecLib) {
   case Accelerate: {
     const VecDesc VecFuncs[] = {
-        // Floating-Point Arithmetic and Auxiliary Functions
-        {"ceilf", "vceilf", 4},
-        {"fabsf", "vfabsf", 4},
-        {"llvm.fabs.f32", "vfabsf", 4},
-        {"floorf", "vfloorf", 4},
-        {"sqrtf", "vsqrtf", 4},
-        {"llvm.sqrt.f32", "vsqrtf", 4},
-
-        // Exponential and Logarithmic Functions
-        {"expf", "vexpf", 4},
-        {"llvm.exp.f32", "vexpf", 4},
-        {"expm1f", "vexpm1f", 4},
-        {"logf", "vlogf", 4},
-        {"llvm.log.f32", "vlogf", 4},
-        {"log1pf", "vlog1pf", 4},
-        {"log10f", "vlog10f", 4},
-        {"llvm.log10.f32", "vlog10f", 4},
-        {"logbf", "vlogbf", 4},
-
-        // Trigonometric Functions
-        {"sinf", "vsinf", 4},
-        {"llvm.sin.f32", "vsinf", 4},
-        {"cosf", "vcosf", 4},
-        {"llvm.cos.f32", "vcosf", 4},
-        {"tanf", "vtanf", 4},
-        {"asinf", "vasinf", 4},
-        {"acosf", "vacosf", 4},
-        {"atanf", "vatanf", 4},
-
-        // Hyperbolic Functions
-        {"sinhf", "vsinhf", 4},
-        {"coshf", "vcoshf", 4},
-        {"tanhf", "vtanhf", 4},
-        {"asinhf", "vasinhf", 4},
-        {"acoshf", "vacoshf", 4},
-        {"atanhf", "vatanhf", 4},
+    #define TLI_DEFINE_ACCELERATE_VECFUNCS
+    #include "llvm/Analysis/VecFuncs.def"
+    };
+    addVectorizableFunctions(VecFuncs);
+    break;
+  }
+  case MASSV: {
+    const VecDesc VecFuncs[] = {
+    #define TLI_DEFINE_MASSV_VECFUNCS
+    #include "llvm/Analysis/VecFuncs.def"
     };
     addVectorizableFunctions(VecFuncs);
     break;
   }
   case SVML: {
     const VecDesc VecFuncs[] = {
-        {"sin", "__svml_sin2", 2},
-        {"sin", "__svml_sin4", 4},
-        {"sin", "__svml_sin8", 8},
-
-        {"sinf", "__svml_sinf4", 4},
-        {"sinf", "__svml_sinf8", 8},
-        {"sinf", "__svml_sinf16", 16},
-
-        {"llvm.sin.f64", "__svml_sin2", 2},
-        {"llvm.sin.f64", "__svml_sin4", 4},
-        {"llvm.sin.f64", "__svml_sin8", 8},
-
-        {"llvm.sin.f32", "__svml_sinf4", 4},
-        {"llvm.sin.f32", "__svml_sinf8", 8},
-        {"llvm.sin.f32", "__svml_sinf16", 16},
-
-        {"cos", "__svml_cos2", 2},
-        {"cos", "__svml_cos4", 4},
-        {"cos", "__svml_cos8", 8},
-
-        {"cosf", "__svml_cosf4", 4},
-        {"cosf", "__svml_cosf8", 8},
-        {"cosf", "__svml_cosf16", 16},
-
-        {"llvm.cos.f64", "__svml_cos2", 2},
-        {"llvm.cos.f64", "__svml_cos4", 4},
-        {"llvm.cos.f64", "__svml_cos8", 8},
-
-        {"llvm.cos.f32", "__svml_cosf4", 4},
-        {"llvm.cos.f32", "__svml_cosf8", 8},
-        {"llvm.cos.f32", "__svml_cosf16", 16},
-
-        {"pow", "__svml_pow2", 2},
-        {"pow", "__svml_pow4", 4},
-        {"pow", "__svml_pow8", 8},
-
-        {"powf", "__svml_powf4", 4},
-        {"powf", "__svml_powf8", 8},
-        {"powf", "__svml_powf16", 16},
-
-        { "__pow_finite", "__svml_pow2", 2 },
-        { "__pow_finite", "__svml_pow4", 4 },
-        { "__pow_finite", "__svml_pow8", 8 },
-
-        { "__powf_finite", "__svml_powf4", 4 },
-        { "__powf_finite", "__svml_powf8", 8 },
-        { "__powf_finite", "__svml_powf16", 16 },
-
-        {"llvm.pow.f64", "__svml_pow2", 2},
-        {"llvm.pow.f64", "__svml_pow4", 4},
-        {"llvm.pow.f64", "__svml_pow8", 8},
-
-        {"llvm.pow.f32", "__svml_powf4", 4},
-        {"llvm.pow.f32", "__svml_powf8", 8},
-        {"llvm.pow.f32", "__svml_powf16", 16},
-
-        {"exp", "__svml_exp2", 2},
-        {"exp", "__svml_exp4", 4},
-        {"exp", "__svml_exp8", 8},
-
-        {"expf", "__svml_expf4", 4},
-        {"expf", "__svml_expf8", 8},
-        {"expf", "__svml_expf16", 16},
-
-        { "__exp_finite", "__svml_exp2", 2 },
-        { "__exp_finite", "__svml_exp4", 4 },
-        { "__exp_finite", "__svml_exp8", 8 },
-
-        { "__expf_finite", "__svml_expf4", 4 },
-        { "__expf_finite", "__svml_expf8", 8 },
-        { "__expf_finite", "__svml_expf16", 16 },
-
-        {"llvm.exp.f64", "__svml_exp2", 2},
-        {"llvm.exp.f64", "__svml_exp4", 4},
-        {"llvm.exp.f64", "__svml_exp8", 8},
-
-        {"llvm.exp.f32", "__svml_expf4", 4},
-        {"llvm.exp.f32", "__svml_expf8", 8},
-        {"llvm.exp.f32", "__svml_expf16", 16},
-
-        {"log", "__svml_log2", 2},
-        {"log", "__svml_log4", 4},
-        {"log", "__svml_log8", 8},
-
-        {"logf", "__svml_logf4", 4},
-        {"logf", "__svml_logf8", 8},
-        {"logf", "__svml_logf16", 16},
-
-        { "__log_finite", "__svml_log2", 2 },
-        { "__log_finite", "__svml_log4", 4 },
-        { "__log_finite", "__svml_log8", 8 },
-
-        { "__logf_finite", "__svml_logf4", 4 },
-        { "__logf_finite", "__svml_logf8", 8 },
-        { "__logf_finite", "__svml_logf16", 16 },
-
-        {"llvm.log.f64", "__svml_log2", 2},
-        {"llvm.log.f64", "__svml_log4", 4},
-        {"llvm.log.f64", "__svml_log8", 8},
-
-        {"llvm.log.f32", "__svml_logf4", 4},
-        {"llvm.log.f32", "__svml_logf8", 8},
-        {"llvm.log.f32", "__svml_logf16", 16},
+    #define TLI_DEFINE_SVML_VECFUNCS
+    #include "llvm/Analysis/VecFuncs.def"
     };
     addVectorizableFunctions(VecFuncs);
     break;
@@ -1589,9 +1565,8 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
   if (funcName.empty())
     return false;
 
-  std::vector<VecDesc>::const_iterator I = std::lower_bound(
-      VectorDescs.begin(), VectorDescs.end(), funcName,
-      compareWithScalarFnName);
+  std::vector<VecDesc>::const_iterator I =
+      llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName);
   return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
 }
 
@@ -1600,8 +1575,8 @@ StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
   F = sanitizeFunctionName(F);
   if (F.empty())
     return F;
-  std::vector<VecDesc>::const_iterator I = std::lower_bound(
-      VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+  std::vector<VecDesc>::const_iterator I =
+      llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
   while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
     if (I->VectorizationFactor == VF)
       return I->VectorFnName;
@@ -1616,8 +1591,8 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
   if (F.empty())
     return F;
 
-  std::vector<VecDesc>::const_iterator I = std::lower_bound(
-      ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+  std::vector<VecDesc>::const_iterator I =
+      llvm::lower_bound(ScalarDescs, F, compareWithVectorFnName);
   if (I == VectorDescs.end() || StringRef(I->VectorFnName) != F)
     return StringRef();
   VF = I->VectorizationFactor;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9151d46c6cce..eb04c34453fb 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +18,8 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include <utility>
 
 using namespace llvm;
@@ -41,6 +42,101 @@ struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
 };
 }
 
+bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
+  // A loop with irreducible control flow cannot be converted to a hardware
+  // loop, so report such a loop as not analyzable.
+  LoopBlocksRPO RPOT(L);  
+  RPOT.perform(&LI);
+  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
+    return false;
+  return true;
+}
+
+bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
+                                               LoopInfo &LI, DominatorTree &DT,
+                                               bool ForceNestedLoop,
+                                               bool ForceHardwareLoopPHI) {
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+                                               IE = ExitingBlocks.end();
+       I != IE; ++I) {
+    BasicBlock *BB = *I;
+
+    // If we pass the updated counter back through a phi, we need to know
+    // which latch the updated value will be coming from.
+    if (!L->isLoopLatch(BB)) {
+      if (ForceHardwareLoopPHI || CounterInReg)
+        continue;
+    }
+
+    const SCEV *EC = SE.getExitCount(L, BB);
+    if (isa<SCEVCouldNotCompute>(EC))
+      continue;
+    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+      if (ConstEC->getValue()->isZero())
+        continue;
+    } else if (!SE.isLoopInvariant(EC, L))
+      continue;
+
+    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
+      continue;
+
+    // If this exiting block is contained in a nested loop, it is not eligible
+    // for insertion of the branch-and-decrement (unless nesting is legal or
+    // forced), since the inner loop would clobber the value in the CTR.
+    if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
+      continue;
+
+    // We now have a loop-invariant count of loop iterations (which is not the
+    // constant zero) for which we know that this loop will not exit via this
+    // exiting block.
+
+    // We need to make sure that this block will run on every loop iteration.
+    // For this to be true, we must dominate all blocks with backedges. Such
+    // blocks are in-loop predecessors to the header block.
+    bool NotAlways = false;
+    for (pred_iterator PI = pred_begin(L->getHeader()),
+                       PIE = pred_end(L->getHeader());
+         PI != PIE; ++PI) {
+      if (!L->contains(*PI))
+        continue;
+
+      if (!DT.dominates(*I, *PI)) {
+        NotAlways = true;
+        break;
+      }
+    }
+
+    if (NotAlways)
+      continue;
+
+    // Make sure this block ends with a conditional branch.
+    Instruction *TI = BB->getTerminator();
+    if (!TI)
+      continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (!BI->isConditional())
+        continue;
+
+      ExitBranch = BI;
+    } else
+      continue;
+
+    // Note that this block may not be the loop latch block, even if the loop
+    // has a latch block.
+    ExitBlock = *I;
+    ExitCount = EC;
+    break;
+  }
+
+  if (!ExitBlock)
+    return false;
+  return true;
+}
+
 TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
     : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
 
@@ -61,15 +157,17 @@ int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
   return Cost;
 }
 
-int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
-  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
+                                     const User *U) const {
+  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getCallCost(const Function *F,
-                                     ArrayRef<const Value *> Arguments) const {
-  int Cost = TTIImpl->getCallCost(F, Arguments);
+                                     ArrayRef<const Value *> Arguments,
+                                     const User *U) const {
+  int Cost = TTIImpl->getCallCost(F, Arguments, U);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -78,6 +176,10 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
   return TTIImpl->getInliningThresholdMultiplier();
 }
 
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+  return TTIImpl->getInlinerVectorBonusPercent();
+}
+
 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands) const {
   return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
@@ -89,8 +191,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I,
 }
 
 int TargetTransformInfo::getIntrinsicCost(
-    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
+    const User *U) const {
+  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -128,6 +231,12 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
   return TTIImpl->isLoweredToCall(F);
 }
 
+bool TargetTransformInfo::isHardwareLoopProfitable(
+  Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
+  TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
+  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+}
+
 void TargetTransformInfo::getUnrollingPreferences(
     Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
   return TTIImpl->getUnrollingPreferences(L, SE, UP);
@@ -159,10 +268,21 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
   return TTIImpl->canMacroFuseCmp();
 }
 
+bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
+                                     ScalarEvolution *SE, LoopInfo *LI,
+                                     DominatorTree *DT, AssumptionCache *AC,
+                                     TargetLibraryInfo *LibInfo) const {
+  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+}
+
 bool TargetTransformInfo::shouldFavorPostInc() const {
   return TTIImpl->shouldFavorPostInc();
 }
 
+bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
+  return TTIImpl->shouldFavorBackedgeIndex(L);
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }
@@ -171,6 +291,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
   return TTIImpl->isLegalMaskedLoad(DataType);
 }
 
+bool TargetTransformInfo::isLegalNTStore(Type *DataType,
+                                         unsigned Alignment) const {
+  return TTIImpl->isLegalNTStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
+                                        unsigned Alignment) const {
+  return TTIImpl->isLegalNTLoad(DataType, Alignment);
+}
+
 bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
   return TTIImpl->isLegalMaskedGather(DataType);
 }
@@ -179,6 +309,14 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
   return TTIImpl->isLegalMaskedScatter(DataType);
 }
 
+bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
+  return TTIImpl->isLegalMaskedCompressStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
+  return TTIImpl->isLegalMaskedExpandLoad(DataType);
+}
+
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
@@ -259,9 +397,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }
 
-const TargetTransformInfo::MemCmpExpansionOptions *
-TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
-  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
+TargetTransformInfo::MemCmpExpansionOptions
+TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
 }
 
 bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
@@ -570,6 +708,12 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp,
   return Cost;
 }
 
+int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
+  int Cost = TTIImpl->getMemcpyCost(I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                     bool IsPairwiseForm) const {
   int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
@@ -688,6 +832,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }
 
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+  return TTIImpl->getGISelRematGlobalCost();
+}
+
 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
   return TTIImpl->getInstructionLatency(I);
 }
@@ -1023,6 +1171,16 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands);
   }
+  case Instruction::FNeg: {
+    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+    Op2VK = OK_AnyValue;
+    Op2VP = OP_None;
+    SmallVector<const Value *, 2> Operands(I->operand_values());
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+                                  Op1VP, Op2VP, Operands);
+  }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 4dec53151ed6..879c7172d038 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -1,9 +1,8 @@
 //===- Trace.cpp - Implementation of Trace class --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 83974da30a54..3b9040aa0f52 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -368,26 +367,28 @@ static bool isStructPathTBAA(const MDNode *MD) {
 }
 
 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
-                                     const MemoryLocation &LocB) {
+                                     const MemoryLocation &LocB,
+                                     AAQueryInfo &AAQI) {
   if (!EnableTBAA)
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   // If accesses may alias, chain to the next AliasAnalysis.
   if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
-    return AAResultBase::alias(LocA, LocB);
+    return AAResultBase::alias(LocA, LocB, AAQI);
 
   // Otherwise return a definitive result.
   return NoAlias;
 }
 
 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+                                               AAQueryInfo &AAQI,
                                                bool OrLocal) {
   if (!EnableTBAA)
-    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
 
   const MDNode *M = Loc.AATags.TBAA;
   if (!M)
-    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
 
   // If this is an "immutable" type, we can assume the pointer is pointing
   // to constant memory.
@@ -395,7 +396,7 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
       (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
     return true;
 
-  return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+  return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
 }
 
 FunctionModRefBehavior
@@ -421,29 +422,31 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
 }
 
 ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
-                                            const MemoryLocation &Loc) {
+                                            const MemoryLocation &Loc,
+                                            AAQueryInfo &AAQI) {
   if (!EnableTBAA)
-    return AAResultBase::getModRefInfo(Call, Loc);
+    return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 
   if (const MDNode *L = Loc.AATags.TBAA)
     if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
       if (!Aliases(L, M))
         return ModRefInfo::NoModRef;
 
-  return AAResultBase::getModRefInfo(Call, Loc);
+  return AAResultBase::getModRefInfo(Call, Loc, AAQI);
 }
 
 ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
-                                            const CallBase *Call2) {
+                                            const CallBase *Call2,
+                                            AAQueryInfo &AAQI) {
   if (!EnableTBAA)
-    return AAResultBase::getModRefInfo(Call1, Call2);
+    return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 
   if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
     if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
       if (!Aliases(M1, M2))
         return ModRefInfo::NoModRef;
 
-  return AAResultBase::getModRefInfo(Call1, Call2);
+  return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
 }
 
 bool MDNode::isTBAAVtableAccess() const {
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index bd13a43b8d46..9311dfbc6eba 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -1,9 +1,8 @@
 //===- TypeMetadataUtils.cpp - Utilities related to type metadata ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ValueLattice.cpp b/lib/Analysis/ValueLattice.cpp
index 7de437ca480e..a0115a0eec36 100644
--- a/lib/Analysis/ValueLattice.cpp
+++ b/lib/Analysis/ValueLattice.cpp
@@ -1,9 +1,8 @@
 //===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Analysis/ValueLatticeUtils.cpp b/lib/Analysis/ValueLatticeUtils.cpp
index 22c9de4fe94d..3f9287e26ce7 100644
--- a/lib/Analysis/ValueLatticeUtils.cpp
+++ b/lib/Analysis/ValueLatticeUtils.cpp
@@ -1,9 +1,8 @@
 //===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 0446426c0e66..c70906dcc629 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1,9 +1,8 @@
 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,7 +38,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -617,237 +615,242 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
     if (Depth == MaxDepth)
       continue;
 
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
+    if (!Cmp)
+      continue;
+
     Value *A, *B;
-    auto m_V = m_CombineOr(m_Specific(V),
-                           m_CombineOr(m_PtrToInt(m_Specific(V)),
-                           m_BitCast(m_Specific(V))));
+    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
 
     CmpInst::Predicate Pred;
     uint64_t C;
-    // assume(v = a)
-    if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
-        Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      Known.Zero |= RHSKnown.Zero;
-      Known.One  |= RHSKnown.One;
-    // assume(v & b = a)
-    } else if (match(Arg,
-                     m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits MaskKnown(BitWidth);
-      computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
-      // For those bits in the mask that are known to be one, we can propagate
-      // known bits from the RHS to V.
-      Known.Zero |= RHSKnown.Zero & MaskKnown.One;
-      Known.One  |= RHSKnown.One  & MaskKnown.One;
-    // assume(~(v & b) = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits MaskKnown(BitWidth);
-      computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
-      // For those bits in the mask that are known to be one, we can propagate
-      // inverted known bits from the RHS to V.
-      Known.Zero |= RHSKnown.One  & MaskKnown.One;
-      Known.One  |= RHSKnown.Zero & MaskKnown.One;
-    // assume(v | b = a)
-    } else if (match(Arg,
-                     m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits BKnown(BitWidth);
-      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
-      // For those bits in B that are known to be zero, we can propagate known
-      // bits from the RHS to V.
-      Known.Zero |= RHSKnown.Zero & BKnown.Zero;
-      Known.One  |= RHSKnown.One  & BKnown.Zero;
-    // assume(~(v | b) = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits BKnown(BitWidth);
-      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
-      // For those bits in B that are known to be zero, we can propagate
-      // inverted known bits from the RHS to V.
-      Known.Zero |= RHSKnown.One  & BKnown.Zero;
-      Known.One  |= RHSKnown.Zero & BKnown.Zero;
-    // assume(v ^ b = a)
-    } else if (match(Arg,
-                     m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits BKnown(BitWidth);
-      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
-      // For those bits in B that are known to be zero, we can propagate known
-      // bits from the RHS to V. For those bits in B that are known to be one,
-      // we can propagate inverted known bits from the RHS to V.
-      Known.Zero |= RHSKnown.Zero & BKnown.Zero;
-      Known.One  |= RHSKnown.One  & BKnown.Zero;
-      Known.Zero |= RHSKnown.One  & BKnown.One;
-      Known.One  |= RHSKnown.Zero & BKnown.One;
-    // assume(~(v ^ b) = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      KnownBits BKnown(BitWidth);
-      computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
-      // For those bits in B that are known to be zero, we can propagate
-      // inverted known bits from the RHS to V. For those bits in B that are
-      // known to be one, we can propagate known bits from the RHS to V.
-      Known.Zero |= RHSKnown.One  & BKnown.Zero;
-      Known.One  |= RHSKnown.Zero & BKnown.Zero;
-      Known.Zero |= RHSKnown.Zero & BKnown.One;
-      Known.One  |= RHSKnown.One  & BKnown.One;
-    // assume(v << c = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
-               C < BitWidth) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      // For those bits in RHS that are known, we can propagate them to known
-      // bits in V shifted to the right by C.
-      RHSKnown.Zero.lshrInPlace(C);
-      Known.Zero |= RHSKnown.Zero;
-      RHSKnown.One.lshrInPlace(C);
-      Known.One  |= RHSKnown.One;
-    // assume(~(v << c) = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
-               C < BitWidth) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      // For those bits in RHS that are known, we can propagate them inverted
-      // to known bits in V shifted to the right by C.
-      RHSKnown.One.lshrInPlace(C);
-      Known.Zero |= RHSKnown.One;
-      RHSKnown.Zero.lshrInPlace(C);
-      Known.One  |= RHSKnown.Zero;
-    // assume(v >> c = a)
-    } else if (match(Arg,
-                     m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
-                              m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
-               C < BitWidth) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      // For those bits in RHS that are known, we can propagate them to known
-      // bits in V shifted to the right by C.
-      Known.Zero |= RHSKnown.Zero << C;
-      Known.One  |= RHSKnown.One  << C;
-    // assume(~(v >> c) = a)
-    } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
-                                   m_Value(A))) &&
-               Pred == ICmpInst::ICMP_EQ &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
-               C < BitWidth) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-      // For those bits in RHS that are known, we can propagate them inverted
-      // to known bits in V shifted to the right by C.
-      Known.Zero |= RHSKnown.One  << C;
-      Known.One  |= RHSKnown.Zero << C;
-    // assume(v >=_s c) where c is non-negative
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_SGE &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      if (RHSKnown.isNonNegative()) {
-        // We know that the sign bit is zero.
-        Known.makeNonNegative();
+    switch (Cmp->getPredicate()) {
+    default:
+      break;
+    case ICmpInst::ICMP_EQ:
+      // assume(v = a)
+      if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        Known.Zero |= RHSKnown.Zero;
+        Known.One  |= RHSKnown.One;
+      // assume(v & b = a)
+      } else if (match(Cmp,
+                       m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits MaskKnown(BitWidth);
+        computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+        // For those bits in the mask that are known to be one, we can propagate
+        // known bits from the RHS to V.
+        Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+        Known.One  |= RHSKnown.One  & MaskKnown.One;
+      // assume(~(v & b) = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits MaskKnown(BitWidth);
+        computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+        // For those bits in the mask that are known to be one, we can propagate
+        // inverted known bits from the RHS to V.
+        Known.Zero |= RHSKnown.One  & MaskKnown.One;
+        Known.One  |= RHSKnown.Zero & MaskKnown.One;
+      // assume(v | b = a)
+      } else if (match(Cmp,
+                       m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits BKnown(BitWidth);
+        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+        // For those bits in B that are known to be zero, we can propagate known
+        // bits from the RHS to V.
+        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+        Known.One  |= RHSKnown.One  & BKnown.Zero;
+      // assume(~(v | b) = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits BKnown(BitWidth);
+        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+        // For those bits in B that are known to be zero, we can propagate
+        // inverted known bits from the RHS to V.
+        Known.Zero |= RHSKnown.One  & BKnown.Zero;
+        Known.One  |= RHSKnown.Zero & BKnown.Zero;
+      // assume(v ^ b = a)
+      } else if (match(Cmp,
+                       m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits BKnown(BitWidth);
+        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+        // For those bits in B that are known to be zero, we can propagate known
+        // bits from the RHS to V. For those bits in B that are known to be one,
+        // we can propagate inverted known bits from the RHS to V.
+        Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+        Known.One  |= RHSKnown.One  & BKnown.Zero;
+        Known.Zero |= RHSKnown.One  & BKnown.One;
+        Known.One  |= RHSKnown.Zero & BKnown.One;
+      // assume(~(v ^ b) = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        KnownBits BKnown(BitWidth);
+        computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+        // For those bits in B that are known to be zero, we can propagate
+        // inverted known bits from the RHS to V. For those bits in B that are
+        // known to be one, we can propagate known bits from the RHS to V.
+        Known.Zero |= RHSKnown.One  & BKnown.Zero;
+        Known.One  |= RHSKnown.Zero & BKnown.Zero;
+        Known.Zero |= RHSKnown.Zero & BKnown.One;
+        Known.One  |= RHSKnown.One  & BKnown.One;
+      // assume(v << c = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        // For those bits in RHS that are known, we can propagate them to known
+        // bits in V shifted to the right by C.
+        RHSKnown.Zero.lshrInPlace(C);
+        Known.Zero |= RHSKnown.Zero;
+        RHSKnown.One.lshrInPlace(C);
+        Known.One  |= RHSKnown.One;
+      // assume(~(v << c) = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        // For those bits in RHS that are known, we can propagate them inverted
+        // to known bits in V shifted to the right by C.
+        RHSKnown.One.lshrInPlace(C);
+        Known.Zero |= RHSKnown.One;
+        RHSKnown.Zero.lshrInPlace(C);
+        Known.One  |= RHSKnown.Zero;
+      // assume(v >> c = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        // For those bits in RHS that are known, we can propagate them to known
+        // bits in V shifted to the left by C.
+        Known.Zero |= RHSKnown.Zero << C;
+        Known.One  |= RHSKnown.One  << C;
+      // assume(~(v >> c) = a)
+      } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
+                                     m_Value(A))) &&
+                 isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+        // For those bits in RHS that are known, we can propagate them inverted
+        // to known bits in V shifted to the left by C.
+        Known.Zero |= RHSKnown.One  << C;
+        Known.One  |= RHSKnown.Zero << C;
       }
-    // assume(v >_s c) where c is at least -1.
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_SGT &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
-        // We know that the sign bit is zero.
-        Known.makeNonNegative();
+      break;
+    case ICmpInst::ICMP_SGE:
+      // assume(v >=_s c) where c is non-negative
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+        if (RHSKnown.isNonNegative()) {
+          // We know that the sign bit is zero.
+          Known.makeNonNegative();
+        }
       }
-    // assume(v <=_s c) where c is negative
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_SLE &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      if (RHSKnown.isNegative()) {
-        // We know that the sign bit is one.
-        Known.makeNegative();
+      break;
+    case ICmpInst::ICMP_SGT:
+      // assume(v >_s c) where c is at least -1.
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+        if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
+          // We know that the sign bit is zero.
+          Known.makeNonNegative();
+        }
       }
-    // assume(v <_s c) where c is non-positive
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_SLT &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      if (RHSKnown.isZero() || RHSKnown.isNegative()) {
-        // We know that the sign bit is one.
-        Known.makeNegative();
+      break;
+    case ICmpInst::ICMP_SLE:
+      // assume(v <=_s c) where c is negative
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+        if (RHSKnown.isNegative()) {
+          // We know that the sign bit is one.
+          Known.makeNegative();
+        }
       }
-    // assume(v <=_u c)
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_ULE &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      // Whatever high bits in c are zero are known to be zero.
-      Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
-      // assume(v <_u c)
-    } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
-               Pred == ICmpInst::ICMP_ULT &&
-               isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
-      KnownBits RHSKnown(BitWidth);
-      computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
-      // If the RHS is known zero, then this assumption must be wrong (nothing
-      // is unsigned less than zero). Signal a conflict and get out of here.
-      if (RHSKnown.isZero()) {
-        Known.Zero.setAllBits();
-        Known.One.setAllBits();
-        break;
+      break;
+    case ICmpInst::ICMP_SLT:
+      // assume(v <_s c) where c is non-positive
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+        if (RHSKnown.isZero() || RHSKnown.isNegative()) {
+          // We know that the sign bit is one.
+          Known.makeNegative();
+        }
       }
-
-      // Whatever high bits in c are zero are known to be zero (if c is a power
-      // of 2, then one more).
-      if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
-        Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
-      else
+      break;
+    case ICmpInst::ICMP_ULE:
+      // assume(v <=_u c)
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+        // Whatever high bits in c are zero are known to be zero.
         Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+      }
+      break;
+    case ICmpInst::ICMP_ULT:
+      // assume(v <_u c)
+      if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+          isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+        KnownBits RHSKnown(BitWidth);
+        computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+        // If the RHS is known zero, then this assumption must be wrong (nothing
+        // is unsigned less than zero). Signal a conflict and get out of here.
+        if (RHSKnown.isZero()) {
+          Known.Zero.setAllBits();
+          Known.One.setAllBits();
+          break;
+        }
+
+        // Whatever high bits in c are zero are known to be zero (if c is a power
+        // of 2, then one more).
+        if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
+          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
+        else
+          Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+      }
+      break;
     }
   }
 
@@ -1129,12 +1132,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
       Q.DL.getTypeSizeInBits(ScalarTy);
 
     assert(SrcBitWidth && "SrcBitWidth can't be zero");
-    Known = Known.zextOrTrunc(SrcBitWidth);
+    Known = Known.zextOrTrunc(SrcBitWidth, false);
     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
-    Known = Known.zextOrTrunc(BitWidth);
-    // Any top bits are known to be zero.
-    if (BitWidth > SrcBitWidth)
-      Known.Zero.setBitsFrom(SrcBitWidth);
+    Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */);
     break;
   }
   case Instruction::BitCast: {
@@ -1527,6 +1527,37 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
             Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
         break;
       }
+      case Intrinsic::uadd_sat:
+      case Intrinsic::usub_sat: {
+        bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+
+        // Add: Leading ones of either operand are preserved.
+        // Sub: Leading zeros of LHS and leading ones of RHS are preserved
+        // as leading zeros in the result.
+        unsigned LeadingKnown;
+        if (IsAdd)
+          LeadingKnown = std::max(Known.countMinLeadingOnes(),
+                                  Known2.countMinLeadingOnes());
+        else
+          LeadingKnown = std::max(Known.countMinLeadingZeros(),
+                                  Known2.countMinLeadingOnes());
+
+        Known = KnownBits::computeForAddSub(
+            IsAdd, /* NSW */ false, Known, Known2);
+
+        // We select between the operation result and all-ones/zero
+        // respectively, so we can preserve known ones/zeros.
+        if (IsAdd) {
+          Known.One.setHighBits(LeadingKnown);
+          Known.Zero.clearAllBits();
+        } else {
+          Known.Zero.setHighBits(LeadingKnown);
+          Known.One.clearAllBits();
+        }
+        break;
+      }
       case Intrinsic::x86_sse42_crc32_64_64:
         Known.Zero.setBitsFrom(32);
         break;
@@ -1967,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
       // Must be non-zero due to null test above.
       return true;
 
+    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+      // See the comment for IntToPtr/PtrToInt instructions below.
+      if (CE->getOpcode() == Instruction::IntToPtr ||
+          CE->getOpcode() == Instruction::PtrToInt)
+        if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
+            Q.DL.getTypeSizeInBits(CE->getType()))
+          return isKnownNonZero(CE->getOperand(0), Depth, Q);
+    }
+
     // For constant vectors, check that all elements are undefined or known
     // non-zero to determine that the whole vector is known non-zero.
     if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
@@ -2037,11 +2077,33 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
     if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
       return true;
 
+    // Look through bitcast operations, GEPs, and int2ptr instructions as they
+    // do not alter the value, or at least not the nullness property of the
+    // value, e.g., int2ptr is allowed to zero/sign extend the value.
+    //
+    // Note that we have to take special care to avoid looking through
+    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
+    // as casts that can alter the value, e.g., AddrSpaceCasts.
     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
       if (isGEPKnownNonNull(GEP, Depth, Q))
         return true;
+
+    if (auto *BCO = dyn_cast<BitCastOperator>(V))
+      return isKnownNonZero(BCO->getOperand(0), Depth, Q);
+
+    if (auto *I2P = dyn_cast<IntToPtrInst>(V))
+      if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
+          Q.DL.getTypeSizeInBits(I2P->getDestTy()))
+        return isKnownNonZero(I2P->getOperand(0), Depth, Q);
   }
 
+  // Similar to int2ptr above, we can look through ptr2int here if the cast
+  // is a no-op or an extend and not a truncate.
+  if (auto *P2I = dyn_cast<PtrToIntInst>(V))
+    if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
+        Q.DL.getTypeSizeInBits(P2I->getDestTy()))
+      return isKnownNonZero(P2I->getOperand(0), Depth, Q);
+
   unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
 
   // X | Y != 0 if X != 0 or Y != 0.
@@ -3082,6 +3144,11 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
     case Intrinsic::sqrt:
       return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
              CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+    case Intrinsic::minnum:
+    case Intrinsic::maxnum:
+      // If either operand is not NaN, the result is not NaN.
+      return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
+             isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
     default:
       return false;
     }
@@ -3107,7 +3174,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
   return true;
 }
 
-Value *llvm::isBytewiseValue(Value *V) {
+Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
 
   // All byte-wide stores are splatable, even of arbitrary variables.
   if (V->getType()->isIntegerTy(8))
@@ -3120,6 +3187,10 @@ Value *llvm::isBytewiseValue(Value *V) {
   if (isa<UndefValue>(V))
     return UndefInt8;
 
+  const uint64_t Size = DL.getTypeStoreSize(V->getType());
+  if (!Size)
+    return UndefInt8;
+
   Constant *C = dyn_cast<Constant>(V);
   if (!C) {
     // Conceptually, we could handle things like:
@@ -3146,7 +3217,8 @@ Value *llvm::isBytewiseValue(Value *V) {
     else if (CFP->getType()->isDoubleTy())
       Ty = Type::getInt64Ty(Ctx);
     // Don't handle long double formats, which have strange constraints.
-    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
+    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
+              : nullptr;
   }
 
   // We can handle constant integers that are multiple of 8 bits.
@@ -3159,6 +3231,17 @@ Value *llvm::isBytewiseValue(Value *V) {
     }
   }
 
+  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+    if (CE->getOpcode() == Instruction::IntToPtr) {
+      auto PS = DL.getPointerSizeInBits(
+          cast<PointerType>(CE->getType())->getAddressSpace());
+      return isBytewiseValue(
+          ConstantExpr::getIntegerCast(CE->getOperand(0),
+                                       Type::getIntNTy(Ctx, PS), false),
+          DL);
+    }
+  }
+
   auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
     if (LHS == RHS)
       return LHS;
@@ -3174,20 +3257,15 @@ Value *llvm::isBytewiseValue(Value *V) {
   if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
     Value *Val = UndefInt8;
     for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
-      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
+      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
         return nullptr;
     return Val;
   }
 
-  if (isa<ConstantVector>(C)) {
-    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
-    return Splat ? isBytewiseValue(Splat) : nullptr;
-  }
-
-  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+  if (isa<ConstantAggregate>(C)) {
     Value *Val = UndefInt8;
     for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
-      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
         return nullptr;
     return Val;
   }
@@ -3363,57 +3441,6 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
   return nullptr;
 }
 
-/// Analyze the specified pointer to see if it can be expressed as a base
-/// pointer plus a constant offset. Return the base and offset to the caller.
-Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                              const DataLayout &DL) {
-  unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
-  APInt ByteOffset(BitWidth, 0);
-
-  // We walk up the defs but use a visited set to handle unreachable code. In
-  // that case, we stop after accumulating the cycle once (not that it
-  // matters).
-  SmallPtrSet<Value *, 16> Visited;
-  while (Visited.insert(Ptr).second) {
-    if (Ptr->getType()->isVectorTy())
-      break;
-
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
-      // If one of the values we have visited is an addrspacecast, then
-      // the pointer type of this GEP may be different from the type
-      // of the Ptr parameter which was passed to this function.  This
-      // means when we construct GEPOffset, we need to use the size
-      // of GEP's pointer type rather than the size of the original
-      // pointer type.
-      APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
-      if (!GEP->accumulateConstantOffset(DL, GEPOffset))
-        break;
-
-      APInt OrigByteOffset(ByteOffset);
-      ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
-      if (ByteOffset.getMinSignedBits() > 64) {
-        // Stop traversal if the pointer offset wouldn't fit into int64_t
-        // (this should be removed if Offset is updated to an APInt)
-        ByteOffset = OrigByteOffset;
-        break;
-      }
-
-      Ptr = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
-               Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
-      Ptr = cast<Operator>(Ptr)->getOperand(0);
-    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
-      if (GA->isInterposable())
-        break;
-      Ptr = GA->getAliasee();
-    } else {
-      break;
-    }
-  }
-  Offset = ByteOffset.getSExtValue();
-  return Ptr;
-}
-
 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
                                        unsigned CharSize) {
   // Make sure the GEP has exactly three arguments.
@@ -3638,7 +3665,9 @@ const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
     const CallBase *Call) {
   return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
-         Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
+         Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
+         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
 }
 
 /// \p PN defines a loop-variant pointer to an object.  Check if the
@@ -3717,26 +3746,27 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
   return V;
 }
 
-void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+void llvm::GetUnderlyingObjects(const Value *V,
+                                SmallVectorImpl<const Value *> &Objects,
                                 const DataLayout &DL, LoopInfo *LI,
                                 unsigned MaxLookup) {
-  SmallPtrSet<Value *, 4> Visited;
-  SmallVector<Value *, 4> Worklist;
+  SmallPtrSet<const Value *, 4> Visited;
+  SmallVector<const Value *, 4> Worklist;
   Worklist.push_back(V);
   do {
-    Value *P = Worklist.pop_back_val();
+    const Value *P = Worklist.pop_back_val();
     P = GetUnderlyingObject(P, DL, MaxLookup);
 
     if (!Visited.insert(P).second)
       continue;
 
-    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+    if (auto *SI = dyn_cast<SelectInst>(P)) {
       Worklist.push_back(SI->getTrueValue());
       Worklist.push_back(SI->getFalseValue());
       continue;
     }
 
-    if (PHINode *PN = dyn_cast<PHINode>(P)) {
+    if (auto *PN = dyn_cast<PHINode>(P)) {
       // If this PHI changes the underlying object in every iteration of the
       // loop, don't look through it.  Consider:
       //   int **A;
@@ -3797,10 +3827,10 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
   do {
     V = Working.pop_back_val();
 
-    SmallVector<Value *, 4> Objs;
-    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+    SmallVector<const Value *, 4> Objs;
+    GetUnderlyingObjects(V, Objs, DL);
 
-    for (Value *V : Objs) {
+    for (const Value *V : Objs) {
       if (!Visited.insert(V).second)
         continue;
       if (Operator::getOpcode(V) == Instruction::IntToPtr) {
@@ -3888,7 +3918,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
     return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
-                                              LI->getAlignment(), DL, CtxI, DT);
+                                              LI->getType(), LI->getAlignment(),
+                                              DL, CtxI, DT);
   }
   case Instruction::Call: {
     auto *CI = cast<const CallInst>(Inst);
@@ -3901,6 +3932,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   case Instruction::VAArg:
   case Instruction::Alloca:
   case Instruction::Invoke:
+  case Instruction::CallBr:
   case Instruction::PHI:
   case Instruction::Store:
   case Instruction::Ret:
@@ -3926,51 +3958,46 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
   return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
 }
 
+/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
+static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
+  switch (OR) {
+    case ConstantRange::OverflowResult::MayOverflow:
+      return OverflowResult::MayOverflow;
+    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+      return OverflowResult::AlwaysOverflowsLow;
+    case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+      return OverflowResult::AlwaysOverflowsHigh;
+    case ConstantRange::OverflowResult::NeverOverflows:
+      return OverflowResult::NeverOverflows;
+  }
+  llvm_unreachable("Unknown OverflowResult");
+}
+
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+static ConstantRange computeConstantRangeIncludingKnownBits(
+    const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
+    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
+  KnownBits Known = computeKnownBits(
+      V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
+  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
+  ConstantRange::PreferredRangeType RangeType =
+      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
+  return CR1.intersectWith(CR2, RangeType);
+}
+
 OverflowResult llvm::computeOverflowForUnsignedMul(
     const Value *LHS, const Value *RHS, const DataLayout &DL,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
     bool UseInstrInfo) {
-  // Multiplying n * m significant bits yields a result of n + m significant
-  // bits. If the total number of significant bits does not exceed the
-  // result bit width (minus 1), there is no overflow.
-  // This means if we have enough leading zero bits in the operands
-  // we can guarantee that the result does not overflow.
-  // Ref: "Hacker's Delight" by Henry Warren
-  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
-  KnownBits LHSKnown(BitWidth);
-  KnownBits RHSKnown(BitWidth);
-  computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
-                   UseInstrInfo);
-  computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
-                   UseInstrInfo);
-  // Note that underestimating the number of zero bits gives a more
-  // conservative answer.
-  unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
-                      RHSKnown.countMinLeadingZeros();
-  // First handle the easy case: if we have enough zero bits there's
-  // definitely no overflow.
-  if (ZeroBits >= BitWidth)
-    return OverflowResult::NeverOverflows;
-
-  // Get the largest possible values for each operand.
-  APInt LHSMax = ~LHSKnown.Zero;
-  APInt RHSMax = ~RHSKnown.Zero;
-
-  // We know the multiply operation doesn't overflow if the maximum values for
-  // each operand will not overflow after we multiply them together.
-  bool MaxOverflow;
-  (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
-  if (!MaxOverflow)
-    return OverflowResult::NeverOverflows;
-
-  // We know it always overflows if multiplying the smallest possible values for
-  // the operands also results in overflow.
-  bool MinOverflow;
-  (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
-  if (MinOverflow)
-    return OverflowResult::AlwaysOverflows;
-
-  return OverflowResult::MayOverflow;
+  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+                                        nullptr, UseInstrInfo);
+  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+                                        nullptr, UseInstrInfo);
+  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
+  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
+  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
 }
 
 OverflowResult
@@ -4020,69 +4047,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(
     const Value *LHS, const Value *RHS, const DataLayout &DL,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
     bool UseInstrInfo) {
-  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
-                                        nullptr, UseInstrInfo);
-  if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
-    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
-                                          nullptr, UseInstrInfo);
-
-    if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
-      // The sign bit is set in both cases: this MUST overflow.
-      return OverflowResult::AlwaysOverflows;
-    }
-
-    if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
-      // The sign bit is clear in both cases: this CANNOT overflow.
-      return OverflowResult::NeverOverflows;
-    }
-  }
-
-  return OverflowResult::MayOverflow;
-}
-
-/// Return true if we can prove that adding the two values of the
-/// knownbits will not overflow.
-/// Otherwise return false.
-static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
-                                    const KnownBits &RHSKnown) {
-  // Addition of two 2's complement numbers having opposite signs will never
-  // overflow.
-  if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
-      (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
-    return true;
-
-  // If either of the values is known to be non-negative, adding them can only
-  // overflow if the second is also non-negative, so we can assume that.
-  // Two non-negative numbers will only overflow if there is a carry to the
-  // sign bit, so we can check if even when the values are as big as possible
-  // there is no overflow to the sign bit.
-  if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
-    APInt MaxLHS = ~LHSKnown.Zero;
-    MaxLHS.clearSignBit();
-    APInt MaxRHS = ~RHSKnown.Zero;
-    MaxRHS.clearSignBit();
-    APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
-    return Result.isSignBitClear();
-  }
-
-  // If either of the values is known to be negative, adding them can only
-  // overflow if the second is also negative, so we can assume that.
-  // Two negative number will only overflow if there is no carry to the sign
-  // bit, so we can check if even when the values are as small as possible
-  // there is overflow to the sign bit.
-  if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
-    APInt MinLHS = LHSKnown.One;
-    MinLHS.clearSignBit();
-    APInt MinRHS = RHSKnown.One;
-    MinRHS.clearSignBit();
-    APInt Result = std::move(MinLHS) + std::move(MinRHS);
-    return Result.isSignBitSet();
-  }
-
-  // If we reached here it means that we know nothing about the sign bits.
-  // In this case we can't know if there will be an overflow, since by
-  // changing the sign bits any two values can be made to overflow.
-  return false;
+  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+      LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+      nullptr, UseInstrInfo);
+  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+      RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+      nullptr, UseInstrInfo);
+  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
 }
 
 static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
@@ -4114,30 +4085,35 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
       ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
     return OverflowResult::NeverOverflows;
 
-  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
-  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
-  if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
-    return OverflowResult::NeverOverflows;
+  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+      LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+      RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+  OverflowResult OR =
+      mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
+  if (OR != OverflowResult::MayOverflow)
+    return OR;
 
   // The remaining code needs Add to be available. Early returns if not so.
   if (!Add)
     return OverflowResult::MayOverflow;
 
   // If the sign of Add is the same as at least one of the operands, this add
-  // CANNOT overflow. This is particularly useful when the sum is
-  // @llvm.assume'ed non-negative rather than proved so from analyzing its
-  // operands.
+  // CANNOT overflow. If this can be determined from the known bits of the
+  // operands the above signedAddMayOverflow() check will have already done so.
+  // The only other way to improve on the known bits is from an assumption, so
+  // call computeKnownBitsFromAssume() directly.
   bool LHSOrRHSKnownNonNegative =
-      (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
+      (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
   bool LHSOrRHSKnownNegative =
-      (LHSKnown.isNegative() || RHSKnown.isNegative());
+      (LHSRange.isAllNegative() || RHSRange.isAllNegative());
   if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
-    KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
+    KnownBits AddKnown(LHSRange.getBitWidth());
+    computeKnownBitsFromAssume(
+        Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true));
     if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
-        (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
+        (AddKnown.isNegative() && LHSOrRHSKnownNegative))
       return OverflowResult::NeverOverflows;
-    }
   }
 
   return OverflowResult::MayOverflow;
@@ -4149,20 +4125,11 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
                                                    AssumptionCache *AC,
                                                    const Instruction *CxtI,
                                                    const DominatorTree *DT) {
-  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
-  if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
-    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
-    // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
-    if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
-      return OverflowResult::NeverOverflows;
-
-    // If the LHS is non-negative and the RHS negative, we always wrap.
-    if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
-      return OverflowResult::AlwaysOverflows;
-  }
-
-  return OverflowResult::MayOverflow;
+  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+      LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+      RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+  return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
 }
 
 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
@@ -4177,37 +4144,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
       ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
     return OverflowResult::NeverOverflows;
 
-  KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT);
-
-  KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT);
-
-  // Subtraction of two 2's complement numbers having identical signs will
-  // never overflow.
-  if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
-      (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
-    return OverflowResult::NeverOverflows;
-
-  // TODO: implement logic similar to checkRippleForAdd
-  return OverflowResult::MayOverflow;
+  ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+      LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+  ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+      RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+  return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
 }
 
-bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
                                      const DominatorTree &DT) {
-#ifndef NDEBUG
-  auto IID = II->getIntrinsicID();
-  assert((IID == Intrinsic::sadd_with_overflow ||
-          IID == Intrinsic::uadd_with_overflow ||
-          IID == Intrinsic::ssub_with_overflow ||
-          IID == Intrinsic::usub_with_overflow ||
-          IID == Intrinsic::smul_with_overflow ||
-          IID == Intrinsic::umul_with_overflow) &&
-         "Not an overflow intrinsic!");
-#endif
-
   SmallVector<const BranchInst *, 2> GuardingBranches;
   SmallVector<const ExtractValueInst *, 2> Results;
 
-  for (const User *U : II->users()) {
+  for (const User *U : WO->users()) {
     if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
       assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
 
@@ -4307,6 +4256,11 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
     if (!CS.doesNotThrow())
       return false;
 
+    // A function which doesn't throw and has "willreturn" attribute will
+    // always return.
+    if (CS.hasFnAttr(Attribute::WillReturn))
+      return true;
+
     // Non-throwing call sites can loop infinitely, call exit/pthread_exit
     // etc. and thus not return.  However, LLVM already assumes that
     //
@@ -4325,7 +4279,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
     // is guaranteed to return.
     return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
            match(I, m_Intrinsic<Intrinsic::assume>()) ||
-           match(I, m_Intrinsic<Intrinsic::sideeffect>());
+           match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
+           match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
   }
 
   // Other instructions return normally.
@@ -4333,7 +4288,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
 }
 
 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
-  // TODO: This is slightly consdervative for invoke instruction since exiting
+  // TODO: This is slightly conservative for invoke instruction since exiting
   // via an exception *is* normal control for them.
   for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
     if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
@@ -4357,6 +4312,8 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
 }
 
 bool llvm::propagatesFullPoison(const Instruction *I) {
+  // TODO: This should include all instructions apart from phis, selects and
+  // call-like instructions.
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::Sub:
@@ -4409,10 +4366,21 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
       return I->getOperand(1);
 
     default:
+      // Note: It's really tempting to think that a conditional branch or
+      // switch should be listed here, but that's incorrect.  It's not
+      // branching off of poison which is UB, it is executing a side effecting
+      // instruction which follows the branch.
       return nullptr;
   }
 }
 
+bool llvm::mustTriggerUB(const Instruction *I,
+                         const SmallSet<const Value *, 16>& KnownPoison) {
+  auto *NotPoison = getGuaranteedNonFullPoisonOp(I);
+  return (NotPoison && KnownPoison.count(NotPoison));
+}
+
+
 bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
   // We currently only look for uses of poison values within the same basic
   // block, as that makes it easier to guarantee that the uses will be
@@ -4436,8 +4404,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
   while (Iter++ < MaxDepth) {
     for (auto &I : make_range(Begin, End)) {
       if (&I != PoisonI) {
-        const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
-        if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
+        if (mustTriggerUB(&I, YieldsPoison))
           return true;
         if (!isGuaranteedToTransferExecutionToSuccessor(&I))
           return false;
@@ -4926,6 +4893,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
       if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
         return {SPF_ABS, SPNB_NA, false};
 
+      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
+      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
+        return {SPF_ABS, SPNB_NA, false};
+
       // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
       // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
       if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
@@ -5084,11 +5055,19 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
   CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
   if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
 
+  Value *TrueVal = SI->getTrueValue();
+  Value *FalseVal = SI->getFalseValue();
+
+  return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
+                                            CastOp, Depth);
+}
+
+SelectPatternResult llvm::matchDecomposedSelectPattern(
+    CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+    Instruction::CastOps *CastOp, unsigned Depth) {
   CmpInst::Predicate Pred = CmpI->getPredicate();
   Value *CmpLHS = CmpI->getOperand(0);
   Value *CmpRHS = CmpI->getOperand(1);
-  Value *TrueVal = SI->getTrueValue();
-  Value *FalseVal = SI->getFalseValue();
   FastMathFlags FMF;
   if (isa<FPMathOperator>(CmpI))
     FMF = CmpI->getFastMathFlags();
@@ -5430,3 +5409,298 @@ Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
   bool CondIsTrue = TrueBB == ContextBB;
   return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
 }
+
+static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
+                              APInt &Upper, const InstrInfoQuery &IIQ) {
+  unsigned Width = Lower.getBitWidth();
+  const APInt *C;
+  switch (BO.getOpcode()) {
+  case Instruction::Add:
+    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+      // FIXME: If we have both nuw and nsw, we should reduce the range further.
+      if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+        // 'add nuw x, C' produces [C, UINT_MAX].
+        Lower = *C;
+      } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+        if (C->isNegative()) {
+          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
+          Lower = APInt::getSignedMinValue(Width);
+          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+        } else {
+          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
+          Lower = APInt::getSignedMinValue(Width) + *C;
+          Upper = APInt::getSignedMaxValue(Width) + 1;
+        }
+      }
+    }
+    break;
+
+  case Instruction::And:
+    if (match(BO.getOperand(1), m_APInt(C)))
+      // 'and x, C' produces [0, C].
+      Upper = *C + 1;
+    break;
+
+  case Instruction::Or:
+    if (match(BO.getOperand(1), m_APInt(C)))
+      // 'or x, C' produces [C, UINT_MAX].
+      Lower = *C;
+    break;
+
+  case Instruction::AShr:
+    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
+      Lower = APInt::getSignedMinValue(Width).ashr(*C);
+      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
+    } else if (match(BO.getOperand(0), m_APInt(C))) {
+      unsigned ShiftAmount = Width - 1;
+      if (!C->isNullValue() && IIQ.isExact(&BO))
+        ShiftAmount = C->countTrailingZeros();
+      if (C->isNegative()) {
+        // 'ashr C, x' produces [C, C >> (Width-1)]
+        Lower = *C;
+        Upper = C->ashr(ShiftAmount) + 1;
+      } else {
+        // 'ashr C, x' produces [C >> (Width-1), C]
+        Lower = C->ashr(ShiftAmount);
+        Upper = *C + 1;
+      }
+    }
+    break;
+
+  case Instruction::LShr:
+    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+      // 'lshr x, C' produces [0, UINT_MAX >> C].
+      Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+    } else if (match(BO.getOperand(0), m_APInt(C))) {
+      // 'lshr C, x' produces [C >> (Width-1), C].
+      unsigned ShiftAmount = Width - 1;
+      if (!C->isNullValue() && IIQ.isExact(&BO))
+        ShiftAmount = C->countTrailingZeros();
+      Lower = C->lshr(ShiftAmount);
+      Upper = *C + 1;
+    }
+    break;
+
+  case Instruction::Shl:
+    if (match(BO.getOperand(0), m_APInt(C))) {
+      if (IIQ.hasNoUnsignedWrap(&BO)) {
+        // 'shl nuw C, x' produces [C, C << CLZ(C)]
+        Lower = *C;
+        Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+      } else if (IIQ.hasNoSignedWrap(&BO)) { // TODO: What if both nuw+nsw?
+        if (C->isNegative()) {
+          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
+          unsigned ShiftAmount = C->countLeadingOnes() - 1;
+          Lower = C->shl(ShiftAmount);
+          Upper = *C + 1;
+        } else {
+          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
+          unsigned ShiftAmount = C->countLeadingZeros() - 1;
+          Lower = *C;
+          Upper = C->shl(ShiftAmount) + 1;
+        }
+      }
+    }
+    break;
+
+  case Instruction::SDiv:
+    if (match(BO.getOperand(1), m_APInt(C))) {
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      if (C->isAllOnesValue()) {
+        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+        //    (x == INT_MIN is excluded: that division overflows, which is UB)
+        Lower = IntMin + 1;
+        Upper = IntMax + 1;
+      } else if (C->countLeadingZeros() < Width - 1) {
+        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
+        //    where C != -1 and C != 0 and C != 1
+        Lower = IntMin.sdiv(*C);
+        Upper = IntMax.sdiv(*C);
+        if (Lower.sgt(Upper))
+          std::swap(Lower, Upper);
+        Upper = Upper + 1;
+        assert(Upper != Lower && "Upper part of range has wrapped!");
+      }
+    } else if (match(BO.getOperand(0), m_APInt(C))) {
+      if (C->isMinSignedValue()) {
+        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+        Lower = *C;
+        Upper = Lower.lshr(1) + 1;
+      } else {
+        // 'sdiv C, x' produces [-|C|, |C|].
+        Upper = C->abs() + 1;
+        Lower = (-Upper) + 1;
+      }
+    }
+    break;
+
+  case Instruction::UDiv:
+    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+      // 'udiv x, C' produces [0, UINT_MAX / C].
+      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
+    } else if (match(BO.getOperand(0), m_APInt(C))) {
+      // 'udiv C, x' produces [0, C].
+      Upper = *C + 1;
+    }
+    break;
+
+  case Instruction::SRem:
+    if (match(BO.getOperand(1), m_APInt(C))) {
+      // 'srem x, C' produces (-|C|, |C|).
+      Upper = C->abs();
+      Lower = (-Upper) + 1;
+    }
+    break;
+
+  case Instruction::URem:
+    if (match(BO.getOperand(1), m_APInt(C)))
+      // 'urem x, C' produces [0, C).
+      Upper = *C;
+    break;
+
+  default:
+    break;
+  }
+}
+
+static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
+                                  APInt &Upper) {
+  unsigned Width = Lower.getBitWidth();
+  const APInt *C;
+  switch (II.getIntrinsicID()) {
+  case Intrinsic::uadd_sat:
+    // uadd.sat(x, C) produces [C, UINT_MAX].
+    if (match(II.getOperand(0), m_APInt(C)) ||
+        match(II.getOperand(1), m_APInt(C)))
+      Lower = *C;
+    break;
+  case Intrinsic::sadd_sat:
+    if (match(II.getOperand(0), m_APInt(C)) ||
+        match(II.getOperand(1), m_APInt(C))) {
+      if (C->isNegative()) {
+        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+        Lower = APInt::getSignedMinValue(Width);
+        Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+      } else {
+        // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+        Lower = APInt::getSignedMinValue(Width) + *C;
+        Upper = APInt::getSignedMaxValue(Width) + 1;
+      }
+    }
+    break;
+  case Intrinsic::usub_sat:
+    // usub.sat(C, x) produces [0, C].
+    if (match(II.getOperand(0), m_APInt(C)))
+      Upper = *C + 1;
+    // usub.sat(x, C) produces [0, UINT_MAX - C].
+    else if (match(II.getOperand(1), m_APInt(C)))
+      Upper = APInt::getMaxValue(Width) - *C + 1;
+    break;
+  case Intrinsic::ssub_sat:
+    if (match(II.getOperand(0), m_APInt(C))) {
+      if (C->isNegative()) {
+        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+        Lower = APInt::getSignedMinValue(Width);
+        Upper = *C - APInt::getSignedMinValue(Width) + 1;
+      } else {
+        // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+        Lower = *C - APInt::getSignedMaxValue(Width);
+        Upper = APInt::getSignedMaxValue(Width) + 1;
+      }
+    } else if (match(II.getOperand(1), m_APInt(C))) {
+      if (C->isNegative()) {
+        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
+        Lower = APInt::getSignedMinValue(Width) - *C;
+        Upper = APInt::getSignedMaxValue(Width) + 1;
+      } else {
+        // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+        Lower = APInt::getSignedMinValue(Width);
+        Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+}
+
+static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
+                                      APInt &Upper) {
+  const Value *LHS, *RHS;
+  SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
+  if (R.Flavor == SPF_UNKNOWN)
+    return;
+
+  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+
+  if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
+    // If the negation part of the abs (in RHS) has the NSW flag,
+    // then the result of abs(X) is [0..SIGNED_MAX],
+    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
+    Lower = APInt::getNullValue(BitWidth);
+    if (cast<Instruction>(RHS)->hasNoSignedWrap())
+      Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+    else
+      Upper = APInt::getSignedMinValue(BitWidth) + 1;
+    return;
+  }
+
+  if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
+    // The result of -abs(X) is <= 0.
+    Lower = APInt::getSignedMinValue(BitWidth);
+    Upper = APInt(BitWidth, 1);
+    return;
+  }
+
+  const APInt *C;
+  if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
+    return;
+
+  switch (R.Flavor) {
+    case SPF_UMIN:
+      Upper = *C + 1;
+      break;
+    case SPF_UMAX:
+      Lower = *C;
+      break;
+    case SPF_SMIN:
+      Lower = APInt::getSignedMinValue(BitWidth);
+      Upper = *C + 1;
+      break;
+    case SPF_SMAX:
+      Lower = *C;
+      Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+      break;
+    default:
+      break;
+  }
+}
+
+ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
+  assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
+
+  const APInt *C;
+  if (match(V, m_APInt(C)))
+    return ConstantRange(*C);
+
+  InstrInfoQuery IIQ(UseInstrInfo);
+  unsigned BitWidth = V->getType()->getScalarSizeInBits();
+  APInt Lower = APInt(BitWidth, 0);
+  APInt Upper = APInt(BitWidth, 0);
+  if (auto *BO = dyn_cast<BinaryOperator>(V))
+    setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+  else if (auto *II = dyn_cast<IntrinsicInst>(V))
+    setLimitsForIntrinsic(*II, Lower, Upper);
+  else if (auto *SI = dyn_cast<SelectInst>(V))
+    setLimitsForSelectPattern(*SI, Lower, Upper);
+
+  ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+
+  if (auto *I = dyn_cast<Instruction>(V))
+    if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
+      CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));
+
+  return CR;
+}
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 5656a19d7e0d..986756eb2627 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -1,9 +1,8 @@
 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,8 +37,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
     cl::init(8));
 
 /// Return true if all of the intrinsic's arguments and return type are scalars
-/// for the scalar form of the intrinsic and vectors for the vector form of the
-/// intrinsic.
+/// for the scalar form of the intrinsic, and vectors for the vector form of the
+/// intrinsic (except operands that are marked as always being scalar by
+/// hasVectorInstrinsicScalarOpd).
 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   switch (ID) {
   case Intrinsic::bswap: // Begin integer bit-manipulation.
@@ -49,6 +49,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::cttz:
   case Intrinsic::fshl:
   case Intrinsic::fshr:
+  case Intrinsic::sadd_sat:
+  case Intrinsic::ssub_sat:
+  case Intrinsic::uadd_sat:
+  case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
+  case Intrinsic::umul_fix:
   case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::sin:
   case Intrinsic::cos:
@@ -74,18 +81,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::fmuladd:
   case Intrinsic::powi:
   case Intrinsic::canonicalize:
-  case Intrinsic::sadd_sat:
-  case Intrinsic::ssub_sat:
-  case Intrinsic::uadd_sat:
-  case Intrinsic::usub_sat:
     return true;
   default:
     return false;
   }
 }
 
-/// Identifies if the intrinsic has a scalar operand. It check for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                         unsigned ScalarOpdIdx) {
   switch (ID) {
@@ -93,6 +95,10 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
   case Intrinsic::cttz:
   case Intrinsic::powi:
     return (ScalarOpdIdx == 1);
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
+  case Intrinsic::umul_fix:
+    return (ScalarOpdIdx == 2);
   default:
     return false;
   }
@@ -300,30 +306,60 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
 
 /// Get splat value if the input is a splat vector or return nullptr.
 /// This function is not fully general. It checks only 2 cases:
-/// the input value is (1) a splat constants vector or (2) a sequence
-/// of instructions that broadcast a single value into a vector.
-///
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcasts a scalar at element 0.
 const llvm::Value *llvm::getSplatValue(const Value *V) {
-
-  if (auto *C = dyn_cast<Constant>(V))
-    if (isa<VectorType>(V->getType()))
+  if (isa<VectorType>(V->getType()))
+    if (auto *C = dyn_cast<Constant>(V))
       return C->getSplatValue();
 
-  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
-  if (!ShuffleInst)
-    return nullptr;
-  // All-zero (or undef) shuffle mask elements.
-  for (int MaskElt : ShuffleInst->getShuffleMask())
-    if (MaskElt != 0 && MaskElt != -1)
-      return nullptr;
-  // The first shuffle source is 'insertelement' with index 0.
-  auto *InsertEltInst =
-    dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
-  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
-      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
-    return nullptr;
+  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
+  Value *Splat;
+  if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
+                                               m_ZeroInt()),
+                               m_Value(), m_ZeroInt())))
+    return Splat;
 
-  return InsertEltInst->getOperand(1);
+  return nullptr;
+}
+
+// This setting is based on its counterpart in value tracking, but it could be
+// adjusted if needed.
+const unsigned MaxDepth = 6;
+
+bool llvm::isSplatValue(const Value *V, unsigned Depth) {
+  assert(Depth <= MaxDepth && "Limit Search Depth");
+
+  if (isa<VectorType>(V->getType())) {
+    if (isa<UndefValue>(V))
+      return true;
+    // FIXME: Constant splat analysis does not allow undef elements.
+    if (auto *C = dyn_cast<Constant>(V))
+      return C->getSplatValue() != nullptr;
+  }
+
+  // FIXME: Constant splat analysis does not allow undef elements.
+  Constant *Mask;
+  if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
+    return Mask->getSplatValue() != nullptr;
+
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return false;
+
+  // If both operands of a binop are splats, the result is a splat.
+  Value *X, *Y, *Z;
+  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
+    return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
+
+  // If all operands of a select are splats, the result is a splat.
+  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
+    return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
+           isSplatValue(Z, Depth);
+
+  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
+
+  return false;
 }
 
 MapVector<Instruction *, uint64_t>
@@ -711,6 +747,52 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
   return ResList[0];
 }
 
+bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
+  auto *ConstMask = dyn_cast<Constant>(Mask);
+  if (!ConstMask)
+    return false;
+  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
+    return true;
+  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+       ++I) {
+    if (auto *MaskElt = ConstMask->getAggregateElement(I))
+      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
+        continue;
+    return false;
+  }
+  return true;
+}
+
+
+bool llvm::maskIsAllOneOrUndef(Value *Mask) {
+  auto *ConstMask = dyn_cast<Constant>(Mask);
+  if (!ConstMask)
+    return false;
+  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
+    return true;
+  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+       ++I) {
+    if (auto *MaskElt = ConstMask->getAggregateElement(I))
+      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
+        continue;
+    return false;
+  }
+  return true;
+}
+
+/// TODO: This is a lot like known bits, but for
+/// vectors.  Is there something we can common this with?
+APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
+
+  const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
+  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+  if (auto *CV = dyn_cast<ConstantVector>(Mask))
+    for (unsigned i = 0; i < VWidth; i++)
+      if (CV->getAggregateElement(i)->isNullValue())
+        DemandedElts.clearBit(i);
+  return DemandedElts;
+}
+
 bool InterleavedAccessInfo::isStrided(int Stride) {
   unsigned Factor = std::abs(Stride);
   return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
@@ -992,7 +1074,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
     // that all the pointers in the group don't wrap.
     // So we check only group member 0 (which is always guaranteed to exist),
     // and group member Factor - 1; If the latter doesn't exist we rely on
-    // peeling (if it is a non-reveresed accsess -- see Case 3).
+    // peeling (if it is a non-reversed access -- see Case 3).
     Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
     if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                       /*ShouldCheckWrap=*/true)) {
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index eab7ec819536..72d2357c2933 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -1,9 +1,8 @@
 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -571,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(align);
   KEYWORD(addrspace);
   KEYWORD(section);
+  KEYWORD(partition);
   KEYWORD(alias);
   KEYWORD(ifunc);
   KEYWORD(module);
@@ -650,6 +650,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(nobuiltin);
   KEYWORD(nocapture);
   KEYWORD(noduplicate);
+  KEYWORD(nofree);
   KEYWORD(noimplicitfloat);
   KEYWORD(noinline);
   KEYWORD(norecurse);
@@ -657,6 +658,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(nonnull);
   KEYWORD(noredzone);
   KEYWORD(noreturn);
+  KEYWORD(nosync);
   KEYWORD(nocf_check);
   KEYWORD(nounwind);
   KEYWORD(optforfuzzing);
@@ -677,14 +679,17 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(shadowcallstack);
   KEYWORD(sanitize_address);
   KEYWORD(sanitize_hwaddress);
+  KEYWORD(sanitize_memtag);
   KEYWORD(sanitize_thread);
   KEYWORD(sanitize_memory);
   KEYWORD(speculative_load_hardening);
   KEYWORD(swifterror);
   KEYWORD(swiftself);
   KEYWORD(uwtable);
+  KEYWORD(willreturn);
   KEYWORD(writeonly);
   KEYWORD(zeroext);
+  KEYWORD(immarg);
 
   KEYWORD(type);
   KEYWORD(opaque);
@@ -706,6 +711,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
   KEYWORD(umin);
 
+  KEYWORD(vscale);
   KEYWORD(x);
   KEYWORD(blockaddress);
 
@@ -733,6 +739,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(notEligibleToImport);
   KEYWORD(live);
   KEYWORD(dsoLocal);
+  KEYWORD(canAutoHide);
   KEYWORD(function);
   KEYWORD(insts);
   KEYWORD(funcFlags);
@@ -749,6 +756,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(critical);
   KEYWORD(relbf);
   KEYWORD(variable);
+  KEYWORD(vTableFuncs);
+  KEYWORD(virtFunc);
   KEYWORD(aliasee);
   KEYWORD(refs);
   KEYWORD(typeIdInfo);
@@ -761,6 +770,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(offset);
   KEYWORD(args);
   KEYWORD(typeid);
+  KEYWORD(typeidCompatibleVTable);
   KEYWORD(summary);
   KEYWORD(typeTestRes);
   KEYWORD(kind);
@@ -859,6 +869,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   INSTKEYWORD(invoke,      Invoke);
   INSTKEYWORD(resume,      Resume);
   INSTKEYWORD(unreachable, Unreachable);
+  INSTKEYWORD(callbr,      CallBr);
 
   INSTKEYWORD(alloca,      Alloca);
   INSTKEYWORD(load,        Load);
@@ -1047,7 +1058,17 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
   for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
     /*empty*/;
 
-  // Check to see if this really is a label afterall, e.g. "-1:".
+  // Check if this is a fully-numeric label:
+  if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
+    uint64_t Val = atoull(TokStart, CurPtr);
+    ++CurPtr; // Skip the colon.
+    if ((unsigned)Val != Val)
+      Error("invalid value number (too large)!");
+    UIntVal = unsigned(Val);
+    return lltok::LabelID;
+  }
+
+  // Check to see if this really is a string label, e.g. "-1:".
   if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
     if (const char *End = isLabelTail(CurPtr)) {
       StrVal.assign(TokStart, End-1);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 21deb6e08910..4d3a2920e937 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -1,9 +1,8 @@
 //===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index ee634505581e..87dff6468f2d 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1,9 +1,8 @@
 //===-- LLParser.cpp - Parser Class ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -164,6 +163,14 @@ bool LLParser::ValidateEndOfModule() {
       AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
                             AttributeSet::get(Context, FnAttrs));
       II->setAttributes(AS);
+    } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(V)) {
+      AttributeList AS = CBI->getAttributes();
+      AttrBuilder FnAttrs(AS.getFnAttributes());
+      AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+      FnAttrs.merge(B);
+      AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
+                            AttributeSet::get(Context, FnAttrs));
+      CBI->setAttributes(AS);
     } else if (auto *GV = dyn_cast<GlobalVariable>(V)) {
       AttrBuilder Attrs(GV->getAttributes());
       Attrs.merge(B);
@@ -814,19 +821,26 @@ bool LLParser::ParseSummaryEntry() {
   if (!Index)
     return SkipModuleSummaryEntry();
 
+  bool result = false;
   switch (Lex.getKind()) {
   case lltok::kw_gv:
-    return ParseGVEntry(SummaryID);
+    result = ParseGVEntry(SummaryID);
+    break;
   case lltok::kw_module:
-    return ParseModuleEntry(SummaryID);
+    result = ParseModuleEntry(SummaryID);
+    break;
   case lltok::kw_typeid:
-    return ParseTypeIdEntry(SummaryID);
+    result = ParseTypeIdEntry(SummaryID);
+    break;
+  case lltok::kw_typeidCompatibleVTable:
+    result = ParseTypeIdCompatibleVtableEntry(SummaryID);
     break;
   default:
-    return Error(Lex.getLoc(), "unexpected summary kind");
+    result = Error(Lex.getLoc(), "unexpected summary kind");
+    break;
   }
   Lex.setIgnoreColonInIdentifiers(false);
-  return false;
+  return result;
 }
 
 static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
@@ -845,11 +859,14 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
 ///   ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
 ///                     OptionalVisibility OptionalDLLStorageClass
 ///                     OptionalThreadLocal OptionalUnnamedAddr
-//                      'alias|ifunc' IndirectSymbol
+///                     'alias|ifunc' IndirectSymbol IndirectSymbolAttr*
 ///
 /// IndirectSymbol
 ///   ::= TypeAndValue
 ///
+/// IndirectSymbolAttr
+///   ::= ',' 'partition' StringConstant
+///
 /// Everything through OptionalUnnamedAddr has already been parsed.
 ///
 bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
@@ -949,6 +966,21 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
   GA->setUnnamedAddr(UnnamedAddr);
   maybeSetDSOLocal(DSOLocal, *GA);
 
+  // At this point we've parsed everything except for the IndirectSymbolAttrs.
+  // Now parse them if there are any.
+  while (Lex.getKind() == lltok::comma) {
+    Lex.Lex();
+
+    if (Lex.getKind() == lltok::kw_partition) {
+      Lex.Lex();
+      GA->setPartition(Lex.getStrVal());
+      if (ParseToken(lltok::StringConstant, "expected partition string"))
+        return true;
+    } else {
+      return TokError("unknown alias or ifunc property!");
+    }
+  }
+
   if (Name.empty())
     NumberedVals.push_back(GA.get());
 
@@ -1084,6 +1116,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
       GV->setSection(Lex.getStrVal());
       if (ParseToken(lltok::StringConstant, "expected global section string"))
         return true;
+    } else if (Lex.getKind() == lltok::kw_partition) {
+      Lex.Lex();
+      GV->setPartition(Lex.getStrVal());
+      if (ParseToken(lltok::StringConstant, "expected partition string"))
+        return true;
     } else if (Lex.getKind() == lltok::kw_align) {
       unsigned Alignment;
       if (ParseOptionalAlignment(Alignment)) return true;
@@ -1243,12 +1280,14 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
     case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
     case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
+    case lltok::kw_nofree: B.addAttribute(Attribute::NoFree); break;
     case lltok::kw_noimplicitfloat:
       B.addAttribute(Attribute::NoImplicitFloat); break;
     case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
     case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
     case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
     case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+    case lltok::kw_nosync: B.addAttribute(Attribute::NoSync); break;
     case lltok::kw_nocf_check: B.addAttribute(Attribute::NoCfCheck); break;
     case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break;
     case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
@@ -1272,6 +1311,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
       B.addAttribute(Attribute::SanitizeAddress); break;
     case lltok::kw_sanitize_hwaddress:
       B.addAttribute(Attribute::SanitizeHWAddress); break;
+    case lltok::kw_sanitize_memtag:
+      B.addAttribute(Attribute::SanitizeMemTag); break;
     case lltok::kw_sanitize_thread:
       B.addAttribute(Attribute::SanitizeThread); break;
     case lltok::kw_sanitize_memory:
@@ -1281,6 +1322,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
       break;
     case lltok::kw_strictfp: B.addAttribute(Attribute::StrictFP); break;
     case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+    case lltok::kw_willreturn: B.addAttribute(Attribute::WillReturn); break;
     case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
 
     // Error handling.
@@ -1303,6 +1345,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_sret:
     case lltok::kw_swifterror:
     case lltok::kw_swiftself:
+    case lltok::kw_immarg:
       HaveError |=
         Error(Lex.getLoc(),
               "invalid use of parameter-only attribute on a function");
@@ -1566,7 +1609,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
-    case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
+    case lltok::kw_byval: {
+      Type *Ty;
+      if (ParseByValWithOptionalType(Ty))
+        return true;
+      B.addByValAttr(Ty);
+      continue;
+    }
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -1596,6 +1645,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_swiftself:       B.addAttribute(Attribute::SwiftSelf); break;
     case lltok::kw_writeonly:       B.addAttribute(Attribute::WriteOnly); break;
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
+    case lltok::kw_immarg:          B.addAttribute(Attribute::ImmArg); break;
 
     case lltok::kw_alignstack:
     case lltok::kw_alwaysinline:
@@ -1620,6 +1670,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_returns_twice:
     case lltok::kw_sanitize_address:
     case lltok::kw_sanitize_hwaddress:
+    case lltok::kw_sanitize_memtag:
     case lltok::kw_sanitize_memory:
     case lltok::kw_sanitize_thread:
     case lltok::kw_speculative_load_hardening:
@@ -1690,6 +1741,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_sret:
     case lltok::kw_swifterror:
     case lltok::kw_swiftself:
+    case lltok::kw_immarg:
       HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
       break;
 
@@ -1717,6 +1769,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_returns_twice:
     case lltok::kw_sanitize_address:
     case lltok::kw_sanitize_hwaddress:
+    case lltok::kw_sanitize_memtag:
     case lltok::kw_sanitize_memory:
     case lltok::kw_sanitize_thread:
     case lltok::kw_speculative_load_hardening:
@@ -2417,6 +2470,22 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
   return false;
 }
 
+/// ParseByValWithOptionalType - Result stays null when no type is written.
+///   ::= byval
+///   ::= byval(<ty>)
+bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+  Result = nullptr;
+  // The keyword itself is mandatory here.
+  if (!EatIfPresent(lltok::kw_byval))
+    return true;
+  // Without a '(' this is the untyped spelling and nothing else is consumed.
+  if (!EatIfPresent(lltok::lparen))
+    return false;
+  if (ParseType(Result))
+    return true;
+  return !EatIfPresent(lltok::rparen) && Error(Lex.getLoc(), "expected ')'");
+}
+
 /// ParseOptionalOperandBundles
 ///    ::= /*empty*/
 ///    ::= '[' OperandBundle [, OperandBundle ]* ']'
@@ -2684,7 +2753,18 @@ bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
 ///   Type
 ///     ::= '[' APSINTVAL 'x' Types ']'
 ///     ::= '<' APSINTVAL 'x' Types '>'
+///     ::= '<' 'vscale' 'x' APSINTVAL 'x' Types '>'
 bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+  bool Scalable = false;
+
+  if (isVector && Lex.getKind() == lltok::kw_vscale) {
+    Lex.Lex(); // consume the 'vscale'
+    if (ParseToken(lltok::kw_x, "expected 'x' after vscale"))
+      return true;
+
+    Scalable = true;
+  }
+
   if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
       Lex.getAPSIntVal().getBitWidth() > 64)
     return TokError("expected number in address space");
@@ -2711,7 +2791,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
       return Error(SizeLoc, "size too large for vector");
     if (!VectorType::isValidElementType(EltTy))
       return Error(TypeLoc, "invalid vector element type");
-    Result = VectorType::get(EltTy, unsigned(Size));
+    Result = VectorType::get(EltTy, unsigned(Size), Scalable);
   } else {
     if (!ArrayType::isValidElementType(EltTy))
       return Error(TypeLoc, "invalid array element type");
@@ -2916,13 +2996,27 @@ BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
 /// unnamed.  If there is an error, this returns null otherwise it returns
 /// the block being defined.
 BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
-                                                 LocTy Loc) {
+                                                 int NameID, LocTy Loc) {
   BasicBlock *BB;
-  if (Name.empty())
+  if (Name.empty()) {
+    if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) {
+      P.Error(Loc, "label expected to be numbered '" +
+                       Twine(NumberedVals.size()) + "'");
+      return nullptr;
+    }
     BB = GetBB(NumberedVals.size(), Loc);
-  else
+    if (!BB) {
+      P.Error(Loc, "unable to create block numbered '" +
+                       Twine(NumberedVals.size()) + "'");
+      return nullptr;
+    }
+  } else {
     BB = GetBB(Name, Loc);
-  if (!BB) return nullptr; // Already diagnosed error.
+    if (!BB) {
+      P.Error(Loc, "unable to create block named '" + Name + "'");
+      return nullptr;
+    }
+  }
 
   // Move the block to the end of the function.  Forward ref'd blocks are
   // inserted wherever they happen to be referenced.
@@ -3342,7 +3436,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     unsigned Opc = Lex.getUIntVal();
     Constant *Val0, *Val1;
     Lex.Lex();
-    LocTy ModifierLoc = Lex.getLoc();
     if (Opc == Instruction::Add || Opc == Instruction::Sub ||
         Opc == Instruction::Mul || Opc == Instruction::Shl) {
       if (EatIfPresent(lltok::kw_nuw))
@@ -3365,12 +3458,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
     if (Val0->getType() != Val1->getType())
       return Error(ID.Loc, "operands of constexpr must have same type");
-    if (!Val0->getType()->isIntOrIntVectorTy()) {
-      if (NUW)
-        return Error(ModifierLoc, "nuw only applies to integer operations");
-      if (NSW)
-        return Error(ModifierLoc, "nsw only applies to integer operations");
-    }
     // Check that the type is valid for the operator.
     switch (Opc) {
     case Instruction::Add:
@@ -4640,6 +4727,24 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
   return false;
 }
 
+/// ParseDICommonBlock (scope is required; all other fields are optional):
+///   ::= !DICommonBlock(scope: !0, file: !2, name: "COMMON name", line: 9)
+bool LLParser::ParseDICommonBlock(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
+  REQUIRED(scope, MDField, );                                                  \
+  OPTIONAL(declaration, MDField, );                                            \
+  OPTIONAL(name, MDStringField, );                                             \
+  OPTIONAL(file, MDField, );                                                   \
+  OPTIONAL(line, LineField, );
+  PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+  Result = GET_OR_DISTINCT(DICommonBlock,
+                           (Context, scope.Val, declaration.Val, name.Val,
+                            file.Val, line.Val));
+  return false;
+}
+
 /// ParseDINamespace:
 ///   ::= !DINamespace(scope: !0, file: !2, name: "SomeNamespace", line: 9)
 bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
@@ -4828,6 +4933,15 @@ bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
         return TokError(Twine("invalid DWARF op '") + Lex.getStrVal() + "'");
       }
 
+      if (Lex.getKind() == lltok::DwarfAttEncoding) {
+        if (unsigned Op = dwarf::getAttributeEncoding(Lex.getStrVal())) {
+          Lex.Lex();
+          Elements.push_back(Op);
+          continue;
+        }
+        return TokError(Twine("invalid DWARF attribute encoding '") + Lex.getStrVal() + "'");
+      }
+
       if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
         return TokError("expected unsigned integer");
 
@@ -5239,6 +5353,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   std::vector<unsigned> FwdRefAttrGrps;
   LocTy BuiltinLoc;
   std::string Section;
+  std::string Partition;
   unsigned Alignment;
   std::string GC;
   GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
@@ -5255,6 +5370,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
                                  BuiltinLoc) ||
       (EatIfPresent(lltok::kw_section) &&
        ParseStringConstant(Section)) ||
+      (EatIfPresent(lltok::kw_partition) &&
+       ParseStringConstant(Partition)) ||
       parseOptionalComdat(FunctionName, C) ||
       ParseOptionalAlignment(Alignment) ||
       (EatIfPresent(lltok::kw_gc) &&
@@ -5356,6 +5473,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   Fn->setUnnamedAddr(UnnamedAddr);
   Fn->setAlignment(Alignment);
   Fn->setSection(Section);
+  Fn->setPartition(Partition);
   Fn->setComdat(C);
   Fn->setPersonalityFn(PersonalityFn);
   if (!GC.empty()) Fn->setGC(GC);
@@ -5470,20 +5588,23 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
 }
 
 /// ParseBasicBlock
-///   ::= LabelStr? Instruction*
+///   ::= (LabelStr|LabelID)? Instruction*
 bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
   // If this basic block starts out with a name, remember it.
   std::string Name;
+  int NameID = -1;
   LocTy NameLoc = Lex.getLoc();
   if (Lex.getKind() == lltok::LabelStr) {
     Name = Lex.getStrVal();
     Lex.Lex();
+  } else if (Lex.getKind() == lltok::LabelID) {
+    NameID = Lex.getUIntVal();
+    Lex.Lex();
   }
 
-  BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+  BasicBlock *BB = PFS.DefineBB(Name, NameID, NameLoc);
   if (!BB)
-    return Error(NameLoc,
-                 "unable to create block named '" + Name + "'");
+    return true;
 
   std::string NameStr;
 
@@ -5567,10 +5688,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
   case lltok::kw_catchpad:    return ParseCatchPad(Inst, PFS);
   case lltok::kw_cleanuppad:  return ParseCleanupPad(Inst, PFS);
+  case lltok::kw_callbr:      return ParseCallBr(Inst, PFS);
   // Unary Operators.
   case lltok::kw_fneg: {
     FastMathFlags FMF = EatFastMathFlagsIfPresent();
-    int Res = ParseUnaryOp(Inst, PFS, KeywordVal, 2);
+    int Res = ParseUnaryOp(Inst, PFS, KeywordVal, /*IsFP*/true);
     if (Res != 0)
       return Res;
     if (FMF.any())
@@ -5586,7 +5708,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
     bool NSW = EatIfPresent(lltok::kw_nsw);
     if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
 
-    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+    if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
 
     if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
     if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
@@ -5598,7 +5720,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_fdiv:
   case lltok::kw_frem: {
     FastMathFlags FMF = EatFastMathFlagsIfPresent();
-    int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2);
+    int Res = ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/true);
     if (Res != 0)
       return Res;
     if (FMF.any())
@@ -5612,13 +5734,14 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_ashr: {
     bool Exact = EatIfPresent(lltok::kw_exact);
 
-    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+    if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
     if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
     return false;
   }
 
   case lltok::kw_urem:
-  case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+  case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal,
+                                                /*IsFP*/false);
   case lltok::kw_and:
   case lltok::kw_or:
   case lltok::kw_xor:    return ParseLogical(Inst, PFS, KeywordVal);
@@ -5648,7 +5771,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_inttoptr:
   case lltok::kw_ptrtoint:       return ParseCast(Inst, PFS, KeywordVal);
   // Other.
-  case lltok::kw_select:         return ParseSelect(Inst, PFS);
+  case lltok::kw_select: {
+    FastMathFlags FMF = EatFastMathFlagsIfPresent();
+    int Res = ParseSelect(Inst, PFS);
+    if (Res != 0)
+      return Res;
+    if (FMF.any()) {
+      if (!Inst->getType()->isFPOrFPVectorTy())
+        return Error(Loc, "fast-math-flags specified for select without "
+                          "floating-point scalar or vector return type");
+      Inst->setFastMathFlags(FMF);
+    }
+    return 0;
+  }
   case lltok::kw_va_arg:         return ParseVA_Arg(Inst, PFS);
   case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
   case lltok::kw_insertelement:  return ParseInsertElement(Inst, PFS);
@@ -6155,28 +6290,16 @@ bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
 /// ParseUnaryOp
 ///  ::= UnaryOp TypeAndValue ',' Value
 ///
-/// If OperandType is 0, then any FP or integer operand is allowed.  If it is 1,
-/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
+/// operand is allowed.
 bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
-                            unsigned Opc, unsigned OperandType) {
+                            unsigned Opc, bool IsFP) {
   LocTy Loc; Value *LHS;
   if (ParseTypeAndValue(LHS, Loc, PFS))
     return true;
 
-  bool Valid;
-  switch (OperandType) {
-  default: llvm_unreachable("Unknown operand type!");
-  case 0: // int or FP.
-    Valid = LHS->getType()->isIntOrIntVectorTy() ||
-            LHS->getType()->isFPOrFPVectorTy();
-    break;
-  case 1: 
-    Valid = LHS->getType()->isIntOrIntVectorTy(); 
-    break;
-  case 2: 
-    Valid = LHS->getType()->isFPOrFPVectorTy(); 
-    break;
-  }
+  bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
+                    : LHS->getType()->isIntOrIntVectorTy();
 
   if (!Valid)
     return Error(Loc, "invalid operand type for instruction");
@@ -6185,6 +6308,124 @@ bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
   return false;
 }
 
+/// ParseCallBr - parse a call that names a default and indirect successors.
+///   ::= 'callbr' OptionalCallingConv OptionalAttrs Type Value ParamList
+///       OptionalAttrs OptionalOperandBundles 'to' TypeAndValue
+///       '[' LabelList ']'
+bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy CallLoc = Lex.getLoc();
+  AttrBuilder RetAttrs, FnAttrs;
+  std::vector<unsigned> FwdRefAttrGrps;
+  LocTy NoBuiltinLoc;
+  unsigned CC;
+  Type *RetType = nullptr;
+  LocTy RetTypeLoc;
+  ValID CalleeID;
+  SmallVector<ParamInfo, 16> ArgList;
+  SmallVector<OperandBundleDef, 2> BundleList;
+
+  BasicBlock *DefaultDest;
+  if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+      ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+      ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
+      ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+                                 NoBuiltinLoc) ||
+      ParseOptionalOperandBundles(BundleList, PFS) ||
+      ParseToken(lltok::kw_to, "expected 'to' in callbr") ||
+      ParseTypeAndBasicBlock(DefaultDest, PFS) ||
+      ParseToken(lltok::lsquare, "expected '[' in callbr"))
+    return true;
+
+  // Parse the (possibly empty) list of indirect destination blocks.
+  SmallVector<BasicBlock *, 16> IndirectDests;
+
+  if (Lex.getKind() != lltok::rsquare) {
+    BasicBlock *DestBB;
+    if (ParseTypeAndBasicBlock(DestBB, PFS))
+      return true;
+    IndirectDests.push_back(DestBB);
+
+    while (EatIfPresent(lltok::comma)) {
+      if (ParseTypeAndBasicBlock(DestBB, PFS))
+        return true;
+      IndirectDests.push_back(DestBB);
+    }
+  }
+
+  if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+    return true;
+
+  // If RetType is not itself a function type, this is the short syntax for
+  // the call: RetType is just the return type, and the parameter types are
+  // inferred from the arguments that are present.
+  FunctionType *Ty = dyn_cast<FunctionType>(RetType);
+  if (!Ty) {
+    // Collect the type of every parsed argument.
+    std::vector<Type *> ParamTypes;
+    for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+      ParamTypes.push_back(ArgList[i].V->getType());
+
+    if (!FunctionType::isValidReturnType(RetType))
+      return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+    Ty = FunctionType::get(RetType, ParamTypes, false);
+  }
+
+  CalleeID.FTy = Ty;
+
+  // Resolve the callee as a pointer to the function type determined above.
+  Value *Callee;
+  if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
+                          /*IsCall=*/true))
+    return true;
+
+  if (isa<InlineAsm>(Callee) && !Ty->getReturnType()->isVoidTy())
+    return Error(RetTypeLoc, "asm-goto outputs not supported");
+
+  // Gather the call operands and their per-argument attribute sets.
+  SmallVector<Value *, 8> Args;
+  SmallVector<AttributeSet, 8> ArgAttrs;
+
+  // Check each argument against the matching FunctionType parameter; surplus
+  // arguments are accepted only for vararg callees.  Gather attributes too.
+  FunctionType::param_iterator I = Ty->param_begin();
+  FunctionType::param_iterator E = Ty->param_end();
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    Type *ExpectedTy = nullptr;
+    if (I != E) {
+      ExpectedTy = *I++;
+    } else if (!Ty->isVarArg()) {
+      return Error(ArgList[i].Loc, "too many arguments specified");
+    }
+
+    if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+      return Error(ArgList[i].Loc, "argument is not of expected type '" +
+                                       getTypeString(ExpectedTy) + "'");
+    Args.push_back(ArgList[i].V);
+    ArgAttrs.push_back(ArgList[i].Attrs);
+  }
+
+  if (I != E)
+    return Error(CallLoc, "not enough parameters specified for call");
+
+  if (FnAttrs.hasAlignmentAttr())
+    return Error(CallLoc, "callbr instructions may not have an alignment");
+
+  // Combine the function, return and argument attributes into one list.
+  AttributeList PAL =
+      AttributeList::get(Context, AttributeSet::get(Context, FnAttrs),
+                         AttributeSet::get(Context, RetAttrs), ArgAttrs);
+
+  CallBrInst *CBI =
+      CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests, Args,
+                         BundleList);
+  CBI->setCallingConv(CC);
+  CBI->setAttributes(PAL);
+  ForwardRefAttrGroups[CBI] = FwdRefAttrGrps;
+  Inst = CBI;
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Binary Operators.
 //===----------------------------------------------------------------------===//
@@ -6192,26 +6433,18 @@ bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
 /// ParseArithmetic
 ///  ::= ArithmeticOps TypeAndValue ',' Value
 ///
-/// If OperandType is 0, then any FP or integer operand is allowed.  If it is 1,
-/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
+/// operand is allowed.
 bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
-                               unsigned Opc, unsigned OperandType) {
+                               unsigned Opc, bool IsFP) {
   LocTy Loc; Value *LHS, *RHS;
   if (ParseTypeAndValue(LHS, Loc, PFS) ||
       ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
       ParseValue(LHS->getType(), RHS, PFS))
     return true;
 
-  bool Valid;
-  switch (OperandType) {
-  default: llvm_unreachable("Unknown operand type!");
-  case 0: // int or FP.
-    Valid = LHS->getType()->isIntOrIntVectorTy() ||
-            LHS->getType()->isFPOrFPVectorTy();
-    break;
-  case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
-  case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
-  }
+  bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
+                    : LHS->getType()->isIntOrIntVectorTy();
 
   if (!Valid)
     return Error(Loc, "invalid operand type for instruction");
@@ -6816,6 +7049,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
   SyncScope::ID SSID = SyncScope::System;
   bool isVolatile = false;
+  bool IsFP = false;
   AtomicRMWInst::BinOp Operation;
 
   if (EatIfPresent(lltok::kw_volatile))
@@ -6834,6 +7068,14 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
   case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
   case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
+  case lltok::kw_fadd:
+    Operation = AtomicRMWInst::FAdd;
+    IsFP = true;
+    break;
+  case lltok::kw_fsub:
+    Operation = AtomicRMWInst::FSub;
+    IsFP = true;
+    break;
   }
   Lex.Lex();  // Eat the operation.
 
@@ -6850,10 +7092,25 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
   if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
     return Error(ValLoc, "atomicrmw value and pointer type do not match");
 
-  if (!Val->getType()->isIntegerTy()) {
-    return Error(ValLoc, "atomicrmw " +
-                 AtomicRMWInst::getOperationName(Operation) +
-                 " operand must be an integer");
+  if (Operation == AtomicRMWInst::Xchg) {
+    if (!Val->getType()->isIntegerTy() &&
+        !Val->getType()->isFloatingPointTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be an integer or floating point type");
+    }
+  } else if (IsFP) {
+    if (!Val->getType()->isFloatingPointTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be a floating point type");
+    }
+  } else {
+    if (!Val->getType()->isIntegerTy()) {
+      return Error(ValLoc, "atomicrmw " +
+                   AtomicRMWInst::getOperationName(Operation) +
+                   " operand must be an integer");
+    }
   }
 
   unsigned Size = Val->getType()->getPrimitiveSizeInBits();
@@ -7249,6 +7506,92 @@ bool LLParser::ParseTypeIdSummary(TypeIdSummary &TIS) {
   return false;
 }
 
+static ValueInfo EmptyVI =
+    ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8);
+
+/// TypeIdCompatibleVtableEntry
+///   ::= 'typeidCompatibleVTable' ':' '(' 'name' ':' STRINGCONSTANT ','
+///   TypeIdCompatibleVtableInfo
+///   ')'
+bool LLParser::ParseTypeIdCompatibleVtableEntry(unsigned ID) {
+  assert(Lex.getKind() == lltok::kw_typeidCompatibleVTable);
+  Lex.Lex();
+
+  std::string Name;
+  if (ParseToken(lltok::colon, "expected ':' here") ||
+      ParseToken(lltok::lparen, "expected '(' here") ||
+      ParseToken(lltok::kw_name, "expected 'name' here") ||
+      ParseToken(lltok::colon, "expected ':' here") ||
+      ParseStringConstant(Name))
+    return true;
+
+  TypeIdCompatibleVtableInfo &TI =
+      Index->getOrInsertTypeIdCompatibleVtableSummary(Name);
+  if (ParseToken(lltok::comma, "expected ',' here") ||
+      ParseToken(lltok::kw_summary, "expected 'summary' here") ||
+      ParseToken(lltok::colon, "expected ':' here") ||
+      ParseToken(lltok::lparen, "expected '(' here"))
+    return true;
+
+  IdToIndexMapType IdToIndexMap;
+  // Parse each '(' 'offset' ':' N ',' GVReference ')' entry of the summary.
+  do {
+    uint64_t Offset;
+    if (ParseToken(lltok::lparen, "expected '(' here") ||
+        ParseToken(lltok::kw_offset, "expected 'offset' here") ||
+        ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(Offset) ||
+        ParseToken(lltok::comma, "expected ',' here"))
+      return true;
+
+    LocTy Loc = Lex.getLoc();
+    unsigned GVId;
+    ValueInfo VI;
+    if (ParseGVReference(VI, GVId))
+      return true;
+
+    // Keep track of the TypeIdCompatibleVtableInfo array index needing a
+    // forward reference. We will save the location of the ValueInfo needing an
+    // update, but can only do so once the std::vector is finalized.
+    if (VI == EmptyVI)
+      IdToIndexMap[GVId].push_back(std::make_pair(TI.size(), Loc));
+    TI.push_back({Offset, VI});
+
+    if (ParseToken(lltok::rparen, "expected ')' in call"))
+      return true;
+  } while (EatIfPresent(lltok::comma));
+
+  // Now that the TI vector is finalized, it is safe to save the locations
+  // of any forward GV references that need updating later.  Iterate by
+  // reference so the per-ID index vectors are not copied.
+  for (const auto &I : IdToIndexMap) {
+    for (const auto &P : I.second) {
+      assert(TI[P.first].VTableVI == EmptyVI &&
+             "Forward referenced ValueInfo expected to be empty");
+      auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
+          I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
+      FwdRef.first->second.push_back(
+          std::make_pair(&TI[P.first].VTableVI, P.second));
+    }
+  }
+
+  if (ParseToken(lltok::rparen, "expected ')' here") ||
+      ParseToken(lltok::rparen, "expected ')' here"))
+    return true;
+
+  // Check if this ID was forward referenced, and if so, update the
+  // corresponding GUIDs.
+  auto FwdRefTIDs = ForwardRefTypeIds.find(ID);
+  if (FwdRefTIDs != ForwardRefTypeIds.end()) {
+    for (const auto &TIDRef : FwdRefTIDs->second) {
+      assert(!*TIDRef.first &&
+             "Forward referenced type id GUID expected to be 0");
+      *TIDRef.first = GlobalValue::getGUID(Name);
+    }
+    ForwardRefTypeIds.erase(FwdRefTIDs);
+  }
+
+  return false;
+}
+
 /// TypeTestResolution
 ///   ::= 'typeTestRes' ':' '(' 'kind' ':'
 ///         ( 'unsat' | 'byteArray' | 'inline' | 'single' | 'allOnes' ) ','
@@ -7523,9 +7866,13 @@ static const auto FwdVIRef = (GlobalValueSummaryMapTy::value_type *)-8;
 
 static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
   bool ReadOnly = Fwd->isReadOnly();
+  bool WriteOnly = Fwd->isWriteOnly();
+  assert(!(ReadOnly && WriteOnly));
   *Fwd = Resolved;
   if (ReadOnly)
     Fwd->setReadOnly();
+  if (WriteOnly)
+    Fwd->setWriteOnly();
 }
 
 /// Stores the given Name/GUID and associated summary into the Index.
@@ -7554,10 +7901,6 @@ void LLParser::AddGlobalValueToIndex(
     }
   }
 
-  // Add the summary if one was provided.
-  if (Summary)
-    Index->addGlobalValueSummary(VI, std::move(Summary));
-
   // Resolve forward references from calls/refs
   auto FwdRefVIs = ForwardRefValueInfos.find(ID);
   if (FwdRefVIs != ForwardRefValueInfos.end()) {
@@ -7575,11 +7918,16 @@ void LLParser::AddGlobalValueToIndex(
     for (auto AliaseeRef : FwdRefAliasees->second) {
       assert(!AliaseeRef.first->hasAliasee() &&
              "Forward referencing alias already has aliasee");
-      AliaseeRef.first->setAliasee(VI.getSummaryList().front().get());
+      assert(Summary && "Aliasee must be a definition");
+      AliaseeRef.first->setAliasee(VI, Summary.get());
     }
     ForwardRefAliasees.erase(FwdRefAliasees);
   }
 
+  // Add the summary if one was provided.
+  if (Summary)
+    Index->addGlobalValueSummary(VI, std::move(Summary));
+
   // Save the associated ValueInfo for use in later references by ID.
   if (ID == NumberedValueInfos.size())
     NumberedValueInfos.push_back(VI);
@@ -7683,7 +8031,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
   StringRef ModulePath;
   GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
       /*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
-      /*Live=*/false, /*IsLocal=*/false);
+      /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
   unsigned InstCount;
   std::vector<FunctionSummary::EdgeTy> Calls;
   FunctionSummary::TypeIdInfo TypeIdInfo;
@@ -7753,9 +8101,11 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
   StringRef ModulePath;
   GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
       /*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
-      /*Live=*/false, /*IsLocal=*/false);
-  GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false);
+      /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
+  GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false,
+                                        /* WriteOnly */ false);
   std::vector<ValueInfo> Refs;
+  VTableFuncList VTableFuncs;
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseModuleReference(ModulePath) ||
@@ -7764,10 +8114,20 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
       ParseGVarFlags(GVarFlags))
     return true;
 
-  // Parse optional refs field
-  if (EatIfPresent(lltok::comma)) {
-    if (ParseOptionalRefs(Refs))
-      return true;
+  // Parse optional fields
+  while (EatIfPresent(lltok::comma)) {
+    switch (Lex.getKind()) {
+    case lltok::kw_vTableFuncs:
+      if (ParseOptionalVTableFuncs(VTableFuncs))
+        return true;
+      break;
+    case lltok::kw_refs:
+      if (ParseOptionalRefs(Refs))
+        return true;
+      break;
+    default:
+      return Error(Lex.getLoc(), "expected optional variable summary field");
+    }
   }
 
   if (ParseToken(lltok::rparen, "expected ')' here"))
@@ -7777,6 +8137,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
       llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
 
   GS->setModulePath(ModulePath);
+  GS->setVTableFuncs(std::move(VTableFuncs));
 
   AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
                         ID, std::move(GS));
@@ -7796,7 +8157,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
   StringRef ModulePath;
   GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
       /*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
-      /*Live=*/false, /*IsLocal=*/false);
+      /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseModuleReference(ModulePath) ||
@@ -7823,8 +8184,11 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
     auto FwdRef = ForwardRefAliasees.insert(
         std::make_pair(GVId, std::vector<std::pair<AliasSummary *, LocTy>>()));
     FwdRef.first->second.push_back(std::make_pair(AS.get(), Loc));
-  } else
-    AS->setAliasee(AliaseeVI.getSummaryList().front().get());
+  } else {
+    auto Summary = Index->findSummaryInModule(AliaseeVI, ModulePath);
+    assert(Summary && "Aliasee must be a definition");
+    AS->setAliasee(AliaseeVI, Summary);
+  }
 
   AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
                         ID, std::move(AS));
@@ -7856,7 +8220,7 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
     return true;
 
   do {
-    unsigned Val;
+    unsigned Val = 0;
     switch (Lex.getKind()) {
     case lltok::kw_readNone:
       Lex.Lex();
@@ -7994,6 +8358,67 @@ bool LLParser::ParseHotness(CalleeInfo::HotnessType &Hotness) {
   return false;
 }
 
+/// Parse a variable summary's vTableFuncs list; returns true on error.
+/// OptionalVTableFuncs
+///   := 'vTableFuncs' ':' '(' VTableFunc [',' VTableFunc]* ')'
+/// VTableFunc ::= '(' 'virtFunc' ':' GVReference ',' 'offset' ':' UInt64 ')'
+bool LLParser::ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
+  assert(Lex.getKind() == lltok::kw_vTableFuncs);
+  Lex.Lex();
+
+  // Use short-circuit '||': '|' leaves the order of the two calls unspecified
+  // and would still run the second ParseToken after the first one failed.
+  if (ParseToken(lltok::colon, "expected ':' in vTableFuncs") ||
+      ParseToken(lltok::lparen, "expected '(' in vTableFuncs"))
+    return true;
+
+  IdToIndexMapType IdToIndexMap;
+  // Parse each virtual function pair
+  do {
+    ValueInfo VI;
+    if (ParseToken(lltok::lparen, "expected '(' in vTableFunc") ||
+        ParseToken(lltok::kw_virtFunc, "expected 'virtFunc' in vTableFunc") ||
+        ParseToken(lltok::colon, "expected ':'"))
+      return true;
+
+    LocTy Loc = Lex.getLoc();
+    unsigned GVId;
+    if (ParseGVReference(VI, GVId))
+      return true;
+
+    uint64_t Offset;
+    if (ParseToken(lltok::comma, "expected comma") ||
+        ParseToken(lltok::kw_offset, "expected offset") ||
+        ParseToken(lltok::colon, "expected ':'") || ParseUInt64(Offset))
+      return true;
+
+    // Keep track of the VTableFuncs array index needing a forward reference.
+    // We will save the location of the ValueInfo needing an update, but
+    // can only do so once the std::vector is finalized.
+    if (VI == EmptyVI)
+      IdToIndexMap[GVId].push_back(std::make_pair(VTableFuncs.size(), Loc));
+    VTableFuncs.push_back({VI, Offset});
+
+    if (ParseToken(lltok::rparen, "expected ')' in vTableFunc"))
+      return true;
+  } while (EatIfPresent(lltok::comma));
+
+  // Now that the VTableFuncs vector is finalized, it is safe to save the
+  // locations of any forward GV references that need updating later.
+  for (const auto &I : IdToIndexMap) {
+    for (const auto &P : I.second) {
+      assert(VTableFuncs[P.first].FuncVI == EmptyVI &&
+             "Forward referenced ValueInfo expected to be empty");
+      auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
+          I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
+      FwdRef.first->second.push_back(
+          std::make_pair(&VTableFuncs[P.first].FuncVI, P.second));
+    }
+  }
+
+  return ParseToken(lltok::rparen, "expected ')' in vTableFuncs");
+}
+
 /// OptionalRefs
 ///   := 'refs' ':' '(' GVReference [',' GVReference]* ')'
 bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
@@ -8019,10 +8444,11 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
     VContexts.push_back(VC);
   } while (EatIfPresent(lltok::comma));
 
-  // Sort value contexts so that ones with readonly ValueInfo are at the end
-  // of VContexts vector. This is needed to match immutableRefCount() behavior.
+  // Sort value contexts so that ones with writeonly
+  // and readonly ValueInfo  are at the end of VContexts vector.
+  // See FunctionSummary::specialRefCounts()
   llvm::sort(VContexts, [](const ValueContext &VC1, const ValueContext &VC2) {
-    return VC1.VI.isReadOnly() < VC2.VI.isReadOnly();
+    return VC1.VI.getAccessSpecifier() < VC2.VI.getAccessSpecifier();
   });
 
   IdToIndexMapType IdToIndexMap;
@@ -8283,41 +8709,55 @@ bool LLParser::ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
 /// GVFlags
 ///   ::= 'flags' ':' '(' 'linkage' ':' OptionalLinkageAux ','
 ///         'notEligibleToImport' ':' Flag ',' 'live' ':' Flag ','
-///         'dsoLocal' ':' Flag ')'
+///         'dsoLocal' ':' Flag ',' 'canAutoHide' ':' Flag ')'
 bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
   assert(Lex.getKind() == lltok::kw_flags);
   Lex.Lex();
 
-  bool HasLinkage;
   if (ParseToken(lltok::colon, "expected ':' here") ||
-      ParseToken(lltok::lparen, "expected '(' here") ||
-      ParseToken(lltok::kw_linkage, "expected 'linkage' here") ||
-      ParseToken(lltok::colon, "expected ':' here"))
-    return true;
-
-  GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
-  assert(HasLinkage && "Linkage not optional in summary entry");
-  Lex.Lex();
-
-  unsigned Flag;
-  if (ParseToken(lltok::comma, "expected ',' here") ||
-      ParseToken(lltok::kw_notEligibleToImport,
-                 "expected 'notEligibleToImport' here") ||
-      ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
-    return true;
-  GVFlags.NotEligibleToImport = Flag;
-
-  if (ParseToken(lltok::comma, "expected ',' here") ||
-      ParseToken(lltok::kw_live, "expected 'live' here") ||
-      ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
+      ParseToken(lltok::lparen, "expected '(' here"))
     return true;
-  GVFlags.Live = Flag;
 
-  if (ParseToken(lltok::comma, "expected ',' here") ||
-      ParseToken(lltok::kw_dsoLocal, "expected 'dsoLocal' here") ||
-      ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
-    return true;
-  GVFlags.DSOLocal = Flag;
+  do {
+    unsigned Flag = 0;
+    switch (Lex.getKind()) {
+    case lltok::kw_linkage:
+      Lex.Lex();
+      if (ParseToken(lltok::colon, "expected ':'"))
+        return true;
+      bool HasLinkage;
+      GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
+      assert(HasLinkage && "Linkage not optional in summary entry");
+      Lex.Lex();
+      break;
+    case lltok::kw_notEligibleToImport:
+      Lex.Lex();
+      if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+        return true;
+      GVFlags.NotEligibleToImport = Flag;
+      break;
+    case lltok::kw_live:
+      Lex.Lex();
+      if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+        return true;
+      GVFlags.Live = Flag;
+      break;
+    case lltok::kw_dsoLocal:
+      Lex.Lex();
+      if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+        return true;
+      GVFlags.DSOLocal = Flag;
+      break;
+    case lltok::kw_canAutoHide:
+      Lex.Lex();
+      if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+        return true;
+      GVFlags.CanAutoHide = Flag;
+      break;
+    default:
+      return Error(Lex.getLoc(), "expected gv flag type");
+    }
+  } while (EatIfPresent(lltok::comma));
 
   if (ParseToken(lltok::rparen, "expected ')' here"))
     return true;
@@ -8326,24 +8766,41 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
 }
 
 /// GVarFlags
-///   ::= 'varFlags' ':' '(' 'readonly' ':' Flag ')'
+///   ::= 'varFlags' ':' '(' GVarFlag [',' GVarFlag]* ')'
+/// GVarFlag ::= ('readonly' | 'writeonly') ':' Flag
 bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
   assert(Lex.getKind() == lltok::kw_varFlags);
   Lex.Lex();
 
-  unsigned Flag;
   if (ParseToken(lltok::colon, "expected ':' here") ||
-      ParseToken(lltok::lparen, "expected '(' here") ||
-      ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||
-      ParseToken(lltok::colon, "expected ':' here"))
+      ParseToken(lltok::lparen, "expected '(' here"))
     return true;
 
-  ParseFlag(Flag);
-  GVarFlags.ReadOnly = Flag;
+  auto ParseRest = [this](unsigned int &Val) {
+    Lex.Lex();
+    if (ParseToken(lltok::colon, "expected ':'"))
+      return true;
+    return ParseFlag(Val);
+  };
 
-  if (ParseToken(lltok::rparen, "expected ')' here"))
-    return true;
-  return false;
+  do {
+    unsigned Flag = 0;
+    switch (Lex.getKind()) {
+    case lltok::kw_readonly:
+      if (ParseRest(Flag))
+        return true;
+      GVarFlags.MaybeReadOnly = Flag;
+      break;
+    case lltok::kw_writeonly:
+      if (ParseRest(Flag))
+        return true;
+      GVarFlags.MaybeWriteOnly = Flag;
+      break;
+    default:
+      return Error(Lex.getLoc(), "expected gvar flag type");
+    }
+  } while (EatIfPresent(lltok::comma));
+  return ParseToken(lltok::rparen, "expected ')' here");
 }
 
 /// ModuleReference
@@ -8366,7 +8823,9 @@ bool LLParser::ParseModuleReference(StringRef &ModulePath) {
 /// GVReference
 ///   ::= SummaryID
 bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
-  bool ReadOnly = EatIfPresent(lltok::kw_readonly);
+  bool WriteOnly = false, ReadOnly = EatIfPresent(lltok::kw_readonly);
+  if (!ReadOnly)
+    WriteOnly = EatIfPresent(lltok::kw_writeonly);
   if (ParseToken(lltok::SummaryID, "expected GV ID"))
     return true;
 
@@ -8381,5 +8840,7 @@ bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
 
   if (ReadOnly)
     VI.setReadOnly();
+  if (WriteOnly)
+    VI.setWriteOnly();
   return false;
 }
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 5a0fc297265d..610e2e262008 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -1,9 +1,8 @@
 //===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -340,6 +339,7 @@ namespace llvm {
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
+    bool ParseByValWithOptionalType(Type *&Result);
 
     // Module Summary Index Parsing.
     bool SkipModuleSummaryEntry();
@@ -369,9 +369,11 @@ namespace llvm {
                          IdToIndexMapType &IdToIndexMap, unsigned Index);
     bool ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
                       IdToIndexMapType &IdToIndexMap, unsigned Index);
+    bool ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs);
     bool ParseOptionalRefs(std::vector<ValueInfo> &Refs);
     bool ParseTypeIdEntry(unsigned ID);
     bool ParseTypeIdSummary(TypeIdSummary &TIS);
+    bool ParseTypeIdCompatibleVtableEntry(unsigned ID);
     bool ParseTypeTestResolution(TypeTestResolution &TTRes);
     bool ParseOptionalWpdResolutions(
         std::map<uint64_t, WholeProgramDevirtResolution> &WPDResMap);
@@ -446,7 +448,7 @@ namespace llvm {
       /// DefineBB - Define the specified basic block, which is either named or
       /// unnamed.  If there is an error, this returns null otherwise it returns
       /// the block being defined.
-      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+      BasicBlock *DefineBB(const std::string &Name, int NameID, LocTy Loc);
 
       bool resolveForwardRefBlockAddresses();
     };
@@ -571,11 +573,12 @@ namespace llvm {
     bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCallBr(Instruction *&Inst, PerFunctionState &PFS);
 
     bool ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
-                      unsigned OperandType);
+                      bool IsFP);
     bool ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
-                         unsigned OperandType);
+                         bool IsFP);
     bool ParseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
     bool ParseCompare(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
     bool ParseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index c2e2795a9467..0e9ba4db4742 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -1,9 +1,8 @@
 //===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,6 +37,7 @@ enum Kind {
   bar,     // |
   colon,   // :
 
+  kw_vscale,
   kw_x,
   kw_true,
   kw_false,
@@ -114,6 +114,7 @@ enum Kind {
   kw_align,
   kw_addrspace,
   kw_section,
+  kw_partition,
   kw_alias,
   kw_ifunc,
   kw_module,
@@ -175,6 +176,7 @@ enum Kind {
   kw_argmemonly,
   kw_sanitize_address,
   kw_sanitize_hwaddress,
+  kw_sanitize_memtag,
   kw_builtin,
   kw_byval,
   kw_inalloca,
@@ -194,6 +196,7 @@ enum Kind {
   kw_nobuiltin,
   kw_nocapture,
   kw_noduplicate,
+  kw_nofree,
   kw_noimplicitfloat,
   kw_noinline,
   kw_norecurse,
@@ -201,6 +204,7 @@ enum Kind {
   kw_nonnull,
   kw_noredzone,
   kw_noreturn,
+  kw_nosync,
   kw_nocf_check,
   kw_nounwind,
   kw_optforfuzzing,
@@ -225,8 +229,10 @@ enum Kind {
   kw_swifterror,
   kw_swiftself,
   kw_uwtable,
+  kw_willreturn,
   kw_writeonly,
   kw_zeroext,
+  kw_immarg,
 
   kw_type,
   kw_opaque,
@@ -328,6 +334,7 @@ enum Kind {
   kw_catchret,
   kw_catchpad,
   kw_cleanuppad,
+  kw_callbr,
 
   kw_alloca,
   kw_load,
@@ -363,6 +370,7 @@ enum Kind {
   kw_notEligibleToImport,
   kw_live,
   kw_dsoLocal,
+  kw_canAutoHide,
   kw_function,
   kw_insts,
   kw_funcFlags,
@@ -379,6 +387,8 @@ enum Kind {
   kw_critical,
   kw_relbf,
   kw_variable,
+  kw_vTableFuncs,
+  kw_virtFunc,
   kw_aliasee,
   kw_refs,
   kw_typeIdInfo,
@@ -391,6 +401,7 @@ enum Kind {
   kw_offset,
   kw_args,
   kw_typeid,
+  kw_typeidCompatibleVTable,
   kw_summary,
   kw_typeTestRes,
   kw_kind,
@@ -421,6 +432,7 @@ enum Kind {
   kw_varFlags,
 
   // Unsigned Valued tokens (UIntVal).
+  LabelID,    // 42:
   GlobalID,   // @42
   LocalVarID, // %42
   AttrGrpID,  // #42
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 1205dff24e8a..b13c6237f411 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -1,9 +1,8 @@
 //===- Parser.cpp - Main dispatch module for the Parser library -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index b789f646b5f6..3f36dff9f55c 100644
--- a/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,98 +20,92 @@ namespace HSAMD {
 namespace V3 {
 
 bool MetadataVerifier::verifyScalar(
-    msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
-    function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
-  auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
-  if (!ScalarPtr)
-    return false;
-  auto &Scalar = *ScalarPtr;
-  // Do not output extraneous tags for types we know from the spec.
-  Scalar.IgnoreTag = true;
-  if (Scalar.getScalarKind() != SKind) {
+    msgpack::DocNode &Node, msgpack::Type SKind,
+    function_ref<bool(msgpack::DocNode &)> verifyValue) {
+  if (!Node.isScalar())
+    return false;
+  if (Node.getKind() != SKind) {
     if (Strict)
       return false;
     // If we are not strict, we interpret string values as "implicitly typed"
     // and attempt to coerce them to the expected type here.
-    if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
+    if (Node.getKind() != msgpack::Type::String)
       return false;
-    std::string StringValue = Scalar.getString();
-    Scalar.setScalarKind(SKind);
-    if (Scalar.inputYAML(StringValue) != StringRef())
+    StringRef StringValue = Node.getString();
+    Node.fromString(StringValue);
+    if (Node.getKind() != SKind)
       return false;
   }
   if (verifyValue)
-    return verifyValue(Scalar);
+    return verifyValue(Node);
   return true;
 }
 
-bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
-  if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
-    if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
+bool MetadataVerifier::verifyInteger(msgpack::DocNode &Node) {
+  if (!verifyScalar(Node, msgpack::Type::UInt))
+    if (!verifyScalar(Node, msgpack::Type::Int))
       return false;
   return true;
 }
 
 bool MetadataVerifier::verifyArray(
-    msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
+    msgpack::DocNode &Node, function_ref<bool(msgpack::DocNode &)> verifyNode,
     Optional<size_t> Size) {
-  auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
-  if (!ArrayPtr)
+  if (!Node.isArray())
     return false;
-  auto &Array = *ArrayPtr;
+  auto &Array = Node.getArray();
   if (Size && Array.size() != *Size)
     return false;
   for (auto &Item : Array)
-    if (!verifyNode(*Item.get()))
+    if (!verifyNode(Item))
       return false;
 
   return true;
 }
 
 bool MetadataVerifier::verifyEntry(
-    msgpack::MapNode &MapNode, StringRef Key, bool Required,
-    function_ref<bool(msgpack::Node &)> verifyNode) {
+    msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+    function_ref<bool(msgpack::DocNode &)> verifyNode) {
   auto Entry = MapNode.find(Key);
   if (Entry == MapNode.end())
     return !Required;
-  return verifyNode(*Entry->second.get());
+  return verifyNode(Entry->second);
 }
 
 bool MetadataVerifier::verifyScalarEntry(
-    msgpack::MapNode &MapNode, StringRef Key, bool Required,
-    msgpack::ScalarNode::ScalarKind SKind,
-    function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
-  return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
+    msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+    msgpack::Type SKind,
+    function_ref<bool(msgpack::DocNode &)> verifyValue) {
+  return verifyEntry(MapNode, Key, Required, [=](msgpack::DocNode &Node) {
     return verifyScalar(Node, SKind, verifyValue);
   });
 }
 
-bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
+bool MetadataVerifier::verifyIntegerEntry(msgpack::MapDocNode &MapNode,
                                           StringRef Key, bool Required) {
-  return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
+  return verifyEntry(MapNode, Key, Required, [this](msgpack::DocNode &Node) {
     return verifyInteger(Node);
   });
 }
 
-bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
-  auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
-  if (!ArgsMapPtr)
+bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
+  if (!Node.isMap())
     return false;
-  auto &ArgsMap = *ArgsMapPtr;
+  auto &ArgsMap = Node.getMap();
 
   if (!verifyScalarEntry(ArgsMap, ".name", false,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".type_name", false,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyIntegerEntry(ArgsMap, ".size", true))
     return false;
   if (!verifyIntegerEntry(ArgsMap, ".offset", true))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("by_value", true)
                                .Case("global_buffer", true)
@@ -128,12 +121,13 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
                                .Case("hidden_printf_buffer", true)
                                .Case("hidden_default_queue", true)
                                .Case("hidden_completion_action", true)
+                               .Case("hidden_multigrid_sync_arg", true)
                                .Default(false);
                          }))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".value_type", true,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("struct", true)
                                .Case("i8", true)
@@ -153,8 +147,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
   if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".address_space", false,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("private", true)
                                .Case("global", true)
@@ -166,8 +160,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
                          }))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".access", false,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("read_only", true)
                                .Case("write_only", true)
@@ -176,8 +170,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
                          }))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("read_only", true)
                                .Case("write_only", true)
@@ -186,36 +180,35 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
                          }))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".is_const", false,
-                         msgpack::ScalarNode::SK_Boolean))
+                         msgpack::Type::Boolean))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
-                         msgpack::ScalarNode::SK_Boolean))
+                         msgpack::Type::Boolean))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
-                         msgpack::ScalarNode::SK_Boolean))
+                         msgpack::Type::Boolean))
     return false;
   if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
-                         msgpack::ScalarNode::SK_Boolean))
+                         msgpack::Type::Boolean))
     return false;
 
   return true;
 }
 
-bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
-  auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
-  if (!KernelMapPtr)
+bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
+  if (!Node.isMap())
     return false;
-  auto &KernelMap = *KernelMapPtr;
+  auto &KernelMap = Node.getMap();
 
   if (!verifyScalarEntry(KernelMap, ".name", true,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyScalarEntry(KernelMap, ".symbol", true,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyScalarEntry(KernelMap, ".language", false,
-                         msgpack::ScalarNode::SK_String,
-                         [](msgpack::ScalarNode &SNode) {
+                         msgpack::Type::String,
+                         [](msgpack::DocNode &SNode) {
                            return StringSwitch<bool>(SNode.getString())
                                .Case("OpenCL C", true)
                                .Case("OpenCL C++", true)
@@ -227,41 +220,41 @@ bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
                          }))
     return false;
   if (!verifyEntry(
-          KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
+          KernelMap, ".language_version", false, [this](msgpack::DocNode &Node) {
             return verifyArray(
                 Node,
-                [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+                [this](msgpack::DocNode &Node) { return verifyInteger(Node); }, 2);
           }))
     return false;
-  if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
-        return verifyArray(Node, [this](msgpack::Node &Node) {
+  if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::DocNode &Node) {
+        return verifyArray(Node, [this](msgpack::DocNode &Node) {
           return verifyKernelArgs(Node);
         });
       }))
     return false;
   if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
-                   [this](msgpack::Node &Node) {
+                   [this](msgpack::DocNode &Node) {
                      return verifyArray(Node,
-                                        [this](msgpack::Node &Node) {
+                                        [this](msgpack::DocNode &Node) {
                                           return verifyInteger(Node);
                                         },
                                         3);
                    }))
     return false;
   if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
-                   [this](msgpack::Node &Node) {
+                   [this](msgpack::DocNode &Node) {
                      return verifyArray(Node,
-                                        [this](msgpack::Node &Node) {
+                                        [this](msgpack::DocNode &Node) {
                                           return verifyInteger(Node);
                                         },
                                         3);
                    }))
     return false;
   if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
-                         msgpack::ScalarNode::SK_String))
+                         msgpack::Type::String))
     return false;
   if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
     return false;
@@ -287,29 +280,28 @@ bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
   return true;
 }
 
-bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
-  auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
-  if (!RootMapPtr)
+bool MetadataVerifier::verify(msgpack::DocNode &HSAMetadataRoot) {
+  if (!HSAMetadataRoot.isMap())
     return false;
-  auto &RootMap = *RootMapPtr;
+  auto &RootMap = HSAMetadataRoot.getMap();
 
   if (!verifyEntry(
-          RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
+          RootMap, "amdhsa.version", true, [this](msgpack::DocNode &Node) {
             return verifyArray(
                 Node,
-                [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+                [this](msgpack::DocNode &Node) { return verifyInteger(Node); }, 2);
           }))
     return false;
   if (!verifyEntry(
-          RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
-            return verifyArray(Node, [this](msgpack::Node &Node) {
-              return verifyScalar(Node, msgpack::ScalarNode::SK_String);
+          RootMap, "amdhsa.printf", false, [this](msgpack::DocNode &Node) {
+            return verifyArray(Node, [this](msgpack::DocNode &Node) {
+              return verifyScalar(Node, msgpack::Type::String);
             });
           }))
     return false;
   if (!verifyEntry(RootMap, "amdhsa.kernels", true,
-                   [this](msgpack::Node &Node) {
-                     return verifyArray(Node, [this](msgpack::Node &Node) {
+                   [this](msgpack::DocNode &Node) {
+                     return verifyArray(Node, [this](msgpack::DocNode &Node) {
                        return verifyKernel(Node);
                      });
                    }))
diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp
index 46f8056774b7..eb6bd33ce583 100644
--- a/lib/BinaryFormat/Dwarf.cpp
+++ b/lib/BinaryFormat/Dwarf.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -144,8 +143,12 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
   case DW_OP_##NAME:                                                           \
     return "DW_OP_" #NAME;
 #include "llvm/BinaryFormat/Dwarf.def"
+  case DW_OP_LLVM_convert:
+    return "DW_OP_LLVM_convert";
   case DW_OP_LLVM_fragment:
     return "DW_OP_LLVM_fragment";
+  case DW_OP_LLVM_tag_offset:
+    return "DW_OP_LLVM_tag_offset";
   }
 }
 
@@ -154,7 +157,9 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
   .Case("DW_OP_" #NAME, DW_OP_##NAME)
 #include "llvm/BinaryFormat/Dwarf.def"
+      .Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert)
       .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
+      .Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset)
       .Default(0);
 }
 
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index 78efa6ec87be..7dfe23690a50 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -1,9 +1,8 @@
 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -62,6 +61,15 @@ file_magic llvm::identify_magic(StringRef Magic) {
       return file_magic::wasm_object;
     break;
   }
+
+  case 0x01:
+    // XCOFF format
+    if (startswith(Magic, "\x01\xDF"))
+      return file_magic::xcoff_object_32;
+    if (startswith(Magic, "\x01\xF7"))
+      return file_magic::xcoff_object_64;
+    break;
+
   case 0xDE: // 0x0B17C0DE = BC wraper
     if (startswith(Magic, "\xDE\xC0\x17\x0B"))
       return file_magic::bitcode;
@@ -182,7 +190,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
       return file_magic::coff_object;
     break;
 
-  case 'M': // Possible MS-DOS stub on Windows PE file or MSF/PDB file.
+  case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
+            // Minidump file.
     if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
       uint32_t off = read32le(Magic.data() + 0x3c);
       // PE/COFF file, either EXE or DLL.
@@ -192,6 +201,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
     }
     if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
       return file_magic::pdb;
+    if (startswith(Magic, "MDMP"))
+      return file_magic::minidump;
     break;
 
   case 0x64: // x86-64 or ARM64 Windows.
diff --git a/lib/BinaryFormat/Minidump.cpp b/lib/BinaryFormat/Minidump.cpp
new file mode 100644
index 000000000000..b618fb157012
--- /dev/null
+++ b/lib/BinaryFormat/Minidump.cpp
@@ -0,0 +1,14 @@
+//===-- Minidump.cpp - Minidump constants and structures ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Minidump.h"
+
+using namespace llvm::minidump;
+
+constexpr uint32_t Header::MagicSignature;
+constexpr uint16_t Header::MagicVersion;
diff --git a/lib/BinaryFormat/MsgPackDocument.cpp b/lib/BinaryFormat/MsgPackDocument.cpp
new file mode 100644
index 000000000000..e12c54a37ad0
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocument.cpp
@@ -0,0 +1,245 @@
+//===-- MsgPackDocument.cpp - MsgPack Document --------------------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from MsgPack, written to
+/// MsgPack, and inspected and modified in memory. This is intended to be a
+/// lighter-weight (in terms of memory allocations) replacement for
+/// MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+// Convert this DocNode into an empty array.
+void DocNode::convertToArray() { *this = getDocument()->getArrayNode(); }
+
+// Convert this DocNode into an empty map.
+void DocNode::convertToMap() { *this = getDocument()->getMapNode(); }
+
+/// Find the key in the MapDocNode.
+DocNode::MapTy::iterator MapDocNode::find(StringRef S) {
+  return find(getDocument()->getNode(S));
+}
+
+/// Member access for MapDocNode. The string data must remain valid for the
+/// lifetime of the Document.
+DocNode &MapDocNode::operator[](StringRef S) {
+  return (*this)[getDocument()->getNode(S)];
+}
+
+/// Member access for MapDocNode.
+DocNode &MapDocNode::operator[](DocNode Key) {
+  assert(!Key.isEmpty());
+  MapTy::value_type Entry(Key, DocNode());
+  auto ItAndInserted = Map->insert(Entry);
+  if (ItAndInserted.second) {
+    // Ensure a new element has its KindAndDoc initialized.
+    ItAndInserted.first->second = getDocument()->getNode();
+  }
+  return ItAndInserted.first->second;
+}
+
+/// Array element access. This extends the array if necessary.
+DocNode &ArrayDocNode::operator[](size_t Index) {
+  if (size() <= Index) {
+    // Ensure new elements have their KindAndDoc initialized.
+    Array->resize(Index + 1, getDocument()->getNode());
+  }
+  return (*Array)[Index];
+}
+
+// A level in the document reading stack.
+struct StackLevel {
+  DocNode Node;
+  size_t Length;
+  // Points to map entry when we have just processed a map key.
+  DocNode *MapEntry;
+};
+
+// Read a document from a binary msgpack blob.
+// The blob data must remain valid for the lifetime of this Document (because a
+// string object in the document contains a StringRef into the original blob).
+// If Multi, then this sets root to an array and adds top-level objects to it.
+// If !Multi, then it only reads a single top-level object, even if there are
+// more, and sets root to that.
+// Returns false if failed due to illegal format.
+bool Document::readFromBlob(StringRef Blob, bool Multi) {
+  msgpack::Reader MPReader(Blob);
+  SmallVector<StackLevel, 4> Stack;
+  if (Multi) {
+    // Create the array for multiple top-level objects.
+    Root = getArrayNode();
+    Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
+  }
+  do {
+    // On to next element (or key if doing a map key next).
+    // Read the value.
+    Object Obj;
+    if (!MPReader.read(Obj)) {
+      if (Multi && Stack.size() == 1) {
+        // OK to finish here as we've just done a top-level element with Multi
+        break;
+      }
+      return false; // Finished too early
+    }
+    // Convert it into a DocNode.
+    DocNode Node;
+    switch (Obj.Kind) {
+    case Type::Nil:
+      Node = getNode();
+      break;
+    case Type::Int:
+      Node = getNode(Obj.Int);
+      break;
+    case Type::UInt:
+      Node = getNode(Obj.UInt);
+      break;
+    case Type::Boolean:
+      Node = getNode(Obj.Bool);
+      break;
+    case Type::Float:
+      Node = getNode(Obj.Float);
+      break;
+    case Type::String:
+      Node = getNode(Obj.Raw);
+      break;
+    case Type::Map:
+      Node = getMapNode();
+      break;
+    case Type::Array:
+      Node = getArrayNode();
+      break;
+    default:
+      return false; // Raw and Extension not supported
+    }
+
+    // Store it.
+    if (Stack.empty())
+      Root = Node;
+    else if (Stack.back().Node.getKind() == Type::Array) {
+      // Reading an array entry.
+      auto &Array = Stack.back().Node.getArray();
+      Array.push_back(Node);
+    } else {
+      auto &Map = Stack.back().Node.getMap();
+      if (!Stack.back().MapEntry) {
+        // Reading a map key.
+        Stack.back().MapEntry = &Map[Node];
+      } else {
+        // Reading the value for the map key read in the last iteration.
+        *Stack.back().MapEntry = Node;
+        Stack.back().MapEntry = nullptr;
+      }
+    }
+
+    // See if we're starting a new array or map.
+    switch (Node.getKind()) {
+    case msgpack::Type::Array:
+    case msgpack::Type::Map:
+      Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
+      break;
+    default:
+      break;
+    }
+
+    // Pop finished stack levels.
+    while (!Stack.empty()) {
+      if (Stack.back().Node.getKind() == msgpack::Type::Array) {
+        if (Stack.back().Node.getArray().size() != Stack.back().Length)
+          break;
+      } else {
+        if (Stack.back().MapEntry ||
+            Stack.back().Node.getMap().size() != Stack.back().Length)
+          break;
+      }
+      Stack.pop_back();
+    }
+  } while (!Stack.empty());
+  return true;
+}
+
+struct WriterStackLevel {
+  DocNode Node;
+  DocNode::MapTy::iterator MapIt;
+  DocNode::ArrayTy::iterator ArrayIt;
+  bool OnKey;
+};
+
+/// Write a MsgPack document to a binary MsgPack blob.
+void Document::writeToBlob(std::string &Blob) {
+  Blob.clear();
+  raw_string_ostream OS(Blob);
+  msgpack::Writer MPWriter(OS);
+  SmallVector<WriterStackLevel, 4> Stack;
+  DocNode Node = getRoot();
+  for (;;) {
+    switch (Node.getKind()) {
+    case Type::Array:
+      MPWriter.writeArraySize(Node.getArray().size());
+      Stack.push_back(
+          {Node, DocNode::MapTy::iterator(), Node.getArray().begin(), false});
+      break;
+    case Type::Map:
+      MPWriter.writeMapSize(Node.getMap().size());
+      Stack.push_back(
+          {Node, Node.getMap().begin(), DocNode::ArrayTy::iterator(), true});
+      break;
+    case Type::Nil:
+      MPWriter.writeNil();
+      break;
+    case Type::Boolean:
+      MPWriter.write(Node.getBool());
+      break;
+    case Type::Int:
+      MPWriter.write(Node.getInt());
+      break;
+    case Type::UInt:
+      MPWriter.write(Node.getUInt());
+      break;
+    case Type::String:
+      MPWriter.write(Node.getString());
+      break;
+    default:
+      llvm_unreachable("unhandled msgpack object kind");
+    }
+    // Pop finished stack levels.
+    while (!Stack.empty()) {
+      if (Stack.back().Node.getKind() == Type::Map) {
+        if (Stack.back().MapIt != Stack.back().Node.getMap().end())
+          break;
+      } else {
+        if (Stack.back().ArrayIt != Stack.back().Node.getArray().end())
+          break;
+      }
+      Stack.pop_back();
+    }
+    if (Stack.empty())
+      break;
+    // Get the next value.
+    if (Stack.back().Node.getKind() == Type::Map) {
+      if (Stack.back().OnKey) {
+        // Do the key of a key,value pair in a map.
+        Node = Stack.back().MapIt->first;
+        Stack.back().OnKey = false;
+      } else {
+        Node = Stack.back().MapIt->second;
+        ++Stack.back().MapIt;
+        Stack.back().OnKey = true;
+      }
+    } else {
+      Node = *Stack.back().ArrayIt;
+      ++Stack.back().ArrayIt;
+    }
+  }
+}
+
diff --git a/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
new file mode 100644
index 000000000000..1d9c81ef8ebc
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -0,0 +1,249 @@
+//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This file implements YAMLIO on a msgpack::Document.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+namespace {
+
+// Struct used to represent scalar node. (MapDocNode and ArrayDocNode already
+// exist in MsgPackDocument.h.)
+struct ScalarDocNode : DocNode {
+  ScalarDocNode(DocNode N) : DocNode(N) {}
+
+  /// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
+  /// returns something else if the result of toString would be ambiguous, e.g.
+  /// a string that parses as a number or boolean.
+  StringRef getYAMLTag() const;
+};
+
+} // namespace
+
+/// Convert this DocNode to a string, assuming it is scalar.
+std::string DocNode::toString() const {
+  std::string S;
+  raw_string_ostream OS(S);
+  switch (getKind()) {
+  case msgpack::Type::String:
+    OS << Raw;
+    break;
+  case msgpack::Type::Nil:
+    break;
+  case msgpack::Type::Boolean:
+    OS << (Bool ? "true" : "false");
+    break;
+  case msgpack::Type::Int:
+    OS << Int;
+    break;
+  case msgpack::Type::UInt:
+    if (getDocument()->getHexMode())
+      OS << format("%#llx", (unsigned long long)UInt);
+    else
+      OS << UInt;
+    break;
+  case msgpack::Type::Float:
+    OS << Float;
+    break;
+  default:
+    llvm_unreachable("not scalar");
+    break;
+  }
+  return OS.str();
+}
+
+/// Convert the StringRef and use it to set this DocNode (assuming scalar). If
+/// it is a string, copy the string into the Document's strings list so we do
+/// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
+StringRef DocNode::fromString(StringRef S, StringRef Tag) {
+  if (Tag == "tag:yaml.org,2002:str")
+    Tag = "";
+  if (Tag == "!int" || Tag == "") {
+    // Try unsigned int then signed int.
+    *this = getDocument()->getNode(uint64_t(0));
+    StringRef Err = yaml::ScalarTraits<uint64_t>::input(S, nullptr, getUInt());
+    if (Err != "") {
+      *this = getDocument()->getNode(int64_t(0));
+      Err = yaml::ScalarTraits<int64_t>::input(S, nullptr, getInt());
+    }
+    if (Err == "" || Tag != "")
+      return Err;
+  }
+  if (Tag == "!nil") {
+    *this = getDocument()->getNode();
+    return "";
+  }
+  if (Tag == "!bool" || Tag == "") {
+    *this = getDocument()->getNode(false);
+    StringRef Err = yaml::ScalarTraits<bool>::input(S, nullptr, getBool());
+    if (Err == "" || Tag != "")
+      return Err;
+  }
+  if (Tag == "!float" || Tag == "") {
+    *this = getDocument()->getNode(0.0);
+    StringRef Err = yaml::ScalarTraits<double>::input(S, nullptr, getFloat());
+    if (Err == "" || Tag != "")
+      return Err;
+  }
+  assert((Tag == "!str" || Tag == "") && "unsupported tag");
+  std::string V;
+  StringRef Err = yaml::ScalarTraits<std::string>::input(S, nullptr, V);
+  if (Err == "")
+    *this = getDocument()->getNode(V, /*Copy=*/true);
+  return Err;
+}
+
+/// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
+/// returns something else if the result of toString would be ambiguous, e.g.
+/// a string that parses as a number or boolean.
+StringRef ScalarDocNode::getYAMLTag() const {
+  if (getKind() == msgpack::Type::Nil)
+    return "!nil";
+  // Try converting both ways and see if we get the same kind. If not, we need
+  // a tag.
+  ScalarDocNode N = getDocument()->getNode();
+  N.fromString(toString(), "");
+  if (N.getKind() == getKind())
+    return "";
+  // Tolerate signedness of int changing, as tags do not differentiate between
+  // them anyway.
+  if (N.getKind() == msgpack::Type::UInt && getKind() == msgpack::Type::Int)
+    return "";
+  if (N.getKind() == msgpack::Type::Int && getKind() == msgpack::Type::UInt)
+    return "";
+  // We do need a tag.
+  switch (getKind()) {
+  case msgpack::Type::String:
+    return "!str";
+  case msgpack::Type::Int:
+    return "!int";
+  case msgpack::Type::UInt:
+    return "!int";
+  case msgpack::Type::Boolean:
+    return "!bool";
+  case msgpack::Type::Float:
+    return "!float";
+  default:
+    llvm_unreachable("unrecognized kind");
+  }
+}
+
+namespace llvm {
+namespace yaml {
+
+/// YAMLIO for DocNode
+template <> struct PolymorphicTraits<DocNode> {
+
+  static NodeKind getKind(const DocNode &N) {
+    switch (N.getKind()) {
+    case msgpack::Type::Map:
+      return NodeKind::Map;
+    case msgpack::Type::Array:
+      return NodeKind::Sequence;
+    default:
+      return NodeKind::Scalar;
+    }
+  }
+
+  static MapDocNode &getAsMap(DocNode &N) { return N.getMap(/*Convert=*/true); }
+
+  static ArrayDocNode &getAsSequence(DocNode &N) {
+    N.getArray(/*Convert=*/true);
+    return *static_cast<ArrayDocNode *>(&N);
+  }
+
+  static ScalarDocNode &getAsScalar(DocNode &N) {
+    return *static_cast<ScalarDocNode *>(&N);
+  }
+};
+
+/// YAMLIO for ScalarDocNode
+template <> struct TaggedScalarTraits<ScalarDocNode> {
+
+  static void output(const ScalarDocNode &S, void *Ctxt, raw_ostream &OS,
+                     raw_ostream &TagOS) {
+    TagOS << S.getYAMLTag();
+    OS << S.toString();
+  }
+
+  static StringRef input(StringRef Str, StringRef Tag, void *Ctxt,
+                         ScalarDocNode &S) {
+    return S.fromString(Str, Tag);
+  }
+
+  static QuotingType mustQuote(const ScalarDocNode &S, StringRef ScalarStr) {
+    switch (S.getKind()) {
+    case Type::Int:
+      return ScalarTraits<int64_t>::mustQuote(ScalarStr);
+    case Type::UInt:
+      return ScalarTraits<uint64_t>::mustQuote(ScalarStr);
+    case Type::Nil:
+      return ScalarTraits<StringRef>::mustQuote(ScalarStr);
+    case Type::Boolean:
+      return ScalarTraits<bool>::mustQuote(ScalarStr);
+    case Type::Float:
+      return ScalarTraits<double>::mustQuote(ScalarStr);
+    case Type::Binary:
+    case Type::String:
+      return ScalarTraits<std::string>::mustQuote(ScalarStr);
+    default:
+      llvm_unreachable("unrecognized ScalarKind");
+    }
+  }
+};
+
+/// YAMLIO for MapDocNode
+template <> struct CustomMappingTraits<MapDocNode> {
+
+  static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {
+    ScalarDocNode KeyObj = M.getDocument()->getNode();
+    KeyObj.fromString(Key, "");
+    IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]);
+  }
+
+  static void output(IO &IO, MapDocNode &M) {
+    for (auto I : M.getMap()) {
+      IO.mapRequired(I.first.toString().c_str(), I.second);
+    }
+  }
+};
+
+/// YAMLIO for ArrayNode
+template <> struct SequenceTraits<ArrayDocNode> {
+
+  static size_t size(IO &IO, ArrayDocNode &A) { return A.size(); }
+
+  static DocNode &element(IO &IO, ArrayDocNode &A, size_t Index) {
+    return A[Index];
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+/// Convert MsgPack Document to YAML text.
+void msgpack::Document::toYAML(raw_ostream &OS) {
+  yaml::Output Yout(OS);
+  Yout << getRoot();
+}
+
+/// Read YAML text into the MsgPack document. Returns false on failure.
+bool msgpack::Document::fromYAML(StringRef S) {
+  clear();
+  yaml::Input Yin(S);
+  Yin >> getRoot();
+  return !Yin.error();
+}
+
diff --git a/lib/BinaryFormat/MsgPackReader.cpp b/lib/BinaryFormat/MsgPackReader.cpp
index b510fdba9608..872a6e0e29f8 100644
--- a/lib/BinaryFormat/MsgPackReader.cpp
+++ b/lib/BinaryFormat/MsgPackReader.cpp
@@ -1,9 +1,8 @@
 //===- MsgPackReader.cpp - Simple MsgPack reader ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/BinaryFormat/MsgPackTypes.cpp b/lib/BinaryFormat/MsgPackTypes.cpp
deleted file mode 100644
index 4a8f70b10fb8..000000000000
--- a/lib/BinaryFormat/MsgPackTypes.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-//===- MsgPackTypes.cpp - MsgPack Types -------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Implementation of types representing MessagePack "documents".
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BinaryFormat/MsgPackTypes.h"
-#include "llvm/Support/Error.h"
-
-using namespace llvm;
-using namespace msgpack;
-
-namespace llvm {
-namespace msgpack {
-void ScalarNode::anchor() {}
-void ArrayNode::anchor() {}
-void MapNode::anchor() {}
-}
-}
-
-Expected<OptNodePtr> Node::readArray(Reader &MPReader, size_t Length) {
-  auto A = std::make_shared<ArrayNode>();
-  for (size_t I = 0; I < Length; ++I) {
-    auto OptNodeOrErr = Node::read(MPReader);
-    if (auto Err = OptNodeOrErr.takeError())
-      return std::move(Err);
-    if (!*OptNodeOrErr)
-      return make_error<StringError>(
-          "Insufficient array elements",
-          std::make_error_code(std::errc::invalid_argument));
-    A->push_back(std::move(**OptNodeOrErr));
-  }
-  return OptNodePtr(std::move(A));
-}
-
-Expected<OptNodePtr> Node::readMap(Reader &MPReader, size_t Length) {
-  auto M = std::make_shared<MapNode>();
-  for (size_t I = 0; I < Length; ++I) {
-    auto OptKeyOrErr = Node::read(MPReader);
-    if (auto Err = OptKeyOrErr.takeError())
-      return std::move(Err);
-    if (!*OptKeyOrErr)
-      return make_error<StringError>(
-          "Insufficient map elements",
-          std::make_error_code(std::errc::invalid_argument));
-    auto OptValOrErr = Node::read(MPReader);
-    if (auto Err = OptValOrErr.takeError())
-      return std::move(Err);
-    if (!*OptValOrErr)
-      return make_error<StringError>(
-          "Insufficient map elements",
-          std::make_error_code(std::errc::invalid_argument));
-    auto *Key = dyn_cast<ScalarNode>((*OptKeyOrErr)->get());
-    if (!Key)
-      return make_error<StringError>(
-          "Only string map keys are supported",
-          std::make_error_code(std::errc::invalid_argument));
-    if (Key->getScalarKind() != ScalarNode::SK_String)
-      return make_error<StringError>(
-          "Only string map keys are supported",
-          std::make_error_code(std::errc::invalid_argument));
-    M->try_emplace(Key->getString(), std::move(**OptValOrErr));
-  }
-  return OptNodePtr(std::move(M));
-}
-
-Expected<OptNodePtr> Node::read(Reader &MPReader) {
-  Object Obj;
-
-  auto ContinueOrErr = MPReader.read(Obj);
-  if (auto Err = ContinueOrErr.takeError())
-    return std::move(Err);
-  if (!*ContinueOrErr)
-    return None;
-
-  switch (Obj.Kind) {
-  case Type::Int:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.Int));
-  case Type::UInt:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.UInt));
-  case Type::Nil:
-    return OptNodePtr(std::make_shared<ScalarNode>());
-  case Type::Boolean:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.Bool));
-  case Type::Float:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.Float));
-  case Type::String:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
-  case Type::Binary:
-    return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
-  case Type::Array:
-    return Node::readArray(MPReader, Obj.Length);
-  case Type::Map:
-    return Node::readMap(MPReader, Obj.Length);
-  case Type::Extension:
-    return make_error<StringError>(
-        "Extension types are not supported",
-        std::make_error_code(std::errc::invalid_argument));
-  }
-  llvm_unreachable("msgpack::Type not handled");
-}
-
-void ScalarNode::destroy() {
-  switch (SKind) {
-  case SK_String:
-  case SK_Binary:
-    StringValue.~basic_string();
-    break;
-  default:
-    // POD types do not require destruction
-    break;
-  }
-}
-
-ScalarNode::ScalarNode(int64_t IntValue)
-    : Node(NK_Scalar), SKind(SK_Int), IntValue(IntValue) {}
-
-ScalarNode::ScalarNode(int32_t IntValue)
-    : ScalarNode(static_cast<int64_t>(IntValue)) {}
-
-ScalarNode::ScalarNode(uint64_t UIntValue)
-    : Node(NK_Scalar), SKind(SK_UInt), UIntValue(UIntValue) {}
-
-ScalarNode::ScalarNode(uint32_t IntValue)
-    : ScalarNode(static_cast<uint64_t>(IntValue)) {}
-
-ScalarNode::ScalarNode() : Node(NK_Scalar), SKind(SK_Nil) {}
-
-ScalarNode::ScalarNode(bool BoolValue)
-    : Node(NK_Scalar), SKind(SK_Boolean), BoolValue(BoolValue) {}
-
-ScalarNode::ScalarNode(double FloatValue)
-    : Node(NK_Scalar), SKind(SK_Float), BoolValue(FloatValue) {}
-
-ScalarNode::ScalarNode(StringRef StringValue)
-    : Node(NK_Scalar), SKind(SK_String) {
-  new (&this->StringValue) std::string(StringValue);
-}
-
-ScalarNode::ScalarNode(const char *StringValue)
-    : ScalarNode(StringRef(StringValue)) {}
-
-ScalarNode::ScalarNode(std::string &&StringValue)
-    : Node(NK_Scalar), SKind(SK_String) {
-  new (&this->StringValue) std::string(StringValue);
-}
-
-ScalarNode::ScalarNode(MemoryBufferRef BinaryValue)
-    : Node(NK_Scalar), SKind(SK_Binary) {
-  new (&StringValue) std::string(BinaryValue.getBuffer());
-}
-
-ScalarNode::~ScalarNode() { destroy(); }
-
-ScalarNode &ScalarNode::operator=(ScalarNode &&RHS) {
-  destroy();
-  switch (SKind = RHS.SKind) {
-  case SK_Int:
-    IntValue = RHS.IntValue;
-    break;
-  case SK_UInt:
-    UIntValue = RHS.UIntValue;
-    break;
-  case SK_Boolean:
-    BoolValue = RHS.BoolValue;
-    break;
-  case SK_Float:
-    FloatValue = RHS.FloatValue;
-    break;
-  case SK_String:
-  case SK_Binary:
-    new (&StringValue) std::string(std::move(RHS.StringValue));
-    break;
-  case SK_Nil:
-    // pass
-    break;
-  }
-  return *this;
-}
-
-StringRef ScalarNode::inputYAML(StringRef ScalarStr) {
-  switch (SKind) {
-  case SK_Int:
-    return yaml::ScalarTraits<int64_t>::input(ScalarStr, nullptr, IntValue);
-  case SK_UInt:
-    return yaml::ScalarTraits<uint64_t>::input(ScalarStr, nullptr, UIntValue);
-  case SK_Nil:
-    return StringRef();
-  case SK_Boolean:
-    return yaml::ScalarTraits<bool>::input(ScalarStr, nullptr, BoolValue);
-  case SK_Float:
-    return yaml::ScalarTraits<double>::input(ScalarStr, nullptr, FloatValue);
-  case SK_Binary:
-  case SK_String:
-    return yaml::ScalarTraits<std::string>::input(ScalarStr, nullptr,
-                                                  StringValue);
-  }
-  llvm_unreachable("unrecognized ScalarKind");
-}
-
-void ScalarNode::outputYAML(raw_ostream &OS) const {
-  switch (SKind) {
-  case SK_Int:
-    yaml::ScalarTraits<int64_t>::output(IntValue, nullptr, OS);
-    break;
-  case SK_UInt:
-    yaml::ScalarTraits<uint64_t>::output(UIntValue, nullptr, OS);
-    break;
-  case SK_Nil:
-    yaml::ScalarTraits<StringRef>::output("", nullptr, OS);
-    break;
-  case SK_Boolean:
-    yaml::ScalarTraits<bool>::output(BoolValue, nullptr, OS);
-    break;
-  case SK_Float:
-    yaml::ScalarTraits<double>::output(FloatValue, nullptr, OS);
-    break;
-  case SK_Binary:
-  case SK_String:
-    yaml::ScalarTraits<std::string>::output(StringValue, nullptr, OS);
-    break;
-  }
-}
-
-yaml::QuotingType ScalarNode::mustQuoteYAML(StringRef ScalarStr) const {
-  switch (SKind) {
-  case SK_Int:
-    return yaml::ScalarTraits<int64_t>::mustQuote(ScalarStr);
-  case SK_UInt:
-    return yaml::ScalarTraits<uint64_t>::mustQuote(ScalarStr);
-  case SK_Nil:
-    return yaml::ScalarTraits<StringRef>::mustQuote(ScalarStr);
-  case SK_Boolean:
-    return yaml::ScalarTraits<bool>::mustQuote(ScalarStr);
-  case SK_Float:
-    return yaml::ScalarTraits<double>::mustQuote(ScalarStr);
-  case SK_Binary:
-  case SK_String:
-    return yaml::ScalarTraits<std::string>::mustQuote(ScalarStr);
-  }
-  llvm_unreachable("unrecognized ScalarKind");
-}
-
-const char *ScalarNode::IntTag = "!int";
-const char *ScalarNode::NilTag = "!nil";
-const char *ScalarNode::BooleanTag = "!bool";
-const char *ScalarNode::FloatTag = "!float";
-const char *ScalarNode::StringTag = "!str";
-const char *ScalarNode::BinaryTag = "!bin";
-
-StringRef ScalarNode::getYAMLTag() const {
-  switch (SKind) {
-  case SK_Int:
-    return IntTag;
-  case SK_UInt:
-    return IntTag;
-  case SK_Nil:
-    return NilTag;
-  case SK_Boolean:
-    return BooleanTag;
-  case SK_Float:
-    return FloatTag;
-  case SK_String:
-    return StringTag;
-  case SK_Binary:
-    return BinaryTag;
-  }
-  llvm_unreachable("unrecognized ScalarKind");
-}
-
-void ScalarNode::write(Writer &MPWriter) {
-  switch (SKind) {
-  case SK_Int:
-    MPWriter.write(IntValue);
-    break;
-  case SK_UInt:
-    MPWriter.write(UIntValue);
-    break;
-  case SK_Nil:
-    MPWriter.writeNil();
-    break;
-  case SK_Boolean:
-    MPWriter.write(BoolValue);
-    break;
-  case SK_Float:
-    MPWriter.write(FloatValue);
-    break;
-  case SK_String:
-    MPWriter.write(StringValue);
-    break;
-  case SK_Binary:
-    MPWriter.write(MemoryBufferRef(StringValue, ""));
-    break;
-  }
-}
diff --git a/lib/BinaryFormat/MsgPackWriter.cpp b/lib/BinaryFormat/MsgPackWriter.cpp
index d024bb0fcdb2..b4d70e8f78c1 100644
--- a/lib/BinaryFormat/MsgPackWriter.cpp
+++ b/lib/BinaryFormat/MsgPackWriter.cpp
@@ -1,9 +1,8 @@
 //===- MsgPackWriter.cpp - Simple MsgPack writer ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/BinaryFormat/Wasm.cpp b/lib/BinaryFormat/Wasm.cpp
index 94d40bf02a39..d46be481edb3 100644
--- a/lib/BinaryFormat/Wasm.cpp
+++ b/lib/BinaryFormat/Wasm.cpp
@@ -1,16 +1,15 @@
 //===-- llvm/BinaryFormat/Wasm.cpp -------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BinaryFormat/Wasm.h"
 
-std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
-  switch (type) {
+std::string llvm::wasm::toString(wasm::WasmSymbolType Type) {
+  switch (Type) {
   case wasm::WASM_SYMBOL_TYPE_FUNCTION:
     return "WASM_SYMBOL_TYPE_FUNCTION";
   case wasm::WASM_SYMBOL_TYPE_GLOBAL:
@@ -25,8 +24,8 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
   llvm_unreachable("unknown symbol type");
 }
 
-std::string llvm::wasm::relocTypetoString(uint32_t type) {
-  switch (type) {
+std::string llvm::wasm::relocTypetoString(uint32_t Type) {
+  switch (Type) {
 #define WASM_RELOC(NAME, VALUE)                                                \
   case VALUE:                                                                  \
     return #NAME;
@@ -36,3 +35,17 @@ std::string llvm::wasm::relocTypetoString(uint32_t type) {
     llvm_unreachable("unknown reloc type");
   }
 }
+
+bool llvm::wasm::relocTypeHasAddend(uint32_t Type) {
+  // True exactly for the memory-address relocations and the function/section
+  // offset relocations named below; every other value yields false.
+  const bool IsMemoryAddr = Type == R_WASM_MEMORY_ADDR_LEB ||
+                            Type == R_WASM_MEMORY_ADDR_SLEB ||
+                            Type == R_WASM_MEMORY_ADDR_REL_SLEB ||
+                            Type == R_WASM_MEMORY_ADDR_I32;
+  if (IsMemoryAddr)
+    return true;
+
+  return Type == R_WASM_FUNCTION_OFFSET_I32 ||
+         Type == R_WASM_SECTION_OFFSET_I32;
+}
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 3ec45956b3e5..5ac893aef14e 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -1,9 +1,8 @@
 //===-- BitReader.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
new file mode 100644
index 000000000000..9c30d563a314
--- /dev/null
+++ b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -0,0 +1,980 @@
+//===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeAnalyzer.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SHA1.h"
+
+using namespace llvm;
+
+static Error reportError(StringRef Message) {
+  return createStringError(std::errc::illegal_byte_sequence, Message.data());
+}
+
+/// Return a symbolic block name if known, otherwise return None.
+static Optional<const char *> GetBlockName(unsigned BlockID,
+                                           const BitstreamBlockInfo &BlockInfo,
+                                           CurStreamTypeType CurStreamType) {
+  // Standard blocks for all bitcode files.
+  if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
+    if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
+      return "BLOCKINFO_BLOCK";
+    return None;
+  }
+
+  // Check to see if we have a blockinfo record for this block, with a name.
+  if (const BitstreamBlockInfo::BlockInfo *Info =
+          BlockInfo.getBlockInfo(BlockID)) {
+    if (!Info->Name.empty())
+      return Info->Name.c_str();
+  }
+
+  if (CurStreamType != LLVMIRBitstream)
+    return None;
+
+  switch (BlockID) {
+  default:
+    return None;
+  case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+    return "OPERAND_BUNDLE_TAGS_BLOCK";
+  case bitc::MODULE_BLOCK_ID:
+    return "MODULE_BLOCK";
+  case bitc::PARAMATTR_BLOCK_ID:
+    return "PARAMATTR_BLOCK";
+  case bitc::PARAMATTR_GROUP_BLOCK_ID:
+    return "PARAMATTR_GROUP_BLOCK_ID";
+  case bitc::TYPE_BLOCK_ID_NEW:
+    return "TYPE_BLOCK_ID";
+  case bitc::CONSTANTS_BLOCK_ID:
+    return "CONSTANTS_BLOCK";
+  case bitc::FUNCTION_BLOCK_ID:
+    return "FUNCTION_BLOCK";
+  case bitc::IDENTIFICATION_BLOCK_ID:
+    return "IDENTIFICATION_BLOCK_ID";
+  case bitc::VALUE_SYMTAB_BLOCK_ID:
+    return "VALUE_SYMTAB";
+  case bitc::METADATA_BLOCK_ID:
+    return "METADATA_BLOCK";
+  case bitc::METADATA_KIND_BLOCK_ID:
+    return "METADATA_KIND_BLOCK";
+  case bitc::METADATA_ATTACHMENT_ID:
+    return "METADATA_ATTACHMENT_BLOCK";
+  case bitc::USELIST_BLOCK_ID:
+    return "USELIST_BLOCK_ID";
+  case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+    return "GLOBALVAL_SUMMARY_BLOCK";
+  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
+    return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
+  case bitc::MODULE_STRTAB_BLOCK_ID:
+    return "MODULE_STRTAB_BLOCK";
+  case bitc::STRTAB_BLOCK_ID:
+    return "STRTAB_BLOCK";
+  case bitc::SYMTAB_BLOCK_ID:
+    return "SYMTAB_BLOCK";
+  }
+}
+
+/// Return a symbolic code name if known, otherwise return None.
+static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
+                                          const BitstreamBlockInfo &BlockInfo,
+                                          CurStreamTypeType CurStreamType) {
+  // Standard blocks for all bitcode files.
+  if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
+    if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
+      switch (CodeID) {
+      default:
+        return None;
+      case bitc::BLOCKINFO_CODE_SETBID:
+        return "SETBID";
+      case bitc::BLOCKINFO_CODE_BLOCKNAME:
+        return "BLOCKNAME";
+      case bitc::BLOCKINFO_CODE_SETRECORDNAME:
+        return "SETRECORDNAME";
+      }
+    }
+    return None;
+  }
+
+  // Check to see if we have a blockinfo record for this record, with a name.
+  if (const BitstreamBlockInfo::BlockInfo *Info =
+          BlockInfo.getBlockInfo(BlockID)) {
+    for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
+      if (Info->RecordNames[i].first == CodeID)
+        return Info->RecordNames[i].second.c_str();
+  }
+
+  if (CurStreamType != LLVMIRBitstream)
+    return None;
+
+#define STRINGIFY_CODE(PREFIX, CODE)                                           \
+  case bitc::PREFIX##_##CODE:                                                  \
+    return #CODE;
+  switch (BlockID) {
+  default:
+    return None;
+  case bitc::MODULE_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(MODULE_CODE, VERSION)
+      STRINGIFY_CODE(MODULE_CODE, TRIPLE)
+      STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
+      STRINGIFY_CODE(MODULE_CODE, ASM)
+      STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
+      STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
+      STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
+      STRINGIFY_CODE(MODULE_CODE, FUNCTION)
+      STRINGIFY_CODE(MODULE_CODE, ALIAS)
+      STRINGIFY_CODE(MODULE_CODE, GCNAME)
+      STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
+      STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
+      STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
+      STRINGIFY_CODE(MODULE_CODE, HASH)
+    }
+  case bitc::IDENTIFICATION_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
+      STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
+    }
+  case bitc::PARAMATTR_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    // FIXME: Should these be different?
+    case bitc::PARAMATTR_CODE_ENTRY_OLD:
+      return "ENTRY";
+    case bitc::PARAMATTR_CODE_ENTRY:
+      return "ENTRY";
+    }
+  case bitc::PARAMATTR_GROUP_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    case bitc::PARAMATTR_GRP_CODE_ENTRY:
+      return "ENTRY";
+    }
+  case bitc::TYPE_BLOCK_ID_NEW:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
+      STRINGIFY_CODE(TYPE_CODE, VOID)
+      STRINGIFY_CODE(TYPE_CODE, FLOAT)
+      STRINGIFY_CODE(TYPE_CODE, DOUBLE)
+      STRINGIFY_CODE(TYPE_CODE, LABEL)
+      STRINGIFY_CODE(TYPE_CODE, OPAQUE)
+      STRINGIFY_CODE(TYPE_CODE, INTEGER)
+      STRINGIFY_CODE(TYPE_CODE, POINTER)
+      STRINGIFY_CODE(TYPE_CODE, ARRAY)
+      STRINGIFY_CODE(TYPE_CODE, VECTOR)
+      STRINGIFY_CODE(TYPE_CODE, X86_FP80)
+      STRINGIFY_CODE(TYPE_CODE, FP128)
+      STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
+      STRINGIFY_CODE(TYPE_CODE, METADATA)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
+      STRINGIFY_CODE(TYPE_CODE, FUNCTION)
+    }
+
+  case bitc::CONSTANTS_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(CST_CODE, SETTYPE)
+      STRINGIFY_CODE(CST_CODE, NULL)
+      STRINGIFY_CODE(CST_CODE, UNDEF)
+      STRINGIFY_CODE(CST_CODE, INTEGER)
+      STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
+      STRINGIFY_CODE(CST_CODE, FLOAT)
+      STRINGIFY_CODE(CST_CODE, AGGREGATE)
+      STRINGIFY_CODE(CST_CODE, STRING)
+      STRINGIFY_CODE(CST_CODE, CSTRING)
+      STRINGIFY_CODE(CST_CODE, CE_BINOP)
+      STRINGIFY_CODE(CST_CODE, CE_CAST)
+      STRINGIFY_CODE(CST_CODE, CE_GEP)
+      STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
+      STRINGIFY_CODE(CST_CODE, CE_SELECT)
+      STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
+      STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
+      STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
+      STRINGIFY_CODE(CST_CODE, CE_CMP)
+      STRINGIFY_CODE(CST_CODE, INLINEASM)
+      STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
+      STRINGIFY_CODE(CST_CODE, CE_UNOP)
+    case bitc::CST_CODE_BLOCKADDRESS:
+      return "CST_CODE_BLOCKADDRESS";
+      STRINGIFY_CODE(CST_CODE, DATA)
+    }
+  case bitc::FUNCTION_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
+      STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
+      STRINGIFY_CODE(FUNC_CODE, INST_CAST)
+      STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
+      STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
+      STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
+      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
+      STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
+      STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
+      STRINGIFY_CODE(FUNC_CODE, INST_CMP)
+      STRINGIFY_CODE(FUNC_CODE, INST_RET)
+      STRINGIFY_CODE(FUNC_CODE, INST_BR)
+      STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
+      STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
+      STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
+      STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
+      STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
+      STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
+      STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
+      STRINGIFY_CODE(FUNC_CODE, INST_PHI)
+      STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
+      STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
+      STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
+      STRINGIFY_CODE(FUNC_CODE, INST_STORE)
+      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
+      STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
+      STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
+      STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
+      STRINGIFY_CODE(FUNC_CODE, INST_CALL)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
+      STRINGIFY_CODE(FUNC_CODE, INST_GEP)
+      STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
+      STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
+      STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
+      STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
+      STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
+      STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
+      STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
+    }
+  case bitc::VALUE_SYMTAB_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(VST_CODE, ENTRY)
+      STRINGIFY_CODE(VST_CODE, BBENTRY)
+      STRINGIFY_CODE(VST_CODE, FNENTRY)
+      STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
+    }
+  case bitc::MODULE_STRTAB_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(MST_CODE, ENTRY)
+      STRINGIFY_CODE(MST_CODE, HASH)
+    }
+  case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(FS, PERMODULE)
+      STRINGIFY_CODE(FS, PERMODULE_PROFILE)
+      STRINGIFY_CODE(FS, PERMODULE_RELBF)
+      STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
+      STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
+      STRINGIFY_CODE(FS, COMBINED)
+      STRINGIFY_CODE(FS, COMBINED_PROFILE)
+      STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
+      STRINGIFY_CODE(FS, ALIAS)
+      STRINGIFY_CODE(FS, COMBINED_ALIAS)
+      STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
+      STRINGIFY_CODE(FS, VERSION)
+      STRINGIFY_CODE(FS, FLAGS)
+      STRINGIFY_CODE(FS, TYPE_TESTS)
+      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
+      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
+      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
+      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
+      STRINGIFY_CODE(FS, VALUE_GUID)
+      STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
+      STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
+      STRINGIFY_CODE(FS, TYPE_ID)
+      STRINGIFY_CODE(FS, TYPE_ID_METADATA)
+    }
+  case bitc::METADATA_ATTACHMENT_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(METADATA, ATTACHMENT)
+    }
+  case bitc::METADATA_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(METADATA, STRING_OLD)
+      STRINGIFY_CODE(METADATA, VALUE)
+      STRINGIFY_CODE(METADATA, NODE)
+      STRINGIFY_CODE(METADATA, NAME)
+      STRINGIFY_CODE(METADATA, DISTINCT_NODE)
+      STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
+      STRINGIFY_CODE(METADATA, LOCATION)
+      STRINGIFY_CODE(METADATA, OLD_NODE)
+      STRINGIFY_CODE(METADATA, OLD_FN_NODE)
+      STRINGIFY_CODE(METADATA, NAMED_NODE)
+      STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
+      STRINGIFY_CODE(METADATA, SUBRANGE)
+      STRINGIFY_CODE(METADATA, ENUMERATOR)
+      STRINGIFY_CODE(METADATA, BASIC_TYPE)
+      STRINGIFY_CODE(METADATA, FILE)
+      STRINGIFY_CODE(METADATA, DERIVED_TYPE)
+      STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
+      STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
+      STRINGIFY_CODE(METADATA, COMPILE_UNIT)
+      STRINGIFY_CODE(METADATA, SUBPROGRAM)
+      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
+      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
+      STRINGIFY_CODE(METADATA, NAMESPACE)
+      STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
+      STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
+      STRINGIFY_CODE(METADATA, GLOBAL_VAR)
+      STRINGIFY_CODE(METADATA, LOCAL_VAR)
+      STRINGIFY_CODE(METADATA, EXPRESSION)
+      STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
+      STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
+      STRINGIFY_CODE(METADATA, MODULE)
+      STRINGIFY_CODE(METADATA, MACRO)
+      STRINGIFY_CODE(METADATA, MACRO_FILE)
+      STRINGIFY_CODE(METADATA, STRINGS)
+      STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
+      STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
+      STRINGIFY_CODE(METADATA, INDEX_OFFSET)
+      STRINGIFY_CODE(METADATA, INDEX)
+    }
+  case bitc::METADATA_KIND_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+      STRINGIFY_CODE(METADATA, KIND)
+    }
+  case bitc::USELIST_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    case bitc::USELIST_CODE_DEFAULT:
+      return "USELIST_CODE_DEFAULT";
+    case bitc::USELIST_CODE_BB:
+      return "USELIST_CODE_BB";
+    }
+
+  case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    case bitc::OPERAND_BUNDLE_TAG:
+      return "OPERAND_BUNDLE_TAG";
+    }
+  case bitc::STRTAB_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    case bitc::STRTAB_BLOB:
+      return "BLOB";
+    }
+  case bitc::SYMTAB_BLOCK_ID:
+    switch (CodeID) {
+    default:
+      return None;
+    case bitc::SYMTAB_BLOB:
+      return "BLOB";
+    }
+  }
+#undef STRINGIFY_CODE
+}
+
+static void printSize(raw_ostream &OS, double Bits) {
+  OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32));
+}
+static void printSize(raw_ostream &OS, uint64_t Bits) {
+  OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8,
+               (unsigned long)(Bits / 32));
+}
+
+static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
+  auto tryRead = [&Stream](char &Dest, size_t size) -> Error {
+    if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size))
+      Dest = MaybeWord.get();
+    else
+      return MaybeWord.takeError();
+    return Error::success();
+  };
+
+  char Signature[6];
+  if (Error Err = tryRead(Signature[0], 8))
+    return std::move(Err);
+  if (Error Err = tryRead(Signature[1], 8))
+    return std::move(Err);
+
+  // Autodetect the file contents, if it is one we know.
+  if (Signature[0] == 'C' && Signature[1] == 'P') {
+    if (Error Err = tryRead(Signature[2], 8))
+      return std::move(Err);
+    if (Error Err = tryRead(Signature[3], 8))
+      return std::move(Err);
+    if (Signature[2] == 'C' && Signature[3] == 'H')
+      return ClangSerializedASTBitstream;
+  } else if (Signature[0] == 'D' && Signature[1] == 'I') {
+    if (Error Err = tryRead(Signature[2], 8))
+      return std::move(Err);
+    if (Error Err = tryRead(Signature[3], 8))
+      return std::move(Err);
+    if (Signature[2] == 'A' && Signature[3] == 'G')
+      return ClangSerializedDiagnosticsBitstream;
+  } else {
+    if (Error Err = tryRead(Signature[2], 4))
+      return std::move(Err);
+    if (Error Err = tryRead(Signature[3], 4))
+      return std::move(Err);
+    if (Error Err = tryRead(Signature[4], 4))
+      return std::move(Err);
+    if (Error Err = tryRead(Signature[5], 4))
+      return std::move(Err);
+    if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 &&
+        Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
+      return LLVMIRBitstream;
+  }
+  return UnknownBitstream;
+}
+
+static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O,
+                                                 BitstreamCursor &Stream) {
+  ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes();
+  const unsigned char *BufPtr = (const unsigned char *)Bytes.data();
+  const unsigned char *EndBufPtr = BufPtr + Bytes.size();
+
+  // If we have a wrapper header, parse it and ignore the non-bc file
+  // contents. The magic number is 0x0B17C0DE stored in little endian.
+  if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
+    if (Bytes.size() < BWH_HeaderSize)
+      return reportError("Invalid bitcode wrapper header");
+
+    if (O) {
+      unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
+      unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
+      unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
+      unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
+      unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
+
+      O->OS << "<BITCODE_WRAPPER_HEADER"
+            << " Magic=" << format_hex(Magic, 10)
+            << " Version=" << format_hex(Version, 10)
+            << " Offset=" << format_hex(Offset, 10)
+            << " Size=" << format_hex(Size, 10)
+            << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
+    }
+
+    if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
+      return reportError("Invalid bitcode wrapper header");
+  }
+
+  // Use the cursor modified by skipping the wrapper header.
+  Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
+
+  return ReadSignature(Stream);
+}
+
+static bool canDecodeBlob(unsigned Code, unsigned BlockID) {
+  return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS;
+}
+
+Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
+                                                 ArrayRef<uint64_t> Record,
+                                                 StringRef Blob,
+                                                 raw_ostream &OS) {
+  if (Blob.empty())
+    return reportError("Cannot decode empty blob.");
+  if (Record.size() != 2)
+    return reportError(
+        "Decoding metadata strings blob needs two record entries.");
+
+  // Record is {string count, offset of the characters}; the blob holds the
+  // VBR6-encoded lengths first. Reject a zero count (it would wrap the loop's
+  // countdown) and an offset that lies beyond the end of the blob.
+  unsigned NumStrings = Record[0];
+  unsigned StringsOffset = Record[1];
+  if (NumStrings == 0 || StringsOffset > Blob.size())
+    return reportError("bad metadata strings layout");
+  OS << " num-strings = " << NumStrings << " {\n";
+  StringRef Lengths = Blob.slice(0, StringsOffset);
+  SimpleBitstreamCursor R(Lengths);
+  StringRef Strings = Blob.drop_front(StringsOffset);
+  do {
+    if (R.AtEndOfStream())
+      return reportError("bad length");
+    Expected<uint32_t> MaybeSize = R.ReadVBR(6);
+    if (!MaybeSize)
+      return MaybeSize.takeError();
+    uint32_t Size = MaybeSize.get();
+    if (Strings.size() < Size)
+      return reportError("truncated chars");
+    OS << Indent << "    '";
+    OS.write_escaped(Strings.slice(0, Size), /*hex=*/true);
+    OS << "'\n";
+    Strings = Strings.drop_front(Size);
+  } while (--NumStrings);
+  OS << Indent << "  }";
+  return Error::success();
+}
+
+BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
+                                 Optional<StringRef> BlockInfoBuffer)
+    : Stream(Buffer) {
+  if (BlockInfoBuffer)
+    BlockInfoStream.emplace(*BlockInfoBuffer);
+}
+
+Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
+                               Optional<StringRef> CheckHash) {
+  Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
+  if (!MaybeType)
+    return MaybeType.takeError();
+  else
+    CurStreamType = *MaybeType;
+
+  Stream.setBlockInfo(&BlockInfo);
+
+  // Read block info from BlockInfoStream, if specified.
+  // The block info must be a top-level block.
+  if (BlockInfoStream) {
+    BitstreamCursor BlockInfoCursor(*BlockInfoStream);
+    Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
+    if (!H)
+      return H.takeError();
+
+    while (!BlockInfoCursor.AtEndOfStream()) {
+      Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
+      if (!MaybeCode)
+        return MaybeCode.takeError();
+      if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
+        return reportError("Invalid record at top-level in block info file");
+
+      Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID();
+      if (!MaybeBlockID)
+        return MaybeBlockID.takeError();
+      if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
+        Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+            BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+        if (!MaybeNewBlockInfo)
+          return MaybeNewBlockInfo.takeError();
+        Optional<BitstreamBlockInfo> NewBlockInfo =
+            std::move(MaybeNewBlockInfo.get());
+        if (!NewBlockInfo)
+          return reportError("Malformed BlockInfoBlock in block info file");
+        BlockInfo = std::move(*NewBlockInfo);
+        break;
+      }
+
+      if (Error Err = BlockInfoCursor.SkipBlock())
+        return Err;
+    }
+  }
+
+  // Parse the top-level structure.  We only allow blocks at the top-level.
+  while (!Stream.AtEndOfStream()) {
+    Expected<unsigned> MaybeCode = Stream.ReadCode();
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
+      return reportError("Invalid record at top-level");
+
+    Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
+    if (!MaybeBlockID)
+      return MaybeBlockID.takeError();
+
+    if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash))
+      return E;
+    ++NumTopBlocks;
+  }
+
+  return Error::success();
+}
+
+void BitcodeAnalyzer::printStats(BCDumpOptions O,
+                                 Optional<StringRef> Filename) {
+  // Print the whole-file summary followed by one section per block ID in
+  // BlockIDStats; record histograms are included only when O.Histogram is set.
+  uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
+  // Print a summary of the read file; the file name is optional.
+  O.OS << "Summary";
+  if (Filename)
+    O.OS << " of " << *Filename;
+  O.OS << ":\n";
+  O.OS << "         Total size: ";
+  printSize(O.OS, BufferSizeBits);
+  O.OS << "\n";
+  O.OS << "        Stream type: ";
+  switch (CurStreamType) {
+  case UnknownBitstream:
+    O.OS << "unknown\n";
+    break;
+  case LLVMIRBitstream:
+    O.OS << "LLVM IR\n";
+    break;
+  case ClangSerializedASTBitstream:
+    O.OS << "Clang Serialized AST\n";
+    break;
+  case ClangSerializedDiagnosticsBitstream:
+    O.OS << "Clang Serialized Diagnostics\n";
+    break;
+  }
+  O.OS << "  # Toplevel Blocks: " << NumTopBlocks << "\n\n";
+
+  // Emit per-block stats.
+  O.OS << "Per-block Summary:\n";
+  for (const auto &Entry : BlockIDStats) {
+    O.OS << "  Block ID #" << Entry.first;
+    if (Optional<const char *> BlockName =
+            GetBlockName(Entry.first, BlockInfo, CurStreamType))
+      O.OS << " (" << *BlockName << ")";
+    O.OS << ":\n";
+
+    const PerBlockIDStats &Stats = Entry.second;
+    O.OS << "      Num Instances: " << Stats.NumInstances << "\n";
+    O.OS << "         Total Size: ";
+    printSize(O.OS, Stats.NumBits);
+    O.OS << "\n";
+    double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
+    O.OS << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
+    if (Stats.NumInstances > 1) {
+      O.OS << "       Average Size: ";
+      printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances);
+      O.OS << "\n";
+      O.OS << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
+           << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n";
+      O.OS << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
+           << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n";
+      O.OS << "    Tot/Avg Records: " << Stats.NumRecords << "/"
+           << Stats.NumRecords / (double)Stats.NumInstances << "\n";
+    } else {
+      O.OS << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
+      O.OS << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
+      O.OS << "        Num Records: " << Stats.NumRecords << "\n";
+    }
+    if (Stats.NumRecords) {
+      double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
+      O.OS << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
+    }
+    O.OS << "\n";
+
+    // Print a histogram of the codes we see.
+    if (O.Histogram && !Stats.CodeFreq.empty()) {
+      std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code>
+      for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
+        if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
+          FreqPairs.push_back(std::make_pair(Freq, i));
+      llvm::stable_sort(FreqPairs);
+      std::reverse(FreqPairs.begin(), FreqPairs.end());
+
+      O.OS << "\tRecord Histogram:\n";
+      O.OS << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
+      for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
+        const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
+
+        O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
+                       (unsigned long)RecStats.TotalBits);
+
+        if (RecStats.NumInstances > 1)
+          O.OS << format(" %9.1f",
+                         (double)RecStats.TotalBits / RecStats.NumInstances);
+        else
+          O.OS << "          ";
+
+        if (RecStats.NumAbbrev)
+          O.OS << format(" %7.2f", (double)RecStats.NumAbbrev /
+                                       RecStats.NumInstances * 100);
+        else
+          O.OS << "        ";
+
+        O.OS << "  ";
+        if (Optional<const char *> CodeName = GetCodeName(
+                FreqPairs[i].second, Entry.first, BlockInfo, CurStreamType))
+          O.OS << *CodeName << "\n";
+        else
+          O.OS << "UnknownCode" << FreqPairs[i].second << "\n";
+      }
+      O.OS << "\n";
+    }
+  }
+}
+
+Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
+                                  Optional<BCDumpOptions> O,
+                                  Optional<StringRef> CheckHash) {
+  std::string Indent(IndentLevel * 2, ' ');
+  uint64_t BlockBitStart = Stream.GetCurrentBitNo();
+
+  // Get the statistics for this BlockID.
+  PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
+
+  BlockStats.NumInstances++;
+
+  // BLOCKINFO is a special part of the stream.
+  bool DumpRecords = O.hasValue();
+  if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
+    if (O)
+      O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
+    Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+        Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+    if (!MaybeNewBlockInfo)
+      return MaybeNewBlockInfo.takeError();
+    Optional<BitstreamBlockInfo> NewBlockInfo =
+        std::move(MaybeNewBlockInfo.get());
+    if (!NewBlockInfo)
+      return reportError("Malformed BlockInfoBlock");
+    BlockInfo = std::move(*NewBlockInfo);
+    if (Error Err = Stream.JumpToBit(BlockBitStart))
+      return Err;
+    // It's not really interesting to dump the contents of the blockinfo
+    // block.
+    DumpRecords = false;
+  }
+
+  unsigned NumWords = 0;
+  if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords))
+    return Err;
+
+  // Keep it for later, when we see a MODULE_HASH record
+  uint64_t BlockEntryPos = Stream.getCurrentByteNo();
+
+  Optional<const char *> BlockName = None;
+  if (DumpRecords) {
+    O->OS << Indent << "<";
+    if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
+      O->OS << *BlockName;
+    else
+      O->OS << "UnknownBlock" << BlockID;
+
+    if (!O->Symbolic && BlockName)
+      O->OS << " BlockID=" << BlockID;
+
+    O->OS << " NumWords=" << NumWords
+          << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
+  }
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Keep the offset to the metadata index if seen.
+  uint64_t MetadataIndexOffset = 0;
+
+  // Read all the records for this block.
+  while (1) {
+    if (Stream.AtEndOfStream())
+      return reportError("Premature end of bitstream");
+
+    uint64_t RecordStartBit = Stream.GetCurrentBitNo();
+
+    Expected<BitstreamEntry> MaybeEntry =
+        Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::Error:
+      return reportError("malformed bitcode file");
+    case BitstreamEntry::EndBlock: {
+      uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
+      BlockStats.NumBits += BlockBitEnd - BlockBitStart;
+      if (DumpRecords) {
+        O->OS << Indent << "</";
+        if (BlockName)
+          O->OS << *BlockName << ">\n";
+        else
+          O->OS << "UnknownBlock" << BlockID << ">\n";
+      }
+      return Error::success();
+    }
+
+    case BitstreamEntry::SubBlock: {
+      uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
+      if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash))
+        return E;
+      ++BlockStats.NumSubBlocks;
+      uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
+
+      // Don't include subblock sizes in the size of this block.
+      BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
+      continue;
+    }
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    if (Entry.ID == bitc::DEFINE_ABBREV) {
+      if (Error Err = Stream.ReadAbbrevRecord())
+        return Err;
+      ++BlockStats.NumAbbrevs;
+      continue;
+    }
+
+    Record.clear();
+
+    ++BlockStats.NumRecords;
+
+    StringRef Blob;
+    uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
+    Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    unsigned Code = MaybeCode.get();
+
+    // Increment the # occurrences of this code.
+    if (BlockStats.CodeFreq.size() <= Code)
+      BlockStats.CodeFreq.resize(Code + 1);
+    BlockStats.CodeFreq[Code].NumInstances++;
+    BlockStats.CodeFreq[Code].TotalBits +=
+        Stream.GetCurrentBitNo() - RecordStartBit;
+    if (Entry.ID != bitc::UNABBREV_RECORD) {
+      BlockStats.CodeFreq[Code].NumAbbrev++;
+      ++BlockStats.NumAbbreviatedRecords;
+    }
+
+    if (DumpRecords) {
+      O->OS << Indent << "  <";
+      Optional<const char *> CodeName =
+          GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
+      if (CodeName)
+        O->OS << *CodeName;
+      else
+        O->OS << "UnknownCode" << Code;
+      if (!O->Symbolic && CodeName)
+        O->OS << " codeid=" << Code;
+      const BitCodeAbbrev *Abbv = nullptr;
+      if (Entry.ID != bitc::UNABBREV_RECORD) {
+        Abbv = Stream.getAbbrev(Entry.ID);
+        O->OS << " abbrevid=" << Entry.ID;
+      }
+
+      for (unsigned i = 0, e = Record.size(); i != e; ++i)
+        O->OS << " op" << i << "=" << (int64_t)Record[i];
+
+      // If we found a metadata index, let's verify that we had an offset
+      // before and validate its forward reference offset was correct!
+      if (BlockID == bitc::METADATA_BLOCK_ID) {
+        if (Code == bitc::METADATA_INDEX_OFFSET) {
+          if (Record.size() != 2)
+            O->OS << "(Invalid record)";
+          else {
+            auto Offset = Record[0] + (Record[1] << 32);
+            MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
+          }
+        }
+        if (Code == bitc::METADATA_INDEX) {
+          O->OS << " (offset ";
+          if (MetadataIndexOffset == RecordStartBit)
+            O->OS << "match)";
+          else
+            O->OS << "mismatch: " << MetadataIndexOffset << " vs "
+                  << RecordStartBit << ")";
+        }
+      }
+
+      // If we found a module hash, let's verify that it matches!
+      if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
+          CheckHash.hasValue()) {
+        if (Record.size() != 5)
+          O->OS << " (invalid)";
+        else {
+          // Recompute the hash and compare it to the one in the bitcode
+          SHA1 Hasher;
+          StringRef Hash;
+          Hasher.update(*CheckHash);
+          {
+            int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
+            auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
+            Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
+            Hash = Hasher.result();
+          }
+          SmallString<20> RecordedHash;
+          RecordedHash.resize(20);
+          int Pos = 0;
+          for (auto &Val : Record) {
+            assert(!(Val >> 32) && "Unexpected high bits set");
+            RecordedHash[Pos++] = (Val >> 24) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 16) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 8) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 0) & 0xFF;
+          }
+          if (Hash == RecordedHash)
+            O->OS << " (match)";
+          else
+            O->OS << " (!mismatch!)";
+        }
+      }
+
+      O->OS << "/>";
+
+      if (Abbv) {
+        for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+          const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+          if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
+            continue;
+          assert(i + 2 == e && "Array op not second to last");
+          std::string Str;
+          bool ArrayIsPrintable = true;
+          for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
+            if (!isPrint(static_cast<unsigned char>(Record[j]))) {
+              ArrayIsPrintable = false;
+              break;
+            }
+            Str += (char)Record[j];
+          }
+          if (ArrayIsPrintable)
+            O->OS << " record string = '" << Str << "'";
+          break;
+        }
+      }
+
+      if (Blob.data()) {
+        if (canDecodeBlob(Code, BlockID)) {
+          if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS))
+            return E;
+        } else {
+          O->OS << " blob data = ";
+          if (O->ShowBinaryBlobs) {
+            O->OS << "'";
+            O->OS.write_escaped(Blob, /*hex=*/true) << "'";
+          } else {
+            bool BlobIsPrintable = true;
+            for (unsigned i = 0, e = Blob.size(); i != e; ++i)
+              if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
+                BlobIsPrintable = false;
+                break;
+              }
+
+            if (BlobIsPrintable)
+              O->OS << "'" << Blob << "'";
+            else
+              O->OS << "unprintable, " << Blob.size() << " bytes.";
+          }
+        }
+      }
+
+      O->OS << "\n";
+    }
+
+    // Make sure that we can skip the current record.
+    if (Error Err = Stream.JumpToBit(CurrentRecordPos))
+      return Err;
+    if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+      ; // Do nothing.
+    else
+      return Skipped.takeError();
+  }
+}
+
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index fe051e7a9125..29dc7f616392 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1,9 +1,8 @@
 //===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,7 +20,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Argument.h"
@@ -106,18 +105,25 @@ static Error error(const Twine &Message) {
       Message, make_error_code(BitcodeError::CorruptedBitcode));
 }
 
-/// Helper to read the header common to all bitcode files.
-static bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
-  // Sniff for the signature.
-  if (!Stream.canSkipToPos(4) ||
-      Stream.Read(8) != 'B' ||
-      Stream.Read(8) != 'C' ||
-      Stream.Read(4) != 0x0 ||
-      Stream.Read(4) != 0xC ||
-      Stream.Read(4) != 0xE ||
-      Stream.Read(4) != 0xD)
-    return false;
-  return true;
+/// Verify that \p Stream begins with the bitcode magic: the bytes 'B' and 'C'
+/// followed by the nibbles 0x0, 0xC, 0xE, 0xD. Despite the name, the result
+/// is Error::success() when the header is well formed.
+static Error hasInvalidBitcodeHeader(BitstreamCursor &Stream) {
+  if (!Stream.canSkipToPos(4))
+    return createStringError(std::errc::illegal_byte_sequence,
+                             "file too small to contain bitcode header");
+  // Each row is {field width in bits, expected value}, in stream order.
+  static const unsigned Magic[][2] = {{8, 'B'}, {8, 'C'}, {4, 0x0},
+                                      {4, 0xC}, {4, 0xE}, {4, 0xD}};
+  for (const auto &Field : Magic) {
+    Expected<SimpleBitstreamCursor::word_t> Res = Stream.Read(Field[0]);
+    if (!Res)
+      return Res.takeError(); // A failed read is reported unchanged.
+    if (Res.get() != Field[1])
+      return createStringError(std::errc::illegal_byte_sequence,
+                               "file doesn't start with bitcode header");
+  }
+  return Error::success();
 }
 
 static Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
@@ -134,8 +140,8 @@ static Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
       return error("Invalid bitcode wrapper header");
 
   BitstreamCursor Stream(ArrayRef<uint8_t>(BufPtr, BufEnd));
-  if (!hasValidBitcodeHeader(Stream))
-    return error("Invalid bitcode signature");
+  if (Error Err = hasInvalidBitcodeHeader(Stream))
+    return std::move(Err);
 
   return std::move(Stream);
 }
@@ -165,8 +171,8 @@ static void stripTBAA(Module *M) {
 /// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the
 /// "epoch" encoded in the bitcode, and return the producer name if any.
 static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
-  if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+    return std::move(Err);
 
   // Read all the records.
   SmallVector<uint64_t, 64> Record;
@@ -174,7 +180,11 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
   std::string ProducerIdentification;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    BitstreamEntry Entry;
+    if (Expected<BitstreamEntry> Res = Stream.advance())
+      Entry = Res.get();
+    else
+      return Res.takeError();
 
     switch (Entry.Kind) {
     default:
@@ -189,8 +199,10 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
 
     // Read a record.
     Record.clear();
-    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (MaybeBitCode.get()) {
     default: // Default behavior: reject
       return error("Invalid value");
     case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N]
@@ -215,7 +227,12 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
     if (Stream.AtEndOfStream())
       return "";
 
-    BitstreamEntry Entry = Stream.advance();
+    BitstreamEntry Entry;
+    if (Expected<BitstreamEntry> Res = Stream.advance())
+      Entry = std::move(Res.get());
+    else
+      return Res.takeError();
+
     switch (Entry.Kind) {
     case BitstreamEntry::EndBlock:
     case BitstreamEntry::Error:
@@ -226,25 +243,30 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
         return readIdentificationBlock(Stream);
 
       // Ignore other sub-blocks.
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       continue;
     case BitstreamEntry::Record:
-      Stream.skipRecord(Entry.ID);
-      continue;
+      if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+        continue;
+      else
+        return Skipped.takeError();
     }
   }
 }
 
 static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
-  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return std::move(Err);
 
   SmallVector<uint64_t, 64> Record;
   // Read all the records for this module.
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -258,7 +280,10 @@ static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
     }
 
     // Read a record.
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:
       break; // Default behavior, ignore unknown content.
     case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
@@ -281,7 +306,11 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
   // We expect a number of well-defined blocks, though we don't necessarily
   // need to understand them all.
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    BitstreamEntry Entry;
+    if (Expected<BitstreamEntry> Res = Stream.advance())
+      Entry = std::move(Res.get());
+    else
+      return Res.takeError();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -294,20 +323,22 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
         return hasObjCCategoryInModule(Stream);
 
       // Ignore other sub-blocks.
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       continue;
 
     case BitstreamEntry::Record:
-      Stream.skipRecord(Entry.ID);
-      continue;
+      if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+        continue;
+      else
+        return Skipped.takeError();
     }
   }
 }
 
 static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
-  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return std::move(Err);
 
   SmallVector<uint64_t, 64> Record;
 
@@ -315,7 +346,10 @@ static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
 
   // Read all the records for this module.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -329,7 +363,10 @@ static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
     }
 
     // Read a record.
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default: break;  // Default behavior, ignore unknown content.
     case bitc::MODULE_CODE_TRIPLE: {  // TRIPLE: [strchr x N]
       std::string S;
@@ -348,7 +385,10 @@ static Expected<std::string> readTriple(BitstreamCursor &Stream) {
   // We expect a number of well-defined blocks, though we don't necessarily
   // need to understand them all.
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -361,13 +401,15 @@ static Expected<std::string> readTriple(BitstreamCursor &Stream) {
         return readModuleTriple(Stream);
 
       // Ignore other sub-blocks.
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       continue;
 
     case BitstreamEntry::Record:
-      Stream.skipRecord(Entry.ID);
-      continue;
+      if (llvm::Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+        continue;
+      else
+        return Skipped.takeError();
     }
   }
 }
@@ -452,6 +494,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
   std::vector<std::string> GCTable;
 
   std::vector<Type*> TypeList;
+  DenseMap<Function *, FunctionType *> FunctionTypes;
   BitcodeReaderValueList ValueList;
   Optional<MetadataLoader> MDLoader;
   std::vector<Comdat *> ComdatList;
@@ -550,12 +593,42 @@ private:
   StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
   StructType *createIdentifiedStructType(LLVMContext &Context);
 
-  Type *getTypeByID(unsigned ID);
+  /// Convert the bitcode-reader internal type \p Ty into the form that is
+  /// suitable for use in a Value. The intent is to map every pointer type
+  /// inside \p Ty to the opaque pointer type of the same address space when
+  /// opaque pointers are in use.
+  ///
+  /// The current implementation is the identity: \p Ty is returned unchanged.
+  Type *flattenPointerTypes(Type *Ty) { return Ty; }
+
+  /// Flattened (Value-ready) element type of the structured pointer \p Ty.
+  Type *getPointerElementFlatType(Type *Ty) {
+    Type *Pointee = cast<PointerType>(Ty)->getElementType();
+    return flattenPointerTypes(Pointee);
+  }
+
+  /// Split the pointee of \p FullTy into its two views: .first is the fully
+  /// structured (reader-internal) element type and .second is the same type
+  /// run through flattenPointerTypes() for use in a Value.
+  std::pair<Type *, Type *> getPointerElementTypes(Type *FullTy) {
+    Type *Pointee = cast<PointerType>(FullTy)->getElementType();
+    return {Pointee, flattenPointerTypes(Pointee)};
+  }
 
-  Value *getFnValueByID(unsigned ID, Type *Ty) {
+  /// Return type \p ID in flattened form, i.e. suitable for use in a Value.
+  Type *getTypeByID(unsigned ID) {
+    Type *FullTy = getFullyStructuredTypeByID(ID);
+    return flattenPointerTypes(FullTy);
+  }
+
+  /// Return the fully structured (bitcode-reader internal) type
+  /// corresponding to the given \p ID.
+  Type *getFullyStructuredTypeByID(unsigned ID);
+
+  Value *getFnValueByID(unsigned ID, Type *Ty, Type **FullTy = nullptr) {
     if (Ty && Ty->isMetadataTy())
       return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
-    return ValueList.getValueFwdRef(ID, Ty);
+    return ValueList.getValueFwdRef(ID, Ty, FullTy); // Metadata path above leaves *FullTy unset.
   }
 
   Metadata *getFnMetadataByID(unsigned ID) {
@@ -577,7 +650,8 @@ private:
   /// Increment Slot past the number of slots used in the record. Return true on
   /// failure.
   bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
-                        unsigned InstNum, Value *&ResVal) {
+                        unsigned InstNum, Value *&ResVal,
+                        Type **FullTy = nullptr) {
     if (Slot == Record.size()) return true;
     unsigned ValNo = (unsigned)Record[Slot++];
     // Adjust the ValNo, if it was encoded relative to the InstNum.
@@ -586,7 +660,7 @@ private:
     if (ValNo < InstNum) {
       // If this is not a forward reference, just return the value we already
       // have.
-      ResVal = getFnValueByID(ValNo, nullptr);
+      ResVal = getFnValueByID(ValNo, nullptr, FullTy);
       return ResVal == nullptr;
     }
     if (Slot == Record.size())
@@ -594,6 +668,8 @@ private:
 
     unsigned TypeNo = (unsigned)Record[Slot++];
     ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+    if (FullTy)
+      *FullTy = getFullyStructuredTypeByID(TypeNo);
     return ResVal == nullptr;
   }
 
@@ -639,6 +715,10 @@ private:
     return getFnValueByID(ValNo, Ty);
   }
 
+  /// Upgrades old-style typeless byval attributes by adding the corresponding
+  /// argument's pointee type.
+  void propagateByValTypes(CallBase *CB, ArrayRef<Type *> ArgsFullTys);
+
   /// Converts alignment exponent (i.e. power of two (or zero)) to the
   /// corresponding alignment to use. If alignment is too large, returns
   /// a corresponding error code.
@@ -748,6 +828,9 @@ private:
                                                     bool HasRelBF);
   Error parseEntireSummary(unsigned ID);
   Error parseModuleStringTable();
+  void parseTypeIdCompatibleVtableSummaryRecord(ArrayRef<uint64_t> Record);
+  void parseTypeIdCompatibleVtableInfo(ArrayRef<uint64_t> Record, size_t &Slot,
+                                       TypeIdCompatibleVtableInfo &TypeId);
 
   std::pair<ValueInfo, GlobalValue::GUID>
   getValueInfoFromValueId(unsigned ValueId);
@@ -775,7 +858,7 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
                              StringRef ProducerIdentification,
                              LLVMContext &Context)
     : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
-      ValueList(Context) {
+      ValueList(Context, this->Stream.SizeInBytes()) { // param is moved-from
   this->ProducerIdentification = ProducerIdentification;
 }
 
@@ -894,13 +977,15 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
   // values as live.
   bool Live = (RawFlags & 0x2) || Version < 3;
   bool Local = (RawFlags & 0x4);
+  bool AutoHide = (RawFlags & 0x8);
 
-  return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local);
+  return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local, AutoHide);
 }
 
 // Decode the flags for GlobalVariable in the summary
 static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
-  return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false);
+  return GlobalVarSummary::GVarFlags(/*bit 0*/ (RawFlags & 0x1) != 0,
+                                     /*bit 1*/ (RawFlags & 0x2) != 0);
 }
 
 static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@@ -1035,6 +1120,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
   case bitc::RMW_MIN: return AtomicRMWInst::Min;
   case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
   case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
+  case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
+  case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
   }
 }
 
@@ -1095,7 +1182,7 @@ static void upgradeDLLImportExportLinkage(GlobalValue *GV, unsigned Val) {
   }
 }
 
-Type *BitcodeReader::getTypeByID(unsigned ID) {
+Type *BitcodeReader::getFullyStructuredTypeByID(unsigned ID) {
   // The type table size is always specified correctly.
   if (ID >= TypeList.size())
     return nullptr;
@@ -1187,6 +1274,15 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
   case Attribute::ShadowCallStack: return 1ULL << 59;
   case Attribute::SpeculativeLoadHardening:
     return 1ULL << 60;
+  case Attribute::ImmArg:
+    return 1ULL << 61;
+  case Attribute::WillReturn:
+    return 1ULL << 62;
+  case Attribute::NoFree:
+    return 1ULL << 63;
+  case Attribute::NoSync:
+    llvm_unreachable("nosync attribute not supported in raw format");
+    break;
   case Attribute::Dereferenceable:
     llvm_unreachable("dereferenceable attribute not supported in raw format");
     break;
@@ -1200,6 +1296,9 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
   case Attribute::AllocSize:
     llvm_unreachable("allocsize not supported in raw format");
     break;
+  case Attribute::SanitizeMemTag:
+    llvm_unreachable("sanitize_memtag attribute not supported in raw format");
+    break;
   }
   llvm_unreachable("Unsupported attribute type");
 }
@@ -1209,10 +1308,12 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) {
 
   for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
        I = Attribute::AttrKind(I + 1)) {
-    if (I == Attribute::Dereferenceable ||
+    if (I == Attribute::SanitizeMemTag ||
+        I == Attribute::Dereferenceable ||
         I == Attribute::DereferenceableOrNull ||
         I == Attribute::ArgMemOnly ||
-        I == Attribute::AllocSize)
+        I == Attribute::AllocSize ||
+        I == Attribute::NoSync)
       continue;
     if (uint64_t A = (Val & getRawAttributeMask(I))) {
       if (I == Attribute::Alignment)
@@ -1245,8 +1346,8 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
 }
 
 Error BitcodeReader::parseAttributeBlock() {
-  if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+    return Err;
 
   if (!MAttributes.empty())
     return error("Invalid multiple blocks");
@@ -1257,7 +1358,10 @@ Error BitcodeReader::parseAttributeBlock() {
 
   // Read all the records.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1272,7 +1376,10 @@ Error BitcodeReader::parseAttributeBlock() {
 
     // Read a record.
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:  // Default behavior: ignore.
       break;
     case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...]
@@ -1345,6 +1452,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::NoCapture;
   case bitc::ATTR_KIND_NO_DUPLICATE:
     return Attribute::NoDuplicate;
+  case bitc::ATTR_KIND_NOFREE:
+    return Attribute::NoFree;
   case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT:
     return Attribute::NoImplicitFloat;
   case bitc::ATTR_KIND_NO_INLINE:
@@ -1365,6 +1474,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::NoRedZone;
   case bitc::ATTR_KIND_NO_RETURN:
     return Attribute::NoReturn;
+  case bitc::ATTR_KIND_NOSYNC:
+    return Attribute::NoSync;
   case bitc::ATTR_KIND_NOCF_CHECK:
     return Attribute::NoCfCheck;
   case bitc::ATTR_KIND_NO_UNWIND:
@@ -1419,10 +1530,16 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::SwiftSelf;
   case bitc::ATTR_KIND_UW_TABLE:
     return Attribute::UWTable;
+  case bitc::ATTR_KIND_WILLRETURN:
+    return Attribute::WillReturn;
   case bitc::ATTR_KIND_WRITEONLY:
     return Attribute::WriteOnly;
   case bitc::ATTR_KIND_Z_EXT:
     return Attribute::ZExt;
+  case bitc::ATTR_KIND_IMMARG:
+    return Attribute::ImmArg;
+  case bitc::ATTR_KIND_SANITIZE_MEMTAG:
+    return Attribute::SanitizeMemTag;
   }
 }
 
@@ -1444,8 +1561,8 @@ Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) {
 }
 
 Error BitcodeReader::parseAttributeGroupBlock() {
-  if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
+    return Err;
 
   if (!MAttributeGroups.empty())
     return error("Invalid multiple blocks");
@@ -1454,7 +1571,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
 
   // Read all the records.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1469,7 +1589,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
 
     // Read a record.
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:  // Default behavior: ignore.
       break;
     case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
@@ -1486,6 +1609,12 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           if (Error Err = parseAttrKind(Record[++i], &Kind))
             return Err;
 
+          // Upgrade old-style byval attribute to one with a type, even if it's
+          // nullptr. We will have to insert the real type when we associate
+          // this AttributeList with a function.
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(nullptr);
+
           B.addAttribute(Kind);
         } else if (Record[i] == 1) { // Integer attribute
           Attribute::AttrKind Kind;
@@ -1501,9 +1630,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
             B.addDereferenceableOrNullAttr(Record[++i]);
           else if (Kind == Attribute::AllocSize)
             B.addAllocSizeAttrFromRawRepr(Record[++i]);
-        } else {                     // String attribute
-          assert((Record[i] == 3 || Record[i] == 4) &&
-                 "Invalid attribute group entry");
+        } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
           bool HasValue = (Record[i++] == 4);
           SmallString<64> KindStr;
           SmallString<64> ValStr;
@@ -1521,6 +1648,15 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           }
 
           B.addAttribute(KindStr.str(), ValStr.str());
+        } else {
+          assert((Record[i] == 5 || Record[i] == 6) &&
+                 "Invalid attribute group entry");
+          bool HasType = Record[i] == 6;
+          Attribute::AttrKind Kind;
+          if (Error Err = parseAttrKind(Record[++i], &Kind))
+            return Err;
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
         }
       }
 
@@ -1532,8 +1668,8 @@ Error BitcodeReader::parseAttributeGroupBlock() {
 }
 
 Error BitcodeReader::parseTypeTable() {
-  if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
+    return Err; // Forward the cursor's own error instead of a generic one.
 
   return parseTypeTableBody();
 }
@@ -1549,7 +1685,10 @@ Error BitcodeReader::parseTypeTableBody() {
 
   // Read all the records for this type table.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1567,7 +1706,10 @@ Error BitcodeReader::parseTypeTableBody() {
     // Read a record.
     Record.clear();
     Type *ResultTy = nullptr;
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:
       return error("Invalid value");
     case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
@@ -1752,7 +1894,8 @@ Error BitcodeReader::parseTypeTableBody() {
         return error("Invalid type");
       ResultTy = ArrayType::get(ResultTy, Record[0]);
       break;
-    case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty]
+    case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty] or
+                                    //         [numelts, eltty, scalable]
       if (Record.size() < 2)
         return error("Invalid record");
       if (Record[0] == 0)
@@ -1760,7 +1903,8 @@ Error BitcodeReader::parseTypeTableBody() {
       ResultTy = getTypeByID(Record[1]);
       if (!ResultTy || !StructType::isValidElementType(ResultTy))
         return error("Invalid type");
-      ResultTy = VectorType::get(ResultTy, Record[0]);
+      bool Scalable = Record.size() > 2 ? Record[2] : false;
+      ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
       break;
     }
 
@@ -1775,8 +1919,8 @@ Error BitcodeReader::parseTypeTableBody() {
 }
 
 Error BitcodeReader::parseOperandBundleTags() {
-  if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
+    return Err;
 
   if (!BundleTags.empty())
     return error("Invalid multiple blocks");
@@ -1784,7 +1928,10 @@ Error BitcodeReader::parseOperandBundleTags() {
   SmallVector<uint64_t, 64> Record;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1799,7 +1946,10 @@ Error BitcodeReader::parseOperandBundleTags() {
 
     // Tags are implicitly mapped to integers by their order.
 
-    if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG)
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    if (MaybeRecord.get() != bitc::OPERAND_BUNDLE_TAG)
       return error("Invalid record");
 
     // OPERAND_BUNDLE_TAG: [strchr x N]
@@ -1811,15 +1961,19 @@ Error BitcodeReader::parseOperandBundleTags() {
 }
 
 Error BitcodeReader::parseSyncScopeNames() {
-  if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID))
+    return Err;
 
   if (!SSIDs.empty())
     return error("Invalid multiple synchronization scope names blocks");
 
   SmallVector<uint64_t, 64> Record;
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
+
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
     case BitstreamEntry::Error:
@@ -1836,7 +1990,10 @@ Error BitcodeReader::parseSyncScopeNames() {
     // Synchronization scope names are implicitly mapped to synchronization
     // scope IDs by their order.
 
-    if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME)
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    if (MaybeRecord.get() != bitc::SYNC_SCOPE_NAME)
       return error("Invalid record");
 
     SmallString<16> SSN;
@@ -1877,22 +2034,18 @@ Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
 
 /// Helper to note and return the current location, and jump to the given
 /// offset.
-static uint64_t jumpToValueSymbolTable(uint64_t Offset,
-                                       BitstreamCursor &Stream) {
+static Expected<uint64_t> jumpToValueSymbolTable(uint64_t Offset,
+                                                 BitstreamCursor &Stream) {
   // Save the current parsing location so we can jump back at the end
   // of the VST read.
   uint64_t CurrentBit = Stream.GetCurrentBitNo();
-  Stream.JumpToBit(Offset * 32);
-#ifndef NDEBUG
-  // Do some checking if we are in debug mode.
-  BitstreamEntry Entry = Stream.advance();
-  assert(Entry.Kind == BitstreamEntry::SubBlock);
-  assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
-#else
-  // In NDEBUG mode ignore the output so we don't get an unused variable
-  // warning.
-  Stream.advance();
-#endif
+  if (Error JumpFailed = Stream.JumpToBit(Offset * 32))
+    return std::move(JumpFailed);
+  Expected<BitstreamEntry> MaybeEntry = Stream.advance();
+  if (!MaybeEntry)
+    return MaybeEntry.takeError();
+  assert(MaybeEntry.get().Kind == BitstreamEntry::SubBlock);
+  assert(MaybeEntry.get().ID == bitc::VALUE_SYMTAB_BLOCK_ID);
   return CurrentBit;
 }
 
@@ -1917,12 +2070,15 @@ Error BitcodeReader::parseGlobalValueSymbolTable() {
   unsigned FuncBitcodeOffsetDelta =
       Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
 
-  if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock:
@@ -1935,7 +2091,10 @@ Error BitcodeReader::parseGlobalValueSymbolTable() {
     }
 
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     case bitc::VST_CODE_FNENTRY: // [valueid, offset]
       setDeferredFunctionInfo(FuncBitcodeOffsetDelta,
                               cast<Function>(ValueList[Record[0]]), Record);
@@ -1952,12 +2111,16 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
   // VST (where we want to jump to the VST offset) and the function-level
   // VST (where we don't).
   if (Offset > 0) {
-    CurrentBit = jumpToValueSymbolTable(Offset, Stream);
+    Expected<uint64_t> MaybeCurrentBit = jumpToValueSymbolTable(Offset, Stream);
+    if (!MaybeCurrentBit)
+      return MaybeCurrentBit.takeError();
+    CurrentBit = MaybeCurrentBit.get();
     // If this module uses a string table, read this as a module-level VST.
     if (UseStrtab) {
       if (Error Err = parseGlobalValueSymbolTable())
         return Err;
-      Stream.JumpToBit(CurrentBit);
+      if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+        return JumpFailed;
       return Error::success();
     }
     // Otherwise, the VST will be in a similar format to a function-level VST,
@@ -1978,8 +2141,8 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
   unsigned FuncBitcodeOffsetDelta =
       Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
 
-  if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
@@ -1989,7 +2152,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
   SmallString<128> ValueName;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1997,7 +2163,8 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
       if (Offset > 0)
-        Stream.JumpToBit(CurrentBit);
+        if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+          return JumpFailed;
       return Error::success();
     case BitstreamEntry::Record:
       // The interesting case.
@@ -2006,7 +2173,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
 
     // Read a record.
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:  // Default behavior: unknown type.
       break;
     case bitc::VST_CODE_ENTRY: {  // VST_CODE_ENTRY: [valueid, namechar x N]
@@ -2151,17 +2321,21 @@ static APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
 }
 
 Error BitcodeReader::parseConstants() {
-  if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
   // Read all the records for this value table.
   Type *CurTy = Type::getInt32Ty(Context);
+  Type *CurFullTy = Type::getInt32Ty(Context);
   unsigned NextCstNo = ValueList.size();
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2184,8 +2358,10 @@ Error BitcodeReader::parseConstants() {
     Record.clear();
     Type *VoidType = Type::getVoidTy(Context);
     Value *V = nullptr;
-    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (unsigned BitCode = MaybeBitCode.get()) {
     default:  // Default behavior: unknown constant
     case bitc::CST_CODE_UNDEF:     // UNDEF
       V = UndefValue::get(CurTy);
@@ -2197,7 +2373,8 @@ Error BitcodeReader::parseConstants() {
         return error("Invalid record");
       if (TypeList[Record[0]] == VoidType)
         return error("Invalid constant type");
-      CurTy = TypeList[Record[0]];
+      CurFullTy = TypeList[Record[0]];
+      CurTy = flattenPointerTypes(CurFullTy);
       continue;  // Skip the ValueList manipulation.
     case bitc::CST_CODE_NULL:      // NULL
       V = Constant::getNullValue(CurTy);
@@ -2416,23 +2593,27 @@ Error BitcodeReader::parseConstants() {
         InBounds = true;
 
       SmallVector<Constant*, 16> Elts;
+      Type *Elt0FullTy = nullptr;
       while (OpNum != Record.size()) {
+        if (!Elt0FullTy)
+          Elt0FullTy = getFullyStructuredTypeByID(Record[OpNum]);
         Type *ElTy = getTypeByID(Record[OpNum++]);
         if (!ElTy)
           return error("Invalid record");
         Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
       }
 
-      if (PointeeType &&
-          PointeeType !=
-              cast<PointerType>(Elts[0]->getType()->getScalarType())
-                  ->getElementType())
-        return error("Explicit gep operator type does not match pointee type "
-                     "of pointer operand");
-
       if (Elts.size() < 1)
         return error("Invalid gep with no operands");
 
+      Type *ImplicitPointeeType =
+          getPointerElementFlatType(Elt0FullTy->getScalarType());
+      if (!PointeeType)
+        PointeeType = ImplicitPointeeType;
+      else if (PointeeType != ImplicitPointeeType)
+        return error("Explicit gep operator type does not match pointee type "
+                     "of pointer operand");
+
       ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
       V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
                                          InBounds, InRangeIndex);
@@ -2560,10 +2741,10 @@ Error BitcodeReader::parseConstants() {
         AsmStr += (char)Record[2+i];
       for (unsigned i = 0; i != ConstStrSize; ++i)
         ConstrStr += (char)Record[3+AsmStrSize+i];
-      PointerType *PTy = cast<PointerType>(CurTy);
       UpgradeInlineAsmString(&AsmStr);
-      V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
-                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+      V = InlineAsm::get(
+          cast<FunctionType>(getPointerElementFlatType(CurFullTy)), AsmStr,
+          ConstrStr, HasSideEffects, IsAlignStack);
       break;
     }
     // This version adds support for the asm dialect keywords (e.g.,
@@ -2586,11 +2767,11 @@ Error BitcodeReader::parseConstants() {
         AsmStr += (char)Record[2+i];
       for (unsigned i = 0; i != ConstStrSize; ++i)
         ConstrStr += (char)Record[3+AsmStrSize+i];
-      PointerType *PTy = cast<PointerType>(CurTy);
       UpgradeInlineAsmString(&AsmStr);
-      V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
-                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
-                         InlineAsm::AsmDialect(AsmDialect));
+      V = InlineAsm::get(
+          cast<FunctionType>(getPointerElementFlatType(CurFullTy)), AsmStr,
+          ConstrStr, HasSideEffects, IsAlignStack,
+          InlineAsm::AsmDialect(AsmDialect));
       break;
     }
     case bitc::CST_CODE_BLOCKADDRESS:{
@@ -2636,20 +2817,25 @@ Error BitcodeReader::parseConstants() {
     }
     }
 
-    ValueList.assignValue(V, NextCstNo);
+    assert(V->getType() == flattenPointerTypes(CurFullTy) &&
+           "Incorrect fully structured type provided for Constant");
+    ValueList.assignValue(V, NextCstNo, CurFullTy);
     ++NextCstNo;
   }
 }
 
 Error BitcodeReader::parseUseLists() {
-  if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+    return Err;
 
   // Read all the records.
   SmallVector<uint64_t, 64> Record;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2665,7 +2851,10 @@ Error BitcodeReader::parseUseLists() {
     // Read a use list record.
     Record.clear();
     bool IsBB = false;
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default:  // Default behavior: unknown type.
       break;
     case bitc::USELIST_CODE_BB:
@@ -2714,15 +2903,16 @@ Error BitcodeReader::rememberAndSkipMetadata() {
   DeferredMetadataInfo.push_back(CurBit);
 
   // Skip over the block for now.
-  if (Stream.SkipBlock())
-    return error("Invalid record");
+  if (Error Err = Stream.SkipBlock())
+    return Err;
   return Error::success();
 }
 
 Error BitcodeReader::materializeMetadata() {
   for (uint64_t BitPos : DeferredMetadataInfo) {
     // Move the bit stream to the saved position.
-    Stream.JumpToBit(BitPos);
+    if (Error JumpFailed = Stream.JumpToBit(BitPos))
+      return JumpFailed;
     if (Error Err = MDLoader->parseModuleMetadata())
       return Err;
   }
@@ -2760,8 +2950,8 @@ Error BitcodeReader::rememberAndSkipFunctionBody() {
   DeferredFunctionInfo[Fn] = CurBit;
 
   // Skip over the function block for now.
-  if (Stream.SkipBlock())
-    return error("Invalid record");
+  if (Error Err = Stream.SkipBlock())
+    return Err;
   return Error::success();
 }
 
@@ -2786,8 +2976,14 @@ Error BitcodeReader::globalCleanup() {
   }
 
   // Look for global variables which need to be renamed.
+  std::vector<std::pair<GlobalVariable *, GlobalVariable *>> UpgradedVariables;
   for (GlobalVariable &GV : TheModule->globals())
-    UpgradeGlobalVariable(&GV);
+    if (GlobalVariable *Upgraded = UpgradeGlobalVariable(&GV))
+      UpgradedVariables.emplace_back(&GV, Upgraded);
+  for (auto &Pair : UpgradedVariables) {
+    Pair.first->eraseFromParent();
+    TheModule->getGlobalList().push_back(Pair.second);
+  }
 
   // Force deallocation of memory for these vectors to favor the client that
   // want lazy deserialization.
@@ -2802,7 +2998,8 @@ Error BitcodeReader::globalCleanup() {
 /// or if we have an anonymous function being materialized, since anonymous
 /// functions do not have a name and are therefore not in the VST.
 Error BitcodeReader::rememberAndSkipFunctionBodies() {
-  Stream.JumpToBit(NextUnreadBit);
+  if (Error JumpFailed = Stream.JumpToBit(NextUnreadBit))
+    return JumpFailed;
 
   if (Stream.AtEndOfStream())
     return error("Could not find function in stream");
@@ -2817,7 +3014,11 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() {
   SmallVector<uint64_t, 64> Record;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
+
     switch (Entry.Kind) {
     default:
       return error("Expect SubBlock");
@@ -2836,7 +3037,12 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() {
 }
 
 bool BitcodeReaderBase::readBlockInfo() {
-  Optional<BitstreamBlockInfo> NewBlockInfo = Stream.ReadBlockInfoBlock();
+  Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+      Stream.ReadBlockInfoBlock();
+  if (!MaybeNewBlockInfo)
+    return true; // FIXME Handle the error.
+  Optional<BitstreamBlockInfo> NewBlockInfo =
+      std::move(MaybeNewBlockInfo.get());
   if (!NewBlockInfo)
     return true;
   BlockInfo = std::move(*NewBlockInfo);
@@ -2878,14 +3084,16 @@ static void inferDSOLocal(GlobalValue *GV) {
 Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   // v1: [pointer type, isconst, initid, linkage, alignment, section,
   // visibility, threadlocal, unnamed_addr, externally_initialized,
-  // dllstorageclass, comdat, attributes, preemption specifier] (name in VST)
+  // dllstorageclass, comdat, attributes, preemption specifier,
+  // partition strtab offset, partition strtab size] (name in VST)
   // v2: [strtab_offset, strtab_size, v1]
   StringRef Name;
   std::tie(Name, Record) = readNameFromStrtab(Record);
 
   if (Record.size() < 6)
     return error("Invalid record");
-  Type *Ty = getTypeByID(Record[0]);
+  Type *FullTy = getFullyStructuredTypeByID(Record[0]);
+  Type *Ty = flattenPointerTypes(FullTy);
   if (!Ty)
     return error("Invalid record");
   bool isConstant = Record[1] & 1;
@@ -2897,7 +3105,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
     if (!Ty->isPointerTy())
       return error("Invalid type for value");
     AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
-    Ty = cast<PointerType>(Ty)->getElementType();
+    std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
   }
 
   uint64_t RawLinkage = Record[3];
@@ -2943,7 +3151,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   else
     upgradeDLLImportExportLinkage(NewGV, RawLinkage);
 
-  ValueList.push_back(NewGV);
+  FullTy = PointerType::get(FullTy, AddressSpace);
+  assert(NewGV->getType() == flattenPointerTypes(FullTy) &&
+         "Incorrect fully specified type for GlobalVariable");
+  ValueList.push_back(NewGV, FullTy);
 
   // Remember which value to use for the global initializer.
   if (unsigned InitID = Record[2])
@@ -2969,6 +3180,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   }
   inferDSOLocal(NewGV);
 
+  // Check whether we have enough values to read a partition name.
+  if (Record.size() > 15)
+    NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15]));
+
   return Error::success();
 }
 
@@ -2982,13 +3197,14 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
 
   if (Record.size() < 8)
     return error("Invalid record");
-  Type *Ty = getTypeByID(Record[0]);
-  if (!Ty)
-    return error("Invalid record");
-  if (auto *PTy = dyn_cast<PointerType>(Ty))
-    Ty = PTy->getElementType();
-  auto *FTy = dyn_cast<FunctionType>(Ty);
+  Type *FullFTy = getFullyStructuredTypeByID(Record[0]);
+  Type *FTy = flattenPointerTypes(FullFTy);
   if (!FTy)
+    return error("Invalid record");
+  if (isa<PointerType>(FTy))
+    std::tie(FullFTy, FTy) = getPointerElementTypes(FullFTy);
+
+  if (!isa<FunctionType>(FTy))
     return error("Invalid type for value");
   auto CC = static_cast<CallingConv::ID>(Record[1]);
   if (CC & ~CallingConv::MaxID)
@@ -2998,8 +3214,13 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   if (Record.size() > 16)
     AddrSpace = Record[16];
 
-  Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
-                                    AddrSpace, Name, TheModule);
+  Function *Func =
+      Function::Create(cast<FunctionType>(FTy), GlobalValue::ExternalLinkage,
+                       AddrSpace, Name, TheModule);
+
+  assert(Func->getFunctionType() == flattenPointerTypes(FullFTy) &&
+         "Incorrect fully specified type provided for function");
+  FunctionTypes[Func] = cast<FunctionType>(FullFTy);
 
   Func->setCallingConv(CC);
   bool isProto = Record[2];
@@ -3007,6 +3228,19 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   Func->setLinkage(getDecodedLinkage(RawLinkage));
   Func->setAttributes(getAttributes(Record[4]));
 
+  // Upgrade any old-style byval without a type by propagating the argument's
+  // pointee type. There should be no opaque pointers where the byval type is
+  // implicit.
+  for (unsigned i = 0; i != Func->arg_size(); ++i) {
+    if (!Func->hasParamAttribute(i, Attribute::ByVal))
+      continue;
+
+    Type *PTy = cast<FunctionType>(FullFTy)->getParamType(i);
+    Func->removeParamAttr(i, Attribute::ByVal);
+    Func->addParamAttr(i, Attribute::getWithByValType(
+                              Context, getPointerElementFlatType(PTy)));
+  }
+
   unsigned Alignment;
   if (Error Err = parseAlignmentValue(Record[5], Alignment))
     return Err;
@@ -3058,7 +3292,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   }
   inferDSOLocal(Func);
 
-  ValueList.push_back(Func);
+  // Record[16] is the address space number.
+
+  // Check whether we have enough values to read a partition name.
+  if (Record.size() > 18)
+    Func->setPartition(StringRef(Strtab.data() + Record[17], Record[18]));
+
+  Type *FullTy = PointerType::get(FullFTy, AddrSpace);
+  assert(Func->getType() == flattenPointerTypes(FullTy) &&
+         "Incorrect fully specified type provided for Function");
+  ValueList.push_back(Func, FullTy);
 
   // If this is a function with a body, remember the prototype we are
   // creating now, so that we can match up the body with them later.
@@ -3087,7 +3330,8 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
   if (Record.size() < (3 + (unsigned)NewRecord))
     return error("Invalid record");
   unsigned OpNum = 0;
-  Type *Ty = getTypeByID(Record[OpNum++]);
+  Type *FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+  Type *Ty = flattenPointerTypes(FullTy);
   if (!Ty)
     return error("Invalid record");
 
@@ -3096,7 +3340,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
     auto *PTy = dyn_cast<PointerType>(Ty);
     if (!PTy)
       return error("Invalid type for value");
-    Ty = PTy->getElementType();
+    std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
     AddrSpace = PTy->getAddressSpace();
   } else {
     AddrSpace = Record[OpNum++];
@@ -3112,6 +3356,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
   else
     NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
                                 nullptr, TheModule);
+
+  assert(NewGA->getValueType() == flattenPointerTypes(FullTy) &&
+         "Incorrect fully structured type provided for GlobalIndirectSymbol");
   // Old bitcode files didn't have visibility field.
   // Local linkage must have default visibility.
   if (OpNum != Record.size()) {
@@ -3135,23 +3382,37 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
     NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++]));
   inferDSOLocal(NewGA);
 
-  ValueList.push_back(NewGA);
+  // Check whether we have enough values to read a partition name.
+  if (OpNum + 1 < Record.size()) {
+    NewGA->setPartition(
+        StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1]));
+    OpNum += 2;
+  }
+
+  FullTy = PointerType::get(FullTy, AddrSpace);
+  assert(NewGA->getType() == flattenPointerTypes(FullTy) &&
+         "Incorrect fully structured type provided for GlobalIndirectSymbol");
+  ValueList.push_back(NewGA, FullTy);
   IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
   return Error::success();
 }
 
 Error BitcodeReader::parseModule(uint64_t ResumeBit,
                                  bool ShouldLazyLoadMetadata) {
-  if (ResumeBit)
-    Stream.JumpToBit(ResumeBit);
-  else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
-    return error("Invalid record");
+  if (ResumeBit) {
+    if (Error JumpFailed = Stream.JumpToBit(ResumeBit))
+      return JumpFailed;
+  } else if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
   // Read all the records for this module.
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -3162,8 +3423,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
     case BitstreamEntry::SubBlock:
       switch (Entry.ID) {
       default:  // Skip unknown content.
-        if (Stream.SkipBlock())
-          return error("Invalid record");
+        if (Error Err = Stream.SkipBlock())
+          return Err;
         break;
       case bitc::BLOCKINFO_BLOCK_ID:
         if (readBlockInfo())
@@ -3196,8 +3457,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
           // We must have had a VST forward declaration record, which caused
           // the parser to jump to and parse the VST earlier.
           assert(VSTOffset > 0);
-          if (Stream.SkipBlock())
-            return error("Invalid record");
+          if (Error Err = Stream.SkipBlock())
+            return Err;
         }
         break;
       case bitc::CONSTANTS_BLOCK_ID:
@@ -3249,8 +3510,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
             // materializing functions. The ResumeBit points to the
             // start of the last function block recorded in the
             // DeferredFunctionInfo map. Skip it.
-            if (Stream.SkipBlock())
-              return error("Invalid record");
+            if (Error Err = Stream.SkipBlock())
+              return Err;
             continue;
           }
         }
@@ -3294,8 +3555,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
     }
 
     // Read a record.
-    auto BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (unsigned BitCode = MaybeBitCode.get()) {
     default: break;  // Default behavior, ignore unknown content.
     case bitc::MODULE_CODE_VERSION: {
       Expected<unsigned> VersionOrErr = parseVersionRecord(Record);
@@ -3407,10 +3670,23 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
   return Error::success();
 }
 
+void BitcodeReader::propagateByValTypes(CallBase *CB,
+                                        ArrayRef<Type *> ArgsFullTys) {
+  for (unsigned i = 0; i != CB->arg_size(); ++i) {
+    if (!CB->paramHasAttr(i, Attribute::ByVal))
+      continue;
+
+    CB->removeParamAttr(i, Attribute::ByVal);
+    CB->addParamAttr(
+        i, Attribute::getWithByValType(
+               Context, getPointerElementFlatType(ArgsFullTys[i])));
+  }
+}
+
 /// Lazily parse the specified function body block.
 Error BitcodeReader::parseFunctionBody(Function *F) {
-  if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+    return Err;
 
   // Unexpected unresolved metadata when parsing function.
   if (MDLoader->hasFwdRefs())
@@ -3421,9 +3697,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
   unsigned ModuleMDLoaderSize = MDLoader->size();
 
   // Add all the function arguments to the value table.
-  for (Argument &I : F->args())
-    ValueList.push_back(&I);
-
+  unsigned ArgNo = 0;
+  FunctionType *FullFTy = FunctionTypes[F];
+  for (Argument &I : F->args()) {
+    assert(I.getType() == flattenPointerTypes(FullFTy->getParamType(ArgNo)) &&
+           "Incorrect fully specified type for Function Argument");
+    ValueList.push_back(&I, FullFTy->getParamType(ArgNo++));
+  }
   unsigned NextValueNo = ValueList.size();
   BasicBlock *CurBB = nullptr;
   unsigned CurBBNo = 0;
@@ -3444,7 +3724,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
   SmallVector<uint64_t, 64> Record;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -3455,8 +3738,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case BitstreamEntry::SubBlock:
       switch (Entry.ID) {
       default:  // Skip unknown content.
-        if (Stream.SkipBlock())
-          return error("Invalid record");
+        if (Error Err = Stream.SkipBlock())
+          return Err;
         break;
       case bitc::CONSTANTS_BLOCK_ID:
         if (Error Err = parseConstants())
@@ -3492,8 +3775,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     // Read a record.
     Record.clear();
     Instruction *I = nullptr;
-    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Type *FullTy = nullptr;
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (unsigned BitCode = MaybeBitCode.get()) {
     default: // Default behavior: reject
       return error("Invalid value");
     case bitc::FUNC_CODE_DECLAREBLOCKS: {   // DECLAREBLOCKS: [nblocks]
@@ -3634,7 +3920,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           OpNum+2 != Record.size())
         return error("Invalid record");
 
-      Type *ResTy = getTypeByID(Record[OpNum]);
+      FullTy = getFullyStructuredTypeByID(Record[OpNum]);
+      Type *ResTy = flattenPointerTypes(FullTy);
       int Opc = getDecodedCastOpcode(Record[OpNum + 1]);
       if (Opc == -1 || !ResTy)
         return error("Invalid record");
@@ -3663,22 +3950,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
 
       if (BitCode == bitc::FUNC_CODE_INST_GEP) {
         InBounds = Record[OpNum++];
-        Ty = getTypeByID(Record[OpNum++]);
+        FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+        Ty = flattenPointerTypes(FullTy);
       } else {
         InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD;
         Ty = nullptr;
       }
 
       Value *BasePtr;
-      if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+      Type *FullBaseTy = nullptr;
+      if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr, &FullBaseTy))
         return error("Invalid record");
 
-      if (!Ty)
-        Ty = cast<PointerType>(BasePtr->getType()->getScalarType())
-                 ->getElementType();
-      else if (Ty !=
-               cast<PointerType>(BasePtr->getType()->getScalarType())
-                   ->getElementType())
+      if (!Ty) {
+        std::tie(FullTy, Ty) =
+            getPointerElementTypes(FullBaseTy->getScalarType());
+      } else if (Ty != getPointerElementFlatType(FullBaseTy->getScalarType()))
         return error(
             "Explicit gep type does not match pointee type of pointer operand");
 
@@ -3691,6 +3978,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       }
 
       I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx);
+      FullTy = GetElementPtrInst::getGEPReturnType(FullTy, I, GEPIdx);
 
       InstructionList.push_back(I);
       if (InBounds)
@@ -3702,7 +3990,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
                                        // EXTRACTVAL: [opty, opval, n x indices]
       unsigned OpNum = 0;
       Value *Agg;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Agg, &FullTy))
         return error("Invalid record");
 
       unsigned RecSize = Record.size();
@@ -3710,26 +3998,25 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         return error("EXTRACTVAL: Invalid instruction with 0 indices");
 
       SmallVector<unsigned, 4> EXTRACTVALIdx;
-      Type *CurTy = Agg->getType();
       for (; OpNum != RecSize; ++OpNum) {
-        bool IsArray = CurTy->isArrayTy();
-        bool IsStruct = CurTy->isStructTy();
+        bool IsArray = FullTy->isArrayTy();
+        bool IsStruct = FullTy->isStructTy();
         uint64_t Index = Record[OpNum];
 
         if (!IsStruct && !IsArray)
           return error("EXTRACTVAL: Invalid type");
         if ((unsigned)Index != Index)
           return error("Invalid value");
-        if (IsStruct && Index >= CurTy->getStructNumElements())
+        if (IsStruct && Index >= FullTy->getStructNumElements())
           return error("EXTRACTVAL: Invalid struct index");
-        if (IsArray && Index >= CurTy->getArrayNumElements())
+        if (IsArray && Index >= FullTy->getArrayNumElements())
           return error("EXTRACTVAL: Invalid array index");
         EXTRACTVALIdx.push_back((unsigned)Index);
 
         if (IsStruct)
-          CurTy = CurTy->getStructElementType(Index);
+          FullTy = FullTy->getStructElementType(Index);
         else
-          CurTy = CurTy->getArrayElementType();
+          FullTy = FullTy->getArrayElementType();
       }
 
       I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
@@ -3741,7 +4028,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
                            // INSERTVAL: [opty, opval, opty, opval, n x indices]
       unsigned OpNum = 0;
       Value *Agg;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Agg, &FullTy))
         return error("Invalid record");
       Value *Val;
       if (getValueTypePair(Record, OpNum, NextValueNo, Val))
@@ -3787,7 +4074,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       // handles select i1 ... in old bitcode
       unsigned OpNum = 0;
       Value *TrueVal, *FalseVal, *Cond;
-      if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, &FullTy) ||
           popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
           popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
         return error("Invalid record");
@@ -3802,7 +4089,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       // handles select i1 or select [N x i1]
       unsigned OpNum = 0;
       Value *TrueVal, *FalseVal, *Cond;
-      if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, &FullTy) ||
           popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
           getValueTypePair(Record, OpNum, NextValueNo, Cond))
         return error("Invalid record");
@@ -3821,18 +4108,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
 
       I = SelectInst::Create(Cond, TrueVal, FalseVal);
       InstructionList.push_back(I);
+      if (OpNum < Record.size() && isa<FPMathOperator>(I)) {
+        FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
+        if (FMF.any())
+          I->setFastMathFlags(FMF);
+      }
       break;
     }
 
     case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
       unsigned OpNum = 0;
       Value *Vec, *Idx;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Vec, &FullTy) ||
           getValueTypePair(Record, OpNum, NextValueNo, Idx))
         return error("Invalid record");
       if (!Vec->getType()->isVectorTy())
         return error("Invalid type for value");
       I = ExtractElementInst::Create(Vec, Idx);
+      FullTy = FullTy->getVectorElementType();
       InstructionList.push_back(I);
       break;
     }
@@ -3840,7 +4133,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
       unsigned OpNum = 0;
       Value *Vec, *Elt, *Idx;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Vec))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Vec, &FullTy))
         return error("Invalid record");
       if (!Vec->getType()->isVectorTy())
         return error("Invalid type for value");
@@ -3856,7 +4149,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
       unsigned OpNum = 0;
       Value *Vec1, *Vec2, *Mask;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Vec1, &FullTy) ||
           popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
         return error("Invalid record");
 
@@ -3865,6 +4158,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy())
         return error("Invalid type for value");
       I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+      FullTy = VectorType::get(FullTy->getVectorElementType(),
+                               Mask->getType()->getVectorNumElements());
       InstructionList.push_back(I);
       break;
     }
@@ -3882,6 +4177,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
         return error("Invalid record");
 
+      if (OpNum >= Record.size())
+        return error(
+            "Invalid record: operand number exceeded available operands");
+
       unsigned PredVal = Record[OpNum];
       bool IsFP = LHS->getType()->isFPOrFPVectorTy();
       FastMathFlags FMF;
@@ -4168,31 +4467,40 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]);
 
       FunctionType *FTy = nullptr;
-      if (CCInfo >> 13 & 1 &&
-          !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
-        return error("Explicit invoke type is not a function type");
+      FunctionType *FullFTy = nullptr;
+      if ((CCInfo >> 13) & 1) {
+        FullFTy =
+            dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+        if (!FullFTy)
+          return error("Explicit invoke type is not a function type");
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      }
 
       Value *Callee;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
         return error("Invalid record");
 
       PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
       if (!CalleeTy)
         return error("Callee is not a pointer");
       if (!FTy) {
-        FTy = dyn_cast<FunctionType>(CalleeTy->getElementType());
-        if (!FTy)
+        FullFTy =
+            dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+        if (!FullFTy)
           return error("Callee is not of pointer to function type");
-      } else if (CalleeTy->getElementType() != FTy)
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      } else if (getPointerElementFlatType(FullTy) != FTy)
         return error("Explicit invoke type does not match pointee type of "
                      "callee operand");
       if (Record.size() < FTy->getNumParams() + OpNum)
         return error("Insufficient operands to call");
 
       SmallVector<Value*, 16> Ops;
+      SmallVector<Type *, 16> ArgsFullTys;
       for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
         Ops.push_back(getValue(Record, OpNum, NextValueNo,
                                FTy->getParamType(i)));
+        ArgsFullTys.push_back(FullFTy->getParamType(i));
         if (!Ops.back())
           return error("Invalid record");
       }
@@ -4204,18 +4512,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         // Read type/value pairs for varargs params.
         while (OpNum != Record.size()) {
           Value *Op;
-          if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+          Type *FullTy;
+          if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy))
             return error("Invalid record");
           Ops.push_back(Op);
+          ArgsFullTys.push_back(FullTy);
         }
       }
 
-      I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles);
+      I = InvokeInst::Create(FTy, Callee, NormalBB, UnwindBB, Ops,
+                             OperandBundles);
+      FullTy = FullFTy->getReturnType();
       OperandBundles.clear();
       InstructionList.push_back(I);
       cast<InvokeInst>(I)->setCallingConv(
           static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
       cast<InvokeInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
+
       break;
     }
     case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4227,6 +4541,82 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
+    case bitc::FUNC_CODE_INST_CALLBR: {
+      // CALLBR: [attr, cc, norm, transfs, fty, fnid, args]
+      unsigned OpNum = 0;
+      AttributeList PAL = getAttributes(Record[OpNum++]);
+      unsigned CCInfo = Record[OpNum++];
+
+      BasicBlock *DefaultDest = getBasicBlock(Record[OpNum++]);
+      unsigned NumIndirectDests = Record[OpNum++];
+      SmallVector<BasicBlock *, 16> IndirectDests;
+      for (unsigned i = 0, e = NumIndirectDests; i != e; ++i)
+        IndirectDests.push_back(getBasicBlock(Record[OpNum++]));
+
+      FunctionType *FTy = nullptr;
+      FunctionType *FullFTy = nullptr;
+      if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
+        FullFTy =
+            dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+        if (!FullFTy)
+          return error("Explicit call type is not a function type");
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      }
+
+      Value *Callee;
+      if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
+        return error("Invalid record");
+
+      PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+      if (!OpTy)
+        return error("Callee is not a pointer type");
+      if (!FTy) {
+        FullFTy =
+            dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+        if (!FullFTy)
+          return error("Callee is not of pointer to function type");
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      } else if (getPointerElementFlatType(FullTy) != FTy)
+        return error("Explicit call type does not match pointee type of "
+                     "callee operand");
+      if (Record.size() < FTy->getNumParams() + OpNum)
+        return error("Insufficient operands to call");
+
+      SmallVector<Value*, 16> Args;
+      // Read the fixed params.
+      for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+        if (FTy->getParamType(i)->isLabelTy())
+          Args.push_back(getBasicBlock(Record[OpNum]));
+        else
+          Args.push_back(getValue(Record, OpNum, NextValueNo,
+                                  FTy->getParamType(i)));
+        if (!Args.back())
+          return error("Invalid record");
+      }
+
+      // Read type/value pairs for varargs params.
+      if (!FTy->isVarArg()) {
+        if (OpNum != Record.size())
+          return error("Invalid record");
+      } else {
+        while (OpNum != Record.size()) {
+          Value *Op;
+          if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+            return error("Invalid record");
+          Args.push_back(Op);
+        }
+      }
+
+      I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args,
+                             OperandBundles);
+      FullTy = FullFTy->getReturnType();
+      OperandBundles.clear();
+      InstructionList.push_back(I);
+      cast<CallBrInst>(I)->setCallingConv(
+          static_cast<CallingConv::ID>((0x7ff & CCInfo) >> bitc::CALL_CCONV));
+      cast<CallBrInst>(I)->setAttributes(PAL);
+      break;
+    }
     case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
       I = new UnreachableInst(Context);
       InstructionList.push_back(I);
@@ -4234,7 +4624,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
       if (Record.size() < 1 || ((Record.size()-1)&1))
         return error("Invalid record");
-      Type *Ty = getTypeByID(Record[0]);
+      FullTy = getFullyStructuredTypeByID(Record[0]);
+      Type *Ty = flattenPointerTypes(FullTy);
       if (!Ty)
         return error("Invalid record");
 
@@ -4271,7 +4662,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         if (Record.size() < 4)
           return error("Invalid record");
       }
-      Type *Ty = getTypeByID(Record[Idx++]);
+      FullTy = getFullyStructuredTypeByID(Record[Idx++]);
+      Type *Ty = flattenPointerTypes(FullTy);
       if (!Ty)
         return error("Invalid record");
       if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) {
@@ -4324,12 +4716,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
                                 SwiftErrorMask;
       bool InAlloca = AlignRecord & InAllocaMask;
       bool SwiftError = AlignRecord & SwiftErrorMask;
-      Type *Ty = getTypeByID(Record[0]);
+      FullTy = getFullyStructuredTypeByID(Record[0]);
+      Type *Ty = flattenPointerTypes(FullTy);
       if ((AlignRecord & ExplicitTypeMask) == 0) {
         auto *PTy = dyn_cast_or_null<PointerType>(Ty);
         if (!PTy)
           return error("Old-style alloca with a non-pointer type");
-        Ty = PTy->getElementType();
+        std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
       }
       Type *OpTy = getTypeByID(Record[1]);
       Value *Size = getFnValueByID(Record[2], OpTy);
@@ -4348,29 +4741,34 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       AI->setUsedWithInAlloca(InAlloca);
       AI->setSwiftError(SwiftError);
       I = AI;
+      FullTy = PointerType::get(FullTy, AS);
       InstructionList.push_back(I);
       break;
     }
     case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
       unsigned OpNum = 0;
       Value *Op;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy) ||
           (OpNum + 2 != Record.size() && OpNum + 3 != Record.size()))
         return error("Invalid record");
 
+      if (!isa<PointerType>(Op->getType()))
+        return error("Load operand is not a pointer type");
+
       Type *Ty = nullptr;
-      if (OpNum + 3 == Record.size())
-        Ty = getTypeByID(Record[OpNum++]);
+      if (OpNum + 3 == Record.size()) {
+        FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+        Ty = flattenPointerTypes(FullTy);
+      } else
+        std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
+
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
         return Err;
-      if (!Ty)
-        Ty = cast<PointerType>(Op->getType())->getElementType();
 
       unsigned Align;
       if (Error Err = parseAlignmentValue(Record[OpNum], Align))
         return Err;
       I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align);
-
       InstructionList.push_back(I);
       break;
     }
@@ -4378,17 +4776,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
        // LOADATOMIC: [opty, op, align, vol, ordering, ssid]
       unsigned OpNum = 0;
       Value *Op;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy) ||
           (OpNum + 4 != Record.size() && OpNum + 5 != Record.size()))
         return error("Invalid record");
 
+      if (!isa<PointerType>(Op->getType()))
+        return error("Load operand is not a pointer type");
+
       Type *Ty = nullptr;
-      if (OpNum + 5 == Record.size())
-        Ty = getTypeByID(Record[OpNum++]);
+      if (OpNum + 5 == Record.size()) {
+        FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+        Ty = flattenPointerTypes(FullTy);
+      } else
+        std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
+
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
         return Err;
-      if (!Ty)
-        Ty = cast<PointerType>(Op->getType())->getElementType();
 
       AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
       if (Ordering == AtomicOrdering::NotAtomic ||
@@ -4402,8 +4805,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       unsigned Align;
       if (Error Err = parseAlignmentValue(Record[OpNum], Align))
         return Err;
-      I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID);
-
+      I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align, Ordering, SSID);
       InstructionList.push_back(I);
       break;
     }
@@ -4411,12 +4813,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol]
       unsigned OpNum = 0;
       Value *Val, *Ptr;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+      Type *FullTy;
+      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
           (BitCode == bitc::FUNC_CODE_INST_STORE
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast<PointerType>(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          getPointerElementFlatType(FullTy), Val)) ||
           OpNum + 2 != Record.size())
         return error("Invalid record");
 
@@ -4434,13 +4836,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid]
       unsigned OpNum = 0;
       Value *Val, *Ptr;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+      Type *FullTy;
+      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
           !isa<PointerType>(Ptr->getType()) ||
           (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast<PointerType>(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          getPointerElementFlatType(FullTy), Val)) ||
           OpNum + 4 != Record.size())
         return error("Invalid record");
 
@@ -4468,15 +4870,25 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       //          failureordering?, isweak?]
       unsigned OpNum = 0;
       Value *Ptr, *Cmp, *New;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
-          (BitCode == bitc::FUNC_CODE_INST_CMPXCHG
-               ? getValueTypePair(Record, OpNum, NextValueNo, Cmp)
-               : popValue(Record, OpNum, NextValueNo,
-                          cast<PointerType>(Ptr->getType())->getElementType(),
-                          Cmp)) ||
-          popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy))
+        return error("Invalid record");
+
+      if (!isa<PointerType>(Ptr->getType()))
+        return error("Cmpxchg operand is not a pointer type");
+
+      if (BitCode == bitc::FUNC_CODE_INST_CMPXCHG) {
+        if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy))
+          return error("Invalid record");
+      } else if (popValue(Record, OpNum, NextValueNo,
+                          getPointerElementFlatType(FullTy), Cmp))
+        return error("Invalid record");
+      else
+        FullTy = cast<PointerType>(FullTy)->getElementType();
+
+      if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
           Record.size() < OpNum + 3 || Record.size() > OpNum + 5)
         return error("Invalid record");
+
       AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]);
       if (SuccessOrdering == AtomicOrdering::NotAtomic ||
           SuccessOrdering == AtomicOrdering::Unordered)
@@ -4494,6 +4906,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
 
       I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
                                 SSID);
+      FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)});
       cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
 
       if (Record.size() < 8) {
@@ -4502,6 +4915,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         // expecting the first component of a modern cmpxchg.
         CurBB->getInstList().push_back(I);
         I = ExtractValueInst::Create(I, 0);
+        FullTy = cast<StructType>(FullTy)->getElementType(0);
       } else {
         cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum+4]);
       }
@@ -4513,11 +4927,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid]
       unsigned OpNum = 0;
       Value *Ptr, *Val;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+      if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
           !isa<PointerType>(Ptr->getType()) ||
           popValue(Record, OpNum, NextValueNo,
-                    cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
-          OpNum+4 != Record.size())
+                   getPointerElementFlatType(FullTy), Val) ||
+          OpNum + 4 != Record.size())
         return error("Invalid record");
       AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]);
       if (Operation < AtomicRMWInst::FIRST_BINOP ||
@@ -4529,6 +4943,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         return error("Invalid record");
       SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
       I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);
+      FullTy = getPointerElementFlatType(FullTy);
       cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
       InstructionList.push_back(I);
       break;
@@ -4563,28 +4978,36 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       }
 
       FunctionType *FTy = nullptr;
-      if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 &&
-          !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
-        return error("Explicit call type is not a function type");
+      FunctionType *FullFTy = nullptr;
+      if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
+        FullFTy =
+            dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+        if (!FullFTy)
+          return error("Explicit call type is not a function type");
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      }
 
       Value *Callee;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
         return error("Invalid record");
 
       PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
       if (!OpTy)
         return error("Callee is not a pointer type");
       if (!FTy) {
-        FTy = dyn_cast<FunctionType>(OpTy->getElementType());
-        if (!FTy)
+        FullFTy =
+            dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+        if (!FullFTy)
           return error("Callee is not of pointer to function type");
-      } else if (OpTy->getElementType() != FTy)
+        FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+      } else if (getPointerElementFlatType(FullTy) != FTy)
         return error("Explicit call type does not match pointee type of "
                      "callee operand");
       if (Record.size() < FTy->getNumParams() + OpNum)
         return error("Insufficient operands to call");
 
       SmallVector<Value*, 16> Args;
+      SmallVector<Type*, 16> ArgsFullTys;
       // Read the fixed params.
       for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
         if (FTy->getParamType(i)->isLabelTy())
@@ -4592,6 +5015,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         else
           Args.push_back(getValue(Record, OpNum, NextValueNo,
                                   FTy->getParamType(i)));
+        ArgsFullTys.push_back(FullFTy->getParamType(i));
         if (!Args.back())
           return error("Invalid record");
       }
@@ -4603,13 +5027,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       } else {
         while (OpNum != Record.size()) {
           Value *Op;
-          if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+          Type *FullTy;
+          if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy))
             return error("Invalid record");
           Args.push_back(Op);
+          ArgsFullTys.push_back(FullTy);
         }
       }
 
       I = CallInst::Create(FTy, Callee, Args, OperandBundles);
+      FullTy = FullFTy->getReturnType();
       OperandBundles.clear();
       InstructionList.push_back(I);
       cast<CallInst>(I)->setCallingConv(
@@ -4623,6 +5050,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
       if (FMF.any()) {
         if (!isa<FPMathOperator>(I))
           return error("Fast-math-flags specified for call without "
@@ -4636,7 +5064,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         return error("Invalid record");
       Type *OpTy = getTypeByID(Record[0]);
       Value *Op = getValue(Record, 1, NextValueNo, OpTy);
-      Type *ResTy = getTypeByID(Record[2]);
+      FullTy = getFullyStructuredTypeByID(Record[2]);
+      Type *ResTy = flattenPointerTypes(FullTy);
       if (!OpTy || !Op || !ResTy)
         return error("Invalid record");
       I = new VAArgInst(Op, ResTy);
@@ -4686,8 +5115,23 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     }
 
     // Non-void values get registered in the value table for future use.
-    if (I && !I->getType()->isVoidTy())
-      ValueList.assignValue(I, NextValueNo++);
+    if (I && !I->getType()->isVoidTy()) {
+      if (!FullTy) {
+        FullTy = I->getType();
+        assert(
+            !FullTy->isPointerTy() && !isa<StructType>(FullTy) &&
+            !isa<ArrayType>(FullTy) &&
+            (!isa<VectorType>(FullTy) ||
+             FullTy->getVectorElementType()->isFloatingPointTy() ||
+             FullTy->getVectorElementType()->isIntegerTy()) &&
+            "Structured types must be assigned with corresponding non-opaque "
+            "pointer type");
+      }
+
+      assert(I->getType() == flattenPointerTypes(FullTy) &&
+             "Incorrect fully structured type provided for Instruction");
+      ValueList.assignValue(I, NextValueNo++, FullTy);
+    }
   }
 
 OutOfRecordLoop:
@@ -4769,8 +5213,8 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
     return Err;
 
   // Move the bit stream to the saved position of the deferred function body.
-  Stream.JumpToBit(DFII->second);
-
+  if (Error JumpFailed = Stream.JumpToBit(DFII->second))
+    return JumpFailed;
   if (Error Err = parseFunctionBody(F))
     return Err;
   F->setIsMaterializable(false);
@@ -4933,10 +5377,13 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
     return Error::success();
 
   assert(Offset > 0 && "Expected non-zero VST offset");
-  uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream);
+  Expected<uint64_t> MaybeCurrentBit = jumpToValueSymbolTable(Offset, Stream);
+  if (!MaybeCurrentBit)
+    return MaybeCurrentBit.takeError();
+  uint64_t CurrentBit = MaybeCurrentBit.get();
 
-  if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
@@ -4944,7 +5391,10 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
   SmallString<128> ValueName;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -4952,7 +5402,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
       // Done parsing VST, jump back to wherever we came from.
-      Stream.JumpToBit(CurrentBit);
+      if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+        return JumpFailed;
       return Error::success();
     case BitstreamEntry::Record:
       // The interesting case.
@@ -4961,7 +5412,10 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
 
     // Read a record.
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records).
       break;
     case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N]
@@ -5009,8 +5463,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
 // At the end of this routine the module Index is populated with a map
 // from global value id to GlobalValueSummary objects.
 Error ModuleSummaryIndexBitcodeReader::parseModule() {
-  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
   DenseMap<unsigned, GlobalValue::LinkageTypes> ValueIdToLinkageMap;
@@ -5018,7 +5472,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
 
   // Read the index for this module.
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -5029,8 +5486,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
     case BitstreamEntry::SubBlock:
       switch (Entry.ID) {
       default: // Skip unknown content.
-        if (Stream.SkipBlock())
-          return error("Invalid record");
+        if (Error Err = Stream.SkipBlock())
+          return Err;
         break;
       case bitc::BLOCKINFO_BLOCK_ID:
         // Need to parse these to get abbrev ids (e.g. for VST)
@@ -5043,8 +5500,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
         assert(((SeenValueSymbolTable && VSTOffset > 0) ||
                 !SeenGlobalValSummary) &&
                "Expected early VST parse via VSTOffset record");
-        if (Stream.SkipBlock())
-          return error("Invalid record");
+        if (Error Err = Stream.SkipBlock())
+          return Err;
         break;
       case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
       case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
@@ -5075,8 +5532,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
 
     case BitstreamEntry::Record: {
         Record.clear();
-        auto BitCode = Stream.readRecord(Entry.ID, Record);
-        switch (BitCode) {
+        Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+        if (!MaybeBitCode)
+          return MaybeBitCode.takeError();
+        switch (MaybeBitCode.get()) {
         default:
           break; // Default behavior, ignore unknown content.
         case bitc::MODULE_CODE_VERSION: {
@@ -5224,32 +5683,66 @@ static void parseTypeIdSummaryRecord(ArrayRef<uint64_t> Record,
     parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
 }
 
-static void setImmutableRefs(std::vector<ValueInfo> &Refs, unsigned Count) {
-  // Read-only refs are in the end of the refs list.
-  for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo)
+void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableInfo(
+    ArrayRef<uint64_t> Record, size_t &Slot,
+    TypeIdCompatibleVtableInfo &TypeId) {
+  uint64_t Offset = Record[Slot++];
+  ValueInfo Callee = getValueInfoFromValueId(Record[Slot++]).first;
+  TypeId.push_back({Offset, Callee});
+}
+
+void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
+    ArrayRef<uint64_t> Record) {
+  size_t Slot = 0;
+  TypeIdCompatibleVtableInfo &TypeId =
+      TheIndex.getOrInsertTypeIdCompatibleVtableSummary(
+          {Strtab.data() + Record[Slot],
+           static_cast<size_t>(Record[Slot + 1])});
+  Slot += 2;
+  // Remaining operands are (offset, valueid) pairs; ignore a partial one.
+  while (Slot + 1 < Record.size())
+    parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
+}
+
+static void setSpecialRefs(std::vector<ValueInfo> &Refs, unsigned ROCnt,
+                           unsigned WOCnt) {
+  // Readonly and writeonly refs are at the end of the refs list.
+  assert(ROCnt + WOCnt <= Refs.size());
+  unsigned FirstWORef = Refs.size() - WOCnt;
+  unsigned RefNo = FirstWORef - ROCnt;
+  for (; RefNo < FirstWORef; ++RefNo)
     Refs[RefNo].setReadOnly();
+  for (; RefNo < Refs.size(); ++RefNo)
+    Refs[RefNo].setWriteOnly();
 }
 
 // Eagerly parse the entire summary block. This populates the GlobalValueSummary
 // objects in the index.
 Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
-  if (Stream.EnterSubBlock(ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(ID))
+    return Err;
   SmallVector<uint64_t, 64> Record;
 
   // Parse version
   {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
+
     if (Entry.Kind != BitstreamEntry::Record)
       return error("Invalid Summary Block: record for version expected");
-    if (Stream.readRecord(Entry.ID, Record) != bitc::FS_VERSION)
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    if (MaybeRecord.get() != bitc::FS_VERSION)
       return error("Invalid Summary Block: version expected");
   }
   const uint64_t Version = Record[0];
   const bool IsOldProfileFormat = Version == 1;
-  if (Version < 1 || Version > 6)
+  if (Version < 1 || Version > 7)
     return error("Invalid summary version " + Twine(Version) +
-                 ". Version should be in the range [1-6].");
+                 ". Version should be in the range [1-7].");
   Record.clear();
 
   // Keep around the last seen summary to be used when we see an optional
@@ -5267,7 +5760,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       PendingTypeCheckedLoadConstVCalls;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5288,8 +5784,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
     // in the combined index VST entries). The records also contain
     // information used for ThinLTO renaming and importing.
     Record.clear();
-    auto BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (unsigned BitCode = MaybeBitCode.get()) {
     default: // Default behavior: ignore.
       break;
     case bitc::FS_FLAGS: {  // [flags]
@@ -5343,15 +5841,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       unsigned InstCount = Record[2];
       uint64_t RawFunFlags = 0;
       unsigned NumRefs = Record[3];
-      unsigned NumImmutableRefs = 0;
+      unsigned NumRORefs = 0, NumWORefs = 0;
       int RefListStartIndex = 4;
       if (Version >= 4) {
         RawFunFlags = Record[3];
         NumRefs = Record[4];
         RefListStartIndex = 5;
         if (Version >= 5) {
-          NumImmutableRefs = Record[5];
+          NumRORefs = Record[5];
           RefListStartIndex = 6;
+          if (Version >= 7) {
+            NumWORefs = Record[6];
+            RefListStartIndex = 7;
+          }
         }
       }
 
@@ -5371,7 +5873,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
           ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
           IsOldProfileFormat, HasProfile, HasRelBF);
-      setImmutableRefs(Refs, NumImmutableRefs);
+      setSpecialRefs(Refs, NumRORefs, NumWORefs);
       auto FS = llvm::make_unique<FunctionSummary>(
           Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
           std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
@@ -5406,14 +5908,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       // ownership.
       AS->setModulePath(getThisModule()->first());
 
-      GlobalValue::GUID AliaseeGUID =
-          getValueInfoFromValueId(AliaseeID).first.getGUID();
-      auto AliaseeInModule =
-          TheIndex.findSummaryInModule(AliaseeGUID, ModulePath);
+      auto AliaseeVI = getValueInfoFromValueId(AliaseeID).first;
+      auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeVI, ModulePath);
       if (!AliaseeInModule)
         return error("Alias expects aliasee summary to be parsed");
-      AS->setAliasee(AliaseeInModule);
-      AS->setAliaseeGUID(AliaseeGUID);
+      AS->setAliasee(AliaseeVI, AliaseeInModule);
 
       auto GUID = getValueInfoFromValueId(ValueID);
       AS->setOriginalName(GUID.second);
@@ -5425,7 +5924,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       unsigned ValueID = Record[0];
       uint64_t RawFlags = Record[1];
       unsigned RefArrayStart = 2;
-      GlobalVarSummary::GVarFlags GVF;
+      GlobalVarSummary::GVarFlags GVF(/* ReadOnly */ false,
+                                      /* WriteOnly */ false);
       auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
       if (Version >= 5) {
         GVF = getDecodedGVarFlags(Record[2]);
@@ -5441,6 +5941,34 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
       break;
     }
+    // FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags,
+    //                        numrefs, numrefs x valueid,
+    //                        n x (valueid, offset)]
+    case bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: {
+      unsigned ValueID = Record[0];
+      uint64_t RawFlags = Record[1];
+      GlobalVarSummary::GVarFlags GVF = getDecodedGVarFlags(Record[2]);
+      unsigned NumRefs = Record[3];
+      unsigned RefListStartIndex = 4;
+      unsigned VTableListStartIndex = RefListStartIndex + NumRefs;
+      auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+      std::vector<ValueInfo> Refs = makeRefList(
+          ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
+      VTableFuncList VTableFuncs;
+      // Consume whole (valueid, offset) pairs only; never read past the end.
+      for (size_t I = VTableListStartIndex; I + 1 < Record.size(); I += 2) {
+        ValueInfo Callee = getValueInfoFromValueId(Record[I]).first;
+        VTableFuncs.push_back({Callee, Record[I + 1]});
+      }
+      auto VS =
+          llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
+      VS->setModulePath(getThisModule()->first());
+      VS->setVTableFuncs(VTableFuncs);
+      auto GUID = getValueInfoFromValueId(ValueID);
+      VS->setOriginalName(GUID.second);
+      TheIndex.addGlobalValueSummary(GUID.first, std::move(VS));
+      break;
+    }
     // FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs,
     //               numrefs x valueid, n x (valueid)]
     // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs,
@@ -5454,7 +5982,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       uint64_t RawFunFlags = 0;
       uint64_t EntryCount = 0;
       unsigned NumRefs = Record[4];
-      unsigned NumImmutableRefs = 0;
+      unsigned NumRORefs = 0, NumWORefs = 0;
       int RefListStartIndex = 5;
 
       if (Version >= 4) {
@@ -5462,13 +5990,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
         RefListStartIndex = 6;
         size_t NumRefsIndex = 5;
         if (Version >= 5) {
+          unsigned NumRORefsOffset = 1;
           RefListStartIndex = 7;
           if (Version >= 6) {
             NumRefsIndex = 6;
             EntryCount = Record[5];
             RefListStartIndex = 8;
+            if (Version >= 7) {
+              RefListStartIndex = 9;
+              NumWORefs = Record[8];
+              NumRORefsOffset = 2;
+            }
           }
-          NumImmutableRefs = Record[RefListStartIndex - 1];
+          NumRORefs = Record[RefListStartIndex - NumRORefsOffset];
         }
         NumRefs = Record[NumRefsIndex];
       }
@@ -5484,7 +6018,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
           ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
           IsOldProfileFormat, HasProfile, false);
       ValueInfo VI = getValueInfoFromValueId(ValueID).first;
-      setImmutableRefs(Refs, NumImmutableRefs);
+      setSpecialRefs(Refs, NumRORefs, NumWORefs);
       auto FS = llvm::make_unique<FunctionSummary>(
           Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
           std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
@@ -5516,12 +6050,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       LastSeenSummary = AS.get();
       AS->setModulePath(ModuleIdMap[ModuleId]);
 
-      auto AliaseeGUID =
-          getValueInfoFromValueId(AliaseeValueId).first.getGUID();
-      auto AliaseeInModule =
-          TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath());
-      AS->setAliasee(AliaseeInModule);
-      AS->setAliaseeGUID(AliaseeGUID);
+      auto AliaseeVI = getValueInfoFromValueId(AliaseeValueId).first;
+      auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeVI, AS->modulePath());
+      AS->setAliasee(AliaseeVI, AliaseeInModule);
 
       ValueInfo VI = getValueInfoFromValueId(ValueID).first;
       LastSeenGUID = VI.getGUID();
@@ -5534,7 +6065,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       uint64_t ModuleId = Record[1];
       uint64_t RawFlags = Record[2];
       unsigned RefArrayStart = 3;
-      GlobalVarSummary::GVarFlags GVF;
+      GlobalVarSummary::GVarFlags GVF(/* ReadOnly */ false,
+                                      /* WriteOnly */ false);
       auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
       if (Version >= 5) {
         GVF = getDecodedGVarFlags(Record[3]);
@@ -5610,6 +6142,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
     case bitc::FS_TYPE_ID:
       parseTypeIdSummaryRecord(Record, Strtab, TheIndex);
       break;
+
+    case bitc::FS_TYPE_ID_METADATA:
+      parseTypeIdCompatibleVtableSummaryRecord(Record);
+      break;
     }
   }
   llvm_unreachable("Exit infinite loop");
@@ -5618,8 +6154,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
 // Parse the  module string table block into the Index.
 // This populates the ModulePathStringTable map in the index.
 Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
-  if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
@@ -5627,7 +6163,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
   ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5641,7 +6180,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
     }
 
     Record.clear();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default: // Default behavior: ignore.
       break;
     case bitc::MST_CODE_ENTRY: {
@@ -5707,12 +6249,16 @@ const std::error_category &llvm::BitcodeErrorCategory() {
 
 static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
                                             unsigned Block, unsigned RecordID) {
-  if (Stream.EnterSubBlock(Block))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(Block))
+    return std::move(Err);
 
   StringRef Strtab;
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
+
     switch (Entry.Kind) {
     case BitstreamEntry::EndBlock:
       return Strtab;
@@ -5721,14 +6267,18 @@ static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
       return error("Malformed block");
 
     case BitstreamEntry::SubBlock:
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       break;
 
     case BitstreamEntry::Record:
       StringRef Blob;
       SmallVector<uint64_t, 1> Record;
-      if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID)
+      Expected<unsigned> MaybeRecord =
+          Stream.readRecord(Entry.ID, Record, &Blob);
+      if (!MaybeRecord)
+        return MaybeRecord.takeError();
+      if (MaybeRecord.get() == RecordID)
         Strtab = Blob;
       break;
     }
@@ -5764,7 +6314,11 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
     if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
       return F;
 
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
+
     switch (Entry.Kind) {
     case BitstreamEntry::EndBlock:
     case BitstreamEntry::Error:
@@ -5774,10 +6328,16 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
       uint64_t IdentificationBit = -1ull;
       if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
         IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8;
-        if (Stream.SkipBlock())
-          return error("Malformed block");
+        if (Error Err = Stream.SkipBlock())
+          return std::move(Err);
+
+        {
+          Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+          if (!MaybeEntry)
+            return MaybeEntry.takeError();
+          Entry = MaybeEntry.get();
+        }
 
-        Entry = Stream.advance();
         if (Entry.Kind != BitstreamEntry::SubBlock ||
             Entry.ID != bitc::MODULE_BLOCK_ID)
           return error("Malformed block");
@@ -5785,8 +6345,8 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
 
       if (Entry.ID == bitc::MODULE_BLOCK_ID) {
         uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8;
-        if (Stream.SkipBlock())
-          return error("Malformed block");
+        if (Error Err = Stream.SkipBlock())
+          return std::move(Err);
 
         F.Mods.push_back({Stream.getBitcodeBytes().slice(
                               BCBegin, Stream.getCurrentByteNo() - BCBegin),
@@ -5834,13 +6394,15 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
         continue;
       }
 
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       continue;
     }
     case BitstreamEntry::Record:
-      Stream.skipRecord(Entry.ID);
-      continue;
+      if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
+        continue;
+      else
+        return StreamFailed.takeError();
     }
   }
 }
@@ -5860,7 +6422,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
 
   std::string ProducerIdentification;
   if (IdentificationBit != -1ull) {
-    Stream.JumpToBit(IdentificationBit);
+    if (Error JumpFailed = Stream.JumpToBit(IdentificationBit))
+      return std::move(JumpFailed);
     Expected<std::string> ProducerIdentificationOrErr =
         readIdentificationBlock(Stream);
     if (!ProducerIdentificationOrErr)
@@ -5869,7 +6432,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
     ProducerIdentification = *ProducerIdentificationOrErr;
   }
 
-  Stream.JumpToBit(ModuleBit);
+  if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+    return std::move(JumpFailed);
   auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification,
                               Context);
 
@@ -5907,7 +6471,8 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
 Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
                                  StringRef ModulePath, uint64_t ModuleId) {
   BitstreamCursor Stream(Buffer);
-  Stream.JumpToBit(ModuleBit);
+  if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+    return JumpFailed;
 
   ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
                                     ModulePath, ModuleId);
@@ -5917,7 +6482,8 @@ Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
 // Parse the specified bitcode buffer, returning the function info index.
 Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
   BitstreamCursor Stream(Buffer);
-  Stream.JumpToBit(ModuleBit);
+  if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+    return std::move(JumpFailed);
 
   auto Index = llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
   ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index,
@@ -5931,12 +6497,15 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
 
 static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
                                                 unsigned ID) {
-  if (Stream.EnterSubBlock(ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(ID))
+    return std::move(Err);
   SmallVector<uint64_t, 64> Record;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5953,8 +6522,10 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
 
     // Look for the FS_FLAGS record.
     Record.clear();
-    auto BitCode = Stream.readRecord(Entry.ID, Record);
-    switch (BitCode) {
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (MaybeBitCode.get()) {
     default: // Default behavior: ignore.
       break;
     case bitc::FS_FLAGS: { // [flags]
@@ -5972,13 +6543,17 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
 // Check if the given bitcode buffer contains a global value summary block.
 Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
   BitstreamCursor Stream(Buffer);
-  Stream.JumpToBit(ModuleBit);
+  if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+    return std::move(JumpFailed);
 
-  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return std::move(Err);
 
   while (true) {
-    BitstreamEntry Entry = Stream.advance();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
@@ -6007,13 +6582,15 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
       }
 
       // Ignore other sub-blocks.
-      if (Stream.SkipBlock())
-        return error("Malformed block");
+      if (Error Err = Stream.SkipBlock())
+        return std::move(Err);
       continue;
 
     case BitstreamEntry::Record:
-      Stream.skipRecord(Entry.ID);
-      continue;
+      if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
+        continue;
+      else
+        return StreamFailed.takeError();
     }
   }
 }
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
deleted file mode 100644
index 771cf3d927bc..000000000000
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ /dev/null
@@ -1,390 +0,0 @@
-//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Bitcode/BitstreamReader.h"
-#include "llvm/ADT/StringRef.h"
-#include <cassert>
-#include <string>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//  BitstreamCursor implementation
-//===----------------------------------------------------------------------===//
-
-/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
-/// the block, and return true if the block has an error.
-bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
-  // Save the current block's state on BlockScope.
-  BlockScope.push_back(Block(CurCodeSize));
-  BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
-
-  // Add the abbrevs specific to this block to the CurAbbrevs list.
-  if (BlockInfo) {
-    if (const BitstreamBlockInfo::BlockInfo *Info =
-            BlockInfo->getBlockInfo(BlockID)) {
-      CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
-                        Info->Abbrevs.end());
-    }
-  }
-
-  // Get the codesize of this block.
-  CurCodeSize = ReadVBR(bitc::CodeLenWidth);
-  // We can't read more than MaxChunkSize at a time
-  if (CurCodeSize > MaxChunkSize)
-    return true;
-
-  SkipToFourByteBoundary();
-  unsigned NumWords = Read(bitc::BlockSizeWidth);
-  if (NumWordsP) *NumWordsP = NumWords;
-
-  // Validate that this block is sane.
-  return CurCodeSize == 0 || AtEndOfStream();
-}
-
-static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
-                                     const BitCodeAbbrevOp &Op) {
-  assert(!Op.isLiteral() && "Not to be used with literals!");
-
-  // Decode the value as we are commanded.
-  switch (Op.getEncoding()) {
-  case BitCodeAbbrevOp::Array:
-  case BitCodeAbbrevOp::Blob:
-    llvm_unreachable("Should not reach here");
-  case BitCodeAbbrevOp::Fixed:
-    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
-    return Cursor.Read((unsigned)Op.getEncodingData());
-  case BitCodeAbbrevOp::VBR:
-    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
-    return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
-  case BitCodeAbbrevOp::Char6:
-    return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6));
-  }
-  llvm_unreachable("invalid abbreviation encoding");
-}
-
-static void skipAbbreviatedField(BitstreamCursor &Cursor,
-                                 const BitCodeAbbrevOp &Op) {
-  assert(!Op.isLiteral() && "Not to be used with literals!");
-
-  // Decode the value as we are commanded.
-  switch (Op.getEncoding()) {
-  case BitCodeAbbrevOp::Array:
-  case BitCodeAbbrevOp::Blob:
-    llvm_unreachable("Should not reach here");
-  case BitCodeAbbrevOp::Fixed:
-    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
-    Cursor.Read((unsigned)Op.getEncodingData());
-    break;
-  case BitCodeAbbrevOp::VBR:
-    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
-    Cursor.ReadVBR64((unsigned)Op.getEncodingData());
-    break;
-  case BitCodeAbbrevOp::Char6:
-    Cursor.Read(6);
-    break;
-  }
-}
-
-/// skipRecord - Read the current record and discard it.
-unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
-  // Skip unabbreviated records by reading past their entries.
-  if (AbbrevID == bitc::UNABBREV_RECORD) {
-    unsigned Code = ReadVBR(6);
-    unsigned NumElts = ReadVBR(6);
-    for (unsigned i = 0; i != NumElts; ++i)
-      (void)ReadVBR64(6);
-    return Code;
-  }
-
-  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
-  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
-  unsigned Code;
-  if (CodeOp.isLiteral())
-    Code = CodeOp.getLiteralValue();
-  else {
-    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
-        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
-      report_fatal_error("Abbreviation starts with an Array or a Blob");
-    Code = readAbbreviatedField(*this, CodeOp);
-  }
-
-  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
-    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
-    if (Op.isLiteral())
-      continue;
-
-    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
-        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
-      skipAbbreviatedField(*this, Op);
-      continue;
-    }
-
-    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
-      // Array case.  Read the number of elements as a vbr6.
-      unsigned NumElts = ReadVBR(6);
-
-      // Get the element encoding.
-      assert(i+2 == e && "array op not second to last?");
-      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
-
-      // Read all the elements.
-      // Decode the value as we are commanded.
-      switch (EltEnc.getEncoding()) {
-      default:
-        report_fatal_error("Array element type can't be an Array or a Blob");
-      case BitCodeAbbrevOp::Fixed:
-        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
-        JumpToBit(GetCurrentBitNo() + NumElts * EltEnc.getEncodingData());
-        break;
-      case BitCodeAbbrevOp::VBR:
-        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
-        for (; NumElts; --NumElts)
-          ReadVBR64((unsigned)EltEnc.getEncodingData());
-        break;
-      case BitCodeAbbrevOp::Char6:
-        JumpToBit(GetCurrentBitNo() + NumElts * 6);
-        break;
-      }
-      continue;
-    }
-
-    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
-    // Blob case.  Read the number of bytes as a vbr6.
-    unsigned NumElts = ReadVBR(6);
-    SkipToFourByteBoundary();  // 32-bit alignment
-
-    // Figure out where the end of this blob will be including tail padding.
-    size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
-
-    // If this would read off the end of the bitcode file, just set the
-    // record to empty and return.
-    if (!canSkipToPos(NewEnd/8)) {
-      skipToEnd();
-      break;
-    }
-
-    // Skip over the blob.
-    JumpToBit(NewEnd);
-  }
-  return Code;
-}
-
-unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
-                                     SmallVectorImpl<uint64_t> &Vals,
-                                     StringRef *Blob) {
-  if (AbbrevID == bitc::UNABBREV_RECORD) {
-    unsigned Code = ReadVBR(6);
-    unsigned NumElts = ReadVBR(6);
-    for (unsigned i = 0; i != NumElts; ++i)
-      Vals.push_back(ReadVBR64(6));
-    return Code;
-  }
-
-  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
-
-  // Read the record code first.
-  assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
-  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
-  unsigned Code;
-  if (CodeOp.isLiteral())
-    Code = CodeOp.getLiteralValue();
-  else {
-    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
-        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
-      report_fatal_error("Abbreviation starts with an Array or a Blob");
-    Code = readAbbreviatedField(*this, CodeOp);
-  }
-
-  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
-    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
-    if (Op.isLiteral()) {
-      Vals.push_back(Op.getLiteralValue());
-      continue;
-    }
-
-    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
-        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
-      Vals.push_back(readAbbreviatedField(*this, Op));
-      continue;
-    }
-
-    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
-      // Array case.  Read the number of elements as a vbr6.
-      unsigned NumElts = ReadVBR(6);
-
-      // Get the element encoding.
-      if (i + 2 != e)
-        report_fatal_error("Array op not second to last");
-      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
-      if (!EltEnc.isEncoding())
-        report_fatal_error(
-            "Array element type has to be an encoding of a type");
-
-      // Read all the elements.
-      switch (EltEnc.getEncoding()) {
-      default:
-        report_fatal_error("Array element type can't be an Array or a Blob");
-      case BitCodeAbbrevOp::Fixed:
-        for (; NumElts; --NumElts)
-          Vals.push_back(Read((unsigned)EltEnc.getEncodingData()));
-        break;
-      case BitCodeAbbrevOp::VBR:
-        for (; NumElts; --NumElts)
-          Vals.push_back(ReadVBR64((unsigned)EltEnc.getEncodingData()));
-        break;
-      case BitCodeAbbrevOp::Char6:
-        for (; NumElts; --NumElts)
-          Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
-      }
-      continue;
-    }
-
-    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
-    // Blob case.  Read the number of bytes as a vbr6.
-    unsigned NumElts = ReadVBR(6);
-    SkipToFourByteBoundary();  // 32-bit alignment
-
-    // Figure out where the end of this blob will be including tail padding.
-    size_t CurBitPos = GetCurrentBitNo();
-    size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
-
-    // If this would read off the end of the bitcode file, just set the
-    // record to empty and return.
-    if (!canSkipToPos(NewEnd/8)) {
-      Vals.append(NumElts, 0);
-      skipToEnd();
-      break;
-    }
-
-    // Otherwise, inform the streamer that we need these bytes in memory.  Skip
-    // over tail padding first, in case jumping to NewEnd invalidates the Blob
-    // pointer.
-    JumpToBit(NewEnd);
-    const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
-
-    // If we can return a reference to the data, do so to avoid copying it.
-    if (Blob) {
-      *Blob = StringRef(Ptr, NumElts);
-    } else {
-      // Otherwise, unpack into Vals with zero extension.
-      for (; NumElts; --NumElts)
-        Vals.push_back((unsigned char)*Ptr++);
-    }
-  }
-
-  return Code;
-}
-
-void BitstreamCursor::ReadAbbrevRecord() {
-  auto Abbv = std::make_shared<BitCodeAbbrev>();
-  unsigned NumOpInfo = ReadVBR(5);
-  for (unsigned i = 0; i != NumOpInfo; ++i) {
-    bool IsLiteral = Read(1);
-    if (IsLiteral) {
-      Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
-      continue;
-    }
-
-    BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
-    if (BitCodeAbbrevOp::hasEncodingData(E)) {
-      uint64_t Data = ReadVBR64(5);
-
-      // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
-      // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
-      // a slow path in Read() to have to handle reading zero bits.
-      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
-          Data == 0) {
-        Abbv->Add(BitCodeAbbrevOp(0));
-        continue;
-      }
-
-      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
-          Data > MaxChunkSize)
-        report_fatal_error(
-            "Fixed or VBR abbrev record with size > MaxChunkData");
-
-      Abbv->Add(BitCodeAbbrevOp(E, Data));
-    } else
-      Abbv->Add(BitCodeAbbrevOp(E));
-  }
-
-  if (Abbv->getNumOperandInfos() == 0)
-    report_fatal_error("Abbrev record with no operands");
-  CurAbbrevs.push_back(std::move(Abbv));
-}
-
-Optional<BitstreamBlockInfo>
-BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
-  if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return None;
-
-  BitstreamBlockInfo NewBlockInfo;
-
-  SmallVector<uint64_t, 64> Record;
-  BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
-
-  // Read all the records for this module.
-  while (true) {
-    BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
-
-    switch (Entry.Kind) {
-    case llvm::BitstreamEntry::SubBlock: // Handled for us already.
-    case llvm::BitstreamEntry::Error:
-      return None;
-    case llvm::BitstreamEntry::EndBlock:
-      return std::move(NewBlockInfo);
-    case llvm::BitstreamEntry::Record:
-      // The interesting case.
-      break;
-    }
-
-    // Read abbrev records, associate them with CurBID.
-    if (Entry.ID == bitc::DEFINE_ABBREV) {
-      if (!CurBlockInfo) return None;
-      ReadAbbrevRecord();
-
-      // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
-      // appropriate BlockInfo.
-      CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
-      CurAbbrevs.pop_back();
-      continue;
-    }
-
-    // Read a record.
-    Record.clear();
-    switch (readRecord(Entry.ID, Record)) {
-      default: break;  // Default behavior, ignore unknown content.
-      case bitc::BLOCKINFO_CODE_SETBID:
-        if (Record.size() < 1) return None;
-        CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
-        break;
-      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
-        if (!CurBlockInfo) return None;
-        if (!ReadBlockInfoNames)
-          break; // Ignore name.
-        std::string Name;
-        for (unsigned i = 0, e = Record.size(); i != e; ++i)
-          Name += (char)Record[i];
-        CurBlockInfo->Name = Name;
-        break;
-      }
-      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
-        if (!CurBlockInfo) return None;
-        if (!ReadBlockInfoNames)
-          break; // Ignore name.
-        std::string Name;
-        for (unsigned i = 1, e = Record.size(); i != e; ++i)
-          Name += (char)Record[i];
-        CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
-                                                           Name));
-        break;
-      }
-    }
-  }
-}
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 3289aa0acddd..108f71189585 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1,9 +1,8 @@
 //===- MetadataLoader.cpp - Internal BitcodeReader implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,7 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -104,7 +103,7 @@ static cl::opt<bool> DisableLazyLoading(
 
 namespace {
 
-static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
+static int64_t unrotateSign(uint64_t U) { return (U & 1) ? ~(U >> 1) : U >> 1; }
 
 class BitcodeReaderMetadataList {
   /// Array of metadata references.
@@ -131,8 +130,15 @@ class BitcodeReaderMetadataList {
 
   LLVMContext &Context;
 
+  /// Maximum number of valid references. Forward references exceeding the
+  /// maximum must be invalid.
+  unsigned RefsUpperBound;
+
 public:
-  BitcodeReaderMetadataList(LLVMContext &C) : Context(C) {}
+  BitcodeReaderMetadataList(LLVMContext &C, size_t RefsUpperBound)
+      : Context(C),
+        RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+                                RefsUpperBound)) {}
 
   // vector compatibility methods
   unsigned size() const { return MetadataPtrs.size(); }
@@ -219,6 +225,10 @@ void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
 }
 
 Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
+    return nullptr;
+
   if (Idx >= size())
     resize(Idx + 1);
 
@@ -338,7 +348,7 @@ Metadata *BitcodeReaderMetadataList::resolveTypeRefArray(Metadata *MaybeTuple) {
   if (!Tuple || Tuple->isDistinct())
     return MaybeTuple;
 
-  // Look through the DITypeRefArray, upgrading each DITypeRef.
+  // Look through the DITypeRefArray, upgrading each DIType *.
   SmallVector<Metadata *, 32> Ops;
   Ops.reserve(Tuple->getNumOperands());
   for (Metadata *MD : Tuple->operands())
@@ -626,9 +636,10 @@ public:
                      BitcodeReaderValueList &ValueList,
                      std::function<Type *(unsigned)> getTypeByID,
                      bool IsImporting)
-      : MetadataList(TheModule.getContext()), ValueList(ValueList),
-        Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule),
-        getTypeByID(std::move(getTypeByID)), IsImporting(IsImporting) {}
+      : MetadataList(TheModule.getContext(), Stream.SizeInBytes()),
+        ValueList(ValueList), Stream(Stream), Context(TheModule.getContext()),
+        TheModule(TheModule), getTypeByID(std::move(getTypeByID)),
+        IsImporting(IsImporting) {}
 
   Error parseMetadata(bool ModuleLevel);
 
@@ -675,8 +686,12 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
   SmallVector<uint64_t, 64> Record;
   // Get the abbrevs, and preload record positions to make them lazy-loadable.
   while (true) {
-    BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks(
+    Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks(
         BitstreamCursor::AF_DontPopBlockAtEnd);
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
+
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
     case BitstreamEntry::Error:
@@ -688,14 +703,22 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
       // The interesting case.
       ++NumMDRecordLoaded;
       uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
-      auto Code = IndexCursor.skipRecord(Entry.ID);
+      Expected<unsigned> MaybeCode = IndexCursor.skipRecord(Entry.ID);
+      if (!MaybeCode)
+        return MaybeCode.takeError();
+      unsigned Code = MaybeCode.get();
       switch (Code) {
       case bitc::METADATA_STRINGS: {
         // Rewind and parse the strings.
-        IndexCursor.JumpToBit(CurrentPos);
+        if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+          return std::move(Err);
         StringRef Blob;
         Record.clear();
-        IndexCursor.readRecord(Entry.ID, Record, &Blob);
+        if (Expected<unsigned> MaybeRecord =
+                IndexCursor.readRecord(Entry.ID, Record, &Blob))
+          ;
+        else
+          return MaybeRecord.takeError();
         unsigned NumStrings = Record[0];
         MDStringRef.reserve(NumStrings);
         auto IndexNextMDString = [&](StringRef Str) {
@@ -708,26 +731,37 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
       case bitc::METADATA_INDEX_OFFSET: {
         // This is the offset to the index, when we see this we skip all the
         // records and load only an index to these.
-        IndexCursor.JumpToBit(CurrentPos);
+        if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+          return std::move(Err);
         Record.clear();
-        IndexCursor.readRecord(Entry.ID, Record);
+        if (Expected<unsigned> MaybeRecord =
+                IndexCursor.readRecord(Entry.ID, Record))
+          ;
+        else
+          return MaybeRecord.takeError();
         if (Record.size() != 2)
           return error("Invalid record");
         auto Offset = Record[0] + (Record[1] << 32);
         auto BeginPos = IndexCursor.GetCurrentBitNo();
-        IndexCursor.JumpToBit(BeginPos + Offset);
-        Entry = IndexCursor.advanceSkippingSubblocks(
-            BitstreamCursor::AF_DontPopBlockAtEnd);
+        if (Error Err = IndexCursor.JumpToBit(BeginPos + Offset))
+          return std::move(Err);
+        Expected<BitstreamEntry> MaybeEntry =
+            IndexCursor.advanceSkippingSubblocks(
+                BitstreamCursor::AF_DontPopBlockAtEnd);
+        if (!MaybeEntry)
+          return MaybeEntry.takeError();
+        Entry = MaybeEntry.get();
         assert(Entry.Kind == BitstreamEntry::Record &&
                "Corrupted bitcode: Expected `Record` when trying to find the "
                "Metadata index");
         Record.clear();
-        auto Code = IndexCursor.readRecord(Entry.ID, Record);
-        (void)Code;
-        assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected "
-                                               "`METADATA_INDEX` when trying "
-                                               "to find the Metadata index");
-
+        if (Expected<unsigned> MaybeCode =
+                IndexCursor.readRecord(Entry.ID, Record))
+          assert(MaybeCode.get() == bitc::METADATA_INDEX &&
+                 "Corrupted bitcode: Expected `METADATA_INDEX` when trying to "
+                 "find the Metadata index");
+        else
+          return MaybeCode.takeError();
         // Delta unpack
         auto CurrentValue = BeginPos;
         GlobalMetadataBitPosIndex.reserve(Record.size());
@@ -743,21 +777,33 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
         return error("Corrupted Metadata block");
       case bitc::METADATA_NAME: {
         // Named metadata need to be materialized now and aren't deferred.
-        IndexCursor.JumpToBit(CurrentPos);
+        if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+          return std::move(Err);
         Record.clear();
-        unsigned Code = IndexCursor.readRecord(Entry.ID, Record);
-        assert(Code == bitc::METADATA_NAME);
+
+        unsigned Code;
+        if (Expected<unsigned> MaybeCode =
+                IndexCursor.readRecord(Entry.ID, Record)) {
+          Code = MaybeCode.get();
+          assert(Code == bitc::METADATA_NAME);
+        } else
+          return MaybeCode.takeError();
 
         // Read name of the named metadata.
         SmallString<8> Name(Record.begin(), Record.end());
-        Code = IndexCursor.ReadCode();
+        if (Expected<unsigned> MaybeCode = IndexCursor.ReadCode())
+          Code = MaybeCode.get();
+        else
+          return MaybeCode.takeError();
 
         // Named Metadata comes in two parts, we expect the name to be followed
         // by the node
         Record.clear();
-        unsigned NextBitCode = IndexCursor.readRecord(Code, Record);
-        assert(NextBitCode == bitc::METADATA_NAMED_NODE);
-        (void)NextBitCode;
+        if (Expected<unsigned> MaybeNextBitCode =
+                IndexCursor.readRecord(Code, Record))
+          assert(MaybeNextBitCode.get() == bitc::METADATA_NAMED_NODE);
+        else
+          return MaybeNextBitCode.takeError();
 
         // Read named metadata elements.
         unsigned Size = Record.size();
@@ -776,9 +822,14 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
       case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
         // FIXME: we need to do this early because we don't materialize global
         // value explicitly.
-        IndexCursor.JumpToBit(CurrentPos);
+        if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+          return std::move(Err);
         Record.clear();
-        IndexCursor.readRecord(Entry.ID, Record);
+        if (Expected<unsigned> MaybeRecord =
+                IndexCursor.readRecord(Entry.ID, Record))
+          ;
+        else
+          return MaybeRecord.takeError();
         if (Record.size() % 2 == 0)
           return error("Invalid record");
         unsigned ValueID = Record[0];
@@ -812,6 +863,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
       case bitc::METADATA_LEXICAL_BLOCK:
       case bitc::METADATA_LEXICAL_BLOCK_FILE:
       case bitc::METADATA_NAMESPACE:
+      case bitc::METADATA_COMMON_BLOCK:
       case bitc::METADATA_MACRO:
       case bitc::METADATA_MACRO_FILE:
       case bitc::METADATA_TEMPLATE_TYPE:
@@ -845,8 +897,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
   // skip the whole block in case we lazy-load.
   auto EntryPos = Stream.GetCurrentBitNo();
 
-  if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
   PlaceholderQueue Placeholders;
@@ -871,9 +923,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
       // Return at the beginning of the block, since it is easy to skip it
       // entirely from there.
       Stream.ReadBlockEnd(); // Pop the abbrev block context.
-      Stream.JumpToBit(EntryPos);
-      if (Stream.SkipBlock())
-        return error("Invalid record");
+      if (Error Err = IndexCursor.JumpToBit(EntryPos))
+        return Err;
+      if (Error Err = Stream.SkipBlock()) {
+        // FIXME this drops the error on the floor, which
+        // ThinLTO/X86/debuginfo-cu-import.ll relies on.
+        consumeError(std::move(Err));
+        return Error::success();
+      }
       return Error::success();
     }
     // Couldn't load an index, fallback to loading all the block "old-style".
@@ -883,7 +940,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
 
   // Read all the records.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -902,10 +962,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
     Record.clear();
     StringRef Blob;
     ++NumMDRecordLoaded;
-    unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
-    if (Error Err =
-            parseOneMetadata(Record, Code, Placeholders, Blob, NextMetadataNo))
-      return Err;
+    if (Expected<unsigned> MaybeCode =
+            Stream.readRecord(Entry.ID, Record, &Blob)) {
+      if (Error Err = parseOneMetadata(Record, MaybeCode.get(), Placeholders,
+                                       Blob, NextMetadataNo))
+        return Err;
+    } else
+      return MaybeCode.takeError();
   }
 }
 
@@ -930,12 +993,25 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
   }
   SmallVector<uint64_t, 64> Record;
   StringRef Blob;
-  IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
-  auto Entry = IndexCursor.advanceSkippingSubblocks();
+  if (Error Err = IndexCursor.JumpToBit(
+          GlobalMetadataBitPosIndex[ID - MDStringRef.size()]))
+    report_fatal_error("lazyLoadOneMetadata failed jumping: " +
+                       toString(std::move(Err)));
+  Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks();
+  if (!MaybeEntry)
+    // FIXME this drops the error on the floor.
+    report_fatal_error("lazyLoadOneMetadata failed advanceSkippingSubblocks: " +
+                       toString(MaybeEntry.takeError()));
+  BitstreamEntry Entry = MaybeEntry.get();
   ++NumMDRecordLoaded;
-  unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob);
-  if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ID))
-    report_fatal_error("Can't lazyload MD");
+  if (Expected<unsigned> MaybeCode =
+          IndexCursor.readRecord(Entry.ID, Record, &Blob)) {
+    if (Error Err =
+            parseOneMetadata(Record, MaybeCode.get(), Placeholders, Blob, ID))
+      report_fatal_error("Can't lazyload MD, parseOneMetadata: " +
+                         toString(std::move(Err)));
+  } else
+    report_fatal_error("Can't lazyload MD: " + toString(MaybeCode.takeError()));
 }
 
 /// Ensure that all forward-references and placeholders are resolved.
@@ -1032,12 +1108,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     // Read name of the named metadata.
     SmallString<8> Name(Record.begin(), Record.end());
     Record.clear();
-    Code = Stream.ReadCode();
+    Expected<unsigned> MaybeCode = Stream.ReadCode();
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    Code = MaybeCode.get();
 
     ++NumMDRecordLoaded;
-    unsigned NextBitCode = Stream.readRecord(Code, Record);
-    if (NextBitCode != bitc::METADATA_NAMED_NODE)
-      return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+    if (Expected<unsigned> MaybeNextBitCode = Stream.readRecord(Code, Record)) {
+      if (MaybeNextBitCode.get() != bitc::METADATA_NAMED_NODE)
+        return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+    } else
+      return MaybeNextBitCode.takeError();
 
     // Read named metadata elements.
     unsigned Size = Record.size();
@@ -1407,12 +1488,33 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
       return error("Invalid record");
 
     bool HasSPFlags = Record[0] & 4;
-    DISubprogram::DISPFlags SPFlags =
-        HasSPFlags
-            ? static_cast<DISubprogram::DISPFlags>(Record[9])
-            : DISubprogram::toSPFlags(
-                  /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
-                  /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11]);
+
+    DINode::DIFlags Flags;
+    DISubprogram::DISPFlags SPFlags;
+    if (!HasSPFlags)
+      Flags = static_cast<DINode::DIFlags>(Record[11 + 2]);
+    else {
+      Flags = static_cast<DINode::DIFlags>(Record[11]);
+      SPFlags = static_cast<DISubprogram::DISPFlags>(Record[9]);
+    }
+
+    // Support for old metadata when
+    // subprogram specific flags are placed in DIFlags.
+    const unsigned DIFlagMainSubprogram = 1 << 21;
+    bool HasOldMainSubprogramFlag = Flags & DIFlagMainSubprogram;
+    if (HasOldMainSubprogramFlag)
+      // Remove old DIFlagMainSubprogram from DIFlags.
+      // Note: This assumes that any future use of bit 21 defaults to it
+      // being 0.
+      Flags &= ~static_cast<DINode::DIFlags>(DIFlagMainSubprogram);
+
+    if (HasOldMainSubprogramFlag && HasSPFlags)
+      SPFlags |= DISubprogram::SPFlagMainSubprogram;
+    else if (!HasSPFlags)
+      SPFlags = DISubprogram::toSPFlags(
+                    /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+                    /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
+                    /*DIFlagMainSubprogram*/HasOldMainSubprogramFlag);
 
     // All definitions should be distinct.
     IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
@@ -1456,7 +1558,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
          getDITypeRefOrNull(Record[8 + OffsetA]),           // containingType
          Record[10 + OffsetA],                              // virtualIndex
          HasThisAdj ? Record[16 + OffsetB] : 0,             // thisAdjustment
-         static_cast<DINode::DIFlags>(Record[11 + OffsetA]),// flags
+         Flags,                                             // flags
          SPFlags,                                           // SPFlags
          HasUnit ? CUorFn : nullptr,                        // unit
          getMDOrNull(Record[13 + OffsetB]),                 // templateParams
@@ -1508,6 +1610,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     NextMetadataNo++;
     break;
   }
+  case bitc::METADATA_COMMON_BLOCK: {
+    IsDistinct = Record[0] & 1;
+    MetadataList.assignValue(
+        GET_OR_DISTINCT(DICommonBlock,
+                        (Context, getMDOrNull(Record[1]),
+                         getMDOrNull(Record[2]), getMDString(Record[3]),
+                         getMDOrNull(Record[4]), Record[5])),
+        NextMetadataNo);
+    NextMetadataNo++;
+    break;
+  }
   case bitc::METADATA_NAMESPACE: {
     // Newer versions of DINamespace dropped file and line.
     MDString *Name;
@@ -1831,7 +1944,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
     if (R.AtEndOfStream())
       return error("Invalid record: metadata strings bad length");
 
-    unsigned Size = R.ReadVBR(6);
+    Expected<uint32_t> MaybeSize = R.ReadVBR(6);
+    if (!MaybeSize)
+      return MaybeSize.takeError();
+    uint32_t Size = MaybeSize.get();
     if (Strings.size() < Size)
       return error("Invalid record: metadata strings truncated chars");
 
@@ -1860,14 +1976,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
 /// Parse metadata attachments.
 Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
     Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
-  if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
   PlaceholderQueue Placeholders;
 
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1884,7 +2003,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
     // Read a metadata attachment record.
     Record.clear();
     ++NumMDRecordLoaded;
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeRecord)
+      return MaybeRecord.takeError();
+    switch (MaybeRecord.get()) {
     default: // Default behavior: ignore.
       break;
     case bitc::METADATA_ATTACHMENT: {
@@ -1958,14 +2080,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKindRecord(
 
 /// Parse the metadata kinds out of the METADATA_KIND_BLOCK.
 Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
-  if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
-    return error("Invalid record");
+  if (Error Err = Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
+    return Err;
 
   SmallVector<uint64_t, 64> Record;
 
   // Read all the records.
   while (true) {
-    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
     case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1981,8 +2106,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
     // Read a record.
     Record.clear();
     ++NumMDRecordLoaded;
-    unsigned Code = Stream.readRecord(Entry.ID, Record);
-    switch (Code) {
+    Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    switch (MaybeCode.get()) {
     default: // Default behavior: ignore.
       break;
     case bitc::METADATA_KIND: {
diff --git a/lib/Bitcode/Reader/MetadataLoader.h b/lib/Bitcode/Reader/MetadataLoader.h
index 07a77a086f32..fe2b20273249 100644
--- a/lib/Bitcode/Reader/MetadataLoader.h
+++ b/lib/Bitcode/Reader/MetadataLoader.h
@@ -1,9 +1,8 @@
 //===-- Bitcode/Reader/MetadataLoader.h - Load Metadatas -------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Bitcode/Reader/ValueList.cpp b/lib/Bitcode/Reader/ValueList.cpp
index b3945a37408f..431995fd40ac 100644
--- a/lib/Bitcode/Reader/ValueList.cpp
+++ b/lib/Bitcode/Reader/ValueList.cpp
@@ -1,9 +1,8 @@
 //===- ValueList.cpp - Internal BitcodeReader implementation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -66,15 +65,18 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
 
 } // end namespace llvm
 
-void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
+void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx, Type *FullTy) {
   if (Idx == size()) {
-    push_back(V);
+    push_back(V, FullTy);
     return;
   }
 
   if (Idx >= size())
     resize(Idx + 1);
 
+  assert(FullTypes[Idx] == nullptr || FullTypes[Idx] == FullTy);
+  FullTypes[Idx] = FullTy;
+
   WeakTrackingVH &OldV = ValuePtrs[Idx];
   if (!OldV) {
     OldV = V;
@@ -95,6 +97,10 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
 }
 
 Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
+    return nullptr;
+
   if (Idx >= size())
     resize(Idx + 1);
 
@@ -110,9 +116,10 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
   return C;
 }
 
-Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
-  // Bail out for a clearly invalid value. This would make us call resize(0)
-  if (Idx == std::numeric_limits<unsigned>::max())
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty,
+                                              Type **FullTy) {
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
     return nullptr;
 
   if (Idx >= size())
@@ -122,6 +129,8 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
     // If the types don't match, it's invalid.
     if (Ty && Ty != V->getType())
       return nullptr;
+    if (FullTy)
+      *FullTy = FullTypes[Idx];
     return V;
   }
 
@@ -181,8 +190,8 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() {
           NewOp = RealVal;
         } else {
           // Otherwise, look up the placeholder in ResolveConstants.
-          ResolveConstantsTy::iterator It = std::lower_bound(
-              ResolveConstants.begin(), ResolveConstants.end(),
+          ResolveConstantsTy::iterator It = llvm::lower_bound(
+              ResolveConstants,
               std::pair<Constant *, unsigned>(cast<Constant>(*I), 0));
           assert(It != ResolveConstants.end() && It->first == *I);
           NewOp = operator[](It->second);
diff --git a/lib/Bitcode/Reader/ValueList.h b/lib/Bitcode/Reader/ValueList.h
index 5ad7899347ad..49900498c294 100644
--- a/lib/Bitcode/Reader/ValueList.h
+++ b/lib/Bitcode/Reader/ValueList.h
@@ -1,9 +1,8 @@
 //===-- Bitcode/Reader/ValueList.h - Number values --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,13 @@ class Value;
 class BitcodeReaderValueList {
   std::vector<WeakTrackingVH> ValuePtrs;
 
+  /// Struct containing fully-specified copies of the type of each
+  /// value. When pointers are opaque, this will contain non-opaque
+  /// variants so that restructuring instructions can determine their
+  /// type correctly even if being loaded from old bitcode where some
+  /// types are implicit.
+  std::vector<Type *> FullTypes;
+
   /// As we resolve forward-referenced constants, we add information about them
   /// to this vector.  This allows us to resolve them in bulk instead of
   /// resolving each reference at a time.  See the code in
@@ -40,8 +46,15 @@ class BitcodeReaderValueList {
   ResolveConstantsTy ResolveConstants;
   LLVMContext &Context;
 
+  /// Maximum number of valid references. Forward references exceeding the
+  /// maximum must be invalid.
+  unsigned RefsUpperBound;
+
 public:
-  BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+  BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound)
+      : Context(C),
+        RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+                                RefsUpperBound)) {}
 
   ~BitcodeReaderValueList() {
     assert(ResolveConstants.empty() && "Constants not resolved?");
@@ -49,12 +62,19 @@ public:
 
   // vector compatibility methods
   unsigned size() const { return ValuePtrs.size(); }
-  void resize(unsigned N) { ValuePtrs.resize(N); }
-  void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+  void resize(unsigned N) {
+    ValuePtrs.resize(N);
+    FullTypes.resize(N);
+  }
+  void push_back(Value *V, Type *Ty) {
+    ValuePtrs.emplace_back(V);
+    FullTypes.emplace_back(Ty);
+  }
 
   void clear() {
     assert(ResolveConstants.empty() && "Constants not resolved?");
     ValuePtrs.clear();
+    FullTypes.clear();
   }
 
   Value *operator[](unsigned i) const {
@@ -63,18 +83,22 @@ public:
   }
 
   Value *back() const { return ValuePtrs.back(); }
-  void pop_back() { ValuePtrs.pop_back(); }
+  void pop_back() {
+    ValuePtrs.pop_back();
+    FullTypes.pop_back();
+  }
   bool empty() const { return ValuePtrs.empty(); }
 
   void shrinkTo(unsigned N) {
     assert(N <= size() && "Invalid shrinkTo request!");
     ValuePtrs.resize(N);
+    FullTypes.resize(N);
   }
 
   Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
-  Value *getValueFwdRef(unsigned Idx, Type *Ty);
+  Value *getValueFwdRef(unsigned Idx, Type *Ty, Type **FullTy = nullptr);
 
-  void assignValue(Value *V, unsigned Idx);
+  void assignValue(Value *V, unsigned Idx, Type *FullTy);
 
   /// Once all constants are read, this method bulk resolves any forward
   /// references.
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 763cd12aa2d7..76ca89147e52 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -1,9 +1,8 @@
 //===-- BitWriter.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index ba4f932e2e6d..5c7b970a3a75 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1,9 +1,8 @@
 //===- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,8 +24,8 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Bitcode/BitCodes.h"
-#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Attributes.h"
@@ -215,7 +214,8 @@ private:
                                            const Function &F);
   void writeModuleLevelReferences(const GlobalVariable &V,
                                   SmallVector<uint64_t, 64> &NameVals,
-                                  unsigned FSModRefsAbbrev);
+                                  unsigned FSModRefsAbbrev,
+                                  unsigned FSModVTableRefsAbbrev);
 
   void assignValueId(GlobalValue::GUID ValGUID) {
     GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
@@ -318,6 +318,8 @@ private:
   void writeDILexicalBlockFile(const DILexicalBlockFile *N,
                                SmallVectorImpl<uint64_t> &Record,
                                unsigned Abbrev);
+  void writeDICommonBlock(const DICommonBlock *N,
+                          SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
   void writeDINamespace(const DINamespace *N, SmallVectorImpl<uint64_t> &Record,
                         unsigned Abbrev);
   void writeDIMacro(const DIMacro *N, SmallVectorImpl<uint64_t> &Record,
@@ -560,6 +562,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
   case AtomicRMWInst::Min: return bitc::RMW_MIN;
   case AtomicRMWInst::UMax: return bitc::RMW_UMAX;
   case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
+  case AtomicRMWInst::FAdd: return bitc::RMW_FADD;
+  case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
   }
 }
 
@@ -635,6 +639,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_NO_CAPTURE;
   case Attribute::NoDuplicate:
     return bitc::ATTR_KIND_NO_DUPLICATE;
+  case Attribute::NoFree:
+    return bitc::ATTR_KIND_NOFREE;
   case Attribute::NoImplicitFloat:
     return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT;
   case Attribute::NoInline:
@@ -653,6 +659,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_NO_RED_ZONE;
   case Attribute::NoReturn:
     return bitc::ATTR_KIND_NO_RETURN;
+  case Attribute::NoSync:
+    return bitc::ATTR_KIND_NOSYNC;
   case Attribute::NoCfCheck:
     return bitc::ATTR_KIND_NOCF_CHECK;
   case Attribute::NoUnwind:
@@ -707,10 +715,16 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_SWIFT_SELF;
   case Attribute::UWTable:
     return bitc::ATTR_KIND_UW_TABLE;
+  case Attribute::WillReturn:
+    return bitc::ATTR_KIND_WILLRETURN;
   case Attribute::WriteOnly:
     return bitc::ATTR_KIND_WRITEONLY;
   case Attribute::ZExt:
     return bitc::ATTR_KIND_Z_EXT;
+  case Attribute::ImmArg:
+    return bitc::ATTR_KIND_IMMARG;
+  case Attribute::SanitizeMemTag:
+    return bitc::ATTR_KIND_SANITIZE_MEMTAG;
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
   case Attribute::None:
@@ -742,7 +756,7 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
         Record.push_back(1);
         Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
         Record.push_back(Attr.getValueAsInt());
-      } else {
+      } else if (Attr.isStringAttribute()) {
         StringRef Kind = Attr.getKindAsString();
         StringRef Val = Attr.getValueAsString();
 
@@ -753,6 +767,13 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
           Record.append(Val.begin(), Val.end());
           Record.push_back(0);
         }
+      } else {
+        assert(Attr.isTypeAttribute());
+        Type *Ty = Attr.getValueAsType();
+        Record.push_back(Ty ? 6 : 5);
+        Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+        if (Ty)
+          Record.push_back(VE.getTypeID(Attr.getValueAsType()));
       }
     }
 
@@ -926,10 +947,13 @@ void ModuleBitcodeWriter::writeTypeTable() {
     }
     case Type::VectorTyID: {
       VectorType *VT = cast<VectorType>(T);
-      // VECTOR [numelts, eltty]
+      // VECTOR [numelts, eltty] or
+      //        [numelts, eltty, scalable]
       Code = bitc::TYPE_CODE_VECTOR;
       TypeVals.push_back(VT->getNumElements());
       TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+      if (VT->isScalable())
+        TypeVals.push_back(VT->isScalable());
       break;
     }
     }
@@ -991,6 +1015,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
   RawFlags |= Flags.NotEligibleToImport; // bool
   RawFlags |= (Flags.Live << 1);
   RawFlags |= (Flags.DSOLocal << 2);
+  RawFlags |= (Flags.CanAutoHide << 3);
 
   // Linkage don't need to be remapped at that time for the summary. Any future
   // change to the getEncodedLinkage() function will need to be taken into
@@ -1001,7 +1026,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
 }
 
 static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
-  uint64_t RawFlags = Flags.ReadOnly;
+  uint64_t RawFlags = Flags.MaybeReadOnly | (Flags.MaybeWriteOnly << 1);
   return RawFlags;
 }
 
@@ -1256,7 +1281,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
         GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
         GV.hasComdat() ||
         GV.hasAttributes() ||
-        GV.isDSOLocal()) {
+        GV.isDSOLocal() ||
+        GV.hasPartition()) {
       Vals.push_back(getEncodedVisibility(GV));
       Vals.push_back(getEncodedThreadLocalMode(GV));
       Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1268,6 +1294,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
       Vals.push_back(VE.getAttributeListID(AL));
 
       Vals.push_back(GV.isDSOLocal());
+      Vals.push_back(addToStrtab(GV.getPartition()));
+      Vals.push_back(GV.getPartition().size());
     } else {
       AbbrevToUse = SimpleGVarAbbrev;
     }
@@ -1305,6 +1333,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
 
     Vals.push_back(F.isDSOLocal());
     Vals.push_back(F.getAddressSpace());
+    Vals.push_back(addToStrtab(F.getPartition()));
+    Vals.push_back(F.getPartition().size());
 
     unsigned AbbrevToUse = 0;
     Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -1327,6 +1357,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     Vals.push_back(getEncodedThreadLocalMode(A));
     Vals.push_back(getEncodedUnnamedAddr(A));
     Vals.push_back(A.isDSOLocal());
+    Vals.push_back(addToStrtab(A.getPartition()));
+    Vals.push_back(A.getPartition().size());
 
     unsigned AbbrevToUse = 0;
     Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
@@ -1345,6 +1377,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     Vals.push_back(getEncodedLinkage(I));
     Vals.push_back(getEncodedVisibility(I));
     Vals.push_back(I.isDSOLocal());
+    Vals.push_back(addToStrtab(I.getPartition()));
+    Vals.push_back(I.getPartition().size());
     Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
     Vals.clear();
   }
@@ -1683,6 +1717,20 @@ void ModuleBitcodeWriter::writeDILexicalBlockFile(
   Record.clear();
 }
 
+void ModuleBitcodeWriter::writeDICommonBlock(const DICommonBlock *N,
+                                             SmallVectorImpl<uint64_t> &Record,
+                                             unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+  Record.push_back(VE.getMetadataOrNullID(N->getDecl()));
+  Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+  Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+  Record.push_back(N->getLineNo());
+
+  Stream.EmitRecord(bitc::METADATA_COMMON_BLOCK, Record, Abbrev);
+  Record.clear();
+}
+
 void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N,
                                            SmallVectorImpl<uint64_t> &Record,
                                            unsigned Abbrev) {
@@ -2616,12 +2664,16 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
     Vals.append(IVI->idx_begin(), IVI->idx_end());
     break;
   }
-  case Instruction::Select:
+  case Instruction::Select: {
     Code = bitc::FUNC_CODE_INST_VSELECT;
     pushValueAndType(I.getOperand(1), InstID, Vals);
     pushValue(I.getOperand(2), InstID, Vals);
     pushValueAndType(I.getOperand(0), InstID, Vals);
+    uint64_t Flags = getOptimizationFlags(&I);
+    if (Flags != 0)
+      Vals.push_back(Flags);
     break;
+  }
   case Instruction::ExtractElement:
     Code = bitc::FUNC_CODE_INST_EXTRACTELT;
     pushValueAndType(I.getOperand(0), InstID, Vals);
@@ -2776,6 +2828,41 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
       Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest()));
     break;
   }
+  case Instruction::CallBr: {
+    const CallBrInst *CBI = cast<CallBrInst>(&I);
+    const Value *Callee = CBI->getCalledValue();
+    FunctionType *FTy = CBI->getFunctionType();
+
+    if (CBI->hasOperandBundles())
+      writeOperandBundles(CBI, InstID);
+
+    Code = bitc::FUNC_CODE_INST_CALLBR;
+
+    Vals.push_back(VE.getAttributeListID(CBI->getAttributes()));
+
+    Vals.push_back(CBI->getCallingConv() << bitc::CALL_CCONV |
+                   1 << bitc::CALL_EXPLICIT_TYPE);
+
+    Vals.push_back(VE.getValueID(CBI->getDefaultDest()));
+    Vals.push_back(CBI->getNumIndirectDests());
+    for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
+      Vals.push_back(VE.getValueID(CBI->getIndirectDest(i)));
+
+    Vals.push_back(VE.getTypeID(FTy));
+    pushValueAndType(Callee, InstID, Vals);
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      pushValue(I.getOperand(i), InstID, Vals); // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = FTy->getNumParams(), e = CBI->getNumArgOperands();
+           i != e; ++i)
+        pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
+    }
+    break;
+  }
   case Instruction::Unreachable:
     Code = bitc::FUNC_CODE_INST_UNREACHABLE;
     AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
@@ -3528,6 +3615,19 @@ static void writeTypeIdSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
                                       W.second);
 }
 
+static void writeTypeIdCompatibleVtableSummaryRecord(
+    SmallVector<uint64_t, 64> &NameVals, StringTableBuilder &StrtabBuilder,
+    const std::string &Id, const TypeIdCompatibleVtableInfo &Summary,
+    ValueEnumerator &VE) {
+  NameVals.push_back(StrtabBuilder.add(Id));
+  NameVals.push_back(Id.size());
+
+  for (auto &P : Summary) {
+    NameVals.push_back(P.AddressPointOffset);
+    NameVals.push_back(VE.getValueID(P.VTableVI.getValue()));
+  }
+}
+
 // Helper to emit a single function summary record.
 void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
     SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
@@ -3538,11 +3638,13 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
   FunctionSummary *FS = cast<FunctionSummary>(Summary);
   writeFunctionTypeMetadataRecords(Stream, FS);
 
+  auto SpecialRefCnts = FS->specialRefCounts();
   NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
   NameVals.push_back(FS->instCount());
   NameVals.push_back(getEncodedFFlags(FS->fflags()));
   NameVals.push_back(FS->refs().size());
-  NameVals.push_back(FS->immutableRefCount());
+  NameVals.push_back(SpecialRefCnts.first);  // rorefcnt
+  NameVals.push_back(SpecialRefCnts.second); // worefcnt
 
   for (auto &RI : FS->refs())
     NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3572,7 +3674,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
 // and emit them in a summary record.
 void ModuleBitcodeWriterBase::writeModuleLevelReferences(
     const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals,
-    unsigned FSModRefsAbbrev) {
+    unsigned FSModRefsAbbrev, unsigned FSModVTableRefsAbbrev) {
   auto VI = Index->getValueInfo(V.getGUID());
   if (!VI || VI.getSummaryList().empty()) {
     // Only declarations should not have a summary (a declaration might however
@@ -3586,6 +3688,10 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
   NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
   NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
 
+  auto VTableFuncs = VS->vTableFuncs();
+  if (!VTableFuncs.empty())
+    NameVals.push_back(VS->refs().size());
+
   unsigned SizeBeforeRefs = NameVals.size();
   for (auto &RI : VS->refs())
     NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3593,15 +3699,26 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
   // been initialized from a DenseSet.
   llvm::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end());
 
-  Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals,
-                    FSModRefsAbbrev);
+  if (VTableFuncs.empty())
+    Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals,
+                      FSModRefsAbbrev);
+  else {
+    // VTableFuncs pairs should already be sorted by offset.
+    for (auto &P : VTableFuncs) {
+      NameVals.push_back(VE.getValueID(P.FuncVI.getValue()));
+      NameVals.push_back(P.VTableOffset);
+    }
+
+    Stream.EmitRecord(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS, NameVals,
+                      FSModVTableRefsAbbrev);
+  }
   NameVals.clear();
 }
 
 // Current version for the summary.
 // This is bumped whenever we introduce changes in the way some record are
 // interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 6;
+static const uint64_t INDEX_VERSION = 7;
 
 /// Emit the per-module summary section alongside the rest of
 /// the module's bitcode.
@@ -3643,7 +3760,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // fflags
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // numrefs
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // immutablerefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // rorefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // worefcnt
   // numrefs x valueid, n x (valueid, hotness)
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3660,7 +3778,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // fflags
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // numrefs
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // immutablerefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // rorefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // worefcnt
   // numrefs x valueid, n x (valueid [, rel_block_freq])
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3675,6 +3794,17 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  // Abbrev for FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS.
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+  // numrefs x valueid, n x (valueid , offset)
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned FSModVTableRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
   // Abbrev for FS_ALIAS.
   Abbv = std::make_shared<BitCodeAbbrev>();
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS));
@@ -3683,6 +3813,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // valueid
   unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  // Abbrev for FS_TYPE_ID_METADATA
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_TYPE_ID_METADATA));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid strtab index
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid length
+  // n x (valueid , offset)
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
   SmallVector<uint64_t, 64> NameVals;
   // Iterate over the list of functions instead of the Index to
   // ensure the ordering is stable.
@@ -3707,7 +3847,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   // Capture references from GlobalVariable initializers, which are outside
   // of a function scope.
   for (const GlobalVariable &G : M.globals())
-    writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev);
+    writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev,
+                               FSModVTableRefsAbbrev);
 
   for (const GlobalAlias &A : M.aliases()) {
     auto *Aliasee = A.getBaseObject();
@@ -3725,6 +3866,14 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
     NameVals.clear();
   }
 
+  for (auto &S : Index->typeIdCompatibleVtableMap()) {
+    writeTypeIdCompatibleVtableSummaryRecord(NameVals, StrtabBuilder, S.first,
+                                             S.second, VE);
+    Stream.EmitRecord(bitc::FS_TYPE_ID_METADATA, NameVals,
+                      TypeIdCompatibleVtableAbbrev);
+    NameVals.clear();
+  }
+
   Stream.ExitBlock();
 }
 
@@ -3762,7 +3911,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // fflags
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // entrycount
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // numrefs
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // immutablerefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // rorefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // worefcnt
   // numrefs x valueid, n x (valueid)
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3776,8 +3926,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // flags
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // fflags
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // entrycount
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // numrefs
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // immutablerefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // rorefcnt
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4));   // worefcnt
   // numrefs x valueid, n x (valueid, hotness)
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3825,9 +3977,13 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
+  std::set<GlobalValue::GUID> DefOrUseGUIDs;
   forEachSummary([&](GVInfo I, bool IsAliasee) {
     GlobalValueSummary *S = I.second;
     assert(S);
+    DefOrUseGUIDs.insert(I.first);
+    for (const ValueInfo &VI : S->refs())
+      DefOrUseGUIDs.insert(VI.getGUID());
 
     auto ValueId = getValueId(I.first);
     assert(ValueId);
@@ -3879,20 +4035,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
 
     // Fill in below
     NameVals.push_back(0); // numrefs
-    NameVals.push_back(0); // immutablerefcnt
+    NameVals.push_back(0); // rorefcnt
+    NameVals.push_back(0); // worefcnt
 
-    unsigned Count = 0, ImmutableRefCnt = 0;
+    unsigned Count = 0, RORefCnt = 0, WORefCnt = 0;
     for (auto &RI : FS->refs()) {
       auto RefValueId = getValueId(RI.getGUID());
       if (!RefValueId)
         continue;
       NameVals.push_back(*RefValueId);
       if (RI.isReadOnly())
-        ImmutableRefCnt++;
+        RORefCnt++;
+      else if (RI.isWriteOnly())
+        WORefCnt++;
       Count++;
     }
     NameVals[6] = Count;
-    NameVals[7] = ImmutableRefCnt;
+    NameVals[7] = RORefCnt;
+    NameVals[8] = WORefCnt;
 
     bool HasProfileData = false;
     for (auto &EI : FS->calls()) {
@@ -3968,20 +4128,30 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
 
   if (!Index.cfiFunctionDefs().empty()) {
     for (auto &S : Index.cfiFunctionDefs()) {
-      NameVals.push_back(StrtabBuilder.add(S));
-      NameVals.push_back(S.size());
+      if (DefOrUseGUIDs.count(
+              GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+        NameVals.push_back(StrtabBuilder.add(S));
+        NameVals.push_back(S.size());
+      }
+    }
+    if (!NameVals.empty()) {
+      Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
+      NameVals.clear();
     }
-    Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
-    NameVals.clear();
   }
 
   if (!Index.cfiFunctionDecls().empty()) {
     for (auto &S : Index.cfiFunctionDecls()) {
-      NameVals.push_back(StrtabBuilder.add(S));
-      NameVals.push_back(S.size());
+      if (DefOrUseGUIDs.count(
+              GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+        NameVals.push_back(StrtabBuilder.add(S));
+        NameVals.push_back(S.size());
+      }
+    }
+    if (!NameVals.empty()) {
+      Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
+      NameVals.clear();
     }
-    Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
-    NameVals.clear();
   }
 
   // Walk the GUIDs that were referenced, and write the
@@ -4055,15 +4225,15 @@ void ModuleBitcodeWriter::write() {
   // Emit blockinfo, which defines the standard abbreviations etc.
   writeBlockInfo();
 
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
   // Emit information about attribute groups.
   writeAttributeGroupTable();
 
   // Emit information about parameter attributes.
   writeAttributeTable();
 
-  // Emit information describing all of the types in the module.
-  writeTypeTable();
-
   writeComdats();
 
   // Emit top-level description of module, including target triple, inline asm,
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 41212e575f8e..6796cf8cee54 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -1,9 +1,8 @@
 //===- BitcodeWriterPass.cpp - Bitcode writing pass -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index deb04f1bb36c..f59c906c7b75 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -1,9 +1,8 @@
 //===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -415,10 +414,8 @@ ValueEnumerator::ValueEnumerator(const Module &M,
           EnumerateMetadata(&F, MD->getMetadata());
         }
         EnumerateType(I.getType());
-        if (const CallInst *CI = dyn_cast<CallInst>(&I))
-          EnumerateAttributes(CI->getAttributes());
-        else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
-          EnumerateAttributes(II->getAttributes());
+        if (const auto *Call = dyn_cast<CallBase>(&I))
+          EnumerateAttributes(Call->getAttributes());
 
         // Enumerate metadata attached with this instruction.
         MDs.clear();
@@ -752,7 +749,8 @@ void ValueEnumerator::organizeMetadata() {
 
   // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
   // and fix up MetadataMap.
-  std::vector<const Metadata *> OldMDs = std::move(MDs);
+  std::vector<const Metadata *> OldMDs;
+  MDs.swap(OldMDs);
   MDs.reserve(OldMDs.size());
   for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
     auto *MD = Order[I].get(OldMDs);
@@ -951,9 +949,11 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   incorporateFunctionMetadata(F);
 
   // Adding function arguments to the value table.
-  for (const auto &I : F.args())
+  for (const auto &I : F.args()) {
     EnumerateValue(&I);
-
+    if (I.hasAttribute(Attribute::ByVal))
+      EnumerateType(I.getParamByValType());
+  }
   FirstFuncConstantID = Values.size();
 
   // Add all function-level constants to the value table.
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 011356c32601..112f0b4a1dc4 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -1,9 +1,8 @@
 //===- Bitcode/Writer/ValueEnumerator.h - Number values ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Bitstream/Reader/BitstreamReader.cpp b/lib/Bitstream/Reader/BitstreamReader.cpp
new file mode 100644
index 000000000000..a4a97ced5457
--- /dev/null
+++ b/lib/Bitstream/Reader/BitstreamReader.cpp
@@ -0,0 +1,510 @@
+//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <string>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  BitstreamCursor implementation
+//===----------------------------------------------------------------------===//
+
+/// Having read the ENTER_SUBBLOCK abbrevid, enter the block BlockID.
+Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
+  // Save the current block's code size and abbrevs on BlockScope.
+  BlockScope.push_back(Block(CurCodeSize));
+  BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+  // Add the abbrevs specific to this block to the CurAbbrevs list.
+  if (BlockInfo) {
+    if (const BitstreamBlockInfo::BlockInfo *Info =
+            BlockInfo->getBlockInfo(BlockID)) {
+      CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
+                        Info->Abbrevs.end());
+    }
+  }
+
+  // Get the codesize of this block and reject values above MaxChunkSize.
+  Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
+  if (!MaybeVBR)
+    return MaybeVBR.takeError();
+  CurCodeSize = MaybeVBR.get();
+
+  if (CurCodeSize > MaxChunkSize)
+    return llvm::createStringError(
+        std::errc::illegal_byte_sequence,
+        "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
+        CurCodeSize);
+  // Align to a four-byte boundary, then read the block's word count.
+  SkipToFourByteBoundary();
+  Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
+  if (!MaybeNum)
+    return MaybeNum.takeError();
+  word_t NumWords = MaybeNum.get();
+  if (NumWordsP)
+    *NumWordsP = NumWords;
+  // Fail if the code size is zero or the stream is already at its end.
+  if (CurCodeSize == 0)
+    return llvm::createStringError(
+        std::errc::illegal_byte_sequence,
+        "can't enter sub-block: current code size is 0");
+  if (AtEndOfStream())
+    return llvm::createStringError(
+        std::errc::illegal_byte_sequence,
+        "can't enter sub block: already at end of stream");
+
+  return Error::success();
+}
+
+static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
+                                               const BitCodeAbbrevOp &Op) {
+  assert(!Op.isLiteral() && "Not to be used with literals!");
+  // Array and Blob operands must be handled by the caller, not here.
+  // Decode one scalar value using the encoding that Op specifies.
+  switch (Op.getEncoding()) {
+  case BitCodeAbbrevOp::Array:
+  case BitCodeAbbrevOp::Blob:
+    llvm_unreachable("Should not reach here");
+  case BitCodeAbbrevOp::Fixed:
+    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
+    return Cursor.Read((unsigned)Op.getEncodingData());
+  case BitCodeAbbrevOp::VBR:
+    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
+    return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
+  case BitCodeAbbrevOp::Char6:
+    if (Expected<unsigned> Res = Cursor.Read(6))
+      return BitCodeAbbrevOp::DecodeChar6(Res.get());
+    else
+      return Res.takeError();
+  }
+  llvm_unreachable("invalid abbreviation encoding");
+}
+
+static Error skipAbbreviatedField(BitstreamCursor &Cursor,
+                                  const BitCodeAbbrevOp &Op) {
+  assert(!Op.isLiteral() && "Not to be used with literals!");
+  // Array and Blob operands must be handled by the caller, not here.
+  // Read the value with the encoding Op specifies and discard the result.
+  switch (Op.getEncoding()) {
+  case BitCodeAbbrevOp::Array:
+  case BitCodeAbbrevOp::Blob:
+    llvm_unreachable("Should not reach here");
+  case BitCodeAbbrevOp::Fixed:
+    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
+    if (Expected<unsigned> Res = Cursor.Read((unsigned)Op.getEncodingData()))
+      break;
+    else
+      return Res.takeError();
+  case BitCodeAbbrevOp::VBR:
+    assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
+    if (Expected<uint64_t> Res =
+            Cursor.ReadVBR64((unsigned)Op.getEncodingData()))
+      break;
+    else
+      return Res.takeError();
+  case BitCodeAbbrevOp::Char6:
+    if (Expected<unsigned> Res = Cursor.Read(6))
+      break;
+    else
+      return Res.takeError();
+  }
+  return ErrorSuccess();
+}
+
+/// skipRecord - Read the current record and discard it, returning its code.
+Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
+  // Skip unabbreviated records by reading past their entries.
+  if (AbbrevID == bitc::UNABBREV_RECORD) {
+    Expected<uint32_t> MaybeCode = ReadVBR(6);
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    unsigned Code = MaybeCode.get();
+    Expected<uint32_t> MaybeVBR = ReadVBR(6);
+    if (!MaybeVBR)
+      return MaybeVBR.takeError();
+    unsigned NumElts = MaybeVBR.get();
+    for (unsigned i = 0; i != NumElts; ++i)
+      if (Expected<uint64_t> Res = ReadVBR64(6))
+        ; // Skip!
+      else
+        return Res.takeError();
+    return Code;
+  }
+
+  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
+  unsigned Code;
+  if (CodeOp.isLiteral())
+    Code = CodeOp.getLiteralValue();
+  else {
+    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+      return llvm::createStringError(
+          std::errc::illegal_byte_sequence,
+          "Abbreviation starts with an Array or a Blob");
+    Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    Code = MaybeCode.get();
+  }
+
+  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
+    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+    if (Op.isLiteral())
+      continue;
+
+    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+      if (Error Err = skipAbbreviatedField(*this, Op))
+        return std::move(Err);
+      continue;
+    }
+
+    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+      // Array case.  Read the number of elements as a vbr6.
+      Expected<uint32_t> MaybeNum = ReadVBR(6);
+      if (!MaybeNum)
+        return MaybeNum.takeError();
+      unsigned NumElts = MaybeNum.get();
+
+      // Get the element encoding.
+      assert(i+2 == e && "array op not second to last?");
+      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+      // Skip all the elements: Fixed and Char6 ones with a single jump past
+      // them, VBR ones by decoding each value in turn.
+      switch (EltEnc.getEncoding()) {
+      default:
+        report_fatal_error("Array element type can't be an Array or a Blob");
+      case BitCodeAbbrevOp::Fixed:
+        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
+        if (Error Err = JumpToBit(GetCurrentBitNo() +
+                                  NumElts * EltEnc.getEncodingData()))
+          return std::move(Err);
+        break;
+      case BitCodeAbbrevOp::VBR:
+        assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
+        for (; NumElts; --NumElts)
+          if (Expected<uint64_t> Res =
+                  ReadVBR64((unsigned)EltEnc.getEncodingData()))
+            ; // Skip!
+          else
+            return Res.takeError();
+        break;
+      case BitCodeAbbrevOp::Char6:
+        if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
+          return std::move(Err);
+        break;
+      }
+      continue;
+    }
+
+    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+    // Blob case.  Read the number of bytes as a vbr6.
+    Expected<uint32_t> MaybeNum = ReadVBR(6);
+    if (!MaybeNum)
+      return MaybeNum.takeError();
+    unsigned NumElts = MaybeNum.get();
+    SkipToFourByteBoundary();  // 32-bit alignment
+
+    // Figure out where the end of this blob will be including tail padding.
+    size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
+
+    // If this would read off the end of the bitcode file, just skip to the
+    // end of the stream and stop processing operands.
+    if (!canSkipToPos(NewEnd/8)) {
+      skipToEnd();
+      break;
+    }
+
+    // Skip over the blob.
+    if (Error Err = JumpToBit(NewEnd))
+      return std::move(Err);
+  }
+  return Code;
+}
+
+Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
+                                               SmallVectorImpl<uint64_t> &Vals,
+                                               StringRef *Blob) {
+  if (AbbrevID == bitc::UNABBREV_RECORD) {
+    Expected<uint32_t> MaybeCode = ReadVBR(6);
+    if (!MaybeCode)
+      return MaybeCode.takeError();
+    uint32_t Code = MaybeCode.get();
+    Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+    if (!MaybeNumElts)
+      return MaybeNumElts.takeError();
+    uint32_t NumElts = MaybeNumElts.get();
+
+    for (unsigned i = 0; i != NumElts; ++i)
+      if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
+        Vals.push_back(MaybeVal.get());
+      else
+        return MaybeVal.takeError();
+    return Code;
+  }
+
+  const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+  // Read the record code first.
+  assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
+  const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
+  unsigned Code;
+  if (CodeOp.isLiteral())
+    Code = CodeOp.getLiteralValue();
+  else {
+    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+      report_fatal_error("Abbreviation starts with an Array or a Blob");
+    if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
+      Code = MaybeCode.get();
+    else
+      return MaybeCode.takeError();
+  }
+
+  for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+    const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+    if (Op.isLiteral()) {
+      Vals.push_back(Op.getLiteralValue());
+      continue;
+    }
+
+    if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+        Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+      if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
+        Vals.push_back(MaybeVal.get());
+      else
+        return MaybeVal.takeError();
+      continue;
+    }
+
+    if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+      // Array case.  Read the number of elements as a vbr6.
+      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+      if (!MaybeNumElts)
+        return MaybeNumElts.takeError();
+      uint32_t NumElts = MaybeNumElts.get();
+
+      // Get the element encoding.
+      if (i + 2 != e)
+        report_fatal_error("Array op not second to last");
+      const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+      if (!EltEnc.isEncoding())
+        report_fatal_error(
+            "Array element type has to be an encoding of a type");
+
+      // Read all the elements, appending each decoded value to Vals.
+      switch (EltEnc.getEncoding()) {
+      default:
+        report_fatal_error("Array element type can't be an Array or a Blob");
+      case BitCodeAbbrevOp::Fixed:
+        for (; NumElts; --NumElts)
+          if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
+                  Read((unsigned)EltEnc.getEncodingData()))
+            Vals.push_back(MaybeVal.get());
+          else
+            return MaybeVal.takeError();
+        break;
+      case BitCodeAbbrevOp::VBR:
+        for (; NumElts; --NumElts)
+          if (Expected<uint64_t> MaybeVal =
+                  ReadVBR64((unsigned)EltEnc.getEncodingData()))
+            Vals.push_back(MaybeVal.get());
+          else
+            return MaybeVal.takeError();
+        break;
+      case BitCodeAbbrevOp::Char6:
+        for (; NumElts; --NumElts)
+          if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
+            Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
+          else
+            return MaybeVal.takeError();
+      }
+      continue;
+    }
+
+    assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+    // Blob case.  Read the number of bytes as a vbr6.
+    Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+    if (!MaybeNumElts)
+      return MaybeNumElts.takeError();
+    uint32_t NumElts = MaybeNumElts.get();
+    SkipToFourByteBoundary();  // 32-bit alignment
+
+    // Figure out where the end of this blob will be including tail padding.
+    size_t CurBitPos = GetCurrentBitNo();
+    size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
+
+    // If this would read off the end of the bitcode file, append NumElts
+    // zeros to Vals, skip to the end of the stream and stop reading operands.
+    if (!canSkipToPos(NewEnd/8)) {
+      Vals.append(NumElts, 0);
+      skipToEnd();
+      break;
+    }
+
+    // Otherwise, inform the streamer that we need these bytes in memory.  Skip
+    // over tail padding first, in case jumping to NewEnd invalidates the Blob
+    // pointer.
+    if (Error Err = JumpToBit(NewEnd))
+      return std::move(Err);
+    const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
+
+    // If we can return a reference to the data, do so to avoid copying it.
+    if (Blob) {
+      *Blob = StringRef(Ptr, NumElts);
+    } else {
+      // Otherwise, unpack into Vals with zero extension.
+      for (; NumElts; --NumElts)
+        Vals.push_back((unsigned char)*Ptr++);
+    }
+  }
+
+  return Code;
+}
+
+Error BitstreamCursor::ReadAbbrevRecord() {
+  auto Abbv = std::make_shared<BitCodeAbbrev>();
+  Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
+  if (!MaybeNumOpInfo)
+    return MaybeNumOpInfo.takeError();
+  unsigned NumOpInfo = MaybeNumOpInfo.get();
+  for (unsigned i = 0; i != NumOpInfo; ++i) {
+    Expected<word_t> MaybeIsLiteral = Read(1);
+    if (!MaybeIsLiteral)
+      return MaybeIsLiteral.takeError();
+    bool IsLiteral = MaybeIsLiteral.get();
+    if (IsLiteral) {
+      Expected<uint64_t> MaybeOp = ReadVBR64(8);
+      if (!MaybeOp)
+        return MaybeOp.takeError();
+      Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
+      continue;
+    }
+    // Not a literal: read the encoding and, when it takes one, its data value.
+    Expected<word_t> MaybeEncoding = Read(3);
+    if (!MaybeEncoding)
+      return MaybeEncoding.takeError();
+    BitCodeAbbrevOp::Encoding E =
+        (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
+    if (BitCodeAbbrevOp::hasEncodingData(E)) {
+      Expected<uint64_t> MaybeData = ReadVBR64(5);
+      if (!MaybeData)
+        return MaybeData.takeError();
+      uint64_t Data = MaybeData.get();
+
+      // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
+      // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
+      // a slow path in Read() to have to handle reading zero bits.
+      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
+          Data == 0) {
+        Abbv->Add(BitCodeAbbrevOp(0));
+        continue;
+      }
+
+      if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
+          Data > MaxChunkSize)
+        report_fatal_error(
+            "Fixed or VBR abbrev record with size > MaxChunkData");
+
+      Abbv->Add(BitCodeAbbrevOp(E, Data));
+    } else
+      Abbv->Add(BitCodeAbbrevOp(E));
+  }
+  // An abbreviation must have at least one operand; install it if so.
+  if (Abbv->getNumOperandInfos() == 0)
+    report_fatal_error("Abbrev record with no operands");
+  CurAbbrevs.push_back(std::move(Abbv));
+
+  return Error::success();
+}
+
+Expected<Optional<BitstreamBlockInfo>>
+BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
+  if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
+    return std::move(Err);
+  // Parsed infos accumulate here; None is returned for malformed content.
+  BitstreamBlockInfo NewBlockInfo;
+
+  SmallVector<uint64_t, 64> Record;
+  BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
+
+  // Read all the records in the BLOCKINFO block.
+  while (true) {
+    Expected<BitstreamEntry> MaybeEntry =
+        advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
+    if (!MaybeEntry)
+      return MaybeEntry.takeError();
+    BitstreamEntry Entry = MaybeEntry.get();
+
+    switch (Entry.Kind) {
+    case llvm::BitstreamEntry::SubBlock: // Handled for us already.
+    case llvm::BitstreamEntry::Error:
+      return None;
+    case llvm::BitstreamEntry::EndBlock:
+      return std::move(NewBlockInfo);
+    case llvm::BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Read abbrev records, associate them with the current block info.
+    if (Entry.ID == bitc::DEFINE_ABBREV) {
+      if (!CurBlockInfo) return None;
+      if (Error Err = ReadAbbrevRecord())
+        return std::move(Err);
+
+      // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
+      // appropriate BlockInfo.
+      CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
+      CurAbbrevs.pop_back();
+      continue;
+    }
+
+    // Read a record.
+    Record.clear();
+    Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
+    if (!MaybeBlockInfo)
+      return MaybeBlockInfo.takeError();
+    switch (MaybeBlockInfo.get()) {
+    default:
+      break; // Default behavior, ignore unknown content.
+    case bitc::BLOCKINFO_CODE_SETBID:
+      if (Record.size() < 1)
+        return None;
+      CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
+      break;
+    case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+      if (!CurBlockInfo)
+        return None;
+      if (!ReadBlockInfoNames)
+        break; // Ignore name.
+      std::string Name;
+      for (unsigned i = 0, e = Record.size(); i != e; ++i)
+        Name += (char)Record[i];
+      CurBlockInfo->Name = Name;
+      break;
+    }
+      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+        if (!CurBlockInfo) return None;
+        if (!ReadBlockInfoNames)
+          break; // Ignore name.
+        if (Record.empty()) return None; // Malformed: record ID is missing.
+        std::string Name;
+        for (unsigned i = 1, e = Record.size(); i != e; ++i)
+          Name += (char)Record[i];
+        CurBlockInfo->RecordNames.emplace_back((unsigned)Record[0], Name);
+        break;
+      }
+      }
+  }
+}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 632ea8e9cdc4..444f618d8b8c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
 //===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 5dce3c2499e5..0cf2e6d78f7f 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 37dcb0be824e..c99800659bfd 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 467bcc2edc6f..9247dd844936 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 797f05ee5cf3..d158e70b86ac 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,9 +1,8 @@
 //===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
 ///
 void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                            Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<EVT> *MemVTs,
                            SmallVectorImpl<uint64_t> *Offsets,
                            uint64_t StartingOffset) {
   // Given a struct type, recursively traverse the elements.
@@ -92,7 +92,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                                       EI = EB,
                                       EE = STy->element_end();
          EI != EE; ++EI)
-      ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
+      ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                       StartingOffset + SL->getElementOffset(EI - EB));
     return;
   }
@@ -101,7 +101,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
     Type *EltTy = ATy->getElementType();
     uint64_t EltSize = DL.getTypeAllocSize(EltTy);
     for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
-      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
+      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                       StartingOffset + i * EltSize);
     return;
   }
@@ -110,10 +110,50 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
     return;
   // Base case: we can get an EVT for this LLVM IR type.
   ValueVTs.push_back(TLI.getValueType(DL, Ty));
+  if (MemVTs)
+    MemVTs->push_back(TLI.getMemValueType(DL, Ty));
   if (Offsets)
     Offsets->push_back(StartingOffset);
 }
 
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *Offsets,
+                           uint64_t StartingOffset) {
+  // Forward to the MemVTs-taking overload, passing a null MemVTs pointer.
+  ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offsets, StartingOffset);
+}
+
+void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
+                            SmallVectorImpl<LLT> &ValueTys,
+                            SmallVectorImpl<uint64_t> *Offsets,
+                            uint64_t StartingOffset) {
+  // Structs: recurse into each member at its offset in the struct layout.
+  if (auto *Struct = dyn_cast<StructType>(&Ty)) {
+    const StructLayout *Layout = DL.getStructLayout(Struct);
+    for (unsigned Idx = 0, End = Struct->getNumElements(); Idx != End; ++Idx)
+      computeValueLLTs(DL, *Struct->getElementType(Idx), ValueTys, Offsets,
+                       StartingOffset + Layout->getElementOffset(Idx));
+    return;
+  }
+  // Arrays: recurse into every element, spaced by the element alloc size.
+  if (auto *Array = dyn_cast<ArrayType>(&Ty)) {
+    Type *ElemTy = Array->getElementType();
+    const uint64_t Stride = DL.getTypeAllocSize(ElemTy);
+    for (unsigned Idx = 0, End = Array->getNumElements(); Idx != End; ++Idx)
+      computeValueLLTs(DL, *ElemTy, ValueTys, Offsets,
+                       StartingOffset + Idx * Stride);
+    return;
+  }
+  // A void type contributes no values.
+  if (Ty.isVoidTy())
+    return;
+  // Leaf type: record its LLT and, if requested, StartingOffset times 8.
+  ValueTys.push_back(getLLTForType(Ty, DL));
+  if (Offsets)
+    Offsets->push_back(StartingOffset * 8);
+}
+
 /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
 GlobalValue *llvm::ExtractTypeInfo(Value *V) {
   V = V->stripPointerCasts();
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index d93716287981..b11148595136 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 9011f025f595..f6ef85a5b78f 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 95875ccb8a0b..b1b7921ea976 100644
--- a/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,10 +55,10 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
   // Create the individual hash data outputs.
   for (auto &E : Entries) {
     // Unique the entries.
-    std::stable_sort(E.second.Values.begin(), E.second.Values.end(),
-                     [](const AccelTableData *A, const AccelTableData *B) {
-                       return *A < *B;
-                     });
+    llvm::stable_sort(E.second.Values,
+                      [](const AccelTableData *A, const AccelTableData *B) {
+                        return *A < *B;
+                      });
     E.second.Values.erase(
         std::unique(E.second.Values.begin(), E.second.Values.end()),
         E.second.Values.end());
@@ -82,10 +81,9 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
   // Sort the contents of the buckets by hash value so that hash collisions end
   // up together. Stable sort makes testing easier and doesn't cost much more.
   for (auto &Bucket : Buckets)
-    std::stable_sort(Bucket.begin(), Bucket.end(),
-                     [](HashData *LHS, HashData *RHS) {
-                       return LHS->HashValue < RHS->HashValue;
-                     });
+    llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) {
+      return LHS->HashValue < RHS->HashValue;
+    });
 }
 
 namespace {
@@ -557,8 +555,8 @@ void llvm::emitDWARF5AccelTable(
   SmallVector<unsigned, 1> CUIndex(CUs.size());
   int Count = 0;
   for (const auto &CU : enumerate(CUs)) {
-    if (CU.value()->getCUNode()->getNameTableKind() ==
-        DICompileUnit::DebugNameTableKind::None)
+    if (CU.value()->getCUNode()->getNameTableKind() !=
+        DICompileUnit::DebugNameTableKind::Default)
       continue;
     CUIndex[CU.index()] = Count++;
     assert(CU.index() == CU.value()->getUniqueID());
@@ -616,30 +614,10 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
   Asm->emitInt32(QualifiedNameHash);
 }
 
-#ifndef _MSC_VER
-// The lines below are rejected by older versions (TBD) of MSVC.
 constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
 constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
 constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
 constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-#else
-// FIXME: Erase this path once the minimum MSCV version has been bumped.
-const SmallVector<AppleAccelTableData::Atom, 4>
-    AppleAccelTableOffsetData::Atoms = {
-        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms =
-    {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
-     Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
-     Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
-const SmallVector<AppleAccelTableData::Atom, 4>
-    AppleAccelTableStaticOffsetData::Atoms = {
-        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4>
-    AppleAccelTableStaticTypeData::Atoms = {
-        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
-        Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
-        Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
-#endif
 
 #ifndef NDEBUG
 void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 042243b79259..f11c7de5ed8a 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,21 +23,24 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
   return IterBool.first->second.Number;
 }
 
-
-void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
   static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
-  uint64_t Length = sizeof(uint16_t) // version
-                  + sizeof(uint8_t)  // address_size
-                  + sizeof(uint8_t)  // segment_selector_size
-                  + AddrSize * Pool.size(); // entries
+  StringRef Prefix = "debug_addr_";
+  MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
+  MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
+
   Asm.OutStreamer->AddComment("Length of contribution");
-  Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+  Asm.EmitLabelDifference(EndLabel, BeginLabel,
+                          4); // TODO: Support DWARF64 format.
+  Asm.OutStreamer->EmitLabel(BeginLabel);
   Asm.OutStreamer->AddComment("DWARF version number");
   Asm.emitInt16(Asm.getDwarfVersion());
   Asm.OutStreamer->AddComment("Address size");
   Asm.emitInt8(AddrSize);
   Asm.OutStreamer->AddComment("Segment selector size");
   Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+
+  return EndLabel;
 }
 
 // Emit addresses into the section given.
@@ -49,8 +51,10 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
   // Start the dwarf addr section.
   Asm.OutStreamer->SwitchSection(AddrSection);
 
+  MCSymbol *EndLabel = nullptr;
+
   if (Asm.getDwarfVersion() >= 5)
-    emitHeader(Asm, AddrSection);
+    EndLabel = emitHeader(Asm, AddrSection);
 
   // Define the symbol that marks the start of the contribution.
   // It is referenced via DW_AT_addr_base.
@@ -67,4 +71,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
 
   for (const MCExpr *Entry : Entries)
     Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+
+  if (EndLabel)
+    Asm.OutStreamer->EmitLabel(EndLabel);
 }
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 2209c7eb50ed..f92cf72093ca 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,7 +54,7 @@ public:
   void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
 
 private:
-  void emitHeader(AsmPrinter &Asm, MCSection *Section);
+  MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section);
 
   /// Symbol designates the start of the contribution to the address table.
   MCSymbol *AddressTableBaseSym = nullptr;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7070451e3330..54f6cc2d5571 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,7 +34,6 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/AsmPrinterHandler.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/GCStrategy.h"
@@ -60,7 +58,6 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Comdat.h"
 #include "llvm/IR/Constant.h"
@@ -80,6 +77,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -101,6 +99,9 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Pass.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -143,9 +144,10 @@ static const char *const CodeViewLineTablesGroupDescription =
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
-static cl::opt<bool>
-    PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
-                  cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+static cl::opt<bool> EnableRemarksSection(
+    "remarks-section",
+    cl::desc("Emit a section containing remark diagnostics metadata"),
+    cl::init(false));
 
 char AsmPrinter::ID = 0;
 
@@ -232,6 +234,12 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
   S.EmitInstruction(Inst, getSubtargetInfo());
 }
 
+void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
+  assert(DD && "Dwarf debug file is not defined.");
+  assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode.");
+  (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+}
+
 /// getCurrentSection() - Return the current section we are emitting to.
 const MCSection *AsmPrinter::getCurrentSection() const {
   return OutStreamer->getCurrentSectionOnly();
@@ -252,6 +260,9 @@ bool AsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
 
+  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+      .getModuleMetadata(M);
+
   OutStreamer->InitSections(false);
 
   // Emit the version-min deployment target directive if needed.
@@ -300,16 +311,17 @@ bool AsmPrinter::doInitialization(Module &M) {
   if (MAI->doesSupportDebugInformation()) {
     bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
     if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
-      Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
-                                     DbgTimerName, DbgTimerDescription,
-                                     CodeViewLineTablesGroupName,
-                                     CodeViewLineTablesGroupDescription));
+      Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+                            DbgTimerName, DbgTimerDescription,
+                            CodeViewLineTablesGroupName,
+                            CodeViewLineTablesGroupDescription);
     }
     if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
       DD = new DwarfDebug(this, &M);
       DD->beginModule();
-      Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription,
-                                     DWARFGroupName, DWARFGroupDescription));
+      Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+                            DbgTimerDescription, DWARFGroupName,
+                            DWARFGroupDescription);
     }
   }
 
@@ -362,14 +374,15 @@ bool AsmPrinter::doInitialization(Module &M) {
     break;
   }
   if (ES)
-    Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
-                                   DWARFGroupName, DWARFGroupDescription));
+    Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
+                          EHTimerDescription, DWARFGroupName,
+                          DWARFGroupDescription);
 
   if (mdconst::extract_or_null<ConstantInt>(
           MMI->getModule()->getModuleFlag("cfguardtable")))
-    Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
-                                   CFGuardDescription, DWARFGroupName,
-                                   DWARFGroupDescription));
+    Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+                          CFGuardDescription, DWARFGroupName,
+                          DWARFGroupDescription);
 
   return false;
 }
@@ -483,7 +496,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
   const DataLayout &DL = GV->getParent()->getDataLayout();
-  uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+  uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
 
   // If the alignment is specified, we *must* obey it.  Overaligning a global
   // with a specified alignment is a prompt way to break globals emitted to
@@ -658,6 +671,9 @@ void AsmPrinter::EmitFunctionHeader() {
   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
 
+  if (F.hasFnAttribute(Attribute::Cold))
+    OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold);
+
   if (isVerbose()) {
     F.printAsOperand(OutStreamer->GetCommentOS(),
                    /*PrintType=*/false, F.getParent());
@@ -738,74 +754,30 @@ void AsmPrinter::EmitFunctionEntryLabel() {
 }
 
 /// emitComments - Pretty-print comments for instructions.
-/// It returns true iff the sched comment was emitted.
-///   Otherwise it returns false.
-static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
-                         AsmPrinter *AP) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
   // Check for spills and reloads
-  int FI;
-
-  const MachineFrameInfo &MFI = MF->getFrameInfo();
-  bool Commented = false;
-
-  auto getSize =
-      [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
-        unsigned Size = 0;
-        for (auto A : Accesses)
-          if (MFI.isSpillSlotObjectIndex(
-                  cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
-                      ->getFrameIndex()))
-            Size += A->getSize();
-        return Size;
-      };
 
   // We assume a single instruction only has a spill or reload, not
   // both.
-  const MachineMemOperand *MMO;
-  SmallVector<const MachineMemOperand *, 2> Accesses;
-  if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      MMO = *MI.memoperands_begin();
-      CommentOS << MMO->getSize() << "-byte Reload";
-      Commented = true;
-    }
-  } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
-    if (auto Size = getSize(Accesses)) {
-      CommentOS << Size << "-byte Folded Reload";
-      Commented = true;
-    }
-  } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
-    if (MFI.isSpillSlotObjectIndex(FI)) {
-      MMO = *MI.memoperands_begin();
-      CommentOS << MMO->getSize() << "-byte Spill";
-      Commented = true;
-    }
-  } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
-    if (auto Size = getSize(Accesses)) {
-      CommentOS << Size << "-byte Folded Spill";
-      Commented = true;
-    }
+  Optional<unsigned> Size;
+  if ((Size = MI.getRestoreSize(TII))) {
+    CommentOS << *Size << "-byte Reload\n";
+  } else if ((Size = MI.getFoldedRestoreSize(TII))) {
+    if (*Size)
+      CommentOS << *Size << "-byte Folded Reload\n";
+  } else if ((Size = MI.getSpillSize(TII))) {
+    CommentOS << *Size << "-byte Spill\n";
+  } else if ((Size = MI.getFoldedSpillSize(TII))) {
+    if (*Size)
+      CommentOS << *Size << "-byte Folded Spill\n";
   }
 
   // Check for spill-induced copies
-  if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
-    Commented = true;
-    CommentOS << " Reload Reuse";
-  }
-
-  if (Commented) {
-    if (AP->EnablePrintSchedInfo) {
-      // If any comment was added above and we need sched info comment then add
-      // this new comment just after the above comment w/o "\n" between them.
-      CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
-      return true;
-    }
-    CommentOS << "\n";
-  }
-  return false;
+  if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+    CommentOS << " Reload Reuse\n";
 }
 
 /// emitImplicitDef - This method emits the specified machine instruction
@@ -1093,10 +1065,8 @@ void AsmPrinter::EmitFunctionBody() {
         }
       }
 
-      if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
-        MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
-        MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
-      }
+      if (isVerbose())
+        emitComments(MI, OutStreamer->GetCommentOS());
 
       switch (MI.getOpcode()) {
       case TargetOpcode::CFI_INSTRUCTION:
@@ -1105,11 +1075,13 @@ void AsmPrinter::EmitFunctionBody() {
       case TargetOpcode::LOCAL_ESCAPE:
         emitFrameAlloc(MI);
         break;
+      case TargetOpcode::ANNOTATION_LABEL:
       case TargetOpcode::EH_LABEL:
       case TargetOpcode::GC_LABEL:
         OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
         break;
       case TargetOpcode::INLINEASM:
+      case TargetOpcode::INLINEASM_BR:
         EmitInlineAsm(&MI);
         break;
       case TargetOpcode::DBG_VALUE:
@@ -1266,7 +1238,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
   // GlobalVariable or Function, i.e., as GlobalValue.
   if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
       !GV->isConstant() || !GV->isDiscardableIfUnused() ||
-      !dyn_cast<GlobalValue>(GV->getOperand(0)))
+      !isa<GlobalValue>(GV->getOperand(0)))
     return false;
 
   // To be a got equivalent, at least one of its users need to be a constant
@@ -1329,9 +1301,19 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
   else
     assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
 
+  bool IsFunction = GIS.getValueType()->isFunctionTy();
+
+  // Treat bitcasts of functions as functions also. This is important at least
+  // on WebAssembly where object and function addresses can't alias each other.
+  if (!IsFunction)
+    if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+      if (CE->getOpcode() == Instruction::BitCast)
+        IsFunction =
+          CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+
   // Set the symbol type to function if the alias has a function type.
   // This affects codegen when the aliasee is not a function.
-  if (GIS.getType()->getPointerElementType()->isFunctionTy()) {
+  if (IsFunction) {
     OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
     if (isa<GlobalIFunc>(GIS))
       OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
@@ -1363,6 +1345,66 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
   }
 }
 
+void AsmPrinter::emitRemarksSection(Module &M) {
+  RemarkStreamer *RS = M.getContext().getRemarkStreamer();
+  if (!RS)
+    return;
+  const remarks::Serializer &Serializer = RS->getSerializer();
+
+  // Switch to the right section: .remarks/__remarks.
+  MCSection *RemarksSection =
+      OutContext.getObjectFileInfo()->getRemarksSection();
+  OutStreamer->SwitchSection(RemarksSection);
+
+  // Emit the magic number.
+  OutStreamer->EmitBytes(remarks::Magic);
+  // Explicitly emit a '\0'.
+  OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+
+  // Emit the version number: little-endian uint64_t.
+  // The version number directly follows the null-terminated magic number.
+  std::array<char, 8> Version;
+  support::endian::write64le(Version.data(), remarks::Version);
+  OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
+
+  // Emit the string table in the section.
+  // Note: we need to use the streamer here to emit it in the section. We can't
+  // just use the serialize function with a raw_ostream because of the way
+  // MCStreamers work.
+  uint64_t StrTabSize =
+      Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
+  // Emit the total size of the string table (the size itself excluded):
+  // little-endian uint64_t.
+  // The total size is located after the version number.
+  // Note: even if no string table is used, emit 0.
+  std::array<char, 8> StrTabSizeBuf;
+  support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+  OutStreamer->EmitBinaryData(
+      StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+
+  if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
+    std::vector<StringRef> StrTabStrings = StrTab->serialize();
+    // Emit a list of null-terminated strings.
+    // Note: the order is important here: the ID used in the remarks corresponds
+    // to the position of the string in the section.
+    for (StringRef Str : StrTabStrings) {
+      OutStreamer->EmitBytes(Str);
+      // Explicitly emit a '\0'.
+      OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+    }
+  }
+
+  // Emit the null-terminated absolute path to the remark file.
+  // The path is the last field, emitted after the string table (if any).
+  StringRef FilenameRef = RS->getFilename();
+  SmallString<128> Filename = FilenameRef;
+  sys::fs::make_absolute(Filename);
+  assert(!Filename.empty() && "The filename can't be empty.");
+  OutStreamer->EmitBytes(Filename);
+  // Explicitly emit a '\0'.
+  OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+}
+
 bool AsmPrinter::doFinalization(Module &M) {
   // Set the MachineFunction to nullptr so that we can catch attempted
   // accesses to MF specific features at the module level and so that
@@ -1394,6 +1436,12 @@ bool AsmPrinter::doFinalization(Module &M) {
     EmitVisibility(Name, V, false);
   }
 
+  // Emit the remarks section contents.
+  // FIXME: Figure out the safest time to emit this section. It should
+  // not come after debug info.
+  if (EnableRemarksSection)
+    emitRemarksSection(M);
+
   const TargetLoweringObjectFile &TLOF = getObjFileLowering();
 
   TLOF.emitModuleMetadata(*OutStreamer, M);
@@ -1448,7 +1496,6 @@ bool AsmPrinter::doFinalization(Module &M) {
     NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
                        HI.TimerGroupDescription, TimePassesIsEnabled);
     HI.Handler->endModule();
-    delete HI.Handler;
   }
   Handlers.clear();
   DD = nullptr;
@@ -1592,6 +1639,24 @@ bool AsmPrinter::doFinalization(Module &M) {
         OutStreamer->EmitAddrsigSym(getSymbol(&GV));
   }
 
+  // Emit symbol partition specifications (ELF only).
+  if (TM.getTargetTriple().isOSBinFormatELF()) {
+    unsigned UniqueID = 0;
+    for (const GlobalValue &GV : M.global_values()) {
+      if (!GV.hasPartition() || GV.isDeclarationForLinker() ||
+          GV.getVisibility() != GlobalValue::DefaultVisibility)
+        continue;
+
+      OutStreamer->SwitchSection(OutContext.getELFSection(
+          ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID));
+      OutStreamer->EmitBytes(GV.getPartition());
+      OutStreamer->EmitZeros(1);
+      OutStreamer->EmitValue(
+          MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
+          MAI->getCodePointerSize());
+    }
+  }
+
   // Allow the target to emit any magic that it wants at the end of the file,
   // after everything else has gone out.
   EmitEndOfAsmFile(M);
@@ -1628,11 +1693,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   }
 
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
-
-  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
-                             ? PrintSchedule
-                             : STI.supportPrintSchedInfo();
 }
 
 namespace {
@@ -1905,8 +1965,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 }
 
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
-/// global in the specified llvm.used list for which emitUsedDirectiveFor
-/// is true, as being used with this directive.
+/// global in the specified llvm.used list.
 void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
   // Should be an array of 'i8*'.
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1933,7 +1992,7 @@ struct Structor {
 /// priority.
 void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
                                     bool isCtor) {
-  // Should be an array of '{ int, void ()* }' structs.  The first value is the
+  // Should be an array of '{ i32, void ()*, i8* }' structs.  The first value is the
   // init priority.
   if (!isa<ConstantArray>(List)) return;
 
@@ -1941,12 +2000,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
   const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
   if (!InitList) return; // Not an array!
   StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
-  // FIXME: Only allow the 3-field form in LLVM 4.0.
-  if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3)
-    return; // Not an array of two or three elements!
-  if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
-      !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
-  if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U)))
+  if (!ETy || ETy->getNumElements() != 3 ||
+      !isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+      !isa<PointerType>(ETy->getTypeAtIndex(1U)) ||
+      !isa<PointerType>(ETy->getTypeAtIndex(2U)))
     return; // Not (int, ptr, ptr).
 
   // Gather the structors in a form that's convenient for sorting by priority.
@@ -1962,16 +2019,16 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
     Structor &S = Structors.back();
     S.Priority = Priority->getLimitedValue(65535);
     S.Func = CS->getOperand(1);
-    if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
+    if (!CS->getOperand(2)->isNullValue())
       S.ComdatKey =
           dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
   }
 
   // Emit the function pointers in the target-specific order
   unsigned Align = Log2_32(DL.getPointerPrefAlignment());
-  std::stable_sort(Structors.begin(), Structors.end(),
-                   [](const Structor &L,
-                      const Structor &R) { return L.Priority < R.Priority; });
+  llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
+    return L.Priority < R.Priority;
+  });
   for (Structor &S : Structors) {
     const TargetLoweringObjectFile &Obj = getObjFileLowering();
     const MCSymbol *KeySym = nullptr;
@@ -2199,7 +2256,10 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
 
     // We can emit the pointer value into this slot if the slot is an
     // integer slot equal to the size of the pointer.
-    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+    //
+    // If the pointer is larger than the resultant integer, then
+    // as with Trunc just depend on the assembler to truncate it.
+    if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
       return OpExpr;
 
     // Otherwise the pointer is smaller than the resultant integer, mask off
@@ -2740,7 +2800,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
-  if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+  if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
     const MachineConstantPoolEntry &CPE =
         MF->getConstantPool()->getConstants()[CPID];
     if (!CPE.isMachineConstantPoolEntry()) {
@@ -2858,7 +2918,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
                                          MCCodePaddingContext &Context) const {
   assert(MF != nullptr && "Machine function must be valid");
   Context.IsPaddingActive = !MF->hasInlineAsm() &&
-                            !MF->getFunction().optForSize() &&
+                            !MF->getFunction().hasOptSize() &&
                             TM.getOptLevel() != CodeGenOpt::None;
   Context.IsBasicBlockReachableViaFallthrough =
       std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
@@ -2918,13 +2978,16 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
 
   // Print the main label for the block.
   if (MBB.pred_empty() ||
-      (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
+      (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() &&
+       !MBB.hasLabelMustBeEmitted())) {
     if (isVerbose()) {
       // NOTE: Want this comment at start of line, don't emit with AddComment.
       OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
                                   false);
     }
   } else {
+    if (isVerbose() && MBB.hasLabelMustBeEmitted())
+      OutStreamer->AddComment("Label of block must be emitted");
     OutStreamer->EmitLabel(MBB.getSymbol());
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index afce3ad3133b..992e44d95306 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -1,9 +1,8 @@
 //===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
@@ -43,11 +43,11 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
   OutStreamer->EmitSLEB128IntValue(Value);
 }
 
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const {
   if (isVerbose() && Desc)
     OutStreamer->AddComment(Desc);
 
-  OutStreamer->EmitULEB128IntValue(Value);
+  OutStreamer->EmitULEB128IntValue(Value, PadTo);
 }
 
 /// Emit something like ".uleb128 Hi-Lo".
@@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
   EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
 }
 
+void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi,
+                                    const MCSymbol *Lo,
+                                    unsigned Encoding) const {
+  // The low 3 bits of Encoding select the format: ULEB128 or a fixed size.
+  if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+    EmitLabelDifferenceAsULEB128(Hi, Lo);
+  else
+    EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitCallSiteValue(uint64_t Value,
+                                   unsigned Encoding) const {
+  // The low 3 bits of Encoding select the format: ULEB128 or a fixed size.
+  if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+    EmitULEB128(Value);
+  else
+    OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+}
+
 //===----------------------------------------------------------------------===//
 // Dwarf Lowering Routines
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 62103e3107c0..7721e996aca5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -1,9 +1,8 @@
 //===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,6 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
@@ -155,15 +153,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
                        " we don't have an asm parser for this target\n");
   Parser->setAssemblerDialect(Dialect);
   Parser->setTargetParser(*TAP.get());
-  Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
   // Enable lexing Masm binary and hex integer literals in intel inline
   // assembly.
   if (Dialect == InlineAsm::AD_Intel)
     Parser->getLexer().setLexMasmIntegers(true);
-  if (MF) {
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-    TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
-  }
 
   emitInlineAsmStart();
   // Don't implicitly switch to the text section before the asm.
@@ -176,9 +169,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
 }
 
 static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
-                               MachineModuleInfo *MMI, int InlineAsmVariant,
-                               AsmPrinter *AP, unsigned LocCookie,
-                               raw_ostream &OS) {
+                               MachineModuleInfo *MMI, AsmPrinter *AP,
+                               unsigned LocCookie, raw_ostream &OS) {
   // Switch to the inline assembly variant.
   OS << "\t.intel_syntax\n\t";
 
@@ -270,11 +262,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
         ++OpNo;  // Skip over the ID number.
 
         if (InlineAsm::isMemKind(OpFlags)) {
-          Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
-                                            /*Modifier*/ nullptr, OS);
+          Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
         } else {
-          Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
-                                      /*Modifier*/ nullptr, OS);
+          Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
         }
       }
       if (Error) {
@@ -291,9 +281,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
 }
 
 static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
-                                MachineModuleInfo *MMI, int InlineAsmVariant,
-                                int AsmPrinterVariant, AsmPrinter *AP,
-                                unsigned LocCookie, raw_ostream &OS) {
+                                MachineModuleInfo *MMI, int AsmPrinterVariant,
+                                AsmPrinter *AP, unsigned LocCookie,
+                                raw_ostream &OS) {
   int CurVariant = -1;            // The number of the {.|.|.} region we are in.
   const char *LastEmitted = AsmStr; // One past the last character emitted.
   unsigned NumOperands = MI->getNumOperands();
@@ -435,17 +425,25 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
           unsigned OpFlags = MI->getOperand(OpNo).getImm();
           ++OpNo;  // Skip over the ID number.
 
+          // FIXME: Shouldn't arch-independent output template handling go into
+          // PrintAsmOperand?
           if (Modifier[0] == 'l') { // Labels are target independent.
-            // FIXME: What if the operand isn't an MBB, report error?
-            const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
-            Sym->print(OS, AP->MAI);
+            if (MI->getOperand(OpNo).isBlockAddress()) {
+              const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+              MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+              Sym->print(OS, AP->MAI);
+            } else if (MI->getOperand(OpNo).isMBB()) {
+              const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+              Sym->print(OS, AP->MAI);
+            } else {
+              Error = true;
+            }
           } else {
             if (InlineAsm::isMemKind(OpFlags)) {
-              Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
-                                                Modifier[0] ? Modifier : nullptr,
-                                                OS);
+              Error = AP->PrintAsmMemoryOperand(
+                  MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
             } else {
-              Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+              Error = AP->PrintAsmOperand(MI, OpNo,
                                           Modifier[0] ? Modifier : nullptr, OS);
             }
           }
@@ -515,18 +513,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
 
   // The variant of the current asmprinter.
   int AsmPrinterVariant = MAI->getAssemblerDialect();
-  InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
   AsmPrinter *AP = const_cast<AsmPrinter*>(this);
-  if (InlineAsmVariant == InlineAsm::AD_ATT)
-    EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
-                        AP, LocCookie, OS);
+  if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
+    EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS);
   else
-    EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
-
-  // Reset SanitizeAddress based on the function's attribute.
-  MCTargetOptions MCOptions = TM.Options.MCOptions;
-  MCOptions.SanitizeAddress =
-      MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+    EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
 
   // Emit warnings if we use reserved registers on the clobber list, as
   // that might give surprising results.
@@ -566,7 +557,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
     SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
   }
 
-  EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
+  EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
                 MI->getInlineAsmDialect());
 
   // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
@@ -608,32 +599,50 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
   }
 }
 
+void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
+  assert(MO.isGlobal() && "caller should check MO.isGlobal");
+  getSymbol(MO.getGlobal())->print(OS, MAI); // Symbol of the referenced global.
+  printOffset(MO.getOffset(), OS);           // Then its offset via printOffset.
+}
+
 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
 /// instruction, using the specified assembler variant.  Targets should
-/// override this to format as appropriate.
+/// override this to format as appropriate for machine specific ExtraCodes
+/// or when the arch-independent handling would be too complex otherwise.
 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                 unsigned AsmVariant, const char *ExtraCode,
-                                 raw_ostream &O) {
+                                 const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
+    // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html
     const MachineOperand &MO = MI->getOperand(OpNo);
     switch (ExtraCode[0]) {
     default:
       return true;  // Unknown modifier.
+    case 'a': // Print as memory address.
+      if (MO.isReg()) {
+        PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
+        return false;
+      }
+      LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates.
     case 'c': // Substitute immediate value without immediate syntax
-      if (MO.getType() != MachineOperand::MO_Immediate)
-        return true;
-      O << MO.getImm();
-      return false;
+      if (MO.isImm()) {
+        O << MO.getImm();
+        return false;
+      }
+      if (MO.isGlobal()) {
+        PrintSymbolOperand(MO, O);
+        return false;
+      }
+      return true;
     case 'n':  // Negate the immediate constant.
-      if (MO.getType() != MachineOperand::MO_Immediate)
+      if (!MO.isImm())
         return true;
       O << -MO.getImm();
       return false;
     case 's':  // The GCC deprecated s modifier
-      if (MO.getType() != MachineOperand::MO_Immediate)
+      if (!MO.isImm())
         return true;
       O << ((32 - MO.getImm()) & 31);
       return false;
@@ -643,7 +652,6 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
   // Target doesn't support this yet!
   return true;
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 2163cc7e3e11..db2ff458eb2e 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,7 @@ class ByteStreamer {
   // For now we're just handling the calls we need for dwarf emission/hashing.
   virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
   virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
-  virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+  virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0;
 };
 
 class APByteStreamer final : public ByteStreamer {
@@ -49,7 +48,7 @@ public:
     AP.OutStreamer->AddComment(Comment);
     AP.EmitSLEB128(DWord);
   }
-  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+  void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
     AP.OutStreamer->AddComment(Comment);
     AP.EmitULEB128(DWord);
   }
@@ -66,7 +65,7 @@ class HashingByteStreamer final : public ByteStreamer {
   void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
     Hash.addSLEB128(DWord);
   }
-  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+  void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
     Hash.addULEB128(DWord);
   }
 };
@@ -103,9 +102,9 @@ public:
 
     }
   }
-  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+  void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
     raw_svector_ostream OSE(Buffer);
-    unsigned Length = encodeULEB128(DWord, OSE);
+    unsigned Length = encodeULEB128(DWord, OSE, PadTo);
     if (GenerateComments) {
       Comments.push_back(Comment.str());
       // Add some empty comments to keep the Buffer and Comments vectors aligned
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8cabad4ad312..932959c311fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1,9 +1,8 @@
 //===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
 #include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
@@ -51,6 +51,7 @@
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -67,6 +68,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -93,6 +95,26 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
+namespace {
+class CVMCAdapter : public CodeViewRecordStreamer {
+public:
+  explicit CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+  // Adapter: routes CodeViewRecordStreamer calls to the wrapped MCStreamer.
+  void EmitBytes(StringRef Data) override { OS->EmitBytes(Data); }
+  // Integers are emitted through EmitIntValueInHex, not plain EmitIntValue.
+  void EmitIntValue(uint64_t Value, unsigned Size) override {
+    OS->EmitIntValueInHex(Value, Size);
+  }
+
+  void EmitBinaryData(StringRef Data) override { OS->EmitBinaryData(Data); }
+
+  void AddComment(const Twine &T) override { OS->AddComment(T); }
+
+private:
+  MCStreamer *OS = nullptr;
+};
+} // namespace
+
 static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
   switch (Type) {
   case Triple::ArchType::x86:
@@ -273,7 +295,7 @@ static const DISubprogram *getQualifiedNameComponents(
     StringRef ScopeName = getPrettyScopeName(Scope);
     if (!ScopeName.empty())
       QualifiedNameComponents.push_back(ScopeName);
-    Scope = Scope->getScope().resolve();
+    Scope = Scope->getScope();
   }
   return ClosestSubprogram;
 }
@@ -309,7 +331,7 @@ struct CodeViewDebug::TypeLoweringScope {
 };
 
 static std::string getFullyQualifiedName(const DIScope *Ty) {
-  const DIScope *Scope = Ty->getScope().resolve();
+  const DIScope *Scope = Ty->getScope();
   return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
 }
 
@@ -344,7 +366,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
   // MSVC.
   StringRef DisplayName = SP->getName().split('<').first;
 
-  const DIScope *Scope = SP->getScope().resolve();
+  const DIScope *Scope = SP->getScope();
   TypeIndex TI;
   if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
     // If the scope is a DICompositeType, then this must be a method. Member
@@ -364,8 +386,8 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
   return recordTypeIndexForDINode(SP, TI);
 }
 
-static bool isTrivial(const DICompositeType *DCTy) {
-  return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+static bool isNonTrivial(const DICompositeType *DCTy) {
+  return (DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial;
 }
 
 static FunctionOptions
@@ -376,16 +398,16 @@ getFunctionOptions(const DISubroutineType *Ty,
   const DIType *ReturnTy = nullptr;
   if (auto TypeArray = Ty->getTypeArray()) {
     if (TypeArray.size())
-      ReturnTy = TypeArray[0].resolve();
+      ReturnTy = TypeArray[0];
   }
 
   if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
-    if (!isTrivial(ReturnDCTy))
+    if (isNonTrivial(ReturnDCTy))
       FO |= FunctionOptions::CxxReturnUdt;
   }
 
   // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison.
-  if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+  if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) {
     FO |= FunctionOptions::Constructor;
 
   // TODO: put the FunctionOptions::ConstructorWithVirtualBases flag.
@@ -582,8 +604,9 @@ void CodeViewDebug::endModule() {
   clear();
 }
 
-static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
-    unsigned MaxFixedRecordLength = 0xF00) {
+static void
+emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
+                             unsigned MaxFixedRecordLength = 0xF00) {
   // The maximum CV record length is 0xFF00. Most of the strings we emit appear
   // after a fixed length portion of the record. The fixed length portion should
   // always be less than 0xF00 (3840) bytes, so truncate the string so that the
@@ -594,6 +617,13 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
   OS.EmitBytes(NullTerminatedString);
 }
 
+static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
+  for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
+    if (EE.Value == TypeKind)
+      return EE.Name; // Name of the first entry whose Value equals TypeKind.
+  return ""; // No entry in getTypeLeafNames() matched: empty name.
+}
+
 void CodeViewDebug::emitTypeInformation() {
   if (TypeTable.empty())
     return;
@@ -610,31 +640,55 @@ void CodeViewDebug::emitTypeInformation() {
   }
 
   TypeTableCollection Table(TypeTable.records());
+  SmallString<512> CommentBlock;
+  raw_svector_ostream CommentOS(CommentBlock);
+  std::unique_ptr<ScopedPrinter> SP;
+  std::unique_ptr<TypeDumpVisitor> TDV;
+  TypeVisitorCallbackPipeline Pipeline;
+
+  if (OS.isVerboseAsm()) {
+    // To construct block comment describing the type record for readability.
+    SP = llvm::make_unique<ScopedPrinter>(CommentOS);
+    SP->setPrefix(CommentPrefix);
+    TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
+    Pipeline.addCallbackToPipeline(*TDV);
+  }
+
+  // To emit type record using Codeview MCStreamer adapter
+  CVMCAdapter CVMCOS(OS);
+  TypeRecordMapping typeMapping(CVMCOS);
+  Pipeline.addCallbackToPipeline(typeMapping);
+
   Optional<TypeIndex> B = Table.getFirst();
   while (B) {
     // This will fail if the record data is invalid.
     CVType Record = Table.getType(*B);
 
+    CommentBlock.clear();
+
+    auto RecordLen = Record.length();
+    auto RecordKind = Record.kind();
+    if (OS.isVerboseAsm())
+      CVMCOS.AddComment("Record length");
+    CVMCOS.EmitIntValue(RecordLen - 2, 2);
+    if (OS.isVerboseAsm())
+      CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
+    CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
+
+    Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
+
+    if (E) {
+      logAllUnhandledErrors(std::move(E), errs(), "error: ");
+      llvm_unreachable("produced malformed type record");
+    }
+
     if (OS.isVerboseAsm()) {
-      // Emit a block comment describing the type record for readability.
-      SmallString<512> CommentBlock;
-      raw_svector_ostream CommentOS(CommentBlock);
-      ScopedPrinter SP(CommentOS);
-      SP.setPrefix(CommentPrefix);
-      TypeDumpVisitor TDV(Table, &SP, false);
-
-      Error E = codeview::visitTypeRecord(Record, *B, TDV);
-      if (E) {
-        logAllUnhandledErrors(std::move(E), errs(), "error: ");
-        llvm_unreachable("produced malformed type record");
-      }
       // emitRawComment will insert its own tab and comment string before
       // the first line, so strip off our first one. It also prints its own
       // newline.
       OS.emitRawComment(
           CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
     }
-    OS.EmitBinaryData(Record.str_data());
     B = Table.getNext(*B);
   }
 }
@@ -700,6 +754,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
     return SourceLanguage::Java;
   case dwarf::DW_LANG_D:
     return SourceLanguage::D;
+  case dwarf::DW_LANG_Swift:
+    return SourceLanguage::Swift;
   default:
     // There's no CodeView representation for this language, and CV doesn't
     // have an "unknown" option for the language field, so we'll use MASM,
@@ -973,8 +1029,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
   // If we have a display name, build the fully qualified name by walking the
   // chain of scopes.
   if (!SP->getName().empty())
-    FuncName =
-        getFullyQualifiedName(SP->getScope().resolve(), SP->getName());
+    FuncName = getFullyQualifiedName(SP->getScope(), SP->getName());
 
   // If our DISubprogram name is empty, use the mangled name.
   if (FuncName.empty())
@@ -1071,6 +1126,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
       endSymbolRecord(AnnotEnd);
     }
 
+    for (auto HeapAllocSite : FI.HeapAllocSites) {
+      MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+      MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
+
+      // The labels might not be defined if the instruction was replaced
+      // somewhere in the codegen pipeline.
+      if (!BeginLabel->isDefined() || !EndLabel->isDefined())
+        continue;
+
+      DIType *DITy = std::get<2>(HeapAllocSite);
+      MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
+      OS.AddComment("Call site offset");
+      OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+      OS.AddComment("Call site section index");
+      OS.EmitCOFFSectionIndex(BeginLabel);
+      OS.AddComment("Call instruction length");
+      OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+      OS.AddComment("Type index");
+      OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4);
+      endSymbolRecord(HeapAllocEnd);
+    }
+
     if (SP != nullptr)
       emitDebugInfoForUDTs(LocalUDTs);
 
@@ -1118,9 +1195,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
     // If the variable has an attached offset expression, extract it.
     // FIXME: Try to handle DW_OP_deref as well.
     int64_t ExprOffset = 0;
-    if (VI.Expr)
-      if (!VI.Expr->extractIfOffset(ExprOffset))
+    bool Deref = false;
+    if (VI.Expr) {
+      // If there is one DW_OP_deref element, use offset of 0 and keep going.
+      if (VI.Expr->getNumElements() == 1 &&
+          VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref)
+        Deref = true;
+      else if (!VI.Expr->extractIfOffset(ExprOffset))
         continue;
+    }
 
     // Get the frame register used and the offset.
     unsigned FrameReg = 0;
@@ -1130,6 +1213,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
     // Calculate the label ranges.
     LocalVarDefRange DefRange =
         createDefRangeMem(CVReg, FrameOffset + ExprOffset);
+
     for (const InsnRange &Range : Scope->getRanges()) {
       const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
       const MCSymbol *End = getLabelAfterInsn(Range.second);
@@ -1140,6 +1224,9 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
     LocalVariable Var;
     Var.DIVar = VI.Var;
     Var.DefRanges.emplace_back(std::move(DefRange));
+    if (Deref)
+      Var.UseReferenceType = true;
+
     recordLocalVariable(std::move(Var), Scope);
   }
 }
@@ -1153,13 +1240,15 @@ static bool needsReferenceType(const DbgVariableLocation &Loc) {
 }
 
 void CodeViewDebug::calculateRanges(
-    LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) {
+    LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) {
   const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
 
   // Calculate the definition ranges.
-  for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
-    const InsnRange &Range = *I;
-    const MachineInstr *DVInst = Range.first;
+  for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) {
+    const auto &Entry = *I;
+    if (!Entry.isDbgValue())
+      continue;
+    const MachineInstr *DVInst = Entry.getInstr();
     assert(DVInst->isDebugValue() && "Invalid History entry");
     // FIXME: Find a way to represent constant variables, since they are
     // relatively common.
@@ -1186,7 +1275,7 @@ void CodeViewDebug::calculateRanges(
       // Start over using that.
       Var.UseReferenceType = true;
       Var.DefRanges.clear();
-      calculateRanges(Var, Ranges);
+      calculateRanges(Var, Entries);
       return;
     }
 
@@ -1214,21 +1303,15 @@ void CodeViewDebug::calculateRanges(
     }
 
     // Compute the label range.
-    const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
-    const MCSymbol *End = getLabelAfterInsn(Range.second);
-    if (!End) {
-      // This range is valid until the next overlapping bitpiece. In the
-      // common case, ranges will not be bitpieces, so they will overlap.
-      auto J = std::next(I);
-      const DIExpression *DIExpr = DVInst->getDebugExpression();
-      while (J != E &&
-             !DIExpr->fragmentsOverlap(J->first->getDebugExpression()))
-        ++J;
-      if (J != E)
-        End = getLabelBeforeInsn(J->first);
-      else
-        End = Asm->getFunctionEnd();
-    }
+    const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr());
+    const MCSymbol *End;
+    if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) {
+      auto &EndingEntry = Entries[Entry.getEndIndex()];
+      End = EndingEntry.isDbgValue()
+                ? getLabelBeforeInsn(EndingEntry.getInstr())
+                : getLabelAfterInsn(EndingEntry.getInstr());
+    } else
+      End = Asm->getFunctionEnd();
 
     // If the last range end is our begin, just extend the last range.
     // Otherwise make a new range.
@@ -1256,7 +1339,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
     const DILocation *InlinedAt = IV.second;
 
     // Instruction ranges, specifying where IV is accessible.
-    const auto &Ranges = I.second;
+    const auto &Entries = I.second;
 
     LexicalScope *Scope = nullptr;
     if (InlinedAt)
@@ -1270,7 +1353,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
     LocalVariable Var;
     Var.DIVar = DIVar;
 
-    calculateRanges(Var, Ranges);
+    calculateRanges(Var, Entries);
     recordLocalVariable(std::move(Var), Scope);
   }
 }
@@ -1340,8 +1423,8 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
     FPO |= FrameProcedureOptions::SecurityChecks;
   FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
   FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
-  if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
-      !GV.hasFnAttribute(Attribute::OptimizeNone))
+  if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
+      !GV.hasOptSize() && !GV.hasOptNone())
     FPO |= FrameProcedureOptions::OptimizedForSpeed;
   // FIXME: Set GuardCfg when it is implemented.
   CurFn->FrameProcOpts = FPO;
@@ -1379,7 +1462,7 @@ static bool shouldEmitUdt(const DIType *T) {
 
   // MSVC does not emit UDTs for typedefs that are scoped to classes.
   if (T->getTag() == dwarf::DW_TAG_typedef) {
-    if (DIScope *Scope = T->getScope().resolve()) {
+    if (DIScope *Scope = T->getScope()) {
       switch (Scope->getTag()) {
       case dwarf::DW_TAG_structure_type:
       case dwarf::DW_TAG_class_type:
@@ -1396,7 +1479,7 @@ static bool shouldEmitUdt(const DIType *T) {
     const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
     if (!DT)
       return true;
-    T = DT->getBaseType().resolve();
+    T = DT->getBaseType();
   }
   return true;
 }
@@ -1409,8 +1492,8 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) {
     return;
 
   SmallVector<StringRef, 5> QualifiedNameComponents;
-  const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
-      Ty->getScope().resolve(), QualifiedNameComponents);
+  const DISubprogram *ClosestSubprogram =
+      getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents);
 
   std::string FullyQualifiedName =
       getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
@@ -1479,8 +1562,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
 }
 
 TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
-  DITypeRef UnderlyingTypeRef = Ty->getBaseType();
-  TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+  TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType());
   StringRef TypeName = Ty->getName();
 
   addToUDTs(Ty);
@@ -1496,14 +1578,14 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
 }
 
 TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
-  DITypeRef ElementTypeRef = Ty->getBaseType();
-  TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+  const DIType *ElementType = Ty->getBaseType();
+  TypeIndex ElementTypeIndex = getTypeIndex(ElementType);
   // IndexType is size_t, which depends on the bitness of the target.
   TypeIndex IndexType = getPointerSizeInBytes() == 8
                             ? TypeIndex(SimpleTypeKind::UInt64Quad)
                             : TypeIndex(SimpleTypeKind::UInt32Long);
 
-  uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+  uint64_t ElementSize = getBaseTypeSize(ElementType) / 8;
 
   // Add subranges to array type.
   DINodeArray Elements = Ty->getElements();
@@ -1764,7 +1846,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
       break;
     }
     if (IsModifier)
-      BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+      BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType();
   }
 
   // Check if the inner type will use an LF_POINTER record. If so, the
@@ -1797,8 +1879,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
 
 TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
   SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
-  for (DITypeRef ArgTypeRef : Ty->getTypeArray())
-    ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+  for (const DIType *ArgType : Ty->getTypeArray())
+    ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType));
 
   // MSVC uses type none for variadic argument.
   if (ReturnAndArgTypeIndices.size() > 1 &&
@@ -1836,7 +1918,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
 
   unsigned Index = 0;
   SmallVector<TypeIndex, 8> ArgTypeIndices;
-  TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  TypeIndex ReturnTypeIndex = TypeIndex::Void();
+  if (ReturnAndArgs.size() > Index) {
+    ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  }
 
   // If the first argument is a pointer type and this isn't a static method,
   // treat it as the special 'this' parameter, which is encoded separately from
@@ -1844,7 +1929,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
   TypeIndex ThisTypeIndex;
   if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
     if (const DIDerivedType *PtrTy =
-            dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+            dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) {
       if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
         ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
         Index++;
@@ -1942,7 +2027,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
   // Put the Nested flag on a type if it appears immediately inside a tag type.
   // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
   // here. That flag is only set on definitions, and not forward declarations.
-  const DIScope *ImmediateScope = Ty->getScope().resolve();
+  const DIScope *ImmediateScope = Ty->getScope();
   if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
     CO |= ClassOptions::Nested;
 
@@ -1955,7 +2040,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
       CO |= ClassOptions::Scoped;
   } else {
     for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
-         Scope = Scope->getScope().resolve()) {
+         Scope = Scope->getScope()) {
       if (isa<DISubprogram>(Scope)) {
         CO |= ClassOptions::Scoped;
         break;
@@ -2075,7 +2160,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
   // succeeds, and drop the member if that fails.
   assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
   uint64_t Offset = DDTy->getOffsetInBits();
-  const DIType *Ty = DDTy->getBaseType().resolve();
+  const DIType *Ty = DDTy->getBaseType();
   bool FullyResolved = false;
   while (!FullyResolved) {
     switch (Ty->getTag()) {
@@ -2083,7 +2168,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
     case dwarf::DW_TAG_volatile_type:
       // FIXME: we should apply the qualifier types to the indirect fields
       // rather than dropping them.
-      Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+      Ty = cast<DIDerivedType>(Ty)->getBaseType();
       break;
     default:
       FullyResolved = true;
@@ -2184,6 +2269,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
   if (ContainsNestedClass)
     CO |= ClassOptions::ContainsNestedClass;
 
+  // MSVC appears to set this flag by searching for any destructor or method
+  // with FunctionOptions::Constructor among the emitted members. The Clang AST
+  // has all the members, but special member functions are not yet emitted into
+  // debug information. For now checking a class's non-triviality seems enough.
+  // FIXME: not true for a nested unnamed struct.
+  if (isNonTrivial(Ty))
+    CO |= ClassOptions::HasConstructorOrDestructor;
+
   std::string FullName = getFullyQualifiedName(Ty);
 
   uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
@@ -2358,7 +2451,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
 
   // Create nested classes.
   for (const DIType *Nested : Info.NestedTypes) {
-    NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
+    NestedTypeRecord R(getTypeIndex(Nested), Nested->getName());
     ContinuationBuilder.writeMemberType(R);
     MemberCount++;
   }
@@ -2385,10 +2478,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
   return VBPType;
 }
 
-TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
-  const DIType *Ty = TypeRef.resolve();
-  const DIType *ClassTy = ClassTyRef.resolve();
-
+TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) {
   // The null DIType is the void type. Don't try to hash it.
   if (!Ty)
     return TypeIndex::Void();
@@ -2431,8 +2521,7 @@ CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
   return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
 }
 
-TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
-  DIType *Ty = TypeRef.resolve();
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) {
   PointerRecord PR(getTypeIndex(Ty),
                    getPointerSizeInBytes() == 8 ? PointerKind::Near64
                                                 : PointerKind::Near32,
@@ -2441,9 +2530,7 @@ TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
   return TypeTable.writeLeafType(PR);
 }
 
-TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
-  const DIType *Ty = TypeRef.resolve();
-
+TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) {
   // The null DIType is the void type. Don't try to hash it.
   if (!Ty)
     return TypeIndex::Void();
@@ -2454,7 +2541,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
   if (Ty->getTag() == dwarf::DW_TAG_typedef)
     (void)getTypeIndex(Ty);
   while (Ty->getTag() == dwarf::DW_TAG_typedef)
-    Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+    Ty = cast<DIDerivedType>(Ty)->getBaseType();
 
   // If this is a non-record type, the complete type index is the same as the
   // normal type index. Just call getTypeIndex.
@@ -2467,11 +2554,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
     return getTypeIndex(Ty);
   }
 
-  // Check if we've already translated the complete record type.
   const auto *CTy = cast<DICompositeType>(Ty);
-  auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
-  if (!InsertResult.second)
-    return InsertResult.first->second;
 
   TypeLoweringScope S(*this);
 
@@ -2489,6 +2572,13 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
       return FwdDeclTI;
   }
 
+  // Check if we've already translated the complete record type.
+  // Insert the type with a null TypeIndex to signify that the type is currently
+  // being lowered.
+  auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+  if (!InsertResult.second)
+    return InsertResult.first->second;
+
   TypeIndex TI;
   switch (CTy->getTag()) {
   case dwarf::DW_TAG_class_type:
@@ -2799,6 +2889,7 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
   }
 
   CurFn->Annotations = MF->getCodeViewAnnotations();
+  CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites();
 
   CurFn->End = Asm->getFunctionEnd();
 
@@ -2914,10 +3005,19 @@ void CodeViewDebug::collectGlobalVariableInfo() {
   for (const MDNode *Node : CUs->operands()) {
     const auto *CU = cast<DICompileUnit>(Node);
     for (const auto *GVE : CU->getGlobalVariables()) {
+      const DIGlobalVariable *DIGV = GVE->getVariable();
+      const DIExpression *DIE = GVE->getExpression();
+
+      // Emit constant global variables in a global symbol section.
+      if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
+        CVGlobalVariable CVGV = {DIGV, DIE};
+        GlobalVariables.emplace_back(std::move(CVGV));
+      }
+
       const auto *GV = GlobalMap.lookup(GVE);
       if (!GV || GV->isDeclarationForLinker())
         continue;
-      const DIGlobalVariable *DIGV = GVE->getVariable();
+
       DIScope *Scope = DIGV->getScope();
       SmallVector<CVGlobalVariable, 1> *VariableList;
       if (Scope && isa<DILocalScope>(Scope)) {
@@ -2932,7 +3032,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
         // Emit this global variable into a COMDAT section.
         VariableList = &ComdatVariables;
       else
-        // Emit this globla variable in a single global symbol section.
+        // Emit this global variable in a single global symbol section.
         VariableList = &GlobalVariables;
       CVGlobalVariable CVGV = {DIGV, GV};
       VariableList->emplace_back(std::move(CVGV));
@@ -2955,13 +3055,14 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
   // Second, emit each global that is in a comdat into its own .debug$S
   // section along with its own symbol substream.
   for (const CVGlobalVariable &CVGV : ComdatVariables) {
-    MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+    const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>();
+    MCSymbol *GVSym = Asm->getSymbol(GV);
     OS.AddComment("Symbol subsection for " +
-            Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+                  Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
     switchToDebugSectionForSymbol(GVSym);
     MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
     // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
-    emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+    emitDebugInfoForGlobal(CVGV);
     endCVSubsection(EndLabel);
   }
 }
@@ -2981,31 +3082,63 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
 // Emit each global variable in the specified array.
 void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
   for (const CVGlobalVariable &CVGV : Globals) {
-    MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
     // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
-    emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
-  }
-}
-
-void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
-                                           const GlobalVariable *GV,
-                                           MCSymbol *GVSym) {
-  // DataSym record, see SymbolRecord.h for more info. Thread local data
-  // happens to have the same format as global data.
-  SymbolKind DataSym = GV->isThreadLocal()
-                           ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
-                                                    : SymbolKind::S_GTHREAD32)
-                           : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
-                                                    : SymbolKind::S_GDATA32);
-  MCSymbol *DataEnd = beginSymbolRecord(DataSym);
-  OS.AddComment("Type");
-  OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
-  OS.AddComment("DataOffset");
-  OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
-  OS.AddComment("Segment");
-  OS.EmitCOFFSectionIndex(GVSym);
-  OS.AddComment("Name");
-  const unsigned LengthOfDataRecord = 12;
-  emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
-  endSymbolRecord(DataEnd);
+    emitDebugInfoForGlobal(CVGV);
+  }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
+  const DIGlobalVariable *DIGV = CVGV.DIGV;
+  if (const GlobalVariable *GV =
+          CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
+    // DataSym record, see SymbolRecord.h for more info. Thread local data
+    // happens to have the same format as global data.
+    MCSymbol *GVSym = Asm->getSymbol(GV);
+    SymbolKind DataSym = GV->isThreadLocal()
+                             ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+                                                      : SymbolKind::S_GTHREAD32)
+                             : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+                                                      : SymbolKind::S_GDATA32);
+    MCSymbol *DataEnd = beginSymbolRecord(DataSym);
+    OS.AddComment("Type");
+    OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+    OS.AddComment("DataOffset");
+    OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+    OS.AddComment("Segment");
+    OS.EmitCOFFSectionIndex(GVSym);
+    OS.AddComment("Name");
+    const unsigned LengthOfDataRecord = 12;
+    emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+    endSymbolRecord(DataEnd);
+  } else {
+    // FIXME: Currently this only emits the global variables in the IR metadata.
+    // This should also emit enums and static data members.
+    const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
+    assert(DIE->isConstant() &&
+           "Global constant variables must contain a constant expression.");
+    uint64_t Val = DIE->getElement(1);
+
+    MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+    OS.AddComment("Type");
+    OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4);
+    OS.AddComment("Value");
+
+    // Encoded integers shouldn't need more than 10 bytes.
+    uint8_t data[10];
+    BinaryStreamWriter Writer(data, llvm::support::endianness::little);
+    CodeViewRecordIO IO(Writer);
+    cantFail(IO.mapEncodedInteger(Val));
+    StringRef SRef((char *)data, Writer.getOffset());
+    OS.EmitBinaryData(SRef);
+
+    OS.AddComment("Name");
+    const DIScope *Scope = DIGV->getScope();
+    // For static data members, get the scope from the declaration.
+    if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+            DIGV->getRawStaticDataMemberDeclaration()))
+      Scope = MemberDecl->getScope();
+    emitNullTerminatedSymbolName(OS,
+                                 getFullyQualifiedName(Scope, DIGV->getName()));
+    endSymbolRecord(SConstantEnd);
+  }
 }
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 21557ed1be35..ce57b789d7fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -1,9 +1,8 @@
 //===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
@@ -101,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   struct CVGlobalVariable {
     const DIGlobalVariable *DIGV;
-    const GlobalVariable *GV;
+    PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo;
   };
 
   struct InlineSite {
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
     SmallVector<LexicalBlock *, 1> ChildBlocks;
 
     std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+    std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites;
 
     const MCSymbol *Begin = nullptr;
     const MCSymbol *End = nullptr;
@@ -223,7 +224,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
 
   void calculateRanges(LocalVariable &Var,
-                       const DbgValueHistoryMap::InstrRanges &Ranges);
+                       const DbgValueHistoryMap::Entries &Entries);
 
   static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
                                         const FunctionInfo &FI,
@@ -313,8 +314,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   void emitDebugInfoForGlobals();
   void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
-  void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
-                              const GlobalVariable *GV, MCSymbol *GVSym);
+  void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
 
   /// Opens a subsection of the given kind in a .debug$S codeview section.
   /// Returns an end label for use with endCVSubsection when the subsection is
@@ -373,14 +373,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   /// Translates the DIType to codeview if necessary and returns a type index
   /// for it.
-  codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
-                                   DITypeRef ClassTyRef = DITypeRef());
+  codeview::TypeIndex getTypeIndex(const DIType *Ty,
+                                   const DIType *ClassTy = nullptr);
 
   codeview::TypeIndex
   getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
                          const DISubroutineType *SubroutineTy);
 
-  codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
+  codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty);
 
   codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
                                             const DICompositeType *Class);
@@ -419,7 +419,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   /// use this entry point when generating symbol records. The complete and
   /// incomplete type indices only differ for record types. All other types use
   /// the same index.
-  codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef);
+  codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty);
 
   codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
   codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index e27659494f08..f4134da48caa 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -1,9 +1,8 @@
 //===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -212,7 +211,7 @@ const DIE *DIE::getUnitDie() const {
   return nullptr;
 }
 
-const DIEUnit *DIE::getUnit() const {
+DIEUnit *DIE::getUnit() const {
   const DIE *UnitDie = getUnitDie();
   if (UnitDie)
     return UnitDie->Owner.dyn_cast<DIEUnit*>();
@@ -506,6 +505,23 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
 LLVM_DUMP_METHOD
 void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
 
+//===----------------------------------------------------------------------===//
+// DIEBaseTypeRef Implementation
+//===----------------------------------------------------------------------===//
+
+void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+  uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset();
+  assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+  AP->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+}
+
+unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+  return ULEB128PadSize;
+}
+
+LLVM_DUMP_METHOD
+void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; }
+
 //===----------------------------------------------------------------------===//
 // DIEDelta Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index b8f1202494d7..bfac8850a2a6 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -226,7 +225,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
   DwarfDebug &DD = *AP->getDwarfDebug();
   const DebugLocStream &Locs = DD.getDebugLocs();
   for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
-    DD.emitDebugLocEntry(Streamer, Entry);
+    DD.emitDebugLocEntry(Streamer, Entry, nullptr);
 }
 
 // Hash an individual attribute \param Attr based on the type of attribute and
@@ -310,6 +309,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
     // FIXME: It's uncertain whether or not we should handle this at the moment.
   case DIEValue::isExpr:
   case DIEValue::isLabel:
+  case DIEValue::isBaseTypeRef:
   case DIEValue::isDelta:
     llvm_unreachable("Add support for additional value types.");
   }
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index dae517ab2c29..2e49514c98be 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 09867822c30a..ddd60575b6c0 100644
--- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,15 +1,15 @@
 //===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -31,51 +31,62 @@ using namespace llvm;
 
 #define DEBUG_TYPE "dwarfdebug"
 
+namespace {
+using EntryIndex = DbgValueHistoryMap::EntryIndex;
+}
+
 // If @MI is a DBG_VALUE with debug value described by a
 // defined register, returns the number of this register.
 // In the other case, returns 0.
-static unsigned isDescribedByReg(const MachineInstr &MI) {
+static Register isDescribedByReg(const MachineInstr &MI) {
   assert(MI.isDebugValue());
   assert(MI.getNumOperands() == 4);
+  // If the location of the variable is an entry value (DW_OP_entry_value),
+  // do not consider it as a register location.
+  if (MI.getDebugExpression()->isEntryValue())
+    return 0;
   // If location of variable is described using a register (directly or
   // indirectly), this register is always a first operand.
-  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
 }
 
-void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
-                                         const MachineInstr &MI) {
+bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
+                                       const MachineInstr &MI,
+                                       EntryIndex &NewIndex) {
   // Instruction range should start with a DBG_VALUE instruction for the
   // variable.
   assert(MI.isDebugValue() && "not a DBG_VALUE");
-  auto &Ranges = VarInstrRanges[Var];
-  if (!Ranges.empty() && Ranges.back().second == nullptr &&
-      Ranges.back().first->isIdenticalTo(MI)) {
+  auto &Entries = VarEntries[Var];
+  if (!Entries.empty() && Entries.back().isDbgValue() &&
+      !Entries.back().isClosed() &&
+      Entries.back().getInstr()->isIdenticalTo(MI)) {
     LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
-                      << "\t" << Ranges.back().first << "\t" << MI << "\n");
-    return;
+                      << "\t" << Entries.back().getInstr() << "\t" << MI
+                      << "\n");
+    return false;
   }
-  Ranges.push_back(std::make_pair(&MI, nullptr));
+  Entries.emplace_back(&MI, Entry::DbgValue);
+  NewIndex = Entries.size() - 1;
+  return true;
 }
 
-void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
-                                       const MachineInstr &MI) {
-  auto &Ranges = VarInstrRanges[Var];
-  // Verify that the current instruction range is not yet closed.
-  assert(!Ranges.empty() && Ranges.back().second == nullptr);
-  // For now, instruction ranges are not allowed to cross basic block
-  // boundaries.
-  assert(Ranges.back().first->getParent() == MI.getParent());
-  Ranges.back().second = &MI;
+EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var,
+                                            const MachineInstr &MI) {
+  auto &Entries = VarEntries[Var];
+  // If an instruction clobbers multiple registers that the variable is
+  // described by, then we may have already created a clobbering instruction.
+  if (Entries.back().isClobber() && Entries.back().getInstr() == &MI)
+    return Entries.size() - 1;
+  Entries.emplace_back(&MI, Entry::Clobber);
+  return Entries.size() - 1;
 }
 
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
-  const auto &I = VarInstrRanges.find(Var);
-  if (I == VarInstrRanges.end())
-    return 0;
-  const auto &Ranges = I->second;
-  if (Ranges.empty() || Ranges.back().second != nullptr)
-    return 0;
-  return isDescribedByReg(*Ranges.back().first);
+void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
+  // For now, instruction ranges are not allowed to cross basic block
+  // boundaries.
+  assert(isDbgValue() && "Setting end index for non-debug value");
+  assert(!isClosed() && "End index has already been set");
+  EndIndex = Index;
 }
 
 void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
@@ -89,6 +100,12 @@ namespace {
 using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
 using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
 
+// Keeps track of the debug value entries that are currently live for each
+// inlined entity. As the history map entries are stored in a SmallVector, they
+// may be moved at insertion of new entries, so store indices rather than
+// pointers.
+using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>;
+
 } // end anonymous namespace
 
 // Claim that @Var is not described by @RegNo anymore.
@@ -114,16 +131,88 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
   VarSet.push_back(Var);
 }
 
+/// Create a clobbering entry and end all open debug value entries
+/// for \p Var that are described by \p RegNo using that entry.
+static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
+                              const MachineInstr &ClobberingInstr,
+                              DbgValueEntriesMap &LiveEntries,
+                              DbgValueHistoryMap &HistMap) {
+  EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr);
+
+  // Close all entries whose values are described by the register.
+  SmallVector<EntryIndex, 4> IndicesToErase;
+  for (auto Index : LiveEntries[Var]) {
+    auto &Entry = HistMap.getEntry(Var, Index);
+    assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+    if (isDescribedByReg(*Entry.getInstr()) == RegNo) {
+      IndicesToErase.push_back(Index);
+      Entry.endEntry(ClobberIndex);
+    }
+  }
+
+  // Drop all entries that have ended.
+  for (auto Index : IndicesToErase)
+    LiveEntries[Var].erase(Index);
+}
+
+/// Add a new debug value for \p Var. Closes all overlapping debug values.
+static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
+                                RegDescribedVarsMap &RegVars,
+                                DbgValueEntriesMap &LiveEntries,
+                                DbgValueHistoryMap &HistMap) {
+  EntryIndex NewIndex;
+  if (HistMap.startDbgValue(Var, DV, NewIndex)) {
+    SmallDenseMap<unsigned, bool, 4> TrackedRegs;
+
+    // If we have created a new debug value entry, close all preceding
+    // live entries that overlap.
+    SmallVector<EntryIndex, 4> IndicesToErase;
+    const DIExpression *DIExpr = DV.getDebugExpression();
+    for (auto Index : LiveEntries[Var]) {
+      auto &Entry = HistMap.getEntry(Var, Index);
+      assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+      const MachineInstr &DV = *Entry.getInstr();
+      bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression());
+      if (Overlaps) {
+        IndicesToErase.push_back(Index);
+        Entry.endEntry(NewIndex);
+      }
+      if (unsigned Reg = isDescribedByReg(DV))
+        TrackedRegs[Reg] |= !Overlaps;
+    }
+
+    // If the new debug value is described by a register, add tracking of
+    // that register if it is not already tracked.
+    if (unsigned NewReg = isDescribedByReg(DV)) {
+      if (!TrackedRegs.count(NewReg))
+        addRegDescribedVar(RegVars, NewReg, Var);
+      LiveEntries[Var].insert(NewIndex);
+      TrackedRegs[NewReg] = true;
+    }
+
+    // Drop tracking of registers that are no longer used.
+    for (auto I : TrackedRegs)
+      if (!I.second)
+        dropRegDescribedVar(RegVars, I.first, Var);
+
+    // Drop all entries that have ended, and mark the new entry as live.
+    for (auto Index : IndicesToErase)
+      LiveEntries[Var].erase(Index);
+    LiveEntries[Var].insert(NewIndex);
+  }
+}
+
 // Terminate the location range for variables described by register at
 // @I by inserting @ClobberingInstr to their history.
 static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
                                 RegDescribedVarsMap::iterator I,
                                 DbgValueHistoryMap &HistMap,
+                                DbgValueEntriesMap &LiveEntries,
                                 const MachineInstr &ClobberingInstr) {
   // Iterate over all variables described by this register and add this
   // instruction to their history, clobbering it.
   for (const auto &Var : I->second)
-    HistMap.endInstrRange(Var, ClobberingInstr);
+    clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap);
   RegVars.erase(I);
 }
 
@@ -131,115 +220,25 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
 // @RegNo by inserting @ClobberingInstr to their history.
 static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
                                 DbgValueHistoryMap &HistMap,
+                                DbgValueEntriesMap &LiveEntries,
                                 const MachineInstr &ClobberingInstr) {
   const auto &I = RegVars.find(RegNo);
   if (I == RegVars.end())
     return;
-  clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
-}
-
-// Returns the first instruction in @MBB which corresponds to
-// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
-static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
-  auto LastMI = MBB.getLastNonDebugInstr();
-  if (LastMI == MBB.end() || !LastMI->isReturn())
-    return nullptr;
-  // Assume that epilogue starts with instruction having the same debug location
-  // as the return instruction.
-  DebugLoc LastLoc = LastMI->getDebugLoc();
-  auto Res = LastMI;
-  for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(),
-                                                 E = MBB.rend();
-       I != E; ++I) {
-    if (I->getDebugLoc() != LastLoc)
-      return &*Res;
-    Res = &*I;
-  }
-  // If all instructions have the same debug location, assume whole MBB is
-  // an epilogue.
-  return &*MBB.begin();
-}
-
-// Collect registers that are modified in the function body (their
-// contents is changed outside of the prologue and epilogue).
-static void collectChangingRegs(const MachineFunction *MF,
-                                const TargetRegisterInfo *TRI,
-                                BitVector &Regs) {
-  for (const auto &MBB : *MF) {
-    auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
-
-    for (const auto &MI : MBB) {
-      // Avoid looking at prologue or epilogue instructions.
-      if (&MI == FirstEpilogueInst)
-        break;
-      if (MI.getFlag(MachineInstr::FrameSetup))
-        continue;
-
-      // Look for register defs and register masks. Register masks are
-      // typically on calls and they clobber everything not in the mask.
-      for (const MachineOperand &MO : MI.operands()) {
-        // Skip virtual registers since they are handled by the parent.
-        if (MO.isReg() && MO.isDef() && MO.getReg() &&
-            !TRI->isVirtualRegister(MO.getReg())) {
-          for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
-               ++AI)
-            Regs.set(*AI);
-        } else if (MO.isRegMask()) {
-          Regs.setBitsNotInMask(MO.getRegMask());
-        }
-      }
-    }
-  }
+  clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr);
 }
 
 void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
                                      const TargetRegisterInfo *TRI,
                                      DbgValueHistoryMap &DbgValues,
                                      DbgLabelInstrMap &DbgLabels) {
-  BitVector ChangingRegs(TRI->getNumRegs());
-  collectChangingRegs(MF, TRI, ChangingRegs);
-
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+  unsigned FrameReg = TRI->getFrameRegister(*MF);
   RegDescribedVarsMap RegVars;
+  DbgValueEntriesMap LiveEntries;
   for (const auto &MBB : *MF) {
     for (const auto &MI : MBB) {
-      if (!MI.isDebugInstr()) {
-        // Not a DBG_VALUE instruction. It may clobber registers which describe
-        // some variables.
-        for (const MachineOperand &MO : MI.operands()) {
-          if (MO.isReg() && MO.isDef() && MO.getReg()) {
-            // Ignore call instructions that claim to clobber SP. The AArch64
-            // backend does this for aggregate function arguments.
-            if (MI.isCall() && MO.getReg() == SP)
-              continue;
-            // If this is a virtual register, only clobber it since it doesn't
-            // have aliases.
-            if (TRI->isVirtualRegister(MO.getReg()))
-              clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
-            // If this is a register def operand, it may end a debug value
-            // range.
-            else {
-              for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
-                   ++AI)
-                if (ChangingRegs.test(*AI))
-                  clobberRegisterUses(RegVars, *AI, DbgValues, MI);
-            }
-          } else if (MO.isRegMask()) {
-            // If this is a register mask operand, clobber all debug values in
-            // non-CSRs.
-            for (unsigned I : ChangingRegs.set_bits()) {
-              // Don't consider SP to be clobbered by register masks.
-              if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
-                  MO.clobbersPhysReg(I)) {
-                clobberRegisterUses(RegVars, I, DbgValues, MI);
-              }
-            }
-          }
-        }
-        continue;
-      }
-
       if (MI.isDebugValue()) {
         assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
         // Use the base variable (without any DW_OP_piece expressions)
@@ -250,13 +249,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
                "Expected inlined-at fields to agree");
         InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
 
-        if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
-          dropRegDescribedVar(RegVars, PrevReg, Var);
-
-        DbgValues.startInstrRange(Var, MI);
-
-        if (unsigned NewReg = isDescribedByReg(MI))
-          addRegDescribedVar(RegVars, NewReg, Var);
+        handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues);
       } else if (MI.isDebugLabel()) {
         assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
         const DILabel *RawLabel = MI.getDebugLabel();
@@ -268,18 +261,75 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
         InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
         DbgLabels.addInstr(L, MI);
       }
-    }
 
-    // Make sure locations for register-described variables are valid only
-    // until the end of the basic block (unless it's the last basic block, in
-    // which case let their liveness run off to the end of the function).
+      if (MI.isDebugInstr())
+        continue;
+
+      // Not a DBG_VALUE instruction. It may clobber registers which describe
+      // some variables.
+      for (const MachineOperand &MO : MI.operands()) {
+        if (MO.isReg() && MO.isDef() && MO.getReg()) {
+          // Ignore call instructions that claim to clobber SP. The AArch64
+          // backend does this for aggregate function arguments.
+          if (MI.isCall() && MO.getReg() == SP)
+            continue;
+          // If this is a virtual register, only clobber it since it doesn't
+          // have aliases.
+          if (TRI->isVirtualRegister(MO.getReg()))
+            clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
+                                MI);
+          // If this is a register def operand, it may end a debug value
+          // range. Ignore frame-register defs in the epilogue and prologue,
+          // we expect debuggers to understand that stack-locations are
+          // invalid outside of the function body.
+          else if (MO.getReg() != FrameReg ||
+                   (!MI.getFlag(MachineInstr::FrameDestroy) &&
+                   !MI.getFlag(MachineInstr::FrameSetup))) {
+            for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+                 ++AI)
+              clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI);
+          }
+        } else if (MO.isRegMask()) {
+          // If this is a register mask operand, clobber all debug values in
+          // non-CSRs.
+          SmallVector<unsigned, 32> RegsToClobber;
+          // Don't consider SP to be clobbered by register masks.
+          for (auto It : RegVars) {
+            unsigned int Reg = It.first;
+            if (Reg != SP && TRI->isPhysicalRegister(Reg) &&
+                MO.clobbersPhysReg(Reg))
+              RegsToClobber.push_back(Reg);
+          }
+
+          for (unsigned Reg : RegsToClobber) {
+            clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI);
+          }
+        }
+      } // End MO loop.
+    }   // End instr loop.
+
+    // Make sure locations for all variables are valid only until the end of
+    // the basic block (unless it's the last basic block, in which case let
+    // their liveness run off to the end of the function).
     if (!MBB.empty() && &MBB != &MF->back()) {
-      for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
-        auto CurElem = I++; // CurElem can be erased below.
-        if (TRI->isVirtualRegister(CurElem->first) ||
-            ChangingRegs.test(CurElem->first))
-          clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
+      // Iterate over all variables that have open debug values.
+      for (auto &Pair : LiveEntries) {
+        if (Pair.second.empty())
+          continue;
+
+        // Create a clobbering entry.
+        EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back());
+
+        // End all entries.
+        for (EntryIndex Idx : Pair.second) {
+          DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx);
+          assert(Ent.isDbgValue() && !Ent.isClosed());
+          Ent.endEntry(ClobIdx);
+        }
       }
+
+      LiveEntries.clear();
+      RegVars.clear();
     }
   }
 }
@@ -289,7 +339,7 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
   dbgs() << "DbgValueHistoryMap:\n";
   for (const auto &VarRangePair : *this) {
     const InlinedEntity &Var = VarRangePair.first;
-    const InstrRanges &Ranges = VarRangePair.second;
+    const Entries &Entries = VarRangePair.second;
 
     const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
     const DILocation *Location = Var.second;
@@ -304,10 +354,20 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
 
     dbgs() << " --\n";
 
-    for (const InstrRange &Range : Ranges) {
-      dbgs() << "   Begin: " << *Range.first;
-      if (Range.second)
-        dbgs() << "   End  : " << *Range.second;
+    for (const auto &E : enumerate(Entries)) {
+      const auto &Entry = E.value();
+      dbgs() << "  Entry[" << E.index() << "]: ";
+      if (Entry.isDbgValue())
+        dbgs() << "Debug value\n";
+      else
+        dbgs() << "Clobber\n";
+      dbgs() << "   Instr: " << *Entry.getInstr();
+      if (Entry.isDbgValue()) {
+        if (Entry.getEndIndex() == NoEntry)
+          dbgs() << "   - Valid until end of function\n";
+        else
+          dbgs() << "   - Closed by Entry[" << Entry.getEndIndex() << "]\n";
+      }
       dbgs() << "\n";
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 551cd36d1984..22f458e4b03e 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -141,10 +140,9 @@ DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
 }
 
 /// If this type is derived from a base type then return base type size.
-uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
-  DIType *Ty = TyRef.resolve();
+uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
   assert(Ty);
-  DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+  const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
   if (!DDTy)
     return Ty->getSizeInBits();
 
@@ -155,7 +153,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
       Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
     return DDTy->getSizeInBits();
 
-  DIType *BaseType = DDTy->getBaseType().resolve();
+  DIType *BaseType = DDTy->getBaseType();
 
   if (!BaseType)
     return 0;
@@ -212,36 +210,58 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
 
   // Request labels for the full history.
   for (const auto &I : DbgValues) {
-    const auto &Ranges = I.second;
-    if (Ranges.empty())
+    const auto &Entries = I.second;
+    if (Entries.empty())
       continue;
 
-    // The first mention of a function argument gets the CurrentFnBegin
-    // label, so arguments are visible when breaking at function entry.
-    const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+    auto IsDescribedByReg = [](const MachineInstr *MI) {
+      return MI->getOperand(0).isReg() && MI->getOperand(0).getReg();
+    };
+
+    // The first mention of a function argument gets the CurrentFnBegin label,
+    // so arguments are visible when breaking at function entry.
+    //
+    // We do not change the label for values that are described by registers,
+    // as that could place them above their defining instructions. We should
+    // ideally not change the labels for constant debug values either, since
+    // doing that violates the ranges that are calculated in the history map.
+    // However, we currently do not emit debug values for constant arguments
+    // directly at the start of the function, so this code is still useful.
+    const DILocalVariable *DIVar =
+        Entries.front().getInstr()->getDebugVariable();
     if (DIVar->isParameter() &&
         getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
-      LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
-      if (Ranges.front().first->getDebugExpression()->isFragment()) {
+      if (!IsDescribedByReg(Entries.front().getInstr()))
+        LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
+      if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
         // Mark all non-overlapping initial fragments.
-        for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
-          const DIExpression *Fragment = I->first->getDebugExpression();
-          if (std::all_of(Ranges.begin(), I,
-                          [&](DbgValueHistoryMap::InstrRange Pred) {
-                            return !Fragment->fragmentsOverlap(
-                                Pred.first->getDebugExpression());
+        for (auto I = Entries.begin(); I != Entries.end(); ++I) {
+          if (!I->isDbgValue())
+            continue;
+          const DIExpression *Fragment = I->getInstr()->getDebugExpression();
+          if (std::any_of(Entries.begin(), I,
+                          [&](DbgValueHistoryMap::Entry Pred) {
+                            return Pred.isDbgValue() &&
+                                   Fragment->fragmentsOverlap(
+                                       Pred.getInstr()->getDebugExpression());
                           }))
-            LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
-          else
             break;
+          // The code that generates location lists for DWARF assumes that the
+          // entries' start labels are monotonically increasing, and since we
+          // don't change the label for fragments that are described by
+          // registers, we must bail out when encountering such a fragment.
+          if (IsDescribedByReg(I->getInstr()))
+            break;
+          LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
         }
       }
     }
 
-    for (const auto &Range : Ranges) {
-      requestLabelBeforeInsn(Range.first);
-      if (Range.second)
-        requestLabelAfterInsn(Range.second);
+    for (const auto &Entry : Entries) {
+      if (Entry.isDbgValue())
+        requestLabelBeforeInsn(Entry.getInstr());
+      else
+        requestLabelAfterInsn(Entry.getInstr());
     }
   }
 
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index befa4b941c8d..17e39b3d3268 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,6 +20,73 @@
 namespace llvm {
 class AsmPrinter;
 
+/// A single location or constant.
+class DbgValueLoc {
+  /// Any complex address location expression for this DbgValueLoc.
+  const DIExpression *Expression;
+
+  /// Type of entry that this represents.
+  enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+  enum EntryType EntryKind;
+
+  /// Either a constant,
+  union {
+    int64_t Int;
+    const ConstantFP *CFP;
+    const ConstantInt *CIP;
+  } Constant;
+
+  /// Or a location in the machine frame.
+  MachineLocation Loc;
+
+public:
+  DbgValueLoc(const DIExpression *Expr, int64_t i)
+      : Expression(Expr), EntryKind(E_Integer) {
+    Constant.Int = i;
+  }
+  DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP)
+      : Expression(Expr), EntryKind(E_ConstantFP) {
+    Constant.CFP = CFP;
+  }
+  DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP)
+      : Expression(Expr), EntryKind(E_ConstantInt) {
+    Constant.CIP = CIP;
+  }
+  DbgValueLoc(const DIExpression *Expr, MachineLocation Loc)
+      : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+    assert(cast<DIExpression>(Expr)->isValid());
+  }
+
+  bool isLocation() const { return EntryKind == E_Location; }
+  bool isInt() const { return EntryKind == E_Integer; }
+  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+  int64_t getInt() const { return Constant.Int; }
+  const ConstantFP *getConstantFP() const { return Constant.CFP; }
+  const ConstantInt *getConstantInt() const { return Constant.CIP; }
+  MachineLocation getLoc() const { return Loc; }
+  bool isFragment() const { return getExpression()->isFragment(); }
+  bool isEntryVal() const { return getExpression()->isEntryValue(); }
+  const DIExpression *getExpression() const { return Expression; }
+  friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
+  friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const {
+    if (isLocation()) {
+      llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+      if (Loc.isIndirect())
+        llvm::dbgs() << "+0";
+      llvm::dbgs() << "} ";
+    } else if (isConstantInt())
+      Constant.CIP->dump();
+    else if (isConstantFP())
+      Constant.CFP->dump();
+    if (Expression)
+      Expression->dump();
+  }
+#endif
+};
+
 /// This struct describes location entries emitted in the .debug_loc
 /// section.
 class DebugLocEntry {
@@ -28,90 +94,20 @@ class DebugLocEntry {
   const MCSymbol *Begin;
   const MCSymbol *End;
 
-public:
-  /// A single location or constant.
-  struct Value {
-    Value(const DIExpression *Expr, int64_t i)
-        : Expression(Expr), EntryKind(E_Integer) {
-      Constant.Int = i;
-    }
-    Value(const DIExpression *Expr, const ConstantFP *CFP)
-        : Expression(Expr), EntryKind(E_ConstantFP) {
-      Constant.CFP = CFP;
-    }
-    Value(const DIExpression *Expr, const ConstantInt *CIP)
-        : Expression(Expr), EntryKind(E_ConstantInt) {
-      Constant.CIP = CIP;
-    }
-    Value(const DIExpression *Expr, MachineLocation Loc)
-        : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
-      assert(cast<DIExpression>(Expr)->isValid());
-    }
-
-    /// Any complex address location expression for this Value.
-    const DIExpression *Expression;
-
-    /// Type of entry that this represents.
-    enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
-    enum EntryType EntryKind;
-
-    /// Either a constant,
-    union {
-      int64_t Int;
-      const ConstantFP *CFP;
-      const ConstantInt *CIP;
-    } Constant;
-
-    // Or a location in the machine frame.
-    MachineLocation Loc;
-
-    bool isLocation() const { return EntryKind == E_Location; }
-    bool isInt() const { return EntryKind == E_Integer; }
-    bool isConstantFP() const { return EntryKind == E_ConstantFP; }
-    bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-    int64_t getInt() const { return Constant.Int; }
-    const ConstantFP *getConstantFP() const { return Constant.CFP; }
-    const ConstantInt *getConstantInt() const { return Constant.CIP; }
-    MachineLocation getLoc() const { return Loc; }
-    bool isFragment() const { return getExpression()->isFragment(); }
-    const DIExpression *getExpression() const { return Expression; }
-    friend bool operator==(const Value &, const Value &);
-    friend bool operator<(const Value &, const Value &);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-    LLVM_DUMP_METHOD void dump() const {
-      if (isLocation()) {
-        llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
-        if (Loc.isIndirect())
-          llvm::dbgs() << "+0";
-        llvm::dbgs() << "} ";
-      }
-      else if (isConstantInt())
-        Constant.CIP->dump();
-      else if (isConstantFP())
-        Constant.CFP->dump();
-      if (Expression)
-        Expression->dump();
-    }
-#endif
-  };
-
-private:
   /// A nonempty list of locations/constants belonging to this entry,
   /// sorted by offset.
-  SmallVector<Value, 1> Values;
+  SmallVector<DbgValueLoc, 1> Values;
 
 public:
-  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
-      : Begin(B), End(E) {
-    Values.push_back(std::move(Val));
+  /// Create a location list entry for the range [\p Begin, \p End).
+  ///
+  /// \param Vals One or more values describing (parts of) the variable.
+  DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End,
+                ArrayRef<DbgValueLoc> Vals)
+      : Begin(Begin), End(End) {
+    addValues(Vals);
   }
 
-  /// If this and Next are describing different pieces of the same
-  /// variable, merge them by appending Next's values to the current
-  /// list of values.
-  /// Return true if the merge was successful.
-  bool MergeValues(const DebugLocEntry &Next);
-
   /// Attempt to merge this DebugLocEntry with Next and return
   /// true if the merge was successful. Entries can be merged if they
   /// share the same Loc/Constant and if Next immediately follows this
@@ -127,35 +123,36 @@ public:
 
   const MCSymbol *getBeginSym() const { return Begin; }
   const MCSymbol *getEndSym() const { return End; }
-  ArrayRef<Value> getValues() const { return Values; }
-  void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+  ArrayRef<DbgValueLoc> getValues() const { return Values; }
+  void addValues(ArrayRef<DbgValueLoc> Vals) {
     Values.append(Vals.begin(), Vals.end());
     sortUniqueValues();
-    assert(all_of(Values, [](DebugLocEntry::Value V) {
-          return V.isFragment();
-        }) && "value must be a piece");
+    assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) {
+              return V.isFragment();
+            })) && "must either have a single value or multiple pieces");
   }
 
   // Sort the pieces by offset.
   // Remove any duplicate entries by dropping all but the first.
   void sortUniqueValues() {
     llvm::sort(Values);
-    Values.erase(
-        std::unique(
-            Values.begin(), Values.end(), [](const Value &A, const Value &B) {
-              return A.getExpression() == B.getExpression();
-            }),
-        Values.end());
+    Values.erase(std::unique(Values.begin(), Values.end(),
+                             [](const DbgValueLoc &A, const DbgValueLoc &B) {
+                               return A.getExpression() == B.getExpression();
+                             }),
+                 Values.end());
   }
 
   /// Lower this entry into a DWARF expression.
-  void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
-                const DIBasicType *BT);
+  void finalize(const AsmPrinter &AP,
+                DebugLocStream::ListBuilder &List,
+                const DIBasicType *BT,
+                DwarfCompileUnit &TheCU);
 };
 
-/// Compare two Values for equality.
-inline bool operator==(const DebugLocEntry::Value &A,
-                       const DebugLocEntry::Value &B) {
+/// Compare two DbgValueLocs for equality.
+inline bool operator==(const DbgValueLoc &A,
+                       const DbgValueLoc &B) {
   if (A.EntryKind != B.EntryKind)
     return false;
 
@@ -163,21 +160,21 @@ inline bool operator==(const DebugLocEntry::Value &A,
     return false;
 
   switch (A.EntryKind) {
-  case DebugLocEntry::Value::E_Location:
+  case DbgValueLoc::E_Location:
     return A.Loc == B.Loc;
-  case DebugLocEntry::Value::E_Integer:
+  case DbgValueLoc::E_Integer:
     return A.Constant.Int == B.Constant.Int;
-  case DebugLocEntry::Value::E_ConstantFP:
+  case DbgValueLoc::E_ConstantFP:
     return A.Constant.CFP == B.Constant.CFP;
-  case DebugLocEntry::Value::E_ConstantInt:
+  case DbgValueLoc::E_ConstantInt:
     return A.Constant.CIP == B.Constant.CIP;
   }
   llvm_unreachable("unhandled EntryKind");
 }
 
 /// Compare two fragments based on their offset.
-inline bool operator<(const DebugLocEntry::Value &A,
-                      const DebugLocEntry::Value &B) {
+inline bool operator<(const DbgValueLoc &A,
+                      const DbgValueLoc &B) {
   return A.getExpression()->getFragmentInfo()->OffsetInBits <
          B.getExpression()->getFragmentInfo()->OffsetInBits;
 }
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index 7e8ed7104af3..f483d532ff07 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -1,9 +1,8 @@
 //===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 8dcf5cbc1889..789291771b5a 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -1,9 +1,8 @@
 //===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 1990456cc555..207a7284dafa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 1dca3f0fce5b..9548ad9918c1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "DwarfUnit.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Dwarf.h"
@@ -104,7 +104,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
   // extend .file to support this.
   unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
   if (!File)
-    return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID);
+    return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID);
   return Asm->OutStreamer->EmitDwarfFileDirective(
       0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
       File->getSource(), CUID);
@@ -119,17 +119,19 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
   assert(GV);
 
   auto *GVContext = GV->getScope();
-  auto *GTy = DD->resolve(GV->getType());
+  const DIType *GTy = GV->getType();
 
   // Construct the context before querying for the existence of the DIE in
   // case such construction creates the DIE.
-  DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+  auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
+  DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
+    : getOrCreateContextDIE(GVContext);
 
   // Add to map.
   DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
   DIScope *DeclContext;
   if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
-    DeclContext = resolve(SDMDecl->getScope());
+    DeclContext = SDMDecl->getScope();
     assert(SDMDecl->isStaticMember() && "Expected static member decl");
     assert(GV->isDefinition());
     // We need the declaration DIE that is in the static member's class.
@@ -137,7 +139,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
     addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
     // If the global variable's type is different from the one in the class
     // member type, assume that it's more specific and also emit it.
-    if (GTy != DD->resolve(SDMDecl->getBaseType()))
+    if (GTy != SDMDecl->getBaseType())
       addType(*VariableDIE, GTy);
   } else {
     DeclContext = GV->getScope();
@@ -166,8 +168,16 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
     addTemplateParams(*VariableDIE, DINodeArray(TP));
 
   // Add location.
+  addLocationAttribute(VariableDIE, GV, GlobalExprs);
+
+  return VariableDIE;
+}
+
+void DwarfCompileUnit::addLocationAttribute(
+    DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
   bool addToAccelTable = false;
   DIELoc *Loc = nullptr;
+  Optional<unsigned> NVPTXAddressSpace;
   std::unique_ptr<DIEDwarfExpression> DwarfExpr;
   for (const auto &GE : GlobalExprs) {
     const GlobalVariable *Global = GE.Var;
@@ -201,8 +211,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
       DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
     }
 
-    if (Expr)
+    if (Expr) {
+      // According to
+      // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+      // cuda-gdb requires DW_AT_address_class for all variables to be able to
+      // correctly interpret address space of the variable address.
+      // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+      // sequence for the NVPTX + gdb target.
+      unsigned LocalNVPTXAddressSpace;
+      if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+        const DIExpression *NewExpr =
+            DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+        if (NewExpr != Expr) {
+          Expr = NewExpr;
+          NVPTXAddressSpace = LocalNVPTXAddressSpace;
+        }
+      }
       DwarfExpr->addFragmentOffset(Expr);
+    }
 
     if (Global) {
       const MCSymbol *Sym = Asm->getSymbol(Global);
@@ -247,6 +273,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
       DwarfExpr->setMemoryLocationKind();
     DwarfExpr->addExpression(Expr);
   }
+  if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    const unsigned NVPTX_ADDR_global_space = 5;
+    addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+            NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
+  }
   if (Loc)
     addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
 
@@ -262,8 +297,25 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
         DD->useAllLinkageNames())
       DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
   }
+}
 
-  return VariableDIE;
+DIE *DwarfCompileUnit::getOrCreateCommonBlock(
+    const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE.
+  DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
+
+  if (DIE *NDie = getDIE(CB))
+    return NDie;
+  DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
+  StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
+  addString(NDie, dwarf::DW_AT_name, Name);
+  addGlobalName(Name, NDie, CB->getScope());
+  if (CB->getFile())
+    addSourceLine(NDie, CB->getLineNo(), CB->getFile());
+  if (DIGlobalVariable *V = CB->getDecl())
+    getCU().addLocationAttribute(&NDie, V, GlobalExprs);
+  return &NDie;
 }
 
 void DwarfCompileUnit::addRange(RangeSpan Range) {
@@ -491,6 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
           getOrCreateSourceID(IA->getFile()));
   addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+  if (IA->getColumn())
+    addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
   if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
     addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
             IA->getDiscriminator());
@@ -555,36 +609,27 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
     return VariableDie;
   }
 
-  // Check if variable is described by a DBG_VALUE instruction.
-  if (const MachineInstr *DVInsn = DV.getMInsn()) {
-    assert(DVInsn->getNumOperands() == 4);
-    if (DVInsn->getOperand(0).isReg()) {
-      auto RegOp = DVInsn->getOperand(0);
-      auto Op1 = DVInsn->getOperand(1);
-      // If the second operand is an immediate, this is an indirect value.
-      assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
-      MachineLocation Location(RegOp.getReg(), Op1.isImm());
-      addVariableAddress(DV, *VariableDie, Location);
-    } else if (DVInsn->getOperand(0).isImm()) {
-      // This variable is described by a single constant.
-      // Check whether it has a DIExpression.
+  // Check if variable has a single location description.
+  if (auto *DVal = DV.getValueLoc()) {
+    if (DVal->isLocation())
+      addVariableAddress(DV, *VariableDie, DVal->getLoc());
+    else if (DVal->isInt()) {
       auto *Expr = DV.getSingleExpression();
       if (Expr && Expr->getNumElements()) {
         DIELoc *Loc = new (DIEValueAllocator) DIELoc;
         DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
         // If there is an expression, emit raw unsigned bytes.
         DwarfExpr.addFragmentOffset(Expr);
-        DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+        DwarfExpr.addUnsignedConstant(DVal->getInt());
         DwarfExpr.addExpression(Expr);
         addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
       } else
-        addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
-    } else if (DVInsn->getOperand(0).isFPImm())
-      addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
-    else if (DVInsn->getOperand(0).isCImm())
-      addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
-                       DV.getType());
-
+        addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
+    } else if (DVal->isConstantFP()) {
+      addConstantFPValue(*VariableDie, DVal->getConstantFP());
+    } else if (DVal->isConstantInt()) {
+      addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
+    }
     return VariableDie;
   }
 
@@ -592,6 +637,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
   if (!DV.hasFrameIndexExprs())
     return VariableDie;
 
+  Optional<unsigned> NVPTXAddressSpace;
   DIELoc *Loc = new (DIEValueAllocator) DIELoc;
   DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
   for (auto &Fragment : DV.getFrameIndexExprs()) {
@@ -603,7 +649,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
     SmallVector<uint64_t, 8> Ops;
     Ops.push_back(dwarf::DW_OP_plus_uconst);
     Ops.push_back(Offset);
-    Ops.append(Expr->elements_begin(), Expr->elements_end());
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+    // sequence for the NVPTX + gdb target.
+    unsigned LocalNVPTXAddressSpace;
+    if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+      const DIExpression *NewExpr =
+          DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+      if (NewExpr != Expr) {
+        Expr = NewExpr;
+        NVPTXAddressSpace = LocalNVPTXAddressSpace;
+      }
+    }
+    if (Expr)
+      Ops.append(Expr->elements_begin(), Expr->elements_end());
     DIExpressionCursor Cursor(Ops);
     DwarfExpr.setMemoryLocationKind();
     if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
@@ -613,7 +675,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
           *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
     DwarfExpr.addExpression(std::move(Cursor));
   }
+  if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    const unsigned NVPTX_ADDR_local_space = 6;
+    addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+            NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
+  }
   addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+  if (DwarfExpr.TagOffset)
+    addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+            *DwarfExpr.TagOffset);
 
   return VariableDie;
 }
@@ -800,7 +874,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
     ContextDIE = &getUnitDie();
     getOrCreateSubprogramDIE(SPDecl);
   } else {
-    ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
+    ContextDIE = getOrCreateContextDIE(SP->getScope());
     // The scope may be shared with a subprogram that has already been
     // constructed in another CU, in which case we need to construct this
     // subprogram in the same CU.
@@ -849,7 +923,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
   DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
   insertDIE(Module, IMDie);
   DIE *EntityDie;
-  auto *Entity = resolve(Module->getEntity());
+  auto *Entity = Module->getEntity();
   if (auto *NS = dyn_cast<DINamespace>(Entity))
     EntityDie = getOrCreateNameSpace(NS);
   else if (auto *M = dyn_cast<DIModule>(Entity))
@@ -958,7 +1032,9 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
     return true;
   case DICompileUnit::DebugNameTableKind::Default:
     return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
-           !CUNode->isDebugDirectivesOnly();
+           !CUNode->isDebugDirectivesOnly() &&
+           DD->getAccelTableKind() != AccelTableKind::Apple &&
+           DD->getDwarfVersion() < 5;
   }
   llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
 }
@@ -1054,6 +1130,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
     DwarfExpr.setMemoryLocationKind();
 
   DIExpressionCursor Cursor(DIExpr);
+
+  if (DIExpr->isEntryValue()) {
+    DwarfExpr.setEntryValueFlag();
+    DwarfExpr.addEntryValueExpression(Cursor);
+  }
+
   const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
   if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
     return;
@@ -1112,7 +1194,7 @@ void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
 void DwarfCompileUnit::applySubprogramAttributesToDefinition(
     const DISubprogram *SP, DIE &SPDie) {
   auto *SPDecl = SP->getDeclaration();
-  auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
+  auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
   applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
   addGlobalName(SP->getName(), SPDie, Context);
 }
@@ -1121,6 +1203,10 @@ bool DwarfCompileUnit::isDwoUnit() const {
   return DD->useSplitDwarf() && Skeleton;
 }
 
+void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+  constructTypeDIE(D, CTy);
+}
+
 bool DwarfCompileUnit::includeMinimalInlineScopes() const {
   return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
          (DD->useSplitDwarf() && !Skeleton);
@@ -1134,3 +1220,27 @@ void DwarfCompileUnit::addAddrTableBase() {
                                          : dwarf::DW_AT_GNU_addr_base,
                   Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
 }
+
+void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
+  Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
+               new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
+}
+
+void DwarfCompileUnit::createBaseTypeDIEs() {
+  // Emit a DW_TAG_base_type DIE for each entry of ExprRefedBaseTypes. The DIEs
+  // go directly after the CU so that their offsets fit in the fixed size
+  // ULEB128 used inside the location expressions. Iterating backwards while
+  // inserting at the front of the CU child list preserves the original order.
+  for (auto &Btr : reverse(ExprRefedBaseTypes)) {
+    DIE &Die = getUnitDie().addChildFront(
+      DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
+    SmallString<32> Str;
+    addString(Die, dwarf::DW_AT_name,
+              Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
+                    "_" + Twine(Btr.BitSize)).toStringRef(Str));
+    addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
+    // Round up: a bit size that is not a multiple of 8 must not be truncated.
+    addUInt(Die, dwarf::DW_AT_byte_size, None, (Btr.BitSize + 7) / 8);
+    Btr.Die = &Die;
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 9ec22f68c12f..ea980dfda17e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -101,6 +100,8 @@ class DwarfCompileUnit final : public DwarfUnit {
     return DU->getAbstractEntities();
   }
 
+  void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+
 public:
   DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
                    DwarfDebug *DW, DwarfFile *DWU);
@@ -125,11 +126,27 @@ public:
     const DIExpression *Expr;
   };
 
+  struct BaseTypeRef {
+    BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
+      BitSize(BitSize), Encoding(Encoding) {}
+    unsigned BitSize;
+    dwarf::TypeKind Encoding;
+    DIE *Die = nullptr;
+  };
+
+  std::vector<BaseTypeRef> ExprRefedBaseTypes;
+
   /// Get or create global variable DIE.
   DIE *
   getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
                                ArrayRef<GlobalExpr> GlobalExprs);
 
+  DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
+                              ArrayRef<GlobalExpr> GlobalExprs);
+
+  void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
+                            ArrayRef<GlobalExpr> GlobalExprs);
+
   /// addLabelAddress - Add a dwarf label attribute data and value using
   /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
   void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
@@ -200,6 +217,8 @@ public:
                               SmallVectorImpl<DIE *> &Children,
                               bool *HasNonScopeChildren = nullptr);
 
+  void createBaseTypeDIEs();
+
   /// Construct a DIE for this subprogram scope.
   DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
                                    LexicalScope *Scope);
@@ -314,6 +333,8 @@ public:
   void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
 
   bool hasDwarfPubSections() const;
+
+  void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1de2ffb6cfa1..71bb2b0858cc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,8 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
@@ -162,6 +163,7 @@ static const char *const DWARFGroupName = "dwarf";
 static const char *const DWARFGroupDescription = "DWARF Emission";
 static const char *const DbgTimerName = "writer";
 static const char *const DbgTimerDescription = "DWARF Debug Writer";
+static constexpr unsigned ULEB128PadSize = 4;
 
 void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
   BS.EmitInt8(
@@ -177,6 +179,15 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
   BS.EmitULEB128(Value, Twine(Value));
 }
 
+void DebugLocDwarfExpression::emitData1(uint8_t Value) {
+  BS.EmitInt8(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+  assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit");
+  BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+}
+
 bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
                                               unsigned MachineReg) {
   // This information is not available while emitting .debug_loc entries.
@@ -185,11 +196,11 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
 
 bool DbgVariable::isBlockByrefVariable() const {
   assert(getVariable() && "Invalid complex DbgVariable!");
-  return getVariable()->getType().resolve()->isBlockByrefStruct();
+  return getVariable()->getType()->isBlockByrefStruct();
 }
 
 const DIType *DbgVariable::getType() const {
-  DIType *Ty = getVariable()->getType().resolve();
+  DIType *Ty = getVariable()->getType();
   // FIXME: isBlockByrefVariable should be reformulated in terms of complex
   // addresses instead.
   if (Ty->isBlockByrefStruct()) {
@@ -221,18 +232,55 @@ const DIType *DbgVariable::getType() const {
     uint16_t tag = Ty->getTag();
 
     if (tag == dwarf::DW_TAG_pointer_type)
-      subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
+      subType = cast<DIDerivedType>(Ty)->getBaseType();
 
     auto Elements = cast<DICompositeType>(subType)->getElements();
     for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
       auto *DT = cast<DIDerivedType>(Elements[i]);
       if (getName() == DT->getName())
-        return resolve(DT->getBaseType());
+        return DT->getBaseType();
     }
   }
   return Ty;
 }
 
+/// Convert the 4-operand DBG_VALUE \p MI into the DbgValueLoc (register
+/// location or constant) for the instruction range starting at MI.
+static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
+  const DIExpression *Expr = MI->getDebugExpression();
+  assert(MI->getNumOperands() == 4);
+  const auto &ValueOp = MI->getOperand(0);
+
+  // Constant operands map directly onto the matching DbgValueLoc kind.
+  if (ValueOp.isImm())
+    return DbgValueLoc(Expr, ValueOp.getImm());
+  if (ValueOp.isFPImm())
+    return DbgValueLoc(Expr, ValueOp.getFPImm());
+  if (ValueOp.isCImm())
+    return DbgValueLoc(Expr, ValueOp.getCImm());
+  if (!ValueOp.isReg())
+    llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+
+  // An immediate second operand marks a register-indirect address.
+  const auto &OffOp = MI->getOperand(1);
+  assert((!OffOp.isImm() || (OffOp.getImm() == 0)) && "unexpected offset");
+  return DbgValueLoc(Expr, MachineLocation(ValueOp.getReg(), OffOp.isImm()));
+}
+
+void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
+  assert(FrameIndexExprs.empty() && "Already initialized?");
+  assert(!ValueLoc.get() && "Already initialized?");
+  // DbgValue must describe this variable at this inlined-at location.
+  assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+  assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+         "Wrong inlined-at");
+  // Store the location, and keep a non-empty expression in FrameIndexExprs.
+  ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+  if (auto *E = DbgValue->getDebugExpression())
+    if (E->getNumElements())
+      FrameIndexExprs.push_back({0, E});
+}
+
 ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
   if (FrameIndexExprs.size() == 1)
     return FrameIndexExprs;
@@ -252,8 +300,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
 }
 
 void DbgVariable::addMMIEntry(const DbgVariable &V) {
-  assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
-  assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+  assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
+  assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
   assert(V.getVariable() == getVariable() && "conflicting variable");
   assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
 
@@ -315,7 +363,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
       IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
   const Triple &TT = Asm->TM.getTargetTriple();
 
-  // Make sure we know our "debugger tuning."  The target option takes
+  // Make sure we know our "debugger tuning".  The target option takes
   // precedence; fall back to triple-based defaults.
   if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
     DebuggerTuning = Asm->TM.Options.DebuggerTuning;
@@ -658,6 +706,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
     NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
   }
 
+  // Create DIEs for function declarations used for call site debug info.
+  for (auto Scope : DIUnit->getRetainedTypes())
+    if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+      NewCU.getOrCreateSubprogramDIE(SP);
+
   CUMap.insert({DIUnit, &NewCU});
   CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
   return NewCU;
@@ -890,13 +943,6 @@ void DwarfDebug::finalizeModuleInfo() {
     // ranges for all subprogram DIEs for mach-o.
     DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
 
-    // We don't keep track of which addresses are used in which CU so this
-    // is a bit pessimistic under LTO.
-    if (!AddrPool.isEmpty() &&
-        (getDwarfVersion() >= 5 ||
-         (SkCU && !empty(TheCU.getUnitDie().children()))))
-      U.addAddrTableBase();
-
     if (unsigned NumRanges = TheCU.getRanges().size()) {
       if (NumRanges > 1 && useRangesSection())
         // A DW_AT_low_pc attribute may also be specified in combination with
@@ -909,6 +955,13 @@ void DwarfDebug::finalizeModuleInfo() {
       U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
     }
 
+    // We don't keep track of which addresses are used in which CU so this
+    // is a bit pessimistic under LTO.
+    if (!AddrPool.isEmpty() &&
+        (getDwarfVersion() >= 5 ||
+         (SkCU && !empty(TheCU.getUnitDie().children()))))
+      U.addAddrTableBase();
+
     if (getDwarfVersion() >= 5) {
       if (U.hasRangeLists())
         U.addRnglistsBase();
@@ -941,6 +994,11 @@ void DwarfDebug::endModule() {
   assert(CurFn == nullptr);
   assert(CurMI == nullptr);
 
+  for (const auto &P : CUMap) {
+    auto &CU = *P.second;
+    CU.createBaseTypeDIEs();
+  }
+
   // If we aren't actually generating debug info (check beginModule -
   // conditionalized on !DisableDebugInfoPrinting and the presence of the
   // llvm.dbg.cu metadata node)
@@ -1059,161 +1117,177 @@ void DwarfDebug::collectVariableInfoFromMFTable(
   }
 }
 
-// Get .debug_loc entry for the instruction range starting at MI.
-static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
-  const DIExpression *Expr = MI->getDebugExpression();
-  assert(MI->getNumOperands() == 4);
-  if (MI->getOperand(0).isReg()) {
-    auto RegOp = MI->getOperand(0);
-    auto Op1 = MI->getOperand(1);
-    // If the second operand is an immediate, this is a
-    // register-indirect address.
-    assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
-    MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
-    return DebugLocEntry::Value(Expr, MLoc);
-  }
-  if (MI->getOperand(0).isImm())
-    return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
-  if (MI->getOperand(0).isFPImm())
-    return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
-  if (MI->getOperand(0).isCImm())
-    return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
-
-  llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
-}
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+                            const MachineInstr *DbgValue,
+                            const MachineInstr *RangeEnd) {
+  assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+  auto MBB = DbgValue->getParent();
+  auto DL = DbgValue->getDebugLoc();
+  auto *LScope = LScopes.findLexicalScope(DL);
+  // Scope doesn't exist; this is a dead DBG_VALUE.
+  if (!LScope)
+    return false;
+  auto &LSRange = LScope->getRanges();
+  if (LSRange.size() == 0)
+    return false;
 
-/// If this and Next are describing different fragments of the same
-/// variable, merge them by appending Next's values to the current
-/// list of values.
-/// Return true if the merge was successful.
-bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
-  if (Begin == Next.Begin) {
-    auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
-    auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
-    if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
+  // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+  const MachineInstr *LScopeBegin = LSRange.front().first;
+  // Early exit if the lexical scope begins outside of the current block.
+  if (LScopeBegin->getParent() != MBB)
+    return false;
+  MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+  for (++Pred; Pred != MBB->rend(); ++Pred) {
+    if (Pred->getFlag(MachineInstr::FrameSetup))
+      break;
+    auto PredDL = Pred->getDebugLoc();
+    if (!PredDL || Pred->isMetaInstruction())
+      continue;
+    // Check whether the instruction preceding the DBG_VALUE is in the same
+    // (sub)scope as the DBG_VALUE.
+    if (DL->getScope() == PredDL->getScope())
+      return false;
+    auto *PredScope = LScopes.findLexicalScope(PredDL);
+    if (!PredScope || LScope->dominates(PredScope))
       return false;
+  }
 
-    // We can only merge entries if none of the fragments overlap any others.
-    // In doing so, we can take advantage of the fact that both lists are
-    // sorted.
-    for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
-      for (; j < Next.Values.size(); ++j) {
-        int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp(
-            cast<DIExpression>(Next.Values[j].Expression));
-        if (res == 0) // The two expressions overlap, we can't merge.
-          return false;
-        // Values[i] is entirely before Next.Values[j],
-        // so go back to the next entry of Values.
-        else if (res == -1)
-          break;
-        // Next.Values[j] is entirely before Values[i], so go on to the
-        // next entry of Next.Values.
-      }
-    }
+  // If the range of the DBG_VALUE is open-ended, report success.
+  if (!RangeEnd)
+    return true;
 
-    addValues(Next.Values);
-    End = Next.End;
+  // Fail if there are instructions belonging to our scope in another block.
+  const MachineInstr *LScopeEnd = LSRange.back().second;
+  if (LScopeEnd->getParent() != MBB)
+    return false;
+
+  // Single, constant DBG_VALUEs in the prologue are promoted to be live
+  // throughout the function. This is a hack, presumably for DWARF v2 and not
+  // necessarily correct. It would be much better to use a dbg.declare instead
+  // if we know the constant is live throughout the scope.
+  if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
     return true;
-  }
+
   return false;
 }
 
 /// Build the location list for all DBG_VALUEs in the function that
-/// describe the same variable.  If the ranges of several independent
-/// fragments of the same variable overlap partially, split them up and
-/// combine the ranges. The resulting DebugLocEntries are will have
+/// describe the same variable. The resulting DebugLocEntries will have
 /// strict monotonically increasing begin addresses and will never
-/// overlap.
+/// overlap. Return true if the resulting list has only one entry and that
+/// entry is valid throughout the variable's scope.
+//
+// See the definition of DbgValueHistoryMap::Entry for an explanation of the
+// different kinds of history map entries. One thing to be aware of is that if
+// a debug value is ended by another entry (rather than being valid until the
+// end of the function), that entry's instruction may or may not be included in
+// the range, depending on if the entry is a clobbering entry (it has an
+// instruction that clobbers one or more preceding locations), or if it is an
+// (overlapping) debug value entry. This distinction can be seen in the example
+// below. The first debug value is ended by the clobbering entry 2, and the
+// second and third debug values are ended by the overlapping debug value entry
+// 4.
 //
 // Input:
 //
-//   Ranges History [var, loc, fragment ofs size]
-// 0 |      [x, (reg0, fragment 0, 32)]
-// 1 | |    [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
-// 2 | |    ...
-// 3   |    [clobber reg0]
-// 4        [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
-//                                     x.
+//   History map entries [type, end index, mi]
 //
-// Output:
+// 0 |      [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
+// 1 | |    [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
+// 2 | |    [Clobber, $reg0 = [...], -, -]
+// 3   | |  [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
+// 4        [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
 //
-// [0-1]    [x, (reg0, fragment  0, 32)]
-// [1-3]    [x, (reg0, fragment  0, 32), (reg1, fragment 32, 32)]
-// [3-4]    [x, (reg1, fragment 32, 32)]
-// [4- ]    [x, (mem,  fragment  0, 64)]
-void
-DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
-                              const DbgValueHistoryMap::InstrRanges &Ranges) {
-  SmallVector<DebugLocEntry::Value, 4> OpenRanges;
-
-  for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
-    const MachineInstr *Begin = I->first;
-    const MachineInstr *End = I->second;
-    assert(Begin->isDebugValue() && "Invalid History entry");
-
-    // Check if a variable is inaccessible in this range.
-    if (Begin->getNumOperands() > 1 &&
-        Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
-      OpenRanges.clear();
-      continue;
-    }
-
-    // If this fragment overlaps with any open ranges, truncate them.
-    const DIExpression *DIExpr = Begin->getDebugExpression();
-    auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
-      return DIExpr->fragmentsOverlap(R.getExpression());
-    });
+// Output [start, end) [Value...]:
+//
+// [0-1)    [(reg0, fragment 0, 32)]
+// [1-3)    [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4)    [(reg1, fragment 32, 32), (123, fragment 64, 32)]
+// [4-)     [(@g, fragment 0, 96)]
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+                                   const DbgValueHistoryMap::Entries &Entries) {
+  using OpenRange =
+      std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
+  SmallVector<OpenRange, 4> OpenRanges;
+  bool isSafeForSingleLocation = true;
+  const MachineInstr *StartDebugMI = nullptr;
+  const MachineInstr *EndMI = nullptr;
+
+  for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
+    const MachineInstr *Instr = EI->getInstr();
+
+    // Remove all values that are no longer live.
+    size_t Index = std::distance(EB, EI);
+    auto Last =
+        remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
     OpenRanges.erase(Last, OpenRanges.end());
 
-    const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
-    assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+    // If we are dealing with a clobbering entry, this iteration will result in
+    // a location list entry starting after the clobbering instruction.
+    const MCSymbol *StartLabel =
+        EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
+    assert(StartLabel &&
+           "Forgot label before/after instruction starting a range!");
 
     const MCSymbol *EndLabel;
-    if (End != nullptr)
-      EndLabel = getLabelAfterInsn(End);
-    else if (std::next(I) == Ranges.end())
+    if (std::next(EI) == Entries.end()) {
       EndLabel = Asm->getFunctionEnd();
+      if (EI->isClobber())
+        EndMI = EI->getInstr();
+    }
+    else if (std::next(EI)->isClobber())
+      EndLabel = getLabelAfterInsn(std::next(EI)->getInstr());
     else
-      EndLabel = getLabelBeforeInsn(std::next(I)->first);
+      EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
     assert(EndLabel && "Forgot label after instruction ending a range!");
 
-    LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+    if (EI->isDbgValue())
+      LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
+
+    // If this history map entry has a debug value, add that to the list of
+    // open ranges and check if its location is valid for a single value
+    // location.
+    if (EI->isDbgValue()) {
+      // Do not add undef debug values, as they are redundant information in
+      // the location list entries. An undef debug results in an empty location
+      // description. If there are any non-undef fragments then padding pieces
+      // with empty location descriptions will automatically be inserted, and if
+      // all fragments are undef then the whole location list entry is
+      // redundant.
+      if (!Instr->isUndefDebugValue()) {
+        auto Value = getDebugLocValue(Instr);
+        OpenRanges.emplace_back(EI->getEndIndex(), Value);
+
+        // TODO: Add support for single value fragment locations.
+        if (Instr->getDebugExpression()->isFragment())
+          isSafeForSingleLocation = false;
+
+        if (!StartDebugMI)
+          StartDebugMI = Instr;
+      } else {
+        isSafeForSingleLocation = false;
+      }
+    }
 
-    auto Value = getDebugLocValue(Begin);
+    // Location list entries with empty location descriptions are redundant
+    // information in DWARF, so do not emit those.
+    if (OpenRanges.empty())
+      continue;
 
     // Omit entries with empty ranges as they do not have any effect in DWARF.
     if (StartLabel == EndLabel) {
-      // If this is a fragment, we must still add the value to the list of
-      // open ranges, since it may describe non-overlapping parts of the
-      // variable.
-      if (DIExpr->isFragment())
-        OpenRanges.push_back(Value);
       LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
       continue;
     }
 
-    DebugLocEntry Loc(StartLabel, EndLabel, Value);
-    bool couldMerge = false;
-
-    // If this is a fragment, it may belong to the current DebugLocEntry.
-    if (DIExpr->isFragment()) {
-      // Add this value to the list of open ranges.
-      OpenRanges.push_back(Value);
-
-      // Attempt to add the fragment to the last entry.
-      if (!DebugLoc.empty())
-        if (DebugLoc.back().MergeValues(Loc))
-          couldMerge = true;
-    }
-
-    if (!couldMerge) {
-      // Need to add a new DebugLocEntry. Add all values from still
-      // valid non-overlapping fragments.
-      if (OpenRanges.size())
-        Loc.addValues(OpenRanges);
-
-      DebugLoc.push_back(std::move(Loc));
-    }
+    SmallVector<DbgValueLoc, 4> Values;
+    for (auto &R : OpenRanges)
+      Values.push_back(R.second);
+    DebugLoc.emplace_back(StartLabel, EndLabel, Values);
 
     // Attempt to coalesce the ranges of two otherwise identical
     // DebugLocEntries.
@@ -1229,6 +1303,9 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
     if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
       DebugLoc.pop_back();
   }
+
+  return DebugLoc.size() == 1 && isSafeForSingleLocation &&
+         validThroughout(LScopes, StartDebugMI, EndMI);
 }
 
 DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1253,64 +1330,6 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
   return ConcreteEntities.back().get();
 }
 
-/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
-/// enclosing lexical scope. The check ensures there are no other instructions
-/// in the same lexical scope preceding the DBG_VALUE and that its range is
-/// either open or otherwise rolls off the end of the scope.
-static bool validThroughout(LexicalScopes &LScopes,
-                            const MachineInstr *DbgValue,
-                            const MachineInstr *RangeEnd) {
-  assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
-  auto MBB = DbgValue->getParent();
-  auto DL = DbgValue->getDebugLoc();
-  auto *LScope = LScopes.findLexicalScope(DL);
-  // Scope doesn't exist; this is a dead DBG_VALUE.
-  if (!LScope)
-    return false;
-  auto &LSRange = LScope->getRanges();
-  if (LSRange.size() == 0)
-    return false;
-
-  // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
-  const MachineInstr *LScopeBegin = LSRange.front().first;
-  // Early exit if the lexical scope begins outside of the current block.
-  if (LScopeBegin->getParent() != MBB)
-    return false;
-  MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
-  for (++Pred; Pred != MBB->rend(); ++Pred) {
-    if (Pred->getFlag(MachineInstr::FrameSetup))
-      break;
-    auto PredDL = Pred->getDebugLoc();
-    if (!PredDL || Pred->isMetaInstruction())
-      continue;
-    // Check whether the instruction preceding the DBG_VALUE is in the same
-    // (sub)scope as the DBG_VALUE.
-    if (DL->getScope() == PredDL->getScope())
-      return false;
-    auto *PredScope = LScopes.findLexicalScope(PredDL);
-    if (!PredScope || LScope->dominates(PredScope))
-      return false;
-  }
-
-  // If the range of the DBG_VALUE is open-ended, report success.
-  if (!RangeEnd)
-    return true;
-
-  // Fail if there are instructions belonging to our scope in another block.
-  const MachineInstr *LScopeEnd = LSRange.back().second;
-  if (LScopeEnd->getParent() != MBB)
-    return false;
-
-  // Single, constant DBG_VALUEs in the prologue are promoted to be live
-  // throughout the function. This is a hack, presumably for DWARF v2 and not
-  // necessarily correct. It would be much better to use a dbg.declare instead
-  // if we know the constant is live throughout the scope.
-  if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
-    return true;
-
-  return false;
-}
-
 // Find variables for each lexical scope.
 void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
                                    const DISubprogram *SP,
@@ -1324,8 +1343,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
       continue;
 
     // Instruction ranges, specifying where IV is accessible.
-    const auto &Ranges = I.second;
-    if (Ranges.empty())
+    const auto &HistoryMapEntries = I.second;
+    if (HistoryMapEntries.empty())
       continue;
 
     LexicalScope *Scope = nullptr;
@@ -1342,15 +1361,24 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
     DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
                                             *Scope, LocalVar, IV.second));
 
-    const MachineInstr *MInsn = Ranges.front().first;
+    const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
     assert(MInsn->isDebugValue() && "History must begin with debug value");
 
     // Check if there is a single DBG_VALUE, valid throughout the var's scope.
-    if (Ranges.size() == 1 &&
-        validThroughout(LScopes, MInsn, Ranges.front().second)) {
-      RegVar->initializeDbgValue(MInsn);
-      continue;
+    // If the history map contains a single debug value, there may be an
+    // additional entry which clobbers the debug value.
+    size_t HistSize = HistoryMapEntries.size();
+    bool SingleValueWithClobber =
+        HistSize == 2 && HistoryMapEntries[1].isClobber();
+    if (HistSize == 1 || SingleValueWithClobber) {
+      const auto *End =
+          SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
+      if (validThroughout(LScopes, MInsn, End)) {
+        RegVar->initializeDbgValue(MInsn);
+        continue;
+      }
     }
+
     // Do not emit location lists if .debug_loc secton is disabled.
     if (!useLocSection())
       continue;
@@ -1360,7 +1388,15 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
 
     // Build the location list for this variable.
     SmallVector<DebugLocEntry, 8> Entries;
-    buildLocationList(Entries, Ranges);
+    bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
+
+    // Check whether buildLocationList managed to merge all locations to one
+    // that is valid throughout the variable's scope. If so, produce a single
+    // value location.
+    if (isValidSingleLocation) {
+      RegVar->initializeDbgValue(Entries[0].getValues()[0]);
+      continue;
+    }
 
     // If the variable has a DIBasicType, extract it.  Basic types cannot have
     // unique identifiers, so don't bother resolving the type with the
@@ -1370,7 +1406,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
 
     // Finalize the entry by lowering it into a DWARF bytestream.
     for (auto &Entry : Entries)
-      Entry.finalize(*Asm, List, BT);
+      Entry.finalize(*Asm, List, BT, TheCU);
   }
 
   // For each InlinedEntity collected from DBG_LABEL instructions, convert to
@@ -1489,7 +1525,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   // We have an explicit location, different from the previous location.
   // Don't repeat a line-0 record, but otherwise emit the new location.
   // (The new location might be an explicit line 0, which we do emit.)
-  if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
+  if (DL.getLine() == 0 && LastAsmLine == 0)
     return;
   unsigned Flags = 0;
   if (DL == PrologEndLoc) {
@@ -1521,6 +1557,46 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
   return DebugLoc();
 }
 
+/// Register a source line with debug info by emitting a DWARF loc directive
+/// for it through the AsmPrinter's output streamer.
+static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
+                             const MDNode *S, unsigned Flags, unsigned CUID,
+                             uint16_t DwarfVersion,
+                             ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
+  StringRef Fn;
+  unsigned FileNo = 1;
+  unsigned Discriminator = 0;
+  if (auto *Scope = cast_or_null<DIScope>(S)) {
+    Fn = Scope->getFilename();
+    if (Line != 0 && DwarfVersion >= 4)
+      if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+        Discriminator = LBF->getDiscriminator();
+
+    FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
+                 .getOrCreateSourceID(Scope->getFile());
+  }
+  Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
+                                         Discriminator, Fn);
+}
+
+DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
+                                             unsigned CUID) {
+  // Get beginning of function.
+  if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
+    // Ensure the compile unit is created if the function is called before
+    // beginFunction().
+    (void)getOrCreateDwarfCompileUnit(
+        MF.getFunction().getSubprogram()->getUnit());
+    // We'd like to list the prologue as "not statements" but GDB behaves
+    // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+    const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
+    ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
+                       CUID, getDwarfVersion(), getUnits());
+    return PrologEndLoc;
+  }
+  return DebugLoc();
+}
+
 // Gather pre-function debug information.  Assumes being called immediately
 // after the function entry point has been emitted.
 void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
@@ -1543,13 +1619,8 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
     Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
 
   // Record beginning of function.
-  PrologEndLoc = findPrologueEndLoc(MF);
-  if (PrologEndLoc) {
-    // We'd like to list the prologue as "not statements" but GDB behaves
-    // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
-    auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
-    recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
-  }
+  PrologEndLoc = emitInitialLocDirective(
+      *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
 }
 
 void DwarfDebug::skippedNonDebugFunction() {
@@ -1647,21 +1718,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
 // emitted and which provides correspondence to the source line list.
 void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
                                   unsigned Flags) {
-  StringRef Fn;
-  unsigned FileNo = 1;
-  unsigned Discriminator = 0;
-  if (auto *Scope = cast_or_null<DIScope>(S)) {
-    Fn = Scope->getFilename();
-    if (Line != 0 && getDwarfVersion() >= 4)
-      if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
-        Discriminator = LBF->getDiscriminator();
-
-    unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
-    FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
-              .getOrCreateSourceID(Scope->getFile());
-  }
-  Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
-                                          Discriminator, Fn);
+  ::recordSourceLine(*Asm, Line, Col, S, Flags,
+                     Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
+                     getDwarfVersion(), getUnits());
 }
 
 //===----------------------------------------------------------------------===//
@@ -1890,17 +1949,59 @@ void DwarfDebug::emitDebugStr() {
 }
 
 void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
-                                   const DebugLocStream::Entry &Entry) {
+                                   const DebugLocStream::Entry &Entry,
+                                   const DwarfCompileUnit *CU) {
   auto &&Comments = DebugLocs.getComments(Entry);
   auto Comment = Comments.begin();
   auto End = Comments.end();
-  for (uint8_t Byte : DebugLocs.getBytes(Entry))
-    Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
+
+  // The expressions are inserted into a byte stream rather early (see
+  // DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that
+  // need to reference a base_type DIE the offset of that DIE is not yet known.
+  // To deal with this we instead insert a placeholder early and then extract
+  // it here and replace it with the real reference.
+  unsigned PtrSize = Asm->MAI->getCodePointerSize();
+  DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
+                                    DebugLocs.getBytes(Entry).size()),
+                          Asm->getDataLayout().isLittleEndian(), PtrSize);
+  DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
+
+  using Encoding = DWARFExpression::Operation::Encoding;
+  uint32_t Offset = 0;
+  for (auto &Op : Expr) {
+    assert(Op.getCode() != dwarf::DW_OP_const_type &&
+           "3 operand ops not yet supported");
+    Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+    Offset++;
+    for (unsigned I = 0; I < 2; ++I) {
+      if (Op.getDescription().Op[I] == Encoding::SizeNA)
+        continue;
+      if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
+          if (CU) {
+            uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
+            assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+            Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+          } else {
+            // Emit a reference to the 'generic type'.
+            Asm->EmitULEB128(0, nullptr, ULEB128PadSize);
+          }
+          // Make sure comments stay aligned.
+          for (unsigned J = 0; J < ULEB128PadSize; ++J)
+            if (Comment != End)
+              Comment++;
+      } else {
+        for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+          Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+      }
+      Offset = Op.getOperandEndOffset(I);
+    }
+    assert(Offset == Op.getEndOffset());
+  }
 }
 
-static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
-                              const DebugLocEntry::Value &Value,
-                              DwarfExpression &DwarfExpr) {
+void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+                                   const DbgValueLoc &Value,
+                                   DwarfExpression &DwarfExpr) {
   auto *DIExpr = Value.getExpression();
   DIExpressionCursor ExprCursor(DIExpr);
   DwarfExpr.addFragmentOffset(DIExpr);
@@ -1916,6 +2017,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
     if (Location.isIndirect())
       DwarfExpr.setMemoryLocationKind();
     DIExpressionCursor Cursor(DIExpr);
+
+    if (DIExpr->isEntryValue()) {
+      DwarfExpr.setEntryValueFlag();
+      DwarfExpr.addEntryValueExpression(Cursor);
+    }
+
     const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
     if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
       return;
@@ -1929,38 +2036,50 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
 
 void DebugLocEntry::finalize(const AsmPrinter &AP,
                              DebugLocStream::ListBuilder &List,
-                             const DIBasicType *BT) {
+                             const DIBasicType *BT,
+                             DwarfCompileUnit &TheCU) {
+  assert(!Values.empty() &&
+         "location list entries without values are redundant");
   assert(Begin != End && "unexpected location list entry with empty range");
   DebugLocStream::EntryBuilder Entry(List, Begin, End);
   BufferByteStreamer Streamer = Entry.getStreamer();
-  DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
-  const DebugLocEntry::Value &Value = Values[0];
+  DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
+  const DbgValueLoc &Value = Values[0];
   if (Value.isFragment()) {
     // Emit all fragments that belong to the same variable and range.
-    assert(llvm::all_of(Values, [](DebugLocEntry::Value P) {
+    assert(llvm::all_of(Values, [](DbgValueLoc P) {
           return P.isFragment();
         }) && "all values are expected to be fragments");
     assert(std::is_sorted(Values.begin(), Values.end()) &&
            "fragments are expected to be sorted");
 
     for (auto Fragment : Values)
-      emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
+      DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
 
   } else {
     assert(Values.size() == 1 && "only fragments may have >1 value");
-    emitDebugLocValue(AP, BT, Value, DwarfExpr);
+    DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
   }
   DwarfExpr.finalize();
 }
 
-void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+                                           const DwarfCompileUnit *CU) {
   // Emit the size.
   Asm->OutStreamer->AddComment("Loc expr size");
-  Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+  if (getDwarfVersion() >= 5)
+    Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+  else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
+    Asm->emitInt16(DebugLocs.getBytes(Entry).size());
+  else {
+    // The entry is too big to fit into 16 bits, drop it as there is nothing we
+    // can do.
+    Asm->emitInt16(0);
+    return;
+  }
   // Emit the entry.
   APByteStreamer Streamer(*Asm);
-  emitDebugLocEntry(Streamer, Entry);
+  emitDebugLocEntry(Streamer, Entry, CU);
 }
 
 // Emit the common part of the DWARF 5 range/locations list tables header.
@@ -2060,7 +2179,7 @@ void DwarfDebug::emitDebugLoc() {
           Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
         }
 
-        emitDebugLocEntryLocation(Entry);
+        emitDebugLocEntryLocation(Entry, CU);
         continue;
       }
 
@@ -2081,7 +2200,7 @@ void DwarfDebug::emitDebugLoc() {
         Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
       }
 
-      emitDebugLocEntryLocation(Entry);
+      emitDebugLocEntryLocation(Entry, CU);
     }
 
     if (IsLocLists) {
@@ -2100,9 +2219,9 @@ void DwarfDebug::emitDebugLoc() {
 }
 
 void DwarfDebug::emitDebugLocDWO() {
-  Asm->OutStreamer->SwitchSection(
-      Asm->getObjFileLowering().getDwarfLocDWOSection());
   for (const auto &List : DebugLocs.getLists()) {
+    Asm->OutStreamer->SwitchSection(
+        Asm->getObjFileLowering().getDwarfLocDWOSection());
     Asm->OutStreamer->EmitLabel(List.Label);
     for (const auto &Entry : DebugLocs.getEntries(List)) {
       // GDB only supports startx_length in pre-standard split-DWARF.
@@ -2117,7 +2236,7 @@ void DwarfDebug::emitDebugLocDWO() {
       Asm->EmitULEB128(idx);
       Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
 
-      emitDebugLocEntryLocation(Entry);
+      emitDebugLocEntryLocation(Entry, List.CU);
     }
     Asm->emitInt8(dwarf::DW_LLE_end_of_list);
   }
@@ -2170,19 +2289,18 @@ void DwarfDebug::emitDebugARanges() {
     }
 
     // Sort the symbols by offset within the section.
-    std::stable_sort(
-        List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
-          unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
-          unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
-
-          // Symbols with no order assigned should be placed at the end.
-          // (e.g. section end labels)
-          if (IA == 0)
-            return false;
-          if (IB == 0)
-            return true;
-          return IA < IB;
-        });
+    llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
+      unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+      unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+
+      // Symbols with no order assigned should be placed at the end.
+      // (e.g. section end labels)
+      if (IA == 0)
+        return false;
+      if (IB == 0)
+        return true;
+      return IA < IB;
+    });
 
     // Insert a final terminator.
     List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
@@ -2687,6 +2805,22 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
   CU.addDIETypeSignature(RefDie, Signature);
 }
 
+DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
+    : DD(DD),
+      TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) {
+  DD->TypeUnitsUnderConstruction.clear();
+  assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed());
+}
+
+DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
+  DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
+  DD->AddrPool.resetUsedFlag();
+}
+
+DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
+  return NonTypeUnitContext(this);
+}
+
 // Add the Name along with its companion DIE to the appropriate accelerator
 // table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
 // AccelTableKind::Apple, we use the table we got as an argument). If
@@ -2699,7 +2833,7 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
     return;
 
   if (getAccelTableKind() != AccelTableKind::Apple &&
-      CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+      CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
     return;
 
   DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8a31e989b289..3ac474e2bdda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 
 #include "AddressPool.h"
 #include "DebugLocStream.h"
+#include "DebugLocEntry.h"
 #include "DwarfFile.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -52,6 +52,7 @@ class ByteStreamer;
 class DebugLocEntry;
 class DIE;
 class DwarfCompileUnit;
+class DwarfExpression;
 class DwarfTypeUnit;
 class DwarfUnit;
 class LexicalScope;
@@ -111,12 +112,14 @@ public:
 ///
 /// Variables can be created from \c DBG_VALUE instructions.  Those whose
 /// location changes over time use \a DebugLocListIndex, while those with a
-/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
+/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
 ///
 /// Variables that have been optimized out use none of these fields.
 class DbgVariable : public DbgEntity {
-  unsigned DebugLocListIndex = ~0u;          /// Offset in DebugLocs.
-  const MachineInstr *MInsn = nullptr;       /// DBG_VALUE instruction.
+  /// Offset in DebugLocs.
+  unsigned DebugLocListIndex = ~0u;
+  /// Single value location description.
+  std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
 
   struct FrameIndexExpr {
     int FI;
@@ -136,7 +139,7 @@ public:
   /// Initialize from the MMI table.
   void initializeMMI(const DIExpression *E, int FI) {
     assert(FrameIndexExprs.empty() && "Already initialized?");
-    assert(!MInsn && "Already initialized?");
+    assert(!ValueLoc.get() && "Already initialized?");
 
     assert((!E || E->isValid()) && "Expected valid expression");
     assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
@@ -144,35 +147,35 @@ public:
     FrameIndexExprs.push_back({FI, E});
   }
 
-  /// Initialize from a DBG_VALUE instruction.
-  void initializeDbgValue(const MachineInstr *DbgValue) {
+  // Initialize variable's location.
+  void initializeDbgValue(DbgValueLoc Value) {
     assert(FrameIndexExprs.empty() && "Already initialized?");
-    assert(!MInsn && "Already initialized?");
+    assert(!ValueLoc && "Already initialized?");
+    assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
 
-    assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
-    assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
-           "Wrong inlined-at");
-
-    MInsn = DbgValue;
-    if (auto *E = DbgValue->getDebugExpression())
+    ValueLoc = llvm::make_unique<DbgValueLoc>(Value);
+    if (auto *E = ValueLoc->getExpression())
       if (E->getNumElements())
         FrameIndexExprs.push_back({0, E});
   }
 
+  /// Initialize from a DBG_VALUE instruction.
+  void initializeDbgValue(const MachineInstr *DbgValue);
+
   // Accessors.
   const DILocalVariable *getVariable() const {
     return cast<DILocalVariable>(getEntity());
   }
 
   const DIExpression *getSingleExpression() const {
-    assert(MInsn && FrameIndexExprs.size() <= 1);
+    assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
     return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
   }
 
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
   unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
   StringRef getName() const { return getVariable()->getName(); }
-  const MachineInstr *getMInsn() const { return MInsn; }
+  const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
   /// Get the FI entries, sorted by fragment offset.
   ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
   bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
@@ -205,7 +208,7 @@ public:
   }
 
   bool hasComplexAddress() const {
-    assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+    assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
     assert((FrameIndexExprs.empty() ||
             (FrameIndexExprs.size() == 1 &&
              FrameIndexExprs[0].Expr->getNumElements())) &&
@@ -219,11 +222,6 @@ public:
   static bool classof(const DbgEntity *N) {
     return N->getDbgEntityID() == DbgVariableKind;
   }
-
-private:
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -254,11 +252,6 @@ public:
   static bool classof(const DbgEntity *N) {
     return N->getDbgEntityID() == DbgLabelKind;
   }
-
-private:
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
 };
 
 /// Helper used to pair up a symbol and its DWARF compile unit.
@@ -558,9 +551,11 @@ class DwarfDebug : public DebugHandlerBase {
                          DenseSet<InlinedEntity> &ProcessedVars);
 
   /// Build the location list for all DBG_VALUEs in the
-  /// function that describe the same variable.
-  void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
-                         const DbgValueHistoryMap::InstrRanges &Ranges);
+  /// function that describe the same variable. If the resulting
+  /// list has only one entry that is valid for the variable's entire
+  /// scope, return true.
+  bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+                         const DbgValueHistoryMap::Entries &Entries);
 
   /// Collect variable information from the side table maintained by MF.
   void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -593,6 +588,9 @@ public:
   /// Emit all Dwarf sections that should come after the content.
   void endModule() override;
 
+  /// Emits initial debug location directive.
+  DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
+
   /// Process beginning of an instruction.
   void beginInstruction(const MachineInstr *MI) override;
 
@@ -604,6 +602,19 @@ public:
   void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
                             DIE &Die, const DICompositeType *CTy);
 
+  friend class NonTypeUnitContext;
+  class NonTypeUnitContext {
+    DwarfDebug *DD;
+    decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+    friend class DwarfDebug;
+    NonTypeUnitContext(DwarfDebug *DD);
+  public:
+    NonTypeUnitContext(NonTypeUnitContext&&) = default;
+    ~NonTypeUnitContext();
+  };
+
+  NonTypeUnitContext enterNonTypeUnitContext();
+
   /// Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
 
@@ -680,15 +691,12 @@ public:
   /// Emit an entry for the debug loc section. This can be used to
   /// handle an entry that's going to be emitted into the debug loc section.
   void emitDebugLocEntry(ByteStreamer &Streamer,
-                         const DebugLocStream::Entry &Entry);
+                         const DebugLocStream::Entry &Entry,
+                         const DwarfCompileUnit *CU);
 
   /// Emit the location for a debug loc entry, including the size header.
-  void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry);
-
-  /// Find the MDNode for the given reference.
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
+  void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+                                 const DwarfCompileUnit *CU);
 
   void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
                           DIE &Die);
@@ -728,6 +736,10 @@ public:
 
   void addSectionLabel(const MCSymbol *Sym);
   const MCSymbol *getSectionLabel(const MCSection *S);
+
+  static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+                                const DbgValueLoc &Value,
+                                DwarfExpression &DwarfExpr);
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b57ea8fc6322..24bbf58b91ec 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -1,9 +1,8 @@
 //===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 19c350afbf17..2858afaa1cf1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfExpression.h"
+#include "DwarfCompileUnit.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/BinaryFormat/Dwarf.h"
@@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) {
 
 void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
  assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert((LocationKind == Unknown || LocationKind == Register) &&
+ assert((isUnknownLocation() || isRegisterLocation()) &&
         "location description already locked down");
  LocationKind = Register;
  if (DwarfReg < 32) {
@@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
 
 void DwarfExpression::addBReg(int DwarfReg, int Offset) {
   assert(DwarfReg >= 0 && "invalid negative dwarf register number");
-  assert(LocationKind != Register && "location description already locked down");
+  assert(!isRegisterLocation() && "location description already locked down");
   if (DwarfReg < 32) {
     emitOp(dwarf::DW_OP_breg0 + DwarfReg);
   } else {
@@ -184,20 +184,20 @@ void DwarfExpression::addStackValue() {
 }
 
 void DwarfExpression::addSignedConstant(int64_t Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
   emitOp(dwarf::DW_OP_consts);
   emitSigned(Value);
 }
 
 void DwarfExpression::addUnsignedConstant(uint64_t Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
   emitConstu(Value);
 }
 
 void DwarfExpression::addUnsignedConstant(const APInt &Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
 
   unsigned Size = Value.getBitWidth();
@@ -242,12 +242,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
   }
 
   // Handle simple register locations.
-  if (LocationKind != Memory && !HasComplexExpression) {
+  if (!isMemoryLocation() && !HasComplexExpression) {
     for (auto &Reg : DwarfRegs) {
       if (Reg.DwarfRegNo >= 0)
         addReg(Reg.DwarfRegNo, Reg.Comment);
       addOpPiece(Reg.Size);
     }
+
+    if (isEntryValue() && DwarfVersion >= 4)
+      emitOp(dwarf::DW_OP_stack_value);
+
     DwarfRegs.clear();
     return true;
   }
@@ -296,6 +300,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
   return true;
 }
 
+void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) {
+  auto Op = ExprCursor.take();
+  assert(Op && Op->getOp() == dwarf::DW_OP_entry_value);
+  assert(!isMemoryLocation() &&
+         "We don't support entry values of memory locations yet");
+
+  if (DwarfVersion >= 5)
+    emitOp(dwarf::DW_OP_entry_value);
+  else
+    emitOp(dwarf::DW_OP_GNU_entry_value);
+  emitUnsigned(Op->getArg(0));
+}
+
 /// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
 static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
   while (ExprCursor) {
@@ -319,6 +336,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
   if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
     maskSubRegister();
 
+  Optional<DIExpression::ExprOperand> PrevConvertOp = None;
+
   while (ExprCursor) {
     auto Op = ExprCursor.take();
     switch (Op->getOp()) {
@@ -341,7 +360,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
         SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
 
       // Emit a DW_OP_stack_value for implicit location descriptions.
-      if (LocationKind == Implicit)
+      if (isImplicitLocation())
         addStackValue();
 
       // Emit the DW_OP_piece.
@@ -352,7 +371,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       return;
     }
     case dwarf::DW_OP_plus_uconst:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_plus_uconst);
       emitUnsigned(Op->getArg(0));
       break;
@@ -373,8 +392,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       emitOp(Op->getOp());
       break;
     case dwarf::DW_OP_deref:
-      assert(LocationKind != Register);
-      if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
+      assert(!isRegisterLocation());
+      if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
         // Turning this into a memory location description makes the deref
         // implicit.
         LocationKind = Memory;
@@ -382,26 +401,69 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
         emitOp(dwarf::DW_OP_deref);
       break;
     case dwarf::DW_OP_constu:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitConstu(Op->getArg(0));
       break;
+    case dwarf::DW_OP_LLVM_convert: {
+      unsigned BitSize = Op->getArg(0);
+      dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
+      if (DwarfVersion >= 5) {
+        emitOp(dwarf::DW_OP_convert);
+        // Reuse the base_type if we already have one in this CU otherwise we
+        // create a new one.
+        unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+        for (; I != E; ++I)
+          if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+              CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+            break;
+
+        if (I == E)
+          CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+
+        // If targeting a location-list; simply emit the index into the raw
+        // byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been
+        // fitted with means to extract it later.
+        // If targeting a inlined DW_AT_location; insert a DIEBaseTypeRef
+        // (containing the index and a resolve mechanism during emit) into the
+        // DIE value list.
+        emitBaseTypeRef(I);
+      } else {
+        if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
+          if (Encoding == dwarf::DW_ATE_signed)
+            emitLegacySExt(PrevConvertOp->getArg(0));
+          else if (Encoding == dwarf::DW_ATE_unsigned)
+            emitLegacyZExt(PrevConvertOp->getArg(0));
+          PrevConvertOp = None;
+        } else {
+          PrevConvertOp = Op;
+        }
+      }
+      break;
+    }
     case dwarf::DW_OP_stack_value:
       LocationKind = Implicit;
       break;
     case dwarf::DW_OP_swap:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_swap);
       break;
     case dwarf::DW_OP_xderef:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_xderef);
       break;
+    case dwarf::DW_OP_deref_size:
+      emitOp(dwarf::DW_OP_deref_size);
+      emitData1(Op->getArg(0));
+      break;
+    case dwarf::DW_OP_LLVM_tag_offset:
+      TagOffset = Op->getArg(0);
+      break;
     default:
       llvm_unreachable("unhandled opcode found in expression");
     }
   }
 
-  if (LocationKind == Implicit)
+  if (isImplicitLocation())
     // Turn this into an implicit location description.
     addStackValue();
 }
@@ -437,3 +499,25 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
     addOpPiece(FragmentOffset - OffsetInBits);
   OffsetInBits = FragmentOffset;
 }
+
+void DwarfExpression::emitLegacySExt(unsigned FromBits) {
+  // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X
+  emitOp(dwarf::DW_OP_dup);
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned(FromBits - 1);
+  emitOp(dwarf::DW_OP_shr);
+  emitOp(dwarf::DW_OP_lit0);
+  emitOp(dwarf::DW_OP_not);
+  emitOp(dwarf::DW_OP_mul);
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned(FromBits);
+  emitOp(dwarf::DW_OP_shl);
+  emitOp(dwarf::DW_OP_or);
+}
+
+void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
+  // X & ((1 << FromBits) - 1); shifting by >= 64 is UB, so saturate the mask.
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned(FromBits < 64 ? (1ULL << FromBits) - 1 : ~0ULL);
+  emitOp(dwarf::DW_OP_and);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 91568ba6d107..ec2ef6e575f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,7 +27,7 @@ namespace llvm {
 class AsmPrinter;
 class APInt;
 class ByteStreamer;
-class DwarfUnit;
+class DwarfCompileUnit;
 class DIELoc;
 class TargetRegisterInfo;
 
@@ -105,23 +104,56 @@ protected:
     const char *Comment;
   };
 
+  DwarfCompileUnit &CU;
+
   /// The register location, if any.
   SmallVector<Register, 2> DwarfRegs;
 
   /// Current Fragment Offset in Bits.
   uint64_t OffsetInBits = 0;
-  unsigned DwarfVersion;
 
   /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
-  unsigned SubRegisterSizeInBits = 0;
-  unsigned SubRegisterOffsetInBits = 0;
+  unsigned SubRegisterSizeInBits : 16;
+  unsigned SubRegisterOffsetInBits : 16;
 
   /// The kind of location description being produced.
-  enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+  enum { Unknown = 0, Register, Memory, Implicit };
+
+  /// The flags of location description being produced.
+  enum { EntryValue = 1 };
+
+  unsigned LocationKind : 3;
+  unsigned LocationFlags : 2;
+  unsigned DwarfVersion : 4;
+
+public:
+  bool isUnknownLocation() const {
+    return LocationKind == Unknown;
+  }
+
+  bool isMemoryLocation() const {
+    return LocationKind == Memory;
+  }
+
+  bool isRegisterLocation() const {
+    return LocationKind == Register;
+  }
+
+  bool isImplicitLocation() const {
+    return LocationKind == Implicit;
+  }
+
+  bool isEntryValue() const {
+    return LocationFlags & EntryValue;
+  }
 
+  Optional<uint8_t> TagOffset;
+
+protected:
   /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
   /// to represent a subregister.
   void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+    assert(SizeInBits < 65536 && OffsetInBits < 65536);
     SubRegisterSizeInBits = SizeInBits;
     SubRegisterOffsetInBits = OffsetInBits;
   }
@@ -138,6 +170,10 @@ protected:
   /// Emit a raw unsigned value.
   virtual void emitUnsigned(uint64_t Value) = 0;
 
+  virtual void emitData1(uint8_t Value) = 0;
+
+  virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+
   /// Emit a normalized unsigned constant.
   void emitConstu(uint64_t Value);
 
@@ -200,7 +236,10 @@ protected:
   ~DwarfExpression() = default;
 
 public:
-  DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
+  DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
+      : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
+        LocationKind(Unknown), LocationFlags(Unknown),
+        DwarfVersion(DwarfVersion) {}
 
   /// This needs to be called last to commit any pending changes.
   void finalize();
@@ -214,15 +253,17 @@ public:
   /// Emit an unsigned constant.
   void addUnsignedConstant(const APInt &Value);
 
-  bool isMemoryLocation() const { return LocationKind == Memory; }
-  bool isUnknownLocation() const { return LocationKind == Unknown; }
-
   /// Lock this down to become a memory location description.
   void setMemoryLocationKind() {
-    assert(LocationKind == Unknown);
+    assert(isUnknownLocation());
     LocationKind = Memory;
   }
 
+  /// Lock this down to become an entry value location.
+  void setEntryValueFlag() {
+    LocationFlags |= EntryValue;
+  }
+
   /// Emit a machine register location. As an optimization this may also consume
   /// the prefix of a DwarfExpression if a more efficient representation for
   /// combining the register location and the first operation exists.
@@ -237,6 +278,9 @@ public:
                                DIExpressionCursor &Expr, unsigned MachineReg,
                                unsigned FragmentOffsetInBits = 0);
 
+  /// Emit entry value dwarf operation.
+  void addEntryValueExpression(DIExpressionCursor &ExprCursor);
+
   /// Emit all remaining operations in the DIExpressionCursor.
   ///
   /// \param FragmentOffsetInBits     If this is one fragment out of multiple
@@ -248,6 +292,9 @@ public:
   /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
   /// the fragment described by \c Expr.
   void addFragmentOffset(const DIExpression *Expr);
+
+  void emitLegacySExt(unsigned FromBits);
+  void emitLegacyZExt(unsigned FromBits);
 };
 
 /// DwarfExpression implementation for .debug_loc entries.
@@ -257,27 +304,30 @@ class DebugLocDwarfExpression final : public DwarfExpression {
   void emitOp(uint8_t Op, const char *Comment = nullptr) override;
   void emitSigned(int64_t Value) override;
   void emitUnsigned(uint64_t Value) override;
+  void emitData1(uint8_t Value) override;
+  void emitBaseTypeRef(uint64_t Idx) override;
   bool isFrameRegister(const TargetRegisterInfo &TRI,
                        unsigned MachineReg) override;
 
 public:
-  DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
-      : DwarfExpression(DwarfVersion), BS(BS) {}
+  DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU)
+      : DwarfExpression(DwarfVersion, CU), BS(BS) {}
 };
 
 /// DwarfExpression implementation for singular DW_AT_location.
 class DIEDwarfExpression final : public DwarfExpression {
 const AsmPrinter &AP;
-  DwarfUnit &DU;
   DIELoc &DIE;
 
   void emitOp(uint8_t Op, const char *Comment = nullptr) override;
   void emitSigned(int64_t Value) override;
   void emitUnsigned(uint64_t Value) override;
+  void emitData1(uint8_t Value) override;
+  void emitBaseTypeRef(uint64_t Idx) override;
   bool isFrameRegister(const TargetRegisterInfo &TRI,
                        unsigned MachineReg) override;
 public:
-  DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+  DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
 
   DIELoc *finalize() {
     DwarfExpression::finalize();
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 78ccad481411..e3c9095d1343 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -44,6 +43,11 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
   if (!S)
     return;
 
+  // Skip CUs that ended up not being needed (split CUs that were abandoned
+  // because they added no information beyond the non-split CU)
+  if (llvm::empty(TheU->getUnitDie().values()))
+    return;
+
   Asm->OutStreamer->SwitchSection(S);
   TheU->emitHeader(UseOffsets);
   Asm->emitDwarfDIE(TheU->getUnitDie());
@@ -63,6 +67,11 @@ void DwarfFile::computeSizeAndOffsets() {
     if (TheU->getCUNode()->isDebugDirectivesOnly())
       continue;
 
+    // Skip CUs that ended up not being needed (split CUs that were abandoned
+    // because they added no information beyond the non-split CU)
+    if (llvm::empty(TheU->getUnitDie().values()))
+      return;
+
     TheU->setDebugSectionOffset(SecOffset);
     SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
   }
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 51acca8c1e53..244678ce9dc1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -59,7 +58,6 @@ public:
   MCSymbol *getSym() const { return RangeSym; }
   const DwarfCompileUnit &getCU() const { return *CU; }
   const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
-  void addRange(RangeSpan Range) { Ranges.push_back(Range); }
 };
 
 class DwarfFile {
@@ -148,7 +146,7 @@ public:
   void emitUnits(bool UseOffsets);
 
   /// Emit the given unit to its section.
-  void emitUnit(DwarfUnit *U, bool UseOffsets);
+  void emitUnit(DwarfUnit *TheU, bool UseOffsets);
 
   /// Emit a set of abbreviations to the specific section.
   void emitAbbrevs(MCSection *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 02016534a774..2a76dcb1b082 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index f484540d8d37..c5f5637fdae3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 80b365f1aa43..991ab94b50ab 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,21 +46,30 @@ using namespace llvm;
 
 #define DEBUG_TYPE "dwarfdebug"
 
-DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
+                                       DwarfCompileUnit &CU,
                                        DIELoc &DIE)
-    : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
+    : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP),
       DIE(DIE) {}
 
 void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
-  DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+  CU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
 }
 
 void DIEDwarfExpression::emitSigned(int64_t Value) {
-  DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+  CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
 }
 
 void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
-  DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+  CU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+}
+
+void DIEDwarfExpression::emitData1(uint8_t Value) {
+  CU.addUInt(DIE, dwarf::DW_FORM_data1, Value);
+}
+
+void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+  CU.addBaseTypeRef(DIE, Idx);
 }
 
 bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -285,21 +293,21 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
     addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
 }
 
-MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const {
+Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const {
   assert(File);
   if (DD->getDwarfVersion() < 5)
-    return nullptr;
+    return None;
   Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
   if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
-    return nullptr;
+    return None;
 
   // Convert the string checksum to an MD5Result for the streamer.
   // The verifier validates the checksum so we assume it's okay.
   // An MD5 checksum is 16 bytes.
   std::string ChecksumString = fromHex(Checksum->Value);
-  void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
-  memcpy(CKMem, ChecksumString.data(), 16);
-  return reinterpret_cast<MD5::MD5Result *>(CKMem);
+  MD5::MD5Result CKMem;
+  std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+  return CKMem;
 }
 
 unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -311,7 +319,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
     addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
   }
   return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
-                                 getMD5AsBytes(File), File->getSource());
+                                 getMD5AsBytes(File),
+                                 Asm->OutContext.getDwarfVersion(),
+                                 File->getSource());
 }
 
 void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -393,7 +403,6 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
     return;
 
   unsigned FileID = getOrCreateSourceID(File);
-  assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
 }
@@ -462,9 +471,8 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
     assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
            T == dwarf::DW_TAG_volatile_type ||
            T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
-    DITypeRef Deriv = DTy->getBaseType();
-    assert(Deriv && "Expected valid base type");
-    return isUnsignedDIType(DD, DD->resolve(Deriv));
+    assert(DTy->getBaseType() && "Expected valid base type");
+    return isUnsignedDIType(DD, DTy->getBaseType());
   }
 
   auto *BTy = cast<DIBasicType>(Ty);
@@ -523,6 +531,10 @@ void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
   addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
 }
 
+void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
+  addConstantValue(Die, isUnsignedDIType(DD, Ty), Val);
+}
+
 void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
   // FIXME: This is a bit conservative/simple - it emits negative values always
   // sign extended to 64 bits rather than minimizing the number of bytes.
@@ -603,8 +615,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
   return getDIE(Context);
 }
 
-DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
-  auto *Context = resolve(Ty->getScope());
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+  auto *Context = Ty->getScope();
   DIE *ContextDIE = getOrCreateContextDIE(Context);
 
   if (DIE *TyDIE = getDIE(Ty))
@@ -619,6 +631,37 @@ DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
   return &TyDIE;
 }
 
+DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
+                              const DIType *Ty) {
+  // Create new type.
+  DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
+
+  updateAcceleratorTables(Context, Ty, TyDIE);
+
+  if (auto *BT = dyn_cast<DIBasicType>(Ty))
+    constructTypeDIE(TyDIE, BT);
+  else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+    constructTypeDIE(TyDIE, STy);
+  else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+    if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
+        (Ty->getRawName() || CTy->getRawIdentifier())) {
+      // Skip updating the accelerator tables since this is not the full type.
+      if (MDString *TypeId = CTy->getRawIdentifier())
+        DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+      else {
+        auto X = DD->enterNonTypeUnitContext();
+        finishNonUnitTypeDIE(TyDIE, CTy);
+      }
+      return &TyDIE;
+    }
+    constructTypeDIE(TyDIE, CTy);
+  } else {
+    constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
+  }
+
+  return &TyDIE;
+}
+
 DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
   if (!TyNode)
     return nullptr;
@@ -627,43 +670,23 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
 
   // DW_TAG_restrict_type is not supported in DWARF2
   if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
-    return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+    return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
 
   // DW_TAG_atomic_type is not supported in DWARF < 5
   if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
-    return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+    return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
 
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE.
-  auto *Context = resolve(Ty->getScope());
+  auto *Context = Ty->getScope();
   DIE *ContextDIE = getOrCreateContextDIE(Context);
   assert(ContextDIE);
 
   if (DIE *TyDIE = getDIE(Ty))
     return TyDIE;
 
-  // Create new type.
-  DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
-
-  updateAcceleratorTables(Context, Ty, TyDIE);
-
-  if (auto *BT = dyn_cast<DIBasicType>(Ty))
-    constructTypeDIE(TyDIE, BT);
-  else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
-    constructTypeDIE(TyDIE, STy);
-  else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
-    if (DD->generateTypeUnits() && !Ty->isForwardDecl())
-      if (MDString *TypeId = CTy->getRawIdentifier()) {
-        DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
-        // Skip updating the accelerator tables since this is not the full type.
-        return &TyDIE;
-      }
-    constructTypeDIE(TyDIE, CTy);
-  } else {
-    constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
-  }
-
-  return &TyDIE;
+  return static_cast<DwarfUnit *>(ContextDIE->getUnit())
+      ->createTypeDIE(Context, *ContextDIE, Ty);
 }
 
 void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
@@ -679,7 +702,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
     DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
 
     if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
-        isa<DINamespace>(Context))
+        isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
       addGlobalType(Ty, TyDIE, Context);
   }
 }
@@ -702,8 +725,8 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
   SmallVector<const DIScope *, 1> Parents;
   while (!isa<DICompileUnit>(Context)) {
     Parents.push_back(Context);
-    if (Context->getScope())
-      Context = resolve(Context->getScope());
+    if (const DIScope *S = Context->getScope())
+      Context = S;
     else
       // Structure, etc types will have a NULL context if they're at the top
       // level.
@@ -754,7 +777,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
   uint16_t Tag = Buffer.getTag();
 
   // Map to main type, void will not have a type.
-  const DIType *FromTy = resolve(DTy->getBaseType());
+  const DIType *FromTy = DTy->getBaseType();
   if (FromTy)
     addType(Buffer, FromTy);
 
@@ -770,24 +793,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
     addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
 
   if (Tag == dwarf::DW_TAG_ptr_to_member_type)
-    addDIEEntry(
-        Buffer, dwarf::DW_AT_containing_type,
-        *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType())));
+    addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+                *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
   // Add source line info if available and TyDesc is not a forward declaration.
   if (!DTy->isForwardDecl())
     addSourceLine(Buffer, DTy);
 
-  // If DWARF address space value is other than None, add it for pointer and
-  // reference types as DW_AT_address_class.
-  if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
-                                      Tag == dwarf::DW_TAG_reference_type))
+  // If DWARF address space value is other than None, add it.  The IR
+  // verifier checks that DWARF address space only exists for pointer
+  // or reference types.
+  if (DTy->getDWARFAddressSpace())
     addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
             DTy->getDWARFAddressSpace().getValue());
 }
 
 void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
   for (unsigned i = 1, N = Args.size(); i < N; ++i) {
-    const DIType *Ty = resolve(Args[i]);
+    const DIType *Ty = Args[i];
     if (!Ty) {
       assert(i == N-1 && "Unspecified parameter must be the last argument");
       createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
@@ -804,7 +826,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
   // Add return type.  A void return won't have a type.
   auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
   if (Elements.size())
-    if (auto RTy = resolve(Elements[0]))
+    if (auto RTy = Elements[0])
       addType(Buffer, RTy);
 
   bool isPrototyped = true;
@@ -875,7 +897,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
       else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
         if (DDTy->getTag() == dwarf::DW_TAG_friend) {
           DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
-          addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+          addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
         } else if (DDTy->isStaticMember()) {
           getOrCreateStaticMemberDIE(DDTy);
         } else if (Tag == dwarf::DW_TAG_variant_part) {
@@ -884,7 +906,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
           DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
           if (const ConstantInt *CI =
               dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
-            if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType())))
+            if (isUnsignedDIType(DD, Discriminator->getBaseType()))
               addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
             else
               addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
@@ -898,7 +920,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
         StringRef PropertyName = Property->getName();
         addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
         if (Property->getType())
-          addType(ElemDie, resolve(Property->getType()));
+          addType(ElemDie, Property->getType());
         addSourceLine(ElemDie, Property);
         StringRef GetterName = Property->getGetterName();
         if (!GetterName.empty())
@@ -924,7 +946,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
     // inside C++ composite types to point to the base class with the vtable.
     // Rust uses DW_AT_containing_type to link a vtable to the type
     // for which it was created.
-    if (auto *ContainingType = resolve(CTy->getVTableHolder()))
+    if (auto *ContainingType = CTy->getVTableHolder())
       addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
                   *getOrCreateTypeDIE(ContainingType));
 
@@ -994,7 +1016,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(
       createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
   // Add the type if it exists, it could be void and therefore no type.
   if (TP->getType())
-    addType(ParamDIE, resolve(TP->getType()));
+    addType(ParamDIE, TP->getType());
   if (!TP->getName().empty())
     addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
 }
@@ -1006,12 +1028,12 @@ void DwarfUnit::constructTemplateValueParameterDIE(
   // Add the type if there is one, template template and template parameter
   // packs will not have a type.
   if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
-    addType(ParamDIE, resolve(VP->getType()));
+    addType(ParamDIE, VP->getType());
   if (!VP->getName().empty())
     addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
   if (Metadata *Val = VP->getValue()) {
     if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
-      addConstantValue(ParamDIE, CI, resolve(VP->getType()));
+      addConstantValue(ParamDIE, CI, VP->getType());
     else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
       // We cannot describe the location of dllimport'd entities: the
       // computation of their address requires loads from the IAT.
@@ -1085,7 +1107,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
   // such construction creates the DIE (as is the case for member function
   // declarations).
   DIE *ContextDIE =
-      Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
+      Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
 
   if (DIE *SPDie = getDIE(SP))
     return SPDie;
@@ -1107,7 +1129,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
   if (SP->isDefinition())
     return &SPDie;
 
-  applySubprogramAttributes(SP, SPDie);
+  static_cast<DwarfUnit *>(SPDie.getUnit())
+      ->applySubprogramAttributes(SP, SPDie);
   return &SPDie;
 }
 
@@ -1197,7 +1220,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
   // Add a return type. If this is a type like a C/C++ void type we don't add a
   // return type.
   if (Args.size())
-    if (auto Ty = resolve(Args[0]))
+    if (auto Ty = Args[0])
       addType(SPDie, Ty);
 
   unsigned VK = SP->getVirtuality();
@@ -1209,8 +1232,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
       addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
       addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
     }
-    ContainingTypeMap.insert(
-        std::make_pair(&SPDie, resolve(SP->getContainingType())));
+    ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
   }
 
   if (!SP->isDefinition()) {
@@ -1261,6 +1283,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
 
   if (SP->isMainSubprogram())
     addFlag(SPDie, dwarf::DW_AT_main_subprogram);
+  if (SP->isPure())
+    addFlag(SPDie, dwarf::DW_AT_pure);
+  if (SP->isElemental())
+    addFlag(SPDie, dwarf::DW_AT_elemental);
+  if (SP->isRecursive())
+    addFlag(SPDie, dwarf::DW_AT_recursive);
 }
 
 void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
@@ -1310,7 +1338,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
   const uint64_t ActualSize = CTy->getSizeInBits();
 
   // Obtain the size of each element in the vector.
-  DIType *BaseTy = CTy->getBaseType().resolve();
+  DIType *BaseTy = CTy->getBaseType();
   assert(BaseTy && "Unknown vector element type.");
   const uint64_t ElementSize = BaseTy->getSizeInBits();
 
@@ -1338,7 +1366,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
   }
 
   // Emit the element type.
-  addType(Buffer, resolve(CTy->getBaseType()));
+  addType(Buffer, CTy->getBaseType());
 
   // Get an anonymous type for index type.
   // FIXME: This type should be passed down from the front end
@@ -1356,7 +1384,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
 }
 
 void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
-  const DIType *DTy = resolve(CTy->getBaseType());
+  const DIType *DTy = CTy->getBaseType();
   bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
   if (DTy) {
     if (DD->getDwarfVersion() >= 3)
@@ -1365,6 +1393,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
       addFlag(Buffer, dwarf::DW_AT_enum_class);
   }
 
+  auto *Context = CTy->getScope();
+  bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+      isa<DINamespace>(Context) || isa<DICommonBlock>(Context);
   DINodeArray Elements = CTy->getElements();
 
   // Add enumerators to enumeration type.
@@ -1376,6 +1407,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
       addString(Enumerator, dwarf::DW_AT_name, Name);
       auto Value = static_cast<uint64_t>(Enum->getValue());
       addConstantValue(Enumerator, IsUnsigned, Value);
+      if (IndexEnumerators)
+        addGlobalName(Name, Enumerator, Context);
     }
   }
 }
@@ -1400,7 +1433,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
   if (!Name.empty())
     addString(MemberDie, dwarf::DW_AT_name, Name);
 
-  if (DIType *Resolved = resolve(DT->getBaseType()))
+  if (DIType *Resolved = DT->getBaseType())
     addType(MemberDie, Resolved);
 
   addSourceLine(MemberDie, DT);
@@ -1509,7 +1542,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
 
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE.
-  DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
+  DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
   assert(dwarf::isType(ContextDIE->getTag()) &&
          "Static member should belong to a type.");
 
@@ -1518,7 +1551,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
 
   DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
 
-  const DIType *Ty = resolve(DT->getBaseType());
+  const DIType *Ty = DT->getBaseType();
 
   addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
   addType(StaticMemberDIE, Ty);
@@ -1671,3 +1704,11 @@ void DwarfUnit::addLoclistsBase() {
                   DU->getLoclistsTableBaseSym(),
                   TLOF.getDwarfLoclistsSection()->getBeginSymbol());
 }
+
+void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+  addFlag(D, dwarf::DW_AT_declaration);
+  StringRef Name = CTy->getName();
+  if (!Name.empty())
+    addString(D, dwarf::DW_AT_name, Name);
+  getCU().createTypeDIE(CTy);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index a59ebb7c1465..56c934a35ae8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -199,6 +198,7 @@ public:
   void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
   void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
   void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+  void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty);
   void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
 
   /// Add constant value entry in variable DIE.
@@ -237,6 +237,9 @@ public:
   void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
                                  bool SkipSPAttributes = false);
 
+  /// Creates type DIE with specific context.
+  DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
+
   /// Find existing DIE or create new DIE for the given type.
   DIE *getOrCreateTypeDIE(const MDNode *TyNode);
 
@@ -294,7 +297,10 @@ public:
 
   /// If the \p File has an MD5 checksum, return it as an MD5Result
   /// allocated in the MCContext.
-  MD5::MD5Result *getMD5AsBytes(const DIFile *File) const;
+  Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+  /// Get context owner's DIE.
+  DIE *createTypeDIE(const DICompositeType *Ty);
 
 protected:
   ~DwarfUnit();
@@ -306,17 +312,6 @@ protected:
   /// create a new ID and insert it in the line table.
   virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
 
-  /// Look in the DwarfDebug map for the MDNode that corresponds to the
-  /// reference.
-  template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
-    return Ref.resolve();
-  }
-
-  /// If this is a named finished type then include it in the list of types for
-  /// the accelerator tables.
-  void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
-                               const DIE &TyDIE);
-
   /// Emit the common part of the header for this unit.
   void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
 
@@ -344,6 +339,13 @@ private:
   /// Set D as anonymous type for index which can be reused later.
   void setIndexTyDie(DIE *D) { IndexTyDie = D; }
 
+  virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0;
+
+  /// If this is a named finished type then include it in the list of types for
+  /// the accelerator tables.
+  void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+                               const DIE &TyDIE);
+
   virtual bool isDwoUnit() const = 0;
   const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
 };
@@ -356,6 +358,7 @@ class DwarfTypeUnit final : public DwarfUnit {
   bool UsedLineTable = false;
 
   unsigned getOrCreateSourceID(const DIFile *File) override;
+  void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
   bool isDwoUnit() const override;
 
 public:
@@ -365,9 +368,6 @@ public:
   void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
   void setType(const DIE *Ty) { this->Ty = Ty; }
 
-  /// Get context owner's DIE.
-  DIE *createTypeDIE(const DICompositeType *Ty);
-
   /// Emit the header for this unit, not including the initial length field.
   void emitHeader(bool UseOffsets) override;
   unsigned getHeaderSize() const override {
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 7599121de2b0..99e3687b36b8 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,9 +1,8 @@
 //===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -379,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
   bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
   bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
   unsigned CallSiteEncoding =
-      IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
+      IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
+               Asm->getObjFileLowering().getCallSiteEncoding();
   bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
 
   // Type infos.
@@ -524,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() {
       // Offset of the call site relative to the start of the procedure.
       if (VerboseAsm)
         Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
-      Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);
+      Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
       if (VerboseAsm)
         Asm->OutStreamer->AddComment(Twine("  Call between ") +
                                      BeginLabel->getName() + " and " +
                                      EndLabel->getName());
-      Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel);
+      Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
 
       // Offset of the landing pad relative to the start of the procedure.
       if (!S.LPad) {
         if (VerboseAsm)
           Asm->OutStreamer->AddComment("    has no landing pad");
-        Asm->EmitULEB128(0);
+        Asm->EmitCallSiteValue(0, CallSiteEncoding);
       } else {
         if (VerboseAsm)
           Asm->OutStreamer->AddComment(Twine("    jumps to ") +
                                        S.LPad->LandingPadLabel->getName());
-        Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel,
-                                          EHFuncBeginSym);
+        Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
+                                CallSiteEncoding);
       }
 
       // Offset of the first associated action record, relative to the start of
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index ce912d032c6d..e62cf17a05d4 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -1,9 +1,8 @@
 //===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 34677ecc9e69..39392b79e960 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,9 +1,8 @@
 //===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3479a00def23..3145cc90dc73 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -1,9 +1,8 @@
 //===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp
index 527e5ae50146..444b0ed17b6d 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,10 +18,10 @@
 using namespace llvm;
 
 void WasmException::endModule() {
-  // This is the symbol used in 'throw' and 'if_except' instruction to denote
+  // This is the symbol used in 'throw' and 'br_on_exn' instructions to denote
   // this is a C++ exception. This symbol has to be emitted somewhere once in
   // the module.  Check if the symbol has already been created, i.e., we have at
-  // least one 'throw' or 'if_except' instruction in the module, and emit the
+  // least one 'throw' or 'br_on_exn' instruction in the module, and emit the
   // symbol only if so.
   SmallString<60> NameStr;
   Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h
index cbdb42457cf8..1893b6b2df43 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/lib/CodeGen/AsmPrinter/WasmException.h
@@ -1,9 +1,8 @@
 //===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 18d37caf57ee..290be81c6baa 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 28f119e35966..def0a59ab007 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -1,9 +1,8 @@
 //===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index cf8e8c69bc2a..155e91ce61a1 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,6 +109,12 @@ void WinException::beginFunction(const MachineFunction *MF) {
   beginFunclet(MF->front(), Asm->CurrentFnSym);
 }
 
+void WinException::markFunctionEnd() {
+  if (isAArch64 && CurrentFuncletEntry &&
+      (shouldEmitMoves || shouldEmitPersonality))
+    Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+}
+
 /// endFunction - Gather and emit post-function exception information.
 ///
 void WinException::endFunction(const MachineFunction *MF) {
@@ -129,7 +134,7 @@ void WinException::endFunction(const MachineFunction *MF) {
     NonConstMF->tidyLandingPads();
   }
 
-  endFunclet();
+  endFuncletImpl();
 
   // endFunclet will emit the necessary .xdata tables for x64 SEH.
   if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
@@ -232,6 +237,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
 }
 
 void WinException::endFunclet() {
+  if (isAArch64 && CurrentFuncletEntry &&
+      (shouldEmitMoves || shouldEmitPersonality)) {
+    Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+    Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+  }
+  endFuncletImpl();
+}
+
+void WinException::endFuncletImpl() {
   // No funclet to process?  Great, we have nothing to do.
   if (!CurrentFuncletEntry)
     return;
@@ -247,8 +261,6 @@ void WinException::endFunclet() {
     // to EmitWinEHHandlerData below can calculate the size of the funclet or
     // function.
     if (isAArch64) {
-      Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
-      Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
       MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
           Asm->OutStreamer->getCurrentSectionOnly());
       Asm->OutStreamer->SwitchSection(XData);
@@ -545,15 +557,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
       OS.AddComment(Comment);
   };
 
-  // Emit a label assignment with the SEH frame offset so we can use it for
-  // llvm.eh.recoverfp.
-  StringRef FLinkageName =
-      GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
-  MCSymbol *ParentFrameOffset =
-      Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
-  const MCExpr *MCOffset =
-      MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
-  Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+  if (!isAArch64) {
+    // Emit a label assignment with the SEH frame offset so we can use it for
+    // llvm.eh.recoverfp.
+    StringRef FLinkageName =
+        GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+    MCSymbol *ParentFrameOffset =
+        Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+    const MCExpr *MCOffset =
+        MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+    Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+  }
 
   // Use the assembler to compute the number of table entries through label
   // difference and division.
@@ -936,8 +950,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
   int FI = FuncInfo.EHRegNodeFrameIndex;
   if (FI != INT_MAX) {
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
-    unsigned UnusedReg;
-    Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
+    Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI);
   }
 
   MCContext &Ctx = Asm->OutContext;
diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h
index 37c796f89765..dc5036302131 100644
--- a/lib/CodeGen/AsmPrinter/WinException.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -1,9 +1,8 @@
 //===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -86,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
   /// only), it is relative to the frame pointer.
   int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
 
+  void endFuncletImpl();
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -100,6 +100,8 @@ public:
   /// immediately after the function entry point.
   void beginFunction(const MachineFunction *MF) override;
 
+  void markFunctionEnd() override;
+
   /// Gather and emit post-function exception information.
   void endFunction(const MachineFunction *) override;
 
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 95581c09dd1c..dc7eaf6a5fe7 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1,9 +1,8 @@
 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -361,7 +360,7 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
 /// Get the iX type with the same bitwidth as T.
 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                        const DataLayout &DL) {
-  EVT VT = TLI->getValueType(DL, T);
+  EVT VT = TLI->getMemValueType(DL, T);
   unsigned BitWidth = VT.getStoreSizeInBits();
   assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
   return IntegerType::get(T->getContext(), BitWidth);
@@ -382,7 +381,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
                               Addr->getType()->getPointerAddressSpace());
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
 
-  auto *NewLI = Builder.CreateLoad(NewAddr);
+  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
   NewLI->setAlignment(LI->getAlignment());
   NewLI->setVolatile(LI->isVolatile());
   NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -431,6 +430,9 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
   IRBuilder<> Builder(LI);
   AtomicOrdering Order = LI->getOrdering();
+  if (Order == AtomicOrdering::Unordered)
+    Order = AtomicOrdering::Monotonic;
+
   Value *Addr = LI->getPointerOperand();
   Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
   Constant *DummyVal = Constant::getNullValue(Ty);
@@ -496,11 +498,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
                                  Value *Loaded, Value *NewVal,
                                  AtomicOrdering MemOpOrder,
                                  Value *&Success, Value *&NewLoaded) {
+  Type *OrigTy = NewVal->getType();
+
+  // This code can go away when cmpxchg supports FP types.
+  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  if (NeedBitcast) {
+    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+    unsigned AS = Addr->getType()->getPointerAddressSpace();
+    Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
+    NewVal = Builder.CreateBitCast(NewVal, IntTy);
+    Loaded = Builder.CreateBitCast(Loaded, IntTy);
+  }
+
   Value* Pair = Builder.CreateAtomicCmpXchg(
       Addr, Loaded, NewVal, MemOpOrder,
       AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
   Success = Builder.CreateExtractValue(Pair, 1, "success");
   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+  if (NeedBitcast)
+    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
 }
 
 /// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -535,6 +552,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
   case AtomicRMWInst::UMin:
     NewVal = Builder.CreateICmpULE(Loaded, Inc);
     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+  case AtomicRMWInst::FAdd:
+    return Builder.CreateFAdd(Loaded, Inc, "new");
+  case AtomicRMWInst::FSub:
+    return Builder.CreateFSub(Loaded, Inc, "new");
   default:
     llvm_unreachable("Unknown atomic op");
   }
@@ -564,6 +585,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     unsigned ValueSize = getAtomicOpSize(AI);
     if (ValueSize < MinCASSize) {
+      // TODO: Handle atomicrmw fadd/fsub
+      if (AI->getType()->isFloatingPointTy())
+        return false;
+
       expandPartwordAtomicRMW(AI,
                               TargetLoweringBase::AtomicExpansionKind::CmpXChg);
     } else {
@@ -1090,11 +1115,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                            SuccessOrder != AtomicOrdering::Monotonic &&
                            SuccessOrder != AtomicOrdering::Acquire &&
-                           !F->optForMinSize();
+                           !F->hasMinSize();
 
   // There's no overhead for sinking the release barrier in a weak cmpxchg, so
   // do it even on minsize.
-  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
 
   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
   //
@@ -1533,6 +1558,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
   case AtomicRMWInst::UMin:
+  case AtomicRMWInst::FAdd:
+  case AtomicRMWInst::FSub:
     // No atomic libcalls are available for max/min/umax/umin.
     return {};
   }
@@ -1671,16 +1698,25 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   }
 
   // 'ptr' argument.
-  Value *PtrVal =
-      Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+  // Note: This assumes all address spaces share a common libfunc
+  // implementation and that addresses are convertible.  For systems without
+  // that property, we'd need to extend this mechanism to support AS-specific
+  // families of atomic intrinsics.
+  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
+  Value *PtrVal = Builder.CreateBitCast(PointerOperand,
+                                        Type::getInt8PtrTy(Ctx, PtrTypeAS));
+  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
   Args.push_back(PtrVal);
 
   // 'expected' argument, if present.
   if (CASExpected) {
     AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
     AllocaCASExpected->setAlignment(AllocaAlignment);
+    unsigned AllocaAS =  AllocaCASExpected->getType()->getPointerAddressSpace();
+
     AllocaCASExpected_i8 =
-        Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+      Builder.CreateBitCast(AllocaCASExpected,
+                            Type::getInt8PtrTy(Ctx, AllocaAS));
     Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
     Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
     Args.push_back(AllocaCASExpected_i8);
@@ -1707,8 +1743,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   if (!CASExpected && HasResult && !UseSizedLibcall) {
     AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
     AllocaResult->setAlignment(AllocaAlignment);
+    unsigned AllocaAS =  AllocaResult->getType()->getPointerAddressSpace();
     AllocaResult_i8 =
-        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
+      Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
     Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
     Args.push_back(AllocaResult_i8);
   }
@@ -1734,7 +1771,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
   for (Value *Arg : Args)
     ArgTys.push_back(Arg->getType());
   FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
-  Constant *LibcallFn =
+  FunctionCallee LibcallFn =
       M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
   CallInst *Call = Builder.CreateCall(LibcallFn, Args);
   Call->setAttributes(Attr);
@@ -1749,8 +1786,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
     // from call}
     Type *FinalResultTy = I->getType();
     Value *V = UndefValue::get(FinalResultTy);
-    Value *ExpectedOut =
-        Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
+    Value *ExpectedOut = Builder.CreateAlignedLoad(
+        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
     Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
     V = Builder.CreateInsertValue(V, ExpectedOut, 0);
     V = Builder.CreateInsertValue(V, Result, 1);
@@ -1760,7 +1797,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
     if (UseSizedLibcall)
       V = Builder.CreateBitOrPointerCast(Result, I->getType());
     else {
-      V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
+      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
+                                    AllocaAlignment);
       Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
     }
     I->replaceAllUsesWith(V);
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index d11f375b176e..57cefae2066a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index efbfd5f4ab2c..fb54b5d6c8d8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1,9 +1,8 @@
 //===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -722,7 +721,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
   // branch instruction, which is likely to be smaller than the 2
   // instructions that would be deleted in the merge.
   MachineFunction *MF = MBB1->getParent();
-  return EffectiveTailLen >= 2 && MF->getFunction().optForSize() &&
+  return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
          (I1 == MBB1->begin() || I2 == MBB2->begin());
 }
 
@@ -1071,31 +1070,29 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
 
 bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
   bool MadeChange = false;
-  if (!EnableTailMerge) return MadeChange;
+  if (!EnableTailMerge)
+    return MadeChange;
 
   // First find blocks with no successors.
-  // Block placement does not create new tail merging opportunities for these
-  // blocks.
-  if (!AfterBlockPlacement) {
-    MergePotentials.clear();
-    for (MachineBasicBlock &MBB : MF) {
-      if (MergePotentials.size() == TailMergeThreshold)
-        break;
-      if (!TriedMerging.count(&MBB) && MBB.succ_empty())
-        MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
-    }
-
-    // If this is a large problem, avoid visiting the same basic blocks
-    // multiple times.
+  // Block placement may create new tail merging opportunities for these blocks.
+  MergePotentials.clear();
+  for (MachineBasicBlock &MBB : MF) {
     if (MergePotentials.size() == TailMergeThreshold)
-      for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
-        TriedMerging.insert(MergePotentials[i].getBlock());
-
-    // See if we can do any tail merging on those.
-    if (MergePotentials.size() >= 2)
-      MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+      break;
+    if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+      MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
   }
 
+  // If this is a large problem, avoid visiting the same basic blocks
+  // multiple times.
+  if (MergePotentials.size() == TailMergeThreshold)
+    for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+      TriedMerging.insert(MergePotentials[i].getBlock());
+
+  // See if we can do any tail merging on those.
+  if (MergePotentials.size() >= 2)
+    MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+
   // Look at blocks (IBB) with multiple predecessors (PBB).
   // We change each predecessor to a canonical form, by
   // (1) temporarily removing any unconditional branch from the predecessor
@@ -1183,29 +1180,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
           }
         }
 
-        // Failing case: the only way IBB can be reached from PBB is via
-        // exception handling.  Happens for landing pads.  Would be nice to have
-        // a bit in the edge so we didn't have to do all this.
-        if (IBB->isEHPad()) {
-          MachineFunction::iterator IP = ++PBB->getIterator();
-          MachineBasicBlock *PredNextBB = nullptr;
-          if (IP != MF.end())
-            PredNextBB = &*IP;
-          if (!TBB) {
-            if (IBB != PredNextBB)      // fallthrough
-              continue;
-          } else if (FBB) {
-            if (TBB != IBB && FBB != IBB)   // cbr then ubr
-              continue;
-          } else if (Cond.empty()) {
-            if (TBB != IBB)               // ubr
-              continue;
-          } else {
-            if (TBB != IBB && IBB != PredNextBB)  // cbr
-              continue;
-          }
-        }
-
         // Remove the unconditional branch at the end, if any.
         if (TBB && (Cond.empty() || FBB)) {
           DebugLoc dl = PBB->findBranchDebugLoc();
@@ -1598,7 +1572,7 @@ ReoptimizeBlock:
   }
 
   if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
-      MF.getFunction().optForSize()) {
+      MF.getFunction().hasOptSize()) {
     // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
     // direction, thereby defeating careful block placement and regressing
     // performance. Therefore, only consider this for optsize functions.
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index accd0ab7317b..761ff9c7d54e 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,9 +1,8 @@
 //===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index c092da2b6602..3ad6266d4f35 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -1,9 +1,8 @@
 //===- BranchRelaxation.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index 210699cbf239..cc4b2caa9bed 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index 93939e573b7b..bfc10cb3fef2 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -1,9 +1,8 @@
 //===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp
index c4799855a2b3..1a4d54231cfd 100644
--- a/lib/CodeGen/CFIInstrInserter.cpp
+++ b/lib/CodeGen/CFIInstrInserter.cpp
@@ -1,9 +1,8 @@
 //===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 02347b9f0b5c..7164fdfb7886 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -1,9 +1,8 @@
 //===- CalcSpillWeights.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 3593089b206d..497fcb147849 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -1,9 +1,8 @@
 //===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 66166482c78b..c37ed57781d4 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -1,9 +1,8 @@
 //===-- CodeGen.cpp -------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,14 +30,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeEarlyIfConverterPass(Registry);
   initializeEarlyMachineLICMPass(Registry);
   initializeEarlyTailDuplicatePass(Registry);
-  initializeExpandISelPseudosPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeExpandPostRAPass(Registry);
   initializeFEntryInserterPass(Registry);
+  initializeFinalizeISelPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
   initializeFuncletLayoutPass(Registry);
   initializeGCMachineCodeAnalysisPass(Registry);
   initializeGCModuleInfoPass(Registry);
+  initializeHardwareLoopsPass(Registry);
   initializeIfConverterPass(Registry);
   initializeImplicitNullChecksPass(Registry);
   initializeIndirectBrExpandPassPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c35f8666fa3c..52b4bbea012b 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -292,15 +293,16 @@ class TypePromotionTransaction;
     /// Keep track of SExt promoted.
     ValueToSExts ValToSExtendedUses;
 
-    /// True if CFG is modified in any way.
-    bool ModifiedDT;
-
     /// True if optimizing for size.
     bool OptSize;
 
     /// DataLayout for the Function being processed.
     const DataLayout *DL = nullptr;
 
+    /// Building the dominator tree can be expensive, so we only build it
+    /// lazily and update it when required.
+    std::unique_ptr<DominatorTree> DT;
+
   public:
     static char ID; // Pass identification, replacement for typeid
 
@@ -339,6 +341,13 @@ class TypePromotionTransaction;
       }
     }
 
+    // Get the DominatorTree, building if necessary.
+    DominatorTree &getDT(Function &F) {
+      if (!DT)
+        DT = llvm::make_unique<DominatorTree>(F);
+      return *DT;
+    }
+
     bool eliminateFallThrough(Function &F);
     bool eliminateMostlyEmptyBlocks(Function &F);
     BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -355,11 +364,12 @@ class TypePromotionTransaction;
     bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *Load);
+    bool optimizeShiftInst(BinaryOperator *BO);
     bool optimizeSelectInst(SelectInst *SI);
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
     bool optimizeSwitchInst(SwitchInst *SI);
     bool optimizeExtractElementInst(Instruction *Inst);
-    bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+    bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
     bool placeDbgValues(Function &F);
     bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                       LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -374,8 +384,15 @@ class TypePromotionTransaction;
         bool AllowPromotionWithoutCommonHeader,
         bool HasPromoted, TypePromotionTransaction &TPT,
         SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
-    bool splitBranchCondition(Function &F);
+    bool splitBranchCondition(Function &F, bool &ModifiedDT);
     bool simplifyOffsetableRelocate(Instruction &I);
+
+    bool tryToSinkFreeOperands(Instruction *I);
+    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+                                     Intrinsic::ID IID);
+    bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
+    bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+    bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
   };
 
 } // end anonymous namespace
@@ -401,7 +418,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   InsertedInsts.clear();
   PromotedInsts.clear();
 
-  ModifiedDT = false;
   if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
     TM = &TPC->getTM<TargetMachine>();
     SubtargetInfo = TM->getSubtargetImpl(F);
@@ -413,7 +429,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   BPI.reset(new BranchProbabilityInfo(F, *LI));
   BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
-  OptSize = F.optForSize();
+  OptSize = F.hasOptSize();
 
   ProfileSummaryInfo *PSI =
       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -444,8 +460,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   // unconditional branch.
   EverMadeChange |= eliminateMostlyEmptyBlocks(F);
 
+  bool ModifiedDT = false;
   if (!DisableBranchOpts)
-    EverMadeChange |= splitBranchCondition(F);
+    EverMadeChange |= splitBranchCondition(F, ModifiedDT);
 
   // Split some critical edges where one of the sources is an indirect branch,
   // to help generate sane code for PHIs involving such edges.
@@ -454,6 +471,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
+    DT.reset();
     for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = &*I++;
       bool ModifiedDTOnIteration = false;
@@ -654,6 +672,16 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
         BB->getSinglePredecessor()->getSingleSuccessor()))
     return false;
 
+  // Skip merging if the block's successor is also a successor to any callbr
+  // that leads to this block.
+  // FIXME: Is this really needed? Is this a correctness issue?
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+      for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+        if (DestBB == CBI->getSuccessor(i))
+          return false;
+  }
+
   // Try to skip merging if the unique predecessor of BB is terminated by a
   // switch or indirect branch instruction, and BB is used as an incoming block
   // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
@@ -1040,7 +1068,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
   return MadeChange;
 }
 
-/// SinkCast - Sink the specified cast instruction into its user blocks
+/// Sink the specified cast instruction into its user blocks.
 static bool SinkCast(CastInst *CI) {
   BasicBlock *DefBB = CI->getParent();
 
@@ -1114,8 +1142,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
   // Sink only "cheap" (or nop) address-space casts.  This is a weaker condition
   // than sinking only nop casts, but is helpful on some platforms.
   if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
-    if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
-                                  ASC->getDestAddressSpace()))
+    if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+                                 ASC->getDestAddressSpace()))
       return false;
   }
 
@@ -1148,54 +1176,169 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
   return SinkCast(CI);
 }
 
-/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
-/// possible.
-///
-/// Return true if any changes were made.
-static bool CombineUAddWithOverflow(CmpInst *CI) {
-  Value *A, *B;
-  Instruction *AddI;
-  if (!match(CI,
-             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+                                                 CmpInst *Cmp,
+                                                 Intrinsic::ID IID) {
+  if (BO->getParent() != Cmp->getParent()) {
+    // We used to use a dominator tree here to allow multi-block optimization.
+    // But that was problematic because:
+    // 1. It could cause a perf regression by hoisting the math op into the
+    //    critical path.
+    // 2. It could cause a perf regression by creating a value that was live
+    //    across multiple blocks and increasing register pressure.
+    // 3. Use of a dominator tree could cause large compile-time regression.
+    //    This is because we recompute the DT on every change in the main CGP
+    //    run-loop. The recomputing is probably unnecessary in many cases, so if
+    //    that was fixed, using a DT here would be ok.
+    return false;
+  }
+
+  // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+  Value *Arg0 = BO->getOperand(0);
+  Value *Arg1 = BO->getOperand(1);
+  if (BO->getOpcode() == Instruction::Add &&
+      IID == Intrinsic::usub_with_overflow) {
+    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+  }
+
+  // Insert at the first instruction of the pair.
+  Instruction *InsertPt = nullptr;
+  for (Instruction &Iter : *Cmp->getParent()) {
+    if (&Iter == BO || &Iter == Cmp) {
+      InsertPt = &Iter;
+      break;
+    }
+  }
+  assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
+
+  IRBuilder<> Builder(InsertPt);
+  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
+  Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+  Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+  BO->replaceAllUsesWith(Math);
+  Cmp->replaceAllUsesWith(OV);
+  BO->eraseFromParent();
+  Cmp->eraseFromParent();
+  return true;
+}
+
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+                                                   BinaryOperator *&Add) {
+  // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+  // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+
+  // We are not expecting non-canonical/degenerate code. Just bail out.
+  if (isa<Constant>(A))
+    return false;
+
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+    B = ConstantInt::get(B->getType(), 1);
+  else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+    B = ConstantInt::get(B->getType(), -1);
+  else
     return false;
 
-  Type *Ty = AddI->getType();
-  if (!isa<IntegerType>(Ty))
+  // Check the users of the variable operand of the compare looking for an add
+  // with the adjusted constant.
+  for (User *U : A->users()) {
+    if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+      Add = cast<BinaryOperator>(U);
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Try to combine the compare into a call to the llvm.uadd.with.overflow
+/// intrinsic. Return true if any changes were made.
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+                                               bool &ModifiedDT) {
+  Value *A, *B;
+  BinaryOperator *Add;
+  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+    if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+      return false;
+
+  if (!TLI->shouldFormOverflowOp(ISD::UADDO,
+                                 TLI->getValueType(*DL, Add->getType())))
     return false;
 
-  // We don't want to move around uses of condition values this late, so we we
+  // We don't want to move around uses of condition values this late, so we
   // check if it is legal to create the call to the intrinsic in the basic
-  // block containing the icmp:
+  // block containing the icmp.
+  if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
+    return false;
 
-  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
     return false;
 
-#ifndef NDEBUG
-  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
-  // for now:
-  if (AddI->hasOneUse())
-    assert(*AddI->user_begin() == CI && "expected!");
-#endif
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
+  return true;
+}
+
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+                                               bool &ModifiedDT) {
+  // We are not expecting non-canonical/degenerate code. Just bail out.
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  if (isa<Constant>(A) && isa<Constant>(B))
+    return false;
+
+  // (A u> B) is the same as (B u< A); swap operands to simplify matching.
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A == 0) is the same as (A u< 1).
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+    B = ConstantInt::get(B->getType(), 1);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A != 0) is the same as (0 u< A).
+  if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return false;
+
+  // Walk the users of a variable operand of a compare looking for a subtract or
+  // add with that same operand. Also match the 2nd operand of the compare to
+  // the add/sub, but that may be a negated constant operand of an add.
+  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+  BinaryOperator *Sub = nullptr;
+  for (User *U : CmpVariableOperand->users()) {
+    // A - B, A u< B --> usubo(A, B)
+    if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+
+    // A + (-C), A u< C (canonicalized form of (sub A, C))
+    const APInt *CmpC, *AddC;
+    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+  }
+  if (!Sub)
+    return false;
+
+  if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+                                 TLI->getValueType(*DL, Sub->getType())))
+    return false;
+
+  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+    return false;
 
-  Module *M = CI->getModule();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-
-  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
-
-  DebugLoc Loc = CI->getDebugLoc();
-  auto *UAddWithOverflow =
-      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
-  UAddWithOverflow->setDebugLoc(Loc);
-  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
-  UAdd->setDebugLoc(Loc);
-  auto *Overflow =
-      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
-  Overflow->setDebugLoc(Loc);
-
-  CI->replaceAllUsesWith(Overflow);
-  AddI->replaceAllUsesWith(UAdd);
-  CI->eraseFromParent();
-  AddI->eraseFromParent();
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
   return true;
 }
 
@@ -1205,18 +1348,19 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
 /// lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
-  BasicBlock *DefBB = CI->getParent();
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+  if (TLI.hasMultipleConditionRegisters())
+    return false;
 
   // Avoid sinking soft-FP comparisons, since this can move them into a loop.
-  if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+  if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
     return false;
 
   // Only insert a cmp in each block once.
   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
 
   bool MadeChange = false;
-  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+  for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
        UI != E; ) {
     Use &TheUse = UI.getUse();
     Instruction *User = cast<Instruction>(*UI);
@@ -1230,6 +1374,7 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
 
     // Figure out which BB this cmp is used in.
     BasicBlock *UserBB = User->getParent();
+    BasicBlock *DefBB = Cmp->getParent();
 
     // If this user is in the same block as the cmp, don't change the cmp.
     if (UserBB == DefBB) continue;
@@ -1241,10 +1386,11 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
       assert(InsertPt != UserBB->end());
       InsertedCmp =
-          CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
-                          CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+          CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+                          Cmp->getOperand(0), Cmp->getOperand(1), "",
+                          &*InsertPt);
       // Propagate the debug info.
-      InsertedCmp->setDebugLoc(CI->getDebugLoc());
+      InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
     }
 
     // Replace a use of the cmp with a use of the new cmp.
@@ -1254,19 +1400,22 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
   }
 
   // If we removed all uses, nuke the cmp.
-  if (CI->use_empty()) {
-    CI->eraseFromParent();
+  if (Cmp->use_empty()) {
+    Cmp->eraseFromParent();
     MadeChange = true;
   }
 
   return MadeChange;
 }
 
-static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
-  if (SinkCmpExpression(CI, TLI))
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+  if (sinkCmpExpression(Cmp, *TLI))
     return true;
 
-  if (CombineUAddWithOverflow(CI))
+  if (combineToUAddWithOverflow(Cmp, ModifiedDT))
+    return true;
+
+  if (combineToUSubWithOverflow(Cmp, ModifiedDT))
     return true;
 
   return false;
@@ -1301,7 +1450,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
   for (auto *U : AndI->users()) {
     Instruction *User = cast<Instruction>(U);
 
-    // Only sink for and mask feeding icmp with 0.
+    // Only sink 'and' feeding icmp with 0.
     if (!isa<ICmpInst>(User))
       return false;
 
@@ -1704,9 +1853,23 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
   if (II) {
     switch (II->getIntrinsicID()) {
     default: break;
+    case Intrinsic::experimental_widenable_condition: {
+      // Give up on future widening opportunities so that we can fold away dead
+      // paths and merge blocks before going into block-local instruction
+      // selection.
+      if (II->use_empty()) {
+        II->eraseFromParent();
+        return true;
+      }
+      Constant *RetVal = ConstantInt::getTrue(II->getContext());
+      resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+        replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+      });
+      return true;
+    }
     case Intrinsic::objectsize: {
       // Lower all uses of llvm.objectsize.*
-      ConstantInt *RetVal =
+      Value *RetVal =
           lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
 
       resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
@@ -1735,6 +1898,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
       InsertedInsts.insert(ExtVal);
       return true;
     }
+
     case Intrinsic::launder_invariant_group:
     case Intrinsic::strip_invariant_group: {
       Value *ArgVal = II->getArgOperand(0);
@@ -1818,7 +1982,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
 ///   %tmp2 = tail call i32 @f2()
 ///   ret i32 %tmp2
 /// @endcode
-bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
   if (!TLI)
     return false;
 
@@ -1846,10 +2010,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
   // return is the first instruction in the block.
   if (PN) {
     BasicBlock::iterator BI = BB->begin();
-    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
-    if (&*BI == BCI)
-      // Also skip over the bitcast.
-      ++BI;
+    // Skip over debug and the bitcast.
+    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
     if (&*BI != RetI)
       return false;
   } else {
@@ -1865,7 +2027,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
   SmallVector<CallInst*, 4> TailCalls;
   if (PN) {
     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
-      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+      // Look through bitcasts.
+      Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
+      CallInst *CI = dyn_cast<CallInst>(IncomingVal);
       // Make sure the phi value is indeed produced by the tail call.
       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
           TLI->mayBeEmittedAsTailCall(CI) &&
@@ -1929,6 +2093,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
   Value *BaseReg = nullptr;
   Value *ScaledReg = nullptr;
   Value *OriginalValue = nullptr;
+  bool InBounds = true;
 
   enum FieldName {
     NoField        = 0x00,
@@ -1940,6 +2105,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
     MultipleFields = 0xff
   };
 
+
   ExtAddrMode() = default;
 
   void print(raw_ostream &OS) const;
@@ -1958,6 +2124,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
         ScaledReg->getType() != other.ScaledReg->getType())
       return MultipleFields;
 
+    // Conservatively reject 'inbounds' mismatches.
+    if (InBounds != other.InBounds)
+      return MultipleFields;
+
     // Check each field to see if it differs.
     unsigned Result = NoField;
     if (BaseReg != other.BaseReg)
@@ -2056,6 +2226,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
 void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
+  if (InBounds)
+    OS << "inbounds ";
   if (BaseGV) {
     OS << (NeedPlus ? " + " : "")
        << "GV:";
@@ -3126,6 +3298,8 @@ private:
                     PhiNodeSet &PhiNodesToMatch) {
     SmallVector<PHIPair, 8> WorkList;
     Matcher.insert({ PHI, Candidate });
+    SmallSet<PHINode *, 8> MatchedPHIs;
+    MatchedPHIs.insert(PHI);
     WorkList.push_back({ PHI, Candidate });
     SmallSet<PHIPair, 8> Visited;
     while (!WorkList.empty()) {
@@ -3158,8 +3332,10 @@ private:
         if (Matcher.count({ FirstPhi, SecondPhi }))
           continue;
         // So the values are different and does not match. So we need them to
-        // match.
-        Matcher.insert({ FirstPhi, SecondPhi });
+        // match. (But we register no more than one match per PHI node, so that
+        // we won't later try to replace them twice.)
+        if (MatchedPHIs.insert(FirstPhi).second)
+          Matcher.insert({ FirstPhi, SecondPhi });
         // But me must check it.
         WorkList.push_back({ FirstPhi, SecondPhi });
       }
@@ -3354,6 +3530,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.InBounds = false;
     TestAddrMode.ScaledReg = AddLHS;
     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
 
@@ -3928,6 +4105,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
 
+    AddrMode.InBounds = false;
     if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
         matchAddr(AddrInst->getOperand(0), Depth+1))
       return true;
@@ -3954,6 +4132,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
   case Instruction::Mul:
   case Instruction::Shl: {
     // Can only handle X*C and X << C.
+    AddrMode.InBounds = false;
     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
     if (!RHS || RHS->getBitWidth() > 64)
       return false;
@@ -4005,8 +4184,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
       if (ConstantOffset == 0 ||
           TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
         // Check to see if we can fold the base pointer in too.
-        if (matchAddr(AddrInst->getOperand(0), Depth+1))
+        if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+          if (!cast<GEPOperator>(AddrInst)->isInBounds())
+            AddrMode.InBounds = false;
           return true;
+        }
       } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
                  TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
                  ConstantOffset > 0) {
@@ -4020,15 +4202,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
         if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
             (BaseI && !isa<CastInst>(BaseI) &&
              !isa<GetElementPtrInst>(BaseI))) {
-          // If the base is an instruction, make sure the GEP is not in the same
-          // basic block as the base. If the base is an argument or global
-          // value, make sure the GEP is not in the entry block.  Otherwise,
-          // instruction selection can undo the split.  Also make sure the
-          // parent block allows inserting non-PHI instructions before the
-          // terminator.
+          // Make sure the parent block allows inserting non-PHI instructions
+          // before the terminator.
           BasicBlock *Parent =
               BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
-          if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+          if (!Parent->getTerminator()->isEHPad())
             LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
         }
       }
@@ -4042,6 +4220,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
 
     // See if the scale and offset amount is valid for this target.
     AddrMode.BaseOffs += ConstantOffset;
+    if (!cast<GEPOperator>(AddrInst)->isInBounds())
+      AddrMode.InBounds = false;
 
     // Match the base operand of the GEP.
     if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4268,7 +4448,7 @@ static bool FindAllMemoryUses(
   if (!MightBeFoldableInst(I))
     return true;
 
-  const bool OptSize = I->getFunction()->optForSize();
+  const bool OptSize = I->getFunction()->hasOptSize();
 
   // Loop over all the uses, recursively processing them.
   for (Use &U : I->uses()) {
@@ -4556,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
 
     GetElementPtrInst *GEP = LargeOffsetGEP.first;
-    if (GEP && GEP->getParent() != MemoryInst->getParent() &&
-        !NewGEPBases.count(GEP)) {
+    if (GEP && !NewGEPBases.count(GEP)) {
       // If splitting the underlying data structure can reduce the offset of a
       // GEP, collect the GEP.  Skip the GEPs that are the new bases of
       // previously split data structures.
@@ -4727,7 +4906,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
           // SDAG consecutive load/store merging.
           if (ResultPtr->getType() != I8PtrTy)
             ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-          ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+          ResultPtr =
+              AddrMode.InBounds
+                  ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                              "sunkaddr")
+                  : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
         }
 
         ResultIndex = V;
@@ -4738,7 +4921,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       } else {
         if (ResultPtr->getType() != I8PtrTy)
           ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-        SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+        SunkAddr =
+            AddrMode.InBounds
+                ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                            "sunkaddr")
+                : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
       }
 
       if (SunkAddr->getType() != Addr->getType())
@@ -5037,7 +5224,6 @@ bool CodeGenPrepare::tryToPromoteExts(
 
 /// Merging redundant sexts when one is dominating the other.
 bool CodeGenPrepare::mergeSExts(Function &F) {
-  DominatorTree DT(F);
   bool Changed = false;
   for (auto &Entry : ValToSExtendedUses) {
     SExts &Insts = Entry.second;
@@ -5048,7 +5234,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
         continue;
       bool inserted = false;
       for (auto &Pt : CurPts) {
-        if (DT.dominates(Inst, Pt)) {
+        if (getDT(F).dominates(Inst, Pt)) {
           Pt->replaceAllUsesWith(Inst);
           RemovedInsts.insert(Pt);
           Pt->removeFromParent();
@@ -5057,7 +5243,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
           Changed = true;
           break;
         }
-        if (!DT.dominates(Pt, Inst))
+        if (!getDT(F).dominates(Pt, Inst))
           // Give up if we need to merge in a common dominator as the
           // experiments show it is not profitable.
           continue;
@@ -5715,7 +5901,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
 static Value *getTrueOrFalseValue(
     SelectInst *SI, bool isTrue,
     const SmallPtrSet<const Instruction *, 2> &Selects) {
-  Value *V;
+  Value *V = nullptr;
 
   for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
        DefSI = dyn_cast<SelectInst>(V)) {
@@ -5723,9 +5909,44 @@ static Value *getTrueOrFalseValue(
            "The condition of DefSI does not match with SI");
     V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
   }
+
+  assert(V && "Failed to get select true/false value");
   return V;
 }
 
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+  assert(Shift->isShift() && "Expected a shift");
+
+  // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+  // general vector shifts, and (3) the shift amount is a select-of-splatted
+  // values, hoist the shifts before the select:
+  //   shift Op0, (select Cond, TVal, FVal) -->
+  //   select Cond, (shift Op0, TVal), (shift Op0, FVal)
+  //
+  // This is inverting a generic IR transform when we know that the cost of a
+  // general vector shift is more than the cost of 2 shift-by-scalars.
+  // We can't do this effectively in SDAG because we may not be able to
+  // determine if the select operands are splats from within a basic block.
+  Type *Ty = Shift->getType();
+  if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+    return false;
+  Value *Cond, *TVal, *FVal;
+  if (!match(Shift->getOperand(1),
+             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+    return false;
+  if (!isSplatValue(TVal) || !isSplatValue(FVal))
+    return false;
+
+  IRBuilder<> Builder(Shift);
+  BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+  Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+  Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+  Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+  Shift->replaceAllUsesWith(NewSel);
+  Shift->eraseFromParent();
+  return true;
+}
+
 /// If we have a SelectInst that will likely profit from branch prediction,
 /// turn it into a branch.
 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -5769,7 +5990,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
       !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
     return false;
 
-  ModifiedDT = true;
+  // The DominatorTree needs to be rebuilt by any consumers after this
+  // transformation. We simply reset here rather than setting the ModifiedDT
+  // flag to avoid restarting the function walk in runOnFunction for each
+  // select optimized.
+  DT.reset();
 
   // Transform a sequence like this:
   //    start:
@@ -5943,6 +6168,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
       InsertedShuffle =
           new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
                                 SVI->getOperand(2), "", &*InsertPt);
+      InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
     }
 
     UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -5958,6 +6184,48 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
   return MadeChange;
 }
 
+bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
+  // If the operands of I can be folded into a target instruction together with
+  // I, duplicate and sink them.
+  SmallVector<Use *, 4> OpsToSink;
+  if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
+    return false;
+
+  // OpsToSink can contain multiple uses in a use chain (e.g.
+  // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
+  // uses must come first, which means they are sunk first, temporarily creating
+  // invalid IR. This will be fixed once their dominated users are sunk and
+  // updated.
+  BasicBlock *TargetBB = I->getParent();
+  bool Changed = false;
+  SmallVector<Use *, 4> ToReplace;
+  for (Use *U : OpsToSink) {
+    auto *UI = cast<Instruction>(U->get());
+    if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+      continue;
+    ToReplace.push_back(U);
+  }
+
+  SmallPtrSet<Instruction *, 4> MaybeDead;
+  for (Use *U : ToReplace) {
+    auto *UI = cast<Instruction>(U->get());
+    Instruction *NI = UI->clone();
+    MaybeDead.insert(UI);
+    LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
+    NI->insertBefore(I);
+    InsertedInsts.insert(NI);
+    U->set(NI);
+    Changed = true;
+  }
+
+  // Remove instructions that are dead after sinking.
+  for (auto *I : MaybeDead)
+    if (!I->hasNUsesOrMore(1))
+      I->eraseFromParent();
+
+  return Changed;
+}
+
 bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
   if (!TLI || !DL)
     return false;
@@ -6412,14 +6680,17 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
                                 const TargetLowering &TLI) {
   // Handle simple but common cases only.
   Type *StoreType = SI.getValueOperand()->getType();
-  if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+  if (!DL.typeSizeEqualsStoreSize(StoreType) ||
       DL.getTypeSizeInBits(StoreType) == 0)
     return false;
 
   unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
   Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
-  if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
-      DL.getTypeSizeInBits(SplitStoreType))
+  if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
+    return false;
+
+  // Don't split the store if it is volatile.
+  if (SI.isVolatile())
     return false;
 
   // Match the following patterns:
@@ -6658,11 +6929,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   if (InsertedInsts.count(I))
     return false;
 
+  // TODO: Move into the switch on opcode below here.
   if (PHINode *P = dyn_cast<PHINode>(I)) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
     // trivial PHI, go ahead and zap it here.
     if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+      LargeOffsetGEPMap.erase(P);
       P->replaceAllUsesWith(V);
       P->eraseFromParent();
       ++NumPHIsElim;
@@ -6700,9 +6973,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (!TLI || !TLI->hasMultipleConditionRegisters())
-      return OptimizeCmpExpression(CI, TLI);
+  if (auto *Cmp = dyn_cast<CmpInst>(I))
+    if (TLI && optimizeCmp(Cmp, ModifiedDT))
+      return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
@@ -6745,13 +7018,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
       EnableAndCmpSinking && TLI)
     return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
 
+  // TODO: Move this into the switch on opcode - it handles shifts already.
   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
                 BinOp->getOpcode() == Instruction::LShr)) {
     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
     if (TLI && CI && TLI->hasExtractBitsInsn())
-      return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
-    return false;
+      if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+        return true;
   }
 
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -6772,20 +7045,25 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
     return false;
   }
 
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    return optimizeCallInst(CI, ModifiedDT);
-
-  if (SelectInst *SI = dyn_cast<SelectInst>(I))
-    return optimizeSelectInst(SI);
-
-  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
-    return optimizeShuffleVectorInst(SVI);
-
-  if (auto *Switch = dyn_cast<SwitchInst>(I))
-    return optimizeSwitchInst(Switch);
+  if (tryToSinkFreeOperands(I))
+    return true;
 
-  if (isa<ExtractElementInst>(I))
-    return optimizeExtractElementInst(I);
+  switch (I->getOpcode()) {
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    return optimizeShiftInst(cast<BinaryOperator>(I));
+  case Instruction::Call:
+    return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+  case Instruction::Select:
+    return optimizeSelectInst(cast<SelectInst>(I));
+  case Instruction::ShuffleVector:
+    return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
+  case Instruction::Switch:
+    return optimizeSwitchInst(cast<SwitchInst>(I));
+  case Instruction::ExtractElement:
+    return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+  }
 
   return false;
 }
@@ -6833,7 +7111,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
       }
     }
   }
-  MadeChange |= dupRetToEnableTailCallOpts(&BB);
+  MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
 
   return MadeChange;
 }
@@ -6909,7 +7187,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
 ///
 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
 ///
-bool CodeGenPrepare::splitBranchCondition(Function &F) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
   if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
     return false;
 
@@ -6983,11 +7261,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
       std::swap(TBB, FBB);
 
     // Replace the old BB with the new BB.
-    for (PHINode &PN : TBB->phis()) {
-      int i;
-      while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
-        PN.setIncomingBlock(i, TmpBB);
-    }
+    TBB->replacePhiUsesWith(&BB, TmpBB);
 
     // Add another incoming edge form the new BB.
     for (PHINode &PN : FBB->phis()) {
@@ -7066,10 +7340,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
       }
     }
 
-    // Note: No point in getting fancy here, since the DT info is never
-    // available to CodeGenPrepare.
     ModifiedDT = true;
-
     MadeChange = true;
 
     LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5a5960b16130..4144c243a341 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
 //===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 09c4423a2f05..4e127ce525c8 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 68034afe98d5..b99be5d7a87c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -1,9 +1,8 @@
 //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This class implements a deterministic finite automaton (DFA) based
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index ff44c5660bad..049ce7063307 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -1,9 +1,8 @@
 //===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,9 +81,11 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
         if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
           return false;
       } else {
-        if (!MRI->use_nodbg_empty(Reg))
-          // This def has a non-debug use. Don't delete the instruction!
-          return false;
+        for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+          if (&Use != MI)
+            // This def has a non-debug use. Don't delete the instruction!
+            return false;
+        }
       }
     }
   }
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index c83db476a4de..fe78acf4d80a 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -1,9 +1,8 @@
 //===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 4586649d17f0..ddd6cec5a178 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -1,9 +1,8 @@
 //===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,7 +45,7 @@ namespace {
 
   class DwarfEHPrepare : public FunctionPass {
     // RewindFunction - _Unwind_Resume or the target equivalent.
-    Constant *RewindFunction = nullptr;
+    FunctionCallee RewindFunction = nullptr;
 
     DominatorTree *DT = nullptr;
     const TargetLowering *TLI = nullptr;
@@ -146,7 +145,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
   size_t ResumeIndex = 0;
   for (auto *RI : Resumes) {
     for (auto *LP : CleanupLPads) {
-      if (isPotentiallyReachable(LP, RI, DT)) {
+      if (isPotentiallyReachable(LP, RI, nullptr, DT)) {
         ResumeReachable.set(ResumeIndex);
         break;
       }
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 364e1f030942..0a83760befaa 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -1,9 +1,8 @@
 //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 54c53eb16312..486720cadd27 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -1,9 +1,8 @@
 //===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden,
 char EdgeBundles::ID = 0;
 
 INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
-                /* cfg = */true, /* analysis = */ true)
+                /* cfg = */true, /* is_analysis = */ true)
 
 char &llvm::EdgeBundlesID = EdgeBundles::ID;
 
diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp
index 458dcf2b0e26..a2dd5eee33b7 100644
--- a/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDomainFix.cpp
@@ -1,9 +1,8 @@
 //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -337,11 +336,10 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
     }
     // Sorted insertion.
     // Enables giving priority to the latest domains during merging.
-    auto I = std::upper_bound(
-        Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) {
-          return RDA->getReachingDef(mi, RC->getRegister(LHS)) <
-                 RDA->getReachingDef(mi, RC->getRegister(RHS));
-        });
+    const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
+    auto I = partition_point(Regs, [&](int I) {
+      return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
+    });
     Regs.insert(I, rx);
   }
 
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
deleted file mode 100644
index ec586a2caea3..000000000000
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Expand Pseudo-instructions produced by ISel. These are usually to allow
-// the expansion to contain control flow, such as a conditional move
-// implemented with a conditional branch and a phi, or an atomic operation
-// implemented with a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "expand-isel-pseudos"
-
-namespace {
-  class ExpandISelPseudos : public MachineFunctionPass {
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    ExpandISelPseudos() : MachineFunctionPass(ID) {}
-
-  private:
-    bool runOnMachineFunction(MachineFunction &MF) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-} // end anonymous namespace
-
-char ExpandISelPseudos::ID = 0;
-char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
-INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE,
-                "Expand ISel Pseudo-instructions", false, false)
-
-bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
-  bool Changed = false;
-  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
-
-  // Iterate through each instruction in the function, looking for pseudos.
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
-    MachineBasicBlock *MBB = &*I;
-    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
-         MBBI != MBBE; ) {
-      MachineInstr &MI = *MBBI++;
-
-      // If MI is a pseudo, expand it.
-      if (MI.usesCustomInsertionHook()) {
-        Changed = true;
-        MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
-        // The expansion may involve new basic blocks.
-        if (NewMBB != MBB) {
-          MBB = NewMBB;
-          I = NewMBB->getIterator();
-          MBBI = NewMBB->begin();
-          MBBE = NewMBB->end();
-        }
-      }
-    }
-  }
-
-  return Changed;
-}
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index ee7683adbcdd..b425482e6adf 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -1,9 +1,8 @@
 //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,6 +36,14 @@ static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
     cl::desc("The number of loads per basic block for inline expansion of "
              "memcmp that is only being compared against zero."));
 
+static cl::opt<unsigned> MaxLoadsPerMemcmp(
+    "max-loads-per-memcmp", cl::Hidden,
+    cl::desc("Set maximum number of loads used in expanded memcmp"));
+
+static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
+    "max-loads-per-memcmp-opt-size", cl::Hidden,
+    cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
+
 namespace {
 
 
@@ -106,8 +113,7 @@ class MemCmpExpansion {
 public:
   MemCmpExpansion(CallInst *CI, uint64_t Size,
                   const TargetTransformInfo::MemCmpExpansionOptions &Options,
-                  unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
-                  unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);
+                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
 
   unsigned getNumBlocks();
   uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -196,16 +202,10 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
 MemCmpExpansion::MemCmpExpansion(
     CallInst *const CI, uint64_t Size,
     const TargetTransformInfo::MemCmpExpansionOptions &Options,
-    const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
-    const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
-    : CI(CI),
-      Size(Size),
-      MaxLoadSize(0),
-      NumLoadsNonOneByte(0),
-      NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
-      IsUsedForZeroCmp(IsUsedForZeroCmp),
-      DL(TheDataLayout),
-      Builder(CI) {
+    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+    : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
+      NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
   assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
   llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -216,17 +216,17 @@ MemCmpExpansion::MemCmpExpansion(
   MaxLoadSize = LoadSizes.front();
   // Compute the decomposition.
   unsigned GreedyNumLoadsNonOneByte = 0;
-  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
                                            GreedyNumLoadsNonOneByte);
   NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
-  assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
   // If we allow overlapping loads and the load sequence is not already optimal,
   // use overlapping loads.
   if (Options.AllowOverlappingLoads &&
       (LoadSequence.empty() || LoadSequence.size() > 2)) {
     unsigned OverlappingNumLoadsNonOneByte = 0;
     auto OverlappingLoads = computeOverlappingLoadSequence(
-        Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+        Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
     if (!OverlappingLoads.empty() &&
         (LoadSequence.empty() ||
          OverlappingLoads.size() < LoadSequence.size())) {
@@ -234,7 +234,7 @@ MemCmpExpansion::MemCmpExpansion(
       NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
     }
   }
-  assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
 }
 
 unsigned MemCmpExpansion::getNumBlocks() {
@@ -316,7 +316,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
   assert(LoadIndex < getNumLoads() &&
          "getCompareLoadPairs() called with no remaining loads");
   std::vector<Value *> XorList, OrList;
-  Value *Diff;
+  Value *Diff = nullptr;
 
   const unsigned NumLoads =
       std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
@@ -393,6 +393,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
     while (OrList.size() != 1) {
       OrList = pairWiseOr(OrList);
     }
+
+    assert(Diff && "Failed to find comparison diff");
     Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
   }
 
@@ -722,7 +724,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   NumMemCmpCalls++;
 
   // Early exit from expansion if -Oz.
-  if (CI->getFunction()->optForMinSize())
+  if (CI->getFunction()->hasMinSize())
     return false;
 
   // Early exit from expansion if size is not a constant.
@@ -739,18 +741,21 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
   const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
-  const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+  auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+                                            IsUsedForZeroCmp);
   if (!Options) return false;
 
-  const unsigned MaxNumLoads =
-      TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+    Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
+
+  if (CI->getFunction()->hasOptSize() &&
+      MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+    Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
 
-  unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
-                                  ? MemCmpEqZeroNumLoadsPerBlock
-                                  : TLI->getMemcmpEqZeroLoadsPerBlock();
+  if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+    Options.MaxNumLoads = MaxLoadsPerMemcmp;
 
-  MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
-                            IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
+  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
 
   // Don't expand if this will require more loads than desired by the target.
   if (Expansion.getNumLoads() == 0) {
@@ -824,7 +829,8 @@ bool ExpandMemCmpPass::runOnBlock(
     }
     LibFunc Func;
     if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
-        Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) {
+        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
+        expandMemCmp(CI, TTI, TL, &DL)) {
       return true;
     }
   }
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index f2a2bcbb94b1..0ab70aff7dc4 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -1,9 +1,8 @@
 //===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index 7552ba8cd85d..1069a2423b8b 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -1,9 +1,8 @@
 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,9 +29,9 @@ namespace {
 
 unsigned getOpcode(Intrinsic::ID ID) {
   switch (ID) {
-  case Intrinsic::experimental_vector_reduce_fadd:
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
     return Instruction::FAdd;
-  case Intrinsic::experimental_vector_reduce_fmul:
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
     return Instruction::FMul;
   case Intrinsic::experimental_vector_reduce_add:
     return Instruction::Add;
@@ -84,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
       Worklist.push_back(II);
 
   for (auto *II : Worklist) {
+    if (!TTI->shouldExpandReduction(II))
+      continue;
+
+    FastMathFlags FMF =
+        isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+    Intrinsic::ID ID = II->getIntrinsicID();
+    RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+    Value *Rdx = nullptr;
     IRBuilder<> Builder(II);
-    bool IsOrdered = false;
-    Value *Acc = nullptr;
-    Value *Vec = nullptr;
-    auto ID = II->getIntrinsicID();
-    auto MRK = RecurrenceDescriptor::MRK_Invalid;
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
     switch (ID) {
-    case Intrinsic::experimental_vector_reduce_fadd:
-    case Intrinsic::experimental_vector_reduce_fmul:
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul: {
       // FMFs must be attached to the call, otherwise it's an ordered reduction
       // and it can't be handled by generating a shuffle sequence.
-      if (!II->getFastMathFlags().isFast())
-        IsOrdered = true;
-      Acc = II->getArgOperand(0);
-      Vec = II->getArgOperand(1);
-      break;
+      Value *Acc = II->getArgOperand(0);
+      Value *Vec = II->getArgOperand(1);
+      if (!FMF.allowReassoc())
+        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+      else {
+        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+                                  Acc, Rdx, "bin.rdx");
+      }
+    } break;
     case Intrinsic::experimental_vector_reduce_add:
     case Intrinsic::experimental_vector_reduce_mul:
     case Intrinsic::experimental_vector_reduce_and:
@@ -110,18 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
     case Intrinsic::experimental_vector_reduce_fmax:
-    case Intrinsic::experimental_vector_reduce_fmin:
-      Vec = II->getArgOperand(0);
-      MRK = getMRK(ID);
-      break;
+    case Intrinsic::experimental_vector_reduce_fmin: {
+      Value *Vec = II->getArgOperand(0);
+      Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+    } break;
     default:
       continue;
     }
-    if (!TTI->shouldExpandReduction(II))
-      continue;
-    Value *Rdx =
-        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
-                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
     II->replaceAllUsesWith(Rdx);
     II->eraseFromParent();
     Changed = true;
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
index 4ddf9f92836c..a122f490884e 100644
--- a/lib/CodeGen/FEntryInserter.cpp
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -1,9 +1,8 @@
 //===-- FEntryInsertion.cpp - Patchable prologues for LLVM -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 361558a0e562..600f72d320eb 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -1,9 +1,8 @@
 //===- FaultMaps.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/FinalizeISel.cpp b/lib/CodeGen/FinalizeISel.cpp
new file mode 100644
index 000000000000..772d7f71bb37
--- /dev/null
+++ b/lib/CodeGen/FinalizeISel.cpp
@@ -0,0 +1,76 @@
+//===-- llvm/CodeGen/FinalizeISel.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// This pass expands pseudo-instructions produced by ISel, fixes register
+/// reservations, and may adjust machine frame information.
+/// Pseudo-instructions allow an expansion to contain control flow, such as a
+/// conditional move implemented with a conditional branch and a phi, or an
+/// atomic operation implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "finalize-isel"
+
+namespace { // Keeps the pass class local to this file.
+  class FinalizeISel : public MachineFunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid.
+    FinalizeISel() : MachineFunctionPass(ID) {} // Hands ID to the base class.
+
+  private:
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      MachineFunctionPass::getAnalysisUsage(AU); // Base-class defaults only.
+    }
+  };
+} // end anonymous namespace
+
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+                "Finalize ISel and expand pseudo-instructions", false, false)
+
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false; // Set once any pseudo-instruction is expanded.
+  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+  // Walk every instruction of every block in MF, looking for pseudos.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+         MBBI != MBBE; ) {
+      MachineInstr &MI = *MBBI++; // Step past MI before it is expanded.
+
+      // If MI requests the custom insertion hook, let the target expand it.
+      if (MI.usesCustomInsertionHook()) {
+        Changed = true;
+        MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+        // If the expansion returns a different block, resume scanning there.
+        if (NewMBB != MBB) {
+          MBB = NewMBB;
+          I = NewMBB->getIterator(); // Outer loop then advances past NewMBB.
+          MBBI = NewMBB->begin();
+          MBBE = NewMBB->end();
+        }
+      }
+    }
+  }
+
+  TLI->finalizeLowering(MF); // Runs unconditionally; not reflected in Changed.
+
+  return Changed;
+}
diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp
index 581cd423f2d4..75f6d0b8f0bf 100644
--- a/lib/CodeGen/FuncletLayout.cpp
+++ b/lib/CodeGen/FuncletLayout.cpp
@@ -1,9 +1,8 @@
 //===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1c80556dfef5..9c53550eaa9d 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -1,9 +1,8 @@
 //===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index bc7beb6f6c2d..500dba9aea37 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -1,9 +1,8 @@
 //===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index e8ccd84b0b93..90571d090bfb 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -1,9 +1,8 @@
 //===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -214,7 +213,7 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
       }
       case Intrinsic::gcread: {
         // Replace a read barrier with a simple load.
-        Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+        Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
         Ld->takeName(CI);
         CI->replaceAllUsesWith(Ld);
         CI->eraseFromParent();
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6be4c16c6301..43d06b0f82e9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,9 +1,8 @@
 //===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 89c525c5ba15..4518dbee1a9f 100644
--- a/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -1,9 +1,8 @@
 //===- CSEInfo.cpp ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,8 +27,8 @@ void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
 }
 /// -----------------------------------------
 
-/// --------- CSEConfig ---------- ///
-bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+/// --------- CSEConfigFull ---------- ///
+bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
   switch (Opc) {
   default:
     break;
@@ -61,6 +60,17 @@ bool CSEConfig::shouldCSEOpc(unsigned Opc) {
 bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
   return Opc == TargetOpcode::G_CONSTANT;
 }
+
+std::unique_ptr<CSEConfigBase>
+llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+  std::unique_ptr<CSEConfigBase> Config;
+  if (Level == CodeGenOpt::None) // No optimization: CSE G_CONSTANT only.
+    Config = make_unique<CSEConfigConstantOnly>();
+  else // Every other level gets the full CSE configuration.
+    Config = make_unique<CSEConfigFull>();
+  return Config;
+}
+
 /// -----------------------------------------
 
 /// -------- GISelCSEInfo -------------//
@@ -139,7 +149,7 @@ MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
                                                     void *&InsertPos) {
   handleRecordedInsts();
   if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
-    LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+    LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;);
     return const_cast<MachineInstr *>(Inst->MI);
   }
   return nullptr;
@@ -158,14 +168,14 @@ void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
 void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
   if (shouldCSE(MI->getOpcode())) {
     TemporaryInsts.insert(MI);
-    LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+    LLVM_DEBUG(dbgs() << "CSEInfo::Recording new MI " << *MI);
   }
 }
 
 void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
   assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
   auto *UMI = InstrMapping.lookup(MI);
-  LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+  LLVM_DEBUG(dbgs() << "CSEInfo::Handling recorded MI " << *MI);
   if (UMI) {
     // Invalidate this MI.
     invalidateUniqueMachineInstr(UMI);
@@ -224,14 +234,14 @@ void GISelCSEInfo::analyze(MachineFunction &MF) {
     for (MachineInstr &MI : MBB) {
       if (!shouldCSE(MI.getOpcode()))
         continue;
-      LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+      LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI);
       insertInstr(&MI);
     }
   }
 }
 
 void GISelCSEInfo::releaseMemory() {
-  // print();
+  print();
   CSEMap.clear();
   InstrMapping.clear();
   UniqueInstrAllocator.Reset();
@@ -245,11 +255,11 @@ void GISelCSEInfo::releaseMemory() {
 }
 
 void GISelCSEInfo::print() {
-#ifndef NDEBUG
-  for (auto &It : OpcodeHitTable) {
-    dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
-  };
-#endif
+  LLVM_DEBUG(for (auto &It
+                  : OpcodeHitTable) {
+    dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second
+           << "\n";
+  };);
 }
 /// -----------------------------------------
 // ---- Profiling methods for FoldingSetNode --- //
@@ -349,8 +359,9 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
   return *this;
 }
 
-GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
-                                           bool Recompute) {
+GISelCSEInfo &
+GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt,
+                             bool Recompute) {
   if (!AlreadyComputed || Recompute) {
     Info.setCSEConfig(std::move(CSEOpt));
     Info.analyze(*MF);
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 863efe0c3e34..461bc6038c2c 100644
--- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -40,6 +39,7 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
   MachineInstr *MI =
       CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
   if (MI) {
+    CSEInfo->countOpcodeHit(MI->getOpcode());
     auto CurrPos = getInsertPt();
     if (!dominates(MI, CurrPos))
       CurMBB->splice(CurrPos, CurMBB, MI);
@@ -195,6 +195,12 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
   constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
   if (!canPerformCSEForOpc(Opc))
     return MachineIRBuilder::buildConstant(Res, Val);
+
+  // For vectors, CSE the element only for now.
+  LLT Ty = Res.getLLTTy(*getMRI());
+  if (Ty.isVector())
+    return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+
   FoldingSetNodeID ID;
   GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
   void *InsertPos = nullptr;
@@ -206,6 +212,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
     // Handle generating copies here.
     return generateCopiesIfRequired({Res}, MIB);
   }
+
   MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
   return memoizeMI(NewMIB, InsertPos);
 }
@@ -215,6 +222,12 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
   constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
   if (!canPerformCSEForOpc(Opc))
     return MachineIRBuilder::buildFConstant(Res, Val);
+
+  // For vectors, CSE the element only for now.
+  LLT Ty = Res.getLLTTy(*getMRI());
+  if (Ty.isVector())
+    return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+
   FoldingSetNodeID ID;
   GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
   void *InsertPos = nullptr;
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 724ecedf3b3f..a5d8205a34a8 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -1,9 +1,8 @@
 //===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -13,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,13 +21,17 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 
+#define DEBUG_TYPE "call-lowering"
+
 using namespace llvm;
 
 void CallLowering::anchor() {}
 
-bool CallLowering::lowerCall(
-    MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
-    ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+                             ArrayRef<Register> ResRegs,
+                             ArrayRef<ArrayRef<Register>> ArgRegs,
+                             Register SwiftErrorVReg,
+                             std::function<unsigned()> GetCalleeReg) const {
   auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
 
   // First step is to marshall all the function's parameters into the correct
@@ -40,8 +44,8 @@ bool CallLowering::lowerCall(
     ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
                     i < NumFixedArgs};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
-    // We don't currently support swifterror or swiftself args.
-    if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf())
+    // We don't currently support swiftself args.
+    if (OrigArg.Flags.isSwiftSelf())
       return false;
     OrigArgs.push_back(OrigArg);
     ++i;
@@ -53,11 +57,12 @@ bool CallLowering::lowerCall(
   else
     Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
 
-  ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
+  ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
   if (!OrigRet.Ty->isVoidTy())
     setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
 
-  return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
+  return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
+                   SwiftErrorVReg);
 }
 
 template <typename FuncInfoTy>
@@ -84,7 +89,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
 
   if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
-    Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+    Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
@@ -109,21 +117,78 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                     const DataLayout &DL,
                                     const CallInst &FuncInfo) const;
 
+Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+                                MachineIRBuilder &MIRBuilder) const {
+  assert(SrcRegs.size() > 1 && "Nothing to pack");
+
+  const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+  LLT PackedLLT = getLLTForType(*PackedTy, DL);
+
+  SmallVector<LLT, 8> LLTs;
+  SmallVector<uint64_t, 8> Offsets;
+  computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+  assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");
+
+  Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
+  MIRBuilder.buildUndef(Dst);
+  for (unsigned i = 0; i < SrcRegs.size(); ++i) {
+    Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
+    MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
+    Dst = NewDst;
+  }
+
+  return Dst;
+}
+
+void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
+                              Type *PackedTy,
+                              MachineIRBuilder &MIRBuilder) const {
+  assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+  const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+
+  SmallVector<LLT, 8> LLTs;
+  SmallVector<uint64_t, 8> Offsets;
+  computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+  assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i)
+    MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
+}
+
 bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      ArrayRef<ArgInfo> Args,
                                      ValueHandler &Handler) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler);
+}
+
+bool CallLowering::handleAssignments(CCState &CCInfo,
+                                     SmallVectorImpl<CCValAssign> &ArgLocs,
+                                     MachineIRBuilder &MIRBuilder,
+                                     ArrayRef<ArgInfo> Args,
+                                     ValueHandler &Handler) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &F = MF.getFunction();
+  const DataLayout &DL = F.getParent()->getDataLayout();
 
   unsigned NumArgs = Args.size();
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT CurVT = MVT::getVT(Args[i].Ty);
-    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
-      return false;
+    if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
+      // Try to use the register type if we couldn't assign the VT.
+      if (!Handler.isArgumentHandler() || !CurVT.isValid())
+        return false;
+      CurVT = TLI->getRegisterTypeForCallingConv(
+          F.getContext(), F.getCallingConv(), EVT(CurVT));
+      if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
+        return false;
+    }
   }
 
   for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
@@ -137,16 +202,49 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
       continue;
     }
 
-    if (VA.isRegLoc())
-      Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
-    else if (VA.isMemLoc()) {
-      unsigned Size = VA.getValVT() == MVT::iPTR
-                          ? DL.getPointerSize()
-                          : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+    assert(Args[i].Regs.size() == 1 &&
+           "Can't handle multiple virtual regs yet");
+
+    // FIXME: Pack registers if we have more than one.
+    Register ArgReg = Args[i].Regs[0];
+
+    if (VA.isRegLoc()) {
+      MVT OrigVT = MVT::getVT(Args[i].Ty);
+      MVT VAVT = VA.getValVT();
+      if (Handler.isArgumentHandler() && VAVT != OrigVT) {
+        if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
+          return false; // Can't handle this type of arg yet.
+        const LLT VATy(VAVT);
+        Register NewReg =
+            MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+        Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+        // If it's a vector type, we either need to truncate the elements
+        // or do an unmerge to get the lower block of elements.
+        if (VATy.isVector() &&
+            VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+          const LLT OrigTy(OrigVT);
+          // Just handle the case where the VA type is 2 * original type.
+          if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+            LLVM_DEBUG(dbgs()
+                       << "Incoming promoted vector arg has too many elts");
+            return false;
+          }
+          auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+          MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
+        } else {
+          MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+        }
+      } else {
+        Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+      }
+    } else if (VA.isMemLoc()) {
+      MVT VT = MVT::getVT(Args[i].Ty);
+      unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
+                                      : alignTo(VT.getSizeInBits(), 8) / 8;
       unsigned Offset = VA.getLocMemOffset();
       MachinePointerInfo MPO;
-      unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
-      Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
+      Register StackAddr = Handler.getStackAddress(Size, Offset, MPO);
+      Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA);
     } else {
       // FIXME: Support byvals and other weirdness
       return false;
@@ -155,9 +253,11 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
   return true;
 }
 
-unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
+Register CallLowering::ValueHandler::extendRegister(Register ValReg,
                                                     CCValAssign &VA) {
   LLT LocTy{VA.getLocVT()};
+  if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits())
+    return ValReg;
   switch (VA.getLocInfo()) {
   default: break;
   case CCValAssign::Full:
@@ -170,12 +270,12 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
     return MIB->getOperand(0).getReg();
   }
   case CCValAssign::SExt: {
-    unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildSExt(NewReg, ValReg);
     return NewReg;
   }
   case CCValAssign::ZExt: {
-    unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildZExt(NewReg, ValReg);
     return NewReg;
   }
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 45b0e36fd7d9..31cb1dbbc9b5 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,9 +1,8 @@
 //===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,7 +50,7 @@ public:
   }
 
   void erasingInstr(MachineInstr &MI) override {
-    LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+    LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
     WorkList.remove(&MI);
   }
   void createdInstr(MachineInstr &MI) override {
@@ -130,9 +129,10 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
           CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
           continue;
         }
-        WorkList.insert(CurMI);
+        WorkList.deferred_insert(CurMI);
       }
     }
+    WorkList.finalize();
     // Main Loop. Process the instructions here.
     while (!WorkList.empty()) {
       MachineInstr *CurrInst = WorkList.pop_back_val();
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1c5670a6dec..9cbf3dd83ff1 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,9 +1,8 @@
 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -23,8 +22,8 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                                MachineIRBuilder &B)
     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
 
-void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
-                                    unsigned ToReg) const {
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
+                                    Register ToReg) const {
   Observer.changingAllUsesOfReg(MRI, FromReg);
 
   if (MRI.constrainRegAttrs(ToReg, FromReg))
@@ -37,7 +36,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
 
 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                       MachineOperand &FromRegOp,
-                                      unsigned ToReg) const {
+                                      Register ToReg) const {
   assert(FromRegOp.getParent() && "Expected an operand in an MI");
   Observer.changingInstr(*FromRegOp.getParent());
 
@@ -47,6 +46,13 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
 }
 
 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
+  if (matchCombineCopy(MI)) {
+    applyCombineCopy(MI);
+    return true;
+  }
+  return false;
+}
+bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::COPY)
     return false;
   unsigned DstReg = MI.getOperand(0).getReg();
@@ -55,20 +61,18 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
   LLT SrcTy = MRI.getType(SrcReg);
   // Simple Copy Propagation.
   // a(sx) = COPY b(sx) -> Replace all uses of a with b.
-  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
-    MI.eraseFromParent();
-    replaceRegWith(MRI, DstReg, SrcReg);
+  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
     return true;
-  }
   return false;
 }
+void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  MI.eraseFromParent();
+  replaceRegWith(MRI, DstReg, SrcReg);
+}
 
 namespace {
-struct PreferredTuple {
-  LLT Ty;                // The result type of the extend.
-  unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
-  MachineInstr *MI;
-};
 
 /// Select a preference between two uses. CurrentUse is the current preference
 /// while *ForCandidate is attributes of the candidate under consideration.
@@ -127,7 +131,8 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
 /// want to try harder to find a dominating block.
 static void InsertInsnsWithoutSideEffectsBeforeUse(
     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
-    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
+                       MachineOperand &UseMO)>
         Inserter) {
   MachineInstr &UseMI = *UseMO.getParent();
 
@@ -143,26 +148,26 @@ static void InsertInsnsWithoutSideEffectsBeforeUse(
   // the def instead of at the start of the block.
   if (InsertBB == DefMI.getParent()) {
     MachineBasicBlock::iterator InsertPt = &DefMI;
-    Inserter(InsertBB, std::next(InsertPt));
+    Inserter(InsertBB, std::next(InsertPt), UseMO);
     return;
   }
 
   // Otherwise we want the start of the BB
-  Inserter(InsertBB, InsertBB->getFirstNonPHI());
+  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
 }
 } // end anonymous namespace
 
 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
-  struct InsertionPoint {
-    MachineOperand *UseMO;
-    MachineBasicBlock *InsertIntoBB;
-    MachineBasicBlock::iterator InsertBefore;
-    InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
-                   MachineBasicBlock::iterator InsertBefore)
-        : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
-    }
-  };
+  PreferredTuple Preferred;
+  if (matchCombineExtendingLoads(MI, Preferred)) {
+    applyCombineExtendingLoads(MI, Preferred);
+    return true;
+  }
+  return false;
+}
 
+bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
+                                                PreferredTuple &Preferred) {
   // We match the loads and follow the uses to the extend instead of matching
   // the extends and following the def to the load. This is because the load
   // must remain in the same position for correctness (unless we also add code
@@ -182,6 +187,19 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
   if (!LoadValueTy.isScalar())
     return false;
 
+  // Most architectures are going to legalize <s8 loads into at least a 1 byte
+  // load, and the MMOs can only describe memory accesses in multiples of bytes.
+  // If we try to perform extload combining on those, we can end up with
+  // %a(s8) = extload %ptr (load 1 byte from %ptr)
+  // ... which is an illegal extload instruction.
+  if (LoadValueTy.getSizeInBits() < 8)
+    return false;
+
+  // Non power-of-2 types will very likely be legalized into multiple loads.
+  // Don't bother trying to match them into extending loads.
+  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+    return false;
+
   // Find the preferred type aside from the any-extends (unless it's the only
   // one) and non-extending ops. We'll emit an extending load to that type and
   // and emit a variant of (extend (trunc X)) for the others according to the
@@ -192,7 +210,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                        ? TargetOpcode::G_SEXT
                                        : TargetOpcode::G_ZEXT;
-  PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+  Preferred = {LLT(), PreferredOpcode, nullptr};
   for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
@@ -211,9 +229,35 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
 
   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+  return true;
+}
 
+void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
+                                                PreferredTuple &Preferred) {
   // Rewrite the load to the chosen extending load.
-  unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+
+  // Inserter to insert a truncate back to the original type at a given point
+  // with some basic CSE to limit truncate duplication to one per BB.
+  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
+                           MachineBasicBlock::iterator InsertBefore,
+                           MachineOperand &UseMO) {
+    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+    if (PreviouslyEmitted) {
+      Observer.changingInstr(*UseMO.getParent());
+      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
+      Observer.changedInstr(*UseMO.getParent());
+      return;
+    }
+
+    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+    EmittedInsns[InsertIntoBB] = NewMI;
+    replaceRegOpWith(MRI, UseMO, NewDstReg);
+  };
+
   Observer.changingInstr(MI);
   MI.setDesc(
       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
@@ -223,10 +267,13 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
                                      : TargetOpcode::G_LOAD));
 
   // Rewrite all the uses to fix up the types.
-  SmallVector<MachineInstr *, 1> ScheduleForErase;
-  SmallVector<InsertionPoint, 4> ScheduleForInsert;
-  for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
-    MachineInstr *UseMI = UseMO.getParent();
+  auto &LoadValue = MI.getOperand(0);
+  SmallVector<MachineOperand *, 4> Uses;
+  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
+    Uses.push_back(&UseMO);
+
+  for (auto *UseMO : Uses) {
+    MachineInstr *UseMI = UseMO->getParent();
 
     // If the extend is compatible with the preferred extend then we should fix
     // up the type and extend so that it uses the preferred use.
@@ -247,7 +294,8 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
           //    %2:_(s32) = G_SEXTLOAD ...
           //    ... = ... %2(s32)
           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
-          ScheduleForErase.push_back(UseMO.getParent());
+          Observer.erasingInstr(*UseMO->getParent());
+          UseMO->getParent()->eraseFromParent();
         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
           // If the preferred size is smaller, then keep the extend but extend
           // from the result of the extending load. For example:
@@ -272,59 +320,87 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
           //    %4:_(s8) = G_TRUNC %2:_(s32)
           //    %3:_(s64) = G_ZEXT %2:_(s8)
           //    ... = ... %3(s64)
-          InsertInsnsWithoutSideEffectsBeforeUse(
-              Builder, MI, UseMO,
-              [&](MachineBasicBlock *InsertIntoBB,
-                  MachineBasicBlock::iterator InsertBefore) {
-                ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
-              });
+          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
+                                                 InsertTruncAt);
         }
         continue;
       }
       // The use is (one of) the uses of the preferred use we chose earlier.
       // We're going to update the load to def this value later so just erase
       // the old extend.
-      ScheduleForErase.push_back(UseMO.getParent());
+      Observer.erasingInstr(*UseMO->getParent());
+      UseMO->getParent()->eraseFromParent();
       continue;
     }
 
     // The use isn't an extend. Truncate back to the type we originally loaded.
     // This is free on many targets.
-    InsertInsnsWithoutSideEffectsBeforeUse(
-        Builder, MI, UseMO,
-        [&](MachineBasicBlock *InsertIntoBB,
-            MachineBasicBlock::iterator InsertBefore) {
-          ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
-        });
+    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
   }
 
-  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
-  for (auto &InsertionInfo : ScheduleForInsert) {
-    MachineOperand *UseMO = InsertionInfo.UseMO;
-    MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
-    MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
-
-    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
-    if (PreviouslyEmitted) {
-      Observer.changingInstr(*UseMO->getParent());
-      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
-      Observer.changedInstr(*UseMO->getParent());
-      continue;
-    }
-
-    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
-    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
-    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
-    EmittedInsns[InsertIntoBB] = NewMI;
-    replaceRegOpWith(MRI, *UseMO, NewDstReg);
-  }
-  for (auto &EraseMI : ScheduleForErase) {
-    Observer.erasingInstr(*EraseMI);
-    EraseMI->eraseFromParent();
-  }
   MI.getOperand(0).setReg(ChosenDstReg);
   Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+  // Try to match the following:
+  // bb1:
+  //   %c = G_ICMP pred, %a, %b
+  //   (%c must be defined directly by the G_ICMP and have no other uses)
+  //   G_BRCOND %c, %bb2
+  //   G_BR %bb3
+  // bb2:
+  // ...
+  // bb3:
+
+  // The above pattern never falls through to the successor bb2, so a branch is
+  // taken no matter which path is followed. Here we try to find that pattern
+  // and replace it with a conditional branch to bb3 that otherwise falls
+  // through to bb2.
+
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineBasicBlock::iterator BrIt(MI);
+  if (BrIt == MBB->begin())
+    return false;
+  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+  MachineInstr *BrCond = &*std::prev(BrIt);
+  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+    return false;
 
+  // Check that the next block is the conditional branch target.
+  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
+    return false;
+
+  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
+      !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+    return false;
+  return true;
+}
+
+bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
+  if (!matchCombineBr(MI))
+    return false;
+  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
+  MachineBasicBlock::iterator BrIt(MI);
+  MachineInstr *BrCond = &*std::prev(BrIt);
+  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+
+  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
+      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+
+  // Invert the G_ICMP condition.
+  Observer.changingInstr(*CmpMI);
+  CmpMI->getOperand(1).setPredicate(InversePred);
+  Observer.changedInstr(*CmpMI);
+
+  // Change the conditional branch target.
+  Observer.changingInstr(*BrCond);
+  BrCond->getOperand(1).setMBB(BrTarget);
+  Observer.changedInstr(*BrCond);
+  MI.eraseFromParent();
   return true;
 }
 
diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index c693acbbf10b..62b903c30b89 100644
--- a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -1,9 +1,8 @@
 //===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@ void GISelChangeObserver::changingAllUsesOfReg(
 void GISelChangeObserver::finishedChangingAllUsesOfReg() {
   for (auto *ChangedMI : ChangingAllUsesOfReg)
     changedInstr(*ChangedMI);
+  ChangingAllUsesOfReg.clear();
 }
 
 RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 00c6a9d63158..e0391e6f6467 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 95f6274aa068..6e99bdbd8264 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -16,8 +15,11 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -106,9 +108,7 @@ static void reportTranslationError(MachineFunction &MF,
     ORE.emit(R);
 }
 
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
-  initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
-}
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
 
 #ifndef NDEBUG
 namespace {
@@ -136,7 +136,11 @@ public:
     LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
                       << " was copied to " << MI);
 #endif
-    assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+    // We allow insts in the entry block to have a debug loc line of 0 because
+    // they could have originated from constants, and we don't want a jumpy
+    // debug experience.
+    assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
+            MI.getDebugLoc().getLine() == 0) &&
            "Line info was not transferred to all instructions");
   }
 };
@@ -152,36 +156,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-static void computeValueLLTs(const DataLayout &DL, Type &Ty,
-                             SmallVectorImpl<LLT> &ValueTys,
-                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
-                             uint64_t StartingOffset = 0) {
-  // Given a struct type, recursively traverse the elements.
-  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
-    const StructLayout *SL = DL.getStructLayout(STy);
-    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
-      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
-                       StartingOffset + SL->getElementOffset(I));
-    return;
-  }
-  // Given an array type, recursively traverse the elements.
-  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
-    Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
-      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
-                       StartingOffset + i * EltSize);
-    return;
-  }
-  // Interpret void as zero return values.
-  if (Ty.isVoidTy())
-    return;
-  // Base case: we can get an LLT for this LLVM IR type.
-  ValueTys.push_back(getLLTForType(Ty, DL));
-  if (Offsets != nullptr)
-    Offsets->push_back(StartingOffset * 8);
-}
-
 IRTranslator::ValueToVRegInfo::VRegListT &
 IRTranslator::allocateVRegs(const Value &Val) {
   assert(!VMap.contains(Val) && "Value already allocated in VMap");
@@ -195,7 +169,7 @@ IRTranslator::allocateVRegs(const Value &Val) {
   return *Regs;
 }
 
-ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
   auto VRegsIt = VMap.findVRegs(Val);
   if (VRegsIt != VMap.vregs_end())
     return *VRegsIt->second;
@@ -249,7 +223,7 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
   if (FrameIndices.find(&AI) != FrameIndices.end())
     return FrameIndices[&AI];
 
-  unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+  unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
   unsigned Size =
       ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
 
@@ -311,21 +285,20 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
 
 bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
                                      MachineIRBuilder &MIRBuilder) {
-  // FIXME: handle signed/unsigned wrapping flags.
-
   // Get or create a virtual register for each value.
   // Unless the value is a Constant => loadimm cst?
   // or inline constant each time?
   // Creation of a virtual register needs to have a size.
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Res = getOrCreateVReg(U);
-  auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Op1 = getOrCreateVReg(*U.getOperand(1));
+  Register Res = getOrCreateVReg(U);
+  uint16_t Flags = 0;
   if (isa<Instruction>(U)) {
-    MachineInstr *FBinOpMI = FBinOp.getInstr();
     const Instruction &I = cast<Instruction>(U);
-    FBinOpMI->copyIRFlags(I);
+    Flags = MachineInstr::copyFlagsFromInstruction(I);
   }
+
+  MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
   return true;
 }
 
@@ -333,27 +306,38 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
   // -0.0 - X --> G_FNEG
   if (isa<Constant>(U.getOperand(0)) &&
       U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
-    MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
-        .addDef(getOrCreateVReg(U))
-        .addUse(getOrCreateVReg(*U.getOperand(1)));
+    Register Op1 = getOrCreateVReg(*U.getOperand(1));
+    Register Res = getOrCreateVReg(U);
+    uint16_t Flags = 0;
+    if (isa<Instruction>(U)) {
+      const Instruction &I = cast<Instruction>(U);
+      Flags = MachineInstr::copyFlagsFromInstruction(I);
+    }
+    // Negate the last operand of the FSUB
+    MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags);
     return true;
   }
   return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
 }
 
 bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
-  MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
-      .addDef(getOrCreateVReg(U))
-      .addUse(getOrCreateVReg(*U.getOperand(1)));
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  uint16_t Flags = 0;
+  if (isa<Instruction>(U)) {
+    const Instruction &I = cast<Instruction>(U);
+    Flags = MachineInstr::copyFlagsFromInstruction(I);
+  }
+  MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags);
   return true;
 }
 
 bool IRTranslator::translateCompare(const User &U,
                                     MachineIRBuilder &MIRBuilder) {
   const CmpInst *CI = dyn_cast<CmpInst>(&U);
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Res = getOrCreateVReg(U);
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Op1 = getOrCreateVReg(*U.getOperand(1));
+  Register Res = getOrCreateVReg(U);
   CmpInst::Predicate Pred =
       CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
                                     cast<ConstantExpr>(U).getPredicate());
@@ -366,8 +350,8 @@ bool IRTranslator::translateCompare(const User &U,
     MIRBuilder.buildCopy(
         Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
   else {
-    auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
-    FCmp->copyIRFlags(*CI);
+    MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
+                          MachineInstr::copyFlagsFromInstruction(*CI));
   }
 
   return true;
@@ -379,15 +363,20 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
     Ret = nullptr;
 
-  ArrayRef<unsigned> VRegs;
+  ArrayRef<Register> VRegs;
   if (Ret)
     VRegs = getOrCreateVRegs(*Ret);
 
+  Register SwiftErrorVReg = 0;
+  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+  }
+
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-
-  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
+  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -395,7 +384,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   unsigned Succ = 0;
   if (!BrInst.isUnconditional()) {
     // We want a G_BRCOND to the true BB followed by an unconditional branch.
-    unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+    Register Tst = getOrCreateVReg(*BrInst.getCondition());
     const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
     MachineBasicBlock &TrueBB = getMBB(TrueTgt);
     MIRBuilder.buildBrCond(Tst, TrueBB);
@@ -415,48 +404,429 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   return true;
 }
 
-bool IRTranslator::translateSwitch(const User &U,
-                                   MachineIRBuilder &MIRBuilder) {
-  // For now, just translate as a chain of conditional branches.
-  // FIXME: could we share most of the logic/code in
-  // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
-  // At first sight, it seems most of the logic in there is independent of
-  // SelectionDAG-specifics and a lot of work went in to optimize switch
-  // lowering in there.
-
-  const SwitchInst &SwInst = cast<SwitchInst>(U);
-  const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
-  const BasicBlock *OrigBB = SwInst.getParent();
-
-  LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
-  for (auto &CaseIt : SwInst.cases()) {
-    const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
-    const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
-    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
-    MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
-    const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
-    MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
-
-    MIRBuilder.buildBrCond(Tst, TrueMBB);
-    CurMBB.addSuccessor(&TrueMBB);
-    addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
-
-    MachineBasicBlock *FalseMBB =
-        MF->CreateMachineBasicBlock(SwInst.getParent());
-    // Insert the comparison blocks one after the other.
-    MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
-    MIRBuilder.buildBr(*FalseMBB);
-    CurMBB.addSuccessor(FalseMBB);
-
-    MIRBuilder.setMBB(*FalseMBB);
-  }
-  // handle default case
-  const BasicBlock *DefaultBB = SwInst.getDefaultDest();
-  MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
-  MIRBuilder.buildBr(DefaultMBB);
-  MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
-  CurMBB.addSuccessor(&DefaultMBB);
-  addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
+                                        MachineBasicBlock *Dst,
+                                        BranchProbability Prob) {
+  if (!FuncInfo.BPI) {
+    Src->addSuccessorWithoutProb(Dst);
+    return;
+  }
+  if (Prob.isUnknown())
+    Prob = getEdgeProbability(Src, Dst);
+  Src->addSuccessor(Dst, Prob);
+}
+
+BranchProbability
+IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
+                                 const MachineBasicBlock *Dst) const {
+  const BasicBlock *SrcBB = Src->getBasicBlock();
+  const BasicBlock *DstBB = Dst->getBasicBlock();
+  if (!FuncInfo.BPI) {
+    // If BPI is not available, set the default probability as 1 / N, where N is
+    // the number of successors.
+    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+    return BranchProbability(1, SuccSize);
+  }
+  return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
+  using namespace SwitchCG;
+  // Extract cases from the switch.
+  const SwitchInst &SI = cast<SwitchInst>(U);
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  CaseClusterVector Clusters;
+  Clusters.reserve(SI.getNumCases());
+  for (auto &I : SI.cases()) {
+    MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
+    assert(Succ && "Could not find successor mbb in mapping");
+    const ConstantInt *CaseVal = I.getCaseValue();
+    BranchProbability Prob =
+        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+            : BranchProbability(1, SI.getNumCases() + 1);
+    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+  }
+
+  MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
+
+  // Cluster adjacent cases with the same destination. We do this at all
+  // optimization levels because it's cheap to do and will make codegen faster
+  // if there are many clusters.
+  sortAndRangeify(Clusters);
+
+  MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
+
+  // If there is only the default destination, jump there directly.
+  if (Clusters.empty()) {
+    SwitchMBB->addSuccessor(DefaultMBB);
+    if (DefaultMBB != SwitchMBB->getNextNode())
+      MIB.buildBr(*DefaultMBB);
+    return true;
+  }
+
+  SL->findJumpTables(Clusters, &SI, DefaultMBB);
+
+  LLVM_DEBUG({
+    dbgs() << "Case clusters: ";
+    for (const CaseCluster &C : Clusters) {
+      if (C.Kind == CC_JumpTable)
+        dbgs() << "JT:";
+      if (C.Kind == CC_BitTests)
+        dbgs() << "BT:";
+
+      C.Low->getValue().print(dbgs(), true);
+      if (C.Low != C.High) {
+        dbgs() << '-';
+        C.High->getValue().print(dbgs(), true);
+      }
+      dbgs() << ' ';
+    }
+    dbgs() << '\n';
+  });
+
+  assert(!Clusters.empty());
+  SwitchWorkList WorkList;
+  CaseClusterIt First = Clusters.begin();
+  CaseClusterIt Last = Clusters.end() - 1;
+  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+  // FIXME: At the moment we don't do any splitting optimizations here like
+  // SelectionDAG does, so this worklist only has one entry.
+  while (!WorkList.empty()) {
+    SwitchWorkListItem W = WorkList.back();
+    WorkList.pop_back();
+    if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
+      return false;
+  }
+  return true;
+}
+
+void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
+                                 MachineBasicBlock *MBB) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MachineIRBuilder MIB(*MBB->getParent());
+  MIB.setMBB(*MBB);
+  MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+  auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
+  MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
+}
+
+bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
+                                       SwitchCG::JumpTableHeader &JTH,
+                                       MachineBasicBlock *HeaderBB) {
+  MachineIRBuilder MIB(*HeaderBB->getParent());
+  MIB.setMBB(*HeaderBB);
+  MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+  const Value &SValue = *JTH.SValue;
+  // Subtract the lowest switch case value from the value being switched on.
+  const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
+  Register SwitchOpReg = getOrCreateVReg(SValue);
+  auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
+  auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
+
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore require extension or truncation.
+  Type *PtrIRTy = SValue.getType()->getPointerTo();
+  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+  Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
+
+  JT.Reg = Sub.getReg(0);
+
+  if (JTH.OmitRangeCheck) {
+    if (JT.MBB != HeaderBB->getNextNode())
+      MIB.buildBr(*JT.MBB);
+    return true;
+  }
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the
+  // largest case in the switch.
+  auto Cst = getOrCreateVReg(
+      *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
+  Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
+  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
+
+  auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
+
+  // Avoid emitting unnecessary branches to the next block.
+  if (JT.MBB != HeaderBB->getNextNode())
+    BrCond = MIB.buildBr(*JT.MBB);
+  return true;
+}
+
+void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
+                                  MachineBasicBlock *SwitchBB,
+                                  MachineIRBuilder &MIB) {
+  Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
+  Register Cond;
+  DebugLoc OldDbgLoc = MIB.getDebugLoc();
+  MIB.setDebugLoc(CB.DbgLoc);
+  MIB.setMBB(*CB.ThisBB);
+
+  if (CB.PredInfo.NoCmp) {
+    // Branch or fall through to TrueBB.
+    addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+                      CB.ThisBB);
+    CB.ThisBB->normalizeSuccProbs();
+    if (CB.TrueBB != CB.ThisBB->getNextNode())
+      MIB.buildBr(*CB.TrueBB);
+    MIB.setDebugLoc(OldDbgLoc);
+    return;
+  }
+
+  const LLT i1Ty = LLT::scalar(1);
+  // Build the compare.
+  if (!CB.CmpMHS) {
+    Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+    Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+  } else {
+    assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
+           "Can only handle ULE ranges");
+
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+      Cond =
+          MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+    } else {
+      const LLT &CmpTy = MRI->getType(CmpOpReg);
+      auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
+      auto Diff = MIB.buildConstant(CmpTy, High - Low);
+      Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
+    }
+  }
+
+  // Update successor info
+  addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+
+  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+                    CB.ThisBB);
+
+  // TrueBB and FalseBB are always different unless the incoming IR is
+  // degenerate. This only happens when running llc on weird IR.
+  if (CB.TrueBB != CB.FalseBB)
+    addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
+  CB.ThisBB->normalizeSuccProbs();
+
+  // (disabled) if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
+    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+                      CB.ThisBB);
+
+  // If the true block is the next block, invert the condition so that we can
+  // fall through to the true block instead of the false block.
+  if (CB.TrueBB == CB.ThisBB->getNextNode()) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    auto True = MIB.buildConstant(i1Ty, 1);
+    Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None)
+               .getReg(0);
+  }
+
+  MIB.buildBrCond(Cond, *CB.TrueBB);
+  MIB.buildBr(*CB.FalseBB);
+  MIB.setDebugLoc(OldDbgLoc);
+}
+
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+                                          MachineBasicBlock *SwitchMBB,
+                                          MachineBasicBlock *CurMBB,
+                                          MachineBasicBlock *DefaultMBB,
+                                          MachineIRBuilder &MIB,
+                                          MachineFunction::iterator BBI,
+                                          BranchProbability UnhandledProbs,
+                                          SwitchCG::CaseClusterIt I,
+                                          MachineBasicBlock *Fallthrough,
+                                          bool FallthroughUnreachable) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = SwitchMBB->getParent();
+  // FIXME: Optimize away range check based on pivot comparisons.
+  JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+  SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+  BranchProbability DefaultProb = W.DefaultProb;
+
+  // The jump block hasn't been inserted yet; insert it here.
+  MachineBasicBlock *JumpMBB = JT->MBB;
+  CurMF->insert(BBI, JumpMBB);
+
+  // Since the jump table block is separate from the switch block, we need
+  // to keep track of it as a machine predecessor to the default block,
+  // otherwise we lose the phi edges.
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    CurMBB);
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    JumpMBB);
+
+  auto JumpProb = I->Prob;
+  auto FallthroughProb = UnhandledProbs;
+
+  // If the default statement is a target of the jump table, we evenly
+  // distribute the default probability to successors of CurMBB. Also
+  // update the probability on the edge from JumpMBB to Fallthrough.
+  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+                                        SE = JumpMBB->succ_end();
+       SI != SE; ++SI) {
+    if (*SI == DefaultMBB) {
+      JumpProb += DefaultProb / 2;
+      FallthroughProb -= DefaultProb / 2;
+      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+      JumpMBB->normalizeSuccProbs();
+    } else {
+      // Also record edges from the jump table block to its successors.
+      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+                        JumpMBB);
+    }
+  }
+
+  // Skip the range check if the fallthrough block is unreachable.
+  if (FallthroughUnreachable)
+    JTH->OmitRangeCheck = true;
+
+  if (!JTH->OmitRangeCheck)
+    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+  CurMBB->normalizeSuccProbs();
+
+  // The jump table header will be inserted in our current block, do the
+  // range check, and fall through to our fallthrough block.
+  JTH->HeaderBB = CurMBB;
+  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+  // If we're in the right place, emit the jump table header right now.
+  if (CurMBB == SwitchMBB) {
+    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+      return false;
+    JTH->Emitted = true;
+  }
+  return true;
+}
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+                                            Value *Cond,
+                                            MachineBasicBlock *Fallthrough,
+                                            bool FallthroughUnreachable,
+                                            BranchProbability UnhandledProbs,
+                                            MachineBasicBlock *CurMBB,
+                                            MachineIRBuilder &MIB,
+                                            MachineBasicBlock *SwitchMBB) {
+  using namespace SwitchCG;
+  const Value *RHS, *LHS, *MHS;
+  CmpInst::Predicate Pred;
+  if (I->Low == I->High) {
+    // Check Cond == I->Low.
+    Pred = CmpInst::ICMP_EQ;
+    LHS = Cond;
+    RHS = I->Low;
+    MHS = nullptr;
+  } else {
+    // Check I->Low <= Cond <= I->High.
+    Pred = CmpInst::ICMP_ULE;
+    LHS = I->Low;
+    MHS = Cond;
+    RHS = I->High;
+  }
+
+  // If Fallthrough is unreachable, fold away the comparison.
+  // The false probability is the sum of all unhandled cases.
+  CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
+               CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
+
+  emitSwitchCase(CB, SwitchMBB, MIB);
+  return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+                                       Value *Cond,
+                                       MachineBasicBlock *SwitchMBB,
+                                       MachineBasicBlock *DefaultMBB,
+                                       MachineIRBuilder &MIB) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = FuncInfo.MF;
+  MachineBasicBlock *NextMBB = nullptr;
+  MachineFunction::iterator BBI(W.MBB);
+  if (++BBI != FuncInfo.MF->end())
+    NextMBB = &*BBI;
+
+  if (EnableOpts) {
+    // Here, we order cases by probability so the most likely case will be
+    // checked first. However, two clusters can have the same probability in
+    // which case their relative ordering is non-deterministic. So we use Low
+    // as a tie-breaker as clusters are guaranteed to never overlap.
+    llvm::sort(W.FirstCluster, W.LastCluster + 1,
+               [](const CaseCluster &a, const CaseCluster &b) {
+                 return a.Prob != b.Prob
+                            ? a.Prob > b.Prob
+                            : a.Low->getValue().slt(b.Low->getValue());
+               });
+
+    // Rearrange the case blocks so that the last one falls through if possible
+    // without changing the order of probabilities.
+    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
+      --I;
+      if (I->Prob > W.LastCluster->Prob)
+        break;
+      if (I->Kind == CC_Range && I->MBB == NextMBB) {
+        std::swap(*I, *W.LastCluster);
+        break;
+      }
+    }
+  }
+
+  // Compute total probability.
+  BranchProbability DefaultProb = W.DefaultProb;
+  BranchProbability UnhandledProbs = DefaultProb;
+  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+    UnhandledProbs += I->Prob;
+
+  MachineBasicBlock *CurMBB = W.MBB;
+  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+    bool FallthroughUnreachable = false;
+    MachineBasicBlock *Fallthrough;
+    if (I == W.LastCluster) {
+      // For the last cluster, fall through to the default destination.
+      Fallthrough = DefaultMBB;
+      FallthroughUnreachable = isa<UnreachableInst>(
+          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+    } else {
+      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+      CurMF->insert(BBI, Fallthrough);
+    }
+    UnhandledProbs -= I->Prob;
+
+    switch (I->Kind) {
+    case CC_BitTests: {
+      LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
+      return false; // Bit tests currently unimplemented.
+    }
+    case CC_JumpTable: {
+      if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+                                  UnhandledProbs, I, Fallthrough,
+                                  FallthroughUnreachable)) {
+        LLVM_DEBUG(dbgs() << "Failed to lower jump table");
+        return false;
+      }
+      break;
+    }
+    case CC_Range: {
+      if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
+                                    FallthroughUnreachable, UnhandledProbs,
+                                    CurMBB, MIB, SwitchMBB)) {
+        LLVM_DEBUG(dbgs() << "Failed to lower switch range");
+        return false;
+      }
+      break;
+    }
+    }
+    CurMBB = Fallthrough;
+  }
 
   return true;
 }
@@ -465,7 +835,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
   const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
 
-  const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+  const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
   MIRBuilder.buildBrIndirect(Tgt);
 
   // Link successors.
@@ -476,6 +846,14 @@ bool IRTranslator::translateIndirectBr(const User &U,
   return true;
 }
 
+static bool isSwiftError(const Value *V) {
+  if (auto Arg = dyn_cast<Argument>(V))
+    return Arg->hasSwiftErrorAttr();
+  if (auto AI = dyn_cast<AllocaInst>(V))
+    return AI->isSwiftError();
+  return false;
+}
+
 bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   const LoadInst &LI = cast<LoadInst>(U);
 
@@ -486,13 +864,25 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(LI.getType()) == 0)
     return true;
 
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+  ArrayRef<Register> Regs = getOrCreateVRegs(LI);
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
-  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+  Register Base = getOrCreateVReg(*LI.getPointerOperand());
+
+  Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
+  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+    assert(Regs.size() == 1 && "swifterror should be single pointer");
+    Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
+                                                    LI.getPointerOperand());
+    MIRBuilder.buildCopy(Regs[0], VReg);
+    return true;
+  }
+
 
   for (unsigned i = 0; i < Regs.size(); ++i) {
-    unsigned Addr = 0;
-    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+    Register Addr;
+    MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
 
     MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
     unsigned BaseAlign = getMemOpAlignment(LI);
@@ -515,13 +905,25 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
     return true;
 
-  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+  ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
-  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+  Register Base = getOrCreateVReg(*SI.getPointerOperand());
+
+  Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+    assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+    Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+                                                    SI.getPointerOperand());
+    MIRBuilder.buildCopy(VReg, Vals[0]);
+    return true;
+  }
 
   for (unsigned i = 0; i < Vals.size(); ++i) {
-    unsigned Addr = 0;
-    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+    Register Addr;
+    MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
 
     MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
     unsigned BaseAlign = getMemOpAlignment(SI);
@@ -562,10 +964,9 @@ bool IRTranslator::translateExtractValue(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
   const Value *Src = U.getOperand(0);
   uint64_t Offset = getOffsetFromIndices(U, *DL);
-  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
-  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
-                 Offsets.begin();
+  unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
   auto &DstRegs = allocateVRegs(U);
 
   for (unsigned i = 0; i < DstRegs.size(); ++i)
@@ -580,8 +981,8 @@ bool IRTranslator::translateInsertValue(const User &U,
   uint64_t Offset = getOffsetFromIndices(U, *DL);
   auto &DstRegs = allocateVRegs(U);
   ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
-  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
-  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
   auto InsertedIt = InsertedRegs.begin();
 
   for (unsigned i = 0; i < DstRegs.size(); ++i) {
@@ -596,19 +997,19 @@ bool IRTranslator::translateInsertValue(const User &U,
 
 bool IRTranslator::translateSelect(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
-  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
-  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+  Register Tst = getOrCreateVReg(*U.getOperand(0));
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
+  ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
 
   const SelectInst &SI = cast<SelectInst>(U);
-  const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+  uint16_t Flags = 0;
+  if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()))
+    Flags = MachineInstr::copyFlagsFromInstruction(*Cmp);
+
   for (unsigned i = 0; i < ResRegs.size(); ++i) {
-    auto Select =
-        MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
-    if (Cmp && isa<FPMathOperator>(Cmp)) {
-      Select->copyIRFlags(*Cmp);
-    }
+    MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]},
+                          {Tst, Op0Regs[i], Op1Regs[i]}, Flags);
   }
 
   return true;
@@ -619,7 +1020,7 @@ bool IRTranslator::translateBitCast(const User &U,
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
       getLLTForType(*U.getType(), *DL)) {
-    unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
+    Register SrcReg = getOrCreateVReg(*U.getOperand(0));
     auto &Regs = *VMap.getVRegs(U);
     // If we already assigned a vreg for this bitcast, we can't change that.
     // Emit a copy to satisfy the users we already emitted.
@@ -636,9 +1037,9 @@ bool IRTranslator::translateBitCast(const User &U,
 
 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                  MachineIRBuilder &MIRBuilder) {
-  unsigned Op = getOrCreateVReg(*U.getOperand(0));
-  unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+  Register Op = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  MIRBuilder.buildInstr(Opcode, {Res}, {Op}); // Res = Opcode(operand 0 of U)
   return true;
 }
 
@@ -649,7 +1050,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
     return false;
 
   Value &Op0 = *U.getOperand(0);
-  unsigned BaseReg = getOrCreateVReg(Op0);
+  Register BaseReg = getOrCreateVReg(Op0);
   Type *PtrIRTy = Op0.getType();
   LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
   Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
@@ -674,43 +1075,43 @@ bool IRTranslator::translateGetElementPtr(const User &U,
       }
 
       if (Offset != 0) {
-        unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
-        unsigned OffsetReg =
-            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+        Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+        LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+        auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
+        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
 
         BaseReg = NewBaseReg;
         Offset = 0;
       }
 
-      unsigned IdxReg = getOrCreateVReg(*Idx);
+      Register IdxReg = getOrCreateVReg(*Idx);
       if (MRI->getType(IdxReg) != OffsetTy) {
-        unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+        Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
         MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
         IdxReg = NewIdxReg;
       }
 
       // N = N + Idx * ElementSize;
       // Avoid doing it for ElementSize of 1.
-      unsigned GepOffsetReg;
+      Register GepOffsetReg;
       if (ElementSize != 1) {
-        unsigned ElementSizeReg =
-            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-
         GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
-        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+        auto ElementSizeMIB = MIRBuilder.buildConstant(
+            getLLTForType(*OffsetIRTy, *DL), ElementSize);
+        MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
       } else
         GepOffsetReg = IdxReg;
 
-      unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+      Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
       MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
       BaseReg = NewBaseReg;
     }
   }
 
   if (Offset != 0) {
-    unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+    auto OffsetMIB =
+        MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
+    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
     return true;
   }
 
@@ -721,6 +1122,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
 bool IRTranslator::translateMemfunc(const CallInst &CI,
                                     MachineIRBuilder &MIRBuilder,
                                     unsigned ID) {
+
+  // If the source is undef, then just emit a nop.
+  if (isa<UndefValue>(CI.getArgOperand(1))) {
+    switch (ID) {
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+    case Intrinsic::memset:
+      return true;
+    default:
+      break;
+    }
+  }
+
   LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
   Type *DstTy = CI.getArgOperand(0)->getType();
   if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
@@ -752,10 +1166,10 @@ bool IRTranslator::translateMemfunc(const CallInst &CI,
 
   return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
                         MachineOperand::CreateES(Callee),
-                        CallLowering::ArgInfo(0, CI.getType()), Args);
+                        CallLowering::ArgInfo({0}, CI.getType()), Args);
 }
 
-void IRTranslator::getStackGuard(unsigned DstReg,
+void IRTranslator::getStackGuard(Register DstReg,
                                  MachineIRBuilder &MIRBuilder) {
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
@@ -778,7 +1192,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
 
 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
   MIRBuilder.buildInstr(Op)
       .addDef(ResRegs[0])
       .addDef(ResRegs[1])
@@ -788,19 +1202,123 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
   return true;
 }
 
+unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
+  switch (ID) { // Returns the G_* opcode for ID, or the sentinel below.
+    default: // Not a simple intrinsic.
+      break;
+    case Intrinsic::bswap:
+      return TargetOpcode::G_BSWAP;
+    case Intrinsic::ceil:
+      return TargetOpcode::G_FCEIL;
+    case Intrinsic::cos:
+      return TargetOpcode::G_FCOS;
+    case Intrinsic::ctpop:
+      return TargetOpcode::G_CTPOP;
+    case Intrinsic::exp:
+      return TargetOpcode::G_FEXP;
+    case Intrinsic::exp2:
+      return TargetOpcode::G_FEXP2;
+    case Intrinsic::fabs:
+      return TargetOpcode::G_FABS;
+    case Intrinsic::copysign:
+      return TargetOpcode::G_FCOPYSIGN;
+    case Intrinsic::minnum:
+      return TargetOpcode::G_FMINNUM;
+    case Intrinsic::maxnum:
+      return TargetOpcode::G_FMAXNUM;
+    case Intrinsic::minimum:
+      return TargetOpcode::G_FMINIMUM;
+    case Intrinsic::maximum:
+      return TargetOpcode::G_FMAXIMUM;
+    case Intrinsic::canonicalize:
+      return TargetOpcode::G_FCANONICALIZE;
+    case Intrinsic::floor:
+      return TargetOpcode::G_FFLOOR;
+    case Intrinsic::fma:
+      return TargetOpcode::G_FMA;
+    case Intrinsic::log:
+      return TargetOpcode::G_FLOG;
+    case Intrinsic::log2:
+      return TargetOpcode::G_FLOG2;
+    case Intrinsic::log10:
+      return TargetOpcode::G_FLOG10;
+    case Intrinsic::nearbyint:
+      return TargetOpcode::G_FNEARBYINT;
+    case Intrinsic::pow:
+      return TargetOpcode::G_FPOW;
+    case Intrinsic::rint:
+      return TargetOpcode::G_FRINT;
+    case Intrinsic::round:
+      return TargetOpcode::G_INTRINSIC_ROUND;
+    case Intrinsic::sin:
+      return TargetOpcode::G_FSIN;
+    case Intrinsic::sqrt:
+      return TargetOpcode::G_FSQRT;
+    case Intrinsic::trunc:
+      return TargetOpcode::G_INTRINSIC_TRUNC;
+  }
+  return Intrinsic::not_intrinsic; // Sentinel; it is not a TargetOpcode.
+}
+
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+                                            Intrinsic::ID ID,
+                                            MachineIRBuilder &MIRBuilder) {
+  // Look up the generic opcode for ID; returns false if there is none.
+  unsigned Op = getSimpleIntrinsicOpcode(ID);
+
+  // Is this a simple intrinsic?
+  if (Op == Intrinsic::not_intrinsic)
+    return false;
+
+  // Yes. Let's translate it.
+  SmallVector<llvm::SrcOp, 4> VRegs;
+  for (auto &Arg : CI.arg_operands())
+    VRegs.push_back(getOrCreateVReg(*Arg));
+  // One def for the call result, one use per argument, flags taken from CI.
+  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+                        MachineInstr::copyFlagsFromInstruction(CI));
+  return true;
+}
+
 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
+
+  // If this is a simple intrinsic (that is, we just need to add a def of
+  // a vreg, and uses for each arg operand), then translate it.
+  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+    return true;
+
   switch (ID) {
   default:
     break;
   case Intrinsic::lifetime_start:
-  case Intrinsic::lifetime_end:
-    // Stack coloring is not enabled in O0 (which we care about now) so we can
-    // drop these. Make sure someone notices when we start compiling at higher
-    // opts though.
-    if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
-      return false;
+  case Intrinsic::lifetime_end: {
+    // No stack colouring in O0, discard region information.
+    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+      return true;
+
+    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+                                                  : TargetOpcode::LIFETIME_END;
+
+    // Get the underlying objects for the location passed on the lifetime
+    // marker.
+    SmallVector<const Value *, 4> Allocas;
+    GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+
+    // Iterate over each underlying object, creating lifetime markers for each
+    // static alloca. Quit if we find a non-static alloca.
+    for (const Value *V : Allocas) {
+      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI)
+        continue;
+
+      if (!AI->isStaticAlloca())
+        return true;
+
+      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+    }
     return true;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
     assert(DI.getVariable() && "Missing variable");
@@ -848,10 +1366,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Value *Ptr = CI.getArgOperand(0);
     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
 
+    // FIXME: Get alignment
     MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
         .addUse(getOrCreateVReg(*Ptr))
         .addMemOperand(MF->getMachineMemOperand(
-            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -868,7 +1387,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     } else if (const auto *CI = dyn_cast<Constant>(V)) {
       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
     } else {
-      unsigned Reg = getOrCreateVReg(*V);
+      Register Reg = getOrCreateVReg(*V);
       // FIXME: This does not handle register-indirect values at offset 0. The
       // direct/indirect thing shouldn't really be handled by something as
       // implicit as reg+noreg vs reg+imm in the first palce, but it seems
@@ -889,94 +1408,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
   case Intrinsic::smul_with_overflow:
     return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
-  case Intrinsic::pow: {
-    auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
-    Pow->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp: {
-    auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp2: {
-    auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log: {
-    auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log2: {
-    auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log10: {
-    auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log10->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::fabs: {
-    auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Fabs->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::trunc:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::round:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::fma: {
-    auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
-    FMA->copyIRFlags(CI);
-    return true;
-  }
   case Intrinsic::fmuladd: {
     const TargetMachine &TM = MF->getTarget();
     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
-    unsigned Dst = getOrCreateVReg(CI);
-    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
-    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
-    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+    Register Dst = getOrCreateVReg(CI);
+    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
         TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
       // TODO: Revisit this to see if we should move this part of the
       // lowering to the combiner.
-      auto FMA =  MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
-      FMA->copyIRFlags(CI);
+      MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
+                            MachineInstr::copyFlagsFromInstruction(CI));
     } else {
       LLT Ty = getLLTForType(*CI.getType(), *DL);
-      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
-      FMul->copyIRFlags(CI);
-      auto FAdd =  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
-      FAdd->copyIRFlags(CI);
+      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1},
+                                        MachineInstr::copyFlagsFromInstruction(CI));
+      MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2},
+                            MachineInstr::copyFlagsFromInstruction(CI));
     }
     return true;
   }
@@ -986,7 +1436,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateMemfunc(CI, MIRBuilder, ID);
   case Intrinsic::eh_typeid_for: {
     GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
-    unsigned Reg = getOrCreateVReg(CI);
+    Register Reg = getOrCreateVReg(CI);
     unsigned TypeID = MF->getTypeIDFor(GV);
     MIRBuilder.buildConstant(Reg, TypeID);
     return true;
@@ -1008,7 +1458,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return true;
   case Intrinsic::stackprotector: {
     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
     getStackGuard(GuardVal, MIRBuilder);
 
     AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
@@ -1023,6 +1473,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                   PtrTy.getSizeInBits() / 8, 8));
     return true;
   }
+  case Intrinsic::stacksave: {
+    // Save the stack pointer to the location provided by the intrinsic.
+    Register Reg = getOrCreateVReg(CI);
+    Register StackPtr = MF->getSubtarget()
+                            .getTargetLowering()
+                            ->getStackPointerRegisterToSaveRestore();
+
+    // If the target doesn't specify a stack pointer, then fall back.
+    if (!StackPtr)
+      return false;
+
+    MIRBuilder.buildCopy(Reg, StackPtr);
+    return true;
+  }
+  case Intrinsic::stackrestore: {
+    // Restore the stack pointer from the location provided by the intrinsic.
+    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
+    Register StackPtr = MF->getSubtarget()
+                            .getTargetLowering()
+                            ->getStackPointerRegisterToSaveRestore();
+
+    // If the target doesn't specify a stack pointer, then fall back.
+    if (!StackPtr)
+      return false;
+
+    MIRBuilder.buildCopy(StackPtr, Reg);
+    return true;
+  }
   case Intrinsic::cttz:
   case Intrinsic::ctlz: {
     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
@@ -1037,24 +1515,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
     return true;
   }
-  case Intrinsic::ctpop: {
-    MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  }
   case Intrinsic::invariant_start: {
     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildUndef(Undef);
     return true;
   }
   case Intrinsic::invariant_end:
     return true;
-  case Intrinsic::ceil:
-    MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+  case Intrinsic::assume:
+  case Intrinsic::var_annotation:
+  case Intrinsic::sideeffect:
+    // Discard annotate attributes, assumptions, and artificial side-effects.
     return true;
   }
   return false;
@@ -1079,34 +1551,6 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
   return true;
 }
 
-unsigned IRTranslator::packRegs(const Value &V,
-                                  MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
-  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-  LLT BigTy = getLLTForType(*V.getType(), *DL);
-
-  if (Regs.size() == 1)
-    return Regs[0];
-
-  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
-  MIRBuilder.buildUndef(Dst);
-  for (unsigned i = 0; i < Regs.size(); ++i) {
-    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
-    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
-    Dst = NewDst;
-  }
-  return Dst;
-}
-
-void IRTranslator::unpackRegs(const Value &V, unsigned Src,
-                                MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
-  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-
-  for (unsigned i = 0; i < Regs.size(); ++i)
-    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
-}
-
 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const CallInst &CI = cast<CallInst>(U);
   auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1126,23 +1570,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
       ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
   }
 
-  bool IsSplitType = valueIsSplit(CI);
   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
-    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
-                                     getLLTForType(*CI.getType(), *DL))
-                               : getOrCreateVReg(CI);
-
-    SmallVector<unsigned, 8> Args;
-    for (auto &Arg: CI.arg_operands())
-      Args.push_back(packRegs(*Arg, MIRBuilder));
+    ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+    SmallVector<ArrayRef<Register>, 8> Args;
+    Register SwiftInVReg = 0;
+    Register SwiftErrorVReg = 0;
+    for (auto &Arg: CI.arg_operands()) {
+      if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+        assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+        LLT Ty = getLLTForType(*Arg->getType(), *DL);
+        SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+        MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+                                              &CI, &MIRBuilder.getMBB(), Arg));
+        Args.emplace_back(makeArrayRef(SwiftInVReg));
+        SwiftErrorVReg =
+            SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
+        continue;
+      }
+      Args.push_back(getOrCreateVRegs(*Arg));
+    }
 
     MF->getFrameInfo().setHasCalls(true);
-    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
-      return getOrCreateVReg(*CI.getCalledValue());
-    });
+    bool Success =
+        CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
+                       [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
 
-    if (IsSplitType)
-      unpackRegs(CI, Res, MIRBuilder);
     return Success;
   }
 
@@ -1151,35 +1604,39 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;
 
-  unsigned Res = 0;
-  if (!CI.getType()->isVoidTy()) {
-    if (IsSplitType)
-      Res =
-          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
-    else
-      Res = getOrCreateVReg(CI);
-  }
+  ArrayRef<Register> ResultRegs;
+  if (!CI.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CI);
+
+  // Ignore the callsite attributes. Backend code is most likely not expecting
+  // an intrinsic to sometimes have side effects and sometimes not.
   MachineInstrBuilder MIB =
-      MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+      MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+  if (isa<FPMathOperator>(CI))
+    MIB->copyIRFlags(CI);
 
   for (auto &Arg : CI.arg_operands()) {
     // Some intrinsics take metadata parameters. Reject them.
     if (isa<MetadataAsValue>(Arg))
       return false;
-    MIB.addUse(packRegs(*Arg, MIRBuilder));
+    ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
+    if (VRegs.size() > 1)
+      return false;
+    MIB.addUse(VRegs[0]);
   }
 
-  if (IsSplitType)
-    unpackRegs(CI, Res, MIRBuilder);
-
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
   if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+    unsigned Align = Info.align;
+    if (Align == 0)
+      Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+
     uint64_t Size = Info.memVT.getStoreSize();
     MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
-                                               Info.flags, Size, Info.align));
+                                               Info.flags, Size, Align));
   }
 
   return true;
@@ -1215,18 +1672,32 @@ bool IRTranslator::translateInvoke(const User &U,
   MCSymbol *BeginSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
 
-  unsigned Res =
-        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
-  SmallVector<unsigned, 8> Args;
-  for (auto &Arg: I.arg_operands())
-    Args.push_back(packRegs(*Arg, MIRBuilder));
+  ArrayRef<Register> Res;
+  if (!I.getType()->isVoidTy())
+    Res = getOrCreateVRegs(I);
+  SmallVector<ArrayRef<Register>, 8> Args;
+  Register SwiftErrorVReg = 0;
+  Register SwiftInVReg = 0;
+  for (auto &Arg : I.arg_operands()) {
+    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+      assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+      LLT Ty = getLLTForType(*Arg->getType(), *DL);
+      SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+      MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+                                            &I, &MIRBuilder.getMBB(), Arg));
+      Args.push_back(makeArrayRef(SwiftInVReg));
+      SwiftErrorVReg =
+          SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+      continue;
+    }
+
+    Args.push_back(getOrCreateVRegs(*Arg));
+  }
 
-  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;
 
-  unpackRegs(I, Res, MIRBuilder);
-
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
 
@@ -1241,6 +1712,12 @@ bool IRTranslator::translateInvoke(const User &U,
   return true;
 }
 
+bool IRTranslator::translateCallBr(const User &U,
+                                   MachineIRBuilder &MIRBuilder) {
+  // FIXME: Implement this. callbr is not translated yet, so always fail.
+  return false;
+}
+
 bool IRTranslator::translateLandingPad(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
   const LandingPadInst &LP = cast<LandingPadInst>(U);
@@ -1270,7 +1747,7 @@ bool IRTranslator::translateLandingPad(const User &U,
     .addSym(MF->addLandingPad(&MBB));
 
   LLT Ty = getLLTForType(*LP.getType(), *DL);
-  unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+  Register Undef = MRI->createGenericVirtualRegister(Ty);
   MIRBuilder.buildUndef(Undef);
 
   SmallVector<LLT, 2> Tys;
@@ -1279,20 +1756,20 @@ bool IRTranslator::translateLandingPad(const User &U,
   assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
 
   // Mark exception register as live in.
-  unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+  Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
   if (!ExceptionReg)
     return false;
 
   MBB.addLiveIn(ExceptionReg);
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
   MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
 
-  unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+  Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
   if (!SelectorReg)
     return false;
 
   MBB.addLiveIn(SelectorReg);
-  unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+  Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
   MIRBuilder.buildCast(ResRegs[1], PtrVReg);
 
@@ -1304,10 +1781,10 @@ bool IRTranslator::translateAlloca(const User &U,
   auto &AI = cast<AllocaInst>(U);
 
   if (AI.isSwiftError())
-    return false;
+    return true;
 
   if (AI.isStaticAlloca()) {
-    unsigned Res = getOrCreateVReg(AI);
+    Register Res = getOrCreateVReg(AI);
     int FI = getOrCreateFrameIndex(AI);
     MIRBuilder.buildFrameIndex(Res, FI);
     return true;
@@ -1322,29 +1799,29 @@ bool IRTranslator::translateAlloca(const User &U,
   unsigned Align =
       std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
 
-  unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+  Register NumElts = getOrCreateVReg(*AI.getArraySize());
 
   Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
   LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
   if (MRI->getType(NumElts) != IntPtrTy) {
-    unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+    Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
     MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
     NumElts = ExtElts;
   }
 
-  unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
-  unsigned TySize =
+  Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+  Register TySize =
       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
 
   LLT PtrTy = getLLTForType(*AI.getType(), *DL);
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
 
-  unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+  Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
   MIRBuilder.buildCopy(SPTmp, SPReg);
 
-  unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+  Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
   MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
 
   // Handle alignment. We have to realign if the allocation granule was smaller
@@ -1357,7 +1834,7 @@ bool IRTranslator::translateAlloca(const User &U,
     // Round the size of the allocation up to the stack alignment size
     // by add SA-1 to the size. This doesn't overflow because we're computing
     // an address inside an alloca.
-    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+    Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
     AllocTmp = AlignedAlloc;
   }
@@ -1387,7 +1864,7 @@ bool IRTranslator::translateInsertElement(const User &U,
   // If it is a <1 x Ty> vector, use the scalar as it is
   // not a legal vector type in LLT.
   if (U.getType()->getVectorNumElements() == 1) {
-    unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+    Register Elt = getOrCreateVReg(*U.getOperand(1));
     auto &Regs = *VMap.getVRegs(U);
     if (Regs.empty()) {
       Regs.push_back(Elt);
@@ -1398,10 +1875,10 @@ bool IRTranslator::translateInsertElement(const User &U,
     return true;
   }
 
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Val = getOrCreateVReg(*U.getOperand(0));
-  unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-  unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+  Register Res = getOrCreateVReg(U);
+  Register Val = getOrCreateVReg(*U.getOperand(0));
+  Register Elt = getOrCreateVReg(*U.getOperand(1));
+  Register Idx = getOrCreateVReg(*U.getOperand(2));
   MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
   return true;
 }
@@ -1411,7 +1888,7 @@ bool IRTranslator::translateExtractElement(const User &U,
   // If it is a <1 x Ty> vector, use the scalar as it is
   // not a legal vector type in LLT.
   if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
-    unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+    Register Elt = getOrCreateVReg(*U.getOperand(0));
     auto &Regs = *VMap.getVRegs(U);
     if (Regs.empty()) {
       Regs.push_back(Elt);
@@ -1421,11 +1898,11 @@ bool IRTranslator::translateExtractElement(const User &U,
     }
     return true;
   }
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Val = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  Register Val = getOrCreateVReg(*U.getOperand(0));
   const auto &TLI = *MF->getSubtarget().getTargetLowering();
   unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
-  unsigned Idx = 0;
+  Register Idx;
   if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
     if (CI->getBitWidth() != PreferredVecIdxWidth) {
       APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
@@ -1481,11 +1958,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
   Type *ValType = ResType->Type::getStructElementType(0);
 
   auto Res = getOrCreateVRegs(I);
-  unsigned OldValRes = Res[0];
-  unsigned SuccessRes = Res[1];
-  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
-  unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
-  unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+  Register OldValRes = Res[0];
+  Register SuccessRes = Res[1];
+  Register Addr = getOrCreateVReg(*I.getPointerOperand());
+  Register Cmp = getOrCreateVReg(*I.getCompareOperand());
+  Register NewVal = getOrCreateVReg(*I.getNewValOperand());
 
   MIRBuilder.buildAtomicCmpXchgWithSuccess(
       OldValRes, SuccessRes, Addr, Cmp, NewVal,
@@ -1507,9 +1984,9 @@ bool IRTranslator::translateAtomicRMW(const User &U,
 
   Type *ResType = I.getType();
 
-  unsigned Res = getOrCreateVReg(I);
-  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
-  unsigned Val = getOrCreateVReg(*I.getValOperand());
+  Register Res = getOrCreateVReg(I);
+  Register Addr = getOrCreateVReg(*I.getPointerOperand());
+  Register Val = getOrCreateVReg(*I.getValOperand());
 
   unsigned Opcode = 0;
   switch (I.getOperation()) {
@@ -1560,6 +2037,14 @@ bool IRTranslator::translateAtomicRMW(const User &U,
   return true;
 }
 
+bool IRTranslator::translateFence(const User &U,
+                                  MachineIRBuilder &MIRBuilder) {
+  const FenceInst &Fence = cast<FenceInst>(U);
+  MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
+                        Fence.getSyncScopeID());
+  return true;
+}
+
 void IRTranslator::finishPendingPhis() {
 #ifndef NDEBUG
   DILocationVerifier Verifier;
@@ -1569,27 +2054,20 @@ void IRTranslator::finishPendingPhis() {
   for (auto &Phi : PendingPHIs) {
     const PHINode *PI = Phi.first;
     ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
     EntryBuilder->setDebugLoc(PI->getDebugLoc());
 #ifndef NDEBUG
     Verifier.setCurrentInst(PI);
 #endif // ifndef NDEBUG
 
-    // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
-    // won't create extra control flow here, otherwise we need to find the
-    // dominating predecessor here (or perhaps force the weirder IRTranslators
-    // to provide a simple boundary).
-    SmallSet<const BasicBlock *, 4> HandledPreds;
-
+    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
     for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
       auto IRPred = PI->getIncomingBlock(i);
-      if (HandledPreds.count(IRPred))
-        continue;
-
-      HandledPreds.insert(IRPred);
-      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
+      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
-               "incorrect CFG at MachineBasicBlock level");
+        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
+          continue;
+        SeenPreds.insert(Pred);
         for (unsigned j = 0; j < ValRegs.size(); ++j) {
           MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
           MIB.addUse(ValRegs[j]);
@@ -1611,8 +2089,15 @@ bool IRTranslator::valueIsSplit(const Value &V,
 
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder->setDebugLoc(Inst.getDebugLoc());
-  EntryBuilder->setDebugLoc(Inst.getDebugLoc());
-  switch(Inst.getOpcode()) {
+  // We only emit constants into the entry block from here. To prevent jumpy
+  // debug behaviour, set the line to 0 (keeping the scope and inlined-at).
+  if (const DebugLoc &DL = Inst.getDebugLoc())
+    EntryBuilder->setDebugLoc(
+        DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+  else
+    EntryBuilder->setDebugLoc(DebugLoc());
+
+  switch (Inst.getOpcode()) {
 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
   case Instruction::OPCODE:                                                    \
     return translate##OPCODE(Inst, *CurBuilder.get());
@@ -1622,7 +2107,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
   }
 }
 
-bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+bool IRTranslator::translate(const Constant &C, Register Reg) {
   if (auto CI = dyn_cast<ConstantInt>(&C))
     EntryBuilder->buildConstant(Reg, *CI);
   else if (auto CF = dyn_cast<ConstantFP>(&C))
@@ -1635,7 +2120,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     unsigned NullSize = DL->getTypeSizeInBits(C.getType());
     auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
     auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
-    unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+    Register ZeroReg = getOrCreateVReg(*ZeroVal);
     EntryBuilder->buildCast(Reg, ZeroReg);
   } else if (auto GV = dyn_cast<GlobalValue>(&C))
     EntryBuilder->buildGlobalValue(Reg, GV);
@@ -1645,7 +2130,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     // Return the scalar if it is a <1 x Ty> vector.
     if (CAZ->getNumElements() == 1)
       return translate(*CAZ->getElementValue(0u), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
     for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
       Constant &Elt = *CAZ->getElementValue(i);
       Ops.push_back(getOrCreateVReg(Elt));
@@ -1655,7 +2140,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     // Return the scalar if it is a <1 x Ty> vector.
     if (CV->getNumElements() == 1)
       return translate(*CV->getElementAsConstant(0), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
     for (unsigned i = 0; i < CV->getNumElements(); ++i) {
       Constant &Elt = *CV->getElementAsConstant(i);
       Ops.push_back(getOrCreateVReg(Elt));
@@ -1673,7 +2158,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
       return translate(*CV->getOperand(0), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
     for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
       Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
     }
@@ -1686,6 +2171,17 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
   return true;
 }
 
+void IRTranslator::finalizeBasicBlock() {
+  for (auto &JTCase : SL->JTCases) {
+    // Emit header first, if it wasn't already emitted.
+    if (!JTCase.first.Emitted)
+      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
+
+    emitJumpTable(JTCase.second, JTCase.second.MBB);
+  }
+  SL->JTCases.clear();
+}
+
 void IRTranslator::finalizeFunction() {
   // Release the memory used by the different maps we
   // needed during the translation.
@@ -1698,6 +2194,7 @@ void IRTranslator::finalizeFunction() {
   // destroying it twice (in ~IRTranslator() and ~LLVMContext())
   EntryBuilder.reset();
   CurBuilder.reset();
+  FuncInfo.clear();
 }
 
 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1710,13 +2207,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   // Set the CSEConfig and run the analysis.
   GISelCSEInfo *CSEInfo = nullptr;
   TPC = &getAnalysis<TargetPassConfig>();
-  bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
-  // Disable CSE for O0.
-  bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
+                       ? EnableCSEInIRTranslator
+                       : TPC->isGISelCSEEnabled();
+
   if (EnableCSE) {
     EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
-    std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
-    CSEInfo = &Wrapper.get(std::move(Config));
+    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
     EntryBuilder->setCSEInfo(CSEInfo);
     CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
     CurBuilder->setCSEInfo(CSEInfo);
@@ -1730,6 +2227,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MRI = &MF->getRegInfo();
   DL = &F.getParent()->getDataLayout();
   ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+  FuncInfo.MF = MF;
+  FuncInfo.BPI = nullptr;
+  const auto &TLI = *MF->getSubtarget().getTargetLowering();
+  const TargetMachine &TM = MF->getTarget();
+  SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+  SL->init(TLI, TM, *DL);
+
+  EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
 
   assert(PendingPHIs.empty() && "stale PHIs");
 
@@ -1749,6 +2254,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MF->push_back(EntryBB);
   EntryBuilder->setMBB(*EntryBB);
 
+  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+  SwiftError.setFunction(CurMF);
+  SwiftError.createEntriesInEntryBlock(DbgLoc);
+
   // Create all blocks, in IR order, to preserve the layout.
   for (const BasicBlock &BB: F) {
     auto *&MBB = BBToMBB[&BB];
@@ -1764,20 +2273,25 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   EntryBB->addSuccessor(&getMBB(F.front()));
 
   // Lower the actual args into this basic block.
-  SmallVector<unsigned, 8> VRegArgs;
+  SmallVector<ArrayRef<Register>, 8> VRegArgs;
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
-    VRegArgs.push_back(
-        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+    VRegArgs.push_back(VRegs);
+
+    if (Arg.hasSwiftErrorAttr()) {
+      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+    }
   }
 
   // We don't currently support translating swifterror or swiftself functions.
   for (auto &Arg : F.args()) {
-    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+    if (Arg.hasSwiftSelfAttr()) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  F.getSubprogram(), &F.getEntryBlock());
-      R << "unable to lower arguments due to swifterror/swiftself: "
+      R << "unable to lower arguments due to swiftself: "
         << ore::NV("Prototype", F.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
@@ -1792,20 +2306,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
     return false;
   }
 
-  auto ArgIt = F.arg_begin();
-  for (auto &VArg : VRegArgs) {
-    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
-    // creating redundant copies.
-    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
-      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
-      assert(VRegs.empty() && "VRegs already populated?");
-      VRegs.push_back(VArg);
-    } else {
-      unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
-    }
-    ArgIt++;
-  }
-
   // Need to visit defs before uses when translating instructions.
   GISelObserverWrapper WrapperObserver;
   if (EnableCSE && CSEInfo)
@@ -1845,6 +2345,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
         reportTranslationError(*MF, *TPC, *ORE, R);
         return false;
       }
+
+      finalizeBasicBlock();
     }
 #ifndef NDEBUG
     WrapperObserver.removeObserver(&Verifier);
@@ -1853,6 +2355,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   finishPendingPhis();
 
+  SwiftError.propagateVRegs();
+
   // Merge the argument lowering and constants block with its single
   // successor, the LLVM-IR entry block.  We want the basic block to
   // be maximal.
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index c83c791327e4..70694fe6b6c8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -50,9 +49,7 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
                     "Select target instructions out of generic instructions",
                     false, false)
 
-InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
-  initializeInstructionSelectPass(*PassRegistry::getPassRegistry());
-}
+InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
 
 void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetPassConfig>();
@@ -90,10 +87,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
                          "instruction is not legal", *MI);
       return false;
     }
-#endif
   // FIXME: We could introduce new blocks and will need to fix the outer loop.
   // Until then, keep track of the number of blocks to assert that we don't.
   const size_t NumBlocks = MF.size();
+#endif
 
   for (MachineBasicBlock *MBB : post_order(&MF)) {
     if (MBB->empty())
@@ -145,8 +142,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-
   for (MachineBasicBlock &MBB : MF) {
     if (MBB.empty())
       continue;
@@ -178,6 +173,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
+#ifndef NDEBUG
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   // Now that selection is complete, there are no more generic vregs.  Verify
   // that the size of the now-constrained vreg is unchanged and that it has a
   // register class.
@@ -216,7 +213,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
     reportGISelFailure(MF, TPC, MORE, R);
     return false;
   }
-
+#endif
   auto &TLI = *MF.getSubtarget().getTargetLowering();
   TLI.finalizeLowering(MF);
 
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 38913e4afcba..2ad35b3a72c9 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,16 +41,16 @@ bool InstructionSelector::constrainOperandRegToRegClass(
   MachineFunction &MF = *MBB.getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  return
-      constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
+  return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
+                                  I.getOperand(OpIdx), OpIdx);
 }
 
 bool InstructionSelector::isOperandImmEqual(
     const MachineOperand &MO, int64_t Value,
     const MachineRegisterInfo &MRI) const {
   if (MO.isReg() && MO.getReg())
-    if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
-      return *VRegVal == Value;
+    if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+      return VRegVal->Value == Value;
   return false;
 }
 
@@ -79,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
       std::next(MI.getIterator()) == IntoMI.getIterator())
     return true;
 
-  return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
-         empty(MI.implicit_operands());
+  return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+         !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
 }
diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 94eab9ae00c8..601d50e9806f 100644
--- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,15 +38,19 @@ LegalityPredicate LegalityPredicates::typePairInSet(
   };
 }
 
-LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
+LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
     unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
-    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) {
-  SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
+    std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
+  SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
   return [=](const LegalityQuery &Query) {
-    TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
-                                Query.MMODescrs[MMOIdx].SizeInBits};
-    return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
-           TypesAndMemSize.end();
+    TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+                                Query.MMODescrs[MMOIdx].SizeInBits,
+                                Query.MMODescrs[MMOIdx].AlignInBits};
+    return std::find_if(
+      TypesAndMemDesc.begin(), TypesAndMemDesc.end(),
+      [=](const TypePairAndMemDesc &Entry) ->bool {
+        return Match.isCompatible(Entry);
+      }) != TypesAndMemDesc.end();
   };
 }
 
@@ -57,10 +60,30 @@ LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
   };
 }
 
+LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx].isVector();
+  };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx].isPointer();
+  };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
+                                                unsigned AddrSpace) {
+  return [=](const LegalityQuery &Query) {
+    LLT Ty = Query.Types[TypeIdx];
+    return Ty.isPointer() && Ty.getAddressSpace() == AddrSpace;
+  };
+}
+
 LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
                                                    unsigned Size) {
   return [=](const LegalityQuery &Query) {
-    const LLT &QueryTy = Query.Types[TypeIdx];
+    const LLT QueryTy = Query.Types[TypeIdx];
     return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
   };
 }
@@ -68,18 +91,49 @@ LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
 LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
                                                 unsigned Size) {
   return [=](const LegalityQuery &Query) {
-    const LLT &QueryTy = Query.Types[TypeIdx];
+    const LLT QueryTy = Query.Types[TypeIdx];
     return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
   };
 }
 
+LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx,
+                                                              unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.getScalarSizeInBits() < Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltWiderThan(unsigned TypeIdx,
+                                                           unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.getScalarSizeInBits() > Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return !isPowerOf2_32(QueryTy.getScalarSizeInBits());
+  };
+}
+
 LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
-    const LLT &QueryTy = Query.Types[TypeIdx];
+    const LLT QueryTy = Query.Types[TypeIdx];
     return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
   };
 }
 
+LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
+                                               unsigned TypeIdx1) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx0].getSizeInBits() ==
+           Query.Types[TypeIdx1].getSizeInBits();
+  };
+}
+
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
     return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
@@ -88,8 +142,8 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
 
 LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
-    const LLT &QueryTy = Query.Types[TypeIdx];
-    return QueryTy.isVector() && isPowerOf2_32(QueryTy.getNumElements());
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
   };
 }
 
diff --git a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index a29b32ecdc03..fcbecf90a845 100644
--- a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,25 +26,46 @@ LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx,
   };
 }
 
-LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx,
-                                                          unsigned Min) {
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+                                                    unsigned FromTypeIdx) {
   return [=](const LegalityQuery &Query) {
-    unsigned NewSizeInBits =
-        1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits());
-    if (NewSizeInBits < Min)
-      NewSizeInBits = Min;
-    return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits));
+    const LLT OldTy = Query.Types[TypeIdx];
+    const LLT NewTy = Query.Types[FromTypeIdx];
+    return std::make_pair(TypeIdx, OldTy.changeElementType(NewTy));
+  };
+}
+
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+                                                    LLT NewEltTy) {
+  return [=](const LegalityQuery &Query) {
+    const LLT OldTy = Query.Types[TypeIdx];
+    return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+  };
+}
+
+LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
+                                                               unsigned Min) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    unsigned NewEltSizeInBits =
+        std::max(1u << Log2_32_Ceil(Ty.getScalarSizeInBits()), Min);
+    return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
   };
 }
 
 LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
                                                            unsigned Min) {
   return [=](const LegalityQuery &Query) {
-    const LLT &VecTy = Query.Types[TypeIdx];
-    unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements());
-    if (NewNumElements < Min)
-      NewNumElements = Min;
-    return std::make_pair(
-        TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits()));
+    const LLT VecTy = Query.Types[TypeIdx];
+    unsigned NewNumElements =
+        std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min);
+    return std::make_pair(TypeIdx,
+                          LLT::vector(NewNumElements, VecTy.getElementType()));
+  };
+}
+
+LegalizeMutation LegalizeMutations::scalarize(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    return std::make_pair(TypeIdx, Query.Types[TypeIdx].getElementType());
   };
 }
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 84131e59948c..b5b26bff34bb 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
 
 #include <iterator>
 
@@ -50,9 +50,7 @@ INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
                     "Legalize the Machine IR a function's Machine IR", false,
                     false)
 
-Legalizer::Legalizer() : MachineFunctionPass(ID) {
-  initializeLegalizerPass(*PassRegistry::getPassRegistry());
-}
+Legalizer::Legalizer() : MachineFunctionPass(ID) { }
 
 void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetPassConfig>();
@@ -77,6 +75,7 @@ static bool isArtifact(const MachineInstr &MI) {
   case TargetOpcode::G_UNMERGE_VALUES:
   case TargetOpcode::G_CONCAT_VECTORS:
   case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_EXTRACT:
     return true;
   }
 }
@@ -87,12 +86,15 @@ namespace {
 class LegalizerWorkListManager : public GISelChangeObserver {
   InstListTy &InstList;
   ArtifactListTy &ArtifactList;
+#ifndef NDEBUG
+  SmallVector<MachineInstr *, 4> NewMIs;
+#endif
 
 public:
   LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
       : InstList(Insts), ArtifactList(Arts) {}
 
-  void createdInstr(MachineInstr &MI) override {
+  void createdOrChangedInstr(MachineInstr &MI) {
     // Only legalize pre-isel generic instructions.
     // Legalization process could generate Target specific pseudo
     // instructions with generic types. Don't record them
@@ -102,7 +104,20 @@ public:
       else
         InstList.insert(&MI);
     }
+  }
+
+  void createdInstr(MachineInstr &MI) override {
     LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+    LLVM_DEBUG(NewMIs.push_back(&MI));
+    createdOrChangedInstr(MI);
+  }
+
+  void printNewInstrs() {
+    LLVM_DEBUG({
+      for (const auto *MI : NewMIs)
+        dbgs() << ".. .. New MI: " << *MI;
+      NewMIs.clear();
+    });
   }
 
   void erasingInstr(MachineInstr &MI) override {
@@ -119,7 +134,7 @@ public:
     // When insts change, we want to revisit them to legalize them again.
     // We'll consider them the same as created.
     LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
-    createdInstr(MI);
+    createdOrChangedInstr(MI);
   }
 };
 } // namespace
@@ -155,20 +170,22 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
       if (!isPreISelGenericOpcode(MI.getOpcode()))
         continue;
       if (isArtifact(MI))
-        ArtifactList.insert(&MI);
+        ArtifactList.deferred_insert(&MI);
       else
-        InstList.insert(&MI);
+        InstList.deferred_insert(&MI);
     }
   }
+  ArtifactList.finalize();
+  InstList.finalize();
   std::unique_ptr<MachineIRBuilder> MIRBuilder;
   GISelCSEInfo *CSEInfo = nullptr;
-  bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
-  // Disable CSE for O0.
-  bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+  bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+                       ? EnableCSEInLegalizer
+                       : TPC.isGISelCSEEnabled();
+
   if (EnableCSE) {
     MIRBuilder = make_unique<CSEMIRBuilder>();
-    std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
-    CSEInfo = &Wrapper.get(std::move(Config));
+    CSEInfo = &Wrapper.get(TPC.getCSEConfig());
     MIRBuilder->setCSEInfo(CSEInfo);
   } else
     MIRBuilder = make_unique<MachineIRBuilder>();
@@ -210,6 +227,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
                            "unable to legalize instruction", MI);
         return false;
       }
+      WorkListObserver.printNewInstrs();
       Changed |= Res == LegalizerHelper::Legalized;
     }
     while (!ArtifactList.empty()) {
@@ -222,7 +240,9 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
       SmallVector<MachineInstr *, 4> DeadInstructions;
-      if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
+      if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
+                                            WrapperObserver)) {
+        WorkListObserver.printNewInstrs();
         for (auto *DeadMI : DeadInstructions) {
           LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
           RemoveDeadInstFromLists(DeadMI);
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b3fc94cdec60..f5cf7fc9bd9b 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,6 +29,39 @@
 using namespace llvm;
 using namespace LegalizeActions;
 
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// \returns a pair {number of whole \p NarrowTy pieces, number of leftover
+/// pieces}. \p LeftoverTy must be invalid on entry and is assigned only when
+/// \p OrigTy is not an exact multiple of \p NarrowTy in size.
+///
+/// Returns {-1, -1} if the breakdown is not satisfiable.
+static std::pair<int, int>
+getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+  // All sizes below are in bits.
+  unsigned Size = OrigTy.getSizeInBits();
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  unsigned NumParts = Size / NarrowSize;
+  unsigned LeftoverSize = Size - NumParts * NarrowSize;
+  assert(Size > NarrowSize);
+
+  if (LeftoverSize == 0)
+    return {NumParts, 0}; // Exact multiple; LeftoverTy stays invalid.
+
+  if (NarrowTy.isVector()) {
+    unsigned EltSize = OrigTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return {-1, -1}; // Leftover is not a whole number of elements.
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
+  return std::make_pair(NumParts, NumLeftover);
+}
+
 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                  GISelChangeObserver &Observer,
                                  MachineIRBuilder &Builder)
@@ -50,6 +82,10 @@ LegalizerHelper::LegalizeResult
 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
 
+  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
+      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+    return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
+                                                     : UnableToLegalize;
   auto Step = LI.getAction(MI, MRI);
   switch (Step.Action) {
   case Legal:
@@ -70,6 +106,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
   case FewerElements:
     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
+  case MoreElements:
+    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
+    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
   case Custom:
     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
@@ -80,13 +119,103 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
   }
 }
 
-void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
-                                   SmallVectorImpl<unsigned> &VRegs) {
+void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
+                                   SmallVectorImpl<Register> &VRegs) {
   for (int i = 0; i < NumParts; ++i)
     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
   MIRBuilder.buildUnmerge(VRegs, Reg);
 }
 
+bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
+                                   LLT MainTy, LLT &LeftoverTy,
+                                   SmallVectorImpl<Register> &VRegs,
+                                   SmallVectorImpl<Register> &LeftoverRegs) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+  // Split Reg (typed RegTy) into MainTy pieces plus leftover bits, if any.
+  unsigned RegSize = RegTy.getSizeInBits();
+  unsigned MainSize = MainTy.getSizeInBits();
+  unsigned NumParts = RegSize / MainSize;
+  unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+  // Use a single unmerge when MainTy evenly divides RegTy.
+  if (LeftoverSize == 0) {
+    for (unsigned I = 0; I < NumParts; ++I)
+      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+    MIRBuilder.buildUnmerge(VRegs, Reg);
+    return true; // LeftoverTy and LeftoverRegs are left untouched.
+  }
+
+  if (MainTy.isVector()) {
+    unsigned EltSize = MainTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return false; // Leftover is not a whole number of elements.
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  // For irregular sizes, extract each main part at offset MainSize * I.
+  for (unsigned I = 0; I != NumParts; ++I) {
+    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
+    VRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+  }
+  // Bits past the last main part are extracted as LeftoverTy pieces.
+  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+       Offset += LeftoverSize) {
+    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+    LeftoverRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, Offset);
+  }
+
+  return true;
+}
+
+void LegalizerHelper::insertParts(Register DstReg, // Receives the result.
+                                  LLT ResultTy, LLT PartTy,
+                                  ArrayRef<Register> PartRegs,
+                                  LLT LeftoverTy,
+                                  ArrayRef<Register> LeftoverRegs) {
+  if (!LeftoverTy.isValid()) { // No leftover: one instruction builds DstReg.
+    assert(LeftoverRegs.empty());
+    // Choose the builder from the result and part kinds.
+    if (!ResultTy.isVector()) {
+      MIRBuilder.buildMerge(DstReg, PartRegs); // Non-vector result.
+      return;
+    }
+
+    if (PartTy.isVector())
+      MIRBuilder.buildConcatVectors(DstReg, PartRegs); // Vector parts.
+    else
+      MIRBuilder.buildBuildVector(DstReg, PartRegs); // Non-vector parts.
+    return;
+  }
+  // With a leftover, chain inserts onto an initially undef ResultTy value.
+  unsigned PartSize = PartTy.getSizeInBits();
+  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+
+  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
+  MIRBuilder.buildUndef(CurResultReg);
+
+  unsigned Offset = 0; // Advances by the size of each inserted piece.
+  for (Register PartReg : PartRegs) {
+    Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
+    CurResultReg = NewResultReg;
+    Offset += PartSize;
+  }
+  // NOTE(review): DstReg is written only if LeftoverRegs is non-empty.
+  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
+    // Use the original output register for the final insert to avoid a copy.
+    Register NewResultReg = (I + 1 == E) ?
+      DstReg : MRI.createGenericVirtualRegister(ResultTy);
+
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
+    CurResultReg = NewResultReg;
+    Offset += LeftoverPartSize;
+  }
+}
+
 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   switch (Opcode) {
   case TargetOpcode::G_SDIV:
@@ -116,6 +245,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   case TargetOpcode::G_FDIV:
     assert((Size == 32 || Size == 64) && "Unsupported size");
     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
+  case TargetOpcode::G_FEXP:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
+  case TargetOpcode::G_FEXP2:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
   case TargetOpcode::G_FREM:
     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
   case TargetOpcode::G_FPOW:
@@ -123,6 +258,32 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   case TargetOpcode::G_FMA:
     assert((Size == 32 || Size == 64) && "Unsupported size");
     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
+  case TargetOpcode::G_FSIN:
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    return Size == 128 ? RTLIB::SIN_F128
+                       : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
+  case TargetOpcode::G_FCOS:
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    return Size == 128 ? RTLIB::COS_F128
+                       : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
+  case TargetOpcode::G_FLOG10:
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    return Size == 128 ? RTLIB::LOG10_F128
+                       : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
+  case TargetOpcode::G_FLOG:
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    return Size == 128 ? RTLIB::LOG_F128
+                       : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
+  case TargetOpcode::G_FLOG2:
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    return Size == 128 ? RTLIB::LOG2_F128
+                       : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
+  case TargetOpcode::G_FCEIL:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
+  case TargetOpcode::G_FFLOOR:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
   }
   llvm_unreachable("Unknown libcall function");
 }
@@ -214,7 +375,20 @@ LegalizerHelper::libcall(MachineInstr &MI) {
   case TargetOpcode::G_FDIV:
   case TargetOpcode::G_FMA:
   case TargetOpcode::G_FPOW:
-  case TargetOpcode::G_FREM: {
+  case TargetOpcode::G_FREM:
+  case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FLOG10:
+  case TargetOpcode::G_FLOG:
+  case TargetOpcode::G_FLOG2:
+  case TargetOpcode::G_FEXP:
+  case TargetOpcode::G_FEXP2:
+  case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FFLOOR: {
+    if (Size > 64) {
+      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
+      return UnableToLegalize;
+    }
     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
     if (Status != Legalized)
@@ -250,10 +424,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
     // FIXME: Support other types
     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-    if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
+    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
       return UnableToLegalize;
     LegalizeResult Status = conversionLibcall(
-        MI, MIRBuilder, Type::getInt32Ty(Ctx),
+        MI, MIRBuilder,
+        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
     if (Status != Legalized)
       return Status;
@@ -264,12 +439,12 @@ LegalizerHelper::libcall(MachineInstr &MI) {
     // FIXME: Support other types
     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-    if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
+    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
       return UnableToLegalize;
     LegalizeResult Status = conversionLibcall(
         MI, MIRBuilder,
         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
-        Type::getInt32Ty(Ctx));
+        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
     if (Status != Legalized)
       return Status;
     break;
@@ -283,10 +458,6 @@ LegalizerHelper::libcall(MachineInstr &MI) {
 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                               unsigned TypeIdx,
                                                               LLT NarrowTy) {
-  // FIXME: Don't know how to handle secondary types yet.
-  if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
-    return UnableToLegalize;
-
   MIRBuilder.setInstr(MI);
 
   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -302,12 +473,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
       return UnableToLegalize;
     int NumParts = SizeOp0 / NarrowSize;
 
-    SmallVector<unsigned, 2> DstRegs;
+    SmallVector<Register, 2> DstRegs;
     for (int i = 0; i < NumParts; ++i)
       DstRegs.push_back(
           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
 
-    unsigned DstReg = MI.getOperand(0).getReg();
+    Register DstReg = MI.getOperand(0).getReg();
     if(MRI.getType(DstReg).isVector())
       MIRBuilder.buildBuildVector(DstReg, DstRegs);
     else
@@ -315,6 +486,38 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_CONSTANT: {
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
+    unsigned TotalSize = Ty.getSizeInBits();
+    unsigned NarrowSize = NarrowTy.getSizeInBits();
+    int NumParts = TotalSize / NarrowSize;
+
+    SmallVector<Register, 4> PartRegs;
+    for (int I = 0; I != NumParts; ++I) {
+      unsigned Offset = I * NarrowSize;
+      auto K = MIRBuilder.buildConstant(NarrowTy,
+                                        Val.lshr(Offset).trunc(NarrowSize));
+      PartRegs.push_back(K.getReg(0));
+    }
+
+    LLT LeftoverTy;
+    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
+    SmallVector<Register, 1> LeftoverRegs;
+    if (LeftoverBits != 0) {
+      LeftoverTy = LLT::scalar(LeftoverBits);
+      auto K = MIRBuilder.buildConstant(
+        LeftoverTy,
+        Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+      LeftoverRegs.push_back(K.getReg(0));
+    }
+
+    insertParts(MI.getOperand(0).getReg(),
+                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_ADD: {
     // FIXME: add support for when SizeOp0 isn't an exact multiple of
     // NarrowSize.
@@ -323,16 +526,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     // Expand in terms of carry-setting/consuming G_ADDE instructions.
     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
 
-    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
 
-    unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
     MIRBuilder.buildConstant(CarryIn, 0);
 
     for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+      Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
 
       MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
                             Src2Regs[i], CarryIn);
@@ -340,67 +543,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
       DstRegs.push_back(DstReg);
       CarryIn = CarryOut;
     }
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
-    else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
-    MI.eraseFromParent();
-    return Legalized;
-  }
-  case TargetOpcode::G_EXTRACT: {
-    if (TypeIdx != 1)
-      return UnableToLegalize;
-
-    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
-    // FIXME: add support for when SizeOp1 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp1 % NarrowSize != 0)
-      return UnableToLegalize;
-    int NumParts = SizeOp1 / NarrowSize;
-
-    SmallVector<unsigned, 2> SrcRegs, DstRegs;
-    SmallVector<uint64_t, 2> Indexes;
-    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
-
-    unsigned OpReg = MI.getOperand(0).getReg();
-    uint64_t OpStart = MI.getOperand(2).getImm();
-    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned SrcStart = i * NarrowSize;
-
-      if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
-        // No part of the extract uses this subregister, ignore it.
-        continue;
-      } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
-        // The entire subregister is extracted, forward the value.
-        DstRegs.push_back(SrcRegs[i]);
-        continue;
-      }
-
-      // OpSegStart is where this destination segment would start in OpReg if it
-      // extended infinitely in both directions.
-      int64_t ExtractOffset;
-      uint64_t SegSize;
-      if (OpStart < SrcStart) {
-        ExtractOffset = 0;
-        SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
-      } else {
-        ExtractOffset = OpStart - SrcStart;
-        SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
-      }
-
-      unsigned SegReg = SrcRegs[i];
-      if (ExtractOffset != 0 || SegSize != NarrowSize) {
-        // A genuine extract is needed.
-        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
-        MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
-      }
-
-      DstRegs.push_back(SegReg);
-    }
-
-    unsigned DstReg = MI.getOperand(0).getReg();
+    Register DstReg = MI.getOperand(0).getReg();
     if(MRI.getType(DstReg).isVector())
       MIRBuilder.buildBuildVector(DstReg, DstRegs);
     else
@@ -408,178 +551,117 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
-  case TargetOpcode::G_INSERT: {
+  case TargetOpcode::G_SUB: {
     // FIXME: add support for when SizeOp0 isn't an exact multiple of
     // NarrowSize.
     if (SizeOp0 % NarrowSize != 0)
       return UnableToLegalize;
 
-    int NumParts = SizeOp0 / NarrowSize;
-
-    SmallVector<unsigned, 2> SrcRegs, DstRegs;
-    SmallVector<uint64_t, 2> Indexes;
-    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
 
-    unsigned OpReg = MI.getOperand(2).getReg();
-    uint64_t OpStart = MI.getOperand(3).getImm();
-    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstStart = i * NarrowSize;
-
-      if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
-        // No part of the insert affects this subregister, forward the original.
-        DstRegs.push_back(SrcRegs[i]);
-        continue;
-      } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
-        // The entire subregister is defined by this insert, forward the new
-        // value.
-        DstRegs.push_back(OpReg);
-        continue;
-      }
+    SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
 
-      // OpSegStart is where this destination segment would start in OpReg if it
-      // extended infinitely in both directions.
-      int64_t ExtractOffset, InsertOffset;
-      uint64_t SegSize;
-      if (OpStart < DstStart) {
-        InsertOffset = 0;
-        ExtractOffset = DstStart - OpStart;
-        SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
-      } else {
-        InsertOffset = OpStart - DstStart;
-        ExtractOffset = 0;
-        SegSize =
-            std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
-      }
+    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+    Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
+                          {Src1Regs[0], Src2Regs[0]});
+    DstRegs.push_back(DstReg);
+    Register BorrowIn = BorrowOut;
+    for (int i = 1; i < NumParts; ++i) {
+      DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
 
-      unsigned SegReg = OpReg;
-      if (ExtractOffset != 0 || SegSize != OpSize) {
-        // A genuine extract is needed.
-        SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
-        MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
-      }
+      MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
+                            {Src1Regs[i], Src2Regs[i], BorrowIn});
 
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
       DstRegs.push_back(DstReg);
+      BorrowIn = BorrowOut;
     }
-
-    assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
-    else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_MUL:
+  case TargetOpcode::G_UMULH:
+    return narrowScalarMul(MI, NarrowTy);
+  case TargetOpcode::G_EXTRACT:
+    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
+  case TargetOpcode::G_INSERT:
+    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_LOAD: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
-      return UnableToLegalize;
-
     const auto &MMO = **MI.memoperands_begin();
-    // This implementation doesn't work for atomics. Give up instead of doing
-    // something invalid.
-    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
-        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.isVector())
       return UnableToLegalize;
 
-    int NumParts = SizeOp0 / NarrowSize;
-    LLT OffsetTy = LLT::scalar(
-        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
-    SmallVector<unsigned, 2> DstRegs;
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      unsigned SrcReg = 0;
-      unsigned Adjustment = i * NarrowSize / 8;
-      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
-      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
-          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
-          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
-          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
-      MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
-                                Adjustment);
+    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MIRBuilder.buildAnyExt(DstReg, TmpReg);
+      MI.eraseFromParent();
+      return Legalized;
+    }
 
-      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
+    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+  }
+  case TargetOpcode::G_ZEXTLOAD:
+  case TargetOpcode::G_SEXTLOAD: {
+    bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
+    Register DstReg = MI.getOperand(0).getReg();
+    Register PtrReg = MI.getOperand(1).getReg();
 
-      DstRegs.push_back(DstReg);
+    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+    auto &MMO = **MI.memoperands_begin();
+    if (MMO.getSizeInBits() == NarrowSize) {
+      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+    } else {
+      unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
+        : TargetOpcode::G_SEXTLOAD;
+      MIRBuilder.buildInstr(ExtLoad)
+        .addDef(TmpReg)
+        .addUse(PtrReg)
+        .addMemOperand(&MMO);
     }
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+    if (ZExt)
+      MIRBuilder.buildZExt(DstReg, TmpReg);
     else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
+      MIRBuilder.buildSExt(DstReg, TmpReg);
+
     MI.eraseFromParent();
     return Legalized;
   }
   case TargetOpcode::G_STORE: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
-      return UnableToLegalize;
-
     const auto &MMO = **MI.memoperands_begin();
-    // This implementation doesn't work for atomics. Give up instead of doing
-    // something invalid.
-    if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
-        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+
+    Register SrcReg = MI.getOperand(0).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    if (SrcTy.isVector())
       return UnableToLegalize;
 
     int NumParts = SizeOp0 / NarrowSize;
-    LLT OffsetTy = LLT::scalar(
-        MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
-    SmallVector<unsigned, 2> SrcRegs;
-    extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
-
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = 0;
-      unsigned Adjustment = i * NarrowSize / 8;
-      unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
-      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
-          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
-          NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
-          MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
-      MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
-                                Adjustment);
-
-      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
-    }
-    MI.eraseFromParent();
-    return Legalized;
-  }
-  case TargetOpcode::G_CONSTANT: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
+    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
+    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
+    if (SrcTy.isVector() && LeftoverBits != 0)
       return UnableToLegalize;
-    int NumParts = SizeOp0 / NarrowSize;
-    const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
-    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
 
-    SmallVector<unsigned, 2> DstRegs;
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      ConstantInt *CI =
-          ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
-      MIRBuilder.buildConstant(DstReg, *CI);
-      DstRegs.push_back(DstReg);
+    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildTrunc(TmpReg, SrcReg);
+      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MI.eraseFromParent();
+      return Legalized;
     }
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
-    else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
-    MI.eraseFromParent();
-    return Legalized;
+
+    return reduceLoadStoreWidth(MI, 0, NarrowTy);
   }
+  case TargetOpcode::G_SELECT:
+    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_AND:
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR: {
@@ -592,44 +674,112 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     // ...
     // AN = BinOp<Ty/N> BN, CN
     // A = G_MERGE_VALUES A1, ..., AN
+    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
+  }
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR:
+    return narrowScalarShift(MI, TypeIdx, NarrowTy);
+  case TargetOpcode::G_CTLZ:
+  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+  case TargetOpcode::G_CTTZ:
+  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+  case TargetOpcode::G_CTPOP:
+    if (TypeIdx != 0)
+      return UnableToLegalize; // TODO
 
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
+    Observer.changingInstr(MI);
+    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_INTTOPTR:
+    if (TypeIdx != 1)
       return UnableToLegalize;
-    int NumParts = SizeOp0 / NarrowSize;
 
-    // List the registers where the destination will be scattered.
-    SmallVector<unsigned, 2> DstRegs;
-    // List the registers where the first argument will be split.
-    SmallVector<unsigned, 2> SrcsReg1;
-    // List the registers where the second argument will be split.
-    SmallVector<unsigned, 2> SrcsReg2;
-    // Create all the temporary registers.
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
-      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
+    Observer.changingInstr(MI);
+    narrowScalarSrc(MI, NarrowTy, 1);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_PTRTOINT:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
 
-      DstRegs.push_back(DstReg);
-      SrcsReg1.push_back(SrcReg1);
-      SrcsReg2.push_back(SrcReg2);
+    Observer.changingInstr(MI);
+    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_PHI: {
+    unsigned NumParts = SizeOp0 / NarrowSize;
+    SmallVector<Register, 2> DstRegs;
+    SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
+    DstRegs.resize(NumParts);
+    SrcRegs.resize(MI.getNumOperands() / 2);
+    Observer.changingInstr(MI);
+    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
+      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
+                   SrcRegs[i / 2]);
+    }
+    MachineBasicBlock &MBB = *MI.getParent();
+    MIRBuilder.setInsertPt(MBB, MI);
+    for (unsigned i = 0; i < NumParts; ++i) {
+      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
+      MachineInstrBuilder MIB =
+          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
+      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
+        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
     }
-    // Explode the big arguments into smaller chunks.
-    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
-    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
+    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+    Observer.changedInstr(MI);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    if (TypeIdx != 2)
+      return UnableToLegalize;
 
-    // Do the operation on each small part.
-    for (int i = 0; i < NumParts; ++i)
-      MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
-                            {SrcsReg1[i], SrcsReg2[i]});
+    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+    Observer.changingInstr(MI);
+    narrowScalarSrc(MI, NarrowTy, OpIdx);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+  case TargetOpcode::G_ICMP: {
+    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    if (NarrowSize * 2 != SrcSize)
+      return UnableToLegalize;
 
-    // Gather the destination registers into the final destination.
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
-    else
-      MIRBuilder.buildMerge(DstReg, DstRegs);
+    Observer.changingInstr(MI);
+    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
+    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
+
+    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
+    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
+
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
+      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
+      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
+      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+      MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
+    } else {
+      const LLT s1 = LLT::scalar(1);
+      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+      MachineInstrBuilder CmpHEQ =
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
+          ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
+    }
+    Observer.changedInstr(MI);
     MI.eraseFromParent();
     return Legalized;
   }
@@ -643,43 +793,358 @@ void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
   MO.setReg(ExtB->getOperand(0).getReg());
 }
 
+void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
+                                      unsigned OpIdx) {
+  // Route operand OpIdx through a G_TRUNC to NarrowTy, then make MI read the
+  // truncated register instead of the original one.
+  MachineOperand &Use = MI.getOperand(OpIdx);
+  Use.setReg(MIRBuilder.buildTrunc(NarrowTy, Use.getReg()).getReg(0));
+}
+
 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                      unsigned OpIdx, unsigned TruncOpcode) {
   MachineOperand &MO = MI.getOperand(OpIdx);
-  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
   MO.setReg(DstExt);
 }
 
+void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
+                                      unsigned OpIdx, unsigned ExtOpcode) {
+  MachineOperand &Def = MI.getOperand(OpIdx);
+  Register NarrowDef = MRI.createGenericVirtualRegister(NarrowTy);
+  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+  MIRBuilder.buildInstr(ExtOpcode, {Def.getReg()}, {NarrowDef});
+  Def.setReg(NarrowDef); // MI itself now defines the NarrowTy register.
+}
+
+void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
+                                            unsigned OpIdx) {
+  MachineOperand &Def = MI.getOperand(OpIdx);
+  Register WideDef = MRI.createGenericVirtualRegister(WideTy);
+  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+  MIRBuilder.buildExtract(Def.getReg(), WideDef, 0); // Extract at offset 0.
+  Def.setReg(WideDef);
+}
+
+void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
+                                            unsigned OpIdx) {
+  // Widen the vector read through operand OpIdx to MoreTy, leaving the added
+  // part undefined.
+  MachineOperand &Use = MI.getOperand(OpIdx);
+  const Register OldReg = Use.getReg();
+  const LLT OldTy = MRI.getType(OldReg);
+
+  const unsigned OldElts = OldTy.getNumElements();
+  const unsigned NewElts = MoreTy.getNumElements();
+
+  if (NewElts % OldElts != 0) {
+    // No whole number of OldTy pieces fills MoreTy: insert the old value at
+    // offset 0 of an undefined MoreTy value instead.
+    Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
+    Register Undef = MIRBuilder.buildUndef(MoreTy).getReg(0);
+    MIRBuilder.buildInsert(MoreReg, Undef, OldReg, 0);
+    Use.setReg(MoreReg);
+    return;
+  }
+
+  // Otherwise use concat_vectors of the old value and undefined OldTy pieces.
+  SmallVector<Register, 8> Parts;
+  Parts.push_back(OldReg);
+  Register Undef = MIRBuilder.buildUndef(OldTy).getReg(0);
+  for (unsigned I = 1, E = NewElts / OldElts; I != E; ++I)
+    Parts.push_back(Undef);
+  Use.setReg(MIRBuilder.buildConcatVectors(MoreTy, Parts).getReg(0));
+}
+
 LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
-  MIRBuilder.setInstr(MI);
+LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT WideTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
 
-  switch (MI.getOpcode()) {
-  default:
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  if (DstTy.isVector())
     return UnableToLegalize;
-  case TargetOpcode::G_UADDO:
-  case TargetOpcode::G_USUBO: {
-    if (TypeIdx == 1)
-      return UnableToLegalize; // TODO
-    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
-                                         {MI.getOperand(2).getReg()});
-    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
-                                         {MI.getOperand(3).getReg()});
-    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
-                          ? TargetOpcode::G_ADD
-                          : TargetOpcode::G_SUB;
-    // Do the arithmetic in the larger type.
-    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
-    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
-    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
-    auto AndOp = MIRBuilder.buildInstr(
-        TargetOpcode::G_AND, {WideTy},
-        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
-    // There is no overflow if the AndOp is the same as NewOp.
-    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
-                         AndOp);
+
+  Register Src1 = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(Src1);
+  const int DstSize = DstTy.getSizeInBits();
+  const int SrcSize = SrcTy.getSizeInBits();
+  const int WideSize = WideTy.getSizeInBits();
+  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
+
+  unsigned NumOps = MI.getNumOperands();
+  unsigned NumSrc = MI.getNumOperands() - 1;
+  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
+
+  if (WideSize >= DstSize) {
+    // Directly pack the bits in the target type.
+    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+
+    for (unsigned I = 2; I != NumOps; ++I) {
+      const unsigned Offset = (I - 1) * PartSize;
+
+      Register SrcReg = MI.getOperand(I).getReg();
+      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+
+      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+      Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+        MRI.createGenericVirtualRegister(WideTy);
+
+      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+      ResultReg = NextResult;
+    }
+
+    if (WideSize > DstSize)
+      MIRBuilder.buildTrunc(DstReg, ResultReg);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  // Unmerge the original values to the GCD type, and recombine to the next
+  // multiple greater than the original type.
+  //
+  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+  // %12:_(s12) = G_MERGE_VALUES %10, %11
+  //
+  // Padding with undef if necessary:
+  //
+  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+  // %7:_(s2) = G_IMPLICIT_DEF
+  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+  // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+  LLT GCDTy = LLT::scalar(GCD);
+
+  SmallVector<Register, 8> Parts;
+  SmallVector<Register, 8> NewMergeRegs;
+  SmallVector<Register, 8> Unmerges;
+  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+  // Decompose the original operands if they don't evenly divide.
+  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
+    Register SrcReg = MI.getOperand(I).getReg();
+    if (GCD == SrcSize) {
+      Unmerges.push_back(SrcReg);
+    } else {
+      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+        Unmerges.push_back(Unmerge.getReg(J));
+    }
+  }
+
+  const int PartsPerGCD = WideSize / GCD;
+
+  // Pad with undef until the GCD-sized pieces fill NumMerge WideTy registers.
+  if (static_cast<int>(Unmerges.size()) != NumMerge * PartsPerGCD) {
+    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+    for (int I = Unmerges.size(); I != NumMerge * PartsPerGCD; ++I)
+      Unmerges.push_back(UndefReg);
+  }
+
+  // Build merges of each piece.
+  ArrayRef<Register> Slicer(Unmerges);
+  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+    NewMergeRegs.push_back(Merge.getReg(0));
+  }
+
+  // A truncate may be necessary if the requested type doesn't evenly divide the
+  // original result type.
+  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+  } else {
+    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT WideTy) {
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+  // The last operand is the source; all operands before it are results.
+  unsigned NumDst = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(NumDst).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+  if (!SrcTy.isScalar())
+    return UnableToLegalize;
+
+  Register Dst0Reg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst0Reg);
+  if (!DstTy.isScalar())
+    return UnableToLegalize;
+  // The widened source is sized to hold NumDst values of WideTy.
+  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
+  LLT NewSrcTy = LLT::scalar(NewSrcSize);
+  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
+
+  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
+  // NOTE(review): copies are OR'ed unmasked; verify results for NumDst > 2.
+  for (unsigned I = 1; I != NumDst; ++I) {
+    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
+    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
+    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
+  }
+
+  Observer.changingInstr(MI);
+
+  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
+  for (unsigned I = 0; I != NumDst; ++I)
+    widenScalarDst(MI, WideTy, I);
+
+  Observer.changedInstr(MI);
+
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+                                    LLT WideTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  LLT DstTy = MRI.getType(DstReg);
+  unsigned Offset = MI.getOperand(2).getImm();
+  // Widening the result (type index 0): MI is replaced by shift/trunc code.
+  if (TypeIdx == 0) {
+    if (SrcTy.isVector() || DstTy.isVector())
+      return UnableToLegalize;
+
+    SrcOp Src(SrcReg);
+    if (SrcTy.isPointer()) {
+      // Extracts from pointers can be handled only if they are really just
+      // simple integers.
+      const DataLayout &DL = MIRBuilder.getDataLayout();
+      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+        return UnableToLegalize;
+
+      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
+      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
+      SrcTy = SrcAsIntTy;
+    }
+
+    if (DstTy.isPointer())
+      return UnableToLegalize;
+
+    if (Offset == 0) {
+      // Avoid a shift in the degenerate case.
+      MIRBuilder.buildTrunc(DstReg,
+                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    // Do a shift in the source type.
+    LLT ShiftTy = SrcTy;
+    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+      Src = MIRBuilder.buildAnyExt(WideTy, Src);
+      ShiftTy = WideTy;
+    }
+    // Otherwise the shift is performed in SrcTy and truncated below.
+
+    auto LShr = MIRBuilder.buildLShr(
+      ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+    MIRBuilder.buildTrunc(DstReg, LShr);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  // Remaining cases widen the source operand itself.
+  if (SrcTy.isScalar()) {
+    Observer.changingInstr(MI);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+
+  if (!SrcTy.isVector())
+    return UnableToLegalize;
+
+  if (DstTy != SrcTy.getElementType())
+    return UnableToLegalize;
+
+  if (Offset % SrcTy.getScalarSizeInBits() != 0)
+    return UnableToLegalize;
+
+  Observer.changingInstr(MI);
+  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
+                          Offset);
+  widenScalarDst(MI, WideTy.getScalarType(), 0);
+  Observer.changedInstr(MI);
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+                                   LLT WideTy) {
+  if (TypeIdx != 0) // Only type index 0 is handled here.
+    return UnableToLegalize;
+  Observer.changingInstr(MI);
+  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); // Operand 1.
+  widenScalarDst(MI, WideTy); // Operand index left at its default.
+  Observer.changedInstr(MI);
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
+  MIRBuilder.setInstr(MI);
+
+  switch (MI.getOpcode()) {
+  default:
+    return UnableToLegalize;
+  case TargetOpcode::G_EXTRACT:
+    return widenScalarExtract(MI, TypeIdx, WideTy);
+  case TargetOpcode::G_INSERT:
+    return widenScalarInsert(MI, TypeIdx, WideTy);
+  case TargetOpcode::G_MERGE_VALUES:
+    return widenScalarMergeValues(MI, TypeIdx, WideTy);
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_USUBO: {
+    if (TypeIdx == 1)
+      return UnableToLegalize; // TODO
+    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+                                         {MI.getOperand(2).getReg()});
+    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
+                                         {MI.getOperand(3).getReg()});
+    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
+                          ? TargetOpcode::G_ADD
+                          : TargetOpcode::G_SUB;
+    // Do the arithmetic in the larger type.
+    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
+    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+    APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
+    auto AndOp = MIRBuilder.buildInstr(
+        TargetOpcode::G_AND, {WideTy},
+        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
+    // There is no overflow if the AndOp is the same as NewOp.
+    MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
+                         AndOp);
     // Now trunc the NewOp to the original result.
     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
     MI.eraseFromParent();
@@ -690,19 +1155,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   case TargetOpcode::G_CTLZ:
   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
   case TargetOpcode::G_CTPOP: {
+    if (TypeIdx == 0) {
+      Observer.changingInstr(MI);
+      widenScalarDst(MI, WideTy, 0);
+      Observer.changedInstr(MI);
+      return Legalized;
+    }
+
+    Register SrcReg = MI.getOperand(1).getReg();
+
     // First ZEXT the input.
-    auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
-    LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+    LLT CurTy = MRI.getType(SrcReg);
     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
       // The count is the same in the larger type except if the original
       // value was zero.  This can be handled by setting the bit just off
       // the top of the original type.
       auto TopBit =
           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
-      MIBSrc = MIRBuilder.buildInstr(
-          TargetOpcode::G_OR, {WideTy},
-          {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+      MIBSrc = MIRBuilder.buildOr(
+        WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
     }
+
     // Perform the operation at the larger size.
     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
     // This is already the correct result for CTPOP and CTTZs
@@ -714,22 +1188,43 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
           TargetOpcode::G_SUB, {WideTy},
           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
     }
-    auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
-    // Make the original instruction a trunc now, and update its source.
+
+    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case TargetOpcode::G_BSWAP: {
     Observer.changingInstr(MI);
-    MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
-    MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+    Register DstReg = MI.getOperand(0).getReg();
+
+    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
+    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+    MI.getOperand(0).setReg(DstExt);
+
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+    LLT Ty = MRI.getType(DstReg);
+    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
+    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
+      .addDef(ShrReg)
+      .addUse(DstExt)
+      .addUse(ShiftAmtReg);
+
+    MIRBuilder.buildTrunc(DstReg, ShrReg);
     Observer.changedInstr(MI);
     return Legalized;
   }
-
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_MUL:
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR:
   case TargetOpcode::G_SUB:
-    // Perform operation at larger width (any extension is fine here, high bits
+    // Perform operation at larger width (any extension is fine here, high bits
     // don't affect the result) and then truncate the result back to the
     // original type.
     Observer.changingInstr(MI);
@@ -741,16 +1236,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 
   case TargetOpcode::G_SHL:
     Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
-    // The "number of bits to shift" operand must preserve its value as an
-    // unsigned integer:
-    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
-    widenScalarDst(MI, WideTy);
+
+    if (TypeIdx == 0) {
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+      widenScalarDst(MI, WideTy);
+    } else {
+      assert(TypeIdx == 1);
+      // The "number of bits to shift" operand must preserve its value as an
+      // unsigned integer:
+      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+    }
+
     Observer.changedInstr(MI);
     return Legalized;
 
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_SREM:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
@@ -759,18 +1262,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     return Legalized;
 
   case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
     Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
-    // The "number of bits to shift" operand must preserve its value as an
-    // unsigned integer:
-    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
-    widenScalarDst(MI, WideTy);
+
+    if (TypeIdx == 0) {
+      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
+        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+
+      widenScalarSrc(MI, WideTy, 1, CvtOp);
+      widenScalarDst(MI, WideTy);
+    } else {
+      assert(TypeIdx == 1);
+      // The "number of bits to shift" operand must preserve its value as an
+      // unsigned integer:
+      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+    }
+
     Observer.changedInstr(MI);
     return Legalized;
-
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_UREM:
-  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX:
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
@@ -788,8 +1301,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
       widenScalarDst(MI, WideTy);
     } else {
+      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
       // Explicit extension is required here since high bits affect the result.
-      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
     }
     Observer.changedInstr(MI);
     return Legalized;
@@ -819,23 +1333,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
 
-  case TargetOpcode::G_INSERT:
-    if (TypeIdx != 0)
-      return UnableToLegalize;
-    Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
-    widenScalarDst(MI, WideTy);
-    Observer.changedInstr(MI);
-    return Legalized;
-
   case TargetOpcode::G_LOAD:
-    // For some types like i24, we might try to widen to i32. To properly handle
-    // this we should be using a dedicated extending load, until then avoid
-    // trying to legalize.
-    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
-        WideTy.getSizeInBits())
-      return UnableToLegalize;
-    LLVM_FALLTHROUGH;
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD:
     Observer.changingInstr(MI);
@@ -844,12 +1342,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     return Legalized;
 
   case TargetOpcode::G_STORE: {
-    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
-        WideTy != LLT::scalar(8))
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    if (!isPowerOf2_32(Ty.getSizeInBits()))
       return UnableToLegalize;
 
     Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
+
+    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
+      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
+    widenScalarSrc(MI, WideTy, 0, ExtType);
+
     Observer.changedInstr(MI);
     return Legalized;
   }
@@ -871,14 +1376,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     bool LosesInfo;
     switch (WideTy.getSizeInBits()) {
     case 32:
-      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
+      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+                  &LosesInfo);
       break;
     case 64:
-      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                  &LosesInfo);
       break;
     default:
-      llvm_unreachable("Unhandled fp widen type");
+      return UnableToLegalize;
     }
+
+    assert(!LosesInfo && "extend should always be lossless");
+
     Observer.changingInstr(MI);
     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
 
@@ -894,7 +1404,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   }
   case TargetOpcode::G_BRCOND:
     Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
     Observer.changedInstr(MI);
     return Legalized;
 
@@ -947,23 +1457,103 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
-  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+    if (TypeIdx == 0) {
+      Register VecReg = MI.getOperand(1).getReg();
+      LLT VecTy = MRI.getType(VecReg);
+      Observer.changingInstr(MI);
+
+      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
+                                     WideTy.getSizeInBits()),
+                     1, TargetOpcode::G_SEXT);
+
+      widenScalarDst(MI, WideTy, 0);
+      Observer.changedInstr(MI);
+      return Legalized;
+    }
+
     if (TypeIdx != 2)
       return UnableToLegalize;
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
     Observer.changedInstr(MI);
     return Legalized;
-
+  }
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMA:
+  case TargetOpcode::G_FNEG:
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_FCANONICALIZE:
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINNUM_IEEE:
+  case TargetOpcode::G_FMAXNUM_IEEE:
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMAXIMUM:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FREM:
   case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FFLOOR:
+  case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FLOG10:
+  case TargetOpcode::G_FLOG:
+  case TargetOpcode::G_FLOG2:
+  case TargetOpcode::G_FRINT:
+  case TargetOpcode::G_FNEARBYINT:
+  case TargetOpcode::G_FSQRT:
+  case TargetOpcode::G_FEXP:
+  case TargetOpcode::G_FEXP2:
+  case TargetOpcode::G_FPOW:
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+  case TargetOpcode::G_INTRINSIC_ROUND:
+    assert(TypeIdx == 0);
+    Observer.changingInstr(MI);
+
+    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
+
+    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_INTTOPTR:
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+
+    Observer.changingInstr(MI);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_PTRTOINT:
     if (TypeIdx != 0)
       return UnableToLegalize;
+
     Observer.changingInstr(MI);
-    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
-    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+    widenScalarDst(MI, WideTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_BUILD_VECTOR: {
+    Observer.changingInstr(MI);
+
+    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
+    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
+
+    // Avoid changing the result vector type if the source element type was
+    // requested.
+    if (TypeIdx == 1) {
+      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+    } else {
+      widenScalarDst(MI, WideTy, 0);
+    }
+
     Observer.changedInstr(MI);
     return Legalized;
   }
+  }
 }
 
 LegalizerHelper::LegalizeResult
@@ -976,13 +1566,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     return UnableToLegalize;
   case TargetOpcode::G_SREM:
   case TargetOpcode::G_UREM: {
-    unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
+    Register QuotReg = MRI.createGenericVirtualRegister(Ty);
     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
         .addDef(QuotReg)
         .addUse(MI.getOperand(1).getReg())
         .addUse(MI.getOperand(2).getReg());
 
-    unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
+    Register ProdReg = MRI.createGenericVirtualRegister(Ty);
     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                         ProdReg);
@@ -993,10 +1583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   case TargetOpcode::G_UMULO: {
     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
     // result.
-    unsigned Res = MI.getOperand(0).getReg();
-    unsigned Overflow = MI.getOperand(1).getReg();
-    unsigned LHS = MI.getOperand(2).getReg();
-    unsigned RHS = MI.getOperand(3).getReg();
+    Register Res = MI.getOperand(0).getReg();
+    Register Overflow = MI.getOperand(1).getReg();
+    Register LHS = MI.getOperand(2).getReg();
+    Register RHS = MI.getOperand(3).getReg();
 
     MIRBuilder.buildMul(Res, LHS, RHS);
 
@@ -1004,20 +1594,20 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
                           ? TargetOpcode::G_SMULH
                           : TargetOpcode::G_UMULH;
 
-    unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+    Register HiPart = MRI.createGenericVirtualRegister(Ty);
     MIRBuilder.buildInstr(Opcode)
       .addDef(HiPart)
       .addUse(LHS)
       .addUse(RHS);
 
-    unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+    Register Zero = MRI.createGenericVirtualRegister(Ty);
     MIRBuilder.buildConstant(Zero, 0);
 
     // For *signed* multiply, overflow is detected by checking:
     // (hi != (lo >> bitwidth-1))
     if (Opcode == TargetOpcode::G_SMULH) {
-      unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
-      unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
+      Register Shifted = MRI.createGenericVirtualRegister(Ty);
+      Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
         .addDef(Shifted)
@@ -1035,7 +1625,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     // represent them.
     if (Ty.isVector())
       return UnableToLegalize;
-    unsigned Res = MI.getOperand(0).getReg();
+    Register Res = MI.getOperand(0).getReg();
     Type *ZeroTy;
     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
     switch (Ty.getSizeInBits()) {
@@ -1057,10 +1647,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     ConstantFP &ZeroForNegation =
         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
-    MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
-        .addDef(Res)
-        .addUse(Zero->getOperand(0).getReg())
-        .addUse(MI.getOperand(1).getReg());
+    Register SubByReg = MI.getOperand(1).getReg();
+    Register ZeroReg = Zero->getOperand(0).getReg();
+    MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
+                          MI.getFlags());
     MI.eraseFromParent();
     return Legalized;
   }
@@ -1070,24 +1660,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
       return UnableToLegalize;
-    unsigned Res = MI.getOperand(0).getReg();
-    unsigned LHS = MI.getOperand(1).getReg();
-    unsigned RHS = MI.getOperand(2).getReg();
-    unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+    Register Res = MI.getOperand(0).getReg();
+    Register LHS = MI.getOperand(1).getReg();
+    Register RHS = MI.getOperand(2).getReg();
+    Register Neg = MRI.createGenericVirtualRegister(Ty);
     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
-    MIRBuilder.buildInstr(TargetOpcode::G_FADD)
-        .addDef(Res)
-        .addUse(LHS)
-        .addUse(Neg);
+    MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
     MI.eraseFromParent();
     return Legalized;
   }
   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
-    unsigned OldValRes = MI.getOperand(0).getReg();
-    unsigned SuccessRes = MI.getOperand(1).getReg();
-    unsigned Addr = MI.getOperand(2).getReg();
-    unsigned CmpVal = MI.getOperand(3).getReg();
-    unsigned NewVal = MI.getOperand(4).getReg();
+    Register OldValRes = MI.getOperand(0).getReg();
+    Register SuccessRes = MI.getOperand(1).getReg();
+    Register Addr = MI.getOperand(2).getReg();
+    Register CmpVal = MI.getOperand(3).getReg();
+    Register NewVal = MI.getOperand(4).getReg();
     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                   **MI.memoperands_begin());
     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -1098,8 +1685,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD: {
     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned PtrReg = MI.getOperand(1).getReg();
+    Register DstReg = MI.getOperand(0).getReg();
+    Register PtrReg = MI.getOperand(1).getReg();
     LLT DstTy = MRI.getType(DstReg);
     auto &MMO = **MI.memoperands_begin();
 
@@ -1114,8 +1701,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     }
 
     if (DstTy.isScalar()) {
-      unsigned TmpReg = MRI.createGenericVirtualRegister(
-          LLT::scalar(MMO.getSize() /* in bytes */ * 8));
+      Register TmpReg =
+          MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
       switch (MI.getOpcode()) {
       default:
@@ -1142,15 +1729,27 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   case TargetOpcode::G_CTTZ:
   case TargetOpcode::G_CTPOP:
     return lowerBitCount(MI, TypeIdx, Ty);
+  case G_UADDO: {
+    Register Res = MI.getOperand(0).getReg();
+    Register CarryOut = MI.getOperand(1).getReg();
+    Register LHS = MI.getOperand(2).getReg();
+    Register RHS = MI.getOperand(3).getReg();
+
+    MIRBuilder.buildAdd(Res, LHS, RHS);
+    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case G_UADDE: {
-    unsigned Res = MI.getOperand(0).getReg();
-    unsigned CarryOut = MI.getOperand(1).getReg();
-    unsigned LHS = MI.getOperand(2).getReg();
-    unsigned RHS = MI.getOperand(3).getReg();
-    unsigned CarryIn = MI.getOperand(4).getReg();
+    Register Res = MI.getOperand(0).getReg();
+    Register CarryOut = MI.getOperand(1).getReg();
+    Register LHS = MI.getOperand(2).getReg();
+    Register RHS = MI.getOperand(3).getReg();
+    Register CarryIn = MI.getOperand(4).getReg();
 
-    unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
-    unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+    Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
 
     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
@@ -1160,113 +1759,1325 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case G_USUBO: {
+    Register Res = MI.getOperand(0).getReg();
+    Register BorrowOut = MI.getOperand(1).getReg();
+    Register LHS = MI.getOperand(2).getReg();
+    Register RHS = MI.getOperand(3).getReg();
+
+    MIRBuilder.buildSub(Res, LHS, RHS);
+    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case G_USUBE: {
+    Register Res = MI.getOperand(0).getReg();
+    Register BorrowOut = MI.getOperand(1).getReg();
+    Register LHS = MI.getOperand(2).getReg();
+    Register RHS = MI.getOperand(3).getReg();
+    Register BorrowIn = MI.getOperand(4).getReg();
+
+    Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+    Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
+    Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+
+    MIRBuilder.buildSub(TmpRes, LHS, RHS);
+    MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
+    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
+    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
+    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
+    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case G_UITOFP:
+    return lowerUITOFP(MI, TypeIdx, Ty);
+  case G_SITOFP:
+    return lowerSITOFP(MI, TypeIdx, Ty);
+  case G_SMIN:
+  case G_SMAX:
+  case G_UMIN:
+  case G_UMAX:
+    return lowerMinMax(MI, TypeIdx, Ty);
+  case G_FCOPYSIGN:
+    return lowerFCopySign(MI, TypeIdx, Ty);
+  case G_FMINNUM:
+  case G_FMAXNUM:
+    return lowerFMinNumMaxNum(MI);
   }
 }
 
-LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
-                                     LLT NarrowTy) {
-  // FIXME: Don't know how to handle secondary types yet.
-  if (TypeIdx != 0)
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
+    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // TypeIdx is not read.
+  SmallVector<Register, 2> DstRegs;
+
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  Register DstReg = MI.getOperand(0).getReg();
+  unsigned Size = MRI.getType(DstReg).getSizeInBits();
+  int NumParts = Size / NarrowSize; // Count of NarrowTy pieces in the result.
+  // FIXME: Don't know how to handle the situation where the small vectors
+  // aren't all the same size yet.
+  if (Size % NarrowSize != 0)
     return UnableToLegalize;
 
-  MIRBuilder.setInstr(MI);
-  switch (MI.getOpcode()) {
-  default:
+  for (int i = 0; i < NumParts; ++i) {
+    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildUndef(TmpReg); // Every piece is a separate undef.
+    DstRegs.push_back(TmpReg);
+  }
+
+  if (NarrowTy.isVector()) // Rebuild the wide result from the pieces.
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent(); // MI has been replaced by the code built above.
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT NarrowTy) {
+  const unsigned Opc = MI.getOpcode();
+  const unsigned NumOps = MI.getNumOperands() - 1; // Excludes operand 0.
+  const unsigned NarrowSize = NarrowTy.getSizeInBits();
+  const Register DstReg = MI.getOperand(0).getReg();
+  const unsigned Flags = MI.getFlags(); // Copied onto every new instruction.
+  const LLT DstTy = MRI.getType(DstReg);
+  const unsigned Size = DstTy.getSizeInBits();
+  const int NumParts = Size / NarrowSize;
+  const LLT EltTy = DstTy.getElementType();
+  const unsigned EltSize = EltTy.getSizeInBits();
+  const unsigned BitsForNumParts = NarrowSize * NumParts; // Whole pieces only.
+
+  // Check if we have any leftovers. If we do, then only handle the case where
+  // the leftover is one element.
+  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
     return UnableToLegalize;
-  case TargetOpcode::G_IMPLICIT_DEF: {
-    SmallVector<unsigned, 2> DstRegs;
 
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned Size = MRI.getType(DstReg).getSizeInBits();
-    int NumParts = Size / NarrowSize;
-    // FIXME: Don't know how to handle the situation where the small vectors
-    // aren't all the same size yet.
-    if (Size % NarrowSize != 0)
-      return UnableToLegalize;
+  if (BitsForNumParts != Size) { // Uneven split: one element is left over.
+    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
+    MIRBuilder.buildUndef(AccumDstReg); // Start value for the insert chain.
+
+    // Handle the pieces which evenly divide into the requested type with
+    // extract/op/insert sequence.
+    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
+      SmallVector<SrcOp, 4> SrcOps;
+      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
+        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
+        SrcOps.push_back(PartOpReg);
+      }
 
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
-      MIRBuilder.buildUndef(TmpReg);
-      DstRegs.push_back(TmpReg);
+      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+
+      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
+      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
+      AccumDstReg = PartInsertReg; // The next insert builds on this value.
     }
 
-    if (NarrowTy.isVector())
-      MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-    else
-      MIRBuilder.buildBuildVector(DstReg, DstRegs);
+    // Handle the remaining element sized leftover piece.
+    SmallVector<SrcOp, 4> SrcOps;
+    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
+      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
+                              BitsForNumParts);
+      SrcOps.push_back(PartOpReg);
+    }
 
+    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
+    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
     MI.eraseFromParent();
+
     return Legalized;
   }
-  case TargetOpcode::G_ADD: {
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    unsigned DstReg = MI.getOperand(0).getReg();
-    unsigned Size = MRI.getType(DstReg).getSizeInBits();
-    int NumParts = Size / NarrowSize;
+
+  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+
+  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+
+  if (NumOps >= 2)
+    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+
+  if (NumOps >= 3)
+    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
+
+  for (int i = 0; i < NumParts; ++i) {
+    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+    if (NumOps == 1) // No instruction is built unless NumOps is 1, 2 or 3.
+      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
+    else if (NumOps == 2) {
+      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
+    } else if (NumOps == 3) {
+      MIRBuilder.buildInstr(Opc, {DstReg},
+                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
+    }
+
+    DstRegs.push_back(DstReg);
+  }
+
+  if (NarrowTy.isVector()) // Rebuild the wide result from the pieces.
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+// Handle splitting vector operations which need to have the same number of
+// elements in each type index, but each type index may have a different element
+// type.
+//
+// e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
+//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//
+// Also handles some irregular breakdown cases,
+// e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
+//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//             s64 = G_SHL s64, s32
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMultiEltType(
+  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
+  if (TypeIdx != 0) // Only the result type index can be narrowed here.
+    return UnableToLegalize;
+
+  const LLT NarrowTy0 = NarrowTyArg;
+  const unsigned NewNumElts =
+      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
+
+  const Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT LeftoverTy0; // Leftover piece type for the result, if any.
+
+  // All of the operands need to have the same number of elements, so if we can
+  // determine a type breakdown for the result type, we can for all of the
+  // source types.
+  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
+  if (NumParts < 0)
+    return UnableToLegalize;
+
+  SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+  SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+    // Operand I is narrowed to NewNumElts elements of its own scalar type.
+    Register SrcReg = MI.getOperand(I).getReg();
+    LLT SrcTyI = MRI.getType(SrcReg);
+    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
+    LLT LeftoverTyI;
+
+    // Split this operand into the requested typed registers, and any leftover
+    // required to reproduce the original type.
+    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
+                      LeftoverRegs))
+      return UnableToLegalize;
+
+    if (I == 1) {
+      // For the first operand, create an instruction for each part and setup
+      // the result.
+      for (Register PartReg : PartRegs) {
+        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+                               .addDef(PartDstReg)
+                               .addUse(PartReg));
+        DstRegs.push_back(PartDstReg);
+      }
+
+      for (Register LeftoverReg : LeftoverRegs) {
+        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
+        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+                               .addDef(PartDstReg)
+                               .addUse(LeftoverReg));
+        LeftoverDstRegs.push_back(PartDstReg);
+      }
+    } else {
+      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
+
+      // Add the newly created operand splits to the existing instructions. The
+      // odd-sized pieces are ordered after the requested NarrowTyArg sized
+      // pieces.
+      unsigned InstCount = 0;
+      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
+        NewInsts[InstCount++].addUse(PartRegs[J]);
+      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
+        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
+    }
+
+    PartRegs.clear(); // Reused for the next operand.
+    LeftoverRegs.clear();
+  }
+
+  // Insert the newly built operations and rebuild the result register.
+  for (auto &MIB : NewInsts)
+    MIRBuilder.insertInstr(MIB);
+
+  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+// Split a vector cast into one cast per NarrowTy-sized piece of the result.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT NarrowTy) {
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  LLT NarrowTy1; // Type of each piece split off the source operand.
+  unsigned NumParts;
+
+  if (NarrowTy.isVector()) {
+    // Uneven breakdown not handled.
+    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
+      return UnableToLegalize;
+    // A source piece has as many elements as a result piece, not NumParts.
+    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(),
+                            SrcTy.getElementType().getSizeInBits());
+  } else {
+    NumParts = DstTy.getNumElements();
+    NarrowTy1 = SrcTy.getElementType();
+  }
+
+  SmallVector<Register, 4> SrcRegs, DstRegs;
+  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
+
+  for (unsigned I = 0; I < NumParts; ++I) {
+    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
+    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
+      .addDef(PartDstReg)
+      .addUse(SrcRegs[I]);
+    NewInst->setFlags(MI.getFlags()); // Carry over MI's flags.
+    DstRegs.push_back(PartDstReg);
+  }
+
+  if (NarrowTy.isVector())
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+// Split a vector G_ICMP/G_FCMP into compares on NarrowTy-sized pieces.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT NarrowTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src0Reg = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(Src0Reg);
+
+  unsigned NumParts;
+  LLT NarrowTy0, NarrowTy1; // Per-piece result type and source type.
+
+  if (TypeIdx == 0) {
+    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+    unsigned OldElts = DstTy.getNumElements();
+
+    NarrowTy0 = NarrowTy;
+    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
+    NarrowTy1 = NarrowTy.isVector() ?
+      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
+      SrcTy.getElementType();
+  } else {
+    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+    unsigned OldElts = SrcTy.getNumElements();
+
+    // getNumElements() is only valid on vectors, so use NewElts for a scalar.
+    NumParts = OldElts / NewElts;
+    NarrowTy0 = NarrowTy.isVector()
+                    ? LLT::vector(NewElts, DstTy.getScalarSizeInBits())
+                    : DstTy.getElementType();
+    NarrowTy1 = NarrowTy;
+  }
+
+  // FIXME: Don't know how to handle the situation where the small vectors
+  // aren't all the same size yet.
+  if (NarrowTy1.isVector() &&
+      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
+    return UnableToLegalize;
+
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
+  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+
+  for (unsigned I = 0; I < NumParts; ++I) {
+    Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+    DstRegs.push_back(PartDstReg);
+
+    if (MI.getOpcode() == TargetOpcode::G_ICMP)
+      MIRBuilder.buildICmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
+    else {
+      MachineInstr *NewCmp
+        = MIRBuilder.buildFCmp(Pred, PartDstReg, Src1Regs[I], Src2Regs[I]);
+      NewCmp->setFlags(MI.getFlags()); // Only the FCmp path copies MI's flags.
+    }
+  }
+
+  if (NarrowTy1.isVector())
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
+                                           LLT NarrowTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register CondReg = MI.getOperand(1).getReg();
+
+  unsigned NumParts = 0;
+  LLT NarrowTy0, NarrowTy1; // Per-piece value type and condition type.
+
+  LLT DstTy = MRI.getType(DstReg);
+  LLT CondTy = MRI.getType(CondReg);
+  unsigned Size = DstTy.getSizeInBits();
+
+  assert(TypeIdx == 0 || CondTy.isVector());
+
+  if (TypeIdx == 0) {
+    NarrowTy0 = NarrowTy;
+    NarrowTy1 = CondTy; // Kept as-is unless the condition is a vector.
+
+    unsigned NarrowSize = NarrowTy0.getSizeInBits();
     // FIXME: Don't know how to handle the situation where the small vectors
     // aren't all the same size yet.
     if (Size % NarrowSize != 0)
       return UnableToLegalize;
 
-    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
-    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
-    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+    NumParts = Size / NarrowSize;
 
-    for (int i = 0; i < NumParts; ++i) {
-      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
-      DstRegs.push_back(DstReg);
+    // Need to break down the condition type
+    if (CondTy.isVector()) {
+      if (CondTy.getNumElements() == NumParts) // One element per piece.
+        NarrowTy1 = CondTy.getElementType();
+      else
+        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
+                                CondTy.getScalarSizeInBits());
     }
+  } else {
+    NumParts = CondTy.getNumElements();
+    if (NarrowTy.isVector()) {
+      // TODO: Handle uneven breakdown.
+      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
+        return UnableToLegalize;
 
+      return UnableToLegalize; // A vector NarrowTy is always rejected here.
+    } else {
+      NarrowTy0 = DstTy.getElementType();
+      NarrowTy1 = NarrowTy;
+    }
+  }
+
+  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+  if (CondTy.isVector()) // A scalar condition is shared by every piece.
+    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
+
+  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
+  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
+
+  for (unsigned i = 0; i < NumParts; ++i) {
+    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
+                           Src1Regs[i], Src2Regs[i]);
+    DstRegs.push_back(DstReg);
+  }
+
+  if (NarrowTy0.isVector())
     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-    MI.eraseFromParent();
-    return Legalized;
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT NarrowTy) {
+  const Register DstReg = MI.getOperand(0).getReg();
+  LLT PhiTy = MRI.getType(DstReg);
+  LLT LeftoverTy;
+
+  // All of the operands need to have the same number of elements, so if we can
+  // determine a type breakdown for the result type, we can for all of the
+  // source types.
+  int NumParts, NumLeftover;
+  std::tie(NumParts, NumLeftover)
+    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
+  if (NumParts < 0)
+    return UnableToLegalize;
+
+  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+  SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+  const int TotalNumParts = NumParts + NumLeftover;
+
+  // Insert the new phis in the result block first.
+  for (int I = 0; I != TotalNumParts; ++I) {
+    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; // Leftovers come last.
+    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
+    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
+                       .addDef(PartDstReg));
+    if (I < NumParts)
+      DstRegs.push_back(PartDstReg);
+    else
+      LeftoverDstRegs.push_back(PartDstReg);
   }
-  case TargetOpcode::G_LOAD:
-  case TargetOpcode::G_STORE: {
-    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
-    unsigned ValReg = MI.getOperand(0).getReg();
-    unsigned AddrReg = MI.getOperand(1).getReg();
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    unsigned Size = MRI.getType(ValReg).getSizeInBits();
-    unsigned NumParts = Size / NarrowSize;
-
-    SmallVector<unsigned, 8> NarrowRegs;
-    if (!IsLoad)
-      extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
-
-    const LLT OffsetTy =
-        LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
-    MachineFunction &MF = *MI.getMF();
-    MachineMemOperand *MMO = *MI.memoperands_begin();
-    for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
-      unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
-      unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
-      unsigned NewAddrReg = 0;
-      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
-      MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
-          MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
-          NarrowTy.getSizeInBits() / 8, Alignment);
+
+  MachineBasicBlock *MBB = MI.getParent();
+  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); // Past all the phis.
+  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
+
+  SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+  // Insert code to extract the incoming values in each predecessor block.
+  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+    PartRegs.clear();
+    LeftoverRegs.clear();
+
+    Register SrcReg = MI.getOperand(I).getReg(); // Incoming value...
+    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); // ...and block.
+    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+
+    LLT Unused;
+    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
+                      LeftoverRegs))
+      return UnableToLegalize;
+
+    // Add the newly created operand splits to the existing instructions. The
+    // odd-sized pieces are ordered after the requested NarrowTyArg sized
+    // pieces.
+    for (int J = 0; J != TotalNumParts; ++J) {
+      MachineInstrBuilder MIB = NewInsts[J];
+      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
+      MIB.addMBB(&OpMBB);
+    }
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+                                      LLT NarrowTy) {
+  // FIXME: Don't know how to handle secondary types yet.
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  MachineMemOperand *MMO = *MI.memoperands_begin();
+
+  // This implementation doesn't work for atomics. Give up instead of doing
+  // something invalid.
+  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
+      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+    return UnableToLegalize;
+
+  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+  Register ValReg = MI.getOperand(0).getReg();
+  Register AddrReg = MI.getOperand(1).getReg();
+  LLT ValTy = MRI.getType(ValReg);
+
+  int NumParts = -1; // Stays -1 when no breakdown is available.
+  int NumLeftover = -1;
+  LLT LeftoverTy;
+  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
+  if (IsLoad) {
+    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+  } else {
+    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+                     NarrowLeftoverRegs)) {
+      NumParts = NarrowRegs.size();
+      NumLeftover = NarrowLeftoverRegs.size();
+    }
+  }
+
+  if (NumParts == -1)
+    return UnableToLegalize;
+
+  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+  unsigned TotalSize = ValTy.getSizeInBits();
+
+  // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each elements
+  // of ValRegs should be PartTy. Returns the next offset that needs to be
+  // handled.
+  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
+                             unsigned Offset) -> unsigned {
+    MachineFunction &MF = MIRBuilder.getMF();
+    unsigned PartSize = PartTy.getSizeInBits();
+    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+         Offset += PartSize, ++Idx) {
+      unsigned ByteSize = PartSize / 8; // Offset and PartSize are in bits.
+      unsigned ByteOffset = Offset / 8;
+      Register NewAddrReg;
+
+      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+      MachineMemOperand *NewMMO =
+        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
       if (IsLoad) {
-        unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
-        NarrowRegs.push_back(Dst);
-        MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+        Register Dst = MRI.createGenericVirtualRegister(PartTy);
+        ValRegs.push_back(Dst);
+        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
       } else {
-        MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
       }
     }
-    if (IsLoad) {
-      if (NarrowTy.isVector())
-        MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
-      else
-        MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
-    }
+
+    return Offset;
+  };
+
+  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+  // Handle the rest of the register if this isn't an even type breakdown.
+  if (LeftoverTy.isValid())
+    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+
+  if (IsLoad) { // Only a load has a result register to reassemble.
+    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+                LeftoverTy, NarrowLeftoverRegs);
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+                                     LLT NarrowTy) {
+  using namespace TargetOpcode;
+
+  MIRBuilder.setInstr(MI); // Anchor the builder on MI.
+  switch (MI.getOpcode()) { // Pick the splitting helper for this opcode.
+  case G_IMPLICIT_DEF:
+    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
+  case G_AND:
+  case G_OR:
+  case G_XOR:
+  case G_ADD:
+  case G_SUB:
+  case G_MUL:
+  case G_SMULH:
+  case G_UMULH:
+  case G_FADD:
+  case G_FMUL:
+  case G_FSUB:
+  case G_FNEG:
+  case G_FABS:
+  case G_FCANONICALIZE:
+  case G_FDIV:
+  case G_FREM:
+  case G_FMA:
+  case G_FPOW:
+  case G_FEXP:
+  case G_FEXP2:
+  case G_FLOG:
+  case G_FLOG2:
+  case G_FLOG10:
+  case G_FNEARBYINT:
+  case G_FCEIL:
+  case G_FFLOOR:
+  case G_FRINT:
+  case G_INTRINSIC_ROUND:
+  case G_INTRINSIC_TRUNC:
+  case G_FCOS:
+  case G_FSIN:
+  case G_FSQRT:
+  case G_BSWAP:
+  case G_SDIV:
+  case G_SMIN:
+  case G_SMAX:
+  case G_UMIN:
+  case G_UMAX:
+  case G_FMINNUM:
+  case G_FMAXNUM:
+  case G_FMINNUM_IEEE:
+  case G_FMAXNUM_IEEE:
+  case G_FMINIMUM:
+  case G_FMAXIMUM:
+    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
+  case G_SHL:
+  case G_LSHR:
+  case G_ASHR:
+  case G_CTLZ:
+  case G_CTLZ_ZERO_UNDEF:
+  case G_CTTZ:
+  case G_CTTZ_ZERO_UNDEF:
+  case G_CTPOP:
+  case G_FCOPYSIGN:
+    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
+  case G_ZEXT:
+  case G_SEXT:
+  case G_ANYEXT:
+  case G_FPEXT:
+  case G_FPTRUNC:
+  case G_SITOFP:
+  case G_UITOFP:
+  case G_FPTOSI:
+  case G_FPTOUI:
+  case G_INTTOPTR:
+  case G_PTRTOINT:
+  case G_ADDRSPACE_CAST:
+    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+  case G_ICMP:
+  case G_FCMP:
+    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+  case G_SELECT:
+    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+  case G_PHI:
+    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+  case G_LOAD:
+  case G_STORE:
+    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+  default: // No splitting helper exists for any other opcode.
+    return UnableToLegalize;
+  }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+                                             const LLT HalfTy, const LLT AmtTy) {
+
+  Register InL = MRI.createGenericVirtualRegister(HalfTy);
+  Register InH = MRI.createGenericVirtualRegister(HalfTy);
+  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+  if (Amt.isNullValue()) { // Zero shift: re-merge the two halves unchanged.
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
     MI.eraseFromParent();
     return Legalized;
   }
+
+  LLT NVT = HalfTy;
+  unsigned NVTBits = HalfTy.getSizeInBits();
+  unsigned VTBits = 2 * NVTBits; // Twice the half width.
+
+  SrcOp Lo(Register(0)), Hi(Register(0)); // Both are set in every branch below.
+  if (MI.getOpcode() == TargetOpcode::G_SHL) {
+    if (Amt.ugt(VTBits)) { // Both halves become zero.
+      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+    } else if (Amt.ugt(NVTBits)) { // Low is zero; high comes from InL.
+      Lo = MIRBuilder.buildConstant(NVT, 0);
+      Hi = MIRBuilder.buildShl(NVT, InL,
+                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+    } else if (Amt == NVTBits) { // InL moves into the high half as-is.
+      Lo = MIRBuilder.buildConstant(NVT, 0);
+      Hi = InL;
+    } else { // High half combines shifted InH with bits from InL.
+      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
+      auto OrLHS =
+          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
+      auto OrRHS = MIRBuilder.buildLShr(
+          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+    }
+  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+    if (Amt.ugt(VTBits)) { // Both halves become zero.
+      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+    } else if (Amt.ugt(NVTBits)) { // High is zero; low comes from InH.
+      Lo = MIRBuilder.buildLShr(NVT, InH,
+                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+      Hi = MIRBuilder.buildConstant(NVT, 0);
+    } else if (Amt == NVTBits) { // InH moves into the low half as-is.
+      Lo = InH;
+      Hi = MIRBuilder.buildConstant(NVT, 0);
+    } else { // Low half combines shifted InL with bits from InH.
+      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+      auto OrRHS = MIRBuilder.buildShl(
+          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
+    }
+  } else { // Any other opcode is expanded with arithmetic right shifts.
+    if (Amt.ugt(VTBits)) { // Both halves are InH shifted by NVTBits - 1.
+      Hi = Lo = MIRBuilder.buildAShr(
+          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+    } else if (Amt.ugt(NVTBits)) {
+      Lo = MIRBuilder.buildAShr(NVT, InH,
+                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+      Hi = MIRBuilder.buildAShr(NVT, InH,
+                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+    } else if (Amt == NVTBits) {
+      Lo = InH;
+      Hi = MIRBuilder.buildAShr(NVT, InH,
+                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+    } else { // Low half combines shifted InL with bits from InH.
+      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+      auto OrRHS = MIRBuilder.buildShl(
+          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
+    }
+  }
+
+  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
+  MI.eraseFromParent();
+
+  return Legalized;
+}
+
+// Narrow a scalar shift; constant amounts go to narrowScalarShiftByConstant.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
+                                   LLT RequestedTy) {
+  if (TypeIdx == 1) {
+    Observer.changingInstr(MI);
+    narrowScalarSrc(MI, RequestedTy, 2);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+  // TypeIdx 1 (the shift amount) is done; below, the shifted value is split.
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  if (DstTy.isVector())
+    return UnableToLegalize;
+
+  Register Amt = MI.getOperand(2).getReg();
+  LLT ShiftAmtTy = MRI.getType(Amt);
+  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
+  if (DstEltSize % 2 != 0)
+    return UnableToLegalize;
+
+  // Ignore the input type. We can only go to exactly half the size of the
+  // input. If that isn't small enough, the resulting pieces will be further
+  // legalized.
+  const unsigned NewBitSize = DstEltSize / 2;
+  const LLT HalfTy = LLT::scalar(NewBitSize);
+  const LLT CondTy = LLT::scalar(1);
+
+  if (const MachineInstr *KShiftAmt =
+          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
+    return narrowScalarShiftByConstant(
+        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+  }
+
+  // TODO: Expand with known bits.
+
+  // Handle the fully general expansion by an unknown amount.
+  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+  Register InL = MRI.createGenericVirtualRegister(HalfTy);
+  Register InH = MRI.createGenericVirtualRegister(HalfTy);
+  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+  // Amt == 0 is selected around below: AmtLack would then equal NewBitSize.
+  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+  Register ResultRegs[2];
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SHL: {
+    // Short: ShAmt < NewBitSize
+    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
+    // Hi = (InH << Amt) | (InL >> (NewBitSize - Amt)).
+    auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+    auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
+    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+    auto Hi = MIRBuilder.buildSelect(
+        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+    ResultRegs[0] = Lo.getReg(0);
+    ResultRegs[1] = Hi.getReg(0);
+    break;
+  }
+  case TargetOpcode::G_LSHR: {
+    // Short: ShAmt < NewBitSize
+    auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
+    // Lo = (InL >> Amt) | (InH << (NewBitSize - Amt)).
+    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+    auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
+    auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+    auto Lo = MIRBuilder.buildSelect(
+        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+    ResultRegs[0] = Lo.getReg(0);
+    ResultRegs[1] = Hi.getReg(0);
+    break;
+  }
+  case TargetOpcode::G_ASHR: {
+    // Short: ShAmt < NewBitSize
+    auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+    // Lo = (InL >> Amt) | (InH << (NewBitSize - Amt)).
+    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+    // Long: ShAmt >= NewBitSize
+
+    // Sign of Hi part.
+    auto HiL = MIRBuilder.buildAShr(
+        HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
+
+    auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+    auto Lo = MIRBuilder.buildSelect(
+        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+
+    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+    ResultRegs[0] = Lo.getReg(0);
+    ResultRegs[1] = Hi.getReg(0);
+    break;
+  }
+  default:
+    llvm_unreachable("not a shift");
+  }
+
+  MIRBuilder.buildMerge(DstReg, ResultRegs);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+                                       LLT MoreTy) {
+  assert(TypeIdx == 0 && "Expecting only Idx 0");
+  Observer.changingInstr(MI);
+  // Each incoming value is rewritten in the block named by the operand that
+  // follows it, with the builder positioned at that block's first terminator.
+  for (unsigned ValIdx = 1, NumOps = MI.getNumOperands(); ValIdx != NumOps;
+       ValIdx += 2) {
+    MachineBasicBlock &InBB = *MI.getOperand(ValIdx + 1).getMBB();
+    MIRBuilder.setInsertPt(InBB, InBB.getFirstTerminator());
+    moreElementsVectorSrc(MI, MoreTy, ValIdx);
+  }
+  MachineBasicBlock &PhiBB = *MI.getParent();
+  MIRBuilder.setInsertPt(PhiBB, --PhiBB.getFirstNonPHI());
+  moreElementsVectorDst(MI, MoreTy, 0);
+  Observer.changedInstr(MI);
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
+                                    LLT MoreTy) {
+  MIRBuilder.setInstr(MI);
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) { // Per opcode: which operands are rewritten to MoreTy.
+  case TargetOpcode::G_IMPLICIT_DEF: {
+    Observer.changingInstr(MI);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: { // Binary ops: both sources and the result.
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
   }
+  case TargetOpcode::G_EXTRACT: // Only the source (type index 1) is handled.
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_INSERT: // Operand 1 and the result; operand 2 is kept.
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_SELECT:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+      return UnableToLegalize;
+    // The (non-vector) condition in operand 1 is left untouched.
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorSrc(MI, MoreTy, 3);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  case TargetOpcode::G_PHI:
+    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+  default: // Every other opcode is rejected here.
+    return UnableToLegalize;
+  }
+}
+
+void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+                                        ArrayRef<Register> Src1Regs,
+                                        ArrayRef<Register> Src2Regs,
+                                        LLT NarrowTy) {
+  MachineIRBuilder &B = MIRBuilder;
+  unsigned SrcParts = Src1Regs.size();
+  unsigned DstParts = DstRegs.size();
+  // DstRegs must already be sized: one NarrowTy register is stored per slot.
+  unsigned DstIdx = 0; // Low bits of the result.
+  Register FactorSum =
+      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
+  DstRegs[DstIdx] = FactorSum;
+
+  unsigned CarrySumPrevDstIdx; // Only read when DstIdx != 1, after being set.
+  SmallVector<Register, 4> Factors;
+
+  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+    // Collect low parts of muls for DstIdx.
+    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+         i <= std::min(DstIdx, SrcParts - 1); ++i) {
+      MachineInstrBuilder Mul =
+          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+      Factors.push_back(Mul.getReg(0));
+    }
+    // Collect high parts of muls from previous DstIdx.
+    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+      MachineInstrBuilder Umulh =
+          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+      Factors.push_back(Umulh.getReg(0));
+    }
+    // Add CarrySum from additions calculated for previous DstIdx.
+    if (DstIdx != 1) {
+      Factors.push_back(CarrySumPrevDstIdx);
+    }
+
+    Register CarrySum;
+    // Add all factors and accumulate all carries into CarrySum.
+    if (DstIdx != DstParts - 1) {
+      MachineInstrBuilder Uaddo =
+          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+      FactorSum = Uaddo.getReg(0);
+      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+      for (unsigned i = 2; i < Factors.size(); ++i) {
+        MachineInstrBuilder Uaddo =
+            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+        FactorSum = Uaddo.getReg(0);
+        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+      }
+    } else {
+      // Since value for the next index is not calculated, neither is CarrySum.
+      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+      for (unsigned i = 2; i < Factors.size(); ++i)
+        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+    }
+
+    CarrySumPrevDstIdx = CarrySum;
+    DstRegs[DstIdx] = FactorSum;
+    Factors.clear();
+  }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
+  const Register ResultReg = MI.getOperand(0).getReg();
+  const Register LHS = MI.getOperand(1).getReg();
+  const Register RHS = MI.getOperand(2).getReg();
+
+  const LLT ResultTy = MRI.getType(ResultReg);
+  if (ResultTy.isVector())
+    return UnableToLegalize;
+
+  // Both the operands and the result must split evenly into NarrowTy pieces.
+  const unsigned PieceBits = NarrowTy.getSizeInBits();
+  const unsigned OperandBits = MRI.getType(LHS).getSizeInBits();
+  const unsigned ResultBits = ResultTy.getSizeInBits();
+  if (ResultBits % PieceBits != 0 || OperandBits % PieceBits != 0)
+    return UnableToLegalize;
+
+  // For G_UMULH twice as many product pieces are computed as are returned.
+  const bool WantHighHalf = MI.getOpcode() == TargetOpcode::G_UMULH;
+  const unsigned NumResultPieces = ResultBits / PieceBits;
+  const unsigned NumOperandPieces = OperandBits / PieceBits;
+  const unsigned NumProductPieces = NumResultPieces * (WantHighHalf ? 2 : 1);
+
+  SmallVector<Register, 2> LHSPieces, RHSPieces, ProductPieces;
+  extractParts(LHS, NarrowTy, NumOperandPieces, LHSPieces);
+  extractParts(RHS, NarrowTy, NumOperandPieces, RHSPieces);
+  ProductPieces.resize(NumProductPieces);
+  multiplyRegisters(ProductPieces, LHSPieces, RHSPieces, NarrowTy);
+  // Skip the low half of the product when only the high half is wanted.
+  const unsigned FirstPiece = WantHighHalf ? NumProductPieces / 2 : 0;
+  ArrayRef<Register> ResultPieces(&ProductPieces[FirstPiece], NumResultPieces);
+  MIRBuilder.buildMerge(ResultReg, ResultPieces);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+                                     LLT NarrowTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+  // Only the extract's source (type index 1) is split into NarrowTy pieces.
+  uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+  // FIXME: add support for when SizeOp1 isn't an exact multiple of
+  // NarrowSize.
+  if (SizeOp1 % NarrowSize != 0)
+    return UnableToLegalize;
+  int NumParts = SizeOp1 / NarrowSize;
+
+  SmallVector<Register, 2> SrcRegs, DstRegs;
+  SmallVector<uint64_t, 2> Indexes; // NOTE(review): never used below.
+  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+  Register OpReg = MI.getOperand(0).getReg();
+  uint64_t OpStart = MI.getOperand(2).getImm();
+  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+  for (int i = 0; i < NumParts; ++i) {
+    unsigned SrcStart = i * NarrowSize;
+
+    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+      // No part of the extract uses this subregister, ignore it.
+      continue;
+    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+      // The entire subregister is extracted, forward the value.
+      DstRegs.push_back(SrcRegs[i]);
+      continue;
+    }
+
+    // Work out which bits of this source piece feed the result: the offset
+    // within the piece (ExtractOffset) and how many bits are taken (SegSize).
+    int64_t ExtractOffset;
+    uint64_t SegSize;
+    if (OpStart < SrcStart) {
+      ExtractOffset = 0;
+      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+    } else {
+      ExtractOffset = OpStart - SrcStart;
+      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+    }
+
+    Register SegReg = SrcRegs[i];
+    if (ExtractOffset != 0 || SegSize != NarrowSize) {
+      // A genuine extract is needed.
+      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+    }
+
+    DstRegs.push_back(SegReg);
+  }
+  // Reassemble the collected segments into the original destination.
+  Register DstReg = MI.getOperand(0).getReg();
+  if(MRI.getType(DstReg).isVector())
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+  else
+    MIRBuilder.buildMerge(DstReg, DstRegs);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+                                    LLT NarrowTy) {
+  // FIXME: Don't know how to handle secondary types yet.
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+  // The inserted-into value (operand 1) is split into NarrowTy pieces.
+  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+  uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+  // FIXME: add support for when SizeOp0 isn't an exact multiple of
+  // NarrowSize.
+  if (SizeOp0 % NarrowSize != 0)
+    return UnableToLegalize;
+
+  int NumParts = SizeOp0 / NarrowSize;
+
+  SmallVector<Register, 2> SrcRegs, DstRegs;
+  SmallVector<uint64_t, 2> Indexes; // NOTE(review): never used below.
+  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+  Register OpReg = MI.getOperand(2).getReg();
+  uint64_t OpStart = MI.getOperand(3).getImm();
+  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+  for (int i = 0; i < NumParts; ++i) {
+    unsigned DstStart = i * NarrowSize;
+
+    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+      // No part of the insert affects this subregister, forward the original.
+      DstRegs.push_back(SrcRegs[i]);
+      continue;
+    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+      // The entire subregister is defined by this insert, forward the new
+      // value.
+      DstRegs.push_back(OpReg);
+      continue;
+    }
+
+    // Work out which bits of OpReg land in this piece: where they come from
+    // (ExtractOffset), where they go (InsertOffset) and how many (SegSize).
+    int64_t ExtractOffset, InsertOffset;
+    uint64_t SegSize;
+    if (OpStart < DstStart) {
+      InsertOffset = 0;
+      ExtractOffset = DstStart - OpStart;
+      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+    } else {
+      InsertOffset = OpStart - DstStart;
+      ExtractOffset = 0;
+      SegSize =
+        std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+    }
+
+    Register SegReg = OpReg;
+    if (ExtractOffset != 0 || SegSize != OpSize) {
+      // A genuine extract is needed.
+      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+    }
+    // Overlay the segment on the original piece at InsertOffset.
+    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+    DstRegs.push_back(DstReg);
+  }
+
+  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+  Register DstReg = MI.getOperand(0).getReg();
+  if(MRI.getType(DstReg).isVector())
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+  else
+    MIRBuilder.buildMerge(DstReg, DstRegs);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
+                                   LLT NarrowTy) {
+  const Register ResultReg = MI.getOperand(0).getReg();
+  const LLT WideTy = MRI.getType(ResultReg);
+
+  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
+
+  // Split both inputs into NarrowTy pieces, plus whatever leftover pieces
+  // (of type RemTy) extractParts produces for them.
+  LLT RemTy;
+  SmallVector<Register, 4> LHSParts, LHSRem;
+  if (!extractParts(MI.getOperand(1).getReg(), WideTy, NarrowTy, RemTy,
+                    LHSParts, LHSRem))
+    return UnableToLegalize;
+
+  LLT IgnoredTy;
+  SmallVector<Register, 4> RHSParts, RHSRem;
+  if (!extractParts(MI.getOperand(2).getReg(), WideTy, NarrowTy, IgnoredTy,
+                    RHSParts, RHSRem))
+    llvm_unreachable("inconsistent extractParts result");
+
+  // Re-issue the original opcode per piece: NarrowTy pieces, then leftovers.
+  const unsigned Opc = MI.getOpcode();
+  SmallVector<Register, 4> ResultParts, ResultRem;
+  for (unsigned Idx = 0, End = RHSParts.size(); Idx != End; ++Idx) {
+    auto Piece =
+        MIRBuilder.buildInstr(Opc, {NarrowTy}, {LHSParts[Idx], RHSParts[Idx]});
+    ResultParts.push_back(Piece.getReg(0));
+  }
+  for (unsigned Idx = 0, End = RHSRem.size(); Idx != End; ++Idx) {
+    auto Piece =
+        MIRBuilder.buildInstr(Opc, {RemTy}, {LHSRem[Idx], RHSRem[Idx]});
+    ResultRem.push_back(Piece.getReg(0));
+  }
+
+  insertParts(ResultReg, WideTy, NarrowTy, ResultParts, RemTy, ResultRem);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
+                                    LLT NarrowTy) {
+  // Only the selected value (type index 0) is narrowed here.
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  const Register Cond = MI.getOperand(1).getReg();
+  if (MRI.getType(Cond).isVector()) // TODO: Handle vselect
+    return UnableToLegalize;
+
+  const Register ResultReg = MI.getOperand(0).getReg();
+  const LLT WideTy = MRI.getType(ResultReg);
+
+  // Split both candidate values the same way; extractParts reports any
+  // leftover pieces together with their type.
+  LLT RemTy;
+  SmallVector<Register, 4> TrueParts, TrueRem;
+  if (!extractParts(MI.getOperand(2).getReg(), WideTy, NarrowTy, RemTy,
+                    TrueParts, TrueRem))
+    return UnableToLegalize;
+
+  LLT IgnoredTy;
+  SmallVector<Register, 4> FalseParts, FalseRem;
+  if (!extractParts(MI.getOperand(3).getReg(), WideTy, NarrowTy, IgnoredTy,
+                    FalseParts, FalseRem))
+    llvm_unreachable("inconsistent extractParts result");
+
+  // Every piece is selected with the one shared scalar condition.
+  SmallVector<Register, 4> ResultParts, ResultRem;
+  for (unsigned Idx = 0, End = TrueParts.size(); Idx != End; ++Idx) {
+    auto Piece =
+        MIRBuilder.buildSelect(NarrowTy, Cond, TrueParts[Idx], FalseParts[Idx]);
+    ResultParts.push_back(Piece.getReg(0));
+  }
+  for (unsigned Idx = 0, End = TrueRem.size(); Idx != End; ++Idx) {
+    auto Piece =
+        MIRBuilder.buildSelect(RemTy, Cond, TrueRem[Idx], FalseRem[Idx]);
+    ResultRem.push_back(Piece.getReg(0));
+  }
+
+  insertParts(ResultReg, WideTy, NarrowTy, ResultParts, RemTy, ResultRem);
+  MI.eraseFromParent();
+  return Legalized;
 }
 
 LegalizerHelper::LegalizeResult
@@ -1288,9 +3099,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     return Legalized;
   }
   case TargetOpcode::G_CTLZ: {
-    unsigned SrcReg = MI.getOperand(1).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
     unsigned Len = Ty.getSizeInBits();
-    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                              {Ty}, {SrcReg});
@@ -1314,7 +3125,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     // return Len - popcount(x);
     //
     // Ref: "Hacker's Delight" by Henry Warren
-    unsigned Op = SrcReg;
+    Register Op = SrcReg;
     unsigned NewLen = PowerOf2Ceil(Len);
     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
@@ -1338,9 +3149,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     return Legalized;
   }
   case TargetOpcode::G_CTTZ: {
-    unsigned SrcReg = MI.getOperand(1).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
     unsigned Len = Ty.getSizeInBits();
-    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
       // zero.
       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
@@ -1365,8 +3176,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
         TargetOpcode::G_AND, {Ty},
         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                        {SrcReg, MIBCstNeg1})});
-    if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
-        isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
+        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
       MIRBuilder.buildInstr(
           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
@@ -1381,3 +3192,230 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   }
   }
 }
+
+// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
+// representation.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S1 = LLT::scalar(1);
+
+  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+  // unsigned cul2f(ulong u) {
+  //   uint lz = clz(u);
+  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
+  //   u = (u << lz) & 0x7fffffffffffffffUL;
+  //   ulong t = u & 0xffffffffffUL;
+  //   uint v = (e << 23) | (uint)(u >> 40);
+  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+  //   return as_float(v + r);
+  // }
+
+  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+  auto UShr = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShr));
+
+  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+  auto One = MIRBuilder.buildConstant(S32, 1);
+
+  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+  MIRBuilder.buildAdd(Dst, V, R);
+  MI.eraseFromParent(); // Dst is now defined by the G_ADD built above.
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  const Register Result = MI.getOperand(0).getReg();
+  const Register Input = MI.getOperand(1).getReg();
+  const LLT DstTy = MRI.getType(Result);
+  const LLT SrcTy = MRI.getType(Input);
+
+  // The only conversion lowered here is a 64-bit scalar source to a 32-bit
+  // scalar result; every other type combination is rejected.
+  const bool IsU64ToF32 =
+      SrcTy == LLT::scalar(64) && DstTy == LLT::scalar(32);
+  if (!IsU64ToF32)
+    return UnableToLegalize;
+
+  // TODO: SelectionDAG has several alternative expansions to port which may
+  // be more reasonable depending on the available instructions. If a target
+  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+  // intermediate type, this is probably worse.
+  return lowerU64ToF32BitOps(MI);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S1 = LLT::scalar(1);
+
+  if (SrcTy != S64)
+    return UnableToLegalize;
+
+  if (DstTy == S32) {
+    // signed cl2f(long l) {
+    //   long s = l >> 63;
+    //   float r = cul2f((l + s) ^ s);
+    //   return s ? -r : r;
+    // }
+    Register L = Src;
+    auto SignBit = MIRBuilder.buildConstant(S64, 63);
+    auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+    // (l + s) ^ s is |l|: s is 0 for non-negative l and all-ones otherwise.
+    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+    auto R = MIRBuilder.buildUITOFP(S32, Xor);
+    auto RNeg = MIRBuilder.buildFNeg(S32, R);
+    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+                                            MIRBuilder.buildConstant(S64, 0));
+    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+    MI.eraseFromParent(); // Dst is now defined by the G_SELECT built above.
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
+
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+  // Translate an integer min/max opcode into the matching signed or unsigned
+  // less-than / greater-than compare predicate.
+  if (Opc == TargetOpcode::G_SMIN)
+    return CmpInst::ICMP_SLT;
+  if (Opc == TargetOpcode::G_SMAX)
+    return CmpInst::ICMP_SGT;
+  if (Opc == TargetOpcode::G_UMIN)
+    return CmpInst::ICMP_ULT;
+  if (Opc == TargetOpcode::G_UMAX)
+    return CmpInst::ICMP_UGT;
+  // Any other opcode is a caller error.
+  llvm_unreachable("not in integer min/max");
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  // Result = select (icmp Pred LHS, RHS), LHS, RHS
+  const Register Result = MI.getOperand(0).getReg();
+  const Register LHS = MI.getOperand(1).getReg();
+  const Register RHS = MI.getOperand(2).getReg();
+
+  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+  const LLT CondTy = MRI.getType(Result).changeElementSize(1);
+  auto PickLHS = MIRBuilder.buildICmp(Pred, CondTy, LHS, RHS);
+  MIRBuilder.buildSelect(Result, PickLHS, LHS, RHS);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+
+  const LLT Src0Ty = MRI.getType(Src0);
+  const LLT Src1Ty = MRI.getType(Src1);
+
+  const int Src0Size = Src0Ty.getScalarSizeInBits();
+  const int Src1Size = Src1Ty.getScalarSizeInBits();
+  // Dst = (Src0 with its sign bit cleared) | (sign bit taken from Src1).
+  auto SignBitMask = MIRBuilder.buildConstant(
+    Src0Ty, APInt::getSignMask(Src0Size));
+
+  auto NotSignBitMask = MIRBuilder.buildConstant(
+    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
+  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
+  MachineInstr *Or;
+  // Isolate Src1's sign bit, moving it to Src0's sign position if sizes differ.
+  if (Src0Ty == Src1Ty) {
+    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  } else if (Src0Size > Src1Size) {
+    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  } else {
+    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
+    Or = MIRBuilder.buildOr(Dst, And0, And1);
+  }
+
+  // Be careful about setting nsz/nnan/ninf on every instruction, since the
+  // constants are a nan and -0.0, but the final result should preserve
+  // everything.
+  if (unsigned Flags = MI.getFlags())
+    Or->setFlags(Flags);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+  // G_FMINNUM maps to G_FMINNUM_IEEE; anything else to G_FMAXNUM_IEEE.
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MI.getOperand(1).getReg();
+  Register Src1 = MI.getOperand(2).getReg();
+  LLT Ty = MRI.getType(Dst);
+
+  if (!MI.getFlag(MachineInstr::FmNoNans)) {
+    // Insert canonicalizes if it's possible we need to quiet to get correct
+    // sNaN behavior.
+
+    // Note this must be done here, and not as an optimization combine in the
+    // absence of a dedicated quiet-snan instruction as we're using an
+    // omni-purpose G_FCANONICALIZE.
+    if (!isKnownNeverSNaN(Src0, MRI))
+      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+    if (!isKnownNeverSNaN(Src1, MRI))
+      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+  }
+
+  // Possible sNaN inputs were canonicalized above (or the no-NaNs flag is
+  // set), so this can simply be replaced with the IEEE version.
+  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+  MI.eraseFromParent();
+  return Legalized;
+}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index fa36ede5b976..6e1de95b3277 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,45 @@ cl::opt<bool> llvm::DisableGISelLegalityCheck(
     cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
     cl::Hidden);
 
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
+  switch (Action) {
+  case Legal:
+    OS << "Legal";
+    break;
+  case NarrowScalar:
+    OS << "NarrowScalar";
+    break;
+  case WidenScalar:
+    OS << "WidenScalar";
+    break;
+  case FewerElements:
+    OS << "FewerElements";
+    break;
+  case MoreElements:
+    OS << "MoreElements";
+    break;
+  case Lower:
+    OS << "Lower";
+    break;
+  case Libcall:
+    OS << "Libcall";
+    break;
+  case Custom:
+    OS << "Custom";
+    break;
+  case Unsupported:
+    OS << "Unsupported";
+    break;
+  case NotFound:
+    OS << "NotFound";
+    break;
+  case UseLegacyRules:
+    OS << "UseLegacyRules";
+    break;
+  }
+  return OS;
+}
+
 raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
   OS << Opcode << ", Tys={";
   for (const auto &Type : Types) {
@@ -59,6 +97,86 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
   return OS;
 }
 
+#ifndef NDEBUG
+// Make sure the rule won't (trivially) loop forever.
+static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
+                             const std::pair<unsigned, LLT> &Mutation) {
+  switch (Rule.getAction()) {
+  case Custom:
+  case Lower:
+  case MoreElements:
+  case FewerElements:
+    break;
+  default:
+    return Q.Types[Mutation.first] != Mutation.second;
+  }
+  return true;
+}
+
+// Make sure the returned mutation makes sense for the match type.
+static bool mutationIsSane(const LegalizeRule &Rule,
+                           const LegalityQuery &Q,
+                           std::pair<unsigned, LLT> Mutation) {
+  // If the user wants a custom mutation, then we can't really say much about
+  // it. Return true, and trust that they're doing the right thing.
+  if (Rule.getAction() == Custom)
+    return true;
+
+  const unsigned TypeIdx = Mutation.first;
+  const LLT OldTy = Q.Types[TypeIdx];
+  const LLT NewTy = Mutation.second;
+
+  switch (Rule.getAction()) {
+  case FewerElements:
+  case MoreElements: {
+    if (!OldTy.isVector())
+      return false;
+
+    if (NewTy.isVector()) {
+      if (Rule.getAction() == FewerElements) {
+        // Make sure the element count really decreased.
+        if (NewTy.getNumElements() >= OldTy.getNumElements())
+          return false;
+      } else {
+        // Make sure the element count really increased.
+        if (NewTy.getNumElements() <= OldTy.getNumElements())
+          return false;
+      }
+    }
+
+    // Make sure the element type didn't change.
+    return NewTy.getScalarType() == OldTy.getElementType();
+  }
+  case NarrowScalar:
+  case WidenScalar: {
+    if (OldTy.isVector()) {
+      // Number of elements should not change.
+      if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+        return false;
+    } else {
+      // OldTy is not a vector, so NewTy must not be one either.
+      if (NewTy.isVector())
+        return false;
+    }
+
+    if (Rule.getAction() == NarrowScalar)  {
+      // Make sure the size really decreased.
+      if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
+        return false;
+    } else {
+      // Make sure the size really increased.
+      if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
+        return false;
+    }
+
+    return true;
+  }
+  default:
+    return true;
+  }
+}
+#endif
+
 LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
   LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
              dbgs() << "\n");
@@ -66,17 +184,15 @@ LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
     LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
     return {LegalizeAction::UseLegacyRules, 0, LLT{}};
   }
-  for (const auto &Rule : Rules) {
+  for (const LegalizeRule &Rule : Rules) {
     if (Rule.match(Query)) {
       LLVM_DEBUG(dbgs() << ".. match\n");
       std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
-      LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", "
+      LLVM_DEBUG(dbgs() << ".. .. " << Rule.getAction() << ", "
                         << Mutation.first << ", " << Mutation.second << "\n");
-      assert((Query.Types[Mutation.first] != Mutation.second ||
-              Rule.getAction() == Lower ||
-              Rule.getAction() == MoreElements ||
-              Rule.getAction() == FewerElements) &&
-             "Simple loop detected");
+      assert(mutationIsSane(Rule, Query, Mutation) &&
+             "legality mutation invalid for match");
+      assert(hasNoSimpleLoops(Rule, Query, Mutation) && "Simple loop detected");
       return {Rule.getAction(), Mutation.first, Mutation.second};
     } else
       LLVM_DEBUG(dbgs() << ".. no match\n");
@@ -180,16 +296,14 @@ void LegalizerInfo::computeTables() {
         if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
             ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
           S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
-        llvm::sort(ScalarSpecifiedActions.begin(),
-                   ScalarSpecifiedActions.end());
+        llvm::sort(ScalarSpecifiedActions);
         checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
         setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
       }
 
       // 2. Handle pointer types
       for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
-        llvm::sort(PointerSpecifiedActions.second.begin(),
-                   PointerSpecifiedActions.second.end());
+        llvm::sort(PointerSpecifiedActions.second);
         checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
         // For pointer types, we assume that there isn't a meaningfull way
         // to change the number of bits used in the pointer.
@@ -201,8 +315,7 @@ void LegalizerInfo::computeTables() {
       // 3. Handle vector types
       SizeAndActionsVec ElementSizesSeen;
       for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
-        llvm::sort(VectorSpecifiedActions.second.begin(),
-                   VectorSpecifiedActions.second.end());
+        llvm::sort(VectorSpecifiedActions.second);
         const uint16_t ElementSize = VectorSpecifiedActions.first;
         ElementSizesSeen.push_back({ElementSize, Legal});
         checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
@@ -328,9 +441,8 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const {
   for (unsigned i = 0; i < Query.Types.size(); ++i) {
     auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
     if (Action.first != Legal) {
-      LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i
-                        << " Action=" << (unsigned)Action.first << ", "
-                        << Action.second << "\n");
+      LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
+                        << Action.first << ", " << Action.second << "\n");
       return {Action.first, i, Action.second};
     } else
       LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
@@ -364,8 +476,9 @@ LegalizerInfo::getAction(const MachineInstr &MI,
 
   SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
   for (const auto &MMO : MI.memoperands())
-    MemDescrs.push_back(
-        {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()});
+    MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
+                         8 * MMO->getAlignment(),
+                         MMO->getOrdering()});
 
   return getAction({MI.getOpcode(), Types, MemDescrs});
 }
@@ -375,6 +488,14 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
   return getAction(MI, MRI).Action == Legal;
 }
 
+bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
+                                    const MachineRegisterInfo &MRI) const {
+  auto Action = getAction(MI, MRI).Action;
+  // If the action is custom, it may not necessarily modify the instruction,
+  // so we have to assume it's legal.
+  return Action == Legal || Action == Custom;
+}
+
 bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    MachineIRBuilder &MIRBuilder,
                                    GISelChangeObserver &Observer) const {
@@ -423,14 +544,10 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
   // Find the last element in Vec that has a bitsize equal to or smaller than
   // the requested bit size.
   // That is the element just before the first element that is bigger than Size.
-  auto VecIt = std::upper_bound(
-      Vec.begin(), Vec.end(), Size,
-      [](const uint32_t Size, const SizeAndAction lhs) -> bool {
-        return Size < lhs.first;
-      });
-  assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
-  --VecIt;
-  int VecIdx = VecIt - Vec.begin();
+  auto It = partition_point(
+      Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
+  assert(It != Vec.begin() && "Does Vec not start with size 1?");
+  int VecIdx = It - Vec.begin() - 1;
 
   LegalizeAction Action = Vec[VecIdx].second;
   switch (Action) {
@@ -541,6 +658,12 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
                       IntermediateType.getScalarSizeInBits())};
 }
 
+bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      MachineIRBuilder &MIRBuilder) const {
+  return true;
+}
+
 /// \pre Type indices of every opcode form a dense set starting from 0.
 void LegalizerInfo::verify(const MCInstrInfo &MII) const {
 #ifndef NDEBUG
@@ -584,7 +707,8 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
     const MachineRegisterInfo &MRI = MF.getRegInfo();
     for (const MachineBasicBlock &MBB : MF)
       for (const MachineInstr &MI : MBB)
-        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+        if (isPreISelGenericOpcode(MI.getOpcode()) &&
+            !MLI->isLegalOrCustom(MI, MRI))
           return &MI;
   }
   return nullptr;
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 52b340753a50..3592409710a7 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -1,9 +1,8 @@
 //===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -11,8 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 
@@ -21,17 +20,53 @@
 using namespace llvm;
 
 char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
-                "Move/duplicate certain instructions close to their use", false,
-                false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+                      "Move/duplicate certain instructions close to their use",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+                    "Move/duplicate certain instructions close to their use",
+                    false, false)
 
-Localizer::Localizer() : MachineFunctionPass(ID) {
-  initializeLocalizerPass(*PassRegistry::getPassRegistry());
-}
+Localizer::Localizer() : MachineFunctionPass(ID) { }
 
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
 
 bool Localizer::shouldLocalize(const MachineInstr &MI) {
+  // Assuming a spill and reload of a value has a cost of 1 instruction each,
+  // this helper function computes the maximum number of uses we should consider
+  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+  // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. Any more than 2 users and we have a net code
+  // size increase. This doesn't take into account register pressure though.
+  auto maxUses = [](unsigned RematCost) {
+    // A cost of 1 means remats are basically free.
+    if (RematCost == 1)
+      return UINT_MAX;
+    if (RematCost == 2)
+      return 2U;
+
+    // Remat is too expensive, only sink if there's one user.
+    if (RematCost > 2)
+      return 1U;
+    llvm_unreachable("Unexpected remat cost");
+  };
+
+  // Helper to walk through uses and terminate if we've reached a limit. Saves
+  // us spending time traversing uses if all we want to know is if it's >= min.
+  auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+    unsigned NumUses = 0;
+    auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+    for (; UI != UE && NumUses < MaxUses; ++UI) {
+      NumUses++;
+    }
+    // If we haven't reached the end yet then there are more than MaxUses users.
+    return UI == UE;
+  };
+
   switch (MI.getOpcode()) {
   default:
     return false;
@@ -40,11 +75,22 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_FRAME_INDEX:
+  case TargetOpcode::G_INTTOPTR:
     return true;
+  case TargetOpcode::G_GLOBAL_VALUE: {
+    unsigned RematCost = TTI->getGISelRematGlobalCost();
+    unsigned Reg = MI.getOperand(0).getReg();
+    unsigned MaxUses = maxUses(RematCost);
+    if (MaxUses == UINT_MAX)
+      return true; // Remats are "free" so always localize.
+    bool B = isUsesAtMost(Reg, MaxUses);
+    return B;
+  }
   }
 }
 
 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -58,6 +104,107 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
   return InsertMBB == Def.getParent();
 }
 
+bool Localizer::localizeInterBlock(MachineFunction &MF,
+                                   LocalizedSetVecT &LocalizedInstrs) {
+  bool Changed = false;
+  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+  // Since the IRTranslator only emits constants into the entry block, and the
+  // rest of the GISel pipeline generally emits constants close to their users,
+  // we only localize instructions in the entry block here. This might change if
+  // we start doing CSE across blocks.
+  auto &MBB = MF.front();
+  for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
+    MachineInstr &MI = *RI;
+    if (!shouldLocalize(MI))
+      continue;
+    LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+    assert(MI.getDesc().getNumDefs() == 1 &&
+           "More than one definition not supported yet");
+    unsigned Reg = MI.getOperand(0).getReg();
+    // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use range iterator.
+    for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+         MOIt != MOItEnd;) {
+      MachineOperand &MOUse = *MOIt++;
+      // Check if the use is already local.
+      MachineBasicBlock *InsertMBB;
+      LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+                 dbgs() << "Checking use: " << MIUse
+                        << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+      if (isLocalUse(MOUse, MI, InsertMBB))
+        continue;
+      LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+      Changed = true;
+      auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+      auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+      if (NewVRegIt == MBBWithLocalDef.end()) {
+        // Create the localized instruction.
+        MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+        LocalizedInstrs.insert(LocalizedMI);
+        MachineInstr &UseMI = *MOUse.getParent();
+        if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+        else
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
+
+        // Set a new register for the definition.
+        unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+        MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+        LocalizedMI->getOperand(0).setReg(NewReg);
+        NewVRegIt =
+            MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+        LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+      }
+      LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+                        << '\n');
+      // Update the user reg.
+      MOUse.setReg(NewVRegIt->second);
+    }
+  }
+  return Changed;
+}
+
+bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
+  bool Changed = false;
+
+  // For each already-localized instruction, scan its block top down from the
+  // instruction's position until we hit the first of its non-PHI users.
+
+  // FIXME: Consider doing inst duplication if live ranges are very long due to
+  // many users, but this case may be better served by regalloc improvements.
+
+  for (MachineInstr *MI : LocalizedInstrs) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    MachineBasicBlock &MBB = *MI->getParent();
+    // All of the non-PHI, non-debug user MIs of this reg.
+    SmallPtrSet<MachineInstr *, 32> Users;
+    for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+      if (!UseMI.isPHI())
+        Users.insert(&UseMI);
+    }
+    // If all the users were PHIs then they're not going to be in our block,
+    // don't try to move this instruction.
+    if (Users.empty())
+      continue;
+
+    MachineBasicBlock::iterator II(MI);
+    ++II;
+    while (II != MBB.end() && !Users.count(&*II))
+      ++II;
+    // Validate II before the debug output below dereferences it.
+    assert(II != MBB.end() && "Didn't find the user in the MBB");
+    LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II
+                      << "\n");
+    MI->removeFromParent();
+    MBB.insert(II, MI);
+    Changed = true;
+  }
+  return Changed;
+}
+
 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
   // If the ISel pipeline failed, do not bother running that pass.
   if (MF.getProperties().hasProperty(
@@ -68,62 +215,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
 
   init(MF);
 
-  bool Changed = false;
-  // Keep track of the instructions we localized.
-  // We won't need to process them if we see them later in the CFG.
-  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
-  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
-  // TODO: Do bottom up traversal.
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
-        continue;
-      LLVM_DEBUG(dbgs() << "Should localize: " << MI);
-      assert(MI.getDesc().getNumDefs() == 1 &&
-             "More than one definition not supported yet");
-      unsigned Reg = MI.getOperand(0).getReg();
-      // Check if all the users of MI are local.
-      // We are going to invalidation the list of use operands, so we
-      // can't use range iterator.
-      for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
-           MOIt != MOItEnd;) {
-        MachineOperand &MOUse = *MOIt++;
-        // Check if the use is already local.
-        MachineBasicBlock *InsertMBB;
-        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
-                   dbgs() << "Checking use: " << MIUse
-                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
-        if (isLocalUse(MOUse, MI, InsertMBB))
-          continue;
-        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
-        Changed = true;
-        auto MBBAndReg = std::make_pair(InsertMBB, Reg);
-        auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
-        if (NewVRegIt == MBBWithLocalDef.end()) {
-          // Create the localized instruction.
-          MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
-          LocalizedInstrs.insert(LocalizedMI);
-          // Don't try to be smart for the insertion point.
-          // There is no guarantee that the first seen use is the first
-          // use in the block.
-          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
-                            LocalizedMI);
+  // Keep track of the instructions we localized. We'll do a second pass of
+  // intra-block localization to further reduce live ranges.
+  LocalizedSetVecT LocalizedInstrs;
 
-          // Set a new register for the definition.
-          unsigned NewReg =
-              MRI->createGenericVirtualRegister(MRI->getType(Reg));
-          MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
-          LocalizedMI->getOperand(0).setReg(NewReg);
-          NewVRegIt =
-              MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
-          LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
-        }
-        LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
-                          << '\n');
-        // Update the user reg.
-        MOUse.setReg(NewVRegIt->second);
-      }
-    }
-  }
-  return Changed;
+  bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+  return Changed |= localizeIntraBlock(LocalizedInstrs);
 }
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 1f5611061994..b7a73326b85c 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -17,6 +16,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugInfo.h"
@@ -87,7 +87,7 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildDirectDbgValue(Register Reg, const MDNode *Variable,
                                       const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -100,7 +100,7 @@ MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
                                         const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -160,23 +160,32 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
   return MIB.addMetadata(Label);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
-  assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
-  return buildInstr(TargetOpcode::G_FRAME_INDEX)
-      .addDef(Res)
-      .addFrameIndex(Idx);
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
+                                                      int Idx) {
+  assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+  auto MIB = buildInstr(TargetOpcode::G_FRAME_INDEX);
+  Res.addDefToMIB(*getMRI(), MIB);
+  MIB.addFrameIndex(Idx);
+  return MIB;
 }
 
-MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
                                                        const GlobalValue *GV) {
-  assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
-  assert(getMRI()->getType(Res).getAddressSpace() ==
+  assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+  assert(Res.getLLTTy(*getMRI()).getAddressSpace() ==
              GV->getType()->getAddressSpace() &&
          "address space mismatch");
 
-  return buildInstr(TargetOpcode::G_GLOBAL_VALUE)
-      .addDef(Res)
-      .addGlobalAddress(GV);
+  auto MIB = buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+  Res.addDefToMIB(*getMRI(), MIB);
+  MIB.addGlobalAddress(GV);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
+                                                     unsigned JTI) {
+  return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
+      .addJumpTableIndex(JTI);
 }
 
 void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
@@ -185,20 +194,28 @@ void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
   assert((Res == Op0 && Res == Op1) && "type mismatch");
 }
 
-MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
-                                               unsigned Op1) {
-  assert(getMRI()->getType(Res).isPointer() &&
-         getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
-  assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
+void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
+                                       const LLT &Op1) {
+  assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+  assert((Res == Op0) && "type mismatch");
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
+                                               const SrcOp &Op0,
+                                               const SrcOp &Op1) {
+  assert(Res.getLLTTy(*getMRI()).isPointer() &&
+         Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
+  assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
 
-  return buildInstr(TargetOpcode::G_GEP)
-      .addDef(Res)
-      .addUse(Op0)
-      .addUse(Op1);
+  auto MIB = buildInstr(TargetOpcode::G_GEP);
+  Res.addDefToMIB(*getMRI(), MIB);
+  Op0.addSrcToMIB(MIB);
+  Op1.addSrcToMIB(MIB);
+  return MIB;
 }
 
 Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
                                  const LLT &ValueTy, uint64_t Value) {
   assert(Res == 0 && "Res is a result argument");
   assert(ValueTy.isScalar()  && "invalid offset type");
@@ -209,32 +226,43 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
   }
 
   Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
-  unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy);
-
-  buildConstant(TmpReg, Value);
-  return buildGEP(Res, Op0, TmpReg);
+  auto Cst = buildConstant(ValueTy, Value);
+  return buildGEP(Res, Op0, Cst.getReg(0));
 }
 
-MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
+                                                   const SrcOp &Op0,
                                                    uint32_t NumBits) {
-  assert(getMRI()->getType(Res).isPointer() &&
-         getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
+  assert(Res.getLLTTy(*getMRI()).isPointer() &&
+         Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
 
-  return buildInstr(TargetOpcode::G_PTR_MASK)
-      .addDef(Res)
-      .addUse(Op0)
-      .addImm(NumBits);
+  auto MIB = buildInstr(TargetOpcode::G_PTR_MASK);
+  Res.addDefToMIB(*getMRI(), MIB);
+  Op0.addSrcToMIB(MIB);
+  MIB.addImm(NumBits);
+  return MIB;
 }
 
 MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
   return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
   assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
   return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
 }
 
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
+                                                unsigned JTI,
+                                                Register IndexReg) {
+  assert(getMRI()->getType(TablePtr).isPointer() &&
+         "Table reg must be a pointer");
+  return buildInstr(TargetOpcode::G_BRJT)
+      .addUse(TablePtr)
+      .addJumpTableIndex(JTI)
+      .addUse(IndexReg);
+}
+
 MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
                                                 const SrcOp &Op) {
   return buildInstr(TargetOpcode::COPY, Res, Op);
@@ -243,36 +271,60 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
 MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
                                                     const ConstantInt &Val) {
   LLT Ty = Res.getLLTTy(*getMRI());
+  LLT EltTy = Ty.getScalarType();
+  assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() &&
+         "creating constant with the wrong size");
+
+  if (Ty.isVector()) {
+    auto Const = buildInstr(TargetOpcode::G_CONSTANT)
+    .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+    .addCImm(&Val);
+    return buildSplatVector(Res, Const);
+  }
 
-  assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
-
-  const ConstantInt *NewVal = &Val;
-  if (Ty.getSizeInBits() != Val.getBitWidth())
-    NewVal = ConstantInt::get(getMF().getFunction().getContext(),
-                              Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
-
-  auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
-  Res.addDefToMIB(*getMRI(), MIB);
-  MIB.addCImm(NewVal);
-  return MIB;
+  auto Const = buildInstr(TargetOpcode::G_CONSTANT);
+  Res.addDefToMIB(*getMRI(), Const);
+  Const.addCImm(&Val);
+  return Const;
 }
 
 MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
                                                     int64_t Val) {
   auto IntN = IntegerType::get(getMF().getFunction().getContext(),
-                               Res.getLLTTy(*getMRI()).getSizeInBits());
+                               Res.getLLTTy(*getMRI()).getScalarSizeInBits());
   ConstantInt *CI = ConstantInt::get(IntN, Val, true);
   return buildConstant(Res, *CI);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
                                                      const ConstantFP &Val) {
-  assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
+  LLT Ty = Res.getLLTTy(*getMRI());
+  LLT EltTy = Ty.getScalarType();
 
-  auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
-  Res.addDefToMIB(*getMRI(), MIB);
-  MIB.addFPImm(&Val);
-  return MIB;
+  assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
+         == EltTy.getSizeInBits() &&
+         "creating fconstant with the wrong size");
+
+  assert(!Ty.isPointer() && "invalid operand type");
+
+  if (Ty.isVector()) {
+    auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
+    .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+    .addFPImm(&Val);
+
+    return buildSplatVector(Res, Const);
+  }
+
+  auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
+  Res.addDefToMIB(*getMRI(), Const);
+  Const.addFPImm(&Val);
+  return Const;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+                                                    const APInt &Val) {
+  ConstantInt *CI = ConstantInt::get(getMF().getFunction().getContext(), Val);
+  return buildConstant(Res, *CI);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
@@ -280,44 +332,62 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
   LLT DstTy = Res.getLLTTy(*getMRI());
   auto &Ctx = getMF().getFunction().getContext();
   auto *CFP =
-      ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
+      ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
   return buildFConstant(Res, *CFP);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+                                                     const APFloat &Val) {
+  auto &Ctx = getMF().getFunction().getContext();
+  auto *CFP = ConstantFP::get(Ctx, Val);
+  return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst,
                                                   MachineBasicBlock &Dest) {
   assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
 
   return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res,
+                                                const SrcOp &Addr,
                                                 MachineMemOperand &MMO) {
   return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
-                                                     unsigned Res,
-                                                     unsigned Addr,
+                                                     const DstOp &Res,
+                                                     const SrcOp &Addr,
                                                      MachineMemOperand &MMO) {
-  assert(getMRI()->getType(Res).isValid() && "invalid operand type");
-  assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+  assert(Res.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+  assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
 
-  return buildInstr(Opcode)
-      .addDef(Res)
-      .addUse(Addr)
-      .addMemOperand(&MMO);
+  auto MIB = buildInstr(Opcode);
+  Res.addDefToMIB(*getMRI(), MIB);
+  Addr.addSrcToMIB(MIB);
+  MIB.addMemOperand(&MMO);
+  return MIB;
 }
 
-MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
+                                                 const SrcOp &Addr,
                                                  MachineMemOperand &MMO) {
-  assert(getMRI()->getType(Val).isValid() && "invalid operand type");
-  assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+  assert(Val.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+  assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
 
-  return buildInstr(TargetOpcode::G_STORE)
-      .addUse(Val)
-      .addUse(Addr)
-      .addMemOperand(&MMO);
+  auto MIB = buildInstr(TargetOpcode::G_STORE);
+  Val.addSrcToMIB(MIB);
+  Addr.addSrcToMIB(MIB);
+  MIB.addMemOperand(&MMO);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res,
+                                                 const DstOp &CarryOut,
+                                                 const SrcOp &Op0,
+                                                 const SrcOp &Op1) {
+  return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
 }
 
 MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
@@ -344,6 +414,25 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
   return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
 }
 
+unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
+  const auto *TLI = getMF().getSubtarget().getTargetLowering();
+  switch (TLI->getBooleanContents(IsVec, IsFP)) {
+  case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+    return TargetOpcode::G_SEXT;
+  case TargetLoweringBase::ZeroOrOneBooleanContent:
+    return TargetOpcode::G_ZEXT;
+  default:
+    return TargetOpcode::G_ANYEXT;
+  }
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
+                                                   const SrcOp &Op,
+                                                   bool IsFP) {
+  unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+  return buildInstr(ExtOp, Res, Op);
+}
+
 MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
                                                       const DstOp &Res,
                                                       const SrcOp &Op) {
@@ -403,29 +492,32 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
   return buildInstr(Opcode, Dst, Src);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
+                                                   const SrcOp &Src,
                                                    uint64_t Index) {
+  LLT SrcTy = Src.getLLTTy(*getMRI());
+  LLT DstTy = Dst.getLLTTy(*getMRI());
+
 #ifndef NDEBUG
-  assert(getMRI()->getType(Src).isValid() && "invalid operand type");
-  assert(getMRI()->getType(Res).isValid() && "invalid operand type");
-  assert(Index + getMRI()->getType(Res).getSizeInBits() <=
-             getMRI()->getType(Src).getSizeInBits() &&
+  assert(SrcTy.isValid() && "invalid operand type");
+  assert(DstTy.isValid() && "invalid operand type");
+  assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
          "extracting off end of register");
 #endif
 
-  if (getMRI()->getType(Res).getSizeInBits() ==
-      getMRI()->getType(Src).getSizeInBits()) {
+  if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) {
     assert(Index == 0 && "insertion past the end of a register");
-    return buildCast(Res, Src);
+    return buildCast(Dst, Src);
   }
 
-  return buildInstr(TargetOpcode::G_EXTRACT)
-      .addDef(Res)
-      .addUse(Src)
-      .addImm(Index);
+  auto Extract = buildInstr(TargetOpcode::G_EXTRACT);
+  Dst.addDefToMIB(*getMRI(), Extract);
+  Src.addSrcToMIB(Extract);
+  Extract.addImm(Index);
+  return Extract;
 }
 
-void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
                                      ArrayRef<uint64_t> Indices) {
 #ifndef NDEBUG
   assert(Ops.size() == Indices.size() && "incompatible args");
@@ -454,11 +546,11 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
     return;
   }
 
-  unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy);
+  Register ResIn = getMRI()->createGenericVirtualRegister(ResTy);
   buildUndef(ResIn);
 
   for (unsigned i = 0; i < Ops.size(); ++i) {
-    unsigned ResOut = i + 1 == Ops.size()
+    Register ResOut = i + 1 == Ops.size()
                           ? Res
                           : getMRI()->createGenericVirtualRegister(ResTy);
     buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
@@ -471,11 +563,12 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
 }
 
 MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
-                                                 ArrayRef<unsigned> Ops) {
+                                                 ArrayRef<Register> Ops) {
   // Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
 }
 
@@ -485,31 +578,48 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
+                                                   const SrcOp &Op) {
+  unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+  SmallVector<Register, 8> TmpVec;
+  for (unsigned I = 0; I != NumReg; ++I)
+    TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
+  return buildUnmerge(TmpVec, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
                                                    const SrcOp &Op) {
-  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<DstOp>,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
-                                                       ArrayRef<unsigned> Ops) {
-  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+                                                       ArrayRef<Register> Ops) {
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
   return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
 }
 
+MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
+                                                       const SrcOp &Src) {
+  SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
+  return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
 MachineInstrBuilder
 MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
-                                        ArrayRef<unsigned> Ops) {
-  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+                                        ArrayRef<Register> Ops) {
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
@@ -517,16 +627,16 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
-  // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
+  // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
   return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
-                                                  unsigned Op, unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src,
+                                                  Register Op, unsigned Index) {
   assert(Index + getMRI()->getType(Op).getSizeInBits() <=
              getMRI()->getType(Res).getSizeInBits() &&
          "insertion past the end of a register");
@@ -544,13 +654,25 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
 }
 
 MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
-                                                     unsigned Res,
+                                                     ArrayRef<Register> ResultRegs,
                                                      bool HasSideEffects) {
   auto MIB =
       buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
                                 : TargetOpcode::G_INTRINSIC);
-  if (Res)
-    MIB.addDef(Res);
+  for (unsigned ResultReg : ResultRegs)
+    MIB.addDef(ResultReg);
+  MIB.addIntrinsicID(ID);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+                                                     ArrayRef<DstOp> Results,
+                                                     bool HasSideEffects) {
+  auto MIB =
+      buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+                                : TargetOpcode::G_INTRINSIC);
+  for (DstOp Result : Results)
+    Result.addDefToMIB(*getMRI(), MIB);
   MIB.addIntrinsicID(ID);
   return MIB;
 }
@@ -601,8 +723,8 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
 }
 
 MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
-    unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
-    unsigned NewVal, MachineMemOperand &MMO) {
+    Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal,
+    Register NewVal, MachineMemOperand &MMO) {
 #ifndef NDEBUG
   LLT OldValResTy = getMRI()->getType(OldValRes);
   LLT SuccessResTy = getMRI()->getType(SuccessRes);
@@ -628,8 +750,8 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
-                                     unsigned CmpVal, unsigned NewVal,
+MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
+                                     Register CmpVal, Register NewVal,
                                      MachineMemOperand &MMO) {
 #ifndef NDEBUG
   LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -653,9 +775,9 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
 }
 
 MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
-                                                     unsigned OldValRes,
-                                                     unsigned Addr,
-                                                     unsigned Val,
+                                                     Register OldValRes,
+                                                     Register Addr,
+                                                     Register Val,
                                                      MachineMemOperand &MMO) {
 #ifndef NDEBUG
   LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -675,75 +797,82 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
-                                     unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(Register OldValRes, Register Addr,
+                                     Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
-                                     unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(Register OldValRes, Register Addr,
+                                     Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
                         MMO);
 }
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
-                                                       unsigned Addr,
-                                                       unsigned Val,
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(Register OldValRes,
+                                                       Register Addr,
+                                                       Register Val,
                                                        MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
-                                    unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(Register OldValRes, Register Addr,
+                                    Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
-                                     unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(Register OldValRes, Register Addr,
+                                     Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
                         MMO);
 }
 MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
-                                     unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
+                                     Register Val, MachineMemOperand &MMO) {
   return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
                         MMO);
 }
 
 MachineInstrBuilder
-MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
+  return buildInstr(TargetOpcode::G_FENCE)
+    .addImm(Ordering)
+    .addImm(Scope);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
 #ifndef NDEBUG
   assert(getMRI()->getType(Res).isPointer() && "invalid res type");
 #endif
@@ -803,17 +932,18 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
   }
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_AND:
-  case TargetOpcode::G_ASHR:
-  case TargetOpcode::G_LSHR:
   case TargetOpcode::G_MUL:
   case TargetOpcode::G_OR:
-  case TargetOpcode::G_SHL:
   case TargetOpcode::G_SUB:
   case TargetOpcode::G_XOR:
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_UREM:
-  case TargetOpcode::G_SREM: {
+  case TargetOpcode::G_SREM:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
     // All these are binary ops.
     assert(DstOps.size() == 1 && "Invalid Dst");
     assert(SrcOps.size() == 2 && "Invalid Srcs");
@@ -821,6 +951,17 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
                      SrcOps[0].getLLTTy(*getMRI()),
                      SrcOps[1].getLLTTy(*getMRI()));
     break;
+  }
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR: {
+    assert(DstOps.size() == 1 && "Invalid Dst");
+    assert(SrcOps.size() == 2 && "Invalid Srcs");
+    validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
+                    SrcOps[0].getLLTTy(*getMRI()),
+                    SrcOps[1].getLLTTy(*getMRI()));
+    break;
+  }
   case TargetOpcode::G_SEXT:
   case TargetOpcode::G_ZEXT:
   case TargetOpcode::G_ANYEXT:
@@ -830,7 +971,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
                      SrcOps[0].getLLTTy(*getMRI()), true);
     break;
   case TargetOpcode::G_TRUNC:
-  case TargetOpcode::G_FPTRUNC:
+  case TargetOpcode::G_FPTRUNC: {
     assert(DstOps.size() == 1 && "Invalid Dst");
     assert(SrcOps.size() == 1 && "Invalid Srcs");
     validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
@@ -839,10 +980,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
   }
   case TargetOpcode::COPY:
     assert(DstOps.size() == 1 && "Invalid Dst");
-    assert(SrcOps.size() == 1 && "Invalid Srcs");
-    assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
-           SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
-           DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+    // If the caller wants to add a subreg source it has to be done separately
+    // so we may not have any SrcOps at this point yet.
     break;
   case TargetOpcode::G_FCMP:
   case TargetOpcode::G_ICMP: {
@@ -943,7 +1082,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
            "type mismatch in input list");
     assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
                DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
-           "input scalars do not exactly cover the outpur vector register");
+           "input scalars do not exactly cover the output vector register");
     break;
   }
   case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -976,7 +1115,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
            "type mismatch in input list");
     assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
                DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
-           "input vectors do not exactly cover the outpur vector register");
+           "input vectors do not exactly cover the output vector register");
     break;
   }
   case TargetOpcode::G_UADDE: {
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index dcc8b7cc23c5..42be88fcf947 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -72,7 +71,6 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
 
 RegBankSelect::RegBankSelect(Mode RunningMode)
     : MachineFunctionPass(ID), OptMode(RunningMode) {
-  initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
   if (RegBankSelectMode.getNumOccurrences() != 0) {
     OptMode = RegBankSelectMode;
     if (RegBankSelectMode != RunningMode)
@@ -110,7 +108,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool RegBankSelect::assignmentMatch(
-    unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+    Register Reg, const RegisterBankInfo::ValueMapping &ValMapping,
     bool &OnlyAssign) const {
   // By default we assume we will have to repair something.
   OnlyAssign = false;
@@ -135,34 +133,84 @@ bool RegBankSelect::assignmentMatch(
 bool RegBankSelect::repairReg(
     MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
     RegBankSelect::RepairingPlacement &RepairPt,
-    const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
-  if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
-    return false;
-  assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+    const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) {
+
+  assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) &&
+         "need new vreg for each breakdown");
+
   // An empty range of new register means no repairing.
   assert(!empty(NewVRegs) && "We should not have to repair");
 
-  // Assume we are repairing a use and thus, the original reg will be
-  // the source of the repairing.
-  unsigned Src = MO.getReg();
-  unsigned Dst = *NewVRegs.begin();
-
-  // If we repair a definition, swap the source and destination for
-  // the repairing.
-  if (MO.isDef())
-    std::swap(Src, Dst);
-
-  assert((RepairPt.getNumInsertPoints() == 1 ||
-          TargetRegisterInfo::isPhysicalRegister(Dst)) &&
-         "We are about to create several defs for Dst");
-
-  // Build the instruction used to repair, then clone it at the right
-  // places. Avoiding buildCopy bypasses the check that Src and Dst have the
-  // same types because the type is a placeholder when this function is called.
-  MachineInstr *MI =
-      MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
-  LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
-                    << '\n');
+  MachineInstr *MI;
+  if (ValMapping.NumBreakDowns == 1) {
+    // Assume we are repairing a use and thus, the original reg will be
+    // the source of the repairing.
+    Register Src = MO.getReg();
+    Register Dst = *NewVRegs.begin();
+
+    // If we repair a definition, swap the source and destination for
+    // the repairing.
+    if (MO.isDef())
+      std::swap(Src, Dst);
+
+    assert((RepairPt.getNumInsertPoints() == 1 ||
+            TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+           "We are about to create several defs for Dst");
+
+    // Build the instruction used to repair, then clone it at the right
+    // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+    // same types because the type is a placeholder when this function is called.
+    MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+      .addDef(Dst)
+      .addUse(Src);
+    LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+               << '\n');
+  } else {
+    // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
+    // sequence.
+    assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported");
+
+    LLT RegTy = MRI->getType(MO.getReg());
+    if (MO.isDef()) {
+      unsigned MergeOp;
+      if (RegTy.isVector()) {
+        if (ValMapping.NumBreakDowns == RegTy.getNumElements())
+          MergeOp = TargetOpcode::G_BUILD_VECTOR;
+        else {
+          assert(
+              (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns ==
+               RegTy.getSizeInBits()) &&
+              (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() ==
+               0) &&
+              "don't understand this value breakdown");
+
+          MergeOp = TargetOpcode::G_CONCAT_VECTORS;
+        }
+      } else
+        MergeOp = TargetOpcode::G_MERGE_VALUES;
+
+      auto MergeBuilder =
+        MIRBuilder.buildInstrNoInsert(MergeOp)
+        .addDef(MO.getReg());
+
+      for (Register SrcReg : NewVRegs)
+        MergeBuilder.addUse(SrcReg);
+
+      MI = MergeBuilder;
+    } else {
+      MachineInstrBuilder UnMergeBuilder =
+        MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
+      for (Register DefReg : NewVRegs)
+        UnMergeBuilder.addDef(DefReg);
+
+      UnMergeBuilder.addUse(MO.getReg());
+      MI = UnMergeBuilder;
+    }
+  }
+
+  if (RepairPt.getNumInsertPoints() != 1)
+    report_fatal_error("need testcase to support multiple insertion points");
+
   // TODO:
   // Check if MI is legal. if not, we need to legalize all the
   // instructions we are going to insert.
@@ -195,7 +243,8 @@ uint64_t RegBankSelect::getRepairCost(
   const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
   // If MO does not have a register bank, we should have just been
   // able to set one unless we have to break the value down.
-  assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+  assert(CurRegBank || MO.isDef());
+
   // Def: Val <- NewDefs
   //     Same number of values: copy
   //     Different number: Val = build_sequence Defs1, Defs2, ...
@@ -206,6 +255,9 @@ uint64_t RegBankSelect::getRepairCost(
   // We should remember that this value is available somewhere else to
   // coalesce the value.
 
+  if (ValMapping.NumBreakDowns != 1)
+    return RBI->getBreakDownCost(ValMapping, CurRegBank);
+
   if (IsSameNumOfValues) {
     const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
     // If we repair a definition, swap the source and destination for
@@ -345,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit(
   //   repairing.
 
   // Check if this is a physical or virtual register.
-  unsigned Reg = MO.getReg();
+  Register Reg = MO.getReg();
   if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
     // We are going to split every outgoing edges.
     // Check that this is possible.
@@ -416,7 +468,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
     const MachineOperand &MO = MI.getOperand(OpIdx);
     if (!MO.isReg())
       continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (!Reg)
       continue;
     LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
@@ -542,7 +594,7 @@ bool RegBankSelect::applyMapping(
     MachineOperand &MO = MI.getOperand(OpIdx);
     const RegisterBankInfo::ValueMapping &ValMapping =
         InstrMapping.getOperandMapping(OpIdx);
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
 
     switch (RepairPt.getKind()) {
     case RepairingPlacement::Reassign:
@@ -605,7 +657,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
   const Function &F = MF.getFunction();
   Mode SaveOptMode = OptMode;
-  if (F.hasFnAttribute(Attribute::OptimizeNone))
+  if (F.hasOptNone())
     OptMode = Mode::Fast;
   init(MF);
 
@@ -644,8 +696,21 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
                            "unable to map instruction", MI);
         return false;
       }
+
+      // It's possible the mapping changed control flow, and moved the following
+      // instruction to a new block, so figure out the new parent.
+      if (MII != End) {
+        MachineBasicBlock *NextInstBB = MII->getParent();
+        if (NextInstBB != MBB) {
+          LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
+          MBB = NextInstBB;
+          MIRBuilder.setMBB(*MBB);
+          End = MBB->end();
+        }
+      }
     }
   }
+
   OptMode = SaveOptMode;
   return false;
 }
@@ -692,7 +757,7 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
     MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
     // Check if we can move the insertion point prior to the
     // terminators of the predecessor.
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
     for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
       if (It->modifiesRegister(Reg, &TRI)) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 16f67a217ce1..4e41f338934d 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 28404e52d6ea..159422e38878 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -81,7 +80,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
 }
 
 const RegisterBank *
-RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
                              const TargetRegisterInfo &TRI) const {
   if (TargetRegisterInfo::isPhysicalRegister(Reg))
     return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
@@ -96,7 +95,7 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
 }
 
 const TargetRegisterClass &
-RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg,
+RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
                                          const TargetRegisterInfo &TRI) const {
   assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
          "Reg must be a physreg");
@@ -126,7 +125,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
 }
 
 const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
-    unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+    Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
 
   // If the register already has a class, fallback to MRI::constrainRegClass.
   auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -181,7 +180,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
     const MachineOperand &MO = MI.getOperand(OpIdx);
     if (!MO.isReg())
       continue;
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (!Reg)
       continue;
     // The register bank of Reg is just a side effect of the current
@@ -208,19 +207,49 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
         continue;
       }
     }
-    const ValueMapping *ValMapping =
-        &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+
+    unsigned Size = getSizeInBits(Reg, MRI, TRI);
+    const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank);
     if (IsCopyLike) {
-      OperandsMapping[0] = ValMapping;
+      if (!OperandsMapping[0]) {
+        if (MI.isRegSequence()) {
+          // For reg_sequence, the result size does not match the input.
+          unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(),
+                                              MRI, TRI);
+          OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank);
+        } else {
+          OperandsMapping[0] = ValMapping;
+        }
+      }
+
+      // The default handling assumes any register bank can be copied to any
+      // other. If this isn't the case, the target should specially deal with
+      // reg_sequence/phi. There may also be unsatisfiable copies.
+      for (; OpIdx != EndIdx; ++OpIdx) {
+        const MachineOperand &MO = MI.getOperand(OpIdx);
+        if (!MO.isReg())
+          continue;
+        Register Reg = MO.getReg();
+        if (!Reg)
+          continue;
+
+        const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+        if (AltRegBank &&
+            cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI)))
+          return getInvalidInstructionMapping();
+      }
+
       CompleteMapping = true;
       break;
     }
+
     OperandsMapping[OpIdx] = ValMapping;
   }
 
-  if (IsCopyLike && !CompleteMapping)
+  if (IsCopyLike && !CompleteMapping) {
     // No way to deduce the type from what we have.
     return getInvalidInstructionMapping();
+  }
 
   assert(CompleteMapping && "Setting an uncomplete mapping");
   return getInstructionMapping(
@@ -363,11 +392,8 @@ RegisterBankInfo::getInstructionMappingImpl(
   ++NumInstructionMappingsCreated;
 
   auto &InstrMapping = MapOfInstructionMappings[Hash];
-  if (IsInvalid)
-    InstrMapping = llvm::make_unique<InstructionMapping>();
-  else
-    InstrMapping = llvm::make_unique<InstructionMapping>(
-        ID, Cost, OperandsMapping, NumOperands);
+  InstrMapping = llvm::make_unique<InstructionMapping>(
+      ID, Cost, OperandsMapping, NumOperands);
   return *InstrMapping;
 }
 
@@ -382,8 +408,12 @@ RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 RegisterBankInfo::InstructionMappings
 RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
   InstructionMappings PossibleMappings;
-  // Put the default mapping first.
-  PossibleMappings.push_back(&getInstrMapping(MI));
+  const auto &Mapping = getInstrMapping(MI);
+  if (Mapping.isValid()) {
+    // Put the default mapping first.
+    PossibleMappings.push_back(&Mapping);
+  }
+
   // Then the alternative mapping, if any.
   InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
   for (const InstructionMapping *AltMapping : AltMappings)
@@ -424,14 +454,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
     assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
                1 &&
            "This mapping is too complex for this function");
-    iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+    iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
         OpdMapper.getVRegs(OpIdx);
     if (empty(NewRegs)) {
       LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
       continue;
     }
-    unsigned OrigReg = MO.getReg();
-    unsigned NewReg = *NewRegs.begin();
+    Register OrigReg = MO.getReg();
+    Register NewReg = *NewRegs.begin();
     LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
     MO.setReg(NewReg);
     LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
@@ -456,7 +486,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
   }
 }
 
-unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+unsigned RegisterBankInfo::getSizeInBits(Register Reg,
                                          const MachineRegisterInfo &MRI,
                                          const TargetRegisterInfo &TRI) const {
   if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -498,6 +528,19 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
     OS << "nullptr";
 }
 
+bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
+  if (NumBreakDowns < 2) // Zero or one part: nothing to compare.
+    return true;
+
+  const PartialMapping *First = begin(); // Reference for all later parts.
+  for (const PartialMapping *Part = First + 1; Part != end(); ++Part) {
+    if (Part->Length != First->Length || Part->RegBank != First->RegBank)
+      return false; // Length or register bank differs from the first part.
+  }
+
+  return true;
+}
+
 bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
   assert(NumBreakDowns && "Value mapped nowhere?!");
   unsigned OrigValueBitWidth = 0;
@@ -565,7 +608,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
              "We should not care about non-reg mapping");
       continue;
     }
-    unsigned Reg = MO.getReg();
+    Register Reg = MO.getReg();
     if (!Reg)
       continue;
     assert(getOperandMapping(Idx).isValid() &&
@@ -610,7 +653,7 @@ RegisterBankInfo::OperandsMapper::OperandsMapper(
   assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
 }
 
-iterator_range<SmallVectorImpl<unsigned>::iterator>
+iterator_range<SmallVectorImpl<Register>::iterator>
 RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
   assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
   unsigned NumPartialVal =
@@ -626,18 +669,18 @@ RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
     for (unsigned i = 0; i < NumPartialVal; ++i)
       NewVRegs.push_back(0);
   }
-  SmallVectorImpl<unsigned>::iterator End =
+  SmallVectorImpl<Register>::iterator End =
       getNewVRegsEnd(StartIdx, NumPartialVal);
 
   return make_range(&NewVRegs[StartIdx], End);
 }
 
-SmallVectorImpl<unsigned>::const_iterator
+SmallVectorImpl<Register>::const_iterator
 RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
                                                  unsigned NumVal) const {
   return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
 }
-SmallVectorImpl<unsigned>::iterator
+SmallVectorImpl<Register>::iterator
 RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
                                                  unsigned NumVal) {
   assert((NewVRegs.size() == StartIdx + NumVal ||
@@ -649,11 +692,11 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
 
 void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
   assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
-  iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+  iterator_range<SmallVectorImpl<Register>::iterator> NewVRegsForOpIdx =
       getVRegsMem(OpIdx);
   const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
   const PartialMapping *PartMap = ValMapping.begin();
-  for (unsigned &NewVReg : NewVRegsForOpIdx) {
+  for (Register &NewVReg : NewVRegsForOpIdx) {
     assert(PartMap != ValMapping.end() && "Out-of-bound access");
     assert(NewVReg == 0 && "Register has already been created");
     // The new registers are always bound to scalar with the right size.
@@ -669,7 +712,7 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
 
 void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
                                                 unsigned PartialMapIdx,
-                                                unsigned NewVReg) {
+                                                Register NewVReg) {
   assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
   assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
              PartialMapIdx &&
@@ -681,7 +724,7 @@ void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
   NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
 }
 
-iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+iterator_range<SmallVectorImpl<Register>::const_iterator>
 RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
                                            bool ForDebug) const {
   (void)ForDebug;
@@ -693,12 +736,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
 
   unsigned PartMapSize =
       getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
-  SmallVectorImpl<unsigned>::const_iterator End =
+  SmallVectorImpl<Register>::const_iterator End =
       getNewVRegsEnd(StartIdx, PartMapSize);
-  iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
+  iterator_range<SmallVectorImpl<Register>::const_iterator> Res =
       make_range(&NewVRegs[StartIdx], End);
 #ifndef NDEBUG
-  for (unsigned VReg : Res)
+  for (Register VReg : Res)
     assert((VReg || ForDebug) && "Some registers are uninitialized");
 #endif
   return Res;
@@ -747,7 +790,7 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
     IsFirst = false;
     OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
     bool IsFirstNewVReg = true;
-    for (unsigned VReg : getVRegs(Idx)) {
+    for (Register VReg : getVRegs(Idx)) {
       if (!IsFirstNewVReg)
         OS << ", ";
       IsFirstNewVReg = false;
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 59cbf93e7cd1..766ea1d60bac 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file This file implements the utility functions used by the GlobalISel
@@ -30,20 +29,45 @@ using namespace llvm;
 
 unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
                                    const TargetInstrInfo &TII,
-                                   const RegisterBankInfo &RBI,
-                                   MachineInstr &InsertPt, unsigned Reg,
+                                   const RegisterBankInfo &RBI, unsigned Reg,
                                    const TargetRegisterClass &RegClass) {
-  if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
-    unsigned NewReg = MRI.createVirtualRegister(&RegClass);
-    BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
-            TII.get(TargetOpcode::COPY), NewReg)
-        .addReg(Reg);
-    return NewReg;
-  }
+  if (!RBI.constrainGenericRegister(Reg, RegClass, MRI))
+    return MRI.createVirtualRegister(&RegClass); // New vreg, no COPY here.
 
   return Reg;
 }
 
+unsigned llvm::constrainOperandRegClass(
+    const MachineFunction &MF, const TargetRegisterInfo &TRI,
+    MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+    const RegisterBankInfo &RBI, MachineInstr &InsertPt,
+    const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
+    unsigned OpIdx) { // MF, TRI and OpIdx are not used by this overload.
+  unsigned Reg = RegMO.getReg();
+  // Assume physical registers are properly constrained; only vregs expected.
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "PhysReg not implemented");
+
+  unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
+  // If we created a new virtual register because the class is not compatible
+  // then create a copy between the new and the old register.
+  if (ConstrainedReg != Reg) {
+    MachineBasicBlock::iterator InsertIt(&InsertPt);
+    MachineBasicBlock &MBB = *InsertPt.getParent();
+    if (RegMO.isUse()) { // Use: ConstrainedReg = COPY Reg, before InsertPt.
+      BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
+              TII.get(TargetOpcode::COPY), ConstrainedReg)
+          .addReg(Reg);
+    } else { // Def: Reg = COPY ConstrainedReg, right after InsertPt.
+      assert(RegMO.isDef() && "Must be a definition");
+      BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(),
+              TII.get(TargetOpcode::COPY), Reg)
+          .addReg(ConstrainedReg);
+    }
+  }
+  return ConstrainedReg; // Equals Reg when no new register was needed.
+}
+
 unsigned llvm::constrainOperandRegClass(
     const MachineFunction &MF, const TargetRegisterInfo &TRI,
     MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
@@ -82,7 +106,8 @@ unsigned llvm::constrainOperandRegClass(
     // and they never reach this function.
     return Reg;
   }
-  return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
+  return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+                                  RegMO, OpIdx);
 }
 
 bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
@@ -184,18 +209,71 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
 
 Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
                                            const MachineRegisterInfo &MRI) {
-  MachineInstr *MI = MRI.getVRegDef(VReg);
-  if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+  Optional<ValueAndVReg> ValAndVReg =
+      getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+  assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
+         "Value found while looking through instrs");
+  if (!ValAndVReg) // No usable G_CONSTANT directly defines VReg.
+    return None;
+  return ValAndVReg->Value;
+}
+
+Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
+    unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+  SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
+  MachineInstr *MI; // Def of the vreg currently being examined.
+  while ((MI = MRI.getVRegDef(VReg)) &&
+         MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+      SeenOpcodes.push_back(std::make_pair( // Record cast and result width.
+          MI->getOpcode(),
+          MRI.getType(MI->getOperand(0).getReg()).getSizeInBits()));
+      VReg = MI->getOperand(1).getReg();
+      break;
+    case TargetOpcode::COPY: // Not recorded: the value is used as-is.
+      VReg = MI->getOperand(1).getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(VReg))
+        return None; // Physical source register: give up.
+      break;
+    case TargetOpcode::G_INTTOPTR: // Not recorded: the value is used as-is.
+      VReg = MI->getOperand(1).getReg();
+      break;
+    default:
+      return None; // Any other defining opcode is not looked through.
+    }
+  }
+  if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT ||
+      (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm()))
     return None;
 
-  if (MI->getOperand(1).isImm())
-    return MI->getOperand(1).getImm();
+  const MachineOperand &CstVal = MI->getOperand(1);
+  unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
+  APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
+                             : CstVal.getCImm()->getValue();
+  assert(Val.getBitWidth() == BitWidth &&
+         "Value bitwidth doesn't match definition type");
+  while (!SeenOpcodes.empty()) { // Re-apply casts, nearest the constant first.
+    std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
+    switch (OpcodeAndSize.first) {
+    case TargetOpcode::G_TRUNC:
+      Val = Val.trunc(OpcodeAndSize.second);
+      break;
+    case TargetOpcode::G_SEXT:
+      Val = Val.sext(OpcodeAndSize.second);
+      break;
+    case TargetOpcode::G_ZEXT:
+      Val = Val.zext(OpcodeAndSize.second);
+      break;
+    }
+  }
 
-  if (MI->getOperand(1).isCImm() &&
-      MI->getOperand(1).getCImm()->getBitWidth() <= 64)
-    return MI->getOperand(1).getCImm()->getSExtValue();
+  if (Val.getBitWidth() > 64) // Too wide for getSExtValue() below.
+    return None;
 
-  return None;
+  return ValueAndVReg{Val.getSExtValue(), VReg}; // MI is VReg's def here.
 }
 
 const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
@@ -206,8 +284,8 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
   return MI->getOperand(1).getFPImm();
 }
 
-llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
-                                       const MachineRegisterInfo &MRI) {
+llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+                                               const MachineRegisterInfo &MRI) {
   auto *DefMI = MRI.getVRegDef(Reg);
   auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
   if (!DstTy.isValid())
@@ -219,7 +297,13 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
       break;
     DefMI = MRI.getVRegDef(SrcReg);
   }
-  return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+  return DefMI;
+}
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+                                       const MachineRegisterInfo &MRI) {
+  MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI); // Null-checked below.
+  return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+}
 
 APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
@@ -286,6 +370,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
   return None;
 }
 
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+                           bool SNaN) {
+  const MachineInstr *DefMI = MRI.getVRegDef(Val);
+  if (!DefMI) // No defining instruction to inspect: answer conservatively.
+    return false;
+
+  if (DefMI->getFlag(MachineInstr::FmNoNans)) // Def carries the no-NaNs flag.
+    return true;
+
+  if (SNaN) {
+    // FP operations quiet signaling NaNs. For now, just handle the ones
+    // inserted during legalization.
+    switch (DefMI->getOpcode()) {
+    case TargetOpcode::G_FPEXT:
+    case TargetOpcode::G_FPTRUNC:
+    case TargetOpcode::G_FCANONICALIZE:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  return false; // Nothing above proves the value is never NaN.
+}
+
 void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
   AU.addPreserved<StackProtector>();
 }
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index d3364952f244..09201c2e7bae 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -1,9 +1,8 @@
 //===- GlobalMerge.cpp - Internal globals merging -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -220,11 +219,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                           Module &M, bool isConst, unsigned AddrSpace) const {
   auto &DL = M.getDataLayout();
   // FIXME: Find better heuristics
-  std::stable_sort(Globals.begin(), Globals.end(),
-                   [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
-                     return DL.getTypeAllocSize(GV1->getValueType()) <
-                            DL.getTypeAllocSize(GV2->getValueType());
-                   });
+  llvm::stable_sort(
+      Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+        return DL.getTypeAllocSize(GV1->getValueType()) <
+               DL.getTypeAllocSize(GV2->getValueType());
+      });
 
   // If we want to just blindly group all globals together, do so.
   if (!GlobalMergeGroupByUse) {
@@ -331,7 +330,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
         Function *ParentFn = I->getParent()->getParent();
 
         // If we're only optimizing for size, ignore non-minsize functions.
-        if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
+        if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
           continue;
 
         size_t UGSIdx = GlobalUsesByFunction[ParentFn];
@@ -386,11 +385,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
   //
   // Multiply that by the size of the set to give us a crude profitability
   // metric.
-  std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
-            [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
-              return UGS1.Globals.count() * UGS1.UsageCount <
-                     UGS2.Globals.count() * UGS2.UsageCount;
-            });
+  llvm::stable_sort(UsedGlobalSets,
+                    [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+                      return UGS1.Globals.count() * UGS1.UsageCount <
+                             UGS2.Globals.count() * UGS2.UsageCount;
+                    });
 
   // We can choose to merge all globals together, but ignore globals never used
   // with another global.  This catches the obviously non-profitable cases of
diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp
new file mode 100644
index 000000000000..5f57cabbe865
--- /dev/null
+++ b/lib/CodeGen/HardwareLoops.cpp
@@ -0,0 +1,463 @@
+//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Insert hardware loop intrinsics into loops which are deemed profitable by
+/// the target, by querying TargetTransformInfo. A hardware loop consists of
+/// two intrinsics: one, outside the loop, to set the loop iteration count and
+/// another, in the exit block, to decrement the counter. The decremented value
+/// can either be carried through the loop via a phi or handled in some opaque
+/// way by the target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "hardware-loops"
+
+#define HW_LOOPS_NAME "Hardware Loop Insertion"
+
+using namespace llvm;
+
+static cl::opt<bool> // ORed with the target's profitability verdict.
+ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
+                   cl::desc("Force hardware loops intrinsics to be inserted"));
+
+static cl::opt<bool> // Selects the phi + loop_decrement_reg counter form.
+ForceHardwareLoopPHI(
+  "force-hardware-loop-phi", cl::Hidden, cl::init(false),
+  cl::desc("Force hardware loop counter to be updated through a phi"));
+
+static cl::opt<bool> // Disables the stop-on-illegal-nesting check.
+ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
+                cl::desc("Force allowance of nested hardware loops"));
+
+static cl::opt<unsigned> // Applied only when given on the command line.
+LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
+            cl::desc("Set the loop decrement value"));
+
+static cl::opt<unsigned> // Applied only when given on the command line.
+CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
+                cl::desc("Set the loop counter bitwidth"));
+
+static cl::opt<bool> // Only honoured when the loop entry is known guarded.
+ForceGuardLoopEntry(
+  "force-hardware-loop-guard", cl::Hidden, cl::init(false),
+  cl::desc("Force generation of loop guard intrinsic"));
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+
+  using TTI = TargetTransformInfo;
+
+  class HardwareLoops : public FunctionPass { // Drives the per-loop conversion.
+  public:
+    static char ID;
+
+    HardwareLoops() : FunctionPass(ID) {
+      initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F) override; // Returns MadeChange.
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<LoopInfoWrapperPass>();
+      AU.addPreserved<LoopInfoWrapperPass>();
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addRequired<ScalarEvolutionWrapperPass>();
+      AU.addRequired<AssumptionCacheTracker>();
+      AU.addRequired<TargetTransformInfoWrapperPass>();
+    }
+
+    // Try to convert L, sub-loops first. Returns true to stop the search.
+    bool TryConvertLoop(Loop *L);
+
+    // Given that the target believes the loop to be profitable, try to
+    // convert it. Returns true if the loop was handed to HardwareLoop::Create.
+    bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
+
+  private:
+    ScalarEvolution *SE = nullptr;
+    LoopInfo *LI = nullptr;
+    const DataLayout *DL = nullptr;
+    const TargetTransformInfo *TTI = nullptr;
+    DominatorTree *DT = nullptr;
+    bool PreserveLCSSA = false; // Passed on when inserting a preheader.
+    AssumptionCache *AC = nullptr;
+    TargetLibraryInfo *LibInfo = nullptr; // Null when TLI is not available.
+    Module *M = nullptr;
+    bool MadeChange = false; // Set once a loop has been converted.
+  };
+
+  class HardwareLoop { // Performs the IR rewrite for a single loop.
+    // Expand the trip count SCEV into a value; returns null if that is unsafe.
+    Value *InitLoopCount();
+
+    // Insert the set_loop_iterations or test_set_loop_iterations intrinsic.
+    void InsertIterationSetup(Value *LoopCountInit);
+
+    // Insert the loop_decrement intrinsic and branch on its result.
+    void InsertLoopDec();
+
+    // Insert the loop_decrement_reg intrinsic.
+    Instruction *InsertLoopRegDec(Value *EltsRem);
+
+    // If the target requires the counter value to be updated in the loop,
+    // insert a phi to hold the value. The intended purpose is for use by
+    // loop_decrement_reg.
+    PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
+
+    // Create a new cmp, that checks the returned value of loop_decrement*,
+    // and update the exit branch to use it.
+    void UpdateBranch(Value *EltsRem);
+
+  public:
+    HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
+                 const DataLayout &DL) :
+      SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
+      ExitCount(Info.ExitCount),
+      CountType(Info.CountType),
+      ExitBranch(Info.ExitBranch),
+      LoopDecrement(Info.LoopDecrement),
+      UsePHICounter(Info.CounterInReg),
+      UseLoopGuard(Info.PerformEntryTest) { }
+
+    void Create(); // Returns early, inserting nothing, if InitLoopCount fails.
+
+  private:
+    ScalarEvolution &SE;
+    const DataLayout &DL;
+    Loop *L                 = nullptr;
+    Module *M               = nullptr;
+    const SCEV *ExitCount   = nullptr;
+    Type *CountType         = nullptr;
+    BranchInst *ExitBranch  = nullptr;
+    Value *LoopDecrement    = nullptr;
+    bool UsePHICounter      = false;
+    bool UseLoopGuard       = false;
+    BasicBlock *BeginBB     = nullptr; // Block that receives the setup call.
+  };
+}
+
+char HardwareLoops::ID = 0;
+
+bool HardwareLoops::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
+
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DL = &F.getParent()->getDataLayout();
+  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  M = F.getParent();
+  // The same pass object runs on every function, so drop the previous result.
+  MadeChange = false;
+
+  for (Loop *TopLevel : *LI)
+    if (!TopLevel->getParentLoop())
+      TryConvertLoop(TopLevel);
+
+  return MadeChange;
+}
+
+// Return true if the search should stop, which will be when an inner loop is
+// converted and the parent loop doesn't support containing a hardware loop.
+bool HardwareLoops::TryConvertLoop(Loop *L) {
+  // Process nested loops first.
+  for (Loop *SubLoop : *L)
+    if (TryConvertLoop(SubLoop))
+      return true; // Stop search.
+
+  HardwareLoopInfo HWLoopInfo(L);
+  if (!HWLoopInfo.canAnalyze(*LI))
+    return false;
+
+  if (!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) &&
+      !ForceHardwareLoops)
+    return false;
+
+  // Allow overriding of the counter width and loop decrement value.
+  if (CounterBitWidth.getNumOccurrences())
+    HWLoopInfo.CountType = IntegerType::get(M->getContext(), CounterBitWidth);
+
+  if (LoopDecrement.getNumOccurrences())
+    HWLoopInfo.LoopDecrement =
+        ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+
+  // Base the stop decision on this loop only: the pass-wide MadeChange flag
+  // may already be set by an unrelated loop that was converted earlier.
+  const bool Converted = TryConvertLoop(HWLoopInfo);
+  MadeChange |= Converted;
+  return Converted && !HWLoopInfo.IsNestingLegal && !ForceNestedLoop;
+}
+
+bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+  Loop *const L = HWLoopInfo.L;
+  LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
+
+  // A successful candidate check is expected to populate the exit info.
+  const bool IsCandidate = HWLoopInfo.isHardwareLoopCandidate(
+      *SE, *LI, *DT, ForceNestedLoop, ForceHardwareLoopPHI);
+  if (!IsCandidate)
+    return false;
+
+  assert(
+      (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+      "Hardware Loop must have set exit info.");
+
+  // Reuse the existing preheader, or try to insert one; give up if the loop
+  // still has none afterwards.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader &&
+      !InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA))
+    return false;
+
+  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
+  HWLoop.Create();
+  ++NumHWLoops;
+  return true;
+}
+
+void HardwareLoop::Create() {
+  LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
+
+  Value *const InitCount = InitLoopCount();
+  if (InitCount == nullptr)
+    return;
+
+  InsertIterationSetup(InitCount);
+
+  const bool CounterInPHI = UsePHICounter || ForceHardwareLoopPHI;
+  if (!CounterInPHI) {
+    InsertLoopDec();
+  } else {
+    // Created with a placeholder operand that is patched once the phi exists.
+    Instruction *Dec = InsertLoopRegDec(InitCount);
+    Dec->setOperand(0, InsertPHICounter(InitCount, Dec));
+    UpdateBranch(Dec);
+  }
+
+  for (BasicBlock *BB : L->blocks()) // Remove PHIs that are now dead.
+    DeleteDeadPHIs(BB);
+}
+
+static bool CanGenerateTest(Loop *L, Value *Count) {
+  // The guard, if any, lives in the only block that branches to the preheader.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  BasicBlock *GuardBB = Preheader->getSinglePredecessor();
+  if (!GuardBB)
+    return false;
+
+  auto *BI = dyn_cast<BranchInst>(GuardBB->getTerminator());
+  if (!BI || BI->isUnconditional())
+    return false;
+
+  // Check that the icmp is checking for equality of Count and zero and that
+  // a non-zero value results in entering the loop.
+  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!ICmp)
+    return false;
+
+  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
+  if (!ICmp->isEquality())
+    return false;
+
+  // True when operand OpIdx is the constant zero and the other one is Count.
+  auto ComparesCountWithZero = [ICmp, Count](unsigned OpIdx) {
+    auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx));
+    return Const && Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
+  };
+
+  if (!ComparesCountWithZero(0) && !ComparesCountWithZero(1))
+    return false;
+
+  // The preheader must be the successor taken when Count is non-zero: the
+  // true edge for 'ne', the false edge for 'eq'.
+  const bool IsNE = ICmp->getPredicate() == ICmpInst::ICMP_NE;
+  return BI->getSuccessor(IsNE ? 0 : 1) == Preheader;
+}
+
+Value *HardwareLoop::InitLoopCount() {
+  LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
+  // Can we replace a conditional branch with an intrinsic that sets the
+  // loop counter and tests that it is not zero?
+
+  SCEVExpander SCEVE(SE, DL, "loopcnt");
+  if (!ExitCount->getType()->isPointerTy() &&
+      ExitCount->getType() != CountType)
+    ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
+  ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
+
+  // If we're trying to use the 'test and set' form of the intrinsic, we need
+  // to replace a conditional branch that is controlling entry to the loop. It
+  // is likely (guaranteed?) that the preheader has an unconditional branch to
+  // the loop header, so also check if it has a single predecessor.
+  if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+                                  SE.getZero(ExitCount->getType()))) {
+    LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
+    UseLoopGuard |= ForceGuardLoopEntry;
+  } else
+    UseLoopGuard = false;
+
+  BasicBlock *BB = L->getLoopPreheader();
+  if (UseLoopGuard && BB->getSinglePredecessor() &&
+      cast<BranchInst>(BB->getTerminator())->isUnconditional())
+    BB = BB->getSinglePredecessor();
+
+  if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+    LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+               << *ExitCount << "\n");
+    return nullptr; // Nothing has been inserted into the IR at this point.
+  }
+
+  Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+                                     BB->getTerminator());
+
+  // FIXME: We've expanded Count where we hope to insert the counter setting
+  // intrinsic. But, in the case of the 'test and set' form, we may fall back
+  // to just the 'set' form, in which case the insertion block is most likely
+  // different. It means there will be instruction(s) in a block that possibly
+  // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
+  // but it doesn't appear to work in all cases.
+
+  UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
+  BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
+  LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
+             << " - Expanded Count in " << BB->getName() << "\n"
+             << " - Will insert set counter intrinsic into: "
+             << BeginBB->getName() << "\n");
+  return Count;
+}
+
+void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+  Instruction *Term = BeginBB->getTerminator();
+  IRBuilder<> Builder(Term);
+  const Intrinsic::ID SetupID = UseLoopGuard
+      ? Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
+  Function *SetupFn =
+      Intrinsic::getDeclaration(M, SetupID, LoopCountInit->getType());
+  Value *SetCount = Builder.CreateCall(SetupFn, LoopCountInit);
+
+  if (UseLoopGuard) {
+    // Use the return value of the intrinsic to control the entry of the loop.
+    assert((isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) &&
+           "Expected conditional branch");
+    auto *LoopGuard = cast<BranchInst>(Term);
+    LoopGuard->setCondition(SetCount);
+    if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
+      LoopGuard->swapSuccessors();
+  }
+  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
+             << *SetCount << "\n");
+}
+
+void HardwareLoop::InsertLoopDec() {
+  // Emit the decrement right before the exit branch; its result becomes the
+  // new branch condition.
+  Function *DecFn = Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
+                                              LoopDecrement->getType());
+  IRBuilder<> Builder(ExitBranch);
+  Value *DecResult = Builder.CreateCall(DecFn, {LoopDecrement});
+  Value *PrevCond = ExitBranch->getCondition();
+  ExitBranch->setCondition(DecResult);
+
+  // Keep the loop on the true edge: the false branch must exit the loop.
+  BasicBlock *TrueDest = ExitBranch->getSuccessor(0);
+  if (!L->contains(TrueDest))
+    ExitBranch->swapSuccessors();
+
+  // The old condition may be dead now, and may have even created a dead PHI
+  // (the original induction variable).
+  RecursivelyDeleteTriviallyDeadInstructions(PrevCond);
+
+  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *DecResult << "\n");
+}
+
+Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
+  // Three overload types are supplied: the counter type twice, then the
+  // decrement type.
+  Type *CountTy = EltsRem->getType();
+  Type *OverloadTys[] = {CountTy, CountTy, LoopDecrement->getType()};
+  Function *DecFn =
+      Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, OverloadTys);
+
+  IRBuilder<> Builder(ExitBranch); // Insert right before the exit branch.
+  CallInst *Dec = Builder.CreateCall(DecFn, {EltsRem, LoopDecrement});
+
+  LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Dec << "\n");
+  return Dec;
+}
+
+PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
+  // Place the phi after the header's existing phis.
+  IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+  PHINode *Counter = Builder.CreatePHI(NumElts->getType(), 2);
+
+  // Initial value on entry, decremented value from the exit branch's block.
+  Counter->addIncoming(NumElts, L->getLoopPreheader());
+  Counter->addIncoming(EltsRem, ExitBranch->getParent());
+  LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Counter << "\n");
+  return Counter;
+}
+
+void HardwareLoop::UpdateBranch(Value *EltsRem) {
+  // Stay in the loop while the remaining count is non-zero.
+  Value *PrevCond = ExitBranch->getCondition();
+  Constant *Zero = ConstantInt::get(EltsRem->getType(), 0);
+  IRBuilder<> Builder(ExitBranch);
+  ExitBranch->setCondition(Builder.CreateICmpNE(EltsRem, Zero));
+
+  // The false branch must exit the loop.
+  if (!L->contains(ExitBranch->getSuccessor(0)))
+    ExitBranch->swapSuccessors();
+
+  // The old condition may be dead now, and may have even created a dead PHI
+  // (the original induction variable).
+  RecursivelyDeleteTriviallyDeadInstructions(PrevCond);
+}
+
+INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+
+FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ceeba639ee09..b17a253fe23f 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1,9 +1,8 @@
 //===- IfConversion.cpp - Machine code if conversion pass -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1317,7 +1316,7 @@ void IfConverter::AnalyzeBlocks(
     AnalyzeBlock(MBB, Tokens);
 
   // Sort to favor more complex ifcvt scheme.
-  std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+  llvm::stable_sort(Tokens, IfcvtTokenCmp);
 }
 
 /// Returns true either if ToMBB is the next block after MBB or that all the
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index f411ee6745d0..1e82ea659617 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -1,9 +1,8 @@
 //===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -181,7 +180,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
   /// Returns AR_NoAlias if \p MI memory operation does not alias with
   /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
   /// they may alias and any further memory operation may alias with \p PrevMI.
-  AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+  AliasResult areMemoryOpsAliased(const MachineInstr &MI,
+                                  const MachineInstr *PrevMI) const;
 
   enum SuitabilityResult {
     SR_Suitable,
@@ -195,7 +195,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
   /// no sense to continue lookup due to any other instruction will not be able
   /// to be used. \p PrevInsts is the set of instruction seen since
   /// the explicit null check on \p PointerReg.
-  SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+  SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
+                                       unsigned PointerReg,
                                        ArrayRef<MachineInstr *> PrevInsts);
 
   /// Return true if \p FaultingMI can be hoisted from after the
@@ -228,7 +229,8 @@ public:
 } // end anonymous namespace
 
 bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
-  if (MI->isCall() || MI->hasUnmodeledSideEffects())
+  if (MI->isCall() || MI->mayRaiseFPException() ||
+      MI->hasUnmodeledSideEffects())
     return false;
   auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
   (void)IsRegMask;
@@ -319,8 +321,8 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
 }
 
 ImplicitNullChecks::AliasResult
-ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
-                                        MachineInstr *PrevMI) {
+ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
+                                        const MachineInstr *PrevMI) const {
   // If it is not memory access, skip the check.
   if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
     return AR_NoAlias;
@@ -357,10 +359,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
 }
 
 ImplicitNullChecks::SuitabilityResult
-ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
+                                       unsigned PointerReg,
                                        ArrayRef<MachineInstr *> PrevInsts) {
   int64_t Offset;
-  MachineOperand *BaseOp;
+  const MachineOperand *BaseOp;
 
   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
       !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
diff --git a/lib/CodeGen/IndirectBrExpandPass.cpp b/lib/CodeGen/IndirectBrExpandPass.cpp
index 7b05ebf820fd..7ac093ba4a71 100644
--- a/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -1,9 +1,8 @@
 //===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -149,11 +148,9 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
     ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex);
 
     // Now rewrite the blockaddress to an integer constant based on the index.
-    // FIXME: We could potentially preserve the uses as arguments to inline asm.
-    // This would allow some uses such as diagnostic information in crashes to
-    // have higher quality even when this transform is enabled, but would break
-    // users that round-trip blockaddresses through inline assembly and then
-    // back into an indirectbr.
+    // FIXME: This part doesn't properly recognize other uses of blockaddress
+    // expressions, for instance, where they are used to pass labels to
+    // asm-goto. This part of the pass needs a rework.
     BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType()));
   }
 
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 007e9283d833..41ae8061a917 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1,9 +1,8 @@
 //===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,6 +75,10 @@ STATISTIC(NumRemats,          "Number of rematerialized defs for spilling");
 
 static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
                                      cl::desc("Disable inline spill hoisting"));
+static cl::opt<bool>
+RestrictStatepointRemat("restrict-statepoint-remat",
+                       cl::init(false), cl::Hidden,
+                       cl::desc("Restrict remat for statepoint operands"));
 
 namespace {
 
@@ -215,6 +218,7 @@ private:
   void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
 
   void markValueUsed(LiveInterval*, VNInfo*);
+  bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
   bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
   void reMaterializeAll();
 
@@ -514,6 +518,28 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
   } while (!WorkList.empty());
 }
 
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+                                                     MachineInstr &MI) {
+  if (!RestrictStatepointRemat) // Restriction is opt-in; off by default.
+    return true;
+  // Here's a quick explanation of the problem we're trying to handle here:
+  // * There are some pseudo instructions with more vreg uses than there are
+  //   physical registers on the machine.
+  // * This is normally handled by spilling the vreg, and folding the reload
+  //   into the user instruction.  (Thus decreasing the number of used vregs
+  //   until the remainder can be assigned to physregs.)
+  // * However, since we may try to spill vregs in any order, we can end up
+  //   trying to spill each operand to the instruction, and then rematting it
+  //   instead.  When that happens, the new live intervals (for the remats) are
+  //   expected to be trivially assignable (i.e. RS_Done).  However, since we
+  //   may have more remats than physregs, we're guaranteed to fail to assign
+  //   one.
+  // At the moment, we only handle this for STATEPOINTs since they're the only
+  // pseudo op where we've seen this.  If we start seeing other instructions
+  // with the same problem, we need to revisit this.  (VReg is not consulted.)
+  return (MI.getOpcode() != TargetOpcode::STATEPOINT);
+}
+
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Analyze instruction
@@ -569,6 +595,14 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
     return true;
   }
 
+  // If we can't guarantee that we'll be able to actually assign the new vreg,
+  // we can't remat.
+  if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+    markValueUsed(&VirtReg, ParentVNI);
+    LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+    return false;
+  }
+
   // Allocate a new register for the remat.
   unsigned NewVReg = Edit->createFrom(Original);
 
@@ -799,11 +833,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
   if (FoldOps.empty())
     return false;
 
-  MachineInstrSpan MIS(MI);
+  MachineInstrSpan MIS(MI, MI->getParent());
 
   MachineInstr *FoldMI =
       LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
-             : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
+             : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
   if (!FoldMI)
     return false;
 
@@ -834,6 +868,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
       HSpiller.rmFromMergeableSpills(*MI, FI))
     --NumSpills;
   LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
+  if (MI->isCall())
+    MI->getMF()->updateCallSiteInfo(MI, FoldMI);
   MI->eraseFromParent();
 
   // Insert any new instructions other than FoldMI into the LIS maps.
@@ -871,7 +907,7 @@ void InlineSpiller::insertReload(unsigned NewVReg,
                                  MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
 
-  MachineInstrSpan MIS(MI);
+  MachineInstrSpan MIS(MI, &MBB);
   TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
                            MRI.getRegClass(NewVReg), &TRI);
 
@@ -901,7 +937,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
                                  MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
 
-  MachineInstrSpan MIS(MI);
+  MachineInstrSpan MIS(MI, &MBB);
   bool IsRealSpill = true;
   if (isFullUndefDef(*MI)) {
     // Don't spill undef value.
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 82f6e8d8e234..7b50dac4cd1a 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,9 +1,8 @@
 //===- InterferenceCache.cpp - Caching per-block interference -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 160e2b16e294..50c6ac62d194 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -1,9 +1,8 @@
 //===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index fd2ff162630a..14bc560a561c 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,9 +1,8 @@
 //===- InterleavedAccessPass.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -164,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
 ///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
 ///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
 static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned &Index, unsigned MaxFactor) {
+                               unsigned &Index, unsigned MaxFactor,
+                               unsigned NumLoadElements) {
   if (Mask.size() < 2)
     return false;
 
   // Check potential Factors.
-  for (Factor = 2; Factor <= MaxFactor; Factor++)
+  for (Factor = 2; Factor <= MaxFactor; Factor++) {
+    // Make sure we don't produce a load wider than the input load.
+    if (Mask.size() * Factor > NumLoadElements)
+      return false;
     if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
       return true;
+  }
 
   return false;
 }
@@ -303,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
 
   unsigned Factor, Index;
 
+  unsigned NumLoadElements = LI->getType()->getVectorNumElements();
   // Check if the first shufflevector is DE-interleave shuffle.
   if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
-                          MaxFactor))
+                          MaxFactor, NumLoadElements))
     return false;
 
   // Holds the corresponding index for each DE-interleave shuffle.
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 989fa164ad2d..9525da849e2a 100644
--- a/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1,9 +1,8 @@
 //===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -961,6 +960,7 @@ public:
     if (!PtrTy) {
       Result = Polynomial();
       BasePtr = nullptr;
+      return;
     }
     unsigned PointerBits =
         DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
@@ -1219,7 +1219,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
                                       "interleaved.wide.ptrcast");
 
   // Create the wide load and update the MemorySSA.
-  auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+  auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
                                       "interleaved.wide.load");
   auto MSSAU = MemorySSAUpdater(&MSSA);
   MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 707113bd973b..8cbd8bcaeabb 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -1,9 +1,8 @@
 //===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,39 +23,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-template <class ArgIt>
-static void EnsureFunctionExists(Module &M, const char *Name,
-                                 ArgIt ArgBegin, ArgIt ArgEnd,
-                                 Type *RetTy) {
-  // Insert a correctly-typed definition now.
-  std::vector<Type *> ParamTys;
-  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
-    ParamTys.push_back(I->getType());
-  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
-}
-
-static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
-                                    const char *FName,
-                                    const char *DName, const char *LDName) {
-  // Insert definitions for all the floating point types.
-  switch((int)Fn.arg_begin()->getType()->getTypeID()) {
-  case Type::FloatTyID:
-    EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
-                         Type::getFloatTy(M.getContext()));
-    break;
-  case Type::DoubleTyID:
-    EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
-                         Type::getDoubleTy(M.getContext()));
-    break;
-  case Type::X86_FP80TyID:
-  case Type::FP128TyID:
-  case Type::PPC_FP128TyID:
-    EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
-                         Fn.arg_begin()->getType());
-    break;
-  }
-}
-
 /// This function is used when we want to lower an intrinsic call to a call of
 /// an external function. This handles hard cases such as when there was already
 /// a prototype for the external function, but that prototype doesn't match the
@@ -72,8 +38,8 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
   std::vector<Type *> ParamTys;
   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
     ParamTys.push_back((*I)->getType());
-  Constant* FCache = M->getOrInsertFunction(NewFn,
-                                  FunctionType::get(RetTy, ParamTys, false));
+  FunctionCallee FCache =
+      M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false));
 
   IRBuilder<> Builder(CI->getParent(), CI->getIterator());
   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
@@ -92,75 +58,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
 #  define setjmp_undefined_for_msvc
 #endif
 
-void IntrinsicLowering::AddPrototypes(Module &M) {
-  LLVMContext &Context = M.getContext();
-  for (auto &F : M)
-    if (F.isDeclaration() && !F.use_empty())
-      switch (F.getIntrinsicID()) {
-      default: break;
-      case Intrinsic::setjmp:
-        EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
-                             Type::getInt32Ty(M.getContext()));
-        break;
-      case Intrinsic::longjmp:
-        EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
-                             Type::getVoidTy(M.getContext()));
-        break;
-      case Intrinsic::siglongjmp:
-        EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
-                             Type::getVoidTy(M.getContext()));
-        break;
-      case Intrinsic::memcpy:
-        M.getOrInsertFunction("memcpy",
-          Type::getInt8PtrTy(Context),
-                              Type::getInt8PtrTy(Context),
-                              Type::getInt8PtrTy(Context),
-                              DL.getIntPtrType(Context));
-        break;
-      case Intrinsic::memmove:
-        M.getOrInsertFunction("memmove",
-          Type::getInt8PtrTy(Context),
-                              Type::getInt8PtrTy(Context),
-                              Type::getInt8PtrTy(Context),
-                              DL.getIntPtrType(Context));
-        break;
-      case Intrinsic::memset:
-        M.getOrInsertFunction("memset",
-          Type::getInt8PtrTy(Context),
-                              Type::getInt8PtrTy(Context),
-                              Type::getInt32Ty(M.getContext()),
-                              DL.getIntPtrType(Context));
-        break;
-      case Intrinsic::sqrt:
-        EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
-        break;
-      case Intrinsic::sin:
-        EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
-        break;
-      case Intrinsic::cos:
-        EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
-        break;
-      case Intrinsic::pow:
-        EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
-        break;
-      case Intrinsic::log:
-        EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
-        break;
-      case Intrinsic::log2:
-        EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
-        break;
-      case Intrinsic::log10:
-        EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
-        break;
-      case Intrinsic::exp:
-        EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
-        break;
-      case Intrinsic::exp2:
-        EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
-        break;
-      }
-}
-
 /// Emit the code to lower bswap of V before the specified instruction IP.
 static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
   assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
@@ -601,7 +498,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
 
   // Okay, we can do this xform, do so now.
   Module *M = CI->getModule();
-  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+  Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
 
   Value *Op = CI->getArgOperand(0);
   Op = CallInst::Create(Int, Op, CI->getName(), CI);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 52e832cc38c1..886ae7e94adb 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -202,6 +201,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     return true;
 
   if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+    if (this->getTargetTriple().isOSAIX()) {
+      // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR
+      // testing to be meaningful, we need to ensure that the symbols created
+      // are MCSymbolXCOFF variants, which requires that
+      // the TargetLoweringObjectFile instance has been initialized.
+      MCContext &Ctx = MMI->getContext();
+      const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
+          .Initialize(Ctx, *this);
+    }
     PM.add(createPrintMIRPass(Out));
   } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
     return true;
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index f9f33a98a9d1..8a7a41d0f763 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -1,9 +1,8 @@
 //===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 5b52cc66a297..200ac0ba15bf 100644
--- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
 ///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
 ///
-///                     The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ///
 ///===---------------------------------------------------------------------===//
 /// \file
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index d06821bdfcce..503821537ed9 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -1,9 +1,8 @@
 //===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index fc0ebea2d36c..a669e64692b9 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -1,9 +1,8 @@
 //===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -21,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -35,13 +35,15 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -57,6 +59,7 @@
 #include <cstdint>
 #include <functional>
 #include <queue>
+#include <tuple>
 #include <utility>
 #include <vector>
 
@@ -68,12 +71,12 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
 
 // If @MI is a DBG_VALUE with debug value described by a defined
 // register, returns the number of this register. In the other case, returns 0.
-static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
   assert(MI.isDebugValue() && "expected a DBG_VALUE");
   assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
   // If location of variable is described using a register (directly
   // or indirectly), this register is always a first operand.
-  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
 }
 
 namespace {
@@ -86,6 +89,8 @@ private:
   BitVector CalleeSavedRegs;
   LexicalScopes LS;
 
+  enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
+
   /// Keeps track of lexical scopes associated with a user value's source
   /// location.
   class UserValueScopes {
@@ -105,51 +110,134 @@ private:
     }
   };
 
-  /// Based on std::pair so it can be used as an index into a DenseMap.
-  using DebugVariableBase =
-      std::pair<const DILocalVariable *, const DILocation *>;
-  /// A potentially inlined instance of a variable.
-  struct DebugVariable : public DebugVariableBase {
-    DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
-        : DebugVariableBase(Var, InlinedAt) {}
-
-    const DILocalVariable *getVar() const { return this->first; }
-    const DILocation *getInlinedAt() const { return this->second; }
-
-    bool operator<(const DebugVariable &DV) const {
-      if (getVar() == DV.getVar())
-        return getInlinedAt() < DV.getInlinedAt();
-      return getVar() < DV.getVar();
+  using FragmentInfo = DIExpression::FragmentInfo;
+  using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+  /// Storage for identifying a potentially inlined instance of a variable,
+  /// or a fragment thereof.
+  class DebugVariable {
+    const DILocalVariable *Variable;
+    OptFragmentInfo Fragment;
+    const DILocation *InlinedAt;
+
+    /// Fragment that will overlap all other fragments. Used as default when
+    /// caller demands a fragment.
+    static const FragmentInfo DefaultFragment;
+
+  public:
+    DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo,
+                  const DILocation *InlinedAt)
+        : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+    DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo,
+                  const DILocation *InlinedAt)
+        : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+    DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
+                  const DILocation *InlinedAt)
+        : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {}
+
+    DebugVariable(const MachineInstr &MI)
+        : DebugVariable(MI.getDebugVariable(),
+                        MI.getDebugExpression()->getFragmentInfo(),
+                        MI.getDebugLoc()->getInlinedAt()) {}
+
+    const DILocalVariable *getVar() const { return Variable; }
+    const OptFragmentInfo &getFragment() const { return Fragment; }
+    const DILocation *getInlinedAt() const { return InlinedAt; }
+
+    const FragmentInfo getFragmentDefault() const {
+      return Fragment.getValueOr(DefaultFragment);
+    }
+
+    static bool isFragmentDefault(FragmentInfo &F) {
+      return F == DefaultFragment;
+    }
+
+    bool operator==(const DebugVariable &Other) const {
+      return std::tie(Variable, Fragment, InlinedAt) ==
+             std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
+    }
+
+    bool operator<(const DebugVariable &Other) const {
+      return std::tie(Variable, Fragment, InlinedAt) <
+             std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
     }
   };
 
+  friend struct llvm::DenseMapInfo<DebugVariable>;
+
   /// A pair of debug variable and value location.
   struct VarLoc {
+    // The location at which a spilled variable resides. It consists of a
+    // register and an offset.
+    struct SpillLoc {
+      unsigned SpillBase;
+      int SpillOffset;
+      bool operator==(const SpillLoc &Other) const {
+        return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
+      }
+    };
+
     const DebugVariable Var;
     const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
     mutable UserValueScopes UVS;
-    enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
+    enum VarLocKind {
+      InvalidKind = 0,
+      RegisterKind,
+      SpillLocKind,
+      ImmediateKind,
+      EntryValueKind
+    } Kind = InvalidKind;
 
     /// The value location. Stored separately to avoid repeatedly
     /// extracting it from MI.
     union {
       uint64_t RegNo;
+      SpillLoc SpillLocation;
       uint64_t Hash;
+      int64_t Immediate;
+      const ConstantFP *FPImm;
+      const ConstantInt *CImm;
     } Loc;
 
-    VarLoc(const MachineInstr &MI, LexicalScopes &LS)
-        : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
-          UVS(MI.getDebugLoc(), LS) {
+    VarLoc(const MachineInstr &MI, LexicalScopes &LS,
+          VarLocKind K = InvalidKind)
+        : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){
       static_assert((sizeof(Loc) == sizeof(uint64_t)),
                     "hash does not cover all members of Loc");
       assert(MI.isDebugValue() && "not a DBG_VALUE");
       assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
       if (int RegNo = isDbgValueDescribedByReg(MI)) {
-        Kind = RegisterKind;
+        Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind;
         Loc.RegNo = RegNo;
+      } else if (MI.getOperand(0).isImm()) {
+        Kind = ImmediateKind;
+        Loc.Immediate = MI.getOperand(0).getImm();
+      } else if (MI.getOperand(0).isFPImm()) {
+        Kind = ImmediateKind;
+        Loc.FPImm = MI.getOperand(0).getFPImm();
+      } else if (MI.getOperand(0).isCImm()) {
+        Kind = ImmediateKind;
+        Loc.CImm = MI.getOperand(0).getCImm();
       }
+      assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) &&
+             "entry values must be register locations");
+    }
+
+    /// The constructor for spill locations.
+    VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset,
+           LexicalScopes &LS)
+        : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
+      assert(MI.isDebugValue() && "not a DBG_VALUE");
+      assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+      Kind = SpillLocKind;
+      Loc.SpillLocation = {SpillBase, SpillOffset};
     }
 
+    // Is the Loc field a constant or constant object?
+    bool isConstant() const { return Kind == ImmediateKind; }
+
     /// If this variable is described by a register, return it,
     /// otherwise return 0.
     unsigned isDescribedByReg() const {
@@ -167,17 +255,18 @@ private:
 #endif
 
     bool operator==(const VarLoc &Other) const {
-      return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
+      return Kind == Other.Kind && Var == Other.Var &&
+             Loc.Hash == Other.Loc.Hash;
     }
 
     /// This operator guarantees that VarLocs are sorted by Variable first.
     bool operator<(const VarLoc &Other) const {
-      if (Var == Other.Var)
-        return Loc.Hash < Other.Loc.Hash;
-      return Var < Other.Var;
+      return std::tie(Var, Kind, Loc.Hash) <
+             std::tie(Other.Var, Other.Kind, Other.Loc.Hash);
     }
   };
 
+  using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>;
   using VarLocMap = UniqueVector<VarLoc>;
   using VarLocSet = SparseBitVector<>;
   using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
@@ -187,26 +276,35 @@ private:
   };
   using TransferMap = SmallVector<TransferDebugPair, 4>;
 
+  // Types for recording sets of variable fragments that overlap. For a given
+  // fragment of a local variable, we record all other fragments of that
+  // variable that could overlap it, to reduce search time.
+  using FragmentOfVar =
+      std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+  using OverlapMap =
+      DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+  // Helper while building OverlapMap, a map of all fragments seen for a given
+  // DILocalVariable.
+  using VarToFragments =
+      DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
   /// This holds the working set of currently open ranges. For fast
   /// access, this is done both as a set of VarLocIDs, and a map of
   /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
   /// previous open ranges for the same variable.
   class OpenRangesSet {
     VarLocSet VarLocs;
-    SmallDenseMap<DebugVariableBase, unsigned, 8> Vars;
+    SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+    OverlapMap &OverlappingFragments;
 
   public:
+    OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {}
+
     const VarLocSet &getVarLocs() const { return VarLocs; }
 
     /// Terminate all open ranges for Var by removing it from the set.
-    void erase(DebugVariable Var) {
-      auto It = Vars.find(Var);
-      if (It != Vars.end()) {
-        unsigned ID = It->second;
-        VarLocs.reset(ID);
-        Vars.erase(It);
-      }
-    }
+    void erase(DebugVariable Var);
 
     /// Terminate all open ranges listed in \c KillSet by removing
     /// them from the set.
@@ -217,7 +315,7 @@ private:
     }
 
     /// Insert a new range into the set.
-    void insert(unsigned VarLocID, DebugVariableBase Var) {
+    void insert(unsigned VarLocID, DebugVariable Var) {
       VarLocs.set(VarLocID);
       Vars.insert({Var, VarLocID});
     }
@@ -237,24 +335,43 @@ private:
 
   bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
                           unsigned &Reg);
-  int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+  /// If a given instruction is identified as a restore, return the spill
+  /// location and set \p Reg to the restored register.
+  Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+                                                  MachineFunction *MF,
+                                                  unsigned &Reg);
+  /// Given a spill instruction, extract the register and offset used to
+  /// address the spill location in a target independent way.
+  VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
   void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
                                TransferMap &Transfers, VarLocMap &VarLocIDs,
-                               unsigned OldVarID, unsigned NewReg = 0);
+                               unsigned OldVarID, TransferKind Kind,
+                               unsigned NewReg = 0);
 
   void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
                           VarLocMap &VarLocIDs);
-  void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
-                         VarLocMap &VarLocIDs, TransferMap &Transfers);
+  void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+                                  VarLocMap &VarLocIDs, TransferMap &Transfers);
+  void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
+                       VarLocMap &VarLocIDs, TransferMap &Transfers,
+                       DebugParamMap &DebugEntryVals,
+                       SparseBitVector<> &KillSet);
   void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
                             VarLocMap &VarLocIDs, TransferMap &Transfers);
   void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
-                           const VarLocMap &VarLocIDs);
+                           VarLocMap &VarLocIDs, TransferMap &Transfers,
+                           DebugParamMap &DebugEntryVals);
   bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
                               VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+
   bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
                VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
-               TransferMap &Transfers, bool transferChanges);
+               TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+               bool transferChanges, OverlapMap &OverlapFragments,
+               VarToFragments &SeenFragments);
+
+  void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
+                             OverlapMap &OLapMap);
 
   bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
             const VarLocMap &VarLocIDs,
@@ -289,10 +406,46 @@ public:
 
 } // end anonymous namespace
 
+namespace llvm {
+
+template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> {
+  using DV = LiveDebugValues::DebugVariable;
+  using OptFragmentInfo = LiveDebugValues::OptFragmentInfo;
+  using FragmentInfo = LiveDebugValues::FragmentInfo;
+
+  // Empty key: no key should be generated that has no DILocalVariable.
+  static inline DV getEmptyKey() {
+    return DV(nullptr, OptFragmentInfo(), nullptr);
+  }
+
+  // Tombstone key: same as the empty key, except the Optional holds {0, 0}.
+  static inline DV getTombstoneKey() {
+    return DV(nullptr, OptFragmentInfo({0, 0}), nullptr);
+  }
+
+  static unsigned getHashValue(const DV &D) {
+    unsigned HV = 0;
+    const OptFragmentInfo &Fragment = D.getFragment();
+    if (Fragment)
+      HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
+
+    return hash_combine(D.getVar(), HV, D.getInlinedAt());
+  }
+
+  static bool isEqual(const DV &A, const DV &B) { return A == B; }
+};
+
+} // namespace llvm
+
 //===----------------------------------------------------------------------===//
 //            Implementation
 //===----------------------------------------------------------------------===//
 
+const DIExpression::FragmentInfo
+    LiveDebugValues::DebugVariable::DefaultFragment = {
+        std::numeric_limits<uint64_t>::max(),
+        std::numeric_limits<uint64_t>::min()};
+
 char LiveDebugValues::ID = 0;
 
 char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
@@ -312,6 +465,39 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+/// Erase a variable from the set of open ranges, and additionally erase any
+/// fragments that may overlap it.
+void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) {
+  // Erasure helper.
+  auto DoErase = [this](DebugVariable VarToErase) {
+    auto It = Vars.find(VarToErase);
+    if (It != Vars.end()) {
+      unsigned ID = It->second;
+      VarLocs.reset(ID);
+      Vars.erase(It);
+    }
+  };
+
+  // Erase the variable/fragment that ends here.
+  DoErase(Var);
+
+  // Extract the fragment. Interpret an empty fragment as one that covers all
+  // possible bits.
+  FragmentInfo ThisFragment = Var.getFragmentDefault();
+
+  // There may be fragments that overlap the designated fragment. Look them up
+  // in the pre-computed overlap map, and erase them too.
+  auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment});
+  if (MapIt != OverlappingFragments.end()) {
+    for (auto Fragment : MapIt->second) {
+      LiveDebugValues::OptFragmentInfo FragmentHolder;
+      if (!DebugVariable::isFragmentDefault(Fragment))
+        FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
+      DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()});
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //            Debug Range Extension Implementation
 //===----------------------------------------------------------------------===//
@@ -339,10 +525,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
 }
 #endif
 
-/// Given a spill instruction, extract the register and offset used to
-/// address the spill location in a target independent way.
-int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
-                                                  unsigned &Reg) {
+LiveDebugValues::VarLoc::SpillLoc
+LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
   assert(MI.hasOneMemOperand() &&
          "Spill instruction does not have exactly one memory operand?");
   auto MMOI = MI.memoperands_begin();
@@ -351,7 +535,9 @@ int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
          "Inconsistent memory operand in spill instruction");
   int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
   const MachineBasicBlock *MBB = MI.getParent();
-  return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+  unsigned Reg;
+  int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+  return {Reg, Offset};
 }
 
 /// End all previous ranges related to @MI and start a new range from @MI
@@ -362,21 +548,72 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
   if (!MI.isDebugValue())
     return;
   const DILocalVariable *Var = MI.getDebugVariable();
+  const DIExpression *Expr = MI.getDebugExpression();
   const DILocation *DebugLoc = MI.getDebugLoc();
   const DILocation *InlinedAt = DebugLoc->getInlinedAt();
   assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
          "Expected inlined-at fields to agree");
 
   // End all previous ranges of Var.
-  DebugVariable V(Var, InlinedAt);
+  DebugVariable V(Var, Expr, InlinedAt);
   OpenRanges.erase(V);
 
   // Add the VarLoc to OpenRanges from this DBG_VALUE.
-  // TODO: Currently handles DBG_VALUE which has only reg as location.
-  if (isDbgValueDescribedByReg(MI)) {
+  unsigned ID;
+  if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
+      MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
+    // Use normal VarLoc constructor for registers and immediates.
     VarLoc VL(MI, LS);
-    unsigned ID = VarLocIDs.insert(VL);
+    ID = VarLocIDs.insert(VL);
+    OpenRanges.insert(ID, VL.Var);
+  } else if (MI.hasOneMemOperand()) {
+    // It's a stack spill -- fetch spill base and offset.
+    VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+    VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
+    ID = VarLocIDs.insert(VL);
     OpenRanges.insert(ID, VL.Var);
+  } else {
+    // This must be an undefined location. We should leave OpenRanges closed.
+    assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
+           "Unexpected non-undef DBG_VALUE encountered");
+  }
+}
+
+void LiveDebugValues::emitEntryValues(MachineInstr &MI,
+                                      OpenRangesSet &OpenRanges,
+                                      VarLocMap &VarLocIDs,
+                                      TransferMap &Transfers,
+                                      DebugParamMap &DebugEntryVals,
+                                      SparseBitVector<> &KillSet) {
+  MachineFunction *MF = MI.getParent()->getParent();
+  for (unsigned ID : KillSet) {
+    if (!VarLocIDs[ID].Var.getVar()->isParameter())
+      continue;
+
+    const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI;
+
+    // If the parameter's DBG_VALUE is not in DebugEntryVals, we cannot
+    // generate an entry value for it, so skip this location.
+    if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable()))
+      continue;
+
+    auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
+    DIExpression *NewExpr = DIExpression::prepend(
+        ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
+    MachineInstr *EntryValDbgMI =
+        BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(),
+                ParamDebugInstr->isIndirectDebugValue(),
+                ParamDebugInstr->getOperand(0).getReg(),
+                ParamDebugInstr->getDebugVariable(), NewExpr);
+
+    if (ParamDebugInstr->isIndirectDebugValue())
+      EntryValDbgMI->getOperand(1).setImm(
+          ParamDebugInstr->getOperand(1).getImm());
+
+    Transfers.push_back({&MI, EntryValDbgMI});
+    VarLoc VL(*EntryValDbgMI, LS);
+    unsigned EntryValLocID = VarLocIDs.insert(VL);
+    OpenRanges.insert(EntryValLocID, VL.Var);
   }
 }
 
@@ -387,51 +624,92 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
 /// otherwise it is variable's location on the stack.
 void LiveDebugValues::insertTransferDebugPair(
     MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
-    VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) {
-  const MachineInstr *DMI = &VarLocIDs[OldVarID].MI;
+    VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
+    unsigned NewReg) {
+  const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
   MachineFunction *MF = MI.getParent()->getParent();
-  MachineInstr *NewDMI;
-  if (NewReg) {
+  MachineInstr *NewDebugInstr;
+
+  auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
+                        &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
+    unsigned LocId = VarLocIDs.insert(VL);
+
+    // Close this variable's previous location range.
+    DebugVariable V(*DebugInstr);
+    OpenRanges.erase(V);
+
+    OpenRanges.insert(LocId, VL.Var);
+    // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
+    // after MI. Keep track of the pairing.
+    TransferDebugPair MIP = {&MI, NewDebugInstr};
+    Transfers.push_back(MIP);
+  };
+
+  // End all previous ranges of Var.
+  OpenRanges.erase(VarLocIDs[OldVarID].Var);
+  switch (Kind) {
+  case TransferKind::TransferCopy: {
+    assert(NewReg &&
+           "No register supplied when handling a copy of a debug value");
     // Create a DBG_VALUE instruction to describe the Var in its new
     // register location.
-    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(),
-                     DMI->isIndirectDebugValue(), NewReg,
-                     DMI->getDebugVariable(), DMI->getDebugExpression());
-    if (DMI->isIndirectDebugValue())
-      NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+    NewDebugInstr = BuildMI(
+        *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
+        DebugInstr->isIndirectDebugValue(), NewReg,
+        DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
+    if (DebugInstr->isIndirectDebugValue())
+      NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+    VarLoc VL(*NewDebugInstr, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
-               NewDMI->print(dbgs(), false, false, false, TII));
-  } else {
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
+    return;
+  }
+  case TransferKind::TransferSpill: {
     // Create a DBG_VALUE instruction to describe the Var in its spilled
     // location.
-    unsigned SpillBase;
-    int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
-    auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(),
-                                            DIExpression::NoDeref, SpillOffset);
-    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
-                     DMI->getDebugVariable(), SpillExpr);
+    VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+    auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
+                                            DIExpression::ApplyOffset,
+                                            SpillLocation.SpillOffset);
+    NewDebugInstr = BuildMI(
+        *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
+        SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
+    VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
+              SpillLocation.SpillOffset, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
-               NewDMI->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
+    return;
   }
-
-  // The newly created DBG_VALUE instruction NewDMI must be inserted after
-  // MI. Keep track of the pairing.
-  TransferDebugPair MIP = {&MI, NewDMI};
-  Transfers.push_back(MIP);
-
-  // End all previous ranges of Var.
-  OpenRanges.erase(VarLocIDs[OldVarID].Var);
-
-  // Add the VarLoc to OpenRanges.
-  VarLoc VL(*NewDMI, LS);
-  unsigned LocID = VarLocIDs.insert(VL);
-  OpenRanges.insert(LocID, VL.Var);
+  case TransferKind::TransferRestore: {
+    assert(NewReg &&
+           "No register supplied when handling a restore of a debug value");
+    MachineFunction *MF = MI.getMF();
+    DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
+    NewDebugInstr =
+        BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
+                NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
+    VarLoc VL(*NewDebugInstr, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
+    LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
+    return;
+  }
+  }
+  llvm_unreachable("Invalid transfer kind");
 }
 
 /// A definition of a register may mark the end of a range.
-void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
-                                          OpenRangesSet &OpenRanges,
-                                          const VarLocMap &VarLocIDs) {
+void LiveDebugValues::transferRegisterDef(
+    MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+    TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
   MachineFunction *MF = MI.getMF();
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -461,6 +739,13 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
     }
   }
   OpenRanges.erase(KillSet, VarLocIDs);
+
+  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+    auto &TM = TPC->getTM<TargetMachine>();
+    if (TM.Options.EnableDebugEntryValues)
+      emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
+                      KillSet);
+  }
 }
 
 /// Decide if @MI is a spill instruction and return true if it is. We use 2
@@ -471,24 +756,15 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
 /// other spills). We do not handle this yet (more than one memory operand).
 bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
                                          MachineFunction *MF, unsigned &Reg) {
-  const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  int FI;
   SmallVector<const MachineMemOperand*, 1> Accesses;
 
   // TODO: Handle multiple stores folded into one.
   if (!MI.hasOneMemOperand())
     return false;
 
-  // To identify a spill instruction, use the same criteria as in AsmPrinter.
-  if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
-         FrameInfo.isSpillSlotObjectIndex(FI)) ||
-        (TII->hasStoreToStackSlot(MI, Accesses) &&
-         llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
-           return FrameInfo.isSpillSlotObjectIndex(
-               cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
-                   ->getFrameIndex());
-         }))))
-    return false;
+  if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+    return false; // This is not a spill instruction, since no valid size was
+                  // returned from either function.
 
   auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
     if (!MO.isReg() || !MO.isUse()) {
@@ -525,29 +801,67 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
   return false;
 }
 
+Optional<LiveDebugValues::VarLoc::SpillLoc>
+LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
+                                      MachineFunction *MF, unsigned &Reg) {
+  if (!MI.hasOneMemOperand())
+    return None;
+
+  // FIXME: Handle folded restore instructions with more than one memory
+  // operand.
+  if (MI.getRestoreSize(TII)) {
+    Reg = MI.getOperand(0).getReg();
+    return extractSpillBaseRegAndOffset(MI);
+  }
+  return None;
+}
+
 /// A spilled register may indicate that we have to end the current range of
 /// a variable and create a new one for the spill location.
+/// A restored register may indicate the reverse situation.
 /// We don't want to insert any instructions in process(), so we just create
 /// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
 /// It will be inserted into the BB when we're done iterating over the
 /// instructions.
-void LiveDebugValues::transferSpillInst(MachineInstr &MI,
-                                        OpenRangesSet &OpenRanges,
-                                        VarLocMap &VarLocIDs,
-                                        TransferMap &Transfers) {
-  unsigned Reg;
+void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
+                                                 OpenRangesSet &OpenRanges,
+                                                 VarLocMap &VarLocIDs,
+                                                 TransferMap &Transfers) {
   MachineFunction *MF = MI.getMF();
-  if (!isSpillInstruction(MI, MF, Reg))
-    return;
+  TransferKind TKind;
+  unsigned Reg;
+  Optional<VarLoc::SpillLoc> Loc;
 
-  // Check if the register is the location of a debug value.
+  LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+  if (isSpillInstruction(MI, MF, Reg)) {
+    TKind = TransferKind::TransferSpill;
+    LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
+    LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+                      << "\n");
+  } else {
+    if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+      return;
+    TKind = TransferKind::TransferRestore;
+    LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump(););
+    LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+                      << "\n");
+  }
+  // Check if the register or spill location is the location of a debug value.
   for (unsigned ID : OpenRanges.getVarLocs()) {
-    if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+    if (TKind == TransferKind::TransferSpill &&
+        VarLocIDs[ID].isDescribedByReg() == Reg) {
       LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
                         << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
-      insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID);
-      return;
-    }
+    } else if (TKind == TransferKind::TransferRestore &&
+               VarLocIDs[ID].Loc.SpillLocation == *Loc) {
+      LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
+                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+    } else
+      continue;
+    insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
+                            Reg);
+    return;
   }
 }
 
@@ -585,7 +899,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
   for (unsigned ID : OpenRanges.getVarLocs()) {
     if (VarLocIDs[ID].isDescribedByReg() == SrcReg) {
       insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID,
-                              DestReg);
+                              TransferKind::TransferCopy, DestReg);
       return;
     }
   }
@@ -612,20 +926,92 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
   });
   VarLocSet &VLS = OutLocs[CurMBB];
   Changed = VLS |= OpenRanges.getVarLocs();
+  // New OutLocs set may be different due to spill, restore or register
+  // copy instruction processing.
+  if (Changed)
+    VLS = OpenRanges.getVarLocs();
   OpenRanges.clear();
   return Changed;
 }
 
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DBG_VALUE instruction to analyze for
+///           fragment usage.
+/// \param SeenFragments Map from DILocalVariable to all fragments of that
+///           Variable which are known to exist.
+/// \param OverlappingFragments The overlap map being constructed, from one
+///           Var/Fragment pair to a vector of fragments known to overlap.
+void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
+                                            VarToFragments &SeenFragments,
+                                            OverlapMap &OverlappingFragments) {
+  DebugVariable MIVar(MI);
+  FragmentInfo ThisFragment = MIVar.getFragmentDefault();
+
+  // If this is the first sighting of this variable, then we are guaranteed
+  // there are currently no overlapping fragments either. Initialize the set
+  // of seen fragments, record no overlaps for the current one, and return.
+  auto SeenIt = SeenFragments.find(MIVar.getVar());
+  if (SeenIt == SeenFragments.end()) {
+    SmallSet<FragmentInfo, 4> OneFragment;
+    OneFragment.insert(ThisFragment);
+    SeenFragments.insert({MIVar.getVar(), OneFragment});
+
+    OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+    return;
+  }
+
+  // If this particular Variable/Fragment pair already exists in the overlap
+  // map, it has already been accounted for.
+  auto IsInOLapMap =
+      OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+  if (!IsInOLapMap.second)
+    return;
+
+  auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+  auto &AllSeenFragments = SeenIt->second;
+
+  // Otherwise, examine all other seen fragments for this variable, with "this"
+  // fragment being a previously unseen fragment. Record any pair of
+  // overlapping fragments.
+  for (auto &ASeenFragment : AllSeenFragments) {
+    // Does this previously seen fragment overlap?
+    if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+      // Yes: Mark the current fragment as being overlapped.
+      ThisFragmentsOverlaps.push_back(ASeenFragment);
+      // Mark the previously seen fragment as being overlapped by the current
+      // one.
+      auto ASeenFragmentsOverlaps =
+          OverlappingFragments.find({MIVar.getVar(), ASeenFragment});
+      assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
+             "Previously seen var fragment has no vector of overlaps");
+      ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+    }
+  }
+
+  AllSeenFragments.insert(ThisFragment);
+}
+
 /// This routine creates OpenRanges and OutLocs.
 bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
                               VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
-                              TransferMap &Transfers, bool transferChanges) {
+                              TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+                              bool transferChanges,
+                              OverlapMap &OverlapFragments,
+                              VarToFragments &SeenFragments) {
   bool Changed = false;
   transferDebugValue(MI, OpenRanges, VarLocIDs);
-  transferRegisterDef(MI, OpenRanges, VarLocIDs);
+  transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
+                      DebugEntryVals);
   if (transferChanges) {
     transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
-    transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers);
+    transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
+  } else {
+    // Build up a map of overlapping fragments on the first run through.
+    if (MI.isDebugValue())
+      accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
   }
   Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
   return Changed;
@@ -713,13 +1099,23 @@ bool LiveDebugValues::join(
     // new range is started for the var from the mbb's beginning by inserting
     // a new DBG_VALUE. process() will end this range however appropriate.
     const VarLoc &DiffIt = VarLocIDs[ID];
-    const MachineInstr *DMI = &DiffIt.MI;
-    MachineInstr *MI =
-        BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
-                DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
-                DMI->getDebugVariable(), DMI->getDebugExpression());
-    if (DMI->isIndirectDebugValue())
-      MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+    const MachineInstr *DebugInstr = &DiffIt.MI;
+    MachineInstr *MI = nullptr;
+    if (DiffIt.isConstant()) {
+      MachineOperand MO(DebugInstr->getOperand(0));
+      MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+                   DebugInstr->getDesc(), false, MO,
+                   DebugInstr->getDebugVariable(),
+                   DebugInstr->getDebugExpression());
+    } else {
+      MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+                   DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
+                   DebugInstr->getOperand(0).getReg(),
+                   DebugInstr->getDebugVariable(),
+                   DebugInstr->getDebugExpression());
+      if (DebugInstr->isIndirectDebugValue())
+        MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+    }
     LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
     ILS.set(ID);
     ++NumInserted;
@@ -737,11 +1133,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
   bool OLChanged = false;
   bool MBBJoined = false;
 
-  VarLocMap VarLocIDs;      // Map VarLoc<>unique ID for use in bitvectors.
-  OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
-  VarLocInMBB OutLocs;      // Ranges that exist beyond bb.
-  VarLocInMBB InLocs;       // Ranges that are incoming after joining.
-  TransferMap Transfers;    // DBG_VALUEs associated with spills.
+  VarLocMap VarLocIDs;         // Map VarLoc<>unique ID for use in bitvectors.
+  OverlapMap OverlapFragments; // Map of overlapping variable fragments.
+  OpenRangesSet OpenRanges(OverlapFragments);
+                              // Ranges that are open until end of bb.
+  VarLocInMBB OutLocs;        // Ranges that exist beyond bb.
+  VarLocInMBB InLocs;         // Ranges that are incoming after joining.
+  TransferMap Transfers;      // DBG_VALUEs queued for insertion after an MI.
+
+  VarToFragments SeenFragments;
 
   // Blocks which are artificial, i.e. blocks which exclusively contain
   // instructions without locations, or with line 0 locations.
@@ -758,15 +1158,61 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
 
   enum : bool { dontTransferChanges = false, transferChanges = true };
 
+  // Besides parameter's modification, check whether a DBG_VALUE is inlined
+  // in order to deduce whether the variable that it tracks comes from
+  // a different function. If that is the case we can't track its entry value.
+  auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) {
+    auto *DIVar = MI.getDebugVariable();
+    return DIVar->isParameter() && DIVar->isNotModified() &&
+           !MI.getDebugLoc()->getInlinedAt();
+  };
+
+  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+  unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+  unsigned FP = TRI->getFrameRegister(MF);
+  auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
+    return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
+  };
+
+  // Working set of currently collected debug variables mapped to DBG_VALUEs
+  // representing candidates for production of debug entry values.
+  DebugParamMap DebugEntryVals;
+
+  MachineBasicBlock &First_MBB = *(MF.begin());
+  // Only in the case of entry MBB collect DBG_VALUEs representing
+  // function parameters in order to generate debug entry values for them.
+  // Currently, we generate debug entry values only for parameters that are
+  // unmodified throughout the function and located in a register.
+  // TODO: Add support for parameters that are described as fragments.
+  // TODO: Add support for modified arguments that can be expressed
+  // by using its entry value.
+  // TODO: Add support for local variables that are expressed in terms of
+  // parameters entry values.
+  for (auto &MI : First_MBB)
+    if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) &&
+        !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) &&
+        !DebugEntryVals.count(MI.getDebugVariable()) &&
+        !MI.getDebugExpression()->isFragment())
+      DebugEntryVals[MI.getDebugVariable()] = &MI;
+
   // Initialize every mbb with OutLocs.
   // We are not looking at any spill instructions during the initial pass
   // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
   // instructions for spills of registers that are known to be user variables
   // within the BB in which the spill occurs.
-  for (auto &MBB : MF)
-    for (auto &MI : MBB)
-      process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
-              dontTransferChanges);
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals,
+              dontTransferChanges, OverlapFragments, SeenFragments);
+    }
+    // Add any entry DBG_VALUE instructions necessitated by parameter
+    // clobbering.
+    for (auto &TR : Transfers) {
+      MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
+                     TR.DebugInst);
+    }
+    Transfers.clear();
+  }
 
   auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
     if (const DebugLoc &DL = MI.getDebugLoc())
@@ -812,8 +1258,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
         // examine spill instructions to see whether they spill registers that
         // correspond to user variables.
         for (auto &MI : *MBB)
-          OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
-                               transferChanges);
+          OLChanged |=
+              process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+                      DebugEntryVals, transferChanges, OverlapFragments,
+                      SeenFragments);
 
         // Add any DBG_VALUE instructions necessitated by spills.
         for (auto &TR : Transfers)
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index d0d889782a35..656ec7d4bdfd 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1,9 +1,8 @@
 //===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,7 @@ EnableLDV("live-debug-variables", cl::init(true),
           cl::desc("Enable the live debug variables pass"), cl::Hidden);
 
 STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted");
 
 char LiveDebugVariables::ID = 0;
 
@@ -166,10 +167,6 @@ class UserValue {
   /// Map of slot indices where this value is live.
   LocMap locInts;
 
-  /// Set of interval start indexes that have been trimmed to the
-  /// lexical scope.
-  SmallSet<SlotIndex, 2> trimmedDefs;
-
   /// Insert a DBG_VALUE into MBB at Idx for LocNo.
   void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
                         SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -339,6 +336,37 @@ public:
   void print(raw_ostream &, const TargetRegisterInfo *);
 };
 
+/// A UserLabel records one debug label: its DILabel, DebugLoc and SlotIndex.
+class UserLabel {
+  const DILabel *Label; ///< The DILabel this object describes.
+  DebugLoc dl;          ///< The debug location for the label. This is
+                        ///< used by dwarf writer to find lexical scope.
+  SlotIndex loc;        ///< Slot used by the debug label.
+
+  /// Insert a DBG_LABEL into MBB at Idx.
+  void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+                        LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+  /// Create a UserLabel for \p label at slot \p Idx with location \p L.
+  UserLabel(const DILabel *label, DebugLoc L, SlotIndex Idx)
+      : Label(label), dl(std::move(L)), loc(Idx) {}
+
+  /// True if label, inlined-at location and slot index all equal ours.
+  bool match(const DILabel *L, const DILocation *IA,
+             const SlotIndex Index) const {
+    return Label == L && dl->getInlinedAt() == IA && loc == Index;
+  }
+
+  /// Recreate DBG_LABEL instruction from data structures.
+  void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+  /// Return a copy of this UserLabel's DebugLoc.
+  DebugLoc getDebugLoc() { return dl; }
+
+  void print(raw_ostream &, const TargetRegisterInfo *);
+};
+
 /// Implementation of the LiveDebugVariables pass.
 class LDVImpl {
   LiveDebugVariables &pass;
@@ -356,6 +384,9 @@ class LDVImpl {
   /// All allocated UserValue instances.
   SmallVector<std::unique_ptr<UserValue>, 8> userValues;
 
+  /// All allocated UserLabel instances.
+  SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
+
   /// Map virtual register to eq class leader.
   using VRMap = DenseMap<unsigned, UserValue *>;
   VRMap virtRegToEqClass;
@@ -379,6 +410,14 @@ class LDVImpl {
   /// \returns True if the DBG_VALUE instruction should be deleted.
   bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
 
+  /// Add DBG_LABEL instruction to UserLabel.
+  ///
+  /// \param MI DBG_LABEL instruction
+  /// \param Idx Last valid SlotIndex before instruction.
+  ///
+  /// \returns True if the DBG_LABEL instruction should be deleted.
+  bool handleDebugLabel(MachineInstr &MI, SlotIndex Idx);
+
   /// Collect and erase all DBG_VALUE instructions, adding a UserValue def
   /// for each instruction.
   ///
@@ -400,6 +439,7 @@ public:
   void clear() {
     MF = nullptr;
     userValues.clear();
+    userLabels.clear();
     virtRegToEqClass.clear();
     userVarMap.clear();
     // Make sure we call emitDebugValues if the machine function was modified.
@@ -445,13 +485,23 @@ static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
   CommentOS << " ]";
 }
 
-static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
+static void printExtendedName(raw_ostream &OS, const DINode *Node,
                               const DILocation *DL) {
-  const LLVMContext &Ctx = V->getContext();
-  StringRef Res = V->getName();
+  const LLVMContext &Ctx = Node->getContext();
+  StringRef Res;
+  unsigned Line = 0; // Stays 0 when Node is neither a variable nor a label.
+  if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
+    Res = V->getName();
+    Line = V->getLine();
+  } else if (const auto *L = dyn_cast<const DILabel>(Node)) {
+    Res = L->getName();
+    Line = L->getLine();
+  }
+
   if (!Res.empty())
-    OS << Res << "," << V->getLine();
-  if (auto *InlinedAt = DL->getInlinedAt()) {
+    OS << Res << "," << Line;
+  auto *InlinedAt = DL ? DL->getInlinedAt() : nullptr;
+  if (InlinedAt) {
     if (DebugLoc InlinedAtDL = InlinedAt) {
       OS << " @[";
       printDebugLoc(InlinedAtDL, OS, Ctx);
@@ -461,9 +511,8 @@ static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
 }
 
 void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
-  auto *DV = cast<DILocalVariable>(Variable);
   OS << "!\"";
-  printExtendedName(OS, DV, dl);
+  printExtendedName(OS, Variable, dl);
 
   OS << "\"\t";
   for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
@@ -483,10 +532,22 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
   OS << '\n';
 }
 
+void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+  OS << "!\"";
+  printExtendedName(OS, Label, dl);
+
+  OS << "\"\t";
+  OS << loc;
+  OS << '\n';
+}
+
 void LDVImpl::print(raw_ostream &OS) {
   OS << "********** DEBUG VARIABLES **********\n";
-  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
-    userValues[i]->print(OS, TRI);
+  for (auto &userValue : userValues)
+    userValue->print(OS, TRI);
+  OS << "********** DEBUG LABELS **********\n";
+  for (auto &userLabel : userLabels)
+    userLabel->print(OS, TRI);
 }
 #endif
 
@@ -556,7 +617,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
     } else {
       // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
       // is defined dead at Idx (where Idx is the slot index for the instruction
-      // preceeding the DBG_VALUE).
+      // preceding the DBG_VALUE).
       const LiveInterval &LI = LIS->getInterval(Reg);
       LiveQueryResult LRQ = LI.Query(Idx);
       if (!LRQ.valueOutOrDead()) {
@@ -587,6 +648,29 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   return true;
 }
 
+bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
+  // DBG_LABEL label
+  if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
+    LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+    return false;
+  }
+
+  // Get or create the UserLabel for label here.
+  const DILabel *Label = MI.getDebugLabel();
+  const DebugLoc &DL = MI.getDebugLoc();
+  bool Found = false;
+  for (auto const &L : userLabels) {
+    if (L->match(Label, DL->getInlinedAt(), Idx)) {
+      Found = true;
+      break;
+    }
+  }
+  if (!Found)
+    userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+
+  return true;
+}
+
 bool LDVImpl::collectDebugValues(MachineFunction &mf) {
   bool Changed = false;
   for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
@@ -610,7 +694,8 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
       do {
         // Only handle DBG_VALUE in handleDebugValue(). Skip all other
         // kinds of debug instructions.
-        if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
+        if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+            (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
           MBBI = MBB->erase(MBBI);
           Changed = true;
         } else
@@ -655,10 +740,8 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
   }
 
   // Limited by the next def.
-  if (I.valid() && I.start() < Stop) {
+  if (I.valid() && I.start() < Stop)
     Stop = I.start();
-    ToEnd = false;
-  }
   // Limited by VNI's live range.
   else if (!ToEnd && Kills)
     Kills->push_back(Stop);
@@ -826,8 +909,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
       ++I;
 
       // If the interval also overlaps the start of the "next" (i.e.
-      // current) range create a new interval for the remainder (which
-      // may be further trimmed).
+      // current) range create a new interval for the remainder.
       if (RStart < IStop)
         I.insert(RStart, IStop, Loc);
     }
@@ -837,13 +919,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
     if (!I.valid())
       return;
 
-    if (I.start() < RStart) {
-      // Interval start overlaps range - trim to the scope range.
-      I.setStartUnchecked(RStart);
-      // Remember that this interval was trimmed.
-      trimmedDefs.insert(RStart);
-    }
-
     // The end of a lexical scope range is the last instruction in the
     // range. To convert to an interval we need the index of the
     // instruction after it.
@@ -1227,11 +1302,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
   // that the original virtual register was a pointer. Also, add the stack slot
   // offset for the spilled register to the expression.
   const DIExpression *Expr = Expression;
+  uint8_t DIExprFlags = DIExpression::ApplyOffset;
   bool IsIndirect = Loc.wasIndirect();
   if (Spilled) {
-    auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+    if (IsIndirect)
+      DIExprFlags |= DIExpression::DerefAfter;
     Expr =
-        DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
+        DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
     IsIndirect = true;
   }
 
@@ -1247,6 +1324,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
   } while (I != MBB->end());
 }
 
+void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+                                 LiveIntervals &LIS,
+                                 const TargetInstrInfo &TII) {
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+  ++NumInsertedDebugLabels;
+  BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL))
+      .addMetadata(Label);
+}
+
 void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
                                 const TargetInstrInfo &TII,
                                 const TargetRegisterInfo &TRI,
@@ -1262,12 +1348,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
     bool Spilled = SpillIt != SpillOffsets.end();
     unsigned SpillOffset = Spilled ? SpillIt->second : 0;
 
-    // If the interval start was trimmed to the lexical scope insert the
-    // DBG_VALUE at the previous index (otherwise it appears after the
-    // first instruction in the range).
-    if (trimmedDefs.count(Start))
-      Start = Start.getPrevIndex();
-
     LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
     MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
     SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
@@ -1295,16 +1375,31 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
   }
 }
 
+void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) {
+  LLVM_DEBUG(dbgs() << "\t" << loc);
+  MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator();
+
+  LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB));
+  insertDebugLabel(&*MBB, loc, LIS, TII);
+
+  LLVM_DEBUG(dbgs() << '\n');
+}
+
 void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
   LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
   if (!MF)
     return;
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   SpillOffsetMap SpillOffsets;
-  for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
-    LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
-    userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
-    userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+  for (auto &userValue : userValues) {
+    LLVM_DEBUG(userValue->print(dbgs(), TRI));
+    userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+    userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+  }
+  LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n");
+  for (auto &userLabel : userLabels) {
+    LLVM_DEBUG(userLabel->print(dbgs(), TRI));
+    userLabel->emitDebugLabel(*LIS, *TII);
   }
   EmitDone = true;
 }
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 0060399c2b04..0cbe10c6a422 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -1,9 +1,8 @@
 //===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 2340b6abd87c..70b2a77fe800 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1,9 +1,8 @@
 //===- LiveInterval.cpp - Live Interval Representation --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -297,9 +296,7 @@ private:
 
   iterator find(SlotIndex Pos) { return LR->find(Pos); }
 
-  iterator findInsertPos(Segment S) {
-    return std::upper_bound(LR->begin(), LR->end(), S.start);
-  }
+  iterator findInsertPos(Segment S) { return llvm::upper_bound(*LR, S.start); }
 };
 
 //===----------------------------------------------------------------------===//
@@ -880,8 +877,53 @@ void LiveInterval::clearSubRanges() {
   SubRanges = nullptr;
 }
 
-void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
-    LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+/// For each VNI in \p SR, check whether or not that value defines part
+/// of the mask described by \p LaneMask and if not, remove that value
+/// from \p SR.
+static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
+                                       LaneBitmask LaneMask,
+                                       const SlotIndexes &Indexes,
+                                       const TargetRegisterInfo &TRI) {
+  // Phys reg should not be tracked at subreg level.
+  // Same for noreg (Reg == 0).
+  if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+    return;
+  // Remove the values that don't define those lanes.
+  SmallVector<VNInfo *, 8> ToBeRemoved;
+  for (VNInfo *VNI : SR.valnos) {
+    if (VNI->isUnused())
+      continue;
+    // PHI definitions don't have MI attached, so there is nothing
+    // we can use to strip the VNI.
+    if (VNI->isPHIDef())
+      continue;
+    const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def);
+    assert(MI && "Cannot find the definition of a value");
+    bool hasDef = false;
+    for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+      if (!MOI->isReg() || !MOI->isDef())
+        continue;
+      if (MOI->getReg() != Reg)
+        continue;
+      if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+        continue;
+      hasDef = true;
+      break;
+    }
+
+    if (!hasDef)
+      ToBeRemoved.push_back(VNI);
+  }
+  for (VNInfo *VNI : ToBeRemoved)
+    SR.removeValNo(VNI);
+
+  assert(!SR.empty() && "At least one value should be defined by this mask");
+}
+
+void LiveInterval::refineSubRanges(
+    BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
+    std::function<void(LiveInterval::SubRange &)> Apply,
+    const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
   LaneBitmask ToApply = LaneMask;
   for (SubRange &SR : subranges()) {
     LaneBitmask SRMask = SR.LaneMask;
@@ -899,6 +941,10 @@ void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
       SR.LaneMask = SRMask & ~Matching;
       // Create a new subrange for the matching part
       MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+      // Now that the subrange is split in half, make sure we
+      // only keep in the subranges the VNIs that touch the related half.
+      stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
+      stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
     }
     Apply(*MatchingRange);
     ToApply &= ~Matching;
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 36428e0335f9..43fa8f2d7157 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,9 +1,8 @@
 //===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index 471775f8706b..aa85569063b3 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1,9 +1,8 @@
 //===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -901,8 +900,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
 
   // We are going to enumerate all the register mask slots contained in LI.
   // Start with a binary search of RegMaskSlots to find a starting point.
-  ArrayRef<SlotIndex>::iterator SlotI =
-    std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+  ArrayRef<SlotIndex>::iterator SlotI = llvm::lower_bound(Slots, LiveI->start);
   ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
 
   // No slots in range, LI begins after the last call.
@@ -1371,8 +1369,7 @@ private:
 
   void updateRegMaskSlots() {
     SmallVectorImpl<SlotIndex>::iterator RI =
-      std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
-                       OldIdx);
+        llvm::lower_bound(LIS.RegMaskSlots, OldIdx);
     assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
            "No RegMask at OldIdx.");
     *RI = NewIdx.getRegSlot();
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 619643acb6d3..cd3d248ac878 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -1,9 +1,8 @@
 //===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 70e135ab1aff..d670f28df6ba 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,9 +1,8 @@
 //===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -96,10 +95,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
       }
 
       LI.refineSubRanges(*Alloc, SubMask,
-          [&MO, this](LiveInterval::SubRange &SR) {
-        if (MO.isDef())
-          createDeadDef(*Indexes, *Alloc, SR, MO);
-      });
+                         [&MO, this](LiveInterval::SubRange &SR) {
+                           if (MO.isDef())
+                             createDeadDef(*Indexes, *Alloc, SR, MO);
+                         },
+                         *Indexes, TRI);
     }
 
     // Create the def in the main liverange. We do not have to do this if
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 9f226b154a67..11aea5a3b016 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -1,9 +1,8 @@
 //===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 8dfe8b68c3af..882e562ba95c 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,9 +1,8 @@
 //===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -232,6 +231,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
     return false;
   LLVM_DEBUG(dbgs() << "                folded: " << *FoldMI);
   LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
+  if (UseMI->isCall())
+    UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
   UseMI->eraseFromParent();
   DefMI->addRegisterDead(LI->reg, nullptr);
   Dead.push_back(DefMI);
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index f75d513c89f5..8818f1ce0ad9 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -1,9 +1,8 @@
 //===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 ///===---------------------------------------------------------------------===//
 ///
diff --git a/lib/CodeGen/LiveRangeUtils.h b/lib/CodeGen/LiveRangeUtils.h
index bd57609c3d84..0e6bfeb0d4a5 100644
--- a/lib/CodeGen/LiveRangeUtils.h
+++ b/lib/CodeGen/LiveRangeUtils.h
@@ -1,9 +1,8 @@
 //===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index e72977b02675..ce99e5535c25 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,9 +1,8 @@
 //===- LiveRegMatrix.cpp - Track register interference --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index c22681385492..6afb7fb7aa11 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -1,9 +1,8 @@
 //===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -126,13 +125,15 @@ void LiveRegUnits::addPristines(const MachineFunction &MF) {
 
 void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
-  if (!MBB.succ_empty()) {
-    addPristines(MF);
-    // To get the live-outs we simply merge the live-ins of all successors.
-    for (const MachineBasicBlock *Succ : MBB.successors())
-      addBlockLiveIns(*this, *Succ);
-  } else if (MBB.isReturnBlock()) {
-    // For the return block: Add all callee saved registers.
+
+  addPristines(MF);
+
+  // To get the live-outs we simply merge the live-ins of all successors.
+  for (const MachineBasicBlock *Succ : MBB.successors())
+    addBlockLiveIns(*this, *Succ);
+
+  // For the return block: Add all callee saved registers.
+  if (MBB.isReturnBlock()) {
     const MachineFrameInfo &MFI = MF.getFrameInfo();
     if (MFI.isCalleeSavedInfoValid())
       addCalleeSavedRegs(*this, MF);
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index 80ecfdb7a507..f55977d72723 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -1,9 +1,8 @@
 //===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 0b92eab83806..aaff982ef1b0 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -1,9 +1,8 @@
 //===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -401,7 +400,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
                                                 true/*IsImp*/, true/*IsKill*/));
     else {
       MachineOperand *MO =
-        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI);
       bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
       // If the last reference is the last def, then it's not used at all.
       // That is, unless we are currently processing the last reference itself.
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 795028e97929..b14d76a585f7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -1,9 +1,8 @@
 //===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -200,19 +199,27 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Make sure that the stack protector comes before the local variables on the
   // stack.
   SmallSet<int, 16> ProtectedObjs;
-  if (MFI.getStackProtectorIndex() >= 0) {
+  if (MFI.hasStackProtectorIndex()) {
+    int StackProtectorFI = MFI.getStackProtectorIndex();
+
+    // We need to make sure we didn't pre-allocate the stack protector when
+    // doing this.
+    // If we already have a stack protector, this will re-assign it to a slot
+    // that is **not** covering the protected objects.
+    assert(!MFI.isObjectPreAllocated(StackProtectorFI) &&
+           "Stack protector pre-allocated in LocalStackSlotAllocation");
+
     StackObjSet LargeArrayObjs;
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
 
-    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
-                      StackGrowsDown, MaxAlign);
+    AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
       if (MFI.isDeadObjectIndex(i))
         continue;
-      if (MFI.getStackProtectorIndex() == (int)i)
+      if (StackProtectorFI == (int)i)
         continue;
 
       switch (MFI.getObjectSSPLayout(i)) {
diff --git a/lib/CodeGen/LoopTraversal.cpp b/lib/CodeGen/LoopTraversal.cpp
index a02d10e09d7d..9490dfc40a82 100644
--- a/lib/CodeGen/LoopTraversal.cpp
+++ b/lib/CodeGen/LoopTraversal.cpp
@@ -1,9 +1,8 @@
 //===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp
index 1c682e72fa49..ca0daa14fedf 100644
--- a/lib/CodeGen/LowLevelType.cpp
+++ b/lib/CodeGen/LowLevelType.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 36c1d358a9bd..c8cf6abda4fc 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -1,9 +1,8 @@
 //===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index f17c23619ed5..f49bc854e23f 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -1,9 +1,8 @@
 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -105,6 +104,8 @@ INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
                     "Rename Register Operands Canonically", false, false)
 
 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+  if (MF.empty())
+    return {};
   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
   std::vector<MachineBasicBlock *> RPOList;
   for (auto MBB : RPOT) {
@@ -179,6 +180,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
   }
 
   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+  std::map<unsigned, MachineInstr *> MultiUserLookup;
+  unsigned UseToBringDefCloserToCount = 0;
   std::vector<MachineInstr *> PseudoIdempotentInstructions;
   std::vector<unsigned> PhysRegDefs;
   for (auto *II : Instructions) {
@@ -254,6 +257,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
       if (Delta < Distance) {
         Distance = Delta;
         UseToBringDefCloserTo = UseInst;
+        MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
       }
     }
 
@@ -293,11 +297,11 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
   }
 
   // Sort the defs for users of multiple defs lexographically.
-  for (const auto &E : MultiUsers) {
+  for (const auto &E : MultiUserLookup) {
 
     auto UseI =
         std::find_if(MBB->instr_begin(), MBB->instr_end(),
-                     [&](MachineInstr &MI) -> bool { return &MI == E.first; });
+                     [&](MachineInstr &MI) -> bool { return &MI == E.second; });
 
     if (UseI == MBB->instr_end())
       continue;
@@ -305,7 +309,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
     LLVM_DEBUG(
         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
     Changed |= rescheduleLexographically(
-        E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; });
+        MultiUsers[E.second], MBB,
+        [&]() -> MachineBasicBlock::iterator { return UseI; });
   }
 
   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
@@ -342,15 +347,23 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
       continue;
     if (!TargetRegisterInfo::isVirtualRegister(Src))
       continue;
+    // Not folding COPY instructions if regbankselect has not set the RCs.
+    // Why are we only considering Register Classes? Because the verifier
+    // sometimes gets upset if the register classes don't match even if the
+    // types do. A future patch might add COPY folding for matching types in
+    // pre-registerbankselect code.
+    if (!MRI.getRegClassOrNull(Dst))
+      continue;
     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
       continue;
 
-    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
-      MachineOperand *MO = &*UI;
+    std::vector<MachineOperand *> Uses;
+    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
+      Uses.push_back(&*UI);
+    for (auto *MO : Uses)
       MO->setReg(Src);
-      Changed = true;
-    }
 
+    Changed = true;
     MI->eraseFromParent();
   }
 
@@ -474,18 +487,14 @@ class NamedVRegCursor {
   unsigned virtualVRegNumber;
 
 public:
-  NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
-    unsigned VRegGapIndex = 0;
-    const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
-    unsigned I = MRI.createIncompleteVirtualRegister();
-    const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
-    virtualVRegNumber = E;
-  }
+  NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
 
   void SkipVRegs() {
     unsigned VRegGapIndex = 1;
+    if (!virtualVRegNumber) {
+      VRegGapIndex = 0;
+      virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+    }
     const unsigned VR_GAP = (++VRegGapIndex * 1000);
 
     unsigned I = virtualVRegNumber;
@@ -501,14 +510,17 @@ public:
     return virtualVRegNumber;
   }
 
-  unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+  unsigned createVirtualRegister(unsigned VReg) {
+    if (!virtualVRegNumber)
+      SkipVRegs();
     std::string S;
     raw_string_ostream OS(S);
     OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
     OS.flush();
     virtualVRegNumber++;
-
-    return MRI.createVirtualRegister(RC, OS.str());
+    if (auto RC = MRI.getRegClassOrNull(VReg))
+      return MRI.createVirtualRegister(RC, OS.str());
+    return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
   }
 };
 } // namespace
@@ -558,7 +570,7 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
       continue;
     }
 
-    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));
+    auto Rename = NVC.createVirtualRegister(Reg);
 
     if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
       LLVM_DEBUG(dbgs() << "Mapping vreg ";);
@@ -735,14 +747,15 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
   // of the MachineBasicBlock so that they are named in the order that we sorted
   // them alphabetically. Eventually we wont need SkipVRegs because we will use
   // named vregs instead.
-  NVC.SkipVRegs();
+  if (IdempotentInstCount)
+    NVC.SkipVRegs();
 
   auto MII = MBB->begin();
   for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
     MachineInstr &MI = *MII++;
     Changed = true;
     unsigned vRegToRename = MI.getOperand(0).getReg();
-    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+    auto Rename = NVC.createVirtualRegister(vRegToRename);
 
     std::vector<MachineOperand *> RenameMOs;
     for (auto &MO : MRI.reg_operands(vRegToRename)) {
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 265877c2f5b4..4899bd3f5811 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -1,9 +1,8 @@
 //===- MILexer.cpp - Machine instructions lexer implementation ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -205,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("nuw" , MIToken::kw_nuw)
       .Case("nsw" , MIToken::kw_nsw)
       .Case("exact" , MIToken::kw_exact)
+      .Case("fpexcept", MIToken::kw_fpexcept)
       .Case("debug-location", MIToken::kw_debug_location)
       .Case("same_value", MIToken::kw_cfi_same_value)
       .Case("offset", MIToken::kw_cfi_offset)
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index ceff79087d81..0fe3f9f706db 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -1,9 +1,8 @@
 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,6 +73,7 @@ struct MIToken {
     kw_nuw,
     kw_nsw,
     kw_exact,
+    kw_fpexcept,
     kw_debug_location,
     kw_cfi_same_value,
     kw_cfi_offset,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index 6f2d8bb53ac8..c0b800a0b870 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1,9 +1,8 @@
 //===- MIParser.cpp - Machine instructions parser implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MIParser.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "MILexer.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
@@ -27,6 +26,8 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,12 +82,242 @@
 
 using namespace llvm;
 
+void PerTargetMIParsingState::setTarget(
+  const TargetSubtargetInfo &NewSubtarget) {
+
+  // If the subtarget changed, over-conservatively assume everything is invalid.
+  if (&Subtarget == &NewSubtarget)
+    return;
+
+  Names2InstrOpCodes.clear();
+  Names2Regs.clear();
+  Names2RegMasks.clear();
+  Names2SubRegIndices.clear();
+  Names2TargetIndices.clear();
+  Names2DirectTargetFlags.clear();
+  Names2BitmaskTargetFlags.clear();
+  Names2MMOTargetFlags.clear();
+
+  initNames2RegClasses();
+  initNames2RegBanks();
+}
+
+void PerTargetMIParsingState::initNames2Regs() {
+  if (!Names2Regs.empty())
+    return;
+
+  // The '%noreg' register is the register 0.
+  Names2Regs.insert(std::make_pair("noreg", 0));
+  const auto *TRI = Subtarget.getRegisterInfo();
+  assert(TRI && "Expected target register info");
+
+  for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+    bool WasInserted =
+        Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+            .second;
+    (void)WasInserted;
+    assert(WasInserted && "Expected registers to be unique case-insensitively");
+  }
+}
+
+bool PerTargetMIParsingState::getRegisterByName(StringRef RegName,
+                                                unsigned &Reg) {
+  initNames2Regs();
+  auto RegInfo = Names2Regs.find(RegName);
+  if (RegInfo == Names2Regs.end())
+    return true;
+  Reg = RegInfo->getValue();
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2InstrOpCodes() {
+  if (!Names2InstrOpCodes.empty())
+    return;
+  const auto *TII = Subtarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+    Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool PerTargetMIParsingState::parseInstrName(StringRef InstrName,
+                                             unsigned &OpCode) {
+  initNames2InstrOpCodes();
+  auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+  if (InstrInfo == Names2InstrOpCodes.end())
+    return true;
+  OpCode = InstrInfo->getValue();
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2RegMasks() {
+  if (!Names2RegMasks.empty())
+    return;
+  const auto *TRI = Subtarget.getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+  ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+  assert(RegMasks.size() == RegMaskNames.size());
+  for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+    Names2RegMasks.insert(
+        std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) {
+  initNames2RegMasks();
+  auto RegMaskInfo = Names2RegMasks.find(Identifier);
+  if (RegMaskInfo == Names2RegMasks.end())
+    return nullptr;
+  return RegMaskInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2SubRegIndices() {
+  if (!Names2SubRegIndices.empty())
+    return;
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+    Names2SubRegIndices.insert(
+        std::make_pair(TRI->getSubRegIndexName(I), I));
+}
+
+unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) {
+  initNames2SubRegIndices();
+  auto SubRegInfo = Names2SubRegIndices.find(Name);
+  if (SubRegInfo == Names2SubRegIndices.end())
+    return 0;
+  return SubRegInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2TargetIndices() {
+  if (!Names2TargetIndices.empty())
+    return;
+  const auto *TII = Subtarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  for (const auto &I : Indices)
+    Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) {
+  initNames2TargetIndices();
+  auto IndexInfo = Names2TargetIndices.find(Name);
+  if (IndexInfo == Names2TargetIndices.end())
+    return true;
+  Index = IndexInfo->second;
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2DirectTargetFlags() {
+  if (!Names2DirectTargetFlags.empty())
+    return;
+
+  const auto *TII = Subtarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2DirectTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name,
+                                                  unsigned &Flag) {
+  initNames2DirectTargetFlags();
+  auto FlagInfo = Names2DirectTargetFlags.find(Name);
+  if (FlagInfo == Names2DirectTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2BitmaskTargetFlags() {
+  if (!Names2BitmaskTargetFlags.empty())
+    return;
+
+  const auto *TII = Subtarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2BitmaskTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name,
+                                                   unsigned &Flag) {
+  initNames2BitmaskTargetFlags();
+  auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+  if (FlagInfo == Names2BitmaskTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2MMOTargetFlags() {
+  if (!Names2MMOTargetFlags.empty())
+    return;
+
+  const auto *TII = Subtarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name,
+                                               MachineMemOperand::Flags &Flag) {
+  initNames2MMOTargetFlags();
+  auto FlagInfo = Names2MMOTargetFlags.find(Name);
+  if (FlagInfo == Names2MMOTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+void PerTargetMIParsingState::initNames2RegClasses() {
+  if (!Names2RegClasses.empty())
+    return;
+
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+    const auto *RC = TRI->getRegClass(I);
+    Names2RegClasses.insert(
+        std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+  }
+}
+
+void PerTargetMIParsingState::initNames2RegBanks() {
+  if (!Names2RegBanks.empty())
+    return;
+
+  const RegisterBankInfo *RBI = Subtarget.getRegBankInfo();
+  // If the target does not support GlobalISel, we may not have a
+  // register bank info.
+  if (!RBI)
+    return;
+
+  for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+    const auto &RegBank = RBI->getRegBank(I);
+    Names2RegBanks.insert(
+        std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+  }
+}
+
+const TargetRegisterClass *
+PerTargetMIParsingState::getRegClass(StringRef Name) {
+  auto RegClassInfo = Names2RegClasses.find(Name);
+  if (RegClassInfo == Names2RegClasses.end())
+    return nullptr;
+  return RegClassInfo->getValue();
+}
+
+const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) {
+  auto RegBankInfo = Names2RegBanks.find(Name);
+  if (RegBankInfo == Names2RegBanks.end())
+    return nullptr;
+  return RegBankInfo->getValue();
+}
+
 PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
-    SourceMgr &SM, const SlotMapping &IRSlots,
-    const Name2RegClassMap &Names2RegClasses,
-    const Name2RegBankMap &Names2RegBanks)
-  : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
-    Names2RegBanks(Names2RegBanks) {
+    SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T)
+  : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) {
 }
 
 VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -137,26 +368,10 @@ class MIParser {
   StringRef Source, CurrentSource;
   MIToken Token;
   PerFunctionMIParsingState &PFS;
-  /// Maps from instruction names to op codes.
-  StringMap<unsigned> Names2InstrOpCodes;
-  /// Maps from register names to registers.
-  StringMap<unsigned> Names2Regs;
-  /// Maps from register mask names to register masks.
-  StringMap<const uint32_t *> Names2RegMasks;
-  /// Maps from subregister names to subregister indices.
-  StringMap<unsigned> Names2SubRegIndices;
   /// Maps from slot numbers to function's unnamed basic blocks.
   DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
   /// Maps from slot numbers to function's unnamed values.
   DenseMap<unsigned, const Value *> Slots2Values;
-  /// Maps from target index names to target indices.
-  StringMap<int> Names2TargetIndices;
-  /// Maps from direct target flag names to the direct target flag values.
-  StringMap<unsigned> Names2DirectTargetFlags;
-  /// Maps from direct target flag names to the bitmask target flag values.
-  StringMap<unsigned> Names2BitmaskTargetFlags;
-  /// Maps from MMO target flag names to MMO target flag values.
-  StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
 
 public:
   MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -281,12 +496,6 @@ private:
   /// Otherwise return false.
   bool consumeIfPresent(MIToken::TokenKind TokenKind);
 
-  void initNames2InstrOpCodes();
-
-  /// Try to convert an instruction name to an opcode. Return true if the
-  /// instruction name is invalid.
-  bool parseInstrName(StringRef InstrName, unsigned &OpCode);
-
   bool parseInstruction(unsigned &OpCode, unsigned &Flags);
 
   bool assignRegisterTies(MachineInstr &MI,
@@ -295,62 +504,11 @@ private:
   bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
                               const MCInstrDesc &MCID);
 
-  void initNames2Regs();
-
-  /// Try to convert a register name to a register number. Return true if the
-  /// register name is invalid.
-  bool getRegisterByName(StringRef RegName, unsigned &Reg);
-
-  void initNames2RegMasks();
-
-  /// Check if the given identifier is a name of a register mask.
-  ///
-  /// Return null if the identifier isn't a register mask.
-  const uint32_t *getRegMask(StringRef Identifier);
-
-  void initNames2SubRegIndices();
-
-  /// Check if the given identifier is a name of a subregister index.
-  ///
-  /// Return 0 if the name isn't a subregister index class.
-  unsigned getSubRegIndex(StringRef Name);
-
   const BasicBlock *getIRBlock(unsigned Slot);
   const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
 
   const Value *getIRValue(unsigned Slot);
 
-  void initNames2TargetIndices();
-
-  /// Try to convert a name of target index to the corresponding target index.
-  ///
-  /// Return true if the name isn't a name of a target index.
-  bool getTargetIndex(StringRef Name, int &Index);
-
-  void initNames2DirectTargetFlags();
-
-  /// Try to convert a name of a direct target flag to the corresponding
-  /// target flag.
-  ///
-  /// Return true if the name isn't a name of a direct flag.
-  bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
-
-  void initNames2BitmaskTargetFlags();
-
-  /// Try to convert a name of a bitmask target flag to the corresponding
-  /// target flag.
-  ///
-  /// Return true if the name isn't a name of a bitmask target flag.
-  bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
-
-  void initNames2MMOTargetFlags();
-
-  /// Try to convert a name of a MachineMemOperand target flag to the
-  /// corresponding target flag.
-  ///
-  /// Return true if the name isn't a name of a target MMO flag.
-  bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
-
   /// Get or create an MCSymbol for a given name.
   MCSymbol *getOrCreateMCSymbol(StringRef Name);
 
@@ -978,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
          Token.is(MIToken::kw_reassoc) ||
          Token.is(MIToken::kw_nuw) ||
          Token.is(MIToken::kw_nsw) ||
-         Token.is(MIToken::kw_exact)) {
+         Token.is(MIToken::kw_exact) ||
+         Token.is(MIToken::kw_fpexcept)) {
     // Mine frame and fast math flags
     if (Token.is(MIToken::kw_frame_setup))
       Flags |= MachineInstr::FrameSetup;
@@ -1004,13 +1163,15 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
       Flags |= MachineInstr::NoSWrap;
     if (Token.is(MIToken::kw_exact))
       Flags |= MachineInstr::IsExact;
+    if (Token.is(MIToken::kw_fpexcept))
+      Flags |= MachineInstr::FPExcept;
 
     lex();
   }
   if (Token.isNot(MIToken::Identifier))
     return error("expected a machine instruction");
   StringRef InstrName = Token.stringValue();
-  if (parseInstrName(InstrName, OpCode))
+  if (PFS.Target.parseInstrName(InstrName, OpCode))
     return error(Twine("unknown machine instruction name '") + InstrName + "'");
   lex();
   return false;
@@ -1019,7 +1180,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
 bool MIParser::parseNamedRegister(unsigned &Reg) {
   assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
   StringRef Name = Token.stringValue();
-  if (getRegisterByName(Name, Reg))
+  if (PFS.Target.getRegisterByName(Name, Reg))
     return error(Twine("unknown register name '") + Name + "'");
   return false;
 }
@@ -1070,21 +1231,20 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
   StringRef Name = Token.stringValue();
 
   // Was it a register class?
-  auto RCNameI = PFS.Names2RegClasses.find(Name);
-  if (RCNameI != PFS.Names2RegClasses.end()) {
+  const TargetRegisterClass *RC = PFS.Target.getRegClass(Name);
+  if (RC) {
     lex();
-    const TargetRegisterClass &RC = *RCNameI->getValue();
 
     switch (RegInfo.Kind) {
     case VRegInfo::UNKNOWN:
     case VRegInfo::NORMAL:
       RegInfo.Kind = VRegInfo::NORMAL;
-      if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+      if (RegInfo.Explicit && RegInfo.D.RC != RC) {
         const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
         return error(Loc, Twine("conflicting register classes, previously: ") +
                      Twine(TRI.getRegClassName(RegInfo.D.RC)));
       }
-      RegInfo.D.RC = &RC;
+      RegInfo.D.RC = RC;
       RegInfo.Explicit = true;
       return false;
 
@@ -1098,10 +1258,9 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
   // Should be a register bank or a generic register.
   const RegisterBank *RegBank = nullptr;
   if (Name != "_") {
-    auto RBNameI = PFS.Names2RegBanks.find(Name);
-    if (RBNameI == PFS.Names2RegBanks.end())
+    RegBank = PFS.Target.getRegBank(Name);
+    if (!RegBank)
       return error(Loc, "expected '_', register class, or register bank name");
-    RegBank = RBNameI->getValue();
   }
 
   lex();
@@ -1173,7 +1332,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
   if (Token.isNot(MIToken::Identifier))
     return error("expected a subregister index after '.'");
   auto Name = Token.stringValue();
-  SubReg = getSubRegIndex(Name);
+  SubReg = PFS.Target.getSubRegIndex(Name);
   if (!SubReg)
     return error(Twine("use of unknown subregister index '") + Name + "'");
   lex();
@@ -1341,6 +1500,19 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
   return false;
 }
 
+// See LLT implementation for bit size limits.
+static bool verifyScalarSize(uint64_t Size) {
+  return Size != 0 && isUInt<16>(Size);
+}
+
+static bool verifyVectorElementCount(uint64_t NumElts) {
+  return NumElts != 0 && isUInt<16>(NumElts);
+}
+
+static bool verifyAddrSpace(uint64_t AddrSpace) {
+  return isUInt<24>(AddrSpace);
+}
+
 bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
   if (Token.range().front() == 's' || Token.range().front() == 'p') {
     StringRef SizeStr = Token.range().drop_front();
@@ -1349,12 +1521,19 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
   }
 
   if (Token.range().front() == 's') {
-    Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+    auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+    if (!verifyScalarSize(ScalarSize))
+      return error("invalid size for scalar type");
+
+    Ty = LLT::scalar(ScalarSize);
     lex();
     return false;
   } else if (Token.range().front() == 'p') {
     const DataLayout &DL = MF.getDataLayout();
-    unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+    uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+    if (!verifyAddrSpace(AS))
+      return error("invalid address space number");
+
     Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
     lex();
     return false;
@@ -1369,6 +1548,9 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
   if (Token.isNot(MIToken::IntegerLiteral))
     return error(Loc, "expected <M x sN> or <M x pA> for vector type");
   uint64_t NumElements = Token.integerValue().getZExtValue();
+  if (!verifyVectorElementCount(NumElements))
+    return error("invalid number of vector elements");
+
   lex();
 
   if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
@@ -1381,11 +1563,17 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
   if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
     return error("expected integers after 's'/'p' type character");
 
-  if (Token.range().front() == 's')
-    Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
-  else if (Token.range().front() == 'p') {
+  if (Token.range().front() == 's') {
+    auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+    if (!verifyScalarSize(ScalarSize))
+      return error("invalid size for scalar type");
+    Ty = LLT::scalar(ScalarSize);
+  } else if (Token.range().front() == 'p') {
     const DataLayout &DL = MF.getDataLayout();
-    unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+    uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+    if (!verifyAddrSpace(AS))
+      return error("invalid address space number");
+
     Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
   } else
     return error(Loc, "expected <M x sN> or <M x pA> for vector type");
@@ -1625,7 +1813,7 @@ bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
 bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
   assert(Token.is(MIToken::SubRegisterIndex));
   StringRef Name = Token.stringValue();
-  unsigned SubRegIndex = getSubRegIndex(Token.stringValue());
+  unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue());
   if (SubRegIndex == 0)
     return error(Twine("unknown subregister index '") + Name + "'");
   lex();
@@ -1669,6 +1857,11 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
           Elements.push_back(Op);
           continue;
         }
+        if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) {
+          lex();
+          Elements.push_back(Enc);
+          continue;
+        }
         return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
       }
 
@@ -2100,7 +2293,7 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
   if (Token.isNot(MIToken::Identifier))
     return error("expected the name of the target index");
   int Index = 0;
-  if (getTargetIndex(Token.stringValue(), Index))
+  if (PFS.Target.getTargetIndex(Token.stringValue(), Index))
     return error("use of undefined target index '" + Token.stringValue() + "'");
   lex();
   if (expectAndConsume(MIToken::rparen))
@@ -2242,7 +2435,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
   case MIToken::Error:
     return true;
   case MIToken::Identifier:
-    if (const auto *RegMask = getRegMask(Token.stringValue())) {
+    if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) {
       Dest = MachineOperand::CreateRegMask(RegMask);
       lex();
       break;
@@ -2268,8 +2461,8 @@ bool MIParser::parseMachineOperandAndTargetFlags(
       return true;
     if (Token.isNot(MIToken::Identifier))
       return error("expected the name of the target flag");
-    if (getDirectTargetFlag(Token.stringValue(), TF)) {
-      if (getBitmaskTargetFlag(Token.stringValue(), TF))
+    if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) {
+      if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF))
         return error("use of undefined target flag '" + Token.stringValue() +
                      "'");
     }
@@ -2279,7 +2472,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
       if (Token.isNot(MIToken::Identifier))
         return error("expected the name of the target flag");
       unsigned BitFlag = 0;
-      if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+      if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag))
         return error("use of undefined target flag '" + Token.stringValue() +
                      "'");
       // TODO: Report an error when using a duplicate bit target flag.
@@ -2325,6 +2518,10 @@ bool MIParser::parseAlignment(unsigned &Alignment) {
   if (getUnsigned(Alignment))
     return true;
   lex();
+
+  if (!isPowerOf2_32(Alignment))
+    return error("expected a power-of-2 literal after 'align'");
+
   return false;
 }
 
@@ -2436,7 +2633,7 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
     break;
   case MIToken::StringConstant: {
     MachineMemOperand::Flags TF;
-    if (getMMOTargetFlag(Token.stringValue(), TF))
+    if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF))
       return error("use of undefined target MMO flag '" + Token.stringValue() +
                    "'");
     Flags |= TF;
@@ -2711,87 +2908,6 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
   return false;
 }
 
-void MIParser::initNames2InstrOpCodes() {
-  if (!Names2InstrOpCodes.empty())
-    return;
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "Expected target instruction info");
-  for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
-    Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
-}
-
-bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) {
-  initNames2InstrOpCodes();
-  auto InstrInfo = Names2InstrOpCodes.find(InstrName);
-  if (InstrInfo == Names2InstrOpCodes.end())
-    return true;
-  OpCode = InstrInfo->getValue();
-  return false;
-}
-
-void MIParser::initNames2Regs() {
-  if (!Names2Regs.empty())
-    return;
-  // The '%noreg' register is the register 0.
-  Names2Regs.insert(std::make_pair("noreg", 0));
-  const auto *TRI = MF.getSubtarget().getRegisterInfo();
-  assert(TRI && "Expected target register info");
-  for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
-    bool WasInserted =
-        Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
-            .second;
-    (void)WasInserted;
-    assert(WasInserted && "Expected registers to be unique case-insensitively");
-  }
-}
-
-bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
-  initNames2Regs();
-  auto RegInfo = Names2Regs.find(RegName);
-  if (RegInfo == Names2Regs.end())
-    return true;
-  Reg = RegInfo->getValue();
-  return false;
-}
-
-void MIParser::initNames2RegMasks() {
-  if (!Names2RegMasks.empty())
-    return;
-  const auto *TRI = MF.getSubtarget().getRegisterInfo();
-  assert(TRI && "Expected target register info");
-  ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
-  ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
-  assert(RegMasks.size() == RegMaskNames.size());
-  for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
-    Names2RegMasks.insert(
-        std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
-}
-
-const uint32_t *MIParser::getRegMask(StringRef Identifier) {
-  initNames2RegMasks();
-  auto RegMaskInfo = Names2RegMasks.find(Identifier);
-  if (RegMaskInfo == Names2RegMasks.end())
-    return nullptr;
-  return RegMaskInfo->getValue();
-}
-
-void MIParser::initNames2SubRegIndices() {
-  if (!Names2SubRegIndices.empty())
-    return;
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
-    Names2SubRegIndices.insert(
-        std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I));
-}
-
-unsigned MIParser::getSubRegIndex(StringRef Name) {
-  initNames2SubRegIndices();
-  auto SubRegInfo = Names2SubRegIndices.find(Name);
-  if (SubRegInfo == Names2SubRegIndices.end())
-    return 0;
-  return SubRegInfo->getValue();
-}
-
 static void initSlots2BasicBlocks(
     const Function &F,
     DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
@@ -2861,86 +2977,6 @@ const Value *MIParser::getIRValue(unsigned Slot) {
   return ValueInfo->second;
 }
 
-void MIParser::initNames2TargetIndices() {
-  if (!Names2TargetIndices.empty())
-    return;
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "Expected target instruction info");
-  auto Indices = TII->getSerializableTargetIndices();
-  for (const auto &I : Indices)
-    Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getTargetIndex(StringRef Name, int &Index) {
-  initNames2TargetIndices();
-  auto IndexInfo = Names2TargetIndices.find(Name);
-  if (IndexInfo == Names2TargetIndices.end())
-    return true;
-  Index = IndexInfo->second;
-  return false;
-}
-
-void MIParser::initNames2DirectTargetFlags() {
-  if (!Names2DirectTargetFlags.empty())
-    return;
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "Expected target instruction info");
-  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
-  for (const auto &I : Flags)
-    Names2DirectTargetFlags.insert(
-        std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
-  initNames2DirectTargetFlags();
-  auto FlagInfo = Names2DirectTargetFlags.find(Name);
-  if (FlagInfo == Names2DirectTargetFlags.end())
-    return true;
-  Flag = FlagInfo->second;
-  return false;
-}
-
-void MIParser::initNames2BitmaskTargetFlags() {
-  if (!Names2BitmaskTargetFlags.empty())
-    return;
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "Expected target instruction info");
-  auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
-  for (const auto &I : Flags)
-    Names2BitmaskTargetFlags.insert(
-        std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
-  initNames2BitmaskTargetFlags();
-  auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
-  if (FlagInfo == Names2BitmaskTargetFlags.end())
-    return true;
-  Flag = FlagInfo->second;
-  return false;
-}
-
-void MIParser::initNames2MMOTargetFlags() {
-  if (!Names2MMOTargetFlags.empty())
-    return;
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "Expected target instruction info");
-  auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
-  for (const auto &I : Flags)
-    Names2MMOTargetFlags.insert(
-        std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getMMOTargetFlag(StringRef Name,
-                                MachineMemOperand::Flags &Flag) {
-  initNames2MMOTargetFlags();
-  auto FlagInfo = Names2MMOTargetFlags.find(Name);
-  if (FlagInfo == Names2MMOTargetFlags.end())
-    return true;
-  Flag = FlagInfo->second;
-  return false;
-}
-
 MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
   // FIXME: Currently we can't recognize temporary or local symbols and call all
   // of the appropriate forms to create them. However, this handles basic cases
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
deleted file mode 100644
index b06ceb21b740..000000000000
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ /dev/null
@@ -1,125 +0,0 @@
-//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the function that parses the machine instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/Allocator.h"
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineFunction;
-class MDNode;
-class RegisterBank;
-struct SlotMapping;
-class SMDiagnostic;
-class SourceMgr;
-class StringRef;
-class TargetRegisterClass;
-
-struct VRegInfo {
-  enum uint8_t {
-    UNKNOWN, NORMAL, GENERIC, REGBANK
-  } Kind = UNKNOWN;
-  bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
-  union {
-    const TargetRegisterClass *RC;
-    const RegisterBank *RegBank;
-  } D;
-  unsigned VReg;
-  unsigned PreferredReg = 0;
-};
-
-using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
-using Name2RegBankMap = StringMap<const RegisterBank *>;
-
-struct PerFunctionMIParsingState {
-  BumpPtrAllocator Allocator;
-  MachineFunction &MF;
-  SourceMgr *SM;
-  const SlotMapping &IRSlots;
-  const Name2RegClassMap &Names2RegClasses;
-  const Name2RegBankMap &Names2RegBanks;
-
-  DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
-  DenseMap<unsigned, VRegInfo*> VRegInfos;
-  StringMap<VRegInfo*> VRegInfosNamed;
-  DenseMap<unsigned, int> FixedStackObjectSlots;
-  DenseMap<unsigned, int> StackObjectSlots;
-  DenseMap<unsigned, unsigned> ConstantPoolSlots;
-  DenseMap<unsigned, unsigned> JumpTableSlots;
-
-  PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
-                            const SlotMapping &IRSlots,
-                            const Name2RegClassMap &Names2RegClasses,
-                            const Name2RegBankMap &Names2RegBanks);
-
-  VRegInfo &getVRegInfo(unsigned Num);
-  VRegInfo &getVRegInfoNamed(StringRef RegName);
-};
-
-/// Parse the machine basic block definitions, and skip the machine
-/// instructions.
-///
-/// This function runs the first parsing pass on the machine function's body.
-/// It parses only the machine basic block definitions and creates the machine
-/// basic blocks in the given machine function.
-///
-/// The machine instructions aren't parsed during the first pass because all
-/// the machine basic blocks aren't defined yet - this makes it impossible to
-/// resolve the machine basic block references.
-///
-/// Return true if an error occurred.
-bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
-                                       StringRef Src, SMDiagnostic &Error);
-
-/// Parse the machine instructions.
-///
-/// This function runs the second parsing pass on the machine function's body.
-/// It skips the machine basic block definitions and parses only the machine
-/// instructions and basic block attributes like liveins and successors.
-///
-/// The second parsing pass assumes that the first parsing pass already ran
-/// on the given source string.
-///
-/// Return true if an error occurred.
-bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
-                              SMDiagnostic &Error);
-
-bool parseMBBReference(PerFunctionMIParsingState &PFS,
-                       MachineBasicBlock *&MBB, StringRef Src,
-                       SMDiagnostic &Error);
-
-bool parseRegisterReference(PerFunctionMIParsingState &PFS,
-                            unsigned &Reg, StringRef Src,
-                            SMDiagnostic &Error);
-
-bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
-                                 StringRef Src, SMDiagnostic &Error);
-
-bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
-                                   VRegInfo *&Info, StringRef Src,
-                                   SMDiagnostic &Error);
-
-bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
-                               StringRef Src, SMDiagnostic &Error);
-
-bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
-                 SMDiagnostic &Error);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 00da92a92ec6..b242934def80 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -1,9 +1,8 @@
 //===- MIRParser.cpp - MIR serialization format parser implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MIRParser/MIRParser.h"
-#include "MIParser.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
@@ -22,12 +20,14 @@
 #include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -40,6 +40,7 @@
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Target/TargetMachine.h"
 #include <memory>
 
 using namespace llvm;
@@ -54,10 +55,8 @@ class MIRParserImpl {
   StringRef Filename;
   LLVMContext &Context;
   SlotMapping IRSlots;
-  /// Maps from register class names to register classes.
-  Name2RegClassMap Names2RegClasses;
-  /// Maps from register bank names to register banks.
-  Name2RegBankMap Names2RegBanks;
+  std::unique_ptr<PerTargetMIParsingState> Target;
+
   /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
   /// created and inserted into the given module when this is true.
   bool NoLLVMIR = false;
@@ -117,6 +116,9 @@ public:
   bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
                            const yaml::MachineFunction &YamlMF);
 
+  bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS,
+                              const yaml::MachineFunction &YamlMF);
+
   bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
                                 std::vector<CalleeSavedInfo> &CSIInfo,
                                 const yaml::StringValue &RegisterSource,
@@ -151,20 +153,6 @@ private:
   SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
                                        SMRange SourceRange);
 
-  void initNames2RegClasses(const MachineFunction &MF);
-  void initNames2RegBanks(const MachineFunction &MF);
-
-  /// Check if the given identifier is a name of a register class.
-  ///
-  /// Return null if the name isn't a register class.
-  const TargetRegisterClass *getRegClass(const MachineFunction &MF,
-                                         StringRef Name);
-
-  /// Check if the given identifier is a name of a register bank.
-  ///
-  /// Return null if the name isn't a register bank.
-  const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
-
   void computeFunctionProperties(MachineFunction &MF);
 };
 
@@ -271,8 +259,9 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
 /// Create an empty function with the given name.
 static Function *createDummyFunction(StringRef Name, Module &M) {
   auto &Context = M.getContext();
-  Function *F = cast<Function>(M.getOrInsertFunction(
-      Name, FunctionType::get(Type::getVoidTy(Context), false)));
+  Function *F =
+      Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
+                       Function::ExternalLinkage, Name, M);
   BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
   new UnreachableInst(Context, BB);
   return F;
@@ -282,6 +271,11 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
   // Parse the yaml.
   yaml::MachineFunction YamlMF;
   yaml::EmptyContext Ctx;
+
+  const LLVMTargetMachine &TM = MMI.getTarget();
+  YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>(
+      TM.createDefaultFuncInfoYAML());
+
   yaml::yamlize(In, YamlMF, false, Ctx);
   if (In.error())
     return true;
@@ -346,12 +340,58 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
     Properties.set(MachineFunctionProperties::Property::NoVRegs);
 }
 
+/// Validate the call site entries in \p YamlMF and, if EnableDebugEntryValues
+/// is set, record them on PFS.MF. Returns false on success, error() otherwise.
+bool MIRParserImpl::initializeCallSiteInfo(
+    PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) {
+  MachineFunction &MF = PFS.MF;
+  SMDiagnostic Error;
+  const LLVMTargetMachine &TM = MF.getTarget();
+  for (const auto &YamlCSInfo : YamlMF.CallSitesInfo) {
+    const yaml::CallSiteInfo::MachineInstrLoc &Loc = YamlCSInfo.CallLocation;
+    if (Loc.BlockNum >= MF.size())
+      return error(Twine(MF.getName()) +
+                   " call instruction block out of range."
+                   " Unable to reference bb:" + Twine(Loc.BlockNum));
+    auto CallB = std::next(MF.begin(), Loc.BlockNum);
+    if (Loc.Offset >= CallB->size())
+      return error(Twine(MF.getName()) +
+                   " call instruction offset out of range."
+                   " Unable to reference instruction at bb: " +
+                   Twine(Loc.BlockNum) + " at offset:" + Twine(Loc.Offset));
+    auto CallI = std::next(CallB->begin(), Loc.Offset);
+    if (!CallI->isCall())
+      return error(Twine(MF.getName()) +
+                   " call site info should reference call instruction."
+                   " Instruction at bb:" + Twine(Loc.BlockNum) + " at offset:" +
+                   Twine(Loc.Offset) + " is not a call instruction");
+    MachineFunction::CallSiteInfo CSInfo;
+    for (const auto &ArgRegPair : YamlCSInfo.ArgForwardingRegs) {
+      unsigned Reg = 0;
+      if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
+        return error(Error, ArgRegPair.Reg.SourceRange);
+      CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
+    }
+    if (TM.Options.EnableDebugEntryValues)
+      MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
+  }
+  // Entries were given but the option that consumes them is off: report it.
+  if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
+    return error(Twine("Call site info provided but not used"));
+  return false;
+}
+
 bool
 MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
                                          MachineFunction &MF) {
   // TODO: Recreate the machine function.
-  initNames2RegClasses(MF);
-  initNames2RegBanks(MF);
+  if (Target) {
+    // Avoid clearing state if we're using the same subtarget again.
+    Target->setTarget(MF.getSubtarget());
+  } else {
+    Target.reset(new PerTargetMIParsingState(MF.getSubtarget()));
+  }
+
   if (YamlMF.Alignment)
     MF.setAlignment(YamlMF.Alignment);
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
@@ -367,8 +407,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
   if (YamlMF.FailedISel)
     MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
 
-  PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
-                                Names2RegBanks);
+  PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
   if (parseRegisterInfo(PFS, YamlMF))
     return true;
   if (!YamlMF.Constants.empty()) {
@@ -419,8 +458,32 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
   if (setupRegisterInfo(PFS, YamlMF))
     return true;
 
+  if (YamlMF.MachineFuncInfo) {
+    const LLVMTargetMachine &TM = MF.getTarget();
+    // Note this is called after the initial constructor of the
+    // MachineFunctionInfo based on the MachineFunction, which may depend on the
+    // IR.
+
+    SMRange SrcRange;
+    if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error,
+                                    SrcRange)) {
+      return error(Error, SrcRange);
+    }
+  }
+
+  // Set the reserved registers after parsing MachineFuncInfo. The target may
+  // have been recording information used to select the reserved registers
+  // there.
+  // FIXME: This is a temporary workaround until the reserved registers can be
+  // serialized.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.freezeReservedRegs(MF);
+
   computeFunctionProperties(MF);
 
+  if (initializeCallSiteInfo(PFS, YamlMF))
+    return false;
+
   MF.getSubtarget().mirFileLoaded(MF);
 
   MF.verify();
@@ -449,12 +512,12 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
       Info.Kind = VRegInfo::GENERIC;
       Info.D.RegBank = nullptr;
     } else {
-      const auto *RC = getRegClass(MF, VReg.Class.Value);
+      const auto *RC = Target->getRegClass(VReg.Class.Value);
       if (RC) {
         Info.Kind = VRegInfo::NORMAL;
         Info.D.RC = RC;
       } else {
-        const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value);
+        const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value);
         if (!RegBank)
           return error(
               VReg.Class.SourceRange.Start,
@@ -557,9 +620,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
     }
   }
 
-  // FIXME: This is a temporary workaround until the reserved registers can be
-  // serialized.
-  MRI.freezeReservedRegs(MF);
   return Error;
 }
 
@@ -567,6 +627,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                                         const yaml::MachineFunction &YamlMF) {
   MachineFunction &MF = PFS.MF;
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const Function &F = MF.getFunction();
   const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
   MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
@@ -608,8 +669,12 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                                         Object.IsImmutable, Object.IsAliased);
     else
       ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
-    MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+
+    if (!TFI->isSupportedStackID(Object.StackID))
+      return error(Object.ID.SourceRange.Start,
+                   Twine("StackID is not supported by target"));
     MFI.setStackID(ObjectIdx, Object.StackID);
+    MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
     if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
                                                          ObjectIdx))
              .second)
@@ -637,14 +702,17 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                          "' isn't defined in the function '" + F.getName() +
                          "'");
     }
+    if (!TFI->isSupportedStackID(Object.StackID))
+      return error(Object.ID.SourceRange.Start,
+                   Twine("StackID is not supported by target"));
     if (Object.Type == yaml::MachineStackObject::VariableSized)
       ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
     else
       ObjectIdx = MFI.CreateStackObject(
           Object.Size, Object.Alignment,
-          Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
+          Object.Type == yaml::MachineStackObject::SpillSlot, Alloca,
+          Object.StackID);
     MFI.setObjectOffset(ObjectIdx, Object.Offset);
-    MFI.setStackID(ObjectIdx, Object.StackID);
 
     if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
              .second)
@@ -844,48 +912,6 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
                       Error.getFixIts());
 }
 
-void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
-  if (!Names2RegClasses.empty())
-    return;
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
-    const auto *RC = TRI->getRegClass(I);
-    Names2RegClasses.insert(
-        std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
-  }
-}
-
-void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
-  if (!Names2RegBanks.empty())
-    return;
-  const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
-  // If the target does not support GlobalISel, we may not have a
-  // register bank info.
-  if (!RBI)
-    return;
-  for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
-    const auto &RegBank = RBI->getRegBank(I);
-    Names2RegBanks.insert(
-        std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
-  }
-}
-
-const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
-                                                      StringRef Name) {
-  auto RegClassInfo = Names2RegClasses.find(Name);
-  if (RegClassInfo == Names2RegClasses.end())
-    return nullptr;
-  return RegClassInfo->getValue();
-}
-
-const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
-                                              StringRef Name) {
-  auto RegBankInfo = Names2RegBanks.find(Name);
-  if (RegBankInfo == Names2RegBanks.end())
-    return nullptr;
-  return RegBankInfo->getValue();
-}
-
 MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
     : Impl(std::move(Impl)) {}
 
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index d9dcc428943f..0a95a0ced0f5 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -1,9 +1,8 @@
 //===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,6 +35,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -129,6 +129,9 @@ public:
                const MachineJumpTableInfo &JTI);
   void convertStackObjects(yaml::MachineFunction &YMF,
                            const MachineFunction &MF, ModuleSlotTracker &MST);
+  void convertCallSiteObjects(yaml::MachineFunction &YMF,
+                              const MachineFunction &MF,
+                              ModuleSlotTracker &MST);
 
 private:
   void initRegisterMaskIds(const MachineFunction &MF);
@@ -212,10 +215,16 @@ void MIRPrinter::print(const MachineFunction &MF) {
   MST.incorporateFunction(MF.getFunction());
   convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
   convertStackObjects(YamlMF, MF, MST);
+  convertCallSiteObjects(YamlMF, MF, MST);
   if (const auto *ConstantPool = MF.getConstantPool())
     convert(YamlMF, *ConstantPool);
   if (const auto *JumpTableInfo = MF.getJumpTableInfo())
     convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+
+  const TargetMachine &TM = MF.getTarget();
+  YamlMF.MachineFuncInfo =
+      std::unique_ptr<yaml::MachineFunctionInfo>(TM.convertFuncInfoToYAML(MF));
+
   raw_string_ostream StrOS(YamlMF.Body.Value.Value);
   bool IsNewlineNeeded = false;
   for (const auto &MBB : MF) {
@@ -352,7 +361,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   // Process fixed stack objects.
   unsigned ID = 0;
-  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) {
     if (MFI.isDeadObjectIndex(I))
       continue;
 
@@ -364,17 +373,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
     YamlObject.Offset = MFI.getObjectOffset(I);
     YamlObject.Size = MFI.getObjectSize(I);
     YamlObject.Alignment = MFI.getObjectAlignment(I);
-    YamlObject.StackID = MFI.getStackID(I);
+    YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
     YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
     YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
     YMF.FixedStackObjects.push_back(YamlObject);
     StackObjectOperandMapping.insert(
-        std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
+        std::make_pair(I, FrameIndexOperand::createFixed(ID)));
   }
 
   // Process ordinary stack objects.
   ID = 0;
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
+  for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) {
     if (MFI.isDeadObjectIndex(I))
       continue;
 
@@ -391,14 +400,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
     YamlObject.Offset = MFI.getObjectOffset(I);
     YamlObject.Size = MFI.getObjectSize(I);
     YamlObject.Alignment = MFI.getObjectAlignment(I);
-    YamlObject.StackID = MFI.getStackID(I);
+    YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
 
     YMF.StackObjects.push_back(YamlObject);
     StackObjectOperandMapping.insert(std::make_pair(
-        I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+        I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
   }
 
   for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+    if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+      continue;
+
     yaml::StringValue Reg;
     printRegMIR(CSInfo.getReg(), Reg, TRI);
     if (!CSInfo.isSpilledToReg()) {
@@ -452,6 +464,39 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
   }
 }
 
+/// Convert the call site info recorded on \p MF into YAML entries appended to
+/// \p YMF.CallSitesInfo, sorted by block number and then by the call's offset
+/// within its block. \p MST is not used by this function.
+void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
+                                        const MachineFunction &MF,
+                                        ModuleSlotTracker &MST) {
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  for (const auto &CSInfo : MF.getCallSitesInfo()) {
+    YMF.CallSitesInfo.emplace_back();
+    yaml::CallSiteInfo &YmlCS = YMF.CallSitesInfo.back();
+    // Locate the call: its block number plus its offset from the block start.
+    MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator();
+    const MachineBasicBlock *CallMBB = CallI->getParent();
+    YmlCS.CallLocation.BlockNum = CallMBB->getNumber();
+    YmlCS.CallLocation.Offset = std::distance(CallMBB->begin(), CallI);
+    // Construct call arguments and their forwarding register info.
+    for (const auto &ArgReg : CSInfo.second) {
+      YmlCS.ArgForwardingRegs.emplace_back();
+      auto &YmlArgReg = YmlCS.ArgForwardingRegs.back();
+      YmlArgReg.ArgNo = ArgReg.ArgNo;
+      printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
+    }
+  }
+
+  // Sort call info by position of call instructions.
+  llvm::sort(YMF.CallSitesInfo.begin(), YMF.CallSitesInfo.end(),
+             [](const yaml::CallSiteInfo &A, const yaml::CallSiteInfo &B) {
+               if (A.CallLocation.BlockNum == B.CallLocation.BlockNum)
+                 return A.CallLocation.Offset < B.CallLocation.Offset;
+               return A.CallLocation.BlockNum < B.CallLocation.BlockNum;
+             });
+}
+
 void MIRPrinter::convert(yaml::MachineFunction &MF,
                          const MachineConstantPool &ConstantPool) {
   unsigned ID = 0;
@@ -706,6 +751,8 @@ void MIPrinter::print(const MachineInstr &MI) {
     OS << "nsw ";
   if (MI.getFlag(MachineInstr::IsExact))
     OS << "exact ";
+  if (MI.getFlag(MachineInstr::FPExcept))
+    OS << "fpexcept ";
 
   OS << TII->getName(MI.getOpcode());
   if (I < E)
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 1a8427430ea0..e032fffd658c 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -1,9 +1,8 @@
 //===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 03771bc5dae1..4d29e883d879 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -133,8 +132,12 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
                                                        instr_iterator First,
                                                        instr_iterator Last) {
   assert(Parent->getParent() == FromList.Parent->getParent() &&
-        "MachineInstr parent mismatch!");
-  assert(this != &FromList && "Called without a real transfer...");
+         "cannot transfer MachineInstrs between MachineFunctions");
+
+  // If it's within the same BB, there's nothing to do.
+  if (this == &FromList)
+    return;
+
   assert(Parent != FromList.Parent && "Two lists have the same parent?");
 
   // If splicing between two blocks within the same function, just update the
@@ -995,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
     while (!KilledRegs.empty()) {
       unsigned Reg = KilledRegs.pop_back_val();
       for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
-        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+        if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
           continue;
         if (TargetRegisterInfo::isVirtualRegister(Reg))
           LV->getVarInfo(Reg).Kills.push_back(&*I);
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 3459a9f71a73..53a35b7e89c2 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
 //===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4fee9c4ea027..639b588766a1 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,9 +1,8 @@
 //===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -452,15 +451,28 @@ class MachineBlockPlacement : public MachineFunctionPass {
 
   void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
                   BlockFilterSet *BlockFilter = nullptr);
+  bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
+                               const MachineBasicBlock *OldTop);
+  bool hasViableTopFallthrough(const MachineBasicBlock *Top,
+                               const BlockFilterSet &LoopBlockSet);
+  BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+                                    const BlockFilterSet &LoopBlockSet);
+  BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+                                  const MachineBasicBlock *OldTop,
+                                  const MachineBasicBlock *ExitBB,
+                                  const BlockFilterSet &LoopBlockSet);
+  MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopTop(
       const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
   MachineBasicBlock *findBestLoopExit(
-      const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+      const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+      BlockFrequency &ExitFreq);
   BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
   void buildLoopChains(const MachineLoop &L);
   void rotateLoop(
       BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
-      const BlockFilterSet &LoopBlockSet);
+      BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
   void rotateLoopWithProfile(
       BlockChain &LoopChain, const MachineLoop &L,
       const BlockFilterSet &LoopBlockSet);
@@ -938,8 +950,8 @@ MachineBlockPlacement::getBestNonConflictingEdges(
   // Sort for highest frequency.
   auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
 
-  std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
-  std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+  llvm::stable_sort(Edges[0], Cmp);
+  llvm::stable_sort(Edges[1], Cmp);
   auto BestA = Edges[0].begin();
   auto BestB = Edges[1].begin();
   // Arrange for the correct answer to be in BestA and BestB
@@ -1527,15 +1539,12 @@ MachineBlockPlacement::selectBestSuccessor(
   // profitable than BestSucc. Position is important because we preserve it and
   // prefer first best match. Here we aren't comparing in order, so we capture
   // the position instead.
-  if (DupCandidates.size() != 0) {
-    auto cmp =
-        [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
-           const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
-          return std::get<0>(a) > std::get<0>(b);
-        };
-    std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
-  }
-  for(auto &Tup : DupCandidates) {
+  llvm::stable_sort(DupCandidates,
+                    [](std::tuple<BranchProbability, MachineBasicBlock *> L,
+                       std::tuple<BranchProbability, MachineBasicBlock *> R) {
+                      return std::get<0>(L) > std::get<0>(R);
+                    });
+  for (auto &Tup : DupCandidates) {
     BranchProbability DupProb;
     MachineBasicBlock *Succ;
     std::tie(DupProb, Succ) = Tup;
@@ -1757,63 +1766,238 @@ void MachineBlockPlacement::buildChain(
                     << getBlockName(*Chain.begin()) << "\n");
 }
 
-/// Find the best loop top block for layout.
+// If the bottom block BB has only one successor, OldTop, in most cases it is
+// profitable to move it before OldTop, except in the following case:
+//
+//     -->OldTop<-
+//     |    .    |
+//     |    .    |
+//     |    .    |
+//     ---Pred   |
+//          |    |
+//         BB-----
+//
+// If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
+// lay out the other successor below it, so no taken branch is eliminated.
+// In this case we keep its original layout.
+bool
+MachineBlockPlacement::canMoveBottomBlockToTop(
+    const MachineBasicBlock *BottomBlock,
+    const MachineBasicBlock *OldTop) {
+  if (BottomBlock->pred_size() != 1)
+    return true;
+  MachineBasicBlock *Pred = *BottomBlock->pred_begin();
+  if (Pred->succ_size() != 2)
+    return true;
+
+  MachineBasicBlock *OtherBB = *Pred->succ_begin();
+  if (OtherBB == BottomBlock)
+    OtherBB = *Pred->succ_rbegin();
+  if (OtherBB == OldTop)
+    return false;
+
+  return true;
+}
+
+// Find out the possible fall through frequency to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+    const MachineBasicBlock *Top,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency MaxFreq = 0;
+  for (MachineBasicBlock *Pred : Top->predecessors()) {
+    BlockChain *PredChain = BlockToChain[Pred];
+    if (!LoopBlockSet.count(Pred) &&
+        (!PredChain || Pred == *std::prev(PredChain->end()))) {
+      // Found a Pred block that can be placed before Top.
+      // Check if Top is the best successor of Pred.
+      auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+      bool TopOK = true;
+      for (MachineBasicBlock *Succ : Pred->successors()) {
+        auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+        BlockChain *SuccChain = BlockToChain[Succ];
+        // Check if Succ can be placed after Pred.
+        // Succ should not be in any chain, or it is the head of some chain.
+        if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+            (!SuccChain || Succ == *SuccChain->begin())) {
+          TopOK = false;
+          break;
+        }
+      }
+      if (TopOK) {
+        BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+                                  MBPI->getEdgeProbability(Pred, Top);
+        if (EdgeFreq > MaxFreq)
+          MaxFreq = EdgeFreq;
+      }
+    }
+  }
+  return MaxFreq;
+}
+
+// Compute the fall through gains when moving NewTop before OldTop.
+//
+// In the following diagram, edges marked "-" are reduced fallthrough and
+// edges marked "+" are increased fallthrough. This function computes
+//
+//      SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+//              |
+//              | -
+//              V
+//        --->OldTop
+//        |     .
+//        |     .
+//       +|     .    +
+//        |   Pred --->
+//        |     |-
+//        |     V
+//        --- NewTop <---
+//              |-
+//              V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+    const MachineBasicBlock *NewTop,
+    const MachineBasicBlock *OldTop,
+    const MachineBasicBlock *ExitBB,
+    const BlockFilterSet &LoopBlockSet) {
+  BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+  BlockFrequency FallThrough2Exit = 0;
+  if (ExitBB)
+    FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+        MBPI->getEdgeProbability(NewTop, ExitBB);
+  BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+      MBPI->getEdgeProbability(NewTop, OldTop);
+
+  // Find the best Pred of NewTop.
+   MachineBasicBlock *BestPred = nullptr;
+   BlockFrequency FallThroughFromPred = 0;
+   for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+     if (!LoopBlockSet.count(Pred))
+       continue;
+     BlockChain *PredChain = BlockToChain[Pred];
+     if (!PredChain || Pred == *std::prev(PredChain->end())) {
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+           MBPI->getEdgeProbability(Pred, NewTop);
+       if (EdgeFreq > FallThroughFromPred) {
+         FallThroughFromPred = EdgeFreq;
+         BestPred = Pred;
+       }
+     }
+   }
+
+   // If NewTop is not placed after Pred, another successor can be placed
+   // after Pred.
+   BlockFrequency NewFreq = 0;
+   if (BestPred) {
+     for (MachineBasicBlock *Succ : BestPred->successors()) {
+       if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+         continue;
+       if (ComputedEdges.find(Succ) != ComputedEdges.end())
+         continue;
+       BlockChain *SuccChain = BlockToChain[Succ];
+       if ((SuccChain && (Succ != *SuccChain->begin())) ||
+           (SuccChain == BlockToChain[BestPred]))
+         continue;
+       BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+           MBPI->getEdgeProbability(BestPred, Succ);
+       if (EdgeFreq > NewFreq)
+         NewFreq = EdgeFreq;
+     }
+     BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+         MBPI->getEdgeProbability(BestPred, NewTop);
+     if (NewFreq > OrigEdgeFreq) {
+       // If NewTop is not the best successor of Pred, then Pred doesn't
+       // fallthrough to NewTop. So there is no FallThroughFromPred and
+       // NewFreq.
+       NewFreq = 0;
+       FallThroughFromPred = 0;
+     }
+   }
+
+   BlockFrequency Result = 0;
+   BlockFrequency Gains = BackEdgeFreq + NewFreq;
+   BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+       FallThroughFromPred;
+   if (Gains > Lost)
+     Result = Gains - Lost;
+   return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from predecessors of old top.
+///
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+///     1. a block has only one successor, the old loop top
+///
+///        Because such a block will always result in an unconditional jump,
+///        rotating it in front of the old top is always profitable.
+///
+///     2. a block has two successors, one is old top, another is exit
+///        and it has more than one predecessor
 ///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+///        If it is below one of its predecessors P, only P can fall through to
+///        it, all other predecessors need a jump to it, and another conditional
+///        jump to loop header. If it is moved before loop header, all its
+///        predecessors jump to it, then fall through to loop header. So all its
+///        predecessors except P can reduce one taken branch.
+///        At the same time, moving it before old top increases the taken branch
+///        to loop exit block, so the reduced taken branch will be compared with
+///        the increased taken branch to the loop exit block.
 MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
-                                       const BlockFilterSet &LoopBlockSet) {
-  // Placing the latch block before the header may introduce an extra branch
-  // that skips this block the first time the loop is executed, which we want
-  // to avoid when optimising for size.
-  // FIXME: in theory there is a case that does not introduce a new branch,
-  // i.e. when the layout predecessor does not fallthrough to the loop header.
-  // In practice this never happens though: there always seems to be a preheader
-  // that can fallthrough and that is also placed before the header.
-  if (F->getFunction().optForSize())
-    return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+    MachineBasicBlock *OldTop,
+    const MachineLoop &L,
+    const BlockFilterSet &LoopBlockSet) {
   // Check that the header hasn't been fused with a preheader block due to
   // crazy branches. If it has, we need to start with the header at the top to
   // prevent pulling the preheader into the loop body.
-  BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+  BlockChain &HeaderChain = *BlockToChain[OldTop];
   if (!LoopBlockSet.count(*HeaderChain.begin()))
-    return L.getHeader();
+    return OldTop;
 
-  LLVM_DEBUG(dbgs() << "Finding best loop top for: "
-                    << getBlockName(L.getHeader()) << "\n");
+  LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+                    << "\n");
 
-  BlockFrequency BestPredFreq;
+  BlockFrequency BestGains = 0;
   MachineBasicBlock *BestPred = nullptr;
-  for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+  for (MachineBasicBlock *Pred : OldTop->predecessors()) {
     if (!LoopBlockSet.count(Pred))
       continue;
-    LLVM_DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has "
+    if (Pred == L.getHeader())
+      continue;
+    LLVM_DEBUG(dbgs() << "   old top pred: " << getBlockName(Pred) << ", has "
                       << Pred->succ_size() << " successors, ";
                MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
-    if (Pred->succ_size() > 1)
+    if (Pred->succ_size() > 2)
       continue;
 
-    BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
-    if (!BestPred || PredFreq > BestPredFreq ||
-        (!(PredFreq < BestPredFreq) &&
-         Pred->isLayoutSuccessor(L.getHeader()))) {
+    MachineBasicBlock *OtherBB = nullptr;
+    if (Pred->succ_size() == 2) {
+      OtherBB = *Pred->succ_begin();
+      if (OtherBB == OldTop)
+        OtherBB = *Pred->succ_rbegin();
+    }
+
+    if (!canMoveBottomBlockToTop(Pred, OldTop))
+      continue;
+
+    BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+                                            LoopBlockSet);
+    if ((Gains > 0) && (Gains > BestGains ||
+        ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
       BestPred = Pred;
-      BestPredFreq = PredFreq;
+      BestGains = Gains;
     }
   }
 
   // If no direct predecessor is fine, just use the loop header.
   if (!BestPred) {
     LLVM_DEBUG(dbgs() << "    final top unchanged\n");
-    return L.getHeader();
+    return OldTop;
   }
 
   // Walk backwards through any straight line of predecessors.
@@ -1826,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
   return BestPred;
 }
 
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper, until no new better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+                                       const BlockFilterSet &LoopBlockSet) {
+  // Placing the latch block before the header may introduce an extra branch
+  // that skips this block the first time the loop is executed, which we want
+  // to avoid when optimising for size.
+  // FIXME: in theory there is a case that does not introduce a new branch,
+  // i.e. when the layout predecessor does not fallthrough to the loop header.
+  // In practice this never happens though: there always seems to be a preheader
+  // that can fallthrough and that is also placed before the header.
+  if (F->getFunction().hasOptSize())
+    return L.getHeader();
+
+  MachineBasicBlock *OldTop = nullptr;
+  MachineBasicBlock *NewTop = L.getHeader();
+  while (NewTop != OldTop) {
+    OldTop = NewTop;
+    NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+    if (NewTop != OldTop)
+      ComputedEdges[NewTop] = { OldTop, false };
+  }
+  return NewTop;
+}
+
 /// Find the best loop exiting block for layout.
 ///
 /// This routine implements the logic to analyze the loop looking for the best
@@ -1833,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
 /// fallthrough opportunities.
 MachineBasicBlock *
 MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
-                                        const BlockFilterSet &LoopBlockSet) {
+                                        const BlockFilterSet &LoopBlockSet,
+                                        BlockFrequency &ExitFreq) {
   // We don't want to layout the loop linearly in all cases. If the loop header
   // is just a normal basic block in the loop, we want to look for what block
   // within the loop is the best one to layout at the top. However, if the loop
@@ -1944,9 +2157,43 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
 
   LLVM_DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB)
                     << "\n");
+  ExitFreq = BestExitEdgeFreq;
   return ExitingBB;
 }
 
+/// Check if there is a fallthrough to loop header Top.
+///
+///   1. Look for a Pred that can be laid out before Top.
+///   2. Check if Top is the most probable successor of Pred.
+bool
+MachineBlockPlacement::hasViableTopFallthrough(
+    const MachineBasicBlock *Top,
+    const BlockFilterSet &LoopBlockSet) {
+  for (MachineBasicBlock *Pred : Top->predecessors()) {
+    BlockChain *PredChain = BlockToChain[Pred];
+    if (!LoopBlockSet.count(Pred) &&
+        (!PredChain || Pred == *std::prev(PredChain->end()))) {
+      // Found a Pred block that can be placed before Top.
+      // Check if Top is the best successor of Pred.
+      auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+      bool TopOK = true;
+      for (MachineBasicBlock *Succ : Pred->successors()) {
+        auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+        BlockChain *SuccChain = BlockToChain[Succ];
+        // Check if Succ can be placed after Pred.
+        // Succ should not be in any chain, or it is the head of some chain.
+        if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
+          TopOK = false;
+          break;
+        }
+      }
+      if (TopOK)
+        return true;
+    }
+  }
+  return false;
+}
+
 /// Attempt to rotate an exiting block to the bottom of the loop.
 ///
 /// Once we have built a chain, try to rotate it to line up the hot exit block
@@ -1955,6 +2202,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
 /// of its bottom already, don't rotate it.
 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
                                        const MachineBasicBlock *ExitingBB,
+                                       BlockFrequency ExitFreq,
                                        const BlockFilterSet &LoopBlockSet) {
   if (!ExitingBB)
     return;
@@ -1966,15 +2214,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
   if (Bottom == ExitingBB)
     return;
 
-  bool ViableTopFallthrough = false;
-  for (MachineBasicBlock *Pred : Top->predecessors()) {
-    BlockChain *PredChain = BlockToChain[Pred];
-    if (!LoopBlockSet.count(Pred) &&
-        (!PredChain || Pred == *std::prev(PredChain->end()))) {
-      ViableTopFallthrough = true;
-      break;
-    }
-  }
+  bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
 
   // If the header has viable fallthrough, check whether the current loop
   // bottom is a viable exiting block. If so, bail out as rotating will
@@ -1986,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
           (!SuccChain || Succ == *SuccChain->begin()))
         return;
     }
+
+    // Rotating will destroy the top fallthrough, so we need to ensure the new
+    // exit frequency is larger than the top fallthrough frequency.
+    BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+    if (FallThrough2Top >= ExitFreq)
+      return;
   }
 
   BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2041,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
 void MachineBlockPlacement::rotateLoopWithProfile(
     BlockChain &LoopChain, const MachineLoop &L,
     const BlockFilterSet &LoopBlockSet) {
-  auto HeaderBB = L.getHeader();
-  auto HeaderIter = llvm::find(LoopChain, HeaderBB);
   auto RotationPos = LoopChain.end();
 
   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2062,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
   // chain head is not the loop header. As we only consider natural loops with
   // single header, this computation can be done only once.
   BlockFrequency HeaderFallThroughCost(0);
-  for (auto *Pred : HeaderBB->predecessors()) {
+  MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+  for (auto *Pred : ChainHeaderBB->predecessors()) {
     BlockChain *PredChain = BlockToChain[Pred];
     if (!LoopBlockSet.count(Pred) &&
         (!PredChain || Pred == *std::prev(PredChain->end()))) {
-      auto EdgeFreq =
-          MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+      auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+          MBPI->getEdgeProbability(Pred, ChainHeaderBB);
       auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
       // If the predecessor has only an unconditional jump to the header, we
       // need to consider the cost of this jump.
@@ -2117,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
     // If the current BB is the loop header, we need to take into account the
     // cost of the missed fall through edge from outside of the loop to the
     // header.
-    if (Iter != HeaderIter)
+    if (Iter != LoopChain.begin())
       Cost += HeaderFallThroughCost;
 
     // Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2238,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   // loop. This will default to the header, but may end up as one of the
   // predecessors to the header if there is one which will result in strictly
   // fewer branches in the loop body.
-  // When we use profile data to rotate the loop, this is unnecessary.
-  MachineBasicBlock *LoopTop =
-      RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+  MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
 
   // If we selected just the header for the loop top, look for a potentially
   // profitable exit block in the event that rotating the loop can eliminate
@@ -2249,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   // Loops are processed innermost to uttermost, make sure we clear
   // PreferredLoopExit before processing a new loop.
   PreferredLoopExit = nullptr;
+  BlockFrequency ExitFreq;
   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
-    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+    PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
 
   BlockChain &LoopChain = *BlockToChain[LoopTop];
 
@@ -2270,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   if (RotateLoopWithProfile)
     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
   else
-    rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+    rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
 
   LLVM_DEBUG({
     // Crash at the end so we get all of the debugging output first.
@@ -2497,8 +2741,8 @@ void MachineBlockPlacement::alignBlocks() {
   // exclusively on the loop info here so that we can align backedges in
   // unnatural CFGs and backedges that were introduced purely because of the
   // loop rotations done during this layout pass.
-  if (F->getFunction().optForMinSize() ||
-      (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
+  if (F->getFunction().hasMinSize() ||
+      (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize()))
     return;
   BlockChain &FunctionChain = *BlockToChain[&F->front()];
   if (FunctionChain.begin() == FunctionChain.end())
@@ -2773,7 +3017,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
 
   if (allowTailDupPlacement()) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
-    if (MF.getFunction().optForSize())
+    if (MF.getFunction().hasOptSize())
       TailDupSize = 1;
     bool PreRegAlloc = false;
     TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2796,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
 
     if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
                             getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
-                            /*AfterBlockPlacement=*/true)) {
+                            /*AfterPlacement=*/true)) {
       // Redo the layout if tail merging creates/removes/moves blocks.
       BlockToChain.clear();
       ComputedEdges.clear();
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index e4952aaaba06..d2277ce51746 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
 //===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6ee8571c28aa..2df6d40d9293 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -1,9 +1,8 @@
 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -50,6 +50,8 @@ using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
+STATISTIC(NumPREs,      "Number of partial redundant expression"
+                        " transformed to fully redundant");
 STATISTIC(NumPhysCSEs,
           "Number of physreg referencing common subexpr eliminated");
 STATISTIC(NumCrossBBCSEs,
@@ -85,6 +87,7 @@ namespace {
 
     void releaseMemory() override {
       ScopeMap.clear();
+      PREMap.clear();
       Exps.clear();
     }
 
@@ -95,9 +98,12 @@ namespace {
         ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                         AllocatorTy>;
     using ScopeType = ScopedHTType::ScopeTy;
+    using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
 
     unsigned LookAheadLimit = 0;
     DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
+    DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
+        PREMap;
     ScopedHTType VNT;
     SmallVector<MachineInstr *, 64> Exps;
     unsigned CurrVN = 0;
@@ -109,22 +115,24 @@ namespace {
                                 MachineBasicBlock::const_iterator E) const;
     bool hasLivePhysRegDefUses(const MachineInstr *MI,
                                const MachineBasicBlock *MBB,
-                               SmallSet<unsigned,8> &PhysRefs,
-                               SmallVectorImpl<unsigned> &PhysDefs,
-                               bool &PhysUseDef) const;
+                               SmallSet<unsigned, 8> &PhysRefs,
+                               PhysDefVector &PhysDefs, bool &PhysUseDef) const;
     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
-                          SmallSet<unsigned,8> &PhysRefs,
-                          SmallVectorImpl<unsigned> &PhysDefs,
-                          bool &NonLocal) const;
+                          SmallSet<unsigned, 8> &PhysRefs,
+                          PhysDefVector &PhysDefs, bool &NonLocal) const;
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
-                           MachineInstr *CSMI, MachineInstr *MI);
+                           MachineBasicBlock *CSBB, MachineInstr *MI);
     void EnterScope(MachineBasicBlock *MBB);
     void ExitScope(MachineBasicBlock *MBB);
-    bool ProcessBlock(MachineBasicBlock *MBB);
+    bool ProcessBlockCSE(MachineBasicBlock *MBB);
     void ExitScopeIfDone(MachineDomTreeNode *Node,
                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
     bool PerformCSE(MachineDomTreeNode *Node);
+
+    bool isPRECandidate(MachineInstr *MI);
+    bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
+    bool PerformSimplePRE(MachineDominatorTree *DT);
   };
 
 } // end anonymous namespace
@@ -256,9 +264,9 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
 /// instruction does not uses a physical register.
 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                        const MachineBasicBlock *MBB,
-                                       SmallSet<unsigned,8> &PhysRefs,
-                                       SmallVectorImpl<unsigned> &PhysDefs,
-                                       bool &PhysUseDef) const{
+                                       SmallSet<unsigned, 8> &PhysRefs,
+                                       PhysDefVector &PhysDefs,
+                                       bool &PhysUseDef) const {
   // First, add all uses to PhysRefs.
   for (const MachineOperand &MO : MI->operands()) {
     if (!MO.isReg() || MO.isDef())
@@ -278,7 +286,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
   // (which currently contains only uses), set the PhysUseDef flag.
   PhysUseDef = false;
   MachineBasicBlock::const_iterator I = MI; I = std::next(I);
-  for (const MachineOperand &MO : MI->operands()) {
+  for (const auto &MOP : llvm::enumerate(MI->operands())) {
+    const MachineOperand &MO = MOP.value();
     if (!MO.isReg() || !MO.isDef())
       continue;
     unsigned Reg = MO.getReg();
@@ -293,20 +302,21 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
     // common since this pass is run before livevariables. We can scan
     // forward a few instructions and check if it is obviously dead.
     if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
-      PhysDefs.push_back(Reg);
+      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
   }
 
   // Finally, add all defs to PhysRefs as well.
   for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
-    for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
+         ++AI)
       PhysRefs.insert(*AI);
 
   return !PhysRefs.empty();
 }
 
 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
-                                  SmallSet<unsigned,8> &PhysRefs,
-                                  SmallVectorImpl<unsigned> &PhysDefs,
+                                  SmallSet<unsigned, 8> &PhysRefs,
+                                  PhysDefVector &PhysDefs,
                                   bool &NonLocal) const {
   // For now conservatively returns false if the common subexpression is
   // not in the same basic block as the given instruction. The only exception
@@ -320,7 +330,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
       return false;
 
     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
-      if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+      if (MRI->isAllocatable(PhysDefs[i].second) ||
+          MRI->isReserved(PhysDefs[i].second))
         // Avoid extending live range of physical registers if they are
         //allocatable or reserved.
         return false;
@@ -381,7 +392,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
 
   // Ignore stuff that we obviously can't move.
   if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
-      MI->hasUnmodeledSideEffects())
+      MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
     return false;
 
   if (MI->mayLoad()) {
@@ -404,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
 }
 
 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
-/// common expression that defines Reg.
+/// common expression that defines Reg. CSBB is the basic block where CSReg
+/// is defined.
 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
-                                   MachineInstr *CSMI, MachineInstr *MI) {
+                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
   // FIXME: Heuristics that works around the lack the live range splitting.
 
   // If CSReg is used at all uses of Reg, CSE should not increase register
@@ -432,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
   // an immediate predecessor. We don't want to increase register pressure and
   // end up causing other computation to be spilled.
   if (TII->isAsCheapAsAMove(*MI)) {
-    MachineBasicBlock *CSBB = CSMI->getParent();
     MachineBasicBlock *BB = MI->getParent();
     if (CSBB != BB && !CSBB->isSuccessor(BB))
       return false;
@@ -487,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
   ScopeMap.erase(SI);
 }
 
-bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
   bool Changed = false;
 
   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -536,7 +547,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     // It's also not safe if the instruction uses physical registers.
     bool CrossMBBPhysDef = false;
     SmallSet<unsigned, 8> PhysRefs;
-    SmallVector<unsigned, 2> PhysDefs;
+    PhysDefVector PhysDefs;
     bool PhysUseDef = false;
     if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
                                           PhysDefs, PhysUseDef)) {
@@ -597,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
              TargetRegisterInfo::isVirtualRegister(NewReg) &&
              "Do not CSE physical register defs!");
 
-      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
         LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
         DoCSE = false;
         break;
@@ -635,6 +646,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       // we should make sure it is not dead at CSMI.
       for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
         CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
+      for (auto PhysDef : PhysDefs)
+        if (!MI->getOperand(PhysDef.first).isDead())
+          CSMI->getOperand(PhysDef.first).setIsDead(false);
 
       // Go through implicit defs of CSMI and MI, and clear the kill flags on
       // their uses in all the instructions between CSMI and MI.
@@ -663,9 +677,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
         // Add physical register defs now coming in from a predecessor to MBB
         // livein list.
         while (!PhysDefs.empty()) {
-          unsigned LiveIn = PhysDefs.pop_back_val();
-          if (!MBB->isLiveIn(LiveIn))
-            MBB->addLiveIn(LiveIn);
+          auto LiveIn = PhysDefs.pop_back_val();
+          if (!MBB->isLiveIn(LiveIn.second))
+            MBB->addLiveIn(LiveIn.second);
         }
         ++NumCrossBBCSEs;
       }
@@ -734,7 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
   for (MachineDomTreeNode *Node : Scopes) {
     MachineBasicBlock *MBB = Node->getBlock();
     EnterScope(MBB);
-    Changed |= ProcessBlock(MBB);
+    Changed |= ProcessBlockCSE(MBB);
     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
     ExitScopeIfDone(Node, OpenChildren);
   }
@@ -742,6 +756,104 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
   return Changed;
 }
 
+// PRE candidates are checked more strictly than CSE candidates: besides
+// isCSECandidate(), the checks also cover conditions from ProcessBlockCSE().
+// This helps to exclude instrs created by PRE that won't be CSEed later.
+bool MachineCSE::isPRECandidate(MachineInstr *MI) {
+  if (!isCSECandidate(MI) ||
+      MI->isNotDuplicable() ||
+      MI->mayLoad() ||
+      MI->isAsCheapAsAMove() ||
+      MI->getNumDefs() != 1 ||
+      MI->getNumExplicitDefs() != 1)
+    return false;
+  // Reject MI if any def or any register use is not a virtual register.
+  for (const MachineOperand &Def : MI->defs())
+    if (!TRI->isVirtualRegister(Def.getReg()))
+      return false;
+
+  for (const MachineOperand &Use : MI->uses())
+    if (Use.isReg() && !TRI->isVirtualRegister(Use.getReg()))
+      return false;
+
+  return true;
+}
+
+bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
+                                 MachineBasicBlock *MBB) {
+  bool Changed = false;
+  for (auto It = MBB->begin(), End = MBB->end(); It != End;) {
+    MachineInstr *MI = &*It;
+    ++It;
+    if (!isPRECandidate(MI))
+      continue;
+
+    // MI has no entry in PREMap yet: record the block it was seen in.
+    if (PREMap.count(MI) == 0) {
+      PREMap[MI] = MBB;
+      continue;
+    }
+
+    auto PrevMBB = PREMap[MI];
+    assert(
+        !DT->properlyDominates(MBB, PrevMBB) &&
+        "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
+    auto CommonDom = DT->findNearestCommonDominator(MBB, PrevMBB);
+    if (!CommonDom->isLegalToHoistInto())
+      continue;
+
+    // Two instrs are partial redundant if their basic blocks are reachable
+    // from one to another but one doesn't dominate another.
+    if (CommonDom == PrevMBB)
+      continue;
+    auto BB = MBB->getBasicBlock(), PrevBB = PrevMBB->getBasicBlock();
+    if (BB == nullptr || PrevBB == nullptr)
+      continue;
+    if (!isPotentiallyReachable(PrevBB, BB) &&
+        !isPotentiallyReachable(BB, PrevBB))
+      continue;
+
+    assert(MI->getOperand(0).isDef() &&
+           "First operand of instr with one explicit def must be this def");
+    unsigned OldReg = MI->getOperand(0).getReg();
+    unsigned ClonedReg = MRI->cloneVirtualRegister(OldReg);
+    if (!isProfitableToCSE(ClonedReg, OldReg, CommonDom, MI))
+      continue;
+    MachineInstr &Copy =
+        TII->duplicate(*CommonDom, CommonDom->getFirstTerminator(), *MI);
+    Copy.getOperand(0).setReg(ClonedReg);
+    PREMap[MI] = CommonDom;
+    ++NumPREs;
+    Changed = true;
+  }
+  return Changed;
+}
+
+// This simple PRE (partial redundancy elimination) pass doesn't actually
+// eliminate partial redundancy but transforms it to full redundancy,
+// anticipating that the next CSE step will eliminate this created redundancy.
+// If CSE doesn't eliminate this, then the created instruction will remain
+// dead and be eliminated later by Remove Dead Machine Instructions pass.
+bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
+  PREMap.clear();
+
+  // Walk the dominator tree with an explicit stack, starting at the root.
+  SmallVector<MachineDomTreeNode *, 32> Worklist;
+  Worklist.push_back(DT->getRootNode());
+  bool Changed = false;
+  while (!Worklist.empty()) {
+    MachineDomTreeNode *Node = Worklist.pop_back_val();
+    // Children are pushed before the node's own block is processed.
+    for (MachineDomTreeNode *Child : Node->getChildren())
+      Worklist.push_back(Child);
+
+    MachineBasicBlock *MBB = Node->getBlock();
+    Changed |= ProcessBlockPRE(DT, MBB);
+  }
+
+  return Changed;
+}
+
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -752,5 +864,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   DT = &getAnalysis<MachineDominatorTree>();
   LookAheadLimit = TII->getMachineCSELookAheadLimit();
-  return PerformCSE(DT->getRootNode());
+  bool ChangedPRE, ChangedCSE;
+  ChangedPRE = PerformSimplePRE(DT);
+  ChangedCSE = PerformCSE(DT->getRootNode());
+  return ChangedPRE || ChangedCSE;
 }
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index f51b482e20e3..0584ec0bd2b3 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -1,9 +1,8 @@
 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -559,16 +558,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
         continue;
 
       LLVM_DEBUG(if (dump_intrs) {
-        dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
-        for (auto const *InstrPtr : DelInstrs) {
-          dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
-          InstrPtr->print(dbgs(), false, false, false, TII);
-        }
+        dbgs() << "\tFor the Pattern (" << (int)P
+               << ") these instructions could be removed\n";
+        for (auto const *InstrPtr : DelInstrs)
+          InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+                          /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
         dbgs() << "\tThese instructions could replace the removed ones\n";
-        for (auto const *InstrPtr : InsInstrs) {
-          dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
-          InstrPtr->print(dbgs(), false, false, false, TII);
-        }
+        for (auto const *InstrPtr : InsInstrs)
+          InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+                          /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
       });
 
       bool SubstituteAlways = false;
@@ -641,7 +639,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
   MLI = &getAnalysis<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
-  OptSize = MF.getFunction().optForSize();
+  OptSize = MF.getFunction().hasOptSize();
 
   LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
   if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 19879fe89007..9fc12ac89e12 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1,9 +1,8 @@
 //===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index b559e4e513a6..6704298c17d6 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -1,9 +1,8 @@
 //===- MachineDominanceFrontier.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 6b2802626456..1dfba8638c22 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -1,9 +1,8 @@
 //===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index 0b316871dbdf..bae3a4333bda 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MachineFrameInfo.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,7 +56,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
                                 !IsSpillSlot, StackID));
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   assert(Index >= 0 && "Bad frame index!");
-  ensureMaxAlignment(Alignment);
+  if (StackID == 0)
+    ensureMaxAlignment(Alignment);
   return Index;
 }
 
@@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
   Objects.insert(Objects.begin(),
                  StackObject(Size, Alignment, SPOffset, IsImmutable,
-                             /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+                             /*IsSpillSlot=*/false, /*Alloca=*/nullptr,
                              IsAliased));
   return -++NumFixedObjects;
 }
@@ -142,11 +142,15 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
   // should keep in mind that there's tight coupling between the two.
 
   for (int i = getObjectIndexBegin(); i != 0; ++i) {
+    // Only estimate stack size of default stack.
+    if (getStackID(i) != TargetStackID::Default)
+      continue;
     int FixedOff = -getObjectOffset(i);
     if (FixedOff > Offset) Offset = FixedOff;
   }
   for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
-    if (isDeadObjectIndex(i))
+    // Only estimate stack size of live objects on default stack.
+    if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default)
       continue;
     Offset += getObjectSize(i);
     unsigned Align = getObjectAlignment(i);
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 3495319670a5..4df5ce2dcedc 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -1,9 +1,8 @@
 //===- MachineFunction.cpp ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,6 +43,7 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -165,7 +165,7 @@ void MachineFunction::init() {
                       !F.hasFnAttribute("no-realign-stack");
   FrameInfo = new (Allocator) MachineFrameInfo(
       getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
-      /*ForceRealign=*/CanRealignSP &&
+      /*ForcedRealign=*/CanRealignSP &&
           F.hasFnAttribute(Attribute::StackAlignment));
 
   if (F.hasFnAttribute(Attribute::StackAlignment))
@@ -175,7 +175,7 @@ void MachineFunction::init() {
   Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
 
   // FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
-  // FIXME: Use Function::optForSize().
+  // FIXME: Use Function::hasOptSize().
   if (!F.hasFnAttribute(Attribute::OptimizeForSize))
     Alignment = std::max(Alignment,
                          STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -274,6 +274,12 @@ bool MachineFunction::shouldSplitStack() const {
   return getFunction().hasFnAttribute("split-stack");
 }
 
+LLVM_NODISCARD unsigned
+MachineFunction::addFrameInst(const MCCFIInstruction &Inst) {
+  FrameInstructions.push_back(Inst); // Append Inst to FrameInstructions.
+  return FrameInstructions.size() - 1; // Index of the entry just appended.
+}
+
 /// This discards all of the MachineBasicBlock numbers and recomputes them.
 /// This guarantees that the MBB numbers are sequential, dense, and match the
 /// ordering of the blocks within the function.  If a specific MachineBasicBlock
@@ -357,6 +363,13 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
 /// ~MachineInstr() destructor must be empty.
 void
 MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Verify that a call site info is at valid state. This assertion should
+  // be triggered during the implementation of support for the
+  // call site info of a new architecture. If the assertion is triggered,
+  // back trace will tell where to insert a call to updateCallSiteInfo().
+  assert((!MI->isCall(MachineInstr::IgnoreBundle) ||
+          CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
+         "Call site info was not updated!");
   // Strip it for parts. The operand array and the MI object itself are
   // independently recyclable.
   if (MI->Operands)
@@ -396,19 +409,18 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
-  if (MMO->getValue())
-    return new (Allocator)
-               MachineMemOperand(MachinePointerInfo(MMO->getValue(),
-                                                    MMO->getOffset()+Offset),
-                                 MMO->getFlags(), Size, MMO->getBaseAlignment(),
-                                 AAMDNodes(), nullptr, MMO->getSyncScopeID(),
-                                 MMO->getOrdering(), MMO->getFailureOrdering());
+  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+  // If there is no pointer value, the offset isn't tracked so we need to adjust
+  // the base alignment.
+  unsigned Align = PtrInfo.V.isNull()
+                       ? MinAlign(MMO->getBaseAlignment(), Offset)
+                       : MMO->getBaseAlignment();
+
   return new (Allocator)
-             MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
-                                                  MMO->getOffset()+Offset),
-                               MMO->getFlags(), Size, MMO->getBaseAlignment(),
-                               AAMDNodes(), nullptr, MMO->getSyncScopeID(),
-                               MMO->getOrdering(), MMO->getFailureOrdering());
+      MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
+                        Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+                        MMO->getOrdering(), MMO->getFailureOrdering());
 }
 
 MachineMemOperand *
@@ -425,6 +437,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                MMO->getOrdering(), MMO->getFailureOrdering());
 }
 
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+    const MachineMemOperand *MMO, MachineMemOperand::Flags Flags) {
+  // New operand: every property is taken from MMO except the flags.
+  return new (Allocator) MachineMemOperand(
+      MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(),
+      MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+      MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
 MachineInstr::ExtraInfo *
 MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
                                    MCSymbol *PreInstrSymbol,
@@ -802,6 +823,32 @@ try_next:;
   return FilterID;
 }
 
+void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) {
+  // Bracket I with two new temp symbols; record them with MD as a DIType.
+  MCSymbol *PreSym = Ctx.createTempSymbol("heapallocsite", true);
+  MCSymbol *PostSym = Ctx.createTempSymbol("heapallocsite", true);
+  I->setPreInstrSymbol(*this, PreSym);
+  I->setPostInstrSymbol(*this, PostSym);
+  CodeViewHeapAllocSites.push_back(
+      std::make_tuple(PreSym, PostSym, dyn_cast<DIType>(MD)));
+}
+
+void MachineFunction::updateCallSiteInfo(const MachineInstr *Old,
+                                         const MachineInstr *New) {
+  if (!Target.Options.EnableDebugEntryValues || Old == New)
+    return;
+  // Re-key Old's call site info to New; a null New just drops the entry.
+  assert(Old->isCall() && (!New || New->isCall()) &&
+         "Call site info referes only to call instructions!");
+  CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old);
+  if (CSIt == CallSitesInfo.end())
+    return;
+  CallSiteInfo CSInfo = std::move(CSIt->second);
+  CallSitesInfo.erase(CSIt);
+  if (New)
+    CallSitesInfo[New] = std::move(CSInfo); // Last use of CSInfo: move it.
+}
+
 /// \}
 
 //===----------------------------------------------------------------------===//
@@ -888,9 +935,11 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
   OS << "Jump Tables:\n";
 
   for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
-    OS << printJumpTableEntryReference(i) << ": ";
+    OS << printJumpTableEntryReference(i) << ':';
     for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
       OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+    if (i != e)
+      OS << '\n';
   }
 
   OS << '\n';
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 5db4e299fa70..0da4cf3fc90c 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -1,9 +1,8 @@
 //===-- MachineFunctionPass.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 9c96ba748778..0ea8975cc74c 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -1,9 +1,8 @@
 //===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 764a84c7e132..e5c398a2d10c 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,6 +25,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
@@ -50,9 +50,9 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
   }
 
 #ifndef NDEBUG
-  bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+  bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+                   Op.getType() == MachineOperand::MO_MCSymbol;
   // OpNo now points as the desired insertion point.  Unless this is a variadic
   // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
   // RegMask operands go between the explicit and implicit operands.
   assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
-          OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+          OpNo < MCID->getNumOperands() || isDebugOp) &&
          "Trying to add an operand to a machine instr that is already done!");
 #endif
 
@@ -512,45 +513,65 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
       MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
 }
 
+void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
+                                     const MachineInstr &MI) {
+  // Copies MI's pre- and post-instruction symbols onto this instruction.
+  const bool IsSelfClone = &MI == this;
+  if (IsSelfClone)
+    return; // Source and destination are the same instruction.
+
+  assert(MI.getMF() == &MF &&
+         "Invalid machine functions when cloning instruction symbols!");
+  setPreInstrSymbol(MF, MI.getPreInstrSymbol());
+  setPostInstrSymbol(MF, MI.getPostInstrSymbol());
+}
+
 uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
   // For now, the just return the union of the flags. If the flags get more
   // complicated over time, we might need more logic here.
   return getFlags() | Other.getFlags();
 }
 
-void MachineInstr::copyIRFlags(const Instruction &I) {
+uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+  uint16_t MIFlags = 0;
   // Copy the wrapping flags.
   if (const OverflowingBinaryOperator *OB =
           dyn_cast<OverflowingBinaryOperator>(&I)) {
     if (OB->hasNoSignedWrap())
-      setFlag(MachineInstr::MIFlag::NoSWrap);
+      MIFlags |= MachineInstr::MIFlag::NoSWrap;
     if (OB->hasNoUnsignedWrap())
-      setFlag(MachineInstr::MIFlag::NoUWrap);
+      MIFlags |= MachineInstr::MIFlag::NoUWrap;
   }
 
   // Copy the exact flag.
   if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
     if (PE->isExact())
-      setFlag(MachineInstr::MIFlag::IsExact);
+      MIFlags |= MachineInstr::MIFlag::IsExact;
 
   // Copy the fast-math flags.
   if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
     const FastMathFlags Flags = FP->getFastMathFlags();
     if (Flags.noNaNs())
-      setFlag(MachineInstr::MIFlag::FmNoNans);
+      MIFlags |= MachineInstr::MIFlag::FmNoNans;
     if (Flags.noInfs())
-      setFlag(MachineInstr::MIFlag::FmNoInfs);
+      MIFlags |= MachineInstr::MIFlag::FmNoInfs;
     if (Flags.noSignedZeros())
-      setFlag(MachineInstr::MIFlag::FmNsz);
+      MIFlags |= MachineInstr::MIFlag::FmNsz;
     if (Flags.allowReciprocal())
-      setFlag(MachineInstr::MIFlag::FmArcp);
+      MIFlags |= MachineInstr::MIFlag::FmArcp;
     if (Flags.allowContract())
-      setFlag(MachineInstr::MIFlag::FmContract);
+      MIFlags |= MachineInstr::MIFlag::FmContract;
     if (Flags.approxFunc())
-      setFlag(MachineInstr::MIFlag::FmAfn);
+      MIFlags |= MachineInstr::MIFlag::FmAfn;
     if (Flags.allowReassoc())
-      setFlag(MachineInstr::MIFlag::FmReassoc);
+      MIFlags |= MachineInstr::MIFlag::FmReassoc;
   }
+
+  return MIFlags;
+}
+
+void MachineInstr::copyIRFlags(const Instruction &I) {
+  Flags = copyFlagsFromInstruction(I); // Assigns Flags; it does not OR into it.
+}
 
 bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
@@ -1157,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
   }
 
   if (isPosition() || isDebugInstr() || isTerminator() ||
-      hasUnmodeledSideEffects())
+      mayRaiseFPException() || hasUnmodeledSideEffects())
     return false;
 
   // See if this instruction does a load.  If so, we have to guarantee that the
@@ -1173,8 +1194,8 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
   return true;
 }
 
-bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
-                            bool UseTBAA) {
+bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
+                            bool UseTBAA) const {
   const MachineFunction *MF = getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   const MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -1304,7 +1325,11 @@ bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
   const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
 
   for (MachineMemOperand *MMO : memoperands()) {
-    if (MMO->isVolatile()) return false;
+    if (!MMO->isUnordered())
+      // If the memory operand has ordering side effects, we can't move the
+      // instruction.  Such an instruction is technically an invariant load,
+      // but the caller code would need to be updated to expect that.
+      return false;
     if (MMO->isStore()) return false;
     if (MMO->isInvariant() && MMO->isDereferenceable())
       continue;
@@ -1447,7 +1472,7 @@ void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
   ModuleSlotTracker MST(M);
   if (F)
     MST.incorporateFunction(*F);
-  print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);
+  print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII);
 }
 
 void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
@@ -1519,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
     OS << "nsw ";
   if (getFlag(MachineInstr::IsExact))
     OS << "exact ";
+  if (getFlag(MachineInstr::FPExcept))
+    OS << "fpexcept ";
 
   // Print the opcode name.
   if (TII)
@@ -1905,7 +1932,7 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
 void MachineInstr::addRegisterDefined(unsigned Reg,
                                       const TargetRegisterInfo *RegInfo) {
   if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-    MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo);
+    MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
     if (MO)
       return;
   } else {
@@ -2050,7 +2077,7 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
   const DIExpression *Expr = MI.getDebugExpression();
   if (MI.isIndirectDebugValue()) {
     assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
-    Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
   }
   return Expr;
 }
@@ -2100,3 +2127,54 @@ void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
   for (auto *DBI : DbgValues)
     DBI->getOperand(0).setReg(Reg);
 }
+
+using MMOList = SmallVector<const MachineMemOperand *, 2>;
+
+static unsigned getSpillSlotSize(MMOList &Accesses,
+                                 const MachineFrameInfo &MFI) {
+  unsigned Size = 0;
+  for (auto A : Accesses)
+    if (MFI.isSpillSlotObjectIndex(
+            cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+                ->getFrameIndex()))
+      Size += A->getSize();
+  return Size;
+}
+
+Optional<unsigned>
+MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
+  int FI;
+  if (TII->isStoreToStackSlotPostFE(*this, FI)) {
+    const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+    if (MFI.isSpillSlotObjectIndex(FI))
+      return (*memoperands_begin())->getSize();
+  }
+  return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
+  MMOList Accesses;
+  if (TII->hasStoreToStackSlot(*this, Accesses))
+    return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+  return None;
+}
+
+Optional<unsigned>
+MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
+  int FI;
+  if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
+    const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+    if (MFI.isSpillSlotObjectIndex(FI))
+      return (*memoperands_begin())->getSize();
+  }
+  return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
+  MMOList Accesses;
+  if (TII->hasLoadFromStackSlot(*this, Accesses))
+    return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+  return None;
+}
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index ae378cc8c464..32e266e9401e 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -1,9 +1,8 @@
 //===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 58fd1f238420..1107e609c258 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1,9 +1,8 @@
 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 2bce59235057..3b8b430d1b0f 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -1,9 +1,8 @@
 //===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6ef8de88f8b1..aadcd7319799 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -206,11 +205,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
 bool MachineModuleInfo::doInitialization(Module &M) {
   ObjFileMMI = nullptr;
   CurCallSite = 0;
-  UsesVAFloatArgument = UsesMorestackAddr = false;
+  UsesMSVCFloatingPoint = UsesMorestackAddr = false;
   HasSplitStack = HasNosplitStack = false;
   AddrLabelSymbols = nullptr;
   TheModule = &M;
-  DbgInfoAvailable = !empty(M.debug_compile_units());
+  DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
   return false;
 }
 
@@ -328,22 +327,3 @@ char FreeMachineFunction::ID;
 FunctionPass *llvm::createFreeMachineFunctionPass() {
   return new FreeMachineFunction();
 }
-
-//===- MMI building helpers -----------------------------------------------===//
-
-void llvm::computeUsesVAFloatArgument(const CallInst &I,
-                                      MachineModuleInfo &MMI) {
-  FunctionType *FT =
-      cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
-  if (FT->isVarArg() && !MMI.usesVAFloatArgument()) {
-    for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
-      Type *T = I.getArgOperand(i)->getType();
-      for (auto i : post_order(T)) {
-        if (i->isFloatingPointTy()) {
-          MMI.setUsesVAFloatArgument(true);
-          return;
-        }
-      }
-    }
-  }
-}
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 7b4f64bfe60d..16d24880ebe4 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 05e51e1873cf..4fa4ea7f6cf5 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,6 +24,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -181,6 +181,19 @@ void MachineOperand::ChangeToES(const char *SymName,
   setTargetFlags(TargetFlags);
 }
 
+void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
+                                unsigned char TargetFlags) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a global address");
+
+  removeRegFromUses();
+
+  OpKind = MO_GlobalAddress;
+  Contents.OffsetedInfo.Val.GV = GV;
+  setOffset(Offset);
+  setTargetFlags(TargetFlags);
+}
+
 void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
   assert((!isReg() || !isTied()) &&
          "Cannot change a tied operand into an MCSymbol");
@@ -329,7 +342,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
     // Register operands don't have target flags.
-    return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+    return hash_combine(MO.getType(), (unsigned)MO.getReg(), MO.getSubReg(), MO.isDef());
   case MachineOperand::MO_Immediate:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
   case MachineOperand::MO_CImmediate:
@@ -348,7 +361,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
   case MachineOperand::MO_ExternalSymbol:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
-                        MO.getSymbolName());
+                        StringRef(MO.getSymbolName()));
   case MachineOperand::MO_GlobalAddress:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
                         MO.getOffset());
@@ -994,7 +1007,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
   assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
           isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
          "invalid pointer value");
-  assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+  assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!");
   assert((isLoad() || isStore()) && "Not a load/store!");
 
   AtomicInfo.SSID = static_cast<unsigned>(SSID);
@@ -1125,7 +1138,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
       printLLVMNameWithoutPrefix(
           OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
       break;
-    case PseudoSourceValue::TargetCustom:
+    default:
       // FIXME: This is not necessarily the correct MIR serialization format for
       // a custom pseudo source value, but at least it allows
       // -print-machineinstrs to work on a target with custom pseudo source
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 906d5560d568..27db9106b337 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
 ///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
 ///
-///                     The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 ///
 ///===---------------------------------------------------------------------===//
 /// \file
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index ad96c0e579e4..80a235aeaa5c 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -1,9 +1,8 @@
 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -74,8 +73,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <functional>
-#include <map>
-#include <sstream>
 #include <tuple>
 #include <vector>
 
@@ -1095,19 +1092,15 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
                                         InstructionMapper &Mapper,
                                         unsigned Name) {
 
-  // Create the function name. This should be unique. For now, just hash the
-  // module name and include it in the function name plus the number of this
-  // function.
-  std::ostringstream NameStream;
+  // Create the function name. This should be unique.
   // FIXME: We should have a better naming scheme. This should be stable,
   // regardless of changes to the outliner's cost model/traversal order.
-  NameStream << "OUTLINED_FUNCTION_" << Name;
+  std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
 
   // Create the function using an IR-level function.
   LLVMContext &C = M.getContext();
-  Function *F = dyn_cast<Function>(
-      M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
-  assert(F && "Function was null!");
+  Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
+                                 Function::ExternalLinkage, FunctionName, M);
 
   // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
   // which gives us better results when we outline from linkonceodr functions.
@@ -1205,11 +1198,10 @@ bool MachineOutliner::outline(Module &M,
   unsigned OutlinedFunctionNum = 0;
 
   // Sort by benefit. The most beneficial functions should be outlined first.
-  std::stable_sort(
-      FunctionList.begin(), FunctionList.end(),
-      [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
-        return LHS.getBenefit() > RHS.getBenefit();
-      });
+  llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
+                                     const OutlinedFunction &RHS) {
+    return LHS.getBenefit() > RHS.getBenefit();
+  });
 
   // Walk over each function, outlining them as we go along. Functions are
   // outlined greedily, based off the sort above.
@@ -1253,8 +1245,9 @@ bool MachineOutliner::outline(Module &M,
       if (MBB.getParent()->getProperties().hasProperty(
               MachineFunctionProperties::Property::TracksLiveness)) {
         // Helper lambda for adding implicit def operands to the call
-        // instruction.
-        auto CopyDefs = [&CallInst](MachineInstr &MI) {
+        // instruction. It also updates call site information for moved
+        // code.
+        auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
           for (MachineOperand &MOP : MI.operands()) {
             // Skip over anything that isn't a register.
             if (!MOP.isReg())
@@ -1266,13 +1259,16 @@ bool MachineOutliner::outline(Module &M,
                   MOP.getReg(), true, /* isDef = true */
                   true /* isImp = true */));
           }
+          if (MI.isCall())
+            MI.getMF()->updateCallSiteInfo(&MI);
         };
         // Copy over the defs in the outlined range.
         // First inst in outlined range <-- Anything that's defined in this
         // ...                           .. range has to be added as an
         // implicit Last inst in outlined range  <-- def to the call
-        // instruction.
-        std::for_each(CallInst, std::next(EndIt), CopyDefs);
+        // instruction. Also remove call site information for outlined block
+        // of code.
+        std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
       }
 
       // Erase from the point after where the call was inserted up to, and
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 4d451bdd7f69..54df522d371a 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -1,9 +1,8 @@
 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -97,6 +96,14 @@ using namespace llvm;
 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
 STATISTIC(NumPipelined, "Number of loops software pipelined");
 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
+STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
+STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
+STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
+STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
+STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
+STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
+STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
+STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
 
 /// A command line option to turn software pipelining on or off.
 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -141,6 +148,11 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
                                      cl::ReallyHidden, cl::init(false),
                                      cl::ZeroOrMore, cl::desc("Ignore RecMII"));
 
+static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
+                                    cl::init(false));
+static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
+                                      cl::init(false));
+
 namespace llvm {
 
 // A command line option to enable the CopyToPhi DAG mutation.
@@ -180,6 +192,16 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
       !EnableSWPOptSize.getPosition())
     return false;
 
+  if (!mf.getSubtarget().enableMachinePipeliner())
+    return false;
+
+  // Cannot pipeline loops without instruction itineraries if we are using
+  // DFA for the pipeliner.
+  if (mf.getSubtarget().useDFAforSMS() &&
+      (!mf.getSubtarget().getInstrItineraryData() ||
+       mf.getSubtarget().getInstrItineraryData()->isEmpty()))
+    return false;
+
   MF = &mf;
   MLI = &getAnalysis<MachineLoopInfo>();
   MDT = &getAnalysis<MachineDominatorTree>();
@@ -211,8 +233,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
   }
 #endif
 
-  if (!canPipelineLoop(L))
+  setPragmaPipelineOptions(L);
+  if (!canPipelineLoop(L)) {
+    LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
     return Changed;
+  }
 
   ++NumTrytoPipeline;
 
@@ -221,6 +246,50 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
   return Changed;
 }
 
+/// Read the llvm.loop pipelining hints of \p L into II_setByPragma and
+/// disabledByPragma; a loop without hints ends up with 0 and false.
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+  // Clear values left by an earlier call: the code below assigns these fields
+  // only when a hint is present, so a stale hint would leak into this loop.
+  disabledByPragma = false;
+  II_setByPragma = 0;
+
+  MachineBasicBlock *LBLK = L.getTopBlock();
+  if (LBLK == nullptr)
+    return;
+  const BasicBlock *BBLK = LBLK->getBasicBlock();
+  if (BBLK == nullptr)
+    return;
+  const Instruction *TI = BBLK->getTerminator();
+  if (TI == nullptr)
+    return;
+  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+  if (LoopID == nullptr)
+    return;
+
+  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
+  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+    // Skip non-node operands and empty nodes, which have no name operand.
+    if (MD == nullptr || MD->getNumOperands() == 0)
+      continue;
+    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+    if (S == nullptr)
+      continue;
+    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+      assert(MD->getNumOperands() == 2 &&
+             "Pipeline initiation interval hint metadata should have two operands.");
+      II_setByPragma =
+          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+      assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+    } else if (S->getString() == "llvm.loop.pipeline.disable") {
+      disabledByPragma = true;
+    }
+  }
+}
+
 /// Return true if the loop can be software pipelined.  The algorithm is
 /// restricted to loops with a single basic block.  Make sure that the
 /// branch in the loop can be analyzed.
@@ -228,21 +297,36 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
   if (L.getNumBlocks() != 1)
     return false;
 
+  if (disabledByPragma)
+    return false;
+
   // Check if the branch can't be understood because we can't do pipelining
   // if that's the case.
   LI.TBB = nullptr;
   LI.FBB = nullptr;
   LI.BrCond.clear();
-  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
+    LLVM_DEBUG(
+        dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+    NumFailBranch++;
     return false;
+  }
 
   LI.LoopInductionVar = nullptr;
   LI.LoopCompare = nullptr;
-  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+    LLVM_DEBUG(
+        dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+    NumFailLoop++;
     return false;
+  }
 
-  if (!L.getLoopPreheader())
+  if (!L.getLoopPreheader()) {
+    LLVM_DEBUG(
+        dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+    NumFailPreheader++;
     return false;
+  }
 
   // Remove any subregisters from inputs to phi nodes.
   preprocessPhiNodes(*L.getHeader());
@@ -286,7 +370,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
 
-  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+                        II_setByPragma);
 
   MachineBasicBlock *MBB = L.getHeader();
   // The kernel should not include any terminator instructions.  These
@@ -309,6 +394,20 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
   return SMS.hasNewSchedule();
 }
 
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+  if (II_setByPragma > 0)
+    MII = II_setByPragma;
+  else
+    MII = std::max(ResMII, RecMII);
+}
+
+void SwingSchedulerDAG::setMAX_II() {
+  if (II_setByPragma > 0)
+    MAX_II = II_setByPragma;
+  else
+    MAX_II = MII + 10;
+}
+
 /// We override the schedule function in ScheduleDAGInstrs to implement the
 /// scheduling part of the Swing Modulo Scheduling algorithm.
 void SwingSchedulerDAG::schedule() {
@@ -335,17 +434,28 @@ void SwingSchedulerDAG::schedule() {
   if (SwpIgnoreRecMII)
     RecMII = 0;
 
-  MII = std::max(ResMII, RecMII);
-  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
-                    << ", res=" << ResMII << ")\n");
+  setMII(ResMII, RecMII);
+  setMAX_II();
+
+  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
 
   // Can't schedule a loop without a valid MII.
-  if (MII == 0)
+  if (MII == 0) {
+    LLVM_DEBUG(
+        dbgs()
+        << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+    NumFailZeroMII++;
     return;
+  }
 
   // Don't pipeline large loops.
-  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
+    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
+                      << ", we don't pipleline large loops\n");
+    NumFailLargeMaxMII++;
     return;
+  }
 
   computeNodeFunctions(NodeSets);
 
@@ -362,7 +472,7 @@ void SwingSchedulerDAG::schedule() {
     }
   });
 
-  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
 
   groupRemainingNodes(NodeSets);
 
@@ -383,17 +493,27 @@ void SwingSchedulerDAG::schedule() {
   SMSchedule Schedule(Pass.MF);
   Scheduled = schedulePipeline(Schedule);
 
-  if (!Scheduled)
+  if (!Scheduled){
+    LLVM_DEBUG(dbgs() << "No schedule found, return\n");
+    NumFailNoSchedule++;
     return;
+  }
 
   unsigned numStages = Schedule.getMaxStageCount();
   // No need to generate pipeline if there are no overlapped iterations.
-  if (numStages == 0)
+  if (numStages == 0) {
+    LLVM_DEBUG(
+        dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+    NumFailZeroStage++;
     return;
-
+  }
   // Check that the maximum stage count is less than user-defined limit.
-  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
+    LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
+                      << " : too many stages, abort\n");
+    NumFailLargeMaxStage++;
     return;
+  }
 
   generatePipelinedLoop(Schedule);
   ++NumPipelined;
@@ -467,7 +587,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
 /// Return true if the instruction causes a chain between memory
 /// references before and after it.
 static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
-  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+  return MI.isCall() || MI.mayRaiseFPException() ||
+         MI.hasUnmodeledSideEffects() ||
          (MI.hasOrderedMemoryRef() &&
           (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
 }
@@ -475,16 +596,16 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
 /// Return the underlying objects for the memory references of an instruction.
 /// This function calls the code in ValueTracking, but first checks that the
 /// instruction has a memory operand.
-static void getUnderlyingObjects(MachineInstr *MI,
-                                 SmallVectorImpl<Value *> &Objs,
+static void getUnderlyingObjects(const MachineInstr *MI,
+                                 SmallVectorImpl<const Value *> &Objs,
                                  const DataLayout &DL) {
   if (!MI->hasOneMemOperand())
     return;
   MachineMemOperand *MM = *MI->memoperands_begin();
   if (!MM->getValue())
     return;
-  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
-  for (Value *V : Objs) {
+  GetUnderlyingObjects(MM->getValue(), Objs, DL);
+  for (const Value *V : Objs) {
     if (!isIdentifiedObject(V)) {
       Objs.clear();
       return;
@@ -498,7 +619,7 @@ static void getUnderlyingObjects(MachineInstr *MI,
 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
 /// but that code doesn't create loop carried dependences.
 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
-  MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
   Value *UnknownValue =
     UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
   for (auto &SU : SUnits) {
@@ -506,7 +627,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
     if (isDependenceBarrier(MI, AA))
       PendingLoads.clear();
     else if (MI.mayLoad()) {
-      SmallVector<Value *, 4> Objs;
+      SmallVector<const Value *, 4> Objs;
       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
       if (Objs.empty())
         Objs.push_back(UnknownValue);
@@ -515,12 +636,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
         SUs.push_back(&SU);
       }
     } else if (MI.mayStore()) {
-      SmallVector<Value *, 4> Objs;
+      SmallVector<const Value *, 4> Objs;
       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
       if (Objs.empty())
         Objs.push_back(UnknownValue);
       for (auto V : Objs) {
-        MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
+        MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
             PendingLoads.find(V);
         if (I == PendingLoads.end())
           continue;
@@ -531,7 +652,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
           // First, perform the cheaper check that compares the base register.
           // If they are the same and the load offset is less than the store
           // offset, then mark the dependence as loop carried potentially.
-          MachineOperand *BaseOp1, *BaseOp2;
+          const MachineOperand *BaseOp1, *BaseOp2;
           int64_t Offset1, Offset2;
           if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
               TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
@@ -744,27 +865,55 @@ namespace {
 // the number of functional unit choices.
 struct FuncUnitSorter {
   const InstrItineraryData *InstrItins;
+  const MCSubtargetInfo *STI;
   DenseMap<unsigned, unsigned> Resources;
 
-  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+  FuncUnitSorter(const TargetSubtargetInfo &TSI)
+      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
 
   // Compute the number of functional unit alternatives needed
   // at each stage, and take the minimum value. We prioritize the
   // instructions by the least number of choices first.
   unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
-    unsigned schedClass = Inst->getDesc().getSchedClass();
+    unsigned SchedClass = Inst->getDesc().getSchedClass();
     unsigned min = UINT_MAX;
-    for (const InstrStage *IS = InstrItins->beginStage(schedClass),
-                          *IE = InstrItins->endStage(schedClass);
-         IS != IE; ++IS) {
-      unsigned funcUnits = IS->getUnits();
-      unsigned numAlternatives = countPopulation(funcUnits);
-      if (numAlternatives < min) {
-        min = numAlternatives;
-        F = funcUnits;
+    if (InstrItins && !InstrItins->isEmpty()) {
+      for (const InstrStage &IS :
+           make_range(InstrItins->beginStage(SchedClass),
+                      InstrItins->endStage(SchedClass))) {
+        unsigned funcUnits = IS.getUnits();
+        unsigned numAlternatives = countPopulation(funcUnits);
+        if (numAlternatives < min) {
+          min = numAlternatives;
+          F = funcUnits;
+        }
       }
+      return min;
+    }
+    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+      const MCSchedClassDesc *SCDesc =
+          STI->getSchedModel().getSchedClassDesc(SchedClass);
+      if (!SCDesc->isValid())
+        // No valid Schedule Class Desc for schedClass, should be
+        // Pseudo/PostRAPseudo
+        return min;
+
+      for (const MCWriteProcResEntry &PRE :
+           make_range(STI->getWriteProcResBegin(SCDesc),
+                      STI->getWriteProcResEnd(SCDesc))) {
+        if (!PRE.Cycles)
+          continue;
+        const MCProcResourceDesc *ProcResource =
+            STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
+        unsigned NumUnits = ProcResource->NumUnits;
+        if (NumUnits < min) {
+          min = NumUnits;
+          F = PRE.ProcResourceIdx;
+        }
+      }
+      return min;
     }
-    return min;
+    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
   }
 
   // Compute the critical resources needed by the instruction. This
@@ -774,13 +923,34 @@ struct FuncUnitSorter {
   // the same, highly used, functional unit have high priority.
   void calcCriticalResources(MachineInstr &MI) {
     unsigned SchedClass = MI.getDesc().getSchedClass();
-    for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
-                          *IE = InstrItins->endStage(SchedClass);
-         IS != IE; ++IS) {
-      unsigned FuncUnits = IS->getUnits();
-      if (countPopulation(FuncUnits) == 1)
-        Resources[FuncUnits]++;
+    if (InstrItins && !InstrItins->isEmpty()) {
+      for (const InstrStage &IS :
+           make_range(InstrItins->beginStage(SchedClass),
+                      InstrItins->endStage(SchedClass))) {
+        unsigned FuncUnits = IS.getUnits();
+        if (countPopulation(FuncUnits) == 1)
+          Resources[FuncUnits]++;
+      }
+      return;
+    }
+    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+      const MCSchedClassDesc *SCDesc =
+          STI->getSchedModel().getSchedClassDesc(SchedClass);
+      if (!SCDesc->isValid())
+        // No valid Schedule Class Desc for schedClass, should be
+        // Pseudo/PostRAPseudo
+        return;
+
+      for (const MCWriteProcResEntry &PRE :
+           make_range(STI->getWriteProcResBegin(SCDesc),
+                      STI->getWriteProcResEnd(SCDesc))) {
+        if (!PRE.Cycles)
+          continue;
+        Resources[PRE.ProcResourceIdx]++;
+      }
+      return;
     }
+    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
   }
 
   /// Return true if IS1 has less priority than IS2.
@@ -803,14 +973,15 @@ struct FuncUnitSorter {
 /// to add it to each existing DFA, until a legal space is found. If the
 /// instruction cannot be reserved in an existing DFA, we create a new one.
 unsigned SwingSchedulerDAG::calculateResMII() {
-  SmallVector<DFAPacketizer *, 8> Resources;
+
+  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
+  SmallVector<ResourceManager*, 8> Resources;
   MachineBasicBlock *MBB = Loop.getHeader();
-  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
 
   // Sort the instructions by the number of available choices for scheduling,
   // least to most. Use the number of critical resources as the tie breaker.
-  FuncUnitSorter FUS =
-      FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
                                    E = MBB->getFirstTerminator();
        I != E; ++I)
@@ -832,33 +1003,40 @@ unsigned SwingSchedulerDAG::calculateResMII() {
     // DFA is needed for each cycle.
     unsigned NumCycles = getSUnit(MI)->Latency;
     unsigned ReservedCycles = 0;
-    SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
-    SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+    SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
+    SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
+    LLVM_DEBUG({
+      dbgs() << "Trying to reserve resource for " << NumCycles
+             << " cycles for \n";
+      MI->dump();
+    });
     for (unsigned C = 0; C < NumCycles; ++C)
       while (RI != RE) {
-        if ((*RI++)->canReserveResources(*MI)) {
+        if ((*RI)->canReserveResources(*MI)) {
+          (*RI)->reserveResources(*MI);
           ++ReservedCycles;
           break;
         }
+        RI++;
       }
-    // Start reserving resources using existing DFAs.
-    for (unsigned C = 0; C < ReservedCycles; ++C) {
-      --RI;
-      (*RI)->reserveResources(*MI);
-    }
+    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+                      << ", NumCycles:" << NumCycles << "\n");
     // Add new DFAs, if needed, to reserve resources.
     for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
-      DFAPacketizer *NewResource =
-          TII->CreateTargetScheduleState(MF.getSubtarget());
+      LLVM_DEBUG(if (SwpDebugResource) dbgs()
+                 << "NewResource created to reserve resources"
+                 << "\n");
+      ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
       assert(NewResource->canReserveResources(*MI) && "Reserve error.");
       NewResource->reserveResources(*MI);
       Resources.push_back(NewResource);
     }
   }
   int Resmii = Resources.size();
+  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
   // Delete the memory for each of the DFAs that were created earlier.
-  for (DFAPacketizer *RI : Resources) {
-    DFAPacketizer *D = RI;
+  for (ResourceManager *RI : Resources) {
+    ResourceManager *D = RI;
     delete D;
   }
   Resources.clear();
@@ -1517,7 +1695,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
   }
 }
 
-/// Add the node to the set, and add all is its connected nodes to the set.
+/// Add the node to the set, and add all of its connected nodes to the set.
 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
                                           SetVector<SUnit *> &NodesAdded) {
   NewSet.insert(SU);
@@ -1741,12 +1919,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
 /// Process the nodes in the computed order and create the pipelined schedule
 /// of the instructions, if possible. Return true if a schedule is found.
 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
-  if (NodeOrder.empty())
+
+  if (NodeOrder.empty()){
+    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
     return false;
+  }
 
   bool scheduleFound = false;
+  unsigned II = 0;
   // Keep increasing II until a valid schedule is found.
-  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
     Schedule.reset();
     Schedule.setInitiationInterval(II);
     LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -1767,13 +1949,14 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
       Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
                             II, this);
       LLVM_DEBUG({
+        dbgs() << "\n";
         dbgs() << "Inst (" << SU->NodeNum << ") ";
         SU->getInstr()->dump();
         dbgs() << "\n";
       });
       LLVM_DEBUG({
-        dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
-               << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+        dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
+                         LateStart, SchedEnd, SchedStart);
       });
 
       if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
@@ -1818,7 +2001,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
       scheduleFound = Schedule.isValidSchedule(this);
   }
 
-  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+                    << ")\n");
 
   if (scheduleFound)
     Schedule.finalizeSchedule(this);
@@ -1847,6 +2031,10 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
   InstrMapTy InstrMap;
 
   SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
+  assert(PreheaderBB != nullptr &&
+         "Need to add code to handle loops w/o preheader");
   // Generate the prolog instructions that set up the pipeline.
   generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
   MF.insert(BB->getIterator(), KernelBB);
@@ -1903,7 +2091,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
   removeDeadInstructions(KernelBB, EpilogBBs);
 
   // Add branches between prolog and epilog blocks.
-  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
+  addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
 
   // Remove the original loop since it's no longer referenced.
   for (auto &I : *BB)
@@ -2242,7 +2430,7 @@ void SwingSchedulerDAG::generateExistingPhis(
         // Use the value defined by the Phi, unless we're generating the first
         // epilog and the Phi refers to a Phi in a different stage.
         else if (VRMap[PrevStage - np].count(Def) &&
-                 (!LoopDefIsPhi || PrevStage != LastStageNum))
+                 (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
           PhiOp2 = VRMap[PrevStage - np][Def];
       }
 
@@ -2588,7 +2776,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
 /// Create branches from each prolog basic block to the appropriate epilog
 /// block.  These edges are needed if the loop ends before reaching the
 /// kernel.
-void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
+void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
+                                    MBBVectorTy &PrologBBs,
                                     MachineBasicBlock *KernelBB,
                                     MBBVectorTy &EpilogBBs,
                                     SMSchedule &Schedule, ValueMapTy *VRMap) {
@@ -2615,8 +2804,8 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
     // Check if the LOOP0 has already been removed. If so, then there is no need
     // to reduce the trip count.
     if (LC != 0)
-      LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
-                                MaxIter);
+      LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
+                                PrevInsts, j, MaxIter);
 
     // Record the value of the first trip count, which is used to determine if
     // branches and blocks can be removed for constant trip counts.
@@ -2657,7 +2846,7 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
 /// during each iteration. Set Delta to the amount of the change.
 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  MachineOperand *BaseOp;
+  const MachineOperand *BaseOp;
   int64_t Offset;
   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
     return false;
@@ -2698,7 +2887,9 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
     return;
   SmallVector<MachineMemOperand *, 2> NewMMOs;
   for (MachineMemOperand *MMO : NewMI.memoperands()) {
-    if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
+    // TODO: Figure out whether isAtomic is really necessary (see D57601).
+    if (MMO->isVolatile() || MMO->isAtomic() ||
+        (MMO->isInvariant() && MMO->isDereferenceable()) ||
         (!MMO->getValue())) {
       NewMMOs.push_back(MMO);
       continue;
@@ -3058,6 +3249,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
 
   // Assume ordered loads and stores may have a loop carried dependence.
   if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+      SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
       SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
     return true;
 
@@ -3069,7 +3261,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
   if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
     return true;
 
-  MachineOperand *BaseOpS, *BaseOpD;
+  const MachineOperand *BaseOpS, *BaseOpD;
   int64_t OffsetS, OffsetD;
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
@@ -3097,12 +3289,14 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
 
   // This is the main test, which checks the offset values and the loop
   // increment value to determine if the accesses may be loop carried.
-  if (OffsetS >= OffsetD)
-    return OffsetS + AccessSizeS > DeltaS;
-  else
-    return OffsetD + AccessSizeD > DeltaD;
+  if (AccessSizeS == MemoryLocation::UnknownSize ||
+      AccessSizeD == MemoryLocation::UnknownSize)
+    return true;
 
-  return true;
+  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
+    return true;
+
+  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
 }
 
 void SwingSchedulerDAG::postprocessDAG() {
@@ -3117,6 +3311,10 @@ void SwingSchedulerDAG::postprocessDAG() {
 /// the relative values of StartCycle and EndCycle.
 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
   bool forward = true;
+  LLVM_DEBUG({
+    dbgs() << "Trying to insert node between " << StartCycle << " and "
+           << EndCycle << " II: " << II << "\n";
+  });
   if (StartCycle > EndCycle)
     forward = false;
 
@@ -3125,8 +3323,9 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
   for (int curCycle = StartCycle; curCycle != termCycle;
        forward ? ++curCycle : --curCycle) {
 
-    // Add the already scheduled instructions at the specified cycle to the DFA.
-    Resources->clearResources();
+    // Add the already scheduled instructions at the specified cycle to the
+    // DFA.
+    ProcItinResources.clearResources();
     for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
          checkCycle <= LastCycle; checkCycle += II) {
       std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
@@ -3136,13 +3335,13 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
            I != E; ++I) {
         if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
           continue;
-        assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+        assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
                "These instructions have already been scheduled.");
-        Resources->reserveResources(*(*I)->getInstr());
+        ProcItinResources.reserveResources(*(*I)->getInstr());
       }
     }
     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
-        Resources->canReserveResources(*SU->getInstr())) {
+        ProcItinResources.canReserveResources(*SU->getInstr())) {
       LLVM_DEBUG({
         dbgs() << "\tinsert at cycle " << curCycle << " ";
         SU->getInstr()->dump();
@@ -3360,6 +3559,14 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
         if (Pos < MoveUse)
           MoveUse = Pos;
       }
+      // We did not handle HW dependences in the previous for loop,
+      // and we normally set Latency = 0 for Anti deps,
+      // so nodes in the same cycle may be Anti dependent on HW regs.
+      else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+        OrderBeforeUse = true;
+        if ((MoveUse == 0) || (Pos < MoveUse))
+          MoveUse = Pos;
+      }
     }
     for (auto &P : SU->Preds) {
       if (P.getSUnit() != *I)
@@ -3523,9 +3730,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
 
     for (SDep &PredEdge : SU->Preds) {
       SUnit *PredSU = PredEdge.getSUnit();
-      unsigned PredIndex =
-          std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
-                                        std::make_pair(PredSU, 0), CompareKey));
+      unsigned PredIndex = std::get<1>(
+          *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
       if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
         PredBefore = true;
         Pred = PredSU;
@@ -3535,9 +3741,13 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
 
     for (SDep &SuccEdge : SU->Succs) {
       SUnit *SuccSU = SuccEdge.getSUnit();
-      unsigned SuccIndex =
-          std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
-                                        std::make_pair(SuccSU, 0), CompareKey));
+      // Do not process a boundary node: it was not included in NodeOrder,
+      // hence not in Indices either, so the call to llvm::lower_bound() below
+      // would return Indices.end().
+      if (SuccSU->isBoundaryNode())
+        continue;
+      unsigned SuccIndex = std::get<1>(
+          *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
       if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
         SuccBefore = true;
         Succ = SuccSU;
@@ -3548,9 +3758,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
     if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
       // instructions in circuits are allowed to be scheduled
       // after both a successor and predecessor.
-      bool InCircuit = std::any_of(
-          Circuits.begin(), Circuits.end(),
-          [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+      bool InCircuit = llvm::any_of(
+          Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
       if (InCircuit)
         LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
       else {
@@ -3740,5 +3949,140 @@ LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
 
 #endif
 
+void ResourceManager::initProcResourceVectors(
+    const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
+  // Fill Masks with one bitmask per processor resource kind of SM.
+  unsigned ProcResourceID = 0;
+  // Each mask is a uint64_t, so the number of resource kinds must be < 64.
+  assert(SM.getNumProcResourceKinds() < 64 &&
+         "Too many kinds of resources, unsupported");
+  // Create a unique bitmask for every processor resource unit.
+  // Skip resource at index 0, since it always references 'InvalidUnit'.
+  Masks.resize(SM.getNumProcResourceKinds());
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    ProcResourceID++;
+  }
+  // Create a unique bitmask for every processor resource group.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (!Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    for (unsigned U = 0; U < Desc.NumUnits; ++U)
+      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
+    ProcResourceID++;
+  }
+  LLVM_DEBUG({
+    if (SwpShowResMask) {
+      dbgs() << "ProcResourceDesc:\n";
+      for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+        const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
+        dbgs() << format(" %16s(%2d): Mask: 0x%08llx, NumUnits:%2d\n",
+                         ProcResource->Name, I,
+                         static_cast<unsigned long long>(Masks[I]),
+                         ProcResource->NumUnits);
+      }
+      dbgs() << " -----------------\n";
+    }
+  });
+}
+
+bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
+  // Report whether MID can still be reserved (DFA query or unit-count check).
+  LLVM_DEBUG({
+    if (SwpDebugResource)
+      dbgs() << "canReserveResources:\n";
+  });
+  if (UseDFA)
+    return DFAResources->canReserveResources(MID);
+
+  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(MID->getSchedClass());
+  if (!SCDesc->isValid()) {
+    LLVM_DEBUG({
+      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+    });
+    return true;
+  }
+
+  // Refuse as soon as one resource with a nonzero cycle count is saturated.
+  for (const MCWriteProcResEntry &PRE :
+       make_range(STI->getWriteProcResBegin(SCDesc),
+                  STI->getWriteProcResEnd(SCDesc))) {
+    if (!PRE.Cycles)
+      continue;
+    const MCProcResourceDesc *ProcResource =
+        SM.getProcResource(PRE.ProcResourceIdx);
+    const unsigned UnitLimit = ProcResource->NumUnits;
+    LLVM_DEBUG({
+      if (SwpDebugResource)
+        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+                         ProcResource->Name, PRE.ProcResourceIdx,
+                         ProcResourceCount[PRE.ProcResourceIdx], UnitLimit,
+                         PRE.Cycles);
+    });
+    if (ProcResourceCount[PRE.ProcResourceIdx] >= UnitLimit)
+      return false;
+  }
+  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
+  return true;
+}
+
+void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+  LLVM_DEBUG({
+    if (SwpDebugResource)
+      dbgs() << "reserveResources:\n";
+  });
+  if (UseDFA)
+    return DFAResources->reserveResources(MID);
 
+  // Without a DFA, bump the use count of each resource MID occupies.
+  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(MID->getSchedClass());
+  if (!SCDesc->isValid()) {
+    LLVM_DEBUG({
+      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+    });
+    return;
+  }
+  for (const MCWriteProcResEntry &PRE :
+       make_range(STI->getWriteProcResBegin(SCDesc),
+                  STI->getWriteProcResEnd(SCDesc))) {
+    if (!PRE.Cycles)
+      continue;
+    ++ProcResourceCount[PRE.ProcResourceIdx];
+    LLVM_DEBUG({
+      if (SwpDebugResource) {
+        const MCProcResourceDesc *Desc =
+            SM.getProcResource(PRE.ProcResourceIdx);
+        dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+                         Desc->Name, PRE.ProcResourceIdx,
+                         ProcResourceCount[PRE.ProcResourceIdx],
+                         Desc->NumUnits, PRE.Cycles);
+      }
+    });
+  }
+  LLVM_DEBUG({
+    if (SwpDebugResource)
+      dbgs() << "reserveResources: done!\n\n";
+  });
+}
+
+bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
+  return canReserveResources(&MI.getDesc()); // Forward MI's MCInstrDesc.
+}
+
+void ResourceManager::reserveResources(const MachineInstr &MI) {
+  reserveResources(&MI.getDesc()); // Forward MI's MCInstrDesc.
+}
+
+void ResourceManager::clearResources() {
+  if (!UseDFA) // No DFA in use: zero every per-resource counter instead.
+    return std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+  DFAResources->clearResources();
+}
 
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 488377998cb3..7f220ed1fd8f 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -1,9 +1,8 @@
 //===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 2619d8f78276..2961d456be0d 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,9 +1,8 @@
 //===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 6e5ca45d5e5e..f0fd0405d69d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -155,7 +154,7 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
 /// createVirtualRegister - Create and return a new virtual register in the
 /// function with the specified register class.
 ///
-unsigned
+Register
 MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
                                            StringRef Name) {
   assert(RegClass && "Cannot create register without RegClass!");
@@ -170,7 +169,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
   return Reg;
 }
 
-unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
                                                    StringRef Name) {
   unsigned Reg = createIncompleteVirtualRegister(Name);
   VRegInfo[Reg].first = VRegInfo[VReg].first;
@@ -185,7 +184,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
   VRegToType[VReg] = Ty;
 }
 
-unsigned
+Register
 MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
   // New virtual register number.
   unsigned Reg = createIncompleteVirtualRegister(Name);
@@ -424,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
   return ++UI == use_nodbg_end();
 }
 
+bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+  // True iff the use_instr_nodbg range of RegNo holds exactly one element.
+  use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
+  const bool HasUser = !(UI == use_instr_nodbg_end());
+  return HasUser && ++UI == use_instr_nodbg_end();
+}
+
 /// clearKillFlags - Iterate over all the uses of the given register and
 /// clear the kill flag from the MachineOperand. This function is used by
 /// optimization passes which extend register lifetimes and need only
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 542491eabbf2..e8b42047b49f 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -1,9 +1,8 @@
 //===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 90dad9d399fe..ae1170ad1be6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1,9 +1,8 @@
 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -487,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB,
       MachineInstr &MI = *std::prev(I);
       if (isSchedBoundary(&MI, &*MBB, MF, TII))
         break;
-      if (!MI.isDebugInstr())
+      if (!MI.isDebugInstr()) {
         // MBB::size() uses instr_iterator to count. Here we need a bundle to
         // count as a single instruction.
         ++NumRegionInstrs;
+      }
     }
 
-    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+    // It's possible we found a scheduling region that only has debug
+    // instructions. Don't bother scheduling these.
+    if (NumRegionInstrs != 0)
+      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
   }
 
   if (RegionsTopDown)
@@ -605,23 +608,6 @@ LLVM_DUMP_METHOD void ReadyQueue::dump() const {
 // Provide a vtable anchor.
 ScheduleDAGMI::~ScheduleDAGMI() = default;
 
-bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
-  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
-}
-
-bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
-  if (SuccSU != &ExitSU) {
-    // Do not use WillCreateCycle, it assumes SD scheduling.
-    // If Pred is reachable from Succ, then the edge creates a cycle.
-    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
-      return false;
-    Topo.AddPred(SuccSU, PredDep.getSUnit());
-  }
-  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
-  // Return true regardless of whether a new edge needed to be inserted.
-  return true;
-}
-
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
 ///
@@ -762,8 +748,6 @@ void ScheduleDAGMI::schedule() {
   // Build the DAG.
   buildSchedGraph(AA);
 
-  Topo.InitDAGTopologicalSorting();
-
   postprocessDAG();
 
   SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1212,8 +1196,6 @@ void ScheduleDAGMILive::schedule() {
   LLVM_DEBUG(SchedImpl->dumpPolicy());
   buildDAGWithRegPressure();
 
-  Topo.InitDAGTopologicalSorting();
-
   postprocessDAG();
 
   SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1484,10 +1466,10 @@ namespace {
 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
   struct MemOpInfo {
     SUnit *SU;
-    MachineOperand *BaseOp;
+    const MachineOperand *BaseOp;
     int64_t Offset;
 
-    MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+    MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
         : SU(su), BaseOp(Op), Offset(ofs) {}
 
     bool operator<(const MemOpInfo &RHS) const {
@@ -1533,7 +1515,7 @@ public:
   void apply(ScheduleDAGInstrs *DAGInstrs) override;
 
 protected:
-  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
 };
 
 class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1570,10 +1552,10 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
 } // end namespace llvm
 
 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
-    ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+    ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
   SmallVector<MemOpInfo, 32> MemOpRecords;
   for (SUnit *SU : MemOps) {
-    MachineOperand *BaseOp;
+    const MachineOperand *BaseOp;
     int64_t Offset;
     if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
       MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
@@ -1610,9 +1592,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
 }
 
 /// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
-  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
   // Map DAG NodeNum to store chain ID.
   DenseMap<unsigned, unsigned> StoreChainIDs;
   // Map each store chain to a set of dependent MemOps.
@@ -1857,9 +1837,15 @@ SchedBoundary::~SchedBoundary() { delete HazardRec; }
 
 /// Given a Count of resource usage and a Latency value, return true if a
 /// SchedBoundary becomes resource limited.
+/// When \p AfterSchedNode is set (the check follows scheduling a node), merely
+/// reaching the limit counts; otherwise the limit must be strictly exceeded.
 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
-                               unsigned Latency) {
-  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+                               unsigned Latency, bool AfterSchedNode) {
+  // Usage beyond the latency-scaled baseline; negative on a deficit.
+  const int Excess = (int)(Count - (Latency * LFactor));
+  const int Limit = (int)LFactor;
+  // Equality only trips the limit for the post-scheduling check.
+  return AfterSchedNode ? Excess >= Limit : Excess > Limit;
 }
 
 void SchedBoundary::reset() {
@@ -1883,6 +1869,7 @@ void SchedBoundary::reset() {
   ZoneCritResIdx = 0;
   IsResourceLimited = false;
   ReservedCycles.clear();
+  ReservedCyclesIndex.clear();
 #ifndef NDEBUG
   // Track the maximum number of stall cycles that could arise either from the
   // latency of a DAG edge or the number of cycles that a processor resource is
@@ -1921,8 +1908,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
   SchedModel = smodel;
   Rem = rem;
   if (SchedModel->hasInstrSchedModel()) {
-    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
-    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+    ReservedCyclesIndex.resize(ResourceCount);
+    ExecutedResCounts.resize(ResourceCount);
+    unsigned NumUnits = 0;
+
+    for (unsigned i = 0; i < ResourceCount; ++i) {
+      ReservedCyclesIndex[i] = NumUnits;
+      NumUnits += SchedModel->getProcResource(i)->NumUnits;
+    }
+
+    ReservedCycles.resize(NumUnits, InvalidCycle);
   }
 }
 
@@ -1943,11 +1939,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
   return 0;
 }
 
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
-unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
-  unsigned NextUnreserved = ReservedCycles[PIdx];
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
+                                                       unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
   // If this resource has never been used, always return cycle zero.
   if (NextUnreserved == InvalidCycle)
     return 0;
@@ -1957,6 +1953,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
   return NextUnreserved;
 }
 
+/// Find the earliest cycle at which some unit of processor resource \p PIdx
+/// can be scheduled.  Returns that cycle paired with the chosen unit's index
+/// into the ReservedCycles vector.
+std::pair<unsigned, unsigned>
+SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  const unsigned FirstUnit = ReservedCyclesIndex[PIdx];
+  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+  assert(NumberOfInstances > 0 &&
+         "Cannot have zero instances of a ProcResource");
+
+  const unsigned LastUnit = FirstUnit + NumberOfInstances;
+  unsigned BestCycle = InvalidCycle, BestUnit = 0;
+  for (unsigned Unit = FirstUnit; Unit < LastUnit; ++Unit) {
+    const unsigned Candidate = getNextResourceCycleByInstance(Unit, Cycles);
+    // Only a strictly earlier cycle wins, so ties keep the lower index.
+    if (Candidate >= BestCycle)
+      continue;
+    BestCycle = Candidate;
+    BestUnit = Unit;
+  }
+  return std::make_pair(BestCycle, BestUnit);
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1998,14 +2017,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
                      SchedModel->getWriteProcResEnd(SC))) {
       unsigned ResIdx = PE.ProcResourceIdx;
       unsigned Cycles = PE.Cycles;
-      unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+      unsigned NRCycle, InstanceIdx;
+      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
       if (NRCycle > CurrCycle) {
 #ifndef NDEBUG
         MaxObservedStall = std::max(Cycles, MaxObservedStall);
 #endif
         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
-                          << SchedModel->getResourceName(ResIdx) << "="
-                          << NRCycle << "c\n");
+                          << SchedModel->getResourceName(ResIdx)
+                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
+                          << "=" << NRCycle << "c\n");
         return true;
       }
     }
@@ -2119,7 +2140,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
   CheckPending = true;
   IsResourceLimited =
       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
-                         getScheduledLatency());
+                         getScheduledLatency(), true);
 
   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
                     << '\n');
@@ -2160,10 +2181,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
                       << "c\n");
   }
   // For reserved resources, record the highest cycle using the resource.
-  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  unsigned NextAvailable, InstanceIdx;
+  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
   if (NextAvailable > CurrCycle) {
     LLVM_DEBUG(dbgs() << "  Resource conflict: "
-                      << SchedModel->getProcResource(PIdx)->Name
+                      << SchedModel->getResourceName(PIdx)
+                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
                       << " reserved until @" << NextAvailable << "\n");
   }
   return NextAvailable;
@@ -2179,6 +2202,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
       HazardRec->Reset();
     }
     HazardRec->EmitInstruction(SU);
+    // Scheduling an instruction may have made pending instructions available.
+    CheckPending = true;
   }
   // checkHazard should prevent scheduling multiple instructions per cycle that
   // exceed the issue width.
@@ -2251,12 +2276,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+          unsigned ReservedUntil, InstanceIdx;
+          std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
           if (isTop()) {
-            ReservedCycles[PIdx] =
-              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
-          }
-          else
-            ReservedCycles[PIdx] = NextCycle;
+            ReservedCycles[InstanceIdx] =
+                std::max(ReservedUntil, NextCycle + PI->Cycles);
+          } else
+            ReservedCycles[InstanceIdx] = NextCycle;
         }
       }
     }
@@ -2282,7 +2308,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
     // resource limited. If a stall occurred, bumpCycle does this.
     IsResourceLimited =
         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
-                           getScheduledLatency());
+                           getScheduledLatency(), true);
 
   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
@@ -2501,7 +2527,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
     RemLatency = computeRemLatency(CurrZone);
     RemLatencyComputed = true;
     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
-                                         OtherCount, RemLatency);
+                                         OtherCount, RemLatency, false);
   }
 
   // Schedule aggressively for latency in PostRA mode. We don't check for
@@ -2741,8 +2767,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
 
   // After subtarget overrides, apply command line options.
-  if (!EnableRegPressure)
+  if (!EnableRegPressure) {
     RegionPolicy.ShouldTrackPressure = false;
+    RegionPolicy.ShouldTrackLaneMasks = false;
+  }
 
   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
   // e.g. -misched-bottomup=false allows scheduling in both directions.
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index cdc597db6401..41db2c88ce50 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -1,9 +1,8 @@
 //===- MachineSink.cpp - Sinking for machine instructions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -585,9 +584,8 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
       AllSuccs.push_back(DTChild->getBlock());
 
   // Sort Successors according to their loop depth or block frequency info.
-  std::stable_sort(
-      AllSuccs.begin(), AllSuccs.end(),
-      [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+  llvm::stable_sort(
+      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
@@ -716,7 +714,7 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
       !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
     return false;
 
-  MachineOperand *BaseOp;
+  const MachineOperand *BaseOp;
   int64_t Offset;
   if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
     return false;
@@ -1203,6 +1201,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
 }
 
 bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
   bool Changed = false;
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index e62ed3094651..f9505df4e7f4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,9 +1,8 @@
 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 534d3699db29..0ad792ac62cf 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,9 +1,8 @@
 //===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -219,7 +218,7 @@ namespace {
 
     bool isAllocatable(unsigned Reg) const {
       return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
-        !regsReserved.test(Reg);
+             !regsReserved.test(Reg);
     }
 
     // Analysis information if available
@@ -231,6 +230,9 @@ namespace {
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineBundleBefore(const MachineInstr *MI);
+
+    bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+    void verifyPreISelGenericInstruction(const MachineInstr *MI);
     void visitMachineInstrBefore(const MachineInstr *MI);
     void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
     void visitMachineInstrAfter(const MachineInstr *MI);
@@ -838,7 +840,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
   if (MI->isTerminator() && !TII->isPredicated(*MI)) {
     if (!FirstTerminator)
       FirstTerminator = MI;
-  } else if (FirstTerminator) {
+  } else if (FirstTerminator && !MI->isDebugEntryValue()) {
     report("Non-terminator instruction after the first terminator", MI);
     errs() << "First terminator was:\t" << *FirstTerminator;
   }
@@ -889,109 +891,150 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
   }
 }
 
-void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (MI->getNumOperands() < MCID.getNumOperands()) {
-    report("Too few operands", MI);
-    errs() << MCID.getNumOperands() << " operands expected, but "
-           << MI->getNumOperands() << " given.\n";
+/// Check that types are consistent when two operands need to have the same
+/// number of vector elements.
+/// \return true if the types are valid.
+bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
+                                               const MachineInstr *MI) {
+  if (Ty0.isVector() != Ty1.isVector()) {
+    report("operand types must be all-vector or all-scalar", MI);
+    // Generally we try to report as many issues as possible at once, but in
+    // this case it's not clear what we should be comparing the size of the
+    // scalar with: the size of the whole vector or of one lane. Rather than
+    // make an arbitrary choice and emit a not-so-helpful message, avoid the
+    // extra noise and stop here.
+    return false;
   }
 
-  if (MI->isPHI()) {
-    if (MF->getProperties().hasProperty(
-            MachineFunctionProperties::Property::NoPHIs))
-      report("Found PHI instruction with NoPHIs property set", MI);
+  if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) {
+    report("operand types must preserve number of vector elements", MI);
+    return false;
+  }
 
-    if (FirstNonPHI)
-      report("Found PHI instruction after non-PHI", MI);
-  } else if (FirstNonPHI == nullptr)
-    FirstNonPHI = MI;
+  return true;
+}
 
-  // Check the tied operands.
-  if (MI->isInlineAsm())
-    verifyInlineAsm(MI);
+void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
+  if (isFunctionSelected)
+    report("Unexpected generic instruction in a Selected function", MI);
 
-  // Check the MachineMemOperands for basic consistency.
-  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
-                                  E = MI->memoperands_end();
+  const MCInstrDesc &MCID = MI->getDesc();
+  unsigned NumOps = MI->getNumOperands();
+
+  // Check types.
+  SmallVector<LLT, 4> Types;
+  for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
        I != E; ++I) {
-    if ((*I)->isLoad() && !MI->mayLoad())
-      report("Missing mayLoad flag", MI);
-    if ((*I)->isStore() && !MI->mayStore())
-      report("Missing mayStore flag", MI);
-  }
+    if (!MCID.OpInfo[I].isGenericType())
+      continue;
+    // Generic instructions specify type equality constraints between some of
+    // their operands. Make sure these are consistent.
+    size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
+    Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+    const MachineOperand *MO = &MI->getOperand(I);
+    if (!MO->isReg()) {
+      report("generic instruction must use register operands", MI);
+      continue;
+    }
 
-  // Debug values must not have a slot index.
-  // Other instructions must have one, unless they are inside a bundle.
-  if (LiveInts) {
-    bool mapped = !LiveInts->isNotInMIMap(*MI);
-    if (MI->isDebugInstr()) {
-      if (mapped)
-        report("Debug instruction has a slot index", MI);
-    } else if (MI->isInsideBundle()) {
-      if (mapped)
-        report("Instruction inside bundle has a slot index", MI);
+    LLT OpTy = MRI->getType(MO->getReg());
+    // Don't report a type mismatch if there is no actual mismatch, only a
+    // type missing, to reduce noise:
+    if (OpTy.isValid()) {
+      // Only the first valid type for a type index will be printed: don't
+      // overwrite it later so it's always clear which type was expected:
+      if (!Types[TypeIdx].isValid())
+        Types[TypeIdx] = OpTy;
+      else if (Types[TypeIdx] != OpTy)
+        report("Type mismatch in generic instruction", MO, I, OpTy);
     } else {
-      if (!mapped)
-        report("Missing slot index", MI);
+      // Generic instructions must have types attached to their operands.
+      report("Generic instruction is missing a virtual register type", MO, I);
     }
   }
 
-  if (isPreISelGenericOpcode(MCID.getOpcode())) {
-    if (isFunctionSelected)
-      report("Unexpected generic instruction in a Selected function", MI);
-
-    // Check types.
-    SmallVector<LLT, 4> Types;
-    for (unsigned I = 0; I < MCID.getNumOperands(); ++I) {
-      if (!MCID.OpInfo[I].isGenericType())
-        continue;
-      // Generic instructions specify type equality constraints between some of
-      // their operands. Make sure these are consistent.
-      size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
-      Types.resize(std::max(TypeIdx + 1, Types.size()));
-
-      const MachineOperand *MO = &MI->getOperand(I);
-      LLT OpTy = MRI->getType(MO->getReg());
-      // Don't report a type mismatch if there is no actual mismatch, only a
-      // type missing, to reduce noise:
-      if (OpTy.isValid()) {
-        // Only the first valid type for a type index will be printed: don't
-        // overwrite it later so it's always clear which type was expected:
-        if (!Types[TypeIdx].isValid())
-          Types[TypeIdx] = OpTy;
-        else if (Types[TypeIdx] != OpTy)
-          report("Type mismatch in generic instruction", MO, I, OpTy);
-      } else {
-        // Generic instructions must have types attached to their operands.
-        report("Generic instruction is missing a virtual register type", MO, I);
-      }
-    }
-
-    // Generic opcodes must not have physical register operands.
-    for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
-      const MachineOperand *MO = &MI->getOperand(I);
-      if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
-        report("Generic instruction cannot have physical register", MO, I);
-    }
+  // Generic opcodes must not have physical register operands.
+  for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+    const MachineOperand *MO = &MI->getOperand(I);
+    if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+      report("Generic instruction cannot have physical register", MO, I);
   }
 
+  // Avoid out of bounds in checks below. This was already reported earlier.
+  if (MI->getNumOperands() < MCID.getNumOperands())
+    return;
+
   StringRef ErrorInfo;
   if (!TII->verifyInstruction(*MI, ErrorInfo))
     report(ErrorInfo.data(), MI);
 
   // Verify properties of various specific instruction types
-  switch(MI->getOpcode()) {
-  default:
+  switch (MI->getOpcode()) {
+  case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT: {
+    if (MI->getNumOperands() < MCID.getNumOperands())
+      break;
+
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    if (DstTy.isVector())
+      report("Instruction cannot use a vector result type", MI);
+
+    if (MI->getOpcode() == TargetOpcode::G_CONSTANT) {
+      if (!MI->getOperand(1).isCImm()) {
+        report("G_CONSTANT operand must be cimm", MI);
+        break;
+      }
+
+      const ConstantInt *CI = MI->getOperand(1).getCImm();
+      if (CI->getBitWidth() != DstTy.getSizeInBits())
+        report("inconsistent constant size", MI);
+    } else {
+      if (!MI->getOperand(1).isFPImm()) {
+        report("G_FCONSTANT operand must be fpimm", MI);
+        break;
+      }
+      const ConstantFP *CF = MI->getOperand(1).getFPImm();
+
+      if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) !=
+          DstTy.getSizeInBits()) {
+        report("inconsistent constant size", MI);
+      }
+    }
+
     break;
+  }
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE:
+  case TargetOpcode::G_ZEXTLOAD:
+  case TargetOpcode::G_SEXTLOAD: {
+    LLT ValTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!PtrTy.isPointer())
+      report("Generic memory instruction must access a pointer", MI);
+
     // Generic loads and stores must have a single MachineMemOperand
     // describing that access.
-    if (!MI->hasOneMemOperand())
+    if (!MI->hasOneMemOperand()) {
       report("Generic instruction accessing memory must have one mem operand",
              MI);
+    } else {
+      const MachineMemOperand &MMO = **MI->memoperands_begin();
+      if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+          MI->getOpcode() == TargetOpcode::G_SEXTLOAD) {
+        if (MMO.getSizeInBits() >= ValTy.getSizeInBits())
+          report("Generic extload must have a narrower memory type", MI);
+      } else if (MI->getOpcode() == TargetOpcode::G_LOAD) {
+        if (MMO.getSize() > ValTy.getSizeInBytes())
+          report("load memory size cannot exceed result size", MI);
+      } else if (MI->getOpcode() == TargetOpcode::G_STORE) {
+        if (ValTy.getSizeInBytes() < MMO.getSize())
+          report("store memory size cannot exceed value size", MI);
+      }
+    }
+
     break;
+  }
   case TargetOpcode::G_PHI: {
     LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
     if (!DstTy.isValid() ||
@@ -1009,6 +1052,70 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
              MI);
     break;
   }
+  case TargetOpcode::G_BITCAST: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!DstTy.isValid() || !SrcTy.isValid())
+      break;
+
+    if (SrcTy.isPointer() != DstTy.isPointer())
+      report("bitcast cannot convert between pointers and other types", MI);
+
+    if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+      report("bitcast sizes must match", MI);
+    break;
+  }
+  case TargetOpcode::G_INTTOPTR:
+  case TargetOpcode::G_PTRTOINT:
+  case TargetOpcode::G_ADDRSPACE_CAST: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!DstTy.isValid() || !SrcTy.isValid())
+      break;
+
+    verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+    DstTy = DstTy.getScalarType();
+    SrcTy = SrcTy.getScalarType();
+
+    if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) {
+      if (!DstTy.isPointer())
+        report("inttoptr result type must be a pointer", MI);
+      if (SrcTy.isPointer())
+        report("inttoptr source type must not be a pointer", MI);
+    } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) {
+      if (!SrcTy.isPointer())
+        report("ptrtoint source type must be a pointer", MI);
+      if (DstTy.isPointer())
+        report("ptrtoint result type must not be a pointer", MI);
+    } else {
+      assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST);
+      if (!SrcTy.isPointer() || !DstTy.isPointer())
+        report("addrspacecast types must be pointers", MI);
+      else {
+        if (SrcTy.getAddressSpace() == DstTy.getAddressSpace())
+          report("addrspacecast must convert different address spaces", MI);
+      }
+    }
+
+    break;
+  }
+  case TargetOpcode::G_GEP: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+    LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
+    if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
+      break;
+
+    if (!PtrTy.getScalarType().isPointer())
+      report("gep first operand must be a pointer", MI);
+
+    if (OffsetTy.getScalarType().isPointer())
+      report("gep offset operand must not be a pointer", MI);
+
+    // TODO: Is the offset allowed to be a scalar with a vector?
+    break;
+  }
   case TargetOpcode::G_SEXT:
   case TargetOpcode::G_ZEXT:
   case TargetOpcode::G_ANYEXT:
@@ -1021,30 +1128,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     // instructions aren't guaranteed to have the right number of operands or
     // types attached to them at this point
     assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
-    if (MI->getNumOperands() < MCID.getNumOperands())
-      break;
     LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
     LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
     if (!DstTy.isValid() || !SrcTy.isValid())
       break;
 
-    LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
-    LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
+    LLT DstElTy = DstTy.getScalarType();
+    LLT SrcElTy = SrcTy.getScalarType();
     if (DstElTy.isPointer() || SrcElTy.isPointer())
       report("Generic extend/truncate can not operate on pointers", MI);
 
-    if (DstTy.isVector() != SrcTy.isVector()) {
-      report("Generic extend/truncate must be all-vector or all-scalar", MI);
-      // Generally we try to report as many issues as possible at once, but in
-      // this case it's not clear what should we be comparing the size of the
-      // scalar with: the size of the whole vector or its lane. Instead of
-      // making an arbitrary choice and emitting not so helpful message, let's
-      // avoid the extra noise and stop here.
-      break;
-    }
-    if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
-      report("Generic vector extend/truncate must preserve number of lanes",
-             MI);
+    verifyVectorElementMatch(DstTy, SrcTy, MI);
+
     unsigned DstSize = DstElTy.getSizeInBits();
     unsigned SrcSize = SrcElTy.getSizeInBits();
     switch (MI->getOpcode()) {
@@ -1061,6 +1156,17 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     }
     break;
   }
+  case TargetOpcode::G_SELECT: {
+    LLT SelTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT CondTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!SelTy.isValid() || !CondTy.isValid())
+      break;
+
+    // Scalar condition select on a vector is valid.
+    if (CondTy.isVector())
+      verifyVectorElementMatch(SelTy, CondTy, MI);
+    break;
+  }
   case TargetOpcode::G_MERGE_VALUES: {
     // G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
     // e.g. s2N = MERGE sN, sN
@@ -1070,6 +1176,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
     if (DstTy.isVector() || SrcTy.isVector())
       report("G_MERGE_VALUES cannot operate on vectors", MI);
+
+    const unsigned NumOps = MI->getNumOperands();
+    if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1))
+      report("G_MERGE_VALUES result size is inconsistent", MI);
+
+    for (unsigned I = 2; I != NumOps; ++I) {
+      if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy)
+        report("G_MERGE_VALUES source types do not match", MI);
+    }
+
     break;
   }
   case TargetOpcode::G_UNMERGE_VALUES: {
@@ -1092,18 +1208,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     // must match the dest vector size.
     LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
     LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
-    if (!DstTy.isVector() || SrcEltTy.isVector())
+    if (!DstTy.isVector() || SrcEltTy.isVector()) {
       report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+      break;
+    }
+
+    if (DstTy.getElementType() != SrcEltTy)
+      report("G_BUILD_VECTOR result element type must match source type", MI);
+
+    if (DstTy.getNumElements() != MI->getNumOperands() - 1)
+      report("G_BUILD_VECTOR must have an operand for each elemement", MI);
+
     for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
       if (MRI->getType(MI->getOperand(1).getReg()) !=
           MRI->getType(MI->getOperand(i).getReg()))
         report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
     }
-    if (DstTy.getSizeInBits() !=
-        SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
-      report("G_BUILD_VECTOR src operands total size don't match dest "
-             "size.",
-             MI);
+
     break;
   }
   case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -1144,6 +1265,176 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
       report("G_CONCAT_VECTOR num dest and source elements should match", MI);
     break;
   }
+  case TargetOpcode::G_ICMP:
+  case TargetOpcode::G_FCMP: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT SrcTy = MRI->getType(MI->getOperand(2).getReg());
+
+    if ((DstTy.isVector() != SrcTy.isVector()) ||
+        (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()))
+      report("Generic vector icmp/fcmp must preserve number of lanes", MI);
+
+    break;
+  }
+  case TargetOpcode::G_EXTRACT: {
+    const MachineOperand &SrcOp = MI->getOperand(1);
+    if (!SrcOp.isReg()) {
+      report("extract source must be a register", MI);
+      break;
+    }
+
+    const MachineOperand &OffsetOp = MI->getOperand(2);
+    if (!OffsetOp.isImm()) {
+      report("extract offset must be a constant", MI);
+      break;
+    }
+
+    unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+    unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+    if (SrcSize == DstSize)
+      report("extract source must be larger than result", MI);
+
+    if (DstSize + OffsetOp.getImm() > SrcSize)
+      report("extract reads past end of register", MI);
+    break;
+  }
+  case TargetOpcode::G_INSERT: {
+    const MachineOperand &SrcOp = MI->getOperand(2);
+    if (!SrcOp.isReg()) {
+      report("insert source must be a register", MI);
+      break;
+    }
+
+    const MachineOperand &OffsetOp = MI->getOperand(3);
+    if (!OffsetOp.isImm()) {
+      report("insert offset must be a constant", MI);
+      break;
+    }
+
+    unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+    unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+
+    if (DstSize <= SrcSize)
+      report("inserted size must be smaller than total register", MI);
+
+    if (SrcSize + OffsetOp.getImm() > DstSize)
+      report("insert writes past end of register", MI);
+
+    break;
+  }
+  case TargetOpcode::G_JUMP_TABLE: {
+    if (!MI->getOperand(1).isJTI())
+      report("G_JUMP_TABLE source operand must be a jump table index", MI);
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    if (!DstTy.isPointer())
+      report("G_JUMP_TABLE dest operand must have a pointer type", MI);
+    break;
+  }
+  case TargetOpcode::G_BRJT: {
+    if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+      report("G_BRJT src operand 0 must be a pointer type", MI);
+
+    if (!MI->getOperand(1).isJTI())
+      report("G_BRJT src operand 1 must be a jump table index", MI);
+
+    const auto &IdxOp = MI->getOperand(2);
+    if (!IdxOp.isReg() || MRI->getType(IdxOp.getReg()).isPointer())
+      report("G_BRJT src operand 2 must be a scalar reg type", MI);
+    break;
+  }
+  case TargetOpcode::G_INTRINSIC:
+  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+    // TODO: Should verify number of def and use operands, but the current
+    // interface requires passing in IR types for mangling.
+    const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
+    if (!IntrIDOp.isIntrinsicID()) {
+      report("G_INTRINSIC first src operand must be an intrinsic ID", MI);
+      break;
+    }
+
+    bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
+    unsigned IntrID = IntrIDOp.getIntrinsicID();
+    if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+      AttributeList Attrs
+        = Intrinsic::getAttributes(MF->getFunction().getContext(),
+                                   static_cast<Intrinsic::ID>(IntrID));
+      bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+      if (NoSideEffects && DeclHasSideEffects) {
+        report("G_INTRINSIC used with intrinsic that accesses memory", MI);
+        break;
+      }
+      if (!NoSideEffects && !DeclHasSideEffects) {
+        report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
+        break;
+      }
+    }
+
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MI->getNumOperands() < MCID.getNumOperands()) {
+    report("Too few operands", MI);
+    errs() << MCID.getNumOperands() << " operands expected, but "
+           << MI->getNumOperands() << " given.\n";
+  }
+
+  if (MI->isPHI()) {
+    if (MF->getProperties().hasProperty(
+            MachineFunctionProperties::Property::NoPHIs))
+      report("Found PHI instruction with NoPHIs property set", MI);
+
+    if (FirstNonPHI)
+      report("Found PHI instruction after non-PHI", MI);
+  } else if (FirstNonPHI == nullptr)
+    FirstNonPHI = MI;
+
+  // Check the tied operands.
+  if (MI->isInlineAsm())
+    verifyInlineAsm(MI);
+
+  // Check the MachineMemOperands for basic consistency.
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+                                  E = MI->memoperands_end();
+       I != E; ++I) {
+    if ((*I)->isLoad() && !MI->mayLoad())
+      report("Missing mayLoad flag", MI);
+    if ((*I)->isStore() && !MI->mayStore())
+      report("Missing mayStore flag", MI);
+  }
+
+  // Debug values must not have a slot index.
+  // Other instructions must have one, unless they are inside a bundle.
+  if (LiveInts) {
+    bool mapped = !LiveInts->isNotInMIMap(*MI);
+    if (MI->isDebugInstr()) {
+      if (mapped)
+        report("Debug instruction has a slot index", MI);
+    } else if (MI->isInsideBundle()) {
+      if (mapped)
+        report("Instruction inside bundle has a slot index", MI);
+    } else {
+      if (!mapped)
+        report("Missing slot index", MI);
+    }
+  }
+
+  if (isPreISelGenericOpcode(MCID.getOpcode())) {
+    verifyPreISelGenericInstruction(MI);
+    return;
+  }
+
+  StringRef ErrorInfo;
+  if (!TII->verifyInstruction(*MI, ErrorInfo))
+    report(ErrorInfo.data(), MI);
+
+  // Verify properties of various specific instruction types
+  switch (MI->getOpcode()) {
   case TargetOpcode::COPY: {
     if (foundErrors)
       break;
@@ -1193,7 +1484,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
 
     // TODO: verify we have properly encoded deopt arguments
-  };
+    break;
+  }
 }
 
 void
@@ -1356,7 +1648,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
           return;
         }
         if (SubIdx)  {
-          report("Generic virtual register does not subregister index", MO,
+          report("Generic virtual register does not allow subregister index", MO,
                  MONum);
           return;
         }
@@ -1911,6 +2203,10 @@ void MachineVerifier::visitMachineFunctionAfter() {
     verifyLiveVariables();
   if (LiveInts)
     verifyLiveIntervals();
+
+  for (auto CSInfo : MF->getCallSitesInfo())
+    if (!CSInfo.first->isCall())
+      report("Call site info referencing instruction that is not call", MF);
 }
 
 void MachineVerifier::verifyLiveVariables() {
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 82b6d642c73b..2db1e86905a4 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,9 +1,8 @@
 //===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,7 +36,7 @@ static bool isHazard(const SDep &Dep) {
   return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
 }
 
-static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
   // between them.
@@ -49,7 +48,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
     if (SI.isCluster())
       return false;
   // Though the reachability checks above could be made more generic,
-  // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid,
+  // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
   // the extra computation cost makes it less interesting in general cases.
 
   // Create a single weak edge between the adjacent instrs. The only effect is
@@ -118,7 +117,7 @@ namespace {
 class MacroFusion : public ScheduleDAGMutation {
   ShouldSchedulePredTy shouldScheduleAdjacent;
   bool FuseBlock;
-  bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+  bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
 
 public:
   MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
@@ -129,9 +128,7 @@ public:
 
 } // end anonymous namespace
 
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
-  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
   if (FuseBlock)
     // For each of the SUnits in the scheduling block, try to fuse the instr in
     // it with one in its predecessors.
@@ -145,7 +142,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
 
 /// Implement the fusion of instr pairs in the scheduling DAG,
 /// anchored at the instr in AnchorSU..
-bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) {
   const MachineInstr &AnchorMI = *AnchorSU.getInstr();
   const TargetInstrInfo &TII = *DAG.TII;
   const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 770f6c5b0403..c70b62252139 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -1,9 +1,8 @@
 //===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -182,11 +181,12 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
       if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
         continue;
 
-      // for the case SingleValReg taken from copy instr
-      MRI->clearKillFlags(SingleValReg);
-
       MRI->replaceRegWith(OldReg, SingleValReg);
       MI->eraseFromParent();
+
+      // The kill flags on OldReg and SingleValReg may no longer be correct.
+      MRI->clearKillFlags(SingleValReg);
+
       ++NumPHICycles;
       Changed = true;
       continue;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b9801c6fd97b..948a5835438c 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -1,9 +1,8 @@
 //===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 4e67ff2e5088..3a2cdaf3bd3c 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -1,9 +1,8 @@
 //===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index b997d7ac5f4f..0ff3a41f47d3 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -1,9 +1,8 @@
 //=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp
index bc3f2a6e9b5a..e4c73658cb4f 100644
--- a/lib/CodeGen/ParallelCG.cpp
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -1,9 +1,8 @@
 //===-- ParallelCG.cpp ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index afb4b0a7e174..a3fa1b0ad8ed 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -1,9 +1,8 @@
 //===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 1d058ccfb633..b918396aa8c5 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1307,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
 
 /// Check whether MI is a candidate for folding into a later instruction.
 /// We only fold loads to virtual registers and the virtual register defined
-/// has a single use.
+/// has a single user.
 bool PeepholeOptimizer::isLoadFoldable(
     MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
   if (!MI.canFoldAsLoad() || !MI.mayLoad())
@@ -1317,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
     return false;
 
   unsigned Reg = MI.getOperand(0).getReg();
-  // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+  // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
   // loads. It should be checked when processing uses of the load, since
   // uses can be removed during peephole.
   if (!MI.getOperand(0).getSubReg() &&
       TargetRegisterInfo::isVirtualRegister(Reg) &&
-      MRI->hasOneNonDBGUse(Reg)) {
+      MRI->hasOneNonDBGUser(Reg)) {
     FoldAsLoadDefCandidates.insert(Reg);
     return true;
   }
@@ -1778,6 +1777,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
               LocalMIs.erase(MI);
               LocalMIs.erase(DefMI);
               LocalMIs.insert(FoldMI);
+              if (MI->isCall())
+                MI->getMF()->updateCallSiteInfo(MI, FoldMI);
               MI->eraseFromParent();
               DefMI->eraseFromParent();
               MRI->markUsesInDebugValueAsUndef(FoldedReg);
@@ -1826,7 +1827,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
   assert(Def->isBitcast() && "Invalid definition");
 
   // Bail if there are effects that a plain copy will not expose.
-  if (Def->hasUnmodeledSideEffects())
+  if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
     return ValueTrackerResult();
 
   // Bitcasts with more than one def are not supported.
@@ -1901,13 +1902,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
   // Check if one of the operand defines the subreg we are interested in.
   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
-    if (RegSeqInput.SubIdx == DefSubReg) {
-      if (RegSeqInput.SubReg)
-        // Bail if we have to compose sub registers.
-        return ValueTrackerResult();
-
+    if (RegSeqInput.SubIdx == DefSubReg)
       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
-    }
   }
 
   // If the subreg we are tracking is super-defined by another subreg,
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index f9d4a9746e41..0a3838617bc5 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index dd0a5fe1b39d..5bea9f2893c9 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -1,9 +1,8 @@
 //===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index b0e9ac03612d..2752e186875c 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -1,9 +1,8 @@
 //===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,7 +44,7 @@ static bool lowerLoadRelative(Function &F) {
     Value *OffsetPtr =
         B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
     Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
-    Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
+    Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4);
 
     Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
 
@@ -65,9 +64,9 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
   // If we haven't already looked up this function, check to see if the
   // program already contains a function with this name.
   Module *M = F.getParent();
-  Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+  FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
 
-  if (Function* Fn = dyn_cast<Function>(FCache)) {
+  if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) {
     Fn->setLinkage(F.getLinkage());
     if (setNonLazyBind && !Fn->isWeakForLinker()) {
       // If we have Native ARC, set nonlazybind attribute for these APIs for
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 7e9b4af12ee9..b38987ad1c90 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -1,9 +1,8 @@
 //===---------------------- ProcessImplicitDefs.cpp -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 23754e487a18..d463bee67595 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1,9 +1,8 @@
 //===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -169,6 +169,46 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 /// StackObjSet - A set of stack object indexes
 using StackObjSet = SmallSetVector<int, 8>;
 
+using SavedDbgValuesMap =
+    SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;
+
+/// Stash DBG_VALUEs that describe parameters and which are placed at the start
+/// of the block. Later on, after the prologue code has been emitted, the
+/// stashed DBG_VALUEs will be reinserted at the start of the block.
+static void stashEntryDbgValues(MachineBasicBlock &MBB,
+                                SavedDbgValuesMap &EntryDbgValues) {
+  SmallVector<const MachineInstr *, 4> FrameIndexValues;
+
+  for (auto &MI : MBB) {
+    if (!MI.isDebugInstr())
+      break;
+    if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
+      continue;
+    if (MI.getOperand(0).isFI()) {
+      // We can only emit valid locations for frame indices after the frame
+      // setup, so do not stash them away; remember them for the overlap check.
+      FrameIndexValues.push_back(&MI);
+      continue;
+    }
+    const DILocalVariable *Var = MI.getDebugVariable();
+    const DIExpression *Expr = MI.getDebugExpression();
+    auto Overlaps = [Var, Expr](const MachineInstr *DV) {
+      return Var == DV->getDebugVariable() &&
+             Expr->fragmentsOverlap(DV->getDebugExpression());
+    };
+    // See if the debug value overlaps with any preceding debug value that will
+    // not be stashed. If that is the case, then we can't stash this value, as
+    // we would then reorder the values at reinsertion.
+    if (llvm::none_of(FrameIndexValues, Overlaps))
+      EntryDbgValues[&MBB].push_back(&MI);
+  }
+
+  // Remove stashed debug values from the block.
+  if (EntryDbgValues.count(&MBB))
+    for (auto *MI : EntryDbgValues[&MBB])
+      MI->removeFromParent();
+}
+
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 bool PEI::runOnMachineFunction(MachineFunction &MF) {
@@ -179,8 +219,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
 
   RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
-  FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
-    TRI->requiresFrameIndexReplacementScavenging(MF);
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
@@ -192,6 +230,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   // place all spills in the entry block, all restores in return blocks.
   calculateSaveRestoreBlocks(MF);
 
+  // Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
+  SavedDbgValuesMap EntryDbgValues;
+  for (MachineBasicBlock *SaveBlock : SaveBlocks)
+    stashEntryDbgValues(*SaveBlock, EntryDbgValues);
+
   // Handle CSR spilling and restoring, for targets that need it.
   if (MF.getTarget().usesPhysRegsForPEI())
     spillCalleeSavedRegs(MF);
@@ -211,6 +254,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
   if (!F.hasFnAttribute(Attribute::Naked))
     insertPrologEpilogCode(MF);
 
+  // Reinsert stashed debug values at the start of the entry blocks.
+  for (auto &I : EntryDbgValues)
+    I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
+
   // Replace all MO_FrameIndex operands with physical register references
   // and actual offsets.
   //
@@ -495,9 +542,16 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
     for (const CalleeSavedInfo &CS : CSI) {
       // Insert the spill to the stack frame.
       unsigned Reg = CS.getReg();
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
-                              TRI);
+
+      if (CS.isSpilledToReg()) {
+        BuildMI(SaveBlock, I, DebugLoc(),
+                TII.get(TargetOpcode::COPY), CS.getDstReg())
+          .addReg(Reg, getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+                                TRI);
+      }
     }
   }
 }
@@ -517,12 +571,17 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CI : reverse(CSI)) {
       unsigned Reg = CI.getReg();
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
-      assert(I != RestoreBlock.begin() &&
-             "loadRegFromStackSlot didn't insert any code!");
-      // Insert in reverse order.  loadRegFromStackSlot can insert
-      // multiple instructions.
+      if (CI.isSpilledToReg()) {
+        BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+          .addReg(CI.getDstReg(), getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+        assert(I != RestoreBlock.begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order.  loadRegFromStackSlot can insert
+        // multiple instructions.
+      }
     }
   }
 }
@@ -615,10 +674,13 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
   SmallVector<int, 16> AllocatedFrameSlots;
   // Add fixed objects.
   for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
-    AllocatedFrameSlots.push_back(i);
+    // StackSlot scavenging is only implemented for the default stack.
+    if (MFI.getStackID(i) == TargetStackID::Default)
+      AllocatedFrameSlots.push_back(i);
   // Add callee-save objects.
   for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
-    AllocatedFrameSlots.push_back(i);
+    if (MFI.getStackID(i) == TargetStackID::Default)
+      AllocatedFrameSlots.push_back(i);
 
   for (int i : AllocatedFrameSlots) {
     // These are converted from int64_t, but they should always fit in int
@@ -740,11 +802,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   // Skew to be applied to alignment.
   unsigned Skew = TFI.getStackAlignmentSkew(MF);
 
+#ifdef EXPENSIVE_CHECKS
+  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
+    if (!MFI.isDeadObjectIndex(i) &&
+        MFI.getStackID(i) == TargetStackID::Default)
+      assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() &&
+             "MaxAlignment is invalid");
+#endif
+
   // If there are fixed sized objects that are preallocated in the local area,
   // non-fixed objects can't be allocated right at the start of local area.
   // Adjust 'Offset' to point to the end of last fixed sized preallocated
   // object.
   for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+    if (MFI.getStackID(i) !=
+        TargetStackID::Default) // Only allocate objects on the default stack.
+      continue;
+
     int64_t FixedOff;
     if (StackGrowsDown) {
       // The maximum distance from the stack pointer is at lower address of
@@ -763,6 +837,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   // callee saved registers.
   if (StackGrowsDown) {
     for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      if (MFI.getStackID(i) !=
+          TargetStackID::Default) // Only allocate objects on the default stack.
+        continue;
+
       // If the stack grows down, we need to add the size to find the lowest
       // address of the object.
       Offset += MFI.getObjectSize(i);
@@ -777,6 +855,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
     // Be careful about underflow in comparisons agains MinCSFrameIndex.
     for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+      if (MFI.getStackID(i) !=
+          TargetStackID::Default) // Only allocate objects on the default stack.
+        continue;
+
       if (MFI.isDeadObjectIndex(i))
         continue;
 
@@ -845,18 +927,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   // Make sure that the stack protector comes before the local variables on the
   // stack.
   SmallSet<int, 16> ProtectedObjs;
-  if (MFI.getStackProtectorIndex() >= 0) {
+  if (MFI.hasStackProtectorIndex()) {
+    int StackProtectorFI = MFI.getStackProtectorIndex();
     StackObjSet LargeArrayObjs;
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
 
-    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
-                      Offset, MaxAlign, Skew);
+    // If we need a stack protector, we need to make sure that
+    // LocalStackSlotPass didn't already allocate a slot for it.
+    // If we are told to use the LocalStackAllocationBlock, the stack protector
+    // is expected to be already pre-allocated.
+    if (!MFI.getUseLocalStackAllocationBlock())
+      AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+                        Skew);
+    else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+      llvm_unreachable(
+          "Stack protector not pre-allocated by LocalStackSlotPass.");
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
-      if (MFI.isObjectPreAllocated(i) &&
-          MFI.getUseLocalStackAllocationBlock())
+      if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
         continue;
       if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
         continue;
@@ -864,8 +954,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
         continue;
       if (MFI.isDeadObjectIndex(i))
         continue;
-      if (MFI.getStackProtectorIndex() == (int)i ||
-          EHRegNodeFrameIndex == (int)i)
+      if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
+        continue;
+      if (MFI.getStackID(i) !=
+          TargetStackID::Default) // Only allocate objects on the default stack.
         continue;
 
       switch (MFI.getObjectSSPLayout(i)) {
@@ -884,6 +976,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
       llvm_unreachable("Unexpected SSPLayoutKind.");
     }
 
+    // We expect **all** the protected stack objects to be pre-allocated by
+    // LocalStackSlotPass. If it turns out that PEI still has to allocate some
+    // of them, we may end up messing up the expected order of the objects.
+    if (MFI.getUseLocalStackAllocationBlock() &&
+        !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
+          AddrOfObjs.empty()))
+      llvm_unreachable("Found protected stack objects not pre-allocated by "
+                       "LocalStackSlotPass.");
+
     AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                           Offset, MaxAlign, Skew);
     AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
@@ -905,11 +1006,13 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
       continue;
     if (MFI.isDeadObjectIndex(i))
       continue;
-    if (MFI.getStackProtectorIndex() == (int)i ||
-        EHRegNodeFrameIndex == (int)i)
+    if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
       continue;
     if (ProtectedObjs.count(i))
       continue;
+    if (MFI.getStackID(i) !=
+        TargetStackID::Default) // Only allocate objects on the default stack.
+      continue;
 
     // Add the objects that we need to allocate to our working set.
     ObjectsToAllocate.push_back(i);
@@ -1026,8 +1129,16 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
 /// register references and actual offsets.
 void PEI::replaceFrameIndices(MachineFunction &MF) {
-  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
-  if (!TFI.needsFrameIndexResolution(MF)) return;
+  const auto &ST = MF.getSubtarget();
+  const TargetFrameLowering &TFI = *ST.getFrameLowering();
+  if (!TFI.needsFrameIndexResolution(MF))
+    return;
+
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+  // Allow the target to determine this after knowing the frame size.
+  FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+    TRI->requiresFrameIndexReplacementScavenging(MF);
 
   // Store SPAdj at exit of a basic block.
   SmallVector<int, 8> SPState;
@@ -1095,12 +1206,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
         assert(i == 0 && "Frame indices can only appear as the first "
                          "operand of a DBG_VALUE machine instruction");
         unsigned Reg;
+        unsigned FrameIdx = MI.getOperand(0).getIndex();
+        unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
         int64_t Offset =
-            TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+            TFI->getFrameIndexReference(MF, FrameIdx, Reg);
         MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
         MI.getOperand(0).setIsDebug();
-        auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
-                                             DIExpression::NoDeref, Offset);
+
+        const DIExpression *DIExpr = MI.getDebugExpression();
+
+        // If we have a direct DBG_VALUE, and its location expression isn't
+        // currently complex, then adding an offset will morph it into a
+        // complex location that is interpreted as being a memory address.
+        // This changes a pointer-valued variable to dereference that pointer,
+        // which is incorrect. Fix by adding DW_OP_stack_value.
+        unsigned PrependFlags = DIExpression::ApplyOffset;
+        if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+          PrependFlags |= DIExpression::StackValue;
+
+        // If we have a DBG_VALUE that is indirect and has an implicit location
+        // expression, we need to insert a deref before prepending a memory
+        // location expression. Also, after doing this we change the DBG_VALUE
+        // to be direct.
+        if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+          SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+          bool WithStackValue = true;
+          DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+          // Make the DBG_VALUE direct.
+          MI.getOperand(1).ChangeToRegister(0, false);
+        }
+        DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
         MI.getOperand(3).setMetadata(DIExpr);
         continue;
       }
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 6ca8d86e3f8e..da3ef4b771f3 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index a9f0a9387297..f05c97ad621e 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -1,9 +1,8 @@
 //===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index bc28a054c680..1cbe75c27d13 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -1,9 +1,8 @@
 //===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
@@ -119,16 +119,19 @@ void RegAllocBase::allocatePhysRegs() {
       for (MachineRegisterInfo::reg_instr_iterator
            I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
            I != E; ) {
-        MachineInstr *TmpMI = &*(I++);
-        if (TmpMI->isInlineAsm()) {
-          MI = TmpMI;
+        MI = &*(I++);
+        if (MI->isInlineAsm())
           break;
-        }
       }
-      if (MI)
+      if (MI && MI->isInlineAsm()) {
         MI->emitError("inline assembly requires more registers than available");
-      else
+      } else if (MI) {
+        LLVMContext &Context =
+            MI->getParent()->getParent()->getMMI().getModule()->getContext();
+        Context.emitError("ran out of registers during register allocation");
+      } else {
         report_fatal_error("ran out of registers during register allocation");
+      }
       // Keep going after reporting the error.
       VRM->assignVirt2Phys(VirtReg->reg,
                  RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 686ffc36e049..6a7cc5ba4308 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -1,9 +1,8 @@
 //===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index daeff3fc3963..46f6946f7003 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,9 +1,8 @@
 //===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index eb3a4e481f5d..2ffa5e389f89 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1,9 +1,8 @@
 //===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,6 +101,10 @@ namespace {
 
     DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
 
+    /// Has a bit set for every virtual register for which it was determined
+    /// that it is alive across blocks.
+    BitVector MayLiveAcrossBlocks;
+
     /// State of a physical register.
     enum RegState {
       /// A disabled register is not available for allocation, but an alias may
@@ -152,6 +155,7 @@ namespace {
     enum : unsigned {
       spillClean = 50,
       spillDirty = 100,
+      spillPrefBonus = 20,
       spillImpossible = ~0u
     };
 
@@ -204,19 +208,26 @@ namespace {
     }
 
     void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+    void allocVirtRegUndef(MachineOperand &MO);
     MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
                             unsigned Hint);
     LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
                            unsigned Hint);
-    void spillAll(MachineBasicBlock::iterator MI);
+    void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
     bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
 
+    unsigned traceCopies(unsigned VirtReg) const;
+    unsigned traceCopyChain(unsigned Reg) const;
+
     int getStackSpaceFor(unsigned VirtReg);
     void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
                MCPhysReg AssignedReg, bool Kill);
     void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
                 MCPhysReg PhysReg);
 
+    bool mayLiveOut(unsigned VirtReg);
+    bool mayLiveIn(unsigned VirtReg);
+
     void dumpState();
   };
 
@@ -251,6 +262,53 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
   return FrameIdx;
 }
 
+/// Conservative live-out query: a false result means \p VirtReg is known not
+/// to be live out of the current block.
+bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+  const unsigned VRegIdx = TargetRegisterInfo::virtReg2Index(VirtReg);
+
+  // Once flagged, the register is live-out iff the block has successors.
+  if (MayLiveAcrossBlocks.test(VRegIdx))
+    return !MBB->succ_empty();
+
+  // If the block loops back to itself we would have to check whether the use
+  // comes after the def; conservatively flag the register instead.
+  if (MBB->isSuccessor(MBB)) {
+    MayLiveAcrossBlocks.set(VRegIdx);
+    return true;
+  }
+
+  // Flag the register as soon as a non-debug instruction referencing it lies
+  // in another block, or once \p MaxScanned of them have been counted.
+  static const unsigned MaxScanned = 8;
+  unsigned NumScanned = 0;
+  for (const MachineInstr &RefMI : MRI->reg_nodbg_instructions(VirtReg)) {
+    if (RefMI.getParent() == MBB && ++NumScanned < MaxScanned)
+      continue;
+    MayLiveAcrossBlocks.set(VRegIdx);
+    return !MBB->succ_empty();
+  }
+  return false;
+}
+
+/// Returns true unless \p VirtReg is known not to be live into this block.
+bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+  const unsigned VRegIdx = TargetRegisterInfo::virtReg2Index(VirtReg);
+  if (MayLiveAcrossBlocks.test(VRegIdx))
+    return !MBB->pred_empty();
+
+  // Flag it if a def is in another block or \p MaxScanned defs were counted.
+  static const unsigned MaxScanned = 8;
+  unsigned NumScanned = 0;
+  for (const MachineInstr &DefMI : MRI->def_instructions(VirtReg)) {
+    if (DefMI.getParent() == MBB && ++NumScanned < MaxScanned)
+      continue;
+    MayLiveAcrossBlocks.set(VRegIdx);
+    return !MBB->pred_empty();
+  }
+  return false;
+}
+
 /// Insert spill instruction for \p AssignedReg before \p Before. Update
 /// DBG_VALUEs with \p VirtReg operands with the stack slot.
 void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
@@ -374,7 +432,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
 }
 
 /// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
   if (LiveVirtRegs.empty())
     return;
   // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -382,6 +440,8 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
   for (LiveReg &LR : LiveVirtRegs) {
     if (!LR.PhysReg)
       continue;
+    if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+      continue;
     spillVirtReg(MI, LR);
   }
   LiveVirtRegs.clear();
@@ -558,8 +618,48 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
   setPhysRegState(PhysReg, VirtReg);
 }
 
+/// An instruction is treated as a coalescing candidate only when
+/// MachineInstr::isFullCopy() reports it as a full copy.
+static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); }
+
+/// Chase full-copy defs from \p Reg; yields the physreg reached, else 0.
+unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+  static const unsigned ChainLengthLimit = 3;
+  for (unsigned Step = 0; Step <= ChainLengthLimit; ++Step) {
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      return Reg;
+    assert(TargetRegisterInfo::isVirtualRegister(Reg));
+
+    MachineInstr *UniqueDef = MRI->getUniqueVRegDef(Reg);
+    if (UniqueDef == nullptr || !isCoalescable(*UniqueDef))
+      return 0;
+    Reg = UniqueDef->getOperand(1).getReg();
+  }
+  return 0;
+}
+
+/// Look at up to DefLimit definitions of \p VirtReg. For each one that is a
+/// coalescable copy, trace its source through the copy chain and return the
+/// first physical register found; returns 0 when none is found.
+unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+  static const unsigned DefLimit = 3;
+  unsigned NumDefsSeen = 0;
+  for (const MachineInstr &DefMI : MRI->def_instructions(VirtReg)) {
+    if (isCoalescable(DefMI)) {
+      const unsigned CopySrc = DefMI.getOperand(1).getReg();
+      if (unsigned PhysReg = traceCopyChain(CopySrc))
+        return PhysReg;
+    }
+
+    // Stop once the budget of inspected definitions is used up.
+    if (++NumDefsSeen >= DefLimit)
+      break;
+  }
+  return 0;
+}
+
 /// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
   const unsigned VirtReg = LR.VirtReg;
 
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
@@ -567,32 +667,54 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
 
   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
   LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
-                    << " in class " << TRI->getRegClassName(&RC) << '\n');
+                    << " in class " << TRI->getRegClassName(&RC)
+                    << " with hint " << printReg(Hint0, TRI) << '\n');
 
   // Take hint when possible.
-  if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
-      MRI->isAllocatable(Hint) && RC.contains(Hint)) {
+  if (TargetRegisterInfo::isPhysicalRegister(Hint0) &&
+      MRI->isAllocatable(Hint0) && RC.contains(Hint0)) {
     // Ignore the hint if we would have to spill a dirty register.
-    unsigned Cost = calcSpillCost(Hint);
+    unsigned Cost = calcSpillCost(Hint0);
     if (Cost < spillDirty) {
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+                        << '\n');
       if (Cost)
-        definePhysReg(MI, Hint, regFree);
-      assignVirtToPhysReg(LR, Hint);
+        definePhysReg(MI, Hint0, regFree);
+      assignVirtToPhysReg(LR, Hint0);
       return;
+    } else {
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+                        << "occupied\n");
     }
+  } else {
+    Hint0 = 0;
   }
 
-  // First try to find a completely free register.
-  ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
-  for (MCPhysReg PhysReg : AllocationOrder) {
-    if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
-      assignVirtToPhysReg(LR, PhysReg);
+  // Try other hint.
+  unsigned Hint1 = traceCopies(VirtReg);
+  if (TargetRegisterInfo::isPhysicalRegister(Hint1) &&
+      MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+      !isRegUsedInInstr(Hint1)) {
+    // Ignore the hint if we would have to spill a dirty register.
+    unsigned Cost = calcSpillCost(Hint1);
+    if (Cost < spillDirty) {
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+                        << '\n');
+      if (Cost)
+        definePhysReg(MI, Hint1, regFree);
+      assignVirtToPhysReg(LR, Hint1);
       return;
+    } else {
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+                        << "occupied\n");
     }
+  } else {
+    Hint1 = 0;
   }
 
   MCPhysReg BestReg = 0;
   unsigned BestCost = spillImpossible;
+  ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
   for (MCPhysReg PhysReg : AllocationOrder) {
     LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
     unsigned Cost = calcSpillCost(PhysReg);
@@ -602,6 +724,10 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
       assignVirtToPhysReg(LR, PhysReg);
       return;
     }
+
+    if (PhysReg == Hint1 || PhysReg == Hint0)
+      Cost -= spillPrefBonus;
+
     if (Cost < BestCost) {
       BestReg = PhysReg;
       BestCost = Cost;
@@ -624,6 +750,31 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
   assignVirtToPhysReg(LR, BestReg);
 }
 
+/// Rewrite the undef virtual-register use \p MO to a physical register: the
+/// one its vreg currently occupies, else the first in the allocation order.
+void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
+  assert(MO.isUndef() && "expected undef use");
+  const unsigned VirtReg = MO.getReg();
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg");
+
+  // Default to the register already assigned to the vreg, if there is one.
+  const LiveRegMap::const_iterator LiveIt = findLiveVirtReg(VirtReg);
+  MCPhysReg Chosen = LiveIt != LiveVirtRegs.end() ? LiveIt->PhysReg : 0;
+  if (!Chosen) {
+    const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+    ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+    assert(!AllocationOrder.empty() && "Allocation order must not be empty");
+    Chosen = AllocationOrder[0];
+  }
+
+  if (const unsigned SubRegIdx = MO.getSubReg()) {
+    Chosen = TRI->getSubReg(Chosen, SubRegIdx);
+    MO.setSubReg(0);
+  }
+  MO.setReg(Chosen);
+  MO.setIsRenamable(true);
+}
+
 /// Allocates a register for VirtReg and mark it as dirty.
 MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
                                       unsigned VirtReg, unsigned Hint) {
@@ -941,12 +1092,23 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
 
   // Second scan.
   // Allocate virtreg uses.
+  bool HasUndefUse = false;
   for (unsigned I = 0; I != VirtOpEnd; ++I) {
     MachineOperand &MO = MI.getOperand(I);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
     if (MO.isUse()) {
+      if (MO.isUndef()) {
+        HasUndefUse = true;
+        // There is no need to allocate a register for an undef use.
+        continue;
+      }
+
+      // Populate MayLiveAcrossBlocks in case the use block is allocated before
+      // the def block (removing the vreg uses).
+      mayLiveIn(Reg);
+
       LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
       MCPhysReg PhysReg = LR.PhysReg;
       CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
@@ -955,6 +1117,22 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     }
   }
 
+  // Allocate undef operands. This is a separate step because in a situation
+  // like  ` = OP undef %X, %X`    both operands need the same register assign
+  // so we should perform the normal assignment first.
+  if (HasUndefUse) {
+    for (MachineOperand &MO : MI.uses()) {
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+
+      assert(MO.isUndef() && "Should only have undef virtreg uses left");
+      allocVirtRegUndef(MO);
+    }
+  }
+
   // Track registers defined by instruction - early clobbers and tied uses at
   // this point.
   UsedInInstr.clear();
@@ -979,10 +1157,24 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     // definitions may be used later on and we do not want to reuse
     // those for virtual registers in between.
     LLVM_DEBUG(dbgs() << "  Spilling remaining registers before call.\n");
-    spillAll(MI);
+    spillAll(MI, /*OnlyLiveOut*/ false);
   }
 
   // Third scan.
+  // Mark all physreg defs as used before allocating virtreg defs.
+  for (unsigned I = 0; I != DefOpEnd; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+      continue;
+    unsigned Reg = MO.getReg();
+
+    if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
+        !MRI->isAllocatable(Reg))
+      continue;
+    definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+  }
+
+  // Fourth scan.
   // Allocate defs and collect dead defs.
   for (unsigned I = 0; I != DefOpEnd; ++I) {
     const MachineOperand &MO = MI.getOperand(I);
@@ -990,11 +1182,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
       continue;
     unsigned Reg = MO.getReg();
 
-    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-      if (!MRI->isAllocatable(Reg)) continue;
-      definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+    // We have already dealt with phys regs in the previous scan.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
       continue;
-    }
     MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
     if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
       VirtDead.push_back(Reg);
@@ -1089,7 +1279,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
 
   // Spill all physical registers holding virtual registers now.
   LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
-  spillAll(MBB.getFirstTerminator());
+  spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
 
   // Erase all the coalesced copies. We are delaying it until now because
   // LiveVirtRegs might refer to the instrs.
@@ -1118,6 +1308,8 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
   unsigned NumVirtRegs = MRI->getNumVirtRegs();
   StackSlotForVirtReg.resize(NumVirtRegs);
   LiveVirtRegs.setUniverse(NumVirtRegs);
+  MayLiveAcrossBlocks.clear();
+  MayLiveAcrossBlocks.resize(NumVirtRegs);
 
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineBasicBlock &MBB : MF)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 81b21b442437..771fc46415db 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1,9 +1,8 @@
 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -138,7 +137,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
               cl::init(0), cl::Hidden);
 
 static cl::opt<bool> ConsiderLocalIntervalCost(
-    "condsider-local-interval-cost", cl::Hidden,
+    "consider-local-interval-cost", cl::Hidden,
     cl::desc("Consider the cost of local intervals created by a split "
              "candidate when choosing the best split candidate."),
     cl::init(false));
@@ -465,7 +464,8 @@ private:
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
   unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
-  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&,
+                            const SmallVirtRegSet&);
   bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
                                    SlotIndex Start, SlotIndex End,
                                    EvictionCost &MaxCost);
@@ -479,9 +479,11 @@ private:
                                   const SmallVirtRegSet &FixedRegisters);
 
   unsigned tryAssign(LiveInterval&, AllocationOrder&,
-                     SmallVectorImpl<unsigned>&);
+                     SmallVectorImpl<unsigned>&,
+                     const SmallVirtRegSet&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
+                    SmallVectorImpl<unsigned>&, unsigned,
+                    const SmallVirtRegSet&);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<unsigned>&);
   unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);
@@ -508,7 +510,8 @@ private:
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
     SmallVectorImpl<unsigned>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<unsigned>&);
+                    SmallVectorImpl<unsigned>&,
+                    const SmallVirtRegSet&);
   unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                    SmallVectorImpl<unsigned> &,
                                    SmallVirtRegSet &, unsigned);
@@ -758,7 +761,8 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
 /// tryAssign - Try to assign VirtReg to an available register.
 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                              AllocationOrder &Order,
-                             SmallVectorImpl<unsigned> &NewVRegs) {
+                             SmallVectorImpl<unsigned> &NewVRegs,
+                             const SmallVirtRegSet &FixedRegisters) {
   Order.rewind();
   unsigned PhysReg;
   while ((PhysReg = Order.next()))
@@ -776,7 +780,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
       LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
       EvictionCost MaxCost;
       MaxCost.setBrokenHints(1);
-      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+      if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) {
         evictInterference(VirtReg, Hint, NewVRegs);
         return Hint;
       }
@@ -794,7 +798,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
 
   LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                     << Cost << '\n');
-  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
   return CheapReg ? CheapReg : PhysReg;
 }
 
@@ -866,7 +870,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
 ///                when returning true.
 /// @returns True when interference can be evicted cheaper than MaxCost.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                    bool IsHint, EvictionCost &MaxCost) {
+                                    bool IsHint, EvictionCost &MaxCost,
+                                    const SmallVirtRegSet &FixedRegisters) {
   // It is only possible to evict virtual register interference.
   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
     return false;
@@ -896,6 +901,13 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
              "Only expecting virtual register interference from query");
+
+      // Do not allow eviction of a virtual register if we are in the middle
+      // of last-chance recoloring and this virtual register is one that we
+      // have scavenged a physical register for.
+      if (FixedRegisters.count(Intf->reg))
+        return false;
+
       // Never evict spill products. They cannot split or spill.
       if (getStage(*Intf) == RS_Done)
         return false;
@@ -1094,7 +1106,8 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<unsigned> &NewVRegs,
-                            unsigned CostPerUseLimit) {
+                            unsigned CostPerUseLimit,
+                            const SmallVirtRegSet &FixedRegisters) {
   NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                      TimePassesIsEnabled);
 
@@ -1142,7 +1155,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
       continue;
     }
 
-    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
+                              FixedRegisters))
       continue;
 
     // Best so far.
@@ -2248,8 +2262,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
     LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
     // Constrain to VirtReg's live range.
-    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
-                                   Uses.front().getRegSlot()) - RMS.begin();
+    unsigned ri =
+        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
     unsigned re = RMS.size();
     for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
       // Look for Uses[i] <= RMS <= Uses[i+1].
@@ -2444,7 +2458,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 /// assignable.
 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
-                            SmallVectorImpl<unsigned>&NewVRegs) {
+                            SmallVectorImpl<unsigned>&NewVRegs,
+                            const SmallVirtRegSet &FixedRegisters) {
   // Ranges must be Split2 or less.
   if (getStage(VirtReg) >= RS_Spill)
     return 0;
@@ -2472,7 +2487,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
   if (SA->didRepairRange()) {
     // VirtReg has changed, so all cached queries are invalid.
     Matrix->invalidateVirtRegs();
-    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
       return PhysReg;
   }
 
@@ -2611,6 +2626,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
   DenseMap<unsigned, unsigned> VirtRegToPhysReg;
   // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
   // this recoloring "session".
+  assert(!FixedRegisters.count(VirtReg.reg));
   FixedRegisters.insert(VirtReg.reg);
   SmallVector<unsigned, 4> CurrentNewVRegs;
 
@@ -2858,14 +2874,14 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
     if (!Instr.isFullCopy())
       continue;
     // Look for the other end of the copy.
-    unsigned OtherReg = Instr.getOperand(0).getReg();
+    Register OtherReg = Instr.getOperand(0).getReg();
     if (OtherReg == Reg) {
       OtherReg = Instr.getOperand(1).getReg();
       if (OtherReg == Reg)
         continue;
     }
     // Get the current assignment.
-    unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+    Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
                                 ? OtherReg
                                 : VRM->getPhys(OtherReg);
     // Push the collected information.
@@ -3022,7 +3038,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   unsigned CostPerUseLimit = ~0u;
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
-  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
     // If VirtReg got an assignment, the eviction info is no longre relevant.
     LastEvicted.clearEvicteeInfo(VirtReg.reg);
     // When NewVRegs is not empty, we may have made decisions such as evicting
@@ -3049,7 +3065,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   // get a second chance until they have been split.
   if (Stage != RS_Split)
     if (unsigned PhysReg =
-            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
+                     FixedRegisters)) {
       unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
       // If VirtReg has a hint and that hint is broken record this
       // virtual register as a recoloring candidate for broken hint.
@@ -3079,7 +3096,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   if (Stage < RS_Spill) {
     // Try splitting VirtReg or interferences.
     unsigned NewVRegSizeBefore = NewVRegs.size();
-    unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+    unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
     if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
       // If VirtReg got split, the eviction info is no longre relevant.
       LastEvicted.clearEvicteeInfo(VirtReg.reg);
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c19001c8403d..7a5a6c148ed4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,9 +1,8 @@
 //===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 66c7c5cd7dbf..b37dfada7101 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -1,9 +1,8 @@
 //===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -78,14 +77,48 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
   return new RegUsageInfoCollector();
 }
 
+// TODO: Move to hook somewhere?
+
+// Return true if it is useful to track the used registers for IPRA / no CSR
+// optimizations. This is not useful for entry points, and computing the
+// register usage information is expensive.
+static bool isCallableFunction(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_KERNEL:
+    return false;
+  default:
+    return true;
+  }
+}
+
 bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const LLVMTargetMachine &TM = MF.getTarget();
 
   LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
-                    << " -------------------- \n");
-  LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+                    << " -------------------- \nFunction Name : "
+                    << MF.getName() << '\n');
+
+  // Analyzing the register usage may be expensive on some targets.
+  if (!isCallableFunction(MF)) {
+    LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
+    return false;
+  }
+
+  // If there are no callers, there's no point in computing more precise
+  // register usage here.
+  if (MF.getFunction().use_empty()) {
+    LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
+    return false;
+  }
 
   std::vector<uint32_t> RegMask;
 
@@ -111,6 +144,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
   };
   // Scan all the physical registers. When a register is defined in the current
   // function set it and all the aliasing registers as defined in the regmask.
+  // FIXME: Rewrite to use regunits.
   for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
     // Don't count registers that are saved and restored.
     if (SavedRegs.test(PReg))
@@ -136,11 +170,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
                       << " function optimized for not having CSR.\n");
   }
 
-  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
-    if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
-      LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
+  LLVM_DEBUG(
+    for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+      if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+        dbgs() << printReg(PReg, TRI) << " ";
+    }
 
-  LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
+    dbgs() << " \n----------------------------------------\n";
+  );
 
   PRUI.storeUpdateRegUsageInfo(F, RegMask);
 
@@ -155,38 +192,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
   // Target will return the set of registers that it saves/restores as needed.
   SavedRegs.clear();
   TFI.determineCalleeSaves(MF, SavedRegs);
+  if (SavedRegs.none())
+    return;
 
   // Insert subregs.
   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
   for (unsigned i = 0; CSRegs[i]; ++i) {
-    unsigned Reg = CSRegs[i];
-    if (SavedRegs.test(Reg))
-      for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
+    MCPhysReg Reg = CSRegs[i];
+    if (SavedRegs.test(Reg)) {
+      // Save subregisters
+      for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
         SavedRegs.set(*SR);
-  }
-
-  // Insert any register fully saved via subregisters.
-  for (const TargetRegisterClass *RC : TRI.regclasses()) {
-    if (!RC->CoveredBySubRegs)
-       continue;
-
-    for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
-      if (SavedRegs.test(PReg))
-        continue;
-
-      // Check if PReg is fully covered by its subregs.
-      if (!RC->contains(PReg))
-        continue;
-
-      // Add PReg to SavedRegs if all subregs are saved.
-      bool AllSubRegsSaved = true;
-      for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
-        if (!SavedRegs.test(*SR)) {
-          AllSubRegsSaved = false;
-          break;
-        }
-      if (AllSubRegsSaved)
-        SavedRegs.set(PReg);
     }
   }
 }
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index 256de295821d..fc4be82d215e 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -1,9 +1,8 @@
 //=--- RegUsageInfoPropagate.cpp - Register Usage Informartion Propagation --=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index add8faec97d4..530e0cccf1d4 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,9 +1,8 @@
 //===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,6 +90,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
 void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
   assert(RC && "no register class given");
   RCInfo &RCI = RegClass[RC->getID()];
+  auto &STI = MF->getSubtarget();
 
   // Raw register count, including all reserved regs.
   unsigned NumRegs = RC->getNumRegs();
@@ -115,7 +115,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
     unsigned Cost = TRI->getCostPerUse(PhysReg);
     MinCost = std::min(MinCost, Cost);
 
-    if (CalleeSavedAliases[PhysReg])
+    if (CalleeSavedAliases[PhysReg] &&
+        !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
       // PhysReg aliases a CSR, save it for later.
       CSRAlias.push_back(PhysReg);
     else {
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2a06d5e95fbb..2db6ab454cea 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,9 +1,8 @@
 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -105,6 +104,19 @@ static cl::opt<unsigned> LateRematUpdateThreshold(
              "repeated work. "),
     cl::init(100));
 
+static cl::opt<unsigned> LargeIntervalSizeThreshold(
+    "large-interval-size-threshold", cl::Hidden,
+    cl::desc("If the valnos size of an interval is larger than the threshold, "
+             "it is regarded as a large interval. "),
+    cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalFreqThreshold(
+    "large-interval-freq-threshold", cl::Hidden,
+    cl::desc("For a large interval, if it is coalesed with other live "
+             "intervals many times more than the threshold, stop its "
+             "coalescing to control the compile time. "),
+    cl::init(100));
+
 namespace {
 
   class RegisterCoalescer : public MachineFunctionPass,
@@ -153,6 +165,10 @@ namespace {
     /// lateLiveIntervalUpdate is called.
     DenseSet<unsigned> ToBeUpdated;
 
+    /// Record how many times a large live interval with many valnos
+    /// has been considered for joining with another live interval.
+    DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+
     /// Recursively eliminate dead defs in DeadDefs.
     void eliminateDeadDefs();
 
@@ -195,6 +211,11 @@ namespace {
     /// Attempt joining two virtual registers. Return true on success.
     bool joinVirtRegs(CoalescerPair &CP);
 
+    /// If a live interval has many valnos and is coalesced with other
+    /// live intervals many times, we regard such a live interval as having
+    /// high compile time cost.
+    bool isHighCostLiveInterval(LiveInterval &LI);
+
     /// Attempt joining with a reserved physreg.
     bool joinReservedPhysReg(CoalescerPair &CP);
 
@@ -337,9 +358,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                     "Simple Register Coalescing", false, false)
 
-static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
-                        unsigned &Src, unsigned &Dst,
-                        unsigned &SrcSub, unsigned &DstSub) {
+LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
+                                       const MachineInstr *MI, unsigned &Src,
+                                       unsigned &Dst, unsigned &SrcSub,
+                                       unsigned &DstSub) {
   if (MI->isCopy()) {
     Dst = MI->getOperand(0).getReg();
     DstSub = MI->getOperand(0).getSubReg();
@@ -672,8 +694,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
 
   for (LiveRange::Segment &ASeg : IntA.segments) {
     if (ASeg.valno != AValNo) continue;
-    LiveInterval::iterator BI =
-      std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
+    LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
     if (BI != IntB.begin())
       --BI;
     for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
@@ -903,23 +924,32 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
     }
     SlotIndex AIdx = CopyIdx.getRegSlot(true);
     LaneBitmask MaskA;
+    const SlotIndexes &Indexes = *LIS->getSlotIndexes();
     for (LiveInterval::SubRange &SA : IntA.subranges()) {
       VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
-      assert(ASubValNo != nullptr);
+      // Even if we are dealing with a full copy, some lanes can
+      // still be undefined.
+      // E.g.,
+      // undef A.subLow = ...
+      // B = COPY A <== A.subHigh is undefined here and does
+      //                not have a value number.
+      if (!ASubValNo)
+        continue;
       MaskA |= SA.LaneMask;
 
-      IntB.refineSubRanges(Allocator, SA.LaneMask,
-          [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
-            (LiveInterval::SubRange &SR) {
-        VNInfo *BSubValNo = SR.empty()
-          ? SR.getNextValue(CopyIdx, Allocator)
-          : SR.getVNInfoAt(CopyIdx);
-        assert(BSubValNo != nullptr);
-        auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
-        ShrinkB |= P.second;
-        if (P.first)
-          BSubValNo->def = ASubValNo->def;
-      });
+      IntB.refineSubRanges(
+          Allocator, SA.LaneMask,
+          [&Allocator, &SA, CopyIdx, ASubValNo,
+           &ShrinkB](LiveInterval::SubRange &SR) {
+            VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
+                                           : SR.getVNInfoAt(CopyIdx);
+            assert(BSubValNo != nullptr);
+            auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+            ShrinkB |= P.second;
+            if (P.first)
+              BSubValNo->def = ASubValNo->def;
+          },
+          Indexes, *TRI);
     }
     // Go over all subranges of IntB that have not been covered by IntA,
     // and delete the segments starting at CopyIdx. This can happen if
@@ -947,7 +977,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
 
 /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
 /// predecessor of BB2, and if B is not redefined on the way from A = B
-/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
 /// execution goes through the path from BB0 to BB2. We may move B = A
 /// to the predecessor without such reversed copy.
 /// So we will transform the program from:
@@ -1494,7 +1524,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
   // CoalescerPair may have a new register class with adjusted subreg indices
   // at this point.
   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+  if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+    return nullptr;
 
   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
@@ -1994,19 +2025,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   if (CP.isFlipped()) {
     // Physreg is copied into vreg
     //   %y = COPY %physreg_x
-    //   ...  //< no other def of %x here
+    //   ...  //< no other def of %physreg_x here
     //   use %y
     // =>
     //   ...
-    //   use %x
+    //   use %physreg_x
     CopyMI = MRI->getVRegDef(SrcReg);
   } else {
     // VReg is copied into physreg:
     //   %y = def
-    //   ... //< no other def or use of %y here
-    //   %y = COPY %physreg_x
+    //   ... //< no other def or use of %physreg_x here
+    //   %physreg_x = COPY %y
     // =>
-    //   %y = def
+    //   %physreg_x = def
     //   ...
     if (!MRI->hasOneNonDBGUse(SrcReg)) {
       LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
@@ -3010,7 +3041,9 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
       // If a subrange starts at the copy then an undefined value has been
       // copied and we must remove that subrange value as well.
       VNInfo *ValueOut = Q.valueOutOrDead();
-      if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+      if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
+                                  (V.Identical && V.Resolution == CR_Erase &&
+                                   ValueOut->def == Def))) {
         LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
                           << " at " << Def << "\n");
         SmallVector<SlotIndex,8> EndPoints;
@@ -3019,7 +3052,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
         // Mark value number as unused.
         ValueOut->markUnused();
 
-        if (V.Identical && S.Query(OtherDef).valueOut()) {
+        if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
           // If V is identical to V.OtherVNI (and S was live at OtherDef),
           // then we can't simply prune V from S. V needs to be replaced
           // with V.OtherVNI.
@@ -3241,16 +3274,29 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
                                           LaneBitmask LaneMask,
                                           CoalescerPair &CP) {
   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
-  LI.refineSubRanges(Allocator, LaneMask,
-      [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
-    if (SR.empty()) {
-      SR.assign(ToMerge, Allocator);
-    } else {
-      // joinSubRegRange() destroys the merged range, so we need a copy.
-      LiveRange RangeCopy(ToMerge, Allocator);
-      joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
-    }
-  });
+  LI.refineSubRanges(
+      Allocator, LaneMask,
+      [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
+        if (SR.empty()) {
+          SR.assign(ToMerge, Allocator);
+        } else {
+          // joinSubRegRange() destroys the merged range, so we need a copy.
+          LiveRange RangeCopy(ToMerge, Allocator);
+          joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
+        }
+      },
+      *LIS->getSlotIndexes(), *TRI);
+}
+
+bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
+  if (LI.valnos.size() < LargeIntervalSizeThreshold)
+    return false;
+  auto &Counter = LargeLIVisitCounter[LI.reg];
+  if (Counter < LargeIntervalFreqThreshold) {
+    Counter++;
+    return false;
+  }
+  return true;
 }
 
 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -3265,6 +3311,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
 
+  if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
+    return false;
+
   // First compute NewVNInfo and the simple value mappings.
   // Detect impossible conflicts early.
   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
@@ -3474,7 +3523,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
   if (!UseTerminalRule)
     return false;
   unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
-  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+  if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+    return false;
   // Check if the destination of this copy has any other affinity.
   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
       // If SrcReg is a physical register, the copy won't be coalesced.
@@ -3498,8 +3548,9 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
       continue;
     unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
-    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
-                OtherSubReg);
+    if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+                OtherSubReg))
+      return false;
     if (OtherReg == SrcReg)
       OtherReg = OtherSrcReg;
     // Check if OtherReg is a non-terminal.
@@ -3620,6 +3671,7 @@ void RegisterCoalescer::releaseMemory() {
   WorkList.clear();
   DeadDefs.clear();
   InflateRegs.clear();
+  LargeLIVisitCounter.clear();
 }
 
 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 1a46f6d053e6..f505d46cd338 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,9 +1,8 @@
 //===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 1099e468e885..7d9b3aa9b2d7 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -1,9 +1,8 @@
 //===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -846,7 +845,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
   CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
 
   SlotIndex SlotIdx;
-  if (RequireIntervals)
+  if (RequireIntervals && !CurrPos->isDebugInstr())
     SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
 
   // Open the top of the region using slot indexes.
@@ -856,6 +855,12 @@ void RegPressureTracker::recedeSkipDebugValues() {
 
 void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
   recedeSkipDebugValues();
+  if (CurrPos->isDebugValue()) {
+    // It's possible to only have debug_value instructions and hit the start of
+    // the block.
+    assert(CurrPos == MBB->begin());
+    return;
+  }
 
   const MachineInstr &MI = *CurrPos;
   RegisterOperands RegOpers;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 3660586c1358..bb19110e6d70 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -1,9 +1,8 @@
 //===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -534,7 +533,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
 
 unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
                                         MachineBasicBlock::iterator I,
-                                        int SPAdj) {
+                                        int SPAdj, bool AllowSpill) {
   MachineInstr &MI = *I;
   const MachineFunction &MF = *MI.getMF();
   // Consider all allocatable registers in the register class initially
@@ -565,6 +564,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
     return SReg;
   }
 
+  if (!AllowSpill)
+    return 0;
+
   ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
   Scavenged.Restore = &*std::prev(UseMI);
 
@@ -576,7 +578,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
 
 unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
                                                  MachineBasicBlock::iterator To,
-                                                 bool RestoreAfter, int SPAdj) {
+                                                 bool RestoreAfter, int SPAdj,
+                                                 bool AllowSpill) {
   const MachineBasicBlock &MBB = *To->getParent();
   const MachineFunction &MF = *MBB.getParent();
 
@@ -590,21 +593,25 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
   MachineBasicBlock::iterator SpillBefore = P.second;
   assert(Reg != 0 && "No register left to scavenge!");
   // Found an available register?
-  if (SpillBefore != MBB.end()) {
-    MachineBasicBlock::iterator ReloadAfter =
-      RestoreAfter ? std::next(MBBI) : MBBI;
-    MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
-    if (ReloadBefore != MBB.end())
-      LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
-    ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
-    Scavenged.Restore = &*std::prev(SpillBefore);
-    LiveUnits.removeReg(Reg);
-    LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
-                      << " until " << *SpillBefore);
-  } else {
+  if (SpillBefore == MBB.end()) {
     LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
-                      << '\n');
+               << '\n');
+    return Reg;
   }
+
+  if (!AllowSpill)
+    return 0;
+
+  MachineBasicBlock::iterator ReloadAfter =
+    RestoreAfter ? std::next(MBBI) : MBBI;
+  MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+  if (ReloadBefore != MBB.end())
+    LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+  ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+  Scavenged.Restore = &*std::prev(SpillBefore);
+  LiveUnits.removeReg(Reg);
+  LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+             << " until " << *SpillBefore);
   return Reg;
 }
 
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 6b9880a8913f..6858d7233bc5 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,9 +1,8 @@
 //===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 156d1c81c238..22cff48c3051 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -1,9 +1,8 @@
 //===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index a02302e6ff99..019de6554d2a 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -1,9 +1,8 @@
 //===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -27,6 +26,7 @@ using namespace llvm;
 #define DEBUG_TYPE "reset-machine-function"
 
 STATISTIC(NumFunctionsReset, "Number of functions reset");
+STATISTIC(NumFunctionsVisited, "Number of functions visited");
 
 namespace {
   class ResetMachineFunction : public MachineFunctionPass {
@@ -51,6 +51,7 @@ namespace {
     }
 
     bool runOnMachineFunction(MachineFunction &MF) override {
+      ++NumFunctionsVisited;
       // No matter what happened, whether we successfully selected the function
       // or not, nothing is going to use the vreg types after us. Make sure they
       // disappear.
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index c356fb57ac6d..a6bc7330e2cc 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -1,9 +1,8 @@
 //===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -372,7 +371,7 @@ Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
   if (!StackGuardVar)
     StackGuardVar =
         F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
-  return IRB.CreateLoad(StackGuardVar, "StackGuard");
+  return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard");
 }
 
 void SafeStack::findInsts(Function &F,
@@ -453,7 +452,8 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
     ++NumUnsafeStackRestorePoints;
 
     IRB.SetInsertPoint(I->getNextNode());
-    Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+    Value *CurrentTop =
+        DynamicTop ? IRB.CreateLoad(StackPtrTy, DynamicTop) : StaticTop;
     IRB.CreateStore(CurrentTop, UnsafeStackPtr);
   }
 
@@ -462,7 +462,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
 
 void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
                                 AllocaInst *StackGuardSlot, Value *StackGuard) {
-  Value *V = IRB.CreateLoad(StackGuardSlot);
+  Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
   Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
 
   auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -475,8 +475,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
                                 /* Unreachable */ true, Weights);
   IRBuilder<> IRBFail(CheckTerm);
   // FIXME: respect -fsanitize-trap / -ftrap-function here?
-  Constant *StackChkFail = F.getParent()->getOrInsertFunction(
-      "__stack_chk_fail", IRB.getVoidTy());
+  FunctionCallee StackChkFail =
+      F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy());
   IRBFail.CreateCall(StackChkFail, {});
 }
 
@@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
 
   if (StackGuardSlot) {
     unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
-    Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+    Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
                                ConstantInt::get(Int32Ty, -Offset));
     Value *NewAI =
         IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
@@ -569,14 +569,14 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
     if (Size == 0)
       Size = 1; // Don't create zero-sized stack objects.
 
-    Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+    Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
                                ConstantInt::get(Int32Ty, -Offset));
     Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
                                      Arg->getName() + ".unsafe-byval");
 
     // Replace alloc with the new location.
     replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
-                      DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
+                      DIExpression::ApplyOffset, -Offset);
     Arg->replaceAllUsesWith(NewArg);
     IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
     IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
@@ -587,12 +587,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
     IRB.SetInsertPoint(AI);
     unsigned Offset = SSL.getObjectOffset(AI);
 
-    uint64_t Size = getStaticAllocaAllocationSize(AI);
-    if (Size == 0)
-      Size = 1; // Don't create zero-sized stack objects.
-
-    replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref,
-                               -Offset, DIExpression::NoDeref);
+    replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset,
+                               -Offset);
     replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
 
     // Replace uses of the alloca with the new location.
@@ -609,20 +605,16 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
         InsertBefore = User;
 
       IRBuilder<> IRBUser(InsertBefore);
-      Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+      Value *Off = IRBUser.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
                                      ConstantInt::get(Int32Ty, -Offset));
       Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
 
-      if (auto *PHI = dyn_cast<PHINode>(User)) {
+      if (auto *PHI = dyn_cast<PHINode>(User))
         // PHI nodes may have multiple incoming edges from the same BB (why??),
         // all must be updated at once with the same incoming value.
-        auto *BB = PHI->getIncomingBlock(U);
-        for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
-          if (PHI->getIncomingBlock(I) == BB)
-            PHI->setIncomingValue(I, Replacement);
-      } else {
+        PHI->setIncomingValueForBlock(PHI->getIncomingBlock(U), Replacement);
+      else
         U.set(Replacement);
-      }
     }
 
     AI->eraseFromParent();
@@ -637,7 +629,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
   IRB.SetInsertPoint(BasePointer->getNextNode());
 
   Value *StaticTop =
-      IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+      IRB.CreateGEP(Int8Ty, BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
                     "unsafe_stack_static_top");
   IRB.CreateStore(StaticTop, UnsafeStackPtr);
   return StaticTop;
@@ -660,7 +652,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
     uint64_t TySize = DL.getTypeAllocSize(Ty);
     Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
 
-    Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+    Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(StackPtrTy, UnsafeStackPtr),
+                                   IntPtrTy);
     SP = IRB.CreateSub(SP, Size);
 
     // Align the SP value to satisfy the AllocaInst, type and stack alignments.
@@ -682,8 +675,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
     if (AI->hasName() && isa<Instruction>(NewAI))
       NewAI->takeName(AI);
 
-    replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0,
-                               DIExpression::NoDeref);
+    replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
     AI->replaceAllUsesWith(NewAI);
     AI->eraseFromParent();
   }
@@ -698,7 +690,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
 
       if (II->getIntrinsicID() == Intrinsic::stacksave) {
         IRBuilder<> IRB(II);
-        Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+        Instruction *LI = IRB.CreateLoad(StackPtrTy, UnsafeStackPtr);
         LI->takeName(II);
         II->replaceAllUsesWith(LI);
         II->eraseFromParent();
@@ -727,7 +719,7 @@ void SafeStack::TryInlinePointerAddress() {
   if (!isa<CallInst>(UnsafeStackPtr))
     return;
 
-  if(F.hasFnAttribute(Attribute::OptimizeNone))
+  if(F.hasOptNone())
     return;
 
   CallSite CS(UnsafeStackPtr);
@@ -783,7 +775,7 @@ bool SafeStack::run() {
   if (DISubprogram *SP = F.getSubprogram())
     IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
   if (SafeStackUsePointerAddress) {
-    Value *Fn = F.getParent()->getOrInsertFunction(
+    FunctionCallee Fn = F.getParent()->getOrInsertFunction(
         "__safestack_pointer_address", StackPtrTy->getPointerTo(0));
     UnsafeStackPtr = IRB.CreateCall(Fn);
   } else {
@@ -793,7 +785,7 @@ bool SafeStack::run() {
   // Load the current stack pointer (we'll also use it as a base pointer).
   // FIXME: use a dedicated register for it ?
   Instruction *BasePointer =
-      IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+      IRB.CreateLoad(StackPtrTy, UnsafeStackPtr, false, "unsafe_stack_ptr");
   assert(BasePointer->getType() == StackPtrTy);
 
   AllocaInst *StackGuardSlot = nullptr;
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 726c38002817..04a5c4b6d892 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -1,9 +1,8 @@
 //===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h
index 902e63ebeb7e..b696b1b6baed 100644
--- a/lib/CodeGen/SafeStackColoring.h
+++ b/lib/CodeGen/SafeStackColoring.h
@@ -1,9 +1,8 @@
 //===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp
index 07b6a5d1883b..09964866e4d3 100644
--- a/lib/CodeGen/SafeStackLayout.cpp
+++ b/lib/CodeGen/SafeStackLayout.cpp
@@ -1,9 +1,8 @@
 //===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h
index ac531d800f6e..349d9a8b595c 100644
--- a/lib/CodeGen/SafeStackLayout.h
+++ b/lib/CodeGen/SafeStackLayout.h
@@ -1,9 +1,8 @@
 //===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 2684f92b3a93..7776dffb4e9c 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -1,10 +1,9 @@
 //===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
 //                                    instrinsics
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -124,7 +123,7 @@ static bool isConstantIntVector(Value *Mask) {
 //  %10 = extractelement <16 x i1> %mask, i32 2
 //  br i1 %10, label %cond.load4, label %else5
 //
-static void scalarizeMaskedLoad(CallInst *CI) {
+static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
   Value *Ptr = CI->getArgOperand(0);
   Value *Alignment = CI->getArgOperand(1);
   Value *Mask = CI->getArgOperand(2);
@@ -144,7 +143,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
 
   // Short-cut if the mask is all-true.
   if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
-    Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+    Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
     CI->replaceAllUsesWith(NewI);
     CI->eraseFromParent();
     return;
@@ -152,9 +151,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
 
   // Adjust alignment for the scalar instruction.
   AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
-  // Bitcast %addr fron i8* to EltTy*
+  // Bitcast %addr from i8* to EltTy*
   Type *NewPtrType =
-      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
   unsigned VectorWidth = VecType->getNumElements();
 
@@ -165,11 +164,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
-      Value *Gep =
-          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
-      LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
-      VResult =
-          Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+      LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+      VResult = Builder.CreateInsertElement(VResult, Load, Idx);
     }
     CI->replaceAllUsesWith(VResult);
     CI->eraseFromParent();
@@ -184,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
     //  br i1 %mask_1, label %cond.load, label %else
     //
 
-    Value *Predicate =
-        Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
 
     // Create "cond" block
     //
@@ -197,11 +193,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
                                                      "cond.load");
     Builder.SetInsertPoint(InsertPt);
 
-    Value *Gep =
-        Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
-    LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
-    Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
-                                                    Builder.getInt32(Idx));
+    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
 
     // Create "else" block, fill it in the next iteration
     BasicBlock *NewIfBlock =
@@ -222,6 +216,8 @@ static void scalarizeMaskedLoad(CallInst *CI) {
 
   CI->replaceAllUsesWith(VResult);
   CI->eraseFromParent();
+
+  ModifiedDT = true;
 }
 
 // Translate a masked store intrinsic, like
@@ -250,7 +246,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
 //   store i32 %6, i32* %7
 //   br label %else2
 //   . . .
-static void scalarizeMaskedStore(CallInst *CI) {
+static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptr = CI->getArgOperand(1);
   Value *Alignment = CI->getArgOperand(2);
@@ -276,9 +272,9 @@ static void scalarizeMaskedStore(CallInst *CI) {
 
   // Adjust alignment for the scalar instruction.
   AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
-  // Bitcast %addr fron i8* to EltTy*
+  // Bitcast %addr from i8* to EltTy*
   Type *NewPtrType =
-      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+      EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
   unsigned VectorWidth = VecType->getNumElements();
 
@@ -286,9 +282,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
-      Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
-      Value *Gep =
-          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+      Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+      Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
       Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
     }
     CI->eraseFromParent();
@@ -301,8 +296,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
     //  br i1 %mask_1, label %cond.store, label %else
     //
-    Value *Predicate =
-        Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
 
     // Create "cond" block
     //
@@ -314,9 +308,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
         IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
     Builder.SetInsertPoint(InsertPt);
 
-    Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
-    Value *Gep =
-        Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+    Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
     Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
 
     // Create "else" block, fill it in the next iteration
@@ -329,6 +322,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
     IfBlock = NewIfBlock;
   }
   CI->eraseFromParent();
+
+  ModifiedDT = true;
 }
 
 // Translate a masked gather intrinsic like
@@ -360,13 +355,14 @@ static void scalarizeMaskedStore(CallInst *CI) {
 // . . .
 // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
 // ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI) {
+static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
   Value *Ptrs = CI->getArgOperand(0);
   Value *Alignment = CI->getArgOperand(1);
   Value *Mask = CI->getArgOperand(2);
   Value *Src0 = CI->getArgOperand(3);
 
   VectorType *VecType = cast<VectorType>(CI->getType());
+  Type *EltTy = VecType->getElementType();
 
   IRBuilder<> Builder(CI->getContext());
   Instruction *InsertPt = CI;
@@ -385,12 +381,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
       if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
-      Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
-                                                "Ptr" + Twine(Idx));
+      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
       LoadInst *Load =
-          Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
-      VResult = Builder.CreateInsertElement(
-          VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
+          Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+      VResult =
+          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
     }
     CI->replaceAllUsesWith(VResult);
     CI->eraseFromParent();
@@ -404,8 +399,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
     //  br i1 %Mask1, label %cond.load, label %else
     //
 
-    Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
-                                                    "Mask" + Twine(Idx));
+    Value *Predicate =
+        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
 
     // Create "cond" block
     //
@@ -416,13 +411,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
     Builder.SetInsertPoint(InsertPt);
 
-    Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
-                                              "Ptr" + Twine(Idx));
+    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
     LoadInst *Load =
-        Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
-    Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
-                                                    Builder.getInt32(Idx),
-                                                    "Res" + Twine(Idx));
+        Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+    Value *NewVResult =
+        Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
 
     // Create "else" block, fill it in the next iteration
     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -441,6 +434,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
 
   CI->replaceAllUsesWith(VResult);
   CI->eraseFromParent();
+
+  ModifiedDT = true;
 }
 
 // Translate a masked scatter intrinsic, like
@@ -469,7 +464,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
 // store i32 %Elt1, i32* %Ptr1, align 4
 // br label %else2
 //   . . .
-static void scalarizeMaskedScatter(CallInst *CI) {
+static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptrs = CI->getArgOperand(1);
   Value *Alignment = CI->getArgOperand(2);
@@ -493,12 +488,11 @@ static void scalarizeMaskedScatter(CallInst *CI) {
   // Shorten the way if the mask is a vector of constants.
   if (isConstantIntVector(Mask)) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-      if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
-      Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
-                                                   "Elt" + Twine(Idx));
-      Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
-                                                "Ptr" + Twine(Idx));
+      Value *OneElt =
+          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+      Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
       Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
     }
     CI->eraseFromParent();
@@ -511,8 +505,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
     //  %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
     //  br i1 %Mask1, label %cond.store, label %else
     //
-    Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
-                                                    "Mask" + Twine(Idx));
+    Value *Predicate =
+        Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
 
     // Create "cond" block
     //
@@ -523,10 +517,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
     Builder.SetInsertPoint(InsertPt);
 
-    Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
-                                                 "Elt" + Twine(Idx));
-    Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
-                                              "Ptr" + Twine(Idx));
+    Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+    Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
     Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
 
     // Create "else" block, fill it in the next iteration
@@ -538,6 +530,156 @@ static void scalarizeMaskedScatter(CallInst *CI) {
     IfBlock = NewIfBlock;
   }
   CI->eraseFromParent();
+
+  ModifiedDT = true;
+}
+
+static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
+  Value *Ptr = CI->getArgOperand(0);
+  Value *Mask = CI->getArgOperand(1);
+  Value *PassThru = CI->getArgOperand(2);
+
+  VectorType *VecType = cast<VectorType>(CI->getType());
+
+  Type *EltTy = VecType->getElementType();
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  unsigned VectorWidth = VecType->getNumElements();
+
+  // The result vector
+  Value *VResult = PassThru;
+
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  br i1 %mask_1, label %cond.load, label %else
+    //
+
+    Value *Predicate =
+        Builder.CreateExtractElement(Mask, Idx);
+
+    // Create "cond" block
+    //
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  %Elt = load i32* %EltAddr
+    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+    //
+    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+                                                     "cond.load");
+    Builder.SetInsertPoint(InsertPt);
+
+    LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1);
+    Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
+
+    // Move the pointer if there are more blocks to come.
+    Value *NewPtr;
+    if ((Idx + 1) != VectorWidth)
+      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+    OldBr->eraseFromParent();
+    BasicBlock *PrevIfBlock = IfBlock;
+    IfBlock = NewIfBlock;
+
+    // Create the phi to join the new and previous value.
+    PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+    ResultPhi->addIncoming(NewVResult, CondBlock);
+    ResultPhi->addIncoming(VResult, PrevIfBlock);
+    VResult = ResultPhi;
+
+    // Add a PHI for the pointer if this isn't the last iteration.
+    if ((Idx + 1) != VectorWidth) {
+      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+      PtrPhi->addIncoming(NewPtr, CondBlock);
+      PtrPhi->addIncoming(Ptr, PrevIfBlock);
+      Ptr = PtrPhi;
+    }
+  }
+
+  CI->replaceAllUsesWith(VResult);
+  CI->eraseFromParent();
+
+  ModifiedDT = true;
+}
+
+static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
+  Value *Src = CI->getArgOperand(0);
+  Value *Ptr = CI->getArgOperand(1);
+  Value *Mask = CI->getArgOperand(2);
+
+  VectorType *VecType = cast<VectorType>(Src->getType());
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  Type *EltTy = VecType->getVectorElementType();
+
+  unsigned VectorWidth = VecType->getNumElements();
+
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  br i1 %mask_1, label %cond.store, label %else
+    //
+    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+
+    // Create "cond" block
+    //
+    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  store i32 %OneElt, i32* %EltAddr
+    //
+    BasicBlock *CondBlock =
+        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+    Builder.SetInsertPoint(InsertPt);
+
+    Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+    Builder.CreateAlignedStore(OneElt, Ptr, 1);
+
+    // Move the pointer if there are more blocks to come.
+    Value *NewPtr;
+    if ((Idx + 1) != VectorWidth)
+      NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+    OldBr->eraseFromParent();
+    BasicBlock *PrevIfBlock = IfBlock;
+    IfBlock = NewIfBlock;
+
+    // Add a PHI for the pointer if this isn't the last iteration.
+    if ((Idx + 1) != VectorWidth) {
+      PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+      PtrPhi->addIncoming(NewPtr, CondBlock);
+      PtrPhi->addIncoming(Ptr, PrevIfBlock);
+      Ptr = PtrPhi;
+    }
+  }
+  CI->eraseFromParent();
+
+  ModifiedDT = true;
 }
 
 bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
@@ -587,33 +729,35 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
       break;
     case Intrinsic::masked_load:
       // Scalarize unsupported vector masked load
-      if (!TTI->isLegalMaskedLoad(CI->getType())) {
-        scalarizeMaskedLoad(CI);
-        ModifiedDT = true;
-        return true;
-      }
-      return false;
+      if (TTI->isLegalMaskedLoad(CI->getType()))
+        return false;
+      scalarizeMaskedLoad(CI, ModifiedDT);
+      return true;
     case Intrinsic::masked_store:
-      if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
-        scalarizeMaskedStore(CI);
-        ModifiedDT = true;
-        return true;
-      }
-      return false;
+      if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
+        return false;
+      scalarizeMaskedStore(CI, ModifiedDT);
+      return true;
     case Intrinsic::masked_gather:
-      if (!TTI->isLegalMaskedGather(CI->getType())) {
-        scalarizeMaskedGather(CI);
-        ModifiedDT = true;
-        return true;
-      }
-      return false;
+      if (TTI->isLegalMaskedGather(CI->getType()))
+        return false;
+      scalarizeMaskedGather(CI, ModifiedDT);
+      return true;
     case Intrinsic::masked_scatter:
-      if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
-        scalarizeMaskedScatter(CI);
-        ModifiedDT = true;
-        return true;
-      }
-      return false;
+      if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
+        return false;
+      scalarizeMaskedScatter(CI, ModifiedDT);
+      return true;
+    case Intrinsic::masked_expandload:
+      if (TTI->isLegalMaskedExpandLoad(CI->getType()))
+        return false;
+      scalarizeMaskedExpandLoad(CI, ModifiedDT);
+      return true;
+    case Intrinsic::masked_compressstore:
+      if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+        return false;
+      scalarizeMaskedCompressStore(CI, ModifiedDT);
+      return true;
     }
   }
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6c135b3d69d6..dc3a11670a16 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -1,9 +1,8 @@
 //===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -38,6 +38,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "pre-RA-sched"
 
+STATISTIC(NumNewPredsAdded, "Number of times a  single predecessor was added");
+STATISTIC(NumTopoInits,
+          "Number of times the topological order has been recomputed");
+
 #ifndef NDEBUG
 static cl::opt<bool> StressSchedOpt(
   "stress-sched", cl::Hidden, cl::init(false),
@@ -458,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
   // On insertion of the edge X->Y, the algorithm first marks by calling DFS
   // the nodes reachable from Y, and then shifts them using Shift to lie
   // immediately after X in Index2Node.
+
+  // Cancel pending updates, mark as valid.
+  Dirty = false;
+  Updates.clear();
+
   unsigned DAGSize = SUnits.size();
   std::vector<SUnit*> WorkList;
   WorkList.reserve(DAGSize);
@@ -498,6 +507,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
   }
 
   Visited.resize(DAGSize);
+  NumTopoInits++;
 
 #ifndef NDEBUG
   // Check correctness of the ordering
@@ -510,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
 #endif
 }
 
+void ScheduleDAGTopologicalSort::FixOrder() {
+  // Recompute from scratch after new nodes have been added.
+  if (Dirty) {
+    InitDAGTopologicalSorting();
+    return;
+  }
+
+  // Otherwise apply updates one-by-one.
+  for (auto &U : Updates)
+    AddPred(U.first, U.second);
+  Updates.clear();
+}
+
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+  // Recomputing the order from scratch is likely more efficient than applying
+  // updates one-by-one for too many updates. The current cut-off is arbitrarily
+  // chosen.
+  Dirty = Dirty || Updates.size() > 10;
+
+  if (Dirty)
+    return;
+
+  Updates.emplace_back(Y, X);
+}
+
 void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
   int UpperBound, LowerBound;
   LowerBound = Node2Index[Y->NodeNum];
@@ -524,6 +559,8 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
     // Recompute topological indexes.
     Shift(Visited, LowerBound, UpperBound);
   }
+
+  NumNewPredsAdded++;
 }
 
 void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
@@ -665,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
 }
 
 bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+  FixOrder();
   // Is SU reachable from TargetSU via successor edges?
   if (IsReachable(SU, TargetSU))
     return true;
@@ -677,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
 
 bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                              const SUnit *TargetSU) {
+  FixOrder();
   // If insertion of the edge SU->TargetSU would create a cycle
   // then there is a path from TargetSU to SU.
   int UpperBound, LowerBound;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 99406ed1496a..d5ad7e92299d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1,9 +1,8 @@
 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -115,7 +114,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
     : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
       RemoveKillFlags(RemoveKillFlags),
       UnknownValue(UndefValue::get(
-                             Type::getVoidTy(mf.getFunction().getContext()))) {
+                             Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
   DbgValues.clear();
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -132,7 +131,8 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                          const DataLayout &DL) {
   auto allMMOsOkay = [&]() {
     for (const MachineMemOperand *MMO : MI->memoperands()) {
-      if (MMO->isVolatile())
+      // TODO: Figure out whether isAtomic is really necessary (see D57601).
+      if (MMO->isVolatile() || MMO->isAtomic())
         return false;
 
       if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
@@ -743,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   // done.
   Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
 
+  // Track all instructions that may raise floating-point exceptions.
+  // These do not depend on one another (or normal loads or stores), but
+  // must not be rescheduled across global barriers.  Note that we don't
+  // really need a "map" here since we don't track those MIs by value;
+  // using the same Value2SUsMap data type here is simply a matter of
+  // convenience.
+  Value2SUsMap FPExceptions;
+
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
   DbgValues.clear();
@@ -870,10 +878,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       addBarrierChain(Loads);
       addBarrierChain(NonAliasStores);
       addBarrierChain(NonAliasLoads);
+      addBarrierChain(FPExceptions);
 
       continue;
     }
 
+    // Instructions that may raise FP exceptions may not be moved
+    // across any global barriers.
+    if (MI.mayRaiseFPException()) {
+      if (BarrierChain)
+        BarrierChain->addPredBarrier(SU);
+
+      FPExceptions.insert(SU, UnknownValue);
+
+      if (FPExceptions.size() >= HugeRegion) {
+        LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
+        Value2SUsMap empty;
+        reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
+      }
+    }
+
     // If it's not a store or a variant load, we're done.
     if (!MI.mayStore() &&
         !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
@@ -968,6 +992,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   Uses.clear();
   CurrentVRegDefs.clear();
   CurrentVRegUses.clear();
+
+  Topo.MarkDirty();
 }
 
 raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
@@ -1089,22 +1115,21 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
     if (!MI.isBundled()) {
       toggleKills(MRI, LiveRegs, MI, true);
     } else {
-      MachineBasicBlock::instr_iterator First = MI.getIterator();
-      if (MI.isBundle()) {
+      MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
+      if (MI.isBundle())
         toggleKills(MRI, LiveRegs, MI, false);
-        ++First;
-      }
+
       // Some targets make the (questionable) assumtion that the instructions
       // inside the bundle are ordered and consequently only the last use of
       // a register inside the bundle can kill it.
-      MachineBasicBlock::instr_iterator I = std::next(First);
+      MachineBasicBlock::instr_iterator I = std::next(Bundle);
       while (I->isBundledWithSucc())
         ++I;
       do {
         if (!I->isDebugInstr())
           toggleKills(MRI, LiveRegs, *I, true);
         --I;
-      } while(I != First);
+      } while (I != Bundle);
     }
   }
 }
@@ -1146,6 +1171,23 @@ std::string ScheduleDAGInstrs::getDAGName() const {
   return "dag." + BB->getFullName();
 }
 
+bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
+bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+  if (SuccSU != &ExitSU) {
+    // Do not use WillCreateCycle, it assumes SD scheduling.
+    // If Pred is reachable from Succ, then the edge creates a cycle.
+    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+      return false;
+    Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
+  }
+  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+  // Return true regardless of whether a new edge needed to be inserted.
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // SchedDFSResult Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index ff2085aae865..8d04711f07c6 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 4301372179b8..a9fda56f2dac 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c97721..49c922f560fa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,9 +1,8 @@
 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -112,6 +111,10 @@ static cl::opt<bool>
   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                     cl::desc("DAG combiner may split indexing from loads"));
 
+static cl::opt<unsigned> TokenFactorInlineLimit(
+    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+    cl::desc("Limit the number of operands to inline for Token Factors"));
+
 namespace {
 
   class DAGCombiner {
@@ -138,6 +141,10 @@ namespace {
     /// them) when they are deleted from the underlying DAG. It relies on
     /// stable indices of nodes within the worklist.
     DenseMap<SDNode *, unsigned> WorklistMap;
+    /// This records all nodes we attempted to add to the worklist since we
+    /// considered a new worklist entry. As we do not add duplicate nodes to
+    /// the worklist, this is different from the tail of the worklist.
+    SmallSetVector<SDNode *, 32> PruningList;
 
     /// Set of nodes which have been combined (at least once).
     ///
@@ -155,6 +162,37 @@ namespace {
         AddToWorklist(Node);
     }
 
+    // Prune potentially dangling nodes. This is called after
+    // any visit to a node, but should also be called during a visit after any
+    // failed combine which may have created a DAG node.
+    void clearAddedDanglingWorklistEntries() {
+      // Check any nodes added to the worklist to see if they are prunable.
+      while (!PruningList.empty()) {
+        auto *N = PruningList.pop_back_val();
+        if (N->use_empty())
+          recursivelyDeleteUnusedNodes(N);
+      }
+    }
+
+    SDNode *getNextWorklistEntry() {
+      // Before we do any work, remove nodes that are not in use.
+      clearAddedDanglingWorklistEntries();
+      SDNode *N = nullptr;
+      // The Worklist holds the SDNodes in order, but it may contain null
+      // entries.
+      while (!N && !Worklist.empty()) {
+        N = Worklist.pop_back_val();
+      }
+
+      if (N) {
+        bool GoodWorklistEntry = WorklistMap.erase(N);
+        (void)GoodWorklistEntry;
+        assert(GoodWorklistEntry &&
+               "Found a worklist entry without a corresponding map entry!");
+      }
+      return N;
+    }
+
     /// Call the node-specific routine that folds each particular type of node.
     SDValue visit(SDNode *N);
 
@@ -162,7 +200,7 @@ namespace {
     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
           OptLevel(OL), AA(AA) {
-      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+      ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
 
       MaximumLegalStoreInBits = 0;
       for (MVT VT : MVT::all_valuetypes())
@@ -172,6 +210,11 @@ namespace {
           MaximumLegalStoreInBits = VT.getSizeInBits();
     }
 
+    void ConsiderForPruning(SDNode *N) {
+      // Mark this for potential pruning.
+      PruningList.insert(N);
+    }
+
     /// Add to the worklist making sure its instance is at the back (next to be
     /// processed.)
     void AddToWorklist(SDNode *N) {
@@ -183,6 +226,8 @@ namespace {
       if (N->getOpcode() == ISD::HANDLENODE)
         return;
 
+      ConsiderForPruning(N);
+
       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
         Worklist.push_back(N);
     }
@@ -190,6 +235,7 @@ namespace {
     /// Remove all instances of N from the worklist.
     void removeFromWorklist(SDNode *N) {
       CombinedNodes.erase(N);
+      PruningList.remove(N);
 
       auto It = WorklistMap.find(N);
       if (It == WorklistMap.end())
@@ -229,8 +275,15 @@ namespace {
     /// If so, return true.
     bool SimplifyDemandedBits(SDValue Op) {
       unsigned BitWidth = Op.getScalarValueSizeInBits();
-      APInt Demanded = APInt::getAllOnesValue(BitWidth);
-      return SimplifyDemandedBits(Op, Demanded);
+      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+      return SimplifyDemandedBits(Op, DemandedBits);
+    }
+
+    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+      EVT VT = Op.getValueType();
+      unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+      return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
     }
 
     /// Check the specified vector node value to see if it can be simplified or
@@ -238,12 +291,13 @@ namespace {
     /// elements. If so, return true.
     bool SimplifyDemandedVectorElts(SDValue Op) {
       unsigned NumElts = Op.getValueType().getVectorNumElements();
-      APInt Demanded = APInt::getAllOnesValue(NumElts);
-      return SimplifyDemandedVectorElts(Op, Demanded);
+      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+      return SimplifyDemandedVectorElts(Op, DemandedElts);
     }
 
-    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
-    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                              const APInt &DemandedElts);
+    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                     bool AssumeSingleUse = false);
 
     bool CombineToPreIndexedLoadStore(SDNode *N);
@@ -291,15 +345,16 @@ namespace {
     SDValue visitTokenFactor(SDNode *N);
     SDValue visitMERGE_VALUES(SDNode *N);
     SDValue visitADD(SDNode *N);
-    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+    SDValue visitADDLike(SDNode *N);
+    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
     SDValue visitSUB(SDNode *N);
     SDValue visitADDSAT(SDNode *N);
     SDValue visitSUBSAT(SDNode *N);
     SDValue visitADDC(SDNode *N);
-    SDValue visitUADDO(SDNode *N);
+    SDValue visitADDO(SDNode *N);
     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitSUBC(SDNode *N);
-    SDValue visitUSUBO(SDNode *N);
+    SDValue visitSUBO(SDNode *N);
     SDValue visitADDE(SDNode *N);
     SDValue visitADDCARRY(SDNode *N);
     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
@@ -316,8 +371,7 @@ namespace {
     SDValue visitMULHS(SDNode *N);
     SDValue visitSMUL_LOHI(SDNode *N);
     SDValue visitUMUL_LOHI(SDNode *N);
-    SDValue visitSMULO(SDNode *N);
-    SDValue visitUMULO(SDNode *N);
+    SDValue visitMULO(SDNode *N);
     SDValue visitIMINMAX(SDNode *N);
     SDValue visitAND(SDNode *N);
     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
@@ -386,6 +440,7 @@ namespace {
     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 
     SDValue visitSTORE(SDNode *N);
+    SDValue visitLIFETIME_END(SDNode *N);
     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
     SDValue visitBUILD_VECTOR(SDNode *N);
@@ -400,13 +455,19 @@ namespace {
     SDValue visitMSCATTER(SDNode *N);
     SDValue visitFP_TO_FP16(SDNode *N);
     SDValue visitFP16_TO_FP(SDNode *N);
+    SDValue visitVECREDUCE(SDNode *N);
 
     SDValue visitFADDForFMACombine(SDNode *N);
     SDValue visitFSUBForFMACombine(SDNode *N);
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
-    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                    const SDLoc &DL, SDValue N0,
+                                                    SDValue N1);
+    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                      SDValue N1);
+    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -466,6 +527,7 @@ namespace {
                               const SDLoc &DL);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
+    SDValue MatchStoreCombine(StoreSDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
@@ -475,7 +537,8 @@ namespace {
     SDValue reduceBuildVecToShuffle(SDNode *N);
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
-                                  SDValue VecIn2, unsigned LeftIdx);
+                                  SDValue VecIn2, unsigned LeftIdx,
+                                  bool DidSplitVec);
     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 
     /// Walk up chain skipping non-aliasing memory nodes,
@@ -484,7 +547,7 @@ namespace {
                           SmallVectorImpl<SDValue> &Aliases);
 
     /// Return true if there is any possibility that the two addresses overlap.
-    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+    bool isAlias(SDNode *Op0, SDNode *Op1) const;
 
     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
     /// chain (aliasing node.)
@@ -642,6 +705,18 @@ public:
   }
 };
 
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+  DAGCombiner &DC;
+
+public:
+  explicit WorklistInserter(DAGCombiner &dc)
+      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+  // FIXME: Ideally we could add N to the worklist, but this causes exponential
+  //        compile time costs in large DAGs, e.g. Halide.
+  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -697,20 +772,23 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                const TargetLowering &TLI,
                                const TargetOptions *Options,
+                               bool ForCodeSize,
                                unsigned Depth = 0) {
   // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG) return 2;
+  if (Op.getOpcode() == ISD::FNEG)
+    return 2;
 
   // Don't allow anything with multiple uses unless we know it is free.
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
-  if (!Op.hasOneUse())
-    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
-          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
-      return 0;
+  if (!Op.hasOneUse() &&
+      !(Op.getOpcode() == ISD::FP_EXTEND &&
+        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+    return 0;
 
   // Don't recurse exponentially.
-  if (Depth > 6) return 0;
+  if (Depth > 6)
+    return 0;
 
   switch (Op.getOpcode()) {
   default: return false;
@@ -721,7 +799,25 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
     // Don't invert constant FP values after legalization unless the target says
     // the negated constant is legal.
     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-      TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+                            ForCodeSize);
+  }
+  case ISD::BUILD_VECTOR: {
+    // Only permit BUILD_VECTOR of constants.
+    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+        }))
+      return 0;
+    if (!LegalOperations)
+      return 1;
+    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
+        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+      return 1;
+    return llvm::all_of(Op->op_values(), [&](SDValue N) {
+      return N.isUndef() ||
+             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+                              ForCodeSize);
+    });
   }
   case ISD::FADD:
     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
@@ -733,15 +829,14 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, Depth + 1))
+                                    Options, ForCodeSize, Depth + 1))
       return V;
     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              Depth + 1);
+                              ForCodeSize, Depth + 1);
   case ISD::FSUB:
     // We can't turn -(A-B) into B-A when we honor signed zeros.
-    if (!Options->NoSignedZerosFPMath &&
-        !Flags.hasNoSignedZeros())
+    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
       return 0;
 
     // fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -751,30 +846,31 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
   case ISD::FDIV:
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, Depth + 1))
+                                    Options, ForCodeSize, Depth + 1))
       return V;
 
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              Depth + 1);
+                              ForCodeSize, Depth + 1);
 
   case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
   case ISD::FSIN:
     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
-                              Depth + 1);
+                              ForCodeSize, Depth + 1);
   }
 }
 
 /// If isNegatibleForFree returns true, return the newly negated expression.
 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
-                                    bool LegalOperations, unsigned Depth = 0) {
-  const TargetOptions &Options = DAG.getTarget().Options;
+                                    bool LegalOperations, bool ForCodeSize,
+                                    unsigned Depth = 0) {
   // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+  if (Op.getOpcode() == ISD::FNEG)
+    return Op.getOperand(0);
 
   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-
-  const SDNodeFlags Flags = Op.getNode()->getFlags();
+  const TargetOptions &Options = DAG.getTarget().Options;
+  const SDNodeFlags Flags = Op->getFlags();
 
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Unknown code");
@@ -783,24 +879,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
     V.changeSign();
     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
   }
+  case ISD::BUILD_VECTOR: {
+    SmallVector<SDValue, 4> Ops;
+    for (SDValue C : Op->op_values()) {
+      if (C.isUndef()) {
+        Ops.push_back(C);
+        continue;
+      }
+      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+      V.changeSign();
+      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+    }
+    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+  }
   case ISD::FADD:
     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
+                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+                           Depth + 1))
       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                          GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, Depth+1),
+                                              LegalOperations, ForCodeSize,
+                                              Depth + 1),
                          Op.getOperand(1), Flags);
     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                        GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, Depth+1),
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1),
                        Op.getOperand(0), Flags);
   case ISD::FSUB:
     // fold (fneg (fsub 0, B)) -> B
-    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+    if (ConstantFPSDNode *N0CFP =
+            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
       if (N0CFP->isZero())
         return Op.getOperand(1);
 
@@ -812,28 +925,33 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
   case ISD::FDIV:
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
+                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+                           Depth + 1))
       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                          GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, Depth+1),
+                                              LegalOperations, ForCodeSize,
+                                              Depth + 1),
                          Op.getOperand(1), Flags);
 
     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                        Op.getOperand(0),
                        GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, Depth+1), Flags);
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1), Flags);
 
   case ISD::FP_EXTEND:
   case ISD::FSIN:
     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                        GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, Depth+1));
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1));
   case ISD::FP_ROUND:
-      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, Depth+1),
-                         Op.getOperand(1));
+    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1),
+                       Op.getOperand(1));
   }
 }
 
@@ -924,53 +1042,113 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
-                                    SDValue N1, SDNodeFlags Flags) {
-  // Don't reassociate reductions.
-  if (Flags.hasVectorReduction())
-    return SDValue();
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+                                                             const SDLoc &DL,
+                                                             SDValue N0,
+                                                             SDValue N1) {
+  // Currently this only tries to ensure we don't undo the GEP splits done by
+  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+  // we check if the following transformation would be problematic:
+  // (load/store (add, (add, x, offset1), offset2)) ->
+  // (load/store (add, x, offset1+offset2)).
 
-  EVT VT = N0.getValueType();
-  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
-    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
-      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
-        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
-          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
-        return SDValue();
-      }
-      if (N0.hasOneUse()) {
-        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
-      }
+  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+    return false;
+
+  if (N0.hasOneUse())
+    return false;
+
+  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  auto *C2 = dyn_cast<ConstantSDNode>(N1);
+  if (!C1 || !C2)
+    return false;
+
+  const APInt &C1APIntVal = C1->getAPIntValue();
+  const APInt &C2APIntVal = C2->getAPIntValue();
+  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+    return false;
+
+  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+  if (CombinedValueIntVal.getBitWidth() > 64)
+    return false;
+  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+  for (SDNode *Node : N0->uses()) {
+    auto LoadStore = dyn_cast<MemSDNode>(Node);
+    if (LoadStore) {
+      // Is x[offset2] already not a legal addressing mode? If so then
+      // reassociating the constants breaks nothing (we test offset2 because
+      // that's the one we hope to fold into the load or store).
+      TargetLoweringBase::AddrMode AM;
+      AM.HasBaseReg = true;
+      AM.BaseOffs = C2APIntVal.getSExtValue();
+      EVT VT = LoadStore->getMemoryVT();
+      unsigned AS = LoadStore->getAddressSpace();
+      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        continue;
+
+      // Would x[offset1+offset2] still be a legal addressing mode?
+      AM.BaseOffs = CombinedValue;
+      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+        return true;
     }
   }
 
-  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
-    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
-      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
-        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
-          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+  return false;
+}
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+                                               SDValue N0, SDValue N1) {
+  EVT VT = N0.getValueType();
+
+  if (N0.getOpcode() != Opc)
+    return SDValue();
+
+  // Don't reassociate reductions.
+  if (N0->getFlags().hasVectorReduction())
+    return SDValue();
+
+  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+      return SDValue();
+    }
+    if (N0.hasOneUse()) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      //              iff (op x, c1) has one use
+      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+      if (!OpNode.getNode())
         return SDValue();
-      }
-      if (N1.hasOneUse()) {
-        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
-      }
+      AddToWorklist(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     }
   }
+  return SDValue();
+}
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                    SDValue N1, SDNodeFlags Flags) {
+  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+  // Don't reassociate reductions.
+  if (Flags.hasVectorReduction())
+    return SDValue();
 
+  // Floating-point reassociation is not allowed without loose FP math.
+  if (N0.getValueType().isFloatingPoint() ||
+      N1.getValueType().isFloatingPoint())
+    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+      return SDValue();
+
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+    return Combined;
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+    return Combined;
   return SDValue();
 }
 
@@ -1026,10 +1204,11 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 
 /// Check the specified integer node value to see if it can be simplified or if
 /// things it uses can be simplified by bit propagation. If so, return true.
-bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+                                       const APInt &DemandedElts) {
   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
   KnownBits Known;
-  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
+  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
     return false;
 
   // Revisit the node.
@@ -1048,12 +1227,13 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 /// Check the specified vector node value to see if it can be simplified or
 /// if things it uses can be simplified as it only uses some of the elements.
 /// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+                                             const APInt &DemandedElts,
                                              bool AssumeSingleUse) {
   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
   APInt KnownUndef, KnownZero;
-  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
-                                      0, AssumeSingleUse))
+  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+                                      TLO, 0, AssumeSingleUse))
     return false;
 
   // Revisit the node.
@@ -1383,6 +1563,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
   LegalOperations = Level >= AfterLegalizeVectorOps;
   LegalTypes = Level >= AfterLegalizeTypes;
 
+  WorklistInserter AddNodes(*this);
+
   // Add all the dag nodes to the worklist.
   for (SDNode &Node : DAG.allnodes())
     AddToWorklist(&Node);
@@ -1392,19 +1574,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
   // changes of the root.
   HandleSDNode Dummy(DAG.getRoot());
 
-  // While the worklist isn't empty, find a node and try to combine it.
-  while (!WorklistMap.empty()) {
-    SDNode *N;
-    // The Worklist holds the SDNodes in order, but it may contain null entries.
-    do {
-      N = Worklist.pop_back_val();
-    } while (!N);
-
-    bool GoodWorklistEntry = WorklistMap.erase(N);
-    (void)GoodWorklistEntry;
-    assert(GoodWorklistEntry &&
-           "Found a worklist entry without a corresponding map entry!");
-
+  // While we have a valid worklist entry node, try to combine it.
+  while (SDNode *N = getNextWorklistEntry()) {
     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
     // N is deleted from the DAG, since they too may now be dead or may have a
     // reduced number of uses, allowing other xforms.
@@ -1493,9 +1664,11 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::SSUBSAT:
   case ISD::USUBSAT:            return visitSUBSAT(N);
   case ISD::ADDC:               return visitADDC(N);
-  case ISD::UADDO:              return visitUADDO(N);
+  case ISD::SADDO:
+  case ISD::UADDO:              return visitADDO(N);
   case ISD::SUBC:               return visitSUBC(N);
-  case ISD::USUBO:              return visitUSUBO(N);
+  case ISD::SSUBO:
+  case ISD::USUBO:              return visitSUBO(N);
   case ISD::ADDE:               return visitADDE(N);
   case ISD::ADDCARRY:           return visitADDCARRY(N);
   case ISD::SUBE:               return visitSUBE(N);
@@ -1509,8 +1682,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::MULHS:              return visitMULHS(N);
   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
-  case ISD::SMULO:              return visitSMULO(N);
-  case ISD::UMULO:              return visitUMULO(N);
+  case ISD::SMULO:
+  case ISD::UMULO:              return visitMULO(N);
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
@@ -1590,8 +1763,22 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::MLOAD:              return visitMLOAD(N);
   case ISD::MSCATTER:           return visitMSCATTER(N);
   case ISD::MSTORE:             return visitMSTORE(N);
+  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
   }
   return SDValue();
 }
@@ -1644,7 +1831,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
     }
   }
 
-  // If N is a commutative binary node, try eliminate it if the commuted
+  // If N is a commutative binary node, try to eliminate it if the commuted
   // version is already present in the DAG.
   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
       N->getNumValues() == 1) {
@@ -1693,6 +1880,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   if (OptLevel == CodeGenOpt::None)
     return SDValue();
 
+  // If the sole user is a token factor, we should make sure we have a
+  // chance to merge them together. This prevents TF chains from inhibiting
+  // optimizations.
+  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+    AddToWorklist(*(N->use_begin()));
+
   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
   SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1704,8 +1897,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   // Iterate through token factors.  The TFs grows when new token factors are
   // encountered.
   for (unsigned i = 0; i < TFs.size(); ++i) {
-    SDNode *TF = TFs[i];
+    // Limit number of nodes to inline, to avoid quadratic compile times.
+    // We have to add the outstanding Token Factors to Ops, otherwise we might
+    // drop Ops from the resulting Token Factors.
+    if (Ops.size() > TokenFactorInlineLimit) {
+      for (unsigned j = i; j < TFs.size(); j++)
+        Ops.emplace_back(TFs[j], 0);
+      // Drop unprocessed Token Factors from TFs, so we do not add them to the
+      // combiner worklist later.
+      TFs.resize(i);
+      break;
+    }
 
+    SDNode *TF = TFs[i];
     // Check each of the operands.
     for (const SDValue &Op : TF->op_values()) {
       switch (Op.getOpcode()) {
@@ -1719,8 +1923,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
           // Queue up for processing.
           TFs.push_back(Op.getNode());
-          // Clean up in case the token factor is removed.
-          AddToWorklist(Op.getNode());
           Changed = true;
           break;
         }
@@ -1737,6 +1939,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
     }
   }
 
+  // Re-visit inlined Token Factors, to clean them up in case they have been
+  // removed. Skip the first Token Factor, as this is the current node.
+  for (unsigned i = 1, e = TFs.size(); i < e; i++)
+    AddToWorklist(TFs[i]);
+
   // Remove Nodes that are chained to another node in the list. Do so
   // by walking up chains breath-first stopping when we've seen
   // another operand. In general we must climb to the EntryNode, but we can exit
@@ -1803,6 +2010,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
       for (const SDValue &Op : CurNode->op_values())
         AddToWorklist(i, Op.getNode(), CurOpNumber);
       break;
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END:
     case ISD::CopyFromReg:
     case ISD::CopyToReg:
       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
@@ -1831,9 +2040,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
           if (SeenChains.count(Op.getNode()) == 0)
             PrunedOps.push_back(Op);
         }
-        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
       } else {
-        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+        Result = DAG.getTokenFactor(SDLoc(N), Ops);
       }
     }
     return Result;
@@ -1869,7 +2078,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
 }
 
 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
-  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
+  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+         "Unexpected binary operator");
 
   // Don't do this unless the old select is going away. We want to eliminate the
   // binary operator, not replace a binop with a select.
@@ -1940,7 +2150,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
 
-  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+  SelectOp->setFlags(BO->getFlags());
+  return SelectOp;
 }
 
 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
@@ -1990,6 +2202,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
 
   // We need a constant operand for the add/sub, and the other operand is a
   // logical shift right: add (srl), C or sub C, (srl).
+  // TODO - support non-uniform vector amounts.
   bool IsAdd = N->getOpcode() == ISD::ADD;
   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
@@ -2006,7 +2219,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
   EVT VT = ShiftOp.getValueType();
   SDValue ShAmt = ShiftOp.getOperand(1);
   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
-  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
     return SDValue();
 
   // Eliminate the 'not' by adjusting the shift and add/sub constant:
@@ -2019,7 +2232,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
 }
 
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
@@ -2058,13 +2274,22 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return N0;
 
   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+    // fold ((A-c1)+c2) -> (A+(c2-c1))
+    if (N0.getOpcode() == ISD::SUB &&
+        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+      SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+                                               N0.getOperand(1).getNode());
+      assert(Sub && "Constant folding failed");
+      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+    }
+
     // fold ((c1-A)+c2) -> (c1+c2)-A
     if (N0.getOpcode() == ISD::SUB &&
         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
-      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
-      return DAG.getNode(ISD::SUB, DL, VT,
-                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
-                         N0.getOperand(1));
+      SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+                                               N0.getOperand(0).getNode());
+      assert(Add && "Constant folding failed");
+      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
     }
 
     // add (sext i1 X), 1 -> zext (not i1 X)
@@ -2097,9 +2322,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return NewSel;
 
   // reassociate add
-  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
-    return RADD;
-
+  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+      return RADD;
+  }
   // fold ((0-A) + B) -> B-A
   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
@@ -2116,6 +2342,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
     return N0.getOperand(0);
 
+  // fold ((A-B)+(C-A)) -> (C-B)
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+      N0.getOperand(0) == N1.getOperand(1))
+    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+                       N0.getOperand(1));
+
+  // fold ((A-B)+(B-C)) -> (A-C)
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+      N0.getOperand(1) == N1.getOperand(0))
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+                       N1.getOperand(1));
+
   // fold (A+(B-(A+C))) to (B-C)
   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
       N0 == N1.getOperand(1).getOperand(0))
@@ -2148,31 +2386,93 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   }
 
+  // fold (add (umax X, C), -C) --> (usubsat X, C)
+  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+      return (!Max && !Op) ||
+             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+    };
+    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+                                  /*AllowUndefs*/ true))
+      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+                         N0.getOperand(1));
+  }
+
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  if (isOneOrOneSplat(N1)) {
+    // fold (add (xor a, -1), 1) -> (sub 0, a)
+    if (isBitwiseNot(N0))
+      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                         N0.getOperand(0));
+
+    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+    if (N0.getOpcode() == ISD::ADD ||
+        N0.getOpcode() == ISD::UADDO ||
+        N0.getOpcode() == ISD::SADDO) {
+      SDValue A, Xor;
+
+      if (isBitwiseNot(N0.getOperand(0))) {
+        A = N0.getOperand(1);
+        Xor = N0.getOperand(0);
+      } else if (isBitwiseNot(N0.getOperand(1))) {
+        A = N0.getOperand(0);
+        Xor = N0.getOperand(1);
+      }
+
+      if (Xor)
+        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+    }
+
+    // Look for:
+    //   add (add x, y), 1
+    // And if the target does not like this form then turn into:
+    //   sub y, (xor x, -1)
+    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+        N0.getOpcode() == ISD::ADD) {
+      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                                DAG.getAllOnesConstant(DL, VT));
+      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+    }
+  }
+
+  // (x - y) + -1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isAllOnesOrAllOnesSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
+  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+    return Combined;
+
+  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+    return Combined;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+
+  if (SDValue Combined = visitADDLike(N))
+    return Combined;
+
   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
     return V;
 
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
-  if (SimplifyDemandedBits(SDValue(N, 0)))
-    return SDValue(N, 0);
-
   // fold (a+b) -> (a|b) iff a and b share no bits.
   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
       DAG.haveNoCommonBitsSet(N0, N1))
     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
 
-  // fold (add (xor a, -1), 1) -> (sub 0, a)
-  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
-    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
-                       N0.getOperand(0));
-
-  if (SDValue Combined = visitADDLike(N0, N1, N))
-    return Combined;
-
-  if (SDValue Combined = visitADDLike(N1, N0, N))
-    return Combined;
-
   return SDValue();
 }
 
@@ -2246,6 +2546,10 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
     return SDValue();
 
+  EVT VT = V.getNode()->getValueType(0);
+  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+    return SDValue();
+
   // If the result is masked, then no matter what kind of bool it is we can
   // return. If it isn't, then we need to make sure the bool type is either 0 or
   // 1 and not other values.
@@ -2257,7 +2561,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+                                 SelectionDAG &DAG, const SDLoc &DL) {
+  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+    return SDValue();
+
+  EVT VT = N0.getValueType();
+  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+    return SDValue();
+
+  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+}
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+                                          SDNode *LocReference) {
   EVT VT = N0.getValueType();
   SDLoc DL(LocReference);
 
@@ -2269,21 +2592,42 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
                                    N1.getOperand(0).getOperand(1),
                                    N1.getOperand(1)));
 
-  if (N1.getOpcode() == ISD::AND) {
-    SDValue AndOp0 = N1.getOperand(0);
-    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
-    unsigned DestBits = VT.getScalarSizeInBits();
-
-    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
-    // and similar xforms where the inner op is either ~0 or 0.
-    if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
-      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
-  }
+  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+    return V;
 
-  // add (sext i1), X -> sub X, (zext i1)
+  // Look for:
+  //   add (add x, 1), y
+  // And if the target does not like this form then turn into:
+  //   sub y, (xor x, -1)
+  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+  }
+
+  // Hoist one-use subtraction by non-opaque constant:
+  //   (x - C) + y  ->  (x + y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+  }
+  // Hoist one-use subtraction from non-opaque constant:
+  //   (C - x) + y  ->  (y - x) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+  }
+
+  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+  // rather than 'add 0/-1' (the zext should get folded).
+  // add (sext i1 Y), X --> sub X, (zext i1 Y)
   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
-      N0.getOperand(0).getValueType() == MVT::i1 &&
-      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   }
@@ -2344,8 +2688,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
   return SDValue();
 }
 
-static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
                            SelectionDAG &DAG, const TargetLowering &TLI) {
+  EVT VT = V.getValueType();
+
   SDValue Cst;
   switch (TLI.getBooleanContents(VT)) {
   case TargetLowering::ZeroOrOneBooleanContent:
@@ -2353,35 +2699,60 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
     Cst = DAG.getConstant(1, DL, VT);
     break;
   case TargetLowering::ZeroOrNegativeOneBooleanContent:
-    Cst = DAG.getConstant(-1, DL, VT);
+    Cst = DAG.getAllOnesConstant(DL, VT);
     break;
   }
 
   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
 }
 
-static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
-  if (V.getOpcode() != ISD::XOR) return false;
-  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
-  if (!Const) return false;
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use flipBoolean.
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  bool Force) {
+  if (Force && isa<ConstantSDNode>(V))
+    return flipBoolean(V, SDLoc(V), DAG, TLI);
+
+  if (V.getOpcode() != ISD::XOR)
+    return SDValue();
+
+  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+  if (!Const)
+    return SDValue();
 
+  EVT VT = V.getValueType();
+
+  bool IsFlip = false;
   switch(TLI.getBooleanContents(VT)) {
     case TargetLowering::ZeroOrOneBooleanContent:
-      return Const->isOne();
+      IsFlip = Const->isOne();
+      break;
     case TargetLowering::ZeroOrNegativeOneBooleanContent:
-      return Const->isAllOnesValue();
+      IsFlip = Const->isAllOnesValue();
+      break;
     case TargetLowering::UndefinedBooleanContent:
-      return (Const->getAPIntValue() & 0x01) == 1;
+      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+      break;
   }
-  llvm_unreachable("Unsupported boolean content");
+
+  if (IsFlip)
+    return V.getOperand(0);
+  if (Force)
+    return flipBoolean(V, SDLoc(V), DAG, TLI);
+  return SDValue();
 }
 
-SDValue DAGCombiner::visitUADDO(SDNode *N) {
+SDValue DAGCombiner::visitADDO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
+  bool IsSigned = (ISD::SADDO == N->getOpcode());
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2392,40 +2763,42 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
                      DAG.getUNDEF(CarryVT));
 
   // canonicalize constant to RHS.
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && !N1C)
-    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
 
-  // fold (uaddo x, 0) -> x + no carry out
-  if (isNullConstant(N1))
+  // fold (addo x, 0) -> x + no carry out
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
-  // If it cannot overflow, transform into an add.
-  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
-    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
-                     DAG.getConstant(0, DL, CarryVT));
+  if (!IsSigned) {
+    // If it cannot overflow, transform into an add.
+    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+                       DAG.getConstant(0, DL, CarryVT));
 
-  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
-  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
-    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
-                              DAG.getConstant(0, DL, VT),
-                              N0.getOperand(0));
-    return CombineTo(N, Sub,
-                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
-  }
+    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
+      return CombineTo(N, Sub,
+                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+    }
 
-  if (SDValue Combined = visitUADDOLike(N0, N1, N))
-    return Combined;
+    if (SDValue Combined = visitUADDOLike(N0, N1, N))
+      return Combined;
 
-  if (SDValue Combined = visitUADDOLike(N1, N0, N))
-    return Combined;
+    if (SDValue Combined = visitUADDOLike(N1, N0, N))
+      return Combined;
+  }
 
   return SDValue();
 }
 
 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
-  auto VT = N0.getValueType();
+  EVT VT = N0.getValueType();
+  if (VT.isVector())
+    return SDValue();
 
   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
   // If Y + 1 cannot overflow.
@@ -2484,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
   }
 
-  EVT CarryVT = CarryIn.getValueType();
-
   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
   if (isNullConstant(N0) && isNullConstant(N1)) {
     EVT VT = N0.getValueType();
+    EVT CarryVT = CarryIn.getValueType();
     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
     AddToWorklist(CarryExt.getNode());
     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2496,16 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
                      DAG.getConstant(0, DL, CarryVT));
   }
 
-  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
-  if (isBitwiseNot(N0) && isNullConstant(N1) &&
-      isBooleanFlip(CarryIn, CarryVT, TLI)) {
-    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
-                              DAG.getConstant(0, DL, N0.getValueType()),
-                              N0.getOperand(0), CarryIn.getOperand(0));
-    return CombineTo(N, Sub,
-                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
-  }
-
   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
     return Combined;
 
@@ -2515,12 +2877,112 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
   return SDValue();
 }
 
+/**
+ * If we are facing some sort of diamond carry propagation pattern, try to
+ * break it up to generate something like:
+ *   (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in operations required, but because the
+ * carry is now linearized, other transforms can kick in and optimize the DAG.
+ *
+ * Patterns typically look something like
+ *            (uaddo A, B)
+ *             /       \
+ *          Carry      Sum
+ *            |          \
+ *            | (addcarry *, 0, Z)
+ *            |       /
+ *             \   Carry
+ *              |   /
+ * (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+                                      SDValue X, SDValue Carry0, SDValue Carry1,
+                                      SDNode *N) {
+  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+    return SDValue();
+  if (Carry1.getOpcode() != ISD::UADDO)
+    return SDValue();
+
+  SDValue Z;
+
+  /**
+   * First look for a suitable Z. It will present itself in the form of
+   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+   */
+  if (Carry0.getOpcode() == ISD::ADDCARRY &&
+      isNullConstant(Carry0.getOperand(1))) {
+    Z = Carry0.getOperand(2);
+  } else if (Carry0.getOpcode() == ISD::UADDO &&
+             isOneConstant(Carry0.getOperand(1))) {
+    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+  } else {
+    // We couldn't find a suitable Z.
+    return SDValue();
+  }
+
+
+  auto cancelDiamond = [&](SDValue A,SDValue B) {
+    SDLoc DL(N);
+    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+    Combiner.AddToWorklist(NewY.getNode());
+    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+                       DAG.getConstant(0, DL, X.getValueType()),
+                       NewY.getValue(1));
+  };
+
+  /**
+   *      (uaddo A, B)
+   *           |
+   *          Sum
+   *           |
+   * (addcarry *, 0, Z)
+   */
+  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+  }
+
+  /**
+   * (addcarry A, 0, Z)
+   *         |
+   *        Sum
+   *         |
+   *  (uaddo *, B)
+   */
+  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+  }
+
+  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                        SDNode *N) {
+  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+  if (isBitwiseNot(N0))
+    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+      SDLoc DL(N);
+      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+                                N0.getOperand(0), NotC);
+      return CombineTo(N, Sub,
+                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+    }
+
   // Iff the flag result is dead:
   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+  // or the dependency between the instructions.
   if ((N0.getOpcode() == ISD::ADD ||
-       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
+       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+        N0.getValue(1) != CarryIn)) &&
       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                        N0.getOperand(0), N0.getOperand(1), CarryIn);
@@ -2529,35 +2991,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
    * When one of the addcarry argument is itself a carry, we may be facing
    * a diamond carry propagation. In which case we try to transform the DAG
    * to ensure linear carry propagation if that is possible.
-   *
-   * We are trying to get:
-   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
    */
   if (auto Y = getAsCarry(TLI, N1)) {
-    /**
-     *            (uaddo A, B)
-     *             /       \
-     *          Carry      Sum
-     *            |          \
-     *            | (addcarry *, 0, Z)
-     *            |       /
-     *             \   Carry
-     *              |   /
-     * (addcarry X, *, *)
-     */
-    if (Y.getOpcode() == ISD::UADDO &&
-        CarryIn.getResNo() == 1 &&
-        CarryIn.getOpcode() == ISD::ADDCARRY &&
-        isNullConstant(CarryIn.getOperand(1)) &&
-        CarryIn.getOperand(0) == Y.getValue(0)) {
-      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
-                              Y.getOperand(0), Y.getOperand(1),
-                              CarryIn.getOperand(2));
-      AddToWorklist(NewY.getNode());
-      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
-                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
-                         NewY.getValue(1));
-    }
+    // Because both are carries, Y and Z can be swapped.
+    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+      return R;
+    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+      return R;
   }
 
   return SDValue();
@@ -2620,7 +3060,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     // -(X >>s 31) -> (X >>u 31)
     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
-      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
@@ -2662,16 +3102,48 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
     return N0.getOperand(0);
 
+  // fold (A+C1)-C2 -> A+(C1-C2)
+  if (N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+  }
+
   // fold C2-(A+C1) -> (C2-C1)-A
   if (N1.getOpcode() == ISD::ADD) {
     SDValue N11 = N1.getOperand(1);
     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
-      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+                                                N11.getNode());
+      assert(NewC && "Constant folding failed");
       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
     }
   }
 
+  // fold (A-C1)-C2 -> A-(C1+C2)
+  if (N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+  }
+
+  // fold (c1-A)-c2 -> (c1-c2)-A
+  if (N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+  }
+
   // fold ((A+(B+or-C))-B) -> A+or-C
   if (N0.getOpcode() == ISD::ADD &&
       (N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2728,6 +3200,63 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
+  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+    return V;
+
+  // (x - y) - 1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
+  // Look for:
+  //   sub y, (xor x, -1)
+  // And if the target does not like this form then turn into:
+  //   add (add x, y), 1
+  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+  }
+
+  // Hoist one-use addition by non-opaque constant:
+  //   (x + C) - y  ->  (x - y) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+  }
+  // y - (x + C)  ->  (y - x) - C
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+  }
+  // (x - C) - y  ->  (x - y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+  }
+  // (C - x) - y  ->  C - (x + y)
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+  }
+
+  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+  // rather than 'sub 0/1' (the sext should get folded).
+  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+      TLI.getBooleanContents(VT) ==
+          TargetLowering::ZeroOrNegativeOneBooleanContent) {
+    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+  }
+
   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
@@ -2772,7 +3301,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
     SDValue ShAmt = N1.getOperand(1);
     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
-    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+    if (ShAmtC &&
+        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
     }
@@ -2846,12 +3376,11 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();
+  bool IsSigned = (ISD::SSUBO == N->getOpcode());
 
   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2861,17 +3390,25 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                      DAG.getUNDEF(CarryVT));
 
-  // fold (usubo x, x) -> 0 + no borrow
+  // fold (subo x, x) -> 0 + no borrow
   if (N0 == N1)
     return CombineTo(N, DAG.getConstant(0, DL, VT),
                      DAG.getConstant(0, DL, CarryVT));
 
-  // fold (usubo x, 0) -> x + no borrow
-  if (isNullConstant(N1))
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+  // fold (ssubo x, c) -> (saddo x, -c)
+  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+  }
+
+  // fold (subo x, 0) -> x + no borrow
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
 
   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
-  if (isAllOnesConstant(N0))
+  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                      DAG.getConstant(0, DL, CarryVT));
 
@@ -3012,13 +3549,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
       MathOp = ISD::SUB;
 
     if (MathOp != ISD::DELETED_NODE) {
-      unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
-                                          : (MulC + 1).logBase2();
-      assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
-             "Not expecting multiply-by-constant that could have simplified");
+      unsigned ShAmt =
+          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+      assert(ShAmt < VT.getScalarSizeInBits() &&
+             "multiply-by-constant generated out of bounds shift");
       SDLoc DL(N);
-      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
-                                DAG.getConstant(ShAmt, DL, VT));
+      SDValue Shl =
+          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
       if (ConstValue1.isNegative())
         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
@@ -3069,7 +3606,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
                                      N0.getOperand(1), N1));
 
   // reassociate mul
-  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
     return RMUL;
 
   return SDValue();
@@ -3612,7 +4149,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
-    SDLoc DL(N);
     unsigned NumEltBits = VT.getScalarSizeInBits();
     SDValue LogBase2 = BuildLogBase2(N1, DL);
     SDValue SRLAmt = DAG.getNode(
@@ -3753,22 +4289,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitSMULO(SDNode *N) {
-  // (smulo x, 2) -> (saddo x, x)
-  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-    if (C2->getAPIntValue() == 2)
-      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
-                         N->getOperand(0), N->getOperand(0));
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+  bool IsSigned = (ISD::SMULO == N->getOpcode());
 
-  return SDValue();
-}
-
-SDValue DAGCombiner::visitUMULO(SDNode *N) {
-  // (umulo x, 2) -> (uaddo x, x)
-  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+  // (mulo x, 2) -> (addo x, x)
+  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
     if (C2->getAPIntValue() == 2)
-      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
-                         N->getOperand(0), N->getOperand(0));
+      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
+                         N->getVTList(), N->getOperand(0), N->getOperand(0));
 
   return SDValue();
 }
@@ -4075,6 +4603,33 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
       SDValue Zero = DAG.getConstant(0, DL, OpVT);
       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
     }
+
+    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+    // TODO - support non-uniform vector amounts.
+    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+      // Match a shared variable operand and 2 non-opaque constant operands.
+      ConstantSDNode *C0 = isConstOrConstSplat(LR);
+      ConstantSDNode *C1 = isConstOrConstSplat(RR);
+      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+        // Canonicalize larger constant as C0.
+        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
+          std::swap(C0, C1);
+
+        // The difference of the constants must be a single bit.
+        const APInt &C0Val = C0->getAPIntValue();
+        const APInt &C1Val = C1->getAPIntValue();
+        if ((C0Val - C1Val).isPowerOf2()) {
+          // and/or (setcc X, C0, ne/eq), (setcc X, C1, ne/eq) -->
+          // setcc (and (add X, -C1), ~(C0 - C1)), 0, ne/eq
+          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
+          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
+          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
+          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+          SDValue Zero = DAG.getConstant(0, DL, OpVT);
+          return DAG.getSetCC(DL, VT, And, Zero, CC0);
+        }
+      }
+    }
   }
 
   // Canonicalize equivalent operands to LL == RL.
@@ -4259,7 +4814,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
   // Ensure that this isn't going to produce an unsupported unaligned access.
   if (ShAmt &&
       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                              LDST->getAddressSpace(), ShAmt / 8))
+                              LDST->getAddressSpace(), ShAmt / 8,
+                              LDST->getMemOperand()->getFlags()))
     return false;
 
   // It's not possible to generate a constant of extended or untyped type.
@@ -4316,9 +4872,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                     SDNode *&NodeToMask) {
   // Recursively search for the operands, looking for loads which can be
   // narrowed.
-  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
-    SDValue Op = N->getOperand(i);
-
+  for (SDValue Op : N->op_values()) {
     if (Op.getValueType().isVector())
       return false;
 
@@ -4480,7 +5034,7 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
   SDValue N1 = N->getOperand(1);
 
   // Do we actually prefer shifts over mask?
-  if (!TLI.preferShiftsToClearExtremeBits(N0))
+  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
     return SDValue();
 
   // Try to match  (-1 '[outer] logical shift' y)
@@ -4575,7 +5129,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     return NewSel;
 
   // reassociate and
-  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
     return RAND;
 
   // Try to convert a constant mask AND into a shuffle clear mask.
@@ -4644,24 +5198,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         // the first vector value and FF for the rest, repeating. We need a mask
         // that will apply equally to all members of the vector, so AND all the
         // lanes of the constant together.
-        EVT VT = Vector->getValueType(0);
-        unsigned BitWidth = VT.getScalarSizeInBits();
+        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
 
         // If the splat value has been compressed to a bitlength lower
         // than the size of the vector lane, we need to re-expand it to
         // the lane size.
-        if (BitWidth > SplatBitSize)
-          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
-               SplatBitSize < BitWidth;
-               SplatBitSize = SplatBitSize * 2)
+        if (EltBitWidth > SplatBitSize)
+          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
             SplatValue |= SplatValue.shl(SplatBitSize);
 
         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
-        if (SplatBitSize % BitWidth == 0) {
-          Constant = APInt::getAllOnesValue(BitWidth);
-          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
-            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+        if ((SplatBitSize % EltBitWidth) == 0) {
+          Constant = APInt::getAllOnesValue(EltBitWidth);
+          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
         }
       }
     }
@@ -4763,54 +5315,39 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         return SubRHS;
       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
-    }
-  }
-
-  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
-  // fold (and (sra)) -> (and (srl)) when possible.
-  if (SimplifyDemandedBits(SDValue(N, 0)))
-    return SDValue(N, 0);
-
-  // fold (zext_inreg (extload x)) -> (zextload x)
-  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    EVT MemVT = LN0->getMemoryVT();
-    // If we zero all the possible extended bits, then we can turn this into
-    // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getScalarValueSizeInBits();
-    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                           BitWidth - MemVT.getScalarSizeInBits())) &&
-        ((!LegalOperations && !LN0->isVolatile()) ||
-         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
-                                       LN0->getChain(), LN0->getBasePtr(),
-                                       MemVT, LN0->getMemOperand());
-      AddToWorklist(N);
-      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
     }
   }
+
+  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+  // fold (and (sra)) -> (and (srl)) when possible.
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (zext_inreg (extload x)) -> (zextload x)
   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
-  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
-      N0.hasOneUse()) {
+  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      (ISD::isEXTLoad(N0.getNode()) ||
+       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     EVT MemVT = LN0->getMemoryVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getScalarValueSizeInBits();
-    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                           BitWidth - MemVT.getScalarSizeInBits())) &&
+    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+    unsigned MemBitSize = MemVT.getScalarSizeInBits();
+    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
         ((!LegalOperations && !LN0->isVolatile()) ||
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
-      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
-                                       LN0->getChain(), LN0->getBasePtr(),
-                                       MemVT, LN0->getMemOperand());
+      SDValue ExtLoad =
+          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
       AddToWorklist(N);
       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      return SDValue(N, 0); // Return N so it doesn't get rechecked!
     }
   }
+
   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -5155,6 +5692,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
   return SDValue();
 }
 
+/// OR folds that visitOR tries for both (N0, N1) and (N1, N0) operand orders.
+static SDValue visitORCommutative(
+    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+  EVT VT = N0.getValueType(); // Result type of the rebuilt OR.
+  if (N0.getOpcode() == ISD::AND) {
+    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+    if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+    if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+  }
+
+  return SDValue(); // No fold matched for this operand order.
+}
+
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -5284,7 +5838,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return BSwap;
 
   // reassociate or
-  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
     return ROR;
 
   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -5302,6 +5856,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     }
   }
 
+  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+    return Combined;
+  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+    return Combined;
+
   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
   if (N0.getOpcode() == N1.getOpcode())
     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
@@ -5318,6 +5877,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
+  // If OR can be rewritten into ADD, try combines based on ADD.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+      DAG.haveNoCommonBitsSet(N0, N1))
+    if (SDValue Combined = visitADDLike(N))
+      return Combined;
+
   return SDValue();
 }
 
@@ -5869,6 +6434,213 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
   return None;
 }
 
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+  return i; // Little endian: value byte i sits at offset i (BW is unused).
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+  return BW - i - 1; // Big endian: value byte i sits at the mirrored offset.
+}
+
+// Check whether the byte offsets we are looking at match either a big or a
+// little endian layout of the value. Return true for big endian, false for
+// little endian, and None if neither layout matches.
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+                                  int64_t FirstOffset) {
+  // Endianness can only be determined when there are at least 2 bytes.
+  unsigned Width = ByteOffsets.size();
+  if (Width < 2)
+    return None;
+
+  bool BigEndian = true, LittleEndian = true;
+  for (unsigned i = 0; i < Width; i++) {
+    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+    BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+    if (!BigEndian && !LittleEndian)
+      return None;
+  }
+
+  assert((BigEndian != LittleEndian) && "It should be either big endian or "
+                                        "little endian");
+  return BigEndian;
+}
+
+static SDValue stripTruncAndExt(SDValue Value) { // Peel truncates/extends.
+  switch (Value.getOpcode()) {
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+    return stripTruncAndExt(Value.getOperand(0)); // Recurse through nesting.
+  }
+  return Value; // Any other opcode: return the node unchanged.
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 0) & 0xFF;
+///  p[1] = (val >> 8) & 0xFF;
+///  p[2] = (val >> 16) & 0xFF;
+///  p[3] = (val >> 24) & 0xFF;
+/// =>
+///  *((i32)p) = val;
+///
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 24) & 0xFF;
+///  p[1] = (val >> 16) & 0xFF;
+///  p[2] = (val >> 8) & 0xFF;
+///  p[3] = (val >> 0) & 0xFF;
+/// =>
+///  *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+  // Collect N and the stores up its chain: i8, non-volatile, unindexed only.
+  SDValue Chain;
+  SmallVector<StoreSDNode *, 8> Stores;
+  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    if (Store->getMemoryVT() != MVT::i8 ||
+        Store->isVolatile() || Store->isIndexed())
+      return SDValue();
+    Stores.push_back(Store);
+    Chain = Store->getChain();
+  }
+  // Only combined widths of i16, i32 and i64 are handled.
+  unsigned Width = Stores.size();
+  EVT VT = EVT::getIntegerVT(
+    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+    return SDValue();
+
+  // Check if all the bytes of the combined value we are looking at are stored
+  // to the same base address. Collect byte offsets from the Base address into
+  // ByteOffsets.
+  SDValue CombinedValue;
+  SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+  int64_t FirstOffset = INT64_MAX;
+  StoreSDNode *FirstStore = nullptr;
+  Optional<BaseIndexOffset> Base;
+  for (auto Store : Stores) {
+    // Each store writes a different byte of the CombinedValue. A truncate is
+    // required to get that byte value.
+    SDValue Trunc = Store->getValue();
+    if (Trunc.getOpcode() != ISD::TRUNCATE)
+      return SDValue();
+    // A constant right shift selects which byte of the wide value is stored;
+    // the lowest byte (offset 0) needs no shift.
+    int64_t Offset = 0;
+    SDValue Value = Trunc.getOperand(0);
+    if (Value.getOpcode() == ISD::SRL ||
+        Value.getOpcode() == ISD::SRA) {
+      ConstantSDNode *ShiftOffset =
+        dyn_cast<ConstantSDNode>(Value.getOperand(1));
+      // Trying to match the following pattern. The shift offset must be
+      // a constant and a multiple of 8; dividing it by 8 gives the byte in "y".
+      //
+      // x = srl y, offset
+      // i8 z = trunc x
+      // store z, ...
+      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+        return SDValue();
+  
+     Offset = ShiftOffset->getSExtValue()/8;
+     Value = Value.getOperand(0);
+    }
+
+    // Stores must share the same combined value with different offsets.
+    if (!CombinedValue)
+      CombinedValue = Value;
+    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+      return SDValue();
+
+    // Truncates and extends were stripped for the comparison above; keep the
+    // candidate that already has type VT, or else the wider one, as the value.
+    else if (CombinedValue.getValueType() != VT) {
+      if (Value.getValueType() == VT ||
+          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+        CombinedValue = Value;
+      // Give up if the combined value type is smaller than the store size.
+      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+        return SDValue();
+    }
+
+    // Stores must share the same base address.
+    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+    int64_t ByteOffsetFromBase = 0;
+    if (!Base)
+      Base = Ptr;
+    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+      return SDValue();
+
+    // Remember the store with the lowest offset from the base address.
+    if (ByteOffsetFromBase < FirstOffset) {
+      FirstStore = Store;
+      FirstOffset = ByteOffsetFromBase;
+    }
+    // Record the memory offset at which byte 'Offset' of the combined value is
+    // stored; bail out if that byte is out of range or was already recorded.
+    if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+      return SDValue();
+    ByteOffsets[Offset] = ByteOffsetFromBase;
+  }
+
+  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+  assert(FirstStore && "First store must be set");
+
+  // Check if the bytes of the combined value we are looking at match with
+  // either big or little endian value store.
+  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+  if (!IsBigEndian.hasValue())
+    return SDValue();
+
+  // The node we are looking at matches with the pattern, check if we can
+  // replace it with a single bswap if needed and store.
+
+  // If the store needs byte swap check if the target supports it
+  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+  // Before legalize we can introduce illegal bswaps which will be later
+  // converted to an explicit bswap sequence. This way we end up with a single
+  // store and byte shuffling instead of several stores and byte shuffling.
+  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  // Check that a store of the wide type is both allowed and fast on the target
+  bool Fast = false;
+  bool Allowed =
+      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+                             *FirstStore->getMemOperand(), &Fast);
+  if (!Allowed || !Fast)
+    return SDValue();
+
+  if (VT != CombinedValue.getValueType()) {
+    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+           "Get unexpected store value to combine");
+    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+                             CombinedValue);
+  }
+
+  if (NeedsBswap)
+    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+  SDValue NewStore =
+    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
+                 FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+  // Rely on other DAG combine rules to remove the other individual stores.
+  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+  return NewStore;
+}
+
 /// Match a pattern where a wide type scalar value is loaded by several narrow
 /// loads and combined by shifts and ors. Fold it into a single load or a load
 /// and a BSWAP if the targets supports it.
@@ -5916,11 +6688,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
     return SDValue();
 
-  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
-    unsigned BW, unsigned i) { return i; };
-  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
-    unsigned BW, unsigned i) { return BW - i - 1; };
-
   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
   auto MemoryByteOffset = [&] (ByteProvider P) {
     assert(P.isMemory() && "Must be a memory byte provider");
@@ -5987,15 +6754,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
 
   // Check if the bytes of the OR we are looking at match with either big or
   // little endian value load
-  bool BigEndian = true, LittleEndian = true;
-  for (unsigned i = 0; i < ByteWidth; i++) {
-    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
-    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
-    BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
-    if (!BigEndian && !LittleEndian)
-      return SDValue();
-  }
-  assert((BigEndian != LittleEndian) && "should be either or");
+  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+  if (!IsBigEndian.hasValue())
+    return SDValue();
+
   assert(FirstByteProvider && "must be set");
 
   // Ensure that the first byte is loaded from zero offset of the first load.
@@ -6008,7 +6770,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
   // replace it with a single load and bswap if needed.
 
   // If the load needs byte swap check if the target supports it
-  bool NeedsBswap = IsBigEndianTarget != BigEndian;
+  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
 
   // Before legalize we can introduce illegal bswaps which will be later
   // converted to an explicit bswap sequence. This way we end up with a single
@@ -6019,8 +6781,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
   // Check that a load of the wide type is both allowed and fast on the target
   bool Fast = false;
   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstLoad->getAddressSpace(),
-                                        FirstLoad->getAlignment(), &Fast);
+                                        VT, *FirstLoad->getMemOperand(), &Fast);
   if (!Allowed || !Fast)
     return SDValue();
 
@@ -6160,7 +6921,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return NewSel;
 
   // reassociate xor
-  if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
     return RXOR;
 
   // fold !(x cc y) -> (x !cc y)
@@ -6218,6 +6979,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
     }
   }
+
+  // fold (not (neg X)) -> (add X, -1)
+  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+  // Y is a constant or the subtract has a single use.
+  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+      isNullConstant(N0.getOperand(0))) {
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+                       DAG.getAllOnesConstant(DL, VT));
+  }
+
   // fold (xor (and x, y), y) -> (and (not x), y)
   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
     SDValue X = N0.getOperand(0);
@@ -6310,11 +7081,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
 
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+///   shift (binop X, C0), C1
+/// And want to transform into:
+///   binop (shift X, C1), (shift C0, C1)
 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   // Do not turn a 'not' into a regular xor.
   if (isBitwiseNot(N->getOperand(0)))
     return SDValue();
 
+  // The inner binop must be one-use, since we want to replace it.
   SDNode *LHS = N->getOperand(0).getNode();
   if (!LHS->hasOneUse()) return SDValue();
 
@@ -6322,56 +7098,43 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
   // it.
-  bool HighBitSet = false;  // Can we transform this if the high bit is set?
-
   switch (LHS->getOpcode()) {
-  default: return SDValue();
+  default:
+    return SDValue();
   case ISD::OR:
   case ISD::XOR:
-    HighBitSet = false; // We can only transform sra if the high bit is clear.
-    break;
   case ISD::AND:
-    HighBitSet = true;  // We can only transform sra if the high bit is set.
     break;
   case ISD::ADD:
     if (N->getOpcode() != ISD::SHL)
       return SDValue(); // only shl(add) not sr[al](add).
-    HighBitSet = false; // We can only transform sra if the high bit is clear.
     break;
   }
 
   // We require the RHS of the binop to be a constant and not opaque as well.
   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
-  if (!BinOpCst) return SDValue();
+  if (!BinOpCst)
+    return SDValue();
 
   // FIXME: disable this unless the input to the binop is a shift by a constant
-  // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
-  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
-  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
-                 BinOpLHSVal->getOpcode() == ISD::SRA ||
-                 BinOpLHSVal->getOpcode() == ISD::SRL;
-  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
-                        BinOpLHSVal->getOpcode() == ISD::SELECT;
-
-  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
-      !isCopyOrSelect)
+  // or is copy/select. Enable this in other cases once we know when it is
+  // actually profitable.
+  SDValue BinOpLHSVal = LHS->getOperand(0);
+  bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+                            BinOpLHSVal.getOpcode() == ISD::SRA ||
+                            BinOpLHSVal.getOpcode() == ISD::SRL) &&
+                           isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+  bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+                        BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+  if (!IsShiftByConstant && !IsCopyOrSelect)
     return SDValue();
 
-  if (isCopyOrSelect && N->hasOneUse())
+  if (IsCopyOrSelect && N->hasOneUse())
     return SDValue();
 
   EVT VT = N->getValueType(0);
 
-  // If this is a signed shift right, and the high bit is modified by the
-  // logical operation, do not perform the transformation. The highBitSet
-  // boolean indicates the value of the high bit of the constant which would
-  // cause it to be modified for this operation.
-  if (N->getOpcode() == ISD::SRA) {
-    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
-    if (BinOpRHSSignSet != HighBitSet)
-      return SDValue();
-  }
-
   if (!TLI.isDesirableToCommuteWithShift(N, Level))
     return SDValue();
 
@@ -6395,11 +7158,12 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
   assert(N->getOperand(0).getOpcode() == ISD::AND);
 
   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
-  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+  EVT TruncVT = N->getValueType(0);
+  if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+      TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
     SDValue N01 = N->getOperand(0).getOperand(1);
     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
       SDLoc DL(N);
-      EVT TruncVT = N->getValueType(0);
       SDValue N00 = N->getOperand(0).getOperand(0);
       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
@@ -6431,6 +7195,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
   }
 
   // fold (rot x, c) -> (rot x, c % BitSize)
+  // TODO - support non-uniform vector amounts.
   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
     if (Cst->getAPIntValue().uge(Bitsize)) {
       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
@@ -6476,6 +7241,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     return V;
 
   EVT VT = N0.getValueType();
+  EVT ShiftVT = N1.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
   // fold vector ops
@@ -6506,6 +7272,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // fold (shl c1, c2) -> c1<<c2
+  // TODO - support non-uniform vector shift amounts.
   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   if (N0C && N1C && !N1C->isOpaque())
     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
@@ -6517,6 +7284,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
                             APInt::getAllOnesValue(OpSizeInBits)))
     return DAG.getConstant(0, SDLoc(N), VT);
+
   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -6524,6 +7292,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   }
 
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -6548,69 +7317,86 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     };
     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
       SDLoc DL(N);
-      EVT ShiftVT = N1.getValueType();
       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
     }
   }
 
-  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
   // For this to be valid, the second form must not preserve any of the bits
   // that are shifted out by the inner shift in the first form.  This means
   // the outer shift size must be >= the number of bits added by the ext.
   // As a corollary, we don't care what kind of ext it is.
-  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
-              N0.getOpcode() == ISD::ANY_EXTEND ||
-              N0.getOpcode() == ISD::SIGN_EXTEND) &&
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND ||
+       N0.getOpcode() == ISD::SIGN_EXTEND) &&
       N0.getOperand(0).getOpcode() == ISD::SHL) {
     SDValue N0Op0 = N0.getOperand(0);
-    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
-      APInt c1 = N0Op0C1->getAPIntValue();
-      APInt c2 = N1C->getAPIntValue();
-      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+    SDValue InnerShiftAmt = N0Op0.getOperand(1);
+    EVT InnerVT = N0Op0.getValueType();
+    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
 
-      EVT InnerShiftVT = N0Op0.getValueType();
-      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
-      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
-        SDLoc DL(N0);
-        APInt Sum = c1 + c2;
-        if (Sum.uge(OpSizeInBits))
-          return DAG.getConstant(0, DL, VT);
+    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+                                                         ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return c2.uge(OpSizeInBits - InnerBitwidth) &&
+             (c1 + c2).uge(OpSizeInBits);
+    };
+    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+                                  /*AllowUndefs*/ false,
+                                  /*AllowTypeMismatch*/ true))
+      return DAG.getConstant(0, SDLoc(N), VT);
 
-        return DAG.getNode(
-            ISD::SHL, DL, VT,
-            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
-            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
-      }
+    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+                                                      ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return c2.uge(OpSizeInBits - InnerBitwidth) &&
+             (c1 + c2).ult(OpSizeInBits);
+    };
+    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+                                  /*AllowUndefs*/ false,
+                                  /*AllowTypeMismatch*/ true)) {
+      SDLoc DL(N);
+      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
     }
   }
 
   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
   // Only fold this if the inner zext has no other uses to avoid increasing
   // the total number of instructions.
-  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
       N0.getOperand(0).getOpcode() == ISD::SRL) {
     SDValue N0Op0 = N0.getOperand(0);
-    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
-      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
-        uint64_t c1 = N0Op0C1->getZExtValue();
-        uint64_t c2 = N1C->getZExtValue();
-        if (c1 == c2) {
-          SDValue NewOp0 = N0.getOperand(0);
-          EVT CountVT = NewOp0.getOperand(1).getValueType();
-          SDLoc DL(N);
-          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
-                                       NewOp0,
-                                       DAG.getConstant(c2, DL, CountVT));
-          AddToWorklist(NewSHL.getNode());
-          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
-        }
-      }
+    SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2);
+      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+    };
+    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+                                  /*AllowUndefs*/ false,
+                                  /*AllowTypeMismatch*/ true)) {
+      SDLoc DL(N);
+      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+      AddToWorklist(NewSHL.getNode());
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
     }
   }
 
   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
       N0->getFlags().hasExact()) {
     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -6619,9 +7405,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       SDLoc DL(N);
       if (C1 <= C2)
         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
-                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+                           DAG.getConstant(C2 - C1, DL, ShiftVT));
       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
-                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+                         DAG.getConstant(C1 - C2, DL, ShiftVT));
     }
   }
 
@@ -6629,11 +7415,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   //                               (and (srl x, (sub c1, c2), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
   // this will increase the total number of instructions.
+  // TODO - drop hasOneUse requirement if c1 == c2?
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
-      TLI.shouldFoldShiftPairToMask(N, Level)) {
+      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
-      uint64_t c1 = N0C1->getZExtValue();
-      if (c1 < OpSizeInBits) {
+      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+        uint64_t c1 = N0C1->getZExtValue();
         uint64_t c2 = N1C->getZExtValue();
         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
         SDValue Shift;
@@ -6641,12 +7429,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
           Mask <<= c2 - c1;
           SDLoc DL(N);
           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
-                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+                              DAG.getConstant(c2 - c1, DL, ShiftVT));
         } else {
           Mask.lshrInPlace(c1 - c2);
           SDLoc DL(N);
           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
-                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+                              DAG.getConstant(c1 - c2, DL, ShiftVT));
         }
         SDLoc DL(N0);
         return DAG.getNode(ISD::AND, DL, VT, Shift,
@@ -6719,6 +7507,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // fold (sra c1, c2) -> (sra c1, c2)
+  // TODO - support non-uniform vector shift amounts.
   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   if (N0C && N1C && !N1C->isOpaque())
     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
@@ -6815,32 +7604,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
   }
 
+  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
   //      if c1 is equal to the number of bits the trunc removes
+  // TODO - support non-uniform vector shift amounts.
   if (N0.getOpcode() == ISD::TRUNCATE &&
       (N0.getOperand(0).getOpcode() == ISD::SRL ||
        N0.getOperand(0).getOpcode() == ISD::SRA) &&
       N0.getOperand(0).hasOneUse() &&
-      N0.getOperand(0).getOperand(1).hasOneUse() &&
-      N1C) {
+      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
     SDValue N0Op0 = N0.getOperand(0);
     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
-      unsigned LargeShiftVal = LargeShift->getZExtValue();
       EVT LargeVT = N0Op0.getValueType();
-
-      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+      if (LargeShift->getAPIntValue() == TruncBits) {
         SDLoc DL(N);
-        SDValue Amt =
-          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
-                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
-        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
-                                  N0Op0.getOperand(0), Amt);
+        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
+                                      getShiftAmountTy(LargeVT));
+        SDValue SRA =
+            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
       }
     }
   }
 
   // Simplify, based on bits shifted out of the LHS.
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -6872,6 +7661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // fold (srl c1, c2) -> c1 >>u c2
+  // TODO - support non-uniform vector shift amounts.
   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   if (N0C && N1C && !N1C->isOpaque())
     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
@@ -6912,6 +7702,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
       N0.getOperand(0).getOpcode() == ISD::SRL) {
     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
@@ -6935,6 +7726,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
+  // TODO - (srl (shl x, c1), c2).
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
     SDLoc DL(N);
@@ -6945,11 +7737,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     // Shifting in all undef bits?
     EVT SmallVT = N0.getOperand(0).getValueType();
     unsigned BitSize = SmallVT.getScalarSizeInBits();
-    if (N1C->getZExtValue() >= BitSize)
+    if (N1C->getAPIntValue().uge(BitSize))
       return DAG.getUNDEF(VT);
 
     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -6970,7 +7763,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   // bit, which is unmodified by sra.
-  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
     if (N0.getOpcode() == ISD::SRA)
       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   }
@@ -7021,6 +7814,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -7079,13 +7873,49 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
       return IsFSHL ? N0 : N1;
 
-  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+  auto IsUndefOrZero = [](SDValue V) {
+    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+  };
+
+  // TODO - support non-uniform vector shift amounts.
   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+    EVT ShAmtTy = N2.getValueType();
+
+    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
     if (Cst->getAPIntValue().uge(BitWidth)) {
       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
-                         DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
     }
+
+    unsigned ShAmt = Cst->getZExtValue();
+    if (ShAmt == 0)
+      return IsFSHL ? N0 : N1;
+
+    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+    if (IsUndefOrZero(N0))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+                                         SDLoc(N), ShAmtTy));
+    if (IsUndefOrZero(N1))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+                                         SDLoc(N), ShAmtTy));
+  }
+
+  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+  // iff we know the shift amount is in range.
+  // TODO: when is it worth doing SUB(BW, N2) as well?
+  if (isPowerOf2_32(BitWidth)) {
+    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
   }
 
   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
@@ -7096,6 +7926,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
   if (N0 == N1 && hasOperation(RotOpc, VT))
     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
 
+  // Simplify, based on bits shifted out of N0/N1.
+  if (SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
@@ -7207,11 +8041,14 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
 
 // FIXME: This should be checking for no signed zeros on individual operands, as
 // well as no nans.
-static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+                                         SDValue RHS,
+                                         const TargetLowering &TLI) {
   const TargetOptions &Options = DAG.getTarget().Options;
   EVT VT = LHS.getValueType();
 
   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
 }
 
@@ -7364,6 +8201,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT VT0 = N0.getValueType();
   SDLoc DL(N);
+  SDNodeFlags Flags = N->getFlags();
 
   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
     return V;
@@ -7414,20 +8252,26 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
       SDValue Cond0 = N0->getOperand(0);
       SDValue Cond1 = N0->getOperand(1);
       SDValue InnerSelect =
-          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
       if (normalizeToSequence || !InnerSelect.use_empty())
         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
-                           InnerSelect, N2);
+                           InnerSelect, N2, Flags);
+      // Cleanup on failure.
+      if (InnerSelect.use_empty())
+        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
     }
     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
       SDValue Cond0 = N0->getOperand(0);
       SDValue Cond1 = N0->getOperand(1);
-      SDValue InnerSelect =
-          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+                                        Cond1, N1, N2, Flags);
       if (normalizeToSequence || !InnerSelect.use_empty())
         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
-                           InnerSelect);
+                           InnerSelect, Flags);
+      // Cleanup on failure.
+      if (InnerSelect.use_empty())
+        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
     }
 
     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
@@ -7439,12 +8283,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
         // Create the actual and node if we can generate good code for it.
         if (!normalizeToSequence) {
           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
-          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
+          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+                             N2, Flags);
         }
         // Otherwise see if we can optimize the "and" to a better pattern.
-        if (SDValue Combined = visitANDLike(N0, N1_0, N))
+        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
-                             N2);
+                             N2, Flags);
+        }
       }
     }
     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -7456,20 +8302,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
         // Create the actual or node if we can generate good code for it.
         if (!normalizeToSequence) {
           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
-          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
+          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, 
+                             N2_2, Flags);
         }
         // Otherwise see if we can optimize to a better pattern.
         if (SDValue Combined = visitORLike(N0, N2_0, N))
           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
-                             N2_2);
+                             N2_2, Flags);
       }
     }
   }
 
-  if (VT0 == MVT::i1) {
-    // select (not Cond), N1, N2 -> select Cond, N2, N1
-    if (isBitwiseNot(N0))
-      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
+  // select (not Cond), N1, N2 -> select Cond, N2, N1
+  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+    SelectOp->setFlags(Flags);
+    return SelectOp;
   }
 
   // Fold selects based on a setcc into other things, such as min/max/abs.
@@ -7481,7 +8329,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     // select (fcmp gt x, y), x, y -> fmaxnum x, y
     //
     // This is OK if we don't care what happens if either operand is a NaN.
-    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                 CC, TLI, DAG))
         return FMinMax;
@@ -7516,9 +8364,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     }
 
     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
-        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
-      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
-                         N0.getOperand(2));
+        (!LegalOperations &&
+         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+      // Any flags available in a select/setcc fold will be on the setcc as they
+      // migrated from the fcmp.
+      Flags = N0.getNode()->getFlags();
+      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+                                       N2, N0.getOperand(2));
+      SelectNode->setFlags(Flags);
+      return SelectNode;
+    }
 
     return SimplifySelect(DL, N0, N1, N2);
   }
@@ -7599,14 +8454,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 }
 
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data  = MSC->getValue();
+  SDValue Data = MSC->getValue();
+  SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
+  // Zap scatters with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSCATTER data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7624,8 +8484,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   EVT LoVT, HiVT;
   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
 
-  SDValue Chain = MSC->getChain();
-
   EVT MemoryVT = MSC->getMemoryVT();
   unsigned Alignment = MSC->getOriginalAlignment();
 
@@ -7658,15 +8516,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data  = MST->getValue();
+  SDValue Data = MST->getValue();
+  SDValue Chain = MST->getChain();
   EVT VT = Data.getValueType();
   SDLoc DL(N);
 
+  // Zap masked stores with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSTORE data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7680,17 +8543,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
     SDValue MaskLo, MaskHi, Lo, Hi;
     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
 
-    SDValue Chain = MST->getChain();
     SDValue Ptr   = MST->getBasePtr();
 
     EVT MemoryVT = MST->getMemoryVT();
     unsigned Alignment = MST->getOriginalAlignment();
 
-    // if Alignment is equal to the vector size,
-    // take the half of it for the second part
-    unsigned SecondHalfAlignment =
-      (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
-
     EVT LoMemVT, HiMemVT;
     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
@@ -7712,7 +8569,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
 
     MMO = DAG.getMachineFunction().getMachineMemOperand(
         MST->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
         MST->getAAInfo(), MST->getRanges());
 
     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -7728,13 +8585,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
   SDValue Mask = MGT->getMask();
   SDLoc DL(N);
 
+  // Zap gathers with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MGATHER result requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7805,13 +8666,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
   SDValue Mask = MLD->getMask();
   SDLoc DL(N);
 
+  // Zap masked loads with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MLOAD result requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7839,12 +8704,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
     EVT MemoryVT = MLD->getMemoryVT();
     unsigned Alignment = MLD->getOriginalAlignment();
 
-    // if Alignment is equal to the vector size,
-    // take the half of it for the second part
-    unsigned SecondHalfAlignment =
-      (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
-         Alignment/2 : Alignment;
-
     EVT LoMemVT, HiMemVT;
     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
@@ -7862,7 +8721,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
 
     MMO = DAG.getMachineFunction().getMachineMemOperand(
         MLD->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
         MLD->getAAInfo(), MLD->getRanges());
 
     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
@@ -7943,11 +8802,16 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue N2 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
     return V;
 
+  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+    return DAG.getSelect(DL, VT, F, N2, N1);
+
   // Canonicalize integer abs.
   // vselect (setg[te] X,  0),  X, -X ->
   // vselect (setgt    X, -1),  X, -X ->
@@ -7987,11 +8851,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     // This is OK if we don't care about what happens if either operand is a
     // NaN.
     //
-    EVT VT = N->getValueType(0);
-    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
-      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
+                                                       N0.getOperand(1), TLI)) {
       if (SDValue FMinMax = combineMinNumMaxNum(
-            DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
         return FMinMax;
     }
 
@@ -8080,9 +8943,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
       return N2;
     } else if (SCC.getOpcode() == ISD::SETCC) {
       // Fold to a simpler select_cc
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
-                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
-                         SCC.getOperand(2));
+      SDValue SelectOp = DAG.getNode(
+          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+      SelectOp->setFlags(SCC->getFlags());
+      return SelectOp;
     }
   }
 
@@ -8148,6 +9013,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   unsigned Opcode = N->getOpcode();
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
 
   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
@@ -8158,7 +9024,33 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   // fold (zext c1) -> c1
   // fold (aext c1) -> c1
   if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(Opcode, SDLoc(N), VT, N0);
+    return DAG.getNode(Opcode, DL, VT, N0);
+
+  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+  if (N0->getOpcode() == ISD::SELECT) {
+    SDValue Op1 = N0->getOperand(1);
+    SDValue Op2 = N0->getOperand(2);
+    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+      // For any_extend, choose sign extension of the constants to allow a
+      // possible further transform to sign_extend_inreg, i.e.:
+      //
+      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+      // t2: i64 = any_extend t1
+      // -->
+      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+      // -->
+      // t4: i64 = sign_extend_inreg t3
+      unsigned FoldOpc = Opcode;
+      if (FoldOpc == ISD::ANY_EXTEND)
+        FoldOpc = ISD::SIGN_EXTEND;
+      return DAG.getSelect(DL, VT, N0->getOperand(0),
+                           DAG.getNode(FoldOpc, DL, VT, Op1),
+                           DAG.getNode(FoldOpc, DL, VT, Op2));
+    }
+  }
 
   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
@@ -8173,7 +9065,6 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
   SmallVector<SDValue, 8> Elts;
   unsigned NumElts = VT.getVectorNumElements();
-  SDLoc DL(N);
 
   // For zero-extensions, UNDEF elements still guarantee to have the upper
   // bits set to zero.
@@ -8387,6 +9278,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
   assert(N->getOpcode() == ISD::ZERO_EXTEND);
   EVT VT = N->getValueType(0);
+  EVT OrigVT = N->getOperand(0).getValueType();
+  if (TLI.isZExtFree(OrigVT, VT))
+    return SDValue();
 
   // and/or/xor
   SDValue N0 = N->getOperand(0);
@@ -8450,6 +9344,10 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
                                 Load->getValueType(0), ExtLoad);
     CombineTo(Load, Trunc, ExtLoad.getValue(1));
   }
+
+  // N0 is dead at this point.
+  recursivelyDeleteUnusedNodes(N0.getNode());
+
   return SDValue(N,0); // Return N so it doesn't get rechecked!
 }
 
@@ -8509,19 +9407,21 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                                    : ISD::isZEXTLoad(N0Node);
   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
-    return {};
+    return SDValue();
 
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   EVT MemVT = LN0->getMemoryVT();
   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
-    return {};
+    return SDValue();
 
   SDValue ExtLoad =
       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
   Combiner.CombineTo(N, ExtLoad);
   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+  if (LN0->use_empty())
+    Combiner.recursivelyDeleteUnusedNodes(LN0);
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
 }
 
@@ -8559,6 +9459,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
   Combiner.CombineTo(N, ExtLoad);
   if (NoReplaceTrunc) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+    Combiner.recursivelyDeleteUnusedNodes(LN0);
   } else {
     SDValue Trunc =
         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
@@ -8804,6 +9705,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
     return NewVSel;
 
+  // Eliminate this sign extend by doing a negation in the destination type:
+  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+      isNullOrNullSplat(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+  }
+  // Eliminate this sign extend by doing a decrement in the destination type:
+  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+  }
+
   return SDValue();
 }
 
@@ -9061,14 +9981,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
       N0.hasOneUse()) {
     SDValue ShAmt = N0.getOperand(1);
-    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
     if (N0.getOpcode() == ISD::SHL) {
       SDValue InnerZExt = N0.getOperand(0);
       // If the original shl may be shifting out bits, do not perform this
       // transformation.
       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
         InnerZExt.getOperand(0).getValueSizeInBits();
-      if (ShAmtVal > KnownZeroBits)
+      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
         return SDValue();
     }
 
@@ -9162,6 +10081,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       CombineTo(N, ExtLoad);
       if (NoReplaceTrunc) {
         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+        recursivelyDeleteUnusedNodes(LN0);
       } else {
         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                     N0.getValueType(), ExtLoad);
@@ -9185,6 +10105,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                                        MemVT, LN0->getMemOperand());
       CombineTo(N, ExtLoad);
       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+      recursivelyDeleteUnusedNodes(LN0);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
@@ -9574,14 +10495,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
   if (N0.getOpcode() == ISD::SRL) {
-    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
-      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
         // We can turn this into an SRA iff the input to the SRL is already sign
         // extended enough.
         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
-        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
-          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
-                             N0.getOperand(0), N0.getOperand(1));
+        if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+          return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+                             N0.getOperand(1));
       }
   }
 
@@ -9667,10 +10588,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  EVT SrcVT = N0.getValueType();
   bool isLE = DAG.getDataLayout().isLittleEndian();
 
   // noop truncate
-  if (N0.getValueType() == N->getValueType(0))
+  if (SrcVT == VT)
     return N0;
 
   // fold (truncate (truncate x)) -> (truncate x)
@@ -9740,7 +10662,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
 
   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
-    EVT SrcVT = N0.getValueType();
     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
         TLI.isTruncateFree(SrcVT, VT)) {
       SDLoc SL(N0);
@@ -9753,7 +10674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
 
   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
-      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
     SDValue Amt = N0.getOperand(1);
     KnownBits Known = DAG.computeKnownBits(Amt);
@@ -9771,6 +10692,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     }
   }
 
+  // Attempt to pre-truncate BUILD_VECTOR sources.
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+    SDLoc DL(N);
+    EVT SVT = VT.getScalarType();
+    SmallVector<SDValue, 8> TruncOps;
+    for (const SDValue &Op : N0->op_values()) {
+      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+      TruncOps.push_back(TruncOp);
+    }
+    return DAG.getBuildVector(VT, DL, TruncOps);
+  }
+
   // Fold a series of buildvector, bitcast, and truncate if possible.
   // For example fold
   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -9906,7 +10840,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // When the adde's carry is not used.
   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
-      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+      // Only do this for ADDCARRY before operation legalization.
+      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+       TLI.isOperationLegal(N0.getOpcode(), VT))) {
     SDLoc SL(N);
     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
@@ -10070,14 +11006,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
     return DAG.getUNDEF(VT);
 
   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
-  // Only do this before legalize types, since we might create an illegal
-  // scalar type. Even if we knew we wouldn't create an illegal scalar type
-  // we can only do this before legalize ops, since the target maybe
-  // depending on the bitcast.
+  // Only do this before legalize types, unless both types are integer and the
+  // scalar type is legal. Only do this before legalize ops, since the target
+  // may be depending on the bitcast.
   // First check to see if this is all constant.
-  if (!LegalTypes &&
+  // TODO: Support FP bitcasts after legalize types.
+  if (VT.isVector() &&
+      (!LegalTypes ||
+       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+        TLI.isTypeLegal(VT.getVectorElementType()))) &&
       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
-      VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+      cast<BuildVectorSDNode>(N0)->isConstant())
     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                              VT.getVectorElementType());
 
@@ -10113,18 +11052,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       // as we assume software couldn't rely on the number of accesses of an
       // illegal type.
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
-       TLI.isOperationLegal(ISD::LOAD, VT)) &&
-      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+       TLI.isOperationLegal(ISD::LOAD, VT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned OrigAlign = LN0->getAlignment();
 
-    bool Fast = false;
-    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
-        Fast) {
+    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+                                    *LN0->getMemOperand())) {
       SDValue Load =
           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
-                      LN0->getPointerInfo(), OrigAlign,
+                      LN0->getPointerInfo(), LN0->getAlignment(),
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
       return Load;
@@ -11071,15 +12006,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
 
   // fold (fadd A, (fneg B)) -> (fsub A, B)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
+      isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
     return DAG.getNode(ISD::FSUB, DL, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+                       GetNegatedExpression(N1, DAG, LegalOperations,
+                                            ForCodeSize), Flags);
 
   // fold (fadd (fneg A), B) -> (fsub B, A)
   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
+      isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
     return DAG.getNode(ISD::FSUB, DL, VT, N1,
-                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+                       GetNegatedExpression(N0, DAG, LegalOperations,
+                                            ForCodeSize), Flags);
 
   auto isFMulNegTwo = [](SDValue FMul) {
     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -11105,8 +12042,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   // Selection pass has a hard time dealing with FP constants.
   bool AllowNewConst = (Level < AfterLegalizeDAG);
 
-  // If 'unsafe math' or nnan is enabled, fold lots of things.
-  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+  // If nnan is enabled, fold lots of things.
+  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
     // If allowed, fold (fadd (fneg x), x) -> 0.0
     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
       return DAG.getConstantFP(0.0, DL, VT);
@@ -11246,16 +12183,20 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
 
   if (N0 == N1) {
     // (fsub x, x) -> 0.0
-    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
       return DAG.getConstantFP(0.0f, DL, VT);
   }
 
   // (fsub -0.0, N1) -> -N1
+  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
+  //       FSUB does not specify the sign bit of a NaN. Also note that for
+  //       the same reason, the inverse transform is not safe, unless fast math
+  //       flags are in play.
   if (N0CFP && N0CFP->isZero()) {
     if (N0CFP->isNegative() ||
         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
-      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
-        return GetNegatedExpression(N1, DAG, LegalOperations);
+      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
+        return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
     }
@@ -11273,9 +12214,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   }
 
   // fold (fsub A, (fneg B)) -> (fadd A, B)
-  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
     return DAG.getNode(ISD::FADD, DL, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+                       GetNegatedExpression(N1, DAG, LegalOperations,
+                                            ForCodeSize), Flags);
 
   // FSUB -> FMA combines:
   if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -11319,7 +12261,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath ||
+  if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
     // fold (fmul A, 0) -> 0
     if (N1CFP && N1CFP->isZero())
@@ -11361,14 +12303,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
       return DAG.getNode(ISD::FNEG, DL, VT, N0);
 
   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+                                       ForCodeSize)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+                                         ForCodeSize)) {
       // Both can be negated for free, check to see if at least one is cheaper
       // negated.
       if (LHSNeg == 2 || RHSNeg == 2)
         return DAG.getNode(ISD::FMUL, DL, VT,
-                           GetNegatedExpression(N0, DAG, LegalOperations),
-                           GetNegatedExpression(N1, DAG, LegalOperations),
+                           GetNegatedExpression(N0, DAG, LegalOperations,
+                                                ForCodeSize),
+                           GetNegatedExpression(N1, DAG, LegalOperations,
+                                                ForCodeSize),
                            Flags);
     }
   }
@@ -11506,7 +12452,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
     // fma (fneg x), K, y -> fma x -K, y
     if (N0.getOpcode() == ISD::FNEG &&
         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
+                                              ForCodeSize)))) {
       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
     }
@@ -11541,22 +12488,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+  // TODO: Limit this transform based on optsize/minsize - it always creates at
+  //       least 1 extra instruction. But the perf win may be substantial enough
+  //       that only minsize should restrict this.
   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags Flags = N->getFlags();
   if (!UnsafeMath && !Flags.hasAllowReciprocal())
     return SDValue();
 
-  // Skip if current node is a reciprocal.
+  // Skip if current node is a reciprocal/fneg-reciprocal.
   SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  if (N0CFP && N0CFP->isExactlyValue(1.0))
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
     return SDValue();
 
   // Exit early if the target does not want this transform or if there can't
   // possibly be enough uses of the divisor to make the transform worthwhile.
   SDValue N1 = N->getOperand(1);
   unsigned MinUses = TLI.combineRepeatedFPDivisors();
-  if (!MinUses || N1->use_size() < MinUses)
+
+  // For splat vectors, scale the number of uses by the splat factor. If we can
+  // convert the division into a scalar op, that will likely be much faster.
+  unsigned NumElts = 1;
+  EVT VT = N->getValueType(0);
+  if (VT.isVector() && DAG.isSplatValue(N1))
+    NumElts = VT.getVectorNumElements();
+
+  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
     return SDValue();
 
   // Find all FDIV users of the same divisor.
@@ -11573,10 +12531,9 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
 
   // Now that we have the actual number of divisor uses, make sure it meets
   // the minimum threshold specified by the target.
-  if (Users.size() < MinUses)
+  if ((Users.size() * NumElts) < MinUses)
     return SDValue();
 
-  EVT VT = N->getValueType(0);
   SDLoc DL(N);
   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
@@ -11619,6 +12576,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
+  if (SDValue V = combineRepeatedFPDivisors(N))
+    return V;
+
   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
@@ -11634,7 +12594,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
            // backend)... we should handle this gracefully after Legalize.
            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(Recip, VT)))
+           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
         return DAG.getNode(ISD::FMUL, DL, VT, N0,
                            DAG.getConstantFP(Recip, DL, VT), Flags);
     }
@@ -11692,21 +12652,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   }
 
   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+                                       ForCodeSize)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+                                         ForCodeSize)) {
       // Both can be negated for free, check to see if at least one is cheaper
       // negated.
       if (LHSNeg == 2 || RHSNeg == 2)
         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
-                           GetNegatedExpression(N0, DAG, LegalOperations),
-                           GetNegatedExpression(N1, DAG, LegalOperations),
+                           GetNegatedExpression(N0, DAG, LegalOperations,
+                                                ForCodeSize),
+                           GetNegatedExpression(N1, DAG, LegalOperations,
+                                                ForCodeSize),
                            Flags);
     }
   }
 
-  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
-    return CombineRepeatedDivisors;
-
   return SDValue();
 }
 
@@ -11838,18 +12799,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
   }
 
-  // Try to convert x ** (1/4) into square roots.
+  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
   // TODO: This could be extended (using a target hook) to handle smaller
   // power-of-2 fractional exponents.
-  if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+  if (ExponentIs025 || ExponentIs075) {
     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
+    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
     // For regular numbers, rounding may cause the results to differ.
     // Therefore, we require { nsz ninf afn } for this transform.
     // TODO: We could select out the special cases if we don't have nsz/ninf.
     SDNodeFlags Flags = N->getFlags();
-    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+
+    // We only need no signed zeros for the 0.25 case.
+    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
         !Flags.hasApproximateFuncs())
       return SDValue();
 
@@ -11859,13 +12826,17 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
 
     // Assume that libcalls are the smallest code.
     // TODO: This restriction should probably be lifted for vectors.
-    if (DAG.getMachineFunction().getFunction().optForSize())
+    if (DAG.getMachineFunction().getFunction().hasOptSize())
       return SDValue();
 
     // pow(X, 0.25) --> sqrt(sqrt(X))
     SDLoc DL(N);
     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
-    return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+    if (ExponentIs025)
+      return SqrtSqrt;
+    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
   }
 
   return SDValue();
@@ -11911,6 +12882,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT OpVT = N0.getValueType();
 
+  // [us]itofp(undef) = 0, because the result value is bounded.
+  if (N0.isUndef())
+    return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
   // fold (sint_to_fp c1) -> c1fp
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
@@ -11968,6 +12943,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT OpVT = N0.getValueType();
 
+  // [us]itofp(undef) = 0, because the result value is bounded.
+  if (N0.isUndef())
+    return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
   // fold (uint_to_fp c1) -> c1fp
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
@@ -12051,6 +13030,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // fold (fp_to_sint undef) -> undef
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (fp_to_sint c1fp) -> c1
   if (isConstantFPBuildVectorOrConstantFP(N0))
     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
@@ -12062,6 +13045,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // fold (fp_to_uint undef) -> undef
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (fp_to_uint c1fp) -> c1
   if (isConstantFPBuildVectorOrConstantFP(N0))
     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
@@ -12250,8 +13237,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
 
   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
-                         &DAG.getTarget().Options))
-    return GetNegatedExpression(N0, DAG, LegalOperations);
+                         &DAG.getTarget().Options, ForCodeSize))
+    return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
 
   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
   // constant pool values.
@@ -12287,7 +13274,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
       APFloat CVal = CFP1->getValueAPF();
       CVal.changeSign();
       if (Level >= AfterLegalizeDAG &&
-          (TLI.isFPImmLegal(CVal, VT) ||
+          (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
            TLI.isOperationLegal(ISD::ConstantFP, VT)))
         return DAG.getNode(
             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
@@ -12556,6 +13543,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
 
   TargetLowering::AddrMode AM;
   if (N->getOpcode() == ISD::ADD) {
+    AM.HasBaseReg = true;
     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (Offset)
       // [reg +/- imm]
@@ -12564,6 +13552,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
       // [reg +/- reg]
       AM.Scale = 1;
   } else if (N->getOpcode() == ISD::SUB) {
+    AM.HasBaseReg = true;
     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (Offset)
       // [reg +/- imm]
@@ -12653,7 +13642,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   // Check #2.
   if (!isLoad) {
     SDValue Val = cast<StoreSDNode>(N)->getValue();
-    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+
+    // Would require a copy.
+    if (Val == BasePtr)
+      return false;
+
+    // Would create a cycle.
+    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
       return false;
   }
 
@@ -13190,7 +14185,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
 
   if (LD->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes.
-    SDValue BetterChain = FindBetterChain(N, Chain);
+    SDValue BetterChain = FindBetterChain(LD, Chain);
 
     // If there is a better chain.
     if (Chain != BetterChain) {
@@ -13378,7 +14373,7 @@ struct LoadedSlice {
   /// Get the alignment of the load used for this slice.
   unsigned getAlignment() const {
     unsigned Alignment = Origin->getAlignment();
-    unsigned Offset = getOffsetFromBase();
+    uint64_t Offset = getOffsetFromBase();
     if (Offset != 0)
       Alignment = MinAlign(Alignment, Alignment + Offset);
     return Alignment;
@@ -13500,9 +14495,11 @@ struct LoadedSlice {
     assert(DAG && "Missing context");
     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
     EVT ResVT = Use->getValueType(0);
-    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+    const TargetRegisterClass *ResRC =
+        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
     const TargetRegisterClass *ArgRC =
-        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+                           Use->getOperand(0)->isDivergent());
     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
       return false;
 
@@ -13826,7 +14823,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
 
   // For narrowing to be valid, it must be the case that the load the
-  // immediately preceeding memory operation before the store.
+  // immediately preceding memory operation before the store.
   if (LD == Chain.getNode())
     ; // ok.
   else if (Chain->getOpcode() == ISD::TokenFactor &&
@@ -14039,11 +15036,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
 /// load / store operations if the target deems the transformation profitable.
 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
-  SDValue Chain = ST->getChain();
   SDValue Value = ST->getValue();
   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
-      Value.hasOneUse() &&
-      Chain == SDValue(Value.getNode(), 1)) {
+      Value.hasOneUse()) {
     LoadSDNode *LD = cast<LoadSDNode>(Value);
     EVT VT = LD->getMemoryVT();
     if (!VT.isFloatingPoint() ||
@@ -14073,7 +15068,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
                     LD->getPointerInfo(), LDAlign);
 
     SDValue NewST =
-        DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+        DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
                      ST->getPointerInfo(), STAlign);
 
     AddToWorklist(NewLD.getNode());
@@ -14171,14 +15166,14 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
     Visited.insert(StoreNodes[i].MemNode);
   }
 
-  // don't include nodes that are children
+  // Don't include nodes that are children or repeated nodes.
   for (unsigned i = 0; i < NumStores; ++i) {
-    if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+    if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
       Chains.push_back(StoreNodes[i].MemNode->getChain());
   }
 
   assert(Chains.size() > 0 && "Chain should have generated a chain");
-  return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
+  return DAG.getTokenFactor(StoreDL, Chains);
 }
 
 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -14372,15 +15367,19 @@ void DAGCombiner::getStoreMergeCandidates(
     // Loads must only have one use.
     if (!Ld->hasNUsesOfValue(1, 0))
       return;
-    // The memory operands must not be volatile.
+    // The memory operands must not be volatile/indexed.
     if (Ld->isVolatile() || Ld->isIndexed())
       return;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                             int64_t &Offset) -> bool {
+    // The memory operands must not be volatile/indexed.
     if (Other->isVolatile() || Other->isIndexed())
       return false;
-    SDValue Val = peekThroughBitcasts(Other->getValue());
+    // Don't mix temporal stores with non-temporal stores.
+    if (St->isNonTemporal() != Other->isNonTemporal())
+      return false;
+    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
     // Allow merging constants of different types as integers.
     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                            : Other->getMemoryVT() != MemVT;
@@ -14388,16 +15387,19 @@ void DAGCombiner::getStoreMergeCandidates(
       if (NoTypeMatch)
         return false;
       // The Load's Base Ptr must also match
-      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
-        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
         if (LoadVT != OtherLd->getMemoryVT())
           return false;
         // Loads must only have one use.
         if (!OtherLd->hasNUsesOfValue(1, 0))
           return false;
-        // The memory operands must not be volatile.
+        // The memory operands must not be volatile/indexed.
         if (OtherLd->isVolatile() || OtherLd->isIndexed())
           return false;
+        // Don't mix temporal loads with non-temporal loads.
+        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+          return false;
         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
           return false;
       } else
@@ -14406,17 +15408,17 @@ void DAGCombiner::getStoreMergeCandidates(
     if (IsConstantSrc) {
       if (NoTypeMatch)
         return false;
-      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
         return false;
     }
     if (IsExtractVecSrc) {
       // Do not merge truncated stores here.
       if (Other->isTruncatingStore())
         return false;
-      if (!MemVT.bitsEq(Val.getValueType()))
+      if (!MemVT.bitsEq(OtherBC.getValueType()))
         return false;
-      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
-          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
         return false;
     }
     Ptr = BaseIndexOffset::match(Other, DAG);
@@ -14441,9 +15443,11 @@ void DAGCombiner::getStoreMergeCandidates(
 
   RootNode = St->getChain().getNode();
 
+  unsigned NumNodesExplored = 0;
   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
     RootNode = Ldn->getChain().getNode();
-    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
           if (I2.getOperandNo() == 0)
@@ -14454,7 +15458,8 @@ void DAGCombiner::getStoreMergeCandidates(
                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
             }
   } else
-    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
       if (I.getOperandNo() == 0)
         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
           BaseIndexOffset Ptr;
@@ -14551,6 +15556,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
                        isa<ConstantFPSDNode>(StoredVal);
   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsNonTemporalStore = St->isNonTemporal();
+  bool IsNonTemporalLoad =
+      IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
 
   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
     return false;
@@ -14652,8 +15660,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
 
           if (TLI.isTypeLegal(StoreTy) &&
               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                     FirstStoreAlign, &IsFast) &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                     *FirstInChain->getMemOperand(), &IsFast) &&
               IsFast) {
             LastIntegerTrunc = false;
             LastLegalType = i + 1;
@@ -14664,8 +15672,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
-                TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                       FirstStoreAlign, &IsFast) &&
+                TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                       *FirstInChain->getMemOperand(),
+                                       &IsFast) &&
                 IsFast) {
               LastIntegerTrunc = true;
               LastLegalType = i + 1;
@@ -14683,8 +15692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-                TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                       FirstStoreAlign, &IsFast) &&
+                TLI.allowsMemoryAccess(
+                    Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
                 IsFast)
               LastLegalVectorType = i + 1;
           }
@@ -14755,8 +15764,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
 
           if (TLI.isTypeLegal(Ty) &&
               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
-              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
-                                     FirstStoreAlign, &IsFast) &&
+              TLI.allowsMemoryAccess(Context, DL, Ty,
+                                     *FirstInChain->getMemOperand(), &IsFast) &&
               IsFast)
             NumStoresToMerge = i + 1;
         }
@@ -14847,7 +15856,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
       unsigned FirstStoreAlign = FirstInChain->getAlignment();
       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
-      unsigned FirstLoadAS = FirstLoad->getAddressSpace();
       unsigned FirstLoadAlign = FirstLoad->getAlignment();
 
       // Scan the memory operations on the chain and find the first
@@ -14887,11 +15895,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
         bool IsFastSt, IsFastLd;
         if (TLI.isTypeLegal(StoreTy) &&
             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign, &IsFastSt) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
             IsFastSt &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
-                                   FirstLoadAlign, &IsFastLd) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
             IsFastLd) {
           LastLegalVectorType = i + 1;
         }
@@ -14901,11 +15909,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
         if (TLI.isTypeLegal(StoreTy) &&
             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                   FirstStoreAlign, &IsFastSt) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
             IsFastSt &&
-            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
-                                   FirstLoadAlign, &IsFastLd) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
             IsFastLd) {
           LastLegalIntegerType = i + 1;
           DoIntegerTruncate = false;
@@ -14920,11 +15928,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
                                  StoreTy) &&
               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
-              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
-                                     FirstStoreAlign, &IsFastSt) &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                     *FirstInChain->getMemOperand(),
+                                     &IsFastSt) &&
               IsFastSt &&
-              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
-                                     FirstLoadAlign, &IsFastLd) &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy,
+                                     *FirstLoad->getMemOperand(), &IsFastLd) &&
               IsFastLd) {
             LastLegalIntegerType = i + 1;
             DoIntegerTruncate = true;
@@ -14994,26 +16003,32 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
       AddToWorklist(NewStoreChain.getNode());
 
-      MachineMemOperand::Flags MMOFlags =
+      MachineMemOperand::Flags LdMMOFlags =
           isDereferenceable ? MachineMemOperand::MODereferenceable
                             : MachineMemOperand::MONone;
+      if (IsNonTemporalLoad)
+        LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+      MachineMemOperand::Flags StMMOFlags =
+          IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+                             : MachineMemOperand::MONone;
 
       SDValue NewLoad, NewStore;
       if (UseVectorTy || !DoIntegerTruncate) {
         NewLoad =
             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
-                        FirstLoadAlign, MMOFlags);
+                        FirstLoadAlign, LdMMOFlags);
         NewStore = DAG.getStore(
             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
-            FirstInChain->getPointerInfo(), FirstStoreAlign);
+            FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
       } else { // This must be the truncstore/extload case
         EVT ExtendedTy =
             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
                                  FirstLoad->getPointerInfo(), JointMemOpVT,
-                                 FirstLoadAlign, MMOFlags);
+                                 FirstLoadAlign, LdMMOFlags);
         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                      FirstInChain->getBasePtr(),
                                      FirstInChain->getPointerInfo(),
@@ -15168,16 +16183,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // illegal type.
     if (((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegal(ISD::STORE, SVT)) &&
-        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
-      unsigned OrigAlign = ST->getAlignment();
-      bool Fast = false;
-      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
-                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
-          Fast) {
-        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
-                            ST->getPointerInfo(), OrigAlign,
-                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
-      }
+        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+                                     DAG, *ST->getMemOperand())) {
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+                          ST->getPointerInfo(), ST->getAlignment(),
+                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
     }
   }
 
@@ -15205,6 +16215,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (SDValue NewST = TransformFPLoadStorePair(N))
     return NewST;
 
+  // Try transforming several stores into STORE (BSWAP).
+  if (SDValue Store = MatchStoreCombine(ST))
+    return Store;
+
   if (ST->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes, on this store and any
     // adjacent stores.
@@ -15221,23 +16235,22 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       Value.getValueType().isInteger() &&
       (!isa<ConstantSDNode>(Value) ||
        !cast<ConstantSDNode>(Value)->isOpaque())) {
+    APInt TruncDemandedBits =
+        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+                             ST->getMemoryVT().getScalarSizeInBits());
+
     // See if we can simplify the input to this truncstore with knowledge that
     // only the low bits are being used.  For example:
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
-    SDValue Shorter = DAG.GetDemandedBits(
-        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
-                                    ST->getMemoryVT().getScalarSizeInBits()));
+    SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
     AddToWorklist(Value.getNode());
-    if (Shorter.getNode())
-      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
-                               Ptr, ST->getMemoryVT(), ST->getMemOperand());
+    if (Shorter)
+      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+                               ST->getMemOperand());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
-    if (SimplifyDemandedBits(
-            Value,
-            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
-                                 ST->getMemoryVT().getScalarSizeInBits()))) {
+    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
       // Re-visit the store if anything changed and the store hasn't been merged
       // with another node (N is deleted) SimplifyDemandedBits will add Value's
       // node back to the worklist if necessary, but we also need to re-visit
@@ -15263,25 +16276,55 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
-        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
-        ST->getMemoryVT() == ST1->getMemoryVT()) {
-      // If this is a store followed by a store with the same value to the same
-      // location, then the store is dead/noop.
-      if (ST1->getValue() == Value) {
-        // The store is dead, remove it.
+        !ST1->isVolatile()) {
+      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+          ST->getMemoryVT() == ST1->getMemoryVT()) {
+        // If this is a store followed by a store with the same value to the
+        // same location, then the store is dead/noop.
         return Chain;
       }
 
-      // If this is a store who's preceeding store to the same location
-      // and no one other node is chained to that store we can effectively
-      // drop the store. Do not remove stores to undef as they may be used as
-      // data sinks.
       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
           !ST1->getBasePtr().isUndef()) {
-        // ST1 is fully overwritten and can be elided. Combine with it's chain
-        // value.
-        CombineTo(ST1, ST1->getChain());
-        return SDValue();
+        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+        // If the preceding store writes to a subset of the current store's
+        // location and no other node is chained to that store, we can
+        // effectively drop the preceding store. Do not remove stores to undef
+        // as they may be used as data sinks.
+        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+          CombineTo(ST1, ST1->getChain());
+          return SDValue();
+        }
+
+        // If ST stores to a subset of preceding store's write set, we may be
+        // able to fold ST's value into the preceding stored value. As we know
+        // the other uses of ST1's chain are unconcerned with ST, this folding
+        // will not affect those nodes.
+        int64_t BitOffset;
+        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
+                               BitOffset)) {
+          SDValue ChainValue = ST1->getValue();
+          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
+            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
+              APInt Val = C1->getAPIntValue();
+              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
+              // FIXME: Handle Big-endian mode.
+              if (!DAG.getDataLayout().isBigEndian()) {
+                Val.insertBits(InsertVal, BitOffset);
+                SDValue NewSDVal =
+                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
+                                    C1->isTargetOpcode(), C1->isOpaque());
+                SDNode *NewST1 = DAG.UpdateNodeOperands(
+                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
+                    ST1->getOperand(3));
+                return CombineTo(ST, SDValue(NewST1, 0));
+              }
+            }
+          }
+        } // End ST subset of ST1 case.
       }
     }
   }
@@ -15299,7 +16342,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // Always perform this optimization before types are legal. If the target
   // prefers, also try this after legalization to catch stores that were created
   // by intrinsics or other nodes.
-  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
     while (true) {
       // There can be multiple store sequences on the same chain.
       // Keep trying to merge store sequences until we are unable to do so
@@ -15333,6 +16376,54 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   return ReduceLoadOpStoreWidth(N);
 }
 
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+  const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+  if (!LifetimeEnd->hasOffset())
+    return SDValue();
+
+  const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+                                        LifetimeEnd->getOffset(), false);
+
+  // We walk up the chains to find stores.
+  SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+  while (!Chains.empty()) {
+    SDValue Chain = Chains.back();
+    Chains.pop_back();
+    if (!Chain.hasOneUse())
+      continue;
+    switch (Chain.getOpcode()) {
+    case ISD::TokenFactor:
+      for (unsigned Nops = Chain.getNumOperands(); Nops;)
+        Chains.push_back(Chain.getOperand(--Nops));
+      break;
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END:
+      // We can forward past any lifetime start/end that can be proven not to
+      // alias the node.
+      if (!isAlias(Chain.getNode(), N))
+        Chains.push_back(Chain.getOperand(0));
+      break;
+    case ISD::STORE: {
+      StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
+      if (ST->isVolatile() || ST->isIndexed())
+        continue;
+      const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+      // If we store purely within object bounds just before its lifetime ends,
+      // we can remove the store.
+      if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+                                   ST->getMemoryVT().getStoreSizeInBits())) {
+        LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+                   dbgs() << "\nwithin LIFETIME_END of : ";
+                   LifetimeEndBase.dump(); dbgs() << "\n");
+        CombineTo(ST, ST->getChain());
+        return SDValue(N, 0);
+      }
+    }
+    }
+  }
+  return SDValue();
+}
+
 /// For the instruction sequence of store below, F and I values
 /// are bundled together as an i64 value before being stored into memory.
 /// Sometimes it is more efficent to generate separate stores for F and I,
@@ -15616,7 +16707,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
     Offset = DAG.getNode(
         ISD::MUL, DL, PtrType, Offset,
         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
-    MPI = OriginalLoad->getPointerInfo();
+    // Discard the pointer info except the address space because the memory
+    // operand can't represent this new access since the offset is variable.
+    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
   }
   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
 
@@ -15668,14 +16761,15 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 /// the math/logic after an extract element of a vector.
 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
                                        bool LegalOperations) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
-  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+      Vec.getNode()->getNumValues() != 1)
     return SDValue();
 
   // Targets may want to avoid this to prevent an expensive register transfer.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.shouldScalarizeBinop(Vec))
     return SDValue();
 
@@ -16073,7 +17167,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                            ArrayRef<int> VectorMask,
                                            SDValue VecIn1, SDValue VecIn2,
-                                           unsigned LeftIdx) {
+                                           unsigned LeftIdx, bool DidSplitVec) {
   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
 
@@ -16081,17 +17175,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
   EVT InVT1 = VecIn1.getValueType();
   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
 
-  unsigned Vec2Offset = 0;
   unsigned NumElems = VT.getVectorNumElements();
   unsigned ShuffleNumElems = NumElems;
 
-  // In case both the input vectors are extracted from same base
-  // vector we do not need extra addend (Vec2Offset) while
-  // computing shuffle mask.
-  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
-      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
-    Vec2Offset = InVT1.getVectorNumElements();
+  // If we artificially split a vector in two already, then the offsets in the
+  // operands will all be based off of VecIn1, even those in VecIn2.
+  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
 
   // We can't generate a shuffle node with mismatched input and output types.
   // Try to make the types match the type of the output.
@@ -16214,23 +17303,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   // The build vector contains some number of undef elements and exactly
   // one other element. That other element must be a zero-extended scalar
   // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+  EVT VT = BV->getValueType(0);
   SDValue Zext = BV->getOperand(ZextElt);
   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
   SDValue Extract = Zext.getOperand(0);
   unsigned DestSize = Zext.getValueSizeInBits();
   unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   // Create a shuffle mask that will combine the extracted element with zeros
   // and undefs.
-  int ZextRatio =  DestSize / SrcSize;
+  int ZextRatio = DestSize / SrcSize;
   int NumMaskElts = NumBVOps * ZextRatio;
   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
   for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +17355,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                       ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }
 
 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -16316,7 +17411,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
       return SDValue();
     SDValue ExtractedFromVec = Op.getOperand(0);
 
-    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
       return SDValue();
 
@@ -16344,6 +17439,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   // vector, then split the vector efficiently based on the maximum
   // vector access index and adjust the VectorMask and
   // VecIn accordingly.
+  bool DidSplitVec = false;
   if (VecIn.size() == 2) {
     unsigned MaxIndex = 0;
     unsigned NearestPow2 = 0;
@@ -16374,6 +17470,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
         VecIn.pop_back();
         VecIn.push_back(VecIn1);
         VecIn.push_back(VecIn2);
+        DidSplitVec = true;
 
         for (unsigned i = 0; i < NumElems; i++) {
           if (VectorMask[i] <= 0)
@@ -16411,7 +17508,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
 
     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
-                                                VecRight, LeftIdx))
+                                                VecRight, LeftIdx, DidSplitVec))
       Shuffles.push_back(Shuffle);
     else
       return SDValue();
@@ -16477,18 +17574,20 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
 
 // Try to turn a build vector of zero extends of extract vector elts into a
 // a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
 // TODO: Allow undef elements?
-// TODO: Don't require the extracts to start at element 0.
 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
   if (LegalOperations)
     return SDValue();
 
   EVT VT = N->getValueType(0);
 
+  bool FoundZeroExtend = false;
   SDValue Op0 = N->getOperand(0);
   auto checkElem = [&](SDValue Op) -> int64_t {
-    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+    unsigned Opc = Op.getOpcode();
+    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@@ -16520,7 +17619,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
   SDLoc DL(N);
   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                    Op0.getOperand(0).getOperand(1));
-  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+                     VT, In);
 }
 
 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
@@ -16885,14 +17985,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
         return SDValue();
     }
 
-    unsigned IdentityIndex = i * PartNumElem;
-    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
     // The extract index must be constant.
     if (!CS)
       return SDValue();
 
     // Check that we are reading from the identity index.
-    if (CS->getZExtValue() != IdentityIndex)
+    unsigned IdentityIndex = i * PartNumElem;
+    if (CS->getAPIntValue() != IdentityIndex)
       return SDValue();
   }
 
@@ -16902,12 +18002,59 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   return SDValue();
 }
 
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+                                              SelectionDAG &DAG) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue BinOp = Extract->getOperand(0);
+  unsigned BinOpcode = BinOp.getOpcode();
+  if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+    return SDValue();
+
+  SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+  SDValue Index = Extract->getOperand(1);
+  EVT VT = Extract->getValueType(0);
+
+  // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+  // if the source subvector is the same type as the one being extracted.
+  auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
+    if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
+      return V.getOperand(1);
+    }
+    auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+    if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+        V.getOperand(0).getValueType() == VT &&
+        (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
+      uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
+      return V.getOperand(SubIdx);
+    }
+    return SDValue();
+  };
+  SDValue Sub0 = GetSubVector(Bop0);
+  SDValue Sub1 = GetSubVector(Bop1);
+
+  // TODO: We could handle the case where only 1 operand is being inserted by
+  //       creating an extract of the other operand, but that requires checking
+  //       number of uses and/or costs.
+  if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+    return SDValue();
+
+  // We are inserting both operands of the wide binop only to extract back
+  // to the narrow vector size. Eliminate all of the insert/extract:
+  // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+  return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+                     BinOp->getFlags());
+}
+
 /// If we are extracting a subvector produced by a wide binary operator try
 /// to use a narrow binary operator and/or avoid concatenation and extraction.
 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
   // some of these bailouts with other transforms.
 
+  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+    return V;
+
   // The extract index must be a constant, so we can map it to a concat operand.
   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
   if (!ExtractIndexC)
@@ -16915,8 +18062,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
 
   // We are looking for an optionally bitcasted wide vector binary operator
   // feeding an extract subvector.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
-  if (!ISD::isBinaryOp(BinOp.getNode()))
+  unsigned BOpcode = BinOp.getOpcode();
+  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
     return SDValue();
 
   // The binop must be a vector type, so we can extract some fraction of it.
@@ -16945,8 +18094,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   // Bail out if the target does not support a narrower version of the binop.
   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                    WideNumElts / NarrowingRatio);
-  unsigned BOpcode = BinOp.getOpcode();
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
     return SDValue();
 
@@ -16986,35 +18133,35 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
 
   // We need at least one concatenation operation of a binop operand to make
   // this transform worthwhile. The concat must double the input vector sizes.
-  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
-  SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
-  SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
-  bool ConcatL =
-      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
-  bool ConcatR =
-      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
-  if (!ConcatL && !ConcatR)
+  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+      return V.getOperand(ConcatOpNum);
     return SDValue();
+  };
+  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+  if (SubVecL || SubVecR) {
+    // If a binop operand was not the result of a concat, we must extract a
+    // half-sized operand for our new narrow binop:
+    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+    SDLoc DL(Extract);
+    SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+                                      BinOp.getOperand(0), IndexC);
 
-  // If one of the binop operands was not the result of a concat, we must
-  // extract a half-sized operand for our new narrow binop.
-  SDLoc DL(Extract);
-
-  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
-  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
-  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
-  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
-                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
-                                    BinOp.getOperand(0),
-                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+                                      BinOp.getOperand(1), IndexC);
 
-  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
-                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
-                                    BinOp.getOperand(1),
-                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+    return DAG.getBitcast(VT, NarrowBinOp);
+  }
 
-  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
-  return DAG.getBitcast(VT, NarrowBinOp);
+  return SDValue();
 }
 
 /// If we are extracting a subvector from a wide vector load, convert to a
@@ -17052,7 +18199,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   return NewLd;
 }
 
-SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
   EVT NVT = N->getValueType(0);
   SDValue V = N->getOperand(0);
 
@@ -17064,14 +18211,51 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
       return NarrowLoad;
 
+  // Combine an extract of an extract into a single extract_subvector.
+  // ext (ext X, C), 0 --> ext X, C
+  SDValue Index = N->getOperand(1);
+  if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+                                    V.getConstantOperandVal(1)) &&
+        TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+                         V.getOperand(1));
+    }
+  }
+
+  // Try to move vector bitcast after extract_subv by scaling extraction index:
+  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+  if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+      V.getOperand(0).getValueType().isVector()) {
+    SDValue SrcOp = V.getOperand(0);
+    EVT SrcVT = SrcOp.getValueType();
+    unsigned SrcNumElts = SrcVT.getVectorNumElements();
+    unsigned DestNumElts = V.getValueType().getVectorNumElements();
+    if ((SrcNumElts % DestNumElts) == 0) {
+      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+      unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+      EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                                      NewExtNumElts);
+      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+        unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+        SDLoc DL(N);
+        SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+                                         V.getOperand(0), NewIndex);
+        return DAG.getBitcast(NVT, NewExtract);
+      }
+    }
+    // TODO - handle (DestNumElts % SrcNumElts) == 0
+  }
+
   // Combine:
   //    (extract_subvec (concat V1, V2, ...), i)
   // Into:
   //    Vi if possible
   // Only operand 0 is checked as 'concat' assumes all inputs of the same
   // type.
-  if (V.getOpcode() == ISD::CONCAT_VECTORS &&
-      isa<ConstantSDNode>(N->getOperand(1)) &&
+  if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
       V.getOperand(0).getValueType() == NVT) {
     unsigned Idx = N->getConstantOperandVal(1);
     unsigned NumElems = NVT.getVectorNumElements();
@@ -17084,7 +18268,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
 
   // If the input is a build vector. Try to make a smaller build vector.
   if (V.getOpcode() == ISD::BUILD_VECTOR) {
-    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
       EVT InVT = V.getValueType();
       unsigned ExtractSize = NVT.getSizeInBits();
       unsigned EltSize = InVT.getScalarSizeInBits();
@@ -17092,26 +18276,27 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
       if (ExtractSize % EltSize == 0) {
         unsigned NumElems = ExtractSize / EltSize;
         EVT EltVT = InVT.getVectorElementType();
-        EVT ExtractVT = NumElems == 1 ? EltVT :
-          EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+        EVT ExtractVT = NumElems == 1 ? EltVT
+                                      : EVT::getVectorVT(*DAG.getContext(),
+                                                         EltVT, NumElems);
         if ((Level < AfterLegalizeDAG ||
              (NumElems == 1 ||
               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
-          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
-                            EltSize;
+          unsigned IdxVal = IdxC->getZExtValue();
+          IdxVal *= NVT.getScalarSizeInBits();
+          IdxVal /= EltSize;
+
           if (NumElems == 1) {
             SDValue Src = V->getOperand(IdxVal);
             if (EltVT != Src.getValueType())
               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
-
             return DAG.getBitcast(NVT, Src);
           }
 
           // Extract the pieces from the original build_vector.
-          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
-                                            makeArrayRef(V->op_begin() + IdxVal,
-                                                         NumElems));
+          SDValue BuildVec = DAG.getBuildVector(
+              ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
           return DAG.getBitcast(NVT, BuildVec);
         }
       }
@@ -17126,9 +18311,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
       return SDValue();
 
     // Only handle cases where both indexes are constants.
-    auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
-
     if (InsIdx && ExtIdx) {
       // Combine:
       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
@@ -17141,7 +18325,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
       return DAG.getNode(
           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
-                         N->getOperand(1));
+          Index);
     }
   }
 
@@ -17154,6 +18338,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   return SDValue();
 }
 
+/// Try to convert a wide shuffle of (concat X, undef) operands into 2 narrow
+/// shuffles followed by concatenation. Narrow vector ops may perform better and
+/// can unlock further narrowing; targets can invert this later if unprofitable.
+/// Returns an empty SDValue if the pattern or either narrow mask is rejected.
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+                                         SelectionDAG &DAG) {
+  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+      N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+      !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+    return SDValue();
+
+  // Split the wide mask into halves. Reads of an undef upper half stay undef;
+  // reads of operand 1 are offset down to account for the narrower vectors.
+  ArrayRef<int> Mask = Shuf->getMask();
+  EVT VT = Shuf->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfNumElts = NumElts / 2;
+  SmallVector<int, 16> Mask0(HalfNumElts, -1);
+  SmallVector<int, 16> Mask1(HalfNumElts, -1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (Mask[i] == -1 || (Mask[i] % (int)NumElts) >= (int)HalfNumElts)
+      continue; // Undef lane, or a read of an undef upper half: stays -1.
+    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+    if (i < HalfNumElts)
+      Mask0[i] = M;
+    else
+      Mask1[i - HalfNumElts] = M;
+  }
+
+  // Ask the target if this is a valid transform.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+                                HalfNumElts);
+  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+      !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+    return SDValue();
+
+  // shuffle (concat X, undef), (concat Y, undef), Mask -->
+  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+  SDLoc DL(Shuf);
+  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
+
 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
 // or turn a shuffle of a single concat into simpler shuffle then concat.
 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -17163,20 +18394,24 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  ArrayRef<int> Mask = SVN->getMask();
 
   SmallVector<SDValue, 4> Ops;
   EVT ConcatVT = N0.getOperand(0).getValueType();
   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
   unsigned NumConcats = NumElts / NumElemsPerConcat;
 
+  auto IsUndefMaskElt = [](int i) { return i == -1; };
+
   // Special case: shuffle(concat(A,B)) can be more efficiently represented
   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
   // half vector elements.
   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
-      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
-                  SVN->getMask().end(), [](int i) { return i == -1; })) {
-    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
-                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
+      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+                   IsUndefMaskElt)) {
+    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+                              N0.getOperand(1),
+                              Mask.slice(0, NumElemsPerConcat));
     N1 = DAG.getUNDEF(ConcatVT);
     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
   }
@@ -17184,35 +18419,32 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   // Look at every vector that's inserted. We're looking for exact
   // subvector-sized copies from a concatenated vector
   for (unsigned I = 0; I != NumConcats; ++I) {
-    // Make sure we're dealing with a copy.
     unsigned Begin = I * NumElemsPerConcat;
-    bool AllUndef = true, NoUndef = true;
-    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
-      if (SVN->getMaskElt(J) >= 0)
-        AllUndef = false;
-      else
-        NoUndef = false;
+    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+    // Make sure we're dealing with a copy.
+    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+      Ops.push_back(DAG.getUNDEF(ConcatVT));
+      continue;
     }
 
-    if (NoUndef) {
-      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+    int OpIdx = -1;
+    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+      if (IsUndefMaskElt(SubMask[i]))
+        continue;
+      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
         return SDValue();
-
-      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
-        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
-          return SDValue();
-
-      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
-      if (FirstElt < N0.getNumOperands())
-        Ops.push_back(N0.getOperand(FirstElt));
-      else
-        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
-
-    } else if (AllUndef) {
-      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
-    } else { // Mixed with general masks and undefs, can't do optimization.
-      return SDValue();
+      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+      if (0 <= OpIdx && EltOpIdx != OpIdx)
+        return SDValue();
+      OpIdx = EltOpIdx;
     }
+    assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+    if (OpIdx < (int)N0.getNumOperands())
+      Ops.push_back(N0.getOperand(OpIdx));
+    else
+      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
   }
 
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -17278,8 +18510,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       if (S.getOpcode() == ISD::BUILD_VECTOR) {
         Op = S.getOperand(Idx);
       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
-        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
-        Op = S.getOperand(0);
+        SDValue Op0 = S.getOperand(0);
+        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
       } else {
         // Operand can't be combined - bail out.
         return SDValue();
@@ -17433,11 +18665,17 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
 // If splat-mask contains undef elements, we need to be careful about
 // introducing undef's in the folded mask which are not the result of composing
 // the masks of the shuffles.
-static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
-                                     ShuffleVectorSDNode *Splat,
-                                     SelectionDAG &DAG) {
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+                                        SelectionDAG &DAG) {
+  if (!Shuf->getOperand(1).isUndef())
+    return SDValue();
+  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+  if (!Splat || !Splat->isSplat())
+    return SDValue();
+
+  ArrayRef<int> ShufMask = Shuf->getMask();
   ArrayRef<int> SplatMask = Splat->getMask();
-  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
 
   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
   // every undef mask element in the splat-shuffle has a corresponding undef
@@ -17463,13 +18701,13 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
         return false;
     return true;
   };
-  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
-    return SDValue(Splat, 0);
+  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+    return Shuf->getOperand(0);
 
   // Create a new shuffle with a mask that is composed of the two shuffles'
   // masks.
   SmallVector<int, 32> NewMask;
-  for (int Idx : UserMask)
+  for (int Idx : ShufMask)
     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
 
   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
@@ -17555,6 +18793,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                      Op1, Op0.getOperand(1), NewInsIndex);
 }
 
+/// Given a unary shuffle of another shuffle, return the inner shuffle when
+/// every defined lane of the outer one equals the same lane of the inner one.
+/// The outer mask may have undef lanes that the inner mask lacks, so undef
+/// knowledge can be lost: call this only after demanded-elements simplification.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+  SDValue Inner = Shuf->getOperand(0);
+  auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(Inner);
+  if (!InnerShuf || !Shuf->getOperand(1).isUndef())
+    return SDValue();
+
+  ArrayRef<int> OuterMask = Shuf->getMask();
+  ArrayRef<int> InnerMask = InnerShuf->getMask();
+  const int NumLanes = (int)OuterMask.size();
+  for (int Lane = 0; Lane != NumLanes; ++Lane) {
+    const int Sel = OuterMask[Lane];
+    if (Sel == -1) // Undef lanes place no constraint on the fold.
+      continue;
+    assert(Sel >= 0 && Sel < NumLanes && "Unexpected shuffle mask value");
+
+    // Give up unless the inner lane selected here carries the same source
+    // element as the inner lane at this position.
+    if (InnerMask[Sel] != InnerMask[Lane])
+      return SDValue();
+  }
+  // Every defined lane matches the inner shuffle, so reuse its value.
+  return Inner;
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -17604,19 +18870,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
     return InsElt;
 
-  // A shuffle of a single vector that is a splat can always be folded.
-  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
-    if (N1->isUndef() && N0Shuf->isSplat())
-      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+  // A shuffle of a single vector that is a splatted value can always be folded.
+  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+    return V;
 
   // If it is a splat, check if the argument vector is another splat or a
   // build_vector.
   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
-    SDNode *V = N0.getNode();
+    int SplatIndex = SVN->getSplatIndex();
+    if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+      // splat (vector_bo L, R), Index -->
+      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+      SDLoc DL(N);
+      EVT EltVT = VT.getScalarType();
+      SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+                                  N0.getNode()->getFlags());
+      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+    }
 
     // If this is a bit convert that changes the element type of the vector but
     // not the number of vector elements, look through it.  Be careful not to
     // look though conversions that change things like v4f32 to v2f64.
+    SDNode *V = N0.getNode();
     if (V->getOpcode() == ISD::BITCAST) {
       SDValue ConvInput = V->getOperand(0);
       if (ConvInput.getValueType().isVector() &&
@@ -17649,7 +18931,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
         return N0;
 
       // Canonicalize any other splat as a build_vector.
-      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+      SDValue Splatted = V->getOperand(SplatIndex);
       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
 
@@ -17665,6 +18947,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
     return SDValue(N, 0);
 
+  // This is intentionally placed after demanded elements simplification because
+  // it could eliminate knowledge of undef elements created by this shuffle.
+  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+    return ShufOp;
+
   // Match shuffles that can be converted to any_vector_extend_in_reg.
   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
     return V;
@@ -17704,7 +18991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
       return NewMask;
     };
-    
+
     SDValue BC0 = peekThroughOneUseBitcasts(N0);
     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
       EVT SVT = VT.getScalarType();
@@ -17884,6 +19171,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
   }
 
+  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+    return V;
+
   return SDValue();
 }
 
@@ -18006,7 +19296,44 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   if (!isa<ConstantSDNode>(N2))
     return SDValue();
 
-  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+  // Push subvector bitcasts to the output, adjusting the index as we go.
+  // insert_subvector(bitcast(v), bitcast(s), c1)
+  // -> bitcast(insert_subvector(v, s, c2))
+  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+      N1.getOpcode() == ISD::BITCAST) {
+    SDValue N0Src = peekThroughBitcasts(N0);
+    SDValue N1Src = peekThroughBitcasts(N1);
+    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+      EVT NewVT;
+      SDLoc DL(N);
+      SDValue NewIdx;
+      MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned NumElts = VT.getVectorNumElements();
+      unsigned EltSizeInBits = VT.getScalarSizeInBits();
+      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+        NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+          NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+        }
+      }
+      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+        SDValue Res = DAG.getBitcast(NewVT, N0Src);
+        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+        return DAG.getBitcast(VT, Res);
+      }
+    }
+  }
 
   // Canonicalize insert_subvector dag nodes.
   // Example:
@@ -18070,6 +19397,36 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   return SDValue();
 }
 
+/// Simplify the vector reduction node N; returns an empty SDValue if no fold.
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+  SDValue Vec = N->getOperand(0);
+  EVT VecVT = Vec.getValueType(), ResVT = N->getValueType(0);
+  unsigned Opcode = N->getOpcode();
+  // Reducing a single-element vector amounts to extracting that element.
+  if (VecVT.getVectorNumElements() == 1) {
+    SDLoc DL(N);
+    SDValue Zero =
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                              VecVT.getVectorElementType(), Vec, Zero);
+    if (Elt.getValueType() == ResVT)
+      return Elt;
+    return DAG.getNode(ISD::ANY_EXTEND, DL, ResVT, Elt);
+  }
+
+  // On a boolean vector an and/or reduction is the same as a umin/umax
+  // reduction. Use the latter when it is legal/custom and the former is not.
+  if (Opcode != ISD::VECREDUCE_AND && Opcode != ISD::VECREDUCE_OR)
+    return SDValue();
+  unsigned MinMaxOpcode =
+      Opcode == ISD::VECREDUCE_AND ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+  if (TLI.isOperationLegalOrCustom(Opcode, VecVT) ||
+      !TLI.isOperationLegalOrCustom(MinMaxOpcode, VecVT) ||
+      DAG.ComputeNumSignBits(Vec) != VecVT.getScalarSizeInBits())
+    return SDValue();
+  return DAG.getNode(MinMaxOpcode, SDLoc(N), ResVT, Vec);
+}
+
 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
 /// with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -18161,6 +19518,53 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   return SDValue();
 }
 
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat. Returns an empty SDValue otherwise.
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  unsigned Opcode = N->getOpcode();
+  EVT VT = N->getValueType(0), EltVT = VT.getVectorElementType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // TODO: Remove/replace the extract cost check? If the elements are available
+  //       as scalars, then there may be no extract cost. Should we ask if
+  //       inserting a scalar back into a vector is cheap instead?
+  int Index0, Index1;
+  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+  if (!Src0 || !Src1 || Index0 != Index1 ||
+      Src0.getValueType().getVectorElementType() != EltVT ||
+      Src1.getValueType().getVectorElementType() != EltVT ||
+      !TLI.isExtractVecEltCheap(VT, Index0) ||
+      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+    return SDValue();
+
+  // The splat index refers to the source vectors, not to N0/N1 (whose lane at
+  // that index need not be the splatted value), so extract from the sources.
+  SDLoc DL(N);
+  SDValue IndexC =
+      DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
+  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
+  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+  // If all lanes but 1 are undefined, no need to splat the scalar result.
+  // TODO: Keep track of undefs and use that info in the general case.
+  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+    // build_vec ..undef, (bo X, Y), undef...
+    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+    Ops[Index0] = ScalarBO;
+    return DAG.getBuildVector(VT, DL, Ops);
+  }
+
+  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+  return DAG.getBuildVector(VT, DL, Ops);
+}
+
 /// Visit a binary vector operation, like ADD.
 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
@@ -18169,34 +19573,63 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   SDValue Ops[] = {LHS, RHS};
+  EVT VT = N->getValueType(0);
+  unsigned Opcode = N->getOpcode();
 
   // See if we can constant fold the vector operation.
   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
-          N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
     return Fold;
 
-  // Type legalization might introduce new shuffles in the DAG.
-  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
-  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
-  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
-      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
-      LHS.getOperand(1).isUndef() &&
-      RHS.getOperand(1).isUndef()) {
-    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
-    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
-
-    if (SVN0->getMask().equals(SVN1->getMask())) {
-      EVT VT = N->getValueType(0);
-      SDValue UndefVector = LHS.getOperand(1);
-      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
-                                     LHS.getOperand(0), RHS.getOperand(0),
-                                     N->getFlags());
-      AddUsersToWorklist(N);
-      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
-                                  SVN0->getMask());
+  // Move unary shuffles with identical masks after a vector binop:
+  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
+  //   --> shuffle (VBinOp A, B), Undef, Mask
+  // This does not require type legality checks because we are creating the
+  // same types of operations that are in the original sequence. We do have to
+  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
+  // though. This code is adapted from the identical transform in instcombine.
+  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
+      Opcode != ISD::UREM && Opcode != ISD::SREM &&
+      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+      SDLoc DL(N);
+      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+                                     RHS.getOperand(0), N->getFlags());
+      SDValue UndefV = LHS.getOperand(1);
+      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+    }
+  }
+
+  // The following pattern is likely to emerge with vector reduction ops. Moving
+  // the binary operation ahead of insertion may allow using a narrower vector
+  // instruction that has better performance than the wide version of the op:
+  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+      LHS.getOperand(2) == RHS.getOperand(2) &&
+      (LHS.hasOneUse() || RHS.hasOneUse())) {
+    SDValue X = LHS.getOperand(1);
+    SDValue Y = RHS.getOperand(1);
+    SDValue Z = LHS.getOperand(2);
+    EVT NarrowVT = X.getValueType();
+    if (NarrowVT == Y.getValueType() &&
+        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+      // (binop undef, undef) may not return undef, so compute that result.
+      SDLoc DL(N);
+      SDValue VecC =
+          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
     }
   }
 
+  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+    return V;
+
   return SDValue();
 }
 
@@ -18214,13 +19647,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
     // Check to see if we got a select_cc back (to turn into setcc/select).
     // Otherwise, just return whatever node we got back, like fabs.
     if (SCC.getOpcode() == ISD::SELECT_CC) {
+      const SDNodeFlags Flags = N0.getNode()->getFlags();
       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                   N0.getValueType(),
                                   SCC.getOperand(0), SCC.getOperand(1),
-                                  SCC.getOperand(4));
+                                  SCC.getOperand(4), Flags);
       AddToWorklist(SETCC.getNode());
-      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
-                           SCC.getOperand(2), SCC.getOperand(3));
+      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+                                         SCC.getOperand(2), SCC.getOperand(3));
+      SelectNode->setFlags(Flags);
+      return SelectNode;
     }
 
     return SCC;
@@ -18305,6 +19741,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
         // locations are not in the default address space.
         LLD->getPointerInfo().getAddrSpace() != 0 ||
         RLD->getPointerInfo().getAddrSpace() != 0 ||
+        // We can't produce a CMOV of a TargetFrameIndex since we won't
+        // generate the address generation required.
+        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                       LLD->getBasePtr().getValueType()))
       return false;
@@ -18501,8 +19941,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
 
   // If a constant can be materialized without loads, this does not make sense.
   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
-      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
-      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
     return SDValue();
 
   // If both constants have multiple uses, then we won't need to do an extra
@@ -18547,20 +19987,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
   if (N2 == N3) return N2;
 
   EVT CmpOpVT = N0.getValueType();
+  EVT CmpResVT = getSetCCResultType(CmpOpVT);
   EVT VT = N2.getValueType();
   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
 
   // Determine if the condition we're dealing with is constant.
-  SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
-                              false);
-  if (SCC.getNode()) AddToWorklist(SCC.getNode());
-
-  if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
-    // fold select_cc true, x, y -> x
-    // fold select_cc false, x, y -> y
-    return !SCCC->isNullValue() ? N2 : N3;
+  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+    AddToWorklist(SCC.getNode());
+    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+      // fold select_cc true, x, y -> x
+      // fold select_cc false, x, y -> y
+      return !(SCCC->isNullValue()) ? N2 : N3;
+    }
   }
 
   if (SDValue V =
@@ -18621,7 +20061,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
     SDValue Temp, SCC;
     // zext (setcc n0, n1)
     if (LegalTypes) {
-      SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
       if (VT.bitsLT(SCC.getValueType()))
         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
       else
@@ -18644,36 +20084,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                        getShiftAmountTy(Temp.getValueType())));
   }
 
-  // Check to see if this is an integer abs.
-  // select_cc setg[te] X,  0,  X, -X ->
-  // select_cc setgt    X, -1,  X, -X ->
-  // select_cc setl[te] X,  0, -X,  X ->
-  // select_cc setlt    X,  1, -X,  X ->
-  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
-  if (N1C) {
-    ConstantSDNode *SubC = nullptr;
-    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
-         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
-        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
-    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
-              (N1C->isOne() && CC == ISD::SETLT)) &&
-             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
-
-    if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
-      SDLoc DL(N0);
-      SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
-                                  DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
-                                                  DL,
-                                                  getShiftAmountTy(CmpOpVT)));
-      SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
-      AddToWorklist(Shift.getNode());
-      AddToWorklist(Add.getNode());
-      return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
-    }
-  }
-
   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
@@ -18728,7 +20138,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   // when optimising for minimum size, we don't want to expand a div to a mul
   // and a shift.
-  if (DAG.getMachineFunction().getFunction().optForMinSize())
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
   SmallVector<SDNode *, 8> Built;
@@ -18769,7 +20179,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   // when optimising for minimum size, we don't want to expand a div to a mul
   // and a shift.
-  if (DAG.getMachineFunction().getFunction().optForMinSize())
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
   SmallVector<SDNode *, 8> Built;
@@ -18821,7 +20231,6 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
     AddToWorklist(Est.getNode());
 
     if (Iterations) {
-      EVT VT = Op.getValueType();
       SDLoc DL(Op);
       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
 
@@ -18977,7 +20386,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
       if (!Reciprocal) {
         // The estimate is now completely wrong if the input was exactly 0.0 or
         // possibly a denormal. Force the answer to 0.0 for those cases.
-        EVT VT = Op.getValueType();
         SDLoc DL(Op);
         EVT CCVT = getSetCCResultType(VT);
         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
@@ -19020,79 +20428,95 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
 }
 
 /// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
-  // If they are the same then they must be aliases.
-  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
 
-  // If they are both volatile then they cannot be reordered.
-  if (Op0->isVolatile() && Op1->isVolatile()) return true;
+  struct MemUseCharacteristics { // Per-node memory-use summary for isAlias.
+    bool IsVolatile;            // Volatile flag of a load/store, else false.
+    SDValue BasePtr;            // Base address; null SDValue when unknown.
+    int64_t Offset;             // Constant offset paired with BasePtr, or 0.
+    Optional<int64_t> NumBytes; // Size of the access; empty when unknown.
+    MachineMemOperand *MMO;     // Memory operand; null when not a load/store.
+  };
 
-  // If one operation reads from invariant memory, and the other may store, they
-  // cannot alias. These should really be checking the equivalent of mayWrite,
-  // but it only matters for memory nodes other than load /store.
-  if (Op0->isInvariant() && Op1->writeMem())
-    return false;
+  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+      int64_t Offset = 0;
+      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+                     ? C->getSExtValue()
+                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
+                           ? -1 * C->getSExtValue()
+                           : 0;
+      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+              LSN->getMemOperand()};
+    }
+    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
+      return {false /*isVolatile*/, LN->getOperand(1),
+              (LN->hasOffset()) ? LN->getOffset() : 0,
+              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+                                : Optional<int64_t>(),
+              (MachineMemOperand *)nullptr};
+    // Default: neither load/store nor lifetime node, so nothing is known.
+    return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+  };
 
-  if (Op1->isInvariant() && Op0->writeMem())
-    return false;
+  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+                        MUC1 = getCharacteristics(Op1);
 
-  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
-  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
-
-  // Check for BaseIndexOffset matching.
-  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
-  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
-  int64_t PtrDiff;
-  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
-    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
-      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
-    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
-    // able to calculate their relative offset if at least one arises
-    // from an alloca. However, these allocas cannot overlap and we
-    // can infer there is no alias.
-    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
-      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
-        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-        // If the base are the same frame index but the we couldn't find a
-        // constant offset, (indices are different) be conservative.
-        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
-                       !MFI.isFixedObjectIndex(B->getIndex())))
-          return false;
-      }
+  // If they are to the same address, then they must be aliases.
+  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+      MUC0.Offset == MUC1.Offset)
+    return true;
+
+  // If they are both volatile then they cannot be reordered.
+  if (MUC0.IsVolatile && MUC1.IsVolatile)
+    return true;
 
-    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
-    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
-    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
-    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
-    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
-    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
-
-    // If of mismatched base types or checkable indices we can check
-    // they do not alias.
-    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
-         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
-        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+  if (MUC0.MMO && MUC1.MMO) {
+    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
       return false;
   }
 
+  // Try to prove that there is aliasing, or that there is no aliasing. Either
+  // way, we can return now. If nothing can be proved, proceed with more tests.
+  bool IsAlias;
+  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+                                       DAG, IsAlias))
+    return IsAlias;
+
+  // The following all rely on MUC0.MMO and MUC1.MMO being valid. Fail
+  // conservatively if either is not known.
+  if (!MUC0.MMO || !MUC1.MMO)
+    return true;
+
+  // If one operation reads from invariant memory, and the other may store, they
+  // cannot alias. These should really be checking the equivalent of mayWrite,
+  // but it only matters for memory nodes other than load /store.
+  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+    return false;
+
   // If we know required SrcValue1 and SrcValue2 have relatively large
   // alignment compared to the size and offset of the access, we may be able
   // to prove they do not alias. This check is conservative for now to catch
   // cases created by splitting vector types.
-  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
-  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
-  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
-  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
+  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
-      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
+      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
 
     // There is no overlap between these relatively aligned accesses of
     // similar size. Return no alias.
-    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
-        (OffAlign1 + NumBytes1) <= OffAlign0)
+    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
+        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
       return false;
   }
 
@@ -19105,17 +20529,16 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
     UseAA = false;
 #endif
 
-  if (UseAA && AA &&
-      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
+  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
-    AliasResult AAResult =
-        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
-                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
-                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
-                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
+    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+    AliasResult AAResult = AA->alias(
+        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
     if (AAResult == NoAlias)
       return false;
   }
@@ -19132,18 +20555,64 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
 
   // Get alias information for node.
-  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
+  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
 
   // Starting off.
   Chains.push_back(OriginalChain);
   unsigned Depth = 0;
 
+  // Attempt to improve chain by a single step, updating C in place on success.
+  auto ImproveChain = [&](SDValue &C) -> bool {
+    switch (C.getOpcode()) {
+    case ISD::EntryToken:
+      // No need to mark EntryToken.
+      C = SDValue();
+      return true;
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for C.
+      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      // Alias, so stop here.
+      return false;
+    }
+
+    case ISD::CopyFromReg:
+      // Always forward past CopyFromReg.
+      C = C.getOperand(0);
+      return true;
+
+    case ISD::LIFETIME_START:
+    case ISD::LIFETIME_END: {
+      // We can forward past any lifetime start/end that can be proven not to
+      // alias the memory access.
+      if (!isAlias(N, C.getNode())) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      return false;
+    }
+    default:
+      return false;
+    }
+  };
+
   // Look at each chain and determine if it is an alias.  If so, add it to the
   // aliases list.  If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.pop_back_val();
 
+    // Don't bother if we've seen Chain before.
+    if (!Visited.insert(Chain.getNode()).second)
+      continue;
+
     // For TokenFactor nodes, look at each operand and only continue up the
     // chain until we reach the depth limit.
     //
@@ -19156,58 +20625,30 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
       return;
     }
 
-    // Don't bother if we've been before.
-    if (!Visited.insert(Chain.getNode()).second)
-      continue;
-
-    switch (Chain.getOpcode()) {
-    case ISD::EntryToken:
-      // Entry token is ideal chain operand, but handled in FindBetterChain.
-      break;
-
-    case ISD::LOAD:
-    case ISD::STORE: {
-      // Get alias information for Chain.
-      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
-          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
-      // If chain is alias then stop here.
-      if (!(IsLoad && IsOpLoad) &&
-          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
-        Aliases.push_back(Chain);
-      } else {
-        // Look further up the chain.
-        Chains.push_back(Chain.getOperand(0));
-        ++Depth;
-      }
-      break;
-    }
-
-    case ISD::TokenFactor:
+    if (Chain.getOpcode() == ISD::TokenFactor) {
       // We have to check each of the operands of the token factor for "small"
       // token factors, so we queue them up.  Adding the operands to the queue
       // (stack) in reverse order maintains the original order and increases the
       // likelihood that getNode will find a matching token factor (CSE.)
       if (Chain.getNumOperands() > 16) {
         Aliases.push_back(Chain);
-        break;
+        continue;
       }
       for (unsigned n = Chain.getNumOperands(); n;)
         Chains.push_back(Chain.getOperand(--n));
       ++Depth;
-      break;
-
-    case ISD::CopyFromReg:
-      // Forward past CopyFromReg.
-      Chains.push_back(Chain.getOperand(0));
+      continue;
+    }
+    // Everything else
+    if (ImproveChain(Chain)) {
+      // Updated Chain Found, Consider new chain if one exists.
+      if (Chain.getNode())
+        Chains.push_back(Chain);
       ++Depth;
-      break;
-
-    default:
-      // For all other instructions we will just have to take what we can get.
-      Aliases.push_back(Chain);
-      break;
+      continue;
     }
+    // No Improved Chain Possible, treat as Alias.
+    Aliases.push_back(Chain);
   }
 }
 
@@ -19232,13 +20673,15 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
     return Aliases[0];
 
   // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
+  return DAG.getTokenFactor(SDLoc(N), Aliases);
 }
 
+namespace {
 // TODO: Replace with with std::monostate when we move to C++17.
 struct UnitT { } Unit;
 bool operator==(const UnitT &, const UnitT &) { return true; }
 bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
 
 // This function tries to collect a bunch of potentially interesting
 // nodes to improve the chains of, all at once. This might seem
@@ -19349,7 +20792,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
   if (AddNewChain)
     TFOps.insert(TFOps.begin(), NewChain);
 
-  SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
   CombineTo(St, TF);
 
   AddToWorklist(STChain);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a9a3c44ea0c9..22c23ba877e8 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,9 +1,8 @@
 //===- FastISel.cpp - Implementation of the FastISel class ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -782,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
       unsigned Reg = getRegForValue(Val);
       if (!Reg)
         return false;
-      Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
     }
   }
   return true;
@@ -831,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) {
   const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
   for (unsigned i = 0; ScratchRegs[i]; ++i)
     Ops.push_back(MachineOperand::CreateReg(
-        ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
-        /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
 
   // Issue CALLSEQ_START
   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -942,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
     assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
     CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
     CLI.NumResultRegs = 1;
-    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
   }
 
   // Add the <id> and <numBytes> constants.
@@ -991,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
       unsigned Reg = getRegForValue(I->getArgOperand(i));
       if (!Reg)
         return false;
-      Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
     }
   }
 
   // Push the arguments from the call instruction.
   for (auto Reg : CLI.OutRegs)
-    Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
 
   // Push live variables for the stack map.
   if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
@@ -1011,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
   const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
   for (unsigned i = 0; ScratchRegs[i]; ++i)
     Ops.push_back(MachineOperand::CreateReg(
-        ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
-        /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
 
   // Add implicit defs (return values).
   for (auto Reg : CLI.InRegs)
-    Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
-                                            /*IsImpl=*/true));
+    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+                                            /*isImp=*/true));
 
   // Insert the patchpoint instruction before the call generated by the target.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
@@ -1045,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
     return true; // don't do anything to this instruction.
   SmallVector<MachineOperand, 8> Ops;
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   MachineInstrBuilder MIB =
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
@@ -1064,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
     return true; // don't do anything to this instruction.
   SmallVector<MachineOperand, 8> Ops;
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   MachineInstrBuilder MIB =
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
@@ -1205,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
     if (Arg.IsByVal || Arg.IsInAlloca) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
-      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
-      // For ByVal, alignment should come from FE. BE will guess if this info is
-      // not there, but there are cases it cannot get right.
+      unsigned FrameSize =
+          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+      // For ByVal, alignment should come from FE. BE will guess if this info
+      // is not there, but there are cases it cannot get right.
       unsigned FrameAlign = Arg.Alignment;
       if (!FrameAlign)
         FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
@@ -1235,6 +1236,12 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
   if (CLI.NumResultRegs && CLI.CS)
     updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
 
+  // Set labels for heapallocsite call.
+  if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
+    MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+    MF->addCodeViewHeapAllocSite(CLI.Call, MD);
+  }
+
   return true;
 }
 
@@ -1304,9 +1311,6 @@ bool FastISel::selectCall(const User *I) {
     return true;
   }
 
-  MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
-  computeUsesVAFloatArgument(*Call, MMI);
-
   // Handle intrinsic function calls.
   if (const auto *II = dyn_cast<IntrinsicInst>(Call))
     return selectIntrinsicCall(II);
@@ -1710,14 +1714,11 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
 }
 
 /// Emit an FNeg operation.
-bool FastISel::selectFNeg(const User *I) {
-  Value *X;
-  if (!match(I, m_FNeg(m_Value(X))))
-    return false;
-  unsigned OpReg = getRegForValue(X);
+bool FastISel::selectFNeg(const User *I, const Value *In) {
+  unsigned OpReg = getRegForValue(In);
   if (!OpReg)
     return false;
-  bool OpRegIsKill = hasTrivialKill(I);
+  bool OpRegIsKill = hasTrivialKill(In);
 
   // If the target has ISD::FNEG, use it.
   EVT VT = TLI.getValueType(DL, I->getType());
@@ -1804,9 +1805,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
     return selectBinaryOp(I, ISD::FADD);
   case Instruction::Sub:
     return selectBinaryOp(I, ISD::SUB);
-  case Instruction::FSub: 
+  case Instruction::FSub: {
     // FNeg is currently represented in LLVM IR as a special case of FSub.
-    return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
+    Value *X;
+    if (match(I, m_FNeg(m_Value(X))))
+       return selectFNeg(I, X);
+    return selectBinaryOp(I, ISD::FSUB);
+  }
   case Instruction::Mul:
     return selectBinaryOp(I, ISD::MUL);
   case Instruction::FMul:
@@ -1836,6 +1841,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
   case Instruction::Xor:
     return selectBinaryOp(I, ISD::XOR);
 
+  case Instruction::FNeg:
+    return selectFNeg(I, I->getOperand(0));
+
   case Instruction::GetElementPtr:
     return selectGetElementPtr(I);
 
@@ -1869,6 +1877,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
     return false;
 
   case Instruction::Call:
+    // On AIX, call lowering uses the DAG-ISEL path currently so that the
+    // callee of the direct function call instruction will be mapped to the
+    // symbol for the function's entry point, which is distinct from the
+    // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+    // name is the C-linkage name of the source level function.
+    if (TM.getTargetTriple().isOSAIX())
+      return false;
     return selectCall(I);
 
   case Instruction::BitCast:
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index fba728625b07..8b1759246b76 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -1,9 +1,8 @@
 //===-- FunctionLoweringInfo.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -86,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   RegInfo = &MF->getRegInfo();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
+  DA = DAG->getDivergenceAnalysis();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
           auto Iter = CatchObjects.find(AI);
           if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
             FrameIndex = MF->getFrameInfo().CreateFixedObject(
-                TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+                TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
             MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
           } else {
             FrameIndex =
@@ -322,13 +322,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
       NewMap[MBBMap[Src]] = MBBMap[Dst];
     }
     EHInfo.EHPadUnwindMap = std::move(NewMap);
-    NewMap.clear();
-    for (auto &KV : EHInfo.ThrowUnwindMap) {
-      const auto *Src = KV.first.get<const BasicBlock *>();
-      const auto *Dst = KV.second.get<const BasicBlock *>();
-      NewMap[MBBMap[Src]] = MBBMap[Dst];
-    }
-    EHInfo.ThrowUnwindMap = std::move(NewMap);
   }
 }
 
@@ -343,6 +336,7 @@ void FunctionLoweringInfo::clear() {
   LiveOutRegInfo.clear();
   VisitedBBs.clear();
   ArgDbgValues.clear();
+  DescribedArgs.clear();
   ByValArgFrameIndexMap.clear();
   RegFixups.clear();
   RegsWithFixups.clear();
@@ -352,9 +346,9 @@ void FunctionLoweringInfo::clear() {
 }
 
 /// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
   return RegInfo->createVirtualRegister(
-      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -364,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
 /// In the case that the given value has struct or array type, this function
 /// will assign registers for each member or element.
 ///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -377,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
 
     unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = CreateReg(RegisterVT);
+      unsigned R = CreateReg(RegisterVT, isDivergent);
       if (!FirstReg) FirstReg = R;
     }
   }
   return FirstReg;
 }
 
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+  bool Div = DA && !TLI->requiresUniformRegister(*MF, V) && DA->isDivergent(V);
+  return CreateRegs(V->getType(), /*isDivergent=*/Div);
+}
+
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
@@ -400,7 +399,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
 
   if (BitWidth > LOI->Known.getBitWidth()) {
     LOI->NumSignBits = 1;
-    LOI->Known = LOI->Known.zextOrTrunc(BitWidth);
+    LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
   }
 
   return LOI;
@@ -526,56 +525,6 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
   return VReg;
 }
 
-unsigned
-FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
-                                                const Value *Val) {
-  auto Key = std::make_pair(MBB, Val);
-  auto It = SwiftErrorVRegDefMap.find(Key);
-  // If this is the first use of this swifterror value in this basic block,
-  // create a new virtual register.
-  // After we processed all basic blocks we will satisfy this "upwards exposed
-  // use" by inserting a copy or phi at the beginning of this block.
-  if (It == SwiftErrorVRegDefMap.end()) {
-    auto &DL = MF->getDataLayout();
-    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-    auto VReg = MF->getRegInfo().createVirtualRegister(RC);
-    SwiftErrorVRegDefMap[Key] = VReg;
-    SwiftErrorVRegUpwardsUse[Key] = VReg;
-    return VReg;
-  } else return It->second;
-}
-
-void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
-    const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
-  SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
-  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
-  auto It = SwiftErrorVRegDefUses.find(Key);
-  if (It == SwiftErrorVRegDefUses.end()) {
-    auto &DL = MF->getDataLayout();
-    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-    unsigned VReg =  MF->getRegInfo().createVirtualRegister(RC);
-    SwiftErrorVRegDefUses[Key] = VReg;
-    return std::make_pair(VReg, true);
-  }
-  return std::make_pair(It->second, false);
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
-  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
-  auto It = SwiftErrorVRegDefUses.find(Key);
-  if (It == SwiftErrorVRegDefUses.end()) {
-    unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
-    SwiftErrorVRegDefUses[Key] = VReg;
-    return std::make_pair(VReg, true);
-  }
-  return std::make_pair(It->second, false);
-}
-
 const Value *
 FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
   if (VirtReg2Value.empty()) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 6a6114677cc2..9bc07d35dfc5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1,9 +1,8 @@
 //==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -106,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT);
+    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
 
   if (!IsClone && !IsCloned)
     for (SDNode *User : Node->uses()) {
@@ -165,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
            "Incompatible phys register def and uses!");
     DstRC = UseRC;
   } else {
-    DstRC = TLI->getRegClassFor(VT);
+    DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
   }
 
   // If all uses are reading from the src physical register and copying the
@@ -187,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   assert(isNew && "Node emitted out of order - early");
 }
 
-/// getDstOfCopyToRegUse - If the only use of the specified result number of
-/// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
-                                                unsigned ResNo) const {
-  if (!Node->hasOneUse())
-    return 0;
-
-  SDNode *User = *Node->use_begin();
-  if (User->getOpcode() == ISD::CopyToReg &&
-      User->getOperand(2).getNode() == Node &&
-      User->getOperand(2).getResNo() == ResNo) {
-    unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg))
-      return Reg;
-  }
-  return 0;
-}
-
 void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
                                        MachineInstrBuilder &MIB,
                                        const MCInstrDesc &II,
@@ -226,8 +207,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
     // type correctly. For example, a 64-bit float (X86::FR64) can't live in
     // the 32-bit float super-class (X86::FR32).
     if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
-      const TargetRegisterClass *VTRC =
-        TLI->getRegClassFor(Node->getSimpleValueType(i));
+      const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+          Node->getSimpleValueType(i),
+          (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
       if (RC)
         VTRC = TRI->getCommonSubClass(RC, VTRC);
       if (VTRC)
@@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op,
   if (Op.isMachineOpcode() &&
       Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
     // Add an IMPLICIT_DEF instruction before every use.
-    unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
-    if (!VReg) {
-      const TargetRegisterClass *RC =
-        TLI->getRegClassFor(Op.getSimpleValueType());
-      VReg = MRI->createVirtualRegister(RC);
-    }
+    const TargetRegisterClass *RC = TLI->getRegClassFor(
+        Op.getSimpleValueType(), Op.getNode()->isDivergent());
+    unsigned VReg = MRI->createVirtualRegister(RC);
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
             TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
     return VReg;
@@ -396,11 +375,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     unsigned VReg = R->getReg();
     MVT OpVT = Op.getSimpleValueType();
-    const TargetRegisterClass *OpRC =
-        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
     const TargetRegisterClass *IIRC =
         II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
            : nullptr;
+    const TargetRegisterClass *OpRC =
+        TLI->isTypeLegal(OpVT)
+            ? TLI->getRegClassFor(OpVT,
+                                  Op.getNode()->isDivergent() ||
+                                      (IIRC && TRI->isDivergentRegClass(IIRC)))
+            : nullptr;
 
     if (OpRC && IIRC && OpRC != IIRC &&
         TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -465,7 +448,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
 }
 
 unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
-                                          MVT VT, const DebugLoc &DL) {
+                                          MVT VT, bool isDivergent, const DebugLoc &DL) {
   const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
   const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
 
@@ -480,7 +463,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
 
   // VReg couldn't be reasonably constrained.  Emit a COPY to a new virtual
   // register instead.
-  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
   assert(RC && "No legal register class for VT supports that SubIdx");
   unsigned NewReg = MRI->createVirtualRegister(RC);
   BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -515,7 +498,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     // classes.
     unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     const TargetRegisterClass *TRC =
-      TLI->getRegClassFor(Node->getSimpleValueType(0));
+      TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
 
     unsigned Reg;
     MachineInstr *DefMI;
@@ -549,8 +532,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
       if (TargetRegisterInfo::isVirtualRegister(Reg))
         Reg = ConstrainForSubReg(Reg, SubIdx,
                                  Node->getOperand(0).getSimpleValueType(),
-                                 Node->getDebugLoc());
-
+                                 Node->isDivergent(), Node->getDebugLoc());
       // Create the destreg if it is missing.
       if (VRBase == 0)
         VRBase = MRI->createVirtualRegister(TRC);
@@ -585,7 +567,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     //
     // There is no constraint on the %src register class.
     //
-    const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+    const TargetRegisterClass *SRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
     SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
     assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
 
@@ -900,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
 
     if (Flags.hasExact())
       MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+    if (Flags.hasFPExcept())
+      MI->setFlag(MachineInstr::MIFlag::FPExcept);
   }
 
   // Emit all of the actual operands of this instruction, adding them to the
@@ -1007,14 +993,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
   case ISD::TokenFactor: // fall thru
     break;
   case ISD::CopyToReg: {
-    unsigned SrcReg;
+    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     SDValue SrcVal = Node->getOperand(2);
+    if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+        SrcVal.isMachineOpcode() &&
+        SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+      // Instead of building a COPY to that vreg destination, build an
+      // IMPLICIT_DEF instruction.
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+      break;
+    }
+    unsigned SrcReg;
     if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
       SrcReg = R->getReg();
     else
       SrcReg = getVR(SrcVal, VRBaseMap);
 
-    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
       break;
 
@@ -1049,14 +1044,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     break;
   }
 
-  case ISD::INLINEASM: {
+  case ISD::INLINEASM:
+  case ISD::INLINEASM_BR: {
     unsigned NumOps = Node->getNumOperands();
     if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
       --NumOps;  // Ignore the glue operand.
 
     // Create the inline asm machine instruction.
-    MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
-                                      TII->get(TargetOpcode::INLINEASM));
+    unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+                          ? TargetOpcode::INLINEASM_BR
+                          : TargetOpcode::INLINEASM;
+    MachineInstrBuilder MIB =
+        BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
 
     // Add the asm string as an external symbol operand.
     SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
@@ -1137,7 +1136,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     // then remove the early-clobber flag.
     for (unsigned Reg : ECRegs) {
       if (MIB->readsRegister(Reg, TRI)) {
-        MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
+        MachineOperand *MO = 
+            MIB->findRegisterDefOperand(Reg, false, false, TRI);
         assert(MO && "No def operand for clobbered register?");
         MO->setIsEarlyClobber(false);
       }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 701b6368690b..cfe99dd977b5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -1,9 +1,8 @@
 //===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
                        unsigned SrcReg,
                        DenseMap<SDValue, unsigned> &VRBaseMap);
 
-  /// getDstOfCopyToRegUse - If the only use of the specified result number of
-  /// node is a CopyToReg, return its destination register. Return 0 otherwise.
-  unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
-                                    unsigned ResNo) const;
-
   void CreateVirtualRegisters(SDNode *Node,
                               MachineInstrBuilder &MIB,
                               const MCInstrDesc &II,
@@ -84,7 +78,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   /// supports SubIdx sub-registers.  Emit a copy if that isn't possible.
   /// Return the virtual register to use.
   unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
-                              const DebugLoc &DL);
+                              bool isDivergent, const DebugLoc &DL);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d3aea37f944d..bf817f00f83d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,9 +1,8 @@
 //===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -137,8 +136,6 @@ private:
                              bool &NeedInvert, const SDLoc &dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
-  SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
-                        unsigned NumOps, bool isSigned, const SDLoc &dl);
 
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
@@ -152,11 +149,17 @@ private:
                            RTLIB::Libcall Call_I32,
                            RTLIB::Libcall Call_I64,
                            RTLIB::Libcall Call_I128);
+  SDValue ExpandArgFPLibCall(SDNode *Node,
+                             RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+                             RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+                             RTLIB::Libcall Call_PPCF128);
   void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
   SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
                            const SDLoc &dl);
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+                           const SDLoc &dl, SDValue ChainIn);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
@@ -489,10 +492,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
       // If this is an unaligned store and the target doesn't support it,
       // expand it.
       EVT MemVT = ST->getMemoryVT();
-      unsigned AS = ST->getAddressSpace();
-      unsigned Align = ST->getAlignment();
       const DataLayout &DL = DAG.getDataLayout();
-      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+                                  *ST->getMemOperand())) {
         LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
         SDValue Result = TLI.expandUnalignedStore(ST, DAG);
         ReplaceNode(SDValue(ST, 0), Result);
@@ -542,7 +544,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
   } else if (StWidth & (StWidth - 1)) {
     // If not storing a power-of-2 number of bits, expand as two stores.
     assert(!StVT.isVector() && "Unsupported truncstore!");
-    unsigned RoundWidth = 1 << Log2_32(StWidth);
+    unsigned LogStWidth = Log2_32(StWidth);
+    assert(LogStWidth < 32);
+    unsigned RoundWidth = 1 << LogStWidth;
     assert(RoundWidth < StWidth);
     unsigned ExtraWidth = StWidth - RoundWidth;
     assert(ExtraWidth < RoundWidth);
@@ -602,11 +606,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Legal: {
       EVT MemVT = ST->getMemoryVT();
-      unsigned AS = ST->getAddressSpace();
-      unsigned Align = ST->getAlignment();
       // If this is an unaligned store and the target doesn't support it,
       // expand it.
-      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+                                  *ST->getMemOperand())) {
         SDValue Result = TLI.expandUnalignedStore(ST, DAG);
         ReplaceNode(SDValue(ST, 0), Result);
       }
@@ -663,13 +666,12 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Legal: {
       EVT MemVT = LD->getMemoryVT();
-      unsigned AS = LD->getAddressSpace();
-      unsigned Align = LD->getAlignment();
       const DataLayout &DL = DAG.getDataLayout();
       // If this is an unaligned load and the target doesn't support it,
       // expand it.
-      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
-        std::tie(RVal, RChain) =  TLI.expandUnalignedLoad(LD, DAG);
+      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+                                  *LD->getMemOperand())) {
+        std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
       }
       break;
     }
@@ -756,7 +758,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
   } else if (SrcWidth & (SrcWidth - 1)) {
     // If not loading a power-of-2 number of bits, expand as two loads.
     assert(!SrcVT.isVector() && "Unsupported extload!");
-    unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+    unsigned LogSrcWidth = Log2_32(SrcWidth);
+    assert(LogSrcWidth < 32);
+    unsigned RoundWidth = 1 << LogSrcWidth;
     assert(RoundWidth < SrcWidth);
     unsigned ExtraWidth = SrcWidth - RoundWidth;
     assert(ExtraWidth < RoundWidth);
@@ -853,10 +857,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
         // If this is an unaligned load and the target doesn't support it,
         // expand it.
         EVT MemVT = LD->getMemoryVT();
-        unsigned AS = LD->getAddressSpace();
-        unsigned Align = LD->getAlignment();
         const DataLayout &DL = DAG.getDataLayout();
-        if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+        if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+                                    *LD->getMemOperand())) {
           std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
         }
       }
@@ -994,6 +997,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
   case ISD::EXTRACT_VECTOR_ELT:
+  case ISD::LROUND:
+  case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LLRINT:
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(0).getValueType());
     break;
@@ -1114,6 +1121,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_ROUND:
+  case ISD::STRICT_FP_EXTEND:
     // These pseudo-ops get legalized as if they were their non-strict
     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1128,7 +1137,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     break;
   }
-  case ISD::SMULFIX: {
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX: {
     unsigned Scale = Node->getConstantOperandVal(2);
     Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                               Node->getValueType(0), Scale);
@@ -1142,6 +1153,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     Action = TLI.getOperationAction(Node->getOpcode(),
                     cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
     break;
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    Action = TLI.getOperationAction(
+        Node->getOpcode(), Node->getOperand(0).getValueType());
+    break;
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -1386,6 +1413,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
   unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
   // Store (in the right endianness) the elements to memory.
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
     // Ignore undef elements.
@@ -1723,6 +1751,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
 /// The resultant code need not be legal.
 SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
                                                EVT DestVT, const SDLoc &dl) {
+  return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+                                               EVT DestVT, const SDLoc &dl,
+                                               SDValue Chain) {
   // Create the stack frame object.
   unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
       SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1743,19 +1777,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
   // later than DestVT.
   SDValue Store;
 
-  if (SrcSize > SlotSize)
-    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
+  if (SrcSize > SlotSize) 
+    Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
                               SlotVT, SrcAlign);
   else {
     assert(SrcSize == SlotSize && "Invalid store");
     Store =
-        DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+        DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
   }
 
   // Result is a load from the stack slot.
   if (SlotSize == DestSize)
     return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
-
+    
   assert(SlotSize < DestSize && "Unknown extension!");
   return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
                         DestAlign);
@@ -2049,41 +2083,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
   return CallInfo.first;
 }
 
-/// Generate a libcall taking the given operands as arguments
-/// and returning a result of type RetVT.
-SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
-                                            const SDValue *Ops, unsigned NumOps,
-                                            bool isSigned, const SDLoc &dl) {
-  TargetLowering::ArgListTy Args;
-  Args.reserve(NumOps);
-
-  TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0; i != NumOps; ++i) {
-    Entry.Node = Ops[i];
-    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
-    Entry.IsSExt = isSigned;
-    Entry.IsZExt = !isSigned;
-    Args.push_back(Entry);
-  }
-  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
-                                         TLI.getPointerTy(DAG.getDataLayout()));
-
-  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-
-  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(dl)
-      .setChain(DAG.getEntryNode())
-      .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
-                    std::move(Args))
-      .setSExtResult(isSigned)
-      .setZExtResult(!isSigned)
-      .setIsPostTypeLegalization(true);
-
-  std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
-  return CallInfo.first;
-}
-
 // Expand a node into a call to a libcall. Similar to
 // ExpandLibCall except that the first operand is the in-chain.
 std::pair<SDValue, SDValue>
@@ -2160,6 +2159,27 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
   return ExpandLibCall(LC, Node, isSigned);
 }
 
+/// Expand the node to a libcall selected by the type of its first operand
+/// (for instance lround and its variants).
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+                                                 RTLIB::Libcall Call_F32,
+                                                 RTLIB::Libcall Call_F64,
+                                                 RTLIB::Libcall Call_F80,
+                                                 RTLIB::Libcall Call_F128,
+                                                 RTLIB::Libcall Call_PPCF128) {
+  RTLIB::Libcall LC;
+  switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::f32:     LC = Call_F32; break;
+  case MVT::f64:     LC = Call_F64; break;
+  case MVT::f80:     LC = Call_F80; break;
+  case MVT::f128:    LC = Call_F128; break;
+  case MVT::ppcf128: LC = Call_PPCF128; break;
+  }
+
+  return ExpandLibCall(LC, Node, /*isSigned=*/false);
+}
+
 /// Issue libcalls to __{u}divmod to compute div / rem pairs.
 void
 SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2530,16 +2550,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
   // TODO: We can easily support i4/i2 legal types if any target ever does.
   if (Sz >= 8 && isPowerOf2_32(Sz)) {
     // Create the masks - repeating the pattern every byte.
-    APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
-    APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
-    for (unsigned J = 0; J != Sz; J += 8) {
-      MaskHi4 = MaskHi4 | (0xF0ull << J);
-      MaskLo4 = MaskLo4 | (0x0Full << J);
-      MaskHi2 = MaskHi2 | (0xCCull << J);
-      MaskLo2 = MaskLo2 | (0x33ull << J);
-      MaskHi1 = MaskHi1 | (0xAAull << J);
-      MaskLo1 = MaskLo1 | (0x55ull << J);
-    }
+    APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
+    APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
+    APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
+    APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+    APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
+    APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
 
     // BSWAP if the type is wider than a single byte.
     Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
@@ -2593,9 +2609,8 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
   switch (VT.getSimpleVT().getScalarType().SimpleTy) {
   default: llvm_unreachable("Unhandled Expand type in BSWAP!");
   case MVT::i16:
-    Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+    // Use a rotate by 8. This can be further expanded if necessary.
+    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
   case MVT::i32:
     Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
@@ -2799,12 +2814,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     }
     break;
   }
+  case ISD::STRICT_FP_ROUND:
+    Tmp1 = EmitStackConvert(Node->getOperand(1), 
+                            Node->getValueType(0),
+                            Node->getValueType(0), dl, Node->getOperand(0));
+    ReplaceNode(Node, Tmp1.getNode());
+    LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+    return true;
   case ISD::FP_ROUND:
   case ISD::BITCAST:
-    Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+    Tmp1 = EmitStackConvert(Node->getOperand(0), 
+                            Node->getValueType(0),
                             Node->getValueType(0), dl);
     Results.push_back(Tmp1);
     break;
+  case ISD::STRICT_FP_EXTEND:
+    Tmp1 = EmitStackConvert(Node->getOperand(1),
+                            Node->getOperand(1).getValueType(),
+                            Node->getValueType(0), dl, Node->getOperand(0));
+    ReplaceNode(Node, Tmp1.getNode());
+    LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+    return true;
   case ISD::FP_EXTEND:
     Tmp1 = EmitStackConvert(Node->getOperand(0),
                             Node->getOperand(0).getValueType(),
@@ -2875,6 +2905,30 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
       Results.push_back(Tmp1);
     break;
+  case ISD::LROUND:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+                                         RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+                                         RTLIB::LROUND_F128,
+                                         RTLIB::LROUND_PPCF128));
+    break;
+  case ISD::LLROUND:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+                                         RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+                                         RTLIB::LLROUND_F128,
+                                         RTLIB::LLROUND_PPCF128));
+    break;
+  case ISD::LRINT:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+                                         RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+                                         RTLIB::LRINT_F128,
+                                         RTLIB::LRINT_PPCF128));
+    break;
+  case ISD::LLRINT:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+                                         RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+                                         RTLIB::LLRINT_F128,
+                                         RTLIB::LLRINT_PPCF128));
+    break;
   case ISD::VAARG:
     Results.push_back(DAG.expandVAArg(Node));
     Results.push_back(Results[0].getValue(1));
@@ -3117,7 +3171,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
     // Check to see if this FP immediate is already legal.
     // If this is a legal constant, turn it into a TargetConstantFP node.
-    if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+    if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
+                          DAG.getMachineFunction().getFunction().hasOptSize()))
       Results.push_back(ExpandConstantFP(CFP, true));
     break;
   }
@@ -3291,176 +3346,75 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(TLI.expandAddSubSat(Node, DAG));
     break;
   case ISD::SMULFIX:
-    Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+    Results.push_back(TLI.expandFixedPointMul(Node, DAG));
     break;
-  case ISD::SADDO:
-  case ISD::SSUBO: {
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY: {
     SDValue LHS = Node->getOperand(0);
     SDValue RHS = Node->getOperand(1);
-    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
-                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
-                              LHS, RHS);
-    Results.push_back(Sum);
-    EVT ResultType = Node->getValueType(1);
-    EVT OType = getSetCCResultType(Node->getValueType(0));
-
-    SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
-
-    //   LHSSign -> LHS >= 0
-    //   RHSSign -> RHS >= 0
-    //   SumSign -> Sum >= 0
-    //
-    //   Add:
-    //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
-    //   Sub:
-    //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
-    SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
-    SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
-    SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
-                                      Node->getOpcode() == ISD::SADDO ?
-                                      ISD::SETEQ : ISD::SETNE);
-
-    SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
-    SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
-    SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
-    Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
-    break;
-  }
-  case ISD::UADDO:
-  case ISD::USUBO: {
-    SDValue LHS = Node->getOperand(0);
-    SDValue RHS = Node->getOperand(1);
-    bool IsAdd = Node->getOpcode() == ISD::UADDO;
-    // If ADD/SUBCARRY is legal, use that instead.
-    unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
-    if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
-      SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
-      SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
-                                      { LHS, RHS, CarryIn });
-      Results.push_back(SDValue(NodeCarry.getNode(), 0));
-      Results.push_back(SDValue(NodeCarry.getNode(), 1));
-      break;
-    }
+    SDValue Carry = Node->getOperand(2);
+
+    bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
 
-    SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
-                              LHS.getValueType(), LHS, RHS);
-    Results.push_back(Sum);
+    // Initial add of the 2 operands.
+    unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+    EVT VT = LHS.getValueType();
+    SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
 
-    EVT ResultType = Node->getValueType(1);
+    // Initial check for overflow.
+    EVT CarryType = Node->getValueType(1);
     EVT SetCCType = getSetCCResultType(Node->getValueType(0));
     ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
-    SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
-
-    Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
+    SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+    // Add of the sum and the carry.
+    SDValue CarryExt =
+        DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+    SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+    // Second check for overflow. If we are adding, we can only overflow if the
+    // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+    // If we are subtracting, we can only overflow if the initial sum is 0 and
+    // the carry is set, resulting in a new sum of all 1s.
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue Overflow2 =
+        IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+              : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+    Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+                            DAG.getZExtOrTrunc(Carry, dl, SetCCType));
+
+    SDValue ResultCarry =
+        DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+
+    Results.push_back(Sum2);
+    Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+    break;
+  }
+  case ISD::SADDO:
+  case ISD::SSUBO: {
+    SDValue Result, Overflow;
+    TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+    Results.push_back(Result);
+    Results.push_back(Overflow);
+    break;
+  }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    SDValue Result, Overflow;
+    TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+    Results.push_back(Result);
+    Results.push_back(Overflow);
     break;
   }
   case ISD::UMULO:
   case ISD::SMULO: {
-    EVT VT = Node->getValueType(0);
-    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
-    SDValue LHS = Node->getOperand(0);
-    SDValue RHS = Node->getOperand(1);
-    SDValue BottomHalf;
-    SDValue TopHalf;
-    static const unsigned Ops[2][3] =
-        { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
-          { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
-    bool isSigned = Node->getOpcode() == ISD::SMULO;
-    if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
-      BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
-      TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
-    } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
-      BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
-                               RHS);
-      TopHalf = BottomHalf.getValue(1);
-    } else if (TLI.isTypeLegal(WideVT)) {
-      LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
-      RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
-      Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
-      BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
-                               DAG.getIntPtrConstant(0, dl));
-      TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
-                            DAG.getIntPtrConstant(1, dl));
-    } else {
-      // We can fall back to a libcall with an illegal type for the MUL if we
-      // have a libcall big enough.
-      // Also, we can fall back to a division in some cases, but that's a big
-      // performance hit in the general case.
-      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-      if (WideVT == MVT::i16)
-        LC = RTLIB::MUL_I16;
-      else if (WideVT == MVT::i32)
-        LC = RTLIB::MUL_I32;
-      else if (WideVT == MVT::i64)
-        LC = RTLIB::MUL_I64;
-      else if (WideVT == MVT::i128)
-        LC = RTLIB::MUL_I128;
-      assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
-
-      SDValue HiLHS;
-      SDValue HiRHS;
-      if (isSigned) {
-        // The high part is obtained by SRA'ing all but one of the bits of low
-        // part.
-        unsigned LoSize = VT.getSizeInBits();
-        HiLHS =
-            DAG.getNode(ISD::SRA, dl, VT, LHS,
-                        DAG.getConstant(LoSize - 1, dl,
-                                        TLI.getPointerTy(DAG.getDataLayout())));
-        HiRHS =
-            DAG.getNode(ISD::SRA, dl, VT, RHS,
-                        DAG.getConstant(LoSize - 1, dl,
-                                        TLI.getPointerTy(DAG.getDataLayout())));
-      } else {
-          HiLHS = DAG.getConstant(0, dl, VT);
-          HiRHS = DAG.getConstant(0, dl, VT);
-      }
-
-      // Here we're passing the 2 arguments explicitly as 4 arguments that are
-      // pre-lowered to the correct types. This all depends upon WideVT not
-      // being a legal type for the architecture and thus has to be split to
-      // two arguments.
-      SDValue Ret;
-      if(DAG.getDataLayout().isLittleEndian()) {
-        // Halves of WideVT are packed into registers in different order
-        // depending on platform endianness. This is usually handled by
-        // the C calling convention, but we can't defer to it in
-        // the legalizer.
-        SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
-        Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
-      } else {
-        SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
-        Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
-      }
-      assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
-             "Ret value is a collection of constituent nodes holding result.");
-      BottomHalf = Ret.getOperand(0);
-      TopHalf = Ret.getOperand(1);
+    SDValue Result, Overflow;
+    if (TLI.expandMULO(Node, Result, Overflow, DAG)) {
+      Results.push_back(Result);
+      Results.push_back(Overflow);
     }
-
-    if (isSigned) {
-      Tmp1 = DAG.getConstant(
-          VT.getSizeInBits() - 1, dl,
-          TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
-      Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
-      TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
-                             ISD::SETNE);
-    } else {
-      TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
-                             DAG.getConstant(0, dl, VT), ISD::SETNE);
-    }
-
-    // Truncate the result if SetCC returns a larger type than needed.
-    EVT RType = Node->getValueType(1);
-    if (RType.getSizeInBits() < TopHalf.getValueSizeInBits())
-      TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf);
-
-    assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() &&
-           "Unexpected result type for S/UMULO legalization");
-
-    Results.push_back(BottomHalf);
-    Results.push_back(TopHalf);
     break;
   }
   case ISD::BUILD_PAIR: {
@@ -3487,6 +3441,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                              DAG.getConstant(0, dl, Tmp1.getValueType()),
                              Tmp2, Tmp3, ISD::SETNE);
     }
+    Tmp1->setFlags(Node->getFlags());
     Results.push_back(Tmp1);
     break;
   case ISD::BR_JT: {
@@ -3570,7 +3525,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // condition code, create a new SETCC node.
       if (Tmp3.getNode())
         Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
-                           Tmp1, Tmp2, Tmp3);
+                           Tmp1, Tmp2, Tmp3, Node->getFlags());
 
       // If we expanded the SETCC by inverting the condition code, then wrap
       // the existing SETCC in a NOT to restore the intended condition.
@@ -3598,6 +3553,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                        DAG.getConstant(TrueValue, dl, VT),
                        DAG.getConstant(0, dl, VT),
                        Tmp3);
+    Tmp1->setFlags(Node->getFlags());
     Results.push_back(Tmp1);
     break;
   }
@@ -3617,9 +3573,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
              "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
              "expanded.");
-      EVT CCVT =
-          TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
-      SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+      EVT CCVT = getSetCCResultType(CmpVT);
+      SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
       Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
       break;
     }
@@ -3635,6 +3590,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // Use the new condition code and swap true and false
       Legalized = true;
       Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+      Tmp1->setFlags(Node->getFlags());
     } else {
       // If The inverse is not legal, then try to swap the arguments using
       // the inverse condition code.
@@ -3644,6 +3600,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
         // lhs and rhs.
         Legalized = true;
         Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+        Tmp1->setFlags(Node->getFlags());
       }
     }
 
@@ -3670,6 +3627,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
         Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
                            Tmp2, Tmp3, Tmp4, CC);
       }
+      Tmp1->setFlags(Node->getFlags());
     }
     Results.push_back(Tmp1);
     break;
@@ -3729,6 +3687,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     ReplaceNode(SDValue(Node, 0), Result);
     break;
   }
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    Results.push_back(TLI.expandVecReduce(Node, DAG));
+    break;
   case ISD::GLOBAL_OFFSET_TABLE:
   case ISD::GlobalAddress:
   case ISD::GlobalTLSAddress:
@@ -4273,6 +4246,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
     // Perform the larger operation, then round down.
     Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+    Tmp1->setFlags(Node->getFlags());
     if (TruncOp != ISD::FP_ROUND)
       Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
     else
@@ -4303,8 +4277,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     }
     Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
-    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
-                                  Tmp1, Tmp2, Node->getOperand(2)));
+    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+                                  Tmp2, Node->getOperand(2), Node->getFlags()));
     break;
   }
   case ISD::BR_CC: {
@@ -4532,6 +4506,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Results.push_back(CvtVec);
     break;
   }
+  case ISD::ATOMIC_SWAP: {
+    AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+    SDLoc SL(Node);
+    SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+    assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+           "unexpected promotion type");
+    assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+           "unexpected atomic_swap with illegal type");
+
+    SDValue NewAtomic
+      = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+                      DAG.getVTList(NVT, MVT::Other),
+                      { AM->getChain(), AM->getBasePtr(), CastVal },
+                      AM->getMemOperand());
+    Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+    Results.push_back(NewAtomic.getValue(1));
+    break;
+  }
   }
 
   // Replace the original node with the legalized result.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4644e9588e7b..b4849b2881e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1,9 +1,8 @@
 //===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -104,6 +103,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
     case ISD::FTRUNC:      R = SoftenFloatRes_FTRUNC(N); break;
     case ISD::LOAD:        R = SoftenFloatRes_LOAD(N, ResNo); break;
+    case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
     case ISD::SELECT:      R = SoftenFloatRes_SELECT(N, ResNo); break;
     case ISD::SELECT_CC:   R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
     case ISD::SINT_TO_FP:
@@ -440,6 +440,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
     return SDValue(N, ResNo);
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDLoc dl(N);
+
+  EVT FloatVT = N->getValueType(ResNo);
+  if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
+    // Expand Y = FNEG(X) -> Y = X ^ sign mask
+    APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+    return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+                       DAG.getConstant(SignMask, dl, NVT));
+  }
+
   // Expand Y = FNEG(X) -> Y = SUB -0.0, X
   SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
                      GetSoftenedFloat(N->getOperand(0)) };
@@ -763,6 +772,10 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_XINT(N); break;
+  case ISD::LROUND:      Res = SoftenFloatOp_LROUND(N); break;
+  case ISD::LLROUND:     Res = SoftenFloatOp_LLROUND(N); break;
+  case ISD::LRINT:       Res = SoftenFloatOp_LRINT(N); break;
+  case ISD::LLRINT:      Res = SoftenFloatOp_LLRINT(N); break;
   case ISD::SELECT:      Res = SoftenFloatOp_SELECT(N); break;
   case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
@@ -1029,6 +1042,61 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
                       ST->getMemOperand());
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LROUND_F32,
+                                           RTLIB::LROUND_F64,
+                                           RTLIB::LROUND_F80,
+                                           RTLIB::LROUND_F128,
+                                           RTLIB::LROUND_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLROUND_F32,
+                                           RTLIB::LLROUND_F64,
+                                           RTLIB::LLROUND_F80,
+                                           RTLIB::LLROUND_F128,
+                                           RTLIB::LLROUND_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LRINT_F32,
+                                           RTLIB::LRINT_F64,
+                                           RTLIB::LRINT_F80,
+                                           RTLIB::LRINT_F128,
+                                           RTLIB::LRINT_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLRINT_F32,
+                                           RTLIB::LLRINT_F64,
+                                           RTLIB::LLRINT_F80,
+                                           RTLIB::LLRINT_F128,
+                                           RTLIB::LLRINT_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
 
 //===----------------------------------------------------------------------===//
 //  Float Result Expansion
@@ -1562,6 +1630,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
   case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+  case ISD::LROUND:     Res = ExpandFloatOp_LROUND(N); break;
+  case ISD::LLROUND:    Res = ExpandFloatOp_LLROUND(N); break;
+  case ISD::LRINT:      Res = ExpandFloatOp_LRINT(N); break;
+  case ISD::LLRINT:     Res = ExpandFloatOp_LLRINT(N); break;
   case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
   case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
@@ -1732,6 +1804,54 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
                            ST->getMemoryVT(), ST->getMemOperand());
 }
 
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LROUND_F32,
+                                           RTLIB::LROUND_F64,
+                                           RTLIB::LROUND_F80,
+                                           RTLIB::LROUND_F128,
+                                           RTLIB::LROUND_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLROUND_F32,
+                                           RTLIB::LLROUND_F64,
+                                           RTLIB::LLROUND_F80,
+                                           RTLIB::LLROUND_F128,
+                                           RTLIB::LLROUND_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LRINT_F32,
+                                           RTLIB::LRINT_F64,
+                                           RTLIB::LRINT_F80,
+                                           RTLIB::LRINT_F128,
+                                           RTLIB::LRINT_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLRINT_F32,
+                                           RTLIB::LLRINT_F64,
+                                           RTLIB::LLRINT_F80,
+                                           RTLIB::LLRINT_F128,
+                                           RTLIB::LLRINT_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
 //===----------------------------------------------------------------------===//
 //  Float Operand Promotion
 //===----------------------------------------------------------------------===//
@@ -1748,6 +1868,8 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
 }
 
 bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+  LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+             dbgs() << "\n");
   SDValue R = SDValue();
 
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
@@ -1762,6 +1884,10 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
   // a part of PromoteFloatResult.
   switch (N->getOpcode()) {
     default:
+  #ifndef NDEBUG
+      dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
+  #endif
       llvm_unreachable("Do not know how to promote this operator's operand!");
 
     case ISD::BITCAST:    R = PromoteFloatOp_BITCAST(N, OpNo); break;
@@ -1872,6 +1998,8 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+  LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+             dbgs() << "\n");
   SDValue R = SDValue();
 
   switch (N->getOpcode()) {
@@ -1880,6 +2008,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FP16_TO_FP:
     case ISD::FP_TO_FP16:
     default:
+#ifndef NDEBUG
+      dbgs() << "PromoteFloatResult #" << ResNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
+#endif
       llvm_unreachable("Do not know how to promote this operator's result!");
 
     case ISD::BITCAST:    R = PromoteFloatRes_BITCAST(N); break;
@@ -1932,7 +2064,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::SINT_TO_FP:
     case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
     case ISD::UNDEF:      R = PromoteFloatRes_UNDEF(N); break;
-
+    case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
   }
 
   if (R.getNode())
@@ -2166,3 +2298,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
                                                N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+  AtomicSDNode *AM = cast<AtomicSDNode>(N);
+  SDLoc SL(N);
+
+  SDValue CastVal = BitConvertToInteger(AM->getVal());
+  EVT CastVT = CastVal.getValueType();
+
+  SDValue NewAtomic
+    = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+                    DAG.getVTList(CastVT, MVT::Other),
+                    { AM->getChain(), AM->getBasePtr(), CastVal },
+                    AM->getMemOperand());
+
+  SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+                                   NewAtomic);
+  // Legalize the chain result by replacing uses of the old value chain with the
+  // new one
+  ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+  return ResultCast;
+
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5fbc70fce60d..15ac45c37c66 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1,9 +1,8 @@
 //===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -149,7 +148,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
   case ISD::USUBSAT:     Res = PromoteIntRes_ADDSUBSAT(N); break;
-  case ISD::SMULFIX:     Res = PromoteIntRes_SMULFIX(N); break;
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:     Res = PromoteIntRes_MULFIX(N); break;
+  case ISD::ABS:         Res = PromoteIntRes_ABS(N); break;
 
   case ISD::ATOMIC_LOAD:
     Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -172,6 +174,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
     Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
     break;
+
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+    Res = PromoteIntRes_VECREDUCE(N);
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -293,21 +307,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
                          BitConvertToInteger(GetScalarizedVector(InOp)));
     break;
   case TargetLowering::TypeSplitVector: {
-    // For example, i32 = BITCAST v2i16 on alpha.  Convert the split
-    // pieces of the input into integers and reassemble in the final type.
-    SDValue Lo, Hi;
-    GetSplitVector(N->getOperand(0), Lo, Hi);
-    Lo = BitConvertToInteger(Lo);
-    Hi = BitConvertToInteger(Hi);
-
-    if (DAG.getDataLayout().isBigEndian())
-      std::swap(Lo, Hi);
-
-    InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
-                       EVT::getIntegerVT(*DAG.getContext(),
-                                         NOutVT.getSizeInBits()),
-                       JoinIntegers(Lo, Hi));
-    return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+    if (!NOutVT.isVector()) {
+      // For example, i32 = BITCAST v2i16 on alpha.  Convert the split
+      // pieces of the input into integers and reassemble in the final type.
+      SDValue Lo, Hi;
+      GetSplitVector(N->getOperand(0), Lo, Hi);
+      Lo = BitConvertToInteger(Lo);
+      Hi = BitConvertToInteger(Hi);
+
+      if (DAG.getDataLayout().isBigEndian())
+        std::swap(Lo, Hi);
+
+      InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+                         EVT::getIntegerVT(*DAG.getContext(),
+                                           NOutVT.getSizeInBits()),
+                         JoinIntegers(Lo, Hi));
+      return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+    }
+    break;
   }
   case TargetLowering::TypeWidenVector:
     // The input is widened to the same size. Convert to the widened value.
@@ -555,7 +572,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
   SDLoc dl(N);
   SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
                                   N->getMask(), ExtPassThru, N->getMemoryVT(),
-                                  N->getMemOperand(), ISD::SEXTLOAD);
+                                  N->getMemOperand(), ISD::EXTLOAD);
   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -582,23 +599,27 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
 
 /// Promote the overflow flag of an overflowing arithmetic node.
 SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
-  // Simply change the return type of the boolean result.
+  // Change the return type of the boolean result while obeying
+  // getSetCCResultType.
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
-  EVT ValueVTs[] = { N->getValueType(0), NVT };
+  EVT VT = N->getValueType(0);
+  EVT SVT = getSetCCResultType(VT);
   SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
   unsigned NumOps = N->getNumOperands();
   assert(NumOps <= 3 && "Too many operands");
   if (NumOps == 3)
     Ops[2] = N->getOperand(2);
 
-  SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
-                            DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps));
+  SDLoc dl(N);
+  SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
+                            makeArrayRef(Ops, NumOps));
 
   // Modified the sum result - switch anything that used the old sum to use
   // the new one.
   ReplaceValueWith(SDValue(N, 0), Res);
 
-  return SDValue(Res.getNode(), 1);
+  // Convert to the expected type.
+  return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
@@ -646,12 +667,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
   return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
 }
 
-SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
   // Can just promote the operands then continue with operation.
   SDLoc dl(N);
-  SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
-  SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+  SDValue Op1Promoted, Op2Promoted;
+  bool Signed =
+      N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+  if (Signed) {
+    Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+    Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+  } else {
+    Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+    Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+  }
+  EVT OldType = N->getOperand(0).getValueType();
   EVT PromotedType = Op1Promoted.getValueType();
+  unsigned DiffSize =
+      PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+  bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+  if (Saturating) {
+    // Promoting the operand and result values changes the saturation width,
+    // which extends the values that we clamp to on saturation. This could be
+    // resolved by shifting one of the operands the same amount, which would
+    // also shift the result we compare against, then shifting back.
+    EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+    Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+                              DAG.getConstant(DiffSize, dl, ShiftTy));
+    SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+                                 Op2Promoted, N->getOperand(2));
+    unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+    return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+                       DAG.getConstant(DiffSize, dl, ShiftTy));
+  }
   return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
                      N->getOperand(2));
 }
@@ -875,7 +923,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
 
   // Calculate the overflow flag: zero extend the arithmetic result from
   // the original type.
-  SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+  SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
   // Overflowed if and only if this is not equal to Res.
   Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
 
@@ -917,6 +965,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
   return SDValue(Res.getNode(), 0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+  SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   // Promote the overflow bit trivially.
   if (ResNo == 1)
@@ -946,9 +999,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   SDValue Overflow;
   if (N->getOpcode() == ISD::UMULO) {
     // Unsigned overflow occurred if the high part is non-zero.
+    unsigned Shift = SmallVT.getScalarSizeInBits();
+    EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
+                                              TLI, DAG);
     SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
-                             DAG.getIntPtrConstant(SmallVT.getSizeInBits(),
-                                                   DL));
+                             DAG.getConstant(Shift, DL, ShiftTy));
     Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
                             DAG.getConstant(0, DL, Hi.getValueType()),
                             ISD::SETNE);
@@ -1091,7 +1146,21 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
 
   case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
 
-  case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+
+  case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
+
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -1434,24 +1503,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
   SDValue Carry = N->getOperand(2);
   SDLoc DL(N);
 
-  auto VT = getSetCCResultType(LHS.getValueType());
-  TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT);
-  switch (BoolType) {
-  case TargetLoweringBase::UndefinedBooleanContent:
-    Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT);
-    break;
-  case TargetLoweringBase::ZeroOrOneBooleanContent:
-    Carry = DAG.getZExtOrTrunc(Carry, DL, VT);
-    break;
-  case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
-    Carry = DAG.getSExtOrTrunc(Carry, DL, VT);
-    break;
-  }
+  Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
 
   return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
 }
 
-SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
   SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
   return SDValue(
       DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1475,6 +1532,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
                  0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+  SDValue Op = SExtPromotedInteger(N->getOperand(1));
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue Op;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Expected integer vector reduction");
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+    Op = GetPromotedInteger(N->getOperand(0));
+    break;
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+    Op = SExtPromotedInteger(N->getOperand(0));
+    break;
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+    Op = ZExtPromotedInteger(N->getOperand(0));
+    break;
+  }
+
+  EVT EltVT = Op.getValueType().getVectorElementType();
+  EVT VT = N->getValueType(0);
+  if (VT.bitsGE(EltVT))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+
+  // Result size must be >= element size. If this is not the case after
+  // promotion, also promote the result type and then truncate.
+  SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
+  return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+}
+
 //===----------------------------------------------------------------------===//
 //  Integer Result Expansion
 //===----------------------------------------------------------------------===//
@@ -1499,7 +1594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
     dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
     N->dump(&DAG); dbgs() << "\n";
 #endif
-    llvm_unreachable("Do not know how to expand the result of this operator!");
+    report_fatal_error("Do not know how to expand the result of this "
+                       "operator!");
 
   case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
   case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
@@ -1518,6 +1614,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::BITREVERSE:  ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
   case ISD::BSWAP:       ExpandIntRes_BSWAP(N, Lo, Hi); break;
   case ISD::Constant:    ExpandIntRes_Constant(N, Lo, Hi); break;
+  case ISD::ABS:         ExpandIntRes_ABS(N, Lo, Hi); break;
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTLZ:        ExpandIntRes_CTLZ(N, Lo, Hi); break;
   case ISD::CTPOP:       ExpandIntRes_CTPOP(N, Lo, Hi); break;
@@ -1526,6 +1623,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
   case ISD::FP_TO_SINT:  ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
   case ISD::FP_TO_UINT:  ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+  case ISD::LLROUND:     ExpandIntRes_LLROUND(N, Lo, Hi); break;
+  case ISD::LLRINT:      ExpandIntRes_LLRINT(N, Lo, Hi); break;
   case ISD::LOAD:        ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
   case ISD::MUL:         ExpandIntRes_MUL(N, Lo, Hi); break;
   case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -1613,7 +1712,20 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
   case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
-  case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
+
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2267,6 +2379,25 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
                        IsOpaque);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+
+  // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+                            DAG.getConstant(0, dl, VT), N0);
+  SDValue NegLo, NegHi;
+  SplitInteger(Neg, NegLo, NegHi);
+
+  GetExpandedInteger(N0, Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
+                                 DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+  Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+  Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
@@ -2361,6 +2492,58 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
                Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  if (VT == MVT::f32)
+    LC = RTLIB::LLROUND_F32;
+  else if (VT == MVT::f64)
+    LC = RTLIB::LLROUND_F64;
+  else if (VT == MVT::f80)
+    LC = RTLIB::LLROUND_F80;
+  else if (VT == MVT::f128)
+    LC = RTLIB::LLROUND_F128;
+  else if (VT == MVT::ppcf128)
+    LC = RTLIB::LLROUND_PPCF128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+
+  SDValue Op = N->getOperand(0);
+  if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+    Op = GetPromotedFloat(Op);
+
+  SDLoc dl(N);
+  EVT RetVT = N->getValueType(0);
+  SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+               Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  if (VT == MVT::f32)
+    LC = RTLIB::LLRINT_F32;
+  else if (VT == MVT::f64)
+    LC = RTLIB::LLRINT_F64;
+  else if (VT == MVT::f80)
+    LC = RTLIB::LLRINT_F80;
+  else if (VT == MVT::f128)
+    LC = RTLIB::LLRINT_F128;
+  else if (VT == MVT::ppcf128)
+    LC = RTLIB::LLRINT_PPCF128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+
+  SDValue Op = N->getOperand(0);
+  if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+    Op = GetPromotedFloat(Op);
+
+  SDLoc dl(N);
+  EVT RetVT = N->getValueType(0);
+  SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+               Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   if (ISD::isNormalLoad(N)) {
@@ -2581,15 +2764,39 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
   SplitInteger(Result, Lo, Hi);
 }
 
-void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
-                                            SDValue &Hi) {
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion performs rounding down towards
+/// negative infinity, though targets that do care about rounding should specify
+/// a target hook for rounding and provide their own expansion or lowering of
+/// fixed point multiplication to be consistent with rounding.
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
   SDLoc dl(N);
   EVT VT = N->getValueType(0);
+  unsigned VTSize = VT.getScalarSizeInBits();
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   uint64_t Scale = N->getConstantOperandVal(2);
+  bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+  EVT BoolVT = getSetCCResultType(VT);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
   if (!Scale) {
-    SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    SDValue Result;
+    if (!Saturating) {
+      Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    } else {
+      Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+
+      APInt MinVal = APInt::getSignedMinValue(VTSize);
+      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+      Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+    }
     SplitInteger(Result, Lo, Hi);
     return;
   }
@@ -2600,15 +2807,19 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
   GetExpandedInteger(RHS, RL, RH);
   SmallVector<SDValue, 4> Result;
 
-  if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+  bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+                 N->getOpcode() == ISD::SMULFIXSAT);
+  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+  if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
                           TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
                           LL, LH, RL, RH)) {
-    report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+    report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
     return;
   }
 
-  unsigned VTSize = VT.getScalarSizeInBits();
   unsigned NVTSize = NVT.getScalarSizeInBits();
+  assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+                                    "the size of the current value type");
   EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
 
   // Shift whole amount by scale.
@@ -2617,6 +2828,11 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
   SDValue ResultHL = Result[2];
   SDValue ResultHH = Result[3];
 
+  SDValue SatMax, SatMin;
+  SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+  SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+  EVT BoolNVT = getSetCCResultType(NVT);
+
   // After getting the multplication result in 4 parts, we need to perform a
   // shift right by the amount of the scale to get the result in that scale.
   // Let's say we multiply 2 64 bit numbers. The resulting value can be held in
@@ -2645,11 +2861,60 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
     Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
     Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
                      DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+
+    // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+    // highest bit of HH determines saturation direction in the event of
+    // saturation.
+    // The number of overflow bits we can check is VTSize - Scale + 1 (we
+    // include the sign bit). If these top bits are > 0, then we overflowed past
+    // the max value. If these top bits are < -1, then we overflowed past the
+    // min value. Otherwise, we did not overflow.
+    if (Saturating) {
+      unsigned OverflowBits = VTSize - Scale + 1;
+      assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+             "Extent of overflow bits must start within HL");
+      SDValue HLHiMask = DAG.getConstant(
+          APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+      SDValue HLLoMask = DAG.getConstant(
+          APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+
+      // HH > 0 or HH == 0 && HL > HLLoMask
+      SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+      SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+      SDValue HLPos =
+          DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+      SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+                           DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
+
+      // HH < -1 or HH == -1 && HL < HLHiMask
+      SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+      SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+      SDValue HLNeg =
+          DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+      SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+                           DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
+    }
   } else if (Scale == NVTSize) {
     // If the scales are equal, Lo and Hi are ResultLH and Result HL,
     // respectively. Avoid shifting to prevent undefined behavior.
     Lo = ResultLH;
     Hi = ResultHL;
+
+    // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
+    // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
+    if (Saturating) {
+      SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+      SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+      SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+      SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+                           DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
+
+      SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+      SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+      SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+      SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+                           DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
+    }
   } else if (Scale < VTSize) {
     // If the scale is instead less than the old VT size, but greater than or
     // equal to the expanded VT size, the first part of the result (ResultLL) is
@@ -2664,9 +2929,39 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
     Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
     Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
                      DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+
+    // This is similar to the case when we saturate if Scale < NVTSize, but we
+    // only need to check HH.
+    if (Saturating) {
+      unsigned OverflowBits = VTSize - Scale + 1;
+      SDValue HHHiMask = DAG.getConstant(
+          APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+      SDValue HHLoMask = DAG.getConstant(
+          APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+
+      SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+      SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+    }
+  } else if (Scale == VTSize) {
+    assert(
+        !Signed &&
+        "Only unsigned types can have a scale equal to the operand bit width");
+
+    Lo = ResultHL;
+    Hi = ResultHH;
   } else {
-    llvm_unreachable(
-        "Expected the scale to be less than the width of the operands");
+    llvm_unreachable("Expected the scale to be less than or equal to the width "
+                     "of the operands");
+  }
+
+  if (Saturating) {
+    APInt LHMax = APInt::getSignedMaxValue(NVTSize);
+    APInt LLMax = APInt::getAllOnesValue(NVTSize);
+    APInt LHMin = APInt::getSignedMinValue(NVTSize);
+    Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
+    Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
+    Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
+    Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
   }
 }
 
@@ -2765,11 +3060,15 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
   }
 
   // Next check to see if the target supports this SHL_PARTS operation or if it
-  // will custom expand it.
+  // will custom expand it. Don't lower this to SHL_PARTS when we optimise for
+  // size, but create a libcall instead.
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
-  if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
-      Action == TargetLowering::Custom) {
+  const bool LegalOrCustom =
+    (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+    Action == TargetLowering::Custom;
+
+  if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
     // Expand the subcomponents.
     SDValue LHSL, LHSH;
     GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
@@ -3145,6 +3444,14 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
 }
 
+void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
+                                              SDValue &Lo, SDValue &Hi) {
+  // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
+  // both halves independently.
+  SDValue Res = TLI.expandVecReduce(N, DAG);
+  SplitInteger(Res, Lo, Hi);
+}
+
 //===----------------------------------------------------------------------===//
 //  Integer Operand Expansion
 //===----------------------------------------------------------------------===//
@@ -3167,7 +3474,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
     dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
     N->dump(&DAG); dbgs() << "\n";
   #endif
-    llvm_unreachable("Do not know how to expand this operator's operand!");
+    report_fatal_error("Do not know how to expand this operator's operand!");
 
   case ISD::BITCAST:           Res = ExpandOp_BITCAST(N); break;
   case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
@@ -3632,8 +3939,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
 
 
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
 
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -3644,6 +3949,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
   SDLoc dl(N);
   SDValue BaseIdx = N->getOperand(1);
 
+  SDValue InOp0 = N->getOperand(0);
+  if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
+    InOp0 = GetPromotedInteger(N->getOperand(0));
+
+  EVT InVT = InOp0.getValueType();
+
   SmallVector<SDValue, 8> Ops;
   Ops.reserve(OutNumElems);
   for (unsigned i = 0; i != OutNumElems; ++i) {
@@ -3654,7 +3965,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
     SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
       InVT.getVectorElementType(), N->getOperand(0), Index);
 
-    SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+    SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
     // Insert the converted element to the new vector.
     Ops.push_back(Op);
   }
@@ -3809,6 +4120,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
     V0, ConvElem, N->getOperand(2));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+  // The VECREDUCE result size may be larger than the element size, so
+  // we can simply change the result type.
+  SDLoc dl(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDLoc dl(N);
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a9f144c06e9a..14fd5be23ccb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1,9 +1,8 @@
 //===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -708,6 +707,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
   auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
   assert((OpIdEntry == 0) && "Node is already promoted!");
   OpIdEntry = getTableId(Result);
+  Result->setFlags(Op->getFlags());
 
   DAG.transferDbgValues(Op, Result);
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 032000f6cb79..1d489b1b3a33 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1,9 +1,8 @@
 //===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -345,8 +344,10 @@ private:
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
-  SDValue PromoteIntRes_SMULFIX(SDNode *N);
+  SDValue PromoteIntRes_MULFIX(SDNode *N);
   SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+  SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+  SDValue PromoteIntRes_ABS(SDNode *N);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -379,7 +380,9 @@ private:
   SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
   SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
-  SDValue PromoteIntOp_SMULFIX(SDNode *N);
+  SDValue PromoteIntOp_MULFIX(SDNode *N);
+  SDValue PromoteIntOp_FPOWI(SDNode *N);
+  SDValue PromoteIntOp_VECREDUCE(SDNode *N);
 
   void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
 
@@ -402,6 +405,7 @@ private:
   void ExpandIntRes_AssertSext        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_AssertZext        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_Constant          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ABS               (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTLZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTPOP             (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTTZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -414,6 +418,8 @@ private:
   void ExpandIntRes_FLT_ROUNDS        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_FP_TO_SINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_FP_TO_UINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_LLROUND           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_LLRINT            (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_Logical           (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ADDSUB            (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -435,9 +441,10 @@ private:
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ADDSUBSAT         (SDNode *N, SDValue &Lo, SDValue &Hi);
-  void ExpandIntRes_SMULFIX           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_MULFIX            (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_ATOMIC_LOAD       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_VECREDUCE         (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
                              SDValue &Lo, SDValue &Hi);
@@ -548,6 +555,10 @@ private:
   SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftenFloatOp_LROUND(SDNode *N);
+  SDValue SoftenFloatOp_LLROUND(SDNode *N);
+  SDValue SoftenFloatOp_LRINT(SDNode *N);
+  SDValue SoftenFloatOp_LLRINT(SDNode *N);
   SDValue SoftenFloatOp_SELECT(SDNode *N);
   SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
   SDValue SoftenFloatOp_SETCC(SDNode *N);
@@ -607,6 +618,10 @@ private:
   SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
   SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
   SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue ExpandFloatOp_LROUND(SDNode *N);
+  SDValue ExpandFloatOp_LLROUND(SDNode *N);
+  SDValue ExpandFloatOp_LRINT(SDNode *N);
+  SDValue ExpandFloatOp_LLRINT(SDNode *N);
   SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
   SDValue ExpandFloatOp_SETCC(SDNode *N);
   SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -640,6 +655,7 @@ private:
   SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
   SDValue PromoteFloatRes_UnaryOp(SDNode *N);
   SDValue PromoteFloatRes_UNDEF(SDNode *N);
+  SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
   SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
 
   bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
@@ -673,6 +689,7 @@ private:
   SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
   SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
   SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
+  SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo);
   SDValue ScalarizeVecRes_InregOp(SDNode *N);
   SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
 
@@ -680,6 +697,7 @@ private:
   SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+  SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
   SDValue ScalarizeVecRes_FPOWI(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -691,7 +709,7 @@ private:
   SDValue ScalarizeVecRes_UNDEF(SDNode *N);
   SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
 
-  SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+  SDValue ScalarizeVecRes_MULFIX(SDNode *N);
 
   // Vector Operand Scalarization: <1 x ty> -> ty.
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -703,6 +721,8 @@ private:
   SDValue ScalarizeVecOp_VSETCC(SDNode *N);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+  SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+  SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
 
   //===--------------------------------------------------------------------===//
   // Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -727,8 +747,10 @@ private:
   void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+                              SDValue &Lo, SDValue &Hi);
 
-  void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -745,6 +767,7 @@ private:
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
                                   SDValue &Hi);
+  void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
   bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -808,7 +831,9 @@ private:
   SDValue WidenVecRes_Binary(SDNode *N);
   SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
   SDValue WidenVecRes_StrictFP(SDNode *N);
+  SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
   SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
   SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
   SDValue WidenVecRes_POWI(SDNode *N);
   SDValue WidenVecRes_Shift(SDNode *N);
@@ -827,9 +852,16 @@ private:
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_SETCC(SDNode* N);
+  SDValue WidenVecOp_VSELECT(SDNode *N);
 
   SDValue WidenVecOp_Convert(SDNode *N);
   SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+  SDValue WidenVecOp_VECREDUCE(SDNode *N);
+
+  /// Helper function to unroll a chained (strict FP) vector operation into
+  /// scalar operations, building a result vector of ResNE elements
+  /// (ResNE == 0 means the same number of elements as N's result).
+  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
 
   //===--------------------------------------------------------------------===//
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b9d370441c3e..943f63f46c47 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -1,9 +1,8 @@
 //===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4923a529c21b..10b8b705869e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,9 +1,8 @@
 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -141,7 +140,11 @@ class VectorLegalizer {
   SDValue ExpandFunnelShift(SDValue Op);
   SDValue ExpandROT(SDValue Op);
   SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+  SDValue ExpandUADDSUBO(SDValue Op);
+  SDValue ExpandSADDSUBO(SDValue Op);
+  SDValue ExpandMULO(SDValue Op);
   SDValue ExpandAddSubSat(SDValue Op);
+  SDValue ExpandFixedPointMul(SDValue Op);
   SDValue ExpandStrictFPOp(SDValue Op);
 
   /// Implements vector promotion.
@@ -263,7 +266,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
         LLVM_FALLTHROUGH;
       case TargetLowering::Expand:
         Changed = true;
-        return LegalizeOp(ExpandLoad(Op));
+        return ExpandLoad(Op);
       }
     }
   } else if (Op.getOpcode() == ISD::STORE) {
@@ -288,17 +291,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
       }
       case TargetLowering::Expand:
         Changed = true;
-        return LegalizeOp(ExpandStore(Op));
+        return ExpandStore(Op);
       }
     }
   }
 
-  bool HasVectorValue = false;
-  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
-       J != E;
-       ++J)
-    HasVectorValue |= J->isVector();
-  if (!HasVectorValue)
+  bool HasVectorValueOrOp = false;
+  for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
+    HasVectorValueOrOp |= J->isVector();
+  for (const SDValue &Op : Node->op_values())
+    HasVectorValueOrOp |= Op.getValueType().isVector();
+
+  if (!HasVectorValueOrOp)
     return TranslateLegalizeResults(Op, Result);
 
   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
@@ -329,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_ROUND:
+  case ISD::STRICT_FP_EXTEND:
     // These pseudo-ops get legalized as if they were their non-strict
     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -418,6 +424,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::UMAX:
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI:
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
   case ISD::FCANONICALIZE:
   case ISD::SADDSAT:
   case ISD::UADDSAT:
@@ -425,7 +437,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::USUBSAT:
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     break;
-  case ISD::SMULFIX: {
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX: {
     unsigned Scale = Node->getConstantOperandVal(2);
     Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                               Node->getValueType(0), Scale);
@@ -437,6 +451,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(0).getValueType());
     break;
@@ -650,23 +677,21 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
       LoadChains.push_back(ScalarLoad.getValue(1));
     }
 
-    // Extract bits, pack and extend/trunc them into destination type.
-    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
-    SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
-
     unsigned BitOffset = 0;
     unsigned WideIdx = 0;
     unsigned WideBits = WideVT.getSizeInBits();
 
+    // Extract bits, pack and extend/trunc them into destination type.
+    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+    SDValue SrcEltBitMask = DAG.getConstant(
+        APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
+
     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
-      SDValue Lo, Hi, ShAmt;
+      assert(BitOffset < WideBits && "Unexpected offset!");
 
-      if (BitOffset < WideBits) {
-        ShAmt = DAG.getConstant(
-            BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
-        Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
-        Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
-      }
+      SDValue ShAmt = DAG.getConstant(
+          BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+      SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
 
       BitOffset += SrcEltBits;
       if (BitOffset >= WideBits) {
@@ -676,13 +701,13 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
           ShAmt = DAG.getConstant(
               SrcEltBits - BitOffset, dl,
               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
-          Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
-          Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+          SDValue Hi =
+              DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+          Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
         }
       }
 
-      if (Hi.getNode())
-        Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+      Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
 
       switch (ExtType) {
       default: llvm_unreachable("Unknown extended-load op!");
@@ -778,11 +803,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
     return ExpandFMINNUM_FMAXNUM(Op);
+  case ISD::UADDO:
+  case ISD::USUBO:
+    return ExpandUADDSUBO(Op);
+  case ISD::SADDO:
+  case ISD::SSUBO:
+    return ExpandSADDSUBO(Op);
+  case ISD::UMULO:
+  case ISD::SMULO:
+    return ExpandMULO(Op);
   case ISD::USUBSAT:
   case ISD::SSUBSAT:
   case ISD::UADDSAT:
   case ISD::SADDSAT:
     return ExpandAddSubSat(Op);
+  case ISD::SMULFIX:
+  case ISD::UMULFIX:
+    return ExpandFixedPointMul(Op);
   case ISD::STRICT_FADD:
   case ISD::STRICT_FSUB:
   case ISD::STRICT_FMUL:
@@ -808,6 +845,20 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
     return ExpandStrictFPOp(Op);
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    return TLI.expandVecReduce(Op.getNode(), DAG);
   default:
     return DAG.UnrollVectorOp(Op.getNode());
   }
@@ -898,6 +949,19 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
   EVT SrcVT = Src.getValueType();
   int NumSrcElements = SrcVT.getVectorNumElements();
 
+  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+  // into a larger vector type.
+  if (SrcVT.bitsLE(VT)) {
+    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                             NumSrcElements);
+    Src = DAG.getNode(
+        ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
   // Build a base mask of undef shuffles.
   SmallVector<int, 16> ShuffleMask;
   ShuffleMask.resize(NumSrcElements, -1);
@@ -945,6 +1009,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
   EVT SrcVT = Src.getValueType();
   int NumSrcElements = SrcVT.getVectorNumElements();
 
+  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+  // into a larger vector type.
+  if (SrcVT.bitsLE(VT)) {
+    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                             NumSrcElements);
+    Src = DAG.getNode(
+        ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
   // Build up a zero vector to blend into this one.
   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
 
@@ -1212,12 +1289,58 @@ SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
   return DAG.UnrollVectorOp(Op.getNode());
 }
 
+SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+  SDValue Result, Overflow;
+  TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+  if (Op.getResNo() == 0) {
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+    return Result;
+  } else {
+    AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+    return Overflow;
+  }
+}
+
+SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+  SDValue Result, Overflow;
+  TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+  if (Op.getResNo() == 0) {
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+    return Result;
+  } else {
+    AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+    return Overflow;
+  }
+}
+
+SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+  SDValue Result, Overflow;
+  if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
+    std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+
+  if (Op.getResNo() == 0) {
+    AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+    return Result;
+  } else {
+    AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+    return Overflow;
+  }
+}
+
 SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
   if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
     return Expanded;
   return DAG.UnrollVectorOp(Op.getNode());
 }
 
+SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
+  if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
+    return Expanded;
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
   EVT VT = Op.getValueType();
   EVT EltVT = VT.getVectorElementType();
@@ -1245,7 +1368,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
 
       if (OperVT.isVector())
         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                           EltVT, Oper, Idx);
+                           OperVT.getVectorElementType(), Oper, Idx);
 
       Opers.push_back(Oper);
     }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f367e9358576..7e4d52617977 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1,9 +1,8 @@
 //===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BITCAST:           R = ScalarizeVecRes_BITCAST(N); break;
   case ISD::BUILD_VECTOR:      R = ScalarizeVecRes_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::STRICT_FP_ROUND:   R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
   case ISD::FP_ROUND:          R = ScalarizeVecRes_FP_ROUND(N); break;
   case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
   case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
@@ -69,6 +69,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::ZERO_EXTEND_VECTOR_INREG:
     R = ScalarizeVecRes_VecInregOp(N);
     break;
+  case ISD::ABS:
   case ISD::ANY_EXTEND:
   case ISD::BITREVERSE:
   case ISD::BSWAP:
@@ -170,10 +171,21 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_EXTEND:
     R = ScalarizeVecRes_StrictFPOp(N);
     break;
+  case ISD::UADDO:
+  case ISD::SADDO:
+  case ISD::USUBO:
+  case ISD::SSUBO:
+  case ISD::UMULO:
+  case ISD::SMULO:
+    R = ScalarizeVecRes_OverflowOp(N, ResNo);
+    break;
   case ISD::SMULFIX:
-    R = ScalarizeVecRes_SMULFIX(N);
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+    R = ScalarizeVecRes_MULFIX(N);
     break;
   }
 
@@ -197,7 +209,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
                      Op0.getValueType(), Op0, Op1, Op2);
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
   SDValue Op0 = GetScalarizedVector(N->getOperand(0));
   SDValue Op1 = GetScalarizedVector(N->getOperand(1));
   SDValue Op2 = N->getOperand(2);
@@ -235,6 +247,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
   return Result;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
+                                                     unsigned ResNo) {
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+  EVT OvVT = N->getValueType(1);
+
+  SDValue ScalarLHS, ScalarRHS;
+  if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
+    ScalarLHS = GetScalarizedVector(N->getOperand(0));
+    ScalarRHS = GetScalarizedVector(N->getOperand(1));
+  } else {
+    SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
+    DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
+    DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
+    ScalarLHS = ElemsLHS[0];
+    ScalarRHS = ElemsRHS[0];
+  }
+
+  SDVTList ScalarVTs = DAG.getVTList(
+      ResVT.getVectorElementType(), OvVT.getVectorElementType());
+  SDNode *ScalarNode = DAG.getNode(
+      N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+
+  // Replace the other vector result not being explicitly scalarized here.
+  unsigned OtherNo = 1 - ResNo;
+  EVT OtherVT = N->getValueType(OtherNo);
+  if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+    SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+  } else {
+    SDValue OtherVal = DAG.getNode(
+        ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
+    ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+  }
+
+  return SDValue(ScalarNode, ResNo);
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
                                                        unsigned ResNo) {
   SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -275,6 +324,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
                      NewVT, Op, N->getOperand(1));
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  SDValue Op = GetScalarizedVector(N->getOperand(1));
+  SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+                            { NewVT, MVT::Other }, 
+                            { N->getOperand(0), Op, N->getOperand(2) });
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
   SDValue Op = GetScalarizedVector(N->getOperand(0));
   return DAG.getNode(ISD::FPOWI, SDLoc(N),
@@ -558,9 +619,27 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::STORE:
       Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
       break;
+    case ISD::STRICT_FP_ROUND:
+      Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+      break;
     case ISD::FP_ROUND:
       Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
       break;
+    case ISD::VECREDUCE_FADD:
+    case ISD::VECREDUCE_FMUL:
+    case ISD::VECREDUCE_ADD:
+    case ISD::VECREDUCE_MUL:
+    case ISD::VECREDUCE_AND:
+    case ISD::VECREDUCE_OR:
+    case ISD::VECREDUCE_XOR:
+    case ISD::VECREDUCE_SMAX:
+    case ISD::VECREDUCE_SMIN:
+    case ISD::VECREDUCE_UMAX:
+    case ISD::VECREDUCE_UMIN:
+    case ISD::VECREDUCE_FMAX:
+    case ISD::VECREDUCE_FMIN:
+      Res = ScalarizeVecOp_VECREDUCE(N);
+      break;
     }
   }
 
@@ -691,6 +770,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, 
+                                                         unsigned OpNo) {
+  assert(OpNo == 1 && "Wrong operand for scalarization!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+                            { N->getValueType(0).getVectorElementType(), 
+                              MVT::Other },
+                            { N->getOperand(0), Elt, N->getOperand(2) });
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+} 
+
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  // Result type may be wider than element type.
+  if (Res.getValueType() != N->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
+  return Res;
+}
+
 //===----------------------------------------------------------------------===//
 //  Result Vector Splitting
 //===----------------------------------------------------------------------===//
@@ -748,6 +849,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VECTOR_SHUFFLE:
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;
+  case ISD::VAARG:
+    SplitVecRes_VAARG(N, Lo, Hi);
+    break;
 
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -755,6 +859,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
     break;
 
+  case ISD::ABS:
   case ISD::BITREVERSE:
   case ISD::BSWAP:
   case ISD::CTLZ:
@@ -774,7 +879,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FNEARBYINT:
   case ISD::FNEG:
   case ISD::FP_EXTEND:
+  case ISD::STRICT_FP_EXTEND:
   case ISD::FP_ROUND:
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::FRINT:
@@ -859,8 +966,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FTRUNC:
     SplitVecRes_StrictFPOp(N, Lo, Hi);
     break;
+  case ISD::UADDO:
+  case ISD::SADDO:
+  case ISD::USUBO:
+  case ISD::SSUBO:
+  case ISD::UMULO:
+  case ISD::SMULO:
+    SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
+    break;
   case ISD::SMULFIX:
-    SplitVecRes_SMULFIX(N, Lo, Hi);
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+    SplitVecRes_MULFIX(N, Lo, Hi);
     break;
   }
 
@@ -899,8 +1016,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
                    Op0Hi, Op1Hi, Op2Hi);
 }
 
-void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
-                                           SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
   SDValue LHSLo, LHSHi;
   GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
   SDValue RHSLo, RHSHi;
@@ -1205,6 +1321,104 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
   ReplaceValueWith(SDValue(N, 1), Chain);
 }
 
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDLoc dl(N);
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // If ResNE is 0, fully unroll the vector op.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  // Result types of each unrolled operation: the scalar value and its chain.
+  EVT ChainVTs[] = {EltVT, MVT::Other};
+  SmallVector<SDValue, 8> Chains;
+
+  unsigned i;
+  for (i = 0; i != NE; ++i) {
+    Operands[0] = Chain;
+    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+                    DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+                          DAG.getDataLayout())));
+      } else {
+        Operands[j] = Operand;
+      }
+    }
+    SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+    Scalar.getNode()->setFlags(N->getFlags());
+
+    // Add the scalar as well as its chain value to the
+    // result vectors.
+    Scalars.push_back(Scalar);
+    Chains.push_back(Scalar.getValue(1));
+  }
+
+  for (; i < ResNE; ++i)
+    Scalars.push_back(DAG.getUNDEF(EltVT));
+
+  // Build a new TokenFactor node to connect the chains back together.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  // Create a new BUILD_VECTOR node
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+  return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
+void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+                                              SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+  EVT ResVT = N->getValueType(0);
+  EVT OvVT = N->getValueType(1);
+  EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
+  std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
+  std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
+
+  SDValue LoLHS, HiLHS, LoRHS, HiRHS;
+  if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
+    GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
+    GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
+  } else {
+    std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
+    std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
+  }
+
+  unsigned Opcode = N->getOpcode();
+  SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
+  SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
+  SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
+  SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+
+  Lo = SDValue(LoNode, ResNo);
+  Hi = SDValue(HiNode, ResNo);
+
+  // Replace the other vector result not being explicitly split here.
+  unsigned OtherNo = 1 - ResNo;
+  EVT OtherVT = N->getValueType(OtherNo);
+  if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+    SetSplitVector(SDValue(N, OtherNo),
+                   SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+  } else {
+    SDValue OtherVal = DAG.getNode(
+        ISD::CONCAT_VECTORS, dl, OtherVT,
+        SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+    ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+  }
+}
+
 void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
@@ -1344,12 +1558,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
   unsigned Alignment = MLD->getOriginalAlignment();
   ISD::LoadExtType ExtType = MLD->getExtensionType();
 
-  // if Alignment is equal to the vector size,
-  // take the half of it for the second part
-  unsigned SecondHalfAlignment =
-    (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
-     Alignment/2 : Alignment;
-
   // Split Mask operand
   SDValue MaskLo, MaskHi;
   if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1381,7 +1589,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
 
   MMO = DAG.getMachineFunction().getMachineMemOperand(
       MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
-      HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+      HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
       MLD->getRanges());
 
   Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
@@ -1496,15 +1704,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
 
   // If the input also splits, handle it directly for a compile time speedup.
   // Otherwise split it by hand.
-  EVT InVT = N->getOperand(0).getValueType();
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+  EVT InVT = N->getOperand(OpNo).getValueType();
   if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
-    GetSplitVector(N->getOperand(0), Lo, Hi);
+    GetSplitVector(N->getOperand(OpNo), Lo, Hi);
   else
-    std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+    std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
 
   if (N->getOpcode() == ISD::FP_ROUND) {
     Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
     Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+  } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
+    Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, 
+                     { N->getOperand(0), Lo, N->getOperand(2) });
+    Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, 
+                     { N->getOperand(0), Hi, N->getOperand(2) });
+    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
+                                   Lo.getValue(1), Hi.getValue(1));
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+  } else if (N->isStrictFPOpcode()) {
+    Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, 
+                     { N->getOperand(0), Lo });
+    Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, 
+                     { N->getOperand(0), Hi });
+    // Legalize the chain result - switch anything that used the old chain to
+    // use the new one.
+    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
+                                   Lo.getValue(1), Hi.getValue(1));
+    ReplaceValueWith(SDValue(N, 1), NewChain);
   } else {
     Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
     Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1669,6 +1896,26 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
   }
 }
 
+// Split a vector VAARG result into two half-width VAARG nodes. Hi is chained
+// after Lo, and users of N's chain result are redirected to Hi's chain.
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+  EVT HalfVT =
+      N->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
+  const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
+      HalfVT.getTypeForEVT(*DAG.getContext()));
+
+  SDValue InChain = N->getOperand(0);
+  SDValue PtrOp = N->getOperand(1);
+  SDValue SVOp = N->getOperand(2);
+
+  // Both halves share the pointer and SV operands; only the chain differs.
+  Lo = DAG.getVAArg(HalfVT, dl, InChain, PtrOp, SVOp, Alignment);
+  Hi = DAG.getVAArg(HalfVT, dl, Lo.getValue(1), PtrOp, SVOp, Alignment);
+
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
 
 //===----------------------------------------------------------------------===//
 //  Operand Vector Splitting
@@ -1705,6 +1952,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::TRUNCATE:
       Res = SplitVecOp_TruncateHelper(N);
       break;
+    case ISD::STRICT_FP_ROUND:
     case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
     case ISD::FCOPYSIGN:         Res = SplitVecOp_FCOPYSIGN(N); break;
     case ISD::STORE:
@@ -1734,6 +1982,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::CTTZ:
     case ISD::CTLZ:
     case ISD::CTPOP:
+    case ISD::STRICT_FP_EXTEND:
     case ISD::FP_EXTEND:
     case ISD::SIGN_EXTEND:
     case ISD::ZERO_EXTEND:
@@ -1775,7 +2024,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   if (Res.getNode() == N)
     return true;
 
-  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+  if (N->isStrictFPOpcode())
+    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+           "Invalid operand expansion");
+  else
+    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
          "Invalid operand expansion");
 
   ReplaceValueWith(SDValue(N, 0), Res);
@@ -1863,14 +2116,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   EVT ResVT = N->getValueType(0);
   SDValue Lo, Hi;
   SDLoc dl(N);
-  GetSplitVector(N->getOperand(0), Lo, Hi);
+  GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
   EVT InVT = Lo.getValueType();
 
   EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
                                InVT.getVectorNumElements());
 
-  Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
-  Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+  if (N->isStrictFPOpcode()) {
+    Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, 
+                     { N->getOperand(0), Lo });
+    Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, 
+                     { N->getOperand(0), Hi });
+
+    // Build a factor node to remember that this operation is independent
+    // of the other one.
+    SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                             Hi.getValue(1));
+  
+    // Legalize the chain result - switch anything that used the old chain to
+    // use the new one.
+    ReplaceValueWith(SDValue(N, 1), Ch);
+  } else {
+    Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+    Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+  }
 
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
 }
@@ -1920,7 +2189,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 
   if (isa<ConstantSDNode>(Idx)) {
     uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-    assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
 
     SDValue Lo, Hi;
     GetSplitVector(Vec, Lo, Hi);
@@ -2079,12 +2347,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
   else
     std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
 
-  // if Alignment is equal to the vector size,
-  // take the half of it for the second part
-  unsigned SecondHalfAlignment =
-    (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
-       Alignment/2 : Alignment;
-
   SDValue Lo, Hi;
   MachineMemOperand *MMO = DAG.getMachineFunction().
     getMachineMemOperand(N->getPointerInfo(),
@@ -2101,7 +2363,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
 
   MMO = DAG.getMachineFunction().getMachineMemOperand(
       N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
-      HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+      HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
       N->getRanges());
 
   Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -2343,14 +2605,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
   EVT ResVT = N->getValueType(0);
   SDValue Lo, Hi;
   SDLoc DL(N);
-  GetSplitVector(N->getOperand(0), Lo, Hi);
+  GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
   EVT InVT = Lo.getValueType();
 
   EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
                                InVT.getVectorNumElements());
 
-  Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
-  Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+  if (N->isStrictFPOpcode()) {
+    Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, 
+                     { N->getOperand(0), Lo, N->getOperand(2) });
+    Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, 
+                     { N->getOperand(0), Hi, N->getOperand(2) });
+    // Legalize the chain result - switch anything that used the old chain to
+    // use the new one.
+    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 
+                                   Lo.getValue(1), Hi.getValue(1));
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+  } else {
+    Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+    Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+  }
 
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
 }
@@ -2472,6 +2746,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_StrictFP(N);
     break;
 
+  case ISD::UADDO:
+  case ISD::SADDO:
+  case ISD::USUBO:
+  case ISD::SSUBO:
+  case ISD::UMULO:
+  case ISD::SMULO:
+    Res = WidenVecRes_OverflowOp(N, ResNo);
+    break;
+
   case ISD::FCOPYSIGN:
     Res = WidenVecRes_FCOPYSIGN(N);
     break;
@@ -2505,6 +2788,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_Convert(N);
     break;
 
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::STRICT_FP_ROUND:
+    Res = WidenVecRes_Convert_StrictFP(N);
+    break;
+
   case ISD::FABS:
   case ISD::FCEIL:
   case ISD::FCOS:
@@ -2523,13 +2811,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     // We're going to widen this vector op to a legal type by padding with undef
     // elements. If the wide vector op is eventually going to be expanded to
     // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
-    // libcalls on the undef elements. We are assuming that if the scalar op
-    // requires expanding, then the vector op needs expanding too.
+    // libcalls on the undef elements.
     EVT VT = N->getValueType(0);
-    if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
-      EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-      assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
-             "Target supports vector op, but scalar requires expansion?");
+    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
       break;
     }
@@ -2539,11 +2825,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   // any other unary ops.
   LLVM_FALLTHROUGH;
 
+  case ISD::ABS:
   case ISD::BITREVERSE:
   case ISD::BSWAP:
   case ISD::CTLZ:
+  case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTPOP:
   case ISD::CTTZ:
+  case ISD::CTTZ_ZERO_UNDEF:
   case ISD::FNEG:
   case ISD::FCANONICALIZE:
     Res = WidenVecRes_Unary(N);
@@ -2593,14 +2882,13 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
 
   SDLoc dl(ConcatOps[0]);
   EVT WidenEltVT = WidenVT.getVectorElementType();
-  int Idx = 0;
 
   // while (Some element of ConcatOps is not of type MaxVT) {
   //   From the end of ConcatOps, collect elements of the same type and put
   //   them into an op of the next larger supported type
   // }
   while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
-    Idx = ConcatEnd - 1;
+    int Idx = ConcatEnd - 1;
     VT = ConcatOps[Idx--].getValueType();
     while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
       Idx--;
@@ -2750,7 +3038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+    return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;
@@ -2846,6 +3134,58 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
   return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
 }
 
+// Widen result ResNo of a two-result vector overflow op: build a single wide
+// node yielding both results, then hook up the result not being widened here.
+SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
+  SDLoc DL(N);
+  EVT ArithVT = N->getValueType(0);
+  EVT FlagVT = N->getValueType(1);
+  EVT WideArithVT, WideFlagVT;
+  SDValue LHS, RHS;
+
+  // TODO: This might result in a widen/split loop.
+  if (ResNo == 0) {
+    // The arithmetic result sets the width; fetch the widened operands.
+    WideArithVT = TLI.getTypeToTransformTo(*DAG.getContext(), ArithVT);
+    WideFlagVT =
+        EVT::getVectorVT(*DAG.getContext(), FlagVT.getVectorElementType(),
+                         WideArithVT.getVectorNumElements());
+    LHS = GetWidenedVector(N->getOperand(0));
+    RHS = GetWidenedVector(N->getOperand(1));
+  } else {
+    // The overflow result sets the width; pad the operands with undef.
+    WideFlagVT = TLI.getTypeToTransformTo(*DAG.getContext(), FlagVT);
+    WideArithVT =
+        EVT::getVectorVT(*DAG.getContext(), ArithVT.getVectorElementType(),
+                         WideFlagVT.getVectorNumElements());
+    SDValue Zero =
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+    LHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideArithVT,
+                      DAG.getUNDEF(WideArithVT), N->getOperand(0), Zero);
+    RHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideArithVT,
+                      DAG.getUNDEF(WideArithVT), N->getOperand(1), Zero);
+  }
+
+  SDVTList WideVTs = DAG.getVTList(WideArithVT, WideFlagVT);
+  SDNode *Wide = DAG.getNode(N->getOpcode(), DL, WideVTs, LHS, RHS).getNode();
+
+  // Replace the other vector result not being explicitly widened here.
+  const unsigned OtherNo = 1 - ResNo;
+  SDValue WideOther(Wide, OtherNo);
+  EVT OtherVT = N->getValueType(OtherNo);
+  if (getTypeAction(OtherVT) != TargetLowering::TypeWidenVector) {
+    SDValue Zero =
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+    ReplaceValueWith(
+        SDValue(N, OtherNo),
+        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OtherVT, WideOther, Zero));
+  } else {
+    SetWidenedVector(SDValue(N, OtherNo), WideOther);
+  }
+
+  return SDValue(Wide, ResNo);
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   SDValue InOp = N->getOperand(0);
   SDLoc DL(N);
@@ -2929,6 +3269,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
   return DAG.getBuildVector(WidenVT, DL, Ops);
 }
 
+/// Widen the result of a strict FP conversion (STRICT_FP_EXTEND/_ROUND) by
+/// unrolling it into one scalar op per original element; the extra lanes stay
+/// undef and N's chain is replaced by a TokenFactor of the scalar chains.
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+  SDValue InOp = N->getOperand(1);
+  SDLoc DL(N);
+  // Copy of N's operands; slot 1 (the vector input) is overwritten per lane.
+  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InEltVT = InOp.getValueType().getVectorElementType();
+  unsigned Opcode = N->getOpcode();
+
+  // FIXME: Optimizations need to be implemented here.
+  // For now, always unroll into scalar strict ops and rebuild the vector.
+  EVT EltVT = WidenVT.getVectorElementType();
+  SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+  SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+  SmallVector<SDValue, 32> OpChains;
+  // Use the original element count so we don't do more scalar ops than
+  // necessary.
+  unsigned MinElts = N->getValueType(0).getVectorNumElements();
+  for (unsigned i = 0; i < MinElts; ++i) {
+    NewOps[1] = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+        DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+    OpChains.push_back(Ops[i].getValue(1));
+  }
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
   unsigned Opcode = N->getOpcode();
   SDValue InOp = N->getOperand(0);
@@ -3654,8 +4031,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
     return Res;
   }
 
-  InOp1 = GetWidenedVector(InOp1);
-  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  // If the inputs also widen, handle them directly. Otherwise widen by hand.
+  SDValue InOp2 = N->getOperand(1);
+  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+    InOp1 = GetWidenedVector(InOp1);
+    InOp2 = GetWidenedVector(InOp2);
+  } else {
+    InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+    InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+  }
 
   // Assume that the input and output will be widen appropriately.  If not,
   // we will have to unroll it at some point.
@@ -3698,6 +4082,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::MGATHER:            Res = WidenVecOp_MGATHER(N, OpNo); break;
   case ISD::MSCATTER:           Res = WidenVecOp_MSCATTER(N, OpNo); break;
   case ISD::SETCC:              Res = WidenVecOp_SETCC(N); break;
+  case ISD::VSELECT:            Res = WidenVecOp_VSELECT(N); break;
   case ISD::FCOPYSIGN:          Res = WidenVecOp_FCOPYSIGN(N); break;
 
   case ISD::ANY_EXTEND:
@@ -3707,6 +4092,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
     break;
 
   case ISD::FP_EXTEND:
+  case ISD::STRICT_FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::SINT_TO_FP:
@@ -3714,6 +4100,22 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::TRUNCATE:
     Res = WidenVecOp_Convert(N);
     break;
+
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_FMUL:
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_MUL:
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+  case ISD::VECREDUCE_UMIN:
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+    Res = WidenVecOp_VECREDUCE(N);
+    break;
   }
 
   // If Res is null, the sub-method took care of registering the result.
@@ -3725,8 +4127,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
     return true;
 
 
-  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
-         "Invalid operand expansion");
+  if (N->isStrictFPOpcode())
+    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+           "Invalid operand expansion");
+  else
+    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+           "Invalid operand expansion");
 
   ReplaceValueWith(SDValue(N, 0), Res);
   return false;
@@ -3806,7 +4212,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   EVT EltVT = VT.getVectorElementType();
   SDLoc dl(N);
   unsigned NumElts = VT.getVectorNumElements();
-  SDValue InOp = N->getOperand(0);
+  SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
   assert(getTypeAction(InOp.getValueType()) ==
              TargetLowering::TypeWidenVector &&
          "Unexpected type action");
@@ -3815,10 +4221,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
   unsigned Opcode = N->getOpcode();
 
   // See if a widened result type would be legal, if so widen the node.
+  // FIXME: This isn't safe for StrictFP. Other optimization here is needed.
   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                 InVT.getVectorNumElements());
-  if (TLI.isTypeLegal(WideVT)) {
-    SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+  if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+    SDValue Res;
+    if (N->isStrictFPOpcode()) {
+      Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, 
+                        { N->getOperand(0), InOp });
+      // Legalize the chain result - switch anything that used the old chain to
+      // use the new one.
+      ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+    } else
+      Res = DAG.getNode(Opcode, dl, WideVT, InOp);
     return DAG.getNode(
         ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -3828,12 +4243,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
 
   // Unroll the convert into some scalar code and create a nasty build vector.
   SmallVector<SDValue, 16> Ops(NumElts);
-  for (unsigned i=0; i < NumElts; ++i)
-    Ops[i] = DAG.getNode(
-        Opcode, dl, EltVT,
-        DAG.getNode(
-            ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
-            DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+  if (N->isStrictFPOpcode()) {
+    SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+    SmallVector<SDValue, 32> OpChains;
+    for (unsigned i=0; i < NumElts; ++i) {
+      NewOps[1] = DAG.getNode(
+          ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+          DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
+      OpChains.push_back(Ops[i].getValue(1));
+    }
+    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+  } else {
+    for (unsigned i = 0; i < NumElts; ++i)
+      Ops[i] = DAG.getNode(
+          Opcode, dl, EltVT,
+          DAG.getNode(
+              ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+              DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+  }
 
   return DAG.getBuildVector(VT, dl, Ops);
 }
@@ -3859,6 +4288,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
     }
   }
 
+  // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+  // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+  // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+  // having to copy via memory.
+  if (VT.isVector()) {
+    EVT EltVT = VT.getVectorElementType();
+    unsigned EltSize = EltVT.getSizeInBits();
+    if (InWidenSize % EltSize == 0) {
+      unsigned NewNumElts = InWidenSize / EltSize;
+      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+      if (TLI.isTypeLegal(NewVT)) {
+        SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      }
+    }
+  }
+
   return CreateStackStoreLoad(InOp, VT);
 }
 
@@ -4000,10 +4447,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
   SDValue Index = MSC->getIndex();
   SDValue Scale = MSC->getScale();
 
-  unsigned NumElts;
   if (OpNo == 1) {
     DataOp = GetWidenedVector(DataOp);
-    NumElts = DataOp.getValueType().getVectorNumElements();
+    unsigned NumElts = DataOp.getValueType().getVectorNumElements();
 
     // Widen index.
     EVT IndexVT = Index.getValueType();
@@ -4041,8 +4487,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
 
   // Get a new SETCC node to compare the newly widened operands.
   // Only some of the compared elements are legal.
-  EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
-                                   InOp0.getValueType());
+  EVT SVT = getSetCCResultType(InOp0.getValueType());
   // The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
   if (VT.getScalarType() == MVT::i1)
     SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
@@ -4062,6 +4507,80 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   return PromoteTargetBoolean(CC, VT);
 }
 
+/// Widen the operand of a VECREDUCE_* node, padding with the neutral element.
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+  SDLoc dl(N);
+  SDValue Op = GetWidenedVector(N->getOperand(0));
+  EVT OrigVT = N->getOperand(0).getValueType();
+  EVT WideVT = Op.getValueType();
+  EVT ElemVT = OrigVT.getVectorElementType();
+  const double Inf = std::numeric_limits<double>::infinity();
+
+  SDValue NeutralElem;
+  switch (N->getOpcode()) {
+  case ISD::VECREDUCE_ADD:
+  case ISD::VECREDUCE_OR:
+  case ISD::VECREDUCE_XOR:
+  case ISD::VECREDUCE_UMAX:
+    NeutralElem = DAG.getConstant(0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_MUL:
+    NeutralElem = DAG.getConstant(1, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_AND:
+  case ISD::VECREDUCE_UMIN:
+    NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_SMAX:
+    NeutralElem = DAG.getConstant(
+        APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_SMIN:
+    NeutralElem = DAG.getConstant(
+        APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FADD:
+    NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMUL:
+    NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
+    break;
+  case ISD::VECREDUCE_FMAX:
+    NeutralElem = DAG.getConstantFP(-Inf, dl, ElemVT); // max(x, -inf) == x
+    break;
+  case ISD::VECREDUCE_FMIN:
+    NeutralElem = DAG.getConstantFP(Inf, dl, ElemVT); // min(x, +inf) == x
+    break;
+  }
+
+  // Overwrite the lanes added by widening so they cannot affect the result.
+  unsigned OrigElts = OrigVT.getVectorNumElements();
+  unsigned WideElts = WideVT.getVectorNumElements();
+  for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+        DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+}
+
+// Widen the i1 condition operand of a VSELECT whose value operands and result
+// already have a legal, non-power-of-2 vector type of the same width.
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+  assert(ResVT.isVector() && !ResVT.isPow2VectorType() && isTypeLegal(ResVT));
+
+  // Fetch the widened condition; widen the legal value operands by hand.
+  SDValue WideCond = GetWidenedVector(N->getOperand(0));
+  SDValue WideLHS = DAG.WidenVector(N->getOperand(1), DL);
+  SDValue WideRHS = DAG.WidenVector(N->getOperand(2), DL);
+
+  SDValue WideSel = DAG.getNode(N->getOpcode(), DL, WideLHS.getValueType(),
+                                WideCond, WideLHS, WideRHS);
+  return DAG.getNode(
+      ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideSel,
+      DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
 
 //===----------------------------------------------------------------------===//
 // Vector Widening Utilities
@@ -4102,6 +4621,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
         isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (MemVTWidth == WidenWidth)
+        return MemVT;
       RetVT = MemVT;
       break;
     }
@@ -4113,7 +4634,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
        VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
     EVT MemVT = (MVT::SimpleValueType) VT;
     unsigned MemVTWidth = MemVT.getSizeInBits();
-    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+    if ((Action == TargetLowering::TypeLegal ||
+         Action == TargetLowering::TypePromoteInteger) &&
+        WidenEltVT == MemVT.getVectorElementType() &&
         (WidenWidth % MemVTWidth) == 0 &&
         isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 7f369c746d24..34660e3a48ec 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -1,9 +1,8 @@
 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,6 +84,7 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
       case ISD::CopyFromReg:    NumberDeps++;  break;
       case ISD::CopyToReg:      break;
       case ISD::INLINEASM:      break;
+      case ISD::INLINEASM_BR:   break;
     }
     if (!ScegN->isMachineOpcode())
       continue;
@@ -121,6 +121,7 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
       case ISD::CopyFromReg:    break;
       case ISD::CopyToReg:      NumberDeps++;  break;
       case ISD::INLINEASM:      break;
+      case ISD::INLINEASM_BR:   break;
     }
     if (!ScegN->isMachineOpcode())
       continue;
@@ -446,6 +447,7 @@ int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
         break;
 
       case ISD::INLINEASM:
+      case ISD::INLINEASM_BR:
         ResCount += PriorityThree;
         break;
       }
@@ -548,6 +550,7 @@ void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
           NodeNumDefs++;
           break;
         case ISD::INLINEASM:
+        case ISD::INLINEASM_BR:
           NodeNumDefs++;
           break;
       }
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index f7566b246f32..65b9d017fc5c 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -1,9 +1,8 @@
 //===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,7 +135,8 @@ public:
   /// dbg.addr is emitted twice.
   void clearIsEmitted() { Emitted = false; }
 
-  LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dump() const;
+  LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
 };
 
 /// Holds the information from a dbg_label node through SDISel.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 90e109b022fd..2cb850fa1a3d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -1,9 +1,8 @@
 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -480,7 +479,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
   }
 
   for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
-    if (Node->getOpcode() == ISD::INLINEASM) {
+    if (Node->getOpcode() == ISD::INLINEASM ||
+        Node->getOpcode() == ISD::INLINEASM_BR) {
       // Inline asm can clobber physical defs.
       unsigned NumOps = Node->getNumOperands();
       if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8d75b8133a30..34b4c8502353 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,9 +1,8 @@
 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -220,6 +219,14 @@ public:
     return Topo.WillCreateCycle(SU, TargetSU);
   }
 
+  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+  /// The edge is added to SU immediately; no value is returned.
+  /// Does *NOT* update the topological ordering! It just queues an update.
+  void AddPredQueued(SUnit *SU, const SDep &D) {
+    Topo.AddPredQueued(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
   /// AddPred - adds a predecessor edge to SUnit SU.
   /// This returns true if this is a new predecessor.
   /// Updates the topological ordering if required.
@@ -267,24 +274,22 @@ private:
   void ListScheduleBottomUp();
 
   /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
-  /// Updates the topological ordering if required.
   SUnit *CreateNewSUnit(SDNode *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = newSUnit(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
   /// CreateClone - Creates a new SUnit from an existing one.
-  /// Updates the topological ordering if required.
   SUnit *CreateClone(SUnit *N) {
     unsigned NumSUnits = SUnits.size();
     SUnit *NewNode = Clone(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
-      Topo.InitDAGTopologicalSorting();
+      Topo.MarkDirty();
     return NewNode;
   }
 
@@ -366,7 +371,7 @@ void ScheduleDAGRRList::Schedule() {
   BuildSchedGraph(nullptr);
 
   LLVM_DEBUG(dump());
-  Topo.InitDAGTopologicalSorting();
+  Topo.MarkDirty();
 
   AvailableQueue->initNodes(SUnits);
 
@@ -709,6 +714,7 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
     // removed.
     return;
   case ISD::INLINEASM:
+  case ISD::INLINEASM_BR:
     // For inline asm, clear the pipeline state.
     HazardRec->Reset();
     return;
@@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     NewSU = &SUnits[N->getNodeId()];
     // If NewSU has already been scheduled, we need to clone it, but this
     // negates the benefit to unfolding so just return SU.
-    if (NewSU->isScheduled)
+    if (NewSU->isScheduled) {
       return SU;
+    }
     isNewN = false;
   } else {
     NewSU = CreateNewSUnit(N);
@@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
   for (const SDep &Pred : ChainPreds) {
     RemovePred(SU, Pred);
     if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
   }
   for (const SDep &Pred : LoadPreds) {
     RemovePred(SU, Pred);
     if (isNewLoad)
-      AddPred(LoadSU, Pred);
+      AddPredQueued(LoadSU, Pred);
   }
   for (const SDep &Pred : NodePreds) {
     RemovePred(SU, Pred);
-    AddPred(NewSU, Pred);
+    AddPredQueued(NewSU, Pred);
   }
   for (SDep D : NodeSuccs) {
     SUnit *SuccDep = D.getSUnit();
     D.setSUnit(SU);
     RemovePred(SuccDep, D);
     D.setSUnit(NewSU);
-    AddPred(SuccDep, D);
+    AddPredQueued(SuccDep, D);
     // Balance register pressure.
     if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
         !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
@@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
     RemovePred(SuccDep, D);
     if (isNewLoad) {
       D.setSUnit(LoadSU);
-      AddPred(SuccDep, D);
+      AddPredQueued(SuccDep, D);
     }
   }
 
@@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
   // by LoadSU.
   SDep D(LoadSU, SDep::Data, 0);
   D.setLatency(LoadSU->Latency);
-  AddPred(NewSU, D);
+  AddPredQueued(NewSU, D);
 
   if (isNewLoad)
     AvailableQueue->addNode(LoadSU);
@@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
   // New SUnit has the exact same predecessors.
   for (SDep &Pred : SU->Preds)
     if (!Pred.isArtificial())
-      AddPred(NewSU, Pred);
+      AddPredQueued(NewSU, Pred);
 
   // Only copy scheduled successors. Cut them from old node's successor
   // list and move them over.
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     if (SuccSU->isScheduled) {
       SDep D = Succ;
       D.setSUnit(NewSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
       D.setSUnit(SU);
       DelDeps.push_back(std::make_pair(SuccSU, D));
     }
@@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
     if (SuccSU->isScheduled) {
       SDep D = Succ;
       D.setSUnit(CopyToSU);
-      AddPred(SuccSU, D);
+      AddPredQueued(SuccSU, D);
       DelDeps.push_back(std::make_pair(SuccSU, Succ));
     }
     else {
       // Avoid scheduling the def-side copy before other successors. Otherwise
       // we could introduce another physreg interference on the copy and
       // continue inserting copies indefinitely.
-      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
     }
   }
   for (auto &DelDep : DelDeps)
@@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
 
   SDep FromDep(SU, SDep::Data, Reg);
   FromDep.setLatency(SU->Latency);
-  AddPred(CopyFromSU, FromDep);
+  AddPredQueued(CopyFromSU, FromDep);
   SDep ToDep(CopyFromSU, SDep::Data, 0);
   ToDep.setLatency(CopyFromSU->Latency);
-  AddPred(CopyToSU, ToDep);
+  AddPredQueued(CopyToSU, ToDep);
 
   AvailableQueue->updateNode(SU);
   AvailableQueue->addNode(CopyFromSU);
@@ -1348,7 +1355,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
   }
 
   for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
-    if (Node->getOpcode() == ISD::INLINEASM) {
+    if (Node->getOpcode() == ISD::INLINEASM ||
+        Node->getOpcode() == ISD::INLINEASM_BR) {
       // Inline asm can clobber physical defs.
       unsigned NumOps = Node->getNumOperands();
       if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -1477,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
   if (CurSU)
     return CurSU;
 
+  // We query the topological order in the loop body, so make sure outstanding
+  // updates are applied before entering it (we only enter the loop if there
+  // are some interferences). If we make changes to the ordering, we exit
+  // the loop.
+
   // All candidates are delayed due to live physical reg dependencies.
   // Try backtracking, code duplication, or inserting cross class copies
   // to resolve it.
@@ -1506,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
       }
       LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                         << ") to SU(" << TrySU->NodeNum << ")\n");
-      AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+      AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
 
       // If one or more successors has been unscheduled, then the current
       // node is no longer available.
@@ -1560,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
       LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                         << " to SU #" << Copies.front()->NodeNum << "\n");
-      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
       NewDef = Copies.back();
     }
 
     LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                       << " to SU #" << TrySU->NodeNum << "\n");
     LiveRegDefs[Reg] = NewDef;
-    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
     TrySU->isAvailable = false;
     CurSU = NewDef;
   }
@@ -2939,6 +2952,29 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
         continue;
 
+    SDNode *PredFrameSetup = nullptr;
+    for (const SDep &Pred : SU.Preds)
+      if (Pred.isCtrl() && Pred.getSUnit()) {
+        // Look at control (non-data) dependence predecessors only.
+        SDNode *PredND = Pred.getSUnit()->getNode();
+
+        // If PredND is FrameSetup, we should not preschedule the node.
+        // Otherwise, during bottom-up scheduling, ADJCALLSTACKDOWN and
+        // ADJCALLSTACKUP may hold CallResource too long and prevent other
+        // calls from being scheduled. If there is no other available node
+        // to schedule, the scheduler will try to rename the register by
+        // creating a copy to avoid the conflict, which will fail because
+        // CallResource is not a real physical register.
+        if (PredND && PredND->isMachineOpcode() &&
+            (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+          PredFrameSetup = PredND;
+          break;
+        }
+      }
+    // Skip a node that has a FrameSetup predecessor.
+    if (PredFrameSetup != nullptr)
+      continue;
+
     // Locate the single data predecessor.
     SUnit *PredSU = nullptr;
     for (const SDep &Pred : SU.Preds)
@@ -2993,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
       if (SuccSU != &SU) {
         Edge.setSUnit(PredSU);
         scheduleDAG->RemovePred(SuccSU, Edge);
-        scheduleDAG->AddPred(&SU, Edge);
+        scheduleDAG->AddPredQueued(&SU, Edge);
         Edge.setSUnit(&SU);
-        scheduleDAG->AddPred(SuccSU, Edge);
+        scheduleDAG->AddPredQueued(SuccSU, Edge);
         --i;
       }
     }
@@ -3077,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
           LLVM_DEBUG(dbgs()
                      << "    Adding a pseudo-two-addr edge from SU #"
                      << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
-          scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+          scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
         }
       }
     }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index e258f0a218a5..568c6191e512 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -1,9 +1,8 @@
 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -206,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
   if (!Chain)
     return;
 
+  // Skip any load instruction that has a tied input. There may be an additional
+  // dependency requiring a different order than by increasing offsets, and the
+  // added glue may introduce a cycle.
+  auto hasTiedInput = [this](const SDNode *N) {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+      if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+        return true;
+    }
+
+    return false;
+  };
+
   // Look for other loads of the same chain. Find loads that are loading from
   // the same base pointer and different offsets.
   SmallPtrSet<SDNode*, 16> Visited;
@@ -213,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
   DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
   bool Cluster = false;
   SDNode *Base = Node;
+
+  if (hasTiedInput(Base))
+    return;
+
   // This algorithm requires a reasonably low use count before finding a match
   // to avoid uselessly blowing up compile time in large blocks.
   unsigned UseCount = 0;
@@ -223,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
       continue;
     int64_t Offset1, Offset2;
     if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
-        Offset1 == Offset2)
+        Offset1 == Offset2 ||
+        hasTiedInput(User)) {
       // FIXME: Should be ok if they addresses are identical. But earlier
       // optimizations really should have eliminated one of the loads.
       continue;
+    }
     if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
       Offsets.push_back(Offset1);
     O2SMap.insert(std::make_pair(Offset2, User));
@@ -741,28 +759,27 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
 static void
 ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
                   DenseMap<SDValue, unsigned> &VRBaseMap,
-                  SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
-                  SmallSet<unsigned, 8> &Seen) {
+                  SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
+                  SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
   unsigned Order = N->getIROrder();
-  if (!Order || !Seen.insert(Order).second) {
+  if (!Order || Seen.count(Order)) {
     // Process any valid SDDbgValues even if node does not have any order
     // assigned.
     ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
     return;
   }
 
-  MachineBasicBlock *BB = Emitter.getBlock();
-  auto IP = Emitter.getInsertPos();
-  if (IP == BB->begin() || BB->back().isPHI() ||
-      // Fast-isel may have inserted some instructions, in which case the
-      // BB->back().isPHI() test will not fire when we want it to.
-      std::prev(IP)->isPHI()) {
-    // Did not insert any instruction.
-    Orders.push_back({Order, (MachineInstr *)nullptr});
-    return;
+  // If a new instruction was generated for this Order number, record it.
+  // Otherwise, leave this order number unseen: we will either find later
+  // instructions for it, or leave it unseen if there were no instructions at
+  // all.
+  if (NewInsn) {
+    Seen.insert(Order);
+    Orders.push_back({Order, NewInsn});
   }
 
-  Orders.push_back({Order, &*std::prev(IP)});
+  // Even if no instruction was generated, a Value may have become defined via
+  // earlier nodes. Try to process them now.
   ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
 }
 
@@ -815,6 +832,43 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
   SmallSet<unsigned, 8> Seen;
   bool HasDbg = DAG->hasDebugValues();
 
+  // Emit a node, and determine where its first instruction is for debuginfo.
+  // Zero, one, or multiple instructions can be created when emitting a node.
+  auto EmitNode =
+      [&](SDNode *Node, bool IsClone, bool IsCloned,
+          DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+    // Fetch instruction prior to this, or end() if nonexistent.
+    auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+      if (I == BB->begin())
+        return BB->end();
+      else
+        return std::prev(Emitter.getInsertPos());
+    };
+
+    MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+    Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+    MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+    // If the iterator did not change, no instructions were inserted.
+    if (Before == After)
+      return nullptr;
+
+    MachineInstr *MI;
+    if (Before == BB->end()) {
+      // There were no prior instructions; the new ones must start at the
+      // beginning of the block.
+      MI = &Emitter.getBlock()->instr_front();
+    } else {
+      // Return first instruction after the pre-existing instructions.
+      MI = &*std::next(Before);
+    }
+
+    if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+      MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+
+    return MI;
+  };
+
   // If this is the first BB, emit byval parameter dbg_value's.
   if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
     SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
@@ -851,18 +905,18 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
       GluedNodes.push_back(N);
     while (!GluedNodes.empty()) {
       SDNode *N = GluedNodes.back();
-      Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+      auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
       // Remember the source order of the inserted instruction.
       if (HasDbg)
-        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
       GluedNodes.pop_back();
     }
-    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
-                     VRBaseMap);
+    auto NewInsn =
+        EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
     // Remember the source order of the inserted instruction.
     if (HasDbg)
-      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
-                        Seen);
+      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+                        NewInsn);
   }
 
   // Insert all the dbg_values which have not already been inserted in source
@@ -873,7 +927,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
     // Sort the source order instructions and use the order to insert debug
     // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
     // regardless of the host's implementation fo std::sort.
-    std::stable_sort(Orders.begin(), Orders.end(), less_first());
+    llvm::stable_sort(Orders, less_first());
     std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
                      [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
                        return LHS->getOrder() < RHS->getOrder();
@@ -887,8 +941,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
       unsigned Order = Orders[i].first;
       MachineInstr *MI = Orders[i].second;
       // Insert all SDDbgValue's whose order(s) are before "Order".
-      if (!MI)
-        continue;
+      assert(MI);
       for (; DI != DE; ++DI) {
         if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
           break;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 3fa7ad895725..5163b4fa4fd3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -1,9 +1,8 @@
 //===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 416061475b1a..ab06b55b49fd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,9 +1,8 @@
 //===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 647496c1afcb..5852e693fa9f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
 // Default null implementations of the callbacks.
 void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
 void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
 
 void SelectionDAG::DAGNodeDeletedListener::anchor() {}
 
@@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) {
   // is probably the desired behavior.
   if (N->getNumOperands() == 0)
     return false;
-
-  for (const SDValue &Op : N->op_values())
-    if (!Op.isUndef())
-      return false;
-
-  return true;
+  return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
 }
 
 bool ISD::matchUnaryPredicate(SDValue Op,
@@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,
 bool ISD::matchBinaryPredicate(
     SDValue LHS, SDValue RHS,
     std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
-    bool AllowUndefs) {
-  if (LHS.getValueType() != RHS.getValueType())
+    bool AllowUndefs, bool AllowTypeMismatch) {
+  if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
     return false;
 
   // TODO: Add support for scalar UNDEF cases?
@@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate(
     auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
     if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
       return false;
-    if (LHSOp.getValueType() != SVT ||
-        LHSOp.getValueType() != RHSOp.getValueType())
+    if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
+                               LHSOp.getValueType() != RHSOp.getValueType()))
       return false;
     if (!Match(LHSCst, RHSCst))
       return false;
@@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   case ISD::TargetFrameIndex:
     ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
     break;
+  case ISD::LIFETIME_START:
+  case ISD::LIFETIME_END:
+    if (cast<LifetimeSDNode>(N)->hasOffset()) {
+      ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
+      ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
+    }
+    break;
   case ISD::JumpTable:
   case ISD::TargetJumpTable:
     ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) {
   N->PersistentId = NextPersistentId++;
   VerifySDNode(N);
 #endif
+  for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+    DUL->NodeInserted(N);
 }
 
 /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
                  getConstant(Imm, DL, Op.getValueType()));
 }
 
+SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+  // Only unsigned pointer semantics are supported right now. In the future this
+  // might delegate to TLI to check pointer signedness.
+  return getZExtOrTrunc(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+  // Only unsigned pointer semantics are supported right now. In the future this
+  // might delegate to TLI to check pointer signedness.
+  return getZeroExtendInReg(Op, DL, VT);
+}
+
 /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
 SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
   EVT EltVT = VT.getScalarType();
@@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
   return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
 }
 
+SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
+                                             const SDLoc &DL, bool LegalTypes) {
+  EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+  return getConstant(Val, DL, ShiftVT);
+}
+
 SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
                                     bool isTarget) {
   return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
@@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
-    Alignment = MF->getFunction().optForSize()
+    Alignment = MF->getFunction().hasOptSize()
                     ? getDataLayout().getABITypeAlignment(C->getType())
                     : getDataLayout().getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
   if (SDNode *E = FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+  auto *N =
+      newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
   createOperands(N, Ops);
 
   CSEMap.InsertNode(N, IP);
@@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
   case ISD::SETUO:
   case ISD::SETUEQ:
   case ISD::SETUNE:
-    assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+    assert(!OpVT.isInteger() && "Illegal setcc for integer!");
     break;
   }
 
+  if (OpVT.isInteger()) {
+    // For EQ and NE, we can always pick a value for the undef to make the
+    // predicate pass or fail, so we can return undef.
+    // Matches behavior in llvm::ConstantFoldCompareInstruction.
+    // icmp eq/ne X, undef -> undef.
+    if ((N1.isUndef() || N2.isUndef()) &&
+        (Cond == ISD::SETEQ || Cond == ISD::SETNE))
+      return getUNDEF(VT);
+
+    // If both operands are undef, we can return undef for int comparison.
+    // icmp undef, undef -> undef.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
+
+    // icmp X, X -> true/false
+    // icmp X, undef -> true/false because undef could be X.
+    if (N1 == N2)
+      return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
+  }
+
   if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
     const APInt &C2 = N2C->getAPIntValue();
     if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
@@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
       }
     }
   }
-  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
-    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
-      APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
-      switch (Cond) {
-      default: break;
-      case ISD::SETEQ:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
-                                               OpVT);
-      case ISD::SETNE:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
-                                               R==APFloat::cmpLessThan, dl, VT,
-                                               OpVT);
-      case ISD::SETLT:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
-                                               OpVT);
-      case ISD::SETGT:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
-                                               VT, OpVT);
-      case ISD::SETLE:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
-                                               R==APFloat::cmpEqual, dl, VT,
-                                               OpVT);
-      case ISD::SETGE:  if (R==APFloat::cmpUnordered)
-                          return getUNDEF(VT);
-                        LLVM_FALLTHROUGH;
-      case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
-                                           R==APFloat::cmpEqual, dl, VT, OpVT);
-      case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
-                                               OpVT);
-      case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
-                                               OpVT);
-      case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
-                                               R==APFloat::cmpEqual, dl, VT,
-                                               OpVT);
-      case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
-                                               OpVT);
-      case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
-                                               R==APFloat::cmpLessThan, dl, VT,
-                                               OpVT);
-      case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
-                                               R==APFloat::cmpUnordered, dl, VT,
-                                               OpVT);
-      case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
-                                               VT, OpVT);
-      case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
-                                               OpVT);
-      }
-    } else {
-      // Ensure that the constant occurs on the RHS.
-      ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
-      MVT CompVT = N1.getValueType().getSimpleVT();
-      if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
-        return SDValue();
 
-      return getSetCC(dl, VT, N2, N1, SwappedCond);
+  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+  if (N1CFP && N2CFP) {
+    APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF());
+    switch (Cond) {
+    default: break;
+    case ISD::SETEQ:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+                                             OpVT);
+    case ISD::SETNE:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                             R==APFloat::cmpLessThan, dl, VT,
+                                             OpVT);
+    case ISD::SETLT:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+                                             OpVT);
+    case ISD::SETGT:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+                                             VT, OpVT);
+    case ISD::SETLE:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+                                             R==APFloat::cmpEqual, dl, VT,
+                                             OpVT);
+    case ISD::SETGE:  if (R==APFloat::cmpUnordered)
+                        return getUNDEF(VT);
+                      LLVM_FALLTHROUGH;
+    case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                         R==APFloat::cmpEqual, dl, VT, OpVT);
+    case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+                                             OpVT);
+    case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+                                             OpVT);
+    case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+                                             R==APFloat::cmpEqual, dl, VT,
+                                             OpVT);
+    case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+                                             OpVT);
+    case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+                                             R==APFloat::cmpLessThan, dl, VT,
+                                             OpVT);
+    case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+                                             R==APFloat::cmpUnordered, dl, VT,
+                                             OpVT);
+    case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+                                             VT, OpVT);
+    case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+                                             OpVT);
+    }
+  } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
+    // Ensure that the constant occurs on the RHS.
+    ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+    if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
+      return SDValue();
+    return getSetCC(dl, VT, N2, N1, SwappedCond);
+  } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+             (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+    // If an operand is known to be a nan (or undef that could be a nan), we can
+    // fold it.
+    // Choosing NaN for the undef will always make unordered comparisons succeed
+    // and ordered comparisons fail.
+    // Matches behavior in llvm::ConstantFoldCompareInstruction.
+    switch (ISD::getUnorderedFlavor(Cond)) {
+    default:
+      llvm_unreachable("Unknown flavor!");
+    case 0: // Known false.
+      return getBoolConstant(false, dl, VT, OpVT);
+    case 1: // Known true.
+      return getBoolConstant(true, dl, VT, OpVT);
+    case 2: // Undefined.
+      return getUNDEF(VT);
     }
   }
 
@@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
 }
 
 /// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+/// the bits specified by DemandedBits are used.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
+  // No element mask given: demand every lane (a scalar counts as one lane).
+  EVT ValTy = V.getValueType();
+  unsigned LaneCount = ValTy.isVector() ? ValTy.getVectorNumElements() : 1;
+  APInt AllLanes = APInt::getAllOnesValue(LaneCount);
+  return GetDemandedBits(V, DemandedBits, AllLanes);
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used in the elements specified by
+/// DemandedElts.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
+                                      const APInt &DemandedElts) {
   switch (V.getOpcode()) {
   default:
     break;
   case ISD::Constant: {
-    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+    auto *CV = cast<ConstantSDNode>(V.getNode());
     assert(CV && "Const value should be ConstSDNode.");
     const APInt &CVal = CV->getAPIntValue();
-    APInt NewVal = CVal & Mask;
+    APInt NewVal = CVal & DemandedBits;
     if (NewVal != CVal)
       return getConstant(NewVal, SDLoc(V), V.getValueType());
     break;
@@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
   case ISD::OR:
   case ISD::XOR:
     // If the LHS or RHS don't contribute bits to the or, drop them.
-    if (MaskedValueIsZero(V.getOperand(0), Mask))
+    if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
       return V.getOperand(1);
-    if (MaskedValueIsZero(V.getOperand(1), Mask))
+    if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
       return V.getOperand(0);
     break;
   case ISD::SRL:
     // Only look at single-use SRLs.
     if (!V.getNode()->hasOneUse())
       break;
-    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+    if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
       // See if we can recursively simplify the LHS.
       unsigned Amt = RHSC->getZExtValue();
 
       // Watch out for shift count overflow though.
-      if (Amt >= Mask.getBitWidth())
+      if (Amt >= DemandedBits.getBitWidth())
         break;
-      APInt NewMask = Mask << Amt;
-      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+      APInt SrcDemandedBits = DemandedBits << Amt;
+      if (SDValue SimplifyLHS =
+              GetDemandedBits(V.getOperand(0), SrcDemandedBits))
         return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
                        V.getOperand(1));
     }
     break;
   case ISD::AND: {
     // X & -1 -> X (ignoring bits which aren't demanded).
-    ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
-    if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
-      return V.getOperand(0);
+    // Also handle the case where masked out bits in X are known to be zero.
+    if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
+      const APInt &AndVal = RHSC->getAPIntValue();
+      if (DemandedBits.isSubsetOf(AndVal) ||
+          DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
+                                  AndVal))
+        return V.getOperand(0);
+    }
     break;
   }
   case ISD::ANY_EXTEND: {
     SDValue Src = V.getOperand(0);
     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
     // Being conservative here - only peek through if we only demand bits in the
-    // non-extended source (even though the extended bits are technically undef).
-    if (Mask.getActiveBits() > SrcBitWidth)
+    // non-extended source (even though the extended bits are technically
+    // undef).
+    if (DemandedBits.getActiveBits() > SrcBitWidth)
       break;
-    APInt SrcMask = Mask.trunc(SrcBitWidth);
-    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+    APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
+    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
       return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
     break;
   }
@@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
     unsigned ExVTBits = ExVT.getScalarSizeInBits();
 
     // If none of the extended bits are demanded, eliminate the sextinreg.
-    if (Mask.getActiveBits() <= ExVTBits)
+    if (DemandedBits.getActiveBits() <= ExVTBits)
       return V.getOperand(0);
 
     break;
@@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
 /// this predicate to simplify operations downstream.  Mask is known to be zero
 /// for bits that V cannot have.
-bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+                                     unsigned Depth) const {
+  // No element mask given: demand every lane (a scalar counts as one lane).
+  EVT ValTy = V.getValueType();
+  unsigned LaneCount = ValTy.isVector() ? ValTy.getVectorNumElements() : 1;
+  APInt AllLanes = APInt::getAllOnesValue(LaneCount);
+  return MaskedValueIsZero(V, Mask, AllLanes, Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts.  We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+                                     const APInt &DemandedElts,
                                      unsigned Depth) const {
-  return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+  return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
+/// MaskedValueIsAllOnes - Return true if '(V & Mask)' is known to equal Mask.
+bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
+                                        unsigned Depth) const {
+  return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
 }
 
 /// isSplatValue - Return true if the vector V has the same value
@@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
          (AllowUndefs || !UndefElts);
 }
 
-/// Helper function that checks to see if a node is a constant or a
-/// build vector of splat constants at least within the demanded elts.
-static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
-                                                   const APInt &DemandedElts) {
-  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
-    return CN;
-  if (N.getOpcode() != ISD::BUILD_VECTOR)
-    return nullptr;
-  EVT VT = N.getValueType();
-  ConstantSDNode *Cst = nullptr;
-  unsigned NumElts = VT.getVectorNumElements();
-  assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
-  for (unsigned i = 0; i != NumElts; ++i) {
-    if (!DemandedElts[i])
-      continue;
-    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
-    if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
-        C->getValueType(0) != VT.getScalarType())
-      return nullptr;
-    Cst = C;
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+  V = peekThroughExtractSubvectors(V);
+
+  EVT VT = V.getValueType();
+  unsigned Opcode = V.getOpcode();
+  switch (Opcode) {
+  default: {
+    APInt UndefElts;
+    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+    if (isSplatValue(V, DemandedElts, UndefElts)) {
+      // Handle case where all demanded elements are UNDEF.
+      if (DemandedElts.isSubsetOf(UndefElts)) {
+        SplatIdx = 0;
+        return getUNDEF(VT);
+      }
+      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+      return V;
+    }
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    // Check if this is a shuffle node doing a splat.
+    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+    // getTargetVShiftNode currently struggles without the splat source.
+    auto *SVN = cast<ShuffleVectorSDNode>(V);
+    if (!SVN->isSplat())
+      break;
+    int Idx = SVN->getSplatIndex();
+    int NumElts = V.getValueType().getVectorNumElements();
+    SplatIdx = Idx % NumElts;
+    return V.getOperand(Idx / NumElts);
   }
-  return Cst;
+  }
+
+  return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+  int SplatIdx;
+  if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+    return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+                   SrcVector.getValueType().getScalarType(), SrcVector,
+                   getIntPtrConstant(SplatIdx, SDLoc(V)));
+  return SDValue();
 }
 
 /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
@@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   case ISD::FSHL:
   case ISD::FSHR:
-    if (ConstantSDNode *C =
-            isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
       unsigned Amt = C->getAPIntValue().urem(BitWidth);
 
       // For fshl, 0-shift returns the 1st arg.
@@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   }
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
-    // If this is a ZEXTLoad and we are looking at the loaded value.
-    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+    const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+    if (ISD::isNON_EXTLoad(LD) && Cst) {
+      // Determine any common known bits from the loaded constant pool value.
+      Type *CstTy = Cst->getType();
+      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+        // If it's a vector splat, then we can (quickly) reuse the scalar path.
+        // NOTE: We assume all elements match and none are UNDEF.
+        if (CstTy->isVectorTy()) {
+          if (const Constant *Splat = Cst->getSplatValue()) {
+            Cst = Splat;
+            CstTy = Cst->getType();
+          }
+        }
+        // TODO - do we need to handle different bitwidths?
+        if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+          // Iterate across all vector elements finding common known bits.
+          Known.One.setAllBits();
+          Known.Zero.setAllBits();
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (!DemandedElts[i])
+              continue;
+            if (Constant *Elt = Cst->getAggregateElement(i)) {
+              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+                const APInt &Value = CInt->getValue();
+                Known.One &= Value;
+                Known.Zero &= ~Value;
+                continue;
+              }
+              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+                APInt Value = CFP->getValueAPF().bitcastToAPInt();
+                Known.One &= Value;
+                Known.Zero &= ~Value;
+                continue;
+              }
+            }
+            Known.One.clearAllBits();
+            Known.Zero.clearAllBits();
+            break;
+          }
+        } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+          if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+            const APInt &Value = CInt->getValue();
+            Known.One = Value;
+            Known.Zero = ~Value;
+          } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+            APInt Value = CFP->getValueAPF().bitcastToAPInt();
+            Known.One = Value;
+            Known.Zero = ~Value;
+          }
+        }
+      }
+    } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+      // If this is a ZEXTLoad and we are looking at the loaded value.
       EVT VT = LD->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
       Known.Zero.setBitsFrom(MemBits);
@@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
-    Known = Known.zext(BitWidth);
-    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
     break;
   }
   case ISD::ZERO_EXTEND: {
-    EVT InVT = Op.getOperand(0).getValueType();
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-    Known = Known.zext(BitWidth);
-    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
     break;
   }
   case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   }
   case ISD::ANY_EXTEND: {
     Known = computeKnownBits(Op.getOperand(0), Depth+1);
-    Known = Known.zext(BitWidth);
+    Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
     break;
   }
   case ISD::TRUNCATE: {
@@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     LLVM_FALLTHROUGH;
   case ISD::SUB:
   case ISD::SUBC: {
-    if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
-      // We know that the top bits of C-X are clear if X contains less bits
-      // than C (i.e. no wrap-around can happen).  For example, 20-X is
-      // positive if we can prove that X is >= 0 and < 16.
-      if (CLHS->getAPIntValue().isNonNegative()) {
-        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
-        // NLZ can't be BitWidth with no sign bit
-        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-        Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
-                         Depth + 1);
-
-        // If all of the MaskV bits are known to be zero, then we know the
-        // output top bits are zero, because we now know that the output is
-        // from [0-C].
-        if ((Known2.Zero & MaskV) == MaskV) {
-          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
-          // Top bits known zero.
-          Known.Zero.setHighBits(NLZ2);
-        }
-      }
-    }
-
-    // If low bits are know to be zero in both operands, then we know they are
-    // going to be 0 in the result. Both addition and complement operations
-    // preserve the low zero bits.
-    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-    unsigned KnownZeroLow = Known2.countMinTrailingZeros();
-    if (KnownZeroLow == 0)
-      break;
-
+    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-    KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
-    Known.Zero.setLowBits(KnownZeroLow);
+    Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
+                                        Known, Known2);
     break;
   }
   case ISD::UADDO:
@@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::ADD:
   case ISD::ADDC:
   case ISD::ADDE: {
-    // Output known-0 bits are known if clear or set in both the low clear bits
-    // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
-    // low 3 bits clear.
-    // Output known-0 bits are also known if the top bits of each input are
-    // known to be clear. For example, if one input has the top 10 bits clear
-    // and the other has the top 8 bits clear, we know the top 7 bits of the
-    // output must be clear.
-    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-    unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
-    unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+    assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
+
+    // With ADDE and ADDCARRY, a carry bit may be added in.
+    KnownBits Carry(1);
+    if (Opcode == ISD::ADDE)
+      // Can't track carry from glue, set carry to unknown.
+      Carry.resetAll();
+    else if (Opcode == ISD::ADDCARRY)
+      // TODO: Compute known bits for the carry operand. Not sure if it is worth
+      // the trouble (how often will we find a known carry bit). And I haven't
+      // tested this very much yet, but something like this might work:
+      //   Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+      //   Carry = Carry.zextOrTrunc(1, false);
+      Carry.resetAll();
+    else
+      Carry.setAllZero();
 
+    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-    KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
-    KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
-
-    if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
-      // With ADDE and ADDCARRY, a carry bit may be added in, so we can only
-      // use this information if we know (at least) that the low two bits are
-      // clear. We then return to the caller that the low bit is unknown but
-      // that other bits are known zero.
-      if (KnownZeroLow >= 2)
-        Known.Zero.setBits(1, KnownZeroLow);
-      break;
-    }
-
-    Known.Zero.setLowBits(KnownZeroLow);
-    if (KnownZeroHigh > 1)
-      Known.Zero.setHighBits(KnownZeroHigh - 1);
+    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
     break;
   }
   case ISD::SREM:
@@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::EXTRACT_ELEMENT: {
     Known = computeKnownBits(Op.getOperand(0), Depth+1);
     const unsigned Index = Op.getConstantOperandVal(1);
-    const unsigned BitWidth = Op.getValueSizeInBits();
+    const unsigned EltBitWidth = Op.getValueSizeInBits();
 
     // Remove low part of known bits mask
-    Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
-    Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
+    Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+    Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
 
     // Remove high part of known bit mask
-    Known = Known.trunc(BitWidth);
+    Known = Known.trunc(EltBitWidth);
     break;
   }
   case ISD::EXTRACT_VECTOR_ELT: {
     SDValue InVec = Op.getOperand(0);
     SDValue EltNo = Op.getOperand(1);
     EVT VecVT = InVec.getValueType();
-    const unsigned BitWidth = Op.getValueSizeInBits();
     const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
     const unsigned NumSrcElts = VecVT.getVectorNumElements();
     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
@@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
       Known = computeKnownBits(InVec, Depth + 1);
     }
     if (BitWidth > EltBitWidth)
-      Known = Known.zext(BitWidth);
+      Known = Known.zext(BitWidth, false /* => any extend */);
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
@@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     // the minimum of the clamp min/max range.
     bool IsMax = (Opcode == ISD::SMAX);
     ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
-    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
       if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
-        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
-                                              DemandedElts);
+        CstHigh =
+            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
     if (CstLow && CstHigh) {
       if (!IsMax)
         std::swap(CstLow, CstHigh);
@@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
     // SRA X, C   -> adds C sign bits.
     if (ConstantSDNode *C =
-            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
       APInt ShiftVal = C->getAPIntValue();
       ShiftVal += Tmp;
       Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
@@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return Tmp;
   case ISD::SHL:
     if (ConstantSDNode *C =
-            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
       // shl destroys sign bits.
       Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
       if (C->getAPIntValue().uge(VTBits) ||      // Bad shift.
@@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     // the minimum of the clamp min/max range.
     bool IsMax = (Opcode == ISD::SMAX);
     ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
-    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
       if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
-        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
-                                              DemandedElts);
+        CstHigh =
+            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
     if (CstLow && CstHigh) {
       if (!IsMax)
         std::swap(CstLow, CstHigh);
@@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     SDValue InVec = Op.getOperand(0);
     SDValue InVal = Op.getOperand(1);
     SDValue EltNo = Op.getOperand(2);
-    unsigned NumElts = InVec.getValueType().getVectorNumElements();
 
     ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
     if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
@@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
       unsigned ExtType = LD->getExtensionType();
       switch (ExtType) {
-        default: break;
-        case ISD::SEXTLOAD:    // '17' bits known
-          Tmp = LD->getMemoryVT().getScalarSizeInBits();
-          return VTBits-Tmp+1;
-        case ISD::ZEXTLOAD:    // '16' bits known
-          Tmp = LD->getMemoryVT().getScalarSizeInBits();
-          return VTBits-Tmp;
+      default: break;
+      case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+        Tmp = LD->getMemoryVT().getScalarSizeInBits();
+        return VTBits - Tmp + 1;
+      case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+        Tmp = LD->getMemoryVT().getScalarSizeInBits();
+        return VTBits - Tmp;
+      case ISD::NON_EXTLOAD:
+        if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+          // Vectors only (computeKnownBits handles scalars). Elements must be
+          // exactly VTBits wide so that constant element i is vector lane i.
+          Type *CstTy = Cst->getType();
+          if (CstTy->isVectorTy() && VTBits == CstTy->getScalarSizeInBits() &&
+              (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+            Tmp = VTBits;
+            for (unsigned i = 0; i != NumElts; ++i) {
+              if (!DemandedElts[i])
+                continue;
+              if (Constant *Elt = Cst->getAggregateElement(i)) {
+                if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+                  const APInt &Value = CInt->getValue();
+                  Tmp = std::min(Tmp, Value.getNumSignBits());
+                  continue;
+                }
+                if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+                  APInt Value = CFP->getValueAPF().bitcastToAPInt();
+                  Tmp = std::min(Tmp, Value.getNumSignBits());
+                  continue;
+                }
+              }
+              // Unknown type. Conservatively assume no bits match sign bit.
+              return 1;
+            }
+            return Tmp;
+          }
+        }
+        break;
       }
     }
   }
@@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
     return false;
 
   if (Op.getOpcode() == ISD::OR &&
-      !MaskedValueIsZero(Op.getOperand(0),
-                     cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+      !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
     return false;
 
   return true;
@@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
   return SDValue();
 }
 
-static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
                                   ArrayRef<SDValue> Ops,
                                   SelectionDAG &DAG) {
   assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
@@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
     return DAG.getUNDEF(VT);
 
+  // Scan the operands and look for extract operations from a single source
+  // that correspond to insertion at the same location via this concatenation:
+  // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+  SDValue IdentitySrc;
+  bool IsIdentity = true;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    SDValue Op = Ops[i];
+    unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+        Op.getOperand(0).getValueType() != VT ||
+        (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+        !isa<ConstantSDNode>(Op.getOperand(1)) ||
+        Op.getConstantOperandVal(1) != IdentityIndex) {
+      IsIdentity = false;
+      break;
+    }
+    assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+           "Unexpected identity source vector for concat of extracts");
+    IdentitySrc = Op.getOperand(0);
+  }
+  if (IsIdentity) {
+    assert(IdentitySrc && "Failed to set source vector of extracts");
+    return IdentitySrc;
+  }
+
   // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
   // simplified to one big BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.
@@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (Operand.isUndef())
       return getUNDEF(VT);
     break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    if (Operand.isUndef())
+      return getUNDEF(VT);
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    // [us]itofp(undef) = 0, because the result value is bounded.
+    if (Operand.isUndef())
+      return getConstantFP(0.0, DL, VT);
+    break;
   case ISD::SIGN_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid SIGN_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "SIGN_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::ZERO_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ZERO_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "ZERO_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::ANY_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ANY_EXTEND!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "ANY_EXTEND result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::TRUNCATE:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid TRUNCATE!");
+    assert(VT.isVector() == Operand.getValueType().isVector() &&
+           "TRUNCATE result type should be vector iff the operand "
+           "type is vector!");
     if (Operand.getValueType() == VT) return Operand;   // noop truncate
     assert((!VT.isVector() ||
             VT.getVectorNumElements() ==
@@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return Operand.getOperand(0);
     break;
   case ISD::FNEG:
+    // Negation of an unknown bag of bits is still completely undefined.
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+
     // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
     if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
         OpOpcode == ISD::FSUB)
@@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
 }
 
 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, const ConstantSDNode *Cst1,
-                                             const ConstantSDNode *Cst2) {
-  if (Cst1->isOpaque() || Cst2->isOpaque())
+                                             EVT VT, const ConstantSDNode *C1,
+                                             const ConstantSDNode *C2) {
+  if (C1->isOpaque() || C2->isOpaque())
     return SDValue();
 
-  std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
-                                            Cst2->getAPIntValue());
+  std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
+                                            C2->getAPIntValue());
   if (!Folded.second)
     return SDValue();
   return getConstant(Folded.first, DL, VT);
@@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
     return SDValue();
   if (!TLI->isOffsetFoldingLegal(GA))
     return SDValue();
-  const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
-  if (!Cst2)
+  auto *C2 = dyn_cast<ConstantSDNode>(N2);
+  if (!C2)
     return SDValue();
-  int64_t Offset = Cst2->getSExtValue();
+  int64_t Offset = C2->getSExtValue();
   switch (Opcode) {
   case ISD::ADD: break;
   case ISD::SUB: Offset = -uint64_t(Offset); break;
   default: return SDValue();
   }
-  return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+  return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
                           GA->getOffset() + uint64_t(Offset));
 }
 
@@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
 }
 
 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
-                                             EVT VT, SDNode *Cst1,
-                                             SDNode *Cst2) {
+                                             EVT VT, SDNode *N1, SDNode *N2) {
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
   if (Opcode >= ISD::BUILTIN_OP_END)
     return SDValue();
 
-  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+  if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
     return getUNDEF(VT);
 
   // Handle the case of two scalars.
-  if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
-    if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
-      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
+    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
+      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
       assert((!Folded || !VT.isVector()) &&
              "Can't fold vectors ops with scalar operands");
       return Folded;
@@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   }
 
   // fold (add Sym, c) -> Sym+c
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
-    return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
+    return FoldSymbolOffset(Opcode, VT, GA, N2);
   if (TLI->isCommutativeBinOp(Opcode))
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
-      return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
+      return FoldSymbolOffset(Opcode, VT, GA, N1);
 
   // For vectors, extract each constant element and fold them individually.
   // Either input may be an undef value.
-  auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
-  if (!BV1 && !Cst1->isUndef())
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (!BV1 && !N1->isUndef())
     return SDValue();
-  auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
-  if (!BV2 && !Cst2->isUndef())
+  auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+  if (!BV2 && !N2->isUndef())
     return SDValue();
   // If both operands are undef, that's handled the same way as scalars.
   if (!BV1 && !BV2)
@@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
   return V;
 }
 
+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+                                         EVT VT, SDValue N1, SDValue N2) {
+  // TODO: We don't do any constant folding for strict FP opcodes here, but we
+  //       should. That will require dealing with a potentially non-default
+  //       rounding mode, checking the "opStatus" return value from the APFloat
+  //       math calculations, and possibly other variations.
+  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP && N2CFP) {
+    APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+    switch (Opcode) {
+    case ISD::FADD:
+      C1.add(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FSUB:
+      C1.subtract(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FMUL:
+      C1.multiply(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FDIV:
+      C1.divide(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FREM:
+      C1.mod(C2);
+      return getConstantFP(C1, DL, VT);
+    case ISD::FCOPYSIGN:
+      C1.copySign(C2);
+      return getConstantFP(C1, DL, VT);
+    default: break;
+    }
+  }
+  if (N1CFP && Opcode == ISD::FP_ROUND) {
+    APFloat C1 = N1CFP->getValueAPF();    // make copy
+    bool Unused;
+    // This can return overflow, underflow, or inexact; we don't care.
+    // FIXME need to be more flexible about rounding mode.
+    (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+                      &Unused);
+    return getConstantFP(C1, DL, VT);
+  }
+
+  switch (Opcode) {
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    // If both operands are undef, the result is undef. If 1 operand is undef,
+    // the result is NaN. This should match the behavior of the IR optimizer.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
+    if (N1.isUndef() || N2.isUndef())
+      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+  }
+  return SDValue();
+}
+
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                               SDValue N1, SDValue N2, const SDNodeFlags Flags) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
+    if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+      return V;
     break;
   case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
     assert(N1.getValueType() == VT &&
@@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
           FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
     return SV;
 
-  // Constant fold FP operations.
-  bool HasFPExceptions = TLI->hasFloatingPointExceptions();
-  if (N1CFP) {
-    if (N2CFP) {
-      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
-      APFloat::opStatus s;
-      switch (Opcode) {
-      case ISD::FADD:
-        s = V1.add(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s != APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FSUB:
-        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s!=APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FMUL:
-        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || s!=APFloat::opInvalidOp)
-          return getConstantFP(V1, DL, VT);
-        break;
-      case ISD::FDIV:
-        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
-        if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
-                                 s!=APFloat::opDivByZero)) {
-          return getConstantFP(V1, DL, VT);
-        }
-        break;
-      case ISD::FREM :
-        s = V1.mod(V2);
-        if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
-                                 s!=APFloat::opDivByZero)) {
-          return getConstantFP(V1, DL, VT);
-        }
-        break;
-      case ISD::FCOPYSIGN:
-        V1.copySign(V2);
-        return getConstantFP(V1, DL, VT);
-      default: break;
-      }
-    }
-
-    if (Opcode == ISD::FP_ROUND) {
-      APFloat V = N1CFP->getValueAPF();    // make copy
-      bool ignored;
-      // This can return overflow, underflow, or inexact; we don't care.
-      // FIXME need to be more flexible about rounding mode.
-      (void)V.convert(EVTToAPFloatSemantics(VT),
-                      APFloat::rmNearestTiesToEven, &ignored);
-      return getConstantFP(V, DL, VT);
-    }
-  }
-
-  switch (Opcode) {
-  case ISD::FADD:
-  case ISD::FSUB:
-  case ISD::FMUL:
-  case ISD::FDIV:
-  case ISD::FREM:
-    // If both operands are undef, the result is undef. If 1 operand is undef,
-    // the result is NaN. This should match the behavior of the IR optimizer.
-    if (N1.isUndef() && N2.isUndef())
-      return getUNDEF(VT);
-    if (N1.isUndef() || N2.isUndef())
-      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
-  }
+  if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+    return V;
 
   // Canonicalize an UNDEF to the RHS, even over a constant.
   if (N1.isUndef()) {
@@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       APFloat  V1 = N1CFP->getValueAPF();
       const APFloat &V2 = N2CFP->getValueAPF();
       const APFloat &V3 = N3CFP->getValueAPF();
-      APFloat::opStatus s =
-        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
-      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
-        return getConstantFP(V1, DL, VT);
+      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      return getConstantFP(V1, DL, VT);
     }
     break;
   }
@@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
+    // Inserting undef into undef is still undef.
+    if (N1.isUndef() && N2.isUndef())
+      return getUNDEF(VT);
     SDValue Index = N3;
     if (VT.isSimple() && N1.getValueType().isSimple()
         && N2.getValueType().isSimple()) {
@@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       // Trivial insertion.
       if (VT.getSimpleVT() == N2.getSimpleValueType())
         return N2;
+
+      // If this is an insert of an extracted vector into an undef vector, we
+      // can just use the input to the extract.
+      if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+          N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+        return N2.getOperand(0);
     }
     break;
   }
@@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
                                   SrcDelta + G->getOffset());
 }
 
-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                     unsigned Limit, uint64_t Size,
-                                     unsigned DstAlign, unsigned SrcAlign,
-                                     bool IsMemset,
-                                     bool ZeroMemset,
-                                     bool MemcpyStrSrc,
-                                     bool AllowOverlap,
-                                     unsigned DstAS, unsigned SrcAS,
-                                     SelectionDAG &DAG,
-                                     const TargetLowering &TLI) {
-  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
-         "Expecting memcpy / memset source to meet alignment requirement!");
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
-                                   IsMemset, ZeroMemset, MemcpyStrSrc,
-                                   DAG.getMachineFunction());
-
-  if (VT == MVT::Other) {
-    // Use the largest integer type whose alignment constraints are satisfied.
-    // We only need to check DstAlign here as SrcAlign is always greater or
-    // equal to DstAlign (or zero).
-    VT = MVT::i64;
-    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
-           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-    assert(VT.isInteger());
-
-    // Find the largest legal integer type.
-    MVT LVT = MVT::i64;
-    while (!TLI.isTypeLegal(LVT))
-      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
-    assert(LVT.isInteger());
-
-    // If the type we've chosen is larger than the largest legal integer type
-    // then use that instead.
-    if (VT.bitsGT(LVT))
-      VT = LVT;
-  }
-
-  unsigned NumMemOps = 0;
-  while (Size != 0) {
-    unsigned VTSize = VT.getSizeInBits() / 8;
-    while (VTSize > Size) {
-      // For now, only use non-vector load / store's for the left-over pieces.
-      EVT NewVT = VT;
-      unsigned NewVTSize;
-
-      bool Found = false;
-      if (VT.isVector() || VT.isFloatingPoint()) {
-        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
-            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
-          Found = true;
-        else if (NewVT == MVT::i64 &&
-                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
-                 TLI.isSafeMemOpType(MVT::f64)) {
-          // i64 is usually not legal on 32-bit targets, but f64 may be.
-          NewVT = MVT::f64;
-          Found = true;
-        }
-      }
-
-      if (!Found) {
-        do {
-          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
-          if (NewVT == MVT::i8)
-            break;
-        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
-      }
-      NewVTSize = NewVT.getSizeInBits() / 8;
-
-      // If the new VT cannot cover all of the remaining bits, then consider
-      // issuing a (or a pair of) unaligned and overlapping load / store.
-      bool Fast;
-      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
-          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
-          Fast)
-        VTSize = Size;
-      else {
-        VT = NewVT;
-        VTSize = NewVTSize;
-      }
-    }
-
-    if (++NumMemOps > Limit)
-      return false;
-
-    MemOps.push_back(VT);
-    Size -= VTSize;
-  }
-
-  return true;
-}
-
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
   if (MF.getTarget().getTargetTriple().isOSDarwin())
-    return MF.getFunction().optForMinSize();
-  return MF.getFunction().optForSize();
+    return MF.getFunction().hasMinSize();
+  return MF.getFunction().hasOptSize();
 }
 
 static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                                        MachinePointerInfo DstPtrInfo,
                                        MachinePointerInfo SrcPtrInfo) {
   // Turn a memcpy of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;
 
@@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
 
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align),
-                                (isZeroConstant ? 0 : SrcAlign),
-                                false, false, CopyFromConstant, true,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+          (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
                                         MachinePointerInfo DstPtrInfo,
                                         MachinePointerInfo SrcPtrInfo) {
   // Turn a memmove of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;
 
@@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   if (Align > SrcAlign)
     SrcAlign = Align;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
-
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align), SrcAlign,
-                                false, false, false, false,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
+  // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
+  // correct code.
+  bool AllowOverlap = false;
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
+          /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+          AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
                                uint64_t Size, unsigned Align, bool isVol,
                                MachinePointerInfo DstPtrInfo) {
   // Turn a memset of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
   if (Src.isUndef())
     return Chain;
 
@@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
     DstAlignCanChange = true;
   bool IsZeroVal =
     isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
-  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
-                                Size, (DstAlignCanChange ? 0 : Align), 0,
-                                true, IsZeroVal, false, true,
-                                DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(
+          MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+          (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+          MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Ty = Type::getInt8PtrTy(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME: pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
@@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Ty = Type::getInt8PtrTy(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME:  pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
@@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
 
   // Emit a library call.
-  Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  Entry.Node = Dst; Entry.Ty = IntPtrTy;
+  Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
   Args.push_back(Entry);
   Entry.Node = Src;
   Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
   Args.push_back(Entry);
   Entry.Node = Size;
-  Entry.Ty = IntPtrTy;
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
   Args.push_back(Entry);
 
   // FIXME: pass in SDLoc
@@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getAtomicCmpSwap(
-    unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
-    SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
-    unsigned Alignment, AtomicOrdering SuccessOrdering,
-    AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
-  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
-         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
-  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
-
-  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
-    Alignment = getEVTAlignment(MemVT);
-
-  MachineFunction &MF = getMachineFunction();
-
-  // FIXME: Volatile isn't really correct; we should keep track of atomic
-  // orderings in the memoperand.
-  auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
-               MachineMemOperand::MOStore;
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
-                            AAMDNodes(), nullptr, SSID, SuccessOrdering,
-                            FailureOrdering);
-
-  return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
-}
-
 SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
                                        EVT MemVT, SDVTList VTs, SDValue Chain,
                                        SDValue Ptr, SDValue Cmp, SDValue Swp,
@@ -6422,35 +6514,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
   return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
 }
 
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
-                                SDValue Chain, SDValue Ptr, SDValue Val,
-                                const Value *PtrVal, unsigned Alignment,
-                                AtomicOrdering Ordering,
-                                SyncScope::ID SSID) {
-  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
-    Alignment = getEVTAlignment(MemVT);
-
-  MachineFunction &MF = getMachineFunction();
-  // An atomic store does not load. An atomic load does not store.
-  // (An atomicrmw obviously both loads and stores.)
-  // For now, atomics are considered to be volatile always, and they are
-  // chained as such.
-  // FIXME: Volatile isn't really correct; we should keep track of atomic
-  // orderings in the memoperand.
-  auto Flags = MachineMemOperand::MOVolatile;
-  if (Opcode != ISD::ATOMIC_STORE)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Opcode != ISD::ATOMIC_LOAD)
-    Flags |= MachineMemOperand::MOStore;
-
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
-                            MemVT.getStoreSize(), Alignment, AAMDNodes(),
-                            nullptr, SSID, Ordering);
-
-  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
-}
-
 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
                                 SDValue Chain, SDValue Ptr, SDValue Val,
                                 MachineMemOperand *MMO) {
@@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
           Opcode == ISD::ATOMIC_LOAD_MAX ||
           Opcode == ISD::ATOMIC_LOAD_UMIN ||
           Opcode == ISD::ATOMIC_LOAD_UMAX ||
+          Opcode == ISD::ATOMIC_LOAD_FADD ||
+          Opcode == ISD::ATOMIC_LOAD_FSUB ||
           Opcode == ISD::ATOMIC_SWAP ||
           Opcode == ISD::ATOMIC_STORE) &&
          "Invalid Atomic Op");
@@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
 SDValue SelectionDAG::getMemIntrinsicNode(
     unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
     EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
-    MachineMemOperand::Flags Flags, unsigned Size) {
+    MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
   if (Align == 0)  // Ensure that codegen never sees alignment 0
     Align = getEVTAlignment(MemVT);
 
@@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
+      MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
 
   return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
 }
@@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
   return SDValue(N, 0);
 }
 
+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+                                      SDValue Chain, int FrameIndex,
+                                      int64_t Size, int64_t Offset) {
+  const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+  const auto VTs = getVTList(MVT::Other);
+  SDValue Ops[2] = {
+      Chain,
+      getFrameIndex(FrameIndex,
+                    getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+                    true)};
+
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, VTs, Ops);
+  ID.AddInteger(FrameIndex);
+  ID.AddInteger(Size);
+  ID.AddInteger(Offset);
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+      Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
 /// MachinePointerInfo record from it.  This is particularly useful because the
 /// code generator has many cases where it doesn't bother passing in a
@@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
   SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
       dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
                                      bool IsTruncating, bool IsCompressing) {
   assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
-  EVT VT = Val.getValueType();
   SDVTList VTs = getVTList(MVT::Other);
   SDValue Ops[] = { Chain, Val, Ptr, Mask };
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
-  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
       dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
   return SDValue();
 }
 
+// TODO: Use fast-math-flags to enable more simplifications.
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+  ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+  if (!YC)
+    return SDValue();
+
+  // X + -0.0 --> X
+  if (Opcode == ISD::FADD)
+    if (YC->getValueAPF().isNegZero())
+      return X;
+
+  // X - +0.0 --> X
+  if (Opcode == ISD::FSUB)
+    if (YC->getValueAPF().isPosZero())
+      return X;
+
+  // X * 1.0 --> X
+  // X / 1.0 --> X
+  if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+    if (YC->getValueAPF().isExactlyValue(1.0))
+      return X;
+
+  return SDValue();
+}
+
 SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
                                SDValue Ptr, SDValue SV, unsigned Align) {
   SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
     break;
   case ISD::CONCAT_VECTORS:
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   case ISD::SELECT_CC:
@@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   unsigned OrigOpc = Node->getOpcode();
   unsigned NewOpc;
-  bool IsUnary = false;
-  bool IsTernary = false;
   switch (OrigOpc) {
   default:
     llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
-  case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
-  case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
-  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
-  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
-  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
-  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
-  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
-  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
-  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
-  case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
-  case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
-  case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
-  case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
-  case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
-  case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
-  case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
-  case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
-  case ISD::STRICT_FNEARBYINT:
-    NewOpc = ISD::FNEARBYINT;
-    IsUnary = true;
-    break;
-  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
-  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
-  case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
-  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
-  case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
-  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
-  }
+  case ISD::STRICT_FADD:       NewOpc = ISD::FADD;       break;
+  case ISD::STRICT_FSUB:       NewOpc = ISD::FSUB;       break;
+  case ISD::STRICT_FMUL:       NewOpc = ISD::FMUL;       break;
+  case ISD::STRICT_FDIV:       NewOpc = ISD::FDIV;       break;
+  case ISD::STRICT_FREM:       NewOpc = ISD::FREM;       break;
+  case ISD::STRICT_FMA:        NewOpc = ISD::FMA;        break;
+  case ISD::STRICT_FSQRT:      NewOpc = ISD::FSQRT;      break;
+  case ISD::STRICT_FPOW:       NewOpc = ISD::FPOW;       break;
+  case ISD::STRICT_FPOWI:      NewOpc = ISD::FPOWI;      break;
+  case ISD::STRICT_FSIN:       NewOpc = ISD::FSIN;       break;
+  case ISD::STRICT_FCOS:       NewOpc = ISD::FCOS;       break;
+  case ISD::STRICT_FEXP:       NewOpc = ISD::FEXP;       break;
+  case ISD::STRICT_FEXP2:      NewOpc = ISD::FEXP2;      break;
+  case ISD::STRICT_FLOG:       NewOpc = ISD::FLOG;       break;
+  case ISD::STRICT_FLOG10:     NewOpc = ISD::FLOG10;     break;
+  case ISD::STRICT_FLOG2:      NewOpc = ISD::FLOG2;      break;
+  case ISD::STRICT_FRINT:      NewOpc = ISD::FRINT;      break;
+  case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+  case ISD::STRICT_FMAXNUM:    NewOpc = ISD::FMAXNUM;    break;
+  case ISD::STRICT_FMINNUM:    NewOpc = ISD::FMINNUM;    break;
+  case ISD::STRICT_FCEIL:      NewOpc = ISD::FCEIL;      break;
+  case ISD::STRICT_FFLOOR:     NewOpc = ISD::FFLOOR;     break;
+  case ISD::STRICT_FROUND:     NewOpc = ISD::FROUND;     break;
+  case ISD::STRICT_FTRUNC:     NewOpc = ISD::FTRUNC;     break;
+  case ISD::STRICT_FP_ROUND:   NewOpc = ISD::FP_ROUND;   break;
+  case ISD::STRICT_FP_EXTEND:  NewOpc = ISD::FP_EXTEND;  break;
+  }
+
+  assert(Node->getNumValues() == 2 && "Unexpected number of results!");
 
   // We're taking this node out of the chain, so we need to re-link things.
   SDValue InputChain = Node->getOperand(0);
   SDValue OutputChain = SDValue(Node, 1);
   ReplaceAllUsesOfValueWith(OutputChain, InputChain);
 
-  SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
-  SDNode *Res = nullptr;
-  if (IsUnary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
-  else if (IsTernary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2),
-                                           Node->getOperand(3)});
-  else
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2) });
+  SmallVector<SDValue, 3> Ops;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(Node->getOperand(i));
+
+  SDVTList VTs = getVTList(Node->getValueType(0));
+  SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
 
   // MorphNodeTo can operate in two ways: if an existing node with the
   // specified operands exists, it can just return it.  Otherwise, it
@@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
         // DIExpression, we need to mark the expression with a
         // DW_OP_stack_value.
         auto *DIExpr = DV->getExpression();
-        DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
-                                       DIExpression::NoDeref,
-                                       DIExpression::WithStackValue);
+        DIExpr =
+            DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
         SDDbgValue *Clone =
             getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
                         DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
@@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N)
   }
 }
 
-
-void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
   DenseMap<SDNode *, unsigned> Degree;
   Order.reserve(AllNodes.size());
-  for (auto & N : allnodes()) {
+  for (auto &N : allnodes()) {
     unsigned NOps = N.getNumOperands();
     Degree[&N] = NOps;
     if (0 == NOps)
       Order.push_back(&N);
   }
-  for (std::vector<SDNode *>::iterator I = Order.begin();
-  I!=Order.end();++I) {
-    SDNode * N = *I;
+  for (size_t I = 0; I != Order.size(); ++I) {
+    SDNode *N = Order[I];
     for (auto U : N->uses()) {
       unsigned &UnsortedOps = Degree[U];
       if (0 == --UnsortedOps)
@@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
 }
 
 #ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence()
-{
-  std::vector<SDNode*> TopoOrder;
+void SelectionDAG::VerifyDAGDiverence() {
+  std::vector<SDNode *> TopoOrder;
   CreateTopologicalOrder(TopoOrder);
   const TargetLowering &TLI = getTargetLoweringInfo();
   DenseMap<const SDNode *, bool> DivergenceMap;
@@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence()
 }
 #endif
 
-
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone.  The same value
 /// may appear in both the From and To list.  The Deleted vector is
@@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
   return V;
 }
 
-bool llvm::isBitwiseNot(SDValue V) {
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+  // Step to operand 0 for as long as V is an EXTRACT_SUBVECTOR node.
+  for (; V.getOpcode() == ISD::EXTRACT_SUBVECTOR; V = V.getOperand(0)) {}
+  return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
   if (V.getOpcode() != ISD::XOR)
     return false;
-  ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
-  return C && C->isAllOnesValue();
+  V = peekThroughBitcasts(V.getOperand(1));
+  unsigned NumBits = V.getScalarValueSizeInBits();
+  ConstantSDNode *C =
+      isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+  return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
 }
 
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+                                          bool AllowTruncation) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
@@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
     BitVector UndefElements;
     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
 
-    // BuildVectors can truncate their operands. Ignore that case here.
-    if (CN && (UndefElements.none() || AllowUndefs) &&
-        CN->getValueType(0) == N.getValueType().getScalarType())
-      return CN;
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
+  }
+
+  return nullptr;
+}
+
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+                                          bool AllowUndefs,
+                                          bool AllowTruncation) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
   }
 
   return nullptr;
@@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
   return nullptr;
 }
 
-bool llvm::isNullOrNullSplat(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+                                              const APInt &DemandedElts,
+                                              bool AllowUndefs) {
+  // A scalar FP constant is returned as-is; DemandedElts is not consulted.
+  if (auto *Scalar = dyn_cast<ConstantFPSDNode>(N))
+    return Scalar;
+
+  // Otherwise only a BuildVectorSDNode splatting an FP constant can match.
+  auto *BuildVec = dyn_cast<BuildVectorSDNode>(N);
+  if (!BuildVec)
+    return nullptr;
+  BitVector UndefLanes;
+  ConstantFPSDNode *Splat =
+      BuildVec->getConstantFPSplatNode(DemandedElts, &UndefLanes);
+  return (Splat && (AllowUndefs || UndefLanes.none())) ? Splat : nullptr;
+}
+
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
   // TODO: may want to use peekThroughBitcast() here.
-  ConstantSDNode *C = isConstOrConstSplat(N);
+  ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
   return C && C->isNullValue();
 }
 
@@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
 
 /// isOperand - Return true if this node is an operand of N.
 bool SDValue::isOperandOf(const SDNode *N) const {
-  for (const SDValue &Op : N->op_values())
-    if (*this == Op)
-      return true;
-  return false;
+  return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
 }
 
 bool SDNode::isOperandOf(const SDNode *N) const {
-  for (const SDValue &Op : N->op_values())
-    if (this == Op.getNode())
-      return true;
-  return false;
+  return any_of(N->op_values(),
+                [this](SDValue Op) { return this == Op.getNode(); });
 }
 
 /// reachesChainWithoutSideEffects - Return true if this operand (which must
@@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
   return getBuildVector(VecVT, dl, Scalars);
 }
 
+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+    SDNode *N, unsigned ResNE) {
+  unsigned Opcode = N->getOpcode();
+  assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+          Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+          Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+         "Expected an overflow opcode");
+  // Unroll N element by element; returns {value vector, overflow vector}.
+  EVT ResVT = N->getValueType(0);
+  EVT OvVT = N->getValueType(1);
+  EVT ResEltVT = ResVT.getVectorElementType();
+  EVT OvEltVT = OvVT.getVectorElementType();
+  SDLoc dl(N);
+  // If ResNE is 0, fully unroll the vector op. Otherwise unroll at most ResNE
+  // elements; any result elements beyond NE are filled with undef below.
+  unsigned NE = ResVT.getVectorNumElements();
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  SmallVector<SDValue, 8> LHSScalars;
+  SmallVector<SDValue, 8> RHSScalars;
+  ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+  ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+  // Each scalar op produces a ResEltVT value plus a setcc-typed overflow bit.
+  EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+  SDVTList VTs = getVTList(ResEltVT, SVT);
+  SmallVector<SDValue, 8> ResScalars;
+  SmallVector<SDValue, 8> OvScalars;
+  for (unsigned i = 0; i < NE; ++i) {
+    SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+    SDValue Ov =
+        getSelect(dl, OvEltVT, Res.getValue(1),
+                  getBoolConstant(true, dl, OvEltVT, ResVT),
+                  getConstant(0, dl, OvEltVT));
+
+    ResScalars.push_back(Res);
+    OvScalars.push_back(Ov);
+  }
+  // Pad both lists with undef when fewer than ResNE elements were unrolled.
+  ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+  OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+  EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+  EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+  return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+                        getBuildVector(NewOvVT, dl, OvScalars));
+}
+
 bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
                                                   LoadSDNode *Base,
                                                   unsigned Bytes,
@@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
 
   // If this is a direct reference to a stack slot, use information about the
   // stack slot's alignment.
-  int FrameIdx = 1 << 31;
+  int FrameIdx = INT_MIN;
   int64_t FrameOffset = 0;
   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
     FrameIdx = FI->getIndex();
@@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
     FrameOffset = Ptr.getConstantOperandVal(1);
   }
 
-  if (FrameIdx != (1 << 31)) {
+  if (FrameIdx != INT_MIN) {
     const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
     unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
                                     FrameOffset);
@@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
   return std::make_pair(Lo, Hi);
 }
 
+/// Insert \p N at index 0 of an undef vector of NextPowerOf2(#elts) elements.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+  EVT EltVT = N.getValueType().getVectorElementType();
+  unsigned WideNumElts = NextPowerOf2(N.getValueType().getVectorNumElements());
+  EVT WideVT = EVT::getVectorVT(*getContext(), EltVT, WideNumElts);
+  SDValue Zero = getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()));
+  return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, Zero);
+}
+
 void SelectionDAG::ExtractVectorElements(SDValue Op,
                                          SmallVectorImpl<SDValue> &Args,
                                          unsigned Start, unsigned Count) {
@@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
   return true;
 }
 
-SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+                                         BitVector *UndefElements) const {
   if (UndefElements) {
     UndefElements->clear();
     UndefElements->resize(getNumOperands());
   }
+  assert(getNumOperands() == DemandedElts.getBitWidth() &&
+         "Unexpected vector size");
+  if (!DemandedElts)
+    return SDValue();
   SDValue Splatted;
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (!DemandedElts[i])
+      continue;
     SDValue Op = getOperand(i);
     if (Op.isUndef()) {
       if (UndefElements)
@@ -9177,19 +9401,39 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
   }
 
   if (!Splatted) {
-    assert(getOperand(0).isUndef() &&
+    unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+    assert(getOperand(FirstDemandedIdx).isUndef() &&
            "Can only have a splat without a constant for all undefs.");
-    return getOperand(0);
+    return getOperand(FirstDemandedIdx);
   }
 
   return Splatted;
 }
 
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+  // Demanding every operand reduces this to the DemandedElts overload.
+  return getSplatValue(APInt::getAllOnesValue(getNumOperands()), UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+                                        BitVector *UndefElements) const {
+  SDValue Splat = getSplatValue(DemandedElts, UndefElements);
+  return dyn_cast_or_null<ConstantSDNode>(Splat); // Null if not a constant.
+}
+
 ConstantSDNode *
 BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
   return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
 }
 
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+                                          BitVector *UndefElements) const {
+  SDValue Splat = getSplatValue(DemandedElts, UndefElements);
+  return dyn_cast_or_null<ConstantFPSDNode>(Splat); // Null if not FP constant.
+}
+
 ConstantFPSDNode *
 BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
   return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
@@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
     /* search */;
 
-  assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+  // If all elements are undefined, this shuffle can be considered a splat
+  // (although it should eventually get simplified away completely).
+  if (i == e)
+    return true;
 
   // Make sure all remaining elements are either undef or the same as the first
   // non-undef value.
@@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
 
 void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
   assert(!Node->OperandList && "Node already has operands");
-  assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
-             Vals.size() &&
+  assert(SDNode::getMaxNumOperands() >= Vals.size() &&
          "too many operands to fit into SDNode");
   SDUse *Ops = OperandRecycler.allocate(
       ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
@@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
   checkForCycles(Node);
 }
 
+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+                                     SmallVectorImpl<SDValue> &Vals) {
+  // Collapse the tail of Vals (in place) until it fits in a single node.
+  for (size_t MaxOps = SDNode::getMaxNumOperands(); Vals.size() > MaxOps;) {
+    unsigned TailStart = Vals.size() - MaxOps;
+    ArrayRef<SDValue> Tail = ArrayRef<SDValue>(Vals).slice(TailStart, MaxOps);
+    SDValue TailTF = getNode(ISD::TokenFactor, DL, MVT::Other, Tail);
+    Vals.erase(Vals.begin() + TailStart, Vals.end());
+    Vals.push_back(TailTF);
+  }
+  return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
+
 #ifndef NDEBUG
 static void checkForCyclesHelper(const SDNode *N,
                                  SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 488bac1a9a80..9592bc30a4e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,9 +1,8 @@
 //==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -25,8 +24,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
   // Conservatively fail if we a match failed..
   if (!Base.getNode() || !Other.Base.getNode())
     return false;
+  if (!hasValidOffset() || !Other.hasValidOffset())
+    return false;
   // Initial Offset difference.
-  Off = Other.Offset - Offset;
+  Off = *Other.Offset - *Offset;
 
   if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
     // Trivial match.
@@ -60,24 +61,110 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
 
     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
 
-    // Match non-equal FrameIndexes - If both frame indices are fixed
-    // we know their relative offsets and can compare them. Otherwise
-    // we must be conservative.
+    // Match FrameIndexes.
     if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
-      if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+      if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+        // Equal FrameIndexes - offsets are directly comparable.
+        if (A->getIndex() == B->getIndex())
+          return true;
+        // Non-equal FrameIndexes - If both frame indices are fixed
+        // we know their relative offsets and can compare them. Otherwise
+        // we must be conservative.
         if (MFI.isFixedObjectIndex(A->getIndex()) &&
             MFI.isFixedObjectIndex(B->getIndex())) {
           Off += MFI.getObjectOffset(B->getIndex()) -
                  MFI.getObjectOffset(A->getIndex());
           return true;
         }
+      }
   }
   return false;
 }
 
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+                                      const Optional<int64_t> NumBytes0,
+                                      const SDNode *Op1,
+                                      const Optional<int64_t> NumBytes1,
+                                      const SelectionDAG &DAG, bool &IsAlias) {
+  // Returns true iff IsAlias was set; false means aliasing is undetermined.
+  BaseIndexOffset BasePtr0 = match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+  if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+    return false;
+  int64_t PtrDiff;
+  if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+      BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+    // following situations arise:
+    IsAlias = !(
+        // [----BasePtr0----]
+        //                         [---BasePtr1--]
+        // ========PtrDiff========>
+        (*NumBytes0 <= PtrDiff) ||
+        //                     [----BasePtr0----]
+        // [---BasePtr1--]
+        // =====(-PtrDiff)====>
+        (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 <= -PtrDiff.
+    return true;
+  }
+  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+  // able to calculate their relative offset if at least one arises
+  // from an alloca. However, these allocas cannot overlap and we
+  // can infer there is no alias.
+  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      // Only conclude no-alias for two distinct frame-index nodes when at
+      // least one of them is not a fixed object; otherwise be conservative.
+      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                     !MFI.isFixedObjectIndex(B->getIndex()))) {
+        IsAlias = false;
+        return true;
+      }
+    }
+
+  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+  // If both bases are a frame index, global address or constant pool entry
+  // and either their kinds differ or their Index values are equal: no alias.
+  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    IsAlias = false;
+    return true;
+  }
+  return false; // Cannot determine whether the pointers alias.
+}
+
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+                               const BaseIndexOffset &Other,
+                               int64_t OtherBitSize, int64_t &BitOffset) const {
+  // ByteDelta is how far Other starts past *this, when that is computable.
+  int64_t ByteDelta;
+  if (!equalBaseIndex(Other, DAG, ByteDelta))
+    return false;
+
+  // Other starts strictly before *this, so it cannot be fully contained:
+  //    [-------*this---------]
+  // [--Other--]
+  if (ByteDelta < 0)
+    return false;
+
+  // Other starts at or after *this; report the distance in bits and check
+  // that Other's end does not run past the end of *this.
+  BitOffset = 8 * ByteDelta;
+  return BitOffset + OtherBitSize <= BitSize;
+}
+
 /// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
-                                       const SelectionDAG &DAG) {
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+                                   const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -178,3 +265,33 @@ BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
   }
   return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
 }
+
+BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
+                                       const SelectionDAG &DAG) {
+  if (const auto *MemNode = dyn_cast<LSBaseSDNode>(N))
+    return matchLSNode(MemNode, DAG);
+  const auto *Marker = dyn_cast<LifetimeSDNode>(N);
+  if (!Marker)
+    return BaseIndexOffset(); // Neither LSBaseSDNode nor LifetimeSDNode.
+  SDValue BasePtr = Marker->getOperand(1); // Lifetime nodes get no index.
+  return Marker->hasOffset()
+             ? BaseIndexOffset(BasePtr, SDValue(), Marker->getOffset(), false)
+             : BaseIndexOffset(BasePtr, SDValue(), false);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+LLVM_DUMP_METHOD void BaseIndexOffset::dump() const { print(dbgs()); }
+
+/// Write "BaseIndexOffset base=[...] index=[...] offset=..." to \p OS. Base
+/// and Index are printed only when set, since either may be an empty SDValue.
+void BaseIndexOffset::print(raw_ostream& OS) const {
+  OS << "BaseIndexOffset base=[";
+  if (Base)
+    Base->print(OS);
+  OS << "] index=[";
+  if (Index)
+    Index->print(OS);
+  OS << "] offset=" << Offset;
+}
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 871ab9b29881..e818dd27c05e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,7 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -109,6 +109,7 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -123,6 +124,7 @@
 
 using namespace llvm;
 using namespace PatternMatch;
+using namespace SwitchCG;
 
 #define DEBUG_TYPE "isel"
 
@@ -215,8 +217,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
       unsigned ValueBits = ValueVT.getSizeInBits();
 
       // Assemble the power of 2 part.
-      unsigned RoundParts = NumParts & (NumParts - 1) ?
-        1 << Log2_32(NumParts) : NumParts;
+      unsigned RoundParts =
+          (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
       unsigned RoundBits = PartBits * RoundParts;
       EVT RoundVT = RoundBits == ValueBits ?
         ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
@@ -322,7 +324,15 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
     return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
   }
 
-  llvm_unreachable("Unknown mismatch!");
+  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+  // then truncating.
+  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+      ValueVT.bitsLT(PartEVT)) {
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  report_fatal_error("Unknown mismatch in getCopyFromParts!");
 }
 
 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
@@ -573,7 +583,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
     unsigned RoundBits = RoundParts * PartBits;
     unsigned OddParts = NumParts - RoundParts;
     SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
-                                 DAG.getIntPtrConstant(RoundBits, DL));
+      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
     getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                    CallConv);
 
@@ -1003,6 +1014,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
   DL = &DAG.getDataLayout();
   Context = DAG.getContext();
   LPadToCallSiteMap.clear();
+  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
 }
 
 void SelectionDAGBuilder::clear() {
@@ -1032,19 +1044,7 @@ SDValue SelectionDAGBuilder::getRoot() {
   }
 
   // Otherwise, we have to make a token factor node.
-  // If we have >= 2^16 loads then split across multiple token factors as
-  // there's a 64k limit on the number of SDNode operands.
-  SDValue Root;
-  size_t Limit = (1 << 16) - 1;
-  while (PendingLoads.size() > Limit) {
-    unsigned SliceIdx = PendingLoads.size() - Limit;
-    auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
-    SDValue NewTF =
-        DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
-    PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
-    PendingLoads.emplace_back(NewTF);
-  }
-  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
+  SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
   PendingLoads.clear();
   DAG.setRoot(Root);
   return Root;
@@ -1144,6 +1144,13 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
 
   for (auto &DDIMI : DanglingDebugInfoMap) {
     DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+    // If debug info is to be dropped, run it through final checks to see
+    // whether it can be salvaged.
+    for (auto &DDI : DDIV)
+      if (isMatchingDbgValue(DDI))
+        salvageUnresolvedDbgValue(DDI);
+
     DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
   }
 }
@@ -1169,6 +1176,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
            "Expected inlined-at fields to agree");
     SDDbgValue *SDV;
     if (Val.getNode()) {
+      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+      // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+      // we couldn't resolve it directly when examining the DbgValue intrinsic
+      // in the first place we should not be more successful here). Unless we
+      // have some test case that proves this to be correct we should avoid
+      // calling EmitFuncArgumentDbgValue here.
       if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
         LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                           << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
@@ -1186,12 +1199,173 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
       } else
         LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                           << "in EmitFuncArgumentDbgValue\n");
-    } else
+    } else {
       LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      auto Undef =
+          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+      auto SDV =
+          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+      DAG.AddDbgValue(SDV, nullptr, false);
+    }
   }
   DDIV.clear();
 }
 
+/// Last-chance lowering of a dangling dbg.value: encode, salvage, or undef.
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+  Value *V = DDI.getDI()->getValue();
+  DILocalVariable *Var = DDI.getDI()->getVariable();
+  DIExpression *Expr = DDI.getDI()->getExpression();
+  DebugLoc DL = DDI.getdl();
+  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+  unsigned SDOrder = DDI.getSDNodeOrder();
+
+  // Currently we consider only dbg.value intrinsics -- we tell the salvager
+  // that DW_OP_stack_value is desired.
+  assert(isa<DbgValueInst>(DDI.getDI()));
+  bool StackValue = true;
+
+  // Can this Value be encoded without any further work?
+  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+    return;
+
+  // Attempt to salvage back through as many instructions as possible. Bail if
+  // a non-instruction is seen, such as a constant expression or global
+  // variable. FIXME: Further work could recover those too.
+  while (isa<Instruction>(V)) {
+    Instruction &VAsInst = *cast<Instruction>(V);
+    DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+    // If we cannot salvage any further, and haven't yet found a suitable debug
+    // expression, bail out.
+    if (!NewExpr)
+      break;
+
+    // New value and expr now represent this debuginfo.
+    V = VAsInst.getOperand(0);
+    Expr = NewExpr;
+
+    // Some kind of simplification occurred: check whether the operand of the
+    // salvaged debug expression can be encoded in this DAG.
+    if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+      LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n  "
+                        << *DDI.getDI() << "\nBy stripping back to:\n  " << *V);
+      return;
+    }
+  }
+
+  // This was the final opportunity to salvage this debug information, and it
+  // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+  // any earlier variable location.
+  auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+  auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+  DAG.AddDbgValue(SDV, nullptr, false);
+
+  LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n  " << *DDI.getDI()
+                    << "\n  Last seen at:\n    "
+                    << *DDI.getDI()->getOperand(0) << "\n");
+}
+
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+                                           DIExpression *Expr, DebugLoc dl,
+                                           DebugLoc InstDL, unsigned Order) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDDbgValue *SDV; // NOTE(review): Order is never read below; SDNodeOrder is.
+  if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+      isa<ConstantPointerNull>(V)) {
+    SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+    DAG.AddDbgValue(SDV, nullptr, false);
+    return true;
+  }
+
+  // If the Value is a frame index, we can create a FrameIndex debug value
+  // without relying on the DAG at all.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    auto SI = FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end()) {
+      auto SDV =
+          DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
+                                    /*IsIndirect*/ false, dl, SDNodeOrder);
+      // Do not attach the SDNodeDbgValue to an SDNode: this variable location
+      // is still available even if the SDNode gets optimized out.
+      DAG.AddDbgValue(SDV, nullptr, false);
+      return true;
+    }
+  }
+
+  // Do not use getValue() in here; we don't want to generate code at
+  // this point if it hasn't been done yet.
+  SDValue N = NodeMap[V];
+  if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+    N = UnusedArgNodeMap[V];
+  if (N.getNode()) {
+    if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+      return true;
+    SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
+    DAG.AddDbgValue(SDV, N.getNode(), false);
+    return true;
+  }
+
+  // Special rules apply for the first dbg.values of parameter variables in a
+  // function. Identify them by the fact they reference Argument Values, that
+  // they're parameters, and they are parameters of the current function. We
+  // need to let them dangle until they get an SDNode.
+  bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
+                       !InstDL.getInlinedAt();
+  if (!IsParamOfFunc) {
+    // The value is not used in this block yet (or it would have an SDNode).
+    // We still want the value to appear for the user if possible -- if it has
+    // an associated VReg, we can refer to that instead.
+    auto VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end()) {
+      unsigned Reg = VMI->second;
+      // If this is a PHI node, it may be split up into several MI PHI nodes
+      // (in FunctionLoweringInfo::set).
+      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+                       V->getType(), None);
+      if (RFV.occupiesMultipleRegs()) {
+        unsigned Offset = 0;
+        unsigned BitsToDescribe = 0;
+        if (auto VarSize = Var->getSizeInBits())
+          BitsToDescribe = *VarSize;
+        if (auto Fragment = Expr->getFragmentInfo())
+          BitsToDescribe = Fragment->SizeInBits;
+        for (auto RegAndSize : RFV.getRegsAndSizes()) {
+          unsigned RegisterSize = RegAndSize.second;
+          // Bail out if all bits are described already.
+          if (Offset >= BitsToDescribe)
+            break;
+          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+              ? BitsToDescribe - Offset
+              : RegisterSize;
+          auto FragmentExpr = DIExpression::createFragmentExpression(
+              Expr, Offset, FragmentSize);
+          if (!FragmentExpr)
+              continue;
+          SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+                                    false, dl, SDNodeOrder);
+          DAG.AddDbgValue(SDV, nullptr, false);
+          Offset += RegisterSize;
+        }
+      } else {
+        SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, nullptr, false);
+      }
+      return true;
+    }
+  }
+
+  return false; // No debug value was emitted for V.
+}
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+  // Give every still-dangling dbg.value a last salvage attempt, then clear.
+  for (auto &Entry : DanglingDebugInfoMap)
+    for (DanglingDebugInfo &Dangling : Entry.second)
+      salvageUnresolvedDbgValue(Dangling);
+  clearDanglingDebugInfo();
+}
+
 /// getCopyFromRegs - If there was virtual register allocated for the value V
 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
@@ -1469,6 +1643,36 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
   }
 }
 
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. In case the exception caught is not of the types the catch pad
+// catches, it will be rethrown by a rethrow.
+static void findWasmUnwindDestinations(
+    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+    BranchProbability Prob,
+    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+        &UnwindDests) {
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads.
+      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
+      break;
+    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      // Add the catchpad handlers to the possible destinations. We don't
+      // continue to the unwind destination of the catchswitch for wasm.
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+        UnwindDests.back().first->setIsEHScopeEntry();
+      }
+      break;
+    }
+    // Any other pad kind also stops here: EHPadBB never advances in this loop.
+    break;
+  }
+}
+
 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
 /// many places it could ultimately go. In the IR, we have a single unwind
 /// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -1489,6 +1693,13 @@ static void findUnwindDestinations(
   bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
   bool IsSEH = isAsynchronousEHPersonality(Personality);
 
+  if (IsWasmCXX) {
+    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+    assert(UnwindDests.size() <= 1 &&
+           "There should be at most one unwind destination for wasm");
+    return;
+  }
+
   while (EHPadBB) {
     const Instruction *Pad = EHPadBB->getFirstNonPHI();
     BasicBlock *NewEHPadBB = nullptr;
@@ -1501,8 +1712,7 @@ static void findUnwindDestinations(
       // personalities.
       UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
       UnwindDests.back().first->setIsEHScopeEntry();
-      if (!IsWasmCXX)
-        UnwindDests.back().first->setIsEHFuncletEntry();
+      UnwindDests.back().first->setIsEHFuncletEntry();
       break;
     } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
       // Add the catchpad handlers to the possible destinations.
@@ -1588,9 +1798,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                         DemoteReg, PtrValueVTs[0]);
     SDValue RetOp = getValue(I.getOperand(0));
 
-    SmallVector<EVT, 4> ValueVTs;
+    SmallVector<EVT, 4> ValueVTs, MemVTs;
     SmallVector<uint64_t, 4> Offsets;
-    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+                    &Offsets);
     unsigned NumValues = ValueVTs.size();
 
     SmallVector<SDValue, 4> Chains(NumValues);
@@ -1598,8 +1809,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       // An aggregate return value cannot wrap around the address space, so
       // offsets to its parts don't wrap either.
       SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
-      Chains[i] = DAG.getStore(
-          Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+
+      SDValue Val = RetOp.getValue(i);
+      if (MemVTs[i] != ValueVTs[i])
+        Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+      Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
           // FIXME: better loc info would be nice.
           Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
     }
@@ -1615,6 +1829,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 
       const Function *F = I.getParent()->getParent();
 
+      bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+          I.getOperand(0)->getType(), F->getCallingConv(),
+          /*IsVarArg*/ false);
+
       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
       if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                           Attribute::SExt))
@@ -1647,6 +1865,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         if (RetInReg)
           Flags.setInReg();
 
+        if (I.getOperand(0)->getType()->isPointerTy()) {
+          Flags.setPointer();
+          Flags.setPointerAddrSpace(
+              cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+        }
+
+        if (NeedsRegBlock) {
+          Flags.setInConsecutiveRegs();
+          if (j == NumValues - 1)
+            Flags.setInConsecutiveRegsLast();
+        }
+
         // Propagate extension type if any
         if (ExtendKind == ISD::SIGN_EXTEND)
           Flags.setSExt();
@@ -1668,7 +1898,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   const Function *F = I.getParent()->getParent();
   if (TLI.supportSwiftError() &&
       F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
-    assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
+    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
     Flags.setSwiftError();
     Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1677,8 +1907,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                   0 /*partOffs*/));
     // Create SDNode for the swifterror virtual register.
     OutVals.push_back(
-        DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
-                            &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                         EVT(TLI.getPointerTy(DL))));
   }
 
@@ -1825,7 +2055,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
 
       CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
                    TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
-      SwitchCases.push_back(CB);
+      SL->SwitchCases.push_back(CB);
       return;
     }
   }
@@ -1834,7 +2064,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
   ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
   CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
                nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
-  SwitchCases.push_back(CB);
+  SL->SwitchCases.push_back(CB);
 }
 
 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
@@ -2043,27 +2273,27 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       // If the compares in later blocks need to use values not currently
       // exported from this block, export them now.  This block should always
       // be the first entry.
-      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+      assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
 
       // Allow some cases to be rejected.
-      if (ShouldEmitAsBranches(SwitchCases)) {
-        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
-          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
-          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+      if (ShouldEmitAsBranches(SL->SwitchCases)) {
+        for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
         }
 
         // Emit the branch for this block.
-        visitSwitchCase(SwitchCases[0], BrMBB);
-        SwitchCases.erase(SwitchCases.begin());
+        visitSwitchCase(SL->SwitchCases[0], BrMBB);
+        SL->SwitchCases.erase(SL->SwitchCases.begin());
         return;
       }
 
       // Okay, we decided not to do this, remove any inserted MBB's and clear
       // SwitchCases.
-      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
-        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+      for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+        FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
 
-      SwitchCases.clear();
+      SL->SwitchCases.clear();
     }
   }
 
@@ -2084,6 +2314,20 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
   SDValue CondLHS = getValue(CB.CmpLHS);
   SDLoc dl = CB.DL;
 
+  if (CB.CC == ISD::SETTRUE) {
+    // Branch or fall through to TrueBB.
+    addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+    SwitchBB->normalizeSuccProbs();
+    if (CB.TrueBB != NextBlock(SwitchBB)) {
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(CB.TrueBB)));
+    }
+    return;
+  }
+
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
   // Build the setcc now.
   if (!CB.CmpMHS) {
     // Fold "(X == true)" to X and "(X == false)" to !X to
@@ -2095,8 +2339,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
              CB.CC == ISD::SETEQ) {
       SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
       Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
-    } else
-      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+    } else {
+      SDValue CondRHS = getValue(CB.CmpRHS);
+
+      // If a pointer's DAG type is larger than its memory type then the DAG
+      // values are zero-extended. This breaks signed comparisons so truncate
+      // back to the underlying type before doing the compare.
+      if (CondLHS.getValueType() != MemVT) {
+        CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+        CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+      }
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+    }
   } else {
     assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
 
@@ -2147,7 +2401,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
 }
 
 /// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
   // Emit the code for the jump table
   assert(JT.Reg != -1U && "Should lower JT Header first!");
   EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -2162,14 +2416,12 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
 
 /// visitJumpTableHeader - This function emits necessary code to produce index
 /// in the JumpTable from switch case.
-void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                                JumpTableHeader &JTH,
                                                MachineBasicBlock *SwitchBB) {
   SDLoc dl = getCurSDLoc();
 
-  // Subtract the lowest switch case value from the value being switched on and
-  // conditional branch to default mbb if the result is greater than the
-  // difference between smallest and largest cases.
+  // Subtract the lowest switch case value from the value being switched on.
   SDValue SwitchOp = getValue(JTH.SValue);
   EVT VT = SwitchOp.getValueType();
   SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
@@ -2189,24 +2441,33 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
                                     JumpTableReg, SwitchOp);
   JT.Reg = JumpTableReg;
 
-  // Emit the range check for the jump table, and branch to the default block
-  // for the switch statement if the value being switched on exceeds the largest
-  // case in the switch.
-  SDValue CMP = DAG.getSetCC(
-      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
-                                 Sub.getValueType()),
-      Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
-
-  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, CopyTo, CMP,
-                               DAG.getBasicBlock(JT.Default));
-
-  // Avoid emitting unnecessary branches to the next block.
-  if (JT.MBB != NextBlock(SwitchBB))
-    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
-                         DAG.getBasicBlock(JT.MBB));
-
-  DAG.setRoot(BrCond);
+  if (!JTH.OmitRangeCheck) {
+    // Emit the range check for the jump table, and branch to the default block
+    // for the switch statement if the value being switched on exceeds the
+    // largest case in the switch.
+    SDValue CMP = DAG.getSetCC(
+        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                                   Sub.getValueType()),
+        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                                 MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                           DAG.getBasicBlock(JT.MBB));
+
+    DAG.setRoot(BrCond);
+  } else {
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+                              DAG.getBasicBlock(JT.MBB)));
+    else
+      DAG.setRoot(CopyTo);
+  }
 }
 
 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
@@ -2215,6 +2476,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue &Chain) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
   MachineFunction &MF = DAG.getMachineFunction();
   Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
   MachineSDNode *Node =
@@ -2227,6 +2489,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
         MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
     DAG.setNodeMemRefs(Node, {MemRef});
   }
+  if (PtrTy != PtrMemTy)
+    return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
   return SDValue(Node, 0);
 }
 
@@ -2242,6 +2506,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   // First create the loads to the guard/stack slot for the comparison.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
 
   MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
   int FI = MFI.getStackProtectorIndex();
@@ -2254,7 +2519,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
 
   // Generate code to load the content of the guard slot.
   SDValue GuardVal = DAG.getLoad(
-      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+      PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
       MachineMemOperand::MOVolatile);
 
@@ -2262,27 +2527,26 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
     GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
 
   // Retrieve guard check function, nullptr if instrumentation is inlined.
-  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
     // The target provides a guard check function to validate the guard value.
     // Generate a call to that function with the content of the guard slot as
     // argument.
-    auto *Fn = cast<Function>(GuardCheck);
-    FunctionType *FnTy = Fn->getFunctionType();
+    FunctionType *FnTy = GuardCheckFn->getFunctionType();
     assert(FnTy->getNumParams() == 1 && "Invalid function signature");
 
     TargetLowering::ArgListTy Args;
     TargetLowering::ArgListEntry Entry;
     Entry.Node = GuardVal;
     Entry.Ty = FnTy->getParamType(0);
-    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
       Entry.IsInReg = true;
     Args.push_back(Entry);
 
     TargetLowering::CallLoweringInfo CLI(DAG);
     CLI.setDebugLoc(getCurSDLoc())
-      .setChain(DAG.getEntryNode())
-      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
-                 getValue(GuardCheck), std::move(Args));
+        .setChain(DAG.getEntryNode())
+        .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+                   getValue(GuardCheckFn), std::move(Args));
 
     std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
     DAG.setRoot(Result.second);
@@ -2298,9 +2562,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
     const Value *IRGuard = TLI.getSDagStackGuard(M);
     SDValue GuardPtr = getValue(IRGuard);
 
-    Guard =
-        DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
-                    Align, MachineMemOperand::MOVolatile);
+    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+                        MachinePointerInfo(IRGuard, 0), Align,
+                        MachineMemOperand::MOVolatile);
   }
 
   // Perform the comparison via a subtract/getsetcc.
@@ -2339,6 +2603,12 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
   SDValue Chain =
       TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
                       None, false, getCurSDLoc(), false, false).second;
+  // On PS4, the "return address" must still be within the calling function,
+  // even if it's at the very end, so emit an explicit TRAP here.
+  // Passing 'true' for doesNotReturn above won't generate the trap for us.
+  if (TM.getTargetTriple().isPS4CPU())
+    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
   DAG.setRoot(Chain);
 }
 
@@ -2493,6 +2763,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
     case Intrinsic::experimental_gc_statepoint:
       LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
       break;
+    case Intrinsic::wasm_rethrow_in_catch: {
+      // This is usually done in visitTargetIntrinsic, but this intrinsic is
+      // special because it can be invoked, so we manually lower it to a DAG
+      // node here.
+      SmallVector<SDValue, 8> Ops;
+      Ops.push_back(getRoot()); // inchain
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      Ops.push_back(
+          DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+                                TLI.getPointerTy(DAG.getDataLayout())));
+      SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+      DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+      break;
+    }
     }
   } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
     // Currently we do not lower any intrinsic calls with deopt operand bundles.
@@ -2528,6 +2812,35 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   InvokeMBB->normalizeSuccProbs();
 
   // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+  MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+  // have to do anything here to lower funclet bundles.
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+         "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+  assert(isa<InlineAsm>(I.getCalledValue()) &&
+         "Only know how to handle inlineasm callbr");
+  visitInlineAsm(&I);
+
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+  // Update successor info.
+  addSuccessorWithProb(CallBrMBB, Return);
+  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+    MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
+    addSuccessorWithProb(CallBrMBB, Target);
+  }
+  CallBrMBB->normalizeSuccProbs();
+
+  // Drop into default successor.
   DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                           MVT::Other, getControlRoot(),
                           DAG.getBasicBlock(Return)));
@@ -2585,49 +2898,17 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
   setValue(&LP, Res);
 }
 
-void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
-#ifndef NDEBUG
-  for (const CaseCluster &CC : Clusters)
-    assert(CC.Low == CC.High && "Input clusters must be single-case");
-#endif
-
-  llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
-    return a.Low->getValue().slt(b.Low->getValue());
-  });
-
-  // Merge adjacent clusters with the same destination.
-  const unsigned N = Clusters.size();
-  unsigned DstIndex = 0;
-  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
-    CaseCluster &CC = Clusters[SrcIndex];
-    const ConstantInt *CaseVal = CC.Low;
-    MachineBasicBlock *Succ = CC.MBB;
-
-    if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
-        (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
-      // If this case has the same successor and is a neighbour, merge it into
-      // the previous cluster.
-      Clusters[DstIndex - 1].High = CaseVal;
-      Clusters[DstIndex - 1].Prob += CC.Prob;
-    } else {
-      std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
-                   sizeof(Clusters[SrcIndex]));
-    }
-  }
-  Clusters.resize(DstIndex);
-}
-
 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
                                            MachineBasicBlock *Last) {
   // Update JTCases.
-  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
-    if (JTCases[i].first.HeaderBB == First)
-      JTCases[i].first.HeaderBB = Last;
+  for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
+    if (SL->JTCases[i].first.HeaderBB == First)
+      SL->JTCases[i].first.HeaderBB = Last;
 
   // Update BitTestCases.
-  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
-    if (BitTestCases[i].Parent == First)
-      BitTestCases[i].Parent = Last;
+  for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
+    if (SL->BitTestCases[i].Parent == First)
+      SL->BitTestCases[i].Parent = Last;
 }
 
 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
@@ -2916,6 +3197,18 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
   SDValue Op2 = getValue(I.getOperand(1));
   ISD::CondCode Opcode = getICmpCondCode(predicate);
 
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+  // If a pointer's DAG type is larger than its memory type then the DAG values
+  // are zero-extended. This breaks signed comparisons so truncate back to the
+  // underlying type before doing the compare.
+  if (Op1.getValueType() != MemVT) {
+    Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+    Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+  }
+
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
   setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
@@ -2963,6 +3256,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
   ISD::NodeType OpCode = Cond.getValueType().isVector() ?
     ISD::VSELECT : ISD::SELECT;
 
+  bool IsUnaryAbs = false;
+
   // Min/max matching is only viable if all output VTs are the same.
   if (is_splat(ValueVTs)) {
     EVT VT = ValueVTs[0];
@@ -3023,10 +3318,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
         break;
       }
       break;
+    case SPF_ABS:
+      IsUnaryAbs = true;
+      Opc = ISD::ABS;
+      break;
+    case SPF_NABS:
+      // TODO: we need to produce sub(0, abs(X)).
     default: break;
     }
 
-    if (Opc != ISD::DELETED_NODE &&
+    if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
         (TLI.isOperationLegalOrCustom(Opc, VT) ||
          (UseScalarMinMax &&
           TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
@@ -3039,15 +3340,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
       RHSVal = getValue(RHS);
       BaseOps = {};
     }
+
+    if (IsUnaryAbs) {
+      OpCode = Opc;
+      LHSVal = getValue(LHS);
+      BaseOps = {};
+    }
   }
 
-  for (unsigned i = 0; i != NumValues; ++i) {
-    SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
-    Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
-    Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
-    Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
-                            LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
-                            Ops);
+  if (IsUnaryAbs) {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      Values[i] =
+          DAG.getNode(OpCode, getCurSDLoc(),
+                      LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+                      SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+    }
+  } else {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+      Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+      Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+      Values[i] = DAG.getNode(
+          OpCode, getCurSDLoc(),
+          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+    }
   }
 
   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
@@ -3135,18 +3451,26 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
+  auto &TLI = DAG.getTargetLoweringInfo();
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+  EVT PtrMemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
-                                                        I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitBitCast(const User &I) {
@@ -3284,12 +3608,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     MOps1[0] = Src1;
     MOps2[0] = Src2;
 
-    Src1 = Src1.isUndef()
-               ? DAG.getUNDEF(PaddedVT)
-               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
-    Src2 = Src2.isUndef()
-               ? DAG.getUNDEF(PaddedVT)
-               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+    Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+    Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
 
     // Readjust mask for new input vector length.
     SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
@@ -3498,6 +3818,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
   SDValue N = getValue(Op0);
   SDLoc dl = getCurSDLoc();
+  auto &TLI = DAG.getTargetLoweringInfo();
+  MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+  MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
 
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
@@ -3555,6 +3878,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
           Flags.setNoUnsignedWrap(true);
 
+        OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
         continue;
       }
@@ -3580,7 +3905,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, dl, IdxN.getValueType()));
         } else {
-          SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+                                          IdxN.getValueType());
           IdxN = DAG.getNode(ISD::MUL, dl,
                              N.getValueType(), IdxN, Scale);
         }
@@ -3591,6 +3917,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
     }
   }
 
+  if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+    N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
   setValue(&I, N);
 }
 
@@ -3675,16 +4004,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   bool isVolatile = I.isVolatile();
   bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
   bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
-  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
+  bool isDereferenceable =
+      isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
   unsigned Alignment = I.getAlignment();
 
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
-  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
+  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3750,12 +4080,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
       MMOFlags |= MachineMemOperand::MODereferenceable;
     MMOFlags |= TLI.getMMOFlags(I);
 
-    SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
                             MachinePointerInfo(SV, Offsets[i]), Alignment,
                             MMOFlags, AAInfo, Ranges);
+    Chains[ChainI] = L.getValue(1);
+
+    if (MemVTs[i] != ValueVTs[i])
+      L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
 
     Values[i] = L;
-    Chains[ChainI] = L.getValue(1);
   }
 
   if (!ConstantMemory) {
@@ -3785,15 +4118,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
 
   SDValue Src = getValue(SrcV);
   // Create a virtual register, then update the virtual register.
-  unsigned VReg; bool CreatedVReg;
-  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
+  unsigned VReg =
+      SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
   // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
   // Chain can be getRoot or getControlRoot.
   SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
                                       SDValue(Src.getNode(), Src.getResNo()));
   DAG.setRoot(CopyNode);
-  if (CreatedVReg)
-    FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
 }
 
 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3826,8 +4157,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
   // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
   SDValue L = DAG.getCopyFromReg(
       getRoot(), getCurSDLoc(),
-      FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
-      ValueVTs[0]);
+      SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
 
   setValue(&I, L);
 }
@@ -3854,10 +4184,10 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
     }
   }
 
-  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
-                  SrcV->getType(), ValueVTs, &Offsets);
+                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3899,9 +4229,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
     }
     SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                               DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
-    SDValue St = DAG.getStore(
-        Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
-        MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
+    SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+    if (MemVTs[i] != ValueVTs[i])
+      Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+    SDValue St =
+        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+                     Alignment, MMOFlags, AAInfo);
     Chains[ChainI] = St;
   }
 
@@ -4181,19 +4514,34 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
 
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
-  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
-  AtomicOrdering FailureOrder = I.getFailureOrdering();
+  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+  AtomicOrdering FailureOrdering = I.getFailureOrdering();
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
   MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
   SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
-  SDValue L = DAG.getAtomicCmpSwap(
-      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
-      getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
-      getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
-      /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
+
+  auto Alignment = DAG.getEVTAlignment(MemVT);
+
+  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+                            Flags, MemVT.getStoreSize(), Alignment,
+                            AAMDNodes(), nullptr, SSID, SuccessOrdering,
+                            FailureOrdering);
+
+  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+                                   dl, MemVT, VTs, InChain,
+                                   getValue(I.getPointerOperand()),
+                                   getValue(I.getCompareOperand()),
+                                   getValue(I.getNewValOperand()), MMO);
 
   SDValue OutChain = L.getValue(2);
 
@@ -4217,20 +4565,32 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
   case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
   case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
   case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+  case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+  case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
   }
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering Ordering = I.getOrdering();
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
+  auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+  auto Alignment = DAG.getEVTAlignment(MemVT);
+
+  auto Flags = MachineMemOperand::MOLoad |  MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+                            MemVT.getStoreSize(), Alignment, AAMDNodes(),
+                            nullptr, SSID, Ordering);
+
   SDValue L =
-    DAG.getAtomic(NT, dl,
-                  getValue(I.getValOperand()).getSimpleValueType(),
-                  InChain,
-                  getValue(I.getPointerOperand()),
-                  getValue(I.getValOperand()),
-                  I.getPointerOperand(),
-                  /* Alignment=*/ 0, Order, SSID);
+    DAG.getAtomic(NT, dl, MemVT, InChain,
+                  getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+                  MMO);
 
   SDValue OutChain = L.getValue(1);
 
@@ -4259,27 +4619,39 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
 
   if (!TLI.supportsUnalignedAtomics() &&
-      I.getAlignment() < VT.getStoreSize())
+      I.getAlignment() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
+  auto Flags = MachineMemOperand::MOLoad;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+    Flags |= MachineMemOperand::MOInvariant;
+  if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+                               DAG.getDataLayout()))
+    Flags |= MachineMemOperand::MODereferenceable;
+
+  Flags |= TLI.getMMOFlags(I);
+
   MachineMemOperand *MMO =
       DAG.getMachineFunction().
       getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
-                           MachineMemOperand::MOVolatile |
-                           MachineMemOperand::MOLoad,
-                           VT.getStoreSize(),
+                           Flags, MemVT.getStoreSize(),
                            I.getAlignment() ? I.getAlignment() :
-                                              DAG.getEVTAlignment(VT),
+                                              DAG.getEVTAlignment(MemVT),
                            AAMDNodes(), nullptr, SSID, Order);
 
   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
   SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
                     getValue(I.getPointerOperand()), MMO);
 
   SDValue OutChain = L.getValue(1);
+  if (MemVT != VT)
+    L = DAG.getPtrExtOrTrunc(L, dl, VT);
 
   setValue(&I, L);
   DAG.setRoot(OutChain);
@@ -4288,25 +4660,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   SDLoc dl = getCurSDLoc();
 
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering Ordering = I.getOrdering();
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT =
-      TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getStoreSize())
+  if (I.getAlignment() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic store");
 
-  SDValue OutChain =
-    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
-                  InChain,
-                  getValue(I.getPointerOperand()),
-                  getValue(I.getValueOperand()),
-                  I.getPointerOperand(), I.getAlignment(),
-                  Order, SSID);
+  auto Flags = MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= TLI.getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+                            MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
+                            nullptr, SSID, Ordering);
+
+  SDValue Val = getValue(I.getValueOperand());
+  if (Val.getValueType() != MemVT)
+    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+
+  SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+                                   getValue(I.getPointerOperand()), Val, MMO);
+
 
   DAG.setRoot(OutChain);
 }
@@ -4364,10 +4747,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   SDValue Result;
   if (IsTgtIntrinsic) {
     // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
-      Ops, Info.memVT,
-      MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
-      Info.flags, Info.size);
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
+    Result =
+        DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+                                MachinePointerInfo(Info.ptrVal, Info.offset),
+                                Info.align, Info.flags, Info.size, AAInfo);
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
   } else if (!I.getType()->isVoidTy()) {
@@ -4889,7 +5274,7 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
       return DAG.getConstantFP(1.0, DL, LHS.getValueType());
 
     const Function &F = DAG.getMachineFunction().getFunction();
-    if (!F.optForSize() ||
+    if (!F.hasOptSize() ||
         // If optimizing for size, don't insert too many multiplies.
         // This inserts up to 5 multiplies.
         countPopulation(Val) + Log2_32(Val) < 7) {
@@ -4952,6 +5337,71 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (!Arg)
     return false;
 
+  if (!IsDbgDeclare) {
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+    // the entry block.
+    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+    if (!IsInEntryBlock)
+      return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block.  So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that also is a param.
+    //
+    // Although, if we are at the top of the entry block already, we can still
+    // emit using ArgDbgValue. This might catch some situations when the
+    // dbg.value refers to an argument that isn't used in the entry block, so
+    // any CopyToReg node would be optimized out and the only way to express
+    // this DBG_VALUE is by using the physical reg (or FI) as done in this
+    // method.  ArgDbgValues are hoisted to the beginning of the entry block. So
+    // we should only emit as ArgDbgValue if the Variable is an argument to the
+    // current function, and the dbg.value intrinsic is found in the entry
+    // block.
+    bool VariableIsFunctionInputArg = Variable->isParameter() &&
+        !DL->getInlinedAt();
+    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+    if (!IsInPrologue && !VariableIsFunctionInputArg)
+      return false;
+
+    // Here we assume that a function argument on IR level only can be used to
+    // describe one input parameter on source level. If we for example have
+    // source code like this
+    //
+    //    struct A { long x, y; };
+    //    void foo(struct A a, long b) {
+    //      ...
+    //      b = a.x;
+    //      ...
+    //    }
+    //
+    // and IR like this
+    //
+    //  define void @foo(i32 %a1, i32 %a2, i32 %b)  {
+    //  entry:
+    //    call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %b, "b",
+    //    ...
+    //    call void @llvm.dbg.value(metadata i32 %a1, "b"
+    //    ...
+    //
+    // then the last dbg.value is describing a parameter "b" using a value that
+    // is an argument. But since we already used %a1 to describe a parameter
+    // we should not handle that last dbg.value here (that would result in an
+    // incorrect hoisting of the DBG_VALUE to the function entry).
+    // Notice that we allow one dbg.value per IR level argument, to accommodate
+    // the situation with fragments above.
+    if (VariableIsFunctionInputArg) {
+      unsigned ArgNo = Arg->getArgNo();
+      if (ArgNo >= FuncInfo.DescribedArgs.size())
+        FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+      else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+        return false;
+      FuncInfo.DescribedArgs.set(ArgNo);
+    }
+  }
+
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
 
@@ -4976,12 +5426,14 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
   }
 
-  if (!Op && N.getNode())
+  if (!Op && N.getNode()) {
     // Check if frame index is available.
-    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+    SDValue LCandidate = peekThroughBitcasts(N);
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
       if (FrameIndexSDNode *FINode =
           dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
         Op = MachineOperand::CreateFI(FINode->getIndex());
+  }
 
   if (!Op) {
     // Check if ValueMap has reg number.
@@ -5055,11 +5507,29 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
 #  define setjmp_undefined_for_msvc
 #endif
 
-/// Lower the call to the specified intrinsic function. If we want to emit this
-/// as a call to a named external function, return the name. Otherwise, lower it
-/// and return null.
-const char *
-SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+  switch (Intrinsic) {
+  case Intrinsic::smul_fix:
+    return ISD::SMULFIX;
+  case Intrinsic::umul_fix:
+    return ISD::UMULFIX;
+  default:
+    llvm_unreachable("Unhandled fixed point intrinsic");
+  }
+}
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+                                           const char *FunctionName) {
+  assert(FunctionName && "FunctionName must not be nullptr");
+  SDValue Callee = DAG.getExternalSymbol(
+      FunctionName,
+      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+                                             unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDLoc sdl = getCurSDLoc();
   DebugLoc dl = getCurDebugLoc();
@@ -5069,28 +5539,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   default:
     // By default, turn this into a target intrinsic node.
     visitTargetIntrinsic(I, Intrinsic);
-    return nullptr;
-  case Intrinsic::vastart:  visitVAStart(I); return nullptr;
-  case Intrinsic::vaend:    visitVAEnd(I); return nullptr;
-  case Intrinsic::vacopy:   visitVACopy(I); return nullptr;
+    return;
+  case Intrinsic::vastart:  visitVAStart(I); return;
+  case Intrinsic::vaend:    visitVAEnd(I); return;
+  case Intrinsic::vacopy:   visitVACopy(I); return;
   case Intrinsic::returnaddress:
     setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::addressofreturnaddress:
     setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
-    return nullptr;
+    return;
   case Intrinsic::sponentry:
     setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
-    return nullptr;
+    return;
   case Intrinsic::frameaddress:
     setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::read_register: {
     Value *Reg = I.getArgOperand(0);
     SDValue Chain = getRoot();
@@ -5101,7 +5571,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       DAG.getVTList(VT, MVT::Other), Chain, RegName);
     setValue(&I, Res);
     DAG.setRoot(Res.getValue(1));
-    return nullptr;
+    return;
   }
   case Intrinsic::write_register: {
     Value *Reg = I.getArgOperand(0);
@@ -5111,12 +5581,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
     DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
                             RegName, getValue(RegValue)));
-    return nullptr;
+    return;
   }
   case Intrinsic::setjmp:
-    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+    lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
+    return;
   case Intrinsic::longjmp:
-    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+    lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
+    return;
   case Intrinsic::memcpy: {
     const auto &MCI = cast<MemCpyInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
@@ -5135,7 +5607,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MC);
-    return nullptr;
+    return;
   }
   case Intrinsic::memset: {
     const auto &MSI = cast<MemSetInst>(I);
@@ -5149,7 +5621,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                                isTC, MachinePointerInfo(I.getArgOperand(0)));
     updateDAGForMaybeTailCall(MS);
-    return nullptr;
+    return;
   }
   case Intrinsic::memmove: {
     const auto &MMI = cast<MemMoveInst>(I);
@@ -5168,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 isTC, MachinePointerInfo(I.getArgOperand(0)),
                                 MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MM);
-    return nullptr;
+    return;
   }
   case Intrinsic::memcpy_element_unordered_atomic: {
     const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
@@ -5186,7 +5658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                      MachinePointerInfo(MI.getRawDest()),
                                      MachinePointerInfo(MI.getRawSource()));
     updateDAGForMaybeTailCall(MC);
-    return nullptr;
+    return;
   }
   case Intrinsic::memmove_element_unordered_atomic: {
     auto &MI = cast<AtomicMemMoveInst>(I);
@@ -5204,7 +5676,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                       MachinePointerInfo(MI.getRawDest()),
                                       MachinePointerInfo(MI.getRawSource()));
     updateDAGForMaybeTailCall(MC);
-    return nullptr;
+    return;
   }
   case Intrinsic::memset_element_unordered_atomic: {
     auto &MI = cast<AtomicMemSetInst>(I);
@@ -5220,7 +5692,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                      LengthTy, ElemSz, isTC,
                                      MachinePointerInfo(MI.getRawDest()));
     updateDAGForMaybeTailCall(MC);
-    return nullptr;
+    return;
   }
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
@@ -5235,7 +5707,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (!Address || isa<UndefValue>(Address) ||
         (Address->use_empty() && !isa<Argument>(Address))) {
       LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-      return nullptr;
+      return;
     }
 
     bool isParameter = Variable->isParameter() || isa<Argument>(Address);
@@ -5264,7 +5736,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
             Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
         DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
       }
-      return nullptr;
+      return;
     }
 
     SDValue &N = NodeMap[Address];
@@ -5286,7 +5758,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         // Address is an argument, so try to emit its dbg value using
         // virtual register info from the FuncInfo.ValueMap.
         EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
-        return nullptr;
+        return;
       } else {
         SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
                               true, dl, SDNodeOrder);
@@ -5300,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       }
     }
-    return nullptr;
+    return;
   }
   case Intrinsic::dbg_label: {
     const DbgLabelInst &DI = cast<DbgLabelInst>(I);
@@ -5310,7 +5782,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDDbgLabel *SDV;
     SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
     DAG.AddDbgLabel(SDV);
-    return nullptr;
+    return;
   }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -5321,88 +5793,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     dropDanglingDebugInfo(Variable, Expression);
     const Value *V = DI.getValue();
     if (!V)
-      return nullptr;
+      return;
 
-    SDDbgValue *SDV;
-    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
-        isa<ConstantPointerNull>(V)) {
-      SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, nullptr, false);
-      return nullptr;
-    }
-
-    // Do not use getValue() in here; we don't want to generate code at
-    // this point if it hasn't been done yet.
-    SDValue N = NodeMap[V];
-    if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
-      N = UnusedArgNodeMap[V];
-    if (N.getNode()) {
-      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
-        return nullptr;
-      SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, N.getNode(), false);
-      return nullptr;
-    }
-
-    // PHI nodes have already been selected, so we should know which VReg that
-    // is assigns to already.
-    if (isa<PHINode>(V)) {
-      auto VMI = FuncInfo.ValueMap.find(V);
-      if (VMI != FuncInfo.ValueMap.end()) {
-        unsigned Reg = VMI->second;
-        // The PHI node may be split up into several MI PHI nodes (in
-        // FunctionLoweringInfo::set).
-        RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
-                         V->getType(), None);
-        if (RFV.occupiesMultipleRegs()) {
-          unsigned Offset = 0;
-          unsigned BitsToDescribe = 0;
-          if (auto VarSize = Variable->getSizeInBits())
-            BitsToDescribe = *VarSize;
-          if (auto Fragment = Expression->getFragmentInfo())
-            BitsToDescribe = Fragment->SizeInBits;
-          for (auto RegAndSize : RFV.getRegsAndSizes()) {
-            unsigned RegisterSize = RegAndSize.second;
-            // Bail out if all bits are described already.
-            if (Offset >= BitsToDescribe)
-              break;
-            unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
-                ? BitsToDescribe - Offset
-                : RegisterSize;
-            auto FragmentExpr = DIExpression::createFragmentExpression(
-                Expression, Offset, FragmentSize);
-            if (!FragmentExpr)
-                continue;
-            SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
-                                      false, dl, SDNodeOrder);
-            DAG.AddDbgValue(SDV, nullptr, false);
-            Offset += RegisterSize;
-          }
-        } else {
-          SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
-                                    SDNodeOrder);
-          DAG.AddDbgValue(SDV, nullptr, false);
-        }
-        return nullptr;
-      }
-    }
+    if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
+        SDNodeOrder))
+      return;
 
-    // TODO: When we get here we will either drop the dbg.value completely, or
-    // we try to move it forward by letting it dangle for awhile. So we should
-    // probably add an extra DbgValue to the DAG here, with a reference to
-    // "noreg", to indicate that we have lost the debug location for the
-    // variable.
+    // TODO: Dangling debug info will eventually either be resolved or produce
+    // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+    // between the original dbg.value location and its resolved DBG_VALUE, which
+    // we should ideally fill with an extra Undef DBG_VALUE.
 
-    if (!V->use_empty() ) {
-      // Do not call getValue(V) yet, as we don't want to generate code.
-      // Remember it for later.
-      DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
-      return nullptr;
-    }
-
-    LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
-    LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
-    return nullptr;
+    DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
+    return;
   }
 
   case Intrinsic::eh_typeid_for: {
@@ -5411,7 +5814,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
     Res = DAG.getConstant(TypeID, sdl, MVT::i32);
     setValue(&I, Res);
-    return nullptr;
+    return;
   }
 
   case Intrinsic::eh_return_i32:
@@ -5422,15 +5825,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                             getControlRoot(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1))));
-    return nullptr;
+    return;
   case Intrinsic::eh_unwind_init:
     DAG.getMachineFunction().setCallsUnwindInit(true);
-    return nullptr;
+    return;
   case Intrinsic::eh_dwarf_cfa:
     setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::eh_sjlj_callsite: {
     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5438,7 +5841,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
 
     MMI.setCurrentCallSite(CI->getZExtValue());
-    return nullptr;
+    return;
   }
   case Intrinsic::eh_sjlj_functioncontext: {
     // Get and store the index of the function context.
@@ -5447,7 +5850,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
     int FI = FuncInfo.StaticAllocaMap[FnCtx];
     MFI.setFunctionContextIndex(FI);
-    return nullptr;
+    return;
   }
   case Intrinsic::eh_sjlj_setjmp: {
     SDValue Ops[2];
@@ -5457,34 +5860,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                              DAG.getVTList(MVT::i32, MVT::Other), Ops);
     setValue(&I, Op.getValue(0));
     DAG.setRoot(Op.getValue(1));
-    return nullptr;
+    return;
   }
   case Intrinsic::eh_sjlj_longjmp:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
                             getRoot(), getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::eh_sjlj_setup_dispatch:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
                             getRoot()));
-    return nullptr;
+    return;
   case Intrinsic::masked_gather:
     visitMaskedGather(I);
-    return nullptr;
+    return;
   case Intrinsic::masked_load:
     visitMaskedLoad(I);
-    return nullptr;
+    return;
   case Intrinsic::masked_scatter:
     visitMaskedScatter(I);
-    return nullptr;
+    return;
   case Intrinsic::masked_store:
     visitMaskedStore(I);
-    return nullptr;
+    return;
   case Intrinsic::masked_expandload:
     visitMaskedLoad(I, true /* IsExpanding */);
-    return nullptr;
+    return;
   case Intrinsic::masked_compressstore:
     visitMaskedStore(I, true /* IsCompressing */);
-    return nullptr;
+    return;
   case Intrinsic::x86_mmx_pslli_w:
   case Intrinsic::x86_mmx_pslli_d:
   case Intrinsic::x86_mmx_pslli_q:
@@ -5496,7 +5899,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue ShAmt = getValue(I.getArgOperand(1));
     if (isa<ConstantSDNode>(ShAmt)) {
       visitTargetIntrinsic(I, Intrinsic);
-      return nullptr;
+      return;
     }
     unsigned NewIntrinsic = 0;
     EVT ShAmtVT = MVT::v2i32;
@@ -5542,31 +5945,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                        DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
                        getValue(I.getArgOperand(0)), ShAmt);
     setValue(&I, Res);
-    return nullptr;
+    return;
   }
   case Intrinsic::powi:
     setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), DAG));
-    return nullptr;
+    return;
   case Intrinsic::log:
     setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::log2:
     setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::log10:
     setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::exp:
     setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::exp2:
     setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::pow:
     setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
                            getValue(I.getArgOperand(1)), DAG, TLI));
-    return nullptr;
+    return;
   case Intrinsic::sqrt:
   case Intrinsic::fabs:
   case Intrinsic::sin:
@@ -5597,61 +6000,71 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, DAG.getNode(Opcode, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
+  }
+  case Intrinsic::lround:
+  case Intrinsic::llround:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint: {
+    unsigned Opcode;
+    switch (Intrinsic) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    case Intrinsic::lround:  Opcode = ISD::LROUND;  break;
+    case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+    case Intrinsic::lrint:   Opcode = ISD::LRINT;   break;
+    case Intrinsic::llrint:  Opcode = ISD::LLRINT;  break;
+    }
+
+    EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+    setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+                             getValue(I.getArgOperand(0))));
+    return;
   }
-  case Intrinsic::minnum: {
-    auto VT = getValue(I.getArgOperand(0)).getValueType();
-    unsigned Opc =
-        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
-            ? ISD::FMINIMUM
-            : ISD::FMINNUM;
-    setValue(&I, DAG.getNode(Opc, sdl, VT,
+  case Intrinsic::minnum:
+    setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
-    return nullptr;
-  }
-  case Intrinsic::maxnum: {
-    auto VT = getValue(I.getArgOperand(0)).getValueType();
-    unsigned Opc =
-        I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
-            ? ISD::FMAXIMUM
-            : ISD::FMAXNUM;
-    setValue(&I, DAG.getNode(Opc, sdl, VT,
+    return;
+  case Intrinsic::maxnum:
+    setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
-    return nullptr;
-  }
+    return;
   case Intrinsic::minimum:
     setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
-    return nullptr;
+    return;
   case Intrinsic::maximum:
     setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
-    return nullptr;
+    return;
   case Intrinsic::copysign:
     setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1))));
-    return nullptr;
+    return;
   case Intrinsic::fma:
     setValue(&I, DAG.getNode(ISD::FMA, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1)),
                              getValue(I.getArgOperand(2))));
-    return nullptr;
+    return;
   case Intrinsic::experimental_constrained_fadd:
   case Intrinsic::experimental_constrained_fsub:
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fptrunc:
+  case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -5671,7 +6084,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_constrained_round:
   case Intrinsic::experimental_constrained_trunc:
     visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
-    return nullptr;
+    return;
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5693,7 +6106,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                 getValue(I.getArgOperand(2)));
       setValue(&I, Add);
     }
-    return nullptr;
+    return;
   }
   case Intrinsic::convert_to_fp16:
     setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
@@ -5701,17 +6114,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                          getValue(I.getArgOperand(0)),
                                          DAG.getTargetConstant(0, sdl,
                                                                MVT::i32))));
-    return nullptr;
+    return;
   case Intrinsic::convert_from_fp16:
     setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
                              TLI.getValueType(DAG.getDataLayout(), I.getType()),
                              DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
                                          getValue(I.getArgOperand(0)))));
-    return nullptr;
+    return;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getArgOperand(0));
     DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
-    return nullptr;
+    return;
   }
   case Intrinsic::readcyclecounter: {
     SDValue Op = getRoot();
@@ -5719,25 +6132,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                       DAG.getVTList(MVT::i64, MVT::Other), Op);
     setValue(&I, Res);
     DAG.setRoot(Res.getValue(1));
-    return nullptr;
+    return;
   }
   case Intrinsic::bitreverse:
     setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::bswap:
     setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::cttz: {
     SDValue Arg = getValue(I.getArgOperand(0));
     ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
     EVT Ty = Arg.getValueType();
     setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
                              sdl, Ty, Arg));
-    return nullptr;
+    return;
   }
   case Intrinsic::ctlz: {
     SDValue Arg = getValue(I.getArgOperand(0));
@@ -5745,13 +6158,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     EVT Ty = Arg.getValueType();
     setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
                              sdl, Ty, Arg));
-    return nullptr;
+    return;
   }
   case Intrinsic::ctpop: {
     SDValue Arg = getValue(I.getArgOperand(0));
     EVT Ty = Arg.getValueType();
     setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
-    return nullptr;
+    return;
   }
   case Intrinsic::fshl:
   case Intrinsic::fshr: {
@@ -5767,7 +6180,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
     if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
       setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
-      return nullptr;
+      return;
     }
 
     // When X == Y, this is rotate. If the data type has a power-of-2 size, we
@@ -5777,7 +6190,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
       if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
         setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
-        return nullptr;
+        return;
       }
 
       // Some targets only rotate one way. Try the opposite direction.
@@ -5786,7 +6199,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         // Negate the shift amount because it is safe to ignore the high bits.
         SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
         setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
-        return nullptr;
+        return;
       }
 
       // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
@@ -5796,7 +6209,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
       SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
       setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
-      return nullptr;
+      return;
     }
 
     // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
@@ -5816,39 +6229,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // For fshr, 0-shift returns the 2nd arg (Y).
     SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
     setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
-    return nullptr;
+    return;
   }
   case Intrinsic::sadd_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
-    return nullptr;
+    return;
   }
   case Intrinsic::uadd_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
-    return nullptr;
+    return;
   }
   case Intrinsic::ssub_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
-    return nullptr;
+    return;
   }
   case Intrinsic::usub_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
-    return nullptr;
+    return;
   }
-  case Intrinsic::smul_fix: {
+  case Intrinsic::smul_fix:
+  case Intrinsic::umul_fix: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     SDValue Op3 = getValue(I.getArgOperand(2));
-    setValue(&I,
-             DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
-    return nullptr;
+    setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+                             Op1.getValueType(), Op1, Op2, Op3));
+    return;
+  }
+  case Intrinsic::smul_fix_sat: {
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+    setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+                             Op3));
+    return;
   }
   case Intrinsic::stacksave: {
     SDValue Op = getRoot();
@@ -5857,26 +6279,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
     setValue(&I, Res);
     DAG.setRoot(Res.getValue(1));
-    return nullptr;
+    return;
   }
   case Intrinsic::stackrestore:
     Res = getValue(I.getArgOperand(0));
     DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
-    return nullptr;
+    return;
   case Intrinsic::get_dynamic_area_offset: {
     SDValue Op = getRoot();
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
     EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
     // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
     // target.
-    if (PtrTy != ResTy)
+    if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
       report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
                          " intrinsic!");
     Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
                       Op);
     DAG.setRoot(Op);
     setValue(&I, Res);
-    return nullptr;
+    return;
   }
   case Intrinsic::stackguard: {
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5896,7 +6318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
     DAG.setRoot(Chain);
     setValue(&I, Res);
-    return nullptr;
+    return;
   }
   case Intrinsic::stackprotector: {
     // Emit code into the DAG to store the stack guard onto the stack.
@@ -5923,7 +6345,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                        /* Alignment = */ 0, MachineMemOperand::MOVolatile);
     setValue(&I, Res);
     DAG.setRoot(Res);
-    return nullptr;
+    return;
   }
   case Intrinsic::objectsize: {
     // If we don't know by now, we're never going to know.
@@ -5940,14 +6362,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       Res = DAG.getConstant(0, sdl, Ty);
 
     setValue(&I, Res);
-    return nullptr;
+    return;
   }
 
   case Intrinsic::is_constant:
     // If this wasn't constant-folded away by now, then it's not a
     // constant.
     setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
-    return nullptr;
+    return;
 
   case Intrinsic::annotation:
   case Intrinsic::ptr_annotation:
@@ -5955,12 +6377,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::strip_invariant_group:
     // Drop the intrinsic, but forward the value
     setValue(&I, getValue(I.getOperand(0)));
-    return nullptr;
+    return;
   case Intrinsic::assume:
   case Intrinsic::var_annotation:
   case Intrinsic::sideeffect:
     // Discard annotate attributes, assumptions, and artificial side-effects.
-    return nullptr;
+    return;
 
   case Intrinsic::codeview_annotation: {
     // Emit a label associated with this metadata.
@@ -5971,7 +6393,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
     Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
     DAG.setRoot(Res);
-    return nullptr;
+    return;
   }
 
   case Intrinsic::init_trampoline: {
@@ -5988,13 +6410,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
 
     DAG.setRoot(Res);
-    return nullptr;
+    return;
   }
   case Intrinsic::adjust_trampoline:
     setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::gcroot: {
     assert(DAG.getMachineFunction().getFunction().hasGC() &&
            "only valid in functions with gc specified, enforced by Verifier");
@@ -6004,19 +6426,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
     FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
     GFI->addStackRoot(FI->getIndex(), TypeMap);
-    return nullptr;
+    return;
   }
   case Intrinsic::gcread:
   case Intrinsic::gcwrite:
     llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
   case Intrinsic::flt_rounds:
     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
-    return nullptr;
+    return;
 
   case Intrinsic::expect:
     // Just replace __builtin_expect(exp, c) with EXP.
     setValue(&I, getValue(I.getArgOperand(0)));
-    return nullptr;
+    return;
 
   case Intrinsic::debugtrap:
   case Intrinsic::trap: {
@@ -6028,7 +6450,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
         ISD::TRAP : ISD::DEBUGTRAP;
       DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
-      return nullptr;
+      return;
     }
     TargetLowering::ArgListTy Args;
 
@@ -6041,7 +6463,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
     std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
     DAG.setRoot(Result.second);
-    return nullptr;
+    return;
   }
 
   case Intrinsic::uadd_with_overflow:
@@ -6063,9 +6485,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
 
-    SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+    EVT ResultVT = Op1.getValueType();
+    EVT OverflowVT = MVT::i1;
+    if (ResultVT.isVector())
+      OverflowVT = EVT::getVectorVT(
+          *Context, OverflowVT, ResultVT.getVectorNumElements());
+
+    SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
     setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
-    return nullptr;
+    return;
   }
   case Intrinsic::prefetch: {
     SDValue Ops[5];
@@ -6088,21 +6516,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     PendingLoads.push_back(Result);
     Result = getRoot();
     DAG.setRoot(Result);
-    return nullptr;
+    return;
   }
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end: {
     bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
     // Stack coloring is not enabled in O0, discard region information.
     if (TM.getOptLevel() == CodeGenOpt::None)
-      return nullptr;
+      return;
 
-    SmallVector<Value *, 4> Allocas;
-    GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
+    const int64_t ObjectSize =
+        cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+    Value *const ObjectPtr = I.getArgOperand(1);
+    SmallVector<const Value *, 4> Allocas;
+    GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
 
-    for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
+    for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
            E = Allocas.end(); Object != E; ++Object) {
-      AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+      const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
 
       // Could not find an Alloca.
       if (!LifetimeObject)
@@ -6112,49 +6543,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       // valid frame index.
       auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
       if (SI == FuncInfo.StaticAllocaMap.end())
-        return nullptr;
-
-      int FI = SI->second;
-
-      SDValue Ops[2];
-      Ops[0] = getRoot();
-      Ops[1] =
-          DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
-      unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+        return;
 
-      Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
+      const int FrameIndex = SI->second;
+      int64_t Offset;
+      if (GetPointerBaseWithConstantOffset(
+              ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
+        Offset = -1; // Cannot determine offset from alloca to lifetime object.
+      Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
+                                Offset);
       DAG.setRoot(Res);
     }
-    return nullptr;
+    return;
   }
   case Intrinsic::invariant_start:
     // Discard region information.
     setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
-    return nullptr;
+    return;
   case Intrinsic::invariant_end:
     // Discard region information.
-    return nullptr;
+    return;
   case Intrinsic::clear_cache:
-    return TLI.getClearCacheBuiltinName();
+    /// FunctionName may be null.
+    if (const char *FunctionName = TLI.getClearCacheBuiltinName())
+      lowerCallToExternalSymbol(I, FunctionName);
+    return;
   case Intrinsic::donothing:
     // ignore
-    return nullptr;
+    return;
   case Intrinsic::experimental_stackmap:
     visitStackmap(I);
-    return nullptr;
+    return;
   case Intrinsic::experimental_patchpoint_void:
   case Intrinsic::experimental_patchpoint_i64:
     visitPatchpoint(&I);
-    return nullptr;
+    return;
   case Intrinsic::experimental_gc_statepoint:
     LowerStatepoint(ImmutableStatepoint(&I));
-    return nullptr;
+    return;
   case Intrinsic::experimental_gc_result:
     visitGCResult(cast<GCResultInst>(I));
-    return nullptr;
+    return;
   case Intrinsic::experimental_gc_relocate:
     visitGCRelocate(cast<GCRelocateInst>(I));
-    return nullptr;
+    return;
   case Intrinsic::instrprof_increment:
     llvm_unreachable("instrprof failed to lower an increment");
   case Intrinsic::instrprof_value_profile:
@@ -6182,7 +6614,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
           .addFrameIndex(FI);
     }
 
-    return nullptr;
+    return;
   }
 
   case Intrinsic::localrecover: {
@@ -6211,7 +6643,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
     setValue(&I, Add);
 
-    return nullptr;
+    return;
   }
 
   case Intrinsic::eh_exceptionpointer:
@@ -6226,7 +6658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (Intrinsic == Intrinsic::eh_exceptioncode)
       N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
     setValue(&I, N);
-    return nullptr;
+    return;
   }
   case Intrinsic::xray_customevent: {
     // Here we want to make sure that the intrinsic behaves as if it has a
@@ -6234,7 +6666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // FIXME: Support other platforms later.
     const auto &Triple = DAG.getTarget().getTargetTriple();
     if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
-      return nullptr;
+      return;
 
     SDLoc DL = getCurSDLoc();
     SmallVector<SDValue, 8> Ops;
@@ -6257,7 +6689,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue patchableNode = SDValue(MN, 0);
     DAG.setRoot(patchableNode);
     setValue(&I, patchableNode);
-    return nullptr;
+    return;
   }
   case Intrinsic::xray_typedevent: {
     // Here we want to make sure that the intrinsic behaves as if it has a
@@ -6265,7 +6697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // FIXME: Support other platforms later.
     const auto &Triple = DAG.getTarget().getTargetTriple();
     if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
-      return nullptr;
+      return;
 
     SDLoc DL = getCurSDLoc();
     SmallVector<SDValue, 8> Ops;
@@ -6292,14 +6724,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue patchableNode = SDValue(MN, 0);
     DAG.setRoot(patchableNode);
     setValue(&I, patchableNode);
-    return nullptr;
+    return;
   }
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
-    return nullptr;
+    return;
 
-  case Intrinsic::experimental_vector_reduce_fadd:
-  case Intrinsic::experimental_vector_reduce_fmul:
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
   case Intrinsic::experimental_vector_reduce_add:
   case Intrinsic::experimental_vector_reduce_mul:
   case Intrinsic::experimental_vector_reduce_and:
@@ -6312,11 +6744,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_vector_reduce_fmax:
   case Intrinsic::experimental_vector_reduce_fmin:
     visitVectorReduce(I, Intrinsic);
-    return nullptr;
+    return;
 
   case Intrinsic::icall_branch_funnel: {
     SmallVector<SDValue, 16> Ops;
-    Ops.push_back(DAG.getRoot());
     Ops.push_back(getValue(I.getArgOperand(0)));
 
     int64_t Offset;
@@ -6359,20 +6790,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       Ops.push_back(T.Target);
     }
 
+    Ops.push_back(DAG.getRoot()); // Chain
     SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
                                  getCurSDLoc(), MVT::Other, Ops),
               0);
     DAG.setRoot(N);
     setValue(&I, N);
     HasTailCall = true;
-    return nullptr;
+    return;
   }
 
   case Intrinsic::wasm_landingpad_index:
     // Information this intrinsic contained has been transferred to
     // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
     // delete it now.
-    return nullptr;
+    return;
+
+  case Intrinsic::aarch64_settag:
+  case Intrinsic::aarch64_settag_zero: {
+    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+    SDValue Val = TSI.EmitTargetCodeForSetTag(
+        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+        ZeroMemory);
+    DAG.setRoot(Val);
+    setValue(&I, Val);
+    return;
+  }
   }
 }
 
@@ -6400,6 +6845,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   case Intrinsic::experimental_constrained_fma:
     Opcode = ISD::STRICT_FMA;
     break;
+  case Intrinsic::experimental_constrained_fptrunc:
+    Opcode = ISD::STRICT_FP_ROUND;
+    break;
+  case Intrinsic::experimental_constrained_fpext:
+    Opcode = ISD::STRICT_FP_EXTEND;
+    break;
   case Intrinsic::experimental_constrained_sqrt:
     Opcode = ISD::STRICT_FSQRT;
     break;
@@ -6463,7 +6914,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
 
   SDVTList VTs = DAG.getVTList(ValueVTs);
   SDValue Result;
-  if (FPI.isUnaryOp())
+  if (Opcode == ISD::STRICT_FP_ROUND)
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                          { Chain, getValue(FPI.getArgOperand(0)),
+                               DAG.getTargetConstant(0, sdl,
+                               TLI.getPointerTy(DAG.getDataLayout())) });
+  else if (FPI.isUnaryOp())
     Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)) });
   else if (FPI.isTernaryOp())
@@ -6476,6 +6932,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
                          { Chain, getValue(FPI.getArgOperand(0)),
                            getValue(FPI.getArgOperand(1))  });
 
+  if (FPI.getExceptionBehavior() !=
+      ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
+    SDNodeFlags Flags;
+    Flags.setFPExcept(true);
+    Result->setFlags(Flags);
+  }
+
   assert(Result.getNode()->getNumValues() == 2);
   SDValue OutChain = Result.getValue(1);
   DAG.setRoot(OutChain);
@@ -6596,11 +7059,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
       SwiftErrorVal = V;
       // We find the virtual register for the actual swifterror argument.
       // Instead of using the Value, we use the virtual register instead.
-      Entry.Node = DAG.getRegister(FuncInfo
-                                       .getOrCreateSwiftErrorVRegUseAt(
-                                           CS.getInstruction(), FuncInfo.MBB, V)
-                                       .first,
-                                   EVT(TLI.getPointerTy(DL)));
+      Entry.Node = DAG.getRegister(
+          SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+          EVT(TLI.getPointerTy(DL)));
     }
 
     Args.push_back(Entry);
@@ -6641,13 +7102,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   if (SwiftErrorVal && TLI.supportSwiftError()) {
     // Get the last element of InVals.
     SDValue Src = CLI.InVals.back();
-    unsigned VReg; bool CreatedVReg;
-    std::tie(VReg, CreatedVReg) =
-        FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
+    unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+        CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
     SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
-    // We update the virtual register for the actual swifterror argument.
-    if (CreatedVReg)
-      FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
     DAG.setRoot(CopyNode);
   }
 }
@@ -6995,10 +7452,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     return;
   }
 
-  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-  computeUsesVAFloatArgument(I, MMI);
-
-  const char *RenameFn = nullptr;
   if (Function *F = I.getCalledFunction()) {
     if (F->isDeclaration()) {
       // Is this an LLVM intrinsic or a target-specific intrinsic?
@@ -7008,9 +7461,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
           IID = II->getIntrinsicID(F);
 
       if (IID) {
-        RenameFn = visitIntrinsicCall(I, IID);
-        if (!RenameFn)
-          return;
+        visitIntrinsicCall(I, IID);
+        return;
       }
     }
 
@@ -7159,20 +7611,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     }
   }
 
-  SDValue Callee;
-  if (!RenameFn)
-    Callee = getValue(I.getCalledValue());
-  else
-    Callee = DAG.getExternalSymbol(
-        RenameFn,
-        DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
-
   // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
   // have to do anything here to lower funclet bundles.
   assert(!I.hasOperandBundlesOtherThan(
              {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
          "Cannot lower calls with arbitrary operand bundles!");
 
+  SDValue Callee = getValue(I.getCalledValue());
+
   if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
     LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
   else
@@ -7328,8 +7774,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
   MachineFunction &MF = DAG.getMachineFunction();
   int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
-  Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
-                       MachinePointerInfo::getFixedStack(MF, SSFI));
+  Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+                            MachinePointerInfo::getFixedStack(MF, SSFI),
+                            TLI.getMemValueType(DL, Ty));
   OpInfo.CallOperand = StackSlot;
 
   return Chain;
@@ -7353,6 +7800,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
   SmallVector<unsigned, 4> Regs;
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
 
+  // No work to do for memory operations.
+  if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+    return;
+
   // If this is a constraint for a single physreg, or a constraint for a
   // register class, find it.
   unsigned AssignedReg;
@@ -7435,7 +7886,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
 
   for (; NumRegs; --NumRegs, ++I) {
     assert(I != RC->end() && "Ran out of registers to allocate!");
-    auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+    Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
     Regs.push_back(R);
   }
 
@@ -7509,9 +7960,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
       DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
 
-  bool hasMemory = false;
-
-  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
+  // AsmDialect, MayLoad, MayStore).
+  bool HasSideEffect = IA->hasSideEffects();
   ExtraFlags ExtraInfo(CS);
 
   unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
@@ -7527,7 +7978,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
 
       // Process the call argument. BasicBlocks are labels, currently appearing
       // only in asm's.
-      if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+      const Instruction *I = CS.getInstruction();
+      if (isa<CallBrInst>(I) &&
+          (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
+                          cast<CallBrInst>(I)->getNumIndirectDests())) {
+        const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
+        EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
+        OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
+      } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
       } else {
         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
@@ -7554,8 +8012,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       OpInfo.ConstraintVT = MVT::Other;
     }
 
-    if (!hasMemory)
-      hasMemory = OpInfo.hasMemory(TLI);
+    if (!HasSideEffect)
+      HasSideEffect = OpInfo.hasMemory(TLI);
 
     // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
     // FIXME: Could we compute this on OpInfo rather than T?
@@ -7566,17 +8024,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     ExtraInfo.update(T);
   }
 
-  SDValue Chain, Flag;
 
   // We won't need to flush pending loads if this asm doesn't touch
   // memory and is nonvolatile.
-  if (hasMemory || IA->hasSideEffects())
-    Chain = getRoot();
-  else
-    Chain = DAG.getRoot();
+  SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+  bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+  if (IsCallBr) {
+    // If this is a callbr we need to flush pending exports since inlineasm_br
+    // is a terminator. We need to do this before nodes are glued to
+    // the inlineasm_br node.
+    Chain = getControlRoot();
+  }
 
-  // Second pass over the constraints: compute which constraint option to use
-  // and assign registers to constraints that want a specific physreg.
+  // Second pass over the constraints: compute which constraint option to use.
   for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
     // If this is an output operand with a matching input operand, look up the
     // matching input. If their types mismatch, e.g. one is an integer, the
@@ -7612,28 +8073,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       OpInfo.isIndirect = true;
     }
 
-    // If this constraint is for a specific register, allocate it before
-    // anything else.
-    SDISelAsmOperandInfo &RefOpInfo =
-        OpInfo.isMatchingInputConstraint()
-            ? ConstraintOperands[OpInfo.getMatchedOperand()]
-            : OpInfo;
-    if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
-      GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
-  }
-
-  // Third pass - Loop over all of the operands, assigning virtual or physregs
-  // to register class operands.
-  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
-    SDISelAsmOperandInfo &RefOpInfo =
-        OpInfo.isMatchingInputConstraint()
-            ? ConstraintOperands[OpInfo.getMatchedOperand()]
-            : OpInfo;
-
-    // C_Register operands have already been allocated, Other/Memory don't need
-    // to be.
-    if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
-      GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
   }
 
   // AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7653,21 +8092,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   AsmNodeOperands.push_back(DAG.getTargetConstant(
       ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
 
-  // Loop over all of the inputs, copying the operand values into the
-  // appropriate registers and processing the output regs.
-  RegsForValue RetValRegs;
-
-  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
-
+  // Third pass: Loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and outputs.
   for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    // Assign Registers.
+    SDISelAsmOperandInfo &RefOpInfo =
+        OpInfo.isMatchingInputConstraint()
+            ? ConstraintOperands[OpInfo.getMatchedOperand()]
+            : OpInfo;
+    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
     switch (OpInfo.Type) {
     case InlineAsm::isOutput:
-      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
-          OpInfo.ConstraintType != TargetLowering::C_Register) {
-        // Memory output, or 'other' output (e.g. 'X' constraint).
-        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+          (OpInfo.ConstraintType == TargetLowering::C_Other &&
+           OpInfo.isIndirect)) {
         unsigned ConstraintID =
             TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
         assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -7680,38 +8119,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                         MVT::i32));
         AsmNodeOperands.push_back(OpInfo.CallOperand);
         break;
-      }
-
-      // Otherwise, this is a register or register class output.
-
-      // Copy the output from the appropriate register.  Find a register that
-      // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        emitInlineAsmError(
-            CS, "couldn't allocate output register for constraint '" +
-                    Twine(OpInfo.ConstraintCode) + "'");
-        return;
-      }
+      } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+                  !OpInfo.isIndirect) ||
+                 OpInfo.ConstraintType == TargetLowering::C_Register ||
+                 OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+        // Otherwise, this outputs to a register (directly for C_Register /
+        // C_RegisterClass, and a target-defined fashion for C_Other). Find a
+        // register that we can use.
+        if (OpInfo.AssignedRegs.Regs.empty()) {
+          emitInlineAsmError(
+              CS, "couldn't allocate output register for constraint '" +
+                      Twine(OpInfo.ConstraintCode) + "'");
+          return;
+        }
 
-      // If this is an indirect operand, store through the pointer after the
-      // asm.
-      if (OpInfo.isIndirect) {
-        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
-                                                      OpInfo.CallOperandVal));
-      } else {
-        // This is the result value of the call.
-        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
-        // Concatenate this output onto the outputs list.
-        RetValRegs.append(OpInfo.AssignedRegs);
+        // Add information to the INLINEASM node to know that this register is
+        // set.
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+                                  : InlineAsm::Kind_RegDef,
+            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       }
-
-      // Add information to the INLINEASM node to know that this register is
-      // set.
-      OpInfo.AssignedRegs
-          .AddInlineAsmOperands(OpInfo.isEarlyClobber
-                                    ? InlineAsm::Kind_RegDefEarlyClobber
-                                    : InlineAsm::Kind_RegDef,
-                                false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       break;
 
     case InlineAsm::isInput: {
@@ -7865,98 +8293,117 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
 
-  Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
+  unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+  Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
                       DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   Flag = Chain.getValue(1);
 
-  // If this asm returns a register value, copy the result from that register
-  // and set it as the value of the call.
-  if (!RetValRegs.Regs.empty()) {
-    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
-                                             Chain, &Flag, CS.getInstruction());
-
-    llvm::Type *CSResultType = CS.getType();
-    unsigned numRet;
-    ArrayRef<Type *> ResultTypes;
-    SmallVector<SDValue, 1> ResultValues(1);
-    if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
-      numRet = StructResult->getNumElements();
-      assert(Val->getNumOperands() == numRet &&
-             "Mismatch in number of output operands in asm result");
-      ResultTypes = StructResult->elements();
-      ArrayRef<SDUse> ValueUses = Val->ops();
-      ResultValues.resize(numRet);
-      std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
-                     [](const SDUse &u) -> SDValue { return u.get(); });
-    } else {
-      numRet = 1;
-      ResultValues[0] = Val;
-      ResultTypes = makeArrayRef(CSResultType);
-    }
-    SmallVector<EVT, 1> ResultVTs(numRet);
-    for (unsigned i = 0; i < numRet; i++) {
-      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
-      SDValue Val = ResultValues[i];
-      assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
-      // If the type of the inline asm call site return value is different but
-      // has same size as the type of the asm output bitcast it.  One example
-      // of this is for vectors with different width / number of elements.
-      // This can happen for register classes that can contain multiple
-      // different value types.  The preg or vreg allocated may not have the
-      // same VT as was expected.
-      //
-      // This can also happen for a return value that disagrees with the
-      // register class it is put in, eg. a double in a general-purpose
-      // register on a 32-bit machine.
-      if (ResultVT != Val.getValueType() &&
-          ResultVT.getSizeInBits() == Val.getValueSizeInBits())
-        Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
-      else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
-               Val.getValueType().isInteger()) {
-        // If a result value was tied to an input value, the computed result
-        // may have a wider width than the expected result.  Extract the
-        // relevant portion.
-        Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
-      }
+  // Do additional work to generate outputs.
 
-      assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
-      ResultVTs[i] = ResultVT;
-      ResultValues[i] = Val;
-    }
+  SmallVector<EVT, 1> ResultVTs;
+  SmallVector<SDValue, 1> ResultValues;
+  SmallVector<SDValue, 8> OutChains;
 
-    Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
-                      DAG.getVTList(ResultVTs), ResultValues);
-    setValue(CS.getInstruction(), Val);
-    // Don't need to use this as a chain in this case.
-    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
-      return;
-  }
+  llvm::Type *CSResultType = CS.getType();
+  ArrayRef<Type *> ResultTypes;
+  if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+    ResultTypes = StructResult->elements();
+  else if (!CSResultType->isVoidTy())
+    ResultTypes = makeArrayRef(CSResultType);
+
+  auto CurResultType = ResultTypes.begin();
+  auto handleRegAssign = [&](SDValue V) {
+    assert(CurResultType != ResultTypes.end() && "Unexpected value");
+    assert((*CurResultType)->isSized() && "Unexpected unsized type");
+    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+    ++CurResultType;
+    // If the type of the inline asm call site return value is different but has
+    // same size as the type of the asm output bitcast it.  One example of this
+    // is for vectors with different width / number of elements.  This can
+    // happen for register classes that can contain multiple different value
+    // types.  The preg or vreg allocated may not have the same VT as was
+    // expected.
+    //
+    // This can also happen for a return value that disagrees with the register
+    // class it is put in, eg. a double in a general-purpose register on a
+    // 32-bit machine.
+    if (ResultVT != V.getValueType() &&
+        ResultVT.getSizeInBits() == V.getValueSizeInBits())
+      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+             V.getValueType().isInteger()) {
+      // If a result value was tied to an input value, the computed result
+      // may have a wider width than the expected result.  Extract the
+      // relevant portion.
+      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+    }
+    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+    ResultVTs.push_back(ResultVT);
+    ResultValues.push_back(V);
+  };
 
-  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
+  // Deal with output operands.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    if (OpInfo.Type == InlineAsm::isOutput) {
+      SDValue Val;
+      // Skip trivial output operands.
+      if (OpInfo.AssignedRegs.Regs.empty())
+        continue;
+
+      switch (OpInfo.ConstraintType) {
+      case TargetLowering::C_Register:
+      case TargetLowering::C_RegisterClass:
+        Val = OpInfo.AssignedRegs.getCopyFromRegs(
+            DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+        break;
+      case TargetLowering::C_Other:
+        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+                                              OpInfo, DAG);
+        break;
+      case TargetLowering::C_Memory:
+        break; // Already handled.
+      case TargetLowering::C_Unknown:
+        assert(false && "Unexpected unknown constraint");
+      }
 
-  // Process indirect outputs, first output all of the flagged copies out of
-  // physregs.
-  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
-    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
-    const Value *Ptr = IndirectStoresToEmit[i].second;
-    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
-                                             Chain, &Flag, IA);
-    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+      // Indirect outputs manifest as stores. Record output chains.
+      if (OpInfo.isIndirect) {
+        const Value *Ptr = OpInfo.CallOperandVal;
+        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+                                     MachinePointerInfo(Ptr));
+        OutChains.push_back(Store);
+      } else {
+        // generate CopyFromRegs to associated registers.
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+        if (Val.getOpcode() == ISD::MERGE_VALUES) {
+          for (const SDValue &V : Val->op_values())
+            handleRegAssign(V);
+        } else
+          handleRegAssign(Val);
+      }
+    }
   }
 
-  // Emit the non-flagged stores from the physregs.
-  SmallVector<SDValue, 8> OutChains;
-  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
-    SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
-                               getValue(StoresToEmit[i].second),
-                               MachinePointerInfo(StoresToEmit[i].second));
-    OutChains.push_back(Val);
+  // Set results.
+  if (!ResultValues.empty()) {
+    assert(CurResultType == ResultTypes.end() &&
+           "Mismatch in number of ResultTypes");
+    assert(ResultValues.size() == ResultTypes.size() &&
+           "Mismatch in number of output operands in asm result");
+
+    SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+                            DAG.getVTList(ResultVTs), ResultValues);
+    setValue(CS.getInstruction(), V);
   }
 
+  // Collect store chains.
   if (!OutChains.empty())
     Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
 
-  DAG.setRoot(Chain);
+  // Only Update Root if inline assembly has a memory effect.
+  if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+    DAG.setRoot(Chain);
 }
 
 void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
@@ -7989,12 +8436,16 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   const DataLayout &DL = DAG.getDataLayout();
-  SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
-                           getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
-                           DAG.getSrcValue(I.getOperand(0)),
-                           DL.getABITypeAlignment(I.getType()));
-  setValue(&I, V);
+  SDValue V = DAG.getVAArg(
+      TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+      getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+      DL.getABITypeAlignment(I.getType()));
   DAG.setRoot(V.getValue(1));
+
+  if (I.getType()->isPointerTy())
+    V = DAG.getPtrExtOrTrunc(
+        V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+  setValue(&I, V);
 }
 
 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
@@ -8021,7 +8472,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
     return Op;
 
   ConstantRange CR = getConstantRangeFromMetadata(*Range);
-  if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
+  if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
     return Op;
 
   APInt Lo = CR.getUnsignedMin();
@@ -8058,7 +8509,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
 /// convention or require stack pointer adjustment. Only a subset of the
 /// intrinsic's operands need to participate in the calling convention.
 void SelectionDAGBuilder::populateCallLoweringInfo(
-    TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+    TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
     unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
     bool IsPatchPoint) {
   TargetLowering::ArgListTy Args;
@@ -8068,21 +8519,21 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
   // Attributes for args start at offset 1, after the return attribute.
   for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
        ArgI != ArgE; ++ArgI) {
-    const Value *V = CS->getOperand(ArgI);
+    const Value *V = Call->getOperand(ArgI);
 
     assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
 
     TargetLowering::ArgListEntry Entry;
     Entry.Node = getValue(V);
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgI);
+    Entry.setAttributes(Call, ArgI);
     Args.push_back(Entry);
   }
 
   CLI.setDebugLoc(getCurSDLoc())
       .setChain(getRoot())
-      .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
-      .setDiscardResult(CS->use_empty())
+      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+      .setDiscardResult(Call->use_empty())
       .setIsPatchPoint(IsPatchPoint);
 }
 
@@ -8093,7 +8544,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
 /// avoid constant materialization and register allocation.
 ///
 /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
-/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// generate address computation nodes, and so FinalizeISel can convert the
 /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
 /// address materialization and register allocation, but may also be required
 /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
@@ -8226,8 +8677,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
     IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
 
   TargetLowering::CallLoweringInfo CLI(DAG);
-  populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
-                           true);
+  populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+                           NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
 
   SDNode *CallEnd = Result.second.getNode();
@@ -8351,15 +8802,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
     FMF = I.getFastMathFlags();
 
   switch (Intrinsic) {
-  case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
-  case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
     break;
@@ -8433,8 +8886,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
 
   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
-    SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
-    SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+    SmallVector<EVT, 4> OldRetTys;
+    SmallVector<uint64_t, 4> OldOffsets;
+    RetTys.swap(OldRetTys);
+    Offsets.swap(OldOffsets);
+
     for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
       EVT RetVT = OldRetTys[i];
       uint64_t Offset = OldOffsets[i];
@@ -8489,7 +8945,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     // points into the callers stack frame.
     CLI.IsTailCall = false;
   } else {
+    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+        CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      ISD::ArgFlagsTy Flags;
+      if (NeedsRegBlock) {
+        Flags.setInConsecutiveRegs();
+        if (I == RetTys.size() - 1)
+          Flags.setInConsecutiveRegsLast();
+      }
       EVT VT = RetTys[I];
       MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                      CLI.CallConv, VT);
@@ -8497,9 +8961,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                                                        CLI.CallConv, VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags;
+        MyFlags.Flags = Flags;
         MyFlags.VT = RegisterVT;
         MyFlags.ArgVT = VT;
         MyFlags.Used = CLI.IsReturnValueUsed;
+        if (CLI.RetTy->isPointerTy()) {
+          MyFlags.Flags.setPointer();
+          MyFlags.Flags.setPointerAddrSpace(
+              cast<PointerType>(CLI.RetTy)->getAddressSpace());
+        }
         if (CLI.RetSExt)
           MyFlags.Flags.setSExt();
         if (CLI.RetZExt)
@@ -8550,6 +9020,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       // specify the alignment it wants.
       unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
 
+      if (Args[i].Ty->isPointerTy()) {
+        Flags.setPointer();
+        Flags.setPointerAddrSpace(
+            cast<PointerType>(Args[i].Ty)->getAddressSpace());
+      }
       if (Args[i].IsZExt)
         Flags.setZExt();
       if (Args[i].IsSExt)
@@ -8587,8 +9062,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should come from FE.  BE will guess if this
+
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Args[i].ByValType ? Args[i].ByValType : ElementTy);
+        Flags.setByValSize(FrameSize);
+        // For ByVal, alignment should come from FE.  BE will guess if this
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Args[i].Alignment)
@@ -8619,8 +9097,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       // for now.
       if (Args[i].IsReturned && !Op.getValueType().isVector() &&
           CanLowerReturn) {
-        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
-               "unexpected use of 'returned'");
+        assert((CLI.RetTy == Args[i].Ty ||
+                (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+                 CLI.RetTy->getPointerAddressSpace() ==
+                     Args[i].Ty->getPointerAddressSpace())) &&
+               RetTys.size() == NumValues && "unexpected use of 'returned'");
         // Before passing 'returned' to the target lowering code, ensure that
         // either the register MVT and the actual EVT are the same size or that
         // the return value and argument are extended in the same way; in these
@@ -9023,7 +9504,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     unsigned PartBase = 0;
     Type *FinalType = Arg.getType();
     if (Arg.hasAttribute(Attribute::ByVal))
-      FinalType = cast<PointerType>(FinalType)->getElementType();
+      FinalType = Arg.getParamByValType();
     bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
         FinalType, F.getCallingConv(), F.isVarArg());
     for (unsigned Value = 0, NumValues = ValueVTs.size();
@@ -9038,6 +9519,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       unsigned OriginalAlignment =
           TLI->getABIAlignmentForCallingConv(ArgTy, DL);
 
+      if (Arg.getType()->isPointerTy()) {
+        Flags.setPointer();
+        Flags.setPointerAddrSpace(
+            cast<PointerType>(Arg.getType())->getAddressSpace());
+      }
       if (Arg.hasAttribute(Attribute::ZExt))
         Flags.setZExt();
       if (Arg.hasAttribute(Attribute::SExt))
@@ -9078,11 +9564,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
           Flags.setByVal();
       }
       if (Flags.isByVal() || Flags.isInAlloca()) {
-        PointerType *Ty = cast<PointerType>(Arg.getType());
-        Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should be passed from FE.  BE will guess if
-        // this info is not there but there are cases it cannot get right.
+        Type *ElementTy = Arg.getParamByValType();
+
+        // For ByVal, size and alignment should be passed from FE.  BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+        unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
+        Flags.setByValSize(FrameSize);
+
         unsigned FrameAlign;
         if (Arg.getParamAlignment())
           FrameAlign = Arg.getParamAlignment();
@@ -9263,17 +9752,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
       if (TargetRegisterInfo::isVirtualRegister(Reg))
-        FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
-                                           FuncInfo->SwiftErrorArg, Reg);
+        SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+                                   Reg);
     }
 
     // If this argument is live outside of the entry block, insert a copy from
     // wherever we got it to the vreg that other BB's will reference it as.
-    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+    if (Res.getOpcode() == ISD::CopyFromReg) {
       // If we can, though, try to skip creating an unnecessary vreg.
       // FIXME: This isn't very clean... it would be nice to make this more
-      // general.  It's also subtly incompatible with the hacks FastISel
-      // uses with vregs.
+      // general.
       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
         FuncInfo->ValueMap[&Arg] = Reg;
@@ -9354,7 +9842,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
         unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo.CreateRegs(C->getType());
+          RegOut = FuncInfo.CreateRegs(C);
           CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
@@ -9367,7 +9855,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
           assert(isa<AllocaInst>(PHIOp) &&
                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo.CreateRegs(PHIOp->getType());
+          Reg = FuncInfo.CreateRegs(PHIOp);
           CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
@@ -9432,450 +9920,6 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
     HasTailCall = true;
 }
 
-uint64_t
-SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
-                                       unsigned First, unsigned Last) const {
-  assert(Last >= First);
-  const APInt &LowCase = Clusters[First].Low->getValue();
-  const APInt &HighCase = Clusters[Last].High->getValue();
-  assert(LowCase.getBitWidth() == HighCase.getBitWidth());
-
-  // FIXME: A range of consecutive cases has 100% density, but only requires one
-  // comparison to lower. We should discriminate against such consecutive ranges
-  // in jump tables.
-
-  return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
-}
-
-uint64_t SelectionDAGBuilder::getJumpTableNumCases(
-    const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
-    unsigned Last) const {
-  assert(Last >= First);
-  assert(TotalCases[Last] >= TotalCases[First]);
-  uint64_t NumCases =
-      TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
-  return NumCases;
-}
-
-bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
-                                         unsigned First, unsigned Last,
-                                         const SwitchInst *SI,
-                                         MachineBasicBlock *DefaultMBB,
-                                         CaseCluster &JTCluster) {
-  assert(First <= Last);
-
-  auto Prob = BranchProbability::getZero();
-  unsigned NumCmps = 0;
-  std::vector<MachineBasicBlock*> Table;
-  DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
-
-  // Initialize probabilities in JTProbs.
-  for (unsigned I = First; I <= Last; ++I)
-    JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
-
-  for (unsigned I = First; I <= Last; ++I) {
-    assert(Clusters[I].Kind == CC_Range);
-    Prob += Clusters[I].Prob;
-    const APInt &Low = Clusters[I].Low->getValue();
-    const APInt &High = Clusters[I].High->getValue();
-    NumCmps += (Low == High) ? 1 : 2;
-    if (I != First) {
-      // Fill the gap between this and the previous cluster.
-      const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
-      assert(PreviousHigh.slt(Low));
-      uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
-      for (uint64_t J = 0; J < Gap; J++)
-        Table.push_back(DefaultMBB);
-    }
-    uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
-    for (uint64_t J = 0; J < ClusterSize; ++J)
-      Table.push_back(Clusters[I].MBB);
-    JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
-  }
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  unsigned NumDests = JTProbs.size();
-  if (TLI.isSuitableForBitTests(
-          NumDests, NumCmps, Clusters[First].Low->getValue(),
-          Clusters[Last].High->getValue(), DAG.getDataLayout())) {
-    // Clusters[First..Last] should be lowered as bit tests instead.
-    return false;
-  }
-
-  // Create the MBB that will load from and jump through the table.
-  // Note: We create it here, but it's not inserted into the function yet.
-  MachineFunction *CurMF = FuncInfo.MF;
-  MachineBasicBlock *JumpTableMBB =
-      CurMF->CreateMachineBasicBlock(SI->getParent());
-
-  // Add successors. Note: use table order for determinism.
-  SmallPtrSet<MachineBasicBlock *, 8> Done;
-  for (MachineBasicBlock *Succ : Table) {
-    if (Done.count(Succ))
-      continue;
-    addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
-    Done.insert(Succ);
-  }
-  JumpTableMBB->normalizeSuccProbs();
-
-  unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
-                     ->createJumpTableIndex(Table);
-
-  // Set up the jump table info.
-  JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
-  JumpTableHeader JTH(Clusters[First].Low->getValue(),
-                      Clusters[Last].High->getValue(), SI->getCondition(),
-                      nullptr, false);
-  JTCases.emplace_back(std::move(JTH), std::move(JT));
-
-  JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
-                                     JTCases.size() - 1, Prob);
-  return true;
-}
-
-void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
-                                         const SwitchInst *SI,
-                                         MachineBasicBlock *DefaultMBB) {
-#ifndef NDEBUG
-  // Clusters must be non-empty, sorted, and only contain Range clusters.
-  assert(!Clusters.empty());
-  for (CaseCluster &C : Clusters)
-    assert(C.Kind == CC_Range);
-  for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
-    assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
-    return;
-
-  const int64_t N = Clusters.size();
-  const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
-  const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
-
-  if (N < 2 || N < MinJumpTableEntries)
-    return;
-
-  // TotalCases[i]: Total nbr of cases in Clusters[0..i].
-  SmallVector<unsigned, 8> TotalCases(N);
-  for (unsigned i = 0; i < N; ++i) {
-    const APInt &Hi = Clusters[i].High->getValue();
-    const APInt &Lo = Clusters[i].Low->getValue();
-    TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
-    if (i != 0)
-      TotalCases[i] += TotalCases[i - 1];
-  }
-
-  // Cheap case: the whole range may be suitable for jump table.
-  uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
-  uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
-  assert(NumCases < UINT64_MAX / 100);
-  assert(Range >= NumCases);
-  if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
-    CaseCluster JTCluster;
-    if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
-      Clusters[0] = JTCluster;
-      Clusters.resize(1);
-      return;
-    }
-  }
-
-  // The algorithm below is not suitable for -O0.
-  if (TM.getOptLevel() == CodeGenOpt::None)
-    return;
-
-  // Split Clusters into minimum number of dense partitions. The algorithm uses
-  // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
-  // for the Case Statement'" (1994), but builds the MinPartitions array in
-  // reverse order to make it easier to reconstruct the partitions in ascending
-  // order. In the choice between two optimal partitionings, it picks the one
-  // which yields more jump tables.
-
-  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
-  SmallVector<unsigned, 8> MinPartitions(N);
-  // LastElement[i] is the last element of the partition starting at i.
-  SmallVector<unsigned, 8> LastElement(N);
-  // PartitionsScore[i] is used to break ties when choosing between two
-  // partitionings resulting in the same number of partitions.
-  SmallVector<unsigned, 8> PartitionsScore(N);
-  // For PartitionsScore, a small number of comparisons is considered as good as
-  // a jump table and a single comparison is considered better than a jump
-  // table.
-  enum PartitionScores : unsigned {
-    NoTable = 0,
-    Table = 1,
-    FewCases = 1,
-    SingleCase = 2
-  };
-
-  // Base case: There is only one way to partition Clusters[N-1].
-  MinPartitions[N - 1] = 1;
-  LastElement[N - 1] = N - 1;
-  PartitionsScore[N - 1] = PartitionScores::SingleCase;
-
-  // Note: loop indexes are signed to avoid underflow.
-  for (int64_t i = N - 2; i >= 0; i--) {
-    // Find optimal partitioning of Clusters[i..N-1].
-    // Baseline: Put Clusters[i] into a partition on its own.
-    MinPartitions[i] = MinPartitions[i + 1] + 1;
-    LastElement[i] = i;
-    PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
-
-    // Search for a solution that results in fewer partitions.
-    for (int64_t j = N - 1; j > i; j--) {
-      // Try building a partition from Clusters[i..j].
-      uint64_t Range = getJumpTableRange(Clusters, i, j);
-      uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
-      assert(NumCases < UINT64_MAX / 100);
-      assert(Range >= NumCases);
-      if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
-        unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
-        unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
-        int64_t NumEntries = j - i + 1;
-
-        if (NumEntries == 1)
-          Score += PartitionScores::SingleCase;
-        else if (NumEntries <= SmallNumberOfEntries)
-          Score += PartitionScores::FewCases;
-        else if (NumEntries >= MinJumpTableEntries)
-          Score += PartitionScores::Table;
-
-        // If this leads to fewer partitions, or to the same number of
-        // partitions with better score, it is a better partitioning.
-        if (NumPartitions < MinPartitions[i] ||
-            (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
-          MinPartitions[i] = NumPartitions;
-          LastElement[i] = j;
-          PartitionsScore[i] = Score;
-        }
-      }
-    }
-  }
-
-  // Iterate over the partitions, replacing some with jump tables in-place.
-  unsigned DstIndex = 0;
-  for (unsigned First = 0, Last; First < N; First = Last + 1) {
-    Last = LastElement[First];
-    assert(Last >= First);
-    assert(DstIndex <= First);
-    unsigned NumClusters = Last - First + 1;
-
-    CaseCluster JTCluster;
-    if (NumClusters >= MinJumpTableEntries &&
-        buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
-      Clusters[DstIndex++] = JTCluster;
-    } else {
-      for (unsigned I = First; I <= Last; ++I)
-        std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
-    }
-  }
-  Clusters.resize(DstIndex);
-}
-
-bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
-                                        unsigned First, unsigned Last,
-                                        const SwitchInst *SI,
-                                        CaseCluster &BTCluster) {
-  assert(First <= Last);
-  if (First == Last)
-    return false;
-
-  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
-  unsigned NumCmps = 0;
-  for (int64_t I = First; I <= Last; ++I) {
-    assert(Clusters[I].Kind == CC_Range);
-    Dests.set(Clusters[I].MBB->getNumber());
-    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
-  }
-  unsigned NumDests = Dests.count();
-
-  APInt Low = Clusters[First].Low->getValue();
-  APInt High = Clusters[Last].High->getValue();
-  assert(Low.slt(High));
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  const DataLayout &DL = DAG.getDataLayout();
-  if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
-    return false;
-
-  APInt LowBound;
-  APInt CmpRange;
-
-  const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
-  assert(TLI.rangeFitsInWord(Low, High, DL) &&
-         "Case range must fit in bit mask!");
-
-  // Check if the clusters cover a contiguous range such that no value in the
-  // range will jump to the default statement.
-  bool ContiguousRange = true;
-  for (int64_t I = First + 1; I <= Last; ++I) {
-    if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
-      ContiguousRange = false;
-      break;
-    }
-  }
-
-  if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
-    // Optimize the case where all the case values fit in a word without having
-    // to subtract minValue. In this case, we can optimize away the subtraction.
-    LowBound = APInt::getNullValue(Low.getBitWidth());
-    CmpRange = High;
-    ContiguousRange = false;
-  } else {
-    LowBound = Low;
-    CmpRange = High - Low;
-  }
-
-  CaseBitsVector CBV;
-  auto TotalProb = BranchProbability::getZero();
-  for (unsigned i = First; i <= Last; ++i) {
-    // Find the CaseBits for this destination.
-    unsigned j;
-    for (j = 0; j < CBV.size(); ++j)
-      if (CBV[j].BB == Clusters[i].MBB)
-        break;
-    if (j == CBV.size())
-      CBV.push_back(
-          CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
-    CaseBits *CB = &CBV[j];
-
-    // Update Mask, Bits and ExtraProb.
-    uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
-    uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
-    assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
-    CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
-    CB->Bits += Hi - Lo + 1;
-    CB->ExtraProb += Clusters[i].Prob;
-    TotalProb += Clusters[i].Prob;
-  }
-
-  BitTestInfo BTI;
-  llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
-    // Sort by probability first, number of bits second, bit mask third.
-    if (a.ExtraProb != b.ExtraProb)
-      return a.ExtraProb > b.ExtraProb;
-    if (a.Bits != b.Bits)
-      return a.Bits > b.Bits;
-    return a.Mask < b.Mask;
-  });
-
-  for (auto &CB : CBV) {
-    MachineBasicBlock *BitTestBB =
-        FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
-    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
-  }
-  BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
-                            SI->getCondition(), -1U, MVT::Other, false,
-                            ContiguousRange, nullptr, nullptr, std::move(BTI),
-                            TotalProb);
-
-  BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
-                                    BitTestCases.size() - 1, TotalProb);
-  return true;
-}
-
-void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
-                                              const SwitchInst *SI) {
-// Partition Clusters into as few subsets as possible, where each subset has a
-// range that fits in a machine word and has <= 3 unique destinations.
-
-#ifndef NDEBUG
-  // Clusters must be sorted and contain Range or JumpTable clusters.
-  assert(!Clusters.empty());
-  assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
-  for (const CaseCluster &C : Clusters)
-    assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
-  for (unsigned i = 1; i < Clusters.size(); ++i)
-    assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
-  // The algorithm below is not suitable for -O0.
-  if (TM.getOptLevel() == CodeGenOpt::None)
-    return;
-
-  // If target does not have legal shift left, do not emit bit tests at all.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  const DataLayout &DL = DAG.getDataLayout();
-
-  EVT PTy = TLI.getPointerTy(DL);
-  if (!TLI.isOperationLegal(ISD::SHL, PTy))
-    return;
-
-  int BitWidth = PTy.getSizeInBits();
-  const int64_t N = Clusters.size();
-
-  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
-  SmallVector<unsigned, 8> MinPartitions(N);
-  // LastElement[i] is the last element of the partition starting at i.
-  SmallVector<unsigned, 8> LastElement(N);
-
-  // FIXME: This might not be the best algorithm for finding bit test clusters.
-
-  // Base case: There is only one way to partition Clusters[N-1].
-  MinPartitions[N - 1] = 1;
-  LastElement[N - 1] = N - 1;
-
-  // Note: loop indexes are signed to avoid underflow.
-  for (int64_t i = N - 2; i >= 0; --i) {
-    // Find optimal partitioning of Clusters[i..N-1].
-    // Baseline: Put Clusters[i] into a partition on its own.
-    MinPartitions[i] = MinPartitions[i + 1] + 1;
-    LastElement[i] = i;
-
-    // Search for a solution that results in fewer partitions.
-    // Note: the search is limited by BitWidth, reducing time complexity.
-    for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
-      // Try building a partition from Clusters[i..j].
-
-      // Check the range.
-      if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
-                               Clusters[j].High->getValue(), DL))
-        continue;
-
-      // Check nbr of destinations and cluster types.
-      // FIXME: This works, but doesn't seem very efficient.
-      bool RangesOnly = true;
-      BitVector Dests(FuncInfo.MF->getNumBlockIDs());
-      for (int64_t k = i; k <= j; k++) {
-        if (Clusters[k].Kind != CC_Range) {
-          RangesOnly = false;
-          break;
-        }
-        Dests.set(Clusters[k].MBB->getNumber());
-      }
-      if (!RangesOnly || Dests.count() > 3)
-        break;
-
-      // Check if it's a better partition.
-      unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
-      if (NumPartitions < MinPartitions[i]) {
-        // Found a better partition.
-        MinPartitions[i] = NumPartitions;
-        LastElement[i] = j;
-      }
-    }
-  }
-
-  // Iterate over the partitions, replacing with bit-test clusters in-place.
-  unsigned DstIndex = 0;
-  for (unsigned First = 0, Last; First < N; First = Last + 1) {
-    Last = LastElement[First];
-    assert(First <= Last);
-    assert(DstIndex <= First);
-
-    CaseCluster BitTestCluster;
-    if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
-      Clusters[DstIndex++] = BitTestCluster;
-    } else {
-      size_t NumClusters = Last - First + 1;
-      std::memmove(&Clusters[DstIndex], &Clusters[First],
-                   sizeof(Clusters[0]) * NumClusters);
-      DstIndex += NumClusters;
-    }
-  }
-  Clusters.resize(DstIndex);
-}
-
 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                                         MachineBasicBlock *SwitchMBB,
                                         MachineBasicBlock *DefaultMBB) {
@@ -9977,10 +10021,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
 
   MachineBasicBlock *CurMBB = W.MBB;
   for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+    bool FallthroughUnreachable = false;
     MachineBasicBlock *Fallthrough;
     if (I == W.LastCluster) {
       // For the last cluster, fall through to the default destination.
       Fallthrough = DefaultMBB;
+      FallthroughUnreachable = isa<UnreachableInst>(
+          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
     } else {
       Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
       CurMF->insert(BBI, Fallthrough);
@@ -9992,8 +10039,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
     switch (I->Kind) {
       case CC_JumpTable: {
         // FIXME: Optimize away range check based on pivot comparisons.
-        JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
-        JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+        JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+        SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
 
         // The jump block hasn't been inserted yet; insert it here.
         MachineBasicBlock *JumpMBB = JT->MBB;
@@ -10017,7 +10064,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
           }
         }
 
-        addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+        if (FallthroughUnreachable) {
+          // Skip the range check if the fallthrough block is unreachable.
+          JTH->OmitRangeCheck = true;
+        }
+
+        if (!JTH->OmitRangeCheck)
+          addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
         addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
         CurMBB->normalizeSuccProbs();
 
@@ -10034,8 +10087,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
         break;
       }
       case CC_BitTests: {
+        // FIXME: If Fallthrough is unreachable, skip the range check.
+
         // FIXME: Optimize away range check based on pivot comparisons.
-        BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+        BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
 
         // The bit test blocks haven't been inserted yet; insert them here.
         for (BitTestCase &BTC : BTB->Cases)
@@ -10078,6 +10133,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
           RHS = I->High;
         }
 
+        // If Fallthrough is unreachable, fold away the comparison.
+        if (FallthroughUnreachable)
+          CC = ISD::SETTRUE;
+
         // The false probability is the sum of all unhandled cases.
         CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
                      getCurSDLoc(), I->Prob, UnhandledProbs);
@@ -10085,7 +10144,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
         if (CurMBB == SwitchMBB)
           visitSwitchCase(CB, SwitchMBB);
         else
-          SwitchCases.push_back(CB);
+          SL->SwitchCases.push_back(CB);
 
         break;
       }
@@ -10236,7 +10295,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
   if (W.MBB == SwitchMBB)
     visitSwitchCase(CB, SwitchMBB);
   else
-    SwitchCases.push_back(CB);
+    SL->SwitchCases.push_back(CB);
 }
 
 // Scale CaseProb after peeling a case with the probablity of PeeledCaseProb
@@ -10265,7 +10324,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
   // Don't perform if there is only one cluster or optimizing for size.
   if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
       TM.getOptLevel() == CodeGenOpt::None ||
-      SwitchMBB->getParent()->getFunction().optForMinSize())
+      SwitchMBB->getParent()->getFunction().hasMinSize())
     return SwitchMBB;
 
   BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
@@ -10331,38 +10390,6 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   // if there are many clusters.
   sortAndRangeify(Clusters);
 
-  if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Replace an unreachable default with the most popular destination.
-    // FIXME: Exploit unreachable default more aggressively.
-    bool UnreachableDefault =
-        isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
-    if (UnreachableDefault && !Clusters.empty()) {
-      DenseMap<const BasicBlock *, unsigned> Popularity;
-      unsigned MaxPop = 0;
-      const BasicBlock *MaxBB = nullptr;
-      for (auto I : SI.cases()) {
-        const BasicBlock *BB = I.getCaseSuccessor();
-        if (++Popularity[BB] > MaxPop) {
-          MaxPop = Popularity[BB];
-          MaxBB = BB;
-        }
-      }
-      // Set new default.
-      assert(MaxPop > 0 && MaxBB);
-      DefaultMBB = FuncInfo.MBBMap[MaxBB];
-
-      // Remove cases that were pointing to the destination that is now the
-      // default.
-      CaseClusterVector New;
-      New.reserve(Clusters.size());
-      for (CaseCluster &CC : Clusters) {
-        if (CC.MBB != DefaultMBB)
-          New.push_back(CC);
-      }
-      Clusters = std::move(New);
-    }
-  }
-
   // The branch probablity of the peeled case.
   BranchProbability PeeledCaseProb = BranchProbability::getZero();
   MachineBasicBlock *PeeledSwitchMBB =
@@ -10380,8 +10407,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     return;
   }
 
-  findJumpTables(Clusters, &SI, DefaultMBB);
-  findBitTestClusters(Clusters, &SI);
+  SL->findJumpTables(Clusters, &SI, DefaultMBB);
+  SL->findBitTestClusters(Clusters, &SI);
 
   LLVM_DEBUG({
     dbgs() << "Case clusters: ";
@@ -10420,7 +10447,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
 
     if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
-        !DefaultMBB->getParent()->getFunction().optForMinSize()) {
+        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
       // For optimized builds, lower large range as a balanced binary tree.
       splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
       continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f9cdb69daf7..0072e33f23b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,9 +1,8 @@
 //===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,11 +17,13 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallSite.h"
@@ -47,6 +48,7 @@ class AtomicRMWInst;
 class BasicBlock;
 class BranchInst;
 class CallInst;
+class CallBrInst;
 class CatchPadInst;
 class CatchReturnInst;
 class CatchSwitchInst;
@@ -76,6 +78,7 @@ class ResumeInst;
 class ReturnInst;
 class SDDbgValue;
 class StoreInst;
+class SwiftErrorValueTracking;
 class SwitchInst;
 class TargetLibraryInfo;
 class TargetMachine;
@@ -91,16 +94,16 @@ class Value;
 /// implementation that is parameterized by a TargetLowering object.
 ///
 class SelectionDAGBuilder {
-  /// CurInst - The current instruction being visited
+  /// The current instruction being visited.
   const Instruction *CurInst = nullptr;
 
   DenseMap<const Value*, SDValue> NodeMap;
 
-  /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
+  /// Maps argument value for unused arguments. This is used
   /// to preserve debug information for incoming arguments.
   DenseMap<const Value*, SDValue> UnusedArgNodeMap;
 
-  /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+  /// Helper type for DanglingDebugInfoMap.
   class DanglingDebugInfo {
     const DbgValueInst* DI = nullptr;
     DebugLoc dl;
@@ -116,18 +119,17 @@ class SelectionDAGBuilder {
     unsigned getSDNodeOrder() { return SDNodeOrder; }
   };
 
-  /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+  /// Helper type for DanglingDebugInfoMap.
   typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
 
-  /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
-  /// yet seen the referent.  We defer handling these until we do see it.
-  DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+  /// Keeps track of dbg_values for which we have not yet seen the referent.
+  /// We defer handling these until we do see it.
+  MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
 
 public:
-  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
-  /// them up and then emit token factor nodes when possible.  This allows us to
-  /// get simple disambiguation between loads without worrying about alias
-  /// analysis.
+  /// Loads are not emitted to the program immediately.  We bunch them up and
+  /// then emit token factor nodes when possible.  This allows us to get simple
+  /// disambiguation between loads without worrying about alias analysis.
   SmallVector<SDValue, 8> PendingLoads;
 
   /// State used while lowering a statepoint sequence (gc_statepoint,
@@ -135,247 +137,37 @@ public:
   StatepointLoweringState StatepointLowering;
 
 private:
-  /// PendingExports - CopyToReg nodes that copy values to virtual registers
-  /// for export to other blocks need to be emitted before any terminator
-  /// instruction, but they have no other ordering requirements. We bunch them
-  /// up and the emit a single tokenfactor for them just before terminator
-  /// instructions.
+  /// CopyToReg nodes that copy values to virtual registers for export to other
+  /// blocks need to be emitted before any terminator instruction, but they have
+  /// no other ordering requirements. We bunch them up and then emit a single
+  /// tokenfactor for them just before terminator instructions.
   SmallVector<SDValue, 8> PendingExports;
 
-  /// SDNodeOrder - A unique monotonically increasing number used to order the
-  /// SDNodes we create.
+  /// A unique monotonically increasing number used to order the SDNodes we
+  /// create.
   unsigned SDNodeOrder;
 
-  enum CaseClusterKind {
-    /// A cluster of adjacent case labels with the same destination, or just one
-    /// case.
-    CC_Range,
-    /// A cluster of cases suitable for jump table lowering.
-    CC_JumpTable,
-    /// A cluster of cases suitable for bit test lowering.
-    CC_BitTests
-  };
-
-  /// A cluster of case labels.
-  struct CaseCluster {
-    CaseClusterKind Kind;
-    const ConstantInt *Low, *High;
-    union {
-      MachineBasicBlock *MBB;
-      unsigned JTCasesIndex;
-      unsigned BTCasesIndex;
-    };
-    BranchProbability Prob;
-
-    static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
-                             MachineBasicBlock *MBB, BranchProbability Prob) {
-      CaseCluster C;
-      C.Kind = CC_Range;
-      C.Low = Low;
-      C.High = High;
-      C.MBB = MBB;
-      C.Prob = Prob;
-      return C;
-    }
-
-    static CaseCluster jumpTable(const ConstantInt *Low,
-                                 const ConstantInt *High, unsigned JTCasesIndex,
-                                 BranchProbability Prob) {
-      CaseCluster C;
-      C.Kind = CC_JumpTable;
-      C.Low = Low;
-      C.High = High;
-      C.JTCasesIndex = JTCasesIndex;
-      C.Prob = Prob;
-      return C;
-    }
-
-    static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
-                                unsigned BTCasesIndex, BranchProbability Prob) {
-      CaseCluster C;
-      C.Kind = CC_BitTests;
-      C.Low = Low;
-      C.High = High;
-      C.BTCasesIndex = BTCasesIndex;
-      C.Prob = Prob;
-      return C;
-    }
-  };
-
-  using CaseClusterVector = std::vector<CaseCluster>;
-  using CaseClusterIt = CaseClusterVector::iterator;
-
-  struct CaseBits {
-    uint64_t Mask = 0;
-    MachineBasicBlock* BB = nullptr;
-    unsigned Bits = 0;
-    BranchProbability ExtraProb;
-
-    CaseBits() = default;
-    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
-             BranchProbability Prob):
-      Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
-  };
-
-  using CaseBitsVector = std::vector<CaseBits>;
-
-  /// Sort Clusters and merge adjacent cases.
-  void sortAndRangeify(CaseClusterVector &Clusters);
-
-  /// CaseBlock - This structure is used to communicate between
-  /// SelectionDAGBuilder and SDISel for the code generation of additional basic
-  /// blocks needed by multi-case switch statements.
-  struct CaseBlock {
-    // CC - the condition code to use for the case block's setcc node
-    ISD::CondCode CC;
-
-    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
-    // Emit by default LHS op RHS. MHS is used for range comparisons:
-    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
-    const Value *CmpLHS, *CmpMHS, *CmpRHS;
-
-    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
-    MachineBasicBlock *TrueBB, *FalseBB;
-
-    // ThisBB - the block into which to emit the code for the setcc and branches
-    MachineBasicBlock *ThisBB;
-
-    /// The debug location of the instruction this CaseBlock was
-    /// produced from.
-    SDLoc DL;
-
-    // TrueProb/FalseProb - branch weights.
-    BranchProbability TrueProb, FalseProb;
-
-    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
-              const Value *cmpmiddle, MachineBasicBlock *truebb,
-              MachineBasicBlock *falsebb, MachineBasicBlock *me,
-              SDLoc dl,
-              BranchProbability trueprob = BranchProbability::getUnknown(),
-              BranchProbability falseprob = BranchProbability::getUnknown())
-        : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
-          TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
-          TrueProb(trueprob), FalseProb(falseprob) {}
-  };
-
-  struct JumpTable {
-    /// Reg - the virtual register containing the index of the jump table entry
-    //. to jump to.
-    unsigned Reg;
-    /// JTI - the JumpTableIndex for this jump table in the function.
-    unsigned JTI;
-    /// MBB - the MBB into which to emit the code for the indirect jump.
-    MachineBasicBlock *MBB;
-    /// Default - the MBB of the default bb, which is a successor of the range
-    /// check MBB.  This is when updating PHI nodes in successors.
-    MachineBasicBlock *Default;
-
-    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
-              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-  };
-  struct JumpTableHeader {
-    APInt First;
-    APInt Last;
-    const Value *SValue;
-    MachineBasicBlock *HeaderBB;
-    bool Emitted;
-
-    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
-                    bool E = false)
-        : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
-          Emitted(E) {}
-  };
-  using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
-
-  struct BitTestCase {
-    uint64_t Mask;
-    MachineBasicBlock *ThisBB;
-    MachineBasicBlock *TargetBB;
-    BranchProbability ExtraProb;
-
-    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
-                BranchProbability Prob):
-      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
-  };
-
-  using BitTestInfo = SmallVector<BitTestCase, 3>;
-
-  struct BitTestBlock {
-    APInt First;
-    APInt Range;
-    const Value *SValue;
-    unsigned Reg;
-    MVT RegVT;
-    bool Emitted;
-    bool ContiguousRange;
-    MachineBasicBlock *Parent;
-    MachineBasicBlock *Default;
-    BitTestInfo Cases;
-    BranchProbability Prob;
-    BranchProbability DefaultProb;
-
-    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
-                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
-                 BitTestInfo C, BranchProbability Pr)
-        : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
-          RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
-          Cases(std::move(C)), Prob(Pr) {}
-  };
-
-  /// Return the range of value in [First..Last].
-  uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
-                             unsigned Last) const;
-
-  /// Return the number of cases in [First..Last].
-  uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
-                                unsigned First, unsigned Last) const;
-
-  /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
-  /// decides it's not a good idea.
-  bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
-                      unsigned Last, const SwitchInst *SI,
-                      MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
-
-  /// Find clusters of cases suitable for jump table lowering.
-  void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
-                      MachineBasicBlock *DefaultMBB);
-
-  /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
-  /// decides it's not a good idea.
-  bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
-                     const SwitchInst *SI, CaseCluster &BTCluster);
-
-  /// Find clusters of cases suitable for bit test lowering.
-  void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
-
-  struct SwitchWorkListItem {
-    MachineBasicBlock *MBB;
-    CaseClusterIt FirstCluster;
-    CaseClusterIt LastCluster;
-    const ConstantInt *GE;
-    const ConstantInt *LT;
-    BranchProbability DefaultProb;
-  };
-  using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
-
   /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
   /// than each cluster in the range, its rank is 0.
-  static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First,
-                                  CaseClusterIt Last);
+  unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+                           SwitchCG::CaseClusterIt First,
+                           SwitchCG::CaseClusterIt Last);
 
   /// Emit comparison and split W into two subtrees.
-  void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
-                     Value *Cond, MachineBasicBlock *SwitchMBB);
+  void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+                     const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+                     MachineBasicBlock *SwitchMBB);
 
   /// Lower W.
-  void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+  void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
                      MachineBasicBlock *SwitchMBB,
                      MachineBasicBlock *DefaultMBB);
 
   /// Peel the top probability case if it exceeds the threshold
-  MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
-                                             CaseClusterVector &Clusters,
-                                             BranchProbability &PeeledCaseProb);
+  MachineBasicBlock *
+  peelDominantCaseCluster(const SwitchInst &SI,
+                          SwitchCG::CaseClusterVector &Clusters,
+                          BranchProbability &PeeledCaseProb);
 
   /// A class which encapsulates all of the information needed to generate a
   /// stack protector check and signals to isel via its state being initialized
@@ -588,17 +380,22 @@ public:
   AliasAnalysis *AA = nullptr;
   const TargetLibraryInfo *LibInfo;
 
-  /// SwitchCases - Vector of CaseBlock structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<CaseBlock> SwitchCases;
+  class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
+  public:
+    SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
+        : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
+
+    virtual void addSuccessorWithProb(
+        MachineBasicBlock *Src, MachineBasicBlock *Dst,
+        BranchProbability Prob = BranchProbability::getUnknown()) override {
+      SDB->addSuccessorWithProb(Src, Dst, Prob);
+    }
 
-  /// JTCases - Vector of JumpTable structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<JumpTableBlock> JTCases;
+  private:
+    SelectionDAGBuilder *SDB;
+  };
 
-  /// BitTestCases - Vector of BitTestBlock structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<BitTestBlock> BitTestCases;
+  std::unique_ptr<SDAGSwitchLowering> SL;
 
   /// A StackProtectorDescriptor structure used to communicate stack protector
   /// information in between SelectBasicBlock and FinishBasicBlock.
@@ -608,27 +405,29 @@ public:
   // PHI nodes.
   DenseMap<const Constant *, unsigned> ConstantsOut;
 
-  /// FuncInfo - Information about the function as a whole.
-  ///
+  /// Information about the function as a whole.
   FunctionLoweringInfo &FuncInfo;
 
-  /// GFI - Garbage collection metadata for the function.
+  /// Information about the swifterror values used throughout the function.
+  SwiftErrorValueTracking &SwiftError;
+
+  /// Garbage collection metadata for the function.
   GCFunctionInfo *GFI;
 
-  /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+  /// Map a landing pad to the call site indexes.
   DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
 
-  /// HasTailCall - This is set to true if a call in the current
-  /// block has been translated as a tail call. In this case,
-  /// no subsequent DAG nodes should be created.
+  /// This is set to true if a call in the current block has been translated as
+  /// a tail call. In this case, no subsequent DAG nodes should be created.
   bool HasTailCall = false;
 
   LLVMContext *Context;
 
   SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
-                      CodeGenOpt::Level ol)
-    : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
-      FuncInfo(funcinfo) {}
+                      SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+      : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+        SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+        SwiftError(swifterror) {}
 
   void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
             const TargetLibraryInfo *li);
@@ -670,20 +469,34 @@ public:
 
   void visit(unsigned Opcode, const User &I);
 
-  /// getCopyFromRegs - If there was virtual register allocated for the value V
-  /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+  /// If there was virtual register allocated for the value V emit CopyFromReg
+  /// of the specified type Ty. Return empty SDValue() otherwise.
   SDValue getCopyFromRegs(const Value *V, Type *Ty);
 
   /// If we have dangling debug info that describes \p Variable, or an
   /// overlapping part of variable considering the \p Expr, then this method
-  /// weill drop that debug info as it isn't valid any longer.
+  /// will drop that debug info as it isn't valid any longer.
   void dropDanglingDebugInfo(const DILocalVariable *Variable,
                              const DIExpression *Expr);
 
-  // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
-  // generate the debug data structures now that we've seen its definition.
+  /// If we saw an earlier dbg_value referring to V, generate the debug data
+  /// structures now that we've seen its definition.
   void resolveDanglingDebugInfo(const Value *V, SDValue Val);
 
+  /// For the given dangling debuginfo record, perform last-ditch efforts to
+  /// resolve the debuginfo to something that is represented in this DAG. If
+  /// this cannot be done, produce an Undef debug value record.
+  void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+
+  /// For a given Value, attempt to create and record a SDDbgValue in the
+  /// SelectionDAG.
+  bool handleDebugValue(const Value *V, DILocalVariable *Var,
+                        DIExpression *Expr, DebugLoc CurDL,
+                        DebugLoc InstDL, unsigned Order);
+
+  /// Evict any dangling debug information, attempting to salvage it first.
+  void resolveOrClearDbgInfo();
+
   SDValue getValue(const Value *V);
   bool findValue(const Value *V) const;
 
@@ -720,7 +533,7 @@ public:
                                     MachineBasicBlock *SwitchBB,
                                     BranchProbability TProb, BranchProbability FProb,
                                     bool InvertCond);
-  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+  bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
   bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
   void CopyToExportRegsIfNeeded(const Value *V);
   void ExportFromCurrentBlock(const Value *V);
@@ -733,7 +546,7 @@ public:
                                  SDValue Op);
 
   void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
-                                ImmutableCallSite CS, unsigned ArgIdx,
+                                const CallBase *Call, unsigned ArgIdx,
                                 unsigned NumArgs, SDValue Callee,
                                 Type *ReturnTy, bool IsPatchPoint);
 
@@ -741,7 +554,7 @@ public:
   lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
                  const BasicBlock *EHPadBB = nullptr);
 
-  /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+  /// When an MBB was split during scheduling, update the
   /// references that need to refer to the last resulting block.
   void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
 
@@ -797,13 +610,13 @@ public:
   void LowerStatepoint(ImmutableStatepoint ISP,
                        const BasicBlock *EHPadBB = nullptr);
 
-  void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
+  void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
                                     const BasicBlock *EHPadBB);
 
   void LowerDeoptimizeCall(const CallInst *CI);
   void LowerDeoptimizingReturn();
 
-  void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee,
+  void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
                                         const BasicBlock *EHPadBB,
                                         bool VarArgDisallowed,
                                         bool ForceVoidReturnTy);
@@ -833,25 +646,24 @@ private:
       BranchProbability Prob = BranchProbability::getUnknown());
 
 public:
-  void visitSwitchCase(CaseBlock &CB,
-                       MachineBasicBlock *SwitchBB);
+  void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
   void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                MachineBasicBlock *ParentBB);
   void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
-  void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
-  void visitBitTestCase(BitTestBlock &BB,
-                        MachineBasicBlock* NextMBB,
-                        BranchProbability BranchProbToNext,
-                        unsigned Reg,
-                        BitTestCase &B,
-                        MachineBasicBlock *SwitchBB);
-  void visitJumpTable(JumpTable &JT);
-  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+  void visitBitTestHeader(SwitchCG::BitTestBlock &B,
+                          MachineBasicBlock *SwitchBB);
+  void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+                        BranchProbability BranchProbToNext, unsigned Reg,
+                        SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+  void visitJumpTable(SwitchCG::JumpTable &JT);
+  void visitJumpTableHeader(SwitchCG::JumpTable &JT,
+                            SwitchCG::JumpTableHeader &JTH,
                             MachineBasicBlock *SwitchBB);
 
 private:
   // These all get lowered before this pass.
   void visitInvoke(const InvokeInst &I);
+  void visitCallBr(const CallBrInst &I);
   void visitResume(const ResumeInst &I);
 
   void visitUnary(const User &I, unsigned Opcode);
@@ -932,7 +744,7 @@ private:
   void visitStoreToSwiftError(const StoreInst &I);
 
   void visitInlineAsm(ImmutableCallSite CS);
-  const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+  void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
   void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
   void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
 
@@ -982,9 +794,12 @@ private:
   SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
                           DIExpression *Expr, const DebugLoc &dl,
                           unsigned DbgSDNodeOrder);
+
+  /// Lowers CallInst to an external symbol.
+  void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
 };
 
-/// RegsForValue - This struct represents the registers (physical or virtual)
+/// This struct represents the registers (physical or virtual)
 /// that a particular set of values is assigned, and the type information about
 /// the value. The most common situation is to represent one value at a time,
 /// but struct or array values are handled element-wise as multiple values.  The
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 43df2abb674b..da3049881d31 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -96,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ATOMIC_LOAD_MAX:            return "AtomicLoadMax";
   case ISD::ATOMIC_LOAD_UMIN:           return "AtomicLoadUMin";
   case ISD::ATOMIC_LOAD_UMAX:           return "AtomicLoadUMax";
+  case ISD::ATOMIC_LOAD_FADD:           return "AtomicLoadFAdd";
   case ISD::ATOMIC_LOAD:                return "AtomicLoad";
   case ISD::ATOMIC_STORE:               return "AtomicStore";
   case ISD::PCMARKER:                   return "PCMarker";
@@ -145,6 +145,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
     unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
     if (IID < Intrinsic::num_intrinsics)
       return Intrinsic::getName((Intrinsic::ID)IID, None);
+    else if (!G)
+      return "Unknown intrinsic";
     else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
       return TII->getName(IID);
     llvm_unreachable("Invalid intrinsic ID");
@@ -170,7 +172,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UNDEF:                      return "undef";
   case ISD::MERGE_VALUES:               return "merge_values";
   case ISD::INLINEASM:                  return "inlineasm";
+  case ISD::INLINEASM_BR:               return "inlineasm_br";
   case ISD::EH_LABEL:                   return "eh_label";
+  case ISD::ANNOTATION_LABEL:           return "annotation_label";
   case ISD::HANDLENODE:                 return "handlenode";
 
   // Unary operators
@@ -297,7 +301,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UADDSAT:                    return "uaddsat";
   case ISD::SSUBSAT:                    return "ssubsat";
   case ISD::USUBSAT:                    return "usubsat";
+
   case ISD::SMULFIX:                    return "smulfix";
+  case ISD::SMULFIXSAT:                 return "smulfixsat";
+  case ISD::UMULFIX:                    return "umulfix";
 
   // Conversion operators.
   case ISD::SIGN_EXTEND:                return "sign_extend";
@@ -309,9 +316,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ZERO_EXTEND_VECTOR_INREG:   return "zero_extend_vector_inreg";
   case ISD::TRUNCATE:                   return "truncate";
   case ISD::FP_ROUND:                   return "fp_round";
+  case ISD::STRICT_FP_ROUND:            return "strict_fp_round";
   case ISD::FLT_ROUNDS_:                return "flt_rounds";
   case ISD::FP_ROUND_INREG:             return "fp_round_inreg";
   case ISD::FP_EXTEND:                  return "fp_extend";
+  case ISD::STRICT_FP_EXTEND:           return "strict_fp_extend";
 
   case ISD::SINT_TO_FP:                 return "sint_to_fp";
   case ISD::UINT_TO_FP:                 return "uint_to_fp";
@@ -321,6 +330,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ADDRSPACECAST:              return "addrspacecast";
   case ISD::FP16_TO_FP:                 return "fp16_to_fp";
   case ISD::FP_TO_FP16:                 return "fp_to_fp16";
+  case ISD::LROUND:                     return "lround";
+  case ISD::LLROUND:                    return "llround";
+  case ISD::LRINT:                      return "lrint";
+  case ISD::LLRINT:                     return "llrint";
 
     // Control flow instructions
   case ISD::BR:                         return "br";
@@ -649,6 +662,36 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
     if (*AM)
       OS << ", " << AM;
 
+    OS << ">";
+  } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
+    OS << "<";
+
+    printMemOperand(OS, *MLd->getMemOperand(), G);
+
+    bool doExt = true;
+    switch (MLd->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:  OS << ", anyext"; break;
+    case ISD::SEXTLOAD: OS << ", sext"; break;
+    case ISD::ZEXTLOAD: OS << ", zext"; break;
+    }
+    if (doExt)
+      OS << " from " << MLd->getMemoryVT().getEVTString();
+
+    if (MLd->isExpandingLoad())
+      OS << ", expanding";
+
+    OS << ">";
+  } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
+    OS << "<";
+    printMemOperand(OS, *MSt->getMemOperand(), G);
+
+    if (MSt->isTruncatingStore())
+      OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+
+    if (MSt->isCompressingStore())
+      OS << ", compressing";
+
     OS << ">";
   } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
     OS << "<";
@@ -675,6 +718,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
        << " -> "
        << ASC->getDestAddressSpace()
        << ']';
+  } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
+    if (LN->hasOffset())
+      OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
   }
 
   if (VerboseDAGDumping) {
@@ -684,45 +730,63 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
     if (getNodeId() != -1)
       OS << " [ID=" << getNodeId() << ']';
     if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
-      OS << "# D:" << isDivergent();
-
-    if (!G)
-      return;
-
-    DILocation *L = getDebugLoc();
-    if (!L)
-      return;
-
-    if (auto *Scope = L->getScope())
-      OS << Scope->getFilename();
-    else
-      OS << "<unknown>";
-    OS << ':' << L->getLine();
-    if (unsigned C = L->getColumn())
-      OS << ':' << C;
-
-    for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
-      if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
-        continue;
-      Dbg->dump(OS);
-    }
+      OS << " # D:" << isDivergent();
+
+    if (G && !G->GetDbgValues(this).empty()) {
+      OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
+      for (SDDbgValue *Dbg : G->GetDbgValues(this))
+        if (!Dbg->isInvalidated())
+          Dbg->print(OS);
+    } else if (getHasDebugValue())
+      OS << " [NoOfDbgValues>0]";
   }
 }
 
-LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
- OS << " DbgVal";
- if (kind==SDNODE)
-   OS << '(' << u.s.ResNo << ')';
- OS << ":\"" << Var->getName() << '"';
+LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
+  OS << " DbgVal(Order=" << getOrder() << ')';
+  if (isInvalidated()) OS << "(Invalidated)";
+  if (isEmitted()) OS << "(Emitted)";
+  switch (getKind()) {
+  case SDNODE:
+    if (getSDNode())
+      OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' <<  getResNo() << ')';
+    else
+      OS << "(SDNODE)";
+    break;
+  case CONST:
+    OS << "(CONST)";
+    break;
+  case FRAMEIX:
+    OS << "(FRAMEIX=" << getFrameIx() << ')';
+    break;
+  case VREG:
+    OS << "(VREG=" << getVReg() << ')';
+    break;
+  }
+  if (isIndirect()) OS << "(Indirect)";
+  OS << ":\"" << Var->getName() << '"';
 #ifndef NDEBUG
- if (Expr->getNumElements())
-   Expr->dump();
+  if (Expr->getNumElements())
+    Expr->dump();
 #endif
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDDbgValue::dump() const {
+  if (isInvalidated())
+    return;
+  print(dbgs());
+  dbgs() << "\n";
+}
+#endif
+
 /// Return true if this node is so simple that we should just print it inline
 /// if it appears as an operand.
-static bool shouldPrintInline(const SDNode &Node) {
+static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
+  // Avoid lots of cluttering when inline printing nodes with associated
+  // DbgValues in verbose mode.
+  if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
+    return false;
   if (Node.getOpcode() == ISD::EntryToken)
     return false;
   return Node.getNumOperands() == 0;
@@ -731,7 +795,7 @@ static bool shouldPrintInline(const SDNode &Node) {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
   for (const SDValue &Op : N->op_values()) {
-    if (shouldPrintInline(*Op.getNode()))
+    if (shouldPrintInline(*Op.getNode(), G))
       continue;
     if (Op.getNode()->hasOneUse())
       DumpNodes(Op.getNode(), indent+2, G);
@@ -748,12 +812,24 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
        I != E; ++I) {
     const SDNode *N = &*I;
     if (!N->hasOneUse() && N != getRoot().getNode() &&
-        (!shouldPrintInline(*N) || N->use_empty()))
+        (!shouldPrintInline(*N, this) || N->use_empty()))
       DumpNodes(N, 2, this);
   }
 
   if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-  dbgs() << "\n\n";
+  dbgs() << "\n";
+
+  if (VerboseDAGDumping) {
+    if (DbgBegin() != DbgEnd())
+      dbgs() << "SDDbgValues:\n";
+    for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
+      Dbg->dump();
+    if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
+      dbgs() << "Byval SDDbgValues:\n";
+    for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
+      Dbg->dump();
+  }
+  dbgs() << "\n";
 }
 #endif
 
@@ -769,7 +845,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
   if (!Value.getNode()) {
     OS << "<null>";
     return false;
-  } else if (shouldPrintInline(*Value.getNode())) {
+  } else if (shouldPrintInline(*Value.getNode(), G)) {
     OS << Value->getOperationName(G) << ':';
     Value->print_types(OS, G);
     Value->print_details(OS, G);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index af5c2433fa2f..bdf9f2c166e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -49,6 +49,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -63,6 +64,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -306,8 +308,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
                                    CodeGenOpt::Level OL) :
   MachineFunctionPass(ID), TM(tm),
   FuncInfo(new FunctionLoweringInfo()),
+  SwiftError(new SwiftErrorValueTracking()),
   CurDAG(new SelectionDAG(tm, OL)),
-  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
   AA(), GFI(),
   OptLevel(OL),
   DAGSize(0) {
@@ -323,6 +326,7 @@ SelectionDAGISel::~SelectionDAGISel() {
   delete SDB;
   delete CurDAG;
   delete FuncInfo;
+  delete SwiftError;
 }
 
 void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -379,6 +383,30 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
   }
 }
 
+static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
+                                         MachineModuleInfo &MMI) {
+  // Only needed for MSVC
+  if (!TT.isWindowsMSVCEnvironment())
+    return;
+
+  // If it's already set, nothing to do.
+  if (MMI.usesMSVCFloatingPoint())
+    return;
+
+  for (const Instruction &I : instructions(F)) {
+    if (I.getType()->isFPOrFPVectorTy()) {
+      MMI.setUsesMSVCFloatingPoint(true);
+      return;
+    }
+    for (const auto &Op : I.operands()) {
+      if (Op->getType()->isFPOrFPVectorTy()) {
+        MMI.setUsesMSVCFloatingPoint(true);
+        return;
+      }
+    }
+  }
+}
+
 bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // If we already selected that function, we do not need to run SDISel.
   if (mf.getProperties().hasProperty(
@@ -421,6 +449,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   CurDAG->init(*MF, *ORE, this, LibInfo,
    getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
   FuncInfo->set(Fn, *MF, CurDAG);
+  SwiftError->setFunction(*MF);
 
   // Now get the optional analyzes if we want to.
   // This is based on the possibly changed OptLevel (after optnone is taken
@@ -474,6 +503,40 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
     Fn.getContext().diagnose(DiagFallback);
   }
 
+  // Replace forward-declared registers with the registers containing
+  // the desired value.
+  // Note: it is important that this happens **before** the call to
+  // EmitLiveInCopies, since implementations can skip copies of unused
+  // registers. If we don't apply the reg fixups before, some registers may
+  // appear as unused and will be skipped, resulting in bad MI.
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+                                              E = FuncInfo->RegFixups.end();
+       I != E; ++I) {
+    unsigned From = I->first;
+    unsigned To = I->second;
+    // If To is also scheduled to be replaced, find what its ultimate
+    // replacement is.
+    while (true) {
+      DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+      if (J == E)
+        break;
+      To = J->second;
+    }
+    // Make sure the new register has a sufficiently constrained register class.
+    if (TargetRegisterInfo::isVirtualRegister(From) &&
+        TargetRegisterInfo::isVirtualRegister(To))
+      MRI.constrainRegClass(To, MRI.getRegClass(From));
+    // Replace it.
+
+    // Replacing one register with another won't touch the kill flags.
+    // We need to conservatively clear the kill flags as a kill on the old
+    // register might dominate existing uses of the new register.
+    if (!MRI.use_empty(To))
+      MRI.clearKillFlags(From);
+    MRI.replaceRegWith(From, To);
+  }
+
   // If the first basic block in the function has live ins that need to be
   // copied into vregs, emit the copies into the top of the block before
   // emitting the code for the block.
@@ -507,7 +570,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
     MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
     bool hasFI = MI->getOperand(0).isFI();
-    unsigned Reg =
+    Register Reg =
         hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
     if (TargetRegisterInfo::isPhysicalRegister(Reg))
       EntryMBB->insert(EntryMBB->begin(), MI);
@@ -590,9 +653,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   // Determine if there is a call to setjmp in the machine function.
   MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
 
+  // Determine if floating point is used for msvc
+  computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
   // Replace forward-declared registers with the registers containing
   // the desired value.
-  MachineRegisterInfo &MRI = MF->getRegInfo();
   for (DenseMap<unsigned, unsigned>::iterator
        I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
        I != E; ++I) {
@@ -663,6 +728,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
   // Make sure the root of the DAG is up-to-date.
   CurDAG->setRoot(SDB->getControlRoot());
   HadTailCall = SDB->HasTailCall;
+  SDB->resolveOrClearDbgInfo();
   SDB->clear();
 
   // Final step, emit the lowered DAG as machine code.
@@ -713,8 +779,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   StringRef GroupName = "sdag";
   StringRef GroupDescription = "Instruction Selection and Scheduling";
   std::string BlockName;
-  int BlockNumber = -1;
-  (void)BlockNumber;
   bool MatchFilterBB = false; (void)MatchFilterBB;
 #ifndef NDEBUG
   TargetTransformInfo &TTI =
@@ -735,7 +799,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       ViewSUnitDAGs)
 #endif
   {
-    BlockNumber = FuncInfo->MBB->getNumber();
     BlockName =
         (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
   }
@@ -1092,16 +1155,14 @@ void SelectionDAGISel::DoInstructionSelection() {
 #endif
 
       // When we are using non-default rounding modes or FP exception behavior
-      // FP operations are represented by StrictFP pseudo-operations.  They
-      // need to be simplified here so that the target-specific instruction
-      // selectors know how to handle them.
-      //
-      // If the current node is a strict FP pseudo-op, the isStrictFPOp()
-      // function will provide the corresponding normal FP opcode to which the
-      // node should be mutated.
-      //
-      // FIXME: The backends need a way to handle FP constraints.
-      if (Node->isStrictFPOpcode())
+      // FP operations are represented by StrictFP pseudo-operations.  For
+      // targets that do not (yet) understand strict FP operations directly,
+      // we convert them to normal FP opcodes instead at this point.  This
+      // will allow them to be handled by existing target-specific instruction
+      // selectors.
+      if (Node->isStrictFPOpcode() &&
+          (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+           != TargetLowering::Legal))
         Node = CurDAG->mutateStrictFPToFP(Node);
 
       LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
@@ -1228,77 +1289,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
          !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
 }
 
-/// Set up SwiftErrorVals by going through the function. If the function has
-/// swifterror argument, it will be the first entry.
-static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
-                                FunctionLoweringInfo *FuncInfo) {
-  if (!TLI->supportSwiftError())
-    return;
-
-  FuncInfo->SwiftErrorVals.clear();
-  FuncInfo->SwiftErrorVRegDefMap.clear();
-  FuncInfo->SwiftErrorVRegUpwardsUse.clear();
-  FuncInfo->SwiftErrorVRegDefUses.clear();
-  FuncInfo->SwiftErrorArg = nullptr;
-
-  // Check if function has a swifterror argument.
-  bool HaveSeenSwiftErrorArg = false;
-  for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
-       AI != AE; ++AI)
-    if (AI->hasSwiftErrorAttr()) {
-      assert(!HaveSeenSwiftErrorArg &&
-             "Must have only one swifterror parameter");
-      (void)HaveSeenSwiftErrorArg; // silence warning.
-      HaveSeenSwiftErrorArg = true;
-      FuncInfo->SwiftErrorArg = &*AI;
-      FuncInfo->SwiftErrorVals.push_back(&*AI);
-    }
-
-  for (const auto &LLVMBB : Fn)
-    for (const auto &Inst : LLVMBB) {
-      if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
-        if (Alloca->isSwiftError())
-          FuncInfo->SwiftErrorVals.push_back(Alloca);
-    }
-}
-
-static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
-                                                FastISel *FastIS,
-                                                const TargetLowering *TLI,
-                                                const TargetInstrInfo *TII,
-                                                SelectionDAGBuilder *SDB) {
-  if (!TLI->supportSwiftError())
-    return;
-
-  // We only need to do this when we have swifterror parameter or swifterror
-  // alloc.
-  if (FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
-         "expected to insert into entry block");
-  auto &DL = FuncInfo->MF->getDataLayout();
-  auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-  for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
-    // We will always generate a copy from the argument. It is always used at
-    // least by the 'return' of the swifterror.
-    if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
-      continue;
-    unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
-    // Assign Undef to Vreg. We construct MI directly to make sure it works
-    // with FastISel.
-    BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
-            SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
-            VReg);
-
-    // Keep FastIS informed about the value we just inserted.
-    if (FastIS)
-      FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
-
-    FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
-  }
-}
-
 /// Collect llvm.dbg.declare information. This is done after argument lowering
 /// in case the declarations refer to arguments.
 static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
@@ -1337,202 +1327,13 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
 
       DIExpression *Expr = DI->getExpression();
       if (Offset.getBoolValue())
-        Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
+        Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
                                      Offset.getZExtValue());
       MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
     }
   }
 }
 
-/// Propagate swifterror values through the machine function CFG.
-static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
-  auto *TLI = FuncInfo->TLI;
-  if (!TLI->supportSwiftError())
-    return;
-
-  // We only need to do this when we have swifterror parameter or swifterror
-  // alloc.
-  if (FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  // For each machine basic block in reverse post order.
-  ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
-  for (MachineBasicBlock *MBB : RPOT) {
-    // For each swifterror value in the function.
-    for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
-      auto Key = std::make_pair(MBB, SwiftErrorVal);
-      auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
-      auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
-      bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
-      unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
-      bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
-      assert(!(UpwardsUse && !DownwardDef) &&
-             "We can't have an upwards use but no downwards def");
-
-      // If there is no upwards exposed use and an entry for the swifterror in
-      // the def map for this value we don't need to do anything: We already
-      // have a downward def for this basic block.
-      if (!UpwardsUse && DownwardDef)
-        continue;
-
-      // Otherwise we either have an upwards exposed use vreg that we need to
-      // materialize or need to forward the downward def from predecessors.
-
-      // Check whether we have a single vreg def from all predecessors.
-      // Otherwise we need a phi.
-      SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
-      SmallSet<const MachineBasicBlock*, 8> Visited;
-      for (auto *Pred : MBB->predecessors()) {
-        if (!Visited.insert(Pred).second)
-          continue;
-        VRegs.push_back(std::make_pair(
-            Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
-        if (Pred != MBB)
-          continue;
-        // We have a self-edge.
-        // If there was no upwards use in this basic block there is now one: the
-        // phi needs to use it self.
-        if (!UpwardsUse) {
-          UpwardsUse = true;
-          UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
-          assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
-          UUseVReg = UUseIt->second;
-        }
-      }
-
-      // We need a phi node if we have more than one predecessor with different
-      // downward defs.
-      bool needPHI =
-          VRegs.size() >= 1 &&
-          std::find_if(
-              VRegs.begin(), VRegs.end(),
-              [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
-                  -> bool { return V.second != VRegs[0].second; }) !=
-              VRegs.end();
-
-      // If there is no upwards exposed used and we don't need a phi just
-      // forward the swifterror vreg from the predecessor(s).
-      if (!UpwardsUse && !needPHI) {
-        assert(!VRegs.empty() &&
-               "No predecessors? The entry block should bail out earlier");
-        // Just forward the swifterror vreg from the predecessor(s).
-        FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
-        continue;
-      }
-
-      auto DLoc = isa<Instruction>(SwiftErrorVal)
-                      ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
-                      : DebugLoc();
-      const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
-
-      // If we don't need a phi create a copy to the upward exposed vreg.
-      if (!needPHI) {
-        assert(UpwardsUse);
-        assert(!VRegs.empty() &&
-               "No predecessors?  Is the Calling Convention correct?");
-        unsigned DestReg = UUseVReg;
-        BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
-                DestReg)
-            .addReg(VRegs[0].second);
-        continue;
-      }
-
-      // We need a phi: if there is an upwards exposed use we already have a
-      // destination virtual register number otherwise we generate a new one.
-      auto &DL = FuncInfo->MF->getDataLayout();
-      auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-      unsigned PHIVReg =
-          UpwardsUse ? UUseVReg
-                     : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
-      MachineInstrBuilder SwiftErrorPHI =
-          BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
-                  TII->get(TargetOpcode::PHI), PHIVReg);
-      for (auto BBRegPair : VRegs) {
-        SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
-      }
-
-      // We did not have a definition in this block before: store the phi's vreg
-      // as this block downward exposed def.
-      if (!UpwardsUse)
-        FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
-    }
-  }
-}
-
-static void preassignSwiftErrorRegs(const TargetLowering *TLI,
-                                    FunctionLoweringInfo *FuncInfo,
-                                    BasicBlock::const_iterator Begin,
-                                    BasicBlock::const_iterator End) {
-  if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  // Iterator over instructions and assign vregs to swifterror defs and uses.
-  for (auto It = Begin; It != End; ++It) {
-    ImmutableCallSite CS(&*It);
-    if (CS) {
-      // A call-site with a swifterror argument is both use and def.
-      const Value *SwiftErrorAddr = nullptr;
-      for (auto &Arg : CS.args()) {
-        if (!Arg->isSwiftError())
-          continue;
-        // Use of swifterror.
-        assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
-        SwiftErrorAddr = &*Arg;
-        assert(SwiftErrorAddr->isSwiftError() &&
-               "Must have a swifterror value argument");
-        unsigned VReg; bool CreatedReg;
-        std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
-          &*It, FuncInfo->MBB, SwiftErrorAddr);
-        assert(CreatedReg);
-      }
-      if (!SwiftErrorAddr)
-        continue;
-
-      // Def of swifterror.
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
-      assert(CreatedReg);
-      FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
-    // A load is a use.
-    } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
-      const Value *V = LI->getOperand(0);
-      if (!V->isSwiftError())
-        continue;
-
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
-      assert(CreatedReg);
-
-    // A store is a def.
-    } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
-      const Value *SwiftErrorAddr = SI->getOperand(1);
-      if (!SwiftErrorAddr->isSwiftError())
-        continue;
-
-      // Def of swifterror.
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
-      assert(CreatedReg);
-      FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
-    // A return in a swiferror returning function is a use.
-    } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
-      const Function *F = R->getParent()->getParent();
-      if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
-        continue;
-
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
-          R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
-      assert(CreatedReg);
-    }
-  }
-}
-
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   FastISelFailed = false;
   // Initialize the Fast-ISel state, if needed.
@@ -1542,8 +1343,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
   }
 
-  setupSwiftErrorVals(Fn, TLI, FuncInfo);
-
   ReversePostOrderTraversal<const Function*> RPOT(&Fn);
 
   // Lower arguments up front. An RPO iteration always visits the entry block
@@ -1589,7 +1388,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     else
       FastIS->setLastLocalValue(nullptr);
   }
-  createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+  bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+  if (FastIS && Inserted)
+    FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
 
   processDbgDeclares(FuncInfo);
 
@@ -1644,7 +1447,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       unsigned NumFastIselRemaining = std::distance(Begin, End);
 
       // Pre-assign swifterror vregs.
-      preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+      SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
 
       // Do FastISel on as many instructions as possible.
       for (; BI != Begin; --BI) {
@@ -1692,7 +1495,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         // to keep track of gc-relocates for a particular gc-statepoint. This is
         // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
         // visitGCRelocate.
-        if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) {
+        if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
+            !isGCResult(Inst)) {
           OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
                                      Inst->getDebugLoc(), LLVMBB);
 
@@ -1712,7 +1516,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
               !Inst->use_empty()) {
             unsigned &R = FuncInfo->ValueMap[Inst];
             if (!R)
-              R = FuncInfo->CreateRegs(Inst->getType());
+              R = FuncInfo->CreateRegs(Inst);
           }
 
           bool HadTailCall = false;
@@ -1799,7 +1603,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
   SP.copyToMachineFrameInfo(MF->getFrameInfo());
 
-  propagateSwiftErrorVRegs(FuncInfo);
+  SwiftError->propagateVRegs();
 
   delete FastIS;
   SDB->clearDanglingDebugInfo();
@@ -1969,7 +1773,7 @@ SelectionDAGISel::FinishBasicBlock() {
   }
 
   // Lower each BitTestBlock.
-  for (auto &BTB : SDB->BitTestCases) {
+  for (auto &BTB : SDB->SL->BitTestCases) {
     // Lower header first, if it wasn't already lowered
     if (!BTB.Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
@@ -2050,30 +1854,30 @@ SelectionDAGISel::FinishBasicBlock() {
       }
     }
   }
-  SDB->BitTestCases.clear();
+  SDB->SL->BitTestCases.clear();
 
   // If the JumpTable record is filled in, then we need to emit a jump table.
   // Updating the PHI nodes is tricky in this case, since we need to determine
   // whether the PHI is a successor of the range check MBB or the jump table MBB
-  for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+  for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
     // Lower header first, if it wasn't already lowered
-    if (!SDB->JTCases[i].first.Emitted) {
+    if (!SDB->SL->JTCases[i].first.Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
-      FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+      FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
       FuncInfo->InsertPt = FuncInfo->MBB->end();
       // Emit the code
-      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
-                                FuncInfo->MBB);
+      SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
+                                SDB->SL->JTCases[i].first, FuncInfo->MBB);
       CurDAG->setRoot(SDB->getRoot());
       SDB->clear();
       CodeGenAndEmitDAG();
     }
 
     // Set the current basic block to the mbb we wish to insert the code into
-    FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+    FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
     FuncInfo->InsertPt = FuncInfo->MBB->end();
     // Emit the code
-    SDB->visitJumpTable(SDB->JTCases[i].second);
+    SDB->visitJumpTable(SDB->SL->JTCases[i].second);
     CurDAG->setRoot(SDB->getRoot());
     SDB->clear();
     CodeGenAndEmitDAG();
@@ -2086,31 +1890,31 @@ SelectionDAGISel::FinishBasicBlock() {
       assert(PHI->isPHI() &&
              "This is not a machine PHI node that we are updating!");
       // "default" BB. We can go there only from header BB.
-      if (PHIBB == SDB->JTCases[i].second.Default)
+      if (PHIBB == SDB->SL->JTCases[i].second.Default)
         PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
-           .addMBB(SDB->JTCases[i].first.HeaderBB);
+           .addMBB(SDB->SL->JTCases[i].first.HeaderBB);
       // JT BB. Just iterate over successors here
       if (FuncInfo->MBB->isSuccessor(PHIBB))
         PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
     }
   }
-  SDB->JTCases.clear();
+  SDB->SL->JTCases.clear();
 
   // If we generated any switch lowering information, build and codegen any
   // additional DAGs necessary.
-  for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+  for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
     // Set the current basic block to the mbb we wish to insert the code into
-    FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+    FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
     FuncInfo->InsertPt = FuncInfo->MBB->end();
 
     // Determine the unique successors.
     SmallVector<MachineBasicBlock *, 2> Succs;
-    Succs.push_back(SDB->SwitchCases[i].TrueBB);
-    if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
-      Succs.push_back(SDB->SwitchCases[i].FalseBB);
+    Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
+    if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
+      Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
 
     // Emit the code. Note that this could result in FuncInfo->MBB being split.
-    SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+    SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
     CurDAG->setRoot(SDB->getRoot());
     SDB->clear();
     CodeGenAndEmitDAG();
@@ -2146,7 +1950,7 @@ SelectionDAGISel::FinishBasicBlock() {
       }
     }
   }
-  SDB->SwitchCases.clear();
+  SDB->SL->SwitchCases.clear();
 }
 
 /// Create the scheduler. If a specific scheduler was specified
@@ -2413,14 +2217,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
   return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
 }
 
-void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
   SDLoc DL(N);
 
   std::vector<SDValue> Ops(N->op_begin(), N->op_end());
   SelectInlineAsmMemoryOperands(Ops, DL);
 
   const EVT VTs[] = {MVT::Other, MVT::Glue};
-  SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
+  SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops);
   New->setNodeId(-1);
   ReplaceUses(N, New.getNode());
   CurDAG->RemoveDeadNode(N);
@@ -2727,6 +2531,14 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
       (ISD::CondCode)MatcherTable[MatcherIndex++];
 }
 
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                    SDValue N) {
+  if (2 >= N.getNumOperands())
+    return false;
+  return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
+}
+
 LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
 CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
                SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
@@ -2842,6 +2654,9 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
   case SelectionDAGISel::OPC_CheckCondCode:
     Result = !::CheckCondCode(Table, Index, N);
     return Index;
+  case SelectionDAGISel::OPC_CheckChild2CondCode:
+    Result = !::CheckChild2CondCode(Table, Index, N);
+    return Index;
   case SelectionDAGISel::OPC_CheckValueType:
     Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
                                SDISel.CurDAG->getDataLayout());
@@ -2970,7 +2785,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
     CurDAG->RemoveDeadNode(NodeToMatch);
     return;
   case ISD::INLINEASM:
-    Select_INLINEASM(NodeToMatch);
+  case ISD::INLINEASM_BR:
+    Select_INLINEASM(NodeToMatch,
+                     NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
     return;
   case ISD::READ_REGISTER:
     Select_READ_REGISTER(NodeToMatch);
@@ -3328,6 +3145,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
     case OPC_CheckCondCode:
       if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
       continue;
+    case OPC_CheckChild2CondCode:
+      if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
+      continue;
     case OPC_CheckValueType:
       if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
                             CurDAG->getDataLayout()))
@@ -3348,6 +3168,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
     case OPC_CheckOrImm:
       if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
       continue;
+    case OPC_CheckImmAllOnesV:
+      if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+      continue;
+    case OPC_CheckImmAllZerosV:
+      if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+      continue;
 
     case OPC_CheckFoldableChainNode: {
       assert(NodeStack.size() != 1 && "No parent node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3b19bff4743d..cdc09d59f6a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 3a283bc5fdc0..3a2df6f60593 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 90a1b350fc94..395e9a8a4fc5 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,9 +1,8 @@
 //===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -348,16 +347,28 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
   return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
 }
 
+static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
+                                               FrameIndexSDNode &FI) {
+  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex());
+  auto MMOFlags = MachineMemOperand::MOStore | // Load + store + volatile.
+    MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+  auto &MFI = MF.getFrameInfo(); // Source of FI's object size/alignment.
+  return MF.getMachineMemOperand(PtrInfo, MMOFlags, 
+                                 MFI.getObjectSize(FI.getIndex()),
+                                 MFI.getObjectAlignment(FI.getIndex()));
+}
+
 /// Spill a value incoming to the statepoint. It might be either part of
 /// vmstate
 /// or gcstate. In both cases unconditionally spill it on the stack unless it
 /// is a null constant. Return pair with first element being frame index
 /// containing saved value and second element with outgoing chain from the
 /// emitted store
-static std::pair<SDValue, SDValue>
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
 spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
                              SelectionDAGBuilder &Builder) {
   SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+  MachineMemOperand* MMO = nullptr;
 
   // Emit new store if we didn't do it for this ptr before
   if (!Loc.getNode()) {
@@ -367,10 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
     // We use TargetFrameIndex so that isel will not select it into LEA
     Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
 
-    // TODO: We can create TokenFactor node instead of
-    //       chaining stores one after another, this may allow
-    //       a bit more optimal scheduling for them
-
 #ifndef NDEBUG
     // Right now we always allocate spill slots that are of the same
     // size as the value we're about to spill (the size of spillee can
@@ -382,15 +389,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
            "Bad spill:  stack slot does not match!");
 #endif
 
+    auto &MF = Builder.DAG.getMachineFunction();
+    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
     Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
-                                 MachinePointerInfo::getFixedStack(
-                                     Builder.DAG.getMachineFunction(), Index));
+                                 PtrInfo);
 
+    MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+    
     Builder.StatepointLowering.setLocation(Incoming, Loc);
   }
 
   assert(Loc.getNode());
-  return std::make_pair(Loc, Chain);
+  return std::make_tuple(Loc, Chain, MMO);
 }
 
 /// Lower a single value incoming to a statepoint node.  This value can be
@@ -398,7 +408,11 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
 /// case constants and allocas, then fall back to spilling if required.
 static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
                                          SmallVectorImpl<SDValue> &Ops,
+                                         SmallVectorImpl<MachineMemOperand*> &MemRefs,
                                          SelectionDAGBuilder &Builder) {
+  // Note: We know all of these spills are independent, but don't bother to
+  // exploit that chain wise.  DAGCombine will happily do so as needed, so
+  // doing it here would be a small compile time win at most.
   SDValue Chain = Builder.getRoot();
 
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
@@ -417,6 +431,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
            "Incoming value is a frame index!");
     Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
                                                   Builder.getFrameIndexTy()));
+
+    auto &MF = Builder.DAG.getMachineFunction();
+    auto *MMO = getMachineMemOperand(MF, *FI);
+    MemRefs.push_back(MMO);
+    
   } else if (LiveInOnly) {
     // If this value is live in (not live-on-return, or live-through), we can
     // treat it the same way patchpoint treats it's "live in" values.  We'll
@@ -433,8 +452,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
     // need to be optional since it requires a lot of complexity on the
     // runtime side which not all would support.
     auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
-    Ops.push_back(Res.first);
-    Chain = Res.second;
+    Ops.push_back(std::get<0>(Res));
+    if (auto *MMO = std::get<2>(Res))
+      MemRefs.push_back(MMO);
+    Chain = std::get<1>(Res);;
   }
 
   Builder.DAG.setRoot(Chain);
@@ -449,7 +470,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
 /// will be set to the last value spilled (if any were).
 static void
 lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
-                        SelectionDAGBuilder::StatepointLoweringInfo &SI,
+                        SmallVectorImpl<MachineMemOperand*> &MemRefs,                                    SelectionDAGBuilder::StatepointLoweringInfo &SI,
                         SelectionDAGBuilder &Builder) {
   // Lower the deopt and gc arguments for this statepoint.  Layout will be:
   // deopt argument length, deopt arguments.., gc arguments...
@@ -533,7 +554,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
     if (!Incoming.getNode())
       Incoming = Builder.getValue(V);
     const bool LiveInValue = LiveInDeopt && !isGCValue(V);
-    lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
+    lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
   }
 
   // Finally, go ahead and lower all the gc arguments.  There's no prefixed
@@ -544,11 +565,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   for (unsigned i = 0; i < SI.Bases.size(); ++i) {
     const Value *Base = SI.Bases[i];
     lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
-                                 Ops, Builder);
+                                 Ops, MemRefs, Builder);
 
     const Value *Ptr = SI.Ptrs[i];
     lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
-                                 Ops, Builder);
+                                 Ops, MemRefs, Builder);
   }
 
   // If there are any explicit spill slots passed to the statepoint, record
@@ -564,6 +585,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
              "Incoming value is a frame index!");
       Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
                                                     Builder.getFrameIndexTy()));
+
+      auto &MF = Builder.DAG.getMachineFunction();
+      auto *MMO = getMachineMemOperand(MF, *FI);
+      MemRefs.push_back(MMO);
     }
   }
 
@@ -630,7 +655,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
 
   // Lower statepoint vmstate and gcstate arguments
   SmallVector<SDValue, 10> LoweredMetaArgs;
-  lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);
+  SmallVector<MachineMemOperand*, 16> MemRefs;
+  lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
 
   // Now that we've emitted the spills, we need to update the root so that the
   // call sequence is ordered correctly.
@@ -746,8 +772,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
   // input.  This allows someone else to chain off us as needed.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
-  SDNode *StatepointMCNode =
-      DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+  MachineSDNode *StatepointMCNode =
+    DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+  DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
 
   SDNode *SinkNode = StatepointMCNode;
 
@@ -799,7 +826,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
 void
 SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
                                      const BasicBlock *EHPadBB /*= nullptr*/) {
-  assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg &&
+  assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
          "anyregcc is not supported on statepoints!");
 
 #ifndef NDEBUG
@@ -832,7 +859,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
   }
 
   StatepointLoweringInfo SI(DAG);
-  populateCallLoweringInfo(SI.CLI, ISP.getCallSite(),
+  populateCallLoweringInfo(SI.CLI, ISP.getCall(),
                            ImmutableStatepoint::CallArgsBeginPos,
                            ISP.getNumCallArgs(), ActualCallee,
                            ISP.getActualReturnType(), false /* IsPatchPoint */);
@@ -859,7 +886,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
   const GCResultInst *GCResult = ISP.getGCResult();
   Type *RetTy = ISP.getActualReturnType();
   if (!RetTy->isVoidTy() && GCResult) {
-    if (GCResult->getParent() != ISP.getCallSite().getParent()) {
+    if (GCResult->getParent() != ISP.getCall()->getParent()) {
       // Result value will be used in a different basic block so we need to
       // export it now.  Default exporting mechanism will not work here because
       // statepoint call has a different type than the actual call. It means
@@ -871,7 +898,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
       unsigned Reg = FuncInfo.CreateRegs(RetTy);
       RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                        DAG.getDataLayout(), Reg, RetTy,
-                       ISP.getCallSite().getCallingConv());
+                       ISP.getCall()->getCallingConv());
       SDValue Chain = DAG.getEntryNode();
 
       RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
@@ -891,22 +918,22 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
 }
 
 void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
-    ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
+    const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
     bool VarArgDisallowed, bool ForceVoidReturnTy) {
   StatepointLoweringInfo SI(DAG);
-  unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
+  unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
   populateCallLoweringInfo(
-      SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
-      ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
+      SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+      ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
       false);
   if (!VarArgDisallowed)
-    SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();
+    SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
 
-  auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);
+  auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt);
 
   unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
 
-  auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
+  auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
   SI.ID = SD.StatepointID.getValueOr(DefaultID);
   SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
 
@@ -918,15 +945,14 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
   // NB! The GC arguments are deliberately left empty.
 
   if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
-    const Instruction *Inst = CS.getInstruction();
-    ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
-    setValue(Inst, ReturnVal);
+    ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
+    setValue(Call, ReturnVal);
   }
 }
 
 void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
-    ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
-  LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB,
+    const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) {
+  LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB,
                                    /* VarArgDisallowed = */ false,
                                    /* ForceVoidReturnTy  = */ false);
 }
@@ -986,11 +1012,11 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
   }
 
   SDValue SpillSlot =
-      DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+    DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
 
-  // Be conservative: flush all pending loads
-  // TODO: Probably we can be less restrictive on this,
-  // it may allow more scheduling opportunities.
+  // Note: We know all of these reloads are independent, but don't bother to
+  // exploit that chain wise.  DAGCombine will happily do so as needed, so
+  // doing it here would be a small compile time win at most.
   SDValue Chain = getRoot();
 
   SDValue SpillLoad =
@@ -1000,7 +1026,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
                   MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                                     *DerivedPtrLocation));
 
-  // Again, be conservative, don't emit pending loads
   DAG.setRoot(SpillLoad.getValue(1));
 
   assert(SpillLoad.getNode());
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 372c82a359f6..70507932681d 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,9 +1,8 @@
 //===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -67,13 +66,18 @@ public:
   /// before the next statepoint.  If we don't see it, we'll report
   /// an assertion.
   void scheduleRelocCall(const CallInst &RelocCall) {
-    PendingGCRelocateCalls.push_back(&RelocCall);
+    // We are not interested in lowering dead instructions.
+    if (!RelocCall.use_empty())
+      PendingGCRelocateCalls.push_back(&RelocCall);
   }
 
   /// Remove this gc_relocate from the list we're expecting to see
   /// before the next statepoint.  If we weren't expecting to see
   /// it, we'll report an assertion.
   void relocCallVisited(const CallInst &RelocCall) {
+    // We are not interested in lowering dead instructions.
+    if (RelocCall.use_empty())
+      return;
     auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
     assert(I != PendingGCRelocateCalls.end() &&
            "Visited unexpected gcrelocate call");
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2f05c1e3cef..b260cd91d468 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1,9 +1,8 @@
 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -100,19 +99,22 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
 
 /// Set CallLoweringInfo attribute flags based on a call instruction
 /// and called function attributes.
-void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                      unsigned ArgIdx) {
-  IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
-  IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
-  IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
-  IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
-  IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
-  IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
-  IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
-  IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
-  IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
-  IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
-  Alignment  = CS->getParamAlignment(ArgIdx);
+  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
+  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
+  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
+  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
+  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
+  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
+  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
+  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+  Alignment = Call->getParamAlignment(ArgIdx);
+  ByValType = nullptr;
+  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+    ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
@@ -121,7 +123,8 @@ std::pair<SDValue, SDValue>
 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                             ArrayRef<SDValue> Ops, bool isSigned,
                             const SDLoc &dl, bool doesNotReturn,
-                            bool isReturnValueUsed) const {
+                            bool isReturnValueUsed,
+                            bool isPostTypeLegalization) const {
   TargetLowering::ArgListTy Args;
   Args.reserve(Ops.size());
 
@@ -147,11 +150,114 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
       .setNoReturn(doesNotReturn)
       .setDiscardResult(!isReturnValueUsed)
+      .setIsPostTypeLegalization(isPostTypeLegalization)
       .setSExtResult(signExtend)
       .setZExtResult(!signExtend);
   return LowerCallTo(CLI);
 }
 
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                         unsigned Limit, uint64_t Size,
+                                         unsigned DstAlign, unsigned SrcAlign,
+                                         bool IsMemset,
+                                         bool ZeroMemset,
+                                         bool MemcpyStrSrc,
+                                         bool AllowOverlap,
+                                         unsigned DstAS, unsigned SrcAS,
+                                         const AttributeList &FuncAttributes) const {
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
+  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+    return false; // Nonzero SrcAlign smaller than DstAlign: give up.
+
+  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                               IsMemset, ZeroMemset, MemcpyStrSrc,
+                               FuncAttributes);
+
+  if (VT == MVT::Other) {
+    // Use the largest integer type whose alignment constraints are satisfied.
+    // We only need to check DstAlign here as SrcAlign is always greater or
+    // equal to DstAlign (or zero).
+    VT = MVT::i64;
+    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+    assert(VT.isInteger());
+
+    // Find the largest legal integer type.
+    MVT LVT = MVT::i64;
+    while (!isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    // If the type we've chosen is larger than the largest legal integer type
+    // then use that instead.
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+      EVT NewVT = VT;
+      unsigned NewVTSize;
+
+      bool Found = false;
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+            isSafeMemOpType(NewVT.getSimpleVT()))
+          Found = true;
+        else if (NewVT == MVT::i64 &&
+                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+                 isSafeMemOpType(MVT::f64)) {
+          // i64 is usually not legal on 32-bit targets, but f64 may be.
+          NewVT = MVT::f64;
+          Found = true;
+        }
+      }
+
+      if (!Found) {
+        do {
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+          if (NewVT == MVT::i8)
+            break;
+        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+      }
+      NewVTSize = NewVT.getSizeInBits() / 8;
+
+      // If the new VT cannot cover all of the remaining bytes, then consider
+      // issuing a (or a pair of) unaligned and overlapping load / store.
+      bool Fast;
+      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+                                         MachineMemOperand::MONone, &Fast) &&
+          Fast)
+        VTSize = Size; // Keep the wider VT and finish with this op.
+      else {
+        VT = NewVT;
+        VTSize = NewVTSize;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false; // Over Limit; MemOps keeps entries pushed so far.
+
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
 /// Soften the operands of a comparison. This code is shared among BR_CC,
 /// SELECT_CC, and SETCC handlers.
 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
@@ -346,7 +452,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 /// return true.
 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                             TargetLoweringOpt &TLO) const {
-  SelectionDAG &DAG = TLO.DAG;
   SDLoc DL(Op);
   unsigned Opcode = Op.getOpcode();
 
@@ -372,8 +477,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 
     if (!C.isSubsetOf(Demanded)) {
       EVT VT = Op.getValueType();
-      SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
-      SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+      SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
       return TLO.CombineTo(Op, NewOp);
     }
 
@@ -487,6 +592,10 @@ bool TargetLowering::SimplifyDemandedBits(
   // Don't know anything.
   Known = KnownBits(BitWidth);
 
+  // Undef operand.
+  if (Op.isUndef())
+    return false;
+
   if (Op.getOpcode() == ISD::Constant) {
     // We know all of the bits for a constant!
     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
@@ -509,40 +618,116 @@ bool TargetLowering::SimplifyDemandedBits(
     DemandedElts = APInt::getAllOnesValue(NumElts);
   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
     // Not demanding any bits/elts from Op.
-    if (!Op.isUndef())
-      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
-    return false;
+    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
   } else if (Depth == 6) { // Limit search depth.
     return false;
   }
 
   KnownBits Known2, KnownOut;
   switch (Op.getOpcode()) {
+  case ISD::SCALAR_TO_VECTOR: {
+    if (!DemandedElts[0])
+      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+    KnownBits SrcKnown;
+    SDValue Src = Op.getOperand(0);
+    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+      return true;
+    Known = SrcKnown.zextOrTrunc(BitWidth, false);
+    break;
+  }
   case ISD::BUILD_VECTOR:
-    // Collect the known bits that are shared by every constant vector element.
-    Known.Zero.setAllBits(); Known.One.setAllBits();
-    for (SDValue SrcOp : Op->ops()) {
-      if (!isa<ConstantSDNode>(SrcOp)) {
-        // We can only handle all constant values - bail out with no known bits.
-        Known = KnownBits(BitWidth);
-        return false;
-      }
-      Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
-      Known2.Zero = ~Known2.One;
-
-      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
-      if (Known2.One.getBitWidth() != BitWidth) {
-        assert(Known2.getBitWidth() > BitWidth &&
-               "Expected BUILD_VECTOR implicit truncation");
-        Known2 = Known2.trunc(BitWidth);
+    // Collect the known bits that are shared by every demanded element.
+    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
+    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+    return false; // Don't fall through, will infinitely loop.
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    if (getTargetConstantFromLoad(LD)) {
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+      return false; // Don't fall through, will infinitely loop.
+    }
+    break;
+  }
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Scl = Op.getOperand(1);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    EVT VecVT = Vec.getValueType();
+
+    // If index isn't constant, assume we need all vector elements AND the
+    // inserted element.
+    APInt DemandedVecElts(DemandedElts);
+    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+      unsigned Idx = CIdx->getZExtValue();
+      DemandedVecElts.clearBit(Idx);
+
+      // Inserted element is not required.
+      if (!DemandedElts[Idx])
+        return TLO.CombineTo(Op, Vec);
+    }
+
+    KnownBits KnownScl;
+    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
+    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+      return true;
+
+    Known = KnownScl.zextOrTrunc(BitWidth, false);
+
+    KnownBits KnownVec;
+    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+                             Depth + 1))
+      return true;
+
+    if (!!DemandedVecElts) {
+      Known.One &= KnownVec.One;
+      Known.Zero &= KnownVec.Zero;
+    }
+
+    return false;
+  }
+  case ISD::INSERT_SUBVECTOR: {
+    SDValue Base = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    EVT SubVT = Sub.getValueType();
+    unsigned NumSubElts = SubVT.getVectorNumElements();
+
+    // If index isn't constant, assume we need the original demanded base
+    // elements and ALL the inserted subvector elements.
+    APInt BaseElts = DemandedElts;
+    APInt SubElts = APInt::getAllOnesValue(NumSubElts);
+    if (isa<ConstantSDNode>(Op.getOperand(2))) {
+      const APInt &Idx = Op.getConstantOperandAPInt(2);
+      if (Idx.ule(NumElts - NumSubElts)) {
+        unsigned SubIdx = Idx.getZExtValue();
+        SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+        BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
       }
+    }
 
-      // Known bits are the values that are shared by every element.
-      // TODO: support per-element known bits.
-      Known.One &= Known2.One;
-      Known.Zero &= Known2.Zero;
+    KnownBits KnownSub, KnownBase;
+    if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+                             Depth + 1))
+      return true;
+    if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+                             Depth + 1))
+      return true;
+
+    Known.Zero.setAllBits();
+    Known.One.setAllBits();
+    if (!!SubElts) {
+        Known.One &= KnownSub.One;
+        Known.Zero &= KnownSub.Zero;
     }
-    return false; // Don't fall through, will infinitely loop.
+    if (!!BaseElts) {
+        Known.One &= KnownBase.One;
+        Known.Zero &= KnownBase.Zero;
+    }
+    break;
+  }
   case ISD::CONCAT_VECTORS: {
     Known.Zero.setAllBits();
     Known.One.setAllBits();
@@ -640,11 +825,12 @@ bool TargetLowering::SimplifyDemandedBits(
       }
     }
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
-                             Depth + 1))
+    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+                             Known2, TLO, Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
@@ -674,11 +860,12 @@ bool TargetLowering::SimplifyDemandedBits(
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
-                             Depth + 1))
+    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+                             Known2, TLO, Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
@@ -705,10 +892,12 @@ bool TargetLowering::SimplifyDemandedBits(
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+                             Depth + 1))
       return true;
     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
 
@@ -831,20 +1020,23 @@ bool TargetLowering::SimplifyDemandedBits(
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
       // If the shift count is an invalid immediate, don't do anything.
       if (SA->getAPIntValue().uge(BitWidth))
         break;
 
       unsigned ShAmt = SA->getZExtValue();
+      if (ShAmt == 0)
+        return TLO.CombineTo(Op, Op0);
 
       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
       // single shift.  We can do this if the bottom bits (which are shifted
       // out) are never demanded.
+      // TODO - support non-uniform vector amounts.
       if (Op0.getOpcode() == ISD::SRL) {
-        if (ShAmt &&
-            (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
-          if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
+        if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+          if (ConstantSDNode *SA2 =
+                  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
             if (SA2->getAPIntValue().ult(BitWidth)) {
               unsigned C1 = SA2->getZExtValue();
               unsigned Opc = ISD::SHL;
@@ -862,8 +1054,14 @@ bool TargetLowering::SimplifyDemandedBits(
         }
       }
 
-      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
-                               Depth + 1))
+      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
+                               Known, TLO, Depth + 1))
+        return true;
+
+      // Try shrinking the operation as long as the shift amount will still be
+      // in range.
+      if ((ShAmt < DemandedBits.getActiveBits()) &&
+          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
         return true;
 
       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
@@ -919,12 +1117,16 @@ bool TargetLowering::SimplifyDemandedBits(
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
 
-    if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
       // If the shift count is an invalid immediate, don't do anything.
       if (SA->getAPIntValue().uge(BitWidth))
         break;
 
       unsigned ShAmt = SA->getZExtValue();
+      if (ShAmt == 0)
+        return TLO.CombineTo(Op, Op0);
+
+      EVT ShiftVT = Op1.getValueType();
       APInt InDemandedMask = (DemandedBits << ShAmt);
 
       // If the shift is exact, then it does demand the low bits (and knows that
@@ -935,10 +1137,11 @@ bool TargetLowering::SimplifyDemandedBits(
       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
       // single shift.  We can do this if the top bits (which are shifted out)
       // are never demanded.
+      // TODO - support non-uniform vector amounts.
       if (Op0.getOpcode() == ISD::SHL) {
-        if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
-          if (ShAmt &&
-              (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+        if (ConstantSDNode *SA2 =
+                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+          if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
             if (SA2->getAPIntValue().ult(BitWidth)) {
               unsigned C1 = SA2->getZExtValue();
               unsigned Opc = ISD::SRL;
@@ -948,7 +1151,7 @@ bool TargetLowering::SimplifyDemandedBits(
                 Opc = ISD::SHL;
               }
 
-              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
               return TLO.CombineTo(
                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
             }
@@ -957,7 +1160,8 @@ bool TargetLowering::SimplifyDemandedBits(
       }
 
       // Compute the new bits that are at the top now.
-      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+                               Depth + 1))
         return true;
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       Known.Zero.lshrInPlace(ShAmt);
@@ -978,12 +1182,15 @@ bool TargetLowering::SimplifyDemandedBits(
     if (DemandedBits.isOneValue())
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
 
-    if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
       // If the shift count is an invalid immediate, don't do anything.
       if (SA->getAPIntValue().uge(BitWidth))
         break;
 
       unsigned ShAmt = SA->getZExtValue();
+      if (ShAmt == 0)
+        return TLO.CombineTo(Op, Op0);
+
       APInt InDemandedMask = (DemandedBits << ShAmt);
 
       // If the shift is exact, then it does demand the low bits (and knows that
@@ -996,7 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits(
       if (DemandedBits.countLeadingZeros() < ShAmt)
         InDemandedMask.setSignBit();
 
-      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+                               Depth + 1))
         return true;
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       Known.Zero.lshrInPlace(ShAmt);
@@ -1026,6 +1234,55 @@ bool TargetLowering::SimplifyDemandedBits(
     }
     break;
   }
+  case ISD::FSHL:
+  case ISD::FSHR: {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+    SDValue Op2 = Op.getOperand(2);
+    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
+
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
+      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+
+      // For fshl, 0-shift returns the 1st arg.
+      // For fshr, 0-shift returns the 2nd arg.
+      if (Amt == 0) {
+        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
+                                 Known, TLO, Depth + 1))
+          return true;
+        break;
+      }
+
+      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
+      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
+      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
+      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
+      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+                               Depth + 1))
+        return true;
+      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
+                               Depth + 1))
+        return true;
+
+      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
+      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
+      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+      Known.One |= Known2.One;
+      Known.Zero |= Known2.Zero;
+    }
+    break;
+  }
+  case ISD::BITREVERSE: {
+    SDValue Src = Op.getOperand(0);
+    APInt DemandedSrcBits = DemandedBits.reverseBits();
+    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+                             Depth + 1))
+      return true;
+    Known.One = Known2.One.reverseBits();
+    Known.Zero = Known2.Zero.reverseBits();
+    break;
+  }
   case ISD::SIGN_EXTEND_INREG: {
     SDValue Op0 = Op.getOperand(0);
     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1033,8 +1290,8 @@ bool TargetLowering::SimplifyDemandedBits(
 
     // If we only care about the highest bit, don't bother shifting right.
     if (DemandedBits.isSignMask()) {
-      bool AlreadySignExtended =
-          TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
+      unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
       // However if the input is already sign extended we expect the sign
       // extension to be dropped altogether later and do not simplify.
       if (!AlreadySignExtended) {
@@ -1099,79 +1356,116 @@ bool TargetLowering::SimplifyDemandedBits(
       return true;
 
     Known.Zero = KnownLo.Zero.zext(BitWidth) |
-                KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
+                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
 
     Known.One = KnownLo.One.zext(BitWidth) |
-               KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
     break;
   }
-  case ISD::ZERO_EXTEND: {
+  case ISD::ZERO_EXTEND:
+  case ISD::ZERO_EXTEND_VECTOR_INREG: {
     SDValue Src = Op.getOperand(0);
-    unsigned InBits = Src.getScalarValueSizeInBits();
+    EVT SrcVT = Src.getValueType();
+    unsigned InBits = SrcVT.getScalarSizeInBits();
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    if (DemandedBits.getActiveBits() <= InBits)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+    if (DemandedBits.getActiveBits() <= InBits) {
+      // If we only need the non-extended bits of the bottom element
+      // then we can just bitcast to the result.
+      if (IsVecInReg && DemandedElts == 1 &&
+          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+          TLO.DAG.getDataLayout().isLittleEndian())
+        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+      unsigned Opc =
+          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+    }
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
-    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    Known = Known.zext(BitWidth);
-    Known.Zero.setBitsFrom(InBits);
+    assert(Known.getBitWidth() == InBits && "Src width has changed?");
+    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
     break;
   }
-  case ISD::SIGN_EXTEND: {
+  case ISD::SIGN_EXTEND:
+  case ISD::SIGN_EXTEND_VECTOR_INREG: {
     SDValue Src = Op.getOperand(0);
-    unsigned InBits = Src.getScalarValueSizeInBits();
+    EVT SrcVT = Src.getValueType();
+    unsigned InBits = SrcVT.getScalarSizeInBits();
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    if (DemandedBits.getActiveBits() <= InBits)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+    if (DemandedBits.getActiveBits() <= InBits) {
+      // If we only need the non-extended bits of the bottom element
+      // then we can just bitcast to the result.
+      if (IsVecInReg && DemandedElts == 1 &&
+          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+          TLO.DAG.getDataLayout().isLittleEndian())
+        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+      unsigned Opc =
+          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+    }
+
+    APInt InDemandedBits = DemandedBits.trunc(InBits);
+    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
 
     // Since some of the sign extended bits are demanded, we know that the sign
     // bit is demanded.
-    APInt InDemandedBits = DemandedBits.trunc(InBits);
     InDemandedBits.setBit(InBits - 1);
 
-    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    assert(Known.getBitWidth() == InBits && "Src width has changed?");
+
     // If the sign bit is known one, the top bits match.
     Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    if (Known.isNonNegative())
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+    if (Known.isNonNegative()) {
+      unsigned Opc =
+          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
+      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+    }
     break;
   }
-  case ISD::SIGN_EXTEND_VECTOR_INREG: {
-    // TODO - merge this with SIGN_EXTEND above?
+  case ISD::ANY_EXTEND:
+  case ISD::ANY_EXTEND_VECTOR_INREG: {
     SDValue Src = Op.getOperand(0);
-    unsigned InBits = Src.getScalarValueSizeInBits();
-
-    APInt InDemandedBits = DemandedBits.trunc(InBits);
+    EVT SrcVT = Src.getValueType();
+    unsigned InBits = SrcVT.getScalarSizeInBits();
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
 
-    // If some of the sign extended bits are demanded, we know that the sign
-    // bit is demanded.
-    if (InBits < DemandedBits.getActiveBits())
-      InDemandedBits.setBit(InBits - 1);
+    // If we only need the bottom element then we can just bitcast.
+    // TODO: Handle ANY_EXTEND?
+    if (IsVecInReg && DemandedElts == 1 &&
+        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+        TLO.DAG.getDataLayout().isLittleEndian())
+      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
 
-    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
-      return true;
-    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    // If the sign bit is known one, the top bits match.
-    Known = Known.sext(BitWidth);
-    break;
-  }
-  case ISD::ANY_EXTEND: {
-    SDValue Src = Op.getOperand(0);
-    unsigned InBits = Src.getScalarValueSizeInBits();
     APInt InDemandedBits = DemandedBits.trunc(InBits);
-    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+                             Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
-    Known = Known.zext(BitWidth);
+    assert(Known.getBitWidth() == InBits && "Src width has changed?");
+    Known = Known.zext(BitWidth, false /* => any extend */);
     break;
   }
   case ISD::TRUNCATE: {
@@ -1198,29 +1492,29 @@ bool TargetLowering::SimplifyDemandedBits(
           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
           // undesirable.
           break;
-        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
-        if (!ShAmt)
+
+        auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+        if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
           break;
+
         SDValue Shift = Src.getOperand(1);
-        if (TLO.LegalTypes()) {
-          uint64_t ShVal = ShAmt->getZExtValue();
+        uint64_t ShVal = ShAmt->getZExtValue();
+
+        if (TLO.LegalTypes())
           Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
-        }
 
-        if (ShAmt->getZExtValue() < BitWidth) {
-          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
-                                                 OperandBitWidth - BitWidth);
-          HighBits.lshrInPlace(ShAmt->getZExtValue());
-          HighBits = HighBits.trunc(BitWidth);
-
-          if (!(HighBits & DemandedBits)) {
-            // None of the shifted in bits are needed.  Add a truncate of the
-            // shift input, then shift it.
-            SDValue NewTrunc =
-                TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
-            return TLO.CombineTo(
-                Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
-          }
+        APInt HighBits =
+            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
+        HighBits.lshrInPlace(ShVal);
+        HighBits = HighBits.trunc(BitWidth);
+
+        if (!(HighBits & DemandedBits)) {
+          // None of the shifted in bits are needed.  Add a truncate of the
+          // shift input, then shift it.
+          SDValue NewTrunc =
+              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+          return TLO.CombineTo(
+              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
         }
         break;
       }
@@ -1234,8 +1528,8 @@ bool TargetLowering::SimplifyDemandedBits(
     // demanded by its users.
     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
-    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
-                             Known, TLO, Depth+1))
+    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
+                             TLO, Depth + 1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
 
@@ -1266,7 +1560,7 @@ bool TargetLowering::SimplifyDemandedBits(
 
     Known = Known2;
     if (BitWidth > EltBitWidth)
-      Known = Known.zext(BitWidth);
+      Known = Known.zext(BitWidth, false /* => any extend */);
     break;
   }
   case ISD::BITCAST: {
@@ -1297,40 +1591,68 @@ bool TargetLowering::SimplifyDemandedBits(
                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
       }
     }
-    // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
-    // demanding the element if any bits from it are demanded.
+
+    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
+    // Demand the elt/bit if any of the original elts/bits are demanded.
     // TODO - bigendian once we have test coverage.
     // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
     if (SrcVT.isVector() && NumSrcEltBits > 1 &&
         (BitWidth % NumSrcEltBits) == 0 &&
         TLO.DAG.getDataLayout().isLittleEndian()) {
       unsigned Scale = BitWidth / NumSrcEltBits;
-      auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
-        DemandedSubElts = APInt::getNullValue(Scale);
-        for (unsigned i = 0; i != Scale; ++i) {
-          unsigned Offset = i * NumSrcEltBits;
-          APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
-          if (!Sub.isNullValue())
-            DemandedSubElts.setBit(i);
+      unsigned NumSrcElts = SrcVT.getVectorNumElements();
+      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+      for (unsigned i = 0; i != Scale; ++i) {
+        unsigned Offset = i * NumSrcEltBits;
+        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+        if (!Sub.isNullValue()) {
+          DemandedSrcBits |= Sub;
+          for (unsigned j = 0; j != NumElts; ++j)
+            if (DemandedElts[j])
+              DemandedSrcElts.setBit((j * Scale) + i);
         }
+      }
+
+      APInt KnownSrcUndef, KnownSrcZero;
+      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+                                     KnownSrcZero, TLO, Depth + 1))
         return true;
-      };
 
-      APInt DemandedSubElts;
-      if (GetDemandedSubMask(DemandedSubElts)) {
-        unsigned NumSrcElts = SrcVT.getVectorNumElements();
-        APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+      KnownBits KnownSrcBits;
+      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+                               KnownSrcBits, TLO, Depth + 1))
+        return true;
+    } else if ((NumSrcEltBits % BitWidth) == 0 &&
+               TLO.DAG.getDataLayout().isLittleEndian()) {
+      unsigned Scale = NumSrcEltBits / BitWidth;
+      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i]) {
+          unsigned Offset = (i % Scale) * BitWidth;
+          DemandedSrcBits.insertBits(DemandedBits, Offset);
+          DemandedSrcElts.setBit(i / Scale);
+        }
 
-        APInt KnownUndef, KnownZero;
-        if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
-                                       TLO, Depth + 1))
+      if (SrcVT.isVector()) {
+        APInt KnownSrcUndef, KnownSrcZero;
+        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+                                       KnownSrcZero, TLO, Depth + 1))
           return true;
       }
+
+      KnownBits KnownSrcBits;
+      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+                               KnownSrcBits, TLO, Depth + 1))
+        return true;
     }
+
     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      Known = TLO.DAG.computeKnownBits(Op, Depth);
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
       return false;
     }
     break;
@@ -1343,8 +1665,10 @@ bool TargetLowering::SimplifyDemandedBits(
     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
-    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
-        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+                             Depth + 1) ||
+        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+                             Depth + 1) ||
         // See if the operation should be performed at a smaller bit width.
         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
       SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1353,8 +1677,8 @@ bool TargetLowering::SimplifyDemandedBits(
         // won't wrap after simplification.
         Flags.setNoSignedWrap(false);
         Flags.setNoUnsignedWrap(false);
-        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
-                                        Flags);
+        SDValue NewOp =
+            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
         return TLO.CombineTo(Op, NewOp);
       }
       return true;
@@ -1431,15 +1755,64 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
     DCI.AddToWorklist(Op.getNode());
     DCI.CommitTargetLoweringOpt(TLO);
   }
+
   return Simplified;
 }
 
+/// Given a vector binary operation and the known-undef element masks of both
+/// operands, return a mask of the result elements that are known undef.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+                                         const APInt &UndefOp0,
+                                         const APInt &UndefOp1) {
+  EVT VT = BO.getValueType();
+  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
+         "Vector binop only");
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(UndefOp0.getBitWidth() == NumElts &&
+         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+                                   const APInt &UndefVals) {
+    if (UndefVals[Index])
+      return DAG.getUNDEF(EltVT);
+
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      // Try hard to make sure that the getNode() call is not creating temporary
+      // nodes. Ignore opaque integers because they do not constant fold.
+      SDValue Elt = BV->getOperand(Index);
+      auto *C = dyn_cast<ConstantSDNode>(Elt);
+      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+        return Elt;
+    }
+
+    return SDValue(); // Element is neither known undef nor a usable constant.
+  };
+
+  APInt KnownUndef = APInt::getNullValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // If both inputs for this element are either constant or undef and match
+    // the element type, compute the constant/undef result for this element of
+    // the vector.
+    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+    // not handle FP constants. The code within getNode() should be refactored
+    // to avoid the danger of creating a bogus temporary node here.
+    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+        KnownUndef.setBit(i);
+  }
+  return KnownUndef;
+}
+
 bool TargetLowering::SimplifyDemandedVectorElts(
-    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
     bool AssumeSingleUse) const {
   EVT VT = Op.getValueType();
-  APInt DemandedElts = DemandedEltMask;
+  APInt DemandedElts = OriginalDemandedElts;
   unsigned NumElts = DemandedElts.getBitWidth();
   assert(VT.isVector() && "Expected vector op");
   assert(VT.getVectorNumElements() == NumElts &&
@@ -1617,7 +1990,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     SDValue Sub = Op.getOperand(1);
     EVT SubVT = Sub.getValueType();
     unsigned NumSubElts = SubVT.getVectorNumElements();
-    const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
+    const APInt &Idx = Op.getConstantOperandAPInt(2);
     if (Idx.ugt(NumElts - NumSubElts))
       break;
     unsigned SubIdx = Idx.getZExtValue();
@@ -1786,18 +2159,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     }
     break;
   }
+  case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
     APInt SrcUndef, SrcZero;
     SDValue Src = Op.getOperand(0);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
-    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+                                   Depth + 1))
       return true;
     KnownZero = SrcZero.zextOrTrunc(NumElts);
     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
 
+    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
+        DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+      // aext - if we just need the bottom element then we can bitcast.
+      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+    }
+
     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
       // zext(undef) upper bits are guaranteed to be zero.
       if (DemandedElts.isSubsetOf(KnownUndef))
@@ -1806,6 +2187,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     }
     break;
   }
+
+  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+  // MAX, saturated math, etc.
   case ISD::OR:
   case ISD::XOR:
   case ISD::ADD:
@@ -1815,17 +2199,38 @@ bool TargetLowering::SimplifyDemandedVectorElts(
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM: {
-    APInt SrcUndef, SrcZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
-                                   KnownZero, TLO, Depth + 1))
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
       return true;
-    KnownZero &= SrcZero;
-    KnownUndef &= SrcUndef;
+
+    KnownZero = ZeroLHS & ZeroRHS;
+    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+    break;
+  }
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::ROTL:
+  case ISD::ROTR: {
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
+      return true;
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
+      return true;
+
+    KnownZero = ZeroLHS;
+    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
     break;
   }
+  case ISD::MUL:
   case ISD::AND: {
     APInt SrcUndef, SrcZero;
     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
@@ -1837,6 +2242,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
 
     // If either side has a zero element, then the result element is zero, even
     // if the other is an UNDEF.
+    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+    // and then handle 'and' nodes with the rest of the binop opcodes.
     KnownZero |= SrcZero;
     KnownUndef &= SrcUndef;
     KnownUndef &= ~KnownZero;
@@ -1864,8 +2271,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     } else {
       KnownBits Known;
       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
-      if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
-                               Depth, AssumeSingleUse))
+      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+                               TLO, Depth, AssumeSingleUse))
         return true;
     }
     break;
@@ -1950,6 +2357,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
   return false;
 }
 
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
+  return nullptr; // This implementation never reports a constant for a load.
+}
+
 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                   const SelectionDAG &DAG,
                                                   bool SNaN,
@@ -2044,10 +2455,9 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 
 /// This helper function of SimplifySetCC tries to optimize the comparison when
 /// either operand of the SetCC node is a bitwise-and instruction.
-SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
-                                             ISD::CondCode Cond,
-                                             DAGCombinerInfo &DCI,
-                                             const SDLoc &DL) const {
+SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+                                         ISD::CondCode Cond, const SDLoc &DL,
+                                         DAGCombinerInfo &DCI) const {
   // Match these patterns in any of their permutations:
   // (X & Y) == Y
   // (X & Y) != Y
@@ -2200,6 +2610,49 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
   return T2;
 }
 
+/// Try to fold an eq/ne comparison whose 1st operand (N0) is an add/sub/xor
+/// and whose 2nd operand (N1) is also an operand of that binop. Returns a null
+/// SDValue if nothing folds. Callers swap N0/N1 to handle commuted patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
+                                           ISD::CondCode Cond, const SDLoc &DL,
+                                           DAGCombinerInfo &DCI) const {
+  unsigned BOpcode = N0.getOpcode();
+  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
+         "Unexpected binop");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+  // (X + Y) == X --> Y == 0
+  // (X - Y) == X --> Y == 0
+  // (X ^ Y) == X --> Y == 0
+  SelectionDAG &DAG = DCI.DAG;
+  EVT OpVT = N0.getValueType();
+  SDValue X = N0.getOperand(0);
+  SDValue Y = N0.getOperand(1);
+  if (X == N1)
+    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
+
+  if (Y != N1)
+    return SDValue(); // N1 is neither operand of the binop.
+
+  // (X + Y) == Y --> X == 0
+  // (X ^ Y) == Y --> X == 0
+  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
+    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
+
+  // Require a one-use sub; the shift is also invalid for boolean (i1) operands.
+  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
+    return SDValue();
+
+  // (X - Y) == Y --> X == Y << 1
+  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
+                                 !DCI.isBeforeLegalize());
+  SDValue One = DAG.getConstant(1, DL, ShiftVT);
+  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
+  if (!DCI.isCalledByLegalizer())
+    DCI.AddToWorklist(YShl1.getNode());
+  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2209,14 +2662,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   SelectionDAG &DAG = DCI.DAG;
   EVT OpVT = N0.getValueType();
 
-  // These setcc operations always fold.
-  switch (Cond) {
-  default: break;
-  case ISD::SETFALSE:
-  case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
-  case ISD::SETTRUE:
-  case ISD::SETTRUE2:  return DAG.getBoolConstant(true, dl, VT, OpVT);
-  }
+  // Constant fold or commute setcc.
+  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
+    return Fold;
 
   // Ensure that the constant occurs on the RHS and fold constant comparisons.
   // TODO: Handle non-splat vector constants. All undef causes trouble.
@@ -2226,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
 
+  // If we have a subtract with the same 2 non-constant operands as this setcc
+  // -- but in reverse order -- then try to commute the operands of this setcc
+  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+  // instruction on some targets.
+  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+      (DCI.isBeforeLegalizeOps() ||
+       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+      DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
+      !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
 
@@ -2235,8 +2694,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
         N0.getOperand(1).getOpcode() == ISD::Constant) {
-      const APInt &ShAmt
-        = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+      const APInt &ShAmt = N0.getConstantOperandAPInt(1);
       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
           ShAmt == Log2_32(N0.getValueSizeInBits())) {
         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
@@ -2275,7 +2733,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
       }
 
-      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+      // If ctpop is not supported, expand a power-of-2 comparison based on it.
+      if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
+          (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+        // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+        // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+        SDValue Zero = DAG.getConstant(0, dl, CTVT);
+        SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+        ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+        SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+        SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+        SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+        unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+        return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+      }
     }
 
     // (zext x) == C --> x == (trunc C)
@@ -2387,8 +2859,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         // 8 bits, but have to be careful...
         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
           origWidth = Lod->getMemoryVT().getSizeInBits();
-        const APInt &Mask =
-          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        const APInt &Mask = N0.getConstantOperandAPInt(1);
         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
           for (unsigned offset=0; offset<origWidth/width; offset++) {
@@ -2480,7 +2951,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         break;
       }
       default:
-        break;   // todo, be more careful with signed comparisons
+        break; // todo, be more careful with signed comparisons
       }
     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
@@ -2501,7 +2972,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       } else {
         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
-                              DAG.getConstant(Imm, dl, Op0Ty));
+                             DAG.getConstant(Imm, dl, Op0Ty));
       }
       if (!DCI.isCalledByLegalizer())
         DCI.AddToWorklist(ZextOp.getNode());
@@ -2598,6 +3069,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
 
+    // Given:
+    //   icmp eq/ne (urem %x, %y), 0
+    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+    //   icmp eq/ne %x, 0
+    if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
+      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
+      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+    }
+
     if (SDValue V =
             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
       return V;
@@ -2805,25 +3288,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     }
   }
 
-  if (isa<ConstantFPSDNode>(N0.getNode())) {
-    // Constant fold or commute setcc.
-    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
-    if (O.getNode()) return O;
-  } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
-    // If the RHS of an FP comparison is a constant, simplify it away in
-    // some cases.
-    if (CFP->getValueAPF().isNaN()) {
-      // If an operand is known to be a nan, we can fold it.
-      switch (ISD::getUnorderedFlavor(Cond)) {
-      default: llvm_unreachable("Unknown flavor!");
-      case 0:  // Known false.
-        return DAG.getBoolConstant(false, dl, VT, OpVT);
-      case 1:  // Known true.
-        return DAG.getBoolConstant(true, dl, VT, OpVT);
-      case 2:  // Undefined.
-        return DAG.getUNDEF(VT);
-      }
-    }
+  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
+    auto *CFP = cast<ConstantFPSDNode>(N1);
+    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
 
     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
     // constant if knowing that the operand is non-nan is enough.  We prefer to
@@ -2883,15 +3350,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   if (N0 == N1) {
     // The sext(setcc()) => setcc() optimization relies on the appropriate
     // constant being emitted.
+    assert(!N0.getValueType().isInteger() &&
+           "Integer types should be handled by FoldSetCC");
 
     bool EqTrue = ISD::isTrueWhenEqual(Cond);
-
-    // We can always fold X == X for integer setcc's.
-    if (N0.getValueType().isInteger())
-      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
-
     unsigned UOF = ISD::getUnorderedFlavor(Cond);
-    if (UOF == 2)   // FP operators that are undefined on NaNs.
+    if (UOF == 2) // FP operators that are undefined on NaNs.
       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
     if (UOF == unsigned(EqTrue))
       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
@@ -2900,7 +3364,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
     if (NewCond != Cond &&
         (DCI.isBeforeLegalizeOps() ||
-         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+                            isCondCodeLegal(NewCond, N0.getSimpleValueType())))
       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
   }
 
@@ -2969,69 +3433,39 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
       }
 
-      // Simplify (X+Z) == X -->  Z == 0
+      // (X+Y) == X --> Y == 0 and similar folds.
       // Don't do this if X is an immediate that can fold into a cmp
-      // instruction and X+Z has other uses. It could be an induction variable
+      // instruction and X+Y has other uses. It could be an induction variable
       // chain, and the transform would increase register pressure.
-      if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
-        if (N0.getOperand(0) == N1)
-          return DAG.getSetCC(dl, VT, N0.getOperand(1),
-                              DAG.getConstant(0, dl, N0.getValueType()), Cond);
-        if (N0.getOperand(1) == N1) {
-          if (isCommutativeBinOp(N0.getOpcode()))
-            return DAG.getSetCC(dl, VT, N0.getOperand(0),
-                                DAG.getConstant(0, dl, N0.getValueType()),
-                                Cond);
-          if (N0.getNode()->hasOneUse()) {
-            assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
-            auto &DL = DAG.getDataLayout();
-            // (Z-X) == X  --> Z == X<<1
-            SDValue SH = DAG.getNode(
-                ISD::SHL, dl, N1.getValueType(), N1,
-                DAG.getConstant(1, dl,
-                                getShiftAmountTy(N1.getValueType(), DL,
-                                                 !DCI.isBeforeLegalize())));
-            if (!DCI.isCalledByLegalizer())
-              DCI.AddToWorklist(SH.getNode());
-            return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
-          }
-        }
-      }
+      if (!LegalRHSImm || N0.hasOneUse())
+        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
+          return V;
     }
 
     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
-        N1.getOpcode() == ISD::XOR) {
-      // Simplify  X == (X+Z) -->  Z == 0
-      if (N1.getOperand(0) == N0)
-        return DAG.getSetCC(dl, VT, N1.getOperand(1),
-                        DAG.getConstant(0, dl, N1.getValueType()), Cond);
-      if (N1.getOperand(1) == N0) {
-        if (isCommutativeBinOp(N1.getOpcode()))
-          return DAG.getSetCC(dl, VT, N1.getOperand(0),
-                          DAG.getConstant(0, dl, N1.getValueType()), Cond);
-        if (N1.getNode()->hasOneUse()) {
-          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
-          auto &DL = DAG.getDataLayout();
-          // X == (Z-X)  --> X<<1 == Z
-          SDValue SH = DAG.getNode(
-              ISD::SHL, dl, N1.getValueType(), N0,
-              DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
-                                                      !DCI.isBeforeLegalize())));
-          if (!DCI.isCalledByLegalizer())
-            DCI.AddToWorklist(SH.getNode());
-          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
-        }
-      }
-    }
+        N1.getOpcode() == ISD::XOR)
+      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
+        return V;
 
-    if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
+    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
       return V;
   }
 
+  // Fold remainder of division by a constant.
+  if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+    // When division is cheap or optimizing for minimum size,
+    // fall through to DIVREM creation by skipping this fold.
+    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
+      if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+        return Folded;
+  }
+
   // Fold away ALL boolean setcc's.
-  SDValue Temp;
   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
-    EVT OpVT = N0.getValueType();
+    SDValue Temp;
     switch (Cond) {
     default: llvm_unreachable("Unknown integer setcc!");
     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
@@ -3134,18 +3568,18 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
     switch (Constraint[0]) {
     default: break;
     case 'r': return C_RegisterClass;
-    case 'm':    // memory
-    case 'o':    // offsetable
-    case 'V':    // not offsetable
+    case 'm': // memory
+    case 'o': // offsetable
+    case 'V': // not offsetable
       return C_Memory;
-    case 'i':    // Simple Integer or Relocatable Constant
-    case 'n':    // Simple Integer
-    case 'E':    // Floating Point Constant
-    case 'F':    // Floating Point Constant
-    case 's':    // Relocatable Constant
-    case 'p':    // Address.
-    case 'X':    // Allow ANY value.
-    case 'I':    // Target registers.
+    case 'i': // Simple Integer or Relocatable Constant
+    case 'n': // Simple Integer
+    case 'E': // Floating Point Constant
+    case 'F': // Floating Point Constant
+    case 's': // Relocatable Constant
+    case 'p': // Address.
+    case 'X': // Allow ANY value.
+    case 'I': // Target registers.
     case 'J':
     case 'K':
     case 'L':
@@ -3159,7 +3593,7 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
     }
   }
 
-  if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
       return C_Memory;
     return C_Register;
@@ -3170,14 +3604,20 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
 /// Try to replace an X constraint, which matches anything, with another that
 /// has more specific requirements based on the type of the corresponding
 /// operand.
-const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   if (ConstraintVT.isInteger())
     return "r";
   if (ConstraintVT.isFloatingPoint())
-    return "f";      // works for many targets
+    return "f"; // works for many targets
   return nullptr;
 }
 
+SDValue TargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+    SelectionDAG &DAG) const {
+  return SDValue(); // Null result; no argument is read or modified here.
+}
+
 /// Lower the specified operand into the Ops vector.
 /// If it is invalid, don't add anything to Ops.
 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -3191,7 +3631,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   switch (ConstraintLetter) {
   default: break;
   case 'X':     // Allows any operand; labels (basic block) use this.
-    if (Op.getOpcode() == ISD::BasicBlock) {
+    if (Op.getOpcode() == ISD::BasicBlock ||
+        Op.getOpcode() == ISD::TargetBlockAddress) {
       Ops.push_back(Op);
       return;
     }
@@ -3199,46 +3640,57 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'i':    // Simple Integer or Relocatable Constant
   case 'n':    // Simple Integer
   case 's': {  // Relocatable Constant
-    // These operands are interested in values of the form (GV+C), where C may
-    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
-    // is possible and fine if either GV or C are missing.
-    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
-    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
-    // If we have "(add GV, C)", pull out GV/C
-    if (Op.getOpcode() == ISD::ADD) {
-      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
-      if (!C || !GA) {
-        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
-        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
-      }
-      if (!C || !GA) {
-        C = nullptr;
-        GA = nullptr;
-      }
-    }
 
-    // If we find a valid operand, map to the TargetXXX version so that the
-    // value itself doesn't get selected.
-    if (GA) {   // Either &GV   or   &GV+C
-      if (ConstraintLetter != 'n') {
-        int64_t Offs = GA->getOffset();
-        if (C) Offs += C->getZExtValue();
-        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
-                                                 C ? SDLoc(C) : SDLoc(),
-                                                 Op.getValueType(), Offs));
-      }
-      return;
-    }
-    if (C) {   // just C, no GV.
-      // Simple constants are not allowed for 's'.
-      if (ConstraintLetter != 's') {
+    GlobalAddressSDNode *GA;
+    ConstantSDNode *C;
+    BlockAddressSDNode *BA;
+    uint64_t Offset = 0;
+
+    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
+    // etc., since getelementptr is variadic. We can't use
+    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
+    // while in this case the GA may be furthest from the root node which is
+    // likely an ISD::ADD.
+    while (1) {
+      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+                                                 GA->getValueType(0),
+                                                 Offset + GA->getOffset()));
+        return;
+      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
+                 ConstraintLetter != 's') {
         // gcc prints these as sign extended.  Sign extend value to 64 bits
         // now; without this it would get ZExt'd later in
         // ScheduleDAGSDNodes::EmitNode, which is very generic.
-        Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
+        BooleanContent BCont = getBooleanContents(MVT::i64);
+        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+                                      : ISD::SIGN_EXTEND;
+        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
+                                                    : C->getSExtValue();
+        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                             SDLoc(C), MVT::i64));
+        return;
+      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
+                 ConstraintLetter != 'n') {
+        Ops.push_back(DAG.getTargetBlockAddress(
+            BA->getBlockAddress(), BA->getValueType(0),
+            Offset + BA->getOffset(), BA->getTargetFlags()));
+        return;
+      } else {
+        const unsigned OpCode = Op.getOpcode();
+        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+            Op = Op.getOperand(1);
+          // Subtraction is not commutative.
+          else if (OpCode == ISD::ADD &&
+                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+            Op = Op.getOperand(0);
+          else
+            return;
+          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+          continue;
+        }
       }
       return;
     }
@@ -3252,14 +3704,14 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                              StringRef Constraint,
                                              MVT VT) const {
   if (Constraint.empty() || Constraint[0] != '{')
-    return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
-  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
 
   // Remove the braces from around the name.
-  StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
 
-  std::pair<unsigned, const TargetRegisterClass*> R =
-    std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
+  std::pair<unsigned, const TargetRegisterClass *> R =
+      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
 
   // Figure out which register class contains this reg.
   for (const TargetRegisterClass *RC : RI->regclasses()) {
@@ -3271,8 +3723,8 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
          I != E; ++I) {
       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
-        std::pair<unsigned, const TargetRegisterClass*> S =
-          std::make_pair(*I, RC);
+        std::pair<unsigned, const TargetRegisterClass *> S =
+            std::make_pair(*I, RC);
 
         // If this register class has the requested value type, return it,
         // otherwise keep searching and return the first class found
@@ -3321,8 +3773,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
 
   // Do a prepass over the constraints, canonicalizing them, and building up the
   // ConstraintOperands list.
-  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
-  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0; // ResNo - The result number of the next output.
 
   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
     ConstraintOperands.emplace_back(std::move(CI));
@@ -3391,7 +3843,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
         case 64:
         case 128:
           OpInfo.ConstraintVT =
-            MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
           break;
         }
       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
@@ -3416,8 +3868,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
       for (maIndex = 0; maIndex < maCount; ++maIndex) {
         int weightSum = 0;
         for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
-            cIndex != eIndex; ++cIndex) {
-          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+             cIndex != eIndex; ++cIndex) {
+          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
           if (OpInfo.Type == InlineAsm::isClobber)
             continue;
 
@@ -3432,7 +3884,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
                    Input.ConstraintVT.isInteger()) ||
                   (OpInfo.ConstraintVT.getSizeInBits() !=
                    Input.ConstraintVT.getSizeInBits())) {
-                weightSum = -1;  // Can't match.
+                weightSum = -1; // Can't match.
                 break;
               }
             }
@@ -3453,8 +3905,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
 
       // Now select chosen alternative in each constraint.
       for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
-          cIndex != eIndex; ++cIndex) {
-        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+           cIndex != eIndex; ++cIndex) {
+        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
         if (cInfo.Type == InlineAsm::isClobber)
           continue;
         cInfo.selectAlternative(bestMAIndex);
@@ -3464,8 +3916,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
 
   // Check and hook up tied operands, choose constraint code to use.
   for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
-      cIndex != eIndex; ++cIndex) {
-    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+       cIndex != eIndex; ++cIndex) {
+    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
 
     // If this is an output operand with a matching input operand, look up the
     // matching input. If their types mismatch, e.g. one is an integer, the
@@ -3577,9 +4029,9 @@ TargetLowering::ConstraintWeight
         weight = CW_Register;
       break;
     case 'X': // any operand.
-    default:
-      weight = CW_Default;
-      break;
+  default:
+    weight = CW_Default;
+    break;
   }
   return weight;
 }
@@ -3678,6 +4130,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
       return;
     }
 
+    if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
+      return;
+
     // Otherwise, try to resolve it to something we know about by looking at
     // the actual operand type.
     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -3749,12 +4204,12 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
 }
 
 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
-                                     SelectionDAG &DAG,
-                                     SmallVectorImpl<SDNode *> &Created) const {
+                              SelectionDAG &DAG,
+                              SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
-    return SDValue(N,0); // Lower SDIV as SDIV
+    return SDValue(N, 0); // Lower SDIV as SDIV
   return SDValue();
 }
 
@@ -4000,6 +4455,104 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   return DAG.getSelect(dl, VT, IsOne, N0, Q);
 }
 
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return the equivalent comparison built by prepareUREMEqFold (a multiply,
+/// an optional rotate and an unsigned compare), or an empty SDValue if the
+/// fold is declined. Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
+                                        SDValue CompTargetNode,
+                                        ISD::CondCode Cond,
+                                        DAGCombinerInfo &DCI,
+                                        const SDLoc &DL) const {
+  SmallVector<SDNode *, 2> Built; // intermediate nodes the fold creates
+  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+                                         DCI, DL, Built)) {
+    for (SDNode *N : Built)
+      DCI.AddToWorklist(N); // queue each new node on the combiner worklist
+    return Folded;
+  }
+
+  return SDValue(); // fold not applicable; Built is ignored
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+                                  SDValue CompTargetNode, ISD::CondCode Cond,
+                                  DAGCombinerInfo &DCI, const SDLoc &DL,
+                                  SmallVectorImpl<SDNode *> &Created) const {
+  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+  // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+  // - P is the multiplicative inverse of D0 modulo 2^W
+  // - Q = floor((2^W - 1) / D)
+  // where W is the width of the common type of N and D.
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+         "Only applicable for (in)equality comparisons.");
+
+  EVT VT = REMNode.getValueType();
+
+  // If MUL is unavailable, we cannot proceed in any case.
+  if (!isOperationLegalOrCustom(ISD::MUL, VT))
+    return SDValue();
+
+  // TODO: Add non-uniform constant support.
+  ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+  if (!Divisor || !CompTarget || Divisor->isNullValue() ||
+      !CompTarget->isNullValue())
+    return SDValue(); // needs a constant nonzero D and a constant-zero target
+
+  const APInt &D = Divisor->getAPIntValue();
+
+  // Decompose D into D0 * 2^K
+  unsigned K = D.countTrailingZeros();
+  bool DivisorIsEven = (K != 0);
+  APInt D0 = D.lshr(K); // odd part of D
+
+  // The fold is invalid when D0 == 1.
+  // This is reachable because visitSetCC happens before visitREM.
+  if (D0.isOneValue())
+    return SDValue();
+
+  // P = inv(D0, 2^W)
+  // 2^W requires W + 1 bits, so we have to extend and then truncate.
+  unsigned W = D.getBitWidth();
+  APInt P = D0.zext(W + 1)
+                .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+                .trunc(W);
+  assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+  assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+  // Q = floor((2^W - 1) / D)
+  APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  SDValue PVal = DAG.getConstant(P, DL, VT);
+  SDValue QVal = DAG.getConstant(Q, DL, VT);
+  // (mul N, P)
+  SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
+  Created.push_back(Op1.getNode());
+
+  // Rotate right only if D was even.
+  if (DivisorIsEven) {
+    // We need ROTR; on bail-out the MUL built above is simply left unused.
+    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+      return SDValue();
+    SDValue ShAmt =
+        DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
+    SDNodeFlags Flags;
+    Flags.setExact(true);
+    // UREM: (rotr (mul N, P), K)
+    Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
+    Created.push_back(Op1.getNode());
+  }
+
+  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+  return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+                      ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+}
+
 bool TargetLowering::
 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4308,7 +4861,7 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
 }
 
 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
-                               SelectionDAG &DAG) const {
+                                      SelectionDAG &DAG) const {
   SDValue Src = Node->getOperand(0);
   EVT SrcVT = Src.getValueType();
   EVT DstVT = Node->getValueType(0);
@@ -4320,7 +4873,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
 
   // Expand f32 -> i64 conversion
   // This algorithm comes from compiler-rt's implementation of fixsfdi:
-  // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
   unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
   EVT IntVT = SrcVT.changeTypeToInteger();
   EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
@@ -4544,6 +5097,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
   }
 
+  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
+  // instead if there are no NaNs.
+  if (Node->getFlags().hasNoNaNs()) {
+    unsigned IEEE2018Op =
+        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
+                         Node->getOperand(1), Node->getFlags());
+    }
+  }
+
   return SDValue();
 }
 
@@ -4771,7 +5335,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
   SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
   SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
 
-  return DAG.getMergeValues({ Value, NewChain }, SL);
+  return DAG.getMergeValues({Value, NewChain}, SL);
 }
 
 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -4826,7 +5390,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
 
   // Store Stride in bytes
   unsigned Stride = MemSclVT.getSizeInBits() / 8;
-  assert (Stride && "Zero stride!");
+  assert(Stride && "Zero stride!");
   // Extract each of the elements from the original vector and save them into
   // memory individually.
   SmallVector<SDValue, 8> Stores;
@@ -5013,17 +5577,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
   auto &MF = DAG.getMachineFunction();
-  EVT MemVT = ST->getMemoryVT();
+  EVT StoreMemVT = ST->getMemoryVT();
 
   SDLoc dl(ST);
-  if (MemVT.isFloatingPoint() || MemVT.isVector()) {
+  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
     if (isTypeLegal(intVT)) {
       if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
-          MemVT.isVector()) {
+          StoreMemVT.isVector()) {
         // Scalarize the store and let the individual components be handled.
         SDValue Result = scalarizeVectorStore(ST, DAG);
-
         return Result;
       }
       // Expand to a bitconvert of the value to the integer type of the
@@ -5036,24 +5599,22 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
     }
     // Do a (aligned) store to a stack slot, then copy from the stack slot
     // to the final destination using (unaligned) integer loads and stores.
-    EVT StoredVT = ST->getMemoryVT();
-    MVT RegVT =
-      getRegisterType(*DAG.getContext(),
-                      EVT::getIntegerVT(*DAG.getContext(),
-                                        StoredVT.getSizeInBits()));
+    MVT RegVT = getRegisterType(
+        *DAG.getContext(),
+        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
     EVT PtrVT = Ptr.getValueType();
-    unsigned StoredBytes = StoredVT.getStoreSize();
+    unsigned StoredBytes = StoreMemVT.getStoreSize();
     unsigned RegBytes = RegVT.getSizeInBits() / 8;
     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 
     // Make sure the stack slot is also aligned for the register type.
-    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
     auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
 
     // Perform the original store, only redirected to the stack slot.
     SDValue Store = DAG.getTruncStore(
         Chain, dl, Val, StackPtr,
-        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
+        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
 
     EVT StackPtrVT = StackPtr.getValueType();
 
@@ -5082,17 +5643,17 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
     // The last store may be partial.  Do a truncating store.  On big-endian
     // machines this requires an extending load from the stack slot to ensure
     // that the bits are in the right place.
-    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
-                                  8 * (StoredBytes - Offset));
+    EVT LoadMemVT =
+        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
 
     // Load from the stack slot.
     SDValue Load = DAG.getExtLoad(
         ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
-        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
+        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
 
     Stores.push_back(
         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
-                          ST->getPointerInfo().getWithOffset(Offset), MemVT,
+                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                           MinAlign(ST->getAlignment(), Offset),
                           ST->getMemOperand()->getFlags(), ST->getAAInfo()));
     // The order of the stores doesn't matter - say it with a TokenFactor.
@@ -5100,18 +5661,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
     return Result;
   }
 
-  assert(ST->getMemoryVT().isInteger() &&
-         !ST->getMemoryVT().isVector() &&
+  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
          "Unaligned store of unknown type.");
   // Get the half-size VT
-  EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
   int NumBits = NewStoredVT.getSizeInBits();
   int IncrementSize = NumBits / 8;
 
   // Divide the stored value in two parts.
-  SDValue ShiftAmount =
-      DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
-                                                    DAG.getDataLayout()));
+  SDValue ShiftAmount = DAG.getConstant(
+      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
   SDValue Lo = Val;
   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 
@@ -5130,7 +5689,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
       ST->getMemOperand()->getFlags(), ST->getAAInfo());
 
   SDValue Result =
-    DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
   return Result;
 }
 
@@ -5242,7 +5801,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
   // At last for X86 targets, maybe good for other targets too?
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-  MFI.setAdjustsStack(true);  // Is this only for X86 target?
+  MFI.setAdjustsStack(true); // Is this only for X86 target?
   MFI.setHasCalls(true);
 
   assert((GA->getOffset() == 0) &&
@@ -5282,15 +5841,19 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
   EVT VT = LHS.getValueType();
   SDLoc dl(Node);
 
+  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+  assert(VT.isInteger() && "Expected operands to be integers");
+
   // usub.sat(a, b) -> umax(a, b) - b
   if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
     SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
     return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
   }
 
-  if (VT.isVector()) {
-    // TODO: Consider not scalarizing here.
-    return SDValue();
+  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
   }
 
   unsigned OverflowOp;
@@ -5312,96 +5875,410 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
                      "addition or subtraction node.");
   }
 
-  assert(LHS.getValueType().isScalarInteger() &&
-         "Expected operands to be integers. Vector of int arguments should "
-         "already be unrolled.");
-  assert(RHS.getValueType().isScalarInteger() &&
-         "Expected operands to be integers. Vector of int arguments should "
-         "already be unrolled.");
-  assert(LHS.getValueType() == RHS.getValueType() &&
-         "Expected both operands to be the same type");
-
-  unsigned BitWidth = LHS.getValueSizeInBits();
-  EVT ResultType = LHS.getValueType();
-  EVT BoolVT =
-      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
-  SDValue Result =
-      DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+  unsigned BitWidth = LHS.getScalarValueSizeInBits();
+  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
+                               LHS, RHS);
   SDValue SumDiff = Result.getValue(0);
   SDValue Overflow = Result.getValue(1);
-  SDValue Zero = DAG.getConstant(0, dl, ResultType);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
+  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
 
   if (Opcode == ISD::UADDSAT) {
-    // Just need to check overflow for SatMax.
-    APInt MaxVal = APInt::getMaxValue(BitWidth);
-    SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
-    return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS + RHS) | OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+    }
+    // Overflow ? 0xffff.... : (LHS + RHS)
+    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
   } else if (Opcode == ISD::USUBSAT) {
-    // Just need to check overflow for SatMin.
-    APInt MinVal = APInt::getMinValue(BitWidth);
-    SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
-    return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS - RHS) & ~OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+    }
+    // Overflow ? 0 : (LHS - RHS)
+    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
   } else {
     // SatMax -> Overflow && SumDiff < 0
     // SatMin -> Overflow && SumDiff >= 0
     APInt MinVal = APInt::getSignedMinValue(BitWidth);
     APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
-    SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
-    SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
     SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
-    Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
-    return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
   }
 }
 
 SDValue
-TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
-                                                    SelectionDAG &DAG) const {
-  assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
-  assert(Node->getNumOperands() == 3 &&
-         "Expected signed fixed point multiplication to have 3 operands.");
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+  assert((Node->getOpcode() == ISD::SMULFIX ||
+          Node->getOpcode() == ISD::UMULFIX ||
+          Node->getOpcode() == ISD::SMULFIXSAT) &&
+         "Expected a fixed point multiplication opcode");
 
   SDLoc dl(Node);
   SDValue LHS = Node->getOperand(0);
   SDValue RHS = Node->getOperand(1);
-  assert(LHS.getValueType().isScalarInteger() &&
-         "Expected operands to be integers. Vector of int arguments should "
-         "already be unrolled.");
-  assert(RHS.getValueType().isScalarInteger() &&
-         "Expected operands to be integers. Vector of int arguments should "
-         "already be unrolled.");
-  assert(LHS.getValueType() == RHS.getValueType() &&
-         "Expected both operands to be the same type");
-
-  unsigned Scale = Node->getConstantOperandVal(2);
   EVT VT = LHS.getValueType();
-  assert(Scale < VT.getScalarSizeInBits() &&
-         "Expected scale to be less than the number of bits.");
+  unsigned Scale = Node->getConstantOperandVal(2);
+  bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  unsigned VTSize = VT.getScalarSizeInBits();
+
+  if (!Scale) {
+    // [us]mul.fix(a, b, 0) -> mul(a, b)
+    if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
+      return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+      SDValue Result =
+          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+      SDValue Zero = DAG.getConstant(0, dl, VT);
+
+      APInt MinVal = APInt::getSignedMinValue(VTSize);
+      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+      return DAG.getSelect(dl, VT, Overflow, Result, Product);
+    }
+  }
 
-  if (!Scale)
-    return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+  bool Signed =
+      Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
+  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+         "Expected scale to be less than the number of bits if signed or at "
+         "most the number of bits if unsigned.");
+  assert(LHS.getValueType() == RHS.getValueType() &&
+         "Expected both operands to be the same type");
 
   // Get the upper and lower bits of the result.
   SDValue Lo, Hi;
-  if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
-    SDValue Result =
-        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+  if (isOperationLegalOrCustom(LoHiOp, VT)) {
+    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
     Lo = Result.getValue(0);
     Hi = Result.getValue(1);
-  } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+  } else if (isOperationLegalOrCustom(HiOp, VT)) {
     Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
-    Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+  } else if (VT.isVector()) {
+    return SDValue();
   } else {
-    report_fatal_error("Unable to expand signed fixed point multiplication.");
+    report_fatal_error("Unable to expand fixed point multiplication.");
   }
 
+  if (Scale == VTSize)
+    // Result is just the top half since we'd be shifting by the width of the
+    // operand.
+    return Hi;
+
   // The result will need to be shifted right by the scale since both operands
   // are scaled. The result is given to us in 2 halves, so we only want part of
   // both in the result.
   EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
-  Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
-  Hi = DAG.getNode(
-      ISD::SHL, dl, VT, Hi,
-      DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
-  return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+                               DAG.getConstant(Scale, dl, ShiftTy));
+  if (!Saturating)
+    return Result;
+
+  unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
+  SDValue HiMask =
+      DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
+  SDValue LoMask = DAG.getConstant(
+      APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
+  APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+  APInt MinVal = APInt::getSignedMinValue(VTSize);
+
+  Result = DAG.getSelectCC(dl, Hi, LoMask,
+                           DAG.getConstant(MaxVal, dl, VT), Result,
+                           ISD::SETGT);
+  return DAG.getSelectCC(dl, Hi, HiMask,
+                         DAG.getConstant(MinVal, dl, VT), Result,
+                         ISD::SETLT);
+}
+
+void TargetLowering::expandUADDSUBO(
+    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool IsAdd = Node->getOpcode() == ISD::UADDO; // otherwise the SUB path
+
+  // Expands UADDO/USUBO into Result + Overflow. Prefer ADD/SUBCARRY if legal.
+  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+                                    { LHS, RHS, CarryIn });
+    Result = SDValue(NodeCarry.getNode(), 0);
+    Overflow = SDValue(NodeCarry.getNode(), 1); // value #1 serves as the flag
+    return;
+  }
+
+  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+                            LHS.getValueType(), LHS, RHS);
+
+  EVT ResultType = Node->getValueType(1); // type of the overflow result
+  EVT SetCCType = getSetCCResultType(
+      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; // a+b < a, a-b > a
+  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
+
+void TargetLowering::expandSADDSUBO(
+    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool IsAdd = Node->getOpcode() == ISD::SADDO; // otherwise the SUB path
+
+  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+                            LHS.getValueType(), LHS, RHS);
+
+  EVT ResultType = Node->getValueType(1); // type of the overflow result
+  EVT OType = getSetCCResultType(
+      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+  // If SADDSAT/SSUBSAT is legal, overflow is (plain result != saturated one).
+  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+    return;
+  }
+
+  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+  //   LHSSign -> LHS >= 0
+  //   RHSSign -> RHS >= 0
+  //   SumSign -> Result >= 0
+  //   (SignsMatch below holds "signs differ" when expanding a subtract.)
+  //   Add:
+  //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+  //   Sub:
+  //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+                                    IsAdd ? ISD::SETEQ : ISD::SETNE);
+
+  SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
+  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+  Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+}
+
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+                                SDValue &Overflow, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0); // type of the product (result #0)
+  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  bool isSigned = Node->getOpcode() == ISD::SMULO; // otherwise unsigned
+
+  // For power-of-two multiplications we can use a simpler shift expansion.
+  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+    const APInt &C = RHSC->getAPIntValue();
+    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+    if (C.isPowerOf2()) {
+      // smulo(x, signed_min) is same as umulo(x, signed_min).
+      bool UseArithShift = isSigned && !C.isMinSignedValue();
+      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+      Overflow = DAG.getSetCC(dl, SetCCVT,
+          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+                      dl, VT, Result, ShiftAmt),
+          LHS, ISD::SETNE);
+      return true; // NOTE(review): Overflow stays SetCCVT on this path
+    }
+  }
+
+  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+  if (VT.isVector())
+    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+                              VT.getVectorNumElements());
+
+  SDValue BottomHalf;
+  SDValue TopHalf;
+  static const unsigned Ops[2][3] = // [isSigned][mulhi, mul_lohi, extend]
+      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+                             RHS);
+    TopHalf = BottomHalf.getValue(1);
+  } else if (isTypeLegal(WideVT)) {
+    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+        getShiftAmountTy(WideVT, DAG.getDataLayout()));
+    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+  } else {
+    if (VT.isVector())
+      return false; // nothing expanded; Result/Overflow untouched
+
+    // We can fall back to a libcall with an illegal type for the MUL if we
+    // have a libcall big enough.
+    // Also, we can fall back to a division in some cases, but that's a big
+    // performance hit in the general case.
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    if (WideVT == MVT::i16)
+      LC = RTLIB::MUL_I16;
+    else if (WideVT == MVT::i32)
+      LC = RTLIB::MUL_I32;
+    else if (WideVT == MVT::i64)
+      LC = RTLIB::MUL_I64;
+    else if (WideVT == MVT::i128)
+      LC = RTLIB::MUL_I128;
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+    SDValue HiLHS;
+    SDValue HiRHS;
+    if (isSigned) {
+      // The high part is obtained by SRA'ing all but one of the bits of low
+      // part.
+      unsigned LoSize = VT.getSizeInBits();
+      HiLHS =
+          DAG.getNode(ISD::SRA, dl, VT, LHS,
+                      DAG.getConstant(LoSize - 1, dl,
+                                      getPointerTy(DAG.getDataLayout())));
+      HiRHS =
+          DAG.getNode(ISD::SRA, dl, VT, RHS,
+                      DAG.getConstant(LoSize - 1, dl,
+                                      getPointerTy(DAG.getDataLayout())));
+    } else {
+        HiLHS = DAG.getConstant(0, dl, VT);
+        HiRHS = DAG.getConstant(0, dl, VT);
+    }
+
+    // Here we're passing the 2 arguments explicitly as 4 arguments that are
+    // pre-lowered to the correct types. This all depends upon WideVT not
+    // being a legal type for the architecture and thus has to be split to
+    // two arguments.
+    SDValue Ret;
+    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+      // Halves of WideVT are packed into registers in different order
+      // depending on platform endianness. This is usually handled by
+      // the C calling convention, but we can't defer to it in
+      // the legalizer.
+      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+          /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+          /* isPostTypeLegalization */ true).first;
+    } else {
+      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+          /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+          /* isPostTypeLegalization */ true).first;
+    }
+    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+           "Ret value is a collection of constituent nodes holding result.");
+    if (DAG.getDataLayout().isLittleEndian()) {
+      // Operand order of the merged result follows the data layout endianness.
+      BottomHalf = Ret.getOperand(0);
+      TopHalf = Ret.getOperand(1);
+    } else {
+      BottomHalf = Ret.getOperand(1);
+      TopHalf = Ret.getOperand(0);
+    }
+  }
+
+  Result = BottomHalf;
+  if (isSigned) {
+    SDValue ShiftAmt = DAG.getConstant(
+        VT.getScalarSizeInBits() - 1, dl,
+        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+  } else {
+    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+                            DAG.getConstant(0, dl, VT), ISD::SETNE);
+  }
+
+  // Truncate the result if SetCC returns a larger type than needed.
+  EVT RType = Node->getValueType(1); // type of the overflow result
+  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+         "Unexpected result type for S/UMULO legalization");
+  return true; // Result and Overflow have both been set
+}
+
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  bool NoNaN = Node->getFlags().hasNoNaNs(); // picks FMAXNUM/FMINNUM below
+  unsigned BaseOpcode = 0;
+  switch (Node->getOpcode()) { // map VECREDUCE_* to its binary opcode
+  default: llvm_unreachable("Expected VECREDUCE opcode");
+  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+  case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
+  case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
+  case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
+  case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
+  case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
+  case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
+  case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
+  case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
+  case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
+  case ISD::VECREDUCE_FMAX:
+    BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+    break;
+  case ISD::VECREDUCE_FMIN:
+    BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+    break;
+  }
+
+  SDValue Op = Node->getOperand(0);
+  EVT VT = Op.getValueType();
+
+  // For power-of-two vectors, repeatedly split and combine halves (Lo op Hi).
+  if (VT.isPow2VectorType()) {
+    while (VT.getVectorNumElements() > 1) {
+      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+        break; // the rest is reduced element by element below
+
+      SDValue Lo, Hi;
+      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+      VT = HalfVT;
+    }
+  }
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SmallVector<SDValue, 8> Ops;
+  DAG.ExtractVectorElements(Op, Ops, 0, NumElts); // scalarize what is left
+
+  SDValue Res = Ops[0];
+  for (unsigned i = 1; i < NumElts; i++) // chain BaseOpcode in element order
+    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+  // Result type may be wider than element type.
+  if (EltVT != Node->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+  return Res;
 }
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 3e12b32b12d4..17a4d76c4c80 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -1,9 +1,8 @@
 //===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -313,7 +312,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
   AtEntry.SetInsertPoint(IP->getParent(), IP);
 
   // Initialize the map pointer and load the current head of the shadow stack.
-  Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+  Instruction *CurrentHead =
+      AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
   Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
                                        StackEntry, 0, 1, "gc_frame.map");
   AtEntry.CreateStore(FrameMap, EntryMapPtr);
@@ -354,7 +354,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
     Instruction *EntryNextPtr2 =
         CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
                   "gc_frame.next");
-    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+    Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
+                                          EntryNextPtr2, "gc_savedhead");
     AtExit->CreateStore(SavedHead, Head);
   }
 
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index d3454ca6ba6a..2db0ea570598 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -1,9 +1,8 @@
 //===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -259,6 +258,15 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
 
 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
                                  RegScavenger *RS) const {
+  // This prevents premature stack popping when an indirect stack access
+  // occurs. It is overly aggressive for the moment.
+  // TODO: - Obvious non-stack loads and stores, such as global values,
+  //         are known to not access the stack.
+  //       - Further, data dependency and alias analysis can validate
+  //         that loads and stores never derive from the stack pointer.
+  if (MI.mayLoadOrStore())
+    return true;
+
   if (MI.getOpcode() == FrameSetupOpcode ||
       MI.getOpcode() == FrameDestroyOpcode) {
     LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5d2669f5ae92..23e5ce0acae8 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,9 +1,8 @@
 //===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,15 +39,15 @@ class SjLjEHPrepare : public FunctionPass {
   Type *doubleUnderDataTy;
   Type *doubleUnderJBufTy;
   Type *FunctionContextTy;
-  Constant *RegisterFn;
-  Constant *UnregisterFn;
-  Constant *BuiltinSetupDispatchFn;
-  Constant *FrameAddrFn;
-  Constant *StackAddrFn;
-  Constant *StackRestoreFn;
-  Constant *LSDAAddrFn;
-  Constant *CallSiteFn;
-  Constant *FuncCtxFn;
+  FunctionCallee RegisterFn;
+  FunctionCallee UnregisterFn;
+  Function *BuiltinSetupDispatchFn;
+  Function *FrameAddrFn;
+  Function *StackAddrFn;
+  Function *StackRestoreFn;
+  Function *LSDAAddrFn;
+  Function *CallSiteFn;
+  Function *FuncCtxFn;
   AllocaInst *FuncCtx;
 
 public:
@@ -190,14 +189,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
         Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
 
     // The exception values come back in context->__data[0].
+    Type *Int32Ty = Type::getInt32Ty(F.getContext());
     Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
                                                       0, 0, "exception_gep");
-    Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+    Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val");
     ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
 
     Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
                                                      0, 1, "exn_selector_gep");
-    Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+    Value *SelVal =
+        Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val");
 
     substituteLPadValues(LPI, ExnVal, SelVal);
   }
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index fccbb8ec91cb..9fff873324d0 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -1,9 +1,8 @@
 //===-- SlotIndexes.cpp - Slot Indexes Pass  ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,7 +22,6 @@ INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
                 "Slot index numbering", false, false)
 
 STATISTIC(NumLocalRenum,  "Number of local renumberings");
-STATISTIC(NumGlobalRenum, "Number of global renumberings");
 
 void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
   au.setPreservesAll();
@@ -95,7 +93,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
   }
 
   // Sort the Idx2MBBMap
-  llvm::sort(idx2MBBMap, Idx2MBBCompare());
+  llvm::sort(idx2MBBMap, less_first());
 
   LLVM_DEBUG(mf->print(dbgs(), this));
 
@@ -145,20 +143,6 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
   }
 }
 
-void SlotIndexes::renumberIndexes() {
-  // Renumber updates the index of every element of the index list.
-  LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
-  ++NumGlobalRenum;
-
-  unsigned index = 0;
-
-  for (IndexList::iterator I = indexList.begin(), E = indexList.end();
-       I != E; ++I) {
-    I->setIndex(index);
-    index += SlotIndex::InstrDist;
-  }
-}
-
 // Renumber indexes locally after curItr was inserted, but failed to get a new
 // index.
 void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index f6786b30b21c..11452fdb747a 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -1,9 +1,8 @@
 //===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index aa3ac444e0da..aa0e07ef92e3 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -1,9 +1,8 @@
 //===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 330ee81342b6..66dabf78f873 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index d639f4475301..5c944fe3f6b3 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,9 +1,8 @@
 //===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -521,17 +520,18 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
       .addReg(FromReg, 0, SubIdx);
 
   BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+  SlotIndexes &Indexes = *LIS.getSlotIndexes();
   if (FirstCopy) {
-    SlotIndexes &Indexes = *LIS.getSlotIndexes();
     Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
   } else {
     CopyMI->bundleWithPred();
   }
   LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
   DestLI.refineSubRanges(Allocator, LaneMask,
-                         [Def, &Allocator](LiveInterval::SubRange& SR) {
-    SR.createDeadDef(Def, Allocator);
-  });
+                         [Def, &Allocator](LiveInterval::SubRange &SR) {
+                           SR.createDeadDef(Def, Allocator);
+                         },
+                         Indexes, TRI);
   return Def;
 }
 
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index bcc8f8cf18bc..86ad3811e3ad 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,9 +1,8 @@
 //===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index eb8552915e2a..641b54205d62 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,9 +1,8 @@
 //===- StackColoring.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1221,11 +1220,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
 
   // Sort the slots according to their size. Place unused slots at the end.
   // Use stable sort to guarantee deterministic code generation.
-  std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
-                   [this](int LHS, int RHS) {
+  llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
     // We use -1 to denote a uninteresting slot. Place these slots at the end.
-    if (LHS == -1) return false;
-    if (RHS == -1) return true;
+    if (LHS == -1)
+      return false;
+    if (RHS == -1)
+      return true;
     // Sort according to size.
     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
   });
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 00cf8070be5e..fb2abf3daa7f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -1,9 +1,8 @@
 //===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 0676fa2421e8..ae9401b89700 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -1,9 +1,8 @@
 //===- StackMaps.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 3b578c7391da..809960c7fdf9 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -1,9 +1,8 @@
 //===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/Passes.h"
@@ -157,40 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
   return NeedsProtector;
 }
 
-bool StackProtector::HasAddressTaken(const Instruction *AI) {
-  for (const User *U : AI->users()) {
-    if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
-      if (AI == SI->getValueOperand())
-        return true;
-    } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
-      if (AI == SI->getOperand(0))
-        return true;
-    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
-      // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
-      if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
-        return true;
-    } else if (isa<InvokeInst>(U)) {
-      return true;
-    } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
-      if (HasAddressTaken(SI))
-        return true;
-    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
-      // Keep track of what PHI nodes we have already visited to ensure
-      // they are only visited once.
-      if (VisitedPHIs.insert(PN).second)
-        if (HasAddressTaken(PN))
-          return true;
-    } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-      if (HasAddressTaken(GEP))
-        return true;
-    } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
-      if (HasAddressTaken(BI))
-        return true;
-    }
-  }
-  return false;
-}
-
 /// Search for the first call to the llvm.stackprotector intrinsic and return it
 /// if present.
 static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -298,7 +264,9 @@ bool StackProtector::RequiresStackProtector() {
           continue;
         }
 
-        if (Strong && HasAddressTaken(AI)) {
+        if (Strong && PointerMayBeCaptured(AI,
+                                           /* ReturnCaptures */ false,
+                                           /* StoreCaptures */ true)) {
           ++NumAddrTaken;
           Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
           ORE.emit([&]() {
@@ -323,7 +291,7 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
                             IRBuilder<> &B,
                             bool *SupportsSelectionDAGSP = nullptr) {
   if (Value *Guard = TLI->getIRStackGuard(B))
-    return B.CreateLoad(Guard, true, "StackGuard");
+    return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
 
   // Use SelectionDAG SSP handling, since there isn't an IR guard.
   //
@@ -414,15 +382,14 @@ bool StackProtector::InsertStackProtectors() {
     // Generate epilogue instrumentation. The epilogue intrumentation can be
     // function-based or inlined depending on which mechanism the target is
     // providing.
-    if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+    if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
       // Generate the function-based epilogue instrumentation.
       // The target provides a guard check function, generate a call to it.
       IRBuilder<> B(RI);
-      LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+      LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
       CallInst *Call = B.CreateCall(GuardCheck, {Guard});
-      llvm::Function *Function = cast<llvm::Function>(GuardCheck);
-      Call->setAttributes(Function->getAttributes());
-      Call->setCallingConv(Function->getCallingConv());
+      Call->setAttributes(GuardCheck->getAttributes());
+      Call->setCallingConv(GuardCheck->getCallingConv());
     } else {
       // Generate the epilogue with inline instrumentation.
       // If we do not support SelectionDAG based tail calls, generate IR level
@@ -474,7 +441,7 @@ bool StackProtector::InsertStackProtectors() {
       // Generate the stack protector instructions in the old basic block.
       IRBuilder<> B(BB);
       Value *Guard = getStackGuard(TLI, M, B);
-      LoadInst *LI2 = B.CreateLoad(AI, true);
+      LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
       Value *Cmp = B.CreateICmpEQ(Guard, LI2);
       auto SuccessProb =
           BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -500,14 +467,13 @@ BasicBlock *StackProtector::CreateFailBB() {
   IRBuilder<> B(FailBB);
   B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
   if (Trip.isOSOpenBSD()) {
-    Constant *StackChkFail =
-        M->getOrInsertFunction("__stack_smash_handler",
-                               Type::getVoidTy(Context),
-                               Type::getInt8PtrTy(Context));
+    FunctionCallee StackChkFail = M->getOrInsertFunction(
+        "__stack_smash_handler", Type::getVoidTy(Context),
+        Type::getInt8PtrTy(Context));
 
     B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
   } else {
-    Constant *StackChkFail =
+    FunctionCallee StackChkFail =
         M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
 
     B.CreateCall(StackChkFail, {});
@@ -517,7 +483,7 @@ BasicBlock *StackProtector::CreateFailBB() {
 }
 
 bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
-  return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
+  return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
 }
 
 void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d8c6a249e4da..99b533e10b87 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,9 +1,8 @@
 //===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -243,7 +242,7 @@ void StackSlotColoring::InitializeSlots() {
   LLVM_DEBUG(dbgs() << '\n');
 
   // Sort them by weight.
-  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+  llvm::stable_sort(SSIntervals, IntervalSorter());
 
   NextColors.resize(AllColors.size());
 
@@ -348,7 +347,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
     li->weight = SlotWeights[SS];
   }
   // Sort them by new weight.
-  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+  llvm::stable_sort(SSIntervals, IntervalSorter());
 
 #ifndef NDEBUG
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 000000000000..96821cadb1b6
--- /dev/null
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,312 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+Register SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+                                                  const Value *Val) {
+  auto Key = std::make_pair(MBB, Val);
+  auto It = VRegDefMap.find(Key);
+  // No vreg is recorded yet for this swifterror value in this basic block:
+  // create one and register it both as the block's current vreg and as an
+  // "upwards exposed use". After all basic blocks are processed, that use is
+  // satisfied by inserting a copy or phi at the beginning of this block.
+  if (It == VRegDefMap.end()) {
+    auto &DL = MF->getDataLayout();
+    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+    auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+    VRegDefMap[Key] = VReg;
+    VRegUpwardsUse[Key] = VReg;
+    return VReg;
+  } else
+    return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+                                             const Value *Val, Register VReg) {
+  VRegDefMap[std::make_pair(MBB, Val)] = VReg; // Replaces any earlier entry.
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegDefAt(
+    const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true); // Def key.
+  auto It = VRegDefUses.find(Key);
+  if (It != VRegDefUses.end())
+    return It->second; // A def vreg was already assigned for I.
+
+  auto &DL = MF->getDataLayout();
+  const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+  Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+  VRegDefUses[Key] = VReg;
+  setCurrentVReg(MBB, Val, VReg); // The new def becomes MBB's current vreg.
+  return VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegUseAt(
+    const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false); // Use key.
+  auto It = VRegDefUses.find(Key);
+  if (It != VRegDefUses.end())
+    return It->second; // A use vreg was already assigned for I.
+
+  Register VReg = getOrCreateVReg(MBB, Val); // MBB's current vreg for Val.
+  VRegDefUses[Key] = VReg;
+  return VReg;
+}
+
+/// Bind the tracker to \p mf and, if the target supports swifterror, rebuild
+/// SwiftErrorVals: a swifterror argument (if any) first, then the allocas.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+  MF = &mf;
+  Fn = &MF->getFunction();
+  TLI = MF->getSubtarget().getTargetLowering();
+  TII = MF->getSubtarget().getInstrInfo();
+
+  if (!TLI->supportSwiftError())
+    return;
+
+  SwiftErrorVals.clear();
+  VRegDefMap.clear();
+  VRegUpwardsUse.clear();
+  VRegDefUses.clear();
+  SwiftErrorArg = nullptr;
+
+  // Record the swifterror argument, if the function has one.
+  bool HaveSeenSwiftErrorArg = false;
+  for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+       AI != AE; ++AI)
+    if (AI->hasSwiftErrorAttr()) {
+      assert(!HaveSeenSwiftErrorArg &&
+             "Must have only one swifterror parameter");
+      (void)HaveSeenSwiftErrorArg; // silence warning.
+      HaveSeenSwiftErrorArg = true;
+      SwiftErrorArg = &*AI;
+      SwiftErrorVals.push_back(&*AI);
+    }
+
+  for (const auto &LLVMBB : *Fn)
+    for (const auto &Inst : LLVMBB) {
+      if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+        if (Alloca->isSwiftError())
+          SwiftErrorVals.push_back(Alloca);
+    }
+}
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+  if (!TLI->supportSwiftError())
+    return false;
+
+  // We only need to do this when we have a swifterror parameter or a
+  // swifterror alloca.
+  if (SwiftErrorVals.empty())
+    return false;
+
+  MachineBasicBlock *MBB = &*MF->begin();
+  auto &DL = MF->getDataLayout();
+  auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+  bool Inserted = false;
+  for (const auto *SwiftErrorVal : SwiftErrorVals) {
+    // Skip the swifterror argument: we will always generate a copy from the
+    // argument. It is always used at least by the 'return' of the swifterror.
+    if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+      continue;
+    Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+    // Assign undef to VReg. We construct the MI directly to make sure it works
+    // with FastISel.
+    BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+            TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+    setCurrentVReg(MBB, SwiftErrorVal, VReg);
+    Inserted = true;
+  }
+
+  return Inserted; // True iff at least one IMPLICIT_DEF was inserted.
+}
+
+/// Propagate swifterror vregs through the CFG, inserting COPYs/PHIs as needed.
+void SwiftErrorValueTracking::propagateVRegs() {
+  if (!TLI->supportSwiftError())
+    return;
+
+  // We only need to do this when we have a swifterror parameter or a
+  // swifterror alloca.
+  if (SwiftErrorVals.empty())
+    return;
+
+  // For each machine basic block in reverse post order.
+  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+  for (MachineBasicBlock *MBB : RPOT) {
+    // For each swifterror value in the function.
+    for (const auto *SwiftErrorVal : SwiftErrorVals) {
+      auto Key = std::make_pair(MBB, SwiftErrorVal);
+      auto UUseIt = VRegUpwardsUse.find(Key);
+      auto VRegDefIt = VRegDefMap.find(Key);
+      bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+      Register UUseVReg = UpwardsUse ? UUseIt->second : Register();
+      bool DownwardDef = VRegDefIt != VRegDefMap.end();
+      assert(!(UpwardsUse && !DownwardDef) &&
+             "We can't have an upwards use but no downwards def");
+
+      // If there is no upwards exposed use and an entry for the swifterror in
+      // the def map for this value we don't need to do anything: We already
+      // have a downward def for this basic block.
+      if (!UpwardsUse && DownwardDef)
+        continue;
+
+      // Otherwise we either have an upwards exposed use vreg that we need to
+      // materialize or need to forward the downward def from predecessors.
+
+      // Check whether we have a single vreg def from all predecessors.
+      // Otherwise we need a phi.
+      SmallVector<std::pair<MachineBasicBlock *, Register>, 4> VRegs;
+      SmallSet<const MachineBasicBlock *, 8> Visited;
+      for (auto *Pred : MBB->predecessors()) {
+        if (!Visited.insert(Pred).second)
+          continue;
+        VRegs.push_back(std::make_pair(
+            Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+        if (Pred != MBB)
+          continue;
+        // We have a self-edge.
+        // If there was no upwards use in this basic block there is now one: the
+        // phi needs to use itself.
+        if (!UpwardsUse) {
+          UpwardsUse = true;
+          UUseIt = VRegUpwardsUse.find(Key);
+          assert(UUseIt != VRegUpwardsUse.end());
+          UUseVReg = UUseIt->second;
+        }
+      }
+
+      // We need a phi node if we have more than one predecessor with different
+      // downward defs.
+      bool needPHI =
+          VRegs.size() >= 1 &&
+          std::find_if(
+              VRegs.begin(), VRegs.end(),
+              [&](const std::pair<const MachineBasicBlock *, Register> &V)
+                  -> bool { return V.second != VRegs[0].second; }) !=
+              VRegs.end();
+
+      // If there is no upwards exposed use and we don't need a phi just
+      // forward the swifterror vreg from the predecessor(s).
+      if (!UpwardsUse && !needPHI) {
+        assert(!VRegs.empty() &&
+               "No predecessors? The entry block should bail out earlier");
+        // Just forward the swifterror vreg from the predecessor(s).
+        setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+        continue;
+      }
+
+      auto DLoc = isa<Instruction>(SwiftErrorVal)
+                      ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+                      : DebugLoc();
+      const auto *TII = MF->getSubtarget().getInstrInfo(); // Shadows this->TII.
+
+      // If we don't need a phi create a copy to the upward exposed vreg.
+      if (!needPHI) {
+        assert(UpwardsUse);
+        assert(!VRegs.empty() &&
+               "No predecessors?  Is the Calling Convention correct?");
+        Register DestReg = UUseVReg;
+        BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+                DestReg)
+            .addReg(VRegs[0].second);
+        continue;
+      }
+
+      // We need a phi: if there is an upwards exposed use we already have a
+      // destination virtual register number otherwise we generate a new one.
+      auto &DL = MF->getDataLayout();
+      auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+      Register PHIVReg =
+          UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+      MachineInstrBuilder PHI =
+          BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+                  TII->get(TargetOpcode::PHI), PHIVReg);
+      for (auto BBRegPair : VRegs) {
+        PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+      }
+
+      // We did not have a definition in this block before: store the phi's vreg
+      // as this block's downward exposed def.
+      if (!UpwardsUse)
+        setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+    }
+  }
+}
+
+void SwiftErrorValueTracking::preassignVRegs(
+    MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+    BasicBlock::const_iterator End) {
+  if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+    return;
+
+  // Iterate over the instructions and assign vregs to swifterror defs and uses.
+  for (auto It = Begin; It != End; ++It) {
+    ImmutableCallSite CS(&*It);
+    if (CS) {
+      // A call-site with a swifterror argument is both use and def.
+      const Value *SwiftErrorAddr = nullptr;
+      for (auto &Arg : CS.args()) {
+        if (!Arg->isSwiftError())
+          continue;
+        // Use of swifterror.
+        assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+        SwiftErrorAddr = &*Arg;
+        assert(SwiftErrorAddr->isSwiftError() &&
+               "Must have a swifterror value argument");
+        getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+      }
+      if (!SwiftErrorAddr)
+        continue;
+
+      // Def of swifterror.
+      getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+      // A load is a use.
+    } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+      const Value *V = LI->getOperand(0); // Pointer operand of the load.
+      if (!V->isSwiftError())
+        continue;
+
+      getOrCreateVRegUseAt(LI, MBB, V);
+
+      // A store is a def.
+    } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+      const Value *SwiftErrorAddr = SI->getOperand(1); // Pointer operand.
+      if (!SwiftErrorAddr->isSwiftError())
+        continue;
+
+      // Def of swifterror.
+      getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+      // A return in a swifterror-returning function is a use.
+    } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+      const Function *F = R->getParent()->getParent();
+      if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+        continue;
+
+      getOrCreateVRegUseAt(R, MBB, SwiftErrorArg);
+    }
+  }
+}
diff --git a/lib/CodeGen/SwitchLoweringUtils.cpp b/lib/CodeGen/SwitchLoweringUtils.cpp
new file mode 100644
index 000000000000..83acf7f80715
--- /dev/null
+++ b/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -0,0 +1,489 @@
+//===- SwitchLoweringUtils.cpp - Switch Lowering --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains switch inst lowering optimizations and utilities for
+// codegen, so that it can be used for both SelectionDAG and GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+
+using namespace llvm;
+using namespace SwitchCG;
+
+uint64_t SwitchCG::getJumpTableRange(const CaseClusterVector &Clusters,
+                                     unsigned First, unsigned Last) {
+  // Number of table slots spanned by Clusters[First..Last], gaps included.
+  assert(First <= Last);
+  const APInt &Lo = Clusters[First].Low->getValue();
+  const APInt &Hi = Clusters[Last].High->getValue();
+  assert(Hi.getBitWidth() == Lo.getBitWidth());
+  // FIXME: A run of consecutive cases is 100% dense yet needs only a single
+  // comparison to lower; such runs should be penalized for jump tables.
+  const uint64_t Cap = (UINT64_MAX - 1) / 100; // Limit passed to APInt.
+  return (Hi - Lo).getLimitedValue(Cap) + 1;
+}
+
+uint64_t
+SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+                               unsigned First, unsigned Last) {
+  // TotalCases is a running sum, so a range count is a difference of sums.
+  assert(First <= Last);
+  assert(TotalCases[First] <= TotalCases[Last]);
+  const unsigned Preceding = First != 0 ? TotalCases[First - 1] : 0;
+  return TotalCases[Last] - Preceding;
+}
+
+/// Rewrite Clusters in place, replacing runs of range clusters that the target
+/// deems suitable with jump-table clusters built by buildJumpTable().
+void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
+                                              const SwitchInst *SI,
+                                              MachineBasicBlock *DefaultMBB) {
+#ifndef NDEBUG
+  // Clusters must be non-empty, sorted, and only contain Range clusters.
+  assert(!Clusters.empty());
+  for (CaseCluster &C : Clusters)
+    assert(C.Kind == CC_Range);
+  for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+    assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+  assert(TLI && "TLI not set!");
+  if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
+    return;
+
+  const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
+  const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+
+  // Bail if there are fewer clusters than a jump table needs.
+  const int64_t N = Clusters.size();
+  if (N < 2 || N < MinJumpTableEntries)
+    return;
+
+  // TotalCases[i] is the number of case values in Clusters[0..i] (prefix sum).
+  SmallVector<unsigned, 8> TotalCases(N);
+  for (unsigned i = 0; i < N; ++i) {
+    const APInt &Hi = Clusters[i].High->getValue();
+    const APInt &Lo = Clusters[i].Low->getValue();
+    TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+    if (i != 0)
+      TotalCases[i] += TotalCases[i - 1];
+  }
+
+  uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
+  uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+  assert(NumCases < UINT64_MAX / 100);
+  assert(Range >= NumCases);
+
+  // Cheap case: the whole range may be suitable for jump table.
+  if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+    CaseCluster JTCluster;
+    if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+      Clusters[0] = JTCluster;
+      Clusters.resize(1); // The single jump-table cluster replaces them all.
+      return;
+    }
+  }
+
+  // The algorithm below is not suitable for -O0.
+  if (TM->getOptLevel() == CodeGenOpt::None)
+    return;
+
+  // Split Clusters into the minimum number of dense partitions, following
+  // Kannan & Proebsting, "Correction to 'Producing Good Code for the Case
+  // Statement'" (1994). MinPartitions is filled back to front so the partitions
+  // can be read off in ascending order afterwards. Between two equally small
+  // partitionings, the one with the higher PartitionsScore wins.
+
+  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+  SmallVector<unsigned, 8> MinPartitions(N);
+  // LastElement[i] is the last element of the partition starting at i.
+  SmallVector<unsigned, 8> LastElement(N);
+  // PartitionsScore[i] is used to break ties when choosing between two
+  // partitionings resulting in the same number of partitions.
+  SmallVector<unsigned, 8> PartitionsScore(N);
+  // Scoring: a few comparisons (FewCases) rank equal to a jump table (Table);
+  // a single comparison (SingleCase) ranks above a jump table.
+  enum PartitionScores : unsigned {
+    NoTable = 0,
+    Table = 1,
+    FewCases = 1,
+    SingleCase = 2
+  };
+
+  // Base case: There is only one way to partition Clusters[N-1].
+  MinPartitions[N - 1] = 1;
+  LastElement[N - 1] = N - 1;
+  PartitionsScore[N - 1] = PartitionScores::SingleCase;
+
+  // Note: loop indexes are signed to avoid underflow.
+  for (int64_t i = N - 2; i >= 0; i--) {
+    // Find optimal partitioning of Clusters[i..N-1].
+    // Baseline: Put Clusters[i] into a partition on its own.
+    MinPartitions[i] = MinPartitions[i + 1] + 1;
+    LastElement[i] = i;
+    PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
+
+    // Search for a solution that results in fewer partitions.
+    for (int64_t j = N - 1; j > i; j--) {
+      // Try building a partition from Clusters[i..j].
+      Range = getJumpTableRange(Clusters, i, j);
+      NumCases = getJumpTableNumCases(TotalCases, i, j);
+      assert(NumCases < UINT64_MAX / 100);
+      assert(Range >= NumCases);
+
+      if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+        unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+        unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+        int64_t NumEntries = j - i + 1;
+
+        if (NumEntries == 1) // NOTE(review): cannot hold, since j > i here.
+          Score += PartitionScores::SingleCase;
+        else if (NumEntries <= SmallNumberOfEntries)
+          Score += PartitionScores::FewCases;
+        else if (NumEntries >= MinJumpTableEntries)
+          Score += PartitionScores::Table;
+
+        // If this leads to fewer partitions, or to the same number of
+        // partitions with better score, it is a better partitioning.
+        if (NumPartitions < MinPartitions[i] ||
+            (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
+          MinPartitions[i] = NumPartitions;
+          LastElement[i] = j;
+          PartitionsScore[i] = Score;
+        }
+      }
+    }
+  }
+
+  // Iterate over the partitions, replacing some with jump tables in-place.
+  unsigned DstIndex = 0;
+  for (unsigned First = 0, Last; First < N; First = Last + 1) {
+    Last = LastElement[First];
+    assert(Last >= First);
+    assert(DstIndex <= First);
+    unsigned NumClusters = Last - First + 1;
+
+    CaseCluster JTCluster;
+    if (NumClusters >= MinJumpTableEntries &&
+        buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+      Clusters[DstIndex++] = JTCluster;
+    } else { // Keep the clusters as they are, compacted to the front.
+      for (unsigned I = First; I <= Last; ++I)
+        std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+    }
+  }
+  Clusters.resize(DstIndex); // Drop the stale tail left by the compaction.
+}
+
+bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
+                                              unsigned First, unsigned Last,
+                                              const SwitchInst *SI,
+                                              MachineBasicBlock *DefaultMBB,
+                                              CaseCluster &JTCluster) {
+  // Builds a jump table covering Clusters[First..Last]. On success the new
+  // cluster is stored in JTCluster and true is returned; false is returned,
+  // with nothing created, when the target prefers bit tests for this range.
+  assert(First <= Last);
+
+  std::vector<MachineBasicBlock*> Table;
+  DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+  unsigned NumCmps = 0;
+  auto Prob = BranchProbability::getZero();
+
+  // Every destination starts out with a zero probability.
+  for (unsigned Idx = First; Idx <= Last; ++Idx)
+    JTProbs[Clusters[Idx].MBB] = BranchProbability::getZero();
+
+  // Emit the table entries cluster by cluster, summing probabilities.
+  for (unsigned Idx = First; Idx <= Last; ++Idx) {
+    const CaseCluster &CC = Clusters[Idx];
+    assert(CC.Kind == CC_Range);
+    Prob += CC.Prob;
+    const APInt &Low = CC.Low->getValue();
+    const APInt &High = CC.High->getValue();
+    NumCmps += (Low == High) ? 1 : 2;
+    if (Idx != First) {
+      // Values between the previous cluster and this one go to the default.
+      const APInt &PrevHigh = Clusters[Idx - 1].High->getValue();
+      assert(PrevHigh.slt(Low));
+      for (uint64_t Gap = (Low - PrevHigh).getLimitedValue() - 1; Gap; --Gap)
+        Table.push_back(DefaultMBB);
+    }
+    for (uint64_t Left = (High - Low).getLimitedValue() + 1; Left; --Left)
+      Table.push_back(CC.MBB);
+    JTProbs[CC.MBB] += CC.Prob;
+  }
+
+  // Ranges that qualify for bit tests are left for that lowering instead.
+  const unsigned NumDests = JTProbs.size();
+  const APInt &RangeLow = Clusters[First].Low->getValue();
+  const APInt &RangeHigh = Clusters[Last].High->getValue();
+  if (TLI->isSuitableForBitTests(NumDests, NumCmps, RangeLow, RangeHigh, *DL))
+    return false;
+
+  // Create the block that loads from and jumps through the table. It is only
+  // created here; it is not inserted into the function yet.
+  MachineFunction *CurMF = FuncInfo.MF;
+  MachineBasicBlock *JTBlock = CurMF->CreateMachineBasicBlock(SI->getParent());
+
+  // Add each distinct destination as a successor exactly once, walking the
+  // table in order so the successor list is deterministic.
+  SmallPtrSet<MachineBasicBlock *, 8> Seen;
+  for (MachineBasicBlock *Dest : Table)
+    if (Seen.insert(Dest).second)
+      addSuccessorWithProb(JTBlock, Dest, JTProbs[Dest]);
+  JTBlock->normalizeSuccProbs();
+
+  // Register the table with the function's jump table info.
+  MachineJumpTableInfo *JTInfo =
+      CurMF->getOrCreateJumpTableInfo(TLI->getJumpTableEncoding());
+  const unsigned JTI = JTInfo->createJumpTableIndex(Table);
+
+  // Record the header/table pair; the new cluster refers to it by index.
+  JumpTable JT(-1U, JTI, JTBlock, nullptr);
+  JumpTableHeader JTH(RangeLow, RangeHigh, SI->getCondition(), nullptr, false);
+  JTCases.emplace_back(std::move(JTH), std::move(JT));
+
+  const unsigned JTCaseIndex = JTCases.size() - 1;
+  JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+                                     JTCaseIndex, Prob);
+  return true;
+}
+
+void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
+                                                   const SwitchInst *SI) {
+  // Partition Clusters into as few subsets as possible, where each subset has a
+  // range that fits in a machine word and has <= 3 unique destinations. Each
+  // subset that buildBitTests() accepts is replaced by one bit-test cluster.
+#ifndef NDEBUG
+  // Clusters must be sorted and contain Range or JumpTable clusters.
+  assert(!Clusters.empty());
+  assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+  for (const CaseCluster &C : Clusters)
+    assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+  for (unsigned i = 1; i < Clusters.size(); ++i)
+    assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+  // The algorithm below is not suitable for -O0.
+  if (TM->getOptLevel() == CodeGenOpt::None)
+    return;
+
+  // If target does not have legal shift left, do not emit bit tests at all.
+  EVT PTy = TLI->getPointerTy(*DL);
+  if (!TLI->isOperationLegal(ISD::SHL, PTy))
+    return;
+
+  int BitWidth = PTy.getSizeInBits(); // Size in bits of the pointer type.
+  const int64_t N = Clusters.size();
+
+  // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+  SmallVector<unsigned, 8> MinPartitions(N);
+  // LastElement[i] is the last element of the partition starting at i.
+  SmallVector<unsigned, 8> LastElement(N);
+
+  // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+  // Base case: There is only one way to partition Clusters[N-1].
+  MinPartitions[N - 1] = 1;
+  LastElement[N - 1] = N - 1;
+
+  // Note: loop indexes are signed to avoid underflow.
+  for (int64_t i = N - 2; i >= 0; --i) {
+    // Find optimal partitioning of Clusters[i..N-1].
+    // Baseline: Put Clusters[i] into a partition on its own.
+    MinPartitions[i] = MinPartitions[i + 1] + 1;
+    LastElement[i] = i;
+
+    // Search for a solution that results in fewer partitions.
+    // Note: the search is limited by BitWidth, reducing time complexity.
+    for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+      // Try building a partition from Clusters[i..j].
+
+      // Check the range; if it does not fit, a shorter span (smaller j) may.
+      if (!TLI->rangeFitsInWord(Clusters[i].Low->getValue(),
+                                Clusters[j].High->getValue(), *DL))
+        continue;
+
+      // Check nbr of destinations and cluster types.
+      // FIXME: This works, but doesn't seem very efficient.
+      bool RangesOnly = true;
+      BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+      for (int64_t k = i; k <= j; k++) {
+        if (Clusters[k].Kind != CC_Range) {
+          RangesOnly = false;
+          break;
+        }
+        Dests.set(Clusters[k].MBB->getNumber());
+      }
+      if (!RangesOnly || Dests.count() > 3)
+        break; // NOTE(review): also skips every smaller j; confirm intended.
+
+      // Check if it's a better partition.
+      unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+      if (NumPartitions < MinPartitions[i]) {
+        // Found a better partition.
+        MinPartitions[i] = NumPartitions;
+        LastElement[i] = j;
+      }
+    }
+  }
+
+  // Iterate over the partitions, replacing with bit-test clusters in-place.
+  unsigned DstIndex = 0;
+  for (unsigned First = 0, Last; First < N; First = Last + 1) {
+    Last = LastElement[First];
+    assert(First <= Last);
+    assert(DstIndex <= First);
+
+    CaseCluster BitTestCluster;
+    if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+      Clusters[DstIndex++] = BitTestCluster;
+    } else { // No bit tests: keep the clusters, compacted to the front.
+      size_t NumClusters = Last - First + 1;
+      std::memmove(&Clusters[DstIndex], &Clusters[First],
+                   sizeof(Clusters[0]) * NumClusters);
+      DstIndex += NumClusters;
+    }
+  }
+  Clusters.resize(DstIndex); // Drop the stale tail left by the compaction.
+}
+
+/// Try to replace Clusters[First..Last] with a single bit-test cluster.
+bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
+                                             unsigned First, unsigned Last,
+                                             const SwitchInst *SI,
+                                             CaseCluster &BTCluster) {
+  assert(First <= Last);
+  if (First == Last) // A lone cluster is never lowered as bit tests.
+    return false;
+
+  BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+  unsigned NumCmps = 0;
+  for (int64_t I = First; I <= Last; ++I) {
+    assert(Clusters[I].Kind == CC_Range);
+    Dests.set(Clusters[I].MBB->getNumber());
+    NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+  }
+  unsigned NumDests = Dests.count();
+
+  APInt Low = Clusters[First].Low->getValue();
+  APInt High = Clusters[Last].High->getValue();
+  assert(Low.slt(High));
+
+  if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+    return false;
+
+  APInt LowBound; // Value subtracted from each case value to get its bit.
+  APInt CmpRange; // Largest bit index in use (High - LowBound).
+  const int BitWidth = TLI->getPointerTy(*DL).getSizeInBits();
+  assert(TLI->rangeFitsInWord(Low, High, *DL) &&
+         "Case range must fit in bit mask!");
+
+  // Check if the clusters cover a contiguous range such that no value in the
+  // range will jump to the default statement.
+  bool ContiguousRange = true;
+  for (int64_t I = First + 1; I <= Last; ++I) {
+    if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+      ContiguousRange = false;
+      break;
+    }
+  }
+
+  if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+    // Optimize the case where all the case values fit in a word without having
+    // to subtract minValue. In this case, we can optimize away the subtraction.
+    LowBound = APInt::getNullValue(Low.getBitWidth());
+    CmpRange = High;
+    ContiguousRange = false; // [0, Low) is now in range but has no case.
+  } else {
+    LowBound = Low;
+    CmpRange = High - Low;
+  }
+
+  CaseBitsVector CBV;
+  auto TotalProb = BranchProbability::getZero();
+  for (unsigned i = First; i <= Last; ++i) {
+    // Find the CaseBits for this destination.
+    unsigned j;
+    for (j = 0; j < CBV.size(); ++j)
+      if (CBV[j].BB == Clusters[i].MBB)
+        break;
+    if (j == CBV.size())
+      CBV.push_back(
+          CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
+    CaseBits *CB = &CBV[j];
+
+    // Set mask bits Lo..Hi; add this cluster's case count and probability.
+    uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+    uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+    assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
+    CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+    CB->Bits += Hi - Lo + 1;
+    CB->ExtraProb += Clusters[i].Prob;
+    TotalProb += Clusters[i].Prob;
+  }
+
+  BitTestInfo BTI;
+  llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
+    // Sort by probability first, number of bits second, bit mask third.
+    if (a.ExtraProb != b.ExtraProb)
+      return a.ExtraProb > b.ExtraProb;
+    if (a.Bits != b.Bits)
+      return a.Bits > b.Bits;
+    return a.Mask < b.Mask;
+  });
+
+  for (auto &CB : CBV) { // One new test block per destination, in sorted order.
+    MachineBasicBlock *BitTestBB =
+        FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
+  }
+  BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+                            SI->getCondition(), -1U, MVT::Other, false,
+                            ContiguousRange, nullptr, nullptr, std::move(BTI),
+                            TotalProb);
+
+  BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+                                    BitTestCases.size() - 1, TotalProb);
+  return true;
+}
+
+void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) {
+  // Sorts single-case clusters by value, then folds neighbouring values that
+  // share a destination into range clusters, compacting the vector in place.
+#ifndef NDEBUG
+  for (const CaseCluster &C : Clusters)
+    assert(C.Low == C.High && "Input clusters must be single-case");
+#endif
+
+  llvm::sort(Clusters, [](const CaseCluster &LHS, const CaseCluster &RHS) {
+    return LHS.Low->getValue().slt(RHS.Low->getValue());
+  });
+
+  // NumKept counts the clusters written so far; the last one is the only
+  // candidate the current input cluster can be merged into.
+  unsigned NumKept = 0;
+  for (unsigned Src = 0, E = Clusters.size(); Src < E; ++Src) {
+    CaseCluster &Cur = Clusters[Src];
+    if (NumKept != 0) {
+      CaseCluster &Prev = Clusters[NumKept - 1];
+      if (Prev.MBB == Cur.MBB &&
+          (Cur.Low->getValue() - Prev.High->getValue()) == 1) {
+        // Same successor and directly adjacent value: extend Prev.
+        Prev.High = Cur.Low;
+        Prev.Prob += Cur.Prob;
+        continue;
+      }
+    }
+    std::memmove(&Clusters[NumKept++], &Cur, sizeof(Cur));
+  }
+  Clusters.resize(NumKept);
+}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 25cd7802264e..ba348b4a9d41 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -1,9 +1,8 @@
 //===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index b118c176a897..a0590a8a6cc6 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -1,9 +1,8 @@
 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -435,7 +434,7 @@ void TailDuplicator::duplicateInstruction(
             if (NewRC == nullptr)
               NewRC = OrigRC;
             unsigned NewReg = MRI->createVirtualRegister(NewRC);
-            BuildMI(*PredBB, MI, MI->getDebugLoc(),
+            BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
                     TII->get(TargetOpcode::COPY), NewReg)
                 .addReg(VI->second.Reg, 0, VI->second.SubReg);
             LocalVRMap.erase(VI);
@@ -558,7 +557,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   unsigned MaxDuplicateCount;
   if (TailDupSize == 0 &&
       TailDuplicateSize.getNumOccurrences() == 0 &&
-      MF->getFunction().optForSize())
+      MF->getFunction().hasOptSize())
     MaxDuplicateCount = 1;
   else if (TailDupSize == 0)
     MaxDuplicateCount = TailDuplicateSize;
@@ -857,11 +856,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
     }
     appendCopies(PredBB, CopyInfos, Copies);
 
-    // Simplify
-    MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
-    SmallVector<MachineOperand, 4> PredCond;
-    TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
-
     NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
 
     // Update the CFG.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cf78fb5a1f12..9c4483cb240d 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,9 +1,8 @@
 //===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 2a17af391105..868617ffe14d 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -86,11 +85,13 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
 /// simple--i.e. not a logical or arithmetic expression--size values without
 /// the optional fill value. This is primarily used for creating arbitrary
 /// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
-                                             const MCAsmInfo &MAI) const {
+unsigned TargetInstrInfo::getInlineAsmLength(
+  const char *Str,
+  const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
   // Count the number of instructions in the asm.
   bool AtInsnStart = true;
   unsigned Length = 0;
+  const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
   for (; *Str; ++Str) {
     if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
                                 strlen(MAI.getSeparatorString())) == 0) {
@@ -102,7 +103,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
     }
 
     if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
-      unsigned AddLength = MAI.getMaxInstLength();
+      unsigned AddLength = MaxInstLength;
       if (strncmp(Str, ".space", 6) == 0) {
         char *EStr;
         int SpaceSize;
@@ -136,8 +137,14 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
   // Save off the debug loc before erasing the instruction.
   DebugLoc DL = Tail->getDebugLoc();
 
-  // Remove all the dead instructions from the end of MBB.
-  MBB->erase(Tail, MBB->end());
+  // Update call site info and remove all the dead instructions
+  // from the end of MBB.
+  while (Tail != MBB->end()) {
+    auto MI = Tail++;
+    if (MI->isCall())
+      MBB->getParent()->updateCallSiteInfo(&*MI);
+    MBB->erase(MI);
+  }
 
   // If MBB isn't immediately before MBB, insert a branch to it.
   if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
@@ -162,9 +169,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
   assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
          "This only knows how to commute register operands so far");
 
-  unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
-  unsigned Reg1 = MI.getOperand(Idx1).getReg();
-  unsigned Reg2 = MI.getOperand(Idx2).getReg();
+  Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+  Register Reg1 = MI.getOperand(Idx1).getReg();
+  Register Reg2 = MI.getOperand(Idx2).getReg();
   unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
   unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
   unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
@@ -523,7 +530,8 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
 
 MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                  ArrayRef<unsigned> Ops, int FI,
-                                                 LiveIntervals *LIS) const {
+                                                 LiveIntervals *LIS,
+                                                 VirtRegMap *VRM) const {
   auto Flags = MachineMemOperand::MONone;
   for (unsigned OpIdx : Ops)
     Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
@@ -569,7 +577,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
       MBB->insert(MI, NewMI);
   } else {
     // Ask the target to do the actual folding.
-    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
   }
 
   if (NewMI) {
@@ -898,7 +906,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
     return true;
 
   // Avoid instructions obviously unsafe for remat.
-  if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+  if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+      MI.hasUnmodeledSideEffects())
     return false;
 
   // Don't remat inline asm. We have no idea how expensive it is
@@ -1010,7 +1019,7 @@ ScheduleHazardRecognizer *TargetInstrInfo::
 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                const ScheduleDAG *DAG) const {
   return (ScheduleHazardRecognizer *)
-    new ScoreboardHazardRecognizer(II, DAG, "misched");
+    new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
 }
 
 // Default implementation of CreateTargetPostRAHazardRecognizer.
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e86190375642..9b28c1a6c450 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,9 +1,8 @@
 //===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,8 +73,8 @@ static cl::opt<unsigned> MinimumJumpTableEntries
    cl::desc("Set minimum number of entries to use a jump table."));
 
 static cl::opt<unsigned> MaximumJumpTableSize
-  ("max-jump-table-size", cl::init(0), cl::Hidden,
-   cl::desc("Set maximum size of jump tables; zero for no limit."));
+  ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
+   cl::desc("Set maximum size of jump tables."));
 
 /// Minimum jump table density for normal functions.
 static cl::opt<unsigned>
@@ -124,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
   for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
     setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
 
+  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
+  if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+    setLibcallName(RTLIB::ADD_F128, "__addkf3");
+    setLibcallName(RTLIB::SUB_F128, "__subkf3");
+    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
+    setLibcallName(RTLIB::DIV_F128, "__divkf3");
+    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
+    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
+    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
+    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
+    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
+    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
+    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
+    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
+    setLibcallName(RTLIB::UNE_F128, "__nekf2");
+    setLibcallName(RTLIB::OGE_F128, "__gekf2");
+    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
+    setLibcallName(RTLIB::OLE_F128, "__lekf2");
+    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
+    setLibcallName(RTLIB::UO_F128, "__unordkf2");
+    setLibcallName(RTLIB::O_F128, "__unordkf2");
+  }
+
   // A few names are different on particular architectures or environments.
   if (TT.isOSDarwin()) {
     // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
@@ -546,7 +573,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
   EnableExtLdPromotion = false;
-  HasFloatingPointExceptions = true;
   StackPointerRegisterToSaveRestore = 0;
   BooleanContents = UndefinedBooleanContent;
   BooleanFloatContents = UndefinedBooleanContent;
@@ -583,6 +609,14 @@ void TargetLoweringBase::initActions() {
   std::fill(std::begin(TargetDAGCombineArray),
             std::end(TargetDAGCombineArray), 0);
 
+  for (MVT VT : MVT::fp_valuetypes()) {
+    MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+    if (IntVT.isValid()) {
+      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
+      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
+    }
+  }
+
   // Set default actions for various operations.
   for (MVT VT : MVT::all_valuetypes()) {
     // Default all indexed load / store to expand.
@@ -617,6 +651,8 @@ void TargetLoweringBase::initActions() {
     setOperationAction(ISD::SSUBSAT, VT, Expand);
     setOperationAction(ISD::USUBSAT, VT, Expand);
     setOperationAction(ISD::SMULFIX, VT, Expand);
+    setOperationAction(ISD::SMULFIXSAT, VT, Expand);
+    setOperationAction(ISD::UMULFIX, VT, Expand);
 
     // Overflow operations default to expand
     setOperationAction(ISD::SADDO, VT, Expand);
@@ -655,8 +691,51 @@ void TargetLoweringBase::initActions() {
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
     }
 
+    // Constrained floating-point operations default to expand.
+    setOperationAction(ISD::STRICT_FADD, VT, Expand);
+    setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+    setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+    setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+    setOperationAction(ISD::STRICT_FREM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMA, VT, Expand);
+    setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+    setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+    setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+    setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
+    setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+    setOperationAction(ISD::STRICT_FROUND, VT, Expand);
+    setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
+    setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
+    setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
+    setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
+
+    // Vector reductions default to expand.
+    setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
+    setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
   }
 
   // Most targets ignore the @llvm.prefetch intrinsic.
@@ -688,6 +767,10 @@ void TargetLoweringBase::initActions() {
     setOperationAction(ISD::FRINT,      VT, Expand);
     setOperationAction(ISD::FTRUNC,     VT, Expand);
     setOperationAction(ISD::FROUND,     VT, Expand);
+    setOperationAction(ISD::LROUND,     VT, Expand);
+    setOperationAction(ISD::LLROUND,    VT, Expand);
+    setOperationAction(ISD::LRINT,      VT, Expand);
+    setOperationAction(ISD::LLRINT,     VT, Expand);
   }
 
   // Default ISD::TRAP to expand (which turns it into abort).
@@ -700,7 +783,7 @@ void TargetLoweringBase::initActions() {
 
 MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
                                                EVT) const {
-  return MVT::getIntegerVT(8 * DL.getPointerSize(0));
+  return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
 }
 
 EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
@@ -985,16 +1068,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
     // Add a new memory operand for this FI.
     assert(MFI.getObjectOffset(FI) != -1);
 
-    auto Flags = MachineMemOperand::MOLoad;
-    if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
-      Flags |= MachineMemOperand::MOStore;
-      Flags |= MachineMemOperand::MOVolatile;
+    // Note: STATEPOINT MMOs are added during SelectionDAG.  STACKMAP and
+    // PATCHPOINT should be updated to do the same. (TODO)
+    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
+      auto Flags = MachineMemOperand::MOLoad;
+      MachineMemOperand *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(MF, FI), Flags,
+          MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
+      MIB->addMemOperand(MF, MMO);
     }
-    MachineMemOperand *MMO = MF.getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(MF, FI), Flags,
-        MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
-    MIB->addMemOperand(MF, MMO);
-
+    
     // Replace the instruction and update the operand index.
     MBB->insert(MachineBasicBlock::iterator(MI), MIB);
     OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
@@ -1393,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
       Flags.setZExt();
 
     for (unsigned i = 0; i < NumParts; ++i)
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
   }
 }
 
@@ -1409,6 +1492,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                             const DataLayout &DL, EVT VT,
                                             unsigned AddrSpace,
                                             unsigned Alignment,
+                                            MachineMemOperand::Flags Flags,
                                             bool *Fast) const {
   // Check if the specified alignment is sufficient based on the data layout.
   // TODO: While using the data layout works in practice, a better solution
@@ -1424,7 +1508,15 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
   }
 
   // This is a misaligned access.
-  return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+  return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+                                            const DataLayout &DL, EVT VT,
+                                            const MachineMemOperand &MMO,
+                                            bool *Fast) const {
+  return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
+                            MMO.getAlignment(), MMO.getFlags(), Fast);
 }
 
 BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
@@ -1447,6 +1539,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
   case Switch:         return 0;
   case IndirectBr:     return 0;
   case Invoke:         return 0;
+  case CallBr:         return 0;
   case Resume:         return 0;
   case Unreachable:    return 0;
   case CleanupRet:     return 0;
@@ -1580,8 +1673,8 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
   // thread's unsafe stack pointer.
   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
   Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
-  Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
-                                     StackPtrTy->getPointerTo(0));
+  FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
+                                             StackPtrTy->getPointerTo(0));
   return IRB.CreateCall(Fn);
 }
 
@@ -1656,7 +1749,7 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
   return M.getNamedValue("__stack_chk_guard");
 }
 
-Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
   return nullptr;
 }
 
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index cb2fe691d702..4c8f75b237aa 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -219,6 +218,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
       PersonalityEncoding = dwarf::DW_EH_PE_absptr;
       TTypeEncoding = dwarf::DW_EH_PE_absptr;
     }
+    CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+    break;
+  case Triple::riscv32:
+  case Triple::riscv64:
+    LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+    PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                          dwarf::DW_EH_PE_sdata4;
+    TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                    dwarf::DW_EH_PE_sdata4;
+    CallSiteEncoding = dwarf::DW_EH_PE_udata4;
     break;
   case Triple::sparcv9:
     LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -272,6 +281,19 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
     }
   }
 
+  if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
+    auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
+                              ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+
+    Streamer.SwitchSection(S);
+
+    for (const auto &Operand : DependentLibraries->operands()) {
+      Streamer.EmitBytes(
+          cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
+      Streamer.EmitIntValue(0, 1);
+    }
+  }
+
   unsigned Version = 0;
   unsigned Flags = 0;
   StringRef Section;
@@ -1458,7 +1480,7 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                               const TargetMachine &TM) {
   TargetLoweringObjectFile::Initialize(Ctx, TM);
   const Triple &T = TM.getTargetTriple();
-  if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+  if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
     StaticCtorSection =
         Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                                            COFF::IMAGE_SCN_MEM_READ,
@@ -1484,7 +1506,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
                                                    unsigned Priority,
                                                    const MCSymbol *KeySym,
                                                    MCSectionCOFF *Default) {
-  if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+  if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
     // If the priority is the default, use .CRT$XCU, possibly associative.
     if (Priority == 65535)
       return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
@@ -1544,9 +1566,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
     const GlobalValue *LHS, const GlobalValue *RHS,
     const TargetMachine &TM) const {
   const Triple &T = TM.getTargetTriple();
-  if (!T.isKnownWindowsMSVCEnvironment() &&
-      !T.isWindowsItaniumEnvironment() &&
-      !T.isWindowsCoreCLREnvironment())
+  if (T.isOSCygMing())
     return nullptr;
 
   // Our symbols should exist in address space zero, cowardly no-op if
@@ -1694,8 +1714,11 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     Group = C->getName();
   }
 
-  return getContext().getWasmSection(Name, Kind, Group,
-                                     MCContext::GenericSectionID);
+  MCSectionWasm* Section =
+      getContext().getWasmSection(Name, Kind, Group,
+                                  MCContext::GenericSectionID);
+
+  return Section;
 }
 
 static MCSectionWasm *selectWasmSectionForGlobal(
@@ -1724,6 +1747,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
     UniqueID = *NextUniqueID;
     (*NextUniqueID)++;
   }
+
   return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
 }
 
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 3c133fb8594e..039748d817ca 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -1,9 +1,8 @@
 //===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 28126fcf766d..36df02692f86 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,9 +1,8 @@
 //===- TargetPassConfig.cpp - Target independent code generation passes ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
 #include "llvm/CodeGen/Passes.h"
@@ -408,7 +408,7 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
     TM.Options.EnableIPRA = EnableIPRA;
   else {
     // If not explicitly specified, use target default.
-    TM.Options.EnableIPRA = TM.useIPRA();
+    TM.Options.EnableIPRA |= TM.useIPRA();
   }
 
   if (TM.Options.EnableIPRA)
@@ -646,7 +646,7 @@ void TargetPassConfig::addIRPasses() {
     // into optimally-sized loads and compares. The transforms are enabled by a
     // target lowering hook.
     if (!DisableMergeICmps)
-      addPass(createMergeICmpsPass());
+      addPass(createMergeICmpsLegacyPass());
     addPass(createExpandMemCmpPass());
   }
 
@@ -815,6 +815,13 @@ bool TargetPassConfig::addCoreISelPasses() {
   } else if (addInstSelector())
     return true;
 
+  // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+  // FinalizeISel.
+  addPass(&FinalizeISelID);
+
+  // Print the instruction selected machine code...
+  printAndVerify("After Instruction Selection");
+
   return false;
 }
 
@@ -874,12 +881,6 @@ void TargetPassConfig::addMachinePasses() {
     }
   }
 
-  // Print the instruction selected machine code...
-  printAndVerify("After Instruction Selection");
-
-  // Expand pseudo-instructions emitted by ISel.
-  addPass(&ExpandISelPseudosID);
-
   // Add passes that optimize machine instructions in SSA form.
   if (getOptLevel() != CodeGenOpt::None) {
     addMachineSSAOptimization();
@@ -898,13 +899,9 @@ void TargetPassConfig::addMachinePasses() {
   // Run register allocation and passes that are tightly coupled with it,
   // including phi elimination and scheduling.
   if (getOptimizeRegAlloc())
-    addOptimizedRegAlloc(createRegAllocPass(true));
-  else {
-    if (RegAlloc != &useDefaultRegisterAllocator &&
-        RegAlloc != &createFastRegisterAllocator)
-      report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
-    addFastRegAlloc(createRegAllocPass(false));
-  }
+    addOptimizedRegAlloc();
+  else
+    addFastRegAlloc();
 
   // Run post-ra passes.
   addPostRegAlloc();
@@ -1039,10 +1036,6 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
   llvm_unreachable("Invalid optimize-regalloc state");
 }
 
-/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
-    RegisterRegAlloc::Registry;
-
 /// A dummy default pass factory indicates whether the register allocator is
 /// overridden on the command line.
 static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
@@ -1053,12 +1046,8 @@ defaultRegAlloc("default",
                 useDefaultRegisterAllocator);
 
 static void initializeDefaultRegisterAllocatorOnce() {
-  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
-  if (!Ctor) {
-    Ctor = RegAlloc;
+  if (!RegisterRegAlloc::getDefault())
     RegisterRegAlloc::setDefault(RegAlloc);
-  }
 }
 
 /// Instantiate the default register allocator pass for this target for either
@@ -1098,6 +1087,33 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
   return createTargetRegisterAllocator(Optimized);
 }
 
+bool TargetPassConfig::addRegAssignmentFast() {
+  if (RegAlloc != &useDefaultRegisterAllocator &&
+      RegAlloc != &createFastRegisterAllocator)
+    report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
+
+  addPass(createRegAllocPass(false));
+  return true;
+}
+
+bool TargetPassConfig::addRegAssignmentOptimized() {
+  // Add the selected register allocation pass.
+  addPass(createRegAllocPass(true));
+
+  // Allow targets to change the register assignments before rewriting.
+  addPreRewrite();
+
+  // Finally rewrite virtual registers.
+  addPass(&VirtRegRewriterID);
+  // Perform stack slot coloring and post-ra machine LICM.
+  //
+  // FIXME: Re-enable coloring with register when it's capable of adding
+  // kill markers.
+  addPass(&StackSlotColoringID);
+
+  return true;
+}
+
 /// Return true if the default global register allocator is in use and
 /// has not be overriden on the command line with '-regalloc=...'
 bool TargetPassConfig::usingDefaultRegAlloc() const {
@@ -1106,18 +1122,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
 
 /// Add the minimum set of target-independent passes that are required for
 /// register allocation. No coalescing or scheduling.
-void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addFastRegAlloc() {
   addPass(&PHIEliminationID, false);
   addPass(&TwoAddressInstructionPassID, false);
 
-  if (RegAllocPass)
-    addPass(RegAllocPass);
+  addRegAssignmentFast();
 }
 
 /// Add standard target-independent passes that are tightly coupled with
 /// optimized register allocation, including coalescing, machine instruction
 /// scheduling, and register allocation itself.
-void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addOptimizedRegAlloc() {
   addPass(&DetectDeadLanesID, false);
 
   addPass(&ProcessImplicitDefsID, false);
@@ -1149,21 +1164,10 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   // PreRA instruction scheduling.
   addPass(&MachineSchedulerID);
 
-  if (RegAllocPass) {
-    // Add the selected register allocation pass.
-    addPass(RegAllocPass);
-
-    // Allow targets to change the register assignments before rewriting.
-    addPreRewrite();
-
-    // Finally rewrite virtual registers.
-    addPass(&VirtRegRewriterID);
-
-    // Perform stack slot coloring and post-ra machine LICM.
-    //
-    // FIXME: Re-enable coloring with register when it's capable of adding
-    // kill markers.
-    addPass(&StackSlotColoringID);
+  if (addRegAssignmentOptimized()) {
+    // Allow targets to expand pseudo instructions depending on the choice of
+    // registers before MachineCopyPropagation.
+    addPostRewrite();
 
     // Copy propagate to forward register uses and try to eliminate COPYs that
     // were not coalesced.
@@ -1221,3 +1225,11 @@ bool TargetPassConfig::isGlobalISelAbortEnabled() const {
 bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
   return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
 }
+
+bool TargetPassConfig::isGISelCSEEnabled() const {
+  return true;
+}
+
+std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
+  return make_unique<CSEConfigBase>();
+}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 661dc18f7a85..f1b2ecf3243b 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -398,6 +398,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
   const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
     MRI.getRegAllocationHints(VirtReg);
 
+  SmallSet<unsigned, 32> HintedRegs;
   // First hint may be a target hint.
   bool Skip = (Hints_MRI.first != 0);
   for (auto Reg : Hints_MRI.second) {
@@ -411,6 +412,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
     if (VRM && isVirtualRegister(Phys))
       Phys = VRM->getPhys(Phys);
 
+    // Don't add the same reg twice (Hints_MRI may contain multiple virtual
+    // registers allocated to the same physreg).
+    if (!HintedRegs.insert(Phys).second)
+      continue;
     // Check that Phys is a valid hint in VirtReg's register class.
     if (!isPhysicalRegister(Phys))
       continue;
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 3cff31ad4933..195279719ad4 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index fa29c05fd6c2..59eb2f9c88cb 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,9 +1,8 @@
 //===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,24 +11,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
 
 using namespace llvm;
 
 TargetSubtargetInfo::TargetSubtargetInfo(
     const Triple &TT, StringRef CPU, StringRef FS,
-    ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
-    const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+    ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+    const MCWriteProcResEntry *WPR,
     const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
     const InstrStage *IS, const unsigned *OC, const unsigned *FP)
-    : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
+    : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {
 }
 
 TargetSubtargetInfo::~TargetSubtargetInfo() = default;
@@ -67,50 +58,4 @@ bool TargetSubtargetInfo::useAA() const {
   return false;
 }
 
-static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
-  static const char *SchedPrefix = " sched: [";
-  std::string Comment;
-  raw_string_ostream CS(Comment);
-  if (RThroughput != 0.0)
-    CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
-       << "]";
-  else
-    CS << SchedPrefix << Latency << ":?]";
-  CS.flush();
-  return Comment;
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
-  if (MI.isPseudo() || MI.isTerminator())
-    return std::string();
-  // We don't cache TSchedModel because it depends on TargetInstrInfo
-  // that could be changed during the compilation
-  TargetSchedModel TSchedModel;
-  TSchedModel.init(this);
-  unsigned Latency = TSchedModel.computeInstrLatency(&MI);
-  double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
-  return createSchedInfoStr(Latency, RThroughput);
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
-  // We don't cache TSchedModel because it depends on TargetInstrInfo
-  // that could be changed during the compilation
-  TargetSchedModel TSchedModel;
-  TSchedModel.init(this);
-  unsigned Latency;
-  if (TSchedModel.hasInstrSchedModel())
-    Latency = TSchedModel.computeInstrLatency(MCI);
-  else if (TSchedModel.hasInstrItineraries()) {
-    auto *ItinData = TSchedModel.getInstrItineraries();
-    Latency = ItinData->getStageLatency(
-        getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
-  } else
-    return std::string();
-  double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
-  return createSchedInfoStr(Latency, RThroughput);
-}
-
-void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
-}
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 4b72f6a84ca1..43d876646967 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1,9 +1,8 @@
 //===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1245,8 +1244,13 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
         ++NumAggrCommuted;
         // There might be more than two commutable operands, update BaseOp and
         // continue scanning.
+        // FIXME: This assumes that the new instruction's operands are in the
+        // same positions and were simply swapped.
         BaseOpReg = OtherOpReg;
         BaseOpKilled = OtherOpKilled;
+        // Resample OpsNum in case the number of operands was reduced. This
+        // happens with X86.
+        OpsNum = MI->getDesc().getNumOperands();
         continue;
       }
       // If this was a commute based on kill, we won't do better continuing.
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 5288ca672774..177bab32bccc 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -1,9 +1,8 @@
 //===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,43 +37,13 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 using namespace llvm;
 
-static bool eliminateUnreachableBlock(Function &F) {
-  df_iterator_default_set<BasicBlock*> Reachable;
-
-  // Mark all reachable blocks.
-  for (BasicBlock *BB : depth_first_ext(&F, Reachable))
-    (void)BB/* Mark all reachable blocks */;
-
-  // Loop over all dead blocks, remembering them and deleting all instructions
-  // in them.
-  std::vector<BasicBlock*> DeadBlocks;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-    if (!Reachable.count(&*I)) {
-      BasicBlock *BB = &*I;
-      DeadBlocks.push_back(BB);
-      while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
-        PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
-        BB->getInstList().pop_front();
-      }
-      for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
-        (*SI)->removePredecessor(BB);
-      BB->dropAllReferences();
-    }
-
-  // Actually remove the blocks now.
-  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    DeadBlocks[i]->eraseFromParent();
-  }
-
-  return !DeadBlocks.empty();
-}
-
 namespace {
 class UnreachableBlockElimLegacyPass : public FunctionPass {
   bool runOnFunction(Function &F) override {
-    return eliminateUnreachableBlock(F);
+    return llvm::EliminateUnreachableBlocks(F);
   }
 
 public:
@@ -99,7 +68,7 @@ FunctionPass *llvm::createUnreachableBlockEliminationPass() {
 
 PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
-  bool Changed = eliminateUnreachableBlock(F);
+  bool Changed = llvm::EliminateUnreachableBlocks(F);
   if (!Changed)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp
index adb7075de651..a911cdcbec9d 100644
--- a/lib/CodeGen/ValueTypes.cpp
+++ b/lib/CodeGen/ValueTypes.cpp
@@ -1,9 +1,8 @@
 //===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -166,11 +165,18 @@ std::string EVT::getEVTString() const {
   case MVT::v128i16: return "v128i16";
   case MVT::v1i32:   return "v1i32";
   case MVT::v2i32:   return "v2i32";
+  case MVT::v3i32:   return "v3i32";
   case MVT::v4i32:   return "v4i32";
+  case MVT::v5i32:   return "v5i32";
   case MVT::v8i32:   return "v8i32";
   case MVT::v16i32:  return "v16i32";
   case MVT::v32i32:  return "v32i32";
   case MVT::v64i32:  return "v64i32";
+  case MVT::v128i32: return "v128i32";
+  case MVT::v256i32: return "v256i32";
+  case MVT::v512i32: return "v512i32";
+  case MVT::v1024i32:return "v1024i32";
+  case MVT::v2048i32:return "v2048i32";
   case MVT::v1i64:   return "v1i64";
   case MVT::v2i64:   return "v2i64";
   case MVT::v4i64:   return "v4i64";
@@ -183,16 +189,25 @@ std::string EVT::getEVTString() const {
   case MVT::v2f16:   return "v2f16";
   case MVT::v4f16:   return "v4f16";
   case MVT::v8f16:   return "v8f16";
+  case MVT::v3f32:   return "v3f32";
   case MVT::v4f32:   return "v4f32";
+  case MVT::v5f32:   return "v5f32";
   case MVT::v8f32:   return "v8f32";
   case MVT::v16f32:  return "v16f32";
+  case MVT::v32f32:  return "v32f32";
+  case MVT::v64f32:  return "v64f32";
+  case MVT::v128f32: return "v128f32";
+  case MVT::v256f32: return "v256f32";
+  case MVT::v512f32: return "v512f32";
+  case MVT::v1024f32:return "v1024f32";
+  case MVT::v2048f32:return "v2048f32";
   case MVT::v1f64:   return "v1f64";
   case MVT::v2f64:   return "v2f64";
   case MVT::v4f64:   return "v4f64";
   case MVT::v8f64:   return "v8f64";
   case MVT::Metadata:return "Metadata";
   case MVT::Untyped: return "Untyped";
-  case MVT::ExceptRef: return "ExceptRef";
+  case MVT::exnref : return "exnref";
   }
 }
 
@@ -247,11 +262,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
   case MVT::v1i32:   return VectorType::get(Type::getInt32Ty(Context), 1);
   case MVT::v2i32:   return VectorType::get(Type::getInt32Ty(Context), 2);
+  case MVT::v3i32:   return VectorType::get(Type::getInt32Ty(Context), 3);
   case MVT::v4i32:   return VectorType::get(Type::getInt32Ty(Context), 4);
+  case MVT::v5i32:   return VectorType::get(Type::getInt32Ty(Context), 5);
   case MVT::v8i32:   return VectorType::get(Type::getInt32Ty(Context), 8);
   case MVT::v16i32:  return VectorType::get(Type::getInt32Ty(Context), 16);
   case MVT::v32i32:  return VectorType::get(Type::getInt32Ty(Context), 32);
   case MVT::v64i32:  return VectorType::get(Type::getInt32Ty(Context), 64);
+  case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128);
+  case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256);
+  case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512);
+  case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024);
+  case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048);
   case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1);
   case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2);
   case MVT::v4i64:   return VectorType::get(Type::getInt64Ty(Context), 4);
@@ -264,9 +286,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v8f16:   return VectorType::get(Type::getHalfTy(Context), 8);
   case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
+  case MVT::v3f32:   return VectorType::get(Type::getFloatTy(Context), 3);
   case MVT::v4f32:   return VectorType::get(Type::getFloatTy(Context), 4);
+  case MVT::v5f32:   return VectorType::get(Type::getFloatTy(Context), 5);
   case MVT::v8f32:   return VectorType::get(Type::getFloatTy(Context), 8);
-  case MVT::v16f32:   return VectorType::get(Type::getFloatTy(Context), 16);
+  case MVT::v16f32:  return VectorType::get(Type::getFloatTy(Context), 16);
+  case MVT::v32f32:  return VectorType::get(Type::getFloatTy(Context), 32);
+  case MVT::v64f32:  return VectorType::get(Type::getFloatTy(Context), 64);
+  case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128);
+  case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256);
+  case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512);
+  case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024);
+  case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048);
   case MVT::v1f64:   return VectorType::get(Type::getDoubleTy(Context), 1);
   case MVT::v2f64:   return VectorType::get(Type::getDoubleTy(Context), 2);
   case MVT::v4f64:   return VectorType::get(Type::getDoubleTy(Context), 4);
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ed7bef667e77..4a06704a8876 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -385,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
   // give us additional liveness information: The target (super-)register
   // must not be valid before this point. Replace the COPY with a KILL
   // instruction to maintain this information.
-  if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
     MI.setDesc(TII->get(TargetOpcode::KILL));
     LLVM_DEBUG(dbgs() << "  replace by: " << MI);
     return;
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index e5002eb95346..865a1cfbf43a 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -1,14 +1,14 @@
 //===-- WasmEHPrepare - Prepare excepton handling for WebAssembly --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This transformation is designed for use by code generators which use
-// WebAssembly exception handling scheme.
+// WebAssembly exception handling scheme. This currently supports C++
+// exceptions.
 //
 // WebAssembly exception handling uses Windows exception IR for the middle level
 // representation. This pass does the following transformation for every
@@ -23,53 +23,20 @@
 //
 // - After:
 //   catchpad ...
-//   exn = wasm.catch(0); // 0 is a tag for C++
-//   wasm.landingpad.index(index);
+//   exn = wasm.extract.exception();
 //   // Only add below in case it's not a single catch (...)
+//   wasm.landingpad.index(index);
 //   __wasm_lpad_context.lpad_index = index;
 //   __wasm_lpad_context.lsda = wasm.lsda();
 //   _Unwind_CallPersonality(exn);
-//   int selector = __wasm.landingpad_context.selector;
+//   selector = __wasm.landingpad_context.selector;
 //   ...
 //
-// Also, does the following for a cleanuppad block with a call to
-// __clang_call_terminate():
-// - Before:
-//   cleanuppad ...
-//   exn = wasm.get.exception();
-//   __clang_call_terminate(exn);
-//
-// - After:
-//   cleanuppad ...
-//   exn = wasm.catch(0); // 0 is a tag for C++
-//   __clang_call_terminate(exn);
-//
-//
-// * Background: WebAssembly EH instructions
-// WebAssembly's try and catch instructions are structured as follows:
-// try
-//   instruction*
-// catch (C++ tag)
-//   instruction*
-// ...
-// catch_all
-//   instruction*
-// try_end
-//
-// A catch instruction in WebAssembly does not correspond to a C++ catch clause.
-// In WebAssembly, there is a single catch instruction for all C++ exceptions.
-// There can be more catch instructions for exceptions in other languages, but
-// they are not generated for now. catch_all catches all exceptions including
-// foreign exceptions (e.g. JavaScript). We turn catchpads into catch (C++ tag)
-// and cleanuppads into catch_all, with one exception: cleanuppad with a call to
-// __clang_call_terminate should be both in catch (C++ tag) and catch_all.
-//
 //
 // * Background: Direct personality function call
 // In WebAssembly EH, the VM is responsible for unwinding the stack once an
 // exception is thrown. After the stack is unwound, the control flow is
-// transfered to WebAssembly 'catch' instruction, which returns a caught
-// exception object.
+// transferred to WebAssembly 'catch' instruction.
 //
 // Unwinding the stack is not done by libunwind but the VM, so the personality
 // function in libcxxabi cannot be called from libunwind during the unwinding
@@ -137,19 +104,19 @@ class WasmEHPrepare : public FunctionPass {
   Value *LSDAField = nullptr;      // lsda field
   Value *SelectorField = nullptr;  // selector
 
-  Function *ThrowF = nullptr;           // wasm.throw() intrinsic
-  Function *CatchF = nullptr;           // wasm.catch.extract() intrinsic
-  Function *LPadIndexF = nullptr;       // wasm.landingpad.index() intrinsic
-  Function *LSDAF = nullptr;            // wasm.lsda() intrinsic
-  Function *GetExnF = nullptr;          // wasm.get.exception() intrinsic
-  Function *GetSelectorF = nullptr;     // wasm.get.ehselector() intrinsic
-  Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
-  Function *ClangCallTermF = nullptr;   // __clang_call_terminate() function
+  Function *ThrowF = nullptr;       // wasm.throw() intrinsic
+  Function *LPadIndexF = nullptr;   // wasm.landingpad.index() intrinsic
+  Function *LSDAF = nullptr;        // wasm.lsda() intrinsic
+  Function *GetExnF = nullptr;      // wasm.get.exception() intrinsic
+  Function *ExtractExnF = nullptr;  // wasm.extract.exception() intrinsic
+  Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+  FunctionCallee CallPersonalityF =
+      nullptr; // _Unwind_CallPersonality() wrapper
 
   bool prepareEHPads(Function &F);
   bool prepareThrows(Function &F);
 
-  void prepareEHPad(BasicBlock *BB, unsigned Index);
+  void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0);
   void prepareTerminateCleanupPad(BasicBlock *BB);
 
 public:
@@ -209,14 +176,12 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
 
   // wasm.throw() intinsic, which will be lowered to wasm 'throw' instruction.
   ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
-
   // Insert an unreachable instruction after a call to @llvm.wasm.throw and
   // delete all following instructions within the BB, and delete all the dead
   // children of the BB as well.
   for (User *U : ThrowF->users()) {
-    // A call to @llvm.wasm.throw() is only generated from
-    // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
-    // InvokeInst.
+    // A call to @llvm.wasm.throw() is only generated from __cxa_throw()
+    // builtin call within libcxxabi, and cannot be an InvokeInst.
     auto *ThrowI = cast<CallInst>(U);
     if (ThrowI->getFunction() != &F)
       continue;
@@ -263,8 +228,6 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
   SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
                                          "selector_gep");
 
-  // wasm.catch() intinsic, which will be lowered to wasm 'catch' instruction.
-  CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
   // wasm.landingpad.index() intrinsic, which is to specify landingpad index
   LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
   // wasm.lsda() intrinsic. Returns the address of LSDA table for the current
@@ -275,14 +238,18 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
   GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
   GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
 
-  // _Unwind_CallPersonality() wrapper function, which calls the personality
-  CallPersonalityF = cast<Function>(M.getOrInsertFunction(
-      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
-  CallPersonalityF->setDoesNotThrow();
+  // wasm.extract.exception() is the same as wasm.get.exception() but it does
+  // not take a token argument. This will be lowered down to EXTRACT_EXCEPTION
+  // pseudo instruction in instruction selection, which will be expanded using
+  // 'br_on_exn' instruction later.
+  ExtractExnF =
+      Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
 
-  // __clang_call_terminate() function, which is inserted by clang in case a
-  // cleanup throws
-  ClangCallTermF = M.getFunction("__clang_call_terminate");
+  // _Unwind_CallPersonality() wrapper function, which calls the personality
+  CallPersonalityF = M.getOrInsertFunction(
+      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+  if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
+    F->setDoesNotThrow();
 
   unsigned Index = 0;
   for (auto *BB : CatchPads) {
@@ -290,60 +257,52 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
     // In case of a single catch (...), we don't need to emit LSDA
     if (CPI->getNumArgOperands() == 1 &&
         cast<Constant>(CPI->getArgOperand(0))->isNullValue())
-      prepareEHPad(BB, -1);
+      prepareEHPad(BB, false);
     else
-      prepareEHPad(BB, Index++);
+      prepareEHPad(BB, true, Index++);
   }
 
-  if (!ClangCallTermF)
-    return !CatchPads.empty();
-
-  // Cleanuppads will turn into catch_all later, but cleanuppads with a call to
-  // __clang_call_terminate() is a special case. __clang_call_terminate() takes
-  // an exception object, so we have to duplicate call in both 'catch <C++ tag>'
-  // and 'catch_all' clauses. Here we only insert a call to catch; the
-  // duplication will be done later. In catch_all, the exception object will be
-  // set to null.
+  // Cleanup pads don't need LSDA.
   for (auto *BB : CleanupPads)
-    for (auto &I : *BB)
-      if (auto *CI = dyn_cast<CallInst>(&I))
-        if (CI->getCalledValue() == ClangCallTermF)
-          prepareEHPad(BB, -1);
+    prepareEHPad(BB, false);
 
   return true;
 }
 
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is
+// ignored.
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
+                                 unsigned Index) {
   assert(BB->isEHPad() && "BB is not an EHPad!");
   IRBuilder<> IRB(BB->getContext());
-
   IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
-  // The argument to wasm.catch() is the tag for C++ exceptions, which we set to
-  // 0 for this module.
-  // Pseudocode: void *exn = wasm.catch(0);
-  Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
-  // Replace the return value of wasm.get.exception() with the return value from
-  // wasm.catch().
+
   auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
   Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
   for (auto &U : FPI->uses()) {
     if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
       if (CI->getCalledValue() == GetExnF)
         GetExnCI = CI;
-      else if (CI->getCalledValue() == GetSelectorF)
+      if (CI->getCalledValue() == GetSelectorF)
         GetSelectorCI = CI;
     }
   }
 
-  assert(GetExnCI && "wasm.get.exception() call does not exist");
-  GetExnCI->replaceAllUsesWith(Exn);
+  // Cleanup pads without a __clang_call_terminate call have neither a
+  // wasm.get.exception() nor a wasm.get.ehselector() call; nothing to do.
+  if (!GetExnCI) {
+    assert(!GetSelectorCI &&
+           "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
+    return;
+  }
+
+  Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
+  GetExnCI->replaceAllUsesWith(ExtractExnCI);
   GetExnCI->eraseFromParent();
 
   // In case it is a catchpad with single catch (...) or a cleanuppad, we don't
   // need to call personality function because we don't need a selector.
-  if (FPI->getNumArgOperands() == 0 ||
-      (FPI->getNumArgOperands() == 1 &&
-       cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+  if (!NeedLSDA) {
     if (GetSelectorCI) {
       assert(GetSelectorCI->use_empty() &&
              "wasm.get.ehselector() still has uses!");
@@ -351,7 +310,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
     }
     return;
   }
-  IRB.SetInsertPoint(Exn->getNextNode());
+  IRB.SetInsertPoint(ExtractExnCI->getNextNode());
 
   // This is to create a map of <landingpad EH label, landingpad index> in
   // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -373,12 +332,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
     IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
 
   // Pseudocode: _Unwind_CallPersonality(exn);
-  CallInst *PersCI =
-      IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+  CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+                                    OperandBundleDef("funclet", CPI));
   PersCI->setDoesNotThrow();
 
   // Pseudocode: int selector = __wasm.landingpad_context.selector;
-  Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+  Instruction *Selector =
+      IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
 
   // Replace the return value from wasm.get.ehselector() with the selector value
   // loaded from __wasm_lpad_context.selector.
@@ -388,15 +348,15 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
 }
 
 void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+  // If an exception is not caught by a catchpad (i.e., it is a foreign
+  // exception), it will unwind to its parent catchswitch's unwind destination.
+  // We don't record an unwind destination for cleanuppads because every
+  // exception should be caught by them.
   for (const auto &BB : *F) {
     if (!BB.isEHPad())
       continue;
     const Instruction *Pad = BB.getFirstNonPHI();
 
-    // If an exception is not caught by a catchpad (i.e., it is a foreign
-    // exception), it will unwind to its parent catchswitch's unwind
-    // destination. We don't record an unwind destination for cleanuppads
-    // because every exception should be caught by it.
     if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
       const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
       if (!UnwindBB)
@@ -409,22 +369,4 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
         EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
     }
   }
-
-  // Record the unwind destination for invoke and cleanupret instructions.
-  for (const auto &BB : *F) {
-    const Instruction *TI = BB.getTerminator();
-    BasicBlock *UnwindBB = nullptr;
-    if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
-      UnwindBB = Invoke->getUnwindDest();
-    else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
-      UnwindBB = CleanupRet->getUnwindDest();
-    if (!UnwindBB)
-      continue;
-    const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
-    if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
-      // Currently there should be only one handler per a catchswitch.
-      EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
-    else // cleanuppad
-      EHInfo.setThrowUnwindDest(&BB, UnwindBB);
-  }
 }
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6a15240fa6e0..cdf79374e974 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1,9 +1,8 @@
 //===-- WinEHPrepare - Prepare exception handling for code generation ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1080,7 +1079,8 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
     SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
                                Twine(PN->getName(), ".wineh.spillslot"),
                                &F.getEntryBlock().front());
-    Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+    Value *V = new LoadInst(PN->getType(), SpillSlot,
+                            Twine(PN->getName(), ".wineh.reload"),
                             &*PHIBlock->getFirstInsertionPt());
     PN->replaceAllUsesWith(V);
     return SpillSlot;
@@ -1222,14 +1222,16 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
     Value *&Load = Loads[IncomingBlock];
     // Insert the load into the predecessor block
     if (!Load)
-      Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
-                          /*Volatile=*/false, IncomingBlock->getTerminator());
+      Load = new LoadInst(V->getType(), SpillSlot,
+                          Twine(V->getName(), ".wineh.reload"),
+                          /*isVolatile=*/false, IncomingBlock->getTerminator());
 
     U.set(Load);
   } else {
     // Reload right before the old use.
-    auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
-                              /*Volatile=*/false, UsingInst);
+    auto *Load = new LoadInst(V->getType(), SpillSlot,
+                              Twine(V->getName(), ".wineh.reload"),
+                              /*isVolatile=*/false, UsingInst);
     U.set(Load);
   }
 }
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 32a7457c2060..19c59e9542b4 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,9 +1,8 @@
 //===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,6 +110,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
         for (auto &MO : T.operands())
           MIB.add(MO);
         Terminators.push_back(&T);
+        if (T.isCall())
+          MF.updateCallSiteInfo(&T);
       }
     }
   }
diff --git a/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
index 8828671d9be9..86a6f9eebfa2 100644
--- a/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
 //===- AppendingTypeTableBuilder.cpp --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -50,13 +49,8 @@ Optional<TypeIndex> AppendingTypeTableBuilder::getNext(TypeIndex Prev) {
   return Prev;
 }
 
-CVType AppendingTypeTableBuilder::getType(TypeIndex Index) {
-  CVType Type;
-  Type.RecordData = SeenRecords[Index.toArrayIndex()];
-  const RecordPrefix *P =
-      reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
-  Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
-  return Type;
+CVType AppendingTypeTableBuilder::getType(TypeIndex Index){
+  return CVType(SeenRecords[Index.toArrayIndex()]);
 }
 
 StringRef AppendingTypeTableBuilder::getTypeName(TypeIndex Index) {
diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index cbcaa5692828..48b9b0496ffe 100644
--- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -1,9 +1,8 @@
 //===- CVSymbolVisitor.cpp --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,7 +20,7 @@ CVSymbolVisitor::CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks)
 template <typename T>
 static Error visitKnownRecord(CVSymbol &Record,
                               SymbolVisitorCallbacks &Callbacks) {
-  SymbolRecordKind RK = static_cast<SymbolRecordKind>(Record.Type);
+  SymbolRecordKind RK = static_cast<SymbolRecordKind>(Record.kind());
   T KnownRecord(RK);
   if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
     return EC;
@@ -30,7 +29,7 @@ static Error visitKnownRecord(CVSymbol &Record,
 
 static Error finishVisitation(CVSymbol &Record,
                               SymbolVisitorCallbacks &Callbacks) {
-  switch (Record.Type) {
+  switch (Record.kind()) {
   default:
     if (auto EC = Callbacks.visitUnknownSymbol(Record))
       return EC;
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index a4182a3b2fa1..ec4773d571c8 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -1,9 +1,8 @@
 //===- CVTypeVisitor.cpp ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,7 +22,7 @@ using namespace llvm::codeview;
 
 template <typename T>
 static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) {
-  TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Type);
+  TypeRecordKind RK = static_cast<TypeRecordKind>(Record.kind());
   T KnownRecord(RK);
   if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
     return EC;
@@ -97,7 +96,7 @@ CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
     : Callbacks(Callbacks) {}
 
 Error CVTypeVisitor::finishVisitation(CVType &Record) {
-  switch (Record.Type) {
+  switch (Record.kind()) {
   default:
     if (auto EC = Callbacks.visitUnknownType(Record))
       return EC;
@@ -210,6 +209,14 @@ struct VisitHelper {
     }
   }
 
+  VisitHelper(TypeVisitorCallbackPipeline &Callbacks, VisitorDataSource Source)
+      : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+    if (Source == VDS_BytesPresent) {
+      Pipeline = Callbacks;
+      Pipeline.addCallbackToPipelineFront(Deserializer);
+    }
+  }
+
   TypeDeserializer Deserializer;
   TypeVisitorCallbackPipeline Pipeline;
   CVTypeVisitor Visitor;
@@ -223,6 +230,13 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
   return V.Visitor.visitTypeRecord(Record, Index);
 }
 
+Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
+                                      TypeVisitorCallbackPipeline &Callbacks,
+                                      VisitorDataSource Source) {
+  VisitHelper V(Callbacks, Source);
+  return V.Visitor.visitTypeRecord(Record, Index);
+}
+
 Error llvm::codeview::visitTypeRecord(CVType &Record,
                                       TypeVisitorCallbacks &Callbacks,
                                       VisitorDataSource Source) {
diff --git a/lib/DebugInfo/CodeView/CodeViewError.cpp b/lib/DebugInfo/CodeView/CodeViewError.cpp
index 2a9753add311..69390c708f59 100644
--- a/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewError.cpp - Error extensions for CodeView --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
+namespace {
 // FIXME: This class is only here to support the transition to llvm::Error. It
 // will be removed once this transition is complete. Clients should prefer to
 // deal with the Error value directly, rather than converting to error_code.
@@ -39,6 +39,7 @@ public:
     llvm_unreachable("Unrecognized cv_error_code");
   }
 };
+} // namespace
 
 static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory;
 const std::error_category &llvm::codeview::CVErrorCategory() {
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 4fc14480578e..2f49474115a1 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewRecordIO.cpp -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,6 +20,7 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
   Limit.MaxLength = MaxLength;
   Limit.BeginOffset = getCurrentOffset();
   Limits.push_back(Limit);
+  resetStreamedLen();
   return Error::success();
 }
 
@@ -35,10 +35,29 @@ Error CodeViewRecordIO::endRecord() {
   // we don't know how big the record is until we're finished writing it, so
   // even though we don't commit the extraneous data, we still can't guarantee
   // we're at the end of the allocated data.
+
+  if (isStreaming()) {
+    // For streaming mode, add padding to align with 4 byte boundaries for each
+    // record
+    uint32_t Align = getStreamedLen() % 4;
+    if (Align == 0)
+      return Error::success();
+
+    int PaddingBytes = 4 - Align;
+    while (PaddingBytes > 0) {
+      char Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes);
+      StringRef BytesSR = StringRef(&Pad, sizeof(Pad));
+      Streamer->EmitBytes(BytesSR);
+      --PaddingBytes;
+    }
+  }
   return Error::success();
 }
 
 uint32_t CodeViewRecordIO::maxFieldLength() const {
+  if (isStreaming())
+    return 0;
+
   assert(!Limits.empty() && "Not in a record!");
 
   // The max length of the next field is the minimum of all lengths that would
@@ -78,8 +97,13 @@ Error CodeViewRecordIO::skipPadding() {
   return Reader->skip(BytesToAdvance);
 }
 
-Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes,
+                                          const Twine &Comment) {
+  if (isStreaming()) {
+    emitComment(Comment);
+    Streamer->EmitBinaryData(toStringRef(Bytes));
+    incrStreamedLen(Bytes.size());
+  } else if (isWriting()) {
     if (auto EC = Writer->writeBytes(Bytes))
       return EC;
   } else {
@@ -89,9 +113,10 @@ Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
+Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes,
+                                          const Twine &Comment) {
   ArrayRef<uint8_t> BytesRef(Bytes);
-  if (auto EC = mapByteVectorTail(BytesRef))
+  if (auto EC = mapByteVectorTail(BytesRef, Comment))
     return EC;
   if (!isWriting())
     Bytes.assign(BytesRef.begin(), BytesRef.end());
@@ -99,22 +124,31 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) {
+  if (isStreaming()) {
+    emitComment(Comment);
+    Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex()));
+    incrStreamedLen(sizeof(TypeInd.getIndex()));
+  } else if (isWriting()) {
     if (auto EC = Writer->writeInteger(TypeInd.getIndex()))
       return EC;
-    return Error::success();
+  } else {
+    uint32_t I;
+    if (auto EC = Reader->readInteger(I))
+      return EC;
+    TypeInd.setIndex(I);
   }
-
-  uint32_t I;
-  if (auto EC = Reader->readInteger(I))
-    return EC;
-  TypeInd.setIndex(I);
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value,
+                                          const Twine &Comment) {
+  if (isStreaming()) {
+    if (Value >= 0)
+      emitEncodedUnsignedInteger(static_cast<uint64_t>(Value), Comment);
+    else
+      emitEncodedSignedInteger(Value, Comment);
+  } else if (isWriting()) {
     if (Value >= 0) {
       if (auto EC = writeEncodedUnsignedInteger(static_cast<uint64_t>(Value)))
         return EC;
@@ -132,8 +166,11 @@ Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value,
+                                          const Twine &Comment) {
+  if (isStreaming())
+    emitEncodedUnsignedInteger(Value, Comment);
+  else if (isWriting()) {
     if (auto EC = writeEncodedUnsignedInteger(Value))
       return EC;
   } else {
@@ -145,18 +182,28 @@ Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value, const Twine &Comment) {
+  if (isStreaming()) {
+    if (Value.isSigned())
+      emitEncodedSignedInteger(Value.getSExtValue(), Comment);
+    else
+      emitEncodedUnsignedInteger(Value.getZExtValue(), Comment);
+  } else if (isWriting()) {
     if (Value.isSigned())
       return writeEncodedSignedInteger(Value.getSExtValue());
     return writeEncodedUnsignedInteger(Value.getZExtValue());
-  }
-
-  return consume(*Reader, Value);
+  } else
+    return consume(*Reader, Value);
+  return Error::success();
 }
 
-Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapStringZ(StringRef &Value, const Twine &Comment) {
+  if (isStreaming()) {
+    auto NullTerminatedString = StringRef(Value.data(), Value.size() + 1);
+    emitComment(Comment);
+    Streamer->EmitBytes(NullTerminatedString);
+    incrStreamedLen(NullTerminatedString.size());
+  } else if (isWriting()) {
     // Truncate if we attempt to write too much.
     StringRef S = Value.take_front(maxFieldLength() - 1);
     if (auto EC = Writer->writeCString(S))
@@ -168,8 +215,18 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapGuid(GUID &Guid) {
+Error CodeViewRecordIO::mapGuid(GUID &Guid, const Twine &Comment) {
   constexpr uint32_t GuidSize = 16;
+
+  if (isStreaming()) {
+    StringRef GuidSR =
+        StringRef((reinterpret_cast<const char *>(&Guid)), GuidSize);
+    emitComment(Comment);
+    Streamer->EmitBytes(GuidSR);
+    incrStreamedLen(GuidSize);
+    return Error::success();
+  }
+
   if (maxFieldLength() < GuidSize)
     return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
 
@@ -185,13 +242,17 @@ Error CodeViewRecordIO::mapGuid(GUID &Guid) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
-  if (isWriting()) {
+Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value,
+                                          const Twine &Comment) {
+
+  if (!isReading()) {
+    emitComment(Comment);
     for (auto V : Value) {
       if (auto EC = mapStringZ(V))
         return EC;
     }
-    if (auto EC = Writer->writeInteger<uint8_t>(0))
+    uint8_t FinalZero = 0;
+    if (auto EC = mapInteger(FinalZero))
       return EC;
   } else {
     StringRef S;
@@ -206,6 +267,69 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
   return Error::success();
 }
 
+/// Streams the negative integer \p Value as a CodeView numeric leaf: a 2-byte
+/// leaf kind (LF_CHAR, LF_SHORT, LF_LONG or LF_QUADWORD) followed by the value
+/// in the narrowest signed width that holds it. \p Comment is handed to
+/// emitComment() before the value, and the streamed length is advanced.
+void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value,
+                                                const Twine &Comment) {
+  assert(Value < 0 && "Encoded integer is not signed!");
+  if (Value >= std::numeric_limits<int8_t>::min()) {
+    Streamer->EmitIntValue(LF_CHAR, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 1);
+    incrStreamedLen(3);
+  } else if (Value >= std::numeric_limits<int16_t>::min()) {
+    Streamer->EmitIntValue(LF_SHORT, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 2);
+    incrStreamedLen(4);
+  } else if (Value >= std::numeric_limits<int32_t>::min()) {
+    Streamer->EmitIntValue(LF_LONG, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 4);
+    incrStreamedLen(6);
+  } else {
+    // Only values below INT32_MIN reach this branch, so the payload needs the
+    // full 8 bytes; the length is the 2-byte leaf kind plus that 8-byte value.
+    Streamer->EmitIntValue(LF_QUADWORD, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 8);
+    incrStreamedLen(10);
+  }
+}
+
+/// Streams \p Value as a CodeView encoded unsigned integer. Values below
+/// LF_NUMERIC are emitted directly as a 2-byte integer; larger values are
+/// emitted as a 2-byte leaf kind (LF_USHORT, LF_ULONG or LF_UQUADWORD)
+/// followed by the value in the narrowest unsigned width that holds it.
+/// \p Comment is handed to emitComment() before the value, and the streamed
+/// length is advanced by the number of bytes emitted.
+void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value,
+                                                  const Twine &Comment) {
+  if (Value < LF_NUMERIC) {
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 2);
+    incrStreamedLen(2);
+  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+    Streamer->EmitIntValue(LF_USHORT, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 2);
+    incrStreamedLen(4);
+  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+    Streamer->EmitIntValue(LF_ULONG, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 4);
+    incrStreamedLen(6);
+  } else {
+    // The 2-byte leaf kind plus the 8-byte value is 10 streamed bytes.
+    Streamer->EmitIntValue(LF_UQUADWORD, 2);
+    emitComment(Comment);
+    Streamer->EmitIntValue(Value, 8);
+    incrStreamedLen(10);
+  }
+}
+
 Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
   assert(Value < 0 && "Encoded integer is not signed!");
   if (Value >= std::numeric_limits<int8_t>::min()) {
diff --git a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
index f180fc6990fc..799cffb7116e 100644
--- a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
+++ b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -66,14 +66,11 @@ void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) {
   InjectedSegmentBytes =
       ArrayRef<uint8_t>(FLIB, FLIB + sizeof(SegmentInjection));
 
-  CVType Type;
-  Type.Type = getTypeLeafKind(RecordKind);
+  // Seed the first record with an appropriate record prefix.
+  RecordPrefix Prefix(getTypeLeafKind(RecordKind));
+  CVType Type(&Prefix, sizeof(Prefix));
   cantFail(Mapping.visitTypeBegin(Type));
 
-  // Seed the first trecord with an appropriate record prefix.
-  RecordPrefix Prefix;
-  Prefix.RecordLen = 0;
-  Prefix.RecordKind = Type.Type;
   cantFail(SegmentWriter.writeObject(Prefix));
 }
 
@@ -156,14 +153,9 @@ CVType ContinuationRecordBuilder::createSegmentRecord(
   MutableArrayRef<uint8_t> Data = Buffer.data();
   Data = Data.slice(OffBegin, OffEnd - OffBegin);
 
-  CVType Type;
-  Type.Type = getTypeLeafKind(*Kind);
-  Type.RecordData = Data;
-
   // Write the length to the RecordPrefix, making sure it does not include
   // sizeof(RecordPrefix.Length)
   RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(Data.data());
-  assert(Prefix->RecordKind == Type.Type);
   Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen);
 
   if (RefersTo.hasValue()) {
@@ -175,12 +167,12 @@ CVType ContinuationRecordBuilder::createSegmentRecord(
     CR->IndexRef = RefersTo->getIndex();
   }
 
-  return Type;
+  return CVType(Data);
 }
 
 std::vector<CVType> ContinuationRecordBuilder::end(TypeIndex Index) {
-  CVType Type;
-  Type.Type = getTypeLeafKind(*Kind);
+  RecordPrefix Prefix(getTypeLeafKind(*Kind));
+  CVType Type(&Prefix, sizeof(Prefix));
   cantFail(Mapping.visitTypeEnd(Type));
 
   // We're now done, and we have a series of segments each beginning at an
diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index 0f155a95d607..3d28bac00c44 100644
--- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugChecksumsSubsection.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
index cef27787cfd1..b23410409f88 100644
--- a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugCrossExSubsection.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index 4001741f560a..dbadafd3aaf3 100644
--- a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugCrossImpSubsection.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index 5881bf177a55..be8c32d5b294 100644
--- a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugFrameDataSubsection.cpp -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index 077c103a615b..48ec7e4ecdd6 100644
--- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugInlineeLinesSubsection.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index 57ad40819fbc..ea16c0a6c671 100644
--- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugLinesSubsection.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index 9b251f5931b3..63342749918d 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugStringTableSubsection.cpp - CodeView String Table -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugSubsection.cpp b/lib/DebugInfo/CodeView/DebugSubsection.cpp
index 67b428bfa713..3f93463fe6d6 100644
--- a/lib/DebugInfo/CodeView/DebugSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugSubsection.cpp -----------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 55f343c11e7f..0f704f286ee9 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -1,9 +1,8 @@
 //===- DebugSubsectionRecord.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
index 9b824333369b..7968b6a2d757 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -1,9 +1,8 @@
 //===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
index 60fbf9d747b2..52328967357b 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugSymbolRVASubsection.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
index dc8ba8c929ae..c833103663e4 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
@@ -1,9 +1,8 @@
 //===- DebugSymbolsSubsection.cpp -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,4 +30,4 @@ Error DebugSymbolsSubsection::commit(BinaryStreamWriter &Writer) const {
 void DebugSymbolsSubsection::addSymbol(CVSymbol Symbol) {
   Records.push_back(Symbol);
   Length += Symbol.length();
-}
\ No newline at end of file
+}
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index ef4e42f79ebc..54e68ae4ea9f 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -1,9 +1,8 @@
 //===- EnumTables.cpp - Enum to string conversion tables ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,10 +31,20 @@ static const EnumEntry<TypeLeafKind> TypeLeafNames[] = {
 #undef CV_TYPE
 };
 
-static const EnumEntry<uint16_t> RegisterNames[] = {
+static const EnumEntry<uint16_t> RegisterNames_X86[] = {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+};
+
+static const EnumEntry<uint16_t> RegisterNames_ARM64[] = {
+#define CV_REGISTERS_ARM64
 #define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
 };
 
 static const EnumEntry<uint32_t> PublicSymFlagNames[] = {
@@ -87,6 +96,7 @@ static const EnumEntry<codeview::SourceLanguage> SourceLanguages[] = {
     CV_ENUM_ENT(SourceLanguage, ILAsm),   CV_ENUM_ENT(SourceLanguage, Java),
     CV_ENUM_ENT(SourceLanguage, JScript), CV_ENUM_ENT(SourceLanguage, MSIL),
     CV_ENUM_ENT(SourceLanguage, HLSL),    CV_ENUM_ENT(SourceLanguage, D),
+    CV_ENUM_ENT(SourceLanguage, Swift),
 };
 
 static const EnumEntry<uint32_t> CompileSym2FlagNames[] = {
@@ -171,6 +181,7 @@ static const EnumEntry<unsigned> CPUTypeNames[] = {
     CV_ENUM_CLASS_ENT(CPUType, ARM_XMAC),
     CV_ENUM_CLASS_ENT(CPUType, ARM_WMMX),
     CV_ENUM_CLASS_ENT(CPUType, ARM7),
+    CV_ENUM_CLASS_ENT(CPUType, ARM64),
     CV_ENUM_CLASS_ENT(CPUType, Omni),
     CV_ENUM_CLASS_ENT(CPUType, Ia64),
     CV_ENUM_CLASS_ENT(CPUType, Ia64_2),
@@ -300,8 +311,11 @@ ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames() {
   return makeArrayRef(TypeLeafNames);
 }
 
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
-  return makeArrayRef(RegisterNames);
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu) {
+  // ARM64 has its own register table; all other CPU types use the x86 one.
+  if (Cpu == CPUType::ARM64)
+    return makeArrayRef(RegisterNames_ARM64);
+  return makeArrayRef(RegisterNames_X86);
 }
 
 ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
index b8d89c76da3b..a7a8c7ff82bf 100644
--- a/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -1,9 +1,8 @@
 //===- Formatters.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
index e76f9e12f0af..a7ad1d045f04 100644
--- a/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
 //===- GlobalTypeTableBuilder.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,14 +52,7 @@ Optional<TypeIndex> GlobalTypeTableBuilder::getNext(TypeIndex Prev) {
 }
 
 CVType GlobalTypeTableBuilder::getType(TypeIndex Index) {
-  CVType Type;
-  Type.RecordData = SeenRecords[Index.toArrayIndex()];
-  if (!Type.RecordData.empty()) {
-    assert(Type.RecordData.size() >= sizeof(RecordPrefix));
-    const RecordPrefix *P =
-        reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
-    Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
-  }
+  CVType Type(SeenRecords[Index.toArrayIndex()]);
   return Type;
 }
 
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index ddcad8c631d7..dc1253b7a39f 100644
--- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -1,9 +1,8 @@
 //===- LazyRandomTypeCollection.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/Line.cpp b/lib/DebugInfo/CodeView/Line.cpp
index 4cb766b5fd26..53adc8cac511 100644
--- a/lib/DebugInfo/CodeView/Line.cpp
+++ b/lib/DebugInfo/CodeView/Line.cpp
@@ -1,9 +1,8 @@
 //===-- Line.cpp ----------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
index 8aee4aa2e2ae..4d7cd468f3ee 100644
--- a/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
 //===- MergingTypeTableBuilder.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,11 +52,7 @@ Optional<TypeIndex> MergingTypeTableBuilder::getNext(TypeIndex Prev) {
 }
 
 CVType MergingTypeTableBuilder::getType(TypeIndex Index) {
-  CVType Type;
-  Type.RecordData = SeenRecords[Index.toArrayIndex()];
-  const RecordPrefix *P =
-      reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
-  Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
+  CVType Type(SeenRecords[Index.toArrayIndex()]);
   return Type;
 }
 
diff --git a/lib/DebugInfo/CodeView/RecordName.cpp b/lib/DebugInfo/CodeView/RecordName.cpp
index d868ae237a44..cfaad1581159 100644
--- a/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/lib/DebugInfo/CodeView/RecordName.cpp
@@ -1,9 +1,8 @@
 //===- RecordName.cpp ----------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/RecordSerialization.cpp b/lib/DebugInfo/CodeView/RecordSerialization.cpp
index bff9a619a846..e7f032f9c670 100644
--- a/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -1,9 +1,8 @@
 //===-- RecordSerialization.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index d28b7c3c2d83..654c40a7470d 100644
--- a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -3,13 +3,6 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
-static void writeRecordPrefix(BinaryStreamWriter &Writer, TypeLeafKind Kind) {
-  RecordPrefix Prefix;
-  Prefix.RecordKind = Kind;
-  Prefix.RecordLen = 0;
-  cantFail(Writer.writeObject(Prefix));
-}
-
 static void addPadding(BinaryStreamWriter &Writer) {
   uint32_t Align = Writer.getOffset() % 4;
   if (Align == 0)
@@ -32,10 +25,12 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
   BinaryStreamWriter Writer(ScratchBuffer, support::little);
   TypeRecordMapping Mapping(Writer);
 
-  CVType CVT;
-  CVT.Type = static_cast<TypeLeafKind>(Record.getKind());
+  // Write the record prefix first with a dummy length but real kind.
+  RecordPrefix DummyPrefix(uint16_t(Record.getKind()));
+  cantFail(Writer.writeObject(DummyPrefix));
 
-  writeRecordPrefix(Writer, CVT.Type);
+  RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(ScratchBuffer.data());
+  CVType CVT(Prefix, sizeof(RecordPrefix));
 
   cantFail(Mapping.visitTypeBegin(CVT));
   cantFail(Mapping.visitKnownRecord(CVT, Record));
@@ -43,8 +38,7 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
 
   addPadding(Writer);
 
-  RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(ScratchBuffer.data());
-
+  // Update the size and kind after serialization.
   Prefix->RecordKind = CVT.kind();
   Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t);
 
diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
index 85d9dbb8c7df..9e204eec8604 100644
--- a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
+++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -1,9 +1,8 @@
 //===- StringsAndChecksums.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 04e0bab745d3..27cb7e35234b 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -1,9 +1,8 @@
 //===-- SymbolDumper.cpp - CodeView symbol info dumper ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -102,10 +101,10 @@ void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) {
 }
 
 Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) {
-  W.startLine() << getSymbolKindName(CVR.Type);
+  W.startLine() << getSymbolKindName(CVR.kind());
   W.getOStream() << " {\n";
   W.indent();
-  W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames());
+  W.printEnum("Kind", unsigned(CVR.kind()), getSymbolTypeNames());
   return Error::success();
 }
 
@@ -326,7 +325,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
   W.printEnum("BaseRegister", uint16_t(DefRangeRegisterRel.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printBoolean("HasSpilledUDTMember",
                  DefRangeRegisterRel.hasSpilledUDTMember());
   W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
@@ -340,7 +339,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
   W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
   printLocalVariableAddrRange(DefRangeRegister.Range,
                               DefRangeRegister.getRelocationOffset());
@@ -351,7 +350,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
   W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
   W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent);
   printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
@@ -404,7 +403,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                      FrameCookie.getRelocationOffset(),
                                      FrameCookie.CodeOffset, &LinkageName);
   }
-  W.printEnum("Register", uint16_t(FrameCookie.Register), getRegisterNames());
+  W.printEnum("Register", uint16_t(FrameCookie.Register),
+              getRegisterNames(CompilationCPUType));
   W.printEnum("CookieKind", uint16_t(FrameCookie.CookieKind),
               getFrameCookieKindNames());
   W.printHex("Flags", FrameCookie.Flags);
@@ -425,10 +425,10 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                getFrameProcSymFlagNames());
   W.printEnum("LocalFramePtrReg",
               uint16_t(FrameProc.getLocalFramePtrReg(CompilationCPUType)),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printEnum("ParamFramePtrReg",
               uint16_t(FrameProc.getParamFramePtrReg(CompilationCPUType)),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   return Error::success();
 }
 
@@ -506,7 +506,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
 Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                            RegisterSym &Register) {
   printTypeIndex("Type", Register.Index);
-  W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
+  W.printEnum("Seg", uint16_t(Register.Register),
+              getRegisterNames(CompilationCPUType));
   W.printString("Name", Register.Name);
   return Error::success();
 }
@@ -600,7 +601,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                            RegRelativeSym &RegRel) {
   W.printHex("Offset", RegRel.Offset);
   printTypeIndex("Type", RegRel.Type);
-  W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames());
+  W.printEnum("Register", uint16_t(RegRel.Register),
+              getRegisterNames(CompilationCPUType));
   W.printString("VarName", RegRel.Name);
   return Error::success();
 }
@@ -631,6 +633,18 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
   return Error::success();
 }
 
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+                                           AnnotationSym &Annot) {
+  W.printHex("Offset", Annot.CodeOffset); // Dumps an AnnotationSym record.
+  W.printHex("Segment", Annot.Segment);
+  // Print each string via printString while the "Strings" ListScope is live.
+  ListScope S(W, "Strings");
+  for (StringRef Str : Annot.Strings)
+    W.printString(Str);
+  // Unconditionally reports success.
+  return Error::success();
+}
+
 Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
   W.printNumber("Length", CVR.length());
   return Error::success();
diff --git a/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
index 01746138ad1f..51a5a9e9243e 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -1,9 +1,8 @@
 //===- SymbolRecordHelpers.cpp ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 2af8205cebc3..70889839ef48 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -1,9 +1,8 @@
 //===- SymbolRecordMapping.cpp -----------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -472,6 +471,18 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
   return Error::success();
 }
 
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+                                            AnnotationSym &Annot) {
+  // Maps an AnnotationSym: CodeOffset, Segment, then the Strings vector.
+  error(IO.mapInteger(Annot.CodeOffset));
+  error(IO.mapInteger(Annot.Segment));
+  error(IO.mapVectorN<uint16_t>( // uint16_t: presumably the count type
+      Annot.Strings,
+      [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+  // NOTE(review): error(...) presumably returns early on failure; confirm.
+  return Error::success();
+}
+
 RegisterId codeview::decodeFramePtrReg(EncodedFramePtrReg EncodedReg,
                                        CPUType CPU) {
   assert(unsigned(EncodedReg) < 4);
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 0071ecc85685..de9bb42b1798 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -1,9 +1,8 @@
 //===- SymbolSerializer.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index f5d3bea43a14..d5fea5ee5e29 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -1,9 +1,8 @@
 //===-- TypeDumpVisitor.cpp - CodeView type info dumper ----------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -172,11 +171,11 @@ Error TypeDumpVisitor::visitTypeBegin(CVType &Record) {
 }
 
 Error TypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
-  W->startLine() << getLeafTypeName(Record.Type);
+  W->startLine() << getLeafTypeName(Record.kind());
   W->getOStream() << " (" << HexNumber(Index.getIndex()) << ")";
   W->getOStream() << " {\n";
   W->indent();
-  W->printEnum("TypeLeafKind", unsigned(Record.Type),
+  W->printEnum("TypeLeafKind", unsigned(Record.kind()),
                makeArrayRef(LeafTypeNames));
   return Error::success();
 }
diff --git a/lib/DebugInfo/CodeView/TypeHashing.cpp b/lib/DebugInfo/CodeView/TypeHashing.cpp
index 826faef35875..2dbc11a84f0b 100644
--- a/lib/DebugInfo/CodeView/TypeHashing.cpp
+++ b/lib/DebugInfo/CodeView/TypeHashing.cpp
@@ -1,9 +1,8 @@
 //===- TypeHashing.cpp -------------------------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,10 +54,16 @@ GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
         reinterpret_cast<const TypeIndex *>(RefData.data()), Ref.Count);
     for (TypeIndex TI : Indices) {
       ArrayRef<uint8_t> BytesToHash;
-      if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) {
+      if (TI.isSimple() || TI.isNoneType()) {
         const uint8_t *IndexBytes = reinterpret_cast<const uint8_t *>(&TI);
         BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex));
       } else {
+        if (TI.toArrayIndex() >= Prev.size() ||
+            Prev[TI.toArrayIndex()].empty()) {
+          // There are references to yet-unhashed records. Suspend hashing for
+          // this record until all the other records are processed.
+          return {};
+        }
         BytesToHash = Prev[TI.toArrayIndex()].Hash;
       }
       S.update(BytesToHash);
diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp
index 332d67470da5..604d342448d3 100644
--- a/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -1,9 +1,8 @@
 //===-- TypeIndex.cpp - CodeView type index ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 839ab6f0a705..e84e1c9cea78 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -1,9 +1,8 @@
 //===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
@@ -364,14 +363,16 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
   // values.  One idea is to define some structures representing these types
   // that would allow the use of offsetof().
   switch (Kind) {
-  case SymbolKind::S_GPROC32:
-  case SymbolKind::S_LPROC32:
   case SymbolKind::S_GPROC32_ID:
   case SymbolKind::S_LPROC32_ID:
   case SymbolKind::S_LPROC32_DPC:
   case SymbolKind::S_LPROC32_DPC_ID:
     Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
     break;
+  case SymbolKind::S_GPROC32:
+  case SymbolKind::S_LPROC32:
+    Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type
+    break;
   case SymbolKind::S_UDT:
     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
     break;
diff --git a/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
index 2a66474cf5b6..8e632f3be460 100644
--- a/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
@@ -1,9 +1,8 @@
 //===- TypeRecordHelpers.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 3203ff64d3b1..47928c2eef64 100644
--- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -1,9 +1,8 @@
 //===- TypeRecordMapping.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,19 +21,19 @@ struct MapOneMethodRecord {
       : IsFromOverloadList(IsFromOverloadList) {}
 
   Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const {
-    error(IO.mapInteger(Method.Attrs.Attrs));
+    error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier"));
     if (IsFromOverloadList) {
       uint16_t Padding = 0;
-      error(IO.mapInteger(Padding));
+      error(IO.mapInteger(Padding, "Padding"));
     }
-    error(IO.mapInteger(Method.Type));
+    error(IO.mapInteger(Method.Type, "Type"));
     if (Method.isIntroducingVirtual()) {
-      error(IO.mapInteger(Method.VFTableOffset));
-    } else if (!IO.isWriting())
+      error(IO.mapInteger(Method.VFTableOffset, "VFTableOffset"));
+    } else if (IO.isReading())
       Method.VFTableOffset = -1;
 
     if (!IsFromOverloadList)
-      error(IO.mapStringZ(Method.Name));
+      error(IO.mapStringZ(Method.Name, "Name"));
 
     return Error::success();
   }
@@ -73,9 +72,12 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
       error(IO.mapStringZ(N));
     }
   } else {
-    error(IO.mapStringZ(Name));
+    // Reading & Streaming modes come after writing mode is executed for each
+    // record. Truncating large names is done during writing, so it's not
+    // necessary to do it while reading or streaming.
+    error(IO.mapStringZ(Name, "Name"));
     if (HasUniqueName)
-      error(IO.mapStringZ(UniqueName));
+      error(IO.mapStringZ(UniqueName, "LinkageName"));
   }
 
   return Error::success();
@@ -89,14 +91,18 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) {
   // split with continuation records.  All other record types cannot be
   // longer than the maximum record length.
   Optional<uint32_t> MaxLen;
-  if (CVR.Type != TypeLeafKind::LF_FIELDLIST &&
-      CVR.Type != TypeLeafKind::LF_METHODLIST)
+  if (CVR.kind() != TypeLeafKind::LF_FIELDLIST &&
+      CVR.kind() != TypeLeafKind::LF_METHODLIST)
     MaxLen = MaxRecordLength - sizeof(RecordPrefix);
   error(IO.beginRecord(MaxLen));
-  TypeKind = CVR.Type;
+  TypeKind = CVR.kind();
   return Error::success();
 }
 
+Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) {
+  return visitTypeBegin(CVR); // Index is ignored; see one-arg overload.
+}
+
 Error TypeRecordMapping::visitTypeEnd(CVType &Record) {
   assert(TypeKind.hasValue() && "Not in a type mapping!");
   assert(!MemberKind.hasValue() && "Still in a member mapping!");
@@ -127,7 +133,7 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
   assert(TypeKind.hasValue() && "Not in a type mapping!");
   assert(MemberKind.hasValue() && "Not in a member mapping!");
 
-  if (!IO.isWriting()) {
+  if (IO.isReading()) {
     if (auto EC = IO.skipPadding())
       return EC;
   }
@@ -138,33 +144,32 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) {
-  error(IO.mapInteger(Record.ModifiedType));
-  error(IO.mapEnum(Record.Modifiers));
-
+  error(IO.mapInteger(Record.ModifiedType, "ModifiedType"));
+  error(IO.mapEnum(Record.Modifiers, "Modifiers"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           ProcedureRecord &Record) {
-  error(IO.mapInteger(Record.ReturnType));
-  error(IO.mapEnum(Record.CallConv));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.ParameterCount));
-  error(IO.mapInteger(Record.ArgumentList));
+  error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+  error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+  error(IO.mapEnum(Record.Options, "FunctionOptions"));
+  error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+  error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MemberFunctionRecord &Record) {
-  error(IO.mapInteger(Record.ReturnType));
-  error(IO.mapInteger(Record.ClassType));
-  error(IO.mapInteger(Record.ThisType));
-  error(IO.mapEnum(Record.CallConv));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.ParameterCount));
-  error(IO.mapInteger(Record.ArgumentList));
-  error(IO.mapInteger(Record.ThisPointerAdjustment));
+  error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+  error(IO.mapInteger(Record.ClassType, "ClassType"));
+  error(IO.mapInteger(Record.ThisType, "ThisType"));
+  error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+  error(IO.mapEnum(Record.Options, "FunctionOptions"));
+  error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+  error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
+  error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment"));
 
   return Error::success();
 }
@@ -172,8 +177,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArgListRecord &Record) {
   error(IO.mapVectorN<uint32_t>(
       Record.ArgIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
-
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Argument");
+      },
+      "NumArgs"));
   return Error::success();
 }
 
@@ -181,47 +188,50 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           StringListRecord &Record) {
   error(IO.mapVectorN<uint32_t>(
       Record.StringIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Strings");
+      },
+      "NumStrings"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
-  error(IO.mapInteger(Record.ReferentType));
-  error(IO.mapInteger(Record.Attrs));
+  error(IO.mapInteger(Record.ReferentType, "PointeeType"));
+  error(IO.mapInteger(Record.Attrs, "Attributes"));
 
   if (Record.isPointerToMember()) {
-    if (!IO.isWriting())
+    if (IO.isReading())
       Record.MemberInfo.emplace();
 
     MemberPointerInfo &M = *Record.MemberInfo;
-    error(IO.mapInteger(M.ContainingType));
-    error(IO.mapEnum(M.Representation));
+    error(IO.mapInteger(M.ContainingType, "ClassType"));
+    error(IO.mapEnum(M.Representation, "Representation"));
   }
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArrayRecord &Record) {
-  error(IO.mapInteger(Record.ElementType));
-  error(IO.mapInteger(Record.IndexType));
-  error(IO.mapEncodedInteger(Record.Size));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ElementType, "ElementType"));
+  error(IO.mapInteger(Record.IndexType, "IndexType"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
-  assert((CVR.Type == TypeLeafKind::LF_STRUCTURE) ||
-         (CVR.Type == TypeLeafKind::LF_CLASS) ||
-         (CVR.Type == TypeLeafKind::LF_INTERFACE));
-
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.FieldList));
-  error(IO.mapInteger(Record.DerivationList));
-  error(IO.mapInteger(Record.VTableShape));
-  error(IO.mapEncodedInteger(Record.Size));
+  assert((CVR.kind() == TypeLeafKind::LF_STRUCTURE) ||
+         (CVR.kind() == TypeLeafKind::LF_CLASS) ||
+         (CVR.kind() == TypeLeafKind::LF_INTERFACE));
+
+  error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.FieldList, "FieldList"));
+  error(IO.mapInteger(Record.DerivationList, "DerivedFrom"));
+  error(IO.mapInteger(Record.VTableShape, "VShape"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -229,10 +239,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.FieldList));
-  error(IO.mapEncodedInteger(Record.Size));
+  error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.FieldList, "FieldList"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -240,10 +250,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.UnderlyingType));
-  error(IO.mapInteger(Record.FieldList));
+  error(IO.mapInteger(Record.MemberCount, "NumEnumerators"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType"));
+  error(IO.mapInteger(Record.FieldList, "FieldListType"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -251,9 +261,9 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapInteger(Record.BitSize));
-  error(IO.mapInteger(Record.BitOffset));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapInteger(Record.BitSize, "BitSize"));
+  error(IO.mapInteger(Record.BitOffset, "BitOffset"));
 
   return Error::success();
 }
@@ -261,10 +271,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           VFTableShapeRecord &Record) {
   uint16_t Size;
-  if (IO.isWriting()) {
+  if (!IO.isReading()) {
     ArrayRef<VFTableSlotKind> Slots = Record.getSlots();
     Size = Slots.size();
-    error(IO.mapInteger(Size));
+    error(IO.mapInteger(Size, "VFEntryCount"));
 
     for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
       uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
@@ -288,61 +298,64 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, VFTableRecord &Record) {
-  error(IO.mapInteger(Record.CompleteClass));
-  error(IO.mapInteger(Record.OverriddenVFTable));
-  error(IO.mapInteger(Record.VFPtrOffset));
+  error(IO.mapInteger(Record.CompleteClass, "CompleteClass"));
+  error(IO.mapInteger(Record.OverriddenVFTable, "OverriddenVFTable"));
+  error(IO.mapInteger(Record.VFPtrOffset, "VFPtrOffset"));
   uint32_t NamesLen = 0;
-  if (IO.isWriting()) {
+  if (!IO.isReading()) {
     for (auto Name : Record.MethodNames)
       NamesLen += Name.size() + 1;
   }
   error(IO.mapInteger(NamesLen));
   error(IO.mapVectorTail(
       Record.MethodNames,
-      [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+      [](CodeViewRecordIO &IO, StringRef &S) {
+        return IO.mapStringZ(S, "MethodName");
+      },
+      "VFTableName"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, StringIdRecord &Record) {
-  error(IO.mapInteger(Record.Id));
-  error(IO.mapStringZ(Record.String));
+  error(IO.mapInteger(Record.Id, "Id"));
+  error(IO.mapStringZ(Record.String, "StringData"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           UdtSourceLineRecord &Record) {
-  error(IO.mapInteger(Record.UDT));
-  error(IO.mapInteger(Record.SourceFile));
-  error(IO.mapInteger(Record.LineNumber));
+  error(IO.mapInteger(Record.UDT, "UDT"));
+  error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+  error(IO.mapInteger(Record.LineNumber, "LineNumber"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           UdtModSourceLineRecord &Record) {
-  error(IO.mapInteger(Record.UDT));
-  error(IO.mapInteger(Record.SourceFile));
-  error(IO.mapInteger(Record.LineNumber));
-  error(IO.mapInteger(Record.Module));
+  error(IO.mapInteger(Record.UDT, "UDT"));
+  error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+  error(IO.mapInteger(Record.LineNumber, "LineNumber"));
+  error(IO.mapInteger(Record.Module, "Module"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FuncIdRecord &Record) {
-  error(IO.mapInteger(Record.ParentScope));
-  error(IO.mapInteger(Record.FunctionType));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ParentScope, "ParentScope"));
+  error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MemberFuncIdRecord &Record) {
-  error(IO.mapInteger(Record.ClassType));
-  error(IO.mapInteger(Record.FunctionType));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ClassType, "ClassType"));
+  error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -351,7 +364,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           BuildInfoRecord &Record) {
   error(IO.mapVectorN<uint16_t>(
       Record.ArgIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Argument");
+      },
+      "NumArgs"));
 
   return Error::success();
 }
@@ -360,7 +376,7 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MethodOverloadListRecord &Record) {
   // TODO: Split the list into multiple records if it's longer than 64KB, using
   // a subrecord of TypeRecordKind::Index to chain the records together.
-  error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true)));
+  error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true), "Method"));
 
   return Error::success();
 }
@@ -374,22 +390,22 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           TypeServer2Record &Record) {
-  error(IO.mapGuid(Record.Guid));
-  error(IO.mapInteger(Record.Age));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapGuid(Record.Guid, "Guid"));
+  error(IO.mapInteger(Record.Age, "Age"));
+  error(IO.mapStringZ(Record.Name, "Name"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) {
-  error(IO.mapEnum(Record.Mode));
+  error(IO.mapEnum(Record.Mode, "Mode"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           BaseClassRecord &Record) {
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapEncodedInteger(Record.Offset));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "BaseType"));
+  error(IO.mapEncodedInteger(Record.Offset, "BaseOffset"));
 
   return Error::success();
 }
@@ -399,27 +415,27 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
   error(IO.mapInteger(Record.Attrs.Attrs));
 
   // FIXME: Handle full APInt such as __int128.
-  error(IO.mapEncodedInteger(Record.Value));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapEncodedInteger(Record.Value, "EnumValue"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           DataMemberRecord &Record) {
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapEncodedInteger(Record.FieldOffset));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           OverloadedMethodRecord &Record) {
-  error(IO.mapInteger(Record.NumOverloads));
-  error(IO.mapInteger(Record.MethodList));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.NumOverloads, "MethodCount"));
+  error(IO.mapInteger(Record.MethodList, "MethodListIndex"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -434,9 +450,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           NestedTypeRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -444,9 +460,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           StaticDataMemberRecord &Record) {
 
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -454,11 +470,11 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           VirtualBaseClassRecord &Record) {
 
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.BaseType));
-  error(IO.mapInteger(Record.VBPtrType));
-  error(IO.mapEncodedInteger(Record.VBPtrOffset));
-  error(IO.mapEncodedInteger(Record.VTableIndex));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.BaseType, "BaseType"));
+  error(IO.mapInteger(Record.VBPtrType, "VBPtrType"));
+  error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset"));
+  error(IO.mapEncodedInteger(Record.VTableIndex, "VBTableIndex"));
 
   return Error::success();
 }
@@ -466,8 +482,8 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           VFPtrRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.Type));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.Type, "Type"));
 
   return Error::success();
 }
@@ -475,23 +491,23 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           ListContinuationRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.ContinuationIndex));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.ContinuationIndex, "ContinuationIndex"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           PrecompRecord &Precomp) {
-  error(IO.mapInteger(Precomp.StartTypeIndex));
-  error(IO.mapInteger(Precomp.TypesCount));
-  error(IO.mapInteger(Precomp.Signature));
-  error(IO.mapStringZ(Precomp.PrecompFilePath));
+  error(IO.mapInteger(Precomp.StartTypeIndex, "StartIndex"));
+  error(IO.mapInteger(Precomp.TypesCount, "Count"));
+  error(IO.mapInteger(Precomp.Signature, "Signature"));
+  error(IO.mapStringZ(Precomp.PrecompFilePath, "PrecompFile"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           EndPrecompRecord &EndPrecomp) {
-  error(IO.mapInteger(EndPrecomp.Signature));
+  error(IO.mapInteger(EndPrecomp.Signature, "Signature"));
   return Error::success();
 }
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index bae11ce6a6a1..aba0e96d606e 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -1,9 +1,8 @@
 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index cf951baa5111..e13068b5b1eb 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -1,9 +1,8 @@
 //===- TypeTableCollection.cpp -------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,11 +36,7 @@ Optional<TypeIndex> TypeTableCollection::getNext(TypeIndex Prev) {
 
 CVType TypeTableCollection::getType(TypeIndex Index) {
   assert(Index.toArrayIndex() < Records.size());
-  ArrayRef<uint8_t> Bytes = Records[Index.toArrayIndex()];
-  const RecordPrefix *Prefix =
-      reinterpret_cast<const RecordPrefix *>(Bytes.data());
-  TypeLeafKind Kind = static_cast<TypeLeafKind>(uint16_t(Prefix->RecordKind));
-  return CVType(Kind, Bytes);
+  return CVType(Records[Index.toArrayIndex()]);
 }
 
 StringRef TypeTableCollection::getTypeName(TypeIndex Index) {
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index f49ab40fad9a..f4dd79937608 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -1,9 +1,8 @@
 //===- DWARFAbbreviationDeclaration.cpp -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -164,11 +163,11 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
   for (const auto &Spec : AttributeSpecs) {
     if (*MatchAttrIndex == AttrIndex) {
       // We have arrived at the attribute to extract, extract if from Offset.
+      if (Spec.isImplicitConst())
+        return DWARFFormValue::createFromSValue(Spec.Form,
+                                                Spec.getImplicitConstValue());
+
       DWARFFormValue FormValue(Spec.Form);
-      if (Spec.isImplicitConst()) {
-        FormValue.setSValue(Spec.getImplicitConstValue());
-        return FormValue;
-      }
       if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U))
         return FormValue;
     }
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 54daf34ff253..0721efb40f6a 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -1,9 +1,8 @@
 //===- DWARFAcceleratorTable.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,7 +41,7 @@ static Atom formatAtom(unsigned Atom) { return {Atom}; }
 
 DWARFAcceleratorTable::~DWARFAcceleratorTable() = default;
 
-llvm::Error AppleAcceleratorTable::extract() {
+Error AppleAcceleratorTable::extract() {
   uint32_t Offset = 0;
 
   // Check that we can at least read the header.
@@ -377,7 +376,7 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const {
   W.startLine() << "Augmentation: '" << AugmentationString << "'\n";
 }
 
-llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
+Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
                                              uint32_t *Offset) {
   // Check that we can read the fixed-size part.
   if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1))
@@ -519,6 +518,7 @@ Error DWARFDebugNames::NameIndex::extract() {
                                "Duplicate abbreviation code.");
   }
 }
+
 DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr)
     : NameIdx(&NameIdx), Abbr(&Abbr) {
   // This merely creates form values. It is up to the caller
@@ -585,13 +585,14 @@ uint32_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const {
 
 uint32_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const {
   assert(TU < Hdr.LocalTypeUnitCount);
-  uint32_t Offset = CUsBase + Hdr.CompUnitCount * 4;
+  uint32_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU);
   return Section.AccelSection.getRelocatedValue(4, &Offset);
 }
 
 uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const {
   assert(TU < Hdr.ForeignTypeUnitCount);
-  uint32_t Offset = CUsBase + (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) * 4;
+  uint32_t Offset =
+      CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU;
   return Section.AccelSection.getU64(&Offset);
 }
 
@@ -754,11 +755,11 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const {
     dumpName(W, NTE, None);
 }
 
-llvm::Error DWARFDebugNames::extract() {
+Error DWARFDebugNames::extract() {
   uint32_t Offset = 0;
   while (AccelSection.isValidOffset(Offset)) {
     NameIndex Next(*this, Offset);
-    if (llvm::Error E = Next.extract())
+    if (Error E = Next.extract())
       return E;
     Offset = Next.getNextUnitOffset();
     NameIndices.push_back(std::move(Next));
diff --git a/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
index 86c8d19c02f4..ef6da08d34aa 100644
--- a/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugAranges.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 00a23b3898fa..74cce42466dd 100644
--- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -1,9 +1,8 @@
 //===-- DWARFCompileUnit.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index e6620ee3dd1d..5ede9bf59619 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1,9 +1,8 @@
 //===- DWARFContext.cpp ---------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,11 +36,12 @@
 #include "llvm/Object/Decompressor.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/RelocVisitor.h"
+#include "llvm/Object/RelocationResolver.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -102,7 +102,8 @@ static ContributionCollection
 collectContributionData(DWARFContext::unit_iterator_range Units) {
   ContributionCollection Contributions;
   for (const auto &U : Units)
-    Contributions.push_back(U->getStringOffsetsTableContribution());
+    if (const auto &C = U->getStringOffsetsTableContribution())
+      Contributions.push_back(C);
   // Sort the contributions so that any invalid ones are placed at
   // the start of the contributions vector. This way they are reported
   // first.
@@ -158,9 +159,9 @@ static void dumpDWARFv5StringOffsetsSection(
 
     // Detect overlapping contributions.
     if (Offset > ContributionHeader) {
-      OS << "error: overlapping contributions to string offsets table in "
-            "section ."
-         << SectionName << ".\n";
+      WithColor::error()
+          << "overlapping contributions to string offsets table in section ."
+          << SectionName << ".\n";
       return;
     }
     // Report a gap in the table.
@@ -269,11 +270,11 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
 }
 
 // Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
-static void
-dumpRnglistsSection(raw_ostream &OS, DWARFDataExtractor &rnglistData,
-                    llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
-                        LookupPooledAddress,
-                    DIDumpOptions DumpOpts) {
+static void dumpRnglistsSection(
+    raw_ostream &OS, DWARFDataExtractor &rnglistData,
+    llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
+        LookupPooledAddress,
+    DIDumpOptions DumpOpts) {
   uint32_t Offset = 0;
   while (rnglistData.isValidOffset(Offset)) {
     llvm::DWARFDebugRnglistTable Rnglists;
@@ -926,6 +927,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
     DWARFDie DIE = Worklist.back();
     Worklist.pop_back();
 
+    if (!DIE.isValid())
+      continue;
+
     if (DIE.getTag() == DW_TAG_lexical_block &&
         DIE.addressRangeContainsAddress(Address)) {
       Result.BlockDIE = DIE;
@@ -939,6 +943,8 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
   return Result;
 }
 
+/// TODO: change input parameter from "uint64_t Address"
+///       into "SectionedAddress Address"
 static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
                                                   uint64_t Address,
                                                   FunctionNameKind Kind,
@@ -967,36 +973,155 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
   return FoundResult;
 }
 
-DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
+static Optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) {
+  if (auto SizeAttr = Type.find(DW_AT_byte_size))
+    if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant())
+      return Size;
+
+  switch (Type.getTag()) {
+  case DW_TAG_pointer_type:
+  case DW_TAG_reference_type:
+  case DW_TAG_rvalue_reference_type:
+    return PointerSize;
+  case DW_TAG_ptr_to_member_type: {
+    if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
+      if (BaseType.getTag() == DW_TAG_subroutine_type)
+        return 2 * PointerSize;
+    return PointerSize;
+  }
+  case DW_TAG_const_type:
+  case DW_TAG_volatile_type:
+  case DW_TAG_restrict_type:
+  case DW_TAG_typedef: {
+    if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
+      return getTypeSize(BaseType, PointerSize);
+    break;
+  }
+  case DW_TAG_array_type: {
+    DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type);
+    if (!BaseType)
+      return Optional<uint64_t>();
+    Optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize);
+    if (!BaseSize)
+      return Optional<uint64_t>();
+    uint64_t Size = *BaseSize;
+    for (DWARFDie Child : Type) {
+      if (Child.getTag() != DW_TAG_subrange_type)
+        continue;
+
+      if (auto ElemCountAttr = Child.find(DW_AT_count))
+        if (Optional<uint64_t> ElemCount =
+                ElemCountAttr->getAsUnsignedConstant())
+          Size *= *ElemCount;
+      if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound))
+        if (Optional<int64_t> UpperBound =
+                UpperBoundAttr->getAsSignedConstant()) {
+          int64_t LowerBound = 0;
+          if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound))
+            LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0);
+          Size *= *UpperBound - LowerBound + 1;
+        }
+    }
+    return Size;
+  }
+  default:
+    break;
+  }
+  return Optional<uint64_t>();
+}
+
+void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
+                                   DWARFDie Die, std::vector<DILocal> &Result) {
+  if (Die.getTag() == DW_TAG_variable ||
+      Die.getTag() == DW_TAG_formal_parameter) {
+    DILocal Local;
+    if (auto NameAttr = Subprogram.find(DW_AT_name))
+      if (Optional<const char *> Name = NameAttr->getAsCString())
+        Local.FunctionName = *Name;
+    if (auto LocationAttr = Die.find(DW_AT_location))
+      if (Optional<ArrayRef<uint8_t>> Location = LocationAttr->getAsBlock())
+        if (!Location->empty() && (*Location)[0] == DW_OP_fbreg)
+          Local.FrameOffset =
+              decodeSLEB128(Location->data() + 1, nullptr, Location->end());
+    if (auto TagOffsetAttr = Die.find(DW_AT_LLVM_tag_offset))
+      Local.TagOffset = TagOffsetAttr->getAsUnsignedConstant();
+
+    if (auto Origin =
+            Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
+      Die = Origin;
+    if (auto NameAttr = Die.find(DW_AT_name))
+      if (Optional<const char *> Name = NameAttr->getAsCString())
+        Local.Name = *Name;
+    if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+      Local.Size = getTypeSize(Type, getCUAddrSize());
+    if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) {
+      if (const auto *LT = CU->getContext().getLineTableForUnit(CU))
+        LT->getFileNameByIndex(
+            DeclFileAttr->getAsUnsignedConstant().getValue(),
+            CU->getCompilationDir(),
+            DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+            Local.DeclFile);
+    }
+    if (auto DeclLineAttr = Die.find(DW_AT_decl_line))
+      Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().getValue();
+
+    Result.push_back(Local);
+    return;
+  }
+
+  if (Die.getTag() == DW_TAG_inlined_subroutine)
+    if (auto Origin =
+            Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
+      Subprogram = Origin;
+
+  for (auto Child : Die)
+    addLocalsForDie(CU, Subprogram, Child, Result);
+}
+
+std::vector<DILocal>
+DWARFContext::getLocalsForAddress(object::SectionedAddress Address) {
+  std::vector<DILocal> Result;
+  DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+  if (!CU)
+    return Result;
+
+  DWARFDie Subprogram = CU->getSubroutineForAddress(Address.Address);
+  if (Subprogram.isValid())
+    addLocalsForDie(CU, Subprogram, Subprogram, Result);
+  return Result;
+}
+
+DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
                                                DILineInfoSpecifier Spec) {
   DILineInfo Result;
 
-  DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+  DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
   if (!CU)
     return Result;
-  getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind,
-                                        Result.FunctionName,
-                                        Result.StartLine);
+
+  getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
+                                        Result.FunctionName, Result.StartLine);
   if (Spec.FLIKind != FileLineInfoKind::None) {
-    if (const DWARFLineTable *LineTable = getLineTableForUnit(CU))
-      LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
-                                           Spec.FLIKind, Result);
+    if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) {
+      LineTable->getFileLineInfoForAddress(
+          {Address.Address, Address.SectionIndex}, CU->getCompilationDir(),
+          Spec.FLIKind, Result);
+    }
   }
   return Result;
 }
 
-DILineInfoTable
-DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
-                                         DILineInfoSpecifier Spec) {
+DILineInfoTable DWARFContext::getLineInfoForAddressRange(
+    object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Spec) {
   DILineInfoTable  Lines;
-  DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+  DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
   if (!CU)
     return Lines;
 
   std::string FunctionName = "<invalid>";
   uint32_t StartLine = 0;
-  getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind, FunctionName,
-                                        StartLine);
+  getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
+                                        FunctionName, StartLine);
 
   // If the Specifier says we don't need FileLineInfo, just
   // return the top-most function at the starting address.
@@ -1004,7 +1129,7 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
     DILineInfo Result;
     Result.FunctionName = FunctionName;
     Result.StartLine = StartLine;
-    Lines.push_back(std::make_pair(Address, Result));
+    Lines.push_back(std::make_pair(Address.Address, Result));
     return Lines;
   }
 
@@ -1012,8 +1137,10 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
 
   // Get the index of row we're looking for in the line table.
   std::vector<uint32_t> RowVector;
-  if (!LineTable->lookupAddressRange(Address, Size, RowVector))
+  if (!LineTable->lookupAddressRange({Address.Address, Address.SectionIndex},
+                                     Size, RowVector)) {
     return Lines;
+  }
 
   for (uint32_t RowIndex : RowVector) {
     // Take file number and line/column from the row.
@@ -1025,33 +1152,33 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
     Result.Line = Row.Line;
     Result.Column = Row.Column;
     Result.StartLine = StartLine;
-    Lines.push_back(std::make_pair(Row.Address, Result));
+    Lines.push_back(std::make_pair(Row.Address.Address, Result));
   }
 
   return Lines;
 }
 
 DIInliningInfo
-DWARFContext::getInliningInfoForAddress(uint64_t Address,
+DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address,
                                         DILineInfoSpecifier Spec) {
   DIInliningInfo InliningInfo;
 
-  DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+  DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
   if (!CU)
     return InliningInfo;
 
   const DWARFLineTable *LineTable = nullptr;
   SmallVector<DWARFDie, 4> InlinedChain;
-  CU->getInlinedChainForAddress(Address, InlinedChain);
+  CU->getInlinedChainForAddress(Address.Address, InlinedChain);
   if (InlinedChain.size() == 0) {
     // If there is no DIE for address (e.g. it is in unavailable .dwo file),
     // try to at least get file/line info from symbol table.
     if (Spec.FLIKind != FileLineInfoKind::None) {
       DILineInfo Frame;
       LineTable = getLineTableForUnit(CU);
-      if (LineTable &&
-          LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
-                                               Spec.FLIKind, Frame))
+      if (LineTable && LineTable->getFileLineInfoForAddress(
+                           {Address.Address, Address.SectionIndex},
+                           CU->getCompilationDir(), Spec.FLIKind, Frame))
         InliningInfo.addFrame(Frame);
     }
     return InliningInfo;
@@ -1073,8 +1200,9 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
         LineTable = getLineTableForUnit(CU);
         // For the topmost routine, get file/line info from line table.
         if (LineTable)
-          LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
-                                               Spec.FLIKind, Frame);
+          LineTable->getFileLineInfoForAddress(
+              {Address.Address, Address.SectionIndex}, CU->getCompilationDir(),
+              Spec.FLIKind, Frame);
       } else {
         // Otherwise, use call file, call line and call column from
         // previous DIE in inlined chain.
@@ -1402,8 +1530,14 @@ public:
       // Try to obtain an already relocated version of this section.
       // Else use the unrelocated section from the object file. We'll have to
       // apply relocations ourselves later.
-      if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data))
-        Section.getContents(Data);
+      if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) {
+        Expected<StringRef> E = Section.getContents();
+        if (E)
+          Data = *E;
+        else
+          // maybeDecompress below will error.
+          consumeError(E.takeError());
+      }
 
       if (auto Err = maybeDecompress(Section, Name, Data)) {
         ErrorPolicy EP = HandleError(createError(
@@ -1495,6 +1629,9 @@ public:
 
       // Symbol to [address, section index] cache mapping.
       std::map<SymbolRef, SymInfo> AddrCache;
+      bool (*Supports)(uint64_t);
+      RelocationResolver Resolver;
+      std::tie(Supports, Resolver) = getRelocationResolver(Obj);
       for (const RelocationRef &Reloc : Section.relocations()) {
         // FIXME: it's not clear how to correctly handle scattered
         // relocations.
@@ -1509,9 +1646,31 @@ public:
           continue;
         }
 
-        object::RelocVisitor V(Obj);
-        uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address);
-        if (V.error()) {
+        // Check if Resolver can handle this relocation type early so as not to
+        // handle invalid cases in DWARFDataExtractor.
+        //
+        // TODO Don't store Resolver in every RelocAddrEntry.
+        if (Supports && Supports(Reloc.getType())) {
+          auto I = Map->try_emplace(
+              Reloc.getOffset(),
+              RelocAddrEntry{SymInfoOrErr->SectionIndex, Reloc,
+                             SymInfoOrErr->Address,
+                             Optional<object::RelocationRef>(), 0, Resolver});
+          // If we didn't successfully insert that's because we already had a
+          // relocation for that offset. Store it as a second relocation in the
+          // same RelocAddrEntry instead.
+          if (!I.second) {
+            RelocAddrEntry &entry = I.first->getSecond();
+            if (entry.Reloc2) {
+              ErrorPolicy EP = HandleError(createError(
+                  "At most two relocations per offset are supported"));
+              if (EP == ErrorPolicy::Halt)
+                return;
+            }
+            entry.Reloc2 = Reloc;
+            entry.SymbolValue2 = SymInfoOrErr->Address;
+          }
+        } else {
           SmallString<32> Type;
           Reloc.getTypeName(Type);
           ErrorPolicy EP = HandleError(
@@ -1519,10 +1678,7 @@ public:
                           errorCodeToError(object_error::parse_failed)));
           if (EP == ErrorPolicy::Halt)
             return;
-          continue;
         }
-        RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
-        Map->insert({Reloc.getOffset(), Rel});
       }
     }
 
diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index 03e317461396..b9adf8cb1d99 100644
--- a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDataExtractor.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,15 +15,19 @@ using namespace llvm;
 uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
                                                uint64_t *SecNdx) const {
   if (SecNdx)
-    *SecNdx = -1ULL;
+    *SecNdx = object::SectionedAddress::UndefSection;
   if (!Section)
     return getUnsigned(Off, Size);
-  Optional<RelocAddrEntry> Rel = Obj->find(*Section, *Off);
-  if (!Rel)
-    return getUnsigned(Off, Size);
+  Optional<RelocAddrEntry> E = Obj->find(*Section, *Off);
+  uint64_t A = getUnsigned(Off, Size);
+  if (!E)
+    return A;
   if (SecNdx)
-    *SecNdx = Rel->SectionIndex;
-  return getUnsigned(Off, Size) + Rel->Value;
+    *SecNdx = E->SectionIndex;
+  uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, A);
+  if (E->Reloc2)
+    R = E->Resolver(*E->Reloc2, E->SymbolValue2, R);
+  return R;
 }
 
 Optional<uint64_t>
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 4830c36a8ee7..31b324e5eb27 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugAbbrev.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -84,12 +83,12 @@ void DWARFDebugAbbrev::parse() const {
   if (!Data)
     return;
   uint32_t Offset = 0;
-  DWARFAbbreviationDeclarationSet AbbrDecls;
   auto I = AbbrDeclSets.begin();
   while (Data->isValidOffset(Offset)) {
     while (I != AbbrDeclSets.end() && I->first < Offset)
       ++I;
     uint32_t CUAbbrOffset = Offset;
+    DWARFAbbreviationDeclarationSet AbbrDecls;
     if (!AbbrDecls.extract(*Data, &Offset))
       break;
     AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls)));
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 22759bfac26c..58626539bba4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugAddr.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -148,28 +147,13 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
                HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
                HeaderData.SegSize);
 
-  static const char *Fmt32 = "0x%8.8" PRIx64;
-  static const char *Fmt64 = "0x%16.16" PRIx64;
-  std::string AddrFmt = "\n";
-  std::string AddrFmtVerbose = " => ";
-  if (HeaderData.AddrSize == 4) {
-    AddrFmt.append(Fmt32);
-    AddrFmtVerbose.append(Fmt32);
-  }
-  else {
-    AddrFmt.append(Fmt64);
-    AddrFmtVerbose.append(Fmt64);
-  }
-
   if (Addrs.size() > 0) {
-    OS << "Addrs: [";
-    for (uint64_t Addr : Addrs) {
-      OS << format(AddrFmt.c_str(), Addr);
-      if (DumpOpts.Verbose)
-        OS << format(AddrFmtVerbose.c_str(),
-                     Addr + HeaderOffset + sizeof(HeaderData));
-    }
-    OS << "\n]\n";
+    const char *AddrFmt = (HeaderData.AddrSize == 4) ? "0x%8.8" PRIx64 "\n"
+                                                     : "0x%16.16" PRIx64 "\n";
+    OS << "Addrs: [\n";
+    for (uint64_t Addr : Addrs)
+      OS << format(AddrFmt, Addr);
+    OS << "]\n";
   }
 }
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index b9ef6905912a..6551b61accb8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugArangeSet.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index e8c5dec821b4..6460c9feeab8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugAranges.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -115,20 +114,9 @@ void DWARFDebugAranges::construct() {
 }
 
 uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {
-  if (!Aranges.empty()) {
-    Range range(Address);
-    RangeCollIterator begin = Aranges.begin();
-    RangeCollIterator end = Aranges.end();
-    RangeCollIterator pos =
-        std::lower_bound(begin, end, range);
-
-    if (pos != end && pos->containsAddress(Address)) {
-      return pos->CUOffset;
-    } else if (pos != begin) {
-      --pos;
-      if (pos->containsAddress(Address))
-        return pos->CUOffset;
-    }
-  }
+  RangeCollIterator It =
+      partition_point(Aranges, [=](Range R) { return R.HighPC() <= Address; });
+  if (It != Aranges.end() && It->LowPC <= Address)
+    return It->CUOffset;
   return -1U;
 }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index ba55ffc28174..b3f23366f2a2 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugFrame.h - Parsing of .debug_frame ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -267,7 +266,7 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
   case OT_Expression:
     assert(Instr.Expression && "missing DWARFExpression object");
     OS << " ";
-    Instr.Expression->print(OS, MRI, IsEH);
+    Instr.Expression->print(OS, MRI, nullptr, IsEH);
     break;
   }
 }
@@ -301,7 +300,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
   OS << format("  Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);
   OS << format("  Return address column: %d\n", (int32_t)ReturnAddressRegister);
   if (Personality)
-    OS << format("  Personality Address: %08x\n", *Personality);
+    OS << format("  Personality Address: %016" PRIx64 "\n", *Personality);
   if (!AugmentationData.empty()) {
     OS << "  Augmentation data:    ";
     for (uint8_t Byte : AugmentationData)
@@ -320,7 +319,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
                (uint32_t)InitialLocation,
                (uint32_t)InitialLocation + (uint32_t)AddressRange);
   if (LSDAAddress)
-    OS << format("  LSDA Address: %08x\n", *LSDAAddress);
+    OS << format("  LSDA Address: %016" PRIx64 "\n", *LSDAAddress);
   CFIs.dump(OS, MRI, IsEH);
   OS << "\n";
 }
@@ -533,10 +532,9 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
 }
 
 FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const {
-  auto It =
-      std::lower_bound(Entries.begin(), Entries.end(), Offset,
-                       [](const std::unique_ptr<FrameEntry> &E,
-                          uint64_t Offset) { return E->getOffset() < Offset; });
+  auto It = partition_point(Entries, [=](const std::unique_ptr<FrameEntry> &E) {
+    return E->getOffset() < Offset;
+  });
   if (It != Entries.end() && (*It)->getOffset() == Offset)
     return It->get();
   return nullptr;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 976bc4651ae6..d8a755e90df4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugInfoEntry.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 1d621ff244f3..a1cb1e8582ed 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugLine.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -67,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType(
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
+bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  if (DwarfVersion >= 5)
+    return FileIndex < FileNames.size();
+  return FileIndex != 0 && FileIndex <= FileNames.size();
+}
+
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  // In DWARF v5 the file names are 0-indexed.
+  if (DwarfVersion >= 5)
+    return FileNames[Index];
+  return FileNames[Index - 1];
+}
+
 void DWARFDebugLine::Prologue::clear() {
   TotalLength = PrologueLength = 0;
   SegSelectorSize = 0;
@@ -145,8 +164,8 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
     StringRef S = DebugLineData.getCStrRef(OffsetPtr);
     if (S.empty())
       break;
-    DWARFFormValue Dir(dwarf::DW_FORM_string);
-    Dir.setPValue(S.data());
+    DWARFFormValue Dir =
+        DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, S.data());
     IncludeDirectories.push_back(Dir);
   }
 
@@ -155,8 +174,8 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
     if (Name.empty())
       break;
     DWARFDebugLine::FileNameEntry FileEntry;
-    FileEntry.Name.setForm(dwarf::DW_FORM_string);
-    FileEntry.Name.setPValue(Name.data());
+    FileEntry.Name =
+        DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name.data());
     FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
     FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
     FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
@@ -281,11 +300,11 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
   const uint64_t PrologueOffset = *OffsetPtr;
 
   clear();
-  TotalLength = DebugLineData.getU32(OffsetPtr);
+  TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr);
   if (TotalLength == UINT32_MAX) {
     FormParams.Format = dwarf::DWARF64;
     TotalLength = DebugLineData.getU64(OffsetPtr);
-  } else if (TotalLength >= 0xffffff00) {
+  } else if (TotalLength >= 0xfffffff0) {
     return createStringError(errc::invalid_argument,
         "parsing line table prologue at offset 0x%8.8" PRIx64
         " unsupported reserved unit length found of value 0x%8.8" PRIx64,
@@ -306,7 +325,8 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
     SegSelectorSize = DebugLineData.getU8(OffsetPtr);
   }
 
-  PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
+  PrologueLength =
+      DebugLineData.getRelocatedValue(sizeofPrologueLength(), OffsetPtr);
   const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
   MinInstLength = DebugLineData.getU8(OffsetPtr);
   if (getVersion() >= 4)
@@ -348,13 +368,15 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
 DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); }
 
 void DWARFDebugLine::Row::postAppend() {
+  Discriminator = 0;
   BasicBlock = false;
   PrologueEnd = false;
   EpilogueBegin = false;
 }
 
 void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
-  Address = 0;
+  Address.Address = 0;
+  Address.SectionIndex = object::SectionedAddress::UndefSection;
   Line = 1;
   Column = 0;
   File = 1;
@@ -374,7 +396,7 @@ void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) {
 }
 
 void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
-  OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
+  OS << format("0x%16.16" PRIx64 " %6u %6u", Address.Address, Line, Column)
      << format(" %6u %3u %13u ", File, Isa, Discriminator)
      << (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "")
      << (PrologueEnd ? " prologue_end" : "")
@@ -387,6 +409,7 @@ DWARFDebugLine::Sequence::Sequence() { reset(); }
 void DWARFDebugLine::Sequence::reset() {
   LowPC = 0;
   HighPC = 0;
+  SectionIndex = object::SectionedAddress::UndefSection;
   FirstRowIndex = 0;
   LastRowIndex = 0;
   Empty = true;
@@ -423,19 +446,20 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
   Sequence.reset();
 }
 
-void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t Offset) {
+void DWARFDebugLine::ParsingState::appendRowToMatrix() {
+  unsigned RowNumber = LineTable->Rows.size();
   if (Sequence.Empty) {
     // Record the beginning of instruction sequence.
     Sequence.Empty = false;
-    Sequence.LowPC = Row.Address;
+    Sequence.LowPC = Row.Address.Address;
     Sequence.FirstRowIndex = RowNumber;
   }
-  ++RowNumber;
   LineTable->appendRow(Row);
   if (Row.EndSequence) {
     // Record the end of instruction sequence.
-    Sequence.HighPC = Row.Address;
-    Sequence.LastRowIndex = RowNumber;
+    Sequence.HighPC = Row.Address.Address;
+    Sequence.LastRowIndex = RowNumber + 1;
+    Sequence.SectionIndex = Row.Address.SectionIndex;
     if (Sequence.isValid())
       LineTable->appendSequence(Sequence);
     Sequence.reset();
@@ -538,7 +562,7 @@ Error DWARFDebugLine::LineTable::parse(
         // address is that of the byte after the last target machine instruction
         // of the sequence.
         State.Row.EndSequence = true;
-        State.appendRowToMatrix(*OffsetPtr);
+        State.appendRowToMatrix();
         if (OS) {
           *OS << "\n";
           OS->indent(12);
@@ -566,9 +590,10 @@ Error DWARFDebugLine::LineTable::parse(
                              ExtOffset, DebugLineData.getAddressSize(),
                              Len - 1);
         }
-        State.Row.Address = DebugLineData.getRelocatedAddress(OffsetPtr);
+        State.Row.Address.Address = DebugLineData.getRelocatedAddress(
+            OffsetPtr, &State.Row.Address.SectionIndex);
         if (OS)
-          *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address);
+          *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address);
         break;
 
       case DW_LNE_define_file:
@@ -595,8 +620,8 @@ Error DWARFDebugLine::LineTable::parse(
         {
           FileNameEntry FileEntry;
           const char *Name = DebugLineData.getCStr(OffsetPtr);
-          FileEntry.Name.setForm(dwarf::DW_FORM_string);
-          FileEntry.Name.setPValue(Name);
+          FileEntry.Name =
+              DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name);
           FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
           FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
           FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
@@ -637,15 +662,14 @@ Error DWARFDebugLine::LineTable::parse(
       // Standard Opcodes
       case DW_LNS_copy:
         // Takes no arguments. Append a row to the matrix using the
-        // current values of the state-machine registers. Then set
-        // the basic_block register to false.
-        State.appendRowToMatrix(*OffsetPtr);
+        // current values of the state-machine registers.
         if (OS) {
           *OS << "\n";
           OS->indent(12);
           State.Row.dump(*OS);
           *OS << "\n";
         }
+        State.appendRowToMatrix();
         break;
 
       case DW_LNS_advance_pc:
@@ -655,7 +679,7 @@ Error DWARFDebugLine::LineTable::parse(
         {
           uint64_t AddrOffset =
               DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength;
-          State.Row.Address += AddrOffset;
+          State.Row.Address.Address += AddrOffset;
           if (OS)
             *OS << " (" << AddrOffset << ")";
         }
@@ -713,7 +737,7 @@ Error DWARFDebugLine::LineTable::parse(
           uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase;
           uint64_t AddrOffset =
               (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
-          State.Row.Address += AddrOffset;
+          State.Row.Address.Address += AddrOffset;
           if (OS)
             *OS
                 << format(" (0x%16.16" PRIx64 ")", AddrOffset);
@@ -731,11 +755,11 @@ Error DWARFDebugLine::LineTable::parse(
         // requires the use of DW_LNS_advance_pc. Such assemblers, however,
         // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
         {
-          uint16_t PCOffset = DebugLineData.getU16(OffsetPtr);
-          State.Row.Address += PCOffset;
+          uint16_t PCOffset = DebugLineData.getRelocatedValue(2, OffsetPtr);
+          State.Row.Address.Address += PCOffset;
           if (OS)
             *OS
-                << format(" (0x%16.16" PRIx64 ")", PCOffset);
+                << format(" (0x%4.4" PRIx16 ")", PCOffset);
         }
         break;
 
@@ -815,18 +839,16 @@ Error DWARFDebugLine::LineTable::parse(
       int32_t LineOffset =
           Prologue.LineBase + (AdjustOpcode % Prologue.LineRange);
       State.Row.Line += LineOffset;
-      State.Row.Address += AddrOffset;
+      State.Row.Address.Address += AddrOffset;
 
       if (OS) {
-        *OS << "address += " << ((uint32_t)AdjustOpcode)
-            << ",  line += " << LineOffset << "\n";
+        *OS << "address += " << AddrOffset << ",  line += " << LineOffset
+            << "\n";
         OS->indent(12);
         State.Row.dump(*OS);
       }
 
-      State.appendRowToMatrix(*OffsetPtr);
-      // Reset discriminator to 0.
-      State.Row.Discriminator = 0;
+      State.appendRowToMatrix();
     }
     if(OS)
       *OS << "\n";
@@ -839,7 +861,7 @@ Error DWARFDebugLine::LineTable::parse(
 
   // Sort all sequences so that address lookup will work faster.
   if (!Sequences.empty()) {
-    llvm::sort(Sequences, Sequence::orderByLowPC);
+    llvm::sort(Sequences, Sequence::orderByHighPC);
     // Note: actually, instruction address ranges of sequences should not
     // overlap (in shared objects and executables). If they do, the address
     // lookup would still work, though, but result would be ambiguous.
@@ -851,74 +873,88 @@ Error DWARFDebugLine::LineTable::parse(
   return Error::success();
 }
 
-uint32_t
-DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &Seq,
-                                        uint64_t Address) const {
+uint32_t DWARFDebugLine::LineTable::findRowInSeq(
+    const DWARFDebugLine::Sequence &Seq,
+    object::SectionedAddress Address) const {
   if (!Seq.containsPC(Address))
     return UnknownRowIndex;
-  // Search for instruction address in the rows describing the sequence.
-  // Rows are stored in a vector, so we may use arithmetical operations with
-  // iterators.
+  assert(Seq.SectionIndex == Address.SectionIndex);
+  // In some cases, e.g. first instruction in a function, the compiler generates
+  // two entries, both with the same address. We want the last one.
+  //
+  // In general we want a non-empty range: the last row whose address is less
+  // than or equal to Address. This can be computed as upper_bound - 1.
   DWARFDebugLine::Row Row;
   Row.Address = Address;
   RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex;
   RowIter LastRow = Rows.begin() + Seq.LastRowIndex;
-  LineTable::RowIter RowPos = std::lower_bound(
-      FirstRow, LastRow, Row, DWARFDebugLine::Row::orderByAddress);
-  if (RowPos == LastRow) {
-    return Seq.LastRowIndex - 1;
-  }
-  uint32_t Index = Seq.FirstRowIndex + (RowPos - FirstRow);
-  if (RowPos->Address > Address) {
-    if (RowPos == FirstRow)
-      return UnknownRowIndex;
-    else
-      Index--;
-  }
-  return Index;
+  assert(FirstRow->Address.Address <= Row.Address.Address &&
+         Row.Address.Address < LastRow[-1].Address.Address);
+  RowIter RowPos = std::upper_bound(FirstRow + 1, LastRow - 1, Row,
+                                    DWARFDebugLine::Row::orderByAddress) -
+                   1;
+  assert(Seq.SectionIndex == RowPos->Address.SectionIndex);
+  return RowPos - Rows.begin();
 }
 
-uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t Address) const {
-  if (Sequences.empty())
-    return UnknownRowIndex;
+uint32_t DWARFDebugLine::LineTable::lookupAddress(
+    object::SectionedAddress Address) const {
+
+  // Search for relocatable addresses
+  uint32_t Result = lookupAddressImpl(Address);
+
+  if (Result != UnknownRowIndex ||
+      Address.SectionIndex == object::SectionedAddress::UndefSection)
+    return Result;
+
+  // Search for absolute addresses
+  Address.SectionIndex = object::SectionedAddress::UndefSection;
+  return lookupAddressImpl(Address);
+}
+
+uint32_t DWARFDebugLine::LineTable::lookupAddressImpl(
+    object::SectionedAddress Address) const {
   // First, find an instruction sequence containing the given address.
   DWARFDebugLine::Sequence Sequence;
-  Sequence.LowPC = Address;
-  SequenceIter FirstSeq = Sequences.begin();
-  SequenceIter LastSeq = Sequences.end();
-  SequenceIter SeqPos = std::lower_bound(
-      FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
-  DWARFDebugLine::Sequence FoundSeq;
-  if (SeqPos == LastSeq) {
-    FoundSeq = Sequences.back();
-  } else if (SeqPos->LowPC == Address) {
-    FoundSeq = *SeqPos;
-  } else {
-    if (SeqPos == FirstSeq)
-      return UnknownRowIndex;
-    FoundSeq = *(SeqPos - 1);
-  }
-  return findRowInSeq(FoundSeq, Address);
+  Sequence.SectionIndex = Address.SectionIndex;
+  Sequence.HighPC = Address.Address;
+  SequenceIter It = llvm::upper_bound(Sequences, Sequence,
+                                      DWARFDebugLine::Sequence::orderByHighPC);
+  if (It == Sequences.end() || It->SectionIndex != Address.SectionIndex)
+    return UnknownRowIndex;
+  return findRowInSeq(*It, Address);
 }
 
 bool DWARFDebugLine::LineTable::lookupAddressRange(
-    uint64_t Address, uint64_t Size, std::vector<uint32_t> &Result) const {
+    object::SectionedAddress Address, uint64_t Size,
+    std::vector<uint32_t> &Result) const {
+
+  // Search for relocatable addresses
+  if (lookupAddressRangeImpl(Address, Size, Result))
+    return true;
+
+  if (Address.SectionIndex == object::SectionedAddress::UndefSection)
+    return false;
+
+  // Search for absolute addresses
+  Address.SectionIndex = object::SectionedAddress::UndefSection;
+  return lookupAddressRangeImpl(Address, Size, Result);
+}
+
+bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
+    object::SectionedAddress Address, uint64_t Size,
+    std::vector<uint32_t> &Result) const {
   if (Sequences.empty())
     return false;
-  uint64_t EndAddr = Address + Size;
+  uint64_t EndAddr = Address.Address + Size;
   // First, find an instruction sequence containing the given address.
   DWARFDebugLine::Sequence Sequence;
-  Sequence.LowPC = Address;
-  SequenceIter FirstSeq = Sequences.begin();
+  Sequence.SectionIndex = Address.SectionIndex;
+  Sequence.HighPC = Address.Address;
   SequenceIter LastSeq = Sequences.end();
-  SequenceIter SeqPos = std::lower_bound(
-      FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
-  if (SeqPos == LastSeq || SeqPos->LowPC != Address) {
-    if (SeqPos == FirstSeq)
-      return false;
-    SeqPos--;
-  }
-  if (!SeqPos->containsPC(Address))
+  SequenceIter SeqPos = llvm::upper_bound(
+      Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
+  if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
     return false;
 
   SequenceIter StartPos = SeqPos;
@@ -935,7 +971,8 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
       FirstRowIndex = findRowInSeq(CurSeq, Address);
 
     // Figure out the last row in the range.
-    uint32_t LastRowIndex = findRowInSeq(CurSeq, EndAddr - 1);
+    uint32_t LastRowIndex =
+        findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
     if (LastRowIndex == UnknownRowIndex)
       LastRowIndex = CurSeq.LastRowIndex - 1;
 
@@ -952,15 +989,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
   return true;
 }
 
-bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
-  return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
-}
-
 Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
                                                                 FileLineInfoKind Kind) const {
-  if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
+  if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
     return None;
-  const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+  const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
   if (Optional<const char *> source = Entry.Source.getAsCString())
     return StringRef(*source);
   return None;
@@ -974,13 +1007,13 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
          sys::path::is_absolute(Path, sys::path::Style::windows);
 }
 
-bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
-                                                   const char *CompDir,
-                                                   FileLineInfoKind Kind,
-                                                   std::string &Result) const {
+bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
+                                                  StringRef CompDir,
+                                                  FileLineInfoKind Kind,
+                                                  std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
-  const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
   StringRef FileName = Entry.Name.getAsCString().getValue();
   if (Kind != FileLineInfoKind::AbsoluteFilePath ||
       isPathAbsoluteOnWindowsOrPosix(FileName)) {
@@ -989,21 +1022,22 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
   }
 
   SmallString<16> FilePath;
-  uint64_t IncludeDirIndex = Entry.DirIdx;
   StringRef IncludeDir;
   // Be defensive about the contents of Entry.
-  if (IncludeDirIndex > 0 &&
-      IncludeDirIndex <= Prologue.IncludeDirectories.size())
-    IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1]
-                     .getAsCString()
-                     .getValue();
-
-  // We may still need to append compilation directory of compile unit.
-  // We know that FileName is not absolute, the only way to have an
-  // absolute path at this point would be if IncludeDir is absolute.
-  if (CompDir && Kind == FileLineInfoKind::AbsoluteFilePath &&
-      !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
-    sys::path::append(FilePath, CompDir);
+  if (getVersion() >= 5) {
+    if (Entry.DirIdx < IncludeDirectories.size())
+      IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+  } else {
+    if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
+      IncludeDir =
+          IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
+
+    // We may still need to append compilation directory of compile unit.
+    // We know that FileName is not absolute, the only way to have an
+    // absolute path at this point would be if IncludeDir is absolute.
+    if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
+      sys::path::append(FilePath, CompDir);
+  }
 
   // sys::path::append skips empty strings.
   sys::path::append(FilePath, IncludeDir, FileName);
@@ -1012,8 +1046,8 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
 }
 
 bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
-    uint64_t Address, const char *CompDir, FileLineInfoKind Kind,
-    DILineInfo &Result) const {
+    object::SectionedAddress Address, const char *CompDir,
+    FileLineInfoKind Kind, DILineInfo &Result) const {
   // Get the index of row we're looking for in the line table.
   uint32_t RowIndex = lookupAddress(Address);
   if (RowIndex == -1U)
@@ -1058,7 +1092,7 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data,
 }
 
 bool DWARFDebugLine::Prologue::totalLengthIsValid() const {
-  return TotalLength == 0xffffffff || TotalLength < 0xffffff00;
+  return TotalLength == 0xffffffff || TotalLength < 0xfffffff0;
 }
 
 DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext(
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f8b5ff6ec8fb..6d8f4bee77c4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugLoc.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,15 +30,16 @@ using namespace llvm;
 // non-LLVM tools.
 static void dumpExpression(raw_ostream &OS, ArrayRef<char> Data,
                            bool IsLittleEndian, unsigned AddressSize,
-                           const MCRegisterInfo *MRI) {
+                           const MCRegisterInfo *MRI, DWARFUnit *U) {
   DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()),
                                IsLittleEndian, AddressSize);
-  DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI);
+  DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
 }
 
 void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
                                        unsigned AddressSize,
                                        const MCRegisterInfo *MRI,
+                                       DWARFUnit *U,
                                        uint64_t BaseAddress,
                                        unsigned Indent) const {
   for (const Entry &E : Entries) {
@@ -51,15 +51,14 @@ void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
                  BaseAddress + E.End);
     OS << ": ";
 
-    dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
+    dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
   }
 }
 
 DWARFDebugLoc::LocationList const *
 DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const {
-  auto It = std::lower_bound(
-      Locations.begin(), Locations.end(), Offset,
-      [](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
+  auto It = partition_point(
+      Locations, [=](const LocationList &L) { return L.Offset < Offset; });
   if (It != Locations.end() && It->Offset == Offset)
     return &(*It);
   return nullptr;
@@ -69,7 +68,7 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
                          Optional<uint64_t> Offset) const {
   auto DumpLocationList = [&](const LocationList &L) {
     OS << format("0x%8.8x: ", L.Offset);
-    L.dump(OS, IsLittleEndian, AddressSize, MRI, 0, 12);
+    L.dump(OS, IsLittleEndian, AddressSize, MRI, nullptr, 0, 12);
     OS << "\n\n";
   };
 
@@ -184,7 +183,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
     }
 
     if (Kind != dwarf::DW_LLE_base_address) {
-      unsigned Bytes = Data.getU16(Offset);
+      unsigned Bytes =
+          Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
       // A single location description describing the location of the object...
       StringRef str = Data.getData().substr(*Offset, Bytes);
       *Offset += Bytes;
@@ -212,9 +212,8 @@ void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
 
 DWARFDebugLoclists::LocationList const *
 DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
-  auto It = std::lower_bound(
-      Locations.begin(), Locations.end(), Offset,
-      [](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
+  auto It = partition_point(
+      Locations, [=](const LocationList &L) { return L.Offset < Offset; });
   if (It != Locations.end() && It->Offset == Offset)
     return &(*It);
   return nullptr;
@@ -224,6 +223,7 @@ void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
                                             bool IsLittleEndian,
                                             unsigned AddressSize,
                                             const MCRegisterInfo *MRI,
+                                            DWARFUnit *U,
                                             unsigned Indent) const {
   for (const Entry &E : Entries) {
     switch (E.Kind) {
@@ -253,7 +253,7 @@ void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
       llvm_unreachable("unreachable locations list kind");
     }
 
-    dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
+    dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
   }
 }
 
@@ -262,7 +262,7 @@ void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
                               Optional<uint64_t> Offset) const {
   auto DumpLocationList = [&](const LocationList &L) {
     OS << format("0x%8.8x: ", L.Offset);
-    L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
+    L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, /*Indent=*/12);
     OS << "\n\n";
   };
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 6d789c3027a5..3317a778cc70 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugMacro.cpp ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index abd1ad59a9c1..963ec64f5e91 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugPubTable.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index dfb913000a46..d8df81a0aa0b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugRangesList.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -69,7 +68,7 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
 }
 
 DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
-    llvm::Optional<SectionedAddress> BaseAddr) const {
+    llvm::Optional<object::SectionedAddress> BaseAddr) const {
   DWARFAddressRangesVector Res;
   for (const RangeListEntry &RLE : Entries) {
     if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index 60c6eb30857f..5ac3326f6681 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDebugRnglists.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -113,9 +112,8 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
   return Error::success();
 }
 
-DWARFAddressRangesVector
-DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
-                                     DWARFUnit &U) const {
+DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
+    llvm::Optional<object::SectionedAddress> BaseAddr, DWARFUnit &U) const {
   DWARFAddressRangesVector Res;
   for (const RangeListEntry &RLE : Entries) {
     if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
@@ -175,7 +173,7 @@ DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
 void RangeListEntry::dump(
     raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
     uint64_t &CurrentBase, DIDumpOptions DumpOpts,
-    llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+    llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
         LookupPooledAddress) const {
   auto PrintRawEntry = [](raw_ostream &OS, const RangeListEntry &Entry,
                           uint8_t AddrSize, DIDumpOptions DumpOpts) {
@@ -203,7 +201,6 @@ void RangeListEntry::dump(
   case dwarf::DW_RLE_end_of_list:
     OS << (DumpOpts.Verbose ? "" : "<End of list>");
     break;
-    //  case dwarf::DW_RLE_base_addressx:
   case dwarf::DW_RLE_base_addressx: {
     if (auto SA = LookupPooledAddress(Value0))
       CurrentBase = SA->Address;
@@ -240,7 +237,7 @@ void RangeListEntry::dump(
       Start = SA->Address;
     DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts);
     break;
-  } break;
+  }
   default:
     llvm_unreachable("Unsupported range list encoding");
   }
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index 81ef0c8c7aec..d638dc4239f4 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -1,9 +1,8 @@
 //===- DWARFDie.cpp -------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -87,7 +86,7 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
     DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
                        Ctx.isLittleEndian(), 0);
     DWARFExpression(Data, U->getVersion(), U->getAddressByteSize())
-        .print(OS, MRI);
+        .print(OS, MRI, U);
     return;
   }
 
@@ -101,10 +100,10 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
       auto LL = DebugLoc.parseOneLocationList(Data, &Offset);
       if (LL) {
         uint64_t BaseAddr = 0;
-        if (Optional<SectionedAddress> BA = U->getBaseAddress())
+        if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
           BaseAddr = BA->Address;
-        LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, BaseAddr,
-                 Indent);
+        LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, U,
+                 BaseAddr, Indent);
       } else
         OS << "error extracting location list.";
       return;
@@ -126,12 +125,12 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
           Data, &Offset, UseLocLists ? U->getVersion() : 4);
 
       uint64_t BaseAddr = 0;
-      if (Optional<SectionedAddress> BA = U->getBaseAddress())
+      if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
         BaseAddr = BA->Address;
 
       if (LL)
         LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI,
-                 Indent);
+                 U, Indent);
       else
         OS << "error extracting location list.";
     }
@@ -279,11 +278,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
     OS << formatv(" [{0}]", Form);
 
   DWARFUnit *U = Die.getDwarfUnit();
-  DWARFFormValue formValue(Form);
-
-  if (!formValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr,
-                              U->getFormParams(), U))
-    return;
+  DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr);
 
   OS << "\t(";
 
@@ -294,35 +289,33 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
     Color = HighlightColor::String;
     if (const auto *LT = U->getContext().getLineTableForUnit(U))
       if (LT->getFileNameByIndex(
-              formValue.getAsUnsignedConstant().getValue(),
+              FormValue.getAsUnsignedConstant().getValue(),
               U->getCompilationDir(),
               DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
         File = '"' + File + '"';
         Name = File;
       }
-  } else if (Optional<uint64_t> Val = formValue.getAsUnsignedConstant())
+  } else if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
     Name = AttributeValueString(Attr, *Val);
 
   if (!Name.empty())
     WithColor(OS, Color) << Name;
   else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line)
-    OS << *formValue.getAsUnsignedConstant();
+    OS << *FormValue.getAsUnsignedConstant();
   else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose &&
-           formValue.getAsUnsignedConstant()) {
+           FormValue.getAsUnsignedConstant()) {
     if (DumpOpts.ShowAddresses) {
       // Print the actual address rather than the offset.
       uint64_t LowPC, HighPC, Index;
       if (Die.getLowAndHighPC(LowPC, HighPC, Index))
         OS << format("0x%016" PRIx64, HighPC);
       else
-        formValue.dump(OS, DumpOpts);
+        FormValue.dump(OS, DumpOpts);
     }
-  } else if (Attr == DW_AT_location || Attr == DW_AT_frame_base ||
-             Attr == DW_AT_data_member_location ||
-             Attr == DW_AT_GNU_call_site_value)
-    dumpLocation(OS, formValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts);
+  } else if (DWARFAttribute::mayHaveLocationDescription(Attr))
+    dumpLocation(OS, FormValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts);
   else
-    formValue.dump(OS, DumpOpts);
+    FormValue.dump(OS, DumpOpts);
 
   std::string Space = DumpOpts.ShowAddresses ? " " : "";
 
@@ -331,25 +324,25 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
   // interesting. These attributes are handled below.
   if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
     if (const char *Name =
-            Die.getAttributeValueAsReferencedDie(formValue).getName(
+            Die.getAttributeValueAsReferencedDie(FormValue).getName(
                 DINameKind::LinkageName))
       OS << Space << "\"" << Name << '\"';
   } else if (Attr == DW_AT_type) {
     OS << Space << "\"";
-    dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(formValue));
+    dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(FormValue));
     OS << '"';
   } else if (Attr == DW_AT_APPLE_property_attribute) {
-    if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
+    if (Optional<uint64_t> OptVal = FormValue.getAsUnsignedConstant())
       dumpApplePropertyAttribute(OS, *OptVal);
   } else if (Attr == DW_AT_ranges) {
     const DWARFObject &Obj = Die.getDwarfUnit()->getContext().getDWARFObj();
     // For DW_FORM_rnglistx we need to dump the offset separately, since
     // we have only dumped the index so far.
-    if (formValue.getForm() == DW_FORM_rnglistx)
+    if (FormValue.getForm() == DW_FORM_rnglistx)
       if (auto RangeListOffset =
-              U->getRnglistOffset(*formValue.getAsSectionOffset())) {
-        DWARFFormValue FV(dwarf::DW_FORM_sec_offset);
-        FV.setUValue(*RangeListOffset);
+              U->getRnglistOffset(*FormValue.getAsSectionOffset())) {
+        DWARFFormValue FV = DWARFFormValue::createFromUValue(
+            dwarf::DW_FORM_sec_offset, *RangeListOffset);
         FV.dump(OS, DumpOpts);
       }
     if (auto RangesOrError = Die.getAddressRanges())
@@ -403,6 +396,7 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
   // DWARF. This corresponds to following the DW_AT_abstract_origin and
   // DW_AT_specification just once.
   SmallSet<DWARFDie, 3> Seen;
+  Seen.insert(*this);
 
   while (!Worklist.empty()) {
     DWARFDie Die = Worklist.back();
@@ -411,19 +405,16 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
     if (!Die.isValid())
       continue;
 
-    if (Seen.count(Die))
-      continue;
-
-    Seen.insert(Die);
-
     if (auto Value = Die.find(Attrs))
       return Value;
 
     if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
-      Worklist.push_back(D);
+      if (Seen.insert(D).second)
+        Worklist.push_back(D);
 
     if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_specification))
-      Worklist.push_back(D);
+      if (Seen.insert(D).second)
+        Worklist.push_back(D);
   }
 
   return None;
@@ -438,9 +429,11 @@ DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
 
 DWARFDie
 DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
-  if (auto SpecRef = toReference(V)) {
-    if (auto SpecUnit = U->getUnitVector().getUnitForOffset(*SpecRef))
-      return SpecUnit->getDIEForOffset(*SpecRef);
+  if (auto SpecRef = V.getAsRelativeReference()) {
+    if (SpecRef->Unit)
+      return SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() + SpecRef->Offset);
+    if (auto SpecUnit = U->getUnitVector().getUnitForOffset(SpecRef->Offset))
+      return SpecUnit->getDIEForOffset(SpecRef->Offset);
   }
   return DWARFDie();
 }
@@ -560,10 +553,12 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
 
 /// Helper to dump a DIE with all of its parents, but no siblings.
 static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent,
-                                DIDumpOptions DumpOpts) {
+                                DIDumpOptions DumpOpts, unsigned Depth = 0) {
   if (!Die)
     return Indent;
-  Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts);
+  if (DumpOpts.ParentRecurseDepth > 0 && Depth >= DumpOpts.ParentRecurseDepth)
+    return Indent;
+  Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts, Depth + 1);
   Die.dump(OS, Indent, DumpOpts);
   return Indent + 2;
 }
@@ -611,8 +606,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
         }
 
         DWARFDie child = getFirstChild();
-        if (DumpOpts.ShowChildren && DumpOpts.RecurseDepth > 0 && child) {
-          DumpOpts.RecurseDepth--;
+        if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) {
+          DumpOpts.ChildRecurseDepth--;
           DIDumpOptions ChildDumpOpts = DumpOpts;
           ChildDumpOpts.ShowParents = false;
           while (child) {
@@ -668,7 +663,7 @@ iterator_range<DWARFDie::attribute_iterator> DWARFDie::attributes() const {
 }
 
 DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End)
-    : Die(D), AttrValue(0), Index(0) {
+    : Die(D), Index(0) {
   auto AbbrDecl = Die.getAbbreviationDeclarationPtr();
   assert(AbbrDecl && "Must have abbreviation declaration");
   if (End) {
@@ -690,18 +685,15 @@ void DWARFDie::attribute_iterator::updateForIndex(
     AttrValue.Attr = AbbrDecl.getAttrByIndex(Index);
     // Add the previous byte size of any previous attribute value.
     AttrValue.Offset += AttrValue.ByteSize;
-    AttrValue.Value.setForm(AbbrDecl.getFormByIndex(Index));
     uint32_t ParseOffset = AttrValue.Offset;
     auto U = Die.getDwarfUnit();
     assert(U && "Die must have valid DWARF unit");
-    bool b = AttrValue.Value.extractValue(U->getDebugInfoExtractor(),
-                                          &ParseOffset, U->getFormParams(), U);
-    (void)b;
-    assert(b && "extractValue cannot fail on fully parsed DWARF");
+    AttrValue.Value = DWARFFormValue::createFromUnit(
+        AbbrDecl.getFormByIndex(Index), U, &ParseOffset);
     AttrValue.ByteSize = ParseOffset - AttrValue.Offset;
   } else {
     assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only");
-    AttrValue.clear();
+    AttrValue = {};
   }
 }
 
@@ -710,3 +702,39 @@ DWARFDie::attribute_iterator &DWARFDie::attribute_iterator::operator++() {
     updateForIndex(*AbbrDecl, Index + 1);
   return *this;
 }
+
+bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) {
+  switch (Attr) {
+  // Standard attributes, taken from the DWARF v5 specification.
+  case DW_AT_location:
+  case DW_AT_byte_size:
+  case DW_AT_bit_size:
+  case DW_AT_string_length:
+  case DW_AT_lower_bound:
+  case DW_AT_return_addr:
+  case DW_AT_bit_stride:
+  case DW_AT_upper_bound:
+  case DW_AT_count:
+  case DW_AT_data_member_location:
+  case DW_AT_frame_base:
+  case DW_AT_segment:
+  case DW_AT_static_link:
+  case DW_AT_use_location:
+  case DW_AT_vtable_elem_location:
+  case DW_AT_allocated:
+  case DW_AT_associated:
+  case DW_AT_byte_stride:
+  case DW_AT_rank:
+  case DW_AT_call_value:
+  case DW_AT_call_origin:
+  case DW_AT_call_target:
+  case DW_AT_call_target_clobbered:
+  case DW_AT_call_data_location:
+  case DW_AT_call_data_value:
+  // Non-standard extensions that are treated the same way.
+  case DW_AT_GNU_call_site_value:
+    return true; // Every attribute listed above shares this answer.
+  default:
+    return false; // Any attribute that is not listed above.
+  }
+}
diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 2df4456053fb..470d4b5364b4 100644
--- a/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -1,13 +1,13 @@
 //===-- DWARFExpression.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Format.h"
@@ -97,6 +97,11 @@ static DescVector getDescriptions() {
   Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
   Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
   Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB);
+  Descriptions[DW_OP_GNU_entry_value] = Desc(Op::Dwarf4, Op::SizeLEB);
+
+  Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef);
+  Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB);
+
   return Descriptions;
 }
 
@@ -152,17 +157,21 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
     case Operation::SizeAddr:
       if (AddressSize == 8) {
         Operands[Operand] = Data.getU64(&Offset);
-      } else {
-        assert(AddressSize == 4);
+      } else if (AddressSize == 4) {
         Operands[Operand] = Data.getU32(&Offset);
+      } else {
+        assert(AddressSize == 2);
+        Operands[Operand] = Data.getU16(&Offset);
       }
       break;
     case Operation::SizeRefAddr:
       if (getRefAddrSize(AddressSize, Version) == 8) {
         Operands[Operand] = Data.getU64(&Offset);
-      } else {
-        assert(getRefAddrSize(AddressSize, Version) == 4);
+      } else if (getRefAddrSize(AddressSize, Version) == 4) {
         Operands[Operand] = Data.getU32(&Offset);
+      } else {
+        assert(getRefAddrSize(AddressSize, Version) == 2);
+        Operands[Operand] = Data.getU16(&Offset);
       }
       break;
     case Operation::SizeLEB:
@@ -171,6 +180,9 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
       else
         Operands[Operand] = Data.getULEB128(&Offset);
       break;
+    case Operation::BaseTypeRef:
+      Operands[Operand] = Data.getULEB128(&Offset);
+      break;
     case Operation::SizeBlock:
       // We need a size, so this cannot be the first operand
       if (Operand == 0)
@@ -182,6 +194,8 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
     default:
       llvm_unreachable("Unknown DWARFExpression Op size");
     }
+
+    OperandEndOffsets[Operand] = Offset;
   }
 
   EndOffset = Offset;
@@ -222,6 +236,7 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode,
 bool DWARFExpression::Operation::print(raw_ostream &OS,
                                        const DWARFExpression *Expr,
                                        const MCRegisterInfo *RegInfo,
+                                       DWARFUnit *U,
                                        bool isEH) {
   if (Error) {
     OS << "<decoding error>";
@@ -245,14 +260,25 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
     if (Size == Operation::SizeNA)
       break;
 
-    if (Size == Operation::SizeBlock) {
+    if (Size == Operation::BaseTypeRef && U) {
+      auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
+      if (Die && Die.getTag() == dwarf::DW_TAG_base_type) {
+        OS << format(" (0x%08x)", U->getOffset() + Operands[Operand]);
+        if (auto Name = Die.find(dwarf::DW_AT_name))
+          OS << " \"" << Name->getAsCString() << "\"";
+      } else {
+        OS << format(" <invalid base_type ref: 0x%" PRIx64 ">",
+                     Operands[Operand]);
+      }
+    } else if (Size == Operation::SizeBlock) {
       uint32_t Offset = Operands[Operand];
       for (unsigned i = 0; i < Operands[Operand - 1]; ++i)
         OS << format(" 0x%02x", Expr->Data.getU8(&Offset));
     } else {
       if (Signed)
         OS << format(" %+" PRId64, (int64_t)Operands[Operand]);
-      else
+      else if (Opcode != DW_OP_entry_value &&
+               Opcode != DW_OP_GNU_entry_value)
         OS << format(" 0x%" PRIx64, Operands[Operand]);
     }
   }
@@ -260,17 +286,60 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
 }
 
 void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
-                            bool IsEH) const {
+                            DWARFUnit *U, bool IsEH) const {
+  uint32_t EntryValExprSize = 0;
   for (auto &Op : *this) {
-    if (!Op.print(OS, this, RegInfo, IsEH)) {
+    if (!Op.print(OS, this, RegInfo, U, IsEH)) {
       uint32_t FailOffset = Op.getEndOffset();
       while (FailOffset < Data.getData().size())
         OS << format(" %02x", Data.getU8(&FailOffset));
       return;
     }
+
+    if (Op.getCode() == DW_OP_entry_value ||
+        Op.getCode() == DW_OP_GNU_entry_value) {
+      OS << "(";
+      EntryValExprSize = Op.getRawOperand(0);
+      continue;
+    }
+
+    if (EntryValExprSize) {
+      EntryValExprSize--;
+      if (EntryValExprSize == 0)
+        OS << ")";
+    }
+
     if (Op.getEndOffset() < Data.getData().size())
       OS << ", ";
   }
 }
 
+bool DWARFExpression::Operation::verify(DWARFUnit *U) {
+  // A BaseTypeRef operand must name a DW_TAG_base_type DIE in U. DWARF v5
+  // also lets DW_OP_convert use offset 0 to mean the generic type.
+  for (unsigned Operand = 0; Operand < 2; ++Operand) {
+    unsigned Size = Desc.Op[Operand];
+    if (Size == Operation::SizeNA)
+      break;
+    if (Size != Operation::BaseTypeRef)
+      continue;
+    if (Opcode == DW_OP_convert && Operands[Operand] == 0)
+      continue; // Generic type: there is no DIE to look up.
+    auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
+    if (!Die || Die.getTag() != dwarf::DW_TAG_base_type) {
+      Error = true; // Makes a later print() emit "<decoding error>".
+      return false;
+    }
+  }
+  return true;
+}
+
+bool DWARFExpression::verify(DWARFUnit *U) {
+  for (auto &Op : *this)
+    if (!Op.verify(U))
+      return false; // Stop at the first operation that fails.
+
+  return true; // Every operation passed its own verify(U) check.
+}
+
 } // namespace llvm
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 7719fea63120..290d35511cdb 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -1,9 +1,8 @@
 //===- DWARFFormValue.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -78,6 +77,34 @@ static const DWARFFormValue::FormClass DWARF5FormClasses[] = {
 
 };
 
+DWARFFormValue DWARFFormValue::createFromSValue(dwarf::Form F, int64_t V) {
+  return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromUValue(dwarf::Form F, uint64_t V) {
+  return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromPValue(dwarf::Form F, const char *V) {
+  return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromBlockValue(dwarf::Form F,
+                                                    ArrayRef<uint8_t> D) {
+  ValueType V;
+  V.uval = D.size(); // Number of bytes in the block.
+  V.data = D.data(); // Pointer into D's storage; the bytes are not copied.
+  return DWARFFormValue(F, V);
+}
+
+DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U,
+                                              uint32_t *OffsetPtr) {
+  DWARFFormValue FormValue(F);
+  FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr,
+                         U->getFormParams(), U); // bool result is ignored
+  return FormValue; // Returned whether or not extraction succeeded.
+}
+
 bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
                                uint32_t *OffsetPtr,
                                const dwarf::FormParams Params) {
@@ -193,13 +220,17 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
   default:
     break;
   }
-  // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
-  // Don't check for DWARF version here, as some producers may still do this
-  // by mistake. Also accept DW_FORM_[line_]strp since these are
-  // .debug_[line_]str section offsets.
-  return (Form == DW_FORM_data4 || Form == DW_FORM_data8 ||
-          Form == DW_FORM_strp || Form == DW_FORM_line_strp) &&
-         FC == FC_SectionOffset;
+
+  if (FC == FC_SectionOffset) {
+    if (Form == DW_FORM_strp || Form == DW_FORM_line_strp)
+      return true;
+    // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section
+    // offset. If we don't have a DWARFUnit, default to the old behavior.
+    if (Form == DW_FORM_data4 || Form == DW_FORM_data8)
+      return !U || U->getVersion() <= 3;
+  }
+
+  return false;
 }
 
 bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
@@ -268,7 +299,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
     case DW_FORM_data8:
     case DW_FORM_ref8:
     case DW_FORM_ref_sup8:
-      Value.uval = Data.getU64(OffsetPtr);
+      Value.uval = Data.getRelocatedValue(8, OffsetPtr);
       break;
     case DW_FORM_data16:
       // Treat this like a 16-byte block.
@@ -323,7 +354,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
     StringRef Str = Data.getData().substr(*OffsetPtr, Value.uval);
     Value.data = nullptr;
     if (!Str.empty()) {
-      Value.data = reinterpret_cast<const uint8_t *>(Str.data());
+      Value.data = Str.bytes_begin();
       *OffsetPtr += Value.uval;
     }
   }
@@ -333,7 +364,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
 
 void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS,
                                           DIDumpOptions DumpOpts,
-                                          SectionedAddress SA) const {
+                                          object::SectionedAddress SA) const {
   OS << format("0x%016" PRIx64, SA.Address);
   dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts,
                      SA.SectionIndex);
@@ -370,12 +401,14 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
   case DW_FORM_addrx3:
   case DW_FORM_addrx4:
   case DW_FORM_GNU_addr_index: {
-    Optional<SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
+    if (U == nullptr) {
+      OS << "<invalid dwarf unit>";
+      break;
+    }
+    Optional<object::SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
     if (!A || DumpOpts.Verbose)
       AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
-    if (U == nullptr)
-      OS << "<invalid dwarf unit>";
-    else if (A)
+    if (A)
       dumpSectionedAddress(AddrOS, DumpOpts, *A);
     else
       OS << "<no .debug_addr section>";
@@ -591,14 +624,15 @@ Optional<uint64_t> DWARFFormValue::getAsAddress() const {
     return SA->Address;
   return None;
 }
-Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
+Optional<object::SectionedAddress>
+DWARFFormValue::getAsSectionedAddress() const {
   if (!isFormClass(FC_Address))
     return None;
   if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx) {
     uint32_t Index = Value.uval;
     if (!U)
       return None;
-    Optional<SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
+    Optional<object::SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
     if (!SA)
       return None;
     return SA;
@@ -607,6 +641,12 @@ Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
 }
 
 Optional<uint64_t> DWARFFormValue::getAsReference() const {
+  if (auto R = getAsRelativeReference())
+    return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
+  return None;
+}
+  
+Optional<DWARFFormValue::UnitOffset> DWARFFormValue::getAsRelativeReference() const {
   if (!isFormClass(FC_Reference))
     return None;
   switch (Form) {
@@ -617,11 +657,11 @@ Optional<uint64_t> DWARFFormValue::getAsReference() const {
   case DW_FORM_ref_udata:
     if (!U)
       return None;
-    return Value.uval + U->getOffset();
+    return UnitOffset{const_cast<DWARFUnit*>(U), Value.uval};
   case DW_FORM_ref_addr:
   case DW_FORM_ref_sig8:
   case DW_FORM_GNU_ref_alt:
-    return Value.uval;
+    return UnitOffset{nullptr, Value.uval};
   default:
     return None;
   }
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 1abd931e3b8b..f5f975578082 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -1,9 +1,8 @@
 //===- DWARFGdbIndex.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -121,7 +120,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
     return false;
 
   CuListOffset = Data.getU32(&Offset);
-  uint32_t CuTypesOffset = Data.getU32(&Offset);
+  TuListOffset = Data.getU32(&Offset);
   AddressAreaOffset = Data.getU32(&Offset);
   SymbolTableOffset = Data.getU32(&Offset);
   ConstantPoolOffset = Data.getU32(&Offset);
@@ -129,7 +128,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
   if (Offset != CuListOffset)
     return false;
 
-  uint32_t CuListSize = (CuTypesOffset - CuListOffset) / 16;
+  uint32_t CuListSize = (TuListOffset - CuListOffset) / 16;
   CuList.reserve(CuListSize);
   for (uint32_t i = 0; i < CuListSize; ++i) {
     uint64_t CuOffset = Data.getU64(&Offset);
@@ -139,7 +138,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
 
   // CU Types are no longer needed as DWARF skeleton type units never made it
   // into the standard.
-  uint32_t TuListSize = (AddressAreaOffset - CuTypesOffset) / 24;
+  uint32_t TuListSize = (AddressAreaOffset - TuListOffset) / 24;
   TuList.resize(TuListSize);
   for (uint32_t I = 0; I < TuListSize; ++I) {
     uint64_t CuOffset = Data.getU64(&Offset);
diff --git a/lib/DebugInfo/DWARF/DWARFListTable.cpp b/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 462c036d73ad..e38e706227da 100644
--- a/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -1,9 +1,8 @@
 //===- DWARFListTable.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -26,7 +25,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
                        "%s table length at offset 0x%" PRIx32,
                        SectionName.data(), *OffsetPtr);
   // TODO: Add support for DWARF64.
-  HeaderData.Length = Data.getU32(OffsetPtr);
+  HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr);
   if (HeaderData.Length == 0xffffffffu)
     return createStringError(errc::not_supported,
                        "DWARF64 is not supported in %s at offset 0x%" PRIx32,
@@ -74,7 +73,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
         SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
   Data.setAddressSize(HeaderData.AddrSize);
   for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
-    Offsets.push_back(Data.getU32(OffsetPtr));
+    Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr));
   return Error::success();
 }
 
diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 00be75e1a94d..844920ba5b11 100644
--- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -1,9 +1,8 @@
 //===- DWARFTypeUnit.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 80234665bdeb..b74acf60c747 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1,9 +1,8 @@
 //===- DWARFUnit.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -198,7 +197,7 @@ DWARFDataExtractor DWARFUnit::getDebugInfoExtractor() const {
                             getAddressByteSize());
 }
 
-Optional<SectionedAddress>
+Optional<object::SectionedAddress>
 DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
   if (IsDWO) {
     auto R = Context.info_section_units();
@@ -242,17 +241,21 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
   IndexEntry = Entry;
   if (!IndexEntry && Index)
     IndexEntry = Index->getFromOffset(*offset_ptr);
-  Length = debug_info.getU32(offset_ptr);
-  // FIXME: Support DWARF64.
-  unsigned SizeOfLength = 4;
+  Length = debug_info.getRelocatedValue(4, offset_ptr);
   FormParams.Format = DWARF32;
+  unsigned SizeOfLength = 4;
+  if (Length == 0xffffffff) {
+    Length = debug_info.getU64(offset_ptr);
+    FormParams.Format = DWARF64;
+    SizeOfLength = 8;
+  }
   FormParams.Version = debug_info.getU16(offset_ptr);
   if (FormParams.Version >= 5) {
     UnitType = debug_info.getU8(offset_ptr);
     FormParams.AddrSize = debug_info.getU8(offset_ptr);
-    AbbrOffset = debug_info.getU32(offset_ptr);
+    AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
   } else {
-    AbbrOffset = debug_info.getRelocatedValue(4, offset_ptr);
+    AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
     FormParams.AddrSize = debug_info.getU8(offset_ptr);
     // Fake a unit type based on the section type.  This isn't perfect,
     // but distinguishing compile and type units is generally enough.
@@ -432,12 +435,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
     // which may differ from the unit's format.
     DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
                           isLittleEndian, 0);
-    if (IsDWO)
-      StringOffsetsTableContribution =
-          determineStringOffsetsTableContributionDWO(DA);
-    else if (getVersion() >= 5)
-      StringOffsetsTableContribution =
-          determineStringOffsetsTableContribution(DA);
+    if (IsDWO || getVersion() >= 5) {
+      auto StringOffsetOrError =
+          IsDWO ? determineStringOffsetsTableContributionDWO(DA)
+                : determineStringOffsetsTableContribution(DA);
+      if (!StringOffsetOrError) {
+        WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: "
+                           << toString(StringOffsetOrError.takeError()) << '\n';
+      } else {
+        StringOffsetsTableContribution = *StringOffsetOrError;
+      }
+    }
 
     // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
     // describe address ranges.
@@ -634,7 +642,7 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address,
   // First, find the subroutine that contains the given address (the leaf
   // of inlined chain).
   DWARFDie SubroutineDIE =
-      (DWO ? DWO.get() : this)->getSubroutineForAddress(Address);
+      (DWO ? *DWO : *this).getSubroutineForAddress(Address);
 
   if (!SubroutineDIE)
     return;
@@ -745,7 +753,7 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   return Abbrevs;
 }
 
-llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
+llvm::Optional<object::SectionedAddress> DWARFUnit::getBaseAddress() {
   if (BaseAddr)
     return BaseAddr;
 
@@ -755,7 +763,7 @@ llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
   return BaseAddr;
 }
 
-Optional<StrOffsetsContributionDescriptor>
+Expected<StrOffsetsContributionDescriptor>
 StrOffsetsContributionDescriptor::validateContributionSize(
     DWARFDataExtractor &DA) {
   uint8_t EntrySize = getDwarfOffsetByteSize();
@@ -766,58 +774,94 @@ StrOffsetsContributionDescriptor::validateContributionSize(
   if (ValidationSize >= Size)
     if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
       return *this;
-  return None;
+  return createStringError(errc::invalid_argument, "length exceeds section size");
 }
 
 // Look for a DWARF64-formatted contribution to the string offsets table
 // starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
 parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
   if (!DA.isValidOffsetForDataOfSize(Offset, 16))
-    return None;
+    return createStringError(errc::invalid_argument, "section offset exceeds section size");
 
   if (DA.getU32(&Offset) != 0xffffffff)
-    return None;
+    return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit");
 
   uint64_t Size = DA.getU64(&Offset);
   uint8_t Version = DA.getU16(&Offset);
   (void)DA.getU16(&Offset); // padding
   // The encoded length includes the 2-byte version field and the 2-byte
   // padding, so we need to subtract them out when we populate the descriptor.
-  return {{Offset, Size - 4, Version, DWARF64}};
+  return StrOffsetsContributionDescriptor(Offset, Size - 4, Version, DWARF64);
 }
 
 // Look for a DWARF32-formatted contribution to the string offsets table
 // starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
 parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
   if (!DA.isValidOffsetForDataOfSize(Offset, 8))
-    return None;
+    return createStringError(errc::invalid_argument, "section offset exceeds section size");
+
   uint32_t ContributionSize = DA.getU32(&Offset);
   if (ContributionSize >= 0xfffffff0)
-    return None;
+    return createStringError(errc::invalid_argument, "invalid length");
+
   uint8_t Version = DA.getU16(&Offset);
   (void)DA.getU16(&Offset); // padding
   // The encoded length includes the 2-byte version field and the 2-byte
   // padding, so we need to subtract them out when we populate the descriptor.
-  return {{Offset, ContributionSize - 4, Version, DWARF32}};
+  return StrOffsetsContributionDescriptor(Offset, ContributionSize - 4, Version,
+                                          DWARF32);
+}
+
+static Expected<StrOffsetsContributionDescriptor>
+parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA,
+                                   llvm::dwarf::DwarfFormat Format,
+                                   uint64_t Offset) {
+  StrOffsetsContributionDescriptor Desc;
+  switch (Format) {
+  case dwarf::DwarfFormat::DWARF64: {
+    if (Offset < 16)
+      return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix");
+    auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    Desc = *DescOrError;
+    break;
+  }
+  case dwarf::DwarfFormat::DWARF32: {
+    if (Offset < 8)
+      return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix");
+    auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    Desc = *DescOrError;
+    break;
+  }
+  }
+  return Desc.validateContributionSize(DA);
 }
 
-Optional<StrOffsetsContributionDescriptor>
+Expected<Optional<StrOffsetsContributionDescriptor>>
 DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
-  auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
-  Optional<StrOffsetsContributionDescriptor> Descriptor;
-  // Attempt to find a DWARF64 contribution 16 bytes before the base.
-  if (Offset >= 16)
-    Descriptor =
-        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
-  // Try to find a DWARF32 contribution 8 bytes before the base.
-  if (!Descriptor && Offset >= 8)
-    Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
-  return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
-}
-
-Optional<StrOffsetsContributionDescriptor>
+  uint64_t Offset;
+  if (IsDWO) {
+    Offset = 0;
+    if (DA.getData().data() == nullptr)
+      return None;
+  } else {
+    auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base));
+    if (!OptOffset)
+      return None;
+    Offset = *OptOffset;
+  }
+  auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+  if (!DescOrError)
+    return DescOrError.takeError();
+  return *DescOrError;
+}
+
+Expected<Optional<StrOffsetsContributionDescriptor>>
 DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
   uint64_t Offset = 0;
   auto IndexEntry = Header.getIndexEntry();
@@ -826,19 +870,24 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
   if (C)
     Offset = C->Offset;
   if (getVersion() >= 5) {
+    if (DA.getData().data() == nullptr)
+      return None;
+    Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
     // Look for a valid contribution at the given offset.
-    auto Descriptor =
-        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset);
-    if (!Descriptor)
-      Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset);
-    return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+    auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    return *DescOrError;
   }
   // Prior to DWARF v5, we derive the contribution size from the
   // index table (in a package file). In a .dwo file it is simply
   // the length of the string offsets section.
   if (!IndexEntry)
-    return {{0, StringOffsetSection.Data.size(), 4, DWARF32}};
+    return {
+        Optional<StrOffsetsContributionDescriptor>(
+            {0, StringOffsetSection.Data.size(), 4, DWARF32})};
   if (C)
-    return {{C->Offset, C->Length, 4, DWARF32}};
+    return {Optional<StrOffsetsContributionDescriptor>(
+        {C->Offset, C->Length, 4, DWARF32})};
   return None;
 }
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 84b6c4b81817..047c63461ccf 100644
--- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -1,9 +1,8 @@
 //===- DWARFUnitIndex.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -173,10 +172,9 @@ DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
              E2->Contributions[InfoColumn].Offset;
     });
   }
-  auto I =
-      llvm::upper_bound(OffsetLookup, Offset, [&](uint32_t Offset, Entry *E2) {
-        return Offset < E2->Contributions[InfoColumn].Offset;
-      });
+  auto I = partition_point(OffsetLookup, [&](Entry *E2) {
+    return E2->Contributions[InfoColumn].Offset <= Offset;
+  });
   if (I == OffsetLookup.begin())
     return nullptr;
   --I;
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index f8370178b627..c2b3189514a8 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -1,9 +1,8 @@
 //===- DWARFVerifier.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
@@ -61,55 +60,47 @@ DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
 }
 
 bool DWARFVerifier::DieRangeInfo::contains(const DieRangeInfo &RHS) const {
-  // Both list of ranges are sorted so we can make this fast.
-
-  if (Ranges.empty() || RHS.Ranges.empty())
-    return false;
-
-  // Since the ranges are sorted we can advance where we start searching with
-  // this object's ranges as we traverse RHS.Ranges.
-  auto End = Ranges.end();
-  auto Iter = findRange(RHS.Ranges.front());
+  auto I1 = Ranges.begin(), E1 = Ranges.end();
+  auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
+  if (I2 == E2)
+    return true;
 
-  // Now linearly walk the ranges in this object and see if they contain each
-  // ranges from RHS.Ranges.
-  for (const auto &R : RHS.Ranges) {
-    while (Iter != End) {
-      if (Iter->contains(R))
-        break;
-      ++Iter;
+  DWARFAddressRange R = *I2;
+  while (I1 != E1) {
+    bool Covered = I1->LowPC <= R.LowPC;
+    if (R.LowPC == R.HighPC || (Covered && R.HighPC <= I1->HighPC)) {
+      if (++I2 == E2)
+        return true;
+      R = *I2;
+      continue;
     }
-    if (Iter == End)
+    if (!Covered)
       return false;
+    if (R.LowPC < I1->HighPC)
+      R.LowPC = I1->HighPC;
+    ++I1;
   }
-  return true;
+  return false;
 }
 
 bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
-  if (Ranges.empty() || RHS.Ranges.empty())
-    return false;
-
-  auto End = Ranges.end();
-  auto Iter = findRange(RHS.Ranges.front());
-  for (const auto &R : RHS.Ranges) {
-    if (Iter == End)
-      return false;
-    if (R.HighPC <= Iter->LowPC)
-      continue;
-    while (Iter != End) {
-      if (Iter->intersects(R))
-        return true;
-      ++Iter;
-    }
+  auto I1 = Ranges.begin(), E1 = Ranges.end();
+  auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
+  while (I1 != E1 && I2 != E2) {
+    if (I1->intersects(*I2))
+      return true;
+    if (I1->LowPC < I2->LowPC)
+      ++I1;
+    else
+      ++I2;
   }
-
   return false;
 }
 
 bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
                                      uint32_t *Offset, unsigned UnitIndex,
                                      uint8_t &UnitType, bool &isUnitDWARF64) {
-  uint32_t AbbrOffset, Length;
+  uint64_t AbbrOffset, Length;
   uint8_t AddrSize = 0;
   uint16_t Version;
   bool Success = true;
@@ -123,22 +114,19 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
   uint32_t OffsetStart = *Offset;
   Length = DebugInfoData.getU32(Offset);
   if (Length == UINT32_MAX) {
+    Length = DebugInfoData.getU64(Offset);
     isUnitDWARF64 = true;
-    OS << format(
-        "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n",
-        UnitIndex);
-    return false;
   }
   Version = DebugInfoData.getU16(Offset);
 
   if (Version >= 5) {
     UnitType = DebugInfoData.getU8(Offset);
     AddrSize = DebugInfoData.getU8(Offset);
-    AbbrOffset = DebugInfoData.getU32(Offset);
+    AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
     ValidType = dwarf::isUnitType(UnitType);
   } else {
     UnitType = 0;
-    AbbrOffset = DebugInfoData.getU32(Offset);
+    AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
     AddrSize = DebugInfoData.getU8(Offset);
   }
 
@@ -166,7 +154,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
     if (!ValidAddrSize)
       note() << "The address size is unsupported.\n";
   }
-  *Offset = OffsetStart + Length + 4;
+  *Offset = OffsetStart + Length + (isUnitDWARF64 ? 12 : 4);
   return Success;
 }
 
@@ -179,21 +167,11 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
     if (Die.getTag() == DW_TAG_null)
       continue;
 
-    bool HasTypeAttr = false;
     for (auto AttrValue : Die.attributes()) {
       NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
       NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
-      HasTypeAttr |= (AttrValue.Attr == DW_AT_type);
     }
 
-    if (!HasTypeAttr && (Die.getTag() == DW_TAG_formal_parameter ||
-                         Die.getTag() == DW_TAG_variable ||
-                         Die.getTag() == DW_TAG_array_type)) {
-      error() << "DIE with tag " << TagString(Die.getTag())
-              << " is missing type attribute:\n";
-      dump(Die) << '\n';
-      NumUnitErrors++;
-    }
     NumUnitErrors += verifyDebugInfoCallSite(Die);
   }
 
@@ -281,19 +259,12 @@ bool DWARFVerifier::handleDebugAbbrev() {
   OS << "Verifying .debug_abbrev...\n";
 
   const DWARFObject &DObj = DCtx.getDWARFObj();
-  bool noDebugAbbrev = DObj.getAbbrevSection().empty();
-  bool noDebugAbbrevDWO = DObj.getAbbrevDWOSection().empty();
-
-  if (noDebugAbbrev && noDebugAbbrevDWO) {
-    return true;
-  }
-
   unsigned NumErrors = 0;
-  if (!noDebugAbbrev)
+  if (!DObj.getAbbrevSection().empty())
     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrev());
-
-  if (!noDebugAbbrevDWO)
+  if (!DObj.getAbbrevDWOSection().empty())
     NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrevDWO());
+
   return NumErrors == 0;
 }
 
@@ -503,7 +474,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
       bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
         return Op.isError();
       });
-      if (Error)
+      if (Error || !Expression.verify(U))
         ReportError("DIE contains invalid DWARF expression:");
     };
     if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
@@ -629,7 +600,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
       dump(Die) << '\n';
       break;
     }
-    // Check that the index is within the bounds of the section. 
+    // Check that the index is within the bounds of the section.
     unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
     // Use a 64-bit type to calculate the offset to guard against overflow.
     uint64_t Offset =
@@ -664,9 +635,9 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
   // getting the DIE by offset and emitting an error
   OS << "Verifying .debug_info references...\n";
   unsigned NumErrors = 0;
-  for (auto Pair : ReferenceToDIEOffsets) {
-    auto Die = DCtx.getDIEForOffset(Pair.first);
-    if (Die)
+  for (const std::pair<uint64_t, std::set<uint32_t>> &Pair :
+       ReferenceToDIEOffsets) {
+    if (DCtx.getDIEForOffset(Pair.first))
       continue;
     ++NumErrors;
     error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
@@ -731,7 +702,6 @@ void DWARFVerifier::verifyDebugLineRows() {
       continue;
 
     // Verify prologue.
-    uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
     uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
     uint32_t FileIndex = 1;
     StringMap<uint16_t> FullPathMap;
@@ -773,7 +743,7 @@ void DWARFVerifier::verifyDebugLineRows() {
     uint32_t RowIndex = 0;
     for (const auto &Row : LineTable->Rows) {
       // Verify row address.
-      if (Row.Address < PrevAddress) {
+      if (Row.Address.Address < PrevAddress) {
         ++NumDebugLineErrors;
         error() << ".debug_line["
                 << format("0x%08" PRIx64,
@@ -789,13 +759,16 @@ void DWARFVerifier::verifyDebugLineRows() {
       }
 
       // Verify file index.
-      if (Row.File > MaxFileIndex) {
+      if (!LineTable->hasFileAtIndex(Row.File)) {
         ++NumDebugLineErrors;
+        bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
         error() << ".debug_line["
                 << format("0x%08" PRIx64,
                           *toSectionOffset(Die.find(DW_AT_stmt_list)))
                 << "][" << RowIndex << "] has invalid file index " << Row.File
-                << " (valid values are [1," << MaxFileIndex << "]):\n";
+                << " (valid values are [" << (isDWARF5 ? "0," : "1,")
+                << LineTable->Prologue.FileNames.size()
+                << (isDWARF5 ? ")" : "]") << "):\n";
         DWARFDebugLine::Row::dumpTableHeader(OS);
         Row.dump(OS);
         OS << '\n';
@@ -803,7 +776,7 @@ void DWARFVerifier::verifyDebugLineRows() {
       if (Row.EndSequence)
         PrevAddress = 0;
       else
-        PrevAddress = Row.Address;
+        PrevAddress = Row.Address.Address;
       ++RowIndex;
     }
   }
diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp
new file mode 100644
index 000000000000..55c36a55b4be
--- /dev/null
+++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -0,0 +1,22 @@
+//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+
+using namespace llvm;
+using namespace gsym;
+
+// Dump FI: range and name, then one line per Lines entry, then Inline.
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
+  OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End);
+  OS << "): " << "Name=" << HEX32(FI.Name) << '\n';
+  for (const auto &Entry : FI.Lines)
+    OS << Entry << '\n';
+  return OS << FI.Inline;
+}
diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp
new file mode 100644
index 000000000000..781c1755241d
--- /dev/null
+++ b/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -0,0 +1,59 @@
+//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include <algorithm>
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+
+// Print II as its address ranges followed by its name, call file and call
+// line, then print each child in order. An invalid InlineInfo prints nothing.
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const InlineInfo &II) {
+  if (!II.isValid())
+    return OS;
+  const char *Separator = "";
+  for (auto Range : II.Ranges) {
+    OS << Separator << Range;
+    Separator = " ";
+  }
+  // The CallLine label is paired with II.CallLine (not II.CallFile).
+  OS << " Name = " << HEX32(II.Name) << ", CallFile = " << II.CallFile
+     << ", CallLine = " << II.CallLine << '\n';
+  for (const auto &Child : II.Children)
+    OS << Child;
+  return OS;
+}
+
+static bool getInlineStackHelper(const InlineInfo &II, uint64_t Addr,
+    std::vector<const InlineInfo *> &InlineStack) {
+  if (II.Ranges.contains(Addr)) {
+    // A top-level InlineInfo that represents the concrete function has no
+    // name (Name == 0) and is not recorded. Anything else is prepended, so
+    // the innermost matching inline function ends up first in InlineStack.
+    if (II.Name != 0)
+      InlineStack.insert(InlineStack.begin(), &II);
+    for (const auto &Child : II.Children) {
+      if (::getInlineStackHelper(Child, Addr, InlineStack))
+        break; // Stop at the first child that reports a hit.
+    }
+    return !InlineStack.empty();
+  }
+  return false;
+}
+
+llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr) const {
+  InlineArray Stack; // Receives the inline functions the helper finds for Addr.
+  if (!getInlineStackHelper(*this, Addr, Stack))
+    return llvm::None;
+  return Stack;
+}
diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp
new file mode 100644
index 000000000000..ca61984dacbd
--- /dev/null
+++ b/lib/DebugInfo/GSYM/Range.cpp
@@ -0,0 +1,55 @@
+//===- Range.cpp ------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <algorithm>
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+
+void AddressRanges::insert(AddressRange Range) {
+  if (Range.size() == 0) // Zero-sized ranges are ignored.
+    return;
+  // Merge Range with stored ranges it overlaps; touching ranges stay separate.
+  auto It = llvm::upper_bound(Ranges, Range);
+  auto It2 = It;
+  while (It2 != Ranges.end() && It2->Start < Range.End)
+    ++It2; // [It, It2) collects following ranges that start below Range.End.
+  if (It != It2) {
+    Range.End = std::max(Range.End, It2[-1].End); // Absorb them into Range...
+    It = Ranges.erase(It, It2); // ...and drop the originals.
+  }
+  if (It != Ranges.begin() && Range.Start < It[-1].End)
+    It[-1].End = std::max(It[-1].End, Range.End); // Grow the preceding range.
+  else
+    Ranges.insert(It, Range);
+}
+
+bool AddressRanges::contains(uint64_t Addr) const {
+  // Only the last range whose Start is <= Addr can possibly hold Addr.
+  auto NotAfter = [Addr](const AddressRange &R) { return R.Start <= Addr; };
+  auto Next = std::partition_point(Ranges.begin(), Ranges.end(), NotAfter);
+  return Next != Ranges.begin() && Addr < Next[-1].End;
+}
+/// Prints \p R in half-open interval notation: "[Start - End)".
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
+  return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")";
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) {
+  // Ranges are printed in index order with one space between neighbours.
+  for (size_t Idx = 0, Count = AR.size(); Idx != Count; ++Idx) {
+    if (Idx != 0)
+      OS << ' ';
+    OS << AR[Idx];
+  }
+  return OS;
+}
diff --git a/lib/DebugInfo/MSF/MSFBuilder.cpp b/lib/DebugInfo/MSF/MSFBuilder.cpp
index 71609919558a..c6fe764ab7e0 100644
--- a/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -1,9 +1,8 @@
 //===- MSFBuilder.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/MSF/MSFCommon.cpp b/lib/DebugInfo/MSF/MSFCommon.cpp
index d398304375ac..fb4f0700059c 100644
--- a/lib/DebugInfo/MSF/MSFCommon.cpp
+++ b/lib/DebugInfo/MSF/MSFCommon.cpp
@@ -1,9 +1,8 @@
 //===- MSFCommon.cpp - Common types and functions for MSF files -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/MSF/MSFError.cpp b/lib/DebugInfo/MSF/MSFError.cpp
index bfac6bebba3f..b368b802c564 100644
--- a/lib/DebugInfo/MSF/MSFError.cpp
+++ b/lib/DebugInfo/MSF/MSFError.cpp
@@ -1,9 +1,8 @@
 //===- MSFError.cpp - Error extensions for MSF files ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 using namespace llvm;
 using namespace llvm::msf;
 
+namespace {
 // FIXME: This class is only here to support the transition to llvm::Error. It
 // will be removed once this transition is complete. Clients should prefer to
 // deal with the Error value directly, rather than converting to error_code.
@@ -39,6 +39,7 @@ public:
     llvm_unreachable("Unrecognized msf_error_code");
   }
 };
+} // namespace
 
 static llvm::ManagedStatic<MSFErrorCategory> MSFCategory;
 const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; }
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index dec28eb30697..df925771f0d9 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -1,9 +1,8 @@
 //===- MappedBlockStream.cpp - Reads stream data from an MSF file ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIADataStream.cpp b/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
index 6a10513fad97..8a806f298d0f 100644
--- a/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
@@ -1,9 +1,8 @@
 //===- DIADataStream.cpp - DIA implementation of IPDBDataStream -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
index d2451f13e6cb..e4cb4daf94b1 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumDebugStreams.cpp - DIA Debug Stream Enumerator impl -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
index f873f3525df5..8a181b448a27 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumFrameData.cpp ---------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
index 6c361b81e33d..7226ab2ba0a0 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumSourceFiles.cpp - DIA Source File Enumerator impl ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
index 0820d9dc7c9f..6f1d7733fb2d 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumLineNumbers.cpp - DIA Line Number Enumerator impl ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
index 90c857aa5713..4f9b232a024a 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumSectionContribs.cpp ---------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
index 06595e7ec1c8..943e9e1b4d58 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumSourceFiles.cpp - DIA Source File Enumerator impl ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
index 48bc32767e6c..5153596d52ae 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -1,9 +1,8 @@
 //==- DIAEnumSymbols.cpp - DIA Symbol Enumerator impl ------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
index 6fa096156d48..335b575d6542 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
@@ -1,9 +1,8 @@
 //===- DIAEnumTables.cpp - DIA Table Enumerator Impl ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp b/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
index 533cce7923c0..7975156b1abd 100644
--- a/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
@@ -1,9 +1,8 @@
 //===- DIAFrameData.cpp - DIA impl. of IPDBFrameData -------------- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp b/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
index 1d642f221d79..032b230b5faa 100644
--- a/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
@@ -1,9 +1,8 @@
 //===- DIAInjectedSource.cpp - DIA impl for IPDBInjectedSource --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,11 +41,11 @@ std::string DIAInjectedSource::getVirtualFileName() const {
                           &IDiaInjectedSource::get_virtualFilename);
 }
 
-PDB_SourceCompression DIAInjectedSource::getCompression() const {
+uint32_t DIAInjectedSource::getCompression() const {
   DWORD Compression = 0;
   if (S_OK != SourceFile->get_sourceCompression(&Compression))
     return PDB_SourceCompression::None;
-  return static_cast<PDB_SourceCompression>(Compression);
+  return static_cast<uint32_t>(Compression);
 }
 
 std::string DIAInjectedSource::getCode() const {
diff --git a/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp b/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
index b19be6b595ab..3af02ea36c7b 100644
--- a/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
@@ -1,9 +1,8 @@
 //===- DIALineNumber.cpp - DIA implementation of IPDBLineNumber -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index cd4d00a13b18..a8ae076e1d6c 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -1,9 +1,8 @@
 //===- DIARawSymbol.cpp - DIA implementation of IPDBRawSymbol ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
index 8e233ca15161..e2d928f2c4b2 100644
--- a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
@@ -1,9 +1,8 @@
 //===- DIASectionContrib.cpp - DIA impl. of IPDBSectionContrib ---- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index bd375e172ac0..4e0b8587c613 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -1,9 +1,8 @@
 //===- DIASession.cpp - DIA implementation of IPDBSession -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/DebugInfo/PDB/DIA/DIASession.h"
diff --git a/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp b/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
index d3e408166a87..21e757c3a060 100644
--- a/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
@@ -1,9 +1,8 @@
 //===- DIASourceFile.cpp - DIA implementation of IPDBSourceFile -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/DIA/DIATable.cpp b/lib/DebugInfo/PDB/DIA/DIATable.cpp
index 6017081b2cb6..33d74abd740e 100644
--- a/lib/DebugInfo/PDB/DIA/DIATable.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIATable.cpp
@@ -1,9 +1,8 @@
 //===- DIATable.cpp - DIA implementation of IPDBTable -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp
index 256952073e88..70dc094c42ec 100644
--- a/lib/DebugInfo/PDB/GenericError.cpp
+++ b/lib/DebugInfo/PDB/GenericError.cpp
@@ -1,9 +1,8 @@
 //===- Error.cpp - system_error extensions for PDB --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 using namespace llvm;
 using namespace llvm::pdb;
 
+namespace {
 // FIXME: This class is only here to support the transition to llvm::Error. It
 // will be removed once this transition is complete. Clients should prefer to
 // deal with the Error value directly, rather than converting to error_code.
@@ -40,6 +40,7 @@ public:
     llvm_unreachable("Unrecognized generic_error_code");
   }
 };
+} // namespace
 
 static llvm::ManagedStatic<PDBErrorCategory> PDBCategory;
 const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; }
diff --git a/lib/DebugInfo/PDB/IPDBSourceFile.cpp b/lib/DebugInfo/PDB/IPDBSourceFile.cpp
index 8cb1fbef51f4..113ee04bab95 100644
--- a/lib/DebugInfo/PDB/IPDBSourceFile.cpp
+++ b/lib/DebugInfo/PDB/IPDBSourceFile.cpp
@@ -1,9 +1,8 @@
 //===- IPDBSourceFile.cpp - base interface for a PDB source file ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
index 931ac7bb81db..5095efcdee3c 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
@@ -1,9 +1,8 @@
 //===- DbiModuleDescriptor.cpp - PDB module information -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index ab93efc839a9..20b6c6142547 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -1,9 +1,8 @@
 //===- DbiModuleDescriptorBuilder.cpp - PDB Mod Info Creation ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -104,7 +103,6 @@ uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const {
 }
 
 void DbiModuleDescriptorBuilder::finalize() {
-  Layout.SC.Imod = Layout.Mod;
   Layout.FileNameOffs = 0; // TODO: Fix this
   Layout.Flags = 0;        // TODO: Fix this
   Layout.C11Bytes = 0;
@@ -117,12 +115,15 @@ void DbiModuleDescriptorBuilder::finalize() {
 
   // This value includes both the signature field as well as the record bytes
   // from the symbol stream.
-  Layout.SymBytes = SymbolByteSize + sizeof(uint32_t);
+  Layout.SymBytes =
+      Layout.ModDiStream == kInvalidStreamIndex ? 0 : getNextSymbolOffset();
 }
 
 Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
   this->Layout.ModDiStream = kInvalidStreamIndex;
   uint32_t C13Size = calculateC13DebugInfoSize();
+  if (!C13Size && !SymbolByteSize)
+    return Error::success();
   auto ExpectedSN =
       MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize, C13Size));
   if (!ExpectedSN)
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index eea70b229c67..5cf014e881cd 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -1,9 +1,8 @@
 //===- DbiModuleList.cpp - PDB module information list --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 60ac17b655a7..4eb16804171d 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -1,9 +1,8 @@
 //===- DbiStream.cpp - PDB Dbi Stream (Stream 3) Access -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -127,8 +126,10 @@ Error DbiStream::reload(PDBFile *Pdb) {
     return EC;
   if (auto EC = initializeSectionMapData())
     return EC;
-  if (auto EC = initializeFpoRecords(Pdb))
+  if (auto EC = initializeOldFpoRecords(Pdb))
     return EC;
+  if (auto EC = initializeNewFpoRecords(Pdb))
+     return EC;
 
   if (Reader.bytesRemaining() > 0)
     return make_error<RawError>(raw_error_code::corrupt_file,
@@ -201,8 +202,16 @@ FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() const {
   return SectionHeaders;
 }
 
-FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
-  return FpoRecords;
+bool DbiStream::hasOldFpoRecords() const { return OldFpoStream != nullptr; }
+
+FixedStreamArray<object::FpoData> DbiStream::getOldFpoRecords() const {
+  return OldFpoRecords;
+}
+
+bool DbiStream::hasNewFpoRecords() const { return NewFpoStream != nullptr; }
+
+const DebugFrameDataSubsectionRef &DbiStream::getNewFpoRecords() const {
+  return NewFpoRecords;
 }
 
 const DbiModuleList &DbiStream::modules() const { return Modules; }
@@ -247,22 +256,15 @@ Error DbiStream::initializeSectionContributionData() {
 
 // Initializes this->SectionHeaders.
 Error DbiStream::initializeSectionHeadersData(PDBFile *Pdb) {
-  if (!Pdb)
-    return Error::success();
-
-  if (DbgStreams.size() == 0)
-    return Error::success();
+  Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+      createIndexedStreamForHeaderType(Pdb, DbgHeaderType::SectionHdr);
+  if (auto EC = ExpectedStream.takeError())
+    return EC;
 
-  uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr);
-  if (StreamNum == kInvalidStreamIndex)
+  auto &SHS = *ExpectedStream;
+  if (!SHS)
     return Error::success();
 
-  if (StreamNum >= Pdb->getNumStreams())
-    return make_error<RawError>(raw_error_code::no_stream);
-
-  auto SHS = MappedBlockStream::createIndexedStream(
-      Pdb->getMsfLayout(), Pdb->getMsfBuffer(), StreamNum, Pdb->getAllocator());
-
   size_t StreamLen = SHS->getLength();
   if (StreamLen % sizeof(object::coff_section))
     return make_error<RawError>(raw_error_code::corrupt_file,
@@ -279,39 +281,65 @@ Error DbiStream::initializeSectionHeadersData(PDBFile *Pdb) {
 }
 
 // Initializes this->Fpos.
-Error DbiStream::initializeFpoRecords(PDBFile *Pdb) {
-  if (!Pdb)
-    return Error::success();
-
-  if (DbgStreams.size() == 0)
-    return Error::success();
-
-  uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::NewFPO);
+Error DbiStream::initializeOldFpoRecords(PDBFile *Pdb) {
+  Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+      createIndexedStreamForHeaderType(Pdb, DbgHeaderType::FPO);
+  if (auto EC = ExpectedStream.takeError())
+    return EC;
 
-  // This means there is no FPO data.
-  if (StreamNum == kInvalidStreamIndex)
+  auto &FS = *ExpectedStream;
+  if (!FS)
     return Error::success();
 
-  if (StreamNum >= Pdb->getNumStreams())
-    return make_error<RawError>(raw_error_code::no_stream);
-
-  auto FS = MappedBlockStream::createIndexedStream(
-      Pdb->getMsfLayout(), Pdb->getMsfBuffer(), StreamNum, Pdb->getAllocator());
-
   size_t StreamLen = FS->getLength();
   if (StreamLen % sizeof(object::FpoData))
     return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Corrupted New FPO stream.");
+                                "Corrupted Old FPO stream.");
 
   size_t NumRecords = StreamLen / sizeof(object::FpoData);
   BinaryStreamReader Reader(*FS);
-  if (auto EC = Reader.readArray(FpoRecords, NumRecords))
+  if (auto EC = Reader.readArray(OldFpoRecords, NumRecords))
     return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Corrupted New FPO stream.");
-  FpoStream = std::move(FS);
+                                "Corrupted Old FPO stream.");
+  OldFpoStream = std::move(FS);
   return Error::success();
 }
 
+Error DbiStream::initializeNewFpoRecords(PDBFile *Pdb) {
+  Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+      createIndexedStreamForHeaderType(Pdb, DbgHeaderType::NewFPO);
+  if (auto EC = ExpectedStream.takeError())
+    return EC;
+  // A null stream means the helper found no NewFPO stream; that is not an error.
+  auto &FS = *ExpectedStream;
+  if (!FS)
+    return Error::success();
+  // Hand the stream to NewFpoRecords; an initialization failure is propagated.
+  if (auto EC = NewFpoRecords.initialize(*FS))
+    return EC;
+  // Keep the stream in NewFpoStream only once initialization has succeeded.
+  NewFpoStream = std::move(FS);
+  return Error::success();
+}
+
+Expected<std::unique_ptr<msf::MappedBlockStream>>
+DbiStream::createIndexedStreamForHeaderType(PDBFile *Pdb,
+                                            DbgHeaderType Type) const {
+  // Without a PDB file or any debug streams there is nothing to look up.
+  // A null stream (not an error) tells the caller the stream is absent.
+  if (!Pdb || DbgStreams.empty())
+    return nullptr;
+
+  // kInvalidStreamIndex means there is no such stream.
+  const uint32_t StreamNum = getDebugStreamIndex(Type);
+  if (StreamNum == kInvalidStreamIndex)
+    return nullptr;
+
+  // Creation is delegated to the PDB file and its result is returned as-is.
+  // NOTE(review): presumably this validates StreamNum - confirm in PDBFile.
+  return Pdb->safelyCreateIndexedStream(StreamNum);
+}
+
 BinarySubstreamRef DbiStream::getSectionContributionData() const {
   return SecContrSubstream;
 }
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 094216ea800a..b7ade0072ee5 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -1,9 +1,8 @@
 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/EnumTables.cpp b/lib/DebugInfo/PDB/Native/EnumTables.cpp
index b3837dc72e5b..f5125393695b 100644
--- a/lib/DebugInfo/PDB/Native/EnumTables.cpp
+++ b/lib/DebugInfo/PDB/Native/EnumTables.cpp
@@ -1,9 +1,8 @@
 //===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 57da7003da2b..8ed5b8b44c59 100644
--- a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -1,9 +1,8 @@
 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,14 +30,14 @@ using namespace llvm::pdb;
 using namespace llvm::codeview;
 
 struct llvm::pdb::GSIHashStreamBuilder {
-  struct UdtDenseMapInfo {
+  struct SymbolDenseMapInfo {
     static inline CVSymbol getEmptyKey() {
       static CVSymbol Empty;
       return Empty;
     }
     static inline CVSymbol getTombstoneKey() {
-      static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
-                                ArrayRef<uint8_t>());
+      static CVSymbol Tombstone(
+          DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
       return Tombstone;
     }
     static unsigned getHashValue(const CVSymbol &Val) {
@@ -51,7 +50,7 @@ struct llvm::pdb::GSIHashStreamBuilder {
 
   std::vector<CVSymbol> Records;
   uint32_t StreamIndex;
-  llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes;
+  llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes;
   std::vector<PSHashRecord> HashRecords;
   std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
   std::vector<support::ulittle32_t> HashBuckets;
@@ -67,8 +66,8 @@ struct llvm::pdb::GSIHashStreamBuilder {
                                                CodeViewContainer::Pdb));
   }
   void addSymbol(const CVSymbol &Symbol) {
-    if (Symbol.kind() == S_UDT) {
-      auto Iter = UdtHashes.insert(Symbol);
+    if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
+      auto Iter = SymbolHashes.insert(Symbol);
       if (!Iter.second)
         return;
     }
@@ -263,8 +262,7 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
     SymOffsets.push_back(SymOffset);
     SymOffset += Sym.length();
   }
-  std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
-                   comparePubSymByAddrAndName);
+  llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName);
 
   // Fill in the symbol offsets in the appropriate order.
   std::vector<ulittle32_t> AddrMap;
diff --git a/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index e36319566821..f27d60f46815 100644
--- a/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -1,9 +1,8 @@
 //===- GlobalsStream.cpp - PDB Index of Symbols by Name ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index 61188ece2dcb..b5c139ecbec0 100644
--- a/lib/DebugInfo/PDB/Native/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -1,9 +1,8 @@
 //===- Hash.cpp - PDB Hash Functions --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/HashTable.cpp b/lib/DebugInfo/PDB/Native/HashTable.cpp
index cfabc9cd1ad8..dfdcdf1f4eaf 100644
--- a/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ b/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -1,9 +1,8 @@
 //===- HashTable.cpp - PDB Hash Table -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
index 973a520ffca9..f41bb32d69af 100644
--- a/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -1,9 +1,8 @@
 //===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 3b5a2accdba6..42daa7cae799 100644
--- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -1,9 +1,8 @@
 //===- InfoStreamBuilder.cpp - PDB Info Stream Creation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
new file mode 100644
index 000000000000..3f4101db7b93
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
@@ -0,0 +1,71 @@
+//===- InjectedSourceStream.cpp - PDB Headerblock Stream Access -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+// Takes ownership of Stream; nothing is parsed until reload() is called.
+InjectedSourceStream::InjectedSourceStream(
+    std::unique_ptr<MappedBlockStream> Stream)
+    : Stream(std::move(Stream)) {}
+
+// Reads the header and the injected source table from the stream and checks
+// every table entry. Returns a corrupt_file RawError when a version or entry
+// size does not match, and forwards the lookup error when an entry refers to
+// a name that Strings cannot resolve.
+Error InjectedSourceStream::reload(const PDBStringTable &Strings) {
+  BinaryStreamReader Reader(*Stream);
+
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  if (Header->Version !=
+      static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid headerblock header version");
+
+  if (auto EC = InjectedSourceTable.load(Reader))
+    return EC;
+
+  for (const auto &Entry : *this) {
+    if (Entry.second.Size != sizeof(SrcHeaderBlockEntry))
+      return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerblock entry size");
+    if (Entry.second.Version !=
+        static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+      return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerblock entry version");
+
+    // Check that all name references are valid.
+    auto Name = Strings.getStringForID(Entry.second.FileNI);
+    if (!Name)
+      return Name.takeError();
+    auto ObjName = Strings.getStringForID(Entry.second.ObjNI);
+    if (!ObjName)
+      return ObjName.takeError();
+    auto VName = Strings.getStringForID(Entry.second.VFileNI);
+    if (!VName)
+      return VName.takeError();
+  }
+
+  // The table is expected to be the last thing in the stream.
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 8c97f4a012f0..1445f0bd9e1b 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -1,9 +1,8 @@
 //===- ModuleDebugStream.cpp - PDB Module Info Stream Access --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,6 +14,7 @@
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamRef.h"
@@ -37,6 +37,17 @@ ModuleDebugStreamRef::~ModuleDebugStreamRef() = default;
 Error ModuleDebugStreamRef::reload() {
   BinaryStreamReader Reader(*Stream);
 
+  if (Mod.getModuleStreamIndex() != llvm::pdb::kInvalidStreamIndex) {
+    if (Error E = reloadSerialize(Reader)) // Skipped for an invalid index.
+      return E;
+  }
+  if (Reader.bytesRemaining() > 0) // Leftover data is reported as corruption.
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Unexpected bytes in module stream.");
+  return Error::success();
+}
+
+Error ModuleDebugStreamRef::reloadSerialize(BinaryStreamReader &Reader) {
   uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize();
   uint32_t C11Size = Mod.getC11LineInfoByteSize();
   uint32_t C13Size = Mod.getC13LineInfoByteSize();
@@ -72,10 +83,6 @@ Error ModuleDebugStreamRef::reload() {
     return EC;
   if (auto EC = Reader.readSubstream(GlobalRefsSubstream, GlobalRefsSize))
     return EC;
-  if (Reader.bytesRemaining() > 0)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Unexpected bytes in module stream.");
-
   return Error::success();
 }
 
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index a4eaed90837d..4a88391494cd 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -1,9 +1,8 @@
 //===- NamedStreamMap.cpp - PDB Named Stream Map --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,6 +34,7 @@ uint16_t NamedStreamMapTraits::hashLookupKey(StringRef S) const {
   // Here, the type HASH is a typedef of unsigned short.
   // ** It is not a bug that we truncate the result of hashStringV1, in fact
   //    it is a bug if we do not! **
+  // See NMTNI::hash() in the reference implementation.
   return static_cast<uint16_t>(hashStringV1(S));
 }
 
@@ -46,8 +46,7 @@ uint32_t NamedStreamMapTraits::lookupKeyToStorageKey(StringRef S) {
   return NS->appendStringData(S);
 }
 
-NamedStreamMap::NamedStreamMap()
-    : HashTraits(*this), OffsetIndexMap(1, HashTraits) {}
+NamedStreamMap::NamedStreamMap() : HashTraits(*this), OffsetIndexMap(1) {}
 
 Error NamedStreamMap::load(BinaryStreamReader &Stream) {
   uint32_t StringBufferSize;
@@ -99,7 +98,7 @@ uint32_t NamedStreamMap::hashString(uint32_t Offset) const {
 }
 
 bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const {
-  auto Iter = OffsetIndexMap.find_as(Stream);
+  auto Iter = OffsetIndexMap.find_as(Stream, HashTraits);
   if (Iter == OffsetIndexMap.end())
     return false;
   StreamNo = (*Iter).second;
@@ -123,5 +122,5 @@ uint32_t NamedStreamMap::appendStringData(StringRef S) {
 }
 
 void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
-  OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo));
+  OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo), HashTraits);
 }
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index efa70b0e7bd8..39ae84acba20 100644
--- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -1,9 +1,8 @@
 //===- NativeCompilandSymbol.cpp - Native impl for compilands ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
index 6eece3df2db3..54646867bc5f 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -1,9 +1,8 @@
 //==- NativeEnumGlobals.cpp - Native Global Enumerator impl ------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
new file mode 100644
index 000000000000..f17ff5bb01f2
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -0,0 +1,133 @@
+//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
+
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+
+namespace llvm {
+namespace pdb {
+
+namespace {
+
+// Reads up to Limit bytes from the start of Stream into a string, stitching
+// together the contiguous chunks the stream hands back. Any read error is
+// returned to the caller.
+Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
+  uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
+  std::string Result;
+  Result.reserve(DataLength);
+  while (Offset < DataLength) {
+    ArrayRef<uint8_t> Data;
+    if (auto E = Stream.readLongestContiguousChunk(Offset, Data))
+      return std::move(E);
+    // A chunk may run past the requested length; keep only what is needed.
+    Data = Data.take_front(DataLength - Offset);
+    Offset += Data.size();
+    Result += toStringRef(Data);
+  }
+  return Result;
+}
+
+// IPDBInjectedSource implementation backed by one SrcHeaderBlockEntry. Holds
+// references only, so Entry, File and Strings must outlive this object.
+class NativeInjectedSource final : public IPDBInjectedSource {
+  const SrcHeaderBlockEntry &Entry;
+  const PDBStringTable &Strings;
+  PDBFile &File;
+
+  // Looks up a string-table ID stored in Entry. The IDs are validated when
+  // the injected source stream is loaded, so failure here is a programming
+  // error. cantFail() (unlike a bare assert) also marks the Expected as
+  // checked in builds without assertions.
+  StringRef lookupString(uint32_t ID) const {
+    return cantFail(Strings.getStringForID(ID),
+                    "InjectedSourceStream should have rejected this");
+  }
+
+public:
+  NativeInjectedSource(const SrcHeaderBlockEntry &Entry,
+                       PDBFile &File, const PDBStringTable &Strings)
+      : Entry(Entry), Strings(Strings), File(File) {}
+
+  uint32_t getCrc32() const override { return Entry.CRC; }
+  uint64_t getCodeByteSize() const override { return Entry.FileSize; }
+
+  std::string getFileName() const override {
+    return lookupString(Entry.FileNI).str();
+  }
+
+  std::string getObjectFileName() const override {
+    return lookupString(Entry.ObjNI).str();
+  }
+
+  std::string getVirtualFileName() const override {
+    return lookupString(Entry.VFileNI).str();
+  }
+
+  uint32_t getCompression() const override { return Entry.Compression; }
+
+  // Returns the contents of the "/src/files/<virtual name>" named stream,
+  // limited to Entry.FileSize bytes. The return type has no error channel,
+  // so failures yield a placeholder message instead of the data.
+  std::string getCode() const override {
+    // Get name of stream storing the data.
+    StringRef VName = lookupString(Entry.VFileNI);
+    std::string StreamName = ("/src/files/" + VName).str();
+
+    // Find stream with that name and read its data.
+    // FIXME: Consider validating (or even loading) all this in
+    // InjectedSourceStream so that no error can happen here.
+    auto ExpectedFileStream = File.safelyCreateNamedStream(StreamName);
+    if (!ExpectedFileStream) {
+      consumeError(ExpectedFileStream.takeError());
+      return "(failed to open data stream)";
+    }
+
+    auto Data = readStreamData(**ExpectedFileStream, Entry.FileSize);
+    if (!Data) {
+      consumeError(Data.takeError());
+      return "(failed to read data)";
+    }
+    return *Data;
+  }
+};
+
+} // namespace
+
+NativeEnumInjectedSources::NativeEnumInjectedSources(
+    PDBFile &File, const InjectedSourceStream &IJS,
+    const PDBStringTable &Strings)
+    : File(File), Stream(IJS), Strings(Strings), Cur(Stream.begin()) {}
+
+uint32_t NativeEnumInjectedSources::getChildCount() const {
+  return static_cast<uint32_t>(Stream.size());
+}
+
+// Returns the N-th entry in iteration order, or null when N is out of range.
+std::unique_ptr<IPDBInjectedSource>
+NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const {
+  if (N >= getChildCount())
+    return nullptr;
+  return make_unique<NativeInjectedSource>(std::next(Stream.begin(), N)->second,
+                                           File, Strings);
+}
+
+// Returns the entry at the cursor and advances it; null once exhausted.
+std::unique_ptr<IPDBInjectedSource> NativeEnumInjectedSources::getNext() {
+  if (Cur == Stream.end())
+    return nullptr;
+  return make_unique<NativeInjectedSource>((Cur++)->second, File, Strings);
+}
+
+void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); }
+
+} // namespace pdb
+} // namespace llvm
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index 6e4d56443a07..c6621924b516 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -1,9 +1,8 @@
 //==- NativeEnumModules.cpp - Native Symbol Enumerator impl ------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
index 288a9128147a..ac217df1ee48 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
@@ -1,9 +1,8 @@
 //==- NativeEnumTypes.cpp - Native Type Enumerator impl ----------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index 6dde5d08a500..3f393409129b 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -1,9 +1,8 @@
 //===- NativeExeSymbol.cpp - native impl for PDBSymbolExe -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 62950cb3e52a..8e43cf24495a 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -1,9 +1,8 @@
 //===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7807e312365c..8a49cb1c5963 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -1,9 +1,8 @@
 //===- NativeSession.cpp - Native implementation of IPDBSession -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
 #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
@@ -192,7 +192,17 @@ std::unique_ptr<IPDBEnumTables> NativeSession::getEnumTables() const {
 
 std::unique_ptr<IPDBEnumInjectedSources>
 NativeSession::getInjectedSources() const {
-  return nullptr;
+  auto ISS = Pdb->getInjectedSourceStream();
+  if (!ISS) {
+    consumeError(ISS.takeError()); // Error dropped; caller gets null.
+    return nullptr;
+  }
+  auto Strings = Pdb->getStringTable();
+  if (!Strings) {
+    consumeError(Strings.takeError()); // Error dropped; caller gets null.
+    return nullptr;
+  }
+  return make_unique<NativeEnumInjectedSources>(*Pdb, *ISS, *Strings);
 }
 
 std::unique_ptr<IPDBEnumSectionContribs>
diff --git a/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
index 6ebb8cae3a65..704c1254afbf 100644
--- a/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
@@ -1,9 +1,8 @@
 //===- NativeSymbolEnumerator.cpp - info about enumerators ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
index a52561728a98..80d455ad66e9 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypeArray.cpp - info about arrays ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
index 7b0f13f3c075..a08663aa91ba 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypeBuiltin.cpp -------------------------------------- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
index 37176fe083b9..9f5e86281a23 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypeEnum.cpp - info about enum type ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
index a9590fffdb87..405303469c18 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypeFunctionSig.cpp - info about function signature -*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
index bd8ecb6c4007..32dcfc235954 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypePointer.cpp - info about pointer type ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
index 3abf91dcc6a3..be67846c0b24 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -1,9 +1,8 @@
 //===- NativeTypeUDT.cpp - info about class/struct type ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index a1f8786ff12f..983031dfcb78 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -1,9 +1,8 @@
 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,6 +14,7 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -234,7 +234,8 @@ ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
   return ContainerLayout.DirectoryBlocks;
 }
 
-std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
+std::unique_ptr<MappedBlockStream>
+PDBFile::createIndexedStream(uint16_t SN) const {
   if (SN == kInvalidStreamIndex)
     return nullptr;
   return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
@@ -259,8 +260,8 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
     if (!DbiS)
       return DbiS.takeError();
 
-    auto GlobalS = safelyCreateIndexedStream(
-        ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
+    auto GlobalS =
+        safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
     if (!GlobalS)
       return GlobalS.takeError();
     auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
@@ -273,7 +274,7 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
 
 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
   if (!Info) {
-    auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
+    auto InfoS = safelyCreateIndexedStream(StreamPDB);
     if (!InfoS)
       return InfoS.takeError();
     auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
@@ -286,7 +287,7 @@ Expected<InfoStream &> PDBFile::getPDBInfoStream() {
 
 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
   if (!Dbi) {
-    auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
+    auto DbiS = safelyCreateIndexedStream(StreamDBI);
     if (!DbiS)
       return DbiS.takeError();
     auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
@@ -299,7 +300,7 @@ Expected<DbiStream &> PDBFile::getPDBDbiStream() {
 
 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
   if (!Tpi) {
-    auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
+    auto TpiS = safelyCreateIndexedStream(StreamTPI);
     if (!TpiS)
       return TpiS.takeError();
     auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
@@ -315,7 +316,7 @@ Expected<TpiStream &> PDBFile::getPDBIpiStream() {
     if (!hasPDBIpiStream())
       return make_error<RawError>(raw_error_code::no_stream);
 
-    auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
+    auto IpiS = safelyCreateIndexedStream(StreamIPI);
     if (!IpiS)
       return IpiS.takeError();
     auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
@@ -332,8 +333,8 @@ Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
     if (!DbiS)
       return DbiS.takeError();
 
-    auto PublicS = safelyCreateIndexedStream(
-        ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
+    auto PublicS =
+        safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
     if (!PublicS)
       return PublicS.takeError();
     auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
@@ -351,8 +352,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
       return DbiS.takeError();
 
     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
-    auto SymbolS =
-        safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
+    auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
     if (!SymbolS)
       return SymbolS.takeError();
 
@@ -366,17 +366,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
 
 Expected<PDBStringTable &> PDBFile::getStringTable() {
   if (!Strings) {
-    auto IS = getPDBInfoStream();
-    if (!IS)
-      return IS.takeError();
-
-    Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
-    if (!ExpectedNSI)
-      return ExpectedNSI.takeError();
-    uint32_t NameStreamIndex = *ExpectedNSI;
-
-    auto NS =
-        safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
+    auto NS = safelyCreateNamedStream("/names");
     if (!NS)
       return NS.takeError();
 
@@ -391,6 +381,24 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
   return *Strings;
 }
 
+Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
+  if (!InjectedSources) { // Not loaded yet; build and cache it on first use.
+    auto IJS = safelyCreateNamedStream("/src/headerblock");
+    if (!IJS)
+      return IJS.takeError();
+
+    auto Strings = getStringTable(); // NB: local shadows member `Strings`.
+    if (!Strings)
+      return Strings.takeError();
+
+    auto IJ = llvm::make_unique<InjectedSourceStream>(std::move(*IJS));
+    if (auto EC = IJ->reload(*Strings))
+      return std::move(EC);
+    InjectedSources = std::move(IJ); // Cached only after reload() succeeded.
+  }
+  return *InjectedSources;
+}
+
 uint32_t PDBFile::getPointerSize() {
   auto DbiS = getPDBDbiStream();
   if (!DbiS)
@@ -459,16 +467,47 @@ bool PDBFile::hasPDBStringTable() {
   return true;
 }
 
+/// Returns true if the info stream lists a "/src/headerblock" named stream.
+/// Any error hit while answering is consumed and reported as false.
+bool PDBFile::hasPDBInjectedSourceStream() {
+  auto IS = getPDBInfoStream();
+  if (!IS) {
+    // An Expected in the error state must have its error consumed before it
+    // is destroyed; only testing it in a condition does not count.
+    consumeError(IS.takeError());
+    return false;
+  }
+  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
+  if (!ExpectedNSI) {
+    consumeError(ExpectedNSI.takeError());
+    return false;
+  }
+  assert(*ExpectedNSI < getNumStreams());
+  return true;
+}
+
 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
 /// stream with that index actually exists.  If it does not, the return value
-/// will have an MSFError with code msf_error_code::no_stream.  Else, the return
+/// will have a RawError with code raw_error_code::no_stream.  Else, the return
 /// value will contain the stream returned by createIndexedStream().
 Expected<std::unique_ptr<MappedBlockStream>>
-PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
-                                   BinaryStreamRef MsfData,
-                                   uint32_t StreamIndex) const {
+PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
   if (StreamIndex >= getNumStreams())
+    // This rejects kInvalidStreamIndex with an error as well.
     return make_error<RawError>(raw_error_code::no_stream);
-  return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
-                                                Allocator);
+  return createIndexedStream(StreamIndex);
+}
+
+Expected<std::unique_ptr<MappedBlockStream>>
+PDBFile::safelyCreateNamedStream(StringRef Name) {
+  auto IS = getPDBInfoStream(); // Info stream maps names to stream indices.
+  if (!IS)
+    return IS.takeError();
+
+  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
+  if (!ExpectedNSI)
+    return ExpectedNSI.takeError(); // Lookup failure is passed to the caller.
+  uint32_t NameStreamIndex = *ExpectedNSI;
+
+  return safelyCreateIndexedStream(NameStreamIndex); // Bounds-checks the index.
 }
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index e0ceb7499ee5..8f5a048ea4b5 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -1,9 +1,8 @@
 //===- PDBFileBuilder.cpp - PDB File Creation -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,7 +34,7 @@ using namespace llvm::support;
 
 PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
     : Allocator(Allocator), InjectedSourceHashTraits(Strings),
-      InjectedSourceTable(2, InjectedSourceHashTraits) {}
+      InjectedSourceTable(2) {}
 
 PDBFileBuilder::~PDBFileBuilder() {}
 
@@ -190,7 +189,8 @@ Error PDBFileBuilder::finalizeMsfLayout() {
           static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne);
       Entry.CRC = CRC.getCRC();
       StringRef VName = getStringTableBuilder().getStringForId(IS.VNameIndex);
-      InjectedSourceTable.set_as(VName, std::move(Entry));
+      InjectedSourceTable.set_as(VName, std::move(Entry),
+                                 InjectedSourceHashTraits);
     }
 
     uint32_t SrcHeaderBlockSize =
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index afeea32043dd..2be1656e06bb 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -1,9 +1,8 @@
 //===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index d9dcabf3d958..f7f36901e4d4 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -1,9 +1,8 @@
 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,7 +26,13 @@ StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
     : Table(&Table) {}
 
 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
-  return Table->getIdForString(S);
+  // The reference implementation doesn't include code for /src/headerblock
+  // handling, but it can only read natvis entries in lld's PDB files if
+  // this hash function truncates the hash to 16 bits.
+  // PDB/include/misc.h in the reference implementation has a hashSz() function
+  // that returns an unsigned short, which seems to be what's being used for
+  // /src/headerblock.
+  return static_cast<uint16_t>(Table->getIdForString(S));
 }
 
 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
@@ -50,63 +55,75 @@ StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
   return Strings.getStringForId(Id);
 }
 
-// This is a precomputed list of Buckets given the specified number of
-// strings.  Matching the reference algorithm exactly is not strictly
-// necessary for correctness, but it helps when comparing LLD's PDBs with
-// Microsoft's PDBs so as to eliminate superfluous differences.
-static std::map<uint32_t, uint32_t> StringsToBuckets = {
-    {1, 2},
-    {2, 4},
-    {4, 7},
-    {6, 11},
-    {9, 17},
-    {13, 26},
-    {20, 40},
-    {31, 61},
-    {46, 92},
-    {70, 139},
-    {105, 209},
-    {157, 314},
-    {236, 472},
-    {355, 709},
-    {532, 1064},
-    {799, 1597},
-    {1198, 2396},
-    {1798, 3595},
-    {2697, 5393},
-    {4045, 8090},
-    {6068, 12136},
-    {9103, 18205},
-    {13654, 27308},
-    {20482, 40963},
-    {30723, 61445},
-    {46084, 92168},
-    {69127, 138253},
-    {103690, 207380},
-    {155536, 311071},
-    {233304, 466607},
-    {349956, 699911},
-    {524934, 1049867},
-    {787401, 1574801},
-    {1181101, 2362202},
-    {1771652, 3543304},
-    {2657479, 5314957},
-    {3986218, 7972436},
-    {5979328, 11958655},
-    {8968992, 17937983},
-    {13453488, 26906975},
-    {20180232, 40360463},
-    {30270348, 60540695},
-    {45405522, 90811043},
-    {68108283, 136216565},
-    {102162424, 204324848},
-    {153243637, 306487273},
-    {229865455, 459730910},
-    {344798183, 689596366},
-    {517197275, 1034394550},
-    {775795913, 1551591826}};
-
 static uint32_t computeBucketCount(uint32_t NumStrings) {
+  // This is a precomputed list of Buckets given the specified number of
+  // strings.  Matching the reference algorithm exactly is not strictly
+  // necessary for correctness, but it helps when comparing LLD's PDBs with
+  // Microsoft's PDBs so as to eliminate superfluous differences.
+  // The reference implementation does (in nmt.h, NMT::grow()):
+  //   unsigned StringCount = 0;
+  //   unsigned BucketCount = 1;
+  //   fn insert() {
+  //     ++StringCount;
+  //     if (BucketCount * 3 / 4 < StringCount)
+  //       BucketCount = BucketCount * 3 / 2 + 1;
+  //   }
+  // This list contains all StringCount, BucketCount pairs where BucketCount was
+  // just incremented.  It ends before the first BucketCount entry where
+  // BucketCount * 3 would overflow a 32-bit unsigned int.
+  static std::map<uint32_t, uint32_t> StringsToBuckets = {
+      {0, 1},
+      {1, 2},
+      {2, 4},
+      {4, 7},
+      {6, 11},
+      {9, 17},
+      {13, 26},
+      {20, 40},
+      {31, 61},
+      {46, 92},
+      {70, 139},
+      {105, 209},
+      {157, 314},
+      {236, 472},
+      {355, 709},
+      {532, 1064},
+      {799, 1597},
+      {1198, 2396},
+      {1798, 3595},
+      {2697, 5393},
+      {4045, 8090},
+      {6068, 12136},
+      {9103, 18205},
+      {13654, 27308},
+      {20482, 40963},
+      {30723, 61445},
+      {46084, 92168},
+      {69127, 138253},
+      {103690, 207380},
+      {155536, 311071},
+      {233304, 466607},
+      {349956, 699911},
+      {524934, 1049867},
+      {787401, 1574801},
+      {1181101, 2362202},
+      {1771652, 3543304},
+      {2657479, 5314957},
+      {3986218, 7972436},
+      {5979328, 11958655},
+      {8968992, 17937983},
+      {13453488, 26906975},
+      {20180232, 40360463},
+      {30270348, 60540695},
+      {45405522, 90811043},
+      {68108283, 136216565},
+      {102162424, 204324848},
+      {153243637, 306487273},
+      {229865455, 459730910},
+      {344798183, 689596366},
+      {517197275, 1034394550},
+      {775795913, 1551591826},
+      {1163693870, 2327387740}};
   auto Entry = StringsToBuckets.lower_bound(NumStrings);
   assert(Entry != StringsToBuckets.end());
   return Entry->second;
diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index f6466eb80464..a33bf03bf8fb 100644
--- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -1,9 +1,8 @@
 //===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/DebugInfo/PDB/Native/RawError.cpp b/lib/DebugInfo/PDB/Native/RawError.cpp
index dec9797088f2..ed6cf0839675 100644
--- a/lib/DebugInfo/PDB/Native/RawError.cpp
+++ b/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -5,6 +5,7 @@
 using namespace llvm;
 using namespace llvm::pdb;
 
+namespace {
 // FIXME: This class is only here to support the transition to llvm::Error. It
 // will be removed once this transition is complete. Clients should prefer to
 // deal with the Error value directly, rather than converting to error_code.
@@ -44,6 +45,7 @@ public:
     llvm_unreachable("Unrecognized raw_error_code");
   }
 };
+} // namespace
 
 static llvm::ManagedStatic<RawErrorCategory> RawCategory;
 const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; }
diff --git a/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
index 2d8d04ceca4d..003840b6e67e 100644
--- a/lib/DebugInfo/PDB/Native/SymbolStream.cpp
+++ b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
@@ -1,9 +1,8 @@
 //===- SymbolStream.cpp - PDB Symbol Stream Access ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 18708826ffc7..b21b82bf76fd 100644
--- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -1,9 +1,8 @@
 //===- TpiHashing.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index f234d446e6a0..8ee7f897b8bb 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -1,9 +1,8 @@
 //===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -79,14 +78,13 @@ Error TpiStream::reload() {
 
   // Hash indices, hash values, etc come from the hash stream.
   if (Header->HashStreamIndex != kInvalidStreamIndex) {
-    if (Header->HashStreamIndex >= Pdb.getNumStreams())
+    auto HS = Pdb.safelyCreateIndexedStream(Header->HashStreamIndex);
+    if (!HS) {
+      consumeError(HS.takeError());
       return make_error<RawError>(raw_error_code::corrupt_file,
                                   "Invalid TPI hash stream index.");
-
-    auto HS = MappedBlockStream::createIndexedStream(
-        Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex,
-        Pdb.getAllocator());
-    BinaryStreamReader HSR(*HS);
+    }
+    BinaryStreamReader HSR(**HS);
 
     // There should be a hash value for every type record, or no hashes at all.
     uint32_t NumHashValues =
@@ -111,7 +109,7 @@ Error TpiStream::reload() {
         return EC;
     }
 
-    HashStream = std::move(HS);
+    HashStream = std::move(*HS);
   }
 
   Types = llvm::make_unique<LazyRandomTypeCollection>(
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 8dd30018028e..6b308453c2de 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -1,9 +1,8 @@
 //===- TpiStreamBuilder.cpp -   -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -77,7 +76,7 @@ Error TpiStreamBuilder::finalize() {
   H->HashStreamIndex = HashStreamIndex;
   H->HashAuxStreamIndex = kInvalidStreamIndex;
   H->HashKeySize = sizeof(ulittle32_t);
-  H->NumHashBuckets = MinTpiHashBuckets;
+  H->NumHashBuckets = MaxTpiHashBuckets - 1;
 
   // Recall that hash values go into a completely different stream identified by
   // the `HashStreamIndex` field of the `TpiStreamHeader`.  Therefore, the data
@@ -130,7 +129,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
     ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeHashes.size());
     MutableArrayRef<ulittle32_t> HashBuffer(H, TypeHashes.size());
     for (uint32_t I = 0; I < TypeHashes.size(); ++I) {
-      HashBuffer[I] = TypeHashes[I] % MinTpiHashBuckets;
+      HashBuffer[I] = TypeHashes[I] % (MaxTpiHashBuckets - 1);
     }
     ArrayRef<uint8_t> Bytes(
         reinterpret_cast<const uint8_t *>(HashBuffer.data()),
@@ -153,9 +152,12 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
   if (auto EC = Writer.writeObject(*Header))
     return EC;
 
-  for (auto Rec : TypeRecords)
+  for (auto Rec : TypeRecords) {
+    assert(!Rec.empty()); // An empty record will not write anything, but it
+                          // would shift all offsets from here on.
     if (auto EC = Writer.writeBytes(Rec))
       return EC;
+  }
 
   if (HashStreamIndex != kInvalidStreamIndex) {
     auto HVS = WritableMappedBlockStream::createIndexedStream(
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index fc1ad8bcd7cd..e7b968cb7bea 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -1,9 +1,8 @@
 //===- PDB.cpp - base header file for creating a PDB reader ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp
index df0feac2bc40..e452f1d4ced7 100644
--- a/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/lib/DebugInfo/PDB/PDBContext.cpp
@@ -1,9 +1,8 @@
 //===-- PDBContext.cpp ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===/
 
@@ -31,14 +30,14 @@ PDBContext::PDBContext(const COFFObjectFile &Object,
 
 void PDBContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){}
 
-DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
+DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address,
                                              DILineInfoSpecifier Specifier) {
   DILineInfo Result;
-  Result.FunctionName = getFunctionName(Address, Specifier.FNKind);
+  Result.FunctionName = getFunctionName(Address.Address, Specifier.FNKind);
 
   uint32_t Length = 1;
   std::unique_ptr<PDBSymbol> Symbol =
-      Session->findSymbolByAddress(Address, PDB_SymType::None);
+      Session->findSymbolByAddress(Address.Address, PDB_SymType::None);
   if (auto Func = dyn_cast_or_null<PDBSymbolFunc>(Symbol.get())) {
     Length = Func->getLength();
   } else if (auto Data = dyn_cast_or_null<PDBSymbolData>(Symbol.get())) {
@@ -47,7 +46,7 @@ DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
 
   // If we couldn't find a symbol, then just assume 1 byte, so that we get
   // only the line number of the first instruction.
-  auto LineNumbers = Session->findLineNumbersByAddress(Address, Length);
+  auto LineNumbers = Session->findLineNumbersByAddress(Address.Address, Length);
   if (!LineNumbers || LineNumbers->getChildCount() == 0)
     return Result;
 
@@ -64,26 +63,27 @@ DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
 }
 
 DILineInfoTable
-PDBContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address,
+                                       uint64_t Size,
                                        DILineInfoSpecifier Specifier) {
   if (Size == 0)
     return DILineInfoTable();
 
   DILineInfoTable Table;
-  auto LineNumbers = Session->findLineNumbersByAddress(Address, Size);
+  auto LineNumbers = Session->findLineNumbersByAddress(Address.Address, Size);
   if (!LineNumbers || LineNumbers->getChildCount() == 0)
     return Table;
 
   while (auto LineInfo = LineNumbers->getNext()) {
-    DILineInfo LineEntry =
-        getLineInfoForAddress(LineInfo->getVirtualAddress(), Specifier);
+    DILineInfo LineEntry = getLineInfoForAddress(
+        {LineInfo->getVirtualAddress(), Address.SectionIndex}, Specifier);
     Table.push_back(std::make_pair(LineInfo->getVirtualAddress(), LineEntry));
   }
   return Table;
 }
 
 DIInliningInfo
-PDBContext::getInliningInfoForAddress(uint64_t Address,
+PDBContext::getInliningInfoForAddress(object::SectionedAddress Address,
                                       DILineInfoSpecifier Specifier) {
   DIInliningInfo InlineInfo;
   DILineInfo Frame = getLineInfoForAddress(Address, Specifier);
@@ -91,6 +91,11 @@ PDBContext::getInliningInfoForAddress(uint64_t Address,
   return InlineInfo;
 }
 
+std::vector<DILocal>
+PDBContext::getLocalsForAddress(object::SectionedAddress Address) {
+  return std::vector<DILocal>(); // Address is ignored; the result is empty.
+}
+
 std::string PDBContext::getFunctionName(uint64_t Address,
                                         DINameKind NameKind) const {
   if (NameKind == DINameKind::None)
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index 0d8af232cd92..354a99476c4b 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -1,9 +1,8 @@
 //===- PDBExtras.cpp - helper functions and classes for PDBs --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -118,13 +117,37 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_DataKind &Data) {
 }
 
 raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
-                                   const codeview::RegisterId &Reg) {
-  switch (Reg) {
-#define CV_REGISTER(name, val) case codeview::RegisterId::name: OS << #name; return OS;
+                                   const llvm::codeview::CPURegister &CpuReg) {
+  if (CpuReg.Cpu == llvm::codeview::CPUType::ARM64) {
+    switch (CpuReg.Reg) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val)                                                 \
+  case codeview::RegisterId::name:                                             \
+    OS << #name;                                                               \
+    return OS;
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+    default: // Not a named ARM64 register; printed numerically below.
+      break;
+    }
+  } else { // Every other CPU type uses the x86 register names.
+    switch (CpuReg.Reg) {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val)                                                 \
+  case codeview::RegisterId::name:                                             \
+    OS << #name;                                                               \
+    return OS;
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+    default: // Not a named x86 register; printed numerically below.
+      break;
+    }
   }
-  OS << static_cast<int>(Reg);
+  OS << static_cast<int>(CpuReg.Reg); // Fallback: the raw register number.
   return OS;
 }
 
@@ -193,6 +216,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_Lang &Lang) {
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, MSIL, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, HLSL, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, D, OS)
+    CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Swift, OS)
   }
   return OS;
 }
@@ -296,14 +320,17 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
   return OS;
 }
 
-raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
-                                   const PDB_SourceCompression &Compression) {
+raw_ostream &llvm::pdb::dumpPDBSourceCompression(raw_ostream &OS,
+                                                 uint32_t Compression) {
   switch (Compression) {
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, None, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, Huffman, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, LZ, OS)
     CASE_OUTPUT_ENUM_CLASS_STR(PDB_SourceCompression, RunLengthEncoded, "RLE",
                                OS)
+    CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, DotNet, OS)
+  default: // Values without a named case are printed as "Unknown (<value>)".
+    OS << "Unknown (" << Compression << ")";
   }
   return OS;
 }
diff --git a/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index 951909295d13..8eb3311b09e3 100644
--- a/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -1,9 +1,8 @@
 //===- PDBInterfaceAnchors.h - defines class anchor funcions ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Class anchors are necessary per the LLVM Coding style guide, to ensure that
diff --git a/lib/DebugInfo/PDB/PDBSymDumper.cpp b/lib/DebugInfo/PDB/PDBSymDumper.cpp
index 2f819312e54e..0956a32f4a49 100644
--- a/lib/DebugInfo/PDB/PDBSymDumper.cpp
+++ b/lib/DebugInfo/PDB/PDBSymDumper.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymDumper.cpp - ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbol.cpp
index d492edafdafe..34c8ac41d45b 100644
--- a/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbol.cpp - base class for user-facing symbol types --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index cb1a9bee8024..0fa83efb7ae0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolAnnotation.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 13eec9734d02..9452282a8817 100644
--- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolBlock.cpp - -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index bbc5e6dd2a17..9b2883546305 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompiland.cpp - compiland details ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -91,16 +90,16 @@ std::string PDBSymbolCompiland::getSourceFileFullPath() const {
   PDB_Lang Lang = Details ? Details->getLanguage() : PDB_Lang::Cpp;
   auto SrcFiles = Session.getSourceFilesForCompiland(*this);
   if (SrcFiles) {
-    bool LangC = (Lang == PDB_Lang::Cpp || Lang == PDB_Lang::C);
     while (auto File = SrcFiles->getNext()) {
       std::string FileName = File->getFileName();
       auto file_extension = sys::path::extension(FileName);
       if (StringSwitch<bool>(file_extension.lower())
-              .Case(".cpp", LangC)
-              .Case(".c", LangC)
-              .Case(".cc", LangC)
-              .Case(".cxx", LangC)
+              .Case(".cpp", Lang == PDB_Lang::Cpp)
+              .Case(".cc", Lang == PDB_Lang::Cpp)
+              .Case(".cxx", Lang == PDB_Lang::Cpp)
+              .Case(".c", Lang == PDB_Lang::C)
               .Case(".asm", Lang == PDB_Lang::Masm)
+              .Case(".swift", Lang == PDB_Lang::Swift)
               .Default(false))
         return File->getFileName();
     }
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index bdd8535a3ef3..0d86dfe1e632 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompilandDetails.cpp - compiland details --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index f88df2df6be4..61f119405fd9 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolCompilandEnv.cpp - compiland env variables ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 10a21806adb6..6c9a4aa76c3d 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolCustom.cpp - compiler-specific types ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolData.cpp b/lib/DebugInfo/PDB/PDBSymbolData.cpp
index 7de94670bcb3..d2b82111ccd5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolData.cpp - PDB data (e.g. variable) accessors ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index eb409412af59..c85756c43e47 100644
--- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolExe.cpp - ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 75063cb3e7f8..7c3ba981fd6b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolFunc.cpp - --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index af8aafa7be96..66433dc17b49 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolFuncDebugEnd.cpp - ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index 77b510873bea..fe32c93c0121 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolFuncDebugStart.cpp - ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index c802b97925e6..1fffe69a0c83 100644
--- a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolLabel.cpp - -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index a2dd2ab92dd9..08697683f641 100644
--- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolPublicSymbol.cpp - ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index d227e3a7a60c..6483858183e5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolThunk.cpp - -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index a2064d1ac1eb..a0d521abe43f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeArray.cpp - ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index f0376c05557f..08467059b5e1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeBaseClass.cpp - -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index a9f59e5f9d4d..a0dd9ef601c0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeBuiltin.cpp - ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index cfb347fbac55..6723894c90ea 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeCustom.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 4eb48997635a..4a25a391f278 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -1,10 +1,9 @@
 //===- PDBSymbolTypeDimension.cpp - --------------------------------*- C++
 //-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index 2e88d9eb284a..b9fdf6aec811 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeEnum.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index 00d2d51aa8a7..4ffea42cbb0a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFriend.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index 0399e110d592..683e93548fb1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFunctionArg.cpp - --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index c0564d3941dd..292320a6fe6d 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeFunctionSig.cpp - --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 1faaf9c67a2c..e80e6c716572 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymboTypelManaged.cpp - ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index cf5a369116a9..462fc315359b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypePointer.cpp -----------------------------------*- C++ -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 1838f1612b49..70749d9bf5f5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeTypedef.cpp ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index 2f5222f34fe4..d302c29a3bec 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeUDT.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 0262f91e8336..4e2a45116d51 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeVTable.cpp - --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index 16c3a3606981..78957620e083 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolTypeVTableShape.cpp - ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index 7bcf9457a2b6..650d01183171 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolUnknown.cpp - -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index ecf2126f8802..74afbdb18086 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -1,9 +1,8 @@
 //===- PDBSymbolUsingNamespace.cpp - ------------------- --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index 5f4390bbaf12..acb1599480b0 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -1,9 +1,8 @@
 //===- UDTLayout.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
index c1e2536d6e20..b2bfef251485 100644
--- a/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -1,9 +1,8 @@
 //===- lib/DebugInfo/Symbolize/DIPrinter.cpp ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cmath>
@@ -78,8 +78,13 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
   std::string Filename = Info.FileName;
   if (Filename == kDILineInfoBadString)
     Filename = kBadString;
+  else if (Basenames)
+    Filename = llvm::sys::path::filename(Filename);
   if (!Verbose) {
-    OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+    OS << Filename << ":" << Info.Line;
+    if (Style == OutputStyle::LLVM)
+      OS << ":" << Info.Column;
+    OS << "\n";
     printContext(Filename, Info.Line);
     return;
   }
@@ -117,5 +122,28 @@ DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
   return *this;
 }
 
+DIPrinter &DIPrinter::operator<<(const DILocal &Local) {
+  OS << Local.FunctionName << '\n';
+  OS << Local.Name << '\n';
+  if (Local.DeclFile.empty())
+    OS << "??";
+  else
+    OS << Local.DeclFile;
+  OS << ':' << Local.DeclLine << '\n';
+  if (Local.FrameOffset)
+    OS << *Local.FrameOffset << ' ';
+  else
+    OS << "?? ";
+  if (Local.Size)
+    OS << *Local.Size << ' ';
+  else
+    OS << "?? ";
+  if (Local.TagOffset)
+    OS << *Local.TagOffset << '\n';
+  else
+    OS << "??\n";
+  return *this;
+}
+
 } // end namespace symbolize
 } // end namespace llvm
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 08be524ab464..2765bf44d504 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- SymbolizableObjectFile.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,8 +42,9 @@ getDILineInfoSpecifier(FunctionNameKind FNKind) {
 }
 
 ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
-SymbolizableObjectFile::create(object::ObjectFile *Obj,
+SymbolizableObjectFile::create(const object::ObjectFile *Obj,
                                std::unique_ptr<DIContext> DICtx) {
+  assert(DICtx);
   std::unique_ptr<SymbolizableObjectFile> res(
       new SymbolizableObjectFile(Obj, std::move(DICtx)));
   std::unique_ptr<DataExtractor> OpdExtractor;
@@ -54,13 +54,13 @@ SymbolizableObjectFile::create(object::ObjectFile *Obj,
   if (Obj->getArch() == Triple::ppc64) {
     for (section_iterator Section : Obj->sections()) {
       StringRef Name;
-      StringRef Data;
       if (auto EC = Section->getName(Name))
         return EC;
       if (Name == ".opd") {
-        if (auto EC = Section->getContents(Data))
-          return EC;
-        OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
+        Expected<StringRef> E = Section->getContents();
+        if (!E)
+          return errorToErrorCode(E.takeError());
+        OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
                                              Obj->getBytesInAddress()));
         OpdAddress = Section->getAddress();
         break;
@@ -79,10 +79,30 @@ SymbolizableObjectFile::create(object::ObjectFile *Obj,
       if (auto EC = res->addCoffExportSymbols(CoffObj))
         return EC;
   }
+
+  std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
+                                                &Os = res->Objects;
+  auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
+    // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
+    // pick the one with the largest Size. This helps us avoid symbols with no
+    // size information (Size=0).
+    llvm::sort(S);
+    auto I = S.begin(), E = S.end(), J = S.begin();
+    while (I != E) {
+      auto OI = I;
+      while (++I != E && OI->first.Addr == I->first.Addr) {
+      }
+      *J++ = I[-1];
+    }
+    S.erase(J, S.end());
+  };
+  Uniquify(Fs);
+  Uniquify(Os);
+
   return std::move(res);
 }
 
-SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
+SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
                                                std::unique_ptr<DIContext> DICtx)
     : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
 
@@ -128,7 +148,7 @@ std::error_code SymbolizableObjectFile::addCoffExportSymbols(
     uint64_t SymbolStart = ImageBase + Export.Offset;
     uint64_t SymbolSize = NextOffset - Export.Offset;
     SymbolDesc SD = {SymbolStart, SymbolSize};
-    Functions.insert(std::make_pair(SD, Export.Name));
+    Functions.emplace_back(SD, Export.Name);
   }
   return std::error_code();
 }
@@ -137,6 +157,11 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
                                                   uint64_t SymbolSize,
                                                   DataExtractor *OpdExtractor,
                                                   uint64_t OpdAddress) {
+  // Avoid adding symbols from an unknown/undefined section.
+  const ObjectFile *Obj = Symbol.getObject();
+  Expected<section_iterator> Sec = Symbol.getSection();
+  if (!Sec || (Obj && Obj->section_end() == *Sec))
+    return std::error_code();
   Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
   if (!SymbolTypeOrErr)
     return errorToErrorCode(SymbolTypeOrErr.takeError());
@@ -170,7 +195,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
   // with same address size. Make sure we choose the correct one.
   auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
   SymbolDesc SD = { SymbolAddress, SymbolSize };
-  M.insert(std::make_pair(SD, SymbolName));
+  M.emplace_back(SD, SymbolName);
   return std::error_code();
 }
 
@@ -191,12 +216,10 @@ bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
                                                     std::string &Name,
                                                     uint64_t &Addr,
                                                     uint64_t &Size) const {
-  const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
-  if (SymbolMap.empty())
-    return false;
-  SymbolDesc SD = { Address, Address };
-  auto SymbolIterator = SymbolMap.upper_bound(SD);
-  if (SymbolIterator == SymbolMap.begin())
+  const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
+  std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
+  auto SymbolIterator = llvm::upper_bound(Symbols, SD);
+  if (SymbolIterator == Symbols.begin())
     return false;
   --SymbolIterator;
   if (SymbolIterator->first.Size != 0 &&
@@ -218,19 +241,21 @@ bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
          isa<DWARFContext>(DebugInfoContext.get());
 }
 
-DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
-                                                 FunctionNameKind FNKind,
-                                                 bool UseSymbolTable) const {
-  DILineInfo LineInfo;
-  if (DebugInfoContext) {
-    LineInfo = DebugInfoContext->getLineInfoForAddress(
-        ModuleOffset, getDILineInfoSpecifier(FNKind));
-  }
+DILineInfo
+SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
+                                      FunctionNameKind FNKind,
+                                      bool UseSymbolTable) const {
+  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+    ModuleOffset.SectionIndex =
+        getModuleSectionIndexForAddress(ModuleOffset.Address);
+  DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress(
+      ModuleOffset, getDILineInfoSpecifier(FNKind));
+
   // Override function name from symbol table if necessary.
   if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
     std::string FunctionName;
     uint64_t Start, Size;
-    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
                                FunctionName, Start, Size)) {
       LineInfo.FunctionName = FunctionName;
     }
@@ -239,12 +264,14 @@ DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
 }
 
 DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
-    uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
-  DIInliningInfo InlinedContext;
+    object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
+    bool UseSymbolTable) const {
+  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+    ModuleOffset.SectionIndex =
+        getModuleSectionIndexForAddress(ModuleOffset.Address);
+  DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+      ModuleOffset, getDILineInfoSpecifier(FNKind));
 
-  if (DebugInfoContext)
-    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
-        ModuleOffset, getDILineInfoSpecifier(FNKind));
   // Make sure there is at least one frame in context.
   if (InlinedContext.getNumberOfFrames() == 0)
     InlinedContext.addFrame(DILineInfo());
@@ -253,7 +280,7 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
   if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
     std::string FunctionName;
     uint64_t Start, Size;
-    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
                                FunctionName, Start, Size)) {
       InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
           ->FunctionName = FunctionName;
@@ -263,9 +290,34 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
   return InlinedContext;
 }
 
-DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
+DIGlobal SymbolizableObjectFile::symbolizeData(
+    object::SectionedAddress ModuleOffset) const {
   DIGlobal Res;
-  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
-                         Res.Size);
+  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
+                         Res.Start, Res.Size);
   return Res;
 }
+
+std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
+    object::SectionedAddress ModuleOffset) const {
+  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+    ModuleOffset.SectionIndex =
+        getModuleSectionIndexForAddress(ModuleOffset.Address);
+  return DebugInfoContext->getLocalsForAddress(ModuleOffset);
+}
+
+/// Search for the first occurrence of specified Address in ObjectFile.
+uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
+    uint64_t Address) const {
+
+  for (SectionRef Sec : Module->sections()) {
+    if (!Sec.isText() || Sec.isVirtual())
+      continue;
+
+    if (Address >= Sec.getAddress() &&
+        Address < Sec.getAddress() + Sec.getSize())
+      return Sec.getIndex();
+  }
+
+  return object::SectionedAddress::UndefSection;
+}
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 216cca8de4f5..9cab94178c1b 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -1,9 +1,8 @@
 //===- SymbolizableObjectFile.h ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,14 +31,17 @@ namespace symbolize {
 class SymbolizableObjectFile : public SymbolizableModule {
 public:
   static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
-  create(object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+  create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
 
-  DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind,
+  DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
+                           FunctionNameKind FNKind,
                            bool UseSymbolTable) const override;
-  DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+  DIInliningInfo symbolizeInlinedCode(object::SectionedAddress ModuleOffset,
                                       FunctionNameKind FNKind,
                                       bool UseSymbolTable) const override;
-  DIGlobal symbolizeData(uint64_t ModuleOffset) const override;
+  DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
+  std::vector<DILocal>
+  symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
 
   // Return true if this is a 32-bit x86 PE COFF module.
   bool isWin32Module() const override;
@@ -63,7 +65,10 @@ private:
                             uint64_t OpdAddress = 0);
   std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
 
-  object::ObjectFile *Module;
+  /// Search for the first occurrence of specified Address in ObjectFile.
+  uint64_t getModuleSectionIndexForAddress(uint64_t Address) const;
+
+  const object::ObjectFile *Module;
   std::unique_ptr<DIContext> DebugInfoContext;
 
   struct SymbolDesc {
@@ -72,14 +77,14 @@ private:
     // the following symbol.
     uint64_t Size;
 
-    friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
-      return s1.Addr < s2.Addr;
+    bool operator<(const SymbolDesc &RHS) const {
+      return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
     }
   };
-  std::map<SymbolDesc, StringRef> Functions;
-  std::map<SymbolDesc, StringRef> Objects;
+  std::vector<std::pair<SymbolDesc, StringRef>> Functions;
+  std::vector<std::pair<SymbolDesc, StringRef>> Objects;
 
-  SymbolizableObjectFile(object::ObjectFile *Obj,
+  SymbolizableObjectFile(const object::ObjectFile *Obj,
                          std::unique_ptr<DIContext> DICtx);
 };
 
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
index 59a85d6c3fcf..6a619f8f2f37 100644
--- a/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -1,9 +1,8 @@
 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,7 +16,6 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/COFF.h"
-#include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/PDB/PDB.h"
 #include "llvm/DebugInfo/PDB/PDBContext.h"
@@ -25,6 +23,7 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/CRC.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -34,7 +33,6 @@
 #include "llvm/Support/Path.h"
 #include <algorithm>
 #include <cassert>
-#include <cstdlib>
 #include <cstring>
 
 #if defined(_MSC_VER)
@@ -54,14 +52,8 @@ namespace llvm {
 namespace symbolize {
 
 Expected<DILineInfo>
-LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
-                              uint64_t ModuleOffset, StringRef DWPName) {
-  SymbolizableModule *Info;
-  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
-    Info = InfoOrErr.get();
-  else
-    return InfoOrErr.takeError();
-
+LLVMSymbolizer::symbolizeCodeCommon(SymbolizableModule *Info,
+                                    object::SectionedAddress ModuleOffset) {
   // A null module means an error has already been reported. Return an empty
   // result.
   if (!Info)
@@ -70,7 +62,7 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
   // If the user is giving us relative addresses, add the preferred base of the
   // object to the offset before we do the query. It's what DIContext expects.
   if (Opts.RelativeAddresses)
-    ModuleOffset += Info->getModulePreferredBase();
+    ModuleOffset.Address += Info->getModulePreferredBase();
 
   DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
                                             Opts.UseSymbolTable);
@@ -79,11 +71,37 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
   return LineInfo;
 }
 
+Expected<DILineInfo>
+LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
+                              object::SectionedAddress ModuleOffset) {
+  StringRef ModuleName = Obj.getFileName();
+  auto I = Modules.find(ModuleName);
+  if (I != Modules.end())
+    return symbolizeCodeCommon(I->second.get(), ModuleOffset);
+
+  std::unique_ptr<DIContext> Context =
+        DWARFContext::create(Obj, nullptr, DWARFContext::defaultErrorHandler);
+  Expected<SymbolizableModule *> InfoOrErr =
+                     createModuleInfo(&Obj, std::move(Context), ModuleName);
+  if (!InfoOrErr)
+    return InfoOrErr.takeError();
+  return symbolizeCodeCommon(*InfoOrErr, ModuleOffset);
+}
+
+Expected<DILineInfo>
+LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+                              object::SectionedAddress ModuleOffset) {
+  Expected<SymbolizableModule *> InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (!InfoOrErr)
+    return InfoOrErr.takeError();
+  return symbolizeCodeCommon(*InfoOrErr, ModuleOffset);
+}
+
 Expected<DIInliningInfo>
 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
-                                     uint64_t ModuleOffset, StringRef DWPName) {
+                                     object::SectionedAddress ModuleOffset) {
   SymbolizableModule *Info;
-  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
+  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
     Info = InfoOrErr.get();
   else
     return InfoOrErr.takeError();
@@ -96,7 +114,7 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
   // If the user is giving us relative addresses, add the preferred base of the
   // object to the offset before we do the query. It's what DIContext expects.
   if (Opts.RelativeAddresses)
-    ModuleOffset += Info->getModulePreferredBase();
+    ModuleOffset.Address += Info->getModulePreferredBase();
 
   DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
       ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
@@ -109,8 +127,9 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
   return InlinedContext;
 }
 
-Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
-                                                 uint64_t ModuleOffset) {
+Expected<DIGlobal>
+LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+                              object::SectionedAddress ModuleOffset) {
   SymbolizableModule *Info;
   if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
     Info = InfoOrErr.get();
@@ -126,7 +145,7 @@ Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
   // the object to the offset before we do the query. It's what DIContext
   // expects.
   if (Opts.RelativeAddresses)
-    ModuleOffset += Info->getModulePreferredBase();
+    ModuleOffset.Address += Info->getModulePreferredBase();
 
   DIGlobal Global = Info->symbolizeData(ModuleOffset);
   if (Opts.Demangle)
@@ -134,6 +153,29 @@ Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
   return Global;
 }
 
+Expected<std::vector<DILocal>>
+LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
+                               object::SectionedAddress ModuleOffset) {
+  SymbolizableModule *Info;
+  if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
+    Info = InfoOrErr.get();
+  else
+    return InfoOrErr.takeError();
+
+  // A null module means an error has already been reported. Return an empty
+  // result.
+  if (!Info)
+    return std::vector<DILocal>();
+
+  // If the user is giving us relative addresses, add the preferred base of
+  // the object to the offset before we do the query. It's what DIContext
+  // expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset.Address += Info->getModulePreferredBase();
+
+  return Info->symbolizeFrame(ModuleOffset);
+}
+
 void LLVMSymbolizer::flush() {
   ObjectForUBPathAndArch.clear();
   BinaryForPath.clear();
@@ -163,42 +205,45 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
       MemoryBuffer::getFileOrSTDIN(Path);
   if (!MB)
     return false;
-  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
+  return CRCHash == llvm::crc32(0, MB.get()->getBuffer());
 }
 
 bool findDebugBinary(const std::string &OrigPath,
                      const std::string &DebuglinkName, uint32_t CRCHash,
+                     const std::string &FallbackDebugPath,
                      std::string &Result) {
-  std::string OrigRealPath = OrigPath;
-#if defined(HAVE_REALPATH)
-  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
-    OrigRealPath = RP;
-    free(RP);
-  }
-#endif
-  SmallString<16> OrigDir(OrigRealPath);
+  SmallString<16> OrigDir(OrigPath);
   llvm::sys::path::remove_filename(OrigDir);
   SmallString<16> DebugPath = OrigDir;
-  // Try /path/to/original_binary/debuglink_name
+  // Try relative/path/to/original_binary/debuglink_name
   llvm::sys::path::append(DebugPath, DebuglinkName);
   if (checkFileCRC(DebugPath, CRCHash)) {
     Result = DebugPath.str();
     return true;
   }
-  // Try /path/to/original_binary/.debug/debuglink_name
+  // Try relative/path/to/original_binary/.debug/debuglink_name
   DebugPath = OrigDir;
   llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
   if (checkFileCRC(DebugPath, CRCHash)) {
     Result = DebugPath.str();
     return true;
   }
+  // Make the path absolute so that lookups will go to
+  // "/usr/lib/debug/full/path/to/debug", not
+  // "/usr/lib/debug/to/debug"
+  llvm::sys::fs::make_absolute(OrigDir);
+  if (!FallbackDebugPath.empty()) {
+    // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
+    DebugPath = FallbackDebugPath;
+  } else {
 #if defined(__NetBSD__)
-  // Try /usr/libdata/debug/path/to/original_binary/debuglink_name
-  DebugPath = "/usr/libdata/debug";
+    // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
+    DebugPath = "/usr/libdata/debug";
 #else
-  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
-  DebugPath = "/usr/lib/debug";
+    // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
+    DebugPath = "/usr/lib/debug";
 #endif
+  }
   llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
                           DebuglinkName);
   if (checkFileCRC(DebugPath, CRCHash)) {
@@ -217,9 +262,12 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
     Section.getName(Name);
     Name = Name.substr(Name.find_first_not_of("._"));
     if (Name == "gnu_debuglink") {
-      StringRef Data;
-      Section.getContents(Data);
-      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
+      Expected<StringRef> ContentsOrErr = Section.getContents();
+      if (!ContentsOrErr) {
+        consumeError(ContentsOrErr.takeError());
+        return false;
+      }
+      DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
       uint32_t Offset = 0;
       if (const char *DebugNameStr = DE.getCStr(&Offset)) {
         // 4-byte align the offset.
@@ -284,7 +332,8 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
   std::string DebugBinaryPath;
   if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
     return nullptr;
-  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
+  if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath,
+                       DebugBinaryPath))
     return nullptr;
   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
   if (!DbgObjOrErr) {
@@ -298,15 +347,14 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
 Expected<LLVMSymbolizer::ObjectPair>
 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
                                       const std::string &ArchName) {
-  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
-  if (I != ObjectPairForPathArch.end()) {
+  auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+  if (I != ObjectPairForPathArch.end())
     return I->second;
-  }
 
   auto ObjOrErr = getOrCreateObject(Path, ArchName);
   if (!ObjOrErr) {
-    ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName),
-                                                ObjectPair(nullptr, nullptr)));
+    ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
+                                  ObjectPair(nullptr, nullptr));
     return ObjOrErr.takeError();
   }
 
@@ -321,46 +369,43 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  ObjectPairForPathArch.insert(
-      std::make_pair(std::make_pair(Path, ArchName), Res));
+  ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
   return Res;
 }
 
 Expected<ObjectFile *>
 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
                                   const std::string &ArchName) {
-  const auto &I = BinaryForPath.find(Path);
-  Binary *Bin = nullptr;
-  if (I == BinaryForPath.end()) {
+  Binary *Bin;
+  auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
+  if (!Pair.second) {
+    Bin = Pair.first->second.getBinary();
+  } else {
     Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
-    if (!BinOrErr) {
-      BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>()));
+    if (!BinOrErr)
       return BinOrErr.takeError();
-    }
-    Bin = BinOrErr->getBinary();
-    BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
-  } else {
-    Bin = I->second.getBinary();
+    Pair.first->second = std::move(BinOrErr.get());
+    Bin = Pair.first->second.getBinary();
   }
 
   if (!Bin)
     return static_cast<ObjectFile *>(nullptr);
 
   if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
-    const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
-    if (I != ObjectForUBPathAndArch.end()) {
+    auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
+    if (I != ObjectForUBPathAndArch.end())
       return I->second.get();
-    }
+
     Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
         UB->getObjectForArch(ArchName);
     if (!ObjOrErr) {
-      ObjectForUBPathAndArch.insert(std::make_pair(
-          std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>()));
+      ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+                                     std::unique_ptr<ObjectFile>());
       return ObjOrErr.takeError();
     }
     ObjectFile *Res = ObjOrErr->get();
-    ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
-                                                 std::move(ObjOrErr.get())));
+    ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+                                   std::move(ObjOrErr.get()));
     return Res;
   }
   if (Bin->isObject()) {
@@ -370,12 +415,28 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path,
 }
 
 Expected<SymbolizableModule *>
-LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
-                                      StringRef DWPName) {
-  const auto &I = Modules.find(ModuleName);
-  if (I != Modules.end()) {
+LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
+                                 std::unique_ptr<DIContext> Context,
+                                 StringRef ModuleName) {
+  auto InfoOrErr =
+      SymbolizableObjectFile::create(Obj, std::move(Context));
+  std::unique_ptr<SymbolizableModule> SymMod;
+  if (InfoOrErr)
+    SymMod = std::move(*InfoOrErr);
+  auto InsertResult =
+      Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
+  assert(InsertResult.second);
+  if (std::error_code EC = InfoOrErr.getError())
+    return errorCodeToError(EC);
+  return InsertResult.first->second.get();
+}
+
+Expected<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+  auto I = Modules.find(ModuleName);
+  if (I != Modules.end())
     return I->second.get();
-  }
+
   std::string BinaryName = ModuleName;
   std::string ArchName = Opts.DefaultArch;
   size_t ColonPos = ModuleName.find_last_of(':');
@@ -390,8 +451,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
   auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
   if (!ObjectsOrErr) {
     // Failed to find valid object file.
-    Modules.insert(
-        std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
+    Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
     return ObjectsOrErr.takeError();
   }
   ObjectPair Objects = ObjectsOrErr.get();
@@ -408,8 +468,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
       std::unique_ptr<IPDBSession> Session;
       if (auto Err = loadDataForEXE(PDB_ReaderType::DIA,
                                     Objects.first->getFileName(), Session)) {
-        Modules.insert(
-            std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
+        Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
         // Return along the PDB filename to provide more context
         return createFileError(PDBFileName, std::move(Err));
       }
@@ -417,20 +476,10 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
     }
   }
   if (!Context)
-    Context = DWARFContext::create(*Objects.second, nullptr,
-                                   DWARFContext::defaultErrorHandler, DWPName);
-  assert(Context);
-  auto InfoOrErr =
-      SymbolizableObjectFile::create(Objects.first, std::move(Context));
-  std::unique_ptr<SymbolizableModule> SymMod;
-  if (InfoOrErr)
-    SymMod = std::move(InfoOrErr.get());
-  auto InsertResult =
-      Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
-  assert(InsertResult.second);
-  if (auto EC = InfoOrErr.getError())
-    return errorCodeToError(EC);
-  return InsertResult.first->second.get();
+    Context =
+        DWARFContext::create(*Objects.second, nullptr,
+                             DWARFContext::defaultErrorHandler, Opts.DWPName);
+  return createModuleInfo(Objects.first, std::move(Context), ModuleName);
 }
 
 namespace {
diff --git a/lib/Demangle/Demangle.cpp b/lib/Demangle/Demangle.cpp
new file mode 100644
index 000000000000..5f921537b9bd
--- /dev/null
+++ b/lib/Demangle/Demangle.cpp
@@ -0,0 +1,36 @@
+//===-- Demangle.cpp - Common demangling functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains definitions of common demangling functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include <cstdlib>
+
+static bool isItaniumEncoding(const std::string &MangledName) {
+  size_t Pos = MangledName.find_first_not_of('_');
+  // A valid Itanium encoding requires 1-4 leading underscores, followed by 'Z'.
+  return Pos > 0 && Pos <= 4 && MangledName[Pos] == 'Z';
+}
+
+std::string llvm::demangle(const std::string &MangledName) {
+  char *Demangled;
+  if (isItaniumEncoding(MangledName))
+    Demangled = itaniumDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+  else
+    Demangled =
+        microsoftDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+
+  if (!Demangled)
+    return MangledName;
+
+  std::string Ret = Demangled;
+  free(Demangled);
+  return Ret;
+}
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index b2de0be2b70c..5c99c70e3cc6 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -1,9 +1,8 @@
 //===------------------------- ItaniumDemangle.cpp ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index 51ffa0bff7f3..bf7d77638f34 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -1,9 +1,8 @@
 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,7 +17,7 @@
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
 
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/StringView.h"
 #include "llvm/Demangle/Utility.h"
 
@@ -59,14 +58,18 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
     // what.
     break;
   default:
-    Error = true;
-    return false;
+    // isMemberPointer() is called only if isPointerType() returns true,
+    // and it rejects other prefixes.
+    DEMANGLE_UNREACHABLE;
   }
 
   // If it starts with a number, then 6 indicates a non-member function
   // pointer, and 8 indicates a member function pointer.
   if (startsWithDigit(MangledName)) {
-    assert(MangledName[0] == '6' || MangledName[0] == '8');
+    if (MangledName[0] != '6' && MangledName[0] != '8') {
+      Error = true;
+      return false;
+    }
     return (MangledName[0] == '8');
   }
 
@@ -76,7 +79,10 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
   MangledName.consumeFront('I'); // restrict
   MangledName.consumeFront('F'); // unaligned
 
-  assert(!MangledName.empty());
+  if (MangledName.empty()) {
+    Error = true;
+    return false;
+  }
 
   // The next value should be either ABCD (non-member) or QRST (member).
   switch (MangledName.front()) {
@@ -136,8 +142,6 @@ consumeSpecialIntrinsicKind(StringView &MangledName) {
 static bool startsWithLocalScopePattern(StringView S) {
   if (!S.consumeFront('?'))
     return false;
-  if (S.size() < 2)
-    return false;
 
   size_t End = S.find('?');
   if (End == StringView::npos)
@@ -234,10 +238,10 @@ demanglePointerCVQualifiers(StringView &MangledName) {
   case 'S':
     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
                           PointerAffinity::Pointer);
-  default:
-    assert(false && "Ty is not a pointer type!");
   }
-  return std::make_pair(Q_None, PointerAffinity::Pointer);
+  // This function is only called if isPointerType() returns true,
+  // and it only returns true for the six cases listed above.
+  DEMANGLE_UNREACHABLE;
 }
 
 StringView Demangler::copyString(StringView Borrowed) {
@@ -265,12 +269,16 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
     NI->Name = "`RTTI Complete Object Locator'";
     break;
   default:
-    LLVM_BUILTIN_UNREACHABLE;
+    DEMANGLE_UNREACHABLE;
   }
   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
   SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
   STSN->Name = QN;
   bool IsMember = false;
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
   char Front = MangledName.popFront();
   if (Front != '6' && Front != '7') {
     Error = true;
@@ -284,9 +292,10 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
 }
 
 LocalStaticGuardVariableNode *
-Demangler::demangleLocalStaticGuard(StringView &MangledName) {
+Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
   LocalStaticGuardIdentifierNode *LSGI =
       Arena.alloc<LocalStaticGuardIdentifierNode>();
+  LSGI->IsThread = IsThread;
   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
   LocalStaticGuardVariableNode *LSGVN =
       Arena.alloc<LocalStaticGuardVariableNode>();
@@ -379,11 +388,11 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
   if (MangledName.consumeFront('?'))
     IsKnownStaticDataMember = true;
 
-  QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+  SymbolNode *Symbol = demangleDeclarator(MangledName);
+  if (Error)
+    return nullptr;
 
-  SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
   FunctionSymbolNode *FSN = nullptr;
-  Symbol->Name = QN;
 
   if (Symbol->kind() == NodeKind::VariableSymbol) {
     DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
@@ -401,7 +410,8 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
     }
 
     FSN = demangleFunctionEncoding(MangledName);
-    FSN->Name = synthesizeQualifiedName(Arena, DSIN);
+    if (FSN)
+      FSN->Name = synthesizeQualifiedName(Arena, DSIN);
   } else {
     if (IsKnownStaticDataMember) {
       // This was supposed to be a static data member, but we got a function.
@@ -419,10 +429,10 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
 
 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
-  if (SIK == SpecialIntrinsicKind::None)
-    return nullptr;
 
   switch (SIK) {
+  case SpecialIntrinsicKind::None:
+    return nullptr;
   case SpecialIntrinsicKind::StringLiteralSymbol:
     return demangleStringLiteral(MangledName);
   case SpecialIntrinsicKind::Vftable:
@@ -433,7 +443,9 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   case SpecialIntrinsicKind::VcallThunk:
     return demangleVcallThunkNode(MangledName);
   case SpecialIntrinsicKind::LocalStaticGuard:
-    return demangleLocalStaticGuard(MangledName);
+    return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
+  case SpecialIntrinsicKind::LocalStaticThreadGuard:
+    return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
   case SpecialIntrinsicKind::RttiTypeDescriptor: {
     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
     if (Error)
@@ -453,11 +465,16 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
   case SpecialIntrinsicKind::DynamicInitializer:
-    return demangleInitFiniStub(MangledName, false);
+    return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
   case SpecialIntrinsicKind::DynamicAtexitDestructor:
-    return demangleInitFiniStub(MangledName, true);
-  default:
+    return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
+  case SpecialIntrinsicKind::Typeof:
+  case SpecialIntrinsicKind::UdtReturning:
+    // It's unclear which tools produce these manglings, so demangling
+    // support is not (yet?) implemented.
     break;
+  case SpecialIntrinsicKind::Unknown:
+    DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
   }
   Error = true;
   return nullptr;
@@ -467,11 +484,15 @@ IdentifierNode *
 Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
   assert(MangledName.startsWith('?'));
   MangledName = MangledName.dropFront();
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
 
   if (MangledName.consumeFront("__"))
     return demangleFunctionIdentifierCode(
         MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
-  else if (MangledName.consumeFront("_"))
+  if (MangledName.consumeFront("_"))
     return demangleFunctionIdentifierCode(MangledName,
                                           FunctionIdentifierCodeGroup::Under);
   return demangleFunctionIdentifierCode(MangledName,
@@ -497,16 +518,22 @@ LiteralOperatorIdentifierNode *
 Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
   LiteralOperatorIdentifierNode *N =
       Arena.alloc<LiteralOperatorIdentifierNode>();
-  N->Name = demangleSimpleString(MangledName, false);
+  N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
   return N;
 }
 
-static IntrinsicFunctionKind
-translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
+IntrinsicFunctionKind
+Demangler::translateIntrinsicFunctionCode(char CH,
+                                          FunctionIdentifierCodeGroup Group) {
+  using IFK = IntrinsicFunctionKind;
+  if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
+    Error = true;
+    return IFK::None;
+  }
+
   // Not all ? identifiers are intrinsics *functions*.  This function only maps
   // operator codes for the special functions, all others are handled elsewhere,
   // hence the IFK::None entries in the table.
-  using IFK = IntrinsicFunctionKind;
   static IFK Basic[36] = {
       IFK::None,             // ?0 # Foo::Foo()
       IFK::None,             // ?1 # Foo::~Foo()
@@ -606,8 +633,8 @@ translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
                                        // iter
       IFK::None,                       // ?__J local static thread guard
       IFK::None,                       // ?__K operator ""_name
-      IFK::CoAwait,                    // ?__L co_await
-      IFK::None,                       // ?__M <unused>
+      IFK::CoAwait,                    // ?__L operator co_await
+      IFK::Spaceship,                  // ?__M operator<=>
       IFK::None,                       // ?__N <unused>
       IFK::None,                       // ?__O <unused>
       IFK::None,                       // ?__P <unused>
@@ -632,12 +659,16 @@ translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
   case FunctionIdentifierCodeGroup::DoubleUnder:
     return DoubleUnder[Index];
   }
-  LLVM_BUILTIN_UNREACHABLE;
+  DEMANGLE_UNREACHABLE;
 }
 
 IdentifierNode *
 Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
                                           FunctionIdentifierCodeGroup Group) {
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
   switch (Group) {
   case FunctionIdentifierCodeGroup::Basic:
     switch (char CH = MangledName.popFront()) {
@@ -650,7 +681,6 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
           translateIntrinsicFunctionCode(CH, Group));
     }
-    break;
   case FunctionIdentifierCodeGroup::Under:
     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
         translateIntrinsicFunctionCode(MangledName.popFront(), Group));
@@ -663,13 +693,17 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
           translateIntrinsicFunctionCode(CH, Group));
     }
   }
-  // No Mangling Yet:      Spaceship,                    // operator<=>
 
-  return nullptr;
+  DEMANGLE_UNREACHABLE;
 }
 
 SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
                                              QualifiedNameNode *Name) {
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
+
   // Read a variable.
   switch (MangledName.front()) {
   case '0':
@@ -680,8 +714,6 @@ SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
     StorageClass SC = demangleVariableStorageClass(MangledName);
     return demangleVariableEncoding(MangledName, SC);
   }
-  case '8':
-    return nullptr;
   }
   FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
 
@@ -689,23 +721,74 @@ SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
   if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
     ConversionOperatorIdentifierNode *COIN =
         static_cast<ConversionOperatorIdentifierNode *>(UQN);
-    COIN->TargetType = FSN->Signature->ReturnType;
+    if (FSN)
+      COIN->TargetType = FSN->Signature->ReturnType;
   }
   return FSN;
 }
 
+SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
+  // Demangles the main symbol name (which may include namespaces or class
+  // back references) followed by its encoding; returns nullptr on Error.
+  QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+  if (Error)
+    return nullptr;
+
+  SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+  if (Error)
+    return nullptr;
+  Symbol->Name = QN;
+
+  IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
+  if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
+    ConversionOperatorIdentifierNode *COIN =
+        static_cast<ConversionOperatorIdentifierNode *>(UQN);
+    if (!COIN->TargetType) { // Conversion operator lacks a target type.
+      Error = true;
+      return nullptr;
+    }
+  }
+  return Symbol;
+}
+
+SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
+  assert(MangledName.startsWith("??@"));
+  // This is an MD5 mangled name.  We can't demangle it, just return the
+  // mangled name.  Note the 32-character length below is not validated.
+  // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
+  size_t MD5Last = MangledName.find('@', strlen("??@"));
+  if (MD5Last == StringView::npos) {
+    Error = true;
+    return nullptr;
+  }
+  const char *Start = MangledName.begin();
+  MangledName = MangledName.dropFront(MD5Last + 1);
+
+  // There are two additional special cases for MD5 names:
+  // 1. For complete object locators where the object name is long enough
+  //    for the object to have an MD5 name, the complete object locator is
+  //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
+  //    leading "??_R4". This is handled here.
+  // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
+  //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
+  //    instead of _CT??@...@8 with just one MD5 name. Since we don't yet
+  //    demangle catchable types anywhere, this isn't handled for MD5 names
+  //    either.
+  MangledName.consumeFront("??_R4@");
+
+  StringView MD5(Start, MangledName.begin()); // All text consumed above.
+  SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
+  S->Name = synthesizeQualifiedName(Arena, MD5);
+
+  return S;
+}
+
 // Parser entry point.
 SymbolNode *Demangler::parse(StringView &MangledName) {
-  // We can't demangle MD5 names, just output them as-is.
-  // Also, MSVC-style mangled symbols must start with '?'.
-  if (MangledName.startsWith("??@")) {
-    // This is an MD5 mangled name.  We can't demangle it, just return the
-    // mangled name.
-    SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
-    S->Name = synthesizeQualifiedName(Arena, MangledName);
-    return S;
-  }
+  if (MangledName.startsWith("??@"))
+    return demangleMD5Name(MangledName);
 
+  // MSVC-style mangled symbols must start with '?'.
   if (!MangledName.startsWith('?')) {
     Error = true;
     return nullptr;
@@ -718,21 +801,7 @@ SymbolNode *Demangler::parse(StringView &MangledName) {
   if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
     return SI;
 
-  // What follows is a main symbol name. This may include namespaces or class
-  // back references.
-  QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
-  if (Error)
-    return nullptr;
-
-  SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
-  if (Symbol) {
-    Symbol->Name = QN;
-  }
-
-  if (Error)
-    return nullptr;
-
-  return Symbol;
+  return demangleDeclarator(MangledName);
 }
 
 TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
@@ -759,6 +828,9 @@ VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
   VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
   VSN->SC = SC;
 
+  if (Error)
+    return nullptr;
+
   // <variable-type> ::= <type> <cvr-qualifiers>
   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
   switch (VSN->Type->kind()) {
@@ -797,7 +869,7 @@ VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
 // <number>               ::= [?] <non-negative integer>
 //
 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
-//                        ::= <hex digit>+ @  # when Numbrer == 0 or >= 10
+//                        ::= <hex digit>+ @  # when Number == 0 or >= 10
 //
 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
 std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
@@ -906,8 +978,18 @@ Demangler::demangleTemplateInstantiationName(StringView &MangledName,
   if (Error)
     return nullptr;
 
-  if (NBB & NBB_Template)
+  if (NBB & NBB_Template) {
+    // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
+    // Structors and conversion operators only make sense in a leaf name, so
+    // reject them in NBB_Template contexts.
+    if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
+        Identifier->kind() == NodeKind::StructorIdentifier) {
+      Error = true;
+      return nullptr;
+    }
+
     memorizeIdentifier(Identifier);
+  }
 
   return Identifier;
 }
@@ -931,6 +1013,7 @@ static uint8_t rebasedHexDigitToNumber(char C) {
 }
 
 uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
+  assert(!MangledName.empty());
   if (!MangledName.startsWith('?'))
     return MangledName.popFront();
 
@@ -988,7 +1071,7 @@ wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
   uint8_t C1, C2;
 
   C1 = demangleCharLiteral(MangledName);
-  if (Error)
+  if (Error || MangledName.empty())
     goto WCharLiteralError;
   C2 = demangleCharLiteral(MangledName);
   if (Error)
@@ -1007,10 +1090,8 @@ static void writeHexDigit(char *Buffer, uint8_t Digit) {
 }
 
 static void outputHex(OutputStream &OS, unsigned C) {
-  if (C == 0) {
-    OS << "\\x00";
-    return;
-  }
+  assert (C != 0);
+
   // It's easier to do the math if we can work from right to left, but we need
   // to print the numbers from left to right.  So render this into a temporary
   // buffer first, then output the temporary buffer.  Each byte is of the form
@@ -1019,23 +1100,26 @@ static void outputHex(OutputStream &OS, unsigned C) {
   char TempBuffer[17];
 
   ::memset(TempBuffer, 0, sizeof(TempBuffer));
-  constexpr int MaxPos = 15;
+  constexpr int MaxPos = sizeof(TempBuffer) - 1;
 
-  int Pos = MaxPos - 1;
+  int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
   while (C != 0) {
     for (int I = 0; I < 2; ++I) {
       writeHexDigit(&TempBuffer[Pos--], C % 16);
       C /= 16;
     }
-    TempBuffer[Pos--] = 'x';
-    TempBuffer[Pos--] = '\\';
-    assert(Pos >= 0);
   }
+  TempBuffer[Pos--] = 'x';
+  assert(Pos >= 0);
+  TempBuffer[Pos--] = '\\';
   OS << StringView(&TempBuffer[Pos + 1]);
 }
 
 static void outputEscapedChar(OutputStream &OS, unsigned C) {
   switch (C) {
+  case '\0': // nul
+    OS << "\\0";
+    return;
   case '\'': // single quote
     OS << "\\\'";
     return;
@@ -1100,8 +1184,11 @@ static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
   return Result;
 }
 
+// A mangled (non-wide) string literal stores the total length of the string it
+// refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
+// (passed in StringBytes, NumChars).
 static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
-                                  unsigned NumBytes) {
+                                  uint64_t NumBytes) {
   assert(NumBytes > 0);
 
   // If the number of bytes is odd, this is guaranteed to be a char string.
@@ -1113,7 +1200,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
   // 2-byte, or 4-byte null terminator.
   if (NumBytes < 32) {
     unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
-    if (TrailingNulls >= 4)
+    if (TrailingNulls >= 4 && NumBytes % 4 == 0)
       return 4;
     if (TrailingNulls >= 2)
       return 2;
@@ -1127,7 +1214,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
   // perfect and is biased towards languages that have ascii alphabets, but this
   // was always going to be best effort since the encoding is lossy.
   unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
-  if (Nulls >= 2 * NumChars / 3)
+  if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
     return 4;
   if (Nulls >= NumChars / 3)
     return 2;
@@ -1178,6 +1265,11 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
 
   EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
 
+  // Must happen before the first `goto StringLiteralError`.
+  if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+    // FIXME: Propagate out-of-memory as an error?
+    std::terminate();
+
   // Prefix indicating the beginning of a string literal
   if (!MangledName.consumeFront("@_"))
     goto StringLiteralError;
@@ -1188,7 +1280,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
   switch (MangledName.popFront()) {
   case '1':
     IsWcharT = true;
-    LLVM_FALLTHROUGH;
+    DEMANGLE_FALLTHROUGH;
   case '0':
     break;
   default:
@@ -1197,7 +1289,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
 
   // Encoded Length
   std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
-  if (Error || IsNegative)
+  if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
     goto StringLiteralError;
 
   // CRC 32 (always 8 characters plus a terminator)
@@ -1209,16 +1301,14 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
   if (MangledName.empty())
     goto StringLiteralError;
 
-  if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
-    // FIXME: Propagate out-of-memory as an error?
-    std::terminate();
   if (IsWcharT) {
     Result->Char = CharKind::Wchar;
     if (StringByteSize > 64)
       Result->IsTruncated = true;
 
     while (!MangledName.consumeFront('@')) {
-      assert(StringByteSize >= 2);
+      if (MangledName.size() < 2)
+        goto StringLiteralError;
       wchar_t W = demangleWcharLiteral(MangledName);
       if (StringByteSize != 2 || Result->IsTruncated)
         outputEscapedChar(OS, W);
@@ -1234,7 +1324,8 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
 
     unsigned BytesDecoded = 0;
     while (!MangledName.consumeFront('@')) {
-      assert(StringByteSize >= 1);
+      if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
+        goto StringLiteralError;
       StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
     }
 
@@ -1255,7 +1346,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
       Result->Char = CharKind::Char32;
       break;
     default:
-      LLVM_BUILTIN_UNREACHABLE;
+      DEMANGLE_UNREACHABLE;
     }
     const unsigned NumChars = BytesDecoded / CharBytes;
     for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
@@ -1274,15 +1365,20 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
 
 StringLiteralError:
   Error = true;
+  std::free(OS.getBuffer());
   return nullptr;
 }
 
+// Returns MangledName's prefix before the first '@', or an error if
+// MangledName contains no '@' or the prefix has length 0.
 StringView Demangler::demangleSimpleString(StringView &MangledName,
                                            bool Memorize) {
   StringView S;
   for (size_t i = 0; i < MangledName.size(); ++i) {
     if (MangledName[i] != '@')
       continue;
+    if (i == 0)
+      break;
     S = MangledName.substr(0, i);
     MangledName = MangledName.dropFront(i + 1);
 
@@ -1319,8 +1415,10 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
 
   NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
   MangledName.consumeFront('?');
-  auto Number = demangleNumber(MangledName);
-  assert(!Number.second);
+  uint64_t Number = 0;
+  bool IsNegative = false;
+  std::tie(Number, IsNegative) = demangleNumber(MangledName);
+  assert(!IsNegative);
 
   // One ? to terminate the number
   MangledName.consumeFront('?');
@@ -1338,7 +1436,7 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
   OS << '`';
   Scope->output(OS, OF_Default);
   OS << '\'';
-  OS << "::`" << Number.first << "'";
+  OS << "::`" << Number << "'";
   OS << '\0';
   char *Result = OS.getBuffer();
   Identifier->Name = copyString(Result);
@@ -1349,7 +1447,8 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
 QualifiedNameNode *
 Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
-  IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true);
+  IdentifierNode *Identifier =
+      demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
   if (Error)
     return nullptr;
   assert(Identifier);
@@ -1381,9 +1480,12 @@ Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
     return nullptr;
 
   if (Identifier->kind() == NodeKind::StructorIdentifier) {
+    if (QN->Components->Count < 2) {
+      Error = true;
+      return nullptr;
+    }
     StructorIdentifierNode *SIN =
         static_cast<StructorIdentifierNode *>(Identifier);
-    assert(QN->Components->Count >= 2);
     Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
     SIN->Class = static_cast<IdentifierNode *>(ClassNode);
   }
@@ -1415,7 +1517,7 @@ Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
     return demangleTemplateInstantiationName(MangledName, NBB);
   if (MangledName.startsWith('?'))
     return demangleFunctionIdentifierCode(MangledName);
-  return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0);
+  return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
 }
 
 IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
@@ -1431,7 +1533,7 @@ IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
   if (startsWithLocalScopePattern(MangledName))
     return demangleLocallyScopedNamePiece(MangledName);
 
-  return demangleSimpleName(MangledName, true);
+  return demangleSimpleName(MangledName, /*Memorize=*/true);
 }
 
 static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
@@ -1489,11 +1591,11 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
   case 'C':
     return FuncClass(FC_Private | FC_Static);
   case 'D':
-    return FuncClass(FC_Private | FC_Static);
+    return FuncClass(FC_Private | FC_Static | FC_Far);
   case 'E':
     return FuncClass(FC_Private | FC_Virtual);
   case 'F':
-    return FuncClass(FC_Private | FC_Virtual);
+    return FuncClass(FC_Private | FC_Virtual | FC_Far);
   case 'G':
     return FuncClass(FC_Private | FC_StaticThisAdjust);
   case 'H':
@@ -1538,7 +1640,8 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
     FuncClass VFlag = FC_VirtualThisAdjust;
     if (MangledName.consumeFront('R'))
       VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
-
+    if (MangledName.empty())
+      break;
     switch (MangledName.popFront()) {
     case '0':
       return FuncClass(FC_Private | FC_Virtual | VFlag);
@@ -1561,6 +1664,11 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
 }
 
 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
+  if (MangledName.empty()) {
+    Error = true;
+    return CallingConv::None;
+  }
+
   switch (MangledName.popFront()) {
   case 'A':
   case 'B':
@@ -1591,7 +1699,7 @@ CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
 }
 
 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
-  assert(std::isdigit(MangledName.front()));
+  assert(MangledName.front() >= '0' && MangledName.front() <= '4');
 
   switch (MangledName.popFront()) {
   case '0':
@@ -1605,12 +1713,15 @@ StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
   case '4':
     return StorageClass::FunctionLocalStatic;
   }
-  Error = true;
-  return StorageClass::None;
+  DEMANGLE_UNREACHABLE;
 }
 
 std::pair<Qualifiers, bool>
 Demangler::demangleQualifiers(StringView &MangledName) {
+  if (MangledName.empty()) {
+    Error = true;
+    return std::make_pair(Q_None, false);
+  }
 
   switch (MangledName.popFront()) {
   // Member qualifiers
@@ -1649,6 +1760,11 @@ TypeNode *Demangler::demangleType(StringView &MangledName,
       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
   }
 
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
+
   TypeNode *Ty = nullptr;
   if (isTagType(MangledName))
     Ty = demangleClassType(MangledName);
@@ -1710,7 +1826,7 @@ FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
   if (!IsStructor)
     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
 
-  FTy->Params = demangleFunctionParameterList(MangledName);
+  FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
 
   FTy->IsNoexcept = demangleThrowSpecification(MangledName);
 
@@ -1723,6 +1839,11 @@ Demangler::demangleFunctionEncoding(StringView &MangledName) {
   if (MangledName.consumeFront("$$J0"))
     ExtraFlags = FC_ExternC;
 
+  if (MangledName.empty()) {
+    Error = true;
+    return nullptr;
+  }
+
   FuncClass FC = demangleFunctionClass(MangledName);
   FC = FuncClass(ExtraFlags | FC);
 
@@ -1750,6 +1871,10 @@ Demangler::demangleFunctionEncoding(StringView &MangledName) {
     bool HasThisQuals = !(FC & (FC_Global | FC_Static));
     FSN = demangleFunctionType(MangledName, HasThisQuals);
   }
+
+  if (Error)
+    return nullptr;
+
   if (TTN) {
     *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
     FSN = TTN;
@@ -1766,7 +1891,7 @@ CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
   MangledName.popFront();
 
   CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
-  CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true);
+  CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
   if (!MangledName.consumeFront('@'))
     Error = true;
   if (Error)
@@ -1820,6 +1945,8 @@ PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
     case 'W':
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
+    case 'Q':
+      return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
     case 'S':
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
     case 'U':
@@ -1846,7 +1973,7 @@ TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
     TT = Arena.alloc<TagTypeNode>(TagKind::Class);
     break;
   case 'W':
-    if (MangledName.popFront() != '4') {
+    if (!MangledName.consumeFront('4')) {
       Error = true;
       return nullptr;
     }
@@ -1890,6 +2017,8 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
 
+  // isMemberPointer() only returns true if there is at least one character
+  // after the qualifiers.
   if (MangledName.consumeFront("8")) {
     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
     Pointer->Pointee = demangleFunctionType(MangledName, true);
@@ -1897,11 +2026,12 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
     Qualifiers PointeeQuals = Q_None;
     bool IsMember = false;
     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
-    assert(IsMember);
+    assert(IsMember || Error);
     Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
 
     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
-    Pointer->Pointee->Quals = PointeeQuals;
+    if (Pointer->Pointee)
+      Pointer->Pointee->Quals = PointeeQuals;
   }
 
   return Pointer;
@@ -1938,7 +2068,7 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
   for (uint64_t I = 0; I < Rank; ++I) {
     uint64_t D = 0;
     std::tie(D, IsNegative) = demangleNumber(MangledName);
-    if (IsNegative) {
+    if (Error || IsNegative) {
       Error = true;
       return nullptr;
     }
@@ -1963,12 +2093,12 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
   return ATy;
 }
 
-// Reads a function or a template parameters.
-NodeArrayNode *
-Demangler::demangleFunctionParameterList(StringView &MangledName) {
+// Reads a function's parameters.
+NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
+                                                        bool &IsVariadic) {
   // Empty parameter list.
   if (MangledName.consumeFront('X'))
-    return {};
+    return nullptr;
 
   NodeList *Head = Arena.alloc<NodeList>();
   NodeList **Current = &Head;
@@ -1981,7 +2111,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
       size_t N = MangledName[0] - '0';
       if (N >= Backrefs.FunctionParamCount) {
         Error = true;
-        return {};
+        return nullptr;
       }
       MangledName = MangledName.dropFront();
 
@@ -2012,7 +2142,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
   }
 
   if (Error)
-    return {};
+    return nullptr;
 
   NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
@@ -2022,13 +2152,11 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
     return NA;
 
   if (MangledName.consumeFront('Z')) {
-    // This is a variadic parameter list.  We probably need a variadic node to
-    // append to the end.
+    IsVariadic = true;
     return NA;
   }
 
-  Error = true;
-  return {};
+  DEMANGLE_UNREACHABLE;
 }
 
 NodeArrayNode *
@@ -2037,7 +2165,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
   NodeList **Current = &Head;
   size_t Count = 0;
 
-  while (!Error && !MangledName.startsWith('@')) {
+  while (!MangledName.startsWith('@')) {
     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
         MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
       // parameter pack separator
@@ -2070,12 +2198,16 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
       MangledName = MangledName.dropFront();
       // 1 - single inheritance       <name>
       // H - multiple inheritance     <name> <number>
-      // I - virtual inheritance      <name> <number> <number> <number>
+      // I - virtual inheritance      <name> <number> <number>
       // J - unspecified inheritance  <name> <number> <number> <number>
       char InheritanceSpecifier = MangledName.popFront();
       SymbolNode *S = nullptr;
       if (MangledName.startsWith('?')) {
         S = parse(MangledName);
+        if (Error || !S->Name) {
+          Error = true;
+          return nullptr;
+        }
         memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
       }
 
@@ -2083,20 +2215,19 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
       case 'J':
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        LLVM_FALLTHROUGH;
+        DEMANGLE_FALLTHROUGH;
       case 'I':
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        LLVM_FALLTHROUGH;
+        DEMANGLE_FALLTHROUGH;
       case 'H':
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        LLVM_FALLTHROUGH;
+        DEMANGLE_FALLTHROUGH;
       case '1':
         break;
       default:
-        Error = true;
-        break;
+        DEMANGLE_UNREACHABLE;
       }
       TPRN->Affinity = PointerAffinity::Pointer;
       TPRN->Symbol = S;
@@ -2117,18 +2248,15 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
       case 'G':
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        LLVM_FALLTHROUGH;
+        DEMANGLE_FALLTHROUGH;
       case 'F':
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        LLVM_FALLTHROUGH;
-      case '0':
         break;
       default:
-        Error = true;
-        break;
+        DEMANGLE_UNREACHABLE;
       }
       TPRN->IsMemberPointer = true;
 
@@ -2148,15 +2276,14 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
     Current = &TP.Next;
   }
 
-  if (Error)
-    return nullptr;
+  // The loop above returns nullptr on Error.
+  assert(!Error);
 
   // Template parameter lists cannot be variadic, so it can only be terminated
-  // by @.
-  if (MangledName.consumeFront('@'))
-    return nodeListToNodeArray(Arena, Head, Count);
-  Error = true;
-  return nullptr;
+  // by @ (as opposed to 'Z' in the function parameter case).
+  assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
+  MangledName.consumeFront('@');
+  return nodeListToNodeArray(Arena, Head, Count);
 }
 
 void Demangler::dumpBackReferences() {
diff --git a/lib/Demangle/MicrosoftDemangleNodes.cpp b/lib/Demangle/MicrosoftDemangleNodes.cpp
index 622f8e75e351..63ca475ec1fe 100644
--- a/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -1,9 +1,8 @@
 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/Utility.h"
 #include <cctype>
 #include <string>
@@ -35,21 +34,20 @@ static void outputSpaceIfNecessary(OutputStream &OS) {
     OS << " ";
 }
 
-static bool outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+static void outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
   switch (Q) {
   case Q_Const:
     OS << "const";
-    return true;
+    break;
   case Q_Volatile:
     OS << "volatile";
-    return true;
+    break;
   case Q_Restrict:
     OS << "__restrict";
-    return true;
+    break;
   default:
     break;
   }
-  return false;
 }
 
 static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
@@ -131,6 +129,7 @@ void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char, "char");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Schar, "signed char");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uchar, "unsigned char");
+    OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char8, "char8_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char16, "char16_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char32, "char32_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Short, "short");
@@ -338,8 +337,9 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
                             "`vector vbase copy constructor iterator'");
     OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorVbaseCopyCtorIter,
                             "`managed vector vbase copy constructor iterator'");
-    OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait, "co_await");
-    OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator <=>");
+    OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait,
+                            "operator co_await");
+    OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator<=>");
   case IntrinsicFunctionKind::MaxIntrinsic:
   case IntrinsicFunctionKind::None:
     break;
@@ -349,7 +349,10 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
 
 void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
                                             OutputFlags Flags) const {
-  OS << "`local static guard'";
+  if (IsThread)
+    OS << "`local static thread guard'";
+  else
+    OS << "`local static guard'";
   if (ScopeIndex > 0)
     OS << "{" << ScopeIndex << "}";
 }
@@ -411,6 +414,12 @@ void FunctionSignatureNode::outputPost(OutputStream &OS,
       Params->output(OS, Flags);
     else
       OS << "void";
+
+    if (IsVariadic) {
+      if (OS.back() != '(')
+        OS << ", ";
+      OS << "...";
+    }
     OS << ")";
   }
 
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index ae96c7f5955f..1c6c0406d048 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -1,9 +1,8 @@
 //===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1020,32 +1019,6 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
   return Result;
 }
 
-/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
-/// with the integer held in IntVal.
-static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
-                             unsigned StoreBytes) {
-  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
-  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
-
-  if (sys::IsLittleEndianHost) {
-    // Little-endian host - the source is ordered from LSB to MSB.  Order the
-    // destination from LSB to MSB: Do a straight copy.
-    memcpy(Dst, Src, StoreBytes);
-  } else {
-    // Big-endian host - the source is an array of 64 bit words ordered from
-    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
-    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
-    while (StoreBytes > sizeof(uint64_t)) {
-      StoreBytes -= sizeof(uint64_t);
-      // May not be aligned so use memcpy.
-      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
-      Src += sizeof(uint64_t);
-    }
-
-    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
-  }
-}
-
 void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
                                          GenericValue *Ptr, Type *Ty) {
   const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty);
@@ -1093,33 +1066,6 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
     std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
 }
 
-/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
-/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
-static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
-  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
-  uint8_t *Dst = reinterpret_cast<uint8_t *>(
-                   const_cast<uint64_t *>(IntVal.getRawData()));
-
-  if (sys::IsLittleEndianHost)
-    // Little-endian host - the destination must be ordered from LSB to MSB.
-    // The source is ordered from LSB to MSB: Do a straight copy.
-    memcpy(Dst, Src, LoadBytes);
-  else {
-    // Big-endian - the destination is an array of 64 bit words ordered from
-    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
-    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
-    // a word.
-    while (LoadBytes > sizeof(uint64_t)) {
-      LoadBytes -= sizeof(uint64_t);
-      // May not be aligned so use memcpy.
-      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
-      Dst += sizeof(uint64_t);
-    }
-
-    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
-  }
-}
-
 /// FIXME: document
 ///
 void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 3be4bec566a0..c741fe2b3778 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -1,9 +1,8 @@
 //===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 8204f5a90268..08d20156a590 100644
--- a/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -1,9 +1,8 @@
 //===----- GDBRegistrationListener.cpp - Registers objects with GDB -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index e9051c198506..1ebc820a8b49 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -1,9 +1,8 @@
 //===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -142,13 +141,25 @@ void IntelJITEventListener::notifyObjectLoaded(
     uint64_t Addr = *AddrOrErr;
     uint64_t Size = P.second;
 
+    auto SecOrErr = Sym.getSection();
+    if (!SecOrErr) {
+      // TODO: Actually report errors helpfully.
+      consumeError(SecOrErr.takeError());
+      continue;
+    }
+    object::section_iterator Sec = *SecOrErr;
+    if (Sec == Obj.section_end())
+      continue;
+    uint64_t Index = Sec->getIndex();
+
     // Record this address in a local vector
     Functions.push_back((void*)Addr);
 
     // Build the function loaded notification message
     iJIT_Method_Load FunctionMessage =
       FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
-    DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+    DILineInfoTable Lines =
+      Context->getLineInfoForAddressRange({Addr, Index}, Size);
     DILineInfoTable::iterator Begin = Lines.begin();
     DILineInfoTable::iterator End = Lines.end();
     for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 777d0f179cb5..68699c6a2200 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -1,9 +1,8 @@
 //===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
index 61d8cc75d9f2..16ce672150cc 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -1,9 +1,8 @@
 /*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===*
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===----------------------------------------------------------------------===*
  *
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
index 5df752f66f10..15008fe93e60 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
@@ -1,9 +1,8 @@
 /*===-- ittnotify_types.h - JIT Profiling API internal types--------*- C -*-===*
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===----------------------------------------------------------------------===*
  *
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
index bc8fea148749..074e0735628a 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -1,9 +1,8 @@
 /*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===*
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===----------------------------------------------------------------------===*
  *
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
index efd2b1a33f75..ba627b430ff1 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -1,9 +1,8 @@
 /*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===*
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===----------------------------------------------------------------------===*
  *
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 98dca1102759..51f31d3d5d8f 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1,9 +1,8 @@
 //===-- Execution.cpp - Implement code to simulate the program ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,60 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
   SF.Values[V] = Val;
 }
 
+//===----------------------------------------------------------------------===//
+//                    Unary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+static void executeFNegInst(GenericValue &Dest, GenericValue Src, Type *Ty) {
+  switch (Ty->getTypeID()) { // Only FloatTy and DoubleTy are handled.
+  case Type::FloatTyID:
+    Dest.FloatVal = -Src.FloatVal;
+    break;
+  case Type::DoubleTyID:
+    Dest.DoubleVal = -Src.DoubleVal;
+    break;
+  default:
+    llvm_unreachable("Unhandled type for FNeg instruction");
+  }
+}
+
+void Interpreter::visitUnaryOperator(UnaryOperator &I) {
+  ExecutionContext &SF = ECStack.back();
+  Type *Ty = I.getOperand(0)->getType();
+  GenericValue Src = getOperandValue(I.getOperand(0), SF);
+  GenericValue R; // Result; recorded as the value of I by SetValue below.
+
+  // Vector operands are negated element by element; scalars take the else.
+  if (Ty->isVectorTy()) {
+    R.AggregateVal.resize(Src.AggregateVal.size());
+
+    switch(I.getOpcode()) {
+    default:
+      llvm_unreachable("Don't know how to handle this unary operator");
+      break;
+    case Instruction::FNeg:
+      if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+        for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+          R.AggregateVal[i].FloatVal = -Src.AggregateVal[i].FloatVal;
+      } else if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) {
+        for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+          R.AggregateVal[i].DoubleVal = -Src.AggregateVal[i].DoubleVal;
+      } else {
+        llvm_unreachable("Unhandled type for FNeg instruction");
+      }
+      break;
+    }
+  } else {
+    switch (I.getOpcode()) {
+    default:
+      llvm_unreachable("Don't know how to handle this unary operator");
+      break;
+    case Instruction::FNeg: executeFNegInst(R, Src, Ty); break;
+    }
+  }
+  SetValue(&I, R, SF);
+}
+
 //===----------------------------------------------------------------------===//
 //                    Binary Instruction Implementations
 //===----------------------------------------------------------------------===//
@@ -2113,7 +2166,7 @@ void Interpreter::run() {
     // Track the number of dynamic instructions executed.
     ++NumDynamicInsts;
 
-    LLVM_DEBUG(dbgs() << "About to interpret: " << I);
+    LLVM_DEBUG(dbgs() << "About to interpret: " << I << "\n");
     visit(I);   // Dispatch to one of the visit* methods...
   }
 }
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 334fcacf8078..c3a2ccc582c9 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -1,9 +1,8 @@
 //===-- ExternalFunctions.cpp - Implement External Functions --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 9818adfff82e..5727f7adb49c 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -1,9 +1,8 @@
 //===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 33542e7e43ad..e72d778317d6 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -1,9 +1,8 @@
 //===-- Interpreter.h ------------------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,6 +124,7 @@ public:
   void visitSwitchInst(SwitchInst &I);
   void visitIndirectBrInst(IndirectBrInst &I);
 
+  void visitUnaryOperator(UnaryOperator &I);
   void visitBinaryOperator(BinaryOperator &I);
   void visitICmpInst(ICmpInst &I);
   void visitFCmpInst(FCmpInst &I);
diff --git a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
new file mode 100644
index 000000000000..1271ad962b38
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
@@ -0,0 +1,102 @@
+//===--- BasicGOTAndStubsBuilder.h - Generic GOT/Stub creation --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A base for simple GOT and stub creation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
+#define LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// CRTP base for passes that add GOT entries and stubs to an AtomGraph.
+///
+/// BuilderImpl must derive from BasicGOTAndStubsBuilder<BuilderImpl> and
+/// provide the hooks called below:
+///   - isGOTEdge(E) and isExternalBranchEdge(E) to classify an edge;
+///   - createGOTEntry(Target) and createStub(Target) to build a new atom;
+///   - fixGOTEdge(E, GOTEntry) and fixExternalBranchEdge(E, Stub).
+template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
+public:
+  BasicGOTAndStubsBuilder(AtomGraph &G) : G(G) {}
+
+  /// Walk the edges of every atom defined when run() is called, passing GOT
+  /// edges and external-branch edges to the matching BuilderImpl fix hook
+  /// along with the (possibly newly created) GOT entry or stub atom.
+  void run() {
+    // We're going to be adding new atoms, but we don't want to iterate over
+    // the newly added ones, so just copy the existing atoms out.
+    std::vector<DefinedAtom *> DAs(G.defined_atoms().begin(),
+                                   G.defined_atoms().end());
+
+    for (auto *DA : DAs)
+      for (auto &E : DA->edges())
+        if (impl().isGOTEdge(E))
+          impl().fixGOTEdge(E, getGOTEntryAtom(E.getTarget()));
+        else if (impl().isExternalBranchEdge(E))
+          impl().fixExternalBranchEdge(E, getStubAtom(E.getTarget()));
+  }
+
+protected:
+  /// Return the GOT entry atom for Target, creating it with
+  /// BuilderImpl::createGOTEntry on first use. Entries are cached by target
+  /// name, so Target must be named.
+  Atom &getGOTEntryAtom(Atom &Target) {
+    assert(Target.hasName() && "GOT edge cannot point to anonymous target");
+
+    auto GOTEntryI = GOTEntries.find(Target.getName());
+
+    // Build the entry if it doesn't exist.
+    if (GOTEntryI == GOTEntries.end()) {
+      auto &GOTEntry = impl().createGOTEntry(Target);
+      GOTEntryI =
+          GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first;
+    }
+
+    assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry atom");
+    return *GOTEntryI->second;
+  }
+
+  /// Return the stub atom for Target, creating it with
+  /// BuilderImpl::createStub on first use. Stubs are cached by target name,
+  /// so Target must be named.
+  Atom &getStubAtom(Atom &Target) {
+    assert(Target.hasName() &&
+           "External branch edge can not point to an anonymous target");
+    auto StubI = Stubs.find(Target.getName());
+
+    // Build the stub if it doesn't exist.
+    if (StubI == Stubs.end()) {
+      auto &StubAtom = impl().createStub(Target);
+      StubI = Stubs.insert(std::make_pair(Target.getName(), &StubAtom)).first;
+    }
+
+    assert(StubI != Stubs.end() && "Could not get stub atom");
+    return *StubI->second;
+  }
+
+  /// Graph whose defined atoms run() visits.
+  AtomGraph &G;
+
+private:
+  /// Downcast to the concrete builder (CRTP).
+  BuilderImpl &impl() { return static_cast<BuilderImpl &>(*this); }
+
+  /// Atoms created so far, keyed by the name of the target they serve.
+  DenseMap<StringRef, DefinedAtom *> GOTEntries;
+  DenseMap<StringRef, DefinedAtom *> Stubs;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
new file mode 100644
index 000000000000..25f0e9040ffe
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -0,0 +1,544 @@
+//===------------ EHFrameSupport.cpp - JITLink eh-frame utils -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EHFrameSupportImpl.h"
+
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+EHFrameParser::EHFrameParser(AtomGraph &G, Section &EHFrameSection,
+                             StringRef EHFrameContent,
+                             JITTargetAddress EHFrameAddress,
+                             Edge::Kind FDEToCIERelocKind,
+                             Edge::Kind FDEToTargetRelocKind)
+    : G(G), EHFrameSection(EHFrameSection), EHFrameContent(EHFrameContent),
+      EHFrameAddress(EHFrameAddress),
+      EHFrameReader(EHFrameContent, G.getEndianness()),
+      FDEToCIERelocKind(FDEToCIERelocKind),
+      FDEToTargetRelocKind(FDEToTargetRelocKind) {}
+
+/// Walk the eh-frame content record by record, adding one anonymous atom per
+/// record and handing each record to processCIE or processFDE.
+Error EHFrameParser::atomize() {
+  while (!EHFrameReader.empty()) {
+    size_t RecordOffset = EHFrameReader.getOffset();
+    LLVM_DEBUG({
+      dbgs() << "Processing eh-frame record at "
+             << format("0x%016" PRIx64, EHFrameAddress + RecordOffset)
+             << " (offset " << RecordOffset << ")\n";
+    });
+
+    size_t CIELength = 0;
+    uint32_t CIELengthField;
+    if (auto Err = EHFrameReader.readInteger(CIELengthField))
+      return Err;
+
+    // Process CIE length/extended-length fields to build the atom.
+    //
+    // The value of these fields describe the length of the *rest* of the CIE
+    // (not including data up to the end of the field itself) so we have to
+    // bump CIELength to include the data up to the end of the field: 4 bytes
+    // for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength.
+    if (CIELengthField == 0) // Length 0 means end of __eh_frame section.
+      break;
+
+    // If the regular length field's value is 0xffffffff, use extended length.
+    if (CIELengthField == 0xffffffff) {
+      uint64_t CIEExtendedLengthField;
+      if (auto Err = EHFrameReader.readInteger(CIEExtendedLengthField))
+        return Err;
+      if (CIEExtendedLengthField > EHFrameReader.bytesRemaining())
+        return make_error<JITLinkError>("CIE record extends past the end of "
+                                        "the __eh_frame section");
+      if (CIEExtendedLengthField + 12 > std::numeric_limits<size_t>::max())
+        return make_error<JITLinkError>("CIE record too large to process");
+      CIELength = CIEExtendedLengthField + 12;
+    } else {
+      if (CIELengthField > EHFrameReader.bytesRemaining())
+        return make_error<JITLinkError>("CIE record extends past the end of "
+                                        "the __eh_frame section");
+      CIELength = CIELengthField + 4;
+    }
+    LLVM_DEBUG(dbgs() << "  length: " << CIELength << "\n");
+
+    // Add an atom for this record.
+    CurRecordAtom = &G.addAnonymousAtom(
+        EHFrameSection, EHFrameAddress + RecordOffset, G.getPointerSize());
+    CurRecordAtom->setContent(EHFrameContent.substr(RecordOffset, CIELength));
+
+    // Read the CIE Pointer. Its address is a target address, so hold it in a
+    // JITTargetAddress rather than a (possibly 32-bit) host size_t.
+    JITTargetAddress CIEPointerAddress =
+        EHFrameAddress + EHFrameReader.getOffset();
+    uint32_t CIEPointer;
+    if (auto Err = EHFrameReader.readInteger(CIEPointer))
+      return Err;
+    // Based on the CIE pointer value, parse this as a CIE or FDE record.
+    if (CIEPointer == 0) {
+      if (auto Err = processCIE())
+        return Err;
+    } else {
+      if (auto Err = processFDE(CIEPointerAddress, CIEPointer))
+        return Err;
+    }
+
+    EHFrameReader.setOffset(RecordOffset + CIELength);
+  }
+  return Error::success();
+}
+
+/// Parse the CIE augmentation string into flags plus its 'L'/'P'/'R' fields.
+Expected<EHFrameParser::AugmentationInfo>
+EHFrameParser::parseAugmentationString() {
+  AugmentationInfo AugInfo;
+  uint8_t NextChar;
+  uint8_t *NextField = &AugInfo.Fields[0];
+  if (auto Err = EHFrameReader.readInteger(NextChar))
+    return std::move(Err);
+  while (NextChar != 0) {
+    switch (NextChar) {
+    case 'z':
+      AugInfo.AugmentationDataPresent = true;
+      break;
+    case 'e':
+      if (auto Err = EHFrameReader.readInteger(NextChar))
+        return std::move(Err);
+      if (NextChar != 'h')
+        return make_error<JITLinkError>("Unrecognized substring e" +
+                                        Twine(NextChar) +
+                                        " in augmentation string");
+      AugInfo.EHDataFieldPresent = true;
+      break;
+    case 'L':
+    case 'P':
+    case 'R':
+      // Never write the last slot: Fields must stay zero-terminated.
+      if (NextField == &AugInfo.Fields[sizeof(AugInfo.Fields) - 1])
+        return make_error<JITLinkError>("Too many augmentation fields");
+      *NextField++ = NextChar;
+      break;
+    default:
+      return make_error<JITLinkError>("Unrecognized character " +
+                                      Twine(NextChar) +
+                                      " in augmentation string");
+    }
+    if (auto Err = EHFrameReader.readInteger(NextChar))
+      return std::move(Err);
+  }
+  return std::move(AugInfo);
+}
+
+Expected<JITTargetAddress> EHFrameParser::readAbsolutePointer() {
+  static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
+                "Result must be able to hold a uint64_t");
+  JITTargetAddress Addr;
+  if (G.getPointerSize() == 8) {
+    if (auto Err = EHFrameReader.readInteger(Addr))
+      return std::move(Err);
+  } else if (G.getPointerSize() == 4) {
+    uint32_t Addr32;
+    if (auto Err = EHFrameReader.readInteger(Addr32))
+      return std::move(Err);
+    Addr = Addr32;
+  } else
+    llvm_unreachable("Pointer size is not 32-bit or 64-bit");
+  return Addr;
+}
+
+/// Parse and validate the CIE in CurRecordAtom, recording it in CIEInfos.
+Error EHFrameParser::processCIE() {
+  // Use the dwarf namespace for convenient access to pointer encoding
+  // constants.
+  using namespace dwarf;
+  LLVM_DEBUG(dbgs() << "  Record is CIE\n");
+
+  CIEInformation CIEInfo(*CurRecordAtom);
+
+  uint8_t Version = 0;
+  if (auto Err = EHFrameReader.readInteger(Version))
+    return Err;
+  // Widen Version: Twine would render a uint8_t as a character, not a number.
+  if (Version != 0x01)
+    return make_error<JITLinkError>("Bad CIE version " +
+                                    Twine(unsigned(Version)) +
+                                    " (should be 0x01) in eh-frame");
+
+  auto AugInfo = parseAugmentationString();
+  if (!AugInfo)
+    return AugInfo.takeError();
+
+  // Skip the EH Data field if present.
+  if (AugInfo->EHDataFieldPresent)
+    if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+      return Err;
+
+  // Read and sanity check the code alignment factor.
+  {
+    uint64_t CodeAlignmentFactor = 0;
+    if (auto Err = EHFrameReader.readULEB128(CodeAlignmentFactor))
+      return Err;
+    if (CodeAlignmentFactor != 1)
+      return make_error<JITLinkError>("Unsupported CIE code alignment factor " +
+                                      Twine(CodeAlignmentFactor) +
+                                      " (expected 1)");
+  }
+
+  // Read and sanity check the data alignment factor.
+  {
+    int64_t DataAlignmentFactor = 0;
+    if (auto Err = EHFrameReader.readSLEB128(DataAlignmentFactor))
+      return Err;
+    if (DataAlignmentFactor != -8)
+      return make_error<JITLinkError>("Unsupported CIE data alignment factor " +
+                                      Twine(DataAlignmentFactor) +
+                                      " (expected -8)");
+  }
+
+  // Skip the return address register field.
+  if (auto Err = EHFrameReader.skip(1))
+    return Err;
+
+  // 'z' in the augmentation string announces the augmentation data length.
+  uint64_t AugmentationDataLength = 0;
+  if (AugInfo->AugmentationDataPresent)
+    if (auto Err = EHFrameReader.readULEB128(AugmentationDataLength))
+      return Err;
+  uint32_t AugmentationDataStartOffset = EHFrameReader.getOffset();
+
+  uint8_t *NextField = &AugInfo->Fields[0];
+  while (uint8_t Field = *NextField++) {
+    switch (Field) {
+    case 'L': {
+      CIEInfo.FDEsHaveLSDAField = true;
+      uint8_t LSDAPointerEncoding;
+      if (auto Err = EHFrameReader.readInteger(LSDAPointerEncoding))
+        return Err;
+      if (LSDAPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+        return make_error<JITLinkError>(
+            "Unsupported LSDA pointer encoding " +
+            formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " +
+            formatv("{0:x16}", CurRecordAtom->getAddress()));
+      break;
+    }
+    case 'P': {
+      uint8_t PersonalityPointerEncoding = 0;
+      if (auto Err = EHFrameReader.readInteger(PersonalityPointerEncoding))
+        return Err;
+      if (PersonalityPointerEncoding !=
+          (DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4))
+        return make_error<JITLinkError>(
+            "Unsupported personality pointer "
+            "encoding " +
+            formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " +
+            formatv("{0:x16}", CurRecordAtom->getAddress()));
+      uint32_t PersonalityPointerAddress;
+      if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress))
+        return Err;
+      break;
+    }
+    case 'R': {
+      uint8_t FDEPointerEncoding;
+      if (auto Err = EHFrameReader.readInteger(FDEPointerEncoding))
+        return Err;
+      if (FDEPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+        return make_error<JITLinkError>(
+            "Unsupported FDE address pointer "
+            "encoding " +
+            formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " +
+            formatv("{0:x16}", CurRecordAtom->getAddress()));
+      break;
+    }
+    default:
+      llvm_unreachable("Invalid augmentation string field");
+    }
+  }
+
+  if (EHFrameReader.getOffset() - AugmentationDataStartOffset >
+      AugmentationDataLength)
+    return make_error<JITLinkError>("Read past the end of the augmentation "
+                                    "data while parsing fields");
+  assert(!CIEInfos.count(CurRecordAtom->getAddress()) &&
+         "Multiple CIEs recorded at the same address?");
+  CIEInfos[CurRecordAtom->getAddress()] = std::move(CIEInfo);
+  return Error::success();
+}
+
+/// Parse the FDE held in CurRecordAtom: add edges to its CIE, its target
+/// atom and (if the CIE requires one) its LSDA, plus a keep-alive edge back.
+Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress,
+                                uint32_t CIEPointer) {
+  LLVM_DEBUG(dbgs() << "  Record is FDE\n");
+  LLVM_DEBUG({
+    dbgs() << "  CIE pointer: "
+           << format("0x%016" PRIx64, CIEPointerAddress - CIEPointer) << "\n";
+  });
+
+  auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer);
+  if (CIEInfoItr == CIEInfos.end())
+    return make_error<JITLinkError>(
+        "FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()) +
+        " points to non-existent CIE at " +
+        formatv("{0:x16}", CIEPointerAddress - CIEPointer));
+  auto &CIEInfo = CIEInfoItr->second;
+
+  // The CIEPointer looks good. Add a relocation.
+  CurRecordAtom->addEdge(FDEToCIERelocKind,
+                         CIEPointerAddress - CurRecordAtom->getAddress(),
+                         *CIEInfo.CIEAtom, 0);
+
+  // Read and sanity check the PC-start pointer and size.
+  JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset();
+
+  auto PCBeginDelta = readAbsolutePointer();
+  if (!PCBeginDelta)
+    return PCBeginDelta.takeError();
+
+  JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta;
+  LLVM_DEBUG({
+    dbgs() << "  PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n";
+  });
+
+  auto *TargetAtom = G.getAtomByAddress(PCBegin);
+  if (!TargetAtom)
+    return make_error<JITLinkError>("FDE PC-begin " +
+                                    formatv("{0:x16}", PCBegin) +
+                                    " does not point at atom");
+
+  if (TargetAtom->getAddress() != PCBegin)
+    return make_error<JITLinkError>(
+        "FDE PC-begin " + formatv("{0:x16}", PCBegin) +
+        " does not point to start of atom at " +
+        formatv("{0:x16}", TargetAtom->getAddress()));
+
+  LLVM_DEBUG(dbgs() << "  FDE target: " << *TargetAtom << "\n");
+
+  // The PC-start pointer and size look good. Add relocations.
+  CurRecordAtom->addEdge(FDEToTargetRelocKind,
+                         PCBeginAddress - CurRecordAtom->getAddress(),
+                         *TargetAtom, 0);
+
+  // Add a keep-alive relocation from the function to the FDE to ensure it is
+  // not dead stripped.
+  TargetAtom->addEdge(Edge::KeepAlive, 0, *CurRecordAtom, 0);
+
+  // Skip over the PC range size field.
+  if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+    return Err;
+
+  if (CIEInfo.FDEsHaveLSDAField) {
+    uint64_t AugmentationDataSize;
+    if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize))
+      return Err;
+    if (AugmentationDataSize != G.getPointerSize())
+      return make_error<JITLinkError>(
+          "Unexpected FDE augmentation data size (expected " +
+          Twine(G.getPointerSize()) + ", got " + Twine(AugmentationDataSize) +
+          ") for FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()));
+    JITTargetAddress LSDAAddress = EHFrameAddress + EHFrameReader.getOffset();
+    auto LSDADelta = readAbsolutePointer();
+    if (!LSDADelta)
+      return LSDADelta.takeError();
+
+    JITTargetAddress LSDA = LSDAAddress + *LSDADelta;
+
+    auto *LSDAAtom = G.getAtomByAddress(LSDA);
+
+    if (!LSDAAtom)
+      return make_error<JITLinkError>("FDE LSDA " + formatv("{0:x16}", LSDA) +
+                                      " does not point at atom");
+
+    if (LSDAAtom->getAddress() != LSDA)
+      return make_error<JITLinkError>(
+          "FDE LSDA " + formatv("{0:x16}", LSDA) +
+          " does not point to start of atom at " +
+          formatv("{0:x16}", LSDAAtom->getAddress()));
+
+    LLVM_DEBUG(dbgs() << "  FDE LSDA: " << *LSDAAtom << "\n");
+
+    // LSDA looks good. Add relocations.
+    CurRecordAtom->addEdge(FDEToTargetRelocKind,
+                           LSDAAddress - CurRecordAtom->getAddress(), *LSDAAtom,
+                           0);
+  }
+
+  return Error::success();
+}
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+                 StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+                 Edge::Kind FDEToCIERelocKind,
+                 Edge::Kind FDEToTargetRelocKind) {
+  return EHFrameParser(G, EHFrameSection, EHFrameContent, EHFrameAddress,
+                       FDEToCIERelocKind, FDEToTargetRelocKind)
+      .atomize();
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) &&      \
+     !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) &&            \
+     !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+extern "C" void __register_frame(const void *);
+extern "C" void __deregister_frame(const void *);
+
+Error registerFrameWrapper(const void *P) {
+  __register_frame(P);
+  return Error::success();
+}
+
+Error deregisterFrameWrapper(const void *P) {
+  __deregister_frame(P);
+  return Error::success();
+}
+
+#else
+
+// The building compiler does not have __(de)register_frame but
+// it may be found at runtime in a dynamically-loaded library.
+// For example, this happens when building LLVM with Visual C++
+// but using the MingW runtime.
+static Error registerFrameWrapper(const void *P) {
+  static void((*RegisterFrame)(const void *)) = 0;
+
+  if (!RegisterFrame)
+    *(void **)&RegisterFrame =
+        llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("__register_frame");
+
+  if (RegisterFrame) {
+    RegisterFrame(P);
+    return Error::success();
+  }
+
+  return make_error<JITLinkError>("could not register eh-frame: "
+                                  "__register_frame function not found");
+}
+
+static Error deregisterFrameWrapper(const void *P) {
+  static void((*DeregisterFrame)(const void *)) = 0;
+
+  if (!DeregisterFrame)
+    *(void **)&DeregisterFrame =
+        llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+            "__deregister_frame");
+
+  if (DeregisterFrame) {
+    DeregisterFrame(P);
+    return Error::success();
+  }
+
+  return make_error<JITLinkError>("could not deregister eh-frame: "
+                                  "__deregister_frame function not found");
+}
+#endif
+
+#ifdef __APPLE__
+
+template <typename HandleFDEFn>
+Error walkAppleEHFrameSection(const char *const SectionStart,
+                              HandleFDEFn HandleFDE) {
+  const char *CurCFIRecord = SectionStart;
+  uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+
+  while (Size != 0) {
+    const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
+    if (Size == 0xffffffff)
+      Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
+    else
+      Size += 4;
+    uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
+    if (Offset != 0)
+      if (auto Err = HandleFDE(CurCFIRecord))
+        return Err;
+
+    LLVM_DEBUG({
+      dbgs() << "Registering eh-frame section:\n";
+      dbgs() << "Processing " << (Offset ? "FDE" : "CIE") << " @"
+             << (void *)CurCFIRecord << ": [";
+      for (unsigned I = 0; I < Size; ++I)
+        dbgs() << format(" 0x%02" PRIx8, *(CurCFIRecord + I));
+      dbgs() << " ]\n";
+    });
+    CurCFIRecord += Size;
+
+    Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+  }
+
+  return Error::success();
+}
+
+#endif // __APPLE__
+
+Error registerEHFrameSection(const void *EHFrameSectionAddr) {
+#ifdef __APPLE__
+  // On Darwin __register_frame has to be called for each FDE entry.
+  return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+                                 registerFrameWrapper);
+#else
+  // On Linux __register_frame takes a single argument:
+  // a pointer to the start of the .eh_frame section.
+
+  // How can it find the end? Because crtendS.o is linked
+  // in and it has an .eh_frame section with four zero chars.
+  return registerFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr) {
+#ifdef __APPLE__
+  return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+                                 deregisterFrameWrapper);
+#else
+  return deregisterFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+EHFrameRegistrar::~EHFrameRegistrar() {}
+
+InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() {
+  static InProcessEHFrameRegistrar Instance;
+  return Instance;
+}
+
+InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {}
+
+AtomGraphPassFunction
+createEHFrameRecorderPass(const Triple &TT,
+                          StoreFrameAddressFunction StoreFrameAddress) {
+  const char *EHFrameSectionName = nullptr;
+  if (TT.getObjectFormat() == Triple::MachO)
+    EHFrameSectionName = "__eh_frame";
+  else
+    EHFrameSectionName = ".eh_frame";
+
+  auto RecordEHFrame = [EHFrameSectionName,
+                        StoreFrameAddress](AtomGraph &G) -> Error {
+    // Search for a non-empty eh-frame and record the address of the first atom
+    // in it.
+    JITTargetAddress Addr = 0;
+    if (auto *S = G.findSectionByName(EHFrameSectionName))
+      Addr = S->getRange().getStart();
+    StoreFrameAddress(Addr);
+    return Error::success();
+  };
+
+  return RecordEHFrame;
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
new file mode 100644
index 000000000000..d679edef7ea6
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -0,0 +1,72 @@
+//===------- EHFrameSupportImpl.h - JITLink eh-frame utils ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EHFrame registration support for JITLink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
+#define LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
+
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// A generic parser for eh-frame sections.
+///
+/// Adds atoms representing CIE and FDE entries, using the given FDE-to-CIE and
+/// FDEToTarget relocation kinds.
+class EHFrameParser {
+public:
+  EHFrameParser(AtomGraph &G, Section &EHFrameSection, StringRef EHFrameContent,
+                JITTargetAddress EHFrameAddress, Edge::Kind FDEToCIERelocKind,
+                Edge::Kind FDEToTargetRelocKind);
+  Error atomize();
+
+private:
+  struct AugmentationInfo {
+    bool AugmentationDataPresent = false;
+    bool EHDataFieldPresent = false;
+    uint8_t Fields[4] = {0x0, 0x0, 0x0, 0x0};
+  };
+
+  Expected<AugmentationInfo> parseAugmentationString();
+  Expected<JITTargetAddress> readAbsolutePointer();
+  Error processCIE();
+  Error processFDE(JITTargetAddress CIEPointerAddress, uint32_t CIEPointer);
+
+  struct CIEInformation {
+    CIEInformation() = default;
+    CIEInformation(DefinedAtom &CIEAtom) : CIEAtom(&CIEAtom) {}
+    DefinedAtom *CIEAtom = nullptr;
+    bool FDEsHaveLSDAField = false;
+  };
+
+  AtomGraph &G;
+  Section &EHFrameSection;
+  StringRef EHFrameContent;
+  JITTargetAddress EHFrameAddress;
+  BinaryStreamReader EHFrameReader;
+  DefinedAtom *CurRecordAtom = nullptr;
+  DenseMap<JITTargetAddress, CIEInformation> CIEInfos;
+  Edge::Kind FDEToCIERelocKind;
+  Edge::Kind FDEToTargetRelocKind;
+};
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+                 StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+                 Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
diff --git a/lib/ExecutionEngine/JITLink/JITLink.cpp b/lib/ExecutionEngine/JITLink/JITLink.cpp
new file mode 100644
index 000000000000..9d0a7459dc09
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -0,0 +1,172 @@
+//===------------- JITLink.cpp - Core Run-time JIT linker APIs ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+#define DEBUG_TYPE "jitlink"
+
+namespace {
+
+enum JITLinkErrorCode { GenericJITLinkError = 1 };
+
+// FIXME: This class is only here to support the transition to llvm::Error. It
+// will be removed once this transition is complete. Clients should prefer to
+// deal with the Error value directly, rather than converting to error_code.
+class JITLinkerErrorCategory : public std::error_category {
+public:
+  // Identify this category as JITLink's own (it is not RuntimeDyld's).
+  const char *name() const noexcept override { return "jitlink"; }
+  std::string message(int Condition) const override {
+    switch (static_cast<JITLinkErrorCode>(Condition)) {
+    case GenericJITLinkError:
+      return "Generic JITLink error";
+    }
+    llvm_unreachable("Unrecognized JITLinkErrorCode");
+  }
+};
+
+static ManagedStatic<JITLinkerErrorCategory> JITLinkerErrorCategory;
+
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+char JITLinkError::ID = 0;
+
+void JITLinkError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+
+std::error_code JITLinkError::convertToErrorCode() const {
+  return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory);
+}
+
+const StringRef getGenericEdgeKindName(Edge::Kind K) {
+  switch (K) {
+  case Edge::Invalid:
+    return "INVALID RELOCATION";
+  case Edge::KeepAlive:
+    return "Keep-Alive";
+  case Edge::LayoutNext:
+    return "Layout-Next";
+  default:
+    llvm_unreachable("Unrecognized relocation kind");
+  }
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Atom &A) {
+  OS << "<";
+  if (A.getName().empty())
+    OS << "anon@" << format("0x%016" PRIx64, A.getAddress());
+  else
+    OS << A.getName();
+  OS << " [";
+  if (A.isDefined()) {
+    auto &DA = static_cast<const DefinedAtom &>(A);
+    OS << " section=" << DA.getSection().getName();
+    if (DA.isLive())
+      OS << " live";
+    if (DA.shouldDiscard())
+      OS << " should-discard";
+  } else
+    OS << " external";
+  OS << " ]>";
+  return OS;
+}
+
+void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
+               StringRef EdgeKindName) {
+  OS << "edge@" << formatv("{0:x16}", FixupAtom.getAddress() + E.getOffset())
+     << ": " << FixupAtom << " + " << E.getOffset() << " -- " << EdgeKindName
+     << " -> " << E.getTarget() << " + " << E.getAddend();
+}
+
+Section::~Section() {
+  for (auto *DA : DefinedAtoms)
+    DA->~DefinedAtom();
+}
+
+void AtomGraph::dump(raw_ostream &OS,
+                     std::function<StringRef(Edge::Kind)> EdgeKindToName) {
+  if (!EdgeKindToName)
+    EdgeKindToName = [](Edge::Kind K) { return StringRef(); };
+
+  OS << "Defined atoms:\n";
+  for (auto *DA : defined_atoms()) {
+    OS << "  " << format("0x%016" PRIx64, DA->getAddress()) << ": " << *DA
+       << "\n";
+    for (auto &E : DA->edges()) {
+      OS << "    ";
+      StringRef EdgeName = (E.getKind() < Edge::FirstRelocation
+                                ? getGenericEdgeKindName(E.getKind())
+                                : EdgeKindToName(E.getKind()));
+
+      if (!EdgeName.empty())
+        printEdge(OS, *DA, E, EdgeName);
+      else {
+        auto EdgeNumberString = std::to_string(E.getKind());
+        printEdge(OS, *DA, E, EdgeNumberString);
+      }
+      OS << "\n";
+    }
+  }
+
+  OS << "Absolute atoms:\n";
+  for (auto *A : absolute_atoms())
+    OS << "  " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+       << "\n";
+
+  OS << "External atoms:\n";
+  for (auto *A : external_atoms())
+    OS << "  " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+       << "\n";
+}
+
+JITLinkContext::~JITLinkContext() {}
+
+bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const {
+  return true;
+}
+
+AtomGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const {
+  return AtomGraphPassFunction();
+}
+
+Error JITLinkContext::modifyPassConfig(const Triple &TT,
+                                       PassConfiguration &Config) {
+  return Error::success();
+}
+
+Error markAllAtomsLive(AtomGraph &G) {
+  for (auto *DA : G.defined_atoms())
+    DA->setLive(true);
+  return Error::success();
+}
+
+void jitLink(std::unique_ptr<JITLinkContext> Ctx) {
+  auto Magic = identify_magic(Ctx->getObjectBuffer().getBuffer());
+  switch (Magic) {
+  case file_magic::macho_object:
+    return jitLink_MachO(std::move(Ctx));
+  default:
+    Ctx->notifyFailed(make_error<JITLinkError>("Unsupported file format"));
+  };
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
new file mode 100644
index 000000000000..96e074da122b
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -0,0 +1,481 @@
+//===--------- JITLinkGeneric.cpp - Generic JIT linker utilities ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic JITLinker utility class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITLinkGeneric.h"
+#include "EHFrameSupportImpl.h"
+
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+JITLinkerBase::~JITLinkerBase() {}
+
+void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
+
+  // Build the atom graph.
+  if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer()))
+    G = std::move(*GraphOrErr);
+  else
+    return Ctx->notifyFailed(GraphOrErr.takeError());
+  assert(G && "Graph should have been created by buildGraph above");
+
+  // Run the pre-pruning passes, then prune the graph.
+  if (auto Err = runPasses(Passes.PrePrunePasses, *G))
+    return Ctx->notifyFailed(std::move(Err));
+
+  LLVM_DEBUG({
+    dbgs() << "Atom graph \"" << G->getName() << "\" pre-pruning:\n";
+    dumpGraph(dbgs());
+  });
+
+  prune(*G);
+
+  LLVM_DEBUG({
+    dbgs() << "Atom graph \"" << G->getName() << "\" post-pruning:\n";
+    dumpGraph(dbgs());
+  });
+
+  // Run post-pruning passes.
+  if (auto Err = runPasses(Passes.PostPrunePasses, *G))
+    return Ctx->notifyFailed(std::move(Err));
+
+  // Sort atoms into segments.
+  layOutAtoms();
+
+  // Allocate memory for segments.
+  if (auto Err = allocateSegments(Layout))
+    return Ctx->notifyFailed(std::move(Err));
+
+  // Notify client that the defined atoms have been assigned addresses.
+  Ctx->notifyResolved(*G);
+
+  auto ExternalSymbols = getExternalSymbolNames();
+
+  // We're about to hand off ownership of ourself to the continuation. Grab a
+  // pointer to the context so that we can call it to initiate the lookup.
+  //
+  // FIXME: Once callee expressions are defined to be sequenced before argument
+  // expressions (c++17) we can simplify all this to:
+  //
+  // Ctx->lookup(std::move(ExternalSymbols),
+  //             [Self=std::move(Self)](Expected<AsyncLookupResult> Result) {
+  //               Self->linkPhase2(std::move(Self), std::move(Result));
+  //             });
+  //
+  // FIXME: Use move capture once we have c++14.
+  auto *TmpCtx = Ctx.get();
+  auto *UnownedSelf = Self.release();
+  auto Phase2Continuation =
+      [UnownedSelf](Expected<AsyncLookupResult> LookupResult) {
+        std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
+        UnownedSelf->linkPhase2(std::move(Self), std::move(LookupResult));
+      };
+  TmpCtx->lookup(std::move(ExternalSymbols), std::move(Phase2Continuation));
+}
+
+void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+                               Expected<AsyncLookupResult> LR) {
+  // If the lookup failed, bail out.
+  if (!LR)
+    return deallocateAndBailOut(LR.takeError());
+
+  // Assign addresses to external atoms.
+  applyLookupResult(*LR);
+
+  LLVM_DEBUG({
+    dbgs() << "Atom graph \"" << G->getName() << "\" before copy-and-fixup:\n";
+    dumpGraph(dbgs());
+  });
+
+  // Copy atom content to working memory and fix up.
+  if (auto Err = copyAndFixUpAllAtoms(Layout, *Alloc))
+    return deallocateAndBailOut(std::move(Err));
+
+  LLVM_DEBUG({
+    dbgs() << "Atom graph \"" << G->getName() << "\" after copy-and-fixup:\n";
+    dumpGraph(dbgs());
+  });
+
+  if (auto Err = runPasses(Passes.PostFixupPasses, *G))
+    return deallocateAndBailOut(std::move(Err));
+
+  // FIXME: Use move capture once we have c++14.
+  auto *UnownedSelf = Self.release();
+  auto Phase3Continuation = [UnownedSelf](Error Err) {
+    std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
+    UnownedSelf->linkPhase3(std::move(Self), std::move(Err));
+  };
+
+  Alloc->finalizeAsync(std::move(Phase3Continuation));
+}
+
+void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
+  if (Err)
+    return deallocateAndBailOut(std::move(Err));
+  Ctx->notifyFinalized(std::move(Alloc));
+}
+
+Error JITLinkerBase::runPasses(AtomGraphPassList &Passes, AtomGraph &G) {
+  for (auto &P : Passes)
+    if (auto Err = P(G))
+      return Err;
+  return Error::success();
+}
+
+void JITLinkerBase::layOutAtoms() {
+  // Group sections by protections, and whether or not they're zero-fill.
+  for (auto &S : G->sections()) {
+
+    // Skip empty sections.
+    if (S.atoms_empty())
+      continue;
+
+    auto &SL = Layout[S.getProtectionFlags()];
+    if (S.isZeroFill())
+      SL.ZeroFillSections.push_back(SegmentLayout::SectionLayout(S));
+    else
+      SL.ContentSections.push_back(SegmentLayout::SectionLayout(S));
+  }
+
+  // Sort sections within the layout by ordinal.
+  {
+    auto CompareByOrdinal = [](const SegmentLayout::SectionLayout &LHS,
+                               const SegmentLayout::SectionLayout &RHS) {
+      return LHS.S->getSectionOrdinal() < RHS.S->getSectionOrdinal();
+    };
+    for (auto &KV : Layout) {
+      auto &SL = KV.second;
+      std::sort(SL.ContentSections.begin(), SL.ContentSections.end(),
+                CompareByOrdinal);
+      std::sort(SL.ZeroFillSections.begin(), SL.ZeroFillSections.end(),
+                CompareByOrdinal);
+    }
+  }
+
+  // Add atoms to the sections.
+  for (auto &KV : Layout) {
+    auto &SL = KV.second;
+    for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) {
+      for (auto &SI : *SIList) {
+        // First build the set of layout-heads (i.e. "heads" of layout-next
+        // chains) by copying the section atoms, then eliminating any that
+        // appear as layout-next targets.
+        DenseSet<DefinedAtom *> LayoutHeads;
+        for (auto *DA : SI.S->atoms())
+          LayoutHeads.insert(DA);
+
+        for (auto *DA : SI.S->atoms())
+          if (DA->hasLayoutNext())
+            LayoutHeads.erase(&DA->getLayoutNext());
+
+        // Next, sort the layout heads by address order.
+        std::vector<DefinedAtom *> OrderedLayoutHeads;
+        OrderedLayoutHeads.reserve(LayoutHeads.size());
+        for (auto *DA : LayoutHeads)
+          OrderedLayoutHeads.push_back(DA);
+
+        // Now sort the list of layout heads by address.
+        std::sort(OrderedLayoutHeads.begin(), OrderedLayoutHeads.end(),
+                  [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
+                    return LHS->getAddress() < RHS->getAddress();
+                  });
+
+        // Now populate the SI.Atoms field by appending each of the chains.
+        for (auto *DA : OrderedLayoutHeads) {
+          SI.Atoms.push_back(DA);
+          while (DA->hasLayoutNext()) {
+            auto &Next = DA->getLayoutNext();
+            SI.Atoms.push_back(&Next);
+            DA = &Next;
+          }
+        }
+      }
+    }
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "Segment ordering:\n";
+    for (auto &KV : Layout) {
+      dbgs() << "  Segment "
+             << static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
+      auto &SL = KV.second;
+      for (auto &SIEntry :
+           {std::make_pair(&SL.ContentSections, "content sections"),
+            std::make_pair(&SL.ZeroFillSections, "zero-fill sections")}) {
+        auto &SIList = *SIEntry.first;
+        dbgs() << "    " << SIEntry.second << ":\n";
+        for (auto &SI : SIList) {
+          dbgs() << "      " << SI.S->getName() << ":\n";
+          for (auto *DA : SI.Atoms)
+            dbgs() << "        " << *DA << "\n";
+        }
+      }
+    }
+  });
+}
+
+Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
+
+  // Compute segment sizes and allocate memory.
+  LLVM_DEBUG(dbgs() << "JIT linker requesting: { ");
+  JITLinkMemoryManager::SegmentsRequestMap Segments;
+  for (auto &KV : Layout) {
+    auto &Prot = KV.first;
+    auto &SegLayout = KV.second;
+
+    // Calculate segment content size.
+    size_t SegContentSize = 0;
+    for (auto &SI : SegLayout.ContentSections) {
+      assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
+      assert(!SI.Atoms.empty() && "Section layouts must not be empty");
+
+      // Bump to section alignment before processing atoms.
+      SegContentSize = alignTo(SegContentSize, SI.S->getAlignment());
+
+      for (auto *DA : SI.Atoms) {
+        SegContentSize = alignTo(SegContentSize, DA->getAlignment());
+        SegContentSize += DA->getSize();
+      }
+    }
+
+    // Get segment content alignment.
+    unsigned SegContentAlign = 1;
+    if (!SegLayout.ContentSections.empty()) {
+      auto &FirstContentSection = SegLayout.ContentSections.front();
+      SegContentAlign =
+          std::max(FirstContentSection.S->getAlignment(),
+                   FirstContentSection.Atoms.front()->getAlignment());
+    }
+
+    // Calculate segment zero-fill size.
+    uint64_t SegZeroFillSize = 0;
+    for (auto &SI : SegLayout.ZeroFillSections) {
+      assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
+      assert(!SI.Atoms.empty() && "Section layouts must not be empty");
+
+      // Bump to section alignment before processing atoms.
+      SegZeroFillSize = alignTo(SegZeroFillSize, SI.S->getAlignment());
+
+      for (auto *DA : SI.Atoms) {
+        SegZeroFillSize = alignTo(SegZeroFillSize, DA->getAlignment());
+        SegZeroFillSize += DA->getSize();
+      }
+    }
+
+    // Calculate segment zero-fill alignment.
+    uint32_t SegZeroFillAlign = 1;
+
+    if (!SegLayout.ZeroFillSections.empty()) {
+      auto &FirstZeroFillSection = SegLayout.ZeroFillSections.front();
+      SegZeroFillAlign =
+          std::max(FirstZeroFillSection.S->getAlignment(),
+                   FirstZeroFillSection.Atoms.front()->getAlignment());
+    }
+
+    if (SegContentSize == 0)
+      SegContentAlign = SegZeroFillAlign;
+
+    if (SegContentAlign % SegZeroFillAlign != 0)
+      return make_error<JITLinkError>("First content atom alignment does not "
+                                      "accommodate first zero-fill atom "
+                                      "alignment");
+
+    Segments[Prot] = {SegContentSize, SegContentAlign, SegZeroFillSize,
+                      SegZeroFillAlign};
+
+    LLVM_DEBUG({
+      dbgs() << (&KV == &*Layout.begin() ? "" : "; ")
+             << static_cast<sys::Memory::ProtectionFlags>(Prot) << ": "
+             << SegContentSize << " content bytes (alignment "
+             << SegContentAlign << ") + " << SegZeroFillSize
+             << " zero-fill bytes (alignment " << SegZeroFillAlign << ")";
+    });
+  }
+  LLVM_DEBUG(dbgs() << " }\n");
+
+  if (auto AllocOrErr = Ctx->getMemoryManager().allocate(Segments))
+    Alloc = std::move(*AllocOrErr);
+  else
+    return AllocOrErr.takeError();
+
+  LLVM_DEBUG({
+    dbgs() << "JIT linker got working memory:\n";
+    for (auto &KV : Layout) {
+      auto Prot = static_cast<sys::Memory::ProtectionFlags>(KV.first);
+      dbgs() << "  " << Prot << ": "
+             << (const void *)Alloc->getWorkingMemory(Prot).data() << "\n";
+    }
+  });
+
+  // Update atom target addresses.
+  for (auto &KV : Layout) {
+    auto &Prot = KV.first;
+    auto &SL = KV.second;
+
+    JITTargetAddress AtomTargetAddr =
+        Alloc->getTargetMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
+
+    for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections})
+      for (auto &SI : *SIList) {
+        AtomTargetAddr = alignTo(AtomTargetAddr, SI.S->getAlignment());
+        for (auto *DA : SI.Atoms) {
+          AtomTargetAddr = alignTo(AtomTargetAddr, DA->getAlignment());
+          DA->setAddress(AtomTargetAddr);
+          AtomTargetAddr += DA->getSize();
+        }
+      }
+  }
+
+  return Error::success();
+}
+
+DenseSet<StringRef> JITLinkerBase::getExternalSymbolNames() const {
+  // Identify unresolved external atoms.
+  DenseSet<StringRef> UnresolvedExternals;
+  for (auto *DA : G->external_atoms()) {
+    assert(DA->getAddress() == 0 &&
+           "External has already been assigned an address");
+    assert(DA->getName() != StringRef() && DA->getName() != "" &&
+           "Externals must be named");
+    UnresolvedExternals.insert(DA->getName());
+  }
+  return UnresolvedExternals;
+}
+
+void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
+  for (auto &KV : Result) {
+    Atom &A = G->getAtomByName(KV.first);
+    assert(A.getAddress() == 0 && "Atom already resolved");
+    A.setAddress(KV.second.getAddress());
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "Externals after applying lookup result:\n";
+    for (auto *A : G->external_atoms())
+      dbgs() << "  " << A->getName() << ": "
+             << formatv("{0:x16}", A->getAddress()) << "\n";
+  });
+  assert(llvm::all_of(G->external_atoms(),
+                      [](Atom *A) { return A->getAddress() != 0; }) &&
+         "All atoms should have been resolved by this point");
+}
+
+void JITLinkerBase::deallocateAndBailOut(Error Err) {
+  assert(Err && "Should not be bailing out on success value");
+  assert(Alloc && "can not call deallocateAndBailOut before allocation");
+  Ctx->notifyFailed(joinErrors(std::move(Err), Alloc->deallocate()));
+}
+
+void JITLinkerBase::dumpGraph(raw_ostream &OS) {
+  assert(G && "Graph is not set yet"); // G is built in linkPhase1.
+  G->dump(OS, [this](Edge::Kind K) { return getEdgeKindName(K); });
+}
+
+void prune(AtomGraph &G) {
+  std::vector<DefinedAtom *> Worklist;
+  DenseMap<DefinedAtom *, std::vector<Edge *>> EdgesToUpdate;
+
+  // Build the initial worklist from all atoms initially live.
+  for (auto *DA : G.defined_atoms()) {
+    if (!DA->isLive() || DA->shouldDiscard())
+      continue;
+
+    for (auto &E : DA->edges()) {
+      if (!E.getTarget().isDefined())
+        continue;
+
+      auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
+
+      if (EDT.shouldDiscard())
+        EdgesToUpdate[&EDT].push_back(&E);
+      else if (E.isKeepAlive() && !EDT.isLive())
+        Worklist.push_back(&EDT);
+    }
+  }
+
+  // Propagate live flags to all atoms reachable from the initial live set.
+  while (!Worklist.empty()) {
+    DefinedAtom &NextLive = *Worklist.back();
+    Worklist.pop_back();
+
+    assert(!NextLive.shouldDiscard() &&
+           "should-discard nodes should never make it into the worklist");
+
+    // An atom may have been queued more than once; skip it if an earlier
+    // visit has already marked it live.
+    if (NextLive.isLive())
+      continue;
+
+    // Otherwise set it as live and add any non-live atoms that it points to
+    // to the worklist.
+    NextLive.setLive(true);
+
+    for (auto &E : NextLive.edges()) {
+      if (!E.getTarget().isDefined())
+        continue;
+
+      auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
+
+      if (EDT.shouldDiscard())
+        EdgesToUpdate[&EDT].push_back(&E);
+      else if (E.isKeepAlive() && !EDT.isLive())
+        Worklist.push_back(&EDT);
+    }
+  }
+
+  // Collect atoms to remove, then remove them from the graph.
+  std::vector<DefinedAtom *> AtomsToRemove;
+  for (auto *DA : G.defined_atoms())
+    if (DA->shouldDiscard() || !DA->isLive())
+      AtomsToRemove.push_back(DA);
+
+  LLVM_DEBUG(dbgs() << "Pruning atoms:\n");
+  for (auto *DA : AtomsToRemove) {
+    LLVM_DEBUG(dbgs() << "  " << *DA << "... ");
+
+    // Check whether we need to replace this atom with an external atom.
+    // We replace if all of the following hold:
+    //   (1) The atom is marked should-discard,
+    //   (2) it has live edges (i.e. edges from live atoms) pointing to it.
+    //
+    // Otherwise we simply delete the atom.
+
+    G.removeDefinedAtom(*DA);
+
+    auto EdgesToUpdateItr = EdgesToUpdate.find(DA);
+    if (EdgesToUpdateItr != EdgesToUpdate.end()) {
+      auto &ExternalReplacement = G.addExternalAtom(DA->getName());
+      for (auto *EdgeToUpdate : EdgesToUpdateItr->second)
+        EdgeToUpdate->setTarget(ExternalReplacement);
+      LLVM_DEBUG(dbgs() << "replaced with " << ExternalReplacement << "\n");
+    } else
+      LLVM_DEBUG(dbgs() << "deleted\n");
+  }
+
+  // Finally, discard any absolute symbols that were marked should-discard.
+  // Being live is not a reason to drop one: live atoms are the ones we keep.
+  {
+    std::vector<Atom *> AbsoluteAtomsToRemove;
+    for (auto *A : G.absolute_atoms())
+      if (A->shouldDiscard())
+        AbsoluteAtomsToRemove.push_back(A);
+    for (auto *A : AbsoluteAtomsToRemove)
+      G.removeAbsoluteAtom(*A);
+  }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
new file mode 100644
index 000000000000..e6fd6e38f7a6
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -0,0 +1,256 @@
+//===------ JITLinkGeneric.h - Generic JIT linker utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic JITLinker utilities. E.g. graph pruning, eh-frame parsing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
+#define LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+
+class MemoryBufferRef;
+
+namespace jitlink {
+
+/// Base class for a JIT linker.
+///
+/// A JITLinkerBase instance links one object file into an ongoing JIT
+/// session. Symbol resolution and finalization operations are pluggable,
+/// and called using continuation passing (passing a continuation for the
+/// remaining linker work) to allow them to be performed asynchronously.
+class JITLinkerBase {
+public:
+  JITLinkerBase(std::unique_ptr<JITLinkContext> Ctx, PassConfiguration Passes)
+      : Ctx(std::move(Ctx)), Passes(std::move(Passes)) {
+    assert(this->Ctx && "Ctx can not be null");
+  }
+
+  virtual ~JITLinkerBase();
+
+protected:
+  struct SegmentLayout {
+    using SectionAtomsList = std::vector<DefinedAtom *>;
+    struct SectionLayout {
+      SectionLayout(Section &S) : S(&S) {}
+
+      Section *S;
+      SectionAtomsList Atoms;
+    };
+
+    using SectionLayoutList = std::vector<SectionLayout>;
+
+    SectionLayoutList ContentSections;
+    SectionLayoutList ZeroFillSections;
+  };
+  // Keyed by the protection flags of the sections grouped into the segment.
+  using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
+
+  // Phase 1:
+  //   1.1: Build atom graph
+  //   1.2: Run pre-prune passes
+  //   1.3: Prune graph
+  //   1.4: Run post-prune passes
+  //   1.5: Sort atoms into segments
+  //   1.6: Allocate segment memory, then call notifyResolved on the context
+  //   1.7: Identify externals and make an async call to resolve function
+  void linkPhase1(std::unique_ptr<JITLinkerBase> Self);
+
+  // Phase 2:
+  //   2.1: Apply resolution results
+  //   2.2: Copy atom contents to working memory and fix them up
+  //   2.3: Run post-fixup passes
+  //   2.4: Make an async call to transfer and finalize memory.
+  void linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+                  Expected<AsyncLookupResult> LookupResult);
+
+  // Phase 3:
+  //   3.1: Call OnFinalized callback, handing off allocation.
+  void linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err);
+
+  // Build a graph from the given object buffer.
+  // To be implemented by the client.
+  virtual Expected<std::unique_ptr<AtomGraph>>
+  buildGraph(MemoryBufferRef ObjBuffer) = 0;
+
+  // For debug dumping of the atom graph.
+  virtual StringRef getEdgeKindName(Edge::Kind K) const = 0;
+
+private:
+  // Run all passes in the given pass list, bailing out immediately if any pass
+  // returns an error.
+  Error runPasses(AtomGraphPassList &Passes, AtomGraph &G);
+
+  // Copy atom contents and apply relocations.
+  // Implemented in JITLinker.
+  virtual Error
+  copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
+                       JITLinkMemoryManager::Allocation &Alloc) const = 0;
+
+  void layOutAtoms();
+  Error allocateSegments(const SegmentLayoutMap &Layout);
+  DenseSet<StringRef> getExternalSymbolNames() const;
+  void applyLookupResult(AsyncLookupResult LR);
+  void deallocateAndBailOut(Error Err);
+
+  void dumpGraph(raw_ostream &OS);
+
+  std::unique_ptr<JITLinkContext> Ctx;
+  PassConfiguration Passes;
+  std::unique_ptr<AtomGraph> G;
+  SegmentLayoutMap Layout;
+  std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
+};
+
+template <typename LinkerImpl> class JITLinker : public JITLinkerBase {
+public:
+  using JITLinkerBase::JITLinkerBase;
+
+  /// Link constructs a LinkerImpl instance and calls linkPhase1.
+  /// Link should be called with the constructor arguments for LinkerImpl, which
+  /// will be forwarded to the constructor.
+  template <typename... ArgTs> static void link(ArgTs &&... Args) {
+    auto L = llvm::make_unique<LinkerImpl>(std::forward<ArgTs>(Args)...);
+
+    // Ownership of the linker is passed into the linker's doLink function to
+    // allow it to be passed on to async continuations.
+    //
+    // FIXME: Remove LTmp once we have c++17.
+    // C++17 sequencing rules guarantee that function name expressions are
+    // sequenced before arguments, so L->linkPhase1(std::move(L), ...) will be
+    // well formed.
+    auto &LTmp = *L;
+    LTmp.linkPhase1(std::move(L));
+  }
+
+private:
+  const LinkerImpl &impl() const {
+    return static_cast<const LinkerImpl &>(*this);
+  }
+
+  Error
+  copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
+                       JITLinkMemoryManager::Allocation &Alloc) const override {
+    LLVM_DEBUG(dbgs() << "Copying and fixing up atoms:\n");
+    for (auto &KV : Layout) {
+      auto &Prot = KV.first;
+      auto &SegLayout = KV.second;
+
+      auto SegMem = Alloc.getWorkingMemory(
+          static_cast<sys::Memory::ProtectionFlags>(Prot));
+      char *LastAtomEnd = SegMem.data();
+      char *AtomDataPtr = LastAtomEnd;
+
+      LLVM_DEBUG({
+        dbgs() << "  Processing segment "
+               << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
+               << (const void *)SegMem.data() << " .. "
+               << (const void *)((char *)SegMem.data() + SegMem.size())
+               << " ]\n    Processing content sections:\n";
+      });
+
+      for (auto &SI : SegLayout.ContentSections) {
+        LLVM_DEBUG(dbgs() << "    " << SI.S->getName() << ":\n");
+
+        AtomDataPtr += alignmentAdjustment(AtomDataPtr, SI.S->getAlignment());
+
+        LLVM_DEBUG({
+          dbgs() << "      Bumped atom pointer to " << (const void *)AtomDataPtr
+                 << " to meet section alignment "
+                 << " of " << SI.S->getAlignment() << "\n";
+        });
+
+        for (auto *DA : SI.Atoms) {
+
+          // Align.
+          AtomDataPtr += alignmentAdjustment(AtomDataPtr, DA->getAlignment());
+          LLVM_DEBUG({
+            dbgs() << "      Bumped atom pointer to "
+                   << (const void *)AtomDataPtr << " to meet alignment of "
+                   << DA->getAlignment() << "\n";
+          });
+
+          // Zero pad up to alignment.
+          LLVM_DEBUG({
+            if (LastAtomEnd != AtomDataPtr)
+              dbgs() << "      Zero padding from " << (const void *)LastAtomEnd
+                     << " to " << (const void *)AtomDataPtr << "\n";
+          });
+          while (LastAtomEnd != AtomDataPtr)
+            *LastAtomEnd++ = 0;
+
+          // Copy initial atom content.
+          LLVM_DEBUG({
+            dbgs() << "      Copying atom " << *DA << " content, "
+                   << DA->getContent().size() << " bytes, from "
+                   << (const void *)DA->getContent().data() << " to "
+                   << (const void *)AtomDataPtr << "\n";
+          });
+          memcpy(AtomDataPtr, DA->getContent().data(), DA->getContent().size());
+
+          // Copy atom data and apply fixups.
+          LLVM_DEBUG(dbgs() << "      Applying fixups.\n");
+          for (auto &E : DA->edges()) {
+
+            // Skip non-relocation edges.
+            if (!E.isRelocation())
+              continue;
+
+            // Dispatch to LinkerImpl for fixup.
+            if (auto Err = impl().applyFixup(*DA, E, AtomDataPtr))
+              return Err;
+          }
+
+          // Point the atom's content to the fixed up buffer.
+          DA->setContent(StringRef(AtomDataPtr, DA->getContent().size()));
+
+          // Update atom end pointer.
+          LastAtomEnd = AtomDataPtr + DA->getContent().size();
+          AtomDataPtr = LastAtomEnd;
+        }
+      }
+
+      // Zero pad the rest of the segment.
+      LLVM_DEBUG({
+        dbgs() << "    Zero padding end of segment from "
+               << (const void *)LastAtomEnd << " to "
+               << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
+      });
+      while (LastAtomEnd != SegMem.data() + SegMem.size())
+        *LastAtomEnd++ = 0;
+    }
+
+    return Error::success();
+  }
+};
+
+/// Dead strips and replaces discarded definitions with external atoms.
+///
+/// Finds the set of nodes reachable from any node initially marked live
+/// (nodes marked should-discard are treated as not live, even if they are
+/// reachable). All nodes not marked as live at the end of this process,
+/// are deleted. Nodes that are live, but marked should-discard are replaced
+/// with external atoms and all edges to them are re-written.
+void prune(AtomGraph &G);
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+                 StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+                 Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#undef DEBUG_TYPE // "jitlink"
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
diff --git a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
new file mode 100644
index 000000000000..267307cfde05
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -0,0 +1,105 @@
+//===--- JITLinkMemoryManager.cpp - JITLinkMemoryManager implementation ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+namespace jitlink {
+
+JITLinkMemoryManager::~JITLinkMemoryManager() = default;
+JITLinkMemoryManager::Allocation::~Allocation() = default;
+
+Expected<std::unique_ptr<JITLinkMemoryManager::Allocation>>
+InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) {
+  // Maps a read/write memory block for every segment in Request.
+  using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
+
+  // Local class for allocation.
+  class IPMMAlloc : public Allocation {
+  public:
+    IPMMAlloc(AllocationMap SegBlocks) : SegBlocks(std::move(SegBlocks)) {}
+    MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
+      assert(SegBlocks.count(Seg) && "No allocation for segment");
+      return {static_cast<char *>(SegBlocks[Seg].base()),
+              SegBlocks[Seg].allocatedSize()};
+    }
+    JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
+      assert(SegBlocks.count(Seg) && "No allocation for segment");
+      return reinterpret_cast<JITTargetAddress>(SegBlocks[Seg].base());
+    }
+    void finalizeAsync(FinalizeContinuation OnFinalize) override {
+      OnFinalize(applyProtections());
+    }
+    Error deallocate() override {
+      for (auto &KV : SegBlocks)
+        if (auto EC = sys::Memory::releaseMappedMemory(KV.second))
+          return errorCodeToError(EC);
+      return Error::success();
+    }
+
+  private:
+    Error applyProtections() {
+      for (auto &KV : SegBlocks) {
+        auto &Prot = KV.first;
+        auto &Block = KV.second;
+        if (auto EC = sys::Memory::protectMappedMemory(Block, Prot))
+          return errorCodeToError(EC);
+        if (Prot & sys::Memory::MF_EXEC)
+          sys::Memory::InvalidateInstructionCache(Block.base(),
+                                                  Block.allocatedSize());
+      }
+      return Error::success();
+    }
+
+    AllocationMap SegBlocks;
+  };
+
+  AllocationMap Blocks;
+  const sys::Memory::ProtectionFlags ReadWrite =
+      static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+                                                sys::Memory::MF_WRITE);
+
+  for (auto &KV : Request) {
+    auto &Seg = KV.second;
+    // NOTE(review): error returns below leave earlier Blocks mapped; confirm.
+    if (Seg.getContentAlignment() > sys::Process::getPageSizeEstimate())
+      return make_error<StringError>("Cannot request higher than page "
+                                     "alignment",
+                                     inconvertibleErrorCode());
+
+    if (sys::Process::getPageSizeEstimate() % Seg.getContentAlignment() != 0)
+      return make_error<StringError>("Page size is not a multiple of "
+                                     "alignment",
+                                     inconvertibleErrorCode());
+
+    uint64_t ZeroFillStart =
+        alignTo(Seg.getContentSize(), Seg.getZeroFillAlignment());
+    uint64_t SegmentSize = ZeroFillStart + Seg.getZeroFillSize();
+
+    std::error_code EC;
+    auto SegMem =
+        sys::Memory::allocateMappedMemory(SegmentSize, nullptr, ReadWrite, EC);
+
+    if (EC)
+      return errorCodeToError(EC);
+
+    // Zero out the zero-fill memory.
+    memset(static_cast<char *>(SegMem.base()) + ZeroFillStart, 0,
+           Seg.getZeroFillSize());
+
+    // Record the block for this segment.
+    Blocks[KV.first] = std::move(SegMem);
+  }
+  return std::unique_ptr<InProcessMemoryManager::Allocation>(
+      new IPMMAlloc(std::move(Blocks)));
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachO.cpp b/lib/ExecutionEngine/JITLink/MachO.cpp
new file mode 100644
index 000000000000..15995b8ce98f
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -0,0 +1,78 @@
+//===-------------- MachO.cpp - JIT linker function for MachO -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO jit-link function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx) {
+  // We don't want to do full MachO validation here. Just parse enough of the
+  // header to find out what MachO linker to use; failures go to notifyFailed.
+  StringRef Data = Ctx->getObjectBuffer().getBuffer();
+  if (Data.size() < 4)
+    return Ctx->notifyFailed(
+        make_error<JITLinkError>("Truncated MachO buffer"));
+
+  uint32_t Magic;
+  memcpy(&Magic, Data.data(), sizeof(uint32_t));
+  LLVM_DEBUG({
+    dbgs() << "jitLink_MachO: magic = " << format("0x%08" PRIx32, Magic)
+           << ", identifier = \""
+           << Ctx->getObjectBuffer().getBufferIdentifier() << "\"\n";
+  });
+
+  if (Magic == MachO::MH_MAGIC || Magic == MachO::MH_CIGAM) {
+    Ctx->notifyFailed(
+        make_error<JITLinkError>("MachO 32-bit platforms not supported"));
+    return;
+  } else if (Magic == MachO::MH_MAGIC_64 || Magic == MachO::MH_CIGAM_64) {
+    // The memcpy below reads a whole mach_header_64; only 4 bytes are known.
+    if (Data.size() < sizeof(MachO::mach_header_64))
+      return Ctx->notifyFailed(
+          make_error<JITLinkError>("Truncated MachO buffer"));
+    MachO::mach_header_64 Header;
+    memcpy(&Header, Data.data(), sizeof(MachO::mach_header_64));
+    if (Magic == MachO::MH_CIGAM_64)
+      swapStruct(Header);
+
+    LLVM_DEBUG({
+      dbgs() << "jitLink_MachO: cputype = "
+             << format("0x%08" PRIx32, Header.cputype)
+             << ", cpusubtype = " << format("0x%08" PRIx32, Header.cpusubtype)
+             << "\n";
+    });
+
+    switch (Header.cputype) {
+    case MachO::CPU_TYPE_X86_64:
+      return jitLink_MachO_x86_64(std::move(Ctx));
+    }
+    Ctx->notifyFailed(make_error<JITLinkError>("MachO-64 CPU type not valid"));
+    return;
+  }
+
+  Ctx->notifyFailed(make_error<JITLinkError>("MachO magic not valid"));
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
new file mode 100644
index 000000000000..1501c7ad0bc5
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
@@ -0,0 +1,411 @@
+//=--------- MachOAtomGraphBuilder.cpp - MachO AtomGraph builder ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO AtomGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOAtomGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+MachOAtomGraphBuilder::~MachOAtomGraphBuilder() = default;
+
+Expected<std::unique_ptr<AtomGraph>> MachOAtomGraphBuilder::buildGraph() {
+  // Run the build phases in order; the first failure aborts the build.
+  if (Error SectionsErr = parseSections())
+    return std::move(SectionsErr);
+  if (Error AtomsErr = addAtoms())
+    return std::move(AtomsErr);
+  // Relocations are supplied by the target-specific subclass.
+  if (Error RelocsErr = addRelocations())
+    return std::move(RelocsErr);
+  // Success: transfer ownership of the finished graph to the caller.
+  return std::move(G);
+}
+
+MachOAtomGraphBuilder::MachOAtomGraphBuilder(const object::MachOObjectFile &Obj)
+    : Obj(Obj), // Held by reference: Obj must outlive this builder.
+      G(llvm::make_unique<AtomGraph>(Obj.getFileName(), getPointerSize(Obj),
+                                     getEndianness(Obj))) {}
+
+void MachOAtomGraphBuilder::addCustomAtomizer(StringRef SectionName,
+                                              CustomAtomizeFunction Atomizer) {
+  assert(!CustomAtomizeFunctions.count(SectionName) && // One per section.
+         "Custom atomizer for this section already exists");
+  CustomAtomizeFunctions[SectionName] = std::move(Atomizer);
+}
+
+bool MachOAtomGraphBuilder::areLayoutLocked(const Atom &A, const Atom &B) {
+  // An atom is trivially locked to itself.
+  if (&A == &B)
+    return true;
+
+  // Two distinct atoms can only be locked when both of them are defined;
+  // if either one is undefined they are not locked.
+  if (!(A.isDefined() && B.isDefined()))
+    return false;
+
+  // Both atoms are defined, so treat them as DefinedAtoms and look up the
+  // start of the alt_entry chain (if any) that each one belongs to.
+  const auto *DefA = static_cast<const DefinedAtom *>(&A);
+  const auto *DefB = static_cast<const DefinedAtom *>(&B);
+
+  // An atom that is not part of any chain is not locked to another atom.
+  auto ChainA = AltEntryStarts.find(DefA);
+  if (ChainA == AltEntryStarts.end())
+    return false;
+  auto ChainB = AltEntryStarts.find(DefB);
+  if (ChainB == AltEntryStarts.end())
+    return false;
+
+  // Locked exactly when both chains share the same starting atom.
+  return ChainA->second == ChainB->second;
+}
+
+unsigned // Pointer size, in bytes, to use for the graph built from Obj.
+MachOAtomGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
+  return Obj.is64Bit() ? 8 : 4;
+}
+
+support::endianness // Byte order to use for the graph built from Obj.
+MachOAtomGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
+  return Obj.isLittleEndian() ? support::little : support::big;
+}
+
+MachOAtomGraphBuilder::MachOSection &MachOAtomGraphBuilder::getCommonSection() {
+  if (CommonSymbolsSection) // Already created by an earlier call.
+    return *CommonSymbolsSection;
+  const auto ReadWrite = static_cast<sys::Memory::ProtectionFlags>(
+      sys::Memory::MF_READ | sys::Memory::MF_WRITE);
+  CommonSymbolsSection =
+      MachOSection(G->createSection("<common>", 1, ReadWrite, true));
+  return *CommonSymbolsSection;
+}
+
+Error MachOAtomGraphBuilder::parseSections() {
+  for (auto &SecRef : Obj.sections()) { // Record one MachOSection per section.
+    assert((SecRef.getAlignment() <= std::numeric_limits<uint32_t>::max()) &&
+           "Section alignment does not fit in 32 bits");
+
+    StringRef Name;
+    if (auto EC = SecRef.getName(Name))
+      return errorCodeToError(EC);
+    // Sections are keyed by (object index + 1) in the Sections map.
+    unsigned SectionIndex = SecRef.getIndex() + 1;
+
+    uint32_t Align = SecRef.getAlignment();
+    if (!isPowerOf2_32(Align))
+      return make_error<JITLinkError>("Section " + Name +
+                                      " has non-power-of-2 "
+                                      "alignment");
+
+    // FIXME: Get real section permissions (how, exactly, on MachO?). For now
+    // text sections are mapped read/exec and everything else read/write.
+    sys::Memory::ProtectionFlags Prot;
+    if (SecRef.isText())
+      Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+                                                       sys::Memory::MF_EXEC);
+    else
+      Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+                                                       sys::Memory::MF_WRITE);
+
+    auto &GenericSection = G->createSection(Name, Align, Prot, SecRef.isBSS());
+
+    LLVM_DEBUG({
+      dbgs() << "Adding section " << Name << ": "
+             << format("0x%016" PRIx64, SecRef.getAddress())
+             << ", align: " << SecRef.getAlignment() << "\n";
+    });
+
+    assert(!Sections.count(SectionIndex) && "Section index already in use");
+    // Wrap the generic section together with its MachO address/alignment.
+    auto &MachOSec =
+        Sections
+            .try_emplace(SectionIndex, GenericSection, SecRef.getAddress(),
+                         SecRef.getAlignment())
+            .first->second;
+
+    if (!SecRef.isVirtual()) {
+      // If this section has content then record it.
+      Expected<StringRef> Content = SecRef.getContents();
+      if (!Content)
+        return Content.takeError();
+      if (Content->size() != SecRef.getSize())
+        return make_error<JITLinkError>("Section content size does not match "
+                                        "declared size for " +
+                                        Name);
+      MachOSec.setContent(*Content);
+    } else {
+      // If this is a zero-fill section then just record the size.
+      MachOSec.setZeroFill(SecRef.getSize());
+    }
+    // The flags word is read from the 32- or 64-bit MachO section header.
+    uint32_t SectionFlags =
+        Obj.is64Bit() ? Obj.getSection64(SecRef.getRawDataRefImpl()).flags
+                      : Obj.getSection(SecRef.getRawDataRefImpl()).flags;
+    // Remember no-dead-strip; atoms of such sections are later marked live.
+    MachOSec.setNoDeadStrip(SectionFlags & MachO::S_ATTR_NO_DEAD_STRIP);
+  }
+
+  return Error::success();
+}
+
+// Adds atoms for all symbols in the object: external, absolute and common
+// atoms directly, and defined atoms with identified start addresses (lengths
+// are derived afterwards from the next atom or the section end). Sections with
+// no atom at offset zero get an anonymous atom there. Finally, alt-entry atoms
+// are chained to their predecessors and recorded in AltEntryStarts.
+Error MachOAtomGraphBuilder::addNonCustomAtoms() {
+  using AddrToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+  DenseMap<MachOSection *, AddrToAtomMap> SecToAtoms;
+
+  DenseMap<MachOSection *, unsigned> FirstOrdinal;
+  std::vector<DefinedAtom *> AltEntryAtoms;
+
+  DenseSet<StringRef> ProcessedSymbols; // Used to check for duplicate defs.
+
+  for (auto SymI = Obj.symbol_begin(), SymE = Obj.symbol_end(); SymI != SymE;
+       ++SymI) {
+    object::SymbolRef Sym(SymI->getRawDataRefImpl(), &Obj);
+
+    auto Name = Sym.getName();
+    if (!Name)
+      return Name.takeError();
+
+    // Bail out on duplicate definitions: There should never be more than one
+    // definition for a symbol in a given object file.
+    if (ProcessedSymbols.count(*Name))
+      return make_error<JITLinkError>("Duplicate definition within object: " +
+                                      *Name);
+    else
+      ProcessedSymbols.insert(*Name);
+
+    auto Addr = Sym.getAddress();
+    if (!Addr)
+      return Addr.takeError();
+
+    auto SymType = Sym.getType();
+    if (!SymType)
+      return SymType.takeError();
+
+    auto Flags = Sym.getFlags();
+
+    if (Flags & object::SymbolRef::SF_Undefined) {
+      LLVM_DEBUG(dbgs() << "Adding undef atom \"" << *Name << "\"\n");
+      G->addExternalAtom(*Name);
+      continue;
+    } else if (Flags & object::SymbolRef::SF_Absolute) {
+      LLVM_DEBUG(dbgs() << "Adding absolute \"" << *Name << "\" addr: "
+                        << format("0x%016" PRIx64, *Addr) << "\n");
+      auto &A = G->addAbsoluteAtom(*Name, *Addr);
+      A.setGlobal(Flags & object::SymbolRef::SF_Global);
+      A.setExported(Flags & object::SymbolRef::SF_Exported);
+      A.setWeak(Flags & object::SymbolRef::SF_Weak);
+      continue;
+    } else if (Flags & object::SymbolRef::SF_Common) {
+      LLVM_DEBUG({
+        dbgs() << "Adding common \"" << *Name
+               << "\" addr: " << format("0x%016" PRIx64, *Addr) << "\n";
+      });
+      auto &A =
+          G->addCommonAtom(getCommonSection().getGenericSection(), *Name, *Addr,
+                           std::max(Sym.getAlignment(), 1U),
+                           Obj.getCommonSymbolSize(Sym.getRawDataRefImpl()));
+      A.setGlobal(Flags & object::SymbolRef::SF_Global);
+      A.setExported(Flags & object::SymbolRef::SF_Exported);
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "Adding defined atom \"" << *Name << "\"\n");
+
+    // This atom is neither undefined, absolute nor common, so it must be
+    // defined in this object. Get its section index.
+    auto SecItr = Sym.getSection();
+    if (!SecItr)
+      return SecItr.takeError();
+
+    uint64_t SectionIndex = (*SecItr)->getIndex() + 1;
+
+    LLVM_DEBUG(dbgs() << "  to section index " << SectionIndex << "\n");
+
+    auto SecByIndexItr = Sections.find(SectionIndex);
+    if (SecByIndexItr == Sections.end())
+      return make_error<JITLinkError>("Unrecognized section index in macho");
+
+    auto &Sec = SecByIndexItr->second;
+
+    auto &DA = G->addDefinedAtom(Sec.getGenericSection(), *Name, *Addr,
+                                 std::max(Sym.getAlignment(), 1U));
+
+    DA.setGlobal(Flags & object::SymbolRef::SF_Global);
+    DA.setExported(Flags & object::SymbolRef::SF_Exported);
+    DA.setWeak(Flags & object::SymbolRef::SF_Weak);
+
+    DA.setCallable(*SymType & object::SymbolRef::ST_Function);
+
+    // Check NDesc flags.
+    {
+      uint16_t NDesc = 0;
+      if (Obj.is64Bit())
+        NDesc = Obj.getSymbol64TableEntry(SymI->getRawDataRefImpl()).n_desc;
+      else
+        NDesc = Obj.getSymbolTableEntry(SymI->getRawDataRefImpl()).n_desc;
+
+      // Record atom for alt-entry post-processing (where the layout-next
+      // constraints will be added).
+      if (NDesc & MachO::N_ALT_ENTRY)
+        AltEntryAtoms.push_back(&DA);
+
+      // If this atom has a no-dead-strip attr attached then mark it live.
+      if (NDesc & MachO::N_NO_DEAD_STRIP)
+        DA.setLive(true);
+    }
+
+    LLVM_DEBUG({
+      dbgs() << "  Added " << *Name
+             << " addr: " << format("0x%016" PRIx64, *Addr)
+             << ", align: " << DA.getAlignment()
+             << ", section: " << Sec.getGenericSection().getName() << "\n";
+    });
+    // Index the atom by its offset within the section.
+    auto &SecAtoms = SecToAtoms[&Sec];
+    SecAtoms[DA.getAddress() - Sec.getAddress()] = &DA;
+  }
+
+  // Add anonymous atoms.
+  for (auto &KV : Sections) {
+    auto &S = KV.second;
+
+    // Skip empty sections.
+    if (S.empty())
+      continue;
+
+    // Skip sections with custom handling.
+    if (CustomAtomizeFunctions.count(S.getName()))
+      continue;
+
+    auto SAI = SecToAtoms.find(&S);
+
+    // If S is not in the SecToAtoms map then it contained no named atom. Add
+    // one anonymous atom to cover the whole section.
+    if (SAI == SecToAtoms.end()) {
+      SecToAtoms[&S][0] = &G->addAnonymousAtom(
+          S.getGenericSection(), S.getAddress(), S.getAlignment());
+      continue;
+    }
+
+    // Otherwise, check whether this section had an atom covering offset zero.
+    // If not, add one.
+    auto &SecAtoms = SAI->second;
+    if (!SecAtoms.count(0))
+      SecAtoms[0] = &G->addAnonymousAtom(S.getGenericSection(), S.getAddress(),
+                                         S.getAlignment());
+  }
+
+  LLVM_DEBUG(dbgs() << "MachOGraphBuilder setting atom content\n");
+
+  // Set atom contents and any section-based flags.
+  for (auto &KV : SecToAtoms) {
+    auto &S = *KV.first;
+    auto &SecAtoms = KV.second;
+    // Walk the atoms from the highest offset down: each atom ends where the
+    // previously visited one starts (or at the section end for the first).
+    JITTargetAddress LastAtomAddr = S.getSize();
+    for (auto I = SecAtoms.rbegin(), E = SecAtoms.rend(); I != E; ++I) {
+      auto Offset = I->first;
+      auto &A = *I->second;
+      LLVM_DEBUG({
+        dbgs() << "  " << A << " to [ " << S.getAddress() + Offset << " .. "
+               << S.getAddress() + LastAtomAddr << " ]\n";
+      });
+
+      if (S.isZeroFill())
+        A.setZeroFill(LastAtomAddr - Offset);
+      else
+        A.setContent(S.getContent().substr(Offset, LastAtomAddr - Offset));
+
+      // If the section has no-dead-strip set then mark the atom as live.
+      if (S.isNoDeadStrip())
+        A.setLive(true);
+
+      LastAtomAddr = Offset;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "Adding alt-entry starts\n");
+
+  // Sort alt-entry atoms by address in ascending order.
+  llvm::sort(AltEntryAtoms.begin(), AltEntryAtoms.end(),
+             [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
+               return LHS->getAddress() < RHS->getAddress();
+             });
+
+  // Process alt-entry atoms in address order to build the table of alt-entry
+  // atoms to alt-entry chain starts.
+  for (auto *DA : AltEntryAtoms) {
+    assert(!AltEntryStarts.count(DA) && "Duplicate entry in AltEntryStarts");
+
+    // DA is an alt-entry atom. Look for the predecessor atom that it is locked
+    // to, bailing out if we do not find one.
+    auto AltEntryPred = G->findAtomByAddress(DA->getAddress() - 1);
+    if (!AltEntryPred)
+      return AltEntryPred.takeError();
+
+    // Add a LayoutNext edge from the predecessor to this atom.
+    AltEntryPred->setLayoutNext(*DA);
+
+    // Check to see whether the predecessor itself is an alt-entry atom.
+    auto AltEntryStartItr = AltEntryStarts.find(&*AltEntryPred);
+    if (AltEntryStartItr != AltEntryStarts.end()) {
+      // If the predecessor was an alt-entry atom then re-use its value.
+      LLVM_DEBUG({
+        dbgs() << "  " << *DA << " -> " << *AltEntryStartItr->second
+               << " (based on existing entry for " << *AltEntryPred << ")\n";
+      });
+      AltEntryStarts[DA] = AltEntryStartItr->second;
+    } else {
+      // If the predecessor does not have an entry then add an entry for this
+      // atom (i.e. the alt_entry atom) and a self-reference entry for the
+      // predecessor atom that is the start of this chain.
+      LLVM_DEBUG({
+        dbgs() << "  " << *AltEntryPred << " -> " << *AltEntryPred << "\n"
+               << "  " << *DA << " -> " << *AltEntryPred << "\n";
+      });
+      AltEntryStarts[&*AltEntryPred] = &*AltEntryPred;
+      AltEntryStarts[DA] = &*AltEntryPred;
+    }
+  }
+
+  return Error::success();
+}
+
+Error MachOAtomGraphBuilder::addAtoms() {
+  // First build the named atoms (and anonymous fillers) the generic way.
+  if (Error GenericErr = addNonCustomAtoms())
+    return GenericErr;
+
+  // Then hand each section that has a registered custom atomizer to it.
+  for (auto &IndexAndSection : Sections) {
+    MachOSection &Sec = IndexAndSection.second;
+    auto Handler =
+        CustomAtomizeFunctions.find(Sec.getGenericSection().getName());
+    if (Handler == CustomAtomizeFunctions.end())
+      continue;
+    if (Error CustomErr = Handler->second(Sec))
+      return CustomErr;
+  }
+
+  return Error::success();
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
new file mode 100644
index 000000000000..72d441b24d06
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
@@ -0,0 +1,138 @@
+//===----- MachOAtomGraphBuilder.h - MachO AtomGraph builder ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO AtomGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
+#define LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#include "JITLinkGeneric.h"
+
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+namespace jitlink {
+
+class MachOAtomGraphBuilder {
+public:
+  virtual ~MachOAtomGraphBuilder();
+  Expected<std::unique_ptr<AtomGraph>> buildGraph();
+
+protected:
+  using OffsetToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+
+  class MachOSection {
+  public:
+    MachOSection() = default;
+
+    /// Create a MachO section with the given address and alignment.
+    MachOSection(Section &GenericSection, JITTargetAddress Address,
+                 unsigned Alignment)
+        : Address(Address), GenericSection(&GenericSection),
+          Alignment(Alignment) {}
+
+    /// Create a section without address, content or size (used for common
+    /// symbol sections).
+    MachOSection(Section &GenericSection) : GenericSection(&GenericSection) {}
+    /// Returns the wrapped generic section (asserts that one is attached).
+    Section &getGenericSection() const {
+      assert(GenericSection && "Section is null");
+      return *GenericSection;
+    }
+    /// Returns the name of the wrapped generic section.
+    StringRef getName() const {
+      assert(GenericSection && "No generic section attached");
+      return GenericSection->getName();
+    }
+    /// Record the content; asserts that no content/size was set before.
+    MachOSection &setContent(StringRef Content) {
+      assert(!ContentPtr && !Size && "Content/zeroFill already set");
+      ContentPtr = Content.data();
+      Size = Content.size();
+      return *this;
+    }
+    /// Record a zero-fill size; asserts that no content/size was set before.
+    MachOSection &setZeroFill(uint64_t Size) {
+      assert(!ContentPtr && !this->Size && "Content/zeroFill already set");
+      this->Size = Size;
+      return *this;
+    }
+    /// True if no content pointer has been recorded.
+    bool isZeroFill() const { return !ContentPtr; }
+
+    bool empty() const { return getSize() == 0; }
+
+    size_t getSize() const { return Size; }
+    /// Returns the content; must not be called on a zero-fill section.
+    StringRef getContent() const {
+      assert(ContentPtr && "getContent() called on zero-fill section");
+      return {ContentPtr, static_cast<size_t>(Size)};
+    }
+
+    JITTargetAddress getAddress() const { return Address; }
+
+    unsigned getAlignment() const { return Alignment; }
+    /// Set the no-dead-strip flag for this section.
+    MachOSection &setNoDeadStrip(bool NoDeadStrip) {
+      this->NoDeadStrip = NoDeadStrip;
+      return *this;
+    }
+
+    bool isNoDeadStrip() const { return NoDeadStrip; }
+
+  private:
+    JITTargetAddress Address = 0;
+    Section *GenericSection = nullptr;
+    const char *ContentPtr = nullptr;
+    uint64_t Size = 0;
+    unsigned Alignment = 0;
+    bool NoDeadStrip = false;
+  };
+  /// Callback that builds the atoms of one section (see addCustomAtomizer).
+  using CustomAtomizeFunction = std::function<Error(MachOSection &S)>;
+  /// Obj is held by reference and must outlive the builder.
+  MachOAtomGraphBuilder(const object::MachOObjectFile &Obj);
+
+  AtomGraph &getGraph() const { return *G; }
+
+  const object::MachOObjectFile &getObject() const { return Obj; }
+  /// Register Atomizer for sections named SectionName (one per name).
+  void addCustomAtomizer(StringRef SectionName, CustomAtomizeFunction Atomizer);
+  /// Target-specific hook; buildGraph calls it after the atoms were added.
+  virtual Error addRelocations() = 0;
+
+  /// Returns true if Atom A and Atom B are at a fixed offset from one another
+  /// (i.e. if they're part of the same alt-entry chain).
+  bool areLayoutLocked(const Atom &A, const Atom &B);
+
+private:
+  static unsigned getPointerSize(const object::MachOObjectFile &Obj);
+  static support::endianness getEndianness(const object::MachOObjectFile &Obj);
+
+  MachOSection &getCommonSection();
+
+  Error parseSections();
+  Error addNonCustomAtoms();
+  Error addAtoms();
+
+  const object::MachOObjectFile &Obj;
+  std::unique_ptr<AtomGraph> G;
+  DenseMap<const DefinedAtom *, const DefinedAtom *> AltEntryStarts;
+  DenseMap<unsigned, MachOSection> Sections; // Keyed by section index + 1.
+  StringMap<CustomAtomizeFunction> CustomAtomizeFunctions;
+  Optional<MachOSection> CommonSymbolsSection;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
diff --git a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
new file mode 100644
index 000000000000..4010678c6d33
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -0,0 +1,608 @@
+//===---- MachO_x86_64.cpp -JIT linker implementation for MachO/x86-64 ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO/x86-64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+
+#include "BasicGOTAndStubsBuilder.h"
+#include "MachOAtomGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::MachO_x86_64_Edges;
+
+namespace {
+
+class MachOAtomGraphBuilder_x86_64 : public MachOAtomGraphBuilder {
+public:
+  MachOAtomGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
+      : MachOAtomGraphBuilder(Obj),
+        NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
+    addCustomAtomizer("__eh_frame", [this](MachOSection &Sec) {
+      // __eh_frame is atomized by the shared addEHFrame helper.
+      return addEHFrame(getGraph(), Sec.getGenericSection(), Sec.getContent(),
+                        Sec.getAddress(), NegDelta32, Delta64);
+    });
+  }
+
+private:
+  static Expected<MachOX86RelocationKind>
+  getRelocationKind(const MachO::relocation_info &RI) {
+    switch (RI.r_type) { // Validate the flags per relocation type.
+    case MachO::X86_64_RELOC_UNSIGNED:
+      if (!RI.r_pcrel && RI.r_length == 3)
+        return RI.r_extern ? Pointer64 : Pointer64Anon;
+      break;
+    case MachO::X86_64_RELOC_SIGNED:
+      if (RI.r_pcrel && RI.r_length == 2)
+        return RI.r_extern ? PCRel32 : PCRel32Anon;
+      break;
+    case MachO::X86_64_RELOC_BRANCH:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return Branch32;
+      break;
+    case MachO::X86_64_RELOC_GOT_LOAD:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return PCRel32GOTLoad;
+      break;
+    case MachO::X86_64_RELOC_GOT:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return PCRel32GOT;
+      break;
+    case MachO::X86_64_RELOC_SUBTRACTOR:
+      // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3.
+      // Initially represent SUBTRACTOR relocations with 'Delta<W>'. They may
+      // be turned into NegDelta<W> by parsePairRelocation.
+      if (!RI.r_pcrel && RI.r_extern) {
+        if (RI.r_length == 2)
+          return Delta32;
+        else if (RI.r_length == 3)
+          return Delta64;
+      }
+      break;
+    case MachO::X86_64_RELOC_SIGNED_1:
+      if (RI.r_pcrel && RI.r_length == 2)
+        return RI.r_extern ? PCRel32Minus1 : PCRel32Minus1Anon;
+      break;
+    case MachO::X86_64_RELOC_SIGNED_2:
+      if (RI.r_pcrel && RI.r_length == 2)
+        return RI.r_extern ? PCRel32Minus2 : PCRel32Minus2Anon;
+      break;
+    case MachO::X86_64_RELOC_SIGNED_4:
+      if (RI.r_pcrel && RI.r_length == 2)
+        return RI.r_extern ? PCRel32Minus4 : PCRel32Minus4Anon;
+      break;
+    case MachO::X86_64_RELOC_TLV:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return PCRel32TLV;
+      break;
+    }
+    // Unknown type, or a known type with an unexpected flag combination.
+    return make_error<JITLinkError>(
+        "Unsupported x86-64 relocation: address=" +
+        formatv("{0:x8}", RI.r_address) +
+        ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) +
+        ", kind=" + formatv("{0:x1}", RI.r_type) +
+        ", pc_rel=" + (RI.r_pcrel ? "true" : "false") +
+        ", extern= " + (RI.r_extern ? "true" : "false") +
+        ", length=" + formatv("{0:d}", RI.r_length));
+  }
+
+  Expected<Atom &> findAtomBySymbolIndex(const MachO::relocation_info &RI) {
+    if (RI.r_symbolnum >= NumSymbols)
+      return make_error<JITLinkError>("Symbol index out of range");
+    // Atoms are looked up by name, so resolve the index to a name first.
+    Expected<StringRef> SymName =
+        getObject().getSymbolByIndex(RI.r_symbolnum)->getName();
+    if (!SymName)
+      return SymName.takeError();
+    return getGraph().getAtomByName(*SymName);
+  }
+
+  MachO::relocation_info
+  getRelocationInfo(const object::relocation_iterator RelItr) {
+    // Reinterpret the generic record's bytes as a plain relocation_info.
+    auto Raw = getObject().getRelocation(RelItr->getRawDataRefImpl());
+    MachO::relocation_info Result;
+    memcpy(&Result, &Raw, sizeof(Result));
+    return Result;
+  }
+
+  using PairRelocInfo = std::tuple<MachOX86RelocationKind, Atom *, uint64_t>;
+
+  // Parses a SUBTRACTOR relocation together with its paired UNSIGNED one and,
+  // on success, returns the edge kind, target atom and addend to be used.
+  Expected<PairRelocInfo>
+  parsePairRelocation(DefinedAtom &AtomToFix, Edge::Kind SubtractorKind,
+                      const MachO::relocation_info &SubRI,
+                      JITTargetAddress FixupAddress, const char *FixupContent,
+                      object::relocation_iterator &UnsignedRelItr,
+                      object::relocation_iterator &RelEnd) {
+    using namespace support;
+
+    assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) ||
+            (SubtractorKind == Delta64 && SubRI.r_length == 3)) &&
+           "Subtractor kind should match length");
+    assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern");
+    assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel");
+
+    if (UnsignedRelItr == RelEnd)
+      return make_error<JITLinkError>("x86_64 SUBTRACTOR without paired "
+                                      "UNSIGNED relocation");
+
+    auto UnsignedRI = getRelocationInfo(UnsignedRelItr);
+
+    if (SubRI.r_address != UnsignedRI.r_address)
+      return make_error<JITLinkError>("x86_64 SUBTRACTOR and paired UNSIGNED "
+                                      "point to different addresses");
+
+    if (SubRI.r_length != UnsignedRI.r_length)
+      return make_error<JITLinkError>("length of x86_64 SUBTRACTOR and paired "
+                                      "UNSIGNED reloc must match");
+
+    auto FromAtom = findAtomBySymbolIndex(SubRI);
+    if (!FromAtom)
+      return FromAtom.takeError();
+
+    // Read the current fixup value.
+    uint64_t FixupValue = 0;
+    if (SubRI.r_length == 3)
+      FixupValue = *(const little64_t *)FixupContent;
+    else
+      FixupValue = *(const little32_t *)FixupContent;
+
+    // Find 'ToAtom' using symbol number or address, depending on whether the
+    // paired UNSIGNED relocation is extern.
+    Atom *ToAtom = nullptr;
+    if (UnsignedRI.r_extern) {
+      // Find target atom by symbol index.
+      if (auto ToAtomOrErr = findAtomBySymbolIndex(UnsignedRI))
+        ToAtom = &*ToAtomOrErr;
+      else
+        return ToAtomOrErr.takeError();
+    } else {
+      if (auto ToAtomOrErr = getGraph().findAtomByAddress(FixupValue))
+        ToAtom = &*ToAtomOrErr;
+      else
+        return ToAtomOrErr.takeError();
+      FixupValue -= ToAtom->getAddress(); // Leave the offset into ToAtom.
+    }
+    // The edge targets whichever atom AtomToFix is NOT layout-locked to.
+    MachOX86RelocationKind DeltaKind;
+    Atom *TargetAtom;
+    uint64_t Addend;
+    if (areLayoutLocked(AtomToFix, *FromAtom)) {
+      TargetAtom = ToAtom;
+      DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
+      Addend = FixupValue + (FixupAddress - FromAtom->getAddress());
+      // FIXME: handle extern 'from'.
+    } else if (areLayoutLocked(AtomToFix, *ToAtom)) {
+      TargetAtom = &*FromAtom;
+      DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
+      Addend = FixupValue - (FixupAddress - ToAtom->getAddress());
+    } else {
+      // AtomToFix was neither FromAtom nor ToAtom.
+      return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+                                      "either 'A' or 'B' (or an atom in one "
+                                      "of their alt-entry groups)");
+    }
+
+    return PairRelocInfo(DeltaKind, TargetAtom, Addend);
+  }
+
+  Error addRelocations() override {
+    using namespace support;
+    auto &G = getGraph();
+    auto &Obj = getObject();
+    // Convert each relocation in each section into an edge on the fixed atom.
+    for (auto &S : Obj.sections()) {
+
+      JITTargetAddress SectionAddress = S.getAddress();
+
+      for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end();
+           RelItr != RelEnd; ++RelItr) {
+
+        MachO::relocation_info RI = getRelocationInfo(RelItr);
+
+        // Sanity check the relocation kind.
+        auto Kind = getRelocationKind(RI);
+        if (!Kind)
+          return Kind.takeError();
+
+        // Find the address of the value to fix up.
+        JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
+
+        LLVM_DEBUG({
+          dbgs() << "Processing relocation at "
+                 << format("0x%016" PRIx64, FixupAddress) << "\n";
+        });
+
+        // Find the atom that the fixup points to.
+        DefinedAtom *AtomToFix = nullptr;
+        {
+          auto AtomToFixOrErr = G.findAtomByAddress(FixupAddress);
+          if (!AtomToFixOrErr)
+            return AtomToFixOrErr.takeError();
+          AtomToFix = &*AtomToFixOrErr;
+        }
+
+        if (FixupAddress + static_cast<JITTargetAddress>(1ULL << RI.r_length) >
+            AtomToFix->getAddress() + AtomToFix->getContent().size())
+          return make_error<JITLinkError>(
+              "Relocation content extends past end of fixup atom");
+
+        // Get a pointer to the fixup content.
+        const char *FixupContent = AtomToFix->getContent().data() +
+                                   (FixupAddress - AtomToFix->getAddress());
+
+        // The target atom and addend will be populated by the switch below.
+        Atom *TargetAtom = nullptr;
+        uint64_t Addend = 0;
+        // Note: 32-bit inline addends/displacements are signed (little32_t).
+        switch (*Kind) {
+        case Branch32:
+        case PCRel32:
+        case PCRel32GOTLoad:
+        case PCRel32GOT:
+          if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = *(const little32_t *)FixupContent;
+          break;
+        case Pointer64:
+          if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = *(const ulittle64_t *)FixupContent;
+          break;
+        case Pointer64Anon: {
+          JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
+          if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = TargetAddress - TargetAtom->getAddress();
+          break;
+        }
+        case PCRel32Minus1:
+        case PCRel32Minus2:
+        case PCRel32Minus4:
+          if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = *(const little32_t *)FixupContent;
+          Addend += 1ULL << (*Kind - PCRel32Minus1);
+          break;
+        case PCRel32Anon: {
+          JITTargetAddress TargetAddress =
+              FixupAddress + 4 + *(const little32_t *)FixupContent;
+          if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = TargetAddress - TargetAtom->getAddress();
+          break;
+        }
+        case PCRel32Minus1Anon:
+        case PCRel32Minus2Anon:
+        case PCRel32Minus4Anon: {
+          JITTargetAddress Delta =
+              static_cast<JITTargetAddress>(1ULL << (*Kind - PCRel32Minus1Anon));
+          JITTargetAddress TargetAddress =
+              FixupAddress + 4 + Delta + *(const little32_t *)FixupContent;
+          if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+            TargetAtom = &*TargetAtomOrErr;
+          else
+            return TargetAtomOrErr.takeError();
+          Addend = TargetAddress - TargetAtom->getAddress();
+          break;
+        }
+        case Delta32:
+        case Delta64: {
+          // We use Delta32/Delta64 to represent SUBTRACTOR relocations.
+          // parsePairRelocation handles the paired reloc, and returns the
+          // edge kind to be used (either Delta32/Delta64, or
+          // NegDelta32/NegDelta64, depending on the direction of the
+          // subtraction) along with the addend.
+          auto PairInfo =
+              parsePairRelocation(*AtomToFix, *Kind, RI, FixupAddress,
+                                  FixupContent, ++RelItr, RelEnd);
+          if (!PairInfo)
+            return PairInfo.takeError();
+          std::tie(*Kind, TargetAtom, Addend) = *PairInfo;
+          assert(TargetAtom && "No target atom from parsePairRelocation?");
+          break;
+        }
+        default:
+          llvm_unreachable("Special relocation kind should not appear in "
+                           "mach-o file");
+        }
+
+        LLVM_DEBUG({
+          Edge GE(*Kind, FixupAddress - AtomToFix->getAddress(), *TargetAtom,
+                  Addend);
+          printEdge(dbgs(), *AtomToFix, GE,
+                    getMachOX86RelocationKindName(*Kind));
+          dbgs() << "\n";
+        });
+        AtomToFix->addEdge(*Kind, FixupAddress - AtomToFix->getAddress(),
+                           *TargetAtom, Addend);
+      }
+    }
+    return Error::success();
+  }
+
+  unsigned NumSymbols = 0;
+};
+
+/// MachO/x86-64 GOT and stub construction on top of BasicGOTAndStubsBuilder.
+class MachO_x86_64_GOTAndStubsBuilder
+    : public BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder> {
+public:
+  MachO_x86_64_GOTAndStubsBuilder(AtomGraph &G)
+      : BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder>(G) {}
+
+  bool isGOTEdge(Edge &E) const {
+    const auto K = E.getKind();
+    return K == PCRel32GOT || K == PCRel32GOTLoad;
+  }
+
+  /// Anonymous GOT-section atom: 8 zero bytes and a Pointer64 edge to Target.
+  DefinedAtom &createGOTEntry(Atom &Target) {
+    const auto *Zeroes = reinterpret_cast<const char *>(NullGOTEntryContent);
+    auto &Entry = G.addAnonymousAtom(getGOTSection(), 0x0, 8);
+    Entry.setContent(StringRef(Zeroes, 8));
+    Entry.addEdge(Pointer64, 0, Target, 0);
+    return Entry;
+  }
+
+  /// Rewrites a GOT edge as PCRel32 to GOTEntry; the addend is left as-is.
+  void fixGOTEdge(Edge &E, Atom &GOTEntry) {
+    assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) &&
+           "Not a GOT edge?");
+    E.setKind(PCRel32);
+    E.setTarget(GOTEntry);
+  }
+
+  bool isExternalBranchEdge(Edge &E) {
+    const bool IsBranch = E.getKind() == Branch32;
+    return IsBranch && !E.getTarget().isDefined();
+  }
+
+  /// Anonymous stub atom; it re-uses Target's GOT entry via a PCRel32 edge.
+  DefinedAtom &createStub(Atom &Target) {
+    auto &Stub = G.addAnonymousAtom(getStubsSection(), 0x0, 2);
+    Stub.setContent(StringRef(reinterpret_cast<const char *>(StubContent), 6));
+    Stub.addEdge(PCRel32, 2, getGOTEntryAtom(Target), 0);
+    return Stub;
+  }
+
+  void fixExternalBranchEdge(Edge &E, Atom &Stub) {
+    assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
+    assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?");
+    E.setTarget(Stub); // Kind and addend are not modified.
+  }
+
+private:
+  Section &getGOTSection() {
+    if (GOTSection == nullptr)
+      GOTSection = &G.createSection("$__GOT", 8, sys::Memory::MF_READ, false);
+    return *GOTSection;
+  }
+
+  Section &getStubsSection() {
+    if (StubsSection != nullptr)
+      return *StubsSection;
+    const auto Prot = static_cast<sys::Memory::ProtectionFlags>(
+        sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+    StubsSection = &G.createSection("$__STUBS", 8, Prot, false);
+    return *StubsSection;
+  }
+
+  static const uint8_t NullGOTEntryContent[8]; // Initial GOT entry bytes.
+  static const uint8_t StubContent[6];         // Initial stub bytes.
+  Section *GOTSection = nullptr;               // "$__GOT", made on first use.
+  Section *StubsSection = nullptr;             // "$__STUBS", made on first use.
+};
+
+const uint8_t MachO_x86_64_GOTAndStubsBuilder::NullGOTEntryContent[8] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; // 8 zero bytes
+const uint8_t MachO_x86_64_GOTAndStubsBuilder::StubContent[6] = {
+    0xFF, 0x25, 0x00, 0x00, 0x00, 0x00}; // FF 25 disp32: jmpq *disp32(%rip)
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+class MachOJITLinker_x86_64 : public JITLinker<MachOJITLinker_x86_64> {
+  friend class JITLinker<MachOJITLinker_x86_64>; // Grants the base access.
+
+public:
+  MachOJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx,
+                        PassConfiguration PassConfig)
+      : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+
+private:
+  StringRef getEdgeKindName(Edge::Kind R) const override {
+    return getMachOX86RelocationKindName(R);
+  }
+  /// Opens ObjBuffer as a MachO object file and builds an AtomGraph from it.
+  Expected<std::unique_ptr<AtomGraph>>
+  buildGraph(MemoryBufferRef ObjBuffer) override {
+    auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
+    if (!MachOObj)
+      return MachOObj.takeError();
+    return MachOAtomGraphBuilder_x86_64(**MachOObj).buildGraph();
+  }
+  /// Makes a JITLinkError reading "Relocation target out of range: <edge>".
+  static Error targetOutOfRangeError(const Atom &A, const Edge &E) {
+    std::string ErrMsg;
+    { // ErrStream ends with this scope, before ErrMsg is moved from.
+      raw_string_ostream ErrStream(ErrMsg);
+      ErrStream << "Relocation target out of range: ";
+      printEdge(ErrStream, A, E, getMachOX86RelocationKindName(E.getKind()));
+      ErrStream << "\n";
+    }
+    return make_error<JITLinkError>(std::move(ErrMsg));
+  }
+  /// Computes edge E's value and stores it at E's offset in AtomWorkingMem.
+  Error applyFixup(DefinedAtom &A, const Edge &E, char *AtomWorkingMem) const {
+    using namespace support;
+    // Where to write in the working memory, and that location's address in A.
+    char *FixupPtr = AtomWorkingMem + E.getOffset();
+    JITTargetAddress FixupAddress = A.getAddress() + E.getOffset();
+
+    switch (E.getKind()) {
+    case Branch32:
+    case PCRel32:
+    case PCRel32Anon: { // target + addend, relative to the 4-byte field's end
+      int64_t Value =
+          E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
+      if (Value < std::numeric_limits<int32_t>::min() ||
+          Value > std::numeric_limits<int32_t>::max())
+        return targetOutOfRangeError(A, E);
+      *(little32_t *)FixupPtr = Value;
+      break;
+    }
+    case Pointer64:
+    case Pointer64Anon: { // target address + addend, stored as 64 bits
+      uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+      *(ulittle64_t *)FixupPtr = Value;
+      break;
+    }
+    case PCRel32Minus1:
+    case PCRel32Minus2:
+    case PCRel32Minus4: { // Delta: 5/6/8, assuming consecutive enumerators
+      int Delta = 4 + (1 << (E.getKind() - PCRel32Minus1));
+      int64_t Value =
+          E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
+      if (Value < std::numeric_limits<int32_t>::min() ||
+          Value > std::numeric_limits<int32_t>::max())
+        return targetOutOfRangeError(A, E);
+      *(little32_t *)FixupPtr = Value;
+      break;
+    }
+    case PCRel32Minus1Anon:
+    case PCRel32Minus2Anon:
+    case PCRel32Minus4Anon: { // Same computation, based at PCRel32Minus1Anon
+      int Delta = 4 + (1 << (E.getKind() - PCRel32Minus1Anon));
+      int64_t Value =
+          E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
+      if (Value < std::numeric_limits<int32_t>::min() ||
+          Value > std::numeric_limits<int32_t>::max())
+        return targetOutOfRangeError(A, E);
+      *(little32_t *)FixupPtr = Value;
+      break;
+    }
+    case Delta32:
+    case Delta64:
+    case NegDelta32:
+    case NegDelta64: { // Delta*: target - fixup; NegDelta*: fixup - target
+      int64_t Value;
+      if (E.getKind() == Delta32 || E.getKind() == Delta64)
+        Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+      else
+        Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+      // The 32-bit kinds are range checked; the 64-bit kinds are stored as-is.
+      if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
+        if (Value < std::numeric_limits<int32_t>::min() ||
+            Value > std::numeric_limits<int32_t>::max())
+          return targetOutOfRangeError(A, E);
+        *(little32_t *)FixupPtr = Value;
+      } else
+        *(little64_t *)FixupPtr = Value;
+      break;
+    }
+    default:
+      llvm_unreachable("Unrecognized edge kind");
+    }
+
+    return Error::success();
+  }
+
+  uint64_t NullValue = 0; // Not referenced by any member shown in this class.
+};
+
+void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
+  Triple TT("x86_64-apple-macosx");
+  PassConfiguration Config;
+  // In-place GOT/Stubs pass; only registered with the default passes below.
+  auto BuildGOTAndStubs = [](AtomGraph &G) -> Error {
+    MachO_x86_64_GOTAndStubsBuilder(G).run();
+    return Error::success();
+  };
+
+  if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+    // Mark-live pass: the context's own one, else mark every atom live.
+    if (auto MarkLive = Ctx->getMarkLivePass(TT))
+      Config.PrePrunePasses.push_back(std::move(MarkLive));
+    else
+      Config.PrePrunePasses.push_back(markAllAtomsLive);
+    Config.PostPrunePasses.push_back(BuildGOTAndStubs);
+  }
+
+  if (auto Err = Ctx->modifyPassConfig(TT, Config)) {
+    Ctx->notifyFailed(std::move(Err));
+    return;
+  }
+  MachOJITLinker_x86_64::link(std::move(Ctx), std::move(Config));
+}
+
+/// Returns the printable name of a MachO/x86-64 edge kind.
+///
+/// Each kind listed below maps to the spelling of its enumerator. Any other
+/// value is handed to getGenericEdgeKindName.
+///
+/// The cases are grouped for reading only; their order has no effect on the
+/// result.
+StringRef getMachOX86RelocationKindName(Edge::Kind R) {
+  switch (R) {
+  // Branches and absolute pointers.
+  case Branch32:          return "Branch32";
+  case Pointer64:         return "Pointer64";
+  case Pointer64Anon:     return "Pointer64Anon";
+
+  // PC-relative kinds.
+  case PCRel32:           return "PCRel32";
+  case PCRel32Minus1:     return "PCRel32Minus1";
+  case PCRel32Minus2:     return "PCRel32Minus2";
+  case PCRel32Minus4:     return "PCRel32Minus4";
+
+  // PC-relative kinds, "Anon" variants.
+  case PCRel32Anon:       return "PCRel32Anon";
+  case PCRel32Minus1Anon: return "PCRel32Minus1Anon";
+  case PCRel32Minus2Anon: return "PCRel32Minus2Anon";
+  case PCRel32Minus4Anon: return "PCRel32Minus4Anon";
+
+  // GOT and TLV kinds.
+  case PCRel32GOTLoad:    return "PCRel32GOTLoad";
+  case PCRel32GOT:        return "PCRel32GOT";
+  case PCRel32TLV:        return "PCRel32TLV";
+
+  // Delta kinds, in both directions.
+  case Delta32:           return "Delta32";
+  case Delta64:           return "Delta64";
+  case NegDelta32:        return "NegDelta32";
+  case NegDelta64:        return "NegDelta64";
+
+  // Not a kind named above.
+  default:
+    return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+  }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index ffc6707e1488..08815b7a80ae 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -1,9 +1,8 @@
 //===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 1119e138720f..77097fc0d17e 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -1,9 +1,8 @@
 //===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 21af6b585c41..2ad9d24555f3 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -1,9 +1,8 @@
 //===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index b473ac3faf4c..1a2667736926 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -1,9 +1,8 @@
 //===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 241eb3600da7..99bf53bc3afa 100644
--- a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -1,9 +1,8 @@
 //===----- CompileOnDemandLayer.cpp - Lazily emit IR on first call --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/CompileUtils.cpp b/lib/ExecutionEngine/Orc/CompileUtils.cpp
new file mode 100644
index 000000000000..d46b6fcf9a5f
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -0,0 +1,86 @@
+//===------ CompileUtils.cpp - Utilities for compiling IR in the JIT ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+
+namespace llvm {
+namespace orc {
+
+/// Compile a Module to an in-memory object file.
+///
+/// An object found in the object cache is returned as-is. Returns null when
+/// the freshly emitted buffer cannot be opened as an object file.
+SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
+  if (CompileResult Cached = tryToLoadFromObjectCache(M))
+    return Cached;
+
+  SmallVector<char, 0> ObjBytes;
+  {
+    // This scope ends the stream and pass manager before ObjBytes is moved.
+    raw_svector_ostream ObjStream(ObjBytes);
+    legacy::PassManager PM;
+    MCContext *Ctx;
+    if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
+      llvm_unreachable("Target does not support MC emission.");
+    PM.run(M);
+  }
+
+  auto ObjBuffer = llvm::make_unique<SmallVectorMemoryBuffer>(
+      std::move(ObjBytes),
+      "<in memory object compiled from " + M.getModuleIdentifier() + ">");
+
+  auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
+  if (!Obj) {
+    // TODO: Actually report errors helpfully.
+    consumeError(Obj.takeError());
+    return nullptr;
+  }
+
+  notifyObjectCompiled(M, *ObjBuffer);
+  return std::move(ObjBuffer);
+}
+
+SimpleCompiler::CompileResult
+SimpleCompiler::tryToLoadFromObjectCache(const Module &M) {
+  // Only consult the cache when one was supplied.
+  if (ObjCache)
+    return ObjCache->getObject(&M);
+  return CompileResult(); // No cache: empty result.
+}
+
+void SimpleCompiler::notifyObjectCompiled(const Module &M,
+                                          const MemoryBuffer &ObjBuffer) {
+  if (auto *Cache = ObjCache) // Without a cache there is nobody to notify.
+    Cache->notifyObjectCompiled(&M, ObjBuffer.getMemBufferRef());
+}
+
+ConcurrentIRCompiler::ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
+                                           ObjectCache *ObjCache)
+    : JTMB(std::move(JTMB)), ObjCache(ObjCache) {} // JTMB is moved in.
+
+std::unique_ptr<MemoryBuffer> ConcurrentIRCompiler::operator()(Module &M) {
+  // Fresh TargetMachine per call; cantFail treats a failure as a program bug.
+  auto Machine = cantFail(JTMB.createTargetMachine());
+  return SimpleCompiler(*Machine, ObjCache)(M);
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp
index 73c0bcdf7d28..dac37e030e0c 100644
--- a/lib/ExecutionEngine/Orc/Core.cpp
+++ b/lib/ExecutionEngine/Orc/Core.cpp
@@ -1,9 +1,8 @@
 //===--- Core.cpp - Core ORC APIs (MaterializationUnit, JITDylib, etc.) ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,17 +26,17 @@ namespace {
 
 #ifndef NDEBUG
 
-cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(false),
+cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(true),
                           cl::desc("debug print hidden symbols defined by "
                                    "materialization units"),
                           cl::Hidden);
 
-cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(false),
+cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(true),
                             cl::desc("debug print callable symbols defined by "
                                      "materialization units"),
                             cl::Hidden);
 
-cl::opt<bool> PrintData("debug-orc-print-data", cl::init(false),
+cl::opt<bool> PrintData("debug-orc-print-data", cl::init(true),
                         cl::desc("debug print data symbols defined by "
                                  "materialization units"),
                         cl::Hidden);
@@ -134,8 +133,6 @@ struct PrintSymbolMapElemsMatchingCLOpts {
 namespace llvm {
 namespace orc {
 
-  SymbolStringPool::PoolMapEntry SymbolStringPtr::Tombstone(0);
-
 char FailedToMaterialize::ID = 0;
 char SymbolsNotFound::ID = 0;
 char SymbolsCouldNotBeRemoved::ID = 0;
@@ -222,6 +219,31 @@ raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) {
   return OS;
 }
 
+raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases) {
+  OS << "{"; // Output shape: { <alias>: <aliasee> <flags> ... }
+  for (auto &KV : Aliases)
+    OS << " " << *KV.first << ": " << KV.second.Aliasee << " "
+       << KV.second.AliasFlags;
+  // No trailing newline: callers terminate the line themselves.
+  return OS << " }";
+}
+
+/// Streams the printable name of a SymbolState.
+///
+/// Note that NeverSearched is spelled "Never-Searched" in the output.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) {
+  // There is no default label; a value matching none of the cases falls out
+  // of the switch into llvm_unreachable.
+  switch (S) {
+  case SymbolState::Invalid:       return OS << "Invalid";
+  case SymbolState::NeverSearched: return OS << "Never-Searched";
+  case SymbolState::Materializing: return OS << "Materializing";
+  case SymbolState::Resolved:      return OS << "Resolved";
+  case SymbolState::Ready:         return OS << "Ready";
+  }
+  llvm_unreachable("Invalid state");
+}
+
 FailedToMaterialize::FailedToMaterialize(SymbolNameSet Symbols)
     : Symbols(std::move(Symbols)) {
   assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
@@ -262,85 +284,46 @@ void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const {
 }
 
 AsynchronousSymbolQuery::AsynchronousSymbolQuery(
-    const SymbolNameSet &Symbols, SymbolsResolvedCallback NotifySymbolsResolved,
-    SymbolsReadyCallback NotifySymbolsReady)
-    : NotifySymbolsResolved(std::move(NotifySymbolsResolved)),
-      NotifySymbolsReady(std::move(NotifySymbolsReady)) {
-  NotYetResolvedCount = NotYetReadyCount = Symbols.size();
+    const SymbolNameSet &Symbols, SymbolState RequiredState,
+    SymbolsResolvedCallback NotifyComplete)
+    : NotifyComplete(std::move(NotifyComplete)), RequiredState(RequiredState) {
+  assert(RequiredState >= SymbolState::Resolved &&
+         "Cannot query for a symbols that have not reached the resolve state "
+         "yet");
+
+  OutstandingSymbolsCount = Symbols.size();
 
   for (auto &S : Symbols)
     ResolvedSymbols[S] = nullptr;
 }
 
-void AsynchronousSymbolQuery::resolve(const SymbolStringPtr &Name,
-                                      JITEvaluatedSymbol Sym) {
+void AsynchronousSymbolQuery::notifySymbolMetRequiredState(
+    const SymbolStringPtr &Name, JITEvaluatedSymbol Sym) {
   auto I = ResolvedSymbols.find(Name);
   assert(I != ResolvedSymbols.end() &&
          "Resolving symbol outside the requested set");
   assert(I->second.getAddress() == 0 && "Redundantly resolving symbol Name");
   I->second = std::move(Sym);
-  --NotYetResolvedCount;
-}
-
-void AsynchronousSymbolQuery::handleFullyResolved() {
-  assert(NotYetResolvedCount == 0 && "Not fully resolved?");
-
-  if (!NotifySymbolsResolved) {
-    // handleFullyResolved may be called by handleFullyReady (see comments in
-    // that method), in which case this is a no-op, so bail out.
-    assert(!NotifySymbolsReady &&
-           "NotifySymbolsResolved already called or an error occurred");
-    return;
-  }
-
-  auto TmpNotifySymbolsResolved = std::move(NotifySymbolsResolved);
-  NotifySymbolsResolved = SymbolsResolvedCallback();
-  TmpNotifySymbolsResolved(std::move(ResolvedSymbols));
-}
-
-void AsynchronousSymbolQuery::notifySymbolReady() {
-  assert(NotYetReadyCount != 0 && "All symbols already emitted");
-  --NotYetReadyCount;
+  --OutstandingSymbolsCount;
 }
 
-void AsynchronousSymbolQuery::handleFullyReady() {
-  assert(NotifySymbolsReady &&
-         "NotifySymbolsReady already called or an error occurred");
+void AsynchronousSymbolQuery::handleComplete() {
+  assert(OutstandingSymbolsCount == 0 &&
+         "Symbols remain, handleComplete called prematurely");
 
-  auto TmpNotifySymbolsReady = std::move(NotifySymbolsReady);
-  NotifySymbolsReady = SymbolsReadyCallback();
-
-  if (NotYetResolvedCount == 0 && NotifySymbolsResolved) {
-    // The NotifyResolved callback of one query must have caused this query to
-    // become ready (i.e. there is still a handleFullyResolved callback waiting
-    // to be made back up the stack). Fold the handleFullyResolved call into
-    // this one before proceeding. This will cause the call further up the
-    // stack to become a no-op.
-    handleFullyResolved();
-  }
-
-  assert(QueryRegistrations.empty() &&
-         "Query is still registered with some symbols");
-  assert(!NotifySymbolsResolved && "Resolution not applied yet");
-  TmpNotifySymbolsReady(Error::success());
+  auto TmpNotifyComplete = std::move(NotifyComplete);
+  NotifyComplete = SymbolsResolvedCallback();
+  TmpNotifyComplete(std::move(ResolvedSymbols));
 }
 
-bool AsynchronousSymbolQuery::canStillFail() {
-  return (NotifySymbolsResolved || NotifySymbolsReady);
-}
+bool AsynchronousSymbolQuery::canStillFail() { return !!NotifyComplete; }
 
 void AsynchronousSymbolQuery::handleFailed(Error Err) {
   assert(QueryRegistrations.empty() && ResolvedSymbols.empty() &&
-         NotYetResolvedCount == 0 && NotYetReadyCount == 0 &&
+         OutstandingSymbolsCount == 0 &&
          "Query should already have been abandoned");
-  if (NotifySymbolsResolved) {
-    NotifySymbolsResolved(std::move(Err));
-    NotifySymbolsResolved = SymbolsResolvedCallback();
-  } else {
-    assert(NotifySymbolsReady && "Failed after both callbacks issued?");
-    NotifySymbolsReady(std::move(Err));
-  }
-  NotifySymbolsReady = SymbolsReadyCallback();
+  NotifyComplete(std::move(Err));
+  NotifyComplete = SymbolsResolvedCallback();
 }
 
 void AsynchronousSymbolQuery::addQueryDependence(JITDylib &JD,
@@ -363,8 +346,7 @@ void AsynchronousSymbolQuery::removeQueryDependence(
 
 void AsynchronousSymbolQuery::detach() {
   ResolvedSymbols.clear();
-  NotYetResolvedCount = 0;
-  NotYetReadyCount = 0;
+  OutstandingSymbolsCount = 0;
   for (auto &KV : QueryRegistrations)
     KV.first->detachQueryHelper(*this, KV.second);
   QueryRegistrations.clear();
@@ -374,11 +356,6 @@ MaterializationResponsibility::MaterializationResponsibility(
     JITDylib &JD, SymbolFlagsMap SymbolFlags, VModuleKey K)
     : JD(JD), SymbolFlags(std::move(SymbolFlags)), K(std::move(K)) {
   assert(!this->SymbolFlags.empty() && "Materializing nothing?");
-
-#ifndef NDEBUG
-  for (auto &KV : this->SymbolFlags)
-    KV.second |= JITSymbolFlags::Materializing;
-#endif
 }
 
 MaterializationResponsibility::~MaterializationResponsibility() {
@@ -390,16 +367,15 @@ SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
   return JD.getRequestedSymbols(SymbolFlags);
 }
 
-void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
-  LLVM_DEBUG(dbgs() << "In " << JD.getName() << " resolving " << Symbols
-                    << "\n");
+void MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) {
+  LLVM_DEBUG({
+    dbgs() << "In " << JD.getName() << " resolving " << Symbols << "\n";
+  });
 #ifndef NDEBUG
   for (auto &KV : Symbols) {
     auto I = SymbolFlags.find(KV.first);
     assert(I != SymbolFlags.end() &&
            "Resolving symbol outside this responsibility set");
-    assert(I->second.isMaterializing() && "Duplicate resolution");
-    I->second &= ~JITSymbolFlags::Materializing;
     if (I->second.isWeak())
       assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) &&
              "Resolving symbol with incorrect flags");
@@ -412,12 +388,11 @@ void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
   JD.resolve(Symbols);
 }
 
-void MaterializationResponsibility::emit() {
-#ifndef NDEBUG
-  for (auto &KV : SymbolFlags)
-    assert(!KV.second.isMaterializing() &&
-           "Failed to resolve symbol before emission");
-#endif // NDEBUG
+void MaterializationResponsibility::notifyEmitted() {
+
+  LLVM_DEBUG({
+    dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n";
+  });
 
   JD.emit(SymbolFlags);
   SymbolFlags.clear();
@@ -429,19 +404,19 @@ Error MaterializationResponsibility::defineMaterializing(
   // It's ok if we hit a duplicate here: In that case the new version will be
   // discarded, and the JITDylib::defineMaterializing method will return a
   // duplicate symbol error.
-  for (auto &KV : NewSymbolFlags) {
-    auto I = SymbolFlags.insert(KV).first;
-    (void)I;
-#ifndef NDEBUG
-    I->second |= JITSymbolFlags::Materializing;
-#endif
-  }
+  for (auto &KV : NewSymbolFlags)
+    SymbolFlags.insert(KV);
 
   return JD.defineMaterializing(NewSymbolFlags);
 }
 
 void MaterializationResponsibility::failMaterialization() {
 
+  LLVM_DEBUG({
+    dbgs() << "In " << JD.getName() << " failing materialization for "
+           << SymbolFlags << "\n";
+  });
+
   SymbolNameSet FailedSymbols;
   for (auto &KV : SymbolFlags)
     FailedSymbols.insert(KV.first);
@@ -510,8 +485,8 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
 
 void AbsoluteSymbolsMaterializationUnit::materialize(
     MaterializationResponsibility R) {
-  R.resolve(Symbols);
-  R.emit();
+  R.notifyResolved(Symbols);
+  R.notifyEmitted();
 }
 
 void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -559,6 +534,14 @@ void ReExportsMaterializationUnit::materialize(
     Aliases.erase(I);
   }
 
+  LLVM_DEBUG({
+    ES.runSessionLocked([&]() {
+      dbgs() << "materializing reexports: target = " << TgtJD.getName()
+             << ", source = " << SrcJD.getName() << " " << RequestedAliases
+             << "\n";
+    });
+  });
+
   if (!Aliases.empty()) {
     if (SourceJD)
       R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported));
@@ -641,7 +624,7 @@ void ReExportsMaterializationUnit::materialize(
         }
     };
 
-    auto OnResolve = [QueryInfo](Expected<SymbolMap> Result) {
+    auto OnComplete = [QueryInfo](Expected<SymbolMap> Result) {
       if (Result) {
         SymbolMap ResolutionMap;
         for (auto &KV : QueryInfo->Aliases) {
@@ -650,8 +633,8 @@ void ReExportsMaterializationUnit::materialize(
           ResolutionMap[KV.first] = JITEvaluatedSymbol(
               (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
         }
-        QueryInfo->R.resolve(ResolutionMap);
-        QueryInfo->R.emit();
+        QueryInfo->R.notifyResolved(ResolutionMap);
+        QueryInfo->R.notifyEmitted();
       } else {
         auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
         ES.reportError(Result.takeError());
@@ -659,10 +642,8 @@ void ReExportsMaterializationUnit::materialize(
       }
     };
 
-    auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
-
     ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols,
-              std::move(OnResolve), std::move(OnReady),
+              SymbolState::Resolved, std::move(OnComplete),
               std::move(RegisterDependencies));
   }
 }
@@ -687,17 +668,20 @@ Expected<SymbolAliasMap>
 buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
   auto Flags = SourceJD.lookupFlags(Symbols);
 
-  if (Flags.size() != Symbols.size()) {
+  if (!Flags)
+    return Flags.takeError();
+
+  if (Flags->size() != Symbols.size()) {
     SymbolNameSet Unresolved = Symbols;
-    for (auto &KV : Flags)
+    for (auto &KV : *Flags)
       Unresolved.erase(KV.first);
     return make_error<SymbolsNotFound>(std::move(Unresolved));
   }
 
   SymbolAliasMap Result;
   for (auto &Name : Symbols) {
-    assert(Flags.count(Name) && "Missing entry in flags map");
-    Result[Name] = SymbolAliasMapEntry(Name, Flags[Name]);
+    assert(Flags->count(Name) && "Missing entry in flags map");
+    Result[Name] = SymbolAliasMapEntry(Name, (*Flags)[Name]);
   }
 
   return Result;
@@ -709,14 +693,17 @@ ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
     : SourceJD(SourceJD), MatchNonExported(MatchNonExported),
       Allow(std::move(Allow)) {}
 
-SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
-                                             const SymbolNameSet &Names) {
+Expected<SymbolNameSet>
+ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) {
   orc::SymbolNameSet Added;
   orc::SymbolAliasMap AliasMap;
 
   auto Flags = SourceJD.lookupFlags(Names);
 
-  for (auto &KV : Flags) {
+  if (!Flags)
+    return Flags.takeError();
+
+  for (auto &KV : *Flags) {
     if (Allow && !Allow(KV.first))
       continue;
     AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
@@ -731,21 +718,19 @@ SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
 
 Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
   return ES.runSessionLocked([&]() -> Error {
-    std::vector<SymbolMap::iterator> AddedSyms;
+    std::vector<SymbolTable::iterator> AddedSyms;
 
     for (auto &KV : SymbolFlags) {
-      SymbolMap::iterator EntryItr;
+      SymbolTable::iterator EntryItr;
       bool Added;
 
-      auto NewFlags = KV.second;
-      NewFlags |= JITSymbolFlags::Materializing;
-
-      std::tie(EntryItr, Added) = Symbols.insert(
-          std::make_pair(KV.first, JITEvaluatedSymbol(0, NewFlags)));
+      std::tie(EntryItr, Added) =
+          Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second)));
 
-      if (Added)
+      if (Added) {
         AddedSyms.push_back(EntryItr);
-      else {
+        EntryItr->second.setState(SymbolState::Materializing);
+      } else {
         // Remove any symbols already added.
         for (auto &SI : AddedSyms)
           Symbols.erase(SI);
@@ -769,9 +754,10 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
         for (auto &KV : MU->getSymbols()) {
           auto SymI = Symbols.find(KV.first);
           assert(SymI != Symbols.end() && "Replacing unknown symbol");
-          assert(!SymI->second.getFlags().isLazy() &&
-                 SymI->second.getFlags().isMaterializing() &&
-                 "Can not replace symbol that is not materializing");
+          assert(SymI->second.isInMaterializationPhase() &&
+                 "Can not call replace on a symbol that is not materializing");
+          assert(!SymI->second.hasMaterializerAttached() &&
+                 "Symbol should not have materializer attached already");
           assert(UnmaterializedInfos.count(KV.first) == 0 &&
                  "Symbol being replaced should have no UnmaterializedInfo");
         }
@@ -782,7 +768,7 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
         for (auto &KV : MU->getSymbols()) {
           auto MII = MaterializingInfos.find(KV.first);
           if (MII != MaterializingInfos.end()) {
-            if (!MII->second.PendingQueries.empty())
+            if (MII->second.hasQueriesPending())
               return std::move(MU);
           }
         }
@@ -790,16 +776,15 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
         // Otherwise, make MU responsible for all the symbols.
         auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
         for (auto &KV : UMI->MU->getSymbols()) {
-          assert(!KV.second.isLazy() &&
-                 "Lazy flag should be managed internally.");
-          assert(!KV.second.isMaterializing() &&
-                 "Materializing flags should be managed internally.");
-
           auto SymI = Symbols.find(KV.first);
-          JITSymbolFlags ReplaceFlags = KV.second;
-          ReplaceFlags |= JITSymbolFlags::Lazy;
-          SymI->second = JITEvaluatedSymbol(SymI->second.getAddress(),
-                                            std::move(ReplaceFlags));
+          assert(SymI->second.getState() == SymbolState::Materializing &&
+                 "Can not replace a symbol that is not materializing");
+          assert(!SymI->second.hasMaterializerAttached() &&
+                 "Can not replace a symbol that has a materializer attached");
+          assert(UnmaterializedInfos.count(KV.first) == 0 &&
+                 "Unexpected materializer entry in map");
+          SymI->second.setAddress(SymI->second.getAddress());
+          SymI->second.setMaterializerAttached(true);
           UnmaterializedInfos[KV.first] = UMI;
         }
 
@@ -817,14 +802,14 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
 
     for (auto &KV : SymbolFlags) {
       assert(Symbols.count(KV.first) && "JITDylib does not cover this symbol?");
-      assert(Symbols.find(KV.first)->second.getFlags().isMaterializing() &&
-             "getRequestedSymbols can only be called for materializing "
-             "symbols");
+      assert(Symbols.find(KV.first)->second.isInMaterializationPhase() &&
+             "getRequestedSymbols can only be called for symbols that have "
+             "started materializing");
       auto I = MaterializingInfos.find(KV.first);
       if (I == MaterializingInfos.end())
         continue;
 
-      if (!I->second.PendingQueries.empty())
+      if (I->second.hasQueriesPending())
         RequestedSymbols.insert(KV.first);
     }
 
@@ -835,9 +820,8 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
 void JITDylib::addDependencies(const SymbolStringPtr &Name,
                                const SymbolDependenceMap &Dependencies) {
   assert(Symbols.count(Name) && "Name not in symbol table");
-  assert((Symbols[Name].getFlags().isLazy() ||
-          Symbols[Name].getFlags().isMaterializing()) &&
-         "Symbol is not lazy or materializing");
+  assert(Symbols[Name].isInMaterializationPhase() &&
+         "Can not add dependencies for a symbol that is not materializing");
 
   auto &MI = MaterializingInfos[Name];
   assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
@@ -852,9 +836,8 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
       // Assert that this symbol exists and has not been emitted already.
       auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
       assert(SymI != OtherJITDylib.Symbols.end() &&
-             (SymI->second.getFlags().isLazy() ||
-              SymI->second.getFlags().isMaterializing()) &&
-             "Dependency on emitted symbol");
+             (SymI->second.getState() != SymbolState::Ready &&
+              "Dependency on emitted symbol"));
 #endif
 
       auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
@@ -873,54 +856,52 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
 }
 
 void JITDylib::resolve(const SymbolMap &Resolved) {
-  auto FullyResolvedQueries = ES.runSessionLocked([&, this]() {
-    AsynchronousSymbolQuerySet FullyResolvedQueries;
+  auto CompletedQueries = ES.runSessionLocked([&, this]() {
+    AsynchronousSymbolQuerySet CompletedQueries; // Returned by the lambda.
     for (const auto &KV : Resolved) {
       auto &Name = KV.first;
       auto Sym = KV.second;
 
-      assert(!Sym.getFlags().isLazy() && !Sym.getFlags().isMaterializing() &&
-             "Materializing flags should be managed internally");
-
       auto I = Symbols.find(Name);
 
       assert(I != Symbols.end() && "Symbol not found");
-      assert(!I->second.getFlags().isLazy() &&
-             I->second.getFlags().isMaterializing() &&
+      assert(!I->second.hasMaterializerAttached() &&
+             "Resolving symbol with materializer attached?");
+      assert(I->second.getState() == SymbolState::Materializing &&
              "Symbol should be materializing");
       assert(I->second.getAddress() == 0 && "Symbol has already been resolved");
 
       assert((Sym.getFlags() & ~JITSymbolFlags::Weak) ==
-                 (JITSymbolFlags::stripTransientFlags(I->second.getFlags()) &
-                  ~JITSymbolFlags::Weak) &&
+                 (I->second.getFlags() & ~JITSymbolFlags::Weak) &&
              "Resolved flags should match the declared flags");
 
       // Once resolved, symbols can never be weak.
       JITSymbolFlags ResolvedFlags = Sym.getFlags();
       ResolvedFlags &= ~JITSymbolFlags::Weak;
-      ResolvedFlags |= JITSymbolFlags::Materializing;
-      I->second = JITEvaluatedSymbol(Sym.getAddress(), ResolvedFlags);
+      I->second.setAddress(Sym.getAddress()); // Asserted to be 0 until now.
+      I->second.setFlags(ResolvedFlags); // Weak bit cleared above.
+      I->second.setState(SymbolState::Resolved); // Was Materializing.
 
       auto &MI = MaterializingInfos[Name];
-      for (auto &Q : MI.PendingQueries) {
-        Q->resolve(Name, Sym);
-        if (Q->isFullyResolved())
-          FullyResolvedQueries.insert(Q);
+      for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) {
+        Q->notifySymbolMetRequiredState(Name, Sym); // Sym as passed in.
+        if (Q->isComplete()) // Handler is deferred to the loop below.
+          CompletedQueries.insert(std::move(Q));
       }
     }
 
-    return FullyResolvedQueries;
+    return CompletedQueries;
   });
 
-  for (auto &Q : FullyResolvedQueries) {
-    assert(Q->isFullyResolved() && "Q not fully resolved");
-    Q->handleFullyResolved();
+  for (auto &Q : CompletedQueries) {
+    assert(Q->isComplete() && "Q not completed");
+    Q->handleComplete(); // Called after runSessionLocked returns.
   }
 }
 
 void JITDylib::emit(const SymbolFlagsMap &Emitted) {
-  auto FullyReadyQueries = ES.runSessionLocked([&, this]() {
-    AsynchronousSymbolQuerySet ReadyQueries;
+  auto CompletedQueries = ES.runSessionLocked([&, this]() {
+    AsynchronousSymbolQuerySet CompletedQueries;
 
     for (const auto &KV : Emitted) {
       const auto &Name = KV.first;
@@ -962,20 +943,22 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
               DependantMI.UnemittedDependencies.empty()) {
             assert(DependantMI.Dependants.empty() &&
                    "Dependants should be empty by now");
-            for (auto &Q : DependantMI.PendingQueries) {
-              Q->notifySymbolReady();
-              if (Q->isFullyReady())
-                ReadyQueries.insert(Q);
-              Q->removeQueryDependence(DependantJD, DependantName);
-            }
 
             // Since this dependant is now ready, we erase its MaterializingInfo
             // and update its materializing state.
-            assert(DependantJD.Symbols.count(DependantName) &&
+            auto DependantSymI = DependantJD.Symbols.find(DependantName);
+            assert(DependantSymI != DependantJD.Symbols.end() &&
                    "Dependant has no entry in the Symbols table");
-            auto &DependantSym = DependantJD.Symbols[DependantName];
-            DependantSym.setFlags(DependantSym.getFlags() &
-                                  ~JITSymbolFlags::Materializing);
+            DependantSymI->second.setState(SymbolState::Ready);
+
+            for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) {
+              Q->notifySymbolMetRequiredState(
+                  DependantName, DependantSymI->second.getSymbol());
+              if (Q->isComplete())
+                CompletedQueries.insert(Q);
+              Q->removeQueryDependence(DependantJD, DependantName);
+            }
+
             DependantJD.MaterializingInfos.erase(DependantMII);
           }
         }
@@ -984,26 +967,25 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
       MI.IsEmitted = true;
 
       if (MI.UnemittedDependencies.empty()) {
-        for (auto &Q : MI.PendingQueries) {
-          Q->notifySymbolReady();
-          if (Q->isFullyReady())
-            ReadyQueries.insert(Q);
+        auto SymI = Symbols.find(Name);
+        assert(SymI != Symbols.end() && "Symbol has no entry in Symbols table");
+        SymI->second.setState(SymbolState::Ready);
+        for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
+          Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+          if (Q->isComplete())
+            CompletedQueries.insert(Q);
           Q->removeQueryDependence(*this, Name);
         }
-        assert(Symbols.count(Name) &&
-               "Symbol has no entry in the Symbols table");
-        auto &Sym = Symbols[Name];
-        Sym.setFlags(Sym.getFlags() & ~JITSymbolFlags::Materializing);
         MaterializingInfos.erase(MII);
       }
     }
 
-    return ReadyQueries;
+    return CompletedQueries;
   });
 
-  for (auto &Q : FullyReadyQueries) {
-    assert(Q->isFullyReady() && "Q is not fully ready");
-    Q->handleFullyReady();
+  for (auto &Q : CompletedQueries) {
+    assert(Q->isComplete() && "Q is not complete");
+    Q->handleComplete();
   }
 }
 
@@ -1013,6 +995,7 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
 
   auto FailedQueriesToNotify = ES.runSessionLocked([&, this]() {
     AsynchronousSymbolQuerySet FailedQueries;
+    std::vector<MaterializingInfosMap::iterator> MIIsToRemove;
 
     for (auto &Name : FailedSymbols) {
       auto I = Symbols.find(Name);
@@ -1026,17 +1009,40 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
       if (MII == MaterializingInfos.end())
         continue;
 
+      // Remove this symbol from the dependants list of any dependencies.
+      for (auto &KV : MII->second.UnemittedDependencies) {
+        auto *DependencyJD = KV.first;
+        auto &Dependencies = KV.second;
+        for (auto &DependencyName : Dependencies) {
+          auto DependencyMII =
+              DependencyJD->MaterializingInfos.find(DependencyName);
+          assert(DependencyMII != DependencyJD->MaterializingInfos.end() &&
+                 "Unemitted dependency must have a MaterializingInfo entry");
+          assert(DependencyMII->second.Dependants.count(this) &&
+                 "Dependency's dependants list does not contain this JITDylib");
+          assert(DependencyMII->second.Dependants[this].count(Name) &&
+                 "Dependency's dependants list does not contain dependant");
+          DependencyMII->second.Dependants[this].erase(Name);
+        }
+      }
+
       // Copy all the queries to the FailedQueries list, then abandon them.
       // This has to be a copy, and the copy has to come before the abandon
       // operation: Each Q.detach() call will reach back into this
       // PendingQueries list to remove Q.
-      for (auto &Q : MII->second.PendingQueries)
+      for (auto &Q : MII->second.pendingQueries())
         FailedQueries.insert(Q);
 
-      for (auto &Q : FailedQueries)
-        Q->detach();
+      MIIsToRemove.push_back(std::move(MII));
+    }
+
+    // Detach failed queries.
+    for (auto &Q : FailedQueries)
+      Q->detach();
 
-      assert(MII->second.PendingQueries.empty() &&
+    // Remove the MaterializingInfos.
+    for (auto &MII : MIIsToRemove) {
+      assert(!MII->second.hasQueriesPending() &&
              "Queries remain after symbol was failed");
 
       MaterializingInfos.erase(MII);
@@ -1052,9 +1058,11 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
 void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
                               bool SearchThisJITDylibFirst,
                               bool MatchNonExportedInThisDylib) {
-  if (SearchThisJITDylibFirst && NewSearchOrder.front().first != this)
-    NewSearchOrder.insert(NewSearchOrder.begin(),
-                          {this, MatchNonExportedInThisDylib});
+  if (SearchThisJITDylibFirst) { // Prepend this JITDylib unless already first.
+    if (NewSearchOrder.empty() || NewSearchOrder.front().first != this)
+      NewSearchOrder.insert(NewSearchOrder.begin(), // Also taken when empty.
+                            {this, MatchNonExportedInThisDylib});
+  }
 
   ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); });
 }
@@ -1092,7 +1100,7 @@ void JITDylib::removeFromSearchOrder(JITDylib &JD) {
 Error JITDylib::remove(const SymbolNameSet &Names) {
   return ES.runSessionLocked([&]() -> Error {
     using SymbolMaterializerItrPair =
-        std::pair<SymbolMap::iterator, UnmaterializedInfosMap::iterator>;
+        std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>;
     std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
     SymbolNameSet Missing;
     SymbolNameSet Materializing;
@@ -1107,13 +1115,14 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
       }
 
       // Note symbol materializing.
-      if (I->second.getFlags().isMaterializing()) {
+      if (I->second.isInMaterializationPhase()) {
         Materializing.insert(Name);
         continue;
       }
 
-      auto UMII = I->second.getFlags().isLazy() ? UnmaterializedInfos.find(Name)
-                                                : UnmaterializedInfos.end();
+      auto UMII = I->second.hasMaterializerAttached()
+                      ? UnmaterializedInfos.find(Name)
+                      : UnmaterializedInfos.end();
       SymbolsToRemove.push_back(std::make_pair(I, UMII));
     }
 
@@ -1143,16 +1152,23 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
   });
 }
 
-SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
-  return ES.runSessionLocked([&, this]() {
+Expected<SymbolFlagsMap> JITDylib::lookupFlags(const SymbolNameSet &Names) {
+  return ES.runSessionLocked([&, this]() -> Expected<SymbolFlagsMap> {
     SymbolFlagsMap Result;
     auto Unresolved = lookupFlagsImpl(Result, Names);
-    if (DefGenerator && !Unresolved.empty()) {
-      auto NewDefs = DefGenerator(*this, Unresolved);
-      if (!NewDefs.empty()) {
-        auto Unresolved2 = lookupFlagsImpl(Result, NewDefs);
+    if (!Unresolved)
+      return Unresolved.takeError();
+
+    if (DefGenerator && !Unresolved->empty()) {
+      auto NewDefs = DefGenerator(*this, *Unresolved);
+      if (!NewDefs)
+        return NewDefs.takeError();
+      if (!NewDefs->empty()) {
+        auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs);
+        if (!Unresolved2)
+          return Unresolved2.takeError();
         (void)Unresolved2;
-        assert(Unresolved2.empty() &&
+        assert(Unresolved2->empty() &&
                "All fallback defs should have been found by lookupFlagsImpl");
       }
     };
@@ -1160,41 +1176,42 @@ SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
   });
 }
 
-SymbolNameSet JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
-                                        const SymbolNameSet &Names) {
+Expected<SymbolNameSet> JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
+                                                  const SymbolNameSet &Names) {
   SymbolNameSet Unresolved;
 
   for (auto &Name : Names) {
     auto I = Symbols.find(Name);
-
-    if (I == Symbols.end()) {
+    if (I != Symbols.end()) { // Known symbol: record its flags.
+      assert(!Flags.count(Name) && "Symbol already present in Flags map");
+      Flags[Name] = I->second.getFlags();
+    } else // Not in Symbols: hand the name back to the caller.
       Unresolved.insert(Name);
-      continue;
-    }
-
-    assert(!Flags.count(Name) && "Symbol already present in Flags map");
-    Flags[Name] = JITSymbolFlags::stripTransientFlags(I->second.getFlags());
   }
 
   return Unresolved;
 }
 
-void JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
-                          SymbolNameSet &Unresolved, bool MatchNonExported,
-                          MaterializationUnitList &MUs) {
+Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+                           SymbolNameSet &Unresolved, bool MatchNonExported,
+                           MaterializationUnitList &MUs) {
   assert(Q && "Query can not be null");
 
   lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs);
   if (DefGenerator && !Unresolved.empty()) {
     auto NewDefs = DefGenerator(*this, Unresolved);
-    if (!NewDefs.empty()) {
-      for (auto &D : NewDefs)
+    if (!NewDefs) // The generator reported an error.
+      return NewDefs.takeError();
+    if (!NewDefs->empty()) { // Generator produced some names.
+      for (auto &D : *NewDefs) // These names are no longer unresolved.
         Unresolved.erase(D);
-      lodgeQueryImpl(Q, NewDefs, MatchNonExported, MUs);
-      assert(NewDefs.empty() &&
+      lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs); // Second pass.
+      assert(NewDefs->empty() &&
              "All fallback defs should have been found by lookupImpl");
     }
   }
+
+  return Error::success(); // Reached when no generator error occurred.
 }
 
 void JITDylib::lodgeQueryImpl(
@@ -1204,6 +1221,7 @@ void JITDylib::lodgeQueryImpl(
 
   std::vector<SymbolStringPtr> ToRemove;
   for (auto Name : Unresolved) {
+
     // Search for the name in Symbols. Skip it if not found.
     auto SymI = Symbols.find(Name);
     if (SymI == Symbols.end())
@@ -1213,20 +1231,22 @@ void JITDylib::lodgeQueryImpl(
     if (!SymI->second.getFlags().isExported() && !MatchNonExported)
       continue;
 
-    // If we matched against Name in JD, mark it to be removed from the Unresolved
-    // set.
+    // If we matched against Name in JD, mark it to be removed from the
+    // Unresolved set.
     ToRemove.push_back(Name);
 
-    // If the symbol has an address then resolve it.
-    if (SymI->second.getAddress() != 0)
-      Q->resolve(Name, SymI->second);
+    // If this symbol already meets the state required by the query then
+    // notify the query and continue.
+    if (SymI->second.getState() >= Q->getRequiredState()) {
+      Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+      continue;
+    }
 
-    // If the symbol is lazy, get the MaterialiaztionUnit for it.
-    if (SymI->second.getFlags().isLazy()) {
+    // Otherwise this symbol does not yet meet the required state. Check whether
+    // it has a materializer attached, and if so prepare to run it.
+    if (SymI->second.hasMaterializerAttached()) {
       assert(SymI->second.getAddress() == 0 &&
-             "Lazy symbol should not have a resolved address");
-      assert(!SymI->second.getFlags().isMaterializing() &&
-             "Materializing and lazy should not both be set");
+             "Symbol not resolved but already has address?");
       auto UMII = UnmaterializedInfos.find(Name);
       assert(UMII != UnmaterializedInfos.end() &&
              "Lazy symbol should have UnmaterializedInfo");
@@ -1237,27 +1257,20 @@ void JITDylib::lodgeQueryImpl(
       // materializing state.
       for (auto &KV : MU->getSymbols()) {
         auto SymK = Symbols.find(KV.first);
-        auto Flags = SymK->second.getFlags();
-        Flags &= ~JITSymbolFlags::Lazy;
-        Flags |= JITSymbolFlags::Materializing;
-        SymK->second.setFlags(Flags);
+        SymK->second.setMaterializerAttached(false);
+        SymK->second.setState(SymbolState::Materializing);
         UnmaterializedInfos.erase(KV.first);
       }
 
       // Add MU to the list of MaterializationUnits to be materialized.
       MUs.push_back(std::move(MU));
-    } else if (!SymI->second.getFlags().isMaterializing()) {
-      // The symbol is neither lazy nor materializing, so it must be
-      // ready. Notify the query and continue.
-      Q->notifySymbolReady();
-      continue;
     }
 
     // Add the query to the PendingQueries list.
-    assert(SymI->second.getFlags().isMaterializing() &&
+    assert(SymI->second.isInMaterializationPhase() &&
            "By this line the symbol should be materializing");
     auto &MI = MaterializingInfos[Name];
-    MI.PendingQueries.push_back(Q);
+    MI.addQuery(Q);
     Q->addQueryDependence(*this, Name);
   }
 
@@ -1266,40 +1279,43 @@ void JITDylib::lodgeQueryImpl(
     Unresolved.erase(Name);
 }
 
-SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
-                                     SymbolNameSet Names) {
+Expected<SymbolNameSet>
+JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
+                       SymbolNameSet Names) {
   assert(Q && "Query can not be null");
 
   ES.runOutstandingMUs();
 
-  LookupImplActionFlags ActionFlags = None;
+  bool QueryComplete = false;
   std::vector<std::unique_ptr<MaterializationUnit>> MUs;
 
   SymbolNameSet Unresolved = std::move(Names);
-  ES.runSessionLocked([&, this]() {
-    ActionFlags = lookupImpl(Q, MUs, Unresolved);
+  auto Err = ES.runSessionLocked([&, this]() -> Error {
+    QueryComplete = lookupImpl(Q, MUs, Unresolved);
     if (DefGenerator && !Unresolved.empty()) {
-      assert(ActionFlags == None &&
-             "ActionFlags set but unresolved symbols remain?");
+      assert(!QueryComplete && "query complete but unresolved symbols remain?");
       auto NewDefs = DefGenerator(*this, Unresolved);
-      if (!NewDefs.empty()) {
-        for (auto &D : NewDefs)
+      if (!NewDefs)
+        return NewDefs.takeError();
+      if (!NewDefs->empty()) {
+        for (auto &D : *NewDefs)
           Unresolved.erase(D);
-        ActionFlags = lookupImpl(Q, MUs, NewDefs);
-        assert(NewDefs.empty() &&
+        QueryComplete = lookupImpl(Q, MUs, *NewDefs);
+        assert(NewDefs->empty() &&
                "All fallback defs should have been found by lookupImpl");
       }
     }
+    return Error::success();
   });
 
-  assert((MUs.empty() || ActionFlags == None) &&
-         "If action flags are set, there should be no work to do (so no MUs)");
+  if (Err)
+    return std::move(Err);
 
-  if (ActionFlags & NotifyFullyResolved)
-    Q->handleFullyResolved();
+  assert((MUs.empty() || !QueryComplete) &&
+         "If action flags are set, there should be no work to do (so no MUs)");
 
-  if (ActionFlags & NotifyFullyReady)
-    Q->handleFullyReady();
+  if (QueryComplete)
+    Q->handleComplete();
 
   // FIXME: Swap back to the old code below once RuntimeDyld works with
   //        callbacks from asynchronous queries.
@@ -1318,13 +1334,13 @@ SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
   return Unresolved;
 }
 
-JITDylib::LookupImplActionFlags
-JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
-                     std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
-                     SymbolNameSet &Unresolved) {
-  LookupImplActionFlags ActionFlags = None;
-  std::vector<SymbolStringPtr> ToRemove;
+bool JITDylib::lookupImpl(
+    std::shared_ptr<AsynchronousSymbolQuery> &Q,
+    std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+    SymbolNameSet &Unresolved) {
+  bool QueryComplete = false;
 
+  std::vector<SymbolStringPtr> ToRemove;
   for (auto Name : Unresolved) {
 
     // Search for the name in Symbols. Skip it if not found.
@@ -1335,19 +1351,17 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
     // If we found Name, mark it to be removed from the Unresolved set.
     ToRemove.push_back(Name);
 
-    // If the symbol has an address then resolve it.
-    if (SymI->second.getAddress() != 0) {
-      Q->resolve(Name, SymI->second);
-      if (Q->isFullyResolved())
-        ActionFlags |= NotifyFullyResolved;
+    if (SymI->second.getState() >= Q->getRequiredState()) {
+      Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+      if (Q->isComplete())
+        QueryComplete = true;
+      continue;
     }
 
     // If the symbol is lazy, get the MaterialiaztionUnit for it.
-    if (SymI->second.getFlags().isLazy()) {
+    if (SymI->second.hasMaterializerAttached()) {
       assert(SymI->second.getAddress() == 0 &&
              "Lazy symbol should not have a resolved address");
-      assert(!SymI->second.getFlags().isMaterializing() &&
-             "Materializing and lazy should not both be set");
       auto UMII = UnmaterializedInfos.find(Name);
       assert(UMII != UnmaterializedInfos.end() &&
              "Lazy symbol should have UnmaterializedInfo");
@@ -1358,29 +1372,21 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
       // materializing state.
       for (auto &KV : MU->getSymbols()) {
         auto SymK = Symbols.find(KV.first);
-        auto Flags = SymK->second.getFlags();
-        Flags &= ~JITSymbolFlags::Lazy;
-        Flags |= JITSymbolFlags::Materializing;
-        SymK->second.setFlags(Flags);
+        assert(SymK != Symbols.end() && "Missing symbol table entry");
+        SymK->second.setState(SymbolState::Materializing);
+        SymK->second.setMaterializerAttached(false);
         UnmaterializedInfos.erase(KV.first);
       }
 
       // Add MU to the list of MaterializationUnits to be materialized.
       MUs.push_back(std::move(MU));
-    } else if (!SymI->second.getFlags().isMaterializing()) {
-      // The symbol is neither lazy nor materializing, so it must be ready.
-      // Notify the query and continue.
-      Q->notifySymbolReady();
-      if (Q->isFullyReady())
-        ActionFlags |= NotifyFullyReady;
-      continue;
     }
 
     // Add the query to the PendingQueries list.
-    assert(SymI->second.getFlags().isMaterializing() &&
+    assert(SymI->second.isInMaterializationPhase() &&
            "By this line the symbol should be materializing");
     auto &MI = MaterializingInfos[Name];
-    MI.PendingQueries.push_back(Q);
+    MI.addQuery(Q);
     Q->addQueryDependence(*this, Name);
   }
 
@@ -1388,7 +1394,7 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
   for (auto &Name : ToRemove)
     Unresolved.erase(Name);
 
-  return ActionFlags;
+  return QueryComplete;
 }
 
 void JITDylib::dump(raw_ostream &OS) {
@@ -1405,21 +1411,19 @@ void JITDylib::dump(raw_ostream &OS) {
     for (auto &KV : Symbols) {
       OS << "    \"" << *KV.first << "\": ";
       if (auto Addr = KV.second.getAddress())
-        OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags();
+        OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags()
+           << " ";
       else
-        OS << "<not resolved>";
-      if (KV.second.getFlags().isLazy() ||
-          KV.second.getFlags().isMaterializing()) {
-        OS << " (";
-        if (KV.second.getFlags().isLazy()) {
-          auto I = UnmaterializedInfos.find(KV.first);
-          assert(I != UnmaterializedInfos.end() &&
-                 "Lazy symbol should have UnmaterializedInfo");
-          OS << " Lazy (MU=" << I->second->MU.get() << ")";
-        }
-        if (KV.second.getFlags().isMaterializing())
-          OS << " Materializing";
-        OS << ", " << KV.second.getFlags() << " )\n";
+        OS << "<not resolved> ";
+
+      OS << KV.second.getState();
+
+      if (KV.second.hasMaterializerAttached()) {
+        OS << " (Materializer ";
+        auto I = UnmaterializedInfos.find(KV.first);
+        assert(I != UnmaterializedInfos.end() &&
+               "Lazy symbol should have UnmaterializedInfo");
+        OS << I->second->MU.get() << ")\n";
       } else
         OS << "\n";
     }
@@ -1430,10 +1434,10 @@ void JITDylib::dump(raw_ostream &OS) {
       OS << "    \"" << *KV.first << "\":\n"
          << "      IsEmitted = " << (KV.second.IsEmitted ? "true" : "false")
          << "\n"
-         << "      " << KV.second.PendingQueries.size()
+         << "      " << KV.second.pendingQueries().size()
          << " pending queries: { ";
-      for (auto &Q : KV.second.PendingQueries)
-        OS << Q.get() << " ";
+      for (const auto &Q : KV.second.pendingQueries())
+        OS << Q.get() << " (" << Q->getRequiredState() << ") ";
       OS << "}\n      Dependants:\n";
       for (auto &KV2 : KV.second.Dependants)
         OS << "        " << KV2.first->getName() << ": " << KV2.second << "\n";
@@ -1444,6 +1448,51 @@ void JITDylib::dump(raw_ostream &OS) {
   });
 }
 
+void JITDylib::MaterializingInfo::addQuery(
+    std::shared_ptr<AsynchronousSymbolQuery> Q) {
+  // Keep PendingQueries sorted by descending required state, front to back.
+  auto I = std::lower_bound(
+      PendingQueries.rbegin(), PendingQueries.rend(), Q->getRequiredState(),
+      [](const std::shared_ptr<AsynchronousSymbolQuery> &V, SymbolState S) {
+        return V->getRequiredState() <= S; // '<=': skip past equal states too.
+      });
+  PendingQueries.insert(I.base(), std::move(Q)); // base(): forward position.
+}
+
+void JITDylib::MaterializingInfo::removeQuery(
+    const AsynchronousSymbolQuery &Q) { // Q must be pending here.
+  // FIXME: Implement 'find_as' for shared_ptr<T>/T*.
+  auto I =
+      std::find_if(PendingQueries.begin(), PendingQueries.end(),
+                   [&Q](const std::shared_ptr<AsynchronousSymbolQuery> &V) {
+                     return V.get() == &Q; // Match by object identity.
+                   });
+  assert(I != PendingQueries.end() &&
+         "Query is not attached to this MaterializingInfo");
+  PendingQueries.erase(I); // Only the first matching entry is erased.
+}
+
+JITDylib::AsynchronousSymbolQueryList
+JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) {
+  AsynchronousSymbolQueryList Result; // Queries whose state is met.
+  while (!PendingQueries.empty()) { // Consume from the back of the list.
+    if (PendingQueries.back()->getRequiredState() > RequiredState)
+      break; // Back query needs a higher state; stop scanning.
+
+    Result.push_back(std::move(PendingQueries.back()));
+    PendingQueries.pop_back(); // Drop the moved-from shared_ptr.
+  }
+
+  return Result;
+}
+
+JITDylib::AsynchronousSymbolQueryList
+JITDylib::MaterializingInfo::takeAllQueries() { // Hands back every query.
+  AsynchronousSymbolQueryList Result;
+  std::swap(Result, PendingQueries); // Swap with a default-constructed list.
+  return Result;
+}
+
 JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
     : ES(ES), JITDylibName(std::move(Name)) {
   SearchOrder.push_back({this, true});
@@ -1451,77 +1500,52 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
 
 Error JITDylib::defineImpl(MaterializationUnit &MU) {
   SymbolNameSet Duplicates;
-  SymbolNameSet MUDefsOverridden;
-
-  struct ExistingDefOverriddenEntry {
-    SymbolMap::iterator ExistingDefItr;
-    JITSymbolFlags NewFlags;
-  };
-  std::vector<ExistingDefOverriddenEntry> ExistingDefsOverridden;
-
-  for (auto &KV : MU.getSymbols()) {
-    assert(!KV.second.isLazy() && "Lazy flag should be managed internally.");
-    assert(!KV.second.isMaterializing() &&
-           "Materializing flags should be managed internally.");
+  std::vector<SymbolStringPtr> ExistingDefsOverridden;
+  std::vector<SymbolStringPtr> MUDefsOverridden;
 
-    SymbolMap::iterator EntryItr;
-    bool Added;
+  for (const auto &KV : MU.getSymbols()) {
+    auto I = Symbols.find(KV.first);
 
-    auto NewFlags = KV.second;
-    NewFlags |= JITSymbolFlags::Lazy;
-
-    std::tie(EntryItr, Added) = Symbols.insert(
-        std::make_pair(KV.first, JITEvaluatedSymbol(0, NewFlags)));
-
-    if (!Added) {
+    if (I != Symbols.end()) {
       if (KV.second.isStrong()) {
-        if (EntryItr->second.getFlags().isStrong() ||
-            (EntryItr->second.getFlags() & JITSymbolFlags::Materializing))
+        if (I->second.getFlags().isStrong() ||
+            I->second.getState() > SymbolState::NeverSearched)
           Duplicates.insert(KV.first);
-        else
-          ExistingDefsOverridden.push_back({EntryItr, NewFlags});
+        else {
+          assert(I->second.getState() == SymbolState::NeverSearched &&
+                 "Overridden existing def should be in the never-searched "
+                 "state");
+          ExistingDefsOverridden.push_back(KV.first);
+        }
       } else
-        MUDefsOverridden.insert(KV.first);
+        MUDefsOverridden.push_back(KV.first);
     }
   }
 
-  if (!Duplicates.empty()) {
-    // We need to remove the symbols we added.
-    for (auto &KV : MU.getSymbols()) {
-      if (Duplicates.count(KV.first))
-        continue;
-
-      bool Found = false;
-      for (const auto &EDO : ExistingDefsOverridden)
-        if (EDO.ExistingDefItr->first == KV.first)
-          Found = true;
-
-      if (!Found)
-        Symbols.erase(KV.first);
-    }
-
-    // FIXME: Return all duplicates.
+  // If there were any duplicate definitions then bail out.
+  if (!Duplicates.empty())
     return make_error<DuplicateDefinition>(**Duplicates.begin());
-  }
 
-  // Update flags on existing defs and call discard on their materializers.
-  for (auto &EDO : ExistingDefsOverridden) {
-    assert(EDO.ExistingDefItr->second.getFlags().isLazy() &&
-           !EDO.ExistingDefItr->second.getFlags().isMaterializing() &&
-           "Overridden existing def should be in the Lazy state");
+  // Discard any overridden defs in this MU.
+  for (auto &S : MUDefsOverridden)
+    MU.doDiscard(*this, S);
 
-    EDO.ExistingDefItr->second.setFlags(EDO.NewFlags);
+  // Discard existing overridden defs.
+  for (auto &S : ExistingDefsOverridden) {
 
-    auto UMII = UnmaterializedInfos.find(EDO.ExistingDefItr->first);
+    auto UMII = UnmaterializedInfos.find(S);
     assert(UMII != UnmaterializedInfos.end() &&
            "Overridden existing def should have an UnmaterializedInfo");
-
-    UMII->second->MU->doDiscard(*this, EDO.ExistingDefItr->first);
+    UMII->second->MU->doDiscard(*this, S);
   }
 
-  // Discard overridden symbols povided by MU.
-  for (auto &Sym : MUDefsOverridden)
-    MU.doDiscard(*this, Sym);
+  // Finally, add the defs from this MU.
+  for (auto &KV : MU.getSymbols()) {
+    auto &SymEntry = Symbols[KV.first];
+    SymEntry.setFlags(KV.second);
+    SymEntry.setState(SymbolState::NeverSearched);
+    SymEntry.setMaterializerAttached(true);
+  }
 
   return Error::success();
 }
@@ -1532,17 +1556,7 @@ void JITDylib::detachQueryHelper(AsynchronousSymbolQuery &Q,
     assert(MaterializingInfos.count(QuerySymbol) &&
            "QuerySymbol does not have MaterializingInfo");
     auto &MI = MaterializingInfos[QuerySymbol];
-
-    auto IdenticalQuery =
-        [&](const std::shared_ptr<AsynchronousSymbolQuery> &R) {
-          return R.get() == &Q;
-        };
-
-    auto I = std::find_if(MI.PendingQueries.begin(), MI.PendingQueries.end(),
-                          IdenticalQuery);
-    assert(I != MI.PendingQueries.end() &&
-           "Query Q should be in the PendingQueries list for QuerySymbol");
-    MI.PendingQueries.erase(I);
+    MI.removeQuery(Q);
   }
 }
 
@@ -1582,8 +1596,18 @@ JITDylib &ExecutionSession::getMainJITDylib() {
   return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); });
 }
 
+JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
+  return runSessionLocked([&, this]() -> JITDylib * {
+    for (auto &JD : JDs)
+      if (JD->getName() == Name)
+        return JD.get();
+    return nullptr;
+  });
+}
+
 JITDylib &ExecutionSession::createJITDylib(std::string Name,
                                            bool AddToMainDylibSearchOrder) {
+  assert(!getJITDylibByName(Name) && "JITDylib with that name already exists");
   return runSessionLocked([&, this]() -> JITDylib & {
     JDs.push_back(
         std::unique_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
@@ -1610,74 +1634,36 @@ void ExecutionSession::legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err) {
 
 Expected<SymbolMap> ExecutionSession::legacyLookup(
     LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
-    bool WaitUntilReady, RegisterDependenciesFunction RegisterDependencies) {
+    SymbolState RequiredState,
+    RegisterDependenciesFunction RegisterDependencies) {
 #if LLVM_ENABLE_THREADS
   // In the threaded case we use promises to return the results.
   std::promise<SymbolMap> PromisedResult;
-  std::mutex ErrMutex;
   Error ResolutionError = Error::success();
-  std::promise<void> PromisedReady;
-  Error ReadyError = Error::success();
-  auto OnResolve = [&](Expected<SymbolMap> R) {
+  auto NotifyComplete = [&](Expected<SymbolMap> R) {
     if (R)
       PromisedResult.set_value(std::move(*R));
     else {
-      {
-        ErrorAsOutParameter _(&ResolutionError);
-        std::lock_guard<std::mutex> Lock(ErrMutex);
-        ResolutionError = R.takeError();
-      }
+      ErrorAsOutParameter _(&ResolutionError);
+      ResolutionError = R.takeError();
       PromisedResult.set_value(SymbolMap());
     }
   };
-
-  std::function<void(Error)> OnReady;
-  if (WaitUntilReady) {
-    OnReady = [&](Error Err) {
-      if (Err) {
-        ErrorAsOutParameter _(&ReadyError);
-        std::lock_guard<std::mutex> Lock(ErrMutex);
-        ReadyError = std::move(Err);
-      }
-      PromisedReady.set_value();
-    };
-  } else {
-    OnReady = [&](Error Err) {
-      if (Err)
-        reportError(std::move(Err));
-    };
-  }
-
 #else
   SymbolMap Result;
   Error ResolutionError = Error::success();
-  Error ReadyError = Error::success();
 
-  auto OnResolve = [&](Expected<SymbolMap> R) {
+  auto NotifyComplete = [&](Expected<SymbolMap> R) {
     ErrorAsOutParameter _(&ResolutionError);
     if (R)
       Result = std::move(*R);
     else
       ResolutionError = R.takeError();
   };
-
-  std::function<void(Error)> OnReady;
-  if (WaitUntilReady) {
-    OnReady = [&](Error Err) {
-      ErrorAsOutParameter _(&ReadyError);
-      if (Err)
-        ReadyError = std::move(Err);
-    };
-  } else {
-    OnReady = [&](Error Err) {
-      if (Err)
-        reportError(std::move(Err));
-    };
-  }
 #endif
 
   auto Query = std::make_shared<AsynchronousSymbolQuery>(
-      Names, std::move(OnResolve), std::move(OnReady));
+      Names, RequiredState, std::move(NotifyComplete));
   // FIXME: This should be run session locked along with the registration code
   // and error reporting below.
   SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
@@ -1701,39 +1687,13 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
 #if LLVM_ENABLE_THREADS
   auto ResultFuture = PromisedResult.get_future();
   auto Result = ResultFuture.get();
-
-  {
-    std::lock_guard<std::mutex> Lock(ErrMutex);
-    if (ResolutionError) {
-      // ReadyError will never be assigned. Consume the success value.
-      cantFail(std::move(ReadyError));
-      return std::move(ResolutionError);
-    }
-  }
-
-  if (WaitUntilReady) {
-    auto ReadyFuture = PromisedReady.get_future();
-    ReadyFuture.get();
-
-    {
-      std::lock_guard<std::mutex> Lock(ErrMutex);
-      if (ReadyError)
-        return std::move(ReadyError);
-    }
-  } else
-    cantFail(std::move(ReadyError));
-
+  if (ResolutionError)
+    return std::move(ResolutionError);
   return std::move(Result);
 
 #else
-  if (ResolutionError) {
-    // ReadyError will never be assigned. Consume the success value.
-    cantFail(std::move(ReadyError));
+  if (ResolutionError)
     return std::move(ResolutionError);
-  }
-
-  if (ReadyError)
-    return std::move(ReadyError);
 
   return Result;
 #endif
@@ -1741,9 +1701,16 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
 
 void ExecutionSession::lookup(
     const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
-    SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+    SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
     RegisterDependenciesFunction RegisterDependencies) {
 
+  LLVM_DEBUG({
+    runSessionLocked([&]() {
+      dbgs() << "Looking up " << Symbols << " in " << SearchOrder
+             << " (required state: " << RequiredState << ")\n";
+    });
+  });
+
   // lookup can be re-entered recursively if running on a single thread. Run any
   // outstanding MUs in case this query depends on them, otherwise this lookup
   // will starve waiting for a result from an MU that is stuck in the queue.
@@ -1751,38 +1718,32 @@ void ExecutionSession::lookup(
 
   auto Unresolved = std::move(Symbols);
   std::map<JITDylib *, MaterializationUnitList> CollectedMUsMap;
-  auto Q = std::make_shared<AsynchronousSymbolQuery>(
-      Unresolved, std::move(OnResolve), std::move(OnReady));
-  bool QueryIsFullyResolved = false;
-  bool QueryIsFullyReady = false;
-  bool QueryFailed = false;
-
-  runSessionLocked([&]() {
-    for (auto &KV : SearchOrder) {
-      assert(KV.first && "JITDylibList entries must not be null");
-      assert(!CollectedMUsMap.count(KV.first) &&
-             "JITDylibList should not contain duplicate entries");
-
-      auto &JD = *KV.first;
-      auto MatchNonExported = KV.second;
-      JD.lodgeQuery(Q, Unresolved, MatchNonExported, CollectedMUsMap[&JD]);
-    }
+  auto Q = std::make_shared<AsynchronousSymbolQuery>(Unresolved, RequiredState,
+                                                     std::move(NotifyComplete));
+  bool QueryComplete = false;
+
+  auto LodgingErr = runSessionLocked([&]() -> Error {
+    auto LodgeQuery = [&]() -> Error {
+      for (auto &KV : SearchOrder) {
+        assert(KV.first && "JITDylibList entries must not be null");
+        assert(!CollectedMUsMap.count(KV.first) &&
+               "JITDylibList should not contain duplicate entries");
+
+        auto &JD = *KV.first;
+        auto MatchNonExported = KV.second;
+        if (auto Err = JD.lodgeQuery(Q, Unresolved, MatchNonExported,
+                                     CollectedMUsMap[&JD]))
+          return Err;
+      }
 
-    if (Unresolved.empty()) {
-      // Query lodged successfully.
+      if (!Unresolved.empty())
+        return make_error<SymbolsNotFound>(std::move(Unresolved));
 
-      // Record whether this query is fully ready / resolved. We will use
-      // this to call handleFullyResolved/handleFullyReady outside the session
-      // lock.
-      QueryIsFullyResolved = Q->isFullyResolved();
-      QueryIsFullyReady = Q->isFullyReady();
+      return Error::success();
+    };
 
-      // Call the register dependencies function.
-      if (RegisterDependencies && !Q->QueryRegistrations.empty())
-        RegisterDependencies(Q->QueryRegistrations);
-    } else {
-      // Query failed due to unresolved symbols.
-      QueryFailed = true;
+    if (auto Err = LodgeQuery()) {
+      // Query failed.
 
       // Disconnect the query from its dependencies.
       Q->detach();
@@ -1791,19 +1752,32 @@ void ExecutionSession::lookup(
       for (auto &KV : CollectedMUsMap)
         for (auto &MU : KV.second)
           KV.first->replace(std::move(MU));
+
+      return Err;
     }
+
+    // Query lodged successfully.
+
+    // Record whether this query is already complete (i.e. has reached its
+    // required state). We will use this to call handleComplete outside the
+    // session lock.
+    QueryComplete = Q->isComplete();
+
+    // Call the register dependencies function.
+    if (RegisterDependencies && !Q->QueryRegistrations.empty())
+      RegisterDependencies(Q->QueryRegistrations);
+
+    return Error::success();
   });
 
-  if (QueryFailed) {
-    Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
+  if (LodgingErr) {
+    Q->handleFailed(std::move(LodgingErr));
     return;
-  } else {
-    if (QueryIsFullyResolved)
-      Q->handleFullyResolved();
-    if (QueryIsFullyReady)
-      Q->handleFullyReady();
   }
 
+  if (QueryComplete)
+    Q->handleComplete();
+
   // Move the MUs to the OutstandingMUs list, then materialize.
   {
     std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
@@ -1816,113 +1790,55 @@ void ExecutionSession::lookup(
   runOutstandingMUs();
 }
 
-Expected<SymbolMap> ExecutionSession::lookup(
-    const JITDylibSearchList &SearchOrder, const SymbolNameSet &Symbols,
-    RegisterDependenciesFunction RegisterDependencies, bool WaitUntilReady) {
+Expected<SymbolMap>
+ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
+                         const SymbolNameSet &Symbols,
+                         SymbolState RequiredState,
+                         RegisterDependenciesFunction RegisterDependencies) {
 #if LLVM_ENABLE_THREADS
   // In the threaded case we use promises to return the results.
   std::promise<SymbolMap> PromisedResult;
-  std::mutex ErrMutex;
   Error ResolutionError = Error::success();
-  std::promise<void> PromisedReady;
-  Error ReadyError = Error::success();
-  auto OnResolve = [&](Expected<SymbolMap> R) {
+
+  auto NotifyComplete = [&](Expected<SymbolMap> R) {
     if (R)
       PromisedResult.set_value(std::move(*R));
     else {
-      {
-        ErrorAsOutParameter _(&ResolutionError);
-        std::lock_guard<std::mutex> Lock(ErrMutex);
-        ResolutionError = R.takeError();
-      }
+      ErrorAsOutParameter _(&ResolutionError);
+      ResolutionError = R.takeError();
       PromisedResult.set_value(SymbolMap());
     }
   };
 
-  std::function<void(Error)> OnReady;
-  if (WaitUntilReady) {
-    OnReady = [&](Error Err) {
-      if (Err) {
-        ErrorAsOutParameter _(&ReadyError);
-        std::lock_guard<std::mutex> Lock(ErrMutex);
-        ReadyError = std::move(Err);
-      }
-      PromisedReady.set_value();
-    };
-  } else {
-    OnReady = [&](Error Err) {
-      if (Err)
-        reportError(std::move(Err));
-    };
-  }
-
 #else
   SymbolMap Result;
   Error ResolutionError = Error::success();
-  Error ReadyError = Error::success();
 
-  auto OnResolve = [&](Expected<SymbolMap> R) {
+  auto NotifyComplete = [&](Expected<SymbolMap> R) {
     ErrorAsOutParameter _(&ResolutionError);
     if (R)
       Result = std::move(*R);
     else
       ResolutionError = R.takeError();
   };
-
-  std::function<void(Error)> OnReady;
-  if (WaitUntilReady) {
-    OnReady = [&](Error Err) {
-      ErrorAsOutParameter _(&ReadyError);
-      if (Err)
-        ReadyError = std::move(Err);
-    };
-  } else {
-    OnReady = [&](Error Err) {
-      if (Err)
-        reportError(std::move(Err));
-    };
-  }
 #endif
 
   // Perform the asynchronous lookup.
-  lookup(SearchOrder, Symbols, OnResolve, OnReady, RegisterDependencies);
+  lookup(SearchOrder, Symbols, RequiredState, NotifyComplete,
+         RegisterDependencies);
 
 #if LLVM_ENABLE_THREADS
   auto ResultFuture = PromisedResult.get_future();
   auto Result = ResultFuture.get();
 
-  {
-    std::lock_guard<std::mutex> Lock(ErrMutex);
-    if (ResolutionError) {
-      // ReadyError will never be assigned. Consume the success value.
-      cantFail(std::move(ReadyError));
-      return std::move(ResolutionError);
-    }
-  }
-
-  if (WaitUntilReady) {
-    auto ReadyFuture = PromisedReady.get_future();
-    ReadyFuture.get();
-
-    {
-      std::lock_guard<std::mutex> Lock(ErrMutex);
-      if (ReadyError)
-        return std::move(ReadyError);
-    }
-  } else
-    cantFail(std::move(ReadyError));
+  if (ResolutionError)
+    return std::move(ResolutionError);
 
   return std::move(Result);
 
 #else
-  if (ResolutionError) {
-    // ReadyError will never be assigned. Consume the success value.
-    cantFail(std::move(ReadyError));
+  if (ResolutionError)
     return std::move(ResolutionError);
-  }
-
-  if (ReadyError)
-    return std::move(ReadyError);
 
   return Result;
 #endif
@@ -1933,8 +1849,8 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
                          SymbolStringPtr Name) {
   SymbolNameSet Names({Name});
 
-  if (auto ResultMap = lookup(SearchOrder, std::move(Names),
-                              NoDependenciesToRegister, true)) {
+  if (auto ResultMap = lookup(SearchOrder, std::move(Names), SymbolState::Ready,
+                              NoDependenciesToRegister)) {
     assert(ResultMap->size() == 1 && "Unexpected number of results");
     assert(ResultMap->count(Name) && "Missing result for symbol");
     return std::move(ResultMap->begin()->second);
diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 7c3c50b4d6e5..f7fc5f8f1797 100644
--- a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -1,9 +1,8 @@
 //===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -130,8 +129,7 @@ Error CtorDtorRunner::run() {
 
   auto &ES = JD.getExecutionSession();
   if (auto CtorDtorMap =
-          ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names),
-                    NoDependenciesToRegister, true)) {
+          ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names))) {
     for (auto &KV : CtorDtorsByPriority) {
       for (auto &Name : KV.second) {
         assert(CtorDtorMap->count(Name) && "No entry for Name");
@@ -140,13 +138,10 @@ Error CtorDtorRunner::run() {
         CtorDtor();
       }
     }
+    CtorDtorsByPriority.clear();
     return Error::success();
   } else
     return CtorDtorMap.takeError();
-
-  CtorDtorsByPriority.clear();
-
-  return Error::success();
 }
 
 void LocalCXXRuntimeOverridesBase::runDestructors() {
@@ -179,22 +174,24 @@ Error LocalCXXRuntimeOverrides::enable(JITDylib &JD,
 }
 
 DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator(
-    sys::DynamicLibrary Dylib, const DataLayout &DL, SymbolPredicate Allow)
+    sys::DynamicLibrary Dylib, char GlobalPrefix, SymbolPredicate Allow)
     : Dylib(std::move(Dylib)), Allow(std::move(Allow)),
-      GlobalPrefix(DL.getGlobalPrefix()) {}
+      GlobalPrefix(GlobalPrefix) {}
 
 Expected<DynamicLibrarySearchGenerator>
-DynamicLibrarySearchGenerator::Load(const char *FileName, const DataLayout &DL,
+DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix,
                                     SymbolPredicate Allow) {
   std::string ErrMsg;
   auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg);
   if (!Lib.isValid())
     return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
-  return DynamicLibrarySearchGenerator(std::move(Lib), DL, std::move(Allow));
+  return DynamicLibrarySearchGenerator(std::move(Lib), GlobalPrefix,
+                                       std::move(Allow));
 }
 
-SymbolNameSet DynamicLibrarySearchGenerator::
-operator()(JITDylib &JD, const SymbolNameSet &Names) {
+Expected<SymbolNameSet>
+DynamicLibrarySearchGenerator::operator()(JITDylib &JD,
+                                          const SymbolNameSet &Names) {
   orc::SymbolNameSet Added;
   orc::SymbolMap NewSymbols;
 
@@ -210,7 +207,8 @@ operator()(JITDylib &JD, const SymbolNameSet &Names) {
     if (HasGlobalPrefix && (*Name).front() != GlobalPrefix)
       continue;
 
-    std::string Tmp((*Name).data() + (HasGlobalPrefix ? 1 : 0), (*Name).size());
+    std::string Tmp((*Name).data() + HasGlobalPrefix,
+                    (*Name).size() - HasGlobalPrefix);
     if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) {
       Added.insert(Name);
       NewSymbols[Name] = JITEvaluatedSymbol(
diff --git a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index d952d1be70da..81dfc02f55b2 100644
--- a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -1,9 +1,8 @@
 //===--------------- IRCompileLayer.cpp - IR Compiling Layer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 7bc0d696e3ac..e3519284613e 100644
--- a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -1,9 +1,8 @@
 //===-------------- IRTransformLayer.cpp - IR Transform Layer -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 82000ec5b32b..cc3656fe5dc5 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -1,9 +1,8 @@
 //===---- IndirectionUtils.cpp - Utilities for call indirection in Orc ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,8 +37,8 @@ private:
   void materialize(MaterializationResponsibility R) override {
     SymbolMap Result;
     Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
-    R.resolve(Result);
-    R.emit();
+    R.notifyResolved(Result);
+    R.notifyEmitted();
   }
 
   void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
@@ -238,11 +237,11 @@ void makeStub(Function &F, Value &ImplPointer) {
   Module &M = *F.getParent();
   BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", &F);
   IRBuilder<> Builder(EntryBlock);
-  LoadInst *ImplAddr = Builder.CreateLoad(&ImplPointer);
+  LoadInst *ImplAddr = Builder.CreateLoad(F.getType(), &ImplPointer);
   std::vector<Value*> CallArgs;
   for (auto &A : F.args())
     CallArgs.push_back(&A);
-  CallInst *Call = Builder.CreateCall(ImplAddr, CallArgs);
+  CallInst *Call = Builder.CreateCall(F.getFunctionType(), ImplAddr, CallArgs);
   Call->setTailCall();
   Call->setAttributes(F.getAttributes());
   if (F.getReturnType()->isVoidTy())
diff --git a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 4af09d196ff9..df23547a9de3 100644
--- a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -1,9 +1,8 @@
 //===----- JITTargetMachineBuilder.cpp - Build TargetMachines for JIT -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp
index e2089f9106bd..b120691faf07 100644
--- a/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -1,58 +1,37 @@
 //===--------- LLJIT.cpp - An ORC-based JIT for compiling LLVM IR ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/Orc/LLJIT.h"
 #include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/Mangler.h"
 
-namespace {
+namespace llvm {
+namespace orc {
 
-  // A SimpleCompiler that owns its TargetMachine.
-  class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler {
-  public:
-    TMOwningSimpleCompiler(std::unique_ptr<llvm::TargetMachine> TM)
-      : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {}
-  private:
-    // FIXME: shared because std::functions (and thus
-    // IRCompileLayer::CompileFunction) are not moveable.
-    std::shared_ptr<llvm::TargetMachine> TM;
-  };
+Error LLJITBuilderState::prepareForConstruction() {
 
-} // end anonymous namespace
+  if (!JTMB) {
+    if (auto JTMBOrErr = JITTargetMachineBuilder::detectHost())
+      JTMB = std::move(*JTMBOrErr);
+    else
+      return JTMBOrErr.takeError();
+  }
 
-namespace llvm {
-namespace orc {
+  return Error::success();
+}
 
 LLJIT::~LLJIT() {
   if (CompileThreads)
     CompileThreads->wait();
 }
 
-Expected<std::unique_ptr<LLJIT>>
-LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
-              unsigned NumCompileThreads) {
-
-  if (NumCompileThreads == 0) {
-    // If NumCompileThreads == 0 then create a single-threaded LLJIT instance.
-    auto TM = JTMB.createTargetMachine();
-    if (!TM)
-      return TM.takeError();
-    return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
-                                            std::move(*TM), std::move(DL)));
-  }
-
-  return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
-                                          std::move(JTMB), std::move(DL),
-                                          NumCompileThreads));
-}
-
 Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
   auto InternedName = ES->intern(Name);
   SymbolMap Symbols({{InternedName, Sym}});
@@ -65,13 +44,13 @@ Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
   if (auto Err = applyDataLayout(*TSM.getModule()))
     return Err;
 
-  return CompileLayer.add(JD, std::move(TSM), ES->allocateVModule());
+  return CompileLayer->add(JD, std::move(TSM), ES->allocateVModule());
 }
 
 Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
   assert(Obj && "Can not add null object");
 
-  return ObjLinkingLayer.add(JD, std::move(Obj), ES->allocateVModule());
+  return ObjLinkingLayer->add(JD, std::move(Obj), ES->allocateVModule());
 }
 
 Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
@@ -79,42 +58,76 @@ Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
   return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
 }
 
-LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
-             std::unique_ptr<TargetMachine> TM, DataLayout DL)
-    : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
-      ObjLinkingLayer(
-          *this->ES,
-          []() { return llvm::make_unique<SectionMemoryManager>(); }),
-      CompileLayer(*this->ES, ObjLinkingLayer,
-                   TMOwningSimpleCompiler(std::move(TM))),
-      CtorRunner(Main), DtorRunner(Main) {}
-
-LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
-             DataLayout DL, unsigned NumCompileThreads)
-    : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
-      ObjLinkingLayer(
-          *this->ES,
-          []() { return llvm::make_unique<SectionMemoryManager>(); }),
-      CompileLayer(*this->ES, ObjLinkingLayer,
-                   ConcurrentIRCompiler(std::move(JTMB))),
-      CtorRunner(Main), DtorRunner(Main) {
-  assert(NumCompileThreads != 0 &&
-         "Multithreaded LLJIT instance can not be created with 0 threads");
-
-  // Move modules to new contexts when they're emitted so that we can compile
-  // them in parallel.
-  CompileLayer.setCloneToNewContextOnEmit(true);
-
-  // Create a thread pool to compile on and set the execution session
-  // dispatcher to use the thread pool.
-  CompileThreads = llvm::make_unique<ThreadPool>(NumCompileThreads);
-  this->ES->setDispatchMaterialization(
-      [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
-        // FIXME: Switch to move capture once we have c++14.
-        auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
-        auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
-        CompileThreads->async(std::move(Work));
-      });
+std::unique_ptr<ObjectLayer>
+LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
+
+  // If the config state provided an ObjectLinkingLayer factory then use it.
+  if (S.CreateObjectLinkingLayer)
+    return S.CreateObjectLinkingLayer(ES);
+
+  // Otherwise default to creating an RTDyldObjectLinkingLayer that constructs
+  // a new SectionMemoryManager for each object.
+  auto GetMemMgr = []() { return llvm::make_unique<SectionMemoryManager>(); };
+  return llvm::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
+}
+
+Expected<IRCompileLayer::CompileFunction>
+LLJIT::createCompileFunction(LLJITBuilderState &S,
+                             JITTargetMachineBuilder JTMB) {
+
+  // If there is a custom compile function creator set then use it.
+  if (S.CreateCompileFunction)
+    return S.CreateCompileFunction(std::move(JTMB));
+
+  // Otherwise default to creating a SimpleCompiler, or ConcurrentIRCompiler,
+  // depending on the number of threads requested.
+  if (S.NumCompileThreads > 0)
+    return ConcurrentIRCompiler(std::move(JTMB));
+
+  auto TM = JTMB.createTargetMachine();
+  if (!TM)
+    return TM.takeError();
+
+  return TMOwningSimpleCompiler(std::move(*TM));
+}
+
+LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
+    : ES(S.ES ? std::move(S.ES) : llvm::make_unique<ExecutionSession>()),
+      Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main),
+      DtorRunner(Main) {
+
+  ErrorAsOutParameter _(&Err);
+
+  ObjLinkingLayer = createObjectLinkingLayer(S, *ES);
+
+  if (auto DLOrErr = S.JTMB->getDefaultDataLayoutForTarget())
+    DL = std::move(*DLOrErr);
+  else {
+    Err = DLOrErr.takeError();
+    return;
+  }
+
+  {
+    auto CompileFunction = createCompileFunction(S, std::move(*S.JTMB));
+    if (!CompileFunction) {
+      Err = CompileFunction.takeError();
+      return;
+    }
+    CompileLayer = llvm::make_unique<IRCompileLayer>(
+        *ES, *ObjLinkingLayer, std::move(*CompileFunction));
+  }
+
+  if (S.NumCompileThreads > 0) {
+    CompileLayer->setCloneToNewContextOnEmit(true);
+    CompileThreads = llvm::make_unique<ThreadPool>(S.NumCompileThreads);
+    ES->setDispatchMaterialization(
+        [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
+          // FIXME: Switch to move capture once we have c++14.
+          auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+          auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
+          CompileThreads->async(std::move(Work));
+        });
+  }
 }
 
 std::string LLJIT::mangle(StringRef UnmangledName) {
@@ -143,35 +156,11 @@ void LLJIT::recordCtorDtors(Module &M) {
   DtorRunner.add(getDestructors(M));
 }
 
-Expected<std::unique_ptr<LLLazyJIT>>
-LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
-                  JITTargetAddress ErrorAddr, unsigned NumCompileThreads) {
-  auto ES = llvm::make_unique<ExecutionSession>();
-
-  const Triple &TT = JTMB.getTargetTriple();
-
-  auto LCTMgr = createLocalLazyCallThroughManager(TT, *ES, ErrorAddr);
-  if (!LCTMgr)
-    return LCTMgr.takeError();
-
-  auto ISMBuilder = createLocalIndirectStubsManagerBuilder(TT);
-  if (!ISMBuilder)
-    return make_error<StringError>(
-        std::string("No indirect stubs manager builder for ") + TT.str(),
-        inconvertibleErrorCode());
-
-  if (NumCompileThreads == 0) {
-    auto TM = JTMB.createTargetMachine();
-    if (!TM)
-      return TM.takeError();
-    return std::unique_ptr<LLLazyJIT>(
-        new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL),
-                      std::move(*LCTMgr), std::move(ISMBuilder)));
-  }
-
-  return std::unique_ptr<LLLazyJIT>(new LLLazyJIT(
-      std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads,
-      std::move(*LCTMgr), std::move(ISMBuilder)));
+Error LLLazyJITBuilderState::prepareForConstruction() {
+  if (auto Err = LLJITBuilderState::prepareForConstruction())
+    return Err; // Propagate a failure from the base-class preparation.
+  TT = JTMB->getTargetTriple(); // Record the target triple taken from JTMB.
+  return Error::success();
 }
 
 Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
@@ -182,28 +171,55 @@ Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
 
   recordCtorDtors(*TSM.getModule());
 
-  return CODLayer.add(JD, std::move(TSM), ES->allocateVModule());
+  return CODLayer->add(JD, std::move(TSM), ES->allocateVModule());
 }
 
-LLLazyJIT::LLLazyJIT(
-    std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
-    DataLayout DL, std::unique_ptr<LazyCallThroughManager> LCTMgr,
-    std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
-    : LLJIT(std::move(ES), std::move(TM), std::move(DL)),
-      LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
-      CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
-               std::move(ISMBuilder)) {}
-
-LLLazyJIT::LLLazyJIT(
-    std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
-    DataLayout DL, unsigned NumCompileThreads,
-    std::unique_ptr<LazyCallThroughManager> LCTMgr,
-    std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
-    : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads),
-      LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
-      CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
-               std::move(ISMBuilder)) {
-  CODLayer.setCloneToNewContextOnEmit(true);
+LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
+  // Adds the lazy-compilation layers to LLJIT; failures are written to Err.
+  // If LLJIT construction failed then bail out.
+  if (Err)
+    return;
+
+  ErrorAsOutParameter _(&Err);
+
+  // Take/Create the lazy-compile callthrough manager.
+  if (S.LCTMgr)
+    LCTMgr = std::move(S.LCTMgr);
+  else {
+    if (auto LCTMgrOrErr = createLocalLazyCallThroughManager(
+            S.TT, *ES, S.LazyCompileFailureAddr))
+      LCTMgr = std::move(*LCTMgrOrErr);
+    else {
+      Err = LCTMgrOrErr.takeError();
+      return;
+    }
+  }
+
+  // Take/Create the indirect stubs manager builder.
+  auto ISMBuilder = std::move(S.ISMBuilder);
+
+  // If none was provided, try to build one.
+  if (!ISMBuilder)
+    ISMBuilder = createLocalIndirectStubsManagerBuilder(S.TT);
+
+  // No luck. Bail out.
+  if (!ISMBuilder) {
+    Err = make_error<StringError>("Could not construct "
+                                  "IndirectStubsManagerBuilder for target " +
+                                      S.TT.str(),
+                                  inconvertibleErrorCode());
+    return;
+  }
+
+  // Create the transform layer.
+  TransformLayer = llvm::make_unique<IRTransformLayer>(*ES, *CompileLayer);
+
+  // Create the COD layer.
+  CODLayer = llvm::make_unique<CompileOnDemandLayer>(
+      *ES, *TransformLayer, *LCTMgr, std::move(ISMBuilder));
+
+  if (S.NumCompileThreads > 0)
+    CODLayer->setCloneToNewContextOnEmit(true);
 }
 
 } // End namespace orc.
diff --git a/lib/ExecutionEngine/Orc/Layer.cpp b/lib/ExecutionEngine/Orc/Layer.cpp
index 11af76825e9f..3ed2dabf4545 100644
--- a/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/lib/ExecutionEngine/Orc/Layer.cpp
@@ -1,9 +1,8 @@
 //===-------------------- Layer.cpp - Layer interfaces --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -88,17 +87,15 @@ void BasicIRLayerMaterializationUnit::materialize(
 
 #ifndef NDEBUG
   auto &ES = R.getTargetJITDylib().getExecutionSession();
+  auto &N = R.getTargetJITDylib().getName();
 #endif // NDEBUG
 
   auto Lock = TSM.getContextLock();
-  LLVM_DEBUG(ES.runSessionLocked([&]() {
-    dbgs() << "Emitting, for " << R.getTargetJITDylib().getName() << ", "
-           << *this << "\n";
-  }););
+  LLVM_DEBUG(ES.runSessionLocked(
+      [&]() { dbgs() << "Emitting, for " << N << ", " << *this << "\n"; }););
   L.emit(std::move(R), std::move(TSM));
   LLVM_DEBUG(ES.runSessionLocked([&]() {
-    dbgs() << "Finished emitting, for " << R.getTargetJITDylib().getName()
-           << ", " << *this << "\n";
+    dbgs() << "Finished emitting, for " << N << ", " << *this << "\n";
   }););
 }
 
diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 55f4a7c5afce..fc8205845654 100644
--- a/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -1,9 +1,8 @@
 //===---------- LazyReexports.cpp - Utilities for lazy reexports ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -52,18 +51,15 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
     SymbolName = I->second.second;
   }
 
-  auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}),
-                                {SymbolName}, NoDependenciesToRegister, true);
+  auto LookupResult =
+      ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName);
 
   if (!LookupResult) {
     ES.reportError(LookupResult.takeError());
     return ErrorHandlerAddr;
   }
 
-  assert(LookupResult->size() == 1 && "Unexpected number of results");
-  assert(LookupResult->count(SymbolName) && "Unexpected result");
-
-  auto ResolvedAddr = LookupResult->begin()->second.getAddress();
+  auto ResolvedAddr = LookupResult->getAddress();
 
   std::shared_ptr<NotifyResolvedFunction> NotifyResolved = nullptr;
   {
@@ -182,8 +178,8 @@ void LazyReexportsMaterializationUnit::materialize(
   for (auto &Alias : RequestedAliases)
     Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
 
-  R.resolve(Stubs);
-  R.emit();
+  R.notifyResolved(Stubs);
+  R.notifyEmitted();
 }
 
 void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp
index ddb72544b770..ce6368b57a89 100644
--- a/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -1,9 +1,8 @@
 //===------- Legacy.cpp - Adapters for ExecutionEngine API interop --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,8 +36,7 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
   };
 
   auto Q = std::make_shared<AsynchronousSymbolQuery>(
-      InternedSymbols, OnResolvedWithUnwrap,
-      [this](Error Err) { ES.reportError(std::move(Err)); });
+      InternedSymbols, SymbolState::Resolved, OnResolvedWithUnwrap);
 
   auto Unresolved = R.lookup(Q, InternedSymbols);
   if (Unresolved.empty()) {
diff --git a/lib/ExecutionEngine/Orc/NullResolver.cpp b/lib/ExecutionEngine/Orc/NullResolver.cpp
index 922fc6f021ce..5b4345b870bb 100644
--- a/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ b/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -1,9 +1,8 @@
 //===---------- NullResolver.cpp - Reject symbol lookup requests ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
new file mode 100644
index 000000000000..def0b300eca1
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -0,0 +1,483 @@
+//===------- ObjectLinkingLayer.cpp - JITLink backed ORC ObjectLayer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+
+#include <vector>
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::orc;
+
+namespace llvm {
+namespace orc {
+
+class ObjectLinkingLayerJITLinkContext final : public JITLinkContext {
+public:
+  ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer,
+                                   MaterializationResponsibility MR,
+                                   std::unique_ptr<MemoryBuffer> ObjBuffer)
+      : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {}
+
+  JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; }
+
+  MemoryBufferRef getObjectBuffer() const override {
+    return ObjBuffer->getMemBufferRef();
+  }
+  /// Report Err to the execution session, then fail the materialization.
+  void notifyFailed(Error Err) override {
+    Layer.getExecutionSession().reportError(std::move(Err));
+    MR.failMaterialization();
+  }
+
+  void lookup(const DenseSet<StringRef> &Symbols,
+              JITLinkAsyncLookupContinuation LookupContinuation) override {
+    // Resolve Symbols via the session and forward the result to the linker.
+    JITDylibSearchList SearchOrder;
+    MR.getTargetJITDylib().withSearchOrderDo(
+        [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
+
+    auto &ES = Layer.getExecutionSession();
+
+    SymbolNameSet InternedSymbols;
+    for (auto &S : Symbols)
+      InternedSymbols.insert(ES.intern(S));
+
+    // OnResolve -- De-intern the symbols and pass the result to the linker.
+    // FIXME: Capture LookupContinuation by move once we have c++14.
+    auto SharedLookupContinuation =
+        std::make_shared<JITLinkAsyncLookupContinuation>(
+            std::move(LookupContinuation));
+    auto OnResolve = [SharedLookupContinuation](Expected<SymbolMap> Result) {
+      if (!Result)
+        (*SharedLookupContinuation)(Result.takeError());
+      else {
+        AsyncLookupResult LR;
+        for (auto &KV : *Result)
+          LR[*KV.first] = KV.second;
+        (*SharedLookupContinuation)(std::move(LR));
+      }
+    };
+    // Require only the Resolved state; record query dependencies on MR.
+    ES.lookup(SearchOrder, std::move(InternedSymbols), SymbolState::Resolved,
+              std::move(OnResolve), [this](const SymbolDependenceMap &Deps) {
+                registerDependencies(Deps);
+              });
+  }
+  /// Report the addresses and flags of the graph's named symbols to MR.
+  void notifyResolved(AtomGraph &G) override {
+    auto &ES = Layer.getExecutionSession();
+
+    SymbolFlagsMap ExtraSymbolsToClaim;
+    bool AutoClaim = Layer.AutoClaimObjectSymbols;
+
+    SymbolMap InternedResult;
+    for (auto *DA : G.defined_atoms())
+      if (DA->hasName() && DA->isGlobal()) {
+        auto InternedName = ES.intern(DA->getName());
+        JITSymbolFlags Flags;
+
+        if (DA->isExported())
+          Flags |= JITSymbolFlags::Exported;
+        if (DA->isWeak())
+          Flags |= JITSymbolFlags::Weak;
+        if (DA->isCallable())
+          Flags |= JITSymbolFlags::Callable;
+        if (DA->isCommon())
+          Flags |= JITSymbolFlags::Common;
+
+        InternedResult[InternedName] =
+            JITEvaluatedSymbol(DA->getAddress(), Flags);
+        if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+          assert(!ExtraSymbolsToClaim.count(InternedName) &&
+                 "Duplicate symbol to claim?");
+          ExtraSymbolsToClaim[InternedName] = Flags;
+        }
+      }
+
+    for (auto *A : G.absolute_atoms())
+      if (A->hasName()) {
+        auto InternedName = ES.intern(A->getName());
+        JITSymbolFlags Flags;
+        Flags |= JITSymbolFlags::Absolute;
+        if (A->isWeak())
+          Flags |= JITSymbolFlags::Weak;
+        if (A->isCallable())
+          Flags |= JITSymbolFlags::Callable;
+        InternedResult[InternedName] =
+            JITEvaluatedSymbol(A->getAddress(), Flags);
+        if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+          assert(!ExtraSymbolsToClaim.count(InternedName) &&
+                 "Duplicate symbol to claim?");
+          ExtraSymbolsToClaim[InternedName] = Flags;
+        }
+      }
+    // Claim any extra symbols first; an error fails the materialization.
+    if (!ExtraSymbolsToClaim.empty())
+      if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim))
+        return notifyFailed(std::move(Err));
+
+    MR.notifyResolved(InternedResult);
+
+    Layer.notifyLoaded(MR);
+  }
+
+  void notifyFinalized(
+      std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
+    // Pass the allocation to the layer; on error, fail the materialization.
+    if (auto Err = Layer.notifyEmitted(MR, std::move(A))) {
+      Layer.getExecutionSession().reportError(std::move(Err));
+      MR.failMaterialization();
+
+      return;
+    }
+    MR.notifyEmitted();
+  }
+
+  AtomGraphPassFunction getMarkLivePass(const Triple &TT) const override {
+    return [this](AtomGraph &G) { return markResponsibilitySymbolsLive(G); };
+  }
+
+  Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override {
+    // Add passes to mark duplicate defs as should-discard, and to walk the
+    // atom graph to build the symbol dependence graph.
+    Config.PrePrunePasses.push_back(
+        [this](AtomGraph &G) { return markSymbolsToDiscard(G); });
+    Config.PostPrunePasses.push_back(
+        [this](AtomGraph &G) { return computeNamedSymbolDependencies(G); });
+    // Let the layer (and thereby its plugins) adjust the configuration.
+    Layer.modifyPassConfig(MR, TT, Config);
+
+    return Error::success();
+  }
+
+private:
+  using AnonAtomNamedDependenciesMap =
+      DenseMap<const DefinedAtom *, SymbolNameSet>;
+  /// Mark weak named atoms that are not in MR.getSymbols() as should-discard.
+  Error markSymbolsToDiscard(AtomGraph &G) {
+    auto &ES = Layer.getExecutionSession();
+    for (auto *DA : G.defined_atoms())
+      if (DA->isWeak() && DA->hasName()) {
+        auto S = ES.intern(DA->getName());
+        auto I = MR.getSymbols().find(S);
+        if (I == MR.getSymbols().end())
+          DA->setShouldDiscard(true);
+      }
+
+    for (auto *A : G.absolute_atoms())
+      if (A->isWeak() && A->hasName()) {
+        auto S = ES.intern(A->getName());
+        auto I = MR.getSymbols().find(S);
+        if (I == MR.getSymbols().end())
+          A->setShouldDiscard(true);
+      }
+
+    return Error::success();
+  }
+  /// Mark named defined atoms whose symbols are in MR.getSymbols() as live.
+  Error markResponsibilitySymbolsLive(AtomGraph &G) const {
+    auto &ES = Layer.getExecutionSession();
+    for (auto *DA : G.defined_atoms())
+      if (DA->hasName() &&
+          MR.getSymbols().count(ES.intern(DA->getName())))
+        DA->setLive(true);
+    return Error::success();
+  }
+  /// Fill NamedSymbolDeps with the named dependencies of named global atoms.
+  Error computeNamedSymbolDependencies(AtomGraph &G) {
+    auto &ES = MR.getTargetJITDylib().getExecutionSession();
+    auto AnonDeps = computeAnonDeps(G);
+
+    for (auto *DA : G.defined_atoms()) {
+
+      // Skip anonymous and non-global atoms: we do not need dependencies for
+      // these.
+      if (!DA->hasName() || !DA->isGlobal())
+        continue;
+
+      auto DAName = ES.intern(DA->getName());
+      SymbolNameSet &DADeps = NamedSymbolDeps[DAName];
+
+      for (auto &E : DA->edges()) {
+        auto &TA = E.getTarget();
+
+        if (TA.hasName())
+          DADeps.insert(ES.intern(TA.getName()));
+        else {
+          assert(TA.isDefined() && "Anonymous atoms must be defined");
+          auto &DTA = static_cast<DefinedAtom &>(TA);
+          auto I = AnonDeps.find(&DTA);
+          if (I != AnonDeps.end())
+            for (auto &S : I->second)
+              DADeps.insert(S);
+        }
+      }
+    }
+
+    return Error::success();
+  }
+
+  AnonAtomNamedDependenciesMap computeAnonDeps(AtomGraph &G) {
+    // Map each anonymous atom to the named symbols it (transitively) targets.
+    auto &ES = MR.getTargetJITDylib().getExecutionSession();
+    AnonAtomNamedDependenciesMap DepMap;
+
+    // For all anonymous atoms:
+    // (1) Add their named dependencies.
+    // (2) Add them to the worklist for further iteration if they depend on
+    //     any other anonymous atoms.
+    struct WorklistEntry {
+      WorklistEntry(DefinedAtom *DA, DenseSet<DefinedAtom *> DAAnonDeps)
+          : DA(DA), DAAnonDeps(std::move(DAAnonDeps)) {}
+
+      DefinedAtom *DA = nullptr;
+      DenseSet<DefinedAtom *> DAAnonDeps;
+    };
+    std::vector<WorklistEntry> Worklist;
+    for (auto *DA : G.defined_atoms())
+      if (!DA->hasName()) {
+        auto &DANamedDeps = DepMap[DA];
+        DenseSet<DefinedAtom *> DAAnonDeps;
+
+        for (auto &E : DA->edges()) {
+          auto &TA = E.getTarget();
+          if (TA.hasName())
+            DANamedDeps.insert(ES.intern(TA.getName()));
+          else {
+            assert(TA.isDefined() && "Anonymous atoms must be defined");
+            DAAnonDeps.insert(static_cast<DefinedAtom *>(&TA));
+          }
+        }
+
+        if (!DAAnonDeps.empty())
+          Worklist.push_back(WorklistEntry(DA, std::move(DAAnonDeps)));
+      }
+
+    // Loop over all anonymous atoms with anonymous dependencies, propagating
+    // their respective *named* dependencies. Iterate until we hit a stable
+    // state.
+    bool Changed;
+    do {
+      Changed = false;
+      for (auto &WLEntry : Worklist) {
+        auto *DA = WLEntry.DA;
+        auto &DANamedDeps = DepMap[DA];
+        auto &DAAnonDeps = WLEntry.DAAnonDeps;
+
+        for (auto *TA : DAAnonDeps) {
+          auto I = DepMap.find(TA);
+          if (I != DepMap.end())
+            for (const auto &S : I->second)
+              Changed |= DANamedDeps.insert(S).second;
+        }
+      }
+    } while (Changed);
+
+    return DepMap;
+  }
+  /// For each named symbol, add the query dependencies it uses to MR.
+  void registerDependencies(const SymbolDependenceMap &QueryDeps) {
+    for (auto &NamedDepsEntry : NamedSymbolDeps) {
+      auto &Name = NamedDepsEntry.first;
+      auto &NameDeps = NamedDepsEntry.second;
+      SymbolDependenceMap SymbolDeps;
+
+      for (const auto &QueryDepsEntry : QueryDeps) {
+        JITDylib &SourceJD = *QueryDepsEntry.first;
+        const SymbolNameSet &Symbols = QueryDepsEntry.second;
+        auto &DepsForJD = SymbolDeps[&SourceJD];
+
+        for (const auto &S : Symbols)
+          if (NameDeps.count(S))
+            DepsForJD.insert(S);
+
+        if (DepsForJD.empty())
+          SymbolDeps.erase(&SourceJD);
+      }
+
+      MR.addDependencies(Name, SymbolDeps);
+    }
+  }
+
+  ObjectLinkingLayer &Layer;
+  MaterializationResponsibility MR;
+  std::unique_ptr<MemoryBuffer> ObjBuffer;
+  DenseMap<SymbolStringPtr, SymbolNameSet> NamedSymbolDeps;
+};
+
+ObjectLinkingLayer::Plugin::~Plugin() = default; // Nothing to release here.
+
+ObjectLinkingLayer::ObjectLinkingLayer(ExecutionSession &ES,
+                                       JITLinkMemoryManager &MemMgr)
+    : ObjectLayer(ES), MemMgr(MemMgr) {} // Binds the layer to ES and MemMgr.
+
+ObjectLinkingLayer::~ObjectLinkingLayer() {
+  if (Error RemoveErr = removeAllModules()) // Dtors cannot return an Error,
+    getExecutionSession().reportError(std::move(RemoveErr)); // so report it.
+}
+
+void ObjectLinkingLayer::emit(MaterializationResponsibility R,
+                              std::unique_ptr<MemoryBuffer> O) {
+  assert(O && "Object must not be null");
+  jitLink(llvm::make_unique<ObjectLinkingLayerJITLinkContext>(
+      *this, std::move(R), std::move(O))); // The context takes R and O.
+}
+
+void ObjectLinkingLayer::modifyPassConfig(MaterializationResponsibility &MR,
+                                          const Triple &TT,
+                                          PassConfiguration &PassConfig) {
+  for (auto &P : Plugins) // Each plugin may modify PassConfig in turn.
+    P->modifyPassConfig(MR, TT, PassConfig);
+}
+
+void ObjectLinkingLayer::notifyLoaded(MaterializationResponsibility &MR) {
+  for (auto &P : Plugins) // Forward the notification to every plugin.
+    P->notifyLoaded(MR);
+}
+
+Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
+                                        AllocPtr Alloc) {
+  // Notify every plugin, joining whatever errors they return.
+  Error PluginErrs = Error::success();
+  for (auto &P : Plugins)
+    PluginErrs = joinErrors(std::move(PluginErrs), P->notifyEmitted(MR));
+  if (PluginErrs)
+    return PluginErrs;
+
+  // No plugin failed: record the allocation as untracked under the mutex.
+  {
+    std::lock_guard<std::mutex> Lock(LayerMutex);
+    UntrackedAllocs.push_back(std::move(Alloc));
+  }
+  return Error::success();
+}
+
+/// Removes module K: notifies plugins, then frees K's tracked allocation.
+Error ObjectLinkingLayer::removeModule(VModuleKey K) {
+  Error Err = Error::success();
+  for (auto &P : Plugins)
+    Err = joinErrors(std::move(Err), P->notifyRemovingModule(K));
+  AllocPtr Alloc;
+  {
+    std::lock_guard<std::mutex> Lock(LayerMutex);
+    auto AllocItr = TrackedAllocs.find(K);
+    if (AllocItr != TrackedAllocs.end()) { // Never dereference end().
+      Alloc = std::move(AllocItr->second);
+      TrackedAllocs.erase(AllocItr);
+    }
+  }
+  assert(Alloc && "No allocation for key K");
+  if (!Alloc) // Release builds: unknown key, so only plugin errors remain.
+    return Err;
+  return joinErrors(std::move(Err), Alloc->deallocate());
+}
+
+Error ObjectLinkingLayer::removeAllModules() {
+  // Tell every plugin first, accumulating their errors.
+  Error Result = Error::success();
+  for (auto &P : Plugins)
+    Result = joinErrors(std::move(Result), P->notifyRemovingAllModules());
+
+  // Take every allocation out of the layer while holding the mutex, so the
+  // deallocation calls below run without the lock held.
+  std::vector<AllocPtr> Pending;
+  {
+    std::lock_guard<std::mutex> Lock(LayerMutex);
+    Pending = std::move(UntrackedAllocs);
+    for (auto &Entry : TrackedAllocs)
+      Pending.push_back(std::move(Entry.second));
+    TrackedAllocs.clear();
+  }
+
+  // Deallocate from the back, dropping each allocation right after it has
+  // been released, and fold any failures into the result.
+  while (!Pending.empty()) {
+    Result = joinErrors(std::move(Result), Pending.back()->deallocate());
+    Pending.pop_back();
+  }
+  return Result;
+}
+
+EHFrameRegistrationPlugin::EHFrameRegistrationPlugin(
+    jitlink::EHFrameRegistrar &Registrar)
+    : Registrar(Registrar) {} // Registrar performs the (de)registration.
+
+void EHFrameRegistrationPlugin::modifyPassConfig(
+    MaterializationResponsibility &MR, const Triple &TT,
+    PassConfiguration &PassConfig) {
+  assert(!InProcessLinks.count(&MR) && "Link for MR already being tracked?");
+  auto RecordAddr = [this, &MR](JITTargetAddress Addr) {
+    if (Addr) // Only a non-null address is stored for MR.
+      InProcessLinks[&MR] = Addr;
+  };
+  PassConfig.PostFixupPasses.push_back(
+      createEHFrameRecorderPass(TT, std::move(RecordAddr)));
+}
+
+Error EHFrameRegistrationPlugin::notifyEmitted(
+    MaterializationResponsibility &MR) {
+  // No eh-frame address was recorded for this link: nothing to register.
+  auto LinkItr = InProcessLinks.find(&MR);
+  if (LinkItr == InProcessLinks.end())
+    return Error::success();
+
+  // Copy the address out, then drop the entry from the in-process map.
+  const auto FrameAddr = LinkItr->second;
+  assert(FrameAddr && "eh-frame addr to register can not be null");
+  InProcessLinks.erase(LinkItr);
+  // File the address under the module key when MR has a non-zero one.
+  if (auto Key = MR.getVModuleKey())
+    TrackedEHFrameAddrs[Key] = FrameAddr;
+  else
+    UntrackedEHFrameAddrs.push_back(FrameAddr);
+  return Registrar.registerEHFrames(FrameAddr);
+}
+
+Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) {
+  // Modules without a tracked eh-frame address need no deregistration.
+  auto TrackedItr = TrackedEHFrameAddrs.find(K);
+  if (TrackedItr == TrackedEHFrameAddrs.end())
+    return Error::success();
+
+  // Copy the address out before erasing the entry that holds it.
+  const auto FrameAddr = TrackedItr->second;
+  assert(FrameAddr && "Tracked eh-frame addr must not be null");
+  TrackedEHFrameAddrs.erase(TrackedItr);
+  return Registrar.deregisterEHFrames(FrameAddr);
+}
+
+Error EHFrameRegistrationPlugin::notifyRemovingAllModules() {
+  // Gather every recorded address, untracked ones first, into one list.
+  std::vector<JITTargetAddress> Pending = std::move(UntrackedEHFrameAddrs);
+  Pending.reserve(Pending.size() + TrackedEHFrameAddrs.size());
+  for (auto &Entry : TrackedEHFrameAddrs)
+    Pending.push_back(Entry.second);
+  TrackedEHFrameAddrs.clear();
+
+  // Deregister from the back of the list, joining any errors that occur so
+  // that one failure does not stop the remaining deregistrations.
+  Error Result = Error::success();
+  while (!Pending.empty()) {
+    const auto FrameAddr = Pending.back();
+    assert(FrameAddr && "Untracked eh-frame addr must not be null");
+    Pending.pop_back();
+    Result =
+        joinErrors(std::move(Result), Registrar.deregisterEHFrames(FrameAddr));
+  }
+
+  return Result;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index 825f53204736..815517321b76 100644
--- a/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -1,9 +1,8 @@
 //===---------- ObjectTransformLayer.cpp - Object Transform Layer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index aa4055542426..8ed23de419d1 100644
--- a/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -1,9 +1,8 @@
 //===------------- OrcABISupport.cpp - ABI specific support code ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -148,7 +147,7 @@ Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
   const unsigned StubSize = IndirectStubsInfo::StubSize;
 
   // Emit at least MinStubs, rounded up to fill the pages allocated.
-  unsigned PageSize = sys::Process::getPageSize();
+  static const unsigned PageSize = sys::Process::getPageSizeEstimate();
   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
   unsigned NumStubs = (NumPages * PageSize) / StubSize;
 
@@ -230,7 +229,7 @@ Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
   const unsigned StubSize = IndirectStubsInfo::StubSize;
 
   // Emit at least MinStubs, rounded up to fill the pages allocated.
-  unsigned PageSize = sys::Process::getPageSize();
+  static const unsigned PageSize = sys::Process::getPageSizeEstimate();
   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
   unsigned NumStubs = (NumPages * PageSize) / StubSize;
 
@@ -498,7 +497,7 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
   const unsigned StubSize = IndirectStubsInfo::StubSize;
 
   // Emit at least MinStubs, rounded up to fill the pages allocated.
-  unsigned PageSize = sys::Process::getPageSize();
+  static const unsigned PageSize = sys::Process::getPageSizeEstimate();
   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
   unsigned NumStubs = (NumPages * PageSize) / StubSize;
 
@@ -684,7 +683,7 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
   const unsigned StubSize = IndirectStubsInfo::StubSize;
 
   // Emit at least MinStubs, rounded up to fill the pages allocated.
-  unsigned PageSize = sys::Process::getPageSize();
+  static const unsigned PageSize = sys::Process::getPageSizeEstimate();
   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
   unsigned NumStubs = (NumPages * PageSize) / StubSize;
 
@@ -930,7 +929,7 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
   const unsigned StubSize = IndirectStubsInfo::StubSize;
 
   // Emit at least MinStubs, rounded up to fill the pages allocated.
-  unsigned PageSize = sys::Process::getPageSize();
+  static const unsigned PageSize = sys::Process::getPageSizeEstimate();
   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
   unsigned NumStubs = (NumPages * PageSize) / StubSize;
 
diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index 6dea64a6e78f..28c8479abba4 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -1,9 +1,8 @@
 //===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 817a4b89bfb0..98129e1690d2 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -1,9 +1,8 @@
 //===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -154,8 +153,8 @@ private:
       for (auto &S : Symbols) {
         if (auto Sym = findSymbol(*S)) {
           if (auto Addr = Sym.getAddress()) {
-            Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
-            Query->notifySymbolReady();
+            Query->notifySymbolMetRequiredState(
+                S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
           } else {
             Stack.ES.legacyFailQuery(*Query, Addr.takeError());
             return orc::SymbolNameSet();
@@ -167,11 +166,8 @@ private:
           UnresolvedSymbols.insert(S);
       }
 
-      if (Query->isFullyResolved())
-        Query->handleFullyResolved();
-
-      if (Query->isFullyReady())
-        Query->handleFullyReady();
+      if (Query->isComplete())
+        Query->handleComplete();
 
       return UnresolvedSymbols;
     }
@@ -215,28 +211,31 @@ public:
                     IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
       : CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
         IndirectStubsMgr(IndirectStubsMgrBuilder()),
-        ObjectLayer(ES,
-                    [this](orc::VModuleKey K) {
-                      auto ResolverI = Resolvers.find(K);
-                      assert(ResolverI != Resolvers.end() &&
-                             "No resolver for module K");
-                      auto Resolver = std::move(ResolverI->second);
-                      Resolvers.erase(ResolverI);
-                      return ObjLayerT::Resources{
-                          std::make_shared<SectionMemoryManager>(), Resolver};
-                    },
-                    nullptr,
-                    [this](orc::VModuleKey K, const object::ObjectFile &Obj,
-                           const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
-		      this->notifyFinalized(K, Obj, LoadedObjInfo);
-                    },
-                    [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
-		      this->notifyFreed(K, Obj);
-                    }),
-        CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+        ObjectLayer(
+            AcknowledgeORCv1Deprecation, ES,
+            [this](orc::VModuleKey K) {
+              auto ResolverI = Resolvers.find(K);
+              assert(ResolverI != Resolvers.end() &&
+                     "No resolver for module K");
+              auto Resolver = std::move(ResolverI->second);
+              Resolvers.erase(ResolverI);
+              return ObjLayerT::Resources{
+                  std::make_shared<SectionMemoryManager>(), Resolver};
+            },
+            nullptr,
+            [this](orc::VModuleKey K, const object::ObjectFile &Obj,
+                   const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
+              this->notifyFinalized(K, Obj, LoadedObjInfo);
+            },
+            [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
+              this->notifyFreed(K, Obj);
+            }),
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     orc::SimpleCompiler(TM)),
         CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
                                 std::move(IndirectStubsMgrBuilder), Resolvers)),
         CXXRuntimeOverrides(
+            AcknowledgeORCv1Deprecation,
             [this](const std::string &S) { return mangle(S); }) {}
 
   Error shutdown() {
@@ -312,7 +311,8 @@ public:
 
     // Run the static constructors, and save the static destructor runner for
     // execution when the JIT is torn down.
-    orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), K);
+    orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(
+        AcknowledgeORCv1Deprecation, std::move(CtorNames), K);
     if (auto Err = CtorRunner.runViaLayer(*this))
       return std::move(Err);
 
@@ -469,7 +469,7 @@ private:
       return nullptr;
 
     return llvm::make_unique<CODLayerT>(
-        ES, CompileLayer,
+        AcknowledgeORCv1Deprecation, ES, CompileLayer,
         [&Resolvers](orc::VModuleKey K) {
           auto ResolverI = Resolvers.find(K);
           assert(ResolverI != Resolvers.end() && "No resolver for module K");
diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp
index f4102b359a6b..e6e9a095319c 100644
--- a/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -1,9 +1,8 @@
 //===---------------- OrcError.cpp - Error codes for ORC ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 617bc2fc64b5..772a9c2c4ab2 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -1,9 +1,8 @@
 //===-------- OrcMCJITReplacement.cpp - Orc-based MCJIT replacement -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -128,7 +127,8 @@ void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
   auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
 
   for (auto &KV : CtorDtorsMap)
-    cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
+    cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(
+                 AcknowledgeORCv1Deprecation, std::move(KV.second), KV.first)
                  .runViaLayer(LazyEmitLayer));
 
   CtorDtorsMap.clear();
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 36e7e83a8bab..169dc8f1d02b 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -1,9 +1,8 @@
 //===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -177,8 +176,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
       for (auto &S : Symbols) {
         if (auto Sym = M.findMangledSymbol(*S)) {
           if (auto Addr = Sym.getAddress()) {
-            Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
-            Query->notifySymbolReady();
+            Query->notifySymbolMetRequiredState(
+                S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
             NewSymbolsResolved = true;
           } else {
             M.ES.legacyFailQuery(*Query, Addr.takeError());
@@ -190,8 +189,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
         } else {
           if (auto Sym2 = M.ClientResolver->findSymbol(*S)) {
             if (auto Addr = Sym2.getAddress()) {
-              Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym2.getFlags()));
-              Query->notifySymbolReady();
+              Query->notifySymbolMetRequiredState(
+                  S, JITEvaluatedSymbol(*Addr, Sym2.getFlags()));
               NewSymbolsResolved = true;
             } else {
               M.ES.legacyFailQuery(*Query, Addr.takeError());
@@ -205,11 +204,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
         }
       }
 
-      if (NewSymbolsResolved && Query->isFullyResolved())
-        Query->handleFullyResolved();
-
-      if (NewSymbolsResolved && Query->isFullyReady())
-        Query->handleFullyReady();
+      if (NewSymbolsResolved && Query->isComplete())
+        Query->handleComplete();
 
       return UnresolvedSymbols;
     }
@@ -236,24 +232,24 @@ public:
   OrcMCJITReplacement(std::shared_ptr<MCJITMemoryManager> MemMgr,
                       std::shared_ptr<LegacyJITSymbolResolver> ClientResolver,
                       std::unique_ptr<TargetMachine> TM)
-      : ExecutionEngine(TM->createDataLayout()),
-        TM(std::move(TM)),
+      : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
         MemMgr(
             std::make_shared<MCJITReplacementMemMgr>(*this, std::move(MemMgr))),
         Resolver(std::make_shared<LinkingORCResolver>(*this)),
         ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
         NotifyFinalized(*this),
         ObjectLayer(
-            ES,
+            AcknowledgeORCv1Deprecation, ES,
             [this](VModuleKey K) {
               return ObjectLayerT::Resources{this->MemMgr, this->Resolver};
             },
             NotifyObjectLoaded, NotifyFinalized),
-        CompileLayer(ObjectLayer, SimpleCompiler(*this->TM),
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*this->TM),
                      [this](VModuleKey K, std::unique_ptr<Module> M) {
                        Modules.push_back(std::move(M));
                      }),
-        LazyEmitLayer(CompileLayer) {}
+        LazyEmitLayer(AcknowledgeORCv1Deprecation, CompileLayer) {}
 
   static void Register() {
     OrcMCJITReplacementCtor = createOrcMCJITReplacement;
diff --git a/lib/ExecutionEngine/Orc/RPCUtils.cpp b/lib/ExecutionEngine/Orc/RPCUtils.cpp
index 2a7ab5ca8180..367b3639f841 100644
--- a/lib/ExecutionEngine/Orc/RPCUtils.cpp
+++ b/lib/ExecutionEngine/Orc/RPCUtils.cpp
@@ -1,9 +1,8 @@
 //===--------------- RPCUtils.cpp - RPCUtils implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 299d76183cd4..b22ecd5f80a1 100644
--- a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -1,9 +1,8 @@
 //===-- RTDyldObjectLinkingLayer.cpp - RuntimeDyld backed ORC ObjectLayer -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -42,9 +41,6 @@ public:
           OnResolved(Result);
         };
 
-    // We're not waiting for symbols to be ready. Just log any errors.
-    auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
-
     // Register dependencies for all symbols contained in this set.
     auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
       MR.addDependenciesForAll(Deps);
@@ -53,8 +49,8 @@ public:
     JITDylibSearchList SearchOrder;
     MR.getTargetJITDylib().withSearchOrderDo(
         [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
-    ES.lookup(SearchOrder, InternedSymbols, OnResolvedWithUnwrap, OnReady,
-              RegisterDependencies);
+    ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved,
+              OnResolvedWithUnwrap, RegisterDependencies);
   }
 
   Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
@@ -78,11 +74,8 @@ namespace llvm {
 namespace orc {
 
 RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
-    ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
-    NotifyLoadedFunction NotifyLoaded, NotifyEmittedFunction NotifyEmitted)
-    : ObjectLayer(ES), GetMemoryManager(GetMemoryManager),
-      NotifyLoaded(std::move(NotifyLoaded)),
-      NotifyEmitted(std::move(NotifyEmitted)) {}
+    ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager)
+    : ObjectLayer(ES), GetMemoryManager(GetMemoryManager) {}
 
 void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
                                     std::unique_ptr<MemoryBuffer> O) {
@@ -96,7 +89,13 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
 
   auto &ES = getExecutionSession();
 
-  auto Obj = object::ObjectFile::createObjectFile(*O);
+  // Create a MemoryBufferRef backed MemoryBuffer (i.e. shallow) copy of the
+  // underlying buffer to pass into RuntimeDyld. This allows us to hold
+  // ownership of the real underlying buffer and return it to the user once
+  // the object has been emitted.
+  auto ObjBuffer = MemoryBuffer::getMemBuffer(O->getMemBufferRef(), false);
+
+  auto Obj = object::ObjectFile::createObjectFile(*ObjBuffer);
 
   if (!Obj) {
     getExecutionSession().reportError(Obj.takeError());
@@ -134,13 +133,8 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
 
   JITDylibSearchOrderResolver Resolver(*SharedR);
 
-  /* Thoughts on proper cross-dylib weak symbol handling:
-   *
-   * Change selection of canonical defs to be a manually triggered process, and
-   * add a 'canonical' bit to symbol definitions. When canonical def selection
-   * is triggered, sweep the JITDylibs to mark defs as canonical, discard
-   * duplicate defs.
-   */
+  // FIXME: Switch to move-capture for the 'O' buffer once we have c++14.
+  MemoryBuffer *UnownedObjBuffer = O.release();
   jitLinkForORC(
       **Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections,
       [this, K, SharedR, &Obj, InternalSymbols](
@@ -149,8 +143,9 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
         return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo),
                          ResolvedSymbols, *InternalSymbols);
       },
-      [this, K, SharedR](Error Err) {
-        onObjEmit(K, *SharedR, std::move(Err));
+      [this, K, SharedR, UnownedObjBuffer](Error Err) {
+        std::unique_ptr<MemoryBuffer> ObjBuffer(UnownedObjBuffer);
+        onObjEmit(K, std::move(ObjBuffer), *SharedR, std::move(Err));
       });
 }
 
@@ -177,7 +172,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
       auto I = R.getSymbols().find(InternedName);
 
       if (OverrideObjectFlags && I != R.getSymbols().end())
-        Flags = JITSymbolFlags::stripTransientFlags(I->second);
+        Flags = I->second;
       else if (AutoClaimObjectSymbols && I == R.getSymbols().end())
         ExtraSymbolsToClaim[InternedName] = Flags;
     }
@@ -189,7 +184,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
     if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
       return Err;
 
-  R.resolve(Symbols);
+  R.notifyResolved(Symbols);
 
   if (NotifyLoaded)
     NotifyLoaded(K, Obj, *LoadedObjInfo);
@@ -197,20 +192,29 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
   return Error::success();
 }
 
-void RTDyldObjectLinkingLayer::onObjEmit(VModuleKey K,
-                                          MaterializationResponsibility &R,
-                                          Error Err) {
+void RTDyldObjectLinkingLayer::onObjEmit(
+    VModuleKey K, std::unique_ptr<MemoryBuffer> ObjBuffer,
+    MaterializationResponsibility &R, Error Err) {
   if (Err) {
     getExecutionSession().reportError(std::move(Err));
     R.failMaterialization();
     return;
   }
 
-  R.emit();
+  R.notifyEmitted();
 
   if (NotifyEmitted)
-    NotifyEmitted(K);
+    NotifyEmitted(K, std::move(ObjBuffer));
 }
 
+LegacyRTDyldObjectLinkingLayer::LegacyRTDyldObjectLinkingLayer(
+    ExecutionSession &ES, ResourcesGetter GetResources,
+    NotifyLoadedFtor NotifyLoaded, NotifyFinalizedFtor NotifyFinalized,
+    NotifyFreedFtor NotifyFreed)
+    : ES(ES), GetResources(std::move(GetResources)),
+      NotifyLoaded(std::move(NotifyLoaded)),
+      NotifyFinalized(std::move(NotifyFinalized)),
+      NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
+
 } // End namespace orc.
 } // End namespace llvm.
diff --git a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
index 9525b168fbd3..4cb7376758a7 100644
--- a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
+++ b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -1,10 +1,9 @@
 //===-- ThreadSafeModule.cpp - Thread safe Module, Context, and Utilities
 //h-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index f195d0282998..5606421a3cb0 100644
--- a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -1,9 +1,8 @@
 //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -266,16 +265,22 @@ void PerfJITEventListener::notifyObjectLoaded(
       consumeError(AddrOrErr.takeError());
       continue;
     }
-    uint64_t Addr = *AddrOrErr;
     uint64_t Size = P.second;
+    object::SectionedAddress Address;
+    Address.Address = *AddrOrErr;
+
+    uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+    if (auto SectOrErr = Sym.getSection())
+        if (*SectOrErr != Obj.section_end())
+            SectionIndex = SectOrErr.get()->getIndex();
 
     // According to spec debugging info has to come before loading the
     // corresonding code load.
     DILineInfoTable Lines = Context->getLineInfoForAddressRange(
-        Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+        {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
 
-    NotifyDebug(Addr, Lines);
-    NotifyCode(Name, Addr, Size);
+    NotifyDebug(*AddrOrErr, Lines);
+    NotifyCode(Name, *AddrOrErr, Size);
   }
 
   Dumpstream->flush();
@@ -336,8 +341,8 @@ bool PerfJITEventListener::OpenMarker() {
   //
   // Mapping must be PROT_EXEC to ensure it is captured by perf record
   // even when not using -d option.
-  MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
-                      MAP_PRIVATE, DumpFd, 0);
+  MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
+                      PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
 
   if (MarkerAddr == MAP_FAILED) {
     errs() << "could not mmap JIT marker\n";
@@ -350,7 +355,7 @@ void PerfJITEventListener::CloseMarker() {
   if (!MarkerAddr)
     return;
 
-  munmap(MarkerAddr, sys::Process::getPageSize());
+  munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
   MarkerAddr = nullptr;
 }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 0553c217c2a2..4e2d0f422f39 100644
--- a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -1,9 +1,8 @@
 //===----------- JITSymbol.cpp - JITSymbol class implementation -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 75d4c2b5134e..46604ff4000c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -1,9 +1,8 @@
 //===-- RTDyldMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,8 +32,9 @@ namespace llvm {
 RTDyldMemoryManager::~RTDyldMemoryManager() {}
 
 // Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
-     !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__))
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) &&      \
+     !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) &&            \
+     !defined(__USING_SJLJ_EXCEPTIONS__))
 #define HAVE_EHTABLE_SUPPORT 1
 #else
 #define HAVE_EHTABLE_SUPPORT 0
@@ -48,7 +48,7 @@ extern "C" void __deregister_frame(void *);
 // it may be found at runtime in a dynamically-loaded library.
 // For example, this happens when building LLVM with Visual C++
 // but using the MingW runtime.
-void __register_frame(void *p) {
+static void __register_frame(void *p) {
   static bool Searched = false;
   static void((*rf)(void *)) = 0;
 
@@ -61,7 +61,7 @@ void __register_frame(void *p) {
     rf(p);
 }
 
-void __deregister_frame(void *p) {
+static void __deregister_frame(void *p) {
   static bool Searched = false;
   static void((*df)(void *)) = 0;
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 53cb782c55c4..e26e6ce45db4 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,9 +1,8 @@
 //===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "RuntimeDyldCOFF.h"
-#include "RuntimeDyldCheckerImpl.h"
 #include "RuntimeDyldELF.h"
 #include "RuntimeDyldImpl.h"
 #include "RuntimeDyldMachO.h"
@@ -376,10 +374,55 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
       else
         return IOrErr.takeError();
 
-    // If there is an attached checker, notify it about the stubs for this
-    // section so that they can be verified.
-    if (Checker)
-      Checker->registerStubMap(Obj.getFileName(), SectionID, Stubs);
+    // If there is a NotifyStubEmitted callback set, call it to register any
+    // stubs created for this section.
+    if (NotifyStubEmitted) {
+      StringRef FileName = Obj.getFileName();
+      StringRef SectionName = Sections[SectionID].getName();
+      for (auto &KV : Stubs) {
+
+        auto &VR = KV.first;
+        uint64_t StubAddr = KV.second;
+
+        // If this is a named stub, just call NotifyStubEmitted.
+        if (VR.SymbolName) {
+          NotifyStubEmitted(FileName, SectionName, VR.SymbolName, SectionID,
+                            StubAddr);
+          continue;
+        }
+
+        // Otherwise we will have to try a reverse lookup on the global symbol table.
+        for (auto &GSTMapEntry : GlobalSymbolTable) {
+          StringRef SymbolName = GSTMapEntry.first();
+          auto &GSTEntry = GSTMapEntry.second;
+          if (GSTEntry.getSectionID() == VR.SectionID &&
+              GSTEntry.getOffset() == VR.Offset) {
+            NotifyStubEmitted(FileName, SectionName, SymbolName, SectionID,
+                              StubAddr);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Process remaining sections
+  if (ProcessAllSections) {
+    LLVM_DEBUG(dbgs() << "Process remaining sections:\n");
+    for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
+         SI != SE; ++SI) {
+
+      /* Ignore already loaded sections */
+      if (LocalSections.find(*SI) != LocalSections.end())
+        continue;
+
+      bool IsCode = SI->isText();
+      if (auto SectionIDOrErr =
+              findOrEmitSection(Obj, *SI, IsCode, LocalSections))
+        LLVM_DEBUG(dbgs() << "\tSectionID: " << (*SectionIDOrErr) << "\n");
+      else
+        return SectionIDOrErr.takeError();
+    }
   }
 
   // Give the subclasses a chance to tie-up any loose ends.
@@ -497,7 +540,14 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
         return errorCodeToError(EC);
 
       uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section);
-      uint64_t SectionSize = DataSize + StubBufSize;
+
+      uint64_t PaddingSize = 0;
+      if (Name == ".eh_frame")
+        PaddingSize += 4;
+      if (StubBufSize != 0)
+        PaddingSize += getStubAlignment() - 1;
+
+      uint64_t SectionSize = DataSize + PaddingSize + StubBufSize;
 
       // The .eh_frame section (at least on Linux) needs an extra four bytes
       // padded
@@ -703,9 +753,6 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
     Addr += Size;
   }
 
-  if (Checker)
-    Checker->registerSection(Obj.getFileName(), SectionID);
-
   return Error::success();
 }
 
@@ -725,6 +772,11 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
   bool IsReadOnly = isReadOnlyData(Section);
   uint64_t DataSize = Section.getSize();
 
+  // An alignment of 0 (at least with ELF) is identical to an alignment of 1,
+  // while being more "polite".  Other formats do not support 0-aligned sections
+  // anyway, so we should guarantee that the alignment is always at least 1.
+  Alignment = std::max(1u, Alignment);
+
   StringRef Name;
   if (auto EC = Section.getName(Name))
     return errorCodeToError(EC);
@@ -747,18 +799,19 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
   if (!IsVirtual && !IsZeroInit) {
     // In either case, set the location of the unrelocated section in memory,
     // since we still process relocations for it even if we're not applying them.
-    if (auto EC = Section.getContents(data))
-      return errorCodeToError(EC);
+    if (Expected<StringRef> E = Section.getContents())
+      data = *E;
+    else
+      return E.takeError();
     pData = data.data();
   }
 
-  // Code section alignment needs to be at least as high as stub alignment or
-  // padding calculations may by incorrect when the section is remapped to a
-  // higher alignment.
-  if (IsCode) {
+  // If there are any stubs then the section alignment needs to be at least as
+  // high as stub alignment or padding calculations may be incorrect when the
+  // section is remapped.
+  if (StubBufSize != 0) {
     Alignment = std::max(Alignment, getStubAlignment());
-    if (StubBufSize > 0)
-      PaddingSize += getStubAlignment() - 1;
+    PaddingSize += getStubAlignment() - 1;
   }
 
   // Some sections, such as debug info, don't need to be loaded for execution.
@@ -789,7 +842,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
       // Align DataSize to stub alignment if we have any stubs (PaddingSize will
       // have been increased above to account for this).
       if (StubBufSize > 0)
-        DataSize &= ~(getStubAlignment() - 1);
+        DataSize &= -(uint64_t)getStubAlignment();
     }
 
     LLVM_DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: "
@@ -817,9 +870,6 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
   if (!IsRequired)
     Sections.back().setLoadAddress(0);
 
-  if (Checker)
-    Checker->registerSection(Obj.getFileName(), SectionID);
-
   return SectionID;
 }
 
@@ -1202,42 +1252,43 @@ RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
   // permissions are applied.
   Dyld = nullptr;
   ProcessAllSections = false;
-  Checker = nullptr;
 }
 
 RuntimeDyld::~RuntimeDyld() {}
 
 static std::unique_ptr<RuntimeDyldCOFF>
-createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
-                      JITSymbolResolver &Resolver, bool ProcessAllSections,
-                      RuntimeDyldCheckerImpl *Checker) {
+createRuntimeDyldCOFF(
+                     Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+                     JITSymbolResolver &Resolver, bool ProcessAllSections,
+                     RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
   std::unique_ptr<RuntimeDyldCOFF> Dyld =
     RuntimeDyldCOFF::create(Arch, MM, Resolver);
   Dyld->setProcessAllSections(ProcessAllSections);
-  Dyld->setRuntimeDyldChecker(Checker);
+  Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
   return Dyld;
 }
 
 static std::unique_ptr<RuntimeDyldELF>
 createRuntimeDyldELF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
                      JITSymbolResolver &Resolver, bool ProcessAllSections,
-                     RuntimeDyldCheckerImpl *Checker) {
+                     RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
   std::unique_ptr<RuntimeDyldELF> Dyld =
       RuntimeDyldELF::create(Arch, MM, Resolver);
   Dyld->setProcessAllSections(ProcessAllSections);
-  Dyld->setRuntimeDyldChecker(Checker);
+  Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
   return Dyld;
 }
 
 static std::unique_ptr<RuntimeDyldMachO>
-createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
-                       JITSymbolResolver &Resolver,
-                       bool ProcessAllSections,
-                       RuntimeDyldCheckerImpl *Checker) {
+createRuntimeDyldMachO(
+                     Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+                     JITSymbolResolver &Resolver,
+                     bool ProcessAllSections,
+                     RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
   std::unique_ptr<RuntimeDyldMachO> Dyld =
     RuntimeDyldMachO::create(Arch, MM, Resolver);
   Dyld->setProcessAllSections(ProcessAllSections);
-  Dyld->setRuntimeDyldChecker(Checker);
+  Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
   return Dyld;
 }
 
@@ -1247,15 +1298,16 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
     if (Obj.isELF())
       Dyld =
           createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()),
-                               MemMgr, Resolver, ProcessAllSections, Checker);
+                               MemMgr, Resolver, ProcessAllSections,
+                               std::move(NotifyStubEmitted));
     else if (Obj.isMachO())
       Dyld = createRuntimeDyldMachO(
                static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
-               ProcessAllSections, Checker);
+               ProcessAllSections, std::move(NotifyStubEmitted));
     else if (Obj.isCOFF())
       Dyld = createRuntimeDyldCOFF(
                static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
-               ProcessAllSections, Checker);
+               ProcessAllSections, std::move(NotifyStubEmitted));
     else
       report_fatal_error("Incompatible object format!");
   }
@@ -1274,6 +1326,11 @@ void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
   return Dyld->getSymbolLocalAddress(Name);
 }
 
+unsigned RuntimeDyld::getSymbolSectionID(StringRef Name) const {
+  assert(Dyld && "No RuntimeDyld instance attached");
+  return Dyld->getSymbolSectionID(Name);
+}
+
 JITEvaluatedSymbol RuntimeDyld::getSymbol(StringRef Name) const {
   if (!Dyld)
     return nullptr;
@@ -1312,6 +1369,16 @@ void RuntimeDyld::finalizeWithMemoryManagerLocking() {
   }
 }
 
+StringRef RuntimeDyld::getSectionContent(unsigned SectionID) const {
+  assert(Dyld && "No Dyld instance attached");
+  return Dyld->getSectionContent(SectionID);
+}
+
+uint64_t RuntimeDyld::getSectionLoadAddress(unsigned SectionID) const {
+  assert(Dyld && "No Dyld instance attached");
+  return Dyld->getSectionLoadAddress(SectionID);
+}
+
 void RuntimeDyld::registerEHFrames() {
   if (Dyld)
     Dyld->registerEHFrames();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 340ddaab186d..d4e3b0ba7670 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldCOFF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
index 729a358fa0ea..4efd18a2e6c5 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldCOFF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 6eb6256080ff..ec31ea4e573c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -1,23 +1,21 @@
 //===--- RuntimeDyldChecker.cpp - RuntimeDyld tester framework --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
 #include "RuntimeDyldCheckerImpl.h"
-#include "RuntimeDyldImpl.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MSVCErrorWorkarounds.h"
 #include "llvm/Support/Path.h"
 #include <cctype>
-#include <future>
 #include <memory>
 #include <utility>
 
@@ -321,22 +319,22 @@ private:
     return std::make_pair(EvalResult(NextPC), RemainingExpr);
   }
 
-  // Evaluate a call to stub_addr.
+  // Evaluate a call to stub_addr/got_addr.
   // Look up and return the address of the stub for the given
   // (<file name>, <section name>, <symbol name>) tuple.
   // On success, returns a pair containing the stub address, plus the expression
   // remaining to be evaluated.
-  std::pair<EvalResult, StringRef> evalStubAddr(StringRef Expr,
-                                                ParseContext PCtx) const {
+  std::pair<EvalResult, StringRef>
+  evalStubOrGOTAddr(StringRef Expr, ParseContext PCtx, bool IsStubAddr) const {
     if (!Expr.startswith("("))
       return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
     StringRef RemainingExpr = Expr.substr(1).ltrim();
 
     // Handle file-name specially, as it may contain characters that aren't
     // legal for symbols.
-    StringRef FileName;
+    StringRef StubContainerName;
     size_t ComaIdx = RemainingExpr.find(',');
-    FileName = RemainingExpr.substr(0, ComaIdx).rtrim();
+    StubContainerName = RemainingExpr.substr(0, ComaIdx).rtrim();
     RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
 
     if (!RemainingExpr.startswith(","))
@@ -344,14 +342,6 @@ private:
           unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
     RemainingExpr = RemainingExpr.substr(1).ltrim();
 
-    StringRef SectionName;
-    std::tie(SectionName, RemainingExpr) = parseSymbol(RemainingExpr);
-
-    if (!RemainingExpr.startswith(","))
-      return std::make_pair(
-          unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
-    RemainingExpr = RemainingExpr.substr(1).ltrim();
-
     StringRef Symbol;
     std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
 
@@ -362,8 +352,8 @@ private:
 
     uint64_t StubAddr;
     std::string ErrorMsg = "";
-    std::tie(StubAddr, ErrorMsg) = Checker.getStubAddrFor(
-        FileName, SectionName, Symbol, PCtx.IsInsideLoad);
+    std::tie(StubAddr, ErrorMsg) = Checker.getStubOrGOTAddrFor(
+        StubContainerName, Symbol, PCtx.IsInsideLoad, IsStubAddr);
 
     if (ErrorMsg != "")
       return std::make_pair(EvalResult(ErrorMsg), "");
@@ -423,7 +413,9 @@ private:
     else if (Symbol == "next_pc")
       return evalNextPC(RemainingExpr, PCtx);
     else if (Symbol == "stub_addr")
-      return evalStubAddr(RemainingExpr, PCtx);
+      return evalStubOrGOTAddr(RemainingExpr, PCtx, true);
+    else if (Symbol == "got_addr")
+      return evalStubOrGOTAddr(RemainingExpr, PCtx, false);
     else if (Symbol == "section_addr")
       return evalSectionAddr(RemainingExpr, PCtx);
 
@@ -534,6 +526,11 @@ private:
 
     uint64_t LoadAddr = LoadAddrExprResult.getValue();
 
+    // If there is no error but the content pointer is null then this is a
+    // zero-fill symbol/section.
+    if (LoadAddr == 0)
+      return std::make_pair(0, RemainingExpr);
+
     return std::make_pair(
         EvalResult(Checker.readMemoryAtAddr(LoadAddr, ReadSize)),
         RemainingExpr);
@@ -666,27 +663,29 @@ private:
 
   bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
     MCDisassembler *Dis = Checker.Disassembler;
-    StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol);
-    ArrayRef<uint8_t> SectionBytes(
-        reinterpret_cast<const uint8_t *>(SectionMem.data()),
-        SectionMem.size());
+    StringRef SymbolMem = Checker.getSymbolContent(Symbol);
+    ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin(), SymbolMem.size());
 
     MCDisassembler::DecodeStatus S =
-        Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls());
+        Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls(), nulls());
 
     return (S == MCDisassembler::Success);
   }
 };
 }
 
-RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld,
-                                               MCDisassembler *Disassembler,
-                                               MCInstPrinter *InstPrinter,
-                                               raw_ostream &ErrStream)
-    : RTDyld(RTDyld), Disassembler(Disassembler), InstPrinter(InstPrinter),
-      ErrStream(ErrStream) {
-  RTDyld.Checker = this;
-}
+RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(
+    IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+    GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+    GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+    MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+    raw_ostream &ErrStream)
+    : IsSymbolValid(std::move(IsSymbolValid)),
+      GetSymbolInfo(std::move(GetSymbolInfo)),
+      GetSectionInfo(std::move(GetSectionInfo)),
+      GetStubInfo(std::move(GetStubInfo)), GetGOTInfo(std::move(GetGOTInfo)),
+      Endianness(Endianness), Disassembler(Disassembler),
+      InstPrinter(InstPrinter), ErrStream(ErrStream) {}
 
 bool RuntimeDyldCheckerImpl::check(StringRef CheckExpr) const {
   CheckExpr = CheckExpr.trim();
@@ -731,242 +730,134 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
   return DidAllTestsPass && (NumRules != 0);
 }
 
-Expected<JITSymbolResolver::LookupResult> RuntimeDyldCheckerImpl::lookup(
-    const JITSymbolResolver::LookupSet &Symbols) const {
-
-#ifdef _MSC_VER
-  using ExpectedLookupResult = MSVCPExpected<JITSymbolResolver::LookupResult>;
-#else
-  using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
-#endif
-
-  auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
-  auto ResultF = ResultP->get_future();
-
-  getRTDyld().Resolver.lookup(
-      Symbols, [=](Expected<JITSymbolResolver::LookupResult> Result) {
-        ResultP->set_value(std::move(Result));
-      });
-  return ResultF.get();
-}
-
 bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
-  if (getRTDyld().getSymbol(Symbol))
-    return true;
-  auto Result = lookup({Symbol});
+  return IsSymbolValid(Symbol);
+}
 
-  if (!Result) {
-    logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
-    return false;
+uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
+  auto SymInfo = GetSymbolInfo(Symbol);
+  if (!SymInfo) {
+    logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+    return 0;
   }
 
-  assert(Result->count(Symbol) && "Missing symbol result");
-  return true;
-}
+  if (SymInfo->isZeroFill())
+    return 0;
 
-uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
   return static_cast<uint64_t>(
-      reinterpret_cast<uintptr_t>(getRTDyld().getSymbolLocalAddress(Symbol)));
+      reinterpret_cast<uintptr_t>(SymInfo->getContent().data()));
 }
 
 uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
-  if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
-    return InternalSymbol.getAddress();
-
-  auto Result = lookup({Symbol});
-  if (!Result) {
-    logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
+  auto SymInfo = GetSymbolInfo(Symbol);
+  if (!SymInfo) {
+    logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
     return 0;
   }
-  auto I = Result->find(Symbol);
-  assert(I != Result->end() && "Missing symbol result");
-  return I->second.getAddress();
+
+  return SymInfo->getTargetAddress();
 }
 
 uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
                                                   unsigned Size) const {
   uintptr_t PtrSizedAddr = static_cast<uintptr_t>(SrcAddr);
   assert(PtrSizedAddr == SrcAddr && "Linker memory pointer out-of-range.");
-  uint8_t *Src = reinterpret_cast<uint8_t*>(PtrSizedAddr);
-  return getRTDyld().readBytesUnaligned(Src, Size);
+  void *Ptr = reinterpret_cast<void*>(PtrSizedAddr);
+
+  switch (Size) {
+  case 1:
+    return support::endian::read<uint8_t>(Ptr, Endianness);
+  case 2:
+    return support::endian::read<uint16_t>(Ptr, Endianness);
+  case 4:
+    return support::endian::read<uint32_t>(Ptr, Endianness);
+  case 8:
+    return support::endian::read<uint64_t>(Ptr, Endianness);
+  }
+  llvm_unreachable("Unsupported read size");
 }
 
-
-std::pair<const RuntimeDyldCheckerImpl::SectionAddressInfo*, std::string>
-RuntimeDyldCheckerImpl::findSectionAddrInfo(StringRef FileName,
-                                            StringRef SectionName) const {
-
-  auto SectionMapItr = Stubs.find(FileName);
-  if (SectionMapItr == Stubs.end()) {
-    std::string ErrorMsg = "File '";
-    ErrorMsg += FileName;
-    ErrorMsg += "' not found. ";
-    if (Stubs.empty())
-      ErrorMsg += "No stubs registered.";
-    else {
-      ErrorMsg += "Available files are:";
-      for (const auto& StubEntry : Stubs) {
-        ErrorMsg += " '";
-        ErrorMsg += StubEntry.first;
-        ErrorMsg += "'";
-      }
-    }
-    ErrorMsg += "\n";
-    return std::make_pair(nullptr, ErrorMsg);
+StringRef RuntimeDyldCheckerImpl::getSymbolContent(StringRef Symbol) const {
+  auto SymInfo = GetSymbolInfo(Symbol);
+  if (!SymInfo) {
+    logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+    return StringRef();
   }
-
-  auto SectionInfoItr = SectionMapItr->second.find(SectionName);
-  if (SectionInfoItr == SectionMapItr->second.end())
-    return std::make_pair(nullptr,
-                          ("Section '" + SectionName + "' not found in file '" +
-                           FileName + "'\n").str());
-
-  return std::make_pair(&SectionInfoItr->second, std::string(""));
+  return SymInfo->getContent();
 }
 
 std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getSectionAddr(
     StringRef FileName, StringRef SectionName, bool IsInsideLoad) const {
 
-  const SectionAddressInfo *SectionInfo = nullptr;
-  {
-    std::string ErrorMsg;
-    std::tie(SectionInfo, ErrorMsg) =
-      findSectionAddrInfo(FileName, SectionName);
-    if (ErrorMsg != "")
-      return std::make_pair(0, ErrorMsg);
-  }
-
-  unsigned SectionID = SectionInfo->SectionID;
-  uint64_t Addr;
-  if (IsInsideLoad)
-    Addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(
-        getRTDyld().Sections[SectionID].getAddress()));
-  else
-    Addr = getRTDyld().Sections[SectionID].getLoadAddress();
-
-  return std::make_pair(Addr, std::string(""));
-}
-
-std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubAddrFor(
-    StringRef FileName, StringRef SectionName, StringRef SymbolName,
-    bool IsInsideLoad) const {
-
-  const SectionAddressInfo *SectionInfo = nullptr;
-  {
-    std::string ErrorMsg;
-    std::tie(SectionInfo, ErrorMsg) =
-      findSectionAddrInfo(FileName, SectionName);
-    if (ErrorMsg != "")
-      return std::make_pair(0, ErrorMsg);
+  auto SecInfo = GetSectionInfo(FileName, SectionName);
+  if (!SecInfo) {
+    std::string ErrMsg;
+    {
+      raw_string_ostream ErrMsgStream(ErrMsg);
+      logAllUnhandledErrors(SecInfo.takeError(), ErrMsgStream,
+                            "RTDyldChecker: ");
+    }
+    return std::make_pair(0, std::move(ErrMsg));
   }
 
-  unsigned SectionID = SectionInfo->SectionID;
-  const StubOffsetsMap &SymbolStubs = SectionInfo->StubOffsets;
-  auto StubOffsetItr = SymbolStubs.find(SymbolName);
-  if (StubOffsetItr == SymbolStubs.end())
-    return std::make_pair(0,
-                          ("Stub for symbol '" + SymbolName + "' not found. "
-                           "If '" + SymbolName + "' is an internal symbol this "
-                           "may indicate that the stub target offset is being "
-                           "computed incorrectly.\n").str());
+  // If this address is being looked up in "load" mode, return the content
+  // pointer, otherwise return the target address.
 
-  uint64_t StubOffset = StubOffsetItr->second;
+  uint64_t Addr = 0;
 
-  uint64_t Addr;
   if (IsInsideLoad) {
-    uintptr_t SectionBase = reinterpret_cast<uintptr_t>(
-        getRTDyld().Sections[SectionID].getAddress());
-    Addr = static_cast<uint64_t>(SectionBase) + StubOffset;
-  } else {
-    uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress();
-    Addr = SectionBase + StubOffset;
-  }
-
-  return std::make_pair(Addr, std::string(""));
-}
-
-StringRef
-RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const {
-  RTDyldSymbolTable::const_iterator pos =
-      getRTDyld().GlobalSymbolTable.find(Name);
-  if (pos == getRTDyld().GlobalSymbolTable.end())
-    return StringRef();
-  const auto &SymInfo = pos->second;
-  uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID());
-  return StringRef(reinterpret_cast<const char *>(SectionAddr) +
-                       SymInfo.getOffset(),
-                   getRTDyld().Sections[SymInfo.getSectionID()].getSize() -
-                       SymInfo.getOffset());
-}
-
-Optional<uint64_t>
-RuntimeDyldCheckerImpl::getSectionLoadAddress(void *LocalAddress) const {
-  for (auto &S : getRTDyld().Sections) {
-    if (S.getAddress() == LocalAddress)
-      return S.getLoadAddress();
-  }
-  return Optional<uint64_t>();
-}
-
-void RuntimeDyldCheckerImpl::registerSection(
-    StringRef FilePath, unsigned SectionID) {
-  StringRef FileName = sys::path::filename(FilePath);
-  const SectionEntry &Section = getRTDyld().Sections[SectionID];
-  StringRef SectionName = Section.getName();
+    if (SecInfo->isZeroFill())
+      Addr = 0;
+    else
+      Addr = pointerToJITTargetAddress(SecInfo->getContent().data());
+  } else
+    Addr = SecInfo->getTargetAddress();
 
-  Stubs[FileName][SectionName].SectionID = SectionID;
+  return std::make_pair(Addr, "");
 }
 
-void RuntimeDyldCheckerImpl::registerStubMap(
-    StringRef FilePath, unsigned SectionID,
-    const RuntimeDyldImpl::StubMap &RTDyldStubs) {
-  StringRef FileName = sys::path::filename(FilePath);
-  const SectionEntry &Section = getRTDyld().Sections[SectionID];
-  StringRef SectionName = Section.getName();
-
-  Stubs[FileName][SectionName].SectionID = SectionID;
+std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubOrGOTAddrFor(
+    StringRef StubContainerName, StringRef SymbolName, bool IsInsideLoad,
+    bool IsStubAddr) const {
 
-  for (auto &StubMapEntry : RTDyldStubs) {
-    std::string SymbolName = "";
+  auto StubInfo = IsStubAddr ? GetStubInfo(StubContainerName, SymbolName)
+                             : GetGOTInfo(StubContainerName, SymbolName);
 
-    if (StubMapEntry.first.SymbolName)
-      SymbolName = StubMapEntry.first.SymbolName;
-    else {
-      // If this is a (Section, Offset) pair, do a reverse lookup in the
-      // global symbol table to find the name.
-      for (auto &GSTEntry : getRTDyld().GlobalSymbolTable) {
-        const auto &SymInfo = GSTEntry.second;
-        if (SymInfo.getSectionID() == StubMapEntry.first.SectionID &&
-            SymInfo.getOffset() ==
-              static_cast<uint64_t>(StubMapEntry.first.Offset)) {
-          SymbolName = GSTEntry.first();
-          break;
-        }
-      }
+  if (!StubInfo) {
+    std::string ErrMsg;
+    {
+      raw_string_ostream ErrMsgStream(ErrMsg);
+      logAllUnhandledErrors(StubInfo.takeError(), ErrMsgStream,
+                            "RTDyldChecker: ");
     }
-
-    if (SymbolName != "")
-      Stubs[FileName][SectionName].StubOffsets[SymbolName] =
-        StubMapEntry.second;
+    return std::make_pair((uint64_t)0, std::move(ErrMsg));
   }
-}
 
-RuntimeDyldChecker::RuntimeDyldChecker(RuntimeDyld &RTDyld,
-                                       MCDisassembler *Disassembler,
-                                       MCInstPrinter *InstPrinter,
-                                       raw_ostream &ErrStream)
-    : Impl(make_unique<RuntimeDyldCheckerImpl>(RTDyld, Disassembler,
-                                               InstPrinter, ErrStream)) {}
+  uint64_t Addr = 0;
 
-RuntimeDyldChecker::~RuntimeDyldChecker() {}
+  if (IsInsideLoad) {
+    if (StubInfo->isZeroFill())
+      return std::make_pair((uint64_t)0, "Detected zero-filled stub/GOT entry");
+    Addr = pointerToJITTargetAddress(StubInfo->getContent().data());
+  } else
+    Addr = StubInfo->getTargetAddress();
 
-RuntimeDyld& RuntimeDyldChecker::getRTDyld() {
-  return Impl->RTDyld;
+  return std::make_pair(Addr, "");
 }
 
-const RuntimeDyld& RuntimeDyldChecker::getRTDyld() const {
-  return Impl->RTDyld;
-}
+RuntimeDyldChecker::RuntimeDyldChecker(
+    IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+    GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+    GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+    MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+    raw_ostream &ErrStream)
+    : Impl(::llvm::make_unique<RuntimeDyldCheckerImpl>(
+          std::move(IsSymbolValid), std::move(GetSymbolInfo),
+          std::move(GetSectionInfo), std::move(GetStubInfo),
+          std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter,
+          ErrStream)) {}
+
+RuntimeDyldChecker::~RuntimeDyldChecker() {}
 
 bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
   return Impl->check(CheckExpr);
@@ -982,8 +873,3 @@ RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName,
                                    bool LocalAddress) {
   return Impl->getSectionAddr(FileName, SectionName, LocalAddress);
 }
-
-Optional<uint64_t>
-RuntimeDyldChecker::getSectionLoadAddress(void *LocalAddress) const {
-  return Impl->getSectionLoadAddress(LocalAddress);
-}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index 6da1a68d06d6..ac9d4d460217 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldCheckerImpl.h -- RuntimeDyld test framework --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,14 +15,22 @@ namespace llvm {
 
 class RuntimeDyldCheckerImpl {
   friend class RuntimeDyldChecker;
-  friend class RuntimeDyldImpl;
   friend class RuntimeDyldCheckerExprEval;
-  friend class RuntimeDyldELF;
+
+  using IsSymbolValidFunction =
+    RuntimeDyldChecker::IsSymbolValidFunction;
+  using GetSymbolInfoFunction = RuntimeDyldChecker::GetSymbolInfoFunction;
+  using GetSectionInfoFunction = RuntimeDyldChecker::GetSectionInfoFunction;
+  using GetStubInfoFunction = RuntimeDyldChecker::GetStubInfoFunction;
+  using GetGOTInfoFunction = RuntimeDyldChecker::GetGOTInfoFunction;
 
 public:
-  RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
-                         MCInstPrinter *InstPrinter,
-                         llvm::raw_ostream &ErrStream);
+  RuntimeDyldCheckerImpl(
+      IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+      GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+      GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+      MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+      llvm::raw_ostream &ErrStream);
 
   bool check(StringRef CheckExpr) const;
   bool checkAllRulesInBuffer(StringRef RulePrefix, MemoryBuffer *MemBuf) const;
@@ -31,15 +38,6 @@ public:
 private:
 
   // StubMap typedefs.
-  typedef std::map<std::string, uint64_t> StubOffsetsMap;
-  struct SectionAddressInfo {
-    uint64_t SectionID;
-    StubOffsetsMap StubOffsets;
-  };
-  typedef std::map<std::string, SectionAddressInfo> SectionMap;
-  typedef std::map<std::string, SectionMap> StubMap;
-
-  RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
 
   Expected<JITSymbolResolver::LookupResult>
   lookup(const JITSymbolResolver::LookupSet &Symbols) const;
@@ -49,32 +47,27 @@ private:
   uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
   uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const;
 
-  std::pair<const SectionAddressInfo*, std::string> findSectionAddrInfo(
-                                                   StringRef FileName,
-                                                   StringRef SectionName) const;
+  StringRef getSymbolContent(StringRef Symbol) const;
 
   std::pair<uint64_t, std::string> getSectionAddr(StringRef FileName,
                                                   StringRef SectionName,
                                                   bool IsInsideLoad) const;
 
-  std::pair<uint64_t, std::string> getStubAddrFor(StringRef FileName,
-                                                  StringRef SectionName,
-                                                  StringRef Symbol,
-                                                  bool IsInsideLoad) const;
-  StringRef getSubsectionStartingAt(StringRef Name) const;
+  std::pair<uint64_t, std::string>
+  getStubOrGOTAddrFor(StringRef StubContainerName, StringRef Symbol,
+                      bool IsInsideLoad, bool IsStubAddr) const;
 
   Optional<uint64_t> getSectionLoadAddress(void *LocalAddr) const;
 
-  void registerSection(StringRef FilePath, unsigned SectionID);
-  void registerStubMap(StringRef FilePath, unsigned SectionID,
-                       const RuntimeDyldImpl::StubMap &RTDyldStubs);
-
-  RuntimeDyld &RTDyld;
+  IsSymbolValidFunction IsSymbolValid;
+  GetSymbolInfoFunction GetSymbolInfo;
+  GetSectionInfoFunction GetSectionInfo;
+  GetStubInfoFunction GetStubInfo;
+  GetGOTInfoFunction GetGOTInfo;
+  support::endianness Endianness;
   MCDisassembler *Disassembler;
   MCInstPrinter *InstPrinter;
   llvm::raw_ostream &ErrStream;
-
-  StubMap Stubs;
 };
 }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 226ee715e18b..60041a45e2b8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1857,9 +1856,6 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
     Sections[GOTSectionID] =
         SectionEntry(".got", Addr, TotalSize, TotalSize, 0);
 
-    if (Checker)
-      Checker->registerSection(Obj.getFileName(), GOTSectionID);
-
     // For now, initialize all GOT entries to zero.  We'll fill them in as
     // needed when GOT-based relocations are applied.
     memset(Addr, 0, TotalSize);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index f37bd0bbaea6..ef0784e2273b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,7 +60,7 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
   void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset,
                             uint64_t Value, uint32_t Type, int64_t Addend);
 
-  unsigned getMaxStubSize() override {
+  unsigned getMaxStubSize() const override {
     if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
       return 20; // movz; movk; movk; movk; br
     if (Arch == Triple::arm || Arch == Triple::thumb)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 4c650e09ac1f..68b3468fbc9d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -241,7 +240,6 @@ typedef StringMap<SymbolTableEntry> RTDyldSymbolTable;
 
 class RuntimeDyldImpl {
   friend class RuntimeDyld::LoadedObjectInfo;
-  friend class RuntimeDyldCheckerImpl;
 protected:
   static const unsigned AbsoluteSymbolSection = ~0U;
 
@@ -251,9 +249,6 @@ protected:
   // The symbol resolver to use for external symbols.
   JITSymbolResolver &Resolver;
 
-  // Attached RuntimeDyldChecker instance. Null if no instance attached.
-  RuntimeDyldCheckerImpl *Checker;
-
   // A list of all sections emitted by the dynamic linker.  These sections are
   // referenced in the code by means of their index in this list - SectionID.
   typedef SmallVector<SectionEntry, 64> SectionList;
@@ -313,20 +308,16 @@ protected:
   // the end of the list while the list is being processed.
   sys::Mutex lock;
 
-  virtual unsigned getMaxStubSize() = 0;
+  using NotifyStubEmittedFunction =
+    RuntimeDyld::NotifyStubEmittedFunction;
+  NotifyStubEmittedFunction NotifyStubEmitted;
+
+  virtual unsigned getMaxStubSize() const = 0;
   virtual unsigned getStubAlignment() = 0;
 
   bool HasError;
   std::string ErrorStr;
 
-  uint64_t getSectionLoadAddress(unsigned SectionID) const {
-    return Sections[SectionID].getLoadAddress();
-  }
-
-  uint8_t *getSectionAddress(unsigned SectionID) const {
-    return Sections[SectionID].getAddress();
-  }
-
   void writeInt16BE(uint8_t *Addr, uint16_t Value) {
     if (IsTargetLittleEndian)
       sys::swapByteOrder(Value);
@@ -472,7 +463,7 @@ protected:
 public:
   RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
                   JITSymbolResolver &Resolver)
-    : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr),
+    : MemMgr(MemMgr), Resolver(Resolver),
       ProcessAllSections(false), HasError(false) {
   }
 
@@ -482,13 +473,22 @@ public:
     this->ProcessAllSections = ProcessAllSections;
   }
 
-  void setRuntimeDyldChecker(RuntimeDyldCheckerImpl *Checker) {
-    this->Checker = Checker;
-  }
-
   virtual std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
   loadObject(const object::ObjectFile &Obj) = 0;
 
+  uint64_t getSectionLoadAddress(unsigned SectionID) const {
+    return Sections[SectionID].getLoadAddress();
+  }
+
+  uint8_t *getSectionAddress(unsigned SectionID) const {
+    return Sections[SectionID].getAddress();
+  }
+
+  StringRef getSectionContent(unsigned SectionID) const {
+    return StringRef(reinterpret_cast<char *>(Sections[SectionID].getAddress()),
+                     Sections[SectionID].getStubOffset() + getMaxStubSize());
+  }
+
   uint8_t* getSymbolLocalAddress(StringRef Name) const {
     // FIXME: Just look up as a function for now. Overly simple of course.
     // Work in progress.
@@ -502,6 +502,13 @@ public:
     return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
   }
 
+  unsigned getSymbolSectionID(StringRef Name) const {
+    auto GSTItr = GlobalSymbolTable.find(Name);
+    if (GSTItr == GlobalSymbolTable.end())
+      return ~0U;
+    return GSTItr->second.getSectionID();
+  }
+
   JITEvaluatedSymbol getSymbol(StringRef Name) const {
     // FIXME: Just look up as a function for now. Overly simple of course.
     // Work in progress.
@@ -560,6 +567,10 @@ public:
 
   virtual bool isCompatibleFile(const ObjectFile &Obj) const = 0;
 
+  void setNotifyStubEmitted(NotifyStubEmittedFunction NotifyStubEmitted) {
+    this->NotifyStubEmitted = std::move(NotifyStubEmitted);
+  }
+
   virtual void registerEHFrames();
 
   void deregisterEHFrames();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index d47fcd45be88..202c3ca1c507 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index d71ca4e54953..650e7b79fbb8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldMachO.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
index dd65051edad7..40910bea0c36 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -1,9 +1,8 @@
 //===--- RuntimeDyldCOFFI386.h --- COFF/X86_64 specific code ---*- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,16 +27,16 @@ public:
                       JITSymbolResolver &Resolver)
       : RuntimeDyldCOFF(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override {
+  unsigned getMaxStubSize() const override {
     return 8; // 2-byte jmp instruction + 32-bit relative address + 2 byte pad
   }
 
   unsigned getStubAlignment() override { return 1; }
 
-  Expected<relocation_iterator>
+  Expected<object::relocation_iterator>
   processRelocationRef(unsigned SectionID,
-                       relocation_iterator RelI,
-                       const ObjectFile &Obj,
+                       object::relocation_iterator RelI,
+                       const object::ObjectFile &Obj,
                        ObjSectionToIDMap &ObjSectionToID,
                        StubMap &Stubs) override {
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 8723dd0fd0ea..bb2e9626e0b0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -1,9 +1,8 @@
 //===--- RuntimeDyldCOFFThumb.h --- COFF/Thumb specific code ---*- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,9 +21,10 @@
 
 namespace llvm {
 
-static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
-                        section_iterator Section) {
-  Expected<SymbolRef::Type> SymTypeOrErr = Symbol->getType();
+static bool isThumbFunc(object::symbol_iterator Symbol,
+                        const object::ObjectFile &Obj,
+                        object::section_iterator Section) {
+  Expected<object::SymbolRef::Type> SymTypeOrErr = Symbol->getType();
   if (!SymTypeOrErr) {
     std::string Buf;
     raw_string_ostream OS(Buf);
@@ -33,12 +33,14 @@ static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
     report_fatal_error(Buf);
   }
 
-  if (*SymTypeOrErr != SymbolRef::ST_Function)
+  if (*SymTypeOrErr != object::SymbolRef::ST_Function)
     return false;
 
   // We check the IMAGE_SCN_MEM_16BIT flag in the section of the symbol to tell
   // if it's thumb or not
-  return cast<COFFObjectFile>(Obj).getCOFFSection(*Section)->Characteristics &
+  return cast<object::COFFObjectFile>(Obj)
+             .getCOFFSection(*Section)
+             ->Characteristics &
          COFF::IMAGE_SCN_MEM_16BIT;
 }
 
@@ -48,16 +50,16 @@ public:
                        JITSymbolResolver &Resolver)
       : RuntimeDyldCOFF(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override {
+  unsigned getMaxStubSize() const override {
     return 16; // 8-byte load instructions, 4-byte jump, 4-byte padding
   }
 
   unsigned getStubAlignment() override { return 1; }
 
-  Expected<relocation_iterator>
+  Expected<object::relocation_iterator>
   processRelocationRef(unsigned SectionID,
-                       relocation_iterator RelI,
-                       const ObjectFile &Obj,
+                       object::relocation_iterator RelI,
+                       const object::ObjectFile &Obj,
                        ObjSectionToIDMap &ObjSectionToID,
                        StubMap &Stubs) override {
     auto Symbol = RelI->getSymbol();
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index aee5f6dc3746..d2d74534cf90 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldCOFFX86_64.h --- COFF/X86_64 specific code ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,7 +61,7 @@ public:
   unsigned getStubAlignment() override { return 1; }
 
   // 2-byte jmp instruction + 32-bit relative address + 64-bit absolute jump
-  unsigned getMaxStubSize() override { return 14; }
+  unsigned getMaxStubSize() const override { return 14; }
 
   // The target location for the relocation is described by RE.SectionID and
   // RE.Offset.  RE.SectionID can be used to find the SectionEntry.  Each
@@ -187,21 +186,21 @@ public:
     return std::make_tuple(Offset, RelType, Addend);
   }
 
-  Expected<relocation_iterator>
+  Expected<object::relocation_iterator>
   processRelocationRef(unsigned SectionID,
-                       relocation_iterator RelI,
-                       const ObjectFile &Obj,
+                       object::relocation_iterator RelI,
+                       const object::ObjectFile &Obj,
                        ObjSectionToIDMap &ObjSectionToID,
                        StubMap &Stubs) override {
     // If possible, find the symbol referred to in the relocation,
     // and the section that contains it.
-    symbol_iterator Symbol = RelI->getSymbol();
+    object::symbol_iterator Symbol = RelI->getSymbol();
     if (Symbol == Obj.symbol_end())
       report_fatal_error("Unknown symbol in relocation");
     auto SectionOrError = Symbol->getSection();
     if (!SectionOrError)
       return SectionOrError.takeError();
-    section_iterator SecI = *SectionOrError;
+    object::section_iterator SecI = *SectionOrError;
     // If there is no section, this must be an external reference.
     const bool IsExtern = SecI == Obj.section_end();
 
@@ -280,11 +279,11 @@ public:
     UnregisteredEHFrameSections.clear();
   }
 
-  Error finalizeLoad(const ObjectFile &Obj,
+  Error finalizeLoad(const object::ObjectFile &Obj,
                      ObjSectionToIDMap &SectionMap) override {
     // Look for and record the EH frame section IDs.
     for (const auto &SectionPair : SectionMap) {
-      const SectionRef &Section = SectionPair.first;
+      const object::SectionRef &Section = SectionPair.first;
       StringRef Name;
       if (auto EC = Section.getName(Name))
         return errorCodeToError(EC);
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
index 3a166b40af2d..17cbe612fb43 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldELFMips.cpp ---- ELF/Mips specific code. -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
index f53b9e6bd75a..14fb36f070f8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldELFMips.h ---- ELF/Mips specific code. -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 2a619c549cfa..f2ee1b06d494 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldMachOAArch64.h -- MachO/AArch64 specific code. -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,7 +26,7 @@ public:
                           JITSymbolResolver &Resolver)
       : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override { return 8; }
+  unsigned getMaxStubSize() const override { return 8; }
 
   unsigned getStubAlignment() override { return 8; }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index ab7cd2bdae15..3bec8b979f7d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -1,9 +1,8 @@
 //===----- RuntimeDyldMachOARM.h ---- MachO/ARM specific code. ----*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,7 +29,7 @@ public:
                       JITSymbolResolver &Resolver)
     : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override { return 8; }
+  unsigned getMaxStubSize() const override { return 8; }
 
   unsigned getStubAlignment() override { return 4; }
 
@@ -225,7 +224,7 @@ public:
       HighInsn = (HighInsn & 0xf800) | ((Value >> 12) & 0x7ff);
 
       uint16_t LowInsn = readBytesUnaligned(LocalAddress + 2, 2);
-      assert((LowInsn & 0xf800) != 0xf8000 &&
+      assert((LowInsn & 0xf800) == 0xf800 &&
              "Unrecognized thumb branch encoding (BR22 low bits)");
       LowInsn = (LowInsn & 0xf800) | ((Value >> 1) & 0x7ff);
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index d384d70b8b0f..f0de27ba14bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -1,9 +1,8 @@
 //===---- RuntimeDyldMachOI386.h ---- MachO/I386 specific code. ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,7 +26,7 @@ public:
                        JITSymbolResolver &Resolver)
       : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override { return 0; }
+  unsigned getMaxStubSize() const override { return 0; }
 
   unsigned getStubAlignment() override { return 1; }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 9732ea6a0cd2..28febbdb948c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -1,9 +1,8 @@
 //===-- RuntimeDyldMachOX86_64.h ---- MachO/X86_64 specific code. -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,9 +26,9 @@ public:
                          JITSymbolResolver &Resolver)
       : RuntimeDyldMachOCRTPBase(MM, Resolver) {}
 
-  unsigned getMaxStubSize() override { return 8; }
+  unsigned getMaxStubSize() const override { return 8; }
 
-  unsigned getStubAlignment() override { return 1; }
+  unsigned getStubAlignment() override { return 8; }
 
   Expected<relocation_iterator>
   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp
index 05ab4a074e37..925049b2a1b4 100644
--- a/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -1,9 +1,8 @@
 //===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,9 +64,9 @@ uint8_t *SectionMemoryManager::allocateSection(
   // Look in the list of free memory regions and use a block there if one
   // is available.
   for (FreeMemBlock &FreeMB : MemGroup.FreeMem) {
-    if (FreeMB.Free.size() >= RequiredSize) {
+    if (FreeMB.Free.allocatedSize() >= RequiredSize) {
       Addr = (uintptr_t)FreeMB.Free.base();
-      uintptr_t EndOfBlock = Addr + FreeMB.Free.size();
+      uintptr_t EndOfBlock = Addr + FreeMB.Free.allocatedSize();
       // Align the address.
       Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
 
@@ -116,7 +115,7 @@ uint8_t *SectionMemoryManager::allocateSection(
   // Remember that we allocated this memory
   MemGroup.AllocatedMem.push_back(MB);
   Addr = (uintptr_t)MB.base();
-  uintptr_t EndOfBlock = Addr + MB.size();
+  uintptr_t EndOfBlock = Addr + MB.allocatedSize();
 
   // Align the address.
   Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
@@ -173,12 +172,12 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) {
 }
 
 static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) {
-  static const size_t PageSize = sys::Process::getPageSize();
+  static const size_t PageSize = sys::Process::getPageSizeEstimate();
 
   size_t StartOverlap =
       (PageSize - ((uintptr_t)M.base() % PageSize)) % PageSize;
 
-  size_t TrimmedSize = M.size();
+  size_t TrimmedSize = M.allocatedSize();
   TrimmedSize -= StartOverlap;
   TrimmedSize -= TrimmedSize % PageSize;
 
@@ -186,8 +185,9 @@ static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) {
                            TrimmedSize);
 
   assert(((uintptr_t)Trimmed.base() % PageSize) == 0);
-  assert((Trimmed.size() % PageSize) == 0);
-  assert(M.base() <= Trimmed.base() && Trimmed.size() <= M.size());
+  assert((Trimmed.allocatedSize() % PageSize) == 0);
+  assert(M.base() <= Trimmed.base() &&
+         Trimmed.allocatedSize() <= M.allocatedSize());
 
   return Trimmed;
 }
@@ -210,17 +210,19 @@ SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
   }
 
   // Remove all blocks which are now empty
-  MemGroup.FreeMem.erase(
-      remove_if(MemGroup.FreeMem,
-                [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }),
-      MemGroup.FreeMem.end());
+  MemGroup.FreeMem.erase(remove_if(MemGroup.FreeMem,
+                                   [](FreeMemBlock &FreeMB) {
+                                     return FreeMB.Free.allocatedSize() == 0;
+                                   }),
+                         MemGroup.FreeMem.end());
 
   return std::error_code();
 }
 
 void SectionMemoryManager::invalidateInstructionCache() {
   for (sys::MemoryBlock &Block : CodeMem.PendingMem)
-    sys::Memory::InvalidateInstructionCache(Block.base(), Block.size());
+    sys::Memory::InvalidateInstructionCache(Block.base(),
+                                            Block.allocatedSize());
 }
 
 SectionMemoryManager::~SectionMemoryManager() {
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 9626b8d3ffa3..0d9c6cfa0908 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -1,9 +1,8 @@
 //===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp
index a70dad37dfcf..63d31c035390 100644
--- a/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/lib/FuzzMutate/FuzzerCLI.cpp
@@ -1,9 +1,8 @@
 //===-- FuzzerCLI.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/FuzzMutate/IRMutator.cpp b/lib/FuzzMutate/IRMutator.cpp
index 40e402cdadef..2fc65981f1db 100644
--- a/lib/FuzzMutate/IRMutator.cpp
+++ b/lib/FuzzMutate/IRMutator.cpp
@@ -1,9 +1,8 @@
 //===-- IRMutator.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/FuzzMutate/OpDescriptor.cpp b/lib/FuzzMutate/OpDescriptor.cpp
index 1c5d8f606aea..67d44be8b699 100644
--- a/lib/FuzzMutate/OpDescriptor.cpp
+++ b/lib/FuzzMutate/OpDescriptor.cpp
@@ -1,9 +1,8 @@
 //===-- OpDescriptor.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/FuzzMutate/Operations.cpp b/lib/FuzzMutate/Operations.cpp
index b842f6d64fb1..cf55d09caf7e 100644
--- a/lib/FuzzMutate/Operations.cpp
+++ b/lib/FuzzMutate/Operations.cpp
@@ -1,9 +1,8 @@
 //===-- Operations.cpp ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/FuzzMutate/RandomIRBuilder.cpp b/lib/FuzzMutate/RandomIRBuilder.cpp
index 337184535558..1295714839e8 100644
--- a/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -1,9 +1,8 @@
 //===-- RandomIRBuilder.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,7 +53,8 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
       IP = ++I->getIterator();
       assert(IP != BB.end() && "guaranteed by the findPointer");
     }
-    auto *NewLoad = new LoadInst(Ptr, "L", &*IP);
+    auto *NewLoad = new LoadInst(
+        cast<PointerType>(Ptr->getType())->getElementType(), Ptr, "L", &*IP);
 
     // Only sample this load if it really matches the descriptor
     if (Pred.matches(Srcs, NewLoad))
diff --git a/lib/IR/AbstractCallSite.cpp b/lib/IR/AbstractCallSite.cpp
new file mode 100644
index 000000000000..b7a81030f41c
--- /dev/null
+++ b/lib/IR/AbstractCallSite.cpp
@@ -0,0 +1,134 @@
+//===-- AbstractCallSite.cpp - Implementation of abstract call sites ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements abstract call sites which unify the interface for
+// direct, indirect, and callback call sites.
+//
+// For more information see:
+// https://llvm.org/devmtg/2018-10/talk-abstracts.html#talk20
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "abstract-call-sites"
+
+STATISTIC(NumCallbackCallSites, "Number of callback call sites created");
+STATISTIC(NumDirectAbstractCallSites,
+          "Number of direct abstract call sites created");
+STATISTIC(NumInvalidAbstractCallSitesUnknownUse,
+          "Number of invalid abstract call sites created (unknown use)");
+STATISTIC(NumInvalidAbstractCallSitesUnknownCallee,
+          "Number of invalid abstract call sites created (unknown callee)");
+STATISTIC(NumInvalidAbstractCallSitesNoCallback,
+          "Number of invalid abstract call sites created (no callback)");
+
+/// Create an abstract call site from a use.
+AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) {
+
+  // First handle unknown users.
+  if (!CS) {
+
+    // If the use is actually in a constant cast expression which itself
+    // has only one use, we look through the constant cast expression.
+    // This happens by updating the use @p U to the use of the constant
+    // cast expression and afterwards re-initializing CS accordingly.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U->getUser()))
+      if (CE->getNumUses() == 1 && CE->isCast()) {
+        U = &*CE->use_begin();
+        CS = CallSite(U->getUser());
+      }
+
+    if (!CS) {
+      NumInvalidAbstractCallSitesUnknownUse++;
+      return;
+    }
+  }
+
+  // Then handle direct or indirect calls. Thus, if U is the callee of the
+  // call site CS it is not a callback and we are done.
+  if (CS.isCallee(U)) {
+    NumDirectAbstractCallSites++;
+    return;
+  }
+
+  // If we cannot identify the broker function we cannot create a callback and
+  // invalidate the abstract call site.
+  Function *Callee = CS.getCalledFunction();
+  if (!Callee) {
+    NumInvalidAbstractCallSitesUnknownCallee++;
+    CS = CallSite();
+    return;
+  }
+
+  MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback);
+  if (!CallbackMD) {
+    NumInvalidAbstractCallSitesNoCallback++;
+    CS = CallSite();
+    return;
+  }
+
+  unsigned UseIdx = CS.getArgumentNo(U);
+  MDNode *CallbackEncMD = nullptr;
+  for (const MDOperand &Op : CallbackMD->operands()) {
+    MDNode *OpMD = cast<MDNode>(Op.get());
+    auto *CBCalleeIdxAsCM = cast<ConstantAsMetadata>(OpMD->getOperand(0));
+    uint64_t CBCalleeIdx =
+        cast<ConstantInt>(CBCalleeIdxAsCM->getValue())->getZExtValue();
+    if (CBCalleeIdx != UseIdx)
+      continue;
+    CallbackEncMD = OpMD;
+    break;
+  }
+
+  if (!CallbackEncMD) {
+    NumInvalidAbstractCallSitesNoCallback++;
+    CS = CallSite();
+    return;
+  }
+
+  NumCallbackCallSites++;
+
+  assert(CallbackEncMD->getNumOperands() >= 2 && "Incomplete !callback metadata");
+
+  unsigned NumCallOperands = CS.getNumArgOperands();
+  // Skip the var-arg flag at the end when reading the metadata.
+  for (unsigned u = 0, e = CallbackEncMD->getNumOperands() - 1; u < e; u++) {
+    Metadata *OpAsM = CallbackEncMD->getOperand(u).get();
+    auto *OpAsCM = cast<ConstantAsMetadata>(OpAsM);
+    assert(OpAsCM->getType()->isIntegerTy(64) &&
+           "Malformed !callback metadata");
+
+    int64_t Idx = cast<ConstantInt>(OpAsCM->getValue())->getSExtValue();
+    assert(-1 <= Idx && Idx <= NumCallOperands &&
+           "Out-of-bounds !callback metadata index");
+
+    CI.ParameterEncoding.push_back(Idx);
+  }
+
+  if (!Callee->isVarArg())
+    return;
+
+  Metadata *VarArgFlagAsM =
+      CallbackEncMD->getOperand(CallbackEncMD->getNumOperands() - 1).get();
+  auto *VarArgFlagAsCM = cast<ConstantAsMetadata>(VarArgFlagAsM);
+  assert(VarArgFlagAsCM->getType()->isIntegerTy(1) &&
+         "Malformed !callback metadata var-arg flag");
+
+  if (VarArgFlagAsCM->getValue()->isNullValue())
+    return;
+
+  // Add all variadic arguments at the end.
+  for (unsigned u = Callee->arg_size(); u < NumCallOperands; u++)
+    CI.ParameterEncoding.push_back(u);
+}
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index a5dc623e1a30..eb5760daecb3 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1,9 +1,8 @@
 //===- AsmWriter.cpp - Printing LLVM as an assembly file ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -621,7 +620,10 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   }
   case Type::VectorTyID: {
     VectorType *PTy = cast<VectorType>(Ty);
-    OS << "<" << PTy->getNumElements() << " x ";
+    OS << "<";
+    if (PTy->isScalable())
+      OS << "vscale x ";
+    OS << PTy->getNumElements() << " x ";
     print(PTy->getElementType(), OS);
     OS << '>';
     return;
@@ -1038,6 +1040,9 @@ void SlotTracker::processIndex() {
        TidIter != TheIndex->typeIds().end(); TidIter++)
     CreateTypeIdSlot(TidIter->second.first);
 
+  for (auto &TId : TheIndex->typeIdCompatibleVtableMap())
+    CreateGUIDSlot(GlobalValue::getGUID(TId.first));
+
   ST_DEBUG("end processIndex!\n");
 }
 
@@ -2002,6 +2007,19 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
   Out << ")";
 }
 
+static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
+                               TypePrinting *TypePrinter, SlotTracker *Machine,
+                               const Module *Context) {
+  Out << "!DICommonBlock("; // Opens the node; closed by the ")" below.
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printMetadata("scope", N->getRawScope(), false);
+  Printer.printMetadata("declaration", N->getRawDecl(), false);
+  Printer.printString("name", N->getName()); // Fields print in this order.
+  Printer.printMetadata("file", N->getRawFile());
+  Printer.printInt("line", N->getLineNo()); // Last field of the node.
+  Out << ")"; // Closes the "!DICommonBlock(" opened above.
+}
+
 static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
                          TypePrinting *TypePrinter, SlotTracker *Machine,
                          const Module *Context) {
@@ -2124,8 +2142,13 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
       assert(!OpStr.empty() && "Expected valid opcode");
 
       Out << FS << OpStr;
-      for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
-        Out << FS << I->getArg(A);
+      if (I->getOp() == dwarf::DW_OP_LLVM_convert) {
+        Out << FS << I->getArg(0);
+        Out << FS << dwarf::AttributeEncodingString(I->getArg(1));
+      } else {
+        for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
+          Out << FS << I->getArg(A);
+      }
     }
   } else {
     for (const auto &I : N->getElements())
@@ -2393,6 +2416,7 @@ public:
   void printGlobalVarSummary(const GlobalVarSummary *GS);
   void printFunctionSummary(const FunctionSummary *FS);
   void printTypeIdSummary(const TypeIdSummary &TIS);
+  void printTypeIdCompatibleVtableSummary(const TypeIdCompatibleVtableInfo &TI);
   void printTypeTestResolution(const TypeTestResolution &TTRes);
   void printArgs(const std::vector<uint64_t> &Args);
   void printWPDRes(const WholeProgramDevirtResolution &WPDRes);
@@ -2695,6 +2719,15 @@ void AssemblyWriter::printModuleSummaryIndex() {
     printTypeIdSummary(TidIter->second.second);
     Out << ") ; guid = " << TidIter->first << "\n";
   }
+
+  // Print the TypeIdCompatibleVtableMap entries.
+  for (auto &TId : TheIndex->typeIdCompatibleVtableMap()) {
+    auto GUID = GlobalValue::getGUID(TId.first);
+    Out << "^" << Machine.getGUIDSlot(GUID)
+        << " = typeidCompatibleVTable: (name: \"" << TId.first << "\"";
+    printTypeIdCompatibleVtableSummary(TId.second);
+    Out << ") ; guid = " << GUID << "\n";
+  }
 }
 
 static const char *
@@ -2777,6 +2810,19 @@ void AssemblyWriter::printTypeIdSummary(const TypeIdSummary &TIS) {
   Out << ")";
 }
 
+void AssemblyWriter::printTypeIdCompatibleVtableSummary(
+    const TypeIdCompatibleVtableInfo &TI) {
+  // Prints ", summary: (" then one "(offset: N, ^slot)" entry per element.
+  Out << ", summary: (";
+  FieldSeparator Sep;
+  for (const auto &Entry : TI) {
+    auto Slot = Machine.getGUIDSlot(Entry.VTableVI.getGUID());
+    Out << Sep << "(offset: " << Entry.AddressPointOffset << ", ^" << Slot
+        << ")";
+  }
+  Out << ")";
+}
+
 void AssemblyWriter::printArgs(const std::vector<uint64_t> &Args) {
   Out << "args: (";
   FieldSeparator FS;
@@ -2845,7 +2891,21 @@ void AssemblyWriter::printAliasSummary(const AliasSummary *AS) {
 }
 
 void AssemblyWriter::printGlobalVarSummary(const GlobalVarSummary *GS) {
-  Out << ", varFlags: (readonly: " << GS->VarFlags.ReadOnly << ")";
+  Out << ", varFlags: (readonly: " << GS->VarFlags.MaybeReadOnly << ", "
+      << "writeonly: " << GS->VarFlags.MaybeWriteOnly << ")";
+
+  auto VTableFuncs = GS->vTableFuncs();
+  if (!VTableFuncs.empty()) {
+    Out << ", vTableFuncs: (";
+    FieldSeparator FS;
+    for (auto &P : VTableFuncs) {
+      Out << FS;
+      Out << "(virtFunc: ^" << Machine.getGUIDSlot(P.FuncVI.getGUID())
+          << ", offset: " << P.VTableOffset;
+      Out << ")";
+    }
+    Out << ")";
+  }
 }
 
 static std::string getLinkageName(GlobalValue::LinkageTypes LT) {
@@ -3024,6 +3084,7 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
   Out << ", notEligibleToImport: " << GVFlags.NotEligibleToImport;
   Out << ", live: " << GVFlags.Live;
   Out << ", dsoLocal: " << GVFlags.DSOLocal;
+  Out << ", canAutoHide: " << GVFlags.CanAutoHide;
   Out << ")";
 
   if (Summary.getSummaryKind() == GlobalValueSummary::AliasKind)
@@ -3041,6 +3102,8 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
       Out << FS;
       if (Ref.isReadOnly())
         Out << "readonly ";
+      else if (Ref.isWriteOnly())
+        Out << "writeonly ";
       Out << "^" << Machine.getGUIDSlot(Ref.getGUID());
     }
     Out << ")";
@@ -3229,6 +3292,12 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
     printEscapedString(GV->getSection(), Out);
     Out << '"';
   }
+  if (GV->hasPartition()) {
+    Out << ", partition \"";
+    printEscapedString(GV->getPartition(), Out);
+    Out << '"';
+  }
+
   maybePrintComdat(Out, *GV);
   if (GV->getAlignment())
     Out << ", align " << GV->getAlignment();
@@ -3280,6 +3349,12 @@ void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) {
     writeOperand(IS, !isa<ConstantExpr>(IS));
   }
 
+  if (GIS->hasPartition()) {
+    Out << ", partition \"";
+    printEscapedString(GIS->getPartition(), Out);
+    Out << '"';
+  }
+
   printInfoComment(*GIS);
   Out << '\n';
 }
@@ -3420,6 +3495,11 @@ void AssemblyWriter::printFunction(const Function *F) {
     printEscapedString(F->getSection(), Out);
     Out << '"';
   }
+  if (F->hasPartition()) {
+    Out << " partition \"";
+    printEscapedString(F->getPartition(), Out);
+    Out << '"';
+  }
   maybePrintComdat(Out, *F);
   if (F->getAlignment())
     Out << " align " << F->getAlignment();
@@ -3478,23 +3558,24 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
 
 /// printBasicBlock - This member is called for each basic block in a method.
 void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+  bool IsEntryBlock = BB == &BB->getParent()->getEntryBlock();
   if (BB->hasName()) {              // Print out the label if it exists...
     Out << "\n";
     PrintLLVMName(Out, BB->getName(), LabelPrefix);
     Out << ':';
-  } else if (!BB->use_empty()) {      // Don't print block # of no uses...
-    Out << "\n; <label>:";
+  } else if (!IsEntryBlock) {
+    Out << "\n";
     int Slot = Machine.getLocalSlot(BB);
     if (Slot != -1)
       Out << Slot << ":";
     else
-      Out << "<badref>";
+      Out << "<badref>:";
   }
 
   if (!BB->getParent()) {
     Out.PadToColumn(50);
     Out << "; Error: Block without parent!";
-  } else if (BB != &BB->getParent()->getEntryBlock()) {  // Not the entry block?
+  } else if (!IsEntryBlock) {
     // Output predecessors for the block.
     Out.PadToColumn(50);
     Out << ";";
@@ -3837,6 +3918,51 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     writeOperand(II->getNormalDest(), true);
     Out << " unwind ";
     writeOperand(II->getUnwindDest(), true);
+  } else if (const CallBrInst *CBI = dyn_cast<CallBrInst>(&I)) {
+    Operand = CBI->getCalledValue();
+    FunctionType *FTy = CBI->getFunctionType();
+    Type *RetTy = FTy->getReturnType();
+    const AttributeList &PAL = CBI->getAttributes();
+
+    // Print the calling convention being used.
+    if (CBI->getCallingConv() != CallingConv::C) {
+      Out << " ";
+      PrintCallingConv(CBI->getCallingConv(), Out);
+    }
+
+    if (PAL.hasAttributes(AttributeList::ReturnIndex))
+      Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+
+    // If possible, print out the short form of the callbr instruction. We can
+    // only do this if the first argument is a pointer to a nonvararg function,
+    // and if the return type is not a pointer to a function.
+    //
+    Out << ' ';
+    TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out);
+    Out << ' ';
+    writeOperand(Operand, false);
+    Out << '(';
+    for (unsigned op = 0, Eop = CBI->getNumArgOperands(); op < Eop; ++op) {
+      if (op)
+        Out << ", ";
+      writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttributes(op));
+    }
+
+    Out << ')';
+    if (PAL.hasAttributes(AttributeList::FunctionIndex))
+      Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+    writeOperandBundles(CBI);
+
+    Out << "\n          to ";
+    writeOperand(CBI->getDefaultDest(), true);
+    Out << " [";
+    for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) {
+      if (i != 0)
+        Out << ", ";
+      writeOperand(CBI->getIndirectDest(i), true);
+    }
+    Out << ']';
   } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
     Out << ' ';
     if (AI->isUsedWithInAlloca())
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index bb0c072e4781..f989fa3b910e 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -1,9 +1,8 @@
 //===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -30,6 +29,7 @@
 namespace llvm {
 
 class LLVMContext;
+class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
@@ -42,7 +42,8 @@ protected:
   enum AttrEntryKind {
     EnumAttrEntry,
     IntAttrEntry,
-    StringAttrEntry
+    StringAttrEntry,
+    TypeAttrEntry,
   };
 
   AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -57,6 +58,7 @@ public:
   bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
   bool isIntAttribute() const { return KindID == IntAttrEntry; }
   bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
 
   bool hasAttribute(Attribute::AttrKind A) const;
   bool hasAttribute(StringRef Kind) const;
@@ -67,16 +69,20 @@ public:
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
 
+  Type *getValueAsType() const;
+
   /// Used when sorting the attributes.
   bool operator<(const AttributeImpl &AI) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     if (isEnumAttribute())
-      Profile(ID, getKindAsEnum(), 0);
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
     else if (isIntAttribute())
       Profile(ID, getKindAsEnum(), getValueAsInt());
-    else
+    else if (isStringAttribute())
       Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
   }
 
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -89,6 +95,12 @@ public:
     ID.AddString(Kind);
     if (!Values.empty()) ID.AddString(Values);
   }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind); // Key part 1: the attribute kind.
+    ID.AddPointer(Ty);   // Key part 2: the type, by pointer identity.
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -146,6 +158,18 @@ public:
   StringRef getStringValue() const { return Val; }
 };
 
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty; // Type value carried by the attribute; set only by the ctor.
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; } // Returns the stored pointer as-is.
+};
+
 //===----------------------------------------------------------------------===//
 /// \class
 /// This class represents a group of attributes that apply to one
@@ -155,9 +179,9 @@ class AttributeSetNode final
       private TrailingObjects<AttributeSetNode, Attribute> {
   friend TrailingObjects;
 
-  /// Bitset with a bit for each available attribute Attribute::AttrKind.
-  uint64_t AvailableAttrs;
   unsigned NumAttrs; ///< Number of attributes in this node.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableAttrs[12] = {};
 
   AttributeSetNode(ArrayRef<Attribute> Attrs);
 
@@ -176,7 +200,7 @@ public:
   unsigned getNumAttributes() const { return NumAttrs; }
 
   bool hasAttribute(Attribute::AttrKind Kind) const {
-    return AvailableAttrs & ((uint64_t)1) << Kind;
+    return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
   }
   bool hasAttribute(StringRef Kind) const;
   bool hasAttributes() const { return NumAttrs != 0; }
@@ -190,6 +214,7 @@ public:
   uint64_t getDereferenceableOrNullBytes() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
 
   using iterator = const Attribute *;
 
@@ -219,10 +244,10 @@ class AttributeListImpl final
   friend TrailingObjects;
 
 private:
-  /// Bitset with a bit for each available attribute Attribute::AttrKind.
-  uint64_t AvailableFunctionAttrs;
   LLVMContext &Context;
   unsigned NumAttrSets; ///< Number of entries in this set.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableFunctionAttrs[12] = {};
 
   // Helper fn for TrailingObjects class.
   size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
@@ -242,7 +267,7 @@ public:
   /// Return true if the AttributeSet or the FunctionIndex has an
   /// enum attribute of the given kind.
   bool hasFnAttribute(Attribute::AttrKind Kind) const {
-    return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
+    return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
   }
 
   using iterator = const AttributeSet *;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index ff46debb7a9e..bb90bcd7dd74 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1,9 +1,8 @@
 //===- Attributes.cpp - Implement AttributesList --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -122,6 +121,27 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
   return Attribute(PA);
 }
 
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+                         Type *Ty) {
+  // Type attributes are uniqued in the context's attribute set; the lookup
+  // key is the attribute kind together with the type pointer.
+  FoldingSetNodeID Key;
+  Key.AddInteger(Kind);
+  Key.AddPointer(Ty);
+
+  auto &Uniqued = Context.pImpl->AttrsSet;
+  void *InsertPos;
+  AttributeImpl *Impl = Uniqued.FindNodeOrInsertPos(Key, InsertPos);
+  if (Impl)
+    return Attribute(Impl);
+
+  // Nothing with this kind and type exists yet: create the node and insert
+  // it at the position reported by the failed lookup.
+  Impl = new TypeAttributeImpl(Kind, Ty);
+  Uniqued.InsertNode(Impl, InsertPos);
+  return Attribute(Impl);
+}
+
 Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
@@ -147,6 +167,10 @@ Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
   return get(Context, DereferenceableOrNull, Bytes);
 }
 
+Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
+  return get(Context, ByVal, Ty); // byval attribute carrying type Ty.
+}
+
 Attribute
 Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg) {
@@ -171,9 +195,13 @@ bool Attribute::isStringAttribute() const {
   return pImpl && pImpl->isStringAttribute();
 }
 
+bool Attribute::isTypeAttribute() const {
+  return pImpl && pImpl->isTypeAttribute(); // False when pImpl is null.
+}
+
 Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
-  assert((isEnumAttribute() || isIntAttribute()) &&
+  assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
   return pImpl->getKindAsEnum();
 }
@@ -199,6 +227,14 @@ StringRef Attribute::getValueAsString() const {
   return pImpl->getValueAsString();
 }
 
+/// Return the attribute's type value, or null for an empty attribute.
+Type *Attribute::getValueAsType() const {
+  if (!pImpl) return nullptr;
+  assert(isTypeAttribute() &&
+         "Invalid attribute type to get the value as a type!");
+  return pImpl->getValueAsType();
+}
+
 bool Attribute::hasAttribute(AttrKind Kind) const {
   return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
 }
@@ -247,14 +283,14 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "sanitize_address";
   if (hasAttribute(Attribute::SanitizeHWAddress))
     return "sanitize_hwaddress";
+  if (hasAttribute(Attribute::SanitizeMemTag))
+    return "sanitize_memtag";
   if (hasAttribute(Attribute::AlwaysInline))
     return "alwaysinline";
   if (hasAttribute(Attribute::ArgMemOnly))
     return "argmemonly";
   if (hasAttribute(Attribute::Builtin))
     return "builtin";
-  if (hasAttribute(Attribute::ByVal))
-    return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
   if (hasAttribute(Attribute::SwiftError))
@@ -287,6 +323,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "nocapture";
   if (hasAttribute(Attribute::NoDuplicate))
     return "noduplicate";
+  if (hasAttribute(Attribute::NoFree))
+    return "nofree";
   if (hasAttribute(Attribute::NoImplicitFloat))
     return "noimplicitfloat";
   if (hasAttribute(Attribute::NoInline))
@@ -299,6 +337,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "noredzone";
   if (hasAttribute(Attribute::NoReturn))
     return "noreturn";
+  if (hasAttribute(Attribute::NoSync))
+    return "nosync";
+  if (hasAttribute(Attribute::WillReturn))
+    return "willreturn";
   if (hasAttribute(Attribute::NoCfCheck))
     return "nocf_check";
   if (hasAttribute(Attribute::NoRecurse))
@@ -351,6 +393,21 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "zeroext";
   if (hasAttribute(Attribute::Cold))
     return "cold";
+  if (hasAttribute(Attribute::ImmArg))
+    return "immarg";
+
+  if (hasAttribute(Attribute::ByVal)) {
+    std::string Result;
+    Result += "byval";
+    if (Type *Ty = getValueAsType()) {
+      raw_string_ostream OS(Result);
+      Result += '(';
+      Ty->print(OS, false, true);
+      OS.flush();
+      Result += ')';
+    }
+    return Result;
+  }
 
   // FIXME: These should be output like this:
   //
@@ -450,6 +507,8 @@ void IntAttributeImpl::anchor() {}
 
 void StringAttributeImpl::anchor() {}
 
+void TypeAttributeImpl::anchor() {} // Empty out-of-line definition.
+
 bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
   if (isStringAttribute()) return false;
   return getKindAsEnum() == A;
@@ -461,7 +520,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
 }
 
 Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
-  assert(isEnumAttribute() || isIntAttribute());
+  assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
   return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
 }
 
@@ -480,6 +539,11 @@ StringRef AttributeImpl::getValueAsString() const {
   return static_cast<const StringAttributeImpl *>(this)->getStringValue();
 }
 
+Type *AttributeImpl::getValueAsType() const {
+  assert(isTypeAttribute()); // The downcast below is valid only for this kind.
+  return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
+}
+
 bool AttributeImpl::operator<(const AttributeImpl &AI) const {
   // This sorts the attributes with Attribute::AttrKinds coming first (sorted
   // relative to their enum value) and then strings.
@@ -487,10 +551,23 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
     if (AI.isIntAttribute()) return true;
     if (AI.isStringAttribute()) return true;
+    if (AI.isTypeAttribute()) return true;
+  }
+
+  if (isTypeAttribute()) {
+    if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) {
+      assert(getKindAsEnum() != AI.getKindAsEnum() &&
+             "Comparison of types would be unstable");
+      return getKindAsEnum() < AI.getKindAsEnum();
+    }
+    if (AI.isIntAttribute()) return true;
+    if (AI.isStringAttribute()) return true;
   }
 
   if (isIntAttribute()) {
     if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) return false;
     if (AI.isIntAttribute()) {
       if (getKindAsEnum() == AI.getKindAsEnum())
         return getValueAsInt() < AI.getValueAsInt();
@@ -499,7 +576,9 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isStringAttribute()) return true;
   }
 
+  assert(isStringAttribute());
   if (AI.isEnumAttribute()) return false;
+  if (AI.isTypeAttribute()) return false;
   if (AI.isIntAttribute()) return false;
   if (getKindAsString() == AI.getKindAsString())
     return getValueAsString() < AI.getValueAsString();
@@ -607,6 +686,10 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
   return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
 }
 
+Type *AttributeSet::getByValType() const {
+  return SetNode ? SetNode->getByValType() : nullptr; // Null if no SetNode.
+}
+
 std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
   return SetNode ? SetNode->getAllocSizeArgs()
                  : std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -637,13 +720,18 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
 //===----------------------------------------------------------------------===//
 
 AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
-    : AvailableAttrs(0), NumAttrs(Attrs.size()) {
+    : NumAttrs(Attrs.size()) {
   // There's memory after the node where we can store the entries in.
   llvm::copy(Attrs, getTrailingObjects<Attribute>());
 
+  static_assert(Attribute::EndAttrKinds <=
+                    sizeof(AvailableAttrs) * CHAR_BIT,
+                "Too many attributes");
+
   for (const auto I : *this) {
     if (!I.isStringAttribute()) {
-      AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
+      Attribute::AttrKind Kind = I.getKindAsEnum();
+      AvailableAttrs[Kind / 8] |= 1ULL << (Kind % 8);
     }
   }
 }
@@ -690,6 +778,9 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
 
     Attribute Attr;
     switch (Kind) {
+    case Attribute::ByVal:
+      Attr = Attribute::getWithByValType(C, B.getByValType());
+      break;
     case Attribute::Alignment:
       Attr = Attribute::getWithAlignment(C, B.getAlignment());
       break;
@@ -759,6 +850,13 @@ unsigned AttributeSetNode::getStackAlignment() const {
   return 0;
 }
 
+Type *AttributeSetNode::getByValType() const {
+  for (const auto I : *this) // Linear scan over this node's attributes.
+    if (I.hasAttribute(Attribute::ByVal))
+      return I.getValueAsType();
+  return nullptr; // No byval attribute is present in this node.
+}
+
 uint64_t AttributeSetNode::getDereferenceableBytes() const {
   for (const auto I : *this)
     if (I.hasAttribute(Attribute::Dereferenceable))
@@ -805,7 +903,7 @@ static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
 
 AttributeListImpl::AttributeListImpl(LLVMContext &C,
                                      ArrayRef<AttributeSet> Sets)
-    : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) {
+    : Context(C), NumAttrSets(Sets.size()) {
   assert(!Sets.empty() && "pointless AttributeListImpl");
 
   // There's memory after the node where we can store the entries in.
@@ -818,8 +916,10 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
   static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
                 "function should be stored in slot 0");
   for (const auto I : Sets[0]) {
-    if (!I.isStringAttribute())
-      AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum();
+    if (!I.isStringAttribute()) {
+      Attribute::AttrKind Kind = I.getKindAsEnum();
+      AvailableFunctionAttrs[Kind / 8] |= 1ULL << (Kind % 8);
+    }
   }
 }
 
@@ -1257,6 +1357,11 @@ unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
+/// Return the byval type of the parameter at argument position \p Index.
+Type *AttributeList::getParamByValType(unsigned Index) const {
+  return getAttributes(Index + FirstArgIndex).getByValType();
+}
+
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
@@ -1335,6 +1440,7 @@ void AttrBuilder::clear() {
   TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
+  ByValType = nullptr;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1359,6 +1465,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
     Alignment = Attr.getAlignment();
   else if (Kind == Attribute::StackAlignment)
     StackAlignment = Attr.getStackAlignment();
+  else if (Kind == Attribute::ByVal)
+    ByValType = Attr.getValueAsType();
   else if (Kind == Attribute::Dereferenceable)
     DerefBytes = Attr.getDereferenceableBytes();
   else if (Kind == Attribute::DereferenceableOrNull)
@@ -1381,6 +1489,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     Alignment = 0;
   else if (Val == Attribute::StackAlignment)
     StackAlignment = 0;
+  else if (Val == Attribute::ByVal)
+    ByValType = nullptr;
   else if (Val == Attribute::Dereferenceable)
     DerefBytes = 0;
   else if (Val == Attribute::DereferenceableOrNull)
@@ -1463,6 +1573,12 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
   return *this;
 }
 
+AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
+  Attrs[Attribute::ByVal] = true; // Mark byval as present in the builder.
+  ByValType = Ty;                 // Remember the type that goes with it.
+  return *this;                   // Returns the builder itself.
+}
+
 AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   // FIXME: What if both have alignments, but they don't match?!
   if (!Alignment)
@@ -1480,6 +1596,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   if (!AllocSizeArgs)
     AllocSizeArgs = B.AllocSizeArgs;
 
+  if (!ByValType)
+    ByValType = B.ByValType;
+
   Attrs |= B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1505,6 +1624,9 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
   if (B.AllocSizeArgs)
     AllocSizeArgs = 0;
 
+  if (B.ByValType)
+    ByValType = nullptr;
+
   Attrs &= ~B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1564,7 +1686,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
       return false;
 
   return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
-         DerefBytes == B.DerefBytes;
+         DerefBytes == B.DerefBytes && ByValType == B.ByValType;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index b2eb8b09982e..a2d820352825 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -1,9 +1,8 @@
 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -199,14 +198,14 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
-      Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
-      Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
-      Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
-      Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
-      Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
-      Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
-      Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
-      Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
+      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
+      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
+      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
+      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
+      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
+      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
+      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
+      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
@@ -216,7 +215,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
       Name == "avx512.cvtusi2sd" || // Added in 7.0
       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
-      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
       Name == "sse2.pmulu.dq" || // Added in 7.0
       Name == "sse41.pmuldq" || // Added in 7.0
       Name == "avx2.pmulu.dq" || // Added in 7.0
@@ -300,6 +298,11 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
+      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
+      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
+      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
+      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
+      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
       Name == "sse.cvtsi2ss" || // Added in 7.0
       Name == "sse.cvtsi642ss" || // Added in 7.0
       Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -338,6 +341,16 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.load.") || // Added in 3.9
       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
+      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
+      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
+      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
+      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
+      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
+      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
+      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
+      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
+      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
+      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
       Name == "sse42.crc32.64.8" || // Added in 3.4
       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
@@ -362,8 +375,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name == "xop.vpcmov.256" || // Added in 5.0
       Name.startswith("avx512.mask.move.s") || // Added in 4.0
       Name.startswith("avx512.cvtmask2") || // Added in 5.0
-      (Name.startswith("xop.vpcom") && // Added in 3.2
-       F->arg_size() == 2) ||
+      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
       Name.startswith("xop.vprot") || // Added in 8.0
       Name.startswith("avx512.prol") || // Added in 8.0
       Name.startswith("avx512.pror") || // Added in 8.0
@@ -373,8 +385,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
       Name.startswith("avx512.ptestm") || //Added in 6.0
       Name.startswith("avx512.ptestnm") || //Added in 6.0
-      Name.startswith("sse2.pavg") || // Added in 6.0
-      Name.startswith("avx2.pavg") || // Added in 6.0
       Name.startswith("avx512.mask.pavg")) // Added in 6.0
     return true;
 
@@ -469,12 +479,23 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
     }
   }
 
+  if (Name == "seh.recoverfp") {
+    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+    return true;
+  }
+
   return false;
 }
 
 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   assert(F && "Illegal to upgrade a non-existent Function.");
 
+  // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
+  if (F->getName() == "clang.arc.use") {
+    NewFn = nullptr;
+    return true;
+  }
+
   // Quickly eliminate it, if it's not a candidate.
   StringRef Name = F->getName();
   if (Name.size() <= 8 || !Name.startswith("llvm."))
@@ -544,9 +565,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
       return true;
     }
-    if (Name == "x86.seh.recoverfp") {
-      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
-      return true;
+    if (Name.startswith("aarch64.neon.addp")) {
+      if (F->arg_size() != 2)
+        break; // Invalid IR.
+      auto fArgs = F->getFunctionType()->params();
+      VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
+      if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          Intrinsic::aarch64_neon_faddp, fArgs);
+        return true;
+      }
     }
     break;
   }
@@ -574,6 +602,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     }
     break;
   }
+  case 'e': {
+    SmallVector<StringRef, 2> Groups;
+    Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+    if (R.match(Name, &Groups)) {
+      Intrinsic::ID ID = Intrinsic::not_intrinsic;
+      if (Groups[1] == "fadd")
+        ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+      if (Groups[1] == "fmul")
+        ID = Intrinsic::experimental_vector_reduce_v2_fmul;
+
+      if (ID != Intrinsic::not_intrinsic) {
+        rename(F);
+        auto Args = F->getFunctionType()->params();
+        Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+        return true;
+      }
+    }
+    break;
+  }
   case 'i':
   case 'l': {
     bool IsLifetimeStart = Name.startswith("lifetime.start");
@@ -716,6 +764,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                         .Cases("clz.ll", "popc.ll", "h2f", true)
                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
+                        .StartsWith("atomic.load.add.f32.p", true)
+                        .StartsWith("atomic.load.add.f64.p", true)
                         .Default(false);
       if (Expand) {
         NewFn = nullptr;
@@ -729,7 +779,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     // address space.
     if (Name.startswith("objectsize.")) {
       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
-      if (F->arg_size() == 2 ||
+      if (F->arg_size() == 2 || F->arg_size() == 3 ||
           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
         rename(F);
         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
@@ -777,9 +827,35 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
   return Upgraded;
 }
 
-bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
-  // Nothing to do yet.
-  return false;
+GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
+                          GV->getName() == "llvm.global_dtors")) ||
+      !GV->hasInitializer())
+    return nullptr;
+  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
+  if (!ATy)
+    return nullptr;
+  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+  if (!STy || STy->getNumElements() != 2)
+    return nullptr;
+
+  LLVMContext &C = GV->getContext();
+  IRBuilder<> IRB(C);
+  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
+                               IRB.getInt8PtrTy());
+  Constant *Init = GV->getInitializer();
+  unsigned N = Init->getNumOperands();
+  std::vector<Constant *> NewCtors(N);
+  for (unsigned i = 0; i != N; ++i) {
+    auto Ctor = cast<Constant>(Init->getOperand(i));
+    NewCtors[i] = ConstantStruct::get(
+        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
+        Constant::getNullValue(IRB.getInt8PtrTy()));
+  }
+  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
+
+  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
+                            NewInit, GV->getName());
 }
 
 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
@@ -1053,6 +1129,45 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
   return Res;
 }
 
+static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
+                              bool IsSigned) {
+  Type *Ty = CI.getType();
+  Value *LHS = CI.getArgOperand(0);
+  Value *RHS = CI.getArgOperand(1);
+
+  CmpInst::Predicate Pred;
+  switch (Imm) {
+  case 0x0:
+    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+    break;
+  case 0x1:
+    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+    break;
+  case 0x2:
+    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+    break;
+  case 0x3:
+    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+    break;
+  case 0x4:
+    Pred = ICmpInst::ICMP_EQ;
+    break;
+  case 0x5:
+    Pred = ICmpInst::ICMP_NE;
+    break;
+  case 0x6:
+    return Constant::getNullValue(Ty); // FALSE
+  case 0x7:
+    return Constant::getAllOnesValue(Ty); // TRUE
+  default:
+    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
+  }
+
+  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
+  Value *Ext = Builder.CreateSExt(Cmp, Ty);
+  return Ext;
+}
+
 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                     bool IsShiftRight, bool ZeroMask) {
   Type *Ty = CI.getType();
@@ -1110,16 +1225,16 @@ static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Passthru, Value *Mask,
                                 bool Aligned) {
+  Type *ValTy = Passthru->getType();
   // Cast the pointer to the right type.
-  Ptr = Builder.CreateBitCast(Ptr,
-                             llvm::PointerType::getUnqual(Passthru->getType()));
+  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
   unsigned Align =
     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
 
   // If the mask is all ones just emit a regular store.
   if (const auto *C = dyn_cast<Constant>(Mask))
     if (C->isAllOnesValue())
-      return Builder.CreateAlignedLoad(Ptr, Align);
+      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
 
   // Convert the mask from an integer type to a vector of i1.
   unsigned NumElts = Passthru->getType()->getVectorNumElements();
@@ -1462,6 +1577,36 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
     else
       llvm_unreachable("Unexpected intrinsic");
+  } else if (Name.startswith("conflict.")) {
+    if (Name[9] == 'd' && VecWidth == 128)
+      IID = Intrinsic::x86_avx512_conflict_d_128;
+    else if (Name[9] == 'd' && VecWidth == 256)
+      IID = Intrinsic::x86_avx512_conflict_d_256;
+    else if (Name[9] == 'd' && VecWidth == 512)
+      IID = Intrinsic::x86_avx512_conflict_d_512;
+    else if (Name[9] == 'q' && VecWidth == 128)
+      IID = Intrinsic::x86_avx512_conflict_q_128;
+    else if (Name[9] == 'q' && VecWidth == 256)
+      IID = Intrinsic::x86_avx512_conflict_q_256;
+    else if (Name[9] == 'q' && VecWidth == 512)
+      IID = Intrinsic::x86_avx512_conflict_q_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
+  } else if (Name.startswith("pavg.")) {
+    if (Name[5] == 'b' && VecWidth == 128)
+      IID = Intrinsic::x86_sse2_pavg_b;
+    else if (Name[5] == 'b' && VecWidth == 256)
+      IID = Intrinsic::x86_avx2_pavg_b;
+    else if (Name[5] == 'b' && VecWidth == 512)
+      IID = Intrinsic::x86_avx512_pavg_b_512;
+    else if (Name[5] == 'w' && VecWidth == 128)
+      IID = Intrinsic::x86_sse2_pavg_w;
+    else if (Name[5] == 'w' && VecWidth == 256)
+      IID = Intrinsic::x86_avx2_pavg_w;
+    else if (Name[5] == 'w' && VecWidth == 512)
+      IID = Intrinsic::x86_avx512_pavg_w_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
   } else
     return false;
 
@@ -1503,6 +1648,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     // Get the Function's name.
     StringRef Name = F->getName();
 
+    // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
+    // from upgrader because the optimizer now only recognizes intrinsics for
+    // ARC runtime calls.
+    if (Name == "clang.arc.use") {
+      CI->eraseFromParent();
+      return;
+    }
+
     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
     Name = Name.substr(5);
 
@@ -1917,38 +2070,47 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                          Name == "avx.cvtdq2.ps.256" ||
                          Name.startswith("avx512.mask.cvtdq2pd.") ||
                          Name.startswith("avx512.mask.cvtudq2pd.") ||
-                         Name == "avx512.mask.cvtdq2ps.128" ||
-                         Name == "avx512.mask.cvtdq2ps.256" ||
-                         Name == "avx512.mask.cvtudq2ps.128" ||
-                         Name == "avx512.mask.cvtudq2ps.256" ||
-                         Name == "avx512.mask.cvtqq2pd.128" ||
-                         Name == "avx512.mask.cvtqq2pd.256" ||
-                         Name == "avx512.mask.cvtuqq2pd.128" ||
-                         Name == "avx512.mask.cvtuqq2pd.256" ||
+                         Name.startswith("avx512.mask.cvtdq2ps.") ||
+                         Name.startswith("avx512.mask.cvtudq2ps.") ||
+                         Name.startswith("avx512.mask.cvtqq2pd.") ||
+                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
+                         Name == "avx512.mask.cvtqq2ps.256" ||
+                         Name == "avx512.mask.cvtqq2ps.512" ||
+                         Name == "avx512.mask.cvtuqq2ps.256" ||
+                         Name == "avx512.mask.cvtuqq2ps.512" ||
                          Name == "sse2.cvtps2pd" ||
                          Name == "avx.cvt.ps2.pd.256" ||
                          Name == "avx512.mask.cvtps2pd.128" ||
                          Name == "avx512.mask.cvtps2pd.256")) {
       Type *DstTy = CI->getType();
       Rep = CI->getArgOperand(0);
+      Type *SrcTy = Rep->getType();
 
       unsigned NumDstElts = DstTy->getVectorNumElements();
-      if (NumDstElts < Rep->getType()->getVectorNumElements()) {
+      if (NumDstElts < SrcTy->getVectorNumElements()) {
         assert(NumDstElts == 2 && "Unexpected vector size");
         uint32_t ShuffleMask[2] = { 0, 1 };
         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
       }
 
-      bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
+      bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
       if (IsPS2PD)
         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
-      else if (IsUnsigned)
-        Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
-      else
-        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
+      else if (CI->getNumArgOperands() == 4 &&
+               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
+        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
+                                       : Intrinsic::x86_avx512_sitofp_round;
+        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
+                                                { DstTy, SrcTy });
+        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
+      } else {
+        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
+                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
+      }
 
-      if (CI->getNumArgOperands() == 3)
+      if (CI->getNumArgOperands() >= 3)
         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                             CI->getArgOperand(1));
     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
@@ -1989,52 +2151,56 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                                                 Intrinsic::masked_compressstore,
                                                 ResultTy);
       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
+    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
+                         Name.startswith("avx512.mask.expand."))) {
+      Type *ResultTy = CI->getType();
+
+      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
+                                     ResultTy->getVectorNumElements());
+
+      bool IsCompress = Name[12] == 'c';
+      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
+                                     : Intrinsic::x86_avx512_mask_expand;
+      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
+      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
+                                       MaskVec });
     } else if (IsX86 && Name.startswith("xop.vpcom")) {
-      Intrinsic::ID intID;
-      if (Name.endswith("ub"))
-        intID = Intrinsic::x86_xop_vpcomub;
-      else if (Name.endswith("uw"))
-        intID = Intrinsic::x86_xop_vpcomuw;
-      else if (Name.endswith("ud"))
-        intID = Intrinsic::x86_xop_vpcomud;
-      else if (Name.endswith("uq"))
-        intID = Intrinsic::x86_xop_vpcomuq;
-      else if (Name.endswith("b"))
-        intID = Intrinsic::x86_xop_vpcomb;
-      else if (Name.endswith("w"))
-        intID = Intrinsic::x86_xop_vpcomw;
-      else if (Name.endswith("d"))
-        intID = Intrinsic::x86_xop_vpcomd;
-      else if (Name.endswith("q"))
-        intID = Intrinsic::x86_xop_vpcomq;
+      bool IsSigned;
+      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
+          Name.endswith("uq"))
+        IsSigned = false;
+      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
+               Name.endswith("q"))
+        IsSigned = true;
       else
         llvm_unreachable("Unknown suffix");
 
-      Name = Name.substr(9); // strip off "xop.vpcom"
       unsigned Imm;
-      if (Name.startswith("lt"))
-        Imm = 0;
-      else if (Name.startswith("le"))
-        Imm = 1;
-      else if (Name.startswith("gt"))
-        Imm = 2;
-      else if (Name.startswith("ge"))
-        Imm = 3;
-      else if (Name.startswith("eq"))
-        Imm = 4;
-      else if (Name.startswith("ne"))
-        Imm = 5;
-      else if (Name.startswith("false"))
-        Imm = 6;
-      else if (Name.startswith("true"))
-        Imm = 7;
-      else
-        llvm_unreachable("Unknown condition");
+      if (CI->getNumArgOperands() == 3) {
+        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+      } else {
+        Name = Name.substr(9); // strip off "xop.vpcom"
+        if (Name.startswith("lt"))
+          Imm = 0;
+        else if (Name.startswith("le"))
+          Imm = 1;
+        else if (Name.startswith("gt"))
+          Imm = 2;
+        else if (Name.startswith("ge"))
+          Imm = 3;
+        else if (Name.startswith("eq"))
+          Imm = 4;
+        else if (Name.startswith("ne"))
+          Imm = 5;
+        else if (Name.startswith("false"))
+          Imm = 6;
+        else if (Name.startswith("true"))
+          Imm = 7;
+        else
+          llvm_unreachable("Unknown condition");
+      }
 
-      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
-      Rep =
-          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
-                                     Builder.getInt8(Imm)});
+      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
       Value *Sel = CI->getArgOperand(2);
       Value *NotSel = Builder.CreateNot(Sel);
@@ -2103,6 +2269,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       if (CI->getNumArgOperands() == 3)
         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                             CI->getArgOperand(1));
+    } else if (Name == "avx512.mask.pmov.qd.256" ||
+               Name == "avx512.mask.pmov.qd.512" ||
+               Name == "avx512.mask.pmov.wb.256" ||
+               Name == "avx512.mask.pmov.wb.512") {
+      Type *Ty = CI->getArgOperand(1)->getType();
+      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
+      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+                          CI->getArgOperand(1));
     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                          Name == "avx2.vbroadcasti128")) {
       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
@@ -2111,7 +2285,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Type *VT = VectorType::get(EltTy, NumSrcElts);
       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                             PointerType::getUnqual(VT));
-      Value *Load = Builder.CreateAlignedLoad(Op, 1);
+      Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
       if (NumSrcElts == 2)
         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                           { 0, 1, 0, 1 });
@@ -2857,28 +3031,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       // Convert the type of the pointer to a pointer to the stored type.
       Value *BC =
           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
-      LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
+      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
       Rep = LI;
-    } else if (IsX86 &&
-               (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
-                Name.startswith("avx512.mask.pavg"))) {
-      // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
-      // llvm.x86.avx512.mask.pavg.b/w
-      Value *A = CI->getArgOperand(0);
-      Value *B = CI->getArgOperand(1);
-      VectorType *ZextType = VectorType::getExtendedElementVectorType(
-          cast<VectorType>(A->getType()));
-      Value *ExtendedA = Builder.CreateZExt(A, ZextType);
-      Value *ExtendedB = Builder.CreateZExt(B, ZextType);
-      Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
-      Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
-      Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
-      Rep = Builder.CreateTrunc(ShiftR, A->getType());
-      if (CI->getNumArgOperands() > 2) {
-        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
-                            CI->getArgOperand(2));
-      }
     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                          Name.startswith("fma.vfmsub.") ||
                          Name.startswith("fma.vfnmadd.") ||
@@ -3273,6 +3428,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Value *Cmp = Builder.CreateICmpSGE(
           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
+                          Name.startswith("atomic.load.add.f64.p"))) {
+      Value *Ptr = CI->getArgOperand(0);
+      Value *Val = CI->getArgOperand(1);
+      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
+                                    AtomicOrdering::SequentiallyConsistent);
     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                           Name == "max.ui" || Name == "max.ull")) {
       Value *Arg0 = CI->getArgOperand(0);
@@ -3334,7 +3495,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     DefaultCase();
     return;
   }
-
+  case Intrinsic::experimental_vector_reduce_v2_fmul: {
+    SmallVector<Value *, 2> Args;
+    if (CI->isFast())
+      Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
+    else
+      Args.push_back(CI->getOperand(0));
+    Args.push_back(CI->getOperand(1));
+    NewCall = Builder.CreateCall(NewFn, Args);
+    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+    break;
+  }
+  case Intrinsic::experimental_vector_reduce_v2_fadd: {
+    SmallVector<Value *, 2> Args;
+    if (CI->isFast())
+      Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
+    else
+      Args.push_back(CI->getOperand(0));
+    Args.push_back(CI->getOperand(1));
+    NewCall = Builder.CreateCall(NewFn, Args);
+    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+    break;
+  }
   case Intrinsic::arm_neon_vld1:
   case Intrinsic::arm_neon_vld2:
   case Intrinsic::arm_neon_vld3:
@@ -3371,8 +3553,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                    ? Builder.getFalse()
                                    : CI->getArgOperand(2);
+    Value *Dynamic =
+        CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
     NewCall = Builder.CreateCall(
-        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
+        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
     break;
   }
 
@@ -3649,8 +3833,8 @@ bool llvm::UpgradeDebugInfo(Module &M) {
 
 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
   bool Changed = false;
-  NamedMDNode *ModRetainReleaseMarker =
-      M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
+  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
   if (ModRetainReleaseMarker) {
     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
     if (Op) {
@@ -3660,11 +3844,11 @@ bool llvm::UpgradeRetainReleaseMarker(Module &M) {
         ID->getString().split(ValueComp, "#");
         if (ValueComp.size() == 2) {
           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
-          Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
-          ModRetainReleaseMarker->setOperand(0,
-                                             MDNode::get(M.getContext(), Ops));
-          Changed = true;
+          ID = MDString::get(M.getContext(), NewValue);
         }
+        M.addModuleFlag(Module::Error, MarkerKey, ID);
+        M.eraseNamedMetadata(ModRetainReleaseMarker);
+        Changed = true;
       }
     }
   }
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 375924360dda..34410712645d 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -1,9 +1,8 @@
 //===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -300,7 +299,7 @@ iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
 /// called while the predecessor still refers to this block.
 ///
 void BasicBlock::removePredecessor(BasicBlock *Pred,
-                                   bool DontDeleteUselessPHIs) {
+                                   bool KeepOneInputPHIs) {
   assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
           find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
          "removePredecessor: BB is not a predecessor!");
@@ -331,11 +330,11 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
   }
 
   // <= Two predecessors BEFORE I remove one?
-  if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+  if (max_idx <= 2 && !KeepOneInputPHIs) {
     // Yup, loop through and nuke the PHI nodes
     while (PHINode *PN = dyn_cast<PHINode>(&front())) {
       // Remove the predecessor first.
-      PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+      PN->removeIncomingValue(Pred, !KeepOneInputPHIs);
 
       // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
       if (max_idx == 2) {
@@ -360,7 +359,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
       // If all incoming values to the Phi are the same, we can replace the Phi
       // with that value.
       Value* PNV = nullptr;
-      if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+      if (!KeepOneInputPHIs && (PNV = PN->hasConstantValue()))
         if (PNV != PN) {
           PN->replaceAllUsesWith(PNV);
           PN->eraseFromParent();
@@ -426,41 +425,37 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
   // Now we must loop through all of the successors of the New block (which
   // _were_ the successors of the 'this' block), and update any PHI nodes in
   // successors.  If there were PHI nodes in the successors, then they need to
-  // know that incoming branches will be from New, not from Old.
+  // know that incoming branches will be from New, not from Old (this).
   //
-  for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
-    // Loop over any phi nodes in the basic block, updating the BB field of
-    // incoming values...
-    BasicBlock *Successor = *I;
-    for (auto &PN : Successor->phis()) {
-      int Idx = PN.getBasicBlockIndex(this);
-      while (Idx != -1) {
-        PN.setIncomingBlock((unsigned)Idx, New);
-        Idx = PN.getBasicBlockIndex(this);
-      }
-    }
-  }
+  New->replaceSuccessorsPhiUsesWith(this, New);
   return New;
 }
 
-void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) {
+  // N.B. This might not be a complete BasicBlock, so don't assume
+  // that it ends with a non-phi instruction.
+  for (iterator II = begin(), IE = end(); II != IE; ++II) {
+    PHINode *PN = dyn_cast<PHINode>(II);
+    if (!PN)
+      break;
+    PN->replaceIncomingBlockWith(Old, New);
+  }
+}
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *Old,
+                                              BasicBlock *New) {
   Instruction *TI = getTerminator();
   if (!TI)
     // Cope with being called on a BasicBlock that doesn't have a terminator
     // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
     return;
-  for (BasicBlock *Succ : successors(TI)) {
-    // N.B. Succ might not be a complete BasicBlock, so don't assume
-    // that it ends with a non-phi instruction.
-    for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
-      PHINode *PN = dyn_cast<PHINode>(II);
-      if (!PN)
-        break;
-      int i;
-      while ((i = PN->getBasicBlockIndex(this)) >= 0)
-        PN->setIncomingBlock(i, New);
-    }
-  }
+  llvm::for_each(successors(TI), [Old, New](BasicBlock *Succ) {
+    Succ->replacePhiUsesWith(Old, New);
+  });
+}
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+  this->replaceSuccessorsPhiUsesWith(this, New);
 }
 
 /// Return true if this basic block is a landing pad. I.e., it's
diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp
index 3b1f7d62cdae..c9f715daf457 100644
--- a/lib/IR/Comdat.cpp
+++ b/lib/IR/Comdat.cpp
@@ -1,9 +1,8 @@
 //===- Comdat.cpp - Implement Metadata classes ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 57de6b042303..835fbb3443b8 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1,9 +1,8 @@
 //===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -268,19 +268,20 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
     ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
     if (!Amt)
       return nullptr;
-    unsigned ShAmt = Amt->getZExtValue();
+    APInt ShAmt = Amt->getValue();
     // Cannot analyze non-byte shifts.
     if ((ShAmt & 7) != 0)
       return nullptr;
-    ShAmt >>= 3;
+    ShAmt.lshrInPlace(3);
 
     // If the extract is known to be all zeros, return zero.
-    if (ByteStart >= CSize-ShAmt)
-      return Constant::getNullValue(IntegerType::get(CE->getContext(),
-                                                     ByteSize*8));
+    if (ShAmt.uge(CSize - ByteStart))
+      return Constant::getNullValue(
+          IntegerType::get(CE->getContext(), ByteSize * 8));
     // If the extract is known to be fully in the input, extract it.
-    if (ByteStart+ByteSize+ShAmt <= CSize)
-      return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+    if (ShAmt.ule(CSize - (ByteStart + ByteSize)))
+      return ExtractConstantBytes(CE->getOperand(0),
+                                  ByteStart + ShAmt.getZExtValue(), ByteSize);
 
     // TODO: Handle the 'partially zero' case.
     return nullptr;
@@ -290,19 +291,20 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
     ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
     if (!Amt)
       return nullptr;
-    unsigned ShAmt = Amt->getZExtValue();
+    APInt ShAmt = Amt->getValue();
     // Cannot analyze non-byte shifts.
     if ((ShAmt & 7) != 0)
       return nullptr;
-    ShAmt >>= 3;
+    ShAmt.lshrInPlace(3);
 
     // If the extract is known to be all zeros, return zero.
-    if (ByteStart+ByteSize <= ShAmt)
-      return Constant::getNullValue(IntegerType::get(CE->getContext(),
-                                                     ByteSize*8));
+    if (ShAmt.uge(ByteStart + ByteSize))
+      return Constant::getNullValue(
+          IntegerType::get(CE->getContext(), ByteSize * 8));
     // If the extract is known to be fully in the input, extract it.
-    if (ByteStart >= ShAmt)
-      return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+    if (ShAmt.ule(ByteStart))
+      return ExtractConstantBytes(CE->getOperand(0),
+                                  ByteStart - ShAmt.getZExtValue(), ByteSize);
 
     // TODO: Handle the 'partially zero' case.
     return nullptr;
@@ -916,6 +918,52 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
   return ConstantVector::get(Result);
 }
 
+/// Try to fold the unary operation \p Opcode applied to the constant \p C.
+/// Returns the folded constant, or nullptr when no fold is known.
+Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
+  assert(Instruction::isUnaryOp(Opcode) && "Non-unary instruction detected");
+
+  // A scalar undef operand is dealt with up front. Vector operands are always
+  // evaluated one element at a time further down.
+  const bool IsScalarUndef = isa<UndefValue>(C) && !C->getType()->isVectorTy();
+
+  if (IsScalarUndef) {
+    switch (static_cast<Instruction::UnaryOps>(Opcode)) {
+    case Instruction::FNeg:
+      return C; // -undef -> undef
+    case Instruction::UnaryOpsEnd:
+      llvm_unreachable("Invalid UnaryOp");
+    }
+  }
+
+  // The switch above must have consumed every scalar undef operand.
+  assert(!IsScalarUndef && "Unexpected UndefValue");
+  // Only floating-point unary operators exist right now.
+  assert(!isa<ConstantInt>(C) && "Unexpected Integer UnaryOp");
+
+  if (auto *FPConst = dyn_cast<ConstantFP>(C)) {
+    if (Opcode == Instruction::FNeg)
+      return ConstantFP::get(C->getContext(), neg(FPConst->getValueAPF()));
+    return nullptr;
+  }
+
+  if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
+    // Fold lane by lane and rebuild a vector constant from the results.
+    SmallVector<Constant *, 16> Lanes;
+    Type *IdxTy = IntegerType::get(VecTy->getContext(), 32);
+    const unsigned NumElts = VecTy->getNumElements();
+    for (unsigned Lane = 0; Lane != NumElts; ++Lane) {
+      Constant *LaneElt =
+          ConstantExpr::getExtractElement(C, ConstantInt::get(IdxTy, Lane));
+      Lanes.push_back(ConstantExpr::get(Opcode, LaneElt));
+    }
+    return ConstantVector::get(Lanes);
+  }
+
+  // We don't know how to fold this.
+  return nullptr;
+}
+
 Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
                                               Constant *C2) {
   assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
@@ -1077,10 +1125,29 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
             isa<GlobalValue>(CE1->getOperand(0))) {
           GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
 
-          // Functions are at least 4-byte aligned.
-          unsigned GVAlign = GV->getAlignment();
-          if (isa<Function>(GV))
-            GVAlign = std::max(GVAlign, 4U);
+          unsigned GVAlign;
+
+          if (Module *TheModule = GV->getParent()) {
+            GVAlign = GV->getPointerAlignment(TheModule->getDataLayout());
+
+            // If the function alignment is not specified then assume that it
+            // is 4.
+            // This is dangerous; on x86, the alignment of the pointer
+            // corresponds to the alignment of the function, but might be less
+            // than 4 if it isn't explicitly specified.
+            // However, a fix for this behaviour was reverted because it
+            // increased code size (see https://reviews.llvm.org/D55115)
+            // FIXME: This code should be deleted once existing targets have
+            // appropriate defaults
+            if (GVAlign == 0U && isa<Function>(GV))
+              GVAlign = 4U;
+          } else if (isa<Function>(GV)) {
+            // Without a datalayout we have to assume the worst case: that the
+            // function pointer isn't aligned at all.
+            GVAlign = 0U;
+          } else {
+            GVAlign = GV->getAlignment();
+          }
 
           if (GVAlign > 1) {
             unsigned DstWidth = CI2->getType()->getBitWidth();
@@ -1360,8 +1427,9 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
   assert(V1->getType() == V2->getType() &&
          "Cannot compare values of different types!");
 
-  // Handle degenerate case quickly
-  if (V1 == V2) return FCmpInst::FCMP_OEQ;
+  // We do not know if a constant expression will evaluate to a number or NaN.
+  // Therefore, we can only say that the relation is unordered or equal.
+  if (V1 == V2) return FCmpInst::FCMP_UEQ;
 
   if (!isa<ConstantExpr>(V1)) {
     if (!isa<ConstantExpr>(V2)) {
@@ -1552,7 +1620,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
     case Instruction::ZExt:
     case Instruction::SExt:
       // We can't evaluate floating point casts or truncations.
-      if (CE1Op0->getType()->isFloatingPointTy())
+      if (CE1Op0->getType()->isFPOrFPVectorTy())
         break;
 
       // If the cast is not actually changing bits, and the second operand is a
@@ -1856,7 +1924,6 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
     default: llvm_unreachable("Unknown relation!");
     case FCmpInst::FCMP_UNO:
     case FCmpInst::FCMP_ORD:
-    case FCmpInst::FCMP_UEQ:
     case FCmpInst::FCMP_UNE:
     case FCmpInst::FCMP_ULT:
     case FCmpInst::FCMP_UGT:
@@ -1902,6 +1969,13 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
       else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
         Result = 1;
       break;
+    case FCmpInst::FCMP_UEQ: // We know that C1 == C2 || isUnordered(C1, C2).
+      // We can only partially decide this relation.
+      if (pred == FCmpInst::FCMP_ONE)
+        Result = 0;
+      else if (pred == FCmpInst::FCMP_UEQ)
+        Result = 1;
+      break;
     }
 
     // If we evaluated the result, return it now.
@@ -1981,11 +2055,13 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
 
     // If the right hand side is a bitcast, try using its inverse to simplify
     // it by moving it to the left hand side.  We can't do this if it would turn
-    // a vector compare into a scalar compare or visa versa.
+    // a vector compare into a scalar compare or vice versa, or if it would turn
+    // the operands into FP values.
     if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
       Constant *CE2Op0 = CE2->getOperand(0);
       if (CE2->getOpcode() == Instruction::BitCast &&
-          CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+          CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy() &&
+          !CE2Op0->getType()->isFPOrFPVectorTy()) {
         Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
         return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
       }
@@ -2072,7 +2148,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
   if (Idxs.empty()) return C;
 
   Type *GEPTy = GetElementPtrInst::getGEPReturnType(
-      C, makeArrayRef((Value *const *)Idxs.data(), Idxs.size()));
+      PointeeTy, C, makeArrayRef((Value *const *)Idxs.data(), Idxs.size()));
 
   if (isa<UndefValue>(C))
     return UndefValue::get(GEPTy);
diff --git a/lib/IR/ConstantFold.h b/lib/IR/ConstantFold.h
index 2d8de1132b96..9ad6e14e9e40 100644
--- a/lib/IR/ConstantFold.h
+++ b/lib/IR/ConstantFold.h
@@ -1,9 +1,8 @@
 //===-- ConstantFolding.h - Internal Constant Folding Interface -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,6 +43,7 @@ template <typename T> class ArrayRef;
                                                 ArrayRef<unsigned> Idxs);
   Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
                                                ArrayRef<unsigned> Idxs);
+  Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V);
   Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
                                           Constant *V2);
   Constant *ConstantFoldCompareInstruction(unsigned short predicate,
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 39a0b13c4e0c..920fdc01a14f 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -1,9 +1,8 @@
 //===- ConstantRange.cpp - ConstantRange implementation -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -54,6 +54,26 @@ ConstantRange::ConstantRange(APInt L, APInt U)
          "Lower == Upper, but they aren't min or max value!");
 }
 
+ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known,
+                                           bool IsSigned) {
+  assert(!Known.hasConflict() && "Expected valid KnownBits");
+
+  // Nothing is known about any bit, so every value is possible.
+  if (Known.isUnknown())
+    return getFull(Known.getBitWidth());
+
+  // Smallest value clears every unknown bit; largest value sets them all.
+  APInt Lower = Known.One, Upper = ~Known.Zero;
+
+  // For a signed range whose sign bit is unknown, force the lower bound to be
+  // negative and the upper bound to be non-negative.
+  if (IsSigned && !Known.isNegative() && !Known.isNonNegative()) {
+    Lower.setSignBit();
+    Upper.clearSignBit();
+  }
+  return ConstantRange(Lower, Upper + 1);
+}
+
 ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
                                                    const ConstantRange &CR) {
   if (CR.isEmptySet())
@@ -68,55 +88,39 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
   case CmpInst::ICMP_NE:
     if (CR.isSingleElement())
       return ConstantRange(CR.getUpper(), CR.getLower());
-    return ConstantRange(W);
+    return getFull(W);
   case CmpInst::ICMP_ULT: {
     APInt UMax(CR.getUnsignedMax());
     if (UMax.isMinValue())
-      return ConstantRange(W, /* empty */ false);
+      return getEmpty(W);
     return ConstantRange(APInt::getMinValue(W), std::move(UMax));
   }
   case CmpInst::ICMP_SLT: {
     APInt SMax(CR.getSignedMax());
     if (SMax.isMinSignedValue())
-      return ConstantRange(W, /* empty */ false);
+      return getEmpty(W);
     return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax));
   }
-  case CmpInst::ICMP_ULE: {
-    APInt UMax(CR.getUnsignedMax());
-    if (UMax.isMaxValue())
-      return ConstantRange(W);
-    return ConstantRange(APInt::getMinValue(W), std::move(UMax) + 1);
-  }
-  case CmpInst::ICMP_SLE: {
-    APInt SMax(CR.getSignedMax());
-    if (SMax.isMaxSignedValue())
-      return ConstantRange(W);
-    return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax) + 1);
-  }
+  case CmpInst::ICMP_ULE:
+    return getNonEmpty(APInt::getMinValue(W), CR.getUnsignedMax() + 1);
+  case CmpInst::ICMP_SLE:
+    return getNonEmpty(APInt::getSignedMinValue(W), CR.getSignedMax() + 1);
   case CmpInst::ICMP_UGT: {
     APInt UMin(CR.getUnsignedMin());
     if (UMin.isMaxValue())
-      return ConstantRange(W, /* empty */ false);
+      return getEmpty(W);
     return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W));
   }
   case CmpInst::ICMP_SGT: {
     APInt SMin(CR.getSignedMin());
     if (SMin.isMaxSignedValue())
-      return ConstantRange(W, /* empty */ false);
+      return getEmpty(W);
     return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W));
   }
-  case CmpInst::ICMP_UGE: {
-    APInt UMin(CR.getUnsignedMin());
-    if (UMin.isMinValue())
-      return ConstantRange(W);
-    return ConstantRange(std::move(UMin), APInt::getNullValue(W));
-  }
-  case CmpInst::ICMP_SGE: {
-    APInt SMin(CR.getSignedMin());
-    if (SMin.isMinSignedValue())
-      return ConstantRange(W);
-    return ConstantRange(std::move(SMin), APInt::getSignedMinValue(W));
-  }
+  case CmpInst::ICMP_UGE:
+    return getNonEmpty(CR.getUnsignedMin(), APInt::getNullValue(W));
+  case CmpInst::ICMP_SGE:
+    return getNonEmpty(CR.getSignedMin(), APInt::getSignedMinValue(W));
   }
 }
 
@@ -176,146 +180,106 @@ bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
   return Success;
 }
 
+/// Computes the exact mul nuw region when the RHS is the single element V.
+static ConstantRange makeExactMulNUWRegion(const APInt &V) {
+  const unsigned Width = V.getBitWidth();
+  if (V.isNullValue())
+    return ConstantRange::getFull(Width);
+
+  APInt Lo = APIntOps::RoundingUDiv(APInt::getMinValue(Width), V,
+                                    APInt::Rounding::UP);
+  APInt Hi = APIntOps::RoundingUDiv(APInt::getMaxValue(Width), V,
+                                    APInt::Rounding::DOWN);
+  return ConstantRange::getNonEmpty(Lo, Hi + 1);
+}
+
+/// Computes the exact mul nsw region when the RHS is the single element V.
+static ConstantRange makeExactMulNSWRegion(const APInt &V) {
+  const unsigned Width = V.getBitWidth();
+
+  // 0, 1 and -1 are special-cased; the note before the final return explains
+  // why 1 and -1 cannot take the generic path.
+  if (V.isNullValue() || V.isOneValue())
+    return ConstantRange::getFull(Width);
+
+  const APInt SMin = APInt::getSignedMinValue(Width);
+  const APInt SMax = APInt::getSignedMaxValue(Width);
+  // e.g. for 8 bits this is [-127, 127], represented as [-127, -128).
+  if (V.isAllOnesValue())
+    return ConstantRange(-SMax, SMin);
+
+  // Dividing by a negative V swaps which extreme yields which bound.
+  const bool Neg = V.isNegative();
+  const APInt Lo =
+      APIntOps::RoundingSDiv(Neg ? SMax : SMin, V, APInt::Rounding::UP);
+  const APInt Hi =
+      APIntOps::RoundingSDiv(Neg ? SMin : SMax, V, APInt::Rounding::DOWN);
+
+  // The ConstantRange ctor takes a half-inclusive interval [Lo, Hi + 1).
+  // Hi + 1 is guaranteed not to overflow, because |divisor| > 1. 0, -1,
+  // and 1 are already handled as special cases.
+  return ConstantRange(Lo, Hi + 1);
+}
+
 ConstantRange
 ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
                                           const ConstantRange &Other,
                                           unsigned NoWrapKind) {
   using OBO = OverflowingBinaryOperator;
 
-  // Computes the intersection of CR0 and CR1.  It is different from
-  // intersectWith in that the ConstantRange returned will only contain elements
-  // in both CR0 and CR1 (i.e. SubsetIntersect(X, Y) is a *subset*, proper or
-  // not, of both X and Y).
-  auto SubsetIntersect =
-      [](const ConstantRange &CR0, const ConstantRange &CR1) {
-    return CR0.inverse().unionWith(CR1.inverse()).inverse();
-  };
-
   assert(Instruction::isBinaryOp(BinOp) && "Binary operators only!");
 
   assert((NoWrapKind == OBO::NoSignedWrap ||
-          NoWrapKind == OBO::NoUnsignedWrap ||
-          NoWrapKind == (OBO::NoUnsignedWrap | OBO::NoSignedWrap)) &&
+          NoWrapKind == OBO::NoUnsignedWrap) &&
          "NoWrapKind invalid!");
 
+  bool Unsigned = NoWrapKind == OBO::NoUnsignedWrap;
   unsigned BitWidth = Other.getBitWidth();
-  ConstantRange Result(BitWidth);
 
   switch (BinOp) {
   default:
-    // Conservative answer: empty set
-    return ConstantRange(BitWidth, false);
+    llvm_unreachable("Unsupported binary op");
 
-  case Instruction::Add:
-    if (auto *C = Other.getSingleElement())
-      if (C->isNullValue())
-        // Full set: nothing signed / unsigned wraps when added to 0.
-        return ConstantRange(BitWidth);
-    if (NoWrapKind & OBO::NoUnsignedWrap)
-      Result =
-          SubsetIntersect(Result, ConstantRange(APInt::getNullValue(BitWidth),
-                                                -Other.getUnsignedMax()));
-    if (NoWrapKind & OBO::NoSignedWrap) {
-      const APInt &SignedMin = Other.getSignedMin();
-      const APInt &SignedMax = Other.getSignedMax();
-      if (SignedMax.isStrictlyPositive())
-        Result = SubsetIntersect(
-            Result,
-            ConstantRange(APInt::getSignedMinValue(BitWidth),
-                          APInt::getSignedMinValue(BitWidth) - SignedMax));
-      if (SignedMin.isNegative())
-        Result = SubsetIntersect(
-            Result,
-            ConstantRange(APInt::getSignedMinValue(BitWidth) - SignedMin,
-                          APInt::getSignedMinValue(BitWidth)));
-    }
-    return Result;
+  case Instruction::Add: {
+    if (Unsigned)
+      return getNonEmpty(APInt::getNullValue(BitWidth),
+                         -Other.getUnsignedMax());
+
+    APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
+    APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
+    return getNonEmpty(
+        SMin.isNegative() ? SignedMinVal - SMin : SignedMinVal,
+        SMax.isStrictlyPositive() ? SignedMinVal - SMax : SignedMinVal);
+  }
 
-  case Instruction::Sub:
-    if (auto *C = Other.getSingleElement())
-      if (C->isNullValue())
-        // Full set: nothing signed / unsigned wraps when subtracting 0.
-        return ConstantRange(BitWidth);
-    if (NoWrapKind & OBO::NoUnsignedWrap)
-      Result =
-          SubsetIntersect(Result, ConstantRange(Other.getUnsignedMax(),
-                                                APInt::getMinValue(BitWidth)));
-    if (NoWrapKind & OBO::NoSignedWrap) {
-      const APInt &SignedMin = Other.getSignedMin();
-      const APInt &SignedMax = Other.getSignedMax();
-      if (SignedMax.isStrictlyPositive())
-        Result = SubsetIntersect(
-            Result,
-            ConstantRange(APInt::getSignedMinValue(BitWidth) + SignedMax,
-                          APInt::getSignedMinValue(BitWidth)));
-      if (SignedMin.isNegative())
-        Result = SubsetIntersect(
-            Result,
-            ConstantRange(APInt::getSignedMinValue(BitWidth),
-                          APInt::getSignedMinValue(BitWidth) + SignedMin));
-    }
-    return Result;
-  case Instruction::Mul: {
-    if (NoWrapKind == (OBO::NoSignedWrap | OBO::NoUnsignedWrap)) {
-      return SubsetIntersect(
-          makeGuaranteedNoWrapRegion(BinOp, Other, OBO::NoSignedWrap),
-          makeGuaranteedNoWrapRegion(BinOp, Other, OBO::NoUnsignedWrap));
-    }
+  case Instruction::Sub: {
+    if (Unsigned)
+      return getNonEmpty(Other.getUnsignedMax(), APInt::getMinValue(BitWidth));
 
-    // Equivalent to calling makeGuaranteedNoWrapRegion() on [V, V+1).
-    const bool Unsigned = NoWrapKind == OBO::NoUnsignedWrap;
-    const auto makeSingleValueRegion = [Unsigned,
-                                        BitWidth](APInt V) -> ConstantRange {
-      // Handle special case for 0, -1 and 1. See the last for reason why we
-      // specialize -1 and 1.
-      if (V == 0 || V.isOneValue())
-        return ConstantRange(BitWidth, true);
-
-      APInt MinValue, MaxValue;
-      if (Unsigned) {
-        MinValue = APInt::getMinValue(BitWidth);
-        MaxValue = APInt::getMaxValue(BitWidth);
-      } else {
-        MinValue = APInt::getSignedMinValue(BitWidth);
-        MaxValue = APInt::getSignedMaxValue(BitWidth);
-      }
-      // e.g. Returning [-127, 127], represented as [-127, -128).
-      if (!Unsigned && V.isAllOnesValue())
-        return ConstantRange(-MaxValue, MinValue);
-
-      APInt Lower, Upper;
-      if (!Unsigned && V.isNegative()) {
-        Lower = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::UP);
-        Upper = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::DOWN);
-      } else if (Unsigned) {
-        Lower = APIntOps::RoundingUDiv(MinValue, V, APInt::Rounding::UP);
-        Upper = APIntOps::RoundingUDiv(MaxValue, V, APInt::Rounding::DOWN);
-      } else {
-        Lower = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::UP);
-        Upper = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::DOWN);
-      }
-      if (Unsigned) {
-        Lower = Lower.zextOrSelf(BitWidth);
-        Upper = Upper.zextOrSelf(BitWidth);
-      } else {
-        Lower = Lower.sextOrSelf(BitWidth);
-        Upper = Upper.sextOrSelf(BitWidth);
-      }
-      // ConstantRange ctor take a half inclusive interval [Lower, Upper + 1).
-      // Upper + 1 is guanranteed not to overflow, because |divisor| > 1. 0, -1,
-      // and 1 are already handled as special cases.
-      return ConstantRange(Lower, Upper + 1);
-    };
+    APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
+    APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
+    return getNonEmpty(
+        SMax.isStrictlyPositive() ? SignedMinVal + SMax : SignedMinVal,
+        SMin.isNegative() ? SignedMinVal + SMin : SignedMinVal);
+  }
 
+  case Instruction::Mul:
     if (Unsigned)
-      return makeSingleValueRegion(Other.getUnsignedMax());
+      return makeExactMulNUWRegion(Other.getUnsignedMax());
 
-    return SubsetIntersect(makeSingleValueRegion(Other.getSignedMin()),
-                           makeSingleValueRegion(Other.getSignedMax()));
-  }
+    return makeExactMulNSWRegion(Other.getSignedMin())
+        .intersectWith(makeExactMulNSWRegion(Other.getSignedMax()));
   }
 }
 
+ConstantRange ConstantRange::makeExactNoWrapRegion(Instruction::BinaryOps BinOp,
+                                                   const APInt &Other,
+                                                   unsigned NoWrapKind) {
+  // "For all" and "for any" coincide on a single-element range: it's exact.
+  const ConstantRange SingleElt(Other);
+  return makeGuaranteedNoWrapRegion(BinOp, SingleElt, NoWrapKind);
+}
+
 bool ConstantRange::isFullSet() const {
   return Lower == Upper && Lower.isMaxValue();
 }
@@ -325,20 +289,19 @@ bool ConstantRange::isEmptySet() const {
 }
 
 bool ConstantRange::isWrappedSet() const {
+  return Lower.ugt(Upper) && !Upper.isNullValue();
+}
+
+bool ConstantRange::isUpperWrapped() const {
   return Lower.ugt(Upper);
 }
 
 bool ConstantRange::isSignWrappedSet() const {
-  return contains(APInt::getSignedMaxValue(getBitWidth())) &&
-         contains(APInt::getSignedMinValue(getBitWidth()));
+  return Lower.sgt(Upper) && !Upper.isMinSignedValue();
 }
 
-APInt ConstantRange::getSetSize() const {
-  if (isFullSet())
-    return APInt::getOneBitSet(getBitWidth()+1, getBitWidth());
-
-  // This is also correct for wrapped sets.
-  return (Upper - Lower).zext(getBitWidth()+1);
+bool ConstantRange::isUpperSignWrapped() const {
+  return Lower.sgt(Upper);
 }
 
 bool
@@ -362,26 +325,41 @@ ConstantRange::isSizeLargerThan(uint64_t MaxSize) const {
   return (Upper - Lower).ugt(MaxSize);
 }
 
+bool ConstantRange::isAllNegative() const {
+  // The empty set is vacuously all negative.
+  if (isEmptySet())
+    return true;
+
+  // The full set is not. Any other range qualifies only if it is not
+  // upper-sign-wrapped and its Upper bound is not strictly positive.
+  return !(isFullSet() || isUpperSignWrapped() || Upper.isStrictlyPositive());
+}
+
+bool ConstantRange::isAllNonNegative() const {
+  // No special-casing needed: empty and full sets already come out right.
+  return !(isSignWrappedSet() || Lower.isNegative());
+}
+
 APInt ConstantRange::getUnsignedMax() const {
-  if (isFullSet() || isWrappedSet())
+  if (isFullSet() || isUpperWrapped())
     return APInt::getMaxValue(getBitWidth());
   return getUpper() - 1;
 }
 
 APInt ConstantRange::getUnsignedMin() const {
-  if (isFullSet() || (isWrappedSet() && !getUpper().isNullValue()))
+  if (isFullSet() || isWrappedSet())
     return APInt::getMinValue(getBitWidth());
   return getLower();
 }
 
 APInt ConstantRange::getSignedMax() const {
-  if (isFullSet() || Lower.sgt(Upper))
+  if (isFullSet() || isUpperSignWrapped())
     return APInt::getSignedMaxValue(getBitWidth());
   return getUpper() - 1;
 }
 
 APInt ConstantRange::getSignedMin() const {
-  if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue()))
+  if (isFullSet() || isSignWrappedSet())
     return APInt::getSignedMinValue(getBitWidth());
   return getLower();
 }
@@ -390,7 +368,7 @@ bool ConstantRange::contains(const APInt &V) const {
   if (Lower == Upper)
     return isFullSet();
 
-  if (!isWrappedSet())
+  if (!isUpperWrapped())
     return Lower.ule(V) && V.ult(Upper);
   return Lower.ule(V) || V.ult(Upper);
 }
@@ -399,14 +377,14 @@ bool ConstantRange::contains(const ConstantRange &Other) const {
   if (isFullSet() || Other.isEmptySet()) return true;
   if (isEmptySet() || Other.isFullSet()) return false;
 
-  if (!isWrappedSet()) {
-    if (Other.isWrappedSet())
+  if (!isUpperWrapped()) {
+    if (Other.isUpperWrapped())
       return false;
 
     return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
   }
 
-  if (!Other.isWrappedSet())
+  if (!Other.isUpperWrapped())
     return Other.getUpper().ule(Upper) ||
            Lower.ule(Other.getLower());
 
@@ -425,7 +403,28 @@ ConstantRange ConstantRange::difference(const ConstantRange &CR) const {
   return intersectWith(CR.inverse());
 }
 
-ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+/// Picks between two candidate ranges: for the Unsigned / Signed preference a
+/// candidate that is not (sign-)wrapped beats one that is; otherwise, or on a
+/// tie, the strictly smaller candidate wins and CR2 is the fallback.
+static ConstantRange getPreferredRange(
+    const ConstantRange &CR1, const ConstantRange &CR2,
+    ConstantRange::PreferredRangeType Type) {
+  bool Wrapped1 = false, Wrapped2 = false;
+  if (Type == ConstantRange::Unsigned) {
+    Wrapped1 = CR1.isWrappedSet();
+    Wrapped2 = CR2.isWrappedSet();
+  } else if (Type == ConstantRange::Signed) {
+    Wrapped1 = CR1.isSignWrappedSet();
+    Wrapped2 = CR2.isSignWrappedSet();
+  }
+
+  if (Wrapped1 != Wrapped2)
+    return Wrapped1 ? CR2 : CR1;
+  return CR1.isSizeStrictlySmallerThan(CR2) ? CR1 : CR2;
+}
+
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR,
+                                           PreferredRangeType Type) const {
   assert(getBitWidth() == CR.getBitWidth() &&
          "ConstantRange types don't agree!");
 
@@ -433,100 +432,134 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
   if (   isEmptySet() || CR.isFullSet()) return *this;
   if (CR.isEmptySet() ||    isFullSet()) return CR;
 
-  if (!isWrappedSet() && CR.isWrappedSet())
-    return CR.intersectWith(*this);
+  if (!isUpperWrapped() && CR.isUpperWrapped())
+    return CR.intersectWith(*this, Type);
 
-  if (!isWrappedSet() && !CR.isWrappedSet()) {
+  if (!isUpperWrapped() && !CR.isUpperWrapped()) {
     if (Lower.ult(CR.Lower)) {
+      // L---U       : this
+      //       L---U : CR
       if (Upper.ule(CR.Lower))
-        return ConstantRange(getBitWidth(), false);
+        return getEmpty();
 
+      // L---U       : this
+      //   L---U     : CR
       if (Upper.ult(CR.Upper))
         return ConstantRange(CR.Lower, Upper);
 
+      // L-------U   : this
+      //   L---U     : CR
       return CR;
     }
+    //   L---U     : this
+    // L-------U   : CR
     if (Upper.ult(CR.Upper))
       return *this;
 
+    //   L-----U   : this
+    // L-----U     : CR
     if (Lower.ult(CR.Upper))
       return ConstantRange(Lower, CR.Upper);
 
-    return ConstantRange(getBitWidth(), false);
+    //       L---U : this
+    // L---U       : CR
+    return getEmpty();
   }
 
-  if (isWrappedSet() && !CR.isWrappedSet()) {
+  if (isUpperWrapped() && !CR.isUpperWrapped()) {
     if (CR.Lower.ult(Upper)) {
+      // ------U   L--- : this
+      //  L--U          : CR
       if (CR.Upper.ult(Upper))
         return CR;
 
+      // ------U   L--- : this
+      //  L------U      : CR
       if (CR.Upper.ule(Lower))
         return ConstantRange(CR.Lower, Upper);
 
-      if (isSizeStrictlySmallerThan(CR))
-        return *this;
-      return CR;
+      // ------U   L--- : this
+      //  L----------U  : CR
+      return getPreferredRange(*this, CR, Type);
     }
     if (CR.Lower.ult(Lower)) {
+      // --U      L---- : this
+      //     L--U       : CR
       if (CR.Upper.ule(Lower))
-        return ConstantRange(getBitWidth(), false);
+        return getEmpty();
 
+      // --U      L---- : this
+      //     L------U   : CR
       return ConstantRange(Lower, CR.Upper);
     }
+
+    // --U  L------ : this
+    //        L--U  : CR
     return CR;
   }
 
   if (CR.Upper.ult(Upper)) {
-    if (CR.Lower.ult(Upper)) {
-      if (isSizeStrictlySmallerThan(CR))
-        return *this;
-      return CR;
-    }
+    // ------U L-- : this
+    // --U L------ : CR
+    if (CR.Lower.ult(Upper))
+      return getPreferredRange(*this, CR, Type);
 
+    // ----U   L-- : this
+    // --U   L---- : CR
     if (CR.Lower.ult(Lower))
       return ConstantRange(Lower, CR.Upper);
 
+    // ----U L---- : this
+    // --U     L-- : CR
     return CR;
   }
   if (CR.Upper.ule(Lower)) {
+    // --U     L-- : this
+    // ----U L---- : CR
     if (CR.Lower.ult(Lower))
       return *this;
 
+    // --U   L---- : this
+    // ----U   L-- : CR
     return ConstantRange(CR.Lower, Upper);
   }
-  if (isSizeStrictlySmallerThan(CR))
-    return *this;
-  return CR;
+
+  // --U L------ : this
+  // ------U L-- : CR
+  return getPreferredRange(*this, CR, Type);
 }
 
-ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
+                                       PreferredRangeType Type) const {
   assert(getBitWidth() == CR.getBitWidth() &&
          "ConstantRange types don't agree!");
 
   if (   isFullSet() || CR.isEmptySet()) return *this;
   if (CR.isFullSet() ||    isEmptySet()) return CR;
 
-  if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+  if (!isUpperWrapped() && CR.isUpperWrapped())
+    return CR.unionWith(*this, Type);
 
-  if (!isWrappedSet() && !CR.isWrappedSet()) {
-    if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
-      // If the two ranges are disjoint, find the smaller gap and bridge it.
-      APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
-      if (d1.ult(d2))
-        return ConstantRange(Lower, CR.Upper);
-      return ConstantRange(CR.Lower, Upper);
-    }
+  if (!isUpperWrapped() && !CR.isUpperWrapped()) {
+    //        L---U  and  L---U        : this
+    //  L---U                   L---U  : CR
+    // result in one of
+    //  L---------U
+    // -----U L-----
+    if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower))
+      return getPreferredRange(
+          ConstantRange(Lower, CR.Upper), ConstantRange(CR.Lower, Upper), Type);
 
     APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
     APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper;
 
     if (L.isNullValue() && U.isNullValue())
-      return ConstantRange(getBitWidth());
+      return getFull();
 
     return ConstantRange(std::move(L), std::move(U));
   }
 
-  if (!CR.isWrappedSet()) {
+  if (!CR.isUpperWrapped()) {
     // ------U   L-----  and  ------U   L----- : this
     //   L--U                            L--U  : CR
     if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
@@ -535,26 +568,25 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
     // ------U   L----- : this
     //    L---------U   : CR
     if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
-      return ConstantRange(getBitWidth());
+      return getFull();
 
     // ----U       L---- : this
     //       L---U       : CR
-    //    <d1>  <d2>
-    if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
-      APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
-      if (d1.ult(d2))
-        return ConstantRange(Lower, CR.Upper);
-      return ConstantRange(CR.Lower, Upper);
-    }
+    // results in one of
+    // ----------U L----
+    // ----U L----------
+    if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower))
+      return getPreferredRange(
+          ConstantRange(Lower, CR.Upper), ConstantRange(CR.Lower, Upper), Type);
 
     // ----U     L----- : this
     //        L----U    : CR
-    if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+    if (Upper.ult(CR.Lower) && Lower.ule(CR.Upper))
       return ConstantRange(CR.Lower, Upper);
 
     // ------U    L---- : this
     //    L-----U       : CR
-    assert(CR.Lower.ult(Upper) && CR.Upper.ult(Lower) &&
+    assert(CR.Lower.ule(Upper) && CR.Upper.ult(Lower) &&
            "ConstantRange::unionWith missed a case with one range wrapped");
     return ConstantRange(Lower, CR.Upper);
   }
@@ -562,7 +594,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
   // ------U    L----  and  ------U    L---- : this
   // -U  L-----------  and  ------------U  L : CR
   if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
-    return ConstantRange(getBitWidth());
+    return getFull();
 
   APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
   APInt U = CR.Upper.ugt(Upper) ? CR.Upper : Upper;
@@ -588,7 +620,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
     if (getBitWidth() == ResultBitWidth)
       return *this;
     else
-      return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+      return getFull();
   case Instruction::UIToFP: {
     // TODO: use input range if available
     auto BW = getBitWidth();
@@ -608,17 +640,17 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
   case Instruction::IntToPtr:
   case Instruction::PtrToInt:
   case Instruction::AddrSpaceCast:
-    // Conservatively return full set.
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    // Conservatively return the full set.
+    return getFull();
   };
 }
 
 ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
-  if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+  if (isEmptySet()) return getEmpty(DstTySize);
 
   unsigned SrcTySize = getBitWidth();
   assert(SrcTySize < DstTySize && "Not a value extension");
-  if (isFullSet() || isWrappedSet()) {
+  if (isFullSet() || isUpperWrapped()) {
     // Change into [0, 1 << src bit width)
     APInt LowerExt(DstTySize, 0);
     if (!Upper) // special case: [X, 0) -- not really wrapping around
@@ -631,7 +663,7 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
 }
 
 ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
-  if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+  if (isEmptySet()) return getEmpty(DstTySize);
 
   unsigned SrcTySize = getBitWidth();
   assert(SrcTySize < DstTySize && "Not a value extension");
@@ -651,9 +683,9 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
 ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   assert(getBitWidth() > DstTySize && "Not a value truncation");
   if (isEmptySet())
-    return ConstantRange(DstTySize, /*isFullSet=*/false);
+    return getEmpty(DstTySize);
   if (isFullSet())
-    return ConstantRange(DstTySize, /*isFullSet=*/true);
+    return getFull(DstTySize);
 
   APInt LowerDiv(Lower), UpperDiv(Upper);
   ConstantRange Union(DstTySize, /*isFullSet=*/false);
@@ -661,12 +693,12 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   // Analyze wrapped sets in their two parts: [0, Upper) \/ [Lower, MaxValue]
   // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and
   // then we do the union with [MaxValue, Upper)
-  if (isWrappedSet()) {
+  if (isUpperWrapped()) {
     // If Upper is greater than or equal to MaxValue(DstTy), it covers the whole
     // truncated range.
     if (Upper.getActiveBits() > DstTySize ||
         Upper.countTrailingOnes() == DstTySize)
-      return ConstantRange(DstTySize, /*isFullSet=*/true);
+      return getFull(DstTySize);
 
     Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
     UpperDiv.setAllBits();
@@ -699,7 +731,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
                            UpperDiv.trunc(DstTySize)).unionWith(Union);
   }
 
-  return ConstantRange(DstTySize, /*isFullSet=*/true);
+  return getFull(DstTySize);
 }
 
 ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
@@ -733,6 +765,12 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
     return multiply(Other);
   case Instruction::UDiv:
     return udiv(Other);
+  case Instruction::SDiv:
+    return sdiv(Other);
+  case Instruction::URem:
+    return urem(Other);
+  case Instruction::SRem:
+    return srem(Other);
   case Instruction::Shl:
     return shl(Other);
   case Instruction::LShr:
@@ -752,39 +790,36 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
   case Instruction::FMul:
     return multiply(Other);
   default:
-    // Conservatively return full set.
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    // Conservatively return the full set.
+    return getFull();
   }
 }
 
 ConstantRange
 ConstantRange::add(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   if (isFullSet() || Other.isFullSet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
 
   APInt NewLower = getLower() + Other.getLower();
   APInt NewUpper = getUpper() + Other.getUpper() - 1;
   if (NewLower == NewUpper)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
 
   ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
   if (X.isSizeStrictlySmallerThan(*this) ||
       X.isSizeStrictlySmallerThan(Other))
     // We've wrapped, therefore, full set.
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
   return X;
 }
 
 ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
   // Calculate the subset of this range such that "X + Other" is
   // guaranteed not to wrap (overflow) for all X in this subset.
-  // makeGuaranteedNoWrapRegion will produce an exact NSW range since we are
-  // passing a single element range.
-  auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(BinaryOperator::Add,
-                                      ConstantRange(Other),
-                                      OverflowingBinaryOperator::NoSignedWrap);
+  auto NSWRange = ConstantRange::makeExactNoWrapRegion(
+      BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap);
   auto NSWConstrainedRange = intersectWith(NSWRange);
 
   return NSWConstrainedRange.add(ConstantRange(Other));
@@ -793,20 +828,20 @@ ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
 ConstantRange
 ConstantRange::sub(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   if (isFullSet() || Other.isFullSet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
 
   APInt NewLower = getLower() - Other.getUpper() + 1;
   APInt NewUpper = getUpper() - Other.getLower();
   if (NewLower == NewUpper)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
 
   ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
   if (X.isSizeStrictlySmallerThan(*this) ||
       X.isSizeStrictlySmallerThan(Other))
     // We've wrapped, therefore, full set.
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
   return X;
 }
 
@@ -818,7 +853,7 @@ ConstantRange::multiply(const ConstantRange &Other) const {
   // range according to the greatest power-of-two factor of the single element.
 
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   // Multiplication is signedness-independent. However different ranges can be
   // obtained depending on how the input ranges are treated. These different
@@ -840,7 +875,7 @@ ConstantRange::multiply(const ConstantRange &Other) const {
   // from one positive number to another which is as good as we can generate.
   // In this case, skip the extra work of generating signed ranges which aren't
   // going to be better than this range.
-  if (!UR.isWrappedSet() &&
+  if (!UR.isUpperWrapped() &&
       (UR.getUpper().isNonNegative() || UR.getUpper().isMinSignedValue()))
     return UR;
 
@@ -869,12 +904,10 @@ ConstantRange::smax(const ConstantRange &Other) const {
   // X smax Y is: range(smax(X_smin, Y_smin),
   //                    smax(X_smax, Y_smax))
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
   APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
-  if (NewU == NewL)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(std::move(NewL), std::move(NewU));
+  return getNonEmpty(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -882,12 +915,10 @@ ConstantRange::umax(const ConstantRange &Other) const {
   // X umax Y is: range(umax(X_umin, Y_umin),
   //                    umax(X_umax, Y_umax))
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
   APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
-  if (NewU == NewL)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(std::move(NewL), std::move(NewU));
+  return getNonEmpty(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -895,12 +926,10 @@ ConstantRange::smin(const ConstantRange &Other) const {
   // X smin Y is: range(smin(X_smin, Y_smin),
   //                    smin(X_smax, Y_smax))
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   APInt NewL = APIntOps::smin(getSignedMin(), Other.getSignedMin());
   APInt NewU = APIntOps::smin(getSignedMax(), Other.getSignedMax()) + 1;
-  if (NewU == NewL)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(std::move(NewL), std::move(NewU));
+  return getNonEmpty(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
@@ -908,20 +937,16 @@ ConstantRange::umin(const ConstantRange &Other) const {
   // X umin Y is: range(umin(X_umin, Y_umin),
   //                    umin(X_umax, Y_umax))
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   APInt NewL = APIntOps::umin(getUnsignedMin(), Other.getUnsignedMin());
   APInt NewU = APIntOps::umin(getUnsignedMax(), Other.getUnsignedMax()) + 1;
-  if (NewU == NewL)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(std::move(NewL), std::move(NewU));
+  return getNonEmpty(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange
 ConstantRange::udiv(const ConstantRange &RHS) const {
   if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
-  if (RHS.isFullSet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getEmpty();
 
   APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
 
@@ -936,52 +961,186 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
   }
 
   APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+  return getNonEmpty(std::move(Lower), std::move(Upper));
+}
 
-  // If the LHS is Full and the RHS is a wrapped interval containing 1 then
-  // this could occur.
-  if (Lower == Upper)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
+  // We split up the LHS and RHS into positive and negative components
+  // and then also compute the positive and negative components of the result
+  // separately by combining division results with the appropriate signs.
+  APInt Zero = APInt::getNullValue(getBitWidth());
+  APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+  ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
+  ConstantRange NegFilter(SignedMin, Zero);
+  ConstantRange PosL = intersectWith(PosFilter);
+  ConstantRange NegL = intersectWith(NegFilter);
+  ConstantRange PosR = RHS.intersectWith(PosFilter);
+  ConstantRange NegR = RHS.intersectWith(NegFilter);
+
+  ConstantRange PosRes = getEmpty();
+  if (!PosL.isEmptySet() && !PosR.isEmptySet())
+    // pos / pos = pos.
+    PosRes = ConstantRange(PosL.Lower.sdiv(PosR.Upper - 1),
+                           (PosL.Upper - 1).sdiv(PosR.Lower) + 1);
+
+  if (!NegL.isEmptySet() && !NegR.isEmptySet()) {
+    // neg / neg = pos.
+    //
+    // We need to deal with one tricky case here: SignedMin / -1 is UB on the
+    // IR level, so we'll want to exclude this case when calculating bounds.
+    // (For APInts the operation is well-defined and yields SignedMin.) We
+    // handle this by dropping either SignedMin from the LHS or -1 from the RHS.
+    APInt Lo = (NegL.Upper - 1).sdiv(NegR.Lower);
+    if (NegL.Lower.isMinSignedValue() && NegR.Upper.isNullValue()) {
+      // Remove -1 from the RHS. Skip if it's the only element, as this would
+      // leave us with an empty set.
+      if (!NegR.Lower.isAllOnesValue()) {
+        APInt AdjNegRUpper;
+        if (RHS.Lower.isAllOnesValue())
+          // Negative part of [-1, X] without -1 is [SignedMin, X].
+          AdjNegRUpper = RHS.Upper;
+        else
+          // [X, -1] without -1 is [X, -2].
+          AdjNegRUpper = NegR.Upper - 1;
+
+        PosRes = PosRes.unionWith(
+            ConstantRange(Lo, NegL.Lower.sdiv(AdjNegRUpper - 1) + 1));
+      }
 
+      // Remove SignedMin from the LHS. Skip if it's the only element, as this
+      // would leave us with an empty set.
+      if (NegL.Upper != SignedMin + 1) {
+        APInt AdjNegLLower;
+        if (Upper == SignedMin + 1)
+          // Negative part of [X, SignedMin] without SignedMin is [X, -1].
+          AdjNegLLower = Lower;
+        else
+          // [SignedMin, X] without SignedMin is [SignedMin + 1, X].
+          AdjNegLLower = NegL.Lower + 1;
+
+        PosRes = PosRes.unionWith(
+            ConstantRange(std::move(Lo),
+                          AdjNegLLower.sdiv(NegR.Upper - 1) + 1));
+      }
+    } else {
+      PosRes = PosRes.unionWith(
+          ConstantRange(std::move(Lo), NegL.Lower.sdiv(NegR.Upper - 1) + 1));
+    }
+  }
+
+  ConstantRange NegRes = getEmpty();
+  if (!PosL.isEmptySet() && !NegR.isEmptySet())
+    // pos / neg = neg.
+    NegRes = ConstantRange((PosL.Upper - 1).sdiv(NegR.Upper - 1),
+                           PosL.Lower.sdiv(NegR.Lower) + 1);
+
+  if (!NegL.isEmptySet() && !PosR.isEmptySet())
+    // neg / pos = neg.
+    NegRes = NegRes.unionWith(
+        ConstantRange(NegL.Lower.sdiv(PosR.Lower),
+                      (NegL.Upper - 1).sdiv(PosR.Upper - 1) + 1));
+
+  // Prefer a non-wrapping signed range here.
+  ConstantRange Res = NegRes.unionWith(PosRes, PreferredRangeType::Signed);
+
+  // Preserve the zero that we dropped when splitting the LHS by sign.
+  if (contains(Zero) && (!PosR.isEmptySet() || !NegR.isEmptySet()))
+    Res = Res.unionWith(ConstantRange(Zero));
+  return Res;
+}
+
+ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
+  if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+    return getEmpty();
+
+  // L % R for L < R is L.
+  if (getUnsignedMax().ult(RHS.getUnsignedMin()))
+    return *this;
+
+  // L % R is <= L and < R.
+  APInt Upper = APIntOps::umin(getUnsignedMax(), RHS.getUnsignedMax() - 1) + 1;
+  return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(Upper));
+}
+
+ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
+  if (isEmptySet() || RHS.isEmptySet())
+    return getEmpty();
+
+  ConstantRange AbsRHS = RHS.abs();
+  APInt MinAbsRHS = AbsRHS.getUnsignedMin();
+  APInt MaxAbsRHS = AbsRHS.getUnsignedMax();
+
+  // Modulus by zero is UB.
+  if (MaxAbsRHS.isNullValue())
+    return getEmpty();
+
+  if (MinAbsRHS.isNullValue())
+    ++MinAbsRHS;
+
+  APInt MinLHS = getSignedMin(), MaxLHS = getSignedMax();
+
+  if (MinLHS.isNonNegative()) {
+    // L % R for L < R is L.
+    if (MaxLHS.ult(MinAbsRHS))
+      return *this;
+
+    // L % R is <= L and < R.
+    APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
+    return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(Upper));
+  }
+
+  // Same basic logic as above, but the result is negative.
+  if (MaxLHS.isNegative()) {
+    if (MinLHS.ugt(-MinAbsRHS))
+      return *this;
+
+    APInt Lower = APIntOps::umax(MinLHS, -MaxAbsRHS + 1);
+    return ConstantRange(std::move(Lower), APInt(getBitWidth(), 1));
+  }
+
+  // LHS range crosses zero.
+  APInt Lower = APIntOps::umax(MinLHS, -MaxAbsRHS + 1);
+  APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
   return ConstantRange(std::move(Lower), std::move(Upper));
 }
 
 ConstantRange
 ConstantRange::binaryAnd(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   // TODO: replace this with something less conservative
 
   APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
-  if (umin.isAllOnesValue())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
+  return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
 }
 
 ConstantRange
 ConstantRange::binaryOr(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   // TODO: replace this with something less conservative
 
   APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
-  if (umax.isNullValue())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-  return ConstantRange(std::move(umax), APInt::getNullValue(getBitWidth()));
+  return getNonEmpty(std::move(umax), APInt::getNullValue(getBitWidth()));
 }
 
 ConstantRange
 ConstantRange::shl(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   APInt max = getUnsignedMax();
   APInt Other_umax = Other.getUnsignedMax();
 
+  // If the maximum shift amount is zero, nothing is shifted:
+  // return the original range.
+  if (Other_umax.isNullValue())
+    return *this;
   // there's overflow!
-  if (Other_umax.uge(max.countLeadingZeros()))
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+  if (Other_umax.ugt(max.countLeadingZeros()))
+    return getFull();
 
   // FIXME: implement the other tricky cases
 
@@ -995,20 +1154,17 @@ ConstantRange::shl(const ConstantRange &Other) const {
 ConstantRange
 ConstantRange::lshr(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()) + 1;
   APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
-  if (min == max)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-
-  return ConstantRange(std::move(min), std::move(max));
+  return getNonEmpty(std::move(min), std::move(max));
 }
 
 ConstantRange
 ConstantRange::ashr(const ConstantRange &Other) const {
   if (isEmptySet() || Other.isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
 
   // May straddle zero, so handle both positive and negative cases.
   // 'PosMax' is the upper bound of the result of the ashr
@@ -1053,20 +1209,196 @@ ConstantRange::ashr(const ConstantRange &Other) const {
     min = NegMin;
     max = PosMax;
   }
-  if (min == max)
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+  return getNonEmpty(std::move(min), std::move(max));
+}
+
+ConstantRange ConstantRange::uadd_sat(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return getEmpty();
+
+  APInt NewL = getUnsignedMin().uadd_sat(Other.getUnsignedMin());
+  APInt NewU = getUnsignedMax().uadd_sat(Other.getUnsignedMax()) + 1;
+  return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::sadd_sat(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return getEmpty();
+
+  APInt NewL = getSignedMin().sadd_sat(Other.getSignedMin());
+  APInt NewU = getSignedMax().sadd_sat(Other.getSignedMax()) + 1;
+  return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::usub_sat(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return getEmpty();
+
+  APInt NewL = getUnsignedMin().usub_sat(Other.getUnsignedMax());
+  APInt NewU = getUnsignedMax().usub_sat(Other.getUnsignedMin()) + 1;
+  return getNonEmpty(std::move(NewL), std::move(NewU));
+}
 
-  return ConstantRange(std::move(min), std::move(max));
+ConstantRange ConstantRange::ssub_sat(const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return getEmpty();
+
+  APInt NewL = getSignedMin().ssub_sat(Other.getSignedMax());
+  APInt NewU = getSignedMax().ssub_sat(Other.getSignedMin()) + 1;
+  return getNonEmpty(std::move(NewL), std::move(NewU));
 }
 
 ConstantRange ConstantRange::inverse() const {
   if (isFullSet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+    return getEmpty();
   if (isEmptySet())
-    return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+    return getFull();
   return ConstantRange(Upper, Lower);
 }
 
+ConstantRange ConstantRange::abs() const {
+  if (isEmptySet())
+    return getEmpty();
+
+  if (isSignWrappedSet()) {
+    APInt Lo;
+    // Check whether the range crosses zero.
+    if (Upper.isStrictlyPositive() || !Lower.isStrictlyPositive())
+      Lo = APInt::getNullValue(getBitWidth());
+    else
+      Lo = APIntOps::umin(Lower, -Upper + 1);
+
+    // SignedMin is included in the result range.
+    return ConstantRange(Lo, APInt::getSignedMinValue(getBitWidth()) + 1);
+  }
+
+  APInt SMin = getSignedMin(), SMax = getSignedMax();
+
+  // All non-negative.
+  if (SMin.isNonNegative())
+    return *this;
+
+  // All negative.
+  if (SMax.isNegative())
+    return ConstantRange(-SMax, -SMin + 1);
+
+  // Range crosses zero.
+  return ConstantRange(APInt::getNullValue(getBitWidth()),
+                       APIntOps::umax(-SMin, SMax) + 1);
+}
+
+ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow(
+    const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return OverflowResult::MayOverflow;
+
+  APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+  APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+
+  // a u+ b overflows high iff a u> ~b.
+  if (Min.ugt(~OtherMin))
+    return OverflowResult::AlwaysOverflowsHigh;
+  if (Max.ugt(~OtherMax))
+    return OverflowResult::MayOverflow;
+  return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::signedAddMayOverflow(
+    const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return OverflowResult::MayOverflow;
+
+  APInt Min = getSignedMin(), Max = getSignedMax();
+  APInt OtherMin = Other.getSignedMin(), OtherMax = Other.getSignedMax();
+
+  APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+  APInt SignedMax = APInt::getSignedMaxValue(getBitWidth());
+
+  // a s+ b overflows high iff a s>=0 && b s>= 0 && a s> smax - b.
+  // a s+ b overflows low iff a s< 0 && b s< 0 && a s< smin - b.
+  if (Min.isNonNegative() && OtherMin.isNonNegative() &&
+      Min.sgt(SignedMax - OtherMin))
+    return OverflowResult::AlwaysOverflowsHigh;
+  if (Max.isNegative() && OtherMax.isNegative() &&
+      Max.slt(SignedMin - OtherMax))
+    return OverflowResult::AlwaysOverflowsLow;
+
+  if (Max.isNonNegative() && OtherMax.isNonNegative() &&
+      Max.sgt(SignedMax - OtherMax))
+    return OverflowResult::MayOverflow;
+  if (Min.isNegative() && OtherMin.isNegative() &&
+      Min.slt(SignedMin - OtherMin))
+    return OverflowResult::MayOverflow;
+
+  return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::unsignedSubMayOverflow(
+    const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return OverflowResult::MayOverflow;
+
+  APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+  APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+
+  // a u- b overflows low iff a u< b.
+  if (Max.ult(OtherMin))
+    return OverflowResult::AlwaysOverflowsLow;
+  if (Min.ult(OtherMax))
+    return OverflowResult::MayOverflow;
+  return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::signedSubMayOverflow(
+    const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return OverflowResult::MayOverflow;
+
+  APInt Min = getSignedMin(), Max = getSignedMax();
+  APInt OtherMin = Other.getSignedMin(), OtherMax = Other.getSignedMax();
+
+  APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+  APInt SignedMax = APInt::getSignedMaxValue(getBitWidth());
+
+  // a s- b overflows high iff a s>=0 && b s< 0 && a s> smax + b.
+  // a s- b overflows low iff a s< 0 && b s>= 0 && a s< smin + b.
+  if (Min.isNonNegative() && OtherMax.isNegative() &&
+      Min.sgt(SignedMax + OtherMax))
+    return OverflowResult::AlwaysOverflowsHigh;
+  if (Max.isNegative() && OtherMin.isNonNegative() &&
+      Max.slt(SignedMin + OtherMin))
+    return OverflowResult::AlwaysOverflowsLow;
+
+  if (Max.isNonNegative() && OtherMin.isNegative() &&
+      Max.sgt(SignedMax + OtherMin))
+    return OverflowResult::MayOverflow;
+  if (Min.isNegative() && OtherMax.isNonNegative() &&
+      Min.slt(SignedMin + OtherMax))
+    return OverflowResult::MayOverflow;
+
+  return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::unsignedMulMayOverflow(
+    const ConstantRange &Other) const {
+  if (isEmptySet() || Other.isEmptySet())
+    return OverflowResult::MayOverflow;
+
+  APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+  APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+  bool Overflow;
+
+  (void) Min.umul_ov(OtherMin, Overflow);
+  if (Overflow)
+    return OverflowResult::AlwaysOverflowsHigh;
+
+  (void) Max.umul_ov(OtherMax, Overflow);
+  if (Overflow)
+    return OverflowResult::MayOverflow;
+
+  return OverflowResult::NeverOverflows;
+}
+
 void ConstantRange::print(raw_ostream &OS) const {
   if (isFullSet())
     OS << "full-set";
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index d36967fdcfe1..ff551da29ae6 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1,9 +1,8 @@
 //===-- Constants.cpp - Implement Constant nodes --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -261,6 +260,16 @@ bool Constant::containsUndefElement() const {
   return false;
 }
 
+bool Constant::containsConstantExpression() const {
+  if (!getType()->isVectorTy())
+    return false;
+  for (unsigned i = 0, e = getType()->getVectorNumElements(); i != e; ++i)
+    if (isa<ConstantExpr>(getAggregateElement(i)))
+      return true;
+
+  return false;
+}
+
 /// Constructor to create a '0' constant of arbitrary type.
 Constant *Constant::getNullValue(Type *Ty) {
   switch (Ty->getTypeID()) {
@@ -1821,7 +1830,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C, unsigned Flags,
   }
 #endif
 
-  // TODO: Try to constant fold operation.
+  if (Constant *FC = ConstantFoldUnaryInstruction(Opcode, C))
+    return FC;
 
   if (OnlyIfReducedTy == C->getType())
     return nullptr;
@@ -1846,51 +1856,31 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
   case Instruction::Add:
   case Instruction::Sub:
   case Instruction::Mul:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
     assert(C1->getType()->isIntOrIntVectorTy() &&
            "Tried to create an integer operation on a non-integer type!");
     break;
   case Instruction::FAdd:
   case Instruction::FSub:
   case Instruction::FMul:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
-    assert(C1->getType()->isFPOrFPVectorTy() &&
-           "Tried to create a floating-point operation on a "
-           "non-floating-point type!");
-    break;
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
-    assert(C1->getType()->isIntOrIntVectorTy() &&
-           "Tried to create an arithmetic operation on a non-arithmetic type!");
-    break;
   case Instruction::FDiv:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
-    assert(C1->getType()->isFPOrFPVectorTy() &&
-           "Tried to create an arithmetic operation on a non-arithmetic type!");
-    break;
-  case Instruction::URem:
-  case Instruction::SRem:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
-    assert(C1->getType()->isIntOrIntVectorTy() &&
-           "Tried to create an arithmetic operation on a non-arithmetic type!");
-    break;
   case Instruction::FRem:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
     assert(C1->getType()->isFPOrFPVectorTy() &&
-           "Tried to create an arithmetic operation on a non-arithmetic type!");
+           "Tried to create a floating-point operation on a "
+           "non-floating-point type!");
     break;
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
     assert(C1->getType()->isIntOrIntVectorTy() &&
            "Tried to create a logical operation on a non-integral type!");
     break;
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
-    assert(C1->getType() == C2->getType() && "Op types should be identical!");
     assert(C1->getType()->isIntOrIntVectorTy() &&
            "Tried to create a shift operation on a non-integer type!");
     break;
@@ -1900,7 +1890,7 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
 #endif
 
   if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
-    return FC;          // Fold a few common cases.
+    return FC;
 
   if (OnlyIfReducedTy == C1->getType())
     return nullptr;
@@ -2226,7 +2216,7 @@ Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
 Constant *ConstantExpr::getFNeg(Constant *C) {
   assert(C->getType()->isFPOrFPVectorTy() &&
          "Cannot FNEG a non-floating-point value!");
-  return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+  return get(Instruction::FNeg, C);
 }
 
 Constant *ConstantExpr::getNot(Constant *C) {
@@ -2567,7 +2557,7 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context,
 Constant *ConstantDataArray::getString(LLVMContext &Context,
                                        StringRef Str, bool AddNull) {
   if (!AddNull) {
-    const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
+    const uint8_t *Data = Str.bytes_begin();
     return get(Context, makeArrayRef(Data, Str.size()));
   }
 
@@ -3015,7 +3005,8 @@ Instruction *ConstantExpr::getAsInstruction() {
   case Instruction::FCmp:
     return CmpInst::Create((Instruction::OtherOps)getOpcode(),
                            (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
-
+  case Instruction::FNeg:
+    return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
   default:
     assert(getNumOperands() == 2 && "Must be binary operator?");
     BinaryOperator *BO =
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index eac171397084..7614dab9f15d 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -1,9 +1,8 @@
 //===-- ConstantsContext.h - Constants-related Context Interals -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 815797f4b7ea..310935b5213a 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1,9 +1,8 @@
 //===-- Core.cpp ----------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1037,6 +1036,16 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
 
 /*--.. Operations on metadata nodes ........................................--*/
 
+LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str,
+                                       size_t SLen) {
+  return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs,
+                                     size_t Count) {
+  return wrap(MDNode::get(*unwrap(C), ArrayRef<Metadata*>(unwrap(MDs), Count)));
+}
+
 LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
                                    unsigned SLen) {
   LLVMContext &Context = *unwrap(C);
@@ -1200,15 +1209,17 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
 const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
   if (!Length) return nullptr;
   StringRef S;
-  if (const auto *I = unwrap<Instruction>(Val)) {
-    S = I->getDebugLoc()->getDirectory();
-  } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+  if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+    if (const auto &DL = I->getDebugLoc()) {
+      S = DL->getDirectory();
+    }
+  } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
     SmallVector<DIGlobalVariableExpression *, 1> GVEs;
     GV->getDebugInfo(GVEs);
     if (GVEs.size())
       if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
         S = DGV->getDirectory();
-  } else if (const auto *F = unwrap<Function>(Val)) {
+  } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
     if (const DISubprogram *DSP = F->getSubprogram())
       S = DSP->getDirectory();
   } else {
@@ -1222,15 +1233,17 @@ const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
 const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
   if (!Length) return nullptr;
   StringRef S;
-  if (const auto *I = unwrap<Instruction>(Val)) {
-    S = I->getDebugLoc()->getFilename();
-  } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+  if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+    if (const auto &DL = I->getDebugLoc()) {
+      S = DL->getFilename();
+    }
+  } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
     SmallVector<DIGlobalVariableExpression *, 1> GVEs;
     GV->getDebugInfo(GVEs);
     if (GVEs.size())
       if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
         S = DGV->getFilename();
-  } else if (const auto *F = unwrap<Function>(Val)) {
+  } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
     if (const DISubprogram *DSP = F->getSubprogram())
       S = DSP->getFilename();
   } else {
@@ -1243,15 +1256,17 @@ const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
 
 unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
   unsigned L = 0;
-  if (const auto *I = unwrap<Instruction>(Val)) {
-    L = I->getDebugLoc()->getLine();
-  } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+  if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+    if (const auto &DL = I->getDebugLoc()) {
+      L = DL->getLine();
+    }
+  } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
     SmallVector<DIGlobalVariableExpression *, 1> GVEs;
     GV->getDebugInfo(GVEs);
     if (GVEs.size())
       if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
         L = DGV->getLine();
-  } else if (const auto *F = unwrap<Function>(Val)) {
+  } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
     if (const DISubprogram *DSP = F->getSubprogram())
       L = DSP->getLine();
   } else {
@@ -1263,9 +1278,9 @@ unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
 
 unsigned LLVMGetDebugLocColumn(LLVMValueRef Val) {
   unsigned C = 0;
-  if (const auto *I = unwrap<Instruction>(Val))
-    if (const auto &L = I->getDebugLoc())
-      C = L->getColumn();
+  if (const auto *I = dyn_cast<Instruction>(unwrap(Val)))
+    if (const auto &DL = I->getDebugLoc())
+      C = DL->getColumn();
   return C;
 }
 
@@ -2330,6 +2345,10 @@ const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
   return strdup(Str.c_str());
 }
 
+unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen) {
+  return Function::lookupIntrinsicID({Name, NameLen});
+}
+
 LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID) {
   auto IID = llvm_map_to_intrinsic_id(ID);
   return llvm::Intrinsic::isOverloaded(IID);
@@ -2464,6 +2483,71 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
   A->addAttr(Attribute::getWithAlignment(A->getContext(), align));
 }
 
+/*--.. Operations on ifuncs ................................................--*/
+
+LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M,
+                                const char *Name, size_t NameLen,
+                                LLVMTypeRef Ty, unsigned AddrSpace,
+                                LLVMValueRef Resolver) {
+  return wrap(GlobalIFunc::create(unwrap(Ty), AddrSpace,
+                                  GlobalValue::ExternalLinkage,
+                                  StringRef(Name, NameLen),
+                                  unwrap<Constant>(Resolver), unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M,
+                                     const char *Name, size_t NameLen) {
+  return wrap(unwrap(M)->getNamedIFunc(StringRef(Name, NameLen)));
+}
+
+LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M) {
+  Module *Mod = unwrap(M);
+  Module::ifunc_iterator I = Mod->ifunc_begin();
+  if (I == Mod->ifunc_end())
+    return nullptr;
+  return wrap(&*I);
+}
+
+LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M) {
+  Module *Mod = unwrap(M);
+  Module::ifunc_iterator I = Mod->ifunc_end();
+  if (I == Mod->ifunc_begin())
+    return nullptr;
+  return wrap(&*--I);
+}
+
+LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc) {
+  GlobalIFunc *GIF = unwrap<GlobalIFunc>(IFunc);
+  Module::ifunc_iterator I(GIF);
+  if (++I == GIF->getParent()->ifunc_end())
+    return nullptr;
+  return wrap(&*I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc) {
+  GlobalIFunc *GIF = unwrap<GlobalIFunc>(IFunc);
+  Module::ifunc_iterator I(GIF);
+  if (I == GIF->getParent()->ifunc_begin())
+    return nullptr;
+  return wrap(&*--I);
+}
+
+LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc) {
+  return wrap(unwrap<GlobalIFunc>(IFunc)->getResolver());
+}
+
+void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver) {
+  unwrap<GlobalIFunc>(IFunc)->setResolver(unwrap<Constant>(Resolver));
+}
+
+void LLVMEraseGlobalIFunc(LLVMValueRef IFunc) {
+  unwrap<GlobalIFunc>(IFunc)->eraseFromParent();
+}
+
+void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc) {
+  unwrap<GlobalIFunc>(IFunc)->removeFromParent();
+}
+
 /*--.. Operations on basic blocks ..........................................--*/
 
 LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
@@ -2541,6 +2625,20 @@ LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
   return wrap(llvm::BasicBlock::Create(*unwrap(C), Name));
 }
 
+void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder,
+                                                  LLVMBasicBlockRef BB) {
+  BasicBlock *ToInsert = unwrap(BB);
+  BasicBlock *CurBB = unwrap(Builder)->GetInsertBlock();
+  assert(CurBB && "current insertion point is invalid!");
+  CurBB->getParent()->getBasicBlockList().insertAfter(CurBB->getIterator(),
+                                                      ToInsert);
+}
+
+void LLVMAppendExistingBasicBlock(LLVMValueRef Fn,
+                                  LLVMBasicBlockRef BB) {
+  unwrap<Function>(Fn)->getBasicBlockList().push_back(unwrap(BB));
+}
+
 LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
                                                 LLVMValueRef FnRef,
                                                 const char *Name) {
@@ -2924,6 +3022,17 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
 
 /*--.. Metadata builders ...................................................--*/
 
+LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder) {
+  return wrap(unwrap(Builder)->getCurrentDebugLocation().getAsMDNode());
+}
+
+void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc) {
+  if (Loc)
+    unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(unwrap<MDNode>(Loc)));
+  else
+    unwrap(Builder)->SetCurrentDebugLocation(DebugLoc());
+}
+
 void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
   MDNode *Loc =
       L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr;
@@ -2940,6 +3049,17 @@ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
   unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
 }
 
+void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
+                                    LLVMMetadataRef FPMathTag) {
+
+  unwrap(Builder)->setDefaultFPMathTag(FPMathTag
+                                       ? unwrap<MDNode>(FPMathTag)
+                                       : nullptr);
+}
+
+LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder) {
+  return wrap(unwrap(Builder)->getDefaultFPMathTag());
+}
 
 /*--.. Instruction builders ................................................--*/
 
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index fb81634a2868..2493c6cbe532 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -1,9 +1,8 @@
 //===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -167,8 +166,8 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
   if (Line)
     assert(File && "Source location has line number but no file");
   unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
-  auto *M =
-      DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), File, Line, Name);
+  auto *M = DIImportedEntity::get(C, Tag, Context, cast_or_null<DINode>(NS),
+                                  File, Line, Name);
   if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
     // A new Imported Entity was just added to the context.
     // Add it to the Imported Modules list.
@@ -806,6 +805,13 @@ DISubprogram *DIBuilder::createMethod(
   return SP;
 }
 
+DICommonBlock *DIBuilder::createCommonBlock(
+    DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File,
+    unsigned LineNo) {
+  return DICommonBlock::get(
+      VMContext, Scope, Decl, Name, File, LineNo);
+}
+
 DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
                                         bool ExportSymbols) {
 
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 63c24b5ee7af..6e0ebbd4a730 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -1,9 +1,8 @@
 //===- DataLayout.cpp - Data size & alignment routines ---------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -185,6 +184,8 @@ void DataLayout::reset(StringRef Desc) {
   AllocaAddrSpace = 0;
   StackNaturalAlign = 0;
   ProgramAddrSpace = 0;
+  FunctionPtrAlign = 0;
+  TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
   ManglingMode = MM_None;
   NonIntegralAddressSpaces.clear();
 
@@ -380,6 +381,22 @@ void DataLayout::parseSpecifier(StringRef Desc) {
       StackNaturalAlign = inBytes(getInt(Tok));
       break;
     }
+    case 'F': {
+      switch (Tok.front()) {
+      case 'i':
+        TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
+        break;
+      case 'n':
+        TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign;
+        break;
+      default:
+        report_fatal_error("Unknown function pointer alignment type in "
+                           "datalayout string");
+      }
+      Tok = Tok.substr(1);
+      FunctionPtrAlign = inBytes(getInt(Tok));
+      break;
+    }
     case 'P': { // Function address space.
       ProgramAddrSpace = getAddrSpace(Tok);
       break;
@@ -433,6 +450,8 @@ bool DataLayout::operator==(const DataLayout &Other) const {
              AllocaAddrSpace == Other.AllocaAddrSpace &&
              StackNaturalAlign == Other.StackNaturalAlign &&
              ProgramAddrSpace == Other.ProgramAddrSpace &&
+             FunctionPtrAlign == Other.FunctionPtrAlign &&
+             TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
              ManglingMode == Other.ManglingMode &&
              LegalIntWidths == Other.LegalIntWidths &&
              Alignments == Other.Alignments && Pointers == Other.Pointers;
@@ -444,12 +463,9 @@ DataLayout::AlignmentsTy::iterator
 DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
                                     uint32_t BitWidth) {
   auto Pair = std::make_pair((unsigned)AlignType, BitWidth);
-  return std::lower_bound(Alignments.begin(), Alignments.end(), Pair,
-                          [](const LayoutAlignElem &LHS,
-                             const std::pair<unsigned, uint32_t> &RHS) {
-                            return std::tie(LHS.AlignType, LHS.TypeBitWidth) <
-                                   std::tie(RHS.first, RHS.second);
-                          });
+  return partition_point(Alignments, [=](const LayoutAlignElem &E) {
+    return std::make_pair(E.AlignType, E.TypeBitWidth) < Pair;
+  });
 }
 
 void
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 9fa31773b598..ce47ef207434 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -1,9 +1,8 @@
 //===- DebugInfo.cpp - Debug Information Helper Classes -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,7 +81,7 @@ void DebugInfoFinder::processCompileUnit(DICompileUnit *CU) {
       continue;
     auto *GV = DIG->getVariable();
     processScope(GV->getScope());
-    processType(GV->getType().resolve());
+    processType(GV->getType());
   }
   for (auto *ET : CU->getEnumTypes())
     processType(ET);
@@ -92,7 +91,7 @@ void DebugInfoFinder::processCompileUnit(DICompileUnit *CU) {
     else
       processSubprogram(cast<DISubprogram>(RT));
   for (auto *Import : CU->getImportedEntities()) {
-    auto *Entity = Import->getEntity().resolve();
+    auto *Entity = Import->getEntity();
     if (auto *T = dyn_cast<DIType>(Entity))
       processType(T);
     else if (auto *SP = dyn_cast<DISubprogram>(Entity))
@@ -125,14 +124,14 @@ void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
 void DebugInfoFinder::processType(DIType *DT) {
   if (!addType(DT))
     return;
-  processScope(DT->getScope().resolve());
+  processScope(DT->getScope());
   if (auto *ST = dyn_cast<DISubroutineType>(DT)) {
-    for (DITypeRef Ref : ST->getTypeArray())
-      processType(Ref.resolve());
+    for (DIType *Ref : ST->getTypeArray())
+      processType(Ref);
     return;
   }
   if (auto *DCT = dyn_cast<DICompositeType>(DT)) {
-    processType(DCT->getBaseType().resolve());
+    processType(DCT->getBaseType());
     for (Metadata *D : DCT->getElements()) {
       if (auto *T = dyn_cast<DIType>(D))
         processType(T);
@@ -142,7 +141,7 @@ void DebugInfoFinder::processType(DIType *DT) {
     return;
   }
   if (auto *DDT = dyn_cast<DIDerivedType>(DT)) {
-    processType(DDT->getBaseType().resolve());
+    processType(DDT->getBaseType());
   }
 }
 
@@ -175,7 +174,7 @@ void DebugInfoFinder::processScope(DIScope *Scope) {
 void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
   if (!addSubprogram(SP))
     return;
-  processScope(SP->getScope().resolve());
+  processScope(SP->getScope());
   // Some of the users, e.g. CloneFunctionInto / CloneModule, need to set up a
   // ValueMap containing identity mappings for all of the DICompileUnit's, not
   // just DISubprogram's, referenced from anywhere within the Function being
@@ -188,9 +187,9 @@ void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
   processType(SP->getType());
   for (auto *Element : SP->getTemplateParams()) {
     if (auto *TType = dyn_cast<DITemplateTypeParameter>(Element)) {
-      processType(TType->getType().resolve());
+      processType(TType->getType());
     } else if (auto *TVal = dyn_cast<DITemplateValueParameter>(Element)) {
-      processType(TVal->getType().resolve());
+      processType(TVal->getType());
     }
   }
 }
@@ -208,7 +207,7 @@ void DebugInfoFinder::processDeclare(const Module &M,
   if (!NodesSeen.insert(DV).second)
     return;
   processScope(DV->getScope());
-  processType(DV->getType().resolve());
+  processType(DV->getType());
 }
 
 void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
@@ -223,7 +222,7 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
   if (!NodesSeen.insert(DV).second)
     return;
   processScope(DV->getScope());
-  processType(DV->getType().resolve());
+  processType(DV->getType());
 }
 
 bool DebugInfoFinder::addType(DIType *DT) {
@@ -429,7 +428,8 @@ private:
     StringRef LinkageName = MDS->getName().empty() ? MDS->getLinkageName() : "";
     DISubprogram *Declaration = nullptr;
     auto *Type = cast_or_null<DISubroutineType>(map(MDS->getType()));
-    DITypeRef ContainingType(map(MDS->getContainingType()));
+    DIType *ContainingType =
+        cast_or_null<DIType>(map(MDS->getContainingType()));
     auto *Unit = cast_or_null<DICompileUnit>(map(MDS->getUnit()));
     auto Variables = nullptr;
     auto TemplateParams = nullptr;
@@ -900,6 +900,43 @@ LLVMMetadataRef LLVMDILocationGetScope(LLVMMetadataRef Location) {
   return wrap(unwrapDI<DILocation>(Location)->getScope());
 }
 
+LLVMMetadataRef LLVMDILocationGetInlinedAt(LLVMMetadataRef Location) {
+  return wrap(unwrapDI<DILocation>(Location)->getInlinedAt());
+}
+
+LLVMMetadataRef LLVMDIScopeGetFile(LLVMMetadataRef Scope) {
+  return wrap(unwrapDI<DIScope>(Scope)->getFile());
+}
+
+const char *LLVMDIFileGetDirectory(LLVMMetadataRef File, unsigned *Len) {
+  auto Dir = unwrapDI<DIFile>(File)->getDirectory();
+  *Len = Dir.size();
+  return Dir.data();
+}
+
+const char *LLVMDIFileGetFilename(LLVMMetadataRef File, unsigned *Len) {
+  auto Name = unwrapDI<DIFile>(File)->getFilename();
+  *Len = Name.size();
+  return Name.data();
+}
+
+const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) {
+  if (auto Src = unwrapDI<DIFile>(File)->getSource()) {
+    *Len = Src->size();
+    return Src->data();
+  }
+  *Len = 0;
+  return "";
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder,
+                                              const char *Name, size_t NameLen,
+                                              int64_t Value,
+                                              LLVMBool IsUnsigned) {
+  return wrap(unwrap(Builder)->createEnumerator({Name, NameLen}, Value,
+                                                IsUnsigned != 0));
+}
+
 LLVMMetadataRef LLVMDIBuilderCreateEnumerationType(
   LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
   size_t NameLen, LLVMMetadataRef File, unsigned LineNumber,
@@ -1237,6 +1274,27 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
       nullptr, AlignInBits));
 }
 
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE) {
+  return wrap(unwrapDI<DIGlobalVariableExpression>(GVE)->getVariable());
+}
+
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetExpression(
+    LLVMMetadataRef GVE) {
+  return wrap(unwrapDI<DIGlobalVariableExpression>(GVE)->getExpression());
+}
+
+LLVMMetadataRef LLVMDIVariableGetFile(LLVMMetadataRef Var) {
+  return wrap(unwrapDI<DIVariable>(Var)->getFile());
+}
+
+LLVMMetadataRef LLVMDIVariableGetScope(LLVMMetadataRef Var) {
+  return wrap(unwrapDI<DIVariable>(Var)->getScope());
+}
+
+unsigned LLVMDIVariableGetLine(LLVMMetadataRef Var) {
+  return unwrapDI<DIVariable>(Var)->getLine();
+}
+
 LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef Ctx, LLVMMetadataRef *Data,
                                     size_t Count) {
   return wrap(
@@ -1348,6 +1406,21 @@ void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
   unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
 }
 
+unsigned LLVMDISubprogramGetLine(LLVMMetadataRef Subprogram) {
+  return unwrapDI<DISubprogram>(Subprogram)->getLine();
+}
+
+LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst) {
+  return wrap(unwrap<Instruction>(Inst)->getDebugLoc().getAsMDNode());
+}
+
+void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc) {
+  if (Loc)
+    unwrap<Instruction>(Inst)->setDebugLoc(DebugLoc(unwrap<MDNode>(Loc)));
+  else
+    unwrap<Instruction>(Inst)->setDebugLoc(DebugLoc());
+}
+
 LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) {
   switch(unwrap(Metadata)->getMetadataID()) {
 #define HANDLE_METADATA_LEAF(CLASS) \
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 92f3f21f754c..900df27d1d33 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -1,9 +1,8 @@
 //===- DebugInfoMetadata.cpp - Implement debug info metadata --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,7 +88,7 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
   DILocation *L = LocA->getInlinedAt();
   while (S) {
     Locations.insert(std::make_pair(S, L));
-    S = S->getScope().resolve();
+    S = S->getScope();
     if (!S && L) {
       S = L->getScope();
       L = L->getInlinedAt();
@@ -101,7 +100,7 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
   while (S) {
     if (Locations.count(std::make_pair(S, L)))
       break;
-    S = S->getScope().resolve();
+    S = S->getScope();
     if (!S && L) {
       S = L->getScope();
       L = L->getInlinedAt();
@@ -210,7 +209,7 @@ DINode::DIFlags DINode::splitFlags(DIFlags Flags,
   return Flags;
 }
 
-DIScopeRef DIScope::getScope() const {
+DIScope *DIScope::getScope() const {
   if (auto *T = dyn_cast<DIType>(this))
     return T->getScope();
 
@@ -223,6 +222,9 @@ DIScopeRef DIScope::getScope() const {
   if (auto *NS = dyn_cast<DINamespace>(this))
     return NS->getScope();
 
+  if (auto *CB = dyn_cast<DICommonBlock>(this))
+    return CB->getScope();
+
   if (auto *M = dyn_cast<DIModule>(this))
     return M->getScope();
 
@@ -238,6 +240,8 @@ StringRef DIScope::getName() const {
     return SP->getName();
   if (auto *NS = dyn_cast<DINamespace>(this))
     return NS->getName();
+  if (auto *CB = dyn_cast<DICommonBlock>(this))
+    return CB->getName();
   if (auto *M = dyn_cast<DIModule>(this))
     return M->getName();
   assert((isa<DILexicalBlockBase>(this) || isa<DIFile>(this) ||
@@ -695,6 +699,17 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
   DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops);
 }
 
+DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
+                                      Metadata *Decl, MDString *Name,
+                                      Metadata *File, unsigned LineNo,
+                                      StorageType Storage, bool ShouldCreate) {
+  assert(isCanonical(Name) && "Expected canonical MDString");
+  DEFINE_GETIMPL_LOOKUP(DICommonBlock, (Scope, Decl, Name, File, LineNo));
+  // Operands are Scope, Decl, Name and File; LineNo is passed separately.
+  Metadata *Ops[] = {Scope, Decl, Name, File};
+  DEFINE_GETIMPL_STORE(DICommonBlock, (LineNo), Ops);
+}
+
 DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
                             MDString *Name, MDString *ConfigurationMacros,
                             MDString *IncludePath, MDString *ISysRoot,
@@ -814,10 +829,14 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
 
 unsigned DIExpression::ExprOperand::getSize() const {
   switch (getOp()) {
+  case dwarf::DW_OP_LLVM_convert:
   case dwarf::DW_OP_LLVM_fragment:
     return 3;
   case dwarf::DW_OP_constu:
+  case dwarf::DW_OP_deref_size:
   case dwarf::DW_OP_plus_uconst:
+  case dwarf::DW_OP_LLVM_tag_offset:
+  case dwarf::DW_OP_entry_value:
     return 2;
   default:
     return 1;
@@ -858,6 +877,15 @@ bool DIExpression::isValid() const {
         return false;
       break;
     }
+    case dwarf::DW_OP_entry_value: {
+      // An entry value operator must appear at the beginning and the size of
+      // the following expression must be 1, because we only support entry
+      // values of a simple register location.
+      return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 &&
+             getNumElements() == 2;
+    }
+    case dwarf::DW_OP_LLVM_convert:
+    case dwarf::DW_OP_LLVM_tag_offset:
     case dwarf::DW_OP_constu:
     case dwarf::DW_OP_plus_uconst:
     case dwarf::DW_OP_plus:
@@ -872,6 +900,7 @@ bool DIExpression::isValid() const {
     case dwarf::DW_OP_shr:
     case dwarf::DW_OP_shra:
     case dwarf::DW_OP_deref:
+    case dwarf::DW_OP_deref_size:
     case dwarf::DW_OP_xderef:
     case dwarf::DW_OP_lit0:
     case dwarf::DW_OP_not:
@@ -882,6 +911,42 @@ bool DIExpression::isValid() const {
   return true;
 }
 
+bool DIExpression::isImplicit() const {
+  unsigned N = getNumElements();
+  if (isValid() && N > 0) {
+    switch (getElement(N-1)) {
+      case dwarf::DW_OP_stack_value:
+      case dwarf::DW_OP_LLVM_tag_offset:
+        return true;
+      case dwarf::DW_OP_LLVM_fragment:
+        return N > 1 && getElement(N-2) == dwarf::DW_OP_stack_value;
+      default: break;
+    }
+  }
+  return false;
+}
+
+bool DIExpression::isComplex() const {
+  if (!isValid())
+    return false;
+
+  if (getNumElements() == 0)
+    return false;
+
+  // If there are any elements other than fragment or tag_offset, then some
+  // kind of complex computation occurs.
+  for (const auto &It : expr_ops()) {
+    switch (It.getOp()) {
+      case dwarf::DW_OP_LLVM_tag_offset:
+      case dwarf::DW_OP_LLVM_fragment:
+        continue;
+      default: return true;
+    }
+  }
+
+  return false;
+}
+
 Optional<DIExpression::FragmentInfo>
 DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
   for (auto I = Start; I != End; ++I)
@@ -929,25 +994,53 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
   return false;
 }
 
-DIExpression *DIExpression::prepend(const DIExpression *Expr, bool DerefBefore,
-                                    int64_t Offset, bool DerefAfter,
-                                    bool StackValue) {
+const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr,
+                                                      unsigned &AddrClass) {
+  const unsigned PatternSize = 4;
+  if (Expr->Elements.size() >= PatternSize &&
+      Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu &&
+      Expr->Elements[PatternSize - 2] == dwarf::DW_OP_swap &&
+      Expr->Elements[PatternSize - 1] == dwarf::DW_OP_xderef) {
+    AddrClass = Expr->Elements[PatternSize - 3];
+
+    if (Expr->Elements.size() == PatternSize)
+      return nullptr;
+    return DIExpression::get(Expr->getContext(),
+                             makeArrayRef(&*Expr->Elements.begin(),
+                                          Expr->Elements.size() - PatternSize));
+  }
+  return Expr;
+}
+
+DIExpression *DIExpression::prepend(const DIExpression *Expr, uint8_t Flags,
+                                    int64_t Offset) {
   SmallVector<uint64_t, 8> Ops;
-  if (DerefBefore)
+  if (Flags & DIExpression::DerefBefore)
     Ops.push_back(dwarf::DW_OP_deref);
 
   appendOffset(Ops, Offset);
-  if (DerefAfter)
+  if (Flags & DIExpression::DerefAfter)
     Ops.push_back(dwarf::DW_OP_deref);
 
-  return prependOpcodes(Expr, Ops, StackValue);
+  bool StackValue = Flags & DIExpression::StackValue;
+  bool EntryValue = Flags & DIExpression::EntryValue;
+
+  return prependOpcodes(Expr, Ops, StackValue, EntryValue);
 }
 
 DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr,
                                            SmallVectorImpl<uint64_t> &Ops,
-                                           bool StackValue) {
+                                           bool StackValue,
+                                           bool EntryValue) {
   assert(Expr && "Can't prepend ops to this expression");
 
+  if (EntryValue) {
+    Ops.push_back(dwarf::DW_OP_entry_value);
+    // Add size info needed for entry value expression.
+    // Add plus one for target register operand.
+    Ops.push_back(Expr->getNumElements() + 1);
+  }
+
   // If there are no ops to prepend, do not even add the DW_OP_stack_value.
   if (Ops.empty())
     StackValue = false;
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 10ec98ac7e6c..14d1396f1543 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -1,9 +1,8 @@
 //===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/IR/DiagnosticHandler.cpp b/lib/IR/DiagnosticHandler.cpp
index 8f972785cf91..2fe634803894 100644
--- a/lib/IR/DiagnosticHandler.cpp
+++ b/lib/IR/DiagnosticHandler.cpp
@@ -1,9 +1,8 @@
 //===- DiagnosticHandler.h - DiagnosticHandler class for LLVM -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index dc957ab7dad9..4a8e3cca3493 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -373,83 +372,3 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
 
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
-
-namespace llvm {
-namespace yaml {
-
-void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
-    IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
-  assert(io.outputting() && "input not yet implemented");
-
-  if (io.mapTag("!Passed",
-                (OptDiag->getKind() == DK_OptimizationRemark ||
-                 OptDiag->getKind() == DK_MachineOptimizationRemark)))
-    ;
-  else if (io.mapTag(
-               "!Missed",
-               (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
-                OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
-    ;
-  else if (io.mapTag(
-               "!Analysis",
-               (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
-                OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
-    ;
-  else if (io.mapTag("!AnalysisFPCommute",
-                     OptDiag->getKind() ==
-                         DK_OptimizationRemarkAnalysisFPCommute))
-    ;
-  else if (io.mapTag("!AnalysisAliasing",
-                     OptDiag->getKind() ==
-                         DK_OptimizationRemarkAnalysisAliasing))
-    ;
-  else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
-    ;
-  else
-    llvm_unreachable("Unknown remark type");
-
-  // These are read-only for now.
-  DiagnosticLocation DL = OptDiag->getLocation();
-  StringRef FN =
-      GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
-
-  StringRef PassName(OptDiag->PassName);
-  io.mapRequired("Pass", PassName);
-  io.mapRequired("Name", OptDiag->RemarkName);
-  if (!io.outputting() || DL.isValid())
-    io.mapOptional("DebugLoc", DL);
-  io.mapRequired("Function", FN);
-  io.mapOptional("Hotness", OptDiag->Hotness);
-  io.mapOptional("Args", OptDiag->Args);
-}
-
-template <> struct MappingTraits<DiagnosticLocation> {
-  static void mapping(IO &io, DiagnosticLocation &DL) {
-    assert(io.outputting() && "input not yet implemented");
-
-    StringRef File = DL.getRelativePath();
-    unsigned Line = DL.getLine();
-    unsigned Col = DL.getColumn();
-
-    io.mapRequired("File", File);
-    io.mapRequired("Line", Line);
-    io.mapRequired("Column", Col);
-  }
-
-  static const bool flow = true;
-};
-
-// Implement this as a mapping for now to get proper quotation for the value.
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
-  static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
-    assert(io.outputting() && "input not yet implemented");
-    io.mapRequired(A.Key.data(), A.Val);
-    if (A.Loc.isValid())
-      io.mapOptional("DebugLoc", A.Loc);
-  }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp
index ee2df9e24f93..496bd18e78e2 100644
--- a/lib/IR/DiagnosticPrinter.cpp
+++ b/lib/IR/DiagnosticPrinter.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/DomTreeUpdater.cpp b/lib/IR/DomTreeUpdater.cpp
deleted file mode 100644
index b72c1b77c2ce..000000000000
--- a/lib/IR/DomTreeUpdater.cpp
+++ /dev/null
@@ -1,529 +0,0 @@
-//===- DomTreeUpdater.cpp - DomTree/Post DomTree Updater --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the DomTreeUpdater class, which provides a uniform way
-// to update dominator tree related data structures.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/DomTreeUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/Support/GenericDomTree.h"
-#include <algorithm>
-#include <functional>
-
-namespace llvm {
-
-bool DomTreeUpdater::isUpdateValid(
-    const DominatorTree::UpdateType Update) const {
-  const auto *From = Update.getFrom();
-  const auto *To = Update.getTo();
-  const auto Kind = Update.getKind();
-
-  // Discard updates by inspecting the current state of successors of From.
-  // Since isUpdateValid() must be called *after* the Terminator of From is
-  // altered we can determine if the update is unnecessary for batch updates
-  // or invalid for a single update.
-  const bool HasEdge = llvm::any_of(
-      successors(From), [To](const BasicBlock *B) { return B == To; });
-
-  // If the IR does not match the update,
-  // 1. In batch updates, this update is unnecessary.
-  // 2. When called by insertEdge*()/deleteEdge*(), this update is invalid.
-  // Edge does not exist in IR.
-  if (Kind == DominatorTree::Insert && !HasEdge)
-    return false;
-
-  // Edge exists in IR.
-  if (Kind == DominatorTree::Delete && HasEdge)
-    return false;
-
-  return true;
-}
-
-bool DomTreeUpdater::isSelfDominance(
-    const DominatorTree::UpdateType Update) const {
-  // Won't affect DomTree and PostDomTree.
-  return Update.getFrom() == Update.getTo();
-}
-
-bool DomTreeUpdater::applyLazyUpdate(DominatorTree::UpdateKind Kind,
-                                     BasicBlock *From, BasicBlock *To) {
-  assert((DT || PDT) &&
-         "Call applyLazyUpdate() when both DT and PDT are nullptrs.");
-  assert(Strategy == DomTreeUpdater::UpdateStrategy::Lazy &&
-         "Call applyLazyUpdate() with Eager strategy error");
-  // Analyze pending updates to determine if the update is unnecessary.
-  const DominatorTree::UpdateType Update = {Kind, From, To};
-  const DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert
-                                                ? DominatorTree::Insert
-                                                : DominatorTree::Delete,
-                                            From, To};
-  // Only check duplicates in updates that are not applied by both trees.
-  auto I =
-      PendUpdates.begin() + std::max(PendDTUpdateIndex, PendPDTUpdateIndex);
-  const auto E = PendUpdates.end();
-
-  assert(I <= E && "Iterator out of range.");
-
-  for (; I != E; ++I) {
-    if (Update == *I)
-      return false; // Discard duplicate updates.
-
-    if (Invert == *I) {
-      // Update and Invert are both valid (equivalent to a no-op). Remove
-      // Invert from PendUpdates and discard the Update.
-      PendUpdates.erase(I);
-      return false;
-    }
-  }
-
-  PendUpdates.push_back(Update); // Save the valid update.
-  return true;
-}
-
-void DomTreeUpdater::applyDomTreeUpdates() {
-  // No pending DomTreeUpdates.
-  if (Strategy != UpdateStrategy::Lazy || !DT)
-    return;
-
-  // Only apply updates not are applied by DomTree.
-  if (hasPendingDomTreeUpdates()) {
-    const auto I = PendUpdates.begin() + PendDTUpdateIndex;
-    const auto E = PendUpdates.end();
-    assert(I < E && "Iterator range invalid; there should be DomTree updates.");
-    DT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
-    PendDTUpdateIndex = PendUpdates.size();
-  }
-}
-
-void DomTreeUpdater::flush() {
-  applyDomTreeUpdates();
-  applyPostDomTreeUpdates();
-  dropOutOfDateUpdates();
-}
-
-void DomTreeUpdater::applyPostDomTreeUpdates() {
-  // No pending PostDomTreeUpdates.
-  if (Strategy != UpdateStrategy::Lazy || !PDT)
-    return;
-
-  // Only apply updates not are applied by PostDomTree.
-  if (hasPendingPostDomTreeUpdates()) {
-    const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
-    const auto E = PendUpdates.end();
-    assert(I < E &&
-           "Iterator range invalid; there should be PostDomTree updates.");
-    PDT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
-    PendPDTUpdateIndex = PendUpdates.size();
-  }
-}
-
-void DomTreeUpdater::tryFlushDeletedBB() {
-  if (!hasPendingUpdates())
-    forceFlushDeletedBB();
-}
-
-bool DomTreeUpdater::forceFlushDeletedBB() {
-  if (DeletedBBs.empty())
-    return false;
-
-  for (auto *BB : DeletedBBs) {
-    // After calling deleteBB or callbackDeleteBB under Lazy UpdateStrategy,
-    // validateDeleteBB() removes all instructions of DelBB and adds an
-    // UnreachableInst as its terminator. So we check whether the BasicBlock to
-    // delete only has an UnreachableInst inside.
-    assert(BB->getInstList().size() == 1 &&
-           isa<UnreachableInst>(BB->getTerminator()) &&
-           "DelBB has been modified while awaiting deletion.");
-    BB->removeFromParent();
-    eraseDelBBNode(BB);
-    delete BB;
-  }
-  DeletedBBs.clear();
-  Callbacks.clear();
-  return true;
-}
-
-void DomTreeUpdater::recalculate(Function &F) {
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->recalculate(F);
-    if (PDT)
-      PDT->recalculate(F);
-    return;
-  }
-
-  // There is little performance gain if we pend the recalculation under
-  // Lazy UpdateStrategy so we recalculate available trees immediately.
-
-  // Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes.
-  IsRecalculatingDomTree = IsRecalculatingPostDomTree = true;
-
-  // Because all trees are going to be up-to-date after recalculation,
-  // flush awaiting deleted BasicBlocks.
-  forceFlushDeletedBB();
-  if (DT)
-    DT->recalculate(F);
-  if (PDT)
-    PDT->recalculate(F);
-
-  // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
-  IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
-  PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
-  dropOutOfDateUpdates();
-}
-
-bool DomTreeUpdater::hasPendingUpdates() const {
-  return hasPendingDomTreeUpdates() || hasPendingPostDomTreeUpdates();
-}
-
-bool DomTreeUpdater::hasPendingDomTreeUpdates() const {
-  if (!DT)
-    return false;
-  return PendUpdates.size() != PendDTUpdateIndex;
-}
-
-bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const {
-  if (!PDT)
-    return false;
-  return PendUpdates.size() != PendPDTUpdateIndex;
-}
-
-bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const {
-  if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty())
-    return false;
-  return DeletedBBs.count(DelBB) != 0;
-}
-
-// The DT and PDT require the nodes related to updates
-// are not deleted when update functions are called.
-// So BasicBlock deletions must be pended when the
-// UpdateStrategy is Lazy. When the UpdateStrategy is
-// Eager, the BasicBlock will be deleted immediately.
-void DomTreeUpdater::deleteBB(BasicBlock *DelBB) {
-  validateDeleteBB(DelBB);
-  if (Strategy == UpdateStrategy::Lazy) {
-    DeletedBBs.insert(DelBB);
-    return;
-  }
-
-  DelBB->removeFromParent();
-  eraseDelBBNode(DelBB);
-  delete DelBB;
-}
-
-void DomTreeUpdater::callbackDeleteBB(
-    BasicBlock *DelBB, std::function<void(BasicBlock *)> Callback) {
-  validateDeleteBB(DelBB);
-  if (Strategy == UpdateStrategy::Lazy) {
-    Callbacks.push_back(CallBackOnDeletion(DelBB, Callback));
-    DeletedBBs.insert(DelBB);
-    return;
-  }
-
-  DelBB->removeFromParent();
-  eraseDelBBNode(DelBB);
-  Callback(DelBB);
-  delete DelBB;
-}
-
-void DomTreeUpdater::eraseDelBBNode(BasicBlock *DelBB) {
-  if (DT && !IsRecalculatingDomTree)
-    if (DT->getNode(DelBB))
-      DT->eraseNode(DelBB);
-
-  if (PDT && !IsRecalculatingPostDomTree)
-    if (PDT->getNode(DelBB))
-      PDT->eraseNode(DelBB);
-}
-
-void DomTreeUpdater::validateDeleteBB(BasicBlock *DelBB) {
-  assert(DelBB && "Invalid push_back of nullptr DelBB.");
-  assert(pred_empty(DelBB) && "DelBB has one or more predecessors.");
-  // DelBB is unreachable and all its instructions are dead.
-  while (!DelBB->empty()) {
-    Instruction &I = DelBB->back();
-    // Replace used instructions with an arbitrary value (undef).
-    if (!I.use_empty())
-      I.replaceAllUsesWith(llvm::UndefValue::get(I.getType()));
-    DelBB->getInstList().pop_back();
-  }
-  // Make sure DelBB has a valid terminator instruction. As long as DelBB is a
-  // Child of Function F it must contain valid IR.
-  new UnreachableInst(DelBB->getContext(), DelBB);
-}
-
-void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
-                                  bool ForceRemoveDuplicates) {
-  if (!DT && !PDT)
-    return;
-
-  if (Strategy == UpdateStrategy::Lazy || ForceRemoveDuplicates) {
-    SmallVector<DominatorTree::UpdateType, 8> Seen;
-    for (const auto U : Updates)
-      // For Lazy UpdateStrategy, avoid duplicates to applyLazyUpdate() to save
-      // on analysis.
-      if (llvm::none_of(
-              Seen,
-              [U](const DominatorTree::UpdateType S) { return S == U; }) &&
-          isUpdateValid(U) && !isSelfDominance(U)) {
-        Seen.push_back(U);
-        if (Strategy == UpdateStrategy::Lazy)
-          applyLazyUpdate(U.getKind(), U.getFrom(), U.getTo());
-      }
-    if (Strategy == UpdateStrategy::Lazy)
-      return;
-
-    if (DT)
-      DT->applyUpdates(Seen);
-    if (PDT)
-      PDT->applyUpdates(Seen);
-    return;
-  }
-
-  if (DT)
-    DT->applyUpdates(Updates);
-  if (PDT)
-    PDT->applyUpdates(Updates);
-}
-
-DominatorTree &DomTreeUpdater::getDomTree() {
-  assert(DT && "Invalid acquisition of a null DomTree");
-  applyDomTreeUpdates();
-  dropOutOfDateUpdates();
-  return *DT;
-}
-
-PostDominatorTree &DomTreeUpdater::getPostDomTree() {
-  assert(PDT && "Invalid acquisition of a null PostDomTree");
-  applyPostDomTreeUpdates();
-  dropOutOfDateUpdates();
-  return *PDT;
-}
-
-void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
-  assert(isUpdateValid({DominatorTree::Insert, From, To}) &&
-         "Inserted edge does not appear in the CFG");
-#endif
-
-  if (!DT && !PDT)
-    return;
-
-  // Won't affect DomTree and PostDomTree; discard update.
-  if (From == To)
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->insertEdge(From, To);
-    if (PDT)
-      PDT->insertEdge(From, To);
-    return;
-  }
-
-  applyLazyUpdate(DominatorTree::Insert, From, To);
-}
-
-void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
-  if (From == To)
-    return;
-
-  if (!DT && !PDT)
-    return;
-
-  if (!isUpdateValid({DominatorTree::Insert, From, To}))
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->insertEdge(From, To);
-    if (PDT)
-      PDT->insertEdge(From, To);
-    return;
-  }
-
-  applyLazyUpdate(DominatorTree::Insert, From, To);
-}
-
-void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
-  assert(isUpdateValid({DominatorTree::Delete, From, To}) &&
-         "Deleted edge still exists in the CFG!");
-#endif
-
-  if (!DT && !PDT)
-    return;
-
-  // Won't affect DomTree and PostDomTree; discard update.
-  if (From == To)
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->deleteEdge(From, To);
-    if (PDT)
-      PDT->deleteEdge(From, To);
-    return;
-  }
-
-  applyLazyUpdate(DominatorTree::Delete, From, To);
-}
-
-void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
-  if (From == To)
-    return;
-
-  if (!DT && !PDT)
-    return;
-
-  if (!isUpdateValid({DominatorTree::Delete, From, To}))
-    return;
-
-  if (Strategy == UpdateStrategy::Eager) {
-    if (DT)
-      DT->deleteEdge(From, To);
-    if (PDT)
-      PDT->deleteEdge(From, To);
-    return;
-  }
-
-  applyLazyUpdate(DominatorTree::Delete, From, To);
-}
-
-void DomTreeUpdater::dropOutOfDateUpdates() {
-  if (Strategy == DomTreeUpdater::UpdateStrategy::Eager)
-    return;
-
-  tryFlushDeletedBB();
-
-  // Drop all updates applied by both trees.
-  if (!DT)
-    PendDTUpdateIndex = PendUpdates.size();
-  if (!PDT)
-    PendPDTUpdateIndex = PendUpdates.size();
-
-  const size_t dropIndex = std::min(PendDTUpdateIndex, PendPDTUpdateIndex);
-  const auto B = PendUpdates.begin();
-  const auto E = PendUpdates.begin() + dropIndex;
-  assert(B <= E && "Iterator out of range.");
-  PendUpdates.erase(B, E);
-  // Calculate current index.
-  PendDTUpdateIndex -= dropIndex;
-  PendPDTUpdateIndex -= dropIndex;
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DomTreeUpdater::dump() const {
-  raw_ostream &OS = llvm::dbgs();
-
-  OS << "Available Trees: ";
-  if (DT || PDT) {
-    if (DT)
-      OS << "DomTree ";
-    if (PDT)
-      OS << "PostDomTree ";
-    OS << "\n";
-  } else
-    OS << "None\n";
-
-  OS << "UpdateStrategy: ";
-  if (Strategy == UpdateStrategy::Eager) {
-    OS << "Eager\n";
-    return;
-  } else
-    OS << "Lazy\n";
-  int Index = 0;
-
-  auto printUpdates =
-      [&](ArrayRef<DominatorTree::UpdateType>::const_iterator begin,
-          ArrayRef<DominatorTree::UpdateType>::const_iterator end) {
-        if (begin == end)
-          OS << "  None\n";
-        Index = 0;
-        for (auto It = begin, ItEnd = end; It != ItEnd; ++It) {
-          auto U = *It;
-          OS << "  " << Index << " : ";
-          ++Index;
-          if (U.getKind() == DominatorTree::Insert)
-            OS << "Insert, ";
-          else
-            OS << "Delete, ";
-          BasicBlock *From = U.getFrom();
-          if (From) {
-            auto S = From->getName();
-            if (!From->hasName())
-              S = "(no name)";
-            OS << S << "(" << From << "), ";
-          } else {
-            OS << "(badref), ";
-          }
-          BasicBlock *To = U.getTo();
-          if (To) {
-            auto S = To->getName();
-            if (!To->hasName())
-              S = "(no_name)";
-            OS << S << "(" << To << ")\n";
-          } else {
-            OS << "(badref)\n";
-          }
-        }
-      };
-
-  if (DT) {
-    const auto I = PendUpdates.begin() + PendDTUpdateIndex;
-    assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
-           "Iterator out of range.");
-    OS << "Applied but not cleared DomTreeUpdates:\n";
-    printUpdates(PendUpdates.begin(), I);
-    OS << "Pending DomTreeUpdates:\n";
-    printUpdates(I, PendUpdates.end());
-  }
-
-  if (PDT) {
-    const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
-    assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
-           "Iterator out of range.");
-    OS << "Applied but not cleared PostDomTreeUpdates:\n";
-    printUpdates(PendUpdates.begin(), I);
-    OS << "Pending PostDomTreeUpdates:\n";
-    printUpdates(I, PendUpdates.end());
-  }
-
-  OS << "Pending DeletedBBs:\n";
-  Index = 0;
-  for (auto BB : DeletedBBs) {
-    OS << "  " << Index << " : ";
-    ++Index;
-    if (BB->hasName())
-      OS << BB->getName() << "(";
-    else
-      OS << "(no_name)(";
-    OS << BB << ")\n";
-  }
-
-  OS << "Pending Callbacks:\n";
-  Index = 0;
-  for (auto BB : Callbacks) {
-    OS << "  " << Index << " : ";
-    ++Index;
-    if (BB->hasName())
-      OS << BB->getName() << "(";
-    else
-      OS << "(no_name)(";
-    OS << BB << ")\n";
-  }
-}
-#endif
-} // namespace llvm
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index cf9f5759ba53..910a41050b94 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -1,9 +1,8 @@
 //===- Dominators.cpp - Dominator Calculation -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index a88478b89bfc..dc28d22548dd 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -1,9 +1,8 @@
 //===- Function.cpp - Implement the Global object classes -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,6 +113,11 @@ unsigned Argument::getParamAlignment() const {
   return getParent()->getParamAlignment(getArgNo());
 }
 
+Type *Argument::getParamByValType() const {
+  assert(getType()->isPointerTy() && "Only pointers have byval types");
+  return getParent()->getParamByValType(getArgNo());
+}
+
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
@@ -146,6 +150,10 @@ bool Argument::hasStructRetAttr() const {
   return hasAttribute(Attribute::StructRet);
 }
 
+bool Argument::hasInRegAttr() const {
+  return hasAttribute(Attribute::InReg);
+}
+
 bool Argument::hasReturnedAttr() const {
   return hasAttribute(Attribute::Returned);
 }
@@ -186,6 +194,10 @@ bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
   return getParent()->hasParamAttribute(getArgNo(), Kind);
 }
 
+Attribute Argument::getAttribute(Attribute::AttrKind Kind) const {
+  return getParent()->getParamAttribute(getArgNo(), Kind);
+}
+
 //===----------------------------------------------------------------------===//
 // Helper Methods in Function
 //===----------------------------------------------------------------------===//
@@ -521,9 +533,8 @@ static ArrayRef<const char *> findTargetSubtable(StringRef Name) {
   // Drop "llvm." and take the first dotted component. That will be the target
   // if this is target specific.
   StringRef Target = Name.drop_front(5).split('.').first;
-  auto It = std::lower_bound(Targets.begin(), Targets.end(), Target,
-                             [](const IntrinsicTargetInfo &TI,
-                                StringRef Target) { return TI.Name < Target; });
+  auto It = partition_point(
+      Targets, [=](const IntrinsicTargetInfo &TI) { return TI.Name < Target; });
   // We've either found the target or just fall back to the generic set, which
   // is always first.
   const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
@@ -688,7 +699,8 @@ enum IIT_Info {
   IIT_STRUCT6 = 38,
   IIT_STRUCT7 = 39,
   IIT_STRUCT8 = 40,
-  IIT_F128 = 41
+  IIT_F128 = 41,
+  IIT_VEC_ELEMENT = 42
 };
 
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -853,6 +865,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
       DecodeIITType(NextElt, Infos, OutputTable);
     return;
   }
+  case IIT_VEC_ELEMENT: {
+    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument,
+                                             ArgInfo));
+    return;
+  }
   }
   llvm_unreachable("unhandled");
 }
@@ -949,10 +967,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
   case IITDescriptor::SameVecWidthArgument: {
     Type *EltTy = DecodeFixedType(Infos, Tys, Context);
     Type *Ty = Tys[D.getArgumentNumber()];
-    if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (auto *VTy = dyn_cast<VectorType>(Ty))
       return VectorType::get(EltTy, VTy->getNumElements());
-    }
-    llvm_unreachable("unhandled");
+    return EltTy;
   }
   case IITDescriptor::PtrToArgument: {
     Type *Ty = Tys[D.getArgumentNumber()];
@@ -966,6 +983,12 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
     Type *EltTy = VTy->getVectorElementType();
     return PointerType::getUnqual(EltTy);
   }
+  case IITDescriptor::VecElementArgument: {
+    Type *Ty = Tys[D.getArgumentNumber()];
+    if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+      return VTy->getElementType();
+    llvm_unreachable("Expected an argument of Vector Type");
+  }
   case IITDescriptor::VecOfAnyPtrsToElt:
     // Return the overloaded type (which determines the pointers address space)
     return Tys[D.getOverloadArgNumber()];
@@ -1020,9 +1043,10 @@ bool Intrinsic::isLeaf(ID id) {
 Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
   // There can never be multiple globals with the same name of different types,
   // because intrinsics must be a specific type.
-  return
-    cast<Function>(M->getOrInsertFunction(getName(id, Tys),
-                                          getType(M->getContext(), id, Tys)));
+  return cast<Function>(
+      M->getOrInsertFunction(getName(id, Tys),
+                             getType(M->getContext(), id, Tys))
+          .getCallee());
 }
 
 // This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
@@ -1035,12 +1059,26 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
 #include "llvm/IR/IntrinsicImpl.inc"
 #undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
 
-bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
-                                   SmallVectorImpl<Type*> &ArgTys) {
+using DeferredIntrinsicMatchPair =
+    std::pair<Type *, ArrayRef<Intrinsic::IITDescriptor>>;
+
+static bool matchIntrinsicType(
+    Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
+    SmallVectorImpl<Type *> &ArgTys,
+    SmallVectorImpl<DeferredIntrinsicMatchPair> &DeferredChecks,
+    bool IsDeferredCheck) {
   using namespace Intrinsic;
 
   // If we ran out of descriptors, there are too many arguments.
   if (Infos.empty()) return true;
+
+  // Do this before slicing off the 'front' part
+  auto InfosRef = Infos;
+  auto DeferCheck = [&DeferredChecks, &InfosRef](Type *T) {
+    DeferredChecks.emplace_back(T, InfosRef);
+    return false;
+  };
+
   IITDescriptor D = Infos.front();
   Infos = Infos.slice(1);
 
@@ -1058,12 +1096,14 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
     case IITDescriptor::Vector: {
       VectorType *VT = dyn_cast<VectorType>(Ty);
       return !VT || VT->getNumElements() != D.Vector_Width ||
-             matchIntrinsicType(VT->getElementType(), Infos, ArgTys);
+             matchIntrinsicType(VT->getElementType(), Infos, ArgTys,
+                                DeferredChecks, IsDeferredCheck);
     }
     case IITDescriptor::Pointer: {
       PointerType *PT = dyn_cast<PointerType>(Ty);
       return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace ||
-             matchIntrinsicType(PT->getElementType(), Infos, ArgTys);
+             matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
+                                DeferredChecks, IsDeferredCheck);
     }
 
     case IITDescriptor::Struct: {
@@ -1072,35 +1112,40 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
         return true;
 
       for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
-        if (matchIntrinsicType(ST->getElementType(i), Infos, ArgTys))
+        if (matchIntrinsicType(ST->getElementType(i), Infos, ArgTys,
+                               DeferredChecks, IsDeferredCheck))
           return true;
       return false;
     }
 
     case IITDescriptor::Argument:
-      // Two cases here - If this is the second occurrence of an argument, verify
-      // that the later instance matches the previous instance.
+      // If this is the second occurrence of an argument,
+      // verify that the later instance matches the previous instance.
       if (D.getArgumentNumber() < ArgTys.size())
         return Ty != ArgTys[D.getArgumentNumber()];
 
-          // Otherwise, if this is the first instance of an argument, record it and
-          // verify the "Any" kind.
-          assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
-          ArgTys.push_back(Ty);
+      if (D.getArgumentNumber() > ArgTys.size() ||
+          D.getArgumentKind() == IITDescriptor::AK_MatchType)
+        return IsDeferredCheck || DeferCheck(Ty);
 
-          switch (D.getArgumentKind()) {
-            case IITDescriptor::AK_Any:        return false; // Success
-            case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
-            case IITDescriptor::AK_AnyFloat:   return !Ty->isFPOrFPVectorTy();
-            case IITDescriptor::AK_AnyVector:  return !isa<VectorType>(Ty);
-            case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
-          }
-          llvm_unreachable("all argument kinds not covered");
+      assert(D.getArgumentNumber() == ArgTys.size() && !IsDeferredCheck &&
+             "Table consistency error");
+      ArgTys.push_back(Ty);
+
+      switch (D.getArgumentKind()) {
+        case IITDescriptor::AK_Any:        return false; // Success
+        case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
+        case IITDescriptor::AK_AnyFloat:   return !Ty->isFPOrFPVectorTy();
+        case IITDescriptor::AK_AnyVector:  return !isa<VectorType>(Ty);
+        case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
+        default:                           break;
+      }
+      llvm_unreachable("all argument kinds not covered");
 
     case IITDescriptor::ExtendArgument: {
-      // This may only be used when referring to a previous vector argument.
+      // If this is a forward reference, defer the check for later.
       if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
+        return IsDeferredCheck || DeferCheck(Ty);
 
       Type *NewTy = ArgTys[D.getArgumentNumber()];
       if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
@@ -1113,9 +1158,9 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
       return Ty != NewTy;
     }
     case IITDescriptor::TruncArgument: {
-      // This may only be used when referring to a previous vector argument.
+      // If this is a forward reference, defer the check for later.
       if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
+        return IsDeferredCheck || DeferCheck(Ty);
 
       Type *NewTy = ArgTys[D.getArgumentNumber()];
       if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
@@ -1128,34 +1173,42 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
       return Ty != NewTy;
     }
     case IITDescriptor::HalfVecArgument:
-      // This may only be used when referring to a previous vector argument.
+      // Forward references are not deferred here; they fail to match.
       return D.getArgumentNumber() >= ArgTys.size() ||
              !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
              VectorType::getHalfElementsVectorType(
                      cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
     case IITDescriptor::SameVecWidthArgument: {
-      if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
-      VectorType * ReferenceType =
-        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
-      VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
-      if (!ThisArgType || !ReferenceType ||
-          (ReferenceType->getVectorNumElements() !=
-           ThisArgType->getVectorNumElements()))
+      if (D.getArgumentNumber() >= ArgTys.size()) {
+        // Defer check and subsequent check for the vector element type.
+        Infos = Infos.slice(1);
+        return IsDeferredCheck || DeferCheck(Ty);
+      }
+      auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+      auto *ThisArgType = dyn_cast<VectorType>(Ty);
+      // Both must be vectors of the same number of elements or neither.
+      if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
         return true;
-      return matchIntrinsicType(ThisArgType->getVectorElementType(),
-                                Infos, ArgTys);
+      Type *EltTy = Ty;
+      if (ThisArgType) {
+        if (ReferenceType->getVectorNumElements() !=
+            ThisArgType->getVectorNumElements())
+          return true;
+        EltTy = ThisArgType->getVectorElementType();
+      }
+      return matchIntrinsicType(EltTy, Infos, ArgTys, DeferredChecks,
+                                IsDeferredCheck);
     }
     case IITDescriptor::PtrToArgument: {
       if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
+        return IsDeferredCheck || DeferCheck(Ty);
       Type * ReferenceType = ArgTys[D.getArgumentNumber()];
       PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
       return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
     }
     case IITDescriptor::PtrToElt: {
       if (D.getArgumentNumber() >= ArgTys.size())
-        return true;
+        return IsDeferredCheck || DeferCheck(Ty);
       VectorType * ReferenceType =
         dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
       PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
@@ -1165,15 +1218,20 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
     }
     case IITDescriptor::VecOfAnyPtrsToElt: {
       unsigned RefArgNumber = D.getRefArgNumber();
+      if (RefArgNumber >= ArgTys.size()) {
+        if (IsDeferredCheck)
+          return true;
+        // If forward referencing, already add the pointer-vector type and
+        // defer the checks for later.
+        ArgTys.push_back(Ty);
+        return DeferCheck(Ty);
+      }
 
-      // This may only be used when referring to a previous argument.
-      if (RefArgNumber >= ArgTys.size())
-        return true;
-
-      // Record the overloaded type
-      assert(D.getOverloadArgNumber() == ArgTys.size() &&
-             "Table consistency error");
-      ArgTys.push_back(Ty);
+      if (!IsDeferredCheck){
+        assert(D.getOverloadArgNumber() == ArgTys.size() &&
+               "Table consistency error");
+        ArgTys.push_back(Ty);
+      }
 
       // Verify the overloaded type "matches" the Ref type.
       // i.e. Ty is a vector with the same width as Ref.
@@ -1191,10 +1249,42 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
       return ThisArgEltTy->getElementType() !=
              ReferenceType->getVectorElementType();
     }
+    case IITDescriptor::VecElementArgument: {
+      if (D.getArgumentNumber() >= ArgTys.size())
+        return IsDeferredCheck ? true : DeferCheck(Ty);
+      auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+      return !ReferenceType || Ty != ReferenceType->getElementType();
+    }
   }
   llvm_unreachable("unhandled");
 }
 
+Intrinsic::MatchIntrinsicTypesResult
+Intrinsic::matchIntrinsicSignature(FunctionType *FTy,
+                                   ArrayRef<Intrinsic::IITDescriptor> &Infos,
+                                   SmallVectorImpl<Type *> &ArgTys) {
+  SmallVector<DeferredIntrinsicMatchPair, 2> DeferredChecks;
+  if (matchIntrinsicType(FTy->getReturnType(), Infos, ArgTys, DeferredChecks,
+                         false))
+    return MatchIntrinsicTypes_NoMatchRet;
+  // Every check deferred so far was queued while matching the return type.
+  unsigned NumDeferredReturnChecks = DeferredChecks.size();
+  // Match the parameters in order; some checks may be queued for later.
+  for (auto Ty : FTy->params())
+    if (matchIntrinsicType(Ty, Infos, ArgTys, DeferredChecks, false))
+      return MatchIntrinsicTypes_NoMatchArg;
+  // Replay the queued checks once every signature type has been visited.
+  for (unsigned I = 0, E = DeferredChecks.size(); I != E; ++I) {
+    DeferredIntrinsicMatchPair &Check = DeferredChecks[I];
+    if (matchIntrinsicType(Check.first, Check.second, ArgTys, DeferredChecks,
+                           true))
+      return I < NumDeferredReturnChecks ? MatchIntrinsicTypes_NoMatchRet
+                                         : MatchIntrinsicTypes_NoMatchArg;
+  }
+
+  return MatchIntrinsicTypes_Match;
+}
+
 bool
 Intrinsic::matchIntrinsicVarArg(bool isVarArg,
                                 ArrayRef<Intrinsic::IITDescriptor> &Infos) {
@@ -1228,13 +1318,8 @@ Optional<Function*> Intrinsic::remangleIntrinsicFunction(Function *F) {
     getIntrinsicInfoTableEntries(ID, Table);
     ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
 
-    // If we encounter any problems matching the signature with the descriptor
-    // just give up remangling. It's up to verifier to report the discrepancy.
-    if (Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, ArgTys))
+    if (Intrinsic::matchIntrinsicSignature(FTy, TableRef, ArgTys))
       return None;
-    for (auto Ty : FTy->params())
-      if (Intrinsic::matchIntrinsicType(Ty, TableRef, ArgTys))
-        return None;
     if (Intrinsic::matchIntrinsicVarArg(FTy->isVarArg(), TableRef))
       return None;
   }
@@ -1378,7 +1463,7 @@ void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type,
   setEntryCount(ProfileCount(Count, Type), Imports);
 }
 
-ProfileCount Function::getEntryCount() const {
+ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
   MDNode *MD = getMetadata(LLVMContext::MD_prof);
   if (MD && MD->getOperand(0))
     if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0))) {
@@ -1390,7 +1475,8 @@ ProfileCount Function::getEntryCount() const {
         if (Count == (uint64_t)-1)
           return ProfileCount::getInvalid();
         return ProfileCount(Count, PCT_Real);
-      } else if (MDS->getString().equals("synthetic_function_entry_count")) {
+      } else if (AllowSynthetic &&
+                 MDS->getString().equals("synthetic_function_entry_count")) {
         ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
         uint64_t Count = CI->getValue().getZExtValue();
         return ProfileCount(Count, PCT_Synthetic);
diff --git a/lib/IR/GVMaterializer.cpp b/lib/IR/GVMaterializer.cpp
index 706926d1b981..35397309a103 100644
--- a/lib/IR/GVMaterializer.cpp
+++ b/lib/IR/GVMaterializer.cpp
@@ -1,9 +1,8 @@
 //===-- GVMaterializer.cpp - Base implementation for GV materializers -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index cbd6450a20c9..e2bfc0420bc5 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -1,9 +1,8 @@
 //===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,6 +67,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
   setUnnamedAddr(Src->getUnnamedAddr());
   setDLLStorageClass(Src->getDLLStorageClass());
   setDSOLocal(Src->isDSOLocal());
+  setPartition(Src->getPartition());
 }
 
 void GlobalValue::removeFromParent() {
@@ -181,6 +181,28 @@ const Comdat *GlobalValue::getComdat() const {
   return cast<GlobalObject>(this)->getComdat();
 }
 
+StringRef GlobalValue::getPartition() const {
+  if (!hasPartition())
+    return "";
+  return getContext().pImpl->GlobalValuePartitions[this];
+}
+
+void GlobalValue::setPartition(StringRef S) {
+  // Do nothing if we're clearing the partition and it is already empty.
+  if (!hasPartition() && S.empty())
+    return;
+
+  // Get or create a stable partition name string and put it in the table in the
+  // context.
+  if (!S.empty())
+    S = getContext().pImpl->Saver.save(S);
+  getContext().pImpl->GlobalValuePartitions[this] = S;
+
+  // Update the HasPartition field. Setting the partition to the empty string
+  // means this global no longer has a partition.
+  HasPartition = !S.empty();
+}
+
 StringRef GlobalObject::getSectionImpl() const {
   assert(hasSection());
   return getContext().pImpl->GlobalObjectSections[this];
@@ -193,9 +215,8 @@ void GlobalObject::setSection(StringRef S) {
 
   // Get or create a stable section name string and put it in the table in the
   // context.
-  if (!S.empty()) {
-    S = getContext().pImpl->SectionStrings.insert(S).first->first();
-  }
+  if (!S.empty())
+    S = getContext().pImpl->Saver.save(S);
   getContext().pImpl->GlobalObjectSections[this] = S;
 
   // Update the HasSectionHashEntryBit. Setting the section to the empty string
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index a98189956770..0c6461c9078f 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -1,9 +1,8 @@
 //===- IRBuilder.cpp - Builder for LLVM Instrs ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -72,7 +71,7 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
   return BCI;
 }
 
-static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops,
                                   IRBuilderBase *Builder,
                                   const Twine &Name = "",
                                   Instruction *FMFSource = nullptr) {
@@ -84,7 +83,7 @@ static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
   return CI;
 }
 
-static InvokeInst *createInvokeHelper(Value *Invokee, BasicBlock *NormalDest,
+static InvokeInst *createInvokeHelper(Function *Invokee, BasicBlock *NormalDest,
                                       BasicBlock *UnwindDest,
                                       ArrayRef<Value *> Ops,
                                       IRBuilderBase *Builder,
@@ -105,7 +104,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
   Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
   Type *Tys[] = { Ptr->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
@@ -135,7 +134,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
   Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)};
   Type *Tys[] = {Ptr->getType(), Size->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(
+  Function *TheFn = Intrinsic::getDeclaration(
       M, Intrinsic::memset_element_unordered_atomic, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -167,7 +166,7 @@ CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
@@ -208,7 +207,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
   Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(
+  Function *TheFn = Intrinsic::getDeclaration(
       M, Intrinsic::memcpy_element_unordered_atomic, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -247,7 +246,7 @@ CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
@@ -284,7 +283,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
   Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
   Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(
+  Function *TheFn = Intrinsic::getDeclaration(
       M, Intrinsic::memmove_element_unordered_atomic, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -314,7 +313,7 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
                                     Value *Src) {
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
   Value *Ops[] = {Src};
-  Type *Tys[] = { Src->getType()->getVectorElementType(), Src->getType() };
+  Type *Tys[] = { Src->getType() };
   auto Decl = Intrinsic::getDeclaration(M, ID, Tys);
   return createCallHelper(Decl, Ops, Builder);
 }
@@ -322,20 +321,18 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
 CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
   Module *M = GetInsertBlock()->getParent()->getParent();
   Value *Ops[] = {Acc, Src};
-  Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
-                 Src->getType()};
+  Type *Tys[] = {Acc->getType(), Src->getType()};
   auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_fadd, Tys);
+      M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
   return createCallHelper(Decl, Ops, this);
 }
 
 CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
   Module *M = GetInsertBlock()->getParent()->getParent();
   Value *Ops[] = {Acc, Src};
-  Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
-                 Src->getType()};
+  Type *Tys[] = {Acc->getType(), Src->getType()};
   auto Decl = Intrinsic::getDeclaration(
-      M, Intrinsic::experimental_vector_reduce_fmul, Tys);
+      M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
   return createCallHelper(Decl, Ops, this);
 }
 
@@ -409,8 +406,8 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
            "lifetime.start requires the size to be an i64");
   Value *Ops[] = { Size, Ptr };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start,
-                                           { Ptr->getType() });
+  Function *TheFn =
+      Intrinsic::getDeclaration(M, Intrinsic::lifetime_start, {Ptr->getType()});
   return createCallHelper(TheFn, Ops, this);
 }
 
@@ -425,8 +422,8 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
            "lifetime.end requires the size to be an i64");
   Value *Ops[] = { Size, Ptr };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end,
-                                           { Ptr->getType() });
+  Function *TheFn =
+      Intrinsic::getDeclaration(M, Intrinsic::lifetime_end, {Ptr->getType()});
   return createCallHelper(TheFn, Ops, this);
 }
 
@@ -445,7 +442,7 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
   // Fill in the single overloaded type: memory object type.
   Type *ObjectPtr[1] = {Ptr->getType()};
   Module *M = BB->getParent()->getParent();
-  Value *TheFn =
+  Function *TheFn =
       Intrinsic::getDeclaration(M, Intrinsic::invariant_start, ObjectPtr);
   return createCallHelper(TheFn, Ops, this);
 }
@@ -456,7 +453,7 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
 
   Value *Ops[] = { Cond };
   Module *M = BB->getParent()->getParent();
-  Value *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
+  Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
   return createCallHelper(FnAssume, Ops, this);
 }
 
@@ -508,7 +505,7 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
                                                ArrayRef<Type *> OverloadedTypes,
                                                const Twine &Name) {
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
+  Function *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
   return createCallHelper(TheFn, Ops, this, Name);
 }
 
@@ -709,7 +706,7 @@ CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
  Intrinsic::ID ID = Intrinsic::experimental_gc_result;
  Module *M = BB->getParent()->getParent();
  Type *Types[] = {ResultType};
- Value *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
+ Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
 
  Value *Args[] = {Statepoint};
  return createCallHelper(FnGCResult, Args, this, Name);
@@ -722,8 +719,8 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
                                          const Twine &Name) {
  Module *M = BB->getParent()->getParent();
  Type *Types[] = {ResultType};
- Value *FnGCRelocate =
-   Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ Function *FnGCRelocate =
+     Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
 
  Value *Args[] = {Statepoint,
                   getInt32(BaseOffset),
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 43010220b9f3..35b06135a828 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -1,9 +1,8 @@
 //===--- IRPrintingPasses.cpp - Module and Function printing passes -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 4623f69bd9a3..99da7caaccf0 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -1,9 +1,8 @@
 //===- InlineAsm.cpp - Implement the InlineAsm class ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index d861b5288592..ba5629d1662b 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -1,9 +1,8 @@
 //===-- Instruction.cpp - Implement the Instruction class -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -139,8 +138,10 @@ void Instruction::dropPoisonGeneratingFlags() {
     cast<GetElementPtrInst>(this)->setIsInBounds(false);
     break;
   }
+  // TODO: FastMathFlags!
 }
 
+
 bool Instruction::isExact() const {
   return cast<PossiblyExactOperator>(this)->isExact();
 }
@@ -302,6 +303,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
   case CatchRet: return "catchret";
   case CatchPad: return "catchpad";
   case CatchSwitch: return "catchswitch";
+  case CallBr: return "callbr";
 
   // Standard unary operators...
   case FNeg: return "fneg";
@@ -406,6 +408,10 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
     return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
            CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes() &&
            CI->hasIdenticalOperandBundleSchema(*cast<InvokeInst>(I2));
+  if (const CallBrInst *CI = dyn_cast<CallBrInst>(I1))
+    return CI->getCallingConv() == cast<CallBrInst>(I2)->getCallingConv() &&
+           CI->getAttributes() == cast<CallBrInst>(I2)->getAttributes() &&
+           CI->hasIdenticalOperandBundleSchema(*cast<CallBrInst>(I2));
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
     return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
@@ -516,9 +522,9 @@ bool Instruction::mayReadFromMemory() const {
   case Instruction::CatchRet:
     return true;
   case Instruction::Call:
-    return !cast<CallInst>(this)->doesNotAccessMemory();
   case Instruction::Invoke:
-    return !cast<InvokeInst>(this)->doesNotAccessMemory();
+  case Instruction::CallBr:
+    return !cast<CallBase>(this)->doesNotAccessMemory();
   case Instruction::Store:
     return !cast<StoreInst>(this)->isUnordered();
   }
@@ -536,9 +542,9 @@ bool Instruction::mayWriteToMemory() const {
   case Instruction::CatchRet:
     return true;
   case Instruction::Call:
-    return !cast<CallInst>(this)->onlyReadsMemory();
   case Instruction::Invoke:
-    return !cast<InvokeInst>(this)->onlyReadsMemory();
+  case Instruction::CallBr:
+    return !cast<CallBase>(this)->onlyReadsMemory();
   case Instruction::Load:
     return !cast<LoadInst>(this)->isUnordered();
   }
@@ -671,6 +677,13 @@ void Instruction::setSuccessor(unsigned idx, BasicBlock *B) {
   llvm_unreachable("not a terminator");
 }
 
+void Instruction::replaceSuccessorWith(BasicBlock *OldBB, BasicBlock *NewBB) {
+  for (unsigned Idx = 0, NumSuccessors = Instruction::getNumSuccessors();
+       Idx != NumSuccessors; ++Idx)
+    if (getSuccessor(Idx) == OldBB)
+      setSuccessor(Idx, NewBB); // No break: every match is replaced.
+}
+
 Instruction *Instruction::cloneImpl() const {
   llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
 }
@@ -731,52 +744,9 @@ Instruction *Instruction::clone() const {
   return New;
 }
 
-void Instruction::updateProfWeight(uint64_t S, uint64_t T) {
-  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
-  if (ProfileData == nullptr)
-    return;
-
-  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
-  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
-                        !ProfDataName->getString().equals("VP")))
-    return;
-
-  MDBuilder MDB(getContext());
-  SmallVector<Metadata *, 3> Vals;
-  Vals.push_back(ProfileData->getOperand(0));
-  APInt APS(128, S), APT(128, T);
-  if (ProfDataName->getString().equals("branch_weights"))
-    for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
-      // Using APInt::div may be expensive, but most cases should fit 64 bits.
-      APInt Val(128,
-                mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i))
-                    ->getValue()
-                    .getZExtValue());
-      Val *= APS;
-      Vals.push_back(MDB.createConstant(
-          ConstantInt::get(Type::getInt64Ty(getContext()),
-                           Val.udiv(APT).getLimitedValue())));
-    }
-  else if (ProfDataName->getString().equals("VP"))
-    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
-      // The first value is the key of the value profile, which will not change.
-      Vals.push_back(ProfileData->getOperand(i));
-      // Using APInt::div may be expensive, but most cases should fit 64 bits.
-      APInt Val(128,
-                mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
-                    ->getValue()
-                    .getZExtValue());
-      Val *= APS;
-      Vals.push_back(MDB.createConstant(
-          ConstantInt::get(Type::getInt64Ty(getContext()),
-                           Val.udiv(APT).getLimitedValue())));
-    }
-  setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
-}
-
 void Instruction::setProfWeight(uint64_t W) {
-  assert((isa<CallInst>(this) || isa<InvokeInst>(this)) &&
-         "Can only set weights for call and invoke instrucitons");
+  assert(isa<CallBase>(this) &&
+         "Can only set weights for call like instructions");
   SmallVector<uint32_t, 1> Weights;
   Weights.push_back(W);
   MDBuilder MDB(getContext());
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 06b46724a87f..2e7cad103c12 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1,9 +1,8 @@
 //===- Instructions.cpp - Implement the LLVM instructions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
@@ -45,6 +45,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> SwitchInstProfUpdateWrapperStrict(
+    "switch-inst-prof-update-wrapper-strict", cl::Hidden,
+    cl::desc("Assert that prof branch_weights metadata is valid when creating "
+             "an instance of SwitchInstProfUpdateWrapper"),
+    cl::init(false));
+
 //===----------------------------------------------------------------------===//
 //                            AllocaInst Class
 //===----------------------------------------------------------------------===//
@@ -257,6 +263,11 @@ void LandingPadInst::addClause(Constant *Val) {
 
 Function *CallBase::getCaller() { return getParent()->getParent(); }
 
+unsigned CallBase::getNumSubclassExtraOperandsDynamic() const {
+  assert(getOpcode() == Instruction::CallBr && "Unexpected opcode!");
+  return cast<CallBrInst>(this)->getNumIndirectDests() + 1; // +1: default dest
+}
+
 bool CallBase::isIndirectCall() const {
   const Value *V = getCalledValue();
   if (isa<Function>(V) || isa<Constant>(V))
@@ -267,6 +278,21 @@ bool CallBase::isIndirectCall() const {
   return true;
 }
 
+/// Reports whether this call site is required to be tail call optimized.
+/// Anything that is not a CallInst answers false.
+bool CallBase::isMustTailCall() const {
+  // The query is forwarded to CallInst; other call-like kinds never match.
+  const auto *Call = dyn_cast<CallInst>(this);
+  return Call && Call->isMustTailCall();
+}
+
+/// Reports whether this call site is flagged as a tail call.
+bool CallBase::isTailCall() const {
+  // Only a CallInst is asked; every other call-like kind answers false.
+  const auto *Call = dyn_cast<CallInst>(this);
+  return Call && Call->isTailCall();
+}
+
 Intrinsic::ID CallBase::getIntrinsicID() const {
   if (auto *F = getCalledFunction())
     return F->getIntrinsicID();
@@ -429,8 +455,8 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
                            Instruction *InsertPt) {
   std::vector<Value *> Args(CI->arg_begin(), CI->arg_end());
 
-  auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(),
-                                 InsertPt);
+  auto *NewCI = CallInst::Create(CI->getFunctionType(), CI->getCalledValue(),
+                                 Args, OpB, CI->getName(), InsertPt);
   NewCI->setTailCallKind(CI->getTailCallKind());
   NewCI->setCallingConv(CI->getCallingConv());
   NewCI->SubclassOptionalData = CI->SubclassOptionalData;
@@ -439,14 +465,57 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
   return NewCI;
 }
 
+// Rescale this call's !prof data by the ratio S/T. On a call instruction the
+// "branch_weights" value is transferred to mean a call count; "VP" (value
+// profile) counts are scaled the same way. Nothing is changed when T is zero.
+void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
+  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
+  if (ProfileData == nullptr)
+    return;
 
+  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
+                        !ProfDataName->getString().equals("VP")))
+    return;
 
+  if (T == 0) {
+    LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
+                         "div by 0. Ignoring. Likely the function "
+                      << getParent()->getParent()->getName()
+                      << " has 0 entry count, and contains call instructions "
+                         "with non-zero prof info.");
+    return;
+  }
 
-
-
-
-
-
+  MDBuilder MDB(getContext());
+  SmallVector<Metadata *, 3> Vals;
+  Vals.push_back(ProfileData->getOperand(0));
+  APInt APS(128, S), APT(128, T);
+  if (ProfDataName->getString().equals("branch_weights") &&
+      ProfileData->getNumOperands() > 1) {
+    // Operand 1 holds the count, so at least two operands are required.
+    APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
+                       ->getValue()
+                       .getZExtValue());
+    Val *= APS;
+    Vals.push_back(MDB.createConstant(ConstantInt::get(
+        Type::getInt64Ty(getContext()), Val.udiv(APT).getLimitedValue())));
+  } else if (ProfDataName->getString().equals("VP"))
+    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
+      // The first value is the key of the value profile, which will not change.
+      Vals.push_back(ProfileData->getOperand(i));
+      // Using APInt::div may be expensive, but most cases should fit 64 bits.
+      APInt Val(128,
+                mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+                    ->getValue()
+                    .getZExtValue());
+      Val *= APS;
+      Vals.push_back(MDB.createConstant(
+          ConstantInt::get(Type::getInt64Ty(getContext()),
+                           Val.udiv(APT).getLimitedValue())));
+    }
+  setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
+}
 
 /// IsConstantOne - Return true only if val is constant int 1
 static bool IsConstantOne(Value *val) {
@@ -503,7 +572,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
   BasicBlock *BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
   Module *M = BB->getParent()->getParent();
   Type *BPTy = Type::getInt8PtrTy(BB->getContext());
-  Value *MallocFunc = MallocF;
+  FunctionCallee MallocFunc = MallocF;
   if (!MallocFunc)
     // prototype malloc as "void *malloc(size_t)"
     MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
@@ -527,7 +596,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
     }
   }
   MCall->setTailCall();
-  if (Function *F = dyn_cast<Function>(MallocFunc)) {
+  if (Function *F = dyn_cast<Function>(MallocFunc.getCallee())) {
     MCall->setCallingConv(F->getCallingConv());
     if (!F->returnDoesNotAlias())
       F->setReturnDoesNotAlias();
@@ -600,7 +669,7 @@ static Instruction *createFree(Value *Source,
   Type *VoidTy = Type::getVoidTy(M->getContext());
   Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
   // prototype free as "void free(void*)"
-  Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
+  FunctionCallee FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
   CallInst *Result = nullptr;
   Value *PtrCast = Source;
   if (InsertBefore) {
@@ -613,7 +682,7 @@ static Instruction *createFree(Value *Source,
     Result = CallInst::Create(FreeFunc, PtrCast, Bundles, "");
   }
   Result->setTailCall();
-  if (Function *F = dyn_cast<Function>(FreeFunc))
+  if (Function *F = dyn_cast<Function>(FreeFunc.getCallee()))
     Result->setCallingConv(F->getCallingConv());
 
   return Result;
@@ -697,9 +766,9 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
                                Instruction *InsertPt) {
   std::vector<Value *> Args(II->arg_begin(), II->arg_end());
 
-  auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(),
-                                   II->getUnwindDest(), Args, OpB,
-                                   II->getName(), InsertPt);
+  auto *NewII = InvokeInst::Create(II->getFunctionType(), II->getCalledValue(),
+                                   II->getNormalDest(), II->getUnwindDest(),
+                                   Args, OpB, II->getName(), InsertPt);
   NewII->setCallingConv(II->getCallingConv());
   NewII->SubclassOptionalData = II->SubclassOptionalData;
   NewII->setAttributes(II->getAttributes());
@@ -712,6 +781,76 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
   return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
 }
 
+//===----------------------------------------------------------------------===//
+//                        CallBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough,
+                      ArrayRef<BasicBlock *> IndirectDests,
+                      ArrayRef<Value *> Args,
+                      ArrayRef<OperandBundleDef> Bundles,
+                      const Twine &NameStr) {
+  this->FTy = FTy;
+  // Fill in this callbr; its operand count must already match the inputs.
+  assert((int)getNumOperands() ==
+             ComputeNumOperands(Args.size(), IndirectDests.size(),
+                                CountBundleInputs(Bundles)) &&
+         "NumOperands not set up?");
+  NumIndirectDests = IndirectDests.size();
+  setDefaultDest(Fallthrough);
+  for (unsigned i = 0; i != NumIndirectDests; ++i)
+    setIndirectDest(i, IndirectDests[i]);
+  setCalledOperand(Fn);
+
+#ifndef NDEBUG
+  assert(((Args.size() == FTy->getNumParams()) ||
+          (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+         "Calling a function with bad signature");
+  // Fixed parameters must match in type; variadic extras are not checked.
+  for (unsigned i = 0, e = Args.size(); i != e; i++)
+    assert((i >= FTy->getNumParams() ||
+            FTy->getParamType(i) == Args[i]->getType()) &&
+           "Calling a function with a bad signature!");
+#endif
+
+  std::copy(Args.begin(), Args.end(), op_begin());
+  // Bundle operands presumably start at operand index Args.size().
+  auto It = populateBundleOperandInfos(Bundles, Args.size());
+  (void)It;
+  assert(It + 2 + IndirectDests.size() == op_end() && "Should add up!");
+
+  setName(NameStr);
+}
+
+CallBrInst::CallBrInst(const CallBrInst &CBI)
+    : CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr,
+               OperandTraits<CallBase>::op_end(this) - CBI.getNumOperands(),
+               CBI.getNumOperands()) { // Same operand count as CBI.
+  setCallingConv(CBI.getCallingConv());
+  std::copy(CBI.op_begin(), CBI.op_end(), op_begin()); // Copy all operands.
+  std::copy(CBI.bundle_op_info_begin(), CBI.bundle_op_info_end(),
+            bundle_op_info_begin()); // Copy the bundle descriptors too.
+  SubclassOptionalData = CBI.SubclassOptionalData;
+  NumIndirectDests = CBI.NumIndirectDests;
+}
+
+CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef<OperandBundleDef> OpB,
+                               Instruction *InsertPt) {
+  std::vector<Value *> Args(CBI->arg_begin(), CBI->arg_end());
+  // New callbr: CBI's callee, dests and args, but the bundles given in OpB.
+  auto *NewCBI = CallBrInst::Create(CBI->getFunctionType(),
+                                    CBI->getCalledValue(),
+                                    CBI->getDefaultDest(),
+                                    CBI->getIndirectDests(),
+                                    Args, OpB, CBI->getName(), InsertPt);
+  NewCBI->setCallingConv(CBI->getCallingConv());
+  NewCBI->SubclassOptionalData = CBI->SubclassOptionalData;
+  NewCBI->setAttributes(CBI->getAttributes());
+  NewCBI->setDebugLoc(CBI->getDebugLoc());
+  NewCBI->NumIndirectDests = CBI->NumIndirectDests;
+  return NewCBI;
+}
+
 //===----------------------------------------------------------------------===//
 //                        ReturnInst Implementation
 //===----------------------------------------------------------------------===//
@@ -1408,6 +1547,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
     return "umax";
   case AtomicRMWInst::UMin:
     return "umin";
+  case AtomicRMWInst::FAdd:
+    return "fadd";
+  case AtomicRMWInst::FSub:
+    return "fsub";
   case AtomicRMWInst::BAD_BINOP:
     return "<invalid operation>";
   }
@@ -1666,6 +1809,25 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
   setName(Name);
 }
 
+void ShuffleVectorInst::commute() { // Swap both inputs and remap the mask.
+  const int SrcElts = Op<0>()->getType()->getVectorNumElements();
+  const int MaskElts = getMask()->getType()->getVectorNumElements();
+  Type *I32Ty = Type::getInt32Ty(getContext());
+  SmallVector<Constant *, 16> Lanes(MaskElts);
+  for (int Lane = 0; Lane != MaskElts; ++Lane) {
+    const int Sel = getMaskValue(Lane);
+    if (Sel == -1) { // -1 lanes become undef mask elements.
+      Lanes[Lane] = UndefValue::get(I32Ty);
+    } else { // Otherwise point the selector at the other input's half.
+      assert(Sel >= 0 && Sel < 2 * SrcElts && "Out-of-range mask");
+      const int Flipped = Sel < SrcElts ? Sel + SrcElts : Sel - SrcElts;
+      Lanes[Lane] = ConstantInt::get(I32Ty, Flipped);
+    }
+  }
+  Op<2>() = ConstantVector::get(Lanes);
+  Op<0>().swap(Op<1>());
+}
+
 bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
                                         const Value *Mask) {
   // V1 and V2 must be vectors of the same type.
@@ -3714,6 +3876,141 @@ void SwitchInst::growOperands() {
   growHungoffUses(ReservedSpace);
 }
 
+MDNode *
+SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) {
+  // Yield SI's !prof node only when its first operand is "branch_weights".
+  MDNode *Prof = SI.getMetadata(LLVMContext::MD_prof);
+  auto *Tag = Prof ? dyn_cast<MDString>(Prof->getOperand(0)) : nullptr;
+  const bool IsWeights = Tag && Tag->getString() == "branch_weights";
+  return IsWeights ? Prof : nullptr;
+}
+
+MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
+  assert(State == Changed && "called only if metadata has changed");
+  // With no weights tracked there is no node to build.
+  if (!Weights)
+    return nullptr;
+
+  assert(SI.getNumSuccessors() == Weights->size() &&
+         "num of prof branch_weights must accord with num of successors");
+
+  // All-zero weights, or fewer than two of them, also produce no node.
+  const auto &Ws = Weights.getValue();
+  const bool AllZero = all_of(Ws, [](uint32_t W) { return W == 0; });
+  if (Ws.size() < 2 || AllZero)
+    return nullptr;
+
+  return MDBuilder(SI.getParent()->getContext()).createBranchWeights(Ws);
+}
+
+void SwitchInstProfUpdateWrapper::init() {
+  MDNode *Prof = getProfBranchWeightsMD(SI);
+  // No branch_weights on the switch: nothing to load, Weights is not touched.
+  if (!Prof) {
+    State = Initialized;
+    return;
+  }
+  // A well-formed node holds the tag plus one weight per successor.
+  const unsigned NumSucc = SI.getNumSuccessors();
+  if (Prof->getNumOperands() != NumSucc + 1) {
+    State = Invalid;
+    if (SwitchInstProfUpdateWrapperStrict)
+      llvm_unreachable("number of prof branch_weights metadata operands does "
+                       "not correspond to number of succesors");
+    return;
+  }
+  SmallVector<uint32_t, 8> Loaded;
+  for (unsigned Idx = 1; Idx <= NumSucc; ++Idx) {
+    ConstantInt *W = mdconst::extract<ConstantInt>(Prof->getOperand(Idx));
+    Loaded.push_back(static_cast<uint32_t>(W->getValue().getZExtValue()));
+  }
+  Weights = std::move(Loaded);
+  State = Initialized;
+}
+
+SwitchInst::CaseIt
+SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
+  if (Weights) {
+    auto &Ws = *Weights;
+    assert(SI.getNumSuccessors() == Ws.size() &&
+           "num of prof branch_weights must accord with num of successors");
+    // Mirror SwitchInst::removeCase(CaseIt), which moves the last case into
+    // the removed one's place: overwrite that weight with the last, shrink.
+    Ws[I->getCaseIndex() + 1] = Ws.back();
+    Ws.pop_back();
+    State = Changed;
+  }
+  return SI.removeCase(I);
+}
+
+void SwitchInstProfUpdateWrapper::addCase(
+    ConstantInt *OnVal, BasicBlock *Dest,
+    SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+  SI.addCase(OnVal, Dest);
+  if (State == Invalid)
+    return;
+
+  if (Weights) {
+    // Weights are already tracked: append one for the new successor.
+    State = Changed;
+    Weights->push_back(W ? *W : 0);
+  } else if (W && *W) {
+    // First non-zero weight: start tracking, zero for every older successor.
+    State = Changed;
+    Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+    Weights->back() = *W;
+  }
+  assert((!Weights || SI.getNumSuccessors() == Weights->size()) &&
+         "num of prof branch_weights must accord with num of successors");
+}
+
+SymbolTableList<Instruction>::iterator
+SwitchInstProfUpdateWrapper::eraseFromParent() {
+  // Erasing the switch: reset so that nothing is left flagged as Changed.
+  if (State == Invalid)
+    return SI.eraseFromParent();
+  State = Initialized;
+  if (Weights)
+    Weights->clear();
+  return SI.eraseFromParent();
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
+  if (Weights)
+    return (*Weights)[idx];
+  return None; // No weights are being tracked.
+}
+
+void SwitchInstProfUpdateWrapper::setSuccessorWeight(
+    unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+  if (State == Invalid || !W)
+    return;
+  // Zero never starts tracking; non-zero seeds an all-zero weight list.
+  if (!Weights) {
+    if (*W == 0)
+      return;
+    Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+  }
+  uint32_t &Slot = (*Weights)[idx];
+  if (Slot == *W)
+    return;
+  Slot = *W;
+  State = Changed;
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(const SwitchInst &SI,
+                                                unsigned idx) {
+  // Reads SI's metadata directly; a missing or mis-sized node yields None.
+  MDNode *Prof = getProfBranchWeightsMD(SI);
+  if (!Prof || Prof->getNumOperands() != SI.getNumSuccessors() + 1)
+    return None;
+  // Operand 0 is the "branch_weights" tag, so successor idx sits at idx + 1.
+  auto *Weight = mdconst::extract<ConstantInt>(Prof->getOperand(idx + 1));
+  return Weight->getValue().getZExtValue();
+}
+
 //===----------------------------------------------------------------------===//
 //                        IndirectBrInst Implementation
 //===----------------------------------------------------------------------===//
@@ -3978,6 +4275,14 @@ InvokeInst *InvokeInst::cloneImpl() const {
   return new(getNumOperands()) InvokeInst(*this);
 }
 
+CallBrInst *CallBrInst::cloneImpl() const {
+  const unsigned NumOps = getNumOperands();
+  if (!hasOperandBundles()) // No bundle descriptors to make room for.
+    return new (NumOps) CallBrInst(*this);
+  const unsigned BundleBytes = getNumOperandBundles() * sizeof(BundleOpInfo);
+  return new (NumOps, BundleBytes) CallBrInst(*this);
+}
+
 ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); }
 
 CleanupReturnInst *CleanupReturnInst::cloneImpl() const {
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index df3a38ac147f..7a042326f67f 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -1,9 +1,8 @@
 //===-- InstrinsicInst.cpp - Intrinsic Instruction Wrappers ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -103,45 +103,94 @@ Value *InstrProfIncrementInst::getStep() const {
   return ConstantInt::get(Type::getInt64Ty(Context), 1);
 }
 
-ConstrainedFPIntrinsic::RoundingMode
+Optional<ConstrainedFPIntrinsic::RoundingMode>
 ConstrainedFPIntrinsic::getRoundingMode() const {
   unsigned NumOperands = getNumArgOperands();
   Metadata *MD =
       dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
   if (!MD || !isa<MDString>(MD))
-    return rmInvalid;
-  StringRef RoundingArg = cast<MDString>(MD)->getString();
+    return None;
+  return StrToRoundingMode(cast<MDString>(MD)->getString());
+}
 
+Optional<ConstrainedFPIntrinsic::RoundingMode>
+ConstrainedFPIntrinsic::StrToRoundingMode(StringRef RoundingArg) {
   // For dynamic rounding mode, we use round to nearest but we will set the
   // 'exact' SDNodeFlag so that the value will not be rounded.
-  return StringSwitch<RoundingMode>(RoundingArg)
+  return StringSwitch<Optional<RoundingMode>>(RoundingArg)
     .Case("round.dynamic",    rmDynamic)
     .Case("round.tonearest",  rmToNearest)
     .Case("round.downward",   rmDownward)
     .Case("round.upward",     rmUpward)
     .Case("round.towardzero", rmTowardZero)
-    .Default(rmInvalid);
+    .Default(None);
+}
+
+/// Map a rounding mode onto the metadata string that names it. This is the
+/// inverse of StrToRoundingMode, so every spelling returned here must be one
+/// that StrToRoundingMode accepts. A value that is not one of the enumerators
+/// listed below produces None.
+Optional<StringRef>
+ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) {
+  switch (UseRounding) {
+  case ConstrainedFPIntrinsic::rmDynamic:
+    return StringRef("round.dynamic");
+  case ConstrainedFPIntrinsic::rmToNearest:
+    return StringRef("round.tonearest");
+  case ConstrainedFPIntrinsic::rmDownward:
+    return StringRef("round.downward");
+  case ConstrainedFPIntrinsic::rmUpward:
+    return StringRef("round.upward");
+  case ConstrainedFPIntrinsic::rmTowardZero:
+    // Spelled "towardzero", not "tozero": the string has to round-trip
+    // through StrToRoundingMode.
+    return StringRef("round.towardzero");
+  }
+  return None;
 }
 
-ConstrainedFPIntrinsic::ExceptionBehavior
+Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
 ConstrainedFPIntrinsic::getExceptionBehavior() const {
   unsigned NumOperands = getNumArgOperands();
   Metadata *MD =
       dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
   if (!MD || !isa<MDString>(MD))
-    return ebInvalid;
-  StringRef ExceptionArg = cast<MDString>(MD)->getString();
-  return StringSwitch<ExceptionBehavior>(ExceptionArg)
+    return None;
+  return StrToExceptionBehavior(cast<MDString>(MD)->getString());
+}
+
+Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
+ConstrainedFPIntrinsic::StrToExceptionBehavior(StringRef ExceptionArg) {
+  return StringSwitch<Optional<ExceptionBehavior>>(ExceptionArg)
     .Case("fpexcept.ignore",  ebIgnore)
     .Case("fpexcept.maytrap", ebMayTrap)
     .Case("fpexcept.strict",  ebStrict)
-    .Default(ebInvalid);
+    .Default(None);
+}
+
+/// Translate an exception-behavior enumerator into its metadata spelling
+/// ("fpexcept.*"). A value that is not one of the enumerators listed below
+/// yields None.
+Optional<StringRef>
+ConstrainedFPIntrinsic::ExceptionBehaviorToStr(ExceptionBehavior UseExcept) {
+  // Same spellings that StrToExceptionBehavior parses.
+  switch (UseExcept) {
+  case ConstrainedFPIntrinsic::ebStrict:
+    return StringRef("fpexcept.strict");
+  case ConstrainedFPIntrinsic::ebIgnore:
+    return StringRef("fpexcept.ignore");
+  case ConstrainedFPIntrinsic::ebMayTrap:
+    return StringRef("fpexcept.maytrap");
+  }
+  return None;
 }
 
 bool ConstrainedFPIntrinsic::isUnaryOp() const {
   switch (getIntrinsicID()) {
     default:
       return false;
+    case Intrinsic::experimental_constrained_fptrunc:
+    case Intrinsic::experimental_constrained_fpext:
     case Intrinsic::experimental_constrained_sqrt:
     case Intrinsic::experimental_constrained_sin:
     case Intrinsic::experimental_constrained_cos:
@@ -169,3 +218,42 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
   }
 }
 
+Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
+  switch (getIntrinsicID()) { // Map the intrinsic to a plain binary opcode.
+    case Intrinsic::uadd_with_overflow:
+    case Intrinsic::sadd_with_overflow:
+    case Intrinsic::uadd_sat:
+    case Intrinsic::sadd_sat:
+      return Instruction::Add; // Overflow-reporting and saturating adds.
+    case Intrinsic::usub_with_overflow:
+    case Intrinsic::ssub_with_overflow:
+    case Intrinsic::usub_sat:
+    case Intrinsic::ssub_sat:
+      return Instruction::Sub; // Overflow-reporting and saturating subs.
+    case Intrinsic::umul_with_overflow:
+    case Intrinsic::smul_with_overflow:
+      return Instruction::Mul; // Only the overflow-reporting multiplies.
+    default:
+      llvm_unreachable("Invalid intrinsic"); // Any other ID is rejected.
+  }
+}
+
+bool BinaryOpIntrinsic::isSigned() const {
+  switch (getIntrinsicID()) {
+    case Intrinsic::sadd_with_overflow:
+    case Intrinsic::ssub_with_overflow:
+    case Intrinsic::smul_with_overflow:
+    case Intrinsic::sadd_sat:
+    case Intrinsic::ssub_sat:
+      return true; // The s*-prefixed intrinsics.
+    default:
+      return false; // Every other intrinsic ID is reported as not signed.
+  }
+}
+
+unsigned BinaryOpIntrinsic::getNoWrapKind() const {
+  // Signed intrinsics map to NoSignedWrap, all others to NoUnsignedWrap.
+  const bool Signed = isSigned();
+  return Signed ? OverflowingBinaryOperator::NoSignedWrap
+                : OverflowingBinaryOperator::NoUnsignedWrap;
+}
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 944d8265151d..e1cdf6b539db 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -1,9 +1,8 @@
 //===-- LLVMContext.cpp - Implement LLVMContext ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -62,6 +62,8 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
     {MD_callees, "callees"},
     {MD_irr_loop, "irr_loop"},
     {MD_access_group, "llvm.access.group"},
+    {MD_callback, "callback"},
+    {MD_preserve_access_index, "llvm.preserve.access.index"},
   };
 
   for (auto &MDKind : MDKinds) {
@@ -160,12 +162,15 @@ uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
   return pImpl->DiagnosticsHotnessThreshold;
 }
 
-yaml::Output *LLVMContext::getDiagnosticsOutputFile() {
-  return pImpl->DiagnosticsOutputFile.get();
+RemarkStreamer *LLVMContext::getRemarkStreamer() {
+  return pImpl->RemarkDiagStreamer.get();
 }
-
-void LLVMContext::setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F) {
-  pImpl->DiagnosticsOutputFile = std::move(F);
+const RemarkStreamer *LLVMContext::getRemarkStreamer() const {
+  return const_cast<LLVMContext *>(this)->getRemarkStreamer();
+}
+void LLVMContext::setRemarkStreamer(
+    std::unique_ptr<RemarkStreamer> RemarkStreamer) {
+  pImpl->RemarkDiagStreamer = std::move(RemarkStreamer);
 }
 
 DiagnosticHandler::DiagnosticHandlerTy
@@ -228,14 +233,10 @@ LLVMContext::getDiagnosticMessagePrefix(DiagnosticSeverity Severity) {
 }
 
 void LLVMContext::diagnose(const DiagnosticInfo &DI) {
-  if (auto *OptDiagBase = dyn_cast<DiagnosticInfoOptimizationBase>(&DI)) {
-    yaml::Output *Out = getDiagnosticsOutputFile();
-    if (Out) {
-      // For remarks the << operator takes a reference to a pointer.
-      auto *P = const_cast<DiagnosticInfoOptimizationBase *>(OptDiagBase);
-      *Out << P;
-    }
-  }
+  if (auto *OptDiagBase = dyn_cast<DiagnosticInfoOptimizationBase>(&DI))
+    if (RemarkStreamer *RS = getRemarkStreamer())
+      RS->emit(*OptDiagBase);
+
   // If there is a report handler, use it.
   if (pImpl->DiagHandler &&
       (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) &&
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 3c34ca55c224..c6ab2c6f213a 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -1,9 +1,8 @@
 //===- LLVMContextImpl.cpp - Implement LLVMContextImpl --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 2d120869860a..78cf707e0e74 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1,9 +1,8 @@
 //===- LLVMContextImpl.h - The LLVMContextImpl opaque class -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,16 +30,17 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/TrackingMDRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <algorithm>
 #include <cassert>
@@ -789,6 +789,31 @@ template <> struct MDNodeKeyImpl<DINamespace> {
   }
 };
 
+template <> struct MDNodeKeyImpl<DICommonBlock> {
+  Metadata *Scope;
+  Metadata *Decl;
+  MDString *Name;
+  Metadata *File;
+  unsigned LineNo;
+  // Build a key from explicit fields, or from the raw operands of a node.
+  MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name,
+                Metadata *File, unsigned LineNo)
+      : Scope(Scope), Decl(Decl), Name(Name), File(File), LineNo(LineNo) {}
+  MDNodeKeyImpl(const DICommonBlock *N)
+      : Scope(N->getRawScope()), Decl(N->getRawDecl()), Name(N->getRawName()),
+        File(N->getRawFile()), LineNo(N->getLineNo()) {}
+  // True when every field of this key equals the matching field of RHS.
+  bool isKeyOf(const DICommonBlock *RHS) const {
+    return Scope == RHS->getRawScope() && Decl == RHS->getRawDecl() &&
+      Name == RHS->getRawName() && File == RHS->getRawFile() &&
+      LineNo == RHS->getLineNo();
+  }
+  // Hash over all five key fields.
+  unsigned getHashValue() const {
+    return hash_combine(Scope, Decl, Name, File, LineNo);
+  }
+};
+
 template <> struct MDNodeKeyImpl<DIModule> {
   Metadata *Scope;
   MDString *Name;
@@ -1227,7 +1252,7 @@ public:
   bool RespectDiagnosticFilters = false;
   bool DiagnosticsHotnessRequested = false;
   uint64_t DiagnosticsHotnessThreshold = 0;
-  std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
+  std::unique_ptr<RemarkStreamer> RemarkDiagStreamer;
 
   LLVMContext::YieldCallbackTy YieldCallback = nullptr;
   void *YieldOpaqueHandle = nullptr;
@@ -1296,9 +1321,8 @@ public:
   Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
   IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
 
-  /// TypeAllocator - All dynamically allocated types are allocated from this.
-  /// They live forever until the context is torn down.
-  BumpPtrAllocator TypeAllocator;
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver Saver{Alloc};
 
   DenseMap<unsigned, IntegerType*> IntegerTypes;
 
@@ -1310,7 +1334,7 @@ public:
   unsigned NamedStructTypesUniqueID = 0;
 
   DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
-  DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+  DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes;
   DenseMap<Type*, PointerType*> PointerTypes;  // Pointers in AddrSpace = 0
   DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
 
@@ -1332,8 +1356,8 @@ public:
   /// Collection of per-GlobalObject sections used in this context.
   DenseMap<const GlobalObject *, StringRef> GlobalObjectSections;
 
-  /// Stable collection of section strings.
-  StringSet<> SectionStrings;
+  /// Collection of per-GlobalValue partitions used in this context.
+  DenseMap<const GlobalValue *, StringRef> GlobalValuePartitions;
 
   /// DiscriminatorTable - This table maps file:line locations to an
   /// integer representing the next DWARF path discriminator to assign to
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 01d14f17bba5..c575d6e782b9 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -1,9 +1,8 @@
 //===- LegacyPassManager.cpp - LLVM Pass Infrastructure Implementation ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -310,7 +310,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
     OS << "value";
 
   OS << " '";
-  V->printAsOperand(OS, /*PrintTy=*/false, M);
+  V->printAsOperand(OS, /*PrintType=*/false, M);
   OS << "'\n";
 }
 
@@ -1629,10 +1629,14 @@ bool FPPassManager::runOnFunction(Function &F) {
     FunctionSize = F.getInstructionCount();
   }
 
+  llvm::TimeTraceScope FunctionScope("OptFunction", F.getName());
+
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     FunctionPass *FP = getContainedPass(Index);
     bool LocalChanged = false;
 
+    llvm::TimeTraceScope PassScope("RunPass", FP->getPassName());
+
     dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
     dumpRequiredSet(FP);
 
@@ -1669,12 +1673,14 @@ bool FPPassManager::runOnFunction(Function &F) {
     recordAvailableAnalysis(FP);
     removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
   }
+
   return Changed;
 }
 
 bool FPPassManager::runOnModule(Module &M) {
   bool Changed = false;
 
+  llvm::TimeTraceScope TimeScope("OptModule", M.getName());
   for (Function &F : M)
     Changed |= runOnFunction(F);
 
@@ -1707,6 +1713,8 @@ bool FPPassManager::doFinalization(Module &M) {
 /// the module, and if so, return true.
 bool
 MPPassManager::runOnModule(Module &M) {
+  llvm::TimeTraceScope TimeScope("OptModule", M.getName());
+
   bool Changed = false;
 
   // Initialize on-the-fly passes
@@ -1719,14 +1727,12 @@ MPPassManager::runOnModule(Module &M) {
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
     Changed |= getContainedPass(Index)->doInitialization(M);
 
-  unsigned InstrCount, ModuleCount = 0;
+  unsigned InstrCount;
   StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
   bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
   // Collect the initial size of the module.
-  if (EmitICRemark) {
+  if (EmitICRemark)
     InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
-    ModuleCount = InstrCount;
-  }
 
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     ModulePass *MP = getContainedPass(Index);
@@ -1744,7 +1750,7 @@ MPPassManager::runOnModule(Module &M) {
       LocalChanged |= MP->runOnModule(M);
       if (EmitICRemark) {
         // Update the size of the module.
-        ModuleCount = M.getInstructionCount();
+        unsigned ModuleCount = M.getInstructionCount();
         if (ModuleCount != InstrCount) {
           int64_t Delta = static_cast<int64_t>(ModuleCount) -
                           static_cast<int64_t>(InstrCount);
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index 3fa541f1b535..14bcb3a29b07 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -1,9 +1,8 @@
 //===---- llvm/MDBuilder.cpp - Builder for LLVM metadata ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -69,9 +68,7 @@ MDNode *MDBuilder::createFunctionEntryCount(
   Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
   if (Imports) {
     SmallVector<GlobalValue::GUID, 2> OrderID(Imports->begin(), Imports->end());
-    std::stable_sort(OrderID.begin(), OrderID.end(),
-      [] (GlobalValue::GUID A, GlobalValue::GUID B) {
-        return A < B;});
+    llvm::stable_sort(OrderID);
     for (auto ID : OrderID)
       Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID)));
   }
@@ -107,6 +104,52 @@ MDNode *MDBuilder::createCallees(ArrayRef<Function *> Callees) {
   return MDNode::get(Context, Ops);
 }
 
+MDNode *MDBuilder::createCallbackEncoding(unsigned CalleeArgNo,
+                                          ArrayRef<int> Arguments,
+                                          bool VarArgArePassed) {
+  SmallVector<Metadata *, 4> Ops;
+  // Operand 0: the callee argument index, stored as an i64 constant.
+  Type *Int64 = Type::getInt64Ty(Context);
+  Ops.push_back(createConstant(ConstantInt::get(Int64, CalleeArgNo)));
+  // Then one i64 constant per entry of Arguments, in the order given.
+  for (int ArgNo : Arguments)
+    Ops.push_back(createConstant(ConstantInt::get(Int64, ArgNo, true)));
+  // Final operand: VarArgArePassed, stored as an i1 constant.
+  Type *Int1 = Type::getInt1Ty(Context);
+  Ops.push_back(createConstant(ConstantInt::get(Int1, VarArgArePassed)));
+  // Build the encoding node from the collected operands.
+  return MDNode::get(Context, Ops);
+}
+
+// Appends NewCB to the encodings in ExistingCallbacks (which may be null).
+MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks,
+                                          MDNode *NewCB) {
+  if (!ExistingCallbacks)
+    return MDNode::get(Context, {NewCB});
+
+  auto *NewCBCalleeIdxAsCM = cast<ConstantAsMetadata>(NewCB->getOperand(0));
+  uint64_t NewCBCalleeIdx =
+      cast<ConstantInt>(NewCBCalleeIdxAsCM->getValue())->getZExtValue();
+  (void)NewCBCalleeIdx;
+
+  SmallVector<Metadata *, 4> Ops;
+  unsigned NumExistingOps = ExistingCallbacks->getNumOperands();
+  Ops.resize(NumExistingOps + 1);
+  for (unsigned u = 0; u < NumExistingOps; u++) {
+    Ops[u] = ExistingCallbacks->getOperand(u);
+    // The callee index is operand 0 of the nested encoding node.
+    auto *OldCBCalleeIdxAsCM =
+        cast<ConstantAsMetadata>(cast<MDNode>(Ops[u])->getOperand(0));
+    uint64_t OldCBCalleeIdx =
+        cast<ConstantInt>(OldCBCalleeIdxAsCM->getValue())->getZExtValue();
+    (void)OldCBCalleeIdx;
+    assert(NewCBCalleeIdx != OldCBCalleeIdx &&
+           "Cannot map a callback callee index twice!");
+  }
+  Ops[NumExistingOps] = NewCB;
+  return MDNode::get(Context, Ops);
+}
+
 MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
   // To ensure uniqueness the root node is self-referential.
   auto Dummy = MDNode::getTemporary(Context, None);
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index be3086cfcf05..d73f748b0584 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -1,9 +1,8 @@
 //===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -187,7 +186,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
   if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
     return;
 
-  if (TT.isKnownWindowsMSVCEnvironment())
+  if (TT.isWindowsMSVCEnvironment())
     OS << " /EXPORT:";
   else
     OS << " -export:";
@@ -206,7 +205,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
   }
 
   if (!GV->getValueType()->isFunctionTy()) {
-    if (TT.isKnownWindowsMSVCEnvironment())
+    if (TT.isWindowsMSVCEnvironment())
       OS << ",DATA";
     else
       OS << ",data";
@@ -215,7 +214,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
 
 void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
                                       const Triple &T, Mangler &M) {
-  if (!T.isKnownWindowsMSVCEnvironment())
+  if (!T.isWindowsMSVCEnvironment())
     return;
 
   OS << " /INCLUDE:";
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 5536c2497f1e..748a2238e642 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -1,9 +1,8 @@
 //===- Metadata.cpp - Implement Metadata classes --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1180,10 +1179,7 @@ void MDGlobalAttachmentMap::getAll(
 
   // Sort the resulting array so it is stable with respect to metadata IDs. We
   // need to preserve the original insertion order though.
-  std::stable_sort(
-      Result.begin(), Result.end(),
-      [](const std::pair<unsigned, MDNode *> &A,
-         const std::pair<unsigned, MDNode *> &B) { return A.first < B.first; });
+  llvm::stable_sort(Result, less_first());
 }
 
 void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
diff --git a/lib/IR/MetadataImpl.h b/lib/IR/MetadataImpl.h
index b9137460bd20..b4188dd7d3ee 100644
--- a/lib/IR/MetadataImpl.h
+++ b/lib/IR/MetadataImpl.h
@@ -1,9 +1,8 @@
 //===- MetadataImpl.h - Helpers for implementing metadata -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 93f27304424f..dbf4035ac7c1 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -1,9 +1,8 @@
 //===- Module.cpp - Implement the Module class ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -141,8 +140,8 @@ void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
 // it.  This is nice because it allows most passes to get away with not handling
 // the symbol table directly for this common task.
 //
-Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
-                                      AttributeList AttributeList) {
+FunctionCallee Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
+                                           AttributeList AttributeList) {
   // See if we have a definition for the specified function already.
   GlobalValue *F = getNamedValue(Name);
   if (!F) {
@@ -152,21 +151,20 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
     if (!New->isIntrinsic())       // Intrinsics get attrs set on construction
       New->setAttributes(AttributeList);
     FunctionList.push_back(New);
-    return New;                    // Return the new prototype.
+    return {Ty, New}; // Return the new prototype.
   }
 
   // If the function exists but has the wrong type, return a bitcast to the
   // right type.
   auto *PTy = PointerType::get(Ty, F->getAddressSpace());
   if (F->getType() != PTy)
-    return ConstantExpr::getBitCast(F, PTy);
+    return {Ty, ConstantExpr::getBitCast(F, PTy)};
 
   // Otherwise, we just found the existing function or a prototype.
-  return F;
+  return {Ty, F};
 }
 
-Constant *Module::getOrInsertFunction(StringRef Name,
-                                      FunctionType *Ty) {
+FunctionCallee Module::getOrInsertFunction(StringRef Name, FunctionType *Ty) {
   return getOrInsertFunction(Name, Ty, AttributeList());
 }
 
@@ -533,12 +531,16 @@ void Module::setCodeModel(CodeModel::Model CL) {
   addModuleFlag(ModFlagBehavior::Error, "Code Model", CL);
 }
 
-void Module::setProfileSummary(Metadata *M) {
-  addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
+void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) {
+  if (Kind == ProfileSummary::PSK_CSInstr)
+    addModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M);
+  else
+    addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
 }
 
-Metadata *Module::getProfileSummary() {
-  return getModuleFlag("ProfileSummary");
+Metadata *Module::getProfileSummary(bool IsCS) {
+  return (IsCS ? getModuleFlag("CSProfileSummary")
+               : getModuleFlag("ProfileSummary"));
 }
 
 void Module::setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB) {
diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp
index 46b88cd31779..9f347d8da01d 100644
--- a/lib/IR/ModuleSummaryIndex.cpp
+++ b/lib/IR/ModuleSummaryIndex.cpp
@@ -1,9 +1,8 @@
 //===-- ModuleSummaryIndex.cpp - Module Summary Index ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,9 +23,12 @@ using namespace llvm;
 
 STATISTIC(ReadOnlyLiveGVars,
           "Number of live global variables marked read only");
+STATISTIC(WriteOnlyLiveGVars,
+          "Number of live global variables marked write only");
 
 FunctionSummary FunctionSummary::ExternalNode =
     FunctionSummary::makeDummyFunctionSummary({});
+
 bool ValueInfo::isDSOLocal() const {
   // Need to check all summaries are local in case of hash collisions.
   return getSummaryList().size() &&
@@ -36,15 +38,27 @@ bool ValueInfo::isDSOLocal() const {
                       });
 }
 
-// Gets the number of immutable refs in RefEdgeList
-unsigned FunctionSummary::immutableRefCount() const {
-  // Here we take advantage of having all readonly references
+bool ValueInfo::canAutoHide() const {
+  // Requires a non-empty summary list in which every copy can auto hide.
+  return getSummaryList().size() &&
+         llvm::all_of(getSummaryList(),
+                      [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+                        return Summary->canAutoHide();
+                      });
+}
+
+// Returns the {readonly, writeonly} ref counts of RefEdgeList, in that order.
+std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {
+  // Here we take advantage of having all readonly and writeonly references
   // located in the end of the RefEdgeList.
   auto Refs = refs();
-  unsigned ImmutableRefCnt = 0;
-  for (int I = Refs.size() - 1; I >= 0 && Refs[I].isReadOnly(); --I)
-    ImmutableRefCnt++;
-  return ImmutableRefCnt;
+  unsigned RORefCnt = 0, WORefCnt = 0;
+  int I;
+  for (I = Refs.size() - 1; I >= 0 && Refs[I].isWriteOnly(); --I)
+    WORefCnt++; // Length of the trailing run of writeonly refs.
+  for (; I >= 0 && Refs[I].isReadOnly(); --I)
+    RORefCnt++; // Length of the readonly run directly before it.
+  return {RORefCnt, WORefCnt};
 }
 
 // Collect for the given module the list of function it defines
@@ -66,17 +80,6 @@ void ModuleSummaryIndex::collectDefinedFunctionsForModule(
   }
 }
 
-// Collect for each module the list of function it defines (GUID -> Summary).
-void ModuleSummaryIndex::collectDefinedGVSummariesPerModule(
-    StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) const {
-  for (auto &GlobalList : *this) {
-    auto GUID = GlobalList.first;
-    for (auto &Summary : GlobalList.second.SummaryList) {
-      ModuleToDefinedGVSummaries[Summary->modulePath()][GUID] = Summary.get();
-    }
-  }
-}
-
 GlobalValueSummary *
 ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID,
                                           bool PerModuleIndex) const {
@@ -101,48 +104,56 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
   return false;
 }
 
-static void propagateConstantsToRefs(GlobalValueSummary *S) {
-  // If reference is not readonly then referenced summary is not
-  // readonly either. Note that:
+static void propagateAttributesToRefs(GlobalValueSummary *S) {
+  // If reference is not readonly or writeonly then referenced summary is not
+  // read/writeonly either. Note that:
   // - All references from GlobalVarSummary are conservatively considered as
-  //   not readonly. Tracking them properly requires more complex analysis
-  //   then we have now.
+  //   not readonly or writeonly. Tracking them properly requires more complex
+  //   analysis than we have now.
   //
   // - AliasSummary objects have no refs at all so this function is a no-op
   //   for them.
   for (auto &VI : S->refs()) {
-    if (VI.isReadOnly()) {
-      // We only mark refs as readonly when computing function summaries on
-      // analysis phase.
-      assert(isa<FunctionSummary>(S));
-      continue;
-    }
+    assert(VI.getAccessSpecifier() == 0 || isa<FunctionSummary>(S));
     for (auto &Ref : VI.getSummaryList())
-      // If references to alias is not readonly then aliasee is not readonly
-      if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject()))
-        GVS->setReadOnly(false);
+      // If a reference to an alias is not read/writeonly then the aliasee
+      // is not read/writeonly either.
+      if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject())) {
+        if (!VI.isReadOnly())
+          GVS->setReadOnly(false);
+        if (!VI.isWriteOnly())
+          GVS->setWriteOnly(false);
+      }
   }
 }
 
-// Do the constant propagation in combined index.
-// The goal of constant propagation is internalization of readonly
-// variables. To determine which variables are readonly and which
-// are not we take following steps:
-// - During analysis we speculatively assign readonly attribute to
-//   all variables which can be internalized. When computing function
-//   summary we also assign readonly attribute to a reference if
-//   function doesn't modify referenced variable.
+// Do the access attribute propagation in combined index.
+// The goal of attribute propagation is internalization of readonly (RO)
+// or writeonly (WO) variables. To determine which variables are RO or WO
+// and which are not we take the following steps:
+// - During analysis we speculatively assign readonly and writeonly
+//   attribute to all variables which can be internalized. When computing
+//   function summary we also assign readonly or writeonly attribute to a
+//   reference if function doesn't modify referenced variable (readonly)
+//   or doesn't read it (writeonly).
+//
+// - After computing dead symbols in combined index we do the attribute
+//   propagation. During this step we:
+//   a. clear RO and WO attributes from variables which are preserved or
+//      can't be imported
+//   b. clear RO and WO attributes from variables referenced by any global
+//      variable initializer
+//   c. clear RO attribute from variable referenced by a function when
+//      reference is not readonly
+//   d. clear WO attribute from variable referenced by a function when
+//      reference is not writeonly
 //
-// - After computing dead symbols in combined index we do the constant
-//   propagation. During this step we clear readonly attribute from
-//   all variables which:
-//   a. are preserved or can't be imported
-//   b. referenced by any global variable initializer
-//   c. referenced by a function and reference is not readonly
+//   Because of (c, d) we don't internalize variables read by function A
+//   and modified by function B.
 //
 // Internalization itself happens in the backend after import is finished
-// See internalizeImmutableGVs.
-void ModuleSummaryIndex::propagateConstants(
+// See internalizeGVsAfterImport.
+void ModuleSummaryIndex::propagateAttributes(
     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   for (auto &P : *this)
     for (auto &S : P.second.SummaryList) {
@@ -150,29 +161,36 @@ void ModuleSummaryIndex::propagateConstants(
         // We don't examine references from dead objects
         continue;
 
-      // Global variable can't be marked read only if it is not eligible
-      // to import since we need to ensure that all external references
-      // get a local (imported) copy. It also can't be marked read only
-      // if it or any alias (since alias points to the same memory) are
-      // preserved or notEligibleToImport, since either of those means
-      // there could be writes that are not visible (because preserved
-      // means it could have external to DSO writes, and notEligibleToImport
-      // means it could have writes via inline assembly leading it to be
-      // in the @llvm.*used).
+      // Global variable can't be marked read/writeonly if it is not eligible
+      // to import since we need to ensure that all external references get
+      // a local (imported) copy. It also can't be marked read/writeonly if
+      // it or any alias (since alias points to the same memory) are preserved
+      // or notEligibleToImport, since either of those means there could be
+      // writes (or reads in case of writeonly) that are not visible (because
+      // preserved means it could have external to DSO writes or reads, and
+      // notEligibleToImport means it could have writes or reads via inline
+      // assembly leading it to be in the @llvm.*used).
       if (auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject()))
         // Here we intentionally pass S.get() not GVS, because S could be
         // an alias.
-        if (!canImportGlobalVar(S.get()) || GUIDPreservedSymbols.count(P.first))
+        if (!canImportGlobalVar(S.get()) ||
+            GUIDPreservedSymbols.count(P.first)) {
           GVS->setReadOnly(false);
-      propagateConstantsToRefs(S.get());
+          GVS->setWriteOnly(false);
+        }
+      propagateAttributesToRefs(S.get());
     }
   if (llvm::AreStatisticsEnabled())
     for (auto &P : *this)
       if (P.second.SummaryList.size())
         if (auto *GVS = dyn_cast<GlobalVarSummary>(
                 P.second.SummaryList[0]->getBaseObject()))
-          if (isGlobalValueLive(GVS) && GVS->isReadOnly())
-            ReadOnlyLiveGVars++;
+          if (isGlobalValueLive(GVS)) {
+            if (GVS->maybeReadOnly())
+              ReadOnlyLiveGVars++;
+            if (GVS->maybeWriteOnly())
+              WriteOnlyLiveGVars++;
+          }
 }
 
 // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
@@ -335,14 +353,21 @@ static void defineExternalNode(raw_ostream &OS, const char *Pfx,
 
 static bool hasReadOnlyFlag(const GlobalValueSummary *S) {
   if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
-    return GVS->isReadOnly();
+    return GVS->maybeReadOnly();
+  return false;
+}
+
+static bool hasWriteOnlyFlag(const GlobalValueSummary *S) {
+  if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+    return GVS->maybeWriteOnly();
   return false;
 }
 
 void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
   std::vector<Edge> CrossModuleEdges;
   DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
-  StringMap<GVSummaryMapTy> ModuleToDefinedGVS;
+  using GVSOrderedMapTy = std::map<GlobalValue::GUID, GlobalValueSummary *>;
+  std::map<StringRef, GVSOrderedMapTy> ModuleToDefinedGVS;
   collectDefinedGVSummariesPerModule(ModuleToDefinedGVS);
 
   // Get node identifier in form MXXX_<GUID>. The MXXX prefix is required,
@@ -359,12 +384,14 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
     // 0 - alias
     // 1 - reference
     // 2 - constant reference
-    // Other value: (hotness - 3).
-    TypeOrHotness += 3;
+    // 3 - writeonly reference
+    // Other value: (hotness - 4).
+    TypeOrHotness += 4;
     static const char *EdgeAttrs[] = {
         " [style=dotted]; // alias",
         " [style=dashed]; // ref",
         " [style=dashed,color=forestgreen]; // const-ref",
+        " [style=dashed,color=violetred]; // writeOnly-ref",
         " // call (hotness : Unknown)",
         " [color=blue]; // call (hotness : Cold)",
         " // call (hotness : None)",
@@ -379,12 +406,12 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
 
   OS << "digraph Summary {\n";
   for (auto &ModIt : ModuleToDefinedGVS) {
-    auto ModId = getModuleId(ModIt.first());
-    OS << "  // Module: " << ModIt.first() << "\n";
+    auto ModId = getModuleId(ModIt.first);
+    OS << "  // Module: " << ModIt.first << "\n";
     OS << "  subgraph cluster_" << std::to_string(ModId) << " {\n";
     OS << "    style = filled;\n";
     OS << "    color = lightgrey;\n";
-    OS << "    label = \"" << sys::path::filename(ModIt.first()) << "\";\n";
+    OS << "    label = \"" << sys::path::filename(ModIt.first) << "\";\n";
     OS << "    node [style=filled,fillcolor=lightblue];\n";
 
     auto &GVSMap = ModIt.second;
@@ -409,7 +436,13 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
         A.add("shape", "Mrecord", "variable");
         if (Flags.Live && hasReadOnlyFlag(SummaryIt.second))
           A.addComment("immutable");
+        if (Flags.Live && hasWriteOnlyFlag(SummaryIt.second))
+          A.addComment("writeOnly");
       }
+      if (Flags.DSOLocal)
+        A.addComment("dsoLocal");
+      if (Flags.CanAutoHide)
+        A.addComment("canAutoHide");
 
       auto VI = getValueInfo(SummaryIt.first);
       A.add("label", getNodeLabel(VI, SummaryIt.second));
@@ -426,20 +459,11 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
     for (auto &SummaryIt : GVSMap) {
       auto *GVS = SummaryIt.second;
       for (auto &R : GVS->refs())
-        Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ? -1 : -2);
+        Draw(SummaryIt.first, R.getGUID(),
+             R.isWriteOnly() ? -1 : (R.isReadOnly() ? -2 : -3));
 
       if (auto *AS = dyn_cast_or_null<AliasSummary>(SummaryIt.second)) {
-        GlobalValue::GUID AliaseeId;
-        if (AS->hasAliaseeGUID())
-          AliaseeId = AS->getAliaseeGUID();
-        else {
-          auto AliaseeOrigId = AS->getAliasee().getOriginalName();
-          AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
-          if (!AliaseeId)
-            AliaseeId = AliaseeOrigId;
-        }
-
-        Draw(SummaryIt.first, AliaseeId, -3);
+        Draw(SummaryIt.first, AS->getAliaseeGUID(), -4);
         continue;
       }
 
diff --git a/lib/IR/Operator.cpp b/lib/IR/Operator.cpp
index 5b4c7524b672..8ba68674d50e 100644
--- a/lib/IR/Operator.cpp
+++ b/lib/IR/Operator.cpp
@@ -1,9 +1,8 @@
 //===-- Operator.cpp - Implement the LLVM operators -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index c79e1fc2b0b4..3104b90f3070 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -1,9 +1,8 @@
 //===- llvm/IR/OptBisect/Bisect.cpp - LLVM Bisect support -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,13 +14,6 @@
 
 #include "llvm/IR/OptBisect.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
@@ -47,73 +39,10 @@ static void printPassMessage(const StringRef &Name, int PassNum,
          << "(" << PassNum << ") " << Name << " on " << TargetDesc << "\n";
 }
 
-static std::string getDescription(const Module &M) {
-  return "module (" + M.getName().str() + ")";
-}
-
-static std::string getDescription(const Function &F) {
-  return "function (" + F.getName().str() + ")";
-}
-
-static std::string getDescription(const BasicBlock &BB) {
-  return "basic block (" + BB.getName().str() + ") in function (" +
-         BB.getParent()->getName().str() + ")";
-}
-
-static std::string getDescription(const Loop &L) {
-  // FIXME: Move into LoopInfo so we can get a better description
-  // (and avoid a circular dependency between IR and Analysis).
-  return "loop";
-}
-
-static std::string getDescription(const Region &R) {
-  // FIXME: Move into RegionInfo so we can get a better description
-  // (and avoid a circular dependency between IR and Analysis).
-  return "region";
-}
-
-static std::string getDescription(const CallGraphSCC &SCC) {
-  // FIXME: Move into CallGraphSCCPass to avoid circular dependency between
-  // IR and Analysis.
-  std::string Desc = "SCC (";
-  bool First = true;
-  for (CallGraphNode *CGN : SCC) {
-    if (First)
-      First = false;
-    else
-      Desc += ", ";
-    Function *F = CGN->getFunction();
-    if (F)
-      Desc += F->getName();
-    else
-      Desc += "<<null function>>";
-  }
-  Desc += ")";
-  return Desc;
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Module &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Function &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const BasicBlock &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Region &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Loop &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
+bool OptBisect::shouldRunPass(const Pass *P, StringRef IRDescription) {
+  assert(BisectEnabled);
 
-bool OptBisect::shouldRunPass(const Pass *P, const CallGraphSCC &U) {
-  return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
+  return checkPass(P->getPassName(), IRDescription);
 }
 
 bool OptBisect::checkPass(const StringRef PassName,
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index a1dc17882493..699a7e17c0cb 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -1,9 +1,8 @@
 //===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,8 +55,13 @@ PassManagerType ModulePass::getPotentialPassManagerType() const {
   return PMT_ModulePassManager;
 }
 
+static std::string getDescription(const Module &M) {
+  return "module (" + M.getName().str() + ")";
+}
+
 bool ModulePass::skipModule(Module &M) const {
-  return !M.getContext().getOptPassGate().shouldRunPass(this, M);
+  OptPassGate &Gate = M.getContext().getOptPassGate();
+  return Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(M));
 }
 
 bool Pass::mustPreserveAnalysisID(char &AID) const {
@@ -155,11 +159,16 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const {
   return PMT_FunctionPassManager;
 }
 
+static std::string getDescription(const Function &F) {
+  return "function (" + F.getName().str() + ")";
+}
+
 bool FunctionPass::skipFunction(const Function &F) const {
-  if (!F.getContext().getOptPassGate().shouldRunPass(this, F))
+  OptPassGate &Gate = F.getContext().getOptPassGate();
+  if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(F)))
     return true;
 
-  if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+  if (F.hasOptNone()) {
     LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' on function "
                       << F.getName() << "\n");
     return true;
@@ -186,13 +195,19 @@ bool BasicBlockPass::doFinalization(Function &) {
   return false;
 }
 
+static std::string getDescription(const BasicBlock &BB) {
+  return "basic block (" + BB.getName().str() + ") in function (" +
+         BB.getParent()->getName().str() + ")";
+}
+
 bool BasicBlockPass::skipBasicBlock(const BasicBlock &BB) const {
   const Function *F = BB.getParent();
   if (!F)
     return false;
-  if (!F->getContext().getOptPassGate().shouldRunPass(this, BB))
+  OptPassGate &Gate = F->getContext().getOptPassGate();
+  if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(BB)))
     return true;
-  if (F->hasFnAttribute(Attribute::OptimizeNone)) {
+  if (F->hasOptNone()) {
     // Report this only once per function.
     if (&BB == &F->getEntryBlock())
       LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
diff --git a/lib/IR/PassInstrumentation.cpp b/lib/IR/PassInstrumentation.cpp
index 5aa2bc6d895e..49cc6ec04d90 100644
--- a/lib/IR/PassInstrumentation.cpp
+++ b/lib/IR/PassInstrumentation.cpp
@@ -1,9 +1,8 @@
 //===- PassInstrumentation.cpp - Pass Instrumentation interface -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 47fdfedfdde8..cde9b873795e 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -1,9 +1,8 @@
 //===- PassManager.cpp - Infrastructure for managing & running IR passes --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index b0f1a9928725..92c188b11898 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -1,9 +1,8 @@
 //===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/PassTimingInfo.cpp b/lib/IR/PassTimingInfo.cpp
index 40b3977ecbd9..9cc44ea05fee 100644
--- a/lib/IR/PassTimingInfo.cpp
+++ b/lib/IR/PassTimingInfo.cpp
@@ -1,9 +1,8 @@
 //===- PassTimingInfo.cpp - LLVM Pass Timing Implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,7 +77,8 @@ public:
   static void init();
 
   /// Prints out timing information and then resets the timers.
-  void print();
+  /// By default it uses the stream created by CreateInfoOutputFile().
+  void print(raw_ostream *OutStream = nullptr);
 
   /// Returns the timer for the specified pass if it exists.
   Timer *getPassTimer(Pass *, PassInstanceID);
@@ -112,7 +112,9 @@ void PassTimingInfo::init() {
 }
 
 /// Prints out timing information and then resets the timers.
-void PassTimingInfo::print() { TG.print(*CreateInfoOutputFile()); }
+void PassTimingInfo::print(raw_ostream *OutStream) {
+  TG.print(OutStream ? *OutStream : *CreateInfoOutputFile(), true);
+}
 
 Timer *PassTimingInfo::newPassTimer(StringRef PassID, StringRef PassDesc) {
   unsigned &num = PassIDCountMap[PassID];
@@ -154,9 +156,9 @@ Timer *getPassTimer(Pass *P) {
 
 /// If timing is enabled, report the times collected up to now and then reset
 /// them.
-void reportAndResetTimings() {
+void reportAndResetTimings(raw_ostream *OutStream) {
   if (legacy::PassTimingInfo::TheTimeInfo)
-    legacy::PassTimingInfo::TheTimeInfo->print();
+    legacy::PassTimingInfo::TheTimeInfo->print(OutStream);
 }
 
 //===----------------------------------------------------------------------===//
@@ -182,7 +184,15 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
 TimePassesHandler::TimePassesHandler(bool Enabled)
     : TG("pass", "... Pass execution timing report ..."), Enabled(Enabled) {}
 
-void TimePassesHandler::print() { TG.print(*CreateInfoOutputFile()); }
+void TimePassesHandler::setOutStream(raw_ostream &Out) {
+  OutStream = &Out;
+}
+
+void TimePassesHandler::print() {
+  if (!Enabled)
+    return;
+  TG.print(OutStream ? *OutStream : *CreateInfoOutputFile(), true);
+}
 
 LLVM_DUMP_METHOD void TimePassesHandler::dump() const {
   dbgs() << "Dumping timers for " << getTypeName<TimePassesHandler>()
diff --git a/lib/IR/ProfileSummary.cpp b/lib/IR/ProfileSummary.cpp
index 491fe834df9a..11d95ac19be6 100644
--- a/lib/IR/ProfileSummary.cpp
+++ b/lib/IR/ProfileSummary.cpp
@@ -1,9 +1,8 @@
 //=-- Profilesummary.cpp - Profile summary support --------------------------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,8 +21,6 @@
 
 using namespace llvm;
 
-const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"};
-
 // Return an MDTuple with two elements. The first element is a string Key and
 // the second is a uint64_t Value.
 static Metadata *getKeyValMD(LLVMContext &Context, const char *Key,
@@ -69,6 +66,7 @@ Metadata *ProfileSummary::getDetailedSummaryMD(LLVMContext &Context) {
 // "SampleProfile"). The rest of the elements of the outer MDTuple are specific
 // to the kind of profile summary as returned by getFormatSpecificMD.
 Metadata *ProfileSummary::getMD(LLVMContext &Context) {
+  const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"};
   Metadata *Components[] = {
     getKeyValMD(Context, "ProfileFormat", KindStr[PSK]),
     getKeyValMD(Context, "TotalCount", getTotalCount()),
@@ -154,6 +152,9 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
   else if (isKeyValuePair(dyn_cast_or_null<MDTuple>(FormatMD), "ProfileFormat",
                           "InstrProf"))
     SummaryKind = PSK_Instr;
+  else if (isKeyValuePair(dyn_cast_or_null<MDTuple>(FormatMD), "ProfileFormat",
+                          "CSInstrProf"))
+    SummaryKind = PSK_CSInstr;
   else
     return nullptr;
 
diff --git a/lib/IR/RemarkStreamer.cpp b/lib/IR/RemarkStreamer.cpp
new file mode 100644
index 000000000000..5b4c7e72b479
--- /dev/null
+++ b/lib/IR/RemarkStreamer.cpp
@@ -0,0 +1,154 @@
+//===- llvm/IR/RemarkStreamer.cpp - Remark Streamer -*- C++ -------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the remark outputting as part of
+// LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/RemarkStreamer.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/RemarkFormat.h"
+
+using namespace llvm;
+
+RemarkStreamer::RemarkStreamer(StringRef Filename,
+                               std::unique_ptr<remarks::Serializer> Serializer)
+    : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) {
+  assert(!Filename.empty() && "This needs to be a real filename.");
+}
+
+Error RemarkStreamer::setFilter(StringRef Filter) {
+  Regex R = Regex(Filter);
+  std::string RegexError;
+  if (!R.isValid(RegexError))
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             RegexError.data());
+  PassFilter = std::move(R);
+  return Error::success();
+}
+
+/// DiagnosticKind -> remarks::Type
+static remarks::Type toRemarkType(enum DiagnosticKind Kind) {
+  switch (Kind) {
+  default:
+    return remarks::Type::Unknown;
+  case DK_OptimizationRemark:
+  case DK_MachineOptimizationRemark:
+    return remarks::Type::Passed;
+  case DK_OptimizationRemarkMissed:
+  case DK_MachineOptimizationRemarkMissed:
+    return remarks::Type::Missed;
+  case DK_OptimizationRemarkAnalysis:
+  case DK_MachineOptimizationRemarkAnalysis:
+    return remarks::Type::Analysis;
+  case DK_OptimizationRemarkAnalysisFPCommute:
+    return remarks::Type::AnalysisFPCommute;
+  case DK_OptimizationRemarkAnalysisAliasing:
+    return remarks::Type::AnalysisAliasing;
+  case DK_OptimizationFailure:
+    return remarks::Type::Failure;
+  }
+}
+
+/// DiagnosticLocation -> remarks::RemarkLocation.
+static Optional<remarks::RemarkLocation>
+toRemarkLocation(const DiagnosticLocation &DL) {
+  if (!DL.isValid())
+    return None;
+  StringRef File = DL.getRelativePath();
+  unsigned Line = DL.getLine();
+  unsigned Col = DL.getColumn();
+  return remarks::RemarkLocation{File, Line, Col};
+}
+
+/// LLVM Diagnostic -> Remark
+remarks::Remark
+RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) {
+  remarks::Remark R; // The result.
+  R.RemarkType = toRemarkType(static_cast<DiagnosticKind>(Diag.getKind()));
+  R.PassName = Diag.getPassName();
+  R.RemarkName = Diag.getRemarkName();
+  R.FunctionName =
+      GlobalValue::dropLLVMManglingEscape(Diag.getFunction().getName());
+  R.Loc = toRemarkLocation(Diag.getLocation());
+  R.Hotness = Diag.getHotness();
+
+  for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) {
+    R.Args.emplace_back();
+    R.Args.back().Key = Arg.Key;
+    R.Args.back().Val = Arg.Val;
+    R.Args.back().Loc = toRemarkLocation(Arg.Loc);
+  }
+
+  return R;
+}
+
+void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) {
+  if (Optional<Regex> &Filter = PassFilter)
+    if (!Filter->match(Diag.getPassName()))
+      return;
+
+  // First, convert the diagnostic to a remark.
+  remarks::Remark R = toRemark(Diag);
+  // Then, emit the remark through the serializer.
+  Serializer->emit(R);
+}
+
+char RemarkSetupFileError::ID = 0;
+char RemarkSetupPatternError::ID = 0;
+char RemarkSetupFormatError::ID = 0;
+
+static std::unique_ptr<remarks::Serializer>
+formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) {
+  switch (RemarksFormat) {
+  default:
+    llvm_unreachable("Unknown remark serializer format.");
+    return nullptr;
+  case remarks::Format::YAML:
+    return llvm::make_unique<remarks::YAMLSerializer>(OS);
+  };
+}
+
+Expected<std::unique_ptr<ToolOutputFile>>
+llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+                               StringRef RemarksPasses, StringRef RemarksFormat,
+                               bool RemarksWithHotness,
+                               unsigned RemarksHotnessThreshold) {
+  if (RemarksWithHotness)
+    Context.setDiagnosticsHotnessRequested(true);
+
+  if (RemarksHotnessThreshold)
+    Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
+
+  if (RemarksFilename.empty())
+    return nullptr;
+
+  std::error_code EC;
+  auto RemarksFile =
+      llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
+  // We don't use llvm::FileError here because some diagnostics want the file
+  // name separately.
+  if (EC)
+    return make_error<RemarkSetupFileError>(errorCodeToError(EC));
+
+  Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
+  if (Error E = Format.takeError())
+    return make_error<RemarkSetupFormatError>(std::move(E));
+
+  Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
+      RemarksFilename, formatToSerializer(*Format, RemarksFile->os())));
+
+  if (!RemarksPasses.empty())
+    if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses))
+      return make_error<RemarkSetupPatternError>(std::move(E));
+
+  return std::move(RemarksFile);
+}
diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp
index 12ada1320225..7f3dea5e6a6d 100644
--- a/lib/IR/SafepointIRVerifier.cpp
+++ b/lib/IR/SafepointIRVerifier.cpp
@@ -1,9 +1,8 @@
 //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -198,6 +197,17 @@ protected:
 static void Verify(const Function &F, const DominatorTree &DT,
                    const CFGDeadness &CD);
 
+namespace llvm {
+PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
+                                               FunctionAnalysisManager &AM) {
+  const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  CFGDeadness CD;
+  CD.processFunction(F, DT);
+  Verify(F, DT, CD);
+  return PreservedAnalyses::all();
+}
+}
+
 namespace {
 
 struct SafepointIRVerifier : public FunctionPass {
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 18efee2177c3..fce89b42e9bf 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -1,9 +1,8 @@
 //===-- IR/Statepoint.cpp -- gc.statepoint utilities ---  -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,21 +17,15 @@
 
 using namespace llvm;
 
-static const Function *getCalledFunction(ImmutableCallSite CS) {
-  if (!CS.getInstruction())
-    return nullptr;
-  return CS.getCalledFunction();
-}
-
-bool llvm::isStatepoint(ImmutableCallSite CS) {
-  if (auto *F = getCalledFunction(CS))
+bool llvm::isStatepoint(const CallBase *Call) {
+  if (auto *F = Call->getCalledFunction())
     return F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint;
   return false;
 }
 
 bool llvm::isStatepoint(const Value *V) {
-  if (auto CS = ImmutableCallSite(V))
-    return isStatepoint(CS);
+  if (auto *Call = dyn_cast<CallBase>(V))
+    return isStatepoint(Call);
   return false;
 }
 
@@ -40,23 +33,21 @@ bool llvm::isStatepoint(const Value &V) {
   return isStatepoint(&V);
 }
 
-bool llvm::isGCRelocate(ImmutableCallSite CS) {
-  return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
+bool llvm::isGCRelocate(const CallBase *Call) {
+  return isa<GCRelocateInst>(Call);
 }
 
 bool llvm::isGCRelocate(const Value *V) {
-  if (auto CS = ImmutableCallSite(V))
-    return isGCRelocate(CS);
+  if (auto *Call = dyn_cast<CallBase>(V))
+    return isGCRelocate(Call);
   return false;
 }
 
-bool llvm::isGCResult(ImmutableCallSite CS) {
-  return CS.getInstruction() && isa<GCResultInst>(CS.getInstruction());
-}
+bool llvm::isGCResult(const CallBase *Call) { return isa<GCResultInst>(Call); }
 
 bool llvm::isGCResult(const Value *V) {
-  if (auto CS = ImmutableCallSite(V))
-    return isGCResult(CS);
+  if (auto *Call = dyn_cast<CallBase>(V))
+    return isGCResult(Call);
   return false;
 }
 
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index d4ad1eba33c6..f399c823d6fb 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -1,9 +1,8 @@
 //===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -84,7 +83,8 @@ void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
     SymbolTableListTraits &L2, iterator first, iterator last) {
   // We only have to do work here if transferring instructions between BBs
   ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
-  assert(NewIP != OldIP && "Expected different list owners");
+  if (NewIP == OldIP)
+    return;
 
   // We only have to update symbol table entries if we are transferring the
   // instructions to a different symtab object...
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 0fb079c5ab73..8ece7f223dd2 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -1,9 +1,8 @@
 //===- Type.cpp - Implement the Type class --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -256,7 +255,7 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
   IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
 
   if (!Entry)
-    Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+    Entry = new (C.pImpl->Alloc) IntegerType(C, NumBits);
 
   return Entry;
 }
@@ -308,7 +307,7 @@ FunctionType *FunctionType::get(Type *ReturnType,
   if (Insertion.second) {
     // The function type was not found. Allocate one and update FunctionTypes
     // in-place.
-    FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
+    FT = (FunctionType *)pImpl->Alloc.Allocate(
         sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
         alignof(FunctionType));
     new (FT) FunctionType(ReturnType, Params, isVarArg);
@@ -354,7 +353,7 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
   if (Insertion.second) {
     // The struct type was not found. Allocate one and update AnonStructTypes
     // in-place.
-    ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+    ST = new (Context.pImpl->Alloc) StructType(Context);
     ST->setSubclassData(SCDB_IsLiteral);  // Literal struct.
     ST->setBody(ETypes, isPacked);
     *Insertion.first = ST;
@@ -380,7 +379,7 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
     return;
   }
 
-  ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data();
+  ContainedTys = Elements.copy(getContext().pImpl->Alloc).data();
 }
 
 void StructType::setName(StringRef Name) {
@@ -435,7 +434,7 @@ void StructType::setName(StringRef Name) {
 // StructType Helper functions.
 
 StructType *StructType::create(LLVMContext &Context, StringRef Name) {
-  StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+  StructType *ST = new (Context.pImpl->Alloc) StructType(Context);
   if (!Name.empty())
     ST->setName(Name);
   return ST;
@@ -505,6 +504,8 @@ StringRef StructType::getName() const {
 }
 
 bool StructType::isValidElementType(Type *ElemTy) {
+  if (auto *VTy = dyn_cast<VectorType>(ElemTy))
+    return !VTy->isScalable();
   return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
          !ElemTy->isTokenTy();
@@ -586,11 +587,13 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
     pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
 
   if (!Entry)
-    Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+    Entry = new (pImpl->Alloc) ArrayType(ElementType, NumElements);
   return Entry;
 }
 
 bool ArrayType::isValidElementType(Type *ElemTy) {
+  if (auto *VTy = dyn_cast<VectorType>(ElemTy))
+    return !VTy->isScalable();
   return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
          !ElemTy->isTokenTy();
@@ -600,21 +603,20 @@ bool ArrayType::isValidElementType(Type *ElemTy) {
 //                          VectorType Implementation
 //===----------------------------------------------------------------------===//
 
-VectorType::VectorType(Type *ElType, unsigned NumEl)
-  : SequentialType(VectorTyID, ElType, NumEl) {}
+VectorType::VectorType(Type *ElType, ElementCount EC)
+  : SequentialType(VectorTyID, ElType, EC.Min), Scalable(EC.Scalable) {}
 
-VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
-  assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+VectorType *VectorType::get(Type *ElementType, ElementCount EC) {
+  assert(EC.Min > 0 && "#Elements of a VectorType must be greater than 0");
   assert(isValidElementType(ElementType) && "Element type of a VectorType must "
                                             "be an integer, floating point, or "
                                             "pointer type.");
 
   LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
   VectorType *&Entry = ElementType->getContext().pImpl
-    ->VectorTypes[std::make_pair(ElementType, NumElements)];
-
+                                 ->VectorTypes[std::make_pair(ElementType, EC)];
   if (!Entry)
-    Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+    Entry = new (pImpl->Alloc) VectorType(ElementType, EC);
   return Entry;
 }
 
@@ -638,7 +640,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
      : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
 
   if (!Entry)
-    Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+    Entry = new (CImpl->Alloc) PointerType(EltTy, AddressSpace);
   return Entry;
 }
 
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index e9af78c71bfd..2e2c194860cd 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -1,9 +1,8 @@
 //===- TypeFinder.cpp - Implement the TypeFinder class --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
index cae845d99fe5..18c61757ee84 100644
--- a/lib/IR/Use.cpp
+++ b/lib/IR/Use.cpp
@@ -1,9 +1,8 @@
 //===-- Use.cpp - Implement the Use class ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
index 041593f20b57..33a3686c94a1 100644
--- a/lib/IR/User.cpp
+++ b/lib/IR/User.cpp
@@ -1,9 +1,8 @@
 //===-- User.cpp - Implement the User class -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 80b993c89f7f..b7f77dc3043e 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -1,9 +1,8 @@
 //===-- Value.cpp - Implement the Value class -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,7 +57,8 @@ Value::Value(Type *ty, unsigned scid)
   // FIXME: Why isn't this in the subclass gunk??
   // Note, we cannot call isa<CallInst> before the CallInst has been
   // constructed.
-  if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke)
+  if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke ||
+      SubclassID == Instruction::CallBr)
     assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
            "invalid CallInst type!");
   else if (SubclassID != BasicBlockVal &&
@@ -460,6 +460,7 @@ namespace {
 enum PointerStripKind {
   PSK_ZeroIndices,
   PSK_ZeroIndicesAndAliases,
+  PSK_ZeroIndicesAndAliasesSameRepresentation,
   PSK_ZeroIndicesAndAliasesAndInvariantGroups,
   PSK_InBoundsConstantIndices,
   PSK_InBounds
@@ -479,6 +480,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
       switch (StripKind) {
       case PSK_ZeroIndicesAndAliases:
+      case PSK_ZeroIndicesAndAliasesSameRepresentation:
       case PSK_ZeroIndicesAndAliasesAndInvariantGroups:
       case PSK_ZeroIndices:
         if (!GEP->hasAllZeroIndices())
@@ -494,8 +496,12 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
         break;
       }
       V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation &&
                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+      // TODO: If we know an address space cast will not change the
+      //       representation we could look through it here as well.
       V = cast<Operator>(V)->getOperand(0);
     } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
       if (StripKind == PSK_ZeroIndices || GA->isInterposable())
@@ -530,6 +536,11 @@ const Value *Value::stripPointerCasts() const {
   return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
 }
 
+const Value *Value::stripPointerCastsSameRepresentation() const {
+  return stripPointerCastsAndOffsets<
+      PSK_ZeroIndicesAndAliasesSameRepresentation>(this);
+}
+
 const Value *Value::stripPointerCastsNoFollowAliases() const {
   return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
 }
@@ -544,13 +555,13 @@ const Value *Value::stripPointerCastsAndInvariantGroups() const {
 }
 
 const Value *
-Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
-                                                 APInt &Offset) const {
-  if (!getType()->isPointerTy())
+Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+                                         bool AllowNonInbounds) const {
+  if (!getType()->isPtrOrPtrVectorTy())
     return this;
 
-  assert(Offset.getBitWidth() == DL.getIndexSizeInBits(cast<PointerType>(
-                                     getType())->getAddressSpace()) &&
+  unsigned BitWidth = Offset.getBitWidth();
+  assert(BitWidth == DL.getIndexTypeSizeInBits(getType()) &&
          "The offset bit width does not match the DL specification.");
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -560,27 +571,39 @@ Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
   const Value *V = this;
   do {
     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->isInBounds())
+      // If in-bounds was requested, we do not strip non-in-bounds GEPs.
+      if (!AllowNonInbounds && !GEP->isInBounds())
         return V;
-      APInt GEPOffset(Offset);
+
+      // If one of the values we have visited is an addrspacecast, then
+      // the pointer type of this GEP may be different from the type
+      // of the Ptr parameter which was passed to this function.  This
+      // means when we construct GEPOffset, we need to use the size
+      // of GEP's pointer type rather than the size of the original
+      // pointer type.
+      APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
       if (!GEP->accumulateConstantOffset(DL, GEPOffset))
         return V;
-      Offset = GEPOffset;
+
+      // Stop traversal if the pointer offset wouldn't fit in the bit-width
+      // provided by the Offset argument. This can happen due to AddrSpaceCast
+      // stripping.
+      if (GEPOffset.getMinSignedBits() > BitWidth)
+        return V;
+
+      Offset += GEPOffset.sextOrTrunc(BitWidth);
       V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
       V = cast<Operator>(V)->getOperand(0);
     } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
-      V = GA->getAliasee();
-    } else {
-      if (const auto *Call = dyn_cast<CallBase>(V))
-        if (const Value *RV = Call->getReturnedArgOperand()) {
+      if (!GA->isInterposable())
+        V = GA->getAliasee();
+    } else if (const auto *Call = dyn_cast<CallBase>(V)) {
+        if (const Value *RV = Call->getReturnedArgOperand())
           V = RV;
-          continue;
-        }
-
-      return V;
     }
-    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+    assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
   } while (Visited.insert(V).second);
 
   return V;
@@ -648,10 +671,14 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
 
   unsigned Align = 0;
   if (auto *GO = dyn_cast<GlobalObject>(this)) {
-    // Don't make any assumptions about function pointer alignment. Some
-    // targets use the LSBs to store additional information.
-    if (isa<Function>(GO))
-      return 0;
+    if (isa<Function>(GO)) {
+      switch (DL.getFunctionPtrAlignType()) {
+      case DataLayout::FunctionPtrAlignType::Independent:
+        return DL.getFunctionPtrAlign();
+      case DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign:
+        return std::max(DL.getFunctionPtrAlign(), GO->getAlignment());
+      }
+    }
     Align = GO->getAlignment();
     if (Align == 0) {
       if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
@@ -931,7 +958,7 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
                << Old->getName() << " to " << *New->getType() << " %"
                << New->getName() << "\n";
         llvm_unreachable(
-            "A weak tracking value handle still pointed to the  old value!\n");
+            "A weak tracking value handle still pointed to the old value!\n");
       default:
         break;
       }
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index f4bea5604043..417ec045071d 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -1,9 +1,8 @@
 //===- ValueSymbolTable.cpp - Implement the ValueSymbolTable class --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 30e77b92009f..9346c8bda75d 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1,9 +1,8 @@
 //===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -467,6 +466,7 @@ private:
   void visitReturnInst(ReturnInst &RI);
   void visitSwitchInst(SwitchInst &SI);
   void visitIndirectBrInst(IndirectBrInst &BI);
+  void visitCallBrInst(CallBrInst &CBI);
   void visitSelectInst(SelectInst &SI);
   void visitUserOp1(Instruction &I);
   void visitUserOp2(Instruction &I) { visitUserOp1(I); }
@@ -500,7 +500,7 @@ private:
                             const Value *V);
   void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V);
   void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
-                           const Value *V);
+                           const Value *V, bool IsIntrinsic);
   void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
 
   void visitConstantExprsRecursively(const Constant *EntryC);
@@ -641,18 +641,18 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
       PointerType *FuncPtrTy =
           FunctionType::get(Type::getVoidTy(Context), false)->
           getPointerTo(DL.getProgramAddressSpace());
-      // FIXME: Reject the 2-field form in LLVM 4.0.
       Assert(STy &&
                  (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
                  STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
                  STy->getTypeAtIndex(1) == FuncPtrTy,
              "wrong type for intrinsic global variable", &GV);
-      if (STy->getNumElements() == 3) {
-        Type *ETy = STy->getTypeAtIndex(2);
-        Assert(ETy->isPointerTy() &&
-                   cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
-               "wrong type for intrinsic global variable", &GV);
-      }
+      Assert(STy->getNumElements() == 3,
+             "the third field of the element type is mandatory, "
+             "specify i8* null to migrate from the obsoleted 2-field form");
+      Type *ETy = STy->getTypeAtIndex(2);
+      Assert(ETy->isPointerTy() &&
+                 cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+             "wrong type for intrinsic global variable", &GV);
     }
   }
 
@@ -691,6 +691,13 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
                       "DIGlobalVariableExpression");
   }
 
+  // Scalable vectors cannot be global variables, since we don't know
+  // the runtime size. If the global is a struct or an array containing
+  // scalable vectors, that will be caught by the isValidElementType methods
+  // in StructType or ArrayType instead.
+  if (auto *VTy = dyn_cast<VectorType>(GV.getValueType()))
+    Assert(!VTy->isScalable(), "Globals cannot contain scalable vectors", &GV);
+
   if (!GV.hasInitializer()) {
     visitGlobalValue(GV);
     return;
@@ -926,7 +933,8 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
 
   if (N.getDWARFAddressSpace()) {
     AssertDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
-                 N.getTag() == dwarf::DW_TAG_reference_type,
+                 N.getTag() == dwarf::DW_TAG_reference_type ||
+                 N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
              "DWARF address space only applies to pointer or reference types",
              &N);
   }
@@ -1156,6 +1164,14 @@ void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
   visitDILexicalBlockBase(N);
 }
 
+void Verifier::visitDICommonBlock(const DICommonBlock &N) {
+  AssertDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
+  auto *Scope = N.getRawScope(); // Null is accepted; only non-null is checked.
+  AssertDI(!Scope || isa<DIScope>(Scope), "invalid scope ref", &N, Scope);
+  auto *Var = N.getRawDecl(); // Null is accepted; only non-null is checked.
+  AssertDI(!Var || isa<DIGlobalVariable>(Var), "invalid declaration", &N, Var);
+}
+
 void Verifier::visitDINamespace(const DINamespace &N) {
   AssertDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
   if (auto *S = N.getRawScope())
@@ -1224,7 +1240,6 @@ void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
   visitDIVariable(N);
 
   AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
-  AssertDI(!N.getName().empty(), "missing global variable name", &N);
   AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
   AssertDI(N.getType(), "missing global variable type", &N);
   if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
@@ -1478,9 +1493,12 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
 static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
   switch (Kind) {
   case Attribute::NoReturn:
+  case Attribute::NoSync:
+  case Attribute::WillReturn:
   case Attribute::NoCfCheck:
   case Attribute::NoUnwind:
   case Attribute::NoInline:
+  case Attribute::NoFree:
   case Attribute::AlwaysInline:
   case Attribute::OptimizeForSize:
   case Attribute::StackProtect:
@@ -1498,6 +1516,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
   case Attribute::SanitizeHWAddress:
+  case Attribute::SanitizeMemTag:
   case Attribute::SanitizeThread:
   case Attribute::SanitizeMemory:
   case Attribute::MinSize:
@@ -1562,6 +1581,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
 
   verifyAttributeTypes(Attrs, /*IsFunction=*/false, V);
 
+  if (Attrs.hasAttribute(Attribute::ImmArg)) {
+    Assert(Attrs.getNumAttributes() == 1,
+           "Attribute 'immarg' is incompatible with other attributes", V);
+  }
+
   // Check for mutually incompatible attributes.  Only inreg is compatible with
   // sret.
   unsigned AttrCount = 0;
@@ -1616,6 +1640,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
          "'noinline and alwaysinline' are incompatible!",
          V);
 
+  if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
+    Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
+           "Attribute 'byval' type does not match parameter!", V);
+  }
+
   AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
          "Wrong types for attribute: " +
@@ -1649,7 +1678,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
 // Check parameter attributes against a function type.
 // The value V is printed in error messages.
 void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
-                                   const Value *V) {
+                                   const Value *V, bool IsIntrinsic) {
   if (Attrs.isEmpty())
     return;
 
@@ -1686,6 +1715,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
     Type *Ty = FT->getParamType(i);
     AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
 
+    if (!IsIntrinsic) {
+      Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
+             "immarg attribute only applies to intrinsics",V);
+    }
+
     verifyParameterAttrs(ArgAttrs, Ty, V);
 
     if (ArgAttrs.hasAttribute(Attribute::Nest)) {
@@ -1904,16 +1938,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
          "reordering restrictions required by safepoint semantics",
          Call);
 
-  const Value *IDV = Call.getArgOperand(0);
-  Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
-         Call);
-
-  const Value *NumPatchBytesV = Call.getArgOperand(1);
-  Assert(isa<ConstantInt>(NumPatchBytesV),
-         "gc.statepoint number of patchable bytes must be a constant integer",
-         Call);
   const int64_t NumPatchBytes =
-      cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
+      cast<ConstantInt>(Call.getArgOperand(1))->getSExtValue();
   assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
   Assert(NumPatchBytes >= 0,
          "gc.statepoint number of patchable bytes must be "
@@ -1926,12 +1952,7 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
          "gc.statepoint callee must be of function pointer type", Call, Target);
   FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
 
-  const Value *NumCallArgsV = Call.getArgOperand(3);
-  Assert(isa<ConstantInt>(NumCallArgsV),
-         "gc.statepoint number of arguments to underlying call "
-         "must be constant integer",
-         Call);
-  const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
+  const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
   Assert(NumCallArgs >= 0,
          "gc.statepoint number of arguments to underlying call "
          "must be positive",
@@ -1950,10 +1971,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
     Assert(NumCallArgs == NumParams,
            "gc.statepoint mismatch in number of call args", Call);
 
-  const Value *FlagsV = Call.getArgOperand(4);
-  Assert(isa<ConstantInt>(FlagsV),
-         "gc.statepoint flags must be constant integer", Call);
-  const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
+  const uint64_t Flags
+    = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
   Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
          "unknown flag used in gc.statepoint flags argument", Call);
 
@@ -2043,7 +2062,7 @@ void Verifier::verifyFrameRecoverIndices() {
     unsigned MaxRecoveredIndex = Counts.second.second;
     Assert(MaxRecoveredIndex <= EscapedObjectCount,
            "all indices passed to llvm.localrecover must be less than the "
-           "number of arguments passed ot llvm.localescape in the parent "
+           "number of arguments passed to llvm.localescape in the parent "
            "function",
            F);
   }
@@ -2130,8 +2149,11 @@ void Verifier::visitFunction(const Function &F) {
   Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
          "Attribute after last parameter!", &F);
 
+  bool isLLVMdotName = F.getName().size() >= 5 &&
+                       F.getName().substr(0, 5) == "llvm.";
+
   // Check function attributes.
-  verifyFunctionAttrs(FT, Attrs, &F);
+  verifyFunctionAttrs(FT, Attrs, &F, isLLVMdotName);
 
   // On function declarations/definitions, we do not support the builtin
   // attribute. We do not check this in VerifyFunctionAttrs since that is
@@ -2170,9 +2192,6 @@ void Verifier::visitFunction(const Function &F) {
     break;
   }
 
-  bool isLLVMdotName = F.getName().size() >= 5 &&
-                       F.getName().substr(0, 5) == "llvm.";
-
   // Check that the argument values match the function type for this function...
   unsigned i = 0;
   for (const Argument &Arg : F.args()) {
@@ -2220,8 +2239,11 @@ void Verifier::visitFunction(const Function &F) {
            MDs.empty() ? nullptr : MDs.front().second);
   } else if (F.isDeclaration()) {
     for (const auto &I : MDs) {
-      AssertDI(I.first != LLVMContext::MD_dbg,
-               "function declaration may not have a !dbg attachment", &F);
+      // This is used for call site debug information.
+      AssertDI(I.first != LLVMContext::MD_dbg ||
+                   !cast<DISubprogram>(I.second)->isDistinct(),
+               "function declaration may only have a unique !dbg attachment",
+               &F);
       Assert(I.first != LLVMContext::MD_prof,
              "function declaration may not have a !prof attachment", &F);
 
@@ -2299,36 +2321,44 @@ void Verifier::visitFunction(const Function &F) {
   // FIXME: Check this incrementally while visiting !dbg attachments.
   // FIXME: Only check when N is the canonical subprogram for F.
   SmallPtrSet<const MDNode *, 32> Seen;
-  for (auto &BB : F)
-    for (auto &I : BB) {
-      // Be careful about using DILocation here since we might be dealing with
-      // broken code (this is the Verifier after all).
-      DILocation *DL =
-          dyn_cast_or_null<DILocation>(I.getDebugLoc().getAsMDNode());
-      if (!DL)
-        continue;
-      if (!Seen.insert(DL).second)
-        continue;
+  auto VisitDebugLoc = [&](const Instruction &I, const MDNode *Node) {
+    // Be careful about using DILocation here since we might be dealing with
+    // broken code (this is the Verifier after all).
+    const DILocation *DL = dyn_cast_or_null<DILocation>(Node);
+    if (!DL)
+      return;
+    if (!Seen.insert(DL).second)
+      return;
 
-      Metadata *Parent = DL->getRawScope();
-      AssertDI(Parent && isa<DILocalScope>(Parent),
-               "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
-               Parent);
-      DILocalScope *Scope = DL->getInlinedAtScope();
-      if (Scope && !Seen.insert(Scope).second)
-        continue;
+    Metadata *Parent = DL->getRawScope();
+    AssertDI(Parent && isa<DILocalScope>(Parent),
+             "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
+             Parent);
+    DILocalScope *Scope = DL->getInlinedAtScope();
+    if (Scope && !Seen.insert(Scope).second)
+      return;
 
-      DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
+    DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
 
-      // Scope and SP could be the same MDNode and we don't want to skip
-      // validation in that case
-      if (SP && ((Scope != SP) && !Seen.insert(SP).second))
-        continue;
+    // Scope and SP could be the same MDNode and we don't want to skip
+    // validation in that case
+    if (SP && ((Scope != SP) && !Seen.insert(SP).second))
+      return;
 
-      // FIXME: Once N is canonical, check "SP == &N".
-      AssertDI(SP->describes(&F),
-               "!dbg attachment points at wrong subprogram for function", N, &F,
-               &I, DL, Scope, SP);
+    // FIXME: Once N is canonical, check "SP == &N".
+    AssertDI(SP->describes(&F),
+             "!dbg attachment points at wrong subprogram for function", N, &F,
+             &I, DL, Scope, SP);
+  };
+  for (auto &BB : F)
+    for (auto &I : BB) {
+      VisitDebugLoc(I, I.getDebugLoc().getAsMDNode());
+      // The llvm.loop annotations also contain two DILocations.
+      if (auto MD = I.getMetadata(LLVMContext::MD_loop))
+        for (unsigned i = 1; i < MD->getNumOperands(); ++i)
+          VisitDebugLoc(I, dyn_cast_or_null<MDNode>(MD->getOperand(i)));
+      if (BrokenDebugInfo)
+        return;
     }
 }
 
@@ -2451,6 +2481,26 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
   visitTerminator(BI);
 }
 
+void Verifier::visitCallBrInst(CallBrInst &CBI) {
+  // Accept only the void-typed inline-asm form, then check label operands.
+  Assert(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!",
+         &CBI);
+  Assert(CBI.getType()->isVoidTy(), "Callbr return value is not supported!",
+         &CBI);
+  for (unsigned Idx = 0, End = CBI.getNumSuccessors(); Idx != End; ++Idx)
+    Assert(CBI.getSuccessor(Idx)->getType()->isLabelTy(),
+           "Callbr successors must all have pointer type!", &CBI);
+  for (unsigned Idx = 0, End = CBI.getNumOperands(); Idx != End; ++Idx) {
+    const bool IsBlock = isa<BasicBlock>(CBI.getOperand(Idx));
+    Assert(Idx >= CBI.getNumArgOperands() || !IsBlock,
+           "Using an unescaped label as a callbr argument!", &CBI);
+    for (unsigned Other = Idx + 1; IsBlock && Other != End; ++Other)
+      Assert(CBI.getOperand(Idx) != CBI.getOperand(Other),
+             "Duplicate callbr destination!", &CBI);
+  }
+  visitTerminator(CBI);
+}
+
 void Verifier::visitSelectInst(SelectInst &SI) {
   Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
                                          SI.getOperand(2)),
@@ -2780,17 +2830,21 @@ void Verifier::visitCallBase(CallBase &Call) {
   Assert(verifyAttributeCount(Attrs, Call.arg_size()),
          "Attribute after last parameter!", Call);
 
+  bool IsIntrinsic = Call.getCalledFunction() &&
+                     Call.getCalledFunction()->getName().startswith("llvm.");
+
+  Function *Callee
+    = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
+
   if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
     // Don't allow speculatable on call sites, unless the underlying function
     // declaration is also speculatable.
-    Function *Callee =
-        dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
     Assert(Callee && Callee->isSpeculatable(),
            "speculatable attribute may not apply to call sites", Call);
   }
 
   // Verify call attributes.
-  verifyFunctionAttrs(FTy, Attrs, &Call);
+  verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic);
 
   // Conservatively check the inalloca argument.
   // We have a bug if we can find that there is an underlying alloca without
@@ -2805,7 +2859,7 @@ void Verifier::visitCallBase(CallBase &Call) {
   // For each argument of the callsite, if it has the swifterror argument,
   // make sure the underlying alloca/parameter it comes from has a swifterror as
   // well.
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
     if (Call.paramHasAttr(i, Attribute::SwiftError)) {
       Value *SwiftErrorArg = Call.getArgOperand(i);
       if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
@@ -2822,6 +2876,21 @@ void Verifier::visitCallBase(CallBase &Call) {
              Call);
     }
 
+    if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
+      // Don't allow immarg on call sites, unless the underlying declaration
+      // also has the matching immarg.
+      Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
+             "immarg may not apply only to call sites",
+             Call.getArgOperand(i), Call);
+    }
+
+    if (Call.paramHasAttr(i, Attribute::ImmArg)) {
+      Value *ArgVal = Call.getArgOperand(i);
+      Assert(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
+             "immarg operand has non-immediate parameter", ArgVal, Call);
+    }
+  }
+
   if (FTy->isVarArg()) {
     // FIXME? is 'nest' even legal here?
     bool SawNest = false;
@@ -2871,8 +2940,7 @@ void Verifier::visitCallBase(CallBase &Call) {
   }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
-  if (!Call.getCalledFunction() ||
-      !Call.getCalledFunction()->getName().startswith("llvm.")) {
+  if (!IsIntrinsic) {
     for (Type *ParamTy : FTy->params()) {
       Assert(!ParamTy->isMetadataTy(),
              "Function has metadata parameter but isn't an intrinsic", Call);
@@ -3236,7 +3304,7 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
   unsigned NumRanges = NumOperands / 2;
   Assert(NumRanges >= 1, "It should have at least one range!", Range);
 
-  ConstantRange LastRange(1); // Dummy initial value
+  ConstantRange LastRange(1, true); // Dummy initial value
   for (unsigned i = 0; i < NumRanges; ++i) {
     ConstantInt *Low =
         mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i));
@@ -3431,10 +3499,22 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
   PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
   Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
   Type *ElTy = PTy->getElementType();
-  Assert(ElTy->isIntegerTy(), "atomicrmw " +
-         AtomicRMWInst::getOperationName(Op) +
-         " operand must have integer type!",
-         &RMWI, ElTy);
+  if (Op == AtomicRMWInst::Xchg) {
+    Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have integer or floating point type!",
+           &RMWI, ElTy);
+  } else if (AtomicRMWInst::isFPOperation(Op)) {
+    Assert(ElTy->isFloatingPointTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have floating point type!",
+           &RMWI, ElTy);
+  } else {
+    Assert(ElTy->isIntegerTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have integer type!",
+           &RMWI, ElTy);
+  }
   checkAtomicMemAccessSize(ElTy, &RMWI);
   Assert(ElTy == RMWI.getOperand(1)->getType(),
          "Argument value type does not match pointer operand type!", &RMWI,
@@ -3886,7 +3966,7 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
   }
 
   // Quick check whether the def has already been encountered in the same block.
-  // PHI nodes are not checked to prevent accepting preceeding PHIs, because PHI
+  // PHI nodes are not checked to prevent accepting preceding PHIs, because PHI
   // uses are defined to happen on the incoming edge, not at the instruction.
   //
   // FIXME: If this operand is a MetadataAsValue (wrapping a LocalAsMetadata)
@@ -3981,7 +4061,8 @@ void Verifier::visitInstruction(Instruction &I) {
               F->getIntrinsicID() == Intrinsic::coro_destroy ||
               F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
               F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
+              F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
+              F->getIntrinsicID() == Intrinsic::wasm_rethrow_in_catch,
           "Cannot invoke an intrinsic other than donothing, patchpoint, "
           "statepoint, coro_resume or coro_destroy",
           &I);
@@ -4095,14 +4176,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   getIntrinsicInfoTableEntries(ID, Table);
   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
 
+  // Walk the descriptors to extract overloaded types.
   SmallVector<Type *, 4> ArgTys;
-  Assert(!Intrinsic::matchIntrinsicType(IFTy->getReturnType(),
-                                        TableRef, ArgTys),
+  Intrinsic::MatchIntrinsicTypesResult Res =
+      Intrinsic::matchIntrinsicSignature(IFTy, TableRef, ArgTys);
+  Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet,
          "Intrinsic has incorrect return type!", IF);
-  for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
-    Assert(!Intrinsic::matchIntrinsicType(IFTy->getParamType(i),
-                                          TableRef, ArgTys),
-           "Intrinsic has incorrect argument type!", IF);
+  Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg,
+         "Intrinsic has incorrect argument type!", IF);
 
   // Verify if the intrinsic call matches the vararg property.
   if (IsVarArg)
@@ -4149,19 +4230,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       "an array");
     break;
   }
-  case Intrinsic::ctlz:  // llvm.ctlz
-  case Intrinsic::cttz:  // llvm.cttz
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)),
-           "is_zero_undef argument of bit counting intrinsics must be a "
-           "constant int",
-           Call);
-    break;
   case Intrinsic::experimental_constrained_fadd:
   case Intrinsic::experimental_constrained_fsub:
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fptrunc:
+  case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -4211,9 +4287,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
              "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
              Call);
     }
-    Assert(isa<ConstantInt>(Call.getArgOperand(3)),
-           "isvolatile argument of memory intrinsics must be a constant int",
-           Call);
+
     break;
   }
   case Intrinsic::memcpy_element_unordered_atomic:
@@ -4222,11 +4296,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
 
     ConstantInt *ElementSizeCI =
-        dyn_cast<ConstantInt>(AMI->getRawElementSizeInBytes());
-    Assert(ElementSizeCI,
-           "element size of the element-wise unordered atomic memory "
-           "intrinsic must be a constant int",
-           Call);
+        cast<ConstantInt>(AMI->getRawElementSizeInBytes());
     const APInt &ElementSizeVal = ElementSizeCI->getValue();
     Assert(ElementSizeVal.isPowerOf2(),
            "element size of the element-wise atomic memory intrinsic "
@@ -4281,28 +4351,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
            Call);
     break;
   case Intrinsic::prefetch:
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)) &&
-               isa<ConstantInt>(Call.getArgOperand(2)) &&
-               cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
-               cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+    Assert(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+           cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
            "invalid arguments to llvm.prefetch", Call);
     break;
   case Intrinsic::stackprotector:
     Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
            "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
     break;
-  case Intrinsic::lifetime_start:
-  case Intrinsic::lifetime_end:
-  case Intrinsic::invariant_start:
-    Assert(isa<ConstantInt>(Call.getArgOperand(0)),
-           "size argument of memory use markers must be a constant integer",
-           Call);
-    break;
-  case Intrinsic::invariant_end:
-    Assert(isa<ConstantInt>(Call.getArgOperand(1)),
-           "llvm.invariant.end parameter #2 must be a constant integer", Call);
-    break;
-
   case Intrinsic::localescape: {
     BasicBlock *BB = Call.getParent();
     Assert(BB == &BB->getParent()->front(),
@@ -4327,9 +4383,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
            "llvm.localrecover first "
            "argument must be function defined in this module",
            Call);
-    auto *IdxArg = dyn_cast<ConstantInt>(Call.getArgOperand(2));
-    Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
-           Call);
+    auto *IdxArg = cast<ConstantInt>(Call.getArgOperand(2));
     auto &Entry = FrameEscapeInfo[Fn];
     Entry.second = unsigned(
         std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -4484,11 +4538,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
            Call);
 
     Value *Ptr = Call.getArgOperand(0);
-    // Value *Alignment = Call.getArgOperand(1);
+    ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(1));
     Value *Mask = Call.getArgOperand(2);
     Value *PassThru = Call.getArgOperand(3);
     Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
            Call);
+    Assert(Alignment->getValue().isPowerOf2(),
+           "masked_load: alignment must be a power of 2", Call);
 
     // DataTy is the overloaded type
     Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
@@ -4504,10 +4560,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
   case Intrinsic::masked_store: {
     Value *Val = Call.getArgOperand(0);
     Value *Ptr = Call.getArgOperand(1);
-    // Value *Alignment = Call.getArgOperand(2);
+    ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(2));
     Value *Mask = Call.getArgOperand(3);
     Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
            Call);
+    Assert(Alignment->getValue().isPowerOf2(),
+           "masked_store: alignment must be a power of 2", Call);
 
     // DataTy is the overloaded type
     Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
@@ -4563,22 +4621,41 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
            "of ints");
     break;
   }
-  case Intrinsic::smul_fix: {
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
+  case Intrinsic::umul_fix: {
     Value *Op1 = Call.getArgOperand(0);
     Value *Op2 = Call.getArgOperand(1);
     Assert(Op1->getType()->isIntOrIntVectorTy(),
-           "first operand of smul_fix must be an int type or vector "
+           "first operand of [us]mul_fix[_sat] must be an int type or vector "
            "of ints");
     Assert(Op2->getType()->isIntOrIntVectorTy(),
-           "second operand of smul_fix must be an int type or vector "
+           "second operand of [us]mul_fix_[sat] must be an int type or vector "
            "of ints");
 
-    auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
-    Assert(Op3, "third argument of smul_fix must be a constant integer");
+    auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
     Assert(Op3->getType()->getBitWidth() <= 32,
-           "third argument of smul_fix must fit within 32 bits");
-    Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
-           "the scale of smul_fix must be less than the width of the operands");
+           "third argument of [us]mul_fix[_sat] must fit within 32 bits");
+
+    if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat) {
+      Assert(
+          Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+          "the scale of smul_fix[_sat] must be less than the width of the operands");
+    } else {
+      Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
+             "the scale of umul_fix[_sat] must be less than or equal to the width of "
+             "the operands");
+    }
+    break;
+  }
+  case Intrinsic::lround:
+  case Intrinsic::llround:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint: {
+    Type *ValTy = Call.getArgOperand(0)->getType();
+    Type *ResultTy = Call.getType();
+    Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+           "Intrinsic does not support vectors", &Call);
     break;
   }
   };
@@ -4605,17 +4682,109 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
 
 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
   unsigned NumOperands = FPI.getNumArgOperands();
-  Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
-          (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
-           "invalid arguments for constrained FP intrinsic", &FPI);
-  Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
-         "invalid exception behavior argument", &FPI);
-  Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
-         "invalid rounding mode argument", &FPI);
-  Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
-         "invalid rounding mode argument", &FPI);
-  Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid,
-         "invalid exception behavior argument", &FPI);
+  bool HasExceptionMD = false;
+  bool HasRoundingMD = false;
+  switch (FPI.getIntrinsicID()) {
+  case Intrinsic::experimental_constrained_sqrt:
+  case Intrinsic::experimental_constrained_sin:
+  case Intrinsic::experimental_constrained_cos:
+  case Intrinsic::experimental_constrained_exp:
+  case Intrinsic::experimental_constrained_exp2:
+  case Intrinsic::experimental_constrained_log:
+  case Intrinsic::experimental_constrained_log10:
+  case Intrinsic::experimental_constrained_log2:
+  case Intrinsic::experimental_constrained_rint:
+  case Intrinsic::experimental_constrained_nearbyint:
+  case Intrinsic::experimental_constrained_ceil:
+  case Intrinsic::experimental_constrained_floor:
+  case Intrinsic::experimental_constrained_round:
+  case Intrinsic::experimental_constrained_trunc:
+    Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic",
+           &FPI);
+    HasExceptionMD = true;
+    HasRoundingMD = true;
+    break;
+
+  case Intrinsic::experimental_constrained_fma:
+    Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic",
+           &FPI);
+    HasExceptionMD = true;
+    HasRoundingMD = true;
+    break;
+
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_pow:
+  case Intrinsic::experimental_constrained_powi:
+  case Intrinsic::experimental_constrained_maxnum:
+  case Intrinsic::experimental_constrained_minnum:
+    Assert((NumOperands == 4), "invalid arguments for constrained FP intrinsic",
+           &FPI);
+    HasExceptionMD = true;
+    HasRoundingMD = true;
+    break;
+
+  case Intrinsic::experimental_constrained_fptrunc:
+  case Intrinsic::experimental_constrained_fpext: {
+    if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
+      Assert((NumOperands == 3),
+             "invalid arguments for constrained FP intrinsic", &FPI);
+      HasRoundingMD = true;
+    } else {
+      Assert((NumOperands == 2),
+             "invalid arguments for constrained FP intrinsic", &FPI);
+    }
+    HasExceptionMD = true;
+
+    Value *Operand = FPI.getArgOperand(0);
+    Type *OperandTy = Operand->getType();
+    Value *Result = &FPI;
+    Type *ResultTy = Result->getType();
+    Assert(OperandTy->isFPOrFPVectorTy(),
+           "Intrinsic first argument must be FP or FP vector", &FPI);
+    Assert(ResultTy->isFPOrFPVectorTy(),
+           "Intrinsic result must be FP or FP vector", &FPI);
+    Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
+           "Intrinsic first argument and result disagree on vector use", &FPI);
+    if (OperandTy->isVectorTy()) {
+      auto *OperandVecTy = cast<VectorType>(OperandTy);
+      auto *ResultVecTy = cast<VectorType>(ResultTy);
+      Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
+             "Intrinsic first argument and result vector lengths must be equal",
+             &FPI);
+    }
+    if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
+      Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
+             "Intrinsic first argument's type must be larger than result type",
+             &FPI);
+    } else {
+      Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
+             "Intrinsic first argument's type must be smaller than result type",
+             &FPI);
+    }
+  } 
+    break;
+
+  default:
+    llvm_unreachable("Invalid constrained FP intrinsic!");
+  }
+
+  // If a non-metadata argument is passed in a metadata slot then the
+  // error will be caught earlier when the incorrect argument doesn't
+  // match the specification in the intrinsic call table. Thus, no
+  // argument type check is needed here.
+
+  if (HasExceptionMD) {
+    Assert(FPI.getExceptionBehavior().hasValue(),
+           "invalid exception behavior argument", &FPI);
+  }
+  if (HasRoundingMD) {
+    Assert(FPI.getRoundingMode().hasValue(),
+           "invalid rounding mode argument", &FPI);
+  }
 }
 
 void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index 36bbf719bb61..7ca6c2fca52a 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -1,9 +1,8 @@
 //===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,9 +29,9 @@ static const char *const TimeIRParsingGroupDescription = "LLVM IR Parsing";
 static const char *const TimeIRParsingName = "parse";
 static const char *const TimeIRParsingDescription = "Parse IR";
 
-static std::unique_ptr<Module>
-getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
-                LLVMContext &Context, bool ShouldLazyLoadMetadata) {
+std::unique_ptr<Module>
+llvm::getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
+                      LLVMContext &Context, bool ShouldLazyLoadMetadata) {
   if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
                 (const unsigned char *)Buffer->getBufferEnd())) {
     Expected<std::unique_ptr<Module>> ModuleOrErr = getOwningLazyBitcodeModule(
diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp
index 089e77e742eb..000ab91dba7c 100644
--- a/lib/LTO/Caching.cpp
+++ b/lib/LTO/Caching.cpp
@@ -1,9 +1,8 @@
 //===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,21 +38,23 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
     SmallString<64> EntryPath;
     sys::path::append(EntryPath, CacheDirectoryPath, "llvmcache-" + Key);
     // First, see if we have a cache hit.
-    int FD;
     SmallString<64> ResultPath;
-    std::error_code EC = sys::fs::openFileForRead(
-        Twine(EntryPath), FD, sys::fs::OF_UpdateAtime, &ResultPath);
-    if (!EC) {
+    Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
+        Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
+    std::error_code EC;
+    if (FDOrErr) {
       ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
-          MemoryBuffer::getOpenFile(FD, EntryPath,
-                                    /*FileSize*/ -1,
-                                    /*RequiresNullTerminator*/ false);
-      close(FD);
+          MemoryBuffer::getOpenFile(*FDOrErr, EntryPath,
+                                    /*FileSize=*/-1,
+                                    /*RequiresNullTerminator=*/false);
+      sys::fs::closeFile(*FDOrErr);
       if (MBOrErr) {
         AddBuffer(Task, std::move(*MBOrErr));
         return AddStreamFn();
       }
       EC = MBOrErr.getError();
+    } else {
+      EC = errorToErrorCode(FDOrErr.takeError());
     }
 
     // On Windows we can fail to open a cache file with a permission denied
@@ -87,9 +88,9 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
 
         // Open the file first to avoid racing with a cache pruner.
         ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
-            MemoryBuffer::getOpenFile(TempFile.FD, TempFile.TmpName,
-                                      /*FileSize*/ -1,
-                                      /*RequiresNullTerminator*/ false);
+            MemoryBuffer::getOpenFile(
+                sys::fs::convertFDToNativeFile(TempFile.FD), TempFile.TmpName,
+                /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
         if (!MBOrErr)
           report_fatal_error(Twine("Failed to open new cache file ") +
                              TempFile.TmpName + ": " +
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 3a955060deaa..64506890956a 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -1,9 +1,8 @@
 //===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,9 +20,11 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/LTO/LTOBackend.h"
 #include "llvm/LTO/SummaryBasedOptimizations.h"
 #include "llvm/Linker/IRMover.h"
@@ -186,12 +187,15 @@ void llvm::computeLTOCacheKey(
   auto AddUsedThings = [&](GlobalValueSummary *GS) {
     if (!GS) return;
     AddUnsigned(GS->isLive());
+    AddUnsigned(GS->canAutoHide());
     for (const ValueInfo &VI : GS->refs()) {
       AddUnsigned(VI.isDSOLocal());
       AddUsedCfiGlobal(VI.getGUID());
     }
-    if (auto *GVS = dyn_cast<GlobalVarSummary>(GS))
-      AddUnsigned(GVS->isReadOnly());
+    if (auto *GVS = dyn_cast<GlobalVarSummary>(GS)) {
+      AddUnsigned(GVS->maybeReadOnly());
+      AddUnsigned(GVS->maybeWriteOnly());
+    }
     if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
       for (auto &TT : FS->type_tests())
         UsedTypeIds.insert(TT);
@@ -294,13 +298,13 @@ void llvm::computeLTOCacheKey(
 }
 
 static void thinLTOResolvePrevailingGUID(
-    GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
-    DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
+    ValueInfo VI, DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing,
     function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
-        recordNewLinkage) {
-  for (auto &S : GVSummaryList) {
+        recordNewLinkage,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+  for (auto &S : VI.getSummaryList()) {
     GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
     // Ignore local and appending linkage values since the linker
     // doesn't resolve them.
@@ -315,17 +319,29 @@ static void thinLTOResolvePrevailingGUID(
     // ensure a copy is kept to satisfy the exported reference.
     // FIXME: We may want to split the compile time and correctness
     // aspects into separate routines.
-    if (isPrevailing(GUID, S.get())) {
-      if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
+    if (isPrevailing(VI.getGUID(), S.get())) {
+      if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) {
         S->setLinkage(GlobalValue::getWeakLinkage(
             GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
+        // The kept copy is eligible for auto-hiding (hidden visibility) if all
+        // copies were (i.e. they were all linkonce_odr global unnamed addr).
+        // If any copy is not (e.g. it was originally weak_odr), then the symbol
+        // must remain externally available (e.g. a weak_odr from an explicitly
+        // instantiated template). Additionally, if it is in the
+        // GUIDPreservedSymbols set, that means that it is visible outside
+        // the summary (e.g. in a native object or a bitcode file without
+        // summary), and in that case we cannot hide it as it isn't possible to
+        // check all copies.
+        S->setCanAutoHide(VI.canAutoHide() &&
+                          !GUIDPreservedSymbols.count(VI.getGUID()));
+      }
     }
     // Alias and aliasee can't be turned into available_externally.
     else if (!isa<AliasSummary>(S.get()) &&
              !GlobalInvolvedWithAlias.count(S.get()))
       S->setLinkage(GlobalValue::AvailableExternallyLinkage);
     if (S->linkage() != OriginalLinkage)
-      recordNewLinkage(S->modulePath(), GUID, S->linkage());
+      recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
   }
 }
 
@@ -340,7 +356,8 @@ void llvm::thinLTOResolvePrevailingInIndex(
     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing,
     function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
-        recordNewLinkage) {
+        recordNewLinkage,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   // We won't optimize the globals that are referenced by an alias for now
   // Ideally we should turn the alias into a global and duplicate the definition
   // when needed.
@@ -351,9 +368,17 @@ void llvm::thinLTOResolvePrevailingInIndex(
         GlobalInvolvedWithAlias.insert(&AS->getAliasee());
 
   for (auto &I : Index)
-    thinLTOResolvePrevailingGUID(I.second.SummaryList, I.first,
-                                 GlobalInvolvedWithAlias, isPrevailing,
-                                 recordNewLinkage);
+    thinLTOResolvePrevailingGUID(Index.getValueInfo(I), GlobalInvolvedWithAlias,
+                                 isPrevailing, recordNewLinkage,
+                                 GUIDPreservedSymbols);
+}
+
+static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) {
+  if (auto *VarSummary = dyn_cast<GlobalVarSummary>(GVS->getBaseObject()))
+    return !VarSummary->maybeReadOnly() && !VarSummary->maybeWriteOnly() &&
+           (VarSummary->linkage() == GlobalValue::WeakODRLinkage ||
+            VarSummary->linkage() == GlobalValue::LinkOnceODRLinkage);
+  return false;
 }
 
 static void thinLTOInternalizeAndPromoteGUID(
@@ -370,7 +395,13 @@ static void thinLTOInternalizeAndPromoteGUID(
                S->linkage() != GlobalValue::AppendingLinkage &&
                // We can't internalize available_externally globals because this
                // can break function pointer equality.
-               S->linkage() != GlobalValue::AvailableExternallyLinkage)
+               S->linkage() != GlobalValue::AvailableExternallyLinkage &&
+               // Functions and read-only variables with linkonce_odr and
+               // weak_odr linkage can be internalized. We can't internalize
+               // linkonce_odr and weak_odr variables which are both modified
+               // and read somewhere in the program because reads and writes
+               // will become inconsistent.
+               !isWeakObjectWithRWAccess(S.get()))
       S->setLinkage(GlobalValue::InternalLinkage);
   }
 }
@@ -397,6 +428,7 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
   File->TargetTriple = FOrErr->TheReader.getTargetTriple();
   File->SourceFileName = FOrErr->TheReader.getSourceFileName();
   File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
+  File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
   File->ComdatTable = FOrErr->TheReader.getComdatTable();
 
   for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
@@ -419,6 +451,11 @@ StringRef InputFile::getName() const {
   return Mods[0].getModuleIdentifier();
 }
 
+BitcodeModule &InputFile::getSingleBitcodeModule() {
+  assert(Mods.size() == 1 && "Expect only one bitcode module");
+  return Mods[0];
+}
+
 LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
                                       Config &Conf)
     : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
@@ -809,6 +846,45 @@ unsigned LTO::getMaxTasks() const {
   return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size();
 }
 
+// If only some of the modules were split, we cannot correctly handle
+// code that contains type tests or type checked loads.
+Error LTO::checkPartiallySplit() {
+  if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
+    return Error::success();
+
+  Function *TypeTestFunc = RegularLTO.CombinedModule->getFunction(
+      Intrinsic::getName(Intrinsic::type_test));
+  Function *TypeCheckedLoadFunc = RegularLTO.CombinedModule->getFunction(
+      Intrinsic::getName(Intrinsic::type_checked_load));
+
+  // First check if there are type tests / type checked loads in the
+  // merged regular LTO module IR.
+  if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
+      (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+    return make_error<StringError>(
+        "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
+        inconvertibleErrorCode());
+
+  // Otherwise check if there are any recorded in the combined summary from the
+  // ThinLTO modules.
+  for (auto &P : ThinLTO.CombinedIndex) {
+    for (auto &S : P.second.SummaryList) {
+      auto *FS = dyn_cast<FunctionSummary>(S.get());
+      if (!FS)
+        continue;
+      if (!FS->type_test_assume_vcalls().empty() ||
+          !FS->type_checked_load_vcalls().empty() ||
+          !FS->type_test_assume_const_vcalls().empty() ||
+          !FS->type_checked_load_const_vcalls().empty() ||
+          !FS->type_tests().empty())
+        return make_error<StringError>(
+            "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
+            inconvertibleErrorCode());
+    }
+  }
+  return Error::success();
+}
+
 Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
   // Compute "dead" symbols, we don't want to import/export these!
   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
@@ -840,20 +916,25 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
                                   isPrevailing, Conf.OptLevel > 0);
 
   // Setup output file to emit statistics.
-  std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
-  if (!Conf.StatsFile.empty()) {
-    EnableStatistics(false);
-    std::error_code EC;
-    StatsFile =
-        llvm::make_unique<ToolOutputFile>(Conf.StatsFile, EC, sys::fs::F_None);
-    if (EC)
-      return errorCodeToError(EC);
-    StatsFile->keep();
-  }
+  auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
+  if (!StatsFileOrErr)
+    return StatsFileOrErr.takeError();
+  std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
+
+  // Finalize linking of regular LTO modules containing summaries now that
+  // we have computed liveness information.
+  for (auto &M : RegularLTO.ModsWithSummaries)
+    if (Error Err = linkRegularLTO(std::move(M),
+                                   /*LivenessFromIndex=*/true))
+      return Err;
+
+  // Ensure we don't have inconsistently split LTO units with type tests.
+  if (Error Err = checkPartiallySplit())
+    return Err;
 
   Error Result = runRegularLTO(AddStream);
   if (!Result)
-    Result = runThinLTO(AddStream, Cache);
+    Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
 
   if (StatsFile)
     PrintStatisticsJSON(StatsFile->os());
@@ -862,11 +943,6 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
 }
 
 Error LTO::runRegularLTO(AddStreamFn AddStream) {
-  for (auto &M : RegularLTO.ModsWithSummaries)
-    if (Error Err = linkRegularLTO(std::move(M),
-                                   /*LivenessFromIndex=*/true))
-      return Err;
-
   // Make sure commons have the right size/alignment: we kept the largest from
   // all the prevailing when adding the inputs, and we apply it here.
   const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
@@ -1161,7 +1237,8 @@ ThinBackend lto::createWriteIndexesThinBackend(
   };
 }
 
-Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
+Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+                      const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   if (ThinLTO.ModuleMap.empty())
     return Error::success();
 
@@ -1243,7 +1320,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
   };
   thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing,
-                                  recordNewLinkage);
+                                  recordNewLinkage, GUIDPreservedSymbols);
 
   std::unique_ptr<ThinBackendProc> BackendProc =
       ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
@@ -1264,25 +1341,37 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
 }
 
 Expected<std::unique_ptr<ToolOutputFile>>
-lto::setupOptimizationRemarks(LLVMContext &Context,
-                              StringRef LTORemarksFilename,
-                              bool LTOPassRemarksWithHotness, int Count) {
-  if (LTOPassRemarksWithHotness)
-    Context.setDiagnosticsHotnessRequested(true);
-  if (LTORemarksFilename.empty())
-    return nullptr;
-
-  std::string Filename = LTORemarksFilename;
-  if (Count != -1)
+lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+                              StringRef RemarksPasses, StringRef RemarksFormat,
+                              bool RemarksWithHotness, int Count) {
+  std::string Filename = RemarksFilename;
+  if (!Filename.empty() && Count != -1)
     Filename += ".thin." + llvm::utostr(Count) + ".yaml";
 
+  auto ResultOrErr = llvm::setupOptimizationRemarks(
+      Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness);
+  if (Error E = ResultOrErr.takeError())
+    return std::move(E);
+
+  if (*ResultOrErr)
+    (*ResultOrErr)->keep();
+
+  return ResultOrErr;
+}
+
+Expected<std::unique_ptr<ToolOutputFile>>
+lto::setupStatsFile(StringRef StatsFilename) {
+  // Setup output file to emit statistics.
+  if (StatsFilename.empty())
+    return nullptr;
+
+  llvm::EnableStatistics(false);
   std::error_code EC;
-  auto DiagnosticFile =
-      llvm::make_unique<ToolOutputFile>(Filename, EC, sys::fs::F_None);
+  auto StatsFile =
+      llvm::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::F_None);
   if (EC)
     return errorCodeToError(EC);
-  Context.setDiagnosticsOutputFile(
-      llvm::make_unique<yaml::Output>(DiagnosticFile->os()));
-  DiagnosticFile->keep();
-  return std::move(DiagnosticFile);
+
+  StatsFile->keep();
+  return std::move(StatsFile);
 }
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 926c419e34a8..7456e7175163 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -1,9 +1,8 @@
 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/MC/SubtargetFeature.h"
@@ -33,9 +33,9 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -155,10 +155,17 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
                            const ModuleSummaryIndex *ImportSummary) {
   Optional<PGOOptions> PGOOpt;
   if (!Conf.SampleProfile.empty())
-    PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
-                        false, true);
+    PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
+                        PGOOptions::SampleUse, PGOOptions::NoCSAction, true);
+  else if (Conf.RunCSIRInstr) {
+    PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
+                        PGOOptions::IRUse, PGOOptions::CSIRInstr);
+  } else if (!Conf.CSIRProfile.empty()) {
+    PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
+                        PGOOptions::IRUse, PGOOptions::CSIRUse);
+  }
 
-  PassBuilder PB(TM, PGOOpt);
+  PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt);
   AAManager AA;
 
   // Parse a custom AA pipeline if asked to.
@@ -274,6 +281,11 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
   PMB.SLPVectorize = true;
   PMB.OptLevel = Conf.OptLevel;
   PMB.PGOSampleUse = Conf.SampleProfile;
+  PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr;
+  if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) {
+    PMB.EnablePGOCSInstrUse = true;
+    PMB.PGOInstrUse = Conf.CSIRProfile;
+  }
   if (IsThinLTO)
     PMB.populateThinLTOPassManager(passes);
   else
@@ -302,7 +314,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
     return;
 
   std::unique_ptr<ToolOutputFile> DwoOut;
-  SmallString<1024> DwoFile(Conf.DwoPath);
+  SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
   if (!Conf.DwoDir.empty()) {
     std::error_code EC;
     if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
@@ -311,11 +323,12 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
 
     DwoFile = Conf.DwoDir;
     sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
-  }
+    TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
+  } else
+    TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile;
 
   if (!DwoFile.empty()) {
     std::error_code EC;
-    TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
     DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
     if (EC)
       report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
@@ -419,7 +432,8 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
 
   // Setup optimization remarks.
   auto DiagFileOrErr = lto::setupOptimizationRemarks(
-      Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
+      Mod->getContext(), C.RemarksFilename, C.RemarksPasses, C.RemarksFormat,
+      C.RemarksWithHotness);
   if (!DiagFileOrErr)
     return DiagFileOrErr.takeError();
   auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
@@ -473,7 +487,8 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
 
   // Setup optimization remarks.
   auto DiagFileOrErr = lto::setupOptimizationRemarks(
-      Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
+      Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses,
+      Conf.RemarksFormat, Conf.RemarksWithHotness, Task);
   if (!DiagFileOrErr)
     return DiagFileOrErr.takeError();
   auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 3b63bbc7e256..6bb3bfaefc9c 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -1,9 +1,8 @@
 //===-LTOCodeGenerator.cpp - LLVM Link Time Optimizer ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,7 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/LTO/LTO.h"
@@ -81,15 +81,31 @@ cl::opt<bool> LTODiscardValueNames(
 #endif
     cl::Hidden);
 
-cl::opt<std::string>
-    LTORemarksFilename("lto-pass-remarks-output",
-                       cl::desc("Output filename for pass remarks"),
-                       cl::value_desc("filename"));
-
-cl::opt<bool> LTOPassRemarksWithHotness(
+cl::opt<bool> RemarksWithHotness(
     "lto-pass-remarks-with-hotness",
     cl::desc("With PGO, include profile count in optimization remarks"),
     cl::Hidden);
+
+cl::opt<std::string>
+    RemarksFilename("lto-pass-remarks-output",
+                    cl::desc("Output filename for pass remarks"),
+                    cl::value_desc("filename"));
+
+cl::opt<std::string>
+    RemarksPasses("lto-pass-remarks-filter",
+                  cl::desc("Only record optimization remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+cl::opt<std::string> RemarksFormat(
+    "lto-pass-remarks-format",
+    cl::desc("The format used for serializing remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
+
+cl::opt<std::string> LTOStatsFile(
+    "lto-stats-file",
+    cl::desc("Save statistics to the specified file"),
+    cl::Hidden);
 }
 
 LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
@@ -120,6 +136,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
   initializeArgPromotionPass(R);
   initializeJumpThreadingPass(R);
   initializeSROALegacyPassPass(R);
+  initializeAttributorLegacyPassPass(R);
   initializePostOrderFunctionAttrsLegacyPassPass(R);
   initializeReversePostOrderFunctionAttrsLegacyPassPass(R);
   initializeGlobalsAAWrapperPassPass(R);
@@ -505,14 +522,23 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
   if (!this->determineTarget())
     return false;
 
-  auto DiagFileOrErr = lto::setupOptimizationRemarks(
-      Context, LTORemarksFilename, LTOPassRemarksWithHotness);
+  auto DiagFileOrErr =
+      lto::setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+                                    RemarksFormat, RemarksWithHotness);
   if (!DiagFileOrErr) {
     errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
     report_fatal_error("Can't get an output file for the remarks");
   }
   DiagnosticOutputFile = std::move(*DiagFileOrErr);
 
+  // Setup output file to emit statistics.
+  auto StatsFileOrErr = lto::setupStatsFile(LTOStatsFile);
+  if (!StatsFileOrErr) {
+    errs() << "Error: " << toString(StatsFileOrErr.takeError()) << "\n";
+    report_fatal_error("Can't get an output file for the statistics");
+  }
+  StatsFile = std::move(StatsFileOrErr.get());
+
   // We always run the verifier once on the merged module, the `DisableVerify`
   // parameter only applies to subsequent verify.
   verifyMergedModuleOnce();
@@ -579,9 +605,13 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) {
                               [&]() { return createTargetMachine(); }, FileType,
                               ShouldRestoreGlobalsLinkage);
 
-  // If statistics were requested, print them out after codegen.
-  if (llvm::AreStatisticsEnabled())
-    llvm::PrintStatistics();
+  // If statistics were requested, save them to the specified file or
+  // print them out after codegen.
+  if (StatsFile)
+    PrintStatisticsJSON(StatsFile->os());
+  else if (AreStatisticsEnabled())
+    PrintStatistics();
+
   reportAndResetTimings();
 
   finishOptimizationRemarks();
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 0d40d49dbe39..7ffe7bf84ba8 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -1,9 +1,8 @@
 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -131,7 +130,8 @@ LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
                                    size_t map_size, off_t offset,
                                    const TargetOptions &options) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
-      MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
+      MemoryBuffer::getOpenFileSlice(sys::fs::convertFDToNativeFile(fd), path,
+                                     map_size, offset);
   if (std::error_code EC = BufferOrErr.getError()) {
     Context.emitError(EC.message());
     return EC;
@@ -646,6 +646,32 @@ void LTOModule::parseMetadata() {
       continue;
     emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M);
   }
+}
+
+lto::InputFile *LTOModule::createInputFile(const void *buffer,
+                                           size_t buffer_size, const char *path,
+                                           std::string &outErr) {
+  StringRef Data((const char *)buffer, buffer_size);
+  MemoryBufferRef BufferRef(Data, path);
+
+  Expected<std::unique_ptr<lto::InputFile>> ObjOrErr =
+      lto::InputFile::create(BufferRef);
+
+  if (ObjOrErr)
+    return ObjOrErr->release();
+
+  outErr = std::string(path) +
+           ": Could not read LTO input file: " + toString(ObjOrErr.takeError());
+  return nullptr;
+}
+
+size_t LTOModule::getDependentLibraryCount(lto::InputFile *input) {
+  return input->getDependentLibraries().size();
+}
 
-  // Add other interesting metadata here.
+const char *LTOModule::getDependentLibrary(lto::InputFile *input, size_t index,
+                                           size_t *size) {
+  StringRef S = input->getDependentLibraries()[index];
+  *size = S.size();
+  return S.data();
 }
diff --git a/lib/LTO/SummaryBasedOptimizations.cpp b/lib/LTO/SummaryBasedOptimizations.cpp
index bcdd984daa58..e919fd530fb0 100644
--- a/lib/LTO/SummaryBasedOptimizations.cpp
+++ b/lib/LTO/SummaryBasedOptimizations.cpp
@@ -1,9 +1,8 @@
 //==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index d9ec68fe3eb5..1c52218836ca 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1,9 +1,8 @@
 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,6 +29,7 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/LTO/LTO.h"
@@ -70,8 +70,10 @@ using namespace llvm;
 namespace llvm {
 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
 extern cl::opt<bool> LTODiscardValueNames;
-extern cl::opt<std::string> LTORemarksFilename;
-extern cl::opt<bool> LTOPassRemarksWithHotness;
+extern cl::opt<std::string> RemarksFilename;
+extern cl::opt<std::string> RemarksPasses;
+extern cl::opt<bool> RemarksWithHotness;
+extern cl::opt<std::string> RemarksFormat;
 }
 
 namespace {
@@ -135,14 +137,13 @@ static void computePrevailingCopies(
   }
 }
 
-static StringMap<MemoryBufferRef>
-generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
-  StringMap<MemoryBufferRef> ModuleMap;
-  for (auto &ModuleBuffer : Modules) {
-    assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
-               ModuleMap.end() &&
+static StringMap<lto::InputFile *>
+generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
+  StringMap<lto::InputFile *> ModuleMap;
+  for (auto &M : Modules) {
+    assert(ModuleMap.find(M->getName()) == ModuleMap.end() &&
            "Expect unique Buffer Identifier");
-    ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
+    ModuleMap[M->getName()] = M.get();
   }
   return ModuleMap;
 }
@@ -175,18 +176,19 @@ static void verifyLoadedModule(Module &TheModule) {
   }
 }
 
-static std::unique_ptr<Module>
-loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
-                     bool Lazy, bool IsImporting) {
+static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input,
+                                                   LLVMContext &Context,
+                                                   bool Lazy,
+                                                   bool IsImporting) {
+  auto &Mod = Input->getSingleBitcodeModule();
   SMDiagnostic Err;
   Expected<std::unique_ptr<Module>> ModuleOrErr =
-      Lazy
-          ? getLazyBitcodeModule(Buffer, Context,
-                                 /* ShouldLazyLoadMetadata */ true, IsImporting)
-          : parseBitcodeFile(Buffer, Context);
+      Lazy ? Mod.getLazyModule(Context,
+                               /* ShouldLazyLoadMetadata */ true, IsImporting)
+           : Mod.parseModule(Context);
   if (!ModuleOrErr) {
     handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
-      SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
+      SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
                                       SourceMgr::DK_Error, EIB.message());
       Err.print("ThinLTO", errs());
     });
@@ -194,16 +196,17 @@ loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
   }
   if (!Lazy)
     verifyLoadedModule(*ModuleOrErr.get());
-  return std::move(ModuleOrErr.get());
+  return std::move(*ModuleOrErr);
 }
 
 static void
 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
-                      StringMap<MemoryBufferRef> &ModuleMap,
+                      StringMap<lto::InputFile*> &ModuleMap,
                       const FunctionImporter::ImportMapTy &ImportList) {
   auto Loader = [&](StringRef Identifier) {
-    return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
-                                /*Lazy=*/true, /*IsImporting*/ true);
+    auto &Input = ModuleMap[Identifier];
+    return loadModuleFromInput(Input, TheModule.getContext(),
+                               /*Lazy=*/true, /*IsImporting*/ true);
   };
 
   FunctionImporter Importer(Index, Loader);
@@ -248,6 +251,15 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
   PM.run(TheModule);
 }
 
+static void
+addUsedSymbolToPreservedGUID(const lto::InputFile &File,
+                             DenseSet<GlobalValue::GUID> &PreservedGUID) {
+  for (const auto &Sym : File.symbols()) {
+    if (Sym.isUsed())
+      PreservedGUID.insert(GlobalValue::getGUID(Sym.getIRName()));
+  }
+}
+
 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
 static DenseSet<GlobalValue::GUID>
 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
@@ -337,17 +349,14 @@ public:
   ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
     if (EntryPath.empty())
       return std::error_code();
-    int FD;
     SmallString<64> ResultPath;
-    std::error_code EC = sys::fs::openFileForRead(
-        Twine(EntryPath), FD, sys::fs::OF_UpdateAtime, &ResultPath);
-    if (EC)
-      return EC;
-    ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
-        MemoryBuffer::getOpenFile(FD, EntryPath,
-                                  /*FileSize*/ -1,
-                                  /*RequiresNullTerminator*/ false);
-    close(FD);
+    Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
+        Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
+    if (!FDOrErr)
+      return errorToErrorCode(FDOrErr.takeError());
+    ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile(
+        *FDOrErr, EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
+    sys::fs::closeFile(*FDOrErr);
     return MBOrErr;
   }
 
@@ -381,7 +390,7 @@ public:
 
 static std::unique_ptr<MemoryBuffer>
 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
-                     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
+                     StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM,
                      const FunctionImporter::ImportMapTy &ImportList,
                      const FunctionImporter::ExportSetTy &ExportList,
                      const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
@@ -447,7 +456,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
 static void resolvePrevailingInIndex(
     ModuleSummaryIndex &Index,
     StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
-        &ResolvedODR) {
+        &ResolvedODR,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
 
   DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
   computePrevailingCopies(Index, PrevailingCopy);
@@ -466,7 +476,8 @@ static void resolvePrevailingInIndex(
     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
   };
 
-  thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage);
+  thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage,
+                                  GUIDPreservedSymbols);
 }
 
 // Initialize the TargetMachine builder for a given Triple
@@ -488,15 +499,14 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
 } // end anonymous namespace
 
 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
-  ThinLTOBuffer Buffer(Data, Identifier);
-  LLVMContext Context;
-  StringRef TripleStr;
-  ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
-      Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
+  MemoryBufferRef Buffer(Data, Identifier);
 
-  if (TripleOrErr)
-    TripleStr = *TripleOrErr;
+  auto InputOrError = lto::InputFile::create(Buffer);
+  if (!InputOrError)
+    report_fatal_error("ThinLTO cannot create input file: " +
+                       toString(InputOrError.takeError()));
 
+  auto TripleStr = (*InputOrError)->getTargetTriple();
   Triple TheTriple(TripleStr);
 
   if (Modules.empty())
@@ -508,7 +518,7 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
     initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple)));
   }
 
-  Modules.push_back(Buffer);
+  Modules.emplace_back(std::move(*InputOrError));
 }
 
 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
@@ -549,9 +559,10 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
   std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
       llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
   uint64_t NextModuleId = 0;
-  for (auto &ModuleBuffer : Modules) {
-    if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(),
-                                           *CombinedIndex, NextModuleId++)) {
+  for (auto &Mod : Modules) {
+    auto &M = Mod->getSingleBitcodeModule();
+    if (Error Err =
+            M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
       // FIXME diagnose
       logAllUnhandledErrors(
           std::move(Err), errs(),
@@ -593,8 +604,8 @@ static void computeDeadSymbolsInIndex(
  * Perform promotion and renaming of exported internal functions.
  * Index is updated to reflect linkage changes from weak resolution.
  */
-void ThinLTOCodeGenerator::promote(Module &TheModule,
-                                   ModuleSummaryIndex &Index) {
+void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
+                                   const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
@@ -606,6 +617,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  // Add used symbol to the preserved symbols.
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -617,7 +631,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
 
   // Resolve prevailing symbols
   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
-  resolvePrevailingInIndex(Index, ResolvedODR);
+  resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
 
   thinLTOResolvePrevailingInModule(
       TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
@@ -633,7 +647,8 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
  * Perform cross-module importing for the module identified by ModuleIdentifier.
  */
 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
-                                             ModuleSummaryIndex &Index) {
+                                             ModuleSummaryIndex &Index,
+                                             const lto::InputFile &File) {
   auto ModuleMap = generateModuleMap(Modules);
   auto ModuleCount = Index.modulePaths().size();
 
@@ -645,6 +660,8 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -663,7 +680,8 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
  */
 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
     Module &TheModule, ModuleSummaryIndex &Index,
-    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
+    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+    const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
@@ -675,6 +693,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -693,7 +713,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
  * Emit the list of files needed for importing into module.
  */
 void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
-                                       ModuleSummaryIndex &Index) {
+                                       ModuleSummaryIndex &Index,
+                                       const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
@@ -705,6 +726,8 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -727,10 +750,12 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
 }
 
 /**
- * Perform internalization. Index is updated to reflect linkage changes.
+ * Perform internalization. Runs promote and internalization together.
+ * Index is updated to reflect linkage changes.
  */
 void ThinLTOCodeGenerator::internalize(Module &TheModule,
-                                       ModuleSummaryIndex &Index) {
+                                       ModuleSummaryIndex &Index,
+                                       const lto::InputFile &File) {
   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -739,6 +764,8 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
   auto GUIDPreservedSymbols =
       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Collect for each module the list of function it defines (GUID -> Summary).
   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -758,8 +785,20 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
   if (ExportList.empty() && GUIDPreservedSymbols.empty())
     return;
 
-  // Internalization
+  // Resolve prevailing symbols
+  StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+  resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
+
+  // Promote the exported values in the index, so that they are promoted
+  // in the module.
   internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+
+  promoteModule(TheModule, Index);
+
+  // Internalization
+  thinLTOResolvePrevailingInModule(
+      TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+
   thinLTOInternalizeModule(TheModule,
                            ModuleToDefinedGVSummaries[ModuleIdentifier]);
 }
@@ -777,11 +816,13 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
 /// Write out the generated object file, either from CacheEntryPath or from
 /// OutputBuffer, preferring hard-link when possible.
 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
-static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
-                                        StringRef SavedObjectsDirectoryPath,
-                                        const MemoryBuffer &OutputBuffer) {
+std::string
+ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
+                                           const MemoryBuffer &OutputBuffer) {
+  auto ArchName = TMBuilder.TheTriple.getArchName();
   SmallString<128> OutputPath(SavedObjectsDirectoryPath);
-  llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
+  llvm::sys::path::append(OutputPath,
+                          Twine(count) + "." + ArchName + ".thinlto.o");
   OutputPath.c_str(); // Ensure the string is null terminated.
   if (sys::fs::exists(OutputPath))
     sys::fs::remove(OutputPath);
@@ -830,23 +871,22 @@ void ThinLTOCodeGenerator::run() {
     // Perform only parallel codegen and return.
     ThreadPool Pool;
     int count = 0;
-    for (auto &ModuleBuffer : Modules) {
+    for (auto &Mod : Modules) {
       Pool.async([&](int count) {
         LLVMContext Context;
         Context.setDiscardValueNames(LTODiscardValueNames);
 
         // Parse module now
-        auto TheModule =
-            loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
-                                 /*IsImporting*/ false);
+        auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+                                             /*IsImporting*/ false);
 
         // CodeGen
         auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
         if (SavedObjectsDirectoryPath.empty())
           ProducedBinaries[count] = std::move(OutputBuffer);
         else
-          ProducedBinaryFiles[count] = writeGeneratedObject(
-              count, "", SavedObjectsDirectoryPath, *OutputBuffer);
+          ProducedBinaryFiles[count] =
+              writeGeneratedObject(count, "", *OutputBuffer);
       }, count++);
     }
 
@@ -881,6 +921,10 @@ void ThinLTOCodeGenerator::run() {
   auto GUIDPreservedSymbols =
       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
 
+  // Add used symbol from inputs to the preserved symbols.
+  for (const auto &M : Modules)
+    addUsedSymbolToPreservedGUID(*M, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
 
@@ -902,7 +946,7 @@ void ThinLTOCodeGenerator::run() {
 
   // Resolve prevailing symbols, this has to be computed early because it
   // impacts the caching.
-  resolvePrevailingInIndex(*Index, ResolvedODR);
+  resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols);
 
   // Use global summary-based analysis to identify symbols that can be
   // internalized (because they aren't exported or preserved as per callback).
@@ -913,7 +957,7 @@ void ThinLTOCodeGenerator::run() {
   // GVSummary and ResolvedODR maps to enable threaded access to these maps
   // below.
   for (auto &Module : Modules) {
-    auto ModuleIdentifier = Module.getBufferIdentifier();
+    auto ModuleIdentifier = Module->getName();
     ExportLists[ModuleIdentifier];
     ImportLists[ModuleIdentifier];
     ResolvedODR[ModuleIdentifier];
@@ -927,8 +971,10 @@ void ThinLTOCodeGenerator::run() {
   ModulesOrdering.resize(Modules.size());
   std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
   llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
-    auto LSize = Modules[LeftIndex].getBuffer().size();
-    auto RSize = Modules[RightIndex].getBuffer().size();
+    auto LSize =
+        Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
+    auto RSize =
+        Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
     return LSize > RSize;
   });
 
@@ -936,9 +982,9 @@ void ThinLTOCodeGenerator::run() {
   {
     ThreadPool Pool(ThreadCount);
     for (auto IndexCount : ModulesOrdering) {
-      auto &ModuleBuffer = Modules[IndexCount];
+      auto &Mod = Modules[IndexCount];
       Pool.async([&](int count) {
-        auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
+        auto ModuleIdentifier = Mod->getName();
         auto &ExportList = ExportLists[ModuleIdentifier];
 
         auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
@@ -963,8 +1009,7 @@ void ThinLTOCodeGenerator::run() {
               ProducedBinaries[count] = std::move(ErrOrBuffer.get());
             else
               ProducedBinaryFiles[count] = writeGeneratedObject(
-                  count, CacheEntryPath, SavedObjectsDirectoryPath,
-                  *ErrOrBuffer.get());
+                  count, CacheEntryPath, *ErrOrBuffer.get());
             return;
           }
         }
@@ -973,7 +1018,8 @@ void ThinLTOCodeGenerator::run() {
         Context.setDiscardValueNames(LTODiscardValueNames);
         Context.enableDebugTypeODRUniquing();
         auto DiagFileOrErr = lto::setupOptimizationRemarks(
-            Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
+            Context, RemarksFilename, RemarksPasses, RemarksFormat,
+            RemarksWithHotness, count);
         if (!DiagFileOrErr) {
           errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
           report_fatal_error("ThinLTO: Can't get an output file for the "
@@ -981,9 +1027,8 @@ void ThinLTOCodeGenerator::run() {
         }
 
         // Parse module now
-        auto TheModule =
-            loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
-                                 /*IsImporting*/ false);
+        auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+                                             /*IsImporting*/ false);
 
         // Save temps: original file.
         saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
@@ -1021,7 +1066,7 @@ void ThinLTOCodeGenerator::run() {
           return;
         }
         ProducedBinaryFiles[count] = writeGeneratedObject(
-            count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
+            count, CacheEntryPath, *OutputBuffer);
       }, IndexCount);
     }
   }
diff --git a/lib/LTO/UpdateCompilerUsed.cpp b/lib/LTO/UpdateCompilerUsed.cpp
index 00482dee6e10..6434f902088d 100644
--- a/lib/LTO/UpdateCompilerUsed.cpp
+++ b/lib/LTO/UpdateCompilerUsed.cpp
@@ -1,9 +1,8 @@
 //==-LTOInternalize.cpp - LLVM Link Time Optimizer Internalization Utility -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/LineEditor/LineEditor.cpp b/lib/LineEditor/LineEditor.cpp
index 533a928b2dfd..57e62bd64871 100644
--- a/lib/LineEditor/LineEditor.cpp
+++ b/lib/LineEditor/LineEditor.cpp
@@ -1,9 +1,8 @@
 //===-- LineEditor.cpp - line editor --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index afbc57abfcc0..37515d93ed50 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -1,9 +1,8 @@
 //===- lib/Linker/IRMover.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -403,6 +402,7 @@ class IRLinker {
 
   DenseSet<GlobalValue *> ValuesToLink;
   std::vector<GlobalValue *> Worklist;
+  std::vector<std::pair<GlobalValue *, Value*>> RAUWWorklist;
 
   void maybeAdd(GlobalValue *GV) {
     if (ValuesToLink.insert(GV).second)
@@ -489,12 +489,24 @@ class IRLinker {
   void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
   Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
 
+  /// Replace all types in the source AttributeList with the
+  /// corresponding destination type.
+  AttributeList mapAttributeTypes(LLVMContext &C, AttributeList Attrs);
+
   /// Functions that take care of cloning a specific global value type
   /// into the destination module.
   GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
   Function *copyFunctionProto(const Function *SF);
   GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA);
 
+  /// Perform "replace all uses with" operations. These work items need to be
+  /// performed as part of materialization, but we postpone them to happen after
+  /// materialization is done. The materializer called by ValueMapper is not
+  /// expected to delete constants, as ValueMapper is holding pointers to some
+  /// of them, but constant destruction may be indirectly triggered by RAUW.
+  /// Hence, the need to move this out of the materialization call chain.
+  void flushRAUWWorklist();
+
   /// When importing for ThinLTO, prevent importing of types listed on
   /// the DICompileUnit that we don't need a copy of in the importing
   /// module.
@@ -620,6 +632,21 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
   return NewDGV;
 }
 
+AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
+  for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+    if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+      Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+      if (!Ty)
+        continue;
+
+      Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+      Attrs = Attrs.addAttribute(
+          C, i, Attribute::getWithByValType(C, TypeMap.get(Ty)));
+    }
+  }
+  return Attrs;
+}
+
 /// Link the function in the source module into the destination module if
 /// needed, setting up mapping information.
 Function *IRLinker::copyFunctionProto(const Function *SF) {
@@ -629,6 +656,7 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
       Function::Create(TypeMap.get(SF->getFunctionType()),
                        GlobalValue::ExternalLinkage, SF->getName(), &DstM);
   F->copyAttributesFrom(SF);
+  F->setAttributes(mapAttributeTypes(F->getContext(), F->getAttributes()));
   return F;
 }
 
@@ -884,8 +912,8 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
   // Replace any uses of the two global variables with uses of the new
   // global.
   if (DstGV) {
-    DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
-    DstGV->eraseFromParent();
+    RAUWWorklist.push_back(
+        std::make_pair(DstGV, ConstantExpr::getBitCast(NG, DstGV->getType())));
   }
 
   return Ret;
@@ -984,9 +1012,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
   }
 
   if (DGV && NewGV != DGV) {
-    DGV->replaceAllUsesWith(
-      ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType()));
-    DGV->eraseFromParent();
+    // Schedule "replace all uses with" to happen after materializing is
+    // done. It is not safe to do it now, since ValueMapper may be holding
+    // pointers to constants that will get deleted if RAUW runs.
+    RAUWWorklist.push_back(std::make_pair(
+        DGV,
+        ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType())));
   }
 
   return C;
@@ -1044,6 +1075,18 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
   return Error::success();
 }
 
+// Apply each queued (old global, replacement) pair, then empty the worklist.
+void IRLinker::flushRAUWWorklist() {
+  for (const auto &Elem : RAUWWorklist) { // By reference: no per-item copy.
+    GlobalValue *Old;
+    Value *New;
+    std::tie(Old, New) = Elem;
+    Old->replaceAllUsesWith(New); // Redirect users before deleting Old.
+    Old->eraseFromParent();
+  }
+  RAUWWorklist.clear();
+}
+
 void IRLinker::prepareCompileUnitsForImport() {
   NamedMDNode *SrcCompileUnits = SrcM->getNamedMetadata("llvm.dbg.cu");
   if (!SrcCompileUnits)
@@ -1200,7 +1243,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
       if (SrcBehaviorValue == Module::Override &&
           SrcOp->getOperand(2) != DstOp->getOperand(2))
         return stringErr("linking module flags '" + ID->getString() +
-                         "': IDs have conflicting override values");
+                         "': IDs have conflicting override values in '" +
+                         SrcM->getModuleIdentifier() + "' and '" +
+                         DstM.getModuleIdentifier() + "'");
       continue;
     } else if (SrcBehaviorValue == Module::Override) {
       // Update the destination flag to that of the source.
@@ -1211,7 +1256,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
     // Diagnose inconsistent merge behavior types.
     if (SrcBehaviorValue != DstBehaviorValue)
       return stringErr("linking module flags '" + ID->getString() +
-                       "': IDs have conflicting behaviors");
+                       "': IDs have conflicting behaviors in '" +
+                       SrcM->getModuleIdentifier() + "' and '" +
+                       DstM.getModuleIdentifier() + "'");
 
     auto replaceDstValue = [&](MDNode *New) {
       Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
@@ -1229,7 +1276,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
       // Emit an error if the values differ.
       if (SrcOp->getOperand(2) != DstOp->getOperand(2))
         return stringErr("linking module flags '" + ID->getString() +
-                         "': IDs have conflicting values");
+                         "': IDs have conflicting values in '" +
+                         SrcM->getModuleIdentifier() + "' and '" +
+                         DstM.getModuleIdentifier() + "'");
       continue;
     }
     case Module::Warning: {
@@ -1369,6 +1418,7 @@ Error IRLinker::run() {
     Mapper.mapValue(*GV);
     if (FoundError)
       return std::move(*FoundError);
+    flushRAUWWorklist();
   }
 
   // Note that we are done linking global value bodies. This prevents
diff --git a/lib/Linker/LinkDiagnosticInfo.h b/lib/Linker/LinkDiagnosticInfo.h
index d91f19c69aac..30c16abaf509 100644
--- a/lib/Linker/LinkDiagnosticInfo.h
+++ b/lib/Linker/LinkDiagnosticInfo.h
@@ -1,9 +1,8 @@
 //===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 25f31a3401a6..a18f4cc25bcc 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -1,9 +1,8 @@
 //===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index 18277a225640..8cba6b3281a5 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -1,9 +1,8 @@
 //===- ConstantPools.cpp - ConstantPool class -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 89f3b30cddd6..2c68723a12f8 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -426,7 +425,8 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
   W.OS << char(ELF::EV_CURRENT);        // e_ident[EI_VERSION]
   // e_ident[EI_OSABI]
   W.OS << char(OWriter.TargetObjectWriter->getOSABI());
-  W.OS << char(0);                  // e_ident[EI_ABIVERSION]
+  // e_ident[EI_ABIVERSION]
+  W.OS << char(OWriter.TargetObjectWriter->getABIVersion());
 
   W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD);
 
@@ -463,7 +463,7 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
 
 uint64_t ELFWriter::SymbolValue(const MCSymbol &Sym,
                                 const MCAsmLayout &Layout) {
-  if (Sym.isCommon() && Sym.isExternal())
+  if (Sym.isCommon() && (Sym.isTargetCommon() || Sym.isExternal()))
     return Sym.getCommonAlignment();
 
   uint64_t Res;
@@ -577,6 +577,10 @@ bool ELFWriter::isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol,
                            bool Used, bool Renamed) {
   if (Symbol.isVariable()) {
     const MCExpr *Expr = Symbol.getVariableValue();
+    // Target Expressions that are always inlined do not appear in the symtab
+    if (const auto *T = dyn_cast<MCTargetExpr>(Expr))
+      if (T->inlineAssignedExpr())
+        return false;
     if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr)) {
       if (Ref->getKind() == MCSymbolRefExpr::VK_WEAKREF)
         return false;
@@ -656,8 +660,12 @@ void ELFWriter::computeSymbolTable(
     if (Symbol.isAbsolute()) {
       MSD.SectionIndex = ELF::SHN_ABS;
     } else if (Symbol.isCommon()) {
-      assert(!Local);
-      MSD.SectionIndex = ELF::SHN_COMMON;
+      if (Symbol.isTargetCommon()) {
+        MSD.SectionIndex = Symbol.getIndex();
+      } else {
+        assert(!Local);
+        MSD.SectionIndex = ELF::SHN_COMMON;
+      }
     } else if (Symbol.isUndefined()) {
       if (isSignature && !Used) {
         MSD.SectionIndex = RevGroupMap.lookup(&Symbol);
@@ -710,7 +718,7 @@ void ELFWriter::computeSymbolTable(
 
   if (HasLargeSectionIndex) {
     MCSectionELF *SymtabShndxSection =
-        Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
+        Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
     SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection);
     SymtabShndxSection->setAlignment(4);
   }
@@ -882,12 +890,16 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     return;
   }
 
-  if (ZlibStyle)
+  if (ZlibStyle) {
     // Set the compressed flag. That is zlib style.
     Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
-  else
+    // Alignment field should reflect the requirements of
+    // the compressed section header.
+    Section.setAlignment(is64Bit() ? 8 : 4);
+  } else {
     // Add "z" prefix to section name. This is zlib-gnu style.
     MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
+  }
   W.OS << CompressedContents;
 }
 
@@ -1271,18 +1283,25 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
     // This is the first place we are able to copy this information.
     Alias->setExternal(Symbol.isExternal());
     Alias->setBinding(Symbol.getBinding());
+    Alias->setOther(Symbol.getOther());
 
     if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
       continue;
 
-    // FIXME: produce a better error message.
+    // FIXME: Get source locations for these errors or diagnose them earlier.
     if (Symbol.isUndefined() && Rest.startswith("@@") &&
-        !Rest.startswith("@@@"))
-      report_fatal_error("A @@ version cannot be undefined");
+        !Rest.startswith("@@@")) {
+      Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName +
+                                                " must be defined");
+      continue;
+    }
 
-    if (Renames.count(&Symbol) && Renames[&Symbol] != Alias)
-      report_fatal_error(llvm::Twine("Multiple symbol versions defined for ") +
-                         Symbol.getName());
+    if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) {
+      Asm.getContext().reportError(
+          SMLoc(), llvm::Twine("multiple symbol versions defined for ") +
+                       Symbol.getName());
+      continue;
+    }
 
     Renames.insert(std::make_pair(&Symbol, Alias));
   }
@@ -1358,6 +1377,12 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
     return true;
   }
 
+  // Keep symbol type for a local ifunc because it may result in an IRELATIVE
+  // reloc that the dynamic loader will use to resolve the address at startup
+  // time.
+  if (Sym->getType() == ELF::STT_GNU_IFUNC)
+    return true;
+
   // If a relocation points to a mergeable section, we have to be careful.
   // If the offset is zero, a relocation with the section will encode the
   // same information. With a non-zero offset, the situation is different.
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 92d3a8a2645f..9b1102cbe7d1 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +16,7 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCWasmObjectWriter.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -44,6 +44,9 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
   case Triple::Wasm:
     return createWasmObjectWriter(cast<MCWasmObjectTargetWriter>(std::move(TW)),
                                   OS);
+  case Triple::XCOFF:
+    return createXCOFFObjectWriter(
+        cast<MCXCOFFObjectTargetWriter>(std::move(TW)), OS);
   default:
     llvm_unreachable("unexpected object format");
   }
@@ -65,6 +68,7 @@ Optional<MCFixupKind> MCAsmBackend::getFixupKind(StringRef Name) const {
 
 const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
   static const MCFixupKindInfo Builtins[] = {
+      {"FK_NONE", 0, 0, 0},
       {"FK_Data_1", 0, 8, 0},
       {"FK_Data_2", 0, 16, 0},
       {"FK_Data_4", 0, 32, 0},
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 30f22d2d68f4..71e51e320f8b 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmInfo.cpp - Asm Info -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -71,6 +70,10 @@ MCAsmInfo::MCAsmInfo() {
 
 MCAsmInfo::~MCAsmInfo() = default;
 
+void MCAsmInfo::addInitialFrameState(const MCCFIInstruction &Inst) {
+  InitialFrameState.push_back(Inst); // Appends Inst to InitialFrameState.
+}
+
 bool MCAsmInfo::isSectionAtomizableBySymbols(const MCSection &Section) const {
   return false;
 }
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 15886eb619b9..9f19d163f57b 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmInfoCOFF.cpp - COFF asm properties ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index c74840982fb7..62bc5b8c9418 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmInfoDarwin.cpp - Darwin asm properties ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
index b0dc43c6c868..a5e8aff7f129 100644
--- a/lib/MC/MCAsmInfoELF.cpp
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmInfoELF.cpp - ELF asm properties ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCAsmInfoWasm.cpp b/lib/MC/MCAsmInfoWasm.cpp
index d448664baa14..ce6ec7ef211e 100644
--- a/lib/MC/MCAsmInfoWasm.cpp
+++ b/lib/MC/MCAsmInfoWasm.cpp
@@ -1,9 +1,8 @@
 //===-- MCAsmInfoWasm.cpp - Wasm asm properties -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@ void MCAsmInfoWasm::anchor() {}
 
 MCAsmInfoWasm::MCAsmInfoWasm() {
   HasIdentDirective = true;
+  HasNoDeadStrip = true;
   WeakRefDirective = "\t.weak\t";
   PrivateGlobalPrefix = ".L";
   PrivateLabelPrefix = ".L";
diff --git a/lib/MC/MCAsmInfoXCOFF.cpp b/lib/MC/MCAsmInfoXCOFF.cpp
new file mode 100644
index 000000000000..74c21f0c9e6d
--- /dev/null
+++ b/lib/MC/MCAsmInfoXCOFF.cpp
@@ -0,0 +1,18 @@
+//===- MC/MCAsmInfoXCOFF.cpp - XCOFF asm properties ------------ *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoXCOFF.h"
+
+using namespace llvm;
+
+void MCAsmInfoXCOFF::anchor() {} // Empty; presumably a vtable anchor (confirm).
+
+MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
+  IsLittleEndian = false; // Marks this asm info as not little-endian.
+  HasDotTypeDotSizeDirective = false; // Presumably no .type/.size; confirm.
+}
diff --git a/lib/MC/MCAsmMacro.cpp b/lib/MC/MCAsmMacro.cpp
index 7e89c03c6c6b..ba4fb7d4f387 100644
--- a/lib/MC/MCAsmMacro.cpp
+++ b/lib/MC/MCAsmMacro.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e017103070bf..7a2b0b8a1220 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -108,10 +107,7 @@ public:
   void AddComment(const Twine &T, bool EOL = true) override;
 
   /// Add a comment showing the encoding of an instruction.
-  /// If PrintSchedInfo is true, then the comment sched:[x:y] will be added to
-  /// the output if supported by the target.
-  void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
-                          bool PrintSchedInfo);
+  void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
 
   /// Return a raw_ostream that comments can be written to.
   /// Unlike AddComment, you are required to terminate comments with \n if you
@@ -192,6 +188,7 @@ public:
   void EmitValueImpl(const MCExpr *Value, unsigned Size,
                      SMLoc Loc = SMLoc()) override;
   void EmitIntValue(uint64_t Value, unsigned Size) override;
+  void EmitIntValueInHex(uint64_t Value, unsigned Size) override;
 
   void EmitULEB128Value(const MCExpr *Value) override;
 
@@ -227,11 +224,11 @@ public:
   Expected<unsigned> tryEmitDwarfFileDirective(unsigned FileNo,
                                                StringRef Directory,
                                                StringRef Filename,
-                                               MD5::MD5Result *Checksum = 0,
+                                               Optional<MD5::MD5Result> Checksum = None,
                                                Optional<StringRef> Source = None,
                                                unsigned CUID = 0) override;
   void emitDwarfFile0Directive(StringRef Directory, StringRef Filename,
-                               MD5::MD5Result *Checksum,
+                               Optional<MD5::MD5Result> Checksum,
                                Optional<StringRef> Source,
                                unsigned CUID = 0) override;
   void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -312,8 +309,7 @@ public:
   void emitCGProfileEntry(const MCSymbolRefExpr *From,
                           const MCSymbolRefExpr *To, uint64_t Count) override;
 
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool PrintSchedInfo) override;
+  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
 
   void EmitBundleAlignMode(unsigned AlignPow2) override;
   void EmitBundleLock(bool AlignToEnd) override;
@@ -546,6 +542,7 @@ static const char *getPlatformName(MachO::PlatformType Type) {
   case MachO::PLATFORM_TVOS:             return "tvos";
   case MachO::PLATFORM_WATCHOS:          return "watchos";
   case MachO::PLATFORM_BRIDGEOS:         return "bridgeos";
+  case MachO::PLATFORM_MACCATALYST:      return "macCatalyst";
   case MachO::PLATFORM_IOSSIMULATOR:     return "iossimulator";
   case MachO::PLATFORM_TVOSSIMULATOR:    return "tvossimulator";
   case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
@@ -657,6 +654,9 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
       // .weak_reference
   case MCSA_WeakReference:  OS << MAI->getWeakRefDirective(); break;
   case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
+  case MCSA_Cold:
+    // Assemblers currently do not support a .cold directive.
+    return false;
   }
 
   Symbol->print(OS, MAI);
@@ -924,6 +924,10 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
   EmitValue(MCConstantExpr::create(Value, getContext()), Size);
 }
 
+void MCAsmStreamer::EmitIntValueInHex(uint64_t Value, unsigned Size) {
+  EmitValue(MCConstantExpr::create(Value, getContext(), true), Size);
+} // Unlike EmitIntValue, passes `true` as create()'s third argument.
+
 void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                   SMLoc Loc) {
   assert(Size <= 8 && "Invalid size");
@@ -1153,7 +1157,7 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
 
 static void printDwarfFileDirective(unsigned FileNo, StringRef Directory,
                                     StringRef Filename,
-                                    MD5::MD5Result *Checksum,
+                                    Optional<MD5::MD5Result> Checksum,
                                     Optional<StringRef> Source,
                                     bool UseDwarfDirectory,
                                     raw_svector_ostream &OS) {
@@ -1186,13 +1190,14 @@ static void printDwarfFileDirective(unsigned FileNo, StringRef Directory,
 
 Expected<unsigned> MCAsmStreamer::tryEmitDwarfFileDirective(
     unsigned FileNo, StringRef Directory, StringRef Filename,
-    MD5::MD5Result *Checksum, Optional<StringRef> Source, unsigned CUID) {
+    Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, unsigned CUID) {
   assert(CUID == 0 && "multiple CUs not supported by MCAsmStreamer");
 
   MCDwarfLineTable &Table = getContext().getMCDwarfLineTable(CUID);
   unsigned NumFiles = Table.getMCDwarfFiles().size();
   Expected<unsigned> FileNoOrErr =
-      Table.tryGetFile(Directory, Filename, Checksum, Source, FileNo);
+      Table.tryGetFile(Directory, Filename, Checksum, Source,
+                       getContext().getDwarfVersion(), FileNo);
   if (!FileNoOrErr)
     return FileNoOrErr.takeError();
   FileNo = FileNoOrErr.get();
@@ -1214,7 +1219,7 @@ Expected<unsigned> MCAsmStreamer::tryEmitDwarfFileDirective(
 
 void MCAsmStreamer::emitDwarfFile0Directive(StringRef Directory,
                                             StringRef Filename,
-                                            MD5::MD5Result *Checksum,
+                                            Optional<MD5::MD5Result> Checksum,
                                             Optional<StringRef> Source,
                                             unsigned CUID) {
   assert(CUID == 0);
@@ -1737,8 +1742,7 @@ void MCAsmStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
 }
 
 void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
-                                       const MCSubtargetInfo &STI,
-                                       bool PrintSchedInfo) {
+                                       const MCSubtargetInfo &STI) {
   raw_ostream &OS = GetCommentOS();
   SmallString<256> Code;
   SmallVector<MCFixup, 4> Fixups;
@@ -1817,11 +1821,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
       }
     }
   }
-  OS << "]";
-  // If we are not going to add fixup or schedule comments after this point
-  // then we have to end the current comment line with "\n".
-  if (Fixups.size() || !PrintSchedInfo)
-    OS << "\n";
+  OS << "]\n";
 
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
     MCFixup &F = Fixups[i];
@@ -1833,18 +1833,15 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
 }
 
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
-                                    const MCSubtargetInfo &STI,
-                                    bool PrintSchedInfo) {
+                                    const MCSubtargetInfo &STI) {
   assert(getCurrentSectionOnly() &&
          "Cannot emit contents before setting section!");
 
   // Show the encoding in a comment if we have a code emitter.
-  AddEncodingComment(Inst, STI, PrintSchedInfo);
+  AddEncodingComment(Inst, STI);
 
   // Show the MCInst if enabled.
   if (ShowInst) {
-    if (PrintSchedInfo)
-      GetCommentOS() << "\n";
     Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
     GetCommentOS() << "\n";
   }
@@ -1854,12 +1851,6 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
   else
     InstPrinter->printInst(&Inst, OS, "", STI);
 
-  if (PrintSchedInfo) {
-    std::string SI = STI.getSchedInfoStr(Inst);
-    if (!SI.empty())
-      GetCommentOS() << SI;
-  }
-
   StringRef Comments = CommentToEmit;
   if (Comments.size() && Comments.back() != '\n')
     GetCommentOS() << "\n";
@@ -1927,7 +1918,7 @@ void MCAsmStreamer::FinishImpl() {
   // Emit the label for the line table, if requested - since the rest of the
   // line table will be defined by .loc/.file directives, and not emitted
   // directly, the label is the only work required here.
-  auto &Tables = getContext().getMCDwarfLineTables();
+  const auto &Tables = getContext().getMCDwarfLineTables();
   if (!Tables.empty()) {
     assert(Tables.size() == 1 && "asm output only supports one line table");
     if (auto *Label = Tables.begin()->second.getLabel()) {
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index cde6a93a1647..c4f4d4c2870e 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -323,6 +322,13 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
     const MCAlignFragment &AF = cast<MCAlignFragment>(F);
     unsigned Offset = Layout.getFragmentOffset(&AF);
     unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+
+    // Insert extra Nops for code alignment if the target defines the
+    // shouldInsertExtraNopBytesForCodeAlign target hook.
+    if (AF.getParent()->UseCodeAlign() && AF.hasEmitNops() &&
+        getBackend().shouldInsertExtraNopBytesForCodeAlign(AF, Size))
+      return Size;
+
     // If we are padding with nops, force the padding to be larger than the
     // minimum nop size.
     if (Size > 0 && AF.hasEmitNops()) {
@@ -805,7 +811,8 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
       if (isa<MCEncodedFragment>(&Frag) &&
           isa<MCCompactEncodedInstFragment>(&Frag))
         continue;
-      if (!isa<MCEncodedFragment>(&Frag) && !isa<MCCVDefRangeFragment>(&Frag))
+      if (!isa<MCEncodedFragment>(&Frag) && !isa<MCCVDefRangeFragment>(&Frag) &&
+          !isa<MCAlignFragment>(&Frag))
         continue;
       ArrayRef<MCFixup> Fixups;
       MutableArrayRef<char> Contents;
@@ -826,6 +833,13 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
       } else if (auto *FragWithFixups = dyn_cast<MCDwarfLineAddrFragment>(&Frag)) {
         Fixups = FragWithFixups->getFixups();
         Contents = FragWithFixups->getContents();
+      } else if (auto *AF = dyn_cast<MCAlignFragment>(&Frag)) {
+        // Insert fixup type for code alignment if the target defines the
+        // shouldInsertFixupForCodeAlign target hook.
+        if (Sec.UseCodeAlign() && AF->hasEmitNops()) {
+          getBackend().shouldInsertFixupForCodeAlign(*this, Layout, *AF);
+        }
+        continue;
       } else
         llvm_unreachable("Unknown fragment with fixups!");
       for (const MCFixup &Fixup : Fixups) {
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index ca69478ed10d..0d114f12d58c 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===- MCCodeEmitter.cpp - Instruction Encoding ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCCodePadder.cpp b/lib/MC/MCCodePadder.cpp
index 57547814e595..27a62f95a529 100644
--- a/lib/MC/MCCodePadder.cpp
+++ b/lib/MC/MCCodePadder.cpp
@@ -1,9 +1,8 @@
 //===- MCCodePadder.cpp - Target MC Code Padder ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 978ac789c31e..1a71b542bd06 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -1,9 +1,8 @@
 //===- MCCodeView.h - Machine Code CodeView support -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index fab517075c5a..0dc2e2d37caf 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,17 +26,20 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCSymbolMachO.h"
 #include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
@@ -85,6 +87,7 @@ void MCContext::reset() {
   COFFAllocator.DestroyAll();
   ELFAllocator.DestroyAll();
   MachOAllocator.DestroyAll();
+  XCOFFAllocator.DestroyAll();
 
   MCSubtargetAllocator.DestroyAll();
   UsedNames.clear();
@@ -106,6 +109,7 @@ void MCContext::reset() {
   ELFUniquingMap.clear();
   COFFUniquingMap.clear();
   WasmUniquingMap.clear();
+  XCOFFUniquingMap.clear();
 
   NextID.clear();
   AllowTemporaryLabels = true;
@@ -161,6 +165,8 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
       return new (Name, *this) MCSymbolMachO(Name, IsTemporary);
     case MCObjectFileInfo::IsWasm:
       return new (Name, *this) MCSymbolWasm(Name, IsTemporary);
+    case MCObjectFileInfo::IsXCOFF:
+      return new (Name, *this) MCSymbolXCOFF(Name, IsTemporary);
     }
   }
   return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
@@ -459,14 +465,6 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
                         BeginSymName);
 }
 
-MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
-  COFFSectionKey T{Section, "", 0, GenericSectionID};
-  auto Iter = COFFUniquingMap.find(T);
-  if (Iter == COFFUniquingMap.end())
-    return nullptr;
-  return Iter->second;
-}
-
 MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
                                                     const MCSymbol *KeySym,
                                                     unsigned UniqueID) {
@@ -531,6 +529,38 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
   return Result;
 }
 
+MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
+                                           XCOFF::StorageMappingClass SMC,
+                                           SectionKind Kind,
+                                           const char *BeginSymName) {
+  // Sections are uniqued on (name, storage mapping class): claim the map slot
+  // with a null placeholder and hand back the existing section on a hit.
+  auto Slot = XCOFFUniquingMap.insert(
+      std::make_pair(XCOFFSectionKey{Section.str(), SMC}, nullptr));
+  if (!Slot.second)
+    return Slot.first->second;
+
+  // First request for this key: build the section, naming it after the
+  // string held in the map key rather than the caller's Section argument.
+  StringRef OwnedName = Slot.first->first.SectionName;
+
+  // A begin symbol is only created when the caller supplied a name for it.
+  MCSymbol *BeginSym =
+      BeginSymName ? createTempSymbol(BeginSymName, false) : nullptr;
+
+  auto *NewSec = new (XCOFFAllocator.Allocate())
+      MCSectionXCOFF(OwnedName, SMC, Kind, BeginSym);
+  Slot.first->second = NewSec;
+
+  // Start the section with a data fragment and anchor the begin symbol to it.
+  auto *Frag = new MCDataFragment();
+  NewSec->getFragmentList().insert(NewSec->begin(), Frag);
+  Frag->setParent(NewSec);
+  if (BeginSym)
+    BeginSym->setFragment(Frag);
+  return NewSec;
+}
+
 MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
   return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI);
 }
@@ -566,6 +596,42 @@ void MCContext::RemapDebugPaths() {
 // Dwarf Management
 //===----------------------------------------------------------------------===//
 
+void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
+  // Record the root file and compilation directory that MCDwarf needs; a
+  // '.file 0' directive, if one is seen, supersedes what is set here.
+  // The input buffer's MD5 is only computed for DWARF v5 and later.
+  Optional<MD5::MD5Result> Digest;
+  if (getDwarfVersion() >= 5) {
+    MD5 Hasher;
+    MD5::MD5Result Result;
+    Hasher.update(Buffer);
+    Hasher.final(Result);
+    Digest = Result;
+  }
+  // Build the canonical root filename: never empty, and not prefixed with
+  // the compilation directory.
+  // MainFileName starts out (from the MCContext ctor) as the SrcMgr main
+  // file's name, which may equal InputFileName, directories included; a
+  // -main-file-name option may instead have replaced it with a bare basename.
+  // Hence, when the two names differ, MainFileName is taken to be such a
+  // basename and is swapped in for InputFileName's last path component.
+  SmallString<1024> RootPath = InputFileName;
+  if (RootPath.empty() || RootPath == "-")
+    RootPath = "<stdin>";
+  if (!getMainFileName().empty() && RootPath != getMainFileName()) {
+    llvm::sys::path::remove_filename(RootPath);
+    llvm::sys::path::append(RootPath, getMainFileName());
+  }
+  // Strip a leading compilation directory and the separator following it.
+  StringRef RootName = RootPath;
+  if (RootName.consume_front(getCompilationDir()) &&
+      llvm::sys::path::is_separator(RootName.front()))
+    RootName = RootName.drop_front();
+  assert(!RootName.empty());
+  setMCLineTableRootFile(/*CUID=*/0, getCompilationDir(), RootName, Digest,
+                         None);
+}
+
 /// getDwarfFile - takes a file name and number to place in the dwarf file and
 /// directory tables.  If the file number has already been allocated it is an
 /// error and zero is returned and the client reports the error, else the
@@ -573,11 +639,12 @@ void MCContext::RemapDebugPaths() {
 Expected<unsigned> MCContext::getDwarfFile(StringRef Directory,
                                            StringRef FileName,
                                            unsigned FileNumber,
-                                           MD5::MD5Result *Checksum,
+                                           Optional<MD5::MD5Result> Checksum,
                                            Optional<StringRef> Source,
                                            unsigned CUID) {
   MCDwarfLineTable &Table = MCDwarfLineTablesCUMap[CUID];
-  return Table.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+  return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+                          FileNumber);
 }
 
 /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
@@ -585,7 +652,7 @@ Expected<unsigned> MCContext::getDwarfFile(StringRef Directory,
 bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
   const MCDwarfLineTable &LineTable = getMCDwarfLineTable(CUID);
   if (FileNumber == 0)
-    return getDwarfVersion() >= 5 && LineTable.hasRootFile();
+    return getDwarfVersion() >= 5;
   if (FileNumber >= LineTable.getMCDwarfFiles().size())
     return false;
 
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index ad0a39991c53..21bdc2eaea3e 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -1,9 +1,8 @@
 //===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,31 +52,32 @@ LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
   if (!TheTarget)
     return nullptr;
 
-  const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT);
+  std::unique_ptr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
   if (!MRI)
     return nullptr;
 
   // Get the assembler info needed to setup the MCContext.
-  const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT);
+  std::unique_ptr<const MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT));
   if (!MAI)
     return nullptr;
 
-  const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
   if (!MII)
     return nullptr;
 
-  const MCSubtargetInfo *STI =
-      TheTarget->createMCSubtargetInfo(TT, CPU, Features);
+  std::unique_ptr<const MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TT, CPU, Features));
   if (!STI)
     return nullptr;
 
   // Set up the MCContext for creating symbols and MCExpr's.
-  MCContext *Ctx = new MCContext(MAI, MRI, nullptr);
+  std::unique_ptr<MCContext> Ctx(new MCContext(MAI.get(), MRI.get(), nullptr));
   if (!Ctx)
     return nullptr;
 
   // Set up disassembler.
-  MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, *Ctx);
+  std::unique_ptr<MCDisassembler> DisAsm(
+      TheTarget->createMCDisassembler(*STI, *Ctx));
   if (!DisAsm)
     return nullptr;
 
@@ -87,19 +87,20 @@ LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
     return nullptr;
 
   std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
-      TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo)));
+      TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx.get(), std::move(RelInfo)));
   DisAsm->setSymbolizer(std::move(Symbolizer));
 
   // Set up the instruction printer.
   int AsmPrinterVariant = MAI->getAssemblerDialect();
-  MCInstPrinter *IP = TheTarget->createMCInstPrinter(
-      Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI);
+  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+      Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI));
   if (!IP)
     return nullptr;
 
-  LLVMDisasmContext *DC =
-      new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp,
-                            TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP);
+  LLVMDisasmContext *DC = new LLVMDisasmContext(
+      TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, std::move(MAI),
+      std::move(MRI), std::move(STI), std::move(MII), std::move(Ctx),
+      std::move(DisAsm), std::move(IP));
   if (!DC)
     return nullptr;
 
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index f638fdc781d7..e5aab53a7613 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -1,9 +1,8 @@
 //===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,24 +82,22 @@ public:
   SmallString<128> CommentsToEmit;
   raw_svector_ostream CommentStream;
 
-  LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
-                    LLVMOpInfoCallback getOpInfo,
-                    LLVMSymbolLookupCallback symbolLookUp,
-                    const Target *theTarget, const MCAsmInfo *mAI,
-                    const MCRegisterInfo *mRI, const MCSubtargetInfo *mSI,
-                    const MCInstrInfo *mII, llvm::MCContext *ctx,
-                    const MCDisassembler *disAsm, MCInstPrinter *iP)
-      : TripleName(std::move(tripleName)), DisInfo(disInfo), TagType(tagType),
-        GetOpInfo(getOpInfo), SymbolLookUp(symbolLookUp), TheTarget(theTarget),
-        Options(0), CommentStream(CommentsToEmit) {
-    MAI.reset(mAI);
-    MRI.reset(mRI);
-    MSI.reset(mSI);
-    MII.reset(mII);
-    Ctx.reset(ctx);
-    DisAsm.reset(disAsm);
-    IP.reset(iP);
-  }
+  LLVMDisasmContext(std::string TripleName, void *DisInfo, int TagType,
+                    LLVMOpInfoCallback GetOpInfo,
+                    LLVMSymbolLookupCallback SymbolLookUp,
+                    const Target *TheTarget, // pointer copied, not moved
+                    std::unique_ptr<const MCAsmInfo> &&MAI,
+                    std::unique_ptr<const MCRegisterInfo> &&MRI,
+                    std::unique_ptr<const MCSubtargetInfo> &&MSI,
+                    std::unique_ptr<const MCInstrInfo> &&MII,
+                    std::unique_ptr<const llvm::MCContext> &&Ctx,
+                    std::unique_ptr<const MCDisassembler> &&DisAsm,
+                    std::unique_ptr<MCInstPrinter> &&IP)
+      : TripleName(std::move(TripleName)), DisInfo(DisInfo), TagType(TagType),
+        GetOpInfo(GetOpInfo), SymbolLookUp(SymbolLookUp), TheTarget(TheTarget),
+        MAI(std::move(MAI)), MRI(std::move(MRI)), MSI(std::move(MSI)),
+        MII(std::move(MII)), Ctx(std::move(Ctx)), DisAsm(std::move(DisAsm)),
+        IP(std::move(IP)), Options(0), CommentStream(CommentsToEmit) {}
   const std::string &getTripleName() const { return TripleName; }
   void *getDisInfo() const { return DisInfo; }
   int getTagType() const { return TagType; }
diff --git a/lib/MC/MCDisassembler/MCDisassembler.cpp b/lib/MC/MCDisassembler/MCDisassembler.cpp
index 2f1275d00b86..063f7e706024 100644
--- a/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -1,13 +1,14 @@
 //===- MCDisassembler.cpp - Disassembler interface ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -15,6 +16,13 @@ using namespace llvm;
 
 MCDisassembler::~MCDisassembler() = default;
 
+MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart(
+    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+    raw_ostream &VStream, raw_ostream &CStream) const {
+  Size = 0; // This implementation ignores its inputs and reports Size as 0.
+  return MCDisassembler::Success;
+}
+
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                               uint64_t Address, bool IsBranch,
                                               uint64_t Offset,
diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 1969c5dc66ab..7befef86303c 100644
--- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -1,9 +1,8 @@
 //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 8f932a3f0d48..64e216e0051d 100644
--- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MCRelocationInfo.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCDisassembler/MCSymbolizer.cpp b/lib/MC/MCDisassembler/MCSymbolizer.cpp
index 78e611e3ddda..8214a196afb1 100644
--- a/lib/MC/MCDisassembler/MCSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCSymbolizer.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 38b02694d81d..aae6fdf90931 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -260,7 +259,7 @@ void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS,
 
 void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
                                MCSection *Section) const {
-  if (Header.MCDwarfFiles.empty())
+  if (!HasSplitLineTable)
     return;
   Optional<MCDwarfLineStr> NoLineStr(None);
   MCOS.SwitchSection(Section);
@@ -362,10 +361,10 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
   }
   MCOS->EmitULEB128IntValue(DwarfFile.DirIndex); // Directory number.
   if (EmitMD5) {
-    MD5::MD5Result *Cksum = DwarfFile.Checksum;
+    const MD5::MD5Result &Cksum = *DwarfFile.Checksum;
     MCOS->EmitBinaryData(
-        StringRef(reinterpret_cast<const char *>(Cksum->Bytes.data()),
-                  Cksum->Bytes.size()));
+        StringRef(reinterpret_cast<const char *>(Cksum.Bytes.data()),
+                  Cksum.Bytes.size()));
   }
   if (HasSource) {
     if (LineStr)
@@ -379,8 +378,7 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
 }
 
 void MCDwarfLineTableHeader::emitV5FileDirTables(
-    MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr,
-    StringRef CtxCompilationDir) const {
+    MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr) const {
   // The directory format, which is just a list of the directory paths.  In a
   // non-split object, these are references to .debug_line_str; in a split
   // object, they are inline strings.
@@ -390,8 +388,9 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
                                     : dwarf::DW_FORM_string);
   MCOS->EmitULEB128IntValue(MCDwarfDirs.size() + 1);
   // Try not to emit an empty compilation directory.
-  const StringRef CompDir =
-      CompilationDir.empty() ? CtxCompilationDir : StringRef(CompilationDir);
+  const StringRef CompDir = CompilationDir.empty()
+                                ? MCOS->getContext().getCompilationDir()
+                                : StringRef(CompilationDir);
   if (LineStr) {
     // Record path strings, emit references here.
     LineStr->emitRef(MCOS, CompDir);
@@ -431,10 +430,14 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
                                       : dwarf::DW_FORM_string);
   }
   // Then the counted list of files. The root file is file #0, then emit the
-  // files as provide by .file directives.  To accommodate assembler source
-  // written for DWARF v4 but trying to emit v5, if we didn't see a root file
-  // explicitly, replicate file #1.
-  MCOS->EmitULEB128IntValue(MCDwarfFiles.size());
+  // files as provided by .file directives.
+  // MCDwarfFiles has an unused element [0] so use size() not size()+1.
+  // But sometimes MCDwarfFiles is empty, in which case we still emit one file.
+  MCOS->EmitULEB128IntValue(MCDwarfFiles.empty() ? 1 : MCDwarfFiles.size());
+  // To accommodate assembler source written for DWARF v4 but trying to emit
+  // v5: If we didn't see a root file explicitly, replicate file #1.
+  assert((!RootFile.Name.empty() || MCDwarfFiles.size() >= 1) &&
+         "No root file and no .file directives");
   emitOneV5FileEntry(MCOS, RootFile.Name.empty() ? MCDwarfFiles[1] : RootFile,
                      HasAllMD5, HasSource, LineStr);
   for (unsigned i = 1; i < MCDwarfFiles.size(); ++i)
@@ -506,7 +509,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
   // Put out the directory and file tables.  The formats vary depending on
   // the version.
   if (LineTableVersion >= 5)
-    emitV5FileDirTables(MCOS, LineStr, context.getCompilationDir());
+    emitV5FileDirTables(MCOS, LineStr);
   else
     emitV2FileDirTables(MCOS);
 
@@ -533,17 +536,27 @@ void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS,
 
 Expected<unsigned> MCDwarfLineTable::tryGetFile(StringRef &Directory,
                                                 StringRef &FileName,
-                                                MD5::MD5Result *Checksum,
+                                                Optional<MD5::MD5Result> Checksum,
                                                 Optional<StringRef> Source,
+                                                uint16_t DwarfVersion,
                                                 unsigned FileNumber) {
-  return Header.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+  return Header.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+                           FileNumber);
+}
+
+bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
+                StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
+  // Root-file match. Compare as StringRefs: data() need not be NUL-terminated.
+  return !RootFile.Name.empty() && StringRef(RootFile.Name) == FileName &&
+         RootFile.Checksum == Checksum;
 }
 
 Expected<unsigned>
 MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
                                    StringRef &FileName,
-                                   MD5::MD5Result *Checksum,
-                                   Optional<StringRef> &Source,
+                                   Optional<MD5::MD5Result> Checksum,
+                                   Optional<StringRef> Source,
+                                   uint16_t DwarfVersion,
                                    unsigned FileNumber) {
   if (Directory == CompilationDir)
     Directory = "";
@@ -555,9 +568,11 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
   // Keep track of whether any or all files have an MD5 checksum.
   // If any files have embedded source, they all must.
   if (MCDwarfFiles.empty()) {
-    trackMD5Usage(Checksum);
+    trackMD5Usage(Checksum.hasValue());
     HasSource = (Source != None);
   }
+  if (isRootFile(RootFile, Directory, FileName, Checksum) && DwarfVersion >= 5)
+    return 0;
   if (FileNumber == 0) {
     // File numbers start with 1 and/or after any file numbers
     // allocated by inline-assembler .file directives.
@@ -603,11 +618,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
     // For FileNames with no directories a DirIndex of 0 is used.
     DirIndex = 0;
   } else {
-    DirIndex = 0;
-    for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
-      if (Directory == MCDwarfDirs[DirIndex])
-        break;
-    }
+    DirIndex = llvm::find(MCDwarfDirs, Directory) - MCDwarfDirs.begin();
     if (DirIndex >= MCDwarfDirs.size())
       MCDwarfDirs.push_back(Directory);
     // The DirIndex is one based, as DirIndex of 0 is used for FileNames with
@@ -620,7 +631,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
   File.Name = FileName;
   File.DirIndex = DirIndex;
   File.Checksum = Checksum;
-  trackMD5Usage(Checksum);
+  trackMD5Usage(Checksum.hasValue());
   File.Source = Source;
   if (Source)
     HasSource = true;
@@ -755,9 +766,7 @@ bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
     *Offset = OS.tell();
     *Size = AddrSize;
     SetDelta = false;
-    std::vector<uint8_t> FillData;
-    FillData.insert(FillData.begin(), AddrSize, 0);
-    OS.write(reinterpret_cast<char *>(FillData.data()), AddrSize);
+    OS.write_zeros(AddrSize);
   } else {
     OS << char(dwarf::DW_LNS_fixed_advance_pc);
     // Generate fixup for 2-bytes address delta.
@@ -1007,9 +1016,15 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
     MCOS->EmitBytes(MCDwarfDirs[0]);
     MCOS->EmitBytes(sys::path::get_separator());
   }
-  const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles =
-    MCOS->getContext().getMCDwarfFiles();
-  MCOS->EmitBytes(MCDwarfFiles[1].Name);
+  const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles = context.getMCDwarfFiles();
+  // MCDwarfFiles might be empty if we have an empty source file.
+  // If it's not empty, [0] is unused and [1] is the first actual file.
+  assert(MCDwarfFiles.empty() || MCDwarfFiles.size() >= 2);
+  const MCDwarfFile &RootFile =
+      MCDwarfFiles.empty()
+          ? context.getMCDwarfLineTable(/*CUID=*/0).getRootFile()
+          : MCDwarfFiles[1];
+  MCOS->EmitBytes(RootFile.Name);
   MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
 
   // AT_comp_dir, the working directory the assembly was done in.
@@ -1754,6 +1769,20 @@ struct CIEKey {
         IsSimple(Frame.IsSimple), RAReg(Frame.RAReg),
         IsBKeyFrame(Frame.IsBKeyFrame) {}
 
+  /// Personality symbol name, or an empty StringRef when there is none.
+  StringRef PersonalityName() const {
+    return Personality ? Personality->getName() : StringRef();
+  }
+
+  /// Orders keys for sorting FDEs by CIE; IsBKeyFrame takes part as well.
+  bool operator<(const CIEKey &Other) const {
+    return std::make_tuple(PersonalityName(), PersonalityEncoding, LsdaEncoding,
+                           IsSignalFrame, IsSimple, RAReg, IsBKeyFrame) <
+           std::make_tuple(Other.PersonalityName(), Other.PersonalityEncoding,
+                           Other.LsdaEncoding, Other.IsSignalFrame,
+                           Other.IsSimple, Other.RAReg, Other.IsBKeyFrame);
+  }
+
   const MCSymbol *Personality;
   unsigned PersonalityEncoding;
   unsigned LsdaEncoding;
@@ -1831,7 +1860,16 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
 
   const MCSymbol *DummyDebugKey = nullptr;
   bool CanOmitDwarf = MOFI->getOmitDwarfIfHaveCompactUnwind();
-  for (auto I = FrameArray.begin(), E = FrameArray.end(); I != E;) {
+  // Sort the FDEs by their corresponding CIE before we emit them.
+  // This isn't technically necessary according to the DWARF standard,
+  // but the Android libunwindstack rejects eh_frame sections where
+  // an FDE refers to a CIE other than the closest previous CIE.
+  std::vector<MCDwarfFrameInfo> FrameArrayX(FrameArray.begin(), FrameArray.end());
+  llvm::stable_sort(FrameArrayX,
+                    [](const MCDwarfFrameInfo &X, const MCDwarfFrameInfo &Y) {
+                      return CIEKey(X) < CIEKey(Y);
+                    });
+  for (auto I = FrameArrayX.begin(), E = FrameArrayX.end(); I != E;) {
     const MCDwarfFrameInfo &Frame = *I;
     ++I;
     if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index ff53dd7299c1..a81eab9ca296 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
 //===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,8 +12,9 @@ using namespace llvm;
 
 MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_,
                                                  uint16_t EMachine_,
-                                                 bool HasRelocationAddend_)
-    : OSABI(OSABI_), EMachine(EMachine_),
+                                                 bool HasRelocationAddend_,
+                                                 uint8_t ABIVersion_)
+    : OSABI(OSABI_), ABIVersion(ABIVersion_), EMachine(EMachine_),
       HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {}
 
 bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 95b48e6abc74..245dd063004f 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -202,6 +201,7 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
   // In the future it might be worth trying to make these operations more well
   // defined.
   switch (Attribute) {
+  case MCSA_Cold:
   case MCSA_LazyReference:
   case MCSA_Reference:
   case MCSA_SymbolResolver:
@@ -400,6 +400,8 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
     case MCSymbolRefExpr::VK_INDNTPOFF:
     case MCSymbolRefExpr::VK_NTPOFF:
     case MCSymbolRefExpr::VK_GOTNTPOFF:
+    case MCSymbolRefExpr::VK_TLSCALL:
+    case MCSymbolRefExpr::VK_TLSDESC:
     case MCSymbolRefExpr::VK_TLSGD:
     case MCSymbolRefExpr::VK_TLSLD:
     case MCSymbolRefExpr::VK_TLSLDM:
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 3c022199145f..ab53ed42778e 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -1,14 +1,14 @@
 //===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCExpr.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -43,10 +43,15 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
   switch (getKind()) {
   case MCExpr::Target:
     return cast<MCTargetExpr>(this)->printImpl(OS, MAI);
-  case MCExpr::Constant:
-    OS << cast<MCConstantExpr>(*this).getValue();
+  case MCExpr::Constant: {
+    auto Value = cast<MCConstantExpr>(*this).getValue();
+    auto PrintInHex = cast<MCConstantExpr>(*this).useHexFormat();
+    if (PrintInHex)
+      OS << "0x" << Twine::utohexstr(Value);
+    else
+      OS << Value;
     return;
-
+  }
   case MCExpr::SymbolRef: {
     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
     const MCSymbol &Sym = SRE.getSymbol();
@@ -161,8 +166,9 @@ const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
   return new (Ctx) MCUnaryExpr(Opc, Expr, Loc);
 }
 
-const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
-  return new (Ctx) MCConstantExpr(Value);
+const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx,
+                                             bool PrintInHex) {
+  return new (Ctx) MCConstantExpr(Value, PrintInHex);
 }
 
 /* *** */
@@ -303,15 +309,16 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_Hexagon_LD_PLT: return "LDPLT";
   case VK_Hexagon_IE: return "IE";
   case VK_Hexagon_IE_GOT: return "IEGOT";
-  case VK_WebAssembly_FUNCTION: return "FUNCTION";
-  case VK_WebAssembly_GLOBAL: return "GLOBAL";
-  case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX";
-  case VK_WebAssembly_EVENT: return "EVENT";
+  case VK_WASM_TYPEINDEX: return "TYPEINDEX";
+  case VK_WASM_MBREL: return "MBREL";
+  case VK_WASM_TBREL: return "TBREL";
   case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
   case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
   case VK_AMDGPU_REL32_LO: return "rel32@lo";
   case VK_AMDGPU_REL32_HI: return "rel32@hi";
   case VK_AMDGPU_REL64: return "rel64";
+  case VK_AMDGPU_ABS32_LO: return "abs32@lo";
+  case VK_AMDGPU_ABS32_HI: return "abs32@hi";
   }
   llvm_unreachable("Invalid variant kind");
 }
@@ -419,15 +426,16 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("lo8", VK_AVR_LO8)
     .Case("hi8", VK_AVR_HI8)
     .Case("hlo8", VK_AVR_HLO8)
-    .Case("function", VK_WebAssembly_FUNCTION)
-    .Case("global", VK_WebAssembly_GLOBAL)
-    .Case("typeindex", VK_WebAssembly_TYPEINDEX)
-    .Case("event", VK_WebAssembly_EVENT)
+    .Case("typeindex", VK_WASM_TYPEINDEX)
+    .Case("tbrel", VK_WASM_TBREL)
+    .Case("mbrel", VK_WASM_MBREL)
     .Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
     .Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
     .Case("rel32@lo", VK_AMDGPU_REL32_LO)
     .Case("rel32@hi", VK_AMDGPU_REL32_HI)
     .Case("rel64", VK_AMDGPU_REL64)
+    .Case("abs32@lo", VK_AMDGPU_ABS32_LO)
+    .Case("abs32@hi", VK_AMDGPU_ABS32_HI)
     .Default(VK_Invalid);
 }
 
@@ -559,6 +567,11 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (Asm->isThumbFunc(&SA))
     Addend |= 1;
 
+  // If symbol is labeled as micromips, we set low-bit to ensure
+  // correct offset in .gcc_except_table
+  if (Asm->getBackend().isMicroMips(&SA))
+    Addend |= 1;
+
   // Clear the symbol expr pointers to indicate we have folded these
   // operands.
   A = B = nullptr;
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
index d22b117972bf..ae5bd65507bc 100644
--- a/lib/MC/MCFragment.cpp
+++ b/lib/MC/MCFragment.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCFragment.cpp - Assembler Fragment Implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 64f111fc7114..f6f6edee5822 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 9296fcedb72b..159f4070fe9f 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -1,9 +1,8 @@
 //===- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,10 +21,14 @@ using namespace llvm;
 
 void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
   static const char hex_rep[] = "0123456789abcdef";
+  bool First = true;
   for (char i: bytes) {
+    if (First)
+      First = false;
+    else
+      OS << ' ';
     OS << hex_rep[(i & 0xF0) >> 4];
     OS << hex_rep[i & 0xF];
-    OS << ' ';
   }
 }
 
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 8223f3a5c66f..eca87f940bf5 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- MCInstrAnalysis.cpp - InstrDesc target hooks -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp
index 53cba864a85d..d54aeba89edc 100644
--- a/lib/MC/MCInstrDesc.cpp
+++ b/lib/MC/MCInstrDesc.cpp
@@ -1,9 +1,8 @@
 //===------ llvm/MC/MCInstrDesc.cpp- Instruction Descriptors --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp
index c376c83274ef..66ee73c5bbb3 100644
--- a/lib/MC/MCLabel.cpp
+++ b/lib/MC/MCLabel.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCLabel.cpp - MCLabel implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index 2f8581470ea6..9ab321872b11 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCLinkerOptimizationHint.cpp ----- LOH handling ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index b30317e74672..613f255a4ea4 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -1,9 +1,8 @@
 //===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -387,6 +386,10 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym,
     Symbol->setWeakDefinition();
     Symbol->setWeakReference();
     break;
+
+  case MCSA_Cold:
+    Symbol->setCold();
+    break;
   }
 
   return true;
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
index 8809a3c320f8..a57b8a7ac0ff 100644
--- a/lib/MC/MCMachObjectTargetWriter.cpp
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
 //===- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 4e97e7550bcb..8452317c8c6b 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 9e35355d06e0..9f555abe1404 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +18,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
 
 using namespace llvm;
 
@@ -291,6 +291,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   FaultMapSection = Ctx->getMachOSection("__LLVM_FAULTMAPS", "__llvm_faultmaps",
                                          0, SectionKind::getMetadata());
 
+  RemarksSection = Ctx->getMachOSection(
+      "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
+
   TLSExtraDataSection = TLSTLVSection;
 }
 
@@ -476,6 +479,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
       Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
 
   StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
+
+  RemarksSection =
+      Ctx->getELFSection(".remarks", ELF::SHT_PROGBITS, ELF::SHF_EXCLUDE);
 }
 
 void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -756,6 +762,15 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
   // TODO: Define more sections.
 }
 
+void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
+  // The default csect for program code. Functions without a specified section
+  // get placed into this csect. The choice of csect name is not a property of
+  // the ABI or object file format. For example, the XL compiler uses an unnamed
+  // csect for program code.
+  TextSection = Ctx->getXCOFFSection(
+      ".text", XCOFF::StorageMappingClass::XMC_PR, SectionKind::getText());
+}
+
 void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
                                             MCContext &ctx,
                                             bool LargeCodeModel) {
@@ -802,6 +817,10 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
     Env = IsWasm;
     initWasmMCObjectFileInfo(TT);
     break;
+  case Triple::XCOFF:
+    Env = IsXCOFF;
+    initXCOFFMCObjectFileInfo(TT);
+    break;
   case Triple::UnknownObjectFormat:
     report_fatal_error("Cannot initialize MC for unknown object file format.");
     break;
@@ -817,6 +836,7 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
   case Triple::MachO:
   case Triple::COFF:
   case Triple::Wasm:
+  case Triple::XCOFF:
   case Triple::UnknownObjectFormat:
     report_fatal_error("Cannot get DWARF comdat section for this object file "
                        "format: not implemented.");
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 6ec705bdddb7..1587d8498666 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -315,7 +314,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
 }
 
 void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
-                                       const MCSubtargetInfo &STI, bool) {
+                                       const MCSubtargetInfo &STI) {
   getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst);
   EmitInstructionImpl(Inst, STI);
   getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst);
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 98ac48a23f91..a058bbe0ba0b 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 2b0d20f9b8e2..9155ae05d29d 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -1,9 +1,8 @@
 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,8 +61,6 @@ int AsmLexer::getNextChar() {
   return (unsigned char)*CurPtr++;
 }
 
-/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
-///
 /// The leading integral digit sequence and dot should have already been
 /// consumed, some or all of the fractional digit sequence *can* have been
 /// consumed.
@@ -72,13 +69,16 @@ AsmToken AsmLexer::LexFloatLiteral() {
   while (isDigit(*CurPtr))
     ++CurPtr;
 
-  // Check for exponent; we intentionally accept a slighlty wider set of
-  // literals here and rely on the upstream client to reject invalid ones (e.g.,
-  // "1e+").
-  if (*CurPtr == 'e' || *CurPtr == 'E') {
+  if (*CurPtr == '-' || *CurPtr == '+')
+    return ReturnError(CurPtr, "Invalid sign in float literal");
+
+  // Check for exponent
+  if ((*CurPtr == 'e' || *CurPtr == 'E')) {
     ++CurPtr;
+
     if (*CurPtr == '-' || *CurPtr == '+')
       ++CurPtr;
+
     while (isDigit(*CurPtr))
       ++CurPtr;
   }
@@ -146,8 +146,9 @@ AsmToken AsmLexer::LexIdentifier() {
     // Disambiguate a .1243foo identifier from a floating literal.
     while (isDigit(*CurPtr))
       ++CurPtr;
-    if (*CurPtr == 'e' || *CurPtr == 'E' ||
-        !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
+
+    if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
+        *CurPtr == 'e' || *CurPtr == 'E')
       return LexFloatLiteral();
   }
 
@@ -327,8 +328,9 @@ AsmToken AsmLexer::LexDigit() {
     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
     bool isHex = Radix == 16;
     // Check for floating point literals.
-    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
-      ++CurPtr;
+    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
+      if (*CurPtr == '.')
+        ++CurPtr;
       return LexFloatLiteral();
     }
 
@@ -557,7 +559,7 @@ AsmToken AsmLexer::LexToken() {
     AsmToken TokenBuf[2];
     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
     size_t num = peekTokens(Buf, true);
-    // There cannot be a space preceeding this
+    // There cannot be a space preceding this
     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
         TokenBuf[1].is(AsmToken::String)) {
       CurPtr = TokStart; // reset curPtr;
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index cf42a6f7075b..084f6a7a2e14 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1,9 +1,8 @@
 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -159,12 +158,16 @@ private:
   /// The values from the last parsed cpp hash file line comment if any.
   struct CppHashInfoTy {
     StringRef Filename;
-    int64_t LineNumber = 0;
+    int64_t LineNumber;
     SMLoc Loc;
-    unsigned Buf = 0;
+    unsigned Buf;
+    CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
   };
   CppHashInfoTy CppHashInfo;
 
+  /// The filename from the first cpp hash file line comment, if any.
+  StringRef FirstCppHashFilename;
+
   /// List of forward directional labels for diagnosis at the end.
   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
 
@@ -426,6 +429,7 @@ private:
     DK_WEAK_DEFINITION,
     DK_WEAK_REFERENCE,
     DK_WEAK_DEF_CAN_BE_HIDDEN,
+    DK_COLD,
     DK_COMM,
     DK_COMMON,
     DK_LCOMM,
@@ -709,6 +713,9 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
   case MCObjectFileInfo::IsWasm:
     PlatformParser.reset(createWasmAsmParser());
     break;
+  case MCObjectFileInfo::IsXCOFF:
+    // TODO: Need to implement createXCOFFAsmParser for XCOFF format.
+    break;
   }
 
   PlatformParser->Initialize(*this);
@@ -844,9 +851,20 @@ bool AsmParser::enabledGenDwarfForAssembly() {
   // If we haven't encountered any .file directives (which would imply that
   // the assembler source was produced with debug info already) then emit one
   // describing the assembler source file itself.
-  if (getContext().getGenDwarfFileNumber() == 0)
+  if (getContext().getGenDwarfFileNumber() == 0) {
+    // Use the first #line directive for this, if any. It's preprocessed, so
+    // there is no checksum, and of course no source directive.
+    if (!FirstCppHashFilename.empty())
+      getContext().setMCLineTableRootFile(/*CUID=*/0,
+                                          getContext().getCompilationDir(),
+                                          FirstCppHashFilename,
+                                          /*Cksum=*/None, /*Source=*/None);
+    const MCDwarfFile &RootFile =
+        getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
     getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective(
-        0, StringRef(), getContext().getMainFileName()));
+        /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
+        RootFile.Checksum, RootFile.Source));
+  }
   return true;
 }
 
@@ -1983,6 +2001,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
       return parseDirectiveSymbolAttribute(MCSA_WeakReference);
     case DK_WEAK_DEF_CAN_BE_HIDDEN:
       return parseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+    case DK_COLD:
+      return parseDirectiveSymbolAttribute(MCSA_Cold);
     case DK_COMM:
     case DK_COMMON:
       return parseDirectiveComm(/*IsLocal=*/false);
@@ -2275,11 +2295,14 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
   // Get rid of the enclosing quotes.
   Filename = Filename.substr(1, Filename.size() - 2);
 
-  // Save the SMLoc, Filename and LineNumber for later use by diagnostics.
+  // Save the SMLoc, Filename and LineNumber for later use by diagnostics
+  // and possibly DWARF file info.
   CppHashInfo.Loc = L;
   CppHashInfo.Filename = Filename;
   CppHashInfo.LineNumber = LineNumber;
   CppHashInfo.Buf = CurBuffer;
+  if (FirstCppHashFilename.empty())
+    FirstCppHashFilename = Filename;
   return false;
 }
 
@@ -3364,26 +3387,28 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
   }
 
   if (FileNumber == -1) {
-    if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
-      return Error(DirectiveLoc,
-                   "target does not support '.file' without a number");
-    getStreamer().EmitFileDirective(Filename);
+    // Ignore the directive if there is no number and the target doesn't support
+    // numberless .file directives. This allows some portability of assembler
+    // between different object file formats.
+    if (getContext().getAsmInfo()->hasSingleParameterDotFile())
+      getStreamer().EmitFileDirective(Filename);
   } else {
     // In case there is a -g option as well as debug info from directive .file,
     // we turn off the -g option, directly use the existing debug info instead.
-    // Also reset any implicit ".file 0" for the assembler source.
+    // Throw away any implicit file table for the assembler source.
     if (Ctx.getGenDwarfForAssembly()) {
-      Ctx.getMCDwarfLineTable(0).resetRootFile();
+      Ctx.getMCDwarfLineTable(0).resetFileTable();
       Ctx.setGenDwarfForAssembly(false);
     }
 
-    MD5::MD5Result *CKMem = nullptr;
+    Optional<MD5::MD5Result> CKMem;
     if (HasMD5) {
-      CKMem = (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
+      MD5::MD5Result Sum;
       for (unsigned i = 0; i != 8; ++i) {
-        CKMem->Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
-        CKMem->Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+        Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+        Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
       }
+      CKMem = Sum;
     }
     if (HasSource) {
       char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
@@ -3399,7 +3424,6 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
           FileNumber, Directory, Filename, CKMem, Source);
       if (!FileNumOrErr)
         return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
-      FileNumber = FileNumOrErr.get();
     }
     // Alert the user if there are some .file directives with MD5 and some not.
     // But only do that once.
@@ -5035,9 +5059,9 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
     MCSymbol *Sym = getContext().lookupSymbol(Name);
 
     if (expect_defined)
-      TheCondState.CondMet = (Sym && !Sym->isUndefined());
+      TheCondState.CondMet = (Sym && !Sym->isUndefined(false));
     else
-      TheCondState.CondMet = (!Sym || Sym->isUndefined());
+      TheCondState.CondMet = (!Sym || Sym->isUndefined(false));
     TheCondState.Ignore = !TheCondState.CondMet;
   }
 
@@ -5223,6 +5247,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION;
   DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE;
   DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
+  DirectiveKindMap[".cold"] = DK_COLD;
   DirectiveKindMap[".comm"] = DK_COMM;
   DirectiveKindMap[".common"] = DK_COMMON;
   DirectiveKindMap[".lcomm"] = DK_LCOMM;
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 388304a72395..1217ea99e465 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -1,9 +1,8 @@
 //===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index cd99112292a9..1160934dc62c 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1,9 +1,8 @@
 //===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -1149,6 +1148,7 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
   case MachO::PLATFORM_TVOS:    return Triple::TvOS;
   case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
   case MachO::PLATFORM_BRIDGEOS:         /* silence warning */ break;
+  case MachO::PLATFORM_MACCATALYST: return Triple::IOS;
   case MachO::PLATFORM_IOSSIMULATOR:     /* silence warning */ break;
   case MachO::PLATFORM_TVOSSIMULATOR:    /* silence warning */ break;
   case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
@@ -1169,6 +1169,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
     .Case("ios", MachO::PLATFORM_IOS)
     .Case("tvos", MachO::PLATFORM_TVOS)
     .Case("watchos", MachO::PLATFORM_WATCHOS)
+    .Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
     .Default(0);
   if (Platform == 0)
     return Error(PlatformLoc, "unknown platform name");
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d568f7a71eeb..a55bdd5364cb 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -1,9 +1,8 @@
 //===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -616,6 +615,10 @@ EndStmt:
       Type = ELF::SHT_LLVM_LINKER_OPTIONS;
     else if (TypeName == "llvm_call_graph_profile")
       Type = ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
+    else if (TypeName == "llvm_dependent_libraries")
+      Type = ELF::SHT_LLVM_DEPENDENT_LIBRARIES;
+    else if (TypeName == "llvm_sympart")
+      Type = ELF::SHT_LLVM_SYMPART;
     else if (TypeName.getAsInteger(0, Type))
       return TokError("unknown section type");
   }
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 10960fc69633..497055bc1760 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmLexer.cpp - Abstract Asm Lexer Interface ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index efedcdc5a314..41a1ee555d6f 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -1,9 +1,8 @@
 //===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp
index 031f473dc5fe..18d18f0cf6ed 100644
--- a/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -1,9 +1,8 @@
 //===- MCAsmParserExtension.cpp - Asm Parser Hooks ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index a0c06c9d5018..940f26d4750b 100644
--- a/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -1,9 +1,8 @@
 //===-- MCTargetAsmParser.cpp - Target Assembly Parser --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp
index 93bb0cb3c72e..28d4459fecd4 100644
--- a/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/lib/MC/MCParser/WasmAsmParser.cpp
@@ -1,9 +1,8 @@
 //===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // --
 //
@@ -22,6 +21,7 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolWasm.h"
@@ -32,8 +32,8 @@ using namespace llvm;
 namespace {
 
 class WasmAsmParser : public MCAsmParserExtension {
-  MCAsmParser *Parser;
-  MCAsmLexer *Lexer;
+  MCAsmParser *Parser = nullptr;
+  MCAsmLexer *Lexer = nullptr;
 
   template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
   void addDirectiveHandler(StringRef Directive) {
@@ -44,9 +44,7 @@ class WasmAsmParser : public MCAsmParserExtension {
   }
 
 public:
-  WasmAsmParser() : Parser(nullptr), Lexer(nullptr) {
-    BracketExpressionsSupported = true;
-  }
+  WasmAsmParser() { BracketExpressionsSupported = true; }
 
   void Initialize(MCAsmParser &P) override {
     Parser = &P;
@@ -58,21 +56,31 @@ public:
     addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
     addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
     addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
+    addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
+    addDirectiveHandler<
+      &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+    addDirectiveHandler<
+      &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
+    addDirectiveHandler<
+      &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
+    addDirectiveHandler<
+      &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
   }
 
-  bool Error(const StringRef &msg, const AsmToken &tok) {
-    return Parser->Error(tok.getLoc(), msg + tok.getString());
+  bool error(const StringRef &Msg, const AsmToken &Tok) {
+    return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
   }
 
-  bool IsNext(AsmToken::TokenKind Kind) {
-    auto ok = Lexer->is(Kind);
-    if (ok) Lex();
-    return ok;
+  bool isNext(AsmToken::TokenKind Kind) {
+    auto Ok = Lexer->is(Kind);
+    if (Ok)
+      Lex();
+    return Ok;
   }
 
-  bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
-    if (!IsNext(Kind))
-      return Error(std::string("Expected ") + KindName + ", instead got: ",
+  bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+    if (!isNext(Kind))
+      return error(std::string("Expected ") + KindName + ", instead got: ",
                    Lexer->getTok());
     return false;
   }
@@ -82,9 +90,65 @@ public:
     return false;
   }
 
+  bool parseSectionFlags(StringRef FlagStr, bool &Passive) {
+    SmallVector<StringRef, 2> Flags;
+    // If there are no flags, keep Flags empty
+    FlagStr.split(Flags, ",", -1, false);
+    for (auto &Flag : Flags) {
+      if (Flag == "passive")
+        Passive = true;
+      else
+        return error("Expected section flags, instead got: ", Lexer->getTok());
+    }
+    return false;
+  }
+
   bool parseSectionDirective(StringRef, SMLoc) {
-    // FIXME: .section currently no-op.
-    while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
+    StringRef Name;
+    if (Parser->parseIdentifier(Name))
+      return TokError("expected identifier in directive");
+
+    if (expect(AsmToken::Comma, ","))
+      return true;
+
+    if (Lexer->isNot(AsmToken::String))
+      return error("expected string in directive, instead got: ", Lexer->getTok());
+
+    auto Kind = StringSwitch<Optional<SectionKind>>(Name)
+                    .StartsWith(".data", SectionKind::getData())
+                    .StartsWith(".rodata", SectionKind::getReadOnly())
+                    .StartsWith(".text", SectionKind::getText())
+                    .StartsWith(".custom_section", SectionKind::getMetadata())
+                    .StartsWith(".bss", SectionKind::getBSS())
+                    // See use of .init_array in WasmObjectWriter and
+                    // TargetLoweringObjectFileWasm
+                    .StartsWith(".init_array", SectionKind::getData())
+                    .Default(Optional<SectionKind>());
+    if (!Kind.hasValue())
+      return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name);
+
+    MCSectionWasm *Section = getContext().getWasmSection(Name, Kind.getValue());
+
+    // Update section flags if present in this .section directive
+    bool Passive = false;
+    if (parseSectionFlags(getTok().getStringContents(), Passive))
+      return true;
+
+    if (Passive) {
+      if (!Section->isWasmData())
+        return Parser->Error(getTok().getLoc(),
+                             "Only data sections can be passive");
+      Section->setPassive();
+    }
+
+    Lex();
+
+    if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@") ||
+        expect(AsmToken::EndOfStatement, "eol"))
+      return true;
+
+    auto WS = getContext().getWasmSection(Name, Kind.getValue());
+    getStreamer().SwitchSection(WS);
     return false;
   }
 
@@ -95,16 +159,15 @@ public:
     if (Parser->parseIdentifier(Name))
       return TokError("expected identifier in directive");
     auto Sym = getContext().getOrCreateSymbol(Name);
-    if (Lexer->isNot(AsmToken::Comma))
-      return TokError("unexpected token in directive");
-    Lex();
+    if (expect(AsmToken::Comma, ","))
+      return true;
     const MCExpr *Expr;
     if (Parser->parseExpression(Expr))
       return true;
-    if (Lexer->isNot(AsmToken::EndOfStatement))
-      return TokError("unexpected token in directive");
-    Lex();
-    // MCWasmStreamer implements this.
+    if (expect(AsmToken::EndOfStatement, "eol"))
+      return true;
+    // This is done automatically by the assembler for functions currently,
+    // so this is only currently needed for data sections:
     getStreamer().emitELFSize(Sym, Expr);
     return false;
   }
@@ -113,24 +176,71 @@ public:
     // This could be the start of a function, check if followed by
     // "label,@function"
     if (!Lexer->is(AsmToken::Identifier))
-      return Error("Expected label after .type directive, got: ",
+      return error("Expected label after .type directive, got: ",
                    Lexer->getTok());
     auto WasmSym = cast<MCSymbolWasm>(
                      getStreamer().getContext().getOrCreateSymbol(
                        Lexer->getTok().getString()));
     Lex();
-    if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
+    if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
           Lexer->is(AsmToken::Identifier)))
-      return Error("Expected label,@type declaration, got: ", Lexer->getTok());
+      return error("Expected label,@type declaration, got: ", Lexer->getTok());
     auto TypeName = Lexer->getTok().getString();
     if (TypeName == "function")
       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
     else if (TypeName == "global")
       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+    else if (TypeName == "object")
+      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
     else
-      return Error("Unknown WASM symbol type: ", Lexer->getTok());
+      return error("Unknown WASM symbol type: ", Lexer->getTok());
     Lex();
-    return Expect(AsmToken::EndOfStatement, "EOL");
+    return expect(AsmToken::EndOfStatement, "EOL");
+  }
+
+  // FIXME: Shared with ELF.
+  /// ParseDirectiveIdent
+  ///  ::= .ident string
+  bool ParseDirectiveIdent(StringRef, SMLoc) {
+    if (getLexer().isNot(AsmToken::String))
+      return TokError("unexpected token in '.ident' directive");
+    StringRef Data = getTok().getIdentifier();
+    Lex();
+    if (getLexer().isNot(AsmToken::EndOfStatement))
+      return TokError("unexpected token in '.ident' directive");
+    Lex();
+    getStreamer().EmitIdent(Data);
+    return false;
+  }
+
+  // FIXME: Shared with ELF.
+  /// ParseDirectiveSymbolAttribute
+  ///  ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
+  bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+    MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+      .Case(".weak", MCSA_Weak)
+      .Case(".local", MCSA_Local)
+      .Case(".hidden", MCSA_Hidden)
+      .Case(".internal", MCSA_Internal)
+      .Case(".protected", MCSA_Protected)
+      .Default(MCSA_Invalid);
+    assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      while (true) {
+        StringRef Name;
+        if (getParser().parseIdentifier(Name))
+          return TokError("expected identifier in directive");
+        MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+        getStreamer().EmitSymbolAttribute(Sym, Attr);
+        if (getLexer().is(AsmToken::EndOfStatement))
+          break;
+        if (getLexer().isNot(AsmToken::Comma))
+          return TokError("unexpected token in directive");
+        Lex();
+      }
+    }
+    Lex();
+    return false;
   }
 };
 
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
index 5abae5379867..4273b876b7bb 100644
--- a/lib/MC/MCRegisterInfo.cpp
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- MC/MCRegisterInfo.cpp - Target Register Description ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/MC/MCSchedule.cpp b/lib/MC/MCSchedule.cpp
index 929bd7f6046c..1fc5ec5e975f 100644
--- a/lib/MC/MCSchedule.cpp
+++ b/lib/MC/MCSchedule.cpp
@@ -1,9 +1,8 @@
 //===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -150,3 +149,19 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
   // that it can execute at the maximum default issue width.
   return 1.0 / DefaultIssueWidth;
 }
+
+unsigned
+MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
+                                       unsigned WriteResourceID) {
+  // Track the smallest Cycles value among entries for WriteResourceID,
+  // starting from zero so the tracked value is never positive.
+  int MinCycles = 0;
+  for (const MCReadAdvanceEntry &Entry : Entries) {
+    if (Entry.WriteResourceID == WriteResourceID)
+      MinCycles = std::min(MinCycles, Entry.Cycles);
+  }
+
+  // With no entries (or no match) MinCycles is still 0, so 0 is returned.
+  // Otherwise the magnitude of the smallest value is the delay.
+  return std::abs(MinCycles);
+}
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index d4f11d10136a..2c892ab81608 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index c861963eec8a..f0c06f70bd73 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,8 +14,6 @@
 
 using namespace llvm;
 
-MCSectionCOFF::~MCSectionCOFF() = default; // anchor.
-
 // ShouldOmitSectionDirective - Decides whether a '.section' directive
 // should be printed before the section name
 bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 7ee1694ebbf7..efe504b2024c 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,8 +17,6 @@
 
 using namespace llvm;
 
-MCSectionELF::~MCSectionELF() = default; // anchor.
-
 // Decides whether a '.section' directive
 // should be printed before the section name.
 bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
@@ -155,6 +152,10 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
     OS << "llvm_linker_options";
   else if (Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
     OS << "llvm_call_graph_profile";
+  else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
+    OS << "llvm_dependent_libraries";
+  else if (Type == ELF::SHT_LLVM_SYMPART)
+    OS << "llvm_sympart";
   else
     report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
                        " for section " + getSectionName());
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index f40237231a2f..0fd89dcbe5fa 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCSectionWasm.cpp b/lib/MC/MCSectionWasm.cpp
index 626027a24f97..8633c10a73fd 100644
--- a/lib/MC/MCSectionWasm.cpp
+++ b/lib/MC/MCSectionWasm.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,11 +14,9 @@
 
 using namespace llvm;
 
-MCSectionWasm::~MCSectionWasm() {} // anchor.
-
 // Decides whether a '.section' directive
 // should be printed before the section name.
-bool MCSectionWasm::ShouldOmitSectionDirective(StringRef Name,
+bool MCSectionWasm::shouldOmitSectionDirective(StringRef Name,
                                                const MCAsmInfo &MAI) const {
   return MAI.shouldOmitSectionDirective(Name);
 }
@@ -51,7 +48,7 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
                                          raw_ostream &OS,
                                          const MCExpr *Subsection) const {
 
-  if (ShouldOmitSectionDirective(SectionName, MAI)) {
+  if (shouldOmitSectionDirective(SectionName, MAI)) {
     OS << '\t' << getSectionName();
     if (Subsection) {
       OS << '\t';
@@ -65,7 +62,8 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
   printName(OS, getSectionName());
   OS << ",\"";
 
-  // TODO: Print section flags.
+  if (IsPassive)
+    OS << "passive";
 
   OS << '"';
 
diff --git a/lib/MC/MCSectionXCOFF.cpp b/lib/MC/MCSectionXCOFF.cpp
new file mode 100644
index 000000000000..d1a637345024
--- /dev/null
+++ b/lib/MC/MCSectionXCOFF.cpp
@@ -0,0 +1,38 @@
+//===- lib/MC/MCSectionXCOFF.cpp - XCOFF Code Section Representation ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionXCOFF::~MCSectionXCOFF() = default;
+
+// Writes the directive that switches to this section. Only text sections are
+// handled (as a "[PR]" csect); any other section kind is a fatal error.
+void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+                                          raw_ostream &OS,
+                                          const MCExpr *Subsection) const {
+  if (getKind().isText()) {
+    OS << "\t.csect " << getSectionName() << "["
+       << "PR"
+       << "]" << '\n';
+    return;
+  }
+
+  report_fatal_error("Printing for this SectionKind is unimplemented.");
+}
+
+// Code alignment is requested for text sections only.
+bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
+
+// Every kind other than text is reported as virtual; text sections, the only
+// kind PrintSwitchToSection can emit, are not.
+bool MCSectionXCOFF::isVirtualSection() const { return !getKind().isText(); }
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 6a8471bc61b4..decbb96817e3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -108,6 +107,11 @@ raw_ostream &MCStreamer::GetCommentOS() {
   return nulls();
 }
 
+unsigned MCStreamer::getNumFrameInfos() { return DwarfFrameInfos.size(); }
+ArrayRef<MCDwarfFrameInfo> MCStreamer::getDwarfFrameInfos() const {
+  return DwarfFrameInfos;
+}
+
 void MCStreamer::emitRawComment(const Twine &T, bool TabPrefix) {}
 
 void MCStreamer::addExplicitComment(const Twine &T) {}
@@ -136,10 +140,10 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
 
 /// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
 /// client having to pass in a MCExpr for constant integers.
-void MCStreamer::EmitULEB128IntValue(uint64_t Value) {
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned PadTo) {
   SmallString<128> Tmp;
   raw_svector_ostream OSE(Tmp);
-  encodeULEB128(Value, OSE);
+  encodeULEB128(Value, OSE, PadTo);
   EmitBytes(OSE.str());
 }
 
@@ -205,7 +209,7 @@ void MCStreamer::EmitZeros(uint64_t NumBytes) {
 Expected<unsigned>
 MCStreamer::tryEmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
                                       StringRef Filename,
-                                      MD5::MD5Result *Checksum,
+                                      Optional<MD5::MD5Result> Checksum,
                                       Optional<StringRef> Source,
                                       unsigned CUID) {
   return getContext().getDwarfFile(Directory, Filename, FileNo, Checksum,
@@ -214,7 +218,7 @@ MCStreamer::tryEmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
 
 void MCStreamer::emitDwarfFile0Directive(StringRef Directory,
                                          StringRef Filename,
-                                         MD5::MD5Result *Checksum,
+                                         Optional<MD5::MD5Result> Checksum,
                                          Optional<StringRef> Source,
                                          unsigned CUID) {
   getContext().setMCLineTableRootFile(CUID, Directory, Filename, Checksum,
@@ -953,8 +957,7 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
   }
 }
 
-void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                                 bool) {
+void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
   // Scan for values.
   for (unsigned i = Inst.getNumOperands(); i--;)
     if (Inst.getOperand(i).isExpr())
@@ -1074,6 +1077,15 @@ void MCStreamer::EmitVersionForTarget(const Triple &Target,
   unsigned Major;
   unsigned Minor;
   unsigned Update;
+  if (Target.isMacCatalystEnvironment()) {
+    // Mac Catalyst always uses the build version load command.
+    Target.getiOSVersion(Major, Minor, Update);
+    assert(Major && "A non-zero major version is expected");
+    EmitBuildVersion(MachO::PLATFORM_MACCATALYST, Major, Minor, Update,
+                     SDKVersion);
+    return;
+  }
+
   MCVersionMinType VersionType;
   if (Target.isWatchOS()) {
     VersionType = MCVM_WatchOSVersionMin;
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index f6167826fae2..5fd48d9e1010 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -1,9 +1,8 @@
 //===- MCSubtargetInfo.cpp - Subtarget Information ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -20,11 +20,178 @@
 
 using namespace llvm;
 
+/// Find KV in array using binary search.
+template <typename T>
+static const T *Find(StringRef S, ArrayRef<T> A) {
+  // Binary search the array
+  auto F = llvm::lower_bound(A, S);
+  // If not found then return NULL
+  if (F == A.end() || StringRef(F->Key) != S) return nullptr;
+  // Return the found array item
+  return F;
+}
+
+/// For each feature that is (transitively) implied by this feature, set it.
+static
+void SetImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
+                    ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  // OR the Implies bits in outside the loop. This allows the Implies for CPUs
+  // which might imply features not in FeatureTable to use this.
+  Bits |= Implies;
+  for (const SubtargetFeatureKV &FE : FeatureTable)
+    if (Implies.test(FE.Value))
+      SetImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable);
+}
+
+/// Clear every feature in FeatureTable that (transitively) implies Value.
+static
+void ClearImpliedBits(FeatureBitset &Bits, unsigned Value,
+                      ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  for (const SubtargetFeatureKV &Entry : FeatureTable) {
+    if (!Entry.Implies.getAsBitset().test(Value))
+      continue; // Entry does not imply Value directly.
+    Bits.reset(Entry.Value);
+    ClearImpliedBits(Bits, Entry.Value, FeatureTable); // Recurse upward.
+  }
+}
+
+static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
+                             ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  assert(SubtargetFeatures::hasFlag(Feature) &&
+         "Feature flags should start with '+' or '-'");
+
+  // Find feature in table.
+  const SubtargetFeatureKV *FeatureEntry =
+      Find(SubtargetFeatures::StripFlag(Feature), FeatureTable);
+  // If there is a match
+  if (FeatureEntry) {
+    // Enable/disable feature in bits
+    if (SubtargetFeatures::isEnabled(Feature)) {
+      Bits.set(FeatureEntry->Value);
+
+      // For each feature that this implies, set it.
+      SetImpliedBits(Bits, FeatureEntry->Implies.getAsBitset(), FeatureTable);
+    } else {
+      Bits.reset(FeatureEntry->Value);
+
+      // For each feature that implies this, clear it.
+      ClearImpliedBits(Bits, FeatureEntry->Value, FeatureTable);
+    }
+  } else {
+    errs() << "'" << Feature << "' is not a recognized feature for this target"
+           << " (ignoring feature)\n";
+  }
+}
+
+/// Return the length of the longest entry in the table.
+template <typename T>
+static size_t getLongestEntryLength(ArrayRef<T> Table) {
+  size_t MaxLen = 0;
+  for (auto &I : Table)
+    MaxLen = std::max(MaxLen, std::strlen(I.Key));
+  return MaxLen;
+}
+
+/// Display help for feature and mcpu choices.
+static void Help(ArrayRef<SubtargetSubTypeKV> CPUTable,
+                 ArrayRef<SubtargetFeatureKV> FeatTable) {
+  // the static variable ensures that the help information only gets
+  // printed once even though a target machine creates multiple subtargets
+  static bool PrintOnce = false;
+  if (PrintOnce) {
+    return;
+  }
+
+  // Determine the length of the longest CPU and Feature entries.
+  unsigned MaxCPULen  = getLongestEntryLength(CPUTable);
+  unsigned MaxFeatLen = getLongestEntryLength(FeatTable);
+
+  // Print the CPU table.
+  errs() << "Available CPUs for this target:\n\n";
+  for (auto &CPU : CPUTable)
+    errs() << format("  %-*s - Select the %s processor.\n", MaxCPULen, CPU.Key,
+                     CPU.Key);
+  errs() << '\n';
+
+  // Print the Feature table.
+  errs() << "Available features for this target:\n\n";
+  for (auto &Feature : FeatTable)
+    errs() << format("  %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
+  errs() << '\n';
+
+  errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
+            "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+
+  PrintOnce = true;
+}
+
+/// Display help for mcpu choices only: prints each CPUTable key to errs().
+static void cpuHelp(ArrayRef<SubtargetSubTypeKV> CPUTable) {
+  // The function-local static ensures that the help information only gets
+  // printed once even though a target machine creates multiple subtargets.
+  static bool PrintOnce = false;
+  if (PrintOnce) {
+    return;
+  }
+
+  // Print the CPU table, one name per line.
+  errs() << "Available CPUs for this target:\n\n";
+  for (auto &CPU : CPUTable)
+    errs() << "\t" << CPU.Key << "\n";
+  errs() << '\n';
+
+  errs() << "Use -mcpu or -mtune to specify the target's processor.\n"
+            "For example, clang --target=aarch64-unknown-linux-gnu "
+            "-mcpu=cortex-a35\n";
+
+  PrintOnce = true;
+}
+
 static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
-                                 ArrayRef<SubtargetFeatureKV> ProcDesc,
+                                 ArrayRef<SubtargetSubTypeKV> ProcDesc,
                                  ArrayRef<SubtargetFeatureKV> ProcFeatures) {
   SubtargetFeatures Features(FS);
-  return Features.getFeatureBits(CPU, ProcDesc, ProcFeatures);
+
+  if (ProcDesc.empty() || ProcFeatures.empty())
+    return FeatureBitset();
+
+  assert(std::is_sorted(std::begin(ProcDesc), std::end(ProcDesc)) &&
+         "CPU table is not sorted");
+  assert(std::is_sorted(std::begin(ProcFeatures), std::end(ProcFeatures)) &&
+         "CPU features table is not sorted");
+  // Resulting bits
+  FeatureBitset Bits;
+
+  // Check if help is needed
+  if (CPU == "help")
+    Help(ProcDesc, ProcFeatures);
+
+  // Find CPU entry if CPU name is specified.
+  else if (!CPU.empty()) {
+    const SubtargetSubTypeKV *CPUEntry = Find(CPU, ProcDesc);
+
+    // If there is a match
+    if (CPUEntry) {
+      // Set the features implied by this CPU feature, if any.
+      SetImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures);
+    } else {
+      errs() << "'" << CPU << "' is not a recognized processor for this target"
+             << " (ignoring processor)\n";
+    }
+  }
+
+  // Iterate through each feature
+  for (const std::string &Feature : Features.getFeatures()) {
+    // Check for help
+    if (Feature == "+help")
+      Help(ProcDesc, ProcFeatures);
+    else if (Feature == "+cpuHelp")
+      cpuHelp(ProcDesc);
+    else
+      ApplyFeatureFlag(Bits, Feature, ProcFeatures);
+  }
+
+  return Bits;
 }
 
 void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
@@ -41,12 +208,12 @@ void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) {
 
 MCSubtargetInfo::MCSubtargetInfo(
     const Triple &TT, StringRef C, StringRef FS,
-    ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
-    const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+    ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+    const MCWriteProcResEntry *WPR,
     const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
     const InstrStage *IS, const unsigned *OC, const unsigned *FP)
     : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD),
-      ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL),
+      WriteProcResTable(WPR), WriteLatencyTable(WL),
       ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) {
   InitMCProcessorInfo(CPU, FS);
 }
@@ -61,13 +228,50 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
   return FeatureBits;
 }
 
-FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
-  SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
+FeatureBitset MCSubtargetInfo::SetFeatureBitsTransitively(
+  const FeatureBitset &FB) {
+  SetImpliedBits(FeatureBits, FB, ProcFeatures);
+  return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ClearFeatureBitsTransitively(
+  const FeatureBitset &FB) {
+  for (unsigned I = 0, E = FB.size(); I < E; I++) {
+    if (FB[I]) {
+      FeatureBits.reset(I);
+      ClearImpliedBits(FeatureBits, I, ProcFeatures);
+    }
+  }
+  return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef Feature) {
+  // Find feature in table.
+  const SubtargetFeatureKV *FeatureEntry =
+      Find(SubtargetFeatures::StripFlag(Feature), ProcFeatures);
+  // If there is a match
+  if (FeatureEntry) {
+    if (FeatureBits.test(FeatureEntry->Value)) {
+      FeatureBits.reset(FeatureEntry->Value);
+      // For each feature that implies this, clear it.
+      ClearImpliedBits(FeatureBits, FeatureEntry->Value, ProcFeatures);
+    } else {
+      FeatureBits.set(FeatureEntry->Value);
+
+      // For each feature that this implies, set it.
+      SetImpliedBits(FeatureBits, FeatureEntry->Implies.getAsBitset(),
+                     ProcFeatures);
+    }
+  } else {
+    errs() << "'" << Feature << "' is not a recognized feature for this target"
+           << " (ignoring feature)\n";
+  }
+
   return FeatureBits;
 }
 
 FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
-  SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
+  ::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
   return FeatureBits;
 }
 
@@ -75,37 +279,30 @@ bool MCSubtargetInfo::checkFeatures(StringRef FS) const {
   SubtargetFeatures T(FS);
   FeatureBitset Set, All;
   for (std::string F : T.getFeatures()) {
-    SubtargetFeatures::ApplyFeatureFlag(Set, F, ProcFeatures);
+    ::ApplyFeatureFlag(Set, F, ProcFeatures);
     if (F[0] == '-')
       F[0] = '+';
-    SubtargetFeatures::ApplyFeatureFlag(All, F, ProcFeatures);
+    ::ApplyFeatureFlag(All, F, ProcFeatures);
   }
   return (FeatureBits & All) == Set;
 }
 
 const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
-  assert(ProcSchedModels && "Processor machine model not available!");
-
-  ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
-
-  assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
-                    [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
-                      return strcmp(LHS.Key, RHS.Key) < 0;
-                    }) &&
+  assert(std::is_sorted(ProcDesc.begin(), ProcDesc.end()) &&
          "Processor machine model table is not sorted");
 
   // Find entry
-  auto Found =
-    std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
-  if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
+  const SubtargetSubTypeKV *CPUEntry = Find(CPU, ProcDesc);
+
+  if (!CPUEntry) {
     if (CPU != "help") // Don't error if the user asked for help.
       errs() << "'" << CPU
              << "' is not a recognized processor for this target"
              << " (ignoring processor)\n";
     return MCSchedModel::GetDefaultSchedModel();
   }
-  assert(Found->Value && "Missing processor SchedModel value");
-  return *(const MCSchedModel *)Found->Value;
+  assert(CPUEntry->SchedModel && "Missing processor SchedModel value");
+  return *CPUEntry->SchedModel;
 }
 
 InstrItineraryData
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 5502c658f565..67cab9a92722 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
index 12c724f6b1ee..a07c56c64f84 100644
--- a/lib/MC/MCSymbolELF.cpp
+++ b/lib/MC/MCSymbolELF.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCSymbolELF.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -66,7 +65,7 @@ void MCSymbolELF::setBinding(unsigned Binding) const {
 
 unsigned MCSymbolELF::getBinding() const {
   if (isBindingSet()) {
-    uint32_t Val = (getFlags() & (0x3 << ELF_STB_Shift)) >> ELF_STB_Shift;
+    uint32_t Val = (Flags >> ELF_STB_Shift) & 3;
     switch (Val) {
     default:
       llvm_unreachable("Invalid value");
@@ -126,7 +125,7 @@ void MCSymbolELF::setType(unsigned Type) const {
 }
 
 unsigned MCSymbolELF::getType() const {
-  uint32_t Val = (getFlags() & (0x7 << ELF_STT_Shift)) >> ELF_STT_Shift;
+  uint32_t Val = (Flags >> ELF_STT_Shift) & 7;
   switch (Val) {
   default:
     llvm_unreachable("Invalid value");
@@ -156,9 +155,7 @@ void MCSymbolELF::setVisibility(unsigned Visibility) {
 }
 
 unsigned MCSymbolELF::getVisibility() const {
-  unsigned Visibility = (getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
-  assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
-         Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+  unsigned Visibility = (Flags >> ELF_STV_Shift) & 3;
   return Visibility;
 }
 
@@ -171,7 +168,7 @@ void MCSymbolELF::setOther(unsigned Other) {
 }
 
 unsigned MCSymbolELF::getOther() const {
-  unsigned Other = (getFlags() & (0x7 << ELF_STO_Shift)) >> ELF_STO_Shift;
+  unsigned Other = (Flags >> ELF_STO_Shift) & 7;
   return Other << 5;
 }
 
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index b85e53db5d61..96bb094134fe 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCTargetOptions.cpp - MC Target Options ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,12 +12,11 @@
 using namespace llvm;
 
 MCTargetOptions::MCTargetOptions()
-    : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
-      MCFatalWarnings(false), MCNoWarn(false), MCNoDeprecatedWarn(false),
-      MCSaveTempLabels(false), MCUseDwarfDirectory(false),
-      MCIncrementalLinkerCompatible(false), MCPIECopyRelocations(false),
-      ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
-      PreserveAsmComments(true) {}
+    : MCRelaxAll(false), MCNoExecStack(false), MCFatalWarnings(false),
+      MCNoWarn(false), MCNoDeprecatedWarn(false), MCSaveTempLabels(false),
+      MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
+      MCPIECopyRelocations(false), ShowMCEncoding(false), ShowMCInst(false),
+      AsmVerbose(false), PreserveAsmComments(true) {}
 
 StringRef MCTargetOptions::getABIName() const {
   return ABIName;
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index 7e03913aa680..81da47b2eced 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp
index 59082a160caf..e46257823e34 100644
--- a/lib/MC/MCWasmObjectTargetWriter.cpp
+++ b/lib/MC/MCWasmObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
 //===-- MCWasmObjectTargetWriter.cpp - Wasm Target Writer Subclass --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,8 +10,8 @@
 
 using namespace llvm;
 
-MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_)
-    : Is64Bit(Is64Bit_) {}
+MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit)
+    : Is64Bit(Is64Bit) {}
 
 // Pin the vtable to this object file
 MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default;
diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp
index d2a152058b90..86fa72197855 100644
--- a/lib/MC/MCWasmStreamer.cpp
+++ b/lib/MC/MCWasmStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCWasmStreamer.cpp - Wasm Object Output ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,15 +34,15 @@
 
 using namespace llvm;
 
-MCWasmStreamer::~MCWasmStreamer() {}
+MCWasmStreamer::~MCWasmStreamer() = default; // anchor.
 
 void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
   flushPendingLabels(DF, DF->getContents().size());
 
-  for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
-    EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
+  for (unsigned I = 0, E = EF->getFixups().size(); I != E; ++I) {
+    EF->getFixups()[I].setOffset(EF->getFixups()[I].getOffset() +
                                  DF->getContents().size());
-    DF->getFixups().push_back(EF->getFixups()[i]);
+    DF->getFixups().push_back(EF->getFixups()[I]);
   }
   if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo())
     DF->setHasInstructions(*EF->getSubtargetInfo());
@@ -119,6 +118,11 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
     break;
 
   case MCSA_ELF_TypeObject:
+  case MCSA_Cold:
+    break;
+
+  case MCSA_NoDeadStrip:
+    Symbol->setExported();
     break;
 
   default:
@@ -179,9 +183,9 @@ void MCWasmStreamer::EmitInstToData(const MCInst &Inst,
   MCDataFragment *DF = getOrCreateDataFragment();
 
   // Add the fixups and data.
-  for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
-    Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
-    DF->getFixups().push_back(Fixups[i]);
+  for (unsigned I = 0, E = Fixups.size(); I != E; ++I) {
+    Fixups[I].setOffset(Fixups[I].getOffset() + DF->getContents().size());
+    DF->getFixups().push_back(Fixups[I]);
   }
   DF->setHasInstructions(STI);
   DF->getContents().append(Code.begin(), Code.end());
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 0724b109e1a1..4e9a29667097 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -256,8 +255,12 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
       MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
                               MCSymbolRefExpr::create(RHS, Context), Context);
   MCObjectStreamer *OS = (MCObjectStreamer *)(&Streamer);
+  // It should normally be possible to calculate the length of a function
+  // at this point, but it might not be possible in the presence of certain
+  // unusual constructs, like an inline asm with an alignment directive.
   int64_t value;
-  Diff->evaluateAsAbsolute(value, OS->getAssembler());
+  if (!Diff->evaluateAsAbsolute(value, OS->getAssembler()))
+    report_fatal_error("Failed to evaluate function length in SEH unwind info");
   return value;
 }
 
@@ -453,6 +456,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
   }
 }
 
+// Returns the epilog symbol of an epilog with the exact same unwind code
+// sequence, if it exists.  Otherwise, returns nullptr.
+// EpilogInstrs - Unwind codes for the current epilog.
+// Epilogs - Epilogs that potentially match the current epilog.
+static MCSymbol*
+FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
+                   const std::vector<MCSymbol *>& Epilogs,
+                   const WinEH::FrameInfo *info) {
+  for (auto *EpilogStart : Epilogs) {
+    auto InstrsIter = info->EpilogMap.find(EpilogStart);
+    assert(InstrsIter != info->EpilogMap.end() &&
+           "Epilog not found in EpilogMap");
+    const auto &Instrs = InstrsIter->second;
+
+    if (Instrs.size() != EpilogInstrs.size())
+      continue;
+
+    bool Match = true;
+    for (unsigned i = 0; i < Instrs.size(); ++i)
+      if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
+          Instrs[i].Offset != EpilogInstrs[i].Offset ||
+          Instrs[i].Register != EpilogInstrs[i].Register) {
+         Match = false;
+         break;
+      }
+
+    if (Match)
+      return EpilogStart;
+  }
+  return nullptr;
+}
+
 // Populate the .xdata section.  The format of .xdata on ARM64 is documented at
 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
 static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -467,22 +502,71 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
   streamer.EmitLabel(Label);
   info->Symbol = Label;
 
-  uint32_t FuncLength = 0x0;
-  if (info->FuncletOrFuncEnd)
-    FuncLength = (uint32_t)GetAbsDifference(streamer, info->FuncletOrFuncEnd,
-                                            info->Begin);
-  FuncLength /= 4;
+  int64_t RawFuncLength;
+  if (!info->FuncletOrFuncEnd) {
+    // FIXME: This is very wrong; we emit SEH data which covers zero bytes
+    // of code. But otherwise test/MC/AArch64/seh.s crashes.
+    RawFuncLength = 0;
+  } else {
+    // FIXME: GetAbsDifference tries to compute the length of the function
+    // immediately, before the whole file is emitted, but in general
+    // that's impossible: the size in bytes of certain assembler directives
+    // like .align and .fill is not known until the whole file is parsed and
+    // relaxations are applied. Currently, GetAbsDifference fails with a fatal
+    // error in that case. (We mostly don't hit this because inline assembly
+    // specifying those directives is rare, and we don't normally try to
+    // align loops on AArch64.)
+    //
+    // There are two potential approaches to delaying the computation. One,
+    // we could emit something like ".word (endfunc-beginfunc)/4+0x10800000",
+    // as long as we have some conservative estimate we could use to prove
+    // that we don't need to split the unwind data. Emitting the constant
+    // is straightforward, but there's no existing code for estimating the
+    // size of the function.
+    //
+    // The other approach would be to use a dedicated, relaxable fragment,
+    // which could grow to accommodate splitting the unwind data if
+    // necessary. This is more straightforward, since it automatically works
+    // without any new infrastructure, and it's consistent with how we handle
+    // relaxation in other contexts.  But it would require some refactoring
+    // to move parts of the pdata/xdata emission into the implementation of
+    // a fragment. We could probably continue to encode the unwind codes
+    // here, but we'd have to emit the pdata, the xdata header, and the
+    // epilogue scopes later, since they depend on whether we need to
+    // split the unwind data.
+    RawFuncLength = GetAbsDifference(streamer, info->FuncletOrFuncEnd,
+                                     info->Begin);
+  }
+  if (RawFuncLength > 0xFFFFF)
+    report_fatal_error("SEH unwind data splitting not yet implemented");
+  uint32_t FuncLength = (uint32_t)RawFuncLength / 4;
   uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
   uint32_t TotalCodeBytes = PrologCodeBytes;
 
   // Process epilogs.
   MapVector<MCSymbol *, uint32_t> EpilogInfo;
+  // Epilogs processed so far.
+  std::vector<MCSymbol *> AddedEpilogs;
+
   for (auto &I : info->EpilogMap) {
     MCSymbol *EpilogStart = I.first;
     auto &EpilogInstrs = I.second;
     uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
-    EpilogInfo[EpilogStart] = TotalCodeBytes;
-    TotalCodeBytes += CodeBytes;
+
+    MCSymbol* MatchingEpilog =
+      FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+    if (MatchingEpilog) {
+      assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+             "Duplicate epilog not found");
+      EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
+      // Clear the unwind codes in the EpilogMap, so that they don't get output
+      // in the logic below.
+      EpilogInstrs.clear();
+    } else {
+      EpilogInfo[EpilogStart] = TotalCodeBytes;
+      TotalCodeBytes += CodeBytes;
+      AddedEpilogs.push_back(EpilogStart);
+    }
   }
 
   // Code Words, Epilog count, E, X, Vers, Function Length
diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp
index 7b1dc7abf708..04d5f100a2ff 100644
--- a/lib/MC/MCWinCOFFStreamer.cpp
+++ b/lib/MC/MCWinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
 //===- llvm/MC/MCWinCOFFStreamer.cpp --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -256,7 +255,7 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
   auto *Symbol = cast<MCSymbolCOFF>(S);
 
   const Triple &T = getContext().getObjectFileInfo()->getTargetTriple();
-  if (T.isKnownWindowsMSVCEnvironment()) {
+  if (T.isWindowsMSVCEnvironment()) {
     if (ByteAlignment > 32)
       report_fatal_error("alignment is limited to 32-bytes");
 
@@ -268,7 +267,7 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
   Symbol->setExternal(true);
   Symbol->setCommon(Size, ByteAlignment);
 
-  if (!T.isKnownWindowsMSVCEnvironment() && ByteAlignment > 1) {
+  if (!T.isWindowsMSVCEnvironment() && ByteAlignment > 1) {
     SmallString<128> Directive;
     raw_svector_ostream OS(Directive);
     const MCObjectFileInfo *MFI = getContext().getObjectFileInfo();
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
index a5d0f5a2cb75..e58a0b2cf654 100644
--- a/lib/MC/MCWinEH.cpp
+++ b/lib/MC/MCWinEH.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MCWinEH.cpp - Windows EH implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/MC/MCXCOFFObjectTargetWriter.cpp b/lib/MC/MCXCOFFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..504e333cb2d4
--- /dev/null
+++ b/lib/MC/MCXCOFFObjectTargetWriter.cpp
@@ -0,0 +1,16 @@
+//===- MCXCOFFObjectTargetWriter.cpp - XCOFF Target Writer Subclass -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+MCXCOFFObjectTargetWriter::MCXCOFFObjectTargetWriter(bool Is64Bit)
+    : Is64Bit(Is64Bit) {}
+
+MCXCOFFObjectTargetWriter::~MCXCOFFObjectTargetWriter() = default;
diff --git a/lib/MC/MCXCOFFStreamer.cpp b/lib/MC/MCXCOFFStreamer.cpp
new file mode 100644
index 000000000000..071de024a3fa
--- /dev/null
+++ b/lib/MC/MCXCOFFStreamer.cpp
@@ -0,0 +1,59 @@
+//===- lib/MC/MCXCOFFStreamer.cpp - XCOFF Object Output -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits XCOFF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCXCOFFStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context,
+                                 std::unique_ptr<MCAsmBackend> MAB,
+                                 std::unique_ptr<MCObjectWriter> OW,
+                                 std::unique_ptr<MCCodeEmitter> Emitter)
+    : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
+                       std::move(Emitter)) {}
+
+bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                          MCSymbolAttr Attribute) {
+  report_fatal_error("Symbol attributes not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                       unsigned ByteAlignment) {
+  report_fatal_error("Emiting common symbols not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
+                                   uint64_t Size, unsigned ByteAlignment,
+                                   SMLoc Loc) {
+  report_fatal_error("Zero fill not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitInstToData(const MCInst &Inst,
+                                     const MCSubtargetInfo &) {
+  report_fatal_error("Instruction emission not implemented for XCOFF.");
+}
+
+MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
+                                      std::unique_ptr<MCAsmBackend> &&MAB,
+                                      std::unique_ptr<MCObjectWriter> &&OW,
+                                      std::unique_ptr<MCCodeEmitter> &&CE,
+                                      bool RelaxAll) {
+  MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB),
+                                           std::move(OW), std::move(CE));
+  if (RelaxAll)
+    S->getAssembler().setRelaxAll(true);
+  return S;
+}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 2fa65658ccfa..f0ceb86b25af 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
@@ -449,11 +449,25 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
   assert(W.OS.tell() - Start == Size);
 }
 
+static bool isFixupTargetValid(const MCValue &Target) {
+  // Target is (LHS - RHS + cst).
+  // We don't support the form where LHS is null: -RHS + cst
+  if (!Target.getSymA() && Target.getSymB())
+    return false;
+  return true;
+}
+
 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
                                         const MCAsmLayout &Layout,
                                         const MCFragment *Fragment,
                                         const MCFixup &Fixup, MCValue Target,
                                         uint64_t &FixedValue) {
+  if (!isFixupTargetValid(Target)) {
+    Asm.getContext().reportError(Fixup.getLoc(),
+                                 "unsupported relocation expression");
+    return;
+  }
+
   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
                                        Target, FixedValue);
 }
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index de40a7728d3f..cb3db8e2268c 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -1,9 +1,8 @@
 //===- StringTableBuilder.cpp - String table building utility -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -160,6 +159,13 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) {
 
   if (K == MachO)
     Size = alignTo(Size, 4); // Pad to multiple of 4.
+
+  // The first byte in an ELF string table must be null, according to the ELF
+  // specification. In 'initSize()' we reserved the first byte to hold null for
+  // this purpose and here we actually add the string to allow 'getOffset()' to
+  // be called on an empty string.
+  if (K == ELF)
+    StringIndexMap[CachedHashStringRef("")] = 0;
 }
 
 void StringTableBuilder::clear() {
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index b69af24b531e..c4dd77359b24 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -1,9 +1,8 @@
 //===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -20,7 +18,6 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -32,31 +29,8 @@
 
 using namespace llvm;
 
-/// Determine if a feature has a flag; '+' or '-'
-static inline bool hasFlag(StringRef Feature) {
-  assert(!Feature.empty() && "Empty string");
-  // Get first character
-  char Ch = Feature[0];
-  // Check if first character is '+' or '-' flag
-  return Ch == '+' || Ch =='-';
-}
-
-/// Return string stripped of flag.
-static inline std::string StripFlag(StringRef Feature) {
-  return hasFlag(Feature) ? Feature.substr(1) : Feature;
-}
-
-/// Return true if enable flag; '+'.
-static inline bool isEnabled(StringRef Feature) {
-  assert(!Feature.empty() && "Empty string");
-  // Get first character
-  char Ch = Feature[0];
-  // Check if first character is '+' for enabled
-  return Ch == '+';
-}
-
 /// Splits a string of comma separated items in to a vector of strings.
-static void Split(std::vector<std::string> &V, StringRef S) {
+void SubtargetFeatures::Split(std::vector<std::string> &V, StringRef S) {
   SmallVector<StringRef, 3> Tmp;
   S.split(Tmp, ',', -1, false /* KeepEmpty */);
   V.assign(Tmp.begin(), Tmp.end());
@@ -70,48 +44,6 @@ void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
                                        : (Enable ? "+" : "-") + String.lower());
 }
 
-/// Find KV in array using binary search.
-static const SubtargetFeatureKV *Find(StringRef S,
-                                      ArrayRef<SubtargetFeatureKV> A) {
-  // Binary search the array
-  auto F = std::lower_bound(A.begin(), A.end(), S);
-  // If not found then return NULL
-  if (F == A.end() || StringRef(F->Key) != S) return nullptr;
-  // Return the found array item
-  return F;
-}
-
-/// Return the length of the longest entry in the table.
-static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
-  size_t MaxLen = 0;
-  for (auto &I : Table)
-    MaxLen = std::max(MaxLen, std::strlen(I.Key));
-  return MaxLen;
-}
-
-/// Display help for feature choices.
-static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
-                 ArrayRef<SubtargetFeatureKV> FeatTable) {
-  // Determine the length of the longest CPU and Feature entries.
-  unsigned MaxCPULen  = getLongestEntryLength(CPUTable);
-  unsigned MaxFeatLen = getLongestEntryLength(FeatTable);
-
-  // Print the CPU table.
-  errs() << "Available CPUs for this target:\n\n";
-  for (auto &CPU : CPUTable)
-    errs() << format("  %-*s - %s.\n", MaxCPULen, CPU.Key, CPU.Desc);
-  errs() << '\n';
-
-  // Print the Feature table.
-  errs() << "Available features for this target:\n\n";
-  for (auto &Feature : FeatTable)
-    errs() << format("  %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
-  errs() << '\n';
-
-  errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
-            "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
-}
-
 SubtargetFeatures::SubtargetFeatures(StringRef Initial) {
   // Break up string into separate features
   Split(Features, Initial);
@@ -121,136 +53,6 @@ std::string SubtargetFeatures::getString() const {
   return join(Features.begin(), Features.end(), ",");
 }
 
-/// For each feature that is (transitively) implied by this feature, set it.
-static
-void SetImpliedBits(FeatureBitset &Bits, const SubtargetFeatureKV &FeatureEntry,
-                    ArrayRef<SubtargetFeatureKV> FeatureTable) {
-  for (const SubtargetFeatureKV &FE : FeatureTable) {
-    if (FeatureEntry.Value == FE.Value) continue;
-
-    if ((FeatureEntry.Implies & FE.Value).any()) {
-      Bits |= FE.Value;
-      SetImpliedBits(Bits, FE, FeatureTable);
-    }
-  }
-}
-
-/// For each feature that (transitively) implies this feature, clear it.
-static
-void ClearImpliedBits(FeatureBitset &Bits,
-                      const SubtargetFeatureKV &FeatureEntry,
-                      ArrayRef<SubtargetFeatureKV> FeatureTable) {
-  for (const SubtargetFeatureKV &FE : FeatureTable) {
-    if (FeatureEntry.Value == FE.Value) continue;
-
-    if ((FE.Implies & FeatureEntry.Value).any()) {
-      Bits &= ~FE.Value;
-      ClearImpliedBits(Bits, FE, FeatureTable);
-    }
-  }
-}
-
-void
-SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
-                                 ArrayRef<SubtargetFeatureKV> FeatureTable) {
-  // Find feature in table.
-  const SubtargetFeatureKV *FeatureEntry =
-      Find(StripFlag(Feature), FeatureTable);
-  // If there is a match
-  if (FeatureEntry) {
-    if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
-      Bits &= ~FeatureEntry->Value;
-      // For each feature that implies this, clear it.
-      ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
-    } else {
-      Bits |=  FeatureEntry->Value;
-
-      // For each feature that this implies, set it.
-      SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
-    }
-  } else {
-    errs() << "'" << Feature << "' is not a recognized feature for this target"
-           << " (ignoring feature)\n";
-  }
-}
-
-void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
-                                    ArrayRef<SubtargetFeatureKV> FeatureTable) {
-  assert(hasFlag(Feature));
-
-  // Find feature in table.
-  const SubtargetFeatureKV *FeatureEntry =
-      Find(StripFlag(Feature), FeatureTable);
-  // If there is a match
-  if (FeatureEntry) {
-    // Enable/disable feature in bits
-    if (isEnabled(Feature)) {
-      Bits |= FeatureEntry->Value;
-
-      // For each feature that this implies, set it.
-      SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
-    } else {
-      Bits &= ~FeatureEntry->Value;
-
-      // For each feature that implies this, clear it.
-      ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
-    }
-  } else {
-    errs() << "'" << Feature << "' is not a recognized feature for this target"
-           << " (ignoring feature)\n";
-  }
-}
-
-FeatureBitset
-SubtargetFeatures::getFeatureBits(StringRef CPU,
-                                  ArrayRef<SubtargetFeatureKV> CPUTable,
-                                  ArrayRef<SubtargetFeatureKV> FeatureTable) {
-  if (CPUTable.empty() || FeatureTable.empty())
-    return FeatureBitset();
-
-  assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
-         "CPU table is not sorted");
-  assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
-         "CPU features table is not sorted");
-  // Resulting bits
-  FeatureBitset Bits;
-
-  // Check if help is needed
-  if (CPU == "help")
-    Help(CPUTable, FeatureTable);
-
-  // Find CPU entry if CPU name is specified.
-  else if (!CPU.empty()) {
-    const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable);
-
-    // If there is a match
-    if (CPUEntry) {
-      // Set base feature bits
-      Bits = CPUEntry->Value;
-
-      // Set the feature implied by this CPU feature, if any.
-      for (auto &FE : FeatureTable) {
-        if ((CPUEntry->Value & FE.Value).any())
-          SetImpliedBits(Bits, FE, FeatureTable);
-      }
-    } else {
-      errs() << "'" << CPU << "' is not a recognized processor for this target"
-             << " (ignoring processor)\n";
-    }
-  }
-
-  // Iterate through each feature
-  for (const std::string &Feature : Features) {
-    // Check for help
-    if (Feature == "+help")
-      Help(CPUTable, FeatureTable);
-
-    ApplyFeatureFlag(Bits, Feature, FeatureTable);
-  }
-
-  return Bits;
-}
-
 void SubtargetFeatures::print(raw_ostream &OS) const {
   for (auto &F : Features)
     OS << F << " ";
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 0cca3757be90..098343cd0107 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,7 +40,7 @@ namespace {
 
 // Went we ceate the indirect function table we start at 1, so that there is
 // and emtpy slot at 0 and therefore calling a null function pointer will trap.
-static const uint32_t kInitialTableOffset = 1;
+static const uint32_t InitialTableOffset = 1;
 
 // For patching purposes, we need to remember where each section starts, both
 // for patching up the section size field, and for patching up references to
@@ -61,7 +60,7 @@ struct SectionBookkeeping {
 // TODO: Consider using wasm::WasmSignature directly instead.
 struct WasmSignature {
   // Support empty and tombstone instances, needed by DenseMap.
-  enum { Plain, Empty, Tombstone } State;
+  enum { Plain, Empty, Tombstone } State = Plain;
 
   // The return types of the function.
   SmallVector<wasm::ValType, 1> Returns;
@@ -69,8 +68,6 @@ struct WasmSignature {
   // The parameter types of the function.
   SmallVector<wasm::ValType, 4> Params;
 
-  WasmSignature() : State(Plain) {}
-
   bool operator==(const WasmSignature &Other) const {
     return State == Other.State && Returns == Other.Returns &&
            Params == Other.Params;
@@ -109,9 +106,10 @@ struct WasmSignatureDenseMapInfo {
 struct WasmDataSegment {
   MCSectionWasm *Section;
   StringRef Name;
+  uint32_t InitFlags;
   uint32_t Offset;
   uint32_t Alignment;
-  uint32_t Flags;
+  uint32_t LinkerFlags;
   SmallVector<char, 4> Data;
 };
 
@@ -149,18 +147,7 @@ struct WasmRelocationEntry {
       : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
         FixupSection(FixupSection) {}
 
-  bool hasAddend() const {
-    switch (Type) {
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
-    case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
-    case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
-      return true;
-    default:
-      return false;
-    }
-  }
+  bool hasAddend() const { return wasm::relocTypeHasAddend(Type); }
 
   void print(raw_ostream &Out) const {
     Out << wasm::relocTypetoString(Type) << " Off=" << Offset
@@ -173,7 +160,7 @@ struct WasmRelocationEntry {
 #endif
 };
 
-static const uint32_t INVALID_INDEX = -1;
+static const uint32_t InvalidIndex = -1;
 
 struct WasmCustomSection {
 
@@ -185,7 +172,7 @@ struct WasmCustomSection {
 
   WasmCustomSection(StringRef Name, MCSectionWasm *Section)
       : Name(Name), Section(Section), OutputContentsOffset(0),
-        OutputIndex(INVALID_INDEX) {}
+        OutputIndex(InvalidIndex) {}
 };
 
 #if !defined(NDEBUG)
@@ -195,6 +182,33 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
 }
 #endif
 
+// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void writePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
+                              uint64_t Offset) {
+  uint8_t Buffer[5];
+  unsigned SizeLen = encodeULEB128(X, Buffer, 5);
+  assert(SizeLen == 5);
+  Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a signed LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void writePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
+                               uint64_t Offset) {
+  uint8_t Buffer[5];
+  unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
+  assert(SizeLen == 5);
+  Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a plain integer value at offset Offset in Stream.
+static void writeI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+  uint8_t Buffer[4];
+  support::endian::write32le(Buffer, X);
+  Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
+}
+
 class WasmObjectWriter : public MCObjectWriter {
   support::endian::Writer W;
 
@@ -218,12 +232,15 @@ class WasmObjectWriter : public MCObjectWriter {
   // Maps function/global symbols to the function/global/event/section index
   // space.
   DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
+  DenseMap<const MCSymbolWasm *, uint32_t> GOTIndices;
   // Maps data symbols to the Wasm segment and offset/size with the segment.
   DenseMap<const MCSymbolWasm *, wasm::WasmDataReference> DataLocations;
 
   // Stores output data (index, relocations, content offset) for custom
   // section.
   std::vector<WasmCustomSection> CustomSections;
+  std::unique_ptr<WasmCustomSection> ProducersSection;
+  std::unique_ptr<WasmCustomSection> TargetFeaturesSection;
   // Relocations for fixing up references in the custom sections.
   DenseMap<const MCSectionWasm *, std::vector<WasmRelocationEntry>>
       CustomSectionsRelocations;
@@ -233,7 +250,6 @@ class WasmObjectWriter : public MCObjectWriter {
 
   DenseMap<WasmSignature, uint32_t, WasmSignatureDenseMapInfo> SignatureIndices;
   SmallVector<WasmSignature, 4> Signatures;
-  SmallVector<WasmGlobal, 4> Globals;
   SmallVector<WasmDataSegment, 4> DataSegments;
   unsigned NumFunctionImports = 0;
   unsigned NumGlobalImports = 0;
@@ -242,9 +258,6 @@ class WasmObjectWriter : public MCObjectWriter {
 
   // TargetObjectWriter wrappers.
   bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
-  unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const {
-    return TargetObjectWriter->getRelocType(Target, Fixup);
-  }
 
   void startSection(SectionBookkeeping &Section, unsigned SectionId);
   void startCustomSection(SectionBookkeeping &Section, StringRef Name);
@@ -255,20 +268,21 @@ public:
                    raw_pwrite_stream &OS)
       : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
 
-  ~WasmObjectWriter() override;
-
 private:
   void reset() override {
     CodeRelocations.clear();
     DataRelocations.clear();
     TypeIndices.clear();
     WasmIndices.clear();
+    GOTIndices.clear();
     TableIndices.clear();
     DataLocations.clear();
+    CustomSections.clear();
+    ProducersSection.reset();
+    TargetFeaturesSection.reset();
     CustomSectionsRelocations.clear();
     SignatureIndices.clear();
     Signatures.clear();
-    Globals.clear();
     DataSegments.clear();
     SectionFunctions.clear();
     NumFunctionImports = 0;
@@ -298,9 +312,9 @@ private:
   void writeImportSection(ArrayRef<wasm::WasmImport> Imports, uint32_t DataSize,
                           uint32_t NumElements);
   void writeFunctionSection(ArrayRef<WasmFunction> Functions);
-  void writeGlobalSection();
   void writeExportSection(ArrayRef<wasm::WasmExport> Exports);
   void writeElemSection(ArrayRef<uint32_t> TableElems);
+  void writeDataCountSection();
   void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
                         ArrayRef<WasmFunction> Functions);
   void writeDataSection();
@@ -311,7 +325,8 @@ private:
       ArrayRef<wasm::WasmSymbolInfo> SymbolInfos,
       ArrayRef<std::pair<uint16_t, uint32_t>> InitFuncs,
       const std::map<StringRef, std::vector<WasmComdatEntry>> &Comdats);
-  void writeCustomSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
+  void writeCustomSection(WasmCustomSection &CustomSection,
+                          const MCAssembler &Asm, const MCAsmLayout &Layout);
   void writeCustomRelocSections();
   void
   updateCustomSectionRelocations(const SmallVector<WasmFunction, 4> &Functions,
@@ -330,8 +345,6 @@ private:
 
 } // end anonymous namespace
 
-WasmObjectWriter::~WasmObjectWriter() {}
-
 // Write out a section header and a patchable section size field.
 void WasmObjectWriter::startSection(SectionBookkeeping &Section,
                                     unsigned SectionId) {
@@ -342,7 +355,7 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
 
   // The section size. We don't know the size yet, so reserve enough space
   // for any 32-bit value; we'll patch it later.
-  encodeULEB128(UINT32_MAX, W.OS);
+  encodeULEB128(0, W.OS, 5);
 
   // The position where the section starts, for measuring its size.
   Section.ContentsOffset = W.OS.tell();
@@ -368,7 +381,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
 // Now that the section is complete and we know how big it is, patch up the
 // section size field at the start of the section.
 void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
-  uint64_t Size = W.OS.tell() - Section.PayloadOffset;
+  uint64_t Size = W.OS.tell();
+  // /dev/null doesn't support seek/tell and can report offset of 0.
+  // Simply skip this patching in that case.
+  if (!Size)
+    return;
+
+  Size -= Section.PayloadOffset;
   if (uint32_t(Size) != Size)
     report_fatal_error("section size does not fit in a uint32_t");
 
@@ -376,11 +395,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
 
   // Write the final section size to the payload_len field, which follows
   // the section id byte.
-  uint8_t Buffer[16];
-  unsigned SizeLen = encodeULEB128(Size, Buffer, 5);
-  assert(SizeLen == 5);
-  static_cast<raw_pwrite_stream &>(W.OS).pwrite((char *)Buffer, SizeLen,
-                                                Section.SizeOffset);
+  writePatchableLEB(static_cast<raw_pwrite_stream &>(W.OS), Size,
+                    Section.SizeOffset);
 }
 
 // Emit the Wasm header.
@@ -479,15 +495,15 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
   // be negative and don't wrap.
   FixedValue = 0;
 
-  unsigned Type = getRelocType(Target, Fixup);
+  unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup);
   assert(!IsPCRel);
   assert(SymA);
 
   // Absolute offset within a section or a function.
   // Currently only supported for for metadata sections.
   // See: test/MC/WebAssembly/blockaddress.ll
-  if (Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32 ||
-      Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32) {
+  if (Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
+      Type == wasm::R_WASM_SECTION_OFFSET_I32) {
     if (!FixupSection.getKind().isMetadata())
       report_fatal_error("relocations for function or section offsets are "
                          "only supported in metadata sections");
@@ -505,9 +521,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
     SymA = cast<MCSymbolWasm>(SectionSymbol);
   }
 
-  // Relocation other than R_WEBASSEMBLY_TYPE_INDEX_LEB are required to be
+  // Relocations other than R_WASM_TYPE_INDEX_LEB are required to be
   // against a named symbol.
-  if (Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+  if (Type != wasm::R_WASM_TYPE_INDEX_LEB) {
     if (SymA->getName().empty())
       report_fatal_error("relocations against un-named temporaries are not yet "
                          "supported by wasm");
@@ -515,6 +531,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
     SymA->setUsedInReloc();
   }
 
+  if (RefA->getKind() == MCSymbolRefExpr::VK_GOT)
+    SymA->setUsedInGOT();
+
   WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
   LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
 
@@ -529,40 +548,14 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
   }
 }
 
-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
-// to allow patching.
-static void WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
-                              uint64_t Offset) {
-  uint8_t Buffer[5];
-  unsigned SizeLen = encodeULEB128(X, Buffer, 5);
-  assert(SizeLen == 5);
-  Stream.pwrite((char *)Buffer, SizeLen, Offset);
-}
-
-// Write X as an signed LEB value at offset Offset in Stream, padded
-// to allow patching.
-static void WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
-                               uint64_t Offset) {
-  uint8_t Buffer[5];
-  unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
-  assert(SizeLen == 5);
-  Stream.pwrite((char *)Buffer, SizeLen, Offset);
-}
-
-// Write X as a plain integer value at offset Offset in Stream.
-static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
-  uint8_t Buffer[4];
-  support::endian::write32le(Buffer, X);
-  Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
-}
-
-static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
-  if (Symbol.isVariable()) {
-    const MCExpr *Expr = Symbol.getVariableValue();
+static const MCSymbolWasm *resolveSymbol(const MCSymbolWasm &Symbol) {
+  const MCSymbolWasm* Ret = &Symbol;
+  while (Ret->isVariable()) {
+    const MCExpr *Expr = Ret->getVariableValue();
     auto *Inner = cast<MCSymbolRefExpr>(Expr);
-    return cast<MCSymbolWasm>(&Inner->getSymbol());
+    Ret = cast<MCSymbolWasm>(&Inner->getSymbol());
   }
-  return &Symbol;
+  return Ret;
 }
 
 // Compute a value to write into the code at the location covered
@@ -571,36 +564,41 @@ static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
 // useable.
 uint32_t
 WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
+  if (RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB && !RelEntry.Symbol->isGlobal()) {
+    assert(GOTIndices.count(RelEntry.Symbol) > 0 && "symbol not found in GOT index space");
+    return GOTIndices[RelEntry.Symbol];
+  }
+
   switch (RelEntry.Type) {
-  case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
-  case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
+  case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
+  case wasm::R_WASM_TABLE_INDEX_SLEB:
+  case wasm::R_WASM_TABLE_INDEX_I32: {
     // Provisional value is table address of the resolved symbol itself
-    const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol);
+    const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
     assert(Sym->isFunction());
     return TableIndices[Sym];
   }
-  case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+  case wasm::R_WASM_TYPE_INDEX_LEB:
     // Provisional value is same as the index
     return getRelocationIndexValue(RelEntry);
-  case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
-  case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
-  case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+  case wasm::R_WASM_FUNCTION_INDEX_LEB:
+  case wasm::R_WASM_GLOBAL_INDEX_LEB:
+  case wasm::R_WASM_EVENT_INDEX_LEB:
     // Provisional value is function/global/event Wasm index
-    if (!WasmIndices.count(RelEntry.Symbol))
-      report_fatal_error("symbol not found in wasm index space: " +
-                         RelEntry.Symbol->getName());
+    assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space");
     return WasmIndices[RelEntry.Symbol];
-  case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
-  case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: {
+  case wasm::R_WASM_FUNCTION_OFFSET_I32:
+  case wasm::R_WASM_SECTION_OFFSET_I32: {
     const auto &Section =
         static_cast<const MCSectionWasm &>(RelEntry.Symbol->getSection());
     return Section.getSectionOffset() + RelEntry.Addend;
   }
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: {
+  case wasm::R_WASM_MEMORY_ADDR_LEB:
+  case wasm::R_WASM_MEMORY_ADDR_I32:
+  case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+  case wasm::R_WASM_MEMORY_ADDR_SLEB: {
     // Provisional value is address of the global
-    const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol);
+    const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
     // For undefined symbols, use zero
     if (!Sym->isDefined())
       return 0;
@@ -654,7 +652,7 @@ static void addData(SmallVectorImpl<char> &DataBytes,
 
 uint32_t
 WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) {
-  if (RelEntry.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+  if (RelEntry.Type == wasm::R_WASM_TYPE_INDEX_LEB) {
     if (!TypeIndices.count(RelEntry.Symbol))
       report_fatal_error("symbol not found in type index space: " +
                          RelEntry.Symbol->getName());
@@ -678,22 +676,24 @@ void WasmObjectWriter::applyRelocations(
     uint32_t Value = getProvisionalValue(RelEntry);
 
     switch (RelEntry.Type) {
-    case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
-    case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
-    case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-    case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
-      WritePatchableLEB(Stream, Value, Offset);
+    case wasm::R_WASM_FUNCTION_INDEX_LEB:
+    case wasm::R_WASM_TYPE_INDEX_LEB:
+    case wasm::R_WASM_GLOBAL_INDEX_LEB:
+    case wasm::R_WASM_MEMORY_ADDR_LEB:
+    case wasm::R_WASM_EVENT_INDEX_LEB:
+      writePatchableLEB(Stream, Value, Offset);
       break;
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
-    case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
-    case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
-      WriteI32(Stream, Value, Offset);
+    case wasm::R_WASM_TABLE_INDEX_I32:
+    case wasm::R_WASM_MEMORY_ADDR_I32:
+    case wasm::R_WASM_FUNCTION_OFFSET_I32:
+    case wasm::R_WASM_SECTION_OFFSET_I32:
+      writeI32(Stream, Value, Offset);
       break;
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
-      WritePatchableSLEB(Stream, Value, Offset);
+    case wasm::R_WASM_TABLE_INDEX_SLEB:
+    case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
+    case wasm::R_WASM_MEMORY_ADDR_SLEB:
+    case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+      writePatchableSLEB(Stream, Value, Offset);
       break;
     default:
       llvm_unreachable("invalid relocation type");
@@ -783,26 +783,6 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
   endSection(Section);
 }
 
-void WasmObjectWriter::writeGlobalSection() {
-  if (Globals.empty())
-    return;
-
-  SectionBookkeeping Section;
-  startSection(Section, wasm::WASM_SEC_GLOBAL);
-
-  encodeULEB128(Globals.size(), W.OS);
-  for (const WasmGlobal &Global : Globals) {
-    writeValueType(static_cast<wasm::ValType>(Global.Type.Type));
-    W.OS << char(Global.Type.Mutable);
-
-    W.OS << char(wasm::WASM_OPCODE_I32_CONST);
-    encodeSLEB128(Global.InitialValue, W.OS);
-    W.OS << char(wasm::WASM_OPCODE_END);
-  }
-
-  endSection(Section);
-}
-
 void WasmObjectWriter::writeEventSection(ArrayRef<wasm::WasmEventType> Events) {
   if (Events.empty())
     return;
@@ -848,7 +828,7 @@ void WasmObjectWriter::writeElemSection(ArrayRef<uint32_t> TableElems) {
 
   // init expr for starting offset
   W.OS << char(wasm::WASM_OPCODE_I32_CONST);
-  encodeSLEB128(kInitialTableOffset, W.OS);
+  encodeSLEB128(InitialTableOffset, W.OS);
   W.OS << char(wasm::WASM_OPCODE_END);
 
   encodeULEB128(TableElems.size(), W.OS);
@@ -858,6 +838,16 @@ void WasmObjectWriter::writeElemSection(ArrayRef<uint32_t> TableElems) {
   endSection(Section);
 }
 
+void WasmObjectWriter::writeDataCountSection() {
+  if (DataSegments.empty())
+    return;
+
+  SectionBookkeeping Section;
+  startSection(Section, wasm::WASM_SEC_DATACOUNT);
+  encodeULEB128(DataSegments.size(), W.OS);
+  endSection(Section);
+}
+
 void WasmObjectWriter::writeCodeSection(const MCAssembler &Asm,
                                         const MCAsmLayout &Layout,
                                         ArrayRef<WasmFunction> Functions) {
@@ -899,10 +889,14 @@ void WasmObjectWriter::writeDataSection() {
   encodeULEB128(DataSegments.size(), W.OS); // count
 
   for (const WasmDataSegment &Segment : DataSegments) {
-    encodeULEB128(0, W.OS); // memory index
-    W.OS << char(wasm::WASM_OPCODE_I32_CONST);
-    encodeSLEB128(Segment.Offset, W.OS); // offset
-    W.OS << char(wasm::WASM_OPCODE_END);
+    encodeULEB128(Segment.InitFlags, W.OS); // flags
+    if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+      encodeULEB128(0, W.OS); // memory index
+    if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+      W.OS << char(wasm::WASM_OPCODE_I32_CONST);
+      encodeSLEB128(Segment.Offset, W.OS); // offset
+      W.OS << char(wasm::WASM_OPCODE_END);
+    }
     encodeULEB128(Segment.Data.size(), W.OS); // size
     Segment.Section->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
     W.OS << Segment.Data; // data
@@ -928,9 +922,8 @@ void WasmObjectWriter::writeRelocSection(
   // order, but for the code section we combine many MC sections into single
   // wasm section, and this order is determined by the order of Asm.Symbols()
   // not the sections order.
-  std::stable_sort(
-      Relocs.begin(), Relocs.end(),
-      [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
+  llvm::stable_sort(
+      Relocs, [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
         return (A.Offset + A.FixupSection->getSectionOffset()) <
                (B.Offset + B.FixupSection->getSectionOffset());
       });
@@ -982,7 +975,8 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
       case wasm::WASM_SYMBOL_TYPE_GLOBAL:
       case wasm::WASM_SYMBOL_TYPE_EVENT:
         encodeULEB128(Sym.ElementIndex, W.OS);
-        if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
+        if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
+            (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
           writeString(Sym.Name);
         break;
       case wasm::WASM_SYMBOL_TYPE_DATA:
@@ -1012,7 +1006,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
     for (const WasmDataSegment &Segment : DataSegments) {
       writeString(Segment.Name);
       encodeULEB128(Segment.Alignment, W.OS);
-      encodeULEB128(Segment.Flags, W.OS);
+      encodeULEB128(Segment.LinkerFlags, W.OS);
     }
     endSection(SubSection);
   }
@@ -1045,25 +1039,24 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
   endSection(Section);
 }
 
-void WasmObjectWriter::writeCustomSections(const MCAssembler &Asm,
-                                           const MCAsmLayout &Layout) {
-  for (auto &CustomSection : CustomSections) {
-    SectionBookkeeping Section;
-    auto *Sec = CustomSection.Section;
-    startCustomSection(Section, CustomSection.Name);
+void WasmObjectWriter::writeCustomSection(WasmCustomSection &CustomSection,
+                                          const MCAssembler &Asm,
+                                          const MCAsmLayout &Layout) {
+  SectionBookkeeping Section;
+  auto *Sec = CustomSection.Section;
+  startCustomSection(Section, CustomSection.Name);
 
-    Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
-    Asm.writeSectionData(W.OS, Sec, Layout);
+  Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
+  Asm.writeSectionData(W.OS, Sec, Layout);
 
-    CustomSection.OutputContentsOffset = Section.ContentsOffset;
-    CustomSection.OutputIndex = Section.Index;
+  CustomSection.OutputContentsOffset = Section.ContentsOffset;
+  CustomSection.OutputIndex = Section.Index;
 
-    endSection(Section);
+  endSection(Section);
 
-    // Apply fixups.
-    auto &Relocations = CustomSectionsRelocations[CustomSection.Section];
-    applyRelocations(Relocations, CustomSection.OutputContentsOffset);
-  }
+  // Apply fixups.
+  auto &Relocations = CustomSectionsRelocations[CustomSection.Section];
+  applyRelocations(Relocations, CustomSection.OutputContentsOffset);
 }
 
 uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) {
@@ -1082,7 +1075,7 @@ void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
   assert(Symbol.isFunction());
 
   WasmSignature S;
-  const MCSymbolWasm *ResolvedSym = ResolveSymbol(Symbol);
+  const MCSymbolWasm *ResolvedSym = resolveSymbol(Symbol);
   if (auto *Sig = ResolvedSym->getSignature()) {
     S.Returns = Sig->Returns;
     S.Params = Sig->Params;
@@ -1143,7 +1136,6 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   uint64_t StartOffset = W.OS.tell();
 
   LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
-  MCContext &Ctx = Asm.getContext();
 
   // Collect information from the available symbols.
   SmallVector<WasmFunction, 4> Functions;
@@ -1159,22 +1151,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   // For now, always emit the memory import, since loads and stores are not
   // valid without it. In the future, we could perhaps be more clever and omit
   // it if there are no loads or stores.
-  MCSymbolWasm *MemorySym =
-      cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__linear_memory"));
   wasm::WasmImport MemImport;
-  MemImport.Module = MemorySym->getModuleName();
-  MemImport.Field = MemorySym->getName();
+  MemImport.Module = "env";
+  MemImport.Field = "__linear_memory";
   MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY;
   Imports.push_back(MemImport);
 
   // For now, always emit the table section, since indirect calls are not
   // valid without it. In the future, we could perhaps be more clever and omit
   // it if there are no indirect calls.
-  MCSymbolWasm *TableSym =
-      cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__indirect_function_table"));
   wasm::WasmImport TableImport;
-  TableImport.Module = TableSym->getModuleName();
-  TableImport.Field = TableSym->getName();
+  TableImport.Module = "env";
+  TableImport.Field = "__indirect_function_table";
   TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
   TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
   Imports.push_back(TableImport);
@@ -1200,39 +1188,60 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
     if (!WS.isDefined() && !WS.isComdat()) {
       if (WS.isFunction()) {
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Module = WS.getImportModule();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
         Import.SigIndex = getFunctionType(WS);
         Imports.push_back(Import);
+        assert(WasmIndices.count(&WS) == 0);
         WasmIndices[&WS] = NumFunctionImports++;
       } else if (WS.isGlobal()) {
         if (WS.isWeak())
           report_fatal_error("undefined global symbol cannot be weak");
 
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+        Import.Module = WS.getImportModule();
         Import.Global = WS.getGlobalType();
         Imports.push_back(Import);
+        assert(WasmIndices.count(&WS) == 0);
         WasmIndices[&WS] = NumGlobalImports++;
       } else if (WS.isEvent()) {
         if (WS.isWeak())
           report_fatal_error("undefined event symbol cannot be weak");
 
         wasm::WasmImport Import;
-        Import.Module = WS.getModuleName();
-        Import.Field = WS.getName();
+        Import.Module = WS.getImportModule();
+        Import.Field = WS.getImportName();
         Import.Kind = wasm::WASM_EXTERNAL_EVENT;
         Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
         Import.Event.SigIndex = getEventType(WS);
         Imports.push_back(Import);
+        assert(WasmIndices.count(&WS) == 0);
         WasmIndices[&WS] = NumEventImports++;
       }
     }
   }
 
+  // Add imports for GOT globals
+  for (const MCSymbol &S : Asm.symbols()) {
+    const auto &WS = static_cast<const MCSymbolWasm &>(S);
+    if (WS.isUsedInGOT()) {
+      wasm::WasmImport Import;
+      if (WS.isFunction())
+        Import.Module = "GOT.func";
+      else
+        Import.Module = "GOT.mem";
+      Import.Field = WS.getName();
+      Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+      Import.Global = {wasm::WASM_TYPE_I32, true};
+      Imports.push_back(Import);
+      assert(GOTIndices.count(&WS) == 0);
+      GOTIndices[&WS] = NumGlobalImports++;
+    }
+  }
+
   // Populate DataSegments and CustomSections, which must be done before
   // populating DataLocations.
   for (MCSection &Sec : Asm) {
@@ -1253,11 +1262,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
       DataSegments.emplace_back();
       WasmDataSegment &Segment = DataSegments.back();
       Segment.Name = SectionName;
+      Segment.InitFlags =
+          Section.getPassive() ? (uint32_t)wasm::WASM_SEGMENT_IS_PASSIVE : 0;
       Segment.Offset = DataSize;
       Segment.Section = &Section;
       addData(Segment.Data, Section);
       Segment.Alignment = Log2_32(Section.getAlignment());
-      Segment.Flags = 0;
+      Segment.LinkerFlags = 0;
       DataSize += Segment.Data.size();
       Section.setSegmentIndex(SegmentIndex);
 
@@ -1282,6 +1293,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
           report_fatal_error("section name and begin symbol should match: " +
                              Twine(SectionName));
       }
+
+      // Separate out the producers and target features sections
+      if (Name == "producers") {
+        ProducersSection = llvm::make_unique<WasmCustomSection>(Name, &Section);
+        continue;
+      }
+      if (Name == "target_features") {
+        TargetFeaturesSection =
+            llvm::make_unique<WasmCustomSection>(Name, &Section);
+        continue;
+      }
+
       CustomSections.emplace_back(Name, &Section);
     }
   }
@@ -1313,7 +1336,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
           report_fatal_error(
               "function sections must contain one function each");
 
-        if (WS.getSize() == 0)
+        if (WS.getSize() == nullptr)
           report_fatal_error(
               "function symbols must have a size set with .size");
 
@@ -1338,7 +1361,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
       LLVM_DEBUG(dbgs() << "  -> function index: " << Index << "\n");
 
     } else if (WS.isData()) {
-      if (WS.isTemporary() && !WS.getSize())
+      if (!isInSymtab(WS))
         continue;
 
       if (!WS.isDefined()) {
@@ -1384,11 +1407,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
         wasm::WasmEventType Event;
         Event.SigIndex = getEventType(WS);
         Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+        assert(WasmIndices.count(&WS) == 0);
         WasmIndices[&WS] = Index;
         Events.push_back(Event);
       } else {
         // An import; the index was assigned above.
-        Index = WasmIndices.find(&WS)->second;
+        assert(WasmIndices.count(&WS) > 0);
       }
       LLVM_DEBUG(dbgs() << "  -> event index: " << WasmIndices.find(&WS)->second
                         << "\n");
@@ -1410,16 +1434,17 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
 
     // Find the target symbol of this weak alias and export that index
     const auto &WS = static_cast<const MCSymbolWasm &>(S);
-    const MCSymbolWasm *ResolvedSym = ResolveSymbol(WS);
+    const MCSymbolWasm *ResolvedSym = resolveSymbol(WS);
     LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym
                       << "'\n");
 
-    if (WS.isFunction()) {
+    if (ResolvedSym->isFunction()) {
       assert(WasmIndices.count(ResolvedSym) > 0);
       uint32_t WasmIndex = WasmIndices.find(ResolvedSym)->second;
+      assert(WasmIndices.count(&WS) == 0);
       WasmIndices[&WS] = WasmIndex;
       LLVM_DEBUG(dbgs() << "  -> index:" << WasmIndex << "\n");
-    } else if (WS.isData()) {
+    } else if (ResolvedSym->isData()) {
       assert(DataLocations.count(ResolvedSym) > 0);
       const wasm::WasmDataReference &Ref =
           DataLocations.find(ResolvedSym)->second;
@@ -1434,7 +1459,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   for (const MCSymbol &S : Asm.symbols()) {
     const auto &WS = static_cast<const MCSymbolWasm &>(S);
     if (!isInSymtab(WS)) {
-      WS.setIndex(INVALID_INDEX);
+      WS.setIndex(InvalidIndex);
       continue;
     }
     LLVM_DEBUG(dbgs() << "adding to symtab: " << WS << "\n");
@@ -1448,6 +1473,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
       Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL;
     if (WS.isUndefined())
       Flags |= wasm::WASM_SYMBOL_UNDEFINED;
+    if (WS.isExported())
+      Flags |= wasm::WASM_SYMBOL_EXPORTED;
+    if (WS.getName() != WS.getImportName())
+      Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
 
     wasm::WasmSymbolInfo Info;
     Info.Name = WS.getName();
@@ -1469,13 +1498,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
       // Functions referenced by a relocation need to put in the table.  This is
       // purely to make the object file's provisional values readable, and is
       // ignored by the linker, which re-calculates the relocations itself.
-      if (Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 &&
-          Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB)
+      if (Rel.Type != wasm::R_WASM_TABLE_INDEX_I32 &&
+          Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB)
         return;
       assert(Rel.Symbol->isFunction());
-      const MCSymbolWasm &WS = *ResolveSymbol(*Rel.Symbol);
+      const MCSymbolWasm &WS = *resolveSymbol(*Rel.Symbol);
       uint32_t FunctionIndex = WasmIndices.find(&WS)->second;
-      uint32_t TableIndex = TableElems.size() + kInitialTableOffset;
+      uint32_t TableIndex = TableElems.size() + InitialTableOffset;
       if (TableIndices.try_emplace(&WS, TableIndex).second) {
         LLVM_DEBUG(dbgs() << "  -> adding " << WS.getName()
                           << " to table: " << TableIndex << "\n");
@@ -1534,25 +1563,26 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
     const auto &DataFrag = cast<MCDataFragment>(Frag);
     const SmallVectorImpl<char> &Contents = DataFrag.getContents();
     for (const uint8_t *
-             p = (const uint8_t *)Contents.data(),
-            *end = (const uint8_t *)Contents.data() + Contents.size();
-         p != end; ++p) {
-      if (*p != 0)
+             P = (const uint8_t *)Contents.data(),
+            *End = (const uint8_t *)Contents.data() + Contents.size();
+         P != End; ++P) {
+      if (*P != 0)
         report_fatal_error("non-symbolic data in .init_array section");
     }
     for (const MCFixup &Fixup : DataFrag.getFixups()) {
       assert(Fixup.getKind() ==
              MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
       const MCExpr *Expr = Fixup.getValue();
-      auto *Sym = dyn_cast<MCSymbolRefExpr>(Expr);
-      if (!Sym)
+      auto *SymRef = dyn_cast<MCSymbolRefExpr>(Expr);
+      if (!SymRef)
         report_fatal_error("fixups in .init_array should be symbol references");
-      if (Sym->getKind() != MCSymbolRefExpr::VK_WebAssembly_FUNCTION)
-        report_fatal_error("symbols in .init_array should be for functions");
-      if (Sym->getSymbol().getIndex() == INVALID_INDEX)
+      const auto &TargetSym = cast<const MCSymbolWasm>(SymRef->getSymbol());
+      if (TargetSym.getIndex() == InvalidIndex)
         report_fatal_error("symbols in .init_array should exist in symbtab");
+      if (!TargetSym.isFunction())
+        report_fatal_error("symbols in .init_array should be for functions");
       InitFuncs.push_back(
-          std::make_pair(Priority, Sym->getSymbol().getIndex()));
+          std::make_pair(Priority, TargetSym.getIndex()));
     }
   }
 
@@ -1564,17 +1594,22 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   writeFunctionSection(Functions);
   // Skip the "table" section; we import the table instead.
   // Skip the "memory" section; we import the memory instead.
-  writeGlobalSection();
   writeEventSection(Events);
   writeExportSection(Exports);
   writeElemSection(TableElems);
+  writeDataCountSection();
   writeCodeSection(Asm, Layout, Functions);
   writeDataSection();
-  writeCustomSections(Asm, Layout);
+  for (auto &CustomSection : CustomSections)
+    writeCustomSection(CustomSection, Asm, Layout);
   writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats);
   writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations);
   writeRelocSection(DataSectionIndex, "DATA", DataRelocations);
   writeCustomRelocSections();
+  if (ProducersSection)
+    writeCustomSection(*ProducersSection, Asm, Layout);
+  if (TargetFeaturesSection)
+    writeCustomSection(*TargetFeaturesSection, Asm, Layout);
 
   // TODO: Translate the .comment section to the output.
   return W.OS.tell() - StartOffset;
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index b774852eabe6..0e6c05bc726d 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- llvm/MC/WinCOFFObjectWriter.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -388,7 +387,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
     Sym->Aux[0].AuxType = ATWeakExternal;
     Sym->Aux[0].Aux.WeakExternal.TagIndex = 0;
     Sym->Aux[0].Aux.WeakExternal.Characteristics =
-        COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+        COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS;
   } else {
     if (!Base)
       Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
diff --git a/lib/MC/XCOFFObjectWriter.cpp b/lib/MC/XCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..9b9a7b6c118c
--- /dev/null
+++ b/lib/MC/XCOFFObjectWriter.cpp
@@ -0,0 +1,94 @@
+//===-- lib/MC/XCOFFObjectWriter.cpp - XCOFF file writer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements XCOFF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class XCOFFObjectWriter : public MCObjectWriter {
+  support::endian::Writer W;
+  std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
+
+  void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override;
+
+  void recordRelocation(MCAssembler &, const MCAsmLayout &, const MCFragment *,
+                        const MCFixup &, MCValue, uint64_t &) override;
+
+  uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override;
+
+public:
+  XCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+                    raw_pwrite_stream &OS);
+};
+
+XCOFFObjectWriter::XCOFFObjectWriter(
+    std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
+    : W(OS, support::big), TargetObjectWriter(std::move(MOTW)) {}
+
+void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &,
+                                                 const MCAsmLayout &) {
+  // TODO: Intentionally a no-op until there are sections and symbols to handle.
+}
+
+void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &,
+                                         const MCFragment *, const MCFixup &,
+                                         MCValue, uint64_t &) {
+  report_fatal_error("XCOFF relocations not supported.");
+}
+
+uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) {
+  // We always emit a timestamp of 0 for reproducibility, so ensure incremental
+  // linking is not enabled, in case, like with Windows COFF, such a timestamp
+  // is incompatible with incremental linking of XCOFF.
+  if (Asm.isIncrementalLinkerCompatible())
+    report_fatal_error("Incremental linking not supported for XCOFF.");
+
+  if (TargetObjectWriter->is64Bit())
+    report_fatal_error("64-bit XCOFF object files are not supported yet.");
+
+  uint64_t StartOffset = W.OS.tell(); // Baseline for the byte count returned.
+
+  // TODO FIXME Assign section numbers/finalize sections.
+
+  // TODO FIXME Finalize symbols.
+
+  // Magic number; only the 32-bit case reaches here (64-bit is rejected above).
+  W.write<uint16_t>(0x01df);
+  // Number of sections; written as 0 until sections are finalized (see TODO).
+  W.write<uint16_t>(0);
+  // Timestamp field. For reproducible output we write a 0, which represents no
+  // timestamp.
+  W.write<int32_t>(0);
+  // Byte offset to the start of the symbol table; 0 while no symbols are written.
+  W.write<uint32_t>(0);
+  // Number of entries in the symbol table; 0 until symbols are finalized.
+  W.write<int32_t>(0);
+  // Size of the optional header; written as 0 here.
+  W.write<uint16_t>(0);
+  // Flags; none are set.
+  W.write<uint16_t>(0);
+
+  return W.OS.tell() - StartOffset; // Bytes written by this call.
+}
+
+} // end anonymous namespace
+
+std::unique_ptr<MCObjectWriter>
+llvm::createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+                              raw_pwrite_stream &OS) {
+  return llvm::make_unique<XCOFFObjectWriter>(std::move(MOTW), OS);
+}
diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp
index c1b197dfe2e6..f0e8dfab8680 100644
--- a/lib/MCA/Context.cpp
+++ b/lib/MCA/Context.cpp
@@ -1,9 +1,8 @@
 //===---------------------------- Context.cpp -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -22,6 +21,7 @@
 #include "llvm/MCA/Stages/DispatchStage.h"
 #include "llvm/MCA/Stages/EntryStage.h"
 #include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
 #include "llvm/MCA/Stages/RetireStage.h"
 
 namespace llvm {
@@ -43,7 +43,8 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
   auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
   auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
                                                    *RCU, *PRF);
-  auto Execute = llvm::make_unique<ExecuteStage>(*HWS);
+  auto Execute =
+      llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
   auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
 
   // Pass the ownership of all the hardware units to this Context.
@@ -55,6 +56,9 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
   // Build the pipeline.
   auto StagePipeline = llvm::make_unique<Pipeline>();
   StagePipeline->appendStage(std::move(Fetch));
+  if (Opts.MicroOpQueueSize)
+    StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>(
+        Opts.MicroOpQueueSize, Opts.DecodersThroughput));
   StagePipeline->appendStage(std::move(Dispatch));
   StagePipeline->appendStage(std::move(Execute));
   StagePipeline->appendStage(std::move(Retire));
diff --git a/lib/MCA/HWEventListener.cpp b/lib/MCA/HWEventListener.cpp
index 4a0e5b1754dd..58b2e0329222 100644
--- a/lib/MCA/HWEventListener.cpp
+++ b/lib/MCA/HWEventListener.cpp
@@ -1,9 +1,8 @@
 //===----------------------- HWEventListener.cpp ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/HardwareUnits/HardwareUnit.cpp b/lib/MCA/HardwareUnits/HardwareUnit.cpp
index edd32b9c0c1a..69f793796ec7 100644
--- a/lib/MCA/HardwareUnits/HardwareUnit.cpp
+++ b/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -1,9 +1,8 @@
 //===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp
index 8895eb392b60..ac1a6a36547b 100644
--- a/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -1,9 +1,8 @@
 //===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -22,67 +21,133 @@
 namespace llvm {
 namespace mca {
 
-LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
-               bool AssumeNoAlias)
-    : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+                       bool AssumeNoAlias)
+    : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0),
+      NoAlias(AssumeNoAlias), NextGroupID(1) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
-    if (!LQ_Size && EPI.LoadQueueID) {
+    if (!LQSize && EPI.LoadQueueID) {
       const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
-      LQ_Size = LdQDesc.BufferSize;
+      LQSize = LdQDesc.BufferSize;
     }
 
-    if (!SQ_Size && EPI.StoreQueueID) {
+    if (!SQSize && EPI.StoreQueueID) {
       const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
-      SQ_Size = StQDesc.BufferSize;
+      SQSize = StQDesc.BufferSize;
     }
   }
 }
 
-#ifndef NDEBUG
-void LSUnit::dump() const {
-  dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
-  dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
-  dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
-  dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
-}
-#endif
-
-void LSUnit::assignLQSlot(unsigned Index) {
-  assert(!isLQFull());
-  assert(LoadQueue.count(Index) == 0);
+LSUnitBase::~LSUnitBase() {}
 
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
-                    << ",slot=" << LoadQueue.size() << ">\n");
-  LoadQueue.insert(Index);
+void LSUnitBase::cycleEvent() {
+  for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
+    G.second->cycleEvent();
 }
 
-void LSUnit::assignSQSlot(unsigned Index) {
-  assert(!isSQFull());
-  assert(StoreQueue.count(Index) == 0);
-
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
-                    << ",slot=" << StoreQueue.size() << ">\n");
-  StoreQueue.insert(Index);
+#ifndef NDEBUG
+void LSUnitBase::dump() const {
+  dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
+  dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
+  dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n';
+  dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n';
+  dbgs() << "\n";
+  for (const auto &GroupIt : Groups) {
+    const MemoryGroup &Group = *GroupIt.second;
+    dbgs() << "[LSUnit] Group (" << GroupIt.first << "): "
+           << "[ #Preds = " << Group.getNumPredecessors()
+           << ", #GIssued = " << Group.getNumExecutingPredecessors()
+           << ", #GExecuted = " << Group.getNumExecutedPredecessors()
+           << ", #Inst = " << Group.getNumInstructions()
+           << ", #IIssued = " << Group.getNumExecuting()
+           << ", #IExecuted = " << Group.getNumExecuted() << '\n';
+  }
 }
+#endif
 
-void LSUnit::dispatch(const InstRef &IR) {
+unsigned LSUnit::dispatch(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
   unsigned IsMemBarrier = Desc.HasSideEffects;
   assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
 
-  const unsigned Index = IR.getSourceIndex();
-  if (Desc.MayLoad) {
-    if (IsMemBarrier)
-      LoadBarriers.insert(Index);
-    assignLQSlot(Index);
-  }
+  if (Desc.MayLoad)
+    assignLQSlot();
+  if (Desc.MayStore)
+    assignSQSlot();
 
   if (Desc.MayStore) {
+    // Always create a new group for store operations.
+
+    // A store may not pass a previous store or store barrier.
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
+
+    // A store may not pass a previous load or load barrier.
+    unsigned ImmediateLoadDominator =
+        std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
+    if (ImmediateLoadDominator) {
+      MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
+                        << ") --> (" << NewGID << ")\n");
+      IDom.addSuccessor(&NewGroup);
+    }
+    if (CurrentStoreGroupID) {
+      MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StoreGroup.addSuccessor(&NewGroup);
+    }
+
+    CurrentStoreGroupID = NewGID;
+    if (Desc.MayLoad) { // The instruction both loads and stores.
+      CurrentLoadGroupID = NewGID;
+      if (IsMemBarrier)
+        CurrentLoadBarrierGroupID = NewGID;
+    }
+
+    return NewGID;
+  }
+
+  assert(Desc.MayLoad && "Expected a load!");
+
+  // Always create a new memory group if this is the first load of the sequence.
+
+  // A load may not pass a previous store unless flag 'NoAlias' is set.
+  // A load may pass a previous load.
+  // A younger load cannot pass an older load barrier.
+  // A load barrier cannot pass an older load.
+  bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
+                               CurrentLoadGroupID <= CurrentStoreGroupID ||
+                               CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
+  if (ShouldCreateANewGroup) {
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
+
+    if (!assumeNoAlias() && CurrentStoreGroupID) {
+      MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StGroup.addSuccessor(&NewGroup);
+    }
+    if (CurrentLoadBarrierGroupID) {
+      MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
+                        << ") --> (" << NewGID << ")\n");
+      LdGroup.addSuccessor(&NewGroup);
+    }
+
+    CurrentLoadGroupID = NewGID;
     if (IsMemBarrier)
-      StoreBarriers.insert(Index);
-    assignSQSlot(Index);
+      CurrentLoadBarrierGroupID = NewGID;
+    return NewGID;
   }
+
+  MemoryGroup &Group = getGroup(CurrentLoadGroupID); // Join the current group.
+  Group.addInstruction();
+  return CurrentLoadGroupID;
 }
 
 LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
@@ -94,95 +159,46 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
   return LSUnit::LSU_AVAILABLE;
 }
 
-bool LSUnit::isReady(const InstRef &IR) const {
+void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
   bool IsALoad = Desc.MayLoad;
   bool IsAStore = Desc.MayStore;
-  assert((IsALoad || IsAStore) && "Not a memory operation!");
-  assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
-  assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
-
-  if (IsALoad && !LoadBarriers.empty()) {
-    unsigned LoadBarrierIndex = *LoadBarriers.begin();
-    // A younger load cannot pass a older load barrier.
-    if (Index > LoadBarrierIndex)
-      return false;
-    // A load barrier cannot pass a older load.
-    if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
-      return false;
-  }
+  assert((IsALoad || IsAStore) && "Expected a memory operation!");
 
-  if (IsAStore && !StoreBarriers.empty()) {
-    unsigned StoreBarrierIndex = *StoreBarriers.begin();
-    // A younger store cannot pass a older store barrier.
-    if (Index > StoreBarrierIndex)
-      return false;
-    // A store barrier cannot pass a older store.
-    if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
-      return false;
-  }
-
-  // A load may not pass a previous store unless flag 'NoAlias' is set.
-  // A load may pass a previous load.
-  if (NoAlias && IsALoad)
-    return true;
-
-  if (StoreQueue.size()) {
-    // A load may not pass a previous store.
-    // A store may not pass a previous store.
-    if (Index > *StoreQueue.begin())
-      return false;
+  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+  auto It = Groups.find(GroupID);
+  It->second->onInstructionExecuted();
+  if (It->second->isExecuted()) {
+    Groups.erase(It);
   }
 
-  // Okay, we are older than the oldest store in the queue.
-  // If there are no pending loads, then we can say for sure that this
-  // instruction is ready.
-  if (isLQEmpty())
-    return true;
-
-  // Check if there are no older loads.
-  if (Index <= *LoadQueue.begin())
-    return true;
-
-  // There is at least one younger load.
-  //
-  // A store may not pass a previous load.
-  // A load may pass a previous load.
-  return !IsAStore;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
-  bool IsALoad = Desc.MayLoad;
-  bool IsAStore = Desc.MayStore;
-
   if (IsALoad) {
-    if (LoadQueue.erase(Index)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the load queue.\n");
-    }
-    if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
-      LLVM_DEBUG(
-          dbgs() << "[LSUnit]: Instruction idx=" << Index
-                 << " has been removed from the set of load barriers.\n");
-      LoadBarriers.erase(Index);
-    }
+    UsedLQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the load queue.\n");
   }
 
   if (IsAStore) {
-    if (StoreQueue.erase(Index)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the store queue.\n");
-    }
+    UsedSQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the store queue.\n");
+  }
+}
 
-    if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
-      LLVM_DEBUG(
-          dbgs() << "[LSUnit]: Instruction idx=" << Index
-                 << " has been removed from the set of store barriers.\n");
-      StoreBarriers.erase(Index);
-    }
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+  const Instruction &IS = *IR.getInstruction();
+  if (!IS.isMemOp())
+    return;
+
+  LSUnitBase::onInstructionExecuted(IR);
+  unsigned GroupID = IS.getLSUTokenID();
+  if (!isValidGroupID(GroupID)) {
+    if (GroupID == CurrentLoadGroupID)
+      CurrentLoadGroupID = 0;
+    if (GroupID == CurrentStoreGroupID)
+      CurrentStoreGroupID = 0;
+    if (GroupID == CurrentLoadBarrierGroupID)
+      CurrentLoadBarrierGroupID = 0;
   }
 }
 
diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp
index 22977e5ded65..86a888ea8cae 100644
--- a/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -1,9 +1,8 @@
 //===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -189,7 +188,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
       if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
         // This partial write has a false dependency on RenameAs.
         assert(!IsEliminated && "Unexpected partial update!");
-        OtherWS->addUser(&WS);
+        OtherWS->addUser(OtherWrite.getSourceIndex(), &WS);
       }
     }
   }
@@ -331,30 +330,25 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
   if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
     return false;
 
-  MCPhysReg FromReg = RS.getRegisterID();
-  MCPhysReg ToReg = WS.getRegisterID();
-
   // Construct an alias.
-  MCPhysReg AliasReg = FromReg;
-  if (RRIFrom.RenameAs)
-    AliasReg = RRIFrom.RenameAs;
+  MCPhysReg AliasedReg =
+      RRIFrom.RenameAs ? RRIFrom.RenameAs : RS.getRegisterID();
+  MCPhysReg AliasReg = RRITo.RenameAs ? RRITo.RenameAs : WS.getRegisterID();
 
-  const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
+  const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasedReg].second;
   if (RMAlias.AliasRegID)
-    AliasReg = RMAlias.AliasRegID;
+    AliasedReg = RMAlias.AliasRegID;
 
-  if (AliasReg != ToReg) {
-    RegisterMappings[ToReg].second.AliasRegID = AliasReg;
-    for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
-      RegisterMappings[*I].second.AliasRegID = AliasReg;
-  }
+  RegisterMappings[AliasReg].second.AliasRegID = AliasedReg;
+  for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I)
+    RegisterMappings[*I].second.AliasRegID = AliasedReg;
 
-  RMT.NumMoveEliminated++;
   if (IsZeroMove) {
     WS.setWriteZero();
     RS.setReadZero();
   }
   WS.setEliminated();
+  RMT.NumMoveEliminated++;
 
   return true;
 }
@@ -402,7 +396,7 @@ void RegisterFile::collectWrites(const ReadState &RS,
 }
 
 void RegisterFile::addRegisterRead(ReadState &RS,
-                                   SmallVectorImpl<WriteRef> &Defs) const {
+                                   const MCSubtargetInfo &STI) const {
   unsigned RegID = RS.getRegisterID();
   const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
   RS.setPRF(RRI.IndexPlusCost.first);
@@ -411,8 +405,23 @@ void RegisterFile::addRegisterRead(ReadState &RS,
 
   if (ZeroRegisters[RS.getRegisterID()])
     RS.setReadZero();
-  collectWrites(RS, Defs);
-  RS.setDependentWrites(Defs.size());
+
+  SmallVector<WriteRef, 4> DependentWrites;
+  collectWrites(RS, DependentWrites);
+  RS.setDependentWrites(DependentWrites.size());
+
+  // We know that this read depends on all the writes in DependentWrites.
+  // For each write, check if we have ReadAdvance information, and use it
+  // to figure out in how many cycles this read becomes available.
+  const ReadDescriptor &RD = RS.getDescriptor();
+  const MCSchedModel &SM = STI.getSchedModel();
+  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
+  for (WriteRef &WR : DependentWrites) {
+    WriteState &WS = *WR.getWriteState();
+    unsigned WriteResID = WS.getWriteResourceID();
+    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+    WS.addUser(WR.getSourceIndex(), &RS, ReadAdvance);
+  }
 }
 
 unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp
index 2039b58e8ee5..06f2476353d6 100644
--- a/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -1,9 +1,8 @@
 //===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -24,16 +23,10 @@ namespace mca {
 #define DEBUG_TYPE "llvm-mca"
 ResourceStrategy::~ResourceStrategy() = default;
 
-// Returns the index of the highest bit set. For resource masks, the position of
-// the highest bit set can be used to construct a resource mask identifier.
-static unsigned getResourceStateIndex(uint64_t Mask) {
-  return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
-}
-
 static uint64_t selectImpl(uint64_t CandidateMask,
                            uint64_t &NextInSequenceMask) {
   // The upper bit set in CandidateMask identifies our next candidate resource.
-  CandidateMask = 1ULL << (getResourceStateIndex(CandidateMask) - 1);
+  CandidateMask = 1ULL << getResourceStateIndex(CandidateMask);
   NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
   return CandidateMask;
 }
@@ -75,7 +68,7 @@ ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
       BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
   if (IsAGroup) {
     ResourceSizeMask =
-        ResourceMask ^ 1ULL << (getResourceStateIndex(ResourceMask) - 1);
+        ResourceMask ^ 1ULL << getResourceStateIndex(ResourceMask);
   } else {
     ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
   }
@@ -116,13 +109,21 @@ getStrategyFor(const ResourceState &RS) {
 }
 
 ResourceManager::ResourceManager(const MCSchedModel &SM)
-    : Resources(SM.getNumProcResourceKinds()),
-      Strategies(SM.getNumProcResourceKinds()),
-      Resource2Groups(SM.getNumProcResourceKinds(), 0),
-      ProcResID2Mask(SM.getNumProcResourceKinds()) {
+    : Resources(SM.getNumProcResourceKinds() - 1),
+      Strategies(SM.getNumProcResourceKinds() - 1),
+      Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
+      ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
+      ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
+      ProcResUnitMask(0), ReservedResourceGroups(0) {
   computeProcResourceMasks(SM, ProcResID2Mask);
 
-  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+  // initialize vector ResIndex2ProcResID.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned Index = getResourceStateIndex(ProcResID2Mask[I]);
+    ResIndex2ProcResID[Index] = I;
+  }
+
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
     uint64_t Mask = ProcResID2Mask[I];
     unsigned Index = getResourceStateIndex(Mask);
     Resources[Index] =
@@ -130,14 +131,16 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
     Strategies[Index] = getStrategyFor(*Resources[Index]);
   }
 
-  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
     uint64_t Mask = ProcResID2Mask[I];
     unsigned Index = getResourceStateIndex(Mask);
     const ResourceState &RS = *Resources[Index];
-    if (!RS.isAResourceGroup())
+    if (!RS.isAResourceGroup()) {
+      ProcResUnitMask |= Mask;
       continue;
+    }
 
-    uint64_t GroupMaskIdx = 1ULL << (Index - 1);
+    uint64_t GroupMaskIdx = 1ULL << Index;
     Mask -= GroupMaskIdx;
     while (Mask) {
       // Extract lowest set isolated bit.
@@ -147,6 +150,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
       Mask ^= Unit;
     }
   }
+
+  AvailableProcResUnits = ProcResUnitMask;
 }
 
 void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
@@ -158,7 +163,7 @@ void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
 }
 
 unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
-  return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
+  return ResIndex2ProcResID[getResourceStateIndex(Mask)];
 }
 
 unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
@@ -200,6 +205,8 @@ void ResourceManager::use(const ResourceRef &RR) {
   if (RS.isReady())
     return;
 
+  AvailableProcResUnits ^= RR.first;
+
   // Notify groups that RR.first is no longer available.
   uint64_t Users = Resource2Groups[RSID];
   while (Users) {
@@ -214,19 +221,22 @@ void ResourceManager::use(const ResourceRef &RR) {
 }
 
 void ResourceManager::release(const ResourceRef &RR) {
-  ResourceState &RS = *Resources[getResourceStateIndex(RR.first)];
+  unsigned RSID = getResourceStateIndex(RR.first);
+  ResourceState &RS = *Resources[RSID];
   bool WasFullyUsed = !RS.isReady();
   RS.releaseSubResource(RR.second);
   if (!WasFullyUsed)
     return;
 
-  for (std::unique_ptr<ResourceState> &Res : Resources) {
-    ResourceState &Current = *Res;
-    if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
-      continue;
+  AvailableProcResUnits ^= RR.first;
 
-    if (Current.containsResource(RR.first))
-      Current.releaseSubResource(RR.first);
+  // Notify groups that RR.first is now available again.
+  uint64_t Users = Resource2Groups[RSID];
+  while (Users) {
+    unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+    ResourceState &CurrentUser = *Resources[GroupIndex];
+    CurrentUser.releaseSubResource(RR.first);
+    Users &= Users - 1;
   }
 }
 
@@ -260,13 +270,19 @@ void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
     Resources[getResourceStateIndex(R)]->releaseBuffer();
 }
 
-bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
-  return all_of(
-      Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) {
-        unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
-        unsigned Index = getResourceStateIndex(E.first);
-        return Resources[Index]->isReady(NumUnits);
-      });
+uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
+  uint64_t BusyResourceMask = 0;
+  for (const std::pair<uint64_t, const ResourceUsage> &E : Desc.Resources) {
+    unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+    unsigned Index = getResourceStateIndex(E.first);
+    if (!Resources[Index]->isReady(NumUnits))
+      BusyResourceMask |= E.first;
+  }
+
+  BusyResourceMask &= ProcResUnitMask;
+  if (BusyResourceMask)
+    return BusyResourceMask;
+  return Desc.UsedProcResGroups & ReservedResourceGroups;
 }
 
 void ResourceManager::issueInstruction(
@@ -317,14 +333,20 @@ void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
 }
 
 void ResourceManager::reserveResource(uint64_t ResourceID) {
-  ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
-  assert(!Resource.isReserved());
+  const unsigned Index = getResourceStateIndex(ResourceID);
+  ResourceState &Resource = *Resources[Index];
+  assert(Resource.isAResourceGroup() && !Resource.isReserved() &&
+         "Unexpected resource found!");
   Resource.setReserved();
+  ReservedResourceGroups ^= 1ULL << Index;
 }
 
 void ResourceManager::releaseResource(uint64_t ResourceID) {
-  ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+  const unsigned Index = getResourceStateIndex(ResourceID);
+  ResourceState &Resource = *Resources[Index];
   Resource.clearReserved();
+  if (Resource.isAResourceGroup())
+    ReservedResourceGroups ^= 1ULL << Index;
 }
 
 } // namespace mca
diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index de9f24552c38..068c5062ccdf 100644
--- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp
+++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -1,9 +1,8 @@
 //===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp
index 355ef79d06a6..0f0f2ffb8325 100644
--- a/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -1,9 +1,8 @@
 //===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,10 +37,13 @@ void Scheduler::dump() const {
 }
 #endif
 
-Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
 
-  switch (Resources->canBeDispatched(Desc.Buffers)) {
+  ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers);
+  HadTokenStall = RSE != RS_BUFFER_AVAILABLE;
+
+  switch (RSE) {
   case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
     return Scheduler::SC_BUFFERS_FULL;
   case ResourceStateEvent::RS_RESERVED:
@@ -51,7 +53,10 @@ Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
   }
 
   // Give lower priority to LSUnit stall events.
-  switch (LSU.isAvailable(IR)) {
+  LSUnit::Status LSS = LSU.isAvailable(IR);
+  HadTokenStall = LSS != LSUnit::LSU_AVAILABLE;
+
+  switch (LSS) {
   case LSUnit::LSU_LQUEUE_FULL:
     return Scheduler::SC_LOAD_QUEUE_FULL;
   case LSUnit::LSU_SQUEUE_FULL:
@@ -75,7 +80,15 @@ void Scheduler::issueInstructionImpl(
 
   // Notify the instruction that it started executing.
   // This updates the internal state of each write.
-  IS->execute();
+  IS->execute(IR.getSourceIndex());
+
+  IS->computeCriticalRegDep();
+
+  if (IS->isMemOp()) {
+    LSU.onInstructionIssued(IR);
+    const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
+    IS->setCriticalMemDep(Group.getCriticalPredecessor());
+  }
 
   if (IS->isExecuting())
     IssuedSet.emplace_back(IR);
@@ -87,9 +100,11 @@ void Scheduler::issueInstructionImpl(
 void Scheduler::issueInstruction(
     InstRef &IR,
     SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+    SmallVectorImpl<InstRef> &PendingInstructions,
     SmallVectorImpl<InstRef> &ReadyInstructions) {
   const Instruction &Inst = *IR.getInstruction();
   bool HasDependentUsers = Inst.hasDependentUsers();
+  HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR);
 
   Resources->releaseBuffers(Inst.getDesc().Buffers);
   issueInstructionImpl(IR, UsedResources);
@@ -98,12 +113,49 @@ void Scheduler::issueInstruction(
   // this same cycle if operands have ReadAdvance entries.  Promote those
   // instructions to the ReadySet and notify the caller that those are ready.
   if (HasDependentUsers)
-    promoteToReadySet(ReadyInstructions);
+    if (promoteToPendingSet(PendingInstructions))
+      promoteToReadySet(ReadyInstructions);
+}
+
+bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+  // Scan the set of waiting instructions and promote them to the
+  // ready set if operands are all ready.
+  unsigned PromotedElements = 0;
+  for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) {
+    InstRef &IR = *I;
+    if (!IR)
+      break;
+
+    // Check if there are unsolved register dependencies.
+    Instruction &IS = *IR.getInstruction();
+    if (!IS.isReady() && !IS.updatePending()) {
+      ++I;
+      continue;
+    }
+    // Check if there are unsolved memory dependencies.
+    if (IS.isMemOp() && !LSU.isReady(IR)) {
+      ++I;
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+                      << " promoted to the READY set.\n");
+
+    Ready.emplace_back(IR);
+    ReadySet.emplace_back(IR);
+
+    IR.invalidate();
+    ++PromotedElements;
+    std::iter_swap(I, E - PromotedElements);
+  }
+
+  PendingSet.resize(PendingSet.size() - PromotedElements);
+  return PromotedElements;
 }
 
-void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) {
   // Scan the set of waiting instructions and promote them to the
-  // ready queue if operands are all ready.
+  // pending set if operands are all ready.
   unsigned RemovedElements = 0;
   for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
     InstRef &IR = *I;
@@ -111,19 +163,23 @@ void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
       break;
 
     // Check if this instruction is now ready. In case, force
-    // a transition in state using method 'update()'.
+    // a transition in state using method 'updateDispatched()'.
     Instruction &IS = *IR.getInstruction();
-    if (!IS.isReady())
-      IS.update();
+    if (IS.isDispatched() && !IS.updateDispatched()) {
+      ++I;
+      continue;
+    }
 
-    // Check if there are still unsolved data dependencies.
-    if (!isReady(IR)) {
+    if (IS.isMemOp() && LSU.isWaiting(IR)) {
       ++I;
       continue;
     }
 
-    Ready.emplace_back(IR);
-    ReadySet.emplace_back(IR);
+    LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+                      << " promoted to the PENDING set.\n");
+
+    Pending.emplace_back(IR);
+    PendingSet.emplace_back(IR);
 
     IR.invalidate();
     ++RemovedElements;
@@ -131,16 +187,21 @@ void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
   }
 
   WaitSet.resize(WaitSet.size() - RemovedElements);
+  return RemovedElements;
 }
 
 InstRef Scheduler::select() {
   unsigned QueueIndex = ReadySet.size();
   for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
-    const InstRef &IR = ReadySet[I];
+    InstRef &IR = ReadySet[I];
     if (QueueIndex == ReadySet.size() ||
         Strategy->compare(IR, ReadySet[QueueIndex])) {
-      const InstrDesc &D = IR.getInstruction()->getDesc();
-      if (Resources->canBeIssued(D))
+      Instruction &IS = *IR.getInstruction();
+      uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc());
+      if (BusyResourceMask)
+        IS.setCriticalResourceMask(BusyResourceMask);
+      BusyResourceUnits |= BusyResourceMask;
+      if (!BusyResourceMask)
         QueueIndex = I;
     }
   }
@@ -180,22 +241,51 @@ void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
   IssuedSet.resize(IssuedSet.size() - RemovedElements);
 }
 
+uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) {
+  Insts.insert(Insts.end(), ReadySet.begin(), ReadySet.end());
+  return BusyResourceUnits;
+}
+
+void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
+                                        SmallVectorImpl<InstRef> &MemDeps) {
+  const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet;
+  for (const InstRef &IR : make_range(PendingSet.begin(), EndIt)) {
+    const Instruction &IS = *IR.getInstruction();
+    if (Resources->checkAvailability(IS.getDesc()))
+      continue;
+
+    if (IS.isMemOp() && LSU.isPending(IR))
+      MemDeps.emplace_back(IR);
+
+    if (IS.isPending())
+      RegDeps.emplace_back(IR);
+  }
+}
+
 void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
                            SmallVectorImpl<InstRef> &Executed,
+                           SmallVectorImpl<InstRef> &Pending,
                            SmallVectorImpl<InstRef> &Ready) {
+  LSU.cycleEvent();
+
   // Release consumed resources.
   Resources->cycleEvent(Freed);
 
-  // Propagate the cycle event to the 'Issued' and 'Wait' sets.
   for (InstRef &IR : IssuedSet)
     IR.getInstruction()->cycleEvent();
-
   updateIssuedSet(Executed);
 
+  for (InstRef &IR : PendingSet)
+    IR.getInstruction()->cycleEvent();
+
   for (InstRef &IR : WaitSet)
     IR.getInstruction()->cycleEvent();
 
+  promoteToPendingSet(Pending);
   promoteToReadySet(Ready);
+
+  NumDispatchedToThePendingSet = 0;
+  BusyResourceUnits = 0;
 }
 
 bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
@@ -208,21 +298,31 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
   return Desc.MustIssueImmediately;
 }
 
-void Scheduler::dispatch(const InstRef &IR) {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+bool Scheduler::dispatch(InstRef &IR) {
+  Instruction &IS = *IR.getInstruction();
+  const InstrDesc &Desc = IS.getDesc();
   Resources->reserveBuffers(Desc.Buffers);
 
   // If necessary, reserve queue entries in the load-store unit (LSU).
-  bool IsMemOp = Desc.MayLoad || Desc.MayStore;
-  if (IsMemOp)
-    LSU.dispatch(IR);
+  if (IS.isMemOp())
+    IS.setLSUTokenID(LSU.dispatch(IR));
 
-  if (!isReady(IR)) {
+  if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
     WaitSet.push_back(IR);
-    return;
+    return false;
+  }
+
+  if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) {
+    LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
+                      << " to the PendingSet\n");
+    PendingSet.push_back(IR);
+    ++NumDispatchedToThePendingSet;
+    return false;
   }
 
+  assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) &&
+         "Unexpected internal state found!");
   // Don't add a zero-latency instruction to the Ready queue.
   // A zero-latency instruction doesn't consume any scheduler resources. That is
   // because it doesn't need to be executed, and it is often removed at register
@@ -235,12 +335,8 @@ void Scheduler::dispatch(const InstRef &IR) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
     ReadySet.push_back(IR);
   }
-}
 
-bool Scheduler::isReady(const InstRef &IR) const {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  bool IsMemOp = Desc.MayLoad || Desc.MayStore;
-  return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR));
+  return true;
 }
 
 } // namespace mca
diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp
index d2d65e55537c..829920366c90 100644
--- a/lib/MCA/InstrBuilder.cpp
+++ b/lib/MCA/InstrBuilder.cpp
@@ -1,9 +1,8 @@
 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -66,6 +65,17 @@ static void initializeUsedResources(InstrDesc &ID,
   for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
     const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
     const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+    if (!PRE->Cycles) {
+#ifndef NDEBUG
+      WithColor::warning()
+          << "Ignoring invalid write of zero cycles on processor resource "
+          << PR.Name << "\n";
+      WithColor::note() << "found in scheduling class " << SCDesc.Name
+                        << " (write index #" << I << ")\n";
+#endif
+      continue;
+    }
+
     uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
     if (PR.BufferSize < 0) {
       AllInOrderResources = false;
@@ -98,14 +108,14 @@ static void initializeUsedResources(InstrDesc &ID,
   });
 
   uint64_t UsedResourceUnits = 0;
+  uint64_t UsedResourceGroups = 0;
 
   // Remove cycles contributed by smaller resources.
   for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
     ResourcePlusCycles &A = Worklist[I];
     if (!A.second.size()) {
-      A.second.NumUnits = 0;
-      A.second.setReserved();
-      ID.Resources.emplace_back(A);
+      assert(countPopulation(A.first) > 1 && "Expected a group!");
+      UsedResourceGroups |= PowerOf2Floor(A.first);
       continue;
     }
 
@@ -116,6 +126,7 @@ static void initializeUsedResources(InstrDesc &ID,
     } else {
       // Remove the leading 1 from the resource group mask.
       NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+      UsedResourceGroups |= (A.first ^ NormalizedMask);
     }
 
     for (unsigned J = I + 1; J < E; ++J) {
@@ -128,6 +139,9 @@ static void initializeUsedResources(InstrDesc &ID,
     }
   }
 
+  ID.UsedProcResUnits = UsedResourceUnits;
+  ID.UsedProcResGroups = UsedResourceGroups;
+
   // A SchedWrite may specify a number of cycles in which a resource group
   // is reserved. For example (on target x86; cpu Haswell):
   //
@@ -180,10 +194,15 @@ static void initializeUsedResources(InstrDesc &ID,
 
   LLVM_DEBUG({
     for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
-      dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
+             << "Reserved=" << R.second.isReserved() << ", "
+             << "#Units=" << R.second.NumUnits << ", "
              << "cy=" << R.second.size() << '\n';
     for (const uint64_t R : ID.Buffers)
       dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
+    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
+           << '\n';
   });
 }
 
@@ -533,6 +552,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
   // Create a new empty descriptor.
   std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
   ID->NumMicroOps = SCDesc.NumMicroOps;
+  ID->SchedClassID = SchedClassID;
 
   if (MCDesc.isCall() && FirstCallInst) {
     // We don't correctly model calls.
@@ -572,7 +592,6 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
     return std::move(Err);
 
   // Now add the new descriptor.
-  SchedClassID = MCDesc.getSchedClass();
   bool IsVariadic = MCDesc.isVariadic();
   if (!IsVariadic && !IsVariant) {
     Descriptors[MCI.getOpcode()] = std::move(ID);
diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp
index 057e95ca9990..001842bca318 100644
--- a/lib/MCA/Instruction.cpp
+++ b/lib/MCA/Instruction.cpp
@@ -1,9 +1,8 @@
 //===--------------------- Instruction.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,16 @@
 namespace llvm {
 namespace mca {
 
-void ReadState::writeStartEvent(unsigned Cycles) {
+void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
+                                 unsigned Cycles) {
+  CRD.IID = IID;
+  CRD.RegID = RegID;
+  CRD.Cycles = Cycles;
+  DependentWriteCyclesLeft = Cycles;
+  DependentWrite = nullptr;
+}
+
+void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
   assert(DependentWrites);
   assert(CyclesLeft == UNKNOWN_CYCLES);
 
@@ -29,7 +37,12 @@ void ReadState::writeStartEvent(unsigned Cycles) {
   // The HW is forced to do some extra bookkeeping to track of all the
   // dependent writes, and implement a merging scheme for the partial writes.
   --DependentWrites;
-  TotalCycles = std::max(TotalCycles, Cycles);
+  if (TotalCycles < Cycles) {
+    CRD.IID = IID;
+    CRD.RegID = RegID;
+    CRD.Cycles = Cycles;
+    TotalCycles = Cycles;
+  }
 
   if (!DependentWrites) {
     CyclesLeft = TotalCycles;
@@ -37,7 +50,7 @@ void ReadState::writeStartEvent(unsigned Cycles) {
   }
 }
 
-void WriteState::onInstructionIssued() {
+void WriteState::onInstructionIssued(unsigned IID) {
   assert(CyclesLeft == UNKNOWN_CYCLES);
   // Update the number of cycles left based on the WriteDescriptor info.
   CyclesLeft = getLatency();
@@ -47,34 +60,30 @@ void WriteState::onInstructionIssued() {
   for (const std::pair<ReadState *, int> &User : Users) {
     ReadState *RS = User.first;
     unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
-    RS->writeStartEvent(ReadCycles);
+    RS->writeStartEvent(IID, RegisterID, ReadCycles);
   }
 
   // Notify any writes that are in a false dependency with this write.
   if (PartialWrite)
-    PartialWrite->writeStartEvent(CyclesLeft);
+    PartialWrite->writeStartEvent(IID, RegisterID, CyclesLeft);
 }
 
-void WriteState::addUser(ReadState *User, int ReadAdvance) {
+void WriteState::addUser(unsigned IID, ReadState *User, int ReadAdvance) {
   // If CyclesLeft is different than -1, then we don't need to
   // update the list of users. We can just notify the user with
   // the actual number of cycles left (which may be zero).
   if (CyclesLeft != UNKNOWN_CYCLES) {
     unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance);
-    User->writeStartEvent(ReadCycles);
+    User->writeStartEvent(IID, RegisterID, ReadCycles);
     return;
   }
 
-  if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) {
-        return Use.first == User;
-      }) == Users.end()) {
-    Users.emplace_back(User, ReadAdvance);
-  }
+  Users.emplace_back(User, ReadAdvance);
 }
 
-void WriteState::addUser(WriteState *User) {
+void WriteState::addUser(unsigned IID, WriteState *User) {
   if (CyclesLeft != UNKNOWN_CYCLES) {
-    User->writeStartEvent(std::max(0, CyclesLeft));
+    User->writeStartEvent(IID, RegisterID, std::max(0, CyclesLeft));
     return;
   }
 
@@ -126,16 +135,37 @@ void WriteRef::dump() const {
 }
 #endif
 
+const CriticalDependency &Instruction::computeCriticalRegDep() {
+  if (CriticalRegDep.Cycles)
+    return CriticalRegDep;
+
+  unsigned MaxLatency = 0;
+  for (const WriteState &WS : getDefs()) {
+    const CriticalDependency &WriteCRD = WS.getCriticalRegDep();
+    if (WriteCRD.Cycles > MaxLatency)
+      CriticalRegDep = WriteCRD;
+  }
+
+  for (const ReadState &RS : getUses()) {
+    const CriticalDependency &ReadCRD = RS.getCriticalRegDep();
+    if (ReadCRD.Cycles > MaxLatency)
+      CriticalRegDep = ReadCRD;
+  }
+
+  return CriticalRegDep;
+}
+
 void Instruction::dispatch(unsigned RCUToken) {
   assert(Stage == IS_INVALID);
-  Stage = IS_AVAILABLE;
+  Stage = IS_DISPATCHED;
   RCUTokenID = RCUToken;
 
   // Check if input operands are already available.
-  update();
+  if (updateDispatched())
+    updatePending();
 }
 
-void Instruction::execute() {
+void Instruction::execute(unsigned IID) {
   assert(Stage == IS_READY);
   Stage = IS_EXECUTING;
 
@@ -143,7 +173,7 @@ void Instruction::execute() {
   CyclesLeft = getLatency();
 
   for (WriteState &WS : getDefs())
-    WS.onInstructionIssued();
+    WS.onInstructionIssued(IID);
 
   // Transition to the "executed" stage if this is a zero-latency instruction.
   if (!CyclesLeft)
@@ -156,30 +186,49 @@ void Instruction::forceExecuted() {
   Stage = IS_EXECUTED;
 }
 
-void Instruction::update() {
-  assert(isDispatched() && "Unexpected instruction stage found!");
+bool Instruction::updatePending() {
+  assert(isPending() && "Unexpected instruction stage found!");
 
   if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
-    return;
+    return false;
+
+  // A partial register write cannot complete before a dependent write.
+  if (!all_of(getDefs(), [](const WriteState &Def) { return Def.isReady(); }))
+    return false;
+
+  Stage = IS_READY;
+  return true;
+}
+
+bool Instruction::updateDispatched() {
+  assert(isDispatched() && "Unexpected instruction stage found!");
+
+  if (!all_of(getUses(), [](const ReadState &Use) {
+        return Use.isPending() || Use.isReady();
+      }))
+    return false;
 
   // A partial register write cannot complete before a dependent write.
-  auto IsDefReady = [&](const WriteState &Def) {
-    if (!Def.getDependentWrite()) {
-      unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
-      return !CyclesLeft || CyclesLeft < getLatency();
-    }
+  if (!all_of(getDefs(),
+              [](const WriteState &Def) { return !Def.getDependentWrite(); }))
     return false;
-  };
 
-  if (all_of(getDefs(), IsDefReady))
-    Stage = IS_READY;
+  Stage = IS_PENDING;
+  return true;
+}
+
+void Instruction::update() {
+  if (isDispatched())
+    updateDispatched();
+  if (isPending())
+    updatePending();
 }
 
 void Instruction::cycleEvent() {
   if (isReady())
     return;
 
-  if (isDispatched()) {
+  if (isDispatched() || isPending()) {
     for (ReadState &Use : getUses())
       Use.cycleEvent();
 
diff --git a/lib/MCA/Pipeline.cpp b/lib/MCA/Pipeline.cpp
index 4c0e37c9ba7e..22b9d0799f77 100644
--- a/lib/MCA/Pipeline.cpp
+++ b/lib/MCA/Pipeline.cpp
@@ -1,9 +1,8 @@
 //===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -64,9 +63,10 @@ Error Pipeline::runCycle() {
     Err = FirstStage.execute(IR);
 
   // Update stages in preparation for a new cycle.
-  for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
-    const std::unique_ptr<Stage> &S = *I;
+  for (const std::unique_ptr<Stage> &S : Stages) {
     Err = S->cycleEnd();
+    if (Err)
+      break;
   }
 
   return Err;
diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp
index 7fb4eb6a1c0e..7334a268e9a6 100644
--- a/lib/MCA/Stages/DispatchStage.cpp
+++ b/lib/MCA/Stages/DispatchStage.cpp
@@ -1,9 +1,8 @@
 //===--------------------- DispatchStage.cpp --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -26,6 +25,16 @@
 namespace llvm {
 namespace mca {
 
+DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget,
+                             const MCRegisterInfo &MRI,
+                             unsigned MaxDispatchWidth, RetireControlUnit &R,
+                             RegisterFile &F)
+    : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+      CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {
+  if (!DispatchWidth)
+    DispatchWidth = Subtarget.getSchedModel().IssueWidth;
+}
+
 void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
                                                 ArrayRef<unsigned> UsedRegs,
                                                 unsigned UOps) const {
@@ -60,28 +69,10 @@ bool DispatchStage::checkRCU(const InstRef &IR) const {
 }
 
 bool DispatchStage::canDispatch(const InstRef &IR) const {
-  return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR);
-}
-
-void DispatchStage::updateRAWDependencies(ReadState &RS,
-                                          const MCSubtargetInfo &STI) {
-  SmallVector<WriteRef, 4> DependentWrites;
-
-  // Collect all the dependent writes, and update RS internal state.
-  PRF.addRegisterRead(RS, DependentWrites);
-
-  // We know that this read depends on all the writes in DependentWrites.
-  // For each write, check if we have ReadAdvance information, and use it
-  // to figure out in how many cycles this read becomes available.
-  const ReadDescriptor &RD = RS.getDescriptor();
-  const MCSchedModel &SM = STI.getSchedModel();
-  const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
-  for (WriteRef &WR : DependentWrites) {
-    WriteState &WS = *WR.getWriteState();
-    unsigned WriteResID = WS.getWriteResourceID();
-    int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
-    WS.addUser(&RS, ReadAdvance);
-  }
+  bool CanDispatch = checkRCU(IR);
+  CanDispatch &= checkPRF(IR);
+  CanDispatch &= checkNextStage(IR);
+  return CanDispatch;
 }
 
 Error DispatchStage::dispatch(InstRef IR) {
@@ -104,11 +95,11 @@ Error DispatchStage::dispatch(InstRef IR) {
     AvailableEntries = 0;
 
   // Check if this is an optimizable reg-reg move.
-  bool IsEliminated = false;
   if (IS.isOptimizableMove()) {
     assert(IS.getDefs().size() == 1 && "Expected a single input!");
     assert(IS.getUses().size() == 1 && "Expected a single output!");
-    IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]);
+    if (PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]))
+      IS.setEliminated();
   }
 
   // A dependency-breaking instruction doesn't have to wait on the register
@@ -120,9 +111,9 @@ Error DispatchStage::dispatch(InstRef IR) {
   //
   // We also don't update data dependencies for instructions that have been
   // eliminated at register renaming stage.
-  if (!IsEliminated) {
+  if (!IS.isEliminated()) {
     for (ReadState &RS : IS.getUses())
-      updateRAWDependencies(RS, STI);
+      PRF.addRegisterRead(RS, STI);
   }
 
   // By default, a dependency-breaking zero-idiom is expected to be optimized
diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp
index 3325bb36f5af..d2f5613a0fb6 100644
--- a/lib/MCA/Stages/EntryStage.cpp
+++ b/lib/MCA/Stages/EntryStage.cpp
@@ -1,9 +1,8 @@
 //===---------------------- EntryStage.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -19,7 +18,9 @@
 namespace llvm {
 namespace mca {
 
-bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; }
+bool EntryStage::hasWorkToComplete() const {
+  return static_cast<bool>(CurrentInstruction);
+}
 
 bool EntryStage::isAvailable(const InstRef & /* unused */) const {
   if (CurrentInstruction)
diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp
index e78327763fa1..a2b361fcd1bf 100644
--- a/lib/MCA/Stages/ExecuteStage.cpp
+++ b/lib/MCA/Stages/ExecuteStage.cpp
@@ -1,9 +1,8 @@
 //===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -53,8 +52,11 @@ bool ExecuteStage::isAvailable(const InstRef &IR) const {
 
 Error ExecuteStage::issueInstruction(InstRef &IR) {
   SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
+  SmallVector<InstRef, 4> Pending;
   SmallVector<InstRef, 4> Ready;
-  HWS.issueInstruction(IR, Used, Ready);
+
+  HWS.issueInstruction(IR, Used, Pending, Ready);
+  NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps;
 
   notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
 
@@ -66,6 +68,9 @@ Error ExecuteStage::issueInstruction(InstRef &IR) {
       return S;
   }
 
+  for (const InstRef &I : Pending)
+    notifyInstructionPending(I);
+
   for (const InstRef &I : Ready)
     notifyInstructionReady(I);
   return ErrorSuccess();
@@ -87,9 +92,12 @@ Error ExecuteStage::issueReadyInstructions() {
 Error ExecuteStage::cycleStart() {
   SmallVector<ResourceRef, 8> Freed;
   SmallVector<InstRef, 4> Executed;
+  SmallVector<InstRef, 4> Pending;
   SmallVector<InstRef, 4> Ready;
 
-  HWS.cycleEvent(Freed, Executed, Ready);
+  HWS.cycleEvent(Freed, Executed, Pending, Ready);
+  NumDispatchedOpcodes = 0;
+  NumIssuedOpcodes = 0;
 
   for (const ResourceRef &RR : Freed)
     notifyResourceAvailable(RR);
@@ -101,12 +109,53 @@ Error ExecuteStage::cycleStart() {
       return S;
   }
 
+  for (const InstRef &IR : Pending)
+    notifyInstructionPending(IR);
+
   for (const InstRef &IR : Ready)
     notifyInstructionReady(IR);
 
   return issueReadyInstructions();
 }
 
+Error ExecuteStage::cycleEnd() {
+  if (!EnablePressureEvents)
+    return ErrorSuccess();
+
+  // Always conservatively report any backpressure events if the dispatch logic
+  // was stalled due to unavailable scheduler resources.
+  if (!HWS.hadTokenStall() && NumDispatchedOpcodes <= NumIssuedOpcodes)
+    return ErrorSuccess();
+
+  SmallVector<InstRef, 8> Insts;
+  uint64_t Mask = HWS.analyzeResourcePressure(Insts);
+  if (Mask) {
+    LLVM_DEBUG(dbgs() << "[E] Backpressure increased because of unavailable "
+                         "pipeline resources: "
+                      << format_hex(Mask, 16) << '\n');
+    HWPressureEvent Ev(HWPressureEvent::RESOURCES, Insts, Mask);
+    notifyEvent(Ev);
+  }
+
+  SmallVector<InstRef, 8> RegDeps;
+  SmallVector<InstRef, 8> MemDeps;
+  HWS.analyzeDataDependencies(RegDeps, MemDeps);
+  if (RegDeps.size()) {
+    LLVM_DEBUG(
+        dbgs() << "[E] Backpressure increased by register dependencies\n");
+    HWPressureEvent Ev(HWPressureEvent::REGISTER_DEPS, RegDeps);
+    notifyEvent(Ev);
+  }
+
+  if (MemDeps.size()) {
+    LLVM_DEBUG(dbgs() << "[E] Backpressure increased by memory dependencies\n");
+    HWPressureEvent Ev(HWPressureEvent::MEMORY_DEPS, MemDeps);
+    notifyEvent(Ev);
+  }
+
+  return ErrorSuccess();
+}
+
 #ifndef NDEBUG
 static void verifyInstructionEliminated(const InstRef &IR) {
   const Instruction &Inst = *IR.getInstruction();
@@ -124,6 +173,7 @@ Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
 #ifndef NDEBUG
   verifyInstructionEliminated(IR);
 #endif
+  notifyInstructionPending(IR);
   notifyInstructionReady(IR);
   notifyInstructionIssued(IR, {});
   IR.getInstruction()->forceExecuted();
@@ -147,10 +197,18 @@ Error ExecuteStage::execute(InstRef &IR) {
   // BufferSize=0 as reserved. Resources with a buffer size of zero will only
   // be released after MCIS is issued, and all the ResourceCycles for those
   // units have been consumed.
-  HWS.dispatch(IR);
+  bool IsReadyInstruction = HWS.dispatch(IR);
+  const Instruction &Inst = *IR.getInstruction();
+  NumDispatchedOpcodes += Inst.getDesc().NumMicroOps;
   notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
-  if (!HWS.isReady(IR))
+ 
+  if (!IsReadyInstruction) {
+    if (Inst.isPending())
+      notifyInstructionPending(IR);
     return ErrorSuccess();
+  }
+
+  notifyInstructionPending(IR);
 
   // If we did not return early, then the scheduler is ready for execution.
   notifyInstructionReady(IR);
@@ -170,6 +228,12 @@ void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const {
       HWInstructionEvent(HWInstructionEvent::Executed, IR));
 }
 
+void ExecuteStage::notifyInstructionPending(const InstRef &IR) const {
+  LLVM_DEBUG(dbgs() << "[E] Instruction Pending: #" << IR << '\n');
+  notifyEvent<HWInstructionEvent>(
+      HWInstructionEvent(HWInstructionEvent::Pending, IR));
+}
+
 void ExecuteStage::notifyInstructionReady(const InstRef &IR) const {
   LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
   notifyEvent<HWInstructionEvent>(
@@ -189,9 +253,10 @@ void ExecuteStage::notifyInstructionIssued(
   LLVM_DEBUG({
     dbgs() << "[E] Instruction Issued: #" << IR << '\n';
     for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
+      assert(Resource.second.getDenominator() == 1 && "Invalid cycles!");
       dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
              << Resource.first.second << "], ";
-      dbgs() << "cycles: " << Resource.second << '\n';
+      dbgs() << "cycles: " << Resource.second.getNumerator() << '\n';
     }
   });
 
diff --git a/lib/MCA/Stages/InstructionTables.cpp b/lib/MCA/Stages/InstructionTables.cpp
index f918c183aa5a..adeefb45ec2d 100644
--- a/lib/MCA/Stages/InstructionTables.cpp
+++ b/lib/MCA/Stages/InstructionTables.cpp
@@ -1,9 +1,8 @@
 //===--------------------- InstructionTables.cpp ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/Stages/MicroOpQueueStage.cpp b/lib/MCA/Stages/MicroOpQueueStage.cpp
new file mode 100644
index 000000000000..cb3e4c6979a4
--- /dev/null
+++ b/lib/MCA/Stages/MicroOpQueueStage.cpp
@@ -0,0 +1,70 @@
+//===---------------------- MicroOpQueueStage.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the MicroOpQueueStage.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+Error MicroOpQueueStage::moveInstructions() {
+  InstRef IR = Buffer[CurrentInstructionSlotIdx];
+  while (IR && checkNextStage(IR)) {
+    if (llvm::Error Val = moveToTheNextStage(IR))
+      return Val;
+
+    Buffer[CurrentInstructionSlotIdx].invalidate();
+    unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+    CurrentInstructionSlotIdx += NormalizedOpcodes;
+    CurrentInstructionSlotIdx %= Buffer.size();
+    AvailableEntries += NormalizedOpcodes;
+    IR = Buffer[CurrentInstructionSlotIdx];
+  }
+
+  return llvm::ErrorSuccess();
+}
+
+MicroOpQueueStage::MicroOpQueueStage(unsigned Size, unsigned IPC,
+                                     bool ZeroLatencyStage)
+    : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), MaxIPC(IPC),
+      CurrentIPC(0), IsZeroLatencyStage(ZeroLatencyStage) {
+  Buffer.resize(Size ? Size : 1);
+  AvailableEntries = Buffer.size();
+}
+
+Error MicroOpQueueStage::execute(InstRef &IR) {
+  Buffer[NextAvailableSlotIdx] = IR;
+  unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+  NextAvailableSlotIdx += NormalizedOpcodes;
+  NextAvailableSlotIdx %= Buffer.size();
+  AvailableEntries -= NormalizedOpcodes;
+  ++CurrentIPC;
+  return llvm::ErrorSuccess();
+}
+
+Error MicroOpQueueStage::cycleStart() {
+  CurrentIPC = 0;
+  if (!IsZeroLatencyStage)
+    return moveInstructions();
+  return llvm::ErrorSuccess();
+}
+
+Error MicroOpQueueStage::cycleEnd() {
+  if (IsZeroLatencyStage)
+    return moveInstructions();
+  return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/lib/MCA/Stages/RetireStage.cpp b/lib/MCA/Stages/RetireStage.cpp
index d6bcc518662f..e1789dd7fa2a 100644
--- a/lib/MCA/Stages/RetireStage.cpp
+++ b/lib/MCA/Stages/RetireStage.cpp
@@ -1,9 +1,8 @@
 //===---------------------- RetireStage.cpp ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/Stages/Stage.cpp b/lib/MCA/Stages/Stage.cpp
index 38191645e736..ed512ac9711c 100644
--- a/lib/MCA/Stages/Stage.cpp
+++ b/lib/MCA/Stages/Stage.cpp
@@ -1,9 +1,8 @@
 //===---------------------- Stage.cpp ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/MCA/Support.cpp b/lib/MCA/Support.cpp
index 335953e10481..ce1f0f6f211b 100644
--- a/lib/MCA/Support.cpp
+++ b/lib/MCA/Support.cpp
@@ -1,9 +1,8 @@
 //===--------------------- Support.cpp --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -21,6 +20,22 @@ namespace mca {
 
 #define DEBUG_TYPE "llvm-mca"
 
+ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) {
+  if (Denominator == RHS.Denominator)
+    Numerator += RHS.Numerator;
+  else {
+    // Create a common denominator for LHS and RHS by calculating the least
+    // common multiple from the GCD.
+    unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
+    unsigned LCM = (Denominator * RHS.Denominator) / GCD;
+    unsigned LHSNumerator = Numerator * (LCM / Denominator);
+    unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
+    Numerator = LHSNumerator + RHSNumerator;
+    Denominator = LCM;
+  }
+  return *this;
+}
+
 void computeProcResourceMasks(const MCSchedModel &SM,
                               MutableArrayRef<uint64_t> Masks) {
   unsigned ProcResourceID = 0;
@@ -57,8 +72,9 @@ void computeProcResourceMasks(const MCSchedModel &SM,
                     << "\n");
   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
     const MCProcResourceDesc &Desc = *SM.getProcResource(I);
-    LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
-                      << '\n');
+    LLVM_DEBUG(dbgs() << '[' << format_decimal(I,2) << "] " << " - "
+                      << format_hex(Masks[I],16) << " - "
+                      << Desc.Name << '\n');
   }
 #endif
 }
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 8ec115a5566c..49e66f46ab3f 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -1,9 +1,8 @@
 //===- Archive.cpp - ar File Format implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -512,7 +511,7 @@ Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
   StringRef Name = NameOrErr.get();
   Expected<StringRef> Buf = getBuffer();
   if (!Buf)
-    return Buf.takeError();
+    return createFileError(Name, Buf.takeError());
   return MemoryBufferRef(*Buf, Name);
 }
 
@@ -779,19 +778,18 @@ Archive::child_iterator Archive::child_begin(Error &Err,
     return child_end();
 
   if (SkipInternal)
-    return child_iterator(Child(this, FirstRegularData,
-                                FirstRegularStartOfFile),
-                          &Err);
+    return child_iterator::itr(
+        Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
 
   const char *Loc = Data.getBufferStart() + strlen(Magic);
   Child C(this, Loc, &Err);
   if (Err)
     return child_end();
-  return child_iterator(C, &Err);
+  return child_iterator::itr(C, Err);
 }
 
 Archive::child_iterator Archive::child_end() const {
-  return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
+  return child_iterator::end(Child(nullptr, nullptr, nullptr));
 }
 
 StringRef Archive::Symbol::getName() const {
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index da93602cbb28..228f6b40c5ec 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -1,9 +1,8 @@
 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,7 +48,6 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
     return BufOrErr.takeError();
 
   NewArchiveMember M;
-  assert(M.IsNew == false);
   M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
   M.MemberName = M.Buf->getBufferIdentifier();
   if (!Deterministic) {
@@ -76,10 +74,11 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
                                                      bool Deterministic) {
   sys::fs::file_status Status;
-  int FD;
-  if (auto EC = sys::fs::openFileForRead(FileName, FD))
-    return errorCodeToError(EC);
-  assert(FD != -1);
+  auto FDOrErr = sys::fs::openNativeFileForRead(FileName);
+  if (!FDOrErr)
+    return FDOrErr.takeError();
+  sys::fs::file_t FD = *FDOrErr;
+  assert(FD != sys::fs::kInvalidFile);
 
   if (auto EC = sys::fs::status(FD, Status))
     return errorCodeToError(EC);
@@ -95,11 +94,10 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
   if (!MemberBufferOrErr)
     return errorCodeToError(MemberBufferOrErr.getError());
 
-  if (close(FD) != 0)
-    return errorCodeToError(std::error_code(errno, std::generic_category()));
+  if (auto EC = sys::fs::closeFile(FD))
+    return errorCodeToError(EC);
 
   NewArchiveMember M;
-  M.IsNew = true;
   M.Buf = std::move(*MemberBufferOrErr);
   M.MemberName = M.Buf->getBufferIdentifier();
   if (!Deterministic) {
@@ -192,35 +190,6 @@ static bool useStringTable(bool Thin, StringRef Name) {
   return Thin || Name.size() >= 16 || Name.contains('/');
 }
 
-// Compute the relative path from From to To.
-static std::string computeRelativePath(StringRef From, StringRef To) {
-  if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
-    return To;
-
-  StringRef DirFrom = sys::path::parent_path(From);
-  auto FromI = sys::path::begin(DirFrom);
-  auto ToI = sys::path::begin(To);
-  while (*FromI == *ToI) {
-    ++FromI;
-    ++ToI;
-  }
-
-  SmallString<128> Relative;
-  for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
-    sys::path::append(Relative, "..");
-
-  for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
-    sys::path::append(Relative, *ToI);
-
-#ifdef _WIN32
-  // Replace backslashes with slashes so that the path is portable between *nix
-  // and Windows.
-  std::replace(Relative.begin(), Relative.end(), '\\', '/');
-#endif
-
-  return Relative.str();
-}
-
 static bool is64BitKind(object::Archive::Kind Kind) {
   switch (Kind) {
   case object::Archive::K_GNU:
@@ -235,27 +204,11 @@ static bool is64BitKind(object::Archive::Kind Kind) {
   llvm_unreachable("not supported for writting");
 }
 
-static void addToStringTable(raw_ostream &Out, StringRef ArcName,
-                             const NewArchiveMember &M, bool Thin) {
-  StringRef ID = M.Buf->getBufferIdentifier();
-  if (Thin) {
-    if (M.IsNew)
-      Out << computeRelativePath(ArcName, ID);
-    else
-      Out << ID;
-  } else
-    Out << M.MemberName;
-  Out << "/\n";
-}
-
-static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
-                              raw_ostream &StringTable,
-                              StringMap<uint64_t> &MemberNames,
-                              object::Archive::Kind Kind, bool Thin,
-                              StringRef ArcName, const NewArchiveMember &M,
-                              sys::TimePoint<std::chrono::seconds> ModTime,
-                              unsigned Size) {
-
+static void
+printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
+                  StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind,
+                  bool Thin, const NewArchiveMember &M,
+                  sys::TimePoint<std::chrono::seconds> ModTime, unsigned Size) {
   if (isBSDLike(Kind))
     return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
                                 M.Perms, Size);
@@ -266,12 +219,12 @@ static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
   uint64_t NamePos;
   if (Thin) {
     NamePos = StringTable.tell();
-    addToStringTable(StringTable, ArcName, M, Thin);
+    StringTable << M.MemberName << "/\n";
   } else {
     auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
     if (Insertion.second) {
       Insertion.first->second = StringTable.tell();
-      addToStringTable(StringTable, ArcName, M, Thin);
+      StringTable << M.MemberName << "/\n";
     }
     NamePos = Insertion.first->second;
   }
@@ -424,8 +377,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
     if (!isArchiveSymbol(S))
       continue;
     Ret.push_back(SymNames.tell());
-    if (auto EC = S.printName(SymNames))
-      return errorCodeToError(EC);
+    if (Error E = S.printName(SymNames))
+      return std::move(E);
     SymNames << '\0';
   }
   return Ret;
@@ -433,8 +386,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
 
 static Expected<std::vector<MemberData>>
 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
-                  object::Archive::Kind Kind, bool Thin, StringRef ArcName,
-                  bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
+                  object::Archive::Kind Kind, bool Thin, bool Deterministic,
+                  ArrayRef<NewArchiveMember> NewMembers) {
   static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
 
   // This ignores the symbol table, but we only need the value mod 8 and the
@@ -521,8 +474,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
       ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
     else
       ModTime = M.ModTime;
-    printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
-                      M, ModTime, Buf.getBufferSize() + MemberPadding);
+    printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
+                      ModTime, Buf.getBufferSize() + MemberPadding);
     Out.flush();
 
     Expected<std::vector<unsigned>> Symbols =
@@ -541,11 +494,53 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
   return Ret;
 }
 
-Error llvm::writeArchive(StringRef ArcName,
-                         ArrayRef<NewArchiveMember> NewMembers,
-                         bool WriteSymtab, object::Archive::Kind Kind,
-                         bool Deterministic, bool Thin,
-                         std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+namespace llvm {
+
+static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
+  SmallString<128> Ret = P;
+  std::error_code Err = sys::fs::make_absolute(Ret);
+  if (Err)
+    return Err;
+  sys::path::remove_dots(Ret, /*removedotdot*/ true);
+  return Ret;
+}
+
+// Compute the relative path from From to To.
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
+  ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
+  ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
+  if (!PathToOrErr || !DirFromOrErr)
+    return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+  const SmallString<128> &PathTo = *PathToOrErr;
+  const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
+
+  // Can't construct a relative path between different roots
+  if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
+    return sys::path::convert_to_slash(PathTo);
+
+  // Skip common prefixes
+  auto FromTo =
+      std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
+                    sys::path::begin(PathTo));
+  auto FromI = FromTo.first;
+  auto ToI = FromTo.second;
+
+  // Construct relative path
+  SmallString<128> Relative;
+  for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
+    sys::path::append(Relative, sys::path::Style::posix, "..");
+
+  for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
+    sys::path::append(Relative, sys::path::Style::posix, *ToI);
+
+  return Relative.str();
+}
+
+Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
+                   bool WriteSymtab, object::Archive::Kind Kind,
+                   bool Deterministic, bool Thin,
+                   std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
   assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
 
   SmallString<0> SymNamesBuf;
@@ -554,7 +549,7 @@ Error llvm::writeArchive(StringRef ArcName,
   raw_svector_ostream StringTable(StringTableBuf);
 
   Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
-      StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
+      StringTable, SymNames, Kind, Thin, Deterministic, NewMembers);
   if (Error E = DataOrErr.takeError())
     return E;
   std::vector<MemberData> &Data = *DataOrErr;
@@ -631,3 +626,5 @@ Error llvm::writeArchive(StringRef ArcName,
 
   return Temp->keep(ArcName);
 }
+
+} // namespace llvm
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index fe41987f5c27..a953c1d8cb80 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -1,9 +1,8 @@
 //===- Binary.cpp - A generic binary file ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/Minidump.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/WindowsResource.h"
 #include "llvm/Support/Error.h"
@@ -69,6 +69,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
   case file_magic::coff_import_library:
   case file_magic::pecoff_executable:
   case file_magic::bitcode:
+  case file_magic::xcoff_object_32:
+  case file_magic::xcoff_object_64:
   case file_magic::wasm_object:
     return ObjectFile::createSymbolicFile(Buffer, Type, Context);
   case file_magic::macho_universal_binary:
@@ -82,6 +84,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
   case file_magic::coff_cl_gl_object:
     // Unrecognized object file format.
     return errorCodeToError(object_error::invalid_file_type);
+  case file_magic::minidump:
+    return MinidumpFile::create(Buffer);
   }
   llvm_unreachable("Unexpected Binary File Type");
 }
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index dc11cc4bcffe..ff4a799be60c 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -1,9 +1,8 @@
 //===- COFFImportFile.cpp - COFF short import file implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -496,7 +495,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
 
   // COFF Header
   coff_file_header Header{
-      u16(0),
+      u16(Machine),
       u16(NumberOfSections),
       u32(0),
       u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))),
@@ -596,7 +595,10 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
       ImportType = IMPORT_CONST;
 
     StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName;
-    ImportNameType NameType = getNameType(SymbolName, E.Name, Machine, MinGW);
+    ImportNameType NameType = E.Noname
+                                  ? IMPORT_ORDINAL
+                                  : getNameType(SymbolName, E.Name,
+                                                Machine, MinGW);
     Expected<std::string> Name = E.ExtName.empty()
                                      ? SymbolName
                                      : replace(SymbolName, E.Name, E.ExtName);
diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp
index c703071b86e0..64d4cf0efda2 100644
--- a/lib/Object/COFFModuleDefinition.cpp
+++ b/lib/Object/COFFModuleDefinition.cpp
@@ -1,9 +1,8 @@
 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index fc1deeba339a..854664e679df 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- COFFObjectFile.cpp - COFF object file implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -270,10 +269,9 @@ void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
   Ref.p = reinterpret_cast<uintptr_t>(Sec);
 }
 
-std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
-                                               StringRef &Result) const {
+Expected<StringRef> COFFObjectFile::getSectionName(DataRefImpl Ref) const {
   const coff_section *Sec = toSec(Ref);
-  return getSectionName(Sec, Result);
+  return getSectionName(Sec);
 }
 
 uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
@@ -294,13 +292,13 @@ uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
   return getSectionSize(toSec(Ref));
 }
 
-std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
-                                                   StringRef &Result) const {
+Expected<ArrayRef<uint8_t>>
+COFFObjectFile::getSectionContents(DataRefImpl Ref) const {
   const coff_section *Sec = toSec(Ref);
   ArrayRef<uint8_t> Res;
-  std::error_code EC = getSectionContents(Sec, Res);
-  Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
-  return EC;
+  if (Error E = getSectionContents(Sec, Res))
+    return std::move(E);
+  return Res;
 }
 
 uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const {
@@ -1075,8 +1073,8 @@ uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const {
   return Index;
 }
 
-std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
-                                               StringRef &Res) const {
+Expected<StringRef>
+COFFObjectFile::getSectionName(const coff_section *Sec) const {
   StringRef Name;
   if (Sec->Name[COFF::NameSize - 1] == 0)
     // Null terminated, let ::strlen figure out the length.
@@ -1090,17 +1088,18 @@ std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
     uint32_t Offset;
     if (Name.startswith("//")) {
       if (decodeBase64StringEntry(Name.substr(2), Offset))
-        return object_error::parse_failed;
+        return createStringError(object_error::parse_failed,
+                                 "invalid section name");
     } else {
       if (Name.substr(1).getAsInteger(10, Offset))
-        return object_error::parse_failed;
+        return createStringError(object_error::parse_failed,
+                                 "invalid section name");
     }
     if (std::error_code EC = getString(Offset, Name))
-      return EC;
+      return errorCodeToError(EC);
   }
 
-  Res = Name;
-  return std::error_code();
+  return Name;
 }
 
 uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
@@ -1119,22 +1118,21 @@ uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
   return Sec->SizeOfRawData;
 }
 
-std::error_code
-COFFObjectFile::getSectionContents(const coff_section *Sec,
-                                   ArrayRef<uint8_t> &Res) const {
+Error COFFObjectFile::getSectionContents(const coff_section *Sec,
+                                         ArrayRef<uint8_t> &Res) const {
   // In COFF, a virtual section won't have any in-file
   // content, so the file pointer to the content will be zero.
   if (Sec->PointerToRawData == 0)
-    return std::error_code();
+    return Error::success();
   // The only thing that we need to verify is that the contents is contained
   // within the file bounds. We don't need to make sure it doesn't cover other
   // data, as there's nothing that says that is not allowed.
   uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
   uint32_t SectionSize = getSectionSize(Sec);
   if (checkOffset(Data, ConStart, SectionSize))
-    return object_error::parse_failed;
+    return make_error<BinaryError>();
   Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize);
-  return std::error_code();
+  return Error::success();
 }
 
 const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
@@ -1237,6 +1235,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_REL32);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A);
@@ -1244,6 +1243,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_PAIR);
     default:
       return "Unknown";
     }
@@ -1267,6 +1267,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL32);
     default:
       return "Unknown";
     }
@@ -1455,7 +1456,7 @@ std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const {
 
 std::error_code DelayImportDirectoryEntryRef::
 getDelayImportTable(const delay_import_directory_table_entry *&Result) const {
-  Result = Table;
+  Result = &Table[Index];
   return std::error_code();
 }
 
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index 53f084d7620e..ec15e6f69ada 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -1,9 +1,8 @@
 //===-- Decompressor.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index cf8313f88f93..8660b1a64bdd 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -1,9 +1,8 @@
 //===- ELF.cpp - ELF object file implementation ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -220,8 +219,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
     switch (Type) {
       STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
       STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
-      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
       STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF);
+      STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
     }
     break;
   default:
@@ -254,6 +253,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LINKER_OPTIONS);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH_PROFILE);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG);
+    STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES);
+    STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
@@ -425,7 +426,7 @@ ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const {
 }
 
 template <class ELFT>
-const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
+std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
                                                  uint64_t Type) const {
 #define DYNAMIC_STRINGIFY_ENUM(tag, value)                                     \
   case value:                                                                  \
@@ -433,12 +434,21 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 
 #define DYNAMIC_TAG(n, v)
   switch (Arch) {
+  case ELF::EM_AARCH64:
+    switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+    }
+    break;
+
   case ELF::EM_HEXAGON:
     switch (Type) {
 #define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef HEXAGON_DYNAMIC_TAG
     }
+    break;
 
   case ELF::EM_MIPS:
     switch (Type) {
@@ -446,6 +456,7 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef MIPS_DYNAMIC_TAG
     }
+    break;
 
   case ELF::EM_PPC64:
     switch (Type) {
@@ -453,10 +464,12 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef PPC64_DYNAMIC_TAG
     }
+    break;
   }
 #undef DYNAMIC_TAG
   switch (Type) {
 // Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
 #define MIPS_DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG(name, value)
@@ -465,18 +478,19 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 #define DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG
 #undef DYNAMIC_TAG_MARKER
 #undef DYNAMIC_STRINGIFY_ENUM
   default:
-    return "unknown";
+    return "<unknown:>0x" + utohexstr(Type, true);
   }
 }
 
 template <class ELFT>
-const char *ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const {
+std::string ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const {
   return getDynamicTagAsString(getHeader()->e_machine, Type);
 }
 
@@ -523,12 +537,15 @@ Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const {
   }
 
   if (Dyn.empty())
+    // TODO: this error is untested.
     return createError("invalid empty dynamic section");
 
   if (DynSecSize % sizeof(Elf_Dyn) != 0)
+    // TODO: this error is untested.
     return createError("malformed dynamic section");
 
   if (Dyn.back().d_tag != ELF::DT_NULL)
+    // TODO: this error is untested.
     return createError("dynamic sections must be DT_NULL terminated");
 
   return Dyn;
@@ -553,12 +570,14 @@ Expected<const uint8_t *> ELFFile<ELFT>::toMappedAddr(uint64_t VAddr) const {
                        });
 
   if (I == LoadSegments.begin())
-    return createError("Virtual address is not in any segment");
+    return createError("virtual address is not in any segment: 0x" +
+                       Twine::utohexstr(VAddr));
   --I;
   const Elf_Phdr &Phdr = **I;
   uint64_t Delta = VAddr - Phdr.p_vaddr;
   if (Delta >= Phdr.p_filesz)
-    return createError("Virtual address is not in any segment");
+    return createError("virtual address is not in any segment: 0x" +
+                       Twine::utohexstr(VAddr));
   return base() + Phdr.p_offset + Delta;
 }
 
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 9fb3a55ac7b1..c7b715793048 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- ELFObjectFile.cpp - ELF object file implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,6 +35,16 @@
 using namespace llvm;
 using namespace object;
 
+const EnumEntry<unsigned> llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = {
+    {"None", "NOTYPE", ELF::STT_NOTYPE},
+    {"Object", "OBJECT", ELF::STT_OBJECT},
+    {"Function", "FUNC", ELF::STT_FUNC},
+    {"Section", "SECTION", ELF::STT_SECTION},
+    {"File", "FILE", ELF::STT_FILE},
+    {"Common", "COMMON", ELF::STT_COMMON},
+    {"TLS", "TLS", ELF::STT_TLS},
+    {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}};
+
 ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
     : ObjectFile(Type, Source) {}
 
@@ -139,8 +148,7 @@ SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const {
 SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
   SubtargetFeatures Features;
   ARMAttributeParser Attributes;
-  std::error_code EC = getBuildAttributes(Attributes);
-  if (EC)
+  if (Error E = getBuildAttributes(Attributes))
     return SubtargetFeatures();
 
   // both ARMv7-M and R have to support thumb hardware div
@@ -186,9 +194,9 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
     default:
       break;
     case ARMBuildAttrs::Not_Allowed:
-      Features.AddFeature("vfp2", false);
-      Features.AddFeature("vfp3", false);
-      Features.AddFeature("vfp4", false);
+      Features.AddFeature("vfp2d16sp", false);
+      Features.AddFeature("vfp3d16sp", false);
+      Features.AddFeature("vfp4d16sp", false);
       break;
     case ARMBuildAttrs::AllowFPv2:
       Features.AddFeature("vfp2");
@@ -222,6 +230,24 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
     }
   }
 
+  if (Attributes.hasAttribute(ARMBuildAttrs::MVE_arch)) {
+    switch(Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch)) {
+    default:
+      break;
+    case ARMBuildAttrs::Not_Allowed:
+      Features.AddFeature("mve", false);
+      Features.AddFeature("mve.fp", false);
+      break;
+    case ARMBuildAttrs::AllowMVEInteger:
+      Features.AddFeature("mve.fp", false);
+      Features.AddFeature("mve");
+      break;
+    case ARMBuildAttrs::AllowMVEIntegerAndFloat:
+      Features.AddFeature("mve.fp");
+      break;
+    }
+  }
+
   if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) {
     switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) {
     default:
@@ -270,8 +296,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
     return;
 
   ARMAttributeParser Attributes;
-  std::error_code EC = getBuildAttributes(Attributes);
-  if (EC)
+  if (Error E = getBuildAttributes(Attributes))
     return;
 
   std::string Triple;
@@ -370,12 +395,13 @@ ELFObjectFileBase::getPltAddresses() const {
   }
   if (!Plt || !RelaPlt || !GotPlt)
     return {};
-  StringRef PltContents;
-  if (Plt->getContents(PltContents))
+  Expected<StringRef> PltContents = Plt->getContents();
+  if (!PltContents) {
+    consumeError(PltContents.takeError());
     return {};
-  ArrayRef<uint8_t> PltBytes((const uint8_t *)PltContents.data(),
-                             Plt->getSize());
-  auto PltEntries = MIA->findPltEntries(Plt->getAddress(), PltBytes,
+  }
+  auto PltEntries = MIA->findPltEntries(Plt->getAddress(),
+                                        arrayRefFromStringRef(*PltContents),
                                         GotPlt->getAddress(), Triple);
   // Build a map from GOT entry virtual address to PLT entry virtual address.
   DenseMap<uint64_t, uint64_t> GotToPlt;
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index 6fa23e06c409..010c5b42dac2 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -1,9 +1,8 @@
 //===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -79,18 +78,15 @@ const std::error_category &object::object_category() {
 }
 
 llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
-  if (auto Err2 =
-          handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
-            // Try to handle 'M'. If successful, return a success value from
-            // the handler.
-            if (M->convertToErrorCode() == object_error::invalid_file_type)
-              return Error::success();
+  return handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
+    // Try to handle 'M'. If successful, return a success value from
+    // the handler.
+    if (M->convertToErrorCode() == object_error::invalid_file_type)
+      return Error::success();
 
-            // We failed to handle 'M' - return it from the handler.
-            // This value will be passed back from catchErrors and
-            // wind up in Err2, where it will be returned from this function.
-            return Error(std::move(M));
-          }))
-    return Err2;
-  return Err;
+    // We failed to handle 'M' - return it from the handler.
+    // This value will be passed back from handleErrors and
+    // returned directly from this function.
+    return Error(std::move(M));
+  });
 }
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index 1ecb26d60bce..636f1521262f 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- IRObjectFile.cpp - IR object file implementation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,10 +42,9 @@ void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
   Symb.p += sizeof(ModuleSymbolTable::Symbol);
 }
 
-std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
-                                              DataRefImpl Symb) const {
+Error IRObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const {
   SymTab.printSymbolName(OS, getSym(Symb));
-  return std::error_code();
+  return Error::success();
 }
 
 uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
@@ -76,10 +74,12 @@ Expected<MemoryBufferRef>
 IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
   for (const SectionRef &Sec : Obj.sections()) {
     if (Sec.isBitcode()) {
-      StringRef SecContents;
-      if (std::error_code EC = Sec.getContents(SecContents))
-        return errorCodeToError(EC);
-      return MemoryBufferRef(SecContents, Obj.getFileName());
+      Expected<StringRef> Contents = Sec.getContents();
+      if (!Contents)
+        return Contents.takeError();
+      if (Contents->size() <= 1)
+        return errorCodeToError(object_error::bitcode_section_not_found);
+      return MemoryBufferRef(*Contents, Obj.getFileName());
     }
   }
 
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index 344d565349c0..e4282b9d6bd3 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -1,9 +1,8 @@
 //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -90,6 +89,8 @@ struct Builder {
   std::string COFFLinkerOpts;
   raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
 
+  std::vector<storage::Str> DependentLibraries;
+
   void setStr(storage::Str &S, StringRef Value) {
     S.Offset = StrtabBuilder.add(Value);
     S.Size = Value.size();
@@ -141,6 +142,20 @@ Error Builder::addModule(Module *M) {
     }
   }
 
+  if (TT.isOSBinFormatELF()) {
+    if (auto E = M->materializeMetadata())
+      return E;
+    if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) {
+      for (MDNode *MDOptions : N->operands()) {
+        const auto OperandStr =
+            cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString();
+        storage::Str Specifier;
+        setStr(Specifier, OperandStr);
+        DependentLibraries.emplace_back(Specifier);
+      }
+    }
+  }
+
   for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
     if (Error Err = addSymbol(Msymtab, Used, Msym))
       return Err;
@@ -313,7 +328,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
   writeRange(Hdr.Comdats, Comdats);
   writeRange(Hdr.Symbols, Syms);
   writeRange(Hdr.Uncommons, Uncommons);
-
+  writeRange(Hdr.DependentLibraries, DependentLibraries);
   *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
   return Error::success();
 }
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index ce4d1cf92e20..5aec844003c0 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- MachOObjectFile.cpp - Mach-O object file binding -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,6 +57,12 @@ namespace {
 
 } // end anonymous namespace
 
+static const std::array<StringRef, 17> validArchs = {
+    "i386",   "x86_64", "x86_64h",  "armv4t",  "arm",    "armv5e",
+    "armv6",  "armv6m", "armv7",    "armv7em", "armv7k", "armv7m",
+    "armv7s", "arm64",  "arm64_32", "ppc",     "ppc64",
+};
+
 static Error malformedError(const Twine &Msg) {
   return make_error<GenericBinaryError>("truncated or malformed object (" +
                                             Msg + ")",
@@ -292,7 +297,10 @@ static Error parseSegmentLoadCommand(
     for (unsigned J = 0; J < S.nsects; ++J) {
       const char *Sec = getSectionPtr(Obj, Load, J);
       Sections.push_back(Sec);
-      Section s = getStruct<Section>(Obj, Sec);
+      auto SectionOrErr = getStructOrErr<Section>(Obj, Sec);
+      if (!SectionOrErr)
+        return SectionOrErr.takeError();
+      Section s = SectionOrErr.get();
       if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
           Obj.getHeader().filetype != MachO::MH_DSYM &&
           s.flags != MachO::S_ZEROFILL &&
@@ -402,8 +410,10 @@ static Error checkSymtabCommand(const MachOObjectFile &Obj,
                           " LC_SYMTAB cmdsize too small");
   if (*SymtabLoadCmd != nullptr)
     return malformedError("more than one LC_SYMTAB command");
-  MachO::symtab_command Symtab =
-    getStruct<MachO::symtab_command>(Obj, Load.Ptr);
+  auto SymtabOrErr = getStructOrErr<MachO::symtab_command>(Obj, Load.Ptr);
+  if (!SymtabOrErr)
+    return SymtabOrErr.takeError();
+  MachO::symtab_command Symtab = SymtabOrErr.get();
   if (Symtab.cmdsize != sizeof(MachO::symtab_command))
     return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) +
                           " has incorrect cmdsize");
@@ -458,8 +468,11 @@ static Error checkDysymtabCommand(const MachOObjectFile &Obj,
                           " LC_DYSYMTAB cmdsize too small");
   if (*DysymtabLoadCmd != nullptr)
     return malformedError("more than one LC_DYSYMTAB command");
-  MachO::dysymtab_command Dysymtab =
-    getStruct<MachO::dysymtab_command>(Obj, Load.Ptr);
+  auto DysymtabOrErr =
+    getStructOrErr<MachO::dysymtab_command>(Obj, Load.Ptr);
+  if (!DysymtabOrErr)
+    return DysymtabOrErr.takeError();
+  MachO::dysymtab_command Dysymtab = DysymtabOrErr.get();
   if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command))
     return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) +
                           " has incorrect cmdsize");
@@ -589,8 +602,11 @@ static Error checkLinkeditDataCommand(const MachOObjectFile &Obj,
                           CmdName + " cmdsize too small");
   if (*LoadCmd != nullptr)
     return malformedError("more than one " + Twine(CmdName) + " command");
-  MachO::linkedit_data_command LinkData =
-    getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr);
+  auto LinkDataOrError =
+    getStructOrErr<MachO::linkedit_data_command>(Obj, Load.Ptr);
+  if (!LinkDataOrError)
+    return LinkDataOrError.takeError();
+  MachO::linkedit_data_command LinkData = LinkDataOrError.get();
   if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command))
     return malformedError(Twine(CmdName) + " command " +
                           Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -624,8 +640,11 @@ static Error checkDyldInfoCommand(const MachOObjectFile &Obj,
   if (*LoadCmd != nullptr)
     return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY "
                           "command");
-  MachO::dyld_info_command DyldInfo =
-    getStruct<MachO::dyld_info_command>(Obj, Load.Ptr);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(Obj, Load.Ptr);
+  if (!DyldInfoOrErr)
+    return DyldInfoOrErr.takeError();
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command))
     return malformedError(Twine(CmdName) + " command " +
                           Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -715,7 +734,10 @@ static Error checkDylibCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::dylib_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " cmdsize too small");
-  MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr);
+  auto CommandOrErr = getStructOrErr<MachO::dylib_command>(Obj, Load.Ptr);
+  if (!CommandOrErr)
+    return CommandOrErr.takeError();
+  MachO::dylib_command D = CommandOrErr.get();
   if (D.dylib.name < sizeof(MachO::dylib_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " name.offset field too small, not past "
@@ -761,7 +783,10 @@ static Error checkDyldCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::dylinker_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " cmdsize too small");
-  MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr);
+  auto CommandOrErr = getStructOrErr<MachO::dylinker_command>(Obj, Load.Ptr);
+  if (!CommandOrErr)
+    return CommandOrErr.takeError();
+  MachO::dylinker_command D = CommandOrErr.get();
   if (D.name < sizeof(MachO::dylinker_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " name.offset field too small, not past "
@@ -806,7 +831,10 @@ static Error checkNoteCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize != sizeof(MachO::note_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_NOTE has incorrect cmdsize");
-  MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr);
+  auto NoteCmdOrErr = getStructOrErr<MachO::note_command>(Obj, Load.Ptr);
+  if (!NoteCmdOrErr)
+    return NoteCmdOrErr.takeError();
+  MachO::note_command Nt = NoteCmdOrErr.get();
   uint64_t FileSize = Obj.getData().size();
   if (Nt.offset > FileSize)
     return malformedError("offset field of LC_NOTE command " +
@@ -829,8 +857,11 @@ parseBuildVersionCommand(const MachOObjectFile &Obj,
                          const MachOObjectFile::LoadCommandInfo &Load,
                          SmallVectorImpl<const char*> &BuildTools,
                          uint32_t LoadCommandIndex) {
-  MachO::build_version_command BVC =
-      getStruct<MachO::build_version_command>(Obj, Load.Ptr);
+  auto BVCOrErr =
+    getStructOrErr<MachO::build_version_command>(Obj, Load.Ptr);
+  if (!BVCOrErr)
+    return BVCOrErr.takeError();
+  MachO::build_version_command BVC = BVCOrErr.get();
   if (Load.C.cmdsize !=
       sizeof(MachO::build_version_command) +
           BVC.ntools * sizeof(MachO::build_tool_version))
@@ -851,7 +882,10 @@ static Error checkRpathCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::rpath_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_RPATH cmdsize too small");
-  MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr);
+  auto ROrErr = getStructOrErr<MachO::rpath_command>(Obj, Load.Ptr);
+  if (!ROrErr)
+    return ROrErr.takeError();
+  MachO::rpath_command R = ROrErr.get();
   if (R.path < sizeof(MachO::rpath_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_RPATH path.offset field too small, not past "
@@ -904,8 +938,11 @@ static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::linker_option_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_LINKER_OPTION cmdsize too small");
-  MachO::linker_option_command L =
-    getStruct<MachO::linker_option_command>(Obj, Load.Ptr);
+  auto LinkOptionOrErr =
+    getStructOrErr<MachO::linker_option_command>(Obj, Load.Ptr);
+  if (!LinkOptionOrErr)
+    return LinkOptionOrErr.takeError();
+  MachO::linker_option_command L = LinkOptionOrErr.get();
   // Make sure the count of strings is correct.
   const char *string = (const char *)Load.Ptr +
                        sizeof(struct MachO::linker_option_command);
@@ -919,6 +956,10 @@ static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
     if (left > 0) {
       i++;
       uint32_t NullPos = StringRef(string, left).find('\0');
+      if (0xffffffff == NullPos)
+        return malformedError("load command " + Twine(LoadCommandIndex) +
+                              " LC_LINKER_OPTION string #" + Twine(i) +
+                              " is not NULL terminated");
       uint32_t len = std::min(NullPos, left) + 1;
       string += len;
       left -= len;
@@ -965,8 +1006,11 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::thread_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           CmdName + " cmdsize too small");
-  MachO::thread_command T =
-    getStruct<MachO::thread_command>(Obj, Load.Ptr);
+  auto ThreadCommandOrErr =
+    getStructOrErr<MachO::thread_command>(Obj, Load.Ptr);
+  if (!ThreadCommandOrErr)
+    return ThreadCommandOrErr.takeError();
+  MachO::thread_command T = ThreadCommandOrErr.get();
   const char *state = Load.Ptr + sizeof(MachO::thread_command);
   const char *end = Load.Ptr + T.cmdsize;
   uint32_t nflavor = 0;
@@ -1097,7 +1141,8 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
                               "flavor number " + Twine(nflavor) + " in " +
                               CmdName + " command");
       }
-    } else if (cputype == MachO::CPU_TYPE_ARM64) {
+    } else if (cputype == MachO::CPU_TYPE_ARM64 ||
+               cputype == MachO::CPU_TYPE_ARM64_32) {
       if (flavor == MachO::ARM_THREAD_STATE64) {
         if (count != MachO::ARM_THREAD_STATE64_COUNT)
           return malformedError("load command " + Twine(LoadCommandIndex) +
@@ -1156,8 +1201,10 @@ static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj,
                           " LC_TWOLEVEL_HINTS has incorrect cmdsize");
   if (*LoadCmd != nullptr)
     return malformedError("more than one LC_TWOLEVEL_HINTS command");
-  MachO::twolevel_hints_command Hints =
-    getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+  auto HintsOrErr = getStructOrErr<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+  if(!HintsOrErr)
+    return HintsOrErr.takeError();
+  MachO::twolevel_hints_command Hints = HintsOrErr.get();
   uint64_t FileSize = Obj.getData().size();
   if (Hints.offset > FileSize)
     return malformedError("offset field of LC_TWOLEVEL_HINTS command " +
@@ -1658,36 +1705,35 @@ Error MachOObjectFile::checkSymbolTable() const {
     } else {
       MachO::nlist STE = getSymbolTableEntry(SymDRI);
       NType = STE.n_type;
-      NType = STE.n_type;
       NSect = STE.n_sect;
       NDesc = STE.n_desc;
       NStrx = STE.n_strx;
       NValue = STE.n_value;
     }
-    if ((NType & MachO::N_STAB) == 0 &&
-        (NType & MachO::N_TYPE) == MachO::N_SECT) {
-      if (NSect == 0 || NSect > Sections.size())
-        return malformedError("bad section index: " + Twine((int)NSect) +
-                              " for symbol at index " + Twine(SymbolIndex));
-    }
-    if ((NType & MachO::N_STAB) == 0 &&
-        (NType & MachO::N_TYPE) == MachO::N_INDR) {
-      if (NValue >= S.strsize)
-        return malformedError("bad n_value: " + Twine((int)NValue) + " past "
-                              "the end of string table, for N_INDR symbol at "
-                              "index " + Twine(SymbolIndex));
-    }
-    if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
-        (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
-         (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
-      uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
-      if (LibraryOrdinal != 0 &&
-          LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
-          LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
-          LibraryOrdinal - 1 >= Libraries.size() ) {
-        return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
-                            " for symbol at index " + Twine(SymbolIndex));
+    if ((NType & MachO::N_STAB) == 0) {
+      if ((NType & MachO::N_TYPE) == MachO::N_SECT) {
+        if (NSect == 0 || NSect > Sections.size())
+          return malformedError("bad section index: " + Twine((int)NSect) +
+                                " for symbol at index " + Twine(SymbolIndex));
+      }
+      if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
+        if (NValue >= S.strsize)
+          return malformedError("bad n_value: " + Twine((int)NValue) + " past "
+                                "the end of string table, for N_INDR symbol at "
+                                "index " + Twine(SymbolIndex));
       }
+      if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
+          (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
+           (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
+            uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
+            if (LibraryOrdinal != 0 &&
+                LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
+                LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
+                LibraryOrdinal - 1 >= Libraries.size() ) {
+              return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
+                                    " for symbol at index " + Twine(SymbolIndex));
+            }
+          }
     }
     if (NStrx >= S.strsize)
       return malformedError("bad string table index: " + Twine((int)NStrx) +
@@ -1861,11 +1907,9 @@ void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
   Sec.d.a++;
 }
 
-std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
-                                                StringRef &Result) const {
+Expected<StringRef> MachOObjectFile::getSectionName(DataRefImpl Sec) const {
   ArrayRef<char> Raw = getSectionRawName(Sec);
-  Result = parseSegmentOrSectionName(Raw.data());
-  return std::error_code();
+  return parseSegmentOrSectionName(Raw.data());
 }
 
 uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
@@ -1907,8 +1951,8 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
   return SectSize;
 }
 
-std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
-                                                    StringRef &Res) const {
+Expected<ArrayRef<uint8_t>>
+MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
   uint32_t Offset;
   uint64_t Size;
 
@@ -1922,8 +1966,7 @@ std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
     Size = Sect.size;
   }
 
-  Res = this->getData().substr(Offset, Size);
-  return std::error_code();
+  return arrayRefFromStringRef(getData().substr(Offset, Size));
 }
 
 uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -1998,9 +2041,8 @@ bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const {
 
 bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const {
   StringRef SegmentName = getSectionFinalSegmentName(Sec);
-  StringRef SectName;
-  if (!getSectionName(Sec, SectName))
-    return (SegmentName == "__LLVM" && SectName == "__bitcode");
+  if (Expected<StringRef> NameOrErr = getSectionName(Sec))
+    return (SegmentName == "__LLVM" && *NameOrErr == "__bitcode");
   return false;
 }
 
@@ -2172,7 +2214,8 @@ void MachOObjectFile::getRelocationTypeName(
         res = Table[RType];
       break;
     }
-    case Triple::aarch64: {
+    case Triple::aarch64:
+    case Triple::aarch64_32: {
       static const char *const Table[] = {
         "ARM64_RELOC_UNSIGNED",           "ARM64_RELOC_SUBTRACTOR",
         "ARM64_RELOC_BRANCH26",           "ARM64_RELOC_PAGE21",
@@ -2242,9 +2285,18 @@ uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
 // one of the two following forms:
 //      libFoo.A.dylib
 //      libFoo.dylib
+//
 // The library may have a suffix trailing the name Foo of the form:
 //      libFoo_profile.A.dylib
 //      libFoo_profile.dylib
+// These dyld image suffixes are separated from the short name by a '_'
+// character. Because the '_' character is commonly used to separate words in
+// filenames guessLibraryShortName() cannot reliably separate a dylib's short
+// name from an arbitrary image suffix; imagine if both the short name and the
+// suffix contain an '_' character! To better deal with this ambiguity,
+// guessLibraryShortName() will recognize only "_debug" and "_profile" as valid
+// Suffix values. Calling code needs to be tolerant of guessLibraryShortName()
+// guessing incorrectly.
 //
 // The Name of the dynamic library is also recognized as a library name if it
 // has the following form:
@@ -2252,7 +2304,6 @@ uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
 //
 // If the Name of the dynamic library is none of the forms above then a NULL
 // StringRef is returned.
-//
 StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
                                                  bool &isFramework,
                                                  StringRef &Suffix) {
@@ -2272,7 +2323,10 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
   Idx = Foo.rfind('_');
   if (Idx != Foo.npos && Foo.size() >= 2) {
     Suffix = Foo.slice(Idx, Foo.npos);
-    Foo = Foo.slice(0, Idx);
+    if (Suffix != "_debug" && Suffix != "_profile")
+      Suffix = StringRef();
+    else
+      Foo = Foo.slice(0, Idx);
   }
 
   // First look for the form Foo.framework/Foo
@@ -2333,10 +2387,14 @@ guess_library:
   else
     b = b+1;
   // ignore any suffix after an underbar like Foo_profile.A.dylib
-  Idx = Name.find('_', b);
+  Idx = Name.rfind('_');
   if (Idx != Name.npos && Idx != b) {
     Lib = Name.slice(b, Idx);
     Suffix = Name.slice(Idx, a);
+    if (Suffix != "_debug" && Suffix != "_profile") {
+      Suffix = StringRef();
+      Lib = Name.slice(b, a);
+    }
   }
   else
     Lib = Name.slice(b, a);
@@ -2381,8 +2439,11 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
   // all the Libraries.
   if (LibrariesShortNames.size() == 0) {
     for (unsigned i = 0; i < Libraries.size(); i++) {
-      MachO::dylib_command D =
-        getStruct<MachO::dylib_command>(*this, Libraries[i]);
+      auto CommandOrErr =
+        getStructOrErr<MachO::dylib_command>(*this, Libraries[i]);
+      if (!CommandOrErr)
+        return object_error::parse_failed;
+      MachO::dylib_command D = CommandOrErr.get();
       if (D.dylib.name >= D.cmdsize)
         return object_error::parse_failed;
       const char *P = (const char *)(Libraries[i]) + D.dylib.name;
@@ -2485,6 +2546,8 @@ StringRef MachOObjectFile::getFileFormatName() const {
       return "Mach-O 32-bit i386";
     case MachO::CPU_TYPE_ARM:
       return "Mach-O arm";
+    case MachO::CPU_TYPE_ARM64_32:
+      return "Mach-O arm64 (ILP32)";
     case MachO::CPU_TYPE_POWERPC:
       return "Mach-O 32-bit ppc";
     default:
@@ -2514,6 +2577,8 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
     return Triple::arm;
   case MachO::CPU_TYPE_ARM64:
     return Triple::aarch64;
+  case MachO::CPU_TYPE_ARM64_32:
+    return Triple::aarch64_32;
   case MachO::CPU_TYPE_POWERPC:
     return Triple::ppc;
   case MachO::CPU_TYPE_POWERPC64:
@@ -2620,6 +2685,17 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
     default:
       return Triple();
     }
+  case MachO::CPU_TYPE_ARM64_32:
+    switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+    case MachO::CPU_SUBTYPE_ARM64_32_V8:
+      if (McpuDefault)
+        *McpuDefault = "cyclone";
+      if (ArchFlag)
+        *ArchFlag = "arm64_32";
+      return Triple("arm64_32-apple-darwin");
+    default:
+      return Triple();
+    }
   case MachO::CPU_TYPE_POWERPC:
     switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
     case MachO::CPU_SUBTYPE_POWERPC_ALL:
@@ -2648,26 +2724,12 @@ Triple MachOObjectFile::getHostArch() {
 }
 
 bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
-  return StringSwitch<bool>(ArchFlag)
-      .Case("i386", true)
-      .Case("x86_64", true)
-      .Case("x86_64h", true)
-      .Case("armv4t", true)
-      .Case("arm", true)
-      .Case("armv5e", true)
-      .Case("armv6", true)
-      .Case("armv6m", true)
-      .Case("armv7", true)
-      .Case("armv7em", true)
-      .Case("armv7k", true)
-      .Case("armv7m", true)
-      .Case("armv7s", true)
-      .Case("arm64", true)
-      .Case("ppc", true)
-      .Case("ppc64", true)
-      .Default(false);
+  return std::find(validArchs.cbegin(), validArchs.cend(), ArchFlag) !=
+         validArchs.cend();
 }
 
+ArrayRef<StringRef> MachOObjectFile::getValidArchs() { return validArchs; }
+
 Triple::ArchType MachOObjectFile::getArch() const {
   return getArch(getCPUType(*this));
 }
@@ -3102,8 +3164,8 @@ void MachORebaseEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3127,8 +3189,8 @@ void MachORebaseEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
                             " for opcode at: 0x" +
@@ -3142,8 +3204,8 @@ void MachORebaseEntry::moveNext() {
                                        SegmentOffset) << "\n");
       break;
     case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3152,8 +3214,8 @@ void MachORebaseEntry::moveNext() {
         return;
       }
       SegmentOffset += ImmValue * PointerSize;
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              false);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize);
       if (error) {
         *E =
             malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED "
@@ -3169,15 +3231,6 @@ void MachORebaseEntry::moveNext() {
                                        SegmentOffset) << "\n");
       break;
     case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
-      if (error) {
-        *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
-                            Twine(error) + " for opcode at: 0x" +
-                            Twine::utohexstr(OpcodeStart - Opcodes.begin()));
-        moveToEnd();
-        return;
-      }
       AdvanceAmount = PointerSize;
       Skip = 0;
       Count = ImmValue;
@@ -3185,8 +3238,8 @@ void MachORebaseEntry::moveNext() {
         RemainingLoopCount = ImmValue - 1;
       else
         RemainingLoopCount = 0;
-      error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
-                                              SegmentIndex, SegmentOffset);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize, Count, Skip);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3203,15 +3256,6 @@ void MachORebaseEntry::moveNext() {
                  << "\n");
       return;
     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
-      if (error) {
-        *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
-                            Twine(error) + " for opcode at: 0x" +
-                            Twine::utohexstr(OpcodeStart - Opcodes.begin()));
-        moveToEnd();
-        return;
-      }
       AdvanceAmount = PointerSize;
       Skip = 0;
       Count = readULEB128(&error);
@@ -3226,8 +3270,8 @@ void MachORebaseEntry::moveNext() {
         RemainingLoopCount = Count - 1;
       else
         RemainingLoopCount = 0;
-      error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
-                                              SegmentIndex, SegmentOffset);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize, Count, Skip);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3244,15 +3288,6 @@ void MachORebaseEntry::moveNext() {
                  << "\n");
       return;
     case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
-      if (error) {
-        *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
-                            Twine(error) + " for opcode at: 0x" +
-                            Twine::utohexstr(OpcodeStart - Opcodes.begin()));
-        moveToEnd();
-        return;
-      }
       Skip = readULEB128(&error);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
@@ -3264,8 +3299,8 @@ void MachORebaseEntry::moveNext() {
       AdvanceAmount = Skip + PointerSize;
       Count = 1;
       RemainingLoopCount = 0;
-      error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
-                                              SegmentIndex, SegmentOffset);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize, Count, Skip);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3282,16 +3317,6 @@ void MachORebaseEntry::moveNext() {
                  << "\n");
       return;
     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
-      error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
-                                              true);
-      if (error) {
-        *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
-                            "ULEB " +
-                            Twine(error) + " for opcode at: 0x" +
-                            Twine::utohexstr(OpcodeStart - Opcodes.begin()));
-        moveToEnd();
-        return;
-      }
       Count = readULEB128(&error);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
@@ -3316,8 +3341,8 @@ void MachORebaseEntry::moveNext() {
       }
       AdvanceAmount = Skip + PointerSize;
 
-      error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
-                                              SegmentIndex, SegmentOffset);
+      error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                               PointerSize, Count, Skip);
       if (error) {
         *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
                             "ULEB " +
@@ -3624,7 +3649,8 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3648,7 +3674,8 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
                             " for opcode at: 0x" +
@@ -3664,7 +3691,8 @@ void MachOBindEntry::moveNext() {
     case MachO::BIND_OPCODE_DO_BIND:
       AdvanceAmount = PointerSize;
       RemainingLoopCount = 0;
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) +
                             " for opcode at: 0x" +
@@ -3701,7 +3729,8 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3737,8 +3766,8 @@ void MachOBindEntry::moveNext() {
       // Note, this is not really an error until the next bind but make no sense
       // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another
       // bind operation.
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
-                                            AdvanceAmount, false);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
+                                            AdvanceAmount, PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding "
                             "ULEB) " +
@@ -3764,7 +3793,8 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize);
       if (error) {
         *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " +
                             Twine(error) + " for opcode at: 0x" +
@@ -3792,8 +3822,8 @@ void MachOBindEntry::moveNext() {
       }
       AdvanceAmount = ImmValue * PointerSize + PointerSize;
       RemainingLoopCount = 0;
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
-                                            AdvanceAmount, false);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
+                                             AdvanceAmount, PointerSize);
       if (error) {
         *E =
             malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
@@ -3839,15 +3869,6 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
-      if (error) {
-        *E =
-            malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " +
-                           Twine(error) + " for opcode at: 0x" +
-                           Twine::utohexstr(OpcodeStart - Opcodes.begin()));
-        moveToEnd();
-        return;
-      }
       if (SymbolName == StringRef()) {
         *E = malformedError(
             "for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
@@ -3866,8 +3887,8 @@ void MachOBindEntry::moveNext() {
         moveToEnd();
         return;
       }
-      error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize,
-                                            SegmentIndex, SegmentOffset);
+      error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+                                             PointerSize, Count, Skip);
       if (error) {
         *E =
             malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " +
@@ -3990,53 +4011,40 @@ BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) {
   MaxSegIndex = CurSegIndex;
 }
 
-// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to
-// validate a MachOBindEntry or MachORebaseEntry.
-const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex,
-                                                  uint64_t SegOffset,
-                                                  bool endInvalid) {
+// For use with a SegIndex, SegOffset, and PointerSize triple in
+// MachOBindEntry::moveNext() to validate a MachOBindEntry or MachORebaseEntry.
+//
+// Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
+// that fully contains a pointer at that location. Multiple fixups in a bind
+// (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
+// be tested via the Count and Skip parameters.
+const char * BindRebaseSegInfo::checkSegAndOffsets(int32_t SegIndex,
+                                                   uint64_t SegOffset,
+                                                   uint8_t PointerSize,
+                                                   uint32_t Count,
+                                                   uint32_t Skip) {
   if (SegIndex == -1)
     return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB";
   if (SegIndex >= MaxSegIndex)
     return "bad segIndex (too large)";
-  for (const SectionInfo &SI : Sections) {
-    if (SI.SegmentIndex != SegIndex)
-      continue;
-    if (SI.OffsetInSegment > SegOffset)
-      continue;
-    if (SegOffset > (SI.OffsetInSegment + SI.Size))
-      continue;
-    if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size))
-      continue;
-    return nullptr;
-  }
-  return "bad segOffset, too large";
-}
-
-// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for
-// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in
-// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for
-// REBASE_OPCODE_DO_*_TIMES* opcodes.  The SegIndex and SegOffset must have
-// been already checked.
-const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip,
-                                                  uint8_t PointerSize,
-                                                  int32_t SegIndex,
-                                                  uint64_t SegOffset) {
-  const SectionInfo &SI = findSection(SegIndex, SegOffset);
-  uint64_t addr = SI.SegmentStartAddress + SegOffset;
-  if (addr >= SI.Address + SI.Size)
-    return "bad segOffset, too large";
-  uint64_t i = 0;
-  if (Count > 1)
-    i = (Skip + PointerSize) * (Count - 1);
-  else if (Count == 1)
-    i = Skip + PointerSize;
-  if (addr + i >= SI.Address + SI.Size) {
-    // For rebase opcodes they can step from one section to another.
-    uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress;
-    const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false);
-    if (error)
-      return "bad count and skip, too large";
+  for (uint32_t i = 0; i < Count; ++i) {
+    // Do the arithmetic in 64 bits: SegOffset is a uint64_t, so narrowing it
+    // (or letting i * stride wrap) could make a bad offset look in range.
+    uint64_t Start = SegOffset + uint64_t(i) * (uint64_t(PointerSize) + Skip);
+    uint64_t End = Start + PointerSize;
+    bool Found = false;
+    for (const SectionInfo &SI : Sections) {
+      if (SI.SegmentIndex != SegIndex)
+        continue;
+      if ((SI.OffsetInSegment<=Start) && (Start<(SI.OffsetInSegment+SI.Size))) {
+        if (End > SI.OffsetInSegment + SI.Size)
+          return "bad offset, extends beyond section boundary";
+        Found = true;
+        break;
+      }
+    }
+    if (!Found)
+      return "bad offset, not in section";
   }
   return nullptr;
 }
@@ -4514,8 +4522,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off));
   return makeArrayRef(Ptr, DyldInfo.rebase_size);
@@ -4525,8 +4536,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off));
   return makeArrayRef(Ptr, DyldInfo.bind_size);
@@ -4536,8 +4550,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off));
   return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
@@ -4547,8 +4564,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off));
   return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
@@ -4558,8 +4578,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off));
   return makeArrayRef(Ptr, DyldInfo.export_size);
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 309708e9b37c..b3f0993412c6 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -1,9 +1,8 @@
 //===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Object/Minidump.cpp b/lib/Object/Minidump.cpp
new file mode 100644
index 000000000000..7b5b21558699
--- /dev/null
+++ b/lib/Object/Minidump.cpp
@@ -0,0 +1,137 @@
+//===- Minidump.cpp - Minidump object file implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Minidump.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::minidump;
+
+Optional<ArrayRef<uint8_t>>
+MinidumpFile::getRawStream(minidump::StreamType Type) const {
+  const auto Pos = StreamMap.find(Type);
+  if (Pos == StreamMap.end())
+    return None; // no stream of this type was indexed
+  return getRawStream(Streams[Pos->second]); // look up that Streams entry
+}
+
+Expected<std::string> MinidumpFile::getString(size_t Offset) const {
+  // A minidump string is a 32-bit length, giving the size in *bytes*, followed
+  // by the string itself encoded in UTF16.  Read the length field first.
+  auto LengthField =
+      getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
+  if (!LengthField)
+    return LengthField.takeError();
+
+  // The payload is consumed as 16-bit units, so an odd byte count is rejected.
+  const size_t ByteCount = (*LengthField)[0];
+  if (ByteCount % 2 != 0)
+    return createError("String size not even");
+  const size_t NumUnits = ByteCount / 2;
+  if (NumUnits == 0)
+    return "";
+
+  // The code units start immediately after the length field.
+  auto Units = getDataSliceAs<support::ulittle16_t>(
+      getData(), Offset + sizeof(support::ulittle32_t), NumUnits);
+  if (!Units)
+    return Units.takeError();
+
+  // Copy the units into a UTF16 buffer for the converter.
+  SmallVector<UTF16, 32> WStr(NumUnits);
+  copy(*Units, WStr.begin());
+  std::string Result;
+  if (!convertUTF16ToUTF8String(WStr, Result))
+    return createError("String decoding failed");
+  return Result;
+}
+
+template <typename T>
+Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Stream) const {
+  // A stream that is absent from the file is reported as an error here.
+  Optional<ArrayRef<uint8_t>> Raw = getRawStream(Stream);
+  if (!Raw)
+    return createError("No such stream");
+
+  // The stream opens with a 32-bit element count.
+  auto CountField = getDataSliceAs<support::ulittle32_t>(*Raw, 0, 1);
+  if (!CountField)
+    return CountField.takeError();
+  const size_t NumEntries = CountField.get()[0];
+
+  // Entries normally follow the count directly, at offset 4.  When count plus
+  // entries fall short of the stream size, some producers have padded the
+  // list to an 8-byte boundary, so the entries are read from offset 8.
+  const size_t PackedEnd = 4 + sizeof(T) * NumEntries;
+  const size_t ListOffset = PackedEnd < Raw->size() ? 8 : 4;
+
+  return getDataSliceAs<T>(*Raw, ListOffset, NumEntries);
+}
+template Expected<ArrayRef<Module>>
+    MinidumpFile::getListStream(StreamType) const;
+template Expected<ArrayRef<Thread>>
+    MinidumpFile::getListStream(StreamType) const;
+template Expected<ArrayRef<MemoryDescriptor>>
+    MinidumpFile::getListStream(StreamType) const;
+
+Expected<ArrayRef<uint8_t>>
+MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
+  // Reject a range whose end wraps around size_t or lies past the buffer.
+  const size_t End = Offset + Size;
+  if (End < Offset || End < Size || End > Data.size())
+    return createEOFError();
+  return Data.slice(Offset, Size);
+}
+
+Expected<std::unique_ptr<MinidumpFile>>
+MinidumpFile::create(MemoryBufferRef Source) {
+  ArrayRef<uint8_t> Data = arrayRefFromStringRef(Source.getBuffer());
+  auto HeaderOrErr = getDataSliceAs<minidump::Header>(Data, 0, 1);
+  if (!HeaderOrErr)
+    return HeaderOrErr.takeError();
+
+  const minidump::Header &Hdr = (*HeaderOrErr)[0];
+  if (Hdr.Signature != Header::MagicSignature)
+    return createError("Invalid signature");
+  if ((Hdr.Version & 0xffff) != Header::MagicVersion)
+    return createError("Invalid version");
+
+  // The header gives the location and entry count of the stream directory.
+  auto DirOrErr = getDataSliceAs<Directory>(Data, Hdr.StreamDirectoryRVA,
+                                            Hdr.NumberOfStreams);
+  if (!DirOrErr)
+    return DirOrErr.takeError();
+  ArrayRef<Directory> Dir = *DirOrErr;
+
+  DenseMap<StreamType, std::size_t> StreamMap;
+  for (std::size_t Idx = 0, End = Dir.size(); Idx != End; ++Idx) {
+    const StreamType Type = Dir[Idx].Type;
+    const LocationDescriptor &Loc = Dir[Idx].Location;
+    // Every entry, even one skipped below, must lie within the file.
+    auto Contents = getDataSlice(Data, Loc.RVA, Loc.DataSize);
+    if (!Contents)
+      return Contents.takeError();
+
+    // Empty "Unused" entries are technically ill-formed, but a number of
+    // existing minidumps contain them, so they are skipped.
+    if (Type == StreamType::Unused && Loc.DataSize == 0)
+      continue;
+    // The DenseMap empty/tombstone key values cannot be stored in StreamMap.
+    if (Type == DenseMapInfo<StreamType>::getEmptyKey() ||
+        Type == DenseMapInfo<StreamType>::getTombstoneKey())
+      return createError("Cannot handle one of the minidump streams");
+    // Record the entry's index; a second entry of the same type is an error.
+    if (!StreamMap.try_emplace(Type, Idx).second)
+      return createError("Duplicate stream type");
+  }
+
+  return std::unique_ptr<MinidumpFile>(
+      new MinidumpFile(Source, Hdr, Dir, std::move(StreamMap)));
+}
diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp
index 33ce7d8109fb..d1e0ce5edae1 100644
--- a/lib/Object/ModuleSymbolTable.cpp
+++ b/lib/Object/ModuleSymbolTable.cpp
@@ -1,9 +1,8 @@
 //===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index f5de2e1d5ce2..d84798cc6dd0 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -1,9 +1,8 @@
 //===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,9 @@
 
 #include "llvm-c/Object.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/MachOUniversal.h"
 
 using namespace llvm;
 using namespace object;
@@ -58,6 +59,121 @@ wrap(const relocation_iterator *SI) {
     (const_cast<relocation_iterator*>(SI));
 }
 
+/*--.. Operations on binary files ..........................................--*/
+
+LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf,
+                               LLVMContextRef Context,
+                               char **ErrorMessage) {
+  // A null Context is forwarded as a null LLVMContext pointer.
+  Expected<std::unique_ptr<Binary>> ObjOrErr(createBinary(
+      unwrap(MemBuf)->getMemBufferRef(), Context ? unwrap(Context) : nullptr));
+  if (ObjOrErr)
+    return wrap(ObjOrErr.get().release()); // released to the caller
+
+  // Failure: hand the message back as a strdup'ed C string.
+  *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
+  return nullptr;
+}
+
+LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR) {
+  // The final `false` is the RequiresNullTerminator argument.
+  auto Src = unwrap(BR)->getMemoryBufferRef();
+  StringRef Bytes = Src.getBuffer(), Name = Src.getBufferIdentifier();
+  return wrap(MemoryBuffer::getMemBuffer(Bytes, Name, false).release());
+}
+
+void LLVMDisposeBinary(LLVMBinaryRef BR) {
+  delete unwrap(BR); // frees the Binary object behind the opaque handle
+}
+
+LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) {
+  // Translates the Binary's getType() value into the matching LLVMBinaryType.
+  // The helper derives from Binary so the ID_* names can be used unqualified.
+  class BinaryTypeMapper final : public Binary {
+  public:
+    static LLVMBinaryType mapBinaryTypeToLLVMBinaryType(unsigned Kind) {
+      if (Kind == ID_Archive)
+        return LLVMBinaryTypeArchive;
+      if (Kind == ID_MachOUniversalBinary)
+        return LLVMBinaryTypeMachOUniversalBinary;
+      if (Kind == ID_COFFImportFile)
+        return LLVMBinaryTypeCOFFImportFile;
+      if (Kind == ID_IR)
+        return LLVMBinaryTypeIR;
+      if (Kind == ID_WinRes)
+        return LLVMBinaryTypeWinRes;
+      if (Kind == ID_COFF)
+        return LLVMBinaryTypeCOFF;
+      if (Kind == ID_ELF32L)
+        return LLVMBinaryTypeELF32L;
+      if (Kind == ID_ELF32B)
+        return LLVMBinaryTypeELF32B;
+      if (Kind == ID_ELF64L)
+        return LLVMBinaryTypeELF64L;
+      if (Kind == ID_ELF64B)
+        return LLVMBinaryTypeELF64B;
+      if (Kind == ID_MachO32L)
+        return LLVMBinaryTypeMachO32L;
+      if (Kind == ID_MachO32B)
+        return LLVMBinaryTypeMachO32B;
+      if (Kind == ID_MachO64L)
+        return LLVMBinaryTypeMachO64L;
+      if (Kind == ID_MachO64B)
+        return LLVMBinaryTypeMachO64B;
+      if (Kind == ID_Wasm)
+        return LLVMBinaryTypeWasm;
+      // The two marker values are not valid kinds.
+      if (Kind == ID_StartObjects || Kind == ID_EndObjects)
+        llvm_unreachable("Marker types are not valid binary kinds!");
+      // Nothing above matched.
+      llvm_unreachable("Unknown binary kind!");
+    }
+  };
+  return BinaryTypeMapper::mapBinaryTypeToLLVMBinaryType(unwrap(BR)->getType());
+}
+
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+                                                        const char *Arch,
+                                                        size_t ArchLen,
+                                                        char **ErrorMessage) {
+  auto *Fat = cast<MachOUniversalBinary>(unwrap(BR));
+  Expected<std::unique_ptr<ObjectFile>> ObjOrErr(
+      Fat->getObjectForArch({Arch, ArchLen}));
+  if (ObjOrErr)
+    return wrap(ObjOrErr.get().release());
+  // Lookup failed: report why through the strdup'ed out-parameter.
+  *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
+  return nullptr;
+}
+
+LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR) {
+  // Returns null for an object with no sections, otherwise a heap-allocated
+  // iterator positioned at the first section.
+  auto Range = cast<ObjectFile>(unwrap(BR))->sections();
+  const bool Empty = Range.begin() == Range.end();
+  return Empty ? nullptr : wrap(new section_iterator(Range.begin()));
+}
+
+LLVMBool LLVMObjectFileIsSectionIteratorAtEnd(LLVMBinaryRef BR,
+                                              LLVMSectionIteratorRef SI) {
+  auto End = cast<ObjectFile>(unwrap(BR))->section_end();
+  return *unwrap(SI) == End; // the bool result converts to 1 or 0
+}
+
+LLVMSymbolIteratorRef LLVMObjectFileCopySymbolIterator(LLVMBinaryRef BR) {
+  // Returns null for an object with no symbols, otherwise a heap-allocated
+  // iterator positioned at the first symbol.
+  auto Range = cast<ObjectFile>(unwrap(BR))->symbols();
+  const bool Empty = Range.begin() == Range.end();
+  return Empty ? nullptr : wrap(new symbol_iterator(Range.begin()));
+}
+
+LLVMBool LLVMObjectFileIsSymbolIteratorAtEnd(LLVMBinaryRef BR,
+                                             LLVMSymbolIteratorRef SI) {
+  auto End = cast<ObjectFile>(unwrap(BR))->symbol_end();
+  return *unwrap(SI) == End; // the bool result converts to 1 or 0
+}
+
 // ObjectFile creation
 LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
   std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf));
@@ -146,10 +262,10 @@ uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
 }
 
 const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
-  StringRef ret;
-  if (std::error_code ec = (*unwrap(SI))->getContents(ret))
-    report_fatal_error(ec.message());
-  return ret.data();
+  Expected<StringRef> Contents = (*unwrap(SI))->getContents();
+  if (!Contents) // unreadable section contents are treated as fatal
+    report_fatal_error(Contents.takeError());
+  return Contents->data();
 }
 
 uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index cf63b89adc12..101f5dcc0821 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -1,9 +1,8 @@
 //===- ObjectFile.cpp - File format independent object file ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,21 +56,19 @@ uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const {
   return getSymbolValueImpl(Ref);
 }
 
-std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
-                                            DataRefImpl Symb) const {
+Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const {
   Expected<StringRef> Name = getSymbolName(Symb);
   if (!Name)
-    return errorToErrorCode(Name.takeError());
+    return Name.takeError(); // pass the lookup failure up unchanged
   OS << *Name;
-  return std::error_code();
+  return Error::success(); // the name has been written to OS
 }
 
 uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
 
 bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
-  StringRef SectName;
-  if (!getSectionName(Sec, SectName))
-    return SectName == ".llvmbc";
+  if (Expected<StringRef> N = getSectionName(Sec)) // error is consumed below
+    return *N == ".llvmbc"; else consumeError(N.takeError()); // not bitcode
   return false;
 }
 
@@ -128,6 +125,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
   case file_magic::macho_universal_binary:
   case file_magic::windows_resource:
   case file_magic::pdb:
+  case file_magic::minidump:
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::elf:
   case file_magic::elf_relocatable:
@@ -151,6 +149,10 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
   case file_magic::coff_import_library:
   case file_magic::pecoff_executable:
     return createCOFFObjectFile(Object);
+  case file_magic::xcoff_object_32:
+    return createXCOFFObjectFile(Object, Binary::ID_XCOFF32);
+  case file_magic::xcoff_object_64:
+    return createXCOFFObjectFile(Object, Binary::ID_XCOFF64);
   case file_magic::wasm_object:
     return createWasmObjectFile(Object);
   }
diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp
index 1f57867dd21a..f39a6c28ed50 100644
--- a/lib/Object/RecordStreamer.cpp
+++ b/lib/Object/RecordStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -83,7 +82,7 @@ RecordStreamer::const_iterator RecordStreamer::begin() {
 RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
 
 void RecordStreamer::EmitInstruction(const MCInst &Inst,
-                                     const MCSubtargetInfo &STI, bool) {
+                                     const MCSubtargetInfo &STI) {
   MCStreamer::EmitInstruction(Inst, STI);
 }
 
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index 3d5ae59b58fe..c8b75bcc6d1d 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -1,9 +1,8 @@
 //===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -47,8 +46,7 @@ private:
 public:
   RecordStreamer(MCContext &Context, const Module &M);
 
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool) override;
+  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
   void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
   void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
   bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
@@ -56,6 +54,15 @@ public:
                     unsigned ByteAlignment, SMLoc Loc = SMLoc()) override;
   void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         unsigned ByteAlignment) override;
+
+  // Ignore COFF-specific directives; we do not need any information from them,
+  // but the default implementation of these methods crashes, so we override
+  // them with versions that do nothing.
+  void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+  void EmitCOFFSymbolStorageClass(int StorageClass) override {}
+  void EmitCOFFSymbolType(int Type) override {}
+  void EndCOFFSymbolDef() override {}
+
   /// Record .symver aliases for later processing.
   void emitELFSymverDirective(StringRef AliasName,
                               const MCSymbol *Aliasee) override;
diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp
new file mode 100644
index 000000000000..0a243f32e12c
--- /dev/null
+++ b/lib/Object/RelocationResolver.cpp
@@ -0,0 +1,550 @@
+//===- RelocationResolver.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities to resolve relocations in object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/RelocationResolver.h"
+
+namespace llvm {
+namespace object {
+
+static int64_t getELFAddend(RelocationRef R) {
+  Expected<int64_t> Addend = ELFRelocationRef(R).getAddend();
+  handleAllErrors(Addend.takeError(), [](const ErrorInfoBase &Info) {
+    report_fatal_error(Info.message()); // any failure here is fatal
+  });
+  return *Addend;
+}
+
+static bool supportsX86_64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveX86_64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_X86_64_NONE ||
+      Type == ELF::R_X86_64_64 ||
+      Type == ELF::R_X86_64_DTPOFF32 ||
+      Type == ELF::R_X86_64_DTPOFF64 ||
+      Type == ELF::R_X86_64_PC32 ||
+      Type == ELF::R_X86_64_32 ||
+      Type == ELF::R_X86_64_32S)
+    return true;
+  return false;
+}
+
+static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+  const auto Kind = R.getType();
+  // NONE: the result is A, untouched.
+  if (Kind == ELF::R_X86_64_NONE)
+    return A;
+  // Full-width results: S plus the relocation's ELF addend.
+  if (Kind == ELF::R_X86_64_64 || Kind == ELF::R_X86_64_DTPOFF32 ||
+      Kind == ELF::R_X86_64_DTPOFF64)
+    return S + getELFAddend(R);
+  // PC32 additionally subtracts the relocation's own offset.
+  if (Kind == ELF::R_X86_64_PC32)
+    return S + getELFAddend(R) - R.getOffset();
+  // The 32-bit forms keep only the low 32 bits of the sum.
+  if (Kind == ELF::R_X86_64_32 || Kind == ELF::R_X86_64_32S)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsAArch64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveAArch64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_AARCH64_ABS32 ||
+      Type == ELF::R_AARCH64_ABS64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types add the relocation's ELF addend to S; ABS32 then
+  // keeps only the low 32 bits.  A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_AARCH64_ABS32)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  if (Kind == ELF::R_AARCH64_ABS64)
+    return S + getELFAddend(R);
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsBPF(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveBPF() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_BPF_64_32 ||
+      Type == ELF::R_BPF_64_64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
+  // Neither A nor an ELF addend is used: the result is S itself, cut down to
+  // its low 32 bits for R_BPF_64_32.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_BPF_64_32)
+    return S & 0xFFFFFFFF;
+  if (Kind == ELF::R_BPF_64_64)
+    return S;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsMips64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveMips64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_MIPS_32 ||
+      Type == ELF::R_MIPS_64 ||
+      Type == ELF::R_MIPS_TLS_DTPREL64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveMips64(RelocationRef R, uint64_t S, uint64_t A) {
+  // Every supported type starts from S plus the relocation's ELF addend;
+  // A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_MIPS_32)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  if (Kind == ELF::R_MIPS_64)
+    return S + getELFAddend(R);
+  if (Kind == ELF::R_MIPS_TLS_DTPREL64)
+    return S + getELFAddend(R) - 0x8000;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsPPC64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolvePPC64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_PPC64_ADDR32 ||
+      Type == ELF::R_PPC64_ADDR64)
+    return true;
+  return false;
+}
+
+static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types add the relocation's ELF addend to S; ADDR32 then
+  // keeps only the low 32 bits.  A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_PPC64_ADDR32)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  if (Kind == ELF::R_PPC64_ADDR64)
+    return S + getELFAddend(R);
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsSystemZ(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveSystemZ() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_390_32 ||
+      Type == ELF::R_390_64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveSystemZ(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types add the relocation's ELF addend to S; R_390_32 then
+  // keeps only the low 32 bits.  A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_390_32)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  if (Kind == ELF::R_390_64)
+    return S + getELFAddend(R);
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsSparc64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveSparc64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_SPARC_32 ||
+      Type == ELF::R_SPARC_64 ||
+      Type == ELF::R_SPARC_UA32 ||
+      Type == ELF::R_SPARC_UA64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveSparc64(RelocationRef R, uint64_t S, uint64_t A) {
+  // All four supported types resolve identically: S plus the ELF addend,
+  // with no truncation.  A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_SPARC_32 ||
+      Kind == ELF::R_SPARC_64 ||
+      Kind == ELF::R_SPARC_UA32 ||
+      Kind == ELF::R_SPARC_UA64)
+    return S + getELFAddend(R);
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsAmdgpu(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveAmdgpu() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_AMDGPU_ABS32 ||
+      Type == ELF::R_AMDGPU_ABS64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveAmdgpu(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types resolve to S plus the ELF addend, untruncated.
+  // A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind != ELF::R_AMDGPU_ABS32 &&
+      Kind != ELF::R_AMDGPU_ABS64)
+    llvm_unreachable("Invalid relocation type");
+  return S + getELFAddend(R);
+}
+
+static bool supportsX86(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveX86() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_386_NONE ||
+      Type == ELF::R_386_32 ||
+      Type == ELF::R_386_PC32)
+    return true;
+  return false;
+}
+
+static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) {
+  // These results are built from the A argument; getELFAddend() is not
+  // called for any of the supported types.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_386_NONE)
+    return A;
+  if (Kind == ELF::R_386_32)
+    return S + A;
+  if (Kind == ELF::R_386_PC32)
+    return S - R.getOffset() + A;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsPPC32(uint64_t Type) {
+  return ELF::R_PPC_ADDR32 == Type; // the only type resolvePPC32() handles
+}
+
+static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) {
+  if (R.getType() != ELF::R_PPC_ADDR32)
+    llvm_unreachable("Invalid relocation type");
+  return (S + getELFAddend(R)) & 0xFFFFFFFF; // low 32 bits of S + addend
+}
+
+static bool supportsARM(uint64_t Type) {
+  return ELF::R_ARM_ABS32 == Type; // the only type resolveARM() handles
+}
+
+static uint64_t resolveARM(RelocationRef R, uint64_t S, uint64_t A) {
+  if (R.getType() != ELF::R_ARM_ABS32)
+    llvm_unreachable("Invalid relocation type");
+  return (S + A) & 0xFFFFFFFF; // low 32 bits of S + A
+}
+
+static bool supportsAVR(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveAVR() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_AVR_16 ||
+      Type == ELF::R_AVR_32)
+    return true;
+  return false;
+}
+
+static uint64_t resolveAVR(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types add the relocation's ELF addend to S and then keep
+  // the low 16 or low 32 bits respectively.  A is not consulted.
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_AVR_16)
+    return (S + getELFAddend(R)) & 0xFFFF;
+  if (Kind == ELF::R_AVR_32)
+    return (S + getELFAddend(R)) & 0xFFFFFFFF;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsLanai(uint64_t Type) {
+  return ELF::R_LANAI_32 == Type; // the only type resolveLanai() handles
+}
+
+static uint64_t resolveLanai(RelocationRef R, uint64_t S, uint64_t A) {
+  if (R.getType() != ELF::R_LANAI_32)
+    llvm_unreachable("Invalid relocation type");
+  return (S + getELFAddend(R)) & 0xFFFFFFFF; // low 32 bits of S + addend
+}
+
+static bool supportsMips32(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveMips32() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_MIPS_32 ||
+      Type == ELF::R_MIPS_TLS_DTPREL32)
+    return true;
+  return false;
+}
+
+static uint64_t resolveMips32(RelocationRef R, uint64_t S, uint64_t A) {
+  // FIXME: Take into account implicit addends to get correct results.
+  const uint32_t Kind = R.getType();
+  if (Kind != ELF::R_MIPS_32 && Kind != ELF::R_MIPS_TLS_DTPREL32)
+    llvm_unreachable("Invalid relocation type");
+  // Both supported types resolve the same way: the low 32 bits of S + A.
+  // getELFAddend() is not called here.
+  return (S + A) & 0xFFFFFFFF;
+}
+
+static bool supportsSparc32(uint64_t Type) {
+  // Accepts exactly the relocation types for which resolveSparc32() below
+  // computes S plus the ELF addend.  Type is compared against each of them
+  // in turn; any other value yields false.
+  if (Type == ELF::R_SPARC_32 ||
+      Type == ELF::R_SPARC_UA32)
+    return true;
+  return false;
+}
+
+static uint64_t resolveSparc32(RelocationRef R, uint64_t S, uint64_t A) {
+  const uint32_t Kind = R.getType();
+  if (Kind != ELF::R_SPARC_32 && Kind != ELF::R_SPARC_UA32)
+    return A; // any other type: A is handed back unchanged
+  return S + getELFAddend(R);
+}
+
+static bool supportsHexagon(uint64_t Type) {
+  return ELF::R_HEX_32 == Type; // the only type resolveHexagon() handles
+}
+
+static uint64_t resolveHexagon(RelocationRef R, uint64_t S, uint64_t A) {
+  if (R.getType() != ELF::R_HEX_32)
+    llvm_unreachable("Invalid relocation type");
+  return S + getELFAddend(R); // S plus the ELF addend, untruncated
+}
+
+static bool supportsRISCV(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveRISCV() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == ELF::R_RISCV_NONE ||
+      Type == ELF::R_RISCV_32 ||
+      Type == ELF::R_RISCV_64 ||
+      Type == ELF::R_RISCV_ADD8 ||
+      Type == ELF::R_RISCV_SUB8 ||
+      Type == ELF::R_RISCV_ADD16 ||
+      Type == ELF::R_RISCV_SUB16 ||
+      Type == ELF::R_RISCV_ADD32 ||
+      Type == ELF::R_RISCV_SUB32 ||
+      Type == ELF::R_RISCV_ADD64 ||
+      Type == ELF::R_RISCV_SUB64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
+  // The ELF addend is read up front, even for R_RISCV_NONE.
+  const int64_t RA = getELFAddend(R);
+  const uint64_t Sum = S + RA;
+  const auto Kind = R.getType();
+  if (Kind == ELF::R_RISCV_NONE)
+    return A;
+  if (Kind == ELF::R_RISCV_32)
+    return Sum & 0xFFFFFFFF;
+  if (Kind == ELF::R_RISCV_64)
+    return Sum;
+  if (Kind == ELF::R_RISCV_ADD8)
+    return (A + Sum) & 0xFF;
+  if (Kind == ELF::R_RISCV_SUB8)
+    return (A - Sum) & 0xFF;
+  if (Kind == ELF::R_RISCV_ADD16)
+    return (A + Sum) & 0xFFFF;
+  if (Kind == ELF::R_RISCV_SUB16)
+    return (A - Sum) & 0xFFFF;
+  if (Kind == ELF::R_RISCV_ADD32)
+    return (A + Sum) & 0xFFFFFFFF;
+  if (Kind == ELF::R_RISCV_SUB32)
+    return (A - Sum) & 0xFFFFFFFF;
+  if (Kind == ELF::R_RISCV_ADD64)
+    return A + Sum;
+  if (Kind == ELF::R_RISCV_SUB64)
+    return A - Sum;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsCOFFX86(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveCOFFX86() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == COFF::IMAGE_REL_I386_SECREL ||
+      Type == COFF::IMAGE_REL_I386_DIR32)
+    return true;
+  return false;
+}
+
+static uint64_t resolveCOFFX86(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types resolve the same way: the low 32 bits of S + A.
+  // Anything else must not reach this function.
+  const auto Kind = R.getType();
+  if (Kind != COFF::IMAGE_REL_I386_SECREL &&
+      Kind != COFF::IMAGE_REL_I386_DIR32)
+    llvm_unreachable("Invalid relocation type");
+  return (S + A) & 0xFFFFFFFF;
+}
+
+static bool supportsCOFFX86_64(uint64_t Type) {
+  // Accepts exactly the relocation types that resolveCOFFX86_64() below
+  // handles.  Type is compared against each of them in turn; any other
+  // value yields false.
+  if (Type == COFF::IMAGE_REL_AMD64_SECREL ||
+      Type == COFF::IMAGE_REL_AMD64_ADDR64)
+    return true;
+  return false;
+}
+
+static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+  // Both supported types compute S + A; SECREL keeps only the low 32 bits
+  // of that sum while ADDR64 keeps all of it.
+  const auto Kind = R.getType();
+  if (Kind == COFF::IMAGE_REL_AMD64_SECREL)
+    return (S + A) & 0xFFFFFFFF;
+  if (Kind == COFF::IMAGE_REL_AMD64_ADDR64)
+    return S + A;
+  llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsMachOX86_64(uint64_t Type) {
+  return MachO::X86_64_RELOC_UNSIGNED == Type; // the one type handled below
+}
+
+static uint64_t resolveMachOX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+  if (R.getType() != MachO::X86_64_RELOC_UNSIGNED)
+    llvm_unreachable("Invalid relocation type");
+  return S; // the result is S alone; A is not used
+}
+
+// Accepts exactly the wasm relocation kinds that resolveWasm32 evaluates.
+static bool supportsWasm32(uint64_t Type) {
+  switch (Type) {
+  case wasm::R_WASM_TYPE_INDEX_LEB:
+  case wasm::R_WASM_FUNCTION_INDEX_LEB:
+  case wasm::R_WASM_GLOBAL_INDEX_LEB:
+  case wasm::R_WASM_EVENT_INDEX_LEB:
+  case wasm::R_WASM_TABLE_INDEX_SLEB:
+  case wasm::R_WASM_TABLE_INDEX_I32:
+  case wasm::R_WASM_MEMORY_ADDR_LEB:
+  case wasm::R_WASM_MEMORY_ADDR_SLEB:
+  case wasm::R_WASM_MEMORY_ADDR_I32:
+  case wasm::R_WASM_FUNCTION_OFFSET_I32:
+  case wasm::R_WASM_SECTION_OFFSET_I32:
+    return true;
+  }
+  return false;
+}
+
+// Resolves any supported wasm32 relocation kind; the answer is always A.
+static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) {
+  switch (R.getType()) {
+  case wasm::R_WASM_TYPE_INDEX_LEB:
+  case wasm::R_WASM_FUNCTION_INDEX_LEB:
+  case wasm::R_WASM_GLOBAL_INDEX_LEB:
+  case wasm::R_WASM_EVENT_INDEX_LEB:
+  case wasm::R_WASM_TABLE_INDEX_SLEB:
+  case wasm::R_WASM_TABLE_INDEX_I32:
+  case wasm::R_WASM_MEMORY_ADDR_LEB:
+  case wasm::R_WASM_MEMORY_ADDR_SLEB:
+  case wasm::R_WASM_MEMORY_ADDR_I32:
+  case wasm::R_WASM_FUNCTION_OFFSET_I32:
+  case wasm::R_WASM_SECTION_OFFSET_I32:
+    // A wasm section's offset is 0, so the value S is ignored.
+    return A;
+  }
+  llvm_unreachable("Invalid relocation type");
+}
+
+std::pair<bool (*)(uint64_t), RelocationResolver>
+getRelocationResolver(const ObjectFile &Obj) { // {supports, resolve} for Obj
+  if (Obj.isCOFF()) {
+    if (Obj.getBytesInAddress() == 8) // 8-byte addresses: x86-64 COFF
+      return {supportsCOFFX86_64, resolveCOFFX86_64};
+    return {supportsCOFFX86, resolveCOFFX86}; // any other COFF: 32-bit x86
+  } else if (Obj.isELF()) {
+    if (Obj.getBytesInAddress() == 8) { // 64-bit ELF, chosen by architecture
+      switch (Obj.getArch()) {
+      case Triple::x86_64:
+        return {supportsX86_64, resolveX86_64};
+      case Triple::aarch64:
+      case Triple::aarch64_be:
+        return {supportsAArch64, resolveAArch64};
+      case Triple::bpfel:
+      case Triple::bpfeb:
+        return {supportsBPF, resolveBPF};
+      case Triple::mips64el:
+      case Triple::mips64:
+        return {supportsMips64, resolveMips64};
+      case Triple::ppc64le:
+      case Triple::ppc64:
+        return {supportsPPC64, resolvePPC64};
+      case Triple::systemz:
+        return {supportsSystemZ, resolveSystemZ};
+      case Triple::sparcv9:
+        return {supportsSparc64, resolveSparc64};
+      case Triple::amdgcn:
+        return {supportsAmdgpu, resolveAmdgpu};
+      case Triple::riscv64:
+        return {supportsRISCV, resolveRISCV};
+      default:
+        return {nullptr, nullptr}; // no resolver for this 64-bit ELF arch
+      }
+    }
+
+    // 32-bit object file: every remaining ELF must use 4-byte addresses.
+    assert(Obj.getBytesInAddress() == 4 &&
+           "Invalid word size in object file");
+
+    switch (Obj.getArch()) {
+    case Triple::x86:
+      return {supportsX86, resolveX86};
+    case Triple::ppc:
+      return {supportsPPC32, resolvePPC32};
+    case Triple::arm:
+    case Triple::armeb:
+      return {supportsARM, resolveARM};
+    case Triple::avr:
+      return {supportsAVR, resolveAVR};
+    case Triple::lanai:
+      return {supportsLanai, resolveLanai};
+    case Triple::mipsel:
+    case Triple::mips:
+      return {supportsMips32, resolveMips32};
+    case Triple::sparc:
+      return {supportsSparc32, resolveSparc32};
+    case Triple::hexagon:
+      return {supportsHexagon, resolveHexagon};
+    case Triple::riscv32:
+      return {supportsRISCV, resolveRISCV}; // same pair as riscv64 above
+    default:
+      return {nullptr, nullptr}; // no resolver for this 32-bit ELF arch
+    }
+  } else if (Obj.isMachO()) {
+    if (Obj.getArch() == Triple::x86_64)
+      return {supportsMachOX86_64, resolveMachOX86_64};
+    return {nullptr, nullptr}; // only x86-64 Mach-O has a resolver
+  } else if (Obj.isWasm()) {
+    if (Obj.getArch() == Triple::wasm32)
+      return {supportsWasm32, resolveWasm32};
+    return {nullptr, nullptr}; // only wasm32 has a resolver
+  }
+
+  llvm_unreachable("Invalid object file"); // not COFF, ELF, Mach-O or wasm
+}
+
+} // namespace object
+} // namespace llvm
diff --git a/lib/Object/SymbolSize.cpp b/lib/Object/SymbolSize.cpp
index 004fb1b07546..bdf4dc55cf3c 100644
--- a/lib/Object/SymbolSize.cpp
+++ b/lib/Object/SymbolSize.cpp
@@ -1,9 +1,8 @@
 //===- SymbolSize.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 3e998a2682b8..2b152b7d8da3 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -1,9 +1,8 @@
 //===- SymbolicFile.cpp - Interface that only provides symbols ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,6 +52,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
   case file_magic::macho_universal_binary:
   case file_magic::windows_resource:
   case file_magic::pdb:
+  case file_magic::minidump:
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::elf:
   case file_magic::elf_executable:
@@ -69,6 +69,8 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
   case file_magic::macho_dsym_companion:
   case file_magic::macho_kext_bundle:
   case file_magic::pecoff_executable:
+  case file_magic::xcoff_object_32:
+  case file_magic::xcoff_object_64:
   case file_magic::wasm_object:
     return ObjectFile::createObjectFile(Object, Type);
   case file_magic::coff_import_library:
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index d84cb48c9fbd..82aa1830dced 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -1,15 +1,15 @@
 //===- WasmObjectFile.cpp - Wasm object file implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
@@ -131,24 +131,24 @@ static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) {
 }
 
 static uint8_t readVaruint1(WasmObjectFile::ReadContext &Ctx) {
-  int64_t result = readLEB128(Ctx);
-  if (result > VARUINT1_MAX || result < 0)
+  int64_t Result = readLEB128(Ctx);
+  if (Result > VARUINT1_MAX || Result < 0)
     report_fatal_error("LEB is outside Varuint1 range");
-  return result;
+  return Result;
 }
 
 static int32_t readVarint32(WasmObjectFile::ReadContext &Ctx) {
-  int64_t result = readLEB128(Ctx);
-  if (result > INT32_MAX || result < INT32_MIN)
+  int64_t Result = readLEB128(Ctx);
+  if (Result > INT32_MAX || Result < INT32_MIN)
     report_fatal_error("LEB is outside Varint32 range");
-  return result;
+  return Result;
 }
 
 static uint32_t readVaruint32(WasmObjectFile::ReadContext &Ctx) {
-  uint64_t result = readULEB128(Ctx);
-  if (result > UINT32_MAX)
+  uint64_t Result = readULEB128(Ctx);
+  if (Result > UINT32_MAX)
     report_fatal_error("LEB is outside Varuint32 range");
-  return result;
+  return Result;
 }
 
 static int64_t readVarint64(WasmObjectFile::ReadContext &Ctx) {
@@ -255,7 +255,7 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
   }
 
   ReadContext Ctx;
-  Ctx.Start = getPtr(0);
+  Ctx.Start = getData().bytes_begin();
   Ctx.Ptr = Ctx.Start + 4;
   Ctx.End = Ctx.Start + getData().size();
 
@@ -316,14 +316,17 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
     return parseCodeSection(Ctx);
   case wasm::WASM_SEC_DATA:
     return parseDataSection(Ctx);
+  case wasm::WASM_SEC_DATACOUNT:
+    return parseDataCountSection(Ctx);
   default:
-    return make_error<GenericBinaryError>("Bad section type",
-                                          object_error::parse_failed);
+    return make_error<GenericBinaryError>(
+        "Invalid section type: " + Twine(Sec.Type), object_error::parse_failed);
   }
 }
 
 Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
   // See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+  HasDylinkSection = true;
   DylinkInfo.MemorySize = readVaruint32(Ctx);
   DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
   DylinkInfo.TableSize = readVaruint32(Ctx);
@@ -418,17 +421,17 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
       if (Count > DataSegments.size())
         return make_error<GenericBinaryError>("Too many segment names",
                                               object_error::parse_failed);
-      for (uint32_t i = 0; i < Count; i++) {
-        DataSegments[i].Data.Name = readString(Ctx);
-        DataSegments[i].Data.Alignment = readVaruint32(Ctx);
-        DataSegments[i].Data.Flags = readVaruint32(Ctx);
+      for (uint32_t I = 0; I < Count; I++) {
+        DataSegments[I].Data.Name = readString(Ctx);
+        DataSegments[I].Data.Alignment = readVaruint32(Ctx);
+        DataSegments[I].Data.LinkerFlags = readVaruint32(Ctx);
       }
       break;
     }
     case wasm::WASM_INIT_FUNCS: {
       uint32_t Count = readVaruint32(Ctx);
       LinkingData.InitFunctions.reserve(Count);
-      for (uint32_t i = 0; i < Count; i++) {
+      for (uint32_t I = 0; I < Count; I++) {
         wasm::WasmInitFunc Init;
         Init.Priority = readVaruint32(Ctx);
         Init.Symbol = readVaruint32(Ctx);
@@ -505,9 +508,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
           Function.SymbolName = Info.Name;
       } else {
         wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex];
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         Signature = &Signatures[Import.SigIndex];
-        Info.Name = Import.Field;
-        Info.Module = Import.Module;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
 
@@ -530,8 +537,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
           Global.SymbolName = Info.Name;
       } else {
         wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex];
-        Info.Name = Import.Field;
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         GlobalType = &Import.Global;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
 
@@ -585,9 +597,14 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
 
       } else {
         wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex];
+        if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+          Info.Name = readString(Ctx);
+        else
+          Info.Name = Import.Field;
         EventType = &Import.Event;
         Signature = &Signatures[EventType->SigIndex];
-        Info.Name = Import.Field;
+        Info.ImportName = Import.Field;
+        Info.ImportModule = Import.Module;
       }
       break;
     }
@@ -659,6 +676,77 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
   return Error::success();
 }
 
+Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) {
+  // Each field name may occur once, and each producer once per field.
+  llvm::SmallSet<StringRef, 3> SeenFields;
+  uint32_t NumFields = readVaruint32(Ctx);
+  while (NumFields-- != 0) {
+    StringRef Field = readString(Ctx);
+    if (!SeenFields.insert(Field).second)
+      return make_error<GenericBinaryError>(
+          "Producers section does not have unique fields",
+          object_error::parse_failed);
+    // Select the ProducerInfo list that this field's entries are stored in.
+    std::vector<std::pair<std::string, std::string>> *Dest = nullptr;
+    if (Field == "language")
+      Dest = &ProducerInfo.Languages;
+    else if (Field == "processed-by")
+      Dest = &ProducerInfo.Tools;
+    else if (Field == "sdk")
+      Dest = &ProducerInfo.SDKs;
+    else
+      return make_error<GenericBinaryError>(
+          "Producers section field is not named one of language, processed-by, "
+          "or sdk",
+          object_error::parse_failed);
+    llvm::SmallSet<StringRef, 8> SeenProducers;
+    uint32_t NumValues = readVaruint32(Ctx);
+    while (NumValues-- != 0) {
+      StringRef Producer = readString(Ctx);
+      StringRef ProducerVersion = readString(Ctx);
+      if (!SeenProducers.insert(Producer).second)
+        return make_error<GenericBinaryError>(
+            "Producers section contains repeated producer",
+            object_error::parse_failed);
+      Dest->emplace_back(Producer, ProducerVersion);
+    }
+  }
+  if (Ctx.Ptr == Ctx.End)
+    return Error::success();
+  return make_error<GenericBinaryError>("Producers section ended prematurely",
+                                        object_error::parse_failed);
+}
+
+Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) {
+  // A feature name may be listed only once in the section.
+  llvm::SmallSet<std::string, 8> SeenNames;
+  uint32_t NumFeatures = readVaruint32(Ctx);
+  while (NumFeatures-- != 0) {
+    wasm::WasmFeatureEntry Entry;
+    Entry.Prefix = readUint8(Ctx);
+    const bool KnownPrefix =
+        Entry.Prefix == wasm::WASM_FEATURE_PREFIX_USED ||
+        Entry.Prefix == wasm::WASM_FEATURE_PREFIX_REQUIRED ||
+        Entry.Prefix == wasm::WASM_FEATURE_PREFIX_DISALLOWED;
+    if (!KnownPrefix)
+      return make_error<GenericBinaryError>("Unknown feature policy prefix",
+                                            object_error::parse_failed);
+    Entry.Name = readString(Ctx);
+    if (!SeenNames.insert(Entry.Name).second)
+      return make_error<GenericBinaryError>(
+          "Target features section contains repeated feature \"" +
+              Entry.Name + "\"",
+          object_error::parse_failed);
+    TargetFeatures.push_back(Entry);
+  }
+  // The read position must land exactly on the end of the section.
+  if (Ctx.Ptr == Ctx.End)
+    return Error::success();
+  return make_error<GenericBinaryError>(
+      "Target features section ended prematurely",
+      object_error::parse_failed);
+}
+
 Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
   uint32_t SectionIndex = readVaruint32(Ctx);
   if (SectionIndex >= Sections.size())
@@ -678,43 +766,49 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
     PreviousOffset = Reloc.Offset;
     Reloc.Index = readVaruint32(Ctx);
     switch (Reloc.Type) {
-    case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+    case wasm::R_WASM_FUNCTION_INDEX_LEB:
+    case wasm::R_WASM_TABLE_INDEX_SLEB:
+    case wasm::R_WASM_TABLE_INDEX_I32:
+    case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
       if (!isValidFunctionSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation function index",
                                               object_error::parse_failed);
       break;
-    case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+    case wasm::R_WASM_TYPE_INDEX_LEB:
       if (Reloc.Index >= Signatures.size())
         return make_error<GenericBinaryError>("Bad relocation type index",
                                               object_error::parse_failed);
       break;
-    case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
-      if (!isValidGlobalSymbol(Reloc.Index))
+    case wasm::R_WASM_GLOBAL_INDEX_LEB:
+      // R_WASM_GLOBAL_INDEX_LEB relocations can be used against function and
+      // data symbols to refer to their GOT entries.
+      if (!isValidGlobalSymbol(Reloc.Index) &&
+          !isValidDataSymbol(Reloc.Index) &&
+          !isValidFunctionSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation global index",
                                               object_error::parse_failed);
       break;
-    case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+    case wasm::R_WASM_EVENT_INDEX_LEB:
       if (!isValidEventSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation event index",
                                               object_error::parse_failed);
       break;
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
-    case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
+    case wasm::R_WASM_MEMORY_ADDR_LEB:
+    case wasm::R_WASM_MEMORY_ADDR_SLEB:
+    case wasm::R_WASM_MEMORY_ADDR_I32:
+    case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
       if (!isValidDataSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation data index",
                                               object_error::parse_failed);
       Reloc.Addend = readVarint32(Ctx);
       break;
-    case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+    case wasm::R_WASM_FUNCTION_OFFSET_I32:
       if (!isValidFunctionSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation function index",
                                               object_error::parse_failed);
       Reloc.Addend = readVarint32(Ctx);
       break;
-    case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+    case wasm::R_WASM_SECTION_OFFSET_I32:
       if (!isValidSectionSymbol(Reloc.Index))
         return make_error<GenericBinaryError>("Bad relocation section index",
                                               object_error::parse_failed);
@@ -730,10 +824,10 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
     // also shouldn't overlap a function/element boundary, but we don't bother
     // to check that.
     uint64_t Size = 5;
-    if (Reloc.Type == wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 ||
-        Reloc.Type == wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32 ||
-        Reloc.Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32 ||
-        Reloc.Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32)
+    if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I32 ||
+        Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I32 ||
+        Reloc.Type == wasm::R_WASM_SECTION_OFFSET_I32 ||
+        Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32)
       Size = 4;
     if (Reloc.Offset + Size > EndOffset)
       return make_error<GenericBinaryError>("Bad relocation offset",
@@ -757,6 +851,12 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
   } else if (Sec.Name == "linking") {
     if (Error Err = parseLinkingSection(Ctx))
       return Err;
+  } else if (Sec.Name == "producers") {
+    if (Error Err = parseProducersSection(Ctx))
+      return Err;
+  } else if (Sec.Name == "target_features") {
+    if (Error Err = parseTargetFeaturesSection(Ctx))
+      return Err;
   } else if (Sec.Name.startswith("reloc.")) {
     if (Error Err = parseRelocSection(Sec.Name, Ctx))
       return Err;
@@ -799,7 +899,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
 Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
   uint32_t Count = readVaruint32(Ctx);
   Imports.reserve(Count);
-  for (uint32_t i = 0; i < Count; i++) {
+  for (uint32_t I = 0; I < Count; I++) {
     wasm::WasmImport Im;
     Im.Module = readString(Ctx);
     Im.Field = readString(Ctx);
@@ -925,7 +1025,7 @@ Error WasmObjectFile::parseEventSection(ReadContext &Ctx) {
 Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
   uint32_t Count = readVaruint32(Ctx);
   Exports.reserve(Count);
-  for (uint32_t i = 0; i < Count; i++) {
+  for (uint32_t I = 0; I < Count; I++) {
     wasm::WasmExport Ex;
     Ex.Name = readString(Ctx);
     Ex.Kind = readUint8(Ctx);
@@ -1010,6 +1110,12 @@ wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) {
   return Functions[Index - NumImportedFunctions];
 }
 
+const wasm::WasmFunction &
+WasmObjectFile::getDefinedFunction(uint32_t Index) const { // const overload
+  assert(isDefinedFunctionIndex(Index)); // Index must name a defined function
+  return Functions[Index - NumImportedFunctions]; // offset by import count
+}
+
 wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
   assert(isDefinedGlobalIndex(Index));
   return Globals[Index - NumImportedGlobals];
@@ -1097,12 +1203,22 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
 Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
   DataSection = Sections.size();
   uint32_t Count = readVaruint32(Ctx);
+  if (DataCount && Count != DataCount.getValue())
+    return make_error<GenericBinaryError>(
+        "Number of data segments does not match DataCount section");
   DataSegments.reserve(Count);
   while (Count--) {
     WasmSegment Segment;
-    Segment.Data.MemoryIndex = readVaruint32(Ctx);
-    if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
-      return Err;
+    Segment.Data.InitFlags = readVaruint32(Ctx);
+    Segment.Data.MemoryIndex = (Segment.Data.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+                               ? readVaruint32(Ctx) : 0;
+    if ((Segment.Data.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+      if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
+        return Err;
+    } else {
+      Segment.Data.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
+      Segment.Data.Offset.Value.Int32 = 0;
+    }
     uint32_t Size = readVaruint32(Ctx);
     if (Size > (size_t)(Ctx.End - Ctx.Ptr))
       return make_error<GenericBinaryError>("Invalid segment size",
@@ -1111,7 +1227,7 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
     // The rest of these Data fields are set later, when reading in the linking
     // metadata section.
     Segment.Data.Alignment = 0;
-    Segment.Data.Flags = 0;
+    Segment.Data.LinkerFlags = 0;
     Segment.Data.Comdat = UINT32_MAX;
     Segment.SectionOffset = Ctx.Ptr - Ctx.Start;
     Ctx.Ptr += Size;
@@ -1123,15 +1239,16 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
   return Error::success();
 }
 
-const uint8_t *WasmObjectFile::getPtr(size_t Offset) const {
-  return reinterpret_cast<const uint8_t *>(getData().data() + Offset);
+Error WasmObjectFile::parseDataCountSection(ReadContext &Ctx) {
+  DataCount = readVaruint32(Ctx);
+  return Error::success();
 }
 
 const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const {
   return Header;
 }
 
-void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; }
+void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.b++; }
 
 uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
   uint32_t Result = SymbolRef::SF_None;
@@ -1153,18 +1270,20 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
 
 basic_symbol_iterator WasmObjectFile::symbol_begin() const {
   DataRefImpl Ref;
-  Ref.d.a = 0;
+  Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null
+  Ref.d.b = 0; // Symbol index
   return BasicSymbolRef(Ref, this);
 }
 
 basic_symbol_iterator WasmObjectFile::symbol_end() const {
   DataRefImpl Ref;
-  Ref.d.a = Symbols.size();
+  Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null
+  Ref.d.b = Symbols.size(); // Symbol index
   return BasicSymbolRef(Ref, this);
 }
 
 const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const {
-  return Symbols[Symb.d.a];
+  return Symbols[Symb.d.b];
 }
 
 const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const {
@@ -1176,7 +1295,12 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
 }
 
 Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
-  return getSymbolValue(Symb);
+  auto &Sym = getWasmSymbol(Symb);
+  if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
+      isDefinedFunctionIndex(Sym.Info.ElementIndex))
+    return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
+  else
+    return getSymbolValue(Symb);
 }
 
 uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
@@ -1265,13 +1389,11 @@ WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
 
 void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }
 
-std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
-                                               StringRef &Res) const {
+Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
   const WasmSection &S = Sections[Sec.d.a];
 #define ECase(X)                                                               \
   case wasm::WASM_SEC_##X:                                                     \
-    Res = #X;                                                                  \
-    break
+    return #X;
   switch (S.Type) {
     ECase(TYPE);
     ECase(IMPORT);
@@ -1285,14 +1407,13 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
     ECase(ELEM);
     ECase(CODE);
     ECase(DATA);
+    ECase(DATACOUNT);
   case wasm::WASM_SEC_CUSTOM:
-    Res = S.Name;
-    break;
+    return S.Name;
   default:
-    return object_error::invalid_section_index;
+    return createStringError(object_error::invalid_section_index, "");
   }
 #undef ECase
-  return std::error_code();
 }
 
 uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
@@ -1306,14 +1427,12 @@ uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
   return S.Content.size();
 }
 
-std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec,
-                                                   StringRef &Res) const {
+Expected<ArrayRef<uint8_t>>
+WasmObjectFile::getSectionContents(DataRefImpl Sec) const {
   const WasmSection &S = Sections[Sec.d.a];
   // This will never fail since wasm sections can never be empty (user-sections
   // must have a name and non-user sections each have a defined structure).
-  Res = StringRef(reinterpret_cast<const char *>(S.Content.data()),
-                  S.Content.size());
-  return std::error_code();
+  return S.Content;
 }
 
 uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -1362,11 +1481,11 @@ uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
 
 symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Ref) const {
   const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
-  if (Rel.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
+  if (Rel.Type == wasm::R_WASM_TYPE_INDEX_LEB)
     return symbol_end();
   DataRefImpl Sym;
-  Sym.d.a = Rel.Index;
-  Sym.d.b = 0;
+  Sym.d.a = 1;
+  Sym.d.b = Rel.Index;
   return symbol_iterator(SymbolRef(Sym, this));
 }
 
@@ -1453,7 +1572,8 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
         .StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
         .Case("name", WASM_SEC_ORDER_NAME)
         .Case("producers", WASM_SEC_ORDER_PRODUCERS)
-        .Default(-1);
+        .Case("target_features", WASM_SEC_ORDER_TARGET_FEATURES)
+        .Default(WASM_SEC_ORDER_NONE);
   case wasm::WASM_SEC_TYPE:
     return WASM_SEC_ORDER_TYPE;
   case wasm::WASM_SEC_IMPORT:
@@ -1481,19 +1601,73 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
   case wasm::WASM_SEC_EVENT:
     return WASM_SEC_ORDER_EVENT;
   default:
-    llvm_unreachable("invalid section");
+    return WASM_SEC_ORDER_NONE;
   }
 }
 
+// Edges of a directed graph: row A lists nodes that, like everything reachable
+// from them, must not appear before A in the section ordering (they may follow
+// it). Rows are indexed by WASM_SEC_ORDER_* and end at WASM_SEC_ORDER_NONE.
+int WasmSectionOrderChecker::DisallowedPredecessors[WASM_NUM_SEC_ORDERS][WASM_NUM_SEC_ORDERS] = {
+  {}, // WASM_SEC_ORDER_NONE
+  {WASM_SEC_ORDER_TYPE, WASM_SEC_ORDER_IMPORT}, // WASM_SEC_ORDER_TYPE,
+  {WASM_SEC_ORDER_IMPORT, WASM_SEC_ORDER_FUNCTION}, // WASM_SEC_ORDER_IMPORT,
+  {WASM_SEC_ORDER_FUNCTION, WASM_SEC_ORDER_TABLE}, // WASM_SEC_ORDER_FUNCTION,
+  {WASM_SEC_ORDER_TABLE, WASM_SEC_ORDER_MEMORY}, // WASM_SEC_ORDER_TABLE,
+  {WASM_SEC_ORDER_MEMORY, WASM_SEC_ORDER_GLOBAL}, // WASM_SEC_ORDER_MEMORY,
+  {WASM_SEC_ORDER_GLOBAL, WASM_SEC_ORDER_EVENT}, // WASM_SEC_ORDER_GLOBAL,
+  {WASM_SEC_ORDER_EVENT, WASM_SEC_ORDER_EXPORT}, // WASM_SEC_ORDER_EVENT,
+  {WASM_SEC_ORDER_EXPORT, WASM_SEC_ORDER_START}, // WASM_SEC_ORDER_EXPORT,
+  {WASM_SEC_ORDER_START, WASM_SEC_ORDER_ELEM}, // WASM_SEC_ORDER_START,
+  {WASM_SEC_ORDER_ELEM, WASM_SEC_ORDER_DATACOUNT}, // WASM_SEC_ORDER_ELEM,
+  {WASM_SEC_ORDER_DATACOUNT, WASM_SEC_ORDER_CODE}, // WASM_SEC_ORDER_DATACOUNT,
+  {WASM_SEC_ORDER_CODE, WASM_SEC_ORDER_DATA}, // WASM_SEC_ORDER_CODE,
+  {WASM_SEC_ORDER_DATA, WASM_SEC_ORDER_LINKING}, // WASM_SEC_ORDER_DATA,
+
+  // Custom Sections
+  {WASM_SEC_ORDER_DYLINK, WASM_SEC_ORDER_TYPE}, // WASM_SEC_ORDER_DYLINK,
+  {WASM_SEC_ORDER_LINKING, WASM_SEC_ORDER_RELOC, WASM_SEC_ORDER_NAME}, // WASM_SEC_ORDER_LINKING,
+  {}, // WASM_SEC_ORDER_RELOC (can be repeated),
+  {WASM_SEC_ORDER_NAME, WASM_SEC_ORDER_PRODUCERS}, // WASM_SEC_ORDER_NAME,
+  {WASM_SEC_ORDER_PRODUCERS, WASM_SEC_ORDER_TARGET_FEATURES}, // WASM_SEC_ORDER_PRODUCERS,
+  {WASM_SEC_ORDER_TARGET_FEATURES}  // WASM_SEC_ORDER_TARGET_FEATURES
+};
+
 bool WasmSectionOrderChecker::isValidSectionOrder(unsigned ID,
                                                   StringRef CustomSectionName) {
   int Order = getSectionOrder(ID, CustomSectionName);
-  if (Order == -1) // Skip unknown sections
+  if (Order == WASM_SEC_ORDER_NONE)
     return true;
-  // There can be multiple "reloc." sections. Otherwise there shouldn't be any
-  // duplicate section orders.
-  bool IsValid = (LastOrder == Order && Order == WASM_SEC_ORDER_RELOC) ||
-                 LastOrder < Order;
-  LastOrder = Order;
-  return IsValid;
+
+  // Disallowed predecessors we need to check for
+  SmallVector<int, WASM_NUM_SEC_ORDERS> WorkList;
+
+  // Keep track of completed checks to avoid repeating work
+  bool Checked[WASM_NUM_SEC_ORDERS] = {};
+
+  int Curr = Order;
+  while (true) {
+    // Add new disallowed predecessors to work list
+    for (size_t I = 0;; ++I) {
+      int Next = DisallowedPredecessors[Curr][I];
+      if (Next == WASM_SEC_ORDER_NONE)
+        break;
+      if (Checked[Next])
+        continue;
+      WorkList.push_back(Next);
+      Checked[Next] = true;
+    }
+
+    if (WorkList.empty())
+      break;
+
+    // Consider next disallowed predecessor
+    Curr = WorkList.pop_back_val();
+    if (Seen[Curr])
+      return false;
+  }
+
+  // Have not seen any disallowed predecessors
+  Seen[Order] = true;
+  return true;
 }
diff --git a/lib/Object/WindowsMachineFlag.cpp b/lib/Object/WindowsMachineFlag.cpp
new file mode 100644
index 000000000000..f7f2b20ae1a2
--- /dev/null
+++ b/lib/Object/WindowsMachineFlag.cpp
@@ -0,0 +1,44 @@
+//===- WindowsMachineFlag.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Functions for implementing the /machine: flag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/WindowsMachineFlag.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/COFF.h"
+
+using namespace llvm;
+
+// Maps a /machine: value, lower-cased first, to its COFF machine type.
+COFF::MachineTypes llvm::getMachineType(StringRef S) {
+  return StringSwitch<COFF::MachineTypes>(S.lower())
+      .Cases("x64", "amd64", COFF::IMAGE_FILE_MACHINE_AMD64)
+      .Cases("x86", "i386", COFF::IMAGE_FILE_MACHINE_I386)
+      .Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)
+      .Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)
+      .Default(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
+}
+
+StringRef llvm::machineToStr(COFF::MachineTypes MT) {
+  switch (MT) {
+  case COFF::IMAGE_FILE_MACHINE_ARMNT:
+    return "arm";
+  case COFF::IMAGE_FILE_MACHINE_ARM64:
+    return "arm64";
+  case COFF::IMAGE_FILE_MACHINE_AMD64:
+    return "x64";
+  case COFF::IMAGE_FILE_MACHINE_I386:
+    return "x86";
+  default:
+    llvm_unreachable("unknown machine type");
+  }
+}
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 65413dd8bea1..d76e1231684c 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -1,9 +1,8 @@
 //===-- WindowsResource.cpp -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include <ctime>
 #include <queue>
 #include <system_error>
@@ -46,11 +46,12 @@ WindowsResource::WindowsResource(MemoryBufferRef Source)
                          support::little);
 }
 
+// static
 Expected<std::unique_ptr<WindowsResource>>
 WindowsResource::createWindowsResource(MemoryBufferRef Source) {
   if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE)
     return make_error<GenericBinaryError>(
-        "File too small to be a resource file",
+        Source.getBufferIdentifier() + ": too small to be a resource file",
         object_error::invalid_file_type);
   std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source));
   return std::move(Ret);
@@ -58,14 +59,14 @@ WindowsResource::createWindowsResource(MemoryBufferRef Source) {
 
 Expected<ResourceEntryRef> WindowsResource::getHeadEntry() {
   if (BBS.getLength() < sizeof(WinResHeaderPrefix) + sizeof(WinResHeaderSuffix))
-    return make_error<EmptyResError>(".res contains no entries",
+    return make_error<EmptyResError>(getFileName() + " contains no entries",
                                      object_error::unexpected_eof);
   return ResourceEntryRef::create(BinaryStreamRef(BBS), this);
 }
 
 ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
                                    const WindowsResource *Owner)
-    : Reader(Ref) {}
+    : Reader(Ref), Owner(Owner) {}
 
 Expected<ResourceEntryRef>
 ResourceEntryRef::create(BinaryStreamRef BSR, const WindowsResource *Owner) {
@@ -108,7 +109,8 @@ Error ResourceEntryRef::loadNext() {
   RETURN_IF_ERROR(Reader.readObject(Prefix));
 
   if (Prefix->HeaderSize < MIN_HEADER_SIZE)
-    return make_error<GenericBinaryError>("Header size is too small.",
+    return make_error<GenericBinaryError>(Owner->getFileName() +
+                                              ": header size too small",
                                           object_error::parse_failed);
 
   RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType));
@@ -128,7 +130,78 @@ Error ResourceEntryRef::loadNext() {
 
 WindowsResourceParser::WindowsResourceParser() : Root(false) {}
 
-Error WindowsResourceParser::parse(WindowsResource *WR) {
+void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) {
+  switch (TypeID) {
+  case  1: OS << "CURSOR (ID 1)"; break;
+  case  2: OS << "BITMAP (ID 2)"; break;
+  case  3: OS << "ICON (ID 3)"; break;
+  case  4: OS << "MENU (ID 4)"; break;
+  case  5: OS << "DIALOG (ID 5)"; break;
+  case  6: OS << "STRINGTABLE (ID 6)"; break;
+  case  7: OS << "FONTDIR (ID 7)"; break;
+  case  8: OS << "FONT (ID 8)"; break;
+  case  9: OS << "ACCELERATOR (ID 9)"; break;
+  case 10: OS << "RCDATA (ID 10)"; break;
+  case 11: OS << "MESSAGETABLE (ID 11)"; break;
+  case 12: OS << "GROUP_CURSOR (ID 12)"; break;
+  case 14: OS << "GROUP_ICON (ID 14)"; break;
+  case 16: OS << "VERSIONINFO (ID 16)"; break;
+  case 17: OS << "DLGINCLUDE (ID 17)"; break;
+  case 19: OS << "PLUGPLAY (ID 19)"; break;
+  case 20: OS << "VXD (ID 20)"; break;
+  case 21: OS << "ANICURSOR (ID 21)"; break;
+  case 22: OS << "ANIICON (ID 22)"; break;
+  case 23: OS << "HTML (ID 23)"; break;
+  case 24: OS << "MANIFEST (ID 24)"; break;
+  default: OS << "ID " << TypeID; break;
+  }
+}
+
+static bool convertUTF16LEToUTF8String(ArrayRef<UTF16> Src, std::string &Out) {
+  if (!sys::IsBigEndianHost)
+    return convertUTF16ToUTF8String(Src, Out);
+  // Big-endian host: put a byte-swapped BOM in front of a copy of Src.
+  std::vector<UTF16> EndianCorrectedSrc;
+  EndianCorrectedSrc.resize(Src.size() + 1);
+  llvm::copy(Src, EndianCorrectedSrc.begin() + 1);
+  EndianCorrectedSrc[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
+  return convertUTF16ToUTF8String(makeArrayRef(EndianCorrectedSrc), Out);
+}
+
+static std::string makeDuplicateResourceError(
+    const ResourceEntryRef &Entry, StringRef File1, StringRef File2) {
+  std::string Ret;
+  raw_string_ostream OS(Ret);
+
+  // Writes a UTF-16LE identifier to OS as a double-quoted UTF-8 string.
+  auto PrintQuoted = [&OS](ArrayRef<UTF16> Name) {
+    std::string UTF8;
+    if (!convertUTF16LEToUTF8String(Name, UTF8))
+      UTF8 = "(failed conversion from UTF16)";
+    OS << '\"' << UTF8 << '\"';
+  };
+
+  OS << "duplicate resource:";
+  OS << " type ";
+  if (Entry.checkTypeString())
+    PrintQuoted(Entry.getTypeString());
+  else
+    printResourceTypeName(Entry.getTypeID(), OS);
+
+  OS << "/name ";
+  if (Entry.checkNameString())
+    PrintQuoted(Entry.getNameString());
+  else
+    OS << "ID " << Entry.getNameID();
+
+  OS << "/language " << Entry.getLanguage() << ", in " << File1 << " and in "
+     << File2;
+
+  return OS.str();
+}
+
+Error WindowsResourceParser::parse(WindowsResource *WR,
+                                   std::vector<std::string> &Duplicates) {
   auto EntryOrErr = WR->getHeadEntry();
   if (!EntryOrErr) {
     auto E = EntryOrErr.takeError();
@@ -153,7 +226,14 @@ Error WindowsResourceParser::parse(WindowsResource *WR) {
     bool IsNewTypeString = false;
     bool IsNewNameString = false;
 
-    Root.addEntry(Entry, IsNewTypeString, IsNewNameString);
+    TreeNode* Node;
+    bool IsNewNode = Root.addEntry(Entry, InputFilenames.size(),
+                                   IsNewTypeString, IsNewNameString, Node);
+    InputFilenames.push_back(WR->getFileName());
+    if (!IsNewNode) {
+      Duplicates.push_back(makeDuplicateResourceError(
+          Entry, InputFilenames[Node->Origin], WR->getFileName()));
+    }
 
     if (IsNewTypeString)
       StringTable.push_back(Entry.getTypeString());
@@ -172,12 +252,14 @@ void WindowsResourceParser::printTree(raw_ostream &OS) const {
   Root.print(Writer, "Resource Tree");
 }
 
-void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+bool WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+                                               uint32_t Origin,
                                                bool &IsNewTypeString,
-                                               bool &IsNewNameString) {
+                                               bool &IsNewNameString,
+                                               TreeNode *&Result) {
   TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString);
   TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString);
-  NameNode.addLanguageNode(Entry);
+  return NameNode.addLanguageNode(Entry, Origin, Result);
 }
 
 WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
@@ -187,10 +269,11 @@ WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
 
 WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
                                           uint16_t MinorVersion,
-                                          uint32_t Characteristics)
+                                          uint32_t Characteristics,
+                                          uint32_t Origin)
     : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
-      Characteristics(Characteristics) {
-    DataIndex = DataCount++;
+      Characteristics(Characteristics), Origin(Origin) {
+  DataIndex = DataCount++;
 }
 
 std::unique_ptr<WindowsResourceParser::TreeNode>
@@ -206,44 +289,52 @@ WindowsResourceParser::TreeNode::createIDNode() {
 std::unique_ptr<WindowsResourceParser::TreeNode>
 WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
                                                 uint16_t MinorVersion,
-                                                uint32_t Characteristics) {
+                                                uint32_t Characteristics,
+                                                uint32_t Origin) {
   return std::unique_ptr<TreeNode>(
-      new TreeNode(MajorVersion, MinorVersion, Characteristics));
+      new TreeNode(MajorVersion, MinorVersion, Characteristics, Origin));
 }
 
 WindowsResourceParser::TreeNode &
 WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry,
                                              bool &IsNewTypeString) {
   if (Entry.checkTypeString())
-    return addChild(Entry.getTypeString(), IsNewTypeString);
+    return addNameChild(Entry.getTypeString(), IsNewTypeString);
   else
-    return addChild(Entry.getTypeID());
+    return addIDChild(Entry.getTypeID());
 }
 
 WindowsResourceParser::TreeNode &
 WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry,
                                              bool &IsNewNameString) {
   if (Entry.checkNameString())
-    return addChild(Entry.getNameString(), IsNewNameString);
+    return addNameChild(Entry.getNameString(), IsNewNameString);
   else
-    return addChild(Entry.getNameID());
+    return addIDChild(Entry.getNameID());
 }
 
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addLanguageNode(
-    const ResourceEntryRef &Entry) {
-  return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(),
-                  Entry.getMinorVersion(), Entry.getCharacteristics());
+bool WindowsResourceParser::TreeNode::addLanguageNode(
+    const ResourceEntryRef &Entry, uint32_t Origin, TreeNode *&Result) {
+  return addDataChild(Entry.getLanguage(), Entry.getMajorVersion(),
+                      Entry.getMinorVersion(), Entry.getCharacteristics(),
+                      Origin, Result);
 }
 
-WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
-    uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion,
-    uint32_t Characteristics) {
+bool WindowsResourceParser::TreeNode::addDataChild(
+    uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
+    uint32_t Characteristics, uint32_t Origin, TreeNode *&Result) {
+  auto NewChild =
+      createDataNode(MajorVersion, MinorVersion, Characteristics, Origin);
+  auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild));
+  Result = ElementInserted.first->second.get();
+  return ElementInserted.second;
+}
+
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild(
+    uint32_t ID) {
   auto Child = IDChildren.find(ID);
   if (Child == IDChildren.end()) {
-    auto NewChild =
-        IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics)
-                   : createIDNode();
+    auto NewChild = createIDNode();
     WindowsResourceParser::TreeNode &Node = *NewChild;
     IDChildren.emplace(ID, std::move(NewChild));
     return Node;
@@ -252,19 +343,10 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
 }
 
 WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
-                                          bool &IsNewString) {
+WindowsResourceParser::TreeNode::addNameChild(ArrayRef<UTF16> NameRef,
+                                              bool &IsNewString) {
   std::string NameString;
-  ArrayRef<UTF16> CorrectedName;
-  std::vector<UTF16> EndianCorrectedName;
-  if (sys::IsBigEndianHost) {
-    EndianCorrectedName.resize(NameRef.size() + 1);
-    llvm::copy(NameRef, EndianCorrectedName.begin() + 1);
-    EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
-    CorrectedName = makeArrayRef(EndianCorrectedName);
-  } else
-    CorrectedName = NameRef;
-  convertUTF16ToUTF8String(CorrectedName, NameString);
+  convertUTF16LEToUTF8String(NameRef, NameString);
 
   auto Child = StringChildren.find(NameString);
   if (Child == StringChildren.end()) {
@@ -318,13 +400,13 @@ class WindowsResourceCOFFWriter {
 public:
   WindowsResourceCOFFWriter(COFF::MachineTypes MachineType,
                             const WindowsResourceParser &Parser, Error &E);
-  std::unique_ptr<MemoryBuffer> write();
+  std::unique_ptr<MemoryBuffer> write(uint32_t TimeDateStamp);
 
 private:
   void performFileLayout();
   void performSectionOneLayout();
   void performSectionTwoLayout();
-  void writeCOFFHeader();
+  void writeCOFFHeader(uint32_t TimeDateStamp);
   void writeFirstSectionHeader();
   void writeSecondSectionHeader();
   void writeFirstSection();
@@ -360,7 +442,8 @@ WindowsResourceCOFFWriter::WindowsResourceCOFFWriter(
       Data(Parser.getData()), StringTable(Parser.getStringTable()) {
   performFileLayout();
 
-  OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(FileSize);
+  OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(
+      FileSize, "internal .obj file created from .res files");
 }
 
 void WindowsResourceCOFFWriter::performFileLayout() {
@@ -417,17 +500,11 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() {
   FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
 }
 
-static std::time_t getTime() {
-  std::time_t Now = time(nullptr);
-  if (Now < 0 || !isUInt<32>(Now))
-    return UINT32_MAX;
-  return Now;
-}
-
-std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() {
+std::unique_ptr<MemoryBuffer>
+WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) {
   BufferStart = OutputBuffer->getBufferStart();
 
-  writeCOFFHeader();
+  writeCOFFHeader(TimeDateStamp);
   writeFirstSectionHeader();
   writeSecondSectionHeader();
   writeFirstSection();
@@ -438,16 +515,17 @@ std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() {
   return std::move(OutputBuffer);
 }
 
-void WindowsResourceCOFFWriter::writeCOFFHeader() {
+void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) {
   // Write the COFF header.
   auto *Header = reinterpret_cast<coff_file_header *>(BufferStart);
   Header->Machine = MachineType;
   Header->NumberOfSections = 2;
-  Header->TimeDateStamp = getTime();
+  Header->TimeDateStamp = TimeDateStamp;
   Header->PointerToSymbolTable = SymbolTableOffset;
-  // One symbol for every resource plus 2 for each section and @feat.00
+  // One symbol for every resource plus 2 for each section and 1 for @feat.00
   Header->NumberOfSymbols = Data.size() + 5;
   Header->SizeOfOptionalHeader = 0;
+  // cvtres.exe sets 32BIT_MACHINE even for 64-bit machine types. Match it.
   Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE;
 }
 
@@ -712,12 +790,13 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
 
 Expected<std::unique_ptr<MemoryBuffer>>
 writeWindowsResourceCOFF(COFF::MachineTypes MachineType,
-                         const WindowsResourceParser &Parser) {
+                         const WindowsResourceParser &Parser,
+                         uint32_t TimeDateStamp) {
   Error E = Error::success();
   WindowsResourceCOFFWriter Writer(MachineType, Parser, E);
   if (E)
     return std::move(E);
-  return Writer.write();
+  return Writer.write(TimeDateStamp);
 }
 
 } // namespace object
diff --git a/lib/Object/XCOFFObjectFile.cpp b/lib/Object/XCOFFObjectFile.cpp
new file mode 100644
index 000000000000..602b7357986a
--- /dev/null
+++ b/lib/Object/XCOFFObjectFile.cpp
@@ -0,0 +1,584 @@
+//===--- XCOFFObjectFile.cpp - XCOFF object file implementation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCOFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstddef>
+#include <cstring>
+
+namespace llvm {
+namespace object {
+
+// Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer
+// 'M'. Returns a pointer to the underlying object on success.
+template <typename T>
+static Expected<const T *> getObject(MemoryBufferRef M, const void *Ptr,
+                                     const uint64_t Size = sizeof(T)) {
+  uintptr_t Addr = uintptr_t(Ptr);
+  if (std::error_code EC = Binary::checkOffset(M, Addr, Size))
+    return errorCodeToError(EC);
+  return reinterpret_cast<const T *>(Addr);
+}
+
+static uintptr_t getWithOffset(uintptr_t Base, ptrdiff_t Offset) {
+  return reinterpret_cast<uintptr_t>(reinterpret_cast<const char *>(Base) +
+                                     Offset);
+}
+
+template <typename T> static const T *viewAs(uintptr_t in) {
+  return reinterpret_cast<const T *>(in);
+}
+
+static StringRef generateStringRef(const char *Name, uint64_t Size) {
+  const auto *Nul = static_cast<const char *>(memchr(Name, '\0', Size));
+  const uint64_t Length = Nul ? static_cast<uint64_t>(Nul - Name) : Size;
+  return StringRef(Name, Length);
+}
+
+void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr,
+                                          uintptr_t TableAddress) const {
+  if (Addr < TableAddress)
+    report_fatal_error("Section header outside of section header table.");
+
+  uintptr_t Offset = Addr - TableAddress;
+  if (Offset >= getSectionHeaderSize() * getNumberOfSections())
+    report_fatal_error("Section header outside of section header table.");
+
+  if (Offset % getSectionHeaderSize() != 0)
+    report_fatal_error(
+        "Section header pointer does not point to a valid section header.");
+}
+
+const XCOFFSectionHeader32 *
+XCOFFObjectFile::toSection32(DataRefImpl Ref) const {
+  assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+#ifndef NDEBUG
+  checkSectionAddress(Ref.p, getSectionHeaderTableAddress());
+#endif
+  return viewAs<XCOFFSectionHeader32>(Ref.p);
+}
+
+const XCOFFSectionHeader64 *
+XCOFFObjectFile::toSection64(DataRefImpl Ref) const {
+  assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+#ifndef NDEBUG
+  checkSectionAddress(Ref.p, getSectionHeaderTableAddress());
+#endif
+  return viewAs<XCOFFSectionHeader64>(Ref.p);
+}
+
+const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const {
+  assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
+  assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!");
+  // Ref.p holds the address of the symbol table entry being referenced.
+  return viewAs<XCOFFSymbolEntry>(Ref.p);
+}
+
+const XCOFFFileHeader32 *XCOFFObjectFile::fileHeader32() const {
+  assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+  return static_cast<const XCOFFFileHeader32 *>(FileHeader);
+}
+
+const XCOFFFileHeader64 *XCOFFObjectFile::fileHeader64() const {
+  assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+  return static_cast<const XCOFFFileHeader64 *>(FileHeader);
+}
+
+const XCOFFSectionHeader32 *
+XCOFFObjectFile::sectionHeaderTable32() const {
+  assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+  return static_cast<const XCOFFSectionHeader32 *>(SectionHeaderTable);
+}
+
+const XCOFFSectionHeader64 *
+XCOFFObjectFile::sectionHeaderTable64() const {
+  assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+  return static_cast<const XCOFFSectionHeader64 *>(SectionHeaderTable);
+}
+
+void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
+  // Step past this entry and the NumberOfAuxEntries entries that follow it.
+  const XCOFFSymbolEntry *Sym = toSymbolEntry(Symb);
+  Symb.p = reinterpret_cast<uintptr_t>(Sym + Sym->NumberOfAuxEntries + 1);
+}
+
+Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+  const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+
+  if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
+    return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize);
+
+  // A storage class value with the high-order bit on indicates that the name is
+  // a symbolic debugger stabstring.
+  if (SymEntPtr->StorageClass & 0x80)
+    return StringRef("Unimplemented Debug Name");
+
+  uint32_t Offset = SymEntPtr->NameInStrTbl.Offset;
+  // The byte offset is relative to the start of the string table
+  // or .debug section. A byte offset value of 0 is a null or zero-length symbol
+  // name. A byte offset in the range 1 to 3 (inclusive) points into the length
+  // field; as a soft-error recovery mechanism, we treat such cases as having an
+  // offset of 0.
+  if (Offset < 4)
+    return StringRef(nullptr, 0);
+
+  if (StringTable.Data != nullptr && StringTable.Size > Offset)
+    return (StringTable.Data + Offset);
+
+  return make_error<GenericBinaryError>("Symbol Name parse failed",
+                                        object_error::parse_failed);
+}
+
+Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+  uint64_t Result = 0;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+  return toSymbolEntry(Symb)->Value;
+}
+
+uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
+  uint64_t Result = 0;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+Expected<SymbolRef::Type>
+XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
+  llvm_unreachable("Not yet implemented!");
+  return SymbolRef::ST_Other;
+}
+
+Expected<section_iterator>
+XCOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
+  const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+  int16_t SectNum = SymEntPtr->SectionNumber;
+
+  if (isReservedSectionNumber(SectNum))
+    return section_end();
+
+  Expected<DataRefImpl> ExpSec = getSectionByNum(SectNum);
+  if (!ExpSec)
+    return ExpSec.takeError();
+
+  return section_iterator(SectionRef(ExpSec.get(), this));
+}
+
+void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
+  const char *Ptr = reinterpret_cast<const char *>(Sec.p);
+  Sec.p = reinterpret_cast<uintptr_t>(Ptr + getSectionHeaderSize());
+}
+
+Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const {
+  return generateStringRef(getSectionNameInternal(Sec), XCOFF::SectionNameSize);
+}
+
+uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+  // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
+  // with MSVC.
+  if (is64Bit())
+    return toSection64(Sec)->VirtualAddress;
+
+  return toSection32(Sec)->VirtualAddress;
+}
+
+uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const {
+  // XCOFF section numbers start at 1; section number zero is used to mark a
+  // symbol as imported or undefined, so add 1 to the position in the table.
+  if (is64Bit())
+    return toSection64(Sec) - sectionHeaderTable64() + 1;
+
+  return toSection32(Sec) - sectionHeaderTable32() + 1;
+}
+
+uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+  // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
+  // with MSVC.
+  if (is64Bit())
+    return toSection64(Sec)->SectionSize;
+
+  return toSection32(Sec)->SectionSize;
+}
+
+Expected<ArrayRef<uint8_t>>
+XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+  llvm_unreachable("Not yet implemented!");
+}
+
+uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+  uint64_t Result = 0;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+bool XCOFFObjectFile::isSectionCompressed(DataRefImpl Sec) const {
+  bool Result = false;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+bool XCOFFObjectFile::isSectionText(DataRefImpl Sec) const {
+  return getSectionFlags(Sec) & XCOFF::STYP_TEXT;
+}
+
+bool XCOFFObjectFile::isSectionData(DataRefImpl Sec) const {
+  uint32_t Flags = getSectionFlags(Sec);
+  return Flags & (XCOFF::STYP_DATA | XCOFF::STYP_TDATA);
+}
+
+bool XCOFFObjectFile::isSectionBSS(DataRefImpl Sec) const {
+  uint32_t Flags = getSectionFlags(Sec);
+  return Flags & (XCOFF::STYP_BSS | XCOFF::STYP_TBSS);
+}
+
+bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const {
+  bool Result = false;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
+  llvm_unreachable("Not yet implemented!");
+  return relocation_iterator(RelocationRef());
+}
+
+relocation_iterator XCOFFObjectFile::section_rel_end(DataRefImpl Sec) const {
+  llvm_unreachable("Not yet implemented!");
+  return relocation_iterator(RelocationRef());
+}
+
+void XCOFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
+  llvm_unreachable("Not yet implemented!");
+  return;
+}
+
+uint64_t XCOFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
+  llvm_unreachable("Not yet implemented!");
+  uint64_t Result = 0;
+  return Result;
+}
+
+symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
+  llvm_unreachable("Not yet implemented!");
+  return symbol_iterator(SymbolRef());
+}
+
+uint64_t XCOFFObjectFile::getRelocationType(DataRefImpl Rel) const {
+  llvm_unreachable("Not yet implemented!");
+  uint64_t Result = 0;
+  return Result;
+}
+
+void XCOFFObjectFile::getRelocationTypeName(
+    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+  llvm_unreachable("Not yet implemented!");
+  return;
+}
+
+uint32_t XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
+  uint32_t Result = 0;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+basic_symbol_iterator XCOFFObjectFile::symbol_begin() const {
+  assert(!is64Bit() && "64-bit support not implemented yet.");
+  DataRefImpl SymDRI;
+  SymDRI.p = reinterpret_cast<uintptr_t>(SymbolTblPtr);
+  return basic_symbol_iterator(SymbolRef(SymDRI, this));
+}
+
+basic_symbol_iterator XCOFFObjectFile::symbol_end() const {
+  assert(!is64Bit() && "64-bit support not implemented yet.");
+  DataRefImpl SymDRI;
+  SymDRI.p = reinterpret_cast<uintptr_t>(
+      SymbolTblPtr + getLogicalNumberOfSymbolTableEntries32());
+  return basic_symbol_iterator(SymbolRef(SymDRI, this));
+}
+
+section_iterator XCOFFObjectFile::section_begin() const {
+  DataRefImpl DRI;
+  DRI.p = getSectionHeaderTableAddress();
+  return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator XCOFFObjectFile::section_end() const {
+  DataRefImpl DRI;
+  DRI.p = getWithOffset(getSectionHeaderTableAddress(),
+                        getNumberOfSections() * getSectionHeaderSize());
+  return section_iterator(SectionRef(DRI, this));
+}
+
+uint8_t XCOFFObjectFile::getBytesInAddress() const { return is64Bit() ? 8 : 4; }
+
+StringRef XCOFFObjectFile::getFileFormatName() const {
+  return is64Bit() ? "aix5coff64-rs6000" : "aixcoff-rs6000";
+}
+
+Triple::ArchType XCOFFObjectFile::getArch() const {
+  return is64Bit() ? Triple::ppc64 : Triple::ppc;
+}
+
+SubtargetFeatures XCOFFObjectFile::getFeatures() const {
+  llvm_unreachable("Not yet implemented!");
+  return SubtargetFeatures();
+}
+
+bool XCOFFObjectFile::isRelocatableObject() const {
+  bool Result = false;
+  llvm_unreachable("Not yet implemented!");
+  return Result;
+}
+
+Expected<uint64_t> XCOFFObjectFile::getStartAddress() const {
+  // TODO FIXME Should get from auxiliary_header->o_entry when support for the
+  // auxiliary_header is added.
+  return 0;
+}
+
+size_t XCOFFObjectFile::getFileHeaderSize() const {
+  return is64Bit() ? sizeof(XCOFFFileHeader64) : sizeof(XCOFFFileHeader32);
+}
+
+size_t XCOFFObjectFile::getSectionHeaderSize() const {
+  return is64Bit() ? sizeof(XCOFFSectionHeader64) :
+                     sizeof(XCOFFSectionHeader32);
+}
+
+bool XCOFFObjectFile::is64Bit() const {
+  return Binary::ID_XCOFF64 == getType();
+}
+
+uint16_t XCOFFObjectFile::getMagic() const {
+  return is64Bit() ? fileHeader64()->Magic : fileHeader32()->Magic;
+}
+
+Expected<DataRefImpl> XCOFFObjectFile::getSectionByNum(int16_t Num) const {
+  if (Num <= 0 || Num > getNumberOfSections())
+    return errorCodeToError(object_error::invalid_section_index);
+  // Valid numbers start at 1, so Num maps to header (Num - 1) in the table.
+  DataRefImpl DRI;
+  DRI.p = getWithOffset(getSectionHeaderTableAddress(),
+                        getSectionHeaderSize() * (Num - 1));
+  return DRI;
+}
+
+Expected<StringRef>
+XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const {
+  assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
+  int16_t SectionNum = SymEntPtr->SectionNumber;
+
+  switch (SectionNum) {
+  case XCOFF::N_DEBUG:
+    return "N_DEBUG";
+  case XCOFF::N_ABS:
+    return "N_ABS";
+  case XCOFF::N_UNDEF:
+    return "N_UNDEF";
+  default:
+    Expected<DataRefImpl> SecRef = getSectionByNum(SectionNum);
+    if (SecRef)
+      return generateStringRef(getSectionNameInternal(SecRef.get()),
+                               XCOFF::SectionNameSize);
+    return SecRef.takeError();
+  }
+}
+
+bool XCOFFObjectFile::isReservedSectionNumber(int16_t SectionNumber) {
+  return !(SectionNumber > 0 || SectionNumber < -2);
+}
+
+uint16_t XCOFFObjectFile::getNumberOfSections() const {
+  return is64Bit() ? fileHeader64()->NumberOfSections
+                   : fileHeader32()->NumberOfSections;
+}
+
+int32_t XCOFFObjectFile::getTimeStamp() const {
+  return is64Bit() ? fileHeader64()->TimeStamp : fileHeader32()->TimeStamp;
+}
+
+uint16_t XCOFFObjectFile::getOptionalHeaderSize() const {
+  return is64Bit() ? fileHeader64()->AuxHeaderSize
+                   : fileHeader32()->AuxHeaderSize;
+}
+
+uint32_t XCOFFObjectFile::getSymbolTableOffset32() const {
+  return fileHeader32()->SymbolTableOffset;
+}
+
+int32_t XCOFFObjectFile::getRawNumberOfSymbolTableEntries32() const {
+  // Deliberately returns the header field unmodified, including negative
+  // values: printing wants the raw number. For sizing the symbol table use
+  // getLogicalNumberOfSymbolTableEntries32(), which treats negatives as 0.
+  return fileHeader32()->NumberOfSymTableEntries;
+}
+
+uint32_t XCOFFObjectFile::getLogicalNumberOfSymbolTableEntries32() const {
+  // A negative entry count in the header means "no symbol table entries".
+  const int32_t RawCount = fileHeader32()->NumberOfSymTableEntries;
+  return RawCount < 0 ? 0 : RawCount;
+}
+
+uint64_t XCOFFObjectFile::getSymbolTableOffset64() const {
+  return fileHeader64()->SymbolTableOffset; // 64-bit header's offset field.
+}
+
+uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const {
+  return fileHeader64()->NumberOfSymTableEntries; // Unsigned; no clamping.
+}
+
+uint16_t XCOFFObjectFile::getFlags() const { // Flags field of file header.
+  return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags;
+}
+
+const char *XCOFFObjectFile::getSectionNameInternal(DataRefImpl Sec) const {
+  return is64Bit() ? toSection64(Sec)->Name : toSection32(Sec)->Name;
+} // No length here; getSymbolSectionName pairs it with SectionNameSize.
+
+uintptr_t XCOFFObjectFile::getSectionHeaderTableAddress() const {
+  return reinterpret_cast<uintptr_t>(SectionHeaderTable); // For offset math.
+}
+
+int32_t XCOFFObjectFile::getSectionFlags(DataRefImpl Sec) const {
+  return is64Bit() ? toSection64(Sec)->Flags : toSection32(Sec)->Flags;
+} // Sec selects the header; is64Bit() selects how it is interpreted.
+
+XCOFFObjectFile::XCOFFObjectFile(unsigned int Type, MemoryBufferRef Object)
+    : ObjectFile(Type, Object) { // Only XCOFF32/XCOFF64 IDs are expected.
+  assert(Type == Binary::ID_XCOFF32 || Type == Binary::ID_XCOFF64);
+}
+
+ArrayRef<XCOFFSectionHeader64> XCOFFObjectFile::sections64() const {
+  assert(is64Bit() && "64-bit interface called for non 64-bit file.");
+  // Expose the header table as a [begin, end) range of NumberOfSections items.
+  const XCOFFSectionHeader64 *const Begin = sectionHeaderTable64();
+  return ArrayRef<XCOFFSectionHeader64>(Begin, Begin + getNumberOfSections());
+}
+
+ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const {
+  assert(!is64Bit() && "32-bit interface called for non 32-bit file.");
+  // Expose the header table as a [begin, end) range of NumberOfSections items.
+  const XCOFFSectionHeader32 *const Begin = sectionHeaderTable32();
+  return ArrayRef<XCOFFSectionHeader32>(Begin, Begin + getNumberOfSections());
+}
+
+Expected<XCOFFStringTable>
+XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
+  // If there is a string table, then the buffer must contain at least 4 bytes
+  // for the string table's size. Not having a string table is not an error.
+  if (auto EC = Binary::checkOffset(
+          Obj->Data, reinterpret_cast<uintptr_t>(Obj->base() + Offset), 4))
+    return XCOFFStringTable{0, nullptr};
+
+  // Read the big-endian 32-bit size field that starts the string table.
+  uint32_t Size = support::endian::read32be(Obj->base() + Offset);
+
+  // If the size is 4 or less, then the string table is just the size field
+  // itself and carries no string data.
+  if (Size <= 4)
+    return XCOFFStringTable{4, nullptr};
+
+  auto StringTableOrErr =
+      getObject<char>(Obj->Data, Obj->base() + Offset, Size);
+  if (Error E = StringTableOrErr.takeError())
+    return std::move(E);
+
+  const char *StringTablePtr = StringTableOrErr.get();
+  if (StringTablePtr[Size - 1] != '\0')
+    return errorCodeToError(object_error::string_table_non_null_end);
+
+  return XCOFFStringTable{Size, StringTablePtr};
+}
+
+Expected<std::unique_ptr<XCOFFObjectFile>>
+XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
+  // Can't use make_unique because of the private constructor.
+  std::unique_ptr<XCOFFObjectFile> Obj;
+  Obj.reset(new XCOFFObjectFile(Type, MBR));
+
+  uint64_t CurOffset = 0; // Running offset from Base into the buffer.
+  const auto *Base = Obj->base();
+  MemoryBufferRef Data = Obj->Data;
+
+  // Parse file header, which sits at offset 0 of the buffer.
+  auto FileHeaderOrErr =
+      getObject<void>(Data, Base + CurOffset, Obj->getFileHeaderSize());
+  if (Error E = FileHeaderOrErr.takeError())
+    return std::move(E);
+  Obj->FileHeader = FileHeaderOrErr.get();
+
+  CurOffset += Obj->getFileHeaderSize();
+  // TODO FIXME: the optional (auxiliary) header is not parsed yet; only its
+  // size is used, to step over it to the section header table.
+  CurOffset += Obj->getOptionalHeaderSize();
+
+  // Parse the section header table if it is present (NumberOfSections != 0).
+  if (Obj->getNumberOfSections()) {
+    auto SecHeadersOrErr = getObject<void>(Data, Base + CurOffset,
+                                           Obj->getNumberOfSections() *
+                                               Obj->getSectionHeaderSize());
+    if (Error E = SecHeadersOrErr.takeError())
+      return std::move(E);
+    Obj->SectionHeaderTable = SecHeadersOrErr.get();
+  }
+
+  // 64-bit object supports only file header and section headers for now.
+  if (Obj->is64Bit())
+    return std::move(Obj);
+
+  // No symbol table entries (a negative count means 0): parsing is done.
+  if (Obj->getLogicalNumberOfSymbolTableEntries32() == 0)
+    return std::move(Obj);
+
+  // Parse symbol table, located at the offset recorded in the file header.
+  CurOffset = Obj->fileHeader32()->SymbolTableOffset;
+  uint64_t SymbolTableSize = (uint64_t)(sizeof(XCOFFSymbolEntry)) *
+                             Obj->getLogicalNumberOfSymbolTableEntries32();
+  auto SymTableOrErr =
+      getObject<XCOFFSymbolEntry>(Data, Base + CurOffset, SymbolTableSize);
+  if (Error E = SymTableOrErr.takeError())
+    return std::move(E);
+  Obj->SymbolTblPtr = SymTableOrErr.get();
+  CurOffset += SymbolTableSize;
+
+  // Parse String table, which immediately follows the symbol table.
+  Expected<XCOFFStringTable> StringTableOrErr =
+      parseStringTable(Obj.get(), CurOffset);
+  if (Error E = StringTableOrErr.takeError())
+    return std::move(E);
+  Obj->StringTable = StringTableOrErr.get();
+
+  return std::move(Obj);
+}
+
+Expected<std::unique_ptr<ObjectFile>>
+ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef,
+                                  unsigned FileType) {
+  return XCOFFObjectFile::create(FileType, MemBufRef); // Args are swapped.
+}
+
+StringRef XCOFFSectionHeader32::getName() const {
+  return generateStringRef(Name, XCOFF::SectionNameSize); // Name + max size.
+}
+
+StringRef XCOFFSectionHeader64::getName() const {
+  return generateStringRef(Name, XCOFF::SectionNameSize); // Name + max size.
+}
+
+} // namespace object
+} // namespace llvm
diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp
index fdd94f4054e1..b5154467f11a 100644
--- a/lib/ObjectYAML/COFFYAML.cpp
+++ b/lib/ObjectYAML/COFFYAML.cpp
@@ -1,9 +1,8 @@
 //===- COFFYAML.cpp - COFF YAMLIO implementation --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -188,6 +187,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration(
   ECase(IMAGE_REL_ARM_TOKEN);
   ECase(IMAGE_REL_ARM_BLX24);
   ECase(IMAGE_REL_ARM_BLX11);
+  ECase(IMAGE_REL_ARM_REL32);
   ECase(IMAGE_REL_ARM_SECTION);
   ECase(IMAGE_REL_ARM_SECREL);
   ECase(IMAGE_REL_ARM_MOV32A);
@@ -195,6 +195,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration(
   ECase(IMAGE_REL_ARM_BRANCH20T);
   ECase(IMAGE_REL_ARM_BRANCH24T);
   ECase(IMAGE_REL_ARM_BLX23T);
+  ECase(IMAGE_REL_ARM_PAIR);
 }
 
 void ScalarEnumerationTraits<COFF::RelocationTypesARM64>::enumeration(
@@ -216,6 +217,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM64>::enumeration(
   ECase(IMAGE_REL_ARM64_ADDR64);
   ECase(IMAGE_REL_ARM64_BRANCH19);
   ECase(IMAGE_REL_ARM64_BRANCH14);
+  ECase(IMAGE_REL_ARM64_REL32);
 }
 
 void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration(
@@ -576,6 +578,12 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
   else if (Sec.Name == ".debug$H")
     IO.mapOptional("GlobalHashes", Sec.DebugH);
 
+  // Uninitialized sections, such as .bss, typically have no data, but the size
+  // is carried in SizeOfRawData, even though PointerToRawData is zero.
+  if (Sec.SectionData.binary_size() == 0 &&
+      NC->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+    IO.mapOptional("SizeOfRawData", Sec.Header.SizeOfRawData);
+
   IO.mapOptional("Relocations", Sec.Relocations);
 }
 
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 4deeae878013..eeebb694589b 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewYAMLDebugSections.cpp - CodeView YAMLIO debug sections -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 713e9a710e94..227107c051dd 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewYAMLSymbols.cpp - CodeView YAMLIO Symbol implementation ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,7 +147,7 @@ void ScalarEnumerationTraits<CPUType>::enumeration(IO &io, CPUType &Cpu) {
 }
 
 void ScalarEnumerationTraits<RegisterId>::enumeration(IO &io, RegisterId &Reg) {
-  auto RegNames = getRegisterNames();
+  auto RegNames = getRegisterNames(CPUType::X64);
   for (const auto &E : RegNames) {
     io.enumCase(Reg, E.Name.str().c_str(), static_cast<RegisterId>(E.Value));
   }
@@ -249,7 +248,7 @@ struct UnknownSymbolRecord : public SymbolRecordBase {
     uint8_t *Buffer = Allocator.Allocate<uint8_t>(TotalLen);
     ::memcpy(Buffer, &Prefix, sizeof(RecordPrefix));
     ::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size());
-    return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen));
+    return CVSymbol(ArrayRef<uint8_t>(Buffer, TotalLen));
   }
 
   Error fromCodeViewSymbol(CVSymbol CVS) override {
@@ -554,6 +553,12 @@ template <> void SymbolRecordImpl<UsingNamespaceSym>::map(IO &IO) {
   IO.mapRequired("Namespace", Symbol.Name);
 }
 
+template <> void SymbolRecordImpl<AnnotationSym>::map(IO &IO) {
+  IO.mapOptional("Offset", Symbol.CodeOffset, 0U); // Defaults to 0.
+  IO.mapOptional("Segment", Symbol.Segment, uint16_t(0)); // Defaults to 0.
+  IO.mapRequired("Strings", Symbol.Strings); // The only mandatory key.
+}
+
 } // end namespace detail
 } // end namespace CodeViewYAML
 } // end namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
index ed117059560f..e921ae1e7d8d 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewYAMLTypeHashing.cpp - CodeView YAMLIO type hashing ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 791b115dc492..a5e3ce1e71e8 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -1,9 +1,8 @@
 //===- CodeViewYAMLTypes.cpp - CodeView YAMLIO types implementation -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,7 +98,7 @@ template <typename T> struct LeafRecordImpl : public LeafRecordBase {
 
   CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const override {
     TS.writeLeafType(Record);
-    return CVType(Kind, TS.records().back());
+    return CVType(TS.records().back());
   }
 
   mutable T Record;
@@ -497,7 +496,7 @@ CVType LeafRecordImpl<FieldListRecord>::toCodeViewRecord(
     Member.Member->writeTo(CRB);
   }
   TS.insertRecord(CRB);
-  return CVType(Kind, TS.records().back());
+  return CVType(TS.records().back());
 }
 
 void MappingTraits<OneMethodRecord>::mapping(IO &io, OneMethodRecord &Record) {
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
index f23fa1237600..2ae66997cf59 100644
--- a/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -1,9 +1,8 @@
 //===- DWARFEmitter - Convert YAML to DWARF binary data -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/ObjectYAML/DWARFVisitor.cpp b/lib/ObjectYAML/DWARFVisitor.cpp
index e6114c85ac0b..ecb5967ac532 100644
--- a/lib/ObjectYAML/DWARFVisitor.cpp
+++ b/lib/ObjectYAML/DWARFVisitor.cpp
@@ -1,9 +1,8 @@
 //===--- DWARFVisitor.cpp ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h
index 5489031dc331..50e88aa7a26b 100644
--- a/lib/ObjectYAML/DWARFVisitor.h
+++ b/lib/ObjectYAML/DWARFVisitor.h
@@ -1,9 +1,8 @@
 //===--- DWARFVisitor.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/DWARFYAML.cpp b/lib/ObjectYAML/DWARFYAML.cpp
index d6c09e1a35d7..bb3b1422eb62 100644
--- a/lib/ObjectYAML/DWARFYAML.cpp
+++ b/lib/ObjectYAML/DWARFYAML.cpp
@@ -1,9 +1,8 @@
 //===- DWARFYAML.cpp - DWARF YAMLIO implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index 215d6bdd091e..7497154c757d 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -1,9 +1,8 @@
 //===- ELFYAML.cpp - ELF YAMLIO implementation ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -233,8 +232,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFCLASS>::enumeration(
 void ScalarEnumerationTraits<ELFYAML::ELF_ELFDATA>::enumeration(
     IO &IO, ELFYAML::ELF_ELFDATA &Value) {
 #define ECase(X) IO.enumCase(Value, #X, ELF::X)
-  // Since the semantics of ELFDATANONE is "invalid", just don't accept it
-  // here.
+  // ELFDATANONE is an invalid data encoding, but we accept it because
+  // we want to be able to produce invalid binaries for the tests.
+  ECase(ELFDATANONE);
   ECase(ELFDATA2LSB);
   ECase(ELFDATA2MSB);
 #undef ECase
@@ -410,7 +410,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX908, EF_AMDGPU_MACH);
     BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
+    BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
     BCase(EF_AMDGPU_XNACK);
     BCase(EF_AMDGPU_SRAM_ECC);
     break;
@@ -447,7 +451,6 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
   ECase(SHT_GROUP);
   ECase(SHT_SYMTAB_SHNDX);
   ECase(SHT_RELR);
-  ECase(SHT_LOOS);
   ECase(SHT_ANDROID_REL);
   ECase(SHT_ANDROID_RELA);
   ECase(SHT_ANDROID_RELR);
@@ -455,13 +458,12 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
   ECase(SHT_LLVM_LINKER_OPTIONS);
   ECase(SHT_LLVM_CALL_GRAPH_PROFILE);
   ECase(SHT_LLVM_ADDRSIG);
+  ECase(SHT_LLVM_DEPENDENT_LIBRARIES);
   ECase(SHT_GNU_ATTRIBUTES);
   ECase(SHT_GNU_HASH);
   ECase(SHT_GNU_verdef);
   ECase(SHT_GNU_verneed);
   ECase(SHT_GNU_versym);
-  ECase(SHT_HIOS);
-  ECase(SHT_LOPROC);
   switch (Object->Header.Machine) {
   case ELF::EM_ARM:
     ECase(SHT_ARM_EXIDX);
@@ -479,6 +481,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
   case ELF::EM_MIPS:
     ECase(SHT_MIPS_REGINFO);
     ECase(SHT_MIPS_OPTIONS);
+    ECase(SHT_MIPS_DWARF);
     ECase(SHT_MIPS_ABIFLAGS);
     break;
   default:
@@ -486,6 +489,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
     break;
   }
 #undef ECase
+  IO.enumFallback<Hex32>(Value);
 }
 
 void ScalarBitSetTraits<ELFYAML::ELF_PF>::bitset(IO &IO,
@@ -552,6 +556,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
   ECase(SHN_COMMON);
   ECase(SHN_XINDEX);
   ECase(SHN_HIRESERVE);
+  ECase(SHN_AMDGPU_LDS);
   ECase(SHN_HEXAGON_SCOMMON);
   ECase(SHN_HEXAGON_SCOMMON_1);
   ECase(SHN_HEXAGON_SCOMMON_2);
@@ -561,6 +566,17 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
   IO.enumFallback<Hex32>(Value);
 }
 
+void ScalarEnumerationTraits<ELFYAML::ELF_STB>::enumeration(
+    IO &IO, ELFYAML::ELF_STB &Value) {
+  // Named symbol bindings; each YAML name is the ELF constant's identifier.
+  IO.enumCase(Value, "STB_LOCAL", ELF::STB_LOCAL);
+  IO.enumCase(Value, "STB_GLOBAL", ELF::STB_GLOBAL);
+  IO.enumCase(Value, "STB_WEAK", ELF::STB_WEAK);
+  IO.enumCase(Value, "STB_GNU_UNIQUE", ELF::STB_GNU_UNIQUE);
+  // Fallback representation for any other value: Hex8.
+  IO.enumFallback<Hex8>(Value);
+}
+
 void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
     IO &IO, ELFYAML::ELF_STT &Value) {
 #define ECase(X) IO.enumCase(Value, #X, ELF::X)
@@ -573,6 +589,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
   ECase(STT_TLS);
   ECase(STT_GNU_IFUNC);
 #undef ECase
+  IO.enumFallback<Hex8>(Value);
 }
 
 void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
@@ -661,6 +678,74 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
   IO.enumFallback<Hex32>(Value);
 }
 
+void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
+    IO &IO, ELFYAML::ELF_DYNTAG &Value) {
+  const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+  assert(Object && "The IO context is not initialized");
+
+// All arch-specific tag macros start out empty; a case below may enable one.
+#define AARCH64_DYNAMIC_TAG(name, value)
+#define MIPS_DYNAMIC_TAG(name, value)
+#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG(name, value)
+#define PPC64_DYNAMIC_TAG(name, value)
+// Ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
+#define DYNAMIC_TAG_MARKER(name, value)
+
+#define STRINGIFY(X) (#X)
+#define DYNAMIC_TAG(X, Y) IO.enumCase(Value, STRINGIFY(DT_##X), ELF::DT_##X);
+  switch (Object->Header.Machine) { // Machine picks which macro is enabled.
+  case ELF::EM_AARCH64:
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value)
+    break;
+  case ELF::EM_MIPS:
+#undef MIPS_DYNAMIC_TAG
+#define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef MIPS_DYNAMIC_TAG
+#define MIPS_DYNAMIC_TAG(name, value)
+    break;
+  case ELF::EM_HEXAGON:
+#undef HEXAGON_DYNAMIC_TAG
+#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef HEXAGON_DYNAMIC_TAG
+#define HEXAGON_DYNAMIC_TAG(name, value)
+    break;
+  case ELF::EM_PPC:
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value)
+    break;
+  case ELF::EM_PPC64:
+#undef PPC64_DYNAMIC_TAG
+#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC64_DYNAMIC_TAG
+#define PPC64_DYNAMIC_TAG(name, value)
+    break;
+  default: // No arch-specific macro is enabled for other machines.
+#include "llvm/BinaryFormat/DynamicTags.def"
+    break;
+  }
+#undef AARCH64_DYNAMIC_TAG
+#undef MIPS_DYNAMIC_TAG
+#undef HEXAGON_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG
+#undef PPC64_DYNAMIC_TAG
+#undef DYNAMIC_TAG_MARKER
+#undef STRINGIFY
+#undef DYNAMIC_TAG
+
+  IO.enumFallback<Hex64>(Value); // Fallback representation: Hex64.
+}
+
 void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
     IO &IO, ELFYAML::MIPS_AFL_REG &Value) {
 #define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X)
@@ -758,6 +843,11 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
   IO.mapRequired("Machine", FileHdr.Machine);
   IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0));
   IO.mapOptional("Entry", FileHdr.Entry, Hex64(0));
+
+  IO.mapOptional("SHEntSize", FileHdr.SHEntSize);
+  IO.mapOptional("SHOffset", FileHdr.SHOffset);
+  IO.mapOptional("SHNum", FileHdr.SHNum);
+  IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx);
 }
 
 void MappingTraits<ELFYAML::ProgramHeader>::mapping(
@@ -768,6 +858,9 @@ void MappingTraits<ELFYAML::ProgramHeader>::mapping(
   IO.mapOptional("VAddr", Phdr.VAddr, Hex64(0));
   IO.mapOptional("PAddr", Phdr.PAddr, Hex64(0));
   IO.mapOptional("Align", Phdr.Align);
+  IO.mapOptional("FileSize", Phdr.FileSize);
+  IO.mapOptional("MemSize", Phdr.MemSize);
+  IO.mapOptional("Offset", Phdr.Offset);
 }
 
 namespace {
@@ -788,12 +881,13 @@ struct NormalizedOther {
 
 void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
   IO.mapOptional("Name", Symbol.Name, StringRef());
+  IO.mapOptional("NameIndex", Symbol.NameIndex);
   IO.mapOptional("Type", Symbol.Type, ELFYAML::ELF_STT(0));
   IO.mapOptional("Section", Symbol.Section, StringRef());
   IO.mapOptional("Index", Symbol.Index);
+  IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0));
   IO.mapOptional("Value", Symbol.Value, Hex64(0));
   IO.mapOptional("Size", Symbol.Size, Hex64(0));
-
   MappingNormalization<NormalizedOther, uint8_t> Keys(IO, Symbol.Other);
   IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0));
   IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0));
@@ -801,40 +895,44 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
 
 StringRef MappingTraits<ELFYAML::Symbol>::validate(IO &IO,
                                                    ELFYAML::Symbol &Symbol) {
-  if (Symbol.Index && Symbol.Section.data()) {
+  if (Symbol.Index && Symbol.Section.data())
     return "Index and Section cannot both be specified for Symbol";
-  }
-  if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX)) {
+  if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX))
     return "Large indexes are not supported";
-  }
-  if (Symbol.Index && *Symbol.Index < ELFYAML::ELF_SHN(ELF::SHN_LORESERVE)) {
-    return "Use a section name to define which section a symbol is defined in";
-  }
+  if (Symbol.NameIndex && !Symbol.Name.empty())
+    return "Name and NameIndex cannot both be specified for Symbol";
   return StringRef();
 }
 
-void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
-    IO &IO, ELFYAML::LocalGlobalWeakSymbols &Symbols) {
-  IO.mapOptional("Local", Symbols.Local);
-  IO.mapOptional("Global", Symbols.Global);
-  IO.mapOptional("Weak", Symbols.Weak);
-}
-
 static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
   IO.mapOptional("Name", Section.Name, StringRef());
   IO.mapRequired("Type", Section.Type);
-  IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0));
+  IO.mapOptional("Flags", Section.Flags);
   IO.mapOptional("Address", Section.Address, Hex64(0));
   IO.mapOptional("Link", Section.Link, StringRef());
   IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
   IO.mapOptional("EntSize", Section.EntSize);
-  IO.mapOptional("Info", Section.Info, StringRef());
+
+  // obj2yaml does not dump these fields. They are expected to be empty when we
+  // are producing YAML, because yaml2obj sets appropriate values for sh_offset
+  // and sh_size automatically when they are not explicitly defined.
+  assert(!IO.outputting() ||
+         (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue()));
+  IO.mapOptional("ShOffset", Section.ShOffset);
+  IO.mapOptional("ShSize", Section.ShSize);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::DynamicSection &Section) {
+  commonSectionMapping(IO, Section); // Keys shared by every section kind.
+  IO.mapOptional("Entries", Section.Entries);
+  IO.mapOptional("Content", Section.Content); // Both payload keys optional.
+}
 
 static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
   commonSectionMapping(IO, Section);
   IO.mapOptional("Content", Section.Content);
-  IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size()));
+  IO.mapOptional("Size", Section.Size);
+  IO.mapOptional("Info", Section.Info);
 }
 
 static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
@@ -842,14 +940,33 @@ static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
   IO.mapOptional("Size", Section.Size, Hex64(0));
 }
 
+static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) {
+  commonSectionMapping(IO, Section); // Keys shared by every section kind.
+  IO.mapRequired("Info", Section.Info); // Mandatory for this section type.
+  IO.mapRequired("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) {
+  commonSectionMapping(IO, Section); // Keys shared by every section kind.
+  IO.mapRequired("Entries", Section.Entries); // The only type-specific key.
+}
+
+static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) {
+  commonSectionMapping(IO, Section); // Keys shared by every section kind.
+  IO.mapRequired("Info", Section.Info); // Mandatory for this section type.
+  IO.mapRequired("Dependencies", Section.VerneedV); // Key != member name.
+}
+
 static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
   commonSectionMapping(IO, Section);
+  IO.mapOptional("Info", Section.RelocatableSec, StringRef());
   IO.mapOptional("Relocations", Section.Relocations);
 }
 
-static void groupSectionMapping(IO &IO, ELFYAML::Group &group) {
-  commonSectionMapping(IO, group);
-  IO.mapRequired("Members", group.Members);
+static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) {
+  commonSectionMapping(IO, Group);
+  IO.mapOptional("Info", Group.Signature, StringRef());
+  IO.mapRequired("Members", Group.Members);
 }
 
 void MappingTraits<ELFYAML::SectionOrType>::mapping(
@@ -891,6 +1008,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
     IO.mapRequired("Type", sectionType);
 
   switch (sectionType) {
+  case ELF::SHT_DYNAMIC:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::DynamicSection());
+    sectionMapping(IO, *cast<ELFYAML::DynamicSection>(Section.get()));
+    break;
   case ELF::SHT_REL:
   case ELF::SHT_RELA:
     if (!IO.outputting())
@@ -912,6 +1034,21 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
       Section.reset(new ELFYAML::MipsABIFlags());
     sectionMapping(IO, *cast<ELFYAML::MipsABIFlags>(Section.get()));
     break;
+  case ELF::SHT_GNU_verdef:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::VerdefSection());
+    sectionMapping(IO, *cast<ELFYAML::VerdefSection>(Section.get()));
+    break;
+  case ELF::SHT_GNU_versym:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::SymverSection());
+    sectionMapping(IO, *cast<ELFYAML::SymverSection>(Section.get()));
+    break;
+  case ELF::SHT_GNU_verneed:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::VerneedSection());
+    sectionMapping(IO, *cast<ELFYAML::VerneedSection>(Section.get()));
+    break;
   default:
     if (!IO.outputting())
       Section.reset(new ELFYAML::RawContentSection());
@@ -922,9 +1059,12 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
 StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
     IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
   const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(Section.get());
-  if (!RawSection || RawSection->Size >= RawSection->Content.binary_size())
-    return StringRef();
-  return "Section size must be greater or equal to the content size";
+  if (!RawSection)
+    return {};
+  if (RawSection->Size && RawSection->Content &&
+      (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
+    return "Section size must be greater than or equal to the content size";
+  return {};
 }
 
 namespace {
@@ -952,6 +1092,44 @@ struct NormalizedMips64RelType {
 
 } // end anonymous namespace
 
+void MappingTraits<ELFYAML::DynamicEntry>::mapping(IO &IO,
+                                                   ELFYAML::DynamicEntry &Rel) {
+  assert(IO.getContext() && "The IO context is not initialized");
+
+  IO.mapRequired("Tag", Rel.Tag); // Both keys are mandatory.
+  IO.mapRequired("Value", Rel.Val); // YAML key "Value" maps to member Val.
+}
+
+void MappingTraits<ELFYAML::VerdefEntry>::mapping(IO &IO,
+                                                  ELFYAML::VerdefEntry &E) {
+  assert(IO.getContext() && "The IO context is not initialized");
+
+  IO.mapRequired("Version", E.Version); // Every key below is mandatory.
+  IO.mapRequired("Flags", E.Flags);
+  IO.mapRequired("VersionNdx", E.VersionNdx);
+  IO.mapRequired("Hash", E.Hash);
+  IO.mapRequired("Names", E.VerNames); // YAML key "Names" maps to VerNames.
+}
+
+void MappingTraits<ELFYAML::VerneedEntry>::mapping(IO &IO,
+                                                   ELFYAML::VerneedEntry &E) {
+  assert(IO.getContext() && "The IO context is not initialized");
+
+  IO.mapRequired("Version", E.Version); // Every key below is mandatory.
+  IO.mapRequired("File", E.File);
+  IO.mapRequired("Entries", E.AuxV); // YAML key "Entries" maps to AuxV.
+}
+
+void MappingTraits<ELFYAML::VernauxEntry>::mapping(IO &IO,
+                                                   ELFYAML::VernauxEntry &E) {
+  assert(IO.getContext() && "The IO context is not initialized");
+
+  IO.mapRequired("Name", E.Name); // Every key below is mandatory.
+  IO.mapRequired("Hash", E.Hash);
+  IO.mapRequired("Flags", E.Flags);
+  IO.mapRequired("Other", E.Other);
+}
+
 void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
                                                  ELFYAML::Relocation &Rel) {
   const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index e00a4ea93074..d12f12cf4435 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -1,9 +1,8 @@
 //===- MachOYAML.cpp - MachO YAMLIO implementation ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp
new file mode 100644
index 000000000000..f5f2acd0cc4b
--- /dev/null
+++ b/lib/ObjectYAML/MinidumpYAML.cpp
@@ -0,0 +1,673 @@
+//===- MinidumpYAML.cpp - Minidump YAMLIO implementation ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+using namespace llvm::MinidumpYAML;
+using namespace llvm::minidump;
+
+namespace {
+/// A helper class to manage the placement of various structures into the final
+/// minidump binary. Space for objects can be allocated via various allocate***
+/// methods, while the final minidump file is written by calling the writeTo
+/// method. The plain versions of allocation functions take a reference to the
+/// data which is to be written (and hence the data must be available until
+/// writeTo is called), while the "New" versions allocate the data in an
+/// allocator-managed buffer, which is available until the allocator object is
+/// destroyed. For both kinds of functions, it is possible to modify the
+/// data for which the space has been "allocated" until the final writeTo call.
+/// This is useful for "linking" the allocated structures via their offsets.
+class BlobAllocator {
+public:
+  size_t tell() const { return NextOffset; }
+
+  size_t allocateCallback(size_t Size,
+                          std::function<void(raw_ostream &)> Callback) {
+    size_t Offset = NextOffset;
+    NextOffset += Size;
+    Callbacks.push_back(std::move(Callback));
+    return Offset;
+  }
+
+  size_t allocateBytes(ArrayRef<uint8_t> Data) {
+    return allocateCallback(
+        Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
+  }
+
+  size_t allocateBytes(yaml::BinaryRef Data) {
+    return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
+      Data.writeAsBinary(OS);
+    });
+  }
+
+  template <typename T> size_t allocateArray(ArrayRef<T> Data) {
+    return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
+                          sizeof(T) * Data.size()});
+  }
+
+  template <typename T, typename RangeType>
+  std::pair<size_t, MutableArrayRef<T>>
+  allocateNewArray(const iterator_range<RangeType> &Range);
+
+  template <typename T> size_t allocateObject(const T &Data) {
+    return allocateArray(makeArrayRef(Data));
+  }
+
+  template <typename T, typename... Types>
+  std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
+    T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
+    return {allocateObject(*Object), Object};
+  }
+
+  size_t allocateString(StringRef Str);
+
+  void writeTo(raw_ostream &OS) const;
+
+private:
+  size_t NextOffset = 0;
+
+  BumpPtrAllocator Temporaries;
+  std::vector<std::function<void(raw_ostream &)>> Callbacks;
+};
+} // namespace
+
+template <typename T, typename RangeType>
+std::pair<size_t, MutableArrayRef<T>>
+BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
+  size_t Num = std::distance(Range.begin(), Range.end());
+  MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
+  std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
+  return {allocateArray(Array), Array};
+}
+
+size_t BlobAllocator::allocateString(StringRef Str) {
+  SmallVector<UTF16, 32> WStr;
+  bool OK = convertUTF8ToUTF16String(Str, WStr);
+  assert(OK && "Invalid UTF8 in Str?");
+  (void)OK;
+
+  // The utf16 string is null-terminated, but the terminator is not counted in
+  // the string size.
+  WStr.push_back(0);
+  size_t Result =
+      allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
+  allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
+  return Result;
+}
+
+void BlobAllocator::writeTo(raw_ostream &OS) const {
+  size_t BeginOffset = OS.tell();
+  for (const auto &Callback : Callbacks)
+    Callback(OS);
+  assert(OS.tell() == BeginOffset + NextOffset &&
+         "Callbacks wrote an unexpected number of bytes.");
+  (void)BeginOffset;
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type EndianType. The
+/// only purpose of this function is to avoid casting the Default value to the
+/// endian type at every call site.
+template <typename EndianType>
+static inline void mapOptional(yaml::IO &IO, const char *Key, EndianType &Val,
+                               typename EndianType::value_type Default) {
+  IO.mapOptional(Key, Val, EndianType(Default));
+}
+
+/// Yaml-map an endian-aware type EndianType as some other type MapType.
+template <typename MapType, typename EndianType>
+static inline void mapRequiredAs(yaml::IO &IO, const char *Key,
+                                 EndianType &Val) {
+  MapType Mapped = static_cast<typename EndianType::value_type>(Val);
+  IO.mapRequired(Key, Mapped);
+  Val = static_cast<typename EndianType::value_type>(Mapped);
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type EndianType as some
+/// other type MapType.
+template <typename MapType, typename EndianType>
+static inline void mapOptionalAs(yaml::IO &IO, const char *Key, EndianType &Val,
+                                 MapType Default) {
+  MapType Mapped = static_cast<typename EndianType::value_type>(Val);
+  IO.mapOptional(Key, Mapped, Default);
+  Val = static_cast<typename EndianType::value_type>(Mapped);
+}
+
+namespace {
+/// Return the appropriate yaml Hex type for a given endian-aware type.
+template <typename EndianType> struct HexType;
+template <> struct HexType<support::ulittle16_t> { using type = yaml::Hex16; };
+template <> struct HexType<support::ulittle32_t> { using type = yaml::Hex32; };
+template <> struct HexType<support::ulittle64_t> { using type = yaml::Hex64; };
+} // namespace
+
+/// Yaml-map an endian-aware type as an appropriately-sized hex value.
+template <typename EndianType>
+static inline void mapRequiredHex(yaml::IO &IO, const char *Key,
+                                  EndianType &Val) {
+  mapRequiredAs<typename HexType<EndianType>::type>(IO, Key, Val);
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type as an
+/// appropriately-sized hex value.
+template <typename EndianType>
+static inline void mapOptionalHex(yaml::IO &IO, const char *Key,
+                                  EndianType &Val,
+                                  typename EndianType::value_type Default) {
+  mapOptionalAs<typename HexType<EndianType>::type>(IO, Key, Val, Default);
+}
+
+Stream::~Stream() = default;
+
+Stream::StreamKind Stream::getKind(StreamType Type) {
+  switch (Type) {
+  case StreamType::MemoryList:
+    return StreamKind::MemoryList;
+  case StreamType::ModuleList:
+    return StreamKind::ModuleList;
+  case StreamType::SystemInfo:
+    return StreamKind::SystemInfo;
+  case StreamType::LinuxCPUInfo:
+  case StreamType::LinuxProcStatus:
+  case StreamType::LinuxLSBRelease:
+  case StreamType::LinuxCMDLine:
+  case StreamType::LinuxMaps:
+  case StreamType::LinuxProcStat:
+  case StreamType::LinuxProcUptime:
+    return StreamKind::TextContent;
+  case StreamType::ThreadList:
+    return StreamKind::ThreadList;
+  default:
+    return StreamKind::RawContent;
+  }
+}
+
+/// Create a new stream object of the class matching Type's stream kind.
+std::unique_ptr<Stream> Stream::create(StreamType Type) {
+  switch (getKind(Type)) {
+  case StreamKind::MemoryList:
+    return llvm::make_unique<MemoryListStream>();
+  case StreamKind::ModuleList:
+    return llvm::make_unique<ModuleListStream>();
+  case StreamKind::RawContent:
+    return llvm::make_unique<RawContentStream>(Type);
+  case StreamKind::SystemInfo:
+    return llvm::make_unique<SystemInfoStream>();
+  case StreamKind::TextContent:
+    return llvm::make_unique<TextContentStream>(Type);
+  case StreamKind::ThreadList:
+    return llvm::make_unique<ThreadListStream>();
+  }
+  llvm_unreachable("Unhandled stream kind!");
+}
+
+void yaml::ScalarEnumerationTraits<ProcessorArchitecture>::enumeration(
+    IO &IO, ProcessorArchitecture &Arch) {
+#define HANDLE_MDMP_ARCH(CODE, NAME)                                           \
+  IO.enumCase(Arch, #NAME, ProcessorArchitecture::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+  IO.enumFallback<Hex16>(Arch);
+}
+
+void yaml::ScalarEnumerationTraits<OSPlatform>::enumeration(IO &IO,
+                                                            OSPlatform &Plat) {
+#define HANDLE_MDMP_PLATFORM(CODE, NAME)                                       \
+  IO.enumCase(Plat, #NAME, OSPlatform::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+  IO.enumFallback<Hex32>(Plat);
+}
+
+void yaml::ScalarEnumerationTraits<StreamType>::enumeration(IO &IO,
+                                                            StreamType &Type) {
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME)                                    \
+  IO.enumCase(Type, #NAME, StreamType::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+  IO.enumFallback<Hex32>(Type);
+}
+
+void yaml::MappingTraits<CPUInfo::ArmInfo>::mapping(IO &IO,
+                                                    CPUInfo::ArmInfo &Info) {
+  mapRequiredHex(IO, "CPUID", Info.CPUID);
+  mapOptionalHex(IO, "ELF hwcaps", Info.ElfHWCaps, 0);
+}
+
+namespace {
+template <std::size_t N> struct FixedSizeHex {
+  FixedSizeHex(uint8_t (&Storage)[N]) : Storage(Storage) {}
+
+  uint8_t (&Storage)[N];
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <std::size_t N> struct ScalarTraits<FixedSizeHex<N>> {
+  static void output(const FixedSizeHex<N> &Fixed, void *, raw_ostream &OS) {
+    OS << toHex(makeArrayRef(Fixed.Storage));
+  }
+
+  static StringRef input(StringRef Scalar, void *, FixedSizeHex<N> &Fixed) {
+    if (!all_of(Scalar, isHexDigit))
+      return "Invalid hex digit in input";
+    if (Scalar.size() < 2 * N)
+      return "String too short";
+    if (Scalar.size() > 2 * N)
+      return "String too long";
+    copy(fromHex(Scalar), Fixed.Storage);
+    return "";
+  }
+
+  static QuotingType mustQuote(StringRef S) { return QuotingType::None; }
+};
+} // namespace yaml
+} // namespace llvm
+void yaml::MappingTraits<CPUInfo::OtherInfo>::mapping(
+    IO &IO, CPUInfo::OtherInfo &Info) {
+  FixedSizeHex<sizeof(Info.ProcessorFeatures)> Features(Info.ProcessorFeatures);
+  IO.mapRequired("Features", Features);
+}
+
+namespace {
+/// A type which only accepts strings of a fixed size for yaml conversion.
+template <std::size_t N> struct FixedSizeString {
+  FixedSizeString(char (&Storage)[N]) : Storage(Storage) {}
+
+  char (&Storage)[N];
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <std::size_t N> struct ScalarTraits<FixedSizeString<N>> {
+  static void output(const FixedSizeString<N> &Fixed, void *, raw_ostream &OS) {
+    OS << StringRef(Fixed.Storage, N);
+  }
+
+  static StringRef input(StringRef Scalar, void *, FixedSizeString<N> &Fixed) {
+    if (Scalar.size() < N)
+      return "String too short";
+    if (Scalar.size() > N)
+      return "String too long";
+    copy(Scalar, Fixed.Storage);
+    return "";
+  }
+
+  static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
+};
+} // namespace yaml
+} // namespace llvm
+
+void yaml::MappingTraits<CPUInfo::X86Info>::mapping(IO &IO,
+                                                    CPUInfo::X86Info &Info) {
+  FixedSizeString<sizeof(Info.VendorID)> VendorID(Info.VendorID);
+  IO.mapRequired("Vendor ID", VendorID);
+
+  mapRequiredHex(IO, "Version Info", Info.VersionInfo);
+  mapRequiredHex(IO, "Feature Info", Info.FeatureInfo);
+  mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0);
+}
+
+void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
+                                                   VSFixedFileInfo &Info) {
+  mapOptionalHex(IO, "Signature", Info.Signature, 0);
+  mapOptionalHex(IO, "Struct Version", Info.StructVersion, 0);
+  mapOptionalHex(IO, "File Version High", Info.FileVersionHigh, 0);
+  mapOptionalHex(IO, "File Version Low", Info.FileVersionLow, 0);
+  mapOptionalHex(IO, "Product Version High", Info.ProductVersionHigh, 0);
+  mapOptionalHex(IO, "Product Version Low", Info.ProductVersionLow, 0);
+  mapOptionalHex(IO, "File Flags Mask", Info.FileFlagsMask, 0);
+  mapOptionalHex(IO, "File Flags", Info.FileFlags, 0);
+  mapOptionalHex(IO, "File OS", Info.FileOS, 0);
+  mapOptionalHex(IO, "File Type", Info.FileType, 0);
+  mapOptionalHex(IO, "File Subtype", Info.FileSubtype, 0);
+  mapOptionalHex(IO, "File Date High", Info.FileDateHigh, 0);
+  mapOptionalHex(IO, "File Date Low", Info.FileDateLow, 0);
+}
+
+void yaml::MappingTraits<ModuleListStream::entry_type>::mapping(
+    IO &IO, ModuleListStream::entry_type &M) {
+  mapRequiredHex(IO, "Base of Image", M.Entry.BaseOfImage);
+  mapRequiredHex(IO, "Size of Image", M.Entry.SizeOfImage);
+  mapOptionalHex(IO, "Checksum", M.Entry.Checksum, 0);
+  IO.mapOptional("Time Date Stamp", M.Entry.TimeDateStamp,
+                 support::ulittle32_t(0));
+  IO.mapRequired("Module Name", M.Name);
+  IO.mapOptional("Version Info", M.Entry.VersionInfo, VSFixedFileInfo());
+  IO.mapRequired("CodeView Record", M.CvRecord);
+  IO.mapOptional("Misc Record", M.MiscRecord, yaml::BinaryRef());
+  mapOptionalHex(IO, "Reserved0", M.Entry.Reserved0, 0);
+  mapOptionalHex(IO, "Reserved1", M.Entry.Reserved1, 0);
+}
+
+static void streamMapping(yaml::IO &IO, RawContentStream &Stream) {
+  IO.mapOptional("Content", Stream.Content);
+  IO.mapOptional("Size", Stream.Size, Stream.Content.binary_size());
+}
+
+/// Return an error message if Size is smaller than the Content, else "".
+static StringRef streamValidate(RawContentStream &Stream) {
+  const bool Fits = Stream.Size.value >= Stream.Content.binary_size();
+  return Fits ? "" : "Stream size must be greater or equal to the content size";
+}
+
+void yaml::MappingTraits<MemoryListStream::entry_type>::mapping(
+    IO &IO, MemoryListStream::entry_type &Range) {
+  MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping(
+      IO, Range.Entry, Range.Content);
+}
+
+static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) {
+  IO.mapRequired("Memory Ranges", Stream.Entries);
+}
+
+static void streamMapping(yaml::IO &IO, ModuleListStream &Stream) {
+  IO.mapRequired("Modules", Stream.Entries);
+}
+
+static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
+  SystemInfo &Info = Stream.Info;
+  IO.mapRequired("Processor Arch", Info.ProcessorArch);
+  mapOptional(IO, "Processor Level", Info.ProcessorLevel, 0);
+  mapOptional(IO, "Processor Revision", Info.ProcessorRevision, 0);
+  IO.mapOptional("Number of Processors", Info.NumberOfProcessors, 0);
+  IO.mapOptional("Product type", Info.ProductType, 0);
+  mapOptional(IO, "Major Version", Info.MajorVersion, 0);
+  mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
+  mapOptional(IO, "Build Number", Info.BuildNumber, 0);
+  IO.mapRequired("Platform ID", Info.PlatformId);
+  IO.mapOptional("CSD Version", Stream.CSDVersion, "");
+  mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
+  mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
+  switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
+  case ProcessorArchitecture::X86:
+  case ProcessorArchitecture::AMD64:
+    IO.mapOptional("CPU", Info.CPU.X86);
+    break;
+  case ProcessorArchitecture::ARM:
+  case ProcessorArchitecture::ARM64:
+    IO.mapOptional("CPU", Info.CPU.Arm);
+    break;
+  default:
+    IO.mapOptional("CPU", Info.CPU.Other);
+    break;
+  }
+}
+
+static void streamMapping(yaml::IO &IO, TextContentStream &Stream) {
+  IO.mapOptional("Text", Stream.Text);
+}
+
+void yaml::MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping(
+    IO &IO, MemoryDescriptor &Memory, BinaryRef &Content) {
+  mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange);
+  IO.mapRequired("Content", Content);
+}
+
+void yaml::MappingTraits<ThreadListStream::entry_type>::mapping(
+    IO &IO, ThreadListStream::entry_type &T) {
+  mapRequiredHex(IO, "Thread Id", T.Entry.ThreadId);
+  mapOptionalHex(IO, "Suspend Count", T.Entry.SuspendCount, 0);
+  mapOptionalHex(IO, "Priority Class", T.Entry.PriorityClass, 0);
+  mapOptionalHex(IO, "Priority", T.Entry.Priority, 0);
+  mapOptionalHex(IO, "Environment Block", T.Entry.EnvironmentBlock, 0);
+  IO.mapRequired("Context", T.Context);
+  IO.mapRequired("Stack", T.Entry.Stack, T.Stack);
+}
+
+static void streamMapping(yaml::IO &IO, ThreadListStream &Stream) {
+  IO.mapRequired("Threads", Stream.Entries);
+}
+
+void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
+    yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
+  // Value-initialize on input: if the required "Type" key is missing, Type is
+  // still read by Stream::create() below and must not be indeterminate.
+  StreamType Type = IO.outputting() ? S->Type : StreamType();
+  IO.mapRequired("Type", Type);
+
+  if (!IO.outputting())
+    S = MinidumpYAML::Stream::create(Type);
+  switch (S->Kind) {
+  case MinidumpYAML::Stream::StreamKind::MemoryList:
+    streamMapping(IO, llvm::cast<MemoryListStream>(*S));
+    break;
+  case MinidumpYAML::Stream::StreamKind::ModuleList:
+    streamMapping(IO, llvm::cast<ModuleListStream>(*S));
+    break;
+  case MinidumpYAML::Stream::StreamKind::RawContent:
+    streamMapping(IO, llvm::cast<RawContentStream>(*S));
+    break;
+  case MinidumpYAML::Stream::StreamKind::SystemInfo:
+    streamMapping(IO, llvm::cast<SystemInfoStream>(*S));
+    break;
+  case MinidumpYAML::Stream::StreamKind::TextContent:
+    streamMapping(IO, llvm::cast<TextContentStream>(*S));
+    break;
+  case MinidumpYAML::Stream::StreamKind::ThreadList:
+    streamMapping(IO, llvm::cast<ThreadListStream>(*S));
+    break;
+  }
+}
+
+StringRef yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
+    yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
+  switch (S->Kind) {
+  case MinidumpYAML::Stream::StreamKind::RawContent:
+    return streamValidate(cast<RawContentStream>(*S));
+  case MinidumpYAML::Stream::StreamKind::MemoryList:
+  case MinidumpYAML::Stream::StreamKind::ModuleList:
+  case MinidumpYAML::Stream::StreamKind::SystemInfo:
+  case MinidumpYAML::Stream::StreamKind::TextContent:
+  case MinidumpYAML::Stream::StreamKind::ThreadList:
+    return "";
+  }
+  llvm_unreachable("Fully covered switch above!");
+}
+
+void yaml::MappingTraits<Object>::mapping(IO &IO, Object &O) {
+  IO.mapTag("!minidump", true);
+  mapOptionalHex(IO, "Signature", O.Header.Signature, Header::MagicSignature);
+  mapOptionalHex(IO, "Version", O.Header.Version, Header::MagicVersion);
+  mapOptionalHex(IO, "Flags", O.Header.Flags, 0);
+  IO.mapRequired("Streams", O.Streams);
+}
+
+static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
+  return {support::ulittle32_t(Data.binary_size()),
+          support::ulittle32_t(File.allocateBytes(Data))};
+}
+
+static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
+  Range.Entry.Memory = layout(File, Range.Content);
+}
+
+static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
+  M.Entry.ModuleNameRVA = File.allocateString(M.Name);
+
+  M.Entry.CvRecord = layout(File, M.CvRecord);
+  M.Entry.MiscRecord = layout(File, M.MiscRecord);
+}
+
+static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
+  T.Entry.Stack.Memory = layout(File, T.Stack);
+  T.Entry.Context = layout(File, T.Context);
+}
+
+/// Lay out a list stream: the entry count, each entry's Entry record, and then
+/// the per-entry auxiliary data. Returns the offset where the stream data ends.
+template <typename EntryT>
+static size_t layout(BlobAllocator &File,
+                     MinidumpYAML::detail::ListStream<EntryT> &S) {
+  File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
+  for (auto &E : S.Entries)
+    File.allocateObject(E.Entry);
+
+  const size_t DataEnd = File.tell();
+
+  // Lay out the auxiliary data, (which is not a part of the stream).
+  for (auto &E : S.Entries)
+    layout(File, E);
+
+  return DataEnd;
+}
+
+static Directory layout(BlobAllocator &File, Stream &S) {
+  Directory Result;
+  Result.Type = S.Type;
+  Result.Location.RVA = File.tell();
+  Optional<size_t> DataEnd;
+  switch (S.Kind) {
+  case Stream::StreamKind::MemoryList:
+    DataEnd = layout(File, cast<MemoryListStream>(S));
+    break;
+  case Stream::StreamKind::ModuleList:
+    DataEnd = layout(File, cast<ModuleListStream>(S));
+    break;
+  case Stream::StreamKind::RawContent: {
+    RawContentStream &Raw = cast<RawContentStream>(S);
+    File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
+      Raw.Content.writeAsBinary(OS);
+      assert(Raw.Content.binary_size() <= Raw.Size);
+      OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
+    });
+    break;
+  }
+  case Stream::StreamKind::SystemInfo: {
+    SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
+    File.allocateObject(SystemInfo.Info);
+    // The CSD string is not a part of the stream.
+    DataEnd = File.tell();
+    SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
+    break;
+  }
+  case Stream::StreamKind::TextContent:
+    File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
+    break;
+  case Stream::StreamKind::ThreadList:
+    DataEnd = layout(File, cast<ThreadListStream>(S));
+    break;
+  }
+  // If DataEnd is not set, we assume everything we generated is a part of the
+  // stream.
+  Result.Location.DataSize =
+      DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
+  return Result;
+}
+
+void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) {
+  BlobAllocator File;
+  File.allocateObject(Obj.Header);
+
+  std::vector<Directory> StreamDirectory(Obj.Streams.size());
+  Obj.Header.StreamDirectoryRVA =
+      File.allocateArray(makeArrayRef(StreamDirectory));
+  Obj.Header.NumberOfStreams = StreamDirectory.size();
+
+  for (auto &Stream : enumerate(Obj.Streams))
+    StreamDirectory[Stream.index()] = layout(File, *Stream.value());
+
+  File.writeTo(OS);
+}
+
+Error MinidumpYAML::writeAsBinary(StringRef Yaml, raw_ostream &OS) {
+  yaml::Input Input(Yaml);
+  Object Obj;
+  Input >> Obj;
+  if (std::error_code EC = Input.error())
+    return errorCodeToError(EC);
+
+  writeAsBinary(Obj, OS);
+  return Error::success();
+}
+
+/// Build the YAML stream for directory entry StreamDesc using data from File.
+Expected<std::unique_ptr<Stream>>
+Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
+  switch (getKind(StreamDesc.Type)) {
+  case StreamKind::MemoryList: {
+    auto ExpectedList = File.getMemoryList();
+    if (!ExpectedList)
+      return ExpectedList.takeError();
+    std::vector<MemoryListStream::entry_type> Ranges;
+    for (const MemoryDescriptor &MD : *ExpectedList) {
+      auto ExpectedContent = File.getRawData(MD.Memory);
+      if (!ExpectedContent)
+        return ExpectedContent.takeError();
+      Ranges.push_back({MD, *ExpectedContent});
+    }
+    return llvm::make_unique<MemoryListStream>(std::move(Ranges));
+  }
+  case StreamKind::ModuleList: {
+    auto ExpectedList = File.getModuleList();
+    if (!ExpectedList)
+      return ExpectedList.takeError();
+    std::vector<ModuleListStream::entry_type> Modules;
+    for (const Module &M : *ExpectedList) {
+      auto ExpectedName = File.getString(M.ModuleNameRVA);
+      if (!ExpectedName)
+        return ExpectedName.takeError();
+      auto ExpectedCv = File.getRawData(M.CvRecord);
+      if (!ExpectedCv)
+        return ExpectedCv.takeError();
+      auto ExpectedMisc = File.getRawData(M.MiscRecord);
+      if (!ExpectedMisc)
+        return ExpectedMisc.takeError();
+      Modules.push_back(
+          {M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc});
+    }
+    return llvm::make_unique<ModuleListStream>(std::move(Modules));
+  }
+  case StreamKind::RawContent:
+    return llvm::make_unique<RawContentStream>(StreamDesc.Type,
+                                               File.getRawStream(StreamDesc));
+  case StreamKind::SystemInfo: {
+    auto ExpectedInfo = File.getSystemInfo();
+    if (!ExpectedInfo)
+      return ExpectedInfo.takeError();
+    auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
+    if (!ExpectedCSDVersion)
+      return ExpectedCSDVersion.takeError();
+    return llvm::make_unique<SystemInfoStream>(*ExpectedInfo,
+                                               std::move(*ExpectedCSDVersion));
+  }
+  case StreamKind::TextContent:
+    return llvm::make_unique<TextContentStream>(
+        StreamDesc.Type, toStringRef(File.getRawStream(StreamDesc)));
+  case StreamKind::ThreadList: {
+    auto ExpectedList = File.getThreadList();
+    if (!ExpectedList)
+      return ExpectedList.takeError();
+    std::vector<ThreadListStream::entry_type> Threads;
+    for (const Thread &T : *ExpectedList) {
+      auto ExpectedStack = File.getRawData(T.Stack.Memory);
+      if (!ExpectedStack)
+        return ExpectedStack.takeError();
+      auto ExpectedContext = File.getRawData(T.Context);
+      if (!ExpectedContext)
+        return ExpectedContext.takeError();
+      Threads.push_back({T, *ExpectedStack, *ExpectedContext});
+    }
+    return llvm::make_unique<ThreadListStream>(std::move(Threads));
+  }
+  }
+  llvm_unreachable("Unhandled stream kind!");
+}
+
+Expected<Object> Object::create(const object::MinidumpFile &File) {
+  std::vector<std::unique_ptr<Stream>> Streams;
+  Streams.reserve(File.streams().size());
+  for (const Directory &StreamDesc : File.streams()) {
+    auto ExpectedStream = Stream::create(StreamDesc, File);
+    if (!ExpectedStream)
+      return ExpectedStream.takeError();
+    Streams.push_back(std::move(*ExpectedStream));
+  }
+  return Object(File.header(), std::move(Streams));
+}
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index 850c1a5a06c0..7f636f4eabac 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -1,9 +1,8 @@
 //===- ObjectYAML.cpp - YAML utilities for object files -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,6 +32,7 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
       MappingTraits<MachOYAML::UniversalBinary>::mapping(IO,
                                                          *ObjectFile.FatMachO);
   } else {
+    Input &In = (Input &)IO;
     if (IO.mapTag("!ELF")) {
       ObjectFile.Elf.reset(new ELFYAML::Object());
       MappingTraits<ELFYAML::Object>::mapping(IO, *ObjectFile.Elf);
@@ -46,18 +46,18 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
       ObjectFile.FatMachO.reset(new MachOYAML::UniversalBinary());
       MappingTraits<MachOYAML::UniversalBinary>::mapping(IO,
                                                          *ObjectFile.FatMachO);
+    } else if (IO.mapTag("!minidump")) {
+      ObjectFile.Minidump.reset(new MinidumpYAML::Object());
+      MappingTraits<MinidumpYAML::Object>::mapping(IO, *ObjectFile.Minidump);
     } else if (IO.mapTag("!WASM")) {
       ObjectFile.Wasm.reset(new WasmYAML::Object());
       MappingTraits<WasmYAML::Object>::mapping(IO, *ObjectFile.Wasm);
-    } else {
-      Input &In = (Input &)IO;
-      std::string Tag = In.getCurrentNode()->getRawTag();
-      if (Tag.empty())
+    } else if (const Node *N = In.getCurrentNode()) {
+      if (N->getRawTag().empty())
         IO.setError("YAML Object File missing document type tag!");
       else
-        IO.setError(
-            Twine("YAML Object File unsupported document type tag '") +
-            Twine(Tag) + Twine("'!"));
+        IO.setError("YAML Object File unsupported document type tag '" +
+                    N->getRawTag() + "'!");
     }
   }
 }
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 47bf853e0d3e..88491d955c49 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -1,9 +1,8 @@
 //===- WasmYAML.cpp - Wasm YAMLIO implementation --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,6 +73,20 @@ static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
   IO.mapOptional("Comdats", Section.Comdats);
 }
 
+static void sectionMapping(IO &IO, WasmYAML::ProducersSection &Section) {
+  commonSectionMapping(IO, Section);
+  IO.mapRequired("Name", Section.Name);
+  IO.mapOptional("Languages", Section.Languages);
+  IO.mapOptional("Tools", Section.Tools);
+  IO.mapOptional("SDKs", Section.SDKs);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::TargetFeaturesSection &Section) {
+  commonSectionMapping(IO, Section);
+  IO.mapRequired("Name", Section.Name);
+  IO.mapRequired("Features", Section.Features);
+}
+
 static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) {
   commonSectionMapping(IO, Section);
   IO.mapRequired("Name", Section.Name);
@@ -140,6 +153,11 @@ static void sectionMapping(IO &IO, WasmYAML::DataSection &Section) {
   IO.mapRequired("Segments", Section.Segments);
 }
 
+static void sectionMapping(IO &IO, WasmYAML::DataCountSection &Section) {
+  commonSectionMapping(IO, Section);
+  IO.mapRequired("Count", Section.Count);
+}
+
 void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
     IO &IO, std::unique_ptr<WasmYAML::Section> &Section) {
   WasmYAML::SectionType SectionType;
@@ -169,6 +187,14 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
       if (!IO.outputting())
         Section.reset(new WasmYAML::NameSection());
       sectionMapping(IO, *cast<WasmYAML::NameSection>(Section.get()));
+    } else if (SectionName == "producers") {
+      if (!IO.outputting())
+        Section.reset(new WasmYAML::ProducersSection());
+      sectionMapping(IO, *cast<WasmYAML::ProducersSection>(Section.get()));
+    } else if (SectionName == "target_features") {
+      if (!IO.outputting())
+        Section.reset(new WasmYAML::TargetFeaturesSection());
+      sectionMapping(IO, *cast<WasmYAML::TargetFeaturesSection>(Section.get()));
     } else {
       if (!IO.outputting())
         Section.reset(new WasmYAML::CustomSection(SectionName));
@@ -236,6 +262,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
       Section.reset(new WasmYAML::DataSection());
     sectionMapping(IO, *cast<WasmYAML::DataSection>(Section.get()));
     break;
+  case wasm::WASM_SEC_DATACOUNT:
+    if (!IO.outputting())
+      Section.reset(new WasmYAML::DataCountSection());
+    sectionMapping(IO, *cast<WasmYAML::DataCountSection>(Section.get()));
+    break;
   default:
     llvm_unreachable("Unknown section type");
   }
@@ -257,6 +288,7 @@ void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
   ECase(ELEM);
   ECase(CODE);
   ECase(DATA);
+  ECase(DATACOUNT);
 #undef ECase
 }
 
@@ -293,6 +325,27 @@ void MappingTraits<WasmYAML::NameEntry>::mapping(
   IO.mapRequired("Name", NameEntry.Name);
 }
 
+void MappingTraits<WasmYAML::ProducerEntry>::mapping(
+    IO &IO, WasmYAML::ProducerEntry &ProducerEntry) { // Two required keys.
+  IO.mapRequired("Name", ProducerEntry.Name);
+  IO.mapRequired("Version", ProducerEntry.Version); // Mapped after "Name".
+}
+
+void ScalarEnumerationTraits<WasmYAML::FeaturePolicyPrefix>::enumeration(
+    IO &IO, WasmYAML::FeaturePolicyPrefix &Kind) {
+#define PREFIX_CASE(X) IO.enumCase(Kind, #X, wasm::WASM_FEATURE_PREFIX_##X)
+  PREFIX_CASE(USED);       // "USED"       <-> WASM_FEATURE_PREFIX_USED
+  PREFIX_CASE(REQUIRED);   // "REQUIRED"   <-> WASM_FEATURE_PREFIX_REQUIRED
+  PREFIX_CASE(DISALLOWED); // "DISALLOWED" <-> WASM_FEATURE_PREFIX_DISALLOWED
+#undef PREFIX_CASE
+}
+
+void MappingTraits<WasmYAML::FeatureEntry>::mapping(
+    IO &IO, WasmYAML::FeatureEntry &FeatureEntry) { // Two required keys.
+  IO.mapRequired("Prefix", FeatureEntry.Prefix); // Mapped before "Name".
+  IO.mapRequired("Name", FeatureEntry.Name);
+}
+
 void MappingTraits<WasmYAML::SegmentInfo>::mapping(
     IO &IO, WasmYAML::SegmentInfo &SegmentInfo) {
   IO.mapRequired("Index", SegmentInfo.Index);
@@ -386,8 +439,18 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
 void MappingTraits<WasmYAML::DataSegment>::mapping(
     IO &IO, WasmYAML::DataSegment &Segment) {
   IO.mapOptional("SectionOffset", Segment.SectionOffset);
-  IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
-  IO.mapRequired("Offset", Segment.Offset);
+  IO.mapRequired("InitFlags", Segment.InitFlags); // Decides which keys follow.
+  if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) {
+    IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
+  } else {
+    Segment.MemoryIndex = 0; // Key not mapped: fall back to index 0.
+  }
+  if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+    IO.mapRequired("Offset", Segment.Offset); // Passive bit is clear.
+  } else {
+    Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST; // Placeholder expr:
+    Segment.Offset.Value.Int32 = 0;                      // i32.const 0.
+  }
   IO.mapRequired("Content", Segment.Content);
 }
 
@@ -421,7 +484,8 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
                                                   WasmYAML::SymbolInfo &Info) {
   IO.mapRequired("Index", Info.Index);
   IO.mapRequired("Kind", Info.Kind);
-  IO.mapRequired("Name", Info.Name);
+  if (Info.Kind != wasm::WASM_SYMBOL_TYPE_SECTION)
+    IO.mapRequired("Name", Info.Name);
   IO.mapRequired("Flags", Info.Flags);
   if (Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION) {
     IO.mapRequired("Function", Info.ElementIndex);
@@ -469,6 +533,8 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
   // BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
   BCaseMask(VISIBILITY_MASK, VISIBILITY_HIDDEN);
   BCaseMask(UNDEFINED, UNDEFINED);
+  BCaseMask(EXPORTED, EXPORTED);
+  BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
 #undef BCaseMask
 }
 
diff --git a/lib/ObjectYAML/XCOFFYAML.cpp b/lib/ObjectYAML/XCOFFYAML.cpp
new file mode 100644
index 000000000000..982e6aecbb98
--- /dev/null
+++ b/lib/ObjectYAML/XCOFFYAML.cpp
@@ -0,0 +1,109 @@
+//===-- XCOFFYAML.cpp - XCOFF YAMLIO implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of XCOFF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/XCOFFYAML.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include <string.h>
+
+namespace llvm {
+namespace XCOFFYAML {
+
+Object::Object() { memset(&Header, 0, sizeof(Header)); } // Zero the header.
+
+} // namespace XCOFFYAML
+
+namespace yaml {
+
+void ScalarEnumerationTraits<XCOFF::StorageClass>::enumeration(
+    IO &IO, XCOFF::StorageClass &Value) { // One enumCase per C_* constant.
+#define ECase(X) IO.enumCase(Value, #X, XCOFF::X) // YAML text == enumerator.
+  ECase(C_NULL);
+  ECase(C_AUTO);
+  ECase(C_EXT);
+  ECase(C_STAT);
+  ECase(C_REG);
+  ECase(C_EXTDEF);
+  ECase(C_LABEL);
+  ECase(C_ULABEL);
+  ECase(C_MOS);
+  ECase(C_ARG);
+  ECase(C_STRTAG);
+  ECase(C_MOU);
+  ECase(C_UNTAG);
+  ECase(C_TPDEF);
+  ECase(C_USTATIC);
+  ECase(C_ENTAG);
+  ECase(C_MOE);
+  ECase(C_REGPARM);
+  ECase(C_FIELD);
+  ECase(C_BLOCK);
+  ECase(C_FCN);
+  ECase(C_EOS);
+  ECase(C_FILE);
+  ECase(C_LINE);
+  ECase(C_ALIAS);
+  ECase(C_HIDDEN);
+  ECase(C_HIDEXT);
+  ECase(C_BINCL);
+  ECase(C_EINCL);
+  ECase(C_INFO);
+  ECase(C_WEAKEXT);
+  ECase(C_DWARF);
+  ECase(C_GSYM);
+  ECase(C_LSYM);
+  ECase(C_PSYM);
+  ECase(C_RSYM);
+  ECase(C_RPSYM);
+  ECase(C_STSYM);
+  ECase(C_TCSYM);
+  ECase(C_BCOMM);
+  ECase(C_ECOML);
+  ECase(C_ECOMM);
+  ECase(C_DECL);
+  ECase(C_ENTRY);
+  ECase(C_FUN);
+  ECase(C_BSTAT);
+  ECase(C_ESTAT);
+  ECase(C_GTLS);
+  ECase(C_STTLS);
+  ECase(C_EFCN);
+#undef ECase // Keep the helper macro local to this function.
+}
+
+void MappingTraits<XCOFFYAML::FileHeader>::mapping(
+    IO &IO, XCOFFYAML::FileHeader &FileHdr) { // All seven keys are required.
+  IO.mapRequired("MagicNumber", FileHdr.Magic);
+  IO.mapRequired("NumberOfSections", FileHdr.NumberOfSections);
+  IO.mapRequired("CreationTime", FileHdr.TimeStamp); // Key != field name.
+  IO.mapRequired("OffsetToSymbolTable", FileHdr.SymbolTableOffset);
+  IO.mapRequired("EntriesInSymbolTable", FileHdr.NumberOfSymTableEntries);
+  IO.mapRequired("AuxiliaryHeaderSize", FileHdr.AuxHeaderSize);
+  IO.mapRequired("Flags", FileHdr.Flags);
+}
+
+void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
+  IO.mapRequired("Name", S.SymbolName); // All six keys are required.
+  IO.mapRequired("Value", S.Value);
+  IO.mapRequired("Section", S.SectionName); // Key "Section" <- SectionName.
+  IO.mapRequired("Type", S.Type);
+  IO.mapRequired("StorageClass", S.StorageClass);
+  IO.mapRequired("NumberOfAuxEntries", S.NumberOfAuxEntries);
+}
+
+void MappingTraits<XCOFFYAML::Object>::mapping(IO &IO, XCOFFYAML::Object &Obj) {
+  IO.mapTag("!XCOFF", true); // Tags the document as !XCOFF.
+  IO.mapRequired("FileHeader", Obj.Header); // Header first, then symbols.
+  IO.mapRequired("Symbols", Obj.Symbols);
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/lib/ObjectYAML/YAML.cpp b/lib/ObjectYAML/YAML.cpp
index 67b5764eadaa..6eba16e36c2a 100644
--- a/lib/ObjectYAML/YAML.cpp
+++ b/lib/ObjectYAML/YAML.cpp
@@ -1,9 +1,8 @@
 //===- YAML.cpp - YAMLIO utilities for object files -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,7 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
   // TODO: Can we improve YAMLIO to permit a more accurate diagnostic here?
   // (e.g. a caret pointing to the offending character).
   for (unsigned I = 0, N = Scalar.size(); I != N; ++I)
-    if (!isxdigit(Scalar[I]))
+    if (!llvm::isHexDigit(Scalar[I]))
       return "BinaryRef hex string must contain only hex digits.";
   Val = yaml::BinaryRef(Scalar);
   return {};
@@ -44,8 +43,9 @@ void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
     return;
   }
   for (unsigned I = 0, N = Data.size(); I != N; I += 2) {
-    uint8_t Byte;
-    StringRef((const char *)&Data[I],  2).getAsInteger(16, Byte);
+    uint8_t Byte = llvm::hexDigitValue(Data[I]);
+    Byte <<= 4;
+    Byte |= llvm::hexDigitValue(Data[I + 1]);
     OS.write(Byte);
   }
 }
diff --git a/lib/OptRemarks/OptRemarksParser.cpp b/lib/OptRemarks/OptRemarksParser.cpp
deleted file mode 100644
index 0478d2bfbfa6..000000000000
--- a/lib/OptRemarks/OptRemarksParser.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-//===- OptRemarksParser.cpp -----------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides utility methods used by clients that want to use the
-// parser for optimization remarks in LLVM.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm-c/OptRemarks.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm;
-
-namespace {
-struct RemarkParser {
-  /// Source manager for better error messages.
-  SourceMgr SM;
-  /// Stream for yaml parsing.
-  yaml::Stream Stream;
-  /// Storage for the error stream.
-  std::string ErrorString;
-  /// The error stream.
-  raw_string_ostream ErrorStream;
-  /// Iterator in the YAML stream.
-  yaml::document_iterator DI;
-  /// The parsed remark (if any).
-  Optional<LLVMOptRemarkEntry> LastRemark;
-  /// Temporary parsing buffer for the arguments.
-  SmallVector<LLVMOptRemarkArg, 8> TmpArgs;
-  /// The state used by the parser to parse a remark entry. Invalidated with
-  /// every call to `parseYAMLElement`.
-  struct ParseState {
-    /// Temporary parsing buffer for the arguments.
-    SmallVectorImpl<LLVMOptRemarkArg> *Args;
-    StringRef Type;
-    StringRef Pass;
-    StringRef Name;
-    StringRef Function;
-    /// Optional.
-    Optional<StringRef> File;
-    Optional<unsigned> Line;
-    Optional<unsigned> Column;
-    Optional<unsigned> Hotness;
-
-    ParseState(SmallVectorImpl<LLVMOptRemarkArg> &Args) : Args(&Args) {}
-    /// Use Args only as a **temporary** buffer.
-    ~ParseState() { Args->clear(); }
-  };
-
-  ParseState State;
-
-  /// Set to `true` if we had any errors during parsing.
-  bool HadAnyErrors = false;
-
-  RemarkParser(StringRef Buf)
-      : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
-        DI(Stream.begin()), LastRemark(), TmpArgs(), State(TmpArgs) {
-    SM.setDiagHandler(RemarkParser::HandleDiagnostic, this);
-  }
-
-  /// Parse a YAML element.
-  Error parseYAMLElement(yaml::Document &Remark);
-
-private:
-  /// Parse one key to a string.
-  /// otherwise.
-  Error parseKey(StringRef &Result, yaml::KeyValueNode &Node);
-  /// Parse one value to a string.
-  Error parseValue(StringRef &Result, yaml::KeyValueNode &Node);
-  /// Parse one value to an unsigned.
-  Error parseValue(Optional<unsigned> &Result, yaml::KeyValueNode &Node);
-  /// Parse a debug location.
-  Error parseDebugLoc(Optional<StringRef> &File, Optional<unsigned> &Line,
-                      Optional<unsigned> &Column, yaml::KeyValueNode &Node);
-  /// Parse an argument.
-  Error parseArg(SmallVectorImpl<LLVMOptRemarkArg> &TmpArgs, yaml::Node &Node);
-
-  /// Handle a diagnostic from the YAML stream. Records the error in the
-  /// RemarkParser class.
-  static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
-    assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
-    auto *Parser = static_cast<RemarkParser *>(Ctx);
-    Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false,
-               /*ShowKindLabels*/ true);
-  }
-};
-
-class ParseError : public ErrorInfo<ParseError> {
-public:
-  static char ID;
-
-  ParseError(StringRef Message, yaml::Node &Node)
-      : Message(Message), Node(Node) {}
-
-  void log(raw_ostream &OS) const override { OS << Message; }
-  std::error_code convertToErrorCode() const override {
-    return inconvertibleErrorCode();
-  }
-
-  StringRef getMessage() const { return Message; }
-  yaml::Node &getNode() const { return Node; }
-
-private:
-  StringRef Message; // No need to hold a full copy of the buffer.
-  yaml::Node &Node;
-};
-
-char ParseError::ID = 0;
-
-static LLVMOptRemarkStringRef toOptRemarkStr(StringRef Str) {
-  return {Str.data(), static_cast<uint32_t>(Str.size())};
-}
-
-Error RemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) {
-  auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey());
-  if (!Key)
-    return make_error<ParseError>("key is not a string.", Node);
-
-  Result = Key->getRawValue();
-  return Error::success();
-}
-
-Error RemarkParser::parseValue(StringRef &Result, yaml::KeyValueNode &Node) {
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
-  if (!Value)
-    return make_error<ParseError>("expected a value of scalar type.", Node);
-  Result = Value->getRawValue();
-
-  if (Result.front() == '\'')
-    Result = Result.drop_front();
-
-  if (Result.back() == '\'')
-    Result = Result.drop_back();
-
-  return Error::success();
-}
-
-Error RemarkParser::parseValue(Optional<unsigned> &Result,
-                               yaml::KeyValueNode &Node) {
-  SmallVector<char, 4> Tmp;
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
-  if (!Value)
-    return make_error<ParseError>("expected a value of scalar type.", Node);
-  unsigned UnsignedValue = 0;
-  if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
-    return make_error<ParseError>("expected a value of integer type.", *Value);
-  Result = UnsignedValue;
-  return Error::success();
-}
-
-Error RemarkParser::parseDebugLoc(Optional<StringRef> &File,
-                                  Optional<unsigned> &Line,
-                                  Optional<unsigned> &Column,
-                                  yaml::KeyValueNode &Node) {
-  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
-  if (!DebugLoc)
-    return make_error<ParseError>("expected a value of mapping type.", Node);
-
-  for (yaml::KeyValueNode &DLNode : *DebugLoc) {
-    StringRef KeyName;
-    if (Error E = parseKey(KeyName, DLNode))
-      return E;
-    if (KeyName == "File") {
-      File = StringRef(); // Set the optional to contain a default constructed
-                          // value, to be passed to the parsing function.
-      if (Error E = parseValue(*File, DLNode))
-        return E;
-    } else if (KeyName == "Column") {
-      if (Error E = parseValue(Column, DLNode))
-        return E;
-    } else if (KeyName == "Line") {
-      if (Error E = parseValue(Line, DLNode))
-        return E;
-    } else {
-      return make_error<ParseError>("unknown entry in DebugLoc map.", DLNode);
-    }
-  }
-
-  // If any of the debug loc fields is missing, return an error.
-  if (!File || !Line || !Column)
-    return make_error<ParseError>("DebugLoc node incomplete.", Node);
-
-  return Error::success();
-}
-
-Error RemarkParser::parseArg(SmallVectorImpl<LLVMOptRemarkArg> &Args,
-                             yaml::Node &Node) {
-  auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
-  if (!ArgMap)
-    return make_error<ParseError>("expected a value of mapping type.", Node);
-
-  StringRef ValueStr;
-  StringRef KeyStr;
-  Optional<StringRef> File;
-  Optional<unsigned> Line;
-  Optional<unsigned> Column;
-
-  for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
-    StringRef KeyName;
-    if (Error E = parseKey(KeyName, ArgEntry))
-      return E;
-
-    // Try to parse debug locs.
-    if (KeyName == "DebugLoc") {
-      // Can't have multiple DebugLoc entries per argument.
-      if (File || Line || Column)
-        return make_error<ParseError>(
-            "only one DebugLoc entry is allowed per argument.", ArgEntry);
-
-      if (Error E = parseDebugLoc(File, Line, Column, ArgEntry))
-        return E;
-      continue;
-    }
-
-    // If we already have a string, error out.
-    if (!ValueStr.empty())
-      return make_error<ParseError>(
-          "only one string entry is allowed per argument.", ArgEntry);
-
-    // Try to parse a string.
-    if (Error E = parseValue(ValueStr, ArgEntry))
-      return E;
-
-    // Keep the key from the string.
-    KeyStr = KeyName;
-  }
-
-  if (KeyStr.empty())
-    return make_error<ParseError>("argument key is missing.", *ArgMap);
-  if (ValueStr.empty())
-    return make_error<ParseError>("argument value is missing.", *ArgMap);
-
-  Args.push_back(LLVMOptRemarkArg{
-      toOptRemarkStr(KeyStr), toOptRemarkStr(ValueStr),
-      LLVMOptRemarkDebugLoc{toOptRemarkStr(File.getValueOr(StringRef())),
-                            Line.getValueOr(0), Column.getValueOr(0)}});
-
-  return Error::success();
-}
-
-Error RemarkParser::parseYAMLElement(yaml::Document &Remark) {
-  // Parsing a new remark, clear the previous one.
-  LastRemark = None;
-  State = ParseState(TmpArgs);
-
-  auto *Root = dyn_cast<yaml::MappingNode>(Remark.getRoot());
-  if (!Root)
-    return make_error<ParseError>("document root is not of mapping type.",
-                                  *Remark.getRoot());
-
-  State.Type = Root->getRawTag();
-
-  for (yaml::KeyValueNode &RemarkField : *Root) {
-    StringRef KeyName;
-    if (Error E = parseKey(KeyName, RemarkField))
-      return E;
-
-    if (KeyName == "Pass") {
-      if (Error E = parseValue(State.Pass, RemarkField))
-        return E;
-    } else if (KeyName == "Name") {
-      if (Error E = parseValue(State.Name, RemarkField))
-        return E;
-    } else if (KeyName == "Function") {
-      if (Error E = parseValue(State.Function, RemarkField))
-        return E;
-    } else if (KeyName == "Hotness") {
-      if (Error E = parseValue(State.Hotness, RemarkField))
-        return E;
-    } else if (KeyName == "DebugLoc") {
-      if (Error E =
-              parseDebugLoc(State.File, State.Line, State.Column, RemarkField))
-        return E;
-    } else if (KeyName == "Args") {
-      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
-      if (!Args)
-        return make_error<ParseError>("wrong value type for key.", RemarkField);
-
-      for (yaml::Node &Arg : *Args)
-        if (Error E = parseArg(*State.Args, Arg))
-          return E;
-    } else {
-      return make_error<ParseError>("unknown key.", RemarkField);
-    }
-  }
-
-  // If the YAML parsing failed, don't even continue parsing. We might
-  // encounter malformed YAML.
-  if (Stream.failed())
-    return make_error<ParseError>("YAML parsing failed.", *Remark.getRoot());
-
-  // Check if any of the mandatory fields are missing.
-  if (State.Type.empty() || State.Pass.empty() || State.Name.empty() ||
-      State.Function.empty())
-    return make_error<ParseError>("Type, Pass, Name or Function missing.",
-                                  *Remark.getRoot());
-
-  LastRemark = LLVMOptRemarkEntry{
-      toOptRemarkStr(State.Type),
-      toOptRemarkStr(State.Pass),
-      toOptRemarkStr(State.Name),
-      toOptRemarkStr(State.Function),
-      LLVMOptRemarkDebugLoc{toOptRemarkStr(State.File.getValueOr(StringRef())),
-                            State.Line.getValueOr(0),
-                            State.Column.getValueOr(0)},
-      State.Hotness.getValueOr(0),
-      static_cast<uint32_t>(State.Args->size()),
-      State.Args->data()};
-
-  return Error::success();
-}
-} // namespace
-
-// Create wrappers for C Binding types (see CBindingWrapping.h).
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkParser, LLVMOptRemarkParserRef)
-
-extern "C" LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
-                                                            uint64_t Size) {
-  return wrap(
-      new RemarkParser(StringRef(static_cast<const char *>(Buf), Size)));
-}
-
-extern "C" LLVMOptRemarkEntry *
-LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser) {
-  RemarkParser &TheParser = *unwrap(Parser);
-  // Check for EOF.
-  if (TheParser.HadAnyErrors || TheParser.DI == TheParser.Stream.end())
-    return nullptr;
-
-  // Try to parse an entry.
-  if (Error E = TheParser.parseYAMLElement(*TheParser.DI)) {
-    handleAllErrors(std::move(E), [&](const ParseError &PE) {
-      TheParser.Stream.printError(&PE.getNode(),
-                                  Twine(PE.getMessage()) + Twine('\n'));
-      TheParser.HadAnyErrors = true;
-    });
-    return nullptr;
-  }
-
-  // Move on.
-  ++TheParser.DI;
-
-  // Return the just-parsed remark.
-  if (Optional<LLVMOptRemarkEntry> &Entry = TheParser.LastRemark)
-    return &*Entry;
-  return nullptr;
-}
-
-extern "C" LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser) {
-  return unwrap(Parser)->HadAnyErrors;
-}
-
-extern "C" const char *
-LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser) {
-  return unwrap(Parser)->ErrorStream.str().c_str();
-}
-
-extern "C" void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser) {
-  delete unwrap(Parser);
-}
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index 4ce40e3ab26c..ea382b347345 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -1,9 +1,8 @@
 //===- Arg.cpp - Argument Implementations ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -67,6 +66,9 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); }
 #endif
 
 std::string Arg::getAsString(const ArgList &Args) const {
+  if (Alias)
+    return Alias->getAsString(Args);
+
   SmallString<256> Res;
   raw_svector_ostream OS(Res);
 
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 8a7d59d24366..f37c142da69b 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -1,9 +1,8 @@
 //===- ArgList.cpp - Argument List Management -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -96,21 +95,6 @@ std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
   return std::vector<std::string>(Values.begin(), Values.end());
 }
 
-void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const {
-  if (Arg *A = getLastArg(Id)) {
-    A->claim();
-    A->render(*this, Output);
-  }
-}
-
-void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
-                         OptSpecifier Id1) const {
-  if (Arg *A = getLastArg(Id0, Id1)) {
-    A->claim();
-    A->render(*this, Output);
-  }
-}
-
 void ArgList::AddAllArgsExcept(ArgStringList &Output,
                                ArrayRef<OptSpecifier> Ids,
                                ArrayRef<OptSpecifier> ExcludeIds) const {
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 312ff7808759..5833d03069f8 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -1,9 +1,8 @@
 //===- OptTable.cpp - Option Table Implementation -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -252,59 +251,69 @@ unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
                                unsigned MinimumLength) const {
   assert(!Option.empty());
 
-  // Consider each option as a candidate, finding the closest match.
+  // Consider each [option prefix + option name] pair as a candidate, finding
+  // the closest match.
   unsigned BestDistance = UINT_MAX;
   for (const Info &CandidateInfo :
        ArrayRef<Info>(OptionInfos).drop_front(FirstSearchableIndex)) {
     StringRef CandidateName = CandidateInfo.Name;
 
-    // Ignore option candidates with empty names, such as "--", or names
-    // that do not meet the minimum length.
+    // We can eliminate some option prefix/name pairs as candidates right away:
+    // * Ignore option candidates with empty names, such as "--", or names
+    //   that do not meet the minimum length.
     if (CandidateName.empty() || CandidateName.size() < MinimumLength)
       continue;
 
-    // If FlagsToInclude were specified, ignore options that don't include
-    // those flags.
+    // * If FlagsToInclude were specified, ignore options that don't include
+    //   those flags.
     if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude))
       continue;
-    // Ignore options that contain the FlagsToExclude.
+    // * Ignore options that contain the FlagsToExclude.
     if (CandidateInfo.Flags & FlagsToExclude)
       continue;
 
-    // Ignore positional argument option candidates (which do not
-    // have prefixes).
+    // * Ignore positional argument option candidates (which do not
+    //   have prefixes).
     if (!CandidateInfo.Prefixes)
       continue;
-    // Find the most appropriate prefix. For example, if a user asks for
-    // "--helm", suggest "--help" over "-help".
-    StringRef Prefix = CandidateInfo.Prefixes[0];
-    for (int P = 1; CandidateInfo.Prefixes[P]; P++) {
-      if (Option.startswith(CandidateInfo.Prefixes[P]))
-        Prefix = CandidateInfo.Prefixes[P];
-    }
 
-    // Check if the candidate ends with a character commonly used when
+    // Now check if the candidate ends with a character commonly used when
     // delimiting an option from its value, such as '=' or ':'. If it does,
     // attempt to split the given option based on that delimiter.
-    std::string Delimiter = "";
-    char Last = CandidateName.back();
-    if (Last == '=' || Last == ':')
-      Delimiter = std::string(1, Last);
-
     StringRef LHS, RHS;
-    if (Delimiter.empty())
-      LHS = Option;
-    else
+    char Last = CandidateName.back();
+    bool CandidateHasDelimiter = Last == '=' || Last == ':';
+    std::string NormalizedName = Option;
+    if (CandidateHasDelimiter) {
       std::tie(LHS, RHS) = Option.split(Last);
+      NormalizedName = LHS;
+      if (Option.find(Last) == LHS.size())
+        NormalizedName += Last;
+    }
 
-    std::string NormalizedName =
-        (LHS.drop_front(Prefix.size()) + Delimiter).str();
-    unsigned Distance =
-        CandidateName.edit_distance(NormalizedName, /*AllowReplacements=*/true,
-                                    /*MaxEditDistance=*/BestDistance);
-    if (Distance < BestDistance) {
-      BestDistance = Distance;
-      NearestString = (Prefix + CandidateName + RHS).str();
+    // Consider each possible prefix for each candidate to find the most
+    // appropriate one. For example, if a user asks for "--helm", suggest
+    // "--help" over "-help".
+    for (int P = 0;
+         const char *const CandidatePrefix = CandidateInfo.Prefixes[P]; P++) {
+      std::string Candidate = (CandidatePrefix + CandidateName).str();
+      StringRef CandidateRef = Candidate;
+      unsigned Distance =
+          CandidateRef.edit_distance(NormalizedName, /*AllowReplacements=*/true,
+                                     /*MaxEditDistance=*/BestDistance);
+      if (RHS.empty() && CandidateHasDelimiter) {
+        // The Candidate ends with a = or : delimiter, but the option passed in
+        // didn't contain the delimiter (or doesn't have anything after it).
+        // In that case, penalize the correction: `-nodefaultlibs` is more
+        // likely to be a spello for `-nodefaultlib` than `-nodefaultlib:` even
+        // though both have an unmodified editing distance of 1, since the
+        // latter would need an argument.
+        ++Distance;
+      }
+      if (Distance < BestDistance) {
+        BestDistance = Distance;
+        NearestString = (Candidate + RHS).str();
+      }
     }
   }
   return BestDistance;
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index f9d8a5e54043..9abc9fdce4c7 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -1,9 +1,8 @@
 //===- Option.cpp - Abstract Driver Options -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -107,49 +106,23 @@ bool Option::matches(OptSpecifier Opt) const {
   return false;
 }
 
-Arg *Option::accept(const ArgList &Args,
-                    unsigned &Index,
-                    unsigned ArgSize) const {
-  const Option &UnaliasedOption = getUnaliasedOption();
-  StringRef Spelling;
-  // If the option was an alias, get the spelling from the unaliased one.
-  if (getID() == UnaliasedOption.getID()) {
-    Spelling = StringRef(Args.getArgString(Index), ArgSize);
-  } else {
-    Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) +
-                                  Twine(UnaliasedOption.getName()));
-  }
-
+Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
+                            unsigned ArgSize) const {
+  StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize);
   switch (getKind()) {
   case FlagClass: {
     if (ArgSize != strlen(Args.getArgString(Index)))
       return nullptr;
-
-    Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
-    if (getAliasArgs()) {
-      const char *Val = getAliasArgs();
-      while (*Val != '\0') {
-        A->getValues().push_back(Val);
-
-        // Move past the '\0' to the next argument.
-        Val += strlen(Val) + 1;
-      }
-    }
-
-    if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs())
-      // A Flag alias for a Joined option must provide an argument.
-      A->getValues().push_back("");
-
-    return A;
+    return new Arg(*this, Spelling, Index++);
   }
   case JoinedClass: {
     const char *Value = Args.getArgString(Index) + ArgSize;
-    return new Arg(UnaliasedOption, Spelling, Index++, Value);
+    return new Arg(*this, Spelling, Index++, Value);
   }
   case CommaJoinedClass: {
     // Always matches.
     const char *Str = Args.getArgString(Index) + ArgSize;
-    Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+    Arg *A = new Arg(*this, Spelling, Index++);
 
     // Parse out the comma separated values.
     const char *Prev = Str;
@@ -185,8 +158,7 @@ Arg *Option::accept(const ArgList &Args,
         Args.getArgString(Index - 1) == nullptr)
       return nullptr;
 
-    return new Arg(UnaliasedOption, Spelling,
-                   Index - 2, Args.getArgString(Index - 1));
+    return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
   case MultiArgClass: {
     // Matches iff this is an exact match.
     // FIXME: Avoid strlen.
@@ -197,8 +169,8 @@ Arg *Option::accept(const ArgList &Args,
     if (Index > Args.getNumInputArgStrings())
       return nullptr;
 
-    Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
-                      Args.getArgString(Index - getNumArgs()));
+    Arg *A = new Arg(*this, Spelling, Index - 1 - getNumArgs(),
+                     Args.getArgString(Index - getNumArgs()));
     for (unsigned i = 1; i != getNumArgs(); ++i)
       A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
     return A;
@@ -217,8 +189,7 @@ Arg *Option::accept(const ArgList &Args,
         Args.getArgString(Index - 1) == nullptr)
       return nullptr;
 
-    return new Arg(UnaliasedOption, Spelling,
-                   Index - 2, Args.getArgString(Index - 1));
+    return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
   }
   case JoinedAndSeparateClass:
     // Always matches.
@@ -227,7 +198,7 @@ Arg *Option::accept(const ArgList &Args,
         Args.getArgString(Index - 1) == nullptr)
       return nullptr;
 
-    return new Arg(UnaliasedOption, Spelling, Index - 2,
+    return new Arg(*this, Spelling, Index - 2,
                    Args.getArgString(Index - 2) + ArgSize,
                    Args.getArgString(Index - 1));
   case RemainingArgsClass: {
@@ -235,14 +206,14 @@ Arg *Option::accept(const ArgList &Args,
     // FIXME: Avoid strlen.
     if (ArgSize != strlen(Args.getArgString(Index)))
       return nullptr;
-    Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+    Arg *A = new Arg(*this, Spelling, Index++);
     while (Index < Args.getNumInputArgStrings() &&
            Args.getArgString(Index) != nullptr)
       A->getValues().push_back(Args.getArgString(Index++));
     return A;
   }
   case RemainingArgsJoinedClass: {
-    Arg *A = new Arg(UnaliasedOption, Spelling, Index);
+    Arg *A = new Arg(*this, Spelling, Index);
     if (ArgSize != strlen(Args.getArgString(Index))) {
       // An inexact match means there is a joined arg.
       A->getValues().push_back(Args.getArgString(Index) + ArgSize);
@@ -258,3 +229,62 @@ Arg *Option::accept(const ArgList &Args,
     llvm_unreachable("Invalid option kind!");
   }
 }
+
+Arg *Option::accept(const ArgList &Args,
+                    unsigned &Index,
+                    unsigned ArgSize) const {
+  std::unique_ptr<Arg> A(acceptInternal(Args, Index, ArgSize));
+  if (!A)
+    return nullptr;
+
+  const Option &UnaliasedOption = getUnaliasedOption();
+  if (getID() == UnaliasedOption.getID())
+    return A.release();
+
+  // "A" is an alias for a different flag. For most clients it's more convenient
+  // if this function returns unaliased Args, so create an unaliased arg for
+  // returning.
+
+  // This creates a completely new Arg object for the unaliased Arg because
+  // the alias and the unaliased arg can have different Kinds and different
+  // Values (due to AliasArgs<>).
+
+  // Get the spelling from the unaliased option.
+  StringRef UnaliasedSpelling = Args.MakeArgString(
+      Twine(UnaliasedOption.getPrefix()) + Twine(UnaliasedOption.getName()));
+
+  // It's a bit weird that aliased and unaliased arg share one index, but
+  // the index is mostly used as a memory optimization in render().
+  // Due to this, ArgList::getArgString(A->getIndex()) will return the spelling
+  // of the aliased arg always, while A->getSpelling() returns either the
+  // unaliased or the aliased arg, depending on which Arg object it's called on.
+  Arg *UnaliasedA = new Arg(UnaliasedOption, UnaliasedSpelling, A->getIndex());
+  Arg *RawA = A.get();
+  UnaliasedA->setAlias(std::move(A));
+
+  if (getKind() != FlagClass) {
+    // Values are usually owned by the ArgList. The exceptions are
+    // CommaJoined flags, where the Arg owns the values. For aliased flags,
+    // make the unaliased Arg the owner of the values.
+    // FIXME: There aren't many uses of CommaJoined -- try removing
+    // CommaJoined in favor of just calling StringRef::split(',') instead.
+    UnaliasedA->getValues() = RawA->getValues();
+    UnaliasedA->setOwnsValues(RawA->getOwnsValues());
+    RawA->setOwnsValues(false);
+    return UnaliasedA;
+  }
+
+  // FlagClass aliases can have AliasArgs<>; add those to the unaliased arg.
+  if (const char *Val = getAliasArgs()) {
+    while (*Val != '\0') {
+      UnaliasedA->getValues().push_back(Val);
+
+      // Move past the '\0' to the next argument.
+      Val += strlen(Val) + 1;
+    }
+  }
+  if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs())
+    // A Flag alias for a Joined option must provide an argument.
+    UnaliasedA->getValues().push_back("");
+  return UnaliasedA;
+}
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 5ec94ea6f40a..e2b2a2b25268 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -1,9 +1,8 @@
 //===- Parsing, selection, and construction of pass pipelines -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -57,6 +56,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/SafepointIRVerifier.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -65,6 +65,7 @@
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
 #include "llvm/Transforms/IPO/ConstantMerge.h"
 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
@@ -89,14 +90,18 @@
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/Instrumentation/CGProfile.h"
 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
-#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/Scalar/ADCE.h"
 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
 #include "llvm/Transforms/Scalar/BDCE.h"
@@ -120,6 +125,7 @@
 #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
 #include "llvm/Transforms/Scalar/LoopDeletion.h"
 #include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
@@ -134,9 +140,11 @@
 #include "llvm/Transforms/Scalar/LowerAtomic.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
 #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
 #include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Transforms/Scalar/NewGVN.h"
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
@@ -181,10 +189,6 @@ static cl::opt<bool>
               cl::Hidden, cl::ZeroOrMore,
               cl::desc("Run NewGVN instead of GVN"));
 
-static cl::opt<bool> EnableEarlyCSEMemSSA(
-    "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden,
-    cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)"));
-
 static cl::opt<bool> EnableGVNHoist(
     "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
@@ -205,11 +209,26 @@ static cl::opt<bool> EnableSyntheticCounts(
 static Regex DefaultAliasRegex(
     "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
 
+// This option is used in simplifying testing SampleFDO optimizations for
+// profile loading.
 static cl::opt<bool>
     EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
               cl::desc("Enable control height reduction optimization (CHR)"));
 
+PipelineTuningOptions::PipelineTuningOptions() {
+  LoopInterleaving = EnableLoopInterleaving;
+  LoopVectorization = EnableLoopVectorization;
+  SLPVectorization = RunSLPVectorization;
+  LoopUnrolling = true;
+  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+  LicmMssaOptCap = SetLicmMssaOptCap;
+  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+}
+
 extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+
+extern cl::opt<bool> FlattenedProfileUsed;
 
 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
   switch (Level) {
@@ -371,7 +390,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(SROA());
 
   // Catch trivial redundancies
-  FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA));
+  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
   if (EnableGVNHoist)
@@ -401,7 +420,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
-  if (PGOOpt && !PGOOpt->ProfileUseFile.empty() &&
+  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
       !isOptimizingForSize(Level))
     FPM.addPass(PGOMemOPSizeOpt());
 
@@ -432,7 +451,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
   // Rotate Loop - disable header duplication at -Oz
   LPM1.addPass(LoopRotatePass(Level != Oz));
-  LPM1.addPass(LICMPass());
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
   LPM2.addPass(IndVarSimplifyPass());
   LPM2.addPass(LoopIdiomRecognizePass());
@@ -444,9 +463,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
   // because it changes IR to makes profile annotation in back compile
   // inaccurate.
-  if (Phase != ThinLTOPhase::PreLink ||
-      !PGOOpt || PGOOpt->SampleProfileFile.empty())
-    LPM2.addPass(LoopFullUnrollPass(Level));
+  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+       PGOOpt->Action != PGOOptions::SampleUse) &&
+      PTO.LoopUnrolling)
+    LPM2.addPass(
+        LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
     C(LPM2, Level);
@@ -492,7 +513,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(JumpThreadingPass());
   FPM.addPass(CorrelatedValuePropagationPass());
   FPM.addPass(DSEPass());
-  FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
+  FPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      DebugLogging));
 
   for (auto &C : ScalarOptimizerLateEPCallbacks)
     C(FPM, Level);
@@ -505,7 +528,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   invokePeepholeEPCallbacks(FPM, Level);
 
   if (EnableCHR && Level == O3 && PGOOpt &&
-      (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+      (PGOOpt->Action == PGOOptions::IRUse ||
+       PGOOpt->Action == PGOOptions::SampleUse))
     FPM.addPass(ControlHeightReductionPass());
 
   return FPM;
@@ -513,15 +537,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
                                     PassBuilder::OptimizationLevel Level,
-                                    bool RunProfileGen,
-                                    std::string ProfileGenFile,
-                                    std::string ProfileUseFile,
+                                    bool RunProfileGen, bool IsCS,
+                                    std::string ProfileFile,
                                     std::string ProfileRemappingFile) {
   // Generally running simplification passes and the inliner with an high
   // threshold results in smaller executables, but there may be cases where
   // the size grows, so let's be conservative here and skip this simplification
-  // at -Os/Oz.
-  if (!isOptimizingForSize(Level)) {
+  // at -Os/Oz. We will not do this inlining for context-sensitive PGO (when
+  // IsCS is true).
+  if (!isOptimizingForSize(Level) && !IsCS) {
     InlineParams IP;
 
     // In the old pass manager, this is a cl::opt. Should still this be one?
@@ -554,7 +578,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
   MPM.addPass(GlobalDCEPass());
 
   if (RunProfileGen) {
-    MPM.addPass(PGOInstrumentationGen());
+    MPM.addPass(PGOInstrumentationGen(IsCS));
 
     FunctionPassManager FPM;
     FPM.addPass(
@@ -563,14 +587,17 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
 
     // Add the profile lowering pass.
     InstrProfOptions Options;
-    if (!ProfileGenFile.empty())
-      Options.InstrProfileOutput = ProfileGenFile;
+    if (!ProfileFile.empty())
+      Options.InstrProfileOutput = ProfileFile;
     Options.DoCounterPromotion = true;
-    MPM.addPass(InstrProfiling(Options));
+    Options.UseBFIInPromotion = IsCS;
+    MPM.addPass(InstrProfiling(Options, IsCS));
+  } else if (!ProfileFile.empty()) {
+    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
   }
-
-  if (!ProfileUseFile.empty())
-    MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile));
 }
 
 static InlineParams
@@ -587,6 +614,32 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
                                                bool DebugLogging) {
   ModulePassManager MPM(DebugLogging);
 
+  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when flattened profile is used, all the available
+  // profile information will be annotated in PreLink phase so there is
+  // no need to load the profile again in PostLink.
+  bool LoadSampleProfile =
+      HasSampleProfile &&
+      !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink);
+
+  // During the ThinLTO backend phase we perform early indirect call promotion
+  // here, before globalopt. Otherwise imported available_externally functions
+  // look unreferenced and are removed. If we are going to load the sample
+  // profile then defer until later.
+  // TODO: See if we can move later and consolidate with the location where
+  // we perform ICP when we are loading a sample profile.
+  // TODO: We pass HasSampleProfile (whether there was a sample profile file
+  // passed to the compile) to the SamplePGO flag of ICP. This is used to
+  // determine whether the new direct calls are annotated with prof metadata.
+  // Ideally this should be determined from whether the IR is annotated with
+  // sample profile, and not whether a sample profile was provided on the
+  // command line. E.g. for flattened profiles where we will not be reloading
+  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+  // provide the sample profile file.
+  if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile)
+    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
   // Do basic inference of function attributes from known properties of system
   // libraries and other oracles.
   MPM.addPass(InferFunctionAttrsPass());
@@ -607,17 +660,19 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   // More details about SamplePGO design can be found in:
   // https://research.google.com/pubs/pub45290.html
   // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
-  if (PGOOpt && !PGOOpt->SampleProfileFile.empty() &&
-      Phase == ThinLTOPhase::PostLink)
+  if (LoadSampleProfile)
     EarlyFPM.addPass(InstCombinePass());
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
 
-  if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+  if (LoadSampleProfile) {
     // Annotate sample profile right after early FPM to ensure freshness of
     // the debug info.
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                         PGOOpt->ProfileRemappingFile,
                                         Phase == ThinLTOPhase::PreLink));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
     // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
     // for the profile annotation to be accurate in the ThinLTO backend.
     if (Phase != ThinLTOPhase::PreLink)
@@ -626,7 +681,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
       // imported available_externally functions look unreferenced and are
       // removed.
       MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
-                                           true));
+                                           true /* SamplePGO */));
   }
 
   // Interprocedural constant propagation now that basic cleanup has occurred
@@ -664,12 +719,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
 
   // Add all the requested passes for instrumentation PGO, if requested.
   if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
-      (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) {
-    addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
-                      PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile,
+      (PGOOpt->Action == PGOOptions::IRInstr ||
+       PGOOpt->Action == PGOOptions::IRUse)) {
+    addPGOInstrPasses(MPM, DebugLogging, Level,
+                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+                      /* IsCS */ false, PGOOpt->ProfileFile,
                       PGOOpt->ProfileRemappingFile);
     MPM.addPass(PGOIndirectCallPromotion(false, false));
   }
+  if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+      PGOOpt->CSAction == PGOOptions::CSIRInstr)
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
 
   // Synthesize function entry counts for non-PGO compilation.
   if (EnableSyntheticCounts && !PGOOpt)
@@ -700,8 +760,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
   // because it makes profile annotation in the backend inaccurate.
   InlineParams IP = getInlineParamsFromOptLevel(Level);
-  if (Phase == ThinLTOPhase::PreLink &&
-      PGOOpt && !PGOOpt->SampleProfileFile.empty())
+  if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
+      PGOOpt->Action == PGOOptions::SampleUse)
     IP.HotCallSiteThreshold = 0;
   MainCGPipeline.addPass(InlinerPass(IP));
 
@@ -718,11 +778,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
       buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
 
-  // We only want to do hot cold splitting once for ThinLTO, during the
-  // post-link ThinLTO.
-  if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink)
-    MPM.addPass(HotColdSplittingPass());
-
   for (auto &C : CGSCCOptimizerLateEPCallbacks)
     C(MainCGPipeline, Level);
 
@@ -738,9 +793,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   return MPM;
 }
 
-ModulePassManager
-PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
-                                             bool DebugLogging) {
+ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
+    OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
   ModulePassManager MPM(DebugLogging);
 
   // Optimize globals now that the module is fully simplified.
@@ -759,14 +813,34 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   // available externally globals. Eventually they will be suppressed during
   // codegen, but eliminating here enables more opportunity for GlobalDCE as it
   // may make globals referenced by available external functions dead and saves
-  // running remaining passes on the eliminated functions.
-  MPM.addPass(EliminateAvailableExternallyPass());
+  // running remaining passes on the eliminated functions. These should be
+  // preserved during prelinking for link-time inlining decisions.
+  if (!LTOPreLink)
+    MPM.addPass(EliminateAvailableExternallyPass());
+
+  if (EnableOrderFileInstrumentation)
+    MPM.addPass(InstrOrderFilePass());
 
   // Do RPO function attribute inference across the module to forward-propagate
   // attributes where applicable.
   // FIXME: Is this really an optimization rather than a canonicalization?
   MPM.addPass(ReversePostOrderFunctionAttrsPass());
 
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
+  // cross-module inline has not been done yet. The context sensitive
+  // instrumentation is after all the inlines are done.
+  if (!LTOPreLink && PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
   // Re-require GloblasAA here prior to function passes. This is particularly
   // useful as the above will have inlined, DCE'ed, and function-attr
   // propagated everything. We should at this point have a reasonably minimal
@@ -799,7 +873,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   OptimizePM.addPass(LoopDistributePass());
 
   // Now run the core loop vectorizer.
-  OptimizePM.addPass(LoopVectorizePass());
+  OptimizePM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
   // Eliminate loads by forwarding stores from the previous iteration to loads
   // of the current iteration.
@@ -824,7 +899,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                      sinkCommonInsts(true)));
 
   // Optimize parallel scalar instruction chains into SIMD instructions.
-  OptimizePM.addPass(SLPVectorizerPass());
+  if (PTO.SLPVectorization)
+    OptimizePM.addPass(SLPVectorizerPass());
 
   OptimizePM.addPass(InstCombinePass());
 
@@ -839,16 +915,26 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
     OptimizePM.addPass(
         createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
   }
-  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+  if (PTO.LoopUnrolling)
+    OptimizePM.addPass(LoopUnrollPass(
+        LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
-  OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
+  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      DebugLogging));
 
   // Now that we've vectorized and unrolled loops, we may have more refined
   // alignment information, try to re-derive it here.
   OptimizePM.addPass(AlignmentFromAssumptionsPass());
 
+  // Split out cold code. Splitting is done late to avoid hiding context from
+  // other optimizations and inadvertently regressing performance. The tradeoff
+  // is that this has a higher code size cost than splitting early.
+  if (EnableHotColdSplit && !LTOPreLink)
+    MPM.addPass(HotColdSplittingPass());
+
   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
   // canonicalization pass that enables other optimizations. As a result,
   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -869,7 +955,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
 
   // Optimize PHIs by speculating around them when profitable. Note that this
   // pass needs to be run after any PRE or similar pass as it is essentially
-  // inserting redudnancies into the progrem. This even includes SimplifyCFG.
+  // inserting redundancies into the program. This even includes SimplifyCFG.
   OptimizePM.addPass(SpeculateAroundPHIsPass());
 
   for (auto &C : OptimizerLastEPCallbacks)
@@ -892,7 +978,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
 
 ModulePassManager
 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                           bool DebugLogging) {
+                                           bool DebugLogging, bool LTOPreLink) {
   assert(Level != O0 && "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
@@ -912,7 +998,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                                 DebugLogging));
 
   // Now add the optimization pipeline.
-  MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
+  MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink));
 
   return MPM;
 }
@@ -974,22 +1060,19 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
     //
     // Also, WPD has access to more precise information than ICP and can
     // devirtualize more effectively, so it should operate on the IR first.
+    //
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
     MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
   }
 
+  if (Level == O0)
+    return MPM;
+
   // Force any function attributes we want the rest of the pipeline to observe.
   MPM.addPass(ForceFunctionAttrsPass());
 
-  // During the ThinLTO backend phase we perform early indirect call promotion
-  // here, before globalopt. Otherwise imported available_externally functions
-  // look unreferenced and are removed.
-  // FIXME: move this into buildModuleSimplificationPipeline to merge the logic
-  //        with SamplePGO.
-  if (!PGOOpt || PGOOpt->SampleProfileFile.empty())
-    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
-                                         false /* SamplePGO */));
-
   // Add the core simplification pipeline.
   MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink,
                                                 DebugLogging));
@@ -1005,20 +1088,31 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                             bool DebugLogging) {
   assert(Level != O0 && "Must request optimizations for the default pipeline!");
   // FIXME: We should use a customized pre-link pipeline!
-  return buildPerModuleDefaultPipeline(Level, DebugLogging);
+  return buildPerModuleDefaultPipeline(Level, DebugLogging,
+                                       /* LTOPreLink */true);
 }
 
 ModulePassManager
 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
                                      ModuleSummaryIndex *ExportSummary) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
   ModulePassManager MPM(DebugLogging);
 
-  if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+  if (Level == O0) {
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    return MPM;
+  }
+
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
     // Load sample profile before running the LTO optimization pipeline.
-    MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                         PGOOpt->ProfileRemappingFile,
                                         false /* ThinLTOPhase::PreLink */));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
   }
 
   // Remove unused virtual tables to improve the quality of code generated by
@@ -1042,7 +1136,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
     // This two-step promotion is to save the compile time. For LTO, it should
     // produce the same result as if we only do promotion here.
     MPM.addPass(PGOIndirectCallPromotion(
-        true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
+        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
     // Propagate constants at call sites into the functions they call.  This
     // opens opportunities for globalopt (and inlining) by substituting function
     // pointers passed as arguments to direct uses of functions.
@@ -1062,7 +1156,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   // FIXME: Is this really an optimization rather than a canonicalization?
   MPM.addPass(ReversePostOrderFunctionAttrsPass());
 
-  // Use inragne annotations on GEP indices to split globals where beneficial.
+  // Use in-range annotations on GEP indices to split globals where beneficial.
   MPM.addPass(GlobalSplitPass());
 
   // Run whole program optimization of virtual call when the list of callees
@@ -1124,9 +1218,26 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
 
   FPM.addPass(JumpThreadingPass());
 
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass.
+  if (PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
   // Break up allocas
   FPM.addPass(SROA());
 
+  // LTO provides additional opportunities for tailcall elimination due to
+  // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallElimPass());
+
   // Run a few AA driver optimizations here and now to cleanup the code.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
@@ -1138,7 +1249,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
 
   // FIXME: once we fix LoopPass Manager, add LICM here.
   // FIXME: once we provide support for enabling MLSM, add it here.
-  // FIXME: once we provide support for enabling NewGVN, add it here.
   if (RunNewGVN)
     MainFPM.addPass(NewGVNPass());
   else
@@ -1151,7 +1261,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   MainFPM.addPass(DSEPass());
 
   // FIXME: at this point, we run a bunch of loop passes:
-  // indVarSimplify, loopDeletion, loopInterchange, loopUnrool,
+  // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
   // loopVectorize. Enable them once the remaining issue with LPM
   // are sorted out.
 
@@ -1186,6 +1296,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   // CFI is disabled.
   MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
 
+  // Enable splitting late in the FullLTO post-link pipeline. This is done in
+  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+  if (EnableHotColdSplit)
+    MPM.addPass(HotColdSplittingPass());
+
   // Add late LTO optimization passes.
   // Delete basic blocks, which optimization passes may have killed.
   MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
@@ -1196,7 +1311,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
   // Now that we have optimized the program, discard unreachable functions.
   MPM.addPass(GlobalDCEPass());
 
-  // FIXME: Enable MergeFuncs, conditionally, after ported, maybe.
+  // FIXME: Maybe enable MergeFuncs conditionally after it's ported.
   return MPM;
 }
 
@@ -1326,6 +1441,107 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
   return UnrollOpts;
 }
 
+Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
+  MemorySanitizerOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    // One ';'-separated token per option; "track-origins=" takes an integer.
+    if (ParamName == "recover") {
+      Result.Recover = true;
+    } else if (ParamName == "kernel") {
+      Result.Kernel = true;
+    } else if (ParamName.consume_front("track-origins=")) {
+      if (ParamName.getAsInteger(0, Result.TrackOrigins))
+        return make_error<StringError>(
+            formatv("invalid argument to MemorySanitizer pass track-origins "
+                    "parameter: '{0}' ",
+                    ParamName)
+                .str(),
+            inconvertibleErrorCode());
+    } else {
+      return make_error<StringError>(
+          formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
+/// Parses the ';'-separated parameter list of the SimplifyCFG pass. Boolean
+/// options accept a "no-" prefix; bad names or threshold values are errors.
+Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
+  SimplifyCFGOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "forward-switch-cond") {
+      Result.forwardSwitchCondToPhi(Enable);
+    } else if (ParamName == "switch-to-lookup") {
+      Result.convertSwitchToLookupTable(Enable);
+    } else if (ParamName == "keep-loops") {
+      Result.needCanonicalLoops(Enable);
+    } else if (ParamName == "sink-common-insts") {
+      Result.sinkCommonInsts(Enable);
+    } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
+      APInt BonusInstThreshold;
+      if (ParamName.getAsInteger(0, BonusInstThreshold))
+        return make_error<StringError>(
+            formatv("invalid argument to SimplifyCFG pass "
+                    "bonus-inst-threshold parameter: '{0}' ",
+                    ParamName).str(),
+            inconvertibleErrorCode());
+      Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
+    } else {
+      return make_error<StringError>(
+          formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
+/// Parses the ';'-separated parameter list of the LoopVectorize pass.
+Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
+  LoopVectorizeOptions Opts;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    // A leading "no-" negates the option; it is stripped before matching.
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "interleave-forced-only") {
+      Opts.setInterleaveOnlyWhenForced(Enable);
+    } else if (ParamName == "vectorize-forced-only") {
+      Opts.setVectorizeOnlyWhenForced(Enable);
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Opts;
+}
+
+Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
+  bool Result = false;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    // Only "nontrivial" / "no-nontrivial" is recognized; the last one wins.
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "nontrivial") {
+      Result = Enable;
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
 } // namespace
 
 /// Tests whether a pass name starts with a valid prefix for a default pipeline
@@ -1447,6 +1663,9 @@ static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
 #define LOOP_PASS(NAME, CREATE_PASS)                                           \
   if (Name == NAME)                                                            \
     return true;
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
+  if (checkParametrizedPassName(Name, NAME))                                   \
+    return true;
 #define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
   if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
     return true;
@@ -1834,6 +2053,14 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
     LPM.addPass(CREATE_PASS);                                                  \
     return Error::success();                                                   \
   }
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
+  if (checkParametrizedPassName(Name, NAME)) {                                 \
+    auto Params = parsePassParameters(PARSER, Name, NAME);                     \
+    if (!Params)                                                               \
+      return Params.takeError();                                               \
+    LPM.addPass(CREATE_PASS(Params.get()));                                    \
+    return Error::success();                                                   \
+  }
 #define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
   if (Name == "require<" NAME ">") {                                           \
     LPM.addPass(RequireAnalysisPass<                                           \
diff --git a/lib/Passes/PassPlugin.cpp b/lib/Passes/PassPlugin.cpp
index bf38fdb842e7..ceefa25a703b 100644
--- a/lib/Passes/PassPlugin.cpp
+++ b/lib/Passes/PassPlugin.cpp
@@ -1,9 +1,8 @@
 //===- lib/Passes/PassPluginLoader.cpp - Load Plugins for New PM Passes ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index 771d2f5b212a..347f75870eb3 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -1,9 +1,8 @@
 //===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@ MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
 MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
 MODULE_ANALYSIS("verify", VerifierAnalysis())
 MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
 
 #ifndef MODULE_ALIAS_ANALYSIS
 #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS)                               \
@@ -41,6 +41,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
 #define MODULE_PASS(NAME, CREATE_PASS)
 #endif
 MODULE_PASS("always-inline", AlwaysInlinerPass())
+MODULE_PASS("attributor", AttributorPass())
 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
 MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
 MODULE_PASS("cg-profile", CGProfilePass())
@@ -54,8 +55,11 @@ MODULE_PASS("globaldce", GlobalDCEPass())
 MODULE_PASS("globalopt", GlobalOptPass())
 MODULE_PASS("globalsplit", GlobalSplitPass())
 MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
+MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
+MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
 MODULE_PASS("inferattrs", InferFunctionAttrsPass())
 MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
+MODULE_PASS("instrorderfile", InstrOrderFilePass())
 MODULE_PASS("instrprof", InstrProfiling())
 MODULE_PASS("internalize", InternalizePass())
 MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
@@ -82,6 +86,9 @@ MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
 MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
 MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
 MODULE_PASS("verify", VerifierPass())
+MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
+MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("poison-checking", PoisonCheckingPass())
 #undef MODULE_PASS
 
 #ifndef CGSCC_ANALYSIS
@@ -178,6 +185,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
 FUNCTION_PASS("loweratomic", LowerAtomicPass())
 FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
 FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
+FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
 FUNCTION_PASS("guard-widening", GuardWideningPass())
 FUNCTION_PASS("gvn", GVN())
 FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
@@ -186,6 +194,7 @@ FUNCTION_PASS("loop-sink", LoopSinkPass())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass())
 FUNCTION_PASS("mem2reg", PromotePass())
 FUNCTION_PASS("memcpyopt", MemCpyOptPass())
+FUNCTION_PASS("mergeicmps", MergeICmpsPass())
 FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
 FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
 FUNCTION_PASS("newgvn", NewGVNPass())
@@ -194,8 +203,8 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
 FUNCTION_PASS("lcssa", LCSSAPass())
 FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
 FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
+FUNCTION_PASS("loop-fuse", LoopFusePass())
 FUNCTION_PASS("loop-distribute", LoopDistributePass())
-FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
 FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
 FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
 FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
@@ -215,7 +224,6 @@ FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
 FUNCTION_PASS("reassociate", ReassociatePass())
 FUNCTION_PASS("scalarizer", ScalarizerPass())
 FUNCTION_PASS("sccp", SCCPPass())
-FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
 FUNCTION_PASS("sink", SinkingPass())
 FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
 FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
@@ -228,10 +236,14 @@ FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
 FUNCTION_PASS("verify<loops>", LoopVerifierPass())
 FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
 FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
+FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
 FUNCTION_PASS("view-cfg", CFGViewerPass())
 FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
 FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("msan", MemorySanitizerPass())
+FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
+FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
+FUNCTION_PASS("msan", MemorySanitizerPass({}))
+FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
 FUNCTION_PASS("tsan", ThreadSanitizerPass())
 #undef FUNCTION_PASS
 
@@ -239,8 +251,25 @@ FUNCTION_PASS("tsan", ThreadSanitizerPass())
 #define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
 #endif
 FUNCTION_PASS_WITH_PARAMS("unroll",
-			  [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); },
-			  parseLoopUnrollOptions)
+                           [](LoopUnrollOptions Opts) {
+                             return LoopUnrollPass(Opts);
+                           },
+                           parseLoopUnrollOptions)
+FUNCTION_PASS_WITH_PARAMS("msan",
+                           [](MemorySanitizerOptions Opts) {
+                             return MemorySanitizerPass(Opts);
+                           },
+                           parseMSanPassOptions)
+FUNCTION_PASS_WITH_PARAMS("simplify-cfg",
+                           [](SimplifyCFGOptions Opts) {
+                             return SimplifyCFGPass(Opts);
+                           },
+                           parseSimplifyCFGOptions)
+FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
+                           [](LoopVectorizeOptions Opts) {
+                             return LoopVectorizePass(Opts);
+                           },
+                           parseLoopVectorizeOptions)
 #undef FUNCTION_PASS_WITH_PARAMS
 
 #ifndef LOOP_ANALYSIS
@@ -269,8 +298,18 @@ LOOP_PASS("indvars", IndVarSimplifyPass())
 LOOP_PASS("irce", IRCEPass())
 LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
 LOOP_PASS("unroll-full", LoopFullUnrollPass())
-LOOP_PASS("unswitch", SimpleLoopUnswitchPass())
 LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
 LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
 LOOP_PASS("loop-predication", LoopPredicationPass())
+LOOP_PASS("guard-widening", GuardWideningPass())
 #undef LOOP_PASS
+
+#ifndef LOOP_PASS_WITH_PARAMS
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+LOOP_PASS_WITH_PARAMS("unswitch",
+                           [](bool NonTrivial) {
+                             return SimpleLoopUnswitchPass(NonTrivial);
+                           },
+                           parseLoopUnswitchOptions)
+#undef LOOP_PASS_WITH_PARAMS
diff --git a/lib/Passes/StandardInstrumentations.cpp b/lib/Passes/StandardInstrumentations.cpp
index a1dfc39d472c..5cf0ca8e28f6 100644
--- a/lib/Passes/StandardInstrumentations.cpp
+++ b/lib/Passes/StandardInstrumentations.cpp
@@ -1,9 +1,8 @@
 //===- Standard pass instrumentations handling ----------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index b2dde3406a63..afd6618e7cb3 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -1,9 +1,8 @@
 //===- CoverageMapping.cpp - Code coverage mapping support ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -286,11 +285,14 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
     if (std::error_code EC = CovMappingBufOrErr.getError())
       return errorCodeToError(EC);
     StringRef Arch = Arches.empty() ? StringRef() : Arches[File.index()];
-    auto CoverageReaderOrErr =
-        BinaryCoverageReader::create(CovMappingBufOrErr.get(), Arch);
-    if (Error E = CoverageReaderOrErr.takeError())
+    MemoryBufferRef CovMappingBufRef =
+        CovMappingBufOrErr.get()->getMemBufferRef();
+    auto CoverageReadersOrErr =
+        BinaryCoverageReader::create(CovMappingBufRef, Arch, Buffers);
+    if (Error E = CoverageReadersOrErr.takeError())
       return std::move(E);
-    Readers.push_back(std::move(CoverageReaderOrErr.get()));
+    for (auto &Reader : CoverageReadersOrErr.get())
+      Readers.push_back(std::move(Reader));
     Buffers.push_back(std::move(CovMappingBufOrErr.get()));
   }
   return load(Readers, *ProfileReader);
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index ee48256bc2e5..e193e10f91d9 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -1,9 +1,8 @@
 //===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
@@ -59,7 +59,7 @@ Error RawCoverageReader::readULEB128(uint64_t &Result) {
   if (Data.empty())
     return make_error<CoverageMapError>(coveragemap_error::truncated);
   unsigned N = 0;
-  Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+  Result = decodeULEB128(Data.bytes_begin(), &N);
   if (N > Data.size())
     return make_error<CoverageMapError>(coveragemap_error::malformed);
   Data = Data.substr(N);
@@ -348,9 +348,18 @@ Expected<bool> RawCoverageMappingDummyChecker::isDummy() {
 }
 
 Error InstrProfSymtab::create(SectionRef &Section) {
-  if (auto EC = Section.getContents(Data))
-    return errorCodeToError(EC);
+  Expected<StringRef> DataOrErr = Section.getContents();
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  Data = *DataOrErr;
   Address = Section.getAddress();
+
+  // If this is a linked PE/COFF file, then we have to skip over the null byte
+  // that is allocated in the .lprfn$A section in the LLVM profiling runtime.
+  const ObjectFile *Obj = Section.getObject();
+  if (isa<COFFObjectFile>(Obj) && !Obj->isRelocatableObject())
+    Data = Data.drop_front(1);
+
   return Error::success();
 }
 
@@ -577,35 +586,65 @@ static Error readCoverageMappingData(
 
 static const char *TestingFormatMagic = "llvmcovmtestdata";
 
-static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
-                               StringRef &CoverageMapping,
-                               uint8_t &BytesInAddress,
-                               support::endianness &Endian) {
-  BytesInAddress = 8;
-  Endian = support::endianness::little;
+Expected<std::unique_ptr<BinaryCoverageReader>>
+BinaryCoverageReader::createCoverageReaderFromBuffer(
+    StringRef Coverage, InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress,
+    support::endianness Endian) {
+  std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
+  Reader->ProfileNames = std::move(ProfileNames);
+  if (BytesInAddress == 4 && Endian == support::endianness::little) {
+    if (Error E =
+            readCoverageMappingData<uint32_t, support::endianness::little>(
+                Reader->ProfileNames, Coverage, Reader->MappingRecords,
+                Reader->Filenames))
+      return std::move(E);
+  } else if (BytesInAddress == 4 && Endian == support::endianness::big) {
+    if (Error E = readCoverageMappingData<uint32_t, support::endianness::big>(
+            Reader->ProfileNames, Coverage, Reader->MappingRecords,
+            Reader->Filenames))
+      return std::move(E);
+  } else if (BytesInAddress == 8 && Endian == support::endianness::little) {
+    if (Error E =
+            readCoverageMappingData<uint64_t, support::endianness::little>(
+                Reader->ProfileNames, Coverage, Reader->MappingRecords,
+                Reader->Filenames))
+      return std::move(E);
+  } else if (BytesInAddress == 8 && Endian == support::endianness::big) {
+    if (Error E = readCoverageMappingData<uint64_t, support::endianness::big>(
+            Reader->ProfileNames, Coverage, Reader->MappingRecords,
+            Reader->Filenames))
+      return std::move(E);
+  } else
+    return make_error<CoverageMapError>(coveragemap_error::malformed);
+  return std::move(Reader);
+}
+
+static Expected<std::unique_ptr<BinaryCoverageReader>>
+loadTestingFormat(StringRef Data) {
+  uint8_t BytesInAddress = 8;
+  support::endianness Endian = support::endianness::little;
 
   Data = Data.substr(StringRef(TestingFormatMagic).size());
   if (Data.empty())
     return make_error<CoverageMapError>(coveragemap_error::truncated);
   unsigned N = 0;
-  auto ProfileNamesSize =
-      decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+  uint64_t ProfileNamesSize = decodeULEB128(Data.bytes_begin(), &N);
   if (N > Data.size())
     return make_error<CoverageMapError>(coveragemap_error::malformed);
   Data = Data.substr(N);
   if (Data.empty())
     return make_error<CoverageMapError>(coveragemap_error::truncated);
   N = 0;
-  uint64_t Address =
-      decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+  uint64_t Address = decodeULEB128(Data.bytes_begin(), &N);
   if (N > Data.size())
     return make_error<CoverageMapError>(coveragemap_error::malformed);
   Data = Data.substr(N);
   if (Data.size() < ProfileNamesSize)
     return make_error<CoverageMapError>(coveragemap_error::malformed);
+  InstrProfSymtab ProfileNames;
   if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address))
-    return E;
-  CoverageMapping = Data.substr(ProfileNamesSize);
+    return std::move(E);
+  StringRef CoverageMapping = Data.substr(ProfileNamesSize);
   // Skip the padding bytes because coverage map data has an alignment of 8.
   if (CoverageMapping.empty())
     return make_error<CoverageMapError>(coveragemap_error::truncated);
@@ -613,29 +652,32 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
   if (CoverageMapping.size() < Pad)
     return make_error<CoverageMapError>(coveragemap_error::malformed);
   CoverageMapping = CoverageMapping.substr(Pad);
-  return Error::success();
+  return BinaryCoverageReader::createCoverageReaderFromBuffer(
+      CoverageMapping, std::move(ProfileNames), BytesInAddress, Endian);
 }
 
 static Expected<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) {
+  // On COFF, the object file section name may end in "$M". This tells the
+  // linker to sort these sections between "$A" and "$Z". The linker removes the
+  // dollar and everything after it in the final binary. Do the same to match.
+  bool IsCOFF = isa<COFFObjectFile>(OF);
+  auto stripSuffix = [IsCOFF](StringRef N) {
+    return IsCOFF ? N.split('$').first : N;
+  };
+  Name = stripSuffix(Name);
+
   StringRef FoundName;
   for (const auto &Section : OF.sections()) {
     if (auto EC = Section.getName(FoundName))
       return errorCodeToError(EC);
-    if (FoundName == Name)
+    if (stripSuffix(FoundName) == Name)
       return Section;
   }
   return make_error<CoverageMapError>(coveragemap_error::no_data_found);
 }
 
-static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
-                              InstrProfSymtab &ProfileNames,
-                              StringRef &CoverageMapping,
-                              uint8_t &BytesInAddress,
-                              support::endianness &Endian, StringRef Arch) {
-  auto BinOrErr = createBinary(ObjectBuffer);
-  if (!BinOrErr)
-    return BinOrErr.takeError();
-  auto Bin = std::move(BinOrErr.get());
+static Expected<std::unique_ptr<BinaryCoverageReader>>
+loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch) {
   std::unique_ptr<ObjectFile> OF;
   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
     // If we have a universal binary, try to look up the object for the
@@ -655,9 +697,10 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
     return make_error<CoverageMapError>(coveragemap_error::malformed);
 
   // The coverage uses native pointer sizes for the object it's written in.
-  BytesInAddress = OF->getBytesInAddress();
-  Endian = OF->isLittleEndian() ? support::endianness::little
-                                : support::endianness::big;
+  uint8_t BytesInAddress = OF->getBytesInAddress();
+  support::endianness Endian = OF->isLittleEndian()
+                                   ? support::endianness::little
+                                   : support::endianness::big;
 
   // Look for the sections that we are interested in.
   auto ObjFormat = OF->getTripleObjectFormat();
@@ -665,63 +708,101 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
       lookupSection(*OF, getInstrProfSectionName(IPSK_name, ObjFormat,
                                                  /*AddSegmentInfo=*/false));
   if (auto E = NamesSection.takeError())
-    return E;
+    return std::move(E);
   auto CoverageSection =
       lookupSection(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat,
                                                  /*AddSegmentInfo=*/false));
   if (auto E = CoverageSection.takeError())
-    return E;
+    return std::move(E);
 
   // Get the contents of the given sections.
-  if (auto EC = CoverageSection->getContents(CoverageMapping))
-    return errorCodeToError(EC);
+  auto CoverageMappingOrErr = CoverageSection->getContents();
+  if (!CoverageMappingOrErr)
+    return CoverageMappingOrErr.takeError();
+
+  InstrProfSymtab ProfileNames;
   if (Error E = ProfileNames.create(*NamesSection))
-    return E;
+    return std::move(E);
 
-  return Error::success();
+  return BinaryCoverageReader::createCoverageReaderFromBuffer(
+      CoverageMappingOrErr.get(), std::move(ProfileNames), BytesInAddress,
+      Endian);
 }
 
-Expected<std::unique_ptr<BinaryCoverageReader>>
-BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
-                             StringRef Arch) {
-  std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
+Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
+BinaryCoverageReader::create(
+    MemoryBufferRef ObjectBuffer, StringRef Arch,
+    SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers) {
+  std::vector<std::unique_ptr<BinaryCoverageReader>> Readers;
 
-  StringRef Coverage;
-  uint8_t BytesInAddress;
-  support::endianness Endian;
-  Error E = Error::success();
-  consumeError(std::move(E));
-  if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
+  if (ObjectBuffer.getBuffer().startswith(TestingFormatMagic)) {
     // This is a special format used for testing.
-    E = loadTestingFormat(ObjectBuffer->getBuffer(), Reader->ProfileNames,
-                          Coverage, BytesInAddress, Endian);
-  else
-    E = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Reader->ProfileNames,
-                         Coverage, BytesInAddress, Endian, Arch);
-  if (E)
-    return std::move(E);
+    auto ReaderOrErr = loadTestingFormat(ObjectBuffer.getBuffer());
+    if (!ReaderOrErr)
+      return ReaderOrErr.takeError();
+    Readers.push_back(std::move(ReaderOrErr.get()));
+    return std::move(Readers);
+  }
 
-  if (BytesInAddress == 4 && Endian == support::endianness::little)
-    E = readCoverageMappingData<uint32_t, support::endianness::little>(
-        Reader->ProfileNames, Coverage, Reader->MappingRecords,
-        Reader->Filenames);
-  else if (BytesInAddress == 4 && Endian == support::endianness::big)
-    E = readCoverageMappingData<uint32_t, support::endianness::big>(
-        Reader->ProfileNames, Coverage, Reader->MappingRecords,
-        Reader->Filenames);
-  else if (BytesInAddress == 8 && Endian == support::endianness::little)
-    E = readCoverageMappingData<uint64_t, support::endianness::little>(
-        Reader->ProfileNames, Coverage, Reader->MappingRecords,
-        Reader->Filenames);
-  else if (BytesInAddress == 8 && Endian == support::endianness::big)
-    E = readCoverageMappingData<uint64_t, support::endianness::big>(
-        Reader->ProfileNames, Coverage, Reader->MappingRecords,
-        Reader->Filenames);
-  else
-    return make_error<CoverageMapError>(coveragemap_error::malformed);
-  if (E)
-    return std::move(E);
-  return std::move(Reader);
+  auto BinOrErr = createBinary(ObjectBuffer);
+  if (!BinOrErr)
+    return BinOrErr.takeError();
+  std::unique_ptr<Binary> Bin = std::move(BinOrErr.get());
+
+  // MachO universal binaries which contain archives need to be treated as
+  // archives, not as regular binaries.
+  if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
+    for (auto &ObjForArch : Universal->objects()) {
+      // Skip slices within the universal binary which target the wrong arch.
+      std::string ObjArch = ObjForArch.getArchFlagName();
+      if (Arch != ObjArch)
+        continue;
+
+      auto ArchiveOrErr = ObjForArch.getAsArchive();
+      if (!ArchiveOrErr) {
+        // If this is not an archive, try treating it as a regular object.
+        consumeError(ArchiveOrErr.takeError());
+        break;
+      }
+
+      return BinaryCoverageReader::create(
+          ArchiveOrErr.get()->getMemoryBufferRef(), Arch, ObjectFileBuffers);
+    }
+  }
+
+  // Load coverage out of archive members.
+  if (auto *Ar = dyn_cast<Archive>(Bin.get())) {
+    Error Err = Error::success();
+    for (auto &Child : Ar->children(Err)) {
+      Expected<MemoryBufferRef> ChildBufOrErr = Child.getMemoryBufferRef();
+      if (!ChildBufOrErr)
+        return ChildBufOrErr.takeError();
+
+      auto ChildReadersOrErr = BinaryCoverageReader::create(
+          ChildBufOrErr.get(), Arch, ObjectFileBuffers);
+      if (!ChildReadersOrErr)
+        return ChildReadersOrErr.takeError();
+      for (auto &Reader : ChildReadersOrErr.get())
+        Readers.push_back(std::move(Reader));
+    }
+    if (Err)
+      return std::move(Err);
+
+    // Thin archives reference object files outside of the archive file, i.e.
+    // files which reside in memory not owned by the caller. Transfer ownership
+    // to the caller.
+    if (Ar->isThin())
+      for (auto &Buffer : Ar->takeThinBuffers())
+        ObjectFileBuffers.push_back(std::move(Buffer));
+
+    return std::move(Readers);
+  }
+
+  auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch);
+  if (!ReaderOrErr)
+    return ReaderOrErr.takeError();
+  Readers.push_back(std::move(ReaderOrErr.get()));
+  return std::move(Readers);
 }
 
 Error BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) {
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index bb3f4f854e04..432b20f217ca 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -1,9 +1,8 @@
 //===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,15 +124,14 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
 
   // Sort the regions in an ascending order by the file id and the starting
   // location. Sort by region kinds to ensure stable order for tests.
-  std::stable_sort(
-      MappingRegions.begin(), MappingRegions.end(),
-      [](const CounterMappingRegion &LHS, const CounterMappingRegion &RHS) {
-        if (LHS.FileID != RHS.FileID)
-          return LHS.FileID < RHS.FileID;
-        if (LHS.startLoc() != RHS.startLoc())
-          return LHS.startLoc() < RHS.startLoc();
-        return LHS.Kind < RHS.Kind;
-      });
+  llvm::stable_sort(MappingRegions, [](const CounterMappingRegion &LHS,
+                                       const CounterMappingRegion &RHS) {
+    if (LHS.FileID != RHS.FileID)
+      return LHS.FileID < RHS.FileID;
+    if (LHS.startLoc() != RHS.startLoc())
+      return LHS.startLoc() < RHS.startLoc();
+    return LHS.Kind < RHS.Kind;
+  });
 
   // Write out the fileid -> filename mapping.
   encodeULEB128(VirtualFileMapping.size(), OS);
diff --git a/lib/ProfileData/GCOV.cpp b/lib/ProfileData/GCOV.cpp
index b687346a2c05..fa4e433d7aa6 100644
--- a/lib/ProfileData/GCOV.cpp
+++ b/lib/ProfileData/GCOV.cpp
@@ -1,9 +1,8 @@
 //===- GCOV.cpp - LLVM coverage tool --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <system_error>
@@ -396,10 +396,10 @@ void GCOVBlock::addCount(size_t DstEdgeNo, uint64_t N) {
 /// sortDstEdges - Sort destination edges by block number, nop if already
 /// sorted. This is required for printing branch info in the correct order.
 void GCOVBlock::sortDstEdges() {
-  if (!DstEdgesAreSorted) {
-    SortDstEdgesFunctor SortEdges;
-    std::stable_sort(DstEdges.begin(), DstEdges.end(), SortEdges);
-  }
+  if (!DstEdgesAreSorted)
+    llvm::stable_sort(DstEdges, [](const GCOVEdge *E1, const GCOVEdge *E2) {
+      return E1->Dst.Number < E2->Dst.Number;
+    });
 }
 
 /// collectLineCounts - Collect line counts. This must be used after
@@ -687,7 +687,15 @@ std::string FileInfo::getCoveragePath(StringRef Filename,
   if (Options.LongFileNames && !Filename.equals(MainFilename))
     CoveragePath =
         mangleCoveragePath(MainFilename, Options.PreservePaths) + "##";
-  CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov";
+  CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths);
+  if (Options.HashFilenames) {
+    MD5 Hasher;
+    MD5::MD5Result Result;
+    Hasher.update(Filename.str());
+    Hasher.final(Result);
+    CoveragePath += "##" + Result.digest().str().str();
+  }
+  CoveragePath += ".gcov";
   return CoveragePath;
 }
 
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index aaa8000ff2f9..510fd9887d9a 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -1,9 +1,8 @@
 //===- InstrProf.cpp - Instrumented profiling format support --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,6 +29,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -363,16 +363,15 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
 
 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
   finalizeSymtab();
-  auto Result =
-      std::lower_bound(AddrToMD5Map.begin(), AddrToMD5Map.end(), Address,
-                       [](const std::pair<uint64_t, uint64_t> &LHS,
-                          uint64_t RHS) { return LHS.first < RHS; });
+  auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
+    return A.first < Address;
+  });
   // Raw function pointer collected by value profiler may be from
   // external functions that are not instrumented. They won't have
   // mapping data to be used by the deserializer. Force the value to
   // be 0 in this case.
-  if (Result != AddrToMD5Map.end() && Result->first == Address)
-    return (uint64_t)Result->second;
+  if (It != AddrToMD5Map.end() && It->first == Address)
+    return (uint64_t)It->second;
   return 0;
 }
 
@@ -435,9 +434,8 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
 }
 
 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
-  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
-  const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
-                                                          NameStrings.size());
+  const uint8_t *P = NameStrings.bytes_begin();
+  const uint8_t *EndP = NameStrings.bytes_end();
   while (P < EndP) {
     uint32_t N;
     uint64_t UncompressedSize = decodeULEB128(P, &N);
@@ -480,6 +478,126 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
   return Error::success();
 }
 
+void InstrProfRecord::accumuateCounts(CountSumOrPercent &Sum) const {
+  // Edge counters: each one is an entry, and their total goes into CountSum.
+  Sum.NumEntries += Counts.size();
+  uint64_t EdgeTotal = 0;
+  for (uint64_t Count : Counts)
+    EdgeTotal += Count;
+  Sum.CountSum += EdgeTotal;
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+    uint64_t KindTotal = 0; // Sum over all value sites of this kind.
+    const uint32_t SiteCount = getNumValueSites(Kind);
+    for (uint32_t Site = 0; Site != SiteCount; ++Site) {
+      const uint32_t DataCount = getNumValueDataForSite(Kind, Site);
+      std::unique_ptr<InstrProfValueData[]> Data = getValueForSite(Kind, Site);
+      for (uint32_t Idx = 0; Idx != DataCount; ++Idx)
+        KindTotal += Data[Idx].Count;
+    }
+    Sum.ValueCounts[Kind] += KindTotal;
+  }
+}
+
+void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord &Input,
+                                       uint32_t ValueKind,
+                                       OverlapStats &Overlap,
+                                       OverlapStats &FuncLevelOverlap) {
+  this->sortByTargetValues();
+  Input.sortByTargetValues();
+  double Score = 0.0f, FuncLevelScore = 0.0f;
+  auto Lhs = ValueData.begin(), LhsEnd = ValueData.end();
+  auto Rhs = Input.ValueData.begin(), RhsEnd = Input.ValueData.end();
+  // Merge walk over both lists, which were just sorted by target value.
+  while (Lhs != LhsEnd && Rhs != RhsEnd) {
+    if (Lhs->Value < Rhs->Value) {
+      ++Lhs; // Target value seen only in this record.
+    } else if (Rhs->Value < Lhs->Value) {
+      ++Rhs; // Target value seen only in Input.
+    } else { // Shared target: score it at program and at function level.
+      Score += OverlapStats::score(Lhs->Count, Rhs->Count,
+                                   Overlap.Base.ValueCounts[ValueKind],
+                                   Overlap.Test.ValueCounts[ValueKind]);
+      FuncLevelScore += OverlapStats::score(
+          Lhs->Count, Rhs->Count, FuncLevelOverlap.Base.ValueCounts[ValueKind],
+          FuncLevelOverlap.Test.ValueCounts[ValueKind]);
+      ++Lhs;
+      ++Rhs;
+    }
+  }
+  Overlap.Overlap.ValueCounts[ValueKind] += Score;
+  FuncLevelOverlap.Overlap.ValueCounts[ValueKind] += FuncLevelScore;
+}
+
+// Overlap every ValueKind site of this record with the same site in Other.
+void InstrProfRecord::overlapValueProfData(uint32_t ValueKind,
+                                           InstrProfRecord &Other,
+                                           OverlapStats &Overlap,
+                                           OverlapStats &FuncLevelOverlap) {
+  uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+  assert(ThisNumValueSites == Other.getNumValueSites(ValueKind));
+  if (!ThisNumValueSites)
+    return;
+  // Sites are paired by index; the assert above expects equal site counts.
+  std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+      getOrCreateValueSitesForKind(ValueKind);
+  MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
+      Other.getValueSitesForKind(ValueKind);
+  for (uint32_t I = 0; I < ThisNumValueSites; I++)
+    ThisSiteRecords[I].overlap(OtherSiteRecords[I], ValueKind, Overlap,
+                               FuncLevelOverlap);
+}
+
+void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap,
+                              OverlapStats &FuncLevelOverlap,
+                              uint64_t ValueCutoff) {
+  // FuncLevel CountSum for Other should already be computed and nonzero.
+  assert(FuncLevelOverlap.Test.CountSum >= 1.0f);
+  accumuateCounts(FuncLevelOverlap.Base); // Sum this record into Base.
+  bool Mismatch = (Counts.size() != Other.Counts.size());
+
+  // Also treat differing value-site counts (for any kind) as a mismatch.
+  if (!Mismatch) {
+    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+      uint32_t ThisNumValueSites = getNumValueSites(Kind);
+      uint32_t OtherNumValueSites = Other.getNumValueSites(Kind);
+      if (ThisNumValueSites != OtherNumValueSites) {
+        Mismatch = true;
+        break;
+      }
+    }
+  }
+  if (Mismatch) { // Record the mismatch and skip all scoring.
+    Overlap.addOneMismatch(FuncLevelOverlap.Test);
+    return;
+  }
+
+  // Compute overlap for value counts.
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    overlapValueProfData(Kind, Other, Overlap, FuncLevelOverlap);
+
+  double Score = 0.0;
+  uint64_t MaxCount = 0; // Largest edge count found in Other.
+  // Compute overlap for edge counts.
+  for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
+    Score += OverlapStats::score(Counts[I], Other.Counts[I],
+                                 Overlap.Base.CountSum, Overlap.Test.CountSum);
+    MaxCount = std::max(Other.Counts[I], MaxCount);
+  }
+  Overlap.Overlap.CountSum += Score;
+  Overlap.Overlap.NumEntries += 1;
+  // Fill in function-level stats only if Other's largest count hits the cutoff.
+  if (MaxCount >= ValueCutoff) {
+    double FuncScore = 0.0;
+    for (size_t I = 0, E = Other.Counts.size(); I < E; ++I)
+      FuncScore += OverlapStats::score(Counts[I], Other.Counts[I],
+                                       FuncLevelOverlap.Base.CountSum,
+                                       FuncLevelOverlap.Test.CountSum);
+    FuncLevelOverlap.Overlap.CountSum = FuncScore;
+    FuncLevelOverlap.Overlap.NumEntries = Other.Counts.size();
+    FuncLevelOverlap.Valid = true;
+  }
+}
+
 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
                                      uint64_t Weight,
                                      function_ref<void(instrprof_error)> Warn) {
@@ -1012,4 +1130,153 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
   assert(RangeLast >= RangeStart);
 }
 
+// Emit the INSTR_PROF_RAW_VERSION_VAR global whose value carries the raw
+// version with the IR-profile variant bit set, plus the CSIR bit when IsCS.
+void createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+  const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
+  uint64_t Version = INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF;
+  if (IsCS)
+    Version |= VARIANT_MASK_CSIR_PROF;
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
+  Constant *Init = Constant::getIntegerValue(Int64Ty, APInt(64, Version));
+  auto *VersionVar = new GlobalVariable(
+      M, Int64Ty, true, GlobalValue::WeakAnyLinkage, Init, VarName);
+  VersionVar->setVisibility(GlobalValue::DefaultVisibility);
+  // COMDAT targets: external linkage in a COMDAT named after the variable.
+  if (Triple(M.getTargetTriple()).supportsCOMDAT()) {
+    VersionVar->setLinkage(GlobalValue::ExternalLinkage);
+    VersionVar->setComdat(M.getOrInsertComdat(VarName));
+  }
+}
+
+// Emit INSTR_PROF_PROFILE_NAME_VAR holding InstrProfileOutput as a
+// null-terminated string; nothing is emitted when the name is empty.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
+  if (InstrProfileOutput.empty())
+    return;
+  const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
+  Constant *NameInit = ConstantDataArray::getString(
+      M.getContext(), InstrProfileOutput, /*AddNull=*/true);
+  auto *NameVar =
+      new GlobalVariable(M, NameInit->getType(), true,
+                         GlobalValue::WeakAnyLinkage, NameInit, VarName);
+  if (Triple(M.getTargetTriple()).supportsCOMDAT()) {
+    NameVar->setLinkage(GlobalValue::ExternalLinkage);
+    NameVar->setComdat(M.getOrInsertComdat(VarName));
+  }
+}
+
+Error OverlapStats::accumuateCounts(const std::string &BaseFilename,
+                                    const std::string &TestFilename,
+                                    bool IsCS) {
+  // Opens one profile and folds its counts into Sum.
+  auto SumProfile = [IsCS](const std::string &Path,
+                           CountSumOrPercent &Sum) -> Error {
+    auto ReaderOrErr = InstrProfReader::create(Path);
+    if (Error E = ReaderOrErr.takeError())
+      return E;
+    std::unique_ptr<InstrProfReader> Reader = std::move(ReaderOrErr.get());
+    Reader->accumuateCounts(Sum, IsCS);
+    return Error::success();
+  };
+
+  if (Error E = SumProfile(BaseFilename, Base))
+    return E;
+  if (Error E = SumProfile(TestFilename, Test))
+    return E;
+  // Only the addresses are stored; the caller's strings must stay alive.
+  this->BaseFilename = &BaseFilename;
+  this->TestFilename = &TestFilename;
+  Valid = true;
+  return Error::success();
+}
+
+void OverlapStats::addOneMismatch(const CountSumOrPercent &MismatchFunc) {
+  ++Mismatch.NumEntries;
+  Mismatch.CountSum += MismatchFunc.CountSum / Test.CountSum;
+  for (unsigned Kind = 0; Kind < IPVK_Last - IPVK_First + 1; ++Kind) {
+    const auto TestTotal = Test.ValueCounts[Kind];
+    if (TestTotal >= 1.0f) // Skip kinds whose test total is below 1.
+      Mismatch.ValueCounts[Kind] += MismatchFunc.ValueCounts[Kind] / TestTotal;
+  }
+}
+
+void OverlapStats::addOneUnique(const CountSumOrPercent &UniqueFunc) {
+  ++Unique.NumEntries;
+  Unique.CountSum += UniqueFunc.CountSum / Test.CountSum;
+  for (unsigned Kind = 0; Kind < IPVK_Last - IPVK_First + 1; ++Kind)
+    if (Test.ValueCounts[Kind] >= 1.0f) // Skip kinds with a test total below 1.
+      Unique.ValueCounts[Kind] +=
+          UniqueFunc.ValueCounts[Kind] / Test.ValueCounts[Kind];
+}
+
+void OverlapStats::dump(raw_fd_ostream &OS) const {
+  if (!Valid) // Stats that were never marked valid print nothing.
+    return;
+  // Header: file names at program level, function name/hash otherwise.
+  const char *EntryName =
+      (Level == ProgramLevel ? "functions" : "edge counters");
+  if (Level == ProgramLevel) {
+    OS << "Profile overlap infomation for base_profile: " << *BaseFilename
+       << " and test_profile: " << *TestFilename << "\nProgram level:\n";
+  } else {
+    OS << "Function level:\n"
+       << "  Function: " << FuncName << " (Hash=" << FuncHash << ")\n";
+  }
+  // Entry tallies; mismatch and unique lines appear only when nonzero.
+  OS << "  # of " << EntryName << " overlap: " << Overlap.NumEntries << "\n";
+  if (Mismatch.NumEntries)
+    OS << "  # of " << EntryName << " mismatch: " << Mismatch.NumEntries
+       << "\n";
+  if (Unique.NumEntries)
+    OS << "  # of " << EntryName
+       << " only in test_profile: " << Unique.NumEntries << "\n";
+  // Edge-count figures; the stored fractions are printed as percentages.
+  OS << "  Edge profile overlap: " << format("%.3f%%", Overlap.CountSum * 100)
+     << "\n";
+  if (Mismatch.NumEntries)
+    OS << "  Mismatched count percentage (Edge): "
+       << format("%.3f%%", Mismatch.CountSum * 100) << "\n";
+  if (Unique.NumEntries)
+    OS << "  Percentage of Edge profile only in test_profile: "
+       << format("%.3f%%", Unique.CountSum * 100) << "\n";
+  OS << "  Edge profile base count sum: " << format("%.0f", Base.CountSum)
+     << "\n"
+     << "  Edge profile test count sum: " << format("%.0f", Test.CountSum)
+     << "\n";
+  // One section per value kind; kinds with both totals below 1 are skipped.
+  for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
+    if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
+      continue;
+    char ProfileKindName[20];
+    switch (I) {
+    case IPVK_IndirectCallTarget:
+      strncpy(ProfileKindName, "IndirectCall", 19);
+      break;
+    case IPVK_MemOPSize:
+      strncpy(ProfileKindName, "MemOP", 19);
+      break;
+    default:
+      snprintf(ProfileKindName, 19, "VP[%u]", I); // I is unsigned, hence %u.
+      break;
+    }
+    OS << "  " << ProfileKindName
+       << " profile overlap: " << format("%.3f%%", Overlap.ValueCounts[I] * 100)
+       << "\n";
+    if (Mismatch.NumEntries)
+      OS << "  Mismatched count percentage (" << ProfileKindName
+         << "): " << format("%.3f%%", Mismatch.ValueCounts[I] * 100) << "\n";
+    if (Unique.NumEntries)
+      OS << "  Percentage of " << ProfileKindName
+         << " profile only in test_profile: "
+         << format("%.3f%%", Unique.ValueCounts[I] * 100) << "\n";
+    OS << "  " << ProfileKindName
+       << " profile base count sum: " << format("%.0f", Base.ValueCounts[I])
+       << "\n"
+       << "  " << ProfileKindName
+       << " profile test count sum: " << format("%.0f", Test.ValueCounts[I])
+       << "\n";
+  }
+}
+
 } // end namespace llvm
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index eaf0eb04bfbf..fec1c152991c 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -1,9 +1,8 @@
 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,7 +62,7 @@ InstrProfReader::create(const Twine &Path) {
 Expected<std::unique_ptr<InstrProfReader>>
 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
   // Sanity check the buffer.
-  if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
+  if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
     return make_error<InstrProfError>(instrprof_error::too_large);
 
   if (Buffer->getBufferSize() == 0)
@@ -114,7 +113,7 @@ Expected<std::unique_ptr<IndexedInstrProfReader>>
 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                                std::unique_ptr<MemoryBuffer> RemappingBuffer) {
   // Sanity check the buffer.
-  if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
+  if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
     return make_error<InstrProfError>(instrprof_error::too_large);
 
   // Create the reader.
@@ -163,7 +162,10 @@ Error TextInstrProfReader::readHeader() {
     IsIRInstr = true;
   else if (Str.equals_lower("fe"))
     IsIRInstr = false;
-  else
+  else if (Str.equals_lower("csir")) {
+    IsIRInstr = true;
+    HasCSIRLevelProfile = true;
+  } else
     return error(instrprof_error::bad_header);
 
   ++Line;
@@ -734,7 +736,7 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
 
 const unsigned char *
 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
-                                    const unsigned char *Cur) {
+                                    const unsigned char *Cur, bool UseCS) {
   using namespace IndexedInstrProf;
   using namespace support;
 
@@ -761,10 +763,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
       DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
                                    Ent.NumBlocks);
     }
+    std::unique_ptr<llvm::ProfileSummary> &Summary =
+        UseCS ? this->CS_Summary : this->Summary;
+
     // initialize InstrProfSummary using the SummaryData from disk.
-    this->Summary = llvm::make_unique<ProfileSummary>(
-        ProfileSummary::PSK_Instr, DetailedSummary,
-        SummaryData->get(Summary::TotalBlockCount),
+    Summary = llvm::make_unique<ProfileSummary>(
+        UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
+        DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
         SummaryData->get(Summary::MaxBlockCount),
         SummaryData->get(Summary::MaxInternalBlockCount),
         SummaryData->get(Summary::MaxFunctionCount),
@@ -806,7 +811,11 @@ Error IndexedInstrProfReader::readHeader() {
       IndexedInstrProf::ProfVersion::CurrentVersion)
     return error(instrprof_error::unsupported_version);
 
-  Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur);
+  Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+                    /* UseCS */ false);
+  if (FormatVersion & VARIANT_MASK_CSIR_PROF)
+    Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+                      /* UseCS */ true);
 
   // Read the hash type and start offset.
   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
@@ -891,3 +900,17 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
   }
   return success();
 }
+
+void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) {
+  uint64_t Accepted = 0;
+  for (const auto &Record : *this) {
+    // In IR-level profiles, keep only records whose CS flag matches IsCS.
+    if (isIRLevelProfile() &&
+        NamedInstrProfRecord::hasCSFlagInHash(Record.Hash) != IsCS)
+      continue;
+    Record.accumuateCounts(Sum);
+    ++Accepted;
+  }
+  // NumEntries is overwritten with the number of records that were summed.
+  Sum.NumEntries = Accepted;
+}
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 18b9deec158f..4ca2defd26da 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -1,9 +1,8 @@
 //===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,6 +101,7 @@ public:
 
   support::endianness ValueProfDataEndianness = support::little;
   InstrProfSummaryBuilder *SummaryBuilder;
+  InstrProfSummaryBuilder *CSSummaryBuilder;
 
   InstrProfRecordWriterTrait() = default;
 
@@ -143,7 +143,10 @@ public:
     endian::Writer LE(Out, little);
     for (const auto &ProfileData : *V) {
       const InstrProfRecord &ProfRecord = ProfileData.second;
-      SummaryBuilder->addRecord(ProfRecord);
+      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
+        CSSummaryBuilder->addRecord(ProfRecord);
+      else
+        SummaryBuilder->addRecord(ProfRecord);
 
       LE.write<uint64_t>(ProfileData.first); // Function hash
       LE.write<uint64_t>(ProfRecord.Counts.size());
@@ -184,6 +187,40 @@ void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
   addRecord(Name, Hash, std::move(I), Weight, Warn);
 }
 
+void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
+                                    OverlapStats &Overlap,
+                                    OverlapStats &FuncLevelOverlap,
+                                    const OverlapFuncFilters &FuncFilter) {
+  auto Name = Other.Name;
+  auto Hash = Other.Hash;
+  Other.accumuateCounts(FuncLevelOverlap.Test); // Other is the test side.
+  if (FunctionData.find(Name) == FunctionData.end()) { // Name only in Other.
+    Overlap.addOneUnique(FuncLevelOverlap.Test);
+    return;
+  }
+  if (FuncLevelOverlap.Test.CountSum < 1.0f) { // Count sum below 1: no scoring.
+    Overlap.Overlap.NumEntries += 1;
+    return;
+  }
+  auto &ProfileDataMap = FunctionData[Name];
+  bool NewFunc;
+  ProfilingData::iterator Where;
+  std::tie(Where, NewFunc) =
+      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
+  if (NewFunc) { // Hash was absent; insert() above added an empty record.
+    Overlap.addOneMismatch(FuncLevelOverlap.Test);
+    return;
+  }
+  InstrProfRecord &Dest = Where->second;
+  // Names containing NameFilter get a cutoff of 0 instead of the default.
+  uint64_t ValueCutoff = FuncFilter.ValueCutoff;
+  if (!FuncFilter.NameFilter.empty() &&
+      Name.find(FuncFilter.NameFilter) != Name.npos)
+    ValueCutoff = 0;
+
+  Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
+}
+
 void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                 InstrProfRecord &&I, uint64_t Weight,
                                 function_ref<void(Error)> Warn) {
@@ -254,6 +291,8 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
 
   InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
   InfoObj->SummaryBuilder = &ISB;
+  InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
+  InfoObj->CSSummaryBuilder = &CSISB;
 
   // Populate the hash table generator.
   for (const auto &I : FunctionData)
@@ -265,6 +304,10 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
   if (ProfileKind == PF_IRLevel)
     Header.Version |= VARIANT_MASK_IR_PROF;
+  if (ProfileKind == PF_IRLevelWithCS) {
+    Header.Version |= VARIANT_MASK_IR_PROF;
+    Header.Version |= VARIANT_MASK_CSIR_PROF;
+  }
   Header.Unused = 0;
   Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
   Header.HashOffset = 0;
@@ -288,6 +331,14 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t SummaryOffset = OS.tell();
   for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
     OS.write(0);
+  uint64_t CSSummaryOffset = 0;
+  uint64_t CSSummarySize = 0;
+  if (ProfileKind == PF_IRLevelWithCS) {
+    CSSummaryOffset = OS.tell();
+    CSSummarySize = SummarySize / sizeof(uint64_t);
+    for (unsigned I = 0; I < CSSummarySize; I++)
+      OS.write(0);
+  }
 
   // Write the hash table.
   uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
@@ -301,13 +352,25 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
   setSummary(TheSummary.get(), *PS);
   InfoObj->SummaryBuilder = nullptr;
 
+  // For Context Sensitive summary.
+  std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
+  if (ProfileKind == PF_IRLevelWithCS) {
+    TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
+    std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
+    setSummary(TheCSSummary.get(), *CSPS);
+  }
+  InfoObj->CSSummaryBuilder = nullptr;
+
   // Now do the final patch:
   PatchItem PatchItems[] = {
       // Patch the Header.HashOffset field.
       {HashTableStartFieldOffset, &HashTableStart, 1},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
-       (int)(SummarySize / sizeof(uint64_t))}};
+       (int)(SummarySize / sizeof(uint64_t))},
+      {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
+       (int)CSSummarySize}};
+
   OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems));
 }
 
@@ -328,7 +391,7 @@ std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
 }
 
 static const char *ValueProfKindStr[] = {
-#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator,
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
 #include "llvm/ProfileData/InstrProfData.inc"
 };
 
@@ -376,15 +439,33 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
 Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
   if (ProfileKind == PF_IRLevel)
     OS << "# IR level Instrumentation Flag\n:ir\n";
+  else if (ProfileKind == PF_IRLevelWithCS)
+    OS << "# CSIR level Instrumentation Flag\n:csir\n";
   InstrProfSymtab Symtab;
-  for (const auto &I : FunctionData)
-    if (shouldEncodeData(I.getValue()))
+
+  using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
+  using RecordType = std::pair<StringRef, FuncPair>;
+  SmallVector<RecordType, 4> OrderedFuncData;
+
+  for (const auto &I : FunctionData) {
+    if (shouldEncodeData(I.getValue())) {
       if (Error E = Symtab.addFuncName(I.getKey()))
         return E;
-
-  for (const auto &I : FunctionData)
-    if (shouldEncodeData(I.getValue()))
       for (const auto &Func : I.getValue())
-        writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS);
+        OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
+    }
+  }
+
+  llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
+    return std::tie(A.first, A.second.first) <
+           std::tie(B.first, B.second.first);
+  });
+
+  for (const auto &record : OrderedFuncData) {
+    const StringRef &Name = record.first;
+    const FuncPair &Func = record.second;
+    writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
+  }
+
   return Error::success();
 }
diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp
index 3a8462fd9b0d..4d5b00935742 100644
--- a/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -1,9 +1,8 @@
 //=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,12 +44,17 @@ void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
 // To compute the detailed summary, we consider each line containing samples as
 // equivalent to a block with a count in the instrumented profile.
 void SampleProfileSummaryBuilder::addRecord(
-    const sampleprof::FunctionSamples &FS) {
-  NumFunctions++;
-  if (FS.getHeadSamples() > MaxFunctionCount)
-    MaxFunctionCount = FS.getHeadSamples();
+    const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
+  if (!isCallsiteSample) {
+    NumFunctions++;
+    if (FS.getHeadSamples() > MaxFunctionCount)
+      MaxFunctionCount = FS.getHeadSamples();
+  }
   for (const auto &I : FS.getBodySamples())
     addCount(I.second.getSamples());
+  for (const auto &I : FS.getCallsiteSamples())
+    for (const auto &CS : I.second)
+      addRecord(CS.second, true);
 }
 
 // The argument to this method is a vector of cutoff percentages and the return
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
index 1a124415f179..e17865cd15a4 100644
--- a/lib/ProfileData/SampleProf.cpp
+++ b/lib/ProfileData/SampleProf.cpp
@@ -1,9 +1,8 @@
 //=-- SampleProf.cpp - Sample profiling format support --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
index a68d1e9d3ab0..192b6c711562 100644
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -1,9 +1,8 @@
 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -594,8 +593,8 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
 void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
   FuncsToUse.clear();
   for (auto &F : M) {
-    StringRef Fname = F.getName().split('.').first;
-    FuncsToUse.insert(Fname);
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    FuncsToUse.insert(CanonName);
   }
 }
 
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
index b1c669ec31c4..8b876e0aa5d9 100644
--- a/lib/ProfileData/SampleProfWriter.cpp
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -1,9 +1,8 @@
 //===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,9 +50,8 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
   for (const auto &I : ProfileMap)
     V.push_back(std::make_pair(I.getKey(), &I.second));
 
-  std::stable_sort(
-      V.begin(), V.end(),
-      [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
+  llvm::stable_sort(
+      V, [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
         if (A.second->getTotalSamples() == B.second->getTotalSamples())
           return A.first > B.first;
         return A.second->getTotalSamples() > B.second->getTotalSamples();
diff --git a/lib/Remarks/Remark.cpp b/lib/Remarks/Remark.cpp
new file mode 100644
index 000000000000..401ac514b011
--- /dev/null
+++ b/lib/Remarks/Remark.cpp
@@ -0,0 +1,132 @@
+//===- Remark.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark type and the C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm-c/Remarks.h"
+#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::string Remark::getArgsAsMsg() const {
+  std::string Str;
+  raw_string_ostream OS(Str);
+  for (const Argument &Arg : Args)
+    OS << Arg.Val;
+  return OS.str();
+}
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(StringRef, LLVMRemarkStringRef)
+
+extern "C" const char *LLVMRemarkStringGetData(LLVMRemarkStringRef String) {
+  return unwrap(String)->data();
+}
+
+extern "C" uint32_t LLVMRemarkStringGetLen(LLVMRemarkStringRef String) {
+  return unwrap(String)->size();
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkDebugLocGetSourceFilePath(LLVMRemarkDebugLocRef DL) {
+  return wrap(&unwrap(DL)->SourceFilePath);
+}
+
+extern "C" uint32_t LLVMRemarkDebugLocGetSourceLine(LLVMRemarkDebugLocRef DL) {
+  return unwrap(DL)->SourceLine;
+}
+
+extern "C" uint32_t
+LLVMRemarkDebugLocGetSourceColumn(LLVMRemarkDebugLocRef DL) {
+  return unwrap(DL)->SourceColumn;
+}
+
+extern "C" LLVMRemarkStringRef LLVMRemarkArgGetKey(LLVMRemarkArgRef Arg) {
+  return wrap(&unwrap(Arg)->Key);
+}
+
+extern "C" LLVMRemarkStringRef LLVMRemarkArgGetValue(LLVMRemarkArgRef Arg) {
+  return wrap(&unwrap(Arg)->Val);
+}
+
+extern "C" LLVMRemarkDebugLocRef
+LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg) {
+  if (const Optional<RemarkLocation> &Loc = unwrap(Arg)->Loc)
+    return wrap(&*Loc);
+  return nullptr;
+}
+
+extern "C" void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark) {
+  delete unwrap(Remark);
+}
+
+extern "C" LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark) {
+  // Assume here that the enums can be converted both ways.
+  return static_cast<LLVMRemarkType>(unwrap(Remark)->RemarkType);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetPassName(LLVMRemarkEntryRef Remark) {
+  return wrap(&unwrap(Remark)->PassName);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark) {
+  return wrap(&unwrap(Remark)->RemarkName);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetFunctionName(LLVMRemarkEntryRef Remark) {
+  return wrap(&unwrap(Remark)->FunctionName);
+}
+
+extern "C" LLVMRemarkDebugLocRef
+LLVMRemarkEntryGetDebugLoc(LLVMRemarkEntryRef Remark) {
+  if (const Optional<RemarkLocation> &Loc = unwrap(Remark)->Loc)
+    return wrap(&*Loc);
+  return nullptr;
+}
+
+extern "C" uint64_t LLVMRemarkEntryGetHotness(LLVMRemarkEntryRef Remark) {
+  if (const Optional<uint64_t> &Hotness = unwrap(Remark)->Hotness)
+    return *Hotness;
+  return 0;
+}
+
+extern "C" uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark) {
+  return unwrap(Remark)->Args.size();
+}
+
+// Return a handle to the first argument of Remark, or null if it has none.
+extern "C" LLVMRemarkArgRef
+LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark) {
+  ArrayRef<Argument> AllArgs = unwrap(Remark)->Args;
+  if (AllArgs.empty())
+    return nullptr;
+  const Argument *First = AllArgs.begin();
+  return reinterpret_cast<LLVMRemarkArgRef>(const_cast<Argument *>(First));
+}
+
+// Return the argument after ArgIt in Remark, or null once none are left.
+extern "C" LLVMRemarkArgRef
+LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef ArgIt, LLVMRemarkEntryRef Remark) {
+  if (ArgIt == nullptr)
+    return nullptr;
+
+  // Reinterpret the opaque handle as a position in the argument array.
+  const Argument *Successor =
+      std::next(reinterpret_cast<const Argument *>(ArgIt));
+  if (Successor == unwrap(Remark)->Args.end())
+    return nullptr;
+  return reinterpret_cast<LLVMRemarkArgRef>(const_cast<Argument *>(Successor));
+}
diff --git a/lib/Remarks/RemarkFormat.cpp b/lib/Remarks/RemarkFormat.cpp
new file mode 100644
index 000000000000..bcd0f753ff64
--- /dev/null
+++ b/lib/Remarks/RemarkFormat.cpp
@@ -0,0 +1,30 @@
+//===- RemarkFormat.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of utilities to handle the different remark formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+// Convert a format name into a Format; "" and "yaml" both select YAML.
+Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
+  auto Result = StringSwitch<Format>(FormatStr)
+                    .Cases("", "yaml", Format::YAML)
+                    .Default(Format::Unknown);
+  if (Result != Format::Unknown)
+    return Result;
+  // A StringRef need not be null-terminated, so copy it before using '%s'.
+  return createStringError(std::make_error_code(std::errc::invalid_argument),
+                           "Unknown remark serializer format: '%s'",
+                           FormatStr.str().c_str());
+}
diff --git a/lib/Remarks/RemarkParser.cpp b/lib/Remarks/RemarkParser.cpp
new file mode 100644
index 000000000000..f67464073bd1
--- /dev/null
+++ b/lib/Remarks/RemarkParser.cpp
@@ -0,0 +1,119 @@
+//===- RemarkParser.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for remark diagnostics in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkParser.h"
+#include "YAMLRemarkParser.h"
+#include "llvm-c/Remarks.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CBindingWrapping.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+char EndOfFileError::ID = 0;
+
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+  while (!InBuffer.empty()) {
+    // Strings are separated by '\0' bytes.
+    std::pair<StringRef, StringRef> Split = InBuffer.split('\0');
+    // We only store the offset from the beginning of the buffer.
+    Offsets.push_back(Split.first.data() - Buffer.data());
+    InBuffer = Split.second;
+  }
+}
+
+// Return the string at position Index, or an error if Index is out of range.
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
+  if (Index >= Offsets.size())
+    return createStringError(
+        std::make_error_code(std::errc::invalid_argument),
+        // Both arguments are size_t, so the conversion must be '%zu'.
+        "String with index %zu is out of bounds (size = %zu).", Index,
+        Offsets.size());
+  size_t Offset = Offsets[Index];
+  // The last string has no following offset; it runs to the end of Buffer.
+  size_t NextOffset =
+      (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+  return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
+Expected<std::unique_ptr<Parser>>
+llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
+                                  Optional<const ParsedStringTable *> StrTab) {
+  switch (ParserFormat) {
+  case Format::YAML:
+    return llvm::make_unique<YAMLRemarkParser>(Buf, StrTab);
+  case Format::Unknown:
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Unknown remark parser format.");
+  }
+  llvm_unreachable("unknown format");
+}
+
+// Wrapper that holds the state needed to interact with the C API.
+struct CParser {
+  std::unique_ptr<Parser> TheParser;
+  Optional<std::string> Err;
+
+  CParser(Format ParserFormat, StringRef Buf,
+          Optional<const ParsedStringTable *> StrTab = None)
+      : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {}
+
+  void handleError(Error E) { Err.emplace(toString(std::move(E))); }
+  bool hasError() const { return Err.hasValue(); }
+  const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef)
+
+extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
+                                                          uint64_t Size) {
+  return wrap(new CParser(Format::YAML,
+                          StringRef(static_cast<const char *>(Buf), Size)));
+}
+
+extern "C" LLVMRemarkEntryRef
+LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) {
+  CParser &TheCParser = *unwrap(Parser);
+  remarks::Parser &TheParser = *TheCParser.TheParser;
+
+  Expected<std::unique_ptr<Remark>> MaybeRemark = TheParser.next();
+  if (Error E = MaybeRemark.takeError()) {
+    if (E.isA<EndOfFileError>()) {
+      consumeError(std::move(E));
+      return nullptr;
+    }
+
+    // Handle the error. Allow it to be checked through HasError and
+    // GetErrorMessage.
+    TheCParser.handleError(std::move(E));
+    return nullptr;
+  }
+
+  // Valid remark.
+  return wrap(MaybeRemark->release());
+}
+
+extern "C" LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser) {
+  return unwrap(Parser)->hasError();
+}
+
+extern "C" const char *
+LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser) {
+  return unwrap(Parser)->getMessage();
+}
+
+extern "C" void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser) {
+  delete unwrap(Parser);
+}
diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp
new file mode 100644
index 000000000000..984aa5b33b48
--- /dev/null
+++ b/lib/Remarks/RemarkStringTable.cpp
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+  size_t NextID = StrTab.size();
+  auto KV = StrTab.insert({Str, NextID});
+  // If it's a new string, add it to the final size.
+  if (KV.second)
+    SerializedSize += KV.first->first().size() + 1; // +1 for the '\0'
+  // Can be either NextID or the previous ID if the string is already there.
+  return {KV.first->second, KV.first->first()};
+}
+
+void StringTable::serialize(raw_ostream &OS) const {
+  // Emit the number of strings.
+  uint64_t StrTabSize = SerializedSize;
+  support::endian::write(OS, StrTabSize, support::little);
+  // Emit the sequence of strings.
+  for (StringRef Str : serialize()) {
+    OS << Str;
+    // Explicitly emit a '\0'.
+    OS.write('\0');
+  }
+}
+
+std::vector<StringRef> StringTable::serialize() const {
+  std::vector<StringRef> Strings{StrTab.size()};
+  for (const auto &KV : StrTab)
+    Strings[KV.second] = KV.first();
+  return Strings;
+}
diff --git a/lib/Remarks/YAMLRemarkParser.cpp b/lib/Remarks/YAMLRemarkParser.cpp
new file mode 100644
index 000000000000..ed78b7ba5d95
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkParser.cpp
@@ -0,0 +1,327 @@
+//===- YAMLRemarkParser.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for remark diagnostics in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "YAMLRemarkParser.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Remarks/RemarkParser.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+char YAMLParseError::ID = 0;
+
+static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
+  assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
+  std::string &Message = *static_cast<std::string *>(Ctx);
+  assert(Message.empty() && "Expected an empty string.");
+  raw_string_ostream OS(Message);
+  Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
+             /*ShowKindLabels*/ true);
+  OS << '\n';
+  OS.flush();
+}
+
+YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
+                               yaml::Stream &Stream, yaml::Node &Node) {
+  // 1) Set up a diagnostic handler to avoid errors being printed out to
+  // stderr.
+  // 2) Use the stream to print the error with the associated node.
+  // 3) The stream will use the source manager to print the error, which will
+  // call the diagnostic handler.
+  // 4) The diagnostic handler will stream the error directly into this object's
+  // Message member, which is used when logging is asked for.
+  auto OldDiagHandler = SM.getDiagHandler();
+  auto OldDiagCtx = SM.getDiagContext();
+  SM.setDiagHandler(handleDiagnostic, &Message);
+  Stream.printError(&Node, Twine(Msg) + Twine('\n'));
+  // Restore the old handlers.
+  SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
+}
+
+static SourceMgr setupSM(std::string &LastErrorMessage) {
+  SourceMgr SM;
+  SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
+  return SM;
+}
+
+YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
+                                   Optional<const ParsedStringTable *> StrTab)
+    : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(),
+      SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
+
+Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
+  return make_error<YAMLParseError>(Message, SM, Stream, Node);
+}
+
+Error YAMLRemarkParser::error() {
+  if (LastErrorMessage.empty())
+    return Error::success();
+  Error E = make_error<YAMLParseError>(LastErrorMessage);
+  LastErrorMessage.clear();
+  return E;
+}
+
+Expected<std::unique_ptr<Remark>>
+YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
+  if (Error E = error())
+    return std::move(E);
+
+  yaml::Node *YAMLRoot = RemarkEntry.getRoot();
+  if (!YAMLRoot) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "not a valid YAML file.");
+  }
+
+  auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
+  if (!Root)
+    return error("document root is not of mapping type.", *YAMLRoot);
+
+  std::unique_ptr<Remark> Result = llvm::make_unique<Remark>();
+  Remark &TheRemark = *Result;
+
+  // First, the type. It needs special handling since it is not part of the
+  // key-value stream.
+  Expected<Type> T = parseType(*Root);
+  if (!T)
+    return T.takeError();
+  else
+    TheRemark.RemarkType = *T;
+
+  // Then, parse the fields, one by one.
+  for (yaml::KeyValueNode &RemarkField : *Root) {
+    Expected<StringRef> MaybeKey = parseKey(RemarkField);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
+    if (KeyName == "Pass") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.PassName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Name") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.RemarkName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Function") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.FunctionName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Hotness") {
+      if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
+        TheRemark.Hotness = *MaybeU;
+      else
+        return MaybeU.takeError();
+    } else if (KeyName == "DebugLoc") {
+      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
+        TheRemark.Loc = *MaybeLoc;
+      else
+        return MaybeLoc.takeError();
+    } else if (KeyName == "Args") {
+      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+      if (!Args)
+        return error("wrong value type for key.", RemarkField);
+
+      for (yaml::Node &Arg : *Args) {
+        if (Expected<Argument> MaybeArg = parseArg(Arg))
+          TheRemark.Args.push_back(*MaybeArg);
+        else
+          return MaybeArg.takeError();
+      }
+    } else {
+      return error("unknown key.", RemarkField);
+    }
+  }
+
+  // Check if any of the mandatory fields are missing.
+  if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
+      TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
+    return error("Type, Pass, Name or Function missing.",
+                 *RemarkEntry.getRoot());
+
+  return std::move(Result);
+}
+
+Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
+  auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
+                  .Case("!Passed", remarks::Type::Passed)
+                  .Case("!Missed", remarks::Type::Missed)
+                  .Case("!Analysis", remarks::Type::Analysis)
+                  .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
+                  .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
+                  .Case("!Failure", remarks::Type::Failure)
+                  .Default(remarks::Type::Unknown);
+  if (Type == remarks::Type::Unknown)
+    return error("expected a remark tag.", Node);
+  return Type;
+}
+
+Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
+  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
+    return Key->getRawValue();
+
+  return error("key is not a string.", Node);
+}
+
+// Parse Node's value as a string: the raw scalar text, or the string-table
+// entry the scalar indexes when a table is present. A single leading and
+// trailing single quote, if any, are removed.
+Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
+  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  if (!Value)
+    return error("expected a value of scalar type.", Node);
+  StringRef Result;
+  if (!StrTab) {
+    Result = Value->getRawValue();
+  } else {
+    // If we have a string table, parse it as an unsigned.
+    Expected<unsigned> MaybeStrID = parseUnsigned(Node);
+    if (!MaybeStrID)
+      return MaybeStrID.takeError();
+    Expected<StringRef> Str = (**StrTab)[*MaybeStrID];
+    if (!Str)
+      return Str.takeError();
+    Result = *Str;
+  }
+
+  // front()/back() must not be called on an empty StringRef (an empty scalar
+  // or a lone quote would get us there), so test for emptiness first.
+  if (!Result.empty() && Result.front() == '\'')
+    Result = Result.drop_front();
+  if (!Result.empty() && Result.back() == '\'')
+    Result = Result.drop_back();
+  return Result;
+}
+
+Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
+  SmallVector<char, 4> Tmp;
+  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  if (!Value)
+    return error("expected a value of scalar type.", Node);
+  unsigned UnsignedValue = 0;
+  if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
+    return error("expected a value of integer type.", *Value);
+  return UnsignedValue;
+}
+
+Expected<RemarkLocation>
+YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
+  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+  if (!DebugLoc)
+    return error("expected a value of mapping type.", Node);
+
+  Optional<StringRef> File;
+  Optional<unsigned> Line;
+  Optional<unsigned> Column;
+
+  for (yaml::KeyValueNode &DLNode : *DebugLoc) {
+    Expected<StringRef> MaybeKey = parseKey(DLNode);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
+    if (KeyName == "File") {
+      if (Expected<StringRef> MaybeStr = parseStr(DLNode))
+        File = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Column") {
+      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+        Column = *MaybeU;
+      else
+        return MaybeU.takeError();
+    } else if (KeyName == "Line") {
+      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+        Line = *MaybeU;
+      else
+        return MaybeU.takeError();
+    } else {
+      return error("unknown entry in DebugLoc map.", DLNode);
+    }
+  }
+
+  // If any of the debug loc fields is missing, return an error.
+  if (!File || !Line || !Column)
+    return error("DebugLoc node incomplete.", Node);
+
+  return RemarkLocation{*File, *Line, *Column};
+}
+
+Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
+  auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
+  if (!ArgMap)
+    return error("expected a value of mapping type.", Node);
+
+  Optional<StringRef> KeyStr;
+  Optional<StringRef> ValueStr;
+  Optional<RemarkLocation> Loc;
+
+  for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
+    Expected<StringRef> MaybeKey = parseKey(ArgEntry);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
+    // Try to parse debug locs.
+    if (KeyName == "DebugLoc") {
+      // Can't have multiple DebugLoc entries per argument.
+      if (Loc)
+        return error("only one DebugLoc entry is allowed per argument.",
+                     ArgEntry);
+
+      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
+        Loc = *MaybeLoc;
+        continue;
+      } else
+        return MaybeLoc.takeError();
+    }
+
+    // If we already have a string, error out.
+    if (ValueStr)
+      return error("only one string entry is allowed per argument.", ArgEntry);
+
+    // Try to parse the value.
+    if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
+      ValueStr = *MaybeStr;
+    else
+      return MaybeStr.takeError();
+
+    // Keep the key from the string.
+    KeyStr = KeyName;
+  }
+
+  if (!KeyStr)
+    return error("argument key is missing.", *ArgMap);
+  if (!ValueStr)
+    return error("argument value is missing.", *ArgMap);
+
+  return Argument{*KeyStr, *ValueStr, Loc};
+}
+
+Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
+  if (YAMLIt == Stream.end())
+    return make_error<EndOfFileError>();
+
+  Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
+  if (!MaybeResult) {
+    // Avoid garbage input, set the iterator to the end.
+    YAMLIt = Stream.end();
+    return MaybeResult.takeError();
+  }
+
+  ++YAMLIt;
+
+  return std::move(*MaybeResult);
+}
diff --git a/lib/Remarks/YAMLRemarkParser.h b/lib/Remarks/YAMLRemarkParser.h
new file mode 100644
index 000000000000..cea76e63e75c
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkParser.h
@@ -0,0 +1,96 @@
+//===-- YAMLRemarkParser.h - Parser for YAML remarks ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the YAML remark parser.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_YAML_REMARK_PARSER_H
+#define LLVM_REMARKS_YAML_REMARK_PARSER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+namespace remarks {
+
+class YAMLParseError : public ErrorInfo<YAMLParseError> {
+public:
+  static char ID;
+
+  YAMLParseError(StringRef Message, SourceMgr &SM, yaml::Stream &Stream,
+                 yaml::Node &Node);
+
+  YAMLParseError(StringRef Message) : Message(Message) {}
+
+  void log(raw_ostream &OS) const override { OS << Message; }
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+private:
+  std::string Message;
+};
+
+/// Regular YAML to Remark parser.
+struct YAMLRemarkParser : public Parser {
+  /// The string table used for parsing strings.
+  Optional<const ParsedStringTable *> StrTab;
+  /// Last error message that can come from the YAML parser diagnostics.
+  /// We need this for catching errors in the constructor.
+  std::string LastErrorMessage;
+  /// Source manager for better error messages.
+  SourceMgr SM;
+  /// Stream for yaml parsing.
+  yaml::Stream Stream;
+  /// Iterator in the YAML stream.
+  yaml::document_iterator YAMLIt;
+
+  YAMLRemarkParser(StringRef Buf,
+                   Optional<const ParsedStringTable *> StrTab = None);
+
+  Expected<std::unique_ptr<Remark>> next() override;
+
+  static bool classof(const Parser *P) {
+    return P->ParserFormat == Format::YAML;
+  }
+
+private:
+  /// Create a YAMLParseError error from an existing error generated by the YAML
+  /// parser.
+  /// If there is no error, this returns Success.
+  Error error();
+  /// Create a YAMLParseError error referencing a specific node.
+  Error error(StringRef Message, yaml::Node &Node);
+  /// Parse a YAML remark to a remarks::Remark object.
+  Expected<std::unique_ptr<Remark>> parseRemark(yaml::Document &Remark);
+  /// Parse the type of a remark to an enum type.
+  Expected<Type> parseType(yaml::MappingNode &Node);
+  /// Parse one key to a string.
+  Expected<StringRef> parseKey(yaml::KeyValueNode &Node);
+  /// Parse one value to a string.
+  Expected<StringRef> parseStr(yaml::KeyValueNode &Node);
+  /// Parse one value to an unsigned.
+  Expected<unsigned> parseUnsigned(yaml::KeyValueNode &Node);
+  /// Parse a debug location.
+  Expected<RemarkLocation> parseDebugLoc(yaml::KeyValueNode &Node);
+  /// Parse an argument.
+  Expected<Argument> parseArg(yaml::Node &Node);
+};
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_YAML_REMARK_PARSER_H */
diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp
new file mode 100644
index 000000000000..d64ae8e12ab0
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -0,0 +1,167 @@
+//===- YAMLRemarkSerializer.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the YAML remark serializer using
+// LLVM's YAMLTraits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+cl::opt<bool> RemarksYAMLStringTable( // Forces the string table on when set.
+    "remarks-yaml-string-table", cl::init(false), cl::Hidden,
+    cl::desc("Enable the usage of a string table with YAML remarks."));
+
+// Map the header keys shared by every remark, in a fixed order. T is either
+// unsigned (string table IDs) or StringRef, so both modes use the same keys.
+template <typename T>
+static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName,
+                            Optional<RemarkLocation> RL, T FunctionName,
+                            Optional<uint64_t> Hotness,
+                            ArrayRef<Argument> Args) {
+  io.mapRequired("Pass", PassName);
+  io.mapRequired("Name", RemarkName);
+  io.mapOptional("DebugLoc", RL);
+  io.mapRequired("Function", FunctionName);
+  io.mapOptional("Hotness", Hotness);
+  io.mapOptional("Args", Args);
+}
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<remarks::Remark *> {
+  static void mapping(IO &io, remarks::Remark *&Remark) {
+    assert(io.outputting() && "input not yet implemented");
+    // Offer one tag per remark type; stop at the first mapTag returning true.
+    if (io.mapTag("!Passed", (Remark->RemarkType == Type::Passed)))
+      ;
+    else if (io.mapTag("!Missed", (Remark->RemarkType == Type::Missed)))
+      ;
+    else if (io.mapTag("!Analysis", (Remark->RemarkType == Type::Analysis)))
+      ;
+    else if (io.mapTag("!AnalysisFPCommute",
+                       (Remark->RemarkType == Type::AnalysisFPCommute)))
+      ;
+    else if (io.mapTag("!AnalysisAliasing",
+                       (Remark->RemarkType == Type::AnalysisAliasing)))
+      ;
+    else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure)))
+      ;
+    else
+      llvm_unreachable("Unknown remark type");
+    // With a string table, add each string to it and map the resulting IDs.
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      unsigned PassID = StrTab->add(Remark->PassName).first;
+      unsigned NameID = StrTab->add(Remark->RemarkName).first;
+      unsigned FunctionID = StrTab->add(Remark->FunctionName).first;
+      mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID,
+                      Remark->Hotness, Remark->Args);
+    } else {
+      mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc,
+                      Remark->FunctionName, Remark->Hotness, Remark->Args);
+    }
+  }
+};
+
+template <> struct MappingTraits<RemarkLocation> {
+  static void mapping(IO &io, RemarkLocation &RL) {
+    assert(io.outputting() && "input not yet implemented");
+
+    StringRef File = RL.SourceFilePath;
+    unsigned Line = RL.SourceLine;
+    unsigned Col = RL.SourceColumn;
+    // "File" maps the string table ID when a table is in use, else the path.
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      unsigned FileID = StrTab->add(File).first;
+      io.mapRequired("File", FileID);
+    } else {
+      io.mapRequired("File", File);
+    }
+
+    io.mapRequired("Line", Line);
+    io.mapRequired("Column", Col);
+  }
+  // Ask YAML I/O for flow style on this mapping (see the YamlIO docs).
+  static const bool flow = true;
+};
+
+/// Wrapper that routes a string through BlockScalarTraits so it is written as
+/// a multiline block literal, which preserves the newlines in the string.
+struct StringBlockVal {
+  StringRef Value; // initialized from the constructor argument
+  StringBlockVal(const std::string &Value) : Value(Value) {}
+};
+
+template <> struct BlockScalarTraits<StringBlockVal> {
+  static void output(const StringBlockVal &S, void *Ctx, raw_ostream &OS) {
+    return ScalarTraits<StringRef>::output(S.Value, Ctx, OS);
+  }
+  // Both directions simply forward to the plain StringRef scalar traits.
+  static StringRef input(StringRef Scalar, void *Ctx, StringBlockVal &S) {
+    return ScalarTraits<StringRef>::input(Scalar, Ctx, S.Value);
+  }
+};
+
+/// Adapter so that an ArrayRef<T> can be *output* through the YAMLTraits.
+/// SequenceTraits expects mutable elements (needed for inputting), while an
+/// ArrayRef only exposes const ones, so element() casts the constness away.
+/// That is only acceptable because inputting is guarded by the assert below;
+/// it is a hack, but nicer than calling the YAMLIO internal methods manually.
+/// Keep this in this file so that it doesn't get misused from YAMLTraits.h.
+template <typename T> struct SequenceTraits<ArrayRef<T>> {
+  static size_t size(IO &io, ArrayRef<T> &seq) { return seq.size(); }
+  static T &element(IO &io, ArrayRef<T> &seq, size_t index) {
+    assert(io.outputting() && "input not yet implemented");
+    // Return T & (not a hard-coded Argument &) so the trait matches its T.
+    return const_cast<T &>(seq[index]);
+  }
+};
+
+/// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<Argument> {
+  static void mapping(IO &io, Argument &A) {
+    assert(io.outputting() && "input not yet implemented");
+    // NOTE(review): assumes A.Key.data() is null-terminated -- TODO confirm.
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      auto ValueID = StrTab->add(A.Val).first; // ID replaces the text
+      io.mapRequired(A.Key.data(), ValueID);
+    } else if (StringRef(A.Val).count('\n') > 1) { // 2+ newlines: block scalar
+      StringBlockVal S(A.Val);
+      io.mapRequired(A.Key.data(), S);
+    } else {
+      io.mapRequired(A.Key.data(), A.Val);
+    }
+    io.mapOptional("DebugLoc", A.Loc);
+  }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
+
+YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable)
+    : Serializer(OS), YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+  if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable)
+    StrTab.emplace(); // requested by the caller or forced by the cl::opt
+}
+
+void YAMLSerializer::emit(const Remark &Remark) {
+  // YAMLTraits expect a non-const object because the same traits also serve
+  // inputting; only outputting is used here, so const is cast away.
+  auto R = const_cast<remarks::Remark *>(&Remark);
+  YAMLOutput << R; // serialized via MappingTraits<remarks::Remark *>
+}
diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp
index e897137df680..df4caa1f07fd 100644
--- a/lib/Support/AArch64TargetParser.cpp
+++ b/lib/Support/AArch64TargetParser.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,6 +88,16 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
     Features.push_back("+rdm");
   if (Extensions & AEK_SVE)
     Features.push_back("+sve");
+  if (Extensions & AEK_SVE2)
+    Features.push_back("+sve2");
+  if (Extensions & AEK_SVE2AES)
+    Features.push_back("+sve2-aes");
+  if (Extensions & AEK_SVE2SM4)
+    Features.push_back("+sve2-sm4");
+  if (Extensions & AEK_SVE2SHA3)
+    Features.push_back("+sve2-sha3");
+  if (Extensions & AEK_BITPERM)
+    Features.push_back("+bitperm");
   if (Extensions & AEK_RCPC)
     Features.push_back("+rcpc");
 
diff --git a/lib/Support/AMDGPUMetadata.cpp b/lib/Support/AMDGPUMetadata.cpp
index a04bfc2ea299..5f8102299f47 100644
--- a/lib/Support/AMDGPUMetadata.cpp
+++ b/lib/Support/AMDGPUMetadata.cpp
@@ -1,9 +1,8 @@
 //===--- AMDGPUMetadata.cpp -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -66,6 +65,8 @@ struct ScalarEnumerationTraits<ValueKind> {
     YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
     YIO.enumCase(EN, "HiddenCompletionAction",
                  ValueKind::HiddenCompletionAction);
+    YIO.enumCase(EN, "HiddenMultiGridSyncArg",
+		 ValueKind::HiddenMultiGridSyncArg);
   }
 };
 
@@ -219,19 +220,5 @@ std::error_code toString(Metadata HSAMetadata, std::string &String) {
 }
 
 } // end namespace HSAMD
-
-namespace PALMD {
-
-std::error_code toString(const Metadata &PALMetadata, std::string &String) {
-  raw_string_ostream Stream(String);
-  for (auto I = PALMetadata.begin(), E = PALMetadata.end(); I != E; ++I) {
-    Stream << Twine(I == PALMetadata.begin() ? " 0x" : ",0x");
-    Stream << Twine::utohexstr(*I);
-  }
-  Stream.flush();
-  return std::error_code();
-}
-
-} // end namespace PALMD
 } // end namespace AMDGPU
 } // end namespace llvm
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index e9e429c8031b..b79baf1834a7 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1,9 +1,8 @@
 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,6 +113,42 @@ namespace llvm {
   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                         53 + 53, 128};
 
+  const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
+    switch (S) { // maps each enumerator to its semantics accessor
+    case S_IEEEhalf:
+      return IEEEhalf();
+    case S_IEEEsingle:
+      return IEEEsingle();
+    case S_IEEEdouble:
+      return IEEEdouble();
+    case S_x87DoubleExtended:
+      return x87DoubleExtended();
+    case S_IEEEquad:
+      return IEEEquad();
+    case S_PPCDoubleDouble:
+      return PPCDoubleDouble();
+    }
+    llvm_unreachable("Unrecognised floating semantics");
+  }
+
+  APFloatBase::Semantics
+  APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
+    if (&Sem == &llvm::APFloat::IEEEhalf()) // matched by address
+      return S_IEEEhalf;
+    if (&Sem == &llvm::APFloat::IEEEsingle())
+      return S_IEEEsingle;
+    if (&Sem == &llvm::APFloat::IEEEdouble())
+      return S_IEEEdouble;
+    if (&Sem == &llvm::APFloat::x87DoubleExtended())
+      return S_x87DoubleExtended;
+    if (&Sem == &llvm::APFloat::IEEEquad())
+      return S_IEEEquad;
+    if (&Sem == &llvm::APFloat::PPCDoubleDouble())
+      return S_PPCDoubleDouble;
+    // Sem is none of the semantics objects this mapping knows about.
+    llvm_unreachable("Unknown floating semantics");
+  }
+
   const fltSemantics &APFloatBase::IEEEhalf() {
     return semIEEEhalf;
   }
@@ -199,7 +234,10 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
   StringRef::iterator p = begin;
 
-  assert(p != end && "Exponent has no digits");
+  // Treat no exponent as 0 to match binutils
+  if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
+    return 0;
+  }
 
   isNegative = (*p == '-');
   if (*p == '-' || *p == '+') {
@@ -4416,8 +4454,9 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
     return;
   }
   if (usesLayout<DoubleAPFloat>(Semantics)) {
+    const fltSemantics& S = F.getSemantics();
     new (&Double)
-        DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
+        DoubleAPFloat(Semantics, APFloat(std::move(F), S),
                       APFloat(semIEEEdouble));
     return;
   }
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index a5f4f98c489a..43173311cd80 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1,9 +1,8 @@
 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -483,10 +482,13 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
   APInt tmp(sufficient, StringRef(p, slen), radix);
 
   // Compute how many bits are required. If the log is infinite, assume we need
-  // just bit.
+  // just one bit. If the log is exact and the value is negative, then the
+  // value is MinSignedValue with (log + 1) bits.
   unsigned log = tmp.logBase2();
   if (log == (unsigned)-1) {
     return isNegative + 1;
+  } else if (isNegative && tmp.isPowerOf2()) {
+    return isNegative + log;
   } else {
     return isNegative + log + 1;
   }
@@ -1096,6 +1098,8 @@ APInt APInt::sqrt() const {
 /// however we simplify it to speed up calculating only the inverse, and take
 /// advantage of div+rem calculations. We also use some tricks to avoid copying
 /// (potentially large) APInts around.
+/// WARNING: a value of '0' may be returned,
+///          signifying that no multiplicative inverse exists!
 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
   assert(ult(modulo) && "This APInt must be smaller than the modulo");
 
@@ -1915,12 +1919,19 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
 }
 
 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
-  APInt Res = *this * RHS;
+  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
+    Overflow = true;
+    return *this * RHS;
+  }
 
-  if (*this != 0 && RHS != 0)
-    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
-  else
-    Overflow = false;
+  APInt Res = lshr(1) * RHS;
+  Overflow = Res.isNegative();
+  Res <<= 1;
+  if ((*this)[0]) {
+    Res += RHS;
+    if (Res.ult(RHS))
+      Overflow = true;
+  }
   return Res;
 }
 
@@ -2923,3 +2934,56 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
   return X;
 }
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal, which must be at least that many bytes.
+void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+                            unsigned StoreBytes) {
+  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+  auto *Words = reinterpret_cast<const uint8_t *>(IntVal.getRawData());
+
+  if (sys::IsLittleEndianHost) {
+    // The raw data already runs from LSB to MSB, which is the order wanted in
+    // the destination as well, so one straight copy does the job.
+    memcpy(Dst, Words, StoreBytes);
+    return;
+  }
+  // Big-endian host: the raw data is an array of 64 bit words ordered from
+  // LSW to MSW, each word running from MSB to LSB.  The destination must run
+  // from MSB to LSB: reverse the word order, but not the bytes in a word.
+  while (StoreBytes > sizeof(uint64_t)) {
+    StoreBytes -= sizeof(uint64_t);
+    // May not be aligned so use memcpy.
+    memcpy(Dst + StoreBytes, Words, sizeof(uint64_t));
+    Words += sizeof(uint64_t);
+  }
+  // Last word reached: copy only its trailing StoreBytes bytes.
+  memcpy(Dst, Words + sizeof(uint64_t) - StoreBytes, StoreBytes);
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+  // getRawData() yields a const pointer; cast it to fill the words in place.
+  auto *Words = const_cast<uint64_t *>(IntVal.getRawData());
+  auto *Out = reinterpret_cast<uint8_t *>(Words);
+
+  if (sys::IsLittleEndianHost) {
+    // Source and destination both run from LSB to MSB: one straight copy.
+    memcpy(Out, Src, LoadBytes);
+    return;
+  }
+  // Big-endian host: the destination is an array of 64 bit words ordered
+  // from LSW to MSW, each word running from MSB to LSB, while the source
+  // runs from MSB to LSB: reverse the word order, but not the bytes in a
+  // word.
+  while (LoadBytes > sizeof(uint64_t)) {
+    LoadBytes -= sizeof(uint64_t);
+    // May not be aligned so use memcpy.
+    memcpy(Out, Src + LoadBytes, sizeof(uint64_t));
+    Out += sizeof(uint64_t);
+  }
+  // Last word reached: the remaining bytes land at its trailing end.
+  memcpy(Out + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+}
diff --git a/lib/Support/APSInt.cpp b/lib/Support/APSInt.cpp
index 46c0f70ff66b..7c48880f96ea 100644
--- a/lib/Support/APSInt.cpp
+++ b/lib/Support/APSInt.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,18 +22,18 @@ APSInt::APSInt(StringRef Str) {
 
   // (Over-)estimate the required number of bits.
   unsigned NumBits = ((Str.size() * 64) / 19) + 2;
-  APInt Tmp(NumBits, Str, /*Radix=*/10);
+  APInt Tmp(NumBits, Str, /*radix=*/10);
   if (Str[0] == '-') {
     unsigned MinBits = Tmp.getMinSignedBits();
     if (MinBits > 0 && MinBits < NumBits)
       Tmp = Tmp.trunc(MinBits);
-    *this = APSInt(Tmp, /*IsUnsigned=*/false);
+    *this = APSInt(Tmp, /*isUnsigned=*/false);
     return;
   }
   unsigned ActiveBits = Tmp.getActiveBits();
   if (ActiveBits > 0 && ActiveBits < NumBits)
     Tmp = Tmp.trunc(ActiveBits);
-  *this = APSInt(Tmp, /*IsUnsigned=*/true);
+  *this = APSInt(Tmp, /*isUnsigned=*/true);
 }
 
 void APSInt::Profile(FoldingSetNodeID& ID) const {
diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp
index 1f98ac2f40ba..df50fff720cd 100644
--- a/lib/Support/ARMAttributeParser.cpp
+++ b/lib/Support/ARMAttributeParser.cpp
@@ -1,9 +1,8 @@
 //===--- ARMAttributeParser.cpp - ARM Attribute Information Printer -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,6 +37,7 @@ ARMAttributeParser::DisplayRoutines[] = {
   ATTRIBUTE_HANDLER(FP_arch),
   ATTRIBUTE_HANDLER(WMMX_arch),
   ATTRIBUTE_HANDLER(Advanced_SIMD_arch),
+  ATTRIBUTE_HANDLER(MVE_arch),
   ATTRIBUTE_HANDLER(PCS_config),
   ATTRIBUTE_HANDLER(ABI_PCS_R9_use),
   ATTRIBUTE_HANDLER(ABI_PCS_RW_data),
@@ -133,7 +133,9 @@ void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data,
   static const char *const Strings[] = {
     "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
     "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
-    "ARM v7E-M", "ARM v8"
+    "ARM v7E-M", "ARM v8", nullptr,
+    "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr,
+    "ARM v8.1-M Mainline"
   };
 
   uint64_t Value = ParseInteger(Data, Offset);
@@ -214,6 +216,18 @@ void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
   PrintAttribute(Tag, Value, ValueDesc);
 }
 
+void ARMAttributeParser::MVE_arch(AttrType Tag, const uint8_t *Data,
+                                  uint32_t &Offset) {
+  static const char *const Strings[] = {
+    "Not Permitted", "MVE integer", "MVE integer and float"
+  };
+  // Indexed by attribute value; out-of-table values get a null description.
+  uint64_t Value = ParseInteger(Data, Offset);
+  StringRef ValueDesc =
+    (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+  PrintAttribute(Tag, Value, ValueDesc);
+}
+
 void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
                                     uint32_t &Offset) {
   static const char *const Strings[] = {
@@ -682,7 +696,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
 }
 
 void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) {
-  size_t Offset = 1;
+  uint64_t Offset = 1;
   unsigned SectionNumber = 0;
 
   while (Offset < Section.size()) {
@@ -695,6 +709,12 @@ void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) {
       SW->indent();
     }
 
+    if (SectionLength == 0 || (SectionLength + Offset) > Section.size()) {
+      errs() << "invalid subsection length " << SectionLength << " at offset "
+             << Offset << "\n";
+      return;
+    }
+
     ParseSubsection(Section.data() + Offset, SectionLength);
     Offset = Offset + SectionLength;
 
diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp
index 8f18e9eb24ed..d0c4fb792cb8 100644
--- a/lib/Support/ARMBuildAttrs.cpp
+++ b/lib/Support/ARMBuildAttrs.cpp
@@ -1,9 +1,8 @@
 //===-- ARMBuildAttrs.cpp - ARM Build Attributes --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,6 +28,7 @@ const struct {
   { ARMBuildAttrs::FP_arch, "Tag_FP_arch" },
   { ARMBuildAttrs::WMMX_arch, "Tag_WMMX_arch" },
   { ARMBuildAttrs::Advanced_SIMD_arch, "Tag_Advanced_SIMD_arch" },
+  { ARMBuildAttrs::MVE_arch, "Tag_MVE_arch" },
   { ARMBuildAttrs::PCS_config, "Tag_PCS_config" },
   { ARMBuildAttrs::ABI_PCS_R9_use, "Tag_ABI_PCS_R9_use" },
   { ARMBuildAttrs::ABI_PCS_RW_data, "Tag_ABI_PCS_RW_data" },
diff --git a/lib/Support/ARMTargetParser.cpp b/lib/Support/ARMTargetParser.cpp
index 07294b0c09a3..be948cfc95d4 100644
--- a/lib/Support/ARMTargetParser.cpp
+++ b/lib/Support/ARMTargetParser.cpp
@@ -1,9 +1,8 @@
 //===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,6 +77,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
   case ArchKind::ARMV8R:
   case ArchKind::ARMV8MBaseline:
   case ArchKind::ARMV8MMainline:
+  case ArchKind::ARMV8_1MMainline:
     return 8;
   case ArchKind::INVALID:
     return 0;
@@ -94,6 +94,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
   case ArchKind::ARMV7EM:
   case ArchKind::ARMV8MMainline:
   case ArchKind::ARMV8MBaseline:
+  case ArchKind::ARMV8_1MMainline:
     return ProfileKind::M;
   case ArchKind::ARMV7R:
   case ArchKind::ARMV8R:
@@ -152,6 +153,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
       .Case("v8r", "v8-r")
       .Case("v8m.base", "v8-m.base")
       .Case("v8m.main", "v8-m.main")
+      .Case("v8.1m.main", "v8.1-m.main")
       .Default(Arch);
 }
 
@@ -160,77 +162,63 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
   if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
     return false;
 
-  // fp-only-sp and d16 subtarget features are independent of each other, so we
-  // must enable/disable both.
-  switch (FPUNames[FPUKind].Restriction) {
-  case FPURestriction::SP_D16:
-    Features.push_back("+fp-only-sp");
-    Features.push_back("+d16");
-    break;
-  case FPURestriction::D16:
-    Features.push_back("-fp-only-sp");
-    Features.push_back("+d16");
-    break;
-  case FPURestriction::None:
-    Features.push_back("-fp-only-sp");
-    Features.push_back("-d16");
-    break;
-  }
-
-  // FPU version subtarget features are inclusive of lower-numbered ones, so
-  // enable the one corresponding to this version and disable all that are
-  // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
-  // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
-  switch (FPUNames[FPUKind].FPUVer) {
-  case FPUVersion::VFPV5:
-    Features.push_back("+fp-armv8");
-    break;
-  case FPUVersion::VFPV4:
-    Features.push_back("+vfp4");
-    Features.push_back("-fp-armv8");
-    break;
-  case FPUVersion::VFPV3_FP16:
-    Features.push_back("+vfp3");
-    Features.push_back("+fp16");
-    Features.push_back("-vfp4");
-    Features.push_back("-fp-armv8");
-    break;
-  case FPUVersion::VFPV3:
-    Features.push_back("+vfp3");
-    Features.push_back("-fp16");
-    Features.push_back("-vfp4");
-    Features.push_back("-fp-armv8");
-    break;
-  case FPUVersion::VFPV2:
-    Features.push_back("+vfp2");
-    Features.push_back("-vfp3");
-    Features.push_back("-fp16");
-    Features.push_back("-vfp4");
-    Features.push_back("-fp-armv8");
-    break;
-  case FPUVersion::NONE:
-    Features.push_back("-vfp2");
-    Features.push_back("-vfp3");
-    Features.push_back("-fp16");
-    Features.push_back("-vfp4");
-    Features.push_back("-fp-armv8");
-    break;
+  static const struct FPUFeatureNameInfo {
+    const char *PlusName, *MinusName;
+    FPUVersion MinVersion;
+    FPURestriction MaxRestriction;
+  } FPUFeatureInfoList[] = {
+    // We have to specify the + and - versions of the name in full so
+    // that we can return them as static StringRefs.
+    //
+    // Also, the SubtargetFeatures ending in just "sp" are listed here
+    // under FPURestriction::None, which is the only FPURestriction in
+    // which they would be valid (since FPURestriction::SP doesn't
+    // exist).
+
+    {"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16},
+    {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::None},
+    {"+vfp2d16", "-vfp2d16", FPUVersion::VFPV2, FPURestriction::D16},
+    {"+vfp2d16sp", "-vfp2d16sp", FPUVersion::VFPV2, FPURestriction::SP_D16},
+    {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::None},
+    {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None},
+    {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16},
+    {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16},
+    {"+vfp3sp", "-vfp3sp", FPUVersion::VFPV3, FPURestriction::None},
+    {"+fp16", "-fp16", FPUVersion::VFPV3_FP16, FPURestriction::SP_D16},
+    {"+vfp4", "-vfp4", FPUVersion::VFPV4, FPURestriction::None},
+    {"+vfp4d16", "-vfp4d16", FPUVersion::VFPV4, FPURestriction::D16},
+    {"+vfp4d16sp", "-vfp4d16sp", FPUVersion::VFPV4, FPURestriction::SP_D16},
+    {"+vfp4sp", "-vfp4sp", FPUVersion::VFPV4, FPURestriction::None},
+    {"+fp-armv8", "-fp-armv8", FPUVersion::VFPV5, FPURestriction::None},
+    {"+fp-armv8d16", "-fp-armv8d16", FPUVersion::VFPV5, FPURestriction::D16},
+    {"+fp-armv8d16sp", "-fp-armv8d16sp", FPUVersion::VFPV5, FPURestriction::SP_D16},
+    {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None},
+    {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16},
+    {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16},
+    {"+d32", "-d32", FPUVersion::VFPV2, FPURestriction::None},
+  };
+
+  for (const auto &Info: FPUFeatureInfoList) {
+    if (FPUNames[FPUKind].FPUVer >= Info.MinVersion &&
+        FPUNames[FPUKind].Restriction <= Info.MaxRestriction)
+      Features.push_back(Info.PlusName);
+    else
+      Features.push_back(Info.MinusName);
   }
 
-  // crypto includes neon, so we handle this similarly to FPU version.
-  switch (FPUNames[FPUKind].NeonSupport) {
-  case NeonSupportLevel::Crypto:
-    Features.push_back("+neon");
-    Features.push_back("+crypto");
-    break;
-  case NeonSupportLevel::Neon:
-    Features.push_back("+neon");
-    Features.push_back("-crypto");
-    break;
-  case NeonSupportLevel::None:
-    Features.push_back("-neon");
-    Features.push_back("-crypto");
-    break;
+  static const struct NeonFeatureNameInfo {
+    const char *PlusName, *MinusName;
+    NeonSupportLevel MinSupportLevel;
+  } NeonFeatureInfoList[] = {
+    {"+neon", "-neon", NeonSupportLevel::Neon},
+    {"+crypto", "-crypto", NeonSupportLevel::Crypto},
+  };
+
+  for (const auto &Info: NeonFeatureInfoList) {
+    if (FPUNames[FPUKind].NeonSupport >= Info.MinSupportLevel)
+      Features.push_back(Info.PlusName);
+    else
+      Features.push_back(Info.MinusName);
   }
 
   return true;
@@ -249,7 +237,7 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
       return EndianKind::LITTLE;
   }
 
-  if (Arch.startswith("aarch64"))
+  if (Arch.startswith("aarch64") || Arch.startswith("aarch64_32"))
     return EndianKind::LITTLE;
 
   return EndianKind::INVALID;
@@ -290,8 +278,12 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
   StringRef Error = "";
 
   // Begins with "arm" / "thumb", move past it.
-  if (A.startswith("arm64"))
+  if (A.startswith("arm64_32"))
+    offset = 8;
+  else if (A.startswith("arm64"))
     offset = 5;
+  else if (A.startswith("aarch64_32"))
+    offset = 10;
   else if (A.startswith("arm"))
     offset = 3;
   else if (A.startswith("thumb"))
@@ -417,30 +409,12 @@ bool ARM::getExtensionFeatures(unsigned Extensions,
   if (Extensions == AEK_INVALID)
     return false;
 
-  if (Extensions & AEK_CRC)
-    Features.push_back("+crc");
-  else
-    Features.push_back("-crc");
-
-  if (Extensions & AEK_DSP)
-    Features.push_back("+dsp");
-  else
-    Features.push_back("-dsp");
-
-  if (Extensions & AEK_FP16FML)
-    Features.push_back("+fp16fml");
-  else
-    Features.push_back("-fp16fml");
-
-  if (Extensions & AEK_RAS)
-    Features.push_back("+ras");
-  else
-    Features.push_back("-ras");
-
-  if (Extensions & AEK_DOTPROD)
-    Features.push_back("+dotprod");
-  else
-    Features.push_back("-dotprod");
+  for (const auto AE : ARCHExtNames) {
+    if ((Extensions & AE.ID) == AE.ID && AE.Feature)
+      Features.push_back(AE.Feature);
+    else if (AE.NegFeature)
+      Features.push_back(AE.NegFeature);
+  }
 
   return getHWDivFeatures(Extensions, Features);
 }
@@ -469,22 +443,99 @@ StringRef ARM::getArchExtName(unsigned ArchExtKind) {
   return StringRef();
 }
 
-StringRef ARM::getArchExtFeature(StringRef ArchExt) {
-  if (ArchExt.startswith("no")) {
-    StringRef ArchExtBase(ArchExt.substr(2));
-    for (const auto AE : ARCHExtNames) {
-      if (AE.NegFeature && ArchExtBase == AE.getName())
-        return StringRef(AE.NegFeature);
-    }
+static bool stripNegationPrefix(StringRef &Name) {
+  if (Name.startswith("no")) {
+    Name = Name.substr(2);
+    return true;
   }
+  return false;
+}
+
+StringRef ARM::getArchExtFeature(StringRef ArchExt) {
+  bool Negated = stripNegationPrefix(ArchExt);
   for (const auto AE : ARCHExtNames) {
     if (AE.Feature && ArchExt == AE.getName())
-      return StringRef(AE.Feature);
+      return StringRef(Negated ? AE.NegFeature : AE.Feature);
   }
 
   return StringRef();
 }
 
+static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
+  const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
+
+  // If the input FPU already supports double-precision, then there
+  // isn't any different FPU we can return here.
+  //
+  // The currently available FPURestriction values are None (no
+  // restriction), D16 (only 16 d-regs) and SP_D16 (16 d-regs
+  // and single precision only); there's no value representing
+  // SP restriction without D16. So this test just means 'is it
+  // SP only?'.
+  if (InputFPU.Restriction != ARM::FPURestriction::SP_D16)
+    return ARM::FK_INVALID;
+
+  // Otherwise, look for an FPU entry with all the same fields, except
+  // that SP_D16 has been replaced with just D16, representing adding
+  // double precision and not changing anything else.
+  for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) {
+    if (CandidateFPU.FPUVer == InputFPU.FPUVer &&
+        CandidateFPU.NeonSupport == InputFPU.NeonSupport &&
+        CandidateFPU.Restriction == ARM::FPURestriction::D16) {
+      return CandidateFPU.ID;
+    }
+  }
+
+  // nothing found
+  return ARM::FK_INVALID;
+}
+
+static unsigned getAEKID(StringRef ArchExtName) {
+  for (const auto AE : ARM::ARCHExtNames)
+    if (AE.getName() == ArchExtName)
+      return AE.ID;
+  return ARM::AEK_INVALID;
+}
+
+bool ARM::appendArchExtFeatures(
+  StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+  std::vector<StringRef> &Features) {
+
+  size_t StartingNumFeatures = Features.size();
+  const bool Negated = stripNegationPrefix(ArchExt);
+  unsigned ID = getAEKID(ArchExt);
+
+  if (ID == AEK_INVALID)
+    return false;
+
+  for (const auto AE : ARCHExtNames) {
+    if (Negated && (AE.ID & ID) == ID && AE.NegFeature)
+      Features.push_back(AE.NegFeature);
+    else if (AE.ID == ID && AE.Feature)
+      Features.push_back(AE.Feature);
+  }
+
+  if (CPU == "")
+    CPU = "generic";
+
+  if (ArchExt == "fp" || ArchExt == "fp.dp") {
+    unsigned FPUKind;
+    if (ArchExt == "fp.dp") {
+      if (Negated) {
+        Features.push_back("-fp64");
+        return true;
+      }
+      FPUKind = findDoublePrecisionFPU(getDefaultFPU(CPU, AK));
+    } else if (Negated) {
+      FPUKind = ARM::FK_NONE;
+    } else {
+      FPUKind = getDefaultFPU(CPU, AK);
+    }
+    return ARM::getFPUFeatures(FPUKind, Features);
+  }
+  return StartingNumFeatures != Features.size();
+}
+
 StringRef ARM::getHWDivName(unsigned HWDivKind) {
   for (const auto D : HWDivNames) {
     if (HWDivKind == D.ID)
diff --git a/lib/Support/ARMWinEH.cpp b/lib/Support/ARMWinEH.cpp
index 03c150f1150b..831f95cd4b0b 100644
--- a/lib/Support/ARMWinEH.cpp
+++ b/lib/Support/ARMWinEH.cpp
@@ -1,9 +1,8 @@
 //===-- ARMWinEH.cpp - Windows on ARM EH Support Functions ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index f48edac0598c..718d3fc0d8e1 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -1,9 +1,8 @@
 //===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 7328a93052cc..f6865405c2b8 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -1,9 +1,8 @@
 //===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/BinaryStreamError.cpp b/lib/Support/BinaryStreamError.cpp
index cdc811d78d63..f22523f09ac8 100644
--- a/lib/Support/BinaryStreamError.cpp
+++ b/lib/Support/BinaryStreamError.cpp
@@ -1,9 +1,8 @@
 //===- BinaryStreamError.cpp - Error extensions for streams -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index e00527f2519e..b17786593bde 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -1,9 +1,8 @@
 //===- BinaryStreamReader.cpp - Reads objects from a binary stream --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 
 #include "llvm/Support/BinaryStreamError.h"
 #include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/LEB128.h"
 
 using namespace llvm;
 using endianness = llvm::support::endianness;
@@ -41,6 +41,36 @@ Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
   return Error::success();
 }
 
+Error BinaryStreamReader::readULEB128(uint64_t &Dest) {
+  SmallVector<uint8_t, 10> EncodedBytes;
+  ArrayRef<uint8_t> NextByte;
+
+  // Copy the encoded ULEB into the buffer.
+  do {
+    if (auto Err = readBytes(NextByte, 1))
+      return Err;
+    EncodedBytes.push_back(NextByte[0]);
+  } while (NextByte[0] & 0x80);
+
+  Dest = decodeULEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end());
+  return Error::success();
+}
+
+Error BinaryStreamReader::readSLEB128(int64_t &Dest) {
+  SmallVector<uint8_t, 10> EncodedBytes;
+  ArrayRef<uint8_t> NextByte;
+
+  // Copy the encoded SLEB into the buffer.
+  do {
+    if (auto Err = readBytes(NextByte, 1))
+      return Err;
+    EncodedBytes.push_back(NextByte[0]);
+  } while (NextByte[0] & 0x80);
+
+  Dest = decodeSLEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end());
+  return Error::success();
+}
+
 Error BinaryStreamReader::readCString(StringRef &Dest) {
   uint32_t OriginalOffset = getOffset();
   uint32_t FoundOffset = 0;
@@ -146,4 +176,4 @@ BinaryStreamReader::split(uint32_t Off) const {
   BinaryStreamReader W1{First};
   BinaryStreamReader W2{Second};
   return std::make_pair(W1, W2);
-}
\ No newline at end of file
+}
diff --git a/lib/Support/BinaryStreamRef.cpp b/lib/Support/BinaryStreamRef.cpp
index bdc0f54bf25a..6bcc504ffad5 100644
--- a/lib/Support/BinaryStreamRef.cpp
+++ b/lib/Support/BinaryStreamRef.cpp
@@ -1,9 +1,8 @@
 //===- BinaryStreamRef.cpp - ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
index bfad1280b929..986e18da281d 100644
--- a/lib/Support/BinaryStreamWriter.cpp
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -1,9 +1,8 @@
 //===- BinaryStreamWriter.cpp - Writes objects to a BinaryStream ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 #include "llvm/Support/BinaryStreamError.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/LEB128.h"
 
 using namespace llvm;
 
@@ -32,6 +32,18 @@ Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
   return Error::success();
 }
 
+Error BinaryStreamWriter::writeULEB128(uint64_t Value) {
+  uint8_t EncodedBytes[10] = {0};
+  unsigned Size = encodeULEB128(Value, &EncodedBytes[0]);
+  return writeBytes({EncodedBytes, Size});
+}
+
+Error BinaryStreamWriter::writeSLEB128(int64_t Value) {
+  uint8_t EncodedBytes[10] = {0};
+  unsigned Size = encodeSLEB128(Value, &EncodedBytes[0]);
+  return writeBytes({EncodedBytes, Size});
+}
+
 Error BinaryStreamWriter::writeCString(StringRef Str) {
   if (auto EC = writeFixedString(Str))
     return EC;
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index 34fcbde23a28..2b63294f3789 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -1,9 +1,8 @@
 //====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 31dee9561f49..195e2d58d8e1 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -1,9 +1,8 @@
 //===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,10 +88,6 @@ static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) {
   // Carry.
   Upper32 += Mid32 < Mid32Partial;
 
-  // Check for overflow.
-  if (Upper32 >= D)
-    return UINT64_MAX;
-
   uint64_t Rem = (uint64_t(Upper32) << 32) | Mid32;
   uint64_t UpperQ = Rem / D;
 
diff --git a/lib/Support/BuryPointer.cpp b/lib/Support/BuryPointer.cpp
index 6c988b4a0ab2..435f89010d41 100644
--- a/lib/Support/BuryPointer.cpp
+++ b/lib/Support/BuryPointer.cpp
@@ -1,9 +1,8 @@
 //===- BuryPointer.cpp - Memory Manipulation/Leak ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/COM.cpp b/lib/Support/COM.cpp
index 97cd085853b0..f37b95ba8651 100644
--- a/lib/Support/COM.cpp
+++ b/lib/Support/COM.cpp
@@ -1,9 +1,8 @@
 //===-- COM.cpp - Implement COM utility classes -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/CRC.cpp b/lib/Support/CRC.cpp
new file mode 100644
index 000000000000..fd98f3a24003
--- /dev/null
+++ b/lib/Support/CRC.cpp
@@ -0,0 +1,68 @@
+//===--- CRC.cpp - Cyclic Redundancy Check implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the llvm::crc32 function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CRC.h"
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Threading.h"
+#include <array>
+
+using namespace llvm;
+
+#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
+using CRC32Table = std::array<uint32_t, 256>;
+
+static void initCRC32Table(CRC32Table *Tbl) {
+  auto Shuffle = [](uint32_t V) {
+    return (V & 1) ? (V >> 1) ^ 0xEDB88320U : V >> 1;
+  };
+
+  for (size_t I = 0; I < Tbl->size(); ++I) {
+    uint32_t V = Shuffle(I);
+    V = Shuffle(V);
+    V = Shuffle(V);
+    V = Shuffle(V);
+    V = Shuffle(V);
+    V = Shuffle(V);
+    V = Shuffle(V);
+    (*Tbl)[I] = Shuffle(V);
+  }
+}
+
+uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
+  static llvm::once_flag InitFlag;
+  static CRC32Table Tbl;
+  llvm::call_once(InitFlag, initCRC32Table, &Tbl);
+
+  const uint8_t *P = reinterpret_cast<const uint8_t *>(S.data());
+  size_t Len = S.size();
+  CRC ^= 0xFFFFFFFFU;
+  for (; Len >= 8; Len -= 8) {
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+  }
+  while (Len--)
+    CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+  return CRC ^ 0xFFFFFFFFU;
+}
+#else
+#include <zlib.h>
+uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
+  return ::crc32(CRC, (const Bytef *)S.data(), S.size());
+}
+#endif
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index a0aa6024b3ed..9813eec0e433 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -1,9 +1,8 @@
 //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,15 +35,8 @@ struct FileInfo {
   /// Used to determine which files to prune first. Also used to determine
   /// set membership, so must take into account all fields.
   bool operator<(const FileInfo &Other) const {
-    if (Time < Other.Time)
-      return true;
-    else if (Other.Time < Time)
-      return false;
-    if (Other.Size < Size)
-      return true;
-    else if (Size < Other.Size)
-      return false;
-    return Path < Other.Path;
+    return std::tie(Time, Other.Size, Path) <
+           std::tie(Other.Time, Size, Other.Path);
   }
 };
 } // anonymous namespace
diff --git a/lib/Support/Chrono.cpp b/lib/Support/Chrono.cpp
index a2626a89eb63..8c28d45d8822 100644
--- a/lib/Support/Chrono.cpp
+++ b/lib/Support/Chrono.cpp
@@ -1,9 +1,8 @@
 //===- Support/Chrono.cpp - Utilities for Timing Manipulation ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/CodeGenCoverage.cpp b/lib/Support/CodeGenCoverage.cpp
index 811020e3254a..f39eb7533b43 100644
--- a/lib/Support/CodeGenCoverage.cpp
+++ b/lib/Support/CodeGenCoverage.cpp
@@ -1,9 +1,8 @@
 //===- lib/Support/CodeGenCoverage.cpp -------------------------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index f7290b54dcf3..25510fa58ff5 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1,9 +1,8 @@
 //===-- CommandLine.cpp - Command line parser implementation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,7 @@ template class basic_parser<bool>;
 template class basic_parser<boolOrDefault>;
 template class basic_parser<int>;
 template class basic_parser<unsigned>;
+template class basic_parser<unsigned long>;
 template class basic_parser<unsigned long long>;
 template class basic_parser<double>;
 template class basic_parser<float>;
@@ -79,6 +79,7 @@ void parser<bool>::anchor() {}
 void parser<boolOrDefault>::anchor() {}
 void parser<int>::anchor() {}
 void parser<unsigned>::anchor() {}
+void parser<unsigned long>::anchor() {}
 void parser<unsigned long long>::anchor() {}
 void parser<double>::anchor() {}
 void parser<float>::anchor() {}
@@ -87,8 +88,47 @@ void parser<char>::anchor() {}
 
 //===----------------------------------------------------------------------===//
 
+static StringRef ArgPrefix = "  -";
+static StringRef ArgPrefixLong = "  --";
+static StringRef ArgHelpPrefix = " - ";
+
+static size_t argPlusPrefixesSize(StringRef ArgName) {
+  size_t Len = ArgName.size();
+  if (Len == 1)
+    return Len + ArgPrefix.size() + ArgHelpPrefix.size();
+  return Len + ArgPrefixLong.size() + ArgHelpPrefix.size();
+}
+
+static StringRef argPrefix(StringRef ArgName) {
+  if (ArgName.size() == 1)
+    return ArgPrefix;
+  return ArgPrefixLong;
+}
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+  return O->getMiscFlags() & cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+  return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
+         O->getFormattingFlag() == cl::AlwaysPrefix;
+}
+
+
 namespace {
 
+class PrintArg {
+  StringRef ArgName;
+public:
+  PrintArg(StringRef ArgName) : ArgName(ArgName) {}
+  friend raw_ostream &operator<<(raw_ostream &OS, const PrintArg&);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const PrintArg& Arg) {
+  OS << argPrefix(Arg.ArgName) << Arg.ArgName;
+  return OS;
+}
+
 class CommandLineParser {
 public:
   // Globals for name and overview of program.  Program name is not a string to
@@ -99,6 +139,11 @@ public:
   // This collects additional help to be printed.
   std::vector<StringRef> MoreHelp;
 
+  // This collects Options added with the cl::DefaultOption flag. Since they can
+  // be overridden, they are not added to the appropriate SubCommands until
+  // ParseCommandLineOptions actually runs.
+  SmallVector<Option*, 4> DefaultOptions;
+
   // This collects the different option categories that have been registered.
   SmallPtrSet<OptionCategory *, 16> RegisteredOptionCategories;
 
@@ -113,7 +158,8 @@ public:
   void ResetAllOptionOccurrences();
 
   bool ParseCommandLineOptions(int argc, const char *const *argv,
-                               StringRef Overview, raw_ostream *Errs = nullptr);
+                               StringRef Overview, raw_ostream *Errs = nullptr,
+                               bool LongOptionsUseDoubleDash = false);
 
   void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
     if (Opt.hasArgStr())
@@ -147,6 +193,11 @@ public:
   void addOption(Option *O, SubCommand *SC) {
     bool HadErrors = false;
     if (O->hasArgStr()) {
+      // If it's a DefaultOption, check to make sure it isn't already there.
+      if (O->isDefaultOption() &&
+          SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end())
+        return;
+
       // Add argument to the argument map!
       if (!SC->OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) {
         errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
@@ -186,7 +237,12 @@ public:
     }
   }
 
-  void addOption(Option *O) {
+  void addOption(Option *O, bool ProcessDefaultOption = false) {
+    if (!ProcessDefaultOption && O->isDefaultOption()) {
+      DefaultOptions.push_back(O);
+      return;
+    }
+
     if (O->Subs.empty()) {
       addOption(O, &*TopLevelSubCommand);
     } else {
@@ -202,8 +258,12 @@ public:
       OptionNames.push_back(O->ArgStr);
 
     SubCommand &Sub = *SC;
-    for (auto Name : OptionNames)
-      Sub.OptionsMap.erase(Name);
+    auto End = Sub.OptionsMap.end();
+    for (auto Name : OptionNames) {
+      auto I = Sub.OptionsMap.find(Name);
+      if (I != End && I->getValue() == O)
+        Sub.OptionsMap.erase(I);
+      }
 
     if (O->getFormattingFlag() == cl::Positional)
       for (auto Opt = Sub.PositionalOpts.begin();
@@ -267,8 +327,13 @@ public:
     if (O->Subs.empty())
       updateArgStr(O, NewName, &*TopLevelSubCommand);
     else {
-      for (auto SC : O->Subs)
-        updateArgStr(O, NewName, SC);
+      if (O->isInAllSubCommands()) {
+        for (auto SC : RegisteredSubCommands)
+          updateArgStr(O, NewName, SC);
+      } else {
+        for (auto SC : O->Subs)
+          updateArgStr(O, NewName, SC);
+      }
     }
   }
 
@@ -332,12 +397,21 @@ public:
     AllSubCommands->reset();
     registerSubCommand(&*TopLevelSubCommand);
     registerSubCommand(&*AllSubCommands);
+
+    DefaultOptions.clear();
   }
 
 private:
   SubCommand *ActiveSubCommand;
 
   Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value);
+  Option *LookupLongOption(SubCommand &Sub, StringRef &Arg, StringRef &Value,
+                           bool LongOptionsUseDoubleDash, bool HaveDoubleDash) {
+    Option *Opt = LookupOption(Sub, Arg, Value);
+    if (Opt && LongOptionsUseDoubleDash && !HaveDoubleDash && !isGrouping(Opt))
+      return nullptr;
+    return Opt;
+  }
   SubCommand *LookupSubCommand(StringRef Name);
 };
 
@@ -365,6 +439,26 @@ void Option::setArgStr(StringRef S) {
     GlobalParser->updateArgStr(this, S);
   assert((S.empty() || S[0] != '-') && "Option can't start with '-");
   ArgStr = S;
+  if (ArgStr.size() == 1)
+    setMiscFlag(Grouping);
+}
+
+void Option::addCategory(OptionCategory &C) {
+  assert(!Categories.empty() && "Categories cannot be empty.");
+  // Maintain backward compatibility by replacing the default GeneralCategory
+  // if it's still set.  Otherwise, just add the new one.  The GeneralCategory
+  // must be explicitly added if you want multiple categories that include it.
+  if (&C != &GeneralCategory && Categories[0] == &GeneralCategory)
+    Categories[0] = &C;
+  else if (find(Categories, &C) == Categories.end())
+    Categories.push_back(&C);
+}
+
+void Option::reset() {
+  NumOccurrences = 0;
+  setDefault();
+  if (isDefaultOption())
+    removeArgument();
 }
 
 // Initialise the general option category.
@@ -374,7 +468,11 @@ void OptionCategory::registerCategory() {
   GlobalParser->registerCategory(this);
 }
 
-// A special subcommand representing no subcommand
+// A special subcommand representing no subcommand. It is particularly important
+// that this ManagedStatic uses constant initialization and not dynamic
+// initialization because it is referenced from cl::opt constructors, which run
+// dynamically in an arbitrary order.
+LLVM_REQUIRE_CONSTANT_INITIALIZATION
 ManagedStatic<SubCommand> llvm::cl::TopLevelSubCommand;
 
 // A special subcommand that can be used to put an option into all subcommands.
@@ -599,15 +697,6 @@ static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
   return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy);
 }
 
-// Option predicates...
-static inline bool isGrouping(const Option *O) {
-  return O->getFormattingFlag() == cl::Grouping;
-}
-static inline bool isPrefixedOrGrouping(const Option *O) {
-  return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
-         O->getFormattingFlag() == cl::AlwaysPrefix;
-}
-
 // getOptionPred - Check to see if there are any options that satisfy the
 // specified predicate with names that are the prefixes in Name.  This is
 // checked by progressively stripping characters off of the name, checking to
@@ -617,8 +706,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) {
 static Option *getOptionPred(StringRef Name, size_t &Length,
                              bool (*Pred)(const Option *),
                              const StringMap<Option *> &OptionsMap) {
-
   StringMap<Option *>::const_iterator OMI = OptionsMap.find(Name);
+  if (OMI != OptionsMap.end() && !Pred(OMI->getValue()))
+    OMI = OptionsMap.end();
 
   // Loop while we haven't found an option and Name still has at least two
   // characters in it (so that the next iteration will not be the empty
@@ -626,6 +716,8 @@ static Option *getOptionPred(StringRef Name, size_t &Length,
   while (OMI == OptionsMap.end() && Name.size() > 1) {
     Name = Name.substr(0, Name.size() - 1); // Chop off the last character.
     OMI = OptionsMap.find(Name);
+    if (OMI != OptionsMap.end() && !Pred(OMI->getValue()))
+      OMI = OptionsMap.end();
   }
 
   if (OMI != OptionsMap.end() && Pred(OMI->second)) {
@@ -652,40 +744,46 @@ HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
   if (!PGOpt)
     return nullptr;
 
-  // If the option is a prefixed option, then the value is simply the
-  // rest of the name...  so fall through to later processing, by
-  // setting up the argument name flags and value fields.
-  if (PGOpt->getFormattingFlag() == cl::Prefix ||
-      PGOpt->getFormattingFlag() == cl::AlwaysPrefix) {
-    Value = Arg.substr(Length);
+  do {
+    StringRef MaybeValue =
+        (Length < Arg.size()) ? Arg.substr(Length) : StringRef();
     Arg = Arg.substr(0, Length);
     assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
-    return PGOpt;
-  }
 
-  // This must be a grouped option... handle them now.  Grouping options can't
-  // have values.
-  assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+    // cl::Prefix options do not preserve '=' when used separately.
+    // The behavior for them with grouped options should be the same.
+    if (MaybeValue.empty() || PGOpt->getFormattingFlag() == cl::AlwaysPrefix ||
+        (PGOpt->getFormattingFlag() == cl::Prefix && MaybeValue[0] != '=')) {
+      Value = MaybeValue;
+      return PGOpt;
+    }
 
-  do {
-    // Move current arg name out of Arg into OneArgName.
-    StringRef OneArgName = Arg.substr(0, Length);
-    Arg = Arg.substr(Length);
-
-    // Because ValueRequired is an invalid flag for grouped arguments,
-    // we don't need to pass argc/argv in.
-    assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
-           "Option can not be cl::Grouping AND cl::ValueRequired!");
+    if (MaybeValue[0] == '=') {
+      Value = MaybeValue.substr(1);
+      return PGOpt;
+    }
+
+    // This must be a grouped option.
+    assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+    // Grouping options inside a group can't have values.
+    if (PGOpt->getValueExpectedFlag() == cl::ValueRequired) {
+      ErrorParsing |= PGOpt->error("may not occur within a group!");
+      return nullptr;
+    }
+
+    // Because the value for the option is not required, we don't need to pass
+    // argc/argv in.
     int Dummy = 0;
-    ErrorParsing |=
-        ProvideOption(PGOpt, OneArgName, StringRef(), 0, nullptr, Dummy);
+    ErrorParsing |= ProvideOption(PGOpt, Arg, StringRef(), 0, nullptr, Dummy);
 
     // Get the next grouping option.
+    Arg = MaybeValue;
     PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
-  } while (PGOpt && Length != Arg.size());
+  } while (PGOpt);
 
-  // Return the last option with Arg cut down to just the last one.
-  return PGOpt;
+  // We could not find a grouping option in the remainder of Arg.
+  return nullptr;
 }
 
 static bool RequiresValue(const Option *O) {
@@ -869,6 +967,13 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
       if (C == '"') {
+        if (I < (E - 1) && Src[I + 1] == '"') {
+          // Consecutive double-quotes inside a quoted string imply one
+          // double-quote.
+          Token.push_back('"');
+          I = I + 1;
+          continue;
+        }
         State = UNQUOTED;
         continue;
       }
@@ -992,41 +1097,84 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
 bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
                              SmallVectorImpl<const char *> &Argv,
                              bool MarkEOLs, bool RelativeNames) {
-  unsigned RspFiles = 0;
   bool AllExpanded = true;
+  struct ResponseFileRecord {
+    const char *File;
+    size_t End;
+  };
+
+  // To detect recursive response files, we maintain a stack of files and the
+  // position of the last argument in the file. This position is updated
+  // dynamically as we recursively expand files.
+  SmallVector<ResponseFileRecord, 3> FileStack;
+
+  // Push a dummy entry that represents the initial command line, removing
+  // the need to check for an empty list.
+  FileStack.push_back({"", Argv.size()});
 
   // Don't cache Argv.size() because it can change.
   for (unsigned I = 0; I != Argv.size();) {
+    while (I == FileStack.back().End) {
+      // Passing the end of a file's argument list, so we can remove it from the
+      // stack.
+      FileStack.pop_back();
+    }
+
     const char *Arg = Argv[I];
     // Check if it is an EOL marker
     if (Arg == nullptr) {
       ++I;
       continue;
     }
+
     if (Arg[0] != '@') {
       ++I;
       continue;
     }
 
-    // If we have too many response files, leave some unexpanded.  This avoids
-    // crashing on self-referential response files.
-    if (RspFiles++ > 20)
-      return false;
+    const char *FName = Arg + 1;
+    auto IsEquivalent = [FName](const ResponseFileRecord &RFile) {
+      return sys::fs::equivalent(RFile.File, FName);
+    };
+
+    // Check for recursive response files.
+    if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) {
+      // This file is recursive, so we leave it in the argument stream and
+      // move on.
+      AllExpanded = false;
+      ++I;
+      continue;
+    }
 
     // Replace this response file argument with the tokenization of its
     // contents.  Nested response files are expanded in subsequent iterations.
     SmallVector<const char *, 0> ExpandedArgv;
-    if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv,
-                            MarkEOLs, RelativeNames)) {
+    if (!ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs,
+                            RelativeNames)) {
       // We couldn't read this file, so we leave it in the argument stream and
       // move on.
       AllExpanded = false;
       ++I;
       continue;
     }
+
+    for (ResponseFileRecord &Record : FileStack) {
+      // Increase the end of all active records by the number of newly expanded
+      // arguments, minus the response file itself.
+      Record.End += ExpandedArgv.size() - 1;
+    }
+
+    FileStack.push_back({FName, I + ExpandedArgv.size()});
     Argv.erase(Argv.begin() + I);
     Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end());
   }
+
+  // If successful, the top of the file stack will mark the end of the Argv
+  // stream. A failure here indicates a bug in the stack popping logic above.
+  // Note that FileStack may have more than one element at this point because we
+  // don't have a chance to pop the stack when encountering recursive files at
+  // the end of the stream, so seeing that doesn't indicate a bug.
+  assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End);
   return AllExpanded;
 }
 
@@ -1071,7 +1219,8 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
 
 bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
                                  StringRef Overview, raw_ostream *Errs,
-                                 const char *EnvVar) {
+                                 const char *EnvVar,
+                                 bool LongOptionsUseDoubleDash) {
   SmallVector<const char *, 20> NewArgv;
   BumpPtrAllocator A;
   StringSaver Saver(A);
@@ -1091,7 +1240,7 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
 
   // Parse all options.
   return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview,
-                                               Errs);
+                                               Errs, LongOptionsUseDoubleDash);
 }
 
 void CommandLineParser::ResetAllOptionOccurrences() {
@@ -1106,7 +1255,8 @@ void CommandLineParser::ResetAllOptionOccurrences() {
 bool CommandLineParser::ParseCommandLineOptions(int argc,
                                                 const char *const *argv,
                                                 StringRef Overview,
-                                                raw_ostream *Errs) {
+                                                raw_ostream *Errs,
+                                                bool LongOptionsUseDoubleDash) {
   assert(hasOptions() && "No options specified!");
 
   // Expand response files.
@@ -1152,6 +1302,10 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
   auto &SinkOpts = ChosenSubCommand->SinkOpts;
   auto &OptionsMap = ChosenSubCommand->OptionsMap;
 
+  for (auto O: DefaultOptions) {
+    addOption(O, true);
+  }
+
   if (ConsumeAfterOpt) {
     assert(PositionalOpts.size() > 0 &&
            "Cannot specify cl::ConsumeAfter without a positional argument!");
@@ -1212,6 +1366,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
     std::string NearestHandlerString;
     StringRef Value;
     StringRef ArgName = "";
+    bool HaveDoubleDash = false;
 
     // Check to see if this is a positional argument.  This argument is
     // considered to be positional if it doesn't start with '-', if it is "-"
@@ -1249,26 +1404,31 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
       // option is another positional argument.  If so, treat it as an argument,
       // otherwise feed it to the eating positional.
       ArgName = StringRef(argv[i] + 1);
-      // Eat leading dashes.
-      while (!ArgName.empty() && ArgName[0] == '-')
+      // Eat second dash.
+      if (!ArgName.empty() && ArgName[0] == '-') {
+        HaveDoubleDash = true;
         ArgName = ArgName.substr(1);
+      }
 
-      Handler = LookupOption(*ChosenSubCommand, ArgName, Value);
+      Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value,
+                                 LongOptionsUseDoubleDash, HaveDoubleDash);
       if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
         ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i);
         continue; // We are done!
       }
-
     } else { // We start with a '-', must be an argument.
       ArgName = StringRef(argv[i] + 1);
-      // Eat leading dashes.
-      while (!ArgName.empty() && ArgName[0] == '-')
+      // Eat second dash.
+      if (!ArgName.empty() && ArgName[0] == '-') {
+        HaveDoubleDash = true;
         ArgName = ArgName.substr(1);
+      }
 
-      Handler = LookupOption(*ChosenSubCommand, ArgName, Value);
+      Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value,
+                                 LongOptionsUseDoubleDash, HaveDoubleDash);
 
       // Check to see if this "option" is really a prefixed or grouped argument.
-      if (!Handler)
+      if (!Handler && !(LongOptionsUseDoubleDash && HaveDoubleDash))
         Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing,
                                                 OptionsMap);
 
@@ -1282,12 +1442,12 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
     if (!Handler) {
       if (SinkOpts.empty()) {
         *Errs << ProgramName << ": Unknown command line argument '" << argv[i]
-              << "'.  Try: '" << argv[0] << " -help'\n";
+              << "'.  Try: '" << argv[0] << " --help'\n";
 
         if (NearestHandler) {
           // If we know a near match, report it as well.
-          *Errs << ProgramName << ": Did you mean '-" << NearestHandlerString
-                 << "'?\n";
+          *Errs << ProgramName << ": Did you mean '"
+                << PrintArg(NearestHandlerString) << "'?\n";
         }
 
         ErrorParsing = true;
@@ -1321,14 +1481,14 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
              << ": Not enough positional command line arguments specified!\n"
              << "Must specify at least " << NumPositionalRequired
              << " positional argument" << (NumPositionalRequired > 1 ? "s" : "")
-             << ": See: " << argv[0] << " -help\n";
+             << ": See: " << argv[0] << " --help\n";
 
     ErrorParsing = true;
   } else if (!HasUnlimitedPositionals &&
              PositionalVals.size() > PositionalOpts.size()) {
     *Errs << ProgramName << ": Too many positional arguments specified!\n"
           << "Can specify at most " << PositionalOpts.size()
-          << " positional arguments: See: " << argv[0] << " -help\n";
+          << " positional arguments: See: " << argv[0] << " --help\n";
     ErrorParsing = true;
 
   } else if (!ConsumeAfterOpt) {
@@ -1441,7 +1601,7 @@ bool Option::error(const Twine &Message, StringRef ArgName, raw_ostream &Errs) {
   if (ArgName.empty())
     Errs << HelpStr; // Be nice for positional arguments
   else
-    Errs << GlobalParser->ProgramName << ": for the -" << ArgName;
+    Errs << GlobalParser->ProgramName << ": for the " << PrintArg(ArgName);
 
   Errs << " option: " << Message << "\n";
   return true;
@@ -1484,12 +1644,16 @@ static StringRef getValueStr(const Option &O, StringRef DefaultMsg) {
 //
 
 // Return the width of the option tag for printing...
-size_t alias::getOptionWidth() const { return ArgStr.size() + 6; }
+size_t alias::getOptionWidth() const {
+  return argPlusPrefixesSize(ArgStr);
+}
 
 void Option::printHelpStr(StringRef HelpStr, size_t Indent,
-                                 size_t FirstLineIndentedBy) {
+                          size_t FirstLineIndentedBy) {
+  assert(Indent >= FirstLineIndentedBy);
   std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
-  outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n";
+  outs().indent(Indent - FirstLineIndentedBy)
+      << ArgHelpPrefix << Split.first << "\n";
   while (!Split.second.empty()) {
     Split = Split.second.split('\n');
     outs().indent(Indent) << Split.first << "\n";
@@ -1498,8 +1662,8 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
 
 // Print out the option for the alias.
 void alias::printOptionInfo(size_t GlobalWidth) const {
-  outs() << "  -" << ArgStr;
-  printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6);
+  outs() << PrintArg(ArgStr);
+  printHelpStr(HelpStr, GlobalWidth, argPlusPrefixesSize(ArgStr));
 }
 
 //===----------------------------------------------------------------------===//
@@ -1511,7 +1675,7 @@ void alias::printOptionInfo(size_t GlobalWidth) const {
 
 // Return the width of the option tag for printing...
 size_t basic_parser_impl::getOptionWidth(const Option &O) const {
-  size_t Len = O.ArgStr.size();
+  size_t Len = argPlusPrefixesSize(O.ArgStr);
   auto ValName = getValueName();
   if (!ValName.empty()) {
     size_t FormattingLen = 3;
@@ -1520,7 +1684,7 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const {
     Len += getValueStr(O, ValName).size() + FormattingLen;
   }
 
-  return Len + 6;
+  return Len;
 }
 
 // printOptionInfo - Print out information about this option.  The
@@ -1528,7 +1692,7 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const {
 //
 void basic_parser_impl::printOptionInfo(const Option &O,
                                         size_t GlobalWidth) const {
-  outs() << "  -" << O.ArgStr;
+  outs() << PrintArg(O.ArgStr);
 
   auto ValName = getValueName();
   if (!ValName.empty()) {
@@ -1544,7 +1708,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,
 
 void basic_parser_impl::printOptionName(const Option &O,
                                         size_t GlobalWidth) const {
-  outs() << "  -" << O.ArgStr;
+  outs() << PrintArg(O.ArgStr);
   outs().indent(GlobalWidth - O.ArgStr.size());
 }
 
@@ -1603,6 +1767,16 @@ bool parser<unsigned>::parse(Option &O, StringRef ArgName, StringRef Arg,
   return false;
 }
 
+// parser<unsigned long> implementation
+//
+bool parser<unsigned long>::parse(Option &O, StringRef ArgName, StringRef Arg,
+                                  unsigned long &Value) {
+
+  if (Arg.getAsInteger(0, Value))
+    return O.error("'" + Arg + "' value invalid for ulong argument!");
+  return false;
+}
+
 // parser<unsigned long long> implementation
 //
 bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
@@ -1610,7 +1784,7 @@ bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
                                        unsigned long long &Value) {
 
   if (Arg.getAsInteger(0, Value))
-    return O.error("'" + Arg + "' value invalid for uint argument!");
+    return O.error("'" + Arg + "' value invalid for ullong argument!");
   return false;
 }
 
@@ -1652,12 +1826,29 @@ unsigned generic_parser_base::findOption(StringRef Name) {
   return e;
 }
 
+static StringRef EqValue = "=<value>";
+static StringRef EmptyOption = "<empty>";
+static StringRef OptionPrefix = "    =";
+static size_t OptionPrefixesSize = OptionPrefix.size() + ArgHelpPrefix.size();
+
+static bool shouldPrintOption(StringRef Name, StringRef Description,
+                              const Option &O) {
+  return O.getValueExpectedFlag() != ValueOptional || !Name.empty() ||
+         !Description.empty();
+}
+
 // Return the width of the option tag for printing...
 size_t generic_parser_base::getOptionWidth(const Option &O) const {
   if (O.hasArgStr()) {
-    size_t Size = O.ArgStr.size() + 6;
-    for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
-      Size = std::max(Size, getOption(i).size() + 8);
+    size_t Size =
+        argPlusPrefixesSize(O.ArgStr) + EqValue.size();
+    for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+      StringRef Name = getOption(i);
+      if (!shouldPrintOption(Name, getDescription(i), O))
+        continue;
+      size_t NameSize = Name.empty() ? EmptyOption.size() : Name.size();
+      Size = std::max(Size, NameSize + OptionPrefixesSize);
+    }
     return Size;
   } else {
     size_t BaseSize = 0;
@@ -1673,20 +1864,46 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const {
 void generic_parser_base::printOptionInfo(const Option &O,
                                           size_t GlobalWidth) const {
   if (O.hasArgStr()) {
-    outs() << "  -" << O.ArgStr;
-    Option::printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
+    // When the value is optional, first print a line just describing the
+    // option without values.
+    if (O.getValueExpectedFlag() == ValueOptional) {
+      for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+        if (getOption(i).empty()) {
+          outs() << PrintArg(O.ArgStr);
+          Option::printHelpStr(O.HelpStr, GlobalWidth,
+                               argPlusPrefixesSize(O.ArgStr));
+          break;
+        }
+      }
+    }
 
+    outs() << PrintArg(O.ArgStr) << EqValue;
+    Option::printHelpStr(O.HelpStr, GlobalWidth,
+                         EqValue.size() +
+                             argPlusPrefixesSize(O.ArgStr));
     for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
-      size_t NumSpaces = GlobalWidth - getOption(i).size() - 8;
-      outs() << "    =" << getOption(i);
-      outs().indent(NumSpaces) << " -   " << getDescription(i) << '\n';
+      StringRef OptionName = getOption(i);
+      StringRef Description = getDescription(i);
+      if (!shouldPrintOption(OptionName, Description, O))
+        continue;
+      assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
+      size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+      outs() << OptionPrefix << OptionName;
+      if (OptionName.empty()) {
+        outs() << EmptyOption;
+        assert(NumSpaces >= EmptyOption.size());
+        NumSpaces -= EmptyOption.size();
+      }
+      if (!Description.empty())
+        outs().indent(NumSpaces) << ArgHelpPrefix << "  " << Description;
+      outs() << '\n';
     }
   } else {
     if (!O.HelpStr.empty())
       outs() << "  " << O.HelpStr << '\n';
     for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
-      auto Option = getOption(i);
-      outs() << "    -" << Option;
+      StringRef Option = getOption(i);
+      outs() << "    " << PrintArg(Option);
       Option::printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8);
     }
   }
@@ -1700,7 +1917,7 @@ static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
 void generic_parser_base::printGenericOptionDiff(
     const Option &O, const GenericOptionValue &Value,
     const GenericOptionValue &Default, size_t GlobalWidth) const {
-  outs() << "  -" << O.ArgStr;
+  outs() << "  " << PrintArg(O.ArgStr);
   outs().indent(GlobalWidth - O.ArgStr.size());
 
   unsigned NumOpts = getNumOptions();
@@ -1750,6 +1967,7 @@ PRINT_OPT_DIFF(bool)
 PRINT_OPT_DIFF(boolOrDefault)
 PRINT_OPT_DIFF(int)
 PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(unsigned long)
 PRINT_OPT_DIFF(unsigned long long)
 PRINT_OPT_DIFF(double)
 PRINT_OPT_DIFF(float)
@@ -1919,7 +2137,7 @@ public:
       printSubCommands(Subs, MaxSubLen);
       outs() << "\n";
       outs() << "  Type \"" << GlobalParser->ProgramName
-             << " <subcommand> -help\" to get more help on a specific "
+             << " <subcommand> --help\" to get more help on a specific "
                 "subcommand";
     }
 
@@ -1986,9 +2204,11 @@ protected:
     // options within categories will also be alphabetically sorted.
     for (size_t I = 0, E = Opts.size(); I != E; ++I) {
       Option *Opt = Opts[I].second;
-      assert(CategorizedOptions.count(Opt->Category) > 0 &&
-             "Option has an unregistered category");
-      CategorizedOptions[Opt->Category].push_back(Opt);
+      for (auto &Cat : Opt->Categories) {
+        assert(CategorizedOptions.count(Cat) > 0 &&
+               "Option has an unregistered category");
+        CategorizedOptions[Cat].push_back(Opt);
+      }
     }
 
     // Now do printing.
@@ -1996,7 +2216,7 @@ protected:
              Category = SortedCategories.begin(),
              E = SortedCategories.end();
          Category != E; ++Category) {
-      // Hide empty categories for -help, but show for -help-hidden.
+      // Hide empty categories for --help, but show for --help-hidden.
       const auto &CategoryOptions = CategorizedOptions[*Category];
       bool IsEmptyCategory = CategoryOptions.empty();
       if (!ShowHidden && IsEmptyCategory)
@@ -2012,7 +2232,7 @@ protected:
       else
         outs() << "\n";
 
-      // When using -help-hidden explicitly state if the category has no
+      // When using --help-hidden explicitly state if the category has no
       // options associated with it.
       if (IsEmptyCategory) {
         outs() << "  This option category has no options.\n";
@@ -2062,11 +2282,11 @@ static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter,
 static cl::OptionCategory GenericCategory("Generic Options");
 
 // Define uncategorized help printers.
-// -help-list is hidden by default because if Option categories are being used
-// then -help behaves the same as -help-list.
+// --help-list is hidden by default because if Option categories are being used
+// then --help behaves the same as --help-list.
 static cl::opt<HelpPrinter, true, parser<bool>> HLOp(
     "help-list",
-    cl::desc("Display list of available options (-help-list-hidden for more)"),
+    cl::desc("Display list of available options (--help-list-hidden for more)"),
     cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed,
     cl::cat(GenericCategory), cl::sub(*AllSubCommands));
 
@@ -2080,10 +2300,13 @@ static cl::opt<HelpPrinter, true, parser<bool>>
 // behaviour at runtime depending on whether one or more Option categories have
 // been declared.
 static cl::opt<HelpPrinterWrapper, true, parser<bool>>
-    HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+    HOp("help", cl::desc("Display available options (--help-hidden for more)"),
         cl::location(WrappedNormalPrinter), cl::ValueDisallowed,
         cl::cat(GenericCategory), cl::sub(*AllSubCommands));
 
+static cl::alias HOpA("h", cl::desc("Alias for --help"), cl::aliasopt(HOp),
+                      cl::DefaultOption);
+
 static cl::opt<HelpPrinterWrapper, true, parser<bool>>
     HHOp("help-hidden", cl::desc("Display all available options"),
          cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed,
@@ -2108,7 +2331,7 @@ void HelpPrinterWrapper::operator=(bool Value) {
   // registered then it is useful to show the categorized help instead of
   // uncategorized help.
   if (GlobalParser->RegisteredOptionCategories.size() > 1) {
-    // unhide -help-list option so user can have uncategorized output if they
+    // unhide --help-list option so user can have uncategorized output if they
     // want it.
     HLOp.setHiddenFlag(NotHidden);
 
@@ -2242,21 +2465,21 @@ cl::getRegisteredSubcommands() {
 
 void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) {
   for (auto &I : Sub.OptionsMap) {
-    if (I.second->Category != &Category &&
-        I.second->Category != &GenericCategory)
-      I.second->setHiddenFlag(cl::ReallyHidden);
+    for (auto &Cat : I.second->Categories) {
+      if (Cat != &Category &&
+          Cat != &GenericCategory)
+        I.second->setHiddenFlag(cl::ReallyHidden);
+    }
   }
 }
 
 void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories,
                               SubCommand &Sub) {
-  auto CategoriesBegin = Categories.begin();
-  auto CategoriesEnd = Categories.end();
   for (auto &I : Sub.OptionsMap) {
-    if (std::find(CategoriesBegin, CategoriesEnd, I.second->Category) ==
-            CategoriesEnd &&
-        I.second->Category != &GenericCategory)
-      I.second->setHiddenFlag(cl::ReallyHidden);
+    for (auto &Cat : I.second->Categories) {
+      if (find(Categories, Cat) == Categories.end() && Cat != &GenericCategory)
+        I.second->setHiddenFlag(cl::ReallyHidden);
+    }
   }
 }
 
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index 95261d4aad23..97d5ffaadf82 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -1,9 +1,8 @@
 //===--- Compression.cpp - Compression implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index 8f02fae4f558..e24a918c5c89 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -1,9 +1,8 @@
 /*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
  *
- *                     The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  *===------------------------------------------------------------------------=*/
 /*
diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp
index 6cb4f6376250..eb4ead6b46b4 100644
--- a/lib/Support/ConvertUTFWrapper.cpp
+++ b/lib/Support/ConvertUTFWrapper.cpp
@@ -1,9 +1,8 @@
 //===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index be4b5c3e01c3..c2459256f8fe 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -1,9 +1,8 @@
 //===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index bd9f98b0b82d..4675fe3a9401 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -1,9 +1,8 @@
 //===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 //
 // The algorithm we use attempts to exploit the dependency information by
diff --git a/lib/Support/DJB.cpp b/lib/Support/DJB.cpp
index 905dcf1b7e81..f06af7dfde44 100644
--- a/lib/Support/DJB.cpp
+++ b/lib/Support/DJB.cpp
@@ -1,9 +1,8 @@
 //===-- Support/DJB.cpp ---DJB Hash -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,29 +57,26 @@ static UTF32 foldCharDwarf(UTF32 C) {
   return sys::unicode::foldCharSimple(C);
 }
 
-static uint32_t caseFoldingDjbHashCharSlow(StringRef &Buffer, uint32_t H) {
-  UTF32 C = chopOneUTF32(Buffer);
-
-  C = foldCharDwarf(C);
-
-  std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
-  StringRef Folded = toUTF8(C, Storage);
-  return djbHash(Folded, H);
+static Optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+  bool AllASCII = true;
+  for (unsigned char C : Buffer) {
+    H = H * 33 + ('A' <= C && C <= 'Z' ? C - 'A' + 'a' : C);
+    AllASCII &= C <= 0x7f;
+  }
+  if (AllASCII)
+    return H;
+  return None;
 }
 
 uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+  if (Optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H))
+    return *Result;
+
+  std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
   while (!Buffer.empty()) {
-    unsigned char C = Buffer.front();
-    if (LLVM_LIKELY(C <= 0x7f)) {
-      // US-ASCII, encoded as one character in utf-8.
-      // This is by far the most common case, so handle this specially.
-      if (C >= 'A' && C <= 'Z')
-        C = 'a' + (C - 'A'); // fold uppercase into lowercase
-      H = (H << 5) + H + C;
-      Buffer = Buffer.drop_front();
-      continue;
-    }
-    H = caseFoldingDjbHashCharSlow(Buffer, H);
+    UTF32 C = foldCharDwarf(chopOneUTF32(Buffer));
+    StringRef Folded = toUTF8(C, Storage);
+    H = djbHash(Folded, H);
   }
   return H;
 }
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 0199b300ba72..673bbb4d06f4 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -1,9 +1,8 @@
 //===-- DataExtractor.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/LEB128.h"
 using namespace llvm;
 
 template <typename T>
@@ -146,47 +146,29 @@ StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const {
 }
 
 uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
-  uint64_t result = 0;
-  if (Data.empty())
+  assert(*offset_ptr <= Data.size());
+
+  const char *error;
+  unsigned bytes_read;
+  uint64_t result = decodeULEB128(
+      reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
+      reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
+  if (error)
     return 0;
-
-  unsigned shift = 0;
-  uint32_t offset = *offset_ptr;
-  uint8_t byte = 0;
-
-  while (isValidOffset(offset)) {
-    byte = Data[offset++];
-    result |= uint64_t(byte & 0x7f) << shift;
-    shift += 7;
-    if ((byte & 0x80) == 0)
-      break;
-  }
-
-  *offset_ptr = offset;
+  *offset_ptr += bytes_read;
   return result;
 }
 
 int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
-  int64_t result = 0;
-  if (Data.empty())
+  assert(*offset_ptr <= Data.size());
+
+  const char *error;
+  unsigned bytes_read;
+  int64_t result = decodeSLEB128(
+      reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
+      reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
+  if (error)
     return 0;
-
-  unsigned shift = 0;
-  uint32_t offset = *offset_ptr;
-  uint8_t byte = 0;
-
-  while (isValidOffset(offset)) {
-    byte = Data[offset++];
-    result |= uint64_t(byte & 0x7f) << shift;
-    shift += 7;
-    if ((byte & 0x80) == 0)
-      break;
-  }
-
-  // Sign bit of byte is 2nd high order bit (0x40)
-  if (shift < 64 && (byte & 0x40))
-    result |= -(1ULL << shift);
-
-  *offset_ptr = offset;
+  *offset_ptr += bytes_read;
   return result;
 }
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 1a70017fee32..737cd576ed80 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -1,9 +1,8 @@
 //===-- Debug.cpp - An easy way to add debug output to your code ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp
index 50ea4e9ce0c6..6aee69f43405 100644
--- a/lib/Support/DeltaAlgorithm.cpp
+++ b/lib/Support/DeltaAlgorithm.cpp
@@ -1,9 +1,8 @@
 //===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DeltaAlgorithm.h"
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 530e92d99a90..d23716016fb2 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -1,9 +1,8 @@
 //===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index 2149f21281d3..d18231c6ebf5 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -1,9 +1,8 @@
 //===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string.h>
 
diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp
index 30bfc3e6d2fb..72bc08af2ddb 100644
--- a/lib/Support/Error.cpp
+++ b/lib/Support/Error.cpp
@@ -1,9 +1,8 @@
 //===----- lib/Support/Error.cpp - Error and associated utilities ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 21712c5c039e..0f13f7a536f1 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -1,9 +1,8 @@
 //===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -187,25 +186,13 @@ static void out_of_memory_new_handler() {
   llvm::report_bad_alloc_error("Allocation failed");
 }
 
-// Installs new handler that causes crash on allocation failure. It does not
-// need to be called explicitly, if this file is linked to application, because
-// in this case it is called during construction of 'new_handler_installer'.
+// Installs new handler that causes crash on allocation failure. It is called by
+// InitLLVM.
 void llvm::install_out_of_memory_new_handler() {
-  static bool out_of_memory_new_handler_installed = false;
-  if (!out_of_memory_new_handler_installed) {
-    std::set_new_handler(out_of_memory_new_handler);
-    out_of_memory_new_handler_installed = true;
-  }
+  std::new_handler old = std::set_new_handler(out_of_memory_new_handler);
+  (void)old;
+  assert(old == nullptr && "new-handler already installed");
 }
-
-// Static object that causes installation of 'out_of_memory_new_handler' before
-// execution of 'main'.
-static class NewHandlerInstaller {
-public:
-  NewHandlerInstaller() {
-    install_out_of_memory_new_handler();
-  }
-} new_handler_installer;
 #endif
 
 void llvm::llvm_unreachable_internal(const char *msg, const char *file,
diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp
index 37986c96c081..e0f17787bdf8 100644
--- a/lib/Support/FileCheck.cpp
+++ b/lib/Support/FileCheck.cpp
@@ -1,9 +1,8 @@
 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,18 +24,303 @@
 
 using namespace llvm;
 
-/// Parses the given string into the Pattern.
-///
-/// \p Prefix provides which prefix is being matched, \p SM provides the
-/// SourceMgr used for error reports, and \p LineNumber is the line number in
-/// the input file from which the pattern string was read. Returns true in
-/// case of an error, false otherwise.
-bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
-                           SourceMgr &SM, unsigned LineNumber,
-                           const FileCheckRequest &Req) {
+void FileCheckNumericVariable::setValue(uint64_t NewValue) {
+  assert(!Value && "Overwriting numeric variable's value is not allowed");
+  Value = NewValue;
+}
+
+void FileCheckNumericVariable::clearValue() {
+  if (!Value)
+    return;
+  Value = None;
+}
+
+Expected<uint64_t> FileCheckNumericVariableUse::eval() const {
+  Optional<uint64_t> Value = NumericVariable->getValue();
+  if (Value)
+    return *Value;
+  return make_error<FileCheckUndefVarError>(Name);
+}
+
+Expected<uint64_t> FileCheckASTBinop::eval() const {
+  Expected<uint64_t> LeftOp = LeftOperand->eval();
+  Expected<uint64_t> RightOp = RightOperand->eval();
+
+  // Bubble up any error (e.g. undefined variables) in the recursive
+  // evaluation.
+  if (!LeftOp || !RightOp) {
+    Error Err = Error::success();
+    if (!LeftOp)
+      Err = joinErrors(std::move(Err), LeftOp.takeError());
+    if (!RightOp)
+      Err = joinErrors(std::move(Err), RightOp.takeError());
+    return std::move(Err);
+  }
+
+  return EvalBinop(*LeftOp, *RightOp);
+}
+
+Expected<std::string> FileCheckNumericSubstitution::getResult() const {
+  Expected<uint64_t> EvaluatedValue = ExpressionAST->eval();
+  if (!EvaluatedValue)
+    return EvaluatedValue.takeError();
+  return utostr(*EvaluatedValue);
+}
+
+Expected<std::string> FileCheckStringSubstitution::getResult() const {
+  // Look up the value and escape it so that we can put it into the regex.
+  Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
+  if (!VarVal)
+    return VarVal.takeError();
+  return Regex::escape(*VarVal);
+}
+
+bool FileCheckPattern::isValidVarNameStart(char C) {
+  return C == '_' || isalpha(static_cast<unsigned char>(C)); // avoid UB if C<0
+}
+
+Expected<FileCheckPattern::VariableProperties>
+FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
+  if (Str.empty())
+    return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name");
+
+  bool ParsedOneChar = false;
+  unsigned I = 0;
+  bool IsPseudo = Str[0] == '@';
+
+  // Skip the sigil: global vars start with '$', pseudo vars with '@'.
+  if (Str[0] == '$' || IsPseudo)
+    ++I;
+
+  for (unsigned E = Str.size(); I != E; ++I) {
+    if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
+      return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name");
+
+    // Names are [A-Za-z0-9_]; cast since isalnum() is UB on a negative char.
+    if (Str[I] != '_' && !isalnum(static_cast<unsigned char>(Str[I])))
+      break;
+    ParsedOneChar = true;
+  }
+
+  // Hand back the parsed name and leave Str pointing just past it.
+  StringRef Name = Str.take_front(I);
+  Str = Str.substr(I);
+  return VariableProperties {Name, IsPseudo};
+}
+
+// StringRef holding all characters considered horizontal whitespace by
+// FileCheck input canonicalization.
+StringRef SpaceChars = " \t";
+
+// Parsing helper function that strips the first character in S and returns it.
+static char popFront(StringRef &S) {
+  char C = S.front();
+  S = S.drop_front();
+  return C;
+}
+
+char FileCheckUndefVarError::ID = 0;
+char FileCheckErrorDiagnostic::ID = 0;
+char FileCheckNotFoundError::ID = 0;
+
+Expected<FileCheckNumericVariable *>
+FileCheckPattern::parseNumericVariableDefinition(
+    StringRef &Expr, FileCheckPatternContext *Context,
+    Optional<size_t> LineNumber, const SourceMgr &SM) {
+  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
+  if (!ParseVarResult)
+    return ParseVarResult.takeError();
+  StringRef Name = ParseVarResult->Name;
+
+  if (ParseVarResult->IsPseudo)
+    return FileCheckErrorDiagnostic::get(
+        SM, Name, "definition of pseudo numeric variable unsupported");
+
+  // Detect collisions between string and numeric variables when the latter
+  // is created later than the former.
+  if (Context->DefinedVariableTable.find(Name) !=
+      Context->DefinedVariableTable.end())
+    return FileCheckErrorDiagnostic::get(
+        SM, Name, "string variable with name '" + Name + "' already exists");
+
+  Expr = Expr.ltrim(SpaceChars);
+  if (!Expr.empty())
+    return FileCheckErrorDiagnostic::get(
+        SM, Expr, "unexpected characters after numeric variable name");
+
+  FileCheckNumericVariable *DefinedNumericVariable;
+  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
+  if (VarTableIter != Context->GlobalNumericVariableTable.end())
+    DefinedNumericVariable = VarTableIter->second;
+  else
+    DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber);
+
+  return DefinedNumericVariable;
+}
+
+Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
+                                          const SourceMgr &SM) const {
+  if (IsPseudo && !Name.equals("@LINE"))
+    return FileCheckErrorDiagnostic::get(
+        SM, Name, "invalid pseudo numeric variable '" + Name + "'");
+
+  // Numeric variable definitions and uses are parsed in the order in which
+  // they appear in the CHECK patterns. For each definition, the pointer to the
+  // class instance of the corresponding numeric variable definition is stored
+  // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
+  // we get below is null, it means no such variable was defined before. When
+  // that happens, we create a dummy variable so that parsing can continue. All
+  // uses of undefined variables, whether string or numeric, are then diagnosed
+  // in printSubstitutions() after failing to match.
+  auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
+  FileCheckNumericVariable *NumericVariable;
+  if (VarTableIter != Context->GlobalNumericVariableTable.end())
+    NumericVariable = VarTableIter->second;
+  else {
+    NumericVariable = Context->makeNumericVariable(Name);
+    Context->GlobalNumericVariableTable[Name] = NumericVariable;
+  }
+
+  Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
+  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
+    return FileCheckErrorDiagnostic::get(
+        SM, Name,
+        "numeric variable '" + Name + "' defined on the same line as used");
+
+  return llvm::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+                                      const SourceMgr &SM) const {
+  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
+    // Try to parse as a numeric variable use.
+    Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+        parseVariable(Expr, SM);
+    if (ParseVarResult)
+      return parseNumericVariableUse(ParseVarResult->Name,
+                                     ParseVarResult->IsPseudo, SM);
+    if (AO == AllowedOperand::LineVar)
+      return ParseVarResult.takeError();
+    // Ignore the error and retry parsing as a literal.
+    consumeError(ParseVarResult.takeError());
+  }
+
+  // Otherwise, parse it as a literal.
+  uint64_t LiteralValue;
+  if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
+    return llvm::make_unique<FileCheckExpressionLiteral>(LiteralValue);
+
+  return FileCheckErrorDiagnostic::get(SM, Expr,
+                                       "invalid operand format '" + Expr + "'");
+}
+
+static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
+  return LeftOp + RightOp;
+}
+
+static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
+  return LeftOp - RightOp;
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseBinop(StringRef &Expr,
+                             std::unique_ptr<FileCheckExpressionAST> LeftOp,
+                             bool IsLegacyLineExpr, const SourceMgr &SM) const {
+  Expr = Expr.ltrim(SpaceChars);
+  if (Expr.empty())
+    return std::move(LeftOp);
+
+  // Check if this is a supported operation and select a function to perform
+  // it.
+  SMLoc OpLoc = SMLoc::getFromPointer(Expr.data());
+  char Operator = popFront(Expr);
+  binop_eval_t EvalBinop;
+  switch (Operator) {
+  case '+':
+    EvalBinop = add;
+    break;
+  case '-':
+    EvalBinop = sub;
+    break;
+  default:
+    return FileCheckErrorDiagnostic::get(
+        SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
+  }
+
+  // Parse right operand.
+  Expr = Expr.ltrim(SpaceChars);
+  if (Expr.empty())
+    return FileCheckErrorDiagnostic::get(SM, Expr,
+                                         "missing operand in expression");
+  // The second operand in a legacy @LINE expression is always a literal.
+  AllowedOperand AO =
+      IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
+  Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult =
+      parseNumericOperand(Expr, AO, SM);
+  if (!RightOpResult)
+    return RightOpResult;
+
+  Expr = Expr.ltrim(SpaceChars);
+  return llvm::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
+                                              std::move(*RightOpResult));
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericSubstitutionBlock(
+    StringRef Expr,
+    Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+    bool IsLegacyLineExpr, const SourceMgr &SM) const {
+  // Parse the numeric variable definition.
+  DefinedNumericVariable = None;
+  size_t DefEnd = Expr.find(':');
+  if (DefEnd != StringRef::npos) {
+    StringRef DefExpr = Expr.substr(0, DefEnd);
+    StringRef UseExpr = Expr.substr(DefEnd + 1);
+
+    UseExpr = UseExpr.ltrim(SpaceChars);
+    if (!UseExpr.empty())
+      return FileCheckErrorDiagnostic::get(
+          SM, UseExpr,
+          "unexpected string after variable definition: '" + UseExpr + "'");
+
+    DefExpr = DefExpr.ltrim(SpaceChars);
+    Expected<FileCheckNumericVariable *> ParseResult =
+        parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
+    if (!ParseResult)
+      return ParseResult.takeError();
+    DefinedNumericVariable = *ParseResult;
+
+    return nullptr;
+  }
+
+  // Parse the expression itself.
+  Expr = Expr.ltrim(SpaceChars);
+  // The first operand in a legacy @LINE expression is always the @LINE pseudo
+  // variable.
+  AllowedOperand AO =
+      IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
+  Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+      parseNumericOperand(Expr, AO, SM);
+  while (ParseResult && !Expr.empty()) {
+    ParseResult =
+        parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM);
+    // Legacy @LINE expressions only allow 2 operands.
+    if (ParseResult && IsLegacyLineExpr && !Expr.empty())
+      return FileCheckErrorDiagnostic::get(
+          SM, Expr,
+          "unexpected characters at end of expression '" + Expr + "'");
+  }
+  if (!ParseResult)
+    return ParseResult;
+  return std::move(*ParseResult);
+}
+
+bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
+                                    SourceMgr &SM,
+                                    const FileCheckRequest &Req) {
   bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
 
-  this->LineNumber = LineNumber;
   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
 
   if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
@@ -112,95 +396,164 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
       continue;
     }
 
-    // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
-    // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
-    // second form is [[foo]] which is a reference to foo.  The variable name
-    // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
-    // it.  This is to catch some common errors.
+    // String and numeric substitution blocks. String substitution blocks come
+    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
+    // other regex) and assigns it to the string variable 'foo'. The latter
+    // substitutes foo's value. Numeric substitution blocks work the same way
+    // as string ones, but start with a '#' sign after the double brackets.
+    // Both string and numeric variable names must satisfy the regular
+    // expression "[a-zA-Z_][0-9a-zA-Z_]*" to be valid, as this helps catch
+    // some common errors.
     if (PatternStr.startswith("[[")) {
+      StringRef UnparsedPatternStr = PatternStr.substr(2);
       // Find the closing bracket pair ending the match.  End is going to be an
       // offset relative to the beginning of the match string.
-      size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
+      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
+      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
+      bool IsNumBlock = MatchStr.consume_front("#");
 
       if (End == StringRef::npos) {
         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                         SourceMgr::DK_Error,
-                        "invalid named regex reference, no ]] found");
+                        "Invalid substitution block, no ]] found");
         return true;
       }
+      // Strip the substitution block we are parsing. End points to the start
+      // of the "]]" closing the expression so account for it in computing the
+      // index of the first unparsed character.
+      PatternStr = UnparsedPatternStr.substr(End + 2);
+
+      bool IsDefinition = false;
+      // Whether the substitution block is a legacy use of @LINE with string
+      // substitution block syntax.
+      bool IsLegacyLineExpr = false;
+      StringRef DefName;
+      StringRef SubstStr;
+      StringRef MatchRegexp;
+      size_t SubstInsertIdx = RegExStr.size();
+
+      // Parse string variable or legacy @LINE expression.
+      if (!IsNumBlock) {
+        size_t VarEndIdx = MatchStr.find(":");
+        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
+        if (SpacePos != StringRef::npos) {
+          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
+                          SourceMgr::DK_Error, "unexpected whitespace");
+          return true;
+        }
 
-      StringRef MatchStr = PatternStr.substr(2, End);
-      PatternStr = PatternStr.substr(End + 4);
-
-      // Get the regex name (e.g. "foo").
-      size_t NameEnd = MatchStr.find(':');
-      StringRef Name = MatchStr.substr(0, NameEnd);
+        // Get the name (e.g. "foo") and verify it is well formed.
+        StringRef OrigMatchStr = MatchStr;
+        Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+            parseVariable(MatchStr, SM);
+        if (!ParseVarResult) {
+          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
+          return true;
+        }
+        StringRef Name = ParseVarResult->Name;
+        bool IsPseudo = ParseVarResult->IsPseudo;
 
-      if (Name.empty()) {
-        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
-                        "invalid name in named regex: empty name");
-        return true;
-      }
+        IsDefinition = (VarEndIdx != StringRef::npos);
+        if (IsDefinition) {
+          if ((IsPseudo || !MatchStr.consume_front(":"))) {
+            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+                            SourceMgr::DK_Error,
+                            "invalid name in string variable definition");
+            return true;
+          }
 
-      // Verify that the name/expression is well formed. FileCheck currently
-      // supports @LINE, @LINE+number, @LINE-number expressions. The check here
-      // is relaxed, more strict check is performed in \c EvaluateExpression.
-      bool IsExpression = false;
-      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
-        if (i == 0) {
-          if (Name[i] == '$')  // Global vars start with '$'
-            continue;
-          if (Name[i] == '@') {
-            if (NameEnd != StringRef::npos) {
-              SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
-                              SourceMgr::DK_Error,
-                              "invalid name in named regex definition");
-              return true;
-            }
-            IsExpression = true;
-            continue;
+          // Detect collisions between string and numeric variables when the
+          // former is created later than the latter.
+          if (Context->GlobalNumericVariableTable.find(Name) !=
+              Context->GlobalNumericVariableTable.end()) {
+            SM.PrintMessage(
+                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+                "numeric variable with name '" + Name + "' already exists");
+            return true;
           }
-        }
-        if (Name[i] != '_' && !isalnum(Name[i]) &&
-            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
-          SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
-                          SourceMgr::DK_Error, "invalid name in named regex");
-          return true;
+          DefName = Name;
+          MatchRegexp = MatchStr;
+        } else {
+          if (IsPseudo) {
+            MatchStr = OrigMatchStr;
+            IsLegacyLineExpr = IsNumBlock = true;
+          } else
+            SubstStr = Name;
         }
       }
 
-      // Name can't start with a digit.
-      if (isdigit(static_cast<unsigned char>(Name[0]))) {
-        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
-                        "invalid name in named regex");
-        return true;
+      // Parse numeric substitution block.
+      std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+      Optional<FileCheckNumericVariable *> DefinedNumericVariable;
+      if (IsNumBlock) {
+        Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
+                                          IsLegacyLineExpr, SM);
+        if (!ParseResult) {
+          logAllUnhandledErrors(ParseResult.takeError(), errs());
+          return true;
+        }
+        ExpressionAST = std::move(*ParseResult);
+        if (DefinedNumericVariable) {
+          IsDefinition = true;
+          DefName = (*DefinedNumericVariable)->getName();
+          MatchRegexp = StringRef("[0-9]+");
+        } else
+          SubstStr = MatchStr;
       }
 
-      // Handle [[foo]].
-      if (NameEnd == StringRef::npos) {
-        // Handle variables that were defined earlier on the same line by
-        // emitting a backreference.
-        if (VariableDefs.find(Name) != VariableDefs.end()) {
-          unsigned VarParenNum = VariableDefs[Name];
-          if (VarParenNum < 1 || VarParenNum > 9) {
-            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+      // Handle substitutions: [[foo]] and [[#<foo expr>]].
+      if (!IsDefinition) {
+        // Handle substitution of string variables that were defined earlier on
+        // the same line by emitting a backreference. Expressions do not
+        // support substituting a numeric variable defined on the same line.
+        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
+          unsigned CaptureParenGroup = VariableDefs[SubstStr];
+          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
+            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                             SourceMgr::DK_Error,
                             "Can't back-reference more than 9 variables");
             return true;
           }
-          AddBackrefToRegEx(VarParenNum);
+          AddBackrefToRegEx(CaptureParenGroup);
         } else {
-          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+          // Handle substitution of string variables ([[<var>]]) defined in
+          // previous CHECK patterns, and substitution of expressions.
+          FileCheckSubstitution *Substitution =
+              IsNumBlock
+                  ? Context->makeNumericSubstitution(
+                        SubstStr, std::move(ExpressionAST), SubstInsertIdx)
+                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
+          Substitutions.push_back(Substitution);
         }
         continue;
       }
 
-      // Handle [[foo:.*]].
-      VariableDefs[Name] = CurParen;
+      // Handle variable definitions: [[<def>:(...)]] and
+      // [[#(...)<def>:(...)]].
+      if (IsNumBlock) {
+        FileCheckNumericVariableMatch NumericVariableDefinition = {
+            *DefinedNumericVariable, CurParen};
+        NumericVariableDefs[DefName] = NumericVariableDefinition;
+        // This store is done here rather than in match() to allow
+        // parseNumericVariableUse() to get the pointer to the class instance
+        // of the right variable definition corresponding to a given numeric
+        // variable use.
+        Context->GlobalNumericVariableTable[DefName] = *DefinedNumericVariable;
+      } else {
+        VariableDefs[DefName] = CurParen;
+        // Mark the string variable as defined to detect collisions between
+        // string and numeric variables in parseNumericVariableDefinition()
+        // and DefineCmdlineVariables() when the latter is created later than
+        // the former. We cannot reuse GlobalVariableTable for this by
+        // populating it with an empty string since we would then lose the
+        // ability to detect the use of an undefined variable in match().
+        Context->DefinedVariableTable[DefName] = true;
+      }
       RegExStr += '(';
       ++CurParen;
 
-      if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
+      if (AddRegExToRegEx(MatchRegexp, CurParen, SM))
         return true;
 
       RegExStr += ')';
@@ -243,37 +596,8 @@ void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
   RegExStr += Backref;
 }
 
-/// Evaluates expression and stores the result to \p Value.
-///
-/// Returns true on success and false when the expression has invalid syntax.
-bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
-  // The only supported expression is @LINE([\+-]\d+)?
-  if (!Expr.startswith("@LINE"))
-    return false;
-  Expr = Expr.substr(StringRef("@LINE").size());
-  int Offset = 0;
-  if (!Expr.empty()) {
-    if (Expr[0] == '+')
-      Expr = Expr.substr(1);
-    else if (Expr[0] != '-')
-      return false;
-    if (Expr.getAsInteger(10, Offset))
-      return false;
-  }
-  Value = llvm::itostr(LineNumber + Offset);
-  return true;
-}
-
-/// Matches the pattern string against the input buffer \p Buffer
-///
-/// This returns the position that is matched or npos if there is no match. If
-/// there is a match, the size of the matched string is returned in \p
-/// MatchLen.
-///
-/// The \p VariableTable StringMap provides the current values of filecheck
-/// variables and is updated if this match defines new values.
-size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
-                      StringMap<StringRef> &VariableTable) const {
+Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
+                                         const SourceMgr &SM) const {
   // If this is the EOF pattern, match it immediately.
   if (CheckTy == Check::CheckEOF) {
     MatchLen = 0;
@@ -283,58 +607,76 @@ size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
   // If this is a fixed string pattern, just match it now.
   if (!FixedStr.empty()) {
     MatchLen = FixedStr.size();
-    return Buffer.find(FixedStr);
+    size_t Pos = Buffer.find(FixedStr);
+    if (Pos == StringRef::npos)
+      return make_error<FileCheckNotFoundError>();
+    return Pos;
   }
 
   // Regex match.
 
-  // If there are variable uses, we need to create a temporary string with the
+  // If there are substitutions, we need to create a temporary string with the
   // actual value.
   StringRef RegExToMatch = RegExStr;
   std::string TmpStr;
-  if (!VariableUses.empty()) {
+  if (!Substitutions.empty()) {
     TmpStr = RegExStr;
-
-    unsigned InsertOffset = 0;
-    for (const auto &VariableUse : VariableUses) {
-      std::string Value;
-
-      if (VariableUse.first[0] == '@') {
-        if (!EvaluateExpression(VariableUse.first, Value))
-          return StringRef::npos;
-      } else {
-        StringMap<StringRef>::iterator it =
-            VariableTable.find(VariableUse.first);
-        // If the variable is undefined, return an error.
-        if (it == VariableTable.end())
-          return StringRef::npos;
-
-        // Look up the value and escape it so that we can put it into the regex.
-        Value += Regex::escape(it->second);
+    if (LineNumber)
+      Context->LineVariable->setValue(*LineNumber);
+
+    size_t InsertOffset = 0;
+    // Substitute all string variables and expressions whose values are only
+    // now known. Use of string variables defined on the same line is handled
+    // by back-references.
+    for (const auto &Substitution : Substitutions) {
+      // Substitute and check for failure (e.g. use of undefined variable).
+      Expected<std::string> Value = Substitution->getResult();
+      if (!Value) {
+        Context->LineVariable->clearValue();
+        return Value.takeError();
       }
 
       // Plop it into the regex at the adjusted offset.
-      TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
-                    Value.begin(), Value.end());
-      InsertOffset += Value.size();
+      TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
+                    Value->begin(), Value->end());
+      InsertOffset += Value->size();
     }
 
     // Match the newly constructed regex.
     RegExToMatch = TmpStr;
+    Context->LineVariable->clearValue();
   }
 
   SmallVector<StringRef, 4> MatchInfo;
   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
-    return StringRef::npos;
+    return make_error<FileCheckNotFoundError>();
 
   // Successful regex match.
   assert(!MatchInfo.empty() && "Didn't get any match");
   StringRef FullMatch = MatchInfo[0];
 
-  // If this defines any variables, remember their values.
+  // If this defines any string variables, remember their values.
   for (const auto &VariableDef : VariableDefs) {
     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
-    VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
+    Context->GlobalVariableTable[VariableDef.first] =
+        MatchInfo[VariableDef.second];
+  }
+
+  // If this defines any numeric variables, remember their values.
+  for (const auto &NumericVariableDef : NumericVariableDefs) {
+    const FileCheckNumericVariableMatch &NumericVariableMatch =
+        NumericVariableDef.getValue();
+    unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
+    assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
+    FileCheckNumericVariable *DefinedNumericVariable =
+        NumericVariableMatch.DefinedNumericVariable;
+
+    StringRef MatchedValue = MatchInfo[CaptureParenGroup];
+    uint64_t Val;
+    if (MatchedValue.getAsInteger(10, Val))
+      return FileCheckErrorDiagnostic::get(SM, MatchedValue,
+                                           "Unable to represent numeric value");
+    DefinedNumericVariable->setValue(Val);
   }
 
   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
@@ -345,13 +687,7 @@ size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
   return FullMatch.data() - Buffer.data() + MatchStartSkip;
 }
 
-
-/// Computes an arbitrary estimate for the quality of matching this pattern at
-/// the start of \p Buffer; a distance of zero should correspond to a perfect
-/// match.
-unsigned
-FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
-                              const StringMap<StringRef> &VariableTable) const {
+unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const {
   // Just compute the number of matching characters. For regular expressions, we
   // just compare against the regex itself and hope for the best.
   //
@@ -368,38 +704,36 @@ FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
   return BufferPrefix.edit_distance(ExampleString);
 }
 
-void FileCheckPattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
-                                const StringMap<StringRef> &VariableTable,
-                                SMRange MatchRange) const {
-  // If this was a regular expression using variables, print the current
-  // variable values.
-  if (!VariableUses.empty()) {
-    for (const auto &VariableUse : VariableUses) {
+void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+                                          SMRange MatchRange) const {
+  // Print what we know about substitutions.
+  if (!Substitutions.empty()) {
+    for (const auto &Substitution : Substitutions) {
       SmallString<256> Msg;
       raw_svector_ostream OS(Msg);
-      StringRef Var = VariableUse.first;
-      if (Var[0] == '@') {
-        std::string Value;
-        if (EvaluateExpression(Var, Value)) {
-          OS << "with expression \"";
-          OS.write_escaped(Var) << "\" equal to \"";
-          OS.write_escaped(Value) << "\"";
-        } else {
-          OS << "uses incorrect expression \"";
-          OS.write_escaped(Var) << "\"";
-        }
+      Expected<std::string> MatchedValue = Substitution->getResult();
+
+      // Substitution failed or is not known at match time; print the undefined
+      // variables it uses.
+      if (!MatchedValue) {
+        bool UndefSeen = false;
+        handleAllErrors(MatchedValue.takeError(),
+                        [](const FileCheckNotFoundError &E) {},
+                        // Handled in PrintNoMatch().
+                        [](const FileCheckErrorDiagnostic &E) {},
+                        [&](const FileCheckUndefVarError &E) {
+                          if (!UndefSeen) {
+                            OS << "uses undefined variable(s):";
+                            UndefSeen = true;
+                          }
+                          OS << " ";
+                          E.log(OS);
+                        });
       } else {
-        StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
-
-        // Check for undefined variable references.
-        if (it == VariableTable.end()) {
-          OS << "uses undefined variable \"";
-          OS.write_escaped(Var) << "\"";
-        } else {
-          OS << "with variable \"";
-          OS.write_escaped(Var) << "\" equal to \"";
-          OS.write_escaped(it->second) << "\"";
-        }
+        // Substitution succeeded. Print substituted value.
+        OS << "with \"";
+        OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
+        OS.write_escaped(*MatchedValue) << "\"";
       }
 
       if (MatchRange.isValid())
@@ -430,9 +764,8 @@ static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
   return Range;
 }
 
-void FileCheckPattern::PrintFuzzyMatch(
+void FileCheckPattern::printFuzzyMatch(
     const SourceMgr &SM, StringRef Buffer,
-    const StringMap<StringRef> &VariableTable,
     std::vector<FileCheckDiag> *Diags) const {
   // Attempt to find the closest/best fuzzy match.  Usually an error happens
   // because some string in the output didn't exactly match. In these cases, we
@@ -454,7 +787,7 @@ void FileCheckPattern::PrintFuzzyMatch(
 
     // Compute the "quality" of this match as an arbitrary combination of the
     // match distance and the number of lines skipped to get to this match.
-    unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
+    unsigned Distance = computeMatchDistance(Buffer.substr(i));
     double Quality = Distance + (NumLinesForward / 100.);
 
     if (Quality < BestQuality || Best == StringRef::npos) {
@@ -478,11 +811,39 @@ void FileCheckPattern::PrintFuzzyMatch(
   }
 }
 
-/// Finds the closing sequence of a regex variable usage or definition.
-///
-/// \p Str has to point in the beginning of the definition (right after the
-/// opening sequence). Returns the offset of the closing sequence within Str,
-/// or npos if it was not found.
+Expected<StringRef>
+FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
+  auto VarIter = GlobalVariableTable.find(VarName);
+  if (VarIter == GlobalVariableTable.end())
+    return make_error<FileCheckUndefVarError>(VarName);
+
+  return VarIter->second;
+}
+
+template <class... Types>
+FileCheckNumericVariable *
+FileCheckPatternContext::makeNumericVariable(Types... args) {
+  NumericVariables.push_back(
+      llvm::make_unique<FileCheckNumericVariable>(args...));
+  return NumericVariables.back().get();
+}
+
+FileCheckSubstitution *
+FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
+                                                size_t InsertIdx) {
+  Substitutions.push_back(
+      llvm::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx));
+  return Substitutions.back().get();
+}
+
+FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution(
+    StringRef ExpressionStr,
+    std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) {
+  Substitutions.push_back(llvm::make_unique<FileCheckNumericSubstitution>(
+      this, ExpressionStr, std::move(ExpressionAST), InsertIdx));
+  return Substitutions.back().get();
+}
+
 size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
   // Offset keeps track of the current offset within the input Str
   size_t Offset = 0;
@@ -521,11 +882,8 @@ size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
   return StringRef::npos;
 }
 
-/// Canonicalize whitespaces in the file. Line endings are replaced with
-/// UNIX-style '\n'.
-StringRef
-llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
-                                  SmallVectorImpl<char> &OutputBuffer) {
+StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
+                                      SmallVectorImpl<char> &OutputBuffer) {
   OutputBuffer.reserve(MB.getBufferSize());
 
   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
@@ -581,7 +939,6 @@ Check::FileCheckType &Check::FileCheckType::setCount(int C) {
   return *this;
 }
 
-// Get a description of the type.
 std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
   switch (Kind) {
   case Check::CheckNone:
@@ -674,7 +1031,7 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
   return Loc;
 }
 
-/// Search the buffer for the first prefix in the prefix regular expression.
+/// Searches the buffer for the first prefix in the prefix regular expression.
 ///
 /// This searches the buffer using the provided regular expression, however it
 /// enforces constraints beyond that:
@@ -683,7 +1040,7 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
 /// 2) The found prefix must be followed by a valid check type suffix using \c
 ///    FindCheckType above.
 ///
-/// Returns a pair of StringRefs into the Buffer, which combines:
+/// \returns a pair of StringRefs into the Buffer, which combines:
 ///   - the first match of the regular expression to satisfy these two is
 ///   returned,
 ///     otherwise an empty StringRef is returned to indicate failure.
@@ -744,13 +1101,24 @@ FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
   return {StringRef(), StringRef()};
 }
 
-/// Read the check file, which specifies the sequence of expected strings.
-///
-/// The strings are added to the CheckStrings vector. Returns true in case of
-/// an error, false otherwise.
-bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
-                                    Regex &PrefixRE,
-                                    std::vector<FileCheckString> &CheckStrings) {
+void FileCheckPatternContext::createLineVariable() {
+  assert(!LineVariable && "@LINE pseudo numeric variable already created");
+  StringRef LineName = "@LINE";
+  LineVariable = makeNumericVariable(LineName);
+  GlobalNumericVariableTable[LineName] = LineVariable;
+}
+
+bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+                              std::vector<FileCheckString> &CheckStrings) {
+  Error DefineError =
+      PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM);
+  if (DefineError) {
+    logAllUnhandledErrors(std::move(DefineError), errs());
+    return true;
+  }
+
+  PatternContext.createLineVariable();
+
   std::vector<FileCheckPattern> ImplicitNegativeChecks;
   for (const auto &PatternString : Req.ImplicitCheckNot) {
     // Create a buffer with fake command line content in order to display the
@@ -764,9 +1132,10 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
 
-    ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
-    ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
-                                               "IMPLICIT-CHECK", SM, 0, Req);
+    ImplicitNegativeChecks.push_back(
+        FileCheckPattern(Check::CheckNot, &PatternContext));
+    ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
+                                               "IMPLICIT-CHECK", SM, Req);
   }
 
   std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
@@ -827,8 +1196,8 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
 
     // Parse the pattern.
-    FileCheckPattern P(CheckTy);
-    if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
+    FileCheckPattern P(CheckTy, &PatternContext, LineNumber);
+    if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
       return true;
 
     // Verify that CHECK-LABEL lines do not define or use variables
@@ -871,8 +1240,9 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
   // prefix as a filler for the error message.
   if (!DagNotMatches.empty()) {
-    CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
-                              SMLoc::getFromPointer(Buffer.data()));
+    CheckStrings.emplace_back(
+        FileCheckPattern(Check::CheckEOF, &PatternContext, LineNumber + 1),
+        *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
   }
 
@@ -897,20 +1267,27 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
 
 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                        StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
-                       int MatchedCount, StringRef Buffer,
-                       StringMap<StringRef> &VariableTable, size_t MatchPos,
+                       int MatchedCount, StringRef Buffer, size_t MatchPos,
                        size_t MatchLen, const FileCheckRequest &Req,
                        std::vector<FileCheckDiag> *Diags) {
+  bool PrintDiag = true;
   if (ExpectedMatch) {
     if (!Req.Verbose)
       return;
     if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
       return;
+    // Due to their verbosity, we don't print verbose diagnostics here if we're
+    // gathering them for a different rendering, but we always print other
+    // diagnostics.
+    PrintDiag = !Diags;
   }
   SMRange MatchRange = ProcessMatchResult(
       ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
                     : FileCheckDiag::MatchFoundButExcluded,
       SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
+  if (!PrintDiag)
+    return;
+
   std::string Message = formatv("{0}: {1} string found in input",
                                 Pat.getCheckTy().getDescription(Prefix),
                                 (ExpectedMatch ? "expected" : "excluded"))
@@ -922,65 +1299,87 @@ static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
   SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
                   {MatchRange});
-  Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
+  Pat.printSubstitutions(SM, Buffer, MatchRange);
 }
 
 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                        const FileCheckString &CheckStr, int MatchedCount,
-                       StringRef Buffer, StringMap<StringRef> &VariableTable,
-                       size_t MatchPos, size_t MatchLen, FileCheckRequest &Req,
+                       StringRef Buffer, size_t MatchPos, size_t MatchLen,
+                       FileCheckRequest &Req,
                        std::vector<FileCheckDiag> *Diags) {
   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
-             MatchedCount, Buffer, VariableTable, MatchPos, MatchLen, Req,
-             Diags);
+             MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
 }
 
 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                          StringRef Prefix, SMLoc Loc,
                          const FileCheckPattern &Pat, int MatchedCount,
-                         StringRef Buffer, StringMap<StringRef> &VariableTable,
-                         bool VerboseVerbose,
-                         std::vector<FileCheckDiag> *Diags) {
-  if (!ExpectedMatch && !VerboseVerbose)
+                         StringRef Buffer, bool VerboseVerbose,
+                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
+  assert(MatchErrors && "Called on successful match");
+  bool PrintDiag = true;
+  if (!ExpectedMatch) {
+    if (!VerboseVerbose) {
+      consumeError(std::move(MatchErrors));
+      return;
+    }
+    // Due to their verbosity, we don't print verbose diagnostics here if we're
+    // gathering them for a different rendering, but we always print other
+    // diagnostics.
+    PrintDiag = !Diags;
+  }
+
+  // If the current position is at the end of a line, advance to the start of
+  // the next line.
+  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
+  SMRange SearchRange = ProcessMatchResult(
+      ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
+                    : FileCheckDiag::MatchNoneAndExcluded,
+      SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+  if (!PrintDiag) {
+    consumeError(std::move(MatchErrors));
     return;
+  }
+
+  MatchErrors =
+      handleErrors(std::move(MatchErrors),
+                   [](const FileCheckErrorDiagnostic &E) { E.log(errs()); });
+
+  // No problem matching the string per se.
+  if (!MatchErrors)
+    return;
+  consumeError(std::move(MatchErrors));
 
-  // Otherwise, we have an error, emit an error message.
+  // Print "not found" diagnostic.
   std::string Message = formatv("{0}: {1} string not found in input",
                                 Pat.getCheckTy().getDescription(Prefix),
                                 (ExpectedMatch ? "expected" : "excluded"))
                             .str();
   if (Pat.getCount() > 1)
     Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
-
   SM.PrintMessage(
       Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
 
-  // Print the "scanning from here" line.  If the current position is at the
-  // end of a line, advance to the start of the next line.
-  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
-  SMRange SearchRange = ProcessMatchResult(
-      ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
-                    : FileCheckDiag::MatchNoneAndExcluded,
-      SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+  // Print the "scanning from here" line.
   SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
 
   // Allow the pattern to print additional information if desired.
-  Pat.PrintVariableUses(SM, Buffer, VariableTable);
+  Pat.printSubstitutions(SM, Buffer);
 
   if (ExpectedMatch)
-    Pat.PrintFuzzyMatch(SM, Buffer, VariableTable, Diags);
+    Pat.printFuzzyMatch(SM, Buffer, Diags);
 }
 
 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                          const FileCheckString &CheckStr, int MatchedCount,
-                         StringRef Buffer, StringMap<StringRef> &VariableTable,
-                         bool VerboseVerbose,
-                         std::vector<FileCheckDiag> *Diags) {
+                         StringRef Buffer, bool VerboseVerbose,
+                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
-               MatchedCount, Buffer, VariableTable, VerboseVerbose, Diags);
+               MatchedCount, Buffer, VerboseVerbose, Diags,
+               std::move(MatchErrors));
 }
 
-/// Count the number of newlines in the specified range.
+/// Counts the number of newlines in the specified range.
 static unsigned CountNumNewlinesBetween(StringRef Range,
                                         const char *&FirstNewLine) {
   unsigned NumNewLines = 0;
@@ -1003,10 +1402,8 @@ static unsigned CountNumNewlinesBetween(StringRef Range,
   }
 }
 
-/// Match check string and its "not strings" and/or "dag strings".
 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
                               bool IsLabelScanMode, size_t &MatchLen,
-                              StringMap<StringRef> &VariableTable,
                               FileCheckRequest &Req,
                               std::vector<FileCheckDiag> *Diags) const {
   size_t LastPos = 0;
@@ -1018,7 +1415,7 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
   // over the block again (including the last CHECK-LABEL) in normal mode.
   if (!IsLabelScanMode) {
     // Match "dag strings" (with mixed "not strings" if any).
-    LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req, Diags);
+    LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
     if (LastPos == StringRef::npos)
       return StringRef::npos;
   }
@@ -1033,18 +1430,19 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
     StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
     size_t CurrentMatchLen;
     // get a match at current start point
-    size_t MatchPos = Pat.Match(MatchBuffer, CurrentMatchLen, VariableTable);
-    if (i == 1)
-      FirstMatchPos = LastPos + MatchPos;
+    Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);
 
     // report
-    if (MatchPos == StringRef::npos) {
-      PrintNoMatch(true, SM, *this, i, MatchBuffer, VariableTable,
-                   Req.VerboseVerbose, Diags);
+    if (!MatchResult) {
+      PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
+                   MatchResult.takeError());
       return StringRef::npos;
     }
-    PrintMatch(true, SM, *this, i, MatchBuffer, VariableTable, MatchPos,
-               CurrentMatchLen, Req, Diags);
+    size_t MatchPos = *MatchResult;
+    PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
+               Diags);
+    if (i == 1)
+      FirstMatchPos = LastPos + MatchPos;
 
     // move start point after the match
     LastMatchEnd += MatchPos + CurrentMatchLen;
@@ -1079,14 +1477,13 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
 
     // If this match had "not strings", verify that they don't exist in the
     // skipped region.
-    if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+    if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
       return StringRef::npos;
   }
 
   return FirstMatchPos;
 }
 
-/// Verify there is a single line in the given buffer.
 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   if (Pat.getCheckTy() != Check::CheckNext &&
       Pat.getCheckTy() != Check::CheckEmpty)
@@ -1097,12 +1494,6 @@ bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
 
   // Count the number of newlines between the previous match and this one.
-  assert(Buffer.data() !=
-             SM.getMemoryBuffer(SM.FindBufferContainingLoc(
-                                    SMLoc::getFromPointer(Buffer.data())))
-                 ->getBufferStart() &&
-         "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
-
   const char *FirstNewLine = nullptr;
   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
 
@@ -1132,18 +1523,11 @@ bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
   return false;
 }
 
-/// Verify there is no newline in the given buffer.
 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
   if (Pat.getCheckTy() != Check::CheckSame)
     return false;
 
   // Count the number of newlines between the previous match and this one.
-  assert(Buffer.data() !=
-             SM.getMemoryBuffer(SM.FindBufferContainingLoc(
-                                    SMLoc::getFromPointer(Buffer.data())))
-                 ->getBufferStart() &&
-         "CHECK-SAME can't be the first check in a file");
-
   const char *FirstNewLine = nullptr;
   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
 
@@ -1161,26 +1545,25 @@ bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
   return false;
 }
 
-/// Verify there's no "not strings" in the given buffer.
 bool FileCheckString::CheckNot(
     const SourceMgr &SM, StringRef Buffer,
     const std::vector<const FileCheckPattern *> &NotStrings,
-    StringMap<StringRef> &VariableTable, const FileCheckRequest &Req,
-    std::vector<FileCheckDiag> *Diags) const {
+    const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const {
   for (const FileCheckPattern *Pat : NotStrings) {
     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
 
     size_t MatchLen = 0;
-    size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
+    Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);
 
-    if (Pos == StringRef::npos) {
+    if (!MatchResult) {
       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
-                   VariableTable, Req.VerboseVerbose, Diags);
+                   Req.VerboseVerbose, Diags, MatchResult.takeError());
       continue;
     }
+    size_t Pos = *MatchResult;
 
-    PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, VariableTable,
-               Pos, MatchLen, Req, Diags);
+    PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
+               Req, Diags);
 
     return true;
   }
@@ -1188,11 +1571,9 @@ bool FileCheckString::CheckNot(
   return false;
 }
 
-/// Match "dag strings" and their mixed "not strings".
 size_t
 FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
                           std::vector<const FileCheckPattern *> &NotStrings,
-                          StringMap<StringRef> &VariableTable,
                           const FileCheckRequest &Req,
                           std::vector<FileCheckDiag> *Diags) const {
   if (DagNotStrings.empty())
@@ -1233,19 +1614,20 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
     // CHECK-DAG group.
     for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
       StringRef MatchBuffer = Buffer.substr(MatchPos);
-      size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
+      Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
       // With a group of CHECK-DAGs, a single mismatching means the match on
       // that group of CHECK-DAGs fails immediately.
-      if (MatchPosBuf == StringRef::npos) {
+      if (!MatchResult) {
         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
-                     VariableTable, Req.VerboseVerbose, Diags);
+                     Req.VerboseVerbose, Diags, MatchResult.takeError());
         return StringRef::npos;
       }
+      size_t MatchPosBuf = *MatchResult;
       // Re-calc it as the offset relative to the start of the original string.
       MatchPos += MatchPosBuf;
       if (Req.VerboseVerbose)
-        PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
-                   VariableTable, MatchPos, MatchLen, Req, Diags);
+        PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
+                   MatchLen, Req, Diags);
       MatchRange M{MatchPos, MatchPos + MatchLen};
       if (Req.AllowDeprecatedDagOverlap) {
         // We don't need to track all matches in this mode, so we just maintain
@@ -1276,20 +1658,24 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
         break;
       }
       if (Req.VerboseVerbose) {
-        SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
-        SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
-        SMRange OldRange(OldStart, OldEnd);
-        SM.PrintMessage(OldStart, SourceMgr::DK_Note,
-                        "match discarded, overlaps earlier DAG match here",
-                        {OldRange});
-        if (Diags)
+        // Due to their verbosity, we don't print verbose diagnostics here if
+        // we're gathering them for a different rendering, but we always print
+        // other diagnostics.
+        if (!Diags) {
+          SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
+          SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
+          SMRange OldRange(OldStart, OldEnd);
+          SM.PrintMessage(OldStart, SourceMgr::DK_Note,
+                          "match discarded, overlaps earlier DAG match here",
+                          {OldRange});
+        } else
           Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
       }
       MatchPos = MI->End;
     }
     if (!Req.VerboseVerbose)
-      PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, VariableTable,
-                 MatchPos, MatchLen, Req, Diags);
+      PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
+                 MatchLen, Req, Diags);
 
     // Handle the end of a CHECK-DAG group.
     if (std::next(PatItr) == PatEnd ||
@@ -1300,7 +1686,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
         // region.
         StringRef SkippedRegion =
             Buffer.slice(StartPos, MatchRanges.begin()->Pos);
-        if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+        if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
           return StringRef::npos;
         // Clear "not strings".
         NotStrings.clear();
@@ -1322,7 +1708,7 @@ static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   return Validator.match(CheckPrefix);
 }
 
-bool llvm::FileCheck::ValidateCheckPrefixes() {
+bool FileCheck::ValidateCheckPrefixes() {
   StringSet<> PrefixSet;
 
   for (StringRef Prefix : Req.CheckPrefixes) {
@@ -1340,12 +1726,7 @@ bool llvm::FileCheck::ValidateCheckPrefixes() {
   return true;
 }
 
-// Combines the check prefixes into a single regex so that we can efficiently
-// scan for any of the set.
-//
-// The semantics are that the longest-match wins which matches our regex
-// library.
-Regex llvm::FileCheck::buildCheckPrefixRegex() {
+Regex FileCheck::buildCheckPrefixRegex() {
   // I don't think there's a way to specify an initial value for cl::list,
   // so if nothing was specified, add the default
   if (Req.CheckPrefixes.empty())
@@ -1364,32 +1745,152 @@ Regex llvm::FileCheck::buildCheckPrefixRegex() {
   return Regex(PrefixRegexStr);
 }
 
-// Remove local variables from \p VariableTable. Global variables
-// (start with '$') are preserved.
-static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
-  SmallVector<StringRef, 16> LocalVars;
-  for (const auto &Var : VariableTable)
-    if (Var.first()[0] != '$')
-      LocalVars.push_back(Var.first());
+Error FileCheckPatternContext::defineCmdlineVariables(
+    std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
+  assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
+         "Overriding defined variable with command-line variable definitions");
+
+  if (CmdlineDefines.empty())
+    return Error::success();
+
+  // Create a string representing the vector of command-line definitions. Each
+  // definition is on its own line and prefixed with a definition number to
+  // clarify which definition a given diagnostic corresponds to.
+  unsigned I = 0;
+  Error Errs = Error::success();
+  std::string CmdlineDefsDiag;
+  StringRef Prefix1 = "Global define #";
+  StringRef Prefix2 = ": ";
+  for (StringRef CmdlineDef : CmdlineDefines)
+    CmdlineDefsDiag +=
+        (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str();
+
+  // Create a buffer with fake command line content in order to display
+  // parsing diagnostics with location information and point to the
+  // global definition with invalid syntax.
+  std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
+      MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
+  StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
+  SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
+
+  SmallVector<StringRef, 4> CmdlineDefsDiagVec;
+  CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/,
+                           false /*KeepEmpty*/);
+  for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) {
+    unsigned DefStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size();
+    StringRef CmdlineDef = CmdlineDefDiag.substr(DefStart);
+    size_t EqIdx = CmdlineDef.find('=');
+    if (EqIdx == StringRef::npos) {
+      Errs = joinErrors(
+          std::move(Errs),
+          FileCheckErrorDiagnostic::get(
+              SM, CmdlineDef, "missing equal sign in global definition"));
+      continue;
+    }
 
-  for (const auto &Var : LocalVars)
-    VariableTable.erase(Var);
+    // Numeric variable definition.
+    if (CmdlineDef[0] == '#') {
+      StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1);
+      Expected<FileCheckNumericVariable *> ParseResult =
+          FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this,
+                                                           None, SM);
+      if (!ParseResult) {
+        Errs = joinErrors(std::move(Errs), ParseResult.takeError());
+        continue;
+      }
+
+      StringRef CmdlineVal = CmdlineDef.substr(EqIdx + 1);
+      uint64_t Val;
+      if (CmdlineVal.getAsInteger(10, Val)) {
+        Errs = joinErrors(std::move(Errs),
+                          FileCheckErrorDiagnostic::get(
+                              SM, CmdlineVal,
+                              "invalid value in numeric variable definition '" +
+                                  CmdlineVal + "'"));
+        continue;
+      }
+      FileCheckNumericVariable *DefinedNumericVariable = *ParseResult;
+      DefinedNumericVariable->setValue(Val);
+
+      // Record this variable definition.
+      GlobalNumericVariableTable[DefinedNumericVariable->getName()] =
+          DefinedNumericVariable;
+    } else {
+      // String variable definition.
+      std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
+      StringRef CmdlineName = CmdlineNameVal.first;
+      StringRef OrigCmdlineName = CmdlineName;
+      Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+          FileCheckPattern::parseVariable(CmdlineName, SM);
+      if (!ParseVarResult) {
+        Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
+        continue;
+      }
+      // Check that CmdlineName does not denote a pseudo variable and is only
+      // composed of the parsed variable name. This catches cases like
+      // "FOO+2" in a "FOO+2=10" definition.
+      if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
+        Errs = joinErrors(std::move(Errs),
+                          FileCheckErrorDiagnostic::get(
+                              SM, OrigCmdlineName,
+                              "invalid name in string variable definition '" +
+                                  OrigCmdlineName + "'"));
+        continue;
+      }
+      StringRef Name = ParseVarResult->Name;
+
+      // Detect collisions between string and numeric variables when the former
+      // is created later than the latter.
+      if (GlobalNumericVariableTable.find(Name) !=
+          GlobalNumericVariableTable.end()) {
+        Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get(
+                                               SM, Name,
+                                               "numeric variable with name '" +
+                                                   Name + "' already exists"));
+        continue;
+      }
+      GlobalVariableTable.insert(CmdlineNameVal);
+      // Mark the string variable as defined to detect collisions between
+      // string and numeric variables in defineCmdlineVariables when the latter
+      // is created later than the former. We cannot reuse GlobalVariableTable
+      // for this by populating it with an empty string since we would then
+      // lose the ability to detect the use of an undefined variable in
+      // match().
+      DefinedVariableTable[Name] = true;
+    }
+  }
+
+  return Errs;
 }
 
-/// Check the input to FileCheck provided in the \p Buffer against the \p
-/// CheckStrings read from the check file.
-///
-/// Returns false if the input fails to satisfy the checks.
-bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
-                                 ArrayRef<FileCheckString> CheckStrings,
-                                 std::vector<FileCheckDiag> *Diags) {
-  bool ChecksFailed = false;
+void FileCheckPatternContext::clearLocalVars() {
+  SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
+  for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
+    if (Var.first()[0] != '$')
+      LocalPatternVars.push_back(Var.first());
+
+  // Numeric substitution reads the value of a variable directly, not via
+  // GlobalNumericVariableTable. Therefore, we clear local variables by
+  // clearing their value which will lead to a numeric substitution failure. We
+  // also mark the variable for removal from GlobalNumericVariableTable since
+  // this is what defineCmdlineVariables checks to decide that no global
+  // variable has been defined.
+  for (const auto &Var : GlobalNumericVariableTable)
+    if (Var.first()[0] != '$') {
+      Var.getValue()->clearValue();
+      LocalNumericVars.push_back(Var.first());
+    }
 
-  /// VariableTable - This holds all the current filecheck variables.
-  StringMap<StringRef> VariableTable;
+  for (const auto &Var : LocalPatternVars)
+    GlobalVariableTable.erase(Var);
+  for (const auto &Var : LocalNumericVars)
+    GlobalNumericVariableTable.erase(Var);
+}
 
-  for (const auto& Def : Req.GlobalDefines)
-    VariableTable.insert(StringRef(Def).split('='));
+bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
+                           ArrayRef<FileCheckString> CheckStrings,
+                           std::vector<FileCheckDiag> *Diags) {
+  bool ChecksFailed = false;
 
   unsigned i = 0, j = 0, e = CheckStrings.size();
   while (true) {
@@ -1405,10 +1906,10 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
 
       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
       size_t MatchLabelLen = 0;
-      size_t MatchLabelPos = CheckLabelStr.Check(
-          SM, Buffer, true, MatchLabelLen, VariableTable, Req, Diags);
+      size_t MatchLabelPos =
+          CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
       if (MatchLabelPos == StringRef::npos)
-        // Immediately bail of CHECK-LABEL fails, nothing else we can do.
+        // Immediately bail if CHECK-LABEL fails, nothing else we can do.
         return false;
 
       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
@@ -1416,8 +1917,11 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
       ++j;
     }
 
-    if (Req.EnableVarScope)
-      ClearLocalVars(VariableTable);
+    // Do not clear the first region as it's the one before the first
+    // CHECK-LABEL and it would clear variables defined on the command-line
+    // before they get used.
+    if (i != 0 && Req.EnableVarScope)
+      PatternContext.clearLocalVars();
 
     for (; i != j; ++i) {
       const FileCheckString &CheckStr = CheckStrings[i];
@@ -1425,8 +1929,8 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
       // Check each string within the scanned region, including a second check
       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
       size_t MatchLen = 0;
-      size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
-                                       VariableTable, Req, Diags);
+      size_t MatchPos =
+          CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
 
       if (MatchPos == StringRef::npos) {
         ChecksFailed = true;
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index b8223126227d..3d6b569f2993 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -1,9 +1,8 @@
 //===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,18 +75,26 @@ private:
 // output file on commit(). This is used only when we cannot use OnDiskBuffer.
 class InMemoryBuffer : public FileOutputBuffer {
 public:
-  InMemoryBuffer(StringRef Path, MemoryBlock Buf, unsigned Mode)
-      : FileOutputBuffer(Path), Buffer(Buf), Mode(Mode) {}
+  InMemoryBuffer(StringRef Path, MemoryBlock Buf, std::size_t BufSize,
+                 unsigned Mode)
+      : FileOutputBuffer(Path), Buffer(Buf), BufferSize(BufSize),
+        Mode(Mode) {}
 
   uint8_t *getBufferStart() const override { return (uint8_t *)Buffer.base(); }
 
   uint8_t *getBufferEnd() const override {
-    return (uint8_t *)Buffer.base() + Buffer.size();
+    return (uint8_t *)Buffer.base() + BufferSize;
   }
 
-  size_t getBufferSize() const override { return Buffer.size(); }
+  size_t getBufferSize() const override { return BufferSize; }
 
   Error commit() override {
+    if (FinalPath == "-") {
+      llvm::outs() << StringRef((const char *)Buffer.base(), BufferSize);
+      llvm::outs().flush();
+      return Error::success();
+    }
+
     using namespace sys::fs;
     int FD;
     std::error_code EC;
@@ -95,12 +102,14 @@ public:
             openFileForWrite(FinalPath, FD, CD_CreateAlways, OF_None, Mode))
       return errorCodeToError(EC);
     raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true);
-    OS << StringRef((const char *)Buffer.base(), Buffer.size());
+    OS << StringRef((const char *)Buffer.base(), BufferSize);
     return Error::success();
   }
 
 private:
+  // Buffer may actually contain a larger memory block than BufferSize
   OwningMemoryBlock Buffer;
+  size_t BufferSize;
   unsigned Mode;
 };
 } // namespace
@@ -112,43 +121,42 @@ createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) {
       Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
   if (EC)
     return errorCodeToError(EC);
-  return llvm::make_unique<InMemoryBuffer>(Path, MB, Mode);
+  return llvm::make_unique<InMemoryBuffer>(Path, MB, Size, Mode);
 }
 
-static Expected<std::unique_ptr<OnDiskBuffer>>
-createOnDiskBuffer(StringRef Path, size_t Size, bool InitExisting,
-                   unsigned Mode) {
+static Expected<std::unique_ptr<FileOutputBuffer>>
+createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) {
   Expected<fs::TempFile> FileOrErr =
       fs::TempFile::create(Path + ".tmp%%%%%%%", Mode);
   if (!FileOrErr)
     return FileOrErr.takeError();
   fs::TempFile File = std::move(*FileOrErr);
 
-  if (InitExisting) {
-    if (auto EC = sys::fs::copy_file(Path, File.FD))
-      return errorCodeToError(EC);
-  } else {
 #ifndef _WIN32
-    // On Windows, CreateFileMapping (the mmap function on Windows)
-    // automatically extends the underlying file. We don't need to
-    // extend the file beforehand. _chsize (ftruncate on Windows) is
-    // pretty slow just like it writes specified amount of bytes,
-    // so we should avoid calling that function.
-    if (auto EC = fs::resize_file(File.FD, Size)) {
-      consumeError(File.discard());
-      return errorCodeToError(EC);
-    }
-#endif
+  // On Windows, CreateFileMapping (the mmap function on Windows)
+  // automatically extends the underlying file, so we don't need to
+  // extend the file beforehand. _chsize (ftruncate on Windows) is
+  // pretty slow, as if it actually wrote the specified number of
+  // bytes, so we avoid calling that function there.
+  if (auto EC = fs::resize_file(File.FD, Size)) {
+    consumeError(File.discard());
+    return errorCodeToError(EC);
   }
+#endif
 
   // Mmap it.
   std::error_code EC;
   auto MappedFile = llvm::make_unique<fs::mapped_file_region>(
-      File.FD, fs::mapped_file_region::readwrite, Size, 0, EC);
+      fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite,
+      Size, 0, EC);
+
+  // mmap(2) can fail if the underlying filesystem does not support it.
+  // If that happens, we fall back to in-memory buffer as the last resort.
   if (EC) {
     consumeError(File.discard());
-    return errorCodeToError(EC);
+    return createInMemoryBuffer(Path, Size, Mode);
   }
+
   return llvm::make_unique<OnDiskBuffer>(Path, std::move(File),
                                          std::move(MappedFile));
 }
@@ -156,6 +164,10 @@ createOnDiskBuffer(StringRef Path, size_t Size, bool InitExisting,
 // Create an instance of FileOutputBuffer.
 Expected<std::unique_ptr<FileOutputBuffer>>
 FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
+  // Handle "-" as stdout just like llvm::raw_ostream does.
+  if (Path == "-")
+    return createInMemoryBuffer("-", Size, /*Mode=*/0);
+
   unsigned Mode = fs::all_read | fs::all_write;
   if (Flags & F_executable)
     Mode |= fs::all_exe;
@@ -163,15 +175,6 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
   fs::file_status Stat;
   fs::status(Path, Stat);
 
-  if ((Flags & F_modify) && Size == size_t(-1)) {
-    if (Stat.type() == fs::file_type::regular_file)
-      Size = Stat.getSize();
-    else if (Stat.type() == fs::file_type::file_not_found)
-      return errorCodeToError(errc::no_such_file_or_directory);
-    else
-      return errorCodeToError(errc::invalid_argument);
-  }
-
   // Usually, we want to create OnDiskBuffer to create a temporary file in
   // the same directory as the destination file and atomically replaces it
   // by rename(2).
@@ -186,7 +189,7 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
   case fs::file_type::regular_file:
   case fs::file_type::file_not_found:
   case fs::file_type::status_error:
-    return createOnDiskBuffer(Path, Size, !!(Flags & F_modify), Mode);
+    return createOnDiskBuffer(Path, Size, Mode);
   default:
     return createInMemoryBuffer(Path, Size, Mode);
   }
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 39dbefff5b70..62eb7bfda195 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -1,9 +1,8 @@
 //===- Support/FileUtilities.cpp - File System Utilities ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index ee69a64ac97b..ce6f196e1060 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -1,9 +1,8 @@
 //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/FormatVariadic.cpp b/lib/Support/FormatVariadic.cpp
index 1f3505d5f74f..f9e89f69b528 100644
--- a/lib/Support/FormatVariadic.cpp
+++ b/lib/Support/FormatVariadic.cpp
@@ -1,9 +1,8 @@
 //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FormatVariadic.h"
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index b0cb06c1daa2..4eb747038bb9 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp
index 4ea110301f16..6011be86d77f 100644
--- a/lib/Support/GlobPattern.cpp
+++ b/lib/Support/GlobPattern.cpp
@@ -1,9 +1,8 @@
 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 9335daffc3e2..c689a81925d4 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -1,9 +1,8 @@
 //===- GraphWriter.cpp - Implements GraphWriter support routines ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Hashing.cpp b/lib/Support/Hashing.cpp
index 7de25cec7371..1b20a670434f 100644
--- a/lib/Support/Hashing.cpp
+++ b/lib/Support/Hashing.cpp
@@ -1,9 +1,8 @@
 //===-------------- lib/Support/Hashing.cpp -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index d5a688c7fb9b..d491912bdc0c 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1,9 +1,8 @@
 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -193,6 +192,8 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
             .Case("0xd07", "cortex-a57")
             .Case("0xd08", "cortex-a72")
             .Case("0xd09", "cortex-a73")
+            .Case("0xd0a", "cortex-a75")
+            .Case("0xd0b", "cortex-a76")
             .Default("generic");
   }
 
@@ -236,6 +237,10 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
             .Case("0x211", "kryo")
             .Case("0x800", "cortex-a73")
             .Case("0x801", "cortex-a73")
+            .Case("0x802", "cortex-a73")
+            .Case("0x803", "cortex-a73")
+            .Case("0x804", "cortex-a73")
+            .Case("0x805", "cortex-a73")
             .Case("0xc00", "falkor")
             .Case("0xc01", "saphira")
             .Default("generic");
@@ -310,6 +315,8 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
         Pos += sizeof("machine = ") - 1;
         unsigned int Id;
         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+          if (Id >= 8561 && HaveVectorSupport)
+            return "arch13";
           if (Id >= 3906 && HaveVectorSupport)
             return "z14";
           if (Id >= 2964 && HaveVectorSupport)
@@ -331,7 +338,19 @@ StringRef sys::detail::getHostCPUNameForBPF() {
 #if !defined(__linux__) || !defined(__x86_64__)
   return "generic";
 #else
-  uint8_t insns[40] __attribute__ ((aligned (8))) =
+  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
+      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
+    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
+      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
+      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
+      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
+      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+      /* BPF_EXIT_INSN() */
+      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
       /* BPF_MOV64_IMM(BPF_REG_0, 0) */
     { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
       /* BPF_MOV64_IMM(BPF_REG_2, 1) */
@@ -356,10 +375,23 @@ StringRef sys::detail::getHostCPUNameForBPF() {
   } attr = {};
   attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
   attr.insn_cnt = 5;
-  attr.insns = (uint64_t)insns;
+  attr.insns = (uint64_t)v3_insns;
   attr.license = (uint64_t)"DUMMY";
 
-  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
+  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
+                   sizeof(attr));
+  if (fd >= 0) {
+    close(fd);
+    return "v3";
+  }
+
+  /* Clear the whole attr in case its content was changed by the syscall. */
+  memset(&attr, 0, sizeof(attr));
+  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
+  attr.insn_cnt = 5;
+  attr.insns = (uint64_t)v2_insns;
+  attr.license = (uint64_t)"DUMMY";
+  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
   if (fd >= 0) {
     close(fd);
     return "v2";
@@ -637,10 +669,10 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
       break;
 
     // Skylake:
-    case 0x4e: // Skylake mobile
-    case 0x5e: // Skylake desktop
-    case 0x8e: // Kaby Lake mobile
-    case 0x9e: // Kaby Lake desktop
+    case 0x4e:              // Skylake mobile
+    case 0x5e:              // Skylake desktop
+    case 0x8e:              // Kaby Lake mobile
+    case 0x9e:              // Kaby Lake desktop
       *Type = X86::INTEL_COREI7; // "skylake"
       *Subtype = X86::INTEL_COREI7_SKYLAKE;
       break;
@@ -648,7 +680,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     // Skylake Xeon:
     case 0x55:
       *Type = X86::INTEL_COREI7;
-      *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64)))
+        *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake"
+      else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
+        *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
+      else
+        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
       break;
 
     // Cannonlake:
@@ -657,6 +694,20 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
       *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake"
       break;
 
+    // Icelake:
+    case 0x7d:
+    case 0x7e:
+      *Type = X86::INTEL_COREI7;
+      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
+      break;
+
+    // Icelake Xeon:
+    case 0x6a:
+    case 0x6c:
+      *Type = X86::INTEL_COREI7;
+      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
+      break;
+
     case 0x1c: // Most 45 nm Intel Atom processors
     case 0x26: // 45 nm Atom Lincroft
     case 0x27: // 32 nm Atom Medfield
@@ -682,9 +733,14 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x7a:
       *Type = X86::INTEL_GOLDMONT_PLUS;
       break;
+    case 0x86:
+      *Type = X86::INTEL_TREMONT;
+      break;
+
     case 0x57:
       *Type = X86::INTEL_KNL; // knl
       break;
+
     case 0x85:
       *Type = X86::INTEL_KNM; // knm
       break;
@@ -702,6 +758,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
         break;
       }
 
+      if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) {
+        *Type = X86::INTEL_COREI7;
+        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
+        break;
+      }
+
       if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
         *Type = X86::INTEL_COREI7;
         *Subtype = X86::INTEL_COREI7_CASCADELAKE;
@@ -892,7 +954,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     break; // "btver2"
   case 23:
     *Type = X86::AMDFAM17H;
-    *Subtype = X86::AMDFAM17H_ZNVER1;
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = X86::AMDFAM17H_ZNVER2;
+      break; // "znver2"; 30h-3fh: Zen2
+    }
+    if (Model <= 0x0f) {
+      *Subtype = X86::AMDFAM17H_ZNVER1;
+      break; // "znver1"; 00h-0Fh: Zen1
+    }
     break;
   default:
     break; // "generic"
@@ -1233,8 +1302,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
 
   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
 
+  Features["cx8"]    = (EDX >>  8) & 1;
   Features["cmov"]   = (EDX >> 15) & 1;
   Features["mmx"]    = (EDX >> 23) & 1;
+  Features["fxsr"]   = (EDX >> 24) & 1;
   Features["sse"]    = (EDX >> 25) & 1;
   Features["sse2"]   = (EDX >> 26) & 1;
 
@@ -1298,6 +1369,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
   Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
+  Features["mpx"]        = HasLeaf7 && ((EBX >> 14) & 1);
   // AVX512 is only supported if the OS supports the context save for it.
   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
@@ -1329,6 +1401,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
+  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
 
   // There are two CPUID leafs which information associated with the pconfig
   // instruction:
@@ -1341,6 +1414,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   // detecting features using the "-march=native" flag.
   // For more info, see X86 ISA docs.
   Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
+  bool HasLeaf7Subleaf1 =
+      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
 
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/lib/Support/InitLLVM.cpp b/lib/Support/InitLLVM.cpp
index c008d0455c99..0d7d7fcc8cb6 100644
--- a/lib/Support/InitLLVM.cpp
+++ b/lib/Support/InitLLVM.cpp
@@ -1,9 +1,8 @@
 //===-- InitLLVM.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -24,6 +23,7 @@ using namespace llvm::sys;
 
 InitLLVM::InitLLVM(int &Argc, const char **&Argv) : StackPrinter(Argc, Argv) {
   sys::PrintStackTraceOnErrorSignal(Argv[0]);
+  install_out_of_memory_new_handler();
 
 #ifdef _WIN32
   // We use UTF-8 as the internal character encoding. On Windows,
diff --git a/lib/Support/IntEqClasses.cpp b/lib/Support/IntEqClasses.cpp
index cb6e3a19e8d3..4a976dcefc65 100644
--- a/lib/Support/IntEqClasses.cpp
+++ b/lib/Support/IntEqClasses.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/IntervalMap.cpp b/lib/Support/IntervalMap.cpp
index e11a7f2eb843..f15c7c9403c3 100644
--- a/lib/Support/IntervalMap.cpp
+++ b/lib/Support/IntervalMap.cpp
@@ -1,9 +1,8 @@
 //===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/ItaniumManglingCanonicalizer.cpp b/lib/Support/ItaniumManglingCanonicalizer.cpp
index e55dcd761809..da6514f7170b 100644
--- a/lib/Support/ItaniumManglingCanonicalizer.cpp
+++ b/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -1,9 +1,8 @@
 //===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,6 +21,7 @@ using namespace llvm;
 using llvm::itanium_demangle::ForwardTemplateReference;
 using llvm::itanium_demangle::Node;
 using llvm::itanium_demangle::NodeKind;
+using llvm::itanium_demangle::StringView;
 
 namespace {
 struct FoldingSetNodeIDBuilder {
diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp
index d468013fb94a..95e5ed654277 100644
--- a/lib/Support/JSON.cpp
+++ b/lib/Support/JSON.cpp
@@ -1,9 +1,8 @@
 //=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 
@@ -182,6 +181,12 @@ bool operator==(const Value &L, const Value &R) {
   case Value::Boolean:
     return *L.getAsBoolean() == *R.getAsBoolean();
   case Value::Number:
+    // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+    // The same integer must convert to the same double, per the standard.
+    // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
+    // So we avoid floating point promotion for exact comparisons.
+    if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
+      return L.getAsInteger() == R.getAsInteger();
     return *L.getAsNumber() == *R.getAsNumber();
   case Value::String:
     return *L.getAsString() == *R.getAsString();
@@ -555,9 +560,6 @@ std::string fixUTF8(llvm::StringRef S) {
   return Res;
 }
 
-} // namespace json
-} // namespace llvm
-
 static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
   OS << '\"';
   for (unsigned char C : S) {
@@ -588,106 +590,129 @@ static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
   OS << '\"';
 }
 
-enum IndenterAction {
-  Indent,
-  Outdent,
-  Newline,
-  Space,
-};
-
-// Prints JSON. The indenter can be used to control formatting.
-template <typename Indenter>
-void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const {
-  switch (Type) {
-  case T_Null:
+void llvm::json::OStream::value(const Value &V) {
+  switch (V.kind()) {
+  case Value::Null:
+    valueBegin();
     OS << "null";
-    break;
-  case T_Boolean:
-    OS << (as<bool>() ? "true" : "false");
-    break;
-  case T_Double:
-    OS << format("%.*g", std::numeric_limits<double>::max_digits10,
-                 as<double>());
-    break;
-  case T_Integer:
-    OS << as<int64_t>();
-    break;
-  case T_StringRef:
-    quote(OS, as<StringRef>());
-    break;
-  case T_String:
-    quote(OS, as<std::string>());
-    break;
-  case T_Object: {
-    bool Comma = false;
-    OS << '{';
-    I(Indent);
-    for (const auto *P : sortedElements(as<json::Object>())) {
-      if (Comma)
-        OS << ',';
-      Comma = true;
-      I(Newline);
-      quote(OS, P->first);
-      OS << ':';
-      I(Space);
-      P->second.print(OS, I);
-    }
-    I(Outdent);
-    if (Comma)
-      I(Newline);
-    OS << '}';
-    break;
+    return;
+  case Value::Boolean:
+    valueBegin();
+    OS << (*V.getAsBoolean() ? "true" : "false");
+    return;
+  case Value::Number:
+    valueBegin();
+    if (V.Type == Value::T_Integer)
+      OS << *V.getAsInteger();
+    else
+      OS << format("%.*g", std::numeric_limits<double>::max_digits10,
+                   *V.getAsNumber());
+    return;
+  case Value::String:
+    valueBegin();
+    quote(OS, *V.getAsString());
+    return;
+  case Value::Array:
+    return array([&] {
+      for (const Value &E : *V.getAsArray())
+        value(E);
+    });
+  case Value::Object:
+    return object([&] {
+      for (const Object::value_type *E : sortedElements(*V.getAsObject()))
+        attribute(E->first, E->second);
+    });
   }
-  case T_Array: {
-    bool Comma = false;
-    OS << '[';
-    I(Indent);
-    for (const auto &E : as<json::Array>()) {
-      if (Comma)
-        OS << ',';
-      Comma = true;
-      I(Newline);
-      E.print(OS, I);
-    }
-    I(Outdent);
-    if (Comma)
-      I(Newline);
-    OS << ']';
-    break;
+}
+
+void llvm::json::OStream::valueBegin() {
+  assert(Stack.back().Ctx != Object && "Only attributes allowed here");
+  if (Stack.back().HasValue) {
+    assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
+    OS << ',';
+  }
+  if (Stack.back().Ctx == Array)
+    newline();
+  Stack.back().HasValue = true;
+}
+
+void llvm::json::OStream::newline() {
+  if (IndentSize) {
+    OS.write('\n');
+    OS.indent(Indent);
   }
+}
+
+void llvm::json::OStream::arrayBegin() {
+  valueBegin();
+  Stack.emplace_back();
+  Stack.back().Ctx = Array;
+  Indent += IndentSize;
+  OS << '[';
+}
+
+void llvm::json::OStream::arrayEnd() {
+  assert(Stack.back().Ctx == Array);
+  Indent -= IndentSize;
+  if (Stack.back().HasValue)
+    newline();
+  OS << ']';
+  Stack.pop_back();
+  assert(!Stack.empty());
+}
+
+void llvm::json::OStream::objectBegin() {
+  valueBegin();
+  Stack.emplace_back();
+  Stack.back().Ctx = Object;
+  Indent += IndentSize;
+  OS << '{';
+}
+
+void llvm::json::OStream::objectEnd() {
+  assert(Stack.back().Ctx == Object);
+  Indent -= IndentSize;
+  if (Stack.back().HasValue)
+    newline();
+  OS << '}';
+  Stack.pop_back();
+  assert(!Stack.empty());
+}
+
+void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
+  assert(Stack.back().Ctx == Object);
+  if (Stack.back().HasValue)
+    OS << ',';
+  newline();
+  Stack.back().HasValue = true;
+  Stack.emplace_back();
+  Stack.back().Ctx = Singleton;
+  if (LLVM_LIKELY(isUTF8(Key))) {
+    quote(OS, Key);
+  } else {
+    assert(false && "Invalid UTF-8 in attribute key");
+    quote(OS, fixUTF8(Key));
   }
+  OS.write(':');
+  if (IndentSize)
+    OS.write(' ');
+}
+
+void llvm::json::OStream::attributeEnd() {
+  assert(Stack.back().Ctx == Singleton);
+  assert(Stack.back().HasValue && "Attribute must have a value");
+  Stack.pop_back();
+  assert(Stack.back().Ctx == Object);
 }
 
+} // namespace json
+} // namespace llvm
+
 void llvm::format_provider<llvm::json::Value>::format(
     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
-  if (Options.empty()) {
-    OS << E;
-    return;
-  }
   unsigned IndentAmount = 0;
-  if (Options.getAsInteger(/*Radix=*/10, IndentAmount))
+  if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
     llvm_unreachable("json::Value format options should be an integer");
-  unsigned IndentLevel = 0;
-  E.print(OS, [&](IndenterAction A) {
-    switch (A) {
-    case Newline:
-      OS << '\n';
-      OS.indent(IndentLevel);
-      break;
-    case Space:
-      OS << ' ';
-      break;
-    case Indent:
-      IndentLevel += IndentAmount;
-      break;
-    case Outdent:
-      IndentLevel -= IndentAmount;
-      break;
-    };
-  });
+  json::OStream(OS, IndentAmount).value(E);
 }
 
-llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) {
-  E.print(OS, [](IndenterAction A) { /*ignore*/ });
-  return OS;
-}
diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp
index 17c55f565e08..e043a3c33c28 100644
--- a/lib/Support/JamCRC.cpp
+++ b/lib/Support/JamCRC.cpp
@@ -1,9 +1,8 @@
 //===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/KnownBits.cpp b/lib/Support/KnownBits.cpp
index ac790ebed352..a6c591fca312 100644
--- a/lib/Support/KnownBits.cpp
+++ b/lib/Support/KnownBits.cpp
@@ -1,9 +1,8 @@
 //===-- KnownBits.cpp - Stores known zeros/ones ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,18 +15,14 @@
 
 using namespace llvm;
 
-KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
-                                      const KnownBits &LHS, KnownBits RHS) {
-  // Carry in a 1 for a subtract, rather than 0.
-  bool CarryIn = false;
-  if (!Add) {
-    // Sum = LHS + ~RHS + 1
-    std::swap(RHS.Zero, RHS.One);
-    CarryIn = true;
-  }
+static KnownBits computeForAddCarry(
+    const KnownBits &LHS, const KnownBits &RHS,
+    bool CarryZero, bool CarryOne) {
+  assert(!(CarryZero && CarryOne) &&
+         "Carry can't be zero and one at the same time");
 
-  APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + CarryIn;
-  APInt PossibleSumOne = LHS.One + RHS.One + CarryIn;
+  APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
+  APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
 
   // Compute known bits of the carry.
   APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
@@ -46,9 +41,32 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
   KnownBits KnownOut;
   KnownOut.Zero = ~std::move(PossibleSumZero) & Known;
   KnownOut.One = std::move(PossibleSumOne) & Known;
+  return KnownOut;
+}
+
+KnownBits KnownBits::computeForAddCarry(
+    const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry) {
+  assert(Carry.getBitWidth() == 1 && "Carry must be 1-bit");
+  return ::computeForAddCarry(
+      LHS, RHS, Carry.Zero.getBoolValue(), Carry.One.getBoolValue());
+}
+
+KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
+                                      const KnownBits &LHS, KnownBits RHS) {
+  KnownBits KnownOut;
+  if (Add) {
+    // Sum = LHS + RHS + 0
+    KnownOut = ::computeForAddCarry(
+        LHS, RHS, /*CarryZero*/true, /*CarryOne*/false);
+  } else {
+    // Sum = LHS + ~RHS + 1
+    std::swap(RHS.Zero, RHS.One);
+    KnownOut = ::computeForAddCarry(
+        LHS, RHS, /*CarryZero*/false, /*CarryOne*/true);
+  }
 
   // Are we still trying to solve for the sign bit?
-  if (!Known.isSignBitSet()) {
+  if (!KnownOut.isNegative() && !KnownOut.isNonNegative()) {
     if (NSW) {
       // Adding two non-negative numbers, or subtracting a negative number from
       // a non-negative one, can't wrap into negative.
diff --git a/lib/Support/LEB128.cpp b/lib/Support/LEB128.cpp
index 449626f2d451..d41b673e9c8a 100644
--- a/lib/Support/LEB128.cpp
+++ b/lib/Support/LEB128.cpp
@@ -1,9 +1,8 @@
 //===- LEB128.cpp - LEB128 utility functions implementation -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp
index 5baa1a37f385..164436a2c48e 100644
--- a/lib/Support/LineIterator.cpp
+++ b/lib/Support/LineIterator.cpp
@@ -1,9 +1,8 @@
 //===- LineIterator.cpp - Implementation of line iteration ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index c166230ba3a3..10181192afbd 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -1,9 +1,8 @@
 //===--- LockFileManager.cpp - File-level Locking Utility------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/LowLevelType.cpp b/lib/Support/LowLevelType.cpp
index cb2187405d6b..fe77cb3db413 100644
--- a/lib/Support/LowLevelType.cpp
+++ b/lib/Support/LowLevelType.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/Support/LowLevelType.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,14 +17,14 @@ using namespace llvm;
 
 LLT::LLT(MVT VT) {
   if (VT.isVector()) {
-    init(/*isPointer=*/false, VT.getVectorNumElements() > 1,
+    init(/*IsPointer=*/false, VT.getVectorNumElements() > 1,
          VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(),
          /*AddressSpace=*/0);
   } else if (VT.isValid()) {
     // Aggregates are no different from real scalars as far as GlobalISel is
     // concerned.
     assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
-    init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
+    init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0,
          VT.getSizeInBits(), /*AddressSpace=*/0);
   } else {
     IsPointer = false;
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 74f71a385027..28ceb1a70e42 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -1,9 +1,8 @@
 //===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/MathExtras.cpp b/lib/Support/MathExtras.cpp
index ba0924540ceb..87c7101c424b 100644
--- a/lib/Support/MathExtras.cpp
+++ b/lib/Support/MathExtras.cpp
@@ -1,9 +1,8 @@
 //===-- MathExtras.cpp - Implement the MathExtras header --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
index c245eedd2c16..581484268cd8 100644
--- a/lib/Support/Memory.cpp
+++ b/lib/Support/Memory.cpp
@@ -1,9 +1,8 @@
 //===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,10 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Valgrind.h"
 
+#ifndef NDEBUG
+#include "llvm/Support/raw_ostream.h"
+#endif // ifndef NDEBUG
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Memory.inc"
@@ -23,3 +26,28 @@
 #ifdef _WIN32
 #include "Windows/Memory.inc"
 #endif
+
+#ifndef NDEBUG
+
+namespace llvm {
+namespace sys {
+
+raw_ostream &operator<<(raw_ostream &OS, const Memory::ProtectionFlags &PF) {
+  assert((PF & ~(Memory::MF_READ | Memory::MF_WRITE | Memory::MF_EXEC)) == 0 &&
+         "Unrecognized flags");
+
+  return OS << (PF & Memory::MF_READ ? 'R' : '-')
+            << (PF & Memory::MF_WRITE ? 'W' : '-')
+            << (PF & Memory::MF_EXEC ? 'X' : '-');
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MemoryBlock &MB) {
+  return OS << "[ " << MB.base() << " .. "
+            << (void *)((char *)MB.base() + MB.allocatedSize()) << " ] ("
+            << MB.allocatedSize() << " bytes)";
+}
+
+} // end namespace sys
+} // end namespace llvm
+
+#endif // ifndef NDEBUG
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index ef9159bac284..d0e5bb154c1a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -1,9 +1,8 @@
 //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -183,7 +182,7 @@ class MemoryBufferMMapFile : public MB {
   }
 
 public:
-  MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+  MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
                        uint64_t Offset, std::error_code &EC)
       : MFR(FD, MB::Mapmode, getLegalMapSize(Len, Offset),
             getLegalMapOffset(Offset), EC) {
@@ -209,16 +208,16 @@ public:
 }
 
 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
-getMemoryBufferForStream(int FD, const Twine &BufferName) {
+getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
   const ssize_t ChunkSize = 4096*4;
   SmallString<ChunkSize> Buffer;
-  ssize_t ReadBytes;
+  size_t ReadBytes;
   // Read into Buffer until we hit EOF.
   do {
     Buffer.reserve(Buffer.size() + ChunkSize);
-    ReadBytes = sys::RetryAfterSignal(-1, ::read, FD, Buffer.end(), ChunkSize);
-    if (ReadBytes == -1)
-      return std::error_code(errno, std::generic_category());
+    if (auto EC = sys::fs::readNativeFile(
+            FD, makeMutableArrayRef(Buffer.end(), ChunkSize), &ReadBytes))
+      return EC;
     Buffer.set_size(Buffer.size() + ReadBytes);
   } while (ReadBytes != 0);
 
@@ -235,7 +234,7 @@ MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
 
 template <typename MB>
 static ErrorOr<std::unique_ptr<MB>>
-getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
+getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                 bool IsVolatile);
 
@@ -243,15 +242,14 @@ template <typename MB>
 static ErrorOr<std::unique_ptr<MB>>
 getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
            uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) {
-  int FD;
-  std::error_code EC = sys::fs::openFileForRead(Filename, FD, sys::fs::OF_None);
-
-  if (EC)
-    return EC;
-
+  Expected<sys::fs::file_t> FDOrErr =
+      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
+  if (!FDOrErr)
+    return errorToErrorCode(FDOrErr.takeError());
+  sys::fs::file_t FD = *FDOrErr;
   auto Ret = getOpenFileImpl<MB>(FD, Filename, FileSize, MapSize, Offset,
                                  RequiresNullTerminator, IsVolatile);
-  close(FD);
+  sys::fs::closeFile(FD);
   return Ret;
 }
 
@@ -305,7 +303,7 @@ WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
   return SB;
 }
 
-static bool shouldUseMmap(int FD,
+static bool shouldUseMmap(sys::fs::file_t FD,
                           size_t FileSize,
                           size_t MapSize,
                           off_t Offset,
@@ -363,12 +361,11 @@ static bool shouldUseMmap(int FD,
 static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
 getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
                  uint64_t Offset) {
-  int FD;
-  std::error_code EC = sys::fs::openFileForReadWrite(
-      Filename, FD, sys::fs::CD_OpenExisting, sys::fs::OF_None);
-
-  if (EC)
-    return EC;
+  Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
+      Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
+  if (!FDOrErr)
+    return errorToErrorCode(FDOrErr.takeError());
+  sys::fs::file_t FD = *FDOrErr;
 
   // Default is to map the full file.
   if (MapSize == uint64_t(-1)) {
@@ -392,6 +389,7 @@ getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
     MapSize = FileSize;
   }
 
+  std::error_code EC;
   std::unique_ptr<WriteThroughMemoryBuffer> Result(
       new (NamedBufferAlloc(Filename))
           MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
@@ -415,10 +413,10 @@ WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
 
 template <typename MB>
 static ErrorOr<std::unique_ptr<MB>>
-getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
+getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                 bool IsVolatile) {
-  static int PageSize = sys::Process::getPageSize();
+  static int PageSize = sys::Process::getPageSizeEstimate();
 
   // Default is to map the full file.
   if (MapSize == uint64_t(-1)) {
@@ -460,45 +458,23 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
     return make_error_code(errc::not_enough_memory);
   }
 
-  char *BufPtr = Buf.get()->getBufferStart();
-
-  size_t BytesLeft = MapSize;
-#ifndef HAVE_PREAD
-  if (lseek(FD, Offset, SEEK_SET) == -1)
-    return std::error_code(errno, std::generic_category());
-#endif
-
-  while (BytesLeft) {
-#ifdef HAVE_PREAD
-    ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
-                                            MapSize - BytesLeft + Offset);
-#else
-    ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
-#endif
-    if (NumRead == -1) {
-      // Error while reading.
-      return std::error_code(errno, std::generic_category());
-    }
-    if (NumRead == 0) {
-      memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
-      break;
-    }
-    BytesLeft -= NumRead;
-    BufPtr += NumRead;
-  }
+  // Propagate read failures (as the replaced loop did) rather than handing
+  // back a buffer whose contents were never filled in.
+  if (std::error_code EC =
+          sys::fs::readNativeFileSlice(FD, Buf->getBuffer(), Offset))
+    return EC;
 
   return std::move(Buf);
 }
 
 ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
+MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                           bool RequiresNullTerminator, bool IsVolatile) {
   return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
                          RequiresNullTerminator, IsVolatile);
 }
 
 ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
+MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
                                int64_t Offset, bool IsVolatile) {
   assert(MapSize != uint64_t(-1));
   return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
@@ -512,18 +488,19 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
   // fallback if it fails.
   sys::ChangeStdinToBinary();
 
-  return getMemoryBufferForStream(0, "<stdin>");
+  return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
 }
 
 ErrorOr<std::unique_ptr<MemoryBuffer>>
 MemoryBuffer::getFileAsStream(const Twine &Filename) {
-  int FD;
-  std::error_code EC = sys::fs::openFileForRead(Filename, FD, sys::fs::OF_None);
-  if (EC)
-    return EC;
+  Expected<sys::fs::file_t> FDOrErr =
+      sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
+  if (!FDOrErr)
+    return errorToErrorCode(FDOrErr.takeError());
+  sys::fs::file_t FD = *FDOrErr;
   ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
       getMemoryBufferForStream(FD, Filename);
-  close(FD);
+  sys::fs::closeFile(FD);
   return Ret;
 }
 
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 7138c7a4b984..69b7b8126ab1 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -1,9 +1,8 @@
 //===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/NativeFormatting.cpp b/lib/Support/NativeFormatting.cpp
index 85b4bfb81568..3731e0c56359 100644
--- a/lib/Support/NativeFormatting.cpp
+++ b/lib/Support/NativeFormatting.cpp
@@ -1,9 +1,8 @@
 //===- NativeFormatting.cpp - Low level formatting helpers -------*- C++-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/Optional.cpp b/lib/Support/Optional.cpp
new file mode 100644
index 000000000000..2425739c845d
--- /dev/null
+++ b/lib/Support/Optional.cpp
@@ -0,0 +1,14 @@
+//===- Optional.cpp - Optional values ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
+
+llvm::raw_ostream &llvm::operator<<(raw_ostream &OS, NoneType) {
+  return OS << "None";
+}
diff --git a/lib/Support/Options.cpp b/lib/Support/Options.cpp
index 71258450efa6..770b7381c20e 100644
--- a/lib/Support/Options.cpp
+++ b/lib/Support/Options.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Support/Options.cpp - Debug options support ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp
index 1844003b9d3d..621bccbf2a4c 100644
--- a/lib/Support/Parallel.cpp
+++ b/lib/Support/Parallel.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,7 +17,9 @@
 #include <stack>
 #include <thread>
 
-using namespace llvm;
+namespace llvm {
+namespace parallel {
+namespace detail {
 
 namespace {
 
@@ -119,11 +120,28 @@ Executor *Executor::getDefaultExecutor() {
 #endif
 }
 
-void parallel::detail::TaskGroup::spawn(std::function<void()> F) {
-  L.inc();
-  Executor::getDefaultExecutor()->add([&, F] {
+static std::atomic<int> TaskGroupInstances;
+
+// Latch::sync() called by the dtor may cause one thread to block. It is a dead
+// lock if all threads in the default executor are blocked. To prevent the dead
+// lock, only allow the first TaskGroup to run tasks parallelly. In the scenario
+// of nested parallel_for_each(), only the outermost one runs parallelly.
+TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
+TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+
+void TaskGroup::spawn(std::function<void()> F) {
+  if (Parallel) {
+    L.inc();
+    Executor::getDefaultExecutor()->add([&, F] {
+      F();
+      L.dec();
+    });
+  } else {
     F();
-    L.dec();
-  });
+  }
 }
+
+} // namespace detail
+} // namespace parallel
+} // namespace llvm
 #endif // LLVM_ENABLE_THREADS
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 5ce2f50ebdaa..c49260125dba 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -1,9 +1,8 @@
 //===-- Path.cpp - Implement OS Path Concept ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -170,25 +169,6 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
                    SmallVectorImpl<char> &ResultPath, bool MakeAbsolute,
                    unsigned Mode, FSEntity Type,
                    sys::fs::OpenFlags Flags = sys::fs::OF_None) {
-  SmallString<128> ModelStorage;
-  Model.toVector(ModelStorage);
-
-  if (MakeAbsolute) {
-    // Make model absolute by prepending a temp directory if it's not already.
-    if (!sys::path::is_absolute(Twine(ModelStorage))) {
-      SmallString<128> TDir;
-      sys::path::system_temp_directory(true, TDir);
-      sys::path::append(TDir, Twine(ModelStorage));
-      ModelStorage.swap(TDir);
-    }
-  }
-
-  // From here on, DO NOT modify model. It may be needed if the randomly chosen
-  // path already exists.
-  ResultPath = ModelStorage;
-  // Null terminate.
-  ResultPath.push_back(0);
-  ResultPath.pop_back();
 
   // Limit the number of attempts we make, so that we don't infinite loop. E.g.
   // "permission denied" could be for a specific file (so we retry with a
@@ -196,13 +176,7 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
   // Checking which is racy, so we try a number of times, then give up.
   std::error_code EC;
   for (int Retries = 128; Retries > 0; --Retries) {
-    // Replace '%' with random chars.
-    for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
-      if (ModelStorage[i] == '%')
-        ResultPath[i] =
-            "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
-    }
-
+    sys::fs::createUniquePath(Model, ResultPath, MakeAbsolute);
     // Try to open + create the file.
     switch (Type) {
     case FS_File: {
@@ -323,7 +297,8 @@ reverse_iterator rbegin(StringRef Path, Style style) {
   I.Path = Path;
   I.Position = Path.size();
   I.S = style;
-  return ++I;
+  ++I;
+  return I;
 }
 
 reverse_iterator rend(StringRef Path) {
@@ -763,6 +738,32 @@ std::error_code getUniqueID(const Twine Path, UniqueID &Result) {
   return std::error_code();
 }
 
+void createUniquePath(const Twine &Model, SmallVectorImpl<char> &ResultPath,
+                      bool MakeAbsolute) {
+  SmallString<128> ModelStorage;
+  Model.toVector(ModelStorage);
+
+  if (MakeAbsolute) {
+    // Make model absolute by prepending a temp directory if it's not already.
+    if (!sys::path::is_absolute(Twine(ModelStorage))) {
+      SmallString<128> TDir;
+      sys::path::system_temp_directory(true, TDir);
+      sys::path::append(TDir, Twine(ModelStorage));
+      ModelStorage.swap(TDir);
+    }
+  }
+
+  ResultPath = ModelStorage;
+  ResultPath.push_back(0);
+  ResultPath.pop_back();
+
+  // Replace '%' with random chars.
+  for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
+    if (ModelStorage[i] == '%')
+      ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
+  }
+}
+
 std::error_code createUniqueFile(const Twine &Model, int &ResultFd,
                                  SmallVectorImpl<char> &ResultPath,
                                  unsigned Mode) {
@@ -959,6 +960,7 @@ static std::error_code copy_file_internal(int ReadFD, int WriteFD) {
   return std::error_code();
 }
 
+#ifndef __APPLE__
 std::error_code copy_file(const Twine &From, const Twine &To) {
   int ReadFD, WriteFD;
   if (std::error_code EC = openFileForRead(From, ReadFD, OF_None))
@@ -976,6 +978,7 @@ std::error_code copy_file(const Twine &From, const Twine &To) {
 
   return EC;
 }
+#endif
 
 std::error_code copy_file(const Twine &From, int ToFD) {
   int ReadFD;
@@ -1122,6 +1125,7 @@ TempFile &TempFile::operator=(TempFile &&Other) {
   TmpName = std::move(Other.TmpName);
   FD = Other.FD;
   Other.Done = true;
+  Other.FD = -1;
   return *this;
 }
 
@@ -1129,26 +1133,27 @@ TempFile::~TempFile() { assert(Done); }
 
 Error TempFile::discard() {
   Done = true;
-  std::error_code RemoveEC;
-// On windows closing will remove the file.
-#ifndef _WIN32
-  // Always try to close and remove.
-  if (!TmpName.empty()) {
-    RemoveEC = fs::remove(TmpName);
-    sys::DontRemoveFileOnSignal(TmpName);
-  }
-#endif
-
-  if (!RemoveEC)
-    TmpName = "";
-
   if (FD != -1 && close(FD) == -1) {
     std::error_code EC = std::error_code(errno, std::generic_category());
     return errorCodeToError(EC);
   }
   FD = -1;
 
+#ifdef _WIN32
+  // On windows closing will remove the file.
+  TmpName = "";
+  return Error::success();
+#else
+  // Always try to close and remove.
+  std::error_code RemoveEC;
+  if (!TmpName.empty()) {
+    RemoveEC = fs::remove(TmpName);
+    sys::DontRemoveFileOnSignal(TmpName);
+    if (!RemoveEC)
+      TmpName = "";
+  }
   return errorCodeToError(RemoveEC);
+#endif
 }
 
 Error TempFile::keep(const Twine &Name) {
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index 358137f08f5f..6fe195ffda7a 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -1,9 +1,8 @@
 //===-- PluginLoader.cpp - Implement -load command line option ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 206de91ae239..aec00baec0e3 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -1,9 +1,8 @@
 //===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,12 +14,14 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm-c/ErrorHandling.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/Watchdog.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <atomic>
 #include <cstdarg>
 #include <cstdio>
 #include <tuple>
@@ -34,7 +35,7 @@ using namespace llvm;
 // If backtrace support is not enabled, compile out support for pretty stack
 // traces.  This has the secondary effect of not requiring thread local storage
 // when backtrace support is disabled.
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
 
 // We need a thread local pointer to manage the stack of our stack trace
 // objects, but we *really* cannot tolerate destructors running and do not want
@@ -42,6 +43,22 @@ using namespace llvm;
 // thread-local variable.
 static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr;
 
+// The use of 'volatile' here is to ensure that any particular thread always
+// reloads the value of the counter. The 'std::atomic' allows us to specify that
+// this variable is accessed in an unsynchronized way (it's not actually
+// synchronizing). This does technically mean that the value may not appear to
+// be the same across threads running simultaneously on different CPUs, but in
+// practice the worst that will happen is that we won't print a stack trace when
+// we could have.
+//
+// This is initialized to 1 because 0 is used as a sentinel for "not enabled on
+// the current thread". If the user happens to overflow an 'unsigned' with
+// SIGINFO requests, it's possible that some threads will stop responding to it,
+// but the program won't crash.
+static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter =
+    ATOMIC_VAR_INIT(1);
+static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0;
+
 namespace llvm {
 PrettyStackTraceEntry *ReverseStackTrace(PrettyStackTraceEntry *Head) {
   PrettyStackTraceEntry *Prev = nullptr;
@@ -57,8 +74,9 @@ static void PrintStack(raw_ostream &OS) {
   // to fail if we crashed due to stack overflow), we do an up-front pass to
   // reverse the stack, then print it, then reverse it again.
   unsigned ID = 0;
-  PrettyStackTraceEntry *ReversedStack =
-      llvm::ReverseStackTrace(PrettyStackTraceHead);
+  SaveAndRestore<PrettyStackTraceEntry *> SavedStack{PrettyStackTraceHead,
+                                                     nullptr};
+  PrettyStackTraceEntry *ReversedStack = ReverseStackTrace(SavedStack.get());
   for (const PrettyStackTraceEntry *Entry = ReversedStack; Entry;
        Entry = Entry->getNextEntry()) {
     OS << ID++ << ".\t";
@@ -68,7 +86,10 @@ static void PrintStack(raw_ostream &OS) {
   llvm::ReverseStackTrace(ReversedStack);
 }
 
-/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+/// Print the current stack trace to the specified stream.
+///
+/// Marked NOINLINE so it can be called from debuggers.
+LLVM_ATTRIBUTE_NOINLINE
 static void PrintCurStackTrace(raw_ostream &OS) {
   // Don't print an empty trace.
   if (!PrettyStackTraceHead) return;
@@ -128,11 +149,24 @@ static void CrashHandler(void *) {
 #endif
 }
 
-// defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
-#endif
+static void printForSigInfoIfNeeded() {
+  unsigned CurrentSigInfoGeneration =
+      GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed);
+  if (ThreadLocalSigInfoGenerationCounter == 0 ||
+      ThreadLocalSigInfoGenerationCounter == CurrentSigInfoGeneration) {
+    return;
+  }
+
+  PrintCurStackTrace(errs());
+  ThreadLocalSigInfoGenerationCounter = CurrentSigInfoGeneration;
+}
+
+#endif // ENABLE_BACKTRACES
 
 PrettyStackTraceEntry::PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
+  // Handle SIGINFO first, because we haven't finished constructing yet.
+  printForSigInfoIfNeeded();
   // Link ourselves.
   NextEntry = PrettyStackTraceHead;
   PrettyStackTraceHead = this;
@@ -140,10 +174,12 @@ PrettyStackTraceEntry::PrettyStackTraceEntry() {
 }
 
 PrettyStackTraceEntry::~PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
   assert(PrettyStackTraceHead == this &&
          "Pretty stack trace entry destruction is out of order");
   PrettyStackTraceHead = NextEntry;
+  // Handle SIGINFO after unlinking, because we already started destructing.
+  printForSigInfoIfNeeded();
 #endif
 }
 
@@ -175,7 +211,7 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
   OS << '\n';
 }
 
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
 static bool RegisterCrashPrinter() {
   sys::AddSignalHandler(CrashHandler, nullptr);
   return false;
@@ -183,15 +219,37 @@ static bool RegisterCrashPrinter() {
 #endif
 
 void llvm::EnablePrettyStackTrace() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
   // The first time this is called, we register the crash printer.
   static bool HandlerRegistered = RegisterCrashPrinter();
   (void)HandlerRegistered;
 #endif
 }
 
+void llvm::EnablePrettyStackTraceOnSigInfoForThisThread(bool ShouldEnable) {
+#if ENABLE_BACKTRACES
+  if (!ShouldEnable) {
+    ThreadLocalSigInfoGenerationCounter = 0;
+    return;
+  }
+
+  // The first time this is called, we register the SIGINFO handler.
+  static bool HandlerRegistered = []{
+    sys::SetInfoSignalFunction([]{
+      GlobalSigInfoGenerationCounter.fetch_add(1, std::memory_order_relaxed);
+    });
+    return false;
+  }();
+  (void)HandlerRegistered;
+
+  // Next, enable it for the current thread.
+  ThreadLocalSigInfoGenerationCounter =
+      GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed);
+#endif
+}
+
 const void *llvm::SavePrettyStackState() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
   return PrettyStackTraceHead;
 #else
   return nullptr;
@@ -199,7 +257,7 @@ const void *llvm::SavePrettyStackState() {
 }
 
 void llvm::RestorePrettyStackState(const void *Top) {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
   PrettyStackTraceHead =
       static_cast<PrettyStackTraceEntry *>(const_cast<void *>(Top));
 #endif
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index f32355aefbb7..5b6471008159 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -1,9 +1,8 @@
 //===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 63cdcdaabee9..0a9363c59fc6 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -1,9 +1,8 @@
 //===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 8b6d74e49f31..7ce856b716c6 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -1,9 +1,8 @@
 //===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp
index df0d87fab021..09fad1979985 100644
--- a/lib/Support/RandomNumberGenerator.cpp
+++ b/lib/Support/RandomNumberGenerator.cpp
@@ -1,9 +1,8 @@
 //===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,13 +26,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "rng"
 
-// Tracking BUG: 19665
-// http://llvm.org/bugs/show_bug.cgi?id=19665
-//
-// Do not change to cl::opt<uint64_t> since this silently breaks argument parsing.
-static cl::opt<unsigned long long>
-    Seed("rng-seed", cl::value_desc("seed"), cl::Hidden,
-         cl::desc("Seed for the random number generator"), cl::init(0));
+static cl::opt<uint64_t> Seed("rng-seed", cl::value_desc("seed"), cl::Hidden,
+                              cl::desc("Seed for the random number generator"),
+                              cl::init(0));
 
 RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
   LLVM_DEBUG(if (Seed == 0) dbgs()
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 48caab131526..4c1b07038024 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -1,9 +1,8 @@
 //===-- Regex.cpp - Regular Expression matcher implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/SHA1.cpp b/lib/Support/SHA1.cpp
index 3007a78d5e22..47a5f07fbe7b 100644
--- a/lib/Support/SHA1.cpp
+++ b/lib/Support/SHA1.cpp
@@ -1,9 +1,8 @@
 //====- SHA1.cpp - Private copy of the SHA1 implementation ---*- C++ -* ======//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp
index 807c9fa521de..54d4cc33410b 100644
--- a/lib/Support/ScaledNumber.cpp
+++ b/lib/Support/ScaledNumber.cpp
@@ -1,9 +1,8 @@
 //==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
index 333f492d4589..173a07f009d2 100644
--- a/lib/Support/Signals.cpp
+++ b/lib/Support/Signals.cpp
@@ -1,9 +1,8 @@
 //===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -132,8 +131,8 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
   // If we don't know argv0 or the address of main() at this point, try
   // to guess it anyway (it's possible on some platforms).
   std::string MainExecutableName =
-      Argv0.empty() ? sys::fs::getMainExecutable(nullptr, nullptr)
-                    : (std::string)Argv0;
+      sys::fs::exists(Argv0) ? (std::string)Argv0
+                             : sys::fs::getMainExecutable(nullptr, nullptr);
   BumpPtrAllocator Allocator;
   StringSaver StrPool(Allocator);
   std::vector<const char *> Modules(Depth, nullptr);
diff --git a/lib/Support/Signposts.cpp b/lib/Support/Signposts.cpp
new file mode 100644
index 000000000000..d456f41d2fa6
--- /dev/null
+++ b/lib/Support/Signposts.cpp
@@ -0,0 +1,119 @@
+//===-- Signposts.cpp - Interval debug annotations ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signposts.h"
+#include "llvm/Support/Timer.h"
+
+#include "llvm/Config/config.h"
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#include "llvm/ADT/DenseMap.h"
+#include <os/signpost.h>
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
+
+using namespace llvm;
+
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+namespace {
+os_log_t *LogCreator() {
+  os_log_t *X = new os_log_t;
+  *X = os_log_create("org.llvm.signposts", OS_LOG_CATEGORY_POINTS_OF_INTEREST);
+  return X;
+}
+void LogDeleter(os_log_t *X) {
+  os_release(*X);
+  delete X;
+}
+} // end anonymous namespace
+
+namespace llvm {
+class SignpostEmitterImpl {
+  using LogPtrTy =
+      std::unique_ptr<os_log_t, std::function<void(os_log_t *)>>;
+  using LogTy = LogPtrTy::element_type;
+
+  LogPtrTy SignpostLog;
+  DenseMap<const Timer *, os_signpost_id_t> Signposts;
+
+  LogTy &getLogger() const { return *SignpostLog; }
+  os_signpost_id_t getSignpostForTimer(const Timer *T) {
+    const auto &I = Signposts.find(T);
+    if (I != Signposts.end())
+      return I->second;
+
+    const auto &Inserted = Signposts.insert(
+        std::make_pair(T, os_signpost_id_make_with_pointer(getLogger(), T)));
+    return Inserted.first->second;
+  }
+
+public:
+  SignpostEmitterImpl() : SignpostLog(LogCreator(), LogDeleter), Signposts() {}
+
+  bool isEnabled() const { return os_signpost_enabled(*SignpostLog); }
+
+  void startTimerInterval(Timer *T) {
+    if (isEnabled()) {
+      // Both strings used here are required to be constant literal strings
+      os_signpost_interval_begin(getLogger(), getSignpostForTimer(T),
+                                 "Pass Timers", "Begin %s",
+                                 T->getName().c_str());
+    }
+  }
+
+  void endTimerInterval(Timer *T) {
+    if (isEnabled()) {
+      // Both strings used here are required to be constant literal strings
+      os_signpost_interval_end(getLogger(), getSignpostForTimer(T),
+                               "Pass Timers", "End %s", T->getName().c_str());
+    }
+  }
+};
+} // end namespace llvm
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
+
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#define HAVE_ANY_SIGNPOST_IMPL 1
+#endif
+
+SignpostEmitter::SignpostEmitter() {
+#if HAVE_ANY_SIGNPOST_IMPL
+  Impl = new SignpostEmitterImpl();
+#else // if HAVE_ANY_SIGNPOST_IMPL
+  Impl = nullptr;
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
+
+SignpostEmitter::~SignpostEmitter() {
+#if HAVE_ANY_SIGNPOST_IMPL
+  delete Impl;
+#endif // if HAVE_ANY_SIGNPOST_IMPL
+}
+
+bool SignpostEmitter::isEnabled() const {
+#if HAVE_ANY_SIGNPOST_IMPL
+  return Impl->isEnabled();
+#else
+  return false;
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
+
+void SignpostEmitter::startTimerInterval(Timer *T) {
+#if HAVE_ANY_SIGNPOST_IMPL
+  if (Impl == nullptr)
+    return;
+  return Impl->startTimerInterval(T);
+#endif // if HAVE_ANY_SIGNPOST_IMPL
+}
+
+void SignpostEmitter::endTimerInterval(Timer *T) {
+#if HAVE_ANY_SIGNPOST_IMPL
+  if (Impl == nullptr)
+    return;
+  Impl->endTimerInterval(T);
+#endif // if HAVE_ANY_SIGNPOST_IMPL
+}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index fed4a17d6635..f60464c8e756 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 1070c6672edc..36f0a81f6b00 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -1,9 +1,8 @@
 //===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index a55ad881d012..2a241f18c362 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -1,9 +1,8 @@
 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -96,14 +95,9 @@ unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
   assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
   T PtrOffset = static_cast<T>(PtrDiff);
 
-  // std::lower_bound returns the first EOL offset that's not-less-than
-  // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
-  // (including if PtrOffset refers to the EOL itself). If there's no such
-  // EOL, returns end().
-  auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
-
-  // Lines count from 1, so add 1 to the distance from the 0th line.
-  return (1 + (EOL - Offsets->begin()));
+  // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
+  // the line number.
+  return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
 }
 
 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index bf807e66e02c..96e09f9552bb 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -1,9 +1,8 @@
 //===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index d57300a75d1d..e4f0535d21aa 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -1,9 +1,8 @@
 //===-- Statistic.cpp - Easy way to expose stats information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,8 +135,7 @@ bool llvm::AreStatisticsEnabled() {
 }
 
 void StatisticInfo::sort() {
-  std::stable_sort(Stats.begin(), Stats.end(),
-                   [](const Statistic *LHS, const Statistic *RHS) {
+  llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) {
     if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
       return Cmp < 0;
 
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 386d74a47983..bf28b2be5657 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -1,9 +1,8 @@
 //===-- StringExtras.cpp - Implement the StringExtras header --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index c1f707ce50a5..6b5ea020dd46 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -1,9 +1,8 @@
 //===--- StringMap.cpp - String Hash table map implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index c591857c415d..82351017b8cc 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -1,9 +1,8 @@
 //===-- StringPool.cpp - Interned string pool -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index f0349260e22f..4bafc4ec7181 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -1,9 +1,8 @@
 //===-- StringRef.cpp - Lightweight String References ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/StringSaver.cpp b/lib/Support/StringSaver.cpp
index bf0ac8de9821..f7ccfb97ea79 100644
--- a/lib/Support/StringSaver.cpp
+++ b/lib/Support/StringSaver.cpp
@@ -1,9 +1,8 @@
 //===-- StringSaver.cpp ---------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/SymbolRemappingReader.cpp b/lib/Support/SymbolRemappingReader.cpp
index 264c890ce8f1..1caf0947216e 100644
--- a/lib/Support/SymbolRemappingReader.cpp
+++ b/lib/Support/SymbolRemappingReader.cpp
@@ -1,9 +1,8 @@
 //===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index 7fa6ae3f6199..47e0c72ec7c1 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -1,9 +1,8 @@
 //===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp
index 5b4d554befe4..6136e9219767 100644
--- a/lib/Support/TarWriter.cpp
+++ b/lib/Support/TarWriter.cpp
@@ -1,9 +1,8 @@
 //===-- TarWriter.cpp - Tar archive file creator --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index bdc0dc52c5e2..d213b9a8c6af 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -1,9 +1,8 @@
 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
 
 // This table should be sorted by the value of GPUKind
 // Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[33] = {
+constexpr GPUInfo AMDGCNGPUs[37] = {
   // Name         Canonical    Kind        Features
   //              Name
   {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
@@ -98,7 +97,11 @@ constexpr GPUInfo AMDGCNGPUs[33] = {
   {{"gfx902"},    {"gfx902"},  GK_GFX902,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
   {{"gfx904"},    {"gfx904"},  GK_GFX904,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
   {{"gfx906"},    {"gfx906"},  GK_GFX906,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx908"},    {"gfx908"},  GK_GFX908,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
   {{"gfx909"},    {"gfx909"},  GK_GFX909,  FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1010"},   {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1011"},   {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+  {{"gfx1012"},   {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
 };
 
 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -170,30 +173,36 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
 }
 
 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
-  if (GPU == "generic")
-    return {7, 0, 0};
-
   AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
-  if (AK == AMDGPU::GPUKind::GK_NONE)
+  if (AK == AMDGPU::GPUKind::GK_NONE) {
+    if (GPU == "generic-hsa")
+      return {7, 0, 0};
+    if (GPU == "generic")
+      return {6, 0, 0};
     return {0, 0, 0};
+  }
 
   switch (AK) {
-  case GK_GFX600: return {6, 0, 0};
-  case GK_GFX601: return {6, 0, 1};
-  case GK_GFX700: return {7, 0, 0};
-  case GK_GFX701: return {7, 0, 1};
-  case GK_GFX702: return {7, 0, 2};
-  case GK_GFX703: return {7, 0, 3};
-  case GK_GFX704: return {7, 0, 4};
-  case GK_GFX801: return {8, 0, 1};
-  case GK_GFX802: return {8, 0, 2};
-  case GK_GFX803: return {8, 0, 3};
-  case GK_GFX810: return {8, 1, 0};
-  case GK_GFX900: return {9, 0, 0};
-  case GK_GFX902: return {9, 0, 2};
-  case GK_GFX904: return {9, 0, 4};
-  case GK_GFX906: return {9, 0, 6};
-  case GK_GFX909: return {9, 0, 9};
-  default:        return {0, 0, 0};
+  case GK_GFX600:  return {6, 0, 0};
+  case GK_GFX601:  return {6, 0, 1};
+  case GK_GFX700:  return {7, 0, 0};
+  case GK_GFX701:  return {7, 0, 1};
+  case GK_GFX702:  return {7, 0, 2};
+  case GK_GFX703:  return {7, 0, 3};
+  case GK_GFX704:  return {7, 0, 4};
+  case GK_GFX801:  return {8, 0, 1};
+  case GK_GFX802:  return {8, 0, 2};
+  case GK_GFX803:  return {8, 0, 3};
+  case GK_GFX810:  return {8, 1, 0};
+  case GK_GFX900:  return {9, 0, 0};
+  case GK_GFX902:  return {9, 0, 2};
+  case GK_GFX904:  return {9, 0, 4};
+  case GK_GFX906:  return {9, 0, 6};
+  case GK_GFX908:  return {9, 0, 8};
+  case GK_GFX909:  return {9, 0, 9};
+  case GK_GFX1010: return {10, 1, 0};
+  case GK_GFX1011: return {10, 1, 1};
+  case GK_GFX1012: return {10, 1, 2};
+  default:         return {0, 0, 0};
   }
 }
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index bb63891cd713..1f9c3bbf8229 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -1,9 +1,8 @@
 //===--- TargetRegistry.cpp - Target registration -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index f6e4a652302c..44e6223cf17b 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -1,9 +1,8 @@
 //===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp
index d0212ca13467..40982d777914 100644
--- a/lib/Support/ThreadPool.cpp
+++ b/lib/Support/ThreadPool.cpp
@@ -1,9 +1,8 @@
 //==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index fcb1030e1ab4..e5899a60f4db 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/TimeProfiler.cpp b/lib/Support/TimeProfiler.cpp
new file mode 100644
index 000000000000..bc2340815645
--- /dev/null
+++ b/lib/Support/TimeProfiler.cpp
@@ -0,0 +1,199 @@
+//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hierarchical time profiler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include <cassert>
+#include <chrono>
+#include <string>
+#include <vector>
+
+using namespace std::chrono;
+
+namespace llvm {
+
+static cl::opt<unsigned> TimeTraceGranularity(
+    "time-trace-granularity",
+    cl::desc(
+        "Minimum time granularity (in microseconds) traced by time profiler"),
+    cl::init(500));
+
+TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
+
+typedef duration<steady_clock::rep, steady_clock::period> DurationType;
+typedef std::pair<size_t, DurationType> CountAndDurationType;
+typedef std::pair<std::string, CountAndDurationType>
+    NameAndCountAndDurationType;
+
+struct Entry {
+  time_point<steady_clock> Start;
+  DurationType Duration;
+  std::string Name;
+  std::string Detail;
+
+  Entry(time_point<steady_clock> &&S, DurationType &&D, std::string &&N,
+        std::string &&Dt)
+      : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)),
+        Detail(std::move(Dt)){};
+};
+
+struct TimeTraceProfiler {
+  TimeTraceProfiler() {
+    StartTime = steady_clock::now();
+  }
+
+  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
+    Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name),
+                       Detail());
+  }
+
+  void end() {
+    assert(!Stack.empty() && "Must call begin() first");
+    auto &E = Stack.back();
+    E.Duration = steady_clock::now() - E.Start;
+
+    // Only include sections longer than TimeTraceGranularity microseconds.
+    if (duration_cast<microseconds>(E.Duration).count() > TimeTraceGranularity)
+      Entries.emplace_back(E);
+
+    // Track total time taken by each "name", but only the topmost levels of
+    // them; e.g. if there's a template instantiation that instantiates other
+    // templates from within, we only want to add the topmost one. "Topmost"
+    // entries are the ones that have no currently open entry with the same
+    // name above them on the stack.
+    if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
+          return Val.Name == E.Name;
+        }) == Stack.rend()) {
+      auto &CountAndTotal = CountAndTotalPerName[E.Name];
+      CountAndTotal.first++;
+      CountAndTotal.second += E.Duration;
+    }
+
+    Stack.pop_back();
+  }
+
+  void Write(raw_pwrite_stream &OS) {
+    assert(Stack.empty() &&
+           "All profiler sections should be ended when calling Write");
+    json::OStream J(OS);
+    J.objectBegin();
+    J.attributeBegin("traceEvents");
+    J.arrayBegin();
+
+    // Emit all events for the main flame graph.
+    for (const auto &E : Entries) {
+      auto StartUs = duration_cast<microseconds>(E.Start - StartTime).count();
+      auto DurUs = duration_cast<microseconds>(E.Duration).count();
+
+      J.object([&]{
+        J.attribute("pid", 1);
+        J.attribute("tid", 0);
+        J.attribute("ph", "X");
+        J.attribute("ts", StartUs);
+        J.attribute("dur", DurUs);
+        J.attribute("name", E.Name);
+        J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
+      });
+    }
+
+    // Emit totals by section name as additional "thread" events, sorted from
+    // longest to shortest total duration.
+    int Tid = 1;
+    std::vector<NameAndCountAndDurationType> SortedTotals;
+    SortedTotals.reserve(CountAndTotalPerName.size());
+    for (const auto &E : CountAndTotalPerName)
+      SortedTotals.emplace_back(E.getKey(), E.getValue());
+
+    llvm::sort(SortedTotals.begin(), SortedTotals.end(),
+               [](const NameAndCountAndDurationType &A,
+                  const NameAndCountAndDurationType &B) {
+                 return A.second.second > B.second.second;
+               });
+    for (const auto &E : SortedTotals) {
+      auto DurUs = duration_cast<microseconds>(E.second.second).count();
+      auto Count = CountAndTotalPerName[E.first].first;
+
+      J.object([&]{
+        J.attribute("pid", 1);
+        J.attribute("tid", Tid);
+        J.attribute("ph", "X");
+        J.attribute("ts", 0);
+        J.attribute("dur", DurUs);
+        J.attribute("name", "Total " + E.first);
+        J.attributeObject("args", [&] {
+          J.attribute("count", int64_t(Count));
+          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
+        });
+      });
+
+      ++Tid;
+    }
+
+    // Emit metadata event with process name.
+    J.object([&] {
+      J.attribute("cat", "");
+      J.attribute("pid", 1);
+      J.attribute("tid", 0);
+      J.attribute("ts", 0);
+      J.attribute("ph", "M");
+      J.attribute("name", "process_name");
+      J.attributeObject("args", [&] { J.attribute("name", "clang"); });
+    });
+
+    J.arrayEnd();
+    J.attributeEnd();
+    J.objectEnd();
+  }
+
+  SmallVector<Entry, 16> Stack;
+  SmallVector<Entry, 128> Entries;
+  StringMap<CountAndDurationType> CountAndTotalPerName;
+  time_point<steady_clock> StartTime;
+};
+
+void timeTraceProfilerInitialize() {
+  assert(TimeTraceProfilerInstance == nullptr &&
+         "Profiler should not be initialized");
+  TimeTraceProfilerInstance = new TimeTraceProfiler();
+}
+
+void timeTraceProfilerCleanup() {
+  delete TimeTraceProfilerInstance;
+  TimeTraceProfilerInstance = nullptr;
+}
+
+void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
+  assert(TimeTraceProfilerInstance != nullptr &&
+         "Profiler object can't be null");
+  TimeTraceProfilerInstance->Write(OS);
+}
+
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; });
+}
+
+void timeTraceProfilerBegin(StringRef Name,
+                            llvm::function_ref<std::string()> Detail) {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->begin(Name, Detail);
+}
+
+void timeTraceProfilerEnd() {
+  if (TimeTraceProfilerInstance != nullptr)
+    TimeTraceProfilerInstance->end();
+}
+
+} // namespace llvm
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 82f5810dd107..2a7ff1eaaf63 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -1,9 +1,8 @@
 //===-- Timer.cpp - Interval Timing Support -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +19,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/Signposts.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
 #include <limits>
@@ -40,6 +40,9 @@ static std::string &getLibSupportInfoOutputFilename() {
 
 static ManagedStatic<sys::SmartMutex<true> > TimerLock;
 
+/// Allows llvm::Timer to emit signposts when supported.
+static ManagedStatic<SignpostEmitter> Signposts;
+
 namespace {
   static cl::opt<bool>
   TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
@@ -134,6 +137,7 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
 void Timer::startTimer() {
   assert(!Running && "Cannot start a running timer");
   Running = Triggered = true;
+  Signposts->startTimerInterval(this);
   StartTime = TimeRecord::getCurrentTime(true);
 }
 
@@ -142,6 +146,7 @@ void Timer::stopTimer() {
   Running = false;
   Time += TimeRecord::getCurrentTime(false);
   Time -= StartTime;
+  Signposts->endTimerInterval(this);
 }
 
 void Timer::clear() {
@@ -342,7 +347,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
   TimersToPrint.clear();
 }
 
-void TimerGroup::prepareToPrintList() {
+void TimerGroup::prepareToPrintList(bool ResetTime) {
   // See if any of our timers were started, if so add them to TimersToPrint.
   for (Timer *T = FirstTimer; T; T = T->Next) {
     if (!T->hasTriggered()) continue;
@@ -352,15 +357,20 @@ void TimerGroup::prepareToPrintList() {
 
     TimersToPrint.emplace_back(T->Time, T->Name, T->Description);
 
+    if (ResetTime)
+      T->clear();
+
     if (WasRunning)
       T->startTimer();
   }
 }
 
-void TimerGroup::print(raw_ostream &OS) {
-  sys::SmartScopedLock<true> L(*TimerLock);
-
-  prepareToPrintList();
+void TimerGroup::print(raw_ostream &OS, bool ResetAfterPrint) {
+  {
+    // After preparing the timers we can free the lock
+    sys::SmartScopedLock<true> L(*TimerLock);
+    prepareToPrintList(ResetAfterPrint);
+  }
 
   // If any timers were started, print the group.
   if (!TimersToPrint.empty())
@@ -400,7 +410,7 @@ void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
 const char *TimerGroup::printJSONValues(raw_ostream &OS, const char *delim) {
   sys::SmartScopedLock<true> L(*TimerLock);
 
-  prepareToPrintList();
+  prepareToPrintList(false);
   for (const PrintRecord &R : TimersToPrint) {
     OS << delim;
     delim = ",\n";
diff --git a/lib/Support/ToolOutputFile.cpp b/lib/Support/ToolOutputFile.cpp
index e12d9e824f7e..ed3a247f0115 100644
--- a/lib/Support/ToolOutputFile.cpp
+++ b/lib/Support/ToolOutputFile.cpp
@@ -1,9 +1,8 @@
 //===--- ToolOutputFile.cpp - Implement the ToolOutputFile class --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/TrigramIndex.cpp b/lib/Support/TrigramIndex.cpp
index 721763c88525..94810b56db8e 100644
--- a/lib/Support/TrigramIndex.cpp
+++ b/lib/Support/TrigramIndex.cpp
@@ -1,9 +1,8 @@
 //===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 26d9327f6208..d419463e6a5e 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -1,9 +1,8 @@
 //===--- Triple.cpp - Target triple helper class --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,6 +22,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
 
   case aarch64:        return "aarch64";
   case aarch64_be:     return "aarch64_be";
+  case aarch64_32:     return "aarch64_32";
   case arm:            return "arm";
   case armeb:          return "armeb";
   case arc:            return "arc";
@@ -81,7 +81,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
     return StringRef();
 
   case aarch64:
-  case aarch64_be:  return "aarch64";
+  case aarch64_be:
+  case aarch64_32:  return "aarch64";
 
   case arc:         return "arc";
 
@@ -209,6 +210,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
   case HermitCore: return "hermit";
   case Hurd: return "hurd";
   case WASI: return "wasi";
+  case Emscripten: return "emscripten";
   }
 
   llvm_unreachable("Invalid OSType");
@@ -226,6 +228,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case CODE16: return "code16";
   case EABI: return "eabi";
   case EABIHF: return "eabihf";
+  case ELFv1: return "elfv1";
+  case ELFv2: return "elfv2";
   case Android: return "android";
   case Musl: return "musl";
   case MuslEABI: return "musleabi";
@@ -235,6 +239,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case Cygnus: return "cygnus";
   case CoreCLR: return "coreclr";
   case Simulator: return "simulator";
+  case MacABI: return "macabi";
   }
 
   llvm_unreachable("Invalid EnvironmentType!");
@@ -260,8 +265,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
   return StringSwitch<Triple::ArchType>(Name)
     .Case("aarch64", aarch64)
     .Case("aarch64_be", aarch64_be)
+    .Case("aarch64_32", aarch64_32)
     .Case("arc", arc)
     .Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
+    .Case("arm64_32", aarch64_32)
     .Case("arm", arm)
     .Case("armeb", armeb)
     .Case("avr", avr)
@@ -389,8 +396,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("xscaleeb", Triple::armeb)
     .Case("aarch64", Triple::aarch64)
     .Case("aarch64_be", Triple::aarch64_be)
+    .Case("aarch64_32", Triple::aarch64_32)
     .Case("arc", Triple::arc)
     .Case("arm64", Triple::aarch64)
+    .Case("arm64_32", Triple::aarch64_32)
     .Case("arm", Triple::arm)
     .Case("armeb", Triple::armeb)
     .Case("thumb", Triple::thumb)
@@ -507,6 +516,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("hermit", Triple::HermitCore)
     .StartsWith("hurd", Triple::Hurd)
     .StartsWith("wasi", Triple::WASI)
+    .StartsWith("emscripten", Triple::Emscripten)
     .Default(Triple::UnknownOS);
 }
 
@@ -514,6 +524,8 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
   return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
     .StartsWith("eabihf", Triple::EABIHF)
     .StartsWith("eabi", Triple::EABI)
+    .StartsWith("elfv1", Triple::ELFv1)
+    .StartsWith("elfv2", Triple::ELFv2)
     .StartsWith("gnuabin32", Triple::GNUABIN32)
     .StartsWith("gnuabi64", Triple::GNUABI64)
     .StartsWith("gnueabihf", Triple::GNUEABIHF)
@@ -530,11 +542,15 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
     .StartsWith("cygnus", Triple::Cygnus)
     .StartsWith("coreclr", Triple::CoreCLR)
     .StartsWith("simulator", Triple::Simulator)
+    .StartsWith("macabi", Triple::MacABI)
     .Default(Triple::UnknownEnvironment);
 }
 
 static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
   return StringSwitch<Triple::ObjectFormatType>(EnvironmentName)
+    // "xcoff" must come before "coff" because of the order-dependent
+    // pattern matching.
+    .EndsWith("xcoff", Triple::XCOFF)
     .EndsWith("coff", Triple::COFF)
     .EndsWith("elf", Triple::ELF)
     .EndsWith("macho", Triple::MachO)
@@ -611,6 +627,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     return Triple::ARMSubArch_v8m_baseline;
   case ARM::ArchKind::ARMV8MMainline:
     return Triple::ARMSubArch_v8m_mainline;
+  case ARM::ArchKind::ARMV8_1MMainline:
+    return Triple::ARMSubArch_v8_1m_mainline;
   default:
     return Triple::NoSubArch;
   }
@@ -623,6 +641,7 @@ static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
   case Triple::ELF: return "elf";
   case Triple::MachO: return "macho";
   case Triple::Wasm: return "wasm";
+  case Triple::XCOFF: return "xcoff";
   }
   llvm_unreachable("unknown object format type");
 }
@@ -631,6 +650,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   switch (T.getArch()) {
   case Triple::UnknownArch:
   case Triple::aarch64:
+  case Triple::aarch64_32:
   case Triple::arm:
   case Triple::thumb:
   case Triple::x86:
@@ -687,6 +707,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   case Triple::ppc64:
     if (T.isOSDarwin())
       return Triple::MachO;
+    else if (T.isOSAIX())
+      return Triple::XCOFF;
     return Triple::ELF;
 
   case Triple::wasm32:
@@ -1212,6 +1234,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::msp430:
     return 16;
 
+  case llvm::Triple::aarch64_32:
   case llvm::Triple::arc:
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
@@ -1292,6 +1315,7 @@ Triple Triple::get32BitArchVariant() const {
     T.setArch(UnknownArch);
     break;
 
+  case Triple::aarch64_32:
   case Triple::amdil:
   case Triple::hsail:
   case Triple::spir:
@@ -1383,6 +1407,7 @@ Triple Triple::get64BitArchVariant() const {
     // Already 64-bit.
     break;
 
+  case Triple::aarch64_32:      T.setArch(Triple::aarch64);    break;
   case Triple::arm:             T.setArch(Triple::aarch64);    break;
   case Triple::armeb:           T.setArch(Triple::aarch64_be); break;
   case Triple::le32:            T.setArch(Triple::le64);       break;
@@ -1493,6 +1518,7 @@ Triple Triple::getLittleEndianArchVariant() const {
 bool Triple::isLittleEndian() const {
   switch (getArch()) {
   case Triple::aarch64:
+  case Triple::aarch64_32:
   case Triple::amdgcn:
   case Triple::amdil64:
   case Triple::amdil:
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 4726c8ab7494..fbbcd8848f1c 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -1,9 +1,8 @@
 //===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/Unicode.cpp b/lib/Support/Unicode.cpp
index b719bd826dc1..4d195069682b 100644
--- a/lib/Support/Unicode.cpp
+++ b/lib/Support/Unicode.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unicode.cpp - Unicode character properties  -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/COM.inc b/lib/Support/Unix/COM.inc
index 5b71de74ebf3..03a690ac3766 100644
--- a/lib/Support/Unix/COM.inc
+++ b/lib/Support/Unix/COM.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/COM.inc - Unix COM Implementation -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc
index 029451f347e8..a2a379963de0 100644
--- a/lib/Support/Unix/DynamicLibrary.inc
+++ b/lib/Support/Unix/DynamicLibrary.inc
@@ -1,9 +1,8 @@
 //===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index b65f84bf4444..17d78dc18be7 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,6 +49,23 @@ static std::string updateTripleOSVersion(std::string TargetTripleString) {
     TargetTripleString += "-darwin";
     TargetTripleString += getOSVersion();
   }
+  // On AIX, the AIX version and release should be that of the current host
+  // unless the version has already been specified.
+  if (Triple(LLVM_HOST_TRIPLE).getOS() == Triple::AIX) {
+    Triple TT(TargetTripleString);
+    if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) {
+      struct utsname name;
+      if (uname(&name) != -1) {
+        std::string NewOSName = Triple::getOSTypeName(Triple::AIX);
+        NewOSName += name.version;
+        NewOSName += '.';
+        NewOSName += name.release;
+        NewOSName += ".0.0";
+        TT.setOSName(NewOSName);
+        return TT.str();
+      }
+    }
+  }
   return TargetTripleString;
 }
 
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index adbfff2f59a5..a0927da50e48 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -1,9 +1,8 @@
 //===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,7 +45,7 @@ extern "C" void __clear_cache(void *, void*);
 namespace {
 
 int getPosixProtectionFlags(unsigned Flags) {
-  switch (Flags) {
+  switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
   case llvm::sys::Memory::MF_READ:
     return PROT_READ;
   case llvm::sys::Memory::MF_WRITE:
@@ -59,14 +58,13 @@ int getPosixProtectionFlags(unsigned Flags) {
       llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_WRITE | PROT_EXEC;
   case llvm::sys::Memory::MF_EXEC:
-#if defined(__FreeBSD__)
+#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC))
     // On PowerPC, having an executable page that has no read permission
     // can have unintended consequences.  The function InvalidateInstruction-
     // Cache uses instructions dcbf and icbi, both of which are treated by
     // the processor as loads.  If the page has no read permissions,
     // executing these instructions will result in a segmentation fault.
-    // Somehow, this problem is not present on Linux, but it does happen
-    // on FreeBSD.
     return PROT_READ | PROT_EXEC;
 #else
     return PROT_EXEC;
@@ -92,19 +90,24 @@ Memory::allocateMappedMemory(size_t NumBytes,
   if (NumBytes == 0)
     return MemoryBlock();
 
-  static const size_t PageSize = Process::getPageSize();
-  const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
-
-  int fd = -1;
-
-  int MMFlags = MAP_PRIVATE |
-#ifdef MAP_ANONYMOUS
-  MAP_ANONYMOUS
+  // On platforms that have it, we can use MAP_ANON to get a memory-mapped
+  // page without file backing, but we need a fallback of opening /dev/zero
+  // for strictly POSIX platforms instead.
+  int fd;
+#if defined(MAP_ANON)
+  fd = -1;
 #else
-  MAP_ANON
+  fd = open("/dev/zero", O_RDWR);
+  if (fd == -1) {
+    EC = std::error_code(errno, std::generic_category());
+    return MemoryBlock();
+  }
 #endif
-  ; // Ends statement above
 
+  int MMFlags = MAP_PRIVATE;
+#if defined(MAP_ANON)
+  MMFlags |= MAP_ANON;
+#endif
   int Protect = getPosixProtectionFlags(PFlags);
 
 #if defined(__NetBSD__) && defined(PROT_MPROTECT)
@@ -113,23 +116,39 @@ Memory::allocateMappedMemory(size_t NumBytes,
 
   // Use any near hint and the page size to set a page-aligned starting address
   uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
-                                      NearBlock->size() : 0;
+                                      NearBlock->allocatedSize() : 0;
+  static const size_t PageSize = Process::getPageSizeEstimate();
+  const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
+
   if (Start && Start % PageSize)
     Start += PageSize - Start % PageSize;
 
-  void *Addr = ::mmap(reinterpret_cast<void*>(Start), PageSize*NumPages,
-                      Protect, MMFlags, fd, 0);
+  // FIXME: Handle huge page requests (MF_HUGE_HINT).
+  void *Addr = ::mmap(reinterpret_cast<void *>(Start), PageSize*NumPages, Protect,
+                      MMFlags, fd, 0);
   if (Addr == MAP_FAILED) {
-    if (NearBlock) //Try again without a near hint
+    if (NearBlock) { //Try again without a near hint
+#if !defined(MAP_ANON)
+      close(fd);
+#endif
       return allocateMappedMemory(NumBytes, nullptr, PFlags, EC);
+    }
 
     EC = std::error_code(errno, std::generic_category());
+#if !defined(MAP_ANON)
+    close(fd);
+#endif
     return MemoryBlock();
   }
 
+#if !defined(MAP_ANON)
+  close(fd);
+#endif
+
   MemoryBlock Result;
   Result.Address = Addr;
-  Result.Size = NumPages*PageSize;
+  Result.AllocatedSize = PageSize*NumPages;
+  Result.Flags = PFlags;
 
   // Rely on protectMappedMemory to invalidate instruction cache.
   if (PFlags & MF_EXEC) {
@@ -143,22 +162,22 @@ Memory::allocateMappedMemory(size_t NumBytes,
 
 std::error_code
 Memory::releaseMappedMemory(MemoryBlock &M) {
-  if (M.Address == nullptr || M.Size == 0)
+  if (M.Address == nullptr || M.AllocatedSize == 0)
     return std::error_code();
 
-  if (0 != ::munmap(M.Address, M.Size))
+  if (0 != ::munmap(M.Address, M.AllocatedSize))
     return std::error_code(errno, std::generic_category());
 
   M.Address = nullptr;
-  M.Size = 0;
+  M.AllocatedSize = 0;
 
   return std::error_code();
 }
 
 std::error_code
 Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
-  static const size_t PageSize = Process::getPageSize();
-  if (M.Address == nullptr || M.Size == 0)
+  static const size_t PageSize = Process::getPageSizeEstimate();
+  if (M.Address == nullptr || M.AllocatedSize == 0)
     return std::error_code();
 
   if (!Flags)
@@ -166,7 +185,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
 
   int Protect = getPosixProtectionFlags(Flags);
   uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize);
-  uintptr_t End = alignAddr((uint8_t *)M.Address + M.Size, PageSize);
+  uintptr_t End = alignAddr((uint8_t *)M.Address + M.AllocatedSize, PageSize);
 
   bool InvalidateCache = (Flags & MF_EXEC);
 
@@ -179,7 +198,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
     if (Result != 0)
       return std::error_code(errno, std::generic_category());
 
-    Memory::InvalidateInstructionCache(M.Address, M.Size);
+    Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
     InvalidateCache = false;
   }
 #endif
@@ -190,7 +209,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
     return std::error_code(errno, std::generic_category());
 
   if (InvalidateCache)
-    Memory::InvalidateInstructionCache(M.Address, M.Size);
+    Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
 
   return std::error_code();
 }
diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc
index fe6b17041457..2c982b38d6ff 100644
--- a/lib/Support/Unix/Mutex.inc
+++ b/lib/Support/Unix/Mutex.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index d7cc0d627d09..e80880c6b3cb 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/Path.inc - Unix Path Implementation ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,6 +37,7 @@
 #ifdef __APPLE__
 #include <mach-o/dyld.h>
 #include <sys/attr.h>
+#include <copyfile.h>
 #elif defined(__DragonFly__)
 #include <sys/mount.h>
 #endif
@@ -56,7 +56,7 @@
 
 #include <sys/types.h>
 #if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) &&   \
-    !defined(__linux__) && !defined(__FreeBSD_kernel__)
+    !defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(_AIX)
 #include <sys/statvfs.h>
 #define STATVFS statvfs
 #define FSTATVFS fstatvfs
@@ -77,6 +77,14 @@
 #endif
 #endif
 #include <sys/vfs.h>
+#elif defined(_AIX)
+#include <sys/statfs.h>
+
+// <sys/vmount.h> depends on `uint` to be a typedef from <sys/types.h> to
+// `uint_t`; however, <sys/types.h> does not always declare `uint`. We provide
+// the typedef prior to including <sys/vmount.h> to work around this issue.
+typedef uint_t uint;
+#include <sys/vmount.h>
 #else
 #include <sys/mount.h>
 #endif
@@ -108,7 +116,11 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
   struct stat sb;
   char fullpath[PATH_MAX];
 
-  snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
+  int chars = snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
+  // We cannot write PATH_MAX characters because the string will be terminated
+  // with a null character. Fail if truncation happened.
+  if (chars >= PATH_MAX)
+    return 1;
   if (!realpath(fullpath, ret))
     return 1;
   if (stat(fullpath, &sb) != 0)
@@ -120,8 +132,6 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
 static char *
 getprogpath(char ret[PATH_MAX], const char *bin)
 {
-  char *pv, *s, *t;
-
   /* First approach: absolute path. */
   if (bin[0] == '/') {
     if (test_dir(ret, "/", bin) == 0)
@@ -140,18 +150,21 @@ getprogpath(char ret[PATH_MAX], const char *bin)
   }
 
   /* Third approach: $PATH */
+  char *pv;
   if ((pv = getenv("PATH")) == nullptr)
     return nullptr;
-  s = pv = strdup(pv);
-  if (!pv)
+  char *s = strdup(pv);
+  if (!s)
     return nullptr;
-  while ((t = strsep(&s, ":")) != nullptr) {
+  char *state;
+  for (char *t = strtok_r(s, ":", &state); t != nullptr;
+       t = strtok_r(nullptr, ":", &state)) {
     if (test_dir(ret, t, bin) == 0) {
-      free(pv);
+      free(s);
       return ret;
     }
   }
-  free(pv);
+  free(s);
   return nullptr;
 }
 #endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
@@ -173,8 +186,21 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
 #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) ||   \
     defined(__minix) || defined(__DragonFly__) ||                              \
     defined(__FreeBSD_kernel__) || defined(_AIX)
+  StringRef curproc("/proc/curproc/file");
   char exe_path[PATH_MAX];
-
+  // /proc is not mounted by default under FreeBSD, but gives more accurate
+  // information than argv[0] when it is.
+  if (sys::fs::exists(curproc)) {
+    ssize_t len = readlink(curproc.str().c_str(), exe_path, sizeof(exe_path));
+    if (len > 0) {
+      // Null terminate the string for realpath. readlink never null
+      // terminates its output.
+      len = std::min(len, ssize_t(sizeof(exe_path) - 1));
+      exe_path[len] = '\0';
+      return exe_path;
+    }
+  }
+  // If we don't have procfs mounted, fall back to argv[0]
   if (getprogpath(exe_path, argv0) != NULL)
     return exe_path;
 #elif defined(__linux__) || defined(__CYGWIN__)
@@ -196,20 +222,20 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
     // the program, and not the eventual binary file. Therefore, call realpath
     // so this behaves the same on all platforms.
 #if _POSIX_VERSION >= 200112 || defined(__GLIBC__)
-    char *real_path = realpath(exe_path, NULL);
-    std::string ret = std::string(real_path);
-    free(real_path);
-    return ret;
+    if (char *real_path = realpath(exe_path, NULL)) {
+      std::string ret = std::string(real_path);
+      free(real_path);
+      return ret;
+    }
 #else
     char real_path[MAXPATHLEN];
-    realpath(exe_path, real_path);
-    return std::string(real_path);
+    if (realpath(exe_path, real_path))
+      return std::string(real_path);
 #endif
-  } else {
-    // Fall back to the classical detection.
-    if (getprogpath(exe_path, argv0))
-      return exe_path;
   }
+  // Fall back to the classical detection.
+  if (getprogpath(exe_path, argv0))
+    return exe_path;
 #elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR)
   // Use dladdr to get executable path if available.
   Dl_info DLInfo;
@@ -246,7 +272,7 @@ uint32_t file_status::getLinkCount() const {
 
 ErrorOr<space_info> disk_space(const Twine &Path) {
   struct STATVFS Vfs;
-  if (::STATVFS(Path.str().c_str(), &Vfs))
+  if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs))
     return std::error_code(errno, std::generic_category());
   auto FrSize = STATVFS_F_FRSIZE(Vfs);
   space_info SpaceInfo;
@@ -398,6 +424,9 @@ static bool is_local_impl(struct STATVFS &Vfs) {
 #elif defined(__Fuchsia__)
   // Fuchsia doesn't yet support remote filesystem mounts.
   return true;
+#elif defined(__EMSCRIPTEN__)
+  // Emscripten doesn't currently support remote filesystem mounts.
+  return true;
 #elif defined(__HAIKU__)
   // Haiku doesn't expose this information.
   return false;
@@ -406,6 +435,40 @@ static bool is_local_impl(struct STATVFS &Vfs) {
   StringRef fstype(Vfs.f_basetype);
   // NFS is the only non-local fstype??
   return !fstype.equals("nfs");
+#elif defined(_AIX)
+  // Call mntctl; try more than twice in case of timing issues with a concurrent
+  // mount.
+  int Ret;
+  size_t BufSize = 2048u;
+  std::unique_ptr<char[]> Buf;
+  int Tries = 3;
+  while (Tries--) {
+    Buf = llvm::make_unique<char[]>(BufSize);
+    Ret = mntctl(MCTL_QUERY, BufSize, Buf.get());
+    if (Ret != 0)
+      break;
+    BufSize = *reinterpret_cast<unsigned int *>(Buf.get());
+    Buf.reset();
+  }
+
+  if (Ret == -1)
+    // There was an error; "remote" is the conservative answer.
+    return false;
+
+  // Look for the correct vmount entry.
+  char *CurObjPtr = Buf.get();
+  while (Ret--) {
+    struct vmount *Vp = reinterpret_cast<struct vmount *>(CurObjPtr);
+    static_assert(sizeof(Vfs.f_fsid) == sizeof(Vp->vmt_fsid),
+                  "fsid length mismatch");
+    if (memcmp(&Vfs.f_fsid, &Vp->vmt_fsid, sizeof Vfs.f_fsid) == 0)
+      return (Vp->vmt_flags & MNT_REMOTE) == 0;
+
+    CurObjPtr += Vp->vmt_length;
+  }
+
+  // vmount entry not found; "remote" is the conservative answer.
+  return false;
 #else
   return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL);
 #endif
@@ -413,7 +476,7 @@ static bool is_local_impl(struct STATVFS &Vfs) {
 
 std::error_code is_local(const Twine &Path, bool &Result) {
   struct STATVFS Vfs;
-  if (::STATVFS(Path.str().c_str(), &Vfs))
+  if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs))
     return std::error_code(errno, std::generic_category());
 
   Result = is_local_impl(Vfs);
@@ -447,7 +510,12 @@ std::error_code resize_file(int FD, uint64_t Size) {
   // If we have posix_fallocate use it. Unlike ftruncate it always allocates
   // space, so we get an error if the disk is full.
   if (int Err = ::posix_fallocate(FD, 0, Size)) {
-    if (Err != EINVAL && Err != EOPNOTSUPP)
+#ifdef _AIX
+    constexpr int NotSupportedError = ENOTSUP;
+#else
+    constexpr int NotSupportedError = EOPNOTSUPP;
+#endif
+    if (Err != EINVAL && Err != NotSupportedError)
       return std::error_code(Err, std::generic_category());
   }
 #endif
@@ -626,6 +694,14 @@ std::error_code status(int FD, file_status &Result) {
   return fillStatus(StatRet, Status, Result);
 }
 
+unsigned getUmask() {
+  // Choose an arbitrary new mask and reset the umask to the old mask.
+  // umask(2) never fails so ignore the return of the second call.
+  unsigned Mask = ::umask(0);
+  (void) ::umask(Mask);
+  return Mask;
+}
+
 std::error_code setPermissions(const Twine &Path, perms Permissions) {
   SmallString<128> PathStorage;
   StringRef P = Path.toNullTerminatedStringRef(PathStorage);
@@ -635,6 +711,12 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
   return std::error_code();
 }
 
+std::error_code setPermissions(int FD, perms Permissions) {
+  if (::fchmod(FD, Permissions))
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
 std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
                                                  TimePoint<> ModificationTime) {
 #if defined(HAVE_FUTIMENS)
@@ -722,7 +804,7 @@ const char *mapped_file_region::const_data() const {
 }
 
 int mapped_file_region::alignment() {
-  return Process::getPageSize();
+  return Process::getPageSizeEstimate();
 }
 
 std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
@@ -910,9 +992,54 @@ Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags,
   return ResultFD;
 }
 
-void closeFile(file_t &F) {
-  ::close(F);
+file_t getStdinHandle() { return 0; }
+file_t getStdoutHandle() { return 1; }
+file_t getStderrHandle() { return 2; }
+
+std::error_code readNativeFile(file_t FD, MutableArrayRef<char> Buf,
+                               size_t *BytesRead) {
+  *BytesRead = sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
+  if (ssize_t(*BytesRead) == -1)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+std::error_code readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
+                                    size_t Offset) {
+  char *BufPtr = Buf.data();
+  size_t BytesLeft = Buf.size();
+
+#ifndef HAVE_PREAD
+  // If we don't have pread, seek to Offset.
+  if (lseek(FD, Offset, SEEK_SET) == -1)
+    return std::error_code(errno, std::generic_category());
+#endif
+
+  while (BytesLeft) {
+#ifdef HAVE_PREAD
+    ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
+                                            Buf.size() - BytesLeft + Offset);
+#else
+    ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
+#endif
+    if (NumRead == -1) {
+      // Error while reading.
+      return std::error_code(errno, std::generic_category());
+    }
+    if (NumRead == 0) {
+      memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
+      break;
+    }
+    BytesLeft -= NumRead;
+    BufPtr += NumRead;
+  }
+  return std::error_code();
+}
+
+std::error_code closeFile(file_t &F) {
+  file_t TmpF = F;
   F = kInvalidFile;
+  return Process::SafelyCloseFileDescriptor(TmpF);
 }
 
 template <typename T>
@@ -1063,5 +1190,37 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
 
 } // end namespace path
 
+namespace fs {
+
+#ifdef __APPLE__
+/// This implementation tries to perform an APFS CoW clone of the file,
+/// which can be much faster and uses less space.
+/// Unfortunately fcopyfile(3) does not support COPYFILE_CLONE, so the
+/// file descriptor variant of this function still uses the default
+/// implementation.
+std::error_code copy_file(const Twine &From, const Twine &To) {
+  uint32_t Flag = COPYFILE_DATA;
+#if __has_builtin(__builtin_available)
+  if (__builtin_available(macos 10.12, *)) {
+    bool IsSymlink;
+    if (std::error_code Error = is_symlink_file(From, IsSymlink))
+      return Error;
+    // COPYFILE_CLONE clones the symlink instead of following it
+    // and returns EEXIST if the target file already exists.
+    if (!IsSymlink && !exists(To))
+      Flag = COPYFILE_CLONE;
+  }
+#endif
+  int Status =
+      copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag);
+
+  if (Status == 0)
+    return std::error_code();
+  return std::error_code(errno, std::generic_category());
+}
+#endif // __APPLE__
+
+} // end namespace fs
+
 } // end namespace sys
 } // end namespace llvm
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 3185f45a3a61..4115ee396582 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -1,9 +1,8 @@
 //===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,10 +32,7 @@
 #if HAVE_SIGNAL_H
 #include <signal.h>
 #endif
-// DragonFlyBSD, and OpenBSD have deprecated <malloc.h> for
-// <stdlib.h> instead. Unix.h includes this for us already.
-#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \
-    !defined(__OpenBSD__) 
+#if defined(HAVE_MALLINFO)
 #include <malloc.h>
 #endif
 #if defined(HAVE_MALLCTL)
@@ -73,7 +69,7 @@ static std::pair<std::chrono::microseconds, std::chrono::microseconds> getRUsage
 
 // On Cygwin, getpagesize() returns 64k(AllocationGranularity) and
 // offset in mmap(3) should be aligned to the AllocationGranularity.
-unsigned Process::getPageSize() {
+Expected<unsigned> Process::getPageSize() {
 #if defined(HAVE_GETPAGESIZE)
   static const int page_size = ::getpagesize();
 #elif defined(HAVE_SYSCONF)
@@ -81,6 +77,9 @@ unsigned Process::getPageSize() {
 #else
 #error Cannot get the page size on this machine
 #endif
+  if (page_size == -1)
+    return errorCodeToError(std::error_code(errno, std::generic_category()));
+
   return static_cast<unsigned>(page_size);
 }
 
@@ -292,7 +291,8 @@ static unsigned getColumns(int FileID) {
 
   unsigned Columns = 0;
 
-#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) \
+  && !(defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE))
   // Try to determine the width of the terminal.
   struct winsize ws;
   if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index d0abc3763e82..c4123a64046f 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -246,12 +245,16 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
       Envp = const_cast<const char **>(*_NSGetEnviron());
 #endif
 
-    // Explicitly initialized to prevent what appears to be a valgrind false
-    // positive.
-    pid_t PID = 0;
-    int Err = posix_spawn(&PID, Program.str().c_str(), FileActions,
-                          /*attrp*/ nullptr, const_cast<char **>(Argv),
-                          const_cast<char **>(Envp));
+    constexpr int maxRetries = 8;
+    int retries = 0;
+    pid_t PID;
+    int Err;
+    do {
+      PID = 0; // Make Valgrind happy.
+      Err = posix_spawn(&PID, Program.str().c_str(), FileActions,
+                        /*attrp*/ nullptr, const_cast<char **>(Argv),
+                        const_cast<char **>(Envp));
+    } while (Err == EINTR && ++retries < maxRetries);
 
     if (FileActions)
       posix_spawn_file_actions_destroy(FileActions);
diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc
index 85a104334a27..8b47dfa0f85c 100644
--- a/lib/Support/Unix/RWMutex.inc
+++ b/lib/Support/Unix/RWMutex.inc
@@ -1,9 +1,8 @@
 //= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock  =//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index ad88d5e96906..634c16aa36c7 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -1,9 +1,8 @@
 //===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,6 +42,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/UniqueLock.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -81,10 +81,13 @@
 using namespace llvm;
 
 static RETSIGTYPE SignalHandler(int Sig);  // defined below.
+static RETSIGTYPE InfoSignalHandler(int Sig);  // defined below.
 
+using SignalHandlerFunctionType = void (*)();
 /// The function to call if ctrl-c is pressed.
-using InterruptFunctionType = void (*)();
-static std::atomic<InterruptFunctionType> InterruptFunction =
+static std::atomic<SignalHandlerFunctionType> InterruptFunction =
+    ATOMIC_VAR_INIT(nullptr);
+static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
     ATOMIC_VAR_INIT(nullptr);
 
 namespace {
@@ -200,15 +203,15 @@ struct FilesToRemoveCleanup {
 
 static StringRef Argv0;
 
-// Signals that represent requested termination. There's no bug or failure, or
-// if there is, it's not our direct responsibility. For whatever reason, our
-// continued execution is no longer desirable.
+/// Signals that represent requested termination. There's no bug or failure, or
+/// if there is, it's not our direct responsibility. For whatever reason, our
+/// continued execution is no longer desirable.
 static const int IntSigs[] = {
-  SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+  SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR2
 };
 
-// Signals that represent that we have a bug, and our prompt termination has
-// been ordered.
+/// Signals that represent that we have a bug, and our prompt termination has
+/// been ordered.
 static const int KillSigs[] = {
   SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
 #ifdef SIGSYS
@@ -225,11 +228,24 @@ static const int KillSigs[] = {
 #endif
 };
 
+/// Signals that represent requests for status.
+static const int InfoSigs[] = {
+  SIGUSR1
+#ifdef SIGINFO
+  , SIGINFO
+#endif
+};
+
+static const size_t NumSigs =
+    array_lengthof(IntSigs) + array_lengthof(KillSigs) +
+    array_lengthof(InfoSigs);
+
+
 static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
 static struct {
   struct sigaction SA;
   int SigNo;
-} RegisteredSignalInfo[array_lengthof(IntSigs) + array_lengthof(KillSigs)];
+} RegisteredSignalInfo[NumSigs];
 
 #if defined(HAVE_SIGALTSTACK)
 // Hold onto both the old and new alternate signal stack so that it's not
@@ -277,15 +293,24 @@ static void RegisterHandlers() { // Not signal-safe.
   // be able to reliably handle signals due to stack overflow.
   CreateSigAltStack();
 
-  auto registerHandler = [&](int Signal) {
+  enum class SignalKind { IsKill, IsInfo };
+  auto registerHandler = [&](int Signal, SignalKind Kind) {
     unsigned Index = NumRegisteredSignals.load();
     assert(Index < array_lengthof(RegisteredSignalInfo) &&
            "Out of space for signal handlers!");
 
     struct sigaction NewHandler;
 
-    NewHandler.sa_handler = SignalHandler;
-    NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
+    switch (Kind) {
+    case SignalKind::IsKill:
+      NewHandler.sa_handler = SignalHandler;
+      NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
+      break;
+    case SignalKind::IsInfo:
+      NewHandler.sa_handler = InfoSignalHandler;
+      NewHandler.sa_flags = SA_ONSTACK;
+      break;
+    }
     sigemptyset(&NewHandler.sa_mask);
 
     // Install the new handler, save the old one in RegisteredSignalInfo.
@@ -295,9 +320,11 @@ static void RegisterHandlers() { // Not signal-safe.
   };
 
   for (auto S : IntSigs)
-    registerHandler(S);
+    registerHandler(S, SignalKind::IsKill);
   for (auto S : KillSigs)
-    registerHandler(S);
+    registerHandler(S, SignalKind::IsKill);
+  for (auto S : InfoSigs)
+    registerHandler(S, SignalKind::IsInfo);
 }
 
 static void UnregisterHandlers() {
@@ -357,6 +384,12 @@ static RETSIGTYPE SignalHandler(int Sig) {
 #endif
 }
 
+static RETSIGTYPE InfoSignalHandler(int Sig) {
+  SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
+  if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
+    CurrentInfoFunction();
+}
+
 void llvm::sys::RunInterruptHandlers() {
   RemoveFilesToRemove();
 }
@@ -366,6 +399,11 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
   RegisterHandlers();
 }
 
+void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
+  InfoSignalFunction.exchange(Handler);
+  RegisterHandlers();
+}
+
 // The public API
 bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
                                    std::string* ErrMsg) {
@@ -540,11 +578,8 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
       else    OS << d;
       free(d);
 
-      // FIXME: When we move to C++11, use %t length modifier. It's not in
-      // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
-      // the stack offset for a stack dump isn't likely to cause any problems.
-      OS << format(" + %u",(unsigned)((char*)StackTrace[i]-
-                                      (char*)dlinfo.dli_saddr));
+      OS << format(" + %tu", (static_cast<const char*>(StackTrace[i])-
+                              static_cast<const char*>(dlinfo.dli_saddr)));
     }
     OS << '\n';
   }
diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc
index a6564f0fa281..a402ae980424 100644
--- a/lib/Support/Unix/ThreadLocal.inc
+++ b/lib/Support/Unix/ThreadLocal.inc
@@ -1,9 +1,8 @@
 //=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc
index 2d49ce1ad747..ed9a96563055 100644
--- a/lib/Support/Unix/Threading.inc
+++ b/lib/Support/Unix/Threading.inc
@@ -1,9 +1,8 @@
 //===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -202,6 +201,12 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
   char buf[len];
   ::pthread_getname_np(::pthread_self(), buf, len);
 
+  Name.append(buf, buf + strlen(buf));
+#elif defined(__OpenBSD__)
+  constexpr uint32_t len = get_max_thread_name_length_impl();
+  char buf[len];
+  ::pthread_get_name_np(::pthread_self(), buf, len);
+
   Name.append(buf, buf + strlen(buf));
 #elif defined(__linux__)
 #if HAVE_PTHREAD_GETNAME_NP
@@ -212,3 +217,42 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
 #endif
 #endif
 }
+
+SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
+#if defined(__linux__) && defined(SCHED_IDLE)
+  // Some *really* old glibcs are missing SCHED_IDLE.
+  // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
+  // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
+  sched_param priority;
+  // For both policies used below, param->sched_priority must be 0.
+  priority.sched_priority = 0;
+  // SCHED_IDLE    for running very low priority background jobs.
+  // SCHED_OTHER   the standard round-robin time-sharing policy;
+  return !pthread_setschedparam(
+             pthread_self(),
+             Priority == ThreadPriority::Background ? SCHED_IDLE : SCHED_OTHER,
+             &priority)
+             ? SetThreadPriorityResult::SUCCESS // the call returned 0
+             : SetThreadPriorityResult::FAILURE;
+#elif defined(__APPLE__)
+  // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/getpriority.2.html
+  // When setting a thread into background state the scheduling priority is set
+  // to lowest value, disk and network IO are throttled. Network IO will be
+  // throttled for any sockets the thread opens after going into background
+  // state. Any previously opened sockets are not affected.
+
+  // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/getiopolicy_np.3.html
+  // I/Os with THROTTLE policy are called THROTTLE I/Os. If a THROTTLE I/O
+  // request occurs within a small time window (usually a fraction of a second)
+  // of another NORMAL I/O request, the thread that issues the THROTTLE I/O is
+  // forced to sleep for a certain interval. This slows down the thread that
+  // issues the THROTTLE I/O so that NORMAL I/Os can utilize most of the disk
+  // I/O bandwidth.
+  return !setpriority(PRIO_DARWIN_THREAD, 0,
+                      Priority == ThreadPriority::Background ? PRIO_DARWIN_BG
+                                                             : 0)
+             ? SetThreadPriorityResult::SUCCESS // the call returned 0
+             : SetThreadPriorityResult::FAILURE;
+#endif
+  return SetThreadPriorityResult::FAILURE; // neither branch above compiled in
+}
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index 0c5d4de556d5..86309b0567f5 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -1,9 +1,8 @@
 //===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,7 @@
 //===          is guaranteed to work on all UNIX variants.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/Errno.h"
 #include <algorithm>
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
index f4253391d952..b363ef779560 100644
--- a/lib/Support/Unix/Watchdog.inc
+++ b/lib/Support/Unix/Watchdog.inc
@@ -1,9 +1,8 @@
 //===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 8d852a67c075..886cb6ba3311 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -1,9 +1,8 @@
 //===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/VersionTuple.cpp b/lib/Support/VersionTuple.cpp
index 3f219bfbedfa..60b59424fbb4 100644
--- a/lib/Support/VersionTuple.cpp
+++ b/lib/Support/VersionTuple.cpp
@@ -1,9 +1,8 @@
 //===- VersionTuple.cpp - Version Number Handling ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp
index f2a8a1bb27af..5d3480e97148 100644
--- a/lib/Support/VirtualFileSystem.cpp
+++ b/lib/Support/VirtualFileSystem.cpp
@@ -1,9 +1,8 @@
 //===- VirtualFileSystem.cpp - Virtual File System Layer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,8 +56,10 @@
 using namespace llvm;
 using namespace llvm::vfs;
 
+using llvm::sys::fs::file_t;
 using llvm::sys::fs::file_status;
 using llvm::sys::fs::file_type;
+using llvm::sys::fs::kInvalidFile;
 using llvm::sys::fs::perms;
 using llvm::sys::fs::UniqueID;
 
@@ -67,19 +68,19 @@ Status::Status(const file_status &Status)
       User(Status.getUser()), Group(Status.getGroup()), Size(Status.getSize()),
       Type(Status.type()), Perms(Status.permissions()) {}
 
-Status::Status(StringRef Name, UniqueID UID, sys::TimePoint<> MTime,
+Status::Status(const Twine &Name, UniqueID UID, sys::TimePoint<> MTime,
                uint32_t User, uint32_t Group, uint64_t Size, file_type Type,
                perms Perms)
-    : Name(Name), UID(UID), MTime(MTime), User(User), Group(Group), Size(Size),
-      Type(Type), Perms(Perms) {}
+    : Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group),
+      Size(Size), Type(Type), Perms(Perms) {}
 
-Status Status::copyWithNewName(const Status &In, StringRef NewName) {
+Status Status::copyWithNewName(const Status &In, const Twine &NewName) {
   return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
                 In.getUser(), In.getGroup(), In.getSize(), In.getType(),
                 In.getPermissions());
 }
 
-Status Status::copyWithNewName(const file_status &In, StringRef NewName) {
+Status Status::copyWithNewName(const file_status &In, const Twine &NewName) {
   return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
                 In.getUser(), In.getGroup(), In.getSize(), In.type(),
                 In.permissions());
@@ -171,15 +172,15 @@ namespace {
 class RealFile : public File {
   friend class RealFileSystem;
 
-  int FD;
+  file_t FD;
   Status S;
   std::string RealName;
 
-  RealFile(int FD, StringRef NewName, StringRef NewRealPathName)
+  RealFile(file_t FD, StringRef NewName, StringRef NewRealPathName)
       : FD(FD), S(NewName, {}, {}, {}, {}, {},
                   llvm::sys::fs::file_type::status_error, {}),
         RealName(NewRealPathName.str()) {
-    assert(FD >= 0 && "Invalid or inactive file descriptor");
+    assert(FD != kInvalidFile && "Invalid or inactive file descriptor");
   }
 
 public:
@@ -199,7 +200,7 @@ public:
 RealFile::~RealFile() { close(); }
 
 ErrorOr<Status> RealFile::status() {
-  assert(FD != -1 && "cannot stat closed file");
+  assert(FD != kInvalidFile && "cannot stat closed file");
   if (!S.isStatusKnown()) {
     file_status RealStatus;
     if (std::error_code EC = sys::fs::status(FD, RealStatus))
@@ -216,22 +217,41 @@ ErrorOr<std::string> RealFile::getName() {
 ErrorOr<std::unique_ptr<MemoryBuffer>>
 RealFile::getBuffer(const Twine &Name, int64_t FileSize,
                     bool RequiresNullTerminator, bool IsVolatile) {
-  assert(FD != -1 && "cannot get buffer for closed file");
+  assert(FD != kInvalidFile && "cannot get buffer for closed file");
   return MemoryBuffer::getOpenFile(FD, Name, FileSize, RequiresNullTerminator,
                                    IsVolatile);
 }
 
 std::error_code RealFile::close() {
-  std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD);
-  FD = -1;
+  std::error_code EC = sys::fs::closeFile(FD);
+  FD = kInvalidFile;
   return EC;
 }
 
 namespace {
 
-/// The file system according to your operating system.
+/// A file system according to your operating system.
+/// This may be linked to the process's working directory, or maintain its own.
+///
+/// Currently, its own working directory is emulated by storing the path and
+/// sending absolute paths to llvm::sys::fs:: functions.
+/// A more principled approach would be to push this down a level, modelling
+/// the working dir as an llvm::sys::fs::WorkingDir or similar.
+/// This would enable the use of openat()-style functions on some platforms.
 class RealFileSystem : public FileSystem {
 public:
+  explicit RealFileSystem(bool LinkCWDToProcess) {
+    if (!LinkCWDToProcess) {
+      SmallString<128> PWD, RealPWD;
+      if (llvm::sys::fs::current_path(PWD))
+        return; // Awful, but nothing to do here.
+      if (llvm::sys::fs::real_path(PWD, RealPWD))
+        WD = {PWD, PWD};
+      else
+        WD = {PWD, RealPWD};
+    }
+  }
+
   ErrorOr<Status> status(const Twine &Path) override;
   ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
@@ -243,72 +263,95 @@ public:
                               SmallVectorImpl<char> &Output) const override;
 
 private:
-  mutable std::mutex CWDMutex;
-  mutable std::string CWDCache;
+  // If this FS has its own working dir, use it to make Path absolute.
+  // The returned twine is safe to use as long as both Storage and Path live.
+  Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const {
+    if (!WD)
+      return Path;
+    Path.toVector(Storage);
+    sys::fs::make_absolute(WD->Resolved, Storage);
+    return Storage;
+  }
+
+  struct WorkingDirectory {
+    // The current working directory, without symlinks resolved. (echo $PWD).
+    SmallString<128> Specified;
+    // The current working directory, with links resolved. (readlink .).
+    SmallString<128> Resolved;
+  };
+  Optional<WorkingDirectory> WD;
 };
 
 } // namespace
 
 ErrorOr<Status> RealFileSystem::status(const Twine &Path) {
+  SmallString<256> Storage;
   sys::fs::file_status RealStatus;
-  if (std::error_code EC = sys::fs::status(Path, RealStatus))
+  if (std::error_code EC =
+          sys::fs::status(adjustPath(Path, Storage), RealStatus))
     return EC;
-  return Status::copyWithNewName(RealStatus, Path.str());
+  return Status::copyWithNewName(RealStatus, Path);
 }
 
 ErrorOr<std::unique_ptr<File>>
 RealFileSystem::openFileForRead(const Twine &Name) {
-  int FD;
-  SmallString<256> RealName;
-  if (std::error_code EC =
-          sys::fs::openFileForRead(Name, FD, sys::fs::OF_None, &RealName))
-    return EC;
-  return std::unique_ptr<File>(new RealFile(FD, Name.str(), RealName.str()));
+  SmallString<256> RealName, Storage;
+  Expected<file_t> FDOrErr = sys::fs::openNativeFileForRead(
+      adjustPath(Name, Storage), sys::fs::OF_None, &RealName);
+  if (!FDOrErr)
+    return errorToErrorCode(FDOrErr.takeError());
+  return std::unique_ptr<File>(
+      new RealFile(*FDOrErr, Name.str(), RealName.str()));
 }
 
 llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
-  std::lock_guard<std::mutex> Lock(CWDMutex);
-  if (!CWDCache.empty())
-    return CWDCache;
-  SmallString<256> Dir;
+  if (WD)
+    return WD->Specified.str();
+
+  SmallString<128> Dir;
   if (std::error_code EC = llvm::sys::fs::current_path(Dir))
     return EC;
-  CWDCache = Dir.str();
-  return CWDCache;
+  return Dir.str();
 }
 
 std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
-  // FIXME: chdir is thread hostile; on the other hand, creating the same
-  // behavior as chdir is complex: chdir resolves the path once, thus
-  // guaranteeing that all subsequent relative path operations work
-  // on the same path the original chdir resulted in. This makes a
-  // difference for example on network filesystems, where symlinks might be
-  // switched during runtime of the tool. Fixing this depends on having a
-  // file system abstraction that allows openat() style interactions.
-  if (auto EC = llvm::sys::fs::set_current_path(Path))
-    return EC;
-
-  // Invalidate cache.
-  std::lock_guard<std::mutex> Lock(CWDMutex);
-  CWDCache.clear();
+  if (!WD)
+    return llvm::sys::fs::set_current_path(Path);
+
+  SmallString<128> Absolute, Resolved, Storage;
+  adjustPath(Path, Storage).toVector(Absolute);
+  bool IsDir;
+  if (auto Err = llvm::sys::fs::is_directory(Absolute, IsDir))
+    return Err;
+  if (!IsDir)
+    return std::make_error_code(std::errc::not_a_directory);
+  if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved))
+    return Err;
+  WD = {Absolute, Resolved};
   return std::error_code();
 }
 
 std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) {
-  return llvm::sys::fs::is_local(Path, Result);
+  SmallString<256> Storage;
+  return llvm::sys::fs::is_local(adjustPath(Path, Storage), Result);
 }
 
 std::error_code
 RealFileSystem::getRealPath(const Twine &Path,
                             SmallVectorImpl<char> &Output) const {
-  return llvm::sys::fs::real_path(Path, Output);
+  SmallString<256> Storage;
+  return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output);
 }
 
 IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
-  static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
+  static IntrusiveRefCntPtr<FileSystem> FS(new RealFileSystem(true));
   return FS;
 }
 
+std::unique_ptr<FileSystem> vfs::createPhysicalFileSystem() {
+  return llvm::make_unique<RealFileSystem>(false); // LinkCWDToProcess = false
+}
+
 namespace {
 
 class RealFSDirIter : public llvm::vfs::detail::DirIterImpl {
@@ -334,7 +377,9 @@ public:
 
 directory_iterator RealFileSystem::dir_begin(const Twine &Dir,
                                              std::error_code &EC) {
-  return directory_iterator(std::make_shared<RealFSDirIter>(Dir, EC));
+  SmallString<128> Storage;
+  return directory_iterator(
+      std::make_shared<RealFSDirIter>(adjustPath(Dir, Storage), EC));
 }
 
 //===-----------------------------------------------------------------------===/
@@ -511,7 +556,7 @@ public:
   /// Return the \p Status for this node. \p RequestedName should be the name
   /// through which the caller referred to this node. It will override
   /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
-  Status getStatus(StringRef RequestedName) const {
+  Status getStatus(const Twine &RequestedName) const {
     return Status::copyWithNewName(Stat, RequestedName);
   }
   llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
@@ -585,7 +630,7 @@ public:
   /// Return the \p Status for this node. \p RequestedName should be the name
   /// through which the caller referred to this node. It will override
   /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
-  Status getStatus(StringRef RequestedName) const {
+  Status getStatus(const Twine &RequestedName) const {
     return Status::copyWithNewName(Stat, RequestedName);
   }
   InMemoryNode *getChild(StringRef Name) {
@@ -619,7 +664,7 @@ public:
 };
 
 namespace {
-Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
+Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
   if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
     return Dir->getStatus(RequestedName);
   if (auto File = dyn_cast<detail::InMemoryFile>(Node))
@@ -817,7 +862,7 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
 llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
   auto Node = lookupInMemoryNode(*this, Root.get(), Path);
   if (Node)
-    return detail::getNodeStatus(*Node, Path.str());
+    return detail::getNodeStatus(*Node, Path);
   return Node.getError();
 }
 
@@ -1237,7 +1282,7 @@ class llvm::vfs::RedirectingFileSystemParser {
         EntryArrayContents;
     std::string ExternalContentsPath;
     std::string Name;
-    yaml::Node *NameValueNode;
+    yaml::Node *NameValueNode = nullptr;
     auto UseExternalName =
         RedirectingFileSystem::RedirectingFileEntry::NK_NotSet;
     RedirectingFileSystem::EntryKind Kind;
@@ -1633,7 +1678,7 @@ static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
                                       Status ExternalStatus) {
   Status S = ExternalStatus;
   if (!UseExternalNames)
-    S = Status::copyWithNewName(S, Path.str());
+    S = Status::copyWithNewName(S, Path);
   S.IsVFSMapped = true;
   return S;
 }
@@ -1650,7 +1695,7 @@ ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
     return S;
   } else { // directory
     auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
-    return Status::copyWithNewName(DE->getStatus(), Path.str());
+    return Status::copyWithNewName(DE->getStatus(), Path);
   }
 }
 
diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp
index be55e3122e70..246f3dc7a0ca 100644
--- a/lib/Support/Watchdog.cpp
+++ b/lib/Support/Watchdog.cpp
@@ -1,9 +1,8 @@
 //===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/COM.inc b/lib/Support/Windows/COM.inc
index 54f3ecf28ec2..002182bc3939 100644
--- a/lib/Support/Windows/COM.inc
+++ b/lib/Support/Windows/COM.inc
@@ -1,9 +1,8 @@
 //==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 1d47f0848a6d..71b206c4cf9e 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -1,9 +1,8 @@
 //===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 58c4dc5d678f..21b947f26df3 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Win32/Host.inc ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index 318e65aaa9ee..a67f9c7d0f35 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -1,9 +1,8 @@
 //===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,7 +22,7 @@
 namespace {
 
 DWORD getWindowsProtectionFlags(unsigned Flags) {
-  switch (Flags) {
+  switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
   // Contrary to what you might expect, the Windows page protection flags
   // are not a bitwise combination of RWX values
   case llvm::sys::Memory::MF_READ:
@@ -48,6 +47,9 @@ DWORD getWindowsProtectionFlags(unsigned Flags) {
   return PAGE_NOACCESS;
 }
 
+// While we'd be happy to allocate single pages, the Windows allocation
+// granularity may be larger than a single page (in practice, it is 64K)
+// so mapping less than that will create an unreachable fragment of memory.
 size_t getAllocationGranularity() {
   SYSTEM_INFO  Info;
   ::GetSystemInfo(&Info);
@@ -57,6 +59,38 @@ size_t getAllocationGranularity() {
     return Info.dwAllocationGranularity;
 }
 
+// Large/huge memory pages need explicit process permissions in order to be
+// used. See https://blogs.msdn.microsoft.com/oldnewthing/20110128-00/?p=11643
+// Also large pages need to be manually enabled on your OS. If all this is
+// successful, we return the minimal large memory page size; otherwise 0.
+static size_t enableProcessLargePages() {
+  HANDLE Token = 0;
+  size_t LargePageMin = GetLargePageMinimum();
+  if (LargePageMin)
+    OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
+                     &Token);
+  if (!Token) // also taken when LargePageMin is 0 and no token was opened
+    return 0;
+  LUID Luid;
+  if (!LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &Luid)) {
+    CloseHandle(Token);
+    return 0;
+  }
+  TOKEN_PRIVILEGES TP{};
+  TP.PrivilegeCount = 1;
+  TP.Privileges[0].Luid = Luid;
+  TP.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+  if (!AdjustTokenPrivileges(Token, FALSE, &TP, 0, 0, 0)) {
+    CloseHandle(Token);
+    return 0;
+  }
+  DWORD E = GetLastError(); // read before CloseHandle runs
+  CloseHandle(Token);
+  if (E == ERROR_SUCCESS) // required even though the call above succeeded
+    return LargePageMin;
+  return 0;
+}
+
 } // namespace
 
 namespace llvm {
@@ -75,22 +109,23 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
   if (NumBytes == 0)
     return MemoryBlock();
 
-  // While we'd be happy to allocate single pages, the Windows allocation
-  // granularity may be larger than a single page (in practice, it is 64K)
-  // so mapping less than that will create an unreachable fragment of memory.
-  // Avoid using one-time initialization of static locals here, since they
-  // aren't thread safe with MSVC.
-  static volatile size_t GranularityCached;
-  size_t Granularity = GranularityCached;
-  if (Granularity == 0) {
-    Granularity = getAllocationGranularity();
-    GranularityCached = Granularity;
+  static size_t DefaultGranularity = getAllocationGranularity();
+  static size_t LargePageGranularity = enableProcessLargePages();
+
+  DWORD AllocType = MEM_RESERVE | MEM_COMMIT;
+  bool HugePages = false;
+  size_t Granularity = DefaultGranularity;
+
+  if ((Flags & MF_HUGE_HINT) && LargePageGranularity > 0) {
+    AllocType |= MEM_LARGE_PAGES;
+    HugePages = true;
+    Granularity = LargePageGranularity;
   }
 
-  const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity;
+  size_t NumBlocks = (NumBytes + Granularity - 1) / Granularity;
 
   uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
-                                NearBlock->size()
+                                NearBlock->allocatedSize()
                            : 0;
 
   // If the requested address is not aligned to the allocation granularity,
@@ -100,13 +135,13 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
 
   DWORD Protect = getWindowsProtectionFlags(Flags);
 
-  void *PA = ::VirtualAlloc(reinterpret_cast<void*>(Start),
-                            NumBlocks*Granularity,
-                            MEM_RESERVE | MEM_COMMIT, Protect);
+  size_t AllocSize = NumBlocks * Granularity;
+  void *PA = ::VirtualAlloc(reinterpret_cast<void *>(Start),
+                            AllocSize, AllocType, Protect);
   if (PA == NULL) {
-    if (NearBlock) {
-      // Try again without the NearBlock hint
-      return allocateMappedMemory(NumBytes, NULL, Flags, EC);
+    if (NearBlock || HugePages) {
+      // Try again without the NearBlock hint and without large memory pages
+      return allocateMappedMemory(NumBytes, NULL, Flags & ~MF_HUGE_HINT, EC);
     }
     EC = mapWindowsError(::GetLastError());
     return MemoryBlock();
@@ -114,40 +149,41 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
 
   MemoryBlock Result;
   Result.Address = PA;
-  Result.Size = NumBlocks*Granularity;
+  Result.AllocatedSize = AllocSize;
+  Result.Flags = (Flags & ~MF_HUGE_HINT) | (HugePages ? MF_HUGE_HINT : 0);
 
   if (Flags & MF_EXEC)
-    Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+    Memory::InvalidateInstructionCache(Result.Address, AllocSize);
 
   return Result;
 }
 
   std::error_code Memory::releaseMappedMemory(MemoryBlock &M) {
-  if (M.Address == 0 || M.Size == 0)
+  if (M.Address == 0 || M.AllocatedSize == 0)
     return std::error_code();
 
   if (!VirtualFree(M.Address, 0, MEM_RELEASE))
     return mapWindowsError(::GetLastError());
 
   M.Address = 0;
-  M.Size = 0;
+  M.AllocatedSize = 0;
 
   return std::error_code();
 }
 
   std::error_code Memory::protectMappedMemory(const MemoryBlock &M,
                                        unsigned Flags) {
-  if (M.Address == 0 || M.Size == 0)
+  if (M.Address == 0 || M.AllocatedSize == 0)
     return std::error_code();
 
   DWORD Protect = getWindowsProtectionFlags(Flags);
 
   DWORD OldFlags;
-  if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags))
+  if (!VirtualProtect(M.Address, M.AllocatedSize, Protect, &OldFlags))
     return mapWindowsError(::GetLastError());
 
   if (Flags & MF_EXEC)
-    Memory::InvalidateInstructionCache(M.Address, M.Size);
+    Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
 
   return std::error_code();
 }
diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc
index 0af145ec9a4e..b55b14febf2c 100644
--- a/lib/Support/Windows/Mutex.inc
+++ b/lib/Support/Windows/Mutex.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index d34aa763124c..5704930aeecc 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -1,9 +1,8 @@
 //===- llvm/Support/Windows/Path.inc - Windows Path Impl --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -735,6 +734,14 @@ std::error_code status(int FD, file_status &Result) {
   return getStatus(FileHandle, Result);
 }
 
+std::error_code status(file_t FileHandle, file_status &Result) {
+  return getStatus(FileHandle, Result);
+}
+
+unsigned getUmask() {
+  return 0;
+}
+
 std::error_code setPermissions(const Twine &Path, perms Permissions) {
   SmallVector<wchar_t, 128> PathUTF16;
   if (std::error_code EC = widenPath(Path, PathUTF16))
@@ -766,6 +773,11 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
   return std::error_code();
 }
 
+std::error_code setPermissions(int FD, perms Permissions) {
+  // FIXME Not implemented.
+  return std::make_error_code(std::errc::not_supported);
+}
+
 std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
                                                  TimePoint<> ModificationTime) {
   FILETIME AccessFT = toFILETIME(AccessTime);
@@ -776,10 +788,9 @@ std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
   return std::error_code();
 }
 
-std::error_code mapped_file_region::init(int FD, uint64_t Offset,
-                                         mapmode Mode) {
+std::error_code mapped_file_region::init(sys::fs::file_t OrigFileHandle,
+                                         uint64_t Offset, mapmode Mode) {
   this->Mode = Mode;
-  HANDLE OrigFileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
   if (OrigFileHandle == INVALID_HANDLE_VALUE)
     return make_error_code(errc::bad_file_descriptor);
 
@@ -846,8 +857,9 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
   return std::error_code();
 }
 
-mapped_file_region::mapped_file_region(int fd, mapmode mode, size_t length,
-                                       uint64_t offset, std::error_code &ec)
+mapped_file_region::mapped_file_region(sys::fs::file_t fd, mapmode mode,
+                                       size_t length, uint64_t offset,
+                                       std::error_code &ec)
     : Size(length), Mapping() {
   ec = init(fd, offset, mode);
   if (ec)
@@ -1197,9 +1209,73 @@ Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags,
   return Result;
 }
 
-void closeFile(file_t &F) {
-  ::CloseHandle(F);
+file_t convertFDToNativeFile(int FD) {
+  return reinterpret_cast<HANDLE>(::_get_osfhandle(FD));
+}
+
+file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); }
+file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); }
+file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); }
+
+std::error_code readNativeFileImpl(file_t FileHandle, char *BufPtr, size_t BytesToRead,
+                                   size_t *BytesRead, OVERLAPPED *Overlap) {
+  // ReadFile takes a DWORD byte count, so at most 4GB-1 can be read per call.
+  // The caller should check the number of bytes and read in a loop until done.
+  DWORD BytesToRead32 =
+      std::min(size_t(std::numeric_limits<DWORD>::max()), BytesToRead);
+  DWORD BytesRead32 = 0;
+  bool Success =
+      ::ReadFile(FileHandle, BufPtr, BytesToRead32, &BytesRead32, Overlap);
+  *BytesRead = BytesRead32;
+  if (!Success) {
+    DWORD Err = ::GetLastError();
+    // Pipe EOF is not an error.
+    if (Err == ERROR_BROKEN_PIPE)
+      return std::error_code();
+    return mapWindowsError(Err);
+  }
+  return std::error_code();
+}
+
+std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
+                               size_t *BytesRead) {
+  return readNativeFileImpl(FileHandle, Buf.data(), Buf.size(), BytesRead,
+                            /*Overlap=*/nullptr);
+}
+
+std::error_code readNativeFileSlice(file_t FileHandle,
+                                    MutableArrayRef<char> Buf, size_t Offset) {
+  char *BufPtr = Buf.data();
+  size_t BytesLeft = Buf.size();
+
+  while (BytesLeft) {
+    uint64_t CurOff = Buf.size() - BytesLeft + Offset;
+    OVERLAPPED Overlapped = {};
+    Overlapped.Offset = uint32_t(CurOff);
+    Overlapped.OffsetHigh = uint32_t(uint64_t(CurOff) >> 32);
+
+    size_t BytesRead = 0;
+    if (auto EC = readNativeFileImpl(FileHandle, BufPtr, BytesLeft, &BytesRead,
+                                     &Overlapped))
+      return EC;
+
+    // Once we reach EOF, zero the remaining bytes in the buffer.
+    if (BytesRead == 0) {
+      memset(BufPtr, 0, BytesLeft);
+      break;
+    }
+    BytesLeft -= BytesRead;
+    BufPtr += BytesRead;
+  }
+  return std::error_code();
+}
+
+std::error_code closeFile(file_t &F) {
+  file_t TmpF = F;
   F = kInvalidFile;
+  if (!::CloseHandle(TmpF))
+    return mapWindowsError(::GetLastError());
+  return std::error_code();
 }
 
 std::error_code remove_directories(const Twine &path, bool IgnoreErrors) {
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 2b2d79231434..4b91f9f7fc66 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -1,9 +1,8 @@
 //===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,7 +56,7 @@ static unsigned computePageSize() {
   return static_cast<unsigned>(info.dwPageSize);
 }
 
-unsigned Process::getPageSize() {
+Expected<unsigned> Process::getPageSize() {
   static unsigned Ret = computePageSize();
   return Ret;
 }
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index c037956603f2..0f54e59ee55b 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -1,9 +1,8 @@
 //===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 5eb9351eee52..8df9bc394160 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -1,9 +1,8 @@
 //= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock  =//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 41eb5e593aa5..6a820ef22b1e 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -1,9 +1,8 @@
 //===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -557,6 +556,10 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
   LeaveCriticalSection(&CriticalSection);
 }
 
+void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
+  // Unimplemented.
+}
+
 
 /// Add a function to be called when a signal is delivered to the process. The
 /// handler can have a cookie passed to it to identify what instance of the
diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc
index 8be1c3ecfbb9..1e0ed955e9ab 100644
--- a/lib/Support/Windows/ThreadLocal.inc
+++ b/lib/Support/Windows/ThreadLocal.inc
@@ -1,9 +1,8 @@
 //= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/Threading.inc b/lib/Support/Windows/Threading.inc
index 0bd92f66c6b8..96649472cc90 100644
--- a/lib/Support/Windows/Threading.inc
+++ b/lib/Support/Windows/Threading.inc
@@ -1,9 +1,8 @@
 //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -107,3 +106,19 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
   // value.
   Name.clear();
 }
+
+SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
+  // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
+  // Begin background processing mode. The system lowers the resource scheduling
+  // priorities of the thread so that it can perform background work without
+  // significantly affecting activity in the foreground.
+  // End background processing mode. The system restores the resource scheduling
+  // priorities of the thread as they were before the thread entered background
+  // processing mode.
+  return SetThreadPriority(GetCurrentThread(),
+                           Priority == ThreadPriority::Background
+                               ? THREAD_MODE_BACKGROUND_BEGIN
+                               : THREAD_MODE_BACKGROUND_END)
+             ? SetThreadPriorityResult::SUCCESS
+             : SetThreadPriorityResult::FAILURE;
+}
diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc
index fab2bdf2a941..a362c999de76 100644
--- a/lib/Support/Windows/Watchdog.inc
+++ b/lib/Support/Windows/Watchdog.inc
@@ -1,9 +1,8 @@
 //===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index 979cc5d01390..fed9b2f462ef 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -1,9 +1,8 @@
 //===- WindowsSupport.h - Common Windows Include File -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/WithColor.cpp b/lib/Support/WithColor.cpp
index cf4c10956f21..345dd9cf3949 100644
--- a/lib/Support/WithColor.cpp
+++ b/lib/Support/WithColor.cpp
@@ -1,9 +1,8 @@
 //===- WithColor.cpp ------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 9ef1410b99a5..9b2fe9c4418a 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1,9 +1,8 @@
 //===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index b9bbee7883c6..09eb36943de9 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -1,9 +1,8 @@
 //===- lib/Support/YAMLTraits.cpp -----------------------------------------===//
 //
-//                             The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -114,6 +113,11 @@ const Node *Input::getCurrentNode() const {
 }
 
 bool Input::mapTag(StringRef Tag, bool Default) {
+  // CurrentNode can be null if setCurrentDocument() was unable to
+  // parse the document because it was invalid or empty.
+  if (!CurrentNode)
+    return false;
+
   std::string foundTag = CurrentNode->_node->getVerbatimTag();
   if (foundTag.empty()) {
     // If no tag found and 'Tag' is the default, say it was found.
@@ -442,7 +446,8 @@ bool Output::outputting() {
 
 void Output::beginMapping() {
   StateStack.push_back(inMapFirstKey);
-  NeedsNewLine = true;
+  PaddingBeforeContainer = Padding;
+  Padding = "\n";
 }
 
 bool Output::mapTag(StringRef Tag, bool Use) {
@@ -470,7 +475,7 @@ bool Output::mapTag(StringRef Tag, bool Use) {
       }
       // Tags inside maps in sequences should act as keys in the map from a
       // formatting perspective, so we always want a newline in a sequence.
-      NeedsNewLine = true;
+      Padding = "\n";
     }
   }
   return Use;
@@ -478,8 +483,12 @@ bool Output::mapTag(StringRef Tag, bool Use) {
 
 void Output::endMapping() {
   // If we did not map anything, we should explicitly emit an empty map
-  if (StateStack.back() == inMapFirstKey)
+  if (StateStack.back() == inMapFirstKey) {
+    Padding = PaddingBeforeContainer;
+    newLineCheck();
     output("{}");
+    Padding = "\n";
+  }
   StateStack.pop_back();
 }
 
@@ -544,14 +553,19 @@ void Output::endDocuments() {
 
 unsigned Output::beginSequence() {
   StateStack.push_back(inSeqFirstElement);
-  NeedsNewLine = true;
+  PaddingBeforeContainer = Padding;
+  Padding = "\n";
   return 0;
 }
 
 void Output::endSequence() {
   // If we did not emit anything, we should explicitly emit an empty sequence
-  if (StateStack.back() == inSeqFirstElement)
+  if (StateStack.back() == inSeqFirstElement) {
+    Padding = PaddingBeforeContainer;
+    newLineCheck();
     output("[]");
+    Padding = "\n";
+  }
   StateStack.pop_back();
 }
 
@@ -661,11 +675,6 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
     return;
   }
 
-  unsigned i = 0;
-  unsigned j = 0;
-  unsigned End = S.size();
-  const char *Base = S.data();
-
   const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\"";
   output(Quote); // Starting quote.
 
@@ -673,11 +682,16 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
   // present, and will be escaped using a variety of unicode-scalar and special short-form
   // escapes. This is handled in yaml::escape.
   if (MustQuote == QuotingType::Double) {
-    output(yaml::escape(Base, /* EscapePrintable= */ false));
+    output(yaml::escape(S, /* EscapePrintable= */ false));
     outputUpToEndOfLine(Quote);
     return;
   }
 
+  unsigned i = 0;
+  unsigned j = 0;
+  unsigned End = S.size();
+  const char *Base = S.data();
+
   // When using single-quoted strings, any single quote ' must be doubled to be escaped.
   while (j < End) {
     if (S[j] == '\'') {                    // Escape quotes.
@@ -742,7 +756,7 @@ void Output::outputUpToEndOfLine(StringRef s) {
   output(s);
   if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) &&
                              !inFlowMapAnyKey(StateStack.back())))
-    NeedsNewLine = true;
+    Padding = "\n";
 }
 
 void Output::outputNewLine() {
@@ -755,11 +769,13 @@ void Output::outputNewLine() {
 //
 
 void Output::newLineCheck() {
-  if (!NeedsNewLine)
+  if (Padding != "\n") {
+    output(Padding);
+    Padding = {};
     return;
-  NeedsNewLine = false;
-
+  }
   outputNewLine();
+  Padding = {};
 
   if (StateStack.size() == 0)
     return;
@@ -793,9 +809,9 @@ void Output::paddedKey(StringRef key) {
   output(":");
   const char *spaces = "                ";
   if (key.size() < strlen(spaces))
-    output(&spaces[key.size()]);
+    Padding = &spaces[key.size()];
   else
-    output(" ");
+    Padding = " ";
 }
 
 void Output::flowKey(StringRef Key) {
diff --git a/lib/Support/Z3Solver.cpp b/lib/Support/Z3Solver.cpp
new file mode 100644
index 000000000000..f1a6fdf87cf2
--- /dev/null
+++ b/lib/Support/Z3Solver.cpp
@@ -0,0 +1,900 @@
+//== Z3Solver.cpp -----------------------------------------------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/SMTAPI.h"
+#include <set>
+
+using namespace llvm;
+
+#if LLVM_WITH_Z3
+
+#include <z3.h>
+
+namespace {
+
+/// Configuration class for Z3
+class Z3Config {
+  friend class Z3Context;
+
+  Z3_config Config;
+
+public:
+  Z3Config() : Config(Z3_mk_config()) {
+    // Enable model finding
+    Z3_set_param_value(Config, "model", "true");
+    // Disable proof generation
+    Z3_set_param_value(Config, "proof", "false");
+    // Set timeout to 15000ms = 15s
+    Z3_set_param_value(Config, "timeout", "15000");
+  }
+
+  ~Z3Config() { Z3_del_config(Config); }
+}; // end class Z3Config
+
+// Function used to report errors
+void Z3ErrorHandler(Z3_context Context, Z3_error_code Error) {
+  llvm::report_fatal_error("Z3 error: " +
+                           llvm::Twine(Z3_get_error_msg(Context, Error)));
+}
+
+/// Wrapper for Z3 context
+class Z3Context {
+public:
+  Z3_context Context;
+
+  Z3Context() {
+    Context = Z3_mk_context_rc(Z3Config().Config);
+    // The error function is set here because the context is the first object
+    // created by the backend
+    Z3_set_error_handler(Context, Z3ErrorHandler);
+  }
+
+  virtual ~Z3Context() {
+    Z3_del_context(Context);
+    Context = nullptr;
+  }
+}; // end class Z3Context
+
+/// Wrapper for Z3 Sort
+class Z3Sort : public SMTSort {
+  friend class Z3Solver;
+
+  Z3Context &Context;
+
+  Z3_sort Sort;
+
+public:
+  /// Constructor; increments the Z3 reference count of \p ZS.
+  Z3Sort(Z3Context &C, Z3_sort ZS) : Context(C), Sort(ZS) {
+    Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+  }
+
+  /// Override implicit copy constructor for correct reference counting.
+  Z3Sort(const Z3Sort &Other) : Context(Other.Context), Sort(Other.Sort) {
+    Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+  }
+
+  /// Override implicit copy assignment operator for correct reference
+  /// counting.
+  Z3Sort &operator=(const Z3Sort &Other) {
+    Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Other.Sort));
+    Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+    Sort = Other.Sort;
+    return *this;
+  }
+
+  Z3Sort(Z3Sort &&Other) = delete;
+  Z3Sort &operator=(Z3Sort &&Other) = delete;
+
+  ~Z3Sort() {
+    if (Sort)
+      Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.AddInteger(
+        Z3_get_ast_id(Context.Context, reinterpret_cast<Z3_ast>(Sort)));
+  }
+
+  bool isBitvectorSortImpl() const override {
+    return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BV_SORT);
+  }
+
+  bool isFloatSortImpl() const override {
+    return (Z3_get_sort_kind(Context.Context, Sort) == Z3_FLOATING_POINT_SORT);
+  }
+
+  bool isBooleanSortImpl() const override {
+    return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BOOL_SORT);
+  }
+
+  unsigned getBitvectorSortSizeImpl() const override {
+    return Z3_get_bv_sort_size(Context.Context, Sort);
+  }
+
+  unsigned getFloatSortSizeImpl() const override {
+    return Z3_fpa_get_ebits(Context.Context, Sort) +
+           Z3_fpa_get_sbits(Context.Context, Sort);
+  }
+
+  bool equal_to(SMTSort const &Other) const override {
+    return Z3_is_eq_sort(Context.Context, Sort,
+                         static_cast<const Z3Sort &>(Other).Sort);
+  }
+
+  void print(raw_ostream &OS) const override {
+    OS << Z3_sort_to_string(Context.Context, Sort);
+  }
+}; // end class Z3Sort
+
+static const Z3Sort &toZ3Sort(const SMTSort &S) {
+  return static_cast<const Z3Sort &>(S);
+}
+
+class Z3Expr : public SMTExpr {
+  friend class Z3Solver;
+
+  Z3Context &Context;
+
+  Z3_ast AST;
+
+public:
+  Z3Expr(Z3Context &C, Z3_ast ZA) : SMTExpr(), Context(C), AST(ZA) {
+    Z3_inc_ref(Context.Context, AST);
+  }
+
+  /// Override implicit copy constructor for correct reference counting.
+  Z3Expr(const Z3Expr &Copy) : SMTExpr(), Context(Copy.Context), AST(Copy.AST) {
+    Z3_inc_ref(Context.Context, AST);
+  }
+
+  /// Override implicit copy assignment operator for correct reference
+  /// counting.
+  Z3Expr &operator=(const Z3Expr &Other) {
+    Z3_inc_ref(Context.Context, Other.AST);
+    Z3_dec_ref(Context.Context, AST);
+    AST = Other.AST;
+    return *this;
+  }
+
+  Z3Expr(Z3Expr &&Other) = delete;
+  Z3Expr &operator=(Z3Expr &&Other) = delete;
+
+  ~Z3Expr() {
+    if (AST)
+      Z3_dec_ref(Context.Context, AST);
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override {
+    ID.AddInteger(Z3_get_ast_id(Context.Context, AST));
+  }
+
+  /// Comparison of AST equality, not model equivalence.
+  bool equal_to(SMTExpr const &Other) const override {
+    assert(Z3_is_eq_sort(Context.Context, Z3_get_sort(Context.Context, AST),
+                         Z3_get_sort(Context.Context,
+                                     static_cast<const Z3Expr &>(Other).AST)) &&
+           "AST's must have the same sort");
+    return Z3_is_eq_ast(Context.Context, AST,
+                        static_cast<const Z3Expr &>(Other).AST);
+  }
+
+  void print(raw_ostream &OS) const override {
+    OS << Z3_ast_to_string(Context.Context, AST);
+  }
+}; // end class Z3Expr
+
+static const Z3Expr &toZ3Expr(const SMTExpr &E) {
+  return static_cast<const Z3Expr &>(E);
+}
+
+class Z3Model {
+  friend class Z3Solver;
+
+  Z3Context &Context;
+
+  Z3_model Model;
+
+public:
+  Z3Model(Z3Context &C, Z3_model ZM) : Context(C), Model(ZM) {
+    Z3_model_inc_ref(Context.Context, Model);
+  }
+
+  Z3Model(const Z3Model &Other) = delete;
+  Z3Model(Z3Model &&Other) = delete;
+  Z3Model &operator=(Z3Model &Other) = delete;
+  Z3Model &operator=(Z3Model &&Other) = delete;
+
+  ~Z3Model() {
+    if (Model)
+      Z3_model_dec_ref(Context.Context, Model);
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << Z3_model_to_string(Context.Context, Model);
+  }
+
+  LLVM_DUMP_METHOD void dump() const { print(llvm::errs()); }
+}; // end class Z3Model
+
+/// Get the corresponding IEEE floating-point type for a given bitwidth.
+static const llvm::fltSemantics &getFloatSemantics(unsigned BitWidth) {
+  switch (BitWidth) {
+  default:
+    llvm_unreachable("Unsupported floating-point semantics!");
+    break;
+  case 16:
+    return llvm::APFloat::IEEEhalf();
+  case 32:
+    return llvm::APFloat::IEEEsingle();
+  case 64:
+    return llvm::APFloat::IEEEdouble();
+  case 128:
+    return llvm::APFloat::IEEEquad();
+  }
+}
+
+// Determine whether two float semantics are equivalent
+static bool areEquivalent(const llvm::fltSemantics &LHS,
+                          const llvm::fltSemantics &RHS) {
+  return (llvm::APFloat::semanticsPrecision(LHS) ==
+          llvm::APFloat::semanticsPrecision(RHS)) &&
+         (llvm::APFloat::semanticsMinExponent(LHS) ==
+          llvm::APFloat::semanticsMinExponent(RHS)) &&
+         (llvm::APFloat::semanticsMaxExponent(LHS) ==
+          llvm::APFloat::semanticsMaxExponent(RHS)) &&
+         (llvm::APFloat::semanticsSizeInBits(LHS) ==
+          llvm::APFloat::semanticsSizeInBits(RHS));
+}
+
+class Z3Solver : public SMTSolver {
+  friend class Z3ConstraintManager;
+
+  Z3Context Context;
+
+  Z3_solver Solver;
+
+  // Cache Sorts
+  std::set<Z3Sort> CachedSorts;
+
+  // Cache Exprs
+  std::set<Z3Expr> CachedExprs;
+
+public:
+  Z3Solver() : Solver(Z3_mk_simple_solver(Context.Context)) {
+    Z3_solver_inc_ref(Context.Context, Solver);
+  }
+
+  Z3Solver(const Z3Solver &Other) = delete;
+  Z3Solver(Z3Solver &&Other) = delete;
+  Z3Solver &operator=(Z3Solver &Other) = delete;
+  Z3Solver &operator=(Z3Solver &&Other) = delete;
+
+  ~Z3Solver() {
+    if (Solver)
+      Z3_solver_dec_ref(Context.Context, Solver);
+  }
+
+  void addConstraint(const SMTExprRef &Exp) const override {
+    Z3_solver_assert(Context.Context, Solver, toZ3Expr(*Exp).AST);
+  }
+
+  // Given an SMTSort, adds/retrieves it from the cache and returns
+  // an SMTSortRef to the SMTSort in the cache
+  SMTSortRef newSortRef(const SMTSort &Sort) {
+    auto It = CachedSorts.insert(toZ3Sort(Sort));
+    return &(*It.first);
+  }
+
+  // Given an SMTExpr, adds/retrieves it from the cache and returns
+  // an SMTExprRef to the SMTExpr in the cache
+  SMTExprRef newExprRef(const SMTExpr &Exp) {
+    auto It = CachedExprs.insert(toZ3Expr(Exp));
+    return &(*It.first);
+  }
+
+  SMTSortRef getBoolSort() override {
+    return newSortRef(Z3Sort(Context, Z3_mk_bool_sort(Context.Context)));
+  }
+
+  SMTSortRef getBitvectorSort(unsigned BitWidth) override {
+    return newSortRef(
+        Z3Sort(Context, Z3_mk_bv_sort(Context.Context, BitWidth)));
+  }
+
+  SMTSortRef getSort(const SMTExprRef &Exp) override {
+    return newSortRef(
+        Z3Sort(Context, Z3_get_sort(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTSortRef getFloat16Sort() override {
+    return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_16(Context.Context)));
+  }
+
+  SMTSortRef getFloat32Sort() override {
+    return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_32(Context.Context)));
+  }
+
+  SMTSortRef getFloat64Sort() override {
+    return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_64(Context.Context)));
+  }
+
+  SMTSortRef getFloat128Sort() override {
+    return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_128(Context.Context)));
+  }
+
+  SMTExprRef mkBVNeg(const SMTExprRef &Exp) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvneg(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkBVNot(const SMTExprRef &Exp) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvnot(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkNot(const SMTExprRef &Exp) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_not(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkBVAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvadd(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsub(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvmul(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsrem(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVURem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvurem(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsdiv(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVUDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvudiv(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVShl(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvshl(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVAshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvashr(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVLshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvlshr(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVXor(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvxor(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvor(Context.Context, toZ3Expr(*LHS).AST,
+                                   toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvand(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVUlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvult(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvslt(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVUgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvugt(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsgt(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVUle(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvule(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSle(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsle(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVUge(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvuge(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVSge(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_bvsge(Context.Context, toZ3Expr(*LHS).AST,
+                                    toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    Z3_ast Args[2] = {toZ3Expr(*LHS).AST, toZ3Expr(*RHS).AST};
+    return newExprRef(Z3Expr(Context, Z3_mk_and(Context.Context, 2, Args)));
+  }
+
+  SMTExprRef mkOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    Z3_ast Args[2] = {toZ3Expr(*LHS).AST, toZ3Expr(*RHS).AST};
+    return newExprRef(Z3Expr(Context, Z3_mk_or(Context.Context, 2, Args)));
+  }
+
+  SMTExprRef mkEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_eq(Context.Context, toZ3Expr(*LHS).AST,
+                                 toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPNeg(const SMTExprRef &Exp) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_neg(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkFPIsInfinite(const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_is_infinite(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkFPIsNaN(const SMTExprRef &Exp) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_is_nan(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkFPIsNormal(const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_is_normal(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkFPIsZero(const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_is_zero(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkFPMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(
+        Z3Expr(Context,
+               Z3_mk_fpa_mul(Context.Context, toZ3Expr(*LHS).AST,
+                             toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+  }
+
+  SMTExprRef mkFPDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(
+        Z3Expr(Context,
+               Z3_mk_fpa_div(Context.Context, toZ3Expr(*LHS).AST,
+                             toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+  }
+
+  SMTExprRef mkFPRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_rem(Context.Context, toZ3Expr(*LHS).AST,
+                                      toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(
+        Z3Expr(Context,
+               Z3_mk_fpa_add(Context.Context, toZ3Expr(*LHS).AST,
+                             toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+  }
+
+  SMTExprRef mkFPSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(
+        Z3Expr(Context,
+               Z3_mk_fpa_sub(Context.Context, toZ3Expr(*LHS).AST,
+                             toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+  }
+
+  SMTExprRef mkFPLt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_lt(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPGt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_gt(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPLe(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_leq(Context.Context, toZ3Expr(*LHS).AST,
+                                      toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPGe(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_geq(Context.Context, toZ3Expr(*LHS).AST,
+                                      toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_fpa_eq(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkIte(const SMTExprRef &Cond, const SMTExprRef &T,
+                   const SMTExprRef &F) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_ite(Context.Context, toZ3Expr(*Cond).AST,
+                                  toZ3Expr(*T).AST, toZ3Expr(*F).AST)));
+  }
+
+  SMTExprRef mkBVSignExt(unsigned i, const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_sign_ext(Context.Context, i, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkBVZeroExt(unsigned i, const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_zero_ext(Context.Context, i, toZ3Expr(*Exp).AST)));
+  }
+
+  SMTExprRef mkBVExtract(unsigned High, unsigned Low,
+                         const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(Context, Z3_mk_extract(Context.Context, High, Low,
+                                                    toZ3Expr(*Exp).AST)));
+  }
+
+  /// Creates a predicate that is true when the bitvector addition of \p LHS
+  /// and \p RHS does not overflow; \p isSigned is passed to Z3 unchanged.
+  SMTExprRef mkBVAddNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+                               bool isSigned) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvadd_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+                                         toZ3Expr(*RHS).AST, isSigned)));
+  }
+
+  /// Creates a predicate that is true when the signed bitvector addition of
+  /// \p LHS and \p RHS does not underflow.
+  SMTExprRef mkBVAddNoUnderflow(const SMTExprRef &LHS,
+                                const SMTExprRef &RHS) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvadd_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+                                          toZ3Expr(*RHS).AST)));
+  }
+
+  /// Creates a predicate that is true when the signed bitvector subtraction
+  /// of \p RHS from \p LHS does not overflow.
+  SMTExprRef mkBVSubNoOverflow(const SMTExprRef &LHS,
+                               const SMTExprRef &RHS) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvsub_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+                                         toZ3Expr(*RHS).AST)));
+  }
+
+  /// Creates a predicate that is true when the bitvector subtraction of \p RHS
+  /// from \p LHS does not underflow; \p isSigned is passed to Z3 unchanged.
+  SMTExprRef mkBVSubNoUnderflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+                                bool isSigned) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvsub_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+                                          toZ3Expr(*RHS).AST, isSigned)));
+  }
+
+  /// Creates a predicate that is true when the signed bitvector
+  /// division/modulus of \p LHS by \p RHS does not overflow.
+  SMTExprRef mkBVSDivNoOverflow(const SMTExprRef &LHS,
+                                const SMTExprRef &RHS) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvsdiv_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+                                          toZ3Expr(*RHS).AST)));
+  }
+
+  /// Creates a predicate that is true when the bitvector negation of \p Exp
+  /// does not overflow.
+  SMTExprRef mkBVNegNoOverflow(const SMTExprRef &Exp) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvneg_no_overflow(Context.Context, toZ3Expr(*Exp).AST)));
+  }
+
+  /// Creates a predicate that is true when the bitvector multiplication of
+  /// \p LHS and \p RHS does not overflow; \p isSigned is passed to Z3 unchanged.
+  SMTExprRef mkBVMulNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+                               bool isSigned) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvmul_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+                                         toZ3Expr(*RHS).AST, isSigned)));
+  }
+
+  /// Creates a predicate that is true when the signed bitvector
+  /// multiplication of \p LHS and \p RHS does not underflow.
+  SMTExprRef mkBVMulNoUnderflow(const SMTExprRef &LHS,
+                                const SMTExprRef &RHS) override {
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_bvmul_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+                                          toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkBVConcat(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_concat(Context.Context, toZ3Expr(*LHS).AST,
+                                     toZ3Expr(*RHS).AST)));
+  }
+
+  SMTExprRef mkFPtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(Z3Expr(
+        Context,
+        Z3_mk_fpa_to_fp_float(Context.Context, toZ3Expr(*RoundingMode).AST,
+                              toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+  }
+
+  SMTExprRef mkSBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(Z3Expr(
+        Context,
+        Z3_mk_fpa_to_fp_signed(Context.Context, toZ3Expr(*RoundingMode).AST,
+                               toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+  }
+
+  SMTExprRef mkUBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(Z3Expr(
+        Context,
+        Z3_mk_fpa_to_fp_unsigned(Context.Context, toZ3Expr(*RoundingMode).AST,
+                                 toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+  }
+
+  SMTExprRef mkFPtoSBV(const SMTExprRef &From, unsigned ToWidth) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_to_sbv(Context.Context, toZ3Expr(*RoundingMode).AST,
+                                  toZ3Expr(*From).AST, ToWidth)));
+  }
+
+  SMTExprRef mkFPtoUBV(const SMTExprRef &From, unsigned ToWidth) override {
+    SMTExprRef RoundingMode = getFloatRoundingMode();
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_to_ubv(Context.Context, toZ3Expr(*RoundingMode).AST,
+                                  toZ3Expr(*From).AST, ToWidth)));
+  }
+
+  SMTExprRef mkBoolean(const bool b) override {
+    return newExprRef(Z3Expr(Context, b ? Z3_mk_true(Context.Context)
+                                        : Z3_mk_false(Context.Context)));
+  }
+
+  SMTExprRef mkBitvector(const llvm::APSInt Int, unsigned BitWidth) override {
+    const SMTSortRef Sort = getBitvectorSort(BitWidth);
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_numeral(Context.Context, Int.toString(10).c_str(),
+                                      toZ3Sort(*Sort).Sort)));
+  }
+
+  /// Builds a floating-point constant from the raw bit pattern of \p Float.
+  SMTExprRef mkFloat(const llvm::APFloat Float) override {
+    SMTSortRef FloatSort =
+        getFloatSort(llvm::APFloat::semanticsSizeInBits(Float.getSemantics()));
+    llvm::APSInt Bits(Float.bitcastToAPInt(), false);
+    SMTExprRef BitsExpr = mkBitvector(Bits, Bits.getBitWidth());
+    return newExprRef(Z3Expr(
+        Context, Z3_mk_fpa_to_fp_bv(Context.Context, toZ3Expr(*BitsExpr).AST,
+                                    toZ3Sort(*FloatSort).Sort)));
+  }
+
+  SMTExprRef mkSymbol(const char *Name, SMTSortRef Sort) override {
+    return newExprRef(
+        Z3Expr(Context, Z3_mk_const(Context.Context,
+                                    Z3_mk_string_symbol(Context.Context, Name),
+                                    toZ3Sort(*Sort).Sort)));
+  }
+
+  /// Parses the numeral string Z3 reports for \p Exp as a base-10 integer of
+  /// \p BitWidth bits and wraps it in an APSInt with the given signedness.
+  llvm::APSInt getBitvector(const SMTExprRef &Exp, unsigned BitWidth,
+                            bool isUnsigned) override {
+    Z3_string Numeral =
+        Z3_get_numeral_string(Context.Context, toZ3Expr(*Exp).AST);
+    return llvm::APSInt(llvm::APInt(BitWidth, Numeral, 10), isUnsigned);
+  }
+
+  bool getBoolean(const SMTExprRef &Exp) override {
+    return Z3_get_bool_value(Context.Context, toZ3Expr(*Exp).AST) == Z3_L_TRUE;
+  }
+
+  SMTExprRef getFloatRoundingMode() override {
+    // TODO: Don't assume nearest ties to even rounding mode
+    return newExprRef(Z3Expr(Context, Z3_mk_fpa_rne(Context.Context)));
+  }
+
+  /// Reads the model value \p AST of floating-point \p Sort into \p Float;
+  /// with \p useSemantics, Float's semantics must already match the sort's.
+  bool toAPFloat(const SMTSortRef &Sort, const SMTExprRef &AST,
+                 llvm::APFloat &Float, bool useSemantics) {
+    assert(Sort->isFloatSort() && "Unsupported sort to floating-point!");
+
+    const unsigned Width = Sort->getFloatSortSize();
+    llvm::APSInt Bits(Width, true);
+    const llvm::fltSemantics &Semantics = getFloatSemantics(Width);
+    if (!toAPSInt(getBitvectorSort(Width), AST, Bits, true))
+      return false;
+
+    if (useSemantics && !areEquivalent(Float.getSemantics(), Semantics)) {
+      assert(false && "Floating-point types don't match!");
+      return false;
+    }
+
+    Float = llvm::APFloat(Semantics, Bits);
+    return true;
+  }
+
+  /// Reads the model value \p AST (bitvector or boolean \p Sort) into \p Int,
+  /// keeping Int's width and signedness; false on mismatch or bad width.
+  bool toAPSInt(const SMTSortRef &Sort, const SMTExprRef &AST,
+                llvm::APSInt &Int, bool useSemantics) {
+    if (Sort->isBitvectorSort()) {
+      const unsigned SortWidth = Sort->getBitvectorSortSize();
+      if (useSemantics && Int.getBitWidth() != SortWidth) {
+        assert(false && "Bitvector types don't match!");
+        return false;
+      }
+
+      // FIXME: This function also retrieves floating-point values, which can
+      // be 16, 32, 64 or 128 bits long, while bitvectors are 1 to 64 bits
+      // long; hence this odd guard. Drop it once the backend can retrieve
+      // floats and their special values (NaN, +/-infinity, +/-zero) directly.
+      if (SortWidth > 64 && SortWidth != 128) {
+        assert(false && "Bitwidth not supported!");
+        return false;
+      }
+
+      Int = getBitvector(AST, Int.getBitWidth(), Int.isUnsigned());
+      return true;
+    }
+
+    if (Sort->isBooleanSort()) {
+      if (useSemantics && Int.getBitWidth() < 1) {
+        assert(false && "Boolean type doesn't match!");
+        return false;
+      }
+
+      Int = llvm::APSInt(llvm::APInt(Int.getBitWidth(), getBoolean(AST)),
+                         Int.isUnsigned());
+      return true;
+    }
+
+    llvm_unreachable("Unsupported sort to integer!");
+  }
+
+  /// Stores the current model's value for \p Exp in \p Int; false if none.
+  bool getInterpretation(const SMTExprRef &Exp, llvm::APSInt &Int) override {
+    Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver));
+    Z3_func_decl Decl = Z3_get_app_decl(
+        Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST));
+    if (Z3_model_has_interp(Context.Context, Model.Model, Decl) != Z3_L_TRUE)
+      return false;
+
+    SMTExprRef Value = newExprRef(
+        Z3Expr(Context,
+               Z3_model_get_const_interp(Context.Context, Model.Model, Decl)));
+    return toAPSInt(getSort(Value), Value, Int, true);
+  }
+
+  /// Stores the current model's value for \p Exp in \p Float; false if none.
+  bool getInterpretation(const SMTExprRef &Exp, llvm::APFloat &Float) override {
+    Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver));
+    Z3_func_decl Decl = Z3_get_app_decl(
+        Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST));
+    if (Z3_model_has_interp(Context.Context, Model.Model, Decl) != Z3_L_TRUE)
+      return false;
+
+    SMTExprRef Value = newExprRef(
+        Z3Expr(Context,
+               Z3_model_get_const_interp(Context.Context, Model.Model, Decl)));
+    return toAPFloat(getSort(Value), Value, Float, true);
+  }
+
+  /// Asks Z3 to check the solver. Returns true when Z3 answers Z3_L_TRUE,
+  /// false when it answers Z3_L_FALSE, and an empty Optional for any other
+  /// result.
+  Optional<bool> check() const override {
+    const Z3_lbool Result = Z3_solver_check(Context.Context, Solver);
+    if (Result != Z3_L_TRUE && Result != Z3_L_FALSE)
+      return Optional<bool>();
+
+    return Result == Z3_L_TRUE;
+  }
+
+  void push() override { Z3_solver_push(Context.Context, Solver); }
+
+  void pop(unsigned NumStates = 1) override { // must not exceed pushed scopes
+    assert(NumStates <= Z3_solver_get_num_scopes(Context.Context, Solver));
+    Z3_solver_pop(Context.Context, Solver, NumStates);
+  }
+
+  bool isFPSupported() override { return true; }
+
+  /// Reset the solver and remove all constraints.
+  void reset() override { Z3_solver_reset(Context.Context, Solver); }
+
+  void print(raw_ostream &OS) const override {
+    OS << Z3_solver_to_string(Context.Context, Solver);
+  }
+}; // end class Z3Solver
+
+} // end anonymous namespace
+
+#endif
+
+llvm::SMTSolverRef llvm::CreateZ3Solver() {
+#if LLVM_WITH_Z3
+  return llvm::make_unique<Z3Solver>();
+#else // !LLVM_WITH_Z3: no backend available, so creating a solver is fatal.
+  llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild "
+                           "with -DLLVM_ENABLE_Z3_SOLVER=ON",
+                           /*gen_crash_diag=*/false);
+  return nullptr; // presumably unreachable; keeps every path returning a value
+#endif // LLVM_WITH_Z3
+}
+
+LLVM_DUMP_METHOD void SMTSort::dump() const { print(llvm::errs()); }
+LLVM_DUMP_METHOD void SMTExpr::dump() const { print(llvm::errs()); }
+LLVM_DUMP_METHOD void SMTSolver::dump() const { print(llvm::errs()); }
diff --git a/lib/Support/circular_raw_ostream.cpp b/lib/Support/circular_raw_ostream.cpp
index e768f17cd00d..acd230704ff8 100644
--- a/lib/Support/circular_raw_ostream.cpp
+++ b/lib/Support/circular_raw_ostream.cpp
@@ -1,9 +1,8 @@
 //===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/raw_os_ostream.cpp b/lib/Support/raw_os_ostream.cpp
index 44f2325d7f8a..81f0d739696e 100644
--- a/lib/Support/raw_os_ostream.cpp
+++ b/lib/Support/raw_os_ostream.cpp
@@ -1,9 +1,8 @@
 //===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 21dde7ff914a..2baccaa0cbd7 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -1,9 +1,8 @@
 //===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -613,7 +612,7 @@ raw_fd_ostream::~raw_fd_ostream() {
   // destructing raw_ostream objects which may have errors.
   if (has_error())
     report_fatal_error("IO failure on output stream: " + error().message(),
-                       /*GenCrashDiag=*/false);
+                       /*gen_crash_diag=*/false);
 }
 
 #if defined(_WIN32)
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index e6171c71efc0..7523b32ca0e5 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -1,9 +1,8 @@
 //===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/TableGen/JSONBackend.cpp b/lib/TableGen/JSONBackend.cpp
index 36cb2208a294..196644cda667 100644
--- a/lib/TableGen/JSONBackend.cpp
+++ b/lib/TableGen/JSONBackend.cpp
@@ -1,9 +1,8 @@
 //===- JSONBackend.cpp - Generate a JSON dump of all records. -*- C++ -*-=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index 02698416609f..bcd39584e450 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -1,9 +1,8 @@
 //===- Main.cpp - Top-Level TableGen implementation -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index cf1685a2e8c2..27d1bdc7f4c3 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1,9 +1,8 @@
 //===- Record.cpp - Record implementation ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,9 +15,11 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
@@ -31,14 +32,20 @@
 #include <cassert>
 #include <cstdint>
 #include <memory>
+#include <map>
 #include <string>
 #include <utility>
 #include <vector>
 
 using namespace llvm;
 
+#define DEBUG_TYPE "tblgen-records"
+
 static BumpPtrAllocator Allocator;
 
+STATISTIC(CodeInitsConstructed,
+          "The total number of unique CodeInits constructed");
+
 //===----------------------------------------------------------------------===//
 //    Type implementations
 //===----------------------------------------------------------------------===//
@@ -451,7 +458,7 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
 }
 
 IntInit *IntInit::get(int64_t V) {
-  static DenseMap<int64_t, IntInit*> ThePool;
+  static std::map<int64_t, IntInit*> ThePool;
 
   IntInit *&I = ThePool[V];
   if (!I) I = new(Allocator) IntInit(V);
@@ -507,13 +514,20 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
   return BitsInit::get(NewBits);
 }
 
-CodeInit *CodeInit::get(StringRef V) {
-  static StringMap<CodeInit*, BumpPtrAllocator &> ThePool(Allocator);
+CodeInit *CodeInit::get(StringRef V, const SMLoc &Loc) {
+  static StringSet<BumpPtrAllocator &> ThePool(Allocator);
 
-  auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first;
-  if (!Entry.second)
-    Entry.second = new(Allocator) CodeInit(Entry.getKey());
-  return Entry.second;
+  CodeInitsConstructed++;
+
+  // Unlike StringMap, StringSet doesn't accept empty keys.
+  if (V.empty())
+    return new (Allocator) CodeInit("", Loc);
+
+  // Location tracking prevents us from de-duping CodeInits as we're never
+  // called with the same string and same location twice. However, we can at
+  // least de-dupe the strings for a modest saving.
+  auto &Entry = *ThePool.insert(V).first;
+  return new(Allocator) CodeInit(Entry.getKey(), Loc);
 }
 
 StringInit *StringInit::get(StringRef V) {
@@ -529,7 +543,7 @@ Init *StringInit::convertInitializerTo(RecTy *Ty) const {
   if (isa<StringRecTy>(Ty))
     return const_cast<StringInit *>(this);
   if (isa<CodeRecTy>(Ty))
-    return CodeInit::get(getValue());
+    return CodeInit::get(getValue(), SMLoc());
 
   return nullptr;
 }
@@ -843,6 +857,28 @@ Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
   return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, StringRecTy::get());
 }
 
+/// Returns a list holding LHS's elements followed by RHS's, typed like LHS.
+static ListInit *ConcatListInits(const ListInit *LHS, const ListInit *RHS) {
+  SmallVector<Init *, 8> Elements;
+  Elements.append(LHS->begin(), LHS->end());
+  Elements.append(RHS->begin(), RHS->end());
+  return ListInit::get(Elements, LHS->getElementType());
+}
+
+Init *BinOpInit::getListConcat(TypedInit *LHS, Init *RHS) {
+  assert(isa<ListRecTy>(LHS->getType()) && "First arg must be a list");
+
+  // Fast path: both operands are already literal lists, so join them now.
+  if (const auto *LeftList = dyn_cast<ListInit>(LHS))
+    if (const auto *RightList = dyn_cast<ListInit>(RHS))
+      return ConcatListInits(LeftList, RightList);
+  return BinOpInit::get(BinOpInit::LISTCONCAT, LHS, RHS, LHS->getType());
+}
+
+Init *BinOpInit::getListSplat(TypedInit *LHS, Init *RHS) {
+  return BinOpInit::get(BinOpInit::LISTSPLAT, LHS, RHS, LHS->getType());
+}
+
 Init *BinOpInit::Fold(Record *CurRec) const {
   switch (getOpcode()) {
   case CONCAT: {
@@ -883,6 +919,15 @@ Init *BinOpInit::Fold(Record *CurRec) const {
     }
     break;
   }
+  case LISTSPLAT: {
+    TypedInit *Value = dyn_cast<TypedInit>(LHS);
+    IntInit *Size = dyn_cast<IntInit>(RHS);
+    if (Value && Size) {
+      SmallVector<Init *, 8> Args(Size->getValue(), Value);
+      return ListInit::get(Args, Value->getType());
+    }
+    break;
+  }
   case STRCONCAT: {
     StringInit *LHSs = dyn_cast<StringInit>(LHS);
     StringInit *RHSs = dyn_cast<StringInit>(RHS);
@@ -931,6 +976,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
     break;
   }
   case ADD:
+  case MUL:
   case AND:
   case OR:
   case SHL:
@@ -946,6 +992,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
       switch (getOpcode()) {
       default: llvm_unreachable("Bad opcode!");
       case ADD: Result = LHSv +  RHSv; break;
+      case MUL: Result = LHSv *  RHSv; break;
       case AND: Result = LHSv &  RHSv; break;
       case OR: Result = LHSv | RHSv; break;
       case SHL: Result = LHSv << RHSv; break;
@@ -975,6 +1022,7 @@ std::string BinOpInit::getAsString() const {
   switch (getOpcode()) {
   case CONCAT: Result = "!con"; break;
   case ADD: Result = "!add"; break;
+  case MUL: Result = "!mul"; break;
   case AND: Result = "!and"; break;
   case OR: Result = "!or"; break;
   case SHL: Result = "!shl"; break;
@@ -987,6 +1035,7 @@ std::string BinOpInit::getAsString() const {
   case GE: Result = "!ge"; break;
   case GT: Result = "!gt"; break;
   case LISTCONCAT: Result = "!listconcat"; break;
+  case LISTSPLAT: Result = "!listsplat"; break;
   case STRCONCAT: Result = "!strconcat"; break;
   }
   return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
@@ -1694,6 +1743,137 @@ Init *FieldInit::Fold(Record *CurRec) const {
   return const_cast<FieldInit *>(this);
 }
 
+/// Hash a !cond operator for FoldingSet lookup: the value type first, then
+/// each (condition, value) pointer pair in order.
+static void ProfileCondOpInit(FoldingSetNodeID &ID,
+                             ArrayRef<Init *> CondRange,
+                             ArrayRef<Init *> ValRange,
+                             const RecTy *ValType) {
+  assert(CondRange.size() == ValRange.size() &&
+         "Number of conditions and values must match!");
+  ID.AddPointer(ValType);
+  // Interleave the two ranges so the profile is cond0, val0, cond1, val1, ...
+  for (size_t Idx = 0, NumCases = CondRange.size(); Idx != NumCases; ++Idx) {
+    ID.AddPointer(CondRange[Idx]);
+    ID.AddPointer(ValRange[Idx]);
+  }
+}
+
+void CondOpInit::Profile(FoldingSetNodeID &ID) const {
+  // Trailing storage holds NumConds conditions followed by NumConds values.
+  Init *const *Storage = getTrailingObjects<Init *>();
+  ProfileCondOpInit(ID, makeArrayRef(Storage, NumConds),
+                    makeArrayRef(Storage + NumConds, NumConds), ValType);
+}
+
+/// Return the uniqued !cond operator for the given (condition, value) pairs
+/// and result type, allocating and pooling a new node on first use.
+CondOpInit *CondOpInit::get(ArrayRef<Init *> CondRange,
+                            ArrayRef<Init *> ValRange, RecTy *Ty) {
+  assert(CondRange.size() == ValRange.size() &&
+         "Number of conditions and values must match!");
+  static FoldingSet<CondOpInit> ThePool;
+  FoldingSetNodeID ID;
+  ProfileCondOpInit(ID, CondRange, ValRange, Ty);
+  void *IP = nullptr;
+  if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+    return I;
+
+  // One trailing array holds the conditions followed by the values; align the
+  // allocation for the CondOpInit actually constructed in it.
+  void *Mem = Allocator.Allocate(
+      totalSizeToAlloc<Init *>(2 * CondRange.size()), alignof(CondOpInit));
+  CondOpInit *I = new (Mem) CondOpInit(CondRange.size(), Ty);
+  Init **Slots = I->getTrailingObjects<Init *>();
+  std::uninitialized_copy(CondRange.begin(), CondRange.end(), Slots);
+  std::uninitialized_copy(ValRange.begin(), ValRange.end(),
+                          Slots + CondRange.size());
+  ThePool.InsertNode(I, IP);
+  return I;
+}
+
+/// Resolve references in every condition and value. Returns this node itself
+/// when nothing changed; otherwise a rebuilt, re-folded !cond.
+Init *CondOpInit::resolveReferences(Resolver &R) const {
+  bool AnyResolved = false;
+  SmallVector<Init *, 4> Conds, Vals;
+
+  // Resolve every condition first, then every value.
+  for (const Init *OldCond : getConds()) {
+    Conds.push_back(OldCond->resolveReferences(R));
+    if (Conds.back() != OldCond)
+      AnyResolved = true;
+  }
+  for (const Init *OldVal : getVals()) {
+    Vals.push_back(OldVal->resolveReferences(R));
+    if (Vals.back() != OldVal)
+      AnyResolved = true;
+  }
+
+  if (!AnyResolved)
+    return const_cast<CondOpInit *>(this);
+  return CondOpInit::get(Conds, Vals, getValType())->Fold(R.getCurrentRecord());
+}
+
+/// Scan conditions in order: a nonzero integer folds to its value, a
+/// non-integer leaves the !cond unfolded, and all zeros is a fatal error.
+Init *CondOpInit::Fold(Record *CurRec) const {
+  for (unsigned i = 0; i < NumConds; ++i) {
+    IntInit *CondI = dyn_cast_or_null<IntInit>(
+        getCond(i)->convertInitializerTo(IntRecTy::get()));
+    if (!CondI) // Not an integer (yet): leave the operator unfolded.
+      return const_cast<CondOpInit *>(this);
+    if (CondI->getValue())
+      return getVal(i)->convertInitializerTo(getValType());
+  }
+  // CurRec is null when the value is parsed outside a record (the parser
+  // calls ParseValue(nullptr)), so only dereference it when it is set.
+  std::string Msg = " does not have any true condition in:" + getAsString();
+  if (!CurRec)
+    PrintFatalError("!cond" + Msg);
+  PrintFatalError(CurRec->getLoc(), Twine(CurRec->getName()) + Msg);
+  return nullptr;
+}
+
+/// True only when every condition and every value is itself concrete.
+bool CondOpInit::isConcrete() const {
+  // Conditions are examined before values; the first non-concrete one decides.
+  for (unsigned Idx = 0; Idx != NumConds; ++Idx)
+    if (!getCond(Idx)->isConcrete())
+      return false;
+  for (unsigned Idx = 0; Idx != NumConds; ++Idx)
+    if (!getVal(Idx)->isConcrete())
+      return false;
+  return true;
+}
+
+/// True when every condition is complete and every value is concrete.
+bool CondOpInit::isComplete() const {
+  for (unsigned Idx = 0; Idx != NumConds; ++Idx)
+    if (!getCond(Idx)->isComplete())
+      return false;
+  // NOTE(review): isConcrete() (not isComplete()) is applied to values; confirm.
+  for (unsigned Idx = 0; Idx != NumConds; ++Idx)
+    if (!getVal(Idx)->isConcrete())
+      return false;
+  return true;
+}
+
+/// Render as "!cond(c0: v0, c1: v1, ...)".
+std::string CondOpInit::getAsString() const {
+  std::string Text = "!cond(";
+  for (unsigned Idx = 0, E = getNumConds(); Idx != E; ++Idx) {
+    if (Idx != 0)
+      Text += ", "; // Separator goes between cases, never after the last.
+    Text += getCond(Idx)->getAsString() + ": " + getVal(Idx)->getAsString();
+  }
+  return Text + ")";
+}
+
+Init *CondOpInit::getBit(unsigned Bit) const { // VarBitInit over (this, Bit)
+  return VarBitInit::get(const_cast<CondOpInit *>(this), Bit);
+}
+
 static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
                            ArrayRef<Init *> ArgRange,
                            ArrayRef<StringInit *> NameRange) {
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
index 733e0aeef623..a870e41d58f8 100644
--- a/lib/TableGen/SetTheory.cpp
+++ b/lib/TableGen/SetTheory.cpp
@@ -1,9 +1,8 @@
 //===- SetTheory.cpp - Generate ordered sets from DAG expressions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp
index 2c4d1f33997d..2fca068893f3 100644
--- a/lib/TableGen/StringMatcher.cpp
+++ b/lib/TableGen/StringMatcher.cpp
@@ -1,9 +1,8 @@
 //===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 16aeee561075..d28c62b3133d 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -1,9 +1,8 @@
 //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,6 +36,7 @@ struct {
   const char *Word;
 } PreprocessorDirs[] = {
   { tgtok::Ifdef, "ifdef" },
+  { tgtok::Ifndef, "ifndef" },
   { tgtok::Else, "else" },
   { tgtok::Endif, "endif" },
   { tgtok::Define, "define" }
@@ -545,6 +545,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("ge", tgtok::XGe)
     .Case("gt", tgtok::XGt)
     .Case("if", tgtok::XIf)
+    .Case("cond", tgtok::XCond)
     .Case("isa", tgtok::XIsA)
     .Case("head", tgtok::XHead)
     .Case("tail", tgtok::XTail)
@@ -552,6 +553,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("con", tgtok::XConcat)
     .Case("dag", tgtok::XDag)
     .Case("add", tgtok::XADD)
+    .Case("mul", tgtok::XMUL)
     .Case("and", tgtok::XAND)
     .Case("or", tgtok::XOR)
     .Case("shl", tgtok::XSHL)
@@ -563,6 +565,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("foldl", tgtok::XFoldl)
     .Case("foreach", tgtok::XForEach)
     .Case("listconcat", tgtok::XListConcat)
+    .Case("listsplat", tgtok::XListSplat)
     .Case("strconcat", tgtok::XStrConcat)
     .Default(tgtok::Error);
 
@@ -674,21 +677,28 @@ tgtok::TokKind TGLexer::lexPreprocessor(
     PrintFatalError("lexPreprocessor() called for unknown "
                     "preprocessor directive");
 
-  if (Kind == tgtok::Ifdef) {
+  if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
     StringRef MacroName = prepLexMacroName();
+    StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
     if (MacroName.empty())
-      return ReturnError(TokStart, "Expected macro name after #ifdef");
+      return ReturnError(TokStart, "Expected macro name after " + IfTokName);
 
     bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
 
+    // Canonicalize ifndef to ifdef equivalent
+    if (Kind == tgtok::Ifndef) {
+      MacroIsDefined = !MacroIsDefined;
+      Kind = tgtok::Ifdef;
+    }
+
     // Regardless of whether we are processing tokens or not,
     // we put the #ifdef control on stack.
     PrepIncludeStack.back()->push_back(
         {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
 
     if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr,
-                         "Only comments are supported after #ifdef NAME");
+      return ReturnError(CurPtr, "Only comments are supported after " +
+                                     IfTokName + " NAME");
 
     // If we were not processing tokens before this #ifdef,
     // then just return back to the lines skipping code.
@@ -712,7 +722,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(
     // Check if this #else is correct before calling prepSkipDirectiveEnd(),
     // which will move CurPtr away from the beginning of #else.
     if (PrepIncludeStack.back()->empty())
-      return ReturnError(TokStart, "#else without #ifdef");
+      return ReturnError(TokStart, "#else without #ifdef or #ifndef");
 
     PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
 
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index e9980b36b97b..3085ab2c0478 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -1,9 +1,8 @@
 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,9 +49,9 @@ namespace tgtok {
     MultiClass, String, Defset,
 
     // !keywords.
-    XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
-    XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XEq, XIsA, XDag,
-    XNe, XLe, XLt, XGe, XGt,
+    XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat,
+    XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty,
+    XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt,
 
     // Integer value.
     IntVal,
@@ -66,7 +65,7 @@ namespace tgtok {
 
     // Preprocessing tokens for internal usage by the lexer.
     // They are never returned as a result of Lex().
-    Ifdef, Else, Endif, Define
+    Ifdef, Ifndef, Else, Endif, Define
   };
 }
 
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 1d1f3603c83c..a9ace152d59e 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -1,9 +1,8 @@
 //===- TGParser.cpp - Parser for TableGen Files ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -537,8 +536,14 @@ Record *TGParser::ParseClassID() {
   }
 
   Record *Result = Records.getClass(Lex.getCurStrVal());
-  if (!Result)
-    TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
+  if (!Result) {
+    std::string Msg("Couldn't find class '" + Lex.getCurStrVal() + "'");
+    if (MultiClasses[Lex.getCurStrVal()].get())
+      TokError(Msg + ". Use 'defm' if you meant to use multiclass '" +
+               Lex.getCurStrVal() + "'");
+    else
+      TokError(Msg);
+  }
 
   Lex.Lex();
   return Result;
@@ -661,35 +666,47 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
 ///   RangePiece ::= INTVAL
 ///   RangePiece ::= INTVAL '-' INTVAL
 ///   RangePiece ::= INTVAL INTVAL
-bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges) {
-  if (Lex.getCode() != tgtok::IntVal) {
-    TokError("expected integer or bitrange");
-    return true;
-  }
-  int64_t Start = Lex.getCurIntVal();
+bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
+                               TypedInit *FirstItem) {
+  Init *CurVal = FirstItem;
+  if (!CurVal)
+    CurVal = ParseValue(nullptr);
+
+  IntInit *II = dyn_cast_or_null<IntInit>(CurVal);
+  if (!II)
+    return TokError("expected integer or bitrange");
+
+  int64_t Start = II->getValue();
   int64_t End;
 
   if (Start < 0)
     return TokError("invalid range, cannot be negative");
 
-  switch (Lex.Lex()) {  // eat first character.
+  switch (Lex.getCode()) {
   default:
     Ranges.push_back(Start);
     return false;
-  case tgtok::minus:
-    if (Lex.Lex() != tgtok::IntVal) {
+  case tgtok::minus: {
+    Lex.Lex(); // eat
+
+    Init *I_End = ParseValue(nullptr);
+    IntInit *II_End = dyn_cast_or_null<IntInit>(I_End);
+    if (!II_End) {
       TokError("expected integer value as end of range");
       return true;
     }
-    End = Lex.getCurIntVal();
+
+    End = II_End->getValue();
     break;
-  case tgtok::IntVal:
+  }
+  case tgtok::IntVal: {
     End = -Lex.getCurIntVal();
+    Lex.Lex();
     break;
   }
+  }
   if (End < 0)
     return TokError("invalid range, cannot be negative");
-  Lex.Lex();
 
   // Add to the range.
   if (Start < End)
@@ -1024,6 +1041,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
 
   case tgtok::XConcat:
   case tgtok::XADD:
+  case tgtok::XMUL:
   case tgtok::XAND:
   case tgtok::XOR:
   case tgtok::XSRA:
@@ -1036,6 +1054,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
   case tgtok::XGe:
   case tgtok::XGt:
   case tgtok::XListConcat:
+  case tgtok::XListSplat:
   case tgtok::XStrConcat: {  // Value ::= !binop '(' Value ',' Value ')'
     tgtok::TokKind OpTok = Lex.getCode();
     SMLoc OpLoc = Lex.getLoc();
@@ -1046,6 +1065,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     default: llvm_unreachable("Unhandled code!");
     case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
     case tgtok::XADD:    Code = BinOpInit::ADD; break;
+    case tgtok::XMUL:    Code = BinOpInit::MUL; break;
     case tgtok::XAND:    Code = BinOpInit::AND; break;
     case tgtok::XOR:     Code = BinOpInit::OR; break;
     case tgtok::XSRA:    Code = BinOpInit::SRA; break;
@@ -1058,6 +1078,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     case tgtok::XGe:     Code = BinOpInit::GE; break;
     case tgtok::XGt:     Code = BinOpInit::GT; break;
     case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
+    case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
     case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
     }
 
@@ -1076,6 +1097,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     case tgtok::XSRL:
     case tgtok::XSHL:
     case tgtok::XADD:
+    case tgtok::XMUL:
       Type = IntRecTy::get();
       ArgType = IntRecTy::get();
       break;
@@ -1095,6 +1117,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
       // We don't know the list type until we parse the first argument
       ArgType = ItemType;
       break;
+    case tgtok::XListSplat:
+      // Can't do any typechecking until we parse the first argument.
+      break;
     case tgtok::XStrConcat:
       Type = StringRecTy::get();
       ArgType = StringRecTy::get();
@@ -1134,6 +1159,33 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
             return nullptr;
           }
           break;
+        case BinOpInit::LISTSPLAT:
+          if (ItemType && InitList.size() == 1) {
+            if (!isa<ListRecTy>(ItemType)) {
+              Error(OpLoc,
+                    Twine("expected output type to be a list, got type '") +
+                        ItemType->getAsString() + "'");
+              return nullptr;
+            }
+            if (!ArgType->getListTy()->typeIsConvertibleTo(ItemType)) {
+              Error(OpLoc, Twine("expected first arg type to be '") +
+                               ArgType->getAsString() +
+                               "', got value of type '" +
+                               cast<ListRecTy>(ItemType)
+                                   ->getElementType()
+                                   ->getAsString() +
+                               "'");
+              return nullptr;
+            }
+          }
+          if (InitList.size() == 2 && !isa<IntRecTy>(ArgType)) {
+            Error(InitLoc, Twine("expected second parameter to be an int, got "
+                                 "value of type '") +
+                               ArgType->getAsString() + "'");
+            return nullptr;
+          }
+          ArgType = nullptr; // Broken invariant: types not identical.
+          break;
         case BinOpInit::EQ:
         case BinOpInit::NE:
           if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
@@ -1155,7 +1207,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
         }
         if (Code != BinOpInit::ADD && Code != BinOpInit::AND &&
             Code != BinOpInit::OR && Code != BinOpInit::SRA &&
-            Code != BinOpInit::SRL && Code != BinOpInit::SHL)
+            Code != BinOpInit::SRL && Code != BinOpInit::SHL &&
+            Code != BinOpInit::MUL)
           ArgType = Resolved;
       }
 
@@ -1170,14 +1223,19 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     }
     Lex.Lex();  // eat the ')'
 
+    // listconcat returns a list with type of the argument.
     if (Code == BinOpInit::LISTCONCAT)
       Type = ArgType;
+    // listsplat returns a list of type of the *first* argument.
+    if (Code == BinOpInit::LISTSPLAT)
+      Type = cast<TypedInit>(InitList.front())->getType()->getListTy();
 
     // We allow multiple operands to associative operators like !strconcat as
     // shorthand for nesting them.
     if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT ||
         Code == BinOpInit::CONCAT || Code == BinOpInit::ADD ||
-        Code == BinOpInit::AND || Code == BinOpInit::OR) {
+        Code == BinOpInit::AND || Code == BinOpInit::OR ||
+        Code == BinOpInit::MUL) {
       while (InitList.size() > 2) {
         Init *RHS = InitList.pop_back_val();
         RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))->Fold(CurRec);
@@ -1445,6 +1503,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
     return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
   }
 
+  case tgtok::XCond:
+    return ParseOperationCond(CurRec, ItemType);
+
   case tgtok::XFoldl: {
     // Value ::= !foldl '(' Id ',' Id ',' Value ',' Value ',' Value ')'
     Lex.Lex(); // eat the operation
@@ -1603,6 +1664,91 @@ RecTy *TGParser::ParseOperatorType() {
   return Type;
 }
 
+Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
+  Lex.Lex();  // eat the operation 'cond'
+  // Parses the remainder of "!cond(c0: v0, c1: v1, ...)"; null on any error.
+  if (Lex.getCode() != tgtok::l_paren) {
+     TokError("expected '(' after !cond operator");
+     return nullptr;
+  }
+  Lex.Lex();  // eat the '('
+
+  // Parse through '[Case: Val,]+'; Case[i] is the condition for Val[i].
+  SmallVector<Init *, 4> Case;
+  SmallVector<Init *, 4> Val;
+  while (true) {
+    if (Lex.getCode() == tgtok::r_paren) { // no cases, or a trailing ','
+      Lex.Lex(); // eat the ')'
+      break;
+    }
+
+    Init *V = ParseValue(CurRec); // condition: no item type is passed
+    if (!V)
+      return nullptr;
+    Case.push_back(V);
+
+    if (Lex.getCode() != tgtok::colon) {
+      TokError("expected ':'  following a condition in !cond operator");
+      return nullptr;
+    }
+    Lex.Lex(); // eat the ':'
+
+    V = ParseValue(CurRec, ItemType); // value: ItemType is forwarded
+    if (!V)
+      return nullptr;
+    Val.push_back(V);
+
+    if (Lex.getCode() == tgtok::r_paren) {
+      Lex.Lex(); // eat the ')'
+      break;
+    }
+
+    if (Lex.getCode() != tgtok::comma) {
+      TokError("expected ',' or ')' following a value in !cond operator");
+      return nullptr;
+    }
+    Lex.Lex();  // eat the ','
+  }
+
+  if (Case.size() < 1) { // "!cond()" with no cases at all
+    TokError("there should be at least 1 'condition : value' in the !cond operator");
+    return nullptr;
+  }
+
+  // Resolve a common type for all values; UnsetInit values do not take part.
+  RecTy *Type = nullptr;
+  for (Init *V : Val) {
+    RecTy *VTy = nullptr; // type of this value, when it has one
+    if (TypedInit *Vt = dyn_cast<TypedInit>(V))
+      VTy = Vt->getType();
+    if (BitsInit *Vbits = dyn_cast<BitsInit>(V))
+      VTy = BitsRecTy::get(Vbits->getNumBits());
+    if (isa<BitInit>(V))
+      VTy = BitRecTy::get();
+
+    if (Type == nullptr) {
+      if (!isa<UnsetInit>(V))
+        Type = VTy; // first non-unset value seeds the result type
+    } else {
+      if (!isa<UnsetInit>(V)) {
+        RecTy *RType = resolveTypes(Type, VTy); // null is reported below
+        if (!RType) {
+          TokError(Twine("inconsistent types '") + Type->getAsString() +
+                         "' and '" + VTy->getAsString() + "' for !cond");
+          return nullptr;
+        }
+        Type = RType;
+      }
+    }
+  }
+
+  if (!Type) { // e.g. every value was an UnsetInit
+    TokError("could not determine type for !cond from its arguments");
+    return nullptr;
+  }
+  return CondOpInit::get(Case, Val, Type)->Fold(CurRec); // may fold at once
+}
+
 /// ParseSimpleValue - Parse a tblgen value.  This returns null on error.
 ///
 ///   SimpleValue ::= IDValue
@@ -1620,7 +1766,9 @@ RecTy *TGParser::ParseOperatorType() {
 ///   SimpleValue ::= SRATOK '(' Value ',' Value ')'
 ///   SimpleValue ::= SRLTOK '(' Value ',' Value ')'
 ///   SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
+///   SimpleValue ::= LISTSPLATTOK '(' Value ',' Value ')'
 ///   SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
+///   SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
 ///
 Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
                                  IDParseMode Mode) {
@@ -1656,7 +1804,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
     break;
   }
   case tgtok::CodeFragment:
-    R = CodeInit::get(Lex.getCurStrVal());
+    R = CodeInit::get(Lex.getCurStrVal(), Lex.getLoc());
     Lex.Lex();
     break;
   case tgtok::question:
@@ -1919,6 +2067,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
   case tgtok::XConcat:
   case tgtok::XDag:
   case tgtok::XADD:
+  case tgtok::XMUL:
   case tgtok::XAND:
   case tgtok::XOR:
   case tgtok::XSRA:
@@ -1931,8 +2080,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
   case tgtok::XGe:
   case tgtok::XGt:
   case tgtok::XListConcat:
+  case tgtok::XListSplat:
   case tgtok::XStrConcat:   // Value ::= !binop '(' Value ',' Value ')'
   case tgtok::XIf:
+  case tgtok::XCond:
   case tgtok::XFoldl:
   case tgtok::XForEach:
   case tgtok::XSubst: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
@@ -2024,25 +2175,41 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
 
     case tgtok::paste:
       SMLoc PasteLoc = Lex.getLoc();
-
-      // Create a !strconcat() operation, first casting each operand to
-      // a string if necessary.
-
       TypedInit *LHS = dyn_cast<TypedInit>(Result);
       if (!LHS) {
         Error(PasteLoc, "LHS of paste is not typed!");
         return nullptr;
       }
 
+      // Check if it's a 'listA # listB'
+      if (isa<ListRecTy>(LHS->getType())) {
+        Lex.Lex();  // Eat the '#'.
+
+        switch (Lex.getCode()) {
+        case tgtok::colon:
+        case tgtok::semi:
+        case tgtok::l_brace:
+          Result = LHS; // trailing paste, ignore.
+          break;
+        default:
+          Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+          Result = BinOpInit::getListConcat(LHS, RHSResult);
+        }
+        break;
+      }
+
+      // Create a !strconcat() operation, first casting each operand to
+      // a string if necessary.
       if (LHS->getType() != StringRecTy::get()) {
-        LHS = dyn_cast<TypedInit>(
+        auto CastLHS = dyn_cast<TypedInit>(
             UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get())
                 ->Fold(CurRec));
-        if (!LHS) {
-          Error(PasteLoc, Twine("can't cast '") + LHS->getAsString() +
-                              "' to string");
+        if (!CastLHS) {
+          Error(PasteLoc,
+                Twine("can't cast '") + LHS->getAsString() + "' to string");
           return nullptr;
         }
+        LHS = CastLHS;
       }
 
       TypedInit *RHS = nullptr;
@@ -2069,14 +2236,15 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
         }
 
         if (RHS->getType() != StringRecTy::get()) {
-          RHS = dyn_cast<TypedInit>(
+          auto CastRHS = dyn_cast<TypedInit>(
               UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get())
                   ->Fold(CurRec));
-          if (!RHS) {
-            Error(PasteLoc, Twine("can't cast '") + RHS->getAsString() +
-                                "' to string");
+          if (!CastRHS) {
+            Error(PasteLoc,
+                  Twine("can't cast '") + RHS->getAsString() + "' to string");
             return nullptr;
           }
+          RHS = CastRHS;
         }
 
         break;
@@ -2167,6 +2335,10 @@ void TGParser::ParseValueList(SmallVectorImpl<Init*> &Result, Record *CurRec,
   while (Lex.getCode() == tgtok::comma) {
     Lex.Lex();  // Eat the comma
 
+    // ignore trailing comma for lists
+    if (Lex.getCode() == tgtok::r_square)
+      return;
+
     if (ArgsRec && !EltTy) {
       ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
       if (ArgN >= TArgs.size()) {
@@ -2279,12 +2451,6 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
   SmallVector<unsigned, 16> Ranges;
 
   switch (Lex.getCode()) {
-  case tgtok::IntVal: { // RangePiece.
-    if (ParseRangePiece(Ranges))
-      return nullptr;
-    break;
-  }
-
   case tgtok::l_brace: { // '{' RangeList '}'
     Lex.Lex(); // eat the '{'
     ParseRangeList(Ranges);
@@ -2299,23 +2465,35 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
   default: {
     SMLoc ValueLoc = Lex.getLoc();
     Init *I = ParseValue(nullptr);
-    TypedInit *TI = dyn_cast<TypedInit>(I);
-    if (!TI || !isa<ListRecTy>(TI->getType())) {
-      std::string Type;
-      if (TI)
-        Type = (Twine("' of type '") + TI->getType()->getAsString()).str();
-      Error(ValueLoc, "expected a list, got '" + I->getAsString() + Type + "'");
-      if (CurMultiClass)
-        PrintNote({}, "references to multiclass template arguments cannot be "
-                      "resolved at this time");
+    if (!I)
       return nullptr;
+
+    TypedInit *TI = dyn_cast<TypedInit>(I);
+    if (TI && isa<ListRecTy>(TI->getType())) {
+      ForeachListValue = I;
+      IterType = cast<ListRecTy>(TI->getType())->getElementType();
+      break;
     }
-    ForeachListValue = I;
-    IterType = cast<ListRecTy>(TI->getType())->getElementType();
-    break;
+
+    if (TI) {
+      if (ParseRangePiece(Ranges, TI))
+        return nullptr;
+      break;
+    }
+
+    std::string Type;
+    if (TI)
+      Type = (Twine("' of type '") + TI->getType()->getAsString()).str();
+    Error(ValueLoc, "expected a list, got '" + I->getAsString() + Type + "'");
+    if (CurMultiClass) {
+      PrintNote({}, "references to multiclass template arguments cannot be "
+                "resolved at this time");
+    }
+    return nullptr;
   }
   }
 
+
   if (!Ranges.empty()) {
     assert(!IterType && "Type already initialized?");
     IterType = IntRecTy::get();
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index e3849043513b..af2b639f8d59 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -1,9 +1,8 @@
 //===- TGParser.h - Parser for TableGen Files -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -191,9 +190,11 @@ private:  // Parser methods.
   bool ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges);
   bool ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges);
   void ParseRangeList(SmallVectorImpl<unsigned> &Result);
-  bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges);
+  bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
+                       TypedInit *FirstItem = nullptr);
   RecTy *ParseType();
   Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+  Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
   RecTy *ParseOperatorType();
   Init *ParseObjectName(MultiClass *CurMultiClass);
   Record *ParseClassID();
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 77ed8414b15f..e11b28e8cff9 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -1,9 +1,8 @@
 //===- TableGenBackend.cpp - Utilities for TableGen Backends ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index c36d9354f3ba..6965403a25ab 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -1,9 +1,8 @@
 //==-- AArch64.h - Top-level interface for AArch64  --------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,6 +56,7 @@ InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
 FunctionPass *createAArch64PreLegalizeCombiner();
+FunctionPass *createAArch64StackTaggingPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -79,6 +79,7 @@ void initializeAArch64StorePairSuppressPass(PassRegistry&);
 void initializeFalkorHWPFFixPass(PassRegistry&);
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
 void initializeLDTLSCleanupPass(PassRegistry&);
+void initializeAArch64StackTaggingPass(PassRegistry&);
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 8f79140cba64..e39c6995e367 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -1,9 +1,8 @@
 //=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -104,6 +103,21 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
 def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
   "Enable Scalable Vector Extension (SVE) instructions">;
 
+def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
+  "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>; // implies FeatureSVE
+// Each SVE2 sub-feature below lists FeatureSVE2 (and thus FeatureSVE) as implied.
+def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
+  "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
+
+def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
+  "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>;
+
+def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
+  "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
+// NOTE(review): the feature string below is "bitperm", without the "sve2-" prefix its siblings use; confirm this is intended.
+def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
+  "Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
+
 def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
                                         "Has zero-cycle register moves">;
 def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
@@ -127,7 +141,7 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
                                           "Disallow all unaligned memory "
                                           "access">;
 
-foreach i = {1-7,18,20} in
+foreach i = {1-7,9-15,18,20-28} in
     def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
                                              "Reserve X"#i#", making it unavailable "
                                              "as a GPR">;
@@ -385,9 +399,29 @@ def AArch64InstrInfo : InstrInfo;
 
 include "AArch64SystemOperands.td"
 
+//===----------------------------------------------------------------------===//
+// Access to privileged registers
+//===----------------------------------------------------------------------===//
+
+foreach i = 1-3 in // expands to FeatureUseEL1ForTP, FeatureUseEL2ForTP and FeatureUseEL3ForTP
+def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP", // feature strings "tpidr-el1" .. "tpidr-el3"
+  "true", "Permit use of TPIDR_EL"#i#" for the TLS base">;
+
 //===----------------------------------------------------------------------===//
 // AArch64 Processors supported.
 //
+
+//===----------------------------------------------------------------------===//
+// Unsupported features to disable for scheduling models
+//===----------------------------------------------------------------------===//
+
+class AArch64Unsupported { list<Predicate> F; } // F: the predicates a record of this class names
+// Collects the SVE predicate and every SVE2 predicate into one list.
+def SVEUnsupported : AArch64Unsupported {
+  let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
+           HasSVE2BitPerm];
+}
+
 include "AArch64SchedA53.td"
 include "AArch64SchedA57.td"
 include "AArch64SchedCyclone.td"
@@ -483,6 +517,18 @@ def ProcA75     : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
                                    FeaturePerfMon
                                    ]>;
 
+def ProcA76     : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+                                   "Cortex-A76 ARM processors", [ // this record also backs the "cortex-a76ae" processor model
+                                    HasV8_2aOps,
+                                    FeatureFPARMv8,
+                                    FeatureNEON,
+                                    FeatureRCPC,
+                                    FeatureCrypto,
+                                    FeatureFullFP16,
+                                    FeatureDotProd,
+                                    FeatureSSBS
+                                    ]>;
+
 // Note that cyclone does not fuse AES instructions, but newer apple chips do
 // perform the fusion and cyclone is used by default when targetting apple OSes.
 def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
@@ -554,7 +600,7 @@ def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
                                      FeatureDotProd,
                                      FeatureExynosCheapAsMoveHandling,
                                      FeatureForce32BitJumpTables,
-                                     FeatureFP16FML,
+                                     FeatureFullFP16,
                                      FeatureFuseAddress,
                                      FeatureFuseAES,
                                      FeatureFuseArithmeticLogic,
@@ -694,15 +740,17 @@ def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
 def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
 def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57.
 def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
 def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
 def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
+def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
+def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
 def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
 def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
 def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
 def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
 def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
+def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>;
 def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
 def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
 def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -716,6 +764,9 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
 // FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
 def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
 
+// Alias for the latest Apple processor model supported by LLVM.
+def : ProcessorModel<"apple-latest", CycloneModel, [ProcCyclone]>;
+
 //===----------------------------------------------------------------------===//
 // Assembly parser
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 30232afaf024..e80fe2cada09 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64A53Fix835769.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass changes code to work around Cortex-A53 erratum 835769.
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 452fbd3488b0..92c8c4955d50 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // For best-case performance on Cortex-A57, we should try to use a balanced
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 22b0c1e3b471..89404463e1f0 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // When profitable, replace GPR targeting i64 instructions with their
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 0442076992e2..094fbd999523 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,10 +17,12 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetObjectFile.h"
-#include "InstPrinter/AArch64InstPrinter.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "TargetInfo/AArch64TargetInfo.h"
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -29,6 +30,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -44,6 +46,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
@@ -96,6 +99,10 @@ public:
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
 
+  std::map<std::pair<unsigned, uint32_t>, MCSymbol *> HwasanMemaccessSymbols;
+  void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
+  void EmitHwasanMemaccessSymbols(Module &M);
+
   void EmitSled(const MachineInstr &MI, SledKind Kind);
 
   /// tblgen'erated driver function for lowering simple MI->MC
@@ -147,11 +154,9 @@ private:
                           raw_ostream &O);
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O) override;
+                             const char *ExtraCode, raw_ostream &O) override;
 
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
 
@@ -230,7 +235,204 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
   recordSled(CurSled, MI, Kind);
 }
 
+void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { // Lowers the pseudo to a BL to a per-(Reg, AccessInfo) symbol.
+  unsigned Reg = MI.getOperand(0).getReg(); // operand 0: register
+  uint32_t AccessInfo = MI.getOperand(1).getImm(); // operand 1: immediate
+  MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}]; // cached per pair; null the first time a pair is seen
+  if (!Sym) {
+    // FIXME: Make this work on non-ELF.
+    if (!TM.getTargetTriple().isOSBinFormatELF())
+      report_fatal_error("llvm.hwasan.check.memaccess only supported on ELF");
+    // Symbol name is "__hwasan_check_x<Reg - X0>_<AccessInfo>".
+    std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
+                          utostr(AccessInfo);
+    Sym = OutContext.getOrCreateSymbol(SymName);
+  }
+  // Only the call is emitted here; EmitHwasanMemaccessSymbols emits the label and body for each cached symbol.
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(AArch64::BL)
+                     .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
+}
+
+void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { // Emits the routine behind every symbol in HwasanMemaccessSymbols.
+  if (HwasanMemaccessSymbols.empty())
+    return;
+
+  const Triple &TT = TM.getTargetTriple();
+  assert(TT.isOSBinFormatELF()); // LowerHWASAN_CHECK_MEMACCESS only creates symbols for ELF targets
+  std::unique_ptr<MCSubtargetInfo> STI(
+      TM.getTarget().createMCSubtargetInfo(TT.str(), "", "")); // empty CPU and feature strings
+
+  MCSymbol *HwasanTagMismatchSym =
+      OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
+
+  const MCSymbolRefExpr *HwasanTagMismatchRef =
+      MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext);
+
+  for (auto &P : HwasanMemaccessSymbols) { // key = (Reg, AccessInfo), value = symbol
+    unsigned Reg = P.first.first;
+    uint32_t AccessInfo = P.first.second;
+    MCSymbol *Sym = P.second;
+    // Each routine goes into a ".text.hot" section with SHF_GROUP set, passing the symbol name as the group.
+    OutStreamer->SwitchSection(OutContext.getELFSection(
+        ".text.hot", ELF::SHT_PROGBITS,
+        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+        Sym->getName()));
+    // The symbol is a weak, hidden function; its label marks the routine entry.
+    OutStreamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+    OutStreamer->EmitSymbolAttribute(Sym, MCSA_Weak);
+    OutStreamer->EmitSymbolAttribute(Sym, MCSA_Hidden);
+    OutStreamer->EmitLabel(Sym);
+    // x16 = bits [55:4] of Reg (UBFM with immr=4, imms=55).
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::UBFMXri)
+                                     .addReg(AArch64::X16)
+                                     .addReg(Reg)
+                                     .addImm(4)
+                                     .addImm(55),
+                                 *STI);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBroX) // w16 = byte at [x9 + x16]
+                                     .addReg(AArch64::W16)
+                                     .addReg(AArch64::X9) // NOTE(review): x9 is presumably a base address set up by the caller; confirm
+                                     .addReg(AArch64::X16)
+                                     .addImm(0)
+                                     .addImm(0),
+                                 *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::SUBSXrs) // flags = x16 - (Reg >> 56)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::X16)
+            .addReg(Reg)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
+        *STI);
+    MCSymbol *HandlePartialSym = OutContext.createTempSymbol();
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::Bcc)
+            .addImm(AArch64CC::NE) // not equal -> HandlePartialSym
+            .addExpr(MCSymbolRefExpr::create(HandlePartialSym, OutContext)),
+        *STI);
+    MCSymbol *ReturnSym = OutContext.createTempSymbol();
+    OutStreamer->EmitLabel(ReturnSym);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI); // equal: return to the caller
+    // First compare failed: compare the loaded byte (w16) against 15.
+    OutStreamer->EmitLabel(HandlePartialSym);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri)
+                                     .addReg(AArch64::WZR)
+                                     .addReg(AArch64::W16)
+                                     .addImm(15)
+                                     .addImm(0),
+                                 *STI);
+    MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::Bcc)
+            .addImm(AArch64CC::HI) // w16 > 15 (unsigned) -> HandleMismatchSym
+            .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+        *STI);
+    // x17 = (Reg & 0xf) + (Size - 1); the add is omitted when Size == 1.
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::ANDXri)
+            .addReg(AArch64::X17)
+            .addReg(Reg)
+            .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+        *STI);
+    unsigned Size = 1 << (AccessInfo & 0xf); // low 4 bits of AccessInfo are used as a log2 size
+    if (Size != 1)
+      OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri)
+                                       .addReg(AArch64::X17)
+                                       .addReg(AArch64::X17)
+                                       .addImm(Size - 1)
+                                       .addImm(0),
+                                   *STI);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs) // flags = w16 - w17
+                                     .addReg(AArch64::WZR)
+                                     .addReg(AArch64::W16)
+                                     .addReg(AArch64::W17)
+                                     .addImm(0),
+                                 *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::Bcc)
+            .addImm(AArch64CC::LS) // w16 <= w17 (unsigned) -> HandleMismatchSym
+            .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+        *STI);
+    // x16 = Reg | 0xf; load the byte at that address and compare it with Reg >> 56.
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::ORRXri)
+            .addReg(AArch64::X16)
+            .addReg(Reg)
+            .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+        *STI);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui)
+                                     .addReg(AArch64::W16)
+                                     .addReg(AArch64::X16)
+                                     .addImm(0),
+                                 *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::SUBSXrs)
+            .addReg(AArch64::XZR)
+            .addReg(AArch64::X16)
+            .addReg(Reg)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
+        *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::Bcc)
+            .addImm(AArch64CC::EQ) // equal -> ReturnSym
+            .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
+        *STI);
+    // Mismatch path: store x0/x1 and FP/LR on the stack, then branch to __hwasan_tag_mismatch.
+    OutStreamer->EmitLabel(HandleMismatchSym);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre)
+                                     .addReg(AArch64::SP)
+                                     .addReg(AArch64::X0)
+                                     .addReg(AArch64::X1)
+                                     .addReg(AArch64::SP)
+                                     .addImm(-32), // pre-index; immediate is in 8-byte units (-256 bytes)
+                                 *STI);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXi)
+                                     .addReg(AArch64::FP)
+                                     .addReg(AArch64::LR)
+                                     .addReg(AArch64::SP)
+                                     .addImm(29), // immediate is in 8-byte units (offset 232 bytes)
+                                 *STI);
+    // x0 = Reg (the move is omitted when Reg is already x0); x1 = AccessInfo.
+    if (Reg != AArch64::X0)
+      OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ORRXrs)
+                                       .addReg(AArch64::X0)
+                                       .addReg(AArch64::XZR)
+                                       .addReg(Reg)
+                                       .addImm(0),
+                                   *STI);
+    OutStreamer->EmitInstruction(MCInstBuilder(AArch64::MOVZXi)
+                                     .addReg(AArch64::X1)
+                                     .addImm(AccessInfo)
+                                     .addImm(0),
+                                 *STI);
+
+    // Intentionally load the GOT entry and branch to it, rather than possibly
+    // late binding the function, which may clobber the registers before we have
+    // a chance to save them.
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::ADRP)
+            .addReg(AArch64::X16)
+            .addExpr(AArch64MCExpr::create(
+                HwasanTagMismatchRef,
+                AArch64MCExpr::VariantKind::VK_GOT_PAGE, OutContext)),
+        *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::LDRXui)
+            .addReg(AArch64::X16)
+            .addReg(AArch64::X16)
+            .addExpr(AArch64MCExpr::create(
+                HwasanTagMismatchRef,
+                AArch64MCExpr::VariantKind::VK_GOT_LO12, OutContext)),
+        *STI);
+    OutStreamer->EmitInstruction(
+        MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); // branch (not call) to the loaded address
+  }
+}
+
 void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+  EmitHwasanMemaccessSymbols(M);
+
   const Triple &TT = TM.getTargetTriple();
   if (TT.isOSBinFormatMachO()) {
     // Funny Darwin hack: This flag tells the linker that no global symbols
@@ -295,14 +497,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
     break;
   }
   case MachineOperand::MO_GlobalAddress: {
-    const GlobalValue *GV = MO.getGlobal();
-    MCSymbol *Sym = getSymbol(GV);
-
-    // FIXME: Can we get anything other than a plain symbol here?
-    assert(!MO.getTargetFlags() && "Unknown operand target flag!");
-
-    Sym->print(O, MAI);
-    printOffset(MO.getOffset(), O);
+    PrintSymbolOperand(MO, O);
     break;
   }
   case MachineOperand::MO_BlockAddress: {
@@ -348,12 +543,11 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
 }
 
 bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                                        unsigned AsmVariant,
                                         const char *ExtraCode, raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(OpNum);
 
   // First try the generic code, which knows about modifiers like 'c' and 'n'.
-  if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O))
     return false;
 
   // Does this asm operand have a single letter operand modifier?
@@ -364,9 +558,6 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     switch (ExtraCode[0]) {
     default:
       return true; // Unknown modifier.
-    case 'a':      // Print 'a' modifier
-      PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
-      return false;
     case 'w':      // Print W register
     case 'x':      // Print X register
       if (MO.isReg())
@@ -432,7 +623,6 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
 
 bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                               unsigned OpNum,
-                                              unsigned AsmVariant,
                                               const char *ExtraCode,
                                               raw_ostream &O) {
   if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a')
@@ -471,9 +661,18 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
 
+  const Function &F = MF->getFunction();
   const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-  MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
-  OutStreamer->SwitchSection(ReadOnlySec);
+  bool JTInDiffSection =
+      !STI->isTargetCOFF() ||
+      !TLOF.shouldPutJumpTableInFunctionSection(
+          MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+          F);
+  if (JTInDiffSection) {
+      // Drop it in the readonly section.
+      MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
+      OutStreamer->SwitchSection(ReadOnlySec);
+  }
 
   auto AFI = MF->getInfo<AArch64FunctionInfo>();
   for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
@@ -694,6 +893,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   switch (MI->getOpcode()) {
   default:
     break;
+    case AArch64::MOVMCSym: {
+    unsigned DestReg = MI->getOperand(0).getReg();
+    const MachineOperand &MO_Sym = MI->getOperand(1);
+    MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
+    MCOperand Hi_MCSym, Lo_MCSym;
+
+    Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
+    Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
+
+    MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
+    MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
+
+    MCInst MovZ;
+    MovZ.setOpcode(AArch64::MOVZXi);
+    MovZ.addOperand(MCOperand::createReg(DestReg));
+    MovZ.addOperand(Hi_MCSym);
+    MovZ.addOperand(MCOperand::createImm(16));
+    EmitToStreamer(*OutStreamer, MovZ);
+
+    MCInst MovK;
+    MovK.setOpcode(AArch64::MOVKXi);
+    MovK.addOperand(MCOperand::createReg(DestReg));
+    MovK.addOperand(MCOperand::createReg(DestReg));
+    MovK.addOperand(Lo_MCSym);
+    MovK.addOperand(MCOperand::createImm(0));
+    EmitToStreamer(*OutStreamer, MovK);
+    return;
+  }
   case AArch64::MOVIv2d_ns:
     // If the target has <rdar://problem/16473581>, lower this
     // instruction to movi.16b instead.
@@ -856,6 +1083,10 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     LowerPATCHABLE_TAIL_CALL(*MI);
     return;
 
+  case AArch64::HWASAN_CHECK_MEMACCESS:
+    LowerHWASAN_CHECK_MEMACCESS(*MI);
+    return;
+
   case AArch64::SEH_StackAlloc:
     TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
     return;
diff --git a/lib/Target/AArch64/AArch64BranchTargets.cpp b/lib/Target/AArch64/AArch64BranchTargets.cpp
index da70a624c5be..6fa3a462bc71 100644
--- a/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index 5980e5684e89..59757769c89a 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -1,9 +1,8 @@
 //===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -45,6 +44,8 @@
 #include <cstdint>
 #include <iterator>
 
+#define DEBUG_TYPE "aarch64-call-lowering"
+
 using namespace llvm;
 
 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
@@ -56,18 +57,18 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
                      CCAssignFn *AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
     int FI = MFI.CreateFixedObject(Size, Offset, true);
     MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
-    unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
+    Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
     MIRBuilder.buildFrameIndex(AddrReg, FI);
     StackUsed = std::max(StackUsed, Size + Offset);
     return AddrReg;
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     markPhysRegUsed(PhysReg);
     switch (VA.getLocInfo()) {
@@ -84,11 +85,12 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
     }
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
+    // FIXME: Get the real alignment; the 1 passed below is only a placeholder.
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
-        0);
+        1);
     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
   }
 
@@ -97,6 +99,8 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
   /// (it's an implicit-def of the BL).
   virtual void markPhysRegUsed(unsigned PhysReg) = 0;
 
+  bool isArgumentHandler() const override { return true; }
+
   uint64_t StackUsed;
 };
 
@@ -129,31 +133,31 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
         AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     LLT p0 = LLT::pointer(0, 64);
     LLT s64 = LLT::scalar(64);
-    unsigned SPReg = MRI.createGenericVirtualRegister(p0);
-    MIRBuilder.buildCopy(SPReg, AArch64::SP);
+    Register SPReg = MRI.createGenericVirtualRegister(p0);
+    MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
 
-    unsigned OffsetReg = MRI.createGenericVirtualRegister(s64);
+    Register OffsetReg = MRI.createGenericVirtualRegister(s64);
     MIRBuilder.buildConstant(OffsetReg, Offset);
 
-    unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+    Register AddrReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 
     MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
     return AddrReg;
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     MIB.addUse(PhysReg, RegState::Implicit);
-    unsigned ExtReg = extendRegister(ValVReg, VA);
+    Register ExtReg = extendRegister(ValVReg, VA);
     MIRBuilder.buildCopy(PhysReg, ExtReg);
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
     if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
       Size = VA.getLocVT().getSizeInBits() / 8;
@@ -162,7 +166,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
                     .getReg();
     }
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
-        MPO, MachineMemOperand::MOStore, Size, 0);
+        MPO, MachineMemOperand::MOStore, Size, 1);
     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
   }
 
@@ -188,8 +192,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
 
 void AArch64CallLowering::splitToValueTypes(
     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
-    const SplitArgTy &PerformArgSplit) const {
+    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
@@ -203,32 +206,31 @@ void AArch64CallLowering::splitToValueTypes(
   if (SplitVTs.size() == 1) {
     // No splitting to do, but we want to replace the original type (e.g. [1 x
     // double] -> double).
-    SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                            OrigArg.Flags, OrigArg.IsFixed);
     return;
   }
 
-  unsigned FirstRegIdx = SplitArgs.size();
+  // Create one ArgInfo for each virtual register in the original ArgInfo.
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
       OrigArg.Ty, CallConv, false);
-  for (auto SplitVT : SplitVTs) {
-    Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
-    SplitArgs.push_back(
-        ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
-                SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
+                           OrigArg.IsFixed);
     if (NeedsRegBlock)
       SplitArgs.back().Flags.setInConsecutiveRegs();
   }
 
   SplitArgs.back().Flags.setInConsecutiveRegsLast();
-
-  for (unsigned i = 0; i < Offsets.size(); ++i)
-    PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
 }
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
-                                      ArrayRef<unsigned> VRegs) const {
+                                      ArrayRef<Register> VRegs,
+                                      Register SwiftErrorVReg) const {
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -250,34 +252,101 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
            "For each split Type there should be exactly one VReg.");
 
     SmallVector<ArgInfo, 8> SplitArgs;
+    CallingConv::ID CC = F.getCallingConv();
+
     for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-      // We zero-extend i1s to i8.
-      unsigned CurVReg = VRegs[i];
-      if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
-        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
-                       ->getOperand(0)
-                       .getReg();
+      if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
+        LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
+        return false;
       }
 
+      Register CurVReg = VRegs[i];
       ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-      splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
-                        [&](unsigned Reg, uint64_t Offset) {
-                          MIRBuilder.buildExtract(Reg, CurVReg, Offset);
-                        });
+
+      // i1 is a special case because SDAG i1 true is naturally zero extended
+      // when widened using ANYEXT. We need to do it explicitly here.
+      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
+        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
+      } else {
+        // Some types will need extending as specified by the CC.
+        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
+        if (EVT(NewVT) != SplitEVTs[i]) {
+          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+          if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                             Attribute::SExt))
+            ExtendOp = TargetOpcode::G_SEXT;
+          else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                                  Attribute::ZExt))
+            ExtendOp = TargetOpcode::G_ZEXT;
+
+          LLT NewLLT(NewVT);
+          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
+          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
+          // Instead of an extend, we might have a vector type which needs
+          // padding with more elements, e.g. <2 x half> -> <4 x half>.
+          if (NewVT.isVector()) {
+            if (OldLLT.isVector()) {
+              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
+                // We don't handle VA types which are not exactly twice the
+                // size, but this can easily be done in the future.
+                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
+                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
+                  return false;
+                }
+                auto Undef = MIRBuilder.buildUndef({OldLLT});
+                CurVReg =
+                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
+                        .getReg(0);
+              } else {
+                // Just do a vector extend.
+                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
+                              .getReg(0);
+              }
+            } else if (NewLLT.getNumElements() == 2) {
+              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
+              // <1 x S> vector types in GISel we use a build_vector instead
+              // of a vector merge/concat.
+              auto Undef = MIRBuilder.buildUndef({OldLLT});
+              CurVReg =
+                  MIRBuilder
+                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
+                      .getReg(0);
+            } else {
+              LLVM_DEBUG(dbgs() << "Could not handle ret ty");
+              return false;
+            }
+          } else {
+            // A scalar extend.
+            CurVReg =
+                MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
+          }
+        }
+      }
+      if (CurVReg != CurArgInfo.Regs[0]) {
+        CurArgInfo.Regs[0] = CurVReg;
+        // Reset the arg flags after modifying CurVReg.
+        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+      }
+     splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
     }
 
     OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
   }
 
+  if (SwiftErrorVReg) {
+    MIB.addUse(AArch64::X21, RegState::Implicit);
+    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
+  }
+
   MIRBuilder.insertInstr(MIB);
   return Success;
 }
 
-bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                               const Function &F,
-                                               ArrayRef<unsigned> VRegs) const {
+bool AArch64CallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   MachineFunction &MF = MIRBuilder.getMF();
   MachineBasicBlock &MBB = MIRBuilder.getMBB();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -288,26 +357,11 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   for (auto &Arg : F.args()) {
     if (DL.getTypeStoreSize(Arg.getType()) == 0)
       continue;
+
     ArgInfo OrigArg{VRegs[i], Arg.getType()};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
-    bool Split = false;
-    LLT Ty = MRI.getType(VRegs[i]);
-    unsigned Dst = VRegs[i];
-
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
-                      [&](unsigned Reg, uint64_t Offset) {
-                        if (!Split) {
-                          Split = true;
-                          Dst = MRI.createGenericVirtualRegister(Ty);
-                          MIRBuilder.buildUndef(Dst);
-                        }
-                        unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
-                        MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
-                        Dst = Tmp;
-                      });
-
-    if (Dst != VRegs[i])
-      MIRBuilder.buildCopy(VRegs[i], Dst);
+
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
     ++i;
   }
 
@@ -351,7 +405,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallingConv::ID CallConv,
                                     const MachineOperand &Callee,
                                     const ArgInfo &OrigRet,
-                                    ArrayRef<ArgInfo> OrigArgs) const {
+                                    ArrayRef<ArgInfo> OrigArgs,
+                                    Register SwiftErrorVReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -359,10 +414,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> SplitArgs;
   for (auto &OrigArg : OrigArgs) {
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
-                      [&](unsigned Reg, uint64_t Offset) {
-                        MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
-                      });
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv);
+    // AAPCS requires that the caller zero-extend i1 to 8 bits.
+    if (OrigArg.Ty->isIntegerTy(1))
+      SplitArgs.back().Flags.setZExt();
   }
 
   // Find out which ABI gets to decide where things go.
@@ -412,23 +467,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // symmetry with the arugments, the physical register must be an
   // implicit-define of the call instruction.
   CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
-  if (OrigRet.Reg) {
+  if (!OrigRet.Ty->isVoidTy()) {
     SplitArgs.clear();
 
-    SmallVector<uint64_t, 8> RegOffsets;
-    SmallVector<unsigned, 8> SplitRegs;
-    splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv(),
-                      [&](unsigned Reg, uint64_t Offset) {
-                        RegOffsets.push_back(Offset);
-                        SplitRegs.push_back(Reg);
-                      });
+    splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv());
 
     CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
     if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
       return false;
+  }
 
-    if (!RegOffsets.empty())
-      MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
+  if (SwiftErrorVReg) {
+    MIB.addDef(AArch64::X21, RegState::Implicit);
+    MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21));
   }
 
   CallSeqStart.addImm(Handler.StackSize).addImm(0);
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index 1c2bd6a4de5d..4f428f254537 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,9 +1,8 @@
 //===- AArch64CallLowering.h - Call lowering --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -35,14 +34,24 @@ public:
   AArch64CallLowering(const AArch64TargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<Register> VRegs,
+                   Register SwiftErrorVReg) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<unsigned> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
+
+  bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+                 const MachineOperand &Callee, const ArgInfo &OrigRet,
+                 ArrayRef<ArgInfo> OrigArgs,
+                 Register SwiftErrorVReg) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
-                 ArrayRef<ArgInfo> OrigArgs) const override;
+                 ArrayRef<ArgInfo> OrigArgs) const override {
+    return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0);
+  }
+
+  bool supportSwiftError() const override { return true; }
 
 private:
   using RegHandler = std::function<void(MachineIRBuilder &, Type *, unsigned,
@@ -51,13 +60,10 @@ private:
   using MemHandler =
       std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
 
-  using SplitArgTy = std::function<void(unsigned, uint64_t)>;
-
   void splitToValueTypes(const ArgInfo &OrigArgInfo,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
                          const DataLayout &DL, MachineRegisterInfo &MRI,
-                         CallingConv::ID CallConv,
-                         const SplitArgTy &SplitArg) const;
+                         CallingConv::ID CallConv) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64CallingConvention.cpp b/lib/Target/AArch64/AArch64CallingConvention.cpp
new file mode 100644
index 000000000000..02538a187611
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -0,0 +1,134 @@
+//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the table-generated and custom routines for the AArch64
+// Calling Convention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64CallingConvention.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/CallingConv.h"
+using namespace llvm;
+
+static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+                                     AArch64::X3, AArch64::X4, AArch64::X5,
+                                     AArch64::X6, AArch64::X7};
+static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+                                     AArch64::H3, AArch64::H4, AArch64::H5,
+                                     AArch64::H6, AArch64::H7};
+static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+                                     AArch64::S3, AArch64::S4, AArch64::S5,
+                                     AArch64::S6, AArch64::S7};
+static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+                                     AArch64::D3, AArch64::D4, AArch64::D5,
+                                     AArch64::D6, AArch64::D7};
+static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
+                                     AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+                             CCState &State, unsigned SlotAlign) {
+  unsigned Size = LocVT.getSizeInBits() / 8;
+  unsigned StackAlign =
+      State.getMachineFunction().getDataLayout().getStackAlignment();
+  unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+  for (auto &It : PendingMembers) {
+    It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+    State.addLoc(It);
+    SlotAlign = 1;
+  }
+
+  // All pending members have now been allocated
+  PendingMembers.clear();
+  return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+      unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+      ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // block.
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  // Try to allocate a contiguous block of registers, each of the correct
+  // size to hold one member.
+  ArrayRef<MCPhysReg> RegList;
+  if (LocVT.SimpleTy == MVT::i64)
+    RegList = XRegList;
+  else if (LocVT.SimpleTy == MVT::f16)
+    RegList = HRegList;
+  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
+    RegList = SRegList;
+  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
+    RegList = DRegList;
+  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
+    RegList = QRegList;
+  else {
+    // Not an array we want to split up after all.
+    return false;
+  }
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // block.
+  PendingMembers.push_back(
+      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+  if (RegResult) {
+    for (auto &It : PendingMembers) {
+      It.convertToReg(RegResult);
+      State.addLoc(It);
+      ++RegResult;
+    }
+    PendingMembers.clear();
+    return true;
+  }
+
+  // Mark all regs in the class as unavailable
+  for (auto Reg : RegList)
+    State.AllocateReg(Reg);
+
+  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+      State.getMachineFunction().getSubtarget());
+  unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+// TableGen provides definitions of the calling convention analysis entry
+// points.
+#include "AArch64GenCallingConv.inc"
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index 461c01318d4e..13cc0c583fd2 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -1,139 +1,45 @@
-//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//=== AArch64CallingConvention.h - AArch64 CC entry points ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the custom routines for the AArch64 Calling Convention
-// that aren't done by tablegen.
+// This file declares the entry points for AArch64 calling convention analysis.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
 
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CallingConv.h"
-
-namespace {
-using namespace llvm;
-
-static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
-                                     AArch64::X3, AArch64::X4, AArch64::X5,
-                                     AArch64::X6, AArch64::X7};
-static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
-                                     AArch64::H3, AArch64::H4, AArch64::H5,
-                                     AArch64::H6, AArch64::H7};
-static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
-                                     AArch64::S3, AArch64::S4, AArch64::S5,
-                                     AArch64::S6, AArch64::S7};
-static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
-                                     AArch64::D3, AArch64::D4, AArch64::D5,
-                                     AArch64::D6, AArch64::D7};
-static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
-                                     AArch64::Q3, AArch64::Q4, AArch64::Q5,
-                                     AArch64::Q6, AArch64::Q7};
-
-static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
-                             MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
-                             CCState &State, unsigned SlotAlign) {
-  unsigned Size = LocVT.getSizeInBits() / 8;
-  unsigned StackAlign =
-      State.getMachineFunction().getDataLayout().getStackAlignment();
-  unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
-
-  for (auto &It : PendingMembers) {
-    It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
-    State.addLoc(It);
-    SlotAlign = 1;
-  }
-
-  // All pending members have now been allocated
-  PendingMembers.clear();
-  return true;
-}
-
-/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
-/// [N x Ty] type must still be contiguous in memory though.
-static bool CC_AArch64_Custom_Stack_Block(
-      unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
-      ISD::ArgFlagsTy &ArgFlags, CCState &State) {
-  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
-  // Add the argument to the list to be allocated once we know the size of the
-  // block.
-  PendingMembers.push_back(
-      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-
-  if (!ArgFlags.isInConsecutiveRegsLast())
-    return true;
-
-  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
-}
-
-/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
-/// registers. If no such sequence is available, mark the rest of the registers
-/// of that type as used and place the argument on the stack.
-static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
-  // Try to allocate a contiguous block of registers, each of the correct
-  // size to hold one member.
-  ArrayRef<MCPhysReg> RegList;
-  if (LocVT.SimpleTy == MVT::i64)
-    RegList = XRegList;
-  else if (LocVT.SimpleTy == MVT::f16)
-    RegList = HRegList;
-  else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
-    RegList = SRegList;
-  else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
-    RegList = DRegList;
-  else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
-    RegList = QRegList;
-  else {
-    // Not an array we want to split up after all.
-    return false;
-  }
-
-  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
-  // Add the argument to the list to be allocated once we know the size of the
-  // block.
-  PendingMembers.push_back(
-      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-
-  if (!ArgFlags.isInConsecutiveRegsLast())
-    return true;
-
-  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
-  if (RegResult) {
-    for (auto &It : PendingMembers) {
-      It.convertToReg(RegResult);
-      State.addLoc(It);
-      ++RegResult;
-    }
-    PendingMembers.clear();
-    return true;
-  }
-
-  // Mark all regs in the class as unavailable
-  for (auto Reg : RegList)
-    State.AllocateReg(Reg);
-
-  const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
-      State.getMachineFunction().getSubtarget());
-  unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
-
-  return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
-}
 
-}
+namespace llvm {
+bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                      CCState &State);
+bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                 CCValAssign::LocInfo LocInfo,
+                                 ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                             CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                    CCState &State);
+bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                             CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags, CCState &State);
+} // namespace llvm
 
 #endif
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 5db941e9dac7..d969a9e1ab3a 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -1,9 +1,8 @@
 //=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@ class CCIfBigEndian<CCAction A> :
 // ARM AAPCS64 Calling Convention
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_AArch64_AAPCS : CallingConv<[
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -34,7 +34,23 @@ def CC_AArch64_AAPCS : CallingConv<[
   CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
                          CCBitConvertToType<f128>>>,
 
-  // An SRet is passed in X8, not X0 like a normal pointer parameter.
+  // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
+  // However, on Windows, in some circumstances, the SRet is passed in X0 or X1
+  // instead.  The presence of the inreg attribute indicates that SRet is
+  // passed in the alternative register (X0 or X1), not X8:
+  // - X0 for non-instance methods.
+  // - X1 for instance methods.
+
+  // The "sret" attribute identifies indirect returns.
+  // The "inreg" attribute identifies non-aggregate types.
+  // The position of the "sret" attribute identifies instance/non-instance
+  // methods.
+  // "sret" on argument 0 means non-instance methods.
+  // "sret" on argument 1 means instance methods.
+
+  CCIfInReg<CCIfType<[i64],
+    CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1], [W0, W1]>>>>>,
+
   CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
 
   // Put ByVal arguments directly on the stack. Minimum size and alignment of a
@@ -89,6 +105,7 @@ def CC_AArch64_AAPCS : CallingConv<[
            CCAssignToStack<16, 16>>
 ]>;
 
+let Entry = 1 in
 def RetCC_AArch64_AAPCS : CallingConv<[
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -122,6 +139,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
 ]>;
 
 // Vararg functions on windows pass floats in integer registers
+let Entry = 1 in
 def CC_AArch64_Win64_VarArg : CallingConv<[
   CCIfType<[f16, f32], CCPromoteToType<f64>>,
   CCIfType<[f64], CCBitConvertToType<i64>>,
@@ -133,6 +151,7 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
 // from the standard one at this level:
 //     + i128s (i.e. split i64s) don't need even registers.
 //     + Stack slots are sized as needed rather than being at least 64-bit.
+let Entry = 1 in
 def CC_AArch64_DarwinPCS : CallingConv<[
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -189,6 +208,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
            CCAssignToStack<16, 16>>
 ]>;
 
+let Entry = 1 in
 def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -213,6 +233,7 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
 // in register and the remaining arguments on stack. We allow 32bit stack slots,
 // so that WebKit can write partial values in the stack and define the other
 // 32bit quantity as undef.
+let Entry = 1 in
 def CC_AArch64_WebKit_JS : CallingConv<[
   // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -224,6 +245,7 @@ def CC_AArch64_WebKit_JS : CallingConv<[
   CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;
 
+let Entry = 1 in
 def RetCC_AArch64_WebKit_JS : CallingConv<[
   CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                           [X0, X1, X2, X3, X4, X5, X6, X7]>>,
@@ -257,6 +279,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
 // The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
 // register mapping".
 
+let Entry = 1 in
 def CC_AArch64_GHC : CallingConv<[
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
 
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index b88fba4452a1..688bd1b28e85 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 720323f81d29..9f324b433209 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -1,9 +1,8 @@
 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 0924a27e2586..48dab79b32d3 100644
--- a/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -1,9 +1,8 @@
 //==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // This pass looks at the basic blocks each jump-table refers to and works out
 // whether they can be emitted in a compressed form (with 8 or 16-bit
@@ -108,6 +107,7 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
       MinBlock = Block;
     }
   }
+  assert(MinBlock && "Failed to find minimum offset block");
 
   // The ADR instruction needed to calculate the address of the first reachable
   // basic block can address +/-1MB.
@@ -141,7 +141,7 @@ bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
   const auto &ST = MF->getSubtarget<AArch64Subtarget>();
   TII = ST.getInstrInfo();
 
-  if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+  if (ST.force32BitJumpTables() && !MF->getFunction().hasMinSize())
     return false;
 
   scanFunction();
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 5ae787409ae8..453132e09669 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 5064762b9f77..a6efb115ed44 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -1,9 +1,8 @@
 //=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 8176b6fb269d..2cfbcc592d6a 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -941,7 +940,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
-  MinSize = MF.getFunction().optForMinSize();
+  MinSize = MF.getFunction().hasMinSize();
 
   bool Changed = false;
   CmpConv.runOnMachineFunction(MF, MBPI);
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 2ba10d25e939..a43077cb88ec 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -1,9 +1,8 @@
 //==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file When allowed by the instruction, replace a dead definition of a GPR
@@ -55,8 +54,6 @@ public:
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
-
-  bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
 };
 char AArch64DeadRegisterDefinitions::ID = 0;
 } // end anonymous namespace
@@ -71,60 +68,48 @@ static bool usesFrameIndex(const MachineInstr &MI) {
   return false;
 }
 
-bool
-AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
-                                           const MachineFunction &MF) const {
-  if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
-    return false;
-
-#define CASE_AARCH64_ATOMIC_(PREFIX) \
-  case AArch64::PREFIX##X: \
-  case AArch64::PREFIX##W: \
-  case AArch64::PREFIX##H: \
-  case AArch64::PREFIX##B
-
-  for (const MachineMemOperand *MMO : MI.memoperands()) {
-    if (MMO->isAtomic()) {
-      unsigned Opcode = MI.getOpcode();
-      switch (Opcode) {
-      default:
-        return false;
-        break;
-
-      CASE_AARCH64_ATOMIC_(LDADDA):
-      CASE_AARCH64_ATOMIC_(LDADDAL):
-
-      CASE_AARCH64_ATOMIC_(LDCLRA):
-      CASE_AARCH64_ATOMIC_(LDCLRAL):
-
-      CASE_AARCH64_ATOMIC_(LDEORA):
-      CASE_AARCH64_ATOMIC_(LDEORAL):
-
-      CASE_AARCH64_ATOMIC_(LDSETA):
-      CASE_AARCH64_ATOMIC_(LDSETAL):
-
-      CASE_AARCH64_ATOMIC_(LDSMAXA):
-      CASE_AARCH64_ATOMIC_(LDSMAXAL):
-
-      CASE_AARCH64_ATOMIC_(LDSMINA):
-      CASE_AARCH64_ATOMIC_(LDSMINAL):
-
-      CASE_AARCH64_ATOMIC_(LDUMAXA):
-      CASE_AARCH64_ATOMIC_(LDUMAXAL):
-
-      CASE_AARCH64_ATOMIC_(LDUMINA):
-      CASE_AARCH64_ATOMIC_(LDUMINAL):
-
-      CASE_AARCH64_ATOMIC_(SWPA):
-      CASE_AARCH64_ATOMIC_(SWPAL):
-        return true;
-        break;
-                                                                    }
-    }
+// Instructions that lose their 'read' operation for a subsequent fence acquire
+// (DMB LD) once the zero register is used.
+//
+// WARNING: The acquire variants of the instructions are also affected, but they
+// are split out into `atomicBarrierDroppedOnZero()` to support annotations on
+// assembly.
+static bool atomicReadDroppedOnZero(unsigned Opcode) {
+  switch (Opcode) {
+    case AArch64::LDADDB:     case AArch64::LDADDH:
+    case AArch64::LDADDW:     case AArch64::LDADDX:
+    case AArch64::LDADDLB:    case AArch64::LDADDLH:
+    case AArch64::LDADDLW:    case AArch64::LDADDLX:
+    case AArch64::LDCLRB:     case AArch64::LDCLRH:
+    case AArch64::LDCLRW:     case AArch64::LDCLRX:
+    case AArch64::LDCLRLB:    case AArch64::LDCLRLH:
+    case AArch64::LDCLRLW:    case AArch64::LDCLRLX:
+    case AArch64::LDEORB:     case AArch64::LDEORH:
+    case AArch64::LDEORW:     case AArch64::LDEORX:
+    case AArch64::LDEORLB:    case AArch64::LDEORLH:
+    case AArch64::LDEORLW:    case AArch64::LDEORLX:
+    case AArch64::LDSETB:     case AArch64::LDSETH:
+    case AArch64::LDSETW:     case AArch64::LDSETX:
+    case AArch64::LDSETLB:    case AArch64::LDSETLH:
+    case AArch64::LDSETLW:    case AArch64::LDSETLX:
+    case AArch64::LDSMAXB:    case AArch64::LDSMAXH:
+    case AArch64::LDSMAXW:    case AArch64::LDSMAXX:
+    case AArch64::LDSMAXLB:   case AArch64::LDSMAXLH:
+    case AArch64::LDSMAXLW:   case AArch64::LDSMAXLX:
+    case AArch64::LDSMINB:    case AArch64::LDSMINH:
+    case AArch64::LDSMINW:    case AArch64::LDSMINX:
+    case AArch64::LDSMINLB:   case AArch64::LDSMINLH:
+    case AArch64::LDSMINLW:   case AArch64::LDSMINLX:
+    case AArch64::LDUMAXB:    case AArch64::LDUMAXH:
+    case AArch64::LDUMAXW:    case AArch64::LDUMAXX:
+    case AArch64::LDUMAXLB:   case AArch64::LDUMAXLH:
+    case AArch64::LDUMAXLW:   case AArch64::LDUMAXLX:
+    case AArch64::LDUMINB:    case AArch64::LDUMINH:
+    case AArch64::LDUMINW:    case AArch64::LDUMINX:
+    case AArch64::LDUMINLB:   case AArch64::LDUMINLH:
+    case AArch64::LDUMINLW:   case AArch64::LDUMINLX:
+    return true;
   }
-
-#undef CASE_AARCH64_ATOMIC_
-
   return false;
 }
 
@@ -148,9 +133,8 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
       continue;
     }
 
-    if (shouldSkip(MI, MF)) {
-      LLVM_DEBUG(dbgs() << "    Ignoring, Atomic instruction with acquire "
-                           "semantics using WZR/XZR\n");
+    if (atomicBarrierDroppedOnZero(MI.getOpcode()) || atomicReadDroppedOnZero(MI.getOpcode())) {
+      LLVM_DEBUG(dbgs() << "    Ignoring, semantics change with xzr/wzr.\n");
       continue;
     }
 
diff --git a/lib/Target/AArch64/AArch64ExpandImm.cpp b/lib/Target/AArch64/AArch64ExpandImm.cpp
new file mode 100644
index 000000000000..c764af80eb86
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -0,0 +1,411 @@
+//===- AArch64ExpandImm.cpp - AArch64 Immediate Expansion -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file computes the instruction sequences used to materialize immediates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64ExpandImm.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+
+namespace llvm {
+
+namespace AArch64_IMM {
+
+/// Helper function which extracts the specified 16-bit chunk from a
+/// 64-bit value.
+static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
+  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
+
+  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
+}
+
+/// Check whether the given 16-bit chunk replicated to full 64-bit width
+/// can be materialized with an ORR instruction.
+static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
+  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
+
+  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+}
+
+/// Check for identical 16-bit chunks within the constant and if so
+/// materialize them with a single ORR instruction. The remaining one or two
+/// 16-bit chunks will be materialized with MOVK instructions.
+///
+/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
+/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
+/// an ORR instruction.
+static bool tryToreplicateChunks(uint64_t UImm,
+				 SmallVectorImpl<ImmInsnModel> &Insn) {
+  using CountMap = DenseMap<uint64_t, unsigned>;
+
+  CountMap Counts;
+
+  // Scan the constant and count how often every chunk occurs.
+  for (unsigned Idx = 0; Idx < 4; ++Idx)
+    ++Counts[getChunk(UImm, Idx)];
+
+  // Traverse the chunks to find one which occurs more than once.
+  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
+       Chunk != End; ++Chunk) {
+    const uint64_t ChunkVal = Chunk->first;
+    const unsigned Count = Chunk->second;
+
+    uint64_t Encoding = 0;
+
+    // We are looking for chunks which have two or three instances and can be
+    // materialized with an ORR instruction.
+    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
+      continue;
+
+    const bool CountThree = Count == 3;
+
+    Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+    unsigned ShiftAmt = 0;
+    uint64_t Imm16 = 0;
+    // Find the first chunk not materialized with the ORR instruction.
+    for (; ShiftAmt < 64; ShiftAmt += 16) {
+      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+      if (Imm16 != ChunkVal)
+        break;
+    }
+
+    // Create the first MOVK instruction.
+    Insn.push_back({ AArch64::MOVKXi, Imm16,
+		     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
+
+    // In case we have three instances the whole constant is now materialized
+    // and we can exit.
+    if (CountThree)
+      return true;
+
+    // Find the remaining chunk which needs to be materialized.
+    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
+      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+      if (Imm16 != ChunkVal)
+        break;
+    }
+    Insn.push_back({ AArch64::MOVKXi, Imm16,
+                     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
+    return true;
+  }
+
+  return false;
+}
+
+/// Check whether this chunk matches the pattern '1...0...'. This pattern
+/// starts a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+static bool isStartChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(~Chunk);
+}
+
+/// Check whether this chunk matches the pattern '0...1...' This pattern
+/// ends a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+static bool isEndChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(Chunk);
+}
+
+/// Clear or set all bits in the chunk at the given index.
+static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
+  const uint64_t Mask = 0xFFFF;
+
+  if (Clear)
+    // Clear chunk in the immediate.
+    Imm &= ~(Mask << (Idx * 16));
+  else
+    // Set all bits in the immediate for the particular chunk.
+    Imm |= Mask << (Idx * 16);
+
+  return Imm;
+}
+
+/// Check whether the constant contains a sequence of contiguous ones,
+/// which might be interrupted by one or two chunks. If so, materialize the
+/// sequence of contiguous ones with an ORR instruction.
+/// Materialize the chunks which are either interrupting the sequence or outside
+/// of the sequence with a MOVK instruction.
+///
+/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
+/// which ends the sequence (0...1...). Then we are looking for constants which
+/// contain at least one S and E chunk.
+/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
+///
+/// We are also looking for constants like |S|A|B|E| where the contiguous
+/// sequence of ones wraps around the MSB into the LSB.
+static bool trySequenceOfOnes(uint64_t UImm,
+                              SmallVectorImpl<ImmInsnModel> &Insn) {
+  const int NotSet = -1;
+  const uint64_t Mask = 0xFFFF;
+
+  int StartIdx = NotSet;
+  int EndIdx = NotSet;
+  // Try to find the chunks which start/end a contiguous sequence of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    int64_t Chunk = getChunk(UImm, Idx);
+    // Sign extend the 16-bit chunk to 64-bit.
+    Chunk = (Chunk << 48) >> 48;
+
+    if (isStartChunk(Chunk))
+      StartIdx = Idx;
+    else if (isEndChunk(Chunk))
+      EndIdx = Idx;
+  }
+
+  // Early exit in case we can't find a start/end chunk.
+  if (StartIdx == NotSet || EndIdx == NotSet)
+    return false;
+
+  // Outside of the contiguous sequence of ones everything needs to be zero.
+  uint64_t Outside = 0;
+  // Chunks between the start and end chunk need to have all their bits set.
+  uint64_t Inside = Mask;
+
+  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
+  // just swap indices and pretend we are materializing a contiguous sequence
+  // of zeros surrounded by a contiguous sequence of ones.
+  if (StartIdx > EndIdx) {
+    std::swap(StartIdx, EndIdx);
+    std::swap(Outside, Inside);
+  }
+
+  uint64_t OrrImm = UImm;
+  int FirstMovkIdx = NotSet;
+  int SecondMovkIdx = NotSet;
+
+  // Find out which chunks we need to patch up to obtain a contiguous sequence
+  // of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    const uint64_t Chunk = getChunk(UImm, Idx);
+
+    // Check whether we are looking at a chunk which is not part of the
+    // contiguous sequence of ones.
+    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
+      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+
+      // Check whether we are looking at a chunk which is part of the contiguous
+      // sequence of ones.
+    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
+      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+    }
+  }
+  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
+
+  // Create the ORR-immediate instruction.
+  uint64_t Encoding = 0;
+  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
+  Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+  const bool SingleMovk = SecondMovkIdx == NotSet;
+  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
+                   AArch64_AM::getShifterImm(AArch64_AM::LSL,
+                                             FirstMovkIdx * 16) });
+
+  // Early exit in case we only need to emit a single MOVK instruction.
+  if (SingleMovk)
+    return true;
+
+  // Create the second MOVK instruction.
+  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
+	           AArch64_AM::getShifterImm(AArch64_AM::LSL,
+                                             SecondMovkIdx * 16) });
+
+  return true;
+}
+
+/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
+/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
+static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
+				      unsigned OneChunks, unsigned ZeroChunks,
+				      SmallVectorImpl<ImmInsnModel> &Insn) {
+  const unsigned Mask = 0xFFFF;
+
+  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
+  // more MOVK instructions to insert additional 16-bit portions into the
+  // lower bits.
+  bool isNeg = false;
+
+  // Use MOVN to materialize the high bits if we have more all one chunks
+  // than all zero chunks.
+  if (OneChunks > ZeroChunks) {
+    isNeg = true;
+    Imm = ~Imm;
+  }
+
+  unsigned FirstOpc;
+  if (BitSize == 32) {
+    Imm &= (1LL << 32) - 1;
+    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
+  } else {
+    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
+  }
+  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
+  unsigned LastShift = 0; // LSL amount for last MOVK
+  if (Imm != 0) {
+    unsigned LZ = countLeadingZeros(Imm);
+    unsigned TZ = countTrailingZeros(Imm);
+    Shift = (TZ / 16) * 16;
+    LastShift = ((63 - LZ) / 16) * 16;
+  }
+  unsigned Imm16 = (Imm >> Shift) & Mask;
+
+  Insn.push_back({ FirstOpc, Imm16,
+                   AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+
+  if (Shift == LastShift)
+    return;
+
+  // If a MOVN was used for the high bits of a negative value, flip the rest
+  // of the bits back for use with MOVK.
+  if (isNeg)
+    Imm = ~Imm;
+
+  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
+  while (Shift < LastShift) {
+    Shift += 16;
+    Imm16 = (Imm >> Shift) & Mask;
+    if (Imm16 == (isNeg ? Mask : 0))
+      continue; // This 16-bit portion is already set correctly.
+
+    Insn.push_back({ Opc, Imm16,
+                     AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+  }
+}
+
+/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
+/// real move-immediate instructions to synthesize the immediate.
+void expandMOVImm(uint64_t Imm, unsigned BitSize,
+		  SmallVectorImpl<ImmInsnModel> &Insn) {
+  const unsigned Mask = 0xFFFF;
+
+  // Scan the immediate and count the number of 16-bit chunks which are either
+  // all ones or all zeros.
+  unsigned OneChunks = 0;
+  unsigned ZeroChunks = 0;
+  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
+    const unsigned Chunk = (Imm >> Shift) & Mask;
+    if (Chunk == Mask)
+      OneChunks++;
+    else if (Chunk == 0)
+      ZeroChunks++;
+  }
+
+  // Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
+  if ((BitSize / 16) - OneChunks <= 1 || (BitSize / 16) - ZeroChunks <= 1) {
+    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+    return;
+  }
+
+  // Try a single ORR.
+  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+  uint64_t Encoding;
+  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
+    Insn.push_back({ Opc, 0, Encoding });
+    return;
+  }
+
+  // Two-instruction sequences.
+  //
+  // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
+  // fastest sequence with fast literal generation.
+  if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) {
+    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+    return;
+  }
+
+  assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
+                          "MOVZ/MOVK pair");
+
+  // Try other two-instruction sequences.
+
+  // 64-bit ORR followed by MOVK.
+  // We try to construct the ORR immediate in three different ways: either we
+  // zero out the chunk which will be replaced, we fill the chunk which will
+  // be replaced with ones, or we take the bit pattern from the other half of
+  // the 64-bit immediate. This is comprehensive because of the way ORR
+  // immediates are constructed.
+  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
+    uint64_t ShiftedMask = (0xFFFFULL << Shift);
+    uint64_t ZeroChunk = UImm & ~ShiftedMask;
+    uint64_t OneChunk = UImm | ShiftedMask;
+    uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
+    uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
+    if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
+        AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
+        AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
+                                            Encoding)) {
+      // Create the ORR-immediate instruction.
+      Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+      // Create the MOVK instruction.
+      const unsigned Imm16 = getChunk(UImm, Shift / 16);
+      Insn.push_back({ AArch64::MOVKXi, Imm16,
+		       AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+      return;
+    }
+  }
+
+  // FIXME: Add more two-instruction sequences.
+
+  // Three instruction sequences.
+  //
+  // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
+  // the fastest sequence with fast literal generation. (If neither MOVK is
+  // part of a fast literal generation pair, it could be slower than the
+  // four-instruction sequence, but we won't worry about that for now.)
+  if (OneChunks || ZeroChunks) {
+    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+    return;
+  }
+
+  // Check for identical 16-bit chunks within the constant and if so materialize
+  // them with a single ORR instruction. The remaining one or two 16-bit chunks
+  // will be materialized with MOVK instructions.
+  if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
+    return;
+
+  // Check whether the constant contains a sequence of contiguous ones, which
+  // might be interrupted by one or two chunks. If so, materialize the sequence
+  // of contiguous ones with an ORR instruction. Materialize the chunks which
+  // are either interrupting the sequence or outside of the sequence with a
+  // MOVK instruction.
+  if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
+    return;
+
+  // We found no possible two or three instruction sequence; use the general
+  // four-instruction sequence.
+  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+}
+
+} // end namespace AArch64_IMM
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64ExpandImm.h b/lib/Target/AArch64/AArch64ExpandImm.h
new file mode 100644
index 000000000000..42c97d2c3e9b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ExpandImm.h
@@ -0,0 +1,35 @@
+//===- AArch64ExpandImm.h - AArch64 Immediate Expansion ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 immediate expansion interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+namespace AArch64_IMM {
+
+struct ImmInsnModel {
+  unsigned Opcode;
+  uint64_t Op1;
+  uint64_t Op2;
+};
+
+void expandMOVImm(uint64_t Imm, unsigned BitSize,
+		  SmallVectorImpl<ImmInsnModel> &Insn);
+
+} // end namespace AArch64_IMM
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index f7190d58fbf9..210c10eb1842 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -66,11 +67,6 @@ private:
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     unsigned BitSize);
-  bool expandMOVImmSimple(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI,
-                          unsigned BitSize,
-                          unsigned OneChunks,
-                          unsigned ZeroChunks);
 
   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
@@ -79,6 +75,9 @@ private:
   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI);
+  bool expandSetTagLoop(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        MachineBasicBlock::iterator &NextMBBI);
 };
 
 } // end anonymous namespace
@@ -104,279 +103,6 @@ static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
   }
 }
 
-/// Helper function which extracts the specified 16-bit chunk from a
-/// 64-bit value.
-static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
-  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
-
-  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
-}
-
-/// Check whether the given 16-bit chunk replicated to full 64-bit width
-/// can be materialized with an ORR instruction.
-static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
-  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
-
-  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
-}
-
-/// Check for identical 16-bit chunks within the constant and if so
-/// materialize them with a single ORR instruction. The remaining one or two
-/// 16-bit chunks will be materialized with MOVK instructions.
-///
-/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
-/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
-/// an ORR instruction.
-static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
-                                 MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator &MBBI,
-                                 const AArch64InstrInfo *TII) {
-  using CountMap = DenseMap<uint64_t, unsigned>;
-
-  CountMap Counts;
-
-  // Scan the constant and count how often every chunk occurs.
-  for (unsigned Idx = 0; Idx < 4; ++Idx)
-    ++Counts[getChunk(UImm, Idx)];
-
-  // Traverse the chunks to find one which occurs more than once.
-  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
-       Chunk != End; ++Chunk) {
-    const uint64_t ChunkVal = Chunk->first;
-    const unsigned Count = Chunk->second;
-
-    uint64_t Encoding = 0;
-
-    // We are looking for chunks which have two or three instances and can be
-    // materialized with an ORR instruction.
-    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
-      continue;
-
-    const bool CountThree = Count == 3;
-    // Create the ORR-immediate instruction.
-    MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
-            .add(MI.getOperand(0))
-            .addReg(AArch64::XZR)
-            .addImm(Encoding);
-
-    const unsigned DstReg = MI.getOperand(0).getReg();
-    const bool DstIsDead = MI.getOperand(0).isDead();
-
-    unsigned ShiftAmt = 0;
-    uint64_t Imm16 = 0;
-    // Find the first chunk not materialized with the ORR instruction.
-    for (; ShiftAmt < 64; ShiftAmt += 16) {
-      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
-      if (Imm16 != ChunkVal)
-        break;
-    }
-
-    // Create the first MOVK instruction.
-    MachineInstrBuilder MIB1 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
-            .addReg(DstReg,
-                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
-            .addReg(DstReg)
-            .addImm(Imm16)
-            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
-
-    // In case we have three instances the whole constant is now materialized
-    // and we can exit.
-    if (CountThree) {
-      transferImpOps(MI, MIB, MIB1);
-      MI.eraseFromParent();
-      return true;
-    }
-
-    // Find the remaining chunk which needs to be materialized.
-    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
-      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
-      if (Imm16 != ChunkVal)
-        break;
-    }
-
-    // Create the second MOVK instruction.
-    MachineInstrBuilder MIB2 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
-            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
-            .addReg(DstReg)
-            .addImm(Imm16)
-            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
-
-    transferImpOps(MI, MIB, MIB2);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  return false;
-}
-
-/// Check whether this chunk matches the pattern '1...0...'. This pattern
-/// starts a contiguous sequence of ones if we look at the bits from the LSB
-/// towards the MSB.
-static bool isStartChunk(uint64_t Chunk) {
-  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
-    return false;
-
-  return isMask_64(~Chunk);
-}
-
-/// Check whether this chunk matches the pattern '0...1...' This pattern
-/// ends a contiguous sequence of ones if we look at the bits from the LSB
-/// towards the MSB.
-static bool isEndChunk(uint64_t Chunk) {
-  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
-    return false;
-
-  return isMask_64(Chunk);
-}
-
-/// Clear or set all bits in the chunk at the given index.
-static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
-  const uint64_t Mask = 0xFFFF;
-
-  if (Clear)
-    // Clear chunk in the immediate.
-    Imm &= ~(Mask << (Idx * 16));
-  else
-    // Set all bits in the immediate for the particular chunk.
-    Imm |= Mask << (Idx * 16);
-
-  return Imm;
-}
-
-/// Check whether the constant contains a sequence of contiguous ones,
-/// which might be interrupted by one or two chunks. If so, materialize the
-/// sequence of contiguous ones with an ORR instruction.
-/// Materialize the chunks which are either interrupting the sequence or outside
-/// of the sequence with a MOVK instruction.
-///
-/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
-/// which ends the sequence (0...1...). Then we are looking for constants which
-/// contain at least one S and E chunk.
-/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
-///
-/// We are also looking for constants like |S|A|B|E| where the contiguous
-/// sequence of ones wraps around the MSB into the LSB.
-static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
-                              MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator &MBBI,
-                              const AArch64InstrInfo *TII) {
-  const int NotSet = -1;
-  const uint64_t Mask = 0xFFFF;
-
-  int StartIdx = NotSet;
-  int EndIdx = NotSet;
-  // Try to find the chunks which start/end a contiguous sequence of ones.
-  for (int Idx = 0; Idx < 4; ++Idx) {
-    int64_t Chunk = getChunk(UImm, Idx);
-    // Sign extend the 16-bit chunk to 64-bit.
-    Chunk = (Chunk << 48) >> 48;
-
-    if (isStartChunk(Chunk))
-      StartIdx = Idx;
-    else if (isEndChunk(Chunk))
-      EndIdx = Idx;
-  }
-
-  // Early exit in case we can't find a start/end chunk.
-  if (StartIdx == NotSet || EndIdx == NotSet)
-    return false;
-
-  // Outside of the contiguous sequence of ones everything needs to be zero.
-  uint64_t Outside = 0;
-  // Chunks between the start and end chunk need to have all their bits set.
-  uint64_t Inside = Mask;
-
-  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
-  // just swap indices and pretend we are materializing a contiguous sequence
-  // of zeros surrounded by a contiguous sequence of ones.
-  if (StartIdx > EndIdx) {
-    std::swap(StartIdx, EndIdx);
-    std::swap(Outside, Inside);
-  }
-
-  uint64_t OrrImm = UImm;
-  int FirstMovkIdx = NotSet;
-  int SecondMovkIdx = NotSet;
-
-  // Find out which chunks we need to patch up to obtain a contiguous sequence
-  // of ones.
-  for (int Idx = 0; Idx < 4; ++Idx) {
-    const uint64_t Chunk = getChunk(UImm, Idx);
-
-    // Check whether we are looking at a chunk which is not part of the
-    // contiguous sequence of ones.
-    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
-      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
-
-      // Remember the index we need to patch.
-      if (FirstMovkIdx == NotSet)
-        FirstMovkIdx = Idx;
-      else
-        SecondMovkIdx = Idx;
-
-      // Check whether we are looking a chunk which is part of the contiguous
-      // sequence of ones.
-    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
-      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
-
-      // Remember the index we need to patch.
-      if (FirstMovkIdx == NotSet)
-        FirstMovkIdx = Idx;
-      else
-        SecondMovkIdx = Idx;
-    }
-  }
-  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
-
-  // Create the ORR-immediate instruction.
-  uint64_t Encoding = 0;
-  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
-  MachineInstrBuilder MIB =
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
-          .add(MI.getOperand(0))
-          .addReg(AArch64::XZR)
-          .addImm(Encoding);
-
-  const unsigned DstReg = MI.getOperand(0).getReg();
-  const bool DstIsDead = MI.getOperand(0).isDead();
-
-  const bool SingleMovk = SecondMovkIdx == NotSet;
-  // Create the first MOVK instruction.
-  MachineInstrBuilder MIB1 =
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
-          .addReg(DstReg,
-                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
-          .addReg(DstReg)
-          .addImm(getChunk(UImm, FirstMovkIdx))
-          .addImm(
-              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
-
-  // Early exit in case we only need to emit a single MOVK instruction.
-  if (SingleMovk) {
-    transferImpOps(MI, MIB, MIB1);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  // Create the second MOVK instruction.
-  MachineInstrBuilder MIB2 =
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
-          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
-          .addReg(DstReg)
-          .addImm(getChunk(UImm, SecondMovkIdx))
-          .addImm(
-              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
-
-  transferImpOps(MI, MIB, MIB2);
-  MI.eraseFromParent();
-  return true;
-}
-
 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
 /// real move-immediate instructions to synthesize the immediate.
 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
@@ -385,7 +111,6 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   unsigned DstReg = MI.getOperand(0).getReg();
   uint64_t Imm = MI.getOperand(1).getImm();
-  const unsigned Mask = 0xFFFF;
 
   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
     // Useless def, and we don't want to risk creating an invalid ORR (which
@@ -394,194 +119,50 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
     return true;
   }
 
-  // Scan the immediate and count the number of 16-bit chunks which are either
-  // all ones or all zeros.
-  unsigned OneChunks = 0;
-  unsigned ZeroChunks = 0;
-  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
-    const unsigned Chunk = (Imm >> Shift) & Mask;
-    if (Chunk == Mask)
-      OneChunks++;
-    else if (Chunk == 0)
-      ZeroChunks++;
-  }
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
+  assert(Insn.size() != 0);
 
-  // FIXME: Prefer MOVZ/MOVN over ORR because of the rules for the "mov"
-  // alias.
+  SmallVector<MachineInstrBuilder, 4> MIBS;
+  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
+    bool LastItem = std::next(I) == E;
+    switch (I->Opcode)
+    {
+    default: llvm_unreachable("unhandled!"); break;
 
-  // Try a single ORR.
-  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
-  uint64_t Encoding;
-  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
-    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
-    MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
-            .add(MI.getOperand(0))
-            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
-            .addImm(Encoding);
-    transferImpOps(MI, MIB, MIB);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  // Two instruction sequences.
-  //
-  // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
-  // fastest sequence with fast literal generation.
-  if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2)
-    return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-
-  assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
-                          "MOVZ/MOVK pair");
-
-  // Try other two-instruction sequences.
-
-  // 64-bit ORR followed by MOVK.
-  // We try to construct the ORR immediate in three different ways: either we
-  // zero out the chunk which will be replaced, we fill the chunk which will
-  // be replaced with ones, or we take the bit pattern from the other half of
-  // the 64-bit immediate. This is comprehensive because of the way ORR
-  // immediates are constructed.
-  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
-    uint64_t ShiftedMask = (0xFFFFULL << Shift);
-    uint64_t ZeroChunk = UImm & ~ShiftedMask;
-    uint64_t OneChunk = UImm | ShiftedMask;
-    uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
-    uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
-    if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
-        AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
-        AArch64_AM::processLogicalImmediate(ReplicateChunk,
-                                            BitSize, Encoding)) {
-      // Create the ORR-immediate instruction.
-      MachineInstrBuilder MIB =
-          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
-              .add(MI.getOperand(0))
-              .addReg(AArch64::XZR)
-              .addImm(Encoding);
-
-      // Create the MOVK instruction.
-      const unsigned Imm16 = getChunk(UImm, Shift / 16);
-      const unsigned DstReg = MI.getOperand(0).getReg();
-      const bool DstIsDead = MI.getOperand(0).isDead();
-      MachineInstrBuilder MIB1 =
-          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
-              .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
-              .addReg(DstReg)
-              .addImm(Imm16)
-              .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
-
-      transferImpOps(MI, MIB, MIB1);
-      MI.eraseFromParent();
-      return true;
+    case AArch64::ORRWri:
+    case AArch64::ORRXri:
+      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+        .add(MI.getOperand(0))
+        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+        .addImm(I->Op2));
+      break;
+    case AArch64::MOVNWi:
+    case AArch64::MOVNXi:
+    case AArch64::MOVZWi:
+    case AArch64::MOVZXi: {
+      bool DstIsDead = MI.getOperand(0).isDead();
+      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+        .addReg(DstReg, RegState::Define |
+                getDeadRegState(DstIsDead && LastItem))
+        .addImm(I->Op1)
+        .addImm(I->Op2));
+      } break;
+    case AArch64::MOVKWi:
+    case AArch64::MOVKXi: {
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+        .addReg(DstReg,
+                RegState::Define |
+                getDeadRegState(DstIsDead && LastItem))
+        .addReg(DstReg)
+        .addImm(I->Op1)
+        .addImm(I->Op2));
+      } break;
     }
   }
-
-  // FIXME: Add more two-instruction sequences.
-
-  // Three instruction sequences.
-  //
-  // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
-  // the fastest sequence with fast literal generation. (If neither MOVK is
-  // part of a fast literal generation pair, it could be slower than the
-  // four-instruction sequence, but we won't worry about that for now.)
-  if (OneChunks || ZeroChunks)
-    return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-
-  // Check for identical 16-bit chunks within the constant and if so materialize
-  // them with a single ORR instruction. The remaining one or two 16-bit chunks
-  // will be materialized with MOVK instructions.
-  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
-    return true;
-
-  // Check whether the constant contains a sequence of contiguous ones, which
-  // might be interrupted by one or two chunks. If so, materialize the sequence
-  // of contiguous ones with an ORR instruction. Materialize the chunks which
-  // are either interrupting the sequence or outside of the sequence with a
-  // MOVK instruction.
-  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
-    return true;
-
-  // We found no possible two or three instruction sequence; use the general
-  // four-instruction sequence.
-  return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-}
-
-/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
-/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
-bool AArch64ExpandPseudo::expandMOVImmSimple(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MBBI,
-                                             unsigned BitSize,
-                                             unsigned OneChunks,
-                                             unsigned ZeroChunks) {
-  MachineInstr &MI = *MBBI;
-  unsigned DstReg = MI.getOperand(0).getReg();
-  uint64_t Imm = MI.getOperand(1).getImm();
-  const unsigned Mask = 0xFFFF;
-
-  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
-  // more MOVK instructions to insert additional 16-bit portions into the
-  // lower bits.
-  bool isNeg = false;
-
-  // Use MOVN to materialize the high bits if we have more all one chunks
-  // than all zero chunks.
-  if (OneChunks > ZeroChunks) {
-    isNeg = true;
-    Imm = ~Imm;
-  }
-
-  unsigned FirstOpc;
-  if (BitSize == 32) {
-    Imm &= (1LL << 32) - 1;
-    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
-  } else {
-    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
-  }
-  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
-  unsigned LastShift = 0; // LSL amount for last MOVK
-  if (Imm != 0) {
-    unsigned LZ = countLeadingZeros(Imm);
-    unsigned TZ = countTrailingZeros(Imm);
-    Shift = (TZ / 16) * 16;
-    LastShift = ((63 - LZ) / 16) * 16;
-  }
-  unsigned Imm16 = (Imm >> Shift) & Mask;
-  bool DstIsDead = MI.getOperand(0).isDead();
-  MachineInstrBuilder MIB1 =
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
-          .addReg(DstReg, RegState::Define |
-                  getDeadRegState(DstIsDead && Shift == LastShift))
-          .addImm(Imm16)
-          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
-
-  // If a MOVN was used for the high bits of a negative value, flip the rest
-  // of the bits back for use with MOVK.
-  if (isNeg)
-    Imm = ~Imm;
-
-  if (Shift == LastShift) {
-    transferImpOps(MI, MIB1, MIB1);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  MachineInstrBuilder MIB2;
-  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
-  while (Shift < LastShift) {
-    Shift += 16;
-    Imm16 = (Imm >> Shift) & Mask;
-    if (Imm16 == (isNeg ? Mask : 0))
-      continue; // This 16-bit portion is already set correctly.
-    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
-               .addReg(DstReg,
-                       RegState::Define |
-                       getDeadRegState(DstIsDead && Shift == LastShift))
-               .addReg(DstReg)
-               .addImm(Imm16)
-               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
-  }
-
-  transferImpOps(MI, MIB1, MIB2);
+  transferImpOps(MI, MIBS.front(), MIBS.back());
   MI.eraseFromParent();
   return true;
 }
@@ -759,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSetTagLoop(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  Register SizeReg = MI.getOperand(2).getReg();
+  Register AddressReg = MI.getOperand(3).getReg();
+
+  MachineFunction *MF = MBB.getParent();
+
+  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
+  const unsigned OpCode =
+      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+
+  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoopBB);
+  MF->insert(++LoopBB->getIterator(), DoneBB);
+
+  BuildMI(LoopBB, DL, TII->get(OpCode))
+      .addDef(AddressReg)
+      .addReg(AddressReg)
+      .addReg(AddressReg)
+      .addImm(2)
+      .cloneMemRefs(MI)
+      .setMIFlags(MI.getFlags());
+  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+      .addDef(SizeReg)
+      .addReg(SizeReg)
+      .addImm(16 * 2)
+      .addImm(0);
+  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+
+  LoopBB->addSuccessor(LoopBB);
+  LoopBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(LoopBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  // Recompute liveness bottom up.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  // Do an extra pass in the loop to get the loop carried dependencies right.
+  // FIXME: is this necessary?
+  LoopBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  DoneBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -928,6 +567,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
         MF->getTarget().getCodeModel() == CodeModel::Kernel)
       SysReg = AArch64SysReg::TPIDR_EL1;
+    else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
+      SysReg = AArch64SysReg::TPIDR_EL3;
+    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
+      SysReg = AArch64SysReg::TPIDR_EL2;
+    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
+      SysReg = AArch64SysReg::TPIDR_EL1;
     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
         .addImm(SysReg);
     MI.eraseFromParent();
@@ -986,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
    }
+   case AArch64::IRGstack: {
+     MachineFunction &MF = *MBB.getParent();
+     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+     const AArch64FrameLowering *TFI =
+         MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
+
+     // IRG does not allow immediate offset. getTaggedBasePointerOffset should
+     // almost always point to SP-after-prologue; if not, emit a longer
+     // instruction sequence.
+     int BaseOffset = -AFI->getTaggedBasePointerOffset();
+     unsigned FrameReg;
+     int FrameRegOffset = TFI->resolveFrameOffsetReference(
+         MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+         /*ForSimm=*/true);
+     Register SrcReg = FrameReg;
+     if (FrameRegOffset != 0) {
+       // Use output register as temporary.
+       SrcReg = MI.getOperand(0).getReg();
+       emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
+                       FrameRegOffset, TII);
+     }
+     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
+         .add(MI.getOperand(0))
+         .addUse(SrcReg)
+         .add(MI.getOperand(2));
+     MI.eraseFromParent();
+     return true;
+   }
+   case AArch64::TAGPstack: {
+     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+         .add(MI.getOperand(0))
+         .add(MI.getOperand(1))
+         .add(MI.getOperand(2))
+         .add(MI.getOperand(4));
+     MI.eraseFromParent();
+     return true;
+   }
+   case AArch64::STGloop:
+   case AArch64::STZGloop:
+     return expandSetTagLoop(MBB, MBBI, NextMBBI);
   }
   return false;
 }
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index bc9a5ca97fea..3b3182128c4c 100644
--- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -1,9 +1,8 @@
 //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
@@ -213,8 +212,8 @@ private:
 struct LoadInfo {
   LoadInfo() = default;
 
-  unsigned DestReg = 0;
-  unsigned BaseReg = 0;
+  Register DestReg;
+  Register BaseReg;
   int BaseRegIdx = -1;
   const MachineOperand *OffsetOpnd = nullptr;
   bool IsPrePost = false;
@@ -648,7 +647,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
     return None;
 
   LoadInfo LI;
-  LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
+  LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg();
   LI.BaseReg = BaseReg;
   LI.BaseRegIdx = BaseRegIdx;
   LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 47550cabb9f0..8dc2768b9597 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1,9 +1,8 @@
 //===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -305,8 +304,6 @@ public:
 
 } // end anonymous namespace
 
-#include "AArch64GenCallingConv.inc"
-
 /// Check if the sign-/zero-extend will be a noop.
 static bool isIntExtFree(const Instruction *I) {
   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
@@ -408,10 +405,9 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
   bool Is64Bit = (VT == MVT::f64);
   // This checks to see if we can use FMOV instructions to materialize
   // a constant, otherwise we have to materialize via the constant pool.
-  if (TLI.isFPImmLegal(Val, VT)) {
-    int Imm =
-        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
-    assert((Imm != -1) && "Cannot encode floating-point constant.");
+  int Imm =
+      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+  if (Imm != -1) {
     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
   }
@@ -2369,7 +2365,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
                                         AArch64::sub_32);
 
   if ((BW < 32) && !IsBitTest)
-    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
 
   // Emit the combined compare and branch instruction.
   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
@@ -3608,6 +3604,14 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
         .addImm(1);
     return true;
+  case Intrinsic::debugtrap: {
+    if (Subtarget->isTargetWindows()) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
+          .addImm(0xF000);
+      return true;
+    }
+    break;
+  }
 
   case Intrinsic::sqrt: {
     Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -4268,7 +4272,7 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   const TargetRegisterClass *RC =
       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (NeedTrunc) {
-    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
     Op0IsKill = Op1IsKill = true;
   }
@@ -4948,7 +4952,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
   MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
-    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
     IdxNIsKill = true;
   } else if (IdxVT.bitsGT(PtrVT))
     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
@@ -5172,10 +5176,6 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
   }
 
-  // Silence warnings.
-  (void)&CC_AArch64_DarwinPCS_VarArg;
-  (void)&CC_AArch64_Win64_VarArg;
-
   // fall-back to target-independent instruction selection.
   return selectOperator(I, I->getOpcode());
 }
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 538a8d7e8fbc..8c6e5cbd5c13 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -251,8 +250,7 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
 
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!TFI->hasReservedCallFrame(MF)) {
+  if (!hasReservedCallFrame(MF)) {
     unsigned Align = getStackAlignment();
 
     int64_t Amount = I->getOperand(0).getImm();
@@ -588,7 +586,7 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
-    bool NeedsWinCFI, bool InProlog = true) {
+    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
   // Ignore instructions that do not operate on SP, i.e. shadow call stack
   // instructions and associated CFI instruction.
   while (MBBI->getOpcode() == AArch64::STRXpost ||
@@ -674,9 +672,11 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   MIB.setMemRefs(MBBI->memoperands());
 
   // Generate a new SEH code that corresponds to the new instruction.
-  if (NeedsWinCFI)
+  if (NeedsWinCFI) {
+    *HasWinCFI = true;
     InsertSEH(*MIB, *TII,
               InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
+  }
 
   return std::prev(MBB.erase(MBBI));
 }
@@ -685,7 +685,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
 // combined SP bump by adding the local stack size to the stack offsets.
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                               unsigned LocalStackSize,
-                                              bool NeedsWinCFI) {
+                                              bool NeedsWinCFI,
+                                              bool *HasWinCFI) {
   if (AArch64InstrInfo::isSEHInstruction(MI))
     return;
 
@@ -732,6 +733,7 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
 
   if (NeedsWinCFI) {
+    *HasWinCFI = true;
     auto MBBI = std::next(MachineBasicBlock::iterator(MI));
     assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
     assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
@@ -803,7 +805,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                          !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
   bool HasFP = hasFP(MF);
   bool NeedsWinCFI = needsWinCFI(MF);
-  MF.setHasWinCFI(NeedsWinCFI);
+  bool HasWinCFI = false;
+  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
+
   bool IsFunclet = MBB.isEHFuncletEntry();
 
   // At this point, we're going to decide whether or not the function uses a
@@ -838,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
 
+  // Set the tagged base pointer to the bottom of the stack frame.
+  // Ideally it should match the SP value after the prologue.
+  AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
@@ -859,7 +867,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       ++NumRedZoneFunctions;
     } else {
       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup, false, NeedsWinCFI);
+                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
       if (!NeedsWinCFI) {
         // Label used to tie together the PROLOG_LABEL and the MachineMoves.
         MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -872,9 +880,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       }
     }
 
-    if (NeedsWinCFI)
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
           .setMIFlag(MachineInstr::FrameSetup);
+    }
 
     return;
   }
@@ -892,11 +902,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
   if (CombineSPBump) {
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI);
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
     NumBytes = 0;
   } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
-        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
+        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -908,7 +918,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
     if (CombineSPBump)
       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
-                                        NeedsWinCFI);
+                                        NeedsWinCFI, &HasWinCFI);
     ++MBBI;
   }
 
@@ -916,9 +926,24 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // opcodes that we needed to emit.  The FP and BP belong to the containing
   // function.
   if (IsFunclet) {
-    if (NeedsWinCFI)
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
           .setMIFlag(MachineInstr::FrameSetup);
+    }
+
+    // SEH funclets are passed the frame pointer in X1.  If the parent
+    // function uses the base register, then the base register is used
+    // directly, and is not retrieved from X1.
+    if (F.hasPersonalityFn()) {
+      EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+      if (isAsynchronousEHPersonality(Per)) {
+        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
+            .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
+        MBB.addLiveIn(AArch64::X1);
+      }
+    }
+
     return;
   }
 
@@ -934,12 +959,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     // Note: All stores of callee-saved registers are marked as "FrameSetup".
     // This code marks the instruction(s) that set the FP also.
     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
-                    MachineInstr::FrameSetup, false, NeedsWinCFI);
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
   }
 
   if (windowsRequiresStackProbe(MF, NumBytes)) {
     uint32_t NumWords = NumBytes >> 4;
     if (NeedsWinCFI) {
+      HasWinCFI = true;
       // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
       // exceed this amount.  We need to move at most 2^24 - 1 into x15.
       // This is at most two instructions, MOVZ follwed by MOVK.
@@ -983,9 +1009,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
           .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI)
+      if (NeedsWinCFI) {
+        HasWinCFI = true;
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
             .setMIFlag(MachineInstr::FrameSetup);
+      }
       break;
     case CodeModel::Large:
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -993,9 +1021,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
           .addExternalSymbol("__chkstk")
           .addExternalSymbol("__chkstk")
           .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI)
+      if (NeedsWinCFI) {
+        HasWinCFI = true;
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
             .setMIFlag(MachineInstr::FrameSetup);
+      }
 
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
           .addReg(AArch64::X16, RegState::Kill)
@@ -1004,9 +1034,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
           .setMIFlags(MachineInstr::FrameSetup);
-      if (NeedsWinCFI)
+      if (NeedsWinCFI) {
+        HasWinCFI = true;
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
             .setMIFlag(MachineInstr::FrameSetup);
+      }
       break;
     }
 
@@ -1015,10 +1047,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
         .addReg(AArch64::X15, RegState::Kill)
         .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
         .setMIFlags(MachineInstr::FrameSetup);
-    if (NeedsWinCFI)
-       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
-            .addImm(NumBytes)
-            .setMIFlag(MachineInstr::FrameSetup);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+          .addImm(NumBytes)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
     NumBytes = 0;
   }
 
@@ -1038,7 +1072,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       // the correct value here, as NumBytes also includes padding bytes,
       // which shouldn't be counted here.
       emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup, false, NeedsWinCFI);
+                      MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
 
     if (NeedsRealignment) {
       const unsigned Alignment = MFI.getMaxAlignment();
@@ -1061,10 +1095,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
           .addReg(scratchSPReg, RegState::Kill)
           .addImm(andMaskEncoded);
       AFI->setStackRealigned(true);
-      if (NeedsWinCFI)
+      if (NeedsWinCFI) {
+        HasWinCFI = true;
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
             .addImm(NumBytes & andMaskEncoded)
             .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
   }
 
@@ -1078,16 +1114,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   if (RegInfo->hasBasePointer(MF)) {
     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                      false);
-    if (NeedsWinCFI)
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
           .setMIFlag(MachineInstr::FrameSetup);
+    }
   }
 
   // The very last FrameSetup instruction indicates the end of prologue. Emit a
   // SEH opcode indicating the prologue end.
-  if (NeedsWinCFI)
+  if (NeedsWinCFI && HasWinCFI) {
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
         .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   if (needsFrameMoves) {
     const DataLayout &TD = MF.getDataLayout();
@@ -1231,7 +1270,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   DebugLoc DL;
   bool IsTailCallReturn = false;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool HasWinCFI = false;
   bool IsFunclet = false;
+  auto WinCFI = make_scope_exit([&]() {
+    if (!MF.hasWinCFI())
+      MF.setHasWinCFI(HasWinCFI);
+  });
 
   if (MBB.end() != MBBI) {
     DL = MBBI->getDebugLoc();
@@ -1326,7 +1370,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     // If the offset is 0, convert it to a post-index ldp.
     if (OffsetOp.getImm() == 0)
       convertCalleeSaveRestoreToSPPrePostIncDec(
-          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
     else {
       // If not, make sure to emit an add after the last ldp.
       // We're doing this by transfering the size to be restored from the
@@ -1348,19 +1392,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
       break;
     } else if (CombineSPBump)
       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
-                                        NeedsWinCFI);
+                                        NeedsWinCFI, &HasWinCFI);
   }
 
-  if (NeedsWinCFI)
+  if (NeedsWinCFI) {
+    HasWinCFI = true;
     BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
         .setMIFlag(MachineInstr::FrameDestroy);
+  }
 
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                     NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
-                    false, NeedsWinCFI);
-    if (NeedsWinCFI)
+                    false, NeedsWinCFI, &HasWinCFI);
+    if (NeedsWinCFI && HasWinCFI)
       BuildMI(MBB, MBB.getFirstTerminator(), DL,
               TII->get(AArch64::SEH_EpilogEnd))
           .setMIFlag(MachineInstr::FrameDestroy);
@@ -1392,12 +1438,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                     StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
-                    NeedsWinCFI);
+                    NeedsWinCFI, &HasWinCFI);
     if (Done) {
-      if (NeedsWinCFI)
+      if (NeedsWinCFI) {
+        HasWinCFI = true;
         BuildMI(MBB, MBB.getFirstTerminator(), DL,
                 TII->get(AArch64::SEH_EpilogEnd))
             .setMIFlag(MachineInstr::FrameDestroy);
+      }
       return;
     }
 
@@ -1436,11 +1484,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 
     emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
                     AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
-                    NeedsWinCFI);
+                    NeedsWinCFI, &HasWinCFI);
   }
-  if (NeedsWinCFI)
+  if (NeedsWinCFI && HasWinCFI)
     BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
         .setMIFlag(MachineInstr::FrameDestroy);
+
+  MF.setHasWinCFI(HasWinCFI);
 }
 
 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1450,25 +1500,66 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                  int FI,
                                                  unsigned &FrameReg) const {
-  return resolveFrameIndexReference(MF, FI, FrameReg);
+  return resolveFrameIndexReference(
+      MF, FI, FrameReg,
+      /*PreferFP=*/
+      MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
+      /*ForSimm=*/false);
 }
 
-int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
-                                                     int FI, unsigned &FrameReg,
-                                                     bool PreferFP) const {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
-      MF.getSubtarget().getRegisterInfo());
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+int AArch64FrameLowering::getNonLocalFrameIndexReference(
+  const MachineFunction &MF, int FI) const {
+  return getSEHFrameIndexOffset(MF, FI);
+}
+
+static int getFPOffset(const MachineFunction &MF, int ObjectOffset) {
+  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   bool IsWin64 =
       Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
   unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
-  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
-  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
+  return ObjectOffset + FixedObject + 16;
+}
+
+static int getStackOffset(const MachineFunction &MF, int ObjectOffset) {
+  const auto &MFI = MF.getFrameInfo();
+  return ObjectOffset + MFI.getStackSize();
+}
+
+int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
+                                                 int FI) const {
+  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
+  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
+             ? getFPOffset(MF, ObjectOffset)
+             : getStackOffset(MF, ObjectOffset);
+}
+
+int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
+                                                     int FI, unsigned &FrameReg,
+                                                     bool PreferFP,
+                                                     bool ForSimm) const {
+  const auto &MFI = MF.getFrameInfo();
+  int ObjectOffset = MFI.getObjectOffset(FI);
   bool isFixed = MFI.isFixedObjectIndex(FI);
-  bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
-                               -((int)AFI->getCalleeSavedStackSize());
+  return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+                                     PreferFP, ForSimm);
+}
+
+int AArch64FrameLowering::resolveFrameOffsetReference(
+    const MachineFunction &MF, int ObjectOffset, bool isFixed,
+    unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
+  const auto &MFI = MF.getFrameInfo();
+  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
+  int FPOffset = getFPOffset(MF, ObjectOffset);
+  int Offset = getStackOffset(MF, ObjectOffset);
+  bool isCSR =
+      !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
 
   // Use frame pointer to reference fixed objects. Use it for locals if
   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1489,11 +1580,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
       assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
       UseFP = true;
     } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
-      // If the FPOffset is negative, we have to keep in mind that the
-      // available offset range for negative offsets is smaller than for
-      // positive ones. If an offset is
-      // available via the FP and the SP, use whichever is closest.
-      bool FPOffsetFits = FPOffset >= -256;
+      // If the FPOffset is negative and we're producing a signed immediate, we
+      // have to keep in mind that the available offset range for negative
+      // offsets is smaller than for positive ones. If an offset is available
+      // via the FP and the SP, use whichever is closest.
+      bool FPOffsetFits = !ForSimm || FPOffset >= -256;
       PreferFP |= Offset > -FPOffset;
 
       if (MFI.hasVarSizedObjects()) {
@@ -1517,6 +1608,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
         // Funclets access the locals contained in the parent's stack frame
         // via the frame pointer, so we have to use the FP in the parent
         // function.
+        (void) Subtarget;
         assert(
             Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
             "Funclets should only be present on Win64");
@@ -1759,8 +1851,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
           static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
           static_cast<char>(-8) & 0x7f, // addend (sleb128)
       };
-      unsigned CFIIndex =
-          MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+          nullptr, StringRef(CFIInst, sizeof(CFIInst))));
       BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlag(MachineInstr::FrameSetup);
@@ -2104,9 +2196,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
     ++MBBI;
 
-  if (MBBI->isTerminator())
-    return;
-
   // Create an UnwindHelp object.
   int UnwindHelpFI =
       MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
@@ -2114,8 +2203,10 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
   // We need to store -2 into the UnwindHelp object at the start of the
   // function.
   DebugLoc DL;
-  RS->enterBasicBlock(MBB);
-  unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+  RS->enterBasicBlockEnd(MBB);
+  RS->backward(std::prev(MBBI));
+  unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
+  assert(DstReg && "There must be a free register after frame setup");
   BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
       .addReg(DstReg, getKillRegState(true))
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 0d0385acf46e..6dbd34b2189f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -1,9 +1,8 @@
 //==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,8 +40,11 @@ public:
   int getFrameIndexReference(const MachineFunction &MF, int FI,
                              unsigned &FrameReg) const override;
   int resolveFrameIndexReference(const MachineFunction &MF, int FI,
-                                 unsigned &FrameReg,
-                                 bool PreferFP = false) const;
+                                 unsigned &FrameReg, bool PreferFP,
+                                 bool ForSimm) const;
+  int resolveFrameOffsetReference(const MachineFunction &MF, int ObjectOffset,
+                                  bool isFixed, unsigned &FrameReg,
+                                  bool PreferFP, bool ForSimm) const;
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
@@ -79,6 +81,9 @@ public:
   int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
                                      unsigned &FrameReg,
                                      bool IgnoreSPUpdates) const override;
+  int getNonLocalFrameIndexReference(const MachineFunction &MF,
+                               int FI) const override;
+  int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
 
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 37720cbd32bb..528756b34856 100644
--- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -1,9 +1,8 @@
 //===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -111,6 +110,10 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
     // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
     {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
     {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+    // 49: Shift scalar with 64 bit shift imm
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+    {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
 };
 
 bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fc9855f6a0da..cd7e927ac80c 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,7 +52,7 @@ public:
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
-    ForCodeSize = MF.getFunction().optForSize();
+    ForCodeSize = MF.getFunction().hasOptSize();
     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
@@ -92,6 +91,12 @@ public:
   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
   }
+  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
+  }
+  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
+  }
   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
     return SelectAddrModeIndexed(N, 1, Base, OffImm);
   }
@@ -152,6 +157,9 @@ public:
 
   bool tryIndexedLoad(SDNode *N);
 
+  bool trySelectStackSlotTagP(SDNode *N);
+  void SelectTagP(SDNode *N);
+
   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -180,7 +188,12 @@ private:
   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                              SDValue &Shift);
   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
-                               SDValue &OffImm);
+                               SDValue &OffImm) {
+    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
+  }
+  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
+                                     unsigned Size, SDValue &Base,
+                                     SDValue &OffImm);
   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
@@ -676,12 +689,13 @@ static bool isWorthFoldingADDlow(SDValue N) {
   return true;
 }
 
-/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
 /// immediate" address.  The "Size" argument is the size in bytes of the memory
 /// reference, which determines the scale.
-bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
-                                                  SDValue &Base,
-                                                  SDValue &OffImm) {
+bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
+                                                        unsigned BW, unsigned Size,
+                                                        SDValue &Base,
+                                                        SDValue &OffImm) {
   SDLoc dl(N);
   const DataLayout &DL = CurDAG->getDataLayout();
   const TargetLowering *TLI = getTargetLowering();
@@ -692,26 +706,43 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
     return true;
   }
 
-  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+  // As opposed to the (12-bit) Indexed addressing mode below, the BW-bit mode
   // selected here doesn't support labels/immediates, only base+offset.
-
   if (CurDAG->isBaseWithConstantOffset(N)) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-      int64_t RHSC = RHS->getSExtValue();
-      unsigned Scale = Log2_32(Size);
-      if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
-          RHSC < (0x40 << Scale)) {
-        Base = N.getOperand(0);
-        if (Base.getOpcode() == ISD::FrameIndex) {
-          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+      if (IsSignedImm) {
+        int64_t RHSC = RHS->getSExtValue();
+        unsigned Scale = Log2_32(Size);
+        int64_t Range = 0x1LL << (BW - 1);
+
+        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
+            RHSC < (Range << Scale)) {
+          Base = N.getOperand(0);
+          if (Base.getOpcode() == ISD::FrameIndex) {
+            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+          }
+          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+          return true;
+        }
+      } else {
+        // Unsigned immediate: only the upper bound needs to be checked.
+        uint64_t RHSC = RHS->getZExtValue();
+        unsigned Scale = Log2_32(Size);
+        uint64_t Range = 0x1ULL << BW;
+
+        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
+          Base = N.getOperand(0);
+          if (Base.getOpcode() == ISD::FrameIndex) {
+            int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+          }
+          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+          return true;
         }
-        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
-        return true;
       }
     }
   }
-
   // Base only. The address will be materialized into a register before
   // the memory is accessed.
   //    add x0, Xbase, #offset
@@ -2650,6 +2681,14 @@ bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
     return true;
   }
 
+  if (RegString->getString() == "pc") {
+    ReplaceNode(N, CurDAG->getMachineNode(
+                       AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
+                       CurDAG->getTargetConstant(0, DL, MVT::i32),
+                       N->getOperand(0)));
+    return true;
+  }
+
   return false;
 }
 
@@ -2754,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
+  // tagp(FrameIndex, IRGstack, tag_offset):
+  // since the offset between FrameIndex and IRGstack is a compile-time
+  // constant, this can be lowered to a single ADDG instruction.
+  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
+    return false;
+  }
+
+  SDValue IRG_SP = N->getOperand(2);
+  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
+      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
+          Intrinsic::aarch64_irg_sp) {
+    return false;
+  }
+
+  const TargetLowering *TLI = getTargetLowering();
+  SDLoc DL(N);
+  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
+  SDValue FiOp = CurDAG->getTargetFrameIndex(
+      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+
+  SDNode *Out = CurDAG->getMachineNode(
+      AArch64::TAGPstack, DL, MVT::i64,
+      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, Out);
+  return true;
+}
+
+void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
+  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
+         "llvm.aarch64.tagp third argument must be an immediate");
+  if (trySelectStackSlotTagP(N))
+    return;
+  // FIXME: above applies in any case when offset between Op1 and Op2 is a
+  // compile-time constant, not just for stack allocations.
+
+  // General case for unrelated pointers in Op1 and Op2.
+  SDLoc DL(N);
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
+                                      {N->getOperand(1), N->getOperand(2)});
+  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
+                                      {SDValue(N1, 0), N->getOperand(2)});
+  SDNode *N3 = CurDAG->getMachineNode(
+      AArch64::ADDG, DL, MVT::i64,
+      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, N3);
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3247,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_tagp:
+      SelectTagP(Node);
+      return;
     case Intrinsic::aarch64_neon_tbl2:
       SelectTable(Node, 2,
                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e01ca14d7f63..7becc99fb5c7 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation  ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64ISelLowering.h"
 #include "AArch64CallingConvention.h"
 #include "AArch64MachineFunctionInfo.h"
@@ -55,9 +55,11 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
@@ -87,6 +89,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "aarch64-lower"
 
@@ -454,6 +457,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMAXNUM, Ty, Legal);
     setOperationAction(ISD::FMINIMUM, Ty, Legal);
     setOperationAction(ISD::FMAXIMUM, Ty, Legal);
+    setOperationAction(ISD::LROUND, Ty, Legal);
+    setOperationAction(ISD::LLROUND, Ty, Legal);
+    setOperationAction(ISD::LRINT, Ty, Legal);
+    setOperationAction(ISD::LLRINT, Ty, Legal);
   }
 
   if (Subtarget->hasFullFP16()) {
@@ -544,9 +551,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // Trap.
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
+  if (Subtarget->isTargetWindows())
+    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
 
   // We combine OR nodes for bitfield operations.
   setTargetDAGCombine(ISD::OR);
+  // Try to create BICs for vector ANDs.
+  setTargetDAGCombine(ISD::AND);
 
   // Vector add and sub nodes may conceal a high-half opportunity.
   // Also, try to fold ADD into CSINC/CSINV..
@@ -608,9 +619,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setPrefLoopAlignment(STI.getPrefLoopAlignment());
 
   // Only change the limit for entries in a jump table if specified by
-  // the subtarget, but not at the command line.
+  // the sub target, but not at the command line.
   unsigned MaxJT = STI.getMaximumJumpTableSize();
-  if (MaxJT && getMaximumJumpTableSize() == 0)
+  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
     setMaximumJumpTableSize(MaxJT);
 
   setHasExtractBitsInsn(true);
@@ -658,14 +669,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     // elements smaller than i32, so promote the input to i32 first.
     setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
     setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
-    setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
-    setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
-    // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
-    // -> v8f16 conversions.
+    // v8i8 inputs likewise need their i8 elements promoted to i32 (v8i32).
     setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
     setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
-    setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
-    setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
     // Similarly, there is no direct i32 -> f64 vector conversion instruction.
     setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
@@ -676,18 +682,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
 
+    if (Subtarget->hasFullFP16()) {
+      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+    } else {
+      // When AArch64 doesn't have fullfp16 support, promote the input
+      // to i32 first.
+      setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
+      setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
+      setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
+      setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
+    }
+
     setOperationAction(ISD::CTLZ,       MVT::v1i64, Expand);
     setOperationAction(ISD::CTLZ,       MVT::v2i64, Expand);
 
-    setOperationAction(ISD::CTTZ,       MVT::v2i8,  Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v4i16, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v2i32, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v1i64, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v16i8, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v8i16, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v4i32, Expand);
-    setOperationAction(ISD::CTTZ,       MVT::v2i64, Expand);
-
     // AArch64 doesn't have MUL.2d:
     setOperationAction(ISD::MUL, MVT::v2i64, Expand);
     // Custom handling for some quad-vector types to detect MULL.
@@ -696,14 +707,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
 
     // Vector reductions
-    for (MVT VT : MVT::integer_valuetypes()) {
+    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+                    MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
     }
-    for (MVT VT : MVT::fp_valuetypes()) {
+    for (MVT VT : { MVT::v4f16, MVT::v2f32,
+                    MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
     }
@@ -726,6 +739,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 
       setOperationAction(ISD::BSWAP, VT, Expand);
+      setOperationAction(ISD::CTTZ, VT, Expand);
 
       for (MVT InnerVT : MVT::vector_valuetypes()) {
         setTruncStoreAction(VT, InnerVT, Expand);
@@ -745,6 +759,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FROUND, Ty, Legal);
     }
 
+    if (Subtarget->hasFullFP16()) {
+      for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
+        setOperationAction(ISD::FFLOOR, Ty, Legal);
+        setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+        setOperationAction(ISD::FCEIL, Ty, Legal);
+        setOperationAction(ISD::FRINT, Ty, Legal);
+        setOperationAction(ISD::FTRUNC, Ty, Legal);
+        setOperationAction(ISD::FROUND, Ty, Legal);
+      }
+    }
+
     setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
   }
 
@@ -783,7 +808,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
   setOperationAction(ISD::SRA, VT, Custom);
   setOperationAction(ISD::SRL, VT, Custom);
   setOperationAction(ISD::SHL, VT, Custom);
-  setOperationAction(ISD::AND, VT, Custom);
   setOperationAction(ISD::OR, VT, Custom);
   setOperationAction(ISD::SETCC, VT, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
@@ -1052,10 +1076,9 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
   return MVT::i64;
 }
 
-bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                           unsigned AddrSpace,
-                                                           unsigned Align,
-                                                           bool *Fast) const {
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
   if (Subtarget->requiresStrictAlign())
     return false;
 
@@ -1211,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::FRECPS:            return "AArch64ISD::FRECPS";
   case AArch64ISD::FRSQRTE:           return "AArch64ISD::FRSQRTE";
   case AArch64ISD::FRSQRTS:           return "AArch64ISD::FRSQRTS";
+  case AArch64ISD::STG:               return "AArch64ISD::STG";
+  case AArch64ISD::STZG:              return "AArch64ISD::STZG";
+  case AArch64ISD::ST2G:              return "AArch64ISD::ST2G";
+  case AArch64ISD::STZ2G:             return "AArch64ISD::STZ2G";
   }
   return nullptr;
 }
@@ -2326,7 +2353,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
                      SDLoc(Op)).first;
 }
 
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
+                                                    SelectionDAG &DAG) const {
   // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
   // Any additional optimization in this function should be recorded
   // in the cost tables.
@@ -2334,8 +2362,9 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   unsigned NumElts = InVT.getVectorNumElements();
 
-  // f16 vectors are promoted to f32 before a conversion.
-  if (InVT.getVectorElementType() == MVT::f16) {
+  // f16 conversions are promoted to f32 when full fp16 is not supported.
+  if (InVT.getVectorElementType() == MVT::f16 &&
+      !Subtarget->hasFullFP16()) {
     MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
     SDLoc dl(Op);
     return DAG.getNode(
@@ -2743,6 +2772,28 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::aarch64_neon_umin:
     return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
+
+  case Intrinsic::localaddress: {
+    const auto &MF = DAG.getMachineFunction();
+    const auto *RegInfo = Subtarget->getRegisterInfo();
+    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
+    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
+                              Op.getSimpleValueType());
+  }
+
+  case Intrinsic::eh_recoverfp: {
+    // FIXME: This needs to be implemented to correctly handle highly aligned
+    // stack objects. For now we simply return the incoming FP. Refer to D53541
+    // for more details.
+    SDValue FnOp = Op.getOperand(1);
+    SDValue IncomingFPOp = Op.getOperand(2);
+    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+    if (!Fn)
+      report_fatal_error(
+          "llvm.eh.recoverfp must take a function as the first argument");
+    return IncomingFPOp;
+  }
   }
 }
 
@@ -2797,7 +2848,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
   unsigned AS = StoreNode->getAddressSpace();
   unsigned Align = StoreNode->getAlignment();
   if (Align < MemVT.getStoreSize() &&
-      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+      !allowsMisalignedMemoryAccesses(
+          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
     return scalarizeVectorStore(StoreNode, DAG);
   }
 
@@ -2900,8 +2952,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerCTPOP(Op, DAG);
   case ISD::FCOPYSIGN:
     return LowerFCOPYSIGN(Op, DAG);
-  case ISD::AND:
-    return LowerVectorAND(Op, DAG);
   case ISD::OR:
     return LowerVectorOR(Op, DAG);
   case ISD::XOR:
@@ -2945,8 +2995,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-#include "AArch64GenCallingConv.inc"
-
 /// Selects the correct CCAssignFn for a given CallingConvention value.
 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                      bool IsVarArg) const {
@@ -3167,6 +3215,32 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
                                        FuncInfo->getForwardedMustTailRegParms();
       CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
                                                CC_AArch64_AAPCS);
+
+      // Conservatively forward X8, since it might be used for aggregate return.
+      if (!CCInfo.isAllocated(AArch64::X8)) {
+        unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+        Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+      }
+    }
+  }
+
+  // On Windows, InReg pointers must be returned, so record the pointer in a
+  // virtual register at the start of the function so it can be returned in the
+  // epilogue.
+  if (IsWin64) {
+    for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+      if (Ins[I].Flags.isInReg()) {
+        assert(!FuncInfo->getSRetReturnReg());
+
+        MVT PtrTy = getPointerTy(DAG.getDataLayout());
+        unsigned Reg =
+          MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+        FuncInfo->setSRetReturnReg(Reg);
+
+        SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
+        Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+        break;
+      }
     }
   }
 
@@ -3365,10 +3439,20 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   // X86) but less efficient and uglier in LowerCall.
   for (Function::const_arg_iterator i = CallerF.arg_begin(),
                                     e = CallerF.arg_end();
-       i != e; ++i)
+       i != e; ++i) {
     if (i->hasByValAttr())
       return false;
 
+    // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+    // In this case, it is necessary to save/restore X0 in the callee. Tail
+    // call opt interferes with this. So we disable tail call opt when the
+    // caller has an argument with "inreg" attribute.
+
+    // FIXME: Check whether the callee also has an "inreg" argument.
+    if (i->hasInRegAttr())
+      return false;
+  }
+
   if (getTargetMachine().Options.GuaranteedTailCallOpt)
     return canGuaranteeTCO(CalleeCC) && CCMatch;
 
@@ -3886,6 +3970,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SDLoc &DL, SelectionDAG &DAG) const {
+  auto &MF = DAG.getMachineFunction();
+  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+
   CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
                           ? RetCC_AArch64_WebKit_JS
                           : RetCC_AArch64_AAPCS;
@@ -3924,6 +4011,23 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
+
+  // Windows AArch64 ABIs require that for returning structs by value we copy
+  // the sret argument into X0 for the return.
+  // We saved the argument into a virtual register in the entry block,
+  // so now we copy the value out and into X0.
+  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
+    SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
+                                     getPointerTy(MF.getDataLayout()));
+
+    unsigned RetValReg = AArch64::X0;
+    Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
+    Flag = Chain.getValue(1);
+
+    RetOps.push_back(
+      DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+  }
+
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
   const MCPhysReg *I =
       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
@@ -5197,50 +5301,20 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
   return DAG.getFrameIndex(FI, VT);
 }
 
+#define GET_REGISTER_MATCHER
+#include "AArch64GenAsmMatcher.inc"
+
 // FIXME? Maybe this could be a TableGen attribute on some registers and
 // this table could be generated automatically from RegInfo.
 unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                                   SelectionDAG &DAG) const {
-  unsigned Reg = StringSwitch<unsigned>(RegName)
-                       .Case("sp", AArch64::SP)
-                       .Case("x1", AArch64::X1)
-                       .Case("w1", AArch64::W1)
-                       .Case("x2", AArch64::X2)
-                       .Case("w2", AArch64::W2)
-                       .Case("x3", AArch64::X3)
-                       .Case("w3", AArch64::W3)
-                       .Case("x4", AArch64::X4)
-                       .Case("w4", AArch64::W4)
-                       .Case("x5", AArch64::X5)
-                       .Case("w5", AArch64::W5)
-                       .Case("x6", AArch64::X6)
-                       .Case("w6", AArch64::W6)
-                       .Case("x7", AArch64::X7)
-                       .Case("w7", AArch64::W7)
-                       .Case("x18", AArch64::X18)
-                       .Case("w18", AArch64::W18)
-                       .Case("x20", AArch64::X20)
-                       .Case("w20", AArch64::W20)
-                       .Default(0);
-  if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
-      !Subtarget->isXRegisterReserved(1)) ||
-      ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
-      !Subtarget->isXRegisterReserved(2)) ||
-      ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
-      !Subtarget->isXRegisterReserved(3)) ||
-      ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
-      !Subtarget->isXRegisterReserved(4)) ||
-      ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
-      !Subtarget->isXRegisterReserved(5)) ||
-      ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
-      !Subtarget->isXRegisterReserved(6)) ||
-      ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
-      !Subtarget->isXRegisterReserved(7)) ||
-      ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
-      !Subtarget->isXRegisterReserved(18)) ||
-      ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
-      !Subtarget->isXRegisterReserved(20)))
-    Reg = 0;
+  unsigned Reg = MatchRegisterName(RegName);
+  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
+    const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
+    unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
+    if (!Subtarget->isXRegisterReserved(DwarfRegNum))
+      Reg = 0;
+  }
   if (Reg)
     return Reg;
   report_fatal_error(Twine("Invalid register name \""
@@ -5398,35 +5472,41 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
   return false;
 }
 
-bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
-  // FIXME: We should be able to handle f128 as well with a clever lowering.
-  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
-                          (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
-    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
-    return true;
-  }
-
+bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                         bool OptForSize) const {
   bool IsLegal = false;
-  SmallString<128> ImmStrVal;
-  Imm.toString(ImmStrVal);
-
+  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
+  // 16-bit case when target has full fp16 support.
+  // FIXME: We should be able to handle f128 as well with a clever lowering.
+  const APInt ImmInt = Imm.bitcastToAPInt();
   if (VT == MVT::f64)
-    IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
+    IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
   else if (VT == MVT::f32)
-    IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
+    IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
   else if (VT == MVT::f16 && Subtarget->hasFullFP16())
-    IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
-
-  if (IsLegal) {
-    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
-                      << " imm value: " << ImmStrVal << "\n");
-    return true;
-  }
-
-  LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
-                    << " imm value: " << ImmStrVal << "\n");
-  return false;
+    IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
+  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
+  //       generate that fmov.
+
+  // If we can not materialize in immediate field for fmov, check if the
+  // value can be encoded as the immediate operand of a logical instruction.
+  // The immediate value will be created with either MOVZ, MOVN, or ORR.
+  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
+    // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
+    // however the mov+fmov sequence is always better because of the reduced
+    // cache pressure. The timings are still the same if you consider
+    // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
+    // movw+movk is fused). So we limit up to 2 instructions at most.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
+			      Insn);
+    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
+    IsLegal = Insn.size() <= Limit;
+  }
+
+  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
+                    << " imm value: "; Imm.dump(););
+  return IsLegal;
 }
 
 //===----------------------------------------------------------------------===//
@@ -6226,6 +6306,8 @@ static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
 
 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
     if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
@@ -6240,6 +6322,8 @@ static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
@@ -6276,6 +6360,8 @@ static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
   WhichResult = (M[0] == 0 ? 0 : 1);
   for (unsigned i = 0; i < NumElts; i += 2) {
     if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
@@ -6918,46 +7004,6 @@ static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
-                                              SelectionDAG &DAG) const {
-  SDValue LHS = Op.getOperand(0);
-  EVT VT = Op.getValueType();
-
-  BuildVectorSDNode *BVN =
-      dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
-  if (!BVN) {
-    // AND commutes, so try swapping the operands.
-    LHS = Op.getOperand(1);
-    BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
-  }
-  if (!BVN)
-    return Op;
-
-  APInt DefBits(VT.getSizeInBits(), 0);
-  APInt UndefBits(VT.getSizeInBits(), 0);
-  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
-    SDValue NewOp;
-
-    // We only have BIC vector immediate instruction, which is and-not.
-    DefBits = ~DefBits;
-    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
-                                    DefBits, &LHS)) ||
-        (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
-                                    DefBits, &LHS)))
-      return NewOp;
-
-    UndefBits = ~UndefBits;
-    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
-                                    UndefBits, &LHS)) ||
-        (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
-                                    UndefBits, &LHS)))
-      return NewOp;
-  }
-
-  // We can always fall back to a non-immediate AND.
-  return Op;
-}
-
 // Specialized code to quickly find if PotentialBVec is a BuildVector that
 // consists of only the same constant int value, returned in reference arg
 // ConstVal
@@ -7799,8 +7845,8 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
 
   // Make v4f16 (only) fcmp operations utilise vector instructions
   // v8f16 support will be a litle more complicated
-  if (LHS.getValueType().getVectorElementType() == MVT::f16) {
-    if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) {
+  if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
+    if (LHS.getValueType().getVectorNumElements() == 4) {
       LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
       RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
       SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
@@ -7810,8 +7856,8 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
       return SDValue();
   }
 
-  assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
-         LHS.getValueType().getVectorElementType() == MVT::f64);
+  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
+          LHS.getValueType().getVectorElementType() != MVT::f128);
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
   // clean.  Some of them require two branches to implement.
@@ -8255,6 +8301,110 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
   return true;
 }
 
+/// Return true if Op1 and Op2 are both shufflevectors (second input undef)
+/// that extract the same half, lower or upper, of their first input vector.
+static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
+  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
+    auto *FullVT = cast<VectorType>(FullV->getType());
+    auto *HalfVT = cast<VectorType>(HalfV->getType());
+    return FullVT->getBitWidth() == 2 * HalfVT->getBitWidth();
+  };
+
+  auto extractHalf = [](Value *FullV, Value *HalfV) {
+    auto *FullVT = cast<VectorType>(FullV->getType());
+    auto *HalfVT = cast<VectorType>(HalfV->getType());
+    return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
+  };
+
+  Constant *M1, *M2;
+  Value *S1Op1, *S2Op1;
+  if (!match(Op1, m_ShuffleVector(m_Value(S1Op1), m_Undef(), m_Constant(M1))) ||
+      !match(Op2, m_ShuffleVector(m_Value(S2Op1), m_Undef(), m_Constant(M2))))
+    return false;
+
+  // Check that each shuffle result is half as wide (in bits) as its input
+  // vector and has half as many elements.
+  if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
+      !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
+    return false;
+
+  // Check that both masks extract the same half (lower or upper) of the
+  // input elements.
+  int M1Start = -1;
+  int M2Start = -1;
+  int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+  if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
+      !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
+      M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
+    return false;
+
+  return true;
+}
+
+/// Check if Ext1 and Ext2 are each a zext or sext that doubles the scalar bit
+/// width of its operand; the two extends need not be of the same kind.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+  auto areExtDoubled = [](Instruction *Ext) {
+    return Ext->getType()->getScalarSizeInBits() ==
+           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+  };
+
+  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
+      !match(Ext2, m_ZExtOrSExt(m_Value())) ||
+      !areExtDoubled(cast<Instruction>(Ext1)) ||
+      !areExtDoubled(cast<Instruction>(Ext2)))
+    return false;
+
+  return true;
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
+bool AArch64TargetLowering::shouldSinkOperands(
+    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+  if (!I->getType()->isVectorTy())
+    return false;
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::aarch64_neon_umull:
+      if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
+        return false;
+      Ops.push_back(&II->getOperandUse(0));
+      Ops.push_back(&II->getOperandUse(1));
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  switch (I->getOpcode()) {
+  case Instruction::Sub:
+  case Instruction::Add: {
+    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+      return false;
+
+    // The exts themselves are never shufflevectors, so inspect the exts'
+    // operands: if they extract the lower or upper half, sink them too.
+    auto Ext1 = cast<Instruction>(I->getOperand(0));
+    auto Ext2 = cast<Instruction>(I->getOperand(1));
+    if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
+      Ops.push_back(&Ext1->getOperandUse(0));
+      Ops.push_back(&Ext2->getOperandUse(0));
+    }
+
+    Ops.push_back(&I->getOperandUse(0));
+    Ops.push_back(&I->getOperandUse(1));
+
+    return true;
+  }
+  default:
+    return false;
+  }
+  return false;
+}
+
 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
                                           unsigned &RequiredAligment) const {
   if (!LoadedType.isSimple() ||
@@ -8377,8 +8527,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
     // If we're generating more than one load, compute the base address of
     // subsequent loads as an offset from the previous.
     if (LoadCount > 0)
-      BaseAddr = Builder.CreateConstGEP1_32(
-          BaseAddr, VecTy->getVectorNumElements() * Factor);
+      BaseAddr =
+          Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
+                                     VecTy->getVectorNumElements() * Factor);
 
     CallInst *LdN = Builder.CreateCall(
         LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -8540,7 +8691,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
     // If we generating more than one store, we compute the base address of
     // subsequent stores as an offset from the previous.
     if (StoreCount > 0)
-      BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+      BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+                                            BaseAddr, LaneLen * Factor);
 
     Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
     Builder.CreateCall(StNFunc, Ops);
@@ -8554,13 +8706,12 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
           (DstAlign == 0 || DstAlign % AlignCheck == 0));
 }
 
-EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                                               unsigned SrcAlign, bool IsMemset,
-                                               bool ZeroMemset,
-                                               bool MemcpyStrSrc,
-                                               MachineFunction &MF) const {
-  const Function &F = MF.getFunction();
-  bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
+EVT AArch64TargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
+  bool CanImplicitFloat =
+      !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
   bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
   bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -8571,7 +8722,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
     if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
       return true;
     bool Fast;
-    return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
+    return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
+                                          &Fast) &&
+           Fast;
   };
 
   if (CanUseNEON && IsMemset && !IsSmallMemset &&
@@ -9061,6 +9214,9 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget->hasNEON())
     return SDValue();
 
+  if (!N->getValueType(0).isSimple())
+    return SDValue();
+
   SDValue Op = N->getOperand(0);
   if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
       Op.getOpcode() != ISD::FMUL)
@@ -9323,6 +9479,46 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   return SDValue();
 }
 
+static SDValue performANDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue LHS = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue(); // Only legal vector types are handled.
+
+  BuildVectorSDNode *BVN =
+      dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
+  if (!BVN)
+    return SDValue(); // Operand 1 is not a BUILD_VECTOR.
+
+  // AND does not accept an immediate, so check if we can use a BIC immediate
+  // instruction instead. We do this here instead of using a (and x, (mvni imm))
+  // pattern in isel, because some immediates may be lowered to the preferred
+  // (and x, (movi imm)) form, even though an mvni representation also exists.
+  APInt DefBits(VT.getSizeInBits(), 0);
+  APInt UndefBits(VT.getSizeInBits(), 0);
+  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
+    SDValue NewOp;
+
+    DefBits = ~DefBits; // BIC applies the complement of the AND mask.
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
+                                    DefBits, &LHS)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
+                                    DefBits, &LHS)))
+      return NewOp;
+
+    UndefBits = ~UndefBits; // Retry with the inverted UndefBits pattern.
+    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
+                                    UndefBits, &LHS)) ||
+        (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
+                                    UndefBits, &LHS)))
+      return NewOp;
+  }
+
+  return SDValue(); // No BIC immediate form matched.
+}
+
 static SDValue performSRLCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
   SelectionDAG &DAG = DCI.DAG;
@@ -9598,12 +9794,13 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
                      DAG.getConstant(NumElems, dl, MVT::i64));
 }
 
-static bool isEssentiallyExtractSubvector(SDValue N) {
-  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
-    return true;
-
-  return N.getOpcode() == ISD::BITCAST &&
-         N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
+static bool isEssentiallyExtractHighSubvector(SDValue N) {
+  if (N.getOpcode() == ISD::BITCAST)
+    N = N.getOperand(0); // Look through a single bitcast.
+  if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+    return false; // Not an EXTRACT_SUBVECTOR (even through a bitcast).
+  return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
+         N.getOperand(0).getValueType().getVectorNumElements() / 2;
 }
 
 /// Helper structure to keep track of ISD::SET_CC operands.
@@ -9770,13 +9967,13 @@ static SDValue performAddSubLongCombine(SDNode *N,
 
   // It's not worth doing if at least one of the inputs isn't already an
   // extract, but we don't know which it'll be so we have to try both.
-  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
+  if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
     RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
     if (!RHS.getNode())
       return SDValue();
 
     RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
-  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
+  } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
     LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
     if (!LHS.getNode())
       return SDValue();
@@ -9809,11 +10006,11 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
   // Either node could be a DUP, but it's not worth doing both of them (you'd
   // just as well use the non-high version) so look for a corresponding extract
   // operation on the other "wing".
-  if (isEssentiallyExtractSubvector(LHS)) {
+  if (isEssentiallyExtractHighSubvector(LHS)) {
     RHS = tryExtendDUPToExtractHigh(RHS, DAG);
     if (!RHS.getNode())
       return SDValue();
-  } else if (isEssentiallyExtractSubvector(RHS)) {
+  } else if (isEssentiallyExtractHighSubvector(RHS)) {
     LHS = tryExtendDUPToExtractHigh(LHS, DAG);
     if (!LHS.getNode())
       return SDValue();
@@ -10261,7 +10458,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
     return SDValue();
 
   // Don't split at -Oz.
-  if (DAG.getMachineFunction().getFunction().optForMinSize())
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
   // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
@@ -10917,6 +11114,12 @@ static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
     return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
   }
 
+  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+  if (Op->getOpcode() == ISD::ANY_EXTEND &&
+      Bit < Op->getOperand(0).getValueSizeInBits()) {
+    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+  }
+
   if (Op->getNumOperands() != 2)
     return Op;
 
@@ -11172,6 +11375,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performFDivCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
+  case ISD::AND:
+    return performANDCombine(N, DCI);
   case ISD::SRL:
     return performSRLCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN:
@@ -11573,6 +11778,9 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size > 128) return AtomicExpansionKind::None;
   // Nand not supported in LSE.
@@ -11627,9 +11835,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
       IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
   Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
 
-  return Builder.CreateTruncOrBitCast(
-      Builder.CreateCall(Ldxr, Addr),
-      cast<PointerType>(Addr->getType())->getElementType());
+  Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
+
+  const DataLayout &DL = M->getDataLayout();
+  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
+  Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
+
+  return Builder.CreateBitCast(Trunc, EltTy);
 }
 
 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@@ -11664,6 +11876,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
   Type *Tys[] = { Addr->getType() };
   Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
 
+  const DataLayout &DL = M->getDataLayout();
+  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
+  Val = Builder.CreateBitCast(Val, IntValTy);
+
   return Builder.CreateCall(Stxr,
                             {Builder.CreateZExtOrBitCast(
                                  Val, Stxr->getFunctionType()->getParamType(0)),
@@ -11685,8 +11901,9 @@ static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
   Function *ThreadPointerFunc =
       Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
   return IRB.CreatePointerCast(
-      IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
-      Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+      IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
+                             Offset),
+      IRB.getInt8PtrTy()->getPointerTo(0));
 }
 
 Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
@@ -11712,12 +11929,13 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
                         Type::getInt8PtrTy(M.getContext()));
 
     // MSVC CRT has a function to validate security cookie.
-    auto *SecurityCheckCookie = cast<Function>(
-        M.getOrInsertFunction("__security_check_cookie",
-                              Type::getVoidTy(M.getContext()),
-                              Type::getInt8PtrTy(M.getContext())));
-    SecurityCheckCookie->setCallingConv(CallingConv::Win64);
-    SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+        "__security_check_cookie", Type::getVoidTy(M.getContext()),
+        Type::getInt8PtrTy(M.getContext()));
+    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
+      F->setCallingConv(CallingConv::Win64);
+      F->addAttribute(1, Attribute::AttrKind::InReg);
+    }
     return;
   }
   TargetLowering::insertSSPDeclarations(M);
@@ -11730,7 +11948,7 @@ Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
   return TargetLowering::getSDagStackGuard(M);
 }
 
-Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
   // MSVC CRT has a function to validate security cookie.
   if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
     return M.getFunction("__security_check_cookie");
@@ -11825,6 +12043,11 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
   return OptSize && !VT.isVector();
 }
 
+bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  // Prefer inc-of-add only for scalar integers; vectors get sub-of-not.
+  return VT.isScalarInteger();
+}
+
 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
 }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index ffc4cc3ef534..4421c31f65c9 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1,9 +1,8 @@
 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -215,7 +214,13 @@ enum NodeType : unsigned {
   LD4LANEpost,
   ST2LANEpost,
   ST3LANEpost,
-  ST4LANEpost
+  ST4LANEpost,
+
+  STG,
+  STZG,
+  ST2G,
+  STZ2G
+
 };
 
 } // end namespace AArch64ISD
@@ -263,9 +268,10 @@ public:
 
   /// Returns true if the target allows unaligned memory accesses of the
   /// specified type.
-  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
-                                      unsigned Align = 1,
-                                      bool *Fast = nullptr) const override;
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *Fast = nullptr) const override;
 
   /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -287,7 +293,8 @@ public:
 
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+  bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                    bool ForCodeSize) const override;
 
   /// Return true if the given shuffle mask can be codegen'd directly, or if it
   /// should be stack expanded.
@@ -328,6 +335,9 @@ public:
   bool isZExtFree(EVT VT1, EVT VT2) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
 
+  bool shouldSinkOperands(Instruction *I,
+                          SmallVectorImpl<Use *> &Ops) const override;
+
   bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
 
   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -346,7 +356,7 @@ public:
 
   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                          MachineFunction &MF) const override;
+                          const AttributeList &FuncAttributes) const override;
 
   /// Return true if the addressing mode represented by AM is legal for this
   /// target, for a load/store of the specified type.
@@ -409,7 +419,7 @@ public:
 
   void insertSSPDeclarations(Module &M) const override;
   Value *getSDagStackGuard(const Module &M) const override;
-  Value *getSSPStackGuardCheck(const Module &M) const override;
+  Function *getSSPStackGuardCheck(const Module &M) const override;
 
   /// If the target has a standard location for the unsafe stack pointer,
   /// returns the address of that location. Otherwise, returns nullptr.
@@ -470,6 +480,12 @@ public:
     return VT.getSizeInBits() >= 64; // vector 'bic'
   }
 
+  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+    // Report true unless the enclosing function is being optimized for
+    // minimum size.
+    return !DAG.getMachineFunction().getFunction().hasMinSize();
+  }
+
   bool shouldTransformSignedTruncationCheck(EVT XVT,
                                             unsigned KeptBits) const override {
     // For vectors, we don't have a preference..
@@ -487,6 +503,8 @@ public:
     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
   }
 
+  bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   bool hasBitPreservingFPLogic(EVT VT) const override {
     // FIXME: Is this always true? It should be true for vectors at least.
     return VT == MVT::f32 || VT == MVT::f64;
@@ -648,9 +666,9 @@ private:
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 35cd7735ceb7..e22cb44d81ae 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -1,9 +1,8 @@
 //=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 9061ed4f9f54..d619137b55c5 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1,9 +1,8 @@
 //===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -356,6 +355,9 @@ def am_indexed7s32  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
 def am_indexed7s64  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
 def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
 
+def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
+def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
+
 // uimm5sN predicate - True if the immediate is a multiple of N in the range
 // [0 * N, 32 * N].
 def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
@@ -1818,6 +1820,14 @@ multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
 
   def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+  def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (sext GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "Xr") GPR64:$Rn,
+                (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>;
+
+  def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (zext GPR32:$Rm)))),
+            (!cast<Instruction>(NAME # "Xr") GPR64:$Rn,
+                (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>;
 }
 
 class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
@@ -2332,7 +2342,7 @@ class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>
 }
 
 class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode>
-      : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> {
+      : BaseTwoOperand<0b0000, GPR64, asm_instr, OpNode, GPR64sp, GPR64sp> {
   let Inst{31} = 1;
   let Inst{29} = setsFlags;
 }
@@ -4017,7 +4027,7 @@ class BaseMemTag<bits<2> opc1, bits<2> opc2, string asm_insn,
 class MemTagVector<bit Load, string asm_insn, string asm_opnds,
                    dag oops, dag iops>
     : BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds,
-                 "$Rn = $wback,@earlyclobber $wback", oops, iops> {
+                  "", oops, iops> {
   bits<5> Rt;
 
   let Inst{20-12} = 0b000000000;
@@ -4027,8 +4037,9 @@ class MemTagVector<bit Load, string asm_insn, string asm_opnds,
 }
 
 class MemTagLoad<string asm_insn, string asm_opnds>
-    : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "", (outs GPR64:$Rt),
-                 (ins GPR64sp:$Rn, simm9s16:$offset)> {
+    : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "$Rt = $wback",
+                 (outs GPR64:$wback),
+                 (ins GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)> {
   bits<5> Rt;
   bits<9> offset;
 
@@ -4045,29 +4056,28 @@ class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
   bits<9> offset;
 
   let Inst{20-12} = offset;
-  let Inst{4-0}   = 0b11111;
-  let Unpredictable{4-0} = 0b11111;
+  let Inst{4-0}   = Rt;
 
   let mayStore = 1;
 }
 
 multiclass MemTagStore<bits<2> opc1, string insn> {
   def Offset :
-    BaseMemTagStore<opc1, 0b10, insn, "\t[$Rn, $offset]", "",
-                    (outs), (ins GPR64sp:$Rn, simm9s16:$offset)>;
+    BaseMemTagStore<opc1, 0b10, insn, "\t$Rt, [$Rn, $offset]", "",
+                    (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PreIndex :
-    BaseMemTagStore<opc1, 0b11, insn, "\t[$Rn, $offset]!",
-                    "$Rn = $wback,@earlyclobber $wback",
+    BaseMemTagStore<opc1, 0b11, insn, "\t$Rt, [$Rn, $offset]!",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
-                    (ins GPR64sp:$Rn, simm9s16:$offset)>;
+                    (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PostIndex :
-    BaseMemTagStore<opc1, 0b01, insn, "\t[$Rn], $offset",
-                    "$Rn = $wback,@earlyclobber $wback",
+    BaseMemTagStore<opc1, 0b01, insn, "\t$Rt, [$Rn], $offset",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
-                    (ins GPR64sp:$Rn, simm9s16:$offset)>;
+                    (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
 
-  def : InstAlias<insn # "\t[$Rn]",
-                  (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rn, 0)>;
+  def : InstAlias<insn # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rt, GPR64sp:$Rn, 0)>;
 }
 
 //---
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ada067888572..215e96a82d0e 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -77,8 +76,11 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   const MachineFunction *MF = MBB.getParent();
   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
 
-  if (MI.getOpcode() == AArch64::INLINEASM)
-    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+  {
+    auto Op = MI.getOpcode();
+    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
+      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+  }
 
   // FIXME: We currently only handle pseudoinstructions that don't get expanded
   //        before the assembly printer.
@@ -928,9 +930,9 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
 }
 
 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
-    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
+    const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
   const TargetRegisterInfo *TRI = &getRegisterInfo();
-  MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
   unsigned WidthA = 0, WidthB = 0;
 
@@ -1715,6 +1717,69 @@ bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
   }
 }
 
+Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
+  switch (Opc) { // Each listed opcode maps to its unscaled counterpart.
+  default: return {}; // No mapping for this opcode: empty Optional.
+  case AArch64::PRFMui: return AArch64::PRFUMi;
+  case AArch64::LDRXui: return AArch64::LDURXi;
+  case AArch64::LDRWui: return AArch64::LDURWi;
+  case AArch64::LDRBui: return AArch64::LDURBi;
+  case AArch64::LDRHui: return AArch64::LDURHi;
+  case AArch64::LDRSui: return AArch64::LDURSi;
+  case AArch64::LDRDui: return AArch64::LDURDi;
+  case AArch64::LDRQui: return AArch64::LDURQi;
+  case AArch64::LDRBBui: return AArch64::LDURBBi;
+  case AArch64::LDRHHui: return AArch64::LDURHHi;
+  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
+  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
+  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
+  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
+  case AArch64::LDRSWui: return AArch64::LDURSWi;
+  case AArch64::STRXui: return AArch64::STURXi;
+  case AArch64::STRWui: return AArch64::STURWi;
+  case AArch64::STRBui: return AArch64::STURBi;
+  case AArch64::STRHui: return AArch64::STURHi;
+  case AArch64::STRSui: return AArch64::STURSi;
+  case AArch64::STRDui: return AArch64::STURDi;
+  case AArch64::STRQui: return AArch64::STURQi;
+  case AArch64::STRBBui: return AArch64::STURBBi;
+  case AArch64::STRHHui: return AArch64::STURHHi;
+  }
+}
+
+unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
+  switch (Opc) {
+  // Opcodes for which index 3 is returned.
+  case AArch64::LDPXi:
+  case AArch64::LDPDi:
+  case AArch64::STPXi:
+  case AArch64::STPDi:
+  case AArch64::LDNPXi:
+  case AArch64::LDNPDi:
+  case AArch64::STNPXi:
+  case AArch64::STNPDi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
+  case AArch64::LDNPQi:
+  case AArch64::STNPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPSi:
+  case AArch64::STPWi:
+  case AArch64::STPSi:
+  case AArch64::LDNPWi:
+  case AArch64::LDNPSi:
+  case AArch64::STNPWi:
+  case AArch64::STNPSi:
+  case AArch64::LDG:
+  case AArch64::STGPi:
+    return 3;
+  case AArch64::ADDG:
+  case AArch64::STGOffset:
+  default:
+    return 2;
+  }
+}
+
 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default:
@@ -1837,7 +1902,7 @@ unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
 
 // Is this a candidate for ld/st merging or pairing?  For example, we don't
 // touch volatiles or load/stores that have a hint to avoid pair formation.
-bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
+bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
   // If this is a volatile load/store, don't mess with it.
   if (MI.hasOrderedMemoryRef())
     return false;
@@ -1879,8 +1944,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
   return true;
 }
 
-bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
-                                          MachineOperand *&BaseOp,
+bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+                                          const MachineOperand *&BaseOp,
                                           int64_t &Offset,
                                           const TargetRegisterInfo *TRI) const {
   unsigned Width;
@@ -1888,7 +1953,7 @@ bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
 }
 
 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
-    MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
     unsigned &Width, const TargetRegisterInfo *TRI) const {
   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   // Handle only loads/stores with base register followed by immediate offset.
@@ -1944,7 +2009,7 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
 
 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                     unsigned &Width, int64_t &MinOffset,
-                                    int64_t &MaxOffset) const {
+                                    int64_t &MaxOffset) {
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
@@ -1965,6 +2030,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::PRFUMi:
   case AArch64::LDURXi:
   case AArch64::LDURDi:
   case AArch64::STURXi:
@@ -2034,6 +2100,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     MinOffset = -64;
     MaxOffset = 63;
     break;
+  case AArch64::PRFMui:
   case AArch64::LDRXui:
   case AArch64::LDRDui:
   case AArch64::STRXui:
@@ -2066,6 +2133,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     break;
   case AArch64::LDRHui:
   case AArch64::LDRHHui:
+  case AArch64::LDRSHWui:
+  case AArch64::LDRSHXui:
   case AArch64::STRHui:
   case AArch64::STRHHui:
     Scale = Width = 2;
@@ -2074,12 +2143,40 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     break;
   case AArch64::LDRBui:
   case AArch64::LDRBBui:
+  case AArch64::LDRSBWui:
+  case AArch64::LDRSBXui:
   case AArch64::STRBui:
   case AArch64::STRBBui:
     Scale = Width = 1;
     MinOffset = 0;
     MaxOffset = 4095;
     break;
+  case AArch64::ADDG:
+  case AArch64::TAGPstack:
+    Scale = 16;
+    Width = 0;
+    MinOffset = 0;
+    MaxOffset = 63;
+    break;
+  case AArch64::LDG:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+    Scale = Width = 16;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+    Scale = 16;
+    Width = 32;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::STGPi:
+    Scale = Width = 16;
+    MinOffset = -64;
+    MaxOffset = 63;
+    break;
   }
 
   return true;
@@ -2181,11 +2278,11 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
 /// Detect opportunities for ldp/stp formation.
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
-                                           MachineOperand &BaseOp2,
+bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
+                                           const MachineOperand &BaseOp2,
                                            unsigned NumLoads) const {
-  MachineInstr &FirstLdSt = *BaseOp1.getParent();
-  MachineInstr &SecondLdSt = *BaseOp2.getParent();
+  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
   if (BaseOp1.getType() != BaseOp2.getType())
     return false;
 
@@ -2292,6 +2389,31 @@ void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
   }
 }
 
+void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       DebugLoc DL, unsigned DestReg,
+                                       unsigned SrcReg, bool KillSrc,
+                                       unsigned Opcode, unsigned ZeroReg,
+                                       llvm::ArrayRef<unsigned> Indices) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned NumRegs = Indices.size();
+  // Debug-only check: both encodings must be multiples of Indices.size().
+#ifndef NDEBUG
+  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
+         "GPR reg sequences should not be able to overlap");
+#endif // NDEBUG
+  // Build one Opcode instruction at insertion point I per entry of Indices.
+  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
+    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+    MIB.addReg(ZeroReg); // Callers in copyPhysReg pass XZR or WZR here.
+    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+    MIB.addImm(0); // The trailing immediate operand is always 0.
+  }
+}
+
 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DestReg,
@@ -2431,6 +2553,22 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
+                    AArch64::XZR, Indices);
+    return;
+  }
+
+  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
+                    AArch64::WZR, Indices);
+    return;
+  }
+
   if (AArch64::FPR128RegClass.contains(DestReg) &&
       AArch64::FPR128RegClass.contains(SrcReg)) {
     if (Subtarget.hasNEON()) {
@@ -2839,7 +2977,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
                            unsigned DestReg, unsigned SrcReg, int Offset,
                            const TargetInstrInfo *TII,
                            MachineInstr::MIFlag Flag, bool SetNZCV,
-                           bool NeedsWinCFI) {
+                           bool NeedsWinCFI, bool *HasWinCFI) {
   if (DestReg == SrcReg && Offset == 0)
     return;
 
@@ -2884,10 +3022,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
         .setMIFlag(Flag);
 
-   if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
-     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
-         .addImm(ThisVal)
-         .setMIFlag(Flag);
+    if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
+      if (HasWinCFI)
+        *HasWinCFI = true;
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+          .addImm(ThisVal)
+          .setMIFlag(Flag);
+    }
 
     SrcReg = DestReg;
     Offset -= ThisVal;
@@ -2903,6 +3044,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   if (NeedsWinCFI) {
     if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
         (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+      if (HasWinCFI)
+        *HasWinCFI = true;
       if (Offset == 0)
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
                 setMIFlag(Flag);
@@ -2910,6 +3053,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
                 addImm(Offset).setMIFlag(Flag);
     } else if (DestReg == AArch64::SP) {
+      if (HasWinCFI)
+        *HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
               addImm(Offset).setMIFlag(Flag);
     }
@@ -2919,7 +3064,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, int FrameIndex,
-    LiveIntervals *LIS) const {
+    LiveIntervals *LIS, VirtRegMap *VRM) const {
   // This is a bit of a hack. Consider this instruction:
   //
   //   %0 = COPY %sp; GPR64all:%0
@@ -3102,11 +3247,6 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
                                     bool *OutUseUnscaledOp,
                                     unsigned *OutUnscaledOp,
                                     int *EmittableOffset) {
-  int Scale = 1;
-  bool IsSigned = false;
-  // The ImmIdx should be changed case by case if it is not 2.
-  unsigned ImmIdx = 2;
-  unsigned UnscaledOp = 0;
   // Set output values in case of early exit.
   if (EmittableOffset)
     *EmittableOffset = 0;
@@ -3114,10 +3254,12 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
     *OutUseUnscaledOp = false;
   if (OutUnscaledOp)
     *OutUnscaledOp = 0;
+
+  // Exit early for structured vector spills/fills as they can't take an
+  // immediate offset.
   switch (MI.getOpcode()) {
   default:
-    llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
-  // Vector spills/fills can't take an immediate offset.
+    break;
   case AArch64::LD1Twov2d:
   case AArch64::LD1Threev2d:
   case AArch64::LD1Fourv2d:
@@ -3130,208 +3272,53 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::ST1Twov1d:
   case AArch64::ST1Threev1d:
   case AArch64::ST1Fourv1d:
+  case AArch64::IRG:
+  case AArch64::IRGstack:
     return AArch64FrameOffsetCannotUpdate;
-  case AArch64::PRFMui:
-    Scale = 8;
-    UnscaledOp = AArch64::PRFUMi;
-    break;
-  case AArch64::LDRXui:
-    Scale = 8;
-    UnscaledOp = AArch64::LDURXi;
-    break;
-  case AArch64::LDRWui:
-    Scale = 4;
-    UnscaledOp = AArch64::LDURWi;
-    break;
-  case AArch64::LDRBui:
-    Scale = 1;
-    UnscaledOp = AArch64::LDURBi;
-    break;
-  case AArch64::LDRHui:
-    Scale = 2;
-    UnscaledOp = AArch64::LDURHi;
-    break;
-  case AArch64::LDRSui:
-    Scale = 4;
-    UnscaledOp = AArch64::LDURSi;
-    break;
-  case AArch64::LDRDui:
-    Scale = 8;
-    UnscaledOp = AArch64::LDURDi;
-    break;
-  case AArch64::LDRQui:
-    Scale = 16;
-    UnscaledOp = AArch64::LDURQi;
-    break;
-  case AArch64::LDRBBui:
-    Scale = 1;
-    UnscaledOp = AArch64::LDURBBi;
-    break;
-  case AArch64::LDRHHui:
-    Scale = 2;
-    UnscaledOp = AArch64::LDURHHi;
-    break;
-  case AArch64::LDRSBXui:
-    Scale = 1;
-    UnscaledOp = AArch64::LDURSBXi;
-    break;
-  case AArch64::LDRSBWui:
-    Scale = 1;
-    UnscaledOp = AArch64::LDURSBWi;
-    break;
-  case AArch64::LDRSHXui:
-    Scale = 2;
-    UnscaledOp = AArch64::LDURSHXi;
-    break;
-  case AArch64::LDRSHWui:
-    Scale = 2;
-    UnscaledOp = AArch64::LDURSHWi;
-    break;
-  case AArch64::LDRSWui:
-    Scale = 4;
-    UnscaledOp = AArch64::LDURSWi;
-    break;
-
-  case AArch64::STRXui:
-    Scale = 8;
-    UnscaledOp = AArch64::STURXi;
-    break;
-  case AArch64::STRWui:
-    Scale = 4;
-    UnscaledOp = AArch64::STURWi;
-    break;
-  case AArch64::STRBui:
-    Scale = 1;
-    UnscaledOp = AArch64::STURBi;
-    break;
-  case AArch64::STRHui:
-    Scale = 2;
-    UnscaledOp = AArch64::STURHi;
-    break;
-  case AArch64::STRSui:
-    Scale = 4;
-    UnscaledOp = AArch64::STURSi;
-    break;
-  case AArch64::STRDui:
-    Scale = 8;
-    UnscaledOp = AArch64::STURDi;
-    break;
-  case AArch64::STRQui:
-    Scale = 16;
-    UnscaledOp = AArch64::STURQi;
-    break;
-  case AArch64::STRBBui:
-    Scale = 1;
-    UnscaledOp = AArch64::STURBBi;
-    break;
-  case AArch64::STRHHui:
-    Scale = 2;
-    UnscaledOp = AArch64::STURHHi;
-    break;
-
-  case AArch64::LDPXi:
-  case AArch64::LDPDi:
-  case AArch64::STPXi:
-  case AArch64::STPDi:
-  case AArch64::LDNPXi:
-  case AArch64::LDNPDi:
-  case AArch64::STNPXi:
-  case AArch64::STNPDi:
-    ImmIdx = 3;
-    IsSigned = true;
-    Scale = 8;
-    break;
-  case AArch64::LDPQi:
-  case AArch64::STPQi:
-  case AArch64::LDNPQi:
-  case AArch64::STNPQi:
-    ImmIdx = 3;
-    IsSigned = true;
-    Scale = 16;
-    break;
-  case AArch64::LDPWi:
-  case AArch64::LDPSi:
-  case AArch64::STPWi:
-  case AArch64::STPSi:
-  case AArch64::LDNPWi:
-  case AArch64::LDNPSi:
-  case AArch64::STNPWi:
-  case AArch64::STNPSi:
-    ImmIdx = 3;
-    IsSigned = true;
-    Scale = 4;
-    break;
-
-  case AArch64::LDURXi:
-  case AArch64::LDURWi:
-  case AArch64::LDURBi:
-  case AArch64::LDURHi:
-  case AArch64::LDURSi:
-  case AArch64::LDURDi:
-  case AArch64::LDURQi:
-  case AArch64::LDURHHi:
-  case AArch64::LDURBBi:
-  case AArch64::LDURSBXi:
-  case AArch64::LDURSBWi:
-  case AArch64::LDURSHXi:
-  case AArch64::LDURSHWi:
-  case AArch64::LDURSWi:
-  case AArch64::STURXi:
-  case AArch64::STURWi:
-  case AArch64::STURBi:
-  case AArch64::STURHi:
-  case AArch64::STURSi:
-  case AArch64::STURDi:
-  case AArch64::STURQi:
-  case AArch64::STURBBi:
-  case AArch64::STURHHi:
-    Scale = 1;
-    break;
   }
 
-  Offset += MI.getOperand(ImmIdx).getImm() * Scale;
+  // Get the min/max offset and the scale.
+  unsigned Scale, Width;
+  int64_t MinOff, MaxOff;
+  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+                                      MaxOff))
+    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+
+  // Construct the complete offset.
+  const MachineOperand &ImmOpnd =
+      MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
+  Offset += ImmOpnd.getImm() * Scale;
 
-  bool useUnscaledOp = false;
   // If the offset doesn't match the scale, we rewrite the instruction to
   // use the unscaled instruction instead. Likewise, if we have a negative
-  // offset (and have an unscaled op to use).
-  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
-    useUnscaledOp = true;
-
-  // Use an unscaled addressing mode if the instruction has a negative offset
-  // (or if the instruction is already using an unscaled addressing mode).
-  unsigned MaskBits;
-  if (IsSigned) {
-    // ldp/stp instructions.
-    MaskBits = 7;
-    Offset /= Scale;
-  } else if (UnscaledOp == 0 || useUnscaledOp) {
-    MaskBits = 9;
-    IsSigned = true;
-    Scale = 1;
-  } else {
-    MaskBits = 12;
-    IsSigned = false;
-    Offset /= Scale;
+  // offset and there is an unscaled op to use.
+  Optional<unsigned> UnscaledOp =
+      AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
+  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
+  if (useUnscaledOp &&
+      !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+
+  int64_t Remainder = Offset % Scale;
+  assert(!(Remainder && useUnscaledOp) &&
+         "Cannot have remainder when using unscaled op");
+
+  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
+  int NewOffset = Offset / Scale;
+  if (MinOff <= NewOffset && NewOffset <= MaxOff)
+    Offset = Remainder;
+  else {
+    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
+    Offset = Offset - NewOffset * Scale + Remainder;
   }
 
-  // Attempt to fold address computation.
-  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
-  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
-  if (Offset >= MinOff && Offset <= MaxOff) {
-    if (EmittableOffset)
-      *EmittableOffset = Offset;
-    Offset = 0;
-  } else {
-    int NewOff = Offset < 0 ? MinOff : MaxOff;
-    if (EmittableOffset)
-      *EmittableOffset = NewOff;
-    Offset = (Offset - NewOff) * Scale;
-  }
+  if (EmittableOffset)
+    *EmittableOffset = NewOffset;
   if (OutUseUnscaledOp)
     *OutUseUnscaledOp = useUnscaledOp;
-  if (OutUnscaledOp)
-    *OutUnscaledOp = UnscaledOp;
+  if (OutUnscaledOp && UnscaledOp)
+    *OutUnscaledOp = *UnscaledOp;
+
   return AArch64FrameOffsetCanUpdate |
          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
 }
@@ -4974,8 +4961,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
     // At this point, we have a stack instruction that we might need to
     // fix up. We'll handle it if it's a load or store.
     if (MI.mayLoadOrStore()) {
-      MachineOperand *Base; // Filled with the base operand of MI.
-      int64_t Offset;       // Filled with the offset of MI.
+      const MachineOperand *Base; // Filled with the base operand of MI.
+      int64_t Offset;             // Filled with the offset of MI.
 
       // Does it allow us to offset the base operand and is the base the
       // register SP?
@@ -5331,12 +5318,20 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
     return outliner::InstrType::Illegal;
 
+  // Don't outline BTI instructions, because that will prevent the outlining
+  // site from being indirectly callable.
+  if (MI.getOpcode() == AArch64::HINT) {
+    int64_t Imm = MI.getOperand(0).getImm();
+    if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+      return outliner::InstrType::Illegal;
+  }
+
   return outliner::InstrType::Legal;
 }
 
 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
   for (MachineInstr &MI : MBB) {
-    MachineOperand *Base;
+    const MachineOperand *Base;
     unsigned Width;
     int64_t Offset;
 
@@ -5534,7 +5529,32 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
 
 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
   MachineFunction &MF) const {
-  return MF.getFunction().optForMinSize();
+  return MF.getFunction().hasMinSize();
+}
+
+bool AArch64InstrInfo::isCopyInstrImpl(
+    const MachineInstr &MI, const MachineOperand *&Source,
+    const MachineOperand *&Destination) const {
+  // On a match, Destination is set to operand 0 and Source to operand 2.
+  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR register operand and a
+  // zero immediate operand are used as an alias for the mov instruction.
+  if (MI.getOpcode() == AArch64::ORRWrs &&
+      MI.getOperand(1).getReg() == AArch64::WZR &&
+      MI.getOperand(3).getImm() == 0x0) {
+    Destination = &MI.getOperand(0);
+    Source = &MI.getOperand(2);
+    return true;
+  }
+  // Same check for ORRXrs with XZR as operand 1 and a zero immediate.
+  if (MI.getOpcode() == AArch64::ORRXrs &&
+      MI.getOperand(1).getReg() == AArch64::XZR &&
+      MI.getOperand(3).getImm() == 0x0) {
+    Destination = &MI.getOperand(0);
+    Source = &MI.getOperand(2);
+    return true;
+  }
+  // Anything else is not treated as a copy; Source/Destination are untouched.
+  return false;
 }
 
 #define GET_INSTRINFO_HELPERS
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 9954669d5675..7be4daba7dc4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -1,9 +1,8 @@
 //===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
@@ -54,7 +54,8 @@ public:
                              unsigned &DstReg, unsigned &SubIdx) const override;
 
   bool
-  areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const override;
 
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
@@ -84,6 +85,14 @@ public:
     return isUnscaledLdSt(MI.getOpcode());
   }
 
+  /// Returns the unscaled load/store for the scaled load/store opcode,
+  /// if there is a corresponding unscaled variant available.
+  static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
+
+
+  /// Returns the index for the immediate for a given instruction.
+  static unsigned getLoadStoreImmIdx(unsigned Opc);
+
   /// Return true if pairing the given load or store may be paired with another.
   static bool isPairableLdStInst(const MachineInstr &MI);
 
@@ -92,16 +101,18 @@ public:
   static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit);
 
   /// Return true if this is a load/store that can be potentially paired/merged.
-  bool isCandidateToMergeOrPair(MachineInstr &MI) const;
+  bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
 
   /// Hint that pairing the given load or store is unprofitable.
   static void suppressLdStPair(MachineInstr &MI);
 
-  bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffset(const MachineInstr &MI,
+                               const MachineOperand *&BaseOp,
                                int64_t &Offset,
                                const TargetRegisterInfo *TRI) const override;
 
-  bool getMemOperandWithOffsetWidth(MachineInstr &MI, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
+                                    const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
@@ -112,16 +123,21 @@ public:
   /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
   ///
   /// For unscaled instructions, \p Scale is set to 1.
-  bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
-                    int64_t &MinOffset, int64_t &MaxOffset) const;
+  static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+                           int64_t &MinOffset, int64_t &MaxOffset);
 
-  bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
+  bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+                           const MachineOperand &BaseOp2,
                            unsigned NumLoads) const override;
 
   void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                         bool KillSrc, unsigned Opcode,
                         llvm::ArrayRef<unsigned> Indices) const;
+  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
+                       llvm::ArrayRef<unsigned> Indices) const;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const override;
@@ -146,7 +162,8 @@ public:
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                         ArrayRef<unsigned> Ops,
                         MachineBasicBlock::iterator InsertPt, int FrameIndex,
-                        LiveIntervals *LIS = nullptr) const override;
+                        LiveIntervals *LIS = nullptr,
+                        VirtRegMap *VRM = nullptr) const override;
 
   /// \returns true if a branch from an instruction with opcode \p BranchOpc
   ///  bytes is capable of jumping to a position \p BrOffset bytes away.
@@ -251,6 +268,13 @@ public:
 #define GET_INSTRINFO_HELPER_DECLS
 #include "AArch64GenInstrInfo.inc"
 
+protected:
+  /// If the specific machine instruction is an instruction that moves/copies
+  /// a value from one register to another register, return true along with
+  /// the @Source machine operand and the @Destination machine operand.
+  bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+                       const MachineOperand *&Destination) const override;
+
 private:
   /// Sets the offsets on outlined instructions in \p MBB which use SP
   /// so that they will be valid post-outlining.
@@ -277,7 +301,8 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                      int Offset, const TargetInstrInfo *TII,
                      MachineInstr::MIFlag = MachineInstr::NoFlags,
-                     bool SetNZCV = false,  bool NeedsWinCFI = false);
+                     bool SetNZCV = false, bool NeedsWinCFI = false,
+                     bool *HasWinCFI = nullptr);
 
 /// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
 /// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index c24b8b36441b..eed53f36d574 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1,9 +1,8 @@
 //=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -108,6 +107,16 @@ def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                  "fuse-aes">;
 def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                                  AssemblerPredicate<"FeatureSVE", "sve">;
+def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
+                                 AssemblerPredicate<"FeatureSVE2", "sve2">;
+def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
+                                 AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">;
+def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
+                                 AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">;
+def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
+                                 AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
+def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
+                                 AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
 def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                  AssemblerPredicate<"FeatureRCPC", "rcpc">;
 def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -126,6 +135,7 @@ def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                        AssemblerPredicate<"FeatureMTE", "mte">;
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
+def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
 def UseAlternateSExtLoadCVTF32
     : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
 
@@ -133,6 +143,10 @@ def UseNegativeImmediates
     : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
                                              "NegativeImmediates">;
 
+def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
+                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                                       SDTCisInt<1>]>>;
+
 
 //===----------------------------------------------------------------------===//
 // AArch64-specific DAG Nodes.
@@ -395,6 +409,12 @@ def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 
+def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -404,10 +424,10 @@ def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 // the Function object through the <Target>Subtarget and objections were raised
 // to that (see post-commit review comments for r301750).
 let RecomputePerFunction = 1 in {
-  def ForCodeSize   : Predicate<"MF->getFunction().optForSize()">;
-  def NotForCodeSize   : Predicate<"!MF->getFunction().optForSize()">;
+  def ForCodeSize   : Predicate<"MF->getFunction().hasOptSize()">;
+  def NotForCodeSize   : Predicate<"!MF->getFunction().hasOptSize()">;
   // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
-  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().hasOptSize()">;
 
   def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
   def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
@@ -703,7 +723,9 @@ let Predicates = [HasPA] in {
 // v8.3a floating point conversion for javascript
 let Predicates = [HasJS, HasFPARMv8] in
 def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
-                                      "fjcvtzs", []> {
+                                      "fjcvtzs",
+                                      [(set GPR32:$Rd,
+                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
   let Inst{31} = 0;
 } // HasJS, HasFPARMv8
 
@@ -760,6 +782,13 @@ def MSRpstateImm4 : MSRpstateImm0_15;
 def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
                        [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
 
+let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
+def HWASAN_CHECK_MEMACCESS : Pseudo<
+  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
+  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
+  Sched<[]>;
+}
+
 // The cycle counter PMC register is PMCCNTR_EL0.
 let Predicates = [HasPerfMon] in
 def : Pat<(readcyclecounter), (MRS 0xdce8)>;
@@ -1223,11 +1252,11 @@ defm : STOPregister<"stumin","LDUMIN">;// STUMINx
 // v8.5 Memory Tagging Extension
 let Predicates = [HasMTE] in {
 
-def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
+def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>,
             Sched<[]>{
   let Inst{31} = 1;
 }
-def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
+def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{
   let Inst{31} = 1;
   let isNotDuplicable = 1;
 }
@@ -1236,7 +1265,7 @@ def SUBG  : AddSubG<1, "subg", null_frag>;
 
 def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
 
-def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
+def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
 def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
   let Defs = [NZCV];
 }
@@ -1244,24 +1273,74 @@ def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
 def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
 
 def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
+
+def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
+          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
+def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
+          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
+
 def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
 
-def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
-                   (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
-  let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
+def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
+                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
+def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
+                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
+def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
+                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
+  let Inst{23} = 0;
 }
-def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
-                   (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
 
 defm STG   : MemTagStore<0b00, "stg">;
 defm STZG  : MemTagStore<0b01, "stzg">;
 defm ST2G  : MemTagStore<0b10, "st2g">;
 defm STZ2G : MemTagStore<0b11, "stz2g">;
 
+def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (ST2GOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZ2GOffset $Rn, $Rm, $imm)>;
+
 defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
 def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
 def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
 
+def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
+          (STGOffset GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
+
+def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
+          (STGPi $Rt, $Rt2, $Rn, $imm)>;
+
+def IRGstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
+      Sched<[]>;
+def TAGPstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
+      Sched<[]>;
+
+// Explicit SP in the first operand prevents ShrinkWrap optimization
+// from leaving this instruction out of the stack frame. When IRGstack
+// is transformed into IRG, this operand is replaced with the actual
+// register / expression for the tagged base pointer of the current function.
+def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
+
+// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
+// $Rn_wback is one past the end of the range.
+let isCodeGenOnly=1, mayStore=1 in {
+def STGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+
+def STZGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+}
+
 } // Predicates = [HasMTE]
 
 //===----------------------------------------------------------------------===//
@@ -3052,6 +3131,27 @@ defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
 defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
 defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
 
+let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (lround f16:$Rn)),
+            (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
+  def : Pat<(i64 (lround f16:$Rn)),
+            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+  def : Pat<(i64 (llround f16:$Rn)),
+            (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+}
+def : Pat<(i32 (lround f32:$Rn)),
+          (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
+def : Pat<(i32 (lround f64:$Rn)),
+          (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
+def : Pat<(i64 (lround f32:$Rn)),
+          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+def : Pat<(i64 (lround f64:$Rn)),
+          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+def : Pat<(i64 (llround f32:$Rn)),
+          (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+def : Pat<(i64 (llround f64:$Rn)),
+          (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+
 //===----------------------------------------------------------------------===//
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
@@ -3116,6 +3216,27 @@ let Predicates = [HasFRInt3264] in {
   defm FRINT64X : FRIntNNT<0b11, "frint64x">;
 } // HasFRInt3264
 
+let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (lrint f16:$Rn)),
+            (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+  def : Pat<(i64 (lrint f16:$Rn)),
+            (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+  def : Pat<(i64 (llrint f16:$Rn)),
+            (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+}
+def : Pat<(i32 (lrint f32:$Rn)),
+          (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i32 (lrint f64:$Rn)),
+          (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (lrint f32:$Rn)),
+          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (lrint f64:$Rn)),
+          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (llrint f32:$Rn)),
+          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (llrint f64:$Rn)),
+          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+
 //===----------------------------------------------------------------------===//
 // Floating point two operand instructions.
 //===----------------------------------------------------------------------===//
@@ -3489,7 +3610,7 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, V
 }
 defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
 defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
 defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
 defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
 defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
@@ -5314,6 +5435,8 @@ def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), v
           (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
 def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
           (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
 def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
             (and FPR32:$Rn, (i32 65535)),
             vecshiftR16:$imm)),
@@ -5342,6 +5465,16 @@ def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
             (i64 (IMPLICIT_DEF)),
             (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
             hsub))>;
+def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
+          (i32 (INSERT_SUBREG
+            (i32 (IMPLICIT_DEF)),
+            (FACGE16 FPR16:$Rn, FPR16:$Rm),
+            hsub))>;
+def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
+          (i32 (INSERT_SUBREG
+            (i32 (IMPLICIT_DEF)),
+            (FACGT16 FPR16:$Rn, FPR16:$Rm),
+            hsub))>;
 
 defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
 defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
@@ -6031,6 +6164,7 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
 
 // __builtin_trap() uses the BRK instruction on AArch64.
 def : Pat<(trap), (BRK 1)>;
+def : Pat<(debugtrap), (BRK 0xF000)>, Requires<[IsWindows]>;
 
 // Multiply high patterns which multiply the lower subvector using smull/umull
 // and the upper subvector with smull2/umull2. Then shuffle the high the high
@@ -6147,6 +6281,7 @@ def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
 
 // Natural vector casts (128 bit)
 def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
@@ -6801,5 +6936,8 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
 def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
           (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
 
+def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
+def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
+
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 5eb589bf66d5..4e13fb8e2027 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -19,11 +18,14 @@
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -57,6 +59,15 @@ private:
   /// the patterns that don't require complex C++.
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
 
+  // A lowering phase that runs before any selection attempts.
+
+  void preISelLower(MachineInstr &I) const;
+
+  // An early selection function that runs before the selectImpl() call.
+  bool earlySelect(MachineInstr &I) const;
+
+  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;
   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
@@ -65,15 +76,84 @@ private:
   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
 
+  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
   // Helper to generate an equivalent of scalar_to_vector into a new register,
   // returned via 'Dst'.
-  bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
-                          const TargetRegisterClass *DstRC, unsigned Scalar,
-                          MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI,
-                          MachineRegisterInfo &MRI) const;
+  MachineInstr *emitScalarToVector(unsigned EltSize,
+                                   const TargetRegisterClass *DstRC,
+                                   Register Scalar,
+                                   MachineIRBuilder &MIRBuilder) const;
+
+  /// Emit a lane insert into \p DstReg, or a new vector register if None is
+  /// provided.
+  ///
+  /// The lane inserted into is defined by \p LaneIdx. The vector source
+  /// register is given by \p SrcReg. The register containing the element is
+  /// given by \p EltReg.
+  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
+                               Register EltReg, unsigned LaneIdx,
+                               const RegisterBank &RB,
+                               MachineIRBuilder &MIRBuilder) const;
+  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
+                                 SmallVectorImpl<Optional<int>> &Idxs) const;
+  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectSplitVectorUnmerge(MachineInstr &I,
+                                MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicWithSideEffects(MachineInstr &I,
+                                      MachineRegisterInfo &MRI) const;
+  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
+  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
+                                         MachineIRBuilder &MIRBuilder) const;
+
+  // Emit a vector concat operation.
+  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
+                                 Register Op2,
+                                 MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+                                   MachineOperand &Predicate,
+                                   MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
+                        MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
+                        MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
+                                     const RegisterBank &DstRB, LLT ScalarTy,
+                                     Register VecReg, unsigned LaneIdx,
+                                     MachineIRBuilder &MIRBuilder) const;
+
+  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
+  /// materialized using a FMOV instruction, then update MI and return it.
+  /// Otherwise, do nothing and return a nullptr.
+  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
+                                     MachineRegisterInfo &MRI) const;
+
+  /// Emit a CSet for a compare.
+  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
+                                MachineIRBuilder &MIRBuilder) const;
+
+  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
+  // We use these manually instead of using the importer since it doesn't
+  // support SDNodeXForm.
+  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
+  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
+  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
+  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
 
   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
 
@@ -109,6 +189,14 @@ private:
   void materializeLargeCMVal(MachineInstr &I, const Value *V,
                              unsigned char OpFlags) const;
 
+  // Optimization methods.
+  bool tryOptVectorShuffle(MachineInstr &I) const;
+  bool tryOptVectorDup(MachineInstr &MI) const;
+  bool tryOptSelect(MachineInstr &MI) const;
+  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+                                      MachineOperand &Predicate,
+                                      MachineIRBuilder &MIRBuilder) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -177,6 +265,70 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
   return nullptr;
 }
 
+/// Return the smallest register class on bank \p RB holding \p SizeInBits bits
+/// (GPR "all" variants if \p GetAllRegSet), or nullptr if none is known here.
+static const TargetRegisterClass *
+getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
+                      bool GetAllRegSet = false) {
+  unsigned RegBankID = RB.getID();
+
+  if (RegBankID == AArch64::GPRRegBankID) {
+    if (SizeInBits <= 32)
+      return GetAllRegSet ? &AArch64::GPR32allRegClass
+                          : &AArch64::GPR32RegClass;
+    if (SizeInBits == 64)
+      return GetAllRegSet ? &AArch64::GPR64allRegClass
+                          : &AArch64::GPR64RegClass;
+  }
+
+  if (RegBankID == AArch64::FPRRegBankID) {
+    switch (SizeInBits) {
+    default:
+      return nullptr;
+    case 8:
+      return &AArch64::FPR8RegClass;
+    case 16:
+      return &AArch64::FPR16RegClass;
+    case 32:
+      return &AArch64::FPR32RegClass;
+    case 64:
+      return &AArch64::FPR64RegClass;
+    case 128:
+      return &AArch64::FPR128RegClass;
+    }
+  }
+
+  return nullptr;
+}
+
+/// Set \p SubReg to the subregister index for the size of \p RC; false if none.
+static bool getSubRegForClass(const TargetRegisterClass *RC,
+                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
+  switch (TRI.getRegSizeInBits(*RC)) {
+  case 8:
+    SubReg = AArch64::bsub;
+    break;
+  case 16:
+    SubReg = AArch64::hsub;
+    break;
+  case 32:
+    if (RC != &AArch64::FPR32RegClass) // GPR32, GPR32all, ... all use sub_32.
+      SubReg = AArch64::sub_32;
+    else
+      SubReg = AArch64::ssub;
+    break;
+  case 64:
+    SubReg = AArch64::dsub;
+    break;
+  default:
+    LLVM_DEBUG(
+        dbgs() << "Couldn't find appropriate subregister for register class.");
+    return false;
+  }
+
+  return true;
+}
+
 /// Check whether \p I is a currently unsupported binary operation:
 /// - it has an unsized type
 /// - an operand is not a vreg
@@ -332,107 +484,209 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
   return GenericOpc;
 }
 
-static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
-                                    MachineRegisterInfo &MRI, unsigned SrcReg) {
-  // Copies from gpr32 to fpr16 need to use a sub-register copy.
-  unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
-      .addDef(CopyReg)
-      .addUse(SrcReg);
-  unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
-      .addDef(SubRegCopy)
-      .addUse(CopyReg, 0, AArch64::hsub);
+#ifndef NDEBUG
+/// Debug-only helper for the end of selectCopy. Asserts that the source and
+/// destination sizes of copy \p I are compatible and that a destination wider
+/// than 64 bits is on the FPR bank, then returns true for use inside assert().
+static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
+                        const MachineRegisterInfo &MRI,
+                        const TargetRegisterInfo &TRI,
+                        const RegisterBankInfo &RBI) {
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  // Make sure the sizes of the source and dest line up.
+  assert(
+      (DstSize == SrcSize ||
+       // Copies are a means to set up initial types; the number of
+       // bits may not exactly match.
+       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
+       // Copies are a means to copy bits around; as long as we are
+       // on the same register class, that's fine. Otherwise, that
+       // means we need some SUBREG_TO_REG or AND & co.
+       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
+      "Copy with different width?!");
+
+  // Check the size of the destination.
+  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
+         "GPRs cannot get more than 64-bit width values");
+
+  return true;
+}
+#endif
 
+/// Helper function for selectCopy. Inserts a subregister copy from
+/// \p *From to \p *To, linking it up to \p I.
+///
+/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
+///
+/// CopyReg (From class) = COPY SrcReg
+/// SubRegCopy (To class) = COPY CopyReg:SubReg
+/// Dst = COPY SubRegCopy
+static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
+                                  const RegisterBankInfo &RBI, unsigned SrcReg,
+                                  const TargetRegisterClass *From,
+                                  const TargetRegisterClass *To,
+                                  unsigned SubReg) {
+  MachineIRBuilder MIB(I);
+  auto Copy = MIB.buildCopy({From}, {SrcReg});
+  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
+                        .addReg(Copy.getReg(0), 0, SubReg);
   MachineOperand &RegOp = I.getOperand(1);
-  RegOp.setReg(SubRegCopy);
+  RegOp.setReg(SubRegCopy.getReg(0));
+
+  // It's possible that the destination register won't be constrained. Make
+  // sure that happens.
+  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
+    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
+
   return true;
 }
 
+/// Compute the register classes for the source (operand 1) and destination
+/// (operand 0) of copy \p I from each register's bank and size in bits.
+/// Returns {source class, destination class}; an entry is nullptr when
+/// getMinClassForRegBank has no class for that bank/size combination.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+                     const RegisterBankInfo &RBI) {
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  // Special casing for cross-bank copies of s1s. We can technically represent
+  // a 1-bit value with any size of register. The minimum size for a GPR is 32
+  // bits. So, we need to put the FPR on 32 bits as well.
+  //
+  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+  // then we can pull it into the helpers that get the appropriate class for a
+  // register bank. Or make a new helper that carries along some constraint
+  // information.
+  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+    SrcSize = DstSize = 32;
+
+  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+          getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
 
   unsigned DstReg = I.getOperand(0).getReg();
   unsigned SrcReg = I.getOperand(1).getReg();
+  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
 
-  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
-    if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
-        !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-      const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
-      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
-          MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
-      if (SrcRC == &AArch64::GPR32allRegClass)
-        return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
-    }
-    assert(I.isCopy() && "Generic operators do not allow physical registers");
-    return true;
-  }
-
-  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
-  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
-  (void)DstSize;
-  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
-  (void)SrcSize;
-  assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
-         "No phys reg on generic operators");
-  assert(
-      (DstSize == SrcSize ||
-       // Copies are a mean to setup initial types, the number of
-       // bits may not exactly match.
-       (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-        DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) ||
-       // Copies are a mean to copy bits around, as long as we are
-       // on the same register class, that's fine. Otherwise, that
-       // means we need some SUBREG_TO_REG or AND & co.
-       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
-      "Copy with different width?!");
-  assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
-         "GPRs cannot get more than 64-bit width values");
+  // Find the correct register classes for the source and destination registers.
+  const TargetRegisterClass *SrcRC;
+  const TargetRegisterClass *DstRC;
+  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
 
-  const TargetRegisterClass *RC = getRegClassForTypeOnBank(
-      MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true);
-  if (!RC) {
-    LLVM_DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
+  if (!DstRC) {
+    LLVM_DEBUG(dbgs() << "Unexpected dest size "
+                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
     return false;
   }
 
-  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
-    const TargetRegisterClass *SrcRC =
-        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
-    const RegisterBank *RB = nullptr;
+  // A couple helpers below, for making sure that the copy we produce is valid.
+
+  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
+  // to verify that the src and dst are the same size, since that's handled by
+  // the SUBREG_TO_REG.
+  bool KnownValid = false;
+
+  // Returns true, or asserts if something we don't expect happens. Instead of
+  // returning true, we return isValidCopy() to ensure that we verify the
+  // result.
+  auto CheckCopy = [&]() {
+    // If we have a bitcast or something, we can't have physical registers.
+    assert(
+        (I.isCopy() ||
+         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
+          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
+        "No phys reg on generic operator!");
+    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
+    (void)KnownValid;
+    return true;
+  };
+
+  // Is this a copy? If so, then we may need to insert a subregister copy, or
+  // a SUBREG_TO_REG.
+  if (I.isCopy()) {
+    // Yes. Check if there's anything to fix up.
     if (!SrcRC) {
-      RB = RegClassOrBank.get<const RegisterBank *>();
-      SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
-    }
-    // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
-    if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
-      unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
-      BuildMI(*I.getParent(), I, I.getDebugLoc(),
-              TII.get(AArch64::SUBREG_TO_REG))
-          .addDef(PromoteReg)
-          .addImm(0)
-          .addUse(SrcReg)
-          .addImm(AArch64::hsub);
-      MachineOperand &RegOp = I.getOperand(1);
-      RegOp.setReg(PromoteReg);
-    } else if (RC == &AArch64::FPR16RegClass &&
-               SrcRC == &AArch64::GPR32allRegClass) {
-      selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
+      return false;
+    }
+
+    // Is this a cross-bank copy?
+    if (DstRegBank.getID() != SrcRegBank.getID()) {
+      // If we're doing a cross-bank copy on different-sized registers, we need
+      // to do a bit more work.
+      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
+      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+
+      if (SrcSize > DstSize) {
+        // We're doing a cross-bank copy into a smaller register. We need a
+        // subregister copy. First, get a register class that's on the same bank
+        // as the destination, but the same size as the source.
+        const TargetRegisterClass *SubregRC =
+            getMinClassForRegBank(DstRegBank, SrcSize, true);
+        assert(SubregRC && "Didn't get a register class for subreg?");
+
+        // Get the appropriate subregister for the destination.
+        unsigned SubReg = 0;
+        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
+          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
+          return false;
+        }
+
+        // Now, insert a subregister copy using the new register class.
+        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
+        return CheckCopy();
+      }
+
+      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
+               SrcSize == 16) {
+        // Special case for FPR16 to GPR32.
+        // FIXME: This can probably be generalized like the above case.
+        unsigned PromoteReg =
+            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+        BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
+            .addImm(0)
+            .addUse(SrcReg)
+            .addImm(AArch64::hsub);
+        MachineOperand &RegOp = I.getOperand(1);
+        RegOp.setReg(PromoteReg);
+
+        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
+        KnownValid = true;
+      }
     }
+
+    // If the destination is a physical register, then there's nothing to
+    // change, so we're done.
+    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+      return CheckCopy();
   }
 
-  // No need to constrain SrcReg. It will get constrained when
-  // we hit another of its use or its defs.
-  // Copies do not have constraints.
-  if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+  // No need to constrain SrcReg. It will get constrained when we hit another
+  // of its use or its defs. Copies do not have constraints.
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                       << " operand\n");
     return false;
   }
   I.setDesc(TII.get(AArch64::COPY));
-  return true;
+  return CheckCopy();
 }
 
 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
@@ -511,6 +765,46 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
   return GenericOpc;
 }
 
+static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
+                                const RegisterBankInfo &RBI) {
+  const Register Dst = I.getOperand(0).getReg();
+  const bool IsGPR = RBI.getRegBank(Dst, MRI, *MRI.getTargetRegisterInfo())
+                         ->getID() == AArch64::GPRRegBankID;
+  const LLT DstTy = MRI.getType(Dst);
+  if (DstTy == LLT::scalar(32))
+    return IsGPR ? AArch64::CSELWr : AArch64::FCSELSrrr;
+  if (DstTy == LLT::scalar(64) || DstTy == LLT::pointer(0, 64))
+    return IsGPR ? AArch64::CSELXr : AArch64::FCSELDrrr;
+  return 0; // No select opcode is chosen for any other destination type.
+}
+
+/// Returns the FCMP opcode for the G_FCMP \p I, or 0 for unsupported sizes.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+  // A right-hand side of +0.0 selects the immediate (ri) opcode, so that no
+  // constant has to be materialized explicitly.
+  const ConstantFP *Cst = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+  const bool IsPosZero = Cst && Cst->isZero() && !Cst->isNegative();
+  const unsigned Bits = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+  if (Bits == 32)
+    return IsPosZero ? AArch64::FCMPSri : AArch64::FCMPSrr;
+  if (Bits == 64)
+    return IsPosZero ? AArch64::FCMPDri : AArch64::FCMPDrr;
+  return 0;
+}
+
+/// Returns true if \p P is an unsigned integer comparison predicate, i.e. one
+/// of ICMP_UGT, ICMP_UGE, ICMP_ULT or ICMP_ULE.
+///
+/// Equality predicates, signed predicates and every floating point predicate
+/// yield false.
+static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
+  const bool IsUnsignedGreater =
+      P == CmpInst::ICMP_UGT || P == CmpInst::ICMP_UGE;
+  const bool IsUnsignedLess =
+      P == CmpInst::ICMP_ULT || P == CmpInst::ICMP_ULE;
+  return IsUnsignedGreater || IsUnsignedLess;
+}
+
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   switch (P) {
   default:
@@ -595,7 +889,7 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 bool AArch64InstructionSelector::selectCompareBranch(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
 
-  const unsigned CondReg = I.getOperand(0).getReg();
+  const Register CondReg = I.getOperand(0).getReg();
   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
@@ -603,14 +897,25 @@ bool AArch64InstructionSelector::selectCompareBranch(
   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
     return false;
 
-  unsigned LHS = CCMI->getOperand(2).getReg();
-  unsigned RHS = CCMI->getOperand(3).getReg();
-  if (!getConstantVRegVal(RHS, MRI))
+  Register LHS = CCMI->getOperand(2).getReg();
+  Register RHS = CCMI->getOperand(3).getReg();
+  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!VRegAndVal)
     std::swap(RHS, LHS);
 
-  const auto RHSImm = getConstantVRegVal(RHS, MRI);
-  if (!RHSImm || *RHSImm != 0)
-    return false;
+  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!VRegAndVal || VRegAndVal->Value != 0) {
+    MachineIRBuilder MIB(I);
+    // If we can't select a CBZ then emit a cmp + Bcc.
+    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+                            CCMI->getOperand(1), MIB))
+      return false;
+    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
+    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+    I.eraseFromParent();
+    return true;
+  }
 
   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
   if (RB.getID() != AArch64::GPRRegBankID)
@@ -638,6 +943,74 @@ bool AArch64InstructionSelector::selectCompareBranch(
   return true;
 }
 
+/// Selects a vector G_SHL to USHL; only v4s32 and v2s32 are supported.
+bool AArch64InstructionSelector::selectVectorSHL(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_SHL);
+  Register DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  Register Src1Reg = I.getOperand(1).getReg();
+  Register Src2Reg = I.getOperand(2).getReg();
+  if (!Ty.isVector())
+    return false;
+
+  unsigned Opc = 0;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::USHLv4i32;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::USHLv2i32;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type\n");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
+  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+/// Selects a vector G_ASHR as a NEG of the shift amount feeding an SSHL.
+bool AArch64InstructionSelector::selectVectorASHR(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_ASHR);
+  Register DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  Register Src1Reg = I.getOperand(1).getReg();
+  Register Src2Reg = I.getOperand(2).getReg();
+
+  if (!Ty.isVector())
+    return false;
+
+  // There is not a shift right register instruction, but the shift left
+  // register instruction takes a signed value, where negative numbers specify a
+  // right shift.
+  unsigned Opc = 0;
+  unsigned NegOpc = 0;
+  const TargetRegisterClass *RC = nullptr;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::SSHLv4i32;
+    NegOpc = AArch64::NEGv4i32;
+    RC = &AArch64::FPR128RegClass;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::SSHLv2i32;
+    NegOpc = AArch64::NEGv2i32;
+    RC = &AArch64::FPR64RegClass;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type\n");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
+  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
+  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
+  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectVaStartAAPCS(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
   return false;
@@ -646,9 +1019,9 @@ bool AArch64InstructionSelector::selectVaStartAAPCS(
 bool AArch64InstructionSelector::selectVaStartDarwin(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
-  unsigned ListReg = I.getOperand(0).getReg();
+  Register ListReg = I.getOperand(0).getReg();
 
-  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 
   auto MIB =
       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
@@ -684,9 +1057,9 @@ void AArch64InstructionSelector::materializeLargeCMVal(
   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
 
-  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
-                       unsigned ForceDstReg) {
-    unsigned DstReg = ForceDstReg
+  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
+                       Register ForceDstReg) {
+    Register DstReg = ForceDstReg
                           ? ForceDstReg
                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
@@ -702,13 +1075,105 @@ void AArch64InstructionSelector::materializeLargeCMVal(
     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
     return DstReg;
   };
-  unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+  Register DstReg = BuildMovK(MovZ.getReg(0),
                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
   return;
 }
 
+/// Rewrites \p I in place before selection; \p I itself is never erased.
+/// Right now this only narrows s64 shift amounts of 32-bit scalar shifts.
+void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
+  MachineBasicBlock &MBB = *I.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  switch (I.getOpcode()) {
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR: {
+    // Shift amounts are legalized to s64 so that the existing imported
+    // selection patterns, which assume s64 immediates, can be used. When a
+    // 32-bit value is shifted by an s64 amount that is not a G_CONSTANT,
+    // truncate the amount so the s32 register-register variant still matches.
+    Register SrcReg = I.getOperand(1).getReg();
+    Register ShiftReg = I.getOperand(2).getReg();
+    const LLT ShiftTy = MRI.getType(ShiftReg);
+    const LLT SrcTy = MRI.getType(SrcReg);
+    if (SrcTy.isVector())
+      return;
+    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
+    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
+      return;
+    auto *AmtMI = MRI.getVRegDef(ShiftReg);
+    assert(AmtMI && "could not find a vreg definition for shift amount");
+    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
+      // Insert a subregister copy to implement a 64->32 trunc
+      MachineIRBuilder MIB(I);
+      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
+                       .addReg(ShiftReg, 0, AArch64::sub_32);
+      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+      I.getOperand(2).setReg(Trunc.getReg(0));
+    }
+    return;
+  }
+  default:
+    return;
+  }
+}
+
+/// Selects a scalar G_SHL by a constant amount to the immediate form of LSL,
+/// which is an alias for a special case of UBFM. Returns false, so that the
+/// imported selector can match the register variant, in all other cases.
+bool AArch64InstructionSelector::earlySelectSHL(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
+  const MachineOperand &AmtOp = I.getOperand(2);
+  if (!getConstantVRegVal(AmtOp.getReg(), MRI))
+    return false;
+
+  const Register DstReg = I.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  if (DstTy.isVector())
+    return false;
+
+  // Both immediate renderers must match for the immediate form to be usable.
+  const bool Is64Bit = DstTy.getSizeInBits() == 64;
+  auto ShiftAFn = Is64Bit ? selectShiftA_64(AmtOp) : selectShiftA_32(AmtOp);
+  auto ShiftBFn = Is64Bit ? selectShiftB_64(AmtOp) : selectShiftB_32(AmtOp);
+  if (!ShiftAFn || !ShiftBFn)
+    return false;
+
+  const unsigned Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
+  MachineIRBuilder MIB(I);
+  auto NewI = MIB.buildInstr(Opc, {DstReg}, {I.getOperand(1).getReg()});
+
+  for (auto &RenderFn : *ShiftAFn)
+    RenderFn(NewI);
+  for (auto &RenderFn : *ShiftBFn)
+    RenderFn(NewI);
+
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
+}
+
+/// Dispatches \p I to an opcode-specific early selection routine, if there is
+/// one. Returns true if \p I was selected.
+bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
+  assert(I.getParent() && "Instruction should be in a basic block!");
+  assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+  // G_SHL is the only opcode with an early selection routine.
+  if (I.getOpcode() != TargetOpcode::G_SHL)
+    return false;
+
+  MachineFunction &MF = *I.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  return earlySelectSHL(I, MRI);
+}
+
 bool AArch64InstructionSelector::select(MachineInstr &I,
                                         CodeGenCoverage &CoverageInfo) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
@@ -727,30 +1192,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 
     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
-      const unsigned DefReg = I.getOperand(0).getReg();
+      const Register DefReg = I.getOperand(0).getReg();
       const LLT DefTy = MRI.getType(DefReg);
 
-      const TargetRegisterClass *DefRC = nullptr;
-      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
-        DefRC = TRI.getRegClass(DefReg);
-      } else {
-        const RegClassOrRegBank &RegClassOrBank =
-            MRI.getRegClassOrRegBank(DefReg);
+      const RegClassOrRegBank &RegClassOrBank =
+        MRI.getRegClassOrRegBank(DefReg);
 
-        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+      const TargetRegisterClass *DefRC
+        = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+      if (!DefRC) {
+        if (!DefTy.isValid()) {
+          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+          return false;
+        }
+        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
         if (!DefRC) {
-          if (!DefTy.isValid()) {
-            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
-            return false;
-          }
-          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
-          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
-          if (!DefRC) {
-            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
-            return false;
-          }
+          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+          return false;
         }
       }
+
       I.setDesc(TII.get(TargetOpcode::PHI));
 
       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
@@ -769,12 +1231,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return false;
   }
 
+  // Try to do some lowering before we start instruction selecting. These
+  // lowerings are purely transformations on the input G_MIR and so selection
+  // must continue after any modification of the instruction.
+  preISelLower(I);
+
+  // There may be patterns where the importer can't deal with them optimally,
+  // but does select it to a suboptimal sequence so our custom C++ selection
+  // code later never has a chance to work on it. Therefore, we have an early
+  // selection attempt here to give priority to certain selection routines
+  // over the imported ones.
+  if (earlySelect(I))
+    return true;
+
   if (selectImpl(I, CoverageInfo))
     return true;
 
   LLT Ty =
       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
 
+  MachineIRBuilder MIB(I);
+
   switch (Opcode) {
   case TargetOpcode::G_BRCOND: {
     if (Ty.getSizeInBits() > 32) {
@@ -786,7 +1263,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return false;
     }
 
-    const unsigned CondReg = I.getOperand(0).getReg();
+    const Register CondReg = I.getOperand(0).getReg();
     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
 
     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
@@ -826,15 +1303,57 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
+  case TargetOpcode::G_BRJT:
+    return selectBrJT(I, MRI);
+
+  case TargetOpcode::G_BSWAP: {
+    // Handle vector types for G_BSWAP directly.
+    Register DstReg = I.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    // We should only get vector types here; everything else is handled by the
+    // importer right now.
+    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
+      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
+      return false;
+    }
+
+    // Only handle 4 and 2 element vectors for now.
+    // TODO: 16-bit elements.
+    unsigned NumElts = DstTy.getNumElements();
+    if (NumElts != 4 && NumElts != 2) {
+      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
+      return false;
+    }
+
+    // Choose the correct opcode for the supported types. Right now, that's
+    // v2s32, v4s32, and v2s64.
+    unsigned Opc = 0;
+    unsigned EltSize = DstTy.getElementType().getSizeInBits();
+    if (EltSize == 32)
+      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
+                                          : AArch64::REV32v16i8;
+    else if (EltSize == 64)
+      Opc = AArch64::REV64v16i8;
+
+    // We should always get something by the time we get here...
+    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
+
+    I.setDesc(TII.get(Opc));
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
+
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_CONSTANT: {
     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
 
+    const LLT s8 = LLT::scalar(8);
+    const LLT s16 = LLT::scalar(16);
     const LLT s32 = LLT::scalar(32);
     const LLT s64 = LLT::scalar(64);
     const LLT p0 = LLT::pointer(0, 64);
 
-    const unsigned DefReg = I.getOperand(0).getReg();
+    const Register DefReg = I.getOperand(0).getReg();
     const LLT DefTy = MRI.getType(DefReg);
     const unsigned DefSize = DefTy.getSizeInBits();
     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
@@ -861,7 +1380,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
         return false;
     } else {
       // s32 and s64 are covered by tablegen.
-      if (Ty != p0) {
+      if (Ty != p0 && Ty != s8 && Ty != s16) {
         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                           << " constant, expected: " << s32 << ", " << s64
                           << ", or " << p0 << '\n');
@@ -876,25 +1395,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       }
     }
 
+    // We allow G_CONSTANT of types < 32b.
     const unsigned MovOpc =
-        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
-
-    I.setDesc(TII.get(MovOpc));
+        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
 
     if (isFP) {
+      // Either emit a FMOV, or emit a copy to emit a normal mov.
       const TargetRegisterClass &GPRRC =
           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
       const TargetRegisterClass &FPRRC =
           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
 
-      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+      // Can we use a FMOV instruction to represent the immediate?
+      if (emitFMovForFConstant(I, MRI))
+        return true;
+
+      // Nope. Emit a copy and use a normal mov instead.
+      const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
       MachineOperand &RegOp = I.getOperand(0);
       RegOp.setReg(DefGPRReg);
-
-      BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
-              TII.get(AArch64::COPY))
-          .addDef(DefReg)
-          .addUse(DefGPRReg);
+      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+      MIB.buildCopy({DefReg}, {DefGPRReg});
 
       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
@@ -913,6 +1434,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       I.getOperand(1).ChangeToImmediate(Val);
     }
 
+    I.setDesc(TII.get(MovOpc));
     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
     return true;
   }
@@ -936,11 +1458,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
     }
 
-    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
-    BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
-            TII.get(AArch64::COPY))
-        .addDef(I.getOperand(0).getReg())
-        .addUse(DstReg, 0, AArch64::sub_32);
+    Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
+        .addReg(DstReg, 0, AArch64::sub_32);
     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                  AArch64::GPR32RegClass, MRI);
     I.getOperand(0).setReg(DstReg);
@@ -969,7 +1490,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
     }
 
-    unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+    Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
             TII.get(AArch64::SUBREG_TO_REG))
         .addDef(SrcReg)
@@ -1026,8 +1547,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
+  case TargetOpcode::G_ZEXTLOAD:
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE: {
+    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
+    MachineIRBuilder MIB(I);
+
     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
 
     if (PtrTy != LLT::pointer(0, 64)) {
@@ -1043,7 +1568,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     }
     unsigned MemSizeInBits = MemOp.getSize() * 8;
 
-    const unsigned PtrReg = I.getOperand(1).getReg();
+    const Register PtrReg = I.getOperand(1).getReg();
 #ifndef NDEBUG
     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
     // Sanity-check the pointer register.
@@ -1053,7 +1578,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
            "Load/Store pointer operand isn't a pointer");
 #endif
 
-    const unsigned ValReg = I.getOperand(0).getReg();
+    const Register ValReg = I.getOperand(0).getReg();
     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
 
     const unsigned NewOpc =
@@ -1098,6 +1623,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       }
     }
 
+    if (IsZExtLoad) {
+      // The zextload from a smaller type to i32 should be handled by the importer.
+      if (MRI.getType(ValReg).getSizeInBits() != 64)
+        return false;
+      // If we have a ZEXTLOAD then change the load's type to be a narrower reg
+      // and zero-extend it with SUBREG_TO_REG.
+      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+      Register DstReg = I.getOperand(0).getReg();
+      I.getOperand(0).setReg(LdReg);
+
+      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
+          .addImm(0)
+          .addUse(LdReg)
+          .addImm(AArch64::sub_32);
+      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
+                                          MRI);
+    }
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
@@ -1107,7 +1651,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     if (unsupportedBinOp(I, RBI, MRI, TRI))
       return false;
 
-    const unsigned DefReg = I.getOperand(0).getReg();
+    const Register DefReg = I.getOperand(0).getReg();
     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
 
     if (RB.getID() != AArch64::GPRRegBankID) {
@@ -1134,10 +1678,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FDIV:
 
-  case TargetOpcode::G_OR:
+  case TargetOpcode::G_ASHR:
+    if (MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorASHR(I, MRI);
+    LLVM_FALLTHROUGH;
   case TargetOpcode::G_SHL:
+    if (Opcode == TargetOpcode::G_SHL &&
+        MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorSHL(I, MRI);
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_OR:
   case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_ASHR:
   case TargetOpcode::G_GEP: {
     // Reject the various things we don't support yet.
     if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1145,7 +1696,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
 
     const unsigned OpSize = Ty.getSizeInBits();
 
-    const unsigned DefReg = I.getOperand(0).getReg();
+    const Register DefReg = I.getOperand(0).getReg();
     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
 
     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
@@ -1160,6 +1711,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
+  case TargetOpcode::G_UADDO: {
+    // TODO: Support other types.
+    unsigned OpSize = Ty.getSizeInBits();
+    if (OpSize != 32 && OpSize != 64) {
+      LLVM_DEBUG(
+          dbgs()
+          << "G_UADDO currently only supported for 32 and 64 b types.\n");
+      return false;
+    }
+
+    // TODO: Support vectors.
+    if (Ty.isVector()) {
+      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
+      return false;
+    }
+
+    // Add and set the condition flags.
+    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
+    MachineIRBuilder MIRBuilder(I);
+    auto AddsMI = MIRBuilder.buildInstr(
+        AddsOpc, {I.getOperand(0).getReg()},
+        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
+    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
+
+    // Now, put the overflow result in the register given by the first operand
+    // to the G_UADDO. CSINC increments the result when the predicate is false,
+    // so to get the increment when it's true, we need to use the inverse. In
+    // this case, we want to increment when carry is set.
+    auto CsetMI = MIRBuilder
+                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
+                      .addImm(getInvertedCondCode(AArch64CC::HS));
+    constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+    I.eraseFromParent();
+    return true;
+  }
+
   case TargetOpcode::G_PTR_MASK: {
     uint64_t Align = I.getOperand(2).getImm();
     if (Align >= 64 || Align == 0)
@@ -1176,8 +1764,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
 
-    const unsigned DstReg = I.getOperand(0).getReg();
-    const unsigned SrcReg = I.getOperand(1).getReg();
+    const Register DstReg = I.getOperand(0).getReg();
+    const Register SrcReg = I.getOperand(1).getReg();
 
     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
@@ -1234,8 +1822,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   }
 
   case TargetOpcode::G_ANYEXT: {
-    const unsigned DstReg = I.getOperand(0).getReg();
-    const unsigned SrcReg = I.getOperand(1).getReg();
+    const Register DstReg = I.getOperand(0).getReg();
+    const Register SrcReg = I.getOperand(1).getReg();
 
     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
     if (RBDst.getID() != AArch64::GPRRegBankID) {
@@ -1266,7 +1854,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     // At this point G_ANYEXT is just like a plain COPY, but we need
     // to explicitly form the 64-bit value if any.
     if (DstSize > 32) {
-      unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
+      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
           .addDef(ExtSrc)
           .addImm(0)
@@ -1283,8 +1871,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
               SrcTy = MRI.getType(I.getOperand(1).getReg());
     const bool isSigned = Opcode == TargetOpcode::G_SEXT;
-    const unsigned DefReg = I.getOperand(0).getReg();
-    const unsigned SrcReg = I.getOperand(1).getReg();
+    const Register DefReg = I.getOperand(0).getReg();
+    const Register SrcReg = I.getOperand(1).getReg();
     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
 
     if (RB.getID() != AArch64::GPRRegBankID) {
@@ -1302,7 +1890,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
         return false;
       }
 
-      const unsigned SrcXReg =
+      const Register SrcXReg =
           MRI.createVirtualRegister(&AArch64::GPR64RegClass);
       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
           .addDef(SrcXReg)
@@ -1358,11 +1946,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_BITCAST:
     // Imported SelectionDAG rules can handle every bitcast except those that
     // bitcast from a type to the same type. Ideally, these shouldn't occur
-    // but we might not run an optimizer that deletes them.
-    if (MRI.getType(I.getOperand(0).getReg()) ==
-        MRI.getType(I.getOperand(1).getReg()))
-      return selectCopy(I, TII, MRI, TRI, RBI);
-    return false;
+    // but we might not run an optimizer that deletes them. The other exception
+    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
+    // of them.
+    return selectCopy(I, TII, MRI, TRI, RBI);
 
   case TargetOpcode::G_SELECT: {
     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
@@ -1371,20 +1958,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return false;
     }
 
-    const unsigned CondReg = I.getOperand(1).getReg();
-    const unsigned TReg = I.getOperand(2).getReg();
-    const unsigned FReg = I.getOperand(3).getReg();
-
-    unsigned CSelOpc = 0;
+    const Register CondReg = I.getOperand(1).getReg();
+    const Register TReg = I.getOperand(2).getReg();
+    const Register FReg = I.getOperand(3).getReg();
 
-    if (Ty == LLT::scalar(32)) {
-      CSelOpc = AArch64::CSELWr;
-    } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
-      CSelOpc = AArch64::CSELXr;
-    } else {
-      return false;
-    }
+    if (tryOptSelect(I))
+      return true;
 
+    Register CSelOpc = selectSelectOpc(I, MRI, RBI);
     MachineInstr &TstMI =
         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
              .addDef(AArch64::WZR)
@@ -1404,81 +1985,55 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return true;
   }
   case TargetOpcode::G_ICMP: {
+    if (Ty.isVector())
+      return selectVectorICmp(I, MRI);
+
     if (Ty != LLT::scalar(32)) {
       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                         << ", expected: " << LLT::scalar(32) << '\n');
       return false;
     }
 
-    unsigned CmpOpc = 0;
-    unsigned ZReg = 0;
+    MachineIRBuilder MIRBuilder(I);
+    if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+                            MIRBuilder))
+      return false;
+    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
+                    MIRBuilder);
+    I.eraseFromParent();
+    return true;
+  }
 
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::SUBSWrr;
-      ZReg = AArch64::WZR;
-    } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
-      CmpOpc = AArch64::SUBSXrr;
-      ZReg = AArch64::XZR;
-    } else {
+  case TargetOpcode::G_FCMP: {
+    if (Ty != LLT::scalar(32)) {
+      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
+                        << ", expected: " << LLT::scalar(32) << '\n');
       return false;
     }
 
-    // CSINC increments the result by one when the condition code is false.
-    // Therefore, we have to invert the predicate to get an increment by 1 when
-    // the predicate is true.
-    const AArch64CC::CondCode invCC =
-        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
-            (CmpInst::Predicate)I.getOperand(1).getPredicate()));
+    unsigned CmpOpc = selectFCMPOpc(I, MRI);
+    if (!CmpOpc)
+      return false;
 
-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addDef(ZReg)
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
-
-    MachineInstr &CSetMI =
-        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
-             .addDef(I.getOperand(0).getReg())
-             .addUse(AArch64::WZR)
-             .addUse(AArch64::WZR)
-             .addImm(invCC);
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
-    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
-
-    I.eraseFromParent();
-    return true;
-  }
-
-  case TargetOpcode::G_FCMP: {
-    if (Ty != LLT::scalar(32)) {
-      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
-                        << ", expected: " << LLT::scalar(32) << '\n');
-      return false;
-    }
-
-    unsigned CmpOpc = 0;
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::FCMPSrr;
-    } else if (CmpTy == LLT::scalar(64)) {
-      CmpOpc = AArch64::FCMPDrr;
-    } else {
-      return false;
-    }
-
-    // FIXME: regbank
+    // FIXME: regbank
 
     AArch64CC::CondCode CC1, CC2;
     changeFCMPPredToAArch64CC(
         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
 
-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
+    // Partially build the compare. Decide if we need to add a use for the
+    // third operand based off whether or not we're comparing against 0.0.
+    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+                     .addUse(I.getOperand(2).getReg());
 
-    const unsigned DefReg = I.getOperand(0).getReg();
-    unsigned Def1Reg = DefReg;
+    // If we don't have an immediate compare, then we need to add a use of the
+    // register which wasn't used for the immediate.
+    // Note that the immediate will always be the last operand.
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
+
+    const Register DefReg = I.getOperand(0).getReg();
+    Register Def1Reg = DefReg;
     if (CC2 != AArch64CC::AL)
       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
 
@@ -1490,7 +2045,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
              .addImm(getInvertedCondCode(CC1));
 
     if (CC2 != AArch64CC::AL) {
-      unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+      Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
       MachineInstr &CSet2MI =
           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
                .addDef(Def2Reg)
@@ -1505,8 +2060,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
     }
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
 
     I.eraseFromParent();
@@ -1515,19 +2069,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_VASTART:
     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                 : selectVaStartAAPCS(I, MF, MRI);
+  case TargetOpcode::G_INTRINSIC:
+    return selectIntrinsic(I, MRI);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
-    if (!I.getOperand(0).isIntrinsicID())
-      return false;
-    if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
-      return false;
-    BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
-      .addImm(1);
-    I.eraseFromParent();
-    return true;
+    return selectIntrinsicWithSideEffects(I, MRI);
   case TargetOpcode::G_IMPLICIT_DEF: {
     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
-    const unsigned DstReg = I.getOperand(0).getReg();
+    const Register DstReg = I.getOperand(0).getReg();
     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
     const TargetRegisterClass *DstRC =
         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
@@ -1552,44 +2101,374 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
     }
   }
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+    return selectIntrinsicTrunc(I, MRI);
+  case TargetOpcode::G_INTRINSIC_ROUND:
+    return selectIntrinsicRound(I, MRI);
   case TargetOpcode::G_BUILD_VECTOR:
     return selectBuildVector(I, MRI);
   case TargetOpcode::G_MERGE_VALUES:
     return selectMergeValues(I, MRI);
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return selectUnmergeValues(I, MRI);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return selectShuffleVector(I, MRI);
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+    return selectExtractElt(I, MRI);
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return selectInsertElt(I, MRI);
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return selectConcatVectors(I, MRI);
+  case TargetOpcode::G_JUMP_TABLE:
+    return selectJumpTable(I, MRI);
   }
 
   return false;
 }
 
-bool AArch64InstructionSelector::emitScalarToVector(
-    unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
-    unsigned Scalar, MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
-  Dst = MRI.createVirtualRegister(DstRC);
+bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
+                                            MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
+  Register JTAddr = I.getOperand(0).getReg();
+  unsigned JTI = I.getOperand(1).getIndex();
+  Register Index = I.getOperand(2).getReg();
+  MachineIRBuilder MIB(I);
+
+  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
+                 {JTAddr, Index})
+      .addJumpTableIndex(JTI);
+
+  // Build the indirect branch.
+  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectJumpTable(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
+  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
+
+  Register DstReg = I.getOperand(0).getReg();
+  unsigned JTI = I.getOperand(1).getIndex();
+  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
+  MachineIRBuilder MIB(I);
+  auto MovMI =
+    MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
+          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
+          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectIntrinsicTrunc(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
+
+  // Select the correct opcode.
+  unsigned Opc = 0;
+  if (!SrcTy.isVector()) {
+    switch (SrcTy.getSizeInBits()) {
+    default:
+    case 16:
+      Opc = AArch64::FRINTZHr;
+      break;
+    case 32:
+      Opc = AArch64::FRINTZSr;
+      break;
+    case 64:
+      Opc = AArch64::FRINTZDr;
+      break;
+    }
+  } else {
+    unsigned NumElts = SrcTy.getNumElements();
+    switch (SrcTy.getElementType().getSizeInBits()) {
+    default:
+      break;
+    case 16:
+      if (NumElts == 4)
+        Opc = AArch64::FRINTZv4f16;
+      else if (NumElts == 8)
+        Opc = AArch64::FRINTZv8f16;
+      break;
+    case 32:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTZv2f32;
+      else if (NumElts == 4)
+        Opc = AArch64::FRINTZv4f32;
+      break;
+    case 64:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTZv2f64;
+      break;
+    }
+  }
+
+  if (!Opc) {
+    // Didn't get an opcode above, bail.
+    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
+    return false;
+  }
+
+  // Legalization would have set us up perfectly for this; we just need to
+  // set the opcode and move on.
+  I.setDesc(TII.get(Opc));
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectIntrinsicRound(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
+
+  // Select the correct opcode.
+  unsigned Opc = 0;
+  if (!SrcTy.isVector()) {
+    switch (SrcTy.getSizeInBits()) {
+    default:
+    case 16:
+      Opc = AArch64::FRINTAHr;
+      break;
+    case 32:
+      Opc = AArch64::FRINTASr;
+      break;
+    case 64:
+      Opc = AArch64::FRINTADr;
+      break;
+    }
+  } else {
+    unsigned NumElts = SrcTy.getNumElements();
+    switch (SrcTy.getElementType().getSizeInBits()) {
+    default:
+      break;
+    case 16:
+      if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f16;
+      else if (NumElts == 8)
+        Opc = AArch64::FRINTAv8f16;
+      break;
+    case 32:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f32;
+      else if (NumElts == 4)
+        Opc = AArch64::FRINTAv4f32;
+      break;
+    case 64:
+      if (NumElts == 2)
+        Opc = AArch64::FRINTAv2f64;
+      break;
+    }
+  }
+
+  if (!Opc) {
+    // Didn't get an opcode above, bail.
+    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
+    return false;
+  }
+
+  // Legalization would have set us up perfectly for this; we just need to
+  // set the opcode and move on.
+  I.setDesc(TII.get(Opc));
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectVectorICmp(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  Register DstReg = I.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  Register SrcReg = I.getOperand(2).getReg();
+  Register Src2Reg = I.getOperand(3).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+  unsigned NumElts = DstTy.getNumElements();
+
+  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
+  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
+  // Third index is cc opcode:
+  // 0 == eq
+  // 1 == ugt
+  // 2 == uge
+  // 3 == ult
+  // 4 == ule
+  // 5 == sgt
+  // 6 == sge
+  // 7 == slt
+  // 8 == sle
+  // ne is done by negating 'eq' result.
+
+  // This table below assumes that for some comparisons the operands will be
+  // commuted.
+  // ult op == commute + ugt op
+  // ule op == commute + uge op
+  // slt op == commute + sgt op
+  // sle op == commute + sge op
+  unsigned PredIdx = 0;
+  bool SwapOperands = false;
+  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_EQ:
+    PredIdx = 0;
+    break;
+  case CmpInst::ICMP_UGT:
+    PredIdx = 1;
+    break;
+  case CmpInst::ICMP_UGE:
+    PredIdx = 2;
+    break;
+  case CmpInst::ICMP_ULT:
+    PredIdx = 3;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_ULE:
+    PredIdx = 4;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SGT:
+    PredIdx = 5;
+    break;
+  case CmpInst::ICMP_SGE:
+    PredIdx = 6;
+    break;
+  case CmpInst::ICMP_SLT:
+    PredIdx = 7;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SLE:
+    PredIdx = 8;
+    SwapOperands = true;
+    break;
+  default:
+    llvm_unreachable("Unhandled icmp predicate");
+    return false;
+  }
+
+  // This table obviously should be tablegen'd when we have our GISel native
+  // tablegen selector.
+
+  static const unsigned OpcTable[4][4][9] = {
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
+      },
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+  };
+  unsigned EltIdx = Log2_32(SrcEltSize / 8);
+  unsigned NumEltsIdx = Log2_32(NumElts / 2);
+  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
+  if (!Opc) {
+    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
+    return false;
+  }
+
+  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+  const TargetRegisterClass *SrcRC =
+      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+  if (!SrcRC) {
+    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+    return false;
+  }
+
+  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
+  if (SrcTy.getSizeInBits() == 128)
+    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
+
+  if (SwapOperands)
+    std::swap(SrcReg, Src2Reg);
+
+  MachineIRBuilder MIB(I);
+  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
+  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+
+  // Invert if we had a 'ne' cc.
+  if (NotOpc) {
+    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
+    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  } else {
+    MIB.buildCopy(DstReg, Cmp.getReg(0));
+  }
+  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
+  I.eraseFromParent();
+  return true;
+}
 
-  unsigned UndefVec = MRI.createVirtualRegister(DstRC);
-  MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
-                                   TII.get(TargetOpcode::IMPLICIT_DEF))
-                               .addDef(UndefVec);
+MachineInstr *AArch64InstructionSelector::emitScalarToVector(
+    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
+    MachineIRBuilder &MIRBuilder) const {
+  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
 
   auto BuildFn = [&](unsigned SubregIndex) {
-    MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
-                                   TII.get(TargetOpcode::INSERT_SUBREG))
-                               .addDef(Dst)
-                               .addUse(UndefVec)
-                               .addUse(Scalar)
-                               .addImm(SubregIndex);
-    constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
-    return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+    auto Ins =
+        MIRBuilder
+            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
+            .addImm(SubregIndex);
+    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+    return &*Ins;
   };
 
-  switch (DstTy.getElementType().getSizeInBits()) {
+  switch (EltSize) {
+  case 16:
+    return BuildFn(AArch64::hsub);
   case 32:
     return BuildFn(AArch64::ssub);
   case 64:
     return BuildFn(AArch64::dsub);
   default:
-    return false;
+    return nullptr;
   }
 }
 
@@ -1610,14 +2489,14 @@ bool AArch64InstructionSelector::selectMergeValues(
     return false;
 
   auto *DstRC = &AArch64::GPR64RegClass;
-  unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
+  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef)
                                 .addImm(0)
                                 .addUse(I.getOperand(1).getReg())
                                 .addImm(AArch64::sub_32);
-  unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
+  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
   // Need to anyext the second scalar before we can use bfm
   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
@@ -1639,122 +2518,1362 @@ bool AArch64InstructionSelector::selectMergeValues(
   return true;
 }
 
-bool AArch64InstructionSelector::selectBuildVector(
+static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
+                              const unsigned EltSize) {
+  // Choose a lane copy opcode and subregister based off of the size of the
+  // vector's elements.
+  switch (EltSize) {
+  case 16:
+    CopyOpc = AArch64::CPYi16;
+    ExtractSubReg = AArch64::hsub;
+    break;
+  case 32:
+    CopyOpc = AArch64::CPYi32;
+    ExtractSubReg = AArch64::ssub;
+    break;
+  case 64:
+    CopyOpc = AArch64::CPYi64;
+    ExtractSubReg = AArch64::dsub;
+    break;
+  default:
+    // Unknown size, bail out.
+    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
+    return false;
+  }
+  return true;
+}
+
+MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
+    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
+    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  unsigned CopyOpc = 0;
+  unsigned ExtractSubReg = 0;
+  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
+    LLVM_DEBUG(
+        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
+    return nullptr;
+  }
+
+  const TargetRegisterClass *DstRC =
+      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
+  if (!DstRC) {
+    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
+    return nullptr;
+  }
+
+  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
+  const LLT &VecTy = MRI.getType(VecReg);
+  const TargetRegisterClass *VecRC =
+      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
+  if (!VecRC) {
+    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+    return nullptr;
+  }
+
+  // The register that we're going to copy into.
+  Register InsertReg = VecReg;
+  if (!DstReg)
+    DstReg = MRI.createVirtualRegister(DstRC);
+  // If the lane index is 0, we just use a subregister COPY.
+  if (LaneIdx == 0) {
+    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
+                    .addReg(VecReg, 0, ExtractSubReg);
+    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+    return &*Copy;
+  }
+
+  // Lane copies require 128-bit wide registers. If we're dealing with an
+  // unpacked vector, then we need to move up to that width. Insert an implicit
+  // def and a subregister insert to get us there.
+  if (VecTy.getSizeInBits() != 128) {
+    MachineInstr *ScalarToVector = emitScalarToVector(
+        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
+    if (!ScalarToVector)
+      return nullptr;
+    InsertReg = ScalarToVector->getOperand(0).getReg();
+  }
+
+  MachineInstr *LaneCopyMI =
+      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
+  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
+
+  // Make sure that we actually constrain the initial copy.
+  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+  return LaneCopyMI;
+}
+
+bool AArch64InstructionSelector::selectExtractElt(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
-  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-  // Until we port more of the optimized selections, for now just use a vector
-  // insert sequence.
-  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
-  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
-  unsigned EltSize = EltTy.getSizeInBits();
-  if (EltSize < 32 || EltSize > 64)
-    return false; // Don't support all element types yet.
-  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
-  unsigned Opc;
-  unsigned SubregIdx;
+  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
+         "unexpected opcode!");
+  Register DstReg = I.getOperand(0).getReg();
+  const LLT NarrowTy = MRI.getType(DstReg);
+  const Register SrcReg = I.getOperand(1).getReg();
+  const LLT WideTy = MRI.getType(SrcReg);
+  (void)WideTy;
+  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
+         "source register size too small!");
+  assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
+
+  // Need the lane index to determine the correct copy opcode.
+  MachineOperand &LaneIdxOp = I.getOperand(2);
+  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
+
+  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
+    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
+    return false;
+  }
+
+  // Find the index to extract from.
+  auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
+  if (!VRegAndVal)
+    return false;
+  unsigned LaneIdx = VRegAndVal->Value;
+
+  MachineIRBuilder MIRBuilder(I);
+
+  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
+                                               LaneIdx, MIRBuilder);
+  if (!Extract)
+    return false;
+
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectSplitVectorUnmerge(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  unsigned NumElts = I.getNumOperands() - 1;
+  Register SrcReg = I.getOperand(NumElts).getReg();
+  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
+  const LLT SrcTy = MRI.getType(SrcReg);
+
+  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
+  if (SrcTy.getSizeInBits() > 128) {
+    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+
+  // We implement a split vector operation by treating the sub-vectors as
+  // scalars and extracting them.
+  const RegisterBank &DstRB =
+      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
+  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
+    Register Dst = I.getOperand(OpIdx).getReg();
+    MachineInstr *Extract =
+        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
+    if (!Extract)
+      return false;
+  }
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectUnmergeValues(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "unexpected opcode");
+
+  // TODO: Handle unmerging into GPRs and from scalars to scalars.
+  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+          AArch64::FPRRegBankID ||
+      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
+          AArch64::FPRRegBankID) {
+    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
+                         "currently unsupported.\n");
+    return false;
+  }
+
+  // The last operand is the vector source register, and every other operand is
+  // a register to unpack into.
+  unsigned NumElts = I.getNumOperands() - 1;
+  Register SrcReg = I.getOperand(NumElts).getReg();
+  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
+  const LLT WideTy = MRI.getType(SrcReg);
+  (void)WideTy;
+  assert(WideTy.isVector() && "can only unmerge from vector types!");
+  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
+         "source register size too small!");
+
+  if (!NarrowTy.isScalar())
+    return selectSplitVectorUnmerge(I, MRI);
+
+  MachineIRBuilder MIB(I);
+
+  // Choose a lane copy opcode and subregister based off of the size of the
+  // vector's elements.
+  unsigned CopyOpc = 0;
+  unsigned ExtractSubReg = 0;
+  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
+    return false;
+
+  // Set up for the lane copies.
+  MachineBasicBlock &MBB = *I.getParent();
+
+  // Stores the registers we'll be copying from.
+  SmallVector<Register, 4> InsertRegs;
+
+  // We'll use the first register twice, so we only need NumElts-1 registers.
+  unsigned NumInsertRegs = NumElts - 1;
+
+  // If our elements fit into exactly 128 bits, then we can copy from the source
+  // directly. Otherwise, we need to do a bit of setup with some subregister
+  // inserts.
+  if (NarrowTy.getSizeInBits() * NumElts == 128) {
+    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
+  } else {
+    // No. We have to perform subregister inserts. For each insert, create an
+    // implicit def and a subregister insert, and save the register we create.
+    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
+      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+      MachineInstr &ImpDefMI =
+          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
+                   ImpDefReg);
+
+      // Now, create the subregister insert from SrcReg.
+      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+      MachineInstr &InsMI =
+          *BuildMI(MBB, I, I.getDebugLoc(),
+                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
+               .addUse(ImpDefReg)
+               .addUse(SrcReg)
+               .addImm(AArch64::dsub);
+
+      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
+      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+
+      // Save the register so that we can copy from it after.
+      InsertRegs.push_back(InsertReg);
+    }
+  }
+
+  // Now that we've created any necessary subregister inserts, we can
+  // create the copies.
+  //
+  // Perform the first copy separately as a subregister copy.
+  Register CopyTo = I.getOperand(0).getReg();
+  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
+                       .addReg(InsertRegs[0], 0, ExtractSubReg);
+  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
+
+  // Now, perform the remaining copies as vector lane copies.
+  unsigned LaneIdx = 1;
+  for (Register InsReg : InsertRegs) {
+    Register CopyTo = I.getOperand(LaneIdx).getReg();
+    MachineInstr &CopyInst =
+        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
+             .addUse(InsReg)
+             .addImm(LaneIdx);
+    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
+    ++LaneIdx;
+  }
+
+  // Separately constrain the first copy's destination. Because of the
+  // limitation in constrainOperandRegClass, we can't guarantee that this will
+  // actually be constrained. So, do it ourselves using the second operand.
+  const TargetRegisterClass *RC =
+      MRI.getRegClassOrNull(I.getOperand(1).getReg());
+  if (!RC) {
+    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
+    return false;
+  }
+
+  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectConcatVectors(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+         "Unexpected opcode");
+  Register Dst = I.getOperand(0).getReg();
+  Register Op1 = I.getOperand(1).getReg();
+  Register Op2 = I.getOperand(2).getReg();
+  MachineIRBuilder MIRBuilder(I);
+  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
+  if (!ConcatMI)
+    return false;
+  I.eraseFromParent();
+  return true;
+}
+
+void AArch64InstructionSelector::collectShuffleMaskIndices(
+    MachineInstr &I, MachineRegisterInfo &MRI,
+    SmallVectorImpl<Optional<int>> &Idxs) const {
+  MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
+  assert(
+      MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+      "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
+  // Append one entry per mask element: its constant value, or None if undef.
+  for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
+    // Look through copies.
+    MachineInstr *ScalarDef =
+        getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
+    assert(ScalarDef && "Could not find vreg def of shufflevec index op");
+    if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
+      // This must be an undef if it is not a constant.
+      assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
+      Idxs.push_back(None);
+    } else {
+      Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
+    }
+  }
+}
+
+unsigned
+AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
+                                                  MachineFunction &MF) const {
+  Type *CPTy = CPVal->getType();
+  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
+  if (Align == 0)
+    Align = MF.getDataLayout().getTypeAllocSize(CPTy);
+
+  MachineConstantPool *MCP = MF.getConstantPool();
+  return MCP->getConstantPoolIndex(CPVal, Align);
+}
+
+MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
+    Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
+  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
+
+  auto Adrp =
+      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
+          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
+
+  MachineInstr *LoadMI = nullptr;
+  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
+  case 16:
+    LoadMI =
+        &*MIRBuilder
+              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
+              .addConstantPoolIndex(CPIdx, 0,
+                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    break;
+  case 8:
+    LoadMI = &*MIRBuilder
+                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
+                 .addConstantPoolIndex(
+                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    break;
+  default:
+    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
+                      << *CPVal->getType());
+    return nullptr;
+  }
+  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
+  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
+  return LoadMI;
+}
+
+/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
+/// size and RB.
+static std::pair<unsigned, unsigned>
+getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
+  unsigned Opc, SubregIdx;
   if (RB.getID() == AArch64::GPRRegBankID) {
     if (EltSize == 32) {
       Opc = AArch64::INSvi32gpr;
       SubregIdx = AArch64::ssub;
-    } else {
+    } else if (EltSize == 64) {
       Opc = AArch64::INSvi64gpr;
       SubregIdx = AArch64::dsub;
+    } else {
+      llvm_unreachable("invalid elt size!");
     }
   } else {
-    if (EltSize == 32) {
+    if (EltSize == 8) {
+      Opc = AArch64::INSvi8lane;
+      SubregIdx = AArch64::bsub;
+    } else if (EltSize == 16) {
+      Opc = AArch64::INSvi16lane;
+      SubregIdx = AArch64::hsub;
+    } else if (EltSize == 32) {
       Opc = AArch64::INSvi32lane;
       SubregIdx = AArch64::ssub;
-    } else {
+    } else if (EltSize == 64) {
       Opc = AArch64::INSvi64lane;
       SubregIdx = AArch64::dsub;
-    }
-  }
-
-  if (EltSize * DstTy.getNumElements() != 128)
-    return false; // Don't handle unpacked vectors yet.
-
-  unsigned DstVec = 0;
-  const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
-      DstTy, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
-  emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
-                     *I.getParent(), I.getIterator(), MRI);
-  for (unsigned i = 2, e = DstTy.getSizeInBits() / EltSize + 1; i < e; ++i) {
-    unsigned InsDef;
-    // For the last insert re-use the dst reg of the G_BUILD_VECTOR.
-    if (i + 1 < e)
-      InsDef = MRI.createVirtualRegister(DstRC);
-    else
-      InsDef = I.getOperand(0).getReg();
-    unsigned LaneIdx = i - 1;
-    if (RB.getID() == AArch64::FPRRegBankID) {
-      unsigned ImpDef = MRI.createVirtualRegister(DstRC);
-      MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                        TII.get(TargetOpcode::IMPLICIT_DEF))
-                                    .addDef(ImpDef);
-      unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
-      MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                        TII.get(TargetOpcode::INSERT_SUBREG))
-                                    .addDef(InsSubDef)
-                                    .addUse(ImpDef)
-                                    .addUse(I.getOperand(i).getReg())
-                                    .addImm(SubregIdx);
-      MachineInstr &InsEltMI =
-          *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
-               .addDef(InsDef)
-               .addUse(DstVec)
-               .addImm(LaneIdx)
-               .addUse(InsSubDef)
-               .addImm(0);
-      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
-      constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
-      constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
-      DstVec = InsDef;
     } else {
-      MachineInstr &InsMI =
-          *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
-               .addDef(InsDef)
-               .addUse(DstVec)
-               .addImm(LaneIdx)
-               .addUse(I.getOperand(i).getReg());
-      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
-      DstVec = InsDef;
+      llvm_unreachable("invalid elt size!");
     }
   }
-  I.eraseFromParent();
-  return true;
+  return std::make_pair(Opc, SubregIdx);
 }
 
-/// SelectArithImmed - Select an immediate value that can be represented as
-/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
-/// Val set to the 12-bit value and Shift set to the shifter operand.
-InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
-  MachineInstr &MI = *Root.getParent();
-  MachineBasicBlock &MBB = *MI.getParent();
+MachineInstr *
+AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
+                                    MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
+                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
+  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
+  auto ImmFns = selectArithImmed(RHS);
+  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
+  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+
+  // If we matched a valid constant immediate, add those operands.
+  if (ImmFns) {
+    for (auto &RenderFn : *ImmFns)
+      RenderFn(CmpMI);
+  } else {
+    CmpMI.addUse(RHS.getReg());
+  }
+
+  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+  return &*CmpMI;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
+                                    MachineIRBuilder &MIRBuilder) const {
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
+  bool Is32Bit = (RegSize == 32);
+  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
+                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
+  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+  // We might be able to fold in an immediate into the TST. We need to make sure
+  // it's a logical immediate though, since ANDS requires that.
+  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+  bool IsImmForm = ValAndVReg.hasValue() &&
+                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
+  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
+  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
+
+  if (IsImmForm)
+    TstMI.addImm(
+        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
+  else
+    TstMI.addUse(RHS);
+
+  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+  return &*TstMI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
+    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+    MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+
+  // Fold the compare into a CMN or TST if possible.
+  MachineInstr *FoldCmp =
+      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+  if (FoldCmp)
+    return FoldCmp;
+
+  // Couldn't fold into a CMN or TST. Just emit a normal compare.
+  unsigned CmpOpc = 0;
+  Register ZReg;
+
+  LLT CmpTy = MRI.getType(LHS.getReg());
+  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
+         "Expected scalar or pointer");
+  if (CmpTy == LLT::scalar(32)) {
+    CmpOpc = AArch64::SUBSWrr;
+    ZReg = AArch64::WZR;
+  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
+    CmpOpc = AArch64::SUBSXrr;
+    ZReg = AArch64::XZR;
+  } else {
+    return nullptr;
+  }
+
+  // Try to match immediate forms.
+  auto ImmFns = selectArithImmed(RHS);
+  if (ImmFns)
+    CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
+
+  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
+  // If we matched a valid constant immediate, add those operands.
+  if (ImmFns) {
+    for (auto &RenderFn : *ImmFns)
+      RenderFn(CmpMI);
+  } else {
+    CmpMI.addUse(RHS.getReg());
+  }
+
+  // Make sure that we can constrain the compare that we emitted.
+  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+  return &*CmpMI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitVectorConcat(
+    Optional<Register> Dst, Register Op1, Register Op2,
+    MachineIRBuilder &MIRBuilder) const {
+  // We implement a vector concat by:
+  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
+  // 2. Insert the upper vector into the destination's upper element
+  // TODO: some of this code is common with G_BUILD_VECTOR handling.
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+
+  const LLT Op1Ty = MRI.getType(Op1);
+  const LLT Op2Ty = MRI.getType(Op2);
+
+  if (Op1Ty != Op2Ty) {
+    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
+    return nullptr;
+  }
+  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
+
+  if (Op1Ty.getSizeInBits() >= 128) {
+    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
+    return nullptr;
+  }
+
+  // At the moment we just support 64 bit vector concats.
+  if (Op1Ty.getSizeInBits() != 64) {
+    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
+    return nullptr;
+  }
+
+  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
+  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
+  const TargetRegisterClass *DstRC =
+      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
+
+  MachineInstr *WidenedOp1 =
+      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
+  MachineInstr *WidenedOp2 =
+      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
+  if (!WidenedOp1 || !WidenedOp2) {
+    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
+    return nullptr;
+  }
+
+  // Now do the insert of the upper element.
+  unsigned InsertOpc, InsSubRegIdx;
+  std::tie(InsertOpc, InsSubRegIdx) =
+      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
+
+  if (!Dst)
+    Dst = MRI.createVirtualRegister(DstRC);
+  auto InsElt =
+      MIRBuilder
+          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
+          .addImm(1) /* Lane index */
+          .addUse(WidenedOp2->getOperand(0).getReg())
+          .addImm(0);
+  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
+  return &*InsElt;
+}
+
+MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
+         "Expected a G_FCONSTANT!");
+  MachineOperand &ImmOp = I.getOperand(1);
+  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+
+  // Only handle 32 and 64 bit defs for now.
+  if (DefSize != 32 && DefSize != 64)
+    return nullptr;
+
+  // Don't handle null values using FMOV.
+  if (ImmOp.getFPImm()->isNullValue())
+    return nullptr;
+
+  // Get the immediate representation for the FMOV.
+  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
+  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
+                          : AArch64_AM::getFP64Imm(ImmValAPF);
+
+  // If this is -1, it means the immediate can't be represented as the requested
+  // floating point value. Bail.
+  if (Imm == -1)
+    return nullptr;
+
+  // Update MI to represent the new FMOV instruction, constrain it, and return.
+  ImmOp.ChangeToImmediate(Imm);
+  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
+  I.setDesc(TII.get(MovOpc));
+  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  return &I;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
+                                     MachineIRBuilder &MIRBuilder) const {
+  // CSINC increments the result when the predicate is false. Invert it.
+  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
+      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
+  auto I =
+      MIRBuilder
+    .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
+          .addImm(InvCC);
+  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
+  return &*I;
+}
+
+bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
+  MachineIRBuilder MIB(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+  // We want to recognize this pattern:
+  //
+  // $z = G_FCMP pred, $x, $y
+  // ...
+  // $w = G_SELECT $z, $a, $b
+  //
+  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
+  // some copies/truncs in between.)
+  //
+  // If we see this, then we can emit something like this:
+  //
+  // fcmp $x, $y
+  // fcsel $w, $a, $b, pred
+  //
+  // Rather than emitting both of the rather long sequences in the standard
+  // G_FCMP/G_SELECT select methods.
+
+  // First, check if the condition is defined by a compare.
+  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
+  while (CondDef) {
+    // We can only fold if all of the defs have one use.
+    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
+      return false;
+
+    // We can skip over G_TRUNC since the condition is 1-bit.
+    // Truncating/extending can have no impact on the value.
+    unsigned Opc = CondDef->getOpcode();
+    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
+      break;
+
+    // Can't see past copies from physregs.
+    if (Opc == TargetOpcode::COPY &&
+        TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
+      return false;
+
+    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
+  }
+
+  // Is the condition defined by a compare?
+  if (!CondDef)
+    return false;
+
+  unsigned CondOpc = CondDef->getOpcode();
+  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
+    return false;
+
+  AArch64CC::CondCode CondCode;
+  if (CondOpc == TargetOpcode::G_ICMP) {
+    CondCode = changeICMPPredToAArch64CC(
+        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
+    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+                            CondDef->getOperand(1), MIB)) {
+      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
+      return false;
+    }
+  } else {
+    // Get the condition code for the select.
+    AArch64CC::CondCode CondCode2;
+    changeFCMPPredToAArch64CC(
+        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
+        CondCode2);
+
+    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
+    // instructions to emit the comparison.
+    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
+    // unnecessary.
+    if (CondCode2 != AArch64CC::AL)
+      return false;
+
+    // Make sure we'll be able to select the compare.
+    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
+    if (!CmpOpc)
+      return false;
+
+    // Emit a new compare.
+    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      Cmp.addUse(CondDef->getOperand(3).getReg());
+    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  }
+
+  // Emit the select.
+  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
+  auto CSel =
+      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
+                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
+          .addImm(CondCode);
+  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
+    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+    MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
+         "Unexpected MachineOperand");
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // We want to find this sort of thing:
+  // x = G_SUB 0, y
+  // G_ICMP z, x
+  //
+  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
+  // e.g:
+  //
+  // cmn z, y
+
+  // Helper lambda to detect the subtract followed by the compare.
+  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
+  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
+    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
+      return false;
+
+    // Need to make sure NZCV is the same at the end of the transformation.
+    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+      return false;
+
+    // We want to match against SUBs (already guaranteed by the check above).
+    if (DefMI->getOpcode() != TargetOpcode::G_SUB)
+      return false;
+
+    // Make sure that we're getting
+    // x = G_SUB 0, y
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
+    if (!ValAndVReg || ValAndVReg->Value != 0)
+      return false;
+
+    // This can safely be represented as a CMN.
+    return true;
+  };
+
+  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
+  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
+  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
+  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
+
+  // Given this:
+  //
+  // x = G_SUB 0, y
+  // G_ICMP x, z
+  //
+  // Produce this:
+  //
+  // cmn y, z
+  if (IsCMN(LHSDef, CC))
+    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
+
+  // Same idea here, but with the RHS of the compare instead:
+  //
+  // Given this:
+  //
+  // x = G_SUB 0, y
+  // G_ICMP z, x
+  //
+  // Produce this:
+  //
+  // cmn z, y
+  if (IsCMN(RHSDef, CC))
+    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
+
+  // Given this:
+  //
+  // z = G_AND x, y
+  // G_ICMP z, 0
+  //
+  // Produce this if the compare predicate is not unsigned:
+  //
+  // tst x, y
+  if (!isUnsignedICMPPred(P) && LHSDef &&
+      LHSDef->getOpcode() == TargetOpcode::G_AND) {
+    // Make sure that the RHS is 0.
+    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
+    if (!ValAndVReg || ValAndVReg->Value != 0)
+      return nullptr;
+
+    return emitTST(LHSDef->getOperand(1).getReg(),
+                   LHSDef->getOperand(2).getReg(), MIRBuilder);
+  }
+
+  return nullptr;
+}
+
+bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
+  // Try to select a vector splat as a DUP; on success I is erased.
+  // We're looking for this pattern:
+  //    %scalar:gpr(s64) = COPY $x0
+  //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+  //    %cst0:gpr(s32) = G_CONSTANT i32 0
+  //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+  //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+  //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
+  //                                             %zerovec(<2 x s32>)
+  //
+  // ...into:
+  // %splat = DUP %scalar
+  // We use the regbank of the scalar to determine which kind of dup to use.
+  MachineIRBuilder MIB(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  using namespace TargetOpcode;
+  using namespace MIPatternMatch;
+
+  // Begin matching the insert.
+  auto *InsMI =
+      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
+  if (!InsMI)
+    return false;
+  // Match the undef vector operand.
+  auto *UndefMI =
+      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
+  if (!UndefMI)
+    return false;
+  // Match the scalar being splatted.
+  Register ScalarReg = InsMI->getOperand(2).getReg();
+  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
+  // Match the index constant 0.
+  int64_t Index = 0;
+  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
+    return false;
+
+  // The shuffle's second operand doesn't matter if the mask is all zero.
+  auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
+  if (!ZeroVec)
+    return false;
+  int64_t Zero = 0;
+  if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
+    return false;
+  for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
+    if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
+      return false; // This wasn't an all zeros vector.
+  }
+
+  // We're done, now find out what kind of splat we need.
+  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+  LLT EltTy = VecTy.getElementType();
+  if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
+    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+    return false;
+  }
+  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
+  static const unsigned OpcTable[2][2] = {
+      {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
+      {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
+  unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+
+  // For FP splats, we need to widen the scalar reg via undef too.
+  if (IsFP) {
+    MachineInstr *Widen = emitScalarToVector(
+        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
+    if (!Widen)
+      return false;
+    ScalarReg = Widen->getOperand(0).getReg();
+  }
+  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
+  if (IsFP)
+    Dup.addImm(0);
+  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
+  if (TM.getOptLevel() == CodeGenOpt::None)
+    return false;
+  if (tryOptVectorDup(I))
+    return true;
+  return false;
+}
+
+bool AArch64InstructionSelector::selectShuffleVector(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  if (tryOptVectorShuffle(I))
+    return true;
+  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+  Register Src1Reg = I.getOperand(1).getReg();
+  const LLT Src1Ty = MRI.getType(Src1Reg);
+  Register Src2Reg = I.getOperand(2).getReg();
+  const LLT Src2Ty = MRI.getType(Src2Reg);
+
+  MachineBasicBlock &MBB = *I.getParent();
   MachineFunction &MF = *MBB.getParent();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
+  LLVMContext &Ctx = MF.getFunction().getContext();
+
+  // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
+  // operand, it comes in as a normal vector value which we have to analyze to
+  // find the mask indices. If the mask element is undef, then
+  // collectShuffleMaskIndices() will add a None entry for that index into
+  // the list.
+  SmallVector<Optional<int>, 8> Mask;
+  collectShuffleMaskIndices(I, MRI, Mask);
+  assert(!Mask.empty() && "Expected to find mask indices");
+
+  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
+  // it's originated from a <1 x T> type. Those should have been lowered into
+  // G_BUILD_VECTOR earlier.
+  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
+    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
+    return false;
+  }
 
-  // This function is called from the addsub_shifted_imm ComplexPattern,
-  // which lists [imm] as the list of opcode it's interested in, however
-  // we still need to check whether the operand is actually an immediate
-  // here because the ComplexPattern opcode list is only used in
-  // root-level opcode matching.
+  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
+
+  SmallVector<Constant *, 64> CstIdxs;
+  for (auto &MaybeVal : Mask) {
+    // For now, any undef indexes we'll just assume to be 0. This should be
+    // optimized in future, e.g. to select DUP etc.
+    int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
+    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
+      unsigned Offset = Byte + Val * BytesPerElt;
+      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
+    }
+  }
+
+  MachineIRBuilder MIRBuilder(I);
+
+  // Use a constant pool to load the index vector for TBL.
+  Constant *CPVal = ConstantVector::get(CstIdxs);
+  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
+  if (!IndexLoad) {
+    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
+    return false;
+  }
+
+  if (DstTy.getSizeInBits() != 128) {
+    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
+    // This case can be done with TBL1.
+    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
+    if (!Concat) {
+      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
+      return false;
+    }
+
+    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
+    IndexLoad =
+        emitScalarToVector(64, &AArch64::FPR128RegClass,
+                           IndexLoad->getOperand(0).getReg(), MIRBuilder);
+
+    auto TBL1 = MIRBuilder.buildInstr(
+        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
+        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
+    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
+
+    auto Copy =
+        MIRBuilder
+            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
+            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
+    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
+    I.eraseFromParent();
+    return true;
+  }
+
+  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
+  // Q registers for regalloc.
+  auto RegSeq = MIRBuilder
+                    .buildInstr(TargetOpcode::REG_SEQUENCE,
+                                {&AArch64::QQRegClass}, {Src1Reg})
+                    .addImm(AArch64::qsub0)
+                    .addUse(Src2Reg)
+                    .addImm(AArch64::qsub1);
+
+  auto TBL2 =
+      MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
+                            {RegSeq, IndexLoad->getOperand(0).getReg()});
+  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
+  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+MachineInstr *AArch64InstructionSelector::emitLaneInsert(
+    Optional<Register> DstReg, Register SrcReg, Register EltReg,
+    unsigned LaneIdx, const RegisterBank &RB,
+    MachineIRBuilder &MIRBuilder) const {
+  MachineInstr *InsElt = nullptr;
+  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+  // Create a register to define with the insert if one wasn't passed in.
+  if (!DstReg)
+    DstReg = MRI.createVirtualRegister(DstRC);
+
+  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
+  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
+
+  if (RB.getID() == AArch64::FPRRegBankID) {
+    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
+    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
+                 .addImm(LaneIdx)
+                 .addUse(InsSub->getOperand(0).getReg())
+                 .addImm(0);
+  } else {
+    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
+                 .addImm(LaneIdx)
+                 .addUse(EltReg);
+  }
+
+  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
+  return InsElt;
+}
+
+bool AArch64InstructionSelector::selectInsertElt(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+  // Get information on the destination.
+  Register DstReg = I.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  unsigned VecSize = DstTy.getSizeInBits();
+
+  // Get information on the element we want to insert into the destination.
+  Register EltReg = I.getOperand(2).getReg();
+  const LLT EltTy = MRI.getType(EltReg);
+  unsigned EltSize = EltTy.getSizeInBits();
+  if (EltSize < 16 || EltSize > 64)
+    return false; // Don't support all element types yet.
+
+  // Find the definition of the index. Bail out if it's not defined by a
+  // G_CONSTANT.
+  Register IdxReg = I.getOperand(3).getReg();
+  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
+  if (!VRegAndVal)
+    return false;
+  unsigned LaneIdx = VRegAndVal->Value;
+
+  // Perform the lane insert.
+  Register SrcReg = I.getOperand(1).getReg();
+  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+  MachineIRBuilder MIRBuilder(I);
+
+  if (VecSize < 128) {
+    // If the vector we're inserting into is smaller than 128 bits, widen it
+    // to 128 to do the insert.
+    MachineInstr *ScalarToVec = emitScalarToVector(
+        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
+    if (!ScalarToVec)
+      return false;
+    SrcReg = ScalarToVec->getOperand(0).getReg();
+  }
+
+  // Create an insert into a new FPR128 register.
+  // Note that if our vector is already 128 bits, we end up emitting an extra
+  // register.
+  MachineInstr *InsMI =
+      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
+
+  if (VecSize < 128) {
+    // If we had to widen to perform the insert, then we have to demote back to
+    // the original size to get the result we want.
+    Register DemoteVec = InsMI->getOperand(0).getReg();
+    const TargetRegisterClass *RC =
+        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
+    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+      return false;
+    }
+    unsigned SubReg = 0;
+    if (!getSubRegForClass(RC, TRI, SubReg))
+      return false;
+    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
+                        << ")\n");
+      return false;
+    }
+    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+        .addReg(DemoteVec, 0, SubReg);
+    RBI.constrainGenericRegister(DstReg, *RC, MRI);
+  } else {
+    // No widening needed.
+    InsMI->getOperand(0).setReg(DstReg);
+    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectBuildVector(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  // Selects G_BUILD_VECTOR as a scalar-to-vector of the first element plus one
+  // lane insert per remaining element (no optimized selections ported yet).
+  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
+  unsigned EltSize = EltTy.getSizeInBits();
+  if (EltSize < 16 || EltSize > 64)
+    return false; // Don't support all element types yet.
+  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+  MachineIRBuilder MIRBuilder(I);
+
+  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
+  MachineInstr *ScalarToVec =
+      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
+                         I.getOperand(1).getReg(), MIRBuilder);
+  if (!ScalarToVec)
+    return false;
+
+  Register DstVec = ScalarToVec->getOperand(0).getReg();
+  unsigned DstSize = DstTy.getSizeInBits();
+
+  // Keep track of the last MI we inserted. Later on, we might be able to save
+  // a copy using it.
+  MachineInstr *PrevMI = nullptr;
+  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
+    // Note that if we don't do a subregister copy, we can end up making an
+    // extra register.
+    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
+                              MIRBuilder);
+    DstVec = PrevMI->getOperand(0).getReg();
+  }
+
+  // If DstTy's size in bits is less than 128, then emit a subregister copy
+  // from DstVec into the instruction's original destination register.
+  if (DstSize < 128) {
+    // Force this to be FPR using the destination vector.
+    const TargetRegisterClass *RC =
+        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
+    if (!RC)
+      return false;
+    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+      return false;
+    }
+
+    unsigned SubReg = 0;
+    if (!getSubRegForClass(RC, TRI, SubReg))
+      return false;
+    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
+                        << ")\n");
+      return false;
+    }
+
+    Register Reg = MRI.createVirtualRegister(RC);
+    Register DstReg = I.getOperand(0).getReg();
+    // NOTE(review): Reg is only stored into I, which is erased below.
+    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+        .addReg(DstVec, 0, SubReg);
+    MachineOperand &RegOp = I.getOperand(1);
+    RegOp.setReg(Reg);
+    RBI.constrainGenericRegister(DstReg, *RC, MRI);
+  } else {
+    // We don't need a subregister copy. Save a copy by re-using the
+    // destination register on the final insert.
+    assert(PrevMI && "PrevMI was null?");
+    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
+    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
+/// Helper that scans the operands of \p I for an intrinsic ID operand.
+/// Returns the ID of the first such operand, or 0 if there is none.
+static unsigned findIntrinsicID(MachineInstr &I) {
+  for (const MachineOperand &Op : I.operands()) {
+    // The first intrinsic ID operand found is the one reported.
+    if (Op.isIntrinsicID())
+      return Op.getIntrinsicID();
+  }
+  return 0;
+}
+
+/// Helper function that maps the store size of a llvm.aarch64.stlxr intrinsic,
+/// in bytes, to the opcode to emit. Returns 0 for unsupported sizes.
+static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
+  // TODO: 1, 2, and 4 byte stores.
+  const bool IsSupportedSize = NumBytesToStore == 8;
+  if (!IsSupportedSize) {
+    // Report the rejected size; the 0 result tells the caller to give up.
+    LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
+                      << NumBytesToStore << ")\n");
+    return 0;
+  }
+
+  return AArch64::STLXRX;
+}
+
+bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Find the intrinsic ID; 0 means I carries no intrinsic ID operand.
+  unsigned IntrinID = findIntrinsicID(I);
+  if (!IntrinID)
+    return false;
+  MachineIRBuilder MIRBuilder(I);
+  // Returns true and erases I on success; a false return leaves I in place.
+  // Select the instruction.
+  switch (IntrinID) {
+  default:
+    return false; // Not an intrinsic this function selects.
+  case Intrinsic::trap:
+    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
+    break;
+  case Intrinsic::debugtrap:
+    if (!STI.isTargetWindows())
+      return false; // debugtrap is only selected for Windows targets.
+    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
+    break;
+  case Intrinsic::aarch64_stlxr:
+    Register StatReg = I.getOperand(0).getReg();
+    assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
+           "Status register must be 32 bits!");
+    Register SrcReg = I.getOperand(2).getReg();
+    // Only 64-bit values can be stored for now.
+    if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
+      LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
+      return false;
+    }
+
+    Register PtrReg = I.getOperand(3).getReg();
+    assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
+
+    // Expect only one memory operand.
+    if (!I.hasOneMemOperand())
+      return false;
+    // The memory operand's size picks the opcode (0: unsupported size).
+    const MachineMemOperand *MemOp = *I.memoperands_begin();
+    unsigned NumBytesToStore = MemOp->getSize();
+    unsigned Opc = getStlxrOpcode(NumBytesToStore);
+    if (!Opc)
+      return false;
+    // StatReg is the def; SrcReg and PtrReg are the uses.
+    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectIntrinsic(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  unsigned IntrinID = findIntrinsicID(I);
+  if (!IntrinID)
+    return false; // No intrinsic ID operand on I.
+  MachineIRBuilder MIRBuilder(I);
+  // Only aarch64_crypto_sha1h is selected here; other IDs return false.
+  switch (IntrinID) {
+  default:
+    break; // Unhandled intrinsic: falls out to the final return false.
+  case Intrinsic::aarch64_crypto_sha1h:
+    Register DstReg = I.getOperand(0).getReg();
+    Register SrcReg = I.getOperand(2).getReg();
+    // Both the result and the source must be 32 bits wide.
+    // FIXME: Should this be an assert?
+    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
+        MRI.getType(SrcReg).getSizeInBits() != 32)
+      return false;
+
+    // The operation has to happen on FPRs. Set up some new FPR registers for
+    // the source and destination if they are on GPRs.
+    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
+      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
+
+      // Make sure the copy ends up getting constrained properly.
+      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
+                                   AArch64::GPR32RegClass, MRI);
+    }
+    // A non-FPR destination gets a fresh FPR32 to hold the result.
+    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
+      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+
+    // Actually insert the instruction.
+    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
+    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
+
+    // Did we create a new register for the destination?
+    if (DstReg != I.getOperand(0).getReg()) {
+      // Yep. Copy the result of the instruction back into the original
+      // destination.
+      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
+      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
+                                   AArch64::GPR32RegClass, MRI);
+    }
+
+    I.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
+static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
+  auto &MI = *Root.getParent();
+  auto &MBB = *MI.getParent();
+  auto &MF = *MBB.getParent();
+  auto &MRI = MF.getRegInfo();
   uint64_t Immed;
   if (Root.isImm())
     Immed = Root.getImm();
   else if (Root.isCImm())
     Immed = Root.getCImm()->getZExtValue();
   else if (Root.isReg()) {
-    MachineInstr *Def = MRI.getVRegDef(Root.getReg());
-    if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+    if (!ValAndVReg)
       return None;
-    MachineOperand &Op1 = Def->getOperand(1);
-    if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
-      return None;
-    Immed = Op1.getCImm()->getZExtValue();
+    Immed = ValAndVReg->Value;
   } else
     return None;
+  return Immed;
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
+  const Optional<uint64_t> ShiftAmt = getImmedFromMO(Root);
+  if (!ShiftAmt.hasValue() || ShiftAmt.getValue() > 31)
+    return None; // Not a known constant in the range [0, 31].
+  const uint64_t Enc = (32 - ShiftAmt.getValue()) & 0x1f; // (32 - amt) mod 32
+  return {{[Enc](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
+  const Optional<uint64_t> ShiftAmt = getImmedFromMO(Root);
+  if (!ShiftAmt.hasValue() || ShiftAmt.getValue() > 31)
+    return None; // Not a known constant in the range [0, 31].
+  const uint64_t Enc = 31 - ShiftAmt.getValue(); // Rendered as an immediate.
+  return {{[Enc](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
+  const Optional<uint64_t> ShiftAmt = getImmedFromMO(Root);
+  if (!ShiftAmt.hasValue() || ShiftAmt.getValue() > 63)
+    return None; // Not a known constant in the range [0, 63].
+  const uint64_t Enc = (64 - ShiftAmt.getValue()) & 0x3f; // (64 - amt) mod 64
+  return {{[Enc](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
 
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
+  const Optional<uint64_t> ShiftAmt = getImmedFromMO(Root);
+  if (!ShiftAmt.hasValue() || ShiftAmt.getValue() > 63)
+    return None; // Not a known constant in the range [0, 63].
+  const uint64_t Enc = 63 - ShiftAmt.getValue(); // Rendered as an immediate.
+  return {{[Enc](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+/// SelectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
+/// Val set to the 12-bit value and Shift set to the shifter operand.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
+  // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcode it's interested in, however
+  // we still need to check whether the operand is actually an immediate
+  // here because the ComplexPattern opcode list is only used in
+  // root-level opcode matching.
+  auto MaybeImmed = getImmedFromMO(Root);
+  if (MaybeImmed == None)
+    return None;
+  uint64_t Immed = *MaybeImmed;
   unsigned ShiftAmt;
 
   if (Immed >> 12 == 0) {
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 6f7fb7a8bc21..a985b330eafa 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -22,8 +21,11 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
 
+#define DEBUG_TYPE "aarch64-legalinfo"
+
 using namespace llvm;
 using namespace LegalizeActions;
+using namespace LegalizeMutations;
 using namespace LegalityPredicates;
 
 AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
@@ -46,9 +48,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
   const LLT v2s32 = LLT::vector(2, 32);
   const LLT v4s32 = LLT::vector(4, 32);
   const LLT v2s64 = LLT::vector(2, 64);
+  const LLT v2p0 = LLT::vector(2, p0);
 
   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
-    .legalFor({p0, s1, s8, s16, s32, s64, v2s64})
+    .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
     .clampScalar(0, s1, s64)
     .widenScalarToNextPow2(0, 8)
     .fewerElementsIf(
@@ -65,33 +68,58 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       });
 
   getActionDefinitionsBuilder(G_PHI)
-      .legalFor({p0, s16, s32, s64})
+      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
       .clampScalar(0, s16, s64)
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder(G_BSWAP)
-      .legalFor({s32, s64})
+      .legalFor({s32, s64, v4s32, v2s32, v2s64})
       .clampScalar(0, s16, s64)
       .widenScalarToNextPow2(0);
 
-  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
-      .legalFor({s32, s64, v2s32, v4s32, v2s64})
+  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0);
 
+  getActionDefinitionsBuilder(G_SHL)
+    .legalFor({{s32, s32}, {s64, s64},
+               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
+    .clampScalar(1, s32, s64)
+    .clampScalar(0, s32, s64)
+    .widenScalarToNextPow2(0)
+    .clampNumElements(0, v2s32, v4s32)
+    .clampNumElements(0, v2s64, v2s64)
+    .moreElementsToNextPow2(0)
+    .minScalarSameAs(1, 0);
+
   getActionDefinitionsBuilder(G_GEP)
       .legalFor({{p0, s64}})
       .clampScalar(1, s64, s64);
 
   getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
 
-  getActionDefinitionsBuilder({G_LSHR, G_ASHR, G_SDIV, G_UDIV})
+  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
       .legalFor({s32, s64})
       .clampScalar(0, s32, s64)
-      .widenScalarToNextPow2(0);
+      .widenScalarToNextPow2(0)
+      .scalarize(0);
+
+  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
+      .customIf([=](const LegalityQuery &Query) {
+        const auto &SrcTy = Query.Types[0];
+        const auto &AmtTy = Query.Types[1];
+        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+               AmtTy.getSizeInBits() == 32;
+      })
+      .legalFor(
+          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
+      .clampScalar(1, s32, s64)
+      .clampScalar(0, s32, s64)
+      .minScalarSameAs(1, 0);
 
   getActionDefinitionsBuilder({G_SREM, G_UREM})
       .lowerFor({s1, s8, s16, s32, s64});
@@ -101,15 +129,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
 
   getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
 
-  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO})
+  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
       .legalFor({{s32, s1}, {s64, s1}});
 
-  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
-      .legalFor({s32, s64});
+  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
+    .legalFor({s32, s64, v2s64, v4s32, v2s32});
 
-  getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
 
-  getActionDefinitionsBuilder(G_FCEIL)
+  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
+                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
+                               G_FNEARBYINT})
+      // If we don't have full FP16 support, then scalarize the elements of
+      // vectors containing fp16 types.
+      .fewerElementsIf(
+          [=, &ST](const LegalityQuery &Query) {
+            const auto &Ty = Query.Types[0];
+            return Ty.isVector() && Ty.getElementType() == s16 &&
+                   !ST.hasFullFP16();
+          },
+          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
       // If we don't have full FP16 support, then widen s16 to s32 if we
       // encounter it.
       .widenScalarIf(
@@ -117,7 +156,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
             return Query.Types[0] == s16 && !ST.hasFullFP16();
           },
           [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
-      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64});
+      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
+
+  getActionDefinitionsBuilder(
+      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
+      // We need a call for these, so we always need to scalarize.
+      .scalarize(0)
+      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
+      .minScalar(0, s32)
+      .libcallFor({s32, s64, v2s32, v4s32, v2s64});
 
   getActionDefinitionsBuilder(G_INSERT)
       .unsupportedIf([=](const LegalityQuery &Query) {
@@ -158,12 +205,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
-      .legalForTypesWithMemSize({{s32, p0, 8},
-                                 {s32, p0, 16},
-                                 {s32, p0, 32},
-                                 {s64, p0, 64},
-                                 {p0, p0, 64},
-                                 {v2s32, p0, 64}})
+      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+                                 {s32, p0, 16, 8},
+                                 {s32, p0, 32, 8},
+                                 {s64, p0, 8, 2},
+                                 {s64, p0, 16, 2},
+                                 {s64, p0, 32, 4},
+                                 {s64, p0, 64, 8},
+                                 {p0, p0, 64, 8},
+                                 {v2s32, p0, 64, 8}})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -172,16 +222,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       // Lower anything left over into G_*EXT and G_LOAD
       .lower();
 
+  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
+    const LLT &ValTy = Query.Types[0];
+    if (!ValTy.isVector())
+      return false;
+    const LLT EltTy = ValTy.getElementType();
+    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
+  };
+
   getActionDefinitionsBuilder(G_LOAD)
-      .legalForTypesWithMemSize({{s8, p0, 8},
-                                 {s16, p0, 16},
-                                 {s32, p0, 32},
-                                 {s64, p0, 64},
-                                 {p0, p0, 64},
-                                 {v2s32, p0, 64}})
+      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+                                 {s16, p0, 16, 8},
+                                 {s32, p0, 32, 8},
+                                 {s64, p0, 64, 8},
+                                 {p0, p0, 64, 8},
+                                 {v8s8, p0, 64, 8},
+                                 {v16s8, p0, 128, 8},
+                                 {v4s16, p0, 64, 8},
+                                 {v8s16, p0, 128, 8},
+                                 {v2s32, p0, 64, 8},
+                                 {v4s32, p0, 128, 8},
+                                 {v2s64, p0, 128, 8}})
       // These extends are also legal
-      .legalForTypesWithMemSize({{s32, p0, 8},
-                                 {s32, p0, 16}})
+      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+                                 {s32, p0, 16, 8}})
       .clampScalar(0, s8, s64)
       .widenScalarToNextPow2(0)
       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -191,16 +255,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
-      .clampNumElements(0, v2s32, v2s32)
-      .clampMaxNumElements(0, s64, 1);
+      .clampMaxNumElements(0, s32, 2)
+      .clampMaxNumElements(0, s64, 1)
+      .customIf(IsPtrVecPred);
 
   getActionDefinitionsBuilder(G_STORE)
-      .legalForTypesWithMemSize({{s8, p0, 8},
-                                 {s16, p0, 16},
-                                 {s32, p0, 32},
-                                 {s64, p0, 64},
-                                 {p0, p0, 64},
-                                 {v2s32, p0, 64}})
+      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+                                 {s16, p0, 16, 8},
+                                 {s32, p0, 32, 8},
+                                 {s64, p0, 64, 8},
+                                 {p0, p0, 64, 8},
+                                 {v16s8, p0, 128, 8},
+                                 {v4s16, p0, 64, 8},
+                                 {v8s16, p0, 128, 8},
+                                 {v2s32, p0, 64, 8},
+                                 {v4s32, p0, 128, 8},
+                                 {v2s64, p0, 128, 8}})
       .clampScalar(0, s8, s64)
       .widenScalarToNextPow2(0)
       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -210,23 +280,48 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
         return Query.Types[0].isScalar() &&
                Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
       })
-      .clampNumElements(0, v2s32, v2s32)
-      .clampMaxNumElements(0, s64, 1);
+      .clampMaxNumElements(0, s32, 2)
+      .clampMaxNumElements(0, s64, 1)
+      .customIf(IsPtrVecPred);
 
   // Constants
   getActionDefinitionsBuilder(G_CONSTANT)
-      .legalFor({p0, s32, s64})
-      .clampScalar(0, s32, s64)
+    .legalFor({p0, s8, s16, s32, s64})
+      .clampScalar(0, s8, s64)
       .widenScalarToNextPow2(0);
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64})
       .clampScalar(0, s32, s64);
 
   getActionDefinitionsBuilder(G_ICMP)
-      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
+      .legalFor({{s32, s32},
+                 {s32, s64},
+                 {s32, p0},
+                 {v4s32, v4s32},
+                 {v2s32, v2s32},
+                 {v2s64, v2s64},
+                 {v2s64, v2p0},
+                 {v4s16, v4s16},
+                 {v8s16, v8s16},
+                 {v8s8, v8s8},
+                 {v16s8, v16s8}})
       .clampScalar(0, s32, s32)
       .clampScalar(1, s32, s64)
-      .widenScalarToNextPow2(1);
+      .minScalarEltSameAsIf(
+          [=](const LegalityQuery &Query) {
+            const LLT &Ty = Query.Types[0];
+            const LLT &SrcTy = Query.Types[1];
+            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
+                   Ty.getElementType() != SrcTy.getElementType();
+          },
+          0, 1)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
+          1, s32)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
+          s64)
+      .widenScalarOrEltToNextPow2(1);
 
   getActionDefinitionsBuilder(G_FCMP)
       .legalFor({{s32, s32}, {s32, s64}})
@@ -236,24 +331,48 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
 
   // Extensions
   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
-      .legalForCartesianProduct({s8, s16, s32, s64}, {s1, s8, s16, s32});
+      .legalIf([=](const LegalityQuery &Query) {
+        unsigned DstSize = Query.Types[0].getSizeInBits();
+
+        // Make sure that we have something that will fit in a register, and
+        // make sure it's a power of 2.
+        if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+          return false;
+
+        const LLT &SrcTy = Query.Types[1];
+
+        // Special case for s1.
+        if (SrcTy == s1)
+          return true;
+
+        // Make sure we fit in a register otherwise. Don't bother checking that
+        // the source type is below 128 bits. We shouldn't be allowing anything
+        // through which is wider than the destination in the first place.
+        unsigned SrcSize = SrcTy.getSizeInBits();
+        if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
+          return false;
+
+        return true;
+      });
+
+  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
 
   // FP conversions
   getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
-      {{s16, s32}, {s16, s64}, {s32, s64}});
+      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
   getActionDefinitionsBuilder(G_FPEXT).legalFor(
-      {{s32, s16}, {s64, s16}, {s64, s32}});
+      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
 
   // Conversions
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
-      .legalForCartesianProduct({s32, s64})
+      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
       .clampScalar(1, s32, s64)
       .widenScalarToNextPow2(1);
 
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
-      .legalForCartesianProduct({s32, s64})
+      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
       .clampScalar(1, s32, s64)
       .widenScalarToNextPow2(1)
       .clampScalar(0, s32, s64)
@@ -264,10 +383,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 
   // Select
+  // FIXME: We can probably do a bit better than just scalarizing vector
+  // selects.
   getActionDefinitionsBuilder(G_SELECT)
       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
       .clampScalar(0, s32, s64)
-      .widenScalarToNextPow2(0);
+      .widenScalarToNextPow2(0)
+      .scalarize(0);
 
   // Pointer-handling
   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
@@ -291,7 +413,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       // number of bits but it's what the previous code described and fixing
       // it breaks tests.
       .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
-                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
+                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
+                                 v2p0});
 
   getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
 
@@ -335,11 +458,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       }
       return false;
     };
-    auto scalarize =
-        [](const LegalityQuery &Query, unsigned TypeIdx) {
-          const LLT &Ty = Query.Types[TypeIdx];
-          return std::make_pair(TypeIdx, Ty.getElementType());
-        };
 
     // FIXME: This rule is horrible, but specifies the same as what we had
     // before with the particularly strange definitions removed (e.g.
@@ -353,10 +471,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
         // Break up vectors with weird elements into scalars
         .fewerElementsIf(
             [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
-            [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+            scalarize(0))
         .fewerElementsIf(
             [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
-            [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
+            scalarize(1))
         // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
         // or 384.
         .clampScalar(BigTyIdx, s8, s512)
@@ -397,16 +515,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
           return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
         })
         // Any vectors left are the wrong size. Scalarize them.
-        .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                         [](const LegalityQuery &Query) {
-                           return std::make_pair(
-                               0, Query.Types[0].getElementType());
-                         })
-        .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                         [](const LegalityQuery &Query) {
-                           return std::make_pair(
-                               1, Query.Types[1].getElementType());
-                         });
+      .scalarize(0)
+      .scalarize(1);
   }
 
   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -417,11 +527,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .minScalar(2, s64)
       .legalIf([=](const LegalityQuery &Query) {
         const LLT &VecTy = Query.Types[1];
-        return VecTy == v4s32 || VecTy == v2s64;
+        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
+               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
+      });
+
+  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+      .legalIf([=](const LegalityQuery &Query) {
+        const LLT &VecTy = Query.Types[0];
+        // TODO: Support s8 and s16
+        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
       });
 
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
-      .legalFor({{v4s32, s32}, {v2s64, s64}})
+      .legalFor({{v4s16, s16},
+                 {v8s16, s16},
+                 {v2s32, s32},
+                 {v4s32, s32},
+                 {v2p0, p0},
+                 {v2s64, s64}})
       .clampNumElements(0, v4s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
 
@@ -432,6 +555,42 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       })
       .minScalarSameAs(1, 0);
 
+  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
+      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+      .scalarize(1);
+
+  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
+      .legalIf([=](const LegalityQuery &Query) {
+        const LLT &DstTy = Query.Types[0];
+        const LLT &SrcTy = Query.Types[1];
+        // For now just support the TBL2 variant which needs the source vectors
+        // to be the same size as the dest.
+        if (DstTy != SrcTy)
+          return false;
+        for (auto &Ty : {v2s32, v4s32, v2s64}) {
+          if (DstTy == Ty)
+            return true;
+        }
+        return false;
+      })
+      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
+      // just want those lowered into G_BUILD_VECTOR
+      .lowerIf([=](const LegalityQuery &Query) {
+        return !Query.Types[1].isVector();
+      })
+      .clampNumElements(0, v4s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64);
+
+  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+
+  getActionDefinitionsBuilder(G_JUMP_TABLE)
+    .legalFor({{p0}, {s64}});
+
+  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
+    return Query.Types[0] == p0 && Query.Types[1] == s64;
+  });
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
@@ -446,37 +605,106 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
     return false;
   case TargetOpcode::G_VAARG:
     return legalizeVaArg(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE:
+    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
+    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
   }
 
   llvm_unreachable("expected switch to return");
 }
 
+bool AArch64LegalizerInfo::legalizeShlAshrLshr(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+    GISelChangeObserver &Observer) const { // Observer is not used below.
+  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+         MI.getOpcode() == TargetOpcode::G_LSHR ||
+         MI.getOpcode() == TargetOpcode::G_SHL);
+  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
+  // imported patterns can select it later. Either way, it will be legal.
+  Register AmtReg = MI.getOperand(2).getReg();
+  auto *CstMI = MRI.getVRegDef(AmtReg);
+  assert(CstMI && "expected to find a vreg def");
+  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return true; // Not a constant amount: MI is left unchanged.
+  // Check the shift amount is in range for an immediate form.
+  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
+  if (Amount > 31)
+    return true; // This will have to remain a register variant.
+  assert(MRI.getType(AmtReg).getSizeInBits() == 32); // 32-bit amount only.
+  MIRBuilder.setInstr(MI);
+  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
+  MI.getOperand(2).setReg(ExtCst.getReg(0)); // Shift by the widened amount.
+  return true;
+}
+
+bool AArch64LegalizerInfo::legalizeLoadStore(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+    GISelChangeObserver &Observer) const { // Observer is not used below.
+  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
+         MI.getOpcode() == TargetOpcode::G_LOAD);
+  // Here we just try to handle vector loads/stores where our value type might
+  // have pointer elements, which the SelectionDAG importer can't handle. To
+  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
+  // the value to use s64 types.
+
+  // Custom legalization requires that the instruction, if not deleted, be fully
+  // legalized. In order to allow further legalization of the inst, we create
+  // a new instruction and erase the existing one.
+
+  Register ValReg = MI.getOperand(0).getReg();
+  const LLT ValTy = MRI.getType(ValReg);
+  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
+      ValTy.getElementType().getAddressSpace() != 0) {
+    LLVM_DEBUG(
+        dbgs() << "Tried to do custom legalization on wrong load/store\n");
+    return false; // Not a vector of address-space-0 pointers.
+  }
+
+  MIRBuilder.setInstr(MI);
+  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
+  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
+  auto &MMO = **MI.memoperands_begin(); // Reused by the replacement.
+  if (MI.getOpcode() == TargetOpcode::G_STORE) {
+    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
+    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
+  } else {
+    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
+    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
+    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
+  }
+  MI.eraseFromParent(); // MI is superseded by the instructions built above.
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder) const {
   MIRBuilder.setInstr(MI);
   MachineFunction &MF = MIRBuilder.getMF();
   unsigned Align = MI.getOperand(2).getImm();
-  unsigned Dst = MI.getOperand(0).getReg();
-  unsigned ListPtr = MI.getOperand(1).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+  Register ListPtr = MI.getOperand(1).getReg();
 
   LLT PtrTy = MRI.getType(ListPtr);
   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
 
   const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
-  unsigned List = MRI.createGenericVirtualRegister(PtrTy);
+  Register List = MRI.createGenericVirtualRegister(PtrTy);
   MIRBuilder.buildLoad(
       List, ListPtr,
       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                                PtrSize, /* Align = */ PtrSize));
 
-  unsigned DstPtr;
+  Register DstPtr;
   if (Align > PtrSize) {
     // Realign the list to the actual required alignment.
     auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
 
-    unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
-    MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
+    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));
 
     DstPtr = MRI.createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
@@ -489,11 +717,9 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
       *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                                ValSize, std::max(Align, PtrSize)));
 
-  unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
-  MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
+  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
 
-  unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
-  MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
+  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));
 
   MIRBuilder.buildStore(
       NewList, ListPtr,
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h
index 77e8bdc7623c..f3362a18620f 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -1,9 +1,8 @@
 //===- AArch64LegalizerInfo --------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -35,6 +34,12 @@ public:
 private:
   bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
                      MachineIRBuilder &MIRBuilder) const;
+  bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder,
+                         GISelChangeObserver &Observer) const;
+  bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           MachineIRBuilder &MIRBuilder,
+                           GISelChangeObserver &Observer) const;
 };
 } // End llvm namespace.
 #endif
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index aa732a99469c..65b5f906e3f6 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -934,8 +933,6 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                ? getLdStOffsetOp(*StoreI).getImm()
                                : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
     int Width = LoadSize * 8;
-    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
-    int Imms = Immr + Width - 1;
     unsigned DestReg = IsStoreXReg
                            ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
                                                       &AArch64::GPR64RegClass)
@@ -945,8 +942,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
             (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
            "Invalid offset");
 
-    Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
-    Imms = Immr + Width - 1;
+    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
+    int Imms = Immr + Width - 1;
     if (UnscaledLdOffset == UnscaledStOffset) {
       uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                 | ((Immr) << 6)               // immr
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index d71359223b1b..e7d4a2789a28 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -1,9 +1,8 @@
 //==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index aa30fe1fa707..8f3148a98410 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -1,9 +1,8 @@
 //===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 5183e7d3c0d0..0efeeb272ec1 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -92,6 +91,11 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// other stack allocations.
   bool CalleeSaveStackHasFreeSpace = false;
 
+  /// SRetReturnReg - sret lowering includes returning the value of the
+  /// returned struct in a register. This field holds the virtual register into
+  /// which the sret argument is passed.
+  unsigned SRetReturnReg = 0;
+
   /// Has a value when it is known whether or not the function uses a
   /// redzone, and no value otherwise.
   /// Initialized during frame lowering, unless the function has the noredzone
@@ -101,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
   SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
+  // Offset from SP-at-entry to the tagged base pointer.
+  // Tagged base pointer is set up to point to the first (lowest address) tagged
+  // stack slot. Zero until setTaggedBasePointerOffset() is called.
+  unsigned TaggedBasePointerOffset = 0;
+
 public:
   AArch64FunctionInfo() = default;
 
@@ -166,6 +176,9 @@ public:
   unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
   void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
 
+  unsigned getSRetReturnReg() const { return SRetReturnReg; } // 0 by default.
+  void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } // sret vreg.
+
   unsigned getJumpTableEntrySize(int Idx) const {
     auto It = JumpTableEntryInfo.find(Idx);
     if (It != JumpTableEntryInfo.end())
@@ -217,6 +230,13 @@ public:
     return ForwardedMustTailRegParms;
   }
 
+  unsigned getTaggedBasePointerOffset() const {
+    return TaggedBasePointerOffset; // Offset from SP-at-entry.
+  }
+  void setTaggedBasePointerOffset(unsigned Offset) {
+    TaggedBasePointerOffset = Offset; // Offset is relative to SP-at-entry.
+  }
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index bc596dd38b6e..9a2103579a6a 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -1,9 +1,8 @@
 //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h
index 32d90d4c40d6..4e7ccbe4baab 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.h
+++ b/lib/Target/AArch64/AArch64MacroFusion.h
@@ -1,9 +1,8 @@
 //===- AArch64MacroFusion.h - AArch64 Macro Fusion ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index ccf646575296..aff861aae6be 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file contains the AArch64 / Cortex-A57 specific register allocation
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index b99c1d1d6b3e..5ea91b4a1967 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -1,9 +1,8 @@
 //==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/AArch64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
index 9e9eec48c555..f443cd03935c 100644
--- a/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -1,9 +1,8 @@
 //===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64PfmCounters.td b/lib/Target/AArch64/AArch64PfmCounters.td
index 16ba3e4282a0..b1d1664e3f1b 100644
--- a/lib/Target/AArch64/AArch64PfmCounters.td
+++ b/lib/Target/AArch64/AArch64PfmCounters.td
@@ -1,9 +1,8 @@
 //===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 3da9306e6460..5f7245bfbd74 100644
--- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -1,9 +1,8 @@
 //=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,6 +43,10 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
   switch (MI.getOpcode()) {
   default:
     return false;
+  case TargetOpcode::COPY:
+    return Helper.tryCombineCopy(MI);
+  case TargetOpcode::G_BR:
+    return Helper.tryCombineBr(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD:
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 01d8a35bbc23..a594ecb71fc9 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -1,9 +1,8 @@
 //==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -494,7 +493,8 @@ void AArch64PromoteConstant::insertDefinitions(Function &F,
   for (const auto &IPI : InsertPts) {
     // Create the load of the global variable.
     IRBuilder<> Builder(IPI.first);
-    LoadInst *LoadedCst = Builder.CreateLoad(&PromotedGV);
+    LoadInst *LoadedCst =
+        Builder.CreateLoad(PromotedGV.getValueType(), &PromotedGV);
     LLVM_DEBUG(dbgs() << "**********\n");
     LLVM_DEBUG(dbgs() << "New def: ");
     LLVM_DEBUG(LoadedCst->print(dbgs()));
diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index fcb0b36a9f6d..0d75ab7ac8a9 100644
--- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -1,9 +1,8 @@
 //=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // This pass removes unnecessary copies/moves in BBs based on a dominating
 // condition.
@@ -380,8 +379,8 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
     bool IsCopy = MI->isCopy();
     bool IsMoveImm = MI->isMoveImmediate();
     if (IsCopy || IsMoveImm) {
-      MCPhysReg DefReg = MI->getOperand(0).getReg();
-      MCPhysReg SrcReg = IsCopy ? MI->getOperand(1).getReg() : 0;
+      Register DefReg = MI->getOperand(0).getReg();
+      Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register();
       int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
       if (!MRI->isReserved(DefReg) &&
           ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 68c48a5ec216..b52259cc9acd 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -243,12 +242,17 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
   case AArch64::GPR32RegClassID:
   case AArch64::GPR32spRegClassID:
   case AArch64::GPR32sponlyRegClassID:
+  case AArch64::GPR32argRegClassID:
   case AArch64::GPR32allRegClassID:
   case AArch64::GPR64commonRegClassID:
   case AArch64::GPR64RegClassID:
   case AArch64::GPR64spRegClassID:
   case AArch64::GPR64sponlyRegClassID:
+  case AArch64::GPR64argRegClassID:
   case AArch64::GPR64allRegClassID:
+  case AArch64::GPR64noipRegClassID:
+  case AArch64::GPR64common_and_GPR64noipRegClassID:
+  case AArch64::GPR64noip_and_tcGPR64RegClassID:
   case AArch64::tcGPR64RegClassID:
   case AArch64::WSeqPairsClassRegClassID:
   case AArch64::XSeqPairsClassRegClassID:
@@ -385,11 +389,26 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FMA:
   case TargetOpcode::G_FDIV:
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_FPEXT:
   case TargetOpcode::G_FPTRUNC:
   case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FFLOOR:
+  case TargetOpcode::G_FNEARBYINT:
+  case TargetOpcode::G_FNEG:
+  case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FLOG10:
+  case TargetOpcode::G_FLOG:
+  case TargetOpcode::G_FLOG2:
+  case TargetOpcode::G_FSQRT:
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_FEXP:
+  case TargetOpcode::G_FRINT:
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+  case TargetOpcode::G_INTRINSIC_ROUND:
     return true;
   }
   return false;
@@ -438,6 +457,54 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
                                getValueMapping(RBIdx, Size), NumOperands);
 }
 
+bool AArch64RegisterBankInfo::hasFPConstraints(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI) const {
+  unsigned Op = MI.getOpcode();
+
+  // Do we have an explicit floating point instruction?
+  if (isPreISelGenericFloatingPointOpcode(Op))
+    return true;
+
+  // No. Check if we have a copy-like instruction. If we do, then we could
+  // still be fed by floating point instructions.
+  if (Op != TargetOpcode::COPY && !MI.isPHI())
+    return false; // Neither an FP opcode nor a COPY/PHI.
+
+  // MI is copy-like. Return true if operand 0 is on the FPR register bank.
+  return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+         &AArch64::FPRRegBank;
+}
+
+bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+                                         const MachineRegisterInfo &MRI,
+                                         const TargetRegisterInfo &TRI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FCMP:
+    return true; // These opcodes are always treated as FP users.
+  default:
+    break; // Fall through to the generic FP-constraint check.
+  }
+  return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool AArch64RegisterBankInfo::onlyDefinesFP(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return true; // These opcodes are always treated as defining an FPR.
+  default:
+    break; // Fall through to the generic FP-constraint check.
+  }
+  return hasFPConstraints(MI, MRI, TRI);
+}
+
 const RegisterBankInfo::InstructionMapping &
 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
@@ -470,10 +537,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_AND:
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR:
-    // Shifts.
-  case TargetOpcode::G_SHL:
-  case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_ASHR:
     // Floating point ops.
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
@@ -487,6 +550,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         DefaultMappingID, /*Cost*/ 1,
         getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
         /*NumOperands*/ 2);
+  }
+    // Shifts.
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR: {
+    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
+    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
+      return getInstructionMapping(DefaultMappingID, 1,
+                                   &ValMappings[Shift64Imm], 3);
+    return getSameKindOfOperandsMapping(MI);
   }
   case TargetOpcode::COPY: {
     unsigned DstReg = MI.getOperand(0).getReg();
@@ -563,10 +637,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   switch (Opc) {
   case TargetOpcode::G_SITOFP:
   case TargetOpcode::G_UITOFP:
+    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+      break;
     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
     break;
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
+    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+      break;
     OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
     break;
   case TargetOpcode::G_FCMP:
@@ -600,15 +678,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         // assume this was a floating point load in the IR.
         // If it was not, we would have had a bitcast before
         // reaching that instruction.
-        unsigned UseOpc = UseMI.getOpcode();
-        if (isPreISelGenericFloatingPointOpcode(UseOpc) ||
-            // Check if we feed a copy-like instruction with
-            // floating point constraints. In that case, we are still
-            // feeding fp instructions, but indirectly
-            // (e.g., through ABI copies).
-            ((UseOpc == TargetOpcode::COPY || UseMI.isPHI()) &&
-             getRegBank(UseMI.getOperand(0).getReg(), MRI, TRI) ==
-                 &AArch64::FPRRegBank)) {
+        if (onlyUsesFP(UseMI, MRI, TRI)) {
           OpRegBankIdx[0] = PMI_FirstFPR;
           break;
         }
@@ -621,18 +691,134 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       if (!VReg)
         break;
       MachineInstr *DefMI = MRI.getVRegDef(VReg);
-      unsigned DefOpc = DefMI->getOpcode();
-      if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
-          // Check if we come from a copy-like instruction with
-          // floating point constraints. In that case, we are still
-          // fed by fp instructions, but indirectly
-          // (e.g., through ABI copies).
-          ((DefOpc == TargetOpcode::COPY || DefMI->isPHI()) &&
-           getRegBank(DefMI->getOperand(0).getReg(), MRI, TRI) ==
-               &AArch64::FPRRegBank))
+      if (onlyDefinesFP(*DefMI, MRI, TRI))
         OpRegBankIdx[0] = PMI_FirstFPR;
       break;
     }
+    break;
+  case TargetOpcode::G_SELECT: {
+    // If the destination is FPR, preserve that.
+    if (OpRegBankIdx[0] != PMI_FirstGPR)
+      break;
+
+    // If we're taking in vectors, we have no choice but to put everything on
+    // FPRs.
+    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    if (SrcTy.isVector()) {
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+      break;
+    }
+
+    // Try to minimize the number of copies. If we have more floating point
+    // constrained values than not, then we'll put everything on FPR. Otherwise,
+    // everything has to be on GPR.
+    unsigned NumFP = 0;
+
+    // Check if the uses of the result always produce floating point values.
+    //
+    // For example:
+    //
+    // %z = G_SELECT %cond %x %y
+    // fpr = G_FOO %z ...
+    if (any_of(
+            MRI.use_instructions(MI.getOperand(0).getReg()),
+            [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
+      ++NumFP;
+
+    // Check if the defs of the source values always produce floating point
+    // values.
+    //
+    // For example:
+    //
+    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+    // %z = G_SELECT %cond %x %y
+    //
+    // Also check whether or not the sources have already been decided to be
+    // FPR. Keep track of this.
+    //
+    // This doesn't check the condition, since it's just whatever is in NZCV.
+    // This isn't passed explicitly in a register to fcsel/csel.
+    for (unsigned Idx = 2; Idx < 4; ++Idx) {
+      unsigned VReg = MI.getOperand(Idx).getReg();
+      MachineInstr *DefMI = MRI.getVRegDef(VReg);
+      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
+          onlyDefinesFP(*DefMI, MRI, TRI))
+        ++NumFP;
+    }
+
+    // If we have more FP constraints than not, then move everything over to
+    // FPR.
+    if (NumFP >= 2)
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+
+    break;
+  }
+  case TargetOpcode::G_UNMERGE_VALUES: {
+    // If the first operand belongs to a FPR register bank, then make sure that
+    // we preserve that.
+    if (OpRegBankIdx[0] != PMI_FirstGPR)
+      break;
+
+    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
+    // UNMERGE into scalars from a vector should always use FPR.
+    // Likewise if any of the uses are FP instructions.
+    if (SrcTy.isVector() ||
+        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
+      // Set the register bank of every operand to FPR.
+      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+           Idx < NumOperands; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+    }
+    break;
+  }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+    // Destination and source need to be FPRs.
+    OpRegBankIdx[0] = PMI_FirstFPR;
+    OpRegBankIdx[1] = PMI_FirstFPR;
+
+    // Index needs to be a GPR.
+    OpRegBankIdx[2] = PMI_FirstGPR;
+    break;
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    OpRegBankIdx[0] = PMI_FirstFPR;
+    OpRegBankIdx[1] = PMI_FirstFPR;
+
+    // The element may be either a GPR or FPR. Preserve that behaviour.
+    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
+      OpRegBankIdx[2] = PMI_FirstFPR;
+    else
+      OpRegBankIdx[2] = PMI_FirstGPR;
+
+    // Index needs to be a GPR.
+    OpRegBankIdx[3] = PMI_FirstGPR;
+    break;
+  case TargetOpcode::G_BUILD_VECTOR:
+    // If the first source operand belongs to a FPR register bank, then make
+    // sure that we preserve that.
+    if (OpRegBankIdx[1] != PMI_FirstGPR)
+      break;
+    unsigned VReg = MI.getOperand(1).getReg();
+    if (!VReg)
+      break;
+
+    // Get the instruction that defined the source operand reg, and check if
+    // it's a floating point operation. Or, if it's a type like s16 which
+    // doesn't have an exact size gpr register class.
+    MachineInstr *DefMI = MRI.getVRegDef(VReg);
+    unsigned DefOpc = DefMI->getOpcode();
+    const LLT SrcTy = MRI.getType(VReg);
+    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
+        SrcTy.getSizeInBits() < 32) {
+      // Have a floating point op.
+      // Make sure every operand gets mapped to a FPR register class.
+      unsigned NumOperands = MI.getNumOperands();
+      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+    }
+    break;
   }
 
   // Finally construct the computed mapping.
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 008221dbef58..016fed65eb2a 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- AArch64RegisterBankInfo -----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -58,6 +57,7 @@ protected:
     FPExt16To64Idx = 43,
     FPExt32To64Idx = 45,
     FPExt64To128Idx = 47,
+    Shift64Imm = 49
   };
 
   static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
@@ -114,6 +114,18 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   const InstructionMapping &
   getSameKindOfOperandsMapping(const MachineInstr &MI) const;
 
+  /// Returns true if the output of \p MI must be stored in an FPR.
+  bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI) const;
+
+  /// Returns true if the source registers of \p MI must all be FPRs.
+  bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                  const TargetRegisterInfo &TRI) const;
+
+  /// Returns true if the destination register of \p MI must be an FPR.
+  bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI) const;
+
 public:
   AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
 
diff --git a/lib/Target/AArch64/AArch64RegisterBanks.td b/lib/Target/AArch64/AArch64RegisterBanks.td
index eee584708f69..7bbd992890d1 100644
--- a/lib/Target/AArch64/AArch64RegisterBanks.td
+++ b/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -1,9 +1,8 @@
 //=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 96ae45ae3d0d..6d5a4e3d2f76 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -217,11 +216,8 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
 }
 
 bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
-  // FIXME: Get the list of argument registers from TableGen.
-  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
-                                          AArch64::X3, AArch64::X4, AArch64::X5,
-                                          AArch64::X6, AArch64::X7 };
-  return std::any_of(std::begin(GPRArgRegs), std::end(GPRArgRegs),
+  return std::any_of(std::begin(*AArch64::GPR64argRegClass.MC),
+                     std::end(*AArch64::GPR64argRegClass.MC),
                      [this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
 }
 
@@ -283,7 +279,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   return false;
 }
 
-unsigned
+Register
 AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const AArch64FrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
@@ -457,15 +453,34 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
     Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
-                                             /*PreferFP=*/true);
+                                             /*PreferFP=*/true,
+                                             /*ForSimm=*/false);
     Offset += MI.getOperand(FIOperandNum + 1).getImm();
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
     return;
   }
 
+  if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
+    MachineOperand &FI = MI.getOperand(FIOperandNum);
+    Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
+    FI.ChangeToImmediate(Offset);
+    return;
+  }
+
+  if (MI.getOpcode() == AArch64::TAGPstack) {
+    // TAGPstack must use the virtual frame register in its 3rd operand.
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    FrameReg = MI.getOperand(3).getReg();
+    Offset =
+        MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+  } else {
+    Offset = TFI->resolveFrameIndexReference(
+        MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  }
+
   // Modify MI as necessary to handle as much of 'Offset' as possible
-  Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
 
@@ -519,3 +534,13 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
     return 16;
   }
 }
+
+unsigned AArch64RegisterInfo::getLocalAddressRegister(
+    const MachineFunction &MF) const {
+  // SP without EH funclets/var-sized objects; else base or frame register.
+  if (!(MF.hasEHFunclets() || MF.getFrameInfo().hasVarSizedObjects()))
+    return AArch64::SP;
+  if (needsStackRealignment(MF))
+    return getBaseRegister();
+  return getFrameRegister(MF);
+}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index c4153228a7c0..2c3f82c530d8 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -1,9 +1,8 @@
 //==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,7 +113,7 @@ public:
   unsigned getBaseRegister() const;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const override;
@@ -122,6 +121,8 @@ public:
   bool trackLivenessAfterRegAlloc(const MachineFunction&) const override {
     return true;
   }
+
+  unsigned getLocalAddressRegister(const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index d3710cea0687..61fc0795c242 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1,9 +1,8 @@
 //=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -188,6 +187,10 @@ def GPR64z : RegisterOperand<GPR64> {
   let GIZeroRegister = XZR;
 }
 
+// GPR argument registers (W0-W7 and X0-X7).
+def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>;
+def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>;
+
 // GPR register classes which include WZR/XZR AND SP/WSP. This is not a
 // constraint used by any instructions, it is used as a common super-class.
 def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
@@ -206,6 +209,11 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
 // BTI-protected function.
 def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
 
+// Register set that excludes registers that are reserved for procedure calls.
+// This is used for pseudo-instructions that are actually implemented using a
+// procedure call.
+def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
+
 // GPR register classes for post increment amount of vector load/store that
 // has alternate printing when Rm=31 and prints a constant immediate value
 // equal to the total number of bytes transferred.
@@ -649,10 +657,12 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
 // ARMv8.1a atomic CASP register operands
 
 
-def WSeqPairs : RegisterTuples<[sube32, subo32], 
-                               [(rotl GPR32, 0), (rotl GPR32, 1)]>;
-def XSeqPairs : RegisterTuples<[sube64, subo64], 
-                               [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+                               [(decimate (rotl GPR32, 0), 2),
+                                (decimate (rotl GPR32, 1), 2)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+                               [(decimate (rotl GPR64, 0), 2),
+                                (decimate (rotl GPR64, 1), 2)]>;
 
 def WSeqPairsClass   : RegisterClass<"AArch64", [untyped], 32, 
                                      (add WSeqPairs)>{
diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index af555f6d2266..854670079e40 100644
--- a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -1,8 +1,7 @@
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0fde68011e86..79ab42f4c080 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1,9 +1,8 @@
 //=- AArch64SVEInstrInfo.td -  AArch64 SVE Instructions -*- tablegen -*-----=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,10 +25,10 @@ let Predicates = [HasSVE] in {
   defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
   defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
 
-  def AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
-  def ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
-  def EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
-  def BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
+  defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
+  defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
+  defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
+  defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
 
   defm ADD_ZPmZ   : sve_int_bin_pred_arit_0<0b000, "add">;
   defm SUB_ZPmZ   : sve_int_bin_pred_arit_0<0b001, "sub">;
@@ -876,10 +875,10 @@ let Predicates = [HasSVE] in {
   defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
 
   // Predicated shifts
-  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b000, "asr">;
-  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b001, "lsr">;
-  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">;
+  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
+  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
+  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
 
   defm ASR_ZPmZ  : sve_int_bin_pred_shift<0b000, "asr">;
   defm LSR_ZPmZ  : sve_int_bin_pred_shift<0b001, "lsr">;
@@ -1022,3 +1021,406 @@ let Predicates = [HasSVE] in {
   def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
                   (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
 }
+
+let Predicates = [HasSVE2] in {
+  // SVE2 integer multiply-add (indexed)
+  defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
+  defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;
+
+  // SVE2 saturating multiply-add high (indexed)
+  defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
+  defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;
+
+  // SVE2 saturating multiply-add high (vectors, unpredicated)
+  defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah">;
+  defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh">;
+
+  // SVE2 integer multiply (indexed)
+  defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul">;
+
+  // SVE2 saturating multiply high (indexed)
+  defm SQDMULH_ZZZI  : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh">;
+  defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
+
+  // SVE2 signed saturating doubling multiply high (unpredicated)
+  defm SQDMULH_ZZZ  : sve2_int_mul<0b100, "sqdmulh">;
+  defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
+
+  // SVE2 integer multiply vectors (unpredicated)
+  defm MUL_ZZZ    : sve2_int_mul<0b000, "mul">;
+  defm SMULH_ZZZ  : sve2_int_mul<0b010, "smulh">;
+  defm UMULH_ZZZ  : sve2_int_mul<0b011, "umulh">;
+  def  PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
+
+  // SVE2 complex integer dot product (indexed)
+  defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
+
+  // SVE2 complex integer dot product
+  defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
+
+  // SVE2 complex integer multiply-add (indexed)
+  defm CMLA_ZZZI      : sve2_cmla_by_indexed_elem<0b0, "cmla">;
+  // SVE2 complex saturating multiply-add (indexed)
+  defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
+
+  // SVE2 complex integer multiply-add
+  defm CMLA_ZZZ      : sve2_int_cmla<0b0, "cmla">;
+  defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
+
+  // SVE2 integer multiply long (indexed)
+  defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
+  defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">;
+  defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">;
+  defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">;
+
+  // SVE2 saturating multiply (indexed)
+  defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">;
+  defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
+
+  // SVE2 integer multiply-add long (indexed)
+  defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb">;
+  defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt">;
+  defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb">;
+  defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt">;
+  defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb">;
+  defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt">;
+  defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb">;
+  defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt">;
+
+  // SVE2 integer multiply-add long (vectors, unpredicated)
+  defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
+  defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt">;
+  defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb">;
+  defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt">;
+  defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb">;
+  defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt">;
+  defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb">;
+  defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
+
+  // SVE2 saturating multiply-add long (indexed)
+  defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb">;
+  defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt">;
+  defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb">;
+  defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt">;
+
+  // SVE2 saturating multiply-add long (vectors, unpredicated)
+  defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
+  defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt">;
+  defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb">;
+  defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt">;
+
+  // SVE2 saturating multiply-add interleaved long
+  defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt">;
+  defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
+
+  // SVE2 integer halving add/subtract (predicated)
+  defm SHADD_ZPmZ  : sve2_int_arith_pred<0b100000, "shadd">;
+  defm UHADD_ZPmZ  : sve2_int_arith_pred<0b100010, "uhadd">;
+  defm SHSUB_ZPmZ  : sve2_int_arith_pred<0b100100, "shsub">;
+  defm UHSUB_ZPmZ  : sve2_int_arith_pred<0b100110, "uhsub">;
+  defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
+  defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
+  defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
+  defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
+
+  // SVE2 integer pairwise add and accumulate long
+  defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
+  defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
+
+  // SVE2 integer pairwise arithmetic
+  defm ADDP_ZPmZ  : sve2_int_arith_pred<0b100011, "addp">;
+  defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
+  defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
+  defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
+  defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
+
+  // SVE2 integer unary operations (predicated)
+  defm URECPE_ZPmZ  : sve2_int_un_pred_arit_s<0b000, "urecpe">;
+  defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte">;
+  defm SQABS_ZPmZ   : sve2_int_un_pred_arit<0b100, "sqabs">;
+  defm SQNEG_ZPmZ   : sve2_int_un_pred_arit<0b101, "sqneg">;
+
+  // SVE2 saturating add/subtract
+  defm SQADD_ZPmZ  : sve2_int_arith_pred<0b110000, "sqadd">;
+  defm UQADD_ZPmZ  : sve2_int_arith_pred<0b110010, "uqadd">;
+  defm SQSUB_ZPmZ  : sve2_int_arith_pred<0b110100, "sqsub">;
+  defm UQSUB_ZPmZ  : sve2_int_arith_pred<0b110110, "uqsub">;
+  defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
+  defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
+  defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
+  defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
+
+  // SVE2 saturating/rounding bitwise shift left (predicated)
+  defm SRSHL_ZPmZ   : sve2_int_arith_pred<0b000100, "srshl">;
+  defm URSHL_ZPmZ   : sve2_int_arith_pred<0b000110, "urshl">;
+  defm SRSHLR_ZPmZ  : sve2_int_arith_pred<0b001100, "srshlr">;
+  defm URSHLR_ZPmZ  : sve2_int_arith_pred<0b001110, "urshlr">;
+  defm SQSHL_ZPmZ   : sve2_int_arith_pred<0b010000, "sqshl">;
+  defm UQSHL_ZPmZ   : sve2_int_arith_pred<0b010010, "uqshl">;
+  defm SQRSHL_ZPmZ  : sve2_int_arith_pred<0b010100, "sqrshl">;
+  defm UQRSHL_ZPmZ  : sve2_int_arith_pred<0b010110, "uqrshl">;
+  defm SQSHLR_ZPmZ  : sve2_int_arith_pred<0b011000, "sqshlr">;
+  defm UQSHLR_ZPmZ  : sve2_int_arith_pred<0b011010, "uqshlr">;
+  defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
+  defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+
+  // SVE2 integer add/subtract long
+  defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
+  defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
+  defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
+  defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
+  defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
+  defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
+  defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
+  defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
+  defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
+  defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
+  defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
+  defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
+
+  // SVE2 integer add/subtract wide
+  defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
+  defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
+  defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
+  defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
+  defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
+  defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
+  defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
+  defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
+
+  // SVE2 integer multiply long
+  defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
+  defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
+  defm SMULLB_ZZZ   : sve2_wide_int_arith_long<0b11100, "smullb">;
+  defm SMULLT_ZZZ   : sve2_wide_int_arith_long<0b11101, "smullt">;
+  defm UMULLB_ZZZ   : sve2_wide_int_arith_long<0b11110, "umullb">;
+  defm UMULLT_ZZZ   : sve2_wide_int_arith_long<0b11111, "umullt">;
+  defm PMULLB_ZZZ   : sve2_pmul_long<0b0, "pmullb">;
+  defm PMULLT_ZZZ   : sve2_pmul_long<0b1, "pmullt">;
+
+  // SVE2 bitwise shift and insert
+  defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
+  defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
+
+  // SVE2 bitwise shift right and accumulate
+  defm SSRA_ZZI  : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
+  defm USRA_ZZI  : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
+  defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
+  defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
+
+  // SVE2 complex integer add
+  defm CADD_ZZI   : sve2_int_cadd<0b0, "cadd">;
+  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+
+  // SVE2 integer absolute difference and accumulate
+  defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
+  defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
+
+  // SVE2 integer absolute difference and accumulate long
+  defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
+  defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
+  defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
+  defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
+
+  // SVE2 integer add/subtract long with carry
+  defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
+  defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
+  defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
+  defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
+
+  // SVE2 bitwise shift right narrow
+  defm SQSHRUNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
+  defm SQSHRUNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
+  defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
+  defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
+  defm SHRNB_ZZI     : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
+  defm SHRNT_ZZI     : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
+  defm RSHRNB_ZZI    : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
+  defm RSHRNT_ZZI    : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
+  defm SQSHRNB_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
+  defm SQSHRNT_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
+  defm SQRSHRNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
+  defm SQRSHRNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
+  defm UQSHRNB_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
+  defm UQSHRNT_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
+  defm UQRSHRNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
+  defm UQRSHRNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
+
+  // SVE2 integer add/subtract narrow high part
+  defm ADDHNB_ZZZ  : sve2_int_addsub_narrow_high<0b000, "addhnb">;
+  defm ADDHNT_ZZZ  : sve2_int_addsub_narrow_high<0b001, "addhnt">;
+  defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
+  defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
+  defm SUBHNB_ZZZ  : sve2_int_addsub_narrow_high<0b100, "subhnb">;
+  defm SUBHNT_ZZZ  : sve2_int_addsub_narrow_high<0b101, "subhnt">;
+  defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
+  defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
+
+  // SVE2 saturating extract narrow
+  defm SQXTNB_ZZ  : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
+  defm SQXTNT_ZZ  : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
+  defm UQXTNB_ZZ  : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
+  defm UQXTNT_ZZ  : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
+  defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
+  defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
+
+  // SVE2 character match
+  defm MATCH_PPzZZ  : sve2_char_match<0b0, "match">;
+  defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
+
+  // SVE2 bitwise exclusive-or interleaved
+  defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
+  defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
+
+  // SVE2 bitwise shift left long
+  defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
+  defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
+  defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
+  defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
+
+  // SVE2 integer add/subtract interleaved long
+  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
+  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
+  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+
+  // SVE2 histogram generation (segment)
+  def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
+
+  // SVE2 histogram generation (vector)
+  defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+
+  // SVE2 floating-point convert precision
+  defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
+  defm FCVTNT_ZPmZ  : sve2_fp_convert_down_narrow<"fcvtnt">;
+  defm FCVTLT_ZPmZ  : sve2_fp_convert_up_long<"fcvtlt">;
+
+  // SVE2 floating-point pairwise operations
+  defm FADDP_ZPmZZ   : sve2_fp_pairwise_pred<0b000, "faddp">;
+  defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
+  defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
+  defm FMAXP_ZPmZZ   : sve2_fp_pairwise_pred<0b110, "fmaxp">;
+  defm FMINP_ZPmZZ   : sve2_fp_pairwise_pred<0b111, "fminp">;
+
+  // SVE2 floating-point multiply-add long (indexed)
+  def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
+  def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
+  def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
+  def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
+
+  // SVE2 floating-point multiply-add long
+  def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
+  def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
+  def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
+  def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
+
+  // SVE2 bitwise ternary operations
+  defm EOR3_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b000, "eor3">;
+  defm BCAX_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b010, "bcax">;
+  def BSL_ZZZZ_D    : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
+  def BSL1N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
+  def BSL2N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
+  def NBSL_ZZZZ_D   : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
+
+  // SVE2 bitwise exclusive-or and rotate right by immediate
+  defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
+
+  // SVE2 extract vector (immediate offset, constructive)
+  def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+
+  // SVE floating-point convert precision
+  def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
+
+  // SVE floating-point convert to integer
+  defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+
+  // Non-temporal contiguous loads (vector + register)
+  defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
+  defm LDNT1B_ZZR_S  : sve2_mem_cldnt_vs<0b00001, "ldnt1b",  Z_s, ZPR32>;
+  defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
+  defm LDNT1H_ZZR_S  : sve2_mem_cldnt_vs<0b00101, "ldnt1h",  Z_s, ZPR32>;
+  defm LDNT1W_ZZR_S  : sve2_mem_cldnt_vs<0b01001, "ldnt1w",  Z_s, ZPR32>;
+
+  defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
+  defm LDNT1B_ZZR_D  : sve2_mem_cldnt_vs<0b10010, "ldnt1b",  Z_d, ZPR64>;
+  defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
+  defm LDNT1H_ZZR_D  : sve2_mem_cldnt_vs<0b10110, "ldnt1h",  Z_d, ZPR64>;
+  defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
+  defm LDNT1W_ZZR_D  : sve2_mem_cldnt_vs<0b11010, "ldnt1w",  Z_d, ZPR64>;
+  defm LDNT1D_ZZR_D  : sve2_mem_cldnt_vs<0b11110, "ldnt1d",  Z_d, ZPR64>;
+
+  // SVE2 vector splice (constructive)
+  defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+
+  // Predicated shifts
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
+  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+
+  // Non-temporal contiguous stores (vector + register)
+  defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
+  defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
+  defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+
+  defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
+  defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
+  defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
+  defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+
+  // SVE table lookup (three sources)
+  defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
+  defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx">;
+
+  // SVE integer compare scalar count and limit
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
+
+  // SVE pointer conflict compare
+  defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
+  defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
+}
+
+let Predicates = [HasSVE2AES] in {
+  // SVE2 crypto destructive binary operations
+  def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
+  def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
+
+  // SVE2 crypto unary operations
+  def AESMC_ZZ_B  : sve2_crypto_unary_op<0b0, "aesmc">;
+  def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
+
+  // PMULLB and PMULLT instructions which operate with 64-bit source and
+  // 128-bit destination elements are enabled with crypto extensions, similar
+  // to NEON PMULL2 instruction.
+  def PMULLB_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11010, "pmullb",
+                                         ZPR128, ZPR64, ZPR64>;
+  def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
+                                         ZPR128, ZPR64, ZPR64>;
+}
+
+let Predicates = [HasSVE2SM4] in {
+  // SVE2 crypto constructive binary operations
+  def SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32>;
+  // SVE2 crypto destructive binary operations
+  def SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32>;
+}
+
+let Predicates = [HasSVE2SHA3] in {
+  // SVE2 crypto constructive binary operations
+  def RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1",    ZPR64>;
+}
+
+let Predicates = [HasSVE2BitPerm] in {
+  // SVE2 bitwise permute
+  defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
+  defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep">;
+  defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp">;
+}
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index f253a4f3e25a..a6df0f3f083c 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -1,9 +1,8 @@
 //==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,7 @@ def CortexA53Model : SchedMachineModel {
                              // v 1.0 Spreadsheet
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index ade03f23f8c7..9f566d1c7079 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,7 @@ def CortexA57Model : SchedMachineModel {
   let LoopMicroOpBufferSize = 16;
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index 55005e1d9ed1..987ed3c4ebfb 100644
--- a/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td
index 7a474ba8ef9b..798ecb7508c0 100644
--- a/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,7 @@ def CycloneModel : SchedMachineModel {
   let MispredictPenalty = 16; // 14-19 cycles are typical.
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM1.td b/lib/Target/AArch64/AArch64SchedExynosM1.td
index f757d53b6c1c..f1e76e2c20d3 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,7 +24,7 @@ def ExynosM1Model : SchedMachineModel {
   let MispredictPenalty     = 14; // Minimum branch misprediction penalty.
   let CompleteModel         =  1; // Use the default model otherwise.
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM3.td b/lib/Target/AArch64/AArch64SchedExynosM3.td
index 15935088a17e..c9d29d75d9db 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,7 +24,7 @@ def ExynosM3Model : SchedMachineModel {
   let MispredictPenalty     =  16; // Minimum branch misprediction penalty.
   let CompleteModel         =   1; // Use the default model otherwise.
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td
index 4d892465b3f2..c8bf05f16131 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,7 +24,7 @@ def ExynosM4Model : SchedMachineModel {
   let MispredictPenalty     =  16; // Minimum branch misprediction penalty.
   let CompleteModel         =   1; // Use the default model otherwise.
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 }
 
 //===----------------------------------------------------------------------===//
@@ -239,7 +238,6 @@ def M4WriteNEONK   : SchedWriteRes<[M4UnitNSHF,
                                     M4UnitS0]>    { let Latency = 5;
                                                     let NumMicroOps = 2; }
 def M4WriteNEONL   : SchedWriteRes<[M4UnitNMUL]>  { let Latency = 3; }
-def M4WriteNEONM   : SchedWriteRes<[M4UnitNMUL]>  { let Latency = 3; }
 def M4WriteNEONN   : SchedWriteRes<[M4UnitNMSC,
                                     M4UnitNMSC]>  { let Latency = 5;
                                                     let NumMicroOps = 2; }
@@ -480,8 +478,6 @@ def M4WriteCOPY    : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
                                         SchedVar<NoSchedPred,  [M4WriteZ0]>]>;
 def M4WriteMOVI    : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
                                         SchedVar<NoSchedPred,       [M4WriteNALU1]>]>;
-def M4WriteMULL    : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
-                                        SchedVar<NoSchedPred,               [M4WriteNMUL3]>]>;
 
 // Fast forwarding.
 def M4ReadAESM1    : SchedReadAdvance<+1, [M4WriteNCRY1]>;
@@ -489,7 +485,8 @@ def M4ReadFMACM1   : SchedReadAdvance<+1, [M4WriteFMAC4,
                                            M4WriteFMAC4H,
                                            M4WriteFMAC5]>;
 def M4ReadNMULM1   : SchedReadAdvance<+1, [M4WriteNMUL3]>;
-def M4ReadMULLP2   : SchedReadAdvance<-2, [M4WriteNEONM]>;
+def M4ReadNMULP2   : SchedReadAdvance<-2, [M4WriteNMUL3]>;
+
 
 //===----------------------------------------------------------------------===//
 // Coarse scheduling model.
@@ -662,10 +659,8 @@ def : InstRW<[M4WriteNEONK],  (instregex "^FMOVDXHighr")>;
 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
 def : InstRW<[M4WriteNMSC1],  (instregex "^FRECPXv1")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)S16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4],  (instregex "^F(RECP|RSQRT)S(32|64)")>;
 
 // FP load instructions.
 def : InstRW<[WriteVLD],    (instregex "^LDR[SDQ]l")>;
@@ -736,14 +731,20 @@ def : InstRW<[M4WriteNALU1],  (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
 def : InstRW<[M4WriteNMSC1],  (instregex "^[SU](MIN|MAX)v")>;
 def : InstRW<[M4WriteNMSC2],  (instregex "^[SU](MIN|MAX)Pv")>;
 def : InstRW<[M4WriteNHAD3],  (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M4WriteNMUL3],  (instregex "^(SQR?D)?MULH?v")>;
 def : InstRW<[M4WriteNMUL3,
               M4ReadNMULM1],  (instregex "^ML[AS]v")>;
-def : InstRW<[M4WriteNMUL3],  (instregex "^SQRDML[AS]H")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2],  (instregex "^(S|U|SQD)ML[AS]Lv")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2],  (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2],  (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1],  (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2],  (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
 def : InstRW<[M4WriteNMUL3],  (instregex "^[SU]DOT(lane)?v")>;
 def : InstRW<[M4WriteNHAD3],  (instregex "^[SU]ADALPv")>;
 def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
@@ -808,10 +809,8 @@ def : InstRW<[M4WriteNALU1],  (instregex "^FMOVv.f(32|64)")>;
 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
 def : InstRW<[M4WriteFCVT3],  (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)Sv.f16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1],  (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4],  (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
 def : InstRW<[M4WriteNSHF1],  (instregex "^REV(16|32|64)v")>;
 def : InstRW<[M4WriteNSHFA],  (instregex "^TB[LX]v(8|16)i8One")>;
 def : InstRW<[M4WriteNSHFB],  (instregex "^TB[LX]v(8|16)i8Two")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index 84825458e47c..92d03963de57 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -1,9 +1,8 @@
 //==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,7 +23,7 @@ def FalkorModel : SchedMachineModel {
   let MispredictPenalty = 11;  // Minimum branch misprediction penalty.
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index ff14e639d1a5..697a0f69c58c 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -1,9 +1,8 @@
 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64SchedKryo.td b/lib/Target/AArch64/AArch64SchedKryo.td
index 68de3e077c96..0e1a24103121 100644
--- a/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/lib/Target/AArch64/AArch64SchedKryo.td
@@ -1,9 +1,8 @@
 //==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,7 +27,7 @@ def KryoModel : SchedMachineModel {
   let LoopMicroOpBufferSize = 16;
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td
index cf4cdabb8cbf..4c60992e6351 100644
--- a/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td
index 48c54230e9d8..0c1d82d354c0 100644
--- a/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -1,9 +1,8 @@
 //===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -103,17 +102,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
 // Identify FP instructions.
 def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
 
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def ExynosLongVectorUpperFn   : TIIPredicate<
-                                  "isExynosLongVectorUpper",
-                                  MCOpcodeSwitchStatement<
-                                  [MCOpcodeSwitchCase<
-                                    IsLongVectorUpperOp.ValidOpcodes,
-                                    MCReturnStatement<TruePred>>],
-                                  MCReturnStatement<FalsePred>>>;
-def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
-
 // Identify 128-bit NEON instructions.
 def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
 
diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td
index dbaf11fc95dd..0ef0f3f8675a 100644
--- a/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -1,9 +1,8 @@
 //===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -268,59 +267,6 @@ def IsStoreRegOffsetOp     : CheckOpcode<[STRBBroW, STRBBroX,
 def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
                                                      IsStoreRegOffsetOp.ValidOpcodes)>;
 
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def IsLongVectorUpperOp    : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
-                                          FCVTNv8i16, FCVTNv4i32,
-                                          FCVTXNv4f32,
-                                          PMULLv16i8, PMULLv2i64,
-                                          RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
-                                          RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
-                                          RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
-                                          SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
-                                          SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
-                                          SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
-                                          SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
-                                          SHLLv16i8, SHLLv8i16, SHLLv4i32,
-                                          SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
-                                          SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
-                                          SMLALv8i16_indexed, SMLALv4i32_indexed,
-                                          SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
-                                          SMLSLv8i16_indexed, SMLSLv4i32_indexed,
-                                          SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
-                                          SMULLv8i16_indexed, SMULLv4i32_indexed,
-                                          SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
-                                          SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
-                                          SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
-                                          SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
-                                          SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
-                                          SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
-                                          SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
-                                          SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
-                                          SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
-                                          SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
-                                          SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
-                                          SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
-                                          SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
-                                          SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
-                                          SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
-                                          UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
-                                          UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
-                                          UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
-                                          UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
-                                          UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
-                                          UMLALv8i16_indexed, UMLALv4i32_indexed,
-                                          UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
-                                          UMLSLv8i16_indexed, UMLSLv4i32_indexed,
-                                          UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
-                                          UMULLv8i16_indexed, UMULLv4i32_indexed,
-                                          UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
-                                          UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
-                                          USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
-                                          USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
-                                          USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
-                                          XTNv16i8, XTNv8i16, XTNv4i32]>;
-
 // Target predicates.
 
 // Identify an instruction that effectively transfers a register to another.
diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td
index fbbd3850d0fd..3b6aecf5c035 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -1,9 +1,8 @@
 //==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,7 +25,7 @@ def ThunderXT8XModel : SchedMachineModel {
   let PostRAScheduler = 1;    // Use PostRA scheduler.
   let CompleteModel = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index bee3392b6d3b..674ea19b082f 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,9 +1,8 @@
 //=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,7 +25,7 @@ def ThunderX2T99Model : SchedMachineModel {
   let PostRAScheduler       =   1; // Using PostRA sched.
   let CompleteModel         =   1;
 
-  list<Predicate> UnsupportedFeatures = [HasSVE];
+  list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
index f55ba4d42fce..49c0c1782236 100644
--- a/lib/Target/AArch64/AArch64Schedule.td
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -1,9 +1,8 @@
 //==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a719d47618e5..60dbace03ca6 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
     CodeGenOpt::Level OptLevel) const {
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+static const int kSetTagLoopThreshold = 176; // bytes; >= this uses ST(Z)Gloop
+// Emit unrolled ST(Z)2G/ST(Z)G nodes for the ObjSize / 16 units at Ptr.
+static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
+                                  const MachineMemOperand *BaseMemOperand,
+                                  bool ZeroData) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned ObjSizeScaled = ObjSize / 16; // Size in 16-byte units.
+
+  SDValue TagSrc = Ptr;
+  if (Ptr.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
+    Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
+    // A frame index operand may end up as [SP + offset] => it is fine to use SP
+    // register as the tag source.
+    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
+  }
+  // OpCode1 writes one 16-byte unit, OpCode2 two; ZeroData picks STZ* forms.
+  const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
+  const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
+  // Each store takes the incoming Chain; a TokenFactor joins them at the end.
+  SmallVector<SDValue, 8> OutChains;
+  unsigned OffsetScaled = 0;
+  while (OffsetScaled < ObjSizeScaled) {
+    if (ObjSizeScaled - OffsetScaled >= 2) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode2, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v4i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
+      OffsetScaled += 2;
+      OutChains.push_back(St);
+      continue;
+    }
+    // Fewer than two units remain: finish with a single one-unit store.
+    if (ObjSizeScaled - OffsetScaled > 0) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode1, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v2i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
+      OffsetScaled += 1;
+      OutChains.push_back(St);
+    }
+  }
+  // NOTE(review): OutChains is empty if ObjSize < 16; confirm callers avoid it.
+  SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+  return Res;
+}
+// Lower a constant-size set-tag to unrolled stores or an ST(Z)Gloop node.
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
+    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
+  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
+  assert(ObjSize % 16 == 0); // Whole 16-byte units only.
+  // One store memory operand covers all ObjSize bytes of the destination.
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+  // NOTE(review): (int)ObjSize narrows a uint64_t; check sizes >= 2^31.
+  bool UseSetTagRangeLoop =
+      kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
+  if (!UseSetTagRangeLoop) // Below the threshold: unroll.
+    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
+                              ZeroData);
+  // Odd unit count: peel one unit, leaving a multiple of 32 bytes to loop over.
+  if (ObjSize % 32 != 0) {
+    SDNode *St1 = DAG.getMachineNode(
+        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+        {MVT::i64, MVT::Other},
+        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+    ObjSize -= 16;
+    Addr = SDValue(St1, 0);
+    Chain = SDValue(St1, 1);
+  }
+  // Loop operands are (size, address, chain); result 2 is the output chain.
+  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+  SDNode *St = DAG.getMachineNode(
+      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+
+  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
+  return SDValue(St, 2);
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 7e4f11091226..d0967fb973cc 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,10 @@ public:
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align, bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
+  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Op1, SDValue Op2,
+                                  MachinePointerInfo DstPtrInfo,
+                                  bool ZeroData) const override;
   bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
 };
 }
diff --git a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index e9699b0367d3..3087e6ce441d 100644
--- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -1,9 +1,8 @@
 //===- AArch64SpeculationHardening.cpp - Harden Against Missspeculation  --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -103,6 +102,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
@@ -146,25 +146,31 @@ private:
   BitVector RegsAlreadyMasked;
 
   bool functionUsesHardeningRegister(MachineFunction &MF) const;
-  bool instrumentControlFlow(MachineBasicBlock &MBB);
+  bool instrumentControlFlow(MachineBasicBlock &MBB,
+                             bool &UsesFullSpeculationBarrier);
   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                AArch64CC::CondCode &CondCode) const;
   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
-  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const;
-  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned TmpReg) const;
+  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    DebugLoc DL) const;
 
   bool slhLoads(MachineBasicBlock &MBB);
   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineInstr &MI, unsigned Reg);
-  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
+                                        bool UsesFullSpeculationBarrier);
   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI);
+                                  MachineBasicBlock::iterator MBBI,
+                                  bool UsesFullSpeculationBarrier);
   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                   DebugLoc DL);
 };
@@ -207,15 +213,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow(
   return true;
 }
 
+void AArch64SpeculationHardening::insertFullSpeculationBarrier(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    DebugLoc DL) const {
+  // Full control flow speculation barrier: DSB SYS, then ISB, ahead of MBBI.
+  for (unsigned Opcode : {AArch64::DSB, AArch64::ISB})
+    BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addImm(0xf);
+}
+
 void AArch64SpeculationHardening::insertTrackingCode(
     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
     DebugLoc DL) const {
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
-        .addImm(0xf);
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
-        .addImm(0xf);
+    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
   } else {
     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
         .addDef(MisspeculatingTaintReg)
@@ -227,7 +237,7 @@ void AArch64SpeculationHardening::insertTrackingCode(
 }
 
 bool AArch64SpeculationHardening::instrumentControlFlow(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
 
   bool Modified = false;
@@ -263,55 +273,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
   }
 
   // Perform correct code generation around function calls and before returns.
-  {
-    SmallVector<MachineInstr *, 4> ReturnInstructions;
-    SmallVector<MachineInstr *, 4> CallInstructions;
+  // The below variables record the return/terminator instructions and the call
+  // instructions respectively; including which register is available as a
+  // temporary register just before the recorded instructions.
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
+  // if a temporary register is not available for at least one of the
+  // instructions for which we need to transfer taint to the stack pointer, we
+  // need to insert a full speculation barrier.
+  // TmpRegisterNotAvailableEverywhere tracks that condition.
+  bool TmpRegisterNotAvailableEverywhere = false;
+
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+    MachineInstr &MI = *I;
+    if (!MI.isReturn() && !MI.isCall())
+      continue;
 
-    for (MachineInstr &MI : MBB) {
-      if (MI.isReturn())
-        ReturnInstructions.push_back(&MI);
-      else if (MI.isCall())
-        CallInstructions.push_back(&MI);
-    }
+    // The RegScavenger represents registers available *after* the MI
+    // instruction pointed to by RS.getCurrentPosition().
+    // We need to have a register that is available *before* the MI is executed.
+    if (I != MBB.begin())
+      RS.forward(std::prev(I));
+    // FIXME: The below just finds *an* unused register. Maybe code could be
+    // optimized more if this looks for the register that isn't used for the
+    // longest time around this place, to enable more scheduling freedom. Not
+    // sure if that would actually result in a big performance difference
+    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
+    // already to do this - but it's unclear if that could easily be used here.
+    unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+    LLVM_DEBUG(dbgs() << "RS finds "
+                      << ((TmpReg == 0) ? "no register " : "register ");
+               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
+               dbgs() << "to be available at MI " << MI);
+    if (TmpReg == 0)
+      TmpRegisterNotAvailableEverywhere = true;
+    if (MI.isReturn())
+      ReturnInstructions.push_back({&MI, TmpReg});
+    else if (MI.isCall())
+      CallInstructions.push_back({&MI, TmpReg});
+  }
 
-    Modified |=
-        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+  if (TmpRegisterNotAvailableEverywhere) {
+    // When a temporary register is not available everywhere in this basic
+    // block where a propagate-taint-to-sp operation is needed, just emit a
+    // full speculation barrier at the start of this basic block, which
+    // renders the taint/speculation tracking in this basic block unnecessary.
+    insertFullSpeculationBarrier(MBB, MBB.begin(),
+                                 (MBB.begin())->getDebugLoc());
+    UsesFullSpeculationBarrier = true;
+    Modified = true;
+  } else {
+    for (auto MI_Reg : ReturnInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(
+          dbgs()
+          << " About to insert Reg to SP taint propagation with temp register "
+          << printReg(MI_Reg.second, TRI)
+          << " on instruction: " << *MI_Reg.first);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
+    }
 
-    for (MachineInstr *Return : ReturnInstructions)
-      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
-    for (MachineInstr *Call : CallInstructions) {
+    for (auto MI_Reg : CallInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
+                           "propagation with temp register "
+                        << printReg(MI_Reg.second, TRI)
+                        << " around instruction: " << *MI_Reg.first);
       // Just after the call:
-      MachineBasicBlock::iterator i = Call;
-      i++;
-      insertSPToRegTaintPropagation(Call->getParent(), i);
+      insertSPToRegTaintPropagation(
+          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
       // Just before the call:
-      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
     }
   }
-
   return Modified;
 }
 
 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   // If full control flow speculation barriers are used, emit a control flow
   // barrier to block potential miss-speculation in flight coming in to this
   // function.
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
     return;
   }
 
   // CMP   SP, #0   === SUBS   xzr, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
       .addDef(AArch64::XZR)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
       .addDef(MisspeculatingTaintReg)
       .addUse(AArch64::XZR)
       .addUse(AArch64::XZR)
@@ -319,7 +379,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
 }
 
 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     unsigned TmpReg) const {
   // If full control flow speculation barriers are used, there will not be
   // miss-speculation when returning from this function, and therefore, also
@@ -328,19 +388,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
     return;
 
   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(TmpReg)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
       .addDef(TmpReg, RegState::Renamable)
       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
       .addUse(MisspeculatingTaintReg, RegState::Kill)
       .addImm(0);
   // mov   SP, Xtmp === ADD SP, Xtmp, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(AArch64::SP)
       .addUse(TmpReg, RegState::Kill)
       .addImm(0)
@@ -484,7 +544,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
 /// \brief If MBBI references a pseudo instruction that should be expanded
 /// here, do the expansion and return true. Otherwise return false.
 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool UsesFullSpeculationBarrier) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   bool Is64Bit = true;
@@ -499,7 +560,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
     // Just remove the SpeculationSafe pseudo's if control flow
     // miss-speculation isn't happening because we're already inserting barriers
     // to guarantee that.
-    if (!UseControlFlowSpeculationBarrier) {
+    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
       // Mark this register and all its aliasing registers as needing to be
@@ -537,7 +598,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
 }
 
 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
   bool Modified = false;
 
   RegsNeedingCSDBBeforeUse.reset();
@@ -572,15 +633,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
           break;
         }
 
-    if (NeedToEmitBarrier)
+    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
       Modified |= insertCSDB(MBB, MBBI, DL);
 
-    Modified |= expandSpeculationSafeValue(MBB, MBBI);
+    Modified |=
+        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
 
     MBBI = NMBBI;
   }
 
-  if (RegsNeedingCSDBBeforeUse.any())
+  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
     Modified |= insertCSDB(MBB, MBBI, DL);
 
   return Modified;
@@ -609,7 +671,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
       Modified |= slhLoads(MBB);
   }
 
-  // 2.a Add instrumentation code to function entry and exits.
+  // 2. Add instrumentation code to function entry and exits.
   LLVM_DEBUG(
       dbgs()
       << "***** AArch64SpeculationHardening - track control flow *****\n");
@@ -620,17 +682,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
     EntryBlocks.push_back(LPI.LandingPadBlock);
   for (auto Entry : EntryBlocks)
     insertSPToRegTaintPropagation(
-        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
-
-  // 2.b Add instrumentation code to every basic block.
-  for (auto &MBB : MF)
-    Modified |= instrumentControlFlow(MBB);
+        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
 
-  LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
-                       "SpeculationSafeValue Pseudos *****\n");
-  // Step 3: Lower SpeculationSafeValue pseudo instructions.
-  for (auto &MBB : MF)
-    Modified |= lowerSpeculationSafeValuePseudos(MBB);
+  // 3. Add instrumentation code to every basic block.
+  for (auto &MBB : MF) {
+    bool UsesFullSpeculationBarrier = false;
+    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
+    Modified |=
+        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
+  }
 
   return Modified;
 }
diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp
new file mode 100644
index 000000000000..6e99c48bf1d7
--- /dev/null
+++ b/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -0,0 +1,345 @@
+//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-tagging"
+
+static constexpr unsigned kTagGranuleSize = 16;
+
+namespace {
+class AArch64StackTagging : public FunctionPass {
+  // Per-alloca state collected and consumed by runOnFunction.
+  struct AllocaInfo {
+    AllocaInst *AI = nullptr;
+    SmallVector<IntrinsicInst *, 2> LifetimeStart;
+    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
+    int Tag = -1; // -1 for non-tagged allocations
+  };
+
+public:
+  static char ID; // Pass ID, replacement for typeid
+
+  AArch64StackTagging() : FunctionPass(ID) {
+    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool isInterestingAlloca(const AllocaInst &AI);
+  void alignAndPadAlloca(AllocaInfo &Info);
+  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
+                 uint64_t Size);
+  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
+
+  Instruction *
+  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+                          const DominatorTree *DT);
+  bool runOnFunction(Function &F) override;
+  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
+
+private:
+  // Per-function state; assigned by runOnFunction before it is read.
+  Function *F = nullptr;
+  Function *SetTagFunc = nullptr;
+  const DataLayout *DL = nullptr;
+  // Declares that this pass preserves the CFG.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+};
+
+} // end anonymous namespace
+
+char AArch64StackTagging::ID = 0;
+// Pass initialization boilerplate; DEBUG_TYPE expands to "stack-tagging".
+INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+                      false, false)
+INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+                    false, false)
+// Factory for the pass; hands back a freshly allocated AArch64StackTagging.
+FunctionPass *llvm::createAArch64StackTaggingPass() {
+  return new AArch64StackTagging();
+}
+
+// Returns true for the allocas this pass instruments.
+bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
+  // FIXME: support dynamic allocas
+  if (!AI.getAllocatedType()->isSized() || !AI.isStaticAlloca())
+    return false;
+  // alloca() may be called with 0 size, ignore it.
+  if (AI.getAllocationSizeInBits(*DL).getValue() == 0)
+    return false;
+  // inalloca allocas are not treated as static, and we don't want dynamic
+  // alloca instrumentation for them as well.
+  // swifterror allocas are register promoted by ISel.
+  return !AI.isUsedWithInAlloca() && !AI.isSwiftError();
+}
+
+void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+                                    Value *Ptr, uint64_t Size) {
+  IRBuilder<> Builder(InsertBefore); // New call lands before InsertBefore.
+  Builder.CreateCall(SetTagFunc, {Ptr, Builder.getInt64(Size)});
+}
+
+void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+                                      uint64_t Size) {
+  IRBuilder<> Builder(InsertBefore); // New IR lands before InsertBefore.
+  Value *AllocaPtr = Builder.CreatePointerCast(AI, Builder.getInt8PtrTy());
+  Builder.CreateCall(SetTagFunc, {AllocaPtr, Builder.getInt64(Size)});
+}
+
+// Emits the irg.sp call whose result serves as the base tagged pointer.
+Instruction *AArch64StackTagging::insertBaseTaggedPointer(
+    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+    const DominatorTree *DT) {
+  // Try sinking IRG as deep as possible to avoid hurting shrink wrap: pick
+  // the nearest common dominator of every block that holds a tagged alloca.
+  BasicBlock *InsertBB = nullptr;
+  for (const auto &Entry : Allocas) {
+    const AllocaInfo &Info = Entry.second;
+    if (Info.Tag < 0)
+      continue; // Untagged allocas do not constrain the placement.
+    BasicBlock *AllocaBB = Info.AI->getParent();
+    // DT is first dereferenced when a second tagged alloca is encountered.
+    InsertBB = InsertBB ? DT->findNearestCommonDominator(InsertBB, AllocaBB)
+                        : AllocaBB;
+  }
+  assert(InsertBB);
+
+  // Create the irg.sp call at the very start of the chosen block.
+  IRBuilder<> Builder(&InsertBB->front());
+  Function *IrgSP =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
+  Instruction *BaseTag = Builder.CreateCall(IrgSP, {Builder.getInt64(0)});
+  BaseTag->setName("basetag");
+  return BaseTag;
+}
+
+// Align the alloca to the tag granule and pad its size to a granule multiple.
+void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
+  Info.AI->setAlignment(std::max(Info.AI->getAlignment(), kTagGranuleSize));
+
+  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
+  if (Size == AlignedSize)
+    return;
+
+  // Add padding to the alloca. It is static, so its array size is constant.
+  Type *AllocatedType =
+      Info.AI->isArrayAllocation()
+          ? ArrayType::get(
+                Info.AI->getAllocatedType(),
+                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+          : Info.AI->getAllocatedType();
+  Type *PaddingType =
+      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
+  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
+  auto *NewAI = new AllocaInst(
+      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
+  NewAI->takeName(Info.AI);
+  NewAI->setAlignment(Info.AI->getAlignment());
+  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
+  NewAI->setSwiftError(Info.AI->isSwiftError());
+  NewAI->copyMetadata(*Info.AI);
+  // Existing users keep the old pointer type through a bitcast of the new one.
+  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
+  Info.AI->replaceAllUsesWith(NewPtr);
+  Info.AI->eraseFromParent();
+  Info.AI = NewAI;
+}
+
+// Tags stack allocations in functions marked with Attribute::SanitizeMemTag.
+// FIXME: check for MTE extension
+bool AArch64StackTagging::runOnFunction(Function &Fn) {
+  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
+    return false;
+
+  F = &Fn;
+  DL = &Fn.getParent()->getDataLayout();
+  // Step 1: collect allocas plus the lifetime/debug intrinsics that use them.
+  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
+  SmallVector<Instruction *, 8> RetVec;
+  DenseMap<Value *, AllocaInst *> AllocaForValue;
+  SmallVector<Instruction *, 4> UnrecognizedLifetimes;
+  for (auto &BB : *F) {
+    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
+      Instruction *I = &*IT;
+      if (auto *AI = dyn_cast<AllocaInst>(I)) {
+        Allocas[AI].AI = AI;
+        continue;
+      }
+
+      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
+        if (auto *AI =
+                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
+          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
+        }
+        continue;
+      }
+
+      auto *II = dyn_cast<IntrinsicInst>(I);
+      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+        AllocaInst *AI =
+            llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
+        if (!AI) {
+          UnrecognizedLifetimes.push_back(I);
+          continue;
+        }
+        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+          Allocas[AI].LifetimeStart.push_back(II);
+        else
+          Allocas[AI].LifetimeEnd.push_back(II);
+      }
+
+      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
+        RetVec.push_back(I);
+    }
+  }
+
+  if (Allocas.empty())
+    return false;
+  // Step 2: pad/align the interesting allocas and assign tags modulo 16.
+  int NextTag = 0;
+  int NumInterestingAllocas = 0;
+  for (auto &I : Allocas) {
+    AllocaInfo &Info = I.second;
+    assert(Info.AI);
+
+    if (!isInterestingAlloca(*Info.AI)) {
+      Info.Tag = -1;
+      continue;
+    }
+
+    alignAndPadAlloca(Info);
+    NumInterestingAllocas++;
+    Info.Tag = NextTag;
+    NextTag = (NextTag + 1) % 16;
+  }
+
+  if (NumInterestingAllocas == 0)
+    return true;
+
+  SetTagFunc =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
+
+  // Compute DT only if the function has the attribute, there are more than 1
+  // interesting allocas, and it is not available for free.
+  Instruction *Base;
+  if (NumInterestingAllocas > 1) {
+    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+    if (DTWP) {
+      Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
+    } else {
+      DominatorTree DT(*F);
+      Base = insertBaseTaggedPointer(Allocas, &DT);
+    }
+  } else {
+    Base = insertBaseTaggedPointer(Allocas, nullptr);
+  }
+  // Step 3: rewrite each tagged alloca's uses and emit the tag/untag calls.
+  for (auto &I : Allocas) {
+    const AllocaInfo &Info = I.second;
+    AllocaInst *AI = Info.AI;
+    if (Info.Tag < 0)
+      continue;
+
+    // Replace alloca with tagp(alloca).
+    IRBuilder<> IRB(Info.AI->getNextNode());
+    Function *TagP = Intrinsic::getDeclaration(
+        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
+    Instruction *TagPCall =
+        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
+                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
+    if (Info.AI->hasName())
+      TagPCall->setName(Info.AI->getName() + ".tag");
+    Info.AI->replaceAllUsesWith(TagPCall);
+    TagPCall->setOperand(0, Info.AI);
+
+    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
+        Info.LifetimeEnd.size() == 1) {
+      IntrinsicInst *Start = Info.LifetimeStart[0];
+      uint64_t Size =
+          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+      Size = alignTo(Size, kTagGranuleSize);
+      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
+      untagAlloca(AI, Info.LifetimeEnd[0], Size);
+    } else {
+      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
+      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
+      for (auto &RI : RetVec) {
+        untagAlloca(AI, RI, Size);
+      }
+      // We may have inserted tag/untag outside of any lifetime interval.
+      // Remove all lifetime intrinsics for this alloca.
+      for (auto &II : Info.LifetimeStart)
+        II->eraseFromParent();
+      for (auto &II : Info.LifetimeEnd)
+        II->eraseFromParent();
+    }
+
+    // Fixup debug intrinsics to point to the new alloca.
+    for (auto DVI : Info.DbgVariableIntrinsics)
+      DVI->setArgOperand(
+          0,
+          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
+  }
+
+  // If we have instrumented at least one alloca, all unrecognized lifetime
+  // intrinsics have to go.
+  for (auto &I : UnrecognizedLifetimes)
+    I->eraseFromParent();
+
+  return true;
+}
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index d5643d384283..0e84a00df006 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -1,9 +1,8 @@
 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,7 +147,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
     for (auto &MI : MBB) {
       if (!isNarrowFPStore(MI))
         continue;
-      MachineOperand *BaseOp;
+      const MachineOperand *BaseOp;
       int64_t Offset;
       if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
           BaseOp->isReg()) {
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index dd30d25b2b50..3bc89b91c3f7 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,7 @@ void AArch64Subtarget::initializeProperties() {
   case CortexA72:
   case CortexA73:
   case CortexA75:
+  case CortexA76:
     PrefFunctionAlignment = 4;
     break;
   case Cyclone:
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 82f7bb755951..0c84cfb8329a 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -1,9 +1,8 @@
 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,7 @@ public:
     CortexA72,
     CortexA73,
     CortexA75,
+    CortexA76,
     Cyclone,
     ExynosM1,
     ExynosM3,
@@ -93,6 +93,12 @@ protected:
   bool HasPAN_RWV = false;
   bool HasCCPP = false;
 
+  // Armv8.2 Crypto extensions
+  bool HasSM4 = false;
+  bool HasSHA3 = false;
+  bool HasSHA2 = false;
+  bool HasAES = false;
+
   // ARMv8.3 extensions
   bool HasPA = false;
   bool HasJS = false;
@@ -110,15 +116,10 @@ protected:
   bool HasTLB_RMI = false;
   bool HasFMI = false;
   bool HasRCPC_IMMO = false;
-  // ARMv8.4 Crypto extensions
-  bool HasSM4 = true;
-  bool HasSHA3 = true;
-
-  bool HasSHA2 = true;
-  bool HasAES = true;
 
   bool HasLSLFast = false;
   bool HasSVE = false;
+  bool HasSVE2 = false;
   bool HasRCPC = false;
   bool HasAggressiveFMA = false;
 
@@ -134,6 +135,12 @@ protected:
   bool HasRandGen = false;
   bool HasMTE = false;
 
+  // Arm SVE2 extensions
+  bool HasSVE2AES = false;
+  bool HasSVE2SM4 = false;
+  bool HasSVE2SHA3 = false;
+  bool HasSVE2BitPerm = false;
+
   // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
   bool HasZeroCycleRegMove = false;
 
@@ -173,6 +180,9 @@ protected:
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
   bool Force32BitJumpTables = false;
+  bool UseEL1ForTP = false;
+  bool UseEL2ForTP = false;
+  bool UseEL3ForTP = false;
   uint8_t MaxInterleaveFactor = 2;
   uint8_t VectorInsertExtractBaseCost = 3;
   uint16_t CacheLineSize = 0;
@@ -324,6 +334,10 @@ public:
            hasFuseCCSelect() || hasFuseLiterals();
   }
 
+  bool useEL1ForTP() const { return UseEL1ForTP; }
+  bool useEL2ForTP() const { return UseEL2ForTP; }
+  bool useEL3ForTP() const { return UseEL3ForTP; }
+
   bool useRSqrt() const { return UseRSqrt; }
   bool force32BitJumpTables() const { return Force32BitJumpTables; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
@@ -353,6 +367,7 @@ public:
   bool hasSPE() const { return HasSPE; }
   bool hasLSLFast() const { return HasLSLFast; }
   bool hasSVE() const { return HasSVE; }
+  bool hasSVE2() const { return HasSVE2; }
   bool hasRCPC() const { return HasRCPC; }
   bool hasAggressiveFMA() const { return HasAggressiveFMA; }
   bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
@@ -365,6 +380,11 @@ public:
   bool hasBTI() const { return HasBTI; }
   bool hasRandGen() const { return HasRandGen; }
   bool hasMTE() const { return HasMTE; }
+  // Arm SVE2 extensions
+  bool hasSVE2AES() const { return HasSVE2AES; }
+  bool hasSVE2SM4() const { return HasSVE2SM4; }
+  bool hasSVE2SHA3() const { return HasSVE2SHA3; }
+  bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
 
   bool isLittleEndian() const { return IsLittle; }
 
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index a804fb11175b..536a6591478b 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -1,9 +1,8 @@
 //===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1458,6 +1457,7 @@ def : RWSysReg<"TFSR_EL2",         0b11, 0b100, 0b0110, 0b0101, 0b000>;
 def : RWSysReg<"TFSR_EL3",         0b11, 0b110, 0b0110, 0b0110, 0b000>;
 def : RWSysReg<"TFSR_EL12",        0b11, 0b101, 0b0110, 0b0110, 0b000>;
 def : RWSysReg<"TFSRE0_EL1",       0b11, 0b000, 0b0110, 0b0110, 0b001>;
+def : ROSysReg<"GMID_EL1",         0b11, 0b001, 0b0000, 0b0000, 0b100>;
 } // HasMTE
 
 // Cyclone specific system registers
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4e016525f7e4..865461480499 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,9 +16,11 @@
 #include "AArch64TargetObjectFile.h"
 #include "AArch64TargetTransformInfo.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "TargetInfo/AArch64TargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -178,6 +179,7 @@ extern "C" void LLVMInitializeAArch64Target() {
   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
   initializeLDTLSCleanupPass(*PR);
   initializeAArch64SpeculationHardeningPass(*PR);
+  initializeAArch64StackTaggingPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -209,8 +211,8 @@ static std::string computeDataLayout(const Triple &TT,
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                            Optional<Reloc::Model> RM) {
-  // AArch64 Darwin is always PIC.
-  if (TT.isOSDarwin())
+  // AArch64 Darwin and Windows are always PIC.
+  if (TT.isOSDarwin() || TT.isOSWindows())
     return Reloc::PIC_;
   // On ELF platforms the default static relocation model has a smart enough
   // linker to cope with referencing external symbols defined in a shared
@@ -384,6 +386,8 @@ public:
   void addPostRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+
+  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
 
 } // end anonymous namespace
@@ -397,6 +401,10 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
   return new AArch64PassConfig(*this, PM);
 }
 
+std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
+  return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
 void AArch64PassConfig::addIRPasses() {
   // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
   // ourselves.
@@ -439,6 +447,8 @@ void AArch64PassConfig::addIRPasses() {
     // invariant.
     addPass(createLICMPass());
   }
+
+  addPass(createAArch64StackTaggingPass());
 }
 
 // Pass Pipeline Configuration
@@ -455,7 +465,20 @@ bool AArch64PassConfig::addPreISel() {
       EnableGlobalMerge == cl::BOU_TRUE) {
     bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                                (EnableGlobalMerge == cl::BOU_UNSET);
-    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
+
+    // Merging of extern globals is enabled by default on non-Mach-O as we
+    // expect it to be generally either beneficial or harmless. On Mach-O it
+    // is disabled as we emit the .subsections_via_symbols directive which
+    // means that merging extern globals is not safe.
+    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+
+    // FIXME: extern global merging is only enabled when we optimise for size
+    // because there are some regressions with it also enabled for performance.
+    if (!OnlyOptimizeForSize)
+      MergeExternalByDefault = false;
+
+    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
+                                  MergeExternalByDefault));
   }
 
   return false;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 8d28a5e30ebf..5264efb89b9c 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -1,9 +1,8 @@
 //==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 8ae72a7ddb57..1c3d5d0743ad 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 9077eb7902fd..7ead363d42fe 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a256cb7c9215..a4b78f2a7d6b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,12 +1,12 @@
 //===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64TargetTransformInfo.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -50,8 +50,9 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) {
     Val = ~Val;
 
   // Calculate how many moves we will need to materialize this constant.
-  unsigned LZ = countLeadingZeros((uint64_t)Val);
-  return (64 - LZ + 15) / 16;
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(Val, 64, Insn);
+  return Insn.size();
 }
 
 /// Calculate the cost of materializing the given constant.
@@ -665,7 +666,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
 
-  if (!UseMaskForCond && !UseMaskForGaps && 
+  if (!UseMaskForCond && !UseMaskForGaps &&
       Factor <= TLI->getMaxSupportedInterleaveFactor()) {
     unsigned NumElts = VecTy->getVectorNumElements();
     auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 08c1a8924220..10c15a139b4c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -166,6 +165,10 @@ public:
     return false;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 2;
+  }
+
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6cc9b67e4d27..f4c55d48d215 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1,9 +1,8 @@
 //==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #include "MCTargetDesc/AArch64MCExpr.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "TargetInfo/AArch64TargetInfo.h"
 #include "AArch64InstrInfo.h"
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/APFloat.h"
@@ -242,11 +242,13 @@ public:
     if (S.getTargetStreamer() == nullptr)
       new AArch64TargetStreamer(S);
 
-    // Alias .hword/.word/xword to the target-independent .2byte/.4byte/.8byte
-    // directives as they have the same form and semantics:
-    ///  ::= (.hword | .word | .xword ) [ expression (, expression)* ]
+    // Alias .hword/.word/.[dx]word to the target-independent
+    // .2byte/.4byte/.8byte directives as they have the same form and
+    // semantics:
+    ///  ::= (.hword | .word | .dword | .xword ) [ expression (, expression)* ]
     Parser.addAliasForDirective(".hword", ".2byte");
     Parser.addAliasForDirective(".word", ".4byte");
+    Parser.addAliasForDirective(".dword", ".8byte");
     Parser.addAliasForDirective(".xword", ".8byte");
 
     // Initialize the set of available features.
@@ -1079,8 +1081,7 @@ public:
     if (Kind != k_Register || Reg.Kind != RegKind::SVEPredicateVector)
       return DiagnosticPredicateTy::NoMatch;
 
-    if (isSVEVectorReg<Class>() &&
-           (ElementWidth == 0 || Reg.ElementWidth == ElementWidth))
+    if (isSVEVectorReg<Class>() && (Reg.ElementWidth == ElementWidth))
       return DiagnosticPredicateTy::Match;
 
     return DiagnosticPredicateTy::NearMatch;
@@ -1091,8 +1092,7 @@ public:
     if (Kind != k_Register || Reg.Kind != RegKind::SVEDataVector)
       return DiagnosticPredicateTy::NoMatch;
 
-    if (isSVEVectorReg<Class>() &&
-           (ElementWidth == 0 || Reg.ElementWidth == ElementWidth))
+    if (isSVEVectorReg<Class>() && Reg.ElementWidth == ElementWidth)
       return DiagnosticPredicateTy::Match;
 
     return DiagnosticPredicateTy::NearMatch;
@@ -1272,9 +1272,11 @@ public:
   bool isExtend64() const {
     if (!isExtend())
       return false;
-    // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
+    // Make sure the extend expects a 32-bit source register.
     AArch64_AM::ShiftExtendType ET = getShiftExtendType();
-    return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
+    return ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB ||
+           ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH ||
+           ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW;
   }
 
   bool isExtendLSL64() const {
@@ -2473,7 +2475,7 @@ OperandMatchResultTy
 AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
   MCAsmParser &Parser = getParser();
   SMLoc S = getLoc();
-  const MCExpr *Expr;
+  const MCExpr *Expr = nullptr;
 
   if (Parser.getTok().is(AsmToken::Hash)) {
     Parser.Lex(); // Eat hash token.
@@ -2500,6 +2502,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
     } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
                DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
                DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
+               ELFRefKind != AArch64MCExpr::VK_ABS_PAGE_NC &&
                ELFRefKind != AArch64MCExpr::VK_GOT_PAGE &&
                ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
                ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
@@ -2523,7 +2526,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
 OperandMatchResultTy
 AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
   SMLoc S = getLoc();
-  const MCExpr *Expr;
+  const MCExpr *Expr = nullptr;
 
   // Leave anything with a bracket to the default for SVE
   if (getParser().getTok().is(AsmToken::LBrac))
@@ -2621,7 +2624,7 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
     // Operand should start from # or should be integer, emit error otherwise.
     return MatchOperand_NoMatch;
 
-  const MCExpr *Imm;
+  const MCExpr *Imm = nullptr;
   if (parseSymbolicImmVal(Imm))
     return MatchOperand_ParseFail;
   else if (Parser.getTok().isNot(AsmToken::Comma)) {
@@ -2660,7 +2663,7 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
   Parser.Lex(); // Eat the number
 
   // Just in case the optional lsl #0 is used for immediates other than zero.
-  if (ShiftAmount == 0 && Imm != 0) {
+  if (ShiftAmount == 0 && Imm != nullptr) {
     SMLoc E = Parser.getTok().getLoc();
     Operands.push_back(AArch64Operand::CreateImm(Imm, S, E, getContext()));
     return MatchOperand_Success;
@@ -2833,6 +2836,11 @@ static const struct Extension {
     {"pan-rwv", {AArch64::FeaturePAN_RWV}},
     {"ccpp", {AArch64::FeatureCCPP}},
     {"sve", {AArch64::FeatureSVE}},
+    {"sve2", {AArch64::FeatureSVE2}},
+    {"sve2-aes", {AArch64::FeatureSVE2AES}},
+    {"sve2-sm4", {AArch64::FeatureSVE2SM4}},
+    {"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
+    {"bitperm", {AArch64::FeatureSVE2BitPerm}},
     // FIXME: Unsupported extensions
     {"pan", {}},
     {"lor", {}},
@@ -3260,6 +3268,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
                   .Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12)
                   .Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12)
                   .Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC)
+                  .Case("pg_hi21_nc", AArch64MCExpr::VK_ABS_PAGE_NC)
                   .Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2)
                   .Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1)
                   .Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC)
@@ -4098,15 +4107,6 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
                    "unpredictable STXP instruction, status is also a source");
     break;
   }
-  case AArch64::LDGV: {
-    unsigned Rt = Inst.getOperand(0).getReg();
-    unsigned Rn = Inst.getOperand(1).getReg();
-    if (RI->isSubRegisterEq(Rt, Rn)) {
-      return Error(Loc[0],
-                  "unpredictable LDGV instruction, writeback register is also "
-                  "the target register");
-    }
-  }
   }
 
 
@@ -4167,7 +4167,8 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
   }
 }
 
-static std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string AArch64MnemonicSpellCheck(StringRef S,
+                                             const FeatureBitset &FBS,
                                              unsigned VariantID = 0);
 
 bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
@@ -4199,7 +4200,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
     return Error(Loc, "expected AArch64 condition code");
   case Match_AddSubRegExtendSmall:
     return Error(Loc,
-      "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+      "expected '[su]xt[bhw]' with optional integer in range [0, 4]");
   case Match_AddSubRegExtendLarge:
     return Error(Loc,
       "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
@@ -4442,7 +4443,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
   case Match_InvalidZPR64LSL64:
     return Error(Loc, "invalid shift/extend specified, expected 'z[0..31].d, lsl #3'");
   case Match_InvalidZPR0:
-    return Error(Loc, "expected register without element width sufix");
+    return Error(Loc, "expected register without element width suffix");
   case Match_InvalidZPR8:
   case Match_InvalidZPR16:
   case Match_InvalidZPR32:
@@ -4470,11 +4471,15 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
   case Match_InvalidSVEPredicateDReg:
     return Error(Loc, "invalid predicate register.");
   case Match_InvalidSVEPredicate3bAnyReg:
+    return Error(Loc, "invalid restricted predicate register, expected p0..p7 (without element suffix)");
   case Match_InvalidSVEPredicate3bBReg:
+    return Error(Loc, "invalid restricted predicate register, expected p0.b..p7.b");
   case Match_InvalidSVEPredicate3bHReg:
+    return Error(Loc, "invalid restricted predicate register, expected p0.h..p7.h");
   case Match_InvalidSVEPredicate3bSReg:
+    return Error(Loc, "invalid restricted predicate register, expected p0.s..p7.s");
   case Match_InvalidSVEPredicate3bDReg:
-    return Error(Loc, "restricted predicate has range [0, 7].");
+    return Error(Loc, "invalid restricted predicate register, expected p0.d..p7.d");
   case Match_InvalidSVEExactFPImmOperandHalfOne:
     return Error(Loc, "Invalid floating point constant, expected 0.5 or 1.0.");
   case Match_InvalidSVEExactFPImmOperandHalfTwo:
@@ -4777,10 +4782,12 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   }
 
   MCInst Inst;
+  FeatureBitset MissingFeatures;
   // First try to match against the secondary set of tables containing the
   // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
   unsigned MatchResult =
-      MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1);
+      MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+                           MatchingInlineAsm, 1);
 
   // If that fails, try against the alternate table containing long-form NEON:
   // "fadd v0.2s, v1.2s, v2.2s"
@@ -4789,9 +4796,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     // long-form match also fails.
     auto ShortFormNEONErrorInfo = ErrorInfo;
     auto ShortFormNEONMatchResult = MatchResult;
+    auto ShortFormNEONMissingFeatures = MissingFeatures;
 
     MatchResult =
-        MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+        MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+                             MatchingInlineAsm, 0);
 
     // Now, both matches failed, and the long-form match failed on the mnemonic
     // suffix token operand.  The short-form match failure is probably more
@@ -4801,6 +4810,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         ((AArch64Operand &)*Operands[1]).isTokenSuffix()) {
       MatchResult = ShortFormNEONMatchResult;
       ErrorInfo = ShortFormNEONErrorInfo;
+      MissingFeatures = ShortFormNEONMissingFeatures;
     }
   }
 
@@ -4819,17 +4829,15 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return false;
   }
   case Match_MissingFeature: {
-    assert(ErrorInfo && "Unknown missing feature!");
+    assert(MissingFeatures.any() && "Unknown missing feature!");
     // Special case the error message for the very common case where only
     // a single subtarget feature is missing (neon, e.g.).
     std::string Msg = "instruction requires:";
-    uint64_t Mask = 1;
-    for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
-      if (ErrorInfo & Mask) {
+    for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+      if (MissingFeatures[i]) {
         Msg += " ";
-        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+        Msg += getSubtargetFeatureName(i);
       }
-      Mask <<= 1;
     }
     return Error(IDLoc, Msg);
   }
@@ -5148,7 +5156,7 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
       FeatureBitset ToggleFeatures = EnableFeature
                                          ? (~Features & Extension.Features)
                                          : ( Features & Extension.Features);
-      uint64_t Features =
+      FeatureBitset Features =
           ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
       setAvailableFeatures(Features);
       break;
@@ -5160,15 +5168,9 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
 /// parseDirectiveArchExtension
 ///   ::= .arch_extension [no]feature
 bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
-  MCAsmParser &Parser = getParser();
-
-  if (getLexer().isNot(AsmToken::Identifier))
-    return Error(getLexer().getLoc(), "expected architecture extension name");
+  SMLoc ExtLoc = getLoc();
 
-  const AsmToken &Tok = Parser.getTok();
-  StringRef Name = Tok.getString();
-  SMLoc ExtLoc = Tok.getLoc();
-  Lex();
+  StringRef Name = getParser().parseStringToEndOfStatement().trim();
 
   if (parseToken(AsmToken::EndOfStatement,
                  "unexpected token in '.arch_extension' directive"))
@@ -5192,7 +5194,7 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
     FeatureBitset ToggleFeatures = EnableFeature
                                        ? (~Features & Extension.Features)
                                        : (Features & Extension.Features);
-    uint64_t Features =
+    FeatureBitset Features =
         ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
     setAvailableFeatures(Features);
     return false;
@@ -5257,7 +5259,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
       FeatureBitset ToggleFeatures = EnableFeature
                                          ? (~Features & Extension.Features)
                                          : ( Features & Extension.Features);
-      uint64_t Features =
+      FeatureBitset Features =
           ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
       setAvailableFeatures(Features);
       FoundExtension = true;
@@ -5518,6 +5520,8 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
   RegisterMCAsmParser<AArch64AsmParser> X(getTheAArch64leTarget());
   RegisterMCAsmParser<AArch64AsmParser> Y(getTheAArch64beTarget());
   RegisterMCAsmParser<AArch64AsmParser> Z(getTheARM64Target());
+  RegisterMCAsmParser<AArch64AsmParser> W(getTheARM64_32Target());
+  RegisterMCAsmParser<AArch64AsmParser> V(getTheAArch64_32Target());
 }
 
 #define GET_REGISTER_MATCHER
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4102f1eb5cc1..145ffef6f6f9 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1,9 +1,8 @@
 //===- AArch64Disassembler.cpp - Disassembler for AArch64 -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #include "AArch64ExternalSymbolizer.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "TargetInfo/AArch64TargetInfo.h"
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm-c/Disassembler.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
@@ -220,11 +220,6 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
 static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr, const void *Decoder);
 
-static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
-                                                       uint32_t insn,
-                                                       uint64_t address,
-                                                       const void* Decoder);
-
 static bool Check(DecodeStatus &Out, DecodeStatus In) {
   switch (In) {
     case MCDisassembler::Success:
@@ -292,11 +287,19 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
                                        createAArch64ExternalSymbolizer);
   TargetRegistry::RegisterMCSymbolizer(getTheAArch64beTarget(),
                                        createAArch64ExternalSymbolizer);
+  TargetRegistry::RegisterMCDisassembler(getTheAArch64_32Target(),
+                                         createAArch64Disassembler);
+  TargetRegistry::RegisterMCSymbolizer(getTheAArch64_32Target(),
+                                       createAArch64ExternalSymbolizer);
 
   TargetRegistry::RegisterMCDisassembler(getTheARM64Target(),
                                          createAArch64Disassembler);
   TargetRegistry::RegisterMCSymbolizer(getTheARM64Target(),
                                        createAArch64ExternalSymbolizer);
+  TargetRegistry::RegisterMCDisassembler(getTheARM64_32Target(),
+                                         createAArch64Disassembler);
+  TargetRegistry::RegisterMCSymbolizer(getTheARM64_32Target(),
+                                       createAArch64ExternalSymbolizer);
 }
 
 static const unsigned FPR128DecoderTable[] = {
@@ -1619,7 +1622,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
   case AArch64::MOVIv4s_msl:
   case AArch64::MVNIv2s_msl:
   case AArch64::MVNIv4s_msl:
-    Inst.addOperand(MCOperand::createImm(cmode & 1 ? 0x110 : 0x108));
+    Inst.addOperand(MCOperand::createImm((cmode & 1) ? 0x110 : 0x108));
     break;
   }
 
@@ -1779,8 +1782,8 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
   if (RegNo & 0x1)
     return Fail;
 
-  unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
-  Inst.addOperand(MCOperand::createReg(Register));
+  unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2);
+  Inst.addOperand(MCOperand::createReg(Reg));
   return Success;
 }
 
@@ -1852,25 +1855,3 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
   Inst.addOperand(MCOperand::createImm(Imm + 1));
   return Success;
 }
-
-static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
-                                                       uint32_t insn,
-                                                       uint64_t address,
-                                                       const void* Decoder) {
-  unsigned Rn = fieldFromInstruction(insn, 5, 5);
-  unsigned Rt = fieldFromInstruction(insn, 0, 5);
-
-  // Outputs
-  DecodeGPR64spRegisterClass(Inst, Rn, address, Decoder);
-  DecodeGPR64RegisterClass(Inst, Rt, address, Decoder);
-
-  // Input (Rn again)
-  Inst.addOperand(Inst.getOperand(0));
-
-  //Do this post decode since the raw number for xzr and sp is the same
-  if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
-    return SoftFail;
-  } else {
-    return Success;
-  }
-}
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index bc2f7f181699..2ba5a695701f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -1,9 +1,8 @@
 //===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 342655a29b1d..3f815ac8c3d0 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -1,9 +1,8 @@
 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index 49e844963797..dc72331660cc 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -1,9 +1,8 @@
 //===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
deleted file mode 100644
index dcf2dd251149..000000000000
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ /dev/null
@@ -1,1582 +0,0 @@
-//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an AArch64 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64InstPrinter.h"
-#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <string>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "AArch64GenAsmWriter.inc"
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "AArch64GenAsmWriter1.inc"
-
-AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
-                                       const MCInstrInfo &MII,
-                                       const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
-                                                 const MCInstrInfo &MII,
-                                                 const MCRegisterInfo &MRI)
-    : AArch64InstPrinter(MAI, MII, MRI) {}
-
-void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  // This is for .cfi directives.
-  OS << getRegisterName(RegNo);
-}
-
-void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                   StringRef Annot,
-                                   const MCSubtargetInfo &STI) {
-  // Check for special encodings and print the canonical alias instead.
-
-  unsigned Opcode = MI->getOpcode();
-
-  if (Opcode == AArch64::SYSxt)
-    if (printSysAlias(MI, STI, O)) {
-      printAnnotation(O, Annot);
-      return;
-    }
-
-  // SBFM/UBFM should print to a nicer aliased form if possible.
-  if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri ||
-      Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) {
-    const MCOperand &Op0 = MI->getOperand(0);
-    const MCOperand &Op1 = MI->getOperand(1);
-    const MCOperand &Op2 = MI->getOperand(2);
-    const MCOperand &Op3 = MI->getOperand(3);
-
-    bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri);
-    bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri);
-    if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) {
-      const char *AsmMnemonic = nullptr;
-
-      switch (Op3.getImm()) {
-      default:
-        break;
-      case 7:
-        if (IsSigned)
-          AsmMnemonic = "sxtb";
-        else if (!Is64Bit)
-          AsmMnemonic = "uxtb";
-        break;
-      case 15:
-        if (IsSigned)
-          AsmMnemonic = "sxth";
-        else if (!Is64Bit)
-          AsmMnemonic = "uxth";
-        break;
-      case 31:
-        // *xtw is only valid for signed 64-bit operations.
-        if (Is64Bit && IsSigned)
-          AsmMnemonic = "sxtw";
-        break;
-      }
-
-      if (AsmMnemonic) {
-        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
-          << ", " << getRegisterName(getWRegFromXReg(Op1.getReg()));
-        printAnnotation(O, Annot);
-        return;
-      }
-    }
-
-    // All immediate shifts are aliases, implemented using the Bitfield
-    // instruction. In all cases the immediate shift amount shift must be in
-    // the range 0 to (reg.size -1).
-    if (Op2.isImm() && Op3.isImm()) {
-      const char *AsmMnemonic = nullptr;
-      int shift = 0;
-      int64_t immr = Op2.getImm();
-      int64_t imms = Op3.getImm();
-      if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) {
-        AsmMnemonic = "lsl";
-        shift = 31 - imms;
-      } else if (Opcode == AArch64::UBFMXri && imms != 0x3f &&
-                 ((imms + 1 == immr))) {
-        AsmMnemonic = "lsl";
-        shift = 63 - imms;
-      } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) {
-        AsmMnemonic = "lsr";
-        shift = immr;
-      } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) {
-        AsmMnemonic = "lsr";
-        shift = immr;
-      } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) {
-        AsmMnemonic = "asr";
-        shift = immr;
-      } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) {
-        AsmMnemonic = "asr";
-        shift = immr;
-      }
-      if (AsmMnemonic) {
-        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
-          << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
-        printAnnotation(O, Annot);
-        return;
-      }
-    }
-
-    // SBFIZ/UBFIZ aliases
-    if (Op2.getImm() > Op3.getImm()) {
-      O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t'
-        << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
-        << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1;
-      printAnnotation(O, Annot);
-      return;
-    }
-
-    // Otherwise SBFX/UBFX is the preferred form
-    O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
-      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
-      << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) {
-    const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0
-    const MCOperand &Op2 = MI->getOperand(2);
-    int ImmR = MI->getOperand(3).getImm();
-    int ImmS = MI->getOperand(4).getImm();
-
-    if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
-        (ImmR == 0 || ImmS < ImmR)) {
-      // BFC takes precedence over its entire range, sligtly differently to BFI.
-      int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
-      int LSB = (BitWidth - ImmR) % BitWidth;
-      int Width = ImmS + 1;
-
-      O << "\tbfc\t" << getRegisterName(Op0.getReg())
-        << ", #" << LSB << ", #" << Width;
-      printAnnotation(O, Annot);
-      return;
-    } else if (ImmS < ImmR) {
-      // BFI alias
-      int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
-      int LSB = (BitWidth - ImmR) % BitWidth;
-      int Width = ImmS + 1;
-
-      O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
-        << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
-      printAnnotation(O, Annot);
-      return;
-    }
-
-    int LSB = ImmR;
-    int Width = ImmS - ImmR + 1;
-    // Otherwise BFXIL the preferred form
-    O << "\tbfxil\t"
-      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
-      << ", #" << LSB << ", #" << Width;
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
-  // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
-  // printed.
-  if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi ||
-       Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) &&
-      MI->getOperand(1).isExpr()) {
-    if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi)
-      O << "\tmovz\t";
-    else
-      O << "\tmovn\t";
-
-    O << getRegisterName(MI->getOperand(0).getReg()) << ", #";
-    MI->getOperand(1).getExpr()->print(O, &MAI);
-    return;
-  }
-
-  if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) &&
-      MI->getOperand(2).isExpr()) {
-    O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #";
-    MI->getOperand(2).getExpr()->print(O, &MAI);
-    return;
-  }
-
-  // MOVZ, MOVN and "ORR wzr, #imm" instructions are aliases for MOV, but their
-  // domains overlap so they need to be prioritized. The chain is "MOVZ lsl #0 >
-  // MOVZ lsl #N > MOVN lsl #0 > MOVN lsl #N > ORR". The highest instruction
-  // that can represent the move is the MOV alias, and the rest get printed
-  // normally.
-  if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi) &&
-      MI->getOperand(1).isImm() && MI->getOperand(2).isImm()) {
-    int RegWidth = Opcode == AArch64::MOVZXi ? 64 : 32;
-    int Shift = MI->getOperand(2).getImm();
-    uint64_t Value = (uint64_t)MI->getOperand(1).getImm() << Shift;
-
-    if (AArch64_AM::isMOVZMovAlias(Value, Shift,
-                                   Opcode == AArch64::MOVZXi ? 64 : 32)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
-      return;
-    }
-  }
-
-  if ((Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) &&
-      MI->getOperand(1).isImm() && MI->getOperand(2).isImm()) {
-    int RegWidth = Opcode == AArch64::MOVNXi ? 64 : 32;
-    int Shift = MI->getOperand(2).getImm();
-    uint64_t Value = ~((uint64_t)MI->getOperand(1).getImm() << Shift);
-    if (RegWidth == 32)
-      Value = Value & 0xffffffff;
-
-    if (AArch64_AM::isMOVNMovAlias(Value, Shift, RegWidth)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
-      return;
-    }
-  }
-
-  if ((Opcode == AArch64::ORRXri || Opcode == AArch64::ORRWri) &&
-      (MI->getOperand(1).getReg() == AArch64::XZR ||
-       MI->getOperand(1).getReg() == AArch64::WZR) &&
-      MI->getOperand(2).isImm()) {
-    int RegWidth = Opcode == AArch64::ORRXri ? 64 : 32;
-    uint64_t Value = AArch64_AM::decodeLogicalImmediate(
-        MI->getOperand(2).getImm(), RegWidth);
-    if (!AArch64_AM::isAnyMOVWMovAlias(Value, RegWidth)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
-      return;
-    }
-  }
-
-  if (Opcode == AArch64::CompilerBarrier) {
-    O << '\t' << MAI.getCommentString() << " COMPILER BARRIER";
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-    O << "\ttsb\tcsync";
-    return;
-  }
-
-  if (!printAliasInstr(MI, STI, O))
-    printInstruction(MI, STI, O);
-
-  printAnnotation(O, Annot);
-}
-
-static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
-                                bool &IsTbx) {
-  switch (Opcode) {
-  case AArch64::TBXv8i8One:
-  case AArch64::TBXv8i8Two:
-  case AArch64::TBXv8i8Three:
-  case AArch64::TBXv8i8Four:
-    IsTbx = true;
-    Layout = ".8b";
-    return true;
-  case AArch64::TBLv8i8One:
-  case AArch64::TBLv8i8Two:
-  case AArch64::TBLv8i8Three:
-  case AArch64::TBLv8i8Four:
-    IsTbx = false;
-    Layout = ".8b";
-    return true;
-  case AArch64::TBXv16i8One:
-  case AArch64::TBXv16i8Two:
-  case AArch64::TBXv16i8Three:
-  case AArch64::TBXv16i8Four:
-    IsTbx = true;
-    Layout = ".16b";
-    return true;
-  case AArch64::TBLv16i8One:
-  case AArch64::TBLv16i8Two:
-  case AArch64::TBLv16i8Three:
-  case AArch64::TBLv16i8Four:
-    IsTbx = false;
-    Layout = ".16b";
-    return true;
-  default:
-    return false;
-  }
-}
-
-struct LdStNInstrDesc {
-  unsigned Opcode;
-  const char *Mnemonic;
-  const char *Layout;
-  int ListOperand;
-  bool HasLane;
-  int NaturalOffset;
-};
-
-static const LdStNInstrDesc LdStNInstInfo[] = {
-  { AArch64::LD1i8,             "ld1",  ".b",     1, true,  0  },
-  { AArch64::LD1i16,            "ld1",  ".h",     1, true,  0  },
-  { AArch64::LD1i32,            "ld1",  ".s",     1, true,  0  },
-  { AArch64::LD1i64,            "ld1",  ".d",     1, true,  0  },
-  { AArch64::LD1i8_POST,        "ld1",  ".b",     2, true,  1  },
-  { AArch64::LD1i16_POST,       "ld1",  ".h",     2, true,  2  },
-  { AArch64::LD1i32_POST,       "ld1",  ".s",     2, true,  4  },
-  { AArch64::LD1i64_POST,       "ld1",  ".d",     2, true,  8  },
-  { AArch64::LD1Rv16b,          "ld1r", ".16b",   0, false, 0  },
-  { AArch64::LD1Rv8h,           "ld1r", ".8h",    0, false, 0  },
-  { AArch64::LD1Rv4s,           "ld1r", ".4s",    0, false, 0  },
-  { AArch64::LD1Rv2d,           "ld1r", ".2d",    0, false, 0  },
-  { AArch64::LD1Rv8b,           "ld1r", ".8b",    0, false, 0  },
-  { AArch64::LD1Rv4h,           "ld1r", ".4h",    0, false, 0  },
-  { AArch64::LD1Rv2s,           "ld1r", ".2s",    0, false, 0  },
-  { AArch64::LD1Rv1d,           "ld1r", ".1d",    0, false, 0  },
-  { AArch64::LD1Rv16b_POST,     "ld1r", ".16b",   1, false, 1  },
-  { AArch64::LD1Rv8h_POST,      "ld1r", ".8h",    1, false, 2  },
-  { AArch64::LD1Rv4s_POST,      "ld1r", ".4s",    1, false, 4  },
-  { AArch64::LD1Rv2d_POST,      "ld1r", ".2d",    1, false, 8  },
-  { AArch64::LD1Rv8b_POST,      "ld1r", ".8b",    1, false, 1  },
-  { AArch64::LD1Rv4h_POST,      "ld1r", ".4h",    1, false, 2  },
-  { AArch64::LD1Rv2s_POST,      "ld1r", ".2s",    1, false, 4  },
-  { AArch64::LD1Rv1d_POST,      "ld1r", ".1d",    1, false, 8  },
-  { AArch64::LD1Onev16b,        "ld1",  ".16b",   0, false, 0  },
-  { AArch64::LD1Onev8h,         "ld1",  ".8h",    0, false, 0  },
-  { AArch64::LD1Onev4s,         "ld1",  ".4s",    0, false, 0  },
-  { AArch64::LD1Onev2d,         "ld1",  ".2d",    0, false, 0  },
-  { AArch64::LD1Onev8b,         "ld1",  ".8b",    0, false, 0  },
-  { AArch64::LD1Onev4h,         "ld1",  ".4h",    0, false, 0  },
-  { AArch64::LD1Onev2s,         "ld1",  ".2s",    0, false, 0  },
-  { AArch64::LD1Onev1d,         "ld1",  ".1d",    0, false, 0  },
-  { AArch64::LD1Onev16b_POST,   "ld1",  ".16b",   1, false, 16 },
-  { AArch64::LD1Onev8h_POST,    "ld1",  ".8h",    1, false, 16 },
-  { AArch64::LD1Onev4s_POST,    "ld1",  ".4s",    1, false, 16 },
-  { AArch64::LD1Onev2d_POST,    "ld1",  ".2d",    1, false, 16 },
-  { AArch64::LD1Onev8b_POST,    "ld1",  ".8b",    1, false, 8  },
-  { AArch64::LD1Onev4h_POST,    "ld1",  ".4h",    1, false, 8  },
-  { AArch64::LD1Onev2s_POST,    "ld1",  ".2s",    1, false, 8  },
-  { AArch64::LD1Onev1d_POST,    "ld1",  ".1d",    1, false, 8  },
-  { AArch64::LD1Twov16b,        "ld1",  ".16b",   0, false, 0  },
-  { AArch64::LD1Twov8h,         "ld1",  ".8h",    0, false, 0  },
-  { AArch64::LD1Twov4s,         "ld1",  ".4s",    0, false, 0  },
-  { AArch64::LD1Twov2d,         "ld1",  ".2d",    0, false, 0  },
-  { AArch64::LD1Twov8b,         "ld1",  ".8b",    0, false, 0  },
-  { AArch64::LD1Twov4h,         "ld1",  ".4h",    0, false, 0  },
-  { AArch64::LD1Twov2s,         "ld1",  ".2s",    0, false, 0  },
-  { AArch64::LD1Twov1d,         "ld1",  ".1d",    0, false, 0  },
-  { AArch64::LD1Twov16b_POST,   "ld1",  ".16b",   1, false, 32 },
-  { AArch64::LD1Twov8h_POST,    "ld1",  ".8h",    1, false, 32 },
-  { AArch64::LD1Twov4s_POST,    "ld1",  ".4s",    1, false, 32 },
-  { AArch64::LD1Twov2d_POST,    "ld1",  ".2d",    1, false, 32 },
-  { AArch64::LD1Twov8b_POST,    "ld1",  ".8b",    1, false, 16 },
-  { AArch64::LD1Twov4h_POST,    "ld1",  ".4h",    1, false, 16 },
-  { AArch64::LD1Twov2s_POST,    "ld1",  ".2s",    1, false, 16 },
-  { AArch64::LD1Twov1d_POST,    "ld1",  ".1d",    1, false, 16 },
-  { AArch64::LD1Threev16b,      "ld1",  ".16b",   0, false, 0  },
-  { AArch64::LD1Threev8h,       "ld1",  ".8h",    0, false, 0  },
-  { AArch64::LD1Threev4s,       "ld1",  ".4s",    0, false, 0  },
-  { AArch64::LD1Threev2d,       "ld1",  ".2d",    0, false, 0  },
-  { AArch64::LD1Threev8b,       "ld1",  ".8b",    0, false, 0  },
-  { AArch64::LD1Threev4h,       "ld1",  ".4h",    0, false, 0  },
-  { AArch64::LD1Threev2s,       "ld1",  ".2s",    0, false, 0  },
-  { AArch64::LD1Threev1d,       "ld1",  ".1d",    0, false, 0  },
-  { AArch64::LD1Threev16b_POST, "ld1",  ".16b",   1, false, 48 },
-  { AArch64::LD1Threev8h_POST,  "ld1",  ".8h",    1, false, 48 },
-  { AArch64::LD1Threev4s_POST,  "ld1",  ".4s",    1, false, 48 },
-  { AArch64::LD1Threev2d_POST,  "ld1",  ".2d",    1, false, 48 },
-  { AArch64::LD1Threev8b_POST,  "ld1",  ".8b",    1, false, 24 },
-  { AArch64::LD1Threev4h_POST,  "ld1",  ".4h",    1, false, 24 },
-  { AArch64::LD1Threev2s_POST,  "ld1",  ".2s",    1, false, 24 },
-  { AArch64::LD1Threev1d_POST,  "ld1",  ".1d",    1, false, 24 },
-  { AArch64::LD1Fourv16b,       "ld1",  ".16b",   0, false, 0  },
-  { AArch64::LD1Fourv8h,        "ld1",  ".8h",    0, false, 0  },
-  { AArch64::LD1Fourv4s,        "ld1",  ".4s",    0, false, 0  },
-  { AArch64::LD1Fourv2d,        "ld1",  ".2d",    0, false, 0  },
-  { AArch64::LD1Fourv8b,        "ld1",  ".8b",    0, false, 0  },
-  { AArch64::LD1Fourv4h,        "ld1",  ".4h",    0, false, 0  },
-  { AArch64::LD1Fourv2s,        "ld1",  ".2s",    0, false, 0  },
-  { AArch64::LD1Fourv1d,        "ld1",  ".1d",    0, false, 0  },
-  { AArch64::LD1Fourv16b_POST,  "ld1",  ".16b",   1, false, 64 },
-  { AArch64::LD1Fourv8h_POST,   "ld1",  ".8h",    1, false, 64 },
-  { AArch64::LD1Fourv4s_POST,   "ld1",  ".4s",    1, false, 64 },
-  { AArch64::LD1Fourv2d_POST,   "ld1",  ".2d",    1, false, 64 },
-  { AArch64::LD1Fourv8b_POST,   "ld1",  ".8b",    1, false, 32 },
-  { AArch64::LD1Fourv4h_POST,   "ld1",  ".4h",    1, false, 32 },
-  { AArch64::LD1Fourv2s_POST,   "ld1",  ".2s",    1, false, 32 },
-  { AArch64::LD1Fourv1d_POST,   "ld1",  ".1d",    1, false, 32 },
-  { AArch64::LD2i8,             "ld2",  ".b",     1, true,  0  },
-  { AArch64::LD2i16,            "ld2",  ".h",     1, true,  0  },
-  { AArch64::LD2i32,            "ld2",  ".s",     1, true,  0  },
-  { AArch64::LD2i64,            "ld2",  ".d",     1, true,  0  },
-  { AArch64::LD2i8_POST,        "ld2",  ".b",     2, true,  2  },
-  { AArch64::LD2i16_POST,       "ld2",  ".h",     2, true,  4  },
-  { AArch64::LD2i32_POST,       "ld2",  ".s",     2, true,  8  },
-  { AArch64::LD2i64_POST,       "ld2",  ".d",     2, true,  16  },
-  { AArch64::LD2Rv16b,          "ld2r", ".16b",   0, false, 0  },
-  { AArch64::LD2Rv8h,           "ld2r", ".8h",    0, false, 0  },
-  { AArch64::LD2Rv4s,           "ld2r", ".4s",    0, false, 0  },
-  { AArch64::LD2Rv2d,           "ld2r", ".2d",    0, false, 0  },
-  { AArch64::LD2Rv8b,           "ld2r", ".8b",    0, false, 0  },
-  { AArch64::LD2Rv4h,           "ld2r", ".4h",    0, false, 0  },
-  { AArch64::LD2Rv2s,           "ld2r", ".2s",    0, false, 0  },
-  { AArch64::LD2Rv1d,           "ld2r", ".1d",    0, false, 0  },
-  { AArch64::LD2Rv16b_POST,     "ld2r", ".16b",   1, false, 2  },
-  { AArch64::LD2Rv8h_POST,      "ld2r", ".8h",    1, false, 4  },
-  { AArch64::LD2Rv4s_POST,      "ld2r", ".4s",    1, false, 8  },
-  { AArch64::LD2Rv2d_POST,      "ld2r", ".2d",    1, false, 16 },
-  { AArch64::LD2Rv8b_POST,      "ld2r", ".8b",    1, false, 2  },
-  { AArch64::LD2Rv4h_POST,      "ld2r", ".4h",    1, false, 4  },
-  { AArch64::LD2Rv2s_POST,      "ld2r", ".2s",    1, false, 8  },
-  { AArch64::LD2Rv1d_POST,      "ld2r", ".1d",    1, false, 16 },
-  { AArch64::LD2Twov16b,        "ld2",  ".16b",   0, false, 0  },
-  { AArch64::LD2Twov8h,         "ld2",  ".8h",    0, false, 0  },
-  { AArch64::LD2Twov4s,         "ld2",  ".4s",    0, false, 0  },
-  { AArch64::LD2Twov2d,         "ld2",  ".2d",    0, false, 0  },
-  { AArch64::LD2Twov8b,         "ld2",  ".8b",    0, false, 0  },
-  { AArch64::LD2Twov4h,         "ld2",  ".4h",    0, false, 0  },
-  { AArch64::LD2Twov2s,         "ld2",  ".2s",    0, false, 0  },
-  { AArch64::LD2Twov16b_POST,   "ld2",  ".16b",   1, false, 32 },
-  { AArch64::LD2Twov8h_POST,    "ld2",  ".8h",    1, false, 32 },
-  { AArch64::LD2Twov4s_POST,    "ld2",  ".4s",    1, false, 32 },
-  { AArch64::LD2Twov2d_POST,    "ld2",  ".2d",    1, false, 32 },
-  { AArch64::LD2Twov8b_POST,    "ld2",  ".8b",    1, false, 16 },
-  { AArch64::LD2Twov4h_POST,    "ld2",  ".4h",    1, false, 16 },
-  { AArch64::LD2Twov2s_POST,    "ld2",  ".2s",    1, false, 16 },
-  { AArch64::LD3i8,             "ld3",  ".b",     1, true,  0  },
-  { AArch64::LD3i16,            "ld3",  ".h",     1, true,  0  },
-  { AArch64::LD3i32,            "ld3",  ".s",     1, true,  0  },
-  { AArch64::LD3i64,            "ld3",  ".d",     1, true,  0  },
-  { AArch64::LD3i8_POST,        "ld3",  ".b",     2, true,  3  },
-  { AArch64::LD3i16_POST,       "ld3",  ".h",     2, true,  6  },
-  { AArch64::LD3i32_POST,       "ld3",  ".s",     2, true,  12 },
-  { AArch64::LD3i64_POST,       "ld3",  ".d",     2, true,  24 },
-  { AArch64::LD3Rv16b,          "ld3r", ".16b",   0, false, 0  },
-  { AArch64::LD3Rv8h,           "ld3r", ".8h",    0, false, 0  },
-  { AArch64::LD3Rv4s,           "ld3r", ".4s",    0, false, 0  },
-  { AArch64::LD3Rv2d,           "ld3r", ".2d",    0, false, 0  },
-  { AArch64::LD3Rv8b,           "ld3r", ".8b",    0, false, 0  },
-  { AArch64::LD3Rv4h,           "ld3r", ".4h",    0, false, 0  },
-  { AArch64::LD3Rv2s,           "ld3r", ".2s",    0, false, 0  },
-  { AArch64::LD3Rv1d,           "ld3r", ".1d",    0, false, 0  },
-  { AArch64::LD3Rv16b_POST,     "ld3r", ".16b",   1, false, 3  },
-  { AArch64::LD3Rv8h_POST,      "ld3r", ".8h",    1, false, 6  },
-  { AArch64::LD3Rv4s_POST,      "ld3r", ".4s",    1, false, 12 },
-  { AArch64::LD3Rv2d_POST,      "ld3r", ".2d",    1, false, 24 },
-  { AArch64::LD3Rv8b_POST,      "ld3r", ".8b",    1, false, 3  },
-  { AArch64::LD3Rv4h_POST,      "ld3r", ".4h",    1, false, 6  },
-  { AArch64::LD3Rv2s_POST,      "ld3r", ".2s",    1, false, 12 },
-  { AArch64::LD3Rv1d_POST,      "ld3r", ".1d",    1, false, 24 },
-  { AArch64::LD3Threev16b,      "ld3",  ".16b",   0, false, 0  },
-  { AArch64::LD3Threev8h,       "ld3",  ".8h",    0, false, 0  },
-  { AArch64::LD3Threev4s,       "ld3",  ".4s",    0, false, 0  },
-  { AArch64::LD3Threev2d,       "ld3",  ".2d",    0, false, 0  },
-  { AArch64::LD3Threev8b,       "ld3",  ".8b",    0, false, 0  },
-  { AArch64::LD3Threev4h,       "ld3",  ".4h",    0, false, 0  },
-  { AArch64::LD3Threev2s,       "ld3",  ".2s",    0, false, 0  },
-  { AArch64::LD3Threev16b_POST, "ld3",  ".16b",   1, false, 48 },
-  { AArch64::LD3Threev8h_POST,  "ld3",  ".8h",    1, false, 48 },
-  { AArch64::LD3Threev4s_POST,  "ld3",  ".4s",    1, false, 48 },
-  { AArch64::LD3Threev2d_POST,  "ld3",  ".2d",    1, false, 48 },
-  { AArch64::LD3Threev8b_POST,  "ld3",  ".8b",    1, false, 24 },
-  { AArch64::LD3Threev4h_POST,  "ld3",  ".4h",    1, false, 24 },
-  { AArch64::LD3Threev2s_POST,  "ld3",  ".2s",    1, false, 24 },
-  { AArch64::LD4i8,             "ld4",  ".b",     1, true,  0  },
-  { AArch64::LD4i16,            "ld4",  ".h",     1, true,  0  },
-  { AArch64::LD4i32,            "ld4",  ".s",     1, true,  0  },
-  { AArch64::LD4i64,            "ld4",  ".d",     1, true,  0  },
-  { AArch64::LD4i8_POST,        "ld4",  ".b",     2, true,  4  },
-  { AArch64::LD4i16_POST,       "ld4",  ".h",     2, true,  8  },
-  { AArch64::LD4i32_POST,       "ld4",  ".s",     2, true,  16 },
-  { AArch64::LD4i64_POST,       "ld4",  ".d",     2, true,  32 },
-  { AArch64::LD4Rv16b,          "ld4r", ".16b",   0, false, 0  },
-  { AArch64::LD4Rv8h,           "ld4r", ".8h",    0, false, 0  },
-  { AArch64::LD4Rv4s,           "ld4r", ".4s",    0, false, 0  },
-  { AArch64::LD4Rv2d,           "ld4r", ".2d",    0, false, 0  },
-  { AArch64::LD4Rv8b,           "ld4r", ".8b",    0, false, 0  },
-  { AArch64::LD4Rv4h,           "ld4r", ".4h",    0, false, 0  },
-  { AArch64::LD4Rv2s,           "ld4r", ".2s",    0, false, 0  },
-  { AArch64::LD4Rv1d,           "ld4r", ".1d",    0, false, 0  },
-  { AArch64::LD4Rv16b_POST,     "ld4r", ".16b",   1, false, 4  },
-  { AArch64::LD4Rv8h_POST,      "ld4r", ".8h",    1, false, 8  },
-  { AArch64::LD4Rv4s_POST,      "ld4r", ".4s",    1, false, 16 },
-  { AArch64::LD4Rv2d_POST,      "ld4r", ".2d",    1, false, 32 },
-  { AArch64::LD4Rv8b_POST,      "ld4r", ".8b",    1, false, 4  },
-  { AArch64::LD4Rv4h_POST,      "ld4r", ".4h",    1, false, 8  },
-  { AArch64::LD4Rv2s_POST,      "ld4r", ".2s",    1, false, 16 },
-  { AArch64::LD4Rv1d_POST,      "ld4r", ".1d",    1, false, 32 },
-  { AArch64::LD4Fourv16b,       "ld4",  ".16b",   0, false, 0  },
-  { AArch64::LD4Fourv8h,        "ld4",  ".8h",    0, false, 0  },
-  { AArch64::LD4Fourv4s,        "ld4",  ".4s",    0, false, 0  },
-  { AArch64::LD4Fourv2d,        "ld4",  ".2d",    0, false, 0  },
-  { AArch64::LD4Fourv8b,        "ld4",  ".8b",    0, false, 0  },
-  { AArch64::LD4Fourv4h,        "ld4",  ".4h",    0, false, 0  },
-  { AArch64::LD4Fourv2s,        "ld4",  ".2s",    0, false, 0  },
-  { AArch64::LD4Fourv16b_POST,  "ld4",  ".16b",   1, false, 64 },
-  { AArch64::LD4Fourv8h_POST,   "ld4",  ".8h",    1, false, 64 },
-  { AArch64::LD4Fourv4s_POST,   "ld4",  ".4s",    1, false, 64 },
-  { AArch64::LD4Fourv2d_POST,   "ld4",  ".2d",    1, false, 64 },
-  { AArch64::LD4Fourv8b_POST,   "ld4",  ".8b",    1, false, 32 },
-  { AArch64::LD4Fourv4h_POST,   "ld4",  ".4h",    1, false, 32 },
-  { AArch64::LD4Fourv2s_POST,   "ld4",  ".2s",    1, false, 32 },
-  { AArch64::ST1i8,             "st1",  ".b",     0, true,  0  },
-  { AArch64::ST1i16,            "st1",  ".h",     0, true,  0  },
-  { AArch64::ST1i32,            "st1",  ".s",     0, true,  0  },
-  { AArch64::ST1i64,            "st1",  ".d",     0, true,  0  },
-  { AArch64::ST1i8_POST,        "st1",  ".b",     1, true,  1  },
-  { AArch64::ST1i16_POST,       "st1",  ".h",     1, true,  2  },
-  { AArch64::ST1i32_POST,       "st1",  ".s",     1, true,  4  },
-  { AArch64::ST1i64_POST,       "st1",  ".d",     1, true,  8  },
-  { AArch64::ST1Onev16b,        "st1",  ".16b",   0, false, 0  },
-  { AArch64::ST1Onev8h,         "st1",  ".8h",    0, false, 0  },
-  { AArch64::ST1Onev4s,         "st1",  ".4s",    0, false, 0  },
-  { AArch64::ST1Onev2d,         "st1",  ".2d",    0, false, 0  },
-  { AArch64::ST1Onev8b,         "st1",  ".8b",    0, false, 0  },
-  { AArch64::ST1Onev4h,         "st1",  ".4h",    0, false, 0  },
-  { AArch64::ST1Onev2s,         "st1",  ".2s",    0, false, 0  },
-  { AArch64::ST1Onev1d,         "st1",  ".1d",    0, false, 0  },
-  { AArch64::ST1Onev16b_POST,   "st1",  ".16b",   1, false, 16 },
-  { AArch64::ST1Onev8h_POST,    "st1",  ".8h",    1, false, 16 },
-  { AArch64::ST1Onev4s_POST,    "st1",  ".4s",    1, false, 16 },
-  { AArch64::ST1Onev2d_POST,    "st1",  ".2d",    1, false, 16 },
-  { AArch64::ST1Onev8b_POST,    "st1",  ".8b",    1, false, 8  },
-  { AArch64::ST1Onev4h_POST,    "st1",  ".4h",    1, false, 8  },
-  { AArch64::ST1Onev2s_POST,    "st1",  ".2s",    1, false, 8  },
-  { AArch64::ST1Onev1d_POST,    "st1",  ".1d",    1, false, 8  },
-  { AArch64::ST1Twov16b,        "st1",  ".16b",   0, false, 0  },
-  { AArch64::ST1Twov8h,         "st1",  ".8h",    0, false, 0  },
-  { AArch64::ST1Twov4s,         "st1",  ".4s",    0, false, 0  },
-  { AArch64::ST1Twov2d,         "st1",  ".2d",    0, false, 0  },
-  { AArch64::ST1Twov8b,         "st1",  ".8b",    0, false, 0  },
-  { AArch64::ST1Twov4h,         "st1",  ".4h",    0, false, 0  },
-  { AArch64::ST1Twov2s,         "st1",  ".2s",    0, false, 0  },
-  { AArch64::ST1Twov1d,         "st1",  ".1d",    0, false, 0  },
-  { AArch64::ST1Twov16b_POST,   "st1",  ".16b",   1, false, 32 },
-  { AArch64::ST1Twov8h_POST,    "st1",  ".8h",    1, false, 32 },
-  { AArch64::ST1Twov4s_POST,    "st1",  ".4s",    1, false, 32 },
-  { AArch64::ST1Twov2d_POST,    "st1",  ".2d",    1, false, 32 },
-  { AArch64::ST1Twov8b_POST,    "st1",  ".8b",    1, false, 16 },
-  { AArch64::ST1Twov4h_POST,    "st1",  ".4h",    1, false, 16 },
-  { AArch64::ST1Twov2s_POST,    "st1",  ".2s",    1, false, 16 },
-  { AArch64::ST1Twov1d_POST,    "st1",  ".1d",    1, false, 16 },
-  { AArch64::ST1Threev16b,      "st1",  ".16b",   0, false, 0  },
-  { AArch64::ST1Threev8h,       "st1",  ".8h",    0, false, 0  },
-  { AArch64::ST1Threev4s,       "st1",  ".4s",    0, false, 0  },
-  { AArch64::ST1Threev2d,       "st1",  ".2d",    0, false, 0  },
-  { AArch64::ST1Threev8b,       "st1",  ".8b",    0, false, 0  },
-  { AArch64::ST1Threev4h,       "st1",  ".4h",    0, false, 0  },
-  { AArch64::ST1Threev2s,       "st1",  ".2s",    0, false, 0  },
-  { AArch64::ST1Threev1d,       "st1",  ".1d",    0, false, 0  },
-  { AArch64::ST1Threev16b_POST, "st1",  ".16b",   1, false, 48 },
-  { AArch64::ST1Threev8h_POST,  "st1",  ".8h",    1, false, 48 },
-  { AArch64::ST1Threev4s_POST,  "st1",  ".4s",    1, false, 48 },
-  { AArch64::ST1Threev2d_POST,  "st1",  ".2d",    1, false, 48 },
-  { AArch64::ST1Threev8b_POST,  "st1",  ".8b",    1, false, 24 },
-  { AArch64::ST1Threev4h_POST,  "st1",  ".4h",    1, false, 24 },
-  { AArch64::ST1Threev2s_POST,  "st1",  ".2s",    1, false, 24 },
-  { AArch64::ST1Threev1d_POST,  "st1",  ".1d",    1, false, 24 },
-  { AArch64::ST1Fourv16b,       "st1",  ".16b",   0, false, 0  },
-  { AArch64::ST1Fourv8h,        "st1",  ".8h",    0, false, 0  },
-  { AArch64::ST1Fourv4s,        "st1",  ".4s",    0, false, 0  },
-  { AArch64::ST1Fourv2d,        "st1",  ".2d",    0, false, 0  },
-  { AArch64::ST1Fourv8b,        "st1",  ".8b",    0, false, 0  },
-  { AArch64::ST1Fourv4h,        "st1",  ".4h",    0, false, 0  },
-  { AArch64::ST1Fourv2s,        "st1",  ".2s",    0, false, 0  },
-  { AArch64::ST1Fourv1d,        "st1",  ".1d",    0, false, 0  },
-  { AArch64::ST1Fourv16b_POST,  "st1",  ".16b",   1, false, 64 },
-  { AArch64::ST1Fourv8h_POST,   "st1",  ".8h",    1, false, 64 },
-  { AArch64::ST1Fourv4s_POST,   "st1",  ".4s",    1, false, 64 },
-  { AArch64::ST1Fourv2d_POST,   "st1",  ".2d",    1, false, 64 },
-  { AArch64::ST1Fourv8b_POST,   "st1",  ".8b",    1, false, 32 },
-  { AArch64::ST1Fourv4h_POST,   "st1",  ".4h",    1, false, 32 },
-  { AArch64::ST1Fourv2s_POST,   "st1",  ".2s",    1, false, 32 },
-  { AArch64::ST1Fourv1d_POST,   "st1",  ".1d",    1, false, 32 },
-  { AArch64::ST2i8,             "st2",  ".b",     0, true,  0  },
-  { AArch64::ST2i16,            "st2",  ".h",     0, true,  0  },
-  { AArch64::ST2i32,            "st2",  ".s",     0, true,  0  },
-  { AArch64::ST2i64,            "st2",  ".d",     0, true,  0  },
-  { AArch64::ST2i8_POST,        "st2",  ".b",     1, true,  2  },
-  { AArch64::ST2i16_POST,       "st2",  ".h",     1, true,  4  },
-  { AArch64::ST2i32_POST,       "st2",  ".s",     1, true,  8  },
-  { AArch64::ST2i64_POST,       "st2",  ".d",     1, true,  16 },
-  { AArch64::ST2Twov16b,        "st2",  ".16b",   0, false, 0  },
-  { AArch64::ST2Twov8h,         "st2",  ".8h",    0, false, 0  },
-  { AArch64::ST2Twov4s,         "st2",  ".4s",    0, false, 0  },
-  { AArch64::ST2Twov2d,         "st2",  ".2d",    0, false, 0  },
-  { AArch64::ST2Twov8b,         "st2",  ".8b",    0, false, 0  },
-  { AArch64::ST2Twov4h,         "st2",  ".4h",    0, false, 0  },
-  { AArch64::ST2Twov2s,         "st2",  ".2s",    0, false, 0  },
-  { AArch64::ST2Twov16b_POST,   "st2",  ".16b",   1, false, 32 },
-  { AArch64::ST2Twov8h_POST,    "st2",  ".8h",    1, false, 32 },
-  { AArch64::ST2Twov4s_POST,    "st2",  ".4s",    1, false, 32 },
-  { AArch64::ST2Twov2d_POST,    "st2",  ".2d",    1, false, 32 },
-  { AArch64::ST2Twov8b_POST,    "st2",  ".8b",    1, false, 16 },
-  { AArch64::ST2Twov4h_POST,    "st2",  ".4h",    1, false, 16 },
-  { AArch64::ST2Twov2s_POST,    "st2",  ".2s",    1, false, 16 },
-  { AArch64::ST3i8,             "st3",  ".b",     0, true,  0  },
-  { AArch64::ST3i16,            "st3",  ".h",     0, true,  0  },
-  { AArch64::ST3i32,            "st3",  ".s",     0, true,  0  },
-  { AArch64::ST3i64,            "st3",  ".d",     0, true,  0  },
-  { AArch64::ST3i8_POST,        "st3",  ".b",     1, true,  3  },
-  { AArch64::ST3i16_POST,       "st3",  ".h",     1, true,  6  },
-  { AArch64::ST3i32_POST,       "st3",  ".s",     1, true,  12 },
-  { AArch64::ST3i64_POST,       "st3",  ".d",     1, true,  24 },
-  { AArch64::ST3Threev16b,      "st3",  ".16b",   0, false, 0  },
-  { AArch64::ST3Threev8h,       "st3",  ".8h",    0, false, 0  },
-  { AArch64::ST3Threev4s,       "st3",  ".4s",    0, false, 0  },
-  { AArch64::ST3Threev2d,       "st3",  ".2d",    0, false, 0  },
-  { AArch64::ST3Threev8b,       "st3",  ".8b",    0, false, 0  },
-  { AArch64::ST3Threev4h,       "st3",  ".4h",    0, false, 0  },
-  { AArch64::ST3Threev2s,       "st3",  ".2s",    0, false, 0  },
-  { AArch64::ST3Threev16b_POST, "st3",  ".16b",   1, false, 48 },
-  { AArch64::ST3Threev8h_POST,  "st3",  ".8h",    1, false, 48 },
-  { AArch64::ST3Threev4s_POST,  "st3",  ".4s",    1, false, 48 },
-  { AArch64::ST3Threev2d_POST,  "st3",  ".2d",    1, false, 48 },
-  { AArch64::ST3Threev8b_POST,  "st3",  ".8b",    1, false, 24 },
-  { AArch64::ST3Threev4h_POST,  "st3",  ".4h",    1, false, 24 },
-  { AArch64::ST3Threev2s_POST,  "st3",  ".2s",    1, false, 24 },
-  { AArch64::ST4i8,             "st4",  ".b",     0, true,  0  },
-  { AArch64::ST4i16,            "st4",  ".h",     0, true,  0  },
-  { AArch64::ST4i32,            "st4",  ".s",     0, true,  0  },
-  { AArch64::ST4i64,            "st4",  ".d",     0, true,  0  },
-  { AArch64::ST4i8_POST,        "st4",  ".b",     1, true,  4  },
-  { AArch64::ST4i16_POST,       "st4",  ".h",     1, true,  8  },
-  { AArch64::ST4i32_POST,       "st4",  ".s",     1, true,  16 },
-  { AArch64::ST4i64_POST,       "st4",  ".d",     1, true,  32 },
-  { AArch64::ST4Fourv16b,       "st4",  ".16b",   0, false, 0  },
-  { AArch64::ST4Fourv8h,        "st4",  ".8h",    0, false, 0  },
-  { AArch64::ST4Fourv4s,        "st4",  ".4s",    0, false, 0  },
-  { AArch64::ST4Fourv2d,        "st4",  ".2d",    0, false, 0  },
-  { AArch64::ST4Fourv8b,        "st4",  ".8b",    0, false, 0  },
-  { AArch64::ST4Fourv4h,        "st4",  ".4h",    0, false, 0  },
-  { AArch64::ST4Fourv2s,        "st4",  ".2s",    0, false, 0  },
-  { AArch64::ST4Fourv16b_POST,  "st4",  ".16b",   1, false, 64 },
-  { AArch64::ST4Fourv8h_POST,   "st4",  ".8h",    1, false, 64 },
-  { AArch64::ST4Fourv4s_POST,   "st4",  ".4s",    1, false, 64 },
-  { AArch64::ST4Fourv2d_POST,   "st4",  ".2d",    1, false, 64 },
-  { AArch64::ST4Fourv8b_POST,   "st4",  ".8b",    1, false, 32 },
-  { AArch64::ST4Fourv4h_POST,   "st4",  ".4h",    1, false, 32 },
-  { AArch64::ST4Fourv2s_POST,   "st4",  ".2s",    1, false, 32 },
-};
-
-static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
-  unsigned Idx;
-  for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
-    if (LdStNInstInfo[Idx].Opcode == Opcode)
-      return &LdStNInstInfo[Idx];
-
-  return nullptr;
-}
-
-void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                        StringRef Annot,
-                                        const MCSubtargetInfo &STI) {
-  unsigned Opcode = MI->getOpcode();
-  StringRef Layout;
-
-  bool IsTbx;
-  if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
-    O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
-      << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
-
-    unsigned ListOpNum = IsTbx ? 2 : 1;
-    printVectorList(MI, ListOpNum, STI, O, "");
-
-    O << ", "
-      << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  if (const LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
-    O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
-
-    // Now onto the operands: first a vector list with possible lane
-    // specifier. E.g. { v0 }[2]
-    int OpNum = LdStDesc->ListOperand;
-    printVectorList(MI, OpNum++, STI, O, "");
-
-    if (LdStDesc->HasLane)
-      O << '[' << MI->getOperand(OpNum++).getImm() << ']';
-
-    // Next the address: [xN]
-    unsigned AddrReg = MI->getOperand(OpNum++).getReg();
-    O << ", [" << getRegisterName(AddrReg) << ']';
-
-    // Finally, there might be a post-indexed offset.
-    if (LdStDesc->NaturalOffset != 0) {
-      unsigned Reg = MI->getOperand(OpNum++).getReg();
-      if (Reg != AArch64::XZR)
-        O << ", " << getRegisterName(Reg);
-      else {
-        assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
-        O << ", #" << LdStDesc->NaturalOffset;
-      }
-    }
-
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  AArch64InstPrinter::printInst(MI, O, Annot, STI);
-}
-
-bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-#ifndef NDEBUG
-  unsigned Opcode = MI->getOpcode();
-  assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
-#endif
-
-  const MCOperand &Op1 = MI->getOperand(0);
-  const MCOperand &Cn = MI->getOperand(1);
-  const MCOperand &Cm = MI->getOperand(2);
-  const MCOperand &Op2 = MI->getOperand(3);
-
-  unsigned Op1Val = Op1.getImm();
-  unsigned CnVal = Cn.getImm();
-  unsigned CmVal = Cm.getImm();
-  unsigned Op2Val = Op2.getImm();
-
-  uint16_t Encoding = Op2Val;
-  Encoding |= CmVal << 3;
-  Encoding |= CnVal << 7;
-  Encoding |= Op1Val << 11;
-
-  bool NeedsReg;
-  std::string Ins;
-  std::string Name;
-
-  if (CnVal == 7) {
-    switch (CmVal) {
-    default: return false;
-    // Maybe IC, maybe Prediction Restriction
-    case 1:
-      switch (Op1Val) {
-      default: return false;
-      case 0: goto Search_IC;
-      case 3: goto Search_PRCTX;
-      }
-    // Prediction Restriction aliases
-    case 3: {
-      Search_PRCTX:
-      const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByEncoding(Encoding >> 3);
-      if (!PRCTX || !PRCTX->haveFeatures(STI.getFeatureBits()))
-        return false;
-
-      NeedsReg = PRCTX->NeedsReg;
-      switch (Op2Val) {
-      default: return false;
-      case 4: Ins = "cfp\t"; break;
-      case 5: Ins = "dvp\t"; break;
-      case 7: Ins = "cpp\t"; break;
-      }
-      Name = std::string(PRCTX->Name);
-    }
-    break;
-    // IC aliases
-    case 5: {
-      Search_IC:
-      const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
-      if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
-        return false;
-
-      NeedsReg = IC->NeedsReg;
-      Ins = "ic\t";
-      Name = std::string(IC->Name);
-    }
-    break;
-    // DC aliases
-    case 4: case 6: case 10: case 11: case 12: case 13: case 14:
-    {
-      const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
-      if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
-        return false;
-
-      NeedsReg = true;
-      Ins = "dc\t";
-      Name = std::string(DC->Name);
-    }
-    break;
-    // AT aliases
-    case 8: case 9: {
-      const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
-      if (!AT || !AT->haveFeatures(STI.getFeatureBits()))
-        return false;
-
-      NeedsReg = true;
-      Ins = "at\t";
-      Name = std::string(AT->Name);
-    }
-    break;
-    }
-  } else if (CnVal == 8) {
-    // TLBI aliases
-    const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
-    if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
-      return false;
-
-    NeedsReg = TLBI->NeedsReg;
-    Ins = "tlbi\t";
-    Name = std::string(TLBI->Name);
-  }
-  else
-    return false;
-
-  std::string Str = Ins + Name;
-  std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
-
-  O << '\t' << Str;
-  if (NeedsReg)
-    O << ", " << getRegisterName(MI->getOperand(4).getReg());
-
-  return true;
-}
-
-void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    O << getRegisterName(Reg);
-  } else if (Op.isImm()) {
-    printImm(MI, OpNo, STI, O);
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void AArch64InstPrinter::printImm(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  O << "#" << formatImm(Op.getImm());
-}
-
-void AArch64InstPrinter::printImmHex(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  O << format("#%#llx", Op.getImm());
-}
-
-void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
-                                             unsigned Imm, raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    if (Reg == AArch64::XZR)
-      O << "#" << Imm;
-    else
-      O << getRegisterName(Reg);
-  } else
-    llvm_unreachable("unknown operand kind in printPostIncOperand64");
-}
-
-void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  assert(Op.isReg() && "Non-register vreg operand!");
-  unsigned Reg = Op.getReg();
-  O << getRegisterName(Reg, AArch64::vreg);
-}
-
-void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
-  O << "c" << Op.getImm();
-}
-
-void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  if (MO.isImm()) {
-    unsigned Val = (MO.getImm() & 0xfff);
-    assert(Val == MO.getImm() && "Add/sub immediate out of range!");
-    unsigned Shift =
-        AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
-    O << '#' << formatImm(Val);
-    if (Shift != 0)
-      printShifter(MI, OpNum + 1, STI, O);
-
-    if (CommentStream)
-      *CommentStream << '=' << formatImm(Val << Shift) << '\n';
-  } else {
-    assert(MO.isExpr() && "Unexpected operand type!");
-    MO.getExpr()->print(O, &MAI);
-    printShifter(MI, OpNum + 1, STI, O);
-  }
-}
-
-template <typename T>
-void AArch64InstPrinter::printLogicalImm(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  uint64_t Val = MI->getOperand(OpNum).getImm();
-  O << "#0x";
-  O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 8 * sizeof(T)));
-}
-
-void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNum).getImm();
-  // LSL #0 should not be printed.
-  if (AArch64_AM::getShiftType(Val) == AArch64_AM::LSL &&
-      AArch64_AM::getShiftValue(Val) == 0)
-    return;
-  O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val))
-    << " #" << AArch64_AM::getShiftValue(Val);
-}
-
-void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  O << getRegisterName(MI->getOperand(OpNum).getReg());
-  printShifter(MI, OpNum + 1, STI, O);
-}
-
-void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  O << getRegisterName(MI->getOperand(OpNum).getReg());
-  printArithExtend(MI, OpNum + 1, STI, O);
-}
-
-void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNum).getImm();
-  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
-  unsigned ShiftVal = AArch64_AM::getArithShiftValue(Val);
-
-  // If the destination or first source register operand is [W]SP, print
-  // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at
-  // all.
-  if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) {
-    unsigned Dest = MI->getOperand(0).getReg();
-    unsigned Src1 = MI->getOperand(1).getReg();
-    if ( ((Dest == AArch64::SP || Src1 == AArch64::SP) &&
-          ExtType == AArch64_AM::UXTX) ||
-         ((Dest == AArch64::WSP || Src1 == AArch64::WSP) &&
-          ExtType == AArch64_AM::UXTW) ) {
-      if (ShiftVal != 0)
-        O << ", lsl #" << ShiftVal;
-      return;
-    }
-  }
-  O << ", " << AArch64_AM::getShiftExtendName(ExtType);
-  if (ShiftVal != 0)
-    O << " #" << ShiftVal;
-}
-
-static void printMemExtendImpl(bool SignExtend, bool DoShift,
-                               unsigned Width, char SrcRegKind,
-                               raw_ostream &O) {
-  // sxtw, sxtx, uxtw or lsl (== uxtx)
-  bool IsLSL = !SignExtend && SrcRegKind == 'x';
-  if (IsLSL)
-    O << "lsl";
-  else
-    O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind;
-
-  if (DoShift || IsLSL)
-    O << " #" << Log2_32(Width / 8);
-}
-
-void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O, char SrcRegKind,
-                                        unsigned Width) {
-  bool SignExtend = MI->getOperand(OpNum).getImm();
-  bool DoShift = MI->getOperand(OpNum + 1).getImm();
-  printMemExtendImpl(SignExtend, DoShift, Width, SrcRegKind, O);
-}
-
-template <bool SignExtend, int ExtWidth, char SrcRegKind, char Suffix>
-void AArch64InstPrinter::printRegWithShiftExtend(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  printOperand(MI, OpNum, STI, O);
-  if (Suffix == 's' || Suffix == 'd')
-    O << '.' << Suffix;
-  else
-    assert(Suffix == 0 && "Unsupported suffix size");
-
-  bool DoShift = ExtWidth != 8;
-  if (SignExtend || DoShift || SrcRegKind == 'w') {
-    O << ", ";
-    printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O);
-  }
-}
-
-void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
-  O << AArch64CC::getCondCodeName(CC);
-}
-
-void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
-  O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
-}
-
-void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
-}
-
-template<int Scale>
-void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  O << '#' << formatImm(Scale * MI->getOperand(OpNum).getImm());
-}
-
-void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
-                                           unsigned Scale, raw_ostream &O) {
-  const MCOperand MO = MI->getOperand(OpNum);
-  if (MO.isImm()) {
-    O << "#" << formatImm(MO.getImm() * Scale);
-  } else {
-    assert(MO.isExpr() && "Unexpected operand type!");
-    MO.getExpr()->print(O, &MAI);
-  }
-}
-
-void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
-                                          unsigned Scale, raw_ostream &O) {
-  const MCOperand MO1 = MI->getOperand(OpNum + 1);
-  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
-  if (MO1.isImm()) {
-      O << ", #" << formatImm(MO1.getImm() * Scale);
-  } else {
-    assert(MO1.isExpr() && "Unexpected operand type!");
-    O << ", ";
-    MO1.getExpr()->print(O, &MAI);
-  }
-  O << ']';
-}
-
-template <bool IsSVEPrefetch>
-void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned prfop = MI->getOperand(OpNum).getImm();
-  if (IsSVEPrefetch) {
-    if (auto PRFM = AArch64SVEPRFM::lookupSVEPRFMByEncoding(prfop)) {
-      O << PRFM->Name;
-      return;
-    }
-  } else if (auto PRFM = AArch64PRFM::lookupPRFMByEncoding(prfop)) {
-    O << PRFM->Name;
-    return;
-  }
-
-  O << '#' << formatImm(prfop);
-}
-
-void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned psbhintop = MI->getOperand(OpNum).getImm();
-  auto PSB = AArch64PSBHint::lookupPSBByEncoding(psbhintop);
-  if (PSB)
-    O << PSB->Name;
-  else
-    O << '#' << formatImm(psbhintop);
-}
-
-void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned btihintop = (MI->getOperand(OpNum).getImm() ^ 32) >> 1;
-  auto BTI = AArch64BTIHint::lookupBTIByEncoding(btihintop);
-  if (BTI)
-    O << BTI->Name;
-  else
-    O << '#' << formatImm(btihintop);
-}
-
-void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  float FPImm =
-      MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm());
-
-  // 8 decimal places are enough to perfectly represent permitted floats.
-  O << format("#%.8f", FPImm);
-}
-
-static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
-  while (Stride--) {
-    switch (Reg) {
-    default:
-      llvm_unreachable("Vector register expected!");
-    case AArch64::Q0:  Reg = AArch64::Q1;  break;
-    case AArch64::Q1:  Reg = AArch64::Q2;  break;
-    case AArch64::Q2:  Reg = AArch64::Q3;  break;
-    case AArch64::Q3:  Reg = AArch64::Q4;  break;
-    case AArch64::Q4:  Reg = AArch64::Q5;  break;
-    case AArch64::Q5:  Reg = AArch64::Q6;  break;
-    case AArch64::Q6:  Reg = AArch64::Q7;  break;
-    case AArch64::Q7:  Reg = AArch64::Q8;  break;
-    case AArch64::Q8:  Reg = AArch64::Q9;  break;
-    case AArch64::Q9:  Reg = AArch64::Q10; break;
-    case AArch64::Q10: Reg = AArch64::Q11; break;
-    case AArch64::Q11: Reg = AArch64::Q12; break;
-    case AArch64::Q12: Reg = AArch64::Q13; break;
-    case AArch64::Q13: Reg = AArch64::Q14; break;
-    case AArch64::Q14: Reg = AArch64::Q15; break;
-    case AArch64::Q15: Reg = AArch64::Q16; break;
-    case AArch64::Q16: Reg = AArch64::Q17; break;
-    case AArch64::Q17: Reg = AArch64::Q18; break;
-    case AArch64::Q18: Reg = AArch64::Q19; break;
-    case AArch64::Q19: Reg = AArch64::Q20; break;
-    case AArch64::Q20: Reg = AArch64::Q21; break;
-    case AArch64::Q21: Reg = AArch64::Q22; break;
-    case AArch64::Q22: Reg = AArch64::Q23; break;
-    case AArch64::Q23: Reg = AArch64::Q24; break;
-    case AArch64::Q24: Reg = AArch64::Q25; break;
-    case AArch64::Q25: Reg = AArch64::Q26; break;
-    case AArch64::Q26: Reg = AArch64::Q27; break;
-    case AArch64::Q27: Reg = AArch64::Q28; break;
-    case AArch64::Q28: Reg = AArch64::Q29; break;
-    case AArch64::Q29: Reg = AArch64::Q30; break;
-    case AArch64::Q30: Reg = AArch64::Q31; break;
-    // Vector lists can wrap around.
-    case AArch64::Q31:
-      Reg = AArch64::Q0;
-      break;
-    case AArch64::Z0:  Reg = AArch64::Z1;  break;
-    case AArch64::Z1:  Reg = AArch64::Z2;  break;
-    case AArch64::Z2:  Reg = AArch64::Z3;  break;
-    case AArch64::Z3:  Reg = AArch64::Z4;  break;
-    case AArch64::Z4:  Reg = AArch64::Z5;  break;
-    case AArch64::Z5:  Reg = AArch64::Z6;  break;
-    case AArch64::Z6:  Reg = AArch64::Z7;  break;
-    case AArch64::Z7:  Reg = AArch64::Z8;  break;
-    case AArch64::Z8:  Reg = AArch64::Z9;  break;
-    case AArch64::Z9:  Reg = AArch64::Z10; break;
-    case AArch64::Z10: Reg = AArch64::Z11; break;
-    case AArch64::Z11: Reg = AArch64::Z12; break;
-    case AArch64::Z12: Reg = AArch64::Z13; break;
-    case AArch64::Z13: Reg = AArch64::Z14; break;
-    case AArch64::Z14: Reg = AArch64::Z15; break;
-    case AArch64::Z15: Reg = AArch64::Z16; break;
-    case AArch64::Z16: Reg = AArch64::Z17; break;
-    case AArch64::Z17: Reg = AArch64::Z18; break;
-    case AArch64::Z18: Reg = AArch64::Z19; break;
-    case AArch64::Z19: Reg = AArch64::Z20; break;
-    case AArch64::Z20: Reg = AArch64::Z21; break;
-    case AArch64::Z21: Reg = AArch64::Z22; break;
-    case AArch64::Z22: Reg = AArch64::Z23; break;
-    case AArch64::Z23: Reg = AArch64::Z24; break;
-    case AArch64::Z24: Reg = AArch64::Z25; break;
-    case AArch64::Z25: Reg = AArch64::Z26; break;
-    case AArch64::Z26: Reg = AArch64::Z27; break;
-    case AArch64::Z27: Reg = AArch64::Z28; break;
-    case AArch64::Z28: Reg = AArch64::Z29; break;
-    case AArch64::Z29: Reg = AArch64::Z30; break;
-    case AArch64::Z30: Reg = AArch64::Z31; break;
-    // Vector lists can wrap around.
-    case AArch64::Z31:
-      Reg = AArch64::Z0;
-      break;
-    }
-  }
-  return Reg;
-}
-
-template<unsigned size>
-void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
-                                                   unsigned OpNum,
-                                                   const MCSubtargetInfo &STI,
-                                                   raw_ostream &O) {
-  static_assert(size == 64 || size == 32,
-                "Template parameter must be either 32 or 64");
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-
-  unsigned Sube = (size == 32) ? AArch64::sube32 : AArch64::sube64;
-  unsigned Subo = (size == 32) ? AArch64::subo32 : AArch64::subo64;
-
-  unsigned Even = MRI.getSubReg(Reg,  Sube);
-  unsigned Odd = MRI.getSubReg(Reg,  Subo);
-  O << getRegisterName(Even) << ", " << getRegisterName(Odd);
-}
-
-void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O,
-                                         StringRef LayoutSuffix) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-
-  O << "{ ";
-
-  // Work out how many registers there are in the list (if there is an actual
-  // list).
-  unsigned NumRegs = 1;
-  if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) ||
-      MRI.getRegClass(AArch64::ZPR2RegClassID).contains(Reg) ||
-      MRI.getRegClass(AArch64::QQRegClassID).contains(Reg))
-    NumRegs = 2;
-  else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) ||
-           MRI.getRegClass(AArch64::ZPR3RegClassID).contains(Reg) ||
-           MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg))
-    NumRegs = 3;
-  else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) ||
-           MRI.getRegClass(AArch64::ZPR4RegClassID).contains(Reg) ||
-           MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg))
-    NumRegs = 4;
-
-  // Now forget about the list and find out what the first register is.
-  if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0))
-    Reg = FirstReg;
-  else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0))
-    Reg = FirstReg;
-  else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::zsub0))
-    Reg = FirstReg;
-
-  // If it's a D-reg, we need to promote it to the equivalent Q-reg before
-  // printing (otherwise getRegisterName fails).
-  if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) {
-    const MCRegisterClass &FPR128RC =
-        MRI.getRegClass(AArch64::FPR128RegClassID);
-    Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
-  }
-
-  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
-    if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
-      O << getRegisterName(Reg) << LayoutSuffix;
-    else
-      O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix;
-
-    if (i + 1 != NumRegs)
-      O << ", ";
-  }
-
-  O << " }";
-}
-
-void
-AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
-                                                   unsigned OpNum,
-                                                   const MCSubtargetInfo &STI,
-                                                   raw_ostream &O) {
-  printVectorList(MI, OpNum, STI, O, "");
-}
-
-template <unsigned NumLanes, char LaneKind>
-void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  std::string Suffix(".");
-  if (NumLanes)
-    Suffix += itostr(NumLanes) + LaneKind;
-  else
-    Suffix += LaneKind;
-
-  printVectorList(MI, OpNum, STI, O, Suffix);
-}
-
-void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  O << "[" << MI->getOperand(OpNum).getImm() << "]";
-}
-
-void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-
-  // If the label has already been resolved to an immediate offset (say, when
-  // we're running the disassembler), just print the immediate.
-  if (Op.isImm()) {
-    O << "#" << formatImm(Op.getImm() * 4);
-    return;
-  }
-
-  // If the branch target is simply an address then print it in hex.
-  const MCConstantExpr *BranchTarget =
-      dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
-  int64_t Address;
-  if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
-    O << "0x";
-    O.write_hex(Address);
-  } else {
-    // Otherwise, just print the expression.
-    MI->getOperand(OpNum).getExpr()->print(O, &MAI);
-  }
-}
-
-void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-
-  // If the label has already been resolved to an immediate offset (say, when
-  // we're running the disassembler), just print the immediate.
-  if (Op.isImm()) {
-    O << "#" << formatImm(Op.getImm() * (1 << 12));
-    return;
-  }
-
-  // Otherwise, just print the expression.
-  MI->getOperand(OpNum).getExpr()->print(O, &MAI);
-}
-
-void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-  unsigned Opcode = MI->getOpcode();
-
-  StringRef Name;
-  if (Opcode == AArch64::ISB) {
-    auto ISB = AArch64ISB::lookupISBByEncoding(Val);
-    Name = ISB ? ISB->Name : "";
-  } else if (Opcode == AArch64::TSB) {
-    auto TSB = AArch64TSB::lookupTSBByEncoding(Val);
-    Name = TSB ? TSB->Name : "";
-  } else {
-    auto DB = AArch64DB::lookupDBByEncoding(Val);
-    Name = DB ? DB->Name : "";
-  }
-  if (!Name.empty())
-    O << Name;
-  else
-    O << "#" << Val;
-}
-
-void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-
-  // Horrible hack for the one register that has identical encodings but
-  // different names in MSR and MRS. Because of this, one of MRS and MSR is
-  // going to get the wrong entry
-  if (Val == AArch64SysReg::DBGDTRRX_EL0) {
-    O << "DBGDTRRX_EL0";
-    return;
-  }
-
-  const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
-  if (Reg && Reg->Readable && Reg->haveFeatures(STI.getFeatureBits()))
-    O << Reg->Name;
-  else
-    O << AArch64SysReg::genericRegisterString(Val);
-}
-
-void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-
-  // Horrible hack for the one register that has identical encodings but
-  // different names in MSR and MRS. Because of this, one of MRS and MSR is
-  // going to get the wrong entry
-  if (Val == AArch64SysReg::DBGDTRTX_EL0) {
-    O << "DBGDTRTX_EL0";
-    return;
-  }
-
-  const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
-  if (Reg && Reg->Writeable && Reg->haveFeatures(STI.getFeatureBits()))
-    O << Reg->Name;
-  else
-    O << AArch64SysReg::genericRegisterString(Val);
-}
-
-void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-
-  auto PState = AArch64PState::lookupPStateByEncoding(Val);
-  if (PState && PState->haveFeatures(STI.getFeatureBits()))
-    O << PState->Name;
-  else
-    O << "#" << formatImm(Val);
-}
-
-void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned RawVal = MI->getOperand(OpNo).getImm();
-  uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
-  O << format("#%#016llx", Val);
-}
-
-template<int64_t Angle, int64_t Remainder>
-void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-  O << "#" << (Val * Angle) + Remainder;
-}
-
-void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNum).getImm();
-  if (auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByEncoding(Val))
-    O << Pat->Name;
-  else
-    O << '#' << formatImm(Val);
-}
-
-template <char suffix>
-void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  switch (suffix) {
-  case 0:
-  case 'b':
-  case 'h':
-  case 's':
-  case 'd':
-  case 'q':
-    break;
-  default: llvm_unreachable("Invalid kind specifier.");
-  }
-
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(Reg);
-  if (suffix != 0)
-    O << '.' << suffix;
-}
-
-template <typename T>
-void AArch64InstPrinter::printImmSVE(T Value, raw_ostream &O) {
-  typename std::make_unsigned<T>::type HexValue = Value;
-
-  if (getPrintImmHex())
-    O << '#' << formatHex((uint64_t)HexValue);
-  else
-    O << '#' << formatDec(Value);
-
-  if (CommentStream) {
-    // Do the opposite to that used for instruction operands.
-    if (getPrintImmHex())
-      *CommentStream << '=' << formatDec(HexValue) << '\n';
-    else
-      *CommentStream << '=' << formatHex((uint64_t)Value) << '\n';
-  }
-}
-
-template <typename T>
-void AArch64InstPrinter::printImm8OptLsl(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned UnscaledVal = MI->getOperand(OpNum).getImm();
-  unsigned Shift = MI->getOperand(OpNum + 1).getImm();
-  assert(AArch64_AM::getShiftType(Shift) == AArch64_AM::LSL &&
-         "Unexepected shift type!");
-
-  // #0 lsl #8 is never pretty printed
-  if ((UnscaledVal == 0) && (AArch64_AM::getShiftValue(Shift) != 0)) {
-    O << '#' << formatImm(UnscaledVal);
-    printShifter(MI, OpNum + 1, STI, O);
-    return;
-  }
-
-  T Val;
-  if (std::is_signed<T>())
-    Val = (int8_t)UnscaledVal * (1 << AArch64_AM::getShiftValue(Shift));
-  else
-    Val = (uint8_t)UnscaledVal * (1 << AArch64_AM::getShiftValue(Shift));
-
-  printImmSVE(Val, O);
-}
-
-template <typename T>
-void AArch64InstPrinter::printSVELogicalImm(const MCInst *MI, unsigned OpNum,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  typedef typename std::make_signed<T>::type SignedT;
-  typedef typename std::make_unsigned<T>::type UnsignedT;
-
-  uint64_t Val = MI->getOperand(OpNum).getImm();
-  UnsignedT PrintVal = AArch64_AM::decodeLogicalImmediate(Val, 64);
-
-  // Prefer the default format for 16bit values, hex otherwise.
-  if ((int16_t)PrintVal == (SignedT)PrintVal)
-    printImmSVE((T)PrintVal, O);
-  else if ((uint16_t)PrintVal == PrintVal)
-    printImmSVE(PrintVal, O);
-  else
-    O << '#' << formatHex((uint64_t)PrintVal);
-}
-
-template <int Width>
-void AArch64InstPrinter::printZPRasFPR(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  unsigned Base;
-  switch (Width) {
-  case 8:   Base = AArch64::B0; break;
-  case 16:  Base = AArch64::H0; break;
-  case 32:  Base = AArch64::S0; break;
-  case 64:  Base = AArch64::D0; break;
-  case 128: Base = AArch64::Q0; break;
-  default:
-    llvm_unreachable("Unsupported width");
-  }
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(Reg - AArch64::Z0 + Base);
-}
-
-template <unsigned ImmIs0, unsigned ImmIs1>
-void AArch64InstPrinter::printExactFPImm(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream  &O) {
-  auto *Imm0Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs0);
-  auto *Imm1Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs1);
-  unsigned Val = MI->getOperand(OpNum).getImm();
-  O << "#" << (Val ? Imm1Desc->Repr : Imm0Desc->Repr);
-}
-
-void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(getWRegFromXReg(Reg));
-}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
deleted file mode 100644
index 4e9982f5b7be..000000000000
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ /dev/null
@@ -1,223 +0,0 @@
-//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an AArch64 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
-#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
-
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "../Utils/AArch64BaseInfo.h"
-
-namespace llvm {
-
-class AArch64InstPrinter : public MCInstPrinter {
-public:
-  AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                     const MCRegisterInfo &MRI);
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-
-  // Autogenerated by tblgen.
-  virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                                raw_ostream &O);
-  virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                               raw_ostream &O);
-  virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                                       unsigned PrintMethodIdx,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O);
-
-  virtual StringRef getRegName(unsigned RegNo) const {
-    return getRegisterName(RegNo);
-  }
-
-  static const char *getRegisterName(unsigned RegNo,
-                                     unsigned AltIdx = AArch64::NoRegAltName);
-
-protected:
-  bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
-                     raw_ostream &O);
-  // Operand printers
-  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printImmHex(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-  template <typename T> void printImmSVE(T Value, raw_ostream &O);
-  void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
-                           raw_ostream &O);
-  template <int Amount>
-  void printPostIncOperand(const MCInst *MI, unsigned OpNo,
-                           const MCSubtargetInfo &STI, raw_ostream &O) {
-    printPostIncOperand(MI, OpNo, Amount, O);
-  }
-
-  void printVRegOperand(const MCInst *MI, unsigned OpNo,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSysCROperand(const MCInst *MI, unsigned OpNo,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddSubImm(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-  template <typename T>
-  void printLogicalImm(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printShifter(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printShiftedRegister(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExtendedRegister(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printArithExtend(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
-                      char SrcRegKind, unsigned Width);
-  template <char SrcRegKind, unsigned Width>
-  void printMemExtend(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O) {
-    printMemExtend(MI, OpNum, O, SrcRegKind, Width);
-  }
-  template <bool SignedExtend, int ExtWidth, char SrcRegKind, char Suffix>
-  void printRegWithShiftExtend(const MCInst *MI, unsigned OpNum,
-                               const MCSubtargetInfo &STI, raw_ostream &O);
-  void printCondCode(const MCInst *MI, unsigned OpNum,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printInverseCondCode(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAlignedLabel(const MCInst *MI, unsigned OpNum,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-  void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
-                         raw_ostream &O);
-  void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
-                        raw_ostream &O);
-
-  template <int Scale>
-  void printUImm12Offset(const MCInst *MI, unsigned OpNum,
-                         const MCSubtargetInfo &STI, raw_ostream &O) {
-    printUImm12Offset(MI, OpNum, Scale, O);
-  }
-
-  template <int BitWidth>
-  void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O) {
-    printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
-  }
-
-  void printAMNoIndex(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-
-  template <int Scale>
-  void printImmScale(const MCInst *MI, unsigned OpNum,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-
-  template <bool IsSVEPrefetch = false>
-  void printPrefetchOp(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printPSBHintOp(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printBTIHintOp(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printFPImmOperand(const MCInst *MI, unsigned OpNum,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printVectorList(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O,
-                       StringRef LayoutSuffix);
-
-  /// Print a list of vector registers where the type suffix is implicit
-  /// (i.e. attached to the instruction rather than the registers).
-  void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O);
-
-  template <unsigned NumLanes, char LaneKind>
-  void printTypedVectorList(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printVectorIndex(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAdrpLabel(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-  void printBarrierOption(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSystemPStateField(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  template<int64_t Angle, int64_t Remainder>
-  void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  template<unsigned size>
-  void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O);
-  template <typename T>
-  void printImm8OptLsl(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  template <typename T>
-  void printSVELogicalImm(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSVEPattern(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  template <char = 0>
-  void printSVERegOp(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printGPR64as32(const MCInst *MI, unsigned OpNum,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-  template <int Width>
-  void printZPRasFPR(const MCInst *MI, unsigned OpNum,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  template <unsigned ImmIs0, unsigned ImmIs1>
-  void printExactFPImm(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-};
-
-class AArch64AppleInstPrinter : public AArch64InstPrinter {
-public:
-  AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                          const MCRegisterInfo &MRI);
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                        raw_ostream &O) override;
-  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                       raw_ostream &O) override;
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx,
-                               const MCSubtargetInfo &STI,
-                               raw_ostream &O) override;
-
-  StringRef getRegName(unsigned RegNo) const override {
-    return getRegisterName(RegNo);
-  }
-
-  static const char *getRegisterName(unsigned RegNo,
-                                     unsigned AltIdx = AArch64::NoRegAltName);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 688ca755d0b5..05a909f1780a 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -1,9 +1,8 @@
 //===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index ed89d991d9fb..6418211a4f55 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -1,15 +1,15 @@
 //===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64.h"
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -22,8 +22,10 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
 namespace {
@@ -42,6 +44,8 @@ public:
     return AArch64::NumTargetFixupKinds;
   }
 
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
     const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
         // This table *must* be in the order that the fixup_* kinds are defined
@@ -104,6 +108,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
 
+  case FK_NONE:
   case AArch64::fixup_aarch64_tlsdesc_call:
     return 0;
 
@@ -274,7 +279,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
     if (RefKind & AArch64MCExpr::VK_NC) {
       Value &= 0xFFFF;
     }
-    else if (RefKind & AArch64MCExpr::VK_SABS) {
+    else if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
       if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
         Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
 
@@ -305,6 +310,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
     if (Value & 0x3)
       Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
     return (Value >> 2) & 0x3ffffff;
+  case FK_NONE:
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -315,6 +321,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
   }
 }
 
+Optional<MCFixupKind> AArch64AsmBackend::getFixupKind(StringRef Name) const {
+  if (TheTriple.isOSBinFormatELF() && Name == "R_AARCH64_NONE")
+    return FK_NONE;
+  return MCAsmBackend::getFixupKind(Name);
+}
+
 /// getFixupKindContainereSizeInBytes - The number of bytes of the
 /// container involved in big endian or 0 if the item is little endian
 unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) const {
@@ -398,7 +410,7 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
   // handle this more cleanly. This may affect the output of -show-mc-encoding.
   AArch64MCExpr::VariantKind RefKind =
     static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
-  if (RefKind & AArch64MCExpr::VK_SABS) {
+  if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
     // If the immediate is negative, generate MOVN else MOVZ.
     // (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
     if (SignedValue < 0)
@@ -446,6 +458,10 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
 bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
                                               const MCFixup &Fixup,
                                               const MCValue &Target) {
+  unsigned Kind = Fixup.getKind();
+  if (Kind == FK_NONE)
+    return true;
+
   // The ADRP instruction adds some multiple of 0x1000 to the current PC &
   // ~0xfff. This means that the required offset to reach a symbol can vary by
   // up to one step depending on where the ADRP is in memory. For example:
@@ -458,14 +474,14 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
   // same page as the ADRP and the instruction should encode 0x0. Assuming the
   // section isn't 0x1000-aligned, we therefore need to delegate this decision
   // to the linker -- a relocation!
-  if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
+  if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21)
     return true;
 
   AArch64MCExpr::VariantKind RefKind =
       static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
   AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
   // LDR GOT relocations need a relocation
-  if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
+  if (Kind == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
       SymLoc == AArch64MCExpr::VK_GOT)
     return true;
   return false;
@@ -513,6 +529,7 @@ enum CompactUnwindEncodings {
 // FIXME: This should be in a separate file.
 class DarwinAArch64AsmBackend : public AArch64AsmBackend {
   const MCRegisterInfo &MRI;
+  bool IsILP32;
 
   /// Encode compact unwind stack adjustment for frameless functions.
   /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
@@ -523,13 +540,18 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
 
 public:
   DarwinAArch64AsmBackend(const Target &T, const Triple &TT,
-                          const MCRegisterInfo &MRI)
-      : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI) {}
+                          const MCRegisterInfo &MRI, bool IsILP32)
+      : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI),
+        IsILP32(IsILP32) {}
 
   std::unique_ptr<MCObjectTargetWriter>
   createObjectTargetWriter() const override {
-    return createAArch64MachObjectWriter(MachO::CPU_TYPE_ARM64,
-                                         MachO::CPU_SUBTYPE_ARM64_ALL);
+    if (IsILP32)
+      return createAArch64MachObjectWriter(
+          MachO::CPU_TYPE_ARM64_32, MachO::CPU_SUBTYPE_ARM64_32_V8, true);
+    else
+      return createAArch64MachObjectWriter(MachO::CPU_TYPE_ARM64,
+                                           MachO::CPU_SUBTYPE_ARM64_ALL, false);
   }
 
   /// Generate the compact unwind encoding from the CFI directives.
@@ -711,8 +733,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
                                               const MCRegisterInfo &MRI,
                                               const MCTargetOptions &Options) {
   const Triple &TheTriple = STI.getTargetTriple();
-  if (TheTriple.isOSBinFormatMachO())
-    return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
+  if (TheTriple.isOSBinFormatMachO()) {
+    const bool IsILP32 = TheTriple.isArch32Bit();
+    return new DarwinAArch64AsmBackend(T, TheTriple, MRI, IsILP32);
+  }
 
   if (TheTriple.isOSBinFormatCOFF())
     return new COFFAArch64AsmBackend(T, TheTriple);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 2ccd7cef8bef..c871e2c62eac 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -186,6 +185,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
     if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
       return ELF::R_AARCH64_NONE;
     switch ((unsigned)Fixup.getKind()) {
+    case FK_NONE:
+      return ELF::R_AARCH64_NONE;
     case FK_Data_1:
       Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
       return ELF::R_AARCH64_NONE;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 9a7e34b0aeb1..c33f7e957b54 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -103,8 +102,8 @@ public:
   /// This function is the one used to emit instruction data into the ELF
   /// streamer. We override it to add the appropriate mapping symbol if
   /// necessary.
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool) override {
+  void EmitInstruction(const MCInst &Inst,
+                       const MCSubtargetInfo &STI) override {
     EmitA64MappingSymbol();
     MCELFStreamer::EmitInstruction(Inst, STI);
   }
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index d5b009ec30d1..25c609ee1496 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -1,9 +1,8 @@
 //===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 4293dcba955e..fe8043fe5ec0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -1,9 +1,8 @@
 //===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
new file mode 100644
index 000000000000..d0a544273b8b
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -0,0 +1,1587 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter1.inc"
+
+/// Forwards MAI, MII and MRI unchanged to the MCInstPrinter base constructor.
+AArch64InstPrinter::AArch64InstPrinter(
+    const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+/// Forwards MAI, MII and MRI unchanged to the AArch64InstPrinter constructor.
+AArch64AppleInstPrinter::AArch64AppleInstPrinter(
+    const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+    : AArch64InstPrinter(MAI, MII, MRI) {}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // Writes getRegisterName(RegNo) to OS as-is; this is for .cfi directives.
+  OS << getRegisterName(RegNo);
+}
+
+/// Print \p MI to \p O, preferring the canonical alias for special encodings
+/// (e.g. SYS, SBFM/UBFM/BFM bitfield moves, MOVZ/MOVN/MOVK/ORR moves)
+/// before falling back to printAliasInstr() and then printInstruction().
+/// \p Annot is passed to printAnnotation() on every path that returns.
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                   StringRef Annot,
+                                   const MCSubtargetInfo &STI) {
+  unsigned Opcode = MI->getOpcode();
+
+  if (Opcode == AArch64::SYSxt)
+    if (printSysAlias(MI, STI, O)) {
+      printAnnotation(O, Annot);
+      return;
+    }
+
+  // SBFM/UBFM should print to a nicer aliased form if possible.
+  if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri ||
+      Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) {
+    const MCOperand &Op0 = MI->getOperand(0);
+    const MCOperand &Op1 = MI->getOperand(1);
+    const MCOperand &Op2 = MI->getOperand(2);
+    const MCOperand &Op3 = MI->getOperand(3);
+
+    bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri);
+    bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri);
+    if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) {
+      const char *AsmMnemonic = nullptr;
+      switch (Op3.getImm()) {
+      default:
+        break;
+      case 7:
+        if (IsSigned)
+          AsmMnemonic = "sxtb";
+        else if (!Is64Bit)
+          AsmMnemonic = "uxtb";
+        break;
+      case 15:
+        if (IsSigned)
+          AsmMnemonic = "sxth";
+        else if (!Is64Bit)
+          AsmMnemonic = "uxth";
+        break;
+      case 31:
+        // *xtw is only valid for signed 64-bit operations.
+        if (Is64Bit && IsSigned)
+          AsmMnemonic = "sxtw";
+        break;
+      }
+      if (AsmMnemonic) {
+        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+          << ", " << getRegisterName(getWRegFromXReg(Op1.getReg()));
+        printAnnotation(O, Annot);
+        return;
+      }
+    }
+
+    // All immediate shifts are aliases, implemented using the Bitfield
+    // instruction. In all cases the immediate shift amount must be in the
+    // range 0 to (reg.size - 1).
+    if (Op2.isImm() && Op3.isImm()) {
+      const char *AsmMnemonic = nullptr;
+      int shift = 0;
+      int64_t immr = Op2.getImm();
+      int64_t imms = Op3.getImm();
+      if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) {
+        AsmMnemonic = "lsl";
+        shift = 31 - imms;
+      } else if (Opcode == AArch64::UBFMXri && imms != 0x3f &&
+                 ((imms + 1 == immr))) {
+        AsmMnemonic = "lsl";
+        shift = 63 - imms;
+      } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) {
+        AsmMnemonic = "lsr";
+        shift = immr;
+      } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) {
+        AsmMnemonic = "lsr";
+        shift = immr;
+      } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) {
+        AsmMnemonic = "asr";
+        shift = immr;
+      } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) {
+        AsmMnemonic = "asr";
+        shift = immr;
+      }
+      if (AsmMnemonic) {
+        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+          << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
+        printAnnotation(O, Annot);
+        return;
+      }
+    }
+
+    // SBFIZ/UBFIZ aliases: lsb = RegSize - immr, width = imms + 1.
+    if (Op2.getImm() > Op3.getImm()) {
+      O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t'
+        << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
+        << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1;
+      printAnnotation(O, Annot);
+      return;
+    }
+
+    // Otherwise SBFX/UBFX is preferred: lsb = immr, width = imms - immr + 1.
+    O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
+      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
+      << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) {
+    const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0
+    const MCOperand &Op2 = MI->getOperand(2);
+    int ImmR = MI->getOperand(3).getImm();
+    int ImmS = MI->getOperand(4).getImm();
+
+    if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
+        (ImmR == 0 || ImmS < ImmR)) {
+      // BFC takes precedence over its whole range, slightly differently to BFI.
+      int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+      int LSB = (BitWidth - ImmR) % BitWidth;
+      int Width = ImmS + 1;
+      O << "\tbfc\t" << getRegisterName(Op0.getReg())
+        << ", #" << LSB << ", #" << Width;
+      printAnnotation(O, Annot);
+      return;
+    } else if (ImmS < ImmR) {
+      // BFI alias
+      int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+      int LSB = (BitWidth - ImmR) % BitWidth;
+      int Width = ImmS + 1;
+      O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
+        << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
+      printAnnotation(O, Annot);
+      return;
+    }
+
+    int LSB = ImmR;
+    int Width = ImmS - ImmR + 1;
+    // Otherwise BFXIL is the preferred form.
+    O << "\tbfxil\t"
+      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
+      << ", #" << LSB << ", #" << Width;
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
+  // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
+  // printed.
+  if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi ||
+       Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) &&
+      MI->getOperand(1).isExpr()) {
+    if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi)
+      O << "\tmovz\t";
+    else
+      O << "\tmovn\t";
+    O << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+    MI->getOperand(1).getExpr()->print(O, &MAI);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) &&
+      MI->getOperand(2).isExpr()) {
+    O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+    MI->getOperand(2).getExpr()->print(O, &MAI);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // MOVZ, MOVN and "ORR wzr, #imm" instructions are aliases for MOV, but their
+  // domains overlap so they need to be prioritized. The chain is "MOVZ lsl #0 >
+  // MOVZ lsl #N > MOVN lsl #0 > MOVN lsl #N > ORR". The highest instruction
+  // that can represent the move is the MOV alias, and the rest get printed
+  // normally.
+  if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi) &&
+      MI->getOperand(1).isImm() && MI->getOperand(2).isImm()) {
+    int RegWidth = Opcode == AArch64::MOVZXi ? 64 : 32;
+    int Shift = MI->getOperand(2).getImm();
+    uint64_t Value = (uint64_t)MI->getOperand(1).getImm() << Shift;
+    if (AArch64_AM::isMOVZMovAlias(Value, Shift, RegWidth)) {
+      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+        << formatImm(SignExtend64(Value, RegWidth));
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  if ((Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) &&
+      MI->getOperand(1).isImm() && MI->getOperand(2).isImm()) {
+    int RegWidth = Opcode == AArch64::MOVNXi ? 64 : 32;
+    int Shift = MI->getOperand(2).getImm();
+    uint64_t Value = ~((uint64_t)MI->getOperand(1).getImm() << Shift);
+    if (RegWidth == 32)
+      Value = Value & 0xffffffff;
+    if (AArch64_AM::isMOVNMovAlias(Value, Shift, RegWidth)) {
+      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+        << formatImm(SignExtend64(Value, RegWidth));
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  if ((Opcode == AArch64::ORRXri || Opcode == AArch64::ORRWri) &&
+      (MI->getOperand(1).getReg() == AArch64::XZR ||
+       MI->getOperand(1).getReg() == AArch64::WZR) &&
+      MI->getOperand(2).isImm()) {
+    int RegWidth = Opcode == AArch64::ORRXri ? 64 : 32;
+    uint64_t Value = AArch64_AM::decodeLogicalImmediate(
+        MI->getOperand(2).getImm(), RegWidth);
+    if (!AArch64_AM::isAnyMOVWMovAlias(Value, RegWidth)) {
+      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+        << formatImm(SignExtend64(Value, RegWidth));
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  if (Opcode == AArch64::CompilerBarrier) {
+    O << '\t' << MAI.getCommentString() << " COMPILER BARRIER";
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // Instruction TSB is specified as a one operand instruction, but 'csync' is
+  // not encoded, so for printing it is treated as a special case here:
+  if (Opcode == AArch64::TSB) {
+    O << "\ttsb\tcsync";
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (!printAliasInstr(MI, STI, O))
+    printInstruction(MI, STI, O);
+
+  printAnnotation(O, Annot);
+
+  if (atomicBarrierDroppedOnZero(Opcode) &&
+      (MI->getOperand(0).getReg() == AArch64::XZR ||
+       MI->getOperand(0).getReg() == AArch64::WZR)) {
+    printAnnotation(O, "acquire semantics dropped since destination is zero");
+  }
+}
+
+static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
+                                bool &IsTbx) { // Outputs set only on a match.
+  switch (Opcode) { // Case groups: TBX .8b, TBL .8b, TBX .16b, TBL .16b.
+  case AArch64::TBXv8i8One:
+  case AArch64::TBXv8i8Two:
+  case AArch64::TBXv8i8Three:
+  case AArch64::TBXv8i8Four:
+    IsTbx = true;
+    Layout = ".8b";
+    return true;
+  case AArch64::TBLv8i8One:
+  case AArch64::TBLv8i8Two:
+  case AArch64::TBLv8i8Three:
+  case AArch64::TBLv8i8Four:
+    IsTbx = false;
+    Layout = ".8b";
+    return true;
+  case AArch64::TBXv16i8One:
+  case AArch64::TBXv16i8Two:
+  case AArch64::TBXv16i8Three:
+  case AArch64::TBXv16i8Four:
+    IsTbx = true;
+    Layout = ".16b";
+    return true;
+  case AArch64::TBLv16i8One:
+  case AArch64::TBLv16i8Two:
+  case AArch64::TBLv16i8Three:
+  case AArch64::TBLv16i8Four:
+    IsTbx = false;
+    Layout = ".16b";
+    return true;
+  default:
+    return false; // Not a TBL/TBX opcode; Layout and IsTbx are not written.
+  }
+}
+
+struct LdStNInstrDesc { // Row type of the LdStNInstInfo table.
+  unsigned Opcode;      // AArch64:: opcode this row describes.
+  const char *Mnemonic; // Mnemonic text, e.g. "ld1" or "ld1r".
+  const char *Layout;   // Arrangement suffix, e.g. ".16b" or ".b".
+  int ListOperand;      // NOTE(review): looks like the vector-list operand index.
+  bool HasLane;         // True on the table's lane-indexed rows (e.g. LD1i8).
+  int NaturalOffset;    // 0 on non-_POST rows; presumably a byte count - confirm.
+};
+
+static const LdStNInstrDesc LdStNInstInfo[] = {
+  { AArch64::LD1i8,             "ld1",  ".b",     1, true,  0  },
+  { AArch64::LD1i16,            "ld1",  ".h",     1, true,  0  },
+  { AArch64::LD1i32,            "ld1",  ".s",     1, true,  0  },
+  { AArch64::LD1i64,            "ld1",  ".d",     1, true,  0  },
+  { AArch64::LD1i8_POST,        "ld1",  ".b",     2, true,  1  },
+  { AArch64::LD1i16_POST,       "ld1",  ".h",     2, true,  2  },
+  { AArch64::LD1i32_POST,       "ld1",  ".s",     2, true,  4  },
+  { AArch64::LD1i64_POST,       "ld1",  ".d",     2, true,  8  },
+  { AArch64::LD1Rv16b,          "ld1r", ".16b",   0, false, 0  },
+  { AArch64::LD1Rv8h,           "ld1r", ".8h",    0, false, 0  },
+  { AArch64::LD1Rv4s,           "ld1r", ".4s",    0, false, 0  },
+  { AArch64::LD1Rv2d,           "ld1r", ".2d",    0, false, 0  },
+  { AArch64::LD1Rv8b,           "ld1r", ".8b",    0, false, 0  },
+  { AArch64::LD1Rv4h,           "ld1r", ".4h",    0, false, 0  },
+  { AArch64::LD1Rv2s,           "ld1r", ".2s",    0, false, 0  },
+  { AArch64::LD1Rv1d,           "ld1r", ".1d",    0, false, 0  },
+  { AArch64::LD1Rv16b_POST,     "ld1r", ".16b",   1, false, 1  },
+  { AArch64::LD1Rv8h_POST,      "ld1r", ".8h",    1, false, 2  },
+  { AArch64::LD1Rv4s_POST,      "ld1r", ".4s",    1, false, 4  },
+  { AArch64::LD1Rv2d_POST,      "ld1r", ".2d",    1, false, 8  },
+  { AArch64::LD1Rv8b_POST,      "ld1r", ".8b",    1, false, 1  },
+  { AArch64::LD1Rv4h_POST,      "ld1r", ".4h",    1, false, 2  },
+  { AArch64::LD1Rv2s_POST,      "ld1r", ".2s",    1, false, 4  },
+  { AArch64::LD1Rv1d_POST,      "ld1r", ".1d",    1, false, 8  },
+  { AArch64::LD1Onev16b,        "ld1",  ".16b",   0, false, 0  },
+  { AArch64::LD1Onev8h,         "ld1",  ".8h",    0, false, 0  },
+  { AArch64::LD1Onev4s,         "ld1",  ".4s",    0, false, 0  },
+  { AArch64::LD1Onev2d,         "ld1",  ".2d",    0, false, 0  },
+  { AArch64::LD1Onev8b,         "ld1",  ".8b",    0, false, 0  },
+  { AArch64::LD1Onev4h,         "ld1",  ".4h",    0, false, 0  },
+  { AArch64::LD1Onev2s,         "ld1",  ".2s",    0, false, 0  },
+  { AArch64::LD1Onev1d,         "ld1",  ".1d",    0, false, 0  },
+  { AArch64::LD1Onev16b_POST,   "ld1",  ".16b",   1, false, 16 },
+  { AArch64::LD1Onev8h_POST,    "ld1",  ".8h",    1, false, 16 },
+  { AArch64::LD1Onev4s_POST,    "ld1",  ".4s",    1, false, 16 },
+  { AArch64::LD1Onev2d_POST,    "ld1",  ".2d",    1, false, 16 },
+  { AArch64::LD1Onev8b_POST,    "ld1",  ".8b",    1, false, 8  },
+  { AArch64::LD1Onev4h_POST,    "ld1",  ".4h",    1, false, 8  },
+  { AArch64::LD1Onev2s_POST,    "ld1",  ".2s",    1, false, 8  },
+  { AArch64::LD1Onev1d_POST,    "ld1",  ".1d",    1, false, 8  },
+  { AArch64::LD1Twov16b,        "ld1",  ".16b",   0, false, 0  },
+  { AArch64::LD1Twov8h,         "ld1",  ".8h",    0, false, 0  },
+  { AArch64::LD1Twov4s,         "ld1",  ".4s",    0, false, 0  },
+  { AArch64::LD1Twov2d,         "ld1",  ".2d",    0, false, 0  },
+  { AArch64::LD1Twov8b,         "ld1",  ".8b",    0, false, 0  },
+  { AArch64::LD1Twov4h,         "ld1",  ".4h",    0, false, 0  },
+  { AArch64::LD1Twov2s,         "ld1",  ".2s",    0, false, 0  },
+  { AArch64::LD1Twov1d,         "ld1",  ".1d",    0, false, 0  },
+  { AArch64::LD1Twov16b_POST,   "ld1",  ".16b",   1, false, 32 },
+  { AArch64::LD1Twov8h_POST,    "ld1",  ".8h",    1, false, 32 },
+  { AArch64::LD1Twov4s_POST,    "ld1",  ".4s",    1, false, 32 },
+  { AArch64::LD1Twov2d_POST,    "ld1",  ".2d",    1, false, 32 },
+  { AArch64::LD1Twov8b_POST,    "ld1",  ".8b",    1, false, 16 },
+  { AArch64::LD1Twov4h_POST,    "ld1",  ".4h",    1, false, 16 },
+  { AArch64::LD1Twov2s_POST,    "ld1",  ".2s",    1, false, 16 },
+  { AArch64::LD1Twov1d_POST,    "ld1",  ".1d",    1, false, 16 },
+  { AArch64::LD1Threev16b,      "ld1",  ".16b",   0, false, 0  },
+  { AArch64::LD1Threev8h,       "ld1",  ".8h",    0, false, 0  },
+  { AArch64::LD1Threev4s,       "ld1",  ".4s",    0, false, 0  },
+  { AArch64::LD1Threev2d,       "ld1",  ".2d",    0, false, 0  },
+  { AArch64::LD1Threev8b,       "ld1",  ".8b",    0, false, 0  },
+  { AArch64::LD1Threev4h,       "ld1",  ".4h",    0, false, 0  },
+  { AArch64::LD1Threev2s,       "ld1",  ".2s",    0, false, 0  },
+  { AArch64::LD1Threev1d,       "ld1",  ".1d",    0, false, 0  },
+  { AArch64::LD1Threev16b_POST, "ld1",  ".16b",   1, false, 48 },
+  { AArch64::LD1Threev8h_POST,  "ld1",  ".8h",    1, false, 48 },
+  { AArch64::LD1Threev4s_POST,  "ld1",  ".4s",    1, false, 48 },
+  { AArch64::LD1Threev2d_POST,  "ld1",  ".2d",    1, false, 48 },
+  { AArch64::LD1Threev8b_POST,  "ld1",  ".8b",    1, false, 24 },
+  { AArch64::LD1Threev4h_POST,  "ld1",  ".4h",    1, false, 24 },
+  { AArch64::LD1Threev2s_POST,  "ld1",  ".2s",    1, false, 24 },
+  { AArch64::LD1Threev1d_POST,  "ld1",  ".1d",    1, false, 24 },
+  { AArch64::LD1Fourv16b,       "ld1",  ".16b",   0, false, 0  },
+  { AArch64::LD1Fourv8h,        "ld1",  ".8h",    0, false, 0  },
+  { AArch64::LD1Fourv4s,        "ld1",  ".4s",    0, false, 0  },
+  { AArch64::LD1Fourv2d,        "ld1",  ".2d",    0, false, 0  },
+  { AArch64::LD1Fourv8b,        "ld1",  ".8b",    0, false, 0  },
+  { AArch64::LD1Fourv4h,        "ld1",  ".4h",    0, false, 0  },
+  { AArch64::LD1Fourv2s,        "ld1",  ".2s",    0, false, 0  },
+  { AArch64::LD1Fourv1d,        "ld1",  ".1d",    0, false, 0  },
+  { AArch64::LD1Fourv16b_POST,  "ld1",  ".16b",   1, false, 64 },
+  { AArch64::LD1Fourv8h_POST,   "ld1",  ".8h",    1, false, 64 },
+  { AArch64::LD1Fourv4s_POST,   "ld1",  ".4s",    1, false, 64 },
+  { AArch64::LD1Fourv2d_POST,   "ld1",  ".2d",    1, false, 64 },
+  { AArch64::LD1Fourv8b_POST,   "ld1",  ".8b",    1, false, 32 },
+  { AArch64::LD1Fourv4h_POST,   "ld1",  ".4h",    1, false, 32 },
+  { AArch64::LD1Fourv2s_POST,   "ld1",  ".2s",    1, false, 32 },
+  { AArch64::LD1Fourv1d_POST,   "ld1",  ".1d",    1, false, 32 },
+  { AArch64::LD2i8,             "ld2",  ".b",     1, true,  0  },
+  { AArch64::LD2i16,            "ld2",  ".h",     1, true,  0  },
+  { AArch64::LD2i32,            "ld2",  ".s",     1, true,  0  },
+  { AArch64::LD2i64,            "ld2",  ".d",     1, true,  0  },
+  { AArch64::LD2i8_POST,        "ld2",  ".b",     2, true,  2  },
+  { AArch64::LD2i16_POST,       "ld2",  ".h",     2, true,  4  },
+  { AArch64::LD2i32_POST,       "ld2",  ".s",     2, true,  8  },
+  { AArch64::LD2i64_POST,       "ld2",  ".d",     2, true,  16  },
+  { AArch64::LD2Rv16b,          "ld2r", ".16b",   0, false, 0  },
+  { AArch64::LD2Rv8h,           "ld2r", ".8h",    0, false, 0  },
+  { AArch64::LD2Rv4s,           "ld2r", ".4s",    0, false, 0  },
+  { AArch64::LD2Rv2d,           "ld2r", ".2d",    0, false, 0  },
+  { AArch64::LD2Rv8b,           "ld2r", ".8b",    0, false, 0  },
+  { AArch64::LD2Rv4h,           "ld2r", ".4h",    0, false, 0  },
+  { AArch64::LD2Rv2s,           "ld2r", ".2s",    0, false, 0  },
+  { AArch64::LD2Rv1d,           "ld2r", ".1d",    0, false, 0  },
+  { AArch64::LD2Rv16b_POST,     "ld2r", ".16b",   1, false, 2  },
+  { AArch64::LD2Rv8h_POST,      "ld2r", ".8h",    1, false, 4  },
+  { AArch64::LD2Rv4s_POST,      "ld2r", ".4s",    1, false, 8  },
+  { AArch64::LD2Rv2d_POST,      "ld2r", ".2d",    1, false, 16 },
+  { AArch64::LD2Rv8b_POST,      "ld2r", ".8b",    1, false, 2  },
+  { AArch64::LD2Rv4h_POST,      "ld2r", ".4h",    1, false, 4  },
+  { AArch64::LD2Rv2s_POST,      "ld2r", ".2s",    1, false, 8  },
+  { AArch64::LD2Rv1d_POST,      "ld2r", ".1d",    1, false, 16 },
+  { AArch64::LD2Twov16b,        "ld2",  ".16b",   0, false, 0  },
+  { AArch64::LD2Twov8h,         "ld2",  ".8h",    0, false, 0  },
+  { AArch64::LD2Twov4s,         "ld2",  ".4s",    0, false, 0  },
+  { AArch64::LD2Twov2d,         "ld2",  ".2d",    0, false, 0  },
+  { AArch64::LD2Twov8b,         "ld2",  ".8b",    0, false, 0  },
+  { AArch64::LD2Twov4h,         "ld2",  ".4h",    0, false, 0  },
+  { AArch64::LD2Twov2s,         "ld2",  ".2s",    0, false, 0  },
+  { AArch64::LD2Twov16b_POST,   "ld2",  ".16b",   1, false, 32 },
+  { AArch64::LD2Twov8h_POST,    "ld2",  ".8h",    1, false, 32 },
+  { AArch64::LD2Twov4s_POST,    "ld2",  ".4s",    1, false, 32 },
+  { AArch64::LD2Twov2d_POST,    "ld2",  ".2d",    1, false, 32 },
+  { AArch64::LD2Twov8b_POST,    "ld2",  ".8b",    1, false, 16 },
+  { AArch64::LD2Twov4h_POST,    "ld2",  ".4h",    1, false, 16 },
+  { AArch64::LD2Twov2s_POST,    "ld2",  ".2s",    1, false, 16 },
+  { AArch64::LD3i8,             "ld3",  ".b",     1, true,  0  },
+  { AArch64::LD3i16,            "ld3",  ".h",     1, true,  0  },
+  { AArch64::LD3i32,            "ld3",  ".s",     1, true,  0  },
+  { AArch64::LD3i64,            "ld3",  ".d",     1, true,  0  },
+  { AArch64::LD3i8_POST,        "ld3",  ".b",     2, true,  3  },
+  { AArch64::LD3i16_POST,       "ld3",  ".h",     2, true,  6  },
+  { AArch64::LD3i32_POST,       "ld3",  ".s",     2, true,  12 },
+  { AArch64::LD3i64_POST,       "ld3",  ".d",     2, true,  24 },
+  { AArch64::LD3Rv16b,          "ld3r", ".16b",   0, false, 0  },
+  { AArch64::LD3Rv8h,           "ld3r", ".8h",    0, false, 0  },
+  { AArch64::LD3Rv4s,           "ld3r", ".4s",    0, false, 0  },
+  { AArch64::LD3Rv2d,           "ld3r", ".2d",    0, false, 0  },
+  { AArch64::LD3Rv8b,           "ld3r", ".8b",    0, false, 0  },
+  { AArch64::LD3Rv4h,           "ld3r", ".4h",    0, false, 0  },
+  { AArch64::LD3Rv2s,           "ld3r", ".2s",    0, false, 0  },
+  { AArch64::LD3Rv1d,           "ld3r", ".1d",    0, false, 0  },
+  { AArch64::LD3Rv16b_POST,     "ld3r", ".16b",   1, false, 3  },
+  { AArch64::LD3Rv8h_POST,      "ld3r", ".8h",    1, false, 6  },
+  { AArch64::LD3Rv4s_POST,      "ld3r", ".4s",    1, false, 12 },
+  { AArch64::LD3Rv2d_POST,      "ld3r", ".2d",    1, false, 24 },
+  { AArch64::LD3Rv8b_POST,      "ld3r", ".8b",    1, false, 3  },
+  { AArch64::LD3Rv4h_POST,      "ld3r", ".4h",    1, false, 6  },
+  { AArch64::LD3Rv2s_POST,      "ld3r", ".2s",    1, false, 12 },
+  { AArch64::LD3Rv1d_POST,      "ld3r", ".1d",    1, false, 24 },
+  { AArch64::LD3Threev16b,      "ld3",  ".16b",   0, false, 0  },
+  { AArch64::LD3Threev8h,       "ld3",  ".8h",    0, false, 0  },
+  { AArch64::LD3Threev4s,       "ld3",  ".4s",    0, false, 0  },
+  { AArch64::LD3Threev2d,       "ld3",  ".2d",    0, false, 0  },
+  { AArch64::LD3Threev8b,       "ld3",  ".8b",    0, false, 0  },
+  { AArch64::LD3Threev4h,       "ld3",  ".4h",    0, false, 0  },
+  { AArch64::LD3Threev2s,       "ld3",  ".2s",    0, false, 0  },
+  { AArch64::LD3Threev16b_POST, "ld3",  ".16b",   1, false, 48 },
+  { AArch64::LD3Threev8h_POST,  "ld3",  ".8h",    1, false, 48 },
+  { AArch64::LD3Threev4s_POST,  "ld3",  ".4s",    1, false, 48 },
+  { AArch64::LD3Threev2d_POST,  "ld3",  ".2d",    1, false, 48 },
+  { AArch64::LD3Threev8b_POST,  "ld3",  ".8b",    1, false, 24 },
+  { AArch64::LD3Threev4h_POST,  "ld3",  ".4h",    1, false, 24 },
+  { AArch64::LD3Threev2s_POST,  "ld3",  ".2s",    1, false, 24 },
+  { AArch64::LD4i8,             "ld4",  ".b",     1, true,  0  },
+  { AArch64::LD4i16,            "ld4",  ".h",     1, true,  0  },
+  { AArch64::LD4i32,            "ld4",  ".s",     1, true,  0  },
+  { AArch64::LD4i64,            "ld4",  ".d",     1, true,  0  },
+  { AArch64::LD4i8_POST,        "ld4",  ".b",     2, true,  4  },
+  { AArch64::LD4i16_POST,       "ld4",  ".h",     2, true,  8  },
+  { AArch64::LD4i32_POST,       "ld4",  ".s",     2, true,  16 },
+  { AArch64::LD4i64_POST,       "ld4",  ".d",     2, true,  32 },
+  { AArch64::LD4Rv16b,          "ld4r", ".16b",   0, false, 0  },
+  { AArch64::LD4Rv8h,           "ld4r", ".8h",    0, false, 0  },
+  { AArch64::LD4Rv4s,           "ld4r", ".4s",    0, false, 0  },
+  { AArch64::LD4Rv2d,           "ld4r", ".2d",    0, false, 0  },
+  { AArch64::LD4Rv8b,           "ld4r", ".8b",    0, false, 0  },
+  { AArch64::LD4Rv4h,           "ld4r", ".4h",    0, false, 0  },
+  { AArch64::LD4Rv2s,           "ld4r", ".2s",    0, false, 0  },
+  { AArch64::LD4Rv1d,           "ld4r", ".1d",    0, false, 0  },
+  { AArch64::LD4Rv16b_POST,     "ld4r", ".16b",   1, false, 4  },
+  { AArch64::LD4Rv8h_POST,      "ld4r", ".8h",    1, false, 8  },
+  { AArch64::LD4Rv4s_POST,      "ld4r", ".4s",    1, false, 16 },
+  { AArch64::LD4Rv2d_POST,      "ld4r", ".2d",    1, false, 32 },
+  { AArch64::LD4Rv8b_POST,      "ld4r", ".8b",    1, false, 4  },
+  { AArch64::LD4Rv4h_POST,      "ld4r", ".4h",    1, false, 8  },
+  { AArch64::LD4Rv2s_POST,      "ld4r", ".2s",    1, false, 16 },
+  { AArch64::LD4Rv1d_POST,      "ld4r", ".1d",    1, false, 32 },
+  { AArch64::LD4Fourv16b,       "ld4",  ".16b",   0, false, 0  },
+  { AArch64::LD4Fourv8h,        "ld4",  ".8h",    0, false, 0  },
+  { AArch64::LD4Fourv4s,        "ld4",  ".4s",    0, false, 0  },
+  { AArch64::LD4Fourv2d,        "ld4",  ".2d",    0, false, 0  },
+  { AArch64::LD4Fourv8b,        "ld4",  ".8b",    0, false, 0  },
+  { AArch64::LD4Fourv4h,        "ld4",  ".4h",    0, false, 0  },
+  { AArch64::LD4Fourv2s,        "ld4",  ".2s",    0, false, 0  },
+  { AArch64::LD4Fourv16b_POST,  "ld4",  ".16b",   1, false, 64 },
+  { AArch64::LD4Fourv8h_POST,   "ld4",  ".8h",    1, false, 64 },
+  { AArch64::LD4Fourv4s_POST,   "ld4",  ".4s",    1, false, 64 },
+  { AArch64::LD4Fourv2d_POST,   "ld4",  ".2d",    1, false, 64 },
+  { AArch64::LD4Fourv8b_POST,   "ld4",  ".8b",    1, false, 32 },
+  { AArch64::LD4Fourv4h_POST,   "ld4",  ".4h",    1, false, 32 },
+  { AArch64::LD4Fourv2s_POST,   "ld4",  ".2s",    1, false, 32 },
+  { AArch64::ST1i8,             "st1",  ".b",     0, true,  0  },
+  { AArch64::ST1i16,            "st1",  ".h",     0, true,  0  },
+  { AArch64::ST1i32,            "st1",  ".s",     0, true,  0  },
+  { AArch64::ST1i64,            "st1",  ".d",     0, true,  0  },
+  { AArch64::ST1i8_POST,        "st1",  ".b",     1, true,  1  },
+  { AArch64::ST1i16_POST,       "st1",  ".h",     1, true,  2  },
+  { AArch64::ST1i32_POST,       "st1",  ".s",     1, true,  4  },
+  { AArch64::ST1i64_POST,       "st1",  ".d",     1, true,  8  },
+  { AArch64::ST1Onev16b,        "st1",  ".16b",   0, false, 0  },
+  { AArch64::ST1Onev8h,         "st1",  ".8h",    0, false, 0  },
+  { AArch64::ST1Onev4s,         "st1",  ".4s",    0, false, 0  },
+  { AArch64::ST1Onev2d,         "st1",  ".2d",    0, false, 0  },
+  { AArch64::ST1Onev8b,         "st1",  ".8b",    0, false, 0  },
+  { AArch64::ST1Onev4h,         "st1",  ".4h",    0, false, 0  },
+  { AArch64::ST1Onev2s,         "st1",  ".2s",    0, false, 0  },
+  { AArch64::ST1Onev1d,         "st1",  ".1d",    0, false, 0  },
+  { AArch64::ST1Onev16b_POST,   "st1",  ".16b",   1, false, 16 },
+  { AArch64::ST1Onev8h_POST,    "st1",  ".8h",    1, false, 16 },
+  { AArch64::ST1Onev4s_POST,    "st1",  ".4s",    1, false, 16 },
+  { AArch64::ST1Onev2d_POST,    "st1",  ".2d",    1, false, 16 },
+  { AArch64::ST1Onev8b_POST,    "st1",  ".8b",    1, false, 8  },
+  { AArch64::ST1Onev4h_POST,    "st1",  ".4h",    1, false, 8  },
+  { AArch64::ST1Onev2s_POST,    "st1",  ".2s",    1, false, 8  },
+  { AArch64::ST1Onev1d_POST,    "st1",  ".1d",    1, false, 8  },
+  { AArch64::ST1Twov16b,        "st1",  ".16b",   0, false, 0  },
+  { AArch64::ST1Twov8h,         "st1",  ".8h",    0, false, 0  },
+  { AArch64::ST1Twov4s,         "st1",  ".4s",    0, false, 0  },
+  { AArch64::ST1Twov2d,         "st1",  ".2d",    0, false, 0  },
+  { AArch64::ST1Twov8b,         "st1",  ".8b",    0, false, 0  },
+  { AArch64::ST1Twov4h,         "st1",  ".4h",    0, false, 0  },
+  { AArch64::ST1Twov2s,         "st1",  ".2s",    0, false, 0  },
+  { AArch64::ST1Twov1d,         "st1",  ".1d",    0, false, 0  },
+  { AArch64::ST1Twov16b_POST,   "st1",  ".16b",   1, false, 32 },
+  { AArch64::ST1Twov8h_POST,    "st1",  ".8h",    1, false, 32 },
+  { AArch64::ST1Twov4s_POST,    "st1",  ".4s",    1, false, 32 },
+  { AArch64::ST1Twov2d_POST,    "st1",  ".2d",    1, false, 32 },
+  { AArch64::ST1Twov8b_POST,    "st1",  ".8b",    1, false, 16 },
+  { AArch64::ST1Twov4h_POST,    "st1",  ".4h",    1, false, 16 },
+  { AArch64::ST1Twov2s_POST,    "st1",  ".2s",    1, false, 16 },
+  { AArch64::ST1Twov1d_POST,    "st1",  ".1d",    1, false, 16 },
+  { AArch64::ST1Threev16b,      "st1",  ".16b",   0, false, 0  },
+  { AArch64::ST1Threev8h,       "st1",  ".8h",    0, false, 0  },
+  { AArch64::ST1Threev4s,       "st1",  ".4s",    0, false, 0  },
+  { AArch64::ST1Threev2d,       "st1",  ".2d",    0, false, 0  },
+  { AArch64::ST1Threev8b,       "st1",  ".8b",    0, false, 0  },
+  { AArch64::ST1Threev4h,       "st1",  ".4h",    0, false, 0  },
+  { AArch64::ST1Threev2s,       "st1",  ".2s",    0, false, 0  },
+  { AArch64::ST1Threev1d,       "st1",  ".1d",    0, false, 0  },
+  { AArch64::ST1Threev16b_POST, "st1",  ".16b",   1, false, 48 },
+  { AArch64::ST1Threev8h_POST,  "st1",  ".8h",    1, false, 48 },
+  { AArch64::ST1Threev4s_POST,  "st1",  ".4s",    1, false, 48 },
+  { AArch64::ST1Threev2d_POST,  "st1",  ".2d",    1, false, 48 },
+  { AArch64::ST1Threev8b_POST,  "st1",  ".8b",    1, false, 24 },
+  { AArch64::ST1Threev4h_POST,  "st1",  ".4h",    1, false, 24 },
+  { AArch64::ST1Threev2s_POST,  "st1",  ".2s",    1, false, 24 },
+  { AArch64::ST1Threev1d_POST,  "st1",  ".1d",    1, false, 24 },
+  { AArch64::ST1Fourv16b,       "st1",  ".16b",   0, false, 0  },
+  { AArch64::ST1Fourv8h,        "st1",  ".8h",    0, false, 0  },
+  { AArch64::ST1Fourv4s,        "st1",  ".4s",    0, false, 0  },
+  { AArch64::ST1Fourv2d,        "st1",  ".2d",    0, false, 0  },
+  { AArch64::ST1Fourv8b,        "st1",  ".8b",    0, false, 0  },
+  { AArch64::ST1Fourv4h,        "st1",  ".4h",    0, false, 0  },
+  { AArch64::ST1Fourv2s,        "st1",  ".2s",    0, false, 0  },
+  { AArch64::ST1Fourv1d,        "st1",  ".1d",    0, false, 0  },
+  { AArch64::ST1Fourv16b_POST,  "st1",  ".16b",   1, false, 64 },
+  { AArch64::ST1Fourv8h_POST,   "st1",  ".8h",    1, false, 64 },
+  { AArch64::ST1Fourv4s_POST,   "st1",  ".4s",    1, false, 64 },
+  { AArch64::ST1Fourv2d_POST,   "st1",  ".2d",    1, false, 64 },
+  { AArch64::ST1Fourv8b_POST,   "st1",  ".8b",    1, false, 32 },
+  { AArch64::ST1Fourv4h_POST,   "st1",  ".4h",    1, false, 32 },
+  { AArch64::ST1Fourv2s_POST,   "st1",  ".2s",    1, false, 32 },
+  { AArch64::ST1Fourv1d_POST,   "st1",  ".1d",    1, false, 32 },
+  { AArch64::ST2i8,             "st2",  ".b",     0, true,  0  },
+  { AArch64::ST2i16,            "st2",  ".h",     0, true,  0  },
+  { AArch64::ST2i32,            "st2",  ".s",     0, true,  0  },
+  { AArch64::ST2i64,            "st2",  ".d",     0, true,  0  },
+  { AArch64::ST2i8_POST,        "st2",  ".b",     1, true,  2  },
+  { AArch64::ST2i16_POST,       "st2",  ".h",     1, true,  4  },
+  { AArch64::ST2i32_POST,       "st2",  ".s",     1, true,  8  },
+  { AArch64::ST2i64_POST,       "st2",  ".d",     1, true,  16 },
+  { AArch64::ST2Twov16b,        "st2",  ".16b",   0, false, 0  },
+  { AArch64::ST2Twov8h,         "st2",  ".8h",    0, false, 0  },
+  { AArch64::ST2Twov4s,         "st2",  ".4s",    0, false, 0  },
+  { AArch64::ST2Twov2d,         "st2",  ".2d",    0, false, 0  },
+  { AArch64::ST2Twov8b,         "st2",  ".8b",    0, false, 0  },
+  { AArch64::ST2Twov4h,         "st2",  ".4h",    0, false, 0  },
+  { AArch64::ST2Twov2s,         "st2",  ".2s",    0, false, 0  },
+  { AArch64::ST2Twov16b_POST,   "st2",  ".16b",   1, false, 32 },
+  { AArch64::ST2Twov8h_POST,    "st2",  ".8h",    1, false, 32 },
+  { AArch64::ST2Twov4s_POST,    "st2",  ".4s",    1, false, 32 },
+  { AArch64::ST2Twov2d_POST,    "st2",  ".2d",    1, false, 32 },
+  { AArch64::ST2Twov8b_POST,    "st2",  ".8b",    1, false, 16 },
+  { AArch64::ST2Twov4h_POST,    "st2",  ".4h",    1, false, 16 },
+  { AArch64::ST2Twov2s_POST,    "st2",  ".2s",    1, false, 16 },
+  { AArch64::ST3i8,             "st3",  ".b",     0, true,  0  },
+  { AArch64::ST3i16,            "st3",  ".h",     0, true,  0  },
+  { AArch64::ST3i32,            "st3",  ".s",     0, true,  0  },
+  { AArch64::ST3i64,            "st3",  ".d",     0, true,  0  },
+  { AArch64::ST3i8_POST,        "st3",  ".b",     1, true,  3  },
+  { AArch64::ST3i16_POST,       "st3",  ".h",     1, true,  6  },
+  { AArch64::ST3i32_POST,       "st3",  ".s",     1, true,  12 },
+  { AArch64::ST3i64_POST,       "st3",  ".d",     1, true,  24 },
+  { AArch64::ST3Threev16b,      "st3",  ".16b",   0, false, 0  },
+  { AArch64::ST3Threev8h,       "st3",  ".8h",    0, false, 0  },
+  { AArch64::ST3Threev4s,       "st3",  ".4s",    0, false, 0  },
+  { AArch64::ST3Threev2d,       "st3",  ".2d",    0, false, 0  },
+  { AArch64::ST3Threev8b,       "st3",  ".8b",    0, false, 0  },
+  { AArch64::ST3Threev4h,       "st3",  ".4h",    0, false, 0  },
+  { AArch64::ST3Threev2s,       "st3",  ".2s",    0, false, 0  },
+  { AArch64::ST3Threev16b_POST, "st3",  ".16b",   1, false, 48 },
+  { AArch64::ST3Threev8h_POST,  "st3",  ".8h",    1, false, 48 },
+  { AArch64::ST3Threev4s_POST,  "st3",  ".4s",    1, false, 48 },
+  { AArch64::ST3Threev2d_POST,  "st3",  ".2d",    1, false, 48 },
+  { AArch64::ST3Threev8b_POST,  "st3",  ".8b",    1, false, 24 },
+  { AArch64::ST3Threev4h_POST,  "st3",  ".4h",    1, false, 24 },
+  { AArch64::ST3Threev2s_POST,  "st3",  ".2s",    1, false, 24 },
+  { AArch64::ST4i8,             "st4",  ".b",     0, true,  0  },
+  { AArch64::ST4i16,            "st4",  ".h",     0, true,  0  },
+  { AArch64::ST4i32,            "st4",  ".s",     0, true,  0  },
+  { AArch64::ST4i64,            "st4",  ".d",     0, true,  0  },
+  { AArch64::ST4i8_POST,        "st4",  ".b",     1, true,  4  },
+  { AArch64::ST4i16_POST,       "st4",  ".h",     1, true,  8  },
+  { AArch64::ST4i32_POST,       "st4",  ".s",     1, true,  16 },
+  { AArch64::ST4i64_POST,       "st4",  ".d",     1, true,  32 },
+  { AArch64::ST4Fourv16b,       "st4",  ".16b",   0, false, 0  },
+  { AArch64::ST4Fourv8h,        "st4",  ".8h",    0, false, 0  },
+  { AArch64::ST4Fourv4s,        "st4",  ".4s",    0, false, 0  },
+  { AArch64::ST4Fourv2d,        "st4",  ".2d",    0, false, 0  },
+  { AArch64::ST4Fourv8b,        "st4",  ".8b",    0, false, 0  },
+  { AArch64::ST4Fourv4h,        "st4",  ".4h",    0, false, 0  },
+  { AArch64::ST4Fourv2s,        "st4",  ".2s",    0, false, 0  },
+  { AArch64::ST4Fourv16b_POST,  "st4",  ".16b",   1, false, 64 },
+  { AArch64::ST4Fourv8h_POST,   "st4",  ".8h",    1, false, 64 },
+  { AArch64::ST4Fourv4s_POST,   "st4",  ".4s",    1, false, 64 },
+  { AArch64::ST4Fourv2d_POST,   "st4",  ".2d",    1, false, 64 },
+  { AArch64::ST4Fourv8b_POST,   "st4",  ".8b",    1, false, 32 },
+  { AArch64::ST4Fourv4h_POST,   "st4",  ".4h",    1, false, 32 },
+  { AArch64::ST4Fourv2s_POST,   "st4",  ".2s",    1, false, 32 },
+};
+
+static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
+  // Linear scan of the LD/ST table; nullptr means Opcode has no entry.
+  for (const LdStNInstrDesc &Desc : LdStNInstInfo)
+    if (Desc.Opcode == Opcode)
+      return &Desc;
+
+  return nullptr;
+}
+
+void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                        StringRef Annot,
+                                        const MCSubtargetInfo &STI) {
+  // TBL/TBX are printed as "<tbl|tbx><layout>\t<vD>, { <list> }, <vM>".
+  StringRef Layout;
+  bool IsTbx;
+  if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
+    const unsigned ListOpNum = IsTbx ? 2 : 1;
+    const unsigned DstReg = MI->getOperand(0).getReg();
+    O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
+      << getRegisterName(DstReg, AArch64::vreg) << ", ";
+
+    printVectorList(MI, ListOpNum, STI, O, "");
+
+    const unsigned IdxReg = MI->getOperand(ListOpNum + 1).getReg();
+    O << ", " << getRegisterName(IdxReg, AArch64::vreg);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // Opcodes without an LD/ST table entry are handled by the base printer.
+  const LdStNInstrDesc *Desc = getLdStNInstrDesc(MI->getOpcode());
+  if (!Desc) {
+    AArch64InstPrinter::printInst(MI, O, Annot, STI);
+    return;
+  }
+
+  O << "\t" << Desc->Mnemonic << Desc->Layout << '\t';
+
+  // First operand of the syntax: the vector list, optionally followed by a
+  // lane index, e.g. { v0 }[2].
+  int OpNum = Desc->ListOperand;
+  printVectorList(MI, OpNum++, STI, O, "");
+  if (Desc->HasLane)
+    O << '[' << MI->getOperand(OpNum++).getImm() << ']';
+
+  // Then the address register: [xN].
+  const unsigned AddrReg = MI->getOperand(OpNum++).getReg();
+  O << ", [" << getRegisterName(AddrReg) << ']';
+
+  // Post-indexed entries have one more register operand; when it is XZR the
+  // table's natural offset is printed as an immediate instead.
+  if (Desc->NaturalOffset != 0) {
+    const unsigned OffsetReg = MI->getOperand(OpNum++).getReg();
+    if (OffsetReg == AArch64::XZR)
+      O << ", #" << Desc->NaturalOffset;
+    else
+      O << ", " << getRegisterName(OffsetReg);
+  }
+
+  printAnnotation(O, Annot);
+}
+
+bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+#ifndef NDEBUG
+  unsigned Opcode = MI->getOpcode();
+  assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
+#endif
+  // Operands 0..3 hold the op1, Cn, Cm and op2 immediates.
+  const MCOperand &Op1 = MI->getOperand(0);
+  const MCOperand &Cn = MI->getOperand(1);
+  const MCOperand &Cm = MI->getOperand(2);
+  const MCOperand &Op2 = MI->getOperand(3);
+
+  unsigned Op1Val = Op1.getImm();
+  unsigned CnVal = Cn.getImm();
+  unsigned CmVal = Cm.getImm();
+  unsigned Op2Val = Op2.getImm();
+  // Pack the fields: op2 at bit 0, Cm at bit 3, Cn at bit 7, op1 at bit 11.
+  uint16_t Encoding = Op2Val;
+  Encoding |= CmVal << 3;
+  Encoding |= CnVal << 7;
+  Encoding |= Op1Val << 11;
+  // Every "return false" below happens before anything is written to O.
+  bool NeedsReg;
+  std::string Ins;
+  std::string Name;
+  // Only Cn == 7 and Cn == 8 (TLBI) are searched; anything else has no alias.
+  if (CnVal == 7) {
+    switch (CmVal) {
+    default: return false;
+    // Cm == 1: IC when op1 == 0, Prediction Restriction when op1 == 3.
+    case 1:
+      switch (Op1Val) {
+      default: return false;
+      case 0: goto Search_IC;
+      case 3: goto Search_PRCTX;
+      }
+    // Prediction Restriction aliases
+    case 3: {
+      Search_PRCTX: // also the goto target for Cm == 1, op1 == 3
+      const AArch64PRCTX::PRCTX *PRCTX = AArch64PRCTX::lookupPRCTXByEncoding(Encoding >> 3);
+      if (!PRCTX || !PRCTX->haveFeatures(STI.getFeatureBits()))
+        return false;
+
+      NeedsReg = PRCTX->NeedsReg;
+      switch (Op2Val) {
+      default: return false;
+      case 4: Ins = "cfp\t"; break;
+      case 5: Ins = "dvp\t"; break;
+      case 7: Ins = "cpp\t"; break;
+      }
+      Name = std::string(PRCTX->Name);
+    }
+    break;
+    // IC aliases
+    case 5: {
+      Search_IC: // also the goto target for Cm == 1, op1 == 0
+      const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
+      if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
+        return false;
+
+      NeedsReg = IC->NeedsReg;
+      Ins = "ic\t";
+      Name = std::string(IC->Name);
+    }
+    break;
+    // DC aliases
+    case 4: case 6: case 10: case 11: case 12: case 13: case 14:
+    {
+      const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
+      if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
+        return false;
+
+      NeedsReg = true;
+      Ins = "dc\t";
+      Name = std::string(DC->Name);
+    }
+    break;
+    // AT aliases
+    case 8: case 9: {
+      const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
+      if (!AT || !AT->haveFeatures(STI.getFeatureBits()))
+        return false;
+
+      NeedsReg = true;
+      Ins = "at\t";
+      Name = std::string(AT->Name);
+    }
+    break;
+    }
+  } else if (CnVal == 8) {
+    // TLBI aliases
+    const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
+    if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
+      return false;
+
+    NeedsReg = TLBI->NeedsReg;
+    Ins = "tlbi\t";
+    Name = std::string(TLBI->Name);
+  }
+  else
+    return false;
+  // Print "<ins>\t<name>" in lower case, plus the register if one is needed.
+  std::string Str = Ins + Name;
+  std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
+
+  O << '\t' << Str;
+  if (NeedsReg)
+    O << ", " << getRegisterName(MI->getOperand(4).getReg());
+
+  return true; // an alias was written to O
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  // Dispatch on the operand kind: register, immediate, or expression.
+  const MCOperand &Operand = MI->getOperand(OpNo);
+  if (Operand.isReg()) {
+    O << getRegisterName(Operand.getReg());
+    return;
+  }
+  if (Operand.isImm())
+    return printImm(MI, OpNo, STI, O);
+  assert(Operand.isExpr() && "unknown operand kind in printOperand");
+  Operand.getExpr()->print(O, &MAI);
+}
+
+void AArch64InstPrinter::printImm(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) {
+  // The immediate goes through formatImm and is prefixed with '#'.
+  O << "#" << formatImm(MI->getOperand(OpNo).getImm());
+}
+
+void AArch64InstPrinter::printImmHex(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  // '#' followed by the immediate rendered with the printf pattern "%#llx".
+  O << format("#%#llx", MI->getOperand(OpNo).getImm());
+}
+
+void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
+                                             unsigned Imm, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (!Op.isReg())
+    llvm_unreachable("unknown operand kind in printPostIncOperand64");
+  // XZR selects the immediate form; any other register is printed by name.
+  const unsigned OffsetReg = Op.getReg();
+  if (OffsetReg != AArch64::XZR)
+    O << getRegisterName(OffsetReg);
+  else
+    O << "#" << Imm;
+}
+
+void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // The register name is requested with the AArch64::vreg index.
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  assert(RegOp.isReg() && "Non-register vreg operand!");
+  O << getRegisterName(RegOp.getReg(), AArch64::vreg);
+}
+
+void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  assert(MI->getOperand(OpNo).isImm() &&
+         "System instruction C[nm] operands must be immediates!");
+  O << "c" << MI->getOperand(OpNo).getImm(); // printed as "c<imm>"
+}
+
+void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (!MO.isImm()) {
+    // Symbolic immediate: print the expression, then the shifter operand.
+    assert(MO.isExpr() && "Unexpected operand type!");
+    MO.getExpr()->print(O, &MAI);
+    printShifter(MI, OpNum + 1, STI, O);
+    return;
+  }
+  unsigned Imm12 = (MO.getImm() & 0xfff);
+  assert(Imm12 == MO.getImm() && "Add/sub immediate out of range!");
+  unsigned ShiftAmt =
+      AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
+  O << '#' << formatImm(Imm12);
+  if (ShiftAmt != 0)
+    printShifter(MI, OpNum + 1, STI, O);
+  if (CommentStream) // annotate with the shifted value
+    *CommentStream << '=' << formatImm(Imm12 << ShiftAmt) << '\n';
+}
+
+template <typename T>
+void AArch64InstPrinter::printLogicalImm(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  constexpr unsigned RegBits = 8 * sizeof(T); // width handed to the decoder
+  const uint64_t Enc = MI->getOperand(OpNum).getImm();
+  (O << "#0x").write_hex(AArch64_AM::decodeLogicalImmediate(Enc, RegBits));
+}
+
+void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  const unsigned Enc = MI->getOperand(OpNum).getImm();
+  const auto Kind = AArch64_AM::getShiftType(Enc);
+  const auto Amount = AArch64_AM::getShiftValue(Enc);
+
+  // "lsl #0" is never printed; every other shift is ", <name> #<amount>".
+  if (Kind != AArch64_AM::LSL || Amount != 0)
+    O << ", " << AArch64_AM::getShiftExtendName(Kind) << " #" << Amount;
+}
+
+void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg()); // the register itself
+  printShifter(MI, OpNum + 1, STI, O); // shift operand follows at OpNum + 1
+}
+
+void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg()); // the register itself
+  printArithExtend(MI, OpNum + 1, STI, O); // extend operand is at OpNum + 1
+}
+
+void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  const unsigned Enc = MI->getOperand(OpNum).getImm();
+  const auto ExtType = AArch64_AM::getArithExtendType(Enc);
+  const unsigned ShiftVal = AArch64_AM::getArithShiftValue(Enc);
+
+  // With SP (for UXTX) or WSP (for UXTW) as destination or first source, the
+  // extend is spelled "lsl", and omitted entirely when the shift is zero.
+  if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) {
+    const unsigned StackReg =
+        ExtType == AArch64_AM::UXTX ? AArch64::SP : AArch64::WSP;
+    const unsigned Dest = MI->getOperand(0).getReg();
+    const unsigned Src1 = MI->getOperand(1).getReg();
+    if (Dest == StackReg || Src1 == StackReg) {
+      if (ShiftVal != 0)
+        O << ", lsl #" << ShiftVal;
+      return;
+    }
+  }
+
+  // Generic form: extend name, plus the amount only when it is non-zero.
+  O << ", " << AArch64_AM::getShiftExtendName(ExtType);
+  if (ShiftVal != 0)
+    O << " #" << ShiftVal;
+}
+
+static void printMemExtendImpl(bool SignExtend, bool DoShift,
+                               unsigned Width, char SrcRegKind,
+                               raw_ostream &O) {
+  // An unsigned extend of an 'x' register (uxtx) is spelled "lsl" and always
+  // carries its shift amount; the other spellings are sxtw, sxtx and uxtw.
+  const bool IsLSL = !SignExtend && SrcRegKind == 'x';
+  if (!IsLSL)
+    O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind;
+  else
+    O << "lsl";
+  if (IsLSL || DoShift)
+    O << " #" << Log2_32(Width / 8);
+}
+
+void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O, char SrcRegKind,
+                                        unsigned Width) {
+  const bool IsSigned = MI->getOperand(OpNum).getImm() != 0;     // flag 1
+  const bool HasShift = MI->getOperand(OpNum + 1).getImm() != 0; // flag 2
+  printMemExtendImpl(IsSigned, HasShift, Width, SrcRegKind, O);
+}
+
+template <bool SignExtend, int ExtWidth, char SrcRegKind, char Suffix>
+void AArch64InstPrinter::printRegWithShiftExtend(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  printOperand(MI, OpNum, STI, O);
+  const bool HasSuffix = Suffix == 's' || Suffix == 'd';
+  assert((HasSuffix || Suffix == 0) && "Unsupported suffix size");
+  if (HasSuffix)
+    O << '.' << Suffix;
+  // Nothing follows an unsigned, unshifted (ExtWidth == 8), non-'w' register.
+  const bool DoShift = ExtWidth != 8;
+  if (!SignExtend && !DoShift && SrcRegKind != 'w')
+    return;
+  O << ", ";
+  printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O);
+}
+
+void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  auto CC = static_cast<AArch64CC::CondCode>(MI->getOperand(OpNum).getImm());
+  O << AArch64CC::getCondCodeName(CC); // textual name of the operand's code
+}
+
+void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  auto CC = static_cast<AArch64CC::CondCode>(MI->getOperand(OpNum).getImm());
+  O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
+}
+
+void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; // [reg]
+}
+
+template<int Scale>
+void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  O << '#' << formatImm(Scale * MI->getOperand(OpNum).getImm()); // imm * Scale
+}
+
+void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
+                                           unsigned Scale, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum); // bound by reference, no copy
+  if (MO.isImm()) {
+    O << "#" << formatImm(MO.getImm() * Scale);
+    return;
+  }
+  assert(MO.isExpr() && "Unexpected operand type!");
+  MO.getExpr()->print(O, &MAI); // symbolic offsets are printed unscaled
+}
+
+void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+                                          unsigned Scale, raw_ostream &O) {
+  // Emits "[<base>, <imm-or-expr>]"; immediates are multiplied by Scale.
+  const MCOperand &Offset = MI->getOperand(OpNum + 1);
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", ";
+  if (Offset.isImm()) {
+    O << '#' << formatImm(Offset.getImm() * Scale);
+  } else {
+    assert(Offset.isExpr() && "Unexpected operand type!");
+    Offset.getExpr()->print(O, &MAI);
+  }
+  O << ']';
+}
+
+template <bool IsSVEPrefetch>
+void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const unsigned Encoding = MI->getOperand(OpNum).getImm();
+  if (IsSVEPrefetch) {
+    if (auto SVEOp = AArch64SVEPRFM::lookupSVEPRFMByEncoding(Encoding)) {
+      O << SVEOp->Name;
+      return;
+    }
+  } else if (auto Op = AArch64PRFM::lookupPRFMByEncoding(Encoding)) {
+    O << Op->Name;
+    return;
+  }
+  // No named prefetch operation for this encoding: print the raw immediate.
+  O << '#' << formatImm(Encoding);
+}
+
+void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const unsigned Encoding = MI->getOperand(OpNum).getImm();
+  // Prefer the symbolic hint name; unknown encodings print as an immediate.
+  if (auto PSB = AArch64PSBHint::lookupPSBByEncoding(Encoding))
+    O << PSB->Name;
+  else
+    O << '#' << formatImm(Encoding);
+}
+
+void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // The lookup key is the operand XORed with 32, then shifted right by one.
+  const unsigned Key = (MI->getOperand(OpNum).getImm() ^ 32) >> 1;
+  if (auto BTI = AArch64BTIHint::lookupBTIByEncoding(Key))
+    O << BTI->Name;
+  else
+    O << '#' << formatImm(Key);
+}
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  // Either a genuine FP operand, or an immediate decoded via getFPImmFloat.
+  const float Value =
+      Op.isFPImm() ? Op.getFPImm() : AArch64_AM::getFPImmFloat(Op.getImm());
+  // 8 decimal places are enough to perfectly represent permitted floats.
+  O << format("#%.8f", Value);
+}
+
+static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
+  while (Stride--) { // one successor step per unit of Stride
+    switch (Reg) {
+    default:
+      llvm_unreachable("Vector register expected!"); // only Q0-Q31 / Z0-Z31
+    case AArch64::Q0:  Reg = AArch64::Q1;  break;
+    case AArch64::Q1:  Reg = AArch64::Q2;  break;
+    case AArch64::Q2:  Reg = AArch64::Q3;  break;
+    case AArch64::Q3:  Reg = AArch64::Q4;  break;
+    case AArch64::Q4:  Reg = AArch64::Q5;  break;
+    case AArch64::Q5:  Reg = AArch64::Q6;  break;
+    case AArch64::Q6:  Reg = AArch64::Q7;  break;
+    case AArch64::Q7:  Reg = AArch64::Q8;  break;
+    case AArch64::Q8:  Reg = AArch64::Q9;  break;
+    case AArch64::Q9:  Reg = AArch64::Q10; break;
+    case AArch64::Q10: Reg = AArch64::Q11; break;
+    case AArch64::Q11: Reg = AArch64::Q12; break;
+    case AArch64::Q12: Reg = AArch64::Q13; break;
+    case AArch64::Q13: Reg = AArch64::Q14; break;
+    case AArch64::Q14: Reg = AArch64::Q15; break;
+    case AArch64::Q15: Reg = AArch64::Q16; break;
+    case AArch64::Q16: Reg = AArch64::Q17; break;
+    case AArch64::Q17: Reg = AArch64::Q18; break;
+    case AArch64::Q18: Reg = AArch64::Q19; break;
+    case AArch64::Q19: Reg = AArch64::Q20; break;
+    case AArch64::Q20: Reg = AArch64::Q21; break;
+    case AArch64::Q21: Reg = AArch64::Q22; break;
+    case AArch64::Q22: Reg = AArch64::Q23; break;
+    case AArch64::Q23: Reg = AArch64::Q24; break;
+    case AArch64::Q24: Reg = AArch64::Q25; break;
+    case AArch64::Q25: Reg = AArch64::Q26; break;
+    case AArch64::Q26: Reg = AArch64::Q27; break;
+    case AArch64::Q27: Reg = AArch64::Q28; break;
+    case AArch64::Q28: Reg = AArch64::Q29; break;
+    case AArch64::Q29: Reg = AArch64::Q30; break;
+    case AArch64::Q30: Reg = AArch64::Q31; break;
+    // Vector lists can wrap around: Q31 is followed by Q0.
+    case AArch64::Q31:
+      Reg = AArch64::Q0;
+      break;
+    case AArch64::Z0:  Reg = AArch64::Z1;  break;
+    case AArch64::Z1:  Reg = AArch64::Z2;  break;
+    case AArch64::Z2:  Reg = AArch64::Z3;  break;
+    case AArch64::Z3:  Reg = AArch64::Z4;  break;
+    case AArch64::Z4:  Reg = AArch64::Z5;  break;
+    case AArch64::Z5:  Reg = AArch64::Z6;  break;
+    case AArch64::Z6:  Reg = AArch64::Z7;  break;
+    case AArch64::Z7:  Reg = AArch64::Z8;  break;
+    case AArch64::Z8:  Reg = AArch64::Z9;  break;
+    case AArch64::Z9:  Reg = AArch64::Z10; break;
+    case AArch64::Z10: Reg = AArch64::Z11; break;
+    case AArch64::Z11: Reg = AArch64::Z12; break;
+    case AArch64::Z12: Reg = AArch64::Z13; break;
+    case AArch64::Z13: Reg = AArch64::Z14; break;
+    case AArch64::Z14: Reg = AArch64::Z15; break;
+    case AArch64::Z15: Reg = AArch64::Z16; break;
+    case AArch64::Z16: Reg = AArch64::Z17; break;
+    case AArch64::Z17: Reg = AArch64::Z18; break;
+    case AArch64::Z18: Reg = AArch64::Z19; break;
+    case AArch64::Z19: Reg = AArch64::Z20; break;
+    case AArch64::Z20: Reg = AArch64::Z21; break;
+    case AArch64::Z21: Reg = AArch64::Z22; break;
+    case AArch64::Z22: Reg = AArch64::Z23; break;
+    case AArch64::Z23: Reg = AArch64::Z24; break;
+    case AArch64::Z24: Reg = AArch64::Z25; break;
+    case AArch64::Z25: Reg = AArch64::Z26; break;
+    case AArch64::Z26: Reg = AArch64::Z27; break;
+    case AArch64::Z27: Reg = AArch64::Z28; break;
+    case AArch64::Z28: Reg = AArch64::Z29; break;
+    case AArch64::Z29: Reg = AArch64::Z30; break;
+    case AArch64::Z30: Reg = AArch64::Z31; break;
+    // Vector lists can wrap around: Z31 is followed by Z0.
+    case AArch64::Z31:
+      Reg = AArch64::Z0;
+      break;
+    }
+  }
+  return Reg; // Reg advanced Stride times
+}
+
+template<unsigned size>
+void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
+                                                   unsigned OpNum,
+                                                   const MCSubtargetInfo &STI,
+                                                   raw_ostream &O) {
+  static_assert(size == 64 || size == 32,
+                "Template parameter must be either 32 or 64");
+  // Pick the sube*/subo* sub-register indices that match the requested width.
+  const bool Is32 = size == 32;
+  const unsigned EvenIdx = Is32 ? AArch64::sube32 : AArch64::sube64;
+  const unsigned OddIdx = Is32 ? AArch64::subo32 : AArch64::subo64;
+
+  const unsigned Pair = MI->getOperand(OpNum).getReg();
+  O << getRegisterName(MRI.getSubReg(Pair, EvenIdx)) << ", "
+    << getRegisterName(MRI.getSubReg(Pair, OddIdx));
+}
+
+void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O,
+                                         StringRef LayoutSuffix) {
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  // Output shape: "{ <reg><suffix>, <reg><suffix>, ... }".
+  O << "{ ";
+
+  // Work out how many registers there are in the list: 2, 3 or 4 for the tuple
+  // classes tested below, otherwise a single register.
+  unsigned NumRegs = 1;
+  if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) ||
+      MRI.getRegClass(AArch64::ZPR2RegClassID).contains(Reg) ||
+      MRI.getRegClass(AArch64::QQRegClassID).contains(Reg))
+    NumRegs = 2;
+  else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) ||
+           MRI.getRegClass(AArch64::ZPR3RegClassID).contains(Reg) ||
+           MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg))
+    NumRegs = 3;
+  else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) ||
+           MRI.getRegClass(AArch64::ZPR4RegClassID).contains(Reg) ||
+           MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg))
+    NumRegs = 4;
+
+  // Now forget about the list and find out what the first register is.
+  if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0))
+    Reg = FirstReg;
+  else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0))
+    Reg = FirstReg;
+  else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::zsub0))
+    Reg = FirstReg;
+
+  // If it's a D-reg, we need to promote it to the equivalent Q-reg before
+  // printing (otherwise getRegisterName fails).
+  if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) {
+    const MCRegisterClass &FPR128RC =
+        MRI.getRegClass(AArch64::FPR128RegClassID);
+    Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
+  }
+  // Emit NumRegs registers, stepping with getNextVectorRegister each time.
+  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+    if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
+      O << getRegisterName(Reg) << LayoutSuffix;
+    else
+      O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix;
+    // Separator between entries, none after the last.
+    if (i + 1 != NumRegs)
+      O << ", ";
+  }
+
+  O << " }";
+}
+
+void AArch64InstPrinter::printImplicitlyTypedVectorList(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // "Implicitly typed": the registers are listed with an empty layout suffix.
+  const StringRef NoSuffix("");
+  printVectorList(MI, OpNum, STI, O, NoSuffix);
+}
+
+template <unsigned NumLanes, char LaneKind>
+void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  // Build ".<lanes><kind>" (e.g. ".4s"), or just ".<kind>" when NumLanes is 0.
+  std::string Suffix(".");
+  if (NumLanes)
+    Suffix += itostr(NumLanes);
+  Suffix += LaneKind;
+
+  printVectorList(MI, OpNum, STI, O, Suffix);
+}
+
+void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  O << "[" << MI->getOperand(OpNum).getImm() << "]"; // e.g. "[2]"
+}
+
+void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+
+  // A label already resolved to an immediate (say, when running the
+  // disassembler) is printed as that immediate multiplied by 4.
+  if (Op.isImm()) {
+    O << "#" << formatImm(Op.getImm() * 4);
+    return;
+  }
+
+  // A constant expression that evaluates to an absolute address prints in hex.
+  const auto *Target = Op.getExpr();
+  int64_t Address;
+  const auto *Constant = dyn_cast<MCConstantExpr>(Target);
+  if (Constant && Constant->evaluateAsAbsolute(Address)) {
+    (O << "0x").write_hex(Address);
+    return;
+  }
+
+  // Anything else is printed symbolically.
+  Target->print(O, &MAI);
+}
+
+void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  if (!Op.isImm()) {
+    // Still symbolic: print the expression itself.
+    Op.getExpr()->print(O, &MAI);
+    return;
+  }
+
+  // Already resolved to an immediate (say, when running the disassembler):
+  // the value is multiplied by 1 << 12 (4096) before printing.
+  const int Scale = 1 << 12;
+  O << "#" << formatImm(Op.getImm() * Scale);
+}
+
+void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  const unsigned Val = MI->getOperand(OpNo).getImm();
+  const unsigned Opcode = MI->getOpcode();
+
+  // ISB and TSB have their own option tables; everything else uses the DB one.
+  StringRef Name;
+  if (Opcode == AArch64::ISB) {
+    if (auto ISB = AArch64ISB::lookupISBByEncoding(Val))
+      Name = ISB->Name;
+  } else if (Opcode == AArch64::TSB) {
+    if (auto TSB = AArch64TSB::lookupTSBByEncoding(Val))
+      Name = TSB->Name;
+  } else if (auto DB = AArch64DB::lookupDBByEncoding(Val)) {
+    Name = DB->Name;
+  }
+  if (Name.empty())
+    O << "#" << Val; // no (or empty) table entry: raw encoding
+  else
+    O << Name;
+}
+
+void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const unsigned Enc = MI->getOperand(OpNo).getImm();
+
+  // Hack: one register has identical encodings but different names in MSR
+  // and MRS, so the table lookup would pick the wrong entry for one of them.
+  if (Enc == AArch64SysReg::DBGDTRRX_EL0) {
+    O << "DBGDTRRX_EL0";
+    return;
+  }
+
+  // Named only if readable and supported by the subtarget; else generic form.
+  const auto *Reg = AArch64SysReg::lookupSysRegByEncoding(Enc);
+  if (!Reg || !Reg->Readable || !Reg->haveFeatures(STI.getFeatureBits()))
+    O << AArch64SysReg::genericRegisterString(Enc);
+  else
+    O << Reg->Name;
+}
+
+void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const unsigned Enc = MI->getOperand(OpNo).getImm();
+
+  // Hack: one register has identical encodings but different names in MSR
+  // and MRS, so the table lookup would pick the wrong entry for one of them.
+  if (Enc == AArch64SysReg::DBGDTRTX_EL0) {
+    O << "DBGDTRTX_EL0";
+    return;
+  }
+
+  // Named only if writeable and supported by the subtarget; else generic form.
+  const auto *Reg = AArch64SysReg::lookupSysRegByEncoding(Enc);
+  if (!Reg || !Reg->Writeable || !Reg->haveFeatures(STI.getFeatureBits()))
+    O << AArch64SysReg::genericRegisterString(Enc);
+  else
+    O << Reg->Name;
+}
+
+void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const unsigned Enc = MI->getOperand(OpNo).getImm();
+  const auto PState = AArch64PState::lookupPStateByEncoding(Enc);
+  // Fall back to the raw immediate if the field is unknown or unsupported.
+  if (!PState || !PState->haveFeatures(STI.getFeatureBits()))
+    O << "#" << formatImm(Enc);
+  else
+    O << PState->Name;
+}
+
+void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const unsigned Encoded = MI->getOperand(OpNo).getImm();
+  const uint64_t Expanded = AArch64_AM::decodeAdvSIMDModImmType10(Encoded);
+  O << format("#%#016llx", Expanded); // zero-padded hex, field width 16
+}
+
+template<int64_t Angle, int64_t Remainder>
+void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const unsigned Index = MI->getOperand(OpNo).getImm();
+  O << "#" << (Index * Angle) + Remainder; // operand scaled, then offset
+}
+
+void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const unsigned Encoding = MI->getOperand(OpNum).getImm();
+  if (auto Named = AArch64SVEPredPattern::lookupSVEPREDPATByEncoding(Encoding))
+    O << Named->Name;
+  else // no table entry for this encoding: print it as an immediate
+    O << '#' << formatImm(Encoding);
+}
+
+// Prints the register operand by name. A non-zero template argument is
+// appended as a ".<suffix>" qualifier; any value other than 0, 'b', 'h', 's',
+// 'd' or 'q' is treated as unreachable.
+template <char suffix>
+void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  // Validate the template argument before producing any output.
+  const bool KnownSuffix = suffix == 0 || suffix == 'b' || suffix == 'h' ||
+                           suffix == 's' || suffix == 'd' || suffix == 'q';
+  if (!KnownSuffix)
+    llvm_unreachable("Invalid kind specifier.");
+
+  O << getRegisterName(MI->getOperand(OpNum).getReg());
+
+  // A zero suffix means the bare register name is the whole operand.
+  if (suffix == 0)
+    return;
+  O << '.' << suffix;
+}
+
+template <typename T>
+void AArch64InstPrinter::printImmSVE(T Value, raw_ostream &O) {
+  const typename std::make_unsigned<T>::type Bits = Value;
+  const bool AsHex = getPrintImmHex();
+  O << '#';
+  if (AsHex)
+    O << formatHex(static_cast<uint64_t>(Bits));
+  else
+    O << formatDec(Value);
+  // The comment stream, when present, shows the value in the other radix.
+  if (!CommentStream)
+    return;
+  if (AsHex)
+    *CommentStream << '=' << formatDec(Bits) << '\n';
+  else
+    *CommentStream << '=' << formatHex(static_cast<uint64_t>(Value)) << '\n';
+}
+
+// Prints an 8-bit immediate with its "lsl" shift amount folded into the value.
+template <typename T>
+void AArch64InstPrinter::printImm8OptLsl(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const unsigned Imm8 = MI->getOperand(OpNum).getImm();
+  const unsigned ShiftOp = MI->getOperand(OpNum + 1).getImm();
+  assert(AArch64_AM::getShiftType(ShiftOp) == AArch64_AM::LSL &&
+         "Unexepected shift type!");
+  const auto ShiftAmt = AArch64_AM::getShiftValue(ShiftOp);
+
+  // A zero immediate with a non-zero shift keeps its explicit shifter.
+  if (Imm8 == 0 && ShiftAmt != 0) {
+    O << '#' << formatImm(Imm8);
+    printShifter(MI, OpNum + 1, STI, O);
+    return;
+  }
+
+  // Read the low byte as signed or unsigned to match T, then scale it.
+  const int Scale = 1 << ShiftAmt;
+  const T Val = std::is_signed<T>() ? static_cast<int8_t>(Imm8) * Scale
+                                    : static_cast<uint8_t>(Imm8) * Scale;
+  printImmSVE(Val, O);
+}
+
+template <typename T>
+void AArch64InstPrinter::printSVELogicalImm(const MCInst *MI, unsigned OpNum,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  using SignedT = typename std::make_signed<T>::type;
+  using UnsignedT = typename std::make_unsigned<T>::type;
+
+  const uint64_t Encoded = MI->getOperand(OpNum).getImm();
+  const UnsignedT Decoded = AArch64_AM::decodeLogicalImmediate(Encoded, 64);
+
+  // Values that fit in 16 bits (checked as signed, then as unsigned) use the
+  // default immediate format; anything wider is always printed in hex.
+  if (static_cast<int16_t>(Decoded) == static_cast<SignedT>(Decoded))
+    return printImmSVE(static_cast<T>(Decoded), O);
+  if (static_cast<uint16_t>(Decoded) == Decoded)
+    return printImmSVE(Decoded, O);
+  O << '#' << formatHex(static_cast<uint64_t>(Decoded));
+}
+
+template <int Width>
+void AArch64InstPrinter::printZPRasFPR(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  unsigned FPR0;
+  switch (Width) {
+  default:
+    llvm_unreachable("Unsupported width");
+  case 8:   FPR0 = AArch64::B0; break;
+  case 16:  FPR0 = AArch64::H0; break;
+  case 32:  FPR0 = AArch64::S0; break;
+  case 64:  FPR0 = AArch64::D0; break;
+  case 128: FPR0 = AArch64::Q0; break;
+  }
+  // Keep the operand's offset from Z0 and rebase it onto the selected FPR0.
+  O << getRegisterName(MI->getOperand(OpNum).getReg() - AArch64::Z0 + FPR0);
+}
+
+template <unsigned ImmIs0, unsigned ImmIs1>
+void AArch64InstPrinter::printExactFPImm(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  // A zero operand selects ImmIs0; any other value selects ImmIs1.
+  const unsigned Selector = MI->getOperand(OpNum).getImm();
+  const unsigned Which = Selector ? ImmIs1 : ImmIs0;
+  O << "#" << AArch64ExactFPImm::lookupExactFPImmByEnum(Which)->Repr;
+}
+
+void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // Print the name of whatever getWRegFromXReg returns for the operand.
+  O << getRegisterName(getWRegFromXReg(MI->getOperand(OpNum).getReg()));
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
new file mode 100644
index 000000000000..5311f73ca21c
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -0,0 +1,222 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "../Utils/AArch64BaseInfo.h"
+
+namespace llvm {
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+  AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                     const MCRegisterInfo &MRI);
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+
+  // Autogenerated by tblgen (only declared in this header).
+  virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                                raw_ostream &O);
+  virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                               raw_ostream &O);
+  virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                                       unsigned PrintMethodIdx,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O);
+
+  virtual StringRef getRegName(unsigned RegNo) const {
+    return getRegisterName(RegNo); // AltIdx takes its default value
+  }
+
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = AArch64::NoRegAltName);
+
+protected:
+  bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
+                     raw_ostream &O);
+  // Operand printers; output goes to the raw_ostream argument O.
+  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printImmHex(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+  template <typename T> void printImmSVE(T Value, raw_ostream &O);
+  void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
+                           raw_ostream &O);
+  template <int Amount>
+  void printPostIncOperand(const MCInst *MI, unsigned OpNo,
+                           const MCSubtargetInfo &STI, raw_ostream &O) {
+    printPostIncOperand(MI, OpNo, Amount, O); // STI is not needed here
+  }
+
+  void printVRegOperand(const MCInst *MI, unsigned OpNo,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSysCROperand(const MCInst *MI, unsigned OpNo,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddSubImm(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+  template <typename T>
+  void printLogicalImm(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printShifter(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printShiftedRegister(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExtendedRegister(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printArithExtend(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+                      char SrcRegKind, unsigned Width);
+  template <char SrcRegKind, unsigned Width>
+  void printMemExtend(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O) {
+    printMemExtend(MI, OpNum, O, SrcRegKind, Width); // STI is not needed here
+  }
+  template <bool SignedExtend, int ExtWidth, char SrcRegKind, char Suffix>
+  void printRegWithShiftExtend(const MCInst *MI, unsigned OpNum,
+                               const MCSubtargetInfo &STI, raw_ostream &O);
+  void printCondCode(const MCInst *MI, unsigned OpNum,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printInverseCondCode(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+  void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
+                         raw_ostream &O);
+  void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
+                        raw_ostream &O);
+
+  template <int Scale>
+  void printUImm12Offset(const MCInst *MI, unsigned OpNum,
+                         const MCSubtargetInfo &STI, raw_ostream &O) {
+    printUImm12Offset(MI, OpNum, Scale, O); // STI is not needed here
+  }
+
+  template <int BitWidth>
+  void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O) {
+    printAMIndexedWB(MI, OpNum, BitWidth / 8, O); // passed as Scale
+  }
+
+  void printAMNoIndex(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <int Scale>
+  void printImmScale(const MCInst *MI, unsigned OpNum,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <bool IsSVEPrefetch = false>
+  void printPrefetchOp(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printPSBHintOp(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printBTIHintOp(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printVectorList(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O,
+                       StringRef LayoutSuffix);
+
+  /// Print a list of vector registers where the type suffix is implicit
+  /// (i.e. attached to the instruction rather than the registers).
+  void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O);
+
+  template <unsigned NumLanes, char LaneKind>
+  void printTypedVectorList(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printVectorIndex(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+  void printBarrierOption(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSystemPStateField(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  template<int64_t Angle, int64_t Remainder>
+  void printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  template<unsigned size>
+  void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O);
+  template <typename T>
+  void printImm8OptLsl(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  template <typename T>
+  void printSVELogicalImm(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSVEPattern(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  template <char = 0>
+  void printSVERegOp(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printGPR64as32(const MCInst *MI, unsigned OpNum,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+  template <int Width>
+  void printZPRasFPR(const MCInst *MI, unsigned OpNum,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  template <unsigned ImmIs0, unsigned ImmIs1>
+  void printExactFPImm(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+};
+
+class AArch64AppleInstPrinter : public AArch64InstPrinter {
+public:
+  AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                          const MCRegisterInfo &MRI);
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                        raw_ostream &O) override;
+  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                       raw_ostream &O) override;
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx,
+                               const MCSubtargetInfo &STI,
+                               raw_ostream &O) override;
+
+  StringRef getRegName(unsigned RegNo) const override {
+    return getRegisterName(RegNo); // finds the getRegisterName declared below
+  }
+
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = AArch64::NoRegAltName);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 58e4a9c9a9e9..ecff1ab0a8b3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -131,8 +130,6 @@ AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
   CodePointerSize = 8;
 
   CommentString = "//";
-  ExceptionsType = ExceptionHandling::DwarfCFI;
-  // The default is dwarf, but WinEH can be enabled optionally, which requires
-  // WinEHEncodingType to be set.
+  ExceptionsType = ExceptionHandling::WinEH;
   WinEHEncodingType = WinEH::EncodingType::Itanium;
 }
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index e8570b1c2887..36ae92afc8c1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -1,9 +1,8 @@
 //=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 41cad48f7aea..8cb7a1672983 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -188,9 +187,10 @@ public:
                                      const MCSubtargetInfo &STI) const;
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // end anonymous namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 729486b1020c..0a529321edc8 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -80,8 +79,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
 }
 
 void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-  if (getKind() != VK_NONE)
-    OS << getVariantKindName();
+  OS << getVariantKindName();
   Expr->print(OS, MAI);
 }
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index b6bf254d3835..ec9c95911628 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -1,9 +1,8 @@
 //=--- AArch64MCExpr.h - AArch64 specific MC expression classes ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,8 +22,6 @@ namespace llvm {
 class AArch64MCExpr : public MCTargetExpr {
 public:
   enum VariantKind {
-    VK_NONE     = 0x000,
-
     // Symbol locations specifying (roughly speaking) what calculation should be
     // performed to construct the final address for the relocated
     // symbol. E.g. direct, via the GOT, ...
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f8198ba4e9b..df12274d9470 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,8 +14,10 @@
 #include "AArch64ELFStreamer.h"
 #include "AArch64MCAsmInfo.h"
 #include "AArch64WinCOFFStreamer.h"
-#include "InstPrinter/AArch64InstPrinter.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "TargetInfo/AArch64TargetInfo.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCInstrAnalysis.h"
@@ -56,11 +57,177 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
 }
 
 void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
-  for (unsigned Reg = AArch64::NoRegister + 1;
-       Reg < AArch64::NUM_TARGET_REGS; ++Reg) {
-    unsigned CV = MRI->getEncodingValue(Reg);
-    MRI->mapLLVMRegToCVReg(Reg, CV);
-  }
+  // Pairs of (CodeView register id, MC register) that get registered with MRI.
+  static const struct {
+    codeview::RegisterId CVReg;
+    MCPhysReg Reg;
+  } RegMap[] = {
+      {codeview::RegisterId::ARM64_W0, AArch64::W0},
+      {codeview::RegisterId::ARM64_W1, AArch64::W1},
+      {codeview::RegisterId::ARM64_W2, AArch64::W2},
+      {codeview::RegisterId::ARM64_W3, AArch64::W3},
+      {codeview::RegisterId::ARM64_W4, AArch64::W4},
+      {codeview::RegisterId::ARM64_W5, AArch64::W5},
+      {codeview::RegisterId::ARM64_W6, AArch64::W6},
+      {codeview::RegisterId::ARM64_W7, AArch64::W7},
+      {codeview::RegisterId::ARM64_W8, AArch64::W8},
+      {codeview::RegisterId::ARM64_W9, AArch64::W9},
+      {codeview::RegisterId::ARM64_W10, AArch64::W10},
+      {codeview::RegisterId::ARM64_W11, AArch64::W11},
+      {codeview::RegisterId::ARM64_W12, AArch64::W12},
+      {codeview::RegisterId::ARM64_W13, AArch64::W13},
+      {codeview::RegisterId::ARM64_W14, AArch64::W14},
+      {codeview::RegisterId::ARM64_W15, AArch64::W15},
+      {codeview::RegisterId::ARM64_W16, AArch64::W16},
+      {codeview::RegisterId::ARM64_W17, AArch64::W17},
+      {codeview::RegisterId::ARM64_W18, AArch64::W18},
+      {codeview::RegisterId::ARM64_W19, AArch64::W19},
+      {codeview::RegisterId::ARM64_W20, AArch64::W20},
+      {codeview::RegisterId::ARM64_W21, AArch64::W21},
+      {codeview::RegisterId::ARM64_W22, AArch64::W22},
+      {codeview::RegisterId::ARM64_W23, AArch64::W23},
+      {codeview::RegisterId::ARM64_W24, AArch64::W24},
+      {codeview::RegisterId::ARM64_W25, AArch64::W25},
+      {codeview::RegisterId::ARM64_W26, AArch64::W26},
+      {codeview::RegisterId::ARM64_W27, AArch64::W27},
+      {codeview::RegisterId::ARM64_W28, AArch64::W28},
+      {codeview::RegisterId::ARM64_W29, AArch64::W29},
+      {codeview::RegisterId::ARM64_W30, AArch64::W30},
+      {codeview::RegisterId::ARM64_WZR, AArch64::WZR},
+      {codeview::RegisterId::ARM64_X0, AArch64::X0},
+      {codeview::RegisterId::ARM64_X1, AArch64::X1},
+      {codeview::RegisterId::ARM64_X2, AArch64::X2},
+      {codeview::RegisterId::ARM64_X3, AArch64::X3},
+      {codeview::RegisterId::ARM64_X4, AArch64::X4},
+      {codeview::RegisterId::ARM64_X5, AArch64::X5},
+      {codeview::RegisterId::ARM64_X6, AArch64::X6},
+      {codeview::RegisterId::ARM64_X7, AArch64::X7},
+      {codeview::RegisterId::ARM64_X8, AArch64::X8},
+      {codeview::RegisterId::ARM64_X9, AArch64::X9},
+      {codeview::RegisterId::ARM64_X10, AArch64::X10},
+      {codeview::RegisterId::ARM64_X11, AArch64::X11},
+      {codeview::RegisterId::ARM64_X12, AArch64::X12},
+      {codeview::RegisterId::ARM64_X13, AArch64::X13},
+      {codeview::RegisterId::ARM64_X14, AArch64::X14},
+      {codeview::RegisterId::ARM64_X15, AArch64::X15},
+      {codeview::RegisterId::ARM64_X16, AArch64::X16},
+      {codeview::RegisterId::ARM64_X17, AArch64::X17},
+      {codeview::RegisterId::ARM64_X18, AArch64::X18},
+      {codeview::RegisterId::ARM64_X19, AArch64::X19},
+      {codeview::RegisterId::ARM64_X20, AArch64::X20},
+      {codeview::RegisterId::ARM64_X21, AArch64::X21},
+      {codeview::RegisterId::ARM64_X22, AArch64::X22},
+      {codeview::RegisterId::ARM64_X23, AArch64::X23},
+      {codeview::RegisterId::ARM64_X24, AArch64::X24},
+      {codeview::RegisterId::ARM64_X25, AArch64::X25},
+      {codeview::RegisterId::ARM64_X26, AArch64::X26},
+      {codeview::RegisterId::ARM64_X27, AArch64::X27},
+      {codeview::RegisterId::ARM64_X28, AArch64::X28},
+      {codeview::RegisterId::ARM64_FP, AArch64::FP},
+      {codeview::RegisterId::ARM64_LR, AArch64::LR},
+      {codeview::RegisterId::ARM64_SP, AArch64::SP},
+      {codeview::RegisterId::ARM64_ZR, AArch64::XZR},
+      {codeview::RegisterId::ARM64_NZCV, AArch64::NZCV},
+      {codeview::RegisterId::ARM64_S0, AArch64::S0},
+      {codeview::RegisterId::ARM64_S1, AArch64::S1},
+      {codeview::RegisterId::ARM64_S2, AArch64::S2},
+      {codeview::RegisterId::ARM64_S3, AArch64::S3},
+      {codeview::RegisterId::ARM64_S4, AArch64::S4},
+      {codeview::RegisterId::ARM64_S5, AArch64::S5},
+      {codeview::RegisterId::ARM64_S6, AArch64::S6},
+      {codeview::RegisterId::ARM64_S7, AArch64::S7},
+      {codeview::RegisterId::ARM64_S8, AArch64::S8},
+      {codeview::RegisterId::ARM64_S9, AArch64::S9},
+      {codeview::RegisterId::ARM64_S10, AArch64::S10},
+      {codeview::RegisterId::ARM64_S11, AArch64::S11},
+      {codeview::RegisterId::ARM64_S12, AArch64::S12},
+      {codeview::RegisterId::ARM64_S13, AArch64::S13},
+      {codeview::RegisterId::ARM64_S14, AArch64::S14},
+      {codeview::RegisterId::ARM64_S15, AArch64::S15},
+      {codeview::RegisterId::ARM64_S16, AArch64::S16},
+      {codeview::RegisterId::ARM64_S17, AArch64::S17},
+      {codeview::RegisterId::ARM64_S18, AArch64::S18},
+      {codeview::RegisterId::ARM64_S19, AArch64::S19},
+      {codeview::RegisterId::ARM64_S20, AArch64::S20},
+      {codeview::RegisterId::ARM64_S21, AArch64::S21},
+      {codeview::RegisterId::ARM64_S22, AArch64::S22},
+      {codeview::RegisterId::ARM64_S23, AArch64::S23},
+      {codeview::RegisterId::ARM64_S24, AArch64::S24},
+      {codeview::RegisterId::ARM64_S25, AArch64::S25},
+      {codeview::RegisterId::ARM64_S26, AArch64::S26},
+      {codeview::RegisterId::ARM64_S27, AArch64::S27},
+      {codeview::RegisterId::ARM64_S28, AArch64::S28},
+      {codeview::RegisterId::ARM64_S29, AArch64::S29},
+      {codeview::RegisterId::ARM64_S30, AArch64::S30},
+      {codeview::RegisterId::ARM64_S31, AArch64::S31},
+      {codeview::RegisterId::ARM64_D0, AArch64::D0},
+      {codeview::RegisterId::ARM64_D1, AArch64::D1},
+      {codeview::RegisterId::ARM64_D2, AArch64::D2},
+      {codeview::RegisterId::ARM64_D3, AArch64::D3},
+      {codeview::RegisterId::ARM64_D4, AArch64::D4},
+      {codeview::RegisterId::ARM64_D5, AArch64::D5},
+      {codeview::RegisterId::ARM64_D6, AArch64::D6},
+      {codeview::RegisterId::ARM64_D7, AArch64::D7},
+      {codeview::RegisterId::ARM64_D8, AArch64::D8},
+      {codeview::RegisterId::ARM64_D9, AArch64::D9},
+      {codeview::RegisterId::ARM64_D10, AArch64::D10},
+      {codeview::RegisterId::ARM64_D11, AArch64::D11},
+      {codeview::RegisterId::ARM64_D12, AArch64::D12},
+      {codeview::RegisterId::ARM64_D13, AArch64::D13},
+      {codeview::RegisterId::ARM64_D14, AArch64::D14},
+      {codeview::RegisterId::ARM64_D15, AArch64::D15},
+      {codeview::RegisterId::ARM64_D16, AArch64::D16},
+      {codeview::RegisterId::ARM64_D17, AArch64::D17},
+      {codeview::RegisterId::ARM64_D18, AArch64::D18},
+      {codeview::RegisterId::ARM64_D19, AArch64::D19},
+      {codeview::RegisterId::ARM64_D20, AArch64::D20},
+      {codeview::RegisterId::ARM64_D21, AArch64::D21},
+      {codeview::RegisterId::ARM64_D22, AArch64::D22},
+      {codeview::RegisterId::ARM64_D23, AArch64::D23},
+      {codeview::RegisterId::ARM64_D24, AArch64::D24},
+      {codeview::RegisterId::ARM64_D25, AArch64::D25},
+      {codeview::RegisterId::ARM64_D26, AArch64::D26},
+      {codeview::RegisterId::ARM64_D27, AArch64::D27},
+      {codeview::RegisterId::ARM64_D28, AArch64::D28},
+      {codeview::RegisterId::ARM64_D29, AArch64::D29},
+      {codeview::RegisterId::ARM64_D30, AArch64::D30},
+      {codeview::RegisterId::ARM64_D31, AArch64::D31},
+      {codeview::RegisterId::ARM64_Q0, AArch64::Q0},
+      {codeview::RegisterId::ARM64_Q1, AArch64::Q1},
+      {codeview::RegisterId::ARM64_Q2, AArch64::Q2},
+      {codeview::RegisterId::ARM64_Q3, AArch64::Q3},
+      {codeview::RegisterId::ARM64_Q4, AArch64::Q4},
+      {codeview::RegisterId::ARM64_Q5, AArch64::Q5},
+      {codeview::RegisterId::ARM64_Q6, AArch64::Q6},
+      {codeview::RegisterId::ARM64_Q7, AArch64::Q7},
+      {codeview::RegisterId::ARM64_Q8, AArch64::Q8},
+      {codeview::RegisterId::ARM64_Q9, AArch64::Q9},
+      {codeview::RegisterId::ARM64_Q10, AArch64::Q10},
+      {codeview::RegisterId::ARM64_Q11, AArch64::Q11},
+      {codeview::RegisterId::ARM64_Q12, AArch64::Q12},
+      {codeview::RegisterId::ARM64_Q13, AArch64::Q13},
+      {codeview::RegisterId::ARM64_Q14, AArch64::Q14},
+      {codeview::RegisterId::ARM64_Q15, AArch64::Q15},
+      {codeview::RegisterId::ARM64_Q16, AArch64::Q16},
+      {codeview::RegisterId::ARM64_Q17, AArch64::Q17},
+      {codeview::RegisterId::ARM64_Q18, AArch64::Q18},
+      {codeview::RegisterId::ARM64_Q19, AArch64::Q19},
+      {codeview::RegisterId::ARM64_Q20, AArch64::Q20},
+      {codeview::RegisterId::ARM64_Q21, AArch64::Q21},
+      {codeview::RegisterId::ARM64_Q22, AArch64::Q22},
+      {codeview::RegisterId::ARM64_Q23, AArch64::Q23},
+      {codeview::RegisterId::ARM64_Q24, AArch64::Q24},
+      {codeview::RegisterId::ARM64_Q25, AArch64::Q25},
+      {codeview::RegisterId::ARM64_Q26, AArch64::Q26},
+      {codeview::RegisterId::ARM64_Q27, AArch64::Q27},
+      {codeview::RegisterId::ARM64_Q28, AArch64::Q28},
+      {codeview::RegisterId::ARM64_Q29, AArch64::Q29},
+      {codeview::RegisterId::ARM64_Q30, AArch64::Q30},
+      {codeview::RegisterId::ARM64_Q31, AArch64::Q31},
+
+  };
+  for (const auto &Entry : RegMap)
+    MRI->mapLLVMRegToCVReg(Entry.Reg, static_cast<int>(Entry.CVReg));
 }
 
 static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
@@ -166,12 +333,20 @@ public:
     for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
          Byte += 4) {
       uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+      // Optional "bti c" before adrp (BTI-enabled entry). The loop bound only
+      // covers Byte..Byte+7; skip it only if Byte+8..Byte+11 is readable.
+      const bool HasBti = Insn == 0xd503245f && Byte + 11 < End;
+      uint64_t Off = HasBti ? 4 : 0;
+      if (HasBti)
+        Insn = support::endian::read32le(PltContents.data() + Byte + Off);
       // Check for adrp.
       if ((Insn & 0x9f000000) != 0x90000000)
         continue;
+      Off += 4;
       uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
             (((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
-      uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+      uint32_t Insn2 =
+          support::endian::read32le(PltContents.data() + Byte + Off);
       // Check for: ldr Xt, [Xn, #pimm].
       if (Insn2 >> 22 == 0x3e5) {
         Imm += ((Insn2 >> 10) & 0xfff) << 3;
@@ -192,7 +367,8 @@ static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
 // Force static initialization.
 extern "C" void LLVMInitializeAArch64TargetMC() {
   for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64beTarget(),
-                    &getTheARM64Target()}) {
+                    &getTheAArch64_32Target(), &getTheARM64Target(),
+                    &getTheARM64_32Target()}) {
     // Register the MC asm info.
     RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
 
@@ -228,7 +404,8 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
   }
 
   // Register the asm backend.
-  for (Target *T : {&getTheAArch64leTarget(), &getTheARM64Target()})
+  for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64_32Target(),
+                    &getTheARM64Target(), &getTheARM64_32Target()})
     TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
   TargetRegistry::RegisterMCAsmBackend(getTheAArch64beTarget(),
                                        createAArch64beAsmBackend);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 0f22f69bd5b0..c84c313c1db0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@ class Triple;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheAArch64leTarget();
-Target &getTheAArch64beTarget();
-Target &getTheARM64Target();
-
 MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
                                           const MCRegisterInfo &MRI,
                                           MCContext &Ctx);
@@ -57,7 +52,8 @@ std::unique_ptr<MCObjectTargetWriter>
 createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32);
 
 std::unique_ptr<MCObjectTargetWriter>
-createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype);
+createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
+                              bool IsILP32);
 
 std::unique_ptr<MCObjectTargetWriter> createAArch64WinCOFFObjectWriter();
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1021cdeeb3be..b3ce5ef22eef 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64MachObjectWriter.cpp - ARM Mach Object Writer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,8 +37,8 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
                                   unsigned &Log2Size, const MCAssembler &Asm);
 
 public:
-  AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
-      : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype) {}
+  AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype, bool IsILP32)
+      : MCMachObjectTargetWriter(!IsILP32 /* is64Bit */, CPUType, CPUSubtype) {}
 
   void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
                         const MCAsmLayout &Layout, const MCFragment *Fragment,
@@ -405,6 +404,8 @@ void AArch64MachObjectWriter::recordRelocation(
 }
 
 std::unique_ptr<MCObjectTargetWriter>
-llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) {
-  return llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype);
+llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
+                                    bool IsILP32) {
+  return llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype,
+                                                    IsILP32);
 }
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index a6b8d963bef9..f70752f5303f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "AArch64TargetStreamer.h"
 #include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
 using namespace llvm;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 73fb9baea3e3..3a0c5d8318dd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- AArch64TargetStreamer.h - AArch64 Target Streamer ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index 7ea7d5f2a20e..a45880a07427 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index b828ab832e9d..37c6fbb03908 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64WinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index ed265a876ab3..8c0656652eed 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -1,9 +1,8 @@
 //===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 23a65b345bad..808e59467081 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -1,9 +1,8 @@
 //=-- SVEInstrFormats.td -  AArch64 SVE Instruction classes -*- tablegen -*--=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -701,8 +700,8 @@ multiclass sve_int_perm_dup_i<string asm> {
                   (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
 }
 
-class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
-                       RegisterOperand VecList>
+class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm,
+                       ZPRRegOp zprty, RegisterOperand VecList>
 : I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
   asm, "\t$Zd, $Zn, $Zm",
   "",
@@ -714,16 +713,18 @@ class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{23-22} = sz8_64;
   let Inst{21}    = 0b1;
   let Inst{20-16} = Zm;
-  let Inst{15-10} = 0b001100;
+  let Inst{15-13} = 0b001;
+  let Inst{12-11} = opc;
+  let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
 }
 
 multiclass sve_int_perm_tbl<string asm> {
-  def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>;
-  def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>;
-  def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>;
-  def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>;
+  def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8,  Z_b>;
+  def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>;
+  def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>;
+  def _D : sve_int_perm_tbl<0b11, 0b10, asm, ZPR64, Z_d>;
 
   def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
                  (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>;
@@ -735,6 +736,37 @@ multiclass sve_int_perm_tbl<string asm> {
                  (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>;
 }
 
+multiclass sve2_int_perm_tbl<string asm> {  // opc (Inst{12-11}) = 0b01; $Zn uses ZZ_* operands
+  def _B : sve_int_perm_tbl<0b00, 0b01, asm, ZPR8,  ZZ_b>;
+  def _H : sve_int_perm_tbl<0b01, 0b01, asm, ZPR16, ZZ_h>;
+  def _S : sve_int_perm_tbl<0b10, 0b01, asm, ZPR32, ZZ_s>;
+  def _D : sve_int_perm_tbl<0b11, 0b01, asm, ZPR64, ZZ_d>;
+}
+
+class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b001011;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_perm_tbx<string asm> {
+  def _B : sve2_int_perm_tbx<0b00, asm, ZPR8>;
+  def _H : sve2_int_perm_tbx<0b01, asm, ZPR16>;
+  def _S : sve2_int_perm_tbx<0b10, asm, ZPR32>;
+  def _D : sve2_int_perm_tbx<0b11, asm, ZPR64>;
+}
+
 class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$Zn),
   asm, "\t$Zd, $Zn",
@@ -875,6 +907,21 @@ class sve_int_perm_extract_i<string asm>
   let ElementSize = ElementSizeNone;
 }
 
+class sve2_int_perm_extract_i_cons<string asm>
+: I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8),
+  asm, "\t$Zd, $Zn, $imm8",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<8> imm8;
+  let Inst{31-21} = 0b00000101011;
+  let Inst{20-16} = imm8{7-3};
+  let Inst{15-13} = 0b000;
+  let Inst{12-10} = imm8{2-0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Vector Select Group
 //===----------------------------------------------------------------------===//
@@ -1436,6 +1483,132 @@ multiclass sve_fp_fcadd<string asm> {
   def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Convert Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_convert_precision<bits<4> opc, string asm,
+                                ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
+  asm, "\t$Zd, $Pg/m, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<3> Pg;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = opc{3-2};
+  let Inst{21-18} = 0b0010;
+  let Inst{17-16} = opc{1-0};
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_fp_convert_down_narrow<string asm> {
+  def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
+  def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+}
+
+multiclass sve2_fp_convert_up_long<string asm> {
+  def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
+  def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_fp_convert_down_odd_rounding<string asm> {
+  def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Pairwise Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
+                            ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+  asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zm;
+  bits<5> Zdn;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = sz;
+  let Inst{21-19} = 0b010;
+  let Inst{18-16} = opc;
+  let Inst{15-13} = 0b100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+  let DestructiveInstType = Destructive;
+  let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
+  def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
+  def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
+  def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm,
+                        VectorIndexH:$iop),
+  asm, "\t$Zda, $Zn, $Zm$iop",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<3> Zm;
+  bits<3> iop;
+  let Inst{31-21} = 0b01100100101;
+  let Inst{20-19} = iop{2-1};  // upper two index bits; iop{0} is in Inst{11}
+  let Inst{18-16} = Zm;
+  let Inst{15-14} = 0b01;
+  let Inst{13}    = opc{1};  // opc is split across Inst{13} and Inst{10}
+  let Inst{12}    = 0b0;
+  let Inst{11}    = iop{0};  // lowest index bit
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // output $Zda is tied to the $_Zda input
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+  asm, "\t$Zda, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01100100101;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b10;
+  let Inst{13}    = opc{1};
+  let Inst{12-11} = 0b00;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Stack Allocation Group
 //===----------------------------------------------------------------------===//
@@ -1536,6 +1709,12 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
   def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
 }
 
+multiclass sve2_fp_flogb<string asm> {
+  def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
+  def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
+  def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Unary Operations - Unpredicated Group
 //===----------------------------------------------------------------------===//
@@ -1691,6 +1870,112 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
   def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
+                   ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15}    = 0b0;
+  let Inst{14-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_mla<bit S, string asm> {
+  def _B : sve2_int_mla<0b00, { 0b1110, S }, asm, ZPR8, ZPR8>;
+  def _H : sve2_int_mla<0b01, { 0b1110, S }, asm, ZPR16, ZPR16>;
+  def _S : sve2_int_mla<0b10, { 0b1110, S }, asm, ZPR32, ZPR32>;
+  def _D : sve2_int_mla<0b11, { 0b1110, S }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_int_mla_long<bits<5> opc, string asm> {
+  def _H : sve2_int_mla<0b01, opc, asm, ZPR16, ZPR8>;
+  def _S : sve2_int_mla<0b10, opc, asm, ZPR32, ZPR16>;
+  def _D : sve2_int_mla<0b11, opc, asm, ZPR64, ZPR32>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
+                                   ZPRRegOp zprty1, ZPRRegOp zprty2,
+                                   ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
+  asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;  // callers may pass '?' bits here and fill them in the def
+  let Inst{21}    = 0b1;  // Inst{20-16} ($Zm/$iop) is not set here; the defs assign it
+  let Inst{15-10} = opc;  // callers may pass '?' bits here and fill them in the def
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // output $Zda is tied to the $_Zda input
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm> {  // class opc = { 0b000, opc, S }
+  def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+    bits<3> Zm;
+    bits<3> iop;  // 3-bit index, split across Inst{22} and Inst{20-19}
+    let Inst{22} = iop{2};  // fills the '?' low bit of sz (class sets Inst{23-22} = sz)
+    let Inst{20-19} = iop{1-0};
+    let Inst{18-16} = Zm;
+  }
+  def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+    bits<3> Zm;
+    bits<2> iop;  // 2-bit index in Inst{20-19}
+    let Inst{20-19} = iop;
+    let Inst{18-16} = Zm;
+  }
+  def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+    bits<4> Zm;  // 4-bit register field in Inst{19-16}
+    bit iop;  // 1-bit index in Inst{20}
+    let Inst{20} = iop;
+    let Inst{19-16} = Zm;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add Long - Indexed Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm> {  // the '?' in the class opc is Inst{11}
+  def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
+                                        asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+    bits<3> Zm;
+    bits<3> iop;  // 3-bit index, split across Inst{20-19} and Inst{11}
+    let Inst{20-19} = iop{2-1};
+    let Inst{18-16} = Zm;
+    let Inst{11} = iop{0};  // fills the '?' bit left unset in the class opc (Inst{15-10})
+  }
+  def _D : sve2_int_mla_by_indexed_elem<0b11, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
+                                        asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+    bits<4> Zm;
+    bits<2> iop;  // 2-bit index, split across Inst{20} and Inst{11}
+    let Inst{20} = iop{1};
+    let Inst{19-16} = Zm;
+    let Inst{11} = iop{0};  // fills the '?' bit left unset in the class opc (Inst{15-10})
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Dot Product Group
 //===----------------------------------------------------------------------===//
@@ -1733,32 +2018,671 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
   "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
-  let Inst{31-23} = 0b010001001;
-  let Inst{22}    = sz;
+  let Inst{31-23} = 0b010001001;
+  let Inst{22}    = sz;
+  let Inst{21}    = 0b1;
+  let Inst{15-11} = 0;
+  let Inst{10}    = U;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+    bits<2> iop;
+    bits<3> Zm;
+    let Inst{20-19} = iop;
+    let Inst{18-16} = Zm;
+  }
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+    bits<1> iop;
+    bits<4> Zm;
+    let Inst{20} = iop;
+    let Inst{19-16} = Zm;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Integer Dot Product Group
+//===----------------------------------------------------------------------===//
+
+class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
+                             ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm,
+                         complexrotateop:$rot),
+  asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  bits<2> rot;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-12} = opc;
+  let Inst{11-10} = rot;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_cintx_dot<string asm> {
+  def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
+  def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_int_cmla<bit opc, string asm> {
+  def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
+  def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
+  def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
+  def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Integer Dot Product - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
+                                     ZPRRegOp zprty1, ZPRRegOp zprty2,
+                                     ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop,
+                         complexrotateop:$rot),
+  asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<2> rot;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b1;  // Inst{20-16} ($Zm/$iop) is not set here; the defs assign it
+  let Inst{15-12} = opc;
+  let Inst{11-10} = rot;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // output $Zda is tied to the $_Zda input
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
+  def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+    bits<2> iop;
+    bits<3> Zm;
+    let Inst{20-19} = iop;
+    let Inst{18-16} = Zm;
+  }
+  def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+    bit iop;
+    bits<4> Zm;
+    let Inst{20} = iop;
+    let Inst{19-16} = Zm;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {  // NOTE: sz is 0b10 for _H and 0b11 for _S here, unlike sve2_int_cmla
+  def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
+    bits<2> iop;  // 2-bit index in Inst{20-19}
+    bits<3> Zm;
+    let Inst{20-19} = iop;
+    let Inst{18-16} = Zm;
+  }
+  def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
+    bit iop;  // 1-bit index in Inst{20}
+    bits<4> Zm;
+    let Inst{20} = iop;
+    let Inst{19-16} = Zm;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b011;
+  let Inst{12-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_mul<bits<3> opc, string asm> {
+  def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
+  def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
+  def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
+  def _D : sve2_int_mul<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
+                                   ZPRRegOp zprty1, ZPRRegOp zprty2,
+                                   ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm, itype:$iop),
+  asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;  // callers may pass '?' bits here and fill them in the def
+  let Inst{21}    = 0b1;  // Inst{20-16} ($Zm/$iop) is not set here; the defs assign it
+  let Inst{15-14} = 0b11;
+  let Inst{13-10} = opc;  // callers may pass '?' bits here and fill them in the def
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm> {
+  def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+    bits<3> Zm;
+    bits<3> iop;  // 3-bit index, split across Inst{22} and Inst{20-19}
+    let Inst{22} = iop{2};  // fills the '?' low bit of sz (class sets Inst{23-22} = sz)
+    let Inst{20-19} = iop{1-0};
+    let Inst{18-16} = Zm;
+  }
+  def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+    bits<3> Zm;
+    bits<2> iop;  // 2-bit index in Inst{20-19}
+    let Inst{20-19} = iop;
+    let Inst{18-16} = Zm;
+  }
+  def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+    bits<4> Zm;  // 4-bit register field in Inst{19-16}
+    bit iop;  // 1-bit index in Inst{20}
+    let Inst{20} = iop;
+    let Inst{19-16} = Zm;
+  }
+}
+
+multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {  // the '?' in the class opc is Inst{11}
+  def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm,
+                                        ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+    bits<3> Zm;
+    bits<3> iop;  // 3-bit index, split across Inst{20-19} and Inst{11}
+    let Inst{20-19} = iop{2-1};
+    let Inst{18-16} = Zm;
+    let Inst{11} = iop{0};  // fills the '?' bit left unset in the class opc (Inst{13-10})
+  }
+  def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm,
+                                        ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+    bits<4> Zm;
+    bits<2> iop;  // 2-bit index, split across Inst{20} and Inst{11}
+    let Inst{20} = iop{1};
+    let Inst{19-16} = Zm;
+    let Inst{11} = iop{0};  // fills the '?' bit left unset in the class opc (Inst{13-10})
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer - Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
+                          ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+  asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zm;
+  bits<5> Zdn;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = opc{5-1};  // opc is split: upper five bits here, opc{0} in Inst{13}
+  let Inst{15-14} = 0b10;
+  let Inst{13}    = opc{0};
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";  // output $Zdn is tied to the $_Zdn input
+  let DestructiveInstType = Destructive;
+  let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_int_arith_pred<bits<6> opc, string asm> {  // one def per register type; sz counts 0b00..0b11 for ZPR8..ZPR64
+  def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
+  def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
+  def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
+  def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
+}
+
+class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,  // predicated ($Pg/m) op accumulating $Zn (zprty2) into $Zda (zprty1)
+                                        ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins PPR3bAny:$Pg, zprty1:$_Zda, zprty2:$Zn),
+  asm, "\t$Zda, $Pg/m, $Zn", "", []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zn;
+  bits<5> Zda;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21-17} = 0b00010;
+  let Inst{16}    = U;  // single selector bit supplied by the instantiating multiclass
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // accumulator is both input and output
+  let DestructiveInstType = Destructive;
+  let ElementSize = zprty1.ElementSize;  // taken from the accumulator type, not the source type
+}
+
+multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm> {  // no _B def: the accumulator type is one step wider than the source type
+  def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
+  def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
+  def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
+}
+
+class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,  // predicated ($Pg/m) unary op: $Zn -> $Zd
+                            string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
+  asm, "\t$Zd, $Pg/m, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21-20} = 0b00;
+  let Inst{19}    = Q;  // the multiclasses pass the top bit of their 3-bit opc here
+  let Inst{18}    = 0b0;
+  let Inst{17-16} = opc;
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+
+  let Constraints = "$Zd = $_Zd";  // $_Zd is an input tied to the result register
+  let DestructiveInstType = Destructive;
+  let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm> {  // ZPR32-only variant of sve2_int_un_pred_arit
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+}
+
+multiclass sve2_int_un_pred_arit<bits<3> opc, string asm> {  // all four register types; opc{2} becomes Q, opc{1-0} the class opc
+  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
+  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
+  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Widening Integer Arithmetic Group
+//===----------------------------------------------------------------------===//
+
+class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,  // unpredicated $Zd = op($Zn, $Zm); each operand has its own register type
+                          ZPRRegOp zprty1, ZPRRegOp zprty2, ZPRRegOp zprty3>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15}    = 0b0;
+  let Inst{14-10} = opc;  // full 5-bit opcode; the multiclasses below fix its upper bits
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_wide_int_arith_long<bits<5> opc, string asm> {  // both sources use the type one step narrower than the result
+  def _H : sve2_wide_int_arith<0b01, opc, asm, ZPR16, ZPR8, ZPR8>;
+  def _S : sve2_wide_int_arith<0b10, opc, asm, ZPR32, ZPR16, ZPR16>;
+  def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>;
+}
+
+multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {  // $Zn has the result type, only $Zm is narrower; opcode is 0b10 followed by opc
+  def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>;
+  def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>;
+  def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
+}
+
+multiclass sve2_pmul_long<bits<1> opc, string asm> {  // only _H and _D defs (no _S); opcode is 0b1101 followed by the 1-bit opc
+  def _H : sve2_wide_int_arith<0b01, {0b1101, opc}, asm, ZPR16, ZPR8, ZPR8>;
+  def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve2_misc<bits<2> sz, bits<4> opc, string asm,  // unpredicated $Zd = op($Zn, $Zm); both sources share zprty2
+                ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b10;
+  let Inst{13-10} = opc;  // 4-bit opcode; the multiclasses below fix its upper bits
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_misc_bitwise<bits<4> opc, string asm> {  // result and sources share one register type; opc is passed through unchanged
+  def _B : sve2_misc<0b00, opc, asm, ZPR8, ZPR8>;
+  def _H : sve2_misc<0b01, opc, asm, ZPR16, ZPR16>;
+  def _S : sve2_misc<0b10, opc, asm, ZPR32, ZPR32>;
+  def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {  // sve2_misc defs with opcode 0b010 followed by opc
+  let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {  // NOTE(review): marked Destructive, yet sve2_misc declares no "$Zd = ..." tie — confirm intent
+    def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8,  ZPR8>;
+    def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
+    def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
+    def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
+  }
+}
+
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {  // sources one type narrower than the result; opcode is 0b00 followed by opc
+  def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+  def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+  def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,  // $Zd (zprty1) = op($Zn (zprty2), $imm)
+                                   ZPRRegOp zprty1, ZPRRegOp zprty2,
+                                   Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> imm;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};  // tsz8_64 is split: top bit here, low two bits in Inst{20-19}
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};  // defs that pass '?' for these bits override them with imm bits
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-12} = 0b1010;
+  let Inst{11-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {  // wider results leave more tsz8_64 bits as '?' and fill them from imm
+  def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm,
+                                        ZPR16, ZPR8, vecshiftL8>;
+  def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm,
+                                        ZPR32, ZPR16, vecshiftL16> {
+    let Inst{19} = imm{3};  // fills the one '?' bit of tsz8_64
+  }
+  def _D : sve2_bitwise_shift_left_long<{1,?,?}, opc, asm,
+                                        ZPR64, ZPR32, vecshiftL32> {
+    let Inst{20-19} = imm{4-3};  // fills both '?' bits of tsz8_64
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Accumulate Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,  // unpredicated $Zd = op($Zn, $imm), same register type in and out
+                                  ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<6> imm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = tsz8_64{3-2};  // 4-bit tsz8_64 is split across Inst{23-22} and Inst{20-19}
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};  // defs that pass '?' bits override them with upper imm bits
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-11} = 0b11110;
+  let Inst{10}    = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {  // uses the vecshiftL* immediate operands
+  def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+  def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+    let Inst{19} = imm{3};  // one '?' bit of tsz8_64 taken from imm
+  }
+  def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+    let Inst{20-19} = imm{4-3};  // two '?' bits taken from imm
+  }
+  def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+    let Inst{22}    = imm{5};  // three '?' bits: imm{5} here plus imm{4-3} below
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {  // same layout as the _left multiclass but with vecshiftR* operands
+  def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};  // one '?' bit of tsz8_64 taken from imm
+  }
+  def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};  // two '?' bits taken from imm
+  }
+  def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};  // three '?' bits: imm{5} here plus imm{4-3} below
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,  // $Zda = op($_Zda, $Zn, $imm) with $Zda tied to $_Zda
+                                        ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
+  asm, "\t$Zda, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<6> imm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = tsz8_64{3-2};  // 4-bit tsz8_64 is split across Inst{23-22} and Inst{20-19}
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};  // defs that pass '?' bits override them with upper imm bits
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-12} = 0b1110;
+  let Inst{11-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // accumulator is both input and output
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {  // accumulating defs using the vecshiftR* operands
+  def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};  // one '?' bit of tsz8_64 taken from imm
+  }
+  def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};  // two '?' bits taken from imm
+  }
+  def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};  // three '?' bits: imm{5} here plus imm{4-3} below
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>  // $Zdn = op($_Zdn, $Zm, $rot) with $Zdn tied to $_Zdn
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, complexrotateopodd:$rot),
+  asm, "\t$Zdn, $_Zdn, $Zm, $rot", "", []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  bit rot;  // the complexrotateopodd operand is encoded in a single bit
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21-17} = 0b00000;
+  let Inst{16}    = opc;
+  let Inst{15-11} = 0b11011;
+  let Inst{10}    = rot;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";  // output register is tied to the first source
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_cadd<bit opc, string asm> {  // one def per register type; sz counts 0b00..0b11 for ZPR8..ZPR64
+  def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
+  def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
+  def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
+  def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+}
+
+class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,  // $Zda (zprty1) accumulates op($Zn, $Zm), both of zprty2
+                             ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b11;
+  let Inst{13-10} = opc;  // 4-bit opcode; the multiclasses below fix its upper bits
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";  // accumulator is both input and output
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_absdiff_accum<bit opc, string asm> {  // accumulator and sources share one type; opcode is 0b111 followed by opc
+  def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
+  def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
+  def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
+  def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {  // sources one type narrower than the accumulator; opcode is 0b00 followed by opc
+  def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+  def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+  def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {  // reuses sve2_int_absdiff_accum; the sz argument is built from opc{1} plus a type bit
+  def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,  // low sz bit 0 for the ZPR32 def
+                                  ZPR32, ZPR32>;
+  def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,  // low sz bit 1 for the ZPR64 def
+                                  ZPR64, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Narrowing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,  // $Zd (zprty1) = op($Zn (zprty2), $imm)
+                                         string asm, ZPRRegOp zprty1,
+                                         ZPRRegOp zprty2, Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> imm;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};  // tsz8_64 is split: top bit here, low two bits in Inst{20-19}
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = tsz8_64{1-0};  // defs that pass '?' for these bits override them with imm bits
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-14} = 0b00;
+  let Inst{13-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {  // result type is one step narrower than the source; suffix names the result type
+  def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
+                                              vecshiftR8>;
+  def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
+                                              vecshiftR16> {
+    let Inst{19} = imm{3};  // fills the one '?' bit of tsz8_64
+  }
+  def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
+                                              vecshiftR32> {
+    let Inst{20-19} = imm{4-3};  // fills both '?' bits of tsz8_64
+  }
+}
+
+class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,  // $Zd (zprty1) = op($Zn, $Zm), both sources of zprty2
+                                  ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b011;
+  let Inst{12-10} = opc; // S, R, T
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {  // suffix names the (narrower) result type; sz starts at 0b01 for _B
+  def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
+  def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
+  def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
+                                  ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
+  asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};
   let Inst{21}    = 0b1;
-  let Inst{15-11} = 0;
-  let Inst{10}    = U;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-13} = 0b000010;
+  let Inst{12-10} = opc;
   let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zda;
-
-  let Constraints = "$Zda = $_Zda";
-  let DestructiveInstType = Destructive;
-  let ElementSize = ElementSizeNone;
+  let Inst{4-0}   = Zd;
 }
 
-multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
-    bits<2> iop;
-    bits<3> Zm;
-    let Inst{20-19} = iop;
-    let Inst{18-16} = Zm;
-  }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
-    bits<1> iop;
-    bits<4> Zm;
-    let Inst{20} = iop;
-    let Inst{19-16} = Zm;
-  }
+multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
+  def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
+  def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
+  def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1983,6 +2907,86 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
   let Inst{4-0}   = Zd;
 }
 
+multiclass sve_int_bin_cons_log<bits<2> opc, string asm> {  // one real instruction (NAME) plus aliases for other register types
+  def NAME : sve_int_bin_cons_log<opc, asm>;
+
+  def : InstAlias<asm # "\t$Zd, $Zn, $Zm",  // same asm string with ZPR8 operands, mapped onto NAME
+                  (!cast<Instruction>(NAME) ZPR8:$Zd,  ZPR8:$Zn,  ZPR8:$Zm),  1>;
+  def : InstAlias<asm # "\t$Zd, $Zn, $Zm",  // ZPR16 operands
+                  (!cast<Instruction>(NAME) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 1>;
+  def : InstAlias<asm # "\t$Zd, $Zn, $Zm",  // ZPR32 operands
+                  (!cast<Instruction>(NAME) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 1>;
+}
+
+class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>  // three-source op on ZPR64 registers with $Zdn tied to $_Zdn
+: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Zk),
+  asm, "\t$Zdn, $_Zdn, $Zm, $Zk",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zk;
+  bits<5> Zm;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = opc{2-1};  // opc is split: upper two bits here, lowest bit in Inst{10}
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-11} = 0b00111;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zk;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";  // output register is tied to the first source
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {  // one ZPR64 instruction (NAME) plus aliases for other register types
+  def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
+
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",  // $Zdn appears twice in the asm string, once in the result dag
+                  (!cast<Instruction>(NAME) ZPR8:$Zdn,  ZPR8:$Zm,  ZPR8:$Zk),  1>;
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",  // ZPR16 operands
+                  (!cast<Instruction>(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>;
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",  // ZPR32 operands
+                  (!cast<Instruction>(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>;
+}
+
+class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,  // $Zdn = op($_Zdn, $Zm, $imm) with $Zdn tied to $_Zdn
+                                ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, immtype:$imm),
+  asm, "\t$Zdn, $_Zdn, $Zm, $imm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  bits<6> imm;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = tsz8_64{3-2};  // 4-bit tsz8_64 is split across Inst{23-22} and Inst{20-19}
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = tsz8_64{1-0};  // defs that pass '?' bits override them with upper imm bits
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-10} = 0b001101;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";  // output register is tied to the first source
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_rotate_right_imm<string asm> {  // one def per register type, using the vecshiftR* operands
+  def _B : sve2_int_rotate_right_imm<{0,0,0,1}, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_rotate_right_imm<{0,0,1,?}, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};  // one '?' bit of tsz8_64 taken from imm
+  }
+  def _S : sve2_int_rotate_right_imm<{0,1,?,?}, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};  // two '?' bits taken from imm
+  }
+  def _D : sve2_int_rotate_right_imm<{1,?,?,?}, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};  // three '?' bits: imm{5} here plus imm{4-3} below
+    let Inst{20-19} = imm{4-3};
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // SVE Integer Wide Immediate - Predicated Group
@@ -2266,6 +3270,32 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm> {
   def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
 }
 
+class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,  // predicate $Pd computed from two GPR64 operands
+                        PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
+  asm, "\t$Pd, $Rn, $Rm",
+  "", []>, Sched<[]> {
+  bits<4> Pd;
+  bits<5> Rm;
+  bits<5> Rn;
+  let Inst{31-24} = 0b00100101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Rm;
+  let Inst{15-10} = 0b001100;
+  let Inst{9-5}   = Rn;
+  let Inst{4}     = rw;  // single selector bit supplied by the instantiating multiclass
+  let Inst{3-0}   = Pd;
+
+  let Defs = [NZCV];  // the instruction also writes the NZCV flags
+}
+
+multiclass sve2_int_while_rr<bits<1> rw, string asm> {  // one def per predicate type; sz8_64 counts 0b00..0b11 for PPR8..PPR64
+  def _B : sve2_int_while_rr<0b00, rw, asm, PPR8>;
+  def _H : sve2_int_while_rr<0b01, rw, asm, PPR16>;
+  def _S : sve2_int_while_rr<0b10, rw, asm, PPR32>;
+  def _D : sve2_int_while_rr<0b11, rw, asm, PPR64>;
+}
 
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Fast Reduction Group
@@ -2497,9 +3527,9 @@ multiclass sve_int_index_rr<string asm> {
 //===----------------------------------------------------------------------===//
 // SVE Bitwise Shift - Predicated Group
 //===----------------------------------------------------------------------===//
-class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
-                               ZPRRegOp zprty, Operand immtype,
-                               ElementSizeEnum size>
+class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
+                                 ZPRRegOp zprty, Operand immtype,
+                                 ElementSizeEnum size>
 : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
   asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
   "",
@@ -2509,8 +3539,8 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
   bits<6> imm;
   let Inst{31-24} = 0b00000100;
   let Inst{23-22} = tsz8_64{3-2};
-  let Inst{21-19} = 0b000;
-  let Inst{18-16} = opc;
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = opc;
   let Inst{15-13} = 0b100;
   let Inst{12-10} = Pg;
   let Inst{9-8}   = tsz8_64{1-0};
@@ -2522,7 +3552,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
   let ElementSize = size;
 }
 
-multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
   def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
                                       ElementSizeB>;
   def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
@@ -2540,7 +3570,7 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
   }
 }
 
-multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
   def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
                                       ElementSizeB>;
   def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -2856,6 +3886,43 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
                  (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
 }
 
+class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,  // store addressed by [$Zn, $Rm] under predicate $Pg
+                             RegisterOperand VecList>
+: I<(outs), iops,  // a store defines no register: $Zt (the stored data) must be the first operand of iops
+  asm, "\t$Zt, $Pg, [$Zn, $Rm]",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Rm;
+  bits<5> Zn;
+  bits<5> Zt;
+  let Inst{31-25} = 0b1110010;
+  let Inst{24-22} = opc;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Rm;
+  let Inst{15-13} = 0b001;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zt;
+
+  let mayStore = 1;
+}
+
+multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
+                             RegisterOperand listty, ZPRRegOp zprty> {
+  def _REAL : sve2_mem_cstnt_vs_base<opc, (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),  // $Zt is a source; operand order matches the aliases below
+                                     asm, listty>;
+
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",  // $Zt written as a plain register
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",  // no offset register written: XZR is used for $Rm
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",  // $Zt written as a register list
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",  // list form without offset; the only alias passing 1 as the last argument
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
 class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
                      RegisterOperand VecList, RegisterOperand zprext>
 : I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
@@ -3304,6 +4371,30 @@ multiclass sve_int_perm_splice<string asm> {
   def _D : sve_int_perm_splice<0b11, asm, ZPR64>;
 }
 
+class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,  // $Zd = op($Pg, $Zn) where $Zn is a VecList operand; no tied input
+                               ZPRRegOp zprty, RegisterOperand VecList>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, VecList:$Zn),
+  asm, "\t$Zd, $Pg, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zn;
+  bits<5> Zd;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21-13} = 0b101101100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;  // the list operand is encoded as a single 5-bit register field
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_perm_splice_cons<string asm> {  // pairs each ZPR type with the matching ZZ_* list operand
+  def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8,  ZZ_b>;
+  def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
+  def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
+  def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
+}
+
 class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
                        ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
@@ -4003,6 +5094,46 @@ multiclass sve_mem_p_fill<string asm> {
                   (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
 }
 
+class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,  // load into $Zt, addressed by [$Zn, $Rm] under predicate $Pg/z
+                             RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,  // $Zt is the loaded result; iops supplies $Pg, $Zn and $Rm
+  asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Rm;
+  bits<5> Zn;
+  bits<5> Zt;
+  let Inst{31}    = 0b1;
+  let Inst{30}    = opc{4};  // 5-bit opc is scattered over Inst{30}, Inst{24-23} and Inst{14-13}
+  let Inst{29-25} = 0b00010;
+  let Inst{24-23} = opc{3-2};
+  let Inst{22-21} = 0b00;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = 0b1;
+  let Inst{14-13} = opc{1-0};
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zt;
+
+  let mayLoad = 1;
+}
+
+multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,  // one real instruction (_REAL) plus four assembly aliases
+                             RegisterOperand listty, ZPRRegOp zprty> {
+  def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+                                     asm, listty>;
+
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",  // $Zt written as a plain register
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",  // no offset register written: XZR is used for $Rm
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",  // $Zt written as a register list
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",  // list form without offset; the only alias passing 1 as the last argument
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Memory - 64-bit Gather Group
 //===----------------------------------------------------------------------===//
@@ -4454,3 +5585,132 @@ multiclass sve_int_break_z<bits<3> opc, string asm> {
   def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 String Processing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_char_match<bit sz, bit opc, string asm,  // predicate result $Pd from $Zn and $Zm under $Pg/z
+                      PPRRegOp pprty, ZPRRegOp zprty>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+  asm, "\t$Pd, $Pg/z, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<4> Pd;
+  bits<3> Pg;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = sz;  // single size bit (only two element types are instantiated)
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = opc;
+  let Inst{3-0}   = Pd;
+
+  let Defs = [NZCV];  // the instruction also writes the NZCV flags
+}
+
+multiclass sve2_char_match<bit opc, string asm> {  // only 8-bit and 16-bit forms: sz = 0 for _B, 1 for _H
+  def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
+  def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Segment Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_segment<string asm>  // unpredicated $Zd = op($Zn, $Zm), ZPR8 only; every non-register bit is fixed
+: I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b101000;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Vector Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>  // predicated ($Pg/z) $Zd = op($Zn, $Zm); no tied input
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Pg/z, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<3> Pg;
+  bits<5> Zm;
+  let Inst{31-23} = 0b010001011;
+  let Inst{22}    = sz;  // single size bit (only two element types are instantiated)
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b110;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_hist_gen_vector<string asm> {  // only 32-bit and 64-bit forms: sz = 0 for _S, 1 for _D
+  def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
+  def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Crypto Extensions Group
+//===----------------------------------------------------------------------===//
+
+class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>  // unpredicated $Zd = op($Zn, $Zm); no tied input
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-11} = 0b11110;
+  let Inst{10}    = opc;  // the only variable opcode bit
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>  // $Zdn = op($_Zdn, $Zm) with $Zdn tied to $_Zdn
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm),
+  asm, "\t$Zdn, $_Zdn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  let Inst{31-17} = 0b010001010010001;
+  let Inst{16}    = opc{1};  // opc is split: high bit here, low bit in Inst{10}
+  let Inst{15-11} = 0b11100;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";  // NOTE(review): tied, but DestructiveInstType/ElementSize are not set as in other tied classes here — confirm
+}
+
+class sve2_crypto_unary_op<bit opc, string asm>  // in-place op on a single ZPR8 register: $Zdn tied to $_Zdn
+: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn),
+  asm, "\t$Zdn, $_Zdn",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  let Inst{31-11} = 0b010001010010000011100;
+  let Inst{10}    = opc;  // the only variable opcode bit
+  let Inst{9-5}   = 0b00000;  // fixed zeros: there is no second register operand
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+}
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 8fb161574c5b..7f02da6a9516 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -1,39 +1,50 @@
 //===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Triple.h"
+#include "TargetInfo/AArch64TargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
+
 using namespace llvm;
-namespace llvm {
-Target &getTheAArch64leTarget() {
+Target &llvm::getTheAArch64leTarget() {
   static Target TheAArch64leTarget;
   return TheAArch64leTarget;
 }
-Target &getTheAArch64beTarget() {
+Target &llvm::getTheAArch64beTarget() {
   static Target TheAArch64beTarget;
   return TheAArch64beTarget;
 }
-Target &getTheARM64Target() {
+Target &llvm::getTheAArch64_32Target() {
+  static Target TheAArch64_32Target; // Registered for Triple::aarch64_32.
+  return TheAArch64_32Target;
+}
+Target &llvm::getTheARM64Target() {
   static Target TheARM64Target;
   return TheARM64Target;
 }
-} // namespace llvm
+Target &llvm::getTheARM64_32Target() {
+  static Target TheARM64_32Target;
+  return TheARM64_32Target;
+}
 
 extern "C" void LLVMInitializeAArch64TargetInfo() {
   // Now register the "arm64" name for use with "-march". We don't want it to
-  // take possession of the Triple::aarch64 tag though.
+  // take possession of the Triple::aarch64 tags though.
   TargetRegistry::RegisterTarget(getTheARM64Target(), "arm64",
                                  "ARM64 (little endian)", "AArch64",
                                  [](Triple::ArchType) { return false; }, true);
+  TargetRegistry::RegisterTarget(getTheARM64_32Target(), "arm64_32",
+                                 "ARM64 (little endian ILP32)", "AArch64",
+                                 [](Triple::ArchType) { return false; }, true);
 
   RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
       getTheAArch64leTarget(), "aarch64", "AArch64 (little endian)", "AArch64");
   RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
       getTheAArch64beTarget(), "aarch64_be", "AArch64 (big endian)", "AArch64");
+  RegisterTarget<Triple::aarch64_32, /*HasJIT=*/true> X(
+      getTheAArch64_32Target(), "aarch64_32", "AArch64 (little endian ILP32)", "AArch64");
 }
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h
new file mode 100644
index 000000000000..b3728a11bb5d
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.h - AArch64 Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
+#define LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheAArch64leTarget(); // Registered as "aarch64".
+Target &getTheAArch64beTarget(); // Registered as "aarch64_be".
+Target &getTheAArch64_32Target(); // Registered as "aarch64_32".
+Target &getTheARM64Target(); // Registered as "arm64".
+Target &getTheARM64_32Target(); // Registered as "arm64_32".
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index c88155db7037..7bb075c36e79 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 44c6a6b44895..e5e2fc2cb0df 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1,9 +1,8 @@
 //===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -186,6 +185,49 @@ static inline unsigned getDRegFromBReg(unsigned Reg) {
   return Reg;
 }
 
+static inline bool atomicBarrierDroppedOnZero(unsigned Opcode) {
+  switch (Opcode) { // A/AL-suffixed LD<op> and SWP opcodes, B/H/W/X forms.
+  case AArch64::LDADDAB:   case AArch64::LDADDAH:
+  case AArch64::LDADDAW:   case AArch64::LDADDAX:
+  case AArch64::LDADDALB:  case AArch64::LDADDALH:
+  case AArch64::LDADDALW:  case AArch64::LDADDALX:
+  case AArch64::LDCLRAB:   case AArch64::LDCLRAH:
+  case AArch64::LDCLRAW:   case AArch64::LDCLRAX:
+  case AArch64::LDCLRALB:  case AArch64::LDCLRALH:
+  case AArch64::LDCLRALW:  case AArch64::LDCLRALX:
+  case AArch64::LDEORAB:   case AArch64::LDEORAH:
+  case AArch64::LDEORAW:   case AArch64::LDEORAX:
+  case AArch64::LDEORALB:  case AArch64::LDEORALH:
+  case AArch64::LDEORALW:  case AArch64::LDEORALX:
+  case AArch64::LDSETAB:   case AArch64::LDSETAH:
+  case AArch64::LDSETAW:   case AArch64::LDSETAX:
+  case AArch64::LDSETALB:  case AArch64::LDSETALH:
+  case AArch64::LDSETALW:  case AArch64::LDSETALX:
+  case AArch64::LDSMAXAB:  case AArch64::LDSMAXAH:
+  case AArch64::LDSMAXAW:  case AArch64::LDSMAXAX:
+  case AArch64::LDSMAXALB: case AArch64::LDSMAXALH:
+  case AArch64::LDSMAXALW: case AArch64::LDSMAXALX:
+  case AArch64::LDSMINAB:  case AArch64::LDSMINAH:
+  case AArch64::LDSMINAW:  case AArch64::LDSMINAX:
+  case AArch64::LDSMINALB: case AArch64::LDSMINALH:
+  case AArch64::LDSMINALW: case AArch64::LDSMINALX:
+  case AArch64::LDUMAXAB:  case AArch64::LDUMAXAH:
+  case AArch64::LDUMAXAW:  case AArch64::LDUMAXAX:
+  case AArch64::LDUMAXALB: case AArch64::LDUMAXALH:
+  case AArch64::LDUMAXALW: case AArch64::LDUMAXALX:
+  case AArch64::LDUMINAB:  case AArch64::LDUMINAH:
+  case AArch64::LDUMINAW:  case AArch64::LDUMINAX:
+  case AArch64::LDUMINALB: case AArch64::LDUMINALH:
+  case AArch64::LDUMINALW: case AArch64::LDUMINALX:
+  case AArch64::SWPAB:     case AArch64::SWPAH:
+  case AArch64::SWPAW:     case AArch64::SWPAX:
+  case AArch64::SWPALB:    case AArch64::SWPALH:
+  case AArch64::SWPALW:    case AArch64::SWPALX:
+    return true; // Opcode is one of the cases listed above.
+  }
+  return false; // Every other opcode.
+}
+
 namespace AArch64CC {
 
 // The CondCodes constants map directly to the 4-bit encoding of the condition
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index bb7801c172f6..19a8bd901629 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -1,9 +1,8 @@
 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
@@ -51,14 +50,16 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIFixSGPRCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
-FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIFixWWMLivenessPass();
+FunctionPass *createSIPreAllocateWWMRegsPass();
 FunctionPass *createSIFormMemoryClausesPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &,
+                                               const TargetMachine *);
 FunctionPass *createAMDGPUUseNativeCallsPass();
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
+FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
+ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 FunctionPass *createSIModeRegisterPass();
 
@@ -93,6 +94,12 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
 extern char &AMDGPULowerKernelAttributesID;
 
+void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
+extern char &AMDGPUPropagateAttributesEarlyID;
+
+void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
+extern char &AMDGPUPropagateAttributesLateID;
+
 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
 extern char &AMDGPURewriteOutArgumentsID;
 
@@ -135,6 +142,9 @@ extern char &SIFixupVectorISelID;
 void initializeSILowerI1CopiesPass(PassRegistry &);
 extern char &SILowerI1CopiesID;
 
+void initializeSILowerSGPRSpillsPass(PassRegistry &);
+extern char &SILowerSGPRSpillsID;
+
 void initializeSILoadStoreOptimizerPass(PassRegistry &);
 extern char &SILoadStoreOptimizerID;
 
@@ -150,8 +160,8 @@ extern char &SIInsertSkipsPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIFixWWMLivenessPass(PassRegistry &);
-extern char &SIFixWWMLivenessID;
+void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsID;
 
 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
 extern char &AMDGPUSimplifyLibCallsID;
@@ -197,9 +207,6 @@ extern char &SIAnnotateControlFlowPassID;
 void initializeSIMemoryLegalizerPass(PassRegistry&);
 extern char &SIMemoryLegalizerID;
 
-void initializeSIDebuggerInsertNopsPass(PassRegistry&);
-extern char &SIDebuggerInsertNopsID;
-
 void initializeSIModeRegisterPass(PassRegistry&);
 extern char &SIModeRegisterID;
 
@@ -226,8 +233,11 @@ ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
 
-Target &getTheAMDGPUTarget();
-Target &getTheGCNTarget();
+void initializeGCNRegBankReassignPass(PassRegistry &);
+extern char &GCNRegBankReassignID;
+
+void initializeGCNNSAReassignPass(PassRegistry &);
+extern char &GCNNSAReassignID;
 
 namespace AMDGPU {
 enum TargetIndex {
@@ -250,21 +260,23 @@ enum TargetIndex {
 namespace AMDGPUAS {
   enum : unsigned {
     // The maximum value for flat, generic, local, private, constant and region.
-    MAX_AMDGPU_ADDRESS = 6,
+    MAX_AMDGPU_ADDRESS = 7,
 
     FLAT_ADDRESS = 0,     ///< Address space for flat memory.
     GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
-    REGION_ADDRESS = 2,   ///< Address space for region memory.
+    REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
 
-    CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
+    CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
     LOCAL_ADDRESS = 3,    ///< Address space for local memory.
     PRIVATE_ADDRESS = 5,  ///< Address space for private memory.
 
-    CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
+    CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
+
+    BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
 
+    /// Address space for directly addressable parameter memory (CONST0).
+    /// Address space for direct addressible parameter memory (CONST0).
     PARAM_D_ADDRESS = 6,
+    /// Address space for indirectly addressable parameter memory (VTX1).
+    /// Address space for indirect addressible parameter memory (VTX1).
     PARAM_I_ADDRESS = 7,
 
     // Do not re-order the CONSTANT_BUFFER_* enums.  Several places depend on
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 6a4cfe08e491..baeba534012c 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -1,9 +1,8 @@
 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===------------------------------------------------------------===//
 
@@ -61,6 +60,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
   "Have scratch_* flat memory instructions"
 >;
 
+def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
+  "ScalarFlatScratchInsts",
+  "true",
+  "Have s_scratch_* flat memory instructions"
+>;
+
 def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
   "AddNoCarryInsts",
   "true",
@@ -103,6 +108,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
   "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
 >;
 
+def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
+  "DoesNotSupportXNACK",
+  "true",
+  "Hardware does not support XNACK"
+>;
+
 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
 // XNACK. The current default kernel driver setting is:
 // - graphics ring: XNACK disabled
@@ -116,12 +127,78 @@ def FeatureXNACK : SubtargetFeature<"xnack",
   "Enable XNACK support"
 >;
 
+def FeatureCuMode : SubtargetFeature<"cumode",
+  "EnableCuMode",
+  "true",
+  "Enable CU wavefront execution mode"
+>;
+
 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
   "SGPRInitBug",
   "true",
   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
 >;
 
+def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
+  "LDSMisalignedBug",
+  "true",
+  "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
+>;
+
+def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
+  "HasVcmpxPermlaneHazard",
+  "true",
+  "TODO: describe me"
+>;
+
+def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
+  "HasVMEMtoScalarWriteHazard",
+  "true",
+  "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
+>;
+
+def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
+  "HasSMEMtoVectorWriteHazard",
+  "true",
+  "s_load_dword followed by v_cmp page faults"
+>;
+
+def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
+  "HasInstFwdPrefetchBug",
+  "true",
+  "S_INST_PREFETCH instruction causes shader to hang"
+>;
+
+def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
+  "HasVcmpxExecWARHazard",
+  "true",
+  "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
+>;
+
+def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
+  "HasLdsBranchVmemWARHazard",
+  "true",
+  "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
+>;
+
+def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
+  "HasNSAtoVMEMBug",
+  "true",
+  "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
+>;
+
+def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
+  "HasFlatSegmentOffsetBug",
+  "true",
+  "GFX10 bug, inst_offset ignored in flat segment"
+>;
+
+def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
+  "HasOffset3fBug",
+  "true",
+  "Branch offset of 3f hardware bug"
+>;
+
 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
   "ldsbankcount"#Value,
   "LDSBankCount",
@@ -144,10 +221,10 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
   "Additional instructions for CI+"
 >;
 
-def FeatureVIInsts : SubtargetFeature<"vi-insts",
-  "VIInsts",
+def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts",
+  "GFX8Insts",
   "true",
-  "Additional instructions for VI+"
+  "Additional instructions for GFX8+"
 >;
 
 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
@@ -156,6 +233,18 @@ def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
   "Additional instructions for GFX9+"
 >;
 
+def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
+  "GFX10Insts",
+  "true",
+  "Additional instructions for GFX10+"
+>;
+
+def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
+  "GFX7GFX8GFX9Insts",
+  "true",
+  "Instructions shared in GFX7, GFX8, GFX9"
+>;
+
 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
   "HasSMemRealTime",
   "true",
@@ -246,12 +335,25 @@ def FeatureDPP : SubtargetFeature<"dpp",
   "Support DPP (Data Parallel Primitives) extension"
 >;
 
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
+def FeatureDPP8 : SubtargetFeature<"dpp8",
+  "HasDPP8",
+  "true",
+  "Support DPP8 (Data Parallel Primitives) extension"
+>;
+
 def FeatureR128A16 : SubtargetFeature<"r128-a16",
   "HasR128A16",
   "true",
   "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
 >;
 
+def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
+  "HasNSAEncoding",
+  "true",
+  "Support NSA encoding for image instructions"
+>;
+
 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
   "HasIntClamp",
   "true",
@@ -270,10 +372,65 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
   "Has v_fmac_f32 and v_xnor_b32 instructions"
 >;
 
-def FeatureDotInsts : SubtargetFeature<"dot-insts",
-  "HasDotInsts",
+def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
+  "HasDot1Insts",
+  "true",
+  "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
+>;
+
+def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
+  "HasDot2Insts",
+  "true",
+  "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
+>;
+
+def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
+  "HasDot3Insts",
+  "true",
+  "Has v_dot8c_i32_i4 instruction"
+>;
+
+def FeatureDot4Insts : SubtargetFeature<"dot4-insts",
+  "HasDot4Insts",
+  "true",
+  "Has v_dot2c_i32_i16 instruction"
+>;
+
+def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
+  "HasDot5Insts",
   "true",
-  "Has v_dot* instructions"
+  "Has v_dot2c_f32_f16 instruction"
+>;
+
+def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
+  "HasDot6Insts",
+  "true",
+  "Has v_dot4c_i32_i8 instruction"
+>;
+
+def FeatureMAIInsts : SubtargetFeature<"mai-insts",
+  "HasMAIInsts",
+  "true",
+  "Has mAI instructions"
+>;
+
+def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
+  "HasPkFmacF16Inst",
+  "true",
+  "Has v_pk_fmac_f16 instruction"
+>;
+
+def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
+  "HasAtomicFaddInsts",
+  "true",
+  "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
+  "global_atomic_pk_add_f16 instructions"
+>;
+
+def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
+  "DoesNotSupportSRAMECC",
+  "true",
+  "Hardware does not support SRAM ECC"
 >;
 
 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
@@ -282,6 +439,36 @@ def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
   "Enable SRAM ECC"
 >;
 
+def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
+  "HasNoSdstCMPX",
+  "true",
+  "V_CMPX does not write VCC/SGPR in addition to EXEC"
+>;
+
+def FeatureVscnt : SubtargetFeature<"vscnt",
+  "HasVscnt",
+  "true",
+  "Has separate store vscnt counter"
+>;
+
+def FeatureRegisterBanking : SubtargetFeature<"register-banking",
+  "HasRegisterBanking",
+  "true",
+  "Has register banking"
+>;
+
+def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
+  "HasVOP3Literal",
+  "true",
+  "Can use one literal in VOP3"
+>;
+
+def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
+  "HasNoDataDepHazard",
+  "true",
+  "Does not need SW waitstates"
+>;
+
 //===------------------------------------------------------------===//
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
@@ -327,13 +514,6 @@ def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
 
-def FeatureEnableHugePrivateBuffer : SubtargetFeature<
-  "huge-private-buffer",
-  "EnableHugePrivateBuffer",
-  "true",
-  "Enable private/scratch buffer sizes greater than 128 GB"
->;
-
 def FeatureDumpCode : SubtargetFeature <"DumpCode",
   "DumpCode",
   "true",
@@ -425,103 +605,123 @@ def FeatureDisable : SubtargetFeature<"",
   "Dummy feature to disable assembler instructions"
 >;
 
-def FeatureGCN : SubtargetFeature<"gcn",
-  "IsGCN",
-  "true",
-  "GCN or newer GPU"
->;
-
 class GCNSubtargetFeatureGeneration <string Value,
-                                  list<SubtargetFeature> Implies> :
-        SubtargetFeatureGeneration <Value, "GCNSubtarget", Implies>;
+                                     string FeatureName,
+                                     list<SubtargetFeature> Implies> :
+        SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;
 
 def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+    "southern-islands",
   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
-  FeatureWavefrontSize64, FeatureGCN,
-  FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
+  FeatureWavefrontSize64,
+  FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
+  FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
 >;
 
 def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
+    "sea-islands",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
-  FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
-  FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
+  FeatureWavefrontSize64, FeatureFlatAddressSpace,
+  FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
+  FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
 >;
 
 def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+  "volcanic-islands",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
-   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
-   FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
+   FeatureWavefrontSize64, FeatureFlatAddressSpace,
+   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
    FeatureScalarStores, FeatureInv2PiInlineImm,
    FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
-   FeatureIntClamp, FeatureTrigReducedRange
+   FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
+   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
   ]
 >;
 
 def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
+  "gfx9",
   [FeatureFP64, FeatureLocalMemorySize65536,
-   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
-   FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
+   FeatureWavefrontSize64, FeatureFlatAddressSpace,
+   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
    FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
    FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
-   FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
+   FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
+   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
   ]
 >;
 
-class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
-                                  list<SubtargetFeature> Implies>
-                                 : SubtargetFeature <
-  "isaver"#Major#"."#Minor#"."#Stepping,
-  "IsaVersion",
-  "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
-  "Instruction set version number",
-  Implies
+def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
+  "gfx10",
+  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
+   FeatureFlatAddressSpace,
+   FeatureCIInsts, Feature16BitInsts,
+   FeatureSMemRealTime, FeatureInv2PiInlineImm,
+   FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
+   FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+   FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
+   FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+   FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
+   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
+   FeatureVOP3Literal, FeatureDPP8,
+   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+  ]
 >;
 
-def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
-  [FeatureSouthernIslands,
+class FeatureSet<list<SubtargetFeature> Features_> {
+  list<SubtargetFeature> Features = Features_;
+}
+
+def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
    FeatureFastFMAF32,
    HalfRate64Ops,
    FeatureLDSBankCount32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+def FeatureISAVersion6_0_1 : FeatureSet<
   [FeatureSouthernIslands,
    FeatureLDSBankCount32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
+def FeatureISAVersion7_0_0 : FeatureSet<
   [FeatureSeaIslands,
    FeatureLDSBankCount32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
+def FeatureISAVersion7_0_1 : FeatureSet<
   [FeatureSeaIslands,
    HalfRate64Ops,
    FeatureLDSBankCount32,
    FeatureFastFMAF32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
+def FeatureISAVersion7_0_2 : FeatureSet<
   [FeatureSeaIslands,
    FeatureLDSBankCount16,
    FeatureFastFMAF32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+def FeatureISAVersion7_0_3 : FeatureSet<
   [FeatureSeaIslands,
    FeatureLDSBankCount16,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4,
+def FeatureISAVersion7_0_4 : FeatureSet<
   [FeatureSeaIslands,
    FeatureLDSBankCount32,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
+def FeatureISAVersion8_0_1 : FeatureSet<
   [FeatureVolcanicIslands,
    FeatureFastFMAF32,
    HalfRate64Ops,
@@ -530,78 +730,151 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
    FeatureUnpackedD16VMem,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
+def FeatureISAVersion8_0_2 : FeatureSet<
   [FeatureVolcanicIslands,
    FeatureLDSBankCount32,
    FeatureSGPRInitBug,
    FeatureUnpackedD16VMem,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
+def FeatureISAVersion8_0_3 : FeatureSet<
   [FeatureVolcanicIslands,
    FeatureLDSBankCount32,
    FeatureUnpackedD16VMem,
+   FeatureDoesNotSupportXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
+def FeatureISAVersion8_1_0 : FeatureSet<
   [FeatureVolcanicIslands,
    FeatureLDSBankCount16,
    FeatureXNACK,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+def FeatureISAVersion9_0_0 : FeatureSet<
   [FeatureGFX9,
    FeatureMadMixInsts,
    FeatureLDSBankCount32,
-   FeatureCodeObjectV3]>;
+   FeatureCodeObjectV3,
+   FeatureDoesNotSupportXNACK,
+   FeatureDoesNotSupportSRAMECC]>;
 
-def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+def FeatureISAVersion9_0_2 : FeatureSet<
   [FeatureGFX9,
    FeatureMadMixInsts,
    FeatureLDSBankCount32,
    FeatureXNACK,
+   FeatureDoesNotSupportSRAMECC,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
+def FeatureISAVersion9_0_4 : FeatureSet<
   [FeatureGFX9,
    FeatureLDSBankCount32,
    FeatureFmaMixInsts,
+   FeatureDoesNotSupportXNACK,
+   FeatureDoesNotSupportSRAMECC,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
+def FeatureISAVersion9_0_6 : FeatureSet<
   [FeatureGFX9,
    HalfRate64Ops,
    FeatureFmaMixInsts,
    FeatureLDSBankCount32,
    FeatureDLInsts,
-   FeatureDotInsts,
+   FeatureDot1Insts,
+   FeatureDot2Insts,
+   FeatureDoesNotSupportXNACK,
+   FeatureCodeObjectV3]>;
+
+def FeatureISAVersion9_0_8 : FeatureSet<
+  [FeatureGFX9,
+   HalfRate64Ops,
+   FeatureFmaMixInsts,
+   FeatureLDSBankCount32,
+   FeatureDLInsts,
+   FeatureDot1Insts,
+   FeatureDot2Insts,
+   FeatureDot3Insts,
+   FeatureDot4Insts,
+   FeatureDot5Insts,
+   FeatureDot6Insts,
+   FeatureMAIInsts,
+   FeaturePkFmacF16Inst,
+   FeatureAtomicFaddInsts,
    FeatureSRAMECC,
    FeatureCodeObjectV3]>;
 
-def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
+def FeatureISAVersion9_0_9 : FeatureSet<
   [FeatureGFX9,
    FeatureMadMixInsts,
    FeatureLDSBankCount32,
    FeatureXNACK,
    FeatureCodeObjectV3]>;
 
-//===----------------------------------------------------------------------===//
-// Debugger related subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureDebuggerInsertNops : SubtargetFeature<
-  "amdgpu-debugger-insert-nops",
-  "DebuggerInsertNops",
-  "true",
-  "Insert one nop instruction for each high level source statement"
->;
+// TODO: Organize more features into groups.
+def FeatureGroup {
+  // Bugs present on gfx10.1.
+  list<SubtargetFeature> GFX10_1_Bugs = [
+    FeatureVcmpxPermlaneHazard,
+    FeatureVMEMtoScalarWriteHazard,
+    FeatureSMEMtoVectorWriteHazard,
+    FeatureInstFwdPrefetchBug,
+    FeatureVcmpxExecWARHazard,
+    FeatureLdsBranchVmemWARHazard,
+    FeatureNSAtoVMEMBug,
+    FeatureOffset3fBug,
+    FeatureFlatSegmentOffsetBug
+   ];
+}
 
-def FeatureDebuggerEmitPrologue : SubtargetFeature<
-  "amdgpu-debugger-emit-prologue",
-  "DebuggerEmitPrologue",
-  "true",
-  "Emit debugger prologue"
->;
+def FeatureISAVersion10_1_0 : FeatureSet<
+  !listconcat(FeatureGroup.GFX10_1_Bugs,
+    [FeatureGFX10,
+     FeatureLDSBankCount32,
+     FeatureDLInsts,
+     FeatureNSAEncoding,
+     FeatureWavefrontSize32,
+     FeatureScalarStores,
+     FeatureScalarAtomics,
+     FeatureScalarFlatScratchInsts,
+     FeatureLdsMisalignedBug,
+     FeatureDoesNotSupportXNACK,
+     FeatureCodeObjectV3])>;
+
+def FeatureISAVersion10_1_1 : FeatureSet<
+  !listconcat(FeatureGroup.GFX10_1_Bugs,
+    [FeatureGFX10,
+     FeatureLDSBankCount32,
+     FeatureDLInsts,
+     FeatureDot1Insts,
+     FeatureDot2Insts,
+     FeatureDot5Insts,
+     FeatureDot6Insts,
+     FeatureNSAEncoding,
+     FeatureWavefrontSize32,
+     FeatureScalarStores,
+     FeatureScalarAtomics,
+     FeatureScalarFlatScratchInsts,
+     FeatureDoesNotSupportXNACK,
+     FeatureCodeObjectV3])>;
+
+def FeatureISAVersion10_1_2 : FeatureSet<
+  !listconcat(FeatureGroup.GFX10_1_Bugs,
+    [FeatureGFX10,
+     FeatureLDSBankCount32,
+     FeatureDLInsts,
+     FeatureDot1Insts,
+     FeatureDot2Insts,
+     FeatureDot5Insts,
+     FeatureDot6Insts,
+     FeatureNSAEncoding,
+     FeatureWavefrontSize32,
+     FeatureScalarStores,
+     FeatureScalarAtomics,
+     FeatureScalarFlatScratchInsts,
+     FeatureLdsMisalignedBug,
+     FeatureDoesNotSupportXNACK,
+     FeatureCodeObjectV3])>;
 
 //===----------------------------------------------------------------------===//
 
@@ -682,23 +955,71 @@ def NullALU : InstrItinClass;
 // Predicate helper class
 //===----------------------------------------------------------------------===//
 
-def isSICI : Predicate<
-  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
-  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->, AssemblerPredicate<"!FeatureGCN3Encoding">;
+def isGFX6 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+  AssemblerPredicate<"FeatureSouthernIslands">;
+
+def isGFX6GFX7 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
+
+def isGFX6GFX7GFX10 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+  AssemblerPredicate<"!FeatureGCN3Encoding">;
+
+def isGFX7Only :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
+
+def isGFX7GFX10 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
+
+def isGFX7GFX8GFX9 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+  AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
+
+def isGFX6GFX7GFX8GFX9 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+  AssemblerPredicate<"!FeatureGFX10Insts">;
+
+def isGFX7Plus :
+  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
+  AssemblerPredicate<"FeatureCIInsts">;
+
+def isGFX8Plus :
+  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+  AssemblerPredicate<"FeatureGFX8Insts">;
 
-def isVI : Predicate <
-  "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
-  AssemblerPredicate<"FeatureGCN3Encoding">;
+def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
+                           "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+  AssemblerPredicate <"FeatureVolcanicIslands">;
 
-def isGFX9 : Predicate <
-  "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
+def isGFX9Plus :
+  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
   AssemblerPredicate<"FeatureGFX9Insts">;
 
-// TODO: Either the name to be changed or we simply use IsCI!
-def isCIVI : Predicate <
-  "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
-  AssemblerPredicate<"FeatureCIInsts">;
+def isGFX9Only : Predicate <
+  "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+  AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;
+
+def isGFX8GFX9 :
+  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+            "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+  AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
+
+def isGFX10Plus :
+  Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
+  AssemblerPredicate<"FeatureGFX10Insts">;
 
 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
   AssemblerPredicate<"FeatureFlatAddressSpace">;
@@ -707,6 +1028,8 @@ def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
   AssemblerPredicate<"FeatureFlatGlobalInsts">;
 def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
   AssemblerPredicate<"FeatureFlatScratchInsts">;
+def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
+  AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
 def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
   AssemblerPredicate<"FeatureGFX9Insts">;
 
@@ -716,7 +1039,7 @@ def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
   AssemblerPredicate<"!FeatureUnpackedD16VMem">;
 
 def D16PreservesUnusedBits :
-  Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
+  Predicate<"Subtarget->d16PreservesUnusedBits()">,
   AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
 
 def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
@@ -728,38 +1051,54 @@ def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9
 def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
   AssemblerPredicate<"FeatureAddNoCarryInsts">;
 
-def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
-  AssemblerPredicate<"!FeatureAddNoCarryInsts">;
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
 
 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
   AssemblerPredicate<"Feature16BitInsts">;
 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
   AssemblerPredicate<"FeatureVOP3P">;
 
-def NotHasVOP3PInsts : Predicate<"!Subtarget->hasVOP3PInsts()">,
-  AssemblerPredicate<"!FeatureVOP3P">;
-
 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
   AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
 
-def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
-  AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;
+def HasSDWA9 :
+  Predicate<"Subtarget->hasSDWA()">,
+  AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
+
+def HasSDWA10 :
+  Predicate<"Subtarget->hasSDWA()">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
 
 def HasDPP : Predicate<"Subtarget->hasDPP()">,
-  AssemblerPredicate<"FeatureDPP">;
+  AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
+
+def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
 
 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
   AssemblerPredicate<"FeatureR128A16">;
 
+def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
+  AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
+
 def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
   AssemblerPredicate<"FeatureIntClamp">;
 
 def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
   AssemblerPredicate<"FeatureMadMixInsts">;
 
+def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
+  AssemblerPredicate<"FeatureScalarStores">;
+
 def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
   AssemblerPredicate<"FeatureScalarAtomics">;
 
+def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
+  AssemblerPredicate<"FeatureNoSdstCMPX">;
+
+def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
+  AssemblerPredicate<"!FeatureNoSdstCMPX">;
+
 def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
 def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
@@ -773,9 +1112,35 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
 def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
   AssemblerPredicate<"FeatureDLInsts">;
 
-def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
-  AssemblerPredicate<"FeatureDotInsts">;
+def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
+  AssemblerPredicate<"FeatureDot1Insts">;
+
+def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
+  AssemblerPredicate<"FeatureDot2Insts">;
+
+def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
+  AssemblerPredicate<"FeatureDot3Insts">;
+
+def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
+  AssemblerPredicate<"FeatureDot4Insts">;
+
+def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
+  AssemblerPredicate<"FeatureDot5Insts">;
+
+def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
+  AssemblerPredicate<"FeatureDot6Insts">;
+
+def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
+  AssemblerPredicate<"FeatureMAIInsts">;
+
+def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
+  AssemblerPredicate<"FeaturePkFmacF16Inst">;
+
+def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
+  AssemblerPredicate<"FeatureAtomicFaddInsts">;
 
+def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">, // bug present
+  AssemblerPredicate<"FeatureOffset3fBug">;
 
 def EnableLateCFGStructurize : Predicate<
   "EnableLateStructurizeCFG">;
@@ -784,7 +1149,6 @@ def EnableLateCFGStructurize : Predicate<
 include "SISchedule.td"
 include "GCNProcessors.td"
 include "AMDGPUInstrInfo.td"
-include "SIIntrinsics.td"
 include "AMDGPURegisterInfo.td"
 include "AMDGPURegisterBanks.td"
 include "AMDGPUInstructions.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 73709ba13643..bba132c3bc46 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUAliasAnalysis ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -54,20 +53,21 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }
 
-// These arrays are indexed by address space value enum elements 0 ... to 6
-static const AliasResult ASAliasRules[7][7] = {
-  /*                    Flat       Global    Region    Group     Constant  Private   Constant 32-bit */
-  /* Flat     */        {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
-  /* Global   */        {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
-  /* Region   */        {MayAlias, NoAlias , NoAlias , NoAlias,  MayAlias, NoAlias , MayAlias},
-  /* Group    */        {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
-  /* Constant */        {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias,  NoAlias , MayAlias},
-  /* Private  */        {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
-  /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
+// This table is indexed in both dimensions by address space enum values 0 to 7
+static const AliasResult ASAliasRules[8][8] = {
+  /*                    Flat       Global    Region    Group     Constant  Private   Constant 32-bit  Buffer Fat Ptr */
+  /* Flat     */        {MayAlias, MayAlias, NoAlias,  MayAlias, MayAlias, MayAlias, MayAlias,        MayAlias},
+  /* Global   */        {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias,        MayAlias},
+  /* Region   */        {NoAlias,  NoAlias , MayAlias, NoAlias , NoAlias,  NoAlias , NoAlias,         NoAlias},
+  /* Group    */        {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias ,        NoAlias},
+  /* Constant */        {MayAlias, MayAlias, NoAlias,  NoAlias , NoAlias , NoAlias , MayAlias,        MayAlias},
+  /* Private  */        {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias ,        NoAlias},
+  /* Constant 32-bit */ {MayAlias, MayAlias, NoAlias,  NoAlias , MayAlias, NoAlias , NoAlias ,        MayAlias},
+  /* Buffer Fat Ptr  */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias,        MayAlias}
 };
 
 static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
-  static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
+  static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range");
 
   if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
     return MayAlias;
@@ -76,7 +76,8 @@ static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
 }
 
 AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
-                                  const MemoryLocation &LocB) {
+                                  const MemoryLocation &LocB,
+                                  AAQueryInfo &AAQI) {
   unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
   unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
 
@@ -85,11 +86,11 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
     return Result;
 
   // Forward the query to the next alias analysis.
-  return AAResultBase::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB, AAQI);
 }
 
 bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
-                                            bool OrLocal) {
+                                            AAQueryInfo &AAQI, bool OrLocal) {
   const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
   unsigned AS = Base->getType()->getPointerAddressSpace();
   if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
@@ -106,7 +107,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
     // Only assume constant memory for arguments on kernels.
     switch (F->getCallingConv()) {
     default:
-      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+      return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
     case CallingConv::AMDGPU_LS:
     case CallingConv::AMDGPU_HS:
     case CallingConv::AMDGPU_ES:
@@ -133,5 +134,5 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
       return true;
     }
   }
-  return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+  return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
 }
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index d76c9fc48199..fb722920900f 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -1,9 +1,8 @@
 //===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -45,8 +44,10 @@ public:
   /// By definition, this result is stateless and so remains valid.
   bool invalidate(Function &, const PreservedAnalyses &) { return false; }
 
-  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
-  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal);
 
 private:
   bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index fc65430b745f..4c1dbd4c5304 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 896ac9c87779..419ebb2240ad 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,8 +45,11 @@ namespace {
 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
 private:
   const TargetMachine *TM = nullptr;
+  SmallVector<CallGraphNode*, 8> NodeList;
 
   bool addFeatureAttributes(Function &F);
+  bool processUniformWorkGroupAttribute();
+  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
 
 public:
   static char ID;
@@ -186,7 +188,6 @@ static bool handleAttr(Function &Parent, const Function &Callee,
     Parent.addFnAttr(Name);
     return true;
   }
-
   return false;
 }
 
@@ -213,6 +214,56 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
     handleAttr(Parent, Callee, AttrName);
 }
 
+// Walks NodeList in reverse and propagates "uniform-work-group-size" from each
+// node's function to its callees. Returns true if any edge reported a change.
+bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
+  bool Changed = false;
+  for (auto *Node : reverse(NodeList)) {
+    Function *Caller = Node->getFunction();
+    for (auto I : *Node) {
+      // Accumulate with |= so a later unchanged edge cannot reset the flag.
+      if (Function *Callee = std::get<1>(I)->getFunction())
+        Changed |= propagateUniformWorkGroupAttribute(*Caller, *Callee);
+    }
+  }
+
+  return Changed;
+}
+
+bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
+       Function &Caller, Function &Callee) {
+  // Syncs "uniform-work-group-size" over one call edge; true if it wrote any.
+  // Callee lacks an exact definition: mark it (and an unmarked Caller) false.
+  if (!Callee.hasExactDefinition()) {
+    Callee.addFnAttr("uniform-work-group-size", "false");
+    if (!Caller.hasFnAttribute("uniform-work-group-size"))
+      Caller.addFnAttr("uniform-work-group-size", "false");
+
+    return true;
+  }
+  // Caller already carries the attribute: its value decides the Callee's.
+  if (Caller.hasFnAttribute("uniform-work-group-size")) {
+    // Caller is "true"; any other value takes the else branch below.
+    if (Caller.getFnAttribute("uniform-work-group-size")
+        .getValueAsString().equals("true")) {
+      // Only mark a Callee that carries no value of its own yet.
+      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
+        Callee.addFnAttr("uniform-work-group-size", "true");
+        return true;
+      }
+    } else {
+      Callee.addFnAttr("uniform-work-group-size", "false");
+      return true;
+    }
+  } else {
+    // Caller has no attribute: mark both Caller and Callee as "false".
+    Caller.addFnAttr("uniform-work-group-size", "false");
+    Callee.addFnAttr("uniform-work-group-size", "false");
+    return true;
+  }
+  return false;
+}
+
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
@@ -293,15 +344,21 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
 }
 
 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
-  Module &M = SCC.getCallGraph().getModule();
-  Triple TT(M.getTargetTriple());
-
   bool Changed = false;
+
   for (CallGraphNode *I : SCC) {
+    // Build a list of CallGraphNodes from most number of uses to least
+    if (I->getNumReferences())
+      NodeList.push_back(I);
+    else {
+      Changed |= processUniformWorkGroupAttribute(); // IR may be modified
+      NodeList.clear();
+    }
+
     Function *F = I->getFunction();
+    // Add feature attributes
     if (!F || F->isDeclaration())
       continue;
-
     Changed |= addFeatureAttributes(*F);
   }
 
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index f88e3b0dac86..71121ade0a49 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 7465cf22b5a4..99a01ca3a2fd 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -1,15 +1,15 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
 #include "AMDGPUArgumentUsageInfo.h"
 #include "SIRegisterInfo.h"
+#include "llvm/Support/NativeFormatting.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -27,9 +27,16 @@ void ArgDescriptor::print(raw_ostream &OS,
   }
 
   if (isRegister())
-    OS << "Reg " << printReg(getRegister(), TRI) << '\n';
+    OS << "Reg " << printReg(getRegister(), TRI);
   else
-    OS << "Stack offset " << getStackOffset() << '\n';
+    OS << "Stack offset " << getStackOffset();
+
+  if (isMasked()) {
+    OS << " & ";
+    llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
+  }
+
+  OS << '\n';
 }
 
 char AMDGPUArgumentUsageInfo::ID = 0;
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index f0e6d1b83f15..097730441ed8 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -1,9 +1,8 @@
 //==- AMDGPUArgumentrUsageInfo.h - Function Arg Usage Info -------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
 
@@ -29,22 +29,31 @@ private:
   friend class AMDGPUArgumentUsageInfo;
 
   union {
-    unsigned Register;
+    Register Reg;
     unsigned StackOffset;
   };
 
+  // Bitmask to locate argument within the register.
+  unsigned Mask;
+
   bool IsStack : 1;
   bool IsSet : 1;
 
-  ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
-    : Register(Val), IsStack(IsStack), IsSet(IsSet) {}
 public:
-  static ArgDescriptor createRegister(unsigned Reg) {
-    return ArgDescriptor(Reg, false, true);
+  ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u,
+                bool IsStack = false, bool IsSet = false)
+    : Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {}
+
+  static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) {
+    return ArgDescriptor(Reg, Mask, false, true);
+  }
+
+  static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) {
+    return ArgDescriptor(Offset, Mask, true, true); // IsStack, IsSet
+  }
 
-  static ArgDescriptor createStack(unsigned Reg) {
-    return ArgDescriptor(Reg, true, true);
+  static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
+    return ArgDescriptor(Arg.Reg, Mask, Arg.IsStack, Arg.IsSet);
   }
 
   bool isSet() const {
@@ -59,9 +68,9 @@ public:
     return !IsStack;
   }
 
-  unsigned getRegister() const {
+  Register getRegister() const {
     assert(!IsStack);
-    return Register;
+    return Reg;
   }
 
   unsigned getStackOffset() const {
@@ -69,6 +78,14 @@ public:
     return StackOffset;
   }
 
+  unsigned getMask() const {
+    return Mask;
+  }
+
+  bool isMasked() const {
+    return Mask != ~0u;
+  }
+
   void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
 };
 
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 2ded7cdb6489..743ac64b8f10 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer  -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,7 +19,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600AsmPrinter.h"
@@ -31,10 +30,12 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
@@ -100,7 +101,7 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
   : AsmPrinter(TM, std::move(Streamer)) {
-    if (IsaInfo::hasCodeObjectV3(getSTI()))
+    if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
       HSAMetadataStream.reset(new MetadataStreamerV3());
     else
       HSAMetadataStream.reset(new MetadataStreamerV2());
@@ -110,7 +111,7 @@ StringRef AMDGPUAsmPrinter::getPassName() const {
   return "AMDGPU Assembly Printer";
 }
 
-const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
   return TM.getMCSubtargetInfo();
 }
 
@@ -121,10 +122,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
 }
 
 void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
-  if (IsaInfo::hasCodeObjectV3(getSTI())) {
+  if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
     std::string ExpectedTarget;
     raw_string_ostream ExpectedTargetOS(ExpectedTarget);
-    IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+    IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
 
     getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
   }
@@ -137,9 +138,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
     HSAMetadataStream->begin(M);
 
   if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
-    readPALMetadata(M);
+    getTargetStreamer()->getPALMetadata()->readFromIR(M);
 
-  if (IsaInfo::hasCodeObjectV3(getSTI()))
+  if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
     return;
 
   // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -147,7 +148,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
     getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
 
   // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
-  IsaVersion Version = getIsaVersion(getSTI()->getCPU());
+  IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
   getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
       Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
 }
@@ -157,11 +158,11 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
   if (!getTargetStreamer())
     return;
 
-  if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+  if (!IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
     // Emit ISA Version (NT_AMD_AMDGPU_ISA).
     std::string ISAVersionString;
     raw_string_ostream ISAVersionStream(ISAVersionString);
-    IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+    IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
     getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
   }
 
@@ -172,20 +173,6 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
     (void)Success;
     assert(Success && "Malformed HSA Metadata");
   }
-
-  if (!IsaInfo::hasCodeObjectV3(getSTI())) {
-    // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
-    if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
-      // Copy the PAL metadata from the map where we collected it into a vector,
-      // then write it as a .note.
-      PALMD::Metadata PALMetadataVector;
-      for (auto i : PALMetadataMap) {
-        PALMetadataVector.push_back(i.first);
-        PALMetadataVector.push_back(i.second);
-      }
-      getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
-    }
-  }
 }
 
 bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -225,7 +212,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
   const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
   if (!MFI.isEntryFunction())
     return;
-  if (!IsaInfo::hasCodeObjectV3(getSTI()) ||
+
+  if (!IsaInfo::hasCodeObjectV3(getGlobalSTI()) ||
       TM.getTargetTriple().getOS() != Triple::AMDHSA)
     return;
 
@@ -243,23 +231,25 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
   if (ReadOnlySection.getAlignment() < 64)
     ReadOnlySection.setAlignment(64);
 
+  const MCSubtargetInfo &STI = MF->getSubtarget();
+
   SmallString<128> KernelName;
   getNameWithPrefix(KernelName, &MF->getFunction());
   getTargetStreamer()->EmitAmdhsaKernelDescriptor(
-      *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+      STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
       CurrentProgramInfo.NumVGPRsForWavesPerEU,
       CurrentProgramInfo.NumSGPRsForWavesPerEU -
-          IsaInfo::getNumExtraSGPRs(getSTI(),
+          IsaInfo::getNumExtraSGPRs(&STI,
                                     CurrentProgramInfo.VCCUsed,
                                     CurrentProgramInfo.FlatUsed),
       CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
-      hasXNACK(*getSTI()));
+      hasXNACK(STI));
 
   Streamer.PopSection();
 }
 
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
-  if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+  if (IsaInfo::hasCodeObjectV3(getGlobalSTI()) &&
       TM.getTargetTriple().getOS() == Triple::AMDHSA) {
     AsmPrinter::EmitFunctionEntryLabel();
     return;
@@ -273,8 +263,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
     getTargetStreamer()->EmitAMDGPUSymbolType(
         SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
   }
-  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
-  if (STI.dumpCode()) {
+  if (DumpCodeInstEmitter) {
     // Disassemble function name label to text.
     DisasmLines.push_back(MF->getName().str() + ":");
     DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
@@ -285,8 +274,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
 }
 
 void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
-  const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
-  if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
+  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
     // Write a line for the basic block label if it is not only fallthrough.
     DisasmLines.push_back(
         (Twine("BB") + Twine(getFunctionNumber())
@@ -298,38 +286,57 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
 }
 
 void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
+      OutContext.reportError({},
+                             Twine(GV->getName()) +
+                                 ": unsupported initializer for address space");
+      return;
+    }
+
+    // LDS variables aren't emitted in HSA or PAL yet.
+    const Triple::OSType OS = TM.getTargetTriple().getOS();
+    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+      return;
 
-  // Group segment variables aren't emitted in HSA.
-  if (AMDGPU::isGroupSegment(GV))
+    MCSymbol *GVSym = getSymbol(GV);
+
+    GVSym->redefineIfPossible();
+    if (GVSym->isDefined() || GVSym->isVariable())
+      report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+                         "' is already defined");
+
+    const DataLayout &DL = GV->getParent()->getDataLayout();
+    uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+    unsigned Align = GV->getAlignment();
+    if (!Align)
+      Align = 4;
+
+    EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+    EmitLinkage(GV, GVSym);
+    if (auto TS = getTargetStreamer())
+      TS->emitAMDGPULDS(GVSym, Size, Align);
     return;
+  }
 
   AsmPrinter::EmitGlobalVariable(GV);
 }
 
 bool AMDGPUAsmPrinter::doFinalization(Module &M) {
   CallGraphResourceInfo.clear();
-  return AsmPrinter::doFinalization(M);
-}
 
-// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
-// frontend into our PALMetadataMap, ready for per-function modification.  It
-// is a NamedMD containing an MDTuple containing a number of MDNodes each of
-// which is an integer value, and each two integer values forms a key=value
-// pair that we store as PALMetadataMap[key]=value in the map.
-void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
-  auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
-  if (!NamedMD || !NamedMD->getNumOperands())
-    return;
-  auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
-  if (!Tuple)
-    return;
-  for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
-    auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
-    auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
-    if (!Key || !Val)
-      continue;
-    PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
+  // Pad with s_code_end to help tools and guard against instruction prefetch
+  // causing stale data in caches. Arguably this should be done by the linker,
+  // which is why this isn't done for Mesa.
+  const MCSubtargetInfo &STI = *getGlobalSTI();
+  if (AMDGPU::isGFX10(STI) &&
+      (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
+       STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
+    OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+    getTargetStreamer()->EmitCodeEnd();
   }
+
+  return AsmPrinter::doFinalization(M);
 }
 
 // Print comments that apply to both callable functions and entry points.
@@ -376,6 +383,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
     KernelCodeProperties |=
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
   }
+  if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+    KernelCodeProperties |=
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+  }
 
   return KernelCodeProperties;
 }
@@ -435,6 +446,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     EmitProgramInfoSI(MF, CurrentProgramInfo);
   }
 
+  DumpCodeInstEmitter = nullptr;
+  if (STM.dumpCode()) {
+    // For -dumpcode, get the assembler out of the streamer, even if it does
+    // not really want to let us have it. This only works with -filetype=obj.
+    bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
+    OutStreamer->setUseAssemblerInfoForParsing(true);
+    MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
+    OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
+    if (Assembler)
+      DumpCodeInstEmitter = Assembler->getEmitterPtr();
+  }
+
   DisasmLines.clear();
   HexLines.clear();
   DisasmLineMaxLen = 0;
@@ -486,15 +509,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
-      OutStreamer->emitRawComment(
-        " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
-        Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
-      OutStreamer->emitRawComment(
-        " DebuggerPrivateSegmentBufferSGPR: s" +
-        Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
-    }
-
     OutStreamer->emitRawComment(
       " COMPUTE_PGM_RSRC2:USER_SGPR: " +
       Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
@@ -516,7 +530,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
       false);
   }
 
-  if (STM.dumpCode()) {
+  if (DumpCodeInstEmitter) {
 
     OutStreamer->SwitchSection(
         Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
@@ -620,6 +634,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         HighestVGPRReg = Reg;
         break;
       }
+      MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
+      if (MRI.isPhysRegUsed(AReg)) {
+        HighestVGPRReg = AReg;
+        break;
+      }
     }
 
     MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -665,8 +684,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         case AMDGPU::SRC_SHARED_LIMIT:
         case AMDGPU::SRC_PRIVATE_BASE:
         case AMDGPU::SRC_PRIVATE_LIMIT:
+        case AMDGPU::SGPR_NULL:
           continue;
 
+        case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+          llvm_unreachable("src_pops_exiting_wave_id should not be used");
+
         case AMDGPU::NoRegister:
           assert(MI.isDebugInstr());
           continue;
@@ -687,6 +710,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         case AMDGPU::XNACK_MASK_HI:
           llvm_unreachable("xnack_mask registers should not be used");
 
+        case AMDGPU::LDS_DIRECT:
+          llvm_unreachable("lds_direct register should not be used");
+
         case AMDGPU::TBA:
         case AMDGPU::TBA_LO:
         case AMDGPU::TBA_HI:
@@ -695,6 +721,15 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         case AMDGPU::TMA_HI:
           llvm_unreachable("trap handler registers should not be used");
 
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
         default:
           break;
         }
@@ -707,6 +742,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 1;
+        } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 1;
         } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
                  "trap handler registers should not be used");
@@ -715,9 +753,14 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 2;
+        } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 2;
         } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 3;
+        } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+          Width = 3;
         } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
             "trap handler registers should not be used");
@@ -726,6 +769,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 4;
+        } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 4;
         } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
             "trap handler registers should not be used");
@@ -742,6 +788,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
           IsSGPR = false;
           Width = 16;
+        } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 16;
+        } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+          IsSGPR = true;
+          Width = 32;
+        } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 32;
+        } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+          IsSGPR = false;
+          Width = 32;
         } else {
           llvm_unreachable("Unknown register class");
         }
@@ -767,8 +825,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
 
           // 48 SGPRs - vcc, - flat_scr, -xnack
           int MaxSGPRGuess =
-              47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
-                                             ST.hasFlatAddressSpace());
+            47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
           MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
           MaxVGPR = std::max(MaxVGPR, 23);
 
@@ -779,9 +836,19 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         } else {
           // We force CodeGen to run in SCC order, so the callee's register
           // usage etc. should be the cumulative usage of all callees.
+
           auto I = CallGraphResourceInfo.find(Callee);
-          assert(I != CallGraphResourceInfo.end() &&
-                 "callee should have been handled before caller");
+          if (I == CallGraphResourceInfo.end()) {
+            // Avoid crashing on undefined behavior with an illegal call to a
+            // kernel. If a callsite's calling convention doesn't match the
+            // function's, it's undefined behavior. If the callsite calling
+            // convention does match, that would have errored earlier.
+            // FIXME: The verifier shouldn't allow this.
+            if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+              report_fatal_error("invalid call to entry function");
+
+            llvm_unreachable("callee should have been handled before caller");
+          }
 
           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
@@ -825,14 +892,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
 
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SIInstrInfo *TII = STM.getInstrInfo();
-  const SIRegisterInfo *RI = &TII->getRegisterInfo();
 
   // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
   // unified.
   unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
-      getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+      &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
 
   // Check the addressable register limit before we add ExtraSGPRs.
   if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -918,24 +983,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
       &STM, ProgInfo.NumVGPRsForWavesPerEU);
 
-  // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
-  // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
-  // attribute was requested.
-  if (STM.debuggerEmitPrologue()) {
-    ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
-      RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
-    ProgInfo.DebuggerPrivateSegmentBufferSGPR =
-      RI->getHWRegIndex(MFI->getScratchRSrcReg());
-  }
-
   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
   // register.
   ProgInfo.FloatMode = getFPMode(MF);
 
-  ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
+  const SIModeRegisterDefaults Mode = MFI->getMode();
+  ProgInfo.IEEEMode = Mode.IEEE;
 
   // Make clamp modifier on NaN input returns 0.
-  ProgInfo.DX10Clamp = STM.enableDX10Clamp();
+  ProgInfo.DX10Clamp = Mode.DX10Clamp;
 
   unsigned LDSAlignShift;
   if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
@@ -963,6 +1019,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
               1ULL << ScratchAlignShift) >>
       ScratchAlignShift;
 
+  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
+    ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
+    ProgInfo.MemOrdered = 1;
+  }
+
   ProgInfo.ComputePGMRSrc1 =
       S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
       S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
@@ -971,7 +1032,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
       S_00B848_PRIV(ProgInfo.Priv) |
       S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
       S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
-      S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+      S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
+      S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+      S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
 
   // 0 = X, 1 = XY, 2 = XYZ
   unsigned TIDIGCompCnt = 0;
@@ -1053,71 +1116,38 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
 
 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
 // is AMDPAL.  It stores each compute/SPI register setting and other PAL
-// metadata items into the PALMetadataMap, combining with any provided by the
-// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
-// then written as a single block in the .note section.
+// metadata items into the PALMD::Metadata, combining with any provided by the
+// frontend as LLVM metadata. Once all functions are written, the PAL metadata
+// is then written as a single block in the .note section.
 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
        const SIProgramInfo &CurrentProgramInfo) {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  // Given the calling convention, calculate the register number for rsrc1. In
-  // principle the register number could change in future hardware, but we know
-  // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
-  // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
-  // that we use a register number rather than a byte offset, so we need to
-  // divide by 4.
-  unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4;
-  unsigned Rsrc2Reg = Rsrc1Reg + 1;
-  // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
-  // with a constant offset to access any non-register shader-specific PAL
-  // metadata key.
-  unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
-  switch (MF.getFunction().getCallingConv()) {
-    case CallingConv::AMDGPU_PS:
-      ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
-      break;
-    case CallingConv::AMDGPU_VS:
-      ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
-      break;
-    case CallingConv::AMDGPU_GS:
-      ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
-      break;
-    case CallingConv::AMDGPU_ES:
-      ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
-      break;
-    case CallingConv::AMDGPU_HS:
-      ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
-      break;
-    case CallingConv::AMDGPU_LS:
-      ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
-      break;
-  }
-  unsigned NumUsedVgprsKey = ScratchSizeKey +
-      PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE;
-  unsigned NumUsedSgprsKey = ScratchSizeKey +
-      PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE;
-  PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
-  PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
+  auto CC = MF.getFunction().getCallingConv();
+  auto MD = getTargetStreamer()->getPALMetadata();
+
+  MD->setEntryPoint(CC, MF.getFunction().getName());
+  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
-    PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
-    PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
-    // ScratchSize is in bytes, 16 aligned.
-    PALMetadataMap[ScratchSizeKey] |=
-        alignTo(CurrentProgramInfo.ScratchSize, 16);
+    MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
+    MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
   } else {
-    PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
-        S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
+    MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
+        S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
     if (CurrentProgramInfo.ScratchBlocks > 0)
-      PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
-    // ScratchSize is in bytes, 16 aligned.
-    PALMetadataMap[ScratchSizeKey] |=
-        alignTo(CurrentProgramInfo.ScratchSize, 16);
+      MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
   }
+  // ScratchSize is in bytes, 16 aligned.
+  MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
-    PALMetadataMap[Rsrc2Reg] |=
-        S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
-    PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
-    PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
+    MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+    MD->setSpiPsInputEna(MFI->getPSInputEnable());
+    MD->setSpiPsInputAddr(MFI->getPSInputAddr());
   }
+
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  if (STM.isWave32())
+    MD->setWave32(MF.getFunction().getCallingConv());
 }
 
 // This is supposed to be log2(Size)
@@ -1144,12 +1174,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 
-  AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
+  AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
 
   Out.compute_pgm_resource_registers =
       CurrentProgramInfo.ComputePGMRSrc1 |
       (CurrentProgramInfo.ComputePGMRSrc2 << 32);
-  Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+  Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
   if (CurrentProgramInfo.DynamicCallStack)
     Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
@@ -1181,9 +1211,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   if (MFI->hasDispatchPtr())
     Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
 
-  if (STM.debuggerSupported())
-    Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
-
   if (STM.isXNACKEnabled())
     Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
 
@@ -1196,22 +1223,14 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  Out.kernarg_segment_alignment = std::max((size_t)4,
+  Out.kernarg_segment_alignment = std::max<size_t>(4,
       countTrailingZeros(MaxKernArgAlign));
-
-  if (STM.debuggerEmitPrologue()) {
-    Out.debug_wavefront_private_segment_offset_sgpr =
-      CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-    Out.debug_private_segment_buffer_sgpr =
-      CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-  }
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
   // First try the generic code, which knows about modifiers like 'c' and 'n'.
-  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O))
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
     return false;
 
   if (ExtraCode && ExtraCode[0]) {
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 167ac4b21e1e..cf77034329ef 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,6 +32,7 @@ namespace llvm {
 
 class AMDGPUMachineFunction;
 class AMDGPUTargetStreamer;
+class MCCodeEmitter;
 class MCOperand;
 class GCNSubtarget;
 
@@ -57,12 +57,12 @@ private:
   DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
 
   std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
-  std::map<uint32_t, uint32_t> PALMetadataMap;
+
+  MCCodeEmitter *DumpCodeInstEmitter = nullptr;
 
   uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
   SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;
 
-  void readPALMetadata(Module &M);
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
   void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
                         const MachineFunction &MF) const;
@@ -95,7 +95,7 @@ public:
 
   StringRef getPassName() const override;
 
-  const MCSubtargetInfo* getSTI() const;
+  const MCSubtargetInfo* getGlobalSTI() const;
 
   AMDGPUTargetStreamer* getTargetStreamer() const;
 
@@ -137,8 +137,7 @@ public:
     const MachineBasicBlock *MBB) const override;
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
 
 protected:
   mutable std::vector<std::string> DisasmLines, HexLines;
diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 644e4fd558ba..8a92e7d923fb 100644
--- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,6 +30,7 @@ namespace {
 enum DPP_CTRL {
   DPP_ROW_SR1 = 0x111,
   DPP_ROW_SR2 = 0x112,
+  DPP_ROW_SR3 = 0x113,
   DPP_ROW_SR4 = 0x114,
   DPP_ROW_SR8 = 0x118,
   DPP_WF_SR1 = 0x138,
@@ -40,7 +40,7 @@ enum DPP_CTRL {
 
 struct ReplacementInfo {
   Instruction *I;
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op;
   unsigned ValIdx;
   bool ValDivergent;
 };
@@ -55,10 +55,8 @@ private:
   bool HasDPP;
   bool IsPixelShader;
 
-  void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
-                      unsigned ValIdx, bool ValDivergent) const;
-
-  void setConvergent(CallInst *const CI) const;
+  void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
+                      bool ValDivergent) const;
 
 public:
   static char ID;
@@ -122,16 +120,20 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
     break;
   }
 
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op = I.getOperation();
 
-  switch (I.getOperation()) {
+  switch (Op) {
   default:
     return;
   case AtomicRMWInst::Add:
-    Op = Instruction::Add;
-    break;
   case AtomicRMWInst::Sub:
-    Op = Instruction::Sub;
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin:
     break;
   }
 
@@ -163,7 +165,7 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
 }
 
 void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op;
 
   switch (I.getIntrinsicID()) {
   default:
@@ -171,12 +173,47 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
   case Intrinsic::amdgcn_buffer_atomic_add:
   case Intrinsic::amdgcn_struct_buffer_atomic_add:
   case Intrinsic::amdgcn_raw_buffer_atomic_add:
-    Op = Instruction::Add;
+    Op = AtomicRMWInst::Add;
     break;
   case Intrinsic::amdgcn_buffer_atomic_sub:
   case Intrinsic::amdgcn_struct_buffer_atomic_sub:
   case Intrinsic::amdgcn_raw_buffer_atomic_sub:
-    Op = Instruction::Sub;
+    Op = AtomicRMWInst::Sub;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_and:
+  case Intrinsic::amdgcn_struct_buffer_atomic_and:
+  case Intrinsic::amdgcn_raw_buffer_atomic_and:
+    Op = AtomicRMWInst::And;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_or:
+  case Intrinsic::amdgcn_struct_buffer_atomic_or:
+  case Intrinsic::amdgcn_raw_buffer_atomic_or:
+    Op = AtomicRMWInst::Or;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_xor:
+  case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+  case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+    Op = AtomicRMWInst::Xor;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_smin:
+  case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+  case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+    Op = AtomicRMWInst::Min;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_umin:
+  case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+  case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+    Op = AtomicRMWInst::UMin;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_smax:
+  case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+  case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+    Op = AtomicRMWInst::Max;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_umax:
+  case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+  case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+    Op = AtomicRMWInst::UMax;
     break;
   }
 
@@ -208,12 +245,68 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
   ToReplace.push_back(Info);
 }
 
+// Use the builder to create the non-atomic counterpart of the specified
+// atomicrmw binary op.
+static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
+                                  Value *LHS, Value *RHS) {
+  CmpInst::Predicate Pred;
+
+  switch (Op) {
+  default:
+    llvm_unreachable("Unhandled atomic op");
+  case AtomicRMWInst::Add:
+    return B.CreateBinOp(Instruction::Add, LHS, RHS);
+  case AtomicRMWInst::Sub:
+    return B.CreateBinOp(Instruction::Sub, LHS, RHS);
+  case AtomicRMWInst::And:
+    return B.CreateBinOp(Instruction::And, LHS, RHS);
+  case AtomicRMWInst::Or:
+    return B.CreateBinOp(Instruction::Or, LHS, RHS);
+  case AtomicRMWInst::Xor:
+    return B.CreateBinOp(Instruction::Xor, LHS, RHS);
+
+  case AtomicRMWInst::Max:
+    Pred = CmpInst::ICMP_SGT;
+    break;
+  case AtomicRMWInst::Min:
+    Pred = CmpInst::ICMP_SLT;
+    break;
+  case AtomicRMWInst::UMax:
+    Pred = CmpInst::ICMP_UGT;
+    break;
+  case AtomicRMWInst::UMin:
+    Pred = CmpInst::ICMP_ULT;
+    break;
+  }
+  Value *Cond = B.CreateICmp(Pred, LHS, RHS);
+  return B.CreateSelect(Cond, LHS, RHS);
+}
+
+static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
+                                         unsigned BitWidth) {
+  switch (Op) {
+  default:
+    llvm_unreachable("Unhandled atomic op");
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
+  case AtomicRMWInst::UMax:
+    return APInt::getMinValue(BitWidth);
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::UMin:
+    return APInt::getMaxValue(BitWidth);
+  case AtomicRMWInst::Max:
+    return APInt::getSignedMinValue(BitWidth);
+  case AtomicRMWInst::Min:
+    return APInt::getSignedMaxValue(BitWidth);
+  }
+}
+
 void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
-                                           Instruction::BinaryOps Op,
+                                           AtomicRMWInst::BinOp Op,
                                            unsigned ValIdx,
                                            bool ValDivergent) const {
-  LLVMContext &Context = I.getContext();
-
   // Start building just before the instruction.
   IRBuilder<> B(&I);
 
@@ -251,115 +344,130 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   Value *const V = I.getOperand(ValIdx);
 
   // We need to know how many lanes are active within the wavefront, and we do
-  // this by getting the exec register, which tells us all the lanes that are
-  // active.
-  MDNode *const RegName =
-      llvm::MDNode::get(Context, llvm::MDString::get(Context, "exec"));
-  Value *const Metadata = llvm::MetadataAsValue::get(Context, RegName);
-  CallInst *const Exec =
-      B.CreateIntrinsic(Intrinsic::read_register, {B.getInt64Ty()}, {Metadata});
-  setConvergent(Exec);
+  // this by doing a ballot of active lanes.
+  CallInst *const Ballot = B.CreateIntrinsic(
+      Intrinsic::amdgcn_icmp, {B.getInt64Ty(), B.getInt32Ty()},
+      {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
 
   // We need to know how many lanes are active within the wavefront that are
   // below us. If we counted each lane linearly starting from 0, a lane is
   // below us only if its associated index was less than ours. We do this by
   // using the mbcnt intrinsic.
-  Value *const BitCast = B.CreateBitCast(Exec, VecTy);
+  Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
   Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
   Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
   CallInst *const PartialMbcnt = B.CreateIntrinsic(
       Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
-  CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
-                                            {ExtractHi, PartialMbcnt});
+  Value *const Mbcnt =
+      B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+                                        {ExtractHi, PartialMbcnt}),
+                      Ty, false);
 
-  Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+  Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
 
-  Value *LaneOffset = nullptr;
+  Value *ExclScan = nullptr;
   Value *NewV = nullptr;
 
   // If we have a divergent value in each lane, we need to combine the value
   // using DPP.
   if (ValDivergent) {
-    // First we need to set all inactive invocations to 0, so that they can
-    // correctly contribute to the final result.
-    CallInst *const SetInactive = B.CreateIntrinsic(
-        Intrinsic::amdgcn_set_inactive, Ty, {V, B.getIntN(TyBitWidth, 0)});
-    setConvergent(SetInactive);
-    NewV = SetInactive;
-
-    const unsigned Iters = 6;
-    const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1,     DPP_ROW_SR2,
-                                     DPP_ROW_SR4,     DPP_ROW_SR8,
-                                     DPP_ROW_BCAST15, DPP_ROW_BCAST31};
-    const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
-
-    // This loop performs an inclusive scan across the wavefront, with all lanes
+    // First we need to set all inactive invocations to the identity value, so
+    // that they can correctly contribute to the final result.
+    CallInst *const SetInactive =
+        B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
+
+    CallInst *const FirstDPP =
+        B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
+                          {Identity, SetInactive, B.getInt32(DPP_WF_SR1),
+                           B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
+    ExclScan = FirstDPP;
+
+    const unsigned Iters = 7;
+    const unsigned DPPCtrl[Iters] = {
+        DPP_ROW_SR1, DPP_ROW_SR2,     DPP_ROW_SR3,    DPP_ROW_SR4,
+        DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+    const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+    const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf};
+
+    // This loop performs an exclusive scan across the wavefront, with all lanes
     // active (by using the WWM intrinsic).
     for (unsigned Idx = 0; Idx < Iters; Idx++) {
-      CallInst *const DPP = B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, Ty,
-                                              {NewV, B.getInt32(DPPCtrl[Idx]),
-                                               B.getInt32(RowMask[Idx]),
-                                               B.getInt32(0xf), B.getFalse()});
-      setConvergent(DPP);
-      Value *const WWM = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
-
-      NewV = B.CreateBinOp(Op, NewV, WWM);
-      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
+      Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan;
+      CallInst *const DPP = B.CreateIntrinsic(
+          Intrinsic::amdgcn_update_dpp, Ty,
+          {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
+           B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
+
+      ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP);
     }
 
-    // NewV has returned the inclusive scan of V, but for the lane offset we
-    // require an exclusive scan. We do this by shifting the values from the
-    // entire wavefront right by 1, and by setting the bound_ctrl (last argument
-    // to the intrinsic below) to true, we can guarantee that 0 will be shifted
-    // into the 0'th invocation.
-    CallInst *const DPP =
-        B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, {Ty},
-                          {NewV, B.getInt32(DPP_WF_SR1), B.getInt32(0xf),
-                           B.getInt32(0xf), B.getTrue()});
-    setConvergent(DPP);
-    LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+    NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan);
 
     // Read the value from the last lane, which has accumlated the values of
-    // each active lane in the wavefront. This will be our new value with which
-    // we will provide to the atomic operation.
+    // each active lane in the wavefront. This will be our new value which we
+    // will provide to the atomic operation.
     if (TyBitWidth == 64) {
       Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
       Value *const ExtractHi =
           B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
       CallInst *const ReadLaneLo = B.CreateIntrinsic(
           Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
-      setConvergent(ReadLaneLo);
       CallInst *const ReadLaneHi = B.CreateIntrinsic(
           Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
-      setConvergent(ReadLaneHi);
       Value *const PartialInsert = B.CreateInsertElement(
           UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
       Value *const Insert =
           B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
       NewV = B.CreateBitCast(Insert, Ty);
     } else if (TyBitWidth == 32) {
-      CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
-                                                   {}, {NewV, B.getInt32(63)});
-      setConvergent(ReadLane);
-      NewV = ReadLane;
+      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+                               {NewV, B.getInt32(63)});
     } else {
       llvm_unreachable("Unhandled atomic bit width");
     }
+
+    // Finally mark the readlanes in the WWM section.
+    NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
   } else {
-    // Get the total number of active lanes we have by using popcount.
-    Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Exec);
-    Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
-
-    // Calculate the new value we will be contributing to the atomic operation
-    // for the entire wavefront.
-    NewV = B.CreateMul(V, CtpopCast);
-    LaneOffset = B.CreateMul(V, MbcntCast);
+    switch (Op) {
+    default:
+      llvm_unreachable("Unhandled atomic op");
+
+    case AtomicRMWInst::Add:
+    case AtomicRMWInst::Sub: {
+      // The new value we will be contributing to the atomic operation is the
+      // old value times the number of active lanes.
+      Value *const Ctpop = B.CreateIntCast(
+          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+      NewV = B.CreateMul(V, Ctpop);
+      break;
+    }
+
+    case AtomicRMWInst::And:
+    case AtomicRMWInst::Or:
+    case AtomicRMWInst::Max:
+    case AtomicRMWInst::Min:
+    case AtomicRMWInst::UMax:
+    case AtomicRMWInst::UMin:
+      // These operations with a uniform value are idempotent: doing the atomic
+      // operation multiple times has the same effect as doing it once.
+      NewV = V;
+      break;
+
+    case AtomicRMWInst::Xor:
+      // The new value we will be contributing to the atomic operation is the
+      // old value times the parity of the number of active lanes.
+      Value *const Ctpop = B.CreateIntCast(
+          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+      NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+      break;
+    }
   }
 
   // We only want a single lane to enter our new control flow, and we do this
   // by checking if there are any active lanes below us. Only one lane will
   // have 0 active lanes below us, so that will be the only one to progress.
-  Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+  Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
 
   // Store I's original basic block before we split the block.
   BasicBlock *const EntryBB = I.getParent();
@@ -401,20 +509,16 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
         B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
     CallInst *const ReadFirstLaneLo =
         B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
-    setConvergent(ReadFirstLaneLo);
     CallInst *const ReadFirstLaneHi =
         B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
-    setConvergent(ReadFirstLaneHi);
     Value *const PartialInsert = B.CreateInsertElement(
         UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
     Value *const Insert =
         B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
     BroadcastI = B.CreateBitCast(Insert, Ty);
   } else if (TyBitWidth == 32) {
-    CallInst *const ReadFirstLane =
-        B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
-    setConvergent(ReadFirstLane);
-    BroadcastI = ReadFirstLane;
+
+    BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
   } else {
     llvm_unreachable("Unhandled atomic bit width");
   }
@@ -423,7 +527,31 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   // get our individual lane's slice into the result. We use the lane offset we
   // previously calculated combined with the atomic result value we got from the
   // first lane, to get our lane's index into the atomic result.
-  Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+  Value *LaneOffset = nullptr;
+  if (ValDivergent) {
+    LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
+  } else {
+    switch (Op) {
+    default:
+      llvm_unreachable("Unhandled atomic op");
+    case AtomicRMWInst::Add:
+    case AtomicRMWInst::Sub:
+      LaneOffset = B.CreateMul(V, Mbcnt);
+      break;
+    case AtomicRMWInst::And:
+    case AtomicRMWInst::Or:
+    case AtomicRMWInst::Max:
+    case AtomicRMWInst::Min:
+    case AtomicRMWInst::UMax:
+    case AtomicRMWInst::UMin:
+      LaneOffset = B.CreateSelect(Cond, Identity, V);
+      break;
+    case AtomicRMWInst::Xor:
+      LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+      break;
+    }
+  }
+  Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
 
   if (IsPixelShader) {
     // Need a final PHI to reconverge to above the helper lane branch mask.
@@ -442,10 +570,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   I.eraseFromParent();
 }
 
-void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
-  CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-}
-
 INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
                       "AMDGPU atomic optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index daef37f9c21f..b107c357196d 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -21,28 +20,98 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
 
 using namespace llvm;
 
+namespace {
+
+struct OutgoingArgHandler : public CallLowering::ValueHandler {
+  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+  MachineInstrBuilder MIB;
+
+  Register getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    llvm_unreachable("not implemented");
+  }
+
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    llvm_unreachable("not implemented");
+  }
+
+  void assignValueToReg(Register ValVReg, Register PhysReg,
+                        CCValAssign &VA) override {
+    MIB.addUse(PhysReg);
+    MIRBuilder.buildCopy(PhysReg, ValVReg);
+  }
+
+  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo,
+                 const CallLowering::ArgInfo &Info,
+                 CCState &State) override {
+    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+  }
+};
+
+}
+
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
   : CallLowering(&TLI) {
 }
 
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
-                                     ArrayRef<unsigned> VRegs) const {
-  // FIXME: Add support for non-void returns.
-  if (Val)
+                                     ArrayRef<Register> VRegs) const {
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MFI->setIfReturnsVoid(!Val);
+
+  if (!Val) {
+    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+    return true;
+  }
+
+  Register VReg = VRegs[0];
+
+  const Function &F = MF.getFunction();
+  auto &DL = F.getParent()->getDataLayout();
+  if (!AMDGPU::isShader(F.getCallingConv()))
     return false;
 
-  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
+
+  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+  SmallVector<EVT, 4> SplitVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ArgInfo OrigArg{VReg, Val->getType()};
+  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+  SmallVector<ArgInfo, 8> SplitArgs;
+  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
+  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
+    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+  }
+  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
+  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
+  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+    return false;
+  MIRBuilder.insertInstr(RetInstr);
+
   return true;
 }
 
-unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                                Type *ParamTy,
                                                uint64_t Offset) const {
 
@@ -53,12 +122,12 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
   const DataLayout &DL = F.getParent()->getDataLayout();
   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
   LLT PtrType = getLLTForType(*PtrTy, DL);
-  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
-  unsigned KernArgSegmentPtr =
+  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
+  Register KernArgSegmentPtr =
     MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
+  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
 
-  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
   MIRBuilder.buildConstant(OffsetReg, Offset);
 
   MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
@@ -69,14 +138,14 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                         Type *ParamTy, uint64_t Offset,
                                         unsigned Align,
-                                        unsigned DstReg) const {
+                                        Register DstReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
   PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
-  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
 
   MachineMemOperand *MMO =
       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
@@ -87,93 +156,233 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 }
 
-bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                              const Function &F,
-                                              ArrayRef<unsigned> VRegs) const {
-  // AMDGPU_GS and AMDGP_HS are not supported yet.
-  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
-      F.getCallingConv() == CallingConv::AMDGPU_HS)
-    return false;
+static Register findFirstFreeSGPR(CCState &CCInfo) {
+  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
+    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
+      return AMDGPU::SGPR0 + Reg;
+    }
+  }
+  llvm_unreachable("Cannot allocate sgpr");
+}
 
-  MachineFunction &MF = MIRBuilder.getMF();
-  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+                                           MachineFunction &MF,
+                                           const SIRegisterInfo &TRI,
+                                           SIMachineFunctionInfo &Info) {
+  const LLT S32 = LLT::scalar(32);
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
-  const DataLayout &DL = F.getParent()->getDataLayout();
 
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  if (Info.hasWorkItemIDX()) {
+    Register Reg = AMDGPU::VGPR0;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
+  }
+
+  if (Info.hasWorkItemIDY()) {
+    Register Reg = AMDGPU::VGPR1;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
+  }
+
+  if (Info.hasWorkItemIDZ()) {
+    Register Reg = AMDGPU::VGPR2;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
+  }
+}
 
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+                                 MachineIRBuilder &MIRBuilder,
+                                 MachineFunction &MF,
+                                 const SIRegisterInfo &TRI,
+                                 SIMachineFunctionInfo &Info) {
   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
-  if (Info->hasPrivateSegmentBuffer()) {
-    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
-    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+  if (Info.hasPrivateSegmentBuffer()) {
+    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
     CCInfo.AllocateReg(PrivateSegmentBufferReg);
   }
 
-  if (Info->hasDispatchPtr()) {
-    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
-    // FIXME: Need to add reg as live-in
+  if (Info.hasDispatchPtr()) {
+    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(DispatchPtrReg);
   }
 
-  if (Info->hasQueuePtr()) {
-    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
-    // FIXME: Need to add reg as live-in
+  if (Info.hasQueuePtr()) {
+    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(QueuePtrReg);
   }
 
-  if (Info->hasKernargSegmentPtr()) {
-    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
-    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
-    unsigned VReg = MRI.createGenericVirtualRegister(P2);
+  if (Info.hasKernargSegmentPtr()) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
+    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+    Register VReg = MRI.createGenericVirtualRegister(P4);
     MRI.addLiveIn(InputPtrReg, VReg);
     MIRBuilder.getMBB().addLiveIn(InputPtrReg);
     MIRBuilder.buildCopy(VReg, InputPtrReg);
     CCInfo.AllocateReg(InputPtrReg);
   }
 
-  if (Info->hasDispatchID()) {
-    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
-    // FIXME: Need to add reg as live-in
+  if (Info.hasDispatchID()) {
+    unsigned DispatchIDReg = Info.addDispatchID(TRI);
+    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(DispatchIDReg);
   }
 
-  if (Info->hasFlatScratchInit()) {
-    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
-    // FIXME: Need to add reg as live-in
+  if (Info.hasFlatScratchInit()) {
+    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
     CCInfo.AllocateReg(FlatScratchInitReg);
   }
 
+  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+  // these from the dispatch pointer.
+}
+
+static void allocateSystemSGPRs(CCState &CCInfo,
+                                MachineFunction &MF,
+                                SIMachineFunctionInfo &Info,
+                                CallingConv::ID CallConv,
+                                bool IsShader) {
+  const LLT S32 = LLT::scalar(32);
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  if (Info.hasWorkGroupIDX()) {
+    Register Reg = Info.addWorkGroupIDX();
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info.hasWorkGroupIDY()) {
+    Register Reg = Info.addWorkGroupIDY();
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info.hasWorkGroupIDZ()) {
+    unsigned Reg = Info.addWorkGroupIDZ();
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info.hasWorkGroupInfo()) {
+    unsigned Reg = Info.addWorkGroupInfo();
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info.hasPrivateSegmentWaveByteOffset()) {
+    // Scratch wave offset passed in system SGPR.
+    unsigned PrivateSegmentWaveByteOffsetReg;
+
+    if (IsShader) {
+      PrivateSegmentWaveByteOffsetReg =
+        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+      // This is true if the scratch wave byte offset doesn't have a fixed
+      // location.
+      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+      }
+    } else
+      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
+
+    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+  }
+}
+
+bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
+  unsigned i = 0;
+  const unsigned KernArgBaseAlign = 16;
+  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+  uint64_t ExplicitArgOffset = 0;
+
+  // TODO: Align down to dword alignment and extract bits for extending loads.
+  for (auto &Arg : F.args()) {
+    Type *ArgTy = Arg.getType();
+    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+    if (AllocSize == 0)
+      continue;
+
+    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+    ArrayRef<Register> OrigArgRegs = VRegs[i];
+    Register ArgReg =
+      OrigArgRegs.size() == 1
+      ? OrigArgRegs[0]
+      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
+    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+    if (OrigArgRegs.size() > 1)
+      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
+    ++i;
+  }
+
+  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+  return true;
+}
+
+bool AMDGPUCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   // The infrastructure for normal calling convention lowering is essentially
   // useless for kernels. We want to avoid any kind of legalization or argument
   // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
-    unsigned i = 0;
-    const unsigned KernArgBaseAlign = 16;
-    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
-    uint64_t ExplicitArgOffset = 0;
-
-    // TODO: Align down to dword alignment and extract bits for extending loads.
-    for (auto &Arg : F.args()) {
-      Type *ArgTy = Arg.getType();
-      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-      if (AllocSize == 0)
-        continue;
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
 
-      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+  // AMDGPU_GS and AMDGPU_HS are not supported yet.
+  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
+      F.getCallingConv() == CallingConv::AMDGPU_HS)
+    return false;
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const DataLayout &DL = F.getParent()->getDataLayout();
 
-      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
-      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+  bool IsShader = AMDGPU::isShader(F.getCallingConv());
 
-      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
-      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
-      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
-      ++i;
-    }
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 
-    return true;
+  if (Info->hasImplicitBufferPtr()) {
+    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
   unsigned NumArgs = F.arg_size();
@@ -186,7 +395,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
 
     // We can only hanlde simple value types at the moment.
     ISD::ArgFlagsTy Flags;
-    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
+    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
+    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
     setArgFlags(OrigArg, i + 1, DL, F);
     Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
 
@@ -239,11 +449,15 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
          OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
        if (Skipped.test(OrigArgIdx))
           continue;
-      CCValAssign &VA = ArgLocs[i++];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
-      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
+       assert(VRegs[OrigArgIdx].size() == 1 &&
+              "Can't lower into more than 1 reg");
+       CCValAssign &VA = ArgLocs[i++];
+       MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
+       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
+       MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
     }
+
+    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
     return true;
   }
 
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index ed859716218e..3599659cac6a 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -1,9 +1,8 @@
 //===- lib/Target/AMDGPU/AMDGPUCallLowering.h - Call lowering -*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -23,20 +22,25 @@ namespace llvm {
 class AMDGPUTargetLowering;
 
 class AMDGPUCallLowering: public CallLowering {
-  unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+  Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                              uint64_t Offset) const;
 
   void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                       uint64_t Offset, unsigned Align,
-                      unsigned DstReg) const;
+                      Register DstReg) const;
 
  public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<Register> VRegs) const override;
+
+  bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder,
+                                  const Function &F,
+                                  ArrayRef<ArrayRef<Register>> VRegs) const;
+
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<unsigned> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
   static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
 };
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 367f120b5fa6..3688cd77542e 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -1,9 +1,8 @@
 //===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,7 +23,16 @@ def CC_SI : CallingConv<[
     SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
     SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
     SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+    SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+    SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+    SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+    SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+    SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+    SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+    SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+    SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+    SGPR104, SGPR105
   ]>>>,
 
   // We have no way of referring to the generated register tuples
@@ -60,7 +68,16 @@ def RetCC_SI_Shader : CallingConv<[
     SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
     SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
     SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+    SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+    SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+    SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+    SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+    SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+    SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+    SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+    SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+    SGPR104, SGPR105
   ]>>,
 
   // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
@@ -93,12 +110,22 @@ def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
   (sequence "VGPR%u", 32, 255)
 >;
 
-def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
-  (sequence "SGPR%u", 32, 103)
+def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
+  (sequence "SGPR%u", 32, 105)
+>;
+
+// Just to get the regmask, not for calling convention purposes.
+def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
+  (sequence "VGPR%u", 0, 255)
+>;
+
+// Just to get the regmask, not for calling convention purposes.
+def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
+  (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
 >;
 
 def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
-  (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
+  (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_105)
 >;
 
 // Calling convention for leaf functions
@@ -111,10 +138,12 @@ def CC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
+  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+  CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
   CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+  CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
   CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
   CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
 ]>;
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 4dc1e67c573d..b750c6b5f6d2 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,6 +61,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   AssumptionCache *AC = nullptr;
   LegacyDivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
+  const DataLayout *DL = nullptr;
   bool HasUnsafeFPMath = false;
 
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -134,6 +134,16 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   /// \returns True.
   bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
 
+
+  unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+  unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
+  bool isI24(Value *V, unsigned ScalarSize) const;
+  bool isU24(Value *V, unsigned ScalarSize) const;
+
+  /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
+  /// SelectionDAG has an issue where an and asserting the bits are known
+  bool replaceMulWithMul24(BinaryOperator &I) const;
+
   /// Expands 24 bit div or rem.
   Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
                         Value *Num, Value *Den,
@@ -393,6 +403,118 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
   return true;
 }
 
+unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
+                                               unsigned ScalarSize) const {
+  KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
+  return ScalarSize - Known.countMinLeadingZeros(); // Drop known-zero high bits.
+}
+
+unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
+                                             unsigned ScalarSize) const {
+  // Returns the scalar width minus the number of known sign bits. In order for
+  // the value to be a signed 24-bit value, bit 23 must be a sign bit.
+  return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
+}
+
+bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
+  return ScalarSize >= 24 && // Types narrower than 24 bits should be treated
+                                     // as unsigned 24-bit values.
+    numBitsSigned(V, ScalarSize) < 24; // I.e. at most 23 non-sign bits.
+}
+
+bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
+  return numBitsUnsigned(V, ScalarSize) <= 24; // Fits in the low 24 bits.
+}
+
+static void extractValues(IRBuilder<> &Builder,
+                          SmallVectorImpl<Value *> &Values, Value *V) {
+  VectorType *VT = dyn_cast<VectorType>(V->getType());
+  if (!VT) { // Not a vector: append V itself as the only entry.
+    Values.push_back(V);
+    return;
+  }
+  // Vector: append one extractelement per element, in index order.
+  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
+    Values.push_back(Builder.CreateExtractElement(V, I));
+}
+
+/// Reassembles the scalars in \p Values into a single value of type \p Ty.
+static Value *insertValues(IRBuilder<> &Builder, Type *Ty,
+                           SmallVectorImpl<Value *> &Values) {
+  if (!Ty->isVectorTy()) // Decide on Ty: a <1 x T> must still be rebuilt.
+    return Values[0];
+  // Vector: insert each scalar at its index into an initially-undef vector.
+  Value *NewVal = UndefValue::get(Ty);
+  for (int I = 0, E = Values.size(); I != E; ++I)
+    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
+
+  return NewVal;
+}
+
+/// Rewrites a divergent mul whose operands fit in 24 bits to amdgcn mul24.
+bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
+  if (I.getOpcode() != Instruction::Mul)
+    return false;
+
+  Type *Ty = I.getType();
+  unsigned Size = Ty->getScalarSizeInBits();
+  // The mul24 call below produces an i32, so for wider types a product that
+  // needs more than 32 bits would be truncated; conservatively skip them.
+  if (Size > 32 || (Size <= 16 && ST->has16BitInsts()))
+    return false;
+
+  // Prefer scalar if this could be s_mul_i32
+  if (DA->isUniform(&I))
+    return false;
+
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  // TODO: Should this try to match mulhi24?
+  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+  if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
+    IntrID = Intrinsic::amdgcn_mul_u24;
+  } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
+    IntrID = Intrinsic::amdgcn_mul_i24;
+  } else
+    return false;
+
+  SmallVector<Value *, 4> LHSVals;
+  SmallVector<Value *, 4> RHSVals;
+  SmallVector<Value *, 4> ResultVals;
+  extractValues(Builder, LHSVals, LHS);
+  extractValues(Builder, RHSVals, RHS);
+
+  // Emit one i32 mul24 per element, then convert back to the element type.
+  IntegerType *I32Ty = Builder.getInt32Ty();
+  FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
+  for (int Idx = 0, E = LHSVals.size(); Idx != E; ++Idx) {
+    Value *Op0, *Op1;
+    if (IntrID == Intrinsic::amdgcn_mul_u24) {
+      Op0 = Builder.CreateZExtOrTrunc(LHSVals[Idx], I32Ty);
+      Op1 = Builder.CreateZExtOrTrunc(RHSVals[Idx], I32Ty);
+    } else {
+      Op0 = Builder.CreateSExtOrTrunc(LHSVals[Idx], I32Ty);
+      Op1 = Builder.CreateSExtOrTrunc(RHSVals[Idx], I32Ty);
+    }
+    Value *Result = Builder.CreateCall(Intrin, {Op0, Op1});
+    if (IntrID == Intrinsic::amdgcn_mul_u24) {
+      ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
+                                                     LHSVals[Idx]->getType()));
+    } else {
+      ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
+                                                     LHSVals[Idx]->getType()));
+    }
+  }
+
+  I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
+  I.eraseFromParent();
+
+  return true;
+}
+
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -757,6 +879,9 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
       DA->isUniform(&I) && promoteUniformOpToI32(I))
     return true;
 
+  if (replaceMulWithMul24(I))
+    return true;
+
   bool Changed = false;
   Instruction::BinaryOps Opc = I.getOpcode();
   Type *Ty = I.getType();
@@ -807,7 +932,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
     Type *I32Ty = Builder.getInt32Ty();
     Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
     Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
-    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
+    LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, BitCast);
     WidenLoad->copyMetadata(I);
 
     // If we have range metadata, we need to convert the type, and not make
@@ -883,6 +1008,7 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
 
 bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
   Mod = &M;
+  DL = &Mod->getDataLayout();
   return false;
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPUFeatures.td b/lib/Target/AMDGPU/AMDGPUFeatures.td
index 3c7d8a8fc550..ea3952c316e4 100644
--- a/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -1,9 +1,8 @@
 //===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -50,17 +49,12 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
 
-class SubtargetFeatureGeneration <string Value, string Subtarget,
+class SubtargetFeatureGeneration <string Value, string FeatureName,
+                                 string Subtarget,
                                   list<SubtargetFeature> Implies> :
-        SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
+        SubtargetFeature <FeatureName, "Gen", Subtarget#"::"#Value,
                           Value#" GPU generation", Implies>;
 
-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
-  "DX10Clamp",
-  "true",
-  "clamp modifier clamps NaNs to 0.0"
->;
-
 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
   "EnablePromoteAlloca",
   "true",
diff --git a/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
index 6e2a981d3396..9ba04d113c70 100644
--- a/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index e32ca9653b3a..e80797736363 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index ee836bf8a631..48b64488303e 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -1,9 +1,8 @@
 //===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td
index 59bb2a16e0f3..cad4c2ef404c 100644
--- a/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -1,9 +1,8 @@
 //===-- AMDGPUGIsel.td - AMDGPU GlobalISel Patterns---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This files contains patterns that should only be used by GlobalISel.  For
@@ -13,6 +12,10 @@
 
 include "AMDGPU.td"
 
+def p0 : PtrValueType<i64, 0>;
+def p1 : PtrValueType<i64, 1>;
+def p4 : PtrValueType<i64, 4>;
+
 def sd_vsrc0 : ComplexPattern<i32, 1, "">;
 def gi_vsrc0 :
     GIComplexOperandMatcher<s32, "selectVSRC0">,
@@ -35,6 +38,33 @@ def gi_vop3omods :
     GIComplexOperandMatcher<s32, "selectVOP3OMods">,
     GIComplexPatternEquiv<VOP3OMods>;
 
+def gi_smrd_imm :
+    GIComplexOperandMatcher<s64, "selectSmrdImm">,
+    GIComplexPatternEquiv<SMRDImm>;
+
+def gi_smrd_imm32 :
+    GIComplexOperandMatcher<s64, "selectSmrdImm32">,
+    GIComplexPatternEquiv<SMRDImm32>;
+
+def gi_smrd_sgpr :
+    GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
+    GIComplexPatternEquiv<SMRDSgpr>;
+
+def gi_flat_offset :
+    GIComplexOperandMatcher<s64, "selectFlatOffset">,
+    GIComplexPatternEquiv<FLATOffset>;
+def gi_flat_offset_signed :
+    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
+    GIComplexPatternEquiv<FLATOffsetSigned>;
+
+def gi_mubuf_scratch_offset :
+    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
+    GIComplexPatternEquiv<MUBUFScratchOffset>;
+def gi_mubuf_scratch_offen :
+    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
+    GIComplexPatternEquiv<MUBUFScratchOffen>;
+
+
 class GISelSop2Pat <
   SDPatternOperator node,
   Instruction inst,
@@ -113,15 +143,6 @@ multiclass GISelVop2IntrPat <
 def : GISelSop2Pat <or, S_OR_B32, i32>;
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
-def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
-let AddedComplexity = 100 in {
-let SubtargetPredicate = isSICI in {
-def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>;
-}
-def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
-}
-def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-
 // FIXME: We can't re-use SelectionDAG patterns here because they match
 // against a custom SDNode and we would need to create a generic machine
 // instruction that is equivalent to the custom SDNode.  This would also require
@@ -135,3 +156,11 @@ defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
 def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
 defm : GISelVop2IntrPat <int_minnum, V_MIN_F32_e32, f32>;
 def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
+
+// Since GlobalISel is more flexible than SelectionDAG, I think we can get
+// away with adding patterns for integer types and not legalizing all
+// loads and stores to vector types.  This should help simplify the load/store
+// legalization.
+foreach Ty = [i64, p0, p1, p4] in {
+  defm : SMRD_Pattern <"S_LOAD_DWORDX2",  Ty>;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 6eab59ab4e09..0a1f48231b18 100644
--- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -1,9 +1,8 @@
 //===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -92,6 +91,28 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
   {&PartMappings[17], 1}
 };
 
+const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
+     /*32-bit op*/ {0, 32, SGPRRegBank},
+   /*2x32-bit op*/ {0, 32, SGPRRegBank},
+                   {32, 32, SGPRRegBank},
+/*<2x32-bit> op*/  {0, 64, SGPRRegBank},
+
+    /*32-bit op*/  {0, 32, VGPRRegBank},
+  /*2x32-bit op*/  {0, 32, VGPRRegBank},
+                   {32, 32, VGPRRegBank},
+};
+
+
+// For instructions that can operate on 64-bit values only in the scalar version.
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
+  /*32-bit sgpr*/     {&SGPROnly64BreakDown[0], 1},
+  /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
+  /*64-bit sgpr */    {&SGPROnly64BreakDown[3], 1},
+
+  /*32-bit vgpr*/     {&SGPROnly64BreakDown[4], 1},
+  /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
+};
+
 enum ValueMappingIdx {
   SCCStartIdx = 0,
   SGPRStartIdx = 2,
@@ -128,5 +149,89 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
   return &ValMappings[Idx];
 }
 
+const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
+                                                                unsigned Size) {
+  if (Size != 64) // Only 64-bit values get the special mapping.
+    return getValueMapping(BankID, Size);
+
+  if (BankID == AMDGPU::VGPRRegBankID)
+    return &ValMappingsSGPR64OnlyVGPR32[4]; // 2 x 32-bit vgpr
+
+  assert(BankID == AMDGPU::SGPRRegBankID);
+  return &ValMappingsSGPR64OnlyVGPR32[2]; // 64-bit sgpr
+}
+
+const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
+  /* 256-bit load */    {0, 256, SGPRRegBank},
+  /* 512-bit load */    {0, 512, SGPRRegBank},
+  /* 8 32-bit loads */  {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
+                        {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
+                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
+                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
+  /* 16 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
+                        {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
+                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
+                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
+                        {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank},
+                        {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank},
+                        {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank},
+                        {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank},
+  /* 4 64-bit loads */  {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
+                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
+  /* 8 64-bit loads */  {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
+                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
+                        {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank},
+                        {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank},
+
+  /* FIXME: The generic register bank select does not support complex
+   * break downs where the number of vector elements does not equal the
+   * number of breakdowns.
+   * FIXME: register bank select now tries to handle complex break downs,
+   * but it emits an illegal instruction:
+   * %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128)
+   */
+  /* 2 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
+  /* 4 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
+                        {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank}
+};
+
+const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] {
+  /* 256-bit load */     {&LoadSGPROnlyBreakDown[0], 1},
+  /* 512-bit load */     {&LoadSGPROnlyBreakDown[1], 1},
+  /* <8 x i32> load  */  {&LoadSGPROnlyBreakDown[2], 8},
+  /* <16 x i32> load */  {&LoadSGPROnlyBreakDown[10], 16},
+  /* <4 x i64> load */   {&LoadSGPROnlyBreakDown[26], 4},
+  /* <8 x i64> load */   {&LoadSGPROnlyBreakDown[30], 8}
+};
+
+const RegisterBankInfo::ValueMapping *
+getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) {
+  unsigned Size = SizeTy.getSizeInBits();
+  if (Size < 256 || BankID == AMDGPU::SGPRRegBankID)
+    return getValueMapping(BankID, Size);
+
+  assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID);
+
+  // Default to using the non-split ValueMappings. The SGPR bank already
+  // returned above, so these are only kept if we don't know how to handle
+  // the vector type.
+  unsigned Idx = Size == 256 ? 0 : 1;
+
+  // We need to split this load if it has a vgpr pointer.
+  if (BankID == AMDGPU::VGPRRegBankID) {
+    if (SizeTy == LLT::vector(8, 32))
+      Idx = 2; // <8 x i32> load
+    else if (SizeTy == LLT::vector(16, 32))
+      Idx = 3; // <16 x i32> load
+    else if (SizeTy == LLT::vector(4, 64))
+      Idx = 4; // <4 x i64> load
+    else if (SizeTy == LLT::vector(8, 64))
+      Idx = 5; // <8 x i64> load
+  }
+
+  return &ValMappingsLoadSGPROnly[Idx];
+}
+
+
 } // End AMDGPU namespace.
 } // End llvm namespace.
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index c38b0e61558b..b31de0af5018 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -1,9 +1,8 @@
 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -240,23 +239,7 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
 Kernel::DebugProps::Metadata
 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
                                      const SIProgramInfo &ProgramInfo) const {
-  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
-  HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
-  if (!STM.debuggerSupported())
-    return HSADebugProps;
-
-  HSADebugProps.mDebuggerABIVersion.push_back(1);
-  HSADebugProps.mDebuggerABIVersion.push_back(0);
-
-  if (STM.debuggerEmitPrologue()) {
-    HSADebugProps.mPrivateSegmentBufferSGPR =
-        ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
-    HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
-        ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
-  }
-
-  return HSADebugProps;
+  return HSAMD::Kernel::DebugProps::Metadata();
 }
 
 void MetadataStreamerV2::emitVersion() {
@@ -452,6 +435,10 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
     }
   }
+
+  // Emit the pointer argument for multi-grid object.
+  if (HiddenArgNumBytes >= 56)
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
 }
 
 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
@@ -506,20 +493,16 @@ void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
   errs() << "AMDGPU HSA Metadata Parser Test: ";
 
-  std::shared_ptr<msgpack::Node> FromHSAMetadataString =
-      std::make_shared<msgpack::MapNode>();
+  msgpack::Document FromHSAMetadataString;
 
-  yaml::Input YIn(HSAMetadataString);
-  YIn >> FromHSAMetadataString;
-  if (YIn.error()) {
+  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
     errs() << "FAIL\n";
     return;
   }
 
   std::string ToHSAMetadataString;
   raw_string_ostream StrOS(ToHSAMetadataString);
-  yaml::Output YOut(StrOS);
-  YOut << FromHSAMetadataString;
+  FromHSAMetadataString.toYAML(StrOS);
 
   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
   if (HSAMetadataString != ToHSAMetadataString) {
@@ -653,23 +636,23 @@ std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
   }
 }
 
-std::shared_ptr<msgpack::ArrayNode>
+msgpack::ArrayDocNode
 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
-  auto Dims = std::make_shared<msgpack::ArrayNode>();
+  auto Dims = HSAMetadataDoc->getArrayNode();
   if (Node->getNumOperands() != 3)
     return Dims;
 
   for (auto &Op : Node->operands())
-    Dims->push_back(std::make_shared<msgpack::ScalarNode>(
-        mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+    Dims.push_back(Dims.getDocument()->getNode(
+        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
   return Dims;
 }
 
 void MetadataStreamerV3::emitVersion() {
-  auto Version = std::make_shared<msgpack::ArrayNode>();
-  Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
-  Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
-  getRootMetadata("amdhsa.version") = std::move(Version);
+  auto Version = HSAMetadataDoc->getArrayNode();
+  Version.push_back(Version.getDocument()->getNode(VersionMajor));
+  Version.push_back(Version.getDocument()->getNode(VersionMinor));
+  getRootMetadata("amdhsa.version") = Version;
 }
 
 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
@@ -677,16 +660,16 @@ void MetadataStreamerV3::emitPrintf(const Module &Mod) {
   if (!Node)
     return;
 
-  auto Printf = std::make_shared<msgpack::ArrayNode>();
+  auto Printf = HSAMetadataDoc->getArrayNode();
   for (auto Op : Node->operands())
     if (Op->getNumOperands())
-      Printf->push_back(std::make_shared<msgpack::ScalarNode>(
-          cast<MDString>(Op->getOperand(0))->getString()));
-  getRootMetadata("amdhsa.printf") = std::move(Printf);
+      Printf.push_back(Printf.getDocument()->getNode(
+          cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
+  getRootMetadata("amdhsa.printf") = Printf;
 }
 
 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
-                                            msgpack::MapNode &Kern) {
+                                            msgpack::MapDocNode Kern) {
   // TODO: What about other languages?
   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
   if (!Node || !Node->getNumOperands())
@@ -695,77 +678,50 @@ void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
   if (Op0->getNumOperands() <= 1)
     return;
 
-  Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
-  auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
-  LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
+  auto LanguageVersion = Kern.getDocument()->getArrayNode();
+  LanguageVersion.push_back(Kern.getDocument()->getNode(
       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
-  LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+  LanguageVersion.push_back(Kern.getDocument()->getNode(
       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
-  Kern[".language_version"] = std::move(LanguageVersion);
+  Kern[".language_version"] = LanguageVersion;
 }
 
 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
-                                         msgpack::MapNode &Kern) {
+                                         msgpack::MapDocNode Kern) {
 
   if (auto Node = Func.getMetadata("reqd_work_group_size"))
     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
   if (auto Node = Func.getMetadata("work_group_size_hint"))
     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
   if (auto Node = Func.getMetadata("vec_type_hint")) {
-    Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
-        cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
-        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+    Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
+        getTypeName(
+            cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+            mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
+        /*Copy=*/true);
   }
   if (Func.hasFnAttribute("runtime-handle")) {
-    Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
-        Func.getFnAttribute("runtime-handle").getValueAsString().str());
+    Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
+        Func.getFnAttribute("runtime-handle").getValueAsString().str(),
+        /*Copy=*/true);
   }
 }
 
 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
-                                        msgpack::MapNode &Kern) {
+                                        msgpack::MapDocNode Kern) {
   unsigned Offset = 0;
-  auto Args = std::make_shared<msgpack::ArrayNode>();
+  auto Args = HSAMetadataDoc->getArrayNode();
   for (auto &Arg : Func.args())
-    emitKernelArg(Arg, Offset, *Args);
-
-  emitHiddenKernelArgs(Func, Offset, *Args);
-
-  // TODO: What about other languages?
-  if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
-    auto &DL = Func.getParent()->getDataLayout();
-    auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
-
-    auto Int8PtrTy =
-        Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+    emitKernelArg(Arg, Offset, Args);
 
-    // Emit "printf buffer" argument if printf is used, otherwise emit dummy
-    // "none" argument.
-    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
-      emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
-    else
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+  emitHiddenKernelArgs(Func, Offset, Args);
 
-    // Emit "default queue" and "completion action" arguments if enqueue kernel
-    // is used, otherwise emit dummy "none" arguments.
-    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
-      emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
-      emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
-    } else {
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
-    }
-  }
-
-  Kern[".args"] = std::move(Args);
+  Kern[".args"] = Args;
 }
 
 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
-                                       msgpack::ArrayNode &Args) {
+                                       msgpack::ArrayDocNode Args) {
   auto Func = Arg.getParent();
   auto ArgNo = Arg.getArgNo();
   const MDNode *Node;
@@ -822,36 +778,35 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
 
 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
                                        StringRef ValueKind, unsigned &Offset,
-                                       msgpack::ArrayNode &Args,
+                                       msgpack::ArrayDocNode Args,
                                        unsigned PointeeAlign, StringRef Name,
                                        StringRef TypeName,
                                        StringRef BaseTypeName,
                                        StringRef AccQual, StringRef TypeQual) {
-  auto ArgPtr = std::make_shared<msgpack::MapNode>();
-  auto &Arg = *ArgPtr;
+  auto Arg = Args.getDocument()->getMapNode();
 
   if (!Name.empty())
-    Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+    Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
   if (!TypeName.empty())
-    Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+    Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
   auto Size = DL.getTypeAllocSize(Ty);
   auto Align = DL.getABITypeAlignment(Ty);
-  Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+  Arg[".size"] = Arg.getDocument()->getNode(Size);
   Offset = alignTo(Offset, Align);
-  Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+  Arg[".offset"] = Arg.getDocument()->getNode(Offset);
   Offset += Size;
-  Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
   Arg[".value_type"] =
-      std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+      Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
   if (PointeeAlign)
-    Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+    Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
 
   if (auto PtrTy = dyn_cast<PointerType>(Ty))
     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
-      Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+      Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
 
   if (auto AQ = getAccessQualifier(AccQual))
-    Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
 
   // TODO: Emit Arg[".actual_access"].
 
@@ -859,21 +814,21 @@ void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
   TypeQual.split(SplitTypeQuals, " ", -1, false);
   for (StringRef Key : SplitTypeQuals) {
     if (Key == "const")
-      Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+      Arg[".is_const"] = Arg.getDocument()->getNode(true);
     else if (Key == "restrict")
-      Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+      Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
     else if (Key == "volatile")
-      Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+      Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
     else if (Key == "pipe")
-      Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+      Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
   }
 
-  Args.push_back(std::move(ArgPtr));
+  Args.push_back(Arg);
 }
 
 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
                                               unsigned &Offset,
-                                              msgpack::ArrayNode &Args) {
+                                              msgpack::ArrayDocNode Args) {
   int HiddenArgNumBytes =
       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
 
@@ -913,56 +868,58 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
     }
   }
+
+  // Emit the pointer argument for multi-grid object.
+  if (HiddenArgNumBytes >= 56)
+    emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
 }
 
-std::shared_ptr<msgpack::MapNode>
+msgpack::MapDocNode
 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
                                       const SIProgramInfo &ProgramInfo) const {
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   const Function &F = MF.getFunction();
 
-  auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
-  auto &Kern = *HSAKernelProps;
+  auto Kern = HSAMetadataDoc->getMapNode();
 
   unsigned MaxKernArgAlign;
-  Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
   Kern[".group_segment_fixed_size"] =
-      std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+      Kern.getDocument()->getNode(ProgramInfo.LDSSize);
   Kern[".private_segment_fixed_size"] =
-      std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+      Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
   Kern[".kernarg_segment_align"] =
-      std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+      Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
   Kern[".wavefront_size"] =
-      std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
-  Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
-  Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+      Kern.getDocument()->getNode(STM.getWavefrontSize());
+  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
+  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
   Kern[".max_flat_workgroup_size"] =
-      std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
   Kern[".sgpr_spill_count"] =
-      std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
   Kern[".vgpr_spill_count"] =
-      std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
 
-  return HSAKernelProps;
+  return Kern;
 }
 
 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
-  return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
 }
 
 void MetadataStreamerV3::begin(const Module &Mod) {
   emitVersion();
   emitPrintf(Mod);
-  getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
 }
 
 void MetadataStreamerV3::end() {
   std::string HSAMetadataString;
   raw_string_ostream StrOS(HSAMetadataString);
-  yaml::Output YOut(StrOS);
-  YOut << HSAMetadataRoot;
+  HSAMetadataDoc->toYAML(StrOS);
 
   if (DumpHSAMetadata)
     dump(StrOS.str());
@@ -973,25 +930,24 @@ void MetadataStreamerV3::end() {
 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
                                     const SIProgramInfo &ProgramInfo) {
   auto &Func = MF.getFunction();
-  auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+  auto Kern = getHSAKernelProps(MF, ProgramInfo);
 
   assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
          Func.getCallingConv() == CallingConv::SPIR_KERNEL);
 
-  auto &KernelsNode = getRootMetadata("amdhsa.kernels");
-  auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+  auto Kernels =
+      getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
 
   {
-    auto &Kern = *KernelProps;
-    Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
-    Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
-        (Twine(Func.getName()) + Twine(".kd")).str());
+    Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
+    Kern[".symbol"] = Kern.getDocument()->getNode(
+        (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
     emitKernelLanguage(Func, Kern);
     emitKernelAttrs(Func, Kern);
     emitKernelArgs(Func, Kern);
   }
 
-  Kernels->push_back(std::move(KernelProps));
+  Kernels.push_back(Kern);
 }
 
 } // end namespace HSAMD
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index afc09baf952d..2eecddbd7b01 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -1,9 +1,8 @@
 //===--- AMDGPUHSAMetadataStreamer.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,7 @@
 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 #include "llvm/Support/AMDGPUMetadata.h"
 
 namespace llvm {
@@ -52,8 +51,8 @@ public:
 
 class MetadataStreamerV3 final : public MetadataStreamer {
 private:
-  std::shared_ptr<msgpack::Node> HSAMetadataRoot =
-      std::make_shared<msgpack::MapNode>();
+  std::unique_ptr<msgpack::Document> HSAMetadataDoc =
+      llvm::make_unique<msgpack::Document>();
 
   void dump(StringRef HSAMetadataString) const;
 
@@ -70,41 +69,39 @@ private:
 
   std::string getTypeName(Type *Ty, bool Signed) const;
 
-  std::shared_ptr<msgpack::ArrayNode>
-  getWorkGroupDimensions(MDNode *Node) const;
+  msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const;
 
-  std::shared_ptr<msgpack::MapNode>
-  getHSAKernelProps(const MachineFunction &MF,
-                    const SIProgramInfo &ProgramInfo) const;
+  msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
+                                        const SIProgramInfo &ProgramInfo) const;
 
   void emitVersion();
 
   void emitPrintf(const Module &Mod);
 
-  void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+  void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
 
-  void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+  void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
 
-  void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+  void emitKernelArgs(const Function &Func, msgpack::MapDocNode Kern);
 
   void emitKernelArg(const Argument &Arg, unsigned &Offset,
-                     msgpack::ArrayNode &Args);
+                     msgpack::ArrayDocNode Args);
 
   void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
-                     unsigned &Offset, msgpack::ArrayNode &Args,
+                     unsigned &Offset, msgpack::ArrayDocNode Args,
                      unsigned PointeeAlign = 0, StringRef Name = "",
                      StringRef TypeName = "", StringRef BaseTypeName = "",
                      StringRef AccQual = "", StringRef TypeQual = "");
 
   void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
-                            msgpack::ArrayNode &Args);
+                            msgpack::ArrayDocNode Args);
 
-  std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
-    return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+  msgpack::DocNode &getRootMetadata(StringRef Key) {
+    return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
   }
 
-  std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
-    return HSAMetadataRoot;
+  msgpack::DocNode &getHSAMetadataRoot() {
+    return HSAMetadataDoc->getRoot();
   }
 
 public:
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a0a045e72a58..ea730539f834 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 //
@@ -40,6 +39,9 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/BasicBlock.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Dominators.h"
+#endif
 #include "llvm/IR/Instruction.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Casting.h"
@@ -52,6 +54,8 @@
 #include <new>
 #include <vector>
 
+#define DEBUG_TYPE "isel"
+
 using namespace llvm;
 
 namespace llvm {
@@ -66,6 +70,57 @@ class R600InstrInfo;
 
 namespace {
 
+static bool isNullConstantOrUndef(SDValue V) {
+  if (V.isUndef())
+    return true;
+
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+  return Const != nullptr && Const->isNullValue();
+}
+
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+  // This is only used for packed vectors, where using 0 for undef should
+  // always be good.
+  if (N.isUndef()) {
+    Out = 0;
+    return true;
+  }
+
+  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    Out = C->getAPIntValue().getSExtValue();
+    return true;
+  }
+
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+// TODO: Handle undef as zero
+static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+                                 bool Negate = false) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+  uint32_t LHSVal, RHSVal;
+  if (getConstantValue(N->getOperand(0), LHSVal) &&
+      getConstantValue(N->getOperand(1), RHSVal)) {
+    SDLoc SL(N);
+    uint32_t K = Negate ?
+      (-LHSVal & 0xffff) | (-RHSVal << 16) :
+      (LHSVal & 0xffff) | (RHSVal << 16);
+    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+                              DAG.getTargetConstant(K, SL, MVT::i32));
+  }
+
+  return nullptr;
+}
+
+static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+  return packConstantV2I16(N, DAG, true);
+}
+
 /// AMDGPU specific code to select AMDGPU machine instructions for
 /// SelectionDAG operations.
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -84,12 +139,18 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AMDGPUArgumentUsageInfo>();
-    AU.addRequired<AMDGPUPerfHintAnalysis>();
     AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+#endif
     SelectionDAGISel::getAnalysisUsage(AU);
   }
 
+  bool matchLoadD16FromBuildVector(SDNode *N) const;
+
   bool runOnMachineFunction(MachineFunction &MF) override;
+  void PreprocessISelDAG() override;
   void Select(SDNode *N) override;
   StringRef getPassName() const override;
   void PostprocessISelDAG() override;
@@ -100,19 +161,24 @@ protected:
 private:
   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
   bool isNoNanSrc(SDValue N) const;
-  bool isInlineImmediate(const SDNode *N) const;
+  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+  bool isNegInlineImmediate(const SDNode *N) const {
+    return isInlineImmediate(N, true);
+  }
+
   bool isVGPRImm(const SDNode *N) const;
   bool isUniformLoad(const SDNode *N) const;
   bool isUniformBr(const SDNode *N) const;
 
   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
 
-  SDNode *glueCopyToM0(SDNode *N) const;
+  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
 
   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                        unsigned OffsetBits) const;
   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
@@ -120,10 +186,10 @@ private:
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
-                   SDValue &TFE) const;
+                   SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
-                         SDValue &SLC, SDValue &TFE) const;
+                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                          SDValue &SLC) const;
@@ -136,19 +202,19 @@ private:
 
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
-                         SDValue &TFE) const;
+                         SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &SLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;
 
-  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
-  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                               SDValue &Offset, SDValue &SLC) const;
 
   template <bool IsSigned>
-  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
 
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
@@ -164,6 +230,7 @@ private:
   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
 
   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -193,11 +260,13 @@ private:
   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 
-  bool SelectHi16Elt(SDValue In, SDValue &Src) const;
+  SDValue getHi16Elt(SDValue In) const;
 
   void SelectADD_SUB_I64(SDNode *N);
+  void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
+  void SelectDIV_FMAS(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectFMA_W_CHAIN(SDNode *N);
   void SelectFMUL_W_CHAIN(SDNode *N);
@@ -210,6 +279,10 @@ private:
   void SelectBRCOND(SDNode *N);
   void SelectFMAD_FMA(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
+  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+  void SelectDS_GWS(SDNode *N, unsigned IntrID);
+  void SelectINTRINSIC_W_CHAIN(SDNode *N);
+  void SelectINTRINSIC_VOID(SDNode *N);
 
 protected:
   // Include the pieces autogenerated from the target description.
@@ -235,11 +308,49 @@ public:
                           SDValue &Offset) override;
 
   bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void PreprocessISelDAG() override {}
+
 protected:
   // Include the pieces autogenerated from the target description.
 #include "R600GenDAGISel.inc"
 };
 
+static SDValue stripBitcast(SDValue Val) {
+  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16-bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+  In = stripBitcast(In);
+  if (In.getOpcode() != ISD::TRUNCATE)
+    return false;
+
+  SDValue Srl = In.getOperand(0);
+  if (Srl.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+      if (ShiftAmt->getZExtValue() == 16) {
+        Out = stripBitcast(Srl.getOperand(0));
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+// Look through operations that obscure just looking at the low 16-bits of the
+// same register.
+static SDValue stripExtractLoElt(SDValue In) {
+  if (In.getOpcode() == ISD::TRUNCATE) {
+    SDValue Src = In.getOperand(0);
+    if (Src.getValueType().getSizeInBits() == 32)
+      return stripBitcast(Src);
+  }
+
+  return In;
+}
+
 }  // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
@@ -247,6 +358,10 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
 INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
 INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+#ifdef EXPENSIVE_CHECKS
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+#endif
 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
 
@@ -265,10 +380,125 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
 }
 
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+#ifdef EXPENSIVE_CHECKS
+  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  for (auto &L : LI->getLoopsInPreorder()) {
+    assert(L->isLCSSAForm(DT));
+  }
+#endif
   Subtarget = &MF.getSubtarget<GCNSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
 }
 
+bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
+  assert(Subtarget->d16PreservesUnusedBits());
+  MVT VT = N->getValueType(0).getSimpleVT();
+  if (VT != MVT::v2i16 && VT != MVT::v2f16)
+    return false;
+
+  SDValue Lo = N->getOperand(0);
+  SDValue Hi = N->getOperand(1);
+
+  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
+
+  // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
+  // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
+  // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
+
+  // Need to check for possible indirect dependencies on the other half of the
+  // vector to avoid introducing a cycle.
+  if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
+    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+
+    SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
+    SDValue Ops[] = {
+      LdHi->getChain(), LdHi->getBasePtr(), TiedIn
+    };
+
+    unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
+    if (LdHi->getMemoryVT() == MVT::i8) {
+      LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
+        AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
+    } else {
+      assert(LdHi->getMemoryVT() == MVT::i16);
+    }
+
+    SDValue NewLoadHi =
+      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
+                                  Ops, LdHi->getMemoryVT(),
+                                  LdHi->getMemOperand());
+
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
+    return true;
+  }
+
+  // build_vector (load ptr), hi -> load_d16_lo ptr, hi
+  // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
+  // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
+  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
+  if (LdLo && Lo.hasOneUse()) {
+    SDValue TiedIn = getHi16Elt(Hi);
+    if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
+      return false;
+
+    SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
+    if (LdLo->getMemoryVT() == MVT::i8) {
+      LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
+        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
+    } else {
+      assert(LdLo->getMemoryVT() == MVT::i16);
+    }
+
+    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
+
+    SDValue Ops[] = {
+      LdLo->getChain(), LdLo->getBasePtr(), TiedIn
+    };
+
+    SDValue NewLoadLo =
+      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
+                                  Ops, LdLo->getMemoryVT(),
+                                  LdLo->getMemOperand());
+
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
+    return true;
+  }
+
+  return false;
+}
+
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+  if (!Subtarget->d16PreservesUnusedBits())
+    return;
+
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+  bool MadeChange = false;
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    if (N->use_empty())
+      continue;
+
+    switch (N->getOpcode()) {
+    case ISD::BUILD_VECTOR:
+      MadeChange |= matchLoadD16FromBuildVector(N);
+      break;
+    default:
+      break;
+    }
+  }
+
+  if (MadeChange) {
+    CurDAG->RemoveDeadNodes();
+    LLVM_DEBUG(dbgs() << "After PreProcess:\n";
+               CurDAG->dump(););
+  }
+}
+
 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
   if (TM.Options.NoNaNsFPMath)
     return true;
@@ -280,14 +510,26 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
   return CurDAG->isKnownNeverNaN(N);
 }
 
-bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
+                                           bool Negated) const {
+  if (N->isUndef())
+    return true;
+
   const SIInstrInfo *TII = Subtarget->getInstrInfo();
+  if (Negated) {
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+      return TII->isInlineConstant(-C->getAPIntValue());
+
+    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
 
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
-    return TII->isInlineConstant(C->getAPIntValue());
+  } else {
+    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+      return TII->isInlineConstant(C->getAPIntValue());
 
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
-    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+  }
 
   return false;
 }
@@ -340,37 +582,48 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
   }
 }
 
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
-  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
-      !Subtarget->ldsRequiresM0Init())
-    return N;
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
   const SITargetLowering& Lowering =
-      *static_cast<const SITargetLowering*>(getTargetLowering());
+    *static_cast<const SITargetLowering*>(getTargetLowering());
 
-  // Write max value to m0 before each load operation
+  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
 
-  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
-                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
+                                 Val);
 
   SDValue Glue = M0.getValue(1);
 
   SmallVector <SDValue, 8> Ops;
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-     Ops.push_back(N->getOperand(i));
-  }
+  Ops.push_back(M0); // Replace the chain.
+  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+    Ops.push_back(N->getOperand(i));
+
   Ops.push_back(Glue);
   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
 }
 
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
+  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
+  if (AS == AMDGPUAS::LOCAL_ADDRESS) { // LDS: m0 = -1, only if init is needed.
+    if (Subtarget->ldsRequiresM0Init())
+      return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+  } else if (AS == AMDGPUAS::REGION_ADDRESS) { // Region: m0 = getGDSSize().
+    MachineFunction &MF = CurDAG->getMachineFunction();
+    unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
+    return
+        glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
+  }
+  return N; // No m0 copy was glued; the node is returned unchanged.
+}
+
 MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                   EVT VT) const {
   SDNode *Lo = CurDAG->getMachineNode(
       AMDGPU::S_MOV_B32, DL, MVT::i32,
-      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
   SDNode *Hi =
       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
-                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
   const SDValue Ops[] = {
       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
@@ -385,31 +638,23 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
     return AMDGPU::SReg_32_XM0RegClassID;
   case 2:
     return AMDGPU::SReg_64RegClassID;
+  case 3:
+    return AMDGPU::SGPR_96RegClassID;
   case 4:
     return AMDGPU::SReg_128RegClassID;
+  case 5:
+    return AMDGPU::SGPR_160RegClassID;
   case 8:
     return AMDGPU::SReg_256RegClassID;
   case 16:
     return AMDGPU::SReg_512RegClassID;
+  case 32:
+    return AMDGPU::SReg_1024RegClassID;
   }
 
   llvm_unreachable("invalid vector size");
 }
 
-static bool getConstantValue(SDValue N, uint32_t &Out) {
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    Out = C->getAPIntValue().getZExtValue();
-    return true;
-  }
-
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
-    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
-    return true;
-  }
-
-  return false;
-}
-
 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
   EVT VT = N->getValueType(0);
   unsigned NumVectorElts = VT.getVectorNumElements();
@@ -423,12 +668,12 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
     return;
   }
 
-  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                   "supported yet");
-  // 16 = Max Num Vector Elements
+  // 32 = Max Num Vector Elements
   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
   // 1 = Vector Register Class
-  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
 
   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
   bool IsRegSeq = true;
@@ -470,10 +715,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
 
   if (isa<AtomicSDNode>(N) ||
       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
-       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
+       Opc == ISD::ATOMIC_LOAD_FADD ||
        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
-    N = glueCopyToM0(N);
+    N = glueCopyToM0LDSInit(N);
 
   switch (Opc) {
   default:
@@ -491,6 +736,13 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     SelectADD_SUB_I64(N);
     return;
   }
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:
+    if (N->getValueType(0) != MVT::i32)
+      break;
+
+    SelectAddcSubb(N);
+    return;
   case ISD::UADDO:
   case ISD::USUBO: {
     SelectUADDO_USUBO(N);
@@ -511,12 +763,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     unsigned NumVectorElts = VT.getVectorNumElements();
     if (VT.getScalarSizeInBits() == 16) {
       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
-        uint32_t LHSVal, RHSVal;
-        if (getConstantValue(N->getOperand(0), LHSVal) &&
-            getConstantValue(N->getOperand(1), RHSVal)) {
-          uint32_t K = LHSVal | (RHSVal << 16);
-          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
-                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
+          ReplaceNode(N, Packed);
           return;
         }
       }
@@ -571,7 +819,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::STORE:
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE: {
-    N = glueCopyToM0(N);
+    N = glueCopyToM0LDSInit(N);
     break;
   }
 
@@ -606,6 +854,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     SelectDIV_SCALE(N);
     return;
   }
+  case AMDGPUISD::DIV_FMAS: {
+    SelectDIV_FMAS(N);
+    return;
+  }
   case AMDGPUISD::MAD_I64_I32:
   case AMDGPUISD::MAD_U64_U32: {
     SelectMAD_64_32(N);
@@ -649,6 +901,16 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
       SelectCode(N);
       return;
     }
+
+    break;
+  }
+  case ISD::INTRINSIC_W_CHAIN: {
+    SelectINTRINSIC_W_CHAIN(N);
+    return;
+  }
+  case ISD::INTRINSIC_VOID: {
+    SelectINTRINSIC_VOID(N);
+    return;
   }
   }
 
@@ -763,6 +1025,19 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
   ReplaceNode(N, RegSequence);
 }
 
+void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { // ADDCARRY/SUBCARRY.
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue CI = N->getOperand(2); // Third operand; presumably the carry-in.
+
+  unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
+                                                 : AMDGPU::V_SUBB_U32_e64;
+  CurDAG->SelectNodeTo( // Morphs N in place, keeping its result types.
+      N, Opc, N->getVTList(),
+      {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+}
+
 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
   // carry out despite the _i32 name. These were renamed in VI to _U32.
@@ -770,8 +1045,10 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
 
-  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
-                       { N->getOperand(0), N->getOperand(1) });
+  CurDAG->SelectNodeTo(
+      N, Opc, N->getVTList(),
+      {N->getOperand(0), N->getOperand(1),
+       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
 }
 
 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
@@ -816,6 +1093,35 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
+void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) { // DIV_FMAS -> V_DIV_FMAS.
+  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
+  const SIRegisterInfo *TRI = ST->getRegisterInfo();
+
+  SDLoc SL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(VT == MVT::f32 || VT == MVT::f64);
+
+  unsigned Opc
+    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
+
+  SDValue CarryIn = N->getOperand(3);
+  // V_DIV_FMAS implicitly reads VCC, so copy operand 3 into VCC first.
+  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
+                                     TRI->getVCC(), CarryIn, SDValue());
+
+  SDValue Ops[10]; // Slots 0-7 are set by the selectors, 8-9 from the copy.
+
+  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
+
+  Ops[8] = VCC; // Result 0 of the copy-to-VCC node.
+  Ops[9] = VCC.getValue(1); // Result 1 of the copy (presumably its glue).
+
+  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
 // We need to handle this here because tablegen doesn't support matching
 // instructions with multiple outputs.
 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
@@ -829,13 +1135,13 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
                                          unsigned OffsetBits) const {
   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
       (OffsetBits == 8 && !isUInt<8>(Offset)))
     return false;
 
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
+  if (Subtarget->hasUsableDSOffset() ||
       Subtarget->unsafeDSOffsetFoldingEnabled())
     return true;
 
@@ -871,13 +1177,20 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                       Zero, Addr.getOperand(1));
 
         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+          SmallVector<SDValue, 3> Opnds;
+          Opnds.push_back(Zero);
+          Opnds.push_back(Addr.getOperand(1));
+
           // FIXME: Select to VOP3 version for with-carry.
-          unsigned SubOp = Subtarget->hasAddNoCarry() ?
-            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
+          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+          if (Subtarget->hasAddNoCarry()) {
+            SubOp = AMDGPU::V_SUB_U32_e64;
+            Opnds.push_back(
+                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
+          }
 
-          MachineSDNode *MachineSub
-            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
-                                     Zero, Addr.getOperand(1));
+          MachineSDNode *MachineSub =
+              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
 
           Base = SDValue(MachineSub, 0);
           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
@@ -945,12 +1258,18 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                       Zero, Addr.getOperand(1));
 
         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
-          unsigned SubOp = Subtarget->hasAddNoCarry() ?
-            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
+          SmallVector<SDValue, 3> Opnds;
+          Opnds.push_back(Zero);
+          Opnds.push_back(Addr.getOperand(1));
+          unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+          if (Subtarget->hasAddNoCarry()) {
+            SubOp = AMDGPU::V_SUB_U32_e64;
+            Opnds.push_back(
+                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
+          }
 
           MachineSDNode *MachineSub
-            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
-                                     Zero, Addr.getOperand(1));
+            = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
 
           Base = SDValue(MachineSub, 0);
           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
@@ -989,7 +1308,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                      SDValue &Offset, SDValue &Offen,
                                      SDValue &Idxen, SDValue &Addr64,
                                      SDValue &GLC, SDValue &SLC,
-                                     SDValue &TFE) const {
+                                     SDValue &TFE, SDValue &DLC) const {
   // Subtarget prefers to use flat instruction
   if (Subtarget->useFlatForGlobal())
     return false;
@@ -1001,6 +1320,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
   if (!SLC.getNode())
     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1079,15 +1399,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &Offset, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE) const {
+                                           SDValue &SLC, SDValue &TFE,
+                                           SDValue &DLC) const {
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!Subtarget->hasAddr64())
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE))
+              GLC, SLC, TFE, DLC))
     return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1109,9 +1430,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Offset,
                                            SDValue &SLC) const {
   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1127,10 +1448,10 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const
     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                               FI->getValueType(0));
 
-    // If we can resolve this to a frame index access, this is relative to the
-    // frame pointer SGPR.
-    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
-                                                   MVT::i32));
+    // If we can resolve this to a frame index access, this will be relative to
+    // either the stack or frame pointer SGPR.
+    return std::make_pair(
+        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
   }
 
   // If we don't know this private access is a local stack object, it needs to
@@ -1236,13 +1557,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &SOffset, SDValue &Offset,
                                            SDValue &GLC, SDValue &SLC,
-                                           SDValue &TFE) const {
+                                           SDValue &TFE, SDValue &DLC) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-              GLC, SLC, TFE))
+              GLC, SLC, TFE, DLC))
     return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1264,57 +1585,42 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset
                                            ) const {
-  SDValue GLC, SLC, TFE;
+  SDValue GLC, SLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &SLC) const {
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  int64_t OffsetVal = 0;
-
-  if (Subtarget->hasFlatInstOffsets() &&
-      CurDAG->isBaseWithConstantOffset(Addr)) {
-    SDValue N0 = Addr.getOperand(0);
-    SDValue N1 = Addr.getOperand(1);
-    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
-    if ((IsSigned && isInt<13>(COffsetVal)) ||
-        (!IsSigned && isUInt<12>(COffsetVal))) {
-      Addr = N0;
-      OffsetVal = COffsetVal;
-    }
-  }
-
-  VAddr = Addr;
-  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
-  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
-
-  return true;
+  return static_cast<const SITargetLowering*>(getTargetLowering())->
+    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
@@ -1619,9 +1925,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     return;
   }
 
+  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
+  const SIRegisterInfo *TRI = ST->getRegisterInfo();
+
   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
-  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
+  unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
   SDLoc SL(N);
 
   if (!UseSCCBr) {
@@ -1638,9 +1947,13 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     // the S_AND when is unnecessary. But it would be better to add a separate
     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
     // catches both cases.
-    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
-                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
-                               Cond),
+    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
+                                                         : AMDGPU::S_AND_B64,
+                     SL, MVT::i1,
+                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
+                                                        : AMDGPU::EXEC,
+                                         MVT::i1),
+                    Cond),
                    0);
   }
 
@@ -1761,6 +2074,183 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
   CurDAG->RemoveDeadNode(N);
 }
 
+void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
+  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
+  // be copied to an SGPR with readfirstlane.
+  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
+    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+  SDValue Chain = N->getOperand(0); // Read before glueCopyToM0 replaces it.
+  SDValue Ptr = N->getOperand(2); // Operand 1 is the intrinsic ID.
+  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+  MachineMemOperand *MMO = M->getMemOperand();
+  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+  SDValue Offset;
+  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
+    SDValue PtrBase = Ptr.getOperand(0);
+    SDValue PtrOffset = Ptr.getOperand(1);
+
+    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+      N = glueCopyToM0(N, PtrBase); // Base goes to m0, constant to the offset.
+      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
+    }
+  }
+
+  if (!Offset) { // Nothing was folded: the whole pointer goes to m0.
+    N = glueCopyToM0(N, Ptr);
+    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
+  }
+
+  SDValue Ops[] = {
+    Offset,
+    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
+    Chain,
+    N->getOperand(N->getNumOperands() - 1) // New glue
+  };
+
+  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
+static unsigned gwsIntrinToOpcode(unsigned IntrID) { // ds_gws_* -> DS_GWS_*.
+  switch (IntrID) {
+  case Intrinsic::amdgcn_ds_gws_init:
+    return AMDGPU::DS_GWS_INIT;
+  case Intrinsic::amdgcn_ds_gws_barrier:
+    return AMDGPU::DS_GWS_BARRIER;
+  case Intrinsic::amdgcn_ds_gws_sema_v:
+    return AMDGPU::DS_GWS_SEMA_V;
+  case Intrinsic::amdgcn_ds_gws_sema_br:
+    return AMDGPU::DS_GWS_SEMA_BR;
+  case Intrinsic::amdgcn_ds_gws_sema_p:
+    return AMDGPU::DS_GWS_SEMA_P;
+  case Intrinsic::amdgcn_ds_gws_sema_release_all:
+    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
+  default: // Callers must only pass one of the six IDs handled above.
+    llvm_unreachable("not a gws intrinsic");
+  }
+}
+
+void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
+  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+      !Subtarget->hasGWSSemaReleaseAll()) {
+    // Not available on this subtarget: defer to SelectCode and let it error.
+    SelectCode(N);
+    return;
+  }
+
+  // Operands: chain, intrinsic ID, [vsrc,] offset (vsrc is optional).
+  const bool HasVSrc = N->getNumOperands() == 4;
+  assert(HasVSrc || N->getNumOperands() == 3);
+
+  SDLoc SL(N);
+  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
+  int ImmOffset = 0;
+  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+  MachineMemOperand *MMO = M->getMemOperand();
+
+  // Don't worry if the offset ends up in a VGPR. Only one lane will have
+  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
+
+  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+  // offset field) % 64. Some versions of the programming guide omit the m0
+  // part, or claim it's from offset 0.
+  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
+    // If we have a constant offset, try to use the default value for m0 as a
+    // base to possibly avoid setting it up.
+    glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
+    ImmOffset = ConstOffset->getZExtValue() + 1; // m0[21:16] == 63; +1 wraps.
+  } else {
+    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
+      ImmOffset = BaseOffset.getConstantOperandVal(1);
+      BaseOffset = BaseOffset.getOperand(0);
+    }
+
+    // Prefer to do the shift in an SGPR since it should be possible to use m0
+    // as the result directly. If it's already an SGPR, it will be eliminated
+    // later.
+    SDNode *SGPROffset
+      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
+                               BaseOffset);
+    // Shift left by 16 so the value lands in the m0[21:16] field.
+    SDNode *M0Base
+      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+                               SDValue(SGPROffset, 0),
+                               CurDAG->getTargetConstant(16, SL, MVT::i32));
+    glueCopyToM0(N, SDValue(M0Base, 0));
+  }
+
+  SDValue V0;
+  SDValue Chain = N->getOperand(0); // Operand 0 is now the m0 copy.
+  SDValue Glue;
+  if (HasVSrc) {
+    SDValue VSrc0 = N->getOperand(2);
+
+    // The manual doesn't mention this, but it seems only v0 works.
+    V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
+
+    SDValue CopyToV0 = CurDAG->getCopyToReg(
+      N->getOperand(0), SL, V0, VSrc0,
+      N->getOperand(N->getNumOperands() - 1)); // Glue appended by the m0 copy.
+    Chain = CopyToV0;
+    Glue = CopyToV0.getValue(1);
+  }
+
+  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
+
+  // TODO: Can this just be removed from the instruction?
+  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
+
+  const unsigned Opc = gwsIntrinToOpcode(IntrID);
+  SmallVector<SDValue, 5> Ops; // [V0,] offset, gds, chain [, glue]
+  if (HasVSrc)
+    Ops.push_back(V0);
+  Ops.push_back(OffsetField);
+  Ops.push_back(GDS);
+  Ops.push_back(Chain);
+
+  if (HasVSrc)
+    Ops.push_back(Glue);
+
+  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
+  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  switch (IntrID) {
+  case Intrinsic::amdgcn_ds_append:
+  case Intrinsic::amdgcn_ds_consume: {
+    if (N->getValueType(0) != MVT::i32)
+      break; // Only i32 results are custom-selected; others use SelectCode.
+    SelectDSAppendConsume(N, IntrID);
+    return;
+  }
+  }
+
+  SelectCode(N); // Fallback for every intrinsic not handled above.
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
+  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  switch (IntrID) { // All six ds_gws_* intrinsics share one custom selector.
+  case Intrinsic::amdgcn_ds_gws_init:
+  case Intrinsic::amdgcn_ds_gws_barrier:
+  case Intrinsic::amdgcn_ds_gws_sema_v:
+  case Intrinsic::amdgcn_ds_gws_sema_br:
+  case Intrinsic::amdgcn_ds_gws_sema_p:
+  case Intrinsic::amdgcn_ds_gws_sema_release_all:
+    SelectDS_GWS(N, IntrID);
+    return;
+  default:
+    break;
+  }
+
+  SelectCode(N); // Fallback for every intrinsic not handled above.
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                             unsigned &Mods) const {
   Mods = 0;
@@ -1796,6 +2286,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
   return isNoNanSrc(Src);
 }
 
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
+                                            SDValue &SrcMods) const {
+  if (In.getValueType() == MVT::f32) // Only f32 goes through modifier folding.
+    return SelectVOP3Mods(In, Src, SrcMods);
+  Src = In; // Any other type: pass the operand through with modifiers = 0.
+  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
     return false;
@@ -1833,41 +2332,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
   return true;
 }
 
-static SDValue stripBitcast(SDValue Val) {
-  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
-}
-
-// Figure out if this is really an extract of the high 16-bits of a dword.
-static bool isExtractHiElt(SDValue In, SDValue &Out) {
-  In = stripBitcast(In);
-  if (In.getOpcode() != ISD::TRUNCATE)
-    return false;
-
-  SDValue Srl = In.getOperand(0);
-  if (Srl.getOpcode() == ISD::SRL) {
-    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
-      if (ShiftAmt->getZExtValue() == 16) {
-        Out = stripBitcast(Srl.getOperand(0));
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
-// Look through operations that obscure just looking at the low 16-bits of the
-// same register.
-static SDValue stripExtractLoElt(SDValue In) {
-  if (In.getOpcode() == ISD::TRUNCATE) {
-    SDValue Src = In.getOperand(0);
-    if (Src.getValueType().getSizeInBits() == 32)
-      return stripBitcast(Src);
-  }
-
-  return In;
-}
-
 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
   unsigned Mods = 0;
@@ -2020,39 +2484,31 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
   return true;
 }
 
-// TODO: Can we identify things like v_mad_mixhi_f16?
-bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
-  if (In.isUndef()) {
-    Src = In;
-    return true;
-  }
+SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
+  if (In.isUndef())
+    return CurDAG->getUNDEF(MVT::i32);
 
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
     SDLoc SL(In);
-    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
-    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
-                                                 SL, MVT::i32, K);
-    Src = SDValue(MovK, 0);
-    return true;
+    return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
   }
 
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
     SDLoc SL(In);
-    SDValue K = CurDAG->getTargetConstant(
+    return CurDAG->getConstant(
       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
-    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
-                                                 SL, MVT::i32, K);
-    Src = SDValue(MovK, 0);
-    return true;
   }
 
-  return isExtractHiElt(In, Src);
+  SDValue Src;
+  if (isExtractHiElt(In, Src))
+    return Src;
+
+  return SDValue();
 }
 
 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
-  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    return false;
-  }
+  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
+
   const SIRegisterInfo *SIRI =
     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
   const SIInstrInfo * SII =
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 6951c915b177..39016ed37193 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,7 +20,6 @@
 #include "AMDGPU.h"
 #include "AMDGPUCallLowering.h"
 #include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
@@ -65,9 +63,9 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
   case MVT::v2f32:
   case MVT::v4i16:
   case MVT::v4f16: {
-    // Up to SGPR0-SGPR39
+    // Up to SGPR0-SGPR105
     return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::SGPR_64RegClass, 20);
+                          &AMDGPU::SGPR_64RegClass, 53);
   }
   default:
     return false;
@@ -152,15 +150,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
 
+  setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
+
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
 
   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
 
+  setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
+
   setOperationAction(ISD::LOAD, MVT::i64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
 
@@ -237,15 +244,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
 
+  setOperationAction(ISD::STORE, MVT::v3f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
+
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::STORE, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
 
   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
 
+  setOperationAction(ISD::STORE, MVT::v32f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
+
   setOperationAction(ISD::STORE, MVT::i64, Promote);
   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
 
@@ -327,16 +343,28 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   // Expand to fneg + fadd.
   setOperationAction(ISD::FSUB, MVT::f64, Expand);
 
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
@@ -394,7 +422,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
 
   static const MVT::SimpleValueType VectorIntTypes[] = {
-    MVT::v2i32, MVT::v4i32
+    MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
   };
 
   for (MVT VT : VectorIntTypes) {
@@ -436,7 +464,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   }
 
   static const MVT::SimpleValueType FloatVectorTypes[] = {
-    MVT::v2f32, MVT::v4f32
+     MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
   };
 
   for (MVT VT : FloatVectorTypes) {
@@ -478,9 +506,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
 
+  setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
+
   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
 
+  setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
+
   // There are no libcalls of any kind.
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
@@ -499,6 +533,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   // vector compares until that is fixed.
   setHasMultipleConditionRegisters(true);
 
+  setMinCmpXchgSizeInBits(32);
+  setSupportsUnalignedAtomics(false);
+
   PredictableSelectIsExpensive = false;
 
   // We want to find all load dependencies for long chains of stores to enable
@@ -592,6 +629,7 @@ static bool hasSourceMods(const SDNode *N) {
   case ISD::FDIV:
   case ISD::FREM:
   case ISD::INLINEASM:
+  case ISD::INLINEASM_BR:
   case AMDGPUISD::INTERP_P1:
   case AMDGPUISD::INTERP_P2:
   case AMDGPUISD::DIV_SCALE:
@@ -640,7 +678,8 @@ bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
 
 // The backend supports 32 and 64 bit floating point immediates.
 // FIXME: Why are we reporting vectors of FP immediates as legal?
-bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                        bool ForCodeSize) const {
   EVT ScalarVT = VT.getScalarType();
   return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
          (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
@@ -690,8 +729,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
   return (OldSize < 32);
 }
 
-bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
-                                                   EVT CastTy) const {
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
+                                                   const SelectionDAG &DAG,
+                                                   const MachineMemOperand &MMO) const {
 
   assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
 
@@ -701,8 +741,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
   unsigned LScalarSize = LoadTy.getScalarSizeInBits();
   unsigned CastScalarSize = CastTy.getScalarSizeInBits();
 
-  return (LScalarSize < CastScalarSize) ||
-         (CastScalarSize >= 32);
+  if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
+    return false;
+
+  bool Fast = false;
+  return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
+                            MMO, &Fast) && Fast;
 }
 
 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
@@ -849,9 +893,6 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
 CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                   bool IsVarArg) {
   switch (CC) {
-  case CallingConv::AMDGPU_KERNEL:
-  case CallingConv::SPIR_KERNEL:
-    llvm_unreachable("kernels should not be handled here");
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
@@ -864,8 +905,10 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::Fast:
   case CallingConv::Cold:
     return CC_AMDGPU_Func;
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
   default:
-    report_fatal_error("Unsupported calling convention.");
+    report_fatal_error("Unsupported calling convention for call");
   }
 }
 
@@ -1010,9 +1053,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
       if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
         MemVT = MemVT.getScalarType();
 
-      if (MemVT.isExtended()) {
-        // This should really only happen if we have vec3 arguments
-        assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+      // Round vec3/vec5 arguments up to a power-of-two vector type.
+      if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
+        assert(MemVT.getVectorNumElements() == 3 ||
+               MemVT.getVectorNumElements() == 5);
         MemVT = MemVT.getPow2VectorType(State.getContext());
       }
 
@@ -1372,6 +1416,41 @@ SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
 }
 
+// Split a vector type into two parts. The first part is a power of two vector.
+// The second part is whatever is left over, and is a scalar if it would
+// otherwise be a 1-vector.
+std::pair<EVT, EVT>
+AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
+  EVT LoVT, HiVT;
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
+  LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
+  HiVT = NumElts - LoNumElts == 1
+             ? EltVT
+             : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
+  return std::make_pair(LoVT, HiVT);
+}
+
+// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
+// scalar.
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
+                                  const EVT &LoVT, const EVT &HiVT,
+                                  SelectionDAG &DAG) const {
+  assert(LoVT.getVectorNumElements() +
+                 (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
+             N.getValueType().getVectorNumElements() &&
+         "More vector elements requested than available!");
+  auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+                           DAG.getConstant(0, DL, IdxTy));
+  SDValue Hi = DAG.getNode(
+      HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
+      HiVT, N, DAG.getConstant(LoVT.getVectorNumElements(), DL, IdxTy));
+  return std::make_pair(Lo, Hi);
+}
+
 SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
                                               SelectionDAG &DAG) const {
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1393,9 +1472,9 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
   EVT LoMemVT, HiMemVT;
   SDValue Lo, Hi;
 
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
-  std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+  std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
+  std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
+  std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
 
   unsigned Size = LoMemVT.getStoreSize();
   unsigned BaseAlign = Load->getAlignment();
@@ -1410,15 +1489,52 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
                      HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
                      HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
 
-  SDValue Ops[] = {
-    DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
-    DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
-                LoLoad.getValue(1), HiLoad.getValue(1))
-  };
+  auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
+  SDValue Join;
+  if (LoVT == HiVT) {
+    // The vector has a power-of-two number of elements, so it was split evenly.
+    Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
+  } else {
+    Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
+                       DAG.getConstant(0, SL, IdxTy));
+    Join = DAG.getNode(HiVT.isVector() ? ISD::INSERT_SUBVECTOR
+                                       : ISD::INSERT_VECTOR_ELT,
+                       SL, VT, Join, HiLoad,
+                       DAG.getConstant(LoVT.getVectorNumElements(), SL, IdxTy));
+  }
+
+  SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+                                     LoLoad.getValue(1), HiLoad.getValue(1))};
 
   return DAG.getMergeValues(Ops, SL);
 }
 
+// Widen a vector load from vec3 to vec4.
+SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(VT.getVectorNumElements() == 3);
+  SDValue BasePtr = Load->getBasePtr();
+  EVT MemVT = Load->getMemoryVT();
+  SDLoc SL(Op);
+  const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
+  unsigned BaseAlign = Load->getAlignment();
+
+  EVT WideVT =
+      EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
+  EVT WideMemVT =
+      EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
+  SDValue WideLoad = DAG.getExtLoad(
+      Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
+      WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
+  return DAG.getMergeValues(
+      {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
+                   DAG.getConstant(0, SL, getVectorIdxTy(DAG.getDataLayout()))),
+       WideLoad.getValue(1)},
+      SL);
+}
+
 SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
                                                SelectionDAG &DAG) const {
   StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1439,9 +1555,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
   EVT LoMemVT, HiMemVT;
   SDValue Lo, Hi;
 
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
-  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+  std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
+  std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
+  std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
 
   SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
 
@@ -2788,6 +2904,54 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
   return true;
 }
 
+// Find the load or store node reachable from the given pattern root.
+// Roots may be build_vector, bitconvert, or combinations of the two.
+static MemSDNode* findMemSDNode(SDNode *N) {
+  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+    return MN;
+  assert(isa<BuildVectorSDNode>(N));
+  for (SDValue V : N->op_values())
+    if (MemSDNode *MN =
+          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+      return MN;
+  llvm_unreachable("cannot find MemSDNode in the pattern!");
+}
+
+bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
+                                            SelectionDAG &DAG,
+                                            SDNode *N,
+                                            SDValue Addr,
+                                            SDValue &VAddr,
+                                            SDValue &Offset,
+                                            SDValue &SLC) const {
+  const GCNSubtarget &ST =
+        DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+  int64_t OffsetVal = 0;
+
+  if (ST.hasFlatInstOffsets() &&
+      (!ST.hasFlatSegmentOffsetBug() ||
+       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+      DAG.isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+    const SIInstrInfo *TII = ST.getInstrInfo();
+    if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
+                               IsSigned)) {
+      Addr = N0;
+      OffsetVal = COffsetVal;
+    }
+  }
+
+  VAddr = Addr;
+  Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+  SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
+
+  return true;
+}
+
 // Replace load of an illegal type with a store of a bitcast to a friendlier
 // type.
 SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
@@ -2812,7 +2976,8 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
     // Expand unaligned loads earlier than legalization. Due to visitation order
     // problems during legalization, the emitted instructions to pack and unpack
     // the bytes again are not eliminated in the case of an unaligned copy.
-    if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+    if (!allowsMisalignedMemoryAccesses(
+            VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
       if (VT.isVector())
         return scalarizeVectorLoad(LN, DAG);
 
@@ -2864,7 +3029,8 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
     // order problems during legalization, the emitted instructions to pack and
     // unpack the bytes again are not eliminated in the case of an unaligned
     // copy.
-    if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+    if (!allowsMisalignedMemoryAccesses(
+            VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) {
       if (VT.isVector())
         return scalarizeVectorStore(SN, DAG);
 
@@ -3049,30 +3215,44 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
 
 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
-  if (N->getValueType(0) != MVT::i64)
-    return SDValue();
-
-  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!RHS)
     return SDValue();
 
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
   unsigned ShiftAmt = RHS->getZExtValue();
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
+  // Fold (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1).
+  // This improves the ability to match BFE patterns in isel.
+  if (LHS.getOpcode() == ISD::AND) {
+    if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
+      if (Mask->getAPIntValue().isShiftedMask() &&
+          Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
+        return DAG.getNode(
+            ISD::AND, SL, VT,
+            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
+            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
+      }
+    }
+  }
+
+  if (VT != MVT::i64)
+    return SDValue();
+
   if (ShiftAmt < 32)
     return SDValue();
 
   // srl i64:x, C for C >= 32
   // =>
   //   build_pair (srl hi_32(x), C - 32), 0
-
-  SelectionDAG &DAG = DCI.DAG;
-  SDLoc SL(N);
-
   SDValue One = DAG.getConstant(1, SL, MVT::i32);
   SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
 
-  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
-  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
-                           VecOp, One);
+  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
 
   SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
   SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
@@ -3090,7 +3270,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
   SDValue Src = N->getOperand(0);
 
   // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
-  if (Src.getOpcode() == ISD::BITCAST) {
+  if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
     SDValue Vec = Src.getOperand(0);
     if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
       SDValue Elt0 = Vec.getOperand(0);
@@ -3478,13 +3658,11 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
 
   if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
     SelectionDAG &DAG = DCI.DAG;
-    if ((DAG.isConstantValueOfAnyType(True) ||
-         DAG.isConstantValueOfAnyType(True)) &&
-        (!DAG.isConstantValueOfAnyType(False) &&
-         !DAG.isConstantValueOfAnyType(False))) {
+    if (DAG.isConstantValueOfAnyType(True) &&
+        !DAG.isConstantValueOfAnyType(False)) {
       // Swap cmp + select pair to move constant to false input.
       // This will allow using VOPC cndmasks more often.
-      // select (setcc x, y), k, x -> select (setcc y, x) x, x
+      // select (setcc x, y), k, x -> select (setccinv x, y), x, k
 
       SDLoc SL(N);
       ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -3594,6 +3772,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
       RHS = RHS.getOperand(0);
 
     SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
+    if (Res.getOpcode() != ISD::FADD)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3613,6 +3793,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
 
     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
+    if (Res.getOpcode() != Opc)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3640,6 +3822,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
       RHS = RHS.getOperand(0);
 
     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
+    if (Res.getOpcode() != Opc)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3668,6 +3852,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
     unsigned Opposite = inverseMinMax(Opc);
 
     SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+    if (Res.getOpcode() != Opposite)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -3678,6 +3864,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
       Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
 
     SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+    if (Res.getOpcode() != AMDGPUISD::FMED3)
+      return SDValue(); // Op got folded away.
     if (!N0.hasOneUse())
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
@@ -4051,9 +4239,19 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
                                              const ArgDescriptor &Arg) const {
   assert(Arg && "Attempting to load missing argument");
 
-  if (Arg.isRegister())
-    return CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL);
-  return loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
+  SDValue V = Arg.isRegister() ?
+    CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
+    loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
+
+  if (!Arg.isMasked())
+    return V;
+
+  unsigned Mask = Arg.getMask();
+  unsigned Shift = countTrailingZeros<unsigned>(Mask);
+  V = DAG.getNode(ISD::SRL, SL, VT, V,
+                  DAG.getShiftAmountConstant(Shift, VT, SL));
+  return DAG.getNode(ISD::AND, SL, VT, V,
+                     DAG.getConstant(Mask >> Shift, SL, VT));
 }
 
 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
@@ -4175,6 +4373,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+  NODE_NAME_CASE(LDS)
   NODE_NAME_CASE(KILL)
   NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
@@ -4185,24 +4384,38 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(INTERP_MOV)
   NODE_NAME_CASE(INTERP_P1)
   NODE_NAME_CASE(INTERP_P2)
+  NODE_NAME_CASE(INTERP_P1LL_F16)
+  NODE_NAME_CASE(INTERP_P1LV_F16)
+  NODE_NAME_CASE(INTERP_P2_F16)
+  NODE_NAME_CASE(LOAD_D16_HI)
+  NODE_NAME_CASE(LOAD_D16_LO)
+  NODE_NAME_CASE(LOAD_D16_HI_I8)
+  NODE_NAME_CASE(LOAD_D16_HI_U8)
+  NODE_NAME_CASE(LOAD_D16_LO_I8)
+  NODE_NAME_CASE(LOAD_D16_LO_U8)
   NODE_NAME_CASE(STORE_MSKOR)
   NODE_NAME_CASE(LOAD_CONSTANT)
   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
-  NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
   NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
+  NODE_NAME_CASE(DS_ORDERED_COUNT)
   NODE_NAME_CASE(ATOMIC_CMP_SWAP)
   NODE_NAME_CASE(ATOMIC_INC)
   NODE_NAME_CASE(ATOMIC_DEC)
-  NODE_NAME_CASE(ATOMIC_LOAD_FADD)
   NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
   NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
   NODE_NAME_CASE(BUFFER_LOAD)
+  NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
+  NODE_NAME_CASE(BUFFER_LOAD_USHORT)
+  NODE_NAME_CASE(BUFFER_LOAD_BYTE)
+  NODE_NAME_CASE(BUFFER_LOAD_SHORT)
   NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
   NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
   NODE_NAME_CASE(SBUFFER_LOAD)
   NODE_NAME_CASE(BUFFER_STORE)
+  NODE_NAME_CASE(BUFFER_STORE_BYTE)
+  NODE_NAME_CASE(BUFFER_STORE_SHORT)
   NODE_NAME_CASE(BUFFER_STORE_FORMAT)
   NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
   NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
@@ -4216,6 +4429,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(BUFFER_ATOMIC_OR)
   NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
   NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
+  NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
+  NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
+  NODE_NAME_CASE(ATOMIC_FADD)
+  NODE_NAME_CASE(ATOMIC_PK_FADD)
 
   case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
   }
@@ -4367,6 +4584,23 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     }
     break;
   }
+  case AMDGPUISD::BUFFER_LOAD_UBYTE:  {
+    Known.Zero.setHighBits(24);
+    break;
+  }
+  case AMDGPUISD::BUFFER_LOAD_USHORT: {
+    Known.Zero.setHighBits(16);
+    break;
+  }
+  case AMDGPUISD::LDS: {
+    auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
+    unsigned Align = GA->getGlobal()->getAlignment();
+
+    Known.Zero.setHighBits(16);
+    if (Align)
+      Known.Zero.setLowBits(Log2_32(Align));
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
     switch (IID) {
@@ -4412,6 +4646,14 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
   case AMDGPUISD::CARRY:
   case AMDGPUISD::BORROW:
     return 31;
+  case AMDGPUISD::BUFFER_LOAD_BYTE:
+    return 25;
+  case AMDGPUISD::BUFFER_LOAD_SHORT:
+    return 17;
+  case AMDGPUISD::BUFFER_LOAD_UBYTE:
+    return 24;
+  case AMDGPUISD::BUFFER_LOAD_USHORT:
+    return 16;
   case AMDGPUISD::FP_TO_FP16:
   case AMDGPUISD::FP16_ZEXT:
     return 16;
@@ -4519,7 +4761,12 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
 
 TargetLowering::AtomicExpansionKind
 AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
-  if (RMW->getOperation() == AtomicRMWInst::Nand)
+  switch (RMW->getOperation()) {
+  case AtomicRMWInst::Nand:
+  case AtomicRMWInst::FAdd:
+  case AtomicRMWInst::FSub:
     return AtomicExpansionKind::CmpXChg;
-  return AtomicExpansionKind::None;
+  default:
+    return AtomicExpansionKind::None;
+  }
 }
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 0d22cb2e3e20..fe7ad694943d 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,9 +110,23 @@ protected:
   SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
   SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
 
+  /// Split a vector type into two parts. The first part is a power of two
+  /// vector. The second part is whatever is left over, and is a scalar if it
+  /// would otherwise be a 1-vector.
+  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;
+
+  /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
+  /// scalar.
+  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
+                                          const EVT &LoVT, const EVT &HighVT,
+                                          SelectionDAG &DAG) const;
+
   /// Split a vector load into 2 loads of half the vector.
   SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
 
+  /// Widen a vector load from vec3 to vec4.
+  SDValue WidenVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
   /// Split a vector store into 2 stores of half the vector.
   SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
 
@@ -162,13 +175,15 @@ public:
   MVT getVectorIdxTy(const DataLayout &) const override;
   bool isSelectSupported(SelectSupportKind) const override;
 
-  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+  bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                    bool ForCodeSize) const override;
   bool ShouldShrinkFPConstant(EVT VT) const override;
   bool shouldReduceLoadWidth(SDNode *Load,
                              ISD::LoadExtType ExtType,
                              EVT ExtVT) const override;
 
-  bool isLoadBitCastBeneficial(EVT, EVT) const final;
+  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
+                               const MachineMemOperand &MMO) const final;
 
   bool storeOfVectorConstantIsCheap(EVT MemVT,
                                     unsigned NumElem,
@@ -212,15 +227,15 @@ public:
 
   const char* getTargetNodeName(unsigned Opcode) const override;
 
-  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection
-  // for AMDGPU.
-  // A commit ( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036
-  // 91177308-0d34-0410-b5e6-96231b3b80d8 ) turned on
-  // MergeConsecutiveStores() before Instruction Selection for all targets.
-  // Enough AMDGPU compiles go into an infinite loop ( MergeConsecutiveStores()
-  // merges two stores; LegalizeStoreOps() un-merges; MergeConsecutiveStores()
-  // re-merges, etc. ) to warrant turning it off for now.
-  bool mergeStoresAfterLegalization() const override { return false; }
+  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
+  // AMDGPU.  Commit r319036,
+  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
+  // turned on MergeConsecutiveStores() before Instruction Selection for all
+  // targets.  Enough AMDGPU compiles go into an infinite loop (
+  // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
+  // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
+  // now.
+  bool mergeStoresAfterLegalization(EVT) const override { return false; }
 
   bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
     return true;
@@ -309,6 +324,10 @@ public:
   }
 
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+  bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
+                        SDValue Addr, SDValue &VAddr, SDValue &Offset,
+                        SDValue &SLC) const;
 };
 
 namespace AMDGPUISD {
@@ -463,28 +482,44 @@ enum NodeType : unsigned {
   INTERP_MOV,
   INTERP_P1,
   INTERP_P2,
+  INTERP_P1LL_F16,
+  INTERP_P1LV_F16,
+  INTERP_P2_F16,
   PC_ADD_REL_OFFSET,
+  LDS,
   KILL,
   DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+  LOAD_D16_HI,
+  LOAD_D16_LO,
+  LOAD_D16_HI_I8,
+  LOAD_D16_HI_U8,
+  LOAD_D16_LO_I8,
+  LOAD_D16_LO_U8,
+
   STORE_MSKOR,
   LOAD_CONSTANT,
   TBUFFER_STORE_FORMAT,
-  TBUFFER_STORE_FORMAT_X3,
   TBUFFER_STORE_FORMAT_D16,
   TBUFFER_LOAD_FORMAT,
   TBUFFER_LOAD_FORMAT_D16,
+  DS_ORDERED_COUNT,
   ATOMIC_CMP_SWAP,
   ATOMIC_INC,
   ATOMIC_DEC,
-  ATOMIC_LOAD_FADD,
   ATOMIC_LOAD_FMIN,
   ATOMIC_LOAD_FMAX,
   BUFFER_LOAD,
+  BUFFER_LOAD_UBYTE,
+  BUFFER_LOAD_USHORT,
+  BUFFER_LOAD_BYTE,
+  BUFFER_LOAD_SHORT,
   BUFFER_LOAD_FORMAT,
   BUFFER_LOAD_FORMAT_D16,
   SBUFFER_LOAD,
   BUFFER_STORE,
+  BUFFER_STORE_BYTE,
+  BUFFER_STORE_SHORT,
   BUFFER_STORE_FORMAT,
   BUFFER_STORE_FORMAT_D16,
   BUFFER_ATOMIC_SWAP,
@@ -498,6 +533,10 @@ enum NodeType : unsigned {
   BUFFER_ATOMIC_OR,
   BUFFER_ATOMIC_XOR,
   BUFFER_ATOMIC_CMPSWAP,
+  BUFFER_ATOMIC_FADD,
+  BUFFER_ATOMIC_PK_FADD,
+  ATOMIC_FADD,
+  ATOMIC_PK_FADD,
 
   LAST_AMDGPU_ISD_NUMBER
 };
diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp
index 945c9acd379a..f4df20b8f03e 100644
--- a/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,7 +39,7 @@ using namespace llvm;
 #define DEBUG_TYPE "inline"
 
 static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
               cl::desc("Cost of alloca argument"));
 
 // If the amount of scratch memory to eliminate exceeds our ability to allocate
@@ -50,6 +49,12 @@ static cl::opt<unsigned>
 ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
                 cl::desc("Maximum alloca size to use for inline cost"));
 
+// Inliner constraint to achieve reasonable compilation time
+static cl::opt<size_t>
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+      cl::desc("Maximum BB number allowed in a function after inlining"
+               " (compile time constraint)"));
+
 namespace {
 
 class AMDGPUInliner : public LegacyInlinerBase {
@@ -112,7 +117,8 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
     Callee->hasFnAttribute(Attribute::InlineHint);
   if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
       && !Caller->hasFnAttribute(Attribute::MinSize))
-    Thres = Params.HintThreshold.getValue();
+    Thres = Params.HintThreshold.getValue() *
+            TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();
 
   const DataLayout &DL = Caller->getParent()->getDataLayout();
   if (!Callee)
@@ -124,10 +130,11 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
   uint64_t AllocaSize = 0;
   SmallPtrSet<const AllocaInst *, 8> AIVisited;
   for (Value *PtrArg : CS.args()) {
-    Type *Ty = PtrArg->getType();
-    if (!Ty->isPointerTy() ||
-        Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+    PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
+    if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
+                Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
       continue;
+
     PtrArg = GetUnderlyingObject(PtrArg, DL);
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
       if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
@@ -170,7 +177,6 @@ static bool isWrapperOnlyCall(CallSite CS) {
 InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
   Function *Callee = CS.getCalledFunction();
   Function *Caller = CS.getCaller();
-  TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
 
   if (!Callee || Callee->isDeclaration())
     return llvm::InlineCost::getNever("undefined callee");
@@ -178,13 +184,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
   if (CS.isNoInline())
     return llvm::InlineCost::getNever("noinline");
 
+  TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
   if (!TTI.areInlineCompatible(Caller, Callee))
     return llvm::InlineCost::getNever("incompatible");
 
   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
-    if (isInlineViable(*Callee))
+    auto IsViable = isInlineViable(*Callee);
+    if (IsViable)
       return llvm::InlineCost::getAlways("alwaysinline viable");
-    return llvm::InlineCost::getNever("alwaysinline unviable");
+    return llvm::InlineCost::getNever(IsViable.message);
   }
 
   if (isWrapperOnlyCall(CS))
@@ -206,6 +214,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
     return ACT->getAssumptionCache(F);
   };
 
-  return llvm::getInlineCost(CS, Callee, LocalParams, TTI, GetAssumptionCache,
-                             None, PSI, RemarksEnabled ? &ORE : nullptr);
+  auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
+                             LocalParams, TTI, GetAssumptionCache, None, PSI,
+                             RemarksEnabled ? &ORE : nullptr);
+
+  if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
+    // Single BB does not increase total BB amount, thus subtract 1
+    size_t Size = Caller->size() + Callee->size() - 1;
+    if (MaxBB && Size > MaxBB)
+      return llvm::InlineCost::getNever("max number of bb exceeded");
+  }
+  return IC;
 }
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 07aa7c2cc8ad..9951cbf2326e 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 2f8166da0d33..698189e14c21 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 82644be26563..4a8446955496 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,27 +50,21 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
 def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def AMDGPUIfOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
 >;
 
 def AMDGPUElseOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
 >;
 
 def AMDGPULoopOp : SDTypeProfile<0, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
 >;
 
 def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
->;
-
-def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
-  [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
 >;
 
-def SDT_AMDGPUTCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
-
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //
@@ -96,7 +89,8 @@ def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
   SDNPVariadic]
 >;
 
-def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN", SDT_AMDGPUTCRET,
+def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN",
+  SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
 >;
 
@@ -205,14 +199,8 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
 // out = (src1 > src0) ? 1 : 0
 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
 
-// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
-// nodes in TargetSelectionDAG.td.
-def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;
-
-def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;
-
 def AMDGPUSetCCOp : SDTypeProfile<1, 3, [        // setcc
-  SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+  SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
 ]>;
 
 def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
@@ -251,7 +239,8 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
 
 //  Special case divide FMA with scale and flags (src0 = Quotient,
 //  src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
+                            [SDNPOptInGlue]>;
 
 // Single or double precision division fixup.
 // Special case divide fixup and flags(src0 = Quotient, src1 =
@@ -370,6 +359,17 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
                       SDTypeProfile<1, 4, [SDTCisFP<0>]>,
                       [SDNPInGlue]>;
 
+def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
+                            SDTypeProfile<1, 7, [SDTCisFP<0>]>,
+                            [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16",
+                            SDTypeProfile<1, 9, [SDTCisFP<0>]>,
+                            [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16",
+                          SDTypeProfile<1, 8, [SDTCisFP<0>]>,
+                          [SDNPInGlue]>;
 
 def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
   [SDNPHasChain, SDNPSideEffect]>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 8eb49d49b2e0..901a2eaa8829 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -18,10 +17,11 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
-#include "SIMachineFunctionInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -35,6 +35,7 @@
 #define DEBUG_TYPE "amdgpu-isel"
 
 using namespace llvm;
+using namespace MIPatternMatch;
 
 #define GET_GLOBALISEL_IMPL
 #define AMDGPUSubtarget GCNSubtarget
@@ -60,11 +61,101 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
 
 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 
+static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg == AMDGPU::SCC;
+
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+  const TargetRegisterClass *RC =
+      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+  if (RC) {
+    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
+    // context of the register bank has been lost.
+    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
+      return false;
+    const LLT Ty = MRI.getType(Reg);
+    return Ty.isValid() && Ty.getSizeInBits() == 1;
+  }
+
+  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+  return RB->getID() == AMDGPU::SCCRegBankID;
+}
+
+bool AMDGPUInstructionSelector::isVCC(Register Reg,
+                                      const MachineRegisterInfo &MRI) const {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg == TRI.getVCC();
+
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+  const TargetRegisterClass *RC =
+      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+  if (RC) {
+    const LLT Ty = MRI.getType(Reg);
+    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
+           Ty.isValid() && Ty.getSizeInBits() == 1;
+  }
+
+  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+  return RB->getID() == AMDGPU::VCCRegBankID;
+}
+
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
+  const DebugLoc &DL = I.getDebugLoc();
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   I.setDesc(TII.get(TargetOpcode::COPY));
+
+  const MachineOperand &Src = I.getOperand(1);
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  Register SrcReg = Src.getReg();
+
+  if (isVCC(DstReg, MRI)) {
+    if (SrcReg == AMDGPU::SCC) {
+      const TargetRegisterClass *RC
+        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+      if (!RC)
+        return true;
+      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+    }
+
+    if (!isVCC(SrcReg, MRI)) {
+      // TODO: Should probably leave the copy and let copyPhysReg expand it.
+      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+        return false;
+
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+        .addImm(0)
+        .addReg(SrcReg);
+
+      if (!MRI.getRegClassOrNull(SrcReg))
+        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+      I.eraseFromParent();
+      return true;
+    }
+
+    const TargetRegisterClass *RC =
+      TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
+      return false;
+
+    // Don't constrain the source register to a class so the def instruction
+    // handles it (unless it's undef).
+    //
+    // FIXME: This is a hack. When selecting the def, we need to know
+    // specifically that the result is VCCRegBank, and not just an SGPR
+    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
+    if (Src.isUndef()) {
+      const TargetRegisterClass *SrcRC =
+        TRI.getConstrainedRegClassForOperand(Src, MRI);
+      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+        return false;
+    }
+
+    return true;
+  }
+
   for (const MachineOperand &MO : I.operands()) {
     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
       continue;
@@ -78,15 +169,54 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const Register DefReg = I.getOperand(0).getReg();
+  const LLT DefTy = MRI.getType(DefReg);
+
+  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
+
+  const RegClassOrRegBank &RegClassOrBank =
+    MRI.getRegClassOrRegBank(DefReg);
+
+  const TargetRegisterClass *DefRC
+    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+  if (!DefRC) {
+    if (!DefTy.isValid()) {
+      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+      return false;
+    }
+
+    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+    if (RB.getID() == AMDGPU::SCCRegBankID) {
+      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
+      return false;
+    }
+
+    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
+    if (!DefRC) {
+      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+      return false;
+    }
+  }
+
+  I.setDesc(TII.get(TargetOpcode::PHI));
+  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+}
+
 MachineOperand
 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+                                           const TargetRegisterClass &SubRC,
                                            unsigned SubIdx) const {
 
   MachineInstr *MI = MO.getParent();
   MachineBasicBlock *BB = MO.getParent()->getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register DstReg = MRI.createVirtualRegister(&SubRC);
 
   if (MO.isReg()) {
     unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
@@ -118,51 +248,273 @@ static int64_t getConstant(const MachineInstr *MI) {
   return MI->getOperand(1).getCImm()->getSExtValue();
 }
 
-bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
+static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
+  switch (Opc) {
+  case AMDGPU::G_AND:
+    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+  case AMDGPU::G_OR:
+    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
+  case AMDGPU::G_XOR:
+    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
+  default:
+    llvm_unreachable("not a bit op");
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
-  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  MachineOperand &Dst = I.getOperand(0);
+  MachineOperand &Src0 = I.getOperand(1);
+  MachineOperand &Src1 = I.getOperand(2);
+  Register DstReg = Dst.getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
+    const TargetRegisterClass *RC = TRI.getBoolRC();
+    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
+                                           RC == &AMDGPU::SReg_64RegClass);
+    I.setDesc(TII.get(InstOpc));
+
+    // FIXME: Hack to avoid turning the register bank into a register class.
+    // The selector for G_ICMP relies on seeing that the result's register bank
+    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
+    // be ambiguous whether it's a scalar or vector bool.
+    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
+      MRI.setRegClass(Src0.getReg(), RC);
+    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
+      MRI.setRegClass(Src1.getReg(), RC);
+
+    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+  }
 
-  if (Size != 64)
-    return false;
+  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
+  // the result?
+  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
+    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
+    I.setDesc(TII.get(InstOpc));
 
-  DebugLoc DL = I.getDebugLoc();
+    const TargetRegisterClass *RC
+      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (!RC)
+      return false;
+    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
+           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
+           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
+  }
 
-  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
-  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+  return false;
+}
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
-          .add(Lo1)
-          .add(Lo2);
+bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  Register DstReg = I.getOperand(0).getReg();
+  const DebugLoc &DL = I.getDebugLoc();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
+  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
 
-  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
-  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+  if (Size == 32) {
+    if (IsSALU) {
+      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
+      MachineInstr *Add =
+        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
+        .add(I.getOperand(1))
+        .add(I.getOperand(2));
+      I.eraseFromParent();
+      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+    }
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
-          .add(Hi1)
-          .add(Hi2);
+    if (STI.hasAddNoCarry()) {
+      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
+      I.setDesc(TII.get(Opc));
+      I.addOperand(*MF, MachineOperand::CreateImm(0));
+      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
-          .addReg(DstLo)
-          .addImm(AMDGPU::sub0)
-          .addReg(DstHi)
-          .addImm(AMDGPU::sub1);
+    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;
 
-  for (MachineOperand &MO : I.explicit_operands()) {
-    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
-      continue;
-    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
+    MachineInstr *Add
+      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
+      .addDef(UnusedCarry, RegState::Dead)
+      .add(I.getOperand(1))
+      .add(I.getOperand(2))
+      .addImm(0);
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
   }
 
+  assert(!Sub && "illegal sub should not reach here");
+
+  const TargetRegisterClass &RC
+    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
+  const TargetRegisterClass &HalfRC
+    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
+
+  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
+  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
+  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
+  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
+
+  Register DstLo = MRI.createVirtualRegister(&HalfRC);
+  Register DstHi = MRI.createVirtualRegister(&HalfRC);
+
+  if (IsSALU) {
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+      .add(Lo1)
+      .add(Lo2);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+      .add(Hi1)
+      .add(Hi2);
+  } else {
+    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
+    Register CarryReg = MRI.createVirtualRegister(CarryRC);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
+      .addDef(CarryReg)
+      .add(Lo1)
+      .add(Lo2)
+      .addImm(0);
+    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
+      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
+      .add(Hi1)
+      .add(Hi2)
+      .addReg(CarryReg, RegState::Kill)
+      .addImm(0);
+
+    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
+      return false;
+  }
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+    .addReg(DstLo)
+    .addImm(AMDGPU::sub0)
+    .addReg(DstHi)
+    .addImm(AMDGPU::sub1);
+
+
+  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
+    return false;
+
+  I.eraseFromParent();
+  return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  assert(I.getOperand(2).getImm() % 32 == 0);
+  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
+  const DebugLoc &DL = I.getDebugLoc();
+  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
+                               I.getOperand(0).getReg())
+                               .addReg(I.getOperand(1).getReg(), 0, SubReg);
+
+  for (const MachineOperand &MO : Copy->operands()) {
+    const TargetRegisterClass *RC =
+            TRI.getConstrainedRegClassForOperand(MO, MRI);
+    if (!RC)
+      continue;
+    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+  }
   I.eraseFromParent();
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
+  MachineBasicBlock *BB = MI.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  if (SrcSize < 32)
+    return false;
+
+  const DebugLoc &DL = MI.getDebugLoc();
+  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
+  const unsigned DstSize = DstTy.getSizeInBits();
+  const TargetRegisterClass *DstRC =
+    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
+  if (!DstRC)
+    return false;
+
+  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
+  MachineInstrBuilder MIB =
+    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
+  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
+    MachineOperand &Src = MI.getOperand(I + 1);
+    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
+    MIB.addImm(SubRegs[I]);
+
+    const TargetRegisterClass *SrcRC
+      = TRI.getConstrainedRegClassForOperand(Src, MRI);
+    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
+      return false;
+  }
+
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
+    return false;
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
+  MachineBasicBlock *BB = MI.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const int NumDst = MI.getNumOperands() - 1;
+
+  MachineOperand &Src = MI.getOperand(NumDst);
+
+  Register SrcReg = Src.getReg();
+  Register DstReg0 = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg0);
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  const unsigned DstSize = DstTy.getSizeInBits();
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const DebugLoc &DL = MI.getDebugLoc();
+  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+
+  const TargetRegisterClass *SrcRC =
+    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
+  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+    return false;
+
+  const unsigned SrcFlags = getUndefRegState(Src.isUndef());
+
+  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
+  // source, and this relies on the fact that the same subregister indices are
+  // used for both.
+  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
+  for (int I = 0, E = NumDst; I != E; ++I) {
+    MachineOperand &Dst = MI.getOperand(I);
+    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
+      .addReg(SrcReg, SrcFlags, SubRegs[I]);
+
+    const TargetRegisterClass *DstRC =
+      TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
+      return false;
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
-  return selectG_ADD(I);
+  return selectG_ADD_SUB(I);
 }
 
 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
@@ -170,47 +522,200 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const MachineOperand &MO = I.getOperand(0);
-  const TargetRegisterClass *RC =
-      TRI.getConstrainedRegClassForOperand(MO, MRI);
-  if (RC)
+
+  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
+  // regbank check here is to know why getConstrainedRegClassForOperand failed.
+  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
+  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
+      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
+    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+    return true;
+  }
+
+  return false;
+}
+
+bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
+  DebugLoc DL = I.getDebugLoc();
+  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
+                               .addDef(I.getOperand(0).getReg())
+                               .addReg(I.getOperand(1).getReg())
+                               .addReg(I.getOperand(2).getReg())
+                               .addImm(SubReg);
+
+  for (const MachineOperand &MO : Ins->operands()) {
+    if (!MO.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      continue;
+
+    const TargetRegisterClass *RC =
+            TRI.getConstrainedRegClassForOperand(MO, MRI);
+    if (!RC)
+      continue;
     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
-  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+  }
+  I.eraseFromParent();
   return true;
 }
 
-bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
-                                          CodeGenCoverage &CoverageInfo) const {
-  unsigned IntrinsicID =  I.getOperand(1).getIntrinsicID();
-
+bool AMDGPUInstructionSelector::selectG_INTRINSIC(
+  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
+  unsigned IntrinsicID =  I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
   switch (IntrinsicID) {
-  default:
-    break;
   case Intrinsic::maxnum:
   case Intrinsic::minnum:
   case Intrinsic::amdgcn_cvt_pkrtz:
     return selectImpl(I, CoverageInfo);
-
-  case Intrinsic::amdgcn_kernarg_segment_ptr: {
-    MachineFunction *MF = I.getParent()->getParent();
+  case Intrinsic::amdgcn_if_break: {
+    MachineBasicBlock *BB = I.getParent();
+    MachineFunction *MF = BB->getParent();
     MachineRegisterInfo &MRI = MF->getRegInfo();
-    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-    const ArgDescriptor *InputPtrReg;
-    const TargetRegisterClass *RC;
-    const DebugLoc &DL = I.getDebugLoc();
-
-    std::tie(InputPtrReg, RC)
-      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-    if (!InputPtrReg)
-      report_fatal_error("missing kernarg segment ptr");
 
-    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
+    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+    // SelectionDAG uses for wave32 vs wave64.
+    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
       .add(I.getOperand(0))
-      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
+      .add(I.getOperand(2))
+      .add(I.getOperand(3));
+
+    Register DstReg = I.getOperand(0).getReg();
+    Register Src0Reg = I.getOperand(2).getReg();
+    Register Src1Reg = I.getOperand(3).getReg();
+
     I.eraseFromParent();
+
+    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
+      if (!MRI.getRegClassOrNull(Reg))
+        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+    }
+
     return true;
   }
+  default:
+    return selectImpl(I, CoverageInfo);
+  }
+}
+
+static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+  if (Size != 32 && Size != 64)
+    return -1;
+  switch (P) {
+  default:
+    llvm_unreachable("Unknown condition code!");
+  case CmpInst::ICMP_NE:
+    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+  case CmpInst::ICMP_EQ:
+    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+  case CmpInst::ICMP_SGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+  case CmpInst::ICMP_SGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+  case CmpInst::ICMP_SLT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+  case CmpInst::ICMP_SLE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+  case CmpInst::ICMP_UGT:
+    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+  case CmpInst::ICMP_UGE:
+    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+  case CmpInst::ICMP_ULT:
+    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+  case CmpInst::ICMP_ULE:
+    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
   }
-  return false;
+}
+
+int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
+                                              unsigned Size) const {
+  if (Size == 64) {
+    if (!STI.hasScalarCompareEq64())
+      return -1;
+    // Only the equality predicates are mapped to 64-bit scalar compares.
+    switch (P) {
+    case CmpInst::ICMP_NE:
+      return AMDGPU::S_CMP_LG_U64;
+    case CmpInst::ICMP_EQ:
+      return AMDGPU::S_CMP_EQ_U64;
+    default:
+      return -1;
+    }
+  }
+
+  if (Size != 32)
+    return -1; // -1 tells the caller there is no scalar opcode
+  // 32-bit: equality plus signed and unsigned orderings are mapped.
+  switch (P) {
+  case CmpInst::ICMP_NE:
+    return AMDGPU::S_CMP_LG_U32;
+  case CmpInst::ICMP_EQ:
+    return AMDGPU::S_CMP_EQ_U32;
+  case CmpInst::ICMP_SGT:
+    return AMDGPU::S_CMP_GT_I32;
+  case CmpInst::ICMP_SGE:
+    return AMDGPU::S_CMP_GE_I32;
+  case CmpInst::ICMP_SLT:
+    return AMDGPU::S_CMP_LT_I32;
+  case CmpInst::ICMP_SLE:
+    return AMDGPU::S_CMP_LE_I32;
+  case CmpInst::ICMP_UGT:
+    return AMDGPU::S_CMP_GT_U32;
+  case CmpInst::ICMP_UGE:
+    return AMDGPU::S_CMP_GE_U32;
+  case CmpInst::ICMP_ULT:
+    return AMDGPU::S_CMP_LT_U32;
+  case CmpInst::ICMP_ULE:
+    return AMDGPU::S_CMP_LE_U32;
+  default:
+    llvm_unreachable("Unknown condition code!");
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  unsigned SrcReg = I.getOperand(2).getReg();
+  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+  // An scc result selects a scalar compare; anything else a vector compare.
+  unsigned CCReg = I.getOperand(0).getReg();
+  if (isSCC(CCReg, MRI)) {
+    int Opcode = getS_CMPOpcode(Pred, Size);
+    if (Opcode == -1)
+      return false;
+    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+      .addReg(AMDGPU::SCC);
+    bool Ret =
+        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
+        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
+    I.eraseFromParent(); // NOTE(review): erased even when Ret is false
+    return Ret;
+  }
+
+  int Opcode = getV_CMPOpcode(Pred, Size);
+  if (Opcode == -1)
+    return false;
+  // The vector compare writes the condition register directly.
+  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
+            I.getOperand(0).getReg())
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+                               *TRI.getBoolRC(), MRI); // result not checked
+  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
 }
 
 static MachineInstr *
@@ -232,8 +737,7 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
 }
 
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
-                                                 MachineInstr &I,
-						 CodeGenCoverage &CoverageInfo) const {
+  MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -272,8 +776,72 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     I.eraseFromParent();
     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
   }
+  case Intrinsic::amdgcn_end_cf: {
+    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+    // SelectionDAG uses for wave32 vs wave64.
+    BuildMI(*BB, &I, I.getDebugLoc(),
+            TII.get(AMDGPU::SI_END_CF))
+      .add(I.getOperand(1));
+
+    Register Reg = I.getOperand(1).getReg();
+    I.eraseFromParent();
+
+    if (!MRI.getRegClassOrNull(Reg))
+      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+    return true;
   }
-  return false;
+  default:
+    return selectImpl(I, CoverageInfo);
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+  assert(Size <= 32 || Size == 64);
+  const MachineOperand &CCOp = I.getOperand(1);
+  unsigned CCReg = CCOp.getReg();
+  if (isSCC(CCReg, MRI)) {
+    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
+                                         AMDGPU::S_CSELECT_B32;
+    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+            .addReg(CCReg);
+
+    // The generic constrainSelectedInstRegOperands doesn't work for the scc
+    // register bank, because it does not cover the register class that we use
+    // to represent it.  So we need to manually set the register class here.
+    if (!MRI.getRegClassOrNull(CCReg))
+        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
+    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
+            .add(I.getOperand(2))
+            .add(I.getOperand(3));
+    // NOTE(review): '|' yields true if either call succeeds; '&' intended?
+    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
+               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+    I.eraseFromParent();
+    return Ret;
+  }
+
+  // Wide VGPR select should have been split in RegBankSelect.
+  if (Size > 32)
+    return false;
+
+  MachineInstr *Select =
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+              .addImm(0)               // src0_modifiers
+              .add(I.getOperand(3))    // src0
+              .addImm(0)               // src1_modifiers
+              .add(I.getOperand(2))    // src1
+              .add(I.getOperand(1));   // condition
+
+  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
 }
 
 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
@@ -281,10 +849,16 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   DebugLoc DL = I.getDebugLoc();
+  unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
+  if (PtrSize != 64) {
+    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
+    return false;
+  }
+
   unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
   unsigned Opcode;
 
-  // FIXME: Select store instruction based on address space
+  // FIXME: Remove this when integers > s32 naturally selected.
   switch (StoreSize) {
   default:
     return false;
@@ -307,7 +881,8 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
           .add(I.getOperand(0))
           .addImm(0)  // offset
           .addImm(0)  // glc
-          .addImm(0); // slc
+          .addImm(0)  // slc
+          .addImm(0); // dlc
 
 
   // Now that we selected an opcode, we need to constrain the register
@@ -318,6 +893,218 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
   return Ret;
 }
 
+static int sizeToSubRegIndex(unsigned Size) { // Size in bits; -1 if none
+  switch (Size) {
+  case 32:
+    return AMDGPU::sub0;
+  case 64:
+    return AMDGPU::sub0_sub1;
+  case 96:
+    return AMDGPU::sub0_sub1_sub2;
+  case 128:
+    return AMDGPU::sub0_sub1_sub2_sub3;
+  case 256:
+    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
+  default:
+    if (Size < 32)
+      return AMDGPU::sub0; // sizes below 32 bits also map to sub0
+    if (Size > 256)
+      return -1; // no index for anything wider than 256 bits
+    return sizeToSubRegIndex(PowerOf2Ceil(Size)); // round up and retry
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  if (!DstTy.isScalar())
+    return false;
+  // The truncate becomes a COPY, so both registers must share a bank.
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
+  if (SrcRB != DstRB)
+    return false;
+
+  unsigned DstSize = DstTy.getSizeInBits();
+  unsigned SrcSize = SrcTy.getSizeInBits();
+  const TargetRegisterClass *SrcRC
+    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
+  const TargetRegisterClass *DstRC
+    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);
+  if (!SrcRC || !DstRC)
+    return false; // no register class for this size on this bank
+
+  if (SrcSize > 32) {
+    int SubRegIdx = sizeToSubRegIndex(DstSize);
+    if (SubRegIdx == -1)
+      return false;
+
+    // Deal with weird cases where the class only partially supports the subreg
+    // index.
+    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
+    if (!SrcRC)
+      return false;
+    I.getOperand(1).setSubReg(SubRegIdx);
+  }
+
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+    return false;
+  }
+  // Rewrite in place: operand 1 already carries any subregister index.
+  I.setDesc(TII.get(TargetOpcode::COPY));
+  return true;
+}
+
+/// \returns true if a bitmask for \p Size bits will be an inline immediate.
+static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
+  Mask = maskTrailingOnes<unsigned>(Size); // written even when returning false
+  int SignedMask = static_cast<int>(Mask);
+  return SignedMask >= -16 && SignedMask <= 64; // range treated as inline
+}
+
+bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
+  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
+  const DebugLoc &DL = I.getDebugLoc();
+  MachineBasicBlock &MBB = *I.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  // I itself is left in place here; select() erases it on success.
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  const LLT S1 = LLT::scalar(1);
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const unsigned DstSize = DstTy.getSizeInBits();
+  if (!DstTy.isScalar())
+    return false;
+
+  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+  // scc source: build the extended boolean with a scalar select on SCC.
+  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
+    if (SrcTy != S1 || DstSize > 64) // Invalid
+      return false;
+
+    unsigned Opcode =
+        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+    const TargetRegisterClass *DstRC =
+        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
+
+    // FIXME: Create an extra copy to avoid incorrectly constraining the result
+    // of the scc producer.
+    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
+      .addReg(SrcReg);
+    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+      .addReg(TmpReg);
+
+    // The instruction operands are backwards from what you would expect.
+    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
+      .addImm(0)
+      .addImm(Signed ? -1 : 1);
+    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+  }
+  // vcc source (results up to 32 bits): select between 0 and 1/-1.
+  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
+    if (SrcTy != S1) // Invalid
+      return false;
+
+    MachineInstr *ExtI =
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addImm(0)               // src0_modifiers
+      .addImm(0)               // src0
+      .addImm(0)               // src1_modifiers
+      .addImm(Signed ? -1 : 1) // src1
+      .addUse(SrcReg);
+    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+  }
+
+  if (I.getOpcode() == AMDGPU::G_ANYEXT)
+    return selectCOPY(I);
+  // NOTE(review): select() erases I on success; confirm selectCOPY allows it.
+  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
+    // 64-bit should have been split up in RegBankSelect
+
+    // Try to use an and with a mask if it will save code size.
+    unsigned Mask;
+    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+      MachineInstr *ExtI =
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
+        .addImm(Mask)
+        .addReg(SrcReg);
+      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+    }
+
+    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
+    MachineInstr *ExtI =
+      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
+      .addReg(SrcReg)
+      .addImm(0) // Offset
+      .addImm(SrcSize); // Width
+    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+  }
+  // sgpr source: S_SEXT, S_BFE or S_AND is chosen from sign and sizes.
+  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
+    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
+      return false;
+
+    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
+      const unsigned SextOpc = SrcSize == 8 ?
+        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
+      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
+        .addReg(SrcReg);
+      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+    }
+
+    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
+    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
+    if (DstSize > 32 && SrcSize <= 32) {
+      // We need a 64-bit register source, but the high bits don't matter.
+      unsigned ExtReg
+        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned UndefReg
+        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
+        .addReg(SrcReg)
+        .addImm(AMDGPU::sub0)
+        .addReg(UndefReg)
+        .addImm(AMDGPU::sub1);
+
+      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
+        .addReg(ExtReg)
+        .addImm(SrcSize << 16);
+
+      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+    }
+
+    unsigned Mask;
+    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
+        .addReg(SrcReg)
+        .addImm(Mask);
+    } else {
+      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
+        .addReg(SrcReg)
+        .addImm(SrcSize << 16);
+    }
+
+    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+  }
+
+  return false;
+}
+
 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -423,7 +1210,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
 }
 
-static bool isInstrUniform(const MachineInstr &MI) {
+bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
   if (!MI.hasOneMemOperand())
     return false;
 
@@ -445,52 +1232,6 @@ static bool isInstrUniform(const MachineInstr &MI) {
   return I && I->getMetadata("amdgpu.uniform");
 }
 
-static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
-
-  if (LoadSize == 32)
-    return BaseOpcode;
-
-  switch (BaseOpcode) {
-  case AMDGPU::S_LOAD_DWORD_IMM:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_IMM_ci:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
-    }
-    break;
-  case AMDGPU::S_LOAD_DWORD_SGPR:
-    switch (LoadSize) {
-    case 64:
-      return AMDGPU::S_LOAD_DWORDX2_SGPR;
-    case 128:
-      return AMDGPU::S_LOAD_DWORDX4_SGPR;
-    case 256:
-      return AMDGPU::S_LOAD_DWORDX8_SGPR;
-    case 512:
-      return AMDGPU::S_LOAD_DWORDX16_SGPR;
-    }
-    break;
-  }
-  llvm_unreachable("Invalid base smrd opcode or size");
-}
-
 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   for (const GEPInfo &GEPInfo : AddrInfo) {
     if (!GEPInfo.VgprParts.empty())
@@ -499,125 +1240,77 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
   return false;
 }
 
-bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
-                                           ArrayRef<GEPInfo> AddrInfo) const {
-
-  if (!I.hasOneMemOperand())
-    return false;
-
-  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
-    return false;
-
-  if (!isInstrUniform(I))
-    return false;
-
-  if (hasVgprParts(AddrInfo))
-    return false;
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+  // TODO: Can/should we insert m0 initialization here for DS instructions and
+  // call the normal selector?
+  return false;
+}
 
+bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
-  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned DstReg = I.getOperand(0).getReg();
+  MachineOperand &CondOp = I.getOperand(0);
+  Register CondReg = CondOp.getReg();
   const DebugLoc &DL = I.getDebugLoc();
-  unsigned Opcode;
-  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-
-  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
-
-    const GEPInfo &GEPInfo = AddrInfo[0];
-
-    unsigned PtrReg = GEPInfo.SgprParts[0];
-    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
-    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
 
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                                 .addReg(PtrReg)
-                                 .addImm(EncodedImm)
-                                 .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
+  unsigned BrOpcode;
+  Register CondPhysReg;
+  const TargetRegisterClass *ConstrainRC;
+
+  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
+  // whether the branch is uniform when selecting the instruction. In
+  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
+  // RegBankSelect knows what it's doing if the branch condition is scc, even
+  // though it currently does not.
+  if (isSCC(CondReg, MRI)) {
+    CondPhysReg = AMDGPU::SCC;
+    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
+    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
+  } else if (isVCC(CondReg, MRI)) {
+    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
+    // Based on the register bank, we sort of know that a VCC producer ands
+    // inactive lanes with 0. What if there was a logical operation with vcc
+    // producers in different blocks/with different exec masks?
+    // FIXME: Should scc->vcc copies and with exec?
+    CondPhysReg = TRI.getVCC();
+    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
+    ConstrainRC = TRI.getBoolRC();
+  } else
+    return false;
 
-    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
-        isUInt<32>(EncodedImm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                                   .addReg(PtrReg)
-                                   .addImm(EncodedImm)
-                                   .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
+  if (!MRI.getRegClassOrNull(CondReg))
+    MRI.setRegClass(CondReg, ConstrainRC);
 
-    if (isUInt<32>(GEPInfo.Imm)) {
-      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
-      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
-              .addImm(GEPInfo.Imm);
-
-      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                                   .addReg(PtrReg)
-                                   .addReg(OffsetReg)
-                                   .addImm(0); // glc
-      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
-    }
-  }
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
+    .addReg(CondReg);
+  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
+    .addMBB(I.getOperand(1).getMBB());
 
-  unsigned PtrReg = I.getOperand(1).getReg();
-  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
-  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
-                               .addReg(PtrReg)
-                               .addImm(0)
-                               .addImm(0); // glc
-  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
 }
 
-
-bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  DebugLoc DL = I.getDebugLoc();
-  unsigned DstReg = I.getOperand(0).getReg();
-  unsigned PtrReg = I.getOperand(1).getReg();
-  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-  unsigned Opcode;
-
-  SmallVector<GEPInfo, 4> AddrInfo;
-
-  getAddrModeInfo(I, MRI, AddrInfo);
-
-  if (selectSMRD(I, AddrInfo)) {
-    I.eraseFromParent();
-    return true;
-  }
 
-  switch (LoadSize) {
-  default:
-    llvm_unreachable("Load size not supported\n");
-  case 32:
-    Opcode = AMDGPU::FLAT_LOAD_DWORD;
-    break;
-  case 64:
-    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
-    break;
-  }
+  Register DstReg = I.getOperand(0).getReg();
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
+  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
+  if (IsVGPR)
+    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
 
-  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
-                               .add(I.getOperand(0))
-                               .addReg(PtrReg)
-                               .addImm(0)  // offset
-                               .addImm(0)  // glc
-                               .addImm(0); // slc
-
-  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
-  I.eraseFromParent();
-  return Ret;
+  return RBI.constrainGenericRegister(
+    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
 }
 
 bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
+  if (I.isPHI())
+    return selectPHI(I);
 
   if (!isPreISelGenericOpcode(I.getOpcode())) {
     if (I.isCopy())
@@ -626,28 +1319,75 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   }
 
   switch (I.getOpcode()) {
-  default:
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR:
+    if (selectG_AND_OR_XOR(I))
+      return true;
     return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_ADD:
-    return selectG_ADD(I);
+  case TargetOpcode::G_SUB:
+    if (selectG_ADD_SUB(I))
+      return true;
+    LLVM_FALLTHROUGH;
+  default:
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_INTTOPTR:
   case TargetOpcode::G_BITCAST:
     return selectCOPY(I);
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_FCONSTANT:
     return selectG_CONSTANT(I);
+  case TargetOpcode::G_EXTRACT:
+    return selectG_EXTRACT(I);
+  case TargetOpcode::G_MERGE_VALUES:
+  case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return selectG_MERGE_VALUES(I);
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return selectG_UNMERGE_VALUES(I);
   case TargetOpcode::G_GEP:
     return selectG_GEP(I);
   case TargetOpcode::G_IMPLICIT_DEF:
     return selectG_IMPLICIT_DEF(I);
+  case TargetOpcode::G_INSERT:
+    return selectG_INSERT(I);
   case TargetOpcode::G_INTRINSIC:
     return selectG_INTRINSIC(I, CoverageInfo);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
+  case TargetOpcode::G_ICMP:
+    if (selectG_ICMP(I))
+      return true;
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_LOAD:
-    return selectG_LOAD(I);
+    return selectImpl(I, CoverageInfo);
+  case TargetOpcode::G_SELECT:
+    return selectG_SELECT(I);
   case TargetOpcode::G_STORE:
+    if (selectImpl(I, CoverageInfo))
+      return true;
     return selectG_STORE(I);
+  case TargetOpcode::G_TRUNC:
+    return selectG_TRUNC(I);
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+    if (selectG_SZA_EXT(I)) {
+      I.eraseFromParent();
+      return true;
+    }
+
+    return false;
+  case TargetOpcode::G_BRCOND:
+    return selectG_BRCOND(I);
+  case TargetOpcode::G_FRAME_INDEX:
+    return selectG_FRAME_INDEX(I);
+  case TargetOpcode::G_FENCE:
+    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
+    // is checking for G_CONSTANT
+    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
+    return true;
   }
   return false;
 }
@@ -660,6 +1400,26 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
 
 }
 
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3ModsImpl(
+  Register Src, const MachineRegisterInfo &MRI) const {
+  unsigned Mods = 0;
+  MachineInstr *MI = MRI.getVRegDef(Src);
+  // Look through an outer G_FNEG, then a G_FABS, recording each in Mods.
+  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
+    Src = MI->getOperand(1).getReg();
+    Mods |= SISrcMods::NEG;
+    MI = MRI.getVRegDef(Src);
+  }
+
+  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
+    Src = MI->getOperand(1).getReg();
+    Mods |= SISrcMods::ABS;
+  }
+
+  return std::make_pair(Src, Mods); // innermost source and modifier bits
+}
+
 ///
 /// This will select either an SGPR or VGPR operand and will save us from
 /// having to write an extra tablegen pattern.
@@ -672,11 +1432,18 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
 
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI
+    = Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
   }};
 }
 InstructionSelector::ComplexRendererFns
@@ -690,8 +1457,274 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
 
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI
+    = Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+
   return {{
-      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+
+  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+    return None;
+
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+  if (!isUInt<32>(EncodedImm))
+    return None;
+
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+  getAddrModeInfo(*MI, MRI, AddrInfo);
+
+  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+  // then we can select all ptr + 32-bit offsets not just immediate offsets.
+  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+    return None;
+
+  const GEPInfo &GEPInfo = AddrInfo[0];
+  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
+    return None;
+
+  // If we make it this far we have a load with a 32-bit immediate offset.
+  // It is OK to select this using a sgpr offset, because we have already
+  // failed trying to select this load into one of the _IMM variants since
+  // the _IMM Patterns are considered before the _SGPR patterns.
+  unsigned PtrReg = GEPInfo.SgprParts[0];
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+          .addImm(GEPInfo.Imm);
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
+  }};
+}
+
+template <bool Signed>
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  InstructionSelector::ComplexRendererFns Default = {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
+    }};
+
+  if (!STI.hasFlatInstOffsets())
+    return Default;
+
+  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
+  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
+    return Default;
+
+  Optional<int64_t> Offset =
+    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
+  if (!Offset.hasValue())
+    return Default;
+
+  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
+  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
+    return Default;
+
+  Register BasePtr = OpDef->getOperand(1).getReg();
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
+    }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
+  return selectFlatOffsetImpl<false>(Root);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
+  return selectFlatOffsetImpl<true>(Root);
+}
+
+// FIXME: Implement
+static bool signBitIsZero(const MachineOperand &Op,
+                          const MachineRegisterInfo &MRI) {
+  return false;
+}
+
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+  return PSV && PSV->isStack();
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+  int64_t Offset = 0;
+  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
+    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+    // TODO: Should this be inside the render function? The iterator seems to
+    // move.
+    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
+            HighBits)
+      .addImm(Offset & ~4095);
+
+    return {{[=](MachineInstrBuilder &MIB) { // rsrc
+               MIB.addReg(Info->getScratchRSrcReg());
+             },
+             [=](MachineInstrBuilder &MIB) { // vaddr
+               MIB.addReg(HighBits);
+             },
+             [=](MachineInstrBuilder &MIB) { // soffset
+               const MachineMemOperand *MMO = *MI->memoperands_begin();
+               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+               Register SOffsetReg = isStackPtrRelative(PtrInfo)
+                                         ? Info->getStackPtrOffsetReg()
+                                         : Info->getScratchWaveOffsetReg();
+               MIB.addReg(SOffsetReg);
+             },
+             [=](MachineInstrBuilder &MIB) { // offset
+               MIB.addImm(Offset & 4095);
+             }}};
+  }
+
+  assert(Offset == 0);
+
+  // Try to fold a frame index directly into the MUBUF vaddr field, and any
+  // offsets.
+  Optional<int> FI;
+  Register VAddr = Root.getReg();
+  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
+    if (isBaseWithConstantOffset(Root, MRI)) {
+      const MachineOperand &LHS = RootDef->getOperand(1);
+      const MachineOperand &RHS = RootDef->getOperand(2);
+      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+      if (LHSDef && RHSDef) {
+        int64_t PossibleOffset =
+            RHSDef->getOperand(1).getCImm()->getSExtValue();
+        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
+            (!STI.privateMemoryResourceIsRangeChecked() ||
+             signBitIsZero(LHS, MRI))) {
+          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
+            FI = LHSDef->getOperand(1).getIndex();
+          else
+            VAddr = LHS.getReg();
+          Offset = PossibleOffset;
+        }
+      }
+    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+      FI = RootDef->getOperand(1).getIndex();
+    }
+  }
+
+  // If we don't know this private access is a local stack object, it needs to
+  // be relative to the entry point's scratch wave offset register.
+  // TODO: Should split large offsets that don't fit like above.
+  // TODO: Don't use scratch wave offset just because the offset didn't fit.
+  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
+                                   : Info->getScratchWaveOffsetReg();
+
+  return {{[=](MachineInstrBuilder &MIB) { // rsrc
+             MIB.addReg(Info->getScratchRSrcReg());
+           },
+           [=](MachineInstrBuilder &MIB) { // vaddr
+             if (FI.hasValue())
+               MIB.addFrameIndex(FI.getValue());
+             else
+               MIB.addReg(VAddr);
+           },
+           [=](MachineInstrBuilder &MIB) { // soffset
+             MIB.addReg(SOffset);
+           },
+           [=](MachineInstrBuilder &MIB) { // offset
+             MIB.addImm(Offset);
+           }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffset(
+    MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  int64_t Offset = 0;
+  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
+      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+    return {};
+
+  const MachineFunction *MF = MBB->getParent();
+  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+  Register SOffsetReg = isStackPtrRelative(PtrInfo)
+                            ? Info->getStackPtrOffsetReg()
+                            : Info->getScratchWaveOffsetReg();
+  return {{
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(Info->getScratchRSrcReg());
+      },                                                         // rsrc
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
   }};
 }
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 449431adc561..4f489ddfb23d 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -1,9 +1,8 @@
 //===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -18,7 +17,9 @@
 #include "AMDGPUArgumentUsageInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/IR/InstrTypes.h"
 
 namespace {
 #define GET_GLOBALISEL_PREDICATE_BITSET
@@ -58,24 +59,45 @@ private:
     GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
   };
 
+  bool isInstrUniform(const MachineInstr &MI) const;
+  bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
+
   /// tblgen-erated 'select' implementation.
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
 
-  MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+  MachineOperand getSubOperand64(MachineOperand &MO,
+                                 const TargetRegisterClass &SubRC,
+                                 unsigned SubIdx) const;
   bool selectCOPY(MachineInstr &I) const;
+  bool selectPHI(MachineInstr &I) const;
+  bool selectG_TRUNC(MachineInstr &I) const;
+  bool selectG_SZA_EXT(MachineInstr &I) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
-  bool selectG_ADD(MachineInstr &I) const;
+  bool selectG_AND_OR_XOR(MachineInstr &I) const;
+  bool selectG_ADD_SUB(MachineInstr &I) const;
+  bool selectG_EXTRACT(MachineInstr &I) const;
+  bool selectG_MERGE_VALUES(MachineInstr &I) const;
+  bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
   bool selectG_GEP(MachineInstr &I) const;
   bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
+  bool selectG_INSERT(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
                                         CodeGenCoverage &CoverageInfo) const;
+  int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
+  bool selectG_ICMP(MachineInstr &I) const;
   bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
   void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
                        SmallVectorImpl<GEPInfo> &AddrInfo) const;
   bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
   bool selectG_LOAD(MachineInstr &I) const;
+  bool selectG_SELECT(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;
+  bool selectG_BRCOND(MachineInstr &I) const;
+  bool selectG_FRAME_INDEX(MachineInstr &I) const;
+
+  std::pair<Register, unsigned>
+  selectVOP3ModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
 
   InstructionSelector::ComplexRendererFns
   selectVCSRC(MachineOperand &Root) const;
@@ -90,6 +112,27 @@ private:
   InstructionSelector::ComplexRendererFns
   selectVOP3Mods(MachineOperand &Root) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectSmrdImm(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSmrdImm32(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSmrdSgpr(MachineOperand &Root) const;
+
+  template <bool Signed>
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffsetImpl(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffset(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffsetSigned(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
+  selectMUBUFScratchOffen(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectMUBUFScratchOffset(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index eb8f2002ff2d..61bc415c839d 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -1,9 +1,8 @@
 //===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+class AddressSpacesImpl {
+  int Flat = 0;
+  int Global = 1;
+  int Region = 2;
+  int Local = 3;
+  int Constant = 4;
+  int Private = 5;
+}
+
+def AddrSpaces : AddressSpacesImpl;
+
+
 class AMDGPUInst <dag outs, dag ins, string asm = "",
   list<dag> pattern = []> : Instruction {
   field bit isRegisterLoad = 0;
@@ -66,17 +77,15 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
 
 def TruePredicate : Predicate<"true">;
 
-// Exists to help track down where SubtargetPredicate isn't set rather
-// than letting tablegen crash with an unhelpful error.
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
-
 class PredicateControl {
-  Predicate SubtargetPredicate = InvalidPred;
+  Predicate SubtargetPredicate = TruePredicate;
   list<Predicate> AssemblerPredicates = [];
   Predicate AssemblerPredicate = TruePredicate;
+  Predicate WaveSizePredicate = TruePredicate;
   list<Predicate> OtherPredicates = [];
   list<Predicate> Predicates = !listconcat([SubtargetPredicate,
-                                            AssemblerPredicate],
+                                            AssemblerPredicate,
+                                            WaveSizePredicate],
                                             AssemblerPredicates,
                                             OtherPredicates);
 }
@@ -326,6 +335,10 @@ def TEX_SHADOW_ARRAY : PatLeaf<
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
+class AddressSpaceList<list<int> AS> {
+  list<int> AddrSpaces = AS;
+}
+
 class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
   return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
 }]>;
@@ -344,21 +357,25 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
   (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
 >;
 
-class PrivateAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
+def LoadAddress_constant : AddressSpaceList<[  AddrSpaces.Constant ]>;
+def LoadAddress_global : AddressSpaceList<[  AddrSpaces.Global, AddrSpaces.Constant ]>;
+def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;
 
-class ConstantAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
+def LoadAddress_flat : AddressSpaceList<[  AddrSpaces.Flat,
+                                           AddrSpaces.Global,
+                                           AddrSpaces.Constant ]>;
+def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;
+
+def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+
+def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+
+def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
+def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
 
-class LocalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
 
-class GlobalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
 
 class GlobalLoadAddress : CodePatPred<[{
   auto AS = cast<MemSDNode>(N)->getAddressSpace();
@@ -372,86 +389,126 @@ class FlatLoadAddress : CodePatPred<[{
          AS == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
-class FlatStoreAddress : CodePatPred<[{
-  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::GLOBAL_ADDRESS;
+class GlobalAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
-class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
-                                              (ld_node node:$ptr), [{
-  LoadSDNode *L = cast<LoadSDNode>(N);
-  return L->getExtensionType() == ISD::ZEXTLOAD ||
-         L->getExtensionType() == ISD::EXTLOAD;
+class PrivateAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
 }]>;
 
-def az_extload : AZExtLoadBase <unindexedload>;
-
-def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+class LocalAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
-def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+class RegionAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
 }]>;
 
-def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+class FlatStoreAddress : CodePatPred<[{
+  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
+  return AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
-class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
+// TODO: Remove these once stores are converted to the new PatFrag format.
 class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
-
-class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
 class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-
-class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
+class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
 class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-
-class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
 class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
 
-class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
 
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
 
-def load_private : PrivateLoad <load>;
-def az_extloadi8_private : PrivateLoad <az_extloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-def az_extloadi16_private : PrivateLoad <az_extloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
+def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
+  let IsLoad = 1;
+  let IsNonExtLoad = 1;
+}
 
-def store_private : PrivateStore <store>;
-def truncstorei8_private : PrivateStore<truncstorei8>;
-def truncstorei16_private : PrivateStore <truncstorei16>;
-def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
-def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
+def extloadi8_#as  : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
 
+def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
+
+def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
+
+def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i32;
+}
 
-def load_global : GlobalLoad <load>;
-def sextloadi8_global : GlobalLoad <sextloadi8>;
-def az_extloadi8_global : GlobalLoad <az_extloadi8>;
-def sextloadi16_global : GlobalLoad <sextloadi16>;
-def az_extloadi16_global : GlobalLoad <az_extloadi16>;
-def atomic_load_global : GlobalLoad<atomic_load>;
+def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i64;
+}
+
+def store_#as : PatFrag<(ops node:$val, node:$ptr),
+                    (unindexedstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let IsTruncStore = 0;
+}
+
+// truncstore fragments.
+def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
+                             (unindexedstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let IsTruncStore = 1;
+}
+
+// TODO: We don't really need the truncstore here. We can use
+// unindexedstore with MemoryVT directly, which will save an
+// unnecessary check that the memory size is less than the value type
+// in the generated matcher table.
+def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
+                               (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = i8;
+}
+
+def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
+                                (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = i16;
+}
+
+defm atomic_store_#as : binary_atomic_op<atomic_store>;
+
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
+
+
+def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
+def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
 
-def store_global : GlobalStore <store>;
-def truncstorei8_global : GlobalStore <truncstorei8>;
-def truncstorei16_global : GlobalStore <truncstorei16>;
 def store_atomic_global : GlobalStore<atomic_store>;
 def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
 def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
 
-def load_local : LocalLoad <load>;
-def az_extloadi8_local : LocalLoad <az_extloadi8>;
-def sextloadi8_local : LocalLoad <sextloadi8>;
-def az_extloadi16_local : LocalLoad <az_extloadi16>;
-def sextloadi16_local : LocalLoad <sextloadi16>;
-def atomic_load_32_local : LocalLoad<atomic_load_32>;
-def atomic_load_64_local : LocalLoad<atomic_load_64>;
-
-def store_local : LocalStore <store>;
-def truncstorei8_local : LocalStore <truncstorei8>;
-def truncstorei16_local : LocalStore <truncstorei16>;
 def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
 def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
 def atomic_store_local : LocalStore <atomic_store>;
@@ -472,34 +529,24 @@ def store_align16_local : Aligned16Bytes <
   (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
 >;
 
-def load_flat          : FlatLoad <load>;
-def az_extloadi8_flat  : FlatLoad <az_extloadi8>;
-def sextloadi8_flat    : FlatLoad <sextloadi8>;
-def az_extloadi16_flat : FlatLoad <az_extloadi16>;
-def sextloadi16_flat   : FlatLoad <sextloadi16>;
-def atomic_load_flat   : FlatLoad<atomic_load>;
-
-def store_flat         : FlatStore <store>;
-def truncstorei8_flat  : FlatStore <truncstorei8>;
-def truncstorei16_flat : FlatStore <truncstorei16>;
 def atomic_store_flat  : FlatStore <atomic_store>;
 def truncstorei8_hi16_flat  : StoreHi16<truncstorei8>, FlatStoreAddress;
 def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
 
 
-def constant_load : ConstantLoad<load>;
-def sextloadi8_constant : ConstantLoad <sextloadi8>;
-def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
-def sextloadi16_constant : ConstantLoad <sextloadi16>;
-def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
-
-
 class local_binary_atomic_op<SDNode atomic_op> :
   PatFrag<(ops node:$ptr, node:$value),
     (atomic_op node:$ptr, node:$value), [{
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+class region_binary_atomic_op<SDNode atomic_op> :
+  PatFrag<(ops node:$ptr, node:$value),
+    (atomic_op node:$ptr, node:$value), [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
+
 def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
 def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
 def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
@@ -524,13 +571,22 @@ class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
       return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
+    (ops node:$ptr, node:$cmp, node:$swap),
+    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+      AtomicSDNode *AN = cast<AtomicSDNode>(N);
+      return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
 def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
 
+class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
+    (ops node:$ptr, node:$value),
+    (atomic_op node:$ptr, node:$value),
+    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+
 multiclass global_binary_atomic_op<SDNode atomic_op> {
-  def "" : PatFrag<
-        (ops node:$ptr, node:$value),
-        (atomic_op node:$ptr, node:$value),
-        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+  def "" : global_binary_atomic_op_frag<atomic_op>;
 
   def _noret : PatFrag<
         (ops node:$ptr, node:$value),
@@ -585,7 +641,6 @@ int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
 int FP16_ONE = 0x3C00;
 int FP16_NEG_ONE = 0xBC00;
-int V2FP16_ONE = 0x3C003C00;
 int FP32_ONE = 0x3f800000;
 int FP32_NEG_ONE = 0xbf800000;
 int FP64_ONE = 0x3ff0000000000000;
@@ -626,9 +681,7 @@ class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
   : AMDGPUPat<
   (sub_type (extractelt vec_type:$src, sub_idx)),
   (EXTRACT_SUBREG $src, sub_reg)
-> {
-  let SubtargetPredicate = TruePredicate;
-}
+>;
 
 /* Insert element pattern */
 class Insert_Element <ValueType elem_type, ValueType vec_type,
@@ -636,9 +689,7 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
   : AMDGPUPat <
   (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
   (INSERT_SUBREG $vec, $elem, sub_reg)
-> {
-  let SubtargetPredicate = TruePredicate;
-}
+>;
 
 // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
 // can handle COPY instructions.
@@ -811,7 +862,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
                  SDPatternOperator max_oneuse,
                  ValueType vt = i32> {
 
-  // This matches 16 permutations of 
+  // This matches 16 permutations of
   // min(max(a, b), max(min(a, b), c))
   def : AMDGPUPat <
   (min (max_oneuse vt:$src0, vt:$src1),
@@ -819,7 +870,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
   (med3Inst vt:$src0, vt:$src1, vt:$src2)
 >;
 
-  // This matches 16 permutations of 
+  // This matches 16 permutations of
   // max(min(x, y), min(max(x, y), z))
   def : AMDGPUPat <
   (max (min_oneuse vt:$src0, vt:$src1),
@@ -827,7 +878,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
   (med3Inst $src0, $src1, $src2)
 >;
 }
-  
+
 // Special conversion patterns
 
 def cvt_rpi_i32_f32 : PatFrag <
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
deleted file mode 100644
index 02108ca3ddd7..000000000000
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// AMDGPU Implementation of the IntrinsicInfo class.
-//
-//===-----------------------------------------------------------------------===//
-
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-
-using namespace llvm;
-
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
-    : TargetIntrinsicInfo() {}
-
-static const char *const IntrinsicNameTable[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_NAME_TABLE
-};
-
-namespace {
-#define GET_INTRINSIC_ATTRIBUTES
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_ATTRIBUTES
-}
-
-StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
-                                       ArrayRef<Type *> Tys) const {
-  if (IntrID < Intrinsic::num_intrinsics)
-    return StringRef();
-
-  assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
-         "Invalid intrinsic ID");
-
-  return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
-}
-
-std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
-                                         unsigned NumTys) const {
-  return getName(IntrID, makeArrayRef(Tys, NumTys)).str();
-}
-
-FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
-                                           ArrayRef<Type*> Tys) const {
-  // FIXME: Re-use Intrinsic::getType machinery
-  llvm_unreachable("unhandled intrinsic");
-}
-
-unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
-                                         unsigned Len) const {
-  StringRef Name(NameData, Len);
-  if (!Name.startswith("llvm."))
-    return 0; // All intrinsics start with 'llvm.'
-
-  // Look for a name match in our table.  If the intrinsic is not overloaded,
-  // require an exact match. If it is overloaded, require a prefix match. The
-  // AMDGPU enum enum starts at Intrinsic::num_intrinsics.
-  int Idx = Intrinsic::lookupLLVMIntrinsicByName(IntrinsicNameTable, Name);
-  if (Idx >= 0) {
-    bool IsPrefixMatch = Name.size() > strlen(IntrinsicNameTable[Idx]);
-    return IsPrefixMatch == isOverloaded(Idx + 1)
-               ? Intrinsic::num_intrinsics + Idx
-               : 0;
-  }
-
-  return 0;
-}
-
-bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
-// Overload Table
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
-}
-
-Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
-                                              ArrayRef<Type *> Tys) const {
-  FunctionType *FTy = getType(M->getContext(), IntrID, Tys);
-  Function *F
-    = cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
-
-  AttributeList AS =
-      getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
-  F->setAttributes(AS);
-  return F;
-}
-
-Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
-                                              Type **Tys,
-                                              unsigned NumTys) const {
-  return getDeclaration(M, IntrID, makeArrayRef(Tys, NumTys));
-}
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
deleted file mode 100644
index a1a094dded23..000000000000
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// Interface for the AMDGPU Implementation of the Intrinsic Info class.
-//
-//===-----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINTRINSICINFO_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINTRINSICINFO_H
-
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
-class TargetMachine;
-
-namespace SIIntrinsic {
-enum ID {
-  last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "AMDGPUGenIntrinsicEnums.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
-      , num_AMDGPU_intrinsics
-};
-
-} // end namespace AMDGPUIntrinsic
-
-class AMDGPUIntrinsicInfo final : public TargetIntrinsicInfo {
-public:
-  AMDGPUIntrinsicInfo();
-
-  StringRef getName(unsigned IntrId, ArrayRef<Type *> Tys = None) const;
-
-  std::string getName(unsigned IntrId, Type **Tys = nullptr,
-                      unsigned NumTys = 0) const override;
-
-  unsigned lookupName(const char *Name, unsigned Len) const override;
-  bool isOverloaded(unsigned IID) const override;
-  Function *getDeclaration(Module *M, unsigned ID,
-                           Type **Tys = nullptr,
-                           unsigned NumTys = 0) const override;
-
-  Function *getDeclaration(Module *M, unsigned ID,
-                           ArrayRef<Type *> = None) const;
-
-  FunctionType *getType(LLVMContext &Context, unsigned ID,
-                        ArrayRef<Type*> Tys = None) const;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ef85c1040545..670f6225fbf7 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -15,17 +14,93 @@
 #include "AMDGPU.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 
+#define DEBUG_TYPE "amdgpu-legalinfo"
+
 using namespace llvm;
 using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
+// Matches types of at most MaxSize bits whose scalar size is a multiple of 32.
+static LegalityPredicate isMultiple32(unsigned TypeIdx,
+                                      unsigned MaxSize = 512) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    const LLT EltTy = Ty.getScalarType(); // Element type if Ty is a vector.
+    return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
+  };
+}
+
+static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    return Ty.isVector() &&
+           Ty.getNumElements() % 2 != 0 && // Odd element count.
+           Ty.getElementType().getSizeInBits() < 32; // Elements under 32 bits.
+  };
+}
 
-AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
-                                         const GCNTargetMachine &TM) {
+static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) { // Grow the vector by one element.
+    const LLT Ty = Query.Types[TypeIdx];
+    const LLT EltTy = Ty.getElementType(); // Assumes Ty is a vector.
+    return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
+  };
+}
+
+static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) { // Aim for ~64-bit pieces.
+    const LLT Ty = Query.Types[TypeIdx];
+    const LLT EltTy = Ty.getElementType();
+    unsigned Size = Ty.getSizeInBits();
+    unsigned Pieces = (Size + 63) / 64; // Number of 64-bit chunks, rounded up.
+    unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
+    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
+  };
+}
+
+static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
+  return [=](const LegalityQuery &Query) { // Vector strictly over Size bits.
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
+  };
+}
+
+static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) { // Vector with odd element count.
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
+  };
+}
+
+// Scalars: multiples of 32 bits up to 512. Vectors: 32/64/128/256-bit
+// elements, or multiples of v2s16. Total vector size is not checked here.
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    if (Ty.isVector()) {
+      const int EltSize = Ty.getElementType().getSizeInBits();
+      return EltSize == 32 || EltSize == 64 ||
+            (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
+             EltSize == 128 || EltSize == 256;
+    }
+
+    return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
+  };
+}
+
+AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
+                                         const GCNTargetMachine &TM)
+  :  ST(ST_) {
   using namespace TargetOpcode;
 
   auto GetAddrSpacePtr = [&TM](unsigned AS) {
@@ -33,13 +108,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
   };
 
   const LLT S1 = LLT::scalar(1);
+  const LLT S8 = LLT::scalar(8);
+  const LLT S16 = LLT::scalar(16);
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
+  const LLT S128 = LLT::scalar(128);
+  const LLT S256 = LLT::scalar(256);
   const LLT S512 = LLT::scalar(512);
 
   const LLT V2S16 = LLT::vector(2, 16);
   const LLT V4S16 = LLT::vector(4, 16);
-  const LLT V8S16 = LLT::vector(8, 16);
 
   const LLT V2S32 = LLT::vector(2, 32);
   const LLT V3S32 = LLT::vector(3, 32);
@@ -79,156 +157,428 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
 
   const LLT CodePtr = FlatPtr;
 
-  const LLT AddrSpaces[] = {
-    GlobalPtr,
-    ConstantPtr,
-    LocalPtr,
-    FlatPtr,
-    PrivatePtr
+  const std::initializer_list<LLT> AddrSpaces64 = {
+    GlobalPtr, ConstantPtr, FlatPtr
+  };
+
+  const std::initializer_list<LLT> AddrSpaces32 = {
+    LocalPtr, PrivatePtr
+  };
+
+  const std::initializer_list<LLT> FPTypesBase = {
+    S32, S64
+  };
+
+  const std::initializer_list<LLT> FPTypes16 = {
+    S32, S64, S16
+  };
+
+  const std::initializer_list<LLT> FPTypesPK16 = {
+    S32, S64, S16, V2S16
   };
 
   setAction({G_BRCOND, S1}, Legal);
 
-  setAction({G_ADD, S32}, Legal);
-  setAction({G_ASHR, S32}, Legal);
-  setAction({G_SUB, S32}, Legal);
-  setAction({G_MUL, S32}, Legal);
+  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
+  // elements for v3s16
+  getActionDefinitionsBuilder(G_PHI)
+    .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
+    .legalFor(AllS32Vectors)
+    .legalFor(AllS64Vectors)
+    .legalFor(AddrSpaces64)
+    .legalFor(AddrSpaces32)
+    .clampScalar(0, S32, S256)
+    .widenScalarToNextPow2(0, 32)
+    .clampMaxNumElements(0, S32, 16)
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+    .legalIf(isPointer(0));
 
-  // FIXME: 64-bit ones only legal for scalar
+  if (ST.has16BitInsts()) {
+    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+      .legalFor({S32, S16})
+      .clampScalar(0, S16, S32)
+      .scalarize(0);
+  } else {
+    getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+      .legalFor({S32})
+      .clampScalar(0, S32, S32)
+      .scalarize(0);
+  }
+
+  getActionDefinitionsBuilder({G_UMULH, G_SMULH})
+    .legalFor({S32})
+    .clampScalar(0, S32, S32)
+    .scalarize(0);
+
+  // Report legal for any types we can handle anywhere. For the cases only legal
+  // on the SALU, RegBankSelect will be able to re-legalize.
   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
-    .legalFor({S32, S1, S64, V2S32});
+    .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
+    .clampScalar(0, S32, S64)
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+    .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
+    .widenScalarToNextPow2(0)
+    .scalarize(0);
 
   getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                                G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
-    .legalFor({{S32, S1}});
+    .legalFor({{S32, S1}})
+    .clampScalar(0, S32, S32);
 
-  setAction({G_BITCAST, V2S16}, Legal);
-  setAction({G_BITCAST, 1, S32}, Legal);
+  getActionDefinitionsBuilder(G_BITCAST)
+    .legalForCartesianProduct({S32, V2S16})
+    .legalForCartesianProduct({S64, V2S32, V4S16})
+    .legalForCartesianProduct({V2S64, V4S32})
+    // Don't worry about the size constraint.
+    .legalIf(all(isPointer(0), isPointer(1)));
 
-  setAction({G_BITCAST, S32}, Legal);
-  setAction({G_BITCAST, 1, V2S16}, Legal);
-
-  getActionDefinitionsBuilder(G_FCONSTANT)
-    .legalFor({S32, S64});
+  if (ST.has16BitInsts()) {
+    getActionDefinitionsBuilder(G_FCONSTANT)
+      .legalFor({S32, S64, S16})
+      .clampScalar(0, S16, S64);
+  } else {
+    getActionDefinitionsBuilder(G_FCONSTANT)
+      .legalFor({S32, S64})
+      .clampScalar(0, S32, S64);
+  }
 
-  // G_IMPLICIT_DEF is a no-op so we can make it legal for any value type that
-  // can fit in a register.
-  // FIXME: We need to legalize several more operations before we can add
-  // a test case for size > 512.
   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
-    .legalIf([=](const LegalityQuery &Query) {
-        return Query.Types[0].getSizeInBits() <= 512;
-    })
-    .clampScalar(0, S1, S512);
+    .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
+               ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+    .clampScalarOrElt(0, S32, S512)
+    .legalIf(isMultiple32(0))
+    .widenScalarToNextPow2(0, 32)
+    .clampMaxNumElements(0, S32, 16);
 
-  getActionDefinitionsBuilder(G_CONSTANT)
-    .legalFor({S1, S32, S64});
 
   // FIXME: i1 operands to intrinsics should always be legal, but other i1
   // values may not be legal.  We need to figure out how to distinguish
   // between these two scenarios.
-  setAction({G_CONSTANT, S1}, Legal);
+  getActionDefinitionsBuilder(G_CONSTANT)
+    .legalFor({S1, S32, S64, GlobalPtr,
+               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
+    .clampScalar(0, S32, S64)
+    .widenScalarToNextPow2(0)
+    .legalIf(isPointer(0));
 
   setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
 
-  getActionDefinitionsBuilder(
-    { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+  auto &FPOpActions = getActionDefinitionsBuilder(
+    { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
     .legalFor({S32, S64});
 
-  getActionDefinitionsBuilder(G_FPTRUNC)
-    .legalFor({{S32, S64}});
+  if (ST.has16BitInsts()) {
+    if (ST.hasVOP3PInsts())
+      FPOpActions.legalFor({S16, V2S16});
+    else
+      FPOpActions.legalFor({S16});
+  }
 
-  // Use actual fsub instruction
-  setAction({G_FSUB, S32}, Legal);
+  auto &MinNumMaxNum = getActionDefinitionsBuilder({
+      G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+
+  if (ST.hasVOP3PInsts()) {
+    MinNumMaxNum.customFor(FPTypesPK16)
+      .clampMaxNumElements(0, S16, 2)
+      .clampScalar(0, S16, S64)
+      .scalarize(0);
+  } else if (ST.has16BitInsts()) {
+    MinNumMaxNum.customFor(FPTypes16)
+      .clampScalar(0, S16, S64)
+      .scalarize(0);
+  } else {
+    MinNumMaxNum.customFor(FPTypesBase)
+      .clampScalar(0, S32, S64)
+      .scalarize(0);
+  }
 
-  // Must use fadd + fneg
-  setAction({G_FSUB, S64}, Lower);
+  // TODO: Implement
+  getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
 
-  setAction({G_FCMP, S1}, Legal);
-  setAction({G_FCMP, 1, S32}, Legal);
-  setAction({G_FCMP, 1, S64}, Legal);
+  if (ST.hasVOP3PInsts())
+    FPOpActions.clampMaxNumElements(0, S16, 2);
+  FPOpActions
+    .scalarize(0)
+    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
 
-  setAction({G_ZEXT, S64}, Legal);
-  setAction({G_ZEXT, 1, S32}, Legal);
+  if (ST.has16BitInsts()) {
+    getActionDefinitionsBuilder(G_FSQRT)
+      .legalFor({S32, S64, S16})
+      .scalarize(0)
+      .clampScalar(0, S16, S64);
+  } else {
+    getActionDefinitionsBuilder(G_FSQRT)
+      .legalFor({S32, S64})
+      .scalarize(0)
+      .clampScalar(0, S32, S64);
+  }
 
-  setAction({G_SEXT, S64}, Legal);
-  setAction({G_SEXT, 1, S32}, Legal);
+  getActionDefinitionsBuilder(G_FPTRUNC)
+    .legalFor({{S32, S64}, {S16, S32}})
+    .scalarize(0);
 
-  setAction({G_ANYEXT, S64}, Legal);
-  setAction({G_ANYEXT, 1, S32}, Legal);
+  getActionDefinitionsBuilder(G_FPEXT)
+    .legalFor({{S64, S32}, {S32, S16}})
+    .lowerFor({{S64, S16}}) // FIXME: Implement
+    .scalarize(0);
 
-  setAction({G_FPTOSI, S32}, Legal);
-  setAction({G_FPTOSI, 1, S32}, Legal);
+  // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
+  getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
 
-  setAction({G_SITOFP, S32}, Legal);
-  setAction({G_SITOFP, 1, S32}, Legal);
+  getActionDefinitionsBuilder(G_FSUB)
+      // Use actual fsub instruction
+      .legalFor({S32})
+      // Must use fadd + fneg
+      .lowerFor({S64, S16, V2S16})
+      .scalarize(0)
+      .clampScalar(0, S32, S64);
 
-  setAction({G_UITOFP, S32}, Legal);
-  setAction({G_UITOFP, 1, S32}, Legal);
+  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
+               {S32, S1}, {S64, S1}, {S16, S1},
+               // FIXME: Hack
+               {S64, LLT::scalar(33)},
+               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
+    .scalarize(0);
 
-  setAction({G_FPTOUI, S32}, Legal);
-  setAction({G_FPTOUI, 1, S32}, Legal);
+  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+    .legalFor({{S32, S32}, {S64, S32}})
+    .lowerFor({{S32, S64}})
+    .customFor({{S64, S64}})
+    .scalarize(0);
 
-  setAction({G_FPOW, S32}, Legal);
-  setAction({G_FEXP2, S32}, Legal);
-  setAction({G_FLOG2, S32}, Legal);
+  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+    .legalFor({{S32, S32}, {S32, S64}})
+    .scalarize(0);
 
-  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
-    .legalFor({S32, S64});
+  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
+    .legalFor({S32, S64})
+    .scalarize(0);
 
-  for (LLT PtrTy : AddrSpaces) {
-    LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
-    setAction({G_GEP, PtrTy}, Legal);
-    setAction({G_GEP, 1, IdxTy}, Legal);
+  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+      .legalFor({S32, S64})
+      .clampScalar(0, S32, S64)
+      .scalarize(0);
+  } else {
+    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+      .legalFor({S32})
+      .customFor({S64})
+      .clampScalar(0, S32, S64)
+      .scalarize(0);
   }
 
+  getActionDefinitionsBuilder(G_GEP)
+    .legalForCartesianProduct(AddrSpaces64, {S64})
+    .legalForCartesianProduct(AddrSpaces32, {S32})
+    .scalarize(0);
+
   setAction({G_BLOCK_ADDR, CodePtr}, Legal);
 
-  setAction({G_ICMP, S1}, Legal);
-  setAction({G_ICMP, 1, S32}, Legal);
+  auto &CmpBuilder =
+    getActionDefinitionsBuilder(G_ICMP)
+    .legalForCartesianProduct(
+      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
+    .legalFor({{S1, S32}, {S1, S64}});
+  if (ST.has16BitInsts()) {
+    CmpBuilder.legalFor({{S1, S16}});
+  }
+
+  CmpBuilder
+    .widenScalarToNextPow2(1)
+    .clampScalar(1, S32, S64)
+    .scalarize(0)
+    .legalIf(all(typeIs(0, S1), isPointer(1)));
+
+  getActionDefinitionsBuilder(G_FCMP)
+    .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
+    .widenScalarToNextPow2(1)
+    .clampScalar(1, S32, S64)
+    .scalarize(0);
+
+  // FIXME: fexp, flog2, flog10 need to be custom lowered.
+  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
+                               G_FLOG, G_FLOG2, G_FLOG10})
+    .legalFor({S32})
+    .scalarize(0);
+
+  // The 64-bit versions produce 32-bit results, but only on the SALU.
+  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
+                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
+                               G_CTPOP})
+    .legalFor({{S32, S32}, {S32, S64}})
+    .clampScalar(0, S32, S32)
+    .clampScalar(1, S32, S64)
+    .scalarize(0)
+    .widenScalarToNextPow2(0, 32)
+    .widenScalarToNextPow2(1, 32);
+
+  // TODO: Expand for > s32
+  getActionDefinitionsBuilder(G_BSWAP)
+    .legalFor({S32})
+    .clampScalar(0, S32, S32)
+    .scalarize(0);
+
+  if (ST.has16BitInsts()) {
+    if (ST.hasVOP3PInsts()) {
+      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+        .legalFor({S32, S16, V2S16})
+        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+        .clampMaxNumElements(0, S16, 2)
+        .clampScalar(0, S16, S32)
+        .widenScalarToNextPow2(0)
+        .scalarize(0);
+    } else {
+      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+        .legalFor({S32, S16})
+        .widenScalarToNextPow2(0)
+        .clampScalar(0, S16, S32)
+        .scalarize(0);
+    }
+  } else {
+    getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+      .legalFor({S32})
+      .clampScalar(0, S32, S32)
+      .widenScalarToNextPow2(0)
+      .scalarize(0);
+  }
 
-  setAction({G_CTLZ, S32}, Legal);
-  setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
-  setAction({G_CTTZ, S32}, Legal);
-  setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
-  setAction({G_BSWAP, S32}, Legal);
-  setAction({G_CTPOP, S32}, Legal);
+  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
+    return [=](const LegalityQuery &Query) {
+      return Query.Types[TypeIdx0].getSizeInBits() <
+             Query.Types[TypeIdx1].getSizeInBits();
+    };
+  };
+
+  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
+    return [=](const LegalityQuery &Query) {
+      return Query.Types[TypeIdx0].getSizeInBits() >
+             Query.Types[TypeIdx1].getSizeInBits();
+    };
+  };
 
   getActionDefinitionsBuilder(G_INTTOPTR)
-    .legalIf([](const LegalityQuery &Query) {
-      return true;
-    });
+    // List the common cases
+    .legalForCartesianProduct(AddrSpaces64, {S64})
+    .legalForCartesianProduct(AddrSpaces32, {S32})
+    .scalarize(0)
+    // Accept any address space as long as the size matches
+    .legalIf(sameSize(0, 1))
+    .widenScalarIf(smallerThan(1, 0),
+      [](const LegalityQuery &Query) {
+        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
+      })
+    .narrowScalarIf(greaterThan(1, 0),
+      [](const LegalityQuery &Query) {
+        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
+      });
 
   getActionDefinitionsBuilder(G_PTRTOINT)
-    .legalIf([](const LegalityQuery &Query) {
-      return true;
-    });
+    // List the common cases
+    .legalForCartesianProduct(AddrSpaces64, {S64})
+    .legalForCartesianProduct(AddrSpaces32, {S32})
+    .scalarize(0)
+    // Accept any address space as long as the size matches
+    .legalIf(sameSize(0, 1))
+    .widenScalarIf(smallerThan(0, 1),
+      [](const LegalityQuery &Query) {
+        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
+      })
+    .narrowScalarIf(
+      greaterThan(0, 1),
+      [](const LegalityQuery &Query) {
+        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
+      });
+
+  if (ST.hasFlatAddressSpace()) {
+    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+      .scalarize(0)
+      .custom();
+  }
 
+  // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
+  // handle some operations by just promoting the register during
+  // selection. There are also d16 loads on GFX9+ which preserve the high bits.
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
-    .legalIf([=, &ST](const LegalityQuery &Query) {
+    .narrowScalarIf([](const LegalityQuery &Query) {
+        unsigned Size = Query.Types[0].getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        return (Size > 32 && MemSize < Size);
+      },
+      [](const LegalityQuery &Query) {
+        return std::make_pair(0, LLT::scalar(32));
+      })
+    .fewerElementsIf([=](const LegalityQuery &Query) {
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        return (MemSize == 96) &&
+               Query.Types[0].isVector() &&
+               !ST.hasDwordx3LoadStores();
+      },
+      [=](const LegalityQuery &Query) {
+        return std::make_pair(0, V2S32);
+      })
+    .legalIf([=](const LegalityQuery &Query) {
         const LLT &Ty0 = Query.Types[0];
 
+        unsigned Size = Ty0.getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        if (Size < 32 || (Size > 32 && MemSize < Size))
+          return false;
+
+        if (Ty0.isVector() && Size != MemSize)
+          return false;
+
         // TODO: Decompose private loads into 4-byte components.
         // TODO: Illegal flat loads on SI
-        switch (Ty0.getSizeInBits()) {
+        switch (MemSize) {
+        case 8:
+        case 16:
+          return Size == 32;
         case 32:
         case 64:
         case 128:
           return true;
 
         case 96:
-          // XXX hasLoadX3
-          return (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS);
+          return ST.hasDwordx3LoadStores();
 
         case 256:
         case 512:
-          // TODO: constant loads
+          // TODO: Possibly support loads of i256 and i512.  This will require
+          // adding i256 and i512 types to MVT in order to be able to use
+          // TableGen.
+          // TODO: Add support for other vector types, this will require
+          //       defining more value mappings for the new types.
+          return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
+                                    Ty0.getScalarType().getSizeInBits() == 64);
+
         default:
           return false;
         }
-      });
+      })
+    .clampScalar(0, S32, S64);
 
 
+  // FIXME: Handle alignment requirements.
+  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+    .legalForTypesWithMemDesc({
+        {S32, GlobalPtr, 8, 8},
+        {S32, GlobalPtr, 16, 8},
+        {S32, LocalPtr, 8, 8},
+        {S32, LocalPtr, 16, 8},
+        {S32, PrivatePtr, 8, 8},
+        {S32, PrivatePtr, 16, 8}});
+  if (ST.hasFlatAddressSpace()) {
+    ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
+                                       {S32, FlatPtr, 16, 8}});
+  }
+
+  ExtLoads.clampScalar(0, S32, S32)
+          .widenScalarToNextPow2(0)
+          .unsupportedIfMemSizeNotPow2()
+          .lower();
+
   auto &Atomics = getActionDefinitionsBuilder(
     {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
      G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
@@ -240,84 +590,805 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
   }
 
-  setAction({G_SELECT, S32}, Legal);
-  setAction({G_SELECT, 1, S1}, Legal);
+  // TODO: Pointer types, any 32-bit or 64-bit vector
+  getActionDefinitionsBuilder(G_SELECT)
+    .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
+          GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
+          LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
+    .clampScalar(0, S16, S64)
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
+    .scalarize(1)
+    .clampMaxNumElements(0, S32, 2)
+    .clampMaxNumElements(0, LocalPtr, 2)
+    .clampMaxNumElements(0, PrivatePtr, 2)
+    .scalarize(0)
+    .widenScalarToNextPow2(0)
+    .legalIf(all(isPointer(0), typeIs(1, S1)));
 
-  setAction({G_SHL, S32}, Legal);
+  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
+  // be more flexible with the shift amount type.
+  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
+    .legalFor({{S32, S32}, {S64, S32}});
+  if (ST.has16BitInsts()) {
+    if (ST.hasVOP3PInsts()) {
+      Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
+            .clampMaxNumElements(0, S16, 2);
+    } else
+      Shifts.legalFor({{S16, S32}, {S16, S16}});
 
-
-  // FIXME: When RegBankSelect inserts copies, it will only create new
-  // registers with scalar types.  This means we can end up with
-  // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
-  // operands.  In assert builds, the instruction selector will assert
-  // if it sees a generic instruction which isn't legal, so we need to
-  // tell it that scalar types are legal for pointer operands
-  setAction({G_GEP, S64}, Legal);
+    Shifts.clampScalar(1, S16, S32);
+    Shifts.clampScalar(0, S16, S64);
+    Shifts.widenScalarToNextPow2(0, 16);
+  } else {
+    // Make sure we legalize the shift amount type first, as the general
+    // expansion for the shifted type will produce much worse code if it hasn't
+    // been truncated already.
+    Shifts.clampScalar(1, S32, S32);
+    Shifts.clampScalar(0, S32, S64);
+    Shifts.widenScalarToNextPow2(0, 32);
+  }
+  Shifts.scalarize(0);
 
   for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
+    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
+    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
+    unsigned IdxTypeIdx = 2;
+
     getActionDefinitionsBuilder(Op)
-      .legalIf([=](const LegalityQuery &Query) {
-          const LLT &VecTy = Query.Types[1];
-          const LLT &IdxTy = Query.Types[2];
-          return VecTy.getSizeInBits() % 32 == 0 &&
-            VecTy.getSizeInBits() <= 512 &&
-            IdxTy.getSizeInBits() == 32;
-        });
+      .customIf([=](const LegalityQuery &Query) {
+          const LLT EltTy = Query.Types[EltTypeIdx];
+          const LLT VecTy = Query.Types[VecTypeIdx];
+          const LLT IdxTy = Query.Types[IdxTypeIdx];
+          return (EltTy.getSizeInBits() == 16 ||
+                  EltTy.getSizeInBits() % 32 == 0) &&
+                 VecTy.getSizeInBits() % 32 == 0 &&
+                 VecTy.getSizeInBits() <= 512 &&
+                 IdxTy.getSizeInBits() == 32;
+        })
+      .clampScalar(EltTypeIdx, S32, S64)
+      .clampScalar(VecTypeIdx, S32, S64)
+      .clampScalar(IdxTypeIdx, S32, S32);
   }
 
-  // FIXME: Doesn't handle extract of illegal sizes.
-  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
-    .legalIf([=](const LegalityQuery &Query) {
-        const LLT &Ty0 = Query.Types[0];
-        const LLT &Ty1 = Query.Types[1];
-        return (Ty0.getSizeInBits() % 32 == 0) &&
-               (Ty1.getSizeInBits() % 32 == 0);
+  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+    .unsupportedIf([=](const LegalityQuery &Query) {
+        const LLT &EltTy = Query.Types[1].getElementType();
+        return Query.Types[0] != EltTy;
       });
 
+  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
+    unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
+    unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
+
+    // FIXME: Doesn't handle extract of illegal sizes.
+    getActionDefinitionsBuilder(Op)
+      .legalIf([=](const LegalityQuery &Query) {
+          const LLT BigTy = Query.Types[BigTyIdx];
+          const LLT LitTy = Query.Types[LitTyIdx];
+          return (BigTy.getSizeInBits() % 32 == 0) &&
+                 (LitTy.getSizeInBits() % 16 == 0);
+        })
+      .widenScalarIf(
+        [=](const LegalityQuery &Query) {
+          const LLT BigTy = Query.Types[BigTyIdx];
+          return (BigTy.getScalarSizeInBits() < 16);
+        },
+        LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
+      .widenScalarIf(
+        [=](const LegalityQuery &Query) {
+          const LLT LitTy = Query.Types[LitTyIdx];
+          return (LitTy.getScalarSizeInBits() < 16);
+        },
+        LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
+      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
+      .widenScalarToNextPow2(BigTyIdx, 32);
+
+  }
+
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
-    .legalForCartesianProduct(AllS32Vectors, {S32})
-    .legalForCartesianProduct(AllS64Vectors, {S64})
-    .clampNumElements(0, V16S32, V16S32)
-    .clampNumElements(0, V2S64, V8S64)
-    .minScalarSameAs(1, 0);
+      .legalForCartesianProduct(AllS32Vectors, {S32})
+      .legalForCartesianProduct(AllS64Vectors, {S64})
+      .clampNumElements(0, V16S32, V16S32)
+      .clampNumElements(0, V2S64, V8S64)
+      .minScalarSameAs(1, 0)
+      .legalIf(isRegisterType(0))
+      .minScalarOrElt(0, S32);
 
-  // TODO: Support any combination of v2s32
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
-    .legalFor({{V4S32, V2S32},
-               {V8S32, V2S32},
-               {V8S32, V4S32},
-               {V4S64, V2S64},
-               {V4S16, V2S16},
-               {V8S16, V2S16},
-               {V8S16, V4S16}});
+    .legalIf(isRegisterType(0));
 
   // Merge/Unmerge
   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
 
+    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
+      const LLT &Ty = Query.Types[TypeIdx];
+      if (Ty.isVector()) {
+        const LLT &EltTy = Ty.getElementType();
+        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
+          return true;
+        if (!isPowerOf2_32(EltTy.getSizeInBits()))
+          return true;
+      }
+      return false;
+    };
+
     getActionDefinitionsBuilder(Op)
+      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+      // Clamp the little scalar to s16-s256 and make it a power of 2. It's not
+      // worth considering the multiples of 64 since 2*192 and 2*384 are not
+      // valid.
+      .clampScalar(LitTyIdx, S16, S256)
+      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
+
+      // Break up vectors with weird elements into scalars
+      .fewerElementsIf(
+        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
+        scalarize(0))
+      .fewerElementsIf(
+        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
+        scalarize(1))
+      .clampScalar(BigTyIdx, S32, S512)
+      .widenScalarIf(
+        [=](const LegalityQuery &Query) {
+          const LLT &Ty = Query.Types[BigTyIdx];
+          return !isPowerOf2_32(Ty.getSizeInBits()) &&
+                 Ty.getSizeInBits() % 16 != 0;
+        },
+        [=](const LegalityQuery &Query) {
+          // Pick the next power of 2, or a multiple of 64 over 128.
+          // Whichever is smaller.
+          const LLT &Ty = Query.Types[BigTyIdx];
+          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
+          if (NewSizeInBits >= 256) {
+            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
+            if (RoundedTo < NewSizeInBits)
+              NewSizeInBits = RoundedTo;
+          }
+          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
+        })
       .legalIf([=](const LegalityQuery &Query) {
           const LLT &BigTy = Query.Types[BigTyIdx];
           const LLT &LitTy = Query.Types[LitTyIdx];
-          return BigTy.getSizeInBits() % 32 == 0 &&
-                 LitTy.getSizeInBits() % 32 == 0 &&
+
+          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+            return false;
+          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+            return false;
+
+          return BigTy.getSizeInBits() % 16 == 0 &&
+                 LitTy.getSizeInBits() % 16 == 0 &&
                  BigTy.getSizeInBits() <= 512;
         })
       // Any vectors left are the wrong size. Scalarize them.
-      .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                       [](const LegalityQuery &Query) {
-                         return std::make_pair(
-                           0, Query.Types[0].getElementType());
-                       })
-      .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                       [](const LegalityQuery &Query) {
-                         return std::make_pair(
-                           1, Query.Types[1].getElementType());
-                       });
-
+      .scalarize(0)
+      .scalarize(1);
   }
 
   computeTables();
   verify(*ST.getInstrInfo());
 }
+
+bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &MIRBuilder,
+                                         GISelChangeObserver &Observer) const {
+  // Dispatch on the generic opcode to the matching custom-legalization
+  // routine; opcodes without one report failure by returning false.
+  const unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case TargetOpcode::G_ADDRSPACE_CAST:
+    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_FRINT:
+    return legalizeFrint(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_FCEIL:
+    return legalizeFceil(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_INTRINSIC_TRUNC:
+    return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP: // Signedness is passed as the last argument.
+    return legalizeITOFP(MI, MRI, MIRBuilder, Opc == TargetOpcode::G_SITOFP);
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINNUM_IEEE:
+  case TargetOpcode::G_FMAXNUM_IEEE:
+    return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+    return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
+  default:
+    return false;
+  }
+}
+
+Register AMDGPULegalizerInfo::getSegmentAperture(
+  unsigned AS,
+  MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const LLT S32 = LLT::scalar(32);
+  // Emit the 32-bit aperture for AS; any AS but LOCAL_ADDRESS means private.
+  if (ST.hasApertureRegs()) {
+    // FIXME: Use inline constants (src_{shared, private}_base) instead of
+    // getreg.
+    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+    unsigned Encoding =
+        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+
+    Register ApertureReg = MRI.createGenericVirtualRegister(S32);
+    Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    // Read the MEM_BASES field described by Encoding with S_GETREG_B32.
+    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+      .addDef(GetReg)
+      .addImm(Encoding);
+    MRI.setType(GetReg, S32);
+    // Shift the field left by its width (WidthM1 + 1) to form the result.
+    auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
+    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+      .addDef(ApertureReg)
+      .addUse(GetReg)
+      .addUse(ShiftAmt.getReg(0));
+
+    return ApertureReg;
+  }
+  // No aperture registers: load the value from memory via the queue pointer.
+  Register QueuePtr = MRI.createGenericVirtualRegister(
+    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+  // FIXME: Placeholder until we can track the input registers.
+  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+
+  // Offset into amd_queue_t for group_segment_aperture_base_hi /
+  // private_segment_aperture_base_hi.
+  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+
+  // FIXME: Don't use undef
+  Value *V = UndefValue::get(PointerType::get(
+                               Type::getInt8Ty(MF.getFunction().getContext()),
+                               AMDGPUAS::CONSTANT_ADDRESS));
+
+  MachinePointerInfo PtrInfo(V, StructOffset);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+    PtrInfo,
+    MachineMemOperand::MOLoad |
+    MachineMemOperand::MODereferenceable |
+    MachineMemOperand::MOInvariant,
+    4,
+    MinAlign(64, StructOffset));
+
+  Register LoadResult = MRI.createGenericVirtualRegister(S32);
+  Register LoadAddr;
+  // Form the address QueuePtr + StructOffset, then load the s32 field from it.
+  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+  return LoadResult;
+}
+
+bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  // Custom-lower a scalar G_ADDRSPACE_CAST; returns true on success.
+  MIRBuilder.setInstr(MI);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned DestAS = DstTy.getAddressSpace();
+  unsigned SrcAS = SrcTy.getAddressSpace();
+
+  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
+  // vector element.
+  assert(!DstTy.isVector());
+
+  const AMDGPUTargetMachine &TM
+    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+  // Casts the target lowering reports as no-ops are retagged as G_BITCAST.
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
+    return true;
+  }
+  // flat -> local/private: take the low 32 bits unless the source is null.
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
+    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+    unsigned NullVal = TM.getNullPointerValue(DestAS);
+
+    auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
+    auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
+
+    Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
+
+    // Extract low 32-bits of the pointer.
+    MIRBuilder.buildExtract(PtrLo32, Src, 0);
+
+    Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
+    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
+
+    MI.eraseFromParent();
+    return true;
+  }
+  // local/private -> flat: pair the 32-bit pointer with the segment aperture.
+  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+  auto SegmentNull =
+      MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+  auto FlatNull =
+      MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+  // The aperture is chosen by the segment being cast from, i.e. SrcAS.
+  Register ApertureReg = getSegmentAperture(SrcAS, MRI, MIRBuilder);
+
+  Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
+
+  Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+
+  // Coerce the type of the low half of the result so we can use merge_values.
+  Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
+  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+    .addDef(SrcAsInt)
+    .addUse(Src);
+
+  // TODO: Should we allow mismatched types but matching sizes in merges to
+  // avoid the ptrtoint?
+  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
+
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFrint(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  // Expand a 64-bit scalar G_FRINT: add and then subtract 2^52 carrying the
+  // sign of the source, keeping the source itself when |src| exceeds C2.
+  MIRBuilder.setInstr(MI);
+  Register Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Src);
+  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
+
+  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
+  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
+  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
+  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
+
+  // TODO: Should this propagate fast-math-flags?
+  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
+  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
+  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
+  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
+  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
+  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
+  // The select now defines the result, so the original must be removed.
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFceil(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  const LLT S1 = LLT::scalar(1);
+  const LLT S64 = LLT::scalar(64);
+
+  Register Src = MI.getOperand(1).getReg();
+  assert(MRI.getType(Src) == S64);
+
+  // Expand a 64-bit G_FCEIL as:
+  //   result = trunc(src)
+  //   if (src > 0.0 && src != result)
+  //     result += 1.0
+  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
+
+  const auto Zero = B.buildFConstant(S64, 0.0);
+  const auto One = B.buildFConstant(S64, 1.0);
+  auto Gt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
+  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
+  auto And = B.buildAnd(S1, Gt0, NeTrunc);
+  auto Add = B.buildSelect(S64, And, One, Zero);
+
+  // TODO: Should this propagate fast-math-flags?
+  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
+  MI.eraseFromParent(); // The G_FADD above now defines the result.
+  return true;
+}
+
+static MachineInstrBuilder extractF64Exponent(unsigned Hi,
+                                              MachineIRBuilder &B) {
+  // Hi is the upper 32 bits of an f64: extract the ExpBits-wide biased
+  // exponent starting at bit (FractBits - 32) and subtract the bias (1023).
+  const unsigned FractBits = 52;
+  const unsigned ExpBits = 11;
+  LLT S32 = LLT::scalar(32);
+  auto Const0 = B.buildConstant(S32, FractBits - 32);
+  auto Const1 = B.buildConstant(S32, ExpBits);
+  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
+    .addUse(Hi) // Value the bit field is extracted from.
+    .addUse(Const0.getReg(0))
+    .addUse(Const1.getReg(0));
+  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  // Expand a 64-bit G_INTRINSIC_TRUNC using integer operations on its bits.
+  const LLT S1 = LLT::scalar(1);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+
+  Register Src = MI.getOperand(1).getReg();
+  assert(MRI.getType(Src) == S64);
+
+  // TODO: Should this use extract since the low half is unused?
+  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+  Register Hi = Unmerge.getReg(1);
+
+  // Extract the upper half, since this is where we will find the sign and
+  // exponent.
+  auto Exp = extractF64Exponent(Hi, B);
+
+  const unsigned FractBits = 52;
+
+  // Extract the sign bit.
+  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
+  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
+
+  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
+  const auto Zero32 = B.buildConstant(S32, 0);
+
+  // Extend back to 64-bits.
+  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
+  // Clear the source bits selected by FractMask shifted right by Exp.
+  auto Shr = B.buildAShr(S64, FractMask, Exp);
+  auto Not = B.buildNot(S64, Shr);
+  auto Tmp0 = B.buildAnd(S64, Src, Not);
+  auto FiftyOne = B.buildConstant(S32, FractBits - 1);
+
+  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
+  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
+  // Exp < 0 keeps only the sign bit; Exp > 51 returns Src unchanged.
+  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
+  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
+  MI.eraseFromParent(); // The select above now defines the result.
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeITOFP(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B, bool Signed) const {
+  // Convert a 64-bit integer to f64 by converting each 32-bit half on its
+  // own: result = ldexp(fp(hi), 32) + fp(lo). Only the high half is
+  // converted as signed when \p Signed is set.
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+
+  B.setInstr(MI);
+  const Register Result = MI.getOperand(0).getReg();
+  const Register Input = MI.getOperand(1).getReg();
+  assert(MRI.getType(Input) == S64 && MRI.getType(Result) == S64);
+
+  auto Halves = B.buildUnmerge({S32, S32}, Input);
+  const Register LoHalf = Halves.getReg(0);
+  const Register HiHalf = Halves.getReg(1);
+
+  auto HiFP = Signed ? B.buildSITOFP(S64, HiHalf) : B.buildUITOFP(S64, HiHalf);
+  auto LoFP = B.buildUITOFP(S64, LoHalf);
+
+  auto ShiftBy32 = B.buildConstant(S32, 32);
+  auto ScaledHi = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
+    .addUse(HiFP.getReg(0))
+    .addUse(ShiftBy32.getReg(0));
+
+  // TODO: Should this propagate fast-math-flags?
+  B.buildFAdd(Result, ScaledHi, LoFP);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  MachineFunction &MF = B.getMF();
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // Returns true when MI is left as-is or was successfully lowered.
+  const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
+                        MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
+
+  // With ieee_mode disabled, the instructions have the correct behavior
+  // already for G_FMINNUM/G_FMAXNUM
+  if (!MFI->getMode().IEEE)
+    return !IsIEEEOp;
+
+  if (IsIEEEOp)
+    return true;
+  // Lower with the generic helper, keeping the insertion point at MI.
+  MachineIRBuilder HelperBuilder(MI);
+  GISelObserverWrapper DummyObserver;
+  LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
+  HelperBuilder.setInstr(MI);
+  return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
+}
+
+bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  // A constant in-range index becomes a G_EXTRACT at the element's bit
+  // offset; a constant out-of-range index yields undef.
+  // TODO: Should move some of this into LegalizerHelper.
+  // TODO: Promote dynamic indexing of s16 to s32
+  // TODO: Dynamic s64 indexing is only legal for SGPR.
+  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+  if (!IdxVal) // Dynamic case will be selected to register indexing.
+    return true;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  LLT VecTy = MRI.getType(Vec);
+  LLT EltTy = VecTy.getElementType();
+  assert(EltTy == MRI.getType(Dst));
+
+  // The constant is signed: a negative index must not pass the bound check.
+  const int64_t Idx = IdxVal.getValue();
+  B.setInstr(MI);
+  if (Idx >= 0 && Idx < VecTy.getNumElements())
+    B.buildExtract(Dst, Vec, Idx * EltTy.getSizeInBits());
+  else
+    B.buildUndef(Dst);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  // A constant in-range index becomes a G_INSERT at the element's bit
+  // offset; a constant out-of-range index yields an undef result.
+  // TODO: Should move some of this into LegalizerHelper.
+  // TODO: Promote dynamic indexing of s16 to s32
+  // TODO: Dynamic s64 indexing is only legal for SGPR.
+  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+  if (!IdxVal) // Dynamic case will be selected to register indexing.
+    return true;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  Register Ins = MI.getOperand(2).getReg();
+  LLT VecTy = MRI.getType(Vec);
+  LLT EltTy = VecTy.getElementType();
+  assert(EltTy == MRI.getType(Ins));
+
+  // The constant is signed: a negative index must not pass the bound check.
+  const int64_t Idx = IdxVal.getValue();
+  B.setInstr(MI);
+  if (Idx >= 0 && Idx < VecTy.getNumElements())
+    B.buildInsert(Dst, Vec, Ins, Idx * EltTy.getSizeInBits());
+  else
+    B.buildUndef(Dst);
+  MI.eraseFromParent();
+  return true;
+}
+
+// Return MI's sole user if it is a G_BRCOND in the same block, else null.
+static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
+                                       MachineRegisterInfo &MRI) {
+  const Register Cond = MI.getOperand(0).getReg();
+  if (!MRI.hasOneNonDBGUse(Cond))
+    return nullptr;
+  MachineInstr &User = *MRI.use_instr_nodbg_begin(Cond);
+  if (User.getParent() != MI.getParent())
+    return nullptr;
+  return User.getOpcode() == AMDGPU::G_BRCOND ? &User : nullptr;
+}
+
+Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
+                                                Register Reg, LLT Ty) const {
+  // Reuse the virtual register already recorded as the live-in for Reg;
+  // otherwise create one of type Ty and record it as the live-in.
+  if (Register Existing = MRI.getLiveInVirtReg(Reg))
+    return Existing;
+  Register Fresh = MRI.createGenericVirtualRegister(Ty);
+  MRI.addLiveIn(Reg, Fresh);
+  return Fresh;
+}
+
+bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
+                                         const ArgDescriptor *Arg) const {
+  // Define DstReg from the preloaded argument Arg; false if not a register.
+  if (!Arg->isRegister())
+    return false; // TODO: Handle these
+
+  assert(Arg->getRegister() != 0);
+  assert(Arg->getRegister().isPhysical());
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT Ty = MRI.getType(DstReg);
+  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
+  if (Arg->isMasked()) {
+    // TODO: Should we try to emit this once in the entry block?
+    const LLT S32 = LLT::scalar(32);
+    const unsigned Mask = Arg->getMask();
+    const unsigned Shift = countTrailingZeros<unsigned>(Mask);
+    auto ShiftAmt = B.buildConstant(S32, Shift);
+    auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
+    B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
+  } else
+    B.buildCopy(DstReg, LiveIn);
+
+  // Insert the argument copy if it doesn't already exist.
+  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
+  if (!MRI.getVRegDef(LiveIn)) {
+    // Save the insertion point so the caller's later builds stay in place.
+    MachineBasicBlock &OrigInsBB = B.getMBB();
+    auto OrigInsPt = B.getInsertPt();
+    MachineBasicBlock &EntryMBB = B.getMF().front();
+    EntryMBB.addLiveIn(Arg->getRegister());
+    B.setInsertPt(EntryMBB, EntryMBB.begin());
+    B.buildCopy(LiveIn, Arg->getRegister());
+    B.setInsertPt(OrigInsBB, OrigInsPt);
+  }
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
+  MachineInstr &MI,
+  MachineRegisterInfo &MRI,
+  MachineIRBuilder &B,
+  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+  // Replace MI with a read of the preloaded argument \p ArgType; fails when
+  // the function has no such argument or it cannot be loaded.
+  B.setInstr(MI);
+  const auto *FuncInfo = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+  const ArgDescriptor *Desc;
+  const TargetRegisterClass *UnusedRC;
+  std::tie(Desc, UnusedRC) = FuncInfo->getPreloadedValue(ArgType);
+  if (!Desc) {
+    LLVM_DEBUG(dbgs() << "Required arg register missing\n");
+    return false;
+  }
+
+  const Register Result = MI.getOperand(0).getReg();
+  if (!loadInputValue(Result, B, Desc))
+    return false;
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
+                                                 MachineRegisterInfo &MRI,
+                                                 MachineIRBuilder &B) const {
+  const auto *FuncInfo = B.getMF().getInfo<SIMachineFunctionInfo>();
+  // Non-entry functions receive the pointer as a preloaded argument.
+  if (!FuncInfo->isEntryFunction())
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+
+  // Entry functions: kernarg segment pointer plus the implicit-arg offset.
+  B.setInstr(MI);
+  const uint64_t ImplicitOffset =
+    ST.getTargetLowering()->getImplicitParameterOffset(
+      B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
+  const Register Result = MI.getOperand(0).getReg();
+  const LLT PtrTy = MRI.getType(Result);
+  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
+  const ArgDescriptor *KernargDesc;
+  const TargetRegisterClass *UnusedRC;
+  std::tie(KernargDesc, UnusedRC)
+    = FuncInfo->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+  if (!KernargDesc)
+    return false;
+
+  const Register KernargPtr = MRI.createGenericVirtualRegister(PtrTy);
+  if (!loadInputValue(KernargPtr, B, KernargDesc))
+    return false;
+
+  auto OffsetCst = B.buildConstant(OffsetTy, ImplicitOffset);
+  B.buildGEP(Result, KernargPtr, OffsetCst.getReg(0));
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+                                            MachineRegisterInfo &MRI,
+                                            MachineIRBuilder &B) const {
+  // The intrinsic ID is the first operand after the explicit defs.
+  const MachineOperand &IDOp = MI.getOperand(MI.getNumExplicitDefs());
+  switch (IDOp.getIntrinsicID()) {
+  case Intrinsic::amdgcn_if: {
+    // Fold the intrinsic and the G_BRCOND using it into one SI_IF pseudo.
+    MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI);
+    if (!BrCond)
+      return false;
+
+    const auto *TRI =
+      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+    const Register MaskDef = MI.getOperand(1).getReg();
+    const Register MaskUse = MI.getOperand(3).getReg();
+
+    B.setInstr(*BrCond);
+    B.buildInstr(AMDGPU::SI_IF)
+      .addDef(MaskDef)
+      .addUse(MaskUse)
+      .addMBB(BrCond->getOperand(1).getMBB());
+
+    MRI.setRegClass(MaskDef, TRI->getWaveMaskRegClass());
+    MRI.setRegClass(MaskUse, TRI->getWaveMaskRegClass());
+    MI.eraseFromParent();
+    BrCond->eraseFromParent();
+    return true;
+  }
+  case Intrinsic::amdgcn_loop: {
+    // Likewise, but the branch becomes an SI_LOOP pseudo.
+    MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI);
+    if (!BrCond)
+      return false;
+    const auto *TRI =
+      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+    const Register Mask = MI.getOperand(2).getReg();
+
+    B.setInstr(*BrCond);
+    B.buildInstr(AMDGPU::SI_LOOP)
+      .addUse(Mask)
+      .addMBB(BrCond->getOperand(1).getMBB());
+    MI.eraseFromParent();
+    BrCond->eraseFromParent();
+    MRI.setRegClass(Mask, TRI->getWaveMaskRegClass());
+    return true;
+  }
+  case Intrinsic::amdgcn_kernarg_segment_ptr:
+    return legalizePreloadedArgIntrin(
+      MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return legalizeImplicitArgPtr(MI, MRI, B);
+  case Intrinsic::amdgcn_workitem_id_x:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+  case Intrinsic::amdgcn_workitem_id_y:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+  case Intrinsic::amdgcn_workitem_id_z:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+  case Intrinsic::amdgcn_workgroup_id_x:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+  case Intrinsic::amdgcn_workgroup_id_y:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+  case Intrinsic::amdgcn_workgroup_id_z:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::DISPATCH_PTR);
+  case Intrinsic::amdgcn_queue_ptr:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::QUEUE_PTR);
+  case Intrinsic::amdgcn_implicit_buffer_ptr:
+    return legalizePreloadedArgIntrin(
+      MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
+  case Intrinsic::amdgcn_dispatch_id:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::DISPATCH_ID);
+  default:
+    return true;
+  }
+}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1cbd37c42c4b..3f1cc1d265dd 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -1,9 +1,8 @@
 //===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -16,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
 
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "AMDGPUArgumentUsageInfo.h"
 
 namespace llvm {
 
@@ -25,9 +25,51 @@ class GCNSubtarget;
 
 /// This class provides the information for the target register banks.
 class AMDGPULegalizerInfo : public LegalizerInfo {
+  const GCNSubtarget &ST;
+
 public:
   AMDGPULegalizerInfo(const GCNSubtarget &ST,
                       const GCNTargetMachine &TM);
+
+  bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      MachineIRBuilder &MIRBuilder,
+                      GISelChangeObserver &Observer) const override;
+
+  Register getSegmentAperture(unsigned AddrSpace,
+                              MachineRegisterInfo &MRI,
+                              MachineIRBuilder &MIRBuilder) const;
+
+  bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             MachineIRBuilder &MIRBuilder) const;
+  bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     MachineIRBuilder &MIRBuilder) const;
+  bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     MachineIRBuilder &MIRBuilder) const;
+  bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &MIRBuilder) const;
+  bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     MachineIRBuilder &MIRBuilder, bool Signed) const;
+  bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
+                            MachineIRBuilder &MIRBuilder) const;
+  bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &MIRBuilder) const;
+  bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                               MachineIRBuilder &MIRBuilder) const;
+
+  Register getLiveInRegister(MachineRegisterInfo &MRI,
+                             Register Reg, LLT Ty) const;
+
+  bool loadInputValue(Register DstReg, MachineIRBuilder &B,
+                      const ArgDescriptor *Arg) const;
+  bool legalizePreloadedArgIntrin(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
+  bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B) const;
+  bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder) const override;
+
 };
 } // End llvm namespace.
 #endif
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 14e880042691..ce0a9db7c7f4 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 
 #include "AMDGPU.h"
 #include "AMDGPULibFunc.h"
+#include "AMDGPUSubtarget.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/ADT/StringSet.h"
@@ -23,6 +23,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -30,6 +31,7 @@
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include <vector>
 #include <cmath>
@@ -66,6 +68,8 @@ private:
 
   typedef llvm::AMDGPULibFunc FuncInfo;
 
+  const TargetMachine *TM;
+
   // -fuse-native.
   bool AllNative = false;
 
@@ -73,7 +77,7 @@ private:
 
   // Return a pointer (pointer expr) to the function if function defintion with
   // "FuncName" exists. It may create a new function prototype in pre-link mode.
-  Constant *getFunction(Module *M, const FuncInfo& fInfo);
+  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
 
   // Replace a normal function with its native version.
   bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
@@ -135,12 +139,15 @@ private:
   // __read_pipe/__write_pipe
   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
 
+  // llvm.amdgcn.wavefrontsize
+  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
+
   // Get insertion point at entry.
   BasicBlock::iterator getEntryIns(CallInst * UI);
   // Insert an Alloc instruction.
   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
   // Get a scalar native builtin signle argument FP function
-  Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
+  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
 
 protected:
   CallInst *CI;
@@ -153,6 +160,8 @@ protected:
   }
 
 public:
+  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+
   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
 
   void initNativeFuncs();
@@ -167,15 +176,16 @@ namespace {
 
   class AMDGPUSimplifyLibCalls : public FunctionPass {
 
-  AMDGPULibCalls Simplifier;
-
   const TargetOptions Options;
 
+  AMDGPULibCalls Simplifier;
+
   public:
     static char ID; // Pass identification
 
-    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
-      : FunctionPass(ID), Options(Opt) {
+    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
+                           const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), Options(Opt), Simplifier(TM) {
       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
 
@@ -217,19 +227,19 @@ INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
                 false, false)
 
 template <typename IRB>
-static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
+static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
                               const Twine &Name = "") {
   CallInst *R = B.CreateCall(Callee, Arg, Name);
-  if (Function* F = dyn_cast<Function>(Callee))
+  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
     R->setCallingConv(F->getCallingConv());
   return R;
 }
 
 template <typename IRB>
-static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
-                               const Twine &Name = "") {
+static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
+                               Value *Arg2, const Twine &Name = "") {
   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
-  if (Function* F = dyn_cast<Function>(Callee))
+  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
     R->setCallingConv(F->getCallingConv());
   return R;
 }
@@ -472,7 +482,7 @@ static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
   return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
 }
 
-Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
+FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
   // If we are doing PreLinkOpt, the function is external. So it is safe to
   // use getOrInsertFunction() at this stage.
 
@@ -519,11 +529,11 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
 
     nf.setPrefix(AMDGPULibFunc::NATIVE);
     nf.setId(AMDGPULibFunc::EI_SIN);
-    Constant *sinExpr = getFunction(M, nf);
+    FunctionCallee sinExpr = getFunction(M, nf);
 
     nf.setPrefix(AMDGPULibFunc::NATIVE);
     nf.setId(AMDGPULibFunc::EI_COS);
-    Constant *cosExpr = getFunction(M, nf);
+    FunctionCallee cosExpr = getFunction(M, nf);
     if (sinExpr && cosExpr) {
       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
@@ -555,7 +565,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
     return sincosUseNative(aCI, FInfo);
 
   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
-  Constant *F = getFunction(aCI->getModule(), FInfo);
+  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
   if (!F)
     return false;
 
@@ -613,7 +623,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
   auto *FTy = FunctionType::get(Callee->getReturnType(),
                                 ArrayRef<Type *>(ArgTys), false);
   AMDGPULibFunc NewLibFunc(Name, FTy);
-  auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
+  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
   if (!F)
     return false;
 
@@ -640,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   // Ignore indirect calls.
   if (Callee == 0) return false;
 
-  FuncInfo FInfo;
-  if (!parseFunctionName(Callee->getName(), &FInfo))
-    return false;
-
-  // Further check the number of arguments to see if they match.
-  if (CI->getNumArgOperands() != FInfo.getNumArgs())
-    return false;
-
   BasicBlock *BB = CI->getParent();
   LLVMContext &Context = CI->getParent()->getContext();
   IRBuilder<> B(Context);
@@ -659,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
     B.setFastMathFlags(FPOp->getFastMathFlags());
 
+  switch (Callee->getIntrinsicID()) {
+  default:
+    break;
+  case Intrinsic::amdgcn_wavefrontsize:
+    return !EnablePreLink && fold_wavefrontsize(CI, B);
+  }
+
+  FuncInfo FInfo;
+  if (!parseFunctionName(Callee->getName(), &FInfo))
+    return false;
+
+  // Further check the number of arguments to see if they match.
+  if (CI->getNumArgOperands() != FInfo.getNumArgs())
+    return false;
+
   if (TDOFold(CI, FInfo))
     return true;
 
@@ -795,7 +812,7 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
 
   AMDGPULibFunc nf = FInfo;
   nf.setPrefix(AMDGPULibFunc::NATIVE);
-  if (Constant *FPExpr = getFunction(M, nf)) {
+  if (FunctionCallee FPExpr = getFunction(M, nf)) {
     LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
 
     CI->setCalledFunction(FPExpr);
@@ -848,7 +865,7 @@ bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
 
 namespace llvm {
 static double log2(double V) {
-#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
+#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
   return ::log2(V);
 #else
   return log(V) / 0.693147180559945309417;
@@ -934,9 +951,10 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
     // pow[r](x, [-]0.5) = sqrt(x)
     bool issqrt = CF->isExactlyValue(0.5);
-    if (Constant *FPExpr = getFunction(M,
-        AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
-                             : AMDGPULibFunc::EI_RSQRT, FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
+                                                : AMDGPULibFunc::EI_RSQRT,
+                                         FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
@@ -1003,8 +1021,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
 
   // powr ---> exp2(y * log2(x))
   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
-  Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
-                                                   FInfo));
+  FunctionCallee ExpExpr =
+      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
   if (!ExpExpr)
     return false;
 
@@ -1090,8 +1108,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
 
   Value *nval;
   if (needabs) {
-    Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
-                                                     FInfo));
+    FunctionCallee AbsExpr =
+        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
     if (!AbsExpr)
       return false;
     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
@@ -1099,8 +1117,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
     nval = cnval ? cnval : opr0;
   }
   if (needlog) {
-    Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
-                                                     FInfo));
+    FunctionCallee LogExpr =
+        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
     if (!LogExpr)
       return false;
     nval = CreateCallEx(B,LogExpr, nval, "__log2");
@@ -1159,8 +1177,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
       replaceCall(nval);
@@ -1168,8 +1186,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     }
   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
       replaceCall(nval);
@@ -1186,8 +1204,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
                         << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
@@ -1243,7 +1261,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
 }
 
 // Get a scalar native builtin signle argument FP function
-Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
+FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
+                                                 const FuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
     return nullptr;
   FuncInfo nf = FInfo;
@@ -1256,8 +1275,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
-    if (Constant *FPExpr = getNativeFunction(
-        CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
+    if (FunctionCallee FPExpr = getNativeFunction(
+            CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       Value *opr0 = CI->getArgOperand(0);
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                         << "sqrt(" << *opr0 << ")\n");
@@ -1334,7 +1353,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   // function.
   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
-  Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
+  FunctionCallee Fsincos = getFunction(M, nf);
   if (!Fsincos) return false;
 
   BasicBlock::iterator ItOld = B.GetInsertPoint();
@@ -1342,7 +1361,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   B.SetInsertPoint(UI);
 
   Value *P = Alloc;
-  Type *PTy = Fsincos->getFunctionType()->getParamType(1);
+  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
   // The allocaInst allocates the memory in private address space. This need
   // to be bitcasted to point to the address space of cos pointer type.
   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
@@ -1356,12 +1375,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   if (!isSin) { // CI->cos, UI->sin
     B.SetInsertPoint(&*ItOld);
     UI->replaceAllUsesWith(&*Call);
-    Instruction *Reload = B.CreateLoad(Alloc);
+    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
     CI->replaceAllUsesWith(Reload);
     UI->eraseFromParent();
     CI->eraseFromParent();
   } else { // CI->sin, UI->cos
-    Instruction *Reload = B.CreateLoad(Alloc);
+    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
     UI->replaceAllUsesWith(Reload);
     CI->replaceAllUsesWith(Call);
     UI->eraseFromParent();
@@ -1370,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   return true;
 }
 
+// Replace a call to llvm.amdgcn.wavefrontsize with the subtarget's wave size.
+bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
+  if (!TM)
+    return false;
+  // Do not fold when the CPU is unset or "generic" and the feature string
+  // does not mention wavefrontsize.
+  const StringRef CPU = TM->getTargetCPU();
+  const StringRef Features = TM->getTargetFeatureString();
+  const bool GenericCPU = CPU.empty() || CPU.equals_lower("generic");
+  const bool NamesWaveSize =
+      Features.find_lower("wavefrontsize") != StringRef::npos;
+  if (GenericCPU && !NamesWaveSize)
+    return false;
+
+  const unsigned N =
+      TM->getSubtarget<GCNSubtarget>(*CI->getFunction()).getWavefrontSize();
+  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
+               << N << "\n");
+  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
+  CI->eraseFromParent();
+  return true;
+}
+
 // Get insertion point at entry.
 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
   Function * Func = UI->getParent()->getParent();
@@ -1679,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
 }
 
 // Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
-  return new AMDGPUSimplifyLibCalls(Opt);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
+                                                     const TargetMachine *TM) {
+  return new AMDGPUSimplifyLibCalls(Opt, TM);
 }
 
 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 4fc3fe0f105b..a5bac25701a0 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPULibFunc.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,6 +63,8 @@ struct ManglingRule {
    int getNumLeads() const { return (Lead[0] ? 1 : 0) + (Lead[1] ? 1 : 0); }
 
    unsigned getNumArgs() const;
+
+   static StringMap<int> buildManglingRulesMap();
 };
 
 // Information about library functions with unmangled names.
@@ -77,16 +78,7 @@ class UnmangledFuncInfo {
   // Number of entries in Table.
   static const unsigned TableSize;
 
-  // Map function name to index.
-  class NameMap : public StringMap<unsigned> {
-  public:
-    NameMap() {
-      for (unsigned I = 0; I != TableSize; ++I)
-        (*this)[Table[I].Name] = I;
-    }
-  };
-  friend class NameMap;
-  static NameMap Map;
+  static StringMap<unsigned> buildNameMap();
 
 public:
   using ID = AMDGPULibFunc::EFuncId;
@@ -102,7 +94,8 @@ public:
            static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED);
   }
   static ID toFuncId(unsigned Index) {
-    assert(Index < TableSize && "Invalid unmangled library function");
+    assert(Index < TableSize &&
+           "Invalid unmangled library function");
     return static_cast<ID>(
         Index + 1 + static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED));
   }
@@ -350,18 +343,7 @@ const UnmangledFuncInfo UnmangledFuncInfo::Table[] = {
 };
 
 const unsigned UnmangledFuncInfo::TableSize =
-    sizeof(UnmangledFuncInfo::Table) / sizeof(UnmangledFuncInfo::Table[0]);
-
-UnmangledFuncInfo::NameMap UnmangledFuncInfo::Map;
-
-static const struct ManglingRulesMap : public StringMap<int> {
-  ManglingRulesMap()
-    : StringMap<int>(sizeof(manglingRules)/sizeof(manglingRules[0])) {
-    int Id = 0;
-    for (auto Rule : manglingRules)
-      insert({ Rule.Name, Id++ });
-  }
-} manglingRulesMap;
+    array_lengthof(UnmangledFuncInfo::Table);
 
 static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
                                        const AMDGPULibFunc::Param (&Leads)[2]) {
@@ -569,7 +551,17 @@ static AMDGPULibFunc::ENamePrefix parseNamePrefix(StringRef& mangledName) {
   return Pfx;
 }
 
+// Build the lookup from each manglingRules entry's Name to its table index.
+StringMap<int> ManglingRule::buildManglingRulesMap() {
+  StringMap<int> Map(array_lengthof(manglingRules));
+  for (int Id = 0, E = array_lengthof(manglingRules); Id != E; ++Id)
+    Map.insert({manglingRules[Id].Name, Id});
+  return Map;
+}
+
 bool AMDGPUMangledLibFunc::parseUnmangledName(StringRef FullName) {
+  static const StringMap<int> manglingRulesMap =
+      ManglingRule::buildManglingRulesMap();
   FuncId = static_cast<EFuncId>(manglingRulesMap.lookup(FullName));
   return FuncId != EI_NONE;
 }
@@ -961,8 +953,8 @@ Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) {
   return nullptr;
 }
 
-Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
-                                             const AMDGPULibFunc &fInfo) {
+FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
+                                                  const AMDGPULibFunc &fInfo) {
   std::string const FuncName = fInfo.mangle();
   Function *F = dyn_cast_or_null<Function>(
     M->getValueSymbolTable().lookup(FuncName));
@@ -988,7 +980,7 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
     }
   }
 
-  Constant *C = nullptr;
+  FunctionCallee C;
   if (hasPtr) {
     // Do not set extra attributes for functions with pointer arguments.
     C = M->getOrInsertFunction(FuncName, FuncTy);
@@ -1002,10 +994,18 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
     C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
   }
 
-  return cast<Function>(C);
+  return C;
+}
+
+StringMap<unsigned> UnmangledFuncInfo::buildNameMap() {
+  StringMap<unsigned> IndexByName; // Table[I].Name -> I
+  for (unsigned Idx = 0; Idx != TableSize; ++Idx)
+    IndexByName[Table[Idx].Name] = Idx;
+  return IndexByName;
 }
 
 bool UnmangledFuncInfo::lookup(StringRef Name, ID &Id) {
+  static const StringMap<unsigned> Map = buildNameMap();
   auto Loc = Map.find(Name);
   if (Loc != Map.end()) {
     Id = toFuncId(Loc->second);
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h
index fe062384800a..2354ed7df205 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -1,9 +1,8 @@
 //===-- AMDGPULibFunc.h ----------------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -394,8 +393,8 @@ public:
   }
   static Function *getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo);
 
-  static Function *getOrInsertFunction(llvm::Module *M,
-                                       const AMDGPULibFunc &fInfo);
+  static FunctionCallee getOrInsertFunction(llvm::Module *M,
+                                            const AMDGPULibFunc &fInfo);
   static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr);
 
 private:
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 2cec8fe53283..15032969890e 100644
--- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 743dc7a0d00b..5dd5b3691e0a 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,8 +109,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
       // can't represent this with range metadata because it's only allowed for
       // integer types.
-      if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
+          !ST.hasUsableDSOffset())
         continue;
 
       // FIXME: We can replace this with equivalent alias.scope/noalias
@@ -132,6 +132,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
                                       KernArgBaseAlign);
 
     Value *ArgPtr;
+    Type *AdjustedArgTy;
     if (DoShiftOpt) { // FIXME: Handle aggregate types
       // Since we don't have sub-dword scalar loads, avoid doing an extload by
       // loading earlier than the argument address, and extracting the relevant
@@ -139,30 +140,27 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
       //
       // Additionally widen any sub-dword load to i32 even if suitably aligned,
       // so that CSE between different argument loads works easily.
-
       ArgPtr = Builder.CreateConstInBoundsGEP1_64(
-        KernArgSegment,
-        AlignDownOffset,
-        Arg.getName() + ".kernarg.offset.align.down");
-      ArgPtr = Builder.CreateBitCast(ArgPtr,
-                                     Builder.getInt32Ty()->getPointerTo(AS),
-                                     ArgPtr->getName() + ".cast");
+          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
+          Arg.getName() + ".kernarg.offset.align.down");
+      AdjustedArgTy = Builder.getInt32Ty();
     } else {
       ArgPtr = Builder.CreateConstInBoundsGEP1_64(
-        KernArgSegment,
-        EltOffset,
-        Arg.getName() + ".kernarg.offset");
-      ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
-                                     ArgPtr->getName() + ".cast");
+          Builder.getInt8Ty(), KernArgSegment, EltOffset,
+          Arg.getName() + ".kernarg.offset");
+      AdjustedArgTy = ArgTy;
     }
 
     if (IsV3 && Size >= 32) {
       V4Ty = VectorType::get(VT->getVectorElementType(), 4);
       // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
-      ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(AS));
+      AdjustedArgTy = V4Ty;
     }
 
-    LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
+    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
+                                   ArgPtr->getName() + ".cast");
+    LoadInst *Load =
+        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
     Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
 
     MDBuilder MDB(Ctx);
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index a43dcef4cf0b..00e12f808783 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPULowerKernelAttributes.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f6bdbf5e9be2..ae4c32c258a7 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,7 @@
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "R600AsmPrinter.h"
 #include "SIInstrInfo.h"
@@ -91,6 +90,10 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
     return MCSymbolRefExpr::VK_AMDGPU_REL32_LO;
   case SIInstrInfo::MO_REL32_HI:
     return MCSymbolRefExpr::VK_AMDGPU_REL32_HI;
+  case SIInstrInfo::MO_ABS32_LO:
+    return MCSymbolRefExpr::VK_AMDGPU_ABS32_LO;
+  case SIInstrInfo::MO_ABS32_HI:
+    return MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
   }
 }
 
@@ -101,17 +104,22 @@ const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
     = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx);
   const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx);
 
-  assert(SrcBB.front().getOpcode() == AMDGPU::S_GETPC_B64 &&
-         ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
+  // FIXME: The first half of this assert should be removed. This should
+  // probably be PC relative instead of using the source block symbol, and
+  // therefore the indirect branch expansion should use a bundle.
+  assert(
+      skipDebugInstructionsForward(SrcBB.begin(), SrcBB.end())->getOpcode() ==
+          AMDGPU::S_GETPC_B64 &&
+      ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
 
   // s_getpc_b64 returns the address of next instruction.
   const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
   SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
 
-  if (MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_FORWARD)
+  if (MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_FORWARD)
     return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
 
-  assert(MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_BACKWARD);
+  assert(MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_BACKWARD);
   return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
 }
 
@@ -142,10 +150,13 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
     SmallString<128> SymbolName;
     AP.getNameWithPrefix(SymbolName, GV);
     MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
-    const MCExpr *SymExpr =
+    const MCExpr *Expr =
       MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
-    const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
-      MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+    int64_t Offset = MO.getOffset();
+    if (Offset != 0) {
+      Expr = MCBinaryExpr::createAdd(Expr,
+                                     MCConstantExpr::create(Offset, Ctx), Ctx);
+    }
     MCOp = MCOperand::createExpr(Expr);
     return true;
   }
@@ -321,14 +332,13 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
 #endif
 
-    if (STI.dumpCode()) {
-      // Disassemble instruction/operands to text.
+    if (DumpCodeInstEmitter) {
+      // Disassemble instruction/operands to text
       DisasmLines.resize(DisasmLines.size() + 1);
       std::string &DisasmLine = DisasmLines.back();
       raw_string_ostream DisasmStream(DisasmLine);
 
-      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
-                                    *STI.getInstrInfo(),
+      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                     *STI.getRegisterInfo());
       InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI);
 
@@ -337,10 +347,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<char, 16> CodeBytes;
       raw_svector_ostream CodeStream(CodeBytes);
 
-      auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
-      MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
-      InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
-                                    MF->getSubtarget<MCSubtargetInfo>());
+      DumpCodeInstEmitter->encodeInstruction(
+          TmpInst, CodeStream, Fixups, MF->getSubtarget<MCSubtargetInfo>());
       HexLines.resize(HexLines.size() + 1);
       std::string &HexLine = HexLines.back();
       raw_string_ostream HexStream(HexLine);
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6f44e2dbb2d5..237490957058 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 13b4b50149ce..0d3a1f1a769f 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   // except reserved size is not correctly aligned.
   const Function &F = MF.getFunction();
 
-  if (auto *Resolver = MF.getMMI().getResolver()) {
-    if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
-          Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
-      MemoryBound = PHA->isMemoryBound(&F);
-      WaveLimiter = PHA->needsWaveLimiter(&F);
-    }
-  }
+  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
+  MemoryBound = MemBoundAttr.isStringAttribute() &&
+                MemBoundAttr.getValueAsString() == "true";
+
+  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
+  WaveLimiter = WaveLimitAttr.isStringAttribute() &&
+                WaveLimitAttr.getValueAsString() == "true";
 
   CallingConv::ID CC = F.getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8d6b871bc03e..52987e2fa411 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 7b9f673c418c..4d9f08b3af01 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -1,9 +1,8 @@
 //===--- AMDGPUMachineModuleInfo.cpp ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
   AgentSSID = CTX.getOrInsertSyncScopeID("agent");
   WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
   WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+  SystemOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("one-as");
+  AgentOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("agent-one-as");
+  WorkgroupOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("workgroup-one-as");
+  WavefrontOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("wavefront-one-as");
+  SingleThreadOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("singlethread-one-as");
 }
 
 } // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index 1219ab26fb69..2b0b8b42acfe 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -1,9 +1,8 @@
 //===--- AMDGPUMachineModuleInfo.h ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,12 +29,22 @@ private:
   // All supported memory/synchronization scopes can be found here:
   //   http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
 
-  /// Agent synchronization scope ID.
+  /// Agent synchronization scope ID (cross address space).
   SyncScope::ID AgentSSID;
-  /// Workgroup synchronization scope ID.
+  /// Workgroup synchronization scope ID (cross address space).
   SyncScope::ID WorkgroupSSID;
-  /// Wavefront synchronization scope ID.
+  /// Wavefront synchronization scope ID (cross address space).
   SyncScope::ID WavefrontSSID;
+  /// System synchronization scope ID (single address space).
+  SyncScope::ID SystemOneAddressSpaceSSID;
+  /// Agent synchronization scope ID (single address space).
+  SyncScope::ID AgentOneAddressSpaceSSID;
+  /// Workgroup synchronization scope ID (single address space).
+  SyncScope::ID WorkgroupOneAddressSpaceSSID;
+  /// Wavefront synchronization scope ID (single address space).
+  SyncScope::ID WavefrontOneAddressSpaceSSID;
+  /// Single thread synchronization scope ID (single address space).
+  SyncScope::ID SingleThreadOneAddressSpaceSSID;
 
   /// In AMDGPU target synchronization scopes are inclusive, meaning a
   /// larger synchronization scope is inclusive of a smaller synchronization
@@ -44,35 +53,70 @@ private:
   /// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
   /// supported by the AMDGPU target.
   Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
-    if (SSID == SyncScope::SingleThread)
+    if (SSID == SyncScope::SingleThread ||
+        SSID == getSingleThreadOneAddressSpaceSSID())
       return 0;
-    else if (SSID == getWavefrontSSID())
+    else if (SSID == getWavefrontSSID() ||
+             SSID == getWavefrontOneAddressSpaceSSID())
       return 1;
-    else if (SSID == getWorkgroupSSID())
+    else if (SSID == getWorkgroupSSID() ||
+             SSID == getWorkgroupOneAddressSpaceSSID())
       return 2;
-    else if (SSID == getAgentSSID())
+    else if (SSID == getAgentSSID() ||
+             SSID == getAgentOneAddressSpaceSSID())
       return 3;
-    else if (SSID == SyncScope::System)
+    else if (SSID == SyncScope::System ||
+             SSID == getSystemOneAddressSpaceSSID())
       return 4;
 
     return None;
   }
 
+  /// \returns True if \p SSID is restricted to a single address space, false
+  /// otherwise.
+  bool isOneAddressSpace(SyncScope::ID SSID) const {
+    return SSID == getSingleThreadOneAddressSpaceSSID() ||
+        SSID == getWavefrontOneAddressSpaceSSID() ||
+        SSID == getWorkgroupOneAddressSpaceSSID() ||
+        SSID == getAgentOneAddressSpaceSSID() ||
+        SSID == getSystemOneAddressSpaceSSID();
+  }
+
 public:
   AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
 
-  /// \returns Agent synchronization scope ID.
+  /// \returns Agent synchronization scope ID (cross address space).
   SyncScope::ID getAgentSSID() const {
     return AgentSSID;
   }
-  /// \returns Workgroup synchronization scope ID.
+  /// \returns Workgroup synchronization scope ID (cross address space).
   SyncScope::ID getWorkgroupSSID() const {
     return WorkgroupSSID;
   }
-  /// \returns Wavefront synchronization scope ID.
+  /// \returns Wavefront synchronization scope ID (cross address space).
   SyncScope::ID getWavefrontSSID() const {
     return WavefrontSSID;
   }
+  /// \returns System synchronization scope ID (single address space).
+  SyncScope::ID getSystemOneAddressSpaceSSID() const {
+    return SystemOneAddressSpaceSSID;
+  }
+  /// \returns Agent synchronization scope ID (single address space).
+  SyncScope::ID getAgentOneAddressSpaceSSID() const {
+    return AgentOneAddressSpaceSSID;
+  }
+  /// \returns Workgroup synchronization scope ID (single address space).
+  SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
+    return WorkgroupOneAddressSpaceSSID;
+  }
+  /// \returns Wavefront synchronization scope ID (single address space).
+  SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
+    return WavefrontOneAddressSpaceSSID;
+  }
+  /// \returns Single thread synchronization scope ID (single address space).
+  SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
+    return SingleThreadOneAddressSpaceSSID;
+  }
 
   /// In AMDGPU target synchronization scopes are inclusive, meaning a
   /// larger synchronization scope is inclusive of a smaller synchronization
@@ -88,7 +132,11 @@ public:
     if (!AIO || !BIO)
       return None;
 
-    return AIO.getValue() > BIO.getValue();
+    bool IsAOneAddressSpace = isOneAddressSpace(A);
+    bool IsBOneAddressSpace = isOneAddressSpace(B);
+
+    return AIO.getValue() >= BIO.getValue() &&
+        (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
   }
 };
 
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 5e0b7d429022..8c11230f411a 100644
--- a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -1,9 +1,8 @@
 //===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
index 844958580a65..da4b3cf8bc24 100644
--- a/lib/Target/AMDGPU/AMDGPUMacroFusion.h
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -1,9 +1,8 @@
 //===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 7bd8533a0ccf..f7231471c107 100644
--- a/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -120,11 +119,11 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
       auto T = ArrayType::get(Type::getInt64Ty(C), 2);
       auto *GV = new GlobalVariable(
           M, T,
-          /*IsConstant=*/false, GlobalValue::ExternalLinkage,
+          /*isConstant=*/false, GlobalValue::ExternalLinkage,
           /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
           /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
           AMDGPUAS::GLOBAL_ADDRESS,
-          /*IsExternallyInitialized=*/false);
+          /*isExternallyInitialized=*/false);
       LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
 
       for (auto U : F.users()) {
diff --git a/lib/Target/AMDGPU/AMDGPUPTNote.h b/lib/Target/AMDGPU/AMDGPUPTNote.h
index 2feff14d34a1..8b69f51c1a0d 100644
--- a/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUNoteType.h - AMDGPU ELF PT_NOTE section info-------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index e53a8fe7c074..9613d5a843b3 100644
--- a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -72,7 +72,7 @@ public:
                  const TargetLowering *TLI_)
       : FIM(FIM_), DL(nullptr), TLI(TLI_) {}
 
-  void runOnFunction(Function &F);
+  bool runOnFunction(Function &F);
 
 private:
   struct MemAccessInfo {
@@ -101,7 +101,7 @@ private:
 
   const TargetLowering *TLI;
 
-  void visit(const Function &F);
+  AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
   static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
   static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
 
@@ -203,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
   return false;
 }
 
-void AMDGPUPerfHint::visit(const Function &F) {
-  auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
-  if (!FIP.second)
-    return;
-
-  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
+AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
+  AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
 
   LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
 
@@ -234,10 +230,10 @@ void AMDGPUPerfHint::visit(const Function &F) {
         if (&F == Callee) // Handle immediate recursion
           continue;
 
-        visit(*Callee);
         auto Loc = FIM.find(Callee);
+        if (Loc == FIM.end())
+          continue;
 
-        assert(Loc != FIM.end() && "No func info");
         FI.MemInstCount += Loc->second.MemInstCount;
         FI.InstCount += Loc->second.InstCount;
         FI.IAMInstCount += Loc->second.IAMInstCount;
@@ -257,36 +253,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
       }
     }
   }
-}
 
-void AMDGPUPerfHint::runOnFunction(Function &F) {
-  if (FIM.find(&F) != FIM.end())
-    return;
+  return &FI;
+}
 
+bool AMDGPUPerfHint::runOnFunction(Function &F) {
   const Module &M = *F.getParent();
   DL = &M.getDataLayout();
 
-  visit(F);
-  auto Loc = FIM.find(&F);
+  if (F.hasFnAttribute("amdgpu-wave-limiter") &&
+      F.hasFnAttribute("amdgpu-memory-bound"))
+    return false;
+
+  const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
 
-  assert(Loc != FIM.end() && "No func info");
-  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
+  LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
                     << '\n'
-                    << " IAMInst: " << Loc->second.IAMInstCount << '\n'
-                    << " LSMInst: " << Loc->second.LSMInstCount << '\n'
-                    << " TotalInst: " << Loc->second.InstCount << '\n');
-
-  auto &FI = Loc->second;
+                    << " IAMInst: " << Info->IAMInstCount << '\n'
+                    << " LSMInst: " << Info->LSMInstCount << '\n'
+                    << " TotalInst: " << Info->InstCount << '\n');
 
-  if (isMemBound(FI)) {
+  if (isMemBound(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
     NumMemBound++;
+    F.addFnAttr("amdgpu-memory-bound", "true");
   }
 
-  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
+  if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
     NumLimitWave++;
+    F.addFnAttr("amdgpu-wave-limiter", "true");
   }
+
+  return true;
 }
 
 bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -365,17 +364,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
 }
 } // namespace
 
-bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
+bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
   if (!TPC)
     return false;
 
   const TargetMachine &TM = TPC->getTM<TargetMachine>();
-  const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
 
-  AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
-  Analyzer.runOnFunction(F);
-  return false;
+  bool Changed = false;
+  for (CallGraphNode *I : SCC) {
+    Function *F = I->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
+    AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+
+    if (Analyzer.runOnFunction(*F))
+      Changed = true;
+  }
+
+  return Changed;
 }
 
 bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
diff --git a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index be7f37cb6815..9599e09fbd96 100644
--- a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -1,9 +1,8 @@
-//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
+//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,18 +14,20 @@
 
 #ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
 #define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
 
-struct AMDGPUPerfHintAnalysis : public FunctionPass {
+struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
   static char ID;
 
 public:
-  AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
+  AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
 
-  bool runOnFunction(Function &F) override;
+  bool runOnSCC(CallGraphSCC &SCC) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 5d087c099184..e4c9d6685d4a 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -163,12 +162,16 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
   bool SufficientLDS = hasSufficientLocalMem(F);
   bool Changed = false;
   BasicBlock &EntryBB = *F.begin();
-  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
-    AllocaInst *AI = dyn_cast<AllocaInst>(I);
 
-    ++I;
-    if (AI)
-      Changed |= handleAlloca(*AI, SufficientLDS);
+  SmallVector<AllocaInst *, 16> Allocas;
+  for (Instruction &I : EntryBB) {
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+      Allocas.push_back(AI);
+  }
+
+  for (AllocaInst *AI : Allocas) {
+    if (handleAlloca(*AI, SufficientLDS))
+      Changed = true;
   }
 
   return Changed;
@@ -245,11 +248,11 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
   // We could do a single 64-bit load here, but it's likely that the basic
   // 32-bit and extract sequence is already present, and it is probably easier
   // to CSE this. The loads should be mergable later anyway.
-  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
-  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);
+  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
+  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
 
-  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
-  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
+  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
+  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
 
   MDNode *MD = MDNode::get(Mod->getContext(), None);
   LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
@@ -427,7 +430,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
 
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
-      Value *VecValue = Builder.CreateLoad(BitCast);
+      Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
       Inst->replaceAllUsesWith(ExtractElement);
       Inst->eraseFromParent();
@@ -442,7 +445,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
       Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
-      Value *VecValue = Builder.CreateLoad(BitCast);
+      Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
                                                        SI->getValueOperand(),
                                                        Index);
@@ -919,7 +922,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
       );
 
       CallInst *NewCall = Builder.CreateCall(
-          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
+          ObjectSize,
+          {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
       Intr->replaceAllUsesWith(NewCall);
       Intr->eraseFromParent();
       continue;
diff --git a/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
new file mode 100644
index 000000000000..7a7addd0f5cf
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -0,0 +1,336 @@
+//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass propagates attributes from kernels to the non-entry
+/// functions. Most of the library functions were not compiled for specific ABI,
+/// yet will be correctly compiled if proper attributes are propagated from the
+/// caller.
+///
+/// The pass analyzes call graph and propagates ABI target features through the
+/// call graph.
+///
+/// It can run in two modes: as a function or module pass. A function pass
+/// simply propagates attributes. A module pass clones functions if there are
+/// callers with different ABI. If a function is cloned all call sites will
+/// be updated to use a correct clone.
+///
+/// A function pass is limited in functionality but can run early in the
+/// pipeline. A module pass is more powerful but has to run late, so misses
+/// library folding opportunities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <string>
+
+#define DEBUG_TYPE "amdgpu-propagate-attributes"
+
+using namespace llvm;
+
+namespace llvm { // Feature table defined elsewhere; see getFeatureString().
+extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
+}
+
+namespace {
+
+class AMDGPUPropagateAttributes { // Shared worker used by both passes.
+  const FeatureBitset TargetFeatures = { // Features that get propagated.
+    AMDGPU::FeatureWavefrontSize16,
+    AMDGPU::FeatureWavefrontSize32,
+    AMDGPU::FeatureWavefrontSize64
+  };
+
+  class Clone{
+  public:
+    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
+      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
+
+    FeatureBitset FeatureMask; // Caller's TargetFeatures bits for this clone.
+    Function *OrigF; // Function the clone was made from.
+    Function *NewF; // The clone itself.
+  };
+
+  const TargetMachine *TM; // Used to query per-function subtargets.
+
+  // Clone functions as needed (true) or just set attributes in place (false).
+  bool AllowClone;
+
+  // Functions whose features are propagated to the functions they call.
+  SmallSet<Function *, 32> Roots;
+
+  // Clones of functions with their attributes.
+  SmallVector<Clone, 32> Clones;
+
+  // Find a clone of OrigF with the required features; null if there is none.
+  Function *findFunction(const FeatureBitset &FeaturesNeeded,
+                         Function *OrigF);
+
+  // Clone function F and set NewFeatures on the clone.
+  // The clone takes the name of the original function.
+  Function *cloneWithFeatures(Function &F,
+                              const FeatureBitset &NewFeatures);
+
+  // Replace F's "target-features" attribute with NewFeatures in place.
+  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
+  // Build the comma-separated "+feature"/"-feature" string for Features.
+  std::string getFeatureString(const FeatureBitset &Features) const;
+
+  // Propagate attributes from Roots. Returns true if anything was changed.
+  bool process();
+
+public:
+  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
+    TM(TM), AllowClone(AllowClone) {}
+
+  // Use F as a root and propagate its attributes.
+  bool process(Function &F);
+
+  // Propagate attributes starting from kernel (entry) functions of M.
+  bool process(Module &M);
+};
+
+// Propagates attributes early. Cloning is not allowed because this must be a
+// function pass so that it can run before any optimizations.
+// TODO: A single module pass instance should suffice, but it would have to
+// run in the linker pipeline, which is currently not possible.
+class AMDGPUPropagateAttributesEarly : public FunctionPass {
+  const TargetMachine *TM; // May be null; runOnFunction then does nothing.
+
+public:
+  static char ID; // Pass identification
+
+  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
+    FunctionPass(ID), TM(TM) {
+    initializeAMDGPUPropagateAttributesEarlyPass(
+      *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+};
+
+// Propagates attributes with cloning allowed, but does so late in the
+// pipeline.
+class AMDGPUPropagateAttributesLate : public ModulePass {
+  const TargetMachine *TM; // May be null; runOnModule then does nothing.
+
+public:
+  static char ID; // Pass identification
+
+  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
+    ModulePass(ID), TM(TM) {
+    initializeAMDGPUPropagateAttributesLatePass(
+      *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+}  // end anonymous namespace.
+
+char AMDGPUPropagateAttributesEarly::ID = 0;
+char AMDGPUPropagateAttributesLate::ID = 0;
+// Register each pass with an identifier string and a description.
+INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
+                "amdgpu-propagate-attributes-early",
+                "Early propagate attributes from kernels to functions",
+                false, false)
+INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
+                "amdgpu-propagate-attributes-late",
+                "Late propagate attributes from kernels to functions",
+                false, false)
+
+Function *
+AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
+                                        Function *OrigF) {
+  // Linear scan of the recorded clones. TODO: search for clone's clones.
+  for (Clone &Candidate : Clones) {
+    if (Candidate.OrigF == OrigF && FeaturesNeeded == Candidate.FeatureMask)
+      return Candidate.NewF;
+  }
+  return nullptr; // No clone of OrigF carries exactly FeaturesNeeded.
+}
+
+bool AMDGPUPropagateAttributes::process(Module &M) {
+  // Every function with an entry-point calling convention becomes a root.
+  for (Function &Fn : M)
+    if (AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
+      Roots.insert(&Fn);
+  return process();
+}
+
+bool AMDGPUPropagateAttributes::process(Function &F) {
+  Roots.insert(&F); // F becomes a propagation root.
+  return process(); // True if anything was changed.
+}
+
+bool AMDGPUPropagateAttributes::process() { // Returns true on any change.
+  bool Changed = false;
+  SmallSet<Function *, 32> NewRoots; // Roots discovered in the current sweep.
+  SmallSet<Function *, 32> Replaced; // Originals with redirected calls.
+
+  if (Roots.empty())
+    return false;
+  Module &M = *(*Roots.begin())->getParent();
+  // Sweep the module until a sweep discovers no new roots.
+  do {
+    Roots.insert(NewRoots.begin(), NewRoots.end());
+    NewRoots.clear();
+
+    for (auto &F : M.functions()) {
+      if (F.isDeclaration() || Roots.count(&F)) // Roots are never rewritten.
+        continue;
+
+      const FeatureBitset &CalleeBits =
+        TM->getSubtargetImpl(F)->getFeatureBits();
+      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
+
+      for (User *U : F.users()) {
+        Instruction *I = dyn_cast<Instruction>(U);
+        if (!I)
+          continue;
+        CallBase *CI = dyn_cast<CallBase>(I);
+        if (!CI || CI->getCalledFunction() != &F) // F may be a mere argument.
+          continue;
+        Function *Caller = CI->getCaller();
+        if (!Caller)
+          continue;
+        if (!Roots.count(Caller)) // Only calls made from roots propagate.
+          continue;
+
+        const FeatureBitset &CallerBits =
+          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
+
+        if (CallerBits == (CalleeBits  & TargetFeatures)) {
+          NewRoots.insert(&F); // Already compatible: just keep propagating.
+          continue;
+        }
+
+        Function *NewF = findFunction(CallerBits, &F);
+        if (!NewF) {
+          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
+                                    CallerBits);
+          if (!AllowClone) {
+            // This may set different features on different iterations if
+            // there is a contradiction in callers' attributes. In this case
+            // we rely on a second pass running on Module, which is allowed
+            // to clone.
+            setFeatures(F, NewFeatures);
+            NewRoots.insert(&F);
+            Changed = true;
+            break;
+          }
+
+          NewF = cloneWithFeatures(F, NewFeatures);
+          Clones.push_back(Clone(CallerBits, &F, NewF));
+          NewRoots.insert(NewF);
+        }
+
+        ToReplace.push_back(std::make_pair(CI, NewF));
+        Replaced.insert(&F);
+
+        Changed = true;
+      }
+
+      while (!ToReplace.empty()) { // Applied only after the F.users() walk.
+        auto R = ToReplace.pop_back_val();
+        R.first->setCalledFunction(R.second);
+      }
+    }
+  } while (!NewRoots.empty());
+
+  for (Function *F : Replaced) { // Drop originals that became unused.
+    if (F->use_empty())
+      F->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+Function *
+AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
+                                             const FeatureBitset &NewFeatures) {
+  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
+  // Returns the clone; it carries NewFeatures and, if F was named, F's name.
+  ValueToValueMapTy dummy; // Passed to CloneFunction; not read afterwards.
+  Function *NewF = CloneFunction(&F, dummy);
+  setFeatures(*NewF, NewFeatures);
+
+  // Swap names. If this is the only clone, it retains the name of the
+  // now-dead original.
+  if (F.hasName()) {
+    std::string NewName = NewF->getName(); // Saved before takeName() below.
+    NewF->takeName(&F);
+    F.setName(NewName);
+
+    // Name has changed, it does not need an external symbol.
+    F.setVisibility(GlobalValue::DefaultVisibility);
+    F.setLinkage(GlobalValue::InternalLinkage);
+  }
+
+  return NewF;
+}
+
+void AMDGPUPropagateAttributes::setFeatures(Function &F,
+                                            const FeatureBitset &NewFeatures) {
+  std::string NewFeatureStr = getFeatureString(NewFeatures);
+  // The debug output lists only the TargetFeatures subset of NewFeatures.
+  LLVM_DEBUG(dbgs() << "Set features "
+                    << getFeatureString(NewFeatures & TargetFeatures)
+                    << " on " << F.getName() << '\n');
+  // Remove any existing attribute first, then add the new feature string.
+  F.removeFnAttr("target-features");
+  F.addFnAttr("target-features", NewFeatureStr);
+}
+
+std::string
+AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
+{
+  std::string Ret; // "+x," if set; "-x," if unset but in TargetFeatures.
+  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV)
+    if (Features[KV.Value])
+      Ret += (StringRef("+") + KV.Key + ",").str();
+    else if (TargetFeatures[KV.Value])
+      Ret += (StringRef("-") + KV.Key + ",").str();
+  if (!Ret.empty()) // pop_back() on an empty string is undefined behavior.
+    Ret.pop_back(); // Remove last comma.
+  return Ret;
+}
+
+bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
+  const bool IsRoot = TM && AMDGPU::isEntryFunctionCC(F.getCallingConv());
+  if (!IsRoot)
+    return false; // No TargetMachine, or F is not an entry function.
+  return AMDGPUPropagateAttributes(TM, /*AllowClone=*/false).process(F);
+}
+
+bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
+  if (TM == nullptr)
+    return false; // Without a TargetMachine the module is left untouched.
+  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/true);
+  return Propagator.process(M);
+}
+
+FunctionPass
+*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
+  return new AMDGPUPropagateAttributesEarly(TM); // Heap-allocated.
+}
+
+ModulePass
+*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
+  return new AMDGPUPropagateAttributesLate(TM); // Heap-allocated.
+}
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
deleted file mode 100644
index 36d88f52910d..000000000000
--- a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
-
-#ifdef AMDGPU_REG_ASM_NAMES
-
-static const char *const VGPR32RegNames[] = {
-    "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",   "v8",
-    "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",  "v16",  "v17",
-    "v18",  "v19",  "v20",  "v21",  "v22",  "v23",  "v24",  "v25",  "v26",
-    "v27",  "v28",  "v29",  "v30",  "v31",  "v32",  "v33",  "v34",  "v35",
-    "v36",  "v37",  "v38",  "v39",  "v40",  "v41",  "v42",  "v43",  "v44",
-    "v45",  "v46",  "v47",  "v48",  "v49",  "v50",  "v51",  "v52",  "v53",
-    "v54",  "v55",  "v56",  "v57",  "v58",  "v59",  "v60",  "v61",  "v62",
-    "v63",  "v64",  "v65",  "v66",  "v67",  "v68",  "v69",  "v70",  "v71",
-    "v72",  "v73",  "v74",  "v75",  "v76",  "v77",  "v78",  "v79",  "v80",
-    "v81",  "v82",  "v83",  "v84",  "v85",  "v86",  "v87",  "v88",  "v89",
-    "v90",  "v91",  "v92",  "v93",  "v94",  "v95",  "v96",  "v97",  "v98",
-    "v99",  "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
-    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
-    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
-    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
-    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
-    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
-    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
-    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
-    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
-    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
-    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
-    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
-    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
-    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
-    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
-    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
-    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
-    "v252", "v253", "v254", "v255"
-};
-
-static const char *const SGPR32RegNames[] = {
-    "s0",   "s1",   "s2",   "s3",   "s4",  "s5",  "s6",  "s7",  "s8",  "s9",
-    "s10",  "s11",  "s12",  "s13",  "s14", "s15", "s16", "s17", "s18", "s19",
-    "s20",  "s21",  "s22",  "s23",  "s24", "s25", "s26", "s27", "s28", "s29",
-    "s30",  "s31",  "s32",  "s33",  "s34", "s35", "s36", "s37", "s38", "s39",
-    "s40",  "s41",  "s42",  "s43",  "s44", "s45", "s46", "s47", "s48", "s49",
-    "s50",  "s51",  "s52",  "s53",  "s54", "s55", "s56", "s57", "s58", "s59",
-    "s60",  "s61",  "s62",  "s63",  "s64", "s65", "s66", "s67", "s68", "s69",
-    "s70",  "s71",  "s72",  "s73",  "s74", "s75", "s76", "s77", "s78", "s79",
-    "s80",  "s81",  "s82",  "s83",  "s84", "s85", "s86", "s87", "s88", "s89",
-    "s90",  "s91",  "s92",  "s93",  "s94", "s95", "s96", "s97", "s98", "s99",
-    "s100", "s101", "s102", "s103"
-};
-
-static const char *const VGPR64RegNames[] = {
-    "v[0:1]",     "v[1:2]",     "v[2:3]",     "v[3:4]",     "v[4:5]",
-    "v[5:6]",     "v[6:7]",     "v[7:8]",     "v[8:9]",     "v[9:10]",
-    "v[10:11]",   "v[11:12]",   "v[12:13]",   "v[13:14]",   "v[14:15]",
-    "v[15:16]",   "v[16:17]",   "v[17:18]",   "v[18:19]",   "v[19:20]",
-    "v[20:21]",   "v[21:22]",   "v[22:23]",   "v[23:24]",   "v[24:25]",
-    "v[25:26]",   "v[26:27]",   "v[27:28]",   "v[28:29]",   "v[29:30]",
-    "v[30:31]",   "v[31:32]",   "v[32:33]",   "v[33:34]",   "v[34:35]",
-    "v[35:36]",   "v[36:37]",   "v[37:38]",   "v[38:39]",   "v[39:40]",
-    "v[40:41]",   "v[41:42]",   "v[42:43]",   "v[43:44]",   "v[44:45]",
-    "v[45:46]",   "v[46:47]",   "v[47:48]",   "v[48:49]",   "v[49:50]",
-    "v[50:51]",   "v[51:52]",   "v[52:53]",   "v[53:54]",   "v[54:55]",
-    "v[55:56]",   "v[56:57]",   "v[57:58]",   "v[58:59]",   "v[59:60]",
-    "v[60:61]",   "v[61:62]",   "v[62:63]",   "v[63:64]",   "v[64:65]",
-    "v[65:66]",   "v[66:67]",   "v[67:68]",   "v[68:69]",   "v[69:70]",
-    "v[70:71]",   "v[71:72]",   "v[72:73]",   "v[73:74]",   "v[74:75]",
-    "v[75:76]",   "v[76:77]",   "v[77:78]",   "v[78:79]",   "v[79:80]",
-    "v[80:81]",   "v[81:82]",   "v[82:83]",   "v[83:84]",   "v[84:85]",
-    "v[85:86]",   "v[86:87]",   "v[87:88]",   "v[88:89]",   "v[89:90]",
-    "v[90:91]",   "v[91:92]",   "v[92:93]",   "v[93:94]",   "v[94:95]",
-    "v[95:96]",   "v[96:97]",   "v[97:98]",   "v[98:99]",   "v[99:100]",
-    "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
-    "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
-    "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
-    "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
-    "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
-    "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
-    "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
-    "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
-    "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
-    "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
-    "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
-    "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
-    "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
-    "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
-    "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
-    "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
-    "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
-    "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
-    "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
-    "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
-    "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
-    "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
-    "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
-    "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
-    "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
-    "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
-    "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
-    "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
-    "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
-    "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
-    "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
-};
-
-static const char *const VGPR96RegNames[] = {
-    "v[0:2]",     "v[1:3]",     "v[2:4]",     "v[3:5]",     "v[4:6]",
-    "v[5:7]",     "v[6:8]",     "v[7:9]",     "v[8:10]",    "v[9:11]",
-    "v[10:12]",   "v[11:13]",   "v[12:14]",   "v[13:15]",   "v[14:16]",
-    "v[15:17]",   "v[16:18]",   "v[17:19]",   "v[18:20]",   "v[19:21]",
-    "v[20:22]",   "v[21:23]",   "v[22:24]",   "v[23:25]",   "v[24:26]",
-    "v[25:27]",   "v[26:28]",   "v[27:29]",   "v[28:30]",   "v[29:31]",
-    "v[30:32]",   "v[31:33]",   "v[32:34]",   "v[33:35]",   "v[34:36]",
-    "v[35:37]",   "v[36:38]",   "v[37:39]",   "v[38:40]",   "v[39:41]",
-    "v[40:42]",   "v[41:43]",   "v[42:44]",   "v[43:45]",   "v[44:46]",
-    "v[45:47]",   "v[46:48]",   "v[47:49]",   "v[48:50]",   "v[49:51]",
-    "v[50:52]",   "v[51:53]",   "v[52:54]",   "v[53:55]",   "v[54:56]",
-    "v[55:57]",   "v[56:58]",   "v[57:59]",   "v[58:60]",   "v[59:61]",
-    "v[60:62]",   "v[61:63]",   "v[62:64]",   "v[63:65]",   "v[64:66]",
-    "v[65:67]",   "v[66:68]",   "v[67:69]",   "v[68:70]",   "v[69:71]",
-    "v[70:72]",   "v[71:73]",   "v[72:74]",   "v[73:75]",   "v[74:76]",
-    "v[75:77]",   "v[76:78]",   "v[77:79]",   "v[78:80]",   "v[79:81]",
-    "v[80:82]",   "v[81:83]",   "v[82:84]",   "v[83:85]",   "v[84:86]",
-    "v[85:87]",   "v[86:88]",   "v[87:89]",   "v[88:90]",   "v[89:91]",
-    "v[90:92]",   "v[91:93]",   "v[92:94]",   "v[93:95]",   "v[94:96]",
-    "v[95:97]",   "v[96:98]",   "v[97:99]",   "v[98:100]",  "v[99:101]",
-    "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
-    "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
-    "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
-    "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
-    "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
-    "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
-    "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
-    "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
-    "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
-    "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
-    "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
-    "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
-    "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
-    "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
-    "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
-    "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
-    "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
-    "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
-    "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
-    "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
-    "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
-    "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
-    "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
-    "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
-    "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
-    "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
-    "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
-    "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
-    "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
-    "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
-    "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
-};
-
-static const char *const VGPR128RegNames[] = {
-    "v[0:3]",     "v[1:4]",     "v[2:5]",     "v[3:6]",     "v[4:7]",
-    "v[5:8]",     "v[6:9]",     "v[7:10]",    "v[8:11]",    "v[9:12]",
-    "v[10:13]",   "v[11:14]",   "v[12:15]",   "v[13:16]",   "v[14:17]",
-    "v[15:18]",   "v[16:19]",   "v[17:20]",   "v[18:21]",   "v[19:22]",
-    "v[20:23]",   "v[21:24]",   "v[22:25]",   "v[23:26]",   "v[24:27]",
-    "v[25:28]",   "v[26:29]",   "v[27:30]",   "v[28:31]",   "v[29:32]",
-    "v[30:33]",   "v[31:34]",   "v[32:35]",   "v[33:36]",   "v[34:37]",
-    "v[35:38]",   "v[36:39]",   "v[37:40]",   "v[38:41]",   "v[39:42]",
-    "v[40:43]",   "v[41:44]",   "v[42:45]",   "v[43:46]",   "v[44:47]",
-    "v[45:48]",   "v[46:49]",   "v[47:50]",   "v[48:51]",   "v[49:52]",
-    "v[50:53]",   "v[51:54]",   "v[52:55]",   "v[53:56]",   "v[54:57]",
-    "v[55:58]",   "v[56:59]",   "v[57:60]",   "v[58:61]",   "v[59:62]",
-    "v[60:63]",   "v[61:64]",   "v[62:65]",   "v[63:66]",   "v[64:67]",
-    "v[65:68]",   "v[66:69]",   "v[67:70]",   "v[68:71]",   "v[69:72]",
-    "v[70:73]",   "v[71:74]",   "v[72:75]",   "v[73:76]",   "v[74:77]",
-    "v[75:78]",   "v[76:79]",   "v[77:80]",   "v[78:81]",   "v[79:82]",
-    "v[80:83]",   "v[81:84]",   "v[82:85]",   "v[83:86]",   "v[84:87]",
-    "v[85:88]",   "v[86:89]",   "v[87:90]",   "v[88:91]",   "v[89:92]",
-    "v[90:93]",   "v[91:94]",   "v[92:95]",   "v[93:96]",   "v[94:97]",
-    "v[95:98]",   "v[96:99]",   "v[97:100]",  "v[98:101]",  "v[99:102]",
-    "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
-    "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
-    "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
-    "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
-    "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
-    "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
-    "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
-    "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
-    "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
-    "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
-    "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
-    "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
-    "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
-    "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
-    "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
-    "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
-    "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
-    "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
-    "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
-    "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
-    "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
-    "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
-    "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
-    "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
-    "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
-    "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
-    "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
-    "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
-    "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
-    "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
-    "v[250:253]", "v[251:254]", "v[252:255]"
-};
-
-static const char *const VGPR256RegNames[] = {
-    "v[0:7]",     "v[1:8]",     "v[2:9]",     "v[3:10]",    "v[4:11]",
-    "v[5:12]",    "v[6:13]",    "v[7:14]",    "v[8:15]",    "v[9:16]",
-    "v[10:17]",   "v[11:18]",   "v[12:19]",   "v[13:20]",   "v[14:21]",
-    "v[15:22]",   "v[16:23]",   "v[17:24]",   "v[18:25]",   "v[19:26]",
-    "v[20:27]",   "v[21:28]",   "v[22:29]",   "v[23:30]",   "v[24:31]",
-    "v[25:32]",   "v[26:33]",   "v[27:34]",   "v[28:35]",   "v[29:36]",
-    "v[30:37]",   "v[31:38]",   "v[32:39]",   "v[33:40]",   "v[34:41]",
-    "v[35:42]",   "v[36:43]",   "v[37:44]",   "v[38:45]",   "v[39:46]",
-    "v[40:47]",   "v[41:48]",   "v[42:49]",   "v[43:50]",   "v[44:51]",
-    "v[45:52]",   "v[46:53]",   "v[47:54]",   "v[48:55]",   "v[49:56]",
-    "v[50:57]",   "v[51:58]",   "v[52:59]",   "v[53:60]",   "v[54:61]",
-    "v[55:62]",   "v[56:63]",   "v[57:64]",   "v[58:65]",   "v[59:66]",
-    "v[60:67]",   "v[61:68]",   "v[62:69]",   "v[63:70]",   "v[64:71]",
-    "v[65:72]",   "v[66:73]",   "v[67:74]",   "v[68:75]",   "v[69:76]",
-    "v[70:77]",   "v[71:78]",   "v[72:79]",   "v[73:80]",   "v[74:81]",
-    "v[75:82]",   "v[76:83]",   "v[77:84]",   "v[78:85]",   "v[79:86]",
-    "v[80:87]",   "v[81:88]",   "v[82:89]",   "v[83:90]",   "v[84:91]",
-    "v[85:92]",   "v[86:93]",   "v[87:94]",   "v[88:95]",   "v[89:96]",
-    "v[90:97]",   "v[91:98]",   "v[92:99]",   "v[93:100]",  "v[94:101]",
-    "v[95:102]",  "v[96:103]",  "v[97:104]",  "v[98:105]",  "v[99:106]",
-    "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
-    "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
-    "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
-    "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
-    "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
-    "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
-    "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
-    "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
-    "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
-    "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
-    "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
-    "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
-    "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
-    "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
-    "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
-    "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
-    "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
-    "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
-    "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
-    "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
-    "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
-    "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
-    "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
-    "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
-    "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
-    "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
-    "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
-    "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
-    "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
-    "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
-};
-
-static const char *const VGPR512RegNames[] = {
-    "v[0:15]",    "v[1:16]",    "v[2:17]",    "v[3:18]",    "v[4:19]",
-    "v[5:20]",    "v[6:21]",    "v[7:22]",    "v[8:23]",    "v[9:24]",
-    "v[10:25]",   "v[11:26]",   "v[12:27]",   "v[13:28]",   "v[14:29]",
-    "v[15:30]",   "v[16:31]",   "v[17:32]",   "v[18:33]",   "v[19:34]",
-    "v[20:35]",   "v[21:36]",   "v[22:37]",   "v[23:38]",   "v[24:39]",
-    "v[25:40]",   "v[26:41]",   "v[27:42]",   "v[28:43]",   "v[29:44]",
-    "v[30:45]",   "v[31:46]",   "v[32:47]",   "v[33:48]",   "v[34:49]",
-    "v[35:50]",   "v[36:51]",   "v[37:52]",   "v[38:53]",   "v[39:54]",
-    "v[40:55]",   "v[41:56]",   "v[42:57]",   "v[43:58]",   "v[44:59]",
-    "v[45:60]",   "v[46:61]",   "v[47:62]",   "v[48:63]",   "v[49:64]",
-    "v[50:65]",   "v[51:66]",   "v[52:67]",   "v[53:68]",   "v[54:69]",
-    "v[55:70]",   "v[56:71]",   "v[57:72]",   "v[58:73]",   "v[59:74]",
-    "v[60:75]",   "v[61:76]",   "v[62:77]",   "v[63:78]",   "v[64:79]",
-    "v[65:80]",   "v[66:81]",   "v[67:82]",   "v[68:83]",   "v[69:84]",
-    "v[70:85]",   "v[71:86]",   "v[72:87]",   "v[73:88]",   "v[74:89]",
-    "v[75:90]",   "v[76:91]",   "v[77:92]",   "v[78:93]",   "v[79:94]",
-    "v[80:95]",   "v[81:96]",   "v[82:97]",   "v[83:98]",   "v[84:99]",
-    "v[85:100]",  "v[86:101]",  "v[87:102]",  "v[88:103]",  "v[89:104]",
-    "v[90:105]",  "v[91:106]",  "v[92:107]",  "v[93:108]",  "v[94:109]",
-    "v[95:110]",  "v[96:111]",  "v[97:112]",  "v[98:113]",  "v[99:114]",
-    "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
-    "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
-    "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
-    "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
-    "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
-    "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
-    "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
-    "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
-    "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
-    "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
-    "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
-    "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
-    "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
-    "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
-    "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
-    "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
-    "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
-    "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
-    "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
-    "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
-    "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
-    "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
-    "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
-    "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
-    "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
-    "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
-    "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
-    "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
-    "v[240:255]"
-};
-
-static const char *const SGPR64RegNames[] = {
-    "s[0:1]",   "s[2:3]",   "s[4:5]",     "s[6:7]",     "s[8:9]",   "s[10:11]",
-    "s[12:13]", "s[14:15]", "s[16:17]",   "s[18:19]",   "s[20:21]", "s[22:23]",
-    "s[24:25]", "s[26:27]", "s[28:29]",   "s[30:31]",   "s[32:33]", "s[34:35]",
-    "s[36:37]", "s[38:39]", "s[40:41]",   "s[42:43]",   "s[44:45]", "s[46:47]",
-    "s[48:49]", "s[50:51]", "s[52:53]",   "s[54:55]",   "s[56:57]", "s[58:59]",
-    "s[60:61]", "s[62:63]", "s[64:65]",   "s[66:67]",   "s[68:69]", "s[70:71]",
-    "s[72:73]", "s[74:75]", "s[76:77]",   "s[78:79]",   "s[80:81]", "s[82:83]",
-    "s[84:85]", "s[86:87]", "s[88:89]",   "s[90:91]",   "s[92:93]", "s[94:95]",
-    "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
-};
-
-static const char *const SGPR128RegNames[] = {
-    "s[0:3]",   "s[4:7]",     "s[8:11]",  "s[12:15]", "s[16:19]", "s[20:23]",
-    "s[24:27]", "s[28:31]",   "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
-    "s[48:51]", "s[52:55]",   "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
-    "s[72:75]", "s[76:79]",   "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
-    "s[96:99]", "s[100:103]"
-};
-
-static const char *const SGPR256RegNames[] = {
-    "s[0:7]",   "s[4:11]",  "s[8:15]",  "s[12:19]", "s[16:23]",
-    "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
-    "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
-    "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
-    "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
-};
-
-static const char *const SGPR512RegNames[] = {
-    "s[0:15]",  "s[4:19]",  "s[8:23]",  "s[12:27]", "s[16:31]",  "s[20:35]",
-    "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]",  "s[44:59]",
-    "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]",  "s[68:83]",
-    "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
-};
-
-#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7a760dcf7a90..815cbc5e26ee 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -14,9 +13,13 @@
 
 #include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -31,6 +34,56 @@
 
 using namespace llvm;
 
+namespace {
+
+// Observer to apply a register bank to new registers created by LegalizerHelper.
+class ApplyRegBankMapping final : public GISelChangeObserver {
+private:
+  MachineRegisterInfo &MRI;
+  const RegisterBank *NewBank;
+  SmallVector<MachineInstr *, 4> NewInsts;
+
+public:
+  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
+    : MRI(MRI_), NewBank(RB) {}
+
+  ~ApplyRegBankMapping() {
+    for (MachineInstr *MI : NewInsts)
+      applyBank(*MI);
+  }
+
+  /// Assign NewBank to any register lacking a class or bank (s1: SCC or VCC).
+  void applyBank(MachineInstr &MI) {
+    for (MachineOperand &Op : MI.operands()) {
+      if (!Op.isReg())
+        continue;
+
+      Register Reg = Op.getReg();
+      if (MRI.getRegClassOrRegBank(Reg))
+        continue;
+
+      const RegisterBank *RB = NewBank;
+      // FIXME: This might not be enough to detect when SCC should be used.
+      if (MRI.getType(Reg) == LLT::scalar(1))
+        RB = (NewBank == &AMDGPU::SGPRRegBank ?
+              &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);
+
+      MRI.setRegBank(Reg, *RB);
+    }
+  }
+
+  void erasingInstr(MachineInstr &MI) override {}
+
+  void createdInstr(MachineInstr &MI) override {
+    // At this point, the instruction was just inserted and has no operands.
+    NewInsts.push_back(&MI);
+  }
+
+  void changingInstr(MachineInstr &MI) override {}
+  void changedInstr(MachineInstr &MI) override {}
+};
+
+}
 AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
     : AMDGPUGenRegisterBankInfo(),
       TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
@@ -52,43 +105,62 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
 
 }
 
-static bool isConstant(const MachineOperand &MO, int64_t &C) {
-  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-  const MachineRegisterInfo &MRI = MF->getRegInfo();
-  const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
-  if (!Def)
-    return false;
-
-  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
-    C = Def->getOperand(1).getCImm()->getSExtValue();
-    return true;
-  }
-
-  if (Def->getOpcode() == AMDGPU::COPY)
-    return isConstant(Def->getOperand(1), C);
-
-  return false;
-}
-
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
+  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
   if (Dst.getID() == AMDGPU::SGPRRegBankID &&
       Src.getID() == AMDGPU::VGPRRegBankID) {
     return std::numeric_limits<unsigned>::max();
   }
 
-  // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
-  // the valu.
-  if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
+  // Bool values are tricky, because the meaning is based on context. The SCC
+  // and VCC banks are for the natural scalar and vector conditions produced by
+  // a compare.
+  //
+  // Legalization doesn't know about the necessary context, so an s1 use may
+  // have been a truncate from an arbitrary value, in which case a copy (lowered
+  // as a compare with 0) needs to be inserted.
+  if (Size == 1 &&
+      (Dst.getID() == AMDGPU::SCCRegBankID ||
+       Dst.getID() == AMDGPU::SGPRRegBankID) &&
       (Src.getID() == AMDGPU::SGPRRegBankID ||
        Src.getID() == AMDGPU::VGPRRegBankID ||
        Src.getID() == AMDGPU::VCCRegBankID))
     return std::numeric_limits<unsigned>::max();
 
+  if (Dst.getID() == AMDGPU::SCCRegBankID &&
+      Src.getID() == AMDGPU::VCCRegBankID)
+    return std::numeric_limits<unsigned>::max();
+
   return RegisterBankInfo::copyCost(Dst, Src, Size);
 }
 
+unsigned AMDGPURegisterBankInfo::getBreakDownCost(
+  const ValueMapping &ValMapping,
+  const RegisterBank *CurBank) const {
+  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
+  // VGPR.
+  // FIXME: Is there a better way to do this?
+  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
+    return 10; // This is expensive.
+  // NOTE(review): NumBreakDowns >= 2 returned above; this assert cannot hold.
+  assert(ValMapping.NumBreakDowns == 2 &&
+         ValMapping.BreakDown[0].Length == 32 &&
+         ValMapping.BreakDown[0].StartIdx == 0 &&
+         ValMapping.BreakDown[1].Length == 32 &&
+         ValMapping.BreakDown[1].StartIdx == 32 &&
+         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
+
+  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
+  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
+  // want.
+
+  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
+  // alignment restrictions, but this probably isn't important.
+  return 1;
+}
+
 const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
     const TargetRegisterClass &RC) const {
 
@@ -98,6 +170,163 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
   return getRegBank(AMDGPU::VGPRRegBankID);
 }
 
+template <unsigned NumOps>
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::addMappingFromTable(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const std::array<unsigned, NumOps> RegSrcOpIdx,
+    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
+  // Emits one alternative mapping per Table row, using each row's Cost.
+  InstructionMappings AltMappings;
+
+  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
+
+  unsigned Sizes[NumOps];
+  for (unsigned I = 0; I < NumOps; ++I) {
+    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
+    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
+  }
+  // Explicit defs start as VGPR; rows override any def listed in RegSrcOpIdx.
+  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
+    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
+  }
+
+  unsigned MappingID = 0;
+  for (const auto &Entry : Table) {
+    for (unsigned I = 0; I < NumOps; ++I) {
+      int OpIdx = RegSrcOpIdx[I];
+      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
+    }
+
+    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
+                                                 getOperandsMapping(Operands),
+                                                 Operands.size()));
+  }
+
+  return AltMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+  case Intrinsic::amdgcn_readlane: {
+    static const OpRegBankEntry<3> Table[2] = {
+      // Perfectly legal.
+      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+      // Need a readfirstlane for the index.
+      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
+    };
+
+    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
+    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  case Intrinsic::amdgcn_writelane: {
+    static const OpRegBankEntry<4> Table[4] = {
+      // Perfectly legal.
+      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+      // Need readfirstlane of first op
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+
+      // Need readfirstlane of second op
+      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+
+      // Need readfirstlane of both ops
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
+    };
+
+    // Operand 0, then operands 2-4 (presumably dst and the three sources).
+    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
+    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  default:
+    return RegisterBankInfo::getInstrAlternativeMappings(MI);
+  }
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+  // The intrinsic ID is the first operand after the explicit defs.
+  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+  case Intrinsic::amdgcn_buffer_load: {
+    static const OpRegBankEntry<3> Table[4] = {
+      // Perfectly legal.
+      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+      // Waterfall loop needed for rsrc. In the worst case this will execute
+      // approximately an extra 10 * wavesize + 2 instructions.
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
+    };
+
+    // rsrc, voffset, offset
+    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
+    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  case Intrinsic::amdgcn_s_buffer_load: {
+    static const OpRegBankEntry<2> Table[4] = {
+      // Perfectly legal.
+      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+      // Only need 1 register in loop
+      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
+
+      // Have to waterfall the resource.
+      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
+
+      // Have to waterfall the resource, and the offset.
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
+    };
+
+    // rsrc, offset
+    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
+    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap: {
+    // VGPR = M0, VGPR
+    static const OpRegBankEntry<3> Table[2] = {
+      // Perfectly legal.
+      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID  }, 1 },
+
+      // Need a readfirstlane for m0
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
+    };
+
+    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
+    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  case Intrinsic::amdgcn_s_sendmsg:
+  case Intrinsic::amdgcn_s_sendmsghalt: {
+    static const OpRegBankEntry<1> Table[2] = {
+      // Perfectly legal.
+      { { AMDGPU::SGPRRegBankID }, 1 },
+
+      // Need readlane
+      { { AMDGPU::VGPRRegBankID }, 3 }
+    };
+
+    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
+    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
+  default:
+    return RegisterBankInfo::getInstrAlternativeMappings(MI);
+  }
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+  if (!MI.hasOneMemOperand())
+    return false;
+  // Uniformity is decided by the instruction's only memory operand.
+  const MachineMemOperand *MMO = *MI.memoperands_begin();
+  return AMDGPUInstrInfo::isUniformMMO(MMO);
+}
+
 RegisterBankInfo::InstructionMappings
 AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     const MachineInstr &MI) const {
@@ -108,31 +337,102 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
 
   InstructionMappings AltMappings;
   switch (MI.getOpcode()) {
-  case TargetOpcode::G_LOAD: {
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
-    // FIXME: Should we be hard coding the size for these mappings?
-    const InstructionMapping &SSMapping = getInstructionMapping(
+
+    if (Size == 1) {
+      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
+      const InstructionMapping &SCCMapping = getInstructionMapping(
         1, 1, getOperandsMapping(
-                  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
-                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
-        2); // Num Operands
+          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&SCCMapping);
+
+      const InstructionMapping &SGPRMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&SGPRMapping);
+
+      const InstructionMapping &VCCMapping0 = getInstructionMapping(
+        2, 10, getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&VCCMapping0);
+      return AltMappings;
+    }
+
+    if (Size != 64)
+      break;
+
+    const InstructionMapping &SSMapping = getInstructionMapping(
+      1, 1, getOperandsMapping(
+        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+      3); // Num Operands
     AltMappings.push_back(&SSMapping);
 
+    const InstructionMapping &VVMapping = getInstructionMapping(
+      2, 2, getOperandsMapping(
+        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+      3); // Num Operands
+    AltMappings.push_back(&VVMapping);
+
+    const InstructionMapping &SVMapping = getInstructionMapping(
+      3, 3, getOperandsMapping(
+        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+      3); // Num Operands
+    AltMappings.push_back(&SVMapping);
+
+    // SGPR in LHS is slightly preferable, so make VS more expensive than
+    // SV.
+    const InstructionMapping &VSMapping = getInstructionMapping(
+      3, 4, getOperandsMapping(
+        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
+      3); // Num Operands
+    AltMappings.push_back(&VSMapping);
+    break;
+  }
+  case TargetOpcode::G_LOAD: {
+    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
+    // FIXME: Should we be hard coding the size for these mappings?
+    if (isInstrUniform(MI)) {
+      const InstructionMapping &SSMapping = getInstructionMapping(
+          1, 1, getOperandsMapping(
+                    {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                     AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+          2); // Num Operands
+      AltMappings.push_back(&SSMapping);
+    }
+
     const InstructionMapping &VVMapping = getInstructionMapping(
         2, 1, getOperandsMapping(
-                  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+                  {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
                    AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
         2); // Num Operands
     AltMappings.push_back(&VVMapping);
 
-    // FIXME: Should this be the pointer-size (64-bits) or the size of the
-    // register that will hold the bufffer resourc (128-bits).
-    const InstructionMapping &VSMapping = getInstructionMapping(
-        3, 1, getOperandsMapping(
-                  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
-                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
-        2); // Num Operands
-    AltMappings.push_back(&VSMapping);
+    // It may be possible to have a vgpr = load sgpr mapping here, because
+    // the mubuf instructions support this kind of load, but probably for only
+    // gfx7 and older.  However, the addressing mode matching in the instruction
+    // selector should be able to do a better job of detecting and selecting
+    // these kinds of loads from the vgpr = load vgpr mapping.
 
     return AltMappings;
 
@@ -184,15 +484,32 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     AltMappings.push_back(&SSMapping);
 
     const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
-                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
+                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
       4); // Num Operands
     AltMappings.push_back(&VVMapping);
 
     return AltMappings;
   }
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
+    static const OpRegBankEntry<3> Table[4] = {
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+      // Scalar requires cmp+select, and extends if 16-bit.
+      // FIXME: Should there be separate costs for 32 and 16-bit
+      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
+    };
+
+    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
+    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+  }
   case TargetOpcode::G_UADDE:
   case TargetOpcode::G_USUBE:
   case TargetOpcode::G_SADDE:
@@ -234,23 +551,816 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     AltMappings.push_back(&VMapping);
     return AltMappings;
   }
+  case AMDGPU::G_INTRINSIC:
+    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
+  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
   default:
     break;
   }
   return RegisterBankInfo::getInstrAlternativeMappings(MI);
 }
 
-void AMDGPURegisterBankInfo::applyMappingImpl(
-    const OperandsMapper &OpdMapper) const {
-  return applyDefaultMapping(OpdMapper);
+void AMDGPURegisterBankInfo::split64BitValueForMapping(
+  MachineIRBuilder &B,
+  SmallVector<Register, 2> &Regs,
+  LLT HalfTy,
+  Register Reg) const {
+  assert(HalfTy.getSizeInBits() == 32);
+  MachineRegisterInfo *MRI = B.getMRI();
+  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
+  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
+  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
+  MRI->setRegBank(LoLHS, *Bank);
+  MRI->setRegBank(HiLHS, *Bank);
+  // Append the new halves to Regs in (Lo, Hi) order.
+  Regs.push_back(LoLHS);
+  Regs.push_back(HiLHS);
+
+  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+    .addDef(LoLHS)
+    .addDef(HiLHS)
+    .addUse(Reg);
 }
 
-static bool isInstrUniform(const MachineInstr &MI) {
-  if (!MI.hasOneMemOperand())
+/// Retype each register in \p Regs to \p NewTy, which must have the same size.
+static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
+                          LLT NewTy) {
+  for (Register Reg : Regs) {
+    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
+    MRI.setType(Reg, NewTy);
+  }
+}
+
+static LLT getHalfSizedType(LLT Ty) {
+  if (Ty.isVector()) {
+    assert(Ty.getNumElements() % 2 == 0);
+    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
+  }
+  // Non-vector types: halve the bit width (which must be even).
+  assert(Ty.getSizeInBits() % 2 == 0);
+  return LLT::scalar(Ty.getSizeInBits() / 2);
+}
+
+/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
+/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
+/// execute the instruction for each unique combination of values in all lanes
+/// in the wave. The block will be split such that the rest of the instructions
+/// are moved to a new block.
+///
+/// Essentially performs this loop:
+///
+/// Save Execution Mask
+/// For (Lane : Wavefront) {
+///   Enable Lane, Disable all other lanes
+///   SGPR = read SGPR value for current lane from VGPR
+///   VGPRResult[Lane] = use_op SGPR
+/// }
+/// Restore Execution Mask
+///
+/// There is additional complexity in comparing values to identify the
+/// unique values used.
+void AMDGPURegisterBankInfo::executeInWaterfallLoop(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  ArrayRef<unsigned> OpIndices) const {
+  MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  MachineBasicBlock::iterator I(MI);
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  // Use a set to avoid extra readfirstlanes in the case where multiple operands
+  // are the same register.
+  SmallSet<Register, 4> SGPROperandRegs;
+  for (unsigned Op : OpIndices) {
+    assert(MI.getOperand(Op).isUse());
+    Register Reg = MI.getOperand(Op).getReg();
+    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
+    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
+      SGPROperandRegs.insert(Reg);
+  }
+
+  // No operands need to be replaced, so no need to loop.
+  if (SGPROperandRegs.empty())
+    return;
+
+  MachineIRBuilder B(MI);
+  SmallVector<Register, 4> ResultRegs;
+  SmallVector<Register, 4> InitResultRegs;
+  SmallVector<Register, 4> PhiRegs;
+  for (MachineOperand &Def : MI.defs()) {
+    LLT ResTy = MRI.getType(Def.getReg());
+    const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
+    ResultRegs.push_back(Def.getReg());
+    Register InitReg = B.buildUndef(ResTy).getReg(0);
+    Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
+    InitResultRegs.push_back(InitReg);
+    PhiRegs.push_back(PhiReg);
+    MRI.setRegBank(PhiReg, *DefBank);
+    MRI.setRegBank(InitReg, *DefBank);
+  }
+
+  Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+  Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+  // Don't bother using generic instructions/registers for the exec mask.
+  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
+    .addDef(InitSaveExecReg);
+
+  Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+  // To insert the loop we need to split the block. Everything from this
+  // instruction onward moves to RemainderBB; MI itself then moves into LoopBB.
+  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
+  MachineFunction::iterator MBBI(MBB);
+  ++MBBI;
+  MF->insert(MBBI, LoopBB);
+  MF->insert(MBBI, RestoreExecBB);
+  MF->insert(MBBI, RemainderBB);
+
+  LoopBB->addSuccessor(RestoreExecBB);
+  LoopBB->addSuccessor(LoopBB);
+
+  // Move the rest of the block into a new block.
+  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+  MBB.addSuccessor(LoopBB);
+  RestoreExecBB->addSuccessor(RemainderBB);
+
+  B.setInsertPt(*LoopBB, LoopBB->end());
+
+  B.buildInstr(TargetOpcode::PHI)
+    .addDef(PhiExec)
+    .addReg(InitSaveExecReg)
+    .addMBB(&MBB)
+    .addReg(NewExec)
+    .addMBB(LoopBB);
+
+  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
+    B.buildInstr(TargetOpcode::G_PHI)
+      .addDef(std::get<2>(Result))
+      .addReg(std::get<0>(Result)) // Initial value / implicit_def
+      .addMBB(&MBB)
+      .addReg(std::get<1>(Result)) // Mid-loop value.
+      .addMBB(LoopBB);
+  }
+
+  // Move the instruction into the loop.
+  LoopBB->splice(LoopBB->end(), &MBB, I);
+  I = std::prev(LoopBB->end());
+
+  B.setInstr(*I);
+
+  Register CondReg;
+
+  for (MachineOperand &Op : MI.uses()) {
+    if (!Op.isReg())
+      continue;
+
+    assert(!Op.isDef());
+    if (SGPROperandRegs.count(Op.getReg())) {
+      LLT OpTy = MRI.getType(Op.getReg());
+      unsigned OpSize = OpTy.getSizeInBits();
+
+      // Can only do a readlane of 32-bit pieces.
+      if (OpSize == 32) {
+        // Avoid extra copies in the simple case of one 32-bit register.
+        Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+        MRI.setType(CurrentLaneOpReg, OpTy);
+
+        constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+        // Read the next variant <- also loop target.
+        BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
+          .addReg(Op.getReg());
+
+        Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+        bool First = CondReg == AMDGPU::NoRegister;
+        if (First)
+          CondReg = NewCondReg;
+
+        // Compare the value just read with the original VGPR operand.
+        B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+          .addDef(NewCondReg)
+          .addReg(CurrentLaneOpReg)
+          .addReg(Op.getReg());
+        Op.setReg(CurrentLaneOpReg);
+
+        if (!First) {
+          Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+          // If there are multiple operands to consider, and the conditions.
+          B.buildInstr(AMDGPU::S_AND_B64)
+            .addDef(AndReg)
+            .addReg(NewCondReg)
+            .addReg(CondReg);
+          CondReg = AndReg;
+        }
+      } else {
+        LLT S32 = LLT::scalar(32);
+        SmallVector<Register, 8> ReadlanePieces;
+
+        // The compares can be done as 64-bit, but the extract needs to be done
+        // in 32-bit pieces.
+
+        bool Is64 = OpSize % 64 == 0;
+
+        LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
+        unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
+                                          : AMDGPU::V_CMP_EQ_U32_e64;
+
+        // The compares can be done as 64-bit, but the extract needs to be done
+        // in 32-bit pieces.
+
+        // Insert the unmerge before the loop.
+
+        B.setMBB(MBB);
+        auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+        B.setInstr(*I);
+
+        unsigned NumPieces = Unmerge->getNumOperands() - 1;
+        for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
+          unsigned UnmergePiece = Unmerge.getReg(PieceIdx);
+
+          Register CurrentLaneOpReg;
+          if (Is64) {
+            Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
+            Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+
+            MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
+            MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
+            MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+
+            // Read the next variant <- also loop target.
+            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                    CurrentLaneOpRegLo)
+              .addReg(UnmergePiece, 0, AMDGPU::sub0);
+
+            // Read the next variant <- also loop target.
+            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                    CurrentLaneOpRegHi)
+              .addReg(UnmergePiece, 0, AMDGPU::sub1);
+
+            CurrentLaneOpReg =
+                B.buildMerge(LLT::scalar(64),
+                             {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
+                    .getReg(0);
+
+            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
+
+            if (OpTy.getScalarSizeInBits() == 64) {
+              // If we need to produce a 64-bit element vector, so use the
+              // merged pieces
+              ReadlanePieces.push_back(CurrentLaneOpReg);
+            } else {
+              // 32-bit element type.
+              ReadlanePieces.push_back(CurrentLaneOpRegLo);
+              ReadlanePieces.push_back(CurrentLaneOpRegHi);
+            }
+          } else {
+            CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+            MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
+            MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
+
+            // Read the next variant <- also loop target.
+            BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+                    CurrentLaneOpReg)
+              .addReg(UnmergePiece);
+            ReadlanePieces.push_back(CurrentLaneOpReg);
+          }
+
+          Register NewCondReg
+            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+          bool First = CondReg == AMDGPU::NoRegister;
+          if (First)
+            CondReg = NewCondReg;
+
+          B.buildInstr(CmpOp)
+            .addDef(NewCondReg)
+            .addReg(CurrentLaneOpReg)
+            .addReg(UnmergePiece);
+
+          if (!First) {
+            Register AndReg
+              = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+            // If there are multiple operands to consider, and the conditions.
+            B.buildInstr(AMDGPU::S_AND_B64)
+              .addDef(AndReg)
+              .addReg(NewCondReg)
+              .addReg(CondReg);
+            CondReg = AndReg;
+          }
+        }
+
+        // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
+        // BUILD_VECTOR
+        if (OpTy.isVector()) {
+          auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
+          Op.setReg(Merge.getReg(0));
+        } else {
+          auto Merge = B.buildMerge(OpTy, ReadlanePieces);
+          Op.setReg(Merge.getReg(0));
+        }
+
+        MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
+      }
+    }
+  }
+
+  B.setInsertPt(*LoopBB, LoopBB->end());
+
+  // Update EXEC, save the original EXEC value to NewExec.
+  B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
+    .addDef(NewExec)
+    .addReg(CondReg, RegState::Kill);
+
+  MRI.setSimpleHint(NewExec, CondReg);
+
+  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+  B.buildInstr(AMDGPU::S_XOR_B64_term)
+    .addDef(AMDGPU::EXEC)
+    .addReg(AMDGPU::EXEC)
+    .addReg(NewExec);
+
+  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
+  // s_cbranch_scc0?
+
+  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
+  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
+    .addMBB(LoopBB);
+
+  // Save the EXEC mask before the loop.
+  BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
+    .addReg(AMDGPU::EXEC);
+
+  // Restore the EXEC mask after the loop.
+  B.setMBB(*RestoreExecBB);
+  B.buildInstr(AMDGPU::S_MOV_B64_term)
+    .addDef(AMDGPU::EXEC)
+    .addReg(SaveExecReg);
+}
+
+// Rewrite operand OpIdx of MI, which must be an SGPR, if it lives in a VGPR.
+void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
+    MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
+  Register VReg = MI.getOperand(OpIdx).getReg();
+  if (getRegBank(VReg, MRI, *TRI) != &AMDGPU::VGPRRegBank)
+    return; // Operands outside the VGPR bank are left untouched.
+
+  // Read the value into a fresh SGPR with V_READFIRSTLANE_B32.
+  MachineIRBuilder Builder(MI);
+  Register Uniform = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+  Builder.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
+    .addDef(Uniform)
+    .addReg(VReg);
+
+  const TargetRegisterClass *Constrained =
+      constrainGenericRegister(VReg, AMDGPU::VGPR_32RegClass, MRI);
+  (void)Constrained; // Only inspected by the assert below.
+  assert(Constrained && "Failed to constrain readfirstlane src reg");
+
+  MI.getOperand(OpIdx).setReg(Uniform);
+}
+
+// RegBankSelect repairs a register by inserting a repair instruction that
+// defines it, then calls applyMapping and expects the target to delete or
+// rewrite the instruction that originally wrote the repaired register. In the
+// meantime the register has 2 defining instructions. Return a def of Reg other
+// than MI, or nullptr when no such def exists.
+static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
+                                     Register Reg,
+                                     const MachineInstr &MI) {
+  // FIXME: Ideally this would assert that Reg has exactly 2 def instructions.
+  for (MachineInstr &Def : MRI.def_instructions(Reg)) {
+    if (&Def == &MI)
+      continue;
+    return &Def;
+  }
+  return nullptr;
+}
+
+bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
+                        const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+                                              MachineRegisterInfo &MRI) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  const LLT LoadTy =  MRI.getType(DstReg);
+  unsigned LoadSize = LoadTy.getSizeInBits();
+  const unsigned MaxNonSmrdLoadSize = 128;
+  // 128-bit loads are supported for all instruction types.
+  if (LoadSize <= MaxNonSmrdLoadSize)
     return false;
 
-  const MachineMemOperand *MMO = *MI.memoperands_begin();
-  return AMDGPUInstrInfo::isUniformMMO(MMO);
+  SmallVector<Register, 16> DefRegs(OpdMapper.getVRegs(0));
+  SmallVector<Register, 1> SrcRegs(OpdMapper.getVRegs(1));
+
+  // If the pointer is an SGPR, we have nothing to do.
+  if (SrcRegs.empty())
+    return false;
+
+  assert(LoadSize % MaxNonSmrdLoadSize == 0);
+
+  // We want to get the repair instruction now, because it will help us
+  // determine which instruction the legalizer inserts that will also
+  // write to DstReg.
+  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);
+
+  // RegBankSelect only emits scalar types, so we need to reset the pointer
+  // operand to a pointer type.
+  Register BasePtrReg = SrcRegs[0];
+  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
+  MRI.setType(BasePtrReg, PtrTy);
+
+  MachineIRBuilder B(MI);
+
+  unsigned SplitElts =
+      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
+  const LLT LoadSplitTy =  LLT::vector(SplitElts, LoadTy.getScalarType());
+  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
+  GISelObserverWrapper Observer(&O);
+  B.setChangeObserver(Observer);
+  LegalizerHelper Helper(B.getMF(), Observer, B);
+  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
+    return false;
+
+  // At this point, the legalizer has split the original load into smaller
+  // loads.  At the end of lowering, it inserts an instruction (LegalizedInst)
+  // that combines the outputs of the lower loads and writes it to DstReg.
+  // The register bank selector has also added the RepairInst which writes to
+  // DstReg as well.
+  assert(RepairInst && "expected a repair instruction defining DstReg");
+  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
+  assert(LegalizedInst && "expected the legalizer to emit a def of DstReg");
+  // Replace the output of the LegalizedInst with a temporary register, since
+  // RepairInst already defines DstReg.
+  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
+  LegalizedInst->getOperand(0).setReg(TmpReg);
+  B.setInsertPt(*RepairInst->getParent(), RepairInst);
+
+  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
+    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    B.buildConstant(IdxReg, DefIdx);
+    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
+    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
+  }
+
+  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+  return true;
+}
+
+// For cases where only a single copy is inserted for matching register banks:
+// replace operand OpIdx of the mapped instruction with the copied register.
+static void substituteSimpleCopyRegs(
+  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
+  SmallVector<Register, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
+  if (!SrcReg.empty()) { // Nothing to substitute when no vreg was recorded.
+    assert(SrcReg.size() == 1);
+    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
+  }
+}
+
+void AMDGPURegisterBankInfo::applyMappingImpl(
+    const OperandsMapper &OpdMapper) const {
+  MachineInstr &MI = OpdMapper.getMI();
+  unsigned Opc = MI.getOpcode();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+  switch (Opc) {
+  case AMDGPU::G_SELECT: {
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.getSizeInBits() != 64)
+      break;
+
+    LLT HalfTy = getHalfSizedType(DstTy);
+
+    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
+    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
+    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));
+
+    // All inputs are SGPRs, nothing special to do.
+    if (DefRegs.empty()) {
+      assert(Src1Regs.empty() && Src2Regs.empty());
+      break;
+    }
+
+    MachineIRBuilder B(MI);
+    if (Src0Regs.empty())
+      Src0Regs.push_back(MI.getOperand(1).getReg());
+    else {
+      assert(Src0Regs.size() == 1);
+    }
+
+    if (Src1Regs.empty())
+      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
+    else
+      setRegsToType(MRI, Src1Regs, HalfTy);
+
+    if (Src2Regs.empty())
+      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
+    else
+      setRegsToType(MRI, Src2Regs, HalfTy);
+
+    setRegsToType(MRI, DefRegs, HalfTy);
+
+    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
+    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);
+
+    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+    MI.eraseFromParent();
+    return;
+  }
+  case AMDGPU::G_AND:
+  case AMDGPU::G_OR:
+  case AMDGPU::G_XOR: {
+    // 64-bit and/or/xor is only available on the SALU, so split into 2 32-bit
+    // ops if there is a VGPR input.
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.getSizeInBits() != 64)
+      break;
+
+    LLT HalfTy = getHalfSizedType(DstTy);
+    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+    SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
+    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
+
+    // All inputs are SGPRs, nothing special to do.
+    if (DefRegs.empty()) {
+      assert(Src0Regs.empty() && Src1Regs.empty());
+      break;
+    }
+
+    assert(DefRegs.size() == 2);
+    assert(Src0Regs.size() == Src1Regs.size() &&
+           (Src0Regs.empty() || Src0Regs.size() == 2));
+
+    // Depending on where the source registers came from, the generic code may
+    // have decided to split the inputs already or not. If not, we still need to
+    // extract the values.
+    MachineIRBuilder B(MI);
+
+    if (Src0Regs.empty())
+      split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
+    else
+      setRegsToType(MRI, Src0Regs, HalfTy);
+
+    if (Src1Regs.empty())
+      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
+    else
+      setRegsToType(MRI, Src1Regs, HalfTy);
+
+    setRegsToType(MRI, DefRegs, HalfTy);
+
+    B.buildInstr(Opc)
+      .addDef(DefRegs[0])
+      .addUse(Src0Regs[0])
+      .addUse(Src1Regs[0]);
+
+    B.buildInstr(Opc)
+      .addDef(DefRegs[1])
+      .addUse(Src0Regs[1])
+      .addUse(Src1Regs[1]);
+
+    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+    MI.eraseFromParent();
+    return;
+  }
+  case AMDGPU::G_ADD:
+  case AMDGPU::G_SUB:
+  case AMDGPU::G_MUL: {
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy != LLT::scalar(16))
+      break;
+
+    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+    if (DstBank == &AMDGPU::VGPRRegBank)
+      break;
+
+    // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
+    MachineFunction *MF = MI.getParent()->getParent();
+    MachineIRBuilder B(MI);
+    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
+    GISelObserverWrapper Observer(&ApplySALU);
+    LegalizerHelper Helper(*MF, Observer, B);
+
+    if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
+        LegalizerHelper::Legalized)
+      llvm_unreachable("widen scalar should have succeeded");
+    return;
+  }
+  case AMDGPU::G_SMIN:
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_UMIN:
+  case AMDGPU::G_UMAX: {
+    Register DstReg = MI.getOperand(0).getReg();
+    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+    if (DstBank == &AMDGPU::VGPRRegBank)
+      break;
+
+    MachineFunction *MF = MI.getParent()->getParent();
+    MachineIRBuilder B(MI);
+    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
+    GISelObserverWrapper Observer(&ApplySALU);
+    LegalizerHelper Helper(*MF, Observer, B);
+
+    // Turn scalar min/max into a compare and select.
+    LLT Ty = MRI.getType(DstReg);
+    LLT S32 = LLT::scalar(32);
+    LLT S16 = LLT::scalar(16);
+
+    if (Ty == S16) {
+      // Need to widen to s32, and expand as cmp + select.
+      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
+        llvm_unreachable("widenScalar should have succeeded");
+
+      // FIXME: This is relying on widenScalar leaving MI in place.
+      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
+        llvm_unreachable("lower should have succeeded");
+    } else {
+      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
+        llvm_unreachable("lower should have succeeded");
+    }
+
+    return;
+  }
+  case AMDGPU::G_SEXT:
+  case AMDGPU::G_ZEXT: {
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    bool Signed = Opc == AMDGPU::G_SEXT;
+
+    MachineIRBuilder B(MI);
+    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.isScalar() &&
+        SrcBank != &AMDGPU::SGPRRegBank &&
+        SrcBank != &AMDGPU::SCCRegBank &&
+        SrcBank != &AMDGPU::VCCRegBank &&
+        // FIXME: Should handle any type that rounds to s64 when irregular
+        // breakdowns supported.
+        DstTy.getSizeInBits() == 64 &&
+        SrcTy.getSizeInBits() <= 32) {
+      const LLT S32 = LLT::scalar(32);
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+      // Extend to 32-bit, and then extend the low half.
+      if (Signed) {
+        // TODO: Should really be buildSExtOrCopy
+        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+        // Replicate sign bit from 32-bit extended part.
+        auto ShiftAmt = B.buildConstant(S32, 31);
+        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+      } else {
+        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+        B.buildConstant(DefRegs[1], 0);
+      }
+
+      MRI.setRegBank(DstReg, *SrcBank);
+      MI.eraseFromParent();
+      return;
+    }
+
+    if (SrcTy != LLT::scalar(1))
+      return;
+
+    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+      unsigned DstSize = DstTy.getSizeInBits();
+      // 64-bit select is SGPR only
+      const bool UseSel64 = DstSize > 32 && SrcBank == &AMDGPU::SCCRegBank;
+
+      // TODO: Should s16 select be legal?
+      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
+      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
+      auto False = B.buildConstant(SelType, 0);
+
+      MRI.setRegBank(True.getReg(0), *DstBank);
+      MRI.setRegBank(False.getReg(0), *DstBank);
+      MRI.setRegBank(DstReg, *DstBank);
+
+      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
+        B.buildSelect(DefRegs[0], SrcReg, True, False);
+        if (Signed)
+          B.buildCopy(DefRegs[1], DefRegs[0]); // Low half is already 0 or -1.
+        else
+          B.buildConstant(DefRegs[1], 0); // zext: the high half must be zero.
+      } else if (DstSize < 32) {
+        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
+        MRI.setRegBank(Sel.getReg(0), *DstBank);
+        B.buildTrunc(DstReg, Sel);
+      } else {
+        B.buildSelect(DstReg, SrcReg, True, False);
+      }
+
+      MI.eraseFromParent();
+      return;
+    }
+
+    // Fixup the case with an s1 src that isn't a condition register. Use shifts
+    // instead of introducing a compare to avoid an unnecessary condition
+    // register (and since there's no scalar 16-bit compares).
+    auto Ext = B.buildAnyExt(DstTy, SrcReg);
+    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
+    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);
+
+    if (Signed)
+      B.buildAShr(DstReg, Shl, ShiftAmt);
+    else
+      B.buildLShr(DstReg, Shl, ShiftAmt);
+
+    MRI.setRegBank(DstReg, *SrcBank);
+    MRI.setRegBank(Ext.getReg(0), *SrcBank);
+    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+    MRI.setRegBank(Shl.getReg(0), *SrcBank);
+    MI.eraseFromParent();
+    return;
+  }
+  case AMDGPU::G_EXTRACT_VECTOR_ELT:
+    applyDefaultMapping(OpdMapper);
+    executeInWaterfallLoop(MI, MRI, { 2 });
+    return;
+  case AMDGPU::G_INTRINSIC: {
+    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+    case Intrinsic::amdgcn_s_buffer_load: {
+      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
+      executeInWaterfallLoop(MI, MRI, { 2, 3 });
+      return;
+    }
+    case Intrinsic::amdgcn_readlane: {
+      substituteSimpleCopyRegs(OpdMapper, 2);
+
+      assert(empty(OpdMapper.getVRegs(0)));
+      assert(empty(OpdMapper.getVRegs(3)));
+
+      // Make sure the index is an SGPR. It doesn't make sense to run this in a
+      // waterfall loop, so assume it's a uniform value.
+      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+      return;
+    }
+    case Intrinsic::amdgcn_writelane: {
+      assert(empty(OpdMapper.getVRegs(0)));
+      assert(empty(OpdMapper.getVRegs(2)));
+      assert(empty(OpdMapper.getVRegs(3)));
+
+      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
+      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
+      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+      return;
+    }
+    default:
+      break;
+    }
+    break;
+  }
+  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+    case Intrinsic::amdgcn_buffer_load: {
+      executeInWaterfallLoop(MI, MRI, { 2 });
+      return;
+    }
+    case Intrinsic::amdgcn_ds_ordered_add:
+    case Intrinsic::amdgcn_ds_ordered_swap: {
+      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
+      assert(empty(OpdMapper.getVRegs(0)));
+      substituteSimpleCopyRegs(OpdMapper, 3);
+      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      return;
+    }
+    case Intrinsic::amdgcn_s_sendmsg:
+    case Intrinsic::amdgcn_s_sendmsghalt: {
+      // FIXME: Should this use a waterfall loop?
+      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+      return;
+    }
+    default:
+      break;
+    }
+    break;
+  }
+  case AMDGPU::G_LOAD:
+    if (applyMappingWideLoad(MI, OpdMapper, MRI))
+      return;
+    break;
+  default:
+    break;
+  }
+
+  return applyDefaultMapping(OpdMapper);
 }
 
 bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
@@ -259,7 +1369,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
   for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
     if (!MI.getOperand(i).isReg())
       continue;
-    unsigned Reg = MI.getOperand(i).getReg();
+    Register Reg = MI.getOperand(i).getReg();
     if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
       if (Bank->getID() == AMDGPU::VGPRRegBankID)
         return false;
@@ -299,7 +1409,7 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
   if (MI.getOperand(OpdIdx).isIntrinsicID())
     OpdsMapping[OpdIdx++] = nullptr;
 
-  unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
+  Register Reg1 = MI.getOperand(OpdIdx).getReg();
   unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
 
   unsigned DefaultBankID = Size1 == 1 ?
@@ -309,7 +1419,11 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
   OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
 
   for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
-    unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
+    const MachineOperand &MO = MI.getOperand(OpdIdx);
+    if (!MO.isReg())
+      continue;
+
+    unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
     unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
     OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
   }
@@ -325,7 +1439,11 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
 
   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-    unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+    const MachineOperand &Op = MI.getOperand(I);
+    if (!Op.isReg())
+      continue;
+
+    unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
     OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
   }
 
@@ -340,6 +1458,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
   unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
   unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
 
   const ValueMapping *ValMapping;
@@ -350,7 +1469,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
     ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
   } else {
-    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
     // FIXME: What would happen if we used SGPRRegBankID here?
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
   }
@@ -366,7 +1485,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
 }
 
 unsigned
-AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
+AMDGPURegisterBankInfo::getRegBankID(Register Reg,
                                      const MachineRegisterInfo &MRI,
                                      const TargetRegisterInfo &TRI,
                                      unsigned Default) const {
@@ -383,13 +1502,81 @@ AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
 ///
 const RegisterBankInfo::InstructionMapping &
 AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
-  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  if (MI.isRegSequence()) {
+    // If any input is a VGPR, the result must be a VGPR. The default handling
+    // assumes any copy between banks is legal.
+    unsigned BankID = AMDGPU::SGPRRegBankID;
+
+    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+      auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
+      // It doesn't make sense to use vcc or scc banks here, so just ignore
+      // them.
+      if (OpBank != AMDGPU::SGPRRegBankID) {
+        BankID = AMDGPU::VGPRRegBankID;
+        break;
+      }
+    }
+    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+    const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
+    return getInstructionMapping(
+        1, /*Cost*/ 1,
+        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+  }
+
+  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
+  // properly.
+  //
+  // TODO: There are additional exec masking dependencies to analyze.
+  if (MI.getOpcode() == TargetOpcode::G_PHI) {
+    // TODO: Generate proper invalid bank enum.
+    int ResultBank = -1;
+
+    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+      unsigned Reg = MI.getOperand(I).getReg();
+      const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
+
+      // FIXME: Assuming VGPR for any undetermined inputs.
+      if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
+        ResultBank = AMDGPU::VGPRRegBankID;
+        break;
+      }
+
+      unsigned OpBank = Bank->getID();
+      // scc, scc -> sgpr
+      if (OpBank == AMDGPU::SCCRegBankID) {
+        // There's only one SCC register, so a phi requires copying to SGPR.
+        OpBank = AMDGPU::SGPRRegBankID;
+      } else if (OpBank == AMDGPU::VCCRegBankID) {
+        // vcc, vcc -> vcc
+        // vcc, sgpr -> vgpr
+        if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
+          ResultBank = AMDGPU::VGPRRegBankID;
+          break;
+        }
+      }
+
+      ResultBank = OpBank;
+    }
+
+    assert(ResultBank != -1);
+
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+
+    const ValueMapping &ValMap =
+        getValueMapping(0, Size, getRegBank(ResultBank));
+    return getInstructionMapping(
+        1, /*Cost*/ 1,
+        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+  }
+
+  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
   if (Mapping.isValid())
     return Mapping;
 
-  const MachineFunction &MF = *MI.getParent()->getParent();
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
 
   switch (MI.getOpcode()) {
@@ -401,18 +1588,86 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_XOR: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     if (Size == 1) {
-      OpdsMapping[0] = OpdsMapping[1] =
-        OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      const RegisterBank *DstBank
+        = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
+
+      unsigned TargetBankID = -1;
+      unsigned BankLHS = -1;
+      unsigned BankRHS = -1;
+      if (DstBank) {
+        TargetBankID = DstBank->getID();
+        if (DstBank == &AMDGPU::VCCRegBank) {
+          TargetBankID = AMDGPU::VCCRegBankID;
+          BankLHS = AMDGPU::VCCRegBankID;
+          BankRHS = AMDGPU::VCCRegBankID;
+        } else if (DstBank == &AMDGPU::SCCRegBank) {
+          TargetBankID = AMDGPU::SCCRegBankID;
+          BankLHS = AMDGPU::SGPRRegBankID;
+          BankRHS = AMDGPU::SGPRRegBankID;
+        } else {
+          BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+                                 AMDGPU::SGPRRegBankID);
+          BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                 AMDGPU::SGPRRegBankID);
+        }
+      } else {
+        BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+                               AMDGPU::VCCRegBankID);
+        BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                               AMDGPU::VCCRegBankID);
+
+        // Both inputs should be true booleans to produce a boolean result.
+        if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
+          TargetBankID = AMDGPU::VGPRRegBankID;
+        } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
+          TargetBankID = AMDGPU::VCCRegBankID;
+          BankLHS = AMDGPU::VCCRegBankID;
+          BankRHS = AMDGPU::VCCRegBankID;
+        } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
+          TargetBankID = AMDGPU::SGPRRegBankID;
+        } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
+          // The operation must be done on a 32-bit register, but it will set
+          // scc. The result type could interchangeably be SCC or SGPR, since
+          // both values will be produced.
+          TargetBankID = AMDGPU::SCCRegBankID;
+          BankLHS = AMDGPU::SGPRRegBankID;
+          BankRHS = AMDGPU::SGPRRegBankID;
+        }
+      }
+
+      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
+      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
+      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
+      break;
+    }
+
+    if (Size == 64) {
+
+      if (isSALUMapping(MI)) {
+        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
+        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
+      } else {
+        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
+        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
+        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
+
+        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
+        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
+      }
+
       break;
     }
 
     LLVM_FALLTHROUGH;
   }
 
+  case AMDGPU::G_GEP:
   case AMDGPU::G_ADD:
   case AMDGPU::G_SUB:
   case AMDGPU::G_MUL:
   case AMDGPU::G_SHL:
+  case AMDGPU::G_LSHR:
+  case AMDGPU::G_ASHR:
   case AMDGPU::G_UADDO:
   case AMDGPU::G_SADDO:
   case AMDGPU::G_USUBO:
@@ -421,6 +1676,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_SADDE:
   case AMDGPU::G_USUBE:
   case AMDGPU::G_SSUBE:
+  case AMDGPU::G_UMULH:
+  case AMDGPU::G_SMULH:
+  case AMDGPU::G_SMIN:
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_UMIN:
+  case AMDGPU::G_UMAX:
     if (isSALUMapping(MI))
       return getDefaultMappingSOP(MI);
     LLVM_FALLTHROUGH;
@@ -431,11 +1692,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_FPTOUI:
   case AMDGPU::G_FMUL:
   case AMDGPU::G_FMA:
+  case AMDGPU::G_FSQRT:
   case AMDGPU::G_SITOFP:
   case AMDGPU::G_UITOFP:
   case AMDGPU::G_FPTRUNC:
+  case AMDGPU::G_FPEXT:
   case AMDGPU::G_FEXP2:
   case AMDGPU::G_FLOG2:
+  case AMDGPU::G_FCANONICALIZE:
   case AMDGPU::G_INTRINSIC_TRUNC:
   case AMDGPU::G_INTRINSIC_ROUND:
     return getDefaultMappingVOP(MI);
@@ -473,7 +1737,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OpdsMapping[2] = nullptr;
     break;
   }
-  case AMDGPU::G_MERGE_VALUES: {
+  case AMDGPU::G_MERGE_VALUES:
+  case AMDGPU::G_BUILD_VECTOR:
+  case AMDGPU::G_CONCAT_VECTORS: {
     unsigned Bank = isSALUMapping(MI) ?
       AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
     unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -502,8 +1768,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case AMDGPU::G_TRUNC: {
-    unsigned Dst = MI.getOperand(0).getReg();
-    unsigned Src = MI.getOperand(1).getReg();
+    Register Dst = MI.getOperand(0).getReg();
+    Register Src = MI.getOperand(1).getReg();
     unsigned Bank = getRegBankID(Src, MRI, *TRI);
     unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
     unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
@@ -514,23 +1780,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_ZEXT:
   case AMDGPU::G_SEXT:
   case AMDGPU::G_ANYEXT: {
-    unsigned Dst = MI.getOperand(0).getReg();
-    unsigned Src = MI.getOperand(1).getReg();
+    Register Dst = MI.getOperand(0).getReg();
+    Register Src = MI.getOperand(1).getReg();
     unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
     unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
-    unsigned SrcBank = getRegBankID(Src, MRI, *TRI,
-                                    SrcSize == 1 ? AMDGPU::SGPRRegBankID :
-                                    AMDGPU::VGPRRegBankID);
-    unsigned DstBank = SrcBank;
-    if (SrcSize == 1) {
-      if (SrcBank == AMDGPU::SGPRRegBankID)
-        DstBank = AMDGPU::VGPRRegBankID;
-      else
-        DstBank = AMDGPU::SGPRRegBankID;
-    }
-
-    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
-    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize);
+
+    unsigned DstBank;
+    const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
+    assert(SrcBank);
+    switch (SrcBank->getID()) {
+    case AMDGPU::SCCRegBankID:
+    case AMDGPU::SGPRRegBankID:
+      DstBank = AMDGPU::SGPRRegBankID;
+      break;
+    default:
+      DstBank = AMDGPU::VGPRRegBankID;
+      break;
+    }
+
+    // TODO: Should anyext be split into 32-bit part as well?
+    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    } else {
+      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+      // 32-bits, and then to 64.
+      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+                                                         SrcSize);
+    }
     break;
   }
   case AMDGPU::G_FCMP: {
@@ -542,16 +1820,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
     break;
   }
-  case AMDGPU::G_GEP: {
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      if (!MI.getOperand(i).isReg())
-        continue;
-
-      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
-      OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
-    }
-    break;
-  }
   case AMDGPU::G_STORE: {
     assert(MI.getOperand(0).isReg());
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -571,57 +1839,55 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
 
   case AMDGPU::G_ICMP: {
+    auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
     unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
     unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
-    unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
-                       Op3Bank == AMDGPU::SGPRRegBankID ?
-                       AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+
+    bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
+                     Op3Bank == AMDGPU::SGPRRegBankID &&
+      (Size == 32 || (Size == 64 &&
+                      (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
+                      MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()));
+
+    unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+
     OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
     OpdsMapping[1] = nullptr; // Predicate Operand.
     OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
     OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
     break;
   }
-
-
   case AMDGPU::G_EXTRACT_VECTOR_ELT: {
-    unsigned IdxOp = 2;
-    int64_t Imm;
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
+    unsigned OutputBankID = isSALUMapping(MI) ?
                             AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
+    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
-    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
-    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
 
     // The index can be either if the source vector is VGPR.
-    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_INSERT_VECTOR_ELT: {
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    int64_t Imm;
-
-    unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
-    unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
-                      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
-
+    unsigned OutputBankID = isSALUMapping(MI) ?
+      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
 
-    // TODO: Can do SGPR indexing, which would obviate the need for the
-    // isConstant check.
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
-      OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
-    }
+    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
 
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
 
+    // The index can be either SGPR or VGPR if the source vector is VGPR.
+    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_UNMERGE_VALUES: {
@@ -637,14 +1903,70 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case AMDGPU::G_INTRINSIC: {
-    switch (MI.getOperand(1).getIntrinsicID()) {
+    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
     default:
       return getInvalidInstructionMapping();
     case Intrinsic::maxnum:
     case Intrinsic::minnum:
+    case Intrinsic::amdgcn_div_fmas:
+    case Intrinsic::amdgcn_trig_preop:
+    case Intrinsic::amdgcn_sin:
+    case Intrinsic::amdgcn_cos:
+    case Intrinsic::amdgcn_log_clamp:
+    case Intrinsic::amdgcn_rcp:
+    case Intrinsic::amdgcn_rcp_legacy:
+    case Intrinsic::amdgcn_rsq:
+    case Intrinsic::amdgcn_rsq_legacy:
+    case Intrinsic::amdgcn_rsq_clamp:
+    case Intrinsic::amdgcn_ldexp:
+    case Intrinsic::amdgcn_frexp_mant:
+    case Intrinsic::amdgcn_frexp_exp:
+    case Intrinsic::amdgcn_fract:
     case Intrinsic::amdgcn_cvt_pkrtz:
+    case Intrinsic::amdgcn_cvt_pknorm_i16:
+    case Intrinsic::amdgcn_cvt_pknorm_u16:
+    case Intrinsic::amdgcn_cvt_pk_i16:
+    case Intrinsic::amdgcn_cvt_pk_u16:
+    case Intrinsic::amdgcn_fmed3:
+    case Intrinsic::amdgcn_cubeid:
+    case Intrinsic::amdgcn_cubema:
+    case Intrinsic::amdgcn_cubesc:
+    case Intrinsic::amdgcn_cubetc:
+    case Intrinsic::amdgcn_sffbh:
+    case Intrinsic::amdgcn_fmad_ftz:
+    case Intrinsic::amdgcn_mbcnt_lo:
+    case Intrinsic::amdgcn_mbcnt_hi:
+    case Intrinsic::amdgcn_ubfe:
+    case Intrinsic::amdgcn_sbfe:
+    case Intrinsic::amdgcn_lerp:
+    case Intrinsic::amdgcn_sad_u8:
+    case Intrinsic::amdgcn_msad_u8:
+    case Intrinsic::amdgcn_sad_hi_u8:
+    case Intrinsic::amdgcn_sad_u16:
+    case Intrinsic::amdgcn_qsad_pk_u16_u8:
+    case Intrinsic::amdgcn_mqsad_pk_u16_u8:
+    case Intrinsic::amdgcn_mqsad_u32_u8:
+    case Intrinsic::amdgcn_cvt_pk_u8_f32:
+    case Intrinsic::amdgcn_alignbit:
+    case Intrinsic::amdgcn_alignbyte:
+    case Intrinsic::amdgcn_fdot2:
+    case Intrinsic::amdgcn_sdot2:
+    case Intrinsic::amdgcn_udot2:
+    case Intrinsic::amdgcn_sdot4:
+    case Intrinsic::amdgcn_udot4:
+    case Intrinsic::amdgcn_sdot8:
+    case Intrinsic::amdgcn_udot8:
+    case Intrinsic::amdgcn_fdiv_fast:
+    case Intrinsic::amdgcn_wwm:
+    case Intrinsic::amdgcn_wqm:
       return getDefaultMappingVOP(MI);
-    case Intrinsic::amdgcn_kernarg_segment_ptr: {
+    case Intrinsic::amdgcn_ds_permute:
+    case Intrinsic::amdgcn_ds_bpermute:
+    case Intrinsic::amdgcn_update_dpp:
+      return getDefaultMappingAllVGPR(MI);
+    case Intrinsic::amdgcn_kernarg_segment_ptr:
+    case Intrinsic::amdgcn_s_getpc:
+    case Intrinsic::amdgcn_groupstaticsize: {
       unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
       OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
@@ -652,16 +1974,142 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_wqm_vote: {
       unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
       OpdsMapping[0] = OpdsMapping[2]
-        = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+        = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
+      break;
+    }
+    case Intrinsic::amdgcn_s_buffer_load: {
+      // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
+      Register RSrc = MI.getOperand(2).getReg();   // SGPR
+      Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
+
+      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
+      unsigned Size3 = MRI.getType(Offset).getSizeInBits();
+
+      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
+      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
+
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
+      OpdsMapping[1] = nullptr; // intrinsic id
+
+      // Lie and claim everything is legal, even though some need to be
+      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
+      OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
+      OpdsMapping[4] = nullptr;
+      break;
+    }
+    case Intrinsic::amdgcn_div_scale: {
+      unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
+      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
+
+      unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+      OpdsMapping[3] = AMDGPU::getValueMapping(
+        getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
+      OpdsMapping[4] = AMDGPU::getValueMapping(
+        getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
+
+      break;
+    }
+    case Intrinsic::amdgcn_class: {
+      Register Src0Reg = MI.getOperand(2).getReg();
+      Register Src1Reg = MI.getOperand(3).getReg();
+      unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
+      unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
+      OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
+                                               Src0Size);
+      OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
+                                               Src1Size);
+      break;
+    }
+    case Intrinsic::amdgcn_icmp:
+    case Intrinsic::amdgcn_fcmp: {
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      // This is not VCCRegBank because this is not used in boolean contexts.
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+      unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+      unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+      unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+      OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
+      OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
+      break;
+    }
+    case Intrinsic::amdgcn_readlane: {
+      // This must be an SGPR, but accept a VGPR.
+      unsigned IdxReg = MI.getOperand(3).getReg();
+      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
+      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+      LLVM_FALLTHROUGH;
+    }
+    case Intrinsic::amdgcn_readfirstlane: {
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+      break;
+    }
+    case Intrinsic::amdgcn_writelane: {
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned SrcReg = MI.getOperand(2).getReg();
+      unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
+      unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+      unsigned IdxReg = MI.getOperand(3).getReg();
+      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
+      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+
+      // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
+      // to legalize.
+      OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
+      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+      break;
+    }
+    case Intrinsic::amdgcn_if_break: {
+      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
     }
     }
     break;
   }
   case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
-    switch (MI.getOperand(0).getIntrinsicID()) {
+    switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
     default:
       return getInvalidInstructionMapping();
+    case Intrinsic::amdgcn_s_getreg:
+    case Intrinsic::amdgcn_s_memtime:
+    case Intrinsic::amdgcn_s_memrealtime:
+    case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
+      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
+    case Intrinsic::amdgcn_ds_append:
+    case Intrinsic::amdgcn_ds_consume:
+    case Intrinsic::amdgcn_ds_fadd:
+    case Intrinsic::amdgcn_ds_fmin:
+    case Intrinsic::amdgcn_ds_fmax:
+    case Intrinsic::amdgcn_atomic_inc:
+    case Intrinsic::amdgcn_atomic_dec:
+      return getDefaultMappingAllVGPR(MI);
+    case Intrinsic::amdgcn_ds_ordered_add:
+    case Intrinsic::amdgcn_ds_ordered_swap: {
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+      unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                 AMDGPU::SGPRRegBankID);
+      OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
+      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+      break;
+    }
     case Intrinsic::amdgcn_exp_compr:
       OpdsMapping[0] = nullptr; // IntrinsicID
       // FIXME: These are immediate values which can't be read from registers.
@@ -688,24 +2136,82 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
       OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
       break;
+    case Intrinsic::amdgcn_buffer_load: {
+      Register RSrc = MI.getOperand(2).getReg();   // SGPR
+      Register VIndex = MI.getOperand(3).getReg(); // VGPR
+      Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
+
+      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
+      unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
+      unsigned Size4 = MRI.getType(Offset).getSizeInBits();
+
+      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
+      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
+
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
+      OpdsMapping[1] = nullptr; // intrinsic id
+
+      // Lie and claim everything is legal, even though some need to be
+      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
+      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
+      OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
+      OpdsMapping[5] = nullptr;
+      OpdsMapping[6] = nullptr;
+      break;
+    }
+    case Intrinsic::amdgcn_s_sendmsg:
+    case Intrinsic::amdgcn_s_sendmsghalt: {
+      // This must be an SGPR, but accept a VGPR.
+      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                   AMDGPU::SGPRRegBankID);
+      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+      break;
+    }
+    case Intrinsic::amdgcn_end_cf: {
+      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
     }
     break;
   }
   case AMDGPU::G_SELECT: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-    unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                     AMDGPU::SGPRRegBankID);
-    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
-    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
-    bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
-                    Op2Bank == AMDGPU::SGPRRegBankID &&
+    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
+                                    AMDGPU::SGPRRegBankID);
+    bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
                     Op3Bank == AMDGPU::SGPRRegBankID;
-    unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-    Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
-    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
-    OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
-    OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
-    OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+
+    unsigned CondBankDefault = SGPRSrcs ?
+      AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+    unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+                                     CondBankDefault);
+    if (CondBank == AMDGPU::SGPRRegBankID)
+      CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+    else if (CondBank == AMDGPU::VGPRRegBankID)
+      CondBank = AMDGPU::VCCRegBankID;
+
+    unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
+      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+
+    assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
+
+    if (Size == 64) {
+      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
+      OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+      OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+    } else {
+      OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
+      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
+      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
+      OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+    }
+
     break;
   }
 
@@ -737,6 +2243,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
   }
 
-  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+  return getInstructionMapping(/*ID*/1, /*Cost*/1,
+                               getOperandsMapping(OpdsMapping),
                                MI.getNumOperands());
 }
+
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index d29f4bc79a51..f3a96e2a6128 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -14,6 +13,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
 
+#include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 
 #define GET_REGBANK_DECLARATIONS
@@ -22,6 +22,8 @@
 
 namespace llvm {
 
+class LLT;
+class MachineIRBuilder;
 class SIRegisterInfo;
 class TargetRegisterInfo;
 
@@ -36,16 +38,53 @@ protected:
 class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
   const SIRegisterInfo *TRI;
 
+  void executeInWaterfallLoop(MachineInstr &MI,
+                              MachineRegisterInfo &MRI,
+                              ArrayRef<unsigned> OpIndices) const;
+
+  void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    unsigned OpIdx) const;
+  bool applyMappingWideLoad(MachineInstr &MI,
+                            const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+                            MachineRegisterInfo &MRI) const;
+
   /// See RegisterBankInfo::applyMapping.
   void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
 
   const RegisterBankInfo::InstructionMapping &
   getInstrMappingForLoad(const MachineInstr &MI) const;
 
-  unsigned getRegBankID(unsigned Reg, const MachineRegisterInfo &MRI,
+  unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI,
                         const TargetRegisterInfo &TRI,
                         unsigned Default = AMDGPU::VGPRRegBankID) const;
 
+  /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
+  /// Regs. This appropriately sets the regbank of the new registers.
+  void split64BitValueForMapping(MachineIRBuilder &B,
+                                 SmallVector<Register, 2> &Regs,
+                                 LLT HalfTy,
+                                 Register Reg) const;
+
+  template <unsigned NumOps>
+  struct OpRegBankEntry {
+    int8_t RegBanks[NumOps];
+    int16_t Cost;
+  };
+
+  template <unsigned NumOps>
+  InstructionMappings
+  addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                      const std::array<unsigned, NumOps> RegSrcOpIdx,
+                      ArrayRef<OpRegBankEntry<NumOps>> Table) const;
+
+  RegisterBankInfo::InstructionMappings
+  getInstrAlternativeMappingsIntrinsic(
+      const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
+  RegisterBankInfo::InstructionMappings
+  getInstrAlternativeMappingsIntrinsicWSideEffects(
+      const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
   bool isSALUMapping(const MachineInstr &MI) const;
   const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
   const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
@@ -57,6 +96,9 @@ public:
   unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
                     unsigned Size) const override;
 
+  unsigned getBreakDownCost(const ValueMapping &ValMapping,
+                            const RegisterBank *CurBank = nullptr) const override;
+
   const RegisterBank &
   getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
 
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 570379a820e1..9555694fb106 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -1,9 +1,8 @@
 //=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,7 +14,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
   [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
 >;
 
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
 
 // It is helpful to distinguish conditions from ordinary SGPRs.
 def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 50f859addc2b..7cffdf1a4dcf 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,10 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
     AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
     AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
     AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
-    AMDGPU::sub15
+    AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+    AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
+    AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
+    AMDGPU::sub30, AMDGPU::sub31
   };
 
   assert(Channel < array_lengthof(SubRegs));
@@ -83,7 +85,18 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
   }
 }
 
-unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const SIFrameLowering *TFI =
+      MF.getSubtarget<GCNSubtarget>().getFrameLowering();
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  return FuncInfo->getFrameOffsetReg();
+  return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
+                        : FuncInfo->getStackPtrOffsetReg();
+}
+
+const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
+  return CSR_AMDGPU_AllVGPRs_RegMask;
+}
+
+const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
+  return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
 }
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 922d974f2ebd..3453a8c1b0b3 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
index ceabae524414..ab71b7aa8a57 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,7 @@
 
 let Namespace = "AMDGPU" in {
 
-foreach Index = 0-15 in {
+foreach Index = 0-31 in {
   def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index efe501cb73c2..4f095087a57f 100644
--- a/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 9dbd7751b4d8..f8703c36127a 100644
--- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -1,9 +1,8 @@
 //===-- AMDGPUSearchableTables.td - ------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -49,6 +48,8 @@ def : SourceOfDivergence<int_amdgcn_workitem_id_z>;
 def : SourceOfDivergence<int_amdgcn_interp_mov>;
 def : SourceOfDivergence<int_amdgcn_interp_p1>;
 def : SourceOfDivergence<int_amdgcn_interp_p2>;
+def : SourceOfDivergence<int_amdgcn_interp_p1_f16>;
+def : SourceOfDivergence<int_amdgcn_interp_p2_f16>;
 def : SourceOfDivergence<int_amdgcn_mbcnt_hi>;
 def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
 def : SourceOfDivergence<int_r600_read_tidig_x>;
@@ -70,8 +71,59 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
 def : SourceOfDivergence<int_amdgcn_ps_live>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
+def : SourceOfDivergence<int_amdgcn_permlane16>;
+def : SourceOfDivergence<int_amdgcn_permlanex16>;
+def : SourceOfDivergence<int_amdgcn_mov_dpp>;
+def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
+def : SourceOfDivergence<int_amdgcn_update_dpp>;
+
+// MFMA intrinsics are treated as sources of divergence.
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_4x4x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x16f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x16i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x8f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;
 
 foreach intr = AMDGPUImageDimAtomicIntrinsics in
 def : SourceOfDivergence<intr>;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ed0cc70c3d9a..1eb9b83456c5 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,12 +40,17 @@ using namespace llvm;
 #undef AMDGPUSubtarget
 #include "R600GenSubtargetInfo.inc"
 
+static cl::opt<bool> DisablePowerSched(
+  "amdgpu-disable-power-sched",
+  cl::desc("Disable scheduling to minimize mAI power bursts"),
+  cl::init(false));
+
 GCNSubtarget::~GCNSubtarget() = default;
 
 R600Subtarget &
 R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
                                                StringRef GPU, StringRef FS) {
-  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+  SmallString<256> FullFS("+promote-alloca,");
   FullFS += FS;
   ParseSubtargetFeatures(GPU, FullFS);
 
@@ -65,7 +69,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
 
 GCNSubtarget &
 GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
-                                                 StringRef GPU, StringRef FS) {
+                                              StringRef GPU, StringRef FS) {
   // Determine default and user-specified characteristics
   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
   // enabled, but some instructions do not respect them and they run at the
@@ -78,10 +82,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
   // Similarly we want enable-prt-strict-null to be on by default and not to
   // unset everything else if it is disabled
 
-  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
+  // Assuming ECC is enabled is the conservative default.
+  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
 
   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
-    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+    FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
 
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
@@ -94,6 +99,16 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
   FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
 
+  // Disable mutually exclusive bits.
+  if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
+    if (FS.find_lower("wavefrontsize16") == StringRef::npos)
+      FullFS += "-wavefrontsize16,";
+    if (FS.find_lower("wavefrontsize32") == StringRef::npos)
+      FullFS += "-wavefrontsize32,";
+    if (FS.find_lower("wavefrontsize64") == StringRef::npos)
+      FullFS += "-wavefrontsize64,";
+  }
+
   FullFS += FS;
 
   ParseSubtargetFeatures(GPU, FullFS);
@@ -124,8 +139,25 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
       HasMovrel = true;
   }
 
+  // Don't crash on invalid devices.
+  if (WavefrontSize == 0)
+    WavefrontSize = 64;
+
   HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
 
+  if (DoesNotSupportXNACK && EnableXNACK) {
+    ToggleFeature(AMDGPU::FeatureXNACK);
+    EnableXNACK = false;
+  }
+
+  // ECC is on by default, but turn it off if the hardware doesn't support it
+  // anyway. This matters for the gfx9 targets that have d16 loads but don't
+  // support ECC.
+  if (DoesNotSupportSRAMECC && EnableSRAMECC) {
+    ToggleFeature(AMDGPU::FeatureSRAMECC);
+    EnableSRAMECC = false;
+  }
+
   return *this;
 }
 
@@ -152,8 +184,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     AMDGPUGenSubtargetInfo(TT, GPU, FS),
     AMDGPUSubtarget(TT),
     TargetTriple(TT),
-    Gen(SOUTHERN_ISLANDS),
-    IsaVersion(ISAVersion0_0_0),
+    Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
     InstrItins(getInstrItineraryForCPU(GPU)),
     LDSBankCount(0),
     MaxPrivateElementSize(0),
@@ -162,7 +193,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HalfRate64Ops(false),
 
     FP64FP16Denormals(false),
-    DX10Clamp(false),
     FlatForGlobal(false),
     AutoWaitcntBeforeBarrier(false),
     CodeObjectV3(false),
@@ -171,11 +201,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 
     HasApertureRegs(false),
     EnableXNACK(false),
+    DoesNotSupportXNACK(false),
+    EnableCuMode(false),
     TrapHandler(false),
-    DebuggerInsertNops(false),
-    DebuggerEmitPrologue(false),
 
-    EnableHugePrivateBuffer(false),
     EnableLoadStoreOpt(false),
     EnableUnsafeDSOffsetFolding(false),
     EnableSIScheduler(false),
@@ -186,8 +215,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     FP64(false),
     GCN3Encoding(false),
     CIInsts(false),
-    VIInsts(false),
+    GFX8Insts(false),
     GFX9Insts(false),
+    GFX10Insts(false),
+    GFX7GFX8GFX9Insts(false),
     SGPRInitBug(false),
     HasSMemRealTime(false),
     HasIntClamp(false),
@@ -202,19 +233,47 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasSDWAMac(false),
     HasSDWAOutModsVOPC(false),
     HasDPP(false),
+    HasDPP8(false),
     HasR128A16(false),
+    HasNSAEncoding(false),
     HasDLInsts(false),
-    HasDotInsts(false),
+    HasDot1Insts(false),
+    HasDot2Insts(false),
+    HasDot3Insts(false),
+    HasDot4Insts(false),
+    HasDot5Insts(false),
+    HasDot6Insts(false),
+    HasMAIInsts(false),
+    HasPkFmacF16Inst(false),
+    HasAtomicFaddInsts(false),
     EnableSRAMECC(false),
+    DoesNotSupportSRAMECC(false),
+    HasNoSdstCMPX(false),
+    HasVscnt(false),
+    HasRegisterBanking(false),
+    HasVOP3Literal(false),
+    HasNoDataDepHazard(false),
     FlatAddressSpace(false),
     FlatInstOffsets(false),
     FlatGlobalInsts(false),
     FlatScratchInsts(false),
+    ScalarFlatScratchInsts(false),
     AddNoCarryInsts(false),
     HasUnpackedD16VMem(false),
+    LDSMisalignedBug(false),
 
     ScalarizeGlobal(false),
 
+    HasVcmpxPermlaneHazard(false),
+    HasVMEMtoScalarWriteHazard(false),
+    HasSMEMtoVectorWriteHazard(false),
+    HasInstFwdPrefetchBug(false),
+    HasVcmpxExecWARHazard(false),
+    HasLdsBranchVmemWARHazard(false),
+    HasNSAtoVMEMBug(false),
+    HasOffset3fBug(false),
+    HasFlatSegmentOffsetBug(false),
+
     FeatureDisable(false),
     InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
     TLInfo(TM, *this),
@@ -226,12 +285,34 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
   *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
 }
 
+unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
+  if (getGeneration() < GFX10)
+    return 1;
+
+  switch (Opcode) {
+  case AMDGPU::V_LSHLREV_B64:
+  case AMDGPU::V_LSHLREV_B64_gfx10:
+  case AMDGPU::V_LSHL_B64:
+  case AMDGPU::V_LSHRREV_B64:
+  case AMDGPU::V_LSHRREV_B64_gfx10:
+  case AMDGPU::V_LSHR_B64:
+  case AMDGPU::V_ASHRREV_I64:
+  case AMDGPU::V_ASHRREV_I64_gfx10:
+  case AMDGPU::V_ASHR_I64:
+    return 1;
+  }
+
+  return 2;
+}
+
 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
   const Function &F) const {
   if (NWaves == 1)
     return getLocalMemorySize();
   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+  if (!WorkGroupsPerCu)
+    return 0;
   unsigned MaxWaves = getMaxWavesPerEU();
   return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
 }
@@ -240,6 +321,8 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
   const Function &F) const {
   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+  if (!WorkGroupsPerCu)
+    return 0;
   unsigned MaxWaves = getMaxWavesPerEU();
   unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
   unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
@@ -260,7 +343,8 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
   case CallingConv::AMDGPU_CS:
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
-    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
+    return std::make_pair(getWavefrontSize() * 2,
+                          std::max(getWavefrontSize() * 4, 256u));
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_LS:
   case CallingConv::AMDGPU_HS:
@@ -280,12 +364,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
   std::pair<unsigned, unsigned> Default =
     getDefaultFlatWorkGroupSize(F.getCallingConv());
 
-  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
-  // starts using "amdgpu-flat-work-group-size" attribute.
-  Default.second = AMDGPU::getIntegerAttribute(
-    F, "amdgpu-max-work-group-size", Default.second);
-  Default.first = std::min(Default.first, Default.second);
-
   // Requested minimum/maximum flat work group sizes.
   std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
     F, "amdgpu-flat-work-group-size", Default);
@@ -319,10 +397,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
     getMaxWavesPerEU(FlatWorkGroupSizes.second);
   bool RequestedFlatWorkGroupSize = false;
 
-  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
-  // starts using "amdgpu-flat-work-group-size" attribute.
-  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
-      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
+  if (F.hasFnAttribute("amdgpu-flat-work-group-size")) {
     Default.first = MinImpliedByFlatWorkGroupSize;
     RequestedFlatWorkGroupSize = true;
   }
@@ -460,7 +535,6 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
   FMA(false),
   CaymanISA(false),
   CFALUBug(false),
-  DX10Clamp(false),
   HasVertexCache(false),
   R600ALUInst(false),
   FP64(false),
@@ -486,7 +560,14 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     Policy.ShouldTrackLaneMasks = true;
 }
 
+bool GCNSubtarget::hasMadF16() const {
+  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
+}
+
 unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+  if (getGeneration() >= AMDGPUSubtarget::GFX10)
+    return 10;
+
   if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     if (SGPRs <= 80)
       return 10;
@@ -533,6 +614,9 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
 
 unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  if (getGeneration() >= AMDGPUSubtarget::GFX10)
+    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
+
   if (MFI.hasFlatScratchInit()) {
     if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
@@ -631,9 +715,7 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
 
   MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
 
-  void apply(ScheduleDAGInstrs *DAGInstrs) override {
-    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+  void apply(ScheduleDAGInstrs *DAG) override {
     SUnit *SUa = nullptr;
     // Search for two consequent memory operations and link them
     // to prevent scheduler from moving them apart.
@@ -674,11 +756,130 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
     }
   }
 };
+
+struct FillMFMAShadowMutation : ScheduleDAGMutation {
+  const SIInstrInfo *TII;
+
+  ScheduleDAGMI *DAG;
+
+  FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}
+
+  bool isSALU(const SUnit *SU) const {
+    const MachineInstr *MI = SU->getInstr();
+    return MI && TII->isSALU(*MI) && !MI->isTerminator();
+  }
+
+  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
+    if (Pred->NodeNum < Succ->NodeNum)
+      return true;
+
+    SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});
+
+    for (unsigned I = 0; I < Succs.size(); ++I) {
+      for (const SDep &SI : Succs[I]->Succs) {
+        const SUnit *SU = SI.getSUnit();
+        if (SU != Succs[I] && llvm::find(Succs, SU) == Succs.end())
+          Succs.push_back(SU);
+      }
+    }
+
+    SmallPtrSet<const SUnit*, 32> Visited;
+    while (!Preds.empty()) {
+      const SUnit *SU = Preds.pop_back_val();
+      if (llvm::find(Succs, SU) != Succs.end())
+        return false;
+      Visited.insert(SU);
+      for (const SDep &SI : SU->Preds)
+        if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
+          Preds.push_back(SI.getSUnit());
+    }
+
+    return true;
+  }
+
+  // Link as many SALU instructions in a chain as possible. Return the size
+  // of the chain. Links up to MaxChain instructions.
+  unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
+                         SmallPtrSetImpl<SUnit *> &Visited) const {
+    SmallVector<SUnit *, 8> Worklist({To});
+    unsigned Linked = 0;
+
+    while (!Worklist.empty() && MaxChain-- > 0) {
+      SUnit *SU = Worklist.pop_back_val();
+      if (!Visited.insert(SU).second)
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
+                 dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
+
+      if (SU->addPred(SDep(From, SDep::Artificial), false))
+        ++Linked;
+
+      for (SDep &SI : From->Succs) {
+        SUnit *SUv = SI.getSUnit();
+        if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
+          SUv->addPred(SDep(SU, SDep::Artificial), false);
+      }
+
+      for (SDep &SI : SU->Succs) {
+        SUnit *Succ = SI.getSUnit();
+        if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
+          Worklist.push_back(Succ);
+      }
+    }
+
+    return Linked;
+  }
+
+  void apply(ScheduleDAGInstrs *DAGInstrs) override {
+    const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
+    if (!ST.hasMAIInsts() || DisablePowerSched)
+      return;
+    DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+    const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
+    if (!TSchedModel || DAG->SUnits.empty())
+      return;
+
+    // Scan for long-latency MFMA instructions and try to make available SALU
+    // instructions depend on them, giving those a chance to fill the MFMA
+    // shadow. Filling the shadow with SALU rather than VALU instructions is
+    // desirable because it prevents power consumption bursts and throttling.
+    auto LastSALU = DAG->SUnits.begin();
+    auto E = DAG->SUnits.end();
+    SmallPtrSet<SUnit*, 32> Visited;
+    for (SUnit &SU : DAG->SUnits) {
+      MachineInstr &MAI = *SU.getInstr();
+      if (!TII->isMAI(MAI) ||
+           MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32 ||
+           MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32)
+        continue;
+
+      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;
+
+      LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
+                 dbgs() << "Need " << Lat
+                        << " instructions to cover latency.\n");
+
+      // Find up to Lat independent scalar instructions as early as
+      // possible such that they can be scheduled after this MFMA.
+      for ( ; Lat && LastSALU != E; ++LastSALU) {
+        if (Visited.count(&*LastSALU))
+          continue;
+
+        if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
+          continue;
+
+        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
+      }
+    }
+  }
+};
 } // namespace
 
 void GCNSubtarget::getPostRAMutations(
     std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
   Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
+  Mutations.push_back(llvm::make_unique<FillMFMAShadowMutation>(&InstrInfo));
 }
 
 const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 5584759e5580..78c3b823946d 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1,9 +1,8 @@
 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 //
@@ -56,7 +55,8 @@ public:
     SOUTHERN_ISLANDS = 4,
     SEA_ISLANDS = 5,
     VOLCANIC_ISLANDS = 6,
-    GFX9 = 7
+    GFX9 = 7,
+    GFX10 = 8
   };
 
 private:
@@ -246,26 +246,6 @@ public:
 class GCNSubtarget : public AMDGPUGenSubtargetInfo,
                      public AMDGPUSubtarget {
 public:
-  enum {
-    ISAVersion0_0_0,
-    ISAVersion6_0_0,
-    ISAVersion6_0_1,
-    ISAVersion7_0_0,
-    ISAVersion7_0_1,
-    ISAVersion7_0_2,
-    ISAVersion7_0_3,
-    ISAVersion7_0_4,
-    ISAVersion8_0_1,
-    ISAVersion8_0_2,
-    ISAVersion8_0_3,
-    ISAVersion8_1_0,
-    ISAVersion9_0_0,
-    ISAVersion9_0_2,
-    ISAVersion9_0_4,
-    ISAVersion9_0_6,
-    ISAVersion9_0_9,
-  };
-
   enum TrapHandlerAbi {
     TrapHandlerAbiNone = 0,
     TrapHandlerAbiHsa = 1
@@ -297,7 +277,6 @@ protected:
   // Basic subtarget description.
   Triple TargetTriple;
   unsigned Gen;
-  unsigned IsaVersion;
   InstrItineraryData InstrItins;
   int LDSBankCount;
   unsigned MaxPrivateElementSize;
@@ -308,7 +287,6 @@ protected:
 
   // Dynamially set bits that enable features.
   bool FP64FP16Denormals;
-  bool DX10Clamp;
   bool FlatForGlobal;
   bool AutoWaitcntBeforeBarrier;
   bool CodeObjectV3;
@@ -316,12 +294,11 @@ protected:
   bool UnalignedBufferAccess;
   bool HasApertureRegs;
   bool EnableXNACK;
+  bool DoesNotSupportXNACK;
+  bool EnableCuMode;
   bool TrapHandler;
-  bool DebuggerInsertNops;
-  bool DebuggerEmitPrologue;
 
   // Used as options.
-  bool EnableHugePrivateBuffer;
   bool EnableLoadStoreOpt;
   bool EnableUnsafeDSOffsetFolding;
   bool EnableSIScheduler;
@@ -336,8 +313,10 @@ protected:
   bool IsGCN;
   bool GCN3Encoding;
   bool CIInsts;
-  bool VIInsts;
+  bool GFX8Insts;
   bool GFX9Insts;
+  bool GFX10Insts;
+  bool GFX7GFX8GFX9Insts;
   bool SGPRInitBug;
   bool HasSMemRealTime;
   bool HasIntClamp;
@@ -352,23 +331,51 @@ protected:
   bool HasSDWAMac;
   bool HasSDWAOutModsVOPC;
   bool HasDPP;
+  bool HasDPP8;
   bool HasR128A16;
+  bool HasNSAEncoding;
   bool HasDLInsts;
-  bool HasDotInsts;
+  bool HasDot1Insts;
+  bool HasDot2Insts;
+  bool HasDot3Insts;
+  bool HasDot4Insts;
+  bool HasDot5Insts;
+  bool HasDot6Insts;
+  bool HasMAIInsts;
+  bool HasPkFmacF16Inst;
+  bool HasAtomicFaddInsts;
   bool EnableSRAMECC;
+  bool DoesNotSupportSRAMECC;
+  bool HasNoSdstCMPX;
+  bool HasVscnt;
+  bool HasRegisterBanking;
+  bool HasVOP3Literal;
+  bool HasNoDataDepHazard;
   bool FlatAddressSpace;
   bool FlatInstOffsets;
   bool FlatGlobalInsts;
   bool FlatScratchInsts;
+  bool ScalarFlatScratchInsts;
   bool AddNoCarryInsts;
   bool HasUnpackedD16VMem;
   bool R600ALUInst;
   bool CaymanISA;
   bool CFALUBug;
+  bool LDSMisalignedBug;
   bool HasVertexCache;
   short TexVTXClauseSize;
   bool ScalarizeGlobal;
 
+  bool HasVcmpxPermlaneHazard;
+  bool HasVMEMtoScalarWriteHazard;
+  bool HasSMEMtoVectorWriteHazard;
+  bool HasInstFwdPrefetchBug;
+  bool HasVcmpxExecWARHazard;
+  bool HasLdsBranchVmemWARHazard;
+  bool HasNSAtoVMEMBug;
+  bool HasOffset3fBug;
+  bool HasFlatSegmentOffsetBug;
+
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable;
 
@@ -378,6 +385,9 @@ private:
   SITargetLowering TLInfo;
   SIFrameLowering FrameLowering;
 
+  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
+  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
+
 public:
   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                const GCNTargetMachine &TM);
@@ -437,6 +447,11 @@ public:
     return Log2_32(WavefrontSize);
   }
 
+  /// Return the number of high bits known to be zero for a frame index.
+  unsigned getKnownHighZeroBitsForFrameIndex() const {
+    return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+  }
+
   int getLDSBankCount() const {
     return LDSBankCount;
   }
@@ -445,6 +460,8 @@ public:
     return MaxPrivateElementSize;
   }
 
+  unsigned getConstantBusLimit(unsigned Opcode) const;
+
   bool hasIntClamp() const {
     return HasIntClamp;
   }
@@ -473,6 +490,12 @@ public:
     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
   }
 
+  // Return true if the target only has the reverse operand versions of VALU
+  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
+  bool hasOnlyRevVALUShifts() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   bool hasBFE() const {
     return true;
   }
@@ -525,14 +548,48 @@ public:
     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
   }
 
-  bool enableHugePrivateBuffer() const {
-    return EnableHugePrivateBuffer;
+  /// True if the offset field of DS instructions works as expected. On SI, the
+  /// offset uses a 16-bit adder and does not always wrap properly.
+  bool hasUsableDSOffset() const {
+    return getGeneration() >= SEA_ISLANDS;
   }
 
   bool unsafeDSOffsetFoldingEnabled() const {
     return EnableUnsafeDSOffsetFolding;
   }
 
+  /// Condition output from div_scale is usable.
+  bool hasUsableDivScaleConditionOutput() const {
+    return getGeneration() != SOUTHERN_ISLANDS;
+  }
+
+  /// Extra wait hazard is needed in some cases before
+  /// s_cbranch_vccnz/s_cbranch_vccz.
+  bool hasReadVCCZBug() const {
+    return getGeneration() <= SEA_ISLANDS;
+  }
+
+  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
+  /// was written by a VALU instruction.
+  bool hasSMRDReadVALUDefHazard() const {
+    return getGeneration() == SOUTHERN_ISLANDS;
+  }
+
+  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
+  /// SGPR was written by a VALU Instruction.
+  bool hasVMEMReadSGPRVALUDefHazard() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  bool hasRFEHazards() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
+  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
+  unsigned getSetRegWaitStates() const {
+    return getGeneration() <= SEA_ISLANDS ? 1 : 2;
+  }
+
   bool dumpCode() const {
     return DumpCode;
   }
@@ -554,14 +611,6 @@ public:
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool enableDX10Clamp() const {
-    return DX10Clamp;
-  }
-
-  bool enableIEEEBit(const MachineFunction &MF) const {
-    return AMDGPU::isCompute(MF.getFunction().getCallingConv());
-  }
-
   bool useFlatForGlobal() const {
     return FlatForGlobal;
   }
@@ -572,6 +621,11 @@ public:
     return CIInsts && EnableDS128;
   }
 
+  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
+  bool haveRoundOpsF64() const {
+    return CIInsts;
+  }
+
   /// \returns If MUBUF instructions always perform range checking, even for
   /// buffer resources used for private memory access.
   bool privateMemoryResourceIsRangeChecked() const {
@@ -613,10 +667,18 @@ public:
     return EnableXNACK;
   }
 
+  bool isCuModeEnabled() const {
+    return EnableCuMode;
+  }
+
   bool hasFlatAddressSpace() const {
     return FlatAddressSpace;
   }
 
+  bool hasFlatScrRegister() const {
+    return hasFlatAddressSpace();
+  }
+
   bool hasFlatInstOffsets() const {
     return FlatInstOffsets;
   }
@@ -629,6 +691,14 @@ public:
     return FlatScratchInsts;
   }
 
+  bool hasScalarFlatScratchInsts() const {
+    return ScalarFlatScratchInsts;
+  }
+
+  bool hasFlatSegmentOffsetBug() const {
+    return HasFlatSegmentOffsetBug;
+  }
+
   bool hasFlatLgkmVMemCountInOrder() const {
     return getGeneration() > GFX9;
   }
@@ -637,12 +707,34 @@ public:
     return getGeneration() >= GFX9;
   }
 
+  bool d16PreservesUnusedBits() const {
+    return hasD16LoadStore() && !isSRAMECCEnabled();
+  }
+
+  bool hasD16Images() const {
+    return getGeneration() >= VOLCANIC_ISLANDS;
+  }
+
   /// Return if most LDS instructions have an m0 use that require m0 to be
   /// iniitalized.
   bool ldsRequiresM0Init() const {
     return getGeneration() < GFX9;
   }
 
+  // True if the hardware rewinds and replays GWS operations if a wave is
+  // preempted.
+  //
+  // If this is false, a GWS operation requires testing if a nack set the
+  // MEM_VIOL bit, and repeating if so.
+  bool hasGWSAutoReplay() const {
+    return getGeneration() >= GFX9;
+  }
+
+  /// \returns true if the target has the ds_gws_sema_release_all instruction.
+  bool hasGWSSemaReleaseAll() const {
+    return CIInsts;
+  }
+
   bool hasAddNoCarry() const {
     return AddNoCarryInsts;
   }
@@ -680,22 +772,74 @@ public:
     return HasSDWAOutModsVOPC;
   }
 
-  bool vmemWriteNeedsExpWaitcnt() const {
-    return getGeneration() < SEA_ISLANDS;
-  }
-
   bool hasDLInsts() const {
     return HasDLInsts;
   }
 
-  bool hasDotInsts() const {
-    return HasDotInsts;
+  bool hasDot1Insts() const {
+    return HasDot1Insts;
+  }
+
+  bool hasDot2Insts() const {
+    return HasDot2Insts;
+  }
+
+  bool hasDot3Insts() const {
+    return HasDot3Insts;
+  }
+
+  bool hasDot4Insts() const {
+    return HasDot4Insts;
+  }
+
+  bool hasDot5Insts() const {
+    return HasDot5Insts;
+  }
+
+  bool hasDot6Insts() const {
+    return HasDot6Insts;
+  }
+
+  bool hasMAIInsts() const {
+    return HasMAIInsts;
+  }
+
+  bool hasPkFmacF16Inst() const {
+    return HasPkFmacF16Inst;
+  }
+
+  bool hasAtomicFaddInsts() const {
+    return HasAtomicFaddInsts;
   }
 
   bool isSRAMECCEnabled() const {
     return EnableSRAMECC;
   }
 
+  bool hasNoSdstCMPX() const {
+    return HasNoSdstCMPX;
+  }
+
+  bool hasVscnt() const {
+    return HasVscnt;
+  }
+
+  bool hasRegisterBanking() const {
+    return HasRegisterBanking;
+  }
+
+  bool hasVOP3Literal() const {
+    return HasVOP3Literal;
+  }
+
+  bool hasNoDataDepHazard() const {
+    return HasNoDataDepHazard;
+  }
+
+  bool vmemWriteNeedsExpWaitcnt() const {
+    return getGeneration() < SEA_ISLANDS;
+  }
+
   // Scratch is allocated in 256 dword per wave blocks for the entire
   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
   // is 4-byte aligned.
@@ -792,29 +936,34 @@ public:
     return HasScalarAtomics;
   }
 
+  bool hasLDSFPAtomics() const {
+    return GFX8Insts;
+  }
 
   bool hasDPP() const {
     return HasDPP;
   }
 
+  bool hasDPP8() const {
+    return HasDPP8;
+  }
+
   bool hasR128A16() const {
     return HasR128A16;
   }
 
-  bool enableSIScheduler() const {
-    return EnableSIScheduler;
+  bool hasOffset3fBug() const {
+    return HasOffset3fBug;
   }
 
-  bool debuggerSupported() const {
-    return debuggerInsertNops() && debuggerEmitPrologue();
+  bool hasNSAEncoding() const {
+    return HasNSAEncoding;
   }
 
-  bool debuggerInsertNops() const {
-    return DebuggerInsertNops;
-  }
+  bool hasMadF16() const;
 
-  bool debuggerEmitPrologue() const {
-    return DebuggerEmitPrologue;
+  bool enableSIScheduler() const {
+    return EnableSIScheduler;
   }
 
   bool loadStoreOptEnabled() const {
@@ -835,15 +984,48 @@ public:
   }
 
   bool hasSMovFedHazard() const {
-    return getGeneration() >= AMDGPUSubtarget::GFX9;
+    return getGeneration() == AMDGPUSubtarget::GFX9;
   }
 
   bool hasReadM0MovRelInterpHazard() const {
-    return getGeneration() >= AMDGPUSubtarget::GFX9;
+    return getGeneration() == AMDGPUSubtarget::GFX9;
   }
 
   bool hasReadM0SendMsgHazard() const {
-    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+           getGeneration() <= AMDGPUSubtarget::GFX9;
+  }
+
+  bool hasVcmpxPermlaneHazard() const {
+    return HasVcmpxPermlaneHazard;
+  }
+
+  bool hasVMEMtoScalarWriteHazard() const {
+    return HasVMEMtoScalarWriteHazard;
+  }
+
+  bool hasSMEMtoVectorWriteHazard() const {
+    return HasSMEMtoVectorWriteHazard;
+  }
+
+  bool hasLDSMisalignedBug() const {
+    return LDSMisalignedBug && !EnableCuMode;
+  }
+
+  bool hasInstFwdPrefetchBug() const {
+    return HasInstFwdPrefetchBug;
+  }
+
+  bool hasVcmpxExecWARHazard() const {
+    return HasVcmpxExecWARHazard;
+  }
+
+  bool hasLdsBranchVmemWARHazard() const {
+    return HasLdsBranchVmemWARHazard;
+  }
+
+  bool hasNSAtoVMEMBug() const {
+    return HasNSAtoVMEMBug;
   }
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
@@ -957,6 +1139,14 @@ public:
       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
       const override;
 
+  bool isWave32() const {
+    return WavefrontSize == 32;
+  }
+
+  const TargetRegisterClass *getBoolRC() const {
+    return getRegisterInfo()->getBoolRC();
+  }
+
   /// \returns Maximum number of work groups per compute unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
@@ -994,7 +1184,6 @@ private:
   bool FMA;
   bool CaymanISA;
   bool CFALUBug;
-  bool DX10Clamp;
   bool HasVertexCache;
   bool R600ALUInst;
   bool FP64;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cefdbf74b9..0ea8db04c298 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,11 +24,14 @@
 #include "GCNIterativeScheduler.h"
 #include "GCNSchedStrategy.h"
 #include "R600MachineScheduler.h"
+#include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
@@ -67,6 +69,11 @@ EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                         cl::desc("Run early if-conversion"),
                         cl::init(false));
 
+static cl::opt<bool>
+OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
+            cl::desc("Run pre-RA exec mask optimizations"),
+            cl::init(true));
+
 static cl::opt<bool> EnableR600IfConvert(
   "r600-if-convert",
   cl::desc("Use if conversion pass"),
@@ -109,7 +116,7 @@ static cl::opt<bool> EnableSDWAPeephole(
 static cl::opt<bool> EnableDPPCombine(
   "amdgpu-dpp-combine",
   cl::desc("Enable DPP combiner"),
-  cl::init(false));
+  cl::init(true));
 
 // Enable address space based alias analysis
 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
@@ -123,11 +130,11 @@ static cl::opt<bool, true> LateCFGStructurize(
   cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
   cl::Hidden);
 
-static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
   "amdgpu-function-calls",
   cl::desc("Enable AMDGPU function call support"),
   cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
-  cl::init(false),
+  cl::init(true),
   cl::Hidden);
 
 // Enable lib calls simplifications
@@ -143,6 +150,12 @@ static cl::opt<bool> EnableLowerKernelArguments(
   cl::init(true),
   cl::Hidden);
 
+static cl::opt<bool> EnableRegReassign(
+  "amdgpu-reassign-regs",
+  cl::desc("Enable register reassign optimizations on gfx10+"),
+  cl::init(true),
+  cl::Hidden);
+
 // Enable atomic optimization
 static cl::opt<bool> EnableAtomicOptimizations(
   "amdgpu-atomic-optimizations",
@@ -157,6 +170,18 @@ static cl::opt<bool> EnableSIModeRegisterPass(
   cl::init(true),
   cl::Hidden);
 
+// Used in lit tests to prevent DCE of the patterns being inspected.
+static cl::opt<bool>
+EnableDCEInRA("amdgpu-dce-in-ra",
+    cl::init(true), cl::Hidden,
+    cl::desc("Enable machine DCE inside regalloc"));
+
+static cl::opt<bool> EnableScalarIRPasses(
+  "amdgpu-scalar-ir-passes",
+  cl::desc("Enable scalar IR passes"),
+  cl::init(true),
+  cl::Hidden);
+
 extern "C" void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -172,6 +197,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUDAGToDAGISelPass(*PR);
   initializeGCNDPPCombinePass(*PR);
   initializeSILowerI1CopiesPass(*PR);
+  initializeSILowerSGPRSpillsPass(*PR);
   initializeSIFixSGPRCopiesPass(*PR);
   initializeSIFixVGPRCopiesPass(*PR);
   initializeSIFixupVectorISelPass(*PR);
@@ -192,6 +218,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
+  initializeAMDGPUPropagateAttributesEarlyPass(*PR);
+  initializeAMDGPUPropagateAttributesLatePass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
   initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowPass(*PR);
@@ -201,9 +229,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSILowerControlFlowPass(*PR);
   initializeSIInsertSkipsPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
-  initializeSIDebuggerInsertNopsPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
-  initializeSIFixWWMLivenessPass(*PR);
+  initializeSIPreAllocateWWMRegsPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
   initializeAMDGPUAAWrapperPassPass(*PR);
@@ -211,6 +238,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUUseNativeCallsPass(*PR);
   initializeAMDGPUSimplifyLibCallsPass(*PR);
   initializeAMDGPUInlinerPass(*PR);
+  initializeGCNRegBankReassignPass(*PR);
+  initializeGCNNSAReassignPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -295,10 +324,11 @@ static StringRef computeDataLayout(const Triple &TT) {
   }
 
   // 32-bit private, local, and region pointers. 64-bit global, constant and
-  // flat.
+  // flat, non-integral buffer fat pointers.
     return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
+         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+         "-ni:7";
 }
 
 LLVM_READNONE
@@ -306,8 +336,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
   if (!GPU.empty())
     return GPU;
 
+  // Need to default to a target with flat support for HSA.
   if (TT.getArch() == Triple::amdgcn)
-    return "generic";
+    return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
 
   return "r600";
 }
@@ -363,24 +394,25 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
 
   bool EnableOpt = getOptLevel() > CodeGenOpt::None;
   bool Internalize = InternalizeSymbols;
-  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
+  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
   bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
   bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
 
-  if (EnableAMDGPUFunctionCalls) {
+  if (EnableFunctionCalls) {
     delete Builder.Inliner;
     Builder.Inliner = createAMDGPUFunctionInliningPass();
   }
 
   Builder.addExtension(
     PassManagerBuilder::EP_ModuleOptimizerEarly,
-    [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
-                                         legacy::PassManagerBase &PM) {
+    [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
+                                               legacy::PassManagerBase &PM) {
       if (AMDGPUAA) {
         PM.add(createAMDGPUAAWrapperPass());
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
       PM.add(createAMDGPUUnifyMetadataPass());
+      PM.add(createAMDGPUPropagateAttributesLatePass(this));
       if (Internalize) {
         PM.add(createInternalizePass(mustPreserveGV));
         PM.add(createGlobalDCEPass());
@@ -392,15 +424,16 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
   const auto &Opt = Options;
   Builder.addExtension(
     PassManagerBuilder::EP_EarlyAsPossible,
-    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
-                                      legacy::PassManagerBase &PM) {
+    [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
+                                            legacy::PassManagerBase &PM) {
       if (AMDGPUAA) {
         PM.add(createAMDGPUAAWrapperPass());
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
+      PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
       PM.add(llvm::createAMDGPUUseNativeCallsPass());
       if (LibCallSimplify)
-        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
+        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt, this));
   });
 
   Builder.addExtension(
@@ -428,6 +461,11 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                      CodeGenOpt::Level OL, bool JIT)
     : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
   setRequiresStructuredCFG(true);
+
+  // Override the default since calls aren't supported for r600.
+  if (EnableFunctionCalls &&
+      EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+    EnableFunctionCalls = false;
 }
 
 const R600Subtarget *R600TargetMachine::getSubtargetImpl(
@@ -528,8 +566,14 @@ public:
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addGCPasses() override;
+
+  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
 
+std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
+  return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
 class R600PassConfig final : public AMDGPUPassConfig {
 public:
   R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
@@ -572,9 +616,10 @@ public:
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   bool addGlobalInstructionSelect() override;
-  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
-  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+  void addFastRegAlloc() override;
+  void addOptimizedRegAlloc() override;
   void addPreRegAlloc() override;
+  bool addPreRewrite() override;
   void addPostRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -614,12 +659,16 @@ void AMDGPUPassConfig::addIRPasses() {
   disablePass(&FuncletLayoutID);
   disablePass(&PatchableFunctionID);
 
-  addPass(createAtomicExpandPass());
-
   // This must occur before inlining, as the inliner will not look through
   // bitcast calls.
   addPass(createAMDGPUFixFunctionBitcastsPass());
 
+  // Run the propagate attributes pass in the backend in case opt was not run.
+  addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
+
+  addPass(createAtomicExpandPass());
+
+
   addPass(createAMDGPULowerIntrinsicsPass());
 
   // Function calls are not supported, so make sure we inline everything.
@@ -652,7 +701,8 @@ void AMDGPUPassConfig::addIRPasses() {
     if (EnableSROA)
       addPass(createSROAPass());
 
-    addStraightLineScalarOptimizationPasses();
+    if (EnableScalarIRPasses)
+      addStraightLineScalarOptimizationPasses();
 
     if (EnableAMDGPUAliasAnalysis) {
       addPass(createAMDGPUAAWrapperPass());
@@ -678,15 +728,20 @@ void AMDGPUPassConfig::addIRPasses() {
   //   %1 = shl %a, 2
   //
   // but EarlyCSE can do neither of them.
-  if (getOptLevel() != CodeGenOpt::None)
+  if (getOptLevel() != CodeGenOpt::None && EnableScalarIRPasses)
     addEarlyCSEOrGVNPass();
 }
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
+  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+    addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
 
+  addPass(&AMDGPUPerfHintAnalysisID);
+
   TargetPassConfig::addCodeGenPrepare();
 
   if (EnableLoadStoreVectorizer)
@@ -700,7 +755,8 @@ bool AMDGPUPassConfig::addPreISel() {
 }
 
 bool AMDGPUPassConfig::addInstSelector() {
-  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+  // Defer the verifier until FinalizeISel.
+  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
   return false;
 }
 
@@ -770,7 +826,6 @@ bool GCNPassConfig::addPreISel() {
 
   // FIXME: We need to run a pass to propagate the attributes when calls are
   // supported.
-  addPass(createAMDGPUAnnotateKernelFeaturesPass());
 
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.
@@ -783,6 +838,7 @@ bool GCNPassConfig::addPreISel() {
   if (!LateCFGStructurize) {
     addPass(createSIAnnotateControlFlowPass());
   }
+  addPass(createLCSSAPass());
 
   return false;
 }
@@ -856,7 +912,7 @@ void GCNPassConfig::addPreRegAlloc() {
   addPass(createSIWholeQuadModePass());
 }
 
-void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+void GCNPassConfig::addFastRegAlloc() {
   // FIXME: We have to disable the verifier here because of PHIElimination +
   // TwoAddressInstructions disabling it.
 
@@ -865,28 +921,40 @@ void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
   // SI_ELSE will introduce a copy of the tied operand source after the else.
   insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
 
-  // This must be run after SILowerControlFlow, since it needs to use the
-  // machine-level CFG, but before register allocation.
-  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
+  // This must be run just after RegisterCoalescing.
+  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
 
-  TargetPassConfig::addFastRegAlloc(RegAllocPass);
+  TargetPassConfig::addFastRegAlloc();
 }
 
-void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
-  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
-
-  insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
+void GCNPassConfig::addOptimizedRegAlloc() {
+  if (OptExecMaskPreRA) {
+    insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
+    insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
+  } else {
+    insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
+  }
 
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
   // SI_ELSE will introduce a copy of the tied operand source after the else.
   insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
 
-  // This must be run after SILowerControlFlow, since it needs to use the
-  // machine-level CFG, but before register allocation.
-  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
+  // This must be run just after RegisterCoalescing.
+  insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
+
+  if (EnableDCEInRA)
+    insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
 
-  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
+  TargetPassConfig::addOptimizedRegAlloc();
+}
+
+bool GCNPassConfig::addPreRewrite() {
+  if (EnableRegReassign) {
+    addPass(&GCNNSAReassignID);
+    addPass(&GCNRegBankReassignID);
+  }
+  return true;
 }
 
 void GCNPassConfig::addPostRegAlloc() {
@@ -894,6 +962,9 @@ void GCNPassConfig::addPostRegAlloc() {
   if (getOptLevel() > CodeGenOpt::None)
     addPass(&SIOptimizeExecMaskingID);
   TargetPassConfig::addPostRegAlloc();
+
+  // Equivalent of PEI for SGPRs.
+  addPass(&SILowerSGPRSpillsID);
 }
 
 void GCNPassConfig::addPreSched2() {
@@ -919,10 +990,182 @@ void GCNPassConfig::addPreEmitPass() {
   addPass(&PostRAHazardRecognizerID);
 
   addPass(&SIInsertSkipsPassID);
-  addPass(createSIDebuggerInsertNopsPass());
   addPass(&BranchRelaxationPassID);
 }
 
 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new GCNPassConfig(*this, PM);
 }
+
+/// \returns a newly allocated, default-constructed yaml::SIMachineFunctionInfo.
+yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
+  return new yaml::SIMachineFunctionInfo();
+}
+
+/// \returns a newly allocated yaml::SIMachineFunctionInfo built from the
+/// SIMachineFunctionInfo and register info of \p MF.
+yaml::MachineFunctionInfo *
+GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  return new yaml::SIMachineFunctionInfo(*MFI,
+                                         *MF.getSubtarget().getRegisterInfo());
+}
+
+/// Fill in the SIMachineFunctionInfo of \p PFS.MF from the YAML description
+/// \p MFI_: base fields, the four scratch/frame/stack registers, the optional
+/// argument descriptors, and the IEEE / DX10Clamp mode bits.
+///
+/// \returns true on error, with \p SourceRange pointing at the offending YAML
+/// value and diagnostics reported through \p Error; false on success.
+bool GCNTargetMachine::parseMachineFunctionInfo(
+    const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
+    SMDiagnostic &Error, SMRange &SourceRange) const {
+  // Downcast from the generic YAML base. static_cast performs a well-defined
+  // base-to-derived conversion; reinterpret_cast only happens to work.
+  const yaml::SIMachineFunctionInfo &YamlMFI =
+      static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
+  MachineFunction &MF = PFS.MF;
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  MFI->initializeBaseYamlFields(YamlMFI);
+
+  // Parse a named register into RegVal; returns true (error) on failure.
+  auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
+    if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
+      SourceRange = RegName.SourceRange;
+      return true;
+    }
+
+    return false;
+  };
+
+  auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
+    // Create a diagnostic for the register string literal.
+    const MemoryBuffer &Buffer =
+        *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
+    Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+                         RegName.Value.size(), SourceMgr::DK_Error,
+                         "incorrect register class for field", RegName.Value,
+                         None, None);
+    SourceRange = RegName.SourceRange;
+    return true;
+  };
+
+  if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
+      parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
+      parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
+      parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
+    return true;
+
+  // Reject any register that is neither the AMDGPU:: constant listed for its
+  // field nor a member of the register class expected for that field.
+  if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
+      !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
+    return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
+  }
+
+  if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
+      !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
+    return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
+  }
+
+  if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
+      !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
+    return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
+  }
+
+  if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
+      !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
+    return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
+  }
+
+  // Parse one optional argument descriptor into Arg, check that a register
+  // operand belongs to RC, and on success add UserSGPRs / SystemSGPRs to the
+  // function's SGPR counts. Absent arguments are skipped. Returns true on error.
+  auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
+                                   const TargetRegisterClass &RC,
+                                   ArgDescriptor &Arg, unsigned UserSGPRs,
+                                   unsigned SystemSGPRs) {
+    // Skip parsing if it's not present.
+    if (!A)
+      return false;
+
+    if (A->IsRegister) {
+      unsigned Reg;
+      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
+        SourceRange = A->RegisterName.SourceRange;
+        return true;
+      }
+      if (!RC.contains(Reg))
+        return diagnoseRegisterClass(A->RegisterName);
+      Arg = ArgDescriptor::createRegister(Reg);
+    } else
+      Arg = ArgDescriptor::createStack(A->StackOffset);
+    // Check and apply the optional mask.
+    if (A->Mask)
+      Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
+
+    MFI->NumUserSGPRs += UserSGPRs;
+    MFI->NumSystemSGPRs += SystemSGPRs;
+    return false;
+  };
+
+  if (YamlMFI.ArgInfo &&
+      (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
+                             AMDGPU::SReg_128RegClass,
+                             MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
+                             2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
+                             MFI->ArgInfo.QueuePtr, 2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
+                             AMDGPU::SReg_64RegClass,
+                             MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
+                             2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
+                             AMDGPU::SReg_64RegClass,
+                             MFI->ArgInfo.FlatScratchInit, 2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
+                             AMDGPU::SGPR_32RegClass,
+                             MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
+                             0, 1) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
+                             0, 1) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
+                             0, 1) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
+                             AMDGPU::SGPR_32RegClass,
+                             MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
+                             AMDGPU::SGPR_32RegClass,
+                             MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
+                             AMDGPU::SReg_64RegClass,
+                             MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
+                             AMDGPU::SReg_64RegClass,
+                             MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
+                             AMDGPU::VGPR_32RegClass,
+                             MFI->ArgInfo.WorkItemIDX, 0, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
+                             AMDGPU::VGPR_32RegClass,
+                             MFI->ArgInfo.WorkItemIDY, 0, 0) ||
+       parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
+                             AMDGPU::VGPR_32RegClass,
+                             MFI->ArgInfo.WorkItemIDZ, 0, 0)))
+    return true;
+
+  // Copy the IEEE and DX10Clamp mode bits from the YAML description.
+  MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
+  MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+
+  return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 62fbe71d1902..70fa3961236f 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
 
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringMap.h"
@@ -95,7 +93,6 @@ public:
 
 class GCNTargetMachine final : public AMDGPUTargetMachine {
 private:
-  AMDGPUIntrinsicInfo IntrinsicInfo;
   mutable StringMap<std::unique_ptr<GCNSubtarget>> SubtargetMap;
 
 public:
@@ -110,13 +107,17 @@ public:
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 
-  const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
-    return &IntrinsicInfo;
-  }
-
   bool useIPRA() const override {
     return true;
   }
+
+  yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+  yaml::MachineFunctionInfo *
+  convertFuncInfoToYAML(const MachineFunction &MF) const override;
+  bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+                                PerFunctionMIParsingState &PFS,
+                                SMDiagnostic &Error,
+                                SMRange &SourceRange) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c4e1efde130b..6569980d2c75 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index a4ae1a2c18c2..819bebb7932d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUTargetObjectFile.h - AMDGPU  Object Info ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 11e4ba4b5010..aaed280a1270 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -118,8 +117,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       // Add a small bonus for each of such "if" statements.
       if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
         if (UP.Threshold < MaxBoost && Br->isConditional()) {
-          if (L->isLoopExiting(Br->getSuccessor(0)) ||
-              L->isLoopExiting(Br->getSuccessor(1)))
+          BasicBlock *Succ0 = Br->getSuccessor(0);
+          BasicBlock *Succ1 = Br->getSuccessor(1);
+          if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
+              (L->contains(Succ1) && L->isLoopExiting(Succ1)))
             continue;
           if (dependsOnLocalPhi(L, Br->getCondition())) {
             UP.Threshold += UnrollThresholdIf;
@@ -141,7 +142,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       unsigned Threshold = 0;
       if (AS == AMDGPUAS::PRIVATE_ADDRESS)
         Threshold = ThresholdPrivate;
-      else if (AS == AMDGPUAS::LOCAL_ADDRESS)
+      else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
         Threshold = ThresholdLocal;
       else
         continue;
@@ -159,7 +160,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
         unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
         if (AllocaSize > MaxAlloca)
           continue;
-      } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+      } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+                 AS == AMDGPUAS::REGION_ADDRESS) {
         LocalGEPsSeen++;
         // Inhibit unroll for local memory if we have seen addressing not to
         // a variable, most likely we will be unable to combine it.
@@ -254,7 +256,8 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
 unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
       AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
-      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+      AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
     return 512;
   }
 
@@ -308,6 +311,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
   switch (Inst->getIntrinsicID()) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
@@ -399,7 +404,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
     if (SLT == MVT::f64) {
       int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
       // Add cost of workaround.
-      if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      if (!ST->hasUsableDivScaleConditionOutput())
         Cost += 3 * getFullRateInstrCost();
 
       return LT.first * Cost * NElts;
@@ -577,6 +582,8 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
       return false;
     case Intrinsic::amdgcn_readfirstlane:
     case Intrinsic::amdgcn_readlane:
+    case Intrinsic::amdgcn_icmp:
+    case Intrinsic::amdgcn_fcmp:
       return true;
     }
   }
@@ -607,7 +614,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
 }
 
 bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
-                                        const Function *Callee) const {
+                                     const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
   const FeatureBitset &CallerBits =
     TM.getSubtargetImpl(*Caller)->getFeatureBits();
@@ -616,7 +623,14 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
 
   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
-  return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
+  if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
+    return false;
+
+  // FIXME: dx10_clamp can just take the caller setting, but there seems to be
+  // no way to support merging of backend-defined attributes.
+  AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+  AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+  return CallerMode.isInlineCompatible(CalleeMode);
 }
 
 void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 397c5c6fa6fb..6f1bf5a26f0d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,13 +77,16 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
     AMDGPU::FeatureUnalignedScratchAccess,
 
     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
-    AMDGPU::FeatureDebuggerEmitPrologue,
-    AMDGPU::FeatureDebuggerInsertNops,
 
     // Property of the kernel/environment which can't actually differ.
     AMDGPU::FeatureSGPRInitBug,
     AMDGPU::FeatureXNACK,
     AMDGPU::FeatureTrapHandler,
+    AMDGPU::FeatureCodeObjectV3,
+
+    // ECC must be assumed to be enabled by default, but no directly exposed
+    // operations depend on it, so a mismatch does not block inlining.
+    AMDGPU::FeatureSRAMECC,
 
     // Perf-tuning features
     AMDGPU::FeatureFastFMAF32,
@@ -178,8 +180,7 @@ public:
     // don't use flat addressing.
     if (IsGraphicsShader)
       return -1;
-    return ST->hasFlatAddressSpace() ?
-      AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+    return AMDGPUAS::FLAT_ADDRESS;
   }
 
   unsigned getVectorSplitCost() { return 0; }
@@ -190,7 +191,9 @@ public:
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
-  unsigned getInliningThresholdMultiplier() { return 9; }
+  unsigned getInliningThresholdMultiplier() { return 7; }
+
+  int getInlinerVectorBonusPercent() { return 0; }
 
   int getArithmeticReductionCost(unsigned Opcode,
                                  Type *Ty,
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index ced3f6f567e2..396e0ed2e76c 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -199,14 +198,11 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
         BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
       } else { // Conditional branch.
         // Create a new transition block to hold the conditional branch.
-        BasicBlock *TransitionBB = BasicBlock::Create(F.getContext(),
-                                                      "TransitionBlock", &F);
-
-        // Move BI from BB to the new transition block.
-        BI->removeFromParent();
-        TransitionBB->getInstList().push_back(BI);
+        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
 
-        // Create a branch that will always branch to the transition block.
+        // Create a branch that will always branch to the transition block and
+        // references DummyReturnBB.
+        BB->getTerminator()->eraseFromParent();
         BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
       }
     }
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index 1f6d9234c1ed..d4401a22a1ad 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUUnifyMetadata.cpp - Unify OpenCL metadata --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 11cd49e5b3dc..12f2e9519c9e 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -1,9 +1,8 @@
 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h
index 289642aaa2d0..3e658a144c1f 100644
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file AMDKernelCodeT.h
@@ -127,8 +126,12 @@ enum amd_code_property_mask_t {
   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
 
-  AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
-  AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+
+  AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11,
+  AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5,
   AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
 
   /// Control wave ID base counter for GDS ordered-append. Used to set
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3f9af27a2e5e..6d678966c98e 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
@@ -69,7 +69,7 @@ namespace {
 
 class AMDGPUAsmParser;
 
-enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
+enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
 
 //===----------------------------------------------------------------------===//
 // Operand
@@ -103,14 +103,14 @@ public:
 
     int64_t getFPModifiersOperand() const {
       int64_t Operand = 0;
-      Operand |= Abs ? SISrcMods::ABS : 0;
-      Operand |= Neg ? SISrcMods::NEG : 0;
+      Operand |= Abs ? SISrcMods::ABS : 0u;
+      Operand |= Neg ? SISrcMods::NEG : 0u;
       return Operand;
     }
 
     int64_t getIntModifiersOperand() const {
       int64_t Operand = 0;
-      Operand |= Sext ? SISrcMods::SEXT : 0;
+      Operand |= Sext ? SISrcMods::SEXT : 0u;
       return Operand;
     }
 
@@ -140,21 +140,25 @@ public:
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
+    ImmTyDLC,
     ImmTyGLC,
     ImmTySLC,
     ImmTyTFE,
     ImmTyD16,
     ImmTyClampSI,
     ImmTyOModSI,
+    ImmTyDPP8,
     ImmTyDppCtrl,
     ImmTyDppRowMask,
     ImmTyDppBankMask,
     ImmTyDppBoundCtrl,
+    ImmTyDppFi,
     ImmTySdwaDstSel,
     ImmTySdwaSrc0Sel,
     ImmTySdwaSrc1Sel,
     ImmTySdwaDstUnused,
     ImmTyDMask,
+    ImmTyDim,
     ImmTyUNorm,
     ImmTyDA,
     ImmTyR128A16,
@@ -174,9 +178,15 @@ public:
     ImmTyNegLo,
     ImmTyNegHi,
     ImmTySwizzle,
-    ImmTyHigh
+    ImmTyGprIdxMode,
+    ImmTyHigh,
+    ImmTyBLGP,
+    ImmTyCBSZ,
+    ImmTyABID,
+    ImmTyEndpgm,
   };
 
+private:
   struct TokOp {
     const char *Data;
     unsigned Length;
@@ -191,7 +201,6 @@ public:
 
   struct RegOp {
     unsigned RegNo;
-    bool IsForcedVOP3;
     Modifiers Mods;
   };
 
@@ -202,6 +211,7 @@ public:
     const MCExpr *Expr;
   };
 
+public:
   bool isToken() const override {
     if (Kind == Token)
       return true;
@@ -231,32 +241,32 @@ public:
     return isRegKind() && !hasModifiers();
   }
 
-  bool isRegOrImmWithInputMods(MVT type) const {
-    return isRegKind() || isInlinableImm(type);
+  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
+    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
   }
 
   bool isRegOrImmWithInt16InputMods() const {
-    return isRegOrImmWithInputMods(MVT::i16);
+    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
   bool isRegOrImmWithInt32InputMods() const {
-    return isRegOrImmWithInputMods(MVT::i32);
+    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
 
   bool isRegOrImmWithInt64InputMods() const {
-    return isRegOrImmWithInputMods(MVT::i64);
+    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
   bool isRegOrImmWithFP16InputMods() const {
-    return isRegOrImmWithInputMods(MVT::f16);
+    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
   bool isRegOrImmWithFP32InputMods() const {
-    return isRegOrImmWithInputMods(MVT::f32);
+    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
 
   bool isRegOrImmWithFP64InputMods() const {
-    return isRegOrImmWithInputMods(MVT::f64);
+    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
   bool isVReg() const {
@@ -268,8 +278,12 @@ public:
            isRegClass(AMDGPU::VReg_512RegClassID);
   }
 
+  bool isVReg32() const {
+    return isRegClass(AMDGPU::VGPR_32RegClassID);
+  }
+
   bool isVReg32OrOff() const {
-    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
+    return isOff() || isVReg32();
   }
 
   bool isSDWAOperand(MVT type) const;
@@ -289,6 +303,7 @@ public:
   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
   bool isDMask() const { return isImmTy(ImmTyDMask); }
+  bool isDim() const { return isImmTy(ImmTyDim); }
   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
   bool isDA() const { return isImmTy(ImmTyDA); }
   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
@@ -301,13 +316,13 @@ public:
   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
-  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
+  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
 
-  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
-  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
+  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
+  bool isDLC() const { return isImmTy(ImmTyDLC); }
   bool isGLC() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -316,6 +331,7 @@ public:
   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
+  bool isFI() const { return isImmTy(ImmTyDppFi); }
   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
@@ -339,6 +355,8 @@ public:
 
   bool isRegClass(unsigned RCID) const;
 
+  bool isInlineValue() const;
+
   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
   }
@@ -359,6 +377,8 @@ public:
     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
   }
 
+  bool isBoolReg() const;
+
   bool isSCSrcF16() const {
     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
   }
@@ -411,6 +431,11 @@ public:
     return isSSrcF16();
   }
 
+  bool isSSrcOrLdsB32() const {
+    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
+           isLiteralImm(MVT::i32) || isExpr();
+  }
+
   bool isVCSrcB32() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
@@ -456,8 +481,7 @@ public:
   }
 
   bool isVSrcV2B16() const {
-    llvm_unreachable("cannot happen");
-    return isVSrcB16();
+    return isVSrcB16() || isLiteralImm(MVT::v2i16);
   }
 
   bool isVSrcF32() const {
@@ -473,8 +497,127 @@ public:
   }
 
   bool isVSrcV2F16() const {
-    llvm_unreachable("cannot happen");
-    return isVSrcF16();
+    return isVSrcF16() || isLiteralImm(MVT::v2f16);
+  }
+
+  bool isVISrcB32() const {
+    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
+  }
+
+  bool isVISrcB16() const {
+    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
+  }
+
+  bool isVISrcV2B16() const {
+    return isVISrcB16();
+  }
+
+  bool isVISrcF32() const {
+    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
+  }
+
+  bool isVISrcF16() const {
+    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
+  }
+
+  bool isVISrcV2F16() const {
+    return isVISrcF16() || isVISrcB32();
+  }
+
+  bool isAISrcB32() const {
+    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
+  }
+
+  bool isAISrcB16() const {
+    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
+  }
+
+  bool isAISrcV2B16() const {
+    return isAISrcB16();
+  }
+
+  bool isAISrcF32() const {
+    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
+  }
+
+  bool isAISrcF16() const {
+    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
+  }
+
+  bool isAISrcV2F16() const {
+    return isAISrcF16() || isAISrcB32();
+  }
+
+  bool isAISrc_128B32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
+  }
+
+  bool isAISrc_128B16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
+  }
+
+  bool isAISrc_128V2B16() const {
+    return isAISrc_128B16();
+  }
+
+  bool isAISrc_128F32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
+  }
+
+  bool isAISrc_128F16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
+  }
+
+  bool isAISrc_128V2F16() const {
+    return isAISrc_128F16() || isAISrc_128B32();
+  }
+
+  bool isAISrc_512B32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
+  }
+
+  bool isAISrc_512B16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
+  }
+
+  bool isAISrc_512V2B16() const {
+    return isAISrc_512B16();
+  }
+
+  bool isAISrc_512F32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
+  }
+
+  bool isAISrc_512F16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
+  }
+
+  bool isAISrc_512V2F16() const {
+    return isAISrc_512F16() || isAISrc_512B32();
+  }
+
+  bool isAISrc_1024B32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
+  }
+
+  bool isAISrc_1024B16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
+  }
+
+  bool isAISrc_1024V2B16() const {
+    return isAISrc_1024B16();
+  }
+
+  bool isAISrc_1024F32() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
+  }
+
+  bool isAISrc_1024F16() const {
+    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
+  }
+
+  bool isAISrc_1024V2F16() const {
+    return isAISrc_1024F16() || isAISrc_1024B32();
   }
 
   bool isKImmFP32() const {
@@ -504,10 +647,15 @@ public:
   bool isSMRDOffset8() const;
   bool isSMRDOffset20() const;
   bool isSMRDLiteralOffset() const;
+  bool isDPP8() const;
   bool isDPPCtrl() const;
+  bool isBLGP() const;
+  bool isCBSZ() const;
+  bool isABID() const;
   bool isGPRIdxMode() const;
   bool isS16Imm() const;
   bool isU16Imm() const;
+  bool isEndpgm() const;
 
   StringRef getExpressionAsToken() const {
     assert(isExpr());
@@ -535,6 +683,7 @@ public:
   }
 
   unsigned getReg() const override {
+    assert(isRegKind());
     return Reg.RegNo;
   }
 
@@ -594,6 +743,10 @@ public:
 
   void addRegOperands(MCInst &Inst, unsigned N) const;
 
+  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
+    addRegOperands(Inst, N);
+  }
+
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
     if (isRegKind())
       addRegOperands(Inst, N);
@@ -661,6 +814,7 @@ public:
     case ImmTyInstOffset: OS << "InstOffset"; break;
     case ImmTyOffset0: OS << "Offset0"; break;
     case ImmTyOffset1: OS << "Offset1"; break;
+    case ImmTyDLC: OS << "DLC"; break;
     case ImmTyGLC: OS << "GLC"; break;
     case ImmTySLC: OS << "SLC"; break;
     case ImmTyTFE: OS << "TFE"; break;
@@ -668,15 +822,18 @@ public:
     case ImmTyFORMAT: OS << "FORMAT"; break;
     case ImmTyClampSI: OS << "ClampSI"; break;
     case ImmTyOModSI: OS << "OModSI"; break;
+    case ImmTyDPP8: OS << "DPP8"; break;
     case ImmTyDppCtrl: OS << "DppCtrl"; break;
     case ImmTyDppRowMask: OS << "DppRowMask"; break;
     case ImmTyDppBankMask: OS << "DppBankMask"; break;
     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
+    case ImmTyDppFi: OS << "FI"; break;
     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
     case ImmTyDMask: OS << "DMask"; break;
+    case ImmTyDim: OS << "Dim"; break;
     case ImmTyUNorm: OS << "UNorm"; break;
     case ImmTyDA: OS << "DA"; break;
     case ImmTyR128A16: OS << "R128A16"; break;
@@ -695,7 +852,12 @@ public:
     case ImmTyNegLo: OS << "NegLo"; break;
     case ImmTyNegHi: OS << "NegHi"; break;
     case ImmTySwizzle: OS << "Swizzle"; break;
+    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
     case ImmTyHigh: OS << "High"; break;
+    case ImmTyBLGP: OS << "BLGP"; break;
+    case ImmTyCBSZ: OS << "CBSZ"; break;
+    case ImmTyABID: OS << "ABID"; break;
+    case ImmTyEndpgm: OS << "Endpgm"; break;
     }
   }
 
@@ -747,12 +909,10 @@ public:
 
   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                       unsigned RegNo, SMLoc S,
-                                      SMLoc E,
-                                      bool ForceVOP3) {
+                                      SMLoc E) {
     auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
     Op->Reg.RegNo = RegNo;
     Op->Reg.Mods = Modifiers();
-    Op->Reg.IsForcedVOP3 = ForceVOP3;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
@@ -817,6 +977,7 @@ public:
   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
     switch (RegKind) {
       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
+      case IS_AGPR: // fall through
       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
       default: break;
     }
@@ -853,6 +1014,8 @@ private:
   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
+  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
+  /// descriptor field, if valid.
   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
@@ -861,9 +1024,10 @@ private:
   /// \param SGPRBlocks [out] Result SGPR block count.
   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                           bool FlatScrUsed, bool XNACKUsed,
-                          unsigned NextFreeVGPR, SMRange VGPRRange,
-                          unsigned NextFreeSGPR, SMRange SGPRRange,
-                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
+                          SMRange VGPRRange, unsigned NextFreeSGPR,
+                          SMRange SGPRRange, unsigned &VGPRBlocks,
+                          unsigned &SGPRBlocks);
   bool ParseDirectiveAMDGCNTarget();
   bool ParseDirectiveAMDHSAKernel();
   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
@@ -876,7 +1040,15 @@ private:
 
   bool ParseDirectiveISAVersion();
   bool ParseDirectiveHSAMetadata();
+  bool ParseDirectivePALMetadataBegin();
   bool ParseDirectivePALMetadata();
+  bool ParseDirectiveAMDGPULDS();
+
+  /// Common code to parse out a block of text (typically YAML) between start and
+  /// end directives.
+  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
+                           const char *AssemblerDirectiveEnd,
+                           std::string &CollectString);
 
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                              RegisterKind RegKind, unsigned Reg1,
@@ -884,6 +1056,8 @@ private:
   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                            unsigned& RegNum, unsigned& RegWidth,
                            unsigned *DwordRegIndex);
+  bool isRegister();
+  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
   void initializeGprCountSymbol(RegisterKind RegKind);
   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
@@ -897,6 +1071,10 @@ public:
   enum AMDGPUMatchResultTy {
     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
   };
+  enum OperandMode {
+    OperandMode_Default,
+    OperandMode_NSA,
+  };
 
   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
 
@@ -908,7 +1086,7 @@ public:
 
     if (getFeatureBits().none()) {
       // Set default features.
-      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
+      copySTI().ToggleFeature("southern-islands");
     }
 
     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
@@ -924,6 +1102,10 @@ public:
         MCSymbol *Sym =
             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
+        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
       } else {
         MCSymbol *Sym =
             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
@@ -969,6 +1151,10 @@ public:
     return AMDGPU::isGFX9(getSTI());
   }
 
+  bool isGFX10() const {
+    return AMDGPU::isGFX10(getSTI());
+  }
+
   bool hasInv2PiInlineImm() const {
     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
   }
@@ -978,7 +1164,11 @@ public:
   }
 
   bool hasSGPR102_SGPR103() const {
-    return !isVI();
+    return !isVI() && !isGFX9();
+  }
+
+  bool hasSGPR104_SGPR105() const {
+    return isGFX10();
   }
 
   bool hasIntClamp() const {
@@ -1024,7 +1214,8 @@ public:
                                uint64_t &ErrorInfo,
                                bool MatchingInlineAsm) override;
   bool ParseDirective(AsmToken DirectiveID) override;
-  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
+                                    OperandMode Mode = OperandMode_Default);
   StringRef parseMnemonicSuffix(StringRef Name);
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                         SMLoc NameLoc, OperandVector &Operands) override;
@@ -1037,11 +1228,11 @@ public:
                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                      bool (*ConvertResult)(int64_t &) = nullptr);
 
-  OperandMatchResultTy parseOperandArrayWithPrefix(
-    const char *Prefix,
-    OperandVector &Operands,
-    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
-    bool (*ConvertResult)(int64_t&) = nullptr);
+  OperandMatchResultTy
+  parseOperandArrayWithPrefix(const char *Prefix,
+                              OperandVector &Operands,
+                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+                              bool (*ConvertResult)(int64_t&) = nullptr);
 
   OperandMatchResultTy
   parseNamedBit(const char *Name, OperandVector &Operands,
@@ -1049,10 +1240,15 @@ public:
   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                              StringRef &Value);
 
-  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
-  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
+  bool isModifier();
+  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
+  bool parseSP3NegModifier();
+  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
   OperandMatchResultTy parseReg(OperandVector &Operands);
-  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
+  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
@@ -1073,33 +1269,63 @@ private:
   struct OperandInfoTy {
     int64_t Id;
     bool IsSymbolic = false;
+    bool IsDefined = false;
 
     OperandInfoTy(int64_t Id_) : Id(Id_) {}
   };
 
-  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
-  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
+  bool validateSendMsg(const OperandInfoTy &Msg,
+                       const OperandInfoTy &Op,
+                       const OperandInfoTy &Stream,
+                       const SMLoc Loc);
+
+  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+  bool validateHwreg(const OperandInfoTy &HwReg,
+                     const int64_t Offset,
+                     const int64_t Width,
+                     const SMLoc Loc);
 
   void errorExpTgt();
   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
 
-  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
+  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
+  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
+  bool validateSOPLiteral(const MCInst &Inst) const;
   bool validateConstantBusLimitations(const MCInst &Inst);
   bool validateEarlyClobberLimitations(const MCInst &Inst);
   bool validateIntClampSupported(const MCInst &Inst);
   bool validateMIMGAtomicDMask(const MCInst &Inst);
   bool validateMIMGGatherDMask(const MCInst &Inst);
   bool validateMIMGDataSize(const MCInst &Inst);
+  bool validateMIMGAddrSize(const MCInst &Inst);
   bool validateMIMGD16(const MCInst &Inst);
+  bool validateMIMGDim(const MCInst &Inst);
+  bool validateLdsDirect(const MCInst &Inst);
+  bool validateOpSel(const MCInst &Inst);
+  bool validateVccOperand(unsigned Reg) const;
+  bool validateVOP3Literal(const MCInst &Inst) const;
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
 
+  bool isId(const StringRef Id) const;
+  bool isId(const AsmToken &Token, const StringRef Id) const;
+  bool isToken(const AsmToken::TokenKind Kind) const;
   bool trySkipId(const StringRef Id);
+  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
   bool trySkipToken(const AsmToken::TokenKind Kind);
   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
+  void peekTokens(MutableArrayRef<AsmToken> Tokens);
+  AsmToken::TokenKind getTokenKind() const;
   bool parseExpr(int64_t &Imm);
+  StringRef getTokenStr() const;
+  AsmToken peekToken();
+  AsmToken getToken() const;
+  SMLoc getLoc() const;
+  void lex();
 
 public:
   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -1110,6 +1336,7 @@ public:
   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
 
   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                             const unsigned MinVal,
@@ -1124,20 +1351,23 @@ public:
   bool parseSwizzleSwap(int64_t &Imm);
   bool parseSwizzleReverse(int64_t &Imm);
 
+  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
+  int64_t parseGPRIdxMacro();
+
   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
+  AMDGPUOperand::Ptr defaultDLC() const;
   AMDGPUOperand::Ptr defaultGLC() const;
   AMDGPUOperand::Ptr defaultSLC() const;
 
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
-  AMDGPUOperand::Ptr defaultOffsetU12() const;
-  AMDGPUOperand::Ptr defaultOffsetS13() const;
+  AMDGPUOperand::Ptr defaultFlatOffset() const;
 
   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
 
@@ -1153,11 +1383,15 @@ public:
                bool IsAtomic = false);
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
 
+  OperandMatchResultTy parseDim(OperandVector &Operands);
+  OperandMatchResultTy parseDPP8(OperandVector &Operands);
   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
   AMDGPUOperand::Ptr defaultRowMask() const;
   AMDGPUOperand::Ptr defaultBankMask() const;
   AMDGPUOperand::Ptr defaultBoundCtrl() const;
-  void cvtDPP(MCInst &Inst, const OperandVector &Operands);
+  AMDGPUOperand::Ptr defaultFI() const;
+  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
+  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
 
   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                     AMDGPUOperand::ImmTy Type);
@@ -1168,6 +1402,13 @@ public:
   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                 uint64_t BasicInstType, bool skipVcc = false);
+
+  AMDGPUOperand::Ptr defaultBLGP() const;
+  AMDGPUOperand::Ptr defaultCBSZ() const;
+  AMDGPUOperand::Ptr defaultABID() const;
+
+  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
+  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
 };
 
 struct OptionalOperand {
@@ -1203,6 +1444,8 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
     return &APFloat::IEEEsingle();
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1215,6 +1458,12 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
     return &APFloat::IEEEhalf();
   default:
     llvm_unreachable("unsupported fp type");
@@ -1243,7 +1492,20 @@ static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
   return true;
 }
 
+static bool isSafeTruncation(int64_t Val, unsigned Size) {
+  return isUIntN(Size, Val) || isIntN(Size, Val);
+}
+
 bool AMDGPUOperand::isInlinableImm(MVT type) const {
+
+  // This is a hack to enable named inline values like
+  // shared_base with both 32-bit and 64-bit operands.
+  // Note that these values are defined as
+  // 32-bit operands only.
+  if (isInlineValue()) {
+    return true;
+  }
+
   if (!isImmTy(ImmTyNone)) {
     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
     return false;
@@ -1282,6 +1544,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
                                         AsmParser->hasInv2PiInlineImm());
   }
 
+  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
+    return false;
+  }
+
   if (type.getScalarSizeInBits() == 16) {
     return AMDGPU::isInlinableLiteral16(
       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
@@ -1315,7 +1581,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
 
     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
     // types.
-    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
+    return isSafeTruncation(Imm.Val, Size);
   }
 
   // We got fp literal token
@@ -1330,8 +1596,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
     return false;
   }
 
+  // We allow fp literals with f16x2 operands assuming that the specified
+  // literal goes into the lower half and the upper half is zero. We also
+  // require that the literal may be losslessly converted to f16.
+  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
+                     (type == MVT::v2i16)? MVT::i16 : type;
+
   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
-  return canLosslesslyConvertToFPType(FPLiteral, type);
+  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
 }
 
 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
@@ -1340,9 +1612,9 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
 
 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
   if (AsmParser->isVI())
-    return isVReg();
-  else if (AsmParser->isGFX9())
-    return isRegKind() || isInlinableImm(type);
+    return isVReg32();
+  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
+    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
   else
     return false;
 }
@@ -1363,6 +1635,11 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
   return isSDWAOperand(MVT::i32);
 }
 
+bool AMDGPUOperand::isBoolReg() const {
+  return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
+    isSCSrcB64() : isSCSrcB32();
+}
+
 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
 {
   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
@@ -1441,12 +1718,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     case AMDGPU::OPERAND_REG_IMM_FP32:
     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
     case AMDGPU::OPERAND_REG_IMM_INT16:
     case AMDGPU::OPERAND_REG_IMM_FP16:
     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+    case AMDGPU::OPERAND_REG_IMM_V2INT16:
+    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
       bool lost;
       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
       // Convert literal to single precision
@@ -1456,11 +1741,6 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       // checked earlier in isLiteralImm()
 
       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
-      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
-          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
-        ImmVal |= (ImmVal << 16);
-      }
-
       Inst.addOperand(MCOperand::createImm(ImmVal));
       return;
     }
@@ -1471,15 +1751,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
   }
 
-   // We got int literal token.
+  // We got int literal token.
   // Only sign extend inline immediates.
-  // FIXME: No errors on truncation
   switch (OpTy) {
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
-    if (isInt<32>(Val) &&
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+    if (isSafeTruncation(Val, 32) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
@@ -1505,7 +1788,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
-    if (isInt<16>(Val) &&
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+    if (isSafeTruncation(Val, 16) &&
         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
@@ -1516,14 +1801,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     return;
 
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
-    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
-    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
+    assert(isSafeTruncation(Val, 16));
+    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                         AsmParser->hasInv2PiInlineImm()));
 
-    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
-                      static_cast<uint32_t>(LiteralVal);
-    Inst.addOperand(MCOperand::createImm(ImmVal));
+    Inst.addOperand(MCOperand::createImm(Val));
     return;
   }
   default:
@@ -1552,6 +1837,27 @@ void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
 }
 
+static bool isInlineValue(unsigned Reg) {
+  switch (Reg) {
+  case AMDGPU::SRC_SHARED_BASE:
+  case AMDGPU::SRC_SHARED_LIMIT:
+  case AMDGPU::SRC_PRIVATE_BASE:
+  case AMDGPU::SRC_PRIVATE_LIMIT:
+  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+    return true;
+  case AMDGPU::SRC_VCCZ:
+  case AMDGPU::SRC_EXECZ:
+  case AMDGPU::SRC_SCC:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool AMDGPUOperand::isInlineValue() const {
+  return isRegKind() && ::isInlineValue(getReg());
+}
+
 //===----------------------------------------------------------------------===//
 // AsmParser
 //===----------------------------------------------------------------------===//
@@ -1585,6 +1891,15 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
       case 8: return AMDGPU::SGPR_256RegClassID;
       case 16: return AMDGPU::SGPR_512RegClassID;
     }
+  } else if (Is == IS_AGPR) {
+    switch (RegWidth) {
+      default: return -1;
+      case 1: return AMDGPU::AGPR_32RegClassID;
+      case 2: return AMDGPU::AReg_64RegClassID;
+      case 4: return AMDGPU::AReg_128RegClassID;
+      case 16: return AMDGPU::AReg_512RegClassID;
+      case 32: return AMDGPU::AReg_1024RegClassID;
+    }
   }
   return -1;
 }
@@ -1595,8 +1910,25 @@ static unsigned getSpecialRegForName(StringRef RegName) {
     .Case("vcc", AMDGPU::VCC)
     .Case("flat_scratch", AMDGPU::FLAT_SCR)
     .Case("xnack_mask", AMDGPU::XNACK_MASK)
+    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
+    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
+    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
+    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
+    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+    .Case("lds_direct", AMDGPU::LDS_DIRECT)
+    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
     .Case("m0", AMDGPU::M0)
-    .Case("scc", AMDGPU::SCC)
+    .Case("vccz", AMDGPU::SRC_VCCZ)
+    .Case("src_vccz", AMDGPU::SRC_VCCZ)
+    .Case("execz", AMDGPU::SRC_EXECZ)
+    .Case("src_execz", AMDGPU::SRC_EXECZ)
+    .Case("scc", AMDGPU::SRC_SCC)
+    .Case("src_scc", AMDGPU::SRC_SCC)
     .Case("tba", AMDGPU::TBA)
     .Case("tma", AMDGPU::TMA)
     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
@@ -1611,6 +1943,7 @@ static unsigned getSpecialRegForName(StringRef RegName) {
     .Case("tma_hi", AMDGPU::TMA_HI)
     .Case("tba_lo", AMDGPU::TBA_LO)
     .Case("tba_hi", AMDGPU::TBA_HI)
+    .Case("null", AMDGPU::SGPR_NULL)
     .Default(0);
 }
 
@@ -1663,6 +1996,7 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
     return false;
   case IS_VGPR:
   case IS_SGPR:
+  case IS_AGPR:
   case IS_TTMP:
     if (Reg1 != Reg + RegWidth) {
       return false;
@@ -1674,6 +2008,53 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
   }
 }
 
+static const StringRef Registers[] = {
+  { "v" },
+  { "s" },
+  { "ttmp" },
+  { "acc" },
+  { "a" },
+};
+
+bool
+AMDGPUAsmParser::isRegister(const AsmToken &Token,
+                            const AsmToken &NextToken) const {
+
+  // A list of consecutive registers: [s0,s1,s2,s3]
+  if (Token.is(AsmToken::LBrac))
+    return true;
+
+  if (!Token.is(AsmToken::Identifier))
+    return false;
+
+  // A single register like s0 or a range of registers like s[0:1]
+
+  StringRef RegName = Token.getString();
+
+  for (StringRef Reg : Registers) {
+    if (RegName.startswith(Reg)) {
+      if (Reg.size() < RegName.size()) {
+        unsigned RegNum;
+        // A single register with an index: rXX
+        if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
+          return true;
+      } else {
+        // A range of registers: r[XX:YY].
+        if (NextToken.is(AsmToken::LBrac))
+          return true;
+      }
+    }
+  }
+
+  return getSpecialRegForName(RegName);
+}
+
+bool
+AMDGPUAsmParser::isRegister()
+{
+  return isRegister(getToken(), peekToken());
+}
+
 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                           unsigned &RegNum, unsigned &RegWidth,
                                           unsigned *DwordRegIndex) {
@@ -1692,6 +2073,9 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
       } else if (RegName[0] == 's') {
         RegNumIndex = 1;
         RegKind = IS_SGPR;
+      } else if (RegName[0] == 'a') {
+        RegNumIndex = RegName.startswith("acc") ? 3 : 1;
+        RegKind = IS_AGPR;
       } else if (RegName.startswith("ttmp")) {
         RegNumIndex = strlen("ttmp");
         RegKind = IS_TTMP;
@@ -1773,6 +2157,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
     break;
   case IS_VGPR:
   case IS_SGPR:
+  case IS_AGPR:
   case IS_TTMP:
   {
     unsigned Size = 1;
@@ -1859,6 +2244,8 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
   unsigned Reg, RegNum, RegWidth, DwordRegIndex;
 
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
+    //FIXME: improve error messages (bug 41303).
+    Error(StartLoc, "not a valid operand.");
     return nullptr;
   }
   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
@@ -1866,202 +2253,261 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
       return nullptr;
   } else
     KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
-  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
+  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
 }
 
-bool
-AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
-  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
-      (getLexer().getKind() == AsmToken::Integer ||
-       getLexer().getKind() == AsmToken::Real)) {
-    // This is a workaround for handling operands like these:
-    //     |1.0|
-    //     |-1|
-    // This syntax is not compatible with syntax of standard
-    // MC expressions (due to the trailing '|').
-
-    SMLoc EndLoc;
-    const MCExpr *Expr;
+OperandMatchResultTy
+AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
+  // TODO: add syntactic sugar for 1/(2*PI)
 
-    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
-      return true;
-    }
+  assert(!isRegister());
+  assert(!isModifier());
+
+  const auto& Tok = getToken();
+  const auto& NextTok = peekToken();
+  bool IsReal = Tok.is(AsmToken::Real);
+  SMLoc S = getLoc();
+  bool Negate = false;
 
-    return !Expr->evaluateAsAbsolute(Val);
+  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
+    lex();
+    IsReal = true;
+    Negate = true;
   }
 
-  return getParser().parseAbsoluteExpression(Val);
-}
+  if (IsReal) {
+    // Floating-point expressions are not supported.
+    // Can only allow floating-point literals with an
+    // optional sign.
 
-OperandMatchResultTy
-AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
-  // TODO: add syntactic sugar for 1/(2*PI)
-  bool Minus = false;
-  if (getLexer().getKind() == AsmToken::Minus) {
-    const AsmToken NextToken = getLexer().peekTok();
-    if (!NextToken.is(AsmToken::Integer) &&
-        !NextToken.is(AsmToken::Real)) {
-        return MatchOperand_NoMatch;
-    }
-    Minus = true;
-    Parser.Lex();
-  }
+    StringRef Num = getTokenStr();
+    lex();
 
-  SMLoc S = Parser.getTok().getLoc();
-  switch(getLexer().getKind()) {
-  case AsmToken::Integer: {
-    int64_t IntVal;
-    if (parseAbsoluteExpr(IntVal, AbsMod))
+    APFloat RealVal(APFloat::IEEEdouble());
+    auto roundMode = APFloat::rmNearestTiesToEven;
+    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
       return MatchOperand_ParseFail;
-    if (Minus)
-      IntVal *= -1;
-    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+    }
+    if (Negate)
+      RealVal.changeSign();
+
+    Operands.push_back(
+      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
+                               AMDGPUOperand::ImmTyNone, true));
+
     return MatchOperand_Success;
-  }
-  case AsmToken::Real: {
+
+  } else {
     int64_t IntVal;
-    if (parseAbsoluteExpr(IntVal, AbsMod))
-      return MatchOperand_ParseFail;
+    const MCExpr *Expr;
+    SMLoc S = getLoc();
+
+    if (HasSP3AbsModifier) {
+      // This is a workaround for handling expressions
+      // as arguments of SP3 'abs' modifier, for example:
+      //     |1.0|
+      //     |-1|
+      //     |1+x|
+      // This syntax is not compatible with syntax of standard
+      // MC expressions (due to the trailing '|').
+      SMLoc EndLoc;
+      if (getParser().parsePrimaryExpr(Expr, EndLoc))
+        return MatchOperand_ParseFail;
+    } else {
+      if (Parser.parseExpression(Expr))
+        return MatchOperand_ParseFail;
+    }
+
+    if (Expr->evaluateAsAbsolute(IntVal)) {
+      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+    } else {
+      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
+    }
 
-    APFloat F(BitsToDouble(IntVal));
-    if (Minus)
-      F.changeSign();
-    Operands.push_back(
-        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
-                                 AMDGPUOperand::ImmTyNone, true));
     return MatchOperand_Success;
   }
-  default:
-    return MatchOperand_NoMatch;
-  }
+
+  return MatchOperand_NoMatch;
 }
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
+  if (!isRegister())
+    return MatchOperand_NoMatch;
+
   if (auto R = parseRegister()) {
     assert(R->isReg());
-    R->Reg.IsForcedVOP3 = isForcedVOP3();
     Operands.push_back(std::move(R));
     return MatchOperand_Success;
   }
-  return MatchOperand_NoMatch;
+  return MatchOperand_ParseFail;
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
-  auto res = parseImm(Operands, AbsMod);
+AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
+  auto res = parseReg(Operands);
   if (res != MatchOperand_NoMatch) {
     return res;
+  } else if (isModifier()) {
+    return MatchOperand_NoMatch;
+  } else {
+    return parseImm(Operands, HasSP3AbsMod);
   }
+}
 
-  return parseReg(Operands);
+bool
+AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
+    const auto &str = Token.getString();
+    return str == "abs" || str == "neg" || str == "sext";
+  }
+  return false;
 }
 
-OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
-                                              bool AllowImm) {
-  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
+bool
+AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
+  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
+}
+
+bool
+AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
+}
 
-  if (getLexer().getKind()== AsmToken::Minus) {
-    const AsmToken NextToken = getLexer().peekTok();
+bool
+AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
+}
+
+// Check if this is an operand modifier or an opcode modifier
+// which may look like an expression but it is not. We should
+// avoid parsing these modifiers as expressions. Currently
+// recognized sequences are:
+//   |...|
+//   abs(...)
+//   neg(...)
+//   sext(...)
+//   -reg
+//   -|...|
+//   -abs(...)
+//   name:...
+// Note that simple opcode modifiers like 'gds' may be parsed as
+// expressions; this is a special case. See getExpressionAsToken.
+//
+bool
+AMDGPUAsmParser::isModifier() {
 
-    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
-    if (NextToken.is(AsmToken::Minus)) {
-      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
-      return MatchOperand_ParseFail;
-    }
+  AsmToken Tok = getToken();
+  AsmToken NextToken[2];
+  peekTokens(NextToken);
 
-    // '-' followed by an integer literal N should be interpreted as integer
-    // negation rather than a floating-point NEG modifier applied to N.
-    // Beside being contr-intuitive, such use of floating-point NEG modifier
-    // results in different meaning of integer literals used with VOP1/2/C
-    // and VOP3, for example:
-    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
-    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
-    // Negative fp literals should be handled likewise for unifomtity
-    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
-      Parser.Lex();
-      Negate = true;
-    }
+  return isOperandModifier(Tok, NextToken[0]) ||
+         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
+         isOpcodeModifierWithVal(Tok, NextToken[0]);
+}
+
+// Check if the current token is an SP3 'neg' modifier.
+// Currently this modifier is allowed in the following context:
+//
+// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
+// 2. Before an 'abs' modifier: -abs(...)
+// 3. Before an SP3 'abs' modifier: -|...|
+//
+// In all other cases "-" is handled as a part
+// of an expression that follows the sign.
+//
+// Note: When "-" is followed by an integer literal,
+// this is interpreted as integer negation rather
+// than a floating-point NEG modifier applied to N.
+// Besides being counter-intuitive, such use of floating-point
+// NEG modifier would have resulted in different meaning
+// of integer literals used with VOP1/2/C and VOP3,
+// for example:
+//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
+//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
+// Negative fp literals with preceding "-" are
+// handled likewise for uniformity
+//
+bool
+AMDGPUAsmParser::parseSP3NegModifier() {
+
+  AsmToken NextToken[2];
+  peekTokens(NextToken);
+
+  if (isToken(AsmToken::Minus) &&
+      (isRegister(NextToken[0], NextToken[1]) ||
+       NextToken[0].is(AsmToken::Pipe) ||
+       isId(NextToken[0], "abs"))) {
+    lex();
+    return true;
   }
 
-  if (getLexer().getKind() == AsmToken::Identifier &&
-      Parser.getTok().getString() == "neg") {
-    if (Negate) {
-      Error(Parser.getTok().getLoc(), "expected register or immediate");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-    Negate2 = true;
-    if (getLexer().isNot(AsmToken::LParen)) {
-      Error(Parser.getTok().getLoc(), "expected left paren after neg");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
+  return false;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+                                              bool AllowImm) {
+  bool Neg, SP3Neg;
+  bool Abs, SP3Abs;
+  SMLoc Loc;
+
+  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
+  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
+    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
+    return MatchOperand_ParseFail;
   }
 
-  if (getLexer().getKind() == AsmToken::Identifier &&
-      Parser.getTok().getString() == "abs") {
-    Parser.Lex();
-    Abs2 = true;
-    if (getLexer().isNot(AsmToken::LParen)) {
-      Error(Parser.getTok().getLoc(), "expected left paren after abs");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
+  SP3Neg = parseSP3NegModifier();
+
+  Loc = getLoc();
+  Neg = trySkipId("neg");
+  if (Neg && SP3Neg) {
+    Error(Loc, "expected register or immediate");
+    return MatchOperand_ParseFail;
   }
+  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
+    return MatchOperand_ParseFail;
 
-  if (getLexer().getKind() == AsmToken::Pipe) {
-    if (Abs2) {
-      Error(Parser.getTok().getLoc(), "expected register or immediate");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-    Abs = true;
+  Abs = trySkipId("abs");
+  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
+    return MatchOperand_ParseFail;
+
+  Loc = getLoc();
+  SP3Abs = trySkipToken(AsmToken::Pipe);
+  if (Abs && SP3Abs) {
+    Error(Loc, "expected register or immediate");
+    return MatchOperand_ParseFail;
   }
 
   OperandMatchResultTy Res;
   if (AllowImm) {
-    Res = parseRegOrImm(Operands, Abs);
+    Res = parseRegOrImm(Operands, SP3Abs);
   } else {
     Res = parseReg(Operands);
   }
   if (Res != MatchOperand_Success) {
-    return Res;
+    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
   }
 
+  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
+    return MatchOperand_ParseFail;
+  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+    return MatchOperand_ParseFail;
+  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+    return MatchOperand_ParseFail;
+
   AMDGPUOperand::Modifiers Mods;
-  if (Abs) {
-    if (getLexer().getKind() != AsmToken::Pipe) {
-      Error(Parser.getTok().getLoc(), "expected vertical bar");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-    Mods.Abs = true;
-  }
-  if (Abs2) {
-    if (getLexer().isNot(AsmToken::RParen)) {
-      Error(Parser.getTok().getLoc(), "expected closing parentheses");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-    Mods.Abs = true;
-  }
+  Mods.Abs = Abs || SP3Abs;
+  Mods.Neg = Neg || SP3Neg;
 
-  if (Negate) {
-    Mods.Neg = true;
-  } else if (Negate2) {
-    if (getLexer().isNot(AsmToken::RParen)) {
-      Error(Parser.getTok().getLoc(), "expected closing parentheses");
+  if (Mods.hasFPModifiers()) {
+    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+    if (Op.isExpr()) {
+      Error(Op.getStartLoc(), "expected an absolute expression");
       return MatchOperand_ParseFail;
     }
-    Parser.Lex();
-    Mods.Neg = true;
-  }
-
-  if (Mods.hasFPModifiers()) {
-    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
-    Op.setModifiers(Mods);
+    Op.setModifiers(Mods);
   }
   return MatchOperand_Success;
 }
@@ -2069,18 +2515,9 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
 OperandMatchResultTy
 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                                bool AllowImm) {
-  bool Sext = false;
-
-  if (getLexer().getKind() == AsmToken::Identifier &&
-      Parser.getTok().getString() == "sext") {
-    Parser.Lex();
-    Sext = true;
-    if (getLexer().isNot(AsmToken::LParen)) {
-      Error(Parser.getTok().getLoc(), "expected left paren after sext");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-  }
+  bool Sext = trySkipId("sext");
+  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
+    return MatchOperand_ParseFail;
 
   OperandMatchResultTy Res;
   if (AllowImm) {
@@ -2089,21 +2526,21 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
     Res = parseReg(Operands);
   }
   if (Res != MatchOperand_Success) {
-    return Res;
+    return Sext? MatchOperand_ParseFail : Res;
   }
 
+  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+    return MatchOperand_ParseFail;
+
   AMDGPUOperand::Modifiers Mods;
-  if (Sext) {
-    if (getLexer().isNot(AsmToken::RParen)) {
-      Error(Parser.getTok().getLoc(), "expected closing parentheses");
-      return MatchOperand_ParseFail;
-    }
-    Parser.Lex();
-    Mods.Sext = true;
-  }
+  Mods.Sext = Sext;
 
   if (Mods.hasIntModifiers()) {
     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+    if (Op.isExpr()) {
+      Error(Op.getStartLoc(), "expected an absolute expression");
+      return MatchOperand_ParseFail;
+    }
     Op.setModifiers(Mods);
   }
 
@@ -2121,21 +2558,24 @@ AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
 }
 
 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
+  auto Loc = getLoc(); // captured first; becomes the location of the "off" operand
+  if (trySkipId("off")) { // "off" is pushed as an ImmTyOff immediate with value 0
+    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
+                                                AMDGPUOperand::ImmTyOff, false));
+    return MatchOperand_Success;
+  }
+
+  if (!isRegister()) // neither "off" nor a register: NoMatch rather than an error
+    return MatchOperand_NoMatch;
+
   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
   if (Reg) {
     Operands.push_back(std::move(Reg));
     return MatchOperand_Success;
   }
 
-  const AsmToken &Tok = Parser.getTok();
-  if (Tok.getString() == "off") {
-    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
-                                                AMDGPUOperand::ImmTyOff, false));
-    Parser.Lex();
-    return MatchOperand_Success;
-  }
+  return MatchOperand_ParseFail; // isRegister() held but parseRegister() gave no operand
 
-  return MatchOperand_NoMatch;
 }
 
 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
@@ -2163,15 +2603,6 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     }
   }
 
-  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
-    // FIXME: Produces error without correct column reported.
-    auto OpNum =
-        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
-    const auto &Op = Inst.getOperand(OpNum);
-    if (Op.getImm() != 0)
-      return Match_InvalidOperand;
-  }
-
   return Match_Success;
 }
 
@@ -2214,7 +2645,10 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
     switch (Reg) {
     case AMDGPU::FLAT_SCR:
     case AMDGPU::VCC:
+    case AMDGPU::VCC_LO:
+    case AMDGPU::VCC_HI:
     case AMDGPU::M0:
+    case AMDGPU::SGPR_NULL:
       return Reg;
     default:
       break;
@@ -2248,7 +2682,11 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
   case 2: {
     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
-        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
+        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
+        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
     } else {
       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
@@ -2272,6 +2710,8 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
   const unsigned Opcode = Inst.getOpcode();
   const MCInstrDesc &Desc = MII.get(Opcode);
   unsigned ConstantBusUseCount = 0;
+  unsigned NumLiterals = 0;
+  unsigned LiteralSize;
 
   if (Desc.TSFlags &
       (SIInstrFlags::VOPC |
@@ -2283,8 +2723,10 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
       ++ConstantBusUseCount;
     }
 
+    SmallDenseSet<unsigned> SGPRsUsed;
     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
     if (SGPRUsed != AMDGPU::NoRegister) {
+      SGPRsUsed.insert(SGPRUsed);
       ++ConstantBusUseCount;
     }
 
@@ -2307,16 +2749,41 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
           //   flat_scratch_lo, flat_scratch_hi
           // are theoretically valid but they are disabled anyway.
           // Note that this code mimics SIInstrInfo::verifyInstruction
-          if (Reg != SGPRUsed) {
+          if (!SGPRsUsed.count(Reg)) {
+            SGPRsUsed.insert(Reg);
             ++ConstantBusUseCount;
           }
-          SGPRUsed = Reg;
         } else { // Expression or a literal
-          ++ConstantBusUseCount;
+
+          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
+            continue; // special operand like VINTERP attr_chan
+
+          // An instruction may use only one literal.
+          // This has been validated on the previous step.
+          // See validateVOP3Literal.
+          // This literal may be used as more than one operand.
+          // If all these operands are of the same size,
+          // this literal counts as one scalar value.
+          // Otherwise it counts as 2 scalar values.
+          // See "GFX10 Shader Programming", section 3.6.2.3.
+
+          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
+          if (Size < 4) Size = 4;
+
+          if (NumLiterals == 0) {
+            NumLiterals = 1;
+            LiteralSize = Size;
+          } else if (LiteralSize != Size) {
+            NumLiterals = 2;
+          }
         }
       }
     }
   }
+  ConstantBusUseCount += NumLiterals;
+
+  if (isGFX10())
+    return ConstantBusUseCount <= 2;
 
   return ConstantBusUseCount <= 1;
 }
@@ -2405,6 +2872,46 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
   return (VDataSize / 4) == DataSize + TFESize;
 }
 
+bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
+  // Only GFX10 MIMG encodings are checked; anything else is accepted.
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &InstDesc = MII.get(Opc);
+  const bool IsMIMG = (InstDesc.TSFlags & SIInstrFlags::MIMG) != 0;
+  if (!IsMIMG || !isGFX10())
+    return true;
+
+  const AMDGPU::MIMGInfo *OpcInfo = AMDGPU::getMIMGInfo(Opc);
+  const AMDGPU::MIMGBaseOpcodeInfo *Base =
+      AMDGPU::getMIMGBaseOpcodeInfo(OpcInfo->BaseOpcode);
+  const int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+  const int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+  const int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  assert(VAddr0Idx != -1);
+  assert(SrsrcIdx != -1);
+  assert(DimIdx != -1);
+  assert(SrsrcIdx > VAddr0Idx);
+
+  const unsigned DimEnc = Inst.getOperand(DimIdx).getImm();
+  const AMDGPU::MIMGDimInfo *DimData = AMDGPU::getMIMGDimInfoByEncoding(DimEnc);
+
+  // Supplied size: the number of operands between vaddr0 and srsrc when there
+  // are several, otherwise the size reported for the vaddr0 operand / 4.
+  const int NumAddrOps = SrsrcIdx - VAddr0Idx;
+  const bool IsNSA = NumAddrOps > 1;
+  unsigned Supplied = NumAddrOps;
+  if (!IsNSA)
+    Supplied = AMDGPU::getRegOperandSize(getMRI(), InstDesc, VAddr0Idx) / 4;
+
+  unsigned Expected = Base->NumExtraArgs;
+  Expected += Base->Gradients ? DimData->NumGradients : 0;
+  Expected += Base->Coordinates ? DimData->NumCoords : 0;
+  Expected += Base->LodOrClampOrMip ? 1 : 0;
+  if (!IsNSA && Expected > 4) // single-operand form is rounded up to 8 or 16
+    Expected = Expected > 8 ? 16 : 8;
+
+  return Supplied == Expected;
+}
+
 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
 
   const unsigned Opc = Inst.getOpcode();
@@ -2461,8 +2968,346 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
   return true;
 }
 
+bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
+  // Accepts every non-MIMG instruction and every MIMG opcode that has no dim
+  // operand; otherwise the dim immediate must lie in [0, 8).
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  if (DimIdx < 0)
+    return true;
+
+  // Keep the full 64-bit immediate: 'long' is 32 bits wide on LLP64 hosts,
+  // so a narrowed out-of-range value could wrap into the accepted range.
+  const int64_t Imm = Inst.getOperand(DimIdx).getImm();
+  return Imm >= 0 && Imm < 8;
+}
+
+static bool IsRevOpcode(const unsigned Opcode) // true iff Opcode is in the list below
+{ // Used by validateLdsDirect(): lds_direct as src0 is rejected for these opcodes.
+  switch (Opcode) {
+  case AMDGPU::V_SUBREV_F32_e32:
+  case AMDGPU::V_SUBREV_F32_e64:
+  case AMDGPU::V_SUBREV_F32_e32_gfx10:
+  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
+  case AMDGPU::V_SUBREV_F32_e32_vi:
+  case AMDGPU::V_SUBREV_F32_e64_gfx10:
+  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
+  case AMDGPU::V_SUBREV_F32_e64_vi:
+
+  case AMDGPU::V_SUBREV_I32_e32:
+  case AMDGPU::V_SUBREV_I32_e64:
+  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
+  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
+
+  case AMDGPU::V_SUBBREV_U32_e32:
+  case AMDGPU::V_SUBBREV_U32_e64:
+  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
+  case AMDGPU::V_SUBBREV_U32_e32_vi:
+  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
+  case AMDGPU::V_SUBBREV_U32_e64_vi:
+
+  case AMDGPU::V_SUBREV_U32_e32:
+  case AMDGPU::V_SUBREV_U32_e64:
+  case AMDGPU::V_SUBREV_U32_e32_gfx9:
+  case AMDGPU::V_SUBREV_U32_e32_vi:
+  case AMDGPU::V_SUBREV_U32_e64_gfx9:
+  case AMDGPU::V_SUBREV_U32_e64_vi:
+
+  case AMDGPU::V_SUBREV_F16_e32:
+  case AMDGPU::V_SUBREV_F16_e64:
+  case AMDGPU::V_SUBREV_F16_e32_gfx10:
+  case AMDGPU::V_SUBREV_F16_e32_vi:
+  case AMDGPU::V_SUBREV_F16_e64_gfx10:
+  case AMDGPU::V_SUBREV_F16_e64_vi:
+
+  case AMDGPU::V_SUBREV_U16_e32:
+  case AMDGPU::V_SUBREV_U16_e64:
+  case AMDGPU::V_SUBREV_U16_e32_vi:
+  case AMDGPU::V_SUBREV_U16_e64_vi:
+
+  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
+  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
+  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
+
+  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
+  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
+
+  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
+  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
+
+  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
+
+  case AMDGPU::V_LSHRREV_B32_e32:
+  case AMDGPU::V_LSHRREV_B32_e64:
+  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
+  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
+  case AMDGPU::V_LSHRREV_B32_e32_vi:
+  case AMDGPU::V_LSHRREV_B32_e64_vi:
+  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
+  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
+
+  case AMDGPU::V_ASHRREV_I32_e32:
+  case AMDGPU::V_ASHRREV_I32_e64:
+  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
+  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
+  case AMDGPU::V_ASHRREV_I32_e32_vi:
+  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
+  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
+  case AMDGPU::V_ASHRREV_I32_e64_vi:
+
+  case AMDGPU::V_LSHLREV_B32_e32:
+  case AMDGPU::V_LSHLREV_B32_e64:
+  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
+  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
+  case AMDGPU::V_LSHLREV_B32_e32_vi:
+  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
+  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
+  case AMDGPU::V_LSHLREV_B32_e64_vi:
+
+  case AMDGPU::V_LSHLREV_B16_e32:
+  case AMDGPU::V_LSHLREV_B16_e64:
+  case AMDGPU::V_LSHLREV_B16_e32_vi:
+  case AMDGPU::V_LSHLREV_B16_e64_vi:
+  case AMDGPU::V_LSHLREV_B16_gfx10:
+
+  case AMDGPU::V_LSHRREV_B16_e32:
+  case AMDGPU::V_LSHRREV_B16_e64:
+  case AMDGPU::V_LSHRREV_B16_e32_vi:
+  case AMDGPU::V_LSHRREV_B16_e64_vi:
+  case AMDGPU::V_LSHRREV_B16_gfx10:
+
+  case AMDGPU::V_ASHRREV_I16_e32:
+  case AMDGPU::V_ASHRREV_I16_e64:
+  case AMDGPU::V_ASHRREV_I16_e32_vi:
+  case AMDGPU::V_ASHRREV_I16_e64_vi:
+  case AMDGPU::V_ASHRREV_I16_gfx10:
+
+  case AMDGPU::V_LSHLREV_B64:
+  case AMDGPU::V_LSHLREV_B64_gfx10:
+  case AMDGPU::V_LSHLREV_B64_vi:
+
+  case AMDGPU::V_LSHRREV_B64:
+  case AMDGPU::V_LSHRREV_B64_gfx10:
+  case AMDGPU::V_LSHRREV_B64_vi:
+
+  case AMDGPU::V_ASHRREV_I64:
+  case AMDGPU::V_ASHRREV_I64_gfx10:
+  case AMDGPU::V_ASHRREV_I64_vi:
+
+  case AMDGPU::V_PK_LSHLREV_B16:
+  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
+  case AMDGPU::V_PK_LSHLREV_B16_vi:
+
+  case AMDGPU::V_PK_LSHRREV_B16:
+  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
+  case AMDGPU::V_PK_LSHRREV_B16_vi:
+  case AMDGPU::V_PK_ASHRREV_I16:
+  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
+  case AMDGPU::V_PK_ASHRREV_I16_vi:
+    return true; // every case label above falls through to here
+  default:
+    return false; // any opcode that is not listed
+  }
+}
+
+bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+  using namespace SIInstrFlags;
+
+  const unsigned Opc = Inst.getOpcode();
+  const uint64_t Flags = MII.get(Opc).TSFlags;
+
+  // The lds_direct register is defined for 9-bit operands only, so encodings
+  // that do not accept those have nothing to validate.
+  const uint64_t Mask = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
+  if ((Flags & Mask) == 0)
+    return true;
+
+  // Tells whether the operand at index Idx is the lds_direct register.
+  auto IsLdsDirect = [&Inst](int Idx) {
+    const MCOperand &MO = Inst.getOperand(Idx);
+    return MO.isReg() && MO.getReg() == LDS_DIRECT;
+  };
+
+  // lds_direct may not appear as src1 or src2. src2 is only examined when the
+  // opcode has a src1 operand.
+  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+  if (Src1Idx != -1) {
+    if (IsLdsDirect(Src1Idx))
+      return false;
+    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+    if (Src2Idx != -1 && IsLdsDirect(Src2Idx))
+      return false;
+  }
+
+  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+  if (Src0Idx == -1 || !IsLdsDirect(Src0Idx))
+    return true;
+
+  // lds_direct is src0: it is rejected for SDWA encodings and *REV opcodes.
+  return (Flags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opc);
+}
+
+SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
+  for (unsigned Idx = 1, End = Operands.size(); Idx != End; ++Idx) {
+    auto &Candidate = static_cast<AMDGPUOperand &>(*Operands[Idx]);
+    if (Candidate.isFlatOffset())
+      return Candidate.getStartLoc(); // first flat-offset operand wins
+  }
+  return getLoc(); // no flat-offset operand among Operands[1..]
+}
+
+bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
+                                         const OperandVector &Operands) {
+  // Checks the offset immediate of a FLAT-flagged instruction and reports a
+  // diagnostic at the offset operand when it is unsupported or out of range.
+
+  const auto Opcode = Inst.getOpcode();
+  const uint64_t Flags = MII.get(Opcode).TSFlags;
+  if ((Flags & SIInstrFlags::FLAT) == 0)
+    return true;
+
+  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+  assert(OpNum != -1);
+  const int64_t Offset = Inst.getOperand(OpNum).getImm();
+
+  // Emits Msg at the flat-offset operand's location and fails validation.
+  auto Reject = [&](const char *Msg) {
+    Error(getFlatOffsetLoc(Operands), Msg);
+    return false;
+  };
+
+  if (!hasFlatOffsets() && Offset != 0)
+    return Reject("flat offset modifier is not supported on this GPU");
+
+  // The offset field is 13 bits wide on GFX9 and 12 bits wide otherwise.
+  // Non-flat segments accept the full signed range; the FLAT segment accepts
+  // only unsigned values that are one bit narrower.
+  const bool OnGFX9 = isGFX9();
+  const unsigned OffsetSize = OnGFX9 ? 13 : 12;
+  if (Flags & SIInstrFlags::IsNonFlatSeg) {
+    if (!isIntN(OffsetSize, Offset))
+      return Reject(OnGFX9 ? "expected a 13-bit signed offset"
+                           : "expected a 12-bit signed offset");
+  } else if (!isUIntN(OffsetSize - 1, Offset)) {
+    return Reject(OnGFX9 ? "expected a 12-bit unsigned offset"
+                         : "expected an 11-bit unsigned offset");
+  }
+
+  return true;
+}
+
+bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
+  // SOP2/SOPC instructions may use one literal; src0 and src1 count as one
+  // when they hold the same value.
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &InstDesc = MII.get(Opc);
+  if ((InstDesc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)) == 0)
+    return true;
+
+  const int SrcIdx[] = {
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)};
+
+  unsigned DistinctLiterals = 0;
+  uint32_t LastLiteral = 0;
+  // The scan stops at the first source operand the opcode does not have.
+  for (unsigned I = 0; I != 2 && SrcIdx[I] != -1; ++I) {
+    const MCOperand &Src = Inst.getOperand(SrcIdx[I]);
+    // Skip non-immediates, special imm operands (like that used by
+    // s_set_gpr_idx_on) and inline constants.
+    if (!Src.isImm() || !AMDGPU::isSISrcOperand(InstDesc, SrcIdx[I]) ||
+        isInlineConstant(Inst, SrcIdx[I]))
+      continue;
+
+    const uint32_t Literal = static_cast<uint32_t>(Src.getImm());
+    if (DistinctLiterals != 0 && LastLiteral == Literal)
+      continue;
+    LastLiteral = Literal;
+    ++DistinctLiterals;
+  }
+
+  return DistinctLiterals <= 1;
+}
+
+bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
+  // Only the two GFX10 permlane opcodes are restricted: their op_sel
+  // immediate may use the two lowest bits and nothing else.
+  const unsigned Opcode = Inst.getOpcode();
+  const bool IsPermlane = Opcode == AMDGPU::V_PERMLANE16_B32_gfx10 ||
+                          Opcode == AMDGPU::V_PERMLANEX16_B32_gfx10;
+  if (!IsPermlane)
+    return true;
+  const int Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::op_sel);
+  const unsigned Bits = Inst.getOperand(Idx).getImm();
+  return (Bits & ~3u) == 0;
+}
+
+// Accepts VCC when wave64 is enabled and VCC_LO when wave32 is enabled.
+bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
+  const auto &Features = getFeatureBits();
+  if (Reg == AMDGPU::VCC) return Features[AMDGPU::FeatureWavefrontSize64];
+  return Reg == AMDGPU::VCC_LO && Features[AMDGPU::FeatureWavefrontSize32];
+}
+
+// A VOP3/VOP3P instruction may carry at most one distinct literal, and only
+// when the subtarget has FeatureVOP3Literal.
+bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &InstDesc = MII.get(Opc);
+  if ((InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) == 0)
+    return true;
+
+  const int SrcIdx[] = {
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)};
+
+  unsigned DistinctLiterals = 0;
+  uint32_t LastLiteral = 0;
+  // The scan stops at the first source operand the opcode does not have.
+  for (unsigned I = 0; I != 3 && SrcIdx[I] != -1; ++I) {
+    const MCOperand &Src = Inst.getOperand(SrcIdx[I]);
+    // Only non-inline immediates in SI source operand slots are literals.
+    if (!Src.isImm() || !AMDGPU::isSISrcOperand(InstDesc, SrcIdx[I]) ||
+        isInlineConstant(Inst, SrcIdx[I]))
+      continue;
+
+    // A value equal to the previously seen literal is not counted again.
+    const uint32_t Literal = static_cast<uint32_t>(Src.getImm());
+    if (DistinctLiterals != 0 && LastLiteral == Literal)
+      continue;
+    LastLiteral = Literal;
+    ++DistinctLiterals;
+  }
+
+  if (DistinctLiterals == 0)
+    return true;
+  return DistinctLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal];
+}
+
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
-                                          const SMLoc &IDLoc) {
+                                          const SMLoc &IDLoc,
+                                          const OperandVector &Operands) {
+  if (!validateLdsDirect(Inst)) {
+    Error(IDLoc,
+      "invalid use of lds_direct");
+    return false;
+  }
+  if (!validateSOPLiteral(Inst)) {
+    Error(IDLoc,
+      "only one literal operand is allowed");
+    return false;
+  }
+  if (!validateVOP3Literal(Inst)) {
+    Error(IDLoc,
+      "invalid literal operand");
+    return false;
+  }
   if (!validateConstantBusLimitations(Inst)) {
     Error(IDLoc,
       "invalid operand (violates constant bus restrictions)");
@@ -2478,17 +3323,31 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "integer clamping is not supported on this GPU");
     return false;
   }
+  if (!validateOpSel(Inst)) {
+    Error(IDLoc,
+      "invalid op_sel operand");
+    return false;
+  }
   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
   if (!validateMIMGD16(Inst)) {
     Error(IDLoc,
       "d16 modifier is not supported on this GPU");
     return false;
   }
+  if (!validateMIMGDim(Inst)) {
+    Error(IDLoc, "dim modifier is required on this GPU");
+    return false;
+  }
   if (!validateMIMGDataSize(Inst)) {
     Error(IDLoc,
       "image data size does not match dmask and tfe");
     return false;
   }
+  if (!validateMIMGAddrSize(Inst)) {
+    Error(IDLoc,
+      "image address size does not match dim and a16");
+    return false;
+  }
   if (!validateMIMGAtomicDMask(Inst)) {
     Error(IDLoc,
       "invalid atomic image dmask");
@@ -2499,11 +3358,15 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
       "invalid image_gather dmask: only one bit must be set");
     return false;
   }
+  if (!validateFlatOffset(Inst, Operands)) {
+    return false;
+  }
 
   return true;
 }
 
-static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string AMDGPUMnemonicSpellCheck(StringRef S,
+                                            const FeatureBitset &FBS,
                                             unsigned VariantID = 0);
 
 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -2538,7 +3401,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   switch (Result) {
   default: break;
   case Match_Success:
-    if (!validateInstruction(Inst, IDLoc)) {
+    if (!validateInstruction(Inst, IDLoc, Operands)) {
       return true;
     }
     Inst.setLoc(IDLoc);
@@ -2549,7 +3412,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return Error(IDLoc, "instruction not supported on this GPU");
 
   case Match_MnemonicFail: {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = AMDGPUMnemonicSpellCheck(
         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
     return Error(IDLoc, "invalid instruction" + Suggestion,
@@ -2632,32 +3495,39 @@ bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
 
 bool AMDGPUAsmParser::calculateGPRBlocks(
     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
-    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
-    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
-    unsigned &SGPRBlocks) {
+    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
+    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
+    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
   // TODO(scott.linder): These calculations are duplicated from
   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
   IsaVersion Version = getIsaVersion(getSTI().getCPU());
 
   unsigned NumVGPRs = NextFreeVGPR;
   unsigned NumSGPRs = NextFreeSGPR;
-  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
 
-  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
-      NumSGPRs > MaxAddressableNumSGPRs)
-    return OutOfRangeError(SGPRRange);
+  if (Version.Major >= 10)
+    NumSGPRs = 0;
+  else {
+    unsigned MaxAddressableNumSGPRs =
+        IsaInfo::getAddressableNumSGPRs(&getSTI());
 
-  NumSGPRs +=
-      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
+    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
+        NumSGPRs > MaxAddressableNumSGPRs)
+      return OutOfRangeError(SGPRRange);
 
-  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
-      NumSGPRs > MaxAddressableNumSGPRs)
-    return OutOfRangeError(SGPRRange);
+    NumSGPRs +=
+        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
 
-  if (Features.test(FeatureSGPRInitBug))
-    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+        NumSGPRs > MaxAddressableNumSGPRs)
+      return OutOfRangeError(SGPRRange);
+
+    if (Features.test(FeatureSGPRInitBug))
+      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+  }
 
-  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+  VGPRBlocks =
+      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
 
   return false;
@@ -2674,7 +3544,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   if (getParser().parseIdentifier(KernelName))
     return true;
 
-  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
 
   StringSet<> Seen;
 
@@ -2688,6 +3558,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   bool ReserveVCC = true;
   bool ReserveFlatScr = true;
   bool ReserveXNACK = hasXNACK();
+  Optional<bool> EnableWavefrontSize32;
 
   while (true) {
     while (getLexer().is(AsmToken::EndOfStatement))
@@ -2736,37 +3607,45 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                        Val, ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 4;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                        ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                        ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                        Val, ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                        ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                        ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                        Val, ValRange);
-      UserSGPRCount++;
+      UserSGPRCount += 1;
+    } else if (ID == ".amdhsa_wavefront_size32") {
+      if (IVersion.Major < 10)
+        return getParser().Error(IDRange.Start, "directive requires gfx10+",
+                                 IDRange);
+      EnableWavefrontSize32 = Val;
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                       Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
@@ -2841,6 +3720,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                                  IDRange);
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                        ValRange);
+    } else if (ID == ".amdhsa_workgroup_processor_mode") {
+      if (IVersion.Major < 10)
+        return getParser().Error(IDRange.Start, "directive requires gfx10+",
+                                 IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_memory_ordered") {
+      if (IVersion.Major < 10)
+        return getParser().Error(IDRange.Start, "directive requires gfx10+",
+                                 IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_forward_progress") {
+      if (IVersion.Major < 10)
+        return getParser().Error(IDRange.Start, "directive requires gfx10+",
+                                 IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+                       ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
@@ -2888,8 +3785,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   unsigned VGPRBlocks;
   unsigned SGPRBlocks;
   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
-                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
-                         SGPRRange, VGPRBlocks, SGPRBlocks))
+                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
+                         SGPRBlocks))
     return true;
 
   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
@@ -2994,6 +3892,46 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
     return TokError(Err.str());
   }
   Lex();
+
+  if (ID == "enable_wavefront_size32") {
+    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+      if (!isGFX10())
+        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
+    } else {
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
+    }
+  }
+
+  if (ID == "wavefront_size") {
+    if (Header.wavefront_size == 5) {
+      if (!isGFX10())
+        return TokError("wavefront_size=5 is only allowed on GFX10+");
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+        return TokError("wavefront_size=5 requires +WavefrontSize32");
+    } else if (Header.wavefront_size == 6) {
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+        return TokError("wavefront_size=6 requires +WavefrontSize64");
+    }
+  }
+
+  if (ID == "enable_wgp_mode") {
+    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
+      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
+  }
+
+  if (ID == "enable_mem_ordered") {
+    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
+      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
+  }
+
+  if (ID == "enable_fwd_progress") {
+    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
+      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
+  }
+
   return false;
 }
 
@@ -3081,14 +4019,35 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
   }
 
   std::string HSAMetadataString;
-  raw_string_ostream YamlStream(HSAMetadataString);
+  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
+                          HSAMetadataString))
+    return true;
+
+  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+  } else {
+    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+  }
+
+  return false;
+}
+
+/// Common code to parse out a block of text (typically YAML) between start and
+/// end directives.
+bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
+                                          const char *AssemblerDirectiveEnd,
+                                          std::string &CollectString) {
+
+  raw_string_ostream CollectStream(CollectString);
 
   getLexer().setSkipSpace(false);
 
   bool FoundEnd = false;
   while (!getLexer().is(AsmToken::Eof)) {
     while (getLexer().is(AsmToken::Space)) {
-      YamlStream << getLexer().getTok().getString();
+      CollectStream << getLexer().getTok().getString();
       Lex();
     }
 
@@ -3101,8 +4060,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
       }
     }
 
-    YamlStream << Parser.parseStringToEndOfStatement()
-               << getContext().getAsmInfo()->getSeparatorString();
+    CollectStream << Parser.parseStringToEndOfStatement()
+                  << getContext().getAsmInfo()->getSeparatorString();
 
     Parser.eatToEndOfStatement();
   }
@@ -3111,22 +4070,27 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
 
   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
     return TokError(Twine("expected directive ") +
-                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
+                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
   }
 
-  YamlStream.flush();
+  CollectStream.flush();
+  return false;
+}
 
-  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
-    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
-      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
-  } else {
-    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
-      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
-  }
+/// Parse the assembler directive for new MsgPack-format PAL metadata.
+bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
+  std::string String;
+  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
+                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
+    return true;
 
+  auto PALMetadata = getTargetStreamer().getPALMetadata();
+  if (!PALMetadata->setFromString(String))
+    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
   return false;
 }
 
+/// Parse the assembler directive for old linear-format PAL metadata.
 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
     return Error(getParser().getTok().getLoc(),
@@ -3134,19 +4098,82 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
                  "not available on non-amdpal OSes")).str());
   }
 
-  PALMD::Metadata PALMetadata;
+  auto PALMetadata = getTargetStreamer().getPALMetadata();
+  PALMetadata->setLegacy();
   for (;;) {
-    uint32_t Value;
+    uint32_t Key, Value;
+    if (ParseAsAbsoluteExpression(Key)) {
+      return TokError(Twine("invalid value in ") +
+                      Twine(PALMD::AssemblerDirective));
+    }
+    if (getLexer().isNot(AsmToken::Comma)) {
+      return TokError(Twine("expected an even number of values in ") +
+                      Twine(PALMD::AssemblerDirective));
+    }
+    Lex();
     if (ParseAsAbsoluteExpression(Value)) {
       return TokError(Twine("invalid value in ") +
                       Twine(PALMD::AssemblerDirective));
     }
-    PALMetadata.push_back(Value);
+    PALMetadata->setRegister(Key, Value);
     if (getLexer().isNot(AsmToken::Comma))
       break;
     Lex();
   }
-  getTargetStreamer().EmitPALMetadata(PALMetadata);
+  return false;
+}
+
+/// ParseDirectiveAMDGPULDS
+///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
+bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
+  if (getParser().checkForValidSection())
+    return true;
+
+  StringRef Name;
+  SMLoc NameLoc = getLexer().getLoc();
+  if (getParser().parseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
+  if (parseToken(AsmToken::Comma, "expected ','"))
+    return true;
+
+  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
+
+  int64_t Size;
+  SMLoc SizeLoc = getLexer().getLoc();
+  if (getParser().parseAbsoluteExpression(Size))
+    return true;
+  if (Size < 0)
+    return Error(SizeLoc, "size must be non-negative");
+  if (Size > LocalMemorySize)
+    return Error(SizeLoc, "size is too large");
+
+  int64_t Align = 4;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    SMLoc AlignLoc = getLexer().getLoc();
+    if (getParser().parseAbsoluteExpression(Align))
+      return true;
+    if (Align < 0 || !isPowerOf2_64(Align))
+      return Error(AlignLoc, "alignment must be a power of two");
+
+    // Alignment larger than the size of LDS is possible in theory, as long
+    // as the linker manages to place the symbol at address 0, but we do want
+    // to make sure the alignment fits nicely into a 32-bit integer.
+    if (Align >= 1u << 31)
+      return Error(AlignLoc, "alignment is too large");
+  }
+
+  if (parseToken(AsmToken::EndOfStatement,
+                 "unexpected token in '.amdgpu_lds' directive"))
+    return true;
+
+  Symbol->redefineIfPossible();
+  if (!Symbol->isUndefined())
+    return Error(NameLoc, "invalid symbol redefinition");
+
+  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
   return false;
 }
 
@@ -3183,6 +4210,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
       return ParseDirectiveHSAMetadata();
   }
 
+  if (IDVal == ".amdgpu_lds")
+    return ParseDirectiveAMDGPULDS();
+
+  if (IDVal == PALMD::AssemblerDirectiveBegin)
+    return ParseDirectivePALMetadataBegin();
+
   if (IDVal == PALMD::AssemblerDirective)
     return ParseDirectivePALMetadata();
 
@@ -3195,21 +4228,36 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
        R.isValid(); ++R) {
     if (*R == RegNo)
-      return isGFX9();
+      return isGFX9() || isGFX10();
+  }
+
+  // GFX10 has 2 more SGPRs 104 and 105.
+  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
+       R.isValid(); ++R) {
+    if (*R == RegNo)
+      return hasSGPR104_SGPR105();
   }
 
   switch (RegNo) {
+  case AMDGPU::SRC_SHARED_BASE:
+  case AMDGPU::SRC_SHARED_LIMIT:
+  case AMDGPU::SRC_PRIVATE_BASE:
+  case AMDGPU::SRC_PRIVATE_LIMIT:
+  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+    return !isCI() && !isSI() && !isVI();
   case AMDGPU::TBA:
   case AMDGPU::TBA_LO:
   case AMDGPU::TBA_HI:
   case AMDGPU::TMA:
   case AMDGPU::TMA_LO:
   case AMDGPU::TMA_HI:
-    return !isGFX9();
+    return !isGFX9() && !isGFX10();
   case AMDGPU::XNACK_MASK:
   case AMDGPU::XNACK_MASK_LO:
   case AMDGPU::XNACK_MASK_HI:
-    return !isCI() && !isSI() && hasXNACK();
+    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
+  case AMDGPU::SGPR_NULL:
+    return isGFX10();
   default:
     break;
   }
@@ -3217,8 +4265,10 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   if (isCI())
     return true;
 
-  if (isSI()) {
-    // No flat_scr
+  if (isSI() || isGFX10()) {
+    // No flat_scr on SI.
+    // On GFX10 flat scratch is not a valid register operand and can only be
+    // accessed with s_setreg/s_getreg.
     switch (RegNo) {
     case AMDGPU::FLAT_SCR:
     case AMDGPU::FLAT_SCR_LO:
@@ -3234,14 +4284,15 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
        R.isValid(); ++R) {
     if (*R == RegNo)
-      return false;
+      return hasSGPR102_SGPR103();
   }
 
   return true;
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
+                              OperandMode Mode) {
   // Try to parse with a custom parser
   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
 
@@ -3255,28 +4306,36 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
       getLexer().is(AsmToken::EndOfStatement))
     return ResTy;
 
-  ResTy = parseRegOrImm(Operands);
+  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
+    unsigned Prefix = Operands.size();
+    SMLoc LBraceLoc = getTok().getLoc();
+    Parser.Lex(); // eat the '['
 
-  if (ResTy == MatchOperand_Success)
-    return ResTy;
+    for (;;) {
+      ResTy = parseReg(Operands);
+      if (ResTy != MatchOperand_Success)
+        return ResTy;
 
-  const auto &Tok = Parser.getTok();
-  SMLoc S = Tok.getLoc();
+      if (getLexer().is(AsmToken::RBrac))
+        break;
 
-  const MCExpr *Expr = nullptr;
-  if (!Parser.parseExpression(Expr)) {
-    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
-    return MatchOperand_Success;
-  }
+      if (getLexer().isNot(AsmToken::Comma))
+        return MatchOperand_ParseFail;
+      Parser.Lex();
+    }
 
-  // Possibly this is an instruction flag like 'gds'.
-  if (Tok.getKind() == AsmToken::Identifier) {
-    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
-    Parser.Lex();
+    if (Operands.size() - Prefix > 1) {
+      Operands.insert(Operands.begin() + Prefix,
+                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
+      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
+                                                    getTok().getLoc()));
+    }
+
+    Parser.Lex(); // eat the ']'
     return MatchOperand_Success;
   }
 
-  return MatchOperand_NoMatch;
+  return parseRegOrImm(Operands);
 }
 
 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
@@ -3308,8 +4367,13 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
   Name = parseMnemonicSuffix(Name);
   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
 
+  bool IsMIMG = Name.startswith("image_");
+
   while (!getLexer().is(AsmToken::EndOfStatement)) {
-    OperandMatchResultTy Res = parseOperand(Operands, Name);
+    OperandMode Mode = OperandMode_Default;
+    if (IsMIMG && isGFX10() && Operands.size() == 2)
+      Mode = OperandMode_NSA;
+    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
 
     // Eat the comma or space if there is one.
     if (getLexer().is(AsmToken::Comma))
@@ -3318,12 +4382,14 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     switch (Res) {
       case MatchOperand_Success: break;
       case MatchOperand_ParseFail:
+        // FIXME: use real operand location rather than the current location.
         Error(getLexer().getLoc(), "failed parsing operand.");
         while (!getLexer().is(AsmToken::EndOfStatement)) {
           Parser.Lex();
         }
         return true;
       case MatchOperand_NoMatch:
+        // FIXME: use real operand location rather than the current location.
         Error(getLexer().getLoc(), "not a valid operand.");
         while (!getLexer().is(AsmToken::EndOfStatement)) {
           Parser.Lex();
@@ -3340,46 +4406,19 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
 //===----------------------------------------------------------------------===//
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
-  switch(getLexer().getKind()) {
-    default: return MatchOperand_NoMatch;
-    case AsmToken::Identifier: {
-      StringRef Name = Parser.getTok().getString();
-      if (!Name.equals(Prefix)) {
-        return MatchOperand_NoMatch;
-      }
-
-      Parser.Lex();
-      if (getLexer().isNot(AsmToken::Colon))
-        return MatchOperand_ParseFail;
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
 
-      Parser.Lex();
-
-      bool IsMinus = false;
-      if (getLexer().getKind() == AsmToken::Minus) {
-        Parser.Lex();
-        IsMinus = true;
-      }
-
-      if (getLexer().isNot(AsmToken::Integer))
-        return MatchOperand_ParseFail;
-
-      if (getParser().parseAbsoluteExpression(Int))
-        return MatchOperand_ParseFail;
+  if (!trySkipId(Prefix, AsmToken::Colon))
+    return MatchOperand_NoMatch;
 
-      if (IsMinus)
-        Int = -Int;
-      break;
-    }
-  }
-  return MatchOperand_Success;
+  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
 }
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                     AMDGPUOperand::ImmTy ImmTy,
                                     bool (*ConvertResult)(int64_t&)) {
-  SMLoc S = Parser.getTok().getLoc();
+  SMLoc S = getLoc();
   int64_t Value = 0;
 
   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
@@ -3387,59 +4426,55 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
     return Res;
 
   if (ConvertResult && !ConvertResult(Value)) {
-    return MatchOperand_ParseFail;
+    Error(S, "invalid " + StringRef(Prefix) + " value.");
   }
 
   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
   return MatchOperand_Success;
 }
 
-OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
-  const char *Prefix,
-  OperandVector &Operands,
-  AMDGPUOperand::ImmTy ImmTy,
-  bool (*ConvertResult)(int64_t&)) {
-  StringRef Name = Parser.getTok().getString();
-  if (!Name.equals(Prefix))
+OperandMatchResultTy
+AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
+                                             OperandVector &Operands,
+                                             AMDGPUOperand::ImmTy ImmTy,
+                                             bool (*ConvertResult)(int64_t&)) {
+  SMLoc S = getLoc();
+  if (!trySkipId(Prefix, AsmToken::Colon))
     return MatchOperand_NoMatch;
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::Colon))
+  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
     return MatchOperand_ParseFail;
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::LBrac))
-    return MatchOperand_ParseFail;
-  Parser.Lex();
-
   unsigned Val = 0;
-  SMLoc S = Parser.getTok().getLoc();
+  const unsigned MaxSize = 4;
 
   // FIXME: How to verify the number of elements matches the number of src
   // operands?
-  for (int I = 0; I < 4; ++I) {
-    if (I != 0) {
-      if (getLexer().is(AsmToken::RBrac))
-        break;
+  for (int I = 0; ; ++I) {
+    int64_t Op;
+    SMLoc Loc = getLoc();
+    if (!parseExpr(Op))
+      return MatchOperand_ParseFail;
 
-      if (getLexer().isNot(AsmToken::Comma))
-        return MatchOperand_ParseFail;
-      Parser.Lex();
+    if (Op != 0 && Op != 1) {
+      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
+      return MatchOperand_ParseFail;
     }
 
-    if (getLexer().isNot(AsmToken::Integer))
-      return MatchOperand_ParseFail;
+    Val |= (Op << I);
 
-    int64_t Op;
-    if (getParser().parseAbsoluteExpression(Op))
+    if (trySkipToken(AsmToken::RBrac))
+      break;
+
+    if (I + 1 == MaxSize) {
+      Error(getLoc(), "expected a closing square bracket");
       return MatchOperand_ParseFail;
+    }
 
-    if (Op != 0 && Op != 1)
+    if (!skipToken(AsmToken::Comma, "expected a comma"))
       return MatchOperand_ParseFail;
-    Val |= (Op << I);
   }
 
-  Parser.Lex();
   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -3459,7 +4494,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
         if (Tok == Name) {
           if (Tok == "r128" && isGFX9())
             Error(S, "r128 modifier is not supported on this GPU");
-          if (Tok == "a16" && !isGFX9())
+          if (Tok == "a16" && !isGFX9() && !isGFX10())
             Error(S, "a16 modifier is not supported on this GPU");
           Bit = 1;
           Parser.Lex();
@@ -3476,6 +4511,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
     }
   }
 
+  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+    return MatchOperand_ParseFail;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -3616,7 +4654,8 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
   }
 
   AMDGPUOperand::ImmTy OffsetType =
-    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
+    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
+     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                       AMDGPUOperand::ImmTyOffset;
 
@@ -3716,20 +4755,18 @@ encodeCnt(
 }
 
 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
-  StringRef CntName = Parser.getTok().getString();
-  int64_t CntVal;
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::LParen))
-    return true;
+  SMLoc CntLoc = getLoc();
+  StringRef CntName = getTokenStr();
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::Integer))
-    return true;
+  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
+      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
+    return false;
 
-  SMLoc ValLoc = Parser.getTok().getLoc();
-  if (getParser().parseAbsoluteExpression(CntVal))
-    return true;
+  int64_t CntVal;
+  SMLoc ValLoc = getLoc();
+  if (!parseExpr(CntVal))
+    return false;
 
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
 
@@ -3742,265 +4779,240 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
+  } else {
+    Error(CntLoc, "invalid counter name " + CntName);
+    return false;
   }
 
   if (Failed) {
     Error(ValLoc, "too large value for " + CntName);
-    return true;
+    return false;
   }
 
-  if (getLexer().isNot(AsmToken::RParen)) {
-    return true;
-  }
+  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
+    return false;
 
-  Parser.Lex();
-  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
-    const AsmToken NextToken = getLexer().peekTok();
-    if (NextToken.is(AsmToken::Identifier)) {
-      Parser.Lex();
+  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
+    if (isToken(AsmToken::EndOfStatement)) {
+      Error(getLoc(), "expected a counter name");
+      return false;
     }
   }
 
-  return false;
+  return true;
 }
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
   int64_t Waitcnt = getWaitcntBitMask(ISA);
-  SMLoc S = Parser.getTok().getLoc();
+  SMLoc S = getLoc();
 
-  switch(getLexer().getKind()) {
-    default: return MatchOperand_ParseFail;
-    case AsmToken::Integer:
-      // The operand can be an integer value.
-      if (getParser().parseAbsoluteExpression(Waitcnt))
-        return MatchOperand_ParseFail;
-      break;
-
-    case AsmToken::Identifier:
-      do {
-        if (parseCnt(Waitcnt))
-          return MatchOperand_ParseFail;
-      } while(getLexer().isNot(AsmToken::EndOfStatement));
-      break;
+  // If parse failed, do not return error code
+  // to avoid excessive error messages.
+  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
+    while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
+  } else {
+    parseExpr(Waitcnt);
   }
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
   return MatchOperand_Success;
 }
 
-bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
-                                          int64_t &Width) {
-  using namespace llvm::AMDGPU::Hwreg;
+bool
+AMDGPUOperand::isSWaitCnt() const {
+  return isImm();
+}
 
-  if (Parser.getTok().getString() != "hwreg")
-    return true;
-  Parser.Lex();
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
 
-  if (getLexer().isNot(AsmToken::LParen))
-    return true;
-  Parser.Lex();
+bool
+AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
+                                int64_t &Offset,
+                                int64_t &Width) {
+  using namespace llvm::AMDGPU::Hwreg;
 
-  if (getLexer().is(AsmToken::Identifier)) {
+  // The register may be specified by name or using a numeric code
+  if (isToken(AsmToken::Identifier) &&
+      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
     HwReg.IsSymbolic = true;
-    HwReg.Id = ID_UNKNOWN_;
-    const StringRef tok = Parser.getTok().getString();
-    int Last = ID_SYMBOLIC_LAST_;
-    if (isSI() || isCI() || isVI())
-      Last = ID_SYMBOLIC_FIRST_GFX9_;
-    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
-      if (tok == IdSymbolic[i]) {
-        HwReg.Id = i;
-        break;
-      }
-    }
-    Parser.Lex();
-  } else {
-    HwReg.IsSymbolic = false;
-    if (getLexer().isNot(AsmToken::Integer))
-      return true;
-    if (getParser().parseAbsoluteExpression(HwReg.Id))
-      return true;
-  }
-
-  if (getLexer().is(AsmToken::RParen)) {
-    Parser.Lex();
+    lex(); // skip register name
+  } else if (!parseExpr(HwReg.Id)) {
     return false;
   }
 
-  // optional params
-  if (getLexer().isNot(AsmToken::Comma))
+  if (trySkipToken(AsmToken::RParen))
     return true;
-  Parser.Lex();
 
-  if (getLexer().isNot(AsmToken::Integer))
-    return true;
-  if (getParser().parseAbsoluteExpression(Offset))
-    return true;
-
-  if (getLexer().isNot(AsmToken::Comma))
-    return true;
-  Parser.Lex();
+  // parse optional params
+  return
+    skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
+    parseExpr(Offset) &&
+    skipToken(AsmToken::Comma, "expected a comma") &&
+    parseExpr(Width) &&
+    skipToken(AsmToken::RParen, "expected a closing parenthesis");
+}
 
-  if (getLexer().isNot(AsmToken::Integer))
-    return true;
-  if (getParser().parseAbsoluteExpression(Width))
-    return true;
+bool
+AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
+                               const int64_t Offset,
+                               const int64_t Width,
+                               const SMLoc Loc) {
 
-  if (getLexer().isNot(AsmToken::RParen))
-    return true;
-  Parser.Lex();
+  using namespace llvm::AMDGPU::Hwreg;
 
-  return false;
+  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
+    Error(Loc, "specified hardware register is not supported on this GPU");
+    return false;
+  } else if (!isValidHwreg(HwReg.Id)) {
+    Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
+    return false;
+  } else if (!isValidHwregOffset(Offset)) {
+    Error(Loc, "invalid bit offset: only 5-bit values are legal");
+    return false;
+  } else if (!isValidHwregWidth(Width)) {
+    Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
+    return false;
+  }
+  return true;
 }
 
-OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+OperandMatchResultTy
+AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
   using namespace llvm::AMDGPU::Hwreg;
 
-  int64_t Imm16Val = 0;
-  SMLoc S = Parser.getTok().getLoc();
-
-  switch(getLexer().getKind()) {
-    default: return MatchOperand_NoMatch;
-    case AsmToken::Integer:
-      // The operand can be an integer value.
-      if (getParser().parseAbsoluteExpression(Imm16Val))
-        return MatchOperand_NoMatch;
-      if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
-        Error(S, "invalid immediate: only 16-bit values are legal");
-        // Do not return error code, but create an imm operand anyway and proceed
-        // to the next operand, if any. That avoids unneccessary error messages.
-      }
-      break;
-
-    case AsmToken::Identifier: {
-        OperandInfoTy HwReg(ID_UNKNOWN_);
-        int64_t Offset = OFFSET_DEFAULT_;
-        int64_t Width = WIDTH_M1_DEFAULT_ + 1;
-        if (parseHwregConstruct(HwReg, Offset, Width))
-          return MatchOperand_ParseFail;
-        if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
-          if (HwReg.IsSymbolic)
-            Error(S, "invalid symbolic name of hardware register");
-          else
-            Error(S, "invalid code of hardware register: only 6-bit values are legal");
-        }
-        if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
-          Error(S, "invalid bit offset: only 5-bit values are legal");
-        if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
-          Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
-        Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
-      }
-      break;
+  int64_t ImmVal = 0;
+  SMLoc Loc = getLoc();
+
+  // If parse failed, do not return error code
+  // to avoid excessive error messages.
+  if (trySkipId("hwreg", AsmToken::LParen)) {
+    OperandInfoTy HwReg(ID_UNKNOWN_);
+    int64_t Offset = OFFSET_DEFAULT_;
+    int64_t Width = WIDTH_DEFAULT_;
+    if (parseHwregBody(HwReg, Offset, Width) &&
+        validateHwreg(HwReg, Offset, Width, Loc)) {
+      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
+    }
+  } else if (parseExpr(ImmVal)) {
+    if (ImmVal < 0 || !isUInt<16>(ImmVal))
+      Error(Loc, "invalid immediate: only 16-bit values are legal");
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
-  return MatchOperand_Success;
-}
 
-bool AMDGPUOperand::isSWaitCnt() const {
-  return isImm();
+  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
+  return MatchOperand_Success;
 }
 
 bool AMDGPUOperand::isHwreg() const {
   return isImmTy(ImmTyHwreg);
 }
 
-bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
+//===----------------------------------------------------------------------===//
+// sendmsg
+//===----------------------------------------------------------------------===//
+
+bool
+AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
+                                  OperandInfoTy &Op,
+                                  OperandInfoTy &Stream) {
   using namespace llvm::AMDGPU::SendMsg;
 
-  if (Parser.getTok().getString() != "sendmsg")
-    return true;
-  Parser.Lex();
+  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
+    Msg.IsSymbolic = true;
+    lex(); // skip message name
+  } else if (!parseExpr(Msg.Id)) {
+    return false;
+  }
 
-  if (getLexer().isNot(AsmToken::LParen))
-    return true;
-  Parser.Lex();
+  if (trySkipToken(AsmToken::Comma)) {
+    Op.IsDefined = true;
+    if (isToken(AsmToken::Identifier) &&
+        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
+      lex(); // skip operation name
+    } else if (!parseExpr(Op.Id)) {
+      return false;
+    }
 
-  if (getLexer().is(AsmToken::Identifier)) {
-    Msg.IsSymbolic = true;
-    Msg.Id = ID_UNKNOWN_;
-    const std::string tok = Parser.getTok().getString();
-    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
-      switch(i) {
-        default: continue; // Omit gaps.
-        case ID_INTERRUPT: case ID_GS: case ID_GS_DONE:  case ID_SYSMSG: break;
-      }
-      if (tok == IdSymbolic[i]) {
-        Msg.Id = i;
-        break;
-      }
+    if (trySkipToken(AsmToken::Comma)) {
+      Stream.IsDefined = true;
+      if (!parseExpr(Stream.Id))
+        return false;
     }
-    Parser.Lex();
-  } else {
-    Msg.IsSymbolic = false;
-    if (getLexer().isNot(AsmToken::Integer))
-      return true;
-    if (getParser().parseAbsoluteExpression(Msg.Id))
-      return true;
-    if (getLexer().is(AsmToken::Integer))
-      if (getParser().parseAbsoluteExpression(Msg.Id))
-        Msg.Id = ID_UNKNOWN_;
   }
-  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
-    return false;
 
-  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
-    if (getLexer().isNot(AsmToken::RParen))
-      return true;
-    Parser.Lex();
+  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
+}
+
+bool
+AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
+                                 const OperandInfoTy &Op,
+                                 const OperandInfoTy &Stream,
+                                 const SMLoc S) {
+  using namespace llvm::AMDGPU::SendMsg;
+
+  // Validation strictness depends on whether message is specified
+  // in a symbolic or in a numeric form. In the latter case
+  // only encoding possibility is checked.
+  bool Strict = Msg.IsSymbolic;
+
+  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
+    Error(S, "invalid message id");
+    return false;
+  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
+    Error(S, Op.IsDefined ?
+             "message does not support operations" :
+             "missing message operation");
+    return false;
+  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+    Error(S, "invalid operation id");
+    return false;
+  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
+    Error(S, "message operation does not support streams");
+    return false;
+  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+    Error(S, "invalid message stream id");
     return false;
   }
+  return true;
+}
 
-  if (getLexer().isNot(AsmToken::Comma))
-    return true;
-  Parser.Lex();
+OperandMatchResultTy
+AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
+  using namespace llvm::AMDGPU::SendMsg;
 
-  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
-  Operation.Id = ID_UNKNOWN_;
-  if (getLexer().is(AsmToken::Identifier)) {
-    Operation.IsSymbolic = true;
-    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
-    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
-    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
-    const StringRef Tok = Parser.getTok().getString();
-    for (int i = F; i < L; ++i) {
-      if (Tok == S[i]) {
-        Operation.Id = i;
-        break;
-      }
+  int64_t ImmVal = 0;
+  SMLoc Loc = getLoc();
+
+  // If parse failed, do not return error code
+  // to avoid excessive error messages.
+  if (trySkipId("sendmsg", AsmToken::LParen)) {
+    OperandInfoTy Msg(ID_UNKNOWN_);
+    OperandInfoTy Op(OP_NONE_);
+    OperandInfoTy Stream(STREAM_ID_NONE_);
+    if (parseSendMsgBody(Msg, Op, Stream) &&
+        validateSendMsg(Msg, Op, Stream, Loc)) {
+      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
     }
-    Parser.Lex();
-  } else {
-    Operation.IsSymbolic = false;
-    if (getLexer().isNot(AsmToken::Integer))
-      return true;
-    if (getParser().parseAbsoluteExpression(Operation.Id))
-      return true;
+  } else if (parseExpr(ImmVal)) {
+    if (ImmVal < 0 || !isUInt<16>(ImmVal))
+      Error(Loc, "invalid immediate: only 16-bit values are legal");
   }
 
-  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
-    // Stream id is optional.
-    if (getLexer().is(AsmToken::RParen)) {
-      Parser.Lex();
-      return false;
-    }
-
-    if (getLexer().isNot(AsmToken::Comma))
-      return true;
-    Parser.Lex();
-
-    if (getLexer().isNot(AsmToken::Integer))
-      return true;
-    if (getParser().parseAbsoluteExpression(StreamId))
-      return true;
-  }
+  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
+  return MatchOperand_Success;
+}
 
-  if (getLexer().isNot(AsmToken::RParen))
-    return true;
-  Parser.Lex();
-  return false;
+bool AMDGPUOperand::isSendMsg() const {
+  return isImmTy(ImmTySendMsg);
 }
 
+//===----------------------------------------------------------------------===//
+// v_interp
+//===----------------------------------------------------------------------===//
+
 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
   if (getLexer().getKind() != AsmToken::Identifier)
     return MatchOperand_NoMatch;
@@ -4062,6 +5074,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
   return MatchOperand_Success;
 }
 
+//===----------------------------------------------------------------------===//
+// exp
+//===----------------------------------------------------------------------===//
+
 void AMDGPUAsmParser::errorExpTgt() {
   Error(Parser.getTok().getLoc(), "invalid exp target");
 }
@@ -4094,13 +5110,18 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
     if (Str.getAsInteger(10, Val))
       return MatchOperand_ParseFail;
 
-    if (Val > 3)
+    if (Val > 4 || (Val == 4 && !isGFX10()))
       errorExpTgt();
 
     Val += 12;
     return MatchOperand_Success;
   }
 
+  if (isGFX10() && Str == "prim") {
+    Val = 20;
+    return MatchOperand_Success;
+  }
+
   if (Str.startswith("param")) {
     Str = Str.drop_front(5);
     if (Str.getAsInteger(10, Val))
@@ -4118,121 +5139,62 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
     if (Str.getAsInteger(10, Val))
       return MatchOperand_ParseFail;
 
-    errorExpTgt();
-    return MatchOperand_Success;
-  }
-
-  return MatchOperand_NoMatch;
-}
-
-OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
-  uint8_t Val;
-  StringRef Str = Parser.getTok().getString();
-
-  auto Res = parseExpTgtImpl(Str, Val);
-  if (Res != MatchOperand_Success)
-    return Res;
-
-  SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex();
-
-  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
-                                              AMDGPUOperand::ImmTyExpTgt));
-  return MatchOperand_Success;
-}
-
-OperandMatchResultTy
-AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
-  using namespace llvm::AMDGPU::SendMsg;
-
-  int64_t Imm16Val = 0;
-  SMLoc S = Parser.getTok().getLoc();
-
-  switch(getLexer().getKind()) {
-  default:
-    return MatchOperand_NoMatch;
-  case AsmToken::Integer:
-    // The operand can be an integer value.
-    if (getParser().parseAbsoluteExpression(Imm16Val))
-      return MatchOperand_NoMatch;
-    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
-      Error(S, "invalid immediate: only 16-bit values are legal");
-      // Do not return error code, but create an imm operand anyway and proceed
-      // to the next operand, if any. That avoids unneccessary error messages.
-    }
-    break;
-  case AsmToken::Identifier: {
-      OperandInfoTy Msg(ID_UNKNOWN_);
-      OperandInfoTy Operation(OP_UNKNOWN_);
-      int64_t StreamId = STREAM_ID_DEFAULT_;
-      if (parseSendMsgConstruct(Msg, Operation, StreamId))
-        return MatchOperand_ParseFail;
-      do {
-        // Validate and encode message ID.
-        if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
-                || Msg.Id == ID_SYSMSG)) {
-          if (Msg.IsSymbolic)
-            Error(S, "invalid/unsupported symbolic name of message");
-          else
-            Error(S, "invalid/unsupported code of message");
-          break;
-        }
-        Imm16Val = (Msg.Id << ID_SHIFT_);
-        // Validate and encode operation ID.
-        if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
-          if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
-            if (Operation.IsSymbolic)
-              Error(S, "invalid symbolic name of GS_OP");
-            else
-              Error(S, "invalid code of GS_OP: only 2-bit values are legal");
-            break;
-          }
-          if (Operation.Id == OP_GS_NOP
-              && Msg.Id != ID_GS_DONE) {
-            Error(S, "invalid GS_OP: NOP is for GS_DONE only");
-            break;
-          }
-          Imm16Val |= (Operation.Id << OP_SHIFT_);
-        }
-        if (Msg.Id == ID_SYSMSG) {
-          if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
-            if (Operation.IsSymbolic)
-              Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
-            else
-              Error(S, "invalid/unsupported code of SYSMSG_OP");
-            break;
-          }
-          Imm16Val |= (Operation.Id << OP_SHIFT_);
-        }
-        // Validate and encode stream ID.
-        if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
-          if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
-            Error(S, "invalid stream id: only 2-bit values are legal");
-            break;
-          }
-          Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
-        }
-      } while (false);
-    }
-    break;
+    errorExpTgt();
+    return MatchOperand_Success;
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
-  return MatchOperand_Success;
+
+  return MatchOperand_NoMatch;
 }
 
-bool AMDGPUOperand::isSendMsg() const {
-  return isImmTy(ImmTySendMsg);
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
+  uint8_t Val;
+  StringRef Str = Parser.getTok().getString();
+
+  auto Res = parseExpTgtImpl(Str, Val);
+  if (Res != MatchOperand_Success)
+    return Res;
+
+  SMLoc S = Parser.getTok().getLoc();
+  Parser.Lex();
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
+                                              AMDGPUOperand::ImmTyExpTgt));
+  return MatchOperand_Success;
 }
 
 //===----------------------------------------------------------------------===//
 // parser helpers
 //===----------------------------------------------------------------------===//
 
+/// \returns true if \p Token is an identifier whose spelling equals \p Id.
+bool AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
+  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
+}
+
+/// \returns true if the current token is an identifier spelled \p Id.
+bool AMDGPUAsmParser::isId(const StringRef Id) const {
+  return isId(getToken(), Id);
+}
+
+/// \returns true if the kind of the current token is \p Kind.
+bool AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
+  return getTokenKind() == Kind;
+}
+
 bool
 AMDGPUAsmParser::trySkipId(const StringRef Id) {
-  if (getLexer().getKind() == AsmToken::Identifier &&
-      Parser.getTok().getString() == Id) {
-    Parser.Lex();
+  if (isId(Id)) {
+    lex();
+    return true;
+  }
+  return false;
+}
+
+bool
+AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
+  if (isId(Id) && peekToken().is(Kind)) {
+    lex();
+    lex();
     return true;
   }
   return false;
@@ -4240,8 +5202,8 @@ AMDGPUAsmParser::trySkipId(const StringRef Id) {
 
 bool
 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
-  if (getLexer().getKind() == Kind) {
-    Parser.Lex();
+  if (isToken(Kind)) {
+    lex();
     return true;
   }
   return false;
@@ -4251,7 +5213,7 @@ bool
 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                            const StringRef ErrMsg) {
   if (!trySkipToken(Kind)) {
-    Error(Parser.getTok().getLoc(), ErrMsg);
+    Error(getLoc(), ErrMsg);
     return false;
   }
   return true;
@@ -4264,17 +5226,54 @@ AMDGPUAsmParser::parseExpr(int64_t &Imm) {
 
 bool
 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
-  SMLoc S = Parser.getTok().getLoc();
-  if (getLexer().getKind() == AsmToken::String) {
-    Val = Parser.getTok().getStringContents();
-    Parser.Lex();
+  if (isToken(AsmToken::String)) {
+    Val = getToken().getStringContents();
+    lex();
     return true;
   } else {
-    Error(S, ErrMsg);
+    Error(getLoc(), ErrMsg);
     return false;
   }
 }
 
+/// \returns a copy of the token the parser is currently positioned on.
+AsmToken AMDGPUAsmParser::getToken() const {
+  return Parser.getTok();
+}
+
+/// \returns the lexer's lookahead token that follows the current one.
+AsmToken AMDGPUAsmParser::peekToken() {
+  return getLexer().peekTok();
+}
+
+/// Fills \p Tokens with lookahead tokens; any trailing slots the lexer did
+/// not supply are overwritten with an empty AsmToken::Error token.
+void AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
+  const auto NumPeeked = getLexer().peekTokens(Tokens);
+  for (auto Slot = NumPeeked; Slot < Tokens.size(); ++Slot)
+    Tokens[Slot] = AsmToken(AsmToken::Error, "");
+}
+
+/// \returns the kind of the current token, as reported by the lexer.
+AsmToken::TokenKind AMDGPUAsmParser::getTokenKind() const {
+  return getLexer().getKind();
+}
+
+/// \returns the source location of the current token.
+SMLoc AMDGPUAsmParser::getLoc() const {
+  return getToken().getLoc();
+}
+
+/// \returns the spelling of the current token.
+StringRef AMDGPUAsmParser::getTokenStr() const {
+  return getToken().getString();
+}
+
+/// Advances the parser to the next token.
+void AMDGPUAsmParser::lex() {
+  Parser.Lex();
+}
+
 //===----------------------------------------------------------------------===//
 // swizzle
 //===----------------------------------------------------------------------===//
@@ -4322,8 +5321,8 @@ AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                            "expected a 2-bit lane id")) {
     Imm = QUAD_PERM_ENC;
-    for (auto i = 0; i < LANE_NUM; ++i) {
-      Imm |= Lane[i] << (LANE_SHIFT * i);
+    for (unsigned I = 0; I < LANE_NUM; ++I) {
+      Imm |= Lane[I] << (LANE_SHIFT * I);
     }
     return true;
   }
@@ -4518,6 +5517,88 @@ AMDGPUOperand::isSwizzle() const {
   return isImmTy(ImmTySwizzle);
 }
 
+//===----------------------------------------------------------------------===//
+// VGPR Index Mode
+//===----------------------------------------------------------------------===//
+
+/// Parses the argument list of a gpr_idx(...) macro through the closing
+/// parenthesis. Returns OFF for an empty list, otherwise the OR of
+/// (1 << id) for each mode named. Errors are reported but not propagated.
+int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
+  using namespace llvm::AMDGPU::VGPRIndexMode;
+
+  if (trySkipToken(AsmToken::RParen))
+    return OFF;
+
+  int64_t ModeMask = 0;
+  for (;;) {
+    const SMLoc ModeLoc = getLoc();
+    unsigned ModeBit = 0;
+
+    // Try each symbolic mode name against the current identifier.
+    for (unsigned Id = ID_MIN; Id <= ID_MAX; ++Id) {
+      if (!trySkipId(IdSymbolic[Id]))
+        continue;
+      ModeBit = 1 << Id;
+      break;
+    }
+
+    if (!ModeBit) {
+      // Before the first mode, ')' would have been acceptable as well.
+      Error(ModeLoc, ModeMask ? "expected a VGPR index mode" :
+            "expected a VGPR index mode or a closing parenthesis");
+      break;
+    }
+    if (ModeMask & ModeBit) {
+      Error(ModeLoc, "duplicate VGPR index mode");
+      break;
+    }
+    ModeMask |= ModeBit;
+
+    // The list either ends here or continues after a comma.
+    const bool ListClosed = trySkipToken(AsmToken::RParen);
+    if (ListClosed || !skipToken(AsmToken::Comma,
+                                 "expected a comma or a closing parenthesis"))
+      break;
+  }
+
+  return ModeMask;
+}
+
+/// Parses a VGPR index mode operand, written either as the symbolic
+/// gpr_idx(...) macro or as an absolute expression that fits in 4 bits.
+///
+/// An ImmTyGprIdxMode operand is pushed even when a diagnostic was issued;
+/// NoMatch is returned only if the absolute expression cannot be parsed.
+OperandMatchResultTy
+AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
+  int64_t Imm = 0;
+  const SMLoc S = getLoc();
+
+  // trySkipId consumes both "gpr_idx" and '(' only when they appear
+  // together; sendmsg(...) parsing uses the same lookahead helper.
+  if (trySkipId("gpr_idx", AsmToken::LParen)) {
+    // If parse failed, trigger an error but do not return error code
+    // to avoid excessive error messages.
+    Imm = parseGPRIdxMacro();
+  } else {
+    if (getParser().parseAbsoluteExpression(Imm))
+      return MatchOperand_NoMatch;
+    // Out-of-range values are diagnosed, but the operand is still
+    // created below so that parsing can continue.
+    if (Imm < 0 || !isUInt<4>(Imm))
+      Error(S, "invalid immediate: only 4-bit values are legal");
+  }
+
+  Operands.push_back(
+      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
+  return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isGPRIdxMode() const {
+  return isImmTy(ImmTyGprIdxMode); // Type tag given by parseGPRIdxMode.
+}
+
 //===----------------------------------------------------------------------===//
 // sopp branch targets
 //===----------------------------------------------------------------------===//
@@ -4545,10 +5626,23 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Boolean holding registers
+//===----------------------------------------------------------------------===//
+
+/// Boolean holding registers are parsed with the generic register parser.
+OperandMatchResultTy AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
+  return parseReg(Operands);
+}
+
 //===----------------------------------------------------------------------===//
 // mubuf
 //===----------------------------------------------------------------------===//
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+}
+
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
 }
@@ -4566,13 +5660,19 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   bool HasLdsModifier = false;
   OptionalImmIndexMap OptionalIdx;
   assert(IsAtomicReturn ? IsAtomic : true);
+  unsigned FirstOperandIdx = 1;
 
-  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
 
     // Add the register arguments
     if (Op.isReg()) {
       Op.addRegOperands(Inst, 1);
+      // Insert a tied src for atomic return dst.
+      // This cannot be postponed as subsequent calls to
+      // addImmOperands rely on correct number of MC operands.
+      if (IsAtomicReturn && i == FirstOperandIdx)
+        Op.addRegOperands(Inst, 1);
       continue;
     }
 
@@ -4582,7 +5682,7 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
       continue;
     }
 
-    HasLdsModifier = Op.isLDS();
+    HasLdsModifier |= Op.isLDS();
 
     // Handle tokens like 'offen' which are sometimes hard-coded into the
     // asm string.  There are no MCInst operands for these.
@@ -4610,12 +5710,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
     }
   }
 
-  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
-  if (IsAtomicReturn) {
-    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
-    Inst.insert(I, *I);
-  }
-
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
   if (!IsAtomic) { // glc is hard-coded.
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
@@ -4625,6 +5719,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -4662,6 +5759,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 //===----------------------------------------------------------------------===//
@@ -4692,19 +5792,26 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
       Op.addRegOperands(Inst, 1);
     } else if (Op.isImmModifier()) {
       OptionalIdx[Op.getImmTy()] = I;
-    } else {
+    } else if (!Op.isToken()) {
       llvm_unreachable("unexpected operand type");
     }
   }
 
+  bool IsGFX10 = isGFX10();
+
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  if (!IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
 }
 
@@ -4742,11 +5849,7 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
@@ -4801,7 +5904,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
-  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
+  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
+  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4816,9 +5920,11 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
+  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
+  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
@@ -4828,7 +5934,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
-  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
+  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
+  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
+  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
+  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
 };
 
 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -4884,7 +5993,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
                Op.Type == AMDGPUOperand::ImmTyNegHi) {
       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                         Op.ConvertResult);
-    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
+      res = parseDim(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
       res = parseDfmtNfmt(Operands);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
@@ -4964,7 +6075,7 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
     } else if (Op.isInterpSlot() ||
                Op.isInterpAttr() ||
                Op.isAttrChan()) {
-      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
+      Inst.addOperand(MCOperand::createImm(Op.getImm()));
     } else if (Op.isImmModifier()) {
       OptionalIdx[Op.getImmTy()] = I;
     } else {
@@ -5029,14 +6140,17 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
   }
 
-  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
+  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
   // it has src2 register operand that is tied to dst operand
   // we don't allow modifiers for this operand in assembler so src2_modifiers
   // should be 0.
-  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
+  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
       Opc == AMDGPU::V_MAC_F32_e64_vi ||
       Opc == AMDGPU::V_MAC_F16_e64_vi ||
-      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
+      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
     auto it = Inst.begin();
     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
@@ -5137,6 +6251,10 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
 // dpp
 //===----------------------------------------------------------------------===//
 
+bool AMDGPUOperand::isDPP8() const { // Type tag given by parseDPP8.
+  return isImmTy(ImmTyDPP8);
+}
+
 bool AMDGPUOperand::isDPPCtrl() const {
   using namespace AMDGPU::DPP;
 
@@ -5154,13 +6272,27 @@ bool AMDGPUOperand::isDPPCtrl() const {
            (Imm == DppCtrl::ROW_MIRROR) ||
            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
            (Imm == DppCtrl::BCAST15) ||
-           (Imm == DppCtrl::BCAST31);
+           (Imm == DppCtrl::BCAST31) ||
+           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
+           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
   }
   return false;
 }
 
-bool AMDGPUOperand::isGPRIdxMode() const {
-  return isImm() && isUInt<4>(getImm());
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isBLGP() const { // blgp immediate must fit in 3 bits
+  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isCBSZ() const { // cbsz immediate must fit in 3 bits
+  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isABID() const {
+  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
 }
 
 bool AMDGPUOperand::isS16Imm() const {
@@ -5171,6 +6303,108 @@ bool AMDGPUOperand::isU16Imm() const {
   return isImm() && isUInt<16>(getImm());
 }
 
+/// Parses the GFX10 "dim:<name>" modifier. <name> is looked up with
+/// getMIMGDimInfoByAsmSuffix after an optional "SQ_RSRC_IMG_" prefix is
+/// stripped; the resulting encoding becomes an ImmTyDim operand.
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+  if (!isGFX10())
+    return MatchOperand_NoMatch;
+
+  const SMLoc S = getLoc();
+  if (!isId("dim"))
+    return MatchOperand_NoMatch;
+
+  lex();
+  if (!isToken(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+  lex();
+
+  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
+  // integer, so glue it to the identifier that must follow immediately.
+  std::string Name;
+  if (isToken(AsmToken::Integer)) {
+    const SMLoc DigitsEnd = getToken().getEndLoc();
+    Name = getTokenStr();
+    lex();
+    if (getLoc() != DigitsEnd)
+      return MatchOperand_ParseFail;
+  }
+  if (!isToken(AsmToken::Identifier))
+    return MatchOperand_ParseFail;
+  Name += getTokenStr();
+
+  const StringRef LongPrefix = "SQ_RSRC_IMG_";
+  StringRef DimId = Name;
+  if (DimId.startswith(LongPrefix))
+    DimId = DimId.substr(LongPrefix.size());
+
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
+  if (!DimInfo)
+    return MatchOperand_ParseFail;
+
+  lex();
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+                                              AMDGPUOperand::ImmTyDim));
+  return MatchOperand_Success;
+}
+
+/// Parses a "dpp8:[s0,...,s7]" operand (GFX10 only). Each selector must lie
+/// in [0, 7]; selector i is packed into bits [3*i+2 : 3*i] of the immediate.
+/// Identifiers other than "dpp8" are handed to parseDPPCtrl.
+OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
+  const SMLoc S = getLoc();
+
+  if (!isToken(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+  if (getTokenStr() != "dpp8")
+    return parseDPPCtrl(Operands);
+  if (!isGFX10())
+    return MatchOperand_NoMatch;
+
+  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
+
+  lex();
+  if (!isToken(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+
+  lex();
+  if (!isToken(AsmToken::LBrac))
+    return MatchOperand_ParseFail;
+
+  lex();
+
+  const size_t NumSels = 8;
+  const unsigned BitsPerSel = 3;
+  unsigned DPP8 = 0;
+
+  for (size_t Idx = 0; Idx != NumSels; ++Idx) {
+    // Every selector except the first is preceded by a comma.
+    if (Idx != 0) {
+      if (!isToken(AsmToken::Comma))
+        return MatchOperand_ParseFail;
+      lex();
+    }
+
+    int64_t Sel = 0;
+    if (getParser().parseAbsoluteExpression(Sel))
+      return MatchOperand_ParseFail;
+    if (Sel < 0 || Sel > 7)
+      return MatchOperand_ParseFail;
+
+    DPP8 |= Sel << (Idx * BitsPerSel);
+  }
+
+  // The selector list must be closed by ']'.
+  if (!isToken(AsmToken::RBrac))
+    return MatchOperand_ParseFail;
+  lex();
+
+  Operands.push_back(
+      AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
+  return MatchOperand_Success;
+}
+
 OperandMatchResultTy
 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
   using namespace AMDGPU::DPP;
@@ -5201,10 +6435,21 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
         && Prefix != "wave_rol"
         && Prefix != "wave_shr"
         && Prefix != "wave_ror"
-        && Prefix != "row_bcast") {
+        && Prefix != "row_bcast"
+        && Prefix != "row_share"
+        && Prefix != "row_xmask") {
       return MatchOperand_NoMatch;
     }
 
+    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
+      return MatchOperand_NoMatch;
+
+    if (!isVI() && !isGFX9() &&
+        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
+         Prefix == "wave_rol" || Prefix == "wave_ror" ||
+         Prefix == "row_bcast"))
+      return MatchOperand_NoMatch;
+
     Parser.Lex();
     if (getLexer().isNot(AsmToken::Colon))
       return MatchOperand_ParseFail;
@@ -5262,6 +6507,10 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
         } else {
           return MatchOperand_ParseFail;
         }
+      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
+        Int |= DppCtrl::ROW_SHARE_FIRST;
+      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
+        Int |= DppCtrl::ROW_XMASK_FIRST;
       } else {
         return MatchOperand_ParseFail;
       }
@@ -5276,6 +6525,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
+}
+
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
 }
@@ -5284,7 +6537,11 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
 }
 
-void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
+}
+
+void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
   OptionalImmIndexMap OptionalIdx;
 
   unsigned I = 1;
@@ -5293,6 +6550,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
   }
 
+  int Fi = 0;
   for (unsigned E = Operands.size(); I != E; ++I) {
     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                             MCOI::TIED_TO);
@@ -5303,25 +6561,49 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
     }
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     // Add the register arguments
-    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+    if (Op.isReg() && validateVccOperand(Op.getReg())) {
       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
       // Skip it.
       continue;
-    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
-      Op.addRegWithFPInputModsOperands(Inst, 2);
-    } else if (Op.isDPPCtrl()) {
-      Op.addImmOperands(Inst, 1);
-    } else if (Op.isImm()) {
-      // Handle optional arguments
-      OptionalIdx[Op.getImmTy()] = I;
+    }
+
+    if (IsDPP8) {
+      if (Op.isDPP8()) {
+        Op.addImmOperands(Inst, 1);
+      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+        Op.addRegWithFPInputModsOperands(Inst, 2);
+      } else if (Op.isFI()) {
+        Fi = Op.getImm();
+      } else if (Op.isReg()) {
+        Op.addRegOperands(Inst, 1);
+      } else {
+        llvm_unreachable("Invalid operand type");
+      }
     } else {
-      llvm_unreachable("Invalid operand type");
+      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+        Op.addRegWithFPInputModsOperands(Inst, 2);
+      } else if (Op.isDPPCtrl()) {
+        Op.addImmOperands(Inst, 1);
+      } else if (Op.isImm()) {
+        // Handle optional arguments
+        OptionalIdx[Op.getImmTy()] = I;
+      } else {
+        llvm_unreachable("Invalid operand type");
+      }
     }
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+  if (IsDPP8) {
+    using namespace llvm::AMDGPU::DPP;
+    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
+  } else {
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+    }
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -5422,7 +6704,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
 
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+    if (skipVcc && !skippedVcc && Op.isReg() &&
+        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
@@ -5448,7 +6731,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
     skippedVcc = false;
   }
 
-  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
+  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
+      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
     switch (BasicInstType) {
@@ -5474,7 +6758,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
       break;
 
     case SIInstrFlags::VOPC:
-      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
       break;
@@ -5495,6 +6780,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
   }
 }
 
+//===----------------------------------------------------------------------===//
+// mAI: default BLGP/CBSZ/ABID operands -- each is immediate 0 at an empty SMLoc.
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
+}
+
 /// Force static initialization.
 extern "C" void LLVMInitializeAMDGPUAsmParser() {
   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
@@ -5552,3 +6853,28 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
     return Match_InvalidOperand;
   }
 }
+
+//===----------------------------------------------------------------------===//
+// endpgm: parser for the optional immediate operand (default 0, 16-bit max).
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
+  SMLoc S = Parser.getTok().getLoc(); // Location of the current token.
+  int64_t Imm = 0;
+
+  if (!parseExpr(Imm)) {
+    // The operand is optional; if it is not present, default to 0.
+    Imm = 0;
+  }
+
+  if (!isUInt<16>(Imm)) { // Value must fit in 16 unsigned bits.
+    Error(S, "expected a 16-bit value");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back( // Append the value as an ImmTyEndpgm immediate at S.
+      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
+  return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } // Predicate for operands of immediate type ImmTyEndpgm.
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 51c2abeac2ff..62a19d848af2 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -1,37 +1,22 @@
 //===-- BUFInstructions.td - Buffer Instruction Defintions ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
 
 def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
 def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
 
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
 def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
 def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
 
-class MubufLoad <SDPatternOperator op> : PatFrag <
-  (ops node:$ptr), (op node:$ptr), [{
-  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUAS::GLOBAL_ADDRESS ||
-         AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-def mubuf_load          : MubufLoad <load>;
-def mubuf_az_extloadi8  : MubufLoad <az_extloadi8>;
-def mubuf_sextloadi8    : MubufLoad <sextloadi8>;
-def mubuf_az_extloadi16 : MubufLoad <az_extloadi16>;
-def mubuf_sextloadi16   : MubufLoad <sextloadi16>;
-def mubuf_load_atomic   : MubufLoad <atomic_load>;
-
 def BUFAddrKind {
   int Offset = 0;
   int OffEn  = 1;
@@ -97,7 +82,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
   bits<1> has_vdata   = 1;
   bits<1> has_vaddr   = 1;
   bits<1> has_glc     = 1;
+  bits<1> has_dlc     = 1;
   bits<1> glc_value   = 0; // the value for glc if no such operand
+  bits<1> dlc_value   = 0; // the value for dlc if no such operand
   bits<1> has_srsrc   = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset  = 1;
@@ -120,6 +107,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
 
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<7>  format;
   bits<8>  vaddr;
   bits<8>  vdata;
@@ -138,17 +126,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
   RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
   dag InsNoData = !if(!empty(vaddrList),
     (ins                    SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdataClass:$vdata,                    SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe),
+         SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe)
+         SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag ret = !if(!empty(vdataList), InsNoData, InsData);
 }
@@ -199,7 +187,7 @@ class MTBUF_Load_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs vdataClass:$vdata),
                  getMTBUFIns<addrKindCopy>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -214,13 +202,13 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
   def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
-                      i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
-                      i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -245,7 +233,7 @@ class MTBUF_Store_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs),
                  getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -260,13 +248,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
   def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                        i16:$offset, i8:$format, i1:$glc,
-                                       i1:$slc, i1:$tfe))]>,
+                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                        i16:$offset, i8:$format, i1:$glc,
-                                       i1:$slc, i1:$tfe))]>,
+                                       i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -324,7 +312,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
   bits<1> has_vdata   = 1;
   bits<1> has_vaddr   = 1;
   bits<1> has_glc     = 1;
+  bits<1> has_dlc     = 1;
   bits<1> glc_value   = 0; // the value for glc if no such operand
+  bits<1> dlc_value   = 0; // the value for dlc if no such operand
   bits<1> has_srsrc   = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset  = 1;
@@ -333,7 +323,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
   bits<4> dwords      = 0;
 }
 
-class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+class MUBUF_Real <MUBUF_Pseudo ps> :
   InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
 
   let isPseudo = 0;
@@ -348,6 +338,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
 
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<8>  vaddr;
   bits<8>  vdata;
   bits<7>  srsrc;
@@ -358,7 +349,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
 
 
 // For cache invalidation instructions.
-class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
   MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
 
   let AsmMatchConverter = "";
@@ -373,7 +364,9 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node> :
   let has_vdata   = 0;
   let has_vaddr   = 0;
   let has_glc     = 0;
+  let has_dlc     = 0;
   let glc_value   = 0;
+  let dlc_value   = 0;
   let has_srsrc   = 0;
   let has_soffset = 0;
   let has_offset  = 0;
@@ -400,7 +393,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
   );
   dag ret = !con(
               !if(!empty(vdataList), InsNoData, InsData),
-              !if(isLds, (ins), (ins TFE:$tfe))
+              !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
              );
 }
 
@@ -460,7 +453,7 @@ class MUBUF_Load_Pseudo <string opName,
                  !con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
                       !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
                  " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
-                   !if(isLds, " lds", "$tfe"),
+                   !if(isLds, " lds", "$tfe") # "$dlc",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -477,6 +470,24 @@ class MUBUF_Load_Pseudo <string opName,
   let dwords = getMUBUFDwords<vdataClass>.ret;
 }
 
+class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
+  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>; // Maps an `ld` addressed through MUBUFOffset onto `inst`, forwarding the same seven operands.
+
+class MUBUF_Addr64_Load_Pat <Instruction inst,
+                            ValueType load_vt = i32,
+                            SDPatternOperator ld = null_frag> : Pat <
+  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>; // Maps an `ld` addressed through MUBUFAddr64 onto `inst`; note `inst` takes $vaddr before $srsrc.
+
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+  def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
+  def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
+} // Emits one load pattern each for the _OFFSET and _ADDR64 variants of BaseInst.
+
+
 // FIXME: tfe can't be an operand because it requires a separate
 // opcode because it needs an N+1 register class dest register.
 multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
@@ -485,20 +496,10 @@ multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
                               bit TiedDest = 0,
                               bit isLds = 0> {
 
-  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
-    TiedDest, isLds,
-    !if(isLds,
-        [],
-        [(set load_vt:$vdata,
-         (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, TiedDest, isLds>,
     MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
 
-  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
-    TiedDest, isLds,
-    !if(isLds,
-        [],
-        [(set load_vt:$vdata,
-         (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, TiedDest, isLds>,
     MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
 
   def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -531,7 +532,7 @@ class MUBUF_Store_Pseudo <string opName,
   : MUBUF_Pseudo<opName,
                  (outs),
                  getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
-                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -547,12 +548,12 @@ multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
 
   def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -638,6 +639,7 @@ class MUBUF_Atomic_Pseudo<string opName,
   let hasSideEffects = 1;
   let DisableWQM = 1;
   let has_glc = 0;
+  let has_dlc = 0;
   let has_tfe = 0;
   let maybeAtomic = 1;
 }
@@ -656,6 +658,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 0;
+  let dlc_value = 0;
   let AsmMatchConverter = "cvtMubufAtomic";
 }
 
@@ -673,6 +676,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
   let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 1;
+  let dlc_value = 0;
   let Constraints = "$vdata = $vdata_in";
   let DisableEncoding = "$vdata_in";
   let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -681,34 +685,53 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
 multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
                                         RegisterClass vdataClass,
                                         ValueType vdataType,
-                                        SDPatternOperator atomic> {
+                                        SDPatternOperator atomic,
+                                        bit isFP = getIsFP<vdataType>.ret> {
+  let FPAtomic = isFP in // FPAtomic is set from isFP on each of the five variants below.
   def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
                 MUBUFAddr64Table <0, NAME>;
+
+  let FPAtomic = isFP in
   def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
                 MUBUFAddr64Table <1, NAME>;
+
+  let FPAtomic = isFP in
   def _OFFEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn,  vdataClass>;
+
+  let FPAtomic = isFP in
+
   def _IDXEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn,  vdataClass>;
+
+  let FPAtomic = isFP in
   def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
 }
 
 multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
                                      RegisterClass vdataClass,
                                      ValueType vdataType,
-                                     SDPatternOperator atomic> {
+                                     SDPatternOperator atomic,
+                                     bit isFP = getIsFP<vdataType>.ret> {
+  let FPAtomic = isFP in // FPAtomic is set from isFP on each of the five _RTN variants below.
   def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set vdataType:$vdata,
      (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
              vdataType:$vdata_in))]>,
     MUBUFAddr64Table <0, NAME # "_RTN">;
 
+  let FPAtomic = isFP in
   def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set vdataType:$vdata,
      (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
              vdataType:$vdata_in))]>,
     MUBUFAddr64Table <1, NAME # "_RTN">;
 
+  let FPAtomic = isFP in
   def _OFFEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn,  vdataClass>;
+
+  let FPAtomic = isFP in
   def _IDXEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn,  vdataClass>;
+
+  let FPAtomic = isFP in
   def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
 }
 
@@ -804,34 +827,45 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
 } // End HasPackedD16VMem.
 
 defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8
+  "buffer_load_ubyte", VGPR_32, i32
 >;
 defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8
+  "buffer_load_sbyte", VGPR_32, i32
 >;
 defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16
+  "buffer_load_ushort", VGPR_32, i32
 >;
 defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16
+  "buffer_load_sshort", VGPR_32, i32
 >;
 defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_dword", VGPR_32, i32, mubuf_load
+  "buffer_load_dword", VGPR_32, i32
 >;
 defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
+  "buffer_load_dwordx2", VReg_64, v2i32
 >;
 defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx3", VReg_96, untyped, mubuf_load
+  "buffer_load_dwordx3", VReg_96, v3i32
 >;
 defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
+  "buffer_load_dwordx4", VReg_128, v4i32
 >;
 
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
+
 // This is not described in AMD documentation,
 // but 'lds' versions of these opcodes are available
 // in at least GFX8+ chips. See Bug 37653.
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
 defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
   "buffer_load_dwordx2", VReg_64, v2i32, null_frag, 0, 1
 >;
@@ -856,7 +890,7 @@ defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
   "buffer_store_dwordx2", VReg_64, v2i32, store_global
 >;
 defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
-  "buffer_store_dwordx3", VReg_96, untyped, store_global
+  "buffer_store_dwordx3", VReg_96, v3i32, store_global
 >;
 defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
   "buffer_store_dwordx4", VReg_128, v4i32, store_global
@@ -940,11 +974,11 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
 >;
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
 def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
 }
 
-let SubtargetPredicate = isSI in { // isn't on CI & VI
+let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
 /*
 defm BUFFER_ATOMIC_RSUB        : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
 defm BUFFER_ATOMIC_FCMPSWAP    : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
@@ -1006,17 +1040,28 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
 def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
                                        int_amdgcn_buffer_wbinvl1>;
 
+let SubtargetPredicate = HasAtomicFaddInsts in { // Only no-return (NO_RTN) pseudos are defined here.
+
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
+  "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
+  "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
+
 //===----------------------------------------------------------------------===//
 // MTBUF Instructions
 //===----------------------------------------------------------------------===//
 
 defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Pseudo_Loads  <"tbuffer_load_format_x",     VGPR_32>;
 defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Pseudo_Loads  <"tbuffer_load_format_xy",    VReg_64>;
-defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Pseudo_Loads  <"tbuffer_load_format_xyz",   VReg_128>;
+defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Pseudo_Loads  <"tbuffer_load_format_xyz",   VReg_96>;
 defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Pseudo_Loads  <"tbuffer_load_format_xyzw",  VReg_128>;
 defm TBUFFER_STORE_FORMAT_X    : MTBUF_Pseudo_Stores <"tbuffer_store_format_x",    VGPR_32>;
 defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy",   VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz",  VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz",  VReg_96>;
 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
 
 let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
@@ -1041,19 +1086,21 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
 } // End HasPackedD16VMem.
 
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7Plus in {
 
 //===----------------------------------------------------------------------===//
 // Instruction definitions for CI and newer.
 //===----------------------------------------------------------------------===//
-// Remaining instructions:
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
 
 def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
                                            int_amdgcn_buffer_wbinvl1_vol>;
 
-} // End let SubtargetPredicate = isCIVI
+} // End let SubtargetPredicate = isGFX7Plus
+
+let SubtargetPredicate = isGFX10Plus in { // No node is passed, so MUBUF_Invalidate's `node` defaults to null_frag.
+  def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+  def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
+} // End SubtargetPredicate = isGFX10Plus
 
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
@@ -1067,6 +1114,10 @@ def extract_slc : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
 }]>;
 
+def extract_dlc : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>; // Yields bit 2 of the immediate as an i8 target constant (applied to $cachepolicy in the patterns below).
+
 //===----------------------------------------------------------------------===//
 // buffer_load/store_format patterns
 //===----------------------------------------------------------------------===//
@@ -1077,21 +1128,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
     (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1100,7 +1151,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1108,6 +1159,8 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3f32, "BUFFER_LOAD_FORMAT_XYZ">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3i32, "BUFFER_LOAD_FORMAT_XYZ">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
 
@@ -1131,8 +1184,14 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3f32, "BUFFER_LOAD_DWORDX3">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3i32, "BUFFER_LOAD_DWORDX3">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_byte, i32, "BUFFER_LOAD_SBYTE">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short, i32, "BUFFER_LOAD_SSHORT">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort,  i32, "BUFFER_LOAD_USHORT">;
 
 multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                    string opcode> {
@@ -1140,21 +1199,23 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
     (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1163,8 +1224,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0,  (extract_dlc $cachepolicy))
   >;
 }
 
@@ -1172,6 +1233,8 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3f32, "BUFFER_STORE_FORMAT_XYZ">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3i32, "BUFFER_STORE_FORMAT_XYZ">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
 
@@ -1195,42 +1258,47 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3f32, "BUFFER_STORE_DWORDX3">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3i32, "BUFFER_STORE_DWORDX3">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_byte, i32, "BUFFER_STORE_BYTE">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
 
 //===----------------------------------------------------------------------===//
 // buffer_atomic patterns
 //===----------------------------------------------------------------------===//
 
-multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
+multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
+                                string opcode> {
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
           0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
                                         (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
           0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
                                        (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
           i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
                                        (as_i16imm $offset), (extract_slc $cachepolicy))
   >;
 
   def : GCNPat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+    (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
           i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
       $vdata_in,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1238,16 +1306,66 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
   >;
 }
 
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_add, "BUFFER_ATOMIC_ADD">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_and, "BUFFER_ATOMIC_AND">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_or, "BUFFER_ATOMIC_OR">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64,  "BUFFER_ATOMIC_ADD_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
+
+multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
+                                       string opcode> {
+  def : GCNPat<
+    (name vt:$vdata_in, v4i32:$rsrc, 0,
+          0, i32:$soffset, imm:$offset,
+          imm:$cachepolicy, 0),
+    (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
+                                        (as_i16imm $offset), (extract_slc $cachepolicy))
+  >;
+
+  def : GCNPat<
+    (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+          0, i32:$soffset, imm:$offset,
+          imm:$cachepolicy, imm),
+    (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
+                                       (as_i16imm $offset), (extract_slc $cachepolicy))
+  >;
+
+  def : GCNPat<
+    (name vt:$vdata_in, v4i32:$rsrc, 0,
+          i32:$voffset, i32:$soffset, imm:$offset,
+          imm:$cachepolicy, 0),
+    (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
+                                       (as_i16imm $offset), (extract_slc $cachepolicy))
+  >;
+
+  def : GCNPat<
+    (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+          i32:$voffset, i32:$soffset, imm:$offset,
+          imm:$cachepolicy, imm),
+    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
+      $vdata_in,
+      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+      $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
+  >;
+}
+
+defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
+defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
 
 def : GCNPat<
   (SIbuffer_atomic_cmpswap
@@ -1298,12 +1416,11 @@ def : GCNPat<
     sub0)
 >;
 
-
 class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                               PatFrag constant_ld> : GCNPat <
      (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 
 multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
@@ -1311,43 +1428,47 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
   def : GCNPat <
      (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                    i16:$offset, i1:$slc))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
 
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, zextloadi8_constant>;
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;
 
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
-} // End SubtargetPredicate = isSICI
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_32_global>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_64_global>;
+} // End SubtargetPredicate = isGFX6GFX7
 
 multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
                                PatFrag ld> {
 
   def : GCNPat <
     (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
 
 let OtherPredicates = [Has16BitInsts] in {
 
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;
 
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, mubuf_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
 
 } // End OtherPredicates = [Has16BitInsts]
 
@@ -1357,111 +1478,79 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
   def : GCNPat <
     (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                i32:$soffset, u16imm:$offset))),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
 
 // XXX - Is it possible to have a complex pattern in a PatFrag?
-multiclass MUBUFScratchLoadPat_Hi16 <MUBUF_Pseudo InstrOffen,
+multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
                                 MUBUF_Pseudo InstrOffset,
-                                ValueType vt, PatFrag ld> {
-  def : GCNPat <
-    (build_vector vt:$lo, (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
-                                 i32:$soffset, u16imm:$offset)))),
-    (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo))
-  >;
-
-  def : GCNPat <
-    (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
-                               i32:$soffset, u16imm:$offset)))))),
-    (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo))
-  >;
-
-
-  def : GCNPat <
-    (build_vector vt:$lo, (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))),
-    (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo))
-  >;
-
-  def : GCNPat <
-    (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))))),
-    (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo))
-  >;
-}
-
-multiclass MUBUFScratchLoadPat_Lo16 <MUBUF_Pseudo InstrOffen,
-                                     MUBUF_Pseudo InstrOffset,
-                                     ValueType vt, PatFrag ld> {
-  def : GCNPat <
-    (build_vector (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
-                                             i32:$soffset, u16imm:$offset))),
-                  (vt (Hi16Elt vt:$hi))),
-    (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi))
-  >;
-
+                                ValueType vt, PatFrag ld_frag> {
   def : GCNPat <
-    (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
-                                                              i32:$soffset, u16imm:$offset))))),
-                  (f16 (Hi16Elt f16:$hi))),
-    (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi))
+    (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 
   def : GCNPat <
-    (build_vector (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
-                  (vt (Hi16Elt vt:$hi))),
-    (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi))
-  >;
-
-  def : GCNPat <
-    (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))))),
-                  (f16 (Hi16Elt f16:$hi))),
-    (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi))
+    (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 }
 
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, az_extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, i16, load_private>;
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, i16, az_extloadi8_private>;
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2i16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2i16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2i16, sextloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2f16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2f16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2f16, sextloadi8_d16_hi_private>;
 
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, i16, load_private>;
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2i16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2i16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2i16, sextloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2f16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2f16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2f16, sextloadi8_d16_lo_private>;
 }
+
 multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
                                       ValueType vt, PatFrag atomic_st> {
   // Store follows atomic op convention so address is forst
   def : GCNPat <
      (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                    i16:$offset, i1:$slc), vt:$val),
-     (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+     (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
 defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, store_atomic_global>;
 defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, store_atomic_global>;
-} // End Predicates = isSICI
+} // End SubtargetPredicate = isGFX6GFX7
 
 
 multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
@@ -1469,8 +1558,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
 
   def : GCNPat <
     (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                      i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
-    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                      i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
+    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
 
@@ -1479,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
 
 multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
                                  MUBUF_Pseudo InstrOffset,
-                                 ValueType vt, PatFrag st> {
+                                 ValueType vt, PatFrag st,
+                                 RegisterClass rc = VGPR_32> {
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                       i32:$soffset, u16imm:$offset)),
-    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                        u16imm:$offset)),
-    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
 
@@ -1498,8 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET
 defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
 defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
 defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
 
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
@@ -1526,7 +1617,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1534,7 +1625,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
               imm:$format, imm:$cachepolicy, imm)),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1542,7 +1633,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1552,15 +1643,17 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32,   "TBUFFER_LOAD_FORMAT_X">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3i32, "TBUFFER_LOAD_FORMAT_XYZ">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32,   "TBUFFER_LOAD_FORMAT_X">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
 defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
 
 let SubtargetPredicate = HasUnpackedD16VMem in {
@@ -1582,7 +1675,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
           imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1590,7 +1683,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
           imm:$format, imm:$cachepolicy, imm),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1598,7 +1691,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
           imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1608,17 +1701,17 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
 
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32,   "TBUFFER_STORE_FORMAT_X">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
-defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3i32, "TBUFFER_STORE_FORMAT_XYZ">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32,   "TBUFFER_STORE_FORMAT_X">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
-defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
 defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
 
 let SubtargetPredicate = HasUnpackedD16VMem in {
@@ -1634,28 +1727,22 @@ let SubtargetPredicate = HasPackedD16VMem in {
 } // End HasPackedD16VMem.
 
 //===----------------------------------------------------------------------===//
-// Target instructions, move to the appropriate target TD file
+// Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// SI
+// Base ENC_MUBUF for GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isSICI;
-  let DecoderNamespace="SICI";
-
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
+    MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
   let Inst{13}    = ps.idxen;
   let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
-  let Inst{15}    = ps.addr64;
   let Inst{16}    = !if(ps.lds, 1, 0);
   let Inst{24-18} = op;
-  let Inst{31-26} = 0x38; //encoding
+  let Inst{31-26} = 0x38;
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
   let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1664,125 +1751,250 @@ class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
   let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }
 
-multiclass MUBUF_Real_AllAddr_si<bits<7> op> {
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
-
-multiclass MUBUF_Real_AllAddr_Lds_si<bits<7> op> {
-
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
-                   MUBUFLdsTable<0, NAME # "_OFFSET_si">;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
-                   MUBUFLdsTable<0, NAME # "_ADDR64_si">;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
-                   MUBUFLdsTable<0, NAME # "_OFFEN_si">;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
-                   MUBUFLdsTable<0, NAME # "_IDXEN_si">;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
-                   MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
-
-  def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
-                       MUBUFLdsTable<1, NAME # "_OFFSET_si">;
-  def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
-                       MUBUFLdsTable<1, NAME # "_ADDR64_si">;
-  def _LDS_OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
-                       MUBUFLdsTable<1, NAME # "_OFFEN_si">;
-  def _LDS_IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
-                       MUBUFLdsTable<1, NAME # "_IDXEN_si">;
-  def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
-                       MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
-}
-
-multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
-  def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
-  def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
-  def _OFFEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
-  def _IDXEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
-  def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
-}
-
-defm BUFFER_LOAD_FORMAT_X       : MUBUF_Real_AllAddr_Lds_si <0x00>;
-defm BUFFER_LOAD_FORMAT_XY      : MUBUF_Real_AllAddr_si <0x01>;
-defm BUFFER_LOAD_FORMAT_XYZ     : MUBUF_Real_AllAddr_si <0x02>;
-defm BUFFER_LOAD_FORMAT_XYZW    : MUBUF_Real_AllAddr_si <0x03>;
-defm BUFFER_STORE_FORMAT_X      : MUBUF_Real_AllAddr_si <0x04>;
-defm BUFFER_STORE_FORMAT_XY     : MUBUF_Real_AllAddr_si <0x05>;
-defm BUFFER_STORE_FORMAT_XYZ    : MUBUF_Real_AllAddr_si <0x06>;
-defm BUFFER_STORE_FORMAT_XYZW   : MUBUF_Real_AllAddr_si <0x07>;
-defm BUFFER_LOAD_UBYTE          : MUBUF_Real_AllAddr_Lds_si <0x08>;
-defm BUFFER_LOAD_SBYTE          : MUBUF_Real_AllAddr_Lds_si <0x09>;
-defm BUFFER_LOAD_USHORT         : MUBUF_Real_AllAddr_Lds_si <0x0a>;
-defm BUFFER_LOAD_SSHORT         : MUBUF_Real_AllAddr_Lds_si <0x0b>;
-defm BUFFER_LOAD_DWORD          : MUBUF_Real_AllAddr_Lds_si <0x0c>;
-defm BUFFER_LOAD_DWORDX2        : MUBUF_Real_AllAddr_si <0x0d>;
-defm BUFFER_LOAD_DWORDX4        : MUBUF_Real_AllAddr_si <0x0e>;
-defm BUFFER_LOAD_DWORDX3        : MUBUF_Real_AllAddr_si <0x0f>;
-defm BUFFER_STORE_BYTE          : MUBUF_Real_AllAddr_si <0x18>;
-defm BUFFER_STORE_SHORT         : MUBUF_Real_AllAddr_si <0x1a>;
-defm BUFFER_STORE_DWORD         : MUBUF_Real_AllAddr_si <0x1c>;
-defm BUFFER_STORE_DWORDX2       : MUBUF_Real_AllAddr_si <0x1d>;
-defm BUFFER_STORE_DWORDX4       : MUBUF_Real_AllAddr_si <0x1e>;
-defm BUFFER_STORE_DWORDX3       : MUBUF_Real_AllAddr_si <0x1f>;
-
-defm BUFFER_ATOMIC_SWAP         : MUBUF_Real_Atomic_si <0x30>;
-defm BUFFER_ATOMIC_CMPSWAP      : MUBUF_Real_Atomic_si <0x31>;
-defm BUFFER_ATOMIC_ADD          : MUBUF_Real_Atomic_si <0x32>;
-defm BUFFER_ATOMIC_SUB          : MUBUF_Real_Atomic_si <0x33>;
-//defm BUFFER_ATOMIC_RSUB         : MUBUF_Real_Atomic_si <0x34>;    // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN         : MUBUF_Real_Atomic_si <0x35>;
-defm BUFFER_ATOMIC_UMIN         : MUBUF_Real_Atomic_si <0x36>;
-defm BUFFER_ATOMIC_SMAX         : MUBUF_Real_Atomic_si <0x37>;
-defm BUFFER_ATOMIC_UMAX         : MUBUF_Real_Atomic_si <0x38>;
-defm BUFFER_ATOMIC_AND          : MUBUF_Real_Atomic_si <0x39>;
-defm BUFFER_ATOMIC_OR           : MUBUF_Real_Atomic_si <0x3a>;
-defm BUFFER_ATOMIC_XOR          : MUBUF_Real_Atomic_si <0x3b>;
-defm BUFFER_ATOMIC_INC          : MUBUF_Real_Atomic_si <0x3c>;
-defm BUFFER_ATOMIC_DEC          : MUBUF_Real_Atomic_si <0x3d>;
-
-//defm BUFFER_ATOMIC_FCMPSWAP     : MUBUF_Real_Atomic_si <0x3e>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMIN         : MUBUF_Real_Atomic_si <0x3f>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMAX         : MUBUF_Real_Atomic_si <0x40>;    // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2      : MUBUF_Real_Atomic_si <0x50>;
-defm BUFFER_ATOMIC_CMPSWAP_X2   : MUBUF_Real_Atomic_si <0x51>;
-defm BUFFER_ATOMIC_ADD_X2       : MUBUF_Real_Atomic_si <0x52>;
-defm BUFFER_ATOMIC_SUB_X2       : MUBUF_Real_Atomic_si <0x53>;
-//defm BUFFER_ATOMIC_RSUB_X2      : MUBUF_Real_Atomic_si <0x54>;    // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2      : MUBUF_Real_Atomic_si <0x55>;
-defm BUFFER_ATOMIC_UMIN_X2      : MUBUF_Real_Atomic_si <0x56>;
-defm BUFFER_ATOMIC_SMAX_X2      : MUBUF_Real_Atomic_si <0x57>;
-defm BUFFER_ATOMIC_UMAX_X2      : MUBUF_Real_Atomic_si <0x58>;
-defm BUFFER_ATOMIC_AND_X2       : MUBUF_Real_Atomic_si <0x59>;
-defm BUFFER_ATOMIC_OR_X2        : MUBUF_Real_Atomic_si <0x5a>;
-defm BUFFER_ATOMIC_XOR_X2       : MUBUF_Real_Atomic_si <0x5b>;
-defm BUFFER_ATOMIC_INC_X2       : MUBUF_Real_Atomic_si <0x5c>;
-defm BUFFER_ATOMIC_DEC_X2       : MUBUF_Real_Atomic_si <0x5d>;
-// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
-//defm BUFFER_ATOMIC_FCMPSWAP_X2  : MUBUF_Real_Atomic_si <0x5e">;   // isn't on VI
-//defm BUFFER_ATOMIC_FMIN_X2      : MUBUF_Real_Atomic_si <0x5f>;    // isn't on VI
-//defm BUFFER_ATOMIC_FMAX_X2      : MUBUF_Real_Atomic_si <0x60>;    // isn't on VI
-
-def BUFFER_WBINVL1_SC_si        : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
-def BUFFER_WBINVL1_si           : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
-
-class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
-  MTBUF_Real<ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isSICI;
-  let DecoderNamespace="SICI";
+class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : // GFX10 MUBUF real encoding; opcode is 8 bits wide
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> { // low 7 opcode bits are handed to the base class
+  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); // bit 15 holds dlc here; the GFX6/GFX7 class puts addr64 in it
+  let Inst{25} = op{7}; // eighth opcode bit is encoded separately from the low seven
+}
+
+class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> : // GFX6/GFX7 MUBUF real encoding (SI encoding family)
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> { // only op{6-0} is encoded; op{7} is not used here
+  let Inst{15} = ps.addr64; // bit 15 is the pseudo's addr64 flag
+}
 
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { // applies to every def produced by the multiclasses below
+  multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
+                                        string asmName> { // real def for the pseudo called opName
+    def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
+      MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
+      let AsmString = asmName # ps.AsmOperands; // AsmString is asmName followed by the pseudo's operand string
+    }
+  }
+  multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> { // one real def per suffix: _BOTHEN, _IDXEN, _OFFEN, _OFFSET
+    def _BOTHEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> { // same four suffixes plus _LDS_ twins, each with a MUBUFLdsTable entry
+    def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                        MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">; // first argument 0: non-LDS def
+    def _OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                        MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
+    def _IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                        MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
+    def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                        MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
+
+    def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                            MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">; // first argument 1: LDS def; the string still names the non-LDS def
+    def _LDS_OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                            MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
+    def _LDS_IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                            MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
+    def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                            MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+  }
+  multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+      MUBUF_Real_AllAddr_gfx10<op> { // inherits the four plain defs and adds their _RTN counterparts
+    def _BOTHEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+    def _IDXEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+    def _OFFEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+    def _OFFSET_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm BUFFER_STORE_BYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x019>;
+defm BUFFER_STORE_SHORT_D16_HI    : MUBUF_Real_AllAddr_gfx10<0x01b>;
+defm BUFFER_LOAD_UBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x020>;
+defm BUFFER_LOAD_UBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x021>;
+defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
+defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
+defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
+defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;
+// FIXME-GFX10: Add the following instructions:
+//defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
+//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
+defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
+defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
+defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_gfx10<0x083>;
+defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_gfx10<0x084>;
+defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_gfx10<0x085>;
+defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_gfx10<0x086>;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
+
+def BUFFER_GL0_INV_gfx10 :
+  MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx10 :
+  MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in { // GFX6-only real defs
+  multiclass MUBUF_Real_gfx6<bits<8> op> { // for pseudos without an addressing-mode suffix
+    def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>; // NAME is the defm name, used as the pseudo name as-is
+  }
+} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { // GFX7-only real defs
+  multiclass MUBUF_Real_gfx7<bits<8> op> { // for pseudos without an addressing-mode suffix
+    def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>; // NAME is the defm name, used as the pseudo name as-is
+  }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { // applies to every def produced by the multiclasses below
+  multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> { // one real def per suffix, including _ADDR64
+    def _ADDR64_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> { // same five suffixes plus _LDS_ twins, each with a MUBUFLdsTable entry
+    def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                            MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">; // first argument 0: non-LDS def
+    def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+                            MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
+    def _OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                            MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
+    def _IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                            MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
+    def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                            MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
+
+    def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                                MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">; // first argument 1: LDS def; the string still names the non-LDS def
+    def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
+                                MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
+    def _LDS_OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                                MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
+    def _LDS_IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                                MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
+    def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                                MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
+  }
+  multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
+      MUBUF_Real_AllAddr_gfx6_gfx7<op> { // inherits the five plain defs and adds their _RTN counterparts
+    def _ADDR64_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
+    def _BOTHEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+    def _IDXEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+    def _OFFEN_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+    def _OFFSET_RTN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> : // one defm yields both the GFX6/GFX7 and the GFX10 defs
+  MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>; // the same op value is passed to both parents
+
+multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> : // LDS-table flavour of the combination above
+  MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;
+
+multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> : // atomic flavour: plain and _RTN defs for both encodings
+  MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
+
+// FIXME-GFX6: The following instructions are available only on GFX6.
+//defm BUFFER_ATOMIC_RSUB         : MUBUF_Real_Atomics_gfx6 <0x034>;
+//defm BUFFER_ATOMIC_RSUB_X2      : MUBUF_Real_Atomics_gfx6 <0x054>;
+
+defm BUFFER_LOAD_FORMAT_X     : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
+defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm BUFFER_STORE_FORMAT_X    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm BUFFER_STORE_FORMAT_XY   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
+defm BUFFER_LOAD_UBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
+defm BUFFER_LOAD_SBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
+defm BUFFER_LOAD_USHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
+defm BUFFER_LOAD_SSHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
+defm BUFFER_LOAD_DWORD        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
+defm BUFFER_LOAD_DWORDX2      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
+defm BUFFER_LOAD_DWORDX4      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
+defm BUFFER_LOAD_DWORDX3      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
+defm BUFFER_STORE_BYTE        : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
+defm BUFFER_STORE_SHORT       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
+defm BUFFER_STORE_DWORD       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
+defm BUFFER_STORE_DWORDX2     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
+defm BUFFER_STORE_DWORDX4     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
+defm BUFFER_STORE_DWORDX3     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
+
+defm BUFFER_ATOMIC_SWAP        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
+defm BUFFER_ATOMIC_CMPSWAP     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
+defm BUFFER_ATOMIC_ADD         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
+defm BUFFER_ATOMIC_SUB         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
+defm BUFFER_ATOMIC_SMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
+defm BUFFER_ATOMIC_UMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
+defm BUFFER_ATOMIC_SMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
+defm BUFFER_ATOMIC_UMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
+defm BUFFER_ATOMIC_AND         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
+defm BUFFER_ATOMIC_OR          : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
+defm BUFFER_ATOMIC_XOR         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
+defm BUFFER_ATOMIC_INC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
+defm BUFFER_ATOMIC_DEC         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
+// FIXME-GFX6-GFX7-GFX10: Add the following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
+//defm BUFFER_ATOMIC_FMIN        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
+//defm BUFFER_ATOMIC_FMAX        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
+defm BUFFER_ATOMIC_SWAP_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
+defm BUFFER_ATOMIC_CMPSWAP_X2  : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
+defm BUFFER_ATOMIC_ADD_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
+defm BUFFER_ATOMIC_SUB_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
+defm BUFFER_ATOMIC_SMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
+defm BUFFER_ATOMIC_UMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
+defm BUFFER_ATOMIC_SMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
+defm BUFFER_ATOMIC_UMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
+defm BUFFER_ATOMIC_AND_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
+defm BUFFER_ATOMIC_OR_X2       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
+defm BUFFER_ATOMIC_XOR_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
+defm BUFFER_ATOMIC_INC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
+defm BUFFER_ATOMIC_DEC_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
+// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
+// FIXME-GFX6-GFX7-GFX10: Add the following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
+//defm BUFFER_ATOMIC_FMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
+//defm BUFFER_ATOMIC_FMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+
+defm BUFFER_WBINVL1_SC        : MUBUF_Real_gfx6<0x070>;
+defm BUFFER_WBINVL1_VOL       : MUBUF_Real_gfx7<0x070>;
+def  BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
+
+//===----------------------------------------------------------------------===//
+// Base ENC_MTBUF for GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
+    MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
   let Inst{13}    = ps.idxen;
   let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
-  let Inst{15}    = ps.addr64;
   let Inst{18-16} = op;
-  let Inst{22-19} = dfmt;
-  let Inst{25-23} = nfmt;
   let Inst{31-26} = 0x3a; //encoding
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1792,47 +2004,87 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
   let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }
 
-multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
-  def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
+//===----------------------------------------------------------------------===//
+// MTBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : // GFX10 MTBUF real encoding; opcode is 4 bits wide
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> { // low 3 opcode bits are handed to the base class
+  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); // bit 15 holds dlc here; the GFX6/GFX7 class puts addr64 in it
+  let Inst{25-19} = format; // single 7-bit field; the GFX6/GFX7 class fills these bits with dfmt and nfmt
+  let Inst{53} = op{3}; // fourth opcode bit is encoded separately from the low three
+}
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { // applies to every def produced by the multiclass below
+  multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> { // one real def per suffix: _BOTHEN, _IDXEN, _OFFEN, _OFFSET
+    def _BOTHEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
 
-defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_si <0>;
-defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_si <1>;
-defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_si <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_si <3>;
-defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_si <4>;
-defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_si <5>;
-defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_si <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
+defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Real_AllAddr_gfx10<0x008>;
+defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Real_AllAddr_gfx10<0x009>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Real_AllAddr_gfx10<0x00a>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW  : MTBUF_Real_AllAddr_gfx10<0x00b>;
+defm TBUFFER_STORE_FORMAT_D16_X    : MTBUF_Real_AllAddr_gfx10<0x00c>;
+defm TBUFFER_STORE_FORMAT_D16_XY   : MTBUF_Real_AllAddr_gfx10<0x00d>;
+defm TBUFFER_STORE_FORMAT_D16_XYZ  : MTBUF_Real_AllAddr_gfx10<0x00e>;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
 
 //===----------------------------------------------------------------------===//
-// CI
-// MTBUF - GFX6, GFX7.
+// MTBUF - GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real_si<op, ps> {
-  let AssemblerPredicate=isCIOnly;
-  let DecoderNamespace="CI";
+class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> : // GFX6/GFX7 MTBUF real encoding (SI encoding family)
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> { // only op{2-0} is encoded; op{3} is not used here
+  let Inst{15} = ps.addr64; // bit 15 is the pseudo's addr64 flag
+  let Inst{22-19} = dfmt; // 4-bit dfmt field
+  let Inst{25-23} = nfmt; // 3-bit nfmt field
 }
 
-def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
+    def _ADDR64_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> : // one defm yields both the GFX6/GFX7 and the GFX10 defs
+  MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>; // the same op value is passed to both parents
 
+defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
 
 //===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
 class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
-  let AssemblerPredicate=isVI;
-  let DecoderNamespace="VI";
+  let AssemblerPredicate = isGFX8GFX9;
+  let DecoderNamespace = "GFX8";
 
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
@@ -1878,7 +2130,7 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> {
 }
 
 class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
   let AssemblerPredicate=HasUnpackedD16VMem;
@@ -2002,12 +2254,19 @@ def BUFFER_STORE_LDS_DWORD_vi   : MUBUF_Real_vi <0x3d, BUFFER_STORE_LDS_DWORD>;
 def BUFFER_WBINVL1_vi           : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
 def BUFFER_WBINVL1_VOL_vi       : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
 
+let SubtargetPredicate = HasAtomicFaddInsts in {
+
+defm BUFFER_ATOMIC_ADD_F32    : MUBUF_Real_AllAddr_vi <0x4d>;
+defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_AllAddr_vi <0x4e>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
+
 class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
   MTBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
-  let AssemblerPredicate=isVI;
-  let DecoderNamespace="VI";
+  let AssemblerPredicate = isGFX8GFX9;
+  let DecoderNamespace = "GFX8";
 
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td
index ae40c6387982..1a526675164a 100644
--- a/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/lib/Target/AMDGPU/CaymanInstructions.td
@@ -1,9 +1,8 @@
 //===-- CaymanInstructions.td - CM Instruction defs  -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index 31d2ebef481d..c52eaaa3fdc5 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -1,9 +1,8 @@
 //===-- DSInstructions.td - DS Instruction Defintions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,8 +10,6 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   InstSI <outs, ins, "", pattern>,
   SIMCInstr <opName, SIEncodingFamily.NONE> {
 
-  let SubtargetPredicate = isGCN;
-
   let LGKM_CNT = 1;
   let DS = 1;
   let Size = 8;
@@ -21,6 +18,7 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   // Most instruction load and store data, so set this as the default.
   let mayLoad = 1;
   let mayStore = 1;
+  let maybeAtomic = 1;
 
   let hasSideEffects = 0;
   let SchedRW = [WriteLDS];
@@ -40,6 +38,8 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   bits<1> has_data0 = 1;
   bits<1> has_data1 = 1;
 
+  bits<1> has_gws_data0 = 0; // data0 is encoded as addr
+
   bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
   bits<1> has_offset0 = 1;
   bits<1> has_offset1 = 1;
@@ -61,6 +61,7 @@ class DS_Real <DS_Pseudo ds> :
 
   // copy relevant pseudo op flags
   let SubtargetPredicate = ds.SubtargetPredicate;
+  let OtherPredicates = ds.OtherPredicates;
   let AsmMatchConverter  = ds.AsmMatchConverter;
 
   // encoding fields
@@ -322,7 +323,7 @@ class DS_GWS_1D <string opName>
 : DS_GWS<opName,
   (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
 
-  let has_data0 = 1;
+  let has_gws_data0 = 1;
 }
 
 class DS_VOID <string opName> : DS_Pseudo<opName,
@@ -469,11 +470,15 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
 defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
 defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
 
-def DS_GWS_INIT       : DS_GWS_1D<"ds_gws_init">;
+let isConvergent = 1, usesCustomInserter = 1 in {
+def DS_GWS_INIT       : DS_GWS_1D<"ds_gws_init"> {
+  let mayLoad = 0;
+}
 def DS_GWS_SEMA_V     : DS_GWS_0D<"ds_gws_sema_v">;
 def DS_GWS_SEMA_BR    : DS_GWS_1D<"ds_gws_sema_br">;
 def DS_GWS_SEMA_P     : DS_GWS_0D<"ds_gws_sema_p">;
 def DS_GWS_BARRIER    : DS_GWS_1D<"ds_gws_barrier">;
+}
 
 def DS_ADD_SRC2_U32   : DS_1A<"ds_add_src2_u32">;
 def DS_SUB_SRC2_U32   : DS_1A<"ds_sub_src2_u32">;
@@ -550,12 +555,14 @@ def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
 // Instruction definitions for CI and newer.
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7Plus in {
 
 defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
 defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
 
+let isConvergent = 1, usesCustomInserter = 1 in {
 def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
+}
 
 let mayStore = 0 in {
 defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
@@ -569,13 +576,13 @@ defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
 
 def DS_NOP : DS_VOID<"ds_nop">;
 
-} // let SubtargetPredicate = isCIVI
+} // let SubtargetPredicate = isGFX7Plus
 
 //===----------------------------------------------------------------------===//
 // Instruction definitions for VI and newer.
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
 
 let Uses = [EXEC] in {
 def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32",
@@ -586,7 +593,7 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
 
 def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 
-} // let SubtargetPredicate = isVI
+} // let SubtargetPredicate = isGFX8Plus
 
 //===----------------------------------------------------------------------===//
 // DS Patterns
@@ -597,9 +604,9 @@ def : GCNPat <
   (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
 >;
 
-class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat < // NOTE(review): gds is int here but bit in DSAtomicRetPat - confirm intended
   (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
-  (inst $ptr, (as_i16imm $offset), (i1 0))
+  (inst $ptr, (as_i16imm $offset), (i1 gds)) // last operand now comes from the gds parameter (default 0) instead of a literal 0
 >;
 
 multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
@@ -613,38 +620,21 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
   }
 }
 
-
-multiclass DSReadPat_Hi16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector vt:$lo, (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))),
-    (v2i16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo))
-  >;
-
-  def : GCNPat <
-    (build_vector f16:$lo, (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))))),
-    (v2f16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo))
-  >;
-}
-
-multiclass DSReadPat_Lo16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), (vt (Hi16Elt vt:$hi))),
-    (v2i16 (inst $ptr, (as_i16imm $offset), 0, $hi))
-  >;
-
-  def : GCNPat <
-    (build_vector (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))))), (f16 (Hi16Elt f16:$hi))),
-    (v2f16 (inst $ptr, (as_i16imm $offset), 0, $hi))
-  >;
-}
+class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat < // one pattern per (inst, frag, vt); replaces the DSReadPat_Hi16/Lo16 multiclasses
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in), // frag takes the address plus an existing vt value $in
+  (inst $ptr, (as_i16imm $offset), (i1 0), $in) // third operand fixed to (i1 0); $in is forwarded as the trailing operand
+>;
 
 defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8,  i32, "az_extloadi8_local">;
 defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8,  i16, "az_extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">; // each az_extload* pattern is replaced by an extload* / zextload* pair
+defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
 defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
 defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
-defm : DSReadPat_mc <DS_READ_U16, i32, "az_extloadi16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">; // same split for the 16-bit unsigned load
+defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
 defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
 defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
 defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
@@ -658,21 +648,24 @@ defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">;
 } // End AddedComplexity = 100
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
-let AddedComplexity = 100 in {
-defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>;
-defm : DSReadPat_Hi16<DS_READ_U8_D16_HI, az_extloadi8_local>;
-defm : DSReadPat_Hi16<DS_READ_I8_D16_HI, sextloadi8_local>;
-
-defm : DSReadPat_Lo16<DS_READ_U16_D16, load_local>;
-defm : DSReadPat_Lo16<DS_READ_U8_D16, az_extloadi8_local>;
-defm : DSReadPat_Lo16<DS_READ_I8_D16, sextloadi8_local>;
-
-}
+def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>; // *_D16_HI instructions pair with *_d16_hi_local fragments; one pattern each for v2i16 and v2f16
+def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
+def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
+def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
+def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;
+
+def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>; // *_D16 instructions pair with *_d16_lo_local fragments
+def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
+def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
+def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
+def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
 }
 
-class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat < // NOTE(review): gds is int here but bit in DSAtomicRetPat - confirm intended
   (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
-  (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+  (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) // last operand now comes from the gds parameter (default 0) instead of a literal 0
 >;
 
 multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -730,7 +723,7 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
 
 // v2i32 loads are split into i32 loads on SI during lowering, due to a bug
 // related to bounds checking.
-let OtherPredicates = [LDSRequiresM0Init, isCIVI] in {
+let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in { // GFX7+ restriction: see the SI bounds-checking note above
 def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
 def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
 }
@@ -747,260 +740,313 @@ defm : DSWritePat_mc <DS_WRITE_B64, v2i32, "store_align8_local">;
 defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">;
 
 } // End AddedComplexity = 100
-class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat < // gds defaults to 0, so existing three-argument uses are unchanged
   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
-  (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+  (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) // last operand comes from the gds parameter
 >;
 
 multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
   let OtherPredicates = [LDSRequiresM0Init] in {
-    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0")>; // frag is now the base name; the "_local_m0" suffix is appended here
   }
 
   let OtherPredicates = [NotLDSRequiresM0Init] in {
     def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
-                         !cast<PatFrag>(frag)>;
+                         !cast<PatFrag>(frag#"_local")>; // "_local" fragment paired with the "_gfx9" pseudo
   }
+
+  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>; // region variant: gds = 1; sits outside both OtherPredicates scopes
 }
 
 
 
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat < // gds defaults to 0, so existing three-argument uses are unchanged
   (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
-  (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
+  (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds)) // last operand comes from the gds parameter
 >;
 
 multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
   let OtherPredicates = [LDSRequiresM0Init] in {
-    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0")>; // frag is now the base name; the "_local_m0" suffix is appended here
   }
 
   let OtherPredicates = [NotLDSRequiresM0Init] in {
     def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
-                          !cast<PatFrag>(frag)>;
+                          !cast<PatFrag>(frag#"_local")>; // "_local" fragment paired with the "_gfx9" pseudo
   }
+
+  def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>; // region variant: gds = 1; sits outside both OtherPredicates scopes
 }
 
 
 
 // 32-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax_local">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">; // base fragment name only; the multiclass appends "_local_m0", "_local" and "_region_m0"
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">; // compare-and-swap uses the CmpXChg multiclass (extra $cmp operand)
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
 
 // 64-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
-
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">; // same base names as the 32-bit list, with i64 and the *_B64/*_U64/*_I64 pseudos
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
+
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
+
+def : Pat < // selects SIds_ordered_count to DS_ORDERED_COUNT; NOTE(review): plain Pat, unlike the GCNPat patterns nearby - confirm intended
+  (SIds_ordered_count i32:$value, i16:$offset),
+  (DS_ORDERED_COUNT $value, (as_i16imm $offset))
+>;
 
 //===----------------------------------------------------------------------===//
-// Real instructions
+// Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// SIInstructions.td
+// Base ENC_DS for GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class DS_Real_si <bits<8> op, DS_Pseudo ds> :
-  DS_Real <ds>,
-  SIMCInstr <ds.Mnemonic, SIEncodingFamily.SI> {
-  let AssemblerPredicates=[isSICI];
-  let DecoderNamespace="SICI";
+class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
+    DS_Real<ps>, SIMCInstr <ps.Mnemonic, ef> { // ef: encoding family forwarded to SIMCInstr; predicate and decoder namespace come from the enclosing let at each use
 
-  // encoding
-  let Inst{7-0}   = !if(ds.has_offset0, offset0, 0);
-  let Inst{15-8}  = !if(ds.has_offset1, offset1, 0);
-  let Inst{17}    = !if(ds.has_gds, gds, ds.gdsValue);
+  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0); // operand fields the pseudo lacks are encoded as 0
+  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
+  let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue); // fixed ps.gdsValue when there is no gds operand
   let Inst{25-18} = op;
-  let Inst{31-26} = 0x36; // ds prefix
-  let Inst{39-32} = !if(ds.has_addr, addr, 0);
-  let Inst{47-40} = !if(ds.has_data0, data0, 0);
-  let Inst{55-48} = !if(ds.has_data1, data1, 0);
-  let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
+  let Inst{31-26} = 0x36; // ds prefix
+  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0, 0)); // without an addr operand, GWS data0 is placed in these bits
+  let Inst{47-40} = !if(ps.has_data0, data0, 0);
+  let Inst{55-48} = !if(ps.has_data1, data1, 0);
+  let Inst{63-56} = !if(ps.has_vdst, vdst, 0);
 }
 
-def DS_ADD_U32_si         : DS_Real_si<0x0,  DS_ADD_U32>;
-def DS_SUB_U32_si         : DS_Real_si<0x1,  DS_SUB_U32>;
-def DS_RSUB_U32_si        : DS_Real_si<0x2,  DS_RSUB_U32>;
-def DS_INC_U32_si         : DS_Real_si<0x3,  DS_INC_U32>;
-def DS_DEC_U32_si         : DS_Real_si<0x4,  DS_DEC_U32>;
-def DS_MIN_I32_si         : DS_Real_si<0x5,  DS_MIN_I32>;
-def DS_MAX_I32_si         : DS_Real_si<0x6,  DS_MAX_I32>;
-def DS_MIN_U32_si         : DS_Real_si<0x7,  DS_MIN_U32>;
-def DS_MAX_U32_si         : DS_Real_si<0x8,  DS_MAX_U32>;
-def DS_AND_B32_si         : DS_Real_si<0x9,  DS_AND_B32>;
-def DS_OR_B32_si          : DS_Real_si<0xa,  DS_OR_B32>;
-def DS_XOR_B32_si         : DS_Real_si<0xb,  DS_XOR_B32>;
-def DS_MSKOR_B32_si       : DS_Real_si<0xc,  DS_MSKOR_B32>;
-def DS_WRITE_B32_si       : DS_Real_si<0xd,  DS_WRITE_B32>;
-def DS_WRITE2_B32_si      : DS_Real_si<0xe,  DS_WRITE2_B32>;
-def DS_WRITE2ST64_B32_si  : DS_Real_si<0xf,  DS_WRITE2ST64_B32>;
-def DS_CMPST_B32_si       : DS_Real_si<0x10, DS_CMPST_B32>;
-def DS_CMPST_F32_si       : DS_Real_si<0x11, DS_CMPST_F32>;
-def DS_MIN_F32_si         : DS_Real_si<0x12, DS_MIN_F32>;
-def DS_MAX_F32_si         : DS_Real_si<0x13, DS_MAX_F32>;
-def DS_NOP_si             : DS_Real_si<0x14, DS_NOP>;
-def DS_GWS_INIT_si        : DS_Real_si<0x19, DS_GWS_INIT>;
-def DS_GWS_SEMA_V_si      : DS_Real_si<0x1a, DS_GWS_SEMA_V>;
-def DS_GWS_SEMA_BR_si     : DS_Real_si<0x1b, DS_GWS_SEMA_BR>;
-def DS_GWS_SEMA_P_si      : DS_Real_si<0x1c, DS_GWS_SEMA_P>;
-def DS_GWS_BARRIER_si     : DS_Real_si<0x1d, DS_GWS_BARRIER>;
-def DS_WRITE_B8_si        : DS_Real_si<0x1e, DS_WRITE_B8>;
-def DS_WRITE_B16_si       : DS_Real_si<0x1f, DS_WRITE_B16>;
-def DS_ADD_RTN_U32_si     : DS_Real_si<0x20, DS_ADD_RTN_U32>;
-def DS_SUB_RTN_U32_si     : DS_Real_si<0x21, DS_SUB_RTN_U32>;
-def DS_RSUB_RTN_U32_si    : DS_Real_si<0x22, DS_RSUB_RTN_U32>;
-def DS_INC_RTN_U32_si     : DS_Real_si<0x23, DS_INC_RTN_U32>;
-def DS_DEC_RTN_U32_si     : DS_Real_si<0x24, DS_DEC_RTN_U32>;
-def DS_MIN_RTN_I32_si     : DS_Real_si<0x25, DS_MIN_RTN_I32>;
-def DS_MAX_RTN_I32_si     : DS_Real_si<0x26, DS_MAX_RTN_I32>;
-def DS_MIN_RTN_U32_si     : DS_Real_si<0x27, DS_MIN_RTN_U32>;
-def DS_MAX_RTN_U32_si     : DS_Real_si<0x28, DS_MAX_RTN_U32>;
-def DS_AND_RTN_B32_si     : DS_Real_si<0x29, DS_AND_RTN_B32>;
-def DS_OR_RTN_B32_si      : DS_Real_si<0x2a, DS_OR_RTN_B32>;
-def DS_XOR_RTN_B32_si     : DS_Real_si<0x2b, DS_XOR_RTN_B32>;
-def DS_MSKOR_RTN_B32_si   : DS_Real_si<0x2c, DS_MSKOR_RTN_B32>;
-def DS_WRXCHG_RTN_B32_si  : DS_Real_si<0x2d, DS_WRXCHG_RTN_B32>;
-def DS_WRXCHG2_RTN_B32_si : DS_Real_si<0x2e, DS_WRXCHG2_RTN_B32>;
-def DS_WRXCHG2ST64_RTN_B32_si : DS_Real_si<0x2f, DS_WRXCHG2ST64_RTN_B32>;
-def DS_CMPST_RTN_B32_si   : DS_Real_si<0x30, DS_CMPST_RTN_B32>;
-def DS_CMPST_RTN_F32_si   : DS_Real_si<0x31, DS_CMPST_RTN_F32>;
-def DS_MIN_RTN_F32_si     : DS_Real_si<0x32, DS_MIN_RTN_F32>;
-def DS_MAX_RTN_F32_si     : DS_Real_si<0x33, DS_MAX_RTN_F32>;
-
-// These instruction are CI/VI only
-def DS_WRAP_RTN_B32_si    : DS_Real_si<0x34, DS_WRAP_RTN_B32>;
-def DS_CONDXCHG32_RTN_B64_si   : DS_Real_si<0x7e, DS_CONDXCHG32_RTN_B64>;
-def DS_GWS_SEMA_RELEASE_ALL_si : DS_Real_si<0x18, DS_GWS_SEMA_RELEASE_ALL>;
-
-def DS_SWIZZLE_B32_si     : DS_Real_si<0x35, DS_SWIZZLE_B32>;
-def DS_READ_B32_si        : DS_Real_si<0x36, DS_READ_B32>;
-def DS_READ2_B32_si       : DS_Real_si<0x37, DS_READ2_B32>;
-def DS_READ2ST64_B32_si   : DS_Real_si<0x38, DS_READ2ST64_B32>;
-def DS_READ_I8_si         : DS_Real_si<0x39, DS_READ_I8>;
-def DS_READ_U8_si         : DS_Real_si<0x3a, DS_READ_U8>;
-def DS_READ_I16_si        : DS_Real_si<0x3b, DS_READ_I16>;
-def DS_READ_U16_si        : DS_Real_si<0x3c, DS_READ_U16>;
-def DS_CONSUME_si         : DS_Real_si<0x3d, DS_CONSUME>;
-def DS_APPEND_si          : DS_Real_si<0x3e, DS_APPEND>;
-def DS_ORDERED_COUNT_si   : DS_Real_si<0x3f, DS_ORDERED_COUNT>;
-def DS_ADD_U64_si         : DS_Real_si<0x40, DS_ADD_U64>;
-def DS_SUB_U64_si         : DS_Real_si<0x41, DS_SUB_U64>;
-def DS_RSUB_U64_si        : DS_Real_si<0x42, DS_RSUB_U64>;
-def DS_INC_U64_si         : DS_Real_si<0x43, DS_INC_U64>;
-def DS_DEC_U64_si         : DS_Real_si<0x44, DS_DEC_U64>;
-def DS_MIN_I64_si         : DS_Real_si<0x45, DS_MIN_I64>;
-def DS_MAX_I64_si         : DS_Real_si<0x46, DS_MAX_I64>;
-def DS_MIN_U64_si         : DS_Real_si<0x47, DS_MIN_U64>;
-def DS_MAX_U64_si         : DS_Real_si<0x48, DS_MAX_U64>;
-def DS_AND_B64_si         : DS_Real_si<0x49, DS_AND_B64>;
-def DS_OR_B64_si          : DS_Real_si<0x4a, DS_OR_B64>;
-def DS_XOR_B64_si         : DS_Real_si<0x4b, DS_XOR_B64>;
-def DS_MSKOR_B64_si       : DS_Real_si<0x4c, DS_MSKOR_B64>;
-def DS_WRITE_B64_si       : DS_Real_si<0x4d, DS_WRITE_B64>;
-def DS_WRITE2_B64_si      : DS_Real_si<0x4E, DS_WRITE2_B64>;
-def DS_WRITE2ST64_B64_si  : DS_Real_si<0x4f, DS_WRITE2ST64_B64>;
-def DS_CMPST_B64_si       : DS_Real_si<0x50, DS_CMPST_B64>;
-def DS_CMPST_F64_si       : DS_Real_si<0x51, DS_CMPST_F64>;
-def DS_MIN_F64_si         : DS_Real_si<0x52, DS_MIN_F64>;
-def DS_MAX_F64_si         : DS_Real_si<0x53, DS_MAX_F64>;
-
-def DS_ADD_RTN_U64_si     : DS_Real_si<0x60, DS_ADD_RTN_U64>;
-def DS_SUB_RTN_U64_si     : DS_Real_si<0x61, DS_SUB_RTN_U64>;
-def DS_RSUB_RTN_U64_si    : DS_Real_si<0x62, DS_RSUB_RTN_U64>;
-def DS_INC_RTN_U64_si     : DS_Real_si<0x63, DS_INC_RTN_U64>;
-def DS_DEC_RTN_U64_si     : DS_Real_si<0x64, DS_DEC_RTN_U64>;
-def DS_MIN_RTN_I64_si     : DS_Real_si<0x65, DS_MIN_RTN_I64>;
-def DS_MAX_RTN_I64_si     : DS_Real_si<0x66, DS_MAX_RTN_I64>;
-def DS_MIN_RTN_U64_si     : DS_Real_si<0x67, DS_MIN_RTN_U64>;
-def DS_MAX_RTN_U64_si     : DS_Real_si<0x68, DS_MAX_RTN_U64>;
-def DS_AND_RTN_B64_si     : DS_Real_si<0x69, DS_AND_RTN_B64>;
-def DS_OR_RTN_B64_si      : DS_Real_si<0x6a, DS_OR_RTN_B64>;
-def DS_XOR_RTN_B64_si     : DS_Real_si<0x6b, DS_XOR_RTN_B64>;
-def DS_MSKOR_RTN_B64_si   : DS_Real_si<0x6c, DS_MSKOR_RTN_B64>;
-def DS_WRXCHG_RTN_B64_si  : DS_Real_si<0x6d, DS_WRXCHG_RTN_B64>;
-def DS_WRXCHG2_RTN_B64_si : DS_Real_si<0x6e, DS_WRXCHG2_RTN_B64>;
-def DS_WRXCHG2ST64_RTN_B64_si : DS_Real_si<0x6f, DS_WRXCHG2ST64_RTN_B64>;
-def DS_CMPST_RTN_B64_si   : DS_Real_si<0x70, DS_CMPST_RTN_B64>;
-def DS_CMPST_RTN_F64_si   : DS_Real_si<0x71, DS_CMPST_RTN_F64>;
-def DS_MIN_RTN_F64_si     : DS_Real_si<0x72, DS_MIN_RTN_F64>;
-def DS_MAX_RTN_F64_si     : DS_Real_si<0x73, DS_MAX_RTN_F64>;
-
-def DS_READ_B64_si        : DS_Real_si<0x76, DS_READ_B64>;
-def DS_READ2_B64_si       : DS_Real_si<0x77, DS_READ2_B64>;
-def DS_READ2ST64_B64_si   : DS_Real_si<0x78, DS_READ2ST64_B64>;
-
-def DS_ADD_SRC2_U32_si    : DS_Real_si<0x80, DS_ADD_SRC2_U32>;
-def DS_SUB_SRC2_U32_si    : DS_Real_si<0x81, DS_SUB_SRC2_U32>;
-def DS_RSUB_SRC2_U32_si   : DS_Real_si<0x82, DS_RSUB_SRC2_U32>;
-def DS_INC_SRC2_U32_si    : DS_Real_si<0x83, DS_INC_SRC2_U32>;
-def DS_DEC_SRC2_U32_si    : DS_Real_si<0x84, DS_DEC_SRC2_U32>;
-def DS_MIN_SRC2_I32_si    : DS_Real_si<0x85, DS_MIN_SRC2_I32>;
-def DS_MAX_SRC2_I32_si    : DS_Real_si<0x86, DS_MAX_SRC2_I32>;
-def DS_MIN_SRC2_U32_si    : DS_Real_si<0x87, DS_MIN_SRC2_U32>;
-def DS_MAX_SRC2_U32_si    : DS_Real_si<0x88, DS_MAX_SRC2_U32>;
-def DS_AND_SRC2_B32_si    : DS_Real_si<0x89, DS_AND_SRC2_B32>;
-def DS_OR_SRC2_B32_si     : DS_Real_si<0x8a, DS_OR_SRC2_B32>;
-def DS_XOR_SRC2_B32_si    : DS_Real_si<0x8b, DS_XOR_SRC2_B32>;
-def DS_WRITE_SRC2_B32_si  : DS_Real_si<0x8d, DS_WRITE_SRC2_B32>;
-
-def DS_MIN_SRC2_F32_si    : DS_Real_si<0x92, DS_MIN_SRC2_F32>;
-def DS_MAX_SRC2_F32_si    : DS_Real_si<0x93, DS_MAX_SRC2_F32>;
-
-def DS_ADD_SRC2_U64_si    : DS_Real_si<0xc0, DS_ADD_SRC2_U64>;
-def DS_SUB_SRC2_U64_si    : DS_Real_si<0xc1, DS_SUB_SRC2_U64>;
-def DS_RSUB_SRC2_U64_si   : DS_Real_si<0xc2, DS_RSUB_SRC2_U64>;
-def DS_INC_SRC2_U64_si    : DS_Real_si<0xc3, DS_INC_SRC2_U64>;
-def DS_DEC_SRC2_U64_si    : DS_Real_si<0xc4, DS_DEC_SRC2_U64>;
-def DS_MIN_SRC2_I64_si    : DS_Real_si<0xc5, DS_MIN_SRC2_I64>;
-def DS_MAX_SRC2_I64_si    : DS_Real_si<0xc6, DS_MAX_SRC2_I64>;
-def DS_MIN_SRC2_U64_si    : DS_Real_si<0xc7, DS_MIN_SRC2_U64>;
-def DS_MAX_SRC2_U64_si    : DS_Real_si<0xc8, DS_MAX_SRC2_U64>;
-def DS_AND_SRC2_B64_si    : DS_Real_si<0xc9, DS_AND_SRC2_B64>;
-def DS_OR_SRC2_B64_si     : DS_Real_si<0xca, DS_OR_SRC2_B64>;
-def DS_XOR_SRC2_B64_si    : DS_Real_si<0xcb, DS_XOR_SRC2_B64>;
-def DS_WRITE_SRC2_B64_si  : DS_Real_si<0xcd, DS_WRITE_SRC2_B64>;
-
-def DS_MIN_SRC2_F64_si    : DS_Real_si<0xd2, DS_MIN_SRC2_F64>;
-def DS_MAX_SRC2_F64_si    : DS_Real_si<0xd3, DS_MAX_SRC2_F64>;
-def DS_WRITE_B96_si       : DS_Real_si<0xde, DS_WRITE_B96>;
-def DS_WRITE_B128_si      : DS_Real_si<0xdf, DS_WRITE_B128>;
-def DS_READ_B96_si        : DS_Real_si<0xfe, DS_READ_B96>;
-def DS_READ_B128_si       : DS_Real_si<0xff, DS_READ_B128>;
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass DS_Real_gfx10<bits<8> op>  { // defm X : DS_Real_gfx10<op> defines X_gfx10 from the DS_Pseudo named X
+    def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+                                              SIEncodingFamily.GFX10>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm DS_ADD_F32          : DS_Real_gfx10<0x015>; // entries in this list get only a _gfx10 real from this multiclass
+defm DS_ADD_RTN_F32      : DS_Real_gfx10<0x055>;
+defm DS_ADD_SRC2_F32     : DS_Real_gfx10<0x095>;
+defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
+defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
+defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
+defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
+defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
+defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
+defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
+defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;
+defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
+defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
+defm DS_PERMUTE_B32      : DS_Real_gfx10<0x0b2>;
+defm DS_BPERMUTE_B32     : DS_Real_gfx10<0x0b3>;
+
+//===----------------------------------------------------------------------===//
+// GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+  multiclass DS_Real_gfx7<bits<8> op> { // defm X : DS_Real_gfx7<op> defines X_gfx7 from the DS_Pseudo named X
+    def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+                                             SIEncodingFamily.SI>; // GFX7-only real, but it uses the SI encoding family
+  }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+multiclass DS_Real_gfx7_gfx10<bits<8> op> : // combines both parents: one defm yields the _gfx7 and _gfx10 reals with the same op
+  DS_Real_gfx7<op>, DS_Real_gfx10<op>;
+
+// FIXME-GFX7: Add tests when upstreaming this part.
+defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10<0x018>; // each entry defines NAME_gfx7 and NAME_gfx10 with the same opcode
+defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10<0x034>;
+defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10<0x07e>;
+defm DS_WRITE_B96            : DS_Real_gfx7_gfx10<0x0de>;
+defm DS_WRITE_B128           : DS_Real_gfx7_gfx10<0x0df>;
+defm DS_READ_B96             : DS_Real_gfx7_gfx10<0x0fe>;
+defm DS_READ_B128            : DS_Real_gfx7_gfx10<0x0ff>;
+
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass DS_Real_gfx6_gfx7<bits<8> op> { // defm X : DS_Real_gfx6_gfx7<op> defines X_gfx6_gfx7 from the DS_Pseudo named X
+    def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+                                                  SIEncodingFamily.SI>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> : // combines both parents: one defm yields the _gfx6_gfx7 and _gfx10 reals with the same op
+  DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
+
+defm DS_ADD_U32             : DS_Real_gfx6_gfx7_gfx10<0x000>; // each entry defines NAME_gfx6_gfx7 and NAME_gfx10 with the same opcode
+defm DS_SUB_U32             : DS_Real_gfx6_gfx7_gfx10<0x001>;
+defm DS_RSUB_U32            : DS_Real_gfx6_gfx7_gfx10<0x002>;
+defm DS_INC_U32             : DS_Real_gfx6_gfx7_gfx10<0x003>;
+defm DS_DEC_U32             : DS_Real_gfx6_gfx7_gfx10<0x004>;
+defm DS_MIN_I32             : DS_Real_gfx6_gfx7_gfx10<0x005>;
+defm DS_MAX_I32             : DS_Real_gfx6_gfx7_gfx10<0x006>;
+defm DS_MIN_U32             : DS_Real_gfx6_gfx7_gfx10<0x007>;
+defm DS_MAX_U32             : DS_Real_gfx6_gfx7_gfx10<0x008>;
+defm DS_AND_B32             : DS_Real_gfx6_gfx7_gfx10<0x009>;
+defm DS_OR_B32              : DS_Real_gfx6_gfx7_gfx10<0x00a>;
+defm DS_XOR_B32             : DS_Real_gfx6_gfx7_gfx10<0x00b>;
+defm DS_MSKOR_B32           : DS_Real_gfx6_gfx7_gfx10<0x00c>;
+defm DS_WRITE_B32           : DS_Real_gfx6_gfx7_gfx10<0x00d>;
+defm DS_WRITE2_B32          : DS_Real_gfx6_gfx7_gfx10<0x00e>;
+defm DS_WRITE2ST64_B32      : DS_Real_gfx6_gfx7_gfx10<0x00f>;
+defm DS_CMPST_B32           : DS_Real_gfx6_gfx7_gfx10<0x010>;
+defm DS_CMPST_F32           : DS_Real_gfx6_gfx7_gfx10<0x011>;
+defm DS_MIN_F32             : DS_Real_gfx6_gfx7_gfx10<0x012>;
+defm DS_MAX_F32             : DS_Real_gfx6_gfx7_gfx10<0x013>;
+defm DS_NOP                 : DS_Real_gfx6_gfx7_gfx10<0x014>;
+defm DS_GWS_INIT            : DS_Real_gfx6_gfx7_gfx10<0x019>;
+defm DS_GWS_SEMA_V          : DS_Real_gfx6_gfx7_gfx10<0x01a>;
+defm DS_GWS_SEMA_BR         : DS_Real_gfx6_gfx7_gfx10<0x01b>;
+defm DS_GWS_SEMA_P          : DS_Real_gfx6_gfx7_gfx10<0x01c>;
+defm DS_GWS_BARRIER         : DS_Real_gfx6_gfx7_gfx10<0x01d>;
+defm DS_WRITE_B8            : DS_Real_gfx6_gfx7_gfx10<0x01e>;
+defm DS_WRITE_B16           : DS_Real_gfx6_gfx7_gfx10<0x01f>;
+defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x020>;
+defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x021>;
+defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10<0x022>;
+defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x023>;
+defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x024>;
+defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10<0x025>;
+defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10<0x026>;
+defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x027>;
+defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10<0x028>;
+defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10<0x029>;
+defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10<0x02a>;
+defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10<0x02b>;
+defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x02c>;
+defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
+defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
+defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
+defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;
+defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10<0x032>;
+defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10<0x033>;
+defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10<0x035>;
+defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
+defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
+defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
+defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
+defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
+defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
+defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;
+defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10<0x03d>;
+defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10<0x03e>;
+defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10<0x03f>;
+defm DS_ADD_U64             : DS_Real_gfx6_gfx7_gfx10<0x040>;
+defm DS_SUB_U64             : DS_Real_gfx6_gfx7_gfx10<0x041>;
+defm DS_RSUB_U64            : DS_Real_gfx6_gfx7_gfx10<0x042>;
+defm DS_INC_U64             : DS_Real_gfx6_gfx7_gfx10<0x043>;
+defm DS_DEC_U64             : DS_Real_gfx6_gfx7_gfx10<0x044>;
+defm DS_MIN_I64             : DS_Real_gfx6_gfx7_gfx10<0x045>;
+defm DS_MAX_I64             : DS_Real_gfx6_gfx7_gfx10<0x046>;
+defm DS_MIN_U64             : DS_Real_gfx6_gfx7_gfx10<0x047>;
+defm DS_MAX_U64             : DS_Real_gfx6_gfx7_gfx10<0x048>;
+defm DS_AND_B64             : DS_Real_gfx6_gfx7_gfx10<0x049>;
+defm DS_OR_B64              : DS_Real_gfx6_gfx7_gfx10<0x04a>;
+defm DS_XOR_B64             : DS_Real_gfx6_gfx7_gfx10<0x04b>;
+defm DS_MSKOR_B64           : DS_Real_gfx6_gfx7_gfx10<0x04c>;
+defm DS_WRITE_B64           : DS_Real_gfx6_gfx7_gfx10<0x04d>;
+defm DS_WRITE2_B64          : DS_Real_gfx6_gfx7_gfx10<0x04e>;
+defm DS_WRITE2ST64_B64      : DS_Real_gfx6_gfx7_gfx10<0x04f>;
+defm DS_CMPST_B64           : DS_Real_gfx6_gfx7_gfx10<0x050>;
+defm DS_CMPST_F64           : DS_Real_gfx6_gfx7_gfx10<0x051>;
+defm DS_MIN_F64             : DS_Real_gfx6_gfx7_gfx10<0x052>;
+defm DS_MAX_F64             : DS_Real_gfx6_gfx7_gfx10<0x053>;
+defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x060>;
+defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x061>;
+defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10<0x062>;
+defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x063>;
+defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x064>;
+defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10<0x065>;
+defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10<0x066>;
+defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x067>;
+defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10<0x068>;
+defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10<0x069>;
+defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10<0x06a>;
+defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10<0x06b>;
+defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x06c>;
+defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
+defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
+defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
+defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;
+defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10<0x072>;
+defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10<0x073>;
+defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
+defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
+defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;
+defm DS_ADD_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x080>;
+defm DS_SUB_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x081>;
+defm DS_RSUB_SRC2_U32       : DS_Real_gfx6_gfx7_gfx10<0x082>;
+defm DS_INC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x083>;
+defm DS_DEC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x084>;
+defm DS_MIN_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x085>;
+defm DS_MAX_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x086>;
+defm DS_MIN_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x087>;
+defm DS_MAX_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x088>;
+defm DS_AND_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x089>;
+defm DS_OR_SRC2_B32         : DS_Real_gfx6_gfx7_gfx10<0x08a>;
+defm DS_XOR_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x08b>;
+defm DS_WRITE_SRC2_B32      : DS_Real_gfx6_gfx7_gfx10<0x08d>;
+defm DS_MIN_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x092>;
+defm DS_MAX_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x093>;
+defm DS_ADD_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
+defm DS_SUB_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
+defm DS_RSUB_SRC2_U64       : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
+defm DS_INC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
+defm DS_DEC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
+defm DS_MIN_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
+defm DS_MAX_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
+defm DS_MIN_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
+defm DS_MAX_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
+defm DS_AND_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
+defm DS_OR_SRC2_B64         : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
+defm DS_XOR_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
+defm DS_WRITE_SRC2_B64      : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
+defm DS_MIN_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
+defm DS_MAX_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
 
 //===----------------------------------------------------------------------===//
-// VIInstructions.td
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
 class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
   DS_Real <ds>,
   SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> {
-  let AssemblerPredicates = [isVI];
-  let DecoderNamespace="VI";
+  let AssemblerPredicates = [isGFX8GFX9];
+  let DecoderNamespace = "GFX8";
 
   // encoding
   let Inst{7-0}   = !if(ds.has_offset0, offset0, 0);
@@ -1008,7 +1054,7 @@ class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
   let Inst{16}    = !if(ds.has_gds, gds, ds.gdsValue);
   let Inst{24-17} = op;
   let Inst{31-26} = 0x36; // ds prefix
-  let Inst{39-32} = !if(ds.has_addr, addr, 0);
+  let Inst{39-32} = !if(ds.has_addr, addr, !if(ds.has_gws_data0, data0, 0));
   let Inst{47-40} = !if(ds.has_data0, data0, 0);
   let Inst{55-48} = !if(ds.has_data1, data1, 0);
   let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f3de903f21b2..4ec4be9bc485 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,13 +21,14 @@
 #include "AMDGPURegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm-c/Disassembler.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCExpr.h"
@@ -52,8 +52,22 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-disassembler"
 
+#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
+                            : AMDGPU::EncValues::SGPR_MAX_SI)
+
 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
 
+AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
+                                       MCContext &Ctx,
+                                       MCInstrInfo const *MCII) :
+  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
+
+  // ToDo: AMDGPUDisassembler supports only GCN3-encoding (VI) and GFX10 ISAs.
+  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
+    report_fatal_error("Disassembly not yet supported for subtarget");
+}
+
 inline static MCDisassembler::DecodeStatus
 addOperand(MCInst &Inst, const MCOperand& Opnd) {
   Inst.addOperand(Opnd);
@@ -77,6 +91,8 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr, const void *Decoder) {
   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
 
+  // Our branches take a simm16, but we need two extra bits to account for the
+  // factor of 4.
   APInt SignedOffset(18, Imm * 4, true);
   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
 
@@ -85,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, MCOperand::createImm(Imm));
 }
 
+static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
+                                  uint64_t Addr, const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeBoolReg(Val));
+}
+
 #define DECODE_OPERAND(StaticDecoderName, DecoderName) \
 static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                        unsigned Imm, \
@@ -98,6 +120,7 @@ static DecodeStatus StaticDecoderName(MCInst &Inst, \
 DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
 
 DECODE_OPERAND_REG(VGPR_32)
+DECODE_OPERAND_REG(VRegOrLds_32)
 DECODE_OPERAND_REG(VS_32)
 DECODE_OPERAND_REG(VS_64)
 DECODE_OPERAND_REG(VS_128)
@@ -109,12 +132,20 @@ DECODE_OPERAND_REG(VReg_128)
 DECODE_OPERAND_REG(SReg_32)
 DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
 DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
+DECODE_OPERAND_REG(SRegOrLds_32)
 DECODE_OPERAND_REG(SReg_64)
 DECODE_OPERAND_REG(SReg_64_XEXEC)
 DECODE_OPERAND_REG(SReg_128)
 DECODE_OPERAND_REG(SReg_256)
 DECODE_OPERAND_REG(SReg_512)
 
+DECODE_OPERAND_REG(AGPR_32)
+DECODE_OPERAND_REG(AReg_128)
+DECODE_OPERAND_REG(AReg_512)
+DECODE_OPERAND_REG(AReg_1024)
+DECODE_OPERAND_REG(AV_32)
+DECODE_OPERAND_REG(AV_64)
+
 static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                          unsigned Imm,
                                          uint64_t Addr,
@@ -131,6 +162,62 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
   return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
 }
 
+static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
+                                        unsigned Imm,
+                                        uint64_t Addr,
+                                        const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
+                                        unsigned Imm,
+                                        uint64_t Addr,
+                                        const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
+}
+
+static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
+                                           unsigned Imm,
+                                           uint64_t Addr,
+                                           const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
+                                           unsigned Imm,
+                                           uint64_t Addr,
+                                           const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
+                                            unsigned Imm,
+                                            uint64_t Addr,
+                                            const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
+                                          unsigned Imm,
+                                          uint64_t Addr,
+                                          const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
+}
+
+static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst,
+                                         unsigned Imm,
+                                         uint64_t Addr,
+                                         const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm));
+}
+
 #define DECODE_SDWA(DecName) \
 DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
 
@@ -168,6 +255,16 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
   return MCDisassembler::Fail;
 }
 
+static bool isValidDPP8(const MCInst &MI) {
+  using namespace llvm::AMDGPU::DPP;
+  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
+  assert(FiIdx != -1);
+  if ((unsigned)FiIdx >= MI.getNumOperands())
+    return false;
+  unsigned Fi = MI.getOperand(FiIdx).getImm();
+  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
+}
+
 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                 ArrayRef<uint8_t> Bytes_,
                                                 uint64_t Address,
@@ -176,11 +273,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   CommentStream = &CS;
   bool IsSDWA = false;
 
-  // ToDo: AMDGPUDisassembler supports only VI ISA.
-  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
-    report_fatal_error("Disassembly not yet supported for subtarget");
-
-  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
+  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
   Bytes = Bytes_.slice(0, MaxInstBytesNum);
 
   DecodeStatus Res = MCDisassembler::Fail;
@@ -192,6 +285,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     // encodings
     if (Bytes.size() >= 8) {
       const uint64_t QW = eatBytes<uint64_t>(Bytes);
+
+      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+        break;
+
+      MI = MCInst(); // clear
+
       Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
       if (Res) break;
 
@@ -201,6 +301,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
       if (Res) { IsSDWA = true;  break; }
 
+      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
+      if (Res) { IsSDWA = true;  break; }
+
+      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
+      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
+      // table first so we print the correct name.
+
+      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
+        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
+        if (Res) break;
+      }
+
       if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
         Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
         if (Res)
@@ -223,7 +335,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     // Try decode 32-bit instruction
     if (Bytes.size() < 4) break;
     const uint32_t DW = eatBytes<uint32_t>(Bytes);
-    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
+    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
     if (Res) break;
 
     Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
@@ -232,33 +344,84 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
     if (Res) break;
 
+    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
+    if (Res) break;
+
     if (Bytes.size() < 4) break;
     const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
-    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
+    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
     if (Res) break;
 
     Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
     if (Res) break;
 
     Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
+    if (Res) break;
+
+    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
   } while (false);
 
+  if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral ||
+        !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) {
+    MaxInstBytesNum = 8;
+    Bytes = Bytes_.slice(0, MaxInstBytesNum);
+    eatBytes<uint64_t>(Bytes);
+  }
+
   if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
-              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
               MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
-              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
+              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
+              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
     // Insert dummy unused src2_modifiers.
     insertNamedMCOperand(MI, MCOperand::createImm(0),
                          AMDGPU::OpName::src2_modifiers);
   }
 
   if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
-    Res = convertMIMGInst(MI);
+    int VAddr0Idx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+    int RsrcIdx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
+    if (VAddr0Idx >= 0 && NSAArgs > 0) {
+      unsigned NSAWords = (NSAArgs + 3) / 4;
+      if (Bytes.size() < 4 * NSAWords) {
+        Res = MCDisassembler::Fail;
+      } else {
+        for (unsigned i = 0; i < NSAArgs; ++i) {
+          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
+                    decodeOperand_VGPR_32(Bytes[i]));
+        }
+        Bytes = Bytes.slice(4 * NSAWords);
+      }
+    }
+
+    if (Res)
+      Res = convertMIMGInst(MI);
   }
 
   if (Res && IsSDWA)
     Res = convertSDWAInst(MI);
 
+  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                              AMDGPU::OpName::vdst_in);
+  if (VDstIn_Idx != -1) {
+    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
+                           MCOI::OperandConstraint::TIED_TO);
+    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
+         !MI.getOperand(VDstIn_Idx).isReg() ||
+         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
+      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
+        MI.erase(&MI.getOperand(VDstIn_Idx));
+      insertNamedMCOperand(MI,
+        MCOperand::createReg(MI.getOperand(Tied).getReg()),
+        AMDGPU::OpName::vdst_in);
+    }
+  }
+
   // if the opcode was not recognized we'll assume a Size of 4 bytes
   // (unless there are fewer bytes left)
   Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -267,7 +430,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 }
 
 DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
-  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
+  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
     if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
       // VOPC - insert clamp
       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
@@ -285,9 +449,27 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
   return MCDisassembler::Success;
 }
 
-// Note that MIMG format provides no information about VADDR size.
-// Consequently, decoded instructions always show address
-// as if it has 1 dword, which could be not really so.
+DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
+  unsigned Opc = MI.getOpcode();
+  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+
+  // Insert dummy unused src modifiers.
+  if (MI.getNumOperands() < DescNumOps &&
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+    insertNamedMCOperand(MI, MCOperand::createImm(0),
+                         AMDGPU::OpName::src0_modifiers);
+
+  if (MI.getNumOperands() < DescNumOps &&
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+    insertNamedMCOperand(MI, MCOperand::createImm(0),
+                         AMDGPU::OpName::src1_modifiers);
+
+  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
+}
+
+// Note that before gfx10, the MIMG encoding provided no information about
+// VADDR size. Consequently, decoded instructions always show address as if it
+// has 1 dword, which may not actually be the case.
 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
 
   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -295,7 +477,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
 
   int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::vdata);
-
+  int VAddr0Idx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
   int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::dmask);
 
@@ -308,16 +491,42 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
   assert(DMaskIdx != -1);
   assert(TFEIdx != -1);
 
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
   bool IsAtomic = (VDstIdx != -1);
   bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
 
-  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
-  if (DMask == 0)
-    return MCDisassembler::Success;
+  bool IsNSA = false;
+  unsigned AddrSize = Info->VAddrDwords;
+
+  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+    unsigned DimIdx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
+    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+        AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+    const AMDGPU::MIMGDimInfo *Dim =
+        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
+
+    AddrSize = BaseOpcode->NumExtraArgs +
+               (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+               (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+               (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
+    if (!IsNSA) {
+      if (AddrSize > 8)
+        AddrSize = 16;
+      else if (AddrSize > 4)
+        AddrSize = 8;
+    } else {
+      if (AddrSize > Info->VAddrDwords) {
+        // The NSA encoding does not contain enough operands for the combination
+        // of base opcode / dimension. Should this be an error?
+        return MCDisassembler::Success;
+      }
+    }
+  }
 
-  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
-  if (DstSize == 1)
-    return MCDisassembler::Success;
+  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
+  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);
 
   bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
   if (D16 && AMDGPU::hasPackedD16(STI)) {
@@ -328,44 +537,64 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
   if (MI.getOperand(TFEIdx).getImm())
     return MCDisassembler::Success;
 
-  int NewOpcode = -1;
+  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
+    return MCDisassembler::Success;
+
+  int NewOpcode =
+      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
+  if (NewOpcode == -1)
+    return MCDisassembler::Success;
 
-  if (IsGather4) {
-    if (D16 && AMDGPU::hasPackedD16(STI))
-      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
-    else
+  // Widen the register to the correct number of enabled channels.
+  unsigned NewVdata = AMDGPU::NoRegister;
+  if (DstSize != Info->VDataDwords) {
+    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+
+    // Get first subregister of VData
+    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
+    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+
+    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
+                                       &MRI.getRegClass(DataRCID));
+    if (NewVdata == AMDGPU::NoRegister) {
+      // It's possible to encode this such that the low register + enabled
+      // components exceeds the register count.
       return MCDisassembler::Success;
-  } else {
-    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
-    if (NewOpcode == -1)
+    }
+  }
+
+  unsigned NewVAddr0 = AMDGPU::NoRegister;
+  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
+      AddrSize != Info->VAddrDwords) {
+    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
+    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
+    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
+
+    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
+    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
+                                        &MRI.getRegClass(AddrRCID));
+    if (NewVAddr0 == AMDGPU::NoRegister)
       return MCDisassembler::Success;
   }
 
-  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+  MI.setOpcode(NewOpcode);
 
-  // Get first subregister of VData
-  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
-  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
-  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+  if (NewVdata != AMDGPU::NoRegister) {
+    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
 
-  // Widen the register to the correct number of enabled channels.
-  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
-                                          &MRI.getRegClass(RCID));
-  if (NewVdata == AMDGPU::NoRegister) {
-    // It's possible to encode this such that the low register + enabled
-    // components exceeds the register count.
-    return MCDisassembler::Success;
+    if (IsAtomic) {
+      // Atomic operations have an additional operand (a copy of data)
+      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+    }
   }
 
-  MI.setOpcode(NewOpcode);
-  // vaddr will be always appear as a single VGPR. This will look different than
-  // how it is usually emitted because the number of register components is not
-  // in the instruction encoding.
-  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
-
-  if (IsAtomic) {
-    // Atomic operations have an additional operand (a copy of data)
-    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+  if (NewVAddr0 != AMDGPU::NoRegister) {
+    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
+  } else if (IsNSA) {
+    assert(AddrSize <= Info->VAddrDwords);
+    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
+             MI.begin() + VAddr0Idx + Info->VAddrDwords);
   }
 
   return MCDisassembler::Success;
@@ -470,6 +699,34 @@ MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
   return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
+  return decodeSrcOp(OPW32, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
+  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
+  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
+  return decodeSrcOp(OPW32, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
+  return decodeSrcOp(OPW64, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
   return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
 }
@@ -482,6 +739,14 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
   return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
   // table-gen generated disassembler doesn't care about operand types
   // leaving only registry class so SSrc_32 operand turns into SReg_32
@@ -501,6 +766,13 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
   return decodeOperand_SReg_32(Val);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
+  // The TableGen-generated disassembler doesn't care about operand types,
+  // leaving only the register class, so an SSrc_32 operand turns into SReg_32;
+  // therefore we accept immediates and literals here as well.
+  return decodeSrcOp(OPW32, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
   return decodeSrcOp(OPW64, Val);
 }
@@ -628,6 +900,9 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
   switch (Width) {
   case OPW32:
+  case OPW128: // splat constants
+  case OPW512:
+  case OPW1024:
     return MCOperand::createImm(getInlineImmVal32(Imm));
   case OPW64:
     return MCOperand::createImm(getInlineImmVal64(Imm));
@@ -654,6 +929,24 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
   }
 }
 
+unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
+  using namespace AMDGPU;
+
+  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
+  switch (Width) {
+  default: // fall
+  case OPW32:
+  case OPW16:
+  case OPWV216:
+    return AGPR_32RegClassID;
+  case OPW64: return AReg_64RegClassID;
+  case OPW128: return AReg_128RegClassID;
+  case OPW512: return AReg_512RegClassID;
+  case OPW1024: return AReg_1024RegClassID;
+  }
+}
+
+
 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
   using namespace AMDGPU;
 
@@ -691,8 +984,10 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
   using namespace AMDGPU::EncValues;
 
-  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
-  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;
+  unsigned TTmpMin =
+      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
+  unsigned TTmpMax =
+      (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
 
   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
 }
@@ -700,10 +995,14 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
   using namespace AMDGPU::EncValues;
 
-  assert(Val < 512); // enum9
+  assert(Val < 1024); // enum10
+
+  bool IsAGPR = Val & 512;
+  Val &= 511;
 
   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
-    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
+    return createRegOperand(IsAGPR ? getAgprClassId(Width)
+                                   : getVgprClassId(Width), Val - VGPR_MIN);
   }
   if (Val <= SGPR_MAX) {
     assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
@@ -765,23 +1064,23 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
   case 105: return createRegOperand(XNACK_MASK_HI);
   case 106: return createRegOperand(VCC_LO);
   case 107: return createRegOperand(VCC_HI);
-  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
-  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
-  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
-  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
+  case 108: return createRegOperand(TBA_LO);
+  case 109: return createRegOperand(TBA_HI);
+  case 110: return createRegOperand(TMA_LO);
+  case 111: return createRegOperand(TMA_HI);
   case 124: return createRegOperand(M0);
+  case 125: return createRegOperand(SGPR_NULL);
   case 126: return createRegOperand(EXEC_LO);
   case 127: return createRegOperand(EXEC_HI);
   case 235: return createRegOperand(SRC_SHARED_BASE);
   case 236: return createRegOperand(SRC_SHARED_LIMIT);
   case 237: return createRegOperand(SRC_PRIVATE_BASE);
   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
-    // TODO: SRC_POPS_EXITING_WAVE_ID
-    // ToDo: no support for vccz register
-  case 251: break;
-    // ToDo: no support for execz register
-  case 252: break;
-  case 253: return createRegOperand(SCC);
+  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
+  case 254: return createRegOperand(LDS_DIRECT);
   default: break;
   }
   return errOperand(Val, "unknown operand encoding " + Twine(Val));
@@ -794,9 +1093,17 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
   case 102: return createRegOperand(FLAT_SCR);
   case 104: return createRegOperand(XNACK_MASK);
   case 106: return createRegOperand(VCC);
-  case 108: assert(!isGFX9()); return createRegOperand(TBA);
-  case 110: assert(!isGFX9()); return createRegOperand(TMA);
+  case 108: return createRegOperand(TBA);
+  case 110: return createRegOperand(TMA);
   case 126: return createRegOperand(EXEC);
+  case 235: return createRegOperand(SRC_SHARED_BASE);
+  case 236: return createRegOperand(SRC_SHARED_LIMIT);
+  case 237: return createRegOperand(SRC_PRIVATE_BASE);
+  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
   default: break;
   }
   return errOperand(Val, "unknown operand encoding " + Twine(Val));
@@ -807,16 +1114,18 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
   using namespace AMDGPU::SDWA;
   using namespace AMDGPU::EncValues;
 
-  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
-    // XXX: static_cast<int> is needed to avoid stupid warning:
+  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+    // XXX: cast to int is needed to avoid stupid warning:
     // compare with unsigned is always true
-    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
+    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
       return createRegOperand(getVgprClassId(Width),
                               Val - SDWA9EncValues::SRC_VGPR_MIN);
     }
     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
-        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
+        Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
+                          : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
       return createSRegOperand(getSgprClassId(Width),
                                Val - SDWA9EncValues::SRC_SGPR_MIN);
     }
@@ -852,24 +1161,34 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
   using namespace AMDGPU::SDWA;
 
-  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
-         "SDWAVopcDst should be present only on GFX9");
+  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
+         "SDWAVopcDst should be present only on GFX9+");
+
+  bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
+
   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
 
     int TTmpIdx = getTTmpIdx(Val);
     if (TTmpIdx >= 0) {
       return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
-    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
-      return decodeSpecialReg64(Val);
+    } else if (Val > SGPR_MAX) {
+      return IsWave64 ? decodeSpecialReg64(Val)
+                      : decodeSpecialReg32(Val);
     } else {
-      return createSRegOperand(getSgprClassId(OPW64), Val);
+      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
     }
   } else {
-    return createRegOperand(AMDGPU::VCC);
+    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
   }
 }
 
+MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
+  return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
+    decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
+}
+
 bool AMDGPUDisassembler::isVI() const {
   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
 }
@@ -878,6 +1197,10 @@ bool AMDGPUDisassembler::isGFX9() const {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 }
 
+bool AMDGPUDisassembler::isGFX10() const {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPUSymbolizer
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 75cfc5e11282..c5eaba615c2a 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -1,9 +1,8 @@
 //===- AMDGPUDisassembler.hpp - Disassembler for AMDGPU ISA -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,15 +41,14 @@ class AMDGPUDisassembler : public MCDisassembler {
 private:
   std::unique_ptr<MCInstrInfo const> const MCII;
   const MCRegisterInfo &MRI;
+  const unsigned TargetMaxInstBytes;
   mutable ArrayRef<uint8_t> Bytes;
   mutable uint32_t Literal;
   mutable bool HasLiteral;
 
 public:
   AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
-                     MCInstrInfo const *MCII) :
-    MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
-
+                     MCInstrInfo const *MCII);
   ~AMDGPUDisassembler() override = default;
 
   DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
@@ -69,9 +67,12 @@ public:
                              uint64_t Address) const;
 
   DecodeStatus convertSDWAInst(MCInst &MI) const;
+  DecodeStatus convertDPP8Inst(MCInst &MI) const;
   DecodeStatus convertMIMGInst(MCInst &MI) const;
 
   MCOperand decodeOperand_VGPR_32(unsigned Val) const;
+  MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const;
+
   MCOperand decodeOperand_VS_32(unsigned Val) const;
   MCOperand decodeOperand_VS_64(unsigned Val) const;
   MCOperand decodeOperand_VS_128(unsigned Val) const;
@@ -81,22 +82,33 @@ public:
   MCOperand decodeOperand_VReg_64(unsigned Val) const;
   MCOperand decodeOperand_VReg_96(unsigned Val) const;
   MCOperand decodeOperand_VReg_128(unsigned Val) const;
+  MCOperand decodeOperand_VReg_256(unsigned Val) const;
+  MCOperand decodeOperand_VReg_512(unsigned Val) const;
 
   MCOperand decodeOperand_SReg_32(unsigned Val) const;
   MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
   MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
+  MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;
   MCOperand decodeOperand_SReg_64(unsigned Val) const;
   MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
   MCOperand decodeOperand_SReg_128(unsigned Val) const;
   MCOperand decodeOperand_SReg_256(unsigned Val) const;
   MCOperand decodeOperand_SReg_512(unsigned Val) const;
 
+  MCOperand decodeOperand_AGPR_32(unsigned Val) const;
+  MCOperand decodeOperand_AReg_128(unsigned Val) const;
+  MCOperand decodeOperand_AReg_512(unsigned Val) const;
+  MCOperand decodeOperand_AReg_1024(unsigned Val) const;
+  MCOperand decodeOperand_AV_32(unsigned Val) const;
+  MCOperand decodeOperand_AV_64(unsigned Val) const;
+
   enum OpWidthTy {
     OPW32,
     OPW64,
     OPW128,
     OPW256,
     OPW512,
+    OPW1024,
     OPW16,
     OPWV216,
     OPW_LAST_,
@@ -104,6 +116,7 @@ public:
   };
 
   unsigned getVgprClassId(const OpWidthTy Width) const;
+  unsigned getAgprClassId(const OpWidthTy Width) const;
   unsigned getSgprClassId(const OpWidthTy Width) const;
   unsigned getTtmpClassId(const OpWidthTy Width) const;
 
@@ -121,11 +134,14 @@ public:
   MCOperand decodeSDWASrc32(unsigned Val) const;
   MCOperand decodeSDWAVopcDst(unsigned Val) const;
 
+  MCOperand decodeBoolReg(unsigned Val) const;
+
   int getTTmpIdx(unsigned Val) const;
 
   bool isVI() const;
   bool isGFX9() const;
-  };
+  bool isGFX10() const;
+};
 
 //===----------------------------------------------------------------------===//
 // AMDGPUSymbolizer
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 944f4ffe598d..0550092ce1d6 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -1,9 +1,8 @@
 //===-- EvergreenInstructions.td - EG Instruction defs  ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 44040d352e6a..889f60dae920 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -1,17 +1,16 @@
 //===-- FLATInstructions.td - FLAT Instruction Defintions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
 
-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
 
 //===----------------------------------------------------------------------===//
 // FLAT classes
@@ -52,6 +51,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
   bits<1> has_data = 1;
   bits<1> has_glc  = 1;
   bits<1> glcValue = 0;
+  bits<1> has_dlc  = 1;
+  bits<1> dlcValue = 0;
 
   let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
     !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -64,6 +65,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
   // and are not considered done until both have been decremented.
   let VM_CNT = 1;
   let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
+
+  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
 }
 
 class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
@@ -87,6 +90,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
 
   bits<1> slc;
   bits<1> glc;
+  bits<1> dlc;
 
   // Only valid on gfx9
   bits<1> lds = 0; // XXX - What does this actually do?
@@ -131,18 +135,16 @@ class GlobalSaddrTable <bit is_saddr, string Name = ""> {
 // saddr is 32-bit (which isn't handled here yet).
 class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
   bit HasTiedOutput = 0,
-  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
   (outs regClass:$vdst),
   !con(
     !con(
-      !con(
-        !con((ins VReg_64:$vaddr),
-          !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
-            (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-            (ins GLC:$glc, SLC:$slc)),
-            !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
-  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+      !con((ins VReg_64:$vaddr),
+        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
+          (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+          !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
+  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = HasSaddr;
@@ -155,16 +157,14 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
 }
 
 class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
-  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
   (outs),
   !con(
-    !con(
-      !con((ins VReg_64:$vaddr, vdataClass:$vdata),
-        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
-          (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-          (ins GLC:$glc, SLC:$slc)),
-  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+    !con((ins VReg_64:$vaddr, vdataClass:$vdata),
+      !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
+        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -176,18 +176,18 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
 
 multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
   let is_flat_global = 1 in {
-    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
       GlobalSaddrTable<0, opName>;
-    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
       GlobalSaddrTable<1, opName>;
   }
 }
 
 multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
   let is_flat_global = 1 in {
-    def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
       GlobalSaddrTable<0, opName>;
-    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
       GlobalSaddrTable<1, opName>;
   }
 }
@@ -197,9 +197,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
   opName,
   (outs regClass:$vdst),
   !if(EnableSaddr,
-      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+      (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = 1;
@@ -213,9 +213,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
   opName,
   (outs),
   !if(EnableSaddr,
-    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+    (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -247,6 +247,8 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
     let mayStore = 1;
     let has_glc  = 0;
     let glcValue = 0;
+    let has_dlc  = 0;
+    let dlcValue = 0;
     let has_vdst = 0;
     let maybeAtomic = 1;
 }
@@ -257,6 +259,7 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
   let hasPostISelHook = 1;
   let has_vdst = 1;
   let glcValue = 1;
+  let dlcValue = 0;
   let PseudoInstr = NAME # "_RTN";
 }
 
@@ -266,24 +269,28 @@ multiclass FLAT_Atomic_Pseudo<
   ValueType vt,
   SDPatternOperator atomic = null_frag,
   ValueType data_vt = vt,
-  RegisterClass data_rc = vdst_rc> {
+  RegisterClass data_rc = vdst_rc,
+  bit isFP = getIsFP<data_vt>.ret> {
   def "" : FLAT_AtomicNoRet_Pseudo <opName,
     (outs),
-    (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
+    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
     " $vaddr, $vdata$offset$slc">,
     GlobalSaddrTable<0, opName>,
     AtomicNoRet <opName, 0> {
     let PseudoInstr = NAME;
+    let FPAtomic = isFP;
   }
 
   def _RTN : FLAT_AtomicRet_Pseudo <opName,
     (outs vdst_rc:$vdst),
-    (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
+    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
     " $vdst, $vaddr, $vdata$offset glc$slc",
     [(set vt:$vdst,
       (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
        GlobalSaddrTable<0, opName#"_rtn">,
-       AtomicNoRet <opName, 1>;
+       AtomicNoRet <opName, 1>{
+    let FPAtomic = isFP;
+  }
 }
 
 multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
@@ -292,27 +299,30 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
   ValueType vt,
   SDPatternOperator atomic = null_frag,
   ValueType data_vt = vt,
-  RegisterClass data_rc = vdst_rc> {
+  RegisterClass data_rc = vdst_rc,
+  bit isFP = getIsFP<data_vt>.ret> {
 
   def "" : FLAT_AtomicNoRet_Pseudo <opName,
     (outs),
-    (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
     " $vaddr, $vdata, off$offset$slc">,
     GlobalSaddrTable<0, opName>,
     AtomicNoRet <opName, 0> {
     let has_saddr = 1;
     let PseudoInstr = NAME;
+    let FPAtomic = isFP;
   }
 
   def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
     (outs),
-    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
+    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
     " $vaddr, $vdata, $saddr$offset$slc">,
     GlobalSaddrTable<1, opName>,
     AtomicNoRet <opName#"_saddr", 0> {
     let has_saddr = 1;
     let enabled_saddr = 1;
     let PseudoInstr = NAME#"_SADDR";
+    let FPAtomic = isFP;
   }
 }
 
@@ -322,28 +332,31 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
   ValueType vt,
   SDPatternOperator atomic = null_frag,
   ValueType data_vt = vt,
-  RegisterClass data_rc = vdst_rc> {
+  RegisterClass data_rc = vdst_rc,
+  bit isFP = getIsFP<data_vt>.ret> {
 
   def _RTN : FLAT_AtomicRet_Pseudo <opName,
     (outs vdst_rc:$vdst),
-      (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
     " $vdst, $vaddr, $vdata, off$offset glc$slc",
     [(set vt:$vdst,
       (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
       GlobalSaddrTable<0, opName#"_rtn">,
       AtomicNoRet <opName, 1> {
     let has_saddr = 1;
+    let FPAtomic = isFP;
   }
 
   def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
     (outs vdst_rc:$vdst),
-      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
+      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
     " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
     GlobalSaddrTable<1, opName#"_rtn">,
     AtomicNoRet <opName#"_saddr", 1> {
      let has_saddr = 1;
      let enabled_saddr = 1;
      let PseudoInstr = NAME#"_SADDR_RTN";
+     let FPAtomic = isFP;
   }
 }
 
@@ -491,7 +504,8 @@ defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
 defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                 VReg_64, i64, atomic_dec_flat>;
 
-let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?
+// GFX7-, GFX10-only flat instructions.
+let SubtargetPredicate = isGFX7GFX10 in {
 
 defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                 VGPR_32, f32, null_frag, v2f32, VReg_64>;
@@ -511,7 +525,7 @@ defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
 defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                 VReg_64, f64>;
 
-} // End SubtargetPredicate = isCI
+} // End SubtargetPredicate = isGFX7GFX10
 
 let SubtargetPredicate = HasFlatGlobalInsts in {
 defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
@@ -654,6 +668,32 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
 
 } // End SubtargetPredicate = HasFlatScratchInsts
 
+let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
+  defm GLOBAL_ATOMIC_FCMPSWAP :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMIN :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMAX :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMIN_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMAX_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
+
+let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
+
+defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+  "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
+>;
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
+
 //===----------------------------------------------------------------------===//
 // Flat Patterns
 //===----------------------------------------------------------------------===//
@@ -661,89 +701,51 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
-multiclass FlatLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector vt:$elt0, (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))),
-    (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
-  >;
-
- def : GCNPat <
-    (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))))),
-    (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
-  >;
-}
-
-multiclass FlatSignedLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector vt:$elt0, (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))),
-    (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
-  >;
-
- def : GCNPat <
-    (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))))),
-    (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
-  >;
-}
-
-multiclass FlatLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
-    (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
-  >;
-
- def : GCNPat <
-    (build_vector (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
-    (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
-  >;
-}
-
-multiclass FlatSignedLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
-  def : GCNPat <
-    (build_vector (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
-    (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
-  >;
+class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
+>;
 
- def : GCNPat <
-    (build_vector (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
-    (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
-  >;
-}
+class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
+>;
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
-class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
@@ -752,6 +754,11 @@ class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
   (inst $vaddr, $data, $offset, $slc)
 >;
 
+class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
+  (inst $vaddr, $data, $offset, $slc)
+>;
+
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                      ValueType data_vt = vt> : GCNPat <
   (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
@@ -760,28 +767,33 @@ class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
 
 let OtherPredicates = [HasFlatAddressSpace] in {
 
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
 def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
 
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_flat, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_flat, i64>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
 
 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
 def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
 def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
 
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
 
 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -818,62 +830,77 @@ let OtherPredicates = [D16PreservesUnusedBits] in {
 def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
 def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
 
-let AddedComplexity = 3 in {
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_flat>;
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_flat>;
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_SHORT_D16_HI, load_flat>;
-}
-
-let AddedComplexity = 9 in {
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_flat>;
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_SBYTE_D16, sextloadi8_flat>;
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_SHORT_D16, load_flat>;
-}
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
+
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
 }
 
 } // End OtherPredicates = [HasFlatAddressSpace]
 
+def atomic_fadd_global    : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
+def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
+
 let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
 
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
 
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
 
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_global, i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_global, i64>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
 
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
 
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_global>;
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_global>;
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SHORT_D16_HI, load_global>;
-
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_global>;
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_global>;
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SHORT_D16, load_global>;
-
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
+
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
 }
 
 def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
 
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -903,7 +930,10 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
 
-} // End OtherPredicates = [HasFlatGlobalInsts]
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global, f32>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>;
+
+} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
 
 
 //===----------------------------------------------------------------------===//
@@ -917,8 +947,8 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
 class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
   FLAT_Real <op, ps>,
   SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate = isCIOnly;
-  let DecoderNamespace="CI";
+  let AssemblerPredicate = isGFX7Only;
+  let DecoderNamespace="GFX7";
 }
 
 def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
@@ -985,8 +1015,8 @@ defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2
 class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
   FLAT_Real <op, ps>,
   SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
-  let AssemblerPredicate = isVI;
-  let DecoderNamespace="VI";
+  let AssemblerPredicate = isGFX8GFX9;
+  let DecoderNamespace = "GFX8";
 }
 
 multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
@@ -1133,3 +1163,200 @@ defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
 defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
 defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
 defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
+
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
+    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+  let AssemblerPredicate = isGFX10Plus;
+  let DecoderNamespace = "GFX10";
+
+  let Inst{11-0}  = {offset{12}, offset{10-0}};
+  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
+  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+  let Inst{55}    = 0;
+}
+
+
+multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+  def _gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+}
+
+multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+  def _RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+}
+
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+  def _SADDR_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+  def _SADDR_RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_SADDR_gfx10<op>;
+
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>;
+
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
+  FLAT_Real_AllAddr_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>,
+  FLAT_Real_SADDR_RTN_gfx10<op>;
+
+
+// ENC_FLAT.
+defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
+defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
+defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
+defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
+defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
+defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
+defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
+defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
+defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
+defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
+defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
+defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
+defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
+defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
+defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
+defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
+defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
+defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
+defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
+defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
+defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
+defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
+defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
+defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
+defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
+defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
+defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
+defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
+defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
+defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
+defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
+defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
+defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
+defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
+defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
+defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
+defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
+defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
+defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
+defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
+defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
+defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
+defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
+defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
+defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
+defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
+defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
+defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
+defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
+defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
+defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
+defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
+defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
+defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
+
+
+// ENC_FLAT_GLBL.
+defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
+defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
+defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
+defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
+defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
+defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
+defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
+defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
+defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
+defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
+defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
+defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
+defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
+defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
+defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
+defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
+defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
+defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
+defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
+defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
+defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
+defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
+defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
+defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
+defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
+defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
+defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
+defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
+defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
+defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
+defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
+defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
+defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
+defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
+defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
+defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
+defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
+defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
+defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
+defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
+defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
+
+
+// ENC_FLAT_SCRATCH.
+defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
+defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
+defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
+defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
+defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
+defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
+defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
+defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
+defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
+defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
+
+let SubtargetPredicate = HasAtomicFaddInsts in {
+
+defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 56071d0d2374..e1845e2e8e87 100644
--- a/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -1,37 +1,40 @@
 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
-// operand.If any of the use instruction cannot be combined with the mov the
+// operand. If any of the use instructions cannot be combined with the mov the
 // whole sequence is reverted.
 //
 // $old = ...
 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
-//                            dpp_controls..., $bound_ctrl
-// $res = VALU $dpp_value, ...
+//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
+// $res = VALU $dpp_value [, src1]
 //
 // to
 //
-// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
-//                 dpp_controls..., $folded_bound_ctrl
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
 //
 // Combining rules :
 //
-// $bound_ctrl is DPP_BOUND_ZERO, $old is any
-// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+//    $combined_bound_ctrl = DPP_BOUND_ZERO
 //
-// ->$folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
-// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// if the VALU op is binary and
+//    $bound_ctrl==DPP_BOUND_OFF and
+//    $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+//    $combined_bound_ctrl = DPP_BOUND_OFF
 //
-// ->$folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
-// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// Otherwise cancel.
 //
-// ->$folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
+// The mov_dpp instruction should reside in the same BB as all its uses
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
@@ -67,20 +70,16 @@ class GCNDPPCombine : public MachineFunctionPass {
 
   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
 
-  RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
-                            RegSubRegPair OldOpndVGPR,
-                            MachineOperand &OldOpndValue) const;
-
   MachineInstr *createDPPInst(MachineInstr &OrigMI,
                               MachineInstr &MovMI,
-                              RegSubRegPair OldOpndVGPR,
+                              RegSubRegPair CombOldVGPR,
                               MachineOperand *OldOpnd,
-                              bool BoundCtrlZero) const;
+                              bool CombBCZ) const;
 
   MachineInstr *createDPPInst(MachineInstr &OrigMI,
                               MachineInstr &MovMI,
-                              RegSubRegPair OldOpndVGPR,
-                              bool BoundCtrlZero) const;
+                              RegSubRegPair CombOldVGPR,
+                              bool CombBCZ) const;
 
   bool hasNoImmOrEqual(MachineInstr &MI,
                        unsigned OpndName,
@@ -153,8 +152,8 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
 
 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                            MachineInstr &MovMI,
-                                           RegSubRegPair OldOpndVGPR,
-                                           bool BoundCtrlZero) const {
+                                           RegSubRegPair CombOldVGPR,
+                                           bool CombBCZ) const {
   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
   assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
          TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
@@ -178,9 +177,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
     if (OldIdx != -1) {
       assert(OldIdx == NumOperands);
-      assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
-      DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+      DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
       ++NumOperands;
+    } else {
+      // TODO: this discards MAC/FMA instructions for now, let's add it later
+      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
+                           " TBD\n");
+      Fail = true;
+      break;
     }
 
     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
@@ -199,6 +204,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
       break;
     }
     DPPInst.add(*Src0);
+    DPPInst->getOperand(NumOperands).setIsKill(false);
     ++NumOperands;
 
     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
@@ -231,7 +237,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
-    DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+    DPPInst.addImm(CombBCZ ? 1 : 0);
   } while (false);
 
   if (Fail) {
@@ -242,64 +248,81 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
   return DPPInst.getInstr();
 }
 
-GCNDPPCombine::RegSubRegPair
-GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
-                           RegSubRegPair OldOpndVGPR,
-                           MachineOperand &OldOpndValue) const {
-  assert(OldOpndValue.isImm());
-  switch (OrigMI.getOpcode()) {
+static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
+  assert(OldOpnd->isImm());
+  switch (OrigMIOp) {
   default: break;
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_I32_e32:
+  case AMDGPU::V_ADD_I32_e64:
+  case AMDGPU::V_OR_B32_e32:
+  case AMDGPU::V_OR_B32_e64:
+  case AMDGPU::V_SUBREV_U32_e32:
+  case AMDGPU::V_SUBREV_U32_e64:
+  case AMDGPU::V_SUBREV_I32_e32:
+  case AMDGPU::V_SUBREV_I32_e64:
   case AMDGPU::V_MAX_U32_e32:
-    if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
-      return OldOpndVGPR;
+  case AMDGPU::V_MAX_U32_e64:
+  case AMDGPU::V_XOR_B32_e32:
+  case AMDGPU::V_XOR_B32_e64:
+    if (OldOpnd->getImm() == 0)
+      return true;
     break;
-  case AMDGPU::V_MAX_I32_e32:
-    if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
-      return OldOpndVGPR;
+  case AMDGPU::V_AND_B32_e32:
+  case AMDGPU::V_AND_B32_e64:
+  case AMDGPU::V_MIN_U32_e32:
+  case AMDGPU::V_MIN_U32_e64:
+    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
+        std::numeric_limits<uint32_t>::max())
+      return true;
     break;
   case AMDGPU::V_MIN_I32_e32:
-    if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
-      return OldOpndVGPR;
+  case AMDGPU::V_MIN_I32_e64:
+    if (static_cast<int32_t>(OldOpnd->getImm()) ==
+        std::numeric_limits<int32_t>::max())
+      return true;
+    break;
+  case AMDGPU::V_MAX_I32_e32:
+  case AMDGPU::V_MAX_I32_e64:
+    if (static_cast<int32_t>(OldOpnd->getImm()) ==
+        std::numeric_limits<int32_t>::min())
+      return true;
     break;
-
   case AMDGPU::V_MUL_I32_I24_e32:
+  case AMDGPU::V_MUL_I32_I24_e64:
   case AMDGPU::V_MUL_U32_U24_e32:
-    if (OldOpndValue.getImm() == 1) {
-      auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
-      assert(Src1 && Src1->isReg());
-      return getRegSubRegPair(*Src1);
-    }
+  case AMDGPU::V_MUL_U32_U24_e64:
+    if (OldOpnd->getImm() == 1)
+      return true;
     break;
   }
-  return RegSubRegPair();
+  return false;
 }
 
-// Cases to combine:
-//  $bound_ctrl is DPP_BOUND_ZERO, $old is any
-//  $bound_ctrl is DPP_BOUND_OFF, $old is 0
-//  -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
-
-//  $bound_ctrl is DPP_BOUND_OFF, $old is undef
-//  -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
-
-//  $bound_ctrl is DPP_BOUND_OFF, $old is foldable
-//  -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
-
 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                            MachineInstr &MovMI,
-                                           RegSubRegPair OldOpndVGPR,
+                                           RegSubRegPair CombOldVGPR,
                                            MachineOperand *OldOpndValue,
-                                           bool BoundCtrlZero) const {
-  assert(OldOpndVGPR.Reg);
-  if (!BoundCtrlZero && OldOpndValue) {
-    assert(OldOpndValue->isImm());
-    OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
-    if (!OldOpndVGPR.Reg) {
-      LLVM_DEBUG(dbgs() << "  failed: old immediate cannot be folded\n");
+                                           bool CombBCZ) const {
+  assert(CombOldVGPR.Reg);
+  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
+    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+    if (!Src1 || !Src1->isReg()) {
+      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
+      return nullptr;
+    }
+    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
+      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
+      return nullptr;
+    }
+    CombOldVGPR = getRegSubRegPair(*Src1);
+    if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
+      LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
       return nullptr;
     }
   }
-  return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
 }
 
 // returns true if MI doesn't have OpndName immediate operand or the
@@ -316,31 +339,64 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
 
 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
+  assert(DstOpnd && DstOpnd->isReg());
+  auto DPPMovReg = DstOpnd->getReg();
+  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
+    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
+                         " for all uses\n");
+    return false;
+  }
+
+  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+  assert(RowMaskOpnd && RowMaskOpnd->isImm());
+  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+  assert(BankMaskOpnd && BankMaskOpnd->isImm());
+  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
+                            BankMaskOpnd->getImm() == 0xF;
+
   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
   assert(BCZOpnd && BCZOpnd->isImm());
-  bool BoundCtrlZero = 0 != BCZOpnd->getImm();
-
-  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+  bool BoundCtrlZero = BCZOpnd->getImm();
 
   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
   assert(OldOpnd && OldOpnd->isReg());
-  auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
-  auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+
+  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
+  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
+  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
+  // but the third option is used to distinguish undef from non-immediate
+  // to reuse IMPLICIT_DEF instruction later
   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
-  if (OldOpndValue) {
-    if (BoundCtrlZero) {
-      OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
-      OldOpndValue = nullptr;
-    } else {
-      if (!OldOpndValue->isImm()) {
-        LLVM_DEBUG(dbgs() << "  failed: old operand isn't an imm or undef\n");
-        return false;
-      }
-      if (OldOpndValue->getImm() == 0) {
-        OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
-        OldOpndValue = nullptr;
-        BoundCtrlZero = true;
+
+  bool CombBCZ = false;
+
+  if (MaskAllLanes && BoundCtrlZero) { // [1]
+    CombBCZ = true;
+  } else {
+    if (!OldOpndValue || !OldOpndValue->isImm()) {
+      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
+      return false;
+    }
+
+    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
+      LLVM_DEBUG(dbgs() <<
+        "  failed: old reg def and mov should be in the same BB\n");
+      return false;
+    }
+
+    if (OldOpndValue->getImm() == 0) {
+      if (MaskAllLanes) {
+        assert(!BoundCtrlZero); // by check [1]
+        CombBCZ = true;
       }
+    } else if (BoundCtrlZero) {
+      assert(!MaskAllLanes); // by check [1]
+      LLVM_DEBUG(dbgs() <<
+        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
+      return false;
     }
   }
 
@@ -348,25 +404,28 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
     if (!OldOpndValue)
       dbgs() << "undef";
     else
-      dbgs() << OldOpndValue->getImm();
-    dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
-
-  std::vector<MachineInstr*> OrigMIs, DPPMIs;
-  if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
-    OldOpndVGPR = RegSubRegPair(
+      dbgs() << *OldOpndValue;
+    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
+
+  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
+  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
+  // try to reuse previous old reg if it's undefined (IMPLICIT_DEF)
+  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
+    CombOldVGPR = RegSubRegPair(
       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
-                             TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
     DPPMIs.push_back(UndefInst.getInstr());
   }
 
   OrigMIs.push_back(&MovMI);
   bool Rollback = true;
-  for (auto &Use : MRI->use_nodbg_operands(
-       TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
     Rollback = true;
 
     auto &OrigMI = *Use.getParent();
+    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
+
     auto OrigOp = OrigMI.getOpcode();
     if (TII->isVOP3(OrigOp)) {
       if (!TII->hasVALU32BitEncoding(OrigOp)) {
@@ -389,8 +448,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
 
     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
     if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
-      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
-                                        OldOpndValue, BoundCtrlZero)) {
+      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
+                                        OldOpndValue, CombBCZ)) {
         DPPMIs.push_back(DPPInst);
         Rollback = false;
       }
@@ -401,8 +460,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
       BB->insert(OrigMI, NewMI);
       if (TII->commuteInstruction(*NewMI)) {
         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
-        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
-                                          OldOpndValue, BoundCtrlZero)) {
+        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
+                                          OldOpndValue, CombBCZ)) {
           DPPMIs.push_back(DPPInst);
           Rollback = false;
         }
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c6396de89c4f..885239e2faed 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -38,6 +38,7 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+  IsHazardRecognizerMode(false),
   CurrCycleInstr(nullptr),
   MF(MF),
   ST(MF.getSubtarget<GCNSubtarget>()),
@@ -45,7 +46,8 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   TRI(TII.getRegisterInfo()),
   ClauseUses(TRI.getNumRegUnits()),
   ClauseDefs(TRI.getNumRegUnits()) {
-  MaxLookAhead = 5;
+  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
+  TSchedModel.init(&ST);
 }
 
 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
@@ -88,18 +90,38 @@ static bool isSMovRel(unsigned Opcode) {
   }
 }
 
-static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
+                                    const MachineInstr &MI) {
+  if (TII.isAlwaysGDS(MI.getOpcode()))
+    return true;
+
   switch (MI.getOpcode()) {
   case AMDGPU::S_SENDMSG:
   case AMDGPU::S_SENDMSGHALT:
   case AMDGPU::S_TTRACEDATA:
     return true;
+  // These DS opcodes don't support GDS.
+  case AMDGPU::DS_NOP:
+  case AMDGPU::DS_PERMUTE_B32:
+  case AMDGPU::DS_BPERMUTE_B32:
+    return false;
   default:
-    // TODO: GDS
+    if (TII.isDS(MI.getOpcode())) {
+      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                           AMDGPU::OpName::gds);
+      if (MI.getOperand(GDS).getImm())
+        return true;
+    }
     return false;
   }
 }
 
+static bool isPermlane(const MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
+         Opcode == AMDGPU::V_PERMLANEX16_B32;
+}
+
 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                      AMDGPU::OpName::simm16);
@@ -109,6 +131,8 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
+  if (MI->isBundle())
+   return NoHazard;
 
   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
     return NoopHazard;
@@ -119,6 +143,15 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
       && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
+  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
+    return NoopHazard;
+
+  if (checkFPAtomicToDenormModeHazard(MI) > 0)
+    return NoopHazard;
+
+  if (ST.hasNoDataDepHazard())
+    return NoHazard;
+
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
     return NoopHazard;
 
@@ -145,10 +178,16 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
+  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
+    return NoopHazard;
+
+  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+    return NoopHazard;
+
   if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
     return NoopHazard;
 
@@ -158,22 +197,74 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   return NoHazard;
 }
 
+static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+      .addImm(0);
+}
+
+void GCNHazardRecognizer::processBundle() {
+  MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
+  MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
+  // Check each bundled MachineInstr for hazards.
+  for (; MI != E && MI->isInsideBundle(); ++MI) {
+    CurrCycleInstr = &*MI;
+    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
+
+    if (IsHazardRecognizerMode)
+      fixHazards(CurrCycleInstr);
+
+    for (unsigned i = 0; i < WaitStates; ++i)
+      insertNoopInBundle(CurrCycleInstr, TII);
+
+    // It is unnecessary to track more than MaxLookAhead instructions. Since we
+    // include the bundled MI directly after, only add a maximum of
+    // (MaxLookAhead - 1) noops to EmittedInstrs.
+    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
+      EmittedInstrs.push_front(nullptr);
+
+    EmittedInstrs.push_front(CurrCycleInstr);
+    EmittedInstrs.resize(MaxLookAhead);
+  }
+  CurrCycleInstr = nullptr;
+}
+
 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
-  return PreEmitNoops(SU->getInstr());
+  IsHazardRecognizerMode = false;
+  return PreEmitNoopsCommon(SU->getInstr());
 }
 
 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+  IsHazardRecognizerMode = true;
+  CurrCycleInstr = MI;
+  unsigned W = PreEmitNoopsCommon(MI);
+  fixHazards(MI);
+  CurrCycleInstr = nullptr;
+  return W;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
+  if (MI->isBundle())
+    return 0;
+
   int WaitStates = std::max(0, checkAnyInstHazards(MI));
 
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI))
-    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
+  if (ST.hasNSAtoVMEMBug())
+    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+
+  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
+
+  if (ST.hasNoDataDepHazard())
+    return WaitStates;
+
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+
   if (SIInstrInfo::isDPP(*MI))
     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
@@ -199,9 +290,15 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
                                            isSMovRel(MI->getOpcode())))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
-  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
+  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
+  if (SIInstrInfo::isMAI(*MI))
+    return std::max(WaitStates, checkMAIHazards(MI));
+
+  if (MI->mayLoad() || MI->mayStore())
+    return std::max(WaitStates, checkMAILdStHazards(MI));
+
   return WaitStates;
 }
 
@@ -218,10 +315,14 @@ void GCNHazardRecognizer::AdvanceCycle() {
   // Do not track non-instructions which do not affect the wait states.
   // If included, these instructions can lead to buffer overflow such that
   // detectable hazards are missed.
-  if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
+      CurrCycleInstr->isKill())
     return;
-  else if (CurrCycleInstr->isDebugInstr())
+
+  if (CurrCycleInstr->isBundle()) {
+    processBundle();
     return;
+  }
 
   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
 
@@ -252,41 +353,112 @@ void GCNHazardRecognizer::RecedeCycle() {
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-int GCNHazardRecognizer::getWaitStatesSince(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
+
+// Returns the minimum number of wait states since \p I, walking all
+// predecessors. Scanning stops as soon as \p IsExpired returns true.
+// Can only be run in hazard recognizer mode.
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineBasicBlock *MBB,
+                              MachineBasicBlock::reverse_instr_iterator I,
+                              int WaitStates,
+                              IsExpiredFn IsExpired,
+                              DenseSet<const MachineBasicBlock *> &Visited) {
+  for (auto E = MBB->instr_rend(); I != E; ++I) {
+    // Don't add WaitStates for parent BUNDLE instructions.
+    if (I->isBundle())
+      continue;
+
+    if (IsHazard(&*I))
+      return WaitStates;
+
+    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+      continue;
+
+    WaitStates += SIInstrInfo::getNumWaitStates(*I);
+
+    if (IsExpired(&*I, WaitStates))
+      return std::numeric_limits<int>::max();
+  }
+
+  int MinWaitStates = WaitStates;
+  bool Found = false;
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
+                               WaitStates, IsExpired, Visited);
+
+    if (W == std::numeric_limits<int>::max())
+      continue;
+
+    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
+    if (IsExpired(nullptr, MinWaitStates))
+      return MinWaitStates;
+
+    Found = true;
+  }
+
+  if (Found)
+    return MinWaitStates;
+
+  return std::numeric_limits<int>::max();
+}
+
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+                              MachineInstr *MI,
+                              IsExpiredFn IsExpired) {
+  DenseSet<const MachineBasicBlock *> Visited;
+  return getWaitStatesSince(IsHazard, MI->getParent(),
+                            std::next(MI->getReverseIterator()),
+                            0, IsExpired, Visited);
+}
+
+int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
+  if (IsHazardRecognizerMode) {
+    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
+      return WaitStates >= Limit;
+    };
+    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
+  }
+
   int WaitStates = 0;
   for (MachineInstr *MI : EmittedInstrs) {
     if (MI) {
       if (IsHazard(MI))
         return WaitStates;
 
-      unsigned Opcode = MI->getOpcode();
-      if (Opcode == AMDGPU::INLINEASM)
+      if (MI->isInlineAsm())
         continue;
     }
     ++WaitStates;
+
+    if (WaitStates >= Limit)
+      break;
   }
   return std::numeric_limits<int>::max();
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceDef(
-    unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
+int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
+                                               IsHazardFn IsHazardDef,
+                                               int Limit) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
-int GCNHazardRecognizer::getWaitStatesSinceSetReg(
-    function_ref<bool(MachineInstr *)> IsHazard) {
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
+                                                  int Limit) {
   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
   };
 
-  return getWaitStatesSince(IsHazardFn);
+  return getWaitStatesSince(IsHazardFn, Limit);
 }
 
 //===----------------------------------------------------------------------===//
@@ -328,9 +500,9 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
   // instructions in this group may return out of order and/or may be
   // replayed (i.e. the same instruction issued more than once).
   //
-  // In order to handle these situations correctly we need to make sure
-  // that when a clause has more than one instruction, no instruction in the
-  // clause writes to a register that is read another instruction in the clause
+  // In order to handle these situations correctly we need to make sure that
+  // when a clause has more than one instruction, no instruction in the clause
+  // writes to a register that is read by another instruction in the clause
   // (including itself). If we encounter this situaion, we need to break the
   // clause by inserting a non SMEM instruction.
 
@@ -363,13 +535,12 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
 }
 
 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   int WaitStatesNeeded = 0;
 
   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  if (!ST.hasSMRDReadVALUDefHazard())
     return WaitStatesNeeded;
 
   // A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -384,7 +555,8 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
     if (!Use.isReg())
       continue;
     int WaitStatesNeededForUse =
-        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                   SmrdSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
 
     // This fixes what appears to be undocumented hardware behavior in SI where
@@ -397,7 +569,8 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
     if (IsBufferSMRD) {
       int WaitStatesNeededForUse =
         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
-                                                   IsBufferHazardDefFn);
+                                                   IsBufferHazardDefFn,
+                                                   SmrdSgprWaitStates);
       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
     }
   }
@@ -406,7 +579,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasVMEMReadSGPRVALUDefHazard())
     return 0;
 
   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
@@ -415,13 +588,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
   // SGPR was written by a VALU Instruction.
   const int VmemSgprWaitStates = 5;
   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
-
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
 
     int WaitStatesNeededForUse =
-        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+                                                   VmemSgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
   return WaitStatesNeeded;
@@ -441,13 +614,16 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
     int WaitStatesNeededForUse =
-        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+                              [](MachineInstr *) { return true; },
+                              DppVgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
   WaitStatesNeeded = std::max(
       WaitStatesNeeded,
-      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
+                                                DppExecWaitStates));
 
   return WaitStatesNeeded;
 }
@@ -459,7 +635,8 @@ int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
   // instruction.
   const int DivFMasWaitStates = 4;
   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
-  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
+                                               DivFMasWaitStates);
 
   return DivFMasWaitStates - WaitStatesNeeded;
 }
@@ -472,7 +649,7 @@ int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
     return GetRegHWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
 
   return GetRegWaitStates - WaitStatesNeeded;
 }
@@ -481,12 +658,11 @@ int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   unsigned HWReg = getHWReg(TII, *SetRegInstr);
 
-  const int SetRegWaitStates =
-      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
+  const int SetRegWaitStates = ST.getSetRegWaitStates();
   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
     return HWReg == getHWReg(TII, *MI);
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
   return SetRegWaitStates - WaitStatesNeeded;
 }
 
@@ -557,7 +733,7 @@ int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
     TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
   };
   int WaitStatesNeededForDef =
-    VALUWaitStates - getWaitStatesSince(IsHazardFn);
+    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
 
   return WaitStatesNeeded;
@@ -622,12 +798,13 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
   };
 
   const int RWLaneWaitStates = 4;
-  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
+  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
+                                              RWLaneWaitStates);
   return RWLaneWaitStates - WaitStatesSince;
 }
 
 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
-  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (!ST.hasRFEHazards())
     return 0;
 
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -637,7 +814,7 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
   };
-  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
   return RFEWaitStates - WaitStatesNeeded;
 }
 
@@ -661,7 +838,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
     };
     int WaitStatesNeededForUse =
-        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
+                                                 MovFedWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
 
@@ -674,5 +852,557 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
   auto IsHazardFn = [TII] (MachineInstr *MI) {
     return TII->isSALU(*MI);
   };
-  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
+                                                   SMovRelWaitStates);
+}
+
+void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+  fixVMEMtoScalarWriteHazards(MI);
+  fixVcmpxPermlaneHazards(MI);
+  fixSMEMtoVectorWriteHazards(MI);
+  fixVcmpxExecWARHazard(MI);
+  fixLdsBranchVmemWARHazard(MI);
+}
+
+bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
+  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  auto IsHazardFn = [TII] (MachineInstr *MI) {
+    return TII->isVOPC(*MI);
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int) {
+    if (!MI)
+      return false;
+    unsigned Opc = MI->getOpcode();
+    return SIInstrInfo::isVALU(*MI) &&
+           Opc != AMDGPU::V_NOP_e32 &&
+           Opc != AMDGPU::V_NOP_e64 &&
+           Opc != AMDGPU::V_NOP_sdwa;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  // V_NOP will be discarded by SQ.
+  // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
+  // which is always a VGPR and available.
+  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+  unsigned Reg = Src0->getReg();
+  bool IsUndef = Src0->isUndef();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::V_MOV_B32_e32))
+    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
+    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
+
+  return true;
+}
+
+bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
+  if (!ST.hasVMEMtoScalarWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
+    return false;
+
+  if (MI->getNumDefs() == 0)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
+    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
+        !SIInstrInfo::isFLAT(*I))
+      return false;
+
+    for (const MachineOperand &Def : MI->defs()) {
+      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+      if (!Op)
+        continue;
+      return true;
+    }
+    return false;
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int) {
+    return MI && (SIInstrInfo::isVALU(*MI) ||
+                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
+                   !MI->getOperand(0).getImm()));
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+  return true;
+}
+
+bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
+  if (!ST.hasSMEMtoVectorWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  unsigned SDSTName;
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_READLANE_B32:
+  case AMDGPU::V_READFIRSTLANE_B32:
+    SDSTName = AMDGPU::OpName::vdst;
+    break;
+  default:
+    SDSTName = AMDGPU::OpName::sdst;
+    break;
+  }
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
+  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
+  if (!SDST) {
+    for (const auto &MO : MI->implicit_operands()) {
+      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+        SDST = &MO;
+        break;
+      }
+    }
+  }
+
+  if (!SDST)
+    return false;
+
+  const unsigned SDSTReg = SDST->getReg();
+  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
+    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+  };
+
+  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
+    if (MI) {
+      if (TII->isSALU(*MI)) {
+        switch (MI->getOpcode()) {
+        case AMDGPU::S_SETVSKIP:
+        case AMDGPU::S_VERSION:
+        case AMDGPU::S_WAITCNT_VSCNT:
+        case AMDGPU::S_WAITCNT_VMCNT:
+        case AMDGPU::S_WAITCNT_EXPCNT:
+          // These instructions cannot mitigate the hazard.
+          return false;
+        case AMDGPU::S_WAITCNT_LGKMCNT:
+          // Reducing lgkmcnt count to 0 always mitigates the hazard.
+          return (MI->getOperand(1).getImm() == 0) &&
+                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+        case AMDGPU::S_WAITCNT: {
+          const int64_t Imm = MI->getOperand(0).getImm();
+          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
+          return (Decoded.LgkmCnt == 0);
+        }
+        default:
+          // SOPP instructions cannot mitigate the hazard.
+          if (TII->isSOPP(*MI))
+            return false;
+          // At this point the SALU can be assumed to mitigate the hazard
+          // because either:
+          // (a) it is independent of the at risk SMEM (breaking chain),
+          // or
+          // (b) it is dependent on the SMEM, in which case an appropriate
+          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
+          //     SMEM instruction.
+          return true;
+        }
+      }
+    }
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
+      .addImm(0);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
+  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
+    return false;
+
+  auto IsHazardFn = [TRI] (MachineInstr *I) {
+    if (SIInstrInfo::isVALU(*I))
+      return false;
+    return I->readsRegister(AMDGPU::EXEC, TRI);
+  };
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
+    if (!MI)
+      return false;
+    if (SIInstrInfo::isVALU(*MI)) {
+      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+        return true;
+      for (const auto &MO : MI->implicit_operands()) // Bind by ref, no copy.
+        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+          return true;
+    }
+    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+      return true;
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+    .addImm(0xfffe);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+  if (!ST.hasLdsBranchVmemWARHazard())
+    return false;
+
+  auto IsHazardInst = [] (const MachineInstr *MI) {
+    if (SIInstrInfo::isDS(*MI))
+      return 1;
+    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+      return 2;
+    return 0;
+  };
+
+  auto InstType = IsHazardInst(MI);
+  if (!InstType)
+    return false;
+
+  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
+    return I && (IsHazardInst(I) ||
+                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+                  !I->getOperand(1).getImm()));
+  };
+
+  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
+    if (!I->isBranch())
+      return false;
+
+    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+      auto InstType2 = IsHazardInst(I);
+      return InstType2 && InstType != InstType2;
+    };
+
+    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
+      if (!I)
+        return false;
+
+      auto InstType2 = IsHazardInst(I);
+      if (InstType == InstType2)
+        return true;
+
+      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+             !I->getOperand(1).getImm();
+    };
+
+    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+           std::numeric_limits<int>::max();
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_VSCNT))
+    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+    .addImm(0);
+
+  return true;
+}
+
+int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
+  const int NSAtoVMEMWaitStates = 1;
+
+  if (!ST.hasNSAtoVMEMBug())
+    return 0;
+
+  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
+    return 0;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+  if (!Offset || (Offset->getImm() & 6) == 0)
+    return 0;
+
+  auto IsHazardFn = [TII] (MachineInstr *I) {
+    if (!SIInstrInfo::isMIMG(*I))
+      return false;
+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
+           TII->getInstSizeInBytes(*I) >= 16;
+  };
+
+  // Look back no further than the wait states this hazard requires.
+  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, NSAtoVMEMWaitStates);
+}
+
+int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
+  int FPAtomicToDenormModeWaitStates = 3;
+
+  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
+    return 0;
+
+  auto IsHazardFn = [] (MachineInstr *I) {
+    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
+      return false;
+    return SIInstrInfo::isFPAtomic(*I);
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
+    // MI is null when the CFG walk merges results from predecessor blocks.
+    if (WaitStates >= 3 || (MI && SIInstrInfo::isVALU(*MI)))
+      return true;
+    if (!MI)
+      return false;
+
+    switch (MI->getOpcode()) {
+    case AMDGPU::S_WAITCNT:
+    case AMDGPU::S_WAITCNT_VSCNT:
+    case AMDGPU::S_WAITCNT_VMCNT:
+    case AMDGPU::S_WAITCNT_EXPCNT:
+    case AMDGPU::S_WAITCNT_LGKMCNT:
+    case AMDGPU::S_WAITCNT_IDLE:
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  return FPAtomicToDenormModeWaitStates -
+         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
+}
+
+int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
+  assert(SIInstrInfo::isMAI(*MI));
+
+  int WaitStatesNeeded = 0;
+  unsigned Opc = MI->getOpcode();
+
+  auto IsVALUFn = [] (MachineInstr *MI) {
+    return SIInstrInfo::isVALU(*MI);
+  };
+
+  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+    const int LegacyVALUWritesVGPRWaitStates = 2;
+    const int VALUWritesExecWaitStates = 4;
+    const int MaxWaitStates = 4;
+
+    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
+      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded < MaxWaitStates) {
+      for (const MachineOperand &Use : MI->explicit_uses()) {
+        const int MaxWaitStates = 2;
+
+        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+          continue;
+
+        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
+          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
+        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+        if (WaitStatesNeeded == MaxWaitStates)
+          break;
+      }
+    }
+  }
+
+  auto IsMFMAFn = [] (MachineInstr *MI) {
+    return SIInstrInfo::isMAI(*MI) &&
+           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+  };
+
+  for (const MachineOperand &Op : MI->explicit_operands()) {
+    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
+      continue;
+
+    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+      continue;
+
+    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
+    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
+    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
+    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
+    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
+    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
+    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
+    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
+    const int MaxWaitStates = 18;
+    unsigned Reg = Op.getReg();
+    unsigned HazardDefLatency = 0;
+
+    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
+                              (MachineInstr *MI) {
+      if (!IsMFMAFn(MI))
+        return false;
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (DstReg == Reg)
+        return false;
+      HazardDefLatency = std::max(HazardDefLatency,
+                                  TSchedModel.computeInstrLatency(MI));
+      return TRI.regsOverlap(DstReg, Reg);
+    };
+
+    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
+                                                   MaxWaitStates);
+    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
+    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+    int OpNo = MI->getOperandNo(&Op);
+    if (OpNo == SrcCIdx) {
+      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
+    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+      switch (HazardDefLatency) {
+      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
+               break;
+      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
+               break;
+      case 16: LLVM_FALLTHROUGH;
+      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
+               break;
+      }
+    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+      switch (HazardDefLatency) {
+      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
+               break;
+      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
+               break;
+      case 16: LLVM_FALLTHROUGH;
+      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
+               break;
+      }
+    }
+
+    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+
+    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
+      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+        return false;
+      unsigned DstReg = MI->getOperand(0).getReg();
+      return TRI.regsOverlap(Reg, DstReg);
+    };
+
+    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
+    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
+    const int AccVGPRWriteAccVgprReadWaitStates = 3;
+    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
+    if (OpNo == SrcCIdx)
+      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
+    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
+
+    WaitStatesNeededForUse = NeedWaitStates -
+      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+  }
+
+  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
+    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
+    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
+    const int MaxWaitStates = 13;
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned HazardDefLatency = 0;
+
+    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
+                         (MachineInstr *MI) {
+      if (!IsMFMAFn(MI))
+        return false;
+      unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+      HazardDefLatency = std::max(HazardDefLatency,
+                                  TSchedModel.computeInstrLatency(MI));
+      return TRI.regsOverlap(Reg, DstReg);
+    };
+
+    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
+    int NeedWaitStates;
+    switch (HazardDefLatency) {
+    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
+             break;
+    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
+             break;
+    case 16: LLVM_FALLTHROUGH;
+    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
+             break;
+    }
+
+    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+
+  return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
+  if (!ST.hasMAIInsts())
+    return 0;
+
+  int WaitStatesNeeded = 0;
+
+  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
+    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+  };
+
+  for (const MachineOperand &Op : MI->explicit_uses()) {
+    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
+      continue;
+
+    unsigned Reg = Op.getReg();
+
+    const int AccVgprReadLdStWaitStates = 2;
+    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+    const int MaxWaitStates = 2;
+
+    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
+      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+
+    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
+      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+        return false;
+      auto IsVALUFn = [] (MachineInstr *MI) {
+        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+      };
+      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
+             std::numeric_limits<int>::max();
+    };
+
+    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
+      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+
+  return WaitStatesNeeded;
 }
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index ca17e7cb6018..6aa2e70dfbfb 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -1,9 +1,8 @@
 //===-- GCNHazardRecognizers.h - GCN Hazard Recognizers ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include <list>
 
 namespace llvm {
@@ -31,6 +31,13 @@ class SIRegisterInfo;
 class GCNSubtarget;
 
 class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+public:
+  typedef function_ref<bool(MachineInstr *)> IsHazardFn;
+
+private:
+  // Distinguish if we are called from scheduler or hazard recognizer
+  bool IsHazardRecognizerMode;
+
   // This variable stores the instruction that has been emitted this cycle. It
   // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
   // called.
@@ -40,6 +47,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   const GCNSubtarget &ST;
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
+  TargetSchedModel TSchedModel;
 
   /// RegUnits of uses in the current soft memory clause.
   BitVector ClauseUses;
@@ -54,11 +62,13 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
 
   void addClauseInst(const MachineInstr &MI);
 
-  int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
-  int getWaitStatesSinceDef(unsigned Reg,
-                            function_ref<bool(MachineInstr *)> IsHazardDef =
-                                [](MachineInstr *) { return true; });
-  int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
+  // Advance over a MachineInstr bundle. Look for hazards in the bundled
+  // instructions.
+  void processBundle();
+
+  int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
+  int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
+  int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
 
   int checkSoftClauseHazards(MachineInstr *SMEM);
   int checkSMRDHazards(MachineInstr *SMRD);
@@ -75,6 +85,18 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   int checkInlineAsmHazards(MachineInstr *IA);
   int checkAnyInstHazards(MachineInstr *MI);
   int checkReadM0Hazards(MachineInstr *SMovRel);
+  int checkNSAtoVMEMHazard(MachineInstr *MI);
+  int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
+  void fixHazards(MachineInstr *MI);
+  bool fixVcmpxPermlaneHazards(MachineInstr *MI);
+  bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
+  bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
+  bool fixVcmpxExecWARHazard(MachineInstr *MI);
+  bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+
+  int checkMAIHazards(MachineInstr *MI);
+  int checkMAILdStHazards(MachineInstr *MI);
+
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
@@ -85,6 +107,7 @@ public:
   void EmitNoop() override;
   unsigned PreEmitNoops(SUnit *SU) override;
   unsigned PreEmitNoops(MachineInstr *) override;
+  unsigned PreEmitNoopsCommon(MachineInstr *);
   void AdvanceCycle() override;
   void RecedeCycle() override;
 };
diff --git a/lib/Target/AMDGPU/GCNILPSched.cpp b/lib/Target/AMDGPU/GCNILPSched.cpp
index d62dc8d86781..1eb617640c32 100644
--- a/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -1,9 +1,8 @@
 //===---------------------------- GCNILPSched.cpp - -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 8e4cc391dc21..3525174223bd 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -1,9 +1,8 @@
 //===- GCNIterativeScheduler.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.h b/lib/Target/AMDGPU/GCNIterativeScheduler.h
index 14ef5147f32a..e6f83914af5b 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.h
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -1,9 +1,8 @@
 //===- GCNIterativeScheduler.h - GCN Scheduler ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index ec6bcae33555..c469cf290e26 100644
--- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -1,9 +1,8 @@
 //===- GCNMinRegStrategy.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp
new file mode 100644
index 000000000000..51c4c99cfb18
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -0,0 +1,343 @@
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
+/// with sequential versions where possible.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-nsa-reassign"
+
+STATISTIC(NumNSAInstructions,
+          "Number of NSA instructions with non-sequential address found");
+STATISTIC(NumNSAConverted,
+          "Number of NSA instructions changed to sequential");
+
+namespace {
+
+class GCNNSAReassign : public MachineFunctionPass {
+public:
+  static char ID;
+
+  GCNNSAReassign() : MachineFunctionPass(ID) {
+    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "GCN NSA Reassign"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.addRequired<VirtRegMap>();
+    AU.addRequired<LiveRegMatrix>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  typedef enum {
+    NOT_NSA,        // Not an NSA instruction
+    FIXED,          // NSA which we cannot modify
+    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
+                    // to optimize.
+    CONTIGUOUS      // NSA with all sequential address registers
+  } NSA_Status;
+
+  const GCNSubtarget *ST;
+
+  const MachineRegisterInfo *MRI;
+
+  const SIRegisterInfo *TRI;
+
+  VirtRegMap *VRM;
+
+  LiveRegMatrix *LRM;
+
+  LiveIntervals *LIS;
+
+  unsigned MaxNumVGPRs;
+
+  const MCPhysReg *CSRegs;
+
+  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
+
+  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
+                          unsigned StartReg) const;
+
+  bool canAssign(unsigned StartReg, unsigned NumRegs) const;
+
+  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
+                    false, false)
+
+
+char GCNNSAReassign::ID = 0;
+
+char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
+
+bool
+GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
+                                   unsigned StartReg) const {
+  unsigned NumRegs = Intervals.size();
+
+  for (unsigned N = 0; N < NumRegs; ++N)
+    if (VRM->hasPhys(Intervals[N]->reg))
+      LRM->unassign(*Intervals[N]);
+
+  for (unsigned N = 0; N < NumRegs; ++N)
+    if (LRM->checkInterference(*Intervals[N], StartReg + N))
+      return false;
+
+  for (unsigned N = 0; N < NumRegs; ++N)
+    LRM->assign(*Intervals[N], StartReg + N);
+
+  return true;
+}
+
+bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
+  for (unsigned N = 0; N < NumRegs; ++N) {
+    unsigned Reg = StartReg + N;
+    if (!MRI->isAllocatable(Reg))
+      return false;
+
+    for (unsigned I = 0; CSRegs[I]; ++I)
+      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
+          !LRM->isPhysRegUsed(CSRegs[I]))
+      return false;
+  }
+
+  return true;
+}
+
+bool
+GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
+  unsigned NumRegs = Intervals.size();
+
+  if (NumRegs > MaxNumVGPRs)
+    return false;
+  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
+
+  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
+    if (!canAssign(Reg, NumRegs))
+      continue;
+
+    if (tryAssignRegisters(Intervals, Reg))
+      return true;
+  }
+
+  return false;
+}
+
+GCNNSAReassign::NSA_Status
+GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+    return NSA_Status::NOT_NSA;
+
+  int VAddr0Idx =
+    AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+
+  unsigned VgprBase = 0;
+  bool NSA = false;
+  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
+    unsigned Reg = Op.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+      return NSA_Status::FIXED;
+
+    unsigned PhysReg = VRM->getPhys(Reg);
+
+    if (!Fast) {
+      if (!PhysReg)
+        return NSA_Status::FIXED;
+
+      // Bail if address is not a VGPR32. It should be possible to extend the
+      // optimization to work with subregs of wider register tuples, but the
+      // logic to find free registers would be much more complicated, with
+      // much less chance of success. It seems reasonable to assume that in
+      // most cases a tuple is used because a vector variable contains
+      // different parts of an address and it is either already consecutive or
+      // cannot be reassigned if not. If needed it is better to rely on the
+      // register coalescer to process such address tuples.
+      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
+        return NSA_Status::FIXED;
+
+      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
+
+      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
+        return NSA_Status::FIXED;
+
+      for (auto U : MRI->use_nodbg_operands(Reg)) {
+        if (U.isImplicit())
+          return NSA_Status::FIXED;
+        const MachineInstr *UseInst = U.getParent();
+        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
+          return NSA_Status::FIXED;
+      }
+
+      if (!LIS->hasInterval(Reg))
+        return NSA_Status::FIXED;
+    }
+
+    if (I == 0)
+      VgprBase = PhysReg;
+    else if (VgprBase + I != PhysReg)
+      NSA = true;
+  }
+
+  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
+}
+
+bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
+  ST = &MF.getSubtarget<GCNSubtarget>();
+  if (ST->getGeneration() < GCNSubtarget::GFX10)
+    return false;
+
+  MRI = &MF.getRegInfo();
+  TRI = ST->getRegisterInfo();
+  VRM = &getAnalysis<VirtRegMap>();
+  LRM = &getAnalysis<LiveRegMatrix>();
+  LIS = &getAnalysis<LiveIntervals>();
+
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
+  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
+  CSRegs = MRI->getCalleeSavedRegs();
+
+  using Candidate = std::pair<const MachineInstr*, bool>;
+  SmallVector<Candidate, 32> Candidates;
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      switch (CheckNSA(MI)) {
+      default:
+        continue;
+      case NSA_Status::CONTIGUOUS:
+        Candidates.push_back(std::make_pair(&MI, true));
+        break;
+      case NSA_Status::NON_CONTIGUOUS:
+        Candidates.push_back(std::make_pair(&MI, false));
+        ++NumNSAInstructions;
+        break;
+      }
+    }
+  }
+
+  bool Changed = false;
+  for (auto &C : Candidates) {
+    if (C.second)
+      continue;
+
+    const MachineInstr *MI = C.first;
+    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
+      // Already happens to be fixed.
+      C.second = true;
+      ++NumNSAConverted;
+      continue;
+    }
+
+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
+    int VAddr0Idx =
+      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
+
+    SmallVector<LiveInterval *, 16> Intervals;
+    SmallVector<unsigned, 16> OrigRegs;
+    SlotIndex MinInd, MaxInd;
+    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
+      unsigned Reg = Op.getReg();
+      LiveInterval *LI = &LIS->getInterval(Reg);
+      if (llvm::find(Intervals, LI) != Intervals.end()) {
+        // Same register used, unable to make sequential
+        Intervals.clear();
+        break;
+      }
+      Intervals.push_back(LI);
+      OrigRegs.push_back(VRM->getPhys(Reg));
+      MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+      MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+    }
+
+    if (Intervals.empty())
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
+                      << "\tOriginal allocation:\t";
+               for(auto *LI : Intervals)
+                 dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
+               dbgs() << '\n');
+
+    bool Success = scavengeRegs(Intervals);
+    if (!Success) {
+      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
+      if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
+        continue;
+    } else {
+      // Check we did not make it worse for other instructions.
+      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
+                                [this](const Candidate &C, SlotIndex I) {
+                                  return LIS->getInstructionIndex(*C.first) < I;
+                                });
+      for (auto E = Candidates.end(); Success && I != E &&
+              LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
+        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
+          Success = false;
+          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
+        }
+      }
+    }
+
+    if (!Success) {
+      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+        if (VRM->hasPhys(Intervals[I]->reg))
+          LRM->unassign(*Intervals[I]);
+
+      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+        LRM->assign(*Intervals[I], OrigRegs[I]);
+
+      continue;
+    }
+
+    C.second = true;
+    ++NumNSAConverted;
+    LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
+                 << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
+                 << " : "
+                 << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
+                 << "]\n");
+    Changed = true;
+  }
+
+  return Changed;
+}
diff --git a/lib/Target/AMDGPU/GCNProcessors.td b/lib/Target/AMDGPU/GCNProcessors.td
index b8142a4e4ff8..b926041afb2f 100644
--- a/lib/Target/AMDGPU/GCNProcessors.td
+++ b/lib/Target/AMDGPU/GCNProcessors.td
@@ -1,163 +1,185 @@
 //===-- GCNProcessors.td - GCN Processor definitions ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 // The code produced for "generic" is only useful for tests and cannot
 // reasonably be expected to execute on any particular target.
 def : ProcessorModel<"generic", NoSchedModel,
-  [FeatureGCN, FeatureWavefrontSize64]
+  [FeatureWavefrontSize64]
 >;
 
-//===----------------------------------------------------------------------===//
+def : ProcessorModel<"generic-hsa", NoSchedModel,
+  [FeatureWavefrontSize64, FeatureFlatAddressSpace]
+>;
+
+//===------------------------------------------------------------===//
 // GCN GFX6 (Southern Islands (SI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 
 def : ProcessorModel<"gfx600", SIFullSpeedModel,
-  [FeatureISAVersion6_0_0]
+  FeatureISAVersion6_0_0.Features
 >;
 
 def : ProcessorModel<"tahiti", SIFullSpeedModel,
-  [FeatureISAVersion6_0_0]
+  FeatureISAVersion6_0_0.Features
 >;
 
 def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
-  [FeatureISAVersion6_0_1]
+  FeatureISAVersion6_0_1.Features
 >;
 
 def : ProcessorModel<"hainan", SIQuarterSpeedModel,
-  [FeatureISAVersion6_0_1]
+  FeatureISAVersion6_0_1.Features
 >;
 
 def : ProcessorModel<"oland", SIQuarterSpeedModel,
-  [FeatureISAVersion6_0_1]
+  FeatureISAVersion6_0_1.Features
 >;
 
 def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
-  [FeatureISAVersion6_0_1]
+  FeatureISAVersion6_0_1.Features
 >;
 
 def : ProcessorModel<"verde", SIQuarterSpeedModel,
-  [FeatureISAVersion6_0_1]
+  FeatureISAVersion6_0_1.Features
 >;
 
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 // GCN GFX7 (Sea Islands (CI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 
 def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_0]
+  FeatureISAVersion7_0_0.Features
 >;
 
 def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_0]
+  FeatureISAVersion7_0_0.Features
 >;
 
 def : ProcessorModel<"gfx701", SIFullSpeedModel,
-  [FeatureISAVersion7_0_1]
+  FeatureISAVersion7_0_1.Features
 >;
 
 def : ProcessorModel<"hawaii", SIFullSpeedModel,
-  [FeatureISAVersion7_0_1]
+  FeatureISAVersion7_0_1.Features
 >;
 
 def : ProcessorModel<"gfx702", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_2]
+  FeatureISAVersion7_0_2.Features
 >;
 
 def : ProcessorModel<"gfx703", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_3]
+  FeatureISAVersion7_0_3.Features
 >;
 
 def : ProcessorModel<"kabini", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_3]
+  FeatureISAVersion7_0_3.Features
 >;
 
 def : ProcessorModel<"mullins", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_3]
+  FeatureISAVersion7_0_3.Features
 >;
 
 def : ProcessorModel<"gfx704", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_4]
+  FeatureISAVersion7_0_4.Features
 >;
 
 def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_4]
+  FeatureISAVersion7_0_4.Features
 >;
 
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 // GCN GFX8 (Volcanic Islands (VI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 
 def : ProcessorModel<"gfx801", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_1]
+  FeatureISAVersion8_0_1.Features
 >;
 
 def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_1]
+  FeatureISAVersion8_0_1.Features
 >;
 
 def : ProcessorModel<"gfx802", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_2]
+  FeatureISAVersion8_0_2.Features
 >;
 
 def : ProcessorModel<"iceland", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_2]
+  FeatureISAVersion8_0_2.Features
 >;
 
 def : ProcessorModel<"tonga", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_2]
+  FeatureISAVersion8_0_2.Features
 >;
 
 def : ProcessorModel<"gfx803", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_3]
+  FeatureISAVersion8_0_3.Features
 >;
 
 def : ProcessorModel<"fiji", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_3]
+  FeatureISAVersion8_0_3.Features
 >;
 
 def : ProcessorModel<"polaris10", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_3]
+  FeatureISAVersion8_0_3.Features
 >;
 
 def : ProcessorModel<"polaris11", SIQuarterSpeedModel,
-  [FeatureISAVersion8_0_3]
+  FeatureISAVersion8_0_3.Features
 >;
 
 def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
-  [FeatureISAVersion8_1_0]
+  FeatureISAVersion8_1_0.Features
 >;
 
 def : ProcessorModel<"stoney", SIQuarterSpeedModel,
-  [FeatureISAVersion8_1_0]
+  FeatureISAVersion8_1_0.Features
 >;
 
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 // GCN GFX9.
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
 
 def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
-  [FeatureISAVersion9_0_0]
+  FeatureISAVersion9_0_0.Features
 >;
 
 def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
-  [FeatureISAVersion9_0_2]
+  FeatureISAVersion9_0_2.Features
 >;
 
 def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
-  [FeatureISAVersion9_0_4]
+  FeatureISAVersion9_0_4.Features
 >;
 
 def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
-  [FeatureISAVersion9_0_6]
+  FeatureISAVersion9_0_6.Features
+>;
+
+def : ProcessorModel<"gfx908", SIQuarterSpeedModel,
+  FeatureISAVersion9_0_8.Features
 >;
 
 def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
-  [FeatureISAVersion9_0_9]
+  FeatureISAVersion9_0_9.Features
+>;
+
+//===----------------------------------------------------------------------===//
+// GCN GFX10.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx1010", GFX10SpeedModel,
+  FeatureISAVersion10_1_0.Features
 >;
 
+def : ProcessorModel<"gfx1011", GFX10SpeedModel,
+  FeatureISAVersion10_1_1.Features
+>;
+
+def : ProcessorModel<"gfx1012", GFX10SpeedModel,
+  FeatureISAVersion10_1_2.Features
+>;
diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
new file mode 100644
index 000000000000..f0d47eaa4ed1
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -0,0 +1,800 @@
+//===-- GCNRegBankReassign.cpp - Reassign registers after regalloc --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Try to reassign registers on GFX10+ to reduce register bank
+/// conflicts.
+///
+/// On GFX10 registers are organized in banks. VGPRs have 4 banks assigned in
+/// a round-robin fashion: v0, v4, v8... belong to bank 0. v1, v5, v9... to
+/// bank 1, etc. SGPRs have 8 banks and are allocated in pairs, so that s0:s1,
+/// s16:s17, s32:s33 are at bank 0. s2:s3, s18:s19, s34:s35 are at bank 1 etc.
+///
+/// The shader can read one dword from each of these banks once per cycle.
+/// If an instruction has to read more register operands from the same bank
+/// an additional cycle is needed. HW attempts to pre-load registers through
+/// input operand gathering, but a stall cycle may occur if that fails. For
+/// example V_FMA_F32 V111 = V0 + V4 * V8 will need 3 cycles to read operands,
+/// potentially incurring 2 stall cycles.
+///
+/// The pass tries to reassign registers to reduce bank conflicts.
+///
+/// In this pass bank numbers 0-3 are VGPR banks and 4-11 are SGPR banks, so
+/// that 4 has to be subtracted from an SGPR bank number to get the real value.
+/// This also corresponds to bit numbers in bank masks used in the pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> VerifyStallCycles("amdgpu-verify-regbanks-reassign",
+  cl::desc("Verify stall cycles in the regbanks reassign pass"),
+  cl::value_desc("0|1|2"),
+  cl::init(0), cl::Hidden);
+
+#define DEBUG_TYPE "amdgpu-regbanks-reassign"
+
+#define NUM_VGPR_BANKS 4
+#define NUM_SGPR_BANKS 8
+#define NUM_BANKS (NUM_VGPR_BANKS + NUM_SGPR_BANKS)
+#define SGPR_BANK_OFFSET NUM_VGPR_BANKS
+#define VGPR_BANK_MASK 0xf
+#define SGPR_BANK_MASK 0xff0
+#define SGPR_BANK_SHIFTED_MASK (SGPR_BANK_MASK >> SGPR_BANK_OFFSET)
+
+STATISTIC(NumStallsDetected,
+          "Number of operand read stalls detected");
+STATISTIC(NumStallsRecovered,
+          "Number of operand read stalls recovered");
+
+namespace {
+
+class GCNRegBankReassign : public MachineFunctionPass {
+
+  // Bank mask recorded for a single register operand (see analyzeInst).
+  struct OperandMask {
+    unsigned Reg;
+    unsigned SubReg;
+    unsigned Mask;
+    OperandMask(unsigned RegNo, unsigned SubIdx, unsigned BankMask)
+      : Reg(RegNo), SubReg(SubIdx), Mask(BankMask) {}
+  };
+
+  class Candidate {
+  public:
+    Candidate(MachineInstr *mi, unsigned reg, unsigned freebanks,
+              unsigned weight)
+      : MI(mi), Reg(reg), FreeBanks(freebanks), Weight(weight) {}
+    // Candidates are ordered by Weight only; the work list is sorted with this.
+    bool operator< (const Candidate& RHS) const { return Weight < RHS.Weight; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+    void dump(const GCNRegBankReassign *P) const {
+      MI->dump();
+      dbgs() << P->printReg(Reg) << " to banks ";
+      dumpFreeBanks(FreeBanks);
+      dbgs() << " weight " << Weight << '\n';
+    }
+#endif
+
+    MachineInstr *MI;   // Instruction in which the bank conflict was found.
+    unsigned Reg;       // Register proposed for reassignment.
+    unsigned FreeBanks; // Bit mask of the banks Reg could be moved to.
+    unsigned Weight;    // Sort key; the pass takes candidates from the back.
+  };
+
+  class CandidateList : public std::list<Candidate> {
+  public:
+    // Zero-weight candidates go first to speed up the subsequent sort.
+    void push(const Candidate &C) {
+      if (C.Weight) push_back(C);
+      else push_front(C);
+    }
+  };
+
+public:
+  static char ID;
+
+public:
+  GCNRegBankReassign() : MachineFunctionPass(ID) {
+    initializeGCNRegBankReassignPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "GCN RegBank Reassign"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<LiveIntervals>();
+    AU.addRequired<VirtRegMap>();
+    AU.addRequired<LiveRegMatrix>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  const GCNSubtarget *ST;
+
+  const MachineRegisterInfo *MRI;
+
+  const SIRegisterInfo *TRI;
+
+  MachineLoopInfo *MLI;
+
+  VirtRegMap *VRM;
+
+  LiveRegMatrix *LRM;
+
+  LiveIntervals *LIS;
+
+  unsigned MaxNumVGPRs;
+
+  unsigned MaxNumSGPRs;
+
+  BitVector RegsUsed;
+
+  SmallVector<OperandMask, 8> OperandMasks;
+
+  CandidateList Candidates;
+
+  const MCPhysReg *CSRegs;
+
+  // Returns bank for a phys reg.
+  unsigned getPhysRegBank(unsigned Reg) const;
+
+  // Return a bit set for each register bank used. 4 banks for VGPRs and
+  // 8 banks for SGPRs.
+  // Registers already processed and recorded in RegsUsed are excluded.
+  // If Bank is not -1 assume Reg:SubReg to belong to that Bank.
+  unsigned getRegBankMask(unsigned Reg, unsigned SubReg, int Bank);
+
+  // Return number of stalls in the instructions.
+  // UsedBanks has bits set for the banks used by all operands.
+  // If Reg and Bank provided substitute the Reg with the Bank.
+  unsigned analyzeInst(const MachineInstr& MI, unsigned& UsedBanks,
+                       unsigned Reg = AMDGPU::NoRegister, int Bank = -1);
+
+  // Return true if register is regular VGPR or SGPR or their tuples.
+  // Returns false for special registers like m0, vcc etc.
+  bool isReassignable(unsigned Reg) const;
+
+  // Check if registers' defs are old and may be pre-loaded.
+  // Returns 0 if both registers are old enough, 1 or 2 if one or both
+  // registers will not likely be pre-loaded.
+  unsigned getOperandGatherWeight(const MachineInstr& MI,
+                                  unsigned Reg1,
+                                  unsigned Reg2,
+                                  unsigned StallCycles) const;
+
+
+  // Find all bank bits in UsedBanks where Mask can be relocated to.
+  unsigned getFreeBanks(unsigned Mask, unsigned UsedBanks) const;
+
+  // Find all bank bits in UsedBanks where Mask can be relocated to.
+  // Bank is relative to the register and not its subregister component.
+  // Returns 0 if the register is not reassignable.
+  unsigned getFreeBanks(unsigned Reg, unsigned SubReg, unsigned Mask,
+                        unsigned UsedBanks) const;
+
+  // Add candidate instruction to the work list.
+  void collectCandidates(MachineInstr& MI, unsigned UsedBanks,
+                         unsigned StallCycles);
+
+  // Collect candidate instructions across the function. Returns the number of
+  // stall cycles detected. Only counts stalls if Collect is false.
+  unsigned collectCandidates(MachineFunction &MF, bool Collect = true);
+
+  // Remove all candidates that read specified register.
+  void removeCandidates(unsigned Reg);
+
+  // Compute stalls within the uses of SrcReg replaced by a register from
+  // Bank. If Bank is -1 does not perform substitution. If Collect is set
+  // candidates are collected and added to work list.
+  unsigned computeStallCycles(unsigned SrcReg,
+                              unsigned Reg = AMDGPU::NoRegister,
+                              int Bank = -1, bool Collect = false);
+
+  // Search for a register in Bank unused within LI.
+  // Returns phys reg or NoRegister.
+  unsigned scavengeReg(LiveInterval& LI, unsigned Bank) const;
+
+  // Try to reassign candidate. Returns number of stall cycles saved.
+  unsigned tryReassign(Candidate &C);
+
+  bool verifyCycles(MachineFunction &MF,
+                    unsigned OriginalCycles, unsigned CyclesSaved);
+
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+public:
+  Printable printReg(unsigned Reg, unsigned SubReg = 0) const { // Debug aid.
+    return Printable([Reg, SubReg, this](raw_ostream &OS) {
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        OS << llvm::printReg(Reg, TRI); // Physical: SubReg is not printed.
+        return;
+      }
+      if (!VRM->isAssignedReg(Reg))
+        OS << "<unassigned> " << llvm::printReg(Reg, TRI);
+      else // Assigned virtual register: printed as "vreg(physreg)".
+        OS << llvm::printReg(Reg, TRI) << '('
+           << llvm::printReg(VRM->getPhys(Reg), TRI) << ')';
+      if (SubReg) // Virtual registers only: append ":<subreg index name>".
+        OS << ':' << TRI->getSubRegIndexName(SubReg);
+    });
+  }
+
+  static Printable printBank(unsigned Bank) {
+    // SGPR banks are stored offset by the VGPR bank count; print the real id.
+    unsigned Id = Bank < SGPR_BANK_OFFSET ? Bank : Bank - SGPR_BANK_OFFSET;
+    return Printable([Id](raw_ostream &OS) { OS << Id; });
+  }
+
+  static void dumpFreeBanks(unsigned FreeBanks) {
+    // Print the id of every bank whose bit is set, lowest bit first.
+    for (unsigned Bit = 0; Bit != NUM_BANKS; ++Bit)
+      if ((FreeBanks >> Bit) & 1) dbgs() << printBank(Bit) << ' ';
+  }
+#endif
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(GCNRegBankReassign, DEBUG_TYPE, "GCN RegBank Reassign",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(GCNRegBankReassign, DEBUG_TYPE, "GCN RegBank Reassign",
+                    false, false)
+
+
+char GCNRegBankReassign::ID = 0;
+
+char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID;
+
+unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
+  assert (TargetRegisterInfo::isPhysicalRegister(Reg));
+
+  // The bank of a tuple is the bank of its first 32-bit component.
+  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+  const unsigned First =
+      TRI->getRegSizeInBits(*RC) > 32 ? TRI->getSubReg(Reg, AMDGPU::sub0) : Reg;
+
+  // VGPR banks 0-3 rotate with the register index.
+  if (TRI->hasVGPRs(RC))
+    return (First - AMDGPU::VGPR0) % NUM_VGPR_BANKS;
+
+  // SGPR banks hold register pairs and are numbered after the VGPR banks.
+  const unsigned Pair = TRI->getEncodingValue(First) / 2;
+  return Pair % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
+}
+
+unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
+                                            int Bank) {
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    if (!VRM->isAssignedReg(Reg))
+      return 0;
+
+    Reg = VRM->getPhys(Reg);
+    if (!Reg)
+      return 0;
+    if (SubReg)
+      Reg = TRI->getSubReg(Reg, SubReg);
+  }
+  // Reduce the operand to its first dword (Reg) and its size in dwords (Size).
+  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+  unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
+  if (Size > 1)
+    Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+
+  if (TRI->hasVGPRs(RC)) {
+    // VGPRs have 4 banks assigned in a round-robin fashion.
+    Reg -= AMDGPU::VGPR0;
+    // A 32-dword tuple needs every bit; shifting by the full width is undefined.
+    unsigned Mask = Size < 32 ? (1u << Size) - 1 : ~0u;
+    unsigned Used = 0; // BitVector lacks an extract method: test bit by bit.
+    for (unsigned I = 0; I < Size; ++I)
+      if (RegsUsed.test(Reg + I))
+        Used |= 1u << I;
+    RegsUsed.set(Reg, Reg + Size);
+    Mask &= ~Used; // Dwords already seen in this instruction are not recounted.
+    Mask <<= (Bank == -1) ? Reg % NUM_VGPR_BANKS : unsigned(Bank);
+    return (Mask | (Mask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
+  }
+
+  // SGPRs have 8 banks holding 2 consecutive registers each.
+  Reg = TRI->getEncodingValue(Reg) / 2;
+  unsigned StartBit = AMDGPU::VGPR_32RegClass.getNumRegs();
+  if (Reg + StartBit >= RegsUsed.size())
+    return 0;
+
+  if (Size > 1)
+    Size /= 2;
+  unsigned Mask = (1u << Size) - 1;
+  unsigned Used = 0;
+  for (unsigned I = 0; I < Size; ++I)
+    if (RegsUsed.test(StartBit + Reg + I))
+      Used |= 1u << I;
+  RegsUsed.set(StartBit + Reg, StartBit + Reg + Size);
+  Mask &= ~Used;
+  Mask <<= (Bank == -1) ? Reg % NUM_SGPR_BANKS
+                        : unsigned(Bank - SGPR_BANK_OFFSET);
+  Mask = (Mask | (Mask >> NUM_SGPR_BANKS)) & SGPR_BANK_SHIFTED_MASK;
+  // Reserve 4 bank ids for VGPRs.
+  return Mask << SGPR_BANK_OFFSET;
+}
+
+unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI,
+                                         unsigned& UsedBanks,
+                                         unsigned Reg,
+                                         int Bank) {
+  unsigned StallCycles = 0;
+  UsedBanks = 0;
+  // Debug values contribute neither stalls nor used banks.
+  if (MI.isDebugValue())
+    return 0;
+  // Start a fresh per-instruction record of seen registers and operand masks.
+  RegsUsed.reset();
+  OperandMasks.clear();
+  for (const auto& Op : MI.explicit_uses()) {
+    // Undef can be assigned to any register, so two vregs can be assigned
+    // the same phys reg within the same instruction.
+    if (!Op.isReg() || Op.isUndef())
+      continue;
+
+    unsigned R = Op.getReg();
+    if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R)))
+      continue; // Operands whose register class has AGPRs are ignored.
+
+    unsigned ShiftedBank = Bank;
+    // Bank is given for the whole of Reg; for a subregister operand of Reg
+    // that does not start at the first component, derive the component's bank.
+    if (Bank != -1 && R == Reg && Op.getSubReg()) {
+      unsigned LM = TRI->getSubRegIndexLaneMask(Op.getSubReg()).getAsInteger();
+      if (!(LM & 1) && (Bank < NUM_VGPR_BANKS)) {
+        // If a register spans all banks we cannot shift it to avoid conflict.
+        if (countPopulation(LM) >= NUM_VGPR_BANKS)
+          continue;
+        ShiftedBank = (Bank + countTrailingZeros(LM)) % NUM_VGPR_BANKS;
+      } else if (!(LM & 3) && (Bank >= SGPR_BANK_OFFSET)) {
+        // If a register spans all banks we cannot shift it to avoid conflict.
+        if (countPopulation(LM) / 2 >= NUM_SGPR_BANKS)
+          continue;
+        ShiftedBank = SGPR_BANK_OFFSET + (Bank - SGPR_BANK_OFFSET +
+                                          (countTrailingZeros(LM) >> 1)) %
+                                             NUM_SGPR_BANKS;
+      }
+    }
+    // One stall is counted per bank this operand shares with earlier operands.
+    unsigned Mask = getRegBankMask(R, Op.getSubReg(),
+                                   (Reg == R) ? ShiftedBank : -1);
+    StallCycles += countPopulation(UsedBanks & Mask);
+    UsedBanks |= Mask;
+    OperandMasks.push_back(OperandMask(Op.getReg(), Op.getSubReg(), Mask));
+  }
+
+  return StallCycles;
+}
+
+unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
+                                                    unsigned Reg1,
+                                                    unsigned Reg2,
+                                                    unsigned StallCycles) const
+{
+  unsigned Defs = 0; // Bit 0: Reg1 modified nearby; bit 1: Reg2 modified nearby.
+  MachineBasicBlock::const_instr_iterator Def(MI.getIterator());
+  MachineBasicBlock::const_instr_iterator B(MI.getParent()->instr_begin());
+  for (unsigned S = StallCycles; S && Def != B && Defs != 3;) {
+    --Def;
+    // Debug instructions must not affect codegen: skip, keeping the budget S.
+    if (Def->isDebugInstr())
+      continue;
+    --S;
+    if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+      continue;
+    Defs |= Def->modifiesRegister(Reg1, TRI) ? 1 : 0;
+    Defs |= Def->modifiesRegister(Reg2, TRI) ? 2 : 0;
+  }
+  return countPopulation(Defs);
+}
+
+bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+    return false;
+
+  const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
+  unsigned PhysReg = VRM->getPhys(Reg);
+  // A register defined by a copy from its own physical register stays put.
+  if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
+    return false;
+
+  // Implicit uses and copies into the same physical register pin it as well.
+  for (const MachineOperand &U : MRI->use_nodbg_operands(Reg)) {
+    if (U.isImplicit())
+      return false;
+    const MachineInstr *UseInst = U.getParent();
+    if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
+      return false;
+  }
+
+  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+  if (TRI->hasVGPRs(RC))
+    return true;
+
+  // Otherwise accept only registers whose first dword is in SGPR_32.
+  unsigned Size = TRI->getRegSizeInBits(*RC);
+  if (Size > 32)
+    PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);
+  return AMDGPU::SGPR_32RegClass.contains(PhysReg);
+}
+
+unsigned GCNRegBankReassign::getFreeBanks(unsigned Mask,
+                                          unsigned UsedBanks) const {
+  unsigned Size = countPopulation(Mask);
+  unsigned FreeBanks = 0;
+  unsigned Bank = findFirstSet(Mask);
+  if (!Mask) // Size would be 0 and the SGPR loop below would never advance.
+    return 0;
+  UsedBanks &= ~Mask; // The operand's own banks do not block its relocation.
+  // Find free VGPR banks
+  if (Mask & VGPR_BANK_MASK) {
+    // A tuple that covers every VGPR bank cannot be moved out of a conflict.
+    // Stop here; falling through would report SGPR banks for a VGPR.
+    if (Size >= NUM_VGPR_BANKS)
+      return 0;
+    for (unsigned I = 0; I < NUM_VGPR_BANKS; ++I) {
+      unsigned NewMask = ((1u << Size) - 1) << I;
+      NewMask = (NewMask | (NewMask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
+      if (Bank != I && !(UsedBanks & NewMask))
+        FreeBanks |= 1u << I;
+    }
+    return FreeBanks;
+  }
+
+  // Find free SGPR banks
+  // SGPR tuples must be aligned, so step is size in banks it
+  // crosses.
+  Bank -= SGPR_BANK_OFFSET;
+  for (unsigned I = 0; I < NUM_SGPR_BANKS; I += Size) {
+    unsigned NewMask = ((1u << Size) - 1) << I;
+    NewMask = (NewMask | (NewMask >> NUM_SGPR_BANKS)) & SGPR_BANK_SHIFTED_MASK;
+    if (Bank != I && !(UsedBanks & (NewMask << SGPR_BANK_OFFSET)))
+      FreeBanks |= (1u << SGPR_BANK_OFFSET) << I;
+  }
+
+  return FreeBanks;
+}
+
+unsigned GCNRegBankReassign::getFreeBanks(unsigned Reg,
+                                          unsigned SubReg,
+                                          unsigned Mask,
+                                          unsigned UsedBanks) const {
+  if (!isReassignable(Reg))
+    return 0;
+  // Free banks for the operand as it is used, i.e. for Reg:SubReg.
+  unsigned FreeBanks = getFreeBanks(Mask, UsedBanks);
+  // Rotate so that the result names banks for Reg rather than for Reg:SubReg.
+  unsigned LM = TRI->getSubRegIndexLaneMask(SubReg).getAsInteger();
+  if (!(LM & 1) && (Mask & VGPR_BANK_MASK)) {
+    unsigned Shift = countTrailingZeros(LM); // Lowest lane covered by SubReg.
+    if (Shift >= NUM_VGPR_BANKS)
+      return 0;
+    unsigned VB = FreeBanks & VGPR_BANK_MASK;
+    FreeBanks = ((VB >> Shift) | (VB << (NUM_VGPR_BANKS - Shift))) &
+                VGPR_BANK_MASK;
+  } else if (!(LM & 3) && (Mask & SGPR_BANK_MASK)) {
+    unsigned Shift = countTrailingZeros(LM) >> 1; // Two lanes per SGPR bank.
+    if (Shift >= NUM_SGPR_BANKS)
+      return 0;
+    unsigned SB = FreeBanks >> SGPR_BANK_OFFSET;
+    FreeBanks = ((SB >> Shift) | (SB << (NUM_SGPR_BANKS - Shift))) &
+                SGPR_BANK_SHIFTED_MASK;
+    FreeBanks <<= SGPR_BANK_OFFSET;
+  }
+
+  LLVM_DEBUG(if (FreeBanks) {
+          dbgs() << "Potential reassignments of " << printReg(Reg, SubReg)
+                 << " to banks: "; dumpFreeBanks(FreeBanks);
+          dbgs() << '\n'; });
+
+  return FreeBanks;
+}
+
+void GCNRegBankReassign::collectCandidates(MachineInstr& MI,
+                                           unsigned UsedBanks,
+                                           unsigned StallCycles) {
+  LLVM_DEBUG(MI.dump());
+  // Reads OperandMasks, which the preceding analyzeInst(MI) call filled in.
+  if (!StallCycles)
+    return;
+
+  LLVM_DEBUG(dbgs() << "Stall cycles = " << StallCycles << '\n');
+  // Examine every pair of recorded operands whose bank masks overlap.
+  for (unsigned I = 0, E = OperandMasks.size(); I + 1 < E; ++I) {
+    for (unsigned J = I + 1; J != E; ++J) {
+      if (!(OperandMasks[I].Mask & OperandMasks[J].Mask))
+        continue;
+
+      unsigned Reg1 = OperandMasks[I].Reg;
+      unsigned Reg2 = OperandMasks[J].Reg;
+      unsigned SubReg1 = OperandMasks[I].SubReg;
+      unsigned SubReg2 = OperandMasks[J].SubReg;
+      unsigned Mask1 = OperandMasks[I].Mask;
+      unsigned Mask2 = OperandMasks[J].Mask;
+      unsigned Size1 = countPopulation(Mask1); // Number of banks each operand
+      unsigned Size2 = countPopulation(Mask2); // occupies.
+
+      LLVM_DEBUG(dbgs() << "Conflicting operands: " << printReg(Reg1, SubReg1) <<
+                      " and " << printReg(Reg2, SubReg2) << '\n');
+      // Weight: recent definitions of either register plus 10 per loop level.
+      unsigned Weight = getOperandGatherWeight(MI, Reg1, Reg2, StallCycles);
+      Weight += MLI->getLoopDepth(MI.getParent()) * 10;
+
+      LLVM_DEBUG(dbgs() << "Stall weight = " << Weight << '\n');
+      // The operand occupying fewer banks gets one extra point of weight.
+      unsigned FreeBanks1 = getFreeBanks(Reg1, SubReg1, Mask1, UsedBanks);
+      unsigned FreeBanks2 = getFreeBanks(Reg2, SubReg2, Mask2, UsedBanks);
+      if (FreeBanks1)
+        Candidates.push(Candidate(&MI, Reg1, FreeBanks1, Weight
+                                    + ((Size2 > Size1) ? 1 : 0)));
+      if (FreeBanks2)
+        Candidates.push(Candidate(&MI, Reg2, FreeBanks2, Weight
+                                    + ((Size1 > Size2) ? 1 : 0)));
+    }
+  }
+}
+
+unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
+                                                unsigned Reg, int Bank,
+                                                bool Collect) {
+  // An instruction may use SrcReg in several operands; analyze it only once.
+  SmallSet<const MachineInstr *, 16> Seen;
+  unsigned Banks = 0;
+  unsigned Total = 0;
+
+  for (auto &User : MRI->use_nodbg_instructions(SrcReg)) {
+    // Bundle headers are skipped; repeated visits are filtered through Seen.
+    if (User.isBundle() || !Seen.insert(&User).second)
+      continue;
+    const unsigned Stalls = analyzeInst(User, Banks, Reg, Bank);
+    if (Collect)
+      collectCandidates(User, Banks, Stalls);
+    Total += Stalls;
+  }
+
+  return Total;
+}
+
+unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI,
+                                         unsigned Bank) const {
+  const TargetRegisterClass *RC = MRI->getRegClass(LI.reg);
+  unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs
+                                                : MaxNumSGPRs;
+  unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0
+                                                        : AMDGPU::SGPR0);
+  // MaxReg is the first 32-bit register beyond the allowed register budget.
+  for (unsigned Reg : RC->getRegisters()) {
+    // Check occupancy limit.
+    if (TRI->isSubRegisterEq(Reg, MaxReg))
+      break;
+    // Only allocatable registers that start in the requested bank qualify.
+    if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg) != Bank)
+      continue;
+    // NOTE(review): an unused CSR overlap ends the whole search; intended?
+    for (unsigned I = 0; CSRegs[I]; ++I)
+      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
+          !LRM->isPhysRegUsed(CSRegs[I]))
+        return AMDGPU::NoRegister;
+
+    LLVM_DEBUG(dbgs() << "Trying register " << printReg(Reg) << '\n');
+    // The first register that does not interfere with LI is the result.
+    if (!LRM->checkInterference(LI, Reg))
+      return Reg;
+  }
+
+  return AMDGPU::NoRegister;
+}
+
+unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
+  if (!LIS->hasInterval(C.Reg))
+    return 0;
+
+  LiveInterval &LI = LIS->getInterval(C.Reg);
+  LLVM_DEBUG(dbgs() << "Try reassign " << printReg(C.Reg) << " in "; C.MI->dump();
+             LI.dump());
+
+  // For each candidate bank walk all instructions in the range of live
+  // interval and check if replacing the register with one belonging to
+  // the candidate bank reduces conflicts.
+
+  unsigned OrigStalls = computeStallCycles(C.Reg);
+  LLVM_DEBUG(dbgs() << "--- Stall cycles in range = " << OrigStalls << '\n');
+  if (!OrigStalls)
+    return 0;
+  // Pairs a bank with the stall count expected after moving C.Reg into it.
+  struct BankStall {
+    BankStall(unsigned b, unsigned s) : Bank(b), Stalls(s) {};
+    bool operator< (const BankStall &RHS) const { return Stalls > RHS.Stalls; }
+    unsigned Bank;
+    unsigned Stalls;
+  };
+  SmallVector<BankStall, 8> BankStalls;
+  // Keep only the free banks that would strictly reduce the stall count.
+  for (int Bank = 0; Bank < NUM_BANKS; ++Bank) {
+    if (C.FreeBanks & (1 << Bank)) {
+      LLVM_DEBUG(dbgs() << "Trying bank " << printBank(Bank) << '\n');
+      unsigned Stalls = computeStallCycles(C.Reg, C.Reg, Bank);
+      if (Stalls < OrigStalls) {
+        LLVM_DEBUG(dbgs() << "With bank " << printBank(Bank) << " -> "
+                     << Stalls << '\n');
+        BankStalls.push_back(BankStall((unsigned)Bank, Stalls));
+      }
+    }
+  }
+  std::sort(BankStalls.begin(), BankStalls.end());
+  // Sorted by descending stalls, so pop_back_val() yields the best bank first.
+  unsigned OrigReg = VRM->getPhys(C.Reg);
+  LRM->unassign(LI); // Drop the current assignment while searching.
+  while (!BankStalls.empty()) {
+    BankStall BS = BankStalls.pop_back_val();
+    unsigned Reg = scavengeReg(LI, BS.Bank);
+    if (Reg == AMDGPU::NoRegister) {
+      LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank)
+                   << '\n');
+      continue;
+    }
+    LLVM_DEBUG(dbgs() << "Found free register " << printReg(Reg)
+                 << (LRM->isPhysRegUsed(Reg) ? "" : " (new)")
+                 << " in bank " << printBank(BS.Bank) << '\n');
+
+    LRM->assign(LI, Reg);
+
+    LLVM_DEBUG(dbgs() << "--- Cycles saved: " << OrigStalls - BS.Stalls << '\n');
+
+    return OrigStalls - BS.Stalls;
+  }
+  LRM->assign(LI, OrigReg); // No bank worked out: restore the old assignment.
+
+  return 0;
+}
+
+unsigned GCNRegBankReassign::collectCandidates(MachineFunction &MF,
+                                               bool Collect) {
+  unsigned TotalStallCycles = 0;
+  // Sum the stalls of every instruction; queue candidates only if Collect.
+  for (MachineBasicBlock &MBB : MF) {
+    // Debug output only: print a header naming the block.
+    LLVM_DEBUG(if (Collect) {
+            if (MBB.getName().empty()) dbgs() << "bb." << MBB.getNumber();
+            else dbgs() << MBB.getName(); dbgs() << ":\n";
+          });
+
+    for (MachineInstr &MI : MBB.instrs()) {
+      if (MI.isBundle())
+          continue; // we analyze the instructions inside the bundle individually
+
+      unsigned UsedBanks = 0;
+      unsigned StallCycles = analyzeInst(MI, UsedBanks);
+
+      if (Collect)
+        collectCandidates(MI, UsedBanks, StallCycles);
+
+      TotalStallCycles += StallCycles;
+    }
+
+    LLVM_DEBUG(if (Collect) { dbgs() << '\n'; });
+  }
+
+  return TotalStallCycles;
+}
+
+void GCNRegBankReassign::removeCandidates(unsigned Reg) {
+  // Forget all queued candidates whose instruction reads Reg.
+  Candidates.remove_if(
+      [&](const Candidate &Cand) { return Cand.MI->readsRegister(Reg, TRI); });
+}
+
+bool GCNRegBankReassign::verifyCycles(MachineFunction &MF,
+                                      unsigned OriginalCycles,
+                                      unsigned CyclesSaved) {
+  // Recount the stalls (without collecting) and check that the sums agree.
+  const unsigned Rem = collectCandidates(MF, /*Collect=*/false);
+  LLVM_DEBUG(dbgs() << "=== After the pass " << Rem << " stall cycles left\n");
+  return OriginalCycles == Rem + CyclesSaved;
+}
+
+bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) {
+  ST = &MF.getSubtarget<GCNSubtarget>();
+  if (!ST->hasRegisterBanking() || skipFunction(MF.getFunction()))
+    return false;
+  // Cache the target info and analyses used by the helper methods.
+  MRI = &MF.getRegInfo();
+  TRI = ST->getRegisterInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  VRM = &getAnalysis<VirtRegMap>();
+  LRM = &getAnalysis<LiveRegMatrix>();
+  LIS = &getAnalysis<LiveIntervals>();
+  // Register budget: the function's limit, capped by the current occupancy.
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  unsigned Occupancy = MFI->getOccupancy();
+  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
+  MaxNumSGPRs = ST->getMaxNumSGPRs(MF);
+  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(Occupancy), MaxNumVGPRs);
+  MaxNumSGPRs = std::min(ST->getMaxNumSGPRs(Occupancy, true), MaxNumSGPRs);
+
+  CSRegs = MRI->getCalleeSavedRegs();
+  // One bit per 32-bit VGPR, then one bit per SGPR pair (see getRegBankMask).
+  RegsUsed.resize(AMDGPU::VGPR_32RegClass.getNumRegs() +
+                  TRI->getEncodingValue(AMDGPU::SGPR_NULL) / 2 + 1);
+
+  LLVM_DEBUG(dbgs() << "=== RegBanks reassign analysis on function " << MF.getName()
+               << '\n');
+
+  unsigned StallCycles = collectCandidates(MF);
+  NumStallsDetected += StallCycles;
+
+  LLVM_DEBUG(dbgs() << "=== " << StallCycles << " stall cycles detected in "
+                  "function " << MF.getName() << '\n');
+
+  Candidates.sort();
+
+  LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
+        for (auto C : Candidates) C.dump(this);
+        dbgs() << "\n\n");
+  // Candidates are sorted by ascending weight; take the heaviest one first.
+  unsigned CyclesSaved = 0;
+  while (!Candidates.empty()) {
+    Candidate C = Candidates.back();
+    unsigned LocalCyclesSaved = tryReassign(C);
+    CyclesSaved += LocalCyclesSaved;
+
+    if (VerifyStallCycles > 1 && !verifyCycles(MF, StallCycles, CyclesSaved))
+      report_fatal_error("RegBank reassign stall cycles verification failed.");
+
+    Candidates.pop_back();
+    if (LocalCyclesSaved) {
+      removeCandidates(C.Reg); // Entries reading C.Reg are stale now.
+      computeStallCycles(C.Reg, AMDGPU::NoRegister, -1, true); // Re-collect.
+      Candidates.sort();
+
+      LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
+            for (auto C : Candidates)
+              C.dump(this);
+            dbgs() << "\n\n");
+    }
+  }
+  NumStallsRecovered += CyclesSaved;
+
+  LLVM_DEBUG(dbgs() << "=== After the pass " << CyclesSaved
+               << " cycles saved in function " << MF.getName() << '\n');
+
+  Candidates.clear();
+
+  if (VerifyStallCycles == 1 && !verifyCycles(MF, StallCycles, CyclesSaved))
+    report_fatal_error("RegBank reassign stall cycles verification failed.");
+
+  RegsUsed.clear();
+
+  return CyclesSaved > 0;
+}
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3d8cacc4f02c..39460fbd8a84 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -1,9 +1,8 @@
 //===- GCNRegPressure.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -64,9 +63,10 @@ void llvm::printLivesAt(SlotIndex SI,
   }
   if (!Num) dbgs() << "  <none>\n";
 }
+#endif
 
-static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
-                    const GCNRPTracker::LiveRegSet &S2) {
+bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
+                   const GCNRPTracker::LiveRegSet &S2) {
   if (S1.size() != S2.size())
     return false;
 
@@ -77,7 +77,7 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
   }
   return true;
 }
-#endif
+
 
 ///////////////////////////////////////////////////////////////////////////////
 // GCNRegPressure
@@ -89,7 +89,9 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg,
   auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
   return STI->isSGPRClass(RC) ?
     (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
-    (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+    STI->hasAGPRs(RC) ?
+      (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
+      (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
 }
 
 void GCNRegPressure::inc(unsigned Reg,
@@ -110,16 +112,18 @@ void GCNRegPressure::inc(unsigned Reg,
   switch (auto Kind = getRegKind(Reg, MRI)) {
   case SGPR32:
   case VGPR32:
+  case AGPR32:
     assert(PrevMask.none() && NewMask == MaxMask);
     Value[Kind] += Sign;
     break;
 
   case SGPR_TUPLE:
   case VGPR_TUPLE:
+  case AGPR_TUPLE:
     assert(NewMask < MaxMask || NewMask == MaxMask);
     assert(PrevMask < NewMask);
 
-    Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
+    Value[Kind == SGPR_TUPLE ? SGPR32 : Kind == AGPR_TUPLE ? AGPR32 : VGPR32] +=
       Sign * (~PrevMask & NewMask).getNumLanes();
 
     if (PrevMask.none()) {
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
index 357d3b7b2334..e4894418b943 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -1,9 +1,8 @@
 //===- GCNRegPressure.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,6 +31,8 @@ struct GCNRegPressure {
     SGPR_TUPLE,
     VGPR32,
     VGPR_TUPLE,
+    AGPR32,
+    AGPR_TUPLE,
     TOTAL_KINDS
   };
 
@@ -44,9 +45,10 @@ struct GCNRegPressure {
   void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); }
 
   unsigned getSGPRNum() const { return Value[SGPR32]; }
-  unsigned getVGPRNum() const { return Value[VGPR32]; }
+  unsigned getVGPRNum() const { return std::max(Value[VGPR32], Value[AGPR32]); }
 
-  unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
+  unsigned getVGPRTuplesWeight() const { return std::max(Value[VGPR_TUPLE],
+                                                         Value[AGPR_TUPLE]); }
   unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
 
   unsigned getOccupancy(const GCNSubtarget &ST) const {
@@ -191,6 +193,50 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
                                      const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);
 
+/// Creates a map MachineInstr -> LiveRegSet.
+/// R - non-empty range yielding MachineInstr * (first element is dereferenced)
+/// After - if true, sample liveness at each dead slot; otherwise at base index
+/// Note: there is no entry in the map for instructions with empty live reg set
+/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
+template <typename Range>
+DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
+  std::vector<SlotIndex> Indexes;
+  Indexes.reserve(std::distance(R.begin(), R.end()));
+  auto &SII = *LIS.getSlotIndexes();
+  for (MachineInstr *I : R) {
+    auto SI = SII.getInstructionIndex(*I);
+    Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+  }
+  std::sort(Indexes.begin(), Indexes.end());
+
+  auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
+  SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+    auto Reg = TargetRegisterInfo::index2VirtReg(I);
+    if (!LIS.hasInterval(Reg))
+      continue;
+    auto &LI = LIS.getInterval(Reg);
+    LiveIdxs.clear();
+    if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+      continue;
+    if (!LI.hasSubRanges()) { // no subranges: use the max lane mask of the vreg
+      for (auto SI : LiveIdxs)
+        LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
+          MRI.getMaxLaneMaskForVReg(Reg);
+    } else // otherwise OR together the lane masks of the live subranges
+      for (const auto &S : LI.subranges()) {
+        // Query each subrange only at indexes the main range reported live.
+        SRLiveIdxs.clear();
+        S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+        for (auto SI : SRLiveIdxs)
+          LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask;
+      }
+  }
+  return LiveRegMap;
+}
+
 inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
                                                  const LiveIntervals &LIS) {
   return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
@@ -212,6 +258,9 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
   return Res;
 }
 
+bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+             const GCNRPTracker::LiveRegSet &S2);
+
 void printLivesAt(SlotIndex SI,
                   const LiveIntervals &LIS,
                   const MachineRegisterInfo &MRI);
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f09b7f6cff22..4ea990ae490e 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1,9 +1,8 @@
 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -446,8 +445,12 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
     RPTracker.reset(*MBB->begin(), &LiveIn);
     MBBLiveIns.erase(LiveInIt);
   } else {
-    I = Regions[CurRegion].first;
-    RPTracker.reset(*I);
+    auto &Rgn = Regions[CurRegion];
+    I = Rgn.first;
+    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+    auto LRS = BBLiveInMap.lookup(NonDbgMI);
+    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
+    RPTracker.reset(*I, &LRS);
   }
 
   for ( ; ; ) {
@@ -478,6 +481,23 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
   }
 }
 
+/// Walks Regions backwards and, for every run of regions sharing one block,
+/// maps the first non-debug instruction of its leading region to its live-ins.
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveInMap() const {
+  assert(!Regions.empty());
+  std::vector<MachineInstr *> BBStarters;
+  BBStarters.reserve(Regions.size());
+  auto I = Regions.rbegin(), E = Regions.rend();
+  do {
+    auto *BB = I->first->getParent(); // block of the run that starts here
+    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
+    BBStarters.push_back(MI);
+    while (++I != E && I->first->getParent() == BB) {} // skip rest of run
+  } while (I != E);
+  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+}
+
 void GCNScheduleDAGMILive::finalizeSchedule() {
   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
@@ -485,6 +505,9 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
   LiveIns.resize(Regions.size());
   Pressure.resize(Regions.size());
 
+  if (!Regions.empty())
+    BBLiveInMap = getBBLiveInMap();
+
   do {
     Stage++;
     RegionIdx = 0;
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index 3ac6af89cb9b..eaf3dee9ba5d 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -1,9 +1,8 @@
 //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,7 @@ class GCNSubtarget;
 /// and the GenericScheduler is that GCNSchedStrategy uses different
 /// heuristics to determine excess/critical pressure sets.  Its goal is to
 /// maximize kernel occupancy (i.e. maximum number of waves per simd).
-class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
   friend class GCNScheduleDAGMILive;
 
   SUnit *pickNodeBidirectional(bool &IsTopNode);
@@ -60,7 +59,7 @@ public:
   void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
 };
 
-class GCNScheduleDAGMILive : public ScheduleDAGMILive {
+class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
 
   const GCNSubtarget &ST;
 
@@ -78,7 +77,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
   // Current region index.
   size_t RegionIdx;
 
-  // Vecor of regions recorder for later rescheduling
+  // Vector of regions recorded for later rescheduling
   SmallVector<std::pair<MachineBasicBlock::iterator,
                         MachineBasicBlock::iterator>, 32> Regions;
 
@@ -91,6 +90,9 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
   // Temporary basic block live-in cache.
   DenseMap<const MachineBasicBlock*, GCNRPTracker::LiveRegSet> MBBLiveIns;
 
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+
   // Return current region pressure.
   GCNRegPressure getRealRegPressure() const;
 
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
deleted file mode 100644
index fab0f87dfcbe..000000000000
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ /dev/null
@@ -1,1413 +0,0 @@
-//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// \file
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUInstPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "Utils/AMDGPUAsmUtils.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-using namespace llvm;
-using namespace llvm::AMDGPU;
-
-void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                  StringRef Annot, const MCSubtargetInfo &STI) {
-  OS.flush();
-  printInstruction(MI, STI, OS);
-  printAnnotation(OS, Annot);
-}
-
-void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
-}
-
-void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
-  O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
-}
-
-void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  // It's possible to end up with a 32-bit literal used with a 16-bit operand
-  // with ignored high bits. Print as 32-bit anyway in that case.
-  int64_t Imm = MI->getOperand(OpNo).getImm();
-  if (isInt<16>(Imm) || isUInt<16>(Imm))
-    O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
-  else
-    printU32ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &O) {
-  O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
-}
-
-void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &O) {
-  O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
-}
-
-void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
-                                              raw_ostream &O) {
-  O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
-}
-
-void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
-                                              raw_ostream &O) {
-  O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
-}
-
-void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
-}
-
-void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
-                                      raw_ostream &O, StringRef BitName) {
-  if (MI->getOperand(OpNo).getImm()) {
-    O << ' ' << BitName;
-  }
-}
-
-void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "offen");
-}
-
-void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "idxen");
-}
-
-void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "addr64");
-}
-
-void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm()) {
-    O << " offset:";
-    printU16ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
-  uint16_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm != 0) {
-    O << ((OpNo == 0)? "offset:" : " offset:");
-    printU16ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  uint16_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm != 0) {
-    O << ((OpNo == 0)? "offset:" : " offset:");
-    printS13ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm()) {
-    O << " offset0:";
-    printU8ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm()) {
-    O << " offset1:";
-    printU8ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  printU32ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  printU32ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  printU32ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "gds");
-}
-
-void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "glc");
-}
-
-void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "slc");
-}
-
-void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "tfe");
-}
-
-void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI, raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm()) {
-    O << " dmask:";
-    printU16ImmOperand(MI, OpNo, STI, O);
-  }
-}
-
-void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "unorm");
-}
-
-void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
-                                const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "da");
-}
-
-void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  if (STI.hasFeature(AMDGPU::FeatureR128A16))
-    printNamedBit(MI, OpNo, O, "a16");
-  else
-    printNamedBit(MI, OpNo, O, "r128");
-}
-
-void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "lwe");
-}
-
-void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
-                                 const MCSubtargetInfo &STI, raw_ostream &O) {
-  printNamedBit(MI, OpNo, O, "d16");
-}
-
-void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm())
-    O << " compr";
-}
-
-void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI,
-                                   raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm())
-    O << " vm";
-}
-
-void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
-  if (unsigned Val = MI->getOperand(OpNo).getImm()) {
-    O << " dfmt:" << (Val & 15);
-    O << ", nfmt:" << (Val >> 4);
-  }
-}
-
-void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
-                                        const MCRegisterInfo &MRI) {
-  switch (RegNo) {
-  case AMDGPU::VCC:
-    O << "vcc";
-    return;
-  case AMDGPU::SCC:
-    O << "scc";
-    return;
-  case AMDGPU::EXEC:
-    O << "exec";
-    return;
-  case AMDGPU::M0:
-    O << "m0";
-    return;
-  case AMDGPU::FLAT_SCR:
-    O << "flat_scratch";
-    return;
-  case AMDGPU::XNACK_MASK:
-    O << "xnack_mask";
-    return;
-  case AMDGPU::VCC_LO:
-    O << "vcc_lo";
-    return;
-  case AMDGPU::VCC_HI:
-    O << "vcc_hi";
-    return;
-  case AMDGPU::TBA_LO:
-    O << "tba_lo";
-    return;
-  case AMDGPU::TBA_HI:
-    O << "tba_hi";
-    return;
-  case AMDGPU::TMA_LO:
-    O << "tma_lo";
-    return;
-  case AMDGPU::TMA_HI:
-    O << "tma_hi";
-    return;
-  case AMDGPU::EXEC_LO:
-    O << "exec_lo";
-    return;
-  case AMDGPU::EXEC_HI:
-    O << "exec_hi";
-    return;
-  case AMDGPU::FLAT_SCR_LO:
-    O << "flat_scratch_lo";
-    return;
-  case AMDGPU::FLAT_SCR_HI:
-    O << "flat_scratch_hi";
-    return;
-  case AMDGPU::XNACK_MASK_LO:
-    O << "xnack_mask_lo";
-    return;
-  case AMDGPU::XNACK_MASK_HI:
-    O << "xnack_mask_hi";
-    return;
-  case AMDGPU::FP_REG:
-  case AMDGPU::SP_REG:
-  case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
-  case AMDGPU::PRIVATE_RSRC_REG:
-    llvm_unreachable("pseudo-register should not ever be emitted");
-  default:
-    break;
-  }
-
-  // The low 8 bits of the encoding value is the register index, for both VGPRs
-  // and SGPRs.
-  unsigned RegIdx = MRI.getEncodingValue(RegNo) & ((1 << 8) - 1);
-
-  unsigned NumRegs;
-  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 1;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 1;
-  } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo)) {
-    O <<'v';
-    NumRegs = 2;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 2;
-  } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 4;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 4;
-  } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 3;
-  } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 8;
-  } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 8;
-  } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 16;
-  } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 16;
-  } else {
-    O << getRegisterName(RegNo);
-    return;
-  }
-
-  if (NumRegs == 1) {
-    O << RegIdx;
-    return;
-  }
-
-  O << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
-}
-
-void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI, raw_ostream &O) {
-  if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
-    O << "_e64 ";
-  else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
-    O << "_dpp ";
-  else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
-    O << "_sdwa ";
-  else
-    O << "_e32 ";
-
-  printOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
-                                       const MCSubtargetInfo &STI, raw_ostream &O) {
-  if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
-    O << " ";
-  else
-    O << "_e32 ";
-
-  printOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  int16_t SImm = static_cast<int16_t>(Imm);
-  if (SImm >= -16 && SImm <= 64) {
-    O << SImm;
-    return;
-  }
-
-  if (Imm == 0x3C00)
-    O<< "1.0";
-  else if (Imm == 0xBC00)
-    O<< "-1.0";
-  else if (Imm == 0x3800)
-    O<< "0.5";
-  else if (Imm == 0xB800)
-    O<< "-0.5";
-  else if (Imm == 0x4000)
-    O<< "2.0";
-  else if (Imm == 0xC000)
-    O<< "-2.0";
-  else if (Imm == 0x4400)
-    O<< "4.0";
-  else if (Imm == 0xC400)
-    O<< "-4.0";
-  else if (Imm == 0x3118) {
-    assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]);
-    O << "0.15915494";
-  } else
-    O << formatHex(static_cast<uint64_t>(Imm));
-}
-
-void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  uint16_t Lo16 = static_cast<uint16_t>(Imm);
-  printImmediate16(Lo16, STI, O);
-}
-
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  int32_t SImm = static_cast<int32_t>(Imm);
-  if (SImm >= -16 && SImm <= 64) {
-    O << SImm;
-    return;
-  }
-
-  if (Imm == FloatToBits(0.0f))
-    O << "0.0";
-  else if (Imm == FloatToBits(1.0f))
-    O << "1.0";
-  else if (Imm == FloatToBits(-1.0f))
-    O << "-1.0";
-  else if (Imm == FloatToBits(0.5f))
-    O << "0.5";
-  else if (Imm == FloatToBits(-0.5f))
-    O << "-0.5";
-  else if (Imm == FloatToBits(2.0f))
-    O << "2.0";
-  else if (Imm == FloatToBits(-2.0f))
-    O << "-2.0";
-  else if (Imm == FloatToBits(4.0f))
-    O << "4.0";
-  else if (Imm == FloatToBits(-4.0f))
-    O << "-4.0";
-  else if (Imm == 0x3e22f983 &&
-           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
-    O << "0.15915494";
-  else
-    O << formatHex(static_cast<uint64_t>(Imm));
-}
-
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  int64_t SImm = static_cast<int64_t>(Imm);
-  if (SImm >= -16 && SImm <= 64) {
-    O << SImm;
-    return;
-  }
-
-  if (Imm == DoubleToBits(0.0))
-    O << "0.0";
-  else if (Imm == DoubleToBits(1.0))
-    O << "1.0";
-  else if (Imm == DoubleToBits(-1.0))
-    O << "-1.0";
-  else if (Imm == DoubleToBits(0.5))
-    O << "0.5";
-  else if (Imm == DoubleToBits(-0.5))
-    O << "-0.5";
-  else if (Imm == DoubleToBits(2.0))
-    O << "2.0";
-  else if (Imm == DoubleToBits(-2.0))
-    O << "-2.0";
-  else if (Imm == DoubleToBits(4.0))
-    O << "4.0";
-  else if (Imm == DoubleToBits(-4.0))
-    O << "-4.0";
-  else if (Imm == 0x3fc45f306dc9c882 &&
-           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
-  O << "0.15915494";
-  else {
-    assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
-
-    // In rare situations, we will have a 32-bit literal in a 64-bit
-    // operand. This is technically allowed for the encoding of s_mov_b64.
-    O << formatHex(static_cast<uint64_t>(Imm));
-  }
-}
-
-void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  if (OpNo >= MI->getNumOperands()) {
-    O << "/*Missing OP" << OpNo << "*/";
-    return;
-  }
-
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    printRegOperand(Op.getReg(), O, MRI);
-  } else if (Op.isImm()) {
-    const MCInstrDesc &Desc = MII.get(MI->getOpcode());
-    switch (Desc.OpInfo[OpNo].OperandType) {
-    case AMDGPU::OPERAND_REG_IMM_INT32:
-    case AMDGPU::OPERAND_REG_IMM_FP32:
-    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
-    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
-    case MCOI::OPERAND_IMMEDIATE:
-      printImmediate32(Op.getImm(), STI, O);
-      break;
-    case AMDGPU::OPERAND_REG_IMM_INT64:
-    case AMDGPU::OPERAND_REG_IMM_FP64:
-    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
-    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
-      printImmediate64(Op.getImm(), STI, O);
-      break;
-    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
-    case AMDGPU::OPERAND_REG_IMM_INT16:
-    case AMDGPU::OPERAND_REG_IMM_FP16:
-      printImmediate16(Op.getImm(), STI, O);
-      break;
-    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
-    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-      printImmediateV216(Op.getImm(), STI, O);
-      break;
-    case MCOI::OPERAND_UNKNOWN:
-    case MCOI::OPERAND_PCREL:
-      O << formatDec(Op.getImm());
-      break;
-    case MCOI::OPERAND_REGISTER:
-      // FIXME: This should be removed and handled somewhere else. Seems to come
-      // from a disassembler bug.
-      O << "/*invalid immediate*/";
-      break;
-    default:
-      // We hit this for the immediate instruction bits that don't yet have a
-      // custom printer.
-      llvm_unreachable("unexpected immediate operand type");
-    }
-  } else if (Op.isFPImm()) {
-    // We special case 0.0 because otherwise it will be printed as an integer.
-    if (Op.getFPImm() == 0.0)
-      O << "0.0";
-    else {
-      const MCInstrDesc &Desc = MII.get(MI->getOpcode());
-      int RCID = Desc.OpInfo[OpNo].RegClass;
-      unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
-      if (RCBits == 32)
-        printImmediate32(FloatToBits(Op.getFPImm()), STI, O);
-      else if (RCBits == 64)
-        printImmediate64(DoubleToBits(Op.getFPImm()), STI, O);
-      else
-        llvm_unreachable("Invalid register class size");
-    }
-  } else if (Op.isExpr()) {
-    const MCExpr *Exp = Op.getExpr();
-    Exp->print(O, &MAI);
-  } else {
-    O << "/*INV_OP*/";
-  }
-}
-
-void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
-                                                   unsigned OpNo,
-                                                   const MCSubtargetInfo &STI,
-                                                   raw_ostream &O) {
-  unsigned InputModifiers = MI->getOperand(OpNo).getImm();
-
-  // Use 'neg(...)' instead of '-' to avoid ambiguity.
-  // This is important for integer literals because
-  // -1 is not the same value as neg(1).
-  bool NegMnemo = false;
-
-  if (InputModifiers & SISrcMods::NEG) {
-    if (OpNo + 1 < MI->getNumOperands() &&
-        (InputModifiers & SISrcMods::ABS) == 0) {
-      const MCOperand &Op = MI->getOperand(OpNo + 1);
-      NegMnemo = Op.isImm() || Op.isFPImm();
-    }
-    if (NegMnemo) {
-      O << "neg(";
-    } else {
-      O << '-';
-    }
-  }
-
-  if (InputModifiers & SISrcMods::ABS)
-    O << '|';
-  printOperand(MI, OpNo + 1, STI, O);
-  if (InputModifiers & SISrcMods::ABS)
-    O << '|';
-
-  if (NegMnemo) {
-    O << ')';
-  }
-}
-
-void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
-                                                    unsigned OpNo,
-                                                    const MCSubtargetInfo &STI,
-                                                    raw_ostream &O) {
-  unsigned InputModifiers = MI->getOperand(OpNo).getImm();
-  if (InputModifiers & SISrcMods::SEXT)
-    O << "sext(";
-  printOperand(MI, OpNo + 1, STI, O);
-  if (InputModifiers & SISrcMods::SEXT)
-    O << ')';
-}
-
-void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  using namespace AMDGPU::DPP;
-
-  unsigned Imm = MI->getOperand(OpNo).getImm();
-  if (Imm <= DppCtrl::QUAD_PERM_LAST) {
-    O << " quad_perm:[";
-    O << formatDec(Imm & 0x3)         << ',';
-    O << formatDec((Imm & 0xc)  >> 2) << ',';
-    O << formatDec((Imm & 0x30) >> 4) << ',';
-    O << formatDec((Imm & 0xc0) >> 6) << ']';
-  } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
-             (Imm <= DppCtrl::ROW_SHL_LAST)) {
-    O << " row_shl:";
-    printU4ImmDecOperand(MI, OpNo, O);
-  } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
-             (Imm <= DppCtrl::ROW_SHR_LAST)) {
-    O << " row_shr:";
-    printU4ImmDecOperand(MI, OpNo, O);
-  } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
-             (Imm <= DppCtrl::ROW_ROR_LAST)) {
-    O << " row_ror:";
-    printU4ImmDecOperand(MI, OpNo, O);
-  } else if (Imm == DppCtrl::WAVE_SHL1) {
-    O << " wave_shl:1";
-  } else if (Imm == DppCtrl::WAVE_ROL1) {
-    O << " wave_rol:1";
-  } else if (Imm == DppCtrl::WAVE_SHR1) {
-    O << " wave_shr:1";
-  } else if (Imm == DppCtrl::WAVE_ROR1) {
-    O << " wave_ror:1";
-  } else if (Imm == DppCtrl::ROW_MIRROR) {
-    O << " row_mirror";
-  } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
-    O << " row_half_mirror";
-  } else if (Imm == DppCtrl::BCAST15) {
-    O << " row_bcast:15";
-  } else if (Imm == DppCtrl::BCAST31) {
-    O << " row_bcast:31";
-  } else {
-    O << " /* Invalid dpp_ctrl value */";
-  }
-}
-
-void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  O << " row_mask:";
-  printU4ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  O << " bank_mask:";
-  printU4ImmOperand(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNo).getImm();
-  if (Imm) {
-    O << " bound_ctrl:0"; // XXX - this syntax is used in sp3
-  }
-}
-
-void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O) {
-  using namespace llvm::AMDGPU::SDWA;
-
-  unsigned Imm = MI->getOperand(OpNo).getImm();
-  switch (Imm) {
-  case SdwaSel::BYTE_0: O << "BYTE_0"; break;
-  case SdwaSel::BYTE_1: O << "BYTE_1"; break;
-  case SdwaSel::BYTE_2: O << "BYTE_2"; break;
-  case SdwaSel::BYTE_3: O << "BYTE_3"; break;
-  case SdwaSel::WORD_0: O << "WORD_0"; break;
-  case SdwaSel::WORD_1: O << "WORD_1"; break;
-  case SdwaSel::DWORD: O << "DWORD"; break;
-  default: llvm_unreachable("Invalid SDWA data select operand");
-  }
-}
-
-void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  O << "dst_sel:";
-  printSDWASel(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  O << "src0_sel:";
-  printSDWASel(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  O << "src1_sel:";
-  printSDWASel(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  using namespace llvm::AMDGPU::SDWA;
-
-  O << "dst_unused:";
-  unsigned Imm = MI->getOperand(OpNo).getImm();
-  switch (Imm) {
-  case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
-  case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
-  case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
-  default: llvm_unreachable("Invalid SDWA dest_unused operand");
-  }
-}
-
-template <unsigned N>
-void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  unsigned Opc = MI->getOpcode();
-  int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
-  unsigned En = MI->getOperand(EnIdx).getImm();
-
-  int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
-
-  // If compr is set, print as src0, src0, src1, src1
-  if (MI->getOperand(ComprIdx).getImm()) {
-    if (N == 1 || N == 2)
-      --OpNo;
-    else if (N == 3)
-      OpNo -= 2;
-  }
-
-  if (En & (1 << N))
-    printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
-  else
-    O << "off";
-}
-
-void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  printExpSrcN<0>(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  printExpSrcN<1>(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  printExpSrcN<2>(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  printExpSrcN<3>(MI, OpNo, STI, O);
-}
-
-void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
-  // This is really a 6 bit field.
-  uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
-
-  if (Tgt <= 7)
-    O << " mrt" << Tgt;
-  else if (Tgt == 8)
-    O << " mrtz";
-  else if (Tgt == 9)
-    O << " null";
-  else if (Tgt >= 12 && Tgt <= 15)
-    O << " pos" << Tgt - 12;
-  else if (Tgt >= 32 && Tgt <= 63)
-    O << " param" << Tgt - 32;
-  else {
-    // Reserved values 10, 11
-    O << " invalid_target_" << Tgt;
-  }
-}
-
-static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
-                               bool IsPacked, bool HasDstSel) {
-  int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
-
-  for (int I = 0; I < NumOps; ++I) {
-    if (!!(Ops[I] & Mod) != DefaultValue)
-      return false;
-  }
-
-  if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
-    return false;
-
-  return true;
-}
-
-void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
-                                            StringRef Name,
-                                            unsigned Mod,
-                                            raw_ostream &O) {
-  unsigned Opc = MI->getOpcode();
-  int NumOps = 0;
-  int Ops[3];
-
-  for (int OpName : { AMDGPU::OpName::src0_modifiers,
-                      AMDGPU::OpName::src1_modifiers,
-                      AMDGPU::OpName::src2_modifiers }) {
-    int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
-    if (Idx == -1)
-      break;
-
-    Ops[NumOps++] = MI->getOperand(Idx).getImm();
-  }
-
-  const bool HasDstSel =
-    NumOps > 0 &&
-    Mod == SISrcMods::OP_SEL_0 &&
-    MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
-
-  const bool IsPacked =
-    MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
-
-  if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
-    return;
-
-  O << Name;
-  for (int I = 0; I < NumOps; ++I) {
-    if (I != 0)
-      O << ',';
-
-    O << !!(Ops[I] & Mod);
-  }
-
-  if (HasDstSel) {
-    O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
-  }
-
-  O << ']';
-}
-
-void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
-                                   const MCSubtargetInfo &STI,
-                                   raw_ostream &O) {
-  printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
-}
-
-void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
-}
-
-void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI,
-                                   raw_ostream &O) {
-  printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
-}
-
-void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI,
-                                   raw_ostream &O) {
-  printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
-}
-
-void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  switch (Imm) {
-  case 0:
-    O << "p10";
-    break;
-  case 1:
-    O << "p20";
-    break;
-  case 2:
-    O << "p0";
-    break;
-  default:
-    O << "invalid_param_" << Imm;
-  }
-}
-
-void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Attr = MI->getOperand(OpNum).getImm();
-  O << "attr" << Attr;
-}
-
-void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Chan = MI->getOperand(OpNum).getImm();
-  O << '.' << "xyzw"[Chan & 0x3];
-}
-
-void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-  if (Val == 0) {
-    O << " 0";
-    return;
-  }
-
-  if (Val & VGPRIndexMode::DST_ENABLE)
-    O << " dst";
-
-  if (Val & VGPRIndexMode::SRC0_ENABLE)
-    O << " src0";
-
-  if (Val & VGPRIndexMode::SRC1_ENABLE)
-    O << " src1";
-
-  if (Val & VGPRIndexMode::SRC2_ENABLE)
-    O << " src2";
-}
-
-void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  printOperand(MI, OpNo, STI, O);
-  O  << ", ";
-  printOperand(MI, OpNo + 1, STI, O);
-}
-
-void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O, StringRef Asm,
-                                   StringRef Default) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  assert(Op.isImm());
-  if (Op.getImm() == 1) {
-    O << Asm;
-  } else {
-    O << Default;
-  }
-}
-
-void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O, char Asm) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  assert(Op.isImm());
-  if (Op.getImm() == 1)
-    O << Asm;
-}
-
-void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
-                                  const MCSubtargetInfo &STI,
-                                  raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm())
-    O << " high";
-}
-
-void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  if (MI->getOperand(OpNo).getImm())
-    O << " clamp";
-}
-
-void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
-  int Imm = MI->getOperand(OpNo).getImm();
-  if (Imm == SIOutMods::MUL2)
-    O << " mul:2";
-  else if (Imm == SIOutMods::MUL4)
-    O << " mul:4";
-  else if (Imm == SIOutMods::DIV2)
-    O << " div:2";
-}
-
-void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  using namespace llvm::AMDGPU::SendMsg;
-
-  const unsigned SImm16 = MI->getOperand(OpNo).getImm();
-  const unsigned Id = SImm16 & ID_MASK_;
-  do {
-    if (Id == ID_INTERRUPT) {
-      if ((SImm16 & ~ID_MASK_) != 0) // Unused/unknown bits must be 0.
-        break;
-      O << "sendmsg(" << IdSymbolic[Id] << ')';
-      return;
-    }
-    if (Id == ID_GS || Id == ID_GS_DONE) {
-      if ((SImm16 & ~(ID_MASK_|OP_GS_MASK_|STREAM_ID_MASK_)) != 0) // Unused/unknown bits must be 0.
-        break;
-      const unsigned OpGs = (SImm16 & OP_GS_MASK_) >> OP_SHIFT_;
-      const unsigned StreamId = (SImm16 & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
-      if (OpGs == OP_GS_NOP && Id != ID_GS_DONE) // NOP to be used for GS_DONE only.
-        break;
-      if (OpGs == OP_GS_NOP && StreamId != 0) // NOP does not use/define stream id bits.
-        break;
-      O << "sendmsg(" << IdSymbolic[Id] << ", " << OpGsSymbolic[OpGs];
-      if (OpGs != OP_GS_NOP) {  O << ", " << StreamId; }
-      O << ')';
-      return;
-    }
-    if (Id == ID_SYSMSG) {
-      if ((SImm16 & ~(ID_MASK_|OP_SYS_MASK_)) != 0) // Unused/unknown bits must be 0.
-        break;
-      const unsigned OpSys = (SImm16 & OP_SYS_MASK_) >> OP_SHIFT_;
-      if (! (OP_SYS_FIRST_ <= OpSys && OpSys < OP_SYS_LAST_)) // Unused/unknown.
-        break;
-      O << "sendmsg(" << IdSymbolic[Id] << ", " << OpSysSymbolic[OpSys] << ')';
-      return;
-    }
-  } while (false);
-  O << SImm16; // Unknown simm16 code.
-}
-
-static void printSwizzleBitmask(const uint16_t AndMask,
-                                const uint16_t OrMask,
-                                const uint16_t XorMask,
-                                raw_ostream &O) {
-  using namespace llvm::AMDGPU::Swizzle;
-
-  uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
-  uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
-
-  O << "\"";
-
-  for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
-    uint16_t p0 = Probe0 & Mask;
-    uint16_t p1 = Probe1 & Mask;
-
-    if (p0 == p1) {
-      if (p0 == 0) {
-        O << "0";
-      } else {
-        O << "1";
-      }
-    } else {
-      if (p0 == 0) {
-        O << "p";
-      } else {
-        O << "i";
-      }
-    }
-  }
-
-  O << "\"";
-}
-
-void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  using namespace llvm::AMDGPU::Swizzle;
-
-  uint16_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm == 0) {
-    return;
-  }
-
-  O << " offset:";
-
-  if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
-
-    O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
-    for (auto i = 0; i < LANE_NUM; ++i) {
-      O << ",";
-      O << formatDec(Imm & LANE_MASK);
-      Imm >>= LANE_SHIFT;
-    }
-    O << ")";
-
-  } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
-
-    uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
-    uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
-    uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
-
-    if (AndMask == BITMASK_MAX &&
-        OrMask == 0 &&
-        countPopulation(XorMask) == 1) {
-
-      O << "swizzle(" << IdSymbolic[ID_SWAP];
-      O << ",";
-      O << formatDec(XorMask);
-      O << ")";
-
-    } else if (AndMask == BITMASK_MAX &&
-               OrMask == 0 && XorMask > 0 &&
-               isPowerOf2_64(XorMask + 1)) {
-
-      O << "swizzle(" << IdSymbolic[ID_REVERSE];
-      O << ",";
-      O << formatDec(XorMask + 1);
-      O << ")";
-
-    } else {
-
-      uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
-      if (GroupSize > 1 &&
-          isPowerOf2_64(GroupSize) &&
-          OrMask < GroupSize &&
-          XorMask == 0) {
-
-        O << "swizzle(" << IdSymbolic[ID_BROADCAST];
-        O << ",";
-        O << formatDec(GroupSize);
-        O << ",";
-        O << formatDec(OrMask);
-        O << ")";
-
-      } else {
-        O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
-        O << ",";
-        printSwizzleBitmask(AndMask, OrMask, XorMask, O);
-        O << ")";
-      }
-    }
-  } else {
-    printU16ImmDecOperand(MI, OpNo, O);
-  }
-}
-
-void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
-
-  unsigned SImm16 = MI->getOperand(OpNo).getImm();
-  unsigned Vmcnt, Expcnt, Lgkmcnt;
-  decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
-
-  bool NeedSpace = false;
-
-  if (Vmcnt != getVmcntBitMask(ISA)) {
-    O << "vmcnt(" << Vmcnt << ')';
-    NeedSpace = true;
-  }
-
-  if (Expcnt != getExpcntBitMask(ISA)) {
-    if (NeedSpace)
-      O << ' ';
-    O << "expcnt(" << Expcnt << ')';
-    NeedSpace = true;
-  }
-
-  if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
-    if (NeedSpace)
-      O << ' ';
-    O << "lgkmcnt(" << Lgkmcnt << ')';
-  }
-}
-
-void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI, raw_ostream &O) {
-  using namespace llvm::AMDGPU::Hwreg;
-
-  unsigned SImm16 = MI->getOperand(OpNo).getImm();
-  const unsigned Id = (SImm16 & ID_MASK_) >> ID_SHIFT_;
-  const unsigned Offset = (SImm16 & OFFSET_MASK_) >> OFFSET_SHIFT_;
-  const unsigned Width = ((SImm16 & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
-
-  O << "hwreg(";
-  unsigned Last = ID_SYMBOLIC_LAST_;
-  if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI) || AMDGPU::isVI(STI))
-    Last = ID_SYMBOLIC_FIRST_GFX9_;
-  if (ID_SYMBOLIC_FIRST_ <= Id && Id < Last && IdSymbolic[Id]) {
-    O << IdSymbolic[Id];
-  } else {
-    O << Id;
-  }
-  if (Width != WIDTH_M1_DEFAULT_ + 1 || Offset != OFFSET_DEFAULT_) {
-    O << ", " << Offset << ", " << Width;
-  }
-  O << ')';
-}
-
-#include "AMDGPUGenAsmWriter.inc"
-
-void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-		                StringRef Annot, const MCSubtargetInfo &STI) {
-  O.flush();
-  printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
-                               raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
-}
-
-void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  int BankSwizzle = MI->getOperand(OpNo).getImm();
-  switch (BankSwizzle) {
-  case 1:
-    O << "BS:VEC_021/SCL_122";
-    break;
-  case 2:
-    O << "BS:VEC_120/SCL_212";
-    break;
-  case 3:
-    O << "BS:VEC_102/SCL_221";
-    break;
-  case 4:
-    O << "BS:VEC_201";
-    break;
-  case 5:
-    O << "BS:VEC_210";
-    break;
-  default:
-    break;
-  }
-}
-
-void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
-                                 raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT");
-}
-
-void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo,
-                                raw_ostream &O) {
-  unsigned CT = MI->getOperand(OpNo).getImm();
-  switch (CT) {
-  case 0:
-    O << 'U';
-    break;
-  case 1:
-    O << 'N';
-    break;
-  default:
-    break;
-  }
-}
-
-void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O) {
-  int KCacheMode = MI->getOperand(OpNo).getImm();
-  if (KCacheMode > 0) {
-    int KCacheBank = MI->getOperand(OpNo - 2).getImm();
-    O << "CB" << KCacheBank << ':';
-    int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
-    int LineSize = (KCacheMode == 1) ? 16 : 32;
-    O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
-  }
-}
-
-void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
-                                raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " ");
-}
-
-void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  assert(Op.isImm() || Op.isExpr());
-  if (Op.isImm()) {
-    int64_t Imm = Op.getImm();
-    O << Imm << '(' << BitsToFloat(Imm) << ')';
-  }
-  if (Op.isExpr()) {
-    Op.getExpr()->print(O << '@', &MAI);
-  }
-}
-
-void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
-                               raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-');
-}
-
-void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
-                                raw_ostream &O) {
-  switch (MI->getOperand(OpNo).getImm()) {
-  default: break;
-  case 1:
-    O << " * 2.0";
-    break;
-  case 2:
-    O << " * 4.0";
-    break;
-  case 3:
-    O << " / 2.0";
-    break;
-  }
-}
-
-void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
-                                      raw_ostream &O) {
-  printOperand(MI, OpNo, O);
-  O  << ", ";
-  printOperand(MI, OpNo + 1, O);
-}
-
-void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  if (OpNo >= MI->getNumOperands()) {
-    O << "/*Missing OP" << OpNo << "*/";
-    return;
-  }
-
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    switch (Op.getReg()) {
-    // This is the default predicate state, so we don't need to print it.
-    case R600::PRED_SEL_OFF:
-      break;
-
-    default:
-      O << getRegisterName(Op.getReg());
-      break;
-    }
-  } else if (Op.isImm()) {
-      O << Op.getImm();
-  } else if (Op.isFPImm()) {
-    // We special case 0.0 because otherwise it will be printed as an integer.
-    if (Op.getFPImm() == 0.0)
-      O << "0.0";
-    else {
-      O << Op.getFPImm();
-    }
-  } else if (Op.isExpr()) {
-    const MCExpr *Exp = Op.getExpr();
-    Exp->print(O, &MAI);
-  } else {
-    O << "/*INV_OP*/";
-  }
-}
-
-void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
-                               raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+');
-}
-
-void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O) {
-  unsigned Sel = MI->getOperand(OpNo).getImm();
-  switch (Sel) {
-  case 0:
-    O << 'X';
-    break;
-  case 1:
-    O << 'Y';
-    break;
-  case 2:
-    O << 'Z';
-    break;
-  case 3:
-    O << 'W';
-    break;
-  case 4:
-    O << '0';
-    break;
-  case 5:
-    O << '1';
-    break;
-  case 7:
-    O << '_';
-    break;
-  default:
-    break;
-  }
-}
-
-void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,");
-}
-
-void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
-                                      raw_ostream &O) {
-  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,");
-}
-
-void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
-                                 raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.getImm() == 0) {
-    O << " (MASKED)";
-  }
-}
-
-#include "R600GenAsmWriter.inc"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
deleted file mode 100644
index 0ba74ca0f3e1..000000000000
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ /dev/null
@@ -1,250 +0,0 @@
-//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_INSTPRINTER_AMDGPUINSTPRINTER_H
-#define LLVM_LIB_TARGET_AMDGPU_INSTPRINTER_AMDGPUINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class AMDGPUInstPrinter : public MCInstPrinter {
-public:
-  AMDGPUInstPrinter(const MCAsmInfo &MAI,
-                    const MCInstrInfo &MII, const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-  //Autogenerated by tblgen
-  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  static void printRegOperand(unsigned RegNo, raw_ostream &O,
-                              const MCRegisterInfo &MRI);
-
-private:
-  void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-  void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                     StringRef BitName);
-  void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-  void printOffsetS13(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                      raw_ostream &O);
-
-  void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSMRDOffset20(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                  raw_ostream &O);
-  void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                  raw_ostream &O);
-  void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-               raw_ostream &O);
-  void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                 raw_ostream &O);
-  void printLWE(const MCInst *MI, unsigned OpNo,
-                const MCSubtargetInfo &STI, raw_ostream &O);
-  void printD16(const MCInst *MI, unsigned OpNo,
-                const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpCompr(const MCInst *MI, unsigned OpNo,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpVM(const MCInst *MI, unsigned OpNo,
-                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFORMAT(const MCInst *MI, unsigned OpNo,
-                   const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printRegOperand(unsigned RegNo, raw_ostream &O);
-  void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-  void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                      raw_ostream &O);
-  void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
-                          raw_ostream &O);
-  void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-  void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printBankMask(const MCInst *MI, unsigned OpNo,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printBoundCtrl(const MCInst *MI, unsigned OpNo,
-                      const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printSDWADstSel(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
-                           raw_ostream &O);
-  void printOpSel(const MCInst *MI, unsigned OpNo,
-                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printOpSelHi(const MCInst *MI, unsigned OpNo,
-                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printNegLo(const MCInst *MI, unsigned OpNo,
-                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printNegHi(const MCInst *MI, unsigned OpNo,
-                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printInterpSlot(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printInterpAttr(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printInterpAttrChan(const MCInst *MI, unsigned OpNo,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printMemOperand(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-
-
-  template <unsigned N>
-  void printExpSrcN(const MCInst *MI, unsigned OpNo,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpSrc0(const MCInst *MI, unsigned OpNo,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpSrc1(const MCInst *MI, unsigned OpNo,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpSrc2(const MCInst *MI, unsigned OpNo,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpSrc3(const MCInst *MI, unsigned OpNo,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printExpTgt(const MCInst *MI, unsigned OpNo,
-                   const MCSubtargetInfo &STI, raw_ostream &O);
-
-public:
-  static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                         StringRef Asm, StringRef Default = "");
-  static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                         char Asm);
-protected:
-  void printAbs(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printHigh(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                 raw_ostream &O);
-  void printClamp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                  raw_ostream &O);
-  void printClampSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printOModSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-  void printLiteral(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printLast(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                 raw_ostream &O);
-  void printNeg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printOMOD(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                 raw_ostream &O);
-  void printRel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                raw_ostream &O);
-  void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-  void printUpdatePred(const MCInst *MI, unsigned OpNo,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printWrite(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                  raw_ostream &O);
-  void printBankSwizzle(const MCInst *MI, unsigned OpNo,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printRSel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                 raw_ostream &O);
-  void printCT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-               raw_ostream &O);
-  void printKCache(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-  void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-  void printWaitFlag(const MCInst *MI, unsigned OpNo,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                  raw_ostream &O);
-};
-
-class R600InstPrinter : public MCInstPrinter {
-public:
-  R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                  const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index abc88c02adca..57c0ba26cc3a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
@@ -19,8 +18,10 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "Utils/AMDGPUBaseInfo.h"
 
 using namespace llvm;
+using namespace llvm::AMDGPU;
 
 namespace {
 
@@ -36,17 +37,13 @@ public:
                   const MCSubtargetInfo *STI) const override;
   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                             const MCRelaxableFragment *DF,
-                            const MCAsmLayout &Layout) const override {
-    return false;
-  }
+                            const MCAsmLayout &Layout) const override;
+
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                        MCInst &Res) const override {
-    llvm_unreachable("Not implemented");
-  }
+                        MCInst &Res) const override;
+
   bool mayNeedRelaxation(const MCInst &Inst,
-                         const MCSubtargetInfo &STI) const override {
-    return false;
-  }
+                         const MCSubtargetInfo &STI) const override;
 
   unsigned getMinimumNopSize() const override;
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -56,6 +53,36 @@ public:
 
 } //End anonymous namespace
 
+void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
+                                        const MCSubtargetInfo &STI,
+                                        MCInst &Res) const { // Fills Res with the relaxed form of Inst.
+  unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()); // NOTE(review): result is used unchecked here - confirm callers test mayNeedRelaxation first.
+  Res.setOpcode(RelaxedOpcode);
+  Res.addOperand(Inst.getOperand(0)); // Only operand 0 of Inst is carried over.
+  return; // Explicit return at the end of a void function.
+}
+
+bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                            uint64_t Value,
+                                            const MCRelaxableFragment *DF,
+                                            const MCAsmLayout &Layout) const { // Only Value is inspected.
+  // If the branch target has an offset of 0x3f, the branch needs to be relaxed
+  // so that an s_nop 0 is added immediately after it, effectively incrementing
+  // the offset. This is a hardware workaround for gfx1010.
+  return (((int64_t(Value)/4)-1) == 0x3f); // Value is divided by 4 and decremented before the compare.
+}
+
+bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
+                       const MCSubtargetInfo &STI) const { // True only with FeatureOffset3fBug and a relaxable opcode.
+  if (!STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug]) // Subtarget lacks the feature bit: never relax.
+    return false;
+
+  if (AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0) // A non-negative result is treated as "has a relaxed form".
+    return true;
+
+  return false;
+}
+
 static unsigned getFixupKindNumBytes(unsigned Kind) {
   switch (Kind) {
   case AMDGPU::fixup_si_sopp_br:
@@ -173,11 +200,13 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
   bool Is64Bit;
   bool HasRelocationAddend;
   uint8_t OSABI = ELF::ELFOSABI_NONE;
+  uint8_t ABIVersion = 0;
 
 public:
-  ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) :
+  ELFAMDGPUAsmBackend(const Target &T, const Triple &TT, uint8_t ABIVersion) :
       AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
-      HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
+      HasRelocationAddend(TT.getOS() == Triple::AMDHSA),
+      ABIVersion(ABIVersion) {
     switch (TT.getOS()) {
     case Triple::AMDHSA:
       OSABI = ELF::ELFOSABI_AMDGPU_HSA;
@@ -195,7 +224,8 @@ public:
 
   std::unique_ptr<MCObjectTargetWriter>
   createObjectTargetWriter() const override {
-    return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend);
+    return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend,
+                                       ABIVersion);
   }
 };
 
@@ -206,5 +236,6 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
                                            const MCRegisterInfo &MRI,
                                            const MCTargetOptions &Options) {
   // Use 64-bit ELF for amdgcn
-  return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
+  return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
+                                 IsaInfo::hasCodeObjectV3(&STI) ? 1 : 0);
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index c85a1ea5b054..6549a8d7d592 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,7 +22,8 @@ namespace {
 
 class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
 public:
-  AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend);
+  AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend,
+                        uint8_t ABIVersion);
 
 protected:
   unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -35,9 +35,10 @@ protected:
 
 AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
                                              uint8_t OSABI,
-                                             bool HasRelocationAddend)
+                                             bool HasRelocationAddend,
+                                             uint8_t ABIVersion)
   : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU,
-                            HasRelocationAddend) {}
+                            HasRelocationAddend, ABIVersion) {}
 
 unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
                                              const MCValue &Target,
@@ -84,7 +85,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
 
 std::unique_ptr<MCObjectTargetWriter>
 llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
-                                  bool HasRelocationAddend) {
+                                  bool HasRelocationAddend,
+                                  uint8_t ABIVersion) {
   return llvm::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI,
-                                                  HasRelocationAddend);
+                                                  HasRelocationAddend,
+                                                  ABIVersion);
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
index c627a08e7463..40437d8fa1a4 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
index 41e9063a759e..9fbf53c944ef 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -1,9 +1,8 @@
 //===-------- AMDGPUELFStreamer.h - ELF Object Output -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index 20c1adfbc6b9..d49bb196ab3a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUFixupKinds.h - AMDGPU Specific Fixup Entries ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
new file mode 100644
index 000000000000..01b53432cbb7
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -0,0 +1,1568 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                  StringRef Annot, const MCSubtargetInfo &STI) {
+  OS.flush(); // Flush OS before anything is printed.
+  printInstruction(MI, STI, OS); // Instruction text comes first ...
+  printAnnotation(OS, Annot); // ... followed by the annotation Annot.
+}
+
+void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) { // STI is unused here.
+  O << formatHex(MI->getOperand(OpNo).getImm() & 0xf); // Low 4 bits of the immediate via formatHex.
+}
+
+void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  O << formatHex(MI->getOperand(OpNo).getImm() & 0xff); // Low 8 bits of the immediate via formatHex.
+}
+
+void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  // It's possible to end up with a 32-bit literal used with a 16-bit operand
+  // with ignored high bits. Print as 32-bit anyway in that case.
+  int64_t Imm = MI->getOperand(OpNo).getImm();
+  if (isInt<16>(Imm) || isUInt<16>(Imm)) // Representable in 16 bits, signed or unsigned.
+    O << formatHex(static_cast<uint64_t>(Imm & 0xffff)); // Print only the low 16 bits.
+  else
+    printU32ImmOperand(MI, OpNo, STI, O); // Wider value: fall back to the 32-bit printer.
+}
+
+void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  O << formatDec(MI->getOperand(OpNo).getImm() & 0xf); // Low 4 bits of the immediate via formatDec.
+}
+
+void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  O << formatDec(MI->getOperand(OpNo).getImm() & 0xff); // Low 8 bits of the immediate via formatDec.
+}
+
+void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
+                                              raw_ostream &O) {
+  O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff); // Low 16 bits of the immediate via formatDec.
+}
+
+void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) { // STI is unused here.
+  O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); // Low 32 bits of the immediate via formatHex.
+}
+
+void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O, StringRef BitName) {
+  if (MI->getOperand(OpNo).getImm()) { // Nothing is printed when the immediate is zero.
+    O << ' ' << BitName; // A single space, then the bit's name.
+  }
+}
+
+void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "offen"); // Prints " offen" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "idxen"); // Prints " idxen" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "addr64"); // Prints " addr64" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (MI->getOperand(OpNo).getImm()) { // A zero offset is omitted entirely.
+    O << " offset:";
+    printU16ImmDecOperand(MI, OpNo, O); // Low 16 bits, printed via formatDec.
+  }
+}
+
+void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) { // STI is unused here.
+  uint16_t Imm = MI->getOperand(OpNo).getImm(); // Truncated to 16 bits for the zero test below.
+  if (Imm != 0) { // A zero offset is omitted entirely.
+    O << ((OpNo == 0)? "offset:" : " offset:"); // No leading space when this is operand 0.
+    printU16ImmDecOperand(MI, OpNo, O);
+  }
+}
+
+void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  uint16_t Imm = MI->getOperand(OpNo).getImm(); // Truncated to 16 bits for the zero test below.
+  if (Imm != 0) { // A zero offset is omitted entirely.
+    O << ((OpNo == 0)? "offset:" : " offset:"); // No leading space when this is operand 0.
+
+    const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+    bool IsFlatSeg = !(Desc.TSFlags & SIInstrFlags::IsNonFlatSeg); // Flat segment unless the IsNonFlatSeg flag is set.
+
+    if (IsFlatSeg) { // Unsigned offset
+      printU16ImmDecOperand(MI, OpNo, O);
+    } else {         // Signed offset
+      if (AMDGPU::isGFX10(STI)) {
+        O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm())); // Sign-extended from 12 bits on GFX10.
+      } else {
+        O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); // Sign-extended from 13 bits otherwise.
+      }
+    }
+  }
+}
+
+void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) { // STI is unused here.
+  if (MI->getOperand(OpNo).getImm()) { // A zero value is omitted entirely.
+    O << " offset0:";
+    printU8ImmDecOperand(MI, OpNo, O); // Low 8 bits, printed via formatDec.
+  }
+}
+
+void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) { // STI is unused here.
+  if (MI->getOperand(OpNo).getImm()) { // A zero value is omitted entirely.
+    O << " offset1:";
+    printU8ImmDecOperand(MI, OpNo, O); // Low 8 bits, printed via formatDec.
+  }
+}
+
+void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  printU32ImmOperand(MI, OpNo, STI, O); // Pure delegation: the operand is printed by the 32-bit printer.
+}
+
+void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  printU32ImmOperand(MI, OpNo, STI, O); // Pure delegation: the operand is printed by the 32-bit printer.
+}
+
+void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  printU32ImmOperand(MI, OpNo, STI, O); // Pure delegation: the operand is printed by the 32-bit printer.
+}
+
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "gds"); // Prints " gds" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (AMDGPU::isGFX10(STI)) // The dlc bit is printed only when isGFX10(STI) holds.
+    printNamedBit(MI, OpNo, O, "dlc"); // Prints " dlc" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "glc"); // Prints " glc" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "slc"); // Prints " slc" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "tfe"); // Prints " tfe" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (MI->getOperand(OpNo).getImm()) { // A zero dmask is omitted entirely.
+    O << " dmask:";
+    printU16ImmOperand(MI, OpNo, STI, O); // Value is printed by the 16-bit hex printer.
+  }
+}
+
+void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  unsigned Dim = MI->getOperand(OpNo).getImm();
+  O << " dim:SQ_RSRC_IMG_"; // The prefix is emitted unconditionally.
+
+  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); // May be null; checked below.
+  if (DimInfo)
+    O << DimInfo->AsmSuffix; // Known encoding: print the suffix from the lookup result.
+  else
+    O << Dim; // No entry found: fall back to the raw numeric value.
+}
+
+void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "unorm"); // Prints " unorm" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
+                                const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "da"); // Prints " da" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (STI.hasFeature(AMDGPU::FeatureR128A16)) // The same operand is spelled differently depending on this feature.
+    printNamedBit(MI, OpNo, O, "a16"); // With FeatureR128A16: " a16".
+  else
+    printNamedBit(MI, OpNo, O, "r128"); // Without it: " r128".
+}
+
+void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "lwe"); // Prints " lwe" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) { // STI is unused here.
+  printNamedBit(MI, OpNo, O, "d16"); // Prints " d16" when the operand is non-zero.
+}
+
+void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) { // STI is unused here.
+  if (MI->getOperand(OpNo).getImm()) // Nothing is printed when the immediate is zero.
+    O << " compr";
+}
+
+void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) { // STI is unused here.
+  if (MI->getOperand(OpNo).getImm()) // Nothing is printed when the immediate is zero.
+    O << " vm";
+}
+
+void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  if (unsigned Val = MI->getOperand(OpNo).getImm()) { // A zero value prints nothing.
+    if (AMDGPU::isGFX10(STI))
+      O << " format:" << Val; // GFX10: the whole value as a single field.
+    else {
+      O << " dfmt:" << (Val & 15); // Otherwise split: low 4 bits are dfmt ...
+      O << ", nfmt:" << (Val >> 4); // ... and the remaining upper bits are nfmt.
+    }
+  }
+}
+
+void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
+                                        const MCRegisterInfo &MRI) { // Prints RegNo using a name index chosen by its register class.
+#if !defined(NDEBUG)
+  switch (RegNo) { // This sanity check is compiled only when NDEBUG is not defined.
+  case AMDGPU::FP_REG:
+  case AMDGPU::SP_REG:
+  case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
+  case AMDGPU::PRIVATE_RSRC_REG:
+    llvm_unreachable("pseudo-register should not ever be emitted");
+  case AMDGPU::SCC:
+    llvm_unreachable("pseudo scc should not ever be emitted");
+  default:
+    break;
+  }
+#endif
+
+  unsigned AltName = AMDGPU::Reg32; // Default when RegNo is in none of the classes tested below.
+
+  if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) || // First matching class in this chain wins.
+      MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
+      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg64;
+  else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg128;
+  else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg96;
+  else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg160;
+  else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg256;
+  else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg512;
+  else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg1024;
+
+  O << getRegisterName(RegNo, AltName); // Name lookup takes the register plus the selected alternate-name index.
+}
+
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (OpNo == 0) { // The suffix is emitted only when the destination is operand 0.
+    if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3) // Flags are tested in priority order: VOP3, DPP, SDWA.
+      O << "_e64 ";
+    else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
+      O << "_dpp ";
+    else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
+      O << "_sdwa ";
+    else
+      O << "_e32 "; // None of the three flags set.
+  }
+
+  printOperand(MI, OpNo, STI, O); // The destination operand itself.
+
+  // Print default vcc/vcc_lo operand.
+  switch (MI->getOpcode()) {
+  default: break;
+
+  case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
+    printDefaultVccOperand(1, STI, O); // Passing 1 makes the helper print ", " before the register.
+    break;
+  }
+}
+
+void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
+                                       const MCSubtargetInfo &STI, raw_ostream &O) {
+  if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI)) // SI and CI get a bare space instead of a suffix.
+    O << " ";
+  else
+    O << "_e32 "; // Every other subtarget gets the _e32 suffix.
+
+  printOperand(MI, OpNo, STI, O); // The destination operand itself.
+}
+
+void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  int16_t SImm = static_cast<int16_t>(Imm); // Low 16 bits reinterpreted as a signed value.
+  if (SImm >= -16 && SImm <= 64) { // Integers in [-16, 64] are printed as plain signed numbers.
+    O << SImm;
+    return;
+  }
+
+  if (Imm == 0x3C00) // Specific bit patterns below are printed as floating-point text.
+    O<< "1.0";
+  else if (Imm == 0xBC00)
+    O<< "-1.0";
+  else if (Imm == 0x3800)
+    O<< "0.5";
+  else if (Imm == 0xB800)
+    O<< "-0.5";
+  else if (Imm == 0x4000)
+    O<< "2.0";
+  else if (Imm == 0xC000)
+    O<< "-2.0";
+  else if (Imm == 0x4400)
+    O<< "4.0";
+  else if (Imm == 0xC400)
+    O<< "-4.0";
+  else if (Imm == 0x3118) {
+    assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]); // Feature is asserted, not tested: builds with NDEBUG skip the check.
+    O << "0.15915494";
+  } else
+    O << formatHex(static_cast<uint64_t>(Imm)); // Anything else is printed via formatHex.
+}
+
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  uint16_t Lo16 = static_cast<uint16_t>(Imm); // Only the low 16 bits of Imm are kept.
+  printImmediate16(Lo16, STI, O); // Printed exactly like a scalar 16-bit immediate.
+}
+
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  int32_t SImm = static_cast<int32_t>(Imm); // Same bits viewed as a signed value.
+  if (SImm >= -16 && SImm <= 64) { // Integers in [-16, 64] are printed as plain signed numbers.
+    O << SImm;
+    return;
+  }
+
+  if (Imm == FloatToBits(0.0f)) // NOTE(review): looks unreachable - the range check above already returns for Imm == 0, if FloatToBits(0.0f) is 0; confirm.
+    O << "0.0";
+  else if (Imm == FloatToBits(1.0f)) // Bit patterns of these floats are printed as floating-point text.
+    O << "1.0";
+  else if (Imm == FloatToBits(-1.0f))
+    O << "-1.0";
+  else if (Imm == FloatToBits(0.5f))
+    O << "0.5";
+  else if (Imm == FloatToBits(-0.5f))
+    O << "-0.5";
+  else if (Imm == FloatToBits(2.0f))
+    O << "2.0";
+  else if (Imm == FloatToBits(-2.0f))
+    O << "-2.0";
+  else if (Imm == FloatToBits(4.0f))
+    O << "4.0";
+  else if (Imm == FloatToBits(-4.0f))
+    O << "-4.0";
+  else if (Imm == 0x3e22f983 && // This pattern is special-cased only when the feature bit below is set.
+           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+    O << "0.15915494";
+  else
+    O << formatHex(static_cast<uint64_t>(Imm)); // Anything else is printed via formatHex.
+}
+
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  int64_t SImm = static_cast<int64_t>(Imm); // Same bits viewed as a signed value.
+  if (SImm >= -16 && SImm <= 64) { // Integers in [-16, 64] are printed as plain signed numbers.
+    O << SImm;
+    return;
+  }
+
+  if (Imm == DoubleToBits(0.0)) // NOTE(review): looks unreachable - the range check above already returns for Imm == 0, if DoubleToBits(0.0) is 0; confirm.
+    O << "0.0";
+  else if (Imm == DoubleToBits(1.0)) // Bit patterns of these doubles are printed as floating-point text.
+    O << "1.0";
+  else if (Imm == DoubleToBits(-1.0))
+    O << "-1.0";
+  else if (Imm == DoubleToBits(0.5))
+    O << "0.5";
+  else if (Imm == DoubleToBits(-0.5))
+    O << "-0.5";
+  else if (Imm == DoubleToBits(2.0))
+    O << "2.0";
+  else if (Imm == DoubleToBits(-2.0))
+    O << "-2.0";
+  else if (Imm == DoubleToBits(4.0))
+    O << "4.0";
+  else if (Imm == DoubleToBits(-4.0))
+    O << "-4.0";
+  else if (Imm == 0x3fc45f306dc9c882 && // This pattern is special-cased only when the feature bit below is set.
+           STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+    O << "0.15915494309189532";
+  else {
+    assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882); // Checked only in builds with assertions enabled.
+
+    // In rare situations, we will have a 32-bit literal in a 64-bit
+    // operand. This is technically allowed for the encoding of s_mov_b64.
+    O << formatHex(static_cast<uint64_t>(Imm));
+  }
+}
+
+void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) { // STI is unused here.
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  if (!Imm) // A zero value is omitted entirely.
+    return;
+
+  O << " blgp:" << Imm; // Value is streamed as an unsigned integer.
+}
+
+void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) { // STI is unused here.
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  if (!Imm) // A zero value is omitted entirely.
+    return;
+
+  O << " cbsz:" << Imm; // Value is streamed as an unsigned integer.
+}
+
+void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) { // STI is unused here.
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  if (!Imm) // A zero value is omitted entirely.
+    return;
+
+  O << " abid:" << Imm; // Value is streamed as an unsigned integer.
+}
+
+void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) { // OpNo only decides where the ", " separator goes.
+  if (OpNo > 0) // Not the first operand: separator comes before the register.
+    O << ", ";
+  printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? // VCC with FeatureWavefrontSize64, VCC_LO otherwise.
+                  AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
+  if (OpNo == 0) // First operand: separator comes after the register.
+    O << ", ";
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) { // Prints operand OpNo of MI, dispatching on the operand's kind.
+  // Print default vcc/vcc_lo operand of VOPC.
+  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+  if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
+      (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
+       Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
+    printDefaultVccOperand(OpNo, STI, O); // With OpNo == 0 the helper prints the register followed by ", ".
+
+  if (OpNo >= MI->getNumOperands()) { // Out-of-range index: emit a marker comment instead of reading the operand.
+    O << "/*Missing OP" << OpNo << "*/";
+    return;
+  }
+
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegOperand(Op.getReg(), O, MRI);
+  } else if (Op.isImm()) {
+    switch (Desc.OpInfo[OpNo].OperandType) { // The declared operand type selects the immediate printer.
+    case AMDGPU::OPERAND_REG_IMM_INT32:
+    case AMDGPU::OPERAND_REG_IMM_FP32:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+    case MCOI::OPERAND_IMMEDIATE:
+      printImmediate32(Op.getImm(), STI, O);
+      break;
+    case AMDGPU::OPERAND_REG_IMM_INT64:
+    case AMDGPU::OPERAND_REG_IMM_FP64:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+      printImmediate64(Op.getImm(), STI, O);
+      break;
+    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+    case AMDGPU::OPERAND_REG_IMM_INT16:
+    case AMDGPU::OPERAND_REG_IMM_FP16:
+      printImmediate16(Op.getImm(), STI, O);
+      break;
+    case AMDGPU::OPERAND_REG_IMM_V2INT16:
+    case AMDGPU::OPERAND_REG_IMM_V2FP16:
+      if (!isUInt<16>(Op.getImm()) && // Wider than 16 bits and FeatureVOP3Literal set: print as a 32-bit immediate.
+          STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+        printImmediate32(Op.getImm(), STI, O);
+        break;
+      }
+      LLVM_FALLTHROUGH; // Otherwise handled like the packed 16-bit cases below.
+    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+      printImmediateV216(Op.getImm(), STI, O);
+      break;
+    case MCOI::OPERAND_UNKNOWN:
+    case MCOI::OPERAND_PCREL:
+      O << formatDec(Op.getImm()); // Printed via formatDec without masking.
+      break;
+    case MCOI::OPERAND_REGISTER:
+      // FIXME: This should be removed and handled somewhere else. Seems to come
+      // from a disassembler bug.
+      O << "/*invalid immediate*/";
+      break;
+    default:
+      // We hit this for the immediate instruction bits that don't yet have a
+      // custom printer.
+      llvm_unreachable("unexpected immediate operand type");
+    }
+  } else if (Op.isFPImm()) {
+    // We special case 0.0 because otherwise it will be printed as an integer.
+    if (Op.getFPImm() == 0.0)
+      O << "0.0";
+    else {
+      const MCInstrDesc &Desc = MII.get(MI->getOpcode()); // NOTE(review): shadows the outer Desc, which is initialised identically.
+      int RCID = Desc.OpInfo[OpNo].RegClass;
+      unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID)); // Register-class bit width picks the 32- or 64-bit printer.
+      if (RCBits == 32)
+        printImmediate32(FloatToBits(Op.getFPImm()), STI, O);
+      else if (RCBits == 64)
+        printImmediate64(DoubleToBits(Op.getFPImm()), STI, O);
+      else
+        llvm_unreachable("Invalid register class size");
+    }
+  } else if (Op.isExpr()) {
+    const MCExpr *Exp = Op.getExpr();
+    Exp->print(O, &MAI); // Expressions print themselves, given the asm info.
+  } else {
+    O << "/*INV_OP*/"; // Operand is none of register, immediate, FP immediate or expression.
+  }
+
+  // Print default vcc/vcc_lo operand after src1 for the opcodes listed below.
+  switch (MI->getOpcode()) {
+  default: break;
+
+  case AMDGPU::V_CNDMASK_B32_e32_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+  case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+  case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+  case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
+
+  case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
+  case AMDGPU::V_CNDMASK_B32_e32_vi:
+    if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(), // Only when the operand just printed is src1.
+                                                AMDGPU::OpName::src1))
+      printDefaultVccOperand(OpNo, STI, O);
+    break;
+  }
+}
+
+/// Prints a source operand together with its floating-point input modifiers.
+///
+/// The operand at \p OpNo holds the modifier bits; the value they apply to is
+/// the operand at \p OpNo + 1. SISrcMods::ABS wraps the value in '|...|' and
+/// SISrcMods::NEG negates it.
+void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
+                                                   unsigned OpNo,
+                                                   const MCSubtargetInfo &STI,
+                                                   raw_ostream &O) {
+  const unsigned Mods = MI->getOperand(OpNo).getImm();
+  const bool HasNeg = (Mods & SISrcMods::NEG) != 0;
+  const bool HasAbs = (Mods & SISrcMods::ABS) != 0;
+  const unsigned SrcIdx = OpNo + 1;
+
+  // Use 'neg(...)' instead of '-' to avoid ambiguity.
+  // This is important for integer literals because
+  // -1 is not the same value as neg(1).
+  // The long form is only chosen for a negated, non-abs immediate source.
+  bool UseNegCall = false;
+  if (HasNeg && !HasAbs && SrcIdx < MI->getNumOperands()) {
+    const MCOperand &Src = MI->getOperand(SrcIdx);
+    UseNegCall = Src.isImm() || Src.isFPImm();
+  }
+
+  if (UseNegCall)
+    O << "neg(";
+  else if (HasNeg)
+    O << '-';
+
+  if (HasAbs)
+    O << '|';
+  printOperand(MI, SrcIdx, STI, O);
+  if (HasAbs)
+    O << '|';
+
+  if (UseNegCall)
+    O << ')';
+}
+
+/// Prints a source operand together with its integer input modifiers.
+///
+/// The operand at \p OpNo holds the modifier bits; the value is the operand at
+/// \p OpNo + 1 and is wrapped in 'sext(...)' when SISrcMods::SEXT is set.
+void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
+                                                    unsigned OpNo,
+                                                    const MCSubtargetInfo &STI,
+                                                    raw_ostream &O) {
+  const unsigned Mods = MI->getOperand(OpNo).getImm();
+  const bool HasSext = (Mods & SISrcMods::SEXT) != 0;
+
+  if (HasSext)
+    O << "sext(";
+  printOperand(MI, OpNo + 1, STI, O);
+  if (HasSext)
+    O << ')';
+
+  // Print default vcc/vcc_lo operand of VOP2b.
+  const unsigned Opc = MI->getOpcode();
+  const bool IsVOP2bSDWA = Opc == AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10 ||
+                           Opc == AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10 ||
+                           Opc == AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10;
+  if (!IsVOP2bSDWA)
+    return;
+
+  // Only the value that follows the src1 modifiers gets the extra operand.
+  if ((int)OpNo + 1 ==
+      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1))
+    printDefaultVccOperand(OpNo, STI, O);
+}
+
+/// Prints the operand at \p OpNo as " dpp8:[a,b,c,d,e,f,g,h]", where the
+/// eight entries are consecutive 3-bit fields of the immediate, lowest field
+/// first. Reaching this on a non-GFX10 subtarget is treated as unreachable.
+void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) {
+  if (!AMDGPU::isGFX10(STI))
+    llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
+
+  unsigned Sel = MI->getOperand(OpNo).getImm();
+  O << " dpp8:[";
+  // Consume one 3-bit selector per iteration, shifting the rest down.
+  for (unsigned Lane = 0; Lane != 8; ++Lane, Sel >>= 3) {
+    if (Lane != 0)
+      O << ',';
+    O << formatDec(Sel & 0x7);
+  }
+  O << ']';
+}
+
+/// Prints the dpp_ctrl operand at \p OpNo in its symbolic form.
+///
+/// Every control is emitted with a leading space. A control that the current
+/// subtarget does not support, or a value outside every known range, is
+/// emitted as a '/* ... */' comment instead.
+void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  using namespace AMDGPU::DPP;
+
+  const unsigned Imm = MI->getOperand(OpNo).getImm();
+  // wave_* and row_bcast are only printed for VI and GFX9.
+  const bool IsVIOrGFX9 = AMDGPU::isVI(STI) || AMDGPU::isGFX9(STI);
+  // row_share and row_xmask are only printed for GFX10.
+  const bool IsGFX10 = AMDGPU::isGFX10(STI);
+
+  if (Imm <= DppCtrl::QUAD_PERM_LAST) {
+    // Four 2-bit lane selectors, lowest field first.
+    O << " quad_perm:[";
+    O << formatDec(Imm & 0x3)         << ',';
+    O << formatDec((Imm & 0xc)  >> 2) << ',';
+    O << formatDec((Imm & 0x30) >> 4) << ',';
+    O << formatDec((Imm & 0xc0) >> 6) << ']';
+  } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
+             (Imm <= DppCtrl::ROW_SHL_LAST)) {
+    O << " row_shl:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
+             (Imm <= DppCtrl::ROW_SHR_LAST)) {
+    O << " row_shr:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
+             (Imm <= DppCtrl::ROW_ROR_LAST)) {
+    O << " row_ror:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  } else if (Imm == DppCtrl::WAVE_SHL1) {
+    if (!IsVIOrGFX9) {
+      O << " /* wave_shl is not supported starting from GFX10 */";
+      return;
+    }
+    O << " wave_shl:1";
+  } else if (Imm == DppCtrl::WAVE_ROL1) {
+    if (!IsVIOrGFX9) {
+      O << " /* wave_rol is not supported starting from GFX10 */";
+      return;
+    }
+    O << " wave_rol:1";
+  } else if (Imm == DppCtrl::WAVE_SHR1) {
+    if (!IsVIOrGFX9) {
+      O << " /* wave_shr is not supported starting from GFX10 */";
+      return;
+    }
+    O << " wave_shr:1";
+  } else if (Imm == DppCtrl::WAVE_ROR1) {
+    if (!IsVIOrGFX9) {
+      O << " /* wave_ror is not supported starting from GFX10 */";
+      return;
+    }
+    O << " wave_ror:1";
+  } else if (Imm == DppCtrl::ROW_MIRROR) {
+    O << " row_mirror";
+  } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
+    O << " row_half_mirror";
+  } else if (Imm == DppCtrl::BCAST15) {
+    if (!IsVIOrGFX9) {
+      O << " /* row_bcast is not supported starting from GFX10 */";
+      return;
+    }
+    O << " row_bcast:15";
+  } else if (Imm == DppCtrl::BCAST31) {
+    if (!IsVIOrGFX9) {
+      O << " /* row_bcast is not supported starting from GFX10 */";
+      return;
+    }
+    O << " row_bcast:31";
+  } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
+             (Imm <= DppCtrl::ROW_SHARE_LAST)) {
+    if (!IsGFX10) {
+      O << " /* row_share is not supported on ASICs earlier than GFX10 */";
+      return;
+    }
+    O << " row_share:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
+             (Imm <= DppCtrl::ROW_XMASK_LAST)) {
+    if (!IsGFX10) {
+      O << " /* row_xmask is not supported on ASICs earlier than GFX10 */";
+      return;
+    }
+    // Leading space added so row_xmask is separated like every other form.
+    O << " row_xmask:";
+    printU4ImmDecOperand(MI, OpNo, O);
+  } else {
+    O << " /* Invalid dpp_ctrl value */";
+  }
+}
+
+/// Prints " row_mask:" followed by the operand at \p OpNo as formatted by
+/// printU4ImmOperand.
+void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  static const char Prefix[] = " row_mask:";
+
+  O << Prefix;
+  printU4ImmOperand(MI, OpNo, STI, O);
+}
+
+/// Prints " bank_mask:" followed by the operand at \p OpNo as formatted by
+/// printU4ImmOperand.
+void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  static const char Prefix[] = " bank_mask:";
+
+  O << Prefix;
+  printU4ImmOperand(MI, OpNo, STI, O);
+}
+
+/// Prints " bound_ctrl:0" when the operand at \p OpNo is non-zero; prints
+/// nothing otherwise.
+void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  const unsigned BoundCtrl = MI->getOperand(OpNo).getImm();
+  if (BoundCtrl == 0)
+    return;
+
+  O << " bound_ctrl:0"; // XXX - this syntax is used in sp3
+}
+
+/// Prints " fi:1" when the operand at \p OpNo equals DPP_FI_1 or DPP8_FI_1;
+/// prints nothing for any other value.
+void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
+                                const MCSubtargetInfo &STI,
+                                raw_ostream &O) {
+  using namespace llvm::AMDGPU::DPP;
+
+  const unsigned Imm = MI->getOperand(OpNo).getImm();
+  const bool IsFISet = Imm == DPP_FI_1 || Imm == DPP8_FI_1;
+  if (!IsFISet)
+    return;
+
+  O << " fi:1";
+}
+
+/// Prints the symbolic name of the SDWA data-select value stored in the
+/// operand at \p OpNo. Any value other than the listed SdwaSel enumerators is
+/// treated as unreachable.
+void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O) {
+  using namespace llvm::AMDGPU::SDWA;
+
+  const unsigned Imm = MI->getOperand(OpNo).getImm();
+  const char *SelName = nullptr;
+  switch (Imm) {
+  case SdwaSel::BYTE_0:
+    SelName = "BYTE_0";
+    break;
+  case SdwaSel::BYTE_1:
+    SelName = "BYTE_1";
+    break;
+  case SdwaSel::BYTE_2:
+    SelName = "BYTE_2";
+    break;
+  case SdwaSel::BYTE_3:
+    SelName = "BYTE_3";
+    break;
+  case SdwaSel::WORD_0:
+    SelName = "WORD_0";
+    break;
+  case SdwaSel::WORD_1:
+    SelName = "WORD_1";
+    break;
+  case SdwaSel::DWORD:
+    SelName = "DWORD";
+    break;
+  default:
+    llvm_unreachable("Invalid SDWA data select operand");
+  }
+  O << SelName;
+}
+
+/// Prints "dst_sel:" followed by the SDWA select name of the operand at
+/// \p OpNo (see printSDWASel).
+void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  static const char Prefix[] = "dst_sel:";
+
+  O << Prefix;
+  printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "src0_sel:" followed by the SDWA select name of the operand at
+/// \p OpNo (see printSDWASel).
+void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  static const char Prefix[] = "src0_sel:";
+
+  O << Prefix;
+  printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "src1_sel:" followed by the SDWA select name of the operand at
+/// \p OpNo (see printSDWASel).
+void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  static const char Prefix[] = "src1_sel:";
+
+  O << Prefix;
+  printSDWASel(MI, OpNo, O);
+}
+
+/// Prints "dst_unused:" followed by the symbolic name of the DstUnused value
+/// stored in the operand at \p OpNo. Any value other than the listed
+/// enumerators is treated as unreachable.
+void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  using namespace llvm::AMDGPU::SDWA;
+
+  // The prefix is written before the value is inspected.
+  O << "dst_unused:";
+
+  const unsigned Imm = MI->getOperand(OpNo).getImm();
+  const char *UnusedName = nullptr;
+  switch (Imm) {
+  case DstUnused::UNUSED_PAD:
+    UnusedName = "UNUSED_PAD";
+    break;
+  case DstUnused::UNUSED_SEXT:
+    UnusedName = "UNUSED_SEXT";
+    break;
+  case DstUnused::UNUSED_PRESERVE:
+    UnusedName = "UNUSED_PRESERVE";
+    break;
+  default:
+    llvm_unreachable("Invalid SDWA dest_unused operand");
+  }
+  O << UnusedName;
+}
+
+/// Prints export source \p N: the register operand when bit \p N of the 'en'
+/// operand is set, and "off" otherwise.
+///
+/// When the 'compr' operand is non-zero the operand index is moved back so
+/// that the four sources print as src0, src0, src1, src1.
+template <unsigned N>
+void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const unsigned Opc = MI->getOpcode();
+
+  const int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
+  const unsigned En = MI->getOperand(EnIdx).getImm();
+
+  const int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
+  const bool IsCompressed = MI->getOperand(ComprIdx).getImm() != 0;
+
+  // If compr is set, print as src0, src0, src1, src1
+  if (IsCompressed)
+    OpNo -= (N == 1 || N == 2) ? 1 : (N == 3 ? 2 : 0);
+
+  if ((En & (1 << N)) == 0) {
+    O << "off";
+    return;
+  }
+
+  printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
+}
+
+/// Prints export source 0 by forwarding to printExpSrcN<0>.
+void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  constexpr unsigned SrcIndex = 0;
+  printExpSrcN<SrcIndex>(MI, OpNo, STI, O);
+}
+
+/// Prints export source 1 by forwarding to printExpSrcN<1>.
+void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  constexpr unsigned SrcIndex = 1;
+  printExpSrcN<SrcIndex>(MI, OpNo, STI, O);
+}
+
+/// Prints export source 2 by forwarding to printExpSrcN<2>.
+void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  constexpr unsigned SrcIndex = 2;
+  printExpSrcN<SrcIndex>(MI, OpNo, STI, O);
+}
+
+/// Prints export source 3 by forwarding to printExpSrcN<3>.
+void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  constexpr unsigned SrcIndex = 3;
+  printExpSrcN<SrcIndex>(MI, OpNo, STI, O);
+}
+
+/// Prints the export target held in the low 6 bits of the operand at \p OpNo
+/// as " mrtN", " mrtz", " null", " posN", " prim" or " paramN". Values that
+/// match none of these print as " invalid_target_N".
+void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  // This is really a 6 bit field.
+  const uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
+  const bool IsGFX10 = AMDGPU::isGFX10(STI);
+
+  if (Tgt <= 7) {
+    O << " mrt" << Tgt;
+    return;
+  }
+  if (Tgt == 8) {
+    O << " mrtz";
+    return;
+  }
+  if (Tgt == 9) {
+    O << " null";
+    return;
+  }
+  // Target 16 is only printed as a position on GFX10.
+  if ((Tgt >= 12 && Tgt <= 15) || (Tgt == 16 && IsGFX10)) {
+    O << " pos" << Tgt - 12;
+    return;
+  }
+  if (IsGFX10 && Tgt == 20) {
+    O << " prim";
+    return;
+  }
+  if (Tgt >= 32 && Tgt <= 63) {
+    O << " param" << Tgt - 32;
+    return;
+  }
+
+  // Everything else, e.g. the reserved values 10 and 11.
+  O << " invalid_target_" << Tgt;
+}
+
+/// Returns true when the \p Mod bit of every entry in \p Ops has its default
+/// state and, if \p HasDstSel, Ops[0] has SISrcMods::DST_OP_SEL clear.
+///
+/// The default state of the bit is "set" only when \p IsPacked is true and
+/// \p Mod is SISrcMods::OP_SEL_1; in every other case it is "clear".
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
+                               bool IsPacked, bool HasDstSel) {
+  const bool ExpectSet = IsPacked && (Mod == SISrcMods::OP_SEL_1);
+
+  for (int I = 0; I < NumOps; ++I) {
+    const bool IsSet = (Ops[I] & Mod) != 0;
+    if (IsSet != ExpectSet)
+      return false;
+  }
+
+  // The destination select bit is stored in the first modifier operand.
+  const bool DstSelSet = HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0;
+  return !DstSelSet;
+}
+
+/// Prints a per-source modifier list of the form "<Name>b0,b1,...]", where
+/// each entry is 1 or 0 depending on whether \p Mod is set in the matching
+/// srcN_modifiers operand.
+///
+/// \p Name must already contain the opening bracket. Nothing is printed when
+/// allOpsDefaultValue reports that every bit has its default value. For
+/// SISrcMods::OP_SEL_0 on an instruction flagged VOP3_OPSEL, a trailing entry
+/// taken from DST_OP_SEL of the first modifier operand is appended.
+void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
+                                            StringRef Name,
+                                            unsigned Mod,
+                                            raw_ostream &O) {
+  const unsigned Opc = MI->getOpcode();
+
+  // Gather the modifier operands, stopping at the first one that is absent.
+  const int ModOperandNames[] = { AMDGPU::OpName::src0_modifiers,
+                                  AMDGPU::OpName::src1_modifiers,
+                                  AMDGPU::OpName::src2_modifiers };
+  int Ops[3];
+  int NumOps = 0;
+  for (int OpName : ModOperandNames) {
+    const int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+    if (Idx == -1)
+      break;
+
+    Ops[NumOps] = MI->getOperand(Idx).getImm();
+    ++NumOps;
+  }
+
+  const auto &Desc = MII.get(MI->getOpcode());
+
+  const bool HasDstSel =
+    NumOps > 0 &&
+    Mod == SISrcMods::OP_SEL_0 &&
+    (Desc.TSFlags & SIInstrFlags::VOP3_OPSEL) != 0;
+
+  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
+
+  if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
+    return;
+
+  O << Name;
+  const char *Sep = "";
+  for (int I = 0; I < NumOps; ++I) {
+    O << Sep << !!(Ops[I] & Mod);
+    Sep = ",";
+  }
+
+  if (HasDstSel)
+    O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
+
+  O << ']';
+}
+
+/// Prints the " op_sel:[...]" modifier.
+///
+/// For V_PERMLANE16_B32_gfx10 and V_PERMLANEX16_B32_gfx10 the list has two
+/// entries, the OP_SEL_0 bits of src0_modifiers and src1_modifiers, and is
+/// omitted when both are zero. All other opcodes go through
+/// printPackedModifier.
+void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  const unsigned Opc = MI->getOpcode();
+  const bool IsPermlane = Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
+                          Opc == AMDGPU::V_PERMLANEX16_B32_gfx10;
+  if (!IsPermlane) {
+    printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
+    return;
+  }
+
+  auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+  auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+  const unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
+  const unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
+  if (!FI && !BC)
+    return;
+
+  O << " op_sel:[" << FI << ',' << BC << ']';
+}
+
+/// Prints the " op_sel_hi:[...]" list built from the SISrcMods::OP_SEL_1 bit
+/// of each source modifier operand (see printPackedModifier). \p OpNo is not
+/// used.
+void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const unsigned ModBit = SISrcMods::OP_SEL_1;
+  printPackedModifier(MI, " op_sel_hi:[", ModBit, O);
+}
+
+/// Prints the " neg_lo:[...]" list built from the SISrcMods::NEG bit of each
+/// source modifier operand (see printPackedModifier). \p OpNo is not used.
+void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  const unsigned ModBit = SISrcMods::NEG;
+  printPackedModifier(MI, " neg_lo:[", ModBit, O);
+}
+
+/// Prints the " neg_hi:[...]" list built from the SISrcMods::NEG_HI bit of
+/// each source modifier operand (see printPackedModifier). \p OpNo is not
+/// used.
+void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  const unsigned ModBit = SISrcMods::NEG_HI;
+  printPackedModifier(MI, " neg_hi:[", ModBit, O);
+}
+
+/// Prints the interpolation slot stored in the operand at \p OpNum: 0, 1 and
+/// 2 print as "p10", "p20" and "p0"; anything else prints as
+/// "invalid_param_N".
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  static const char *const SlotNames[] = { "p10", "p20", "p0" };
+
+  const unsigned Imm = MI->getOperand(OpNum).getImm();
+  if (Imm < 3) {
+    O << SlotNames[Imm];
+    return;
+  }
+
+  O << "invalid_param_" << Imm;
+}
+
+/// Prints "attr" followed by the decimal value of the operand at \p OpNum.
+void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const unsigned AttrNo = MI->getOperand(OpNum).getImm();
+
+  O << "attr";
+  O << AttrNo;
+}
+
+/// Prints '.' followed by the channel letter x, y, z or w selected by the low
+/// two bits of the operand at \p OpNum.
+void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  static const char ChanLetters[] = { 'x', 'y', 'z', 'w' };
+
+  const unsigned Chan = MI->getOperand(OpNum).getImm();
+  O << '.' << ChanLetters[Chan & 0x3];
+}
+
+/// Prints the VGPR index mode stored in the operand at \p OpNo.
+///
+/// When only bits inside ENABLE_MASK are set the value prints as
+/// " gpr_idx(A,B,...)" using the IdSymbolic names of the set bits; otherwise
+/// it prints as a space followed by the raw value in hexadecimal.
+void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  using namespace llvm::AMDGPU::VGPRIndexMode;
+
+  const unsigned Val = MI->getOperand(OpNo).getImm();
+
+  if ((Val & ~ENABLE_MASK) != 0) {
+    O << " " << formatHex(static_cast<uint64_t>(Val));
+    return;
+  }
+
+  O << " gpr_idx(";
+  const char *Sep = "";
+  for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
+    if ((Val & (1 << ModeId)) == 0)
+      continue;
+
+    O << Sep << IdSymbolic[ModeId];
+    Sep = ",";
+  }
+  O << ')';
+}
+
+/// Prints the operands at \p OpNo and \p OpNo + 1, separated by ", ".
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const unsigned FirstIdx = OpNo;
+  const unsigned SecondIdx = OpNo + 1;
+
+  printOperand(MI, FirstIdx, STI, O);
+  O << ", ";
+  printOperand(MI, SecondIdx, STI, O);
+}
+
+/// Prints \p Asm when the immediate operand at \p OpNo equals 1, and
+/// \p Default for any other value.
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O, StringRef Asm,
+                                   StringRef Default) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isImm());
+
+  const bool IsSet = Op.getImm() == 1;
+  O << (IsSet ? Asm : Default);
+}
+
+/// Prints the single character \p Asm when the immediate operand at \p OpNo
+/// equals 1; prints nothing for any other value.
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O, char Asm) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isImm());
+
+  if (Op.getImm() != 1)
+    return;
+
+  O << Asm;
+}
+
+/// Prints " high" when the operand at \p OpNo is non-zero.
+void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) {
+  const bool IsHigh = MI->getOperand(OpNo).getImm() != 0;
+  if (!IsHigh)
+    return;
+
+  O << " high";
+}
+
+/// Prints " clamp" when the operand at \p OpNo is non-zero.
+void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const bool IsClamped = MI->getOperand(OpNo).getImm() != 0;
+  if (!IsClamped)
+    return;
+
+  O << " clamp";
+}
+
+/// Prints the output modifier stored in the operand at \p OpNo: " mul:2",
+/// " mul:4" or " div:2" for SIOutMods::MUL2, MUL4 and DIV2 respectively, and
+/// nothing for any other value.
+void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  const int OMod = MI->getOperand(OpNo).getImm();
+  switch (OMod) {
+  case SIOutMods::MUL2:
+    O << " mul:2";
+    break;
+  case SIOutMods::MUL4:
+    O << " mul:4";
+    break;
+  case SIOutMods::DIV2:
+    O << " div:2";
+    break;
+  default:
+    break;
+  }
+}
+
+/// Prints the sendmsg immediate stored in the operand at \p OpNo.
+///
+/// Three forms are tried in order:
+///  - symbolic "sendmsg(NAME[, OP[, STREAM]])" when the decoded message id,
+///    operation and stream id all validate;
+///  - numeric "sendmsg(id, op, stream)" when re-encoding the decoded fields
+///    reproduces the immediate;
+///  - the bare immediate otherwise.
+void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  using namespace llvm::AMDGPU::SendMsg;
+
+  const unsigned Imm16 = MI->getOperand(OpNo).getImm();
+
+  uint16_t MsgId;
+  uint16_t OpId;
+  uint16_t StreamId;
+  decodeMsg(Imm16, MsgId, OpId, StreamId);
+
+  const bool IsSymbolic = isValidMsgId(MsgId, STI) &&
+                          isValidMsgOp(MsgId, OpId) &&
+                          isValidMsgStream(MsgId, OpId, StreamId);
+  if (IsSymbolic) {
+    O << "sendmsg(" << getMsgName(MsgId);
+    if (msgRequiresOp(MsgId)) {
+      O << ", " << getMsgOpName(MsgId, OpId);
+      if (msgSupportsStream(MsgId, OpId))
+        O << ", " << StreamId;
+    }
+    O << ')';
+    return;
+  }
+
+  if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
+    O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
+    return;
+  }
+
+  O << Imm16; // Unknown imm16 code.
+}
+
+/// Prints a quoted, BITMASK_WIDTH-character description of the transform
+/// ((x & AndMask) | OrMask) ^ XorMask, most significant bit first.
+///
+/// Each character describes one output bit: '0' or '1' when the bit is
+/// constant, 'p' when it equals the input bit and 'i' when it is the
+/// inverted input bit.
+static void printSwizzleBitmask(const uint16_t AndMask,
+                                const uint16_t OrMask,
+                                const uint16_t XorMask,
+                                raw_ostream &O) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  // Results of the transform for an all-zeros and an all-ones input.
+  const uint16_t WhenClear = OrMask ^ XorMask;
+  const uint16_t WhenSet = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
+
+  O << "\"";
+
+  for (int Bit = BITMASK_WIDTH - 1; Bit >= 0; --Bit) {
+    const bool OutForZero = ((WhenClear >> Bit) & 1) != 0;
+    const bool OutForOne = ((WhenSet >> Bit) & 1) != 0;
+
+    char Symbol;
+    if (OutForZero == OutForOne)
+      Symbol = OutForZero ? '1' : '0';
+    else
+      Symbol = OutForZero ? 'i' : 'p';
+    O << Symbol;
+  }
+
+  O << "\"";
+}
+
+/// Prints the swizzle offset stored in the operand at \p OpNo as
+/// " offset:<form>". Nothing is printed when the value is zero.
+///
+/// The form is the first that matches:
+///  - quad-perm encoding: "swizzle(<QUAD_PERM>,a,b,c,d)";
+///  - bitmask-perm encoding, recognised as a swap, a reverse or a broadcast
+///    where the masks allow it, and as a generic bitmask permutation
+///    otherwise;
+///  - any other encoding: the value as printed by printU16ImmDecOperand.
+void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  uint16_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm == 0)
+    return;
+
+  O << " offset:";
+
+  if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
+    O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
+    // One lane selector per iteration, lowest field first.
+    for (unsigned Lane = 0; Lane < LANE_NUM; ++Lane, Imm >>= LANE_SHIFT)
+      O << "," << formatDec(Imm & LANE_MASK);
+    O << ")";
+    return;
+  }
+
+  if ((Imm & BITMASK_PERM_ENC_MASK) != BITMASK_PERM_ENC) {
+    printU16ImmDecOperand(MI, OpNo, O);
+    return;
+  }
+
+  const uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
+  const uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
+  const uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
+
+  // Swap and reverse both require the and/or masks to be pass-through.
+  const bool IsXorOnly = AndMask == BITMASK_MAX && OrMask == 0;
+
+  if (IsXorOnly && countPopulation(XorMask) == 1) {
+    O << "swizzle(" << IdSymbolic[ID_SWAP] << "," << formatDec(XorMask)
+      << ")";
+    return;
+  }
+
+  if (IsXorOnly && XorMask > 0 && isPowerOf2_64(XorMask + 1)) {
+    O << "swizzle(" << IdSymbolic[ID_REVERSE] << ","
+      << formatDec(XorMask + 1) << ")";
+    return;
+  }
+
+  const uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
+  const bool IsBroadcast = GroupSize > 1 &&
+                           isPowerOf2_64(GroupSize) &&
+                           OrMask < GroupSize &&
+                           XorMask == 0;
+  if (IsBroadcast) {
+    O << "swizzle(" << IdSymbolic[ID_BROADCAST] << ","
+      << formatDec(GroupSize) << "," << formatDec(OrMask) << ")";
+    return;
+  }
+
+  O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM] << ",";
+  printSwizzleBitmask(AndMask, OrMask, XorMask, O);
+  O << ")";
+}
+
+/// Prints the wait-count immediate stored in the operand at \p OpNo as a
+/// space-separated list of "vmcnt(N)", "expcnt(N)" and "lgkmcnt(N)".
+///
+/// A counter is omitted when its decoded value equals the bit mask that the
+/// ISA version reports for that counter.
+void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  const AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
+
+  const unsigned SImm16 = MI->getOperand(OpNo).getImm();
+  unsigned Vmcnt, Expcnt, Lgkmcnt;
+  decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+
+  // Empty until the first counter has been printed.
+  const char *Sep = "";
+
+  if (Vmcnt != getVmcntBitMask(ISA)) {
+    O << "vmcnt(" << Vmcnt << ')';
+    Sep = " ";
+  }
+
+  if (Expcnt != getExpcntBitMask(ISA)) {
+    O << Sep << "expcnt(" << Expcnt << ')';
+    Sep = " ";
+  }
+
+  if (Lgkmcnt != getLgkmcntBitMask(ISA))
+    O << Sep << "lgkmcnt(" << Lgkmcnt << ')';
+}
+
+/// Prints the hardware-register immediate stored in the operand at \p OpNo
+/// as "hwreg(<reg>[, <offset>, <width>])".
+///
+/// <reg> is the name returned by getHwreg, or the numeric id when that name
+/// is empty. Offset and width are printed only when at least one of them
+/// differs from its default.
+void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI, raw_ostream &O) {
+  using namespace llvm::AMDGPU::Hwreg;
+
+  unsigned Id;
+  unsigned Offset;
+  unsigned Width;
+  const unsigned Val = MI->getOperand(OpNo).getImm();
+  decodeHwreg(Val, Id, Offset, Width);
+
+  const StringRef HwRegName = getHwreg(Id, STI);
+
+  O << "hwreg(";
+  if (HwRegName.empty())
+    O << Id;
+  else
+    O << HwRegName;
+
+  const bool IsDefaultField =
+      Width == WIDTH_DEFAULT_ && Offset == OFFSET_DEFAULT_;
+  if (!IsDefaultField)
+    O << ", " << Offset << ", " << Width;
+
+  O << ')';
+}
+
+/// Prints a space followed by the decimal value of the operand at \p OpNo,
+/// truncated to 16 bits; prints nothing when that value is zero.
+void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  const uint16_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm != 0)
+    O << ' ' << formatDec(Imm);
+}
+
+#include "AMDGPUGenAsmWriter.inc"
+
+/// Prints the instruction \p MI to \p O and then its annotation \p Annot.
+/// The stream is flushed before anything is written.
+void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                StringRef Annot, const MCSubtargetInfo &STI) {
+  O.flush();
+
+  // Instruction text first, annotation last.
+  printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+/// Prints '|' when the immediate operand at \p OpNo equals 1.
+void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+                               raw_ostream &O) {
+  const char AbsMarker = '|';
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, AbsMarker);
+}
+
+/// Prints the "BS:..." name of the bank swizzle stored in the operand at
+/// \p OpNo. Only the values 1 through 5 have a name; every other value
+/// prints nothing.
+void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  // Indexed by (value - 1).
+  static const char *const SwizzleNames[] = {
+    "BS:VEC_021/SCL_122",
+    "BS:VEC_120/SCL_212",
+    "BS:VEC_102/SCL_221",
+    "BS:VEC_201",
+    "BS:VEC_210",
+  };
+
+  const int BankSwizzle = MI->getOperand(OpNo).getImm();
+  if (BankSwizzle < 1 || BankSwizzle > 5)
+    return;
+
+  O << SwizzleNames[BankSwizzle - 1];
+}
+
+/// Prints "_SAT" when the immediate operand at \p OpNo equals 1; otherwise
+/// prints printIfSet's default text.
+void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  const StringRef ClampSuffix = "_SAT";
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, ClampSuffix);
+}
+
+/// Prints 'U' when the operand at \p OpNo is 0 and 'N' when it is 1; prints
+/// nothing for any other value.
+void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo,
+                              raw_ostream &O) {
+  const unsigned CT = MI->getOperand(OpNo).getImm();
+  if (CT == 0)
+    O << 'U';
+  else if (CT == 1)
+    O << 'N';
+}
+
+/// Prints a constant-cache range as "CB<bank>:<first>-<last>" when the mode
+/// stored in the operand at \p OpNo is positive; prints nothing otherwise.
+///
+/// The bank is read from operand \p OpNo - 2 and the address from operand
+/// \p OpNo + 2. <first> is address * 16 and <last> is <first> plus 16 for
+/// mode 1 or plus 32 for any other positive mode.
+void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const int KCacheMode = MI->getOperand(OpNo).getImm();
+  if (KCacheMode <= 0)
+    return;
+
+  const int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+  O << "CB" << KCacheBank << ':';
+
+  const int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+  const int LineSize = (KCacheMode == 1) ? 16 : 32;
+  const int FirstAddr = KCacheAddr * 16;
+  O << FirstAddr << '-' << FirstAddr + LineSize;
+}
+
+/// Prints "*" when the immediate operand at \p OpNo equals 1 and a single
+/// space otherwise.
+void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O) {
+  const StringRef LastMarker = "*";
+  const StringRef NotLast = " ";
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, LastMarker, NotLast);
+}
+
+/// Prints the literal operand at \p OpNo, which must be an immediate or an
+/// expression.
+///
+/// An immediate prints as "<integer>(<float>)", where <float> is the result
+/// of passing the same value to BitsToFloat. An expression prints as '@'
+/// followed by the expression.
+void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert(Op.isImm() || Op.isExpr());
+
+  if (Op.isImm()) {
+    const int64_t Bits = Op.getImm();
+    O << Bits << '(' << BitsToFloat(Bits) << ')';
+  }
+
+  if (Op.isExpr()) {
+    O << '@';
+    Op.getExpr()->print(O, &MAI);
+  }
+}
+
+/// Prints '-' when the immediate operand at \p OpNo equals 1.
+void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+                               raw_ostream &O) {
+  const char NegMarker = '-';
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, NegMarker);
+}
+
+/// Prints the output modifier stored in the operand at \p OpNo: " * 2.0",
+/// " * 4.0" or " / 2.0" for the values 1, 2 and 3; nothing for any other
+/// value.
+void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O) {
+  // Indexed by (value - 1).
+  static const char *const OModText[] = { " * 2.0", " * 4.0", " / 2.0" };
+
+  const int64_t OMod = MI->getOperand(OpNo).getImm();
+  if (OMod < 1 || OMod > 3)
+    return;
+
+  O << OModText[OMod - 1];
+}
+
+/// Prints the operands at \p OpNo and \p OpNo + 1, separated by ", ".
+void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O) {
+  const unsigned FirstIdx = OpNo;
+  const unsigned SecondIdx = OpNo + 1;
+
+  printOperand(MI, FirstIdx, O);
+  O << ", ";
+  printOperand(MI, SecondIdx, O);
+}
+
+/// Prints the operand at \p OpNo according to its kind.
+///
+/// Registers print by name (R600::PRED_SEL_OFF prints nothing), integer
+/// immediates in decimal, floating-point immediates as numbers (with 0.0
+/// spelled "0.0") and expressions through MCExpr::print. An out-of-range
+/// index or an operand of any other kind prints a marker comment.
+void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  if (OpNo >= MI->getNumOperands()) {
+    O << "/*Missing OP" << OpNo << "*/";
+    return;
+  }
+
+  const MCOperand &Op = MI->getOperand(OpNo);
+
+  if (Op.isReg()) {
+    // PRED_SEL_OFF is the default predicate state, so we don't need to
+    // print it.
+    if (Op.getReg() != R600::PRED_SEL_OFF)
+      O << getRegisterName(Op.getReg());
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+
+  if (Op.isFPImm()) {
+    // We special case 0.0 because otherwise it will be printed as an integer.
+    if (Op.getFPImm() == 0.0)
+      O << "0.0";
+    else
+      O << Op.getFPImm();
+    return;
+  }
+
+  if (Op.isExpr()) {
+    Op.getExpr()->print(O, &MAI);
+    return;
+  }
+
+  O << "/*INV_OP*/";
+}
+
+/// Prints '+' when the immediate operand at \p OpNo equals 1.
+void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+                               raw_ostream &O) {
+  const char RelMarker = '+';
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, RelMarker);
+}
+
+/// Prints the single-character name of the selector stored in the operand at
+/// \p OpNo: 0-3 print as X, Y, Z, W; 4 and 5 print as '0' and '1'; 7 prints
+/// as '_'. Every other value, including 6, prints nothing.
+void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O) {
+  // Indexed by selector value; a zero entry means "print nothing".
+  static const char SelChars[8] = { 'X', 'Y', 'Z', 'W', '0', '1', 0, '_' };
+
+  const unsigned Sel = MI->getOperand(OpNo).getImm();
+  if (Sel >= 8)
+    return;
+
+  const char SelChar = SelChars[Sel];
+  if (SelChar != 0)
+    O << SelChar;
+}
+
+/// Prints "ExecMask," when the immediate operand at \p OpNo equals 1;
+/// otherwise prints printIfSet's default text.
+void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  const StringRef Text = "ExecMask,";
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, Text);
+}
+
+/// Prints "Pred," when the immediate operand at \p OpNo equals 1; otherwise
+/// prints printIfSet's default text.
+void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O) {
+  const StringRef Text = "Pred,";
+  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, Text);
+}
+
+/// Prints " (MASKED)" when the operand at \p OpNo is the immediate 0; prints
+/// nothing otherwise.
+void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  const bool IsMasked = MI->getOperand(OpNo).getImm() == 0;
+  if (!IsMasked)
+    return;
+
+  O << " (MASKED)";
+}
+
+#include "R600GenAsmWriter.inc"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
new file mode 100644
index 000000000000..b544d1ef3605
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -0,0 +1,268 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter { // Prints AMDGPU MCInsts.
+public:
+  AMDGPUInstPrinter(const MCAsmInfo &MAI,
+                    const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = AMDGPU::NoRegAltName);
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  static void printRegOperand(unsigned RegNo, raw_ostream &O,
+                              const MCRegisterInfo &MRI);
+
+private:
+  void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+  void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                     StringRef BitName);
+  void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+  void printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                       raw_ostream &O);
+
+  void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                  raw_ostream &O);
+  void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                  raw_ostream &O);
+  void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+               raw_ostream &O);
+  void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printLWE(const MCInst *MI, unsigned OpNo,
+                const MCSubtargetInfo &STI, raw_ostream &O);
+  void printD16(const MCInst *MI, unsigned OpNo,
+                const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpCompr(const MCInst *MI, unsigned OpNo,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpVM(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFORMAT(const MCInst *MI, unsigned OpNo,
+                   const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printRegOperand(unsigned RegNo, raw_ostream &O);
+  void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+  void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                      raw_ostream &O);
+  void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
+                          raw_ostream &O);
+  void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+  void printDPP8(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printBankMask(const MCInst *MI, unsigned OpNo,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printBoundCtrl(const MCInst *MI, unsigned OpNo,
+                      const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFI(const MCInst *MI, unsigned OpNo,
+               const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSDWADstSel(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+                           raw_ostream &O);
+  void printOpSel(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printOpSelHi(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNegLo(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNegHi(const MCInst *MI, unsigned OpNo,
+                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printInterpSlot(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printInterpAttr(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printInterpAttrChan(const MCInst *MI, unsigned OpNo,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printBLGP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printCBSZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printABID(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI,
+                              raw_ostream &O);
+
+  // Exp source printers; printExpSrc0-3 presumably forward to printExpSrcN.
+  template <unsigned N>
+  void printExpSrcN(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc0(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc1(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc2(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpSrc3(const MCInst *MI, unsigned OpNo,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpTgt(const MCInst *MI, unsigned OpNo,
+                   const MCSubtargetInfo &STI, raw_ostream &O);
+
+public: // Static: callable without a printer, e.g. by R600InstPrinter.
+  static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                         StringRef Asm, StringRef Default = "");
+  static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                         char Asm);
+protected:
+  void printAbs(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printHigh(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printClamp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                  raw_ostream &O);
+  void printClampSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printOModSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+  void printLiteral(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printLast(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printNeg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printOMOD(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printRel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
+  void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printUpdatePred(const MCInst *MI, unsigned OpNo,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printWrite(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                  raw_ostream &O);
+  void printBankSwizzle(const MCInst *MI, unsigned OpNo,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printRSel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                 raw_ostream &O);
+  void printCT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+               raw_ostream &O);
+  void printKCache(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+  void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+  void printWaitFlag(const MCInst *MI, unsigned OpNo,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                  raw_ostream &O);
+  void printEndpgm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+};
+
+class R600InstPrinter : public MCInstPrinter { // Prints R600 MCInsts.
+public:
+  R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                  const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+  // Per-operand printers; none of them takes an MCSubtargetInfo.
+  void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 2364e7b7b5fb..9e04ab9bae93 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -1,15 +1,16 @@
 //===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCAsmInfo.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
 using namespace llvm;
 
@@ -19,7 +20,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
   HasSingleParameterDotFile = false;
   //===------------------------------------------------------------------===//
   MinInstAlignment = 4;
-  MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 8 : 16;
+
+  // This is the maximum encoded instruction size for gfx10. With a known
+  // subtarget, getMaxInstLength() can reduce it to as little as 8 bytes.
+  MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 20 : 16;
   SeparatorString = "\n";
   CommentString = ";";
   PrivateLabelPrefix = "";
@@ -45,3 +49,18 @@ bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
          SectionName == ".hsarodata_readonly_agent" ||
          MCAsmInfo::shouldOmitSectionDirective(SectionName);
 }
+
+unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
+  // No subtarget, or an r600 one: use the triple-wide limit from the ctor.
+  const bool UseDefault =
+      !STI || STI->getTargetTriple().getArch() == Triple::r600;
+  if (UseDefault)
+    return MaxInstLength;
+
+  const auto &Features = STI->getFeatureBits();
+  // NSA encoded images are the longest (20 bytes); next comes a 64-bit
+  // instruction carrying a 32-bit literal (12 bytes); otherwise 8 bytes.
+  if (Features[AMDGPU::FeatureNSAEncoding])
+    return 20;
+  return Features[AMDGPU::FeatureVOP3Literal] ? 12 : 8;
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
index 8cb33a3179cd..71e63ec27a8f 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@ class AMDGPUMCAsmInfo : public MCAsmInfoELF {
 public:
   explicit AMDGPUMCAsmInfo(const Triple &TT);
   bool shouldOmitSectionDirective(StringRef SectionName) const override;
+  unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override;
 };
 } // namespace llvm
 #endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index cae7a7a6c7e7..f3d945cc0764 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index dcc10a032afe..62757a707890 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,10 +63,17 @@ public:
     return 0;
   }
 
+  virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+    return 0; // Base-class default; virtual so that emitters can override it.
+  }
+
 protected:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // End namespace llvm
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index c579c7d60e16..88df64d18cc5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,13 +13,15 @@
 
 #include "AMDGPUMCTargetDesc.h"
 #include "AMDGPUELFStreamer.h"
+#include "AMDGPUInstPrinter.h"
 #include "AMDGPUMCAsmInfo.h"
 #include "AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
 #include "SIDefines.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -104,6 +105,35 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
                                  std::move(Emitter), RelaxAll);
 }
 
+namespace {
+
+class AMDGPUMCInstrAnalysis : public MCInstrAnalysis {
+public:
+  explicit AMDGPUMCInstrAnalysis(const MCInstrInfo *Info)
+      : MCInstrAnalysis(Info) {}
+
+  /// Computes \p Target for a branch whose operand 0 is a PC-relative imm.
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                      uint64_t &Target) const override {
+    if (Inst.getNumOperands() == 0 || !Inst.getOperand(0).isImm())
+      return false;
+    const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
+    if (Desc.OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
+      return false;
+
+    // The simm16 branch operand is scaled by 4, so the offset needs 18 bits.
+    const APInt ByteOffset(18, Inst.getOperand(0).getImm() * 4, true);
+    Target = (ByteOffset.sext(64) + Addr + Size).getZExtValue();
+    return true;
+  }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createAMDGPUMCInstrAnalysis(const MCInstrInfo *Info) {
+  return new AMDGPUMCInstrAnalysis(Info); // Used by RegisterMCInstrAnalysis.
+}
+
 extern "C" void LLVMInitializeAMDGPUTargetMC() {
 
   TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
@@ -114,6 +144,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
     TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
     TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
     TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+    TargetRegistry::RegisterMCInstrAnalysis(*T, createAMDGPUMCInstrAnalysis);
     TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
     TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
   }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index f3628d96d6e9..9754d31fee60 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,9 +33,6 @@ class Target;
 class Triple;
 class raw_pwrite_stream;
 
-Target &getTheAMDGPUTarget();
-Target &getTheGCNTarget();
-
 MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                        const MCRegisterInfo &MRI,
                                        MCContext &Ctx);
@@ -53,7 +49,7 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
 
 std::unique_ptr<MCObjectTargetWriter>
 createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
-                            bool HasRelocationAddend);
+                            bool HasRelocationAddend, uint8_t ABIVersion);
 } // End llvm namespace
 
 #define GET_REGINFO_ENUM
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index c17fe126546c..8f11433476f4 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,6 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
@@ -52,51 +50,53 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
 }
 
 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
-  std::shared_ptr<msgpack::Node> HSAMetadataRoot;
-  yaml::Input YIn(HSAMetadataString);
-  YIn >> HSAMetadataRoot;
-  if (YIn.error())
+  msgpack::Document HSAMetadataDoc;
+  if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
     return false;
-  return EmitHSAMetadata(HSAMetadataRoot, false);
+  return EmitHSAMetadata(HSAMetadataDoc, false);
 }
 
 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
   AMDGPU::GPUKind AK;
 
   switch (ElfMach) {
-  case ELF::EF_AMDGPU_MACH_R600_R600:     AK = GK_R600;    break;
-  case ELF::EF_AMDGPU_MACH_R600_R630:     AK = GK_R630;    break;
-  case ELF::EF_AMDGPU_MACH_R600_RS880:    AK = GK_RS880;   break;
-  case ELF::EF_AMDGPU_MACH_R600_RV670:    AK = GK_RV670;   break;
-  case ELF::EF_AMDGPU_MACH_R600_RV710:    AK = GK_RV710;   break;
-  case ELF::EF_AMDGPU_MACH_R600_RV730:    AK = GK_RV730;   break;
-  case ELF::EF_AMDGPU_MACH_R600_RV770:    AK = GK_RV770;   break;
-  case ELF::EF_AMDGPU_MACH_R600_CEDAR:    AK = GK_CEDAR;   break;
-  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:  AK = GK_CYPRESS; break;
-  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:  AK = GK_JUNIPER; break;
-  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:  AK = GK_REDWOOD; break;
-  case ELF::EF_AMDGPU_MACH_R600_SUMO:     AK = GK_SUMO;    break;
-  case ELF::EF_AMDGPU_MACH_R600_BARTS:    AK = GK_BARTS;   break;
-  case ELF::EF_AMDGPU_MACH_R600_CAICOS:   AK = GK_CAICOS;  break;
-  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:   AK = GK_CAYMAN;  break;
-  case ELF::EF_AMDGPU_MACH_R600_TURKS:    AK = GK_TURKS;   break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906;  break;
-  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909;  break;
-  case ELF::EF_AMDGPU_MACH_NONE:          AK = GK_NONE;    break;
+  case ELF::EF_AMDGPU_MACH_R600_R600:      AK = GK_R600;    break;
+  case ELF::EF_AMDGPU_MACH_R600_R630:      AK = GK_R630;    break;
+  case ELF::EF_AMDGPU_MACH_R600_RS880:     AK = GK_RS880;   break;
+  case ELF::EF_AMDGPU_MACH_R600_RV670:     AK = GK_RV670;   break;
+  case ELF::EF_AMDGPU_MACH_R600_RV710:     AK = GK_RV710;   break;
+  case ELF::EF_AMDGPU_MACH_R600_RV730:     AK = GK_RV730;   break;
+  case ELF::EF_AMDGPU_MACH_R600_RV770:     AK = GK_RV770;   break;
+  case ELF::EF_AMDGPU_MACH_R600_CEDAR:     AK = GK_CEDAR;   break;
+  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:   AK = GK_CYPRESS; break;
+  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:   AK = GK_JUNIPER; break;
+  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:   AK = GK_REDWOOD; break;
+  case ELF::EF_AMDGPU_MACH_R600_SUMO:      AK = GK_SUMO;    break;
+  case ELF::EF_AMDGPU_MACH_R600_BARTS:     AK = GK_BARTS;   break;
+  case ELF::EF_AMDGPU_MACH_R600_CAICOS:    AK = GK_CAICOS;  break;
+  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:    AK = GK_CAYMAN;  break;
+  case ELF::EF_AMDGPU_MACH_R600_TURKS:     AK = GK_TURKS;   break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  AK = GK_GFX600;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  AK = GK_GFX601;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  AK = GK_GFX700;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  AK = GK_GFX701;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  AK = GK_GFX702;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  AK = GK_GFX703;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  AK = GK_GFX704;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  AK = GK_GFX801;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  AK = GK_GFX802;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  AK = GK_GFX803;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  AK = GK_GFX810;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  AK = GK_GFX900;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  AK = GK_GFX902;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  AK = GK_GFX904;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  AK = GK_GFX906;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  AK = GK_GFX908;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  AK = GK_GFX909;  break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
+  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
+  case ELF::EF_AMDGPU_MACH_NONE:           AK = GK_NONE;    break;
   }
 
   StringRef GPUName = getArchNameAMDGCN(AK);
@@ -142,7 +142,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
   case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
   case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
   case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+  case GK_GFX908:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
   case GK_GFX909:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
+  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
+  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
   case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
   }
 
@@ -157,6 +161,14 @@ AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                  formatted_raw_ostream &OS)
     : AMDGPUTargetStreamer(S), OS(OS) { }
 
+// A hook for emitting stuff at the end.
+// We use it for emitting the accumulated PAL metadata as directives.
+void AMDGPUTargetAsmStreamer::finish() {
+  std::string S;
+  getPALMetadata()->toString(S);
+  OS << S;
+}
+
 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
   OS << "\t.amdgcn_target \"" << Target << "\"\n";
 }
@@ -196,6 +208,12 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   }
 }
 
+void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+                                            unsigned Align) {
+  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
+     << '\n';
+}
+
 bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
   OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
   return true;
@@ -214,15 +232,14 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
 }
 
 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
-    std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+    msgpack::Document &HSAMetadataDoc, bool Strict) {
   V3::MetadataVerifier Verifier(Strict);
-  if (!Verifier.verify(*HSAMetadataRoot))
+  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
     return false;
 
   std::string HSAMetadataString;
   raw_string_ostream StrOS(HSAMetadataString);
-  yaml::Output YOut(StrOS);
-  YOut << HSAMetadataRoot;
+  HSAMetadataDoc.toYAML(StrOS);
 
   OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
   OS << StrOS.str() << '\n';
@@ -230,13 +247,10 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
   return true;
 }
 
-bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
-    const PALMD::Metadata &PALMetadata) {
-  std::string PALMetadataString;
-  if (PALMD::toString(PALMetadata, PALMetadataString))
-    return false;
-
-  OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
+bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
+  const uint32_t Encoded_s_code_end = 0xbf9f0000;
+  OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
+  OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
   return true;
 }
 
@@ -278,6 +292,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
               kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+  if (IVersion.Major >= 10)
+    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
+                kernel_code_properties,
+                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
   PRINT_FIELD(
       OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
       compute_pgm_rsrc2,
@@ -331,6 +349,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                 compute_pgm_rsrc1,
                 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+  if (IVersion.Major >= 10) {
+    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
+                compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
+                compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
+                compute_pgm_rsrc1,
+                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+  }
   PRINT_FIELD(
       OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
       compute_pgm_rsrc2,
@@ -387,6 +416,19 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
   return static_cast<MCELFStreamer &>(Streamer);
 }
 
+// A hook for emitting stuff at the end.
+// We use it for emitting the accumulated PAL metadata as a .note record.
+void AMDGPUTargetELFStreamer::finish() {
+  std::string Blob;
+  const char *Vendor = getPALMetadata()->getVendor();
+  unsigned Type = getPALMetadata()->getType();
+  getPALMetadata()->toBlob(Type, Blob);
+  if (Blob.empty())
+    return;
+  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
+           [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
+}
+
 void AMDGPUTargetELFStreamer::EmitNote(
     StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
     function_ref<void(MCELFStreamer &)> EmitDesc) {
@@ -463,6 +505,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   Symbol->setType(Type);
 }
 
+void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+                                            unsigned Align) {
+  assert(isPowerOf2_32(Align));
+
+  MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
+  SymbolELF->setType(ELF::STT_OBJECT);
+
+  if (!SymbolELF->isBindingSet()) {
+    SymbolELF->setBinding(ELF::STB_GLOBAL);
+    SymbolELF->setExternal(true);
+  }
+
+  if (SymbolELF->declareCommon(Size, Align, true)) {
+    report_fatal_error("Symbol: " + Symbol->getName() +
+                       " redeclared as different type");
+  }
+
+  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
+  SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
+}
+
 bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
   // Create two labels to mark the beginning and end of the desc field
   // and a MCExpr to calculate the size of the desc field.
@@ -482,16 +545,14 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
   return true;
 }
 
-bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
-    std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
+                                              bool Strict) {
   V3::MetadataVerifier Verifier(Strict);
-  if (!Verifier.verify(*HSAMetadataRoot))
+  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
     return false;
 
   std::string HSAMetadataString;
-  raw_string_ostream StrOS(HSAMetadataString);
-  msgpack::Writer MPWriter(StrOS);
-  HSAMetadataRoot->write(MPWriter);
+  HSAMetadataDoc.writeToBlob(HSAMetadataString);
 
   // Create two labels to mark the beginning and end of the desc field
   // and a MCExpr to calculate the size of the desc field.
@@ -505,7 +566,7 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
   EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
            [&](MCELFStreamer &OS) {
              OS.EmitLabel(DescBegin);
-             OS.EmitBytes(StrOS.str());
+             OS.EmitBytes(HSAMetadataString);
              OS.EmitLabel(DescEnd);
            });
   return true;
@@ -535,15 +596,15 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
   return true;
 }
 
-bool AMDGPUTargetELFStreamer::EmitPALMetadata(
-    const PALMD::Metadata &PALMetadata) {
-  EmitNote(ElfNote::NoteNameV2,
-           MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
-                                  getContext()),
-           ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
-             for (auto I : PALMetadata)
-               OS.EmitIntValue(I, sizeof(uint32_t));
-           });
+bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
+  const uint32_t Encoded_s_code_end = 0xbf9f0000;
+
+  MCStreamer &OS = getStreamer();
+  OS.PushSection();
+  OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
+  for (unsigned I = 0; I < 32; ++I)
+    OS.EmitIntValue(Encoded_s_code_end, 4);
+  OS.PopSection();
   return true;
 }
 
@@ -555,16 +616,25 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
   auto &Streamer = getStreamer();
   auto &Context = Streamer.getContext();
 
+  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
+      Context.getOrCreateSymbol(Twine(KernelName)));
   MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
       Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
-  KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
+
+  // Copy kernel descriptor symbol's binding, other and visibility from the
+  // kernel code symbol.
+  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
+  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
+  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
+  // Kernel descriptor symbol's type and size are fixed.
   KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
   KernelDescriptorSymbol->setSize(
       MCConstantExpr::create(sizeof(KernelDescriptor), Context));
 
-  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
-      Context.getOrCreateSymbol(Twine(KernelName)));
-  KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
+  // The visibility of the kernel code symbol must be protected or less to allow
+  // static relocations from the kernel descriptor to be used.
+  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
+    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
 
   Streamer.EmitLabel(KernelDescriptorSymbol);
   Streamer.EmitBytes(StringRef(
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 9a807c804f9f..683b3e363b9a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,7 +10,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
 
 #include "AMDKernelCodeT.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "Utils/AMDGPUPALMetadata.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/AMDGPUMetadata.h"
@@ -29,12 +29,16 @@ class Module;
 class Type;
 
 class AMDGPUTargetStreamer : public MCTargetStreamer {
+  AMDGPUPALMetadata PALMetadata;
+
 protected:
   MCContext &getContext() const { return Streamer.getContext(); }
 
 public:
   AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
 
+  AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; }
+
   virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
 
   virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -49,6 +53,9 @@ public:
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
 
+  virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+                             unsigned Align) = 0;
+
   /// \returns True on success, false on failure.
   virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
 
@@ -65,14 +72,13 @@ public:
   /// the \p HSAMetadata structure is updated with the correct types.
   ///
   /// \returns True on success, false on failure.
-  virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
-                               bool Strict) = 0;
+  virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) = 0;
 
   /// \returns True on success, false on failure.
   virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
 
   /// \returns True on success, false on failure.
-  virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0;
+  virtual bool EmitCodeEnd() = 0;
 
   virtual void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
@@ -89,6 +95,8 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
 public:
   AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
 
+  void finish() override;
+
   void EmitDirectiveAMDGCNTarget(StringRef Target) override;
 
   void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -102,18 +110,19 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
+  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+
   /// \returns True on success, false on failure.
   bool EmitISAVersion(StringRef IsaVersionString) override;
 
   /// \returns True on success, false on failure.
-  bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
-                       bool Strict) override;
+  bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
 
   /// \returns True on success, false on failure.
   bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
 
   /// \returns True on success, false on failure.
-  bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+  bool EmitCodeEnd() override;
 
   void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
@@ -133,6 +142,8 @@ public:
 
   MCELFStreamer &getStreamer();
 
+  void finish() override;
+
   void EmitDirectiveAMDGCNTarget(StringRef Target) override;
 
   void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -146,18 +157,19 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
+  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+
   /// \returns True on success, false on failure.
   bool EmitISAVersion(StringRef IsaVersionString) override;
 
   /// \returns True on success, false on failure.
-  bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
-                       bool Strict) override;
+  bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
 
   /// \returns True on success, false on failure.
   bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
 
   /// \returns True on success, false on failure.
-  bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+  bool EmitCodeEnd() override;
 
   void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 28d4bc1829e2..2f1f4e7a0392 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,9 +64,10 @@ private:
   uint64_t getBinaryCodeForInstr(const MCInst &MI,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const;
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 
 };
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index 1c99a708e5ac..a4809af29daa 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 36913bd04274..f8ec3c36f019 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,9 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPURegisterInfo.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
@@ -77,6 +78,10 @@ public:
   unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                   SmallVectorImpl<MCFixup> &Fixups,
                                   const MCSubtargetInfo &STI) const override;
+
+  unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const override;
 };
 
 } // end anonymous namespace
@@ -233,6 +238,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
     return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
 
   case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -245,12 +252,21 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
     // FIXME Is this correct? What do inline immediates do on SI for f16 src
     // which does not have f16 support?
     return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
 
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+    if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+      return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+    LLVM_FALLTHROUGH;
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
     uint16_t Lo16 = static_cast<uint16_t>(Imm);
     uint32_t Encoding = getLit16Encoding(Lo16, STI);
     return Encoding;
@@ -274,7 +290,25 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
     OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
   }
 
-  if (bytes > 4)
+  // NSA encoding.
+  if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
+    int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                            AMDGPU::OpName::vaddr0);
+    int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                           AMDGPU::OpName::srsrc);
+    assert(vaddr0 >= 0 && srsrc > vaddr0);
+    unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
+    unsigned NumPadding = (-NumExtraAddrs) & 3;
+
+    for (unsigned i = 0; i < NumExtraAddrs; ++i)
+      OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
+                                          Fixups, STI));
+    for (unsigned i = 0; i < NumPadding; ++i)
+      OS.write(0);
+  }
+
+  if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
+      (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
     return;
 
   // Check for additional literals in SRC0/1/2 (Op 1/2/3)
@@ -366,7 +400,7 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
   const MCOperand &MO = MI.getOperand(OpNo);
 
   unsigned Reg = MO.getReg();
-  if (Reg != AMDGPU::VCC) {
+  if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
     RegEnc |= MRI.getEncodingValue(Reg);
     RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
     RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
@@ -374,10 +408,31 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
   return RegEnc;
 }
 
+unsigned
+SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+                                      SmallVectorImpl<MCFixup> &Fixups,
+                                      const MCSubtargetInfo &STI) const {
+  unsigned Reg = MI.getOperand(OpNo).getReg();
+  uint64_t Enc = MRI.getEncodingValue(Reg);
+
+  // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
+  // instructions use acc[0:1] modifier bits to distinguish. These bits are
+  // encoded as a virtual 9th bit of the register for these operands.
+  if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
+      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg))
+    Enc |= 512;
+
+  return Enc;
+}
+
 static bool needsPCRel(const MCExpr *Expr) {
   switch (Expr->getKind()) {
-  case MCExpr::SymbolRef:
-    return true;
+  case MCExpr::SymbolRef: {
+    auto *SE = cast<MCSymbolRefExpr>(Expr);
+    MCSymbolRefExpr::VariantKind Kind = SE->getKind();
+    return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
+           Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
+  }
   case MCExpr::Binary: {
     auto *BE = cast<MCBinaryExpr>(Expr);
     if (BE->getOpcode() == MCBinaryExpr::Sub)
@@ -416,7 +471,13 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
       Kind = FK_PCRel_4;
     else
       Kind = FK_Data_4;
-    Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc()));
+
+    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+    uint32_t Offset = Desc.getSize();
+    assert(Offset == 4 || Offset == 8);
+
+    Fixups.push_back(
+      MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
   }
 
   // Figure out the operand number, needed for isSrcOperand check
@@ -429,7 +490,8 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
   if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
-    if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+    if (Enc != ~0U &&
+        (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
       return Enc;
 
   } else if (MO.isImm())
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 1c68dbd78e75..4735e6cb2446 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1,9 +1,8 @@
 //===-- MIMGInstructions.td - MIMG Instruction Defintions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,10 +11,14 @@
 //
 // - MIMGEncGfx6: encoding introduced with gfx6 (obsoleted for atomics in gfx8)
 // - MIMGEncGfx8: encoding introduced with gfx8 for atomics
+// - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding
+// - MIMGEncGfx10NSA: gfx10 NSA encoding
 class MIMGEncoding;
 
 def MIMGEncGfx6 : MIMGEncoding;
 def MIMGEncGfx8 : MIMGEncoding;
+def MIMGEncGfx10Default : MIMGEncoding;
+def MIMGEncGfx10NSA : MIMGEncoding;
 
 def MIMGEncoding : GenericEnum {
   let FilterClass = "MIMGEncoding";
@@ -60,13 +63,28 @@ def MIMGDim : GenericEnum {
 def MIMGDimInfoTable : GenericTable {
   let FilterClass = "AMDGPUDimProps";
   let CppTypeName = "MIMGDimInfo";
-  let Fields = ["Dim", "NumCoords", "NumGradients", "DA"];
+  let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"];
   GenericEnum TypeOf_Dim = MIMGDim;
 
   let PrimaryKey = ["Dim"];
   let PrimaryKeyName = "getMIMGDimInfo";
 }
 
+def getMIMGDimInfoByEncoding : SearchIndex {
+  let Table = MIMGDimInfoTable;
+  let Key = ["Encoding"];
+}
+
+def getMIMGDimInfoByAsmSuffix : SearchIndex {
+  let Table = MIMGDimInfoTable;
+  let Key = ["AsmSuffix"];
+}
+
+class mimg <bits<8> si_gfx10, bits<8> vi = si_gfx10> {
+  field bits<8> SI_GFX10 = si_gfx10;
+  field bits<8> VI = vi;
+}
+
 class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
   MIMGBaseOpcode L = l;
   MIMGBaseOpcode LZ = lz;
@@ -83,12 +101,23 @@ def MIMGLZMappingTable : GenericTable {
   let PrimaryKeyName = "getMIMGLZMappingInfo";
 }
 
-class mimg <bits<7> si, bits<7> vi = si> {
-  field bits<7> SI = si;
-  field bits<7> VI = vi;
+class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
+  MIMGBaseOpcode MIP = mip;
+  MIMGBaseOpcode NONMIP = nonmip;
 }
 
-class MIMG <dag outs, string dns = "">
+def MIMGMIPMappingTable : GenericTable {
+  let FilterClass = "MIMGMIPMapping";
+  let CppTypeName = "MIMGMIPMappingInfo";
+  let Fields = ["MIP", "NONMIP"];
+  GenericEnum TypeOf_MIP = MIMGBaseOpcode;
+  GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+
+  let PrimaryKey = ["MIP"];
+  let PrimaryKeyName = "getMIMGMIPMappingInfo";
+}
+
+class MIMG_Base <dag outs, string dns = "">
   : InstSI <outs, (ins), "", []> {
 
   let VM_CNT = 1;
@@ -97,20 +126,24 @@ class MIMG <dag outs, string dns = "">
   let Uses = [EXEC];
   let mayLoad = 1;
   let mayStore = 0;
-  let hasPostISelHook = 1;
   let SchedRW = [WriteVMEM];
   let UseNamedOperandTable = 1;
   let hasSideEffects = 0; // XXX ????
 
-  let SubtargetPredicate = isGCN;
   let DecoderNamespace = dns;
   let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
-  let AsmMatchConverter = "cvtMIMG";
   let usesCustomInserter = 1;
+}
+
+class MIMG <dag outs, string dns = "">
+  : MIMG_Base <outs, dns> {
+
+  let hasPostISelHook = 1;
+  let AsmMatchConverter = "cvtMIMG";
 
   Instruction Opcode = !cast<Instruction>(NAME);
   MIMGBaseOpcode BaseOpcode;
-  MIMGEncoding MIMGEncoding = MIMGEncGfx6;
+  MIMGEncoding MIMGEncoding;
   bits<8> VDataDwords;
   bits<8> VAddrDwords;
 }
@@ -131,15 +164,66 @@ def getMIMGInfo : SearchIndex {
   let Key = ["Opcode"];
 }
 
-class MIMG_NoSampler_Helper <bits<7> op, string asm,
+// This is a separate class so that TableGen memoizes the computations.
+class MIMGNSAHelper<int num_addrs> {
+  list<string> AddrAsmNames =
+    !foldl([]<string>, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], lhs, i,
+           !if(!lt(i, num_addrs), !listconcat(lhs, ["vaddr"#!size(lhs)]), lhs));
+  dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames);
+  string AddrAsm = "[" # !foldl("$" # !head(AddrAsmNames), !tail(AddrAsmNames), lhs, rhs,
+                                lhs # ", $" # rhs) # "]";
+
+  int NSA = !if(!le(num_addrs, 1), ?,
+            !if(!le(num_addrs, 5), 1,
+            !if(!le(num_addrs, 9), 2,
+            !if(!le(num_addrs, 13), 3, ?))));
+}
+
+// Base class of all pre-gfx10 MIMG instructions.
+class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
+  : MIMG<outs, dns>, MIMGe_gfx6789<op> {
+  let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
+  let AssemblerPredicates = [isGFX6GFX7GFX8GFX9];
+
+  let MIMGEncoding = MIMGEncGfx6;
+
+  let d16 = !if(BaseOpcode.HasD16, ?, 0);
+}
+
+// Base class of all non-NSA gfx10 MIMG instructions.
+class MIMG_gfx10<int op, dag outs, string dns = "">
+  : MIMG<outs, dns>, MIMGe_gfx10<op> {
+  let SubtargetPredicate = isGFX10Plus;
+  let AssemblerPredicates = [isGFX10Plus];
+
+  let MIMGEncoding = MIMGEncGfx10Default;
+
+  let d16 = !if(BaseOpcode.HasD16, ?, 0);
+  let nsa = 0;
+}
+
+// Base class for all NSA MIMG instructions. Note that 1-dword addresses always
+// use non-NSA variants.
+class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
+  : MIMG<outs, dns>, MIMGe_gfx10<op> {
+  let SubtargetPredicate = isGFX10Plus;
+  let AssemblerPredicates = [isGFX10Plus];
+
+  let MIMGEncoding = MIMGEncGfx10NSA;
+
+  MIMGNSAHelper nsah = MIMGNSAHelper<num_addrs>;
+  dag AddrIns = nsah.AddrIns;
+  string AddrAsm = nsah.AddrAsm;
+
+  let d16 = !if(BaseOpcode.HasD16, ?, 0);
+  let nsa = nsah.NSA;
+}
+
+class MIMG_NoSampler_Helper <bits<8> op, string asm,
                              RegisterClass dst_rc,
                              RegisterClass addr_rc,
                              string dns="">
-  : MIMG <(outs dst_rc:$vdata), dns>,
-    MIMGe<op> {
-  let ssamp = 0;
-  let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+  : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
   let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
                                 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
                                 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -148,23 +232,66 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
-multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+class MIMG_NoSampler_gfx10<int op, string opcode,  // gfx10 non-NSA image op with no $ssamp operand
+                           RegisterClass DataRC, RegisterClass AddrRC,
+                           string dns="">
+  : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {  // single output operand: $vdata
+  let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,  // one address operand, $vaddr0
+                                Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,  // $dim/$dlc are absent from MIMG_NoSampler_Helper
+                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),  // no $da here, unlike MIMG_NoSampler_Helper
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
+class MIMG_NoSampler_nsa_gfx10<int op, string opcode,  // NSA counterpart of MIMG_NoSampler_gfx10
+                               RegisterClass DataRC, int num_addrs,  // takes an address count instead of an address register class
+                               string dns="">
+  : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+  let InOperandList = !con(AddrIns,  // address operands inherited from MIMG_nsa_gfx10
+                           (ins SReg_256:$srsrc, DMask:$dmask,
+                                Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
+multiclass MIMG_NoSampler_Src_Helper <bits<8> op, string asm,
                                              RegisterClass dst_rc,
                                              bit enableDisasm> {
-  let VAddrDwords = 1 in
-  def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
-                                         !if(enableDisasm, "AMDGPU", "")>;
-  let VAddrDwords = 2 in
-  def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
-  let VAddrDwords = 3 in
-  def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
-  let VAddrDwords = 4 in
-  def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
+  let ssamp = 0 in {  // applies to every def below: pre-gfx10, gfx10 and gfx10 NSA, per address size
+    let VAddrDwords = 1 in {  // no NSA def for the 1-dword address size
+      def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
+                                       !if(enableDisasm, "AMDGPU", "")>;
+      def _V1_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VGPR_32,
+                                           !if(enableDisasm, "AMDGPU", "")>;
+    }
+
+    let VAddrDwords = 2 in {
+      def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
+      def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
+      def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
+    }
+
+    let VAddrDwords = 3 in {
+      def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
+      def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
+      def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
+    }
+
+    let VAddrDwords = 4 in {
+      def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
+      def _V4_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_128>;
+      def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,  // the only NSA def given a non-default dns
+                                                   !if(enableDisasm, "AMDGPU", "")>;
+    }
+  }
+}
+
+multiclass MIMG_NoSampler <bits<8> op, string asm, bit has_d16, bit mip = 0,
                            bit isResInfo = 0> {
-  def "" : MIMGBaseOpcode {
+  def "" : MIMGBaseOpcode, PredicateControl {
     let Coordinates = !if(isResInfo, 0, 1);
     let LodOrClampOrMip = mip;
     let HasD16 = has_d16;
@@ -180,26 +307,16 @@ multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
     defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 0>;
     let VDataDwords = 4 in
     defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 0>;
-    let VDataDwords = 8 in
-    defm _V8 : MIMG_NoSampler_Src_Helper <op, asm, VReg_256, 0>;
+    let VDataDwords = 5 in
+    defm _V5 : MIMG_NoSampler_Src_Helper <op, asm, VReg_160, 0>;
   }
 }
 
-class MIMG_Store_Helper <bits<7> op, string asm,
+class MIMG_Store_Helper <bits<8> op, string asm,
                          RegisterClass data_rc,
                          RegisterClass addr_rc,
                          string dns = "">
-  : MIMG <(outs), dns>,
-    MIMGe<op> {
-  let ssamp = 0;
-  let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
-  let mayLoad = 0;
-  let mayStore = 1;
-  let hasSideEffects = 0;
-  let hasPostISelHook = 0;
-  let DisableWQM = 1;
-
+  : MIMG_gfx6789<op, (outs), dns> {
   let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
                                 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
                                 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -208,21 +325,63 @@ class MIMG_Store_Helper <bits<7> op, string asm,
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
-multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
+class MIMG_Store_gfx10<int op, string opcode,  // gfx10 non-NSA image op with no outputs and no $ssamp operand
+                       RegisterClass DataRC, RegisterClass AddrRC,
+                       string dns="">
+  : MIMG_gfx10<op, (outs), dns> {  // empty outs: $vdata is an input operand here
+  let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+                                DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
+class MIMG_Store_nsa_gfx10<int op, string opcode,  // NSA counterpart of MIMG_Store_gfx10
+                           RegisterClass DataRC, int num_addrs,  // takes an address count instead of an address register class
+                           string dns="">
+  : MIMG_nsa_gfx10<op, (outs), num_addrs, dns> {  // empty outs: $vdata is an input operand here
+  let InOperandList = !con((ins DataRC:$vdata),
+                           AddrIns,  // address operands inherited from MIMG_nsa_gfx10
+                           (ins SReg_256:$srsrc, DMask:$dmask,
+                                Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
+multiclass MIMG_Store_Addr_Helper <int op, string asm,
                                   RegisterClass data_rc,
                                   bit enableDisasm> {
-  let VAddrDwords = 1 in
-  def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
-                                      !if(enableDisasm, "AMDGPU", "")>;
-  let VAddrDwords = 2 in
-  def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
-  let VAddrDwords = 3 in
-  def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
-  let VAddrDwords = 4 in
-  def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
-}
-
-multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
+  let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0,
+      DisableWQM = 1, ssamp = 0 in {  // these fields are applied to every def below
+    let VAddrDwords = 1 in {  // no NSA def for the 1-dword address size
+      def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+                                   !if(enableDisasm, "AMDGPU", "")>;
+      def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
+                                        !if(enableDisasm, "AMDGPU", "")>;
+    }
+    let VAddrDwords = 2 in {
+      def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
+      def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
+      def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
+    }
+    let VAddrDwords = 3 in {
+      def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
+      def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
+      def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
+    }
+    let VAddrDwords = 4 in {
+      def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
+      def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
+      def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,  // the only NSA def given a non-default dns
+                                                       !if(enableDisasm, "AMDGPU", "")>;
+    }
+  }
+}
+
+multiclass MIMG_Store <bits<8> op, string asm, bit has_d16, bit mip = 0> {
   def "" : MIMGBaseOpcode {
     let Store = 1;
     let LodOrClampOrMip = mip;
@@ -241,15 +400,9 @@ multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
   }
 }
 
-class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
-                          RegisterClass addr_rc, string dns="",
-                          bit enableDasm = 0>
-  : MIMG <(outs data_rc:$vdst), !if(enableDasm, dns, "")> {
-  let mayLoad = 1;
-  let mayStore = 1;
-  let hasSideEffects = 1; // FIXME: Remove this
-  let hasPostISelHook = 0;
-  let DisableWQM = 1;
+class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
+                                RegisterClass addr_rc, string dns="">
+  : MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
   let Constraints = "$vdst = $vdata";
   let AsmMatchConverter = "cvtMIMGAtomic";
 
@@ -259,39 +412,80 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
   let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
 }
 
-multiclass MIMG_Atomic_Helper_m <mimg op, string asm, RegisterClass data_rc,
-                                 RegisterClass addr_rc, bit enableDasm = 0> {
-  let ssamp = 0, d16 = 0 in {
-    def _si : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
-              SIMCInstr<NAME, SIEncodingFamily.SI>,
-              MIMGe<op.SI> {
-      let AssemblerPredicates = [isSICI];
-      let DisableDecoder = DisableSIDecoder;
-    }
+class MIMG_Atomic_si<mimg op, string asm, RegisterClass data_rc,  // atomic variant restricted to isGFX6GFX7
+                     RegisterClass addr_rc, bit enableDasm = 0>
+  : MIMG_Atomic_gfx6789_base<op.SI_GFX10, asm, data_rc, addr_rc,  // same opcode field the gfx10 atomic classes use
+                             !if(enableDasm, "GFX6GFX7", "")> {  // dns is non-empty only when enableDasm is set
+  let AssemblerPredicates = [isGFX6GFX7];  // narrows the isGFX6GFX7GFX8GFX9 default set in MIMG_gfx6789
+}
 
-    def _vi : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
-              SIMCInstr<NAME, SIEncodingFamily.VI>,
-              MIMGe<op.VI> {
-      let AssemblerPredicates = [isVI];
-      let DisableDecoder = DisableVIDecoder;
-      let MIMGEncoding = MIMGEncGfx8;
-    }
-  }
+class MIMG_Atomic_vi<mimg op, string asm, RegisterClass data_rc,  // atomic variant using op.VI and MIMGEncGfx8
+                     RegisterClass addr_rc, bit enableDasm = 0>
+  : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {  // NOTE(review): dns says GFX8 only, predicate below is isGFX8GFX9 - confirm intended
+  let AssemblerPredicates = [isGFX8GFX9];  // narrows the isGFX6GFX7GFX8GFX9 default set in MIMG_gfx6789
+  let MIMGEncoding = MIMGEncGfx8;  // overrides the MIMGEncGfx6 default set in MIMG_gfx6789
+}
+
+class MIMG_Atomic_gfx10<mimg op, string opcode,  // gfx10 non-NSA atomic image op
+                        RegisterClass DataRC, RegisterClass AddrRC,
+                        bit enableDisasm = 0>
+  : MIMG_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst),  // opcode field is cast to the int that MIMG_gfx10 expects
+               !if(enableDisasm, "AMDGPU", "")> {  // dns is non-empty only when enableDisasm is set
+  let Constraints = "$vdst = $vdata";  // output is tied to the $vdata input
+  let AsmMatchConverter = "cvtMIMGAtomic";
+
+  let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,  // no optional $d16 operand, unlike the load/store classes
+                           DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+                           GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
+  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+}
+
+class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,  // NSA counterpart of MIMG_Atomic_gfx10
+                            RegisterClass DataRC, int num_addrs,  // takes an address count instead of an address register class
+                            bit enableDisasm = 0>
+  : MIMG_nsa_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst), num_addrs,
+                   !if(enableDisasm, "AMDGPU", "")> {  // dns is non-empty only when enableDisasm is set
+  let Constraints = "$vdst = $vdata";  // output is tied to the $vdata input
+  let AsmMatchConverter = "cvtMIMGAtomic";
+
+  let InOperandList = !con((ins DataRC:$vdata),
+                           AddrIns,  // address operands inherited from MIMG_nsa_gfx10
+                           (ins SReg_256:$srsrc, DMask:$dmask,
+                                Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";  // NOTE(review): prints $vdata where MIMG_Atomic_gfx10 prints $vdst (the two are tied above)
 }
 
 multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
                                       RegisterClass data_rc,
                                       bit enableDasm = 0> {
-  // _V* variants have different address size, but the size is not encoded.
-  // So only one variant can be disassembled. V1 looks the safest to decode.
-  let VAddrDwords = 1 in
-  defm _V1 : MIMG_Atomic_Helper_m <op, asm, data_rc, VGPR_32, enableDasm>;
-  let VAddrDwords = 2 in
-  defm _V2 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_64>;
-  let VAddrDwords = 3 in
-  defm _V3 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_96>;
-  let VAddrDwords = 4 in
-  defm _V4 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_128>;
+  let hasSideEffects = 1, // FIXME: remove this
+      mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
+      ssamp = 0 in {  // these fields are applied to every def below
+    let VAddrDwords = 1 in {  // no NSA def for the 1-dword address size
+      def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+      def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+      def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+    }
+    let VAddrDwords = 2 in {  // all four defs pass 0 for the disassembly flag
+      def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+      def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
+      def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
+      def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+    }
+    let VAddrDwords = 3 in {  // all four defs pass 0 for the disassembly flag
+      def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+      def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
+      def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
+      def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+    }
+    let VAddrDwords = 4 in {
+      def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+      def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
+      def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
+      def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;  // the only NSA def that forwards enableDasm
+    }
+  }
 }
 
 multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atomics
@@ -311,12 +505,9 @@ multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atom
   }
 }
 
-class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
+class MIMG_Sampler_Helper <bits<8> op, string asm, RegisterClass dst_rc,
                            RegisterClass src_rc, string dns="">
-  : MIMG <(outs dst_rc:$vdata), dns>,
-    MIMGe<op> {
-  let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+  : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
   let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
                                 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
                                 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -325,6 +516,33 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
+class MIMG_Sampler_gfx10<int op, string opcode,  // gfx10 non-NSA image op with an $ssamp operand
+                         RegisterClass DataRC, RegisterClass AddrRC,
+                         string dns="">
+  : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {  // single output operand: $vdata
+  let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,  // $ssamp follows $srsrc
+                                DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
+                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
+class MIMG_Sampler_nsa_gfx10<int op, string opcode,  // NSA counterpart of MIMG_Sampler_gfx10
+                             RegisterClass DataRC, int num_addrs,  // takes an address count instead of an address register class
+                             string dns="">
+  : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+  let InOperandList = !con(AddrIns,  // address operands inherited from MIMG_nsa_gfx10
+                           (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
+                                Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));  // $d16 operand only when the base opcode has D16
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
+                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");  // asm suffix mirrors the optional operand above
+}
+
 class MIMGAddrSize<int dw, bit enable_disasm> {
   int NumWords = dw;
 
@@ -341,6 +559,11 @@ class MIMGAddrSize<int dw, bit enable_disasm> {
   bit Disassemble = enable_disasm;
 }
 
+// Return whether x is in lst: ret is 1 if any element equals x, 0 otherwise (including for an empty list).
+class isIntInList<int x, list<int> lst> {
+  bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y)));  // fold starts at 0 and ORs in one equality test per element
+}
+
 // Return whether a value inside the range [min, max] (endpoints inclusive)
 // is in the given list.
 class isRangeInList<int min, int max, list<int> lst> {
@@ -376,16 +599,41 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample> {
                   !listconcat(lhs.List, [MIMGAddrSize<dw, !empty(lhs.List)>]),
                   !if(!eq(dw, 3), 3, !add(dw, 1))>, // we still need _V4 for codegen w/ 3 dwords
                lhs)).List;
-}
 
-multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+  // For NSA, generate machine instructions for all possible numbers of words
+  // except 1 (which is already covered by the non-NSA case).
+  // The disassembler defaults to the largest number of arguments among the
+  // variants with the same number of NSA words, and custom code then derives
+  // the exact variant based on the sample variant and the image dimension.
+  list<MIMGAddrSize> NSAInstrs =
+    !foldl([]<MIMGAddrSize>, [[12, 11, 10], [9, 8, 7, 6], [5, 4, 3, 2]], prev, nsa_group,
+           !listconcat(prev,
+                       !foldl([]<MIMGAddrSize>, nsa_group, lhs, dw,
+                              !if(isIntInList<dw, AllNumAddrWords>.ret,
+                                  !listconcat(lhs, [MIMGAddrSize<dw, !empty(lhs)>]),
+                                  lhs))));
+}
+
+multiclass MIMG_Sampler_Src_Helper <bits<8> op, string asm,
                                     AMDGPUSampleVariant sample, RegisterClass dst_rc,
                                     bit enableDisasm = 0> {
   foreach addr = MIMG_Sampler_AddrSizes<sample>.MachineInstrs in {
-    let VAddrDwords = addr.NumWords in
-    def _V # addr.NumWords
-      : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
-                             !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+    let VAddrDwords = addr.NumWords in {  // one pre-gfx10 def and one gfx10 def per address size
+      def _V # addr.NumWords
+        : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
+                               !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;  // dns set only when both flags are set
+      def _V # addr.NumWords # _gfx10
+        : MIMG_Sampler_gfx10 <op, asm, dst_rc, addr.RegClass,
+                               !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;  // same dns rule as the pre-gfx10 def
+    }
+  }
+
+  foreach addr = MIMG_Sampler_AddrSizes<sample>.NSAInstrs in {
+    let VAddrDwords = addr.NumWords in {  // one gfx10 NSA def per entry of NSAInstrs
+      def _V # addr.NumWords # _nsa_gfx10
+        : MIMG_Sampler_nsa_gfx10<op, asm, dst_rc, addr.NumWords,  // passes the word count, not a register class
+                                 !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;  // dns set only when both flags are set
+    }
   }
 }
 
@@ -397,7 +645,7 @@ class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
   let LodOrClampOrMip = !ne(sample.LodOrClamp, "");
 }
 
-multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
+multiclass MIMG_Sampler <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
                          bit isGetLod = 0,
                          string asm = "image_sample"#sample.LowerCaseMod> {
   def "" : MIMG_Sampler_BaseOpcode<sample> {
@@ -414,15 +662,15 @@ multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
     defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>;
     let VDataDwords = 4 in
     defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
-    let VDataDwords = 8 in
-    defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
+    let VDataDwords = 5 in
+    defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
   }
 }
 
-multiclass MIMG_Sampler_WQM <bits<7> op, AMDGPUSampleVariant sample>
+multiclass MIMG_Sampler_WQM <bits<8> op, AMDGPUSampleVariant sample>  // MIMG_Sampler with its wqm argument set to 1
     : MIMG_Sampler<op, sample, 1>;
 
-multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
+multiclass MIMG_Gather <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
                         string asm = "image_gather4"#sample.LowerCaseMod> {
   def "" : MIMG_Sampler_BaseOpcode<sample> {
     let HasD16 = 1;
@@ -435,12 +683,12 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
     defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
     let VDataDwords = 4 in
     defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
-    let VDataDwords = 8 in
-    defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
+    let VDataDwords = 5 in
+    defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
   }
 }
 
-multiclass MIMG_Gather_WQM <bits<7> op, AMDGPUSampleVariant sample>
+multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>  // MIMG_Gather with its wqm argument set to 1
     : MIMG_Gather<op, sample, 1>;
 
 //===----------------------------------------------------------------------===//
@@ -473,9 +721,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;
 defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
 defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
 defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
+//let FPAtomic = 1 in {
 //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI
 //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
 //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
+//} // End let FPAtomic = 1
 defm IMAGE_SAMPLE           : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
 defm IMAGE_SAMPLE_CL        : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
 defm IMAGE_SAMPLE_D         : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
@@ -581,3 +831,7 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
+
+// MIP to NONMIP Optimization Mapping
+def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
+def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
diff --git a/lib/Target/AMDGPU/R600.td b/lib/Target/AMDGPU/R600.td
index 5c9c1c1ed504..1d11da969474 100644
--- a/lib/Target/AMDGPU/R600.td
+++ b/lib/Target/AMDGPU/R600.td
@@ -1,9 +1,8 @@
 //===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp
index 68f8c30775b8..3fb18862fca8 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- R600AsmPrinter.cpp - R600 Assebly printer  ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.h b/lib/Target/AMDGPU/R600AsmPrinter.h
index 079fc707b03c..0da9526d716e 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.h
+++ b/lib/Target/AMDGPU/R600AsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- R600AsmPrinter.h - Print R600 assembly code -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 0c62d6a4b3d9..290a960ae901 100644
--- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -1,9 +1,8 @@
 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index a19020276f35..8098b81d1ea2 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -1,9 +1,8 @@
 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 0d33d82e8e0f..d72534908dcf 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -1,9 +1,8 @@
 //===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 679cf18d2c20..b97e3c8b8dd7 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -1,9 +1,8 @@
 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index b924ff019dd1..c6e8a060d8a0 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -1,9 +1,8 @@
 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 37787b3c5f72..d9aa9ebe878d 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===----------------------- R600FrameLowering.cpp ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h
index fe367d73682f..950e238f4979 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/lib/Target/AMDGPU/R600FrameLowering.h
@@ -1,9 +1,8 @@
 //===--------------------- R600FrameLowering.h ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index e2a0f05d2b34..f80a53ba1dc6 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1240,11 +1239,13 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 
   SDLoc DL(Op);
 
+  const bool TruncatingStore = StoreNode->isTruncatingStore();
+
   // Neither LOCAL nor PRIVATE can do vectors at the moment
-  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
+       TruncatingStore) &&
       VT.isVector()) {
-    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
-         StoreNode->isTruncatingStore()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
       // Add an extra level of chain to isolate this vector
       SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
       // TODO: can the chain be replaced without creating a new store?
@@ -1260,7 +1261,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 
   unsigned Align = StoreNode->getAlignment();
   if (Align < MemVT.getStoreSize() &&
-      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+      !allowsMisalignedMemoryAccesses(
+          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
     return expandUnalignedStore(StoreNode, DAG);
   }
 
@@ -1270,7 +1272,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
     // It is beneficial to create MSKOR here instead of combiner to avoid
     // artificial dependencies introduced by RMW
-    if (StoreNode->isTruncatingStore()) {
+    if (TruncatingStore) {
       assert(VT.bitsLE(MVT::i32));
       SDValue MaskConstant;
       if (MemVT == MVT::i8) {
@@ -1310,8 +1312,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
       // Convert pointer from byte address to dword address.
       Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
 
-      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
-        llvm_unreachable("Truncated and indexed stores not supported yet");
+      if (StoreNode->isIndexed()) {
+        llvm_unreachable("Indexed stores not supported yet");
       } else {
         Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
       }
@@ -1662,10 +1664,9 @@ bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
   return true;
 }
 
-bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                        unsigned AddrSpace,
-                                                        unsigned Align,
-                                                        bool *IsFast) const {
+bool R600TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
 
@@ -1713,6 +1714,12 @@ static SDValue CompactSwizzlableVector(
 
     if (NewBldVec[i].isUndef())
       continue;
+    // Fix spurious warning with gcc 7.3 -O3
+    //    warning: array subscript is above array bounds [-Warray-bounds]
+    //    if (NewBldVec[i] == NewBldVec[j]) {
+    //        ~~~~~~~~~~~^
+    if (i >= 4)
+      continue;
     for (unsigned j = 0; j < i; j++) {
       if (NewBldVec[i] == NewBldVec[j]) {
         NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index 767c3c7bd5bf..b560da8e91d9 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -1,9 +1,8 @@
 //===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,9 +49,10 @@ public:
   bool canMergeStoresTo(unsigned AS, EVT MemVT,
                         const SelectionDAG &DAG) const override;
 
-  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
-                                      unsigned Align,
-                                      bool *IsFast) const override;
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AS, unsigned Align,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *IsFast = nullptr) const override;
 
 private:
   unsigned Gen;
diff --git a/lib/Target/AMDGPU/R600InstrFormats.td b/lib/Target/AMDGPU/R600InstrFormats.td
index 687a9affa138..f62e6313b148 100644
--- a/lib/Target/AMDGPU/R600InstrFormats.td
+++ b/lib/Target/AMDGPU/R600InstrFormats.td
@@ -1,9 +1,8 @@
 //===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 9cc3e5f3c314..d9e839fe2035 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -402,6 +401,7 @@ Swizzle(std::vector<std::pair<int, unsigned>> Src,
 }
 
 static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+  assert(Op < 3 && "Out of range swizzle index");
   switch (Swz) {
   case R600InstrInfo::ALU_VEC_012_SCL_210: {
     unsigned Cycles[3] = { 2, 1, 0};
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index e6e34dc125f4..00d96c9676aa 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -1,9 +1,8 @@
 //===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 10e873755222..f40eece859ee 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1,9 +1,8 @@
 //===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -296,6 +295,34 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
   let VTXInst = 1;
 }
 
+// FIXME: Deprecated.
+class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
+
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+                                              (ld_node node:$ptr), [{
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  return L->getExtensionType() == ISD::ZEXTLOAD ||
+         L->getExtensionType() == ISD::EXTLOAD;
+}]>;
+
+def az_extload : AZExtLoadBase <unindexedload>;
+
+def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// FIXME: These are deprecated
+def az_extloadi8_local : LocalLoad <az_extloadi8>;
+def az_extloadi16_local : LocalLoad <az_extloadi16>;
+
 class LoadParamFrag <PatFrag load_type> : PatFrag <
   (ops node:$ptr), (load_type node:$ptr),
   [{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 3ca319c6c6c2..65011a9eadf8 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 29ac0920f997..6a5ac9023329 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 7769a35aadce..34267a909b5e 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -1,9 +1,8 @@
 //===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.h b/lib/Target/AMDGPU/R600MachineScheduler.h
index 8a9a8d3d1e23..bc66f2ef5907 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -1,9 +1,8 @@
 //===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 7de5e2c9577d..1fe92d2269d3 100644
--- a/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -1,9 +1,8 @@
 //===- R600OpenCLImageTypeLoweringPass.cpp ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 692451cb8fe0..9f1cb6582b5c 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -1,9 +1,8 @@
 //===- R600MergeVectorRegisters.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,17 +56,12 @@ using namespace llvm;
 
 #define DEBUG_TYPE "vec-merger"
 
-static bool
-isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
-  for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
-      E = MRI.def_instr_end(); It != E; ++It) {
-    return (*It).isImplicitDef();
-  }
-  if (MRI.isReserved(Reg)) {
+static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
+  assert(MRI.isSSA());
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
     return false;
-  }
-  llvm_unreachable("Reg without a def");
-  return false;
+  const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+  return MI && MI->isImplicitDef();
 }
 
 namespace {
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 612c62b514fd..df200baf11c1 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -1,9 +1,8 @@
 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -187,8 +186,8 @@ public:
     // Does MII and MIJ share the same pred_sel ?
     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
-    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
-        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+    Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
+      PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
     if (PredI != PredJ)
       return false;
     if (SUJ->isSucc(SUI)) {
diff --git a/lib/Target/AMDGPU/R600Processors.td b/lib/Target/AMDGPU/R600Processors.td
index f39b3dc1bfd4..fff884e4848e 100644
--- a/lib/Target/AMDGPU/R600Processors.td
+++ b/lib/Target/AMDGPU/R600Processors.td
@@ -1,9 +1,8 @@
 //===-- R600Processors.td - R600 Processor definitions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -41,23 +40,24 @@ def FeatureCFALUBug : SubtargetFeature<"cfalubug",
   "GPU has CF_ALU bug"
 >;
 
-class R600SubtargetFeatureGeneration <string Value,
+class R600SubtargetFeatureGeneration <string Value, string FeatureName,
                                   list<SubtargetFeature> Implies> :
-        SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;
+        SubtargetFeatureGeneration <Value, FeatureName, "R600Subtarget", Implies>;
 
-def FeatureR600 : R600SubtargetFeatureGeneration<"R600",
+def FeatureR600 : R600SubtargetFeatureGeneration<"R600", "r600",
   [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
 >;
 
-def FeatureR700 : R600SubtargetFeatureGeneration<"R700",
+def FeatureR700 : R600SubtargetFeatureGeneration<"R700", "r700",
   [FeatureFetchLimit16, FeatureLocalMemorySize0]
 >;
 
-def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN",
+def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", "evergreen",
   [FeatureFetchLimit16, FeatureLocalMemorySize32768]
 >;
 
 def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+  "northern-islands",
   [FeatureFetchLimit16, FeatureWavefrontSize64,
    FeatureLocalMemorySize32768]
 >;
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 38933e7616a0..685df74490fe 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,7 +67,7 @@ const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
   return &CalleeSavedReg;
 }
 
-unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return R600::NoRegister;
 }
 
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h
index c4c77172b299..9378b70ca580 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,7 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   /// get the HW encoding for a register's channel.
   unsigned getHWRegChan(unsigned reg) const;
diff --git a/lib/Target/AMDGPU/R600Schedule.td b/lib/Target/AMDGPU/R600Schedule.td
index 70fb46c1a7d6..c998fe848193 100644
--- a/lib/Target/AMDGPU/R600Schedule.td
+++ b/lib/Target/AMDGPU/R600Schedule.td
@@ -1,9 +1,8 @@
 //===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/R700Instructions.td b/lib/Target/AMDGPU/R700Instructions.td
index 613a0d729bb3..9c9a03209ec2 100644
--- a/lib/Target/AMDGPU/R700Instructions.td
+++ b/lib/Target/AMDGPU/R700Instructions.td
@@ -1,9 +1,8 @@
 //===-- R700Instructions.td - R700 Instruction defs  -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp
index 69cafef4a351..f8094e35816c 100644
--- a/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -1,9 +1,8 @@
 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 98e9ea662324..b764ca7d7061 100644
--- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -1,9 +1,8 @@
 //===- SIAnnotateControlFlow.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -38,6 +38,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <utility>
 
@@ -56,13 +57,13 @@ class SIAnnotateControlFlow : public FunctionPass {
 
   Type *Boolean;
   Type *Void;
-  Type *Int64;
+  Type *IntMask;
   Type *ReturnStruct;
 
   ConstantInt *BoolTrue;
   ConstantInt *BoolFalse;
   UndefValue *BoolUndef;
-  Constant *Int64Zero;
+  Constant *IntMaskZero;
 
   Function *If;
   Function *Else;
@@ -75,6 +76,8 @@ class SIAnnotateControlFlow : public FunctionPass {
 
   LoopInfo *LI;
 
+  void initialize(Module &M, const GCNSubtarget &ST);
+
   bool isUniform(BranchInst *T);
 
   bool isTopOfStack(BasicBlock *BB);
@@ -104,8 +107,6 @@ public:
 
   SIAnnotateControlFlow() : FunctionPass(ID) {}
 
-  bool doInitialization(Module &M) override;
-
   bool runOnFunction(Function &F) override;
 
   StringRef getPassName() const override { return "SI annotate control flow"; }
@@ -115,6 +116,7 @@ public:
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
     FunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -125,31 +127,34 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
                       "Annotate SI Control Flow", false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
                     "Annotate SI Control Flow", false, false)
 
 char SIAnnotateControlFlow::ID = 0;
 
 /// Initialize all the types and constants used in the pass
-bool SIAnnotateControlFlow::doInitialization(Module &M) {
+void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
   LLVMContext &Context = M.getContext();
 
   Void = Type::getVoidTy(Context);
   Boolean = Type::getInt1Ty(Context);
-  Int64 = Type::getInt64Ty(Context);
-  ReturnStruct = StructType::get(Boolean, Int64);
+  IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
+                           : Type::getInt64Ty(Context);
+  ReturnStruct = StructType::get(Boolean, IntMask);
 
   BoolTrue = ConstantInt::getTrue(Context);
   BoolFalse = ConstantInt::getFalse(Context);
   BoolUndef = UndefValue::get(Boolean);
-  Int64Zero = ConstantInt::get(Int64, 0);
-
-  If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
-  Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
-  IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
-  Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
-  EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
-  return false;
+  IntMaskZero = ConstantInt::get(IntMask, 0);
+
+  If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
+  Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
+                                   { IntMask, IntMask });
+  IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
+                                      { IntMask, IntMask });
+  Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
+  EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
 }
 
 /// Is the branch condition uniform or did the StructurizeCFG pass
@@ -259,14 +264,23 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
     return;
 
   BasicBlock *Target = Term->getSuccessor(1);
-  PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+  PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
 
   Value *Cond = Term->getCondition();
   Term->setCondition(BoolTrue);
   Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
 
-  for (BasicBlock *Pred : predecessors(Target))
-    Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+  for (BasicBlock *Pred : predecessors(Target)) {
+    Value *PHIValue = IntMaskZero;
+    if (Pred == BB) // Remember the value of the previous iteration.
+      PHIValue = Arg;
+    // If the backedge from Pred to Target could be executed before the exit
+    // of the loop at BB, it should not reset or change "Broken", which keeps
+    // track of the number of threads exited the loop at BB.
+    else if (L->contains(Pred) && DT->dominates(Pred, BB))
+      PHIValue = Broken;
+    Broken->addIncoming(PHIValue, Pred);
+  }
 
   Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
 
@@ -308,6 +322,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   DA = &getAnalysis<LegacyDivergenceAnalysis>();
+  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+  const TargetMachine &TM = TPC.getTM<TargetMachine>();
+
+  initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
 
   for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
        E = df_end(&F.getEntryBlock()); I != E; ++I) {
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
deleted file mode 100644
index 7e884ad93a23..000000000000
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Inserts one nop instruction for each high level source statement for
-/// debugger usage.
-///
-/// Tools, such as a debugger, need to pause execution based on user input (i.e.
-/// breakpoint). In order to do this, one nop instruction is inserted before the
-/// first isa instruction of each high level source statement. Further, the
-/// debugger may replace nop instructions with trap instructions based on user
-/// input.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "si-debugger-insert-nops"
-#define PASS_NAME "SI Debugger Insert Nops"
-
-namespace {
-
-class SIDebuggerInsertNops : public MachineFunctionPass {
-public:
-  static char ID;
-
-  SIDebuggerInsertNops() : MachineFunctionPass(ID) { }
-  StringRef getPassName() const override { return PASS_NAME; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // anonymous namespace
-
-INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false)
-
-char SIDebuggerInsertNops::ID = 0;
-char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID;
-
-FunctionPass *llvm::createSIDebuggerInsertNopsPass() {
-  return new SIDebuggerInsertNops();
-}
-
-bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
-  // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
-  // specified.
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (!ST.debuggerInsertNops())
-    return false;
-
-  // Skip machine functions without debug info.
-  if (!MF.getMMI().hasDebugInfo())
-    return false;
-
-  // Target instruction info.
-  const SIInstrInfo *TII = ST.getInstrInfo();
-
-  // Set containing line numbers that have nop inserted.
-  DenseSet<unsigned> NopInserted;
-
-  for (auto &MBB : MF) {
-    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
-      // Skip debug instructions and instructions without location.
-      if (MI->isDebugInstr() || !MI->getDebugLoc())
-        continue;
-
-      // Insert nop instruction if line number does not have nop inserted.
-      auto DL = MI->getDebugLoc();
-      if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
-        BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
-          .addImm(0);
-        NopInserted.insert(DL.getLine());
-      }
-    }
-  }
-
-  return true;
-}
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 7f6abc34cff3..a0e1ec6ac235 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -1,9 +1,8 @@
 //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
@@ -90,13 +89,22 @@ enum : uint64_t {
   // Is a D16 buffer instruction.
   D16Buf = UINT64_C(1) << 50,
 
+  // FLAT instruction accesses FLAT_GLBL or FLAT_SCRATCH segment.
+  IsNonFlatSeg = UINT64_C(1) << 51,
+
   // Uses floating point double precision rounding mode
-  FPDPRounding = UINT64_C(1) << 51
+  FPDPRounding = UINT64_C(1) << 52,
+
+  // Instruction is FP atomic.
+  FPAtomic = UINT64_C(1) << 53,
+
+  // Is a MFMA instruction.
+  IsMAI = UINT64_C(1) << 54
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
 // The result is true if any of these tests are true.
-enum ClassFlags {
+enum ClassFlags : unsigned {
   S_NAN = 1 << 0,        // Signaling NaN
   Q_NAN = 1 << 1,        // Quiet NaN
   N_INFINITY = 1 << 2,   // Negative infinity
@@ -111,7 +119,7 @@ enum ClassFlags {
 }
 
 namespace AMDGPU {
-  enum OperandType {
+  enum OperandType : unsigned {
     /// Operands with register or 32-bit immediate
     OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
     OPERAND_REG_IMM_INT64,
@@ -119,6 +127,8 @@ namespace AMDGPU {
     OPERAND_REG_IMM_FP32,
     OPERAND_REG_IMM_FP64,
     OPERAND_REG_IMM_FP16,
+    OPERAND_REG_IMM_V2FP16,
+    OPERAND_REG_IMM_V2INT16,
 
     /// Operands with register or inline constant
     OPERAND_REG_INLINE_C_INT16,
@@ -130,11 +140,22 @@ namespace AMDGPU {
     OPERAND_REG_INLINE_C_V2FP16,
     OPERAND_REG_INLINE_C_V2INT16,
 
+    /// Operands with an AccVGPR register or inline constant
+    OPERAND_REG_INLINE_AC_INT16,
+    OPERAND_REG_INLINE_AC_INT32,
+    OPERAND_REG_INLINE_AC_FP16,
+    OPERAND_REG_INLINE_AC_FP32,
+    OPERAND_REG_INLINE_AC_V2FP16,
+    OPERAND_REG_INLINE_AC_V2INT16,
+
     OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
-    OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+    OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2INT16,
 
     OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
-    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
+    OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2INT16,
+
+    OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+    OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2INT16,
 
     OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
     OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -151,17 +172,10 @@ namespace AMDGPU {
   };
 }
 
-namespace SIStackID {
-enum StackTypes : uint8_t {
-  SCRATCH = 0,
-  SGPR_SPILL = 1
-};
-}
-
 // Input operand modifiers bit-masks
 // NEG and SEXT share same bit-mask because they can't be set simultaneously.
 namespace SISrcMods {
-  enum {
+  enum : unsigned {
    NEG = 1 << 0,   // Floating-point negate modifier
    ABS = 1 << 1,   // Floating-point absolute modifier
    SEXT = 1 << 0,  // Integer sign-extend modifier
@@ -173,7 +187,7 @@ namespace SISrcMods {
 }
 
 namespace SIOutMods {
-  enum {
+  enum : unsigned {
     NONE = 0,
     MUL2 = 1,
     MUL4 = 2,
@@ -181,17 +195,33 @@ namespace SIOutMods {
   };
 }
 
+namespace AMDGPU {
 namespace VGPRIndexMode {
-  enum {
-    SRC0_ENABLE = 1 << 0,
-    SRC1_ENABLE = 1 << 1,
-    SRC2_ENABLE = 1 << 2,
-    DST_ENABLE = 1 << 3
-  };
-}
+
+enum Id : unsigned { // id of symbolic names
+  ID_SRC0 = 0,
+  ID_SRC1,
+  ID_SRC2,
+  ID_DST,
+
+  ID_MIN = ID_SRC0,
+  ID_MAX = ID_DST
+};
+
+enum EncBits : unsigned {
+  OFF = 0,
+  SRC0_ENABLE = 1 << ID_SRC0,
+  SRC1_ENABLE = 1 << ID_SRC1,
+  SRC2_ENABLE = 1 << ID_SRC2,
+  DST_ENABLE = 1 << ID_DST,
+  ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE
+};
+
+} // namespace VGPRIndexMode
+} // namespace AMDGPU
 
 namespace AMDGPUAsmVariants {
-  enum {
+  enum : unsigned {
     DEFAULT = 0,
     VOP3 = 1,
     SDWA = 2,
@@ -203,13 +233,14 @@ namespace AMDGPUAsmVariants {
 namespace AMDGPU {
 namespace EncValues { // Encoding values of enum9/8/7 operands
 
-enum {
+enum : unsigned {
   SGPR_MIN = 0,
-  SGPR_MAX = 101,
+  SGPR_MAX_SI = 101,
+  SGPR_MAX_GFX10 = 105,
   TTMP_VI_MIN = 112,
   TTMP_VI_MAX = 123,
-  TTMP_GFX9_MIN = 108,
-  TTMP_GFX9_MAX = 123,
+  TTMP_GFX9_GFX10_MIN = 108,
+  TTMP_GFX9_GFX10_MAX = 123,
   INLINE_INTEGER_C_MIN = 128,
   INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
   INLINE_INTEGER_C_MAX = 208,
@@ -231,6 +262,8 @@ enum Id { // Message ID, width(4) [3:0].
   ID_INTERRUPT = 1,
   ID_GS,
   ID_GS_DONE,
+  ID_GS_ALLOC_REQ = 9,
+  ID_GET_DOORBELL = 10,
   ID_SYSMSG = 15,
   ID_GAPS_LAST_, // Indicate that sequence has gaps.
   ID_GAPS_FIRST_ = ID_INTERRUPT,
@@ -242,27 +275,28 @@ enum Id { // Message ID, width(4) [3:0].
 enum Op { // Both GS and SYS operation IDs.
   OP_UNKNOWN_ = -1,
   OP_SHIFT_ = 4,
-  // width(2) [5:4]
+  OP_NONE_ = 0,
+  // Bits used for operation encoding
+  OP_WIDTH_ = 3,
+  OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),
+  // GS operations are encoded in bits 5:4
   OP_GS_NOP = 0,
   OP_GS_CUT,
   OP_GS_EMIT,
   OP_GS_EMIT_CUT,
   OP_GS_LAST_,
   OP_GS_FIRST_ = OP_GS_NOP,
-  OP_GS_WIDTH_ = 2,
-  OP_GS_MASK_ = (((1 << OP_GS_WIDTH_) - 1) << OP_SHIFT_),
-  // width(3) [6:4]
+  // SYS operations are encoded in bits 6:4
   OP_SYS_ECC_ERR_INTERRUPT = 1,
   OP_SYS_REG_RD,
   OP_SYS_HOST_TRAP_ACK,
   OP_SYS_TTRACE_PC,
   OP_SYS_LAST_,
   OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
-  OP_SYS_WIDTH_ = 3,
-  OP_SYS_MASK_ = (((1 << OP_SYS_WIDTH_) - 1) << OP_SHIFT_)
 };
 
-enum StreamId { // Stream ID, (2) [9:8].
+enum StreamId : unsigned { // Stream ID, (2) [9:8].
+  STREAM_ID_NONE_ = 0,
   STREAM_ID_DEFAULT_ = 0,
   STREAM_ID_LAST_ = 4,
   STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
@@ -287,23 +321,34 @@ enum Id { // HwRegCode, (6) [5:0]
   ID_IB_STS = 7,
   ID_MEM_BASES = 15,
   ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES,
-  ID_SYMBOLIC_LAST_ = 16,
+  ID_TBA_LO = 16,
+  ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO,
+  ID_TBA_HI = 17,
+  ID_TMA_LO = 18,
+  ID_TMA_HI = 19,
+  ID_FLAT_SCR_LO = 20,
+  ID_FLAT_SCR_HI = 21,
+  ID_XNACK_MASK = 22,
+  ID_POPS_PACKER = 25,
+  ID_SYMBOLIC_LAST_ = 26,
   ID_SHIFT_ = 0,
   ID_WIDTH_ = 6,
   ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
 };
 
-enum Offset { // Offset, (5) [10:6]
+enum Offset : unsigned { // Offset, (5) [10:6]
   OFFSET_DEFAULT_ = 0,
   OFFSET_SHIFT_ = 6,
   OFFSET_WIDTH_ = 5,
   OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
 
+  OFFSET_MEM_VIOL = 8,
+
   OFFSET_SRC_SHARED_BASE = 16,
   OFFSET_SRC_PRIVATE_BASE = 0
 };
 
-enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
+enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
   WIDTH_M1_DEFAULT_ = 31,
   WIDTH_M1_SHIFT_ = 11,
   WIDTH_M1_WIDTH_ = 5,
@@ -313,11 +358,16 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
   WIDTH_M1_SRC_PRIVATE_BASE = 15
 };
 
+// Some values from WidthMinusOne mapped into Width domain.
+enum Width : unsigned {
+  WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
+};
+
 } // namespace Hwreg
 
 namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
 
-enum Id { // id of symbolic names
+enum Id : unsigned { // id of symbolic names
   ID_QUAD_PERM = 0,
   ID_BITMASK_PERM,
   ID_SWAP,
@@ -325,7 +375,7 @@ enum Id { // id of symbolic names
   ID_BROADCAST
 };
 
-enum EncBits {
+enum EncBits : unsigned {
 
   // swizzle mode encodings
 
@@ -357,7 +407,7 @@ enum EncBits {
 
 namespace SDWA {
 
-enum SdwaSel {
+enum SdwaSel : unsigned {
   BYTE_0 = 0,
   BYTE_1 = 1,
   BYTE_2 = 2,
@@ -367,13 +417,13 @@ enum SdwaSel {
   DWORD = 6,
 };
 
-enum DstUnused {
+enum DstUnused : unsigned {
   UNUSED_PAD = 0,
   UNUSED_SEXT = 1,
   UNUSED_PRESERVE = 2,
 };
 
-enum SDWA9EncValues{
+enum SDWA9EncValues : unsigned {
   SRC_SGPR_MASK = 0x100,
   SRC_VGPR_MASK = 0xFF,
   VOPC_DST_VCC_MASK = 0x80,
@@ -382,7 +432,8 @@ enum SDWA9EncValues{
   SRC_VGPR_MIN = 0,
   SRC_VGPR_MAX = 255,
   SRC_SGPR_MIN = 256,
-  SRC_SGPR_MAX = 357,
+  SRC_SGPR_MAX_SI = 357,
+  SRC_SGPR_MAX_GFX10 = 361,
   SRC_TTMP_MIN = 364,
   SRC_TTMP_MAX = 379,
 };
@@ -391,7 +442,7 @@ enum SDWA9EncValues{
 
 namespace DPP {
 
-enum DppCtrl {
+enum DppCtrl : unsigned {
   QUAD_PERM_FIRST   = 0,
   QUAD_PERM_LAST    = 0xFF,
   DPP_UNUSED1       = 0x100,
@@ -422,7 +473,20 @@ enum DppCtrl {
   ROW_HALF_MIRROR   = 0x141,
   BCAST15           = 0x142,
   BCAST31           = 0x143,
-  DPP_LAST          = BCAST31
+  DPP_UNUSED8_FIRST = 0x144,
+  DPP_UNUSED8_LAST  = 0x14F,
+  ROW_SHARE_FIRST   = 0x150,
+  ROW_SHARE_LAST    = 0x15F,
+  ROW_XMASK_FIRST   = 0x160,
+  ROW_XMASK_LAST    = 0x16F,
+  DPP_LAST          = ROW_XMASK_LAST
+};
+
+enum DppFiMode {
+  DPP_FI_0  = 0,
+  DPP_FI_1  = 1,
+  DPP8_FI_0 = 0xE9,
+  DPP8_FI_1 = 0xEA,
 };
 
 } // namespace DPP
@@ -505,6 +569,15 @@ enum DppCtrl {
 #define   S_00B848_IEEE_MODE(x)                                       (((x) & 0x1) << 23)
 #define   G_00B848_IEEE_MODE(x)                                       (((x) >> 23) & 0x1)
 #define   C_00B848_IEEE_MODE                                          0xFF7FFFFF
+#define   S_00B848_WGP_MODE(x)                                        (((x) & 0x1) << 29)
+#define   G_00B848_WGP_MODE(x)                                        (((x) >> 29) & 0x1)
+#define   C_00B848_WGP_MODE                                           0xDFFFFFFF
+#define   S_00B848_MEM_ORDERED(x)                                     (((x) & 0x1) << 30)
+#define   G_00B848_MEM_ORDERED(x)                                     (((x) >> 30) & 0x1)
+#define   C_00B848_MEM_ORDERED                                        0xBFFFFFFF
+#define   S_00B848_FWD_PROGRESS(x)                                    (((x) & 0x1) << 31)
+#define   G_00B848_FWD_PROGRESS(x)                                    (((x) >> 31) & 0x1)
+#define   C_00B848_FWD_PROGRESS                                       0x7FFFFFFF
 
 
 // Helpers for setting FLOAT_MODE
@@ -535,6 +608,15 @@ enum DppCtrl {
 #define R_0286E8_SPI_TMPRING_SIZE                                       0x0286E8
 #define   S_0286E8_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
 
+#define R_028B54_VGT_SHADER_STAGES_EN                                 0x028B54
+#define   S_028B54_HS_W32_EN(x)                                       (((x) & 0x1) << 21)
+#define   S_028B54_GS_W32_EN(x)                                       (((x) & 0x1) << 22)
+#define   S_028B54_VS_W32_EN(x)                                       (((x) & 0x1) << 23)
+#define R_0286D8_SPI_PS_IN_CONTROL                                    0x0286D8
+#define   S_0286D8_PS_W32_EN(x)                                       (((x) & 0x1) << 15)
+#define R_00B800_COMPUTE_DISPATCH_INITIATOR                           0x00B800
+#define   S_00B800_CS_W32_EN(x)                                       (((x) & 0x1) << 15)
+
 #define R_SPILLED_SGPRS         0x4
 #define R_SPILLED_VGPRS         0x8
 } // End namespace llvm
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 809f5bab4693..624953963cf4 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1,9 +1,8 @@
 //===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -104,7 +103,7 @@ using namespace llvm;
 static cl::opt<bool> EnableM0Merge(
   "amdgpu-enable-merge-m0",
   cl::desc("Merge and hoist M0 initializations"),
-  cl::init(false));
+  cl::init(true));
 
 namespace {
 
@@ -144,14 +143,15 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
   return new SIFixSGPRCopies();
 }
 
-static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+static bool hasVectorOperands(const MachineInstr &MI,
+                              const SIRegisterInfo *TRI) {
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     if (!MI.getOperand(i).isReg() ||
         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
       continue;
 
-    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+    if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
       return true;
   }
   return false;
@@ -184,14 +184,14 @@ static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                              const TargetRegisterClass *DstRC,
                              const SIRegisterInfo &TRI) {
   return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
-         TRI.hasVGPRs(SrcRC);
+         TRI.hasVectorRegisters(SrcRC);
 }
 
 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                              const TargetRegisterClass *DstRC,
                              const SIRegisterInfo &TRI) {
   return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
-         TRI.hasVGPRs(DstRC);
+         TRI.hasVectorRegisters(DstRC);
 }
 
 static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -278,6 +278,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   // VGPRz = REG_SEQUENCE VGPRx, sub0
 
   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+  bool IsAGPR = TRI->hasAGPRs(DstRC);
 
   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     unsigned SrcReg = MI.getOperand(I).getReg();
@@ -296,6 +297,17 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
             TmpReg)
         .add(MI.getOperand(I));
 
+    if (IsAGPR) {
+      const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
+      unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC);
+      unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
+        AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
+            TmpAReg)
+        .addReg(TmpReg, RegState::Kill);
+      TmpReg = TmpAReg;
+    }
+
     MI.getOperand(I).setReg(TmpReg);
   }
 
@@ -440,18 +452,32 @@ static bool isReachable(const MachineInstr *From,
            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
 }
 
+// Return the first non-prologue instruction in the block.
+static MachineBasicBlock::iterator
+getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
+  MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+  while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
+    ++I;
+
+  return I;
+}
+
 // Hoist and merge identical SGPR initializations into a common predecessor.
 // This is intended to combine M0 initializations, but can work with any
 // SGPR. A VGPR cannot be processed since we cannot guarantee vector
 // executioon.
 static bool hoistAndMergeSGPRInits(unsigned Reg,
                                    const MachineRegisterInfo &MRI,
-                                   MachineDominatorTree &MDT) {
+                                   MachineDominatorTree &MDT,
+                                   const TargetInstrInfo *TII) {
   // List of inits by immediate value.
   using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
   InitListMap Inits;
   // List of clobbering instructions.
   SmallVector<MachineInstr*, 8> Clobbers;
+  // List of instructions marked for deletion.
+  SmallSet<MachineInstr*, 8> MergedInstrs;
+
   bool Changed = false;
 
   for (auto &MI : MRI.def_instructions(Reg)) {
@@ -480,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
         MachineInstr *MI2 = *I2;
 
         // Check any possible interference
-        auto intereferes = [&](MachineBasicBlock::iterator From,
-                               MachineBasicBlock::iterator To) -> bool {
+        auto interferes = [&](MachineBasicBlock::iterator From,
+                              MachineBasicBlock::iterator To) -> bool {
 
           assert(MDT.dominates(&*To, &*From));
 
@@ -513,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
         };
 
         if (MDT.dominates(MI1, MI2)) {
-          if (!intereferes(MI2, MI1)) {
+          if (!interferes(MI2, MI1)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI2->getParent()) << " " << *MI2);
-            MI2->eraseFromParent();
-            Defs.erase(I2++);
+            MergedInstrs.insert(MI2);
             Changed = true;
+            ++I2;
             continue;
           }
         } else if (MDT.dominates(MI2, MI1)) {
-          if (!intereferes(MI1, MI2)) {
+          if (!interferes(MI1, MI2)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         } else {
@@ -540,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
             continue;
           }
 
-          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
-          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+          MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
+          if (!interferes(MI1, I) && !interferes(MI2, I)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1
@@ -549,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
                        << printMBBReference(*MI2->getParent()) << " to "
                        << printMBBReference(*I->getParent()) << " " << *MI2);
             I->getParent()->splice(I, MI2->getParent(), MI2);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         }
@@ -561,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
     }
   }
 
+  for (auto MI : MergedInstrs)
+    MI->removeFromParent();
+
   if (Changed)
     MRI.clearKillFlags(Reg);
 
@@ -679,11 +708,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
           LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
           TII->moveToVALU(MI, MDT);
         }
+
         break;
       }
       case AMDGPU::REG_SEQUENCE:
-        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
-            !hasVGPROperands(MI, TRI)) {
+        if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
+            !hasVectorOperands(MI, TRI)) {
           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
           continue;
         }
@@ -698,7 +728,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
         if (TRI->isSGPRClass(DstRC) &&
-            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+            (TRI->hasVectorRegisters(Src0RC) ||
+             TRI->hasVectorRegisters(Src1RC))) {
           LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
           TII->moveToVALU(MI, MDT);
         }
@@ -709,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   }
 
   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
-    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
 
   return true;
 }
diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index 15ba78edf919..29484668a01d 100644
--- a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -1,9 +1,8 @@
 //===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
deleted file mode 100644
index 7761418c5336..000000000000
--- a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Computations in WWM can overwrite values in inactive channels for
-/// variables that the register allocator thinks are dead. This pass adds fake
-/// uses of those variables to their def(s) to make sure that they aren't
-/// overwritten.
-///
-/// As an example, consider this snippet:
-/// %vgpr0 = V_MOV_B32_e32 0.0
-/// if (...) {
-///   %vgpr1 = ...
-///   %vgpr2 = WWM killed %vgpr1
-///   ... = killed %vgpr2
-///   %vgpr0 = V_MOV_B32_e32 1.0
-/// }
-/// ... = %vgpr0
-///
-/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
-/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
-/// writing %vgpr1 would only write to channels that would be clobbered by the
-/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
-/// it would clobber even the inactive channels for which the if-condition is
-/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
-/// of %vgpr0 to its def to make sure they aren't allocated to the
-/// same register.
-///
-/// In general, we need to figure out what registers might have their inactive
-/// channels which are eventually used accidentally clobbered by a WWM
-/// instruction. We do that by spotting three separate cases of registers:
-///
-/// 1. A "then phi": the value resulting from phi elimination of a phi node at
-///    the end of an if..endif. If there is WWM code in the "then", then we
-///    make the def at the end of the "then" branch a partial def by adding an
-///    implicit use of the register.
-///
-/// 2. A "loop exit register": a value written inside a loop but used outside the
-///    loop, where there is WWM code inside the loop (the case in the example
-///    above). We add an implicit_def of the register in the loop pre-header,
-///    and make the original def a partial def by adding an implicit use of the
-///    register.
-///
-/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
-///    in a loop header. If there is WWM code inside the loop, then we make all
-///    defs inside the loop partial defs by adding an implicit use of the
-///    register on each one.
-///
-/// Note that we do not need to consider an if..else..endif phi. We only need to
-/// consider non-uniform control flow, and control flow structurization would
-/// have transformed a non-uniform if..else..endif into two if..endifs.
-///
-/// The analysis to detect these cases relies on a property of the MIR
-/// arising from this pass running straight after PHIElimination and before any
-/// coalescing: that any virtual register with more than one definition must be
-/// the new register added to lower a phi node by PHIElimination.
-///
-/// FIXME: We should detect whether a register in one of the above categories is
-/// already live at the WWM code before deciding to add the implicit uses to
-/// synthesize its liveness.
-///
-/// FIXME: I believe this whole scheme may be flawed due to the possibility of
-/// the register allocator doing live interval splitting.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-fix-wwm-liveness"
-
-namespace {
-
-class SIFixWWMLiveness : public MachineFunctionPass {
-private:
-  MachineDominatorTree *DomTree;
-  MachineLoopInfo *LoopInfo;
-  LiveIntervals *LIS = nullptr;
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
-  MachineRegisterInfo *MRI;
-
-  std::vector<MachineInstr *> WWMs;
-  std::vector<MachineOperand *> ThenDefs;
-  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
-  std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
-
-public:
-  static char ID;
-
-  SIFixWWMLiveness() : MachineFunctionPass(ID) {
-    initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequiredID(MachineDominatorsID);
-    AU.addRequiredID(MachineLoopInfoID);
-    // Should preserve the same set that TwoAddressInstructions does.
-    AU.addPreserved<SlotIndexes>();
-    AU.addPreserved<LiveIntervals>();
-    AU.addPreservedID(LiveVariablesID);
-    AU.addPreservedID(MachineLoopInfoID);
-    AU.addPreservedID(MachineDominatorsID);
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-private:
-  void processDef(MachineOperand &DefOpnd);
-  bool processThenDef(MachineOperand *DefOpnd);
-  bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
-  bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
-                "SI fix WWM liveness", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
-                "SI fix WWM liveness", false, false)
-
-char SIFixWWMLiveness::ID = 0;
-
-char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
-
-FunctionPass *llvm::createSIFixWWMLivenessPass() {
-  return new SIFixWWMLiveness();
-}
-
-bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
-  LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
-  bool Modified = false;
-
-  // This doesn't actually need LiveIntervals, but we can preserve them.
-  LIS = getAnalysisIfAvailable<LiveIntervals>();
-
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-
-  TII = ST.getInstrInfo();
-  TRI = &TII->getRegisterInfo();
-  MRI = &MF.getRegInfo();
-
-  DomTree = &getAnalysis<MachineDominatorTree>();
-  LoopInfo = &getAnalysis<MachineLoopInfo>();
-
-  // Scan the function to find the WWM sections and the candidate registers for
-  // having liveness modified.
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() == AMDGPU::EXIT_WWM)
-        WWMs.push_back(&MI);
-      else {
-        for (MachineOperand &DefOpnd : MI.defs()) {
-          if (DefOpnd.isReg()) {
-            unsigned Reg = DefOpnd.getReg();
-            if (TRI->isVGPR(*MRI, Reg))
-              processDef(DefOpnd);
-          }
-        }
-      }
-    }
-  }
-  if (!WWMs.empty()) {
-    // Synthesize liveness over WWM sections as required.
-    for (auto ThenDef : ThenDefs)
-      Modified |= processThenDef(ThenDef);
-    for (auto LoopExitDef : LoopExitDefs)
-      Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
-    for (auto LoopPhiDef : LoopPhiDefs)
-      Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
-  }
-
-  WWMs.clear();
-  ThenDefs.clear();
-  LoopExitDefs.clear();
-  LoopPhiDefs.clear();
-
-  return Modified;
-}
-
-// During the function scan, process an operand that defines a VGPR.
-// This categorizes the register and puts it in the appropriate list for later
-// use when processing a WWM section.
-void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
-  unsigned Reg = DefOpnd.getReg();
-  // Get all the defining instructions. For convenience, make Defs[0] the def
-  // we are on now.
-  SmallVector<const MachineInstr *, 4> Defs;
-  Defs.push_back(DefOpnd.getParent());
-  for (auto &MI : MRI->def_instructions(Reg)) {
-    if (&MI != DefOpnd.getParent())
-      Defs.push_back(&MI);
-  }
-  // Check whether this def dominates all the others. If not, ignore this def.
-  // Either it is going to be processed when the scan encounters its other def
-  // that dominates all defs, or there is no def that dominates all others.
-  // The latter case is an eliminated phi from an if..else..endif or similar,
-  // which must be for uniform control flow so can be ignored.
-  // Because this pass runs shortly after PHIElimination, we assume that any
-  // multi-def register is a lowered phi, and thus has each def in a separate
-  // basic block.
-  for (unsigned I = 1; I != Defs.size(); ++I) {
-    if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
-      return;
-  }
-  // Check for the case of an if..endif lowered phi: It has two defs, one
-  // dominates the other, and there is a single use in a successor of the
-  // dominant def.
-  // Later we will spot any WWM code inside
-  // the "then" clause and turn the second def into a partial def so its
-  // liveness goes through the WWM code in the "then" clause.
-  if (Defs.size() == 2) {
-    auto DomDefBlock = Defs[0]->getParent();
-    if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
-      auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
-      for (auto Succ : DomDefBlock->successors()) {
-        if (Succ == UseBlock) {
-          LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
-          ThenDefs.push_back(&DefOpnd);
-          return;
-        }
-      }
-    }
-  }
-  // Check for the case of a non-lowered-phi register (single def) that exits
-  // a loop, that is, it has a use that is outside a loop that the def is
-  // inside. We find the outermost loop that the def is inside but a use is
-  // outside. Later we will spot any WWM code inside that loop and then make
-  // the def a partial def so its liveness goes round the loop and through the
-  // WWM code.
-  if (Defs.size() == 1) {
-    auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
-    if (!Loop)
-      return;
-    bool IsLoopExit = false;
-    for (auto &Use : MRI->use_instructions(Reg)) {
-      auto UseBlock = Use.getParent();
-      if (Loop->contains(UseBlock))
-        continue;
-      IsLoopExit = true;
-      while (auto Parent = Loop->getParentLoop()) {
-        if (Parent->contains(UseBlock))
-          break;
-        Loop = Parent;
-      }
-    }
-    if (!IsLoopExit)
-      return;
-    LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
-        << " is a loop exit reg with loop header at "
-        << "bb." << Loop->getHeader()->getNumber() << "\n");
-    LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
-            &DefOpnd, Loop));
-    return;
-  }
-  // Check for the case of a lowered single-preheader-loop phi, that is, a
-  // multi-def register where the dominating def is in the loop pre-header and
-  // all other defs are in backedges. Later we will spot any WWM code inside
-  // that loop and then make the backedge defs partial defs so the liveness
-  // goes through the WWM code.
-  // Note that we are ignoring multi-preheader loops on the basis that the
-  // structurizer does not allow that for non-uniform loops.
-  // There must be a single use in the loop header.
-  if (!MRI->hasOneUse(Reg))
-    return;
-  auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
-  auto Loop = LoopInfo->getLoopFor(UseBlock);
-  if (!Loop || Loop->getHeader() != UseBlock
-      || Loop->contains(Defs[0]->getParent())) {
-    LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
-        << " is multi-def but single use not in loop header\n");
-    return;
-  }
-  for (unsigned I = 1; I != Defs.size(); ++I) {
-    if (!Loop->contains(Defs[I]->getParent()))
-      return;
-  }
-  LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
-      << " is a loop phi reg with loop header at "
-      << "bb." << Loop->getHeader()->getNumber() << "\n");
-  LoopPhiDefs.push_back(
-      std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
-}
-
-// Process a then phi def: It has two defs, one dominates the other, and there
-// is a single use in a successor of the dominant def. Here we spot any WWM
-// code inside the "then" clause and turn the second def into a partial def so
-// its liveness goes through the WWM code in the "then" clause.
-bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
-  LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
-  if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
-    // Ignore if dominating def is undef.
-    LLVM_DEBUG(dbgs() << "  ignoring as dominating def is undef\n");
-    return false;
-  }
-  unsigned Reg = DefOpnd->getReg();
-  // Get the use block, which is the endif block.
-  auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
-  // Check whether there is WWM code inside the then branch. The WWM code must
-  // be dominated by the if but not dominated by the endif.
-  bool ContainsWWM = false;
-  for (auto WWM : WWMs) {
-    if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
-        && !DomTree->dominates(UseBlock, WWM->getParent())) {
-      LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
-      ContainsWWM = true;
-      break;
-    }
-  }
-  if (!ContainsWWM)
-    return false;
-  // Get the other def.
-  MachineInstr *OtherDef = nullptr;
-  for (auto &MI : MRI->def_instructions(Reg)) {
-    if (&MI != DefOpnd->getParent())
-      OtherDef = &MI;
-  }
-  // Make it a partial def.
-  OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
-  LLVM_DEBUG(dbgs() << *OtherDef);
-  return true;
-}
-
-// Process a loop exit def, that is, a register with a single use in a loop
-// that has a use outside the loop.  Here we spot any WWM code inside that loop
-// and then make the def a partial def so its liveness goes round the loop and
-// through the WWM code.
-bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
-      MachineLoop *Loop) {
-  LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
-  // Check whether there is WWM code inside the loop.
-  bool ContainsWWM = false;
-  for (auto WWM : WWMs) {
-    if (Loop->contains(WWM->getParent())) {
-      LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
-      ContainsWWM = true;
-      break;
-    }
-  }
-  if (!ContainsWWM)
-    return false;
-  unsigned Reg = DefOpnd->getReg();
-  // Add a new implicit_def in loop preheader(s).
-  for (auto Pred : Loop->getHeader()->predecessors()) {
-    if (!Loop->contains(Pred)) {
-      auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
-          TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
-      LLVM_DEBUG(dbgs() << *ImplicitDef);
-      (void)ImplicitDef;
-    }
-  }
-  // Make the original def partial.
-  DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
-          Reg, false, /*isImp=*/true));
-  LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
-  return true;
-}
-
-// Process a loop phi def, that is, a multi-def register where the dominating
-// def is in the loop pre-header and all other defs are in backedges. Here we
-// spot any WWM code inside that loop and then make the backedge defs partial
-// defs so the liveness goes through the WWM code.
-bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
-      MachineLoop *Loop) {
-  LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
-  // Check whether there is WWM code inside the loop.
-  bool ContainsWWM = false;
-  for (auto WWM : WWMs) {
-    if (Loop->contains(WWM->getParent())) {
-      LLVM_DEBUG(dbgs() << "  contains WWM: " << *WWM);
-      ContainsWWM = true;
-      break;
-    }
-  }
-  if (!ContainsWWM)
-    return false;
-  unsigned Reg = DefOpnd->getReg();
-  // Remove kill mark from uses.
-  for (auto &Use : MRI->use_operands(Reg))
-    Use.setIsKill(false);
-  // Make all defs except the dominating one partial defs.
-  SmallVector<MachineInstr *, 4> Defs;
-  for (auto &Def : MRI->def_instructions(Reg))
-    Defs.push_back(&Def);
-  for (auto Def : Defs) {
-    if (DefOpnd->getParent() == Def)
-      continue;
-    Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
-    LLVM_DEBUG(dbgs() << *Def);
-  }
-  return true;
-}
-
diff --git a/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
index ee39eb04d831..5b834c8de13a 100644
--- a/lib/Target/AMDGPU/SIFixupVectorISel.cpp
+++ b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -1,9 +1,8 @@
 //===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 /// SIFixupVectorISel pass cleans up post ISEL Vector issues.
@@ -198,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
     // Atomics dont have a GLC, so omit the field if not there.
     if (Glc)
       NewGlob->addOperand(MF, *Glc);
+
+    MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
+    if (DLC)
+      NewGlob->addOperand(MF, *DLC);
+
     NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
     // _D16 have an vdst_in operand, copy it in.
     MachineOperand *VDstInOp = TII->getNamedOperand(MI,
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f4e866958369..74d77d328019 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1,9 +1,8 @@
 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 //===----------------------------------------------------------------------===//
@@ -51,7 +50,7 @@ struct FoldCandidate {
     } else if (FoldOp->isFI()) {
       FrameIndexToFold = FoldOp->getIndex();
     } else {
-      assert(FoldOp->isReg());
+      assert(FoldOp->isReg() || FoldOp->isGlobal());
       OpToFold = FoldOp;
     }
   }
@@ -68,6 +67,8 @@ struct FoldCandidate {
     return Kind == MachineOperand::MO_Register;
   }
 
+  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+
   bool isCommuted() const {
     return Commuted;
   }
@@ -88,10 +89,11 @@ public:
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
   const GCNSubtarget *ST;
+  const SIMachineFunctionInfo *MFI;
 
   void foldOperand(MachineOperand &OpToFold,
                    MachineInstr *UseMI,
-                   unsigned UseOpIdx,
+                   int UseOpIdx,
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
 
@@ -160,19 +162,34 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
   }
 }
 
+// TODO: Add heuristic that the frame index might not fit in the addressing mode
+// immediate offset to avoid materializing in loops.
+static bool frameIndexMayFold(const SIInstrInfo *TII,
+                              const MachineInstr &UseMI,
+                              int OpNo,
+                              const MachineOperand &OpToFold) {
+  return OpToFold.isFI() &&
+    (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
+    OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
+}
+
 FunctionPass *llvm::createSIFoldOperandsPass() {
   return new SIFoldOperands();
 }
 
 static bool updateOperand(FoldCandidate &Fold,
                           const SIInstrInfo &TII,
-                          const TargetRegisterInfo &TRI) {
+                          const TargetRegisterInfo &TRI,
+                          const GCNSubtarget &ST) {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   assert(Old.isReg());
 
   if (Fold.isImm()) {
-    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
+    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
+        !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
+        AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
+                                       ST.hasInv2PiInlineImm())) {
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
       // already set.
       unsigned Opcode = MI->getOpcode();
@@ -190,77 +207,94 @@ static bool updateOperand(FoldCandidate &Fold,
       unsigned Val = Mod.getImm();
       if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
         return false;
-      // If upper part is all zero we do not need op_sel_hi.
-      if (!isUInt<16>(Fold.ImmToFold)) {
-        if (!(Fold.ImmToFold & 0xffff)) {
-          Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+      // Only apply the following transformation if that operand requires
+      // a packed immediate.
+      switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
+      case AMDGPU::OPERAND_REG_IMM_V2FP16:
+      case AMDGPU::OPERAND_REG_IMM_V2INT16:
+      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+      case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+        // If upper part is all zero we do not need op_sel_hi.
+        if (!isUInt<16>(Fold.ImmToFold)) {
+          if (!(Fold.ImmToFold & 0xffff)) {
+            Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+            Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+            Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+            return true;
+          }
           Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-          Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+          Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
           return true;
         }
-        Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+        break;
+      default:
+        break;
       }
     }
+  }
 
-    if (Fold.needsShrink()) {
-      MachineBasicBlock *MBB = MI->getParent();
-      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
-      if (Liveness != MachineBasicBlock::LQR_Dead)
-        return false;
-
-      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-      int Op32 = Fold.getShrinkOpcode();
-      MachineOperand &Dst0 = MI->getOperand(0);
-      MachineOperand &Dst1 = MI->getOperand(1);
-      assert(Dst0.isDef() && Dst1.isDef());
-
-      bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
+    MachineBasicBlock *MBB = MI->getParent();
+    auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+    if (Liveness != MachineBasicBlock::LQR_Dead)
+      return false;
 
-      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
-      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
-      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
-      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+    int Op32 = Fold.getShrinkOpcode();
+    MachineOperand &Dst0 = MI->getOperand(0);
+    MachineOperand &Dst1 = MI->getOperand(1);
+    assert(Dst0.isDef() && Dst1.isDef());
 
-      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+    bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
 
-      if (HaveNonDbgCarryUse) {
-        BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
-          .addReg(AMDGPU::VCC, RegState::Kill);
-      }
+    const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+    unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
 
-      // Keep the old instruction around to avoid breaking iterators, but
-      // replace the outputs with dummy registers.
-      Dst0.setReg(NewReg0);
-      Dst1.setReg(NewReg1);
+    MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
 
-      if (Fold.isCommuted())
-        TII.commuteInstruction(*Inst32, false);
-      return true;
+    if (HaveNonDbgCarryUse) {
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+        .addReg(AMDGPU::VCC, RegState::Kill);
     }
 
-    Old.ChangeToImmediate(Fold.ImmToFold);
+    // Keep the old instruction around to avoid breaking iterators, but
+    // replace it with a dummy instruction to remove uses.
+    //
+    // FIXME: We should not invert how this pass looks at operands to avoid
+    // this. Should track set of foldable movs instead of looking for uses
+    // when looking at a use.
+    Dst0.setReg(NewReg0);
+    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+      MI->RemoveOperand(I);
+    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+    if (Fold.isCommuted())
+      TII.commuteInstruction(*Inst32, false);
     return true;
   }
 
   assert(!Fold.needsShrink() && "not handled");
 
-  if (Fold.isFI()) {
-    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+  if (Fold.isImm()) {
+    Old.ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
 
-  MachineOperand *New = Fold.OpToFold;
-  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
-      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
-    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
-
-    Old.setIsUndef(New->isUndef());
+  if (Fold.isGlobal()) {
+    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
+                   Fold.OpToFold->getTargetFlags());
     return true;
   }
 
-  // FIXME: Handle physical registers.
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
 
-  return false;
+  MachineOperand *New = Fold.OpToFold;
+  Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+  Old.setIsUndef(New->isUndef());
+  return true;
 }
 
 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
@@ -277,7 +311,6 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                              MachineOperand *OpToFold,
                              const SIInstrInfo *TII) {
   if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
-
     // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
     unsigned Opc = MI->getOpcode();
     if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
@@ -344,7 +377,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       if ((Opc == AMDGPU::V_ADD_I32_e64 ||
            Opc == AMDGPU::V_SUB_I32_e64 ||
            Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
-          OpToFold->isImm()) {
+          (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
         MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
 
         // Verify the other operand is a VGPR, otherwise we would violate the
@@ -357,7 +390,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
 
         assert(MI->getOperand(1).isDef());
 
-        int Op32 =  AMDGPU::getVOPe32(Opc);
+        // Make sure to get the 32-bit version of the commuted opcode.
+        unsigned MaybeCommutedOpc = MI->getOpcode();
+        int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
         FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
                                          Op32));
         return true;
@@ -384,10 +420,75 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
   //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
 
+static bool tryToFoldACImm(const SIInstrInfo *TII,
+                           const MachineOperand &OpToFold,
+                           MachineInstr *UseMI,
+                           unsigned UseOpIdx,
+                           SmallVectorImpl<FoldCandidate> &FoldList) {
+  const MCInstrDesc &Desc = UseMI->getDesc();
+  const MCOperandInfo *OpInfo = Desc.OpInfo;
+  if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
+    return false;
+
+  uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
+  if (OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
+      OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
+    return false;
+
+  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
+    UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+    return true;
+  }
+
+  if (!OpToFold.isReg())
+    return false;
+
+  unsigned UseReg = OpToFold.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(UseReg))
+    return false;
+
+  if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
+        return FC.UseMI == UseMI; }) != FoldList.end())
+    return false;
+
+  MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
+  const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
+  if (!Def || !Def->isRegSequence())
+    return false;
+
+  int64_t Imm;
+  MachineOperand *Op;
+  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
+    const MachineOperand &Sub = Def->getOperand(I);
+    if (!Sub.isReg() || Sub.getSubReg())
+      return false;
+    MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
+    while (SubDef && !SubDef->isMoveImmediate() &&
+           !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
+      SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
+    if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
+      return false;
+    Op = &SubDef->getOperand(1);
+    auto SubImm = Op->getImm();
+    if (I == 1) {
+      if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
+        return false;
+
+      Imm = SubImm;
+      continue;
+    }
+    if (Imm != SubImm)
+      return false; // Can only fold splat constants
+  }
+
+  FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
+  return true;
+}
+
 void SIFoldOperands::foldOperand(
   MachineOperand &OpToFold,
   MachineInstr *UseMI,
-  unsigned UseOpIdx,
+  int UseOpIdx,
   SmallVectorImpl<FoldCandidate> &FoldList,
   SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
@@ -420,11 +521,18 @@ void SIFoldOperands::foldOperand(
     unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
     unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
 
+    MachineRegisterInfo::use_iterator Next;
     for (MachineRegisterInfo::use_iterator
            RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
-         RSUse != RSE; ++RSUse) {
+         RSUse != RSE; RSUse = Next) {
+      Next = std::next(RSUse);
 
       MachineInstr *RSUseMI = RSUse->getParent();
+
+      if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
+                         RSUse.getOperandNo(), FoldList))
+        continue;
+
       if (RSUse->getSubReg() != RegSeqDstSubReg)
         continue;
 
@@ -435,10 +543,32 @@ void SIFoldOperands::foldOperand(
     return;
   }
 
+  if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
+    return;
 
-  bool FoldingImm = OpToFold.isImm();
+  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
+    // Sanity check that this is a stack access.
+    // FIXME: Should probably use stack pseudos before frame lowering.
+    MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+    if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
+                           SOff->getReg() != MFI->getStackPtrOffsetReg()))
+      return;
+
+    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
+        MFI->getScratchRSrcReg())
+      return;
 
-  if (FoldingImm && UseMI->isCopy()) {
+    // A frame index will resolve to a positive constant, so it should always be
+    // safe to fold the addressing mode, even pre-GFX9.
+    UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+    SOff->setReg(MFI->getStackPtrOffsetReg());
+    return;
+  }
+
+  bool FoldingImmLike =
+      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
+
+  if (FoldingImmLike && UseMI->isCopy()) {
     unsigned DestReg = UseMI->getOperand(0).getReg();
     const TargetRegisterClass *DestRC
       = TargetRegisterInfo::isVirtualRegister(DestReg) ?
@@ -449,7 +579,7 @@ void SIFoldOperands::foldOperand(
     if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
       TargetRegisterInfo::isVirtualRegister(SrcReg)) {
       const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
-      if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+      if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
         MachineRegisterInfo::use_iterator NextUse;
         SmallVector<FoldCandidate, 4> CopyUses;
         for (MachineRegisterInfo::use_iterator
@@ -467,6 +597,14 @@ void SIFoldOperands::foldOperand(
       }
     }
 
+    if (DestRC == &AMDGPU::AGPR_32RegClass &&
+        TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+      UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+      UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+      CopiesToReplace.push_back(UseMI);
+      return;
+    }
+
     // In order to fold immediates into copies, we need to change the
     // copy to a MOV.
 
@@ -479,18 +617,71 @@ void SIFoldOperands::foldOperand(
   } else {
     if (UseMI->isCopy() && OpToFold.isReg() &&
         TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
-        TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
-        TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-        TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+        TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
+        TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
         !UseMI->getOperand(1).getSubReg()) {
+      unsigned Size = TII->getOpSize(*UseMI, 1);
       UseMI->getOperand(1).setReg(OpToFold.getReg());
       UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
       UseMI->getOperand(1).setIsKill(false);
       CopiesToReplace.push_back(UseMI);
       OpToFold.setIsKill(false);
+      if (Size != 4)
+        return;
+      if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+          TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
+        UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+      else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+               TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+        UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32));
       return;
     }
 
+    unsigned UseOpc = UseMI->getOpcode();
+    if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
+        (UseOpc == AMDGPU::V_READLANE_B32 &&
+         (int)UseOpIdx ==
+         AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
+      // %vgpr = V_MOV_B32 imm
+      // %sgpr = V_READFIRSTLANE_B32 %vgpr
+      // =>
+      // %sgpr = S_MOV_B32 imm
+      if (FoldingImmLike) {
+        if (execMayBeModifiedBeforeUse(*MRI,
+                                       UseMI->getOperand(UseOpIdx).getReg(),
+                                       *OpToFold.getParent(),
+                                       *UseMI))
+          return;
+
+        UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+
+        // FIXME: ChangeToImmediate should clear subreg
+        UseMI->getOperand(1).setSubReg(0);
+        if (OpToFold.isImm())
+          UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+        else
+          UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
+        UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+        return;
+      }
+
+      if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
+        if (execMayBeModifiedBeforeUse(*MRI,
+                                       UseMI->getOperand(UseOpIdx).getReg(),
+                                       *OpToFold.getParent(),
+                                       *UseMI))
+          return;
+
+        // %vgpr = COPY %sgpr0
+        // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
+        // =>
+        // %sgpr1 = COPY %sgpr0
+        UseMI->setDesc(TII->get(AMDGPU::COPY));
+        UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+        return;
+      }
+    }
+
     const MCInstrDesc &UseDesc = UseMI->getDesc();
 
     // Don't fold into target independent nodes.  Target independent opcodes
@@ -501,7 +692,7 @@ void SIFoldOperands::foldOperand(
       return;
   }
 
-  if (!FoldingImm) {
+  if (!FoldingImmLike) {
     tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
 
     // FIXME: We could try to change the instruction from 64-bit to 32-bit
@@ -515,14 +706,10 @@ void SIFoldOperands::foldOperand(
   const TargetRegisterClass *FoldRC =
     TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
 
-
   // Split 64-bit constants into 32-bits for folding.
   if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
     unsigned UseReg = UseOp.getReg();
-    const TargetRegisterClass *UseRC
-      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
-      MRI->getRegClass(UseReg) :
-      TRI->getPhysRegClass(UseReg);
+    const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
 
     if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
       return;
@@ -763,14 +950,23 @@ static bool tryFoldInst(const SIInstrInfo *TII,
       Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
     const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
-    if (Src1->isIdenticalTo(*Src0)) {
+    int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+    int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+    if (Src1->isIdenticalTo(*Src0) &&
+        (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+        (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
       LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+      auto &NewDesc =
+          TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
       int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
       if (Src2Idx != -1)
         MI->RemoveOperand(Src2Idx);
       MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
-      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
-                                               : getMovOpc(false)));
+      if (Src1ModIdx != -1)
+        MI->RemoveOperand(Src1ModIdx);
+      if (Src0ModIdx != -1)
+        MI->RemoveOperand(Src0ModIdx);
+      mutateCopyOp(*MI, NewDesc);
       LLVM_DEBUG(dbgs() << *MI << '\n');
       return true;
     }
@@ -788,7 +984,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
   SmallVector<FoldCandidate, 4> FoldList;
   MachineOperand &Dst = MI.getOperand(0);
 
-  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
   if (FoldingImm) {
     unsigned NumLiteralUses = 0;
     MachineOperand *NonInlineUse = nullptr;
@@ -840,6 +1036,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
       // in some cases. A better heuristic is needed.
       if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
         foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
+      } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
+        foldOperand(OpToFold, UseMI, OpNo, FoldList,
+                    CopiesToReplace);
       } else {
         if (++NumLiteralUses == 1) {
           NonInlineUse = &*Use;
@@ -874,7 +1073,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
     Copy->addImplicitDefUseOperands(*MF);
 
   for (FoldCandidate &Fold : FoldList) {
-    if (updateOperand(Fold, *TII, *TRI)) {
+    if (updateOperand(Fold, *TII, *TRI, *ST)) {
       // Clear kill flags.
       if (Fold.isReg()) {
         assert(Fold.OpToFold && Fold.OpToFold->isReg());
@@ -926,7 +1125,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
 
     // Having a 0 op_sel_hi would require swizzling the output in the source
     // instruction, which we can't do.
-    unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
+    unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
+                                                      : 0u;
     if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
       return nullptr;
     return Src0;
@@ -1105,13 +1305,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
-
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   // omod is ignored by hardware if IEEE bit is enabled. omod also does not
   // correctly handle signed zeros.
   //
-  bool IsIEEEMode = ST->enableIEEEBit(MF);
+  // FIXME: Also need to check strictfp
+  bool IsIEEEMode = MFI->getMode().IEEE;
   bool HasNSZ = MFI->hasNoSignedZerosFPMath();
 
   for (MachineBasicBlock *MBB : depth_first(&MF)) {
@@ -1132,7 +1332,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       }
 
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+      bool FoldingImm =
+          OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
 
       // FIXME: We could also be folding things like TargetIndexes.
       if (!FoldingImm && !OpToFold.isReg())
diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index aa976d5141f8..f3c9ad63a80a 100644
--- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -1,9 +1,8 @@
 //===-- SIFormMemoryClauses.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -119,6 +118,17 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
     return false;
   if (!IsVMEMClause && !isSMEMClauseInst(MI))
     return false;
+  // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
+  for (const MachineOperand &ResMO : MI.defs()) {
+    unsigned ResReg = ResMO.getReg();
+    for (const MachineOperand &MO : MI.uses()) {
+      if (!MO.isReg() || MO.isDef())
+        continue;
+      if (MO.getReg() == ResReg)
+        return false;
+    }
+    break; // Only check the first def.
+  }
   return true;
 }
 
@@ -309,6 +319,8 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
 
   MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
   MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
+  unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
+      MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
 
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::instr_iterator Next;
@@ -329,7 +341,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
         continue;
 
       unsigned Length = 1;
-      for ( ; Next != E && Length < MaxClause; ++Next) {
+      for ( ; Next != E && Length < FuncMaxClause; ++Next) {
         if (!isValidClauseInst(*Next, IsVMEM))
           break;
 
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index e4633c88e18f..feab6bed2603 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===----------------------- SIFrameLowering.cpp --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //==-----------------------------------------------------------------------===//
 
@@ -22,6 +21,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "frame-info"
+
 
 static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                          const MachineFunction &MF) {
@@ -35,6 +36,150 @@ static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                       ST.getMaxNumSGPRs(MF));
 }
 
+// Find a scratch register that we can use at the start of the prologue to
+// re-align the stack pointer. We avoid using callee-save registers since they
+// may appear to be free when this is called from canUseAsPrologue (during
+// shrink wrapping), but then no longer be free when this is called from
+// emitPrologue.
+//
+// FIXME: This is a bit conservative, since in the above case we could use one
+// of the callee-save registers as a scratch temp to re-align the stack pointer,
+// but we would then have to make sure that we were in fact saving at least one
+// callee-save register in the prologue, which is additional complexity that
+// doesn't seem worth the benefit.
+static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
+                                                 LivePhysRegs &LiveRegs,
+                                                 const TargetRegisterClass &RC,
+                                                 bool Unused = false) {
+  // Mark callee saved registers as used so we will not choose them.
+  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    LiveRegs.addReg(CSRegs[i]);
+
+  if (Unused) {
+    // We are looking for a register that can be used throughout the entire
+    // function, so any use is unacceptable.
+    for (unsigned Reg : RC) {
+      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+        return Reg;
+    }
+  } else {
+    for (unsigned Reg : RC) {
+      if (LiveRegs.available(MRI, Reg))
+        return Reg;
+    }
+  }
+
+  // Callers that pass Unused = true have a fallback plan, so for them failure
+  // is reported by returning NoRegister. In every other context the search
+  // must succeed, and failing to find a register is a fatal error.
+  if (!Unused)
+    report_fatal_error("failed to find free scratch register");
+
+  return AMDGPU::NoRegister;
+}
+
+static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
+  LivePhysRegs LiveRegs;
+  LiveRegs.init(*MRI.getTargetRegisterInfo());
+  return findScratchNonCalleeSaveRegister(
+    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, /*Unused=*/true);
+}
+
+// We need to specially emit stack operations here because a different frame
+// register is used than in the rest of the function, as getFrameRegister would
+// use.
+static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator I,
+                             const SIInstrInfo *TII, unsigned SpillReg,
+                             unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+  MachineFunction *MF = MBB.getParent();
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+
+  int64_t Offset = MFI.getObjectOffset(FI);
+
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
+      MFI.getObjectAlignment(FI));
+
+  if (isUInt<12>(Offset)) {
+    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
+      .addReg(SpillReg, RegState::Kill)
+      .addReg(ScratchRsrcReg)
+      .addReg(SPReg)
+      .addImm(Offset)
+      .addImm(0) // glc
+      .addImm(0) // slc
+      .addImm(0) // tfe
+      .addImm(0) // dlc
+      .addMemOperand(MMO);
+    return;
+  }
+
+  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+
+  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
+    .addImm(Offset);
+
+  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
+    .addReg(SpillReg, RegState::Kill)
+    .addReg(OffsetReg, RegState::Kill)
+    .addReg(ScratchRsrcReg)
+    .addReg(SPReg)
+    .addImm(0) // offset (carried in OffsetReg instead)
+    .addImm(0) // glc
+    .addImm(0) // slc
+    .addImm(0) // tfe
+    .addImm(0) // dlc
+    .addMemOperand(MMO);
+}
+
+static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I,
+                              const SIInstrInfo *TII, unsigned SpillReg,
+                              unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+  MachineFunction *MF = MBB.getParent();
+  MachineFrameInfo &MFI = MF->getFrameInfo();
+  int64_t Offset = MFI.getObjectOffset(FI);
+
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
+      MFI.getObjectAlignment(FI));
+
+  if (isUInt<12>(Offset)) {
+    BuildMI(MBB, I, DebugLoc(),
+            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
+      .addReg(ScratchRsrcReg)
+      .addReg(SPReg)
+      .addImm(Offset)
+      .addImm(0) // glc
+      .addImm(0) // slc
+      .addImm(0) // tfe
+      .addImm(0) // dlc
+      .addMemOperand(MMO);
+    return;
+  }
+
+  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+
+  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
+    .addImm(Offset);
+
+  BuildMI(MBB, I, DebugLoc(),
+          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
+    .addReg(OffsetReg, RegState::Kill)
+    .addReg(ScratchRsrcReg)
+    .addReg(SPReg)
+    .addImm(0) // offset (carried in OffsetReg instead)
+    .addImm(0) // glc
+    .addImm(0) // slc
+    .addImm(0) // tfe
+    .addImm(0) // dlc
+    .addMemOperand(MMO);
+}
+
 void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
                                           MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
@@ -71,6 +216,24 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
 
   // Do a 64-bit pointer add.
   if (ST.flatScratchIsPointer()) {
+    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
+        .addReg(FlatScrInitLo)
+        .addReg(ScratchWaveOffsetReg);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+        .addReg(FlatScrInitHi)
+        .addImm(0);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+        addReg(FlatScrInitLo).
+        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
+                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+        addReg(FlatScrInitHi).
+        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
+                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+      return;
+    }
+
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
       .addReg(FlatScrInitLo)
       .addReg(ScratchWaveOffsetReg);
@@ -81,6 +244,8 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
     return;
   }
 
+  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
+
   // Copy the size in bytes.
   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
     .addReg(FlatScrInitHi, RegState::Kill);
@@ -145,34 +310,30 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
   return ScratchRsrcReg;
 }
 
-// Shift down registers reserved for the scratch wave offset and stack pointer
-// SGPRs.
-std::pair<unsigned, unsigned>
+// Shift down registers reserved for the scratch wave offset.
+std::pair<unsigned, bool>
 SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
-  const GCNSubtarget &ST,
-  const SIInstrInfo *TII,
-  const SIRegisterInfo *TRI,
-  SIMachineFunctionInfo *MFI,
-  MachineFunction &MF) const {
+    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 
+  assert(MFI->isEntryFunction());
+
   // No replacement necessary.
   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
-      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
-    assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
-    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
+    return std::make_pair(AMDGPU::NoRegister, false);
   }
 
-  unsigned SPReg = MFI->getStackPtrOffsetReg();
   if (ST.hasSGPRInitBug())
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 
   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
   if (NumPreloaded > AllSGPRs.size())
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   AllSGPRs = AllSGPRs.slice(NumPreloaded);
 
@@ -193,10 +354,11 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   unsigned ReservedRegCount = 13;
 
   if (AllSGPRs.size() < ReservedRegCount)
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   bool HandledScratchWaveOffsetReg =
     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+  bool FPAdjusted = false;
 
   for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
@@ -206,24 +368,25 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
         HandledScratchWaveOffsetReg = true;
 
         MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
+          assert(!hasFP(MF));
+          MFI->setStackPtrOffsetReg(Reg);
+        }
+
         MFI->setScratchWaveOffsetReg(Reg);
+        MFI->setFrameOffsetReg(Reg);
         ScratchWaveOffsetReg = Reg;
+        FPAdjusted = true;
         break;
       }
     }
   }
 
-  return std::make_pair(ScratchWaveOffsetReg, SPReg);
+  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
 }
 
 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                 MachineBasicBlock &MBB) const {
-  // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
-  // specified.
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (ST.debuggerEmitPrologue())
-    emitDebuggerPrologue(MF, MBB);
-
   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
 
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -234,6 +397,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   // FIXME: We should be cleaning up these unused SGPR spill frame indices
   // somewhere.
 
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -251,38 +415,13 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   if (MFI->hasFlatScratchInit())
     emitFlatScratchInit(ST, MF, MBB);
 
-  unsigned SPReg = MFI->getStackPtrOffsetReg();
-  if (SPReg != AMDGPU::SP_REG) {
-    assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
-
-    DebugLoc DL;
-    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-    int64_t StackSize = FrameInfo.getStackSize();
-
-    if (StackSize == 0) {
-      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
-        .addReg(MFI->getScratchWaveOffsetReg());
-    } else {
-      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
-        .addReg(MFI->getScratchWaveOffsetReg())
-        .addImm(StackSize * ST.getWavefrontSize());
-    }
-  }
-
   unsigned ScratchRsrcReg
     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
 
   unsigned ScratchWaveOffsetReg;
-  std::tie(ScratchWaveOffsetReg, SPReg)
-    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
-  // It's possible to have uses of only ScratchWaveOffsetReg without
-  // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
-  // but the inverse is not true.
-  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
-    assert(ScratchRsrcReg == AMDGPU::NoRegister);
-    return;
-  }
+  bool FPAdjusted;
+  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
+      getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 
   // We need to insert initialization of the scratch resource descriptor.
   unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
@@ -294,18 +433,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
   }
 
-  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
+                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);
   bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                          MRI.isPhysRegUsed(ScratchRsrcReg);
 
+  // FIXME: Hack to not crash in situations which emitted an error.
+  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
+    return;
+
   // We added live-ins during argument lowering, but since they were not used
   // they were deleted. We're adding the uses now, so add them back.
-  if (OffsetRegUsed) {
-    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
-           "scratch wave offset input is required");
-    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
-    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
-  }
+  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
 
   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
     assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
@@ -318,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     if (&OtherBB == &MBB)
       continue;
 
-    if (OffsetRegUsed)
+    if (OffsetRegUsed || FPAdjusted)
       OtherBB.addLiveIn(ScratchWaveOffsetReg);
 
     if (ResourceRegUsed)
@@ -346,11 +486,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (OffsetRegUsed &&
-      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+  unsigned SPReg = MFI->getStackPtrOffsetReg();
+  assert(SPReg != AMDGPU::SP_REG);
+
+  // FIXME: Remove the isPhysRegUsed checks
+  const bool HasFP = hasFP(MF);
+
+  if (HasFP || OffsetRegUsed) {
+    assert(ScratchWaveOffsetReg);
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
-      .addReg(PreloadedScratchWaveOffsetReg,
-              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
+      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
   }
 
   if (CopyBuffer && !CopyBufferFirst) {
@@ -358,9 +503,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (ResourceRegUsed)
+  if (ResourceRegUsed) {
     emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
         PreloadedPrivateBufferReg, ScratchRsrcReg);
+  }
+
+  if (HasFP) {
+    DebugLoc DL;
+    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+    int64_t StackSize = FrameInfo.getStackSize();
+
+    // On kernel entry, the private scratch wave offset is the SP value.
+    if (StackSize == 0) {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg());
+    } else {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg())
+        .addImm(StackSize * ST.getWavefrontSize());
+    }
+  }
 }
 
 // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
@@ -405,7 +567,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
       }
     }
     MF.getRegInfo().addLiveIn(GitPtrLo);
-    MF.front().addLiveIn(GitPtrLo);
+    MBB.addLiveIn(GitPtrLo);
     BuildMI(MBB, I, DL, SMovB32, RsrcLo)
       .addReg(GitPtrLo)
       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
@@ -421,12 +583,15 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
                                        MachineMemOperand::MOLoad |
                                        MachineMemOperand::MOInvariant |
                                        MachineMemOperand::MODereferenceable,
-                                       0, 0);
+                                       16, 4);
     unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
+    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+    unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
     BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
       .addReg(Rsrc01)
-      .addImm(Offset) // offset
+      .addImm(EncodedOffset) // offset
       .addImm(0) // glc
+      .addImm(0) // dlc
       .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
       .addMemOperand(MMO);
     return;
@@ -462,13 +627,17 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
                                            MachineMemOperand::MOLoad |
                                            MachineMemOperand::MOInvariant |
                                            MachineMemOperand::MODereferenceable,
-                                           0, 0);
+                                           8, 4);
         BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
           .addReg(MFI->getImplicitBufferPtrUserSGPR())
           .addImm(0) // offset
           .addImm(0) // glc
+          .addImm(0) // dlc
           .addMemOperand(MMO)
           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
+        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
       }
     } else {
       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
@@ -494,38 +663,14 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
   }
 }
 
-// Find a scratch register that we can use at the start of the prologue to
-// re-align the stack pointer.  We avoid using callee-save registers since they
-// may appear to be free when this is called from canUseAsPrologue (during
-// shrink wrapping), but then no longer be free when this is called from
-// emitPrologue.
-//
-// FIXME: This is a bit conservative, since in the above case we could use one
-// of the callee-save registers as a scratch temp to re-align the stack pointer,
-// but we would then have to make sure that we were in fact saving at least one
-// callee-save register in the prologue, which is additional complexity that
-// doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
-  MachineFunction *MF = MBB.getParent();
-
-  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
-  const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
-  LivePhysRegs LiveRegs(TRI);
-  LiveRegs.addLiveIns(MBB);
-
-  // Mark callee saved registers as used so we will not choose them.
-  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    LiveRegs.addReg(CSRegs[i]);
-
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-
-  for (unsigned Reg : AMDGPU::SReg_32_XM0RegClass) {
-    if (LiveRegs.available(MRI, Reg))
-      return Reg;
+bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
+  switch (ID) {
+  case TargetStackID::Default:
+  case TargetStackID::NoAlloc:
+  case TargetStackID::SGPRSpill:
+    return true;
   }
-
-  return AMDGPU::NoRegister;
+  llvm_unreachable("Invalid TargetStackID::Value");
 }
 
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
@@ -537,31 +682,105 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
 
   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+  LivePhysRegs LiveRegs;
 
   MachineBasicBlock::iterator MBBI = MBB.begin();
   DebugLoc DL;
 
-  // XXX - Is this the right predicate?
-
-  bool NeedFP = hasFP(MF);
+  bool HasFP = false;
   uint32_t NumBytes = MFI.getStackSize();
   uint32_t RoundedSize = NumBytes;
-  const bool NeedsRealignment = TRI.needsStackRealignment(MF);
+  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+  // turn on all lanes before doing the spill to memory.
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+
+  // Emit the copy if we need an FP, and are using a free SGPR to save it.
+  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
+      .addReg(FramePtrReg)
+      .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      if (LiveRegs.empty()) {
+        LiveRegs.init(TRI);
+        LiveRegs.addLiveIns(MBB);
+        if (FuncInfo->SGPRForFPSaveRestoreCopy)
+          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+      }
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
+                                           *TRI.getWaveMaskRegClass());
+      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
+
+      const unsigned OrSaveExec = ST.isWave32() ?
+        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
+              ScratchExecCopy)
+        .addImm(-1);
+    }
 
-  if (NeedsRealignment) {
-    assert(NeedFP);
+    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+                     FuncInfo->getScratchRSrcReg(),
+                     StackPtrReg,
+                     Reg.FI.getValue());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
+    // FIXME: Split block and make terminator.
+    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+      .addReg(ScratchExecCopy, RegState::Kill);
+    LiveRegs.addReg(ScratchExecCopy);
+  }
+
+
+  if (FuncInfo->FramePointerSaveIndex) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    assert(!MFI.isDeadObjectIndex(FI) &&
+           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
+      = FuncInfo->getSGPRToVGPRSpills(FI);
+    assert(Spill.size() == 1);
+
+    // Save FP before setting it up.
+    // FIXME: This should respect spillSGPRToVGPR;
+    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+            Spill[0].VGPR)
+      .addReg(FramePtrReg)
+      .addImm(Spill[0].Lane)
+      .addReg(Spill[0].VGPR, RegState::Undef);
+  }
+
+  if (TRI.needsStackRealignment(MF)) {
+    HasFP = true;
     const unsigned Alignment = MFI.getMaxAlignment();
 
     RoundedSize += Alignment;
+    if (LiveRegs.empty()) {
+      LiveRegs.init(TRI);
+      LiveRegs.addLiveIns(MBB);
+      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+    }
 
-    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(MBB);
-    assert(ScratchSPReg != AMDGPU::NoRegister);
+    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
+    assert(ScratchSPReg != AMDGPU::NoRegister &&
+           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
 
     // s_add_u32 tmp_reg, s32, NumBytes
     // s_and_b32 s32, tmp_reg, 0b111...0000
@@ -574,7 +793,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .addImm(-Alignment * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
     FuncInfo->setIsStackRealigned(true);
-  } else if (NeedFP) {
+  } else if ((HasFP = hasFP(MF))) {
     // If we need a base pointer, set it up here. It's whatever the value of
     // the stack pointer is at this point. Any variable size objects will be
     // allocated after this, so we can still use the base pointer to reference
@@ -584,21 +803,20 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (RoundedSize != 0 && hasSP(MF)) {
+  if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
       .addReg(StackPtrReg)
       .addImm(RoundedSize * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-         : FuncInfo->getSGPRSpillVGPRs()) {
-    if (!Reg.FI.hasValue())
-      continue;
-    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                             &TII->getRegisterInfo());
-  }
+  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
+                     FuncInfo->FramePointerSaveIndex)) &&
+         "Needed to save FP but didn't save it anywhere");
+
+  assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
+                    !FuncInfo->FramePointerSaveIndex)) &&
+         "Saved FP but didn't need it");
 }
 
 void SIFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -609,39 +827,87 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  LivePhysRegs LiveRegs;
+  DebugLoc DL;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  uint32_t NumBytes = MFI.getStackSize();
+  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
+    NumBytes + MFI.getMaxAlignment() : NumBytes;
+
+  if (RoundedSize != 0 && hasFP(MF)) {
+    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
+      .addReg(StackPtrReg)
+      .addImm(RoundedSize * ST.getWavefrontSize())
+      .setMIFlag(MachineInstr::FrameDestroy);
+  }
+
+  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
+      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
+      .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  if (FuncInfo->FramePointerSaveIndex) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+
+    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
+           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
+
+    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
+      = FuncInfo->getSGPRToVGPRSpills(FI);
+    assert(Spill.size() == 1);
+    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+            FuncInfo->getFrameOffsetReg())
+      .addReg(Spill[0].VGPR)
+      .addImm(Spill[0].Lane);
+  }
 
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
          : FuncInfo->getSGPRSpillVGPRs()) {
     if (!Reg.FI.hasValue())
       continue;
-    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                              &TII->getRegisterInfo());
-  }
 
-  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
-  if (StackPtrReg == AMDGPU::NoRegister)
-    return;
+    const SIRegisterInfo &TRI = TII->getRegisterInfo();
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      // See emitPrologue
+      if (LiveRegs.empty()) {
+        LiveRegs.init(*ST.getRegisterInfo());
+        LiveRegs.addLiveOuts(MBB);
+        LiveRegs.stepBackward(*MBBI);
+      }
 
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  uint32_t NumBytes = MFI.getStackSize();
+      ScratchExecCopy = findScratchNonCalleeSaveRegister(
+          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+      LiveRegs.removeReg(ScratchExecCopy);
 
-  DebugLoc DL;
+      const unsigned OrSaveExec =
+          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
 
-  // FIXME: Clarify distinction between no set SP and SP. For callee functions,
-  // it's really whether we need SP to be accurate or not.
+      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
+        .addImm(-1);
+    }
 
-  if (NumBytes != 0 && hasSP(MF)) {
-    uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
-      NumBytes + MFI.getMaxAlignment() : NumBytes;
+    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+                      FuncInfo->getScratchRSrcReg(),
+                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
+  }
 
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
-      .addReg(StackPtrReg)
-      .addImm(RoundedSize * ST.getWavefrontSize());
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
+    // FIXME: Split block and make terminator.
+    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+      .addReg(ScratchExecCopy, RegState::Kill);
   }
 }
 
+// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
+// memory. They should have been removed by now.
 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
        I != E; ++I) {
@@ -652,6 +918,22 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
   return true;
 }
 
+#ifndef NDEBUG
+static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
+                                 Optional<int> FramePointerSaveIndex) {
+  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+       I != E; ++I) {
+    if (!MFI.isDeadObjectIndex(I) &&
+        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
+        FramePointerSaveIndex && I != FramePointerSaveIndex) {
+      return false;
+    }
+  }
+
+  return true;
+}
+#endif
+
 int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
   const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
@@ -665,81 +947,145 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   RegScavenger *RS) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
-  if (!MFI.hasStackObjects())
-    return;
-
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  bool AllSGPRSpilledToVGPRs = false;
-
-  if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
-    AllSGPRSpilledToVGPRs = true;
-
-    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
-    // are spilled to VGPRs, in which case we can eliminate the stack usage.
-    //
-    // XXX - This operates under the assumption that only other SGPR spills are
-    // users of the frame index. I'm not 100% sure this is correct. The
-    // StackColoring pass has a comment saying a future improvement would be to
-    // merging of allocas with spill slots, but for now according to
-    // MachineFrameInfo isSpillSlot can't alias any other object.
-    for (MachineBasicBlock &MBB : MF) {
-      MachineBasicBlock::iterator Next;
-      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
-        MachineInstr &MI = *I;
-        Next = std::next(I);
-
-        if (TII->isSGPRSpill(MI)) {
-          int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
-          assert(MFI.getStackID(FI) == SIStackID::SGPR_SPILL);
-          if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
-            bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
-            (void)Spilled;
-            assert(Spilled && "failed to spill SGPR to VGPR when allocated");
-          } else
-            AllSGPRSpilledToVGPRs = false;
-        }
-      }
-    }
 
-    FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
-  }
+  FuncInfo->removeDeadFrameIndices(MFI);
+  assert(allSGPRSpillsAreDead(MFI, None) &&
+         "SGPR spill should have been removed in SILowerSGPRSpills");
 
   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
   // but currently hasNonSpillStackObjects is set only from source
   // allocas. Stack temps produced from legalization are not counted currently.
-  if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
-      !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+  if (!allStackObjectsAreDead(MFI)) {
     assert(RS && "RegScavenger required if spilling");
 
-    // We force this to be at offset 0 so no user object ever has 0 as an
-    // address, so we may use 0 as an invalid pointer value. This is because
-    // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
-    // is required to be address space 0, we are forced to accept this for
-    // now. Ideally we could have the stack in another address space with 0 as a
-    // valid pointer, and -1 as the null value.
-    //
-    // This will also waste additional space when user stack objects require > 4
-    // byte alignment.
-    //
-    // The main cost here is losing the offset for addressing modes. However
-    // this also ensures we shouldn't need a register for the offset when
-    // emergency scavenging.
-    int ScavengeFI = MFI.CreateFixedObject(
-      TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
-    RS->addScavengingFrameIndex(ScavengeFI);
+    if (FuncInfo->isEntryFunction()) {
+      int ScavengeFI = MFI.CreateFixedObject(
+        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
+      RS->addScavengingFrameIndex(ScavengeFI);
+    } else {
+      int ScavengeFI = MFI.CreateStackObject(
+        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
+        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
+        false);
+      RS->addScavengingFrameIndex(ScavengeFI);
+    }
   }
 }
 
-void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+// Only report VGPRs to generic code.
+void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
+                                           BitVector &SavedVGPRs,
                                            RegScavenger *RS) const {
+  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  if (MFI->isEntryFunction())
+    return;
+
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  // Ignore the SGPRs the default implementation found.
+  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
+
+  // hasFP only knows about stack objects that already exist. We're now
+  // determining the stack slots that will be created, so we have to predict
+  // them. Stack objects force FP usage with calls.
+  //
+  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
+  // don't want to report it here.
+  //
+  // FIXME: Is this really hasReservedCallFrame?
+  const bool WillHaveFP =
+      FrameInfo.hasCalls() &&
+      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
+
+  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
+  // so don't allow the default insertion to handle them.
+  for (auto SSpill : MFI->getSGPRSpillVGPRs())
+    SavedVGPRs.reset(SSpill.VGPR);
+
+  const bool HasFP = WillHaveFP || hasFP(MF);
+  if (!HasFP)
+    return;
+
+  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
+    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
+                                                    TargetStackID::SGPRSpill);
+
+    // If there is already a VGPR with free lanes, use it. We may already have
+    // to pay the penalty for spilling a CSR VGPR.
+    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+      llvm_unreachable("allocate SGPR spill should have worked");
+
+    MFI->FramePointerSaveIndex = NewFI;
+
+    LLVM_DEBUG(
+      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+      dbgs() << "Spilling FP to  " << printReg(Spill.VGPR, TRI)
+             << ':' << Spill.Lane << '\n');
+    return;
+  }
+
+  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
+
+  if (!MFI->SGPRForFPSaveRestoreCopy) {
+    // There's no free lane to spill, and no free register to save FP, so we're
+    // forced to spill another VGPR to use for the spill.
+    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
+                                                    TargetStackID::SGPRSpill);
+    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+      llvm_unreachable("allocate SGPR spill should have worked");
+    MFI->FramePointerSaveIndex = NewFI;
+
+    LLVM_DEBUG(
+      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
+             << ':' << Spill.Lane << '\n';);
+  } else {
+    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
+               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
+  }
+}
+
+void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
+                                               BitVector &SavedRegs,
+                                               RegScavenger *RS) const {
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  if (MFI->isEntryFunction())
+    return;
+
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
   // The SP is specifically managed and we don't want extra spills of it.
   SavedRegs.reset(MFI->getStackPtrOffsetReg());
+  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
+}
+
+bool SIFrameLowering::assignCalleeSavedSpillSlots(
+    MachineFunction &MF, const TargetRegisterInfo *TRI,
+    std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return true; // Early exit if no callee saved registers are modified!
+
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
+    return false;
+
+  for (auto &CS : CSI) {
+    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
+      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
+        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+      break;
+    }
+  }
+
+  return false;
 }
 
 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
@@ -757,8 +1103,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
 
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!TFI->hasReservedCallFrame(MF)) {
+  if (!hasReservedCallFrame(MF)) {
     unsigned Align = getStackAlignment();
 
     Amount = alignTo(Amount, Align);
@@ -777,60 +1122,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
   return MBB.erase(I);
 }
 
-void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
-                                           MachineBasicBlock &MBB) const {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
-  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
-  MachineBasicBlock::iterator I = MBB.begin();
-  DebugLoc DL;
-
-  // For each dimension:
-  for (unsigned i = 0; i < 3; ++i) {
-    // Get work group ID SGPR, and make it live-in again.
-    unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
-    MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
-    MBB.addLiveIn(WorkGroupIDSGPR);
-
-    // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
-    // order to spill it to scratch.
-    unsigned WorkGroupIDVGPR =
-      MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
-      .addReg(WorkGroupIDSGPR);
-
-    // Spill work group ID.
-    int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
-    TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
-      WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
-
-    // Get work item ID VGPR, and make it live-in again.
-    unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
-    MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
-    MBB.addLiveIn(WorkItemIDVGPR);
-
-    // Spill work item ID.
-    int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
-    TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
-      WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
-  }
-}
-
 bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
-  // All stack operations are relative to the frame offset SGPR.
-  // TODO: Still want to eliminate sometimes.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MFI.hasCalls()) {
+    // All offsets are unsigned, so need to be addressed in the same direction
+    // as stack growth.
+
+    // FIXME: This function is pretty broken, since it can be called before the
+    // frame layout is determined or CSR spills are inserted.
+    if (MFI.getStackSize() != 0)
+      return true;
+
+    // For the entry point, the input wave scratch offset must be copied to the
+    // API SP if there are calls.
+    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
+      return true;
+  }
 
-  // XXX - Is this only called after frame is finalized? Should be able to check
-  // frame size.
-  return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
-}
-
-bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
-  // All stack operations are relative to the frame offset SGPR.
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
+  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+    MFI.hasStackMap() || MFI.hasPatchPoint() ||
+    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
+    MF.getTarget().Options.DisableFramePointerElim(MF);
 }
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 2f35b3631cdc..c644f4726e2c 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -1,9 +1,8 @@
 //===--------------------- SIFrameLowering.h --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,6 +36,14 @@ public:
 
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS = nullptr) const override;
+  void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
+                                RegScavenger *RS = nullptr) const;
+  bool
+  assignCalleeSavedSpillSlots(MachineFunction &MF,
+                              const TargetRegisterInfo *TRI,
+                              std::vector<CalleeSavedInfo> &CSI) const override;
+
+  bool isSupportedStackID(TargetStackID::Value ID) const override;
 
   void processFunctionBeforeFrameFinalized(
     MachineFunction &MF,
@@ -59,15 +66,9 @@ private:
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
-  std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
-    const GCNSubtarget &ST,
-    const SIInstrInfo *TII,
-    const SIRegisterInfo *TRI,
-    SIMachineFunctionInfo *MFI,
-    MachineFunction &MF) const;
-
-  /// Emits debugger prologue.
-  void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
+      const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+      SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
 
   // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
   void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
@@ -77,7 +78,6 @@ private:
 
 public:
   bool hasFP(const MachineFunction &MF) const override;
-  bool hasSP(const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 0ba921647097..db0782e2bf3e 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,6 @@
 
 #include "SIISelLowering.h"
 #include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
 #include "SIDefines.h"
@@ -95,11 +93,10 @@ static cl::opt<bool> EnableVGPRIndexMode(
   cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
   cl::init(false));
 
-static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
-  "amdgpu-frame-index-zero-bits",
-  cl::desc("High bits of frame index assumed to be zero"),
-  cl::init(5),
-  cl::ReallyHidden);
+static cl::opt<bool> DisableLoopAlignment(
+  "amdgpu-disable-loop-alignment",
+  cl::desc("Do not align and prefetch loops"),
+  cl::init(false));
 
 static unsigned findFirstFreeSGPR(CCState &CCInfo) {
   unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
@@ -125,12 +122,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
   addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
 
+  addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
+  addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
+
   addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
   addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
 
   addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
 
+  addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
+  addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
+
   addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
   addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
 
@@ -148,18 +151,27 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
   }
 
+  if (Subtarget->hasMAIInsts()) {
+    addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
+    addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
+  }
+
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
   setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
 
   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v3i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v5i32, Custom);
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
   setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -218,11 +230,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
 
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
   setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -248,8 +264,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   // We only support LOAD/STORE and vector manipulation ops for vectors
   // with > 4 elements.
-  for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
-        MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
+  for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+                  MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
+                  MVT::v32i32, MVT::v32f32 }) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
@@ -323,6 +340,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
   setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f16, Custom);
 
+  // Deal with vec3 vector operations when widened to vec4.
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3i32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3f32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Custom);
+
+  // Deal with vec5 vector operations when widened to vec8.
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Custom);
+
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
   // and output demarshalling
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -400,7 +429,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
 
 
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (Subtarget->haveRoundOpsF64()) {
     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::f64, Legal);
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -492,7 +521,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     // F16 - VOP3 Actions.
     setOperationAction(ISD::FMA, MVT::f16, Legal);
-    if (!Subtarget->hasFP16Denormals())
+    if (!Subtarget->hasFP16Denormals() && STI.hasMadF16())
       setOperationAction(ISD::FMAD, MVT::f16, Legal);
 
     for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
@@ -607,6 +636,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
 
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+
     setOperationAction(ISD::SHL, MVT::v4i16, Custom);
     setOperationAction(ISD::SRA, MVT::v4i16, Custom);
     setOperationAction(ISD::SRL, MVT::v4i16, Custom);
@@ -679,6 +711,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::FCANONICALIZE);
   setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
@@ -701,13 +734,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
   setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
   setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
 
   setSchedulingPreference(Sched::RegPressure);
-
-  // SI at least has hardware support for floating point exceptions, but no way
-  // of using or handling them is implemented. They are also optional in OpenCL
-  // (Section 7.3)
-  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
 }
 
 const GCNSubtarget *SITargetLowering::getSubtarget() const {
@@ -910,6 +939,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   switch (IntrID) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
@@ -919,13 +950,75 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = 0;
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 
+    const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
+    if (!Vol->isZero())
+      Info.flags |= MachineMemOperand::MOVolatile;
+
+    return true;
+  }
+  case Intrinsic::amdgcn_buffer_atomic_fadd: {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
+    Info.ptrVal = MFI->getBufferPSV(
+      *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
+      CI.getArgOperand(1));
+    Info.align = 0;
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
     const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
     if (!Vol || !Vol->isZero())
       Info.flags |= MachineMemOperand::MOVolatile;
 
     return true;
   }
+  case Intrinsic::amdgcn_global_atomic_fadd: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::getVT(CI.getOperand(0)->getType()
+                            ->getPointerElementType());
+    Info.ptrVal = CI.getOperand(0);
+    Info.align = 0;
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+    return true;
+  }
+  case Intrinsic::amdgcn_ds_append:
+  case Intrinsic::amdgcn_ds_consume: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(CI.getType());
+    Info.ptrVal = CI.getOperand(0);
+    Info.align = 0;
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+    const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
+    if (!Vol->isZero())
+      Info.flags |= MachineMemOperand::MOVolatile;
+
+    return true;
+  }
+  case Intrinsic::amdgcn_ds_gws_init:
+  case Intrinsic::amdgcn_ds_gws_barrier:
+  case Intrinsic::amdgcn_ds_gws_sema_v:
+  case Intrinsic::amdgcn_ds_gws_sema_br:
+  case Intrinsic::amdgcn_ds_gws_sema_p:
+  case Intrinsic::amdgcn_ds_gws_sema_release_all: {
+    Info.opc = ISD::INTRINSIC_VOID;
 
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.ptrVal =
+        MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+
+    // This is an abstract access, but we need to specify a type and size.
+    Info.memVT = MVT::i32;
+    Info.size = 4;
+    Info.align = 4;
+
+    Info.flags = MachineMemOperand::MOStore;
+    if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
+      Info.flags = MachineMemOperand::MOLoad;
+    return true;
+  }
   default:
     return false;
   }
@@ -937,6 +1030,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
   switch (II->getIntrinsicID()) {
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
   case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
@@ -960,6 +1055,13 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
   // GFX9 added a 13-bit signed offset. When using regular flat instructions,
   // the sign bit is ignored and is treated as a 12-bit unsigned offset.
 
+  // GFX10 shrank the signed offset to 12 bits. When using regular flat
+  // instructions, the sign bit is also ignored and is treated as an 11-bit
+  // unsigned offset.
+
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+    return isUInt<11>(AM.BaseOffs) && AM.Scale == 0;
+
   // Just r + i
   return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
 }
@@ -1030,7 +1132,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     return isLegalGlobalAddressingMode(AM);
 
   if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
-      AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+      AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+      AS == AMDGPUAS::BUFFER_FAT_POINTER) {
     // If the offset isn't a multiple of 4, it probably isn't going to be
     // correctly aligned.
     // FIXME: Can we get the real alignment here?
@@ -1106,16 +1209,15 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
   } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
     unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
     return (MemVT.getSizeInBits() <= MaxPrivateBits);
-  } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+  } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
     return (MemVT.getSizeInBits() <= 2 * 32);
   }
   return true;
 }
 
-bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                      unsigned AddrSpace,
-                                                      unsigned Align,
-                                                      bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
 
@@ -1178,11 +1280,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return VT.bitsGT(MVT::i32) && Align % 4 == 0;
 }
 
-EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                                          unsigned SrcAlign, bool IsMemset,
-                                          bool ZeroMemset,
-                                          bool MemcpyStrSrc,
-                                          MachineFunction &MF) const {
+EVT SITargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
   // FIXME: Should account for address space here.
 
   // The default fallback uses the private pointer size as a guess for a type to
@@ -1201,7 +1302,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
 static bool isFlatGlobalAddrSpace(unsigned AS) {
   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
          AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::CONSTANT_ADDRESS;
+         AS == AMDGPUAS::CONSTANT_ADDRESS ||
+         AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
 }
 
 bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -1216,8 +1318,8 @@ bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
   return I && I->getMetadata("amdgpu.noclobber");
 }
 
-bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
-                                            unsigned DestAS) const {
+bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
+                                           unsigned DestAS) const {
   // Flat -> private/local is a simple truncate.
   // Flat -> global is no-op
   if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
@@ -1305,6 +1407,17 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          const SDLoc &SL, SDValue Val,
                                          bool Signed,
                                          const ISD::InputArg *Arg) const {
+  // First, if it is a widened vector, narrow it.
+  if (VT.isVector() &&
+      VT.getVectorNumElements() != MemVT.getVectorNumElements()) {
+    EVT NarrowedVT =
+        EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
+                         VT.getVectorNumElements());
+    Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, NarrowedVT, Val,
+                      DAG.getConstant(0, SL, MVT::i32));
+  }
+
+  // Then convert the vector elements or scalar value.
   if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
       VT.bitsLT(MemVT)) {
     unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
@@ -1441,8 +1554,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
 
     // First check if it's a PS input addr.
     if (CallConv == CallingConv::AMDGPU_PS &&
-        !Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) {
-
+        !Arg->Flags.isInReg() && PSInputNum <= 15) {
       bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum);
 
       // Inconveniently only the first part of the split is marked as isSplit,
@@ -1508,7 +1620,13 @@ static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
 
 // Try to allocate a VGPR at the end of the argument list, or if no argument
 // VGPRs are left allocating a stack slot.
-static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
+// If \p Mask is given it indicates bitfield position in the register.
+// If \p Arg is given use it with new \p Mask instead of allocating new.
+static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
+                                         ArgDescriptor Arg = ArgDescriptor()) {
+  if (Arg.isSet())
+    return ArgDescriptor::createArg(Arg, Mask);
+
   ArrayRef<MCPhysReg> ArgVGPRs
     = makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), 32);
   unsigned RegIdx = CCInfo.getFirstUnallocated(ArgVGPRs);
@@ -1516,7 +1634,7 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
     // Spill to stack required.
     int64_t Offset = CCInfo.AllocateStack(4, 4);
 
-    return ArgDescriptor::createStack(Offset);
+    return ArgDescriptor::createStack(Offset, Mask);
   }
 
   unsigned Reg = ArgVGPRs[RegIdx];
@@ -1525,7 +1643,7 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
 
   MachineFunction &MF = CCInfo.getMachineFunction();
   MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
-  return ArgDescriptor::createRegister(Reg);
+  return ArgDescriptor::createRegister(Reg, Mask);
 }
 
 static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
@@ -1557,14 +1675,21 @@ static void allocateSpecialInputVGPRs(CCState &CCInfo,
                                       MachineFunction &MF,
                                       const SIRegisterInfo &TRI,
                                       SIMachineFunctionInfo &Info) {
-  if (Info.hasWorkItemIDX())
-    Info.setWorkItemIDX(allocateVGPR32Input(CCInfo));
+  const unsigned Mask = 0x3ff;
+  ArgDescriptor Arg;
 
-  if (Info.hasWorkItemIDY())
-    Info.setWorkItemIDY(allocateVGPR32Input(CCInfo));
+  if (Info.hasWorkItemIDX()) {
+    Arg = allocateVGPR32Input(CCInfo, Mask);
+    Info.setWorkItemIDX(Arg);
+  }
+
+  if (Info.hasWorkItemIDY()) {
+    Arg = allocateVGPR32Input(CCInfo, Mask << 10, Arg);
+    Info.setWorkItemIDY(Arg);
+  }
 
   if (Info.hasWorkItemIDZ())
-    Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo));
+    Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
 }
 
 static void allocateSpecialInputSGPRs(CCState &CCInfo,
@@ -1714,6 +1839,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
   // should reserve the arguments and use them directly.
   MachineFrameInfo &MFI = MF.getFrameInfo();
   bool HasStackObjects = MFI.hasStackObjects();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   // Record that we know we have non-spill stack objects so we don't need to
   // check all stack objects later.
@@ -1729,65 +1855,89 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
   // the scratch registers to pass in.
   bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
 
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (ST.isAmdHsaOrMesa(MF.getFunction())) {
-    if (RequiresStackAccess) {
-      // If we have stack objects, we unquestionably need the private buffer
-      // resource. For the Code Object V2 ABI, this will be the first 4 user
-      // SGPR inputs. We can reserve those and use them directly.
-
-      unsigned PrivateSegmentBufferReg = Info.getPreloadedReg(
-        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
-      Info.setScratchRSrcReg(PrivateSegmentBufferReg);
-
-      if (MFI.hasCalls()) {
-        // If we have calls, we need to keep the frame register in a register
-        // that won't be clobbered by a call, so ensure it is copied somewhere.
-
-        // This is not a problem for the scratch wave offset, because the same
-        // registers are reserved in all functions.
-
-        // FIXME: Nothing is really ensuring this is a call preserved register,
-        // it's just selected from the end so it happens to be.
-        unsigned ReservedOffsetReg
-          = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-        Info.setScratchWaveOffsetReg(ReservedOffsetReg);
-      } else {
-        unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg(
-          AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-        Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
-      }
-    } else {
-      unsigned ReservedBufferReg
-        = TRI.reservedPrivateSegmentBufferReg(MF);
-      unsigned ReservedOffsetReg
-        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-
-      // We tentatively reserve the last registers (skipping the last two
-      // which may contain VCC). After register allocation, we'll replace
-      // these with the ones immediately after those which were really
-      // allocated. In the prologue copies will be inserted from the argument
-      // to these reserved registers.
-      Info.setScratchRSrcReg(ReservedBufferReg);
-      Info.setScratchWaveOffsetReg(ReservedOffsetReg);
-    }
+  if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
+    // If we have stack objects, we unquestionably need the private buffer
+    // resource. For the Code Object V2 ABI, this will be the first 4 user
+    // SGPR inputs. We can reserve those and use them directly.
+
+    unsigned PrivateSegmentBufferReg =
+        Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+    Info.setScratchRSrcReg(PrivateSegmentBufferReg);
   } else {
     unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
+    // We tentatively reserve the last registers (skipping the last registers
+    // which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
+    // we'll replace these with the ones immediately after those which were
+    // really allocated. In the prologue copies will be inserted from the
+    // argument to these reserved registers.
 
     // Without HSA, relocations are used for the scratch pointer and the
     // buffer resource setup is always inserted in the prologue. Scratch wave
     // offset is still in an input SGPR.
     Info.setScratchRSrcReg(ReservedBufferReg);
+  }
 
-    if (HasStackObjects && !MFI.hasCalls()) {
-      unsigned ScratchWaveOffsetReg = Info.getPreloadedReg(
-        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-      Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+  // hasFP should be accurate for kernels even before the frame is finalized.
+  if (ST.getFrameLowering()->hasFP(MF)) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    // Try to use s32 as the SP, but move it if it would interfere with input
+    // arguments. This won't work with calls though.
+    //
+    // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
+    // registers.
+    if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
+      Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
     } else {
-      unsigned ReservedOffsetReg
-        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+      assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
+
+      if (MFI.hasCalls())
+        report_fatal_error("call in graphics shader with too many input SGPRs");
+
+      for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
+        if (!MRI.isLiveIn(Reg)) {
+          Info.setStackPtrOffsetReg(Reg);
+          break;
+        }
+      }
+
+      if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
+        report_fatal_error("failed to find register for SP");
+    }
+
+    if (MFI.hasCalls()) {
+      Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
+      Info.setFrameOffsetReg(AMDGPU::SGPR33);
+    } else {
+      unsigned ReservedOffsetReg =
+        TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
       Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+      Info.setFrameOffsetReg(ReservedOffsetReg);
     }
+  } else if (RequiresStackAccess) {
+    assert(!MFI.hasCalls());
+    // We know there are accesses and they will be done relative to SP, so just
+    // pin it to the input.
+    //
+    // FIXME: Should not do this if inline asm is reading/writing these
+    // registers.
+    unsigned PreloadedSP = Info.getPreloadedReg(
+        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+    Info.setStackPtrOffsetReg(PreloadedSP);
+    Info.setScratchWaveOffsetReg(PreloadedSP);
+    Info.setFrameOffsetReg(PreloadedSP);
+  } else {
+    assert(!MFI.hasCalls());
+
+    // There may not be stack access at all. There may still be spills, or
+    // access of a constant pointer (in which case an extra copy will be
+    // emitted in the prolog).
+    unsigned ReservedOffsetReg
+      = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+    Info.setStackPtrOffsetReg(ReservedOffsetReg);
+    Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+    Info.setFrameOffsetReg(ReservedOffsetReg);
   }
 }
 
@@ -1845,7 +1995,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
     DiagnosticInfoUnsupported NoGraphicsHSA(
@@ -1854,11 +2003,6 @@ SDValue SITargetLowering::LowerFormalArguments(
     return DAG.getEntryNode();
   }
 
-  // Create stack objects that are used for emitting debugger prologue if
-  // "amdgpu-debugger-emit-prologue" attribute was specified.
-  if (ST.debuggerEmitPrologue())
-    createDebuggerPrologueStackObjects(MF);
-
   SmallVector<ISD::InputArg, 16> Splits;
   SmallVector<CCValAssign, 16> ArgLocs;
   BitVector Skipped(Ins.size());
@@ -1869,12 +2013,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   bool IsKernel = AMDGPU::isKernel(CallConv);
   bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
 
-  if (!IsEntryFunc) {
-    // 4 bytes are reserved at offset 0 for the emergency stack slot. Skip over
-    // this when allocating argument fixed offsets.
-    CCInfo.AllocateStack(4, 4);
-  }
-
   if (IsShader) {
     processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
 
@@ -1975,7 +2113,8 @@ SDValue SITargetLowering::LowerFormalArguments(
       auto *ParamTy =
         dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
       if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
-          ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+          ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+                      ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) {
         // On SI local pointers are just offsets into LDS, so they are always
         // less than 16-bits.  On CI and newer they could potentially be
         // real pointers, so we can't guarantee their size.
@@ -2002,13 +2141,14 @@ SDValue SITargetLowering::LowerFormalArguments(
     Reg = MF.addLiveIn(Reg, RC);
     SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
 
-    if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) {
+    if (Arg.Flags.isSRet()) {
       // The return object should be reasonably addressable.
 
       // FIXME: This helps when the return is a real sret. If it is a
       // automatically inserted sret (i.e. CanLowerReturn returns false), an
       // extra copy is inserted in SelectionDAGBuilder which obscures this.
-      unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits;
+      unsigned NumBits
+        = 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex();
       Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
         DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
     }
@@ -2126,16 +2266,13 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     SDValue ReturnAddrReg = CreateLiveInRegister(
       DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
 
-    // FIXME: Should be able to use a vreg here, but need a way to prevent it
-    // from being allcoated to a CSR.
-
-    SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
-                                                MVT::i64);
-
-    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
+    SDValue ReturnAddrVirtualReg = DAG.getRegister(
+        MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass),
+        MVT::i64);
+    Chain =
+        DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
     Flag = Chain.getValue(1);
-
-    RetOps.push_back(PhysReturnAddrReg);
+    RetOps.push_back(ReturnAddrVirtualReg);
   }
 
   // Copy the result values into the output registers.
@@ -2295,9 +2432,6 @@ void SITargetLowering::passSpecialInputs(
     AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
     AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
     AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
-    AMDGPUFunctionArgInfo::WORKITEM_ID_X,
-    AMDGPUFunctionArgInfo::WORKITEM_ID_Y,
-    AMDGPUFunctionArgInfo::WORKITEM_ID_Z,
     AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
   };
 
@@ -2337,6 +2471,71 @@ void SITargetLowering::passSpecialInputs(
       MemOpChains.push_back(ArgStore);
     }
   }
+
+  // Pack workitem IDs into a single register or pass it as is if already
+  // packed.
+  const ArgDescriptor *OutgoingArg;
+  const TargetRegisterClass *ArgRC;
+
+  std::tie(OutgoingArg, ArgRC) =
+    CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+  if (!OutgoingArg)
+    std::tie(OutgoingArg, ArgRC) =
+      CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+  if (!OutgoingArg)
+    std::tie(OutgoingArg, ArgRC) =
+      CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+  if (!OutgoingArg)
+    return;
+
+  const ArgDescriptor *IncomingArgX
+    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
+  const ArgDescriptor *IncomingArgY
+    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
+  const ArgDescriptor *IncomingArgZ
+    = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
+
+  SDValue InputReg;
+  SDLoc SL;
+
+  // If incoming ids are not packed we need to pack them.
+  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX)
+    InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
+
+  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY) {
+    SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
+    Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
+                    DAG.getShiftAmountConstant(10, MVT::i32, SL));
+    InputReg = InputReg.getNode() ?
+                 DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
+  }
+
+  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ) {
+    SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
+    Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
+                    DAG.getShiftAmountConstant(20, MVT::i32, SL));
+    InputReg = InputReg.getNode() ?
+                 DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
+  }
+
+  if (!InputReg.getNode()) {
+    // Workitem ids are already packed; any of the present incoming arguments
+    // will carry all required fields.
+    ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+      IncomingArgX ? *IncomingArgX :
+      IncomingArgY ? *IncomingArgY :
+                     *IncomingArgZ, ~0u);
+    InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
+  }
+
+  if (OutgoingArg->isRegister()) {
+    RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+  } else {
+    unsigned SpecialArgOffset = CCInfo.AllocateStack(4, 4);
+    SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+                                            SpecialArgOffset);
+    MemOpChains.push_back(ArgStore);
+  }
 }
 
 static bool canGuaranteeTCO(CallingConv::ID CC) {
@@ -2478,7 +2677,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
                           "unsupported call from graphics shader of function ");
   }
 
-  // The first 4 bytes are reserved for the callee's emergency stack slot.
   if (IsTailCall) {
     IsTailCall = isEligibleForTailCallOptimization(
       Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2505,9 +2703,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
   CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
 
-  // The first 4 bytes are reserved for the callee's emergency stack slot.
-  CCInfo.AllocateStack(4, 4);
-
   CCInfo.AnalyzeCallOperands(Outs, AssignFn);
 
   // Get a count of how many bytes are to be pushed on the stack.
@@ -2528,31 +2723,19 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
 
-  SDValue CallerSavedFP;
-
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
   if (!IsSibCall) {
     Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
 
-    unsigned OffsetReg = Info->getScratchWaveOffsetReg();
+    SmallVector<SDValue, 4> CopyFromChains;
 
     // In the HSA case, this should be an identity copy.
     SDValue ScratchRSrcReg
       = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
     RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
-
-    // TODO: Don't hardcode these registers and get from the callee function.
-    SDValue ScratchWaveOffsetReg
-      = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
-    RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
-
-    if (!Info->isEntryFunction()) {
-      // Avoid clobbering this function's FP value. In the current convention
-      // callee will overwrite this, so do save/restore around the call site.
-      CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
-                                         Info->getFrameOffsetReg(), MVT::i32);
-    }
+    CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
+    Chain = DAG.getTokenFactor(DL, CopyFromChains);
   }
 
   SmallVector<SDValue, 8> MemOpChains;
@@ -2694,6 +2877,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
+  // Add a redundant copy of the callee global which will not be legalized, as
+  // we need direct access to the callee later.
+  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Callee);
+  const GlobalValue *GV = GSD->getGlobal();
+  Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
 
   if (IsTailCall) {
     // Each tail call may have to adjust the stack by a different amount, so
@@ -2735,12 +2923,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   Chain = Call.getValue(0);
   InFlag = Call.getValue(1);
 
-  if (CallerSavedFP) {
-    SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
-    Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
   uint64_t CalleePopBytes = NumBytes;
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
                              DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
@@ -2773,8 +2955,8 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
 
   }
 
-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
-      Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
+  if (!Subtarget->hasFlatScrRegister() &&
+       Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
     report_fatal_error(Twine("invalid register \""
                              + StringRef(RegName)  + "\" for subtarget."));
   }
@@ -2830,6 +3012,107 @@ MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
   return SplitBB;
 }
 
+// Split block \p MBB at \p MI so that a loop can be inserted. If \p InstInLoop
+// is true, \p MI will be the only instruction in the loop body block.
+// Otherwise, it will be the first instruction in the remainder block.
+//
+/// \returns { LoopBody, Remainder }
+static std::pair<MachineBasicBlock *, MachineBasicBlock *>
+splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
+  MachineFunction *MF = MBB.getParent();
+  MachineBasicBlock::iterator I(&MI);
+
+  // To insert the loop we need to split the block. Move everything after this
+  // point to a new block, and insert a new empty block between the two.
+  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+  MachineFunction::iterator MBBI(MBB);
+  ++MBBI;
+
+  MF->insert(MBBI, LoopBB);
+  MF->insert(MBBI, RemainderBB);
+
+  LoopBB->addSuccessor(LoopBB);
+  LoopBB->addSuccessor(RemainderBB);
+
+  // Move the rest of the block into a new block.
+  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+  if (InstInLoop) {
+    auto Next = std::next(I);
+
+    // Move instruction to loop body.
+    LoopBB->splice(LoopBB->begin(), &MBB, I, Next);
+
+    // Move the rest of the block.
+    RemainderBB->splice(RemainderBB->begin(), &MBB, Next, MBB.end());
+  } else {
+    RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+  }
+
+  MBB.addSuccessor(LoopBB);
+
+  return std::make_pair(LoopBB, RemainderBB);
+}
+
+MachineBasicBlock *
+SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
+                                         MachineBasicBlock *BB) const {
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+  MachineBasicBlock *LoopBB;
+  MachineBasicBlock *RemainderBB;
+  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+  MachineBasicBlock::iterator Prev = std::prev(MI.getIterator());
+
+  std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
+
+  MachineBasicBlock::iterator I = LoopBB->end();
+  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+
+  const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
+    AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
+
+  // Clear TRAP_STS.MEM_VIOL
+  BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+    .addImm(0)
+    .addImm(EncodedReg);
+
+  // This is a pain, but we're not allowed to have physical register live-ins
+  // yet. Insert a pair of copies if the VGPR0 hack is necessary.
+  if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) {
+    unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0)
+      .add(*Src);
+
+    BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::COPY), Src->getReg())
+      .addReg(Data0);
+
+    MRI.setSimpleHint(Data0, Src->getReg());
+  }
+
+  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT))
+    .addImm(0);
+
+  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+  // Load and check TRAP_STS.MEM_VIOL
+  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg)
+    .addImm(EncodedReg);
+
+  // FIXME: Do we need to use an isel pseudo that may clobber scc?
+  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32))
+    .addReg(Reg, RegState::Kill)
+    .addImm(0);
+  BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
+    .addMBB(LoopBB);
+
+  return RemainderBB;
+}
+
 // Do a v_movrels_b32 or v_movreld_b32 for each unique value of \p IdxReg in the
 // wavefront. If the value is uniform and just happens to be in a VGPR, this
 // will only do one iteration. In the worst case, this will loop 64 times.
@@ -2849,12 +3132,16 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
   int Offset,
   bool UseGPRIdxMode,
   bool IsIndirectSrc) {
+  MachineFunction *MF = OrigBB.getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineBasicBlock::iterator I = LoopBB.begin();
 
-  unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  const TargetRegisterClass *BoolRC = TRI->getBoolRC();
+  unsigned PhiExec = MRI.createVirtualRegister(BoolRC);
+  unsigned NewExec = MRI.createVirtualRegister(BoolRC);
   unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-  unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned CondReg = MRI.createVirtualRegister(BoolRC);
 
   BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
     .addReg(InitReg)
@@ -2878,7 +3165,9 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
     .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
 
   // Update EXEC, save the original EXEC value to VCC.
-  BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), NewExec)
+  BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
+                                                : AMDGPU::S_AND_SAVEEXEC_B64),
+          NewExec)
     .addReg(CondReg, RegState::Kill);
 
   MRI.setSimpleHint(NewExec, CondReg);
@@ -2894,7 +3183,7 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
         .addImm(Offset);
     }
     unsigned IdxMode = IsIndirectSrc ?
-      VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+      AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
     MachineInstr *SetOn =
       BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
       .addReg(IdxReg, RegState::Kill)
@@ -2913,10 +3202,12 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
   }
 
   // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   MachineInstr *InsertPt =
-    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
-    .addReg(AMDGPU::EXEC)
-    .addReg(NewExec);
+    BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
+                                                  : AMDGPU::S_XOR_B64_term), Exec)
+      .addReg(Exec)
+      .addReg(NewExec);
 
   // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
   // s_cbranch_scc0?
@@ -2942,38 +3233,28 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
                                                   bool UseGPRIdxMode,
                                                   bool IsIndirectSrc) {
   MachineFunction *MF = MBB.getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
+  const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
   unsigned DstReg = MI.getOperand(0).getReg();
-  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-  unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+  unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+  unsigned TmpExec = MRI.createVirtualRegister(BoolXExecRC);
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+  unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
 
   BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
 
   // Save the EXEC mask
-  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), SaveExec)
-    .addReg(AMDGPU::EXEC);
-
-  // To insert the loop we need to split the block. Move everything after this
-  // point to a new block, and insert a new empty block between the two.
-  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
-  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
-  MachineFunction::iterator MBBI(MBB);
-  ++MBBI;
-
-  MF->insert(MBBI, LoopBB);
-  MF->insert(MBBI, RemainderBB);
-
-  LoopBB->addSuccessor(LoopBB);
-  LoopBB->addSuccessor(RemainderBB);
-
-  // Move the rest of the block into a new block.
-  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
-  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+  BuildMI(MBB, I, DL, TII->get(MovExecOpc), SaveExec)
+    .addReg(Exec);
 
-  MBB.addSuccessor(LoopBB);
+  MachineBasicBlock *LoopBB;
+  MachineBasicBlock *RemainderBB;
+  std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false);
 
   const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
 
@@ -2982,7 +3263,7 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
                                       Offset, UseGPRIdxMode, IsIndirectSrc);
 
   MachineBasicBlock::iterator First = RemainderBB->begin();
-  BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+  BuildMI(*RemainderBB, First, DL, TII->get(MovExecOpc), Exec)
     .addReg(SaveExec);
 
   return InsPt;
@@ -3025,7 +3306,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
 
   if (UseGPRIdxMode) {
     unsigned IdxMode = IsIndirectSrc ?
-      VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+      AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
     if (Offset == 0) {
       MachineInstr *SetOn =
           BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
@@ -3274,6 +3555,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   case AMDGPU::S_ADD_U64_PSEUDO:
   case AMDGPU::S_SUB_U64_PSEUDO: {
     MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    const TargetRegisterClass *BoolRC = TRI->getBoolRC();
     const DebugLoc &DL = MI.getDebugLoc();
 
     MachineOperand &Dest = MI.getOperand(0);
@@ -3284,17 +3568,17 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
     MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
-     Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+     Src0, BoolRC, AMDGPU::sub0,
      &AMDGPU::SReg_32_XM0RegClass);
     MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
-      Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+      Src0, BoolRC, AMDGPU::sub1,
       &AMDGPU::SReg_32_XM0RegClass);
 
     MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
-      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+      Src1, BoolRC, AMDGPU::sub0,
       &AMDGPU::SReg_32_XM0RegClass);
     MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
-      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+      Src1, BoolRC, AMDGPU::sub1,
       &AMDGPU::SReg_32_XM0RegClass);
 
     bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
@@ -3330,6 +3614,14 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     MI.eraseFromParent();
     return BB;
 
+  case AMDGPU::SI_INIT_EXEC_LO:
+    // This should be before all vector instructions.
+    BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+            AMDGPU::EXEC_LO)
+        .addImm(MI.getOperand(0).getImm());
+    MI.eraseFromParent();
+    return BB;
+
   case AMDGPU::SI_INIT_EXEC_FROM_INPUT: {
     // Extract the thread count from an SGPR input and set EXEC accordingly.
     // Since BFM can't shift by 64, handle that case with CMP + CMOV.
@@ -3363,24 +3655,31 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     (void)Found;
 
     // This should be before all vector instructions.
+    unsigned Mask = (getSubtarget()->getWavefrontSize() << 1) - 1;
+    bool isWave32 = getSubtarget()->isWave32();
+    unsigned Exec = isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg)
         .addReg(InputReg)
-        .addImm((MI.getOperand(1).getImm() & 0x7f) | 0x70000);
-    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFM_B64),
-            AMDGPU::EXEC)
+        .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+    BuildMI(*BB, FirstMI, DebugLoc(),
+            TII->get(isWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
+            Exec)
         .addReg(CountReg)
         .addImm(0);
     BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32))
         .addReg(CountReg, RegState::Kill)
-        .addImm(64);
-    BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMOV_B64),
-            AMDGPU::EXEC)
+        .addImm(getSubtarget()->getWavefrontSize());
+    BuildMI(*BB, FirstMI, DebugLoc(),
+            TII->get(isWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+            Exec)
         .addImm(-1);
     MI.eraseFromParent();
     return BB;
   }
 
   case AMDGPU::GET_GROUPSTATICSIZE: {
+    assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+           getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
         .add(MI.getOperand(0))
@@ -3405,6 +3704,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     return splitKillBlock(MI, BB);
   case AMDGPU::V_CNDMASK_B64_PSEUDO: {
     MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
     unsigned Dst = MI.getOperand(0).getReg();
     unsigned Src0 = MI.getOperand(1).getReg();
@@ -3414,16 +3715,21 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
 
     unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-    unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+    const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+    unsigned SrcCondCopy = MRI.createVirtualRegister(CondRC);
 
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
       .addReg(SrcCond);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub0)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub0)
       .addReg(SrcCondCopy);
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+      .addImm(0)
       .addReg(Src0, 0, AMDGPU::sub1)
+      .addImm(0)
       .addReg(Src1, 0, AMDGPU::sub1)
       .addReg(SrcCondCopy);
 
@@ -3457,40 +3763,60 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
         .addReg(Info->getFrameOffsetReg(), RegState::Implicit);
     return BB;
   }
-  case AMDGPU::SI_CALL_ISEL:
-  case AMDGPU::SI_TCRETURN_ISEL: {
+  case AMDGPU::SI_CALL_ISEL: {
     const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
     const DebugLoc &DL = MI.getDebugLoc();
+
     unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
 
-    MachineRegisterInfo &MRI = MF->getRegInfo();
-    unsigned GlobalAddrReg = MI.getOperand(0).getReg();
-    MachineInstr *PCRel = MRI.getVRegDef(GlobalAddrReg);
-    assert(PCRel->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET);
+    MachineInstrBuilder MIB;
+    MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
 
-    const GlobalValue *G = PCRel->getOperand(1).getGlobal();
+    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+      MIB.add(MI.getOperand(I));
 
-    MachineInstrBuilder MIB;
-    if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
-      MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg)
-        .add(MI.getOperand(0))
-        .addGlobalAddress(G);
-    } else {
-      MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_TCRETURN))
-        .add(MI.getOperand(0))
-        .addGlobalAddress(G);
+    MIB.cloneMemRefs(MI);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case AMDGPU::V_ADD_I32_e32:
+  case AMDGPU::V_SUB_I32_e32:
+  case AMDGPU::V_SUBREV_I32_e32: {
+    // TODO: Define distinct V_*_I32_Pseudo instructions instead.
+    const DebugLoc &DL = MI.getDebugLoc();
+    unsigned Opc = MI.getOpcode();
 
-      // There is an additional imm operand for tcreturn, but it should be in the
-      // right place already.
+    bool NeedClampOperand = false;
+    if (TII->pseudoToMCOpcode(Opc) == -1) {
+      Opc = AMDGPU::getVOPe64(Opc);
+      NeedClampOperand = true;
     }
 
-    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
-      MIB.add(MI.getOperand(I));
+    auto I = BuildMI(*BB, MI, DL, TII->get(Opc), MI.getOperand(0).getReg());
+    if (TII->isVOP3(*I)) {
+      const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+      const SIRegisterInfo *TRI = ST.getRegisterInfo();
+      I.addReg(TRI->getVCC(), RegState::Define);
+    }
+    I.add(MI.getOperand(1))
+     .add(MI.getOperand(2));
+    if (NeedClampOperand)
+      I.addImm(0); // clamp bit for e64 encoding
+
+    TII->legalizeOperands(*I);
 
-    MIB.cloneMemRefs(MI);
     MI.eraseFromParent();
     return BB;
   }
+  case AMDGPU::DS_GWS_INIT:
+  case AMDGPU::DS_GWS_SEMA_V:
+  case AMDGPU::DS_GWS_SEMA_BR:
+  case AMDGPU::DS_GWS_SEMA_P:
+  case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
+  case AMDGPU::DS_GWS_BARRIER:
+    if (getSubtarget()->hasGWSAutoReplay())
+      return BB;
+    return emitGWSMemViolTestLoop(MI, BB);
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   }
@@ -3617,6 +3943,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
   case ISD::LOAD: {
     SDValue Result = LowerLOAD(Op, DAG);
     assert((!Result.getNode() ||
@@ -3641,10 +3968,14 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
   case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
+  case ISD::INSERT_SUBVECTOR:
+    return lowerINSERT_SUBVECTOR(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return lowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    return lowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::FP_ROUND:
@@ -3742,10 +4073,7 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
 static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
                                   SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
-  const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
-  if (!CD)
-    return DAG.getUNDEF(VT);
-
+  const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
   int CondCode = CD->getSExtValue();
   if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
       CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
@@ -3753,7 +4081,6 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
 
   ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
 
-
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
 
@@ -3769,16 +4096,20 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
 
   ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
 
-  return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
-                     DAG.getCondCode(CCOpcode));
+  unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+  EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
+  SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
+                              DAG.getCondCode(CCOpcode));
+  if (VT.bitsEq(CCVT))
+    return SetCC;
+  return DAG.getZExtOrTrunc(SetCC, DL, VT);
 }
 
 static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
                                   SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
-  const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
-  if (!CD)
-    return DAG.getUNDEF(VT);
+  const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
 
   int CondCode = CD->getSExtValue();
   if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
@@ -3798,8 +4129,13 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
 
   FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
   ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
-  return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
-                     Src1, DAG.getCondCode(CCOpcode));
+  unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+  EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+  SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
+                              Src1, DAG.getCondCode(CCOpcode));
+  if (VT.bitsEq(CCVT))
+    return SetCC;
+  return DAG.getZExtOrTrunc(SetCC, SL, VT);
 }
 
 void SITargetLowering::ReplaceNodeResults(SDNode *N,
@@ -3957,32 +4293,6 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
   return 0;
 }
 
-void SITargetLowering::createDebuggerPrologueStackObjects(
-    MachineFunction &MF) const {
-  // Create stack objects that are used for emitting debugger prologue.
-  //
-  // Debugger prologue writes work group IDs and work item IDs to scratch memory
-  // at fixed location in the following format:
-  //   offset 0:  work group ID x
-  //   offset 4:  work group ID y
-  //   offset 8:  work group ID z
-  //   offset 16: work item ID x
-  //   offset 20: work item ID y
-  //   offset 24: work item ID z
-  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  int ObjectIdx = 0;
-
-  // For each dimension:
-  for (unsigned i = 0; i < 3; ++i) {
-    // Create fixed stack object for work group ID.
-    ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true);
-    Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
-    // Create fixed stack object for work item ID.
-    ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true);
-    Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
-  }
-}
-
 bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
   const Triple &TT = getTargetMachine().getTargetTriple();
   return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
@@ -3991,7 +4301,10 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
 }
 
 bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
-  return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+  // FIXME: Either avoid relying on address space here or change the default
+  // address space for functions to avoid the explicit check.
+  return (GV->getValueType()->isFunctionTy() ||
+          GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
           GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
           GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
          !shouldEmitFixup(GV) &&
@@ -4103,6 +4416,31 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   return Chain;
 }
 
+SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  // Checking the depth
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  // Check for kernel and shader functions
+  if (Info->isEntryFunction())
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+  // Get the return address reg and mark it as an implicit live-in
+  unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
+
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+}
+
 SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
                                             SDValue Op,
                                             const SDLoc &DL,
@@ -4131,7 +4469,9 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
 SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
                                                SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  bool IsIEEEMode = Info->getMode().IEEE;
 
   // FIXME: Assert during eslection that this is only selected for
   // ieee_mode. Currently a combine can produce the ieee version for non-ieee
@@ -4302,6 +4642,32 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
   return DAG.getUNDEF(ASC->getValueType(0));
 }
 
+// This lowers an INSERT_SUBVECTOR by extracting the individual elements from
+// the small vector and inserting them into the big vector. That is better than
+// the default expansion of doing it via a stack slot. Even though the use of
+// the stack slot would be optimized away afterwards, the stack slot itself
+// remains.
+SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDValue Vec = Op.getOperand(0);
+  SDValue Ins = Op.getOperand(1);
+  SDValue Idx = Op.getOperand(2);
+  EVT VecVT = Vec.getValueType();
+  EVT InsVT = Ins.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
+  unsigned InsNumElts = InsVT.getVectorNumElements();
+  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  SDLoc SL(Op);
+
+  for (unsigned I = 0; I != InsNumElts; ++I) {
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
+                              DAG.getConstant(I, SL, MVT::i32));
+    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, VecVT, Vec, Elt,
+                      DAG.getConstant(IdxVal + I, SL, MVT::i32));
+  }
+  return Vec;
+}
+
 SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDValue Vec = Op.getOperand(0);
@@ -4352,12 +4718,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   MVT IntVT = MVT::getIntegerVT(VecSize);
 
   // Avoid stack access for dynamic indexing.
-  SDValue Val = InsVal;
-  if (InsVal.getValueType() == MVT::f16)
-      Val = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsVal);
-
   // v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
-  SDValue ExtVal = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Val);
+
+  // Create a congruent vector with the target value in each element so that
+  // the required element can be masked and ORed into the target vector.
+  SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
+                               DAG.getSplatBuildVector(VecVT, SL, InsVal));
 
   assert(isPowerOf2_32(EltSize));
   SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
@@ -4419,6 +4785,63 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
 }
 
+static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) {
+  assert(Elt % 2 == 0);
+  return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
+}
+
+SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  EVT ResultVT = Op.getValueType();
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+
+  EVT PackVT = ResultVT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+  EVT EltVT = PackVT.getVectorElementType();
+  int SrcNumElts = Op.getOperand(0).getValueType().getVectorNumElements();
+
+  // vector_shuffle <0,1,6,7> lhs, rhs
+  // -> concat_vectors (extract_subvector lhs, 0), (extract_subvector rhs, 2)
+  //
+  // vector_shuffle <6,7,2,3> lhs, rhs
+  // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 2)
+  //
+  // vector_shuffle <6,7,0,1> lhs, rhs
+  // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 0)
+
+  // Avoid scalarizing when both halves are reading from consecutive elements.
+  SmallVector<SDValue, 4> Pieces;
+  for (int I = 0, N = ResultVT.getVectorNumElements(); I != N; I += 2) {
+    if (elementPairIsContiguous(SVN->getMask(), I)) {
+      const int Idx = SVN->getMaskElt(I);
+      int VecIdx = Idx < SrcNumElts ? 0 : 1;
+      int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
+      SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
+                                    PackVT, SVN->getOperand(VecIdx),
+                                    DAG.getConstant(EltIdx, SL, MVT::i32));
+      Pieces.push_back(SubVec);
+    } else {
+      const int Idx0 = SVN->getMaskElt(I);
+      const int Idx1 = SVN->getMaskElt(I + 1);
+      int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
+      int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
+      int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
+      int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
+
+      SDValue Vec0 = SVN->getOperand(VecIdx0);
+      SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+                                 Vec0, DAG.getConstant(EltIdx0, SL, MVT::i32));
+
+      SDValue Vec1 = SVN->getOperand(VecIdx1);
+      SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+                                 Vec1, DAG.getConstant(EltIdx1, SL, MVT::i32));
+      Pieces.push_back(DAG.getBuildVector(PackVT, SL, { Elt0, Elt1 }));
+    }
+  }
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
+}
+
 SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc SL(Op);
@@ -4512,11 +4935,18 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
   // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
   // small. This requires us to add 4 to the global variable offset in order to
   // compute the correct address.
-  SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
-                                             GAFlags);
-  SDValue PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
-                                             GAFlags == SIInstrInfo::MO_NONE ?
-                                             GAFlags : GAFlags + 1);
+  unsigned LoFlags = GAFlags;
+  if (LoFlags == SIInstrInfo::MO_NONE)
+    LoFlags = SIInstrInfo::MO_REL32;
+  SDValue PtrLo =
+      DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags);
+  SDValue PtrHi;
+  if (GAFlags == SIInstrInfo::MO_NONE) {
+    PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
+  } else {
+    PtrHi =
+        DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags + 1);
+  }
   return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
 }
 
@@ -4525,7 +4955,10 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                              SelectionDAG &DAG) const {
   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GSD->getGlobal();
-  if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+  if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+       (!GV->hasExternalLinkage() ||
+        getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+        getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
       GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
       GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -4533,7 +4966,12 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
   SDLoc DL(GSD);
   EVT PtrVT = Op.getValueType();
 
-  // FIXME: Should not make address space based decisions here.
+  if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+                                            SIInstrInfo::MO_ABS32_LO);
+    return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
+  }
+
   if (shouldEmitFixup(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
   else if (shouldEmitPCReloc(GV))
@@ -4641,10 +5079,8 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
 }
 
 static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
-                             SDValue *GLC, SDValue *SLC) {
-  auto CachePolicyConst = dyn_cast<ConstantSDNode>(CachePolicy.getNode());
-  if (!CachePolicyConst)
-    return false;
+                             SDValue *GLC, SDValue *SLC, SDValue *DLC) {
+  auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
 
   uint64_t Value = CachePolicyConst->getZExtValue();
   SDLoc DL(CachePolicy);
@@ -4656,6 +5092,10 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
     *SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
     Value &= ~(uint64_t)0x2;
   }
+  if (DLC) {
+    *DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32);
+    Value &= ~(uint64_t)0x4;
+  }
 
   return Value == 0;
 }
@@ -4689,14 +5129,14 @@ static SDValue constructRetValue(SelectionDAG &DAG,
   EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
                            : AdjEltVT;
 
-  // Special case for v8f16. Rather than add support for this, use v4i32 to
+  // Special case for v6f16. Rather than add support for this, use v3i32 to
   // extract the data elements
-  bool V8F16Special = false;
-  if (CastVT == MVT::v8f16) {
-    CastVT = MVT::v4i32;
+  bool V6F16Special = false;
+  if (NumElts == 6) {
+    CastVT = EVT::getVectorVT(Context, MVT::i32, NumElts / 2);
     DMaskPop >>= 1;
     ReqRetNumElts >>= 1;
-    V8F16Special = true;
+    V6F16Special = true;
     AdjVT = MVT::v2i32;
   }
 
@@ -4726,7 +5166,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
     PreTFCRes = BVElts[0];
   }
 
-  if (V8F16Special)
+  if (V6F16Special)
     PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
 
   if (!IsTexFail) {
@@ -4745,9 +5185,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
 
 static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
                          SDValue *LWE, bool &IsTexFail) {
-  auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
-  if (!TexFailCtrlConst)
-    return false;
+  auto TexFailCtrlConst = cast<ConstantSDNode>(TexFailCtrl.getNode());
 
   uint64_t Value = TexFailCtrlConst->getZExtValue();
   if (Value) {
@@ -4774,7 +5212,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
   const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
       AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
   unsigned IntrOpcode = Intr->BaseOpcode;
+  bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
 
   SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
   SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
@@ -4810,9 +5251,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     }
   } else {
     unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
-    auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
-    if (!DMaskConst)
-      return Op;
+    auto DMaskConst = cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
     DMask = DMaskConst->getZExtValue();
     DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
 
@@ -4821,8 +5260,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 
       MVT StoreVT = VData.getSimpleValueType();
       if (StoreVT.getScalarType() == MVT::f16) {
-        if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-            !BaseOpcode->HasD16)
+        if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
           return Op; // D16 is unsupported for this instruction
 
         IsD16 = true;
@@ -4835,8 +5273,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       // and whether packing is supported.
       MVT LoadVT = ResultTypes[0].getSimpleVT();
       if (LoadVT.getScalarType() == MVT::f16) {
-        if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
-            !BaseOpcode->HasD16)
+        if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
           return Op; // D16 is unsupported for this instruction
 
         IsD16 = true;
@@ -4878,6 +5315,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     }
   }
 
+  // Optimize _mip away, when 'lod' is zero
+  if (MIPMappingInfo) {
+    if (auto ConstantLod =
+         dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+      if (ConstantLod->isNullValue()) {
+        IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
+        NumMIVAddrs--;               // remove 'lod'
+      }
+    }
+  }
+
   // Check for 16 bit addresses and pack if true.
   unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
   MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
@@ -4915,7 +5363,22 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       VAddrs.push_back(Op.getOperand(AddrIdx + i));
   }
 
-  SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+  // If the register allocator cannot place the address registers contiguously
+  // without introducing moves, then using the non-sequential address encoding
+  // is always preferable, since it saves VALU instructions and is usually a
+  // wash in terms of code size or even better.
+  //
+  // However, we currently have no way of hinting to the register allocator that
+  // MIMG addresses should be placed contiguously when it is possible to do so,
+  // so force non-NSA for the common 2-address case as a heuristic.
+  //
+  // SIShrinkInstructions will convert NSA encodings to non-NSA after register
+  // allocation when possible.
+  bool UseNSA =
+      ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3;
+  SDValue VAddr;
+  if (!UseNSA)
+    VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
 
   SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
   SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
@@ -4926,9 +5389,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     CtrlIdx = AddrIdx + NumVAddrs + 1;
   } else {
     auto UnormConst =
-        dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
-    if (!UnormConst)
-      return Op;
+        cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
 
     Unorm = UnormConst->getZExtValue() ? True : False;
     CtrlIdx = AddrIdx + NumVAddrs + 3;
@@ -4965,9 +5426,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       return Undef;
     }
 
-    // Have to use a power of 2 number of dwords
-    NumVDataDwords = 1 << Log2_32_Ceil(NumVDataDwords);
-
     EVT NewVT = NumVDataDwords > 1 ?
                   EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
                 : MVT::f32;
@@ -4983,45 +5441,66 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 
   SDValue GLC;
   SDValue SLC;
+  SDValue DLC;
   if (BaseOpcode->Atomic) {
     GLC = True; // TODO no-return optimization
-    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC))
+    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC,
+                          IsGFX10 ? &DLC : nullptr))
       return Op;
   } else {
-    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC))
+    if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC,
+                          IsGFX10 ? &DLC : nullptr))
       return Op;
   }
 
-  SmallVector<SDValue, 14> Ops;
+  SmallVector<SDValue, 26> Ops;
   if (BaseOpcode->Store || BaseOpcode->Atomic)
     Ops.push_back(VData); // vdata
-  Ops.push_back(VAddr);
+  if (UseNSA) {
+    for (const SDValue &Addr : VAddrs)
+      Ops.push_back(Addr);
+  } else {
+    Ops.push_back(VAddr);
+  }
   Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
   if (BaseOpcode->Sampler)
     Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
   Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
+  if (IsGFX10)
+    Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
   Ops.push_back(Unorm);
+  if (IsGFX10)
+    Ops.push_back(DLC);
   Ops.push_back(GLC);
   Ops.push_back(SLC);
   Ops.push_back(IsA16 &&  // a16 or r128
                 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
   Ops.push_back(TFE); // tfe
   Ops.push_back(LWE); // lwe
-  Ops.push_back(DimInfo->DA ? True : False);
+  if (!IsGFX10)
+    Ops.push_back(DimInfo->DA ? True : False);
   if (BaseOpcode->HasD16)
     Ops.push_back(IsD16 ? True : False);
   if (isa<MemSDNode>(Op))
     Ops.push_back(Op.getOperand(0)); // chain
 
-  int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
+  int NumVAddrDwords =
+      UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
   int Opcode = -1;
 
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
-    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
-                                   NumVDataDwords, NumVAddrDwords);
-  if (Opcode == -1)
-    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+  if (IsGFX10) {
+    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
+                                          : AMDGPU::MIMGEncGfx10Default,
                                    NumVDataDwords, NumVAddrDwords);
+  } else {
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
+                                     NumVDataDwords, NumVAddrDwords);
+    if (Opcode == -1)
+      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+                                     NumVDataDwords, NumVAddrDwords);
+  }
   assert(Opcode != -1);
 
   MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
@@ -5046,7 +5525,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 }
 
 SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
-                                       SDValue Offset, SDValue GLC,
+                                       SDValue Offset, SDValue GLC, SDValue DLC,
                                        SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5059,7 +5538,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
     SDValue Ops[] = {
         Rsrc,
         Offset, // Offset
-        GLC     // glc
+        GLC,
+        DLC,
     };
     return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
                                    DAG.getVTList(VT), Ops, VT, MMO);
@@ -5263,16 +5743,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDZ);
-  case SIIntrinsic::SI_load_const: {
-    SDValue Load =
-        lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
-                     DAG.getTargetConstant(0, DL, MVT::i1), DAG);
-    return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Load);
-  }
+  case Intrinsic::amdgcn_wavefrontsize:
+    return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
+                           SDLoc(Op), MVT::i32);
   case Intrinsic::amdgcn_s_buffer_load: {
-    unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-    return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
-                        DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
+    bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
+    SDValue GLC;
+    SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
+    if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
+                          IsGFX10 ? &DLC : nullptr))
+      return Op;
+    return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC,
+                        DAG);
   }
   case Intrinsic::amdgcn_fdiv_fast:
     return lowerFDIV_FAST(Op, DAG);
@@ -5295,12 +5777,70 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
                        Glue);
   }
+  case Intrinsic::amdgcn_interp_p1_f16: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+    SDValue Glue = M0.getValue(1);
+    if (getSubtarget()->getLDSBankCount() == 16) {
+      // 16 bank LDS
+      SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
+                              DAG.getConstant(2, DL, MVT::i32), // P0
+                              Op.getOperand(2), // Attrchan
+                              Op.getOperand(3), // Attr
+                              Glue);
+      SDValue Ops[] = {
+        Op.getOperand(1), // Src0
+        Op.getOperand(2), // Attrchan
+        Op.getOperand(3), // Attr
+        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        S, // Src2 - holds two f16 values selected by high
+        DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+        Op.getOperand(4), // high
+        DAG.getConstant(0, DL, MVT::i1), // $clamp
+        DAG.getConstant(0, DL, MVT::i32) // $omod
+      };
+      return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
+    } else {
+      // 32 bank LDS
+      SDValue Ops[] = {
+        Op.getOperand(1), // Src0
+        Op.getOperand(2), // Attrchan
+        Op.getOperand(3), // Attr
+        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        Op.getOperand(4), // high
+        DAG.getConstant(0, DL, MVT::i1), // $clamp
+        DAG.getConstant(0, DL, MVT::i32), // $omod
+        Glue
+      };
+      return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
+    }
+  }
+  case Intrinsic::amdgcn_interp_p2_f16: {
+    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
+    SDValue Glue = SDValue(M0.getNode(), 1);
+    SDValue Ops[] = {
+      Op.getOperand(2), // Src0
+      Op.getOperand(3), // Attrchan
+      Op.getOperand(4), // Attr
+      DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+      Op.getOperand(1), // Src2
+      DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+      Op.getOperand(5), // high
+      DAG.getConstant(0, DL, MVT::i1), // $clamp
+      Glue
+    };
+    return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
+  }
   case Intrinsic::amdgcn_sin:
     return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
 
   case Intrinsic::amdgcn_cos:
     return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
 
+  case Intrinsic::amdgcn_mul_u24:
+    return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+  case Intrinsic::amdgcn_mul_i24:
+    return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
   case Intrinsic::amdgcn_log_clamp: {
     if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return SDValue();
@@ -5334,10 +5874,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::amdgcn_div_scale: {
-    // 3rd parameter required to be a constant.
-    const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
-    if (!Param)
-      return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL);
+    const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
 
     // Translate to the operands expected by the machine instruction. The
     // first parameter must be the same as the first instruction.
@@ -5423,6 +5960,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_fmad_ftz:
     return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
                        Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::amdgcn_if_break:
+    return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
+                                      Op->getOperand(1), Op->getOperand(2)), 0);
+
+  case Intrinsic::amdgcn_groupstaticsize: {
+    Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
+    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+      return Op;
+
+    const Module *M = MF.getFunction().getParent();
+    const GlobalValue *GV =
+        M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
+                                            SIInstrInfo::MO_ABS32_LO);
+    return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+  }
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5438,9 +5992,99 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   SDLoc DL(Op);
 
   switch (IntrID) {
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap: {
+    MemSDNode *M = cast<MemSDNode>(Op);
+    SDValue Chain = M->getOperand(0);
+    SDValue M0 = M->getOperand(2);
+    SDValue Value = M->getOperand(3);
+    unsigned IndexOperand = M->getConstantOperandVal(7);
+    unsigned WaveRelease = M->getConstantOperandVal(8);
+    unsigned WaveDone = M->getConstantOperandVal(9);
+    unsigned ShaderType;
+    unsigned Instruction;
+
+    unsigned OrderedCountIndex = IndexOperand & 0x3f;
+    IndexOperand &= ~0x3f;
+    unsigned CountDw = 0;
+
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
+      CountDw = (IndexOperand >> 24) & 0xf;
+      IndexOperand &= ~(0xf << 24);
+
+      if (CountDw < 1 || CountDw > 4) {
+        report_fatal_error(
+            "ds_ordered_count: dword count must be between 1 and 4");
+      }
+    }
+
+    if (IndexOperand)
+      report_fatal_error("ds_ordered_count: bad index operand");
+
+    switch (IntrID) {
+    case Intrinsic::amdgcn_ds_ordered_add:
+      Instruction = 0;
+      break;
+    case Intrinsic::amdgcn_ds_ordered_swap:
+      Instruction = 1;
+      break;
+    }
+
+    if (WaveDone && !WaveRelease)
+      report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
+    case CallingConv::AMDGPU_CS:
+    case CallingConv::AMDGPU_KERNEL:
+      ShaderType = 0;
+      break;
+    case CallingConv::AMDGPU_PS:
+      ShaderType = 1;
+      break;
+    case CallingConv::AMDGPU_VS:
+      ShaderType = 2;
+      break;
+    case CallingConv::AMDGPU_GS:
+      ShaderType = 3;
+      break;
+    default:
+      report_fatal_error("ds_ordered_count unsupported for this calling conv");
+    }
+
+    unsigned Offset0 = OrderedCountIndex << 2;
+    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+                       (Instruction << 4);
+
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+      Offset1 |= (CountDw - 1) << 6;
+
+    unsigned Offset = Offset0 | (Offset1 << 8);
+
+    SDValue Ops[] = {
+      Chain,
+      Value,
+      DAG.getTargetConstant(Offset, DL, MVT::i16),
+      copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
+    };
+    return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
+                                   M->getVTList(), Ops, M->getMemoryVT(),
+                                   M->getMemOperand());
+  }
+  case Intrinsic::amdgcn_ds_fadd: {
+    MemSDNode *M = cast<MemSDNode>(Op);
+    unsigned Opc;
+    switch (IntrID) {
+    case Intrinsic::amdgcn_ds_fadd:
+      Opc = ISD::ATOMIC_LOAD_FADD;
+      break;
+    }
+
+    return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
+                         M->getOperand(0), M->getOperand(2), M->getOperand(3),
+                         M->getMemOperand());
+  }
   case Intrinsic::amdgcn_atomic_inc:
   case Intrinsic::amdgcn_atomic_dec:
-  case Intrinsic::amdgcn_ds_fadd:
   case Intrinsic::amdgcn_ds_fmin:
   case Intrinsic::amdgcn_ds_fmax: {
     MemSDNode *M = cast<MemSDNode>(Op);
@@ -5452,9 +6096,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     case Intrinsic::amdgcn_atomic_dec:
       Opc = AMDGPUISD::ATOMIC_DEC;
       break;
-    case Intrinsic::amdgcn_ds_fadd:
-      Opc = AMDGPUISD::ATOMIC_LOAD_FADD;
-      break;
     case Intrinsic::amdgcn_ds_fmin:
       Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
       break;
@@ -5503,8 +6144,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
-                                   M->getMemOperand());
+
+    // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+    if (LoadVT.getScalarType() == MVT::i8 ||
+        LoadVT.getScalarType() == MVT::i16)
+      return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+    return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+                               M->getMemOperand(), DAG);
   }
   case Intrinsic::amdgcn_raw_buffer_load:
   case Intrinsic::amdgcn_raw_buffer_load_format: {
@@ -5531,8 +6178,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
-                                   M->getMemOperand());
+
+    // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+    if (LoadVT.getScalarType() == MVT::i8 ||
+        LoadVT.getScalarType() == MVT::i16)
+      return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+    return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+                               M->getMemOperand(), DAG);
   }
   case Intrinsic::amdgcn_struct_buffer_load:
   case Intrinsic::amdgcn_struct_buffer_load_format: {
@@ -5559,8 +6212,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
-                                   M->getMemOperand());
+
+    // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+    if (LoadVT.getScalarType() == MVT::i8 ||
+        LoadVT.getScalarType() == MVT::i16)
+      return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+    return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+                               M->getMemOperand(), DAG);
   }
   case Intrinsic::amdgcn_tbuffer_load: {
     MemSDNode *M = cast<MemSDNode>(Op);
@@ -5588,9 +6247,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
-                                   Op->getVTList(), Ops, LoadVT,
-                                   M->getMemOperand());
+    return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+                               Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+                               DAG);
   }
   case Intrinsic::amdgcn_raw_tbuffer_load: {
     MemSDNode *M = cast<MemSDNode>(Op);
@@ -5612,9 +6271,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
-                                   Op->getVTList(), Ops, LoadVT,
-                                   M->getMemOperand());
+    return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+                               Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+                               DAG);
   }
   case Intrinsic::amdgcn_struct_tbuffer_load: {
     MemSDNode *M = cast<MemSDNode>(Op);
@@ -5636,9 +6295,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (LoadVT.getScalarType() == MVT::f16)
       return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
                                  M, DAG, Ops);
-    return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
-                                   Op->getVTList(), Ops, LoadVT,
-                                   M->getMemOperand());
+    return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+                               Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+                               DAG);
   }
   case Intrinsic::amdgcn_buffer_atomic_swap:
   case Intrinsic::amdgcn_buffer_atomic_add:
@@ -5913,6 +6572,39 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   }
 }
 
+// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
+// dwordx4 if on SI.
+SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
+                                              SDVTList VTList,
+                                              ArrayRef<SDValue> Ops, EVT MemVT,
+                                              MachineMemOperand *MMO,
+                                              SelectionDAG &DAG) const {
+  EVT VT = VTList.VTs[0];
+  EVT WidenedVT = VT;
+  EVT WidenedMemVT = MemVT;
+  if (!Subtarget->hasDwordx3LoadStores() &&
+      (WidenedVT == MVT::v3i32 || WidenedVT == MVT::v3f32)) {
+    WidenedVT = EVT::getVectorVT(*DAG.getContext(),
+                                 WidenedVT.getVectorElementType(), 4);
+    WidenedMemVT = EVT::getVectorVT(*DAG.getContext(),
+                                    WidenedMemVT.getVectorElementType(), 4);
+    MMO = DAG.getMachineFunction().getMachineMemOperand(MMO, 0, 16);
+  }
+
+  assert(VTList.NumVTs == 2);
+  SDVTList WidenedVTList = DAG.getVTList(WidenedVT, VTList.VTs[1]);
+
+  auto NewOp = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
+                                       WidenedMemVT, MMO);
+  if (WidenedVT != VT) {
+    auto Extract = DAG.getNode(
+        ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
+        DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+    NewOp = DAG.getMergeValues({ Extract, SDValue(NewOp.getNode(), 1) }, DL);
+  }
+  return NewOp;
+}
+
 SDValue SITargetLowering::handleD16VData(SDValue VData,
                                          SelectionDAG &DAG) const {
   EVT StoreVT = VData.getValueType();
@@ -6129,6 +6821,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
     Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
     MemSDNode *M = cast<MemSDNode>(Op);
+
+    // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+    EVT VDataType = VData.getValueType().getScalarType();
+    if (VDataType == MVT::i8 || VDataType == MVT::i16)
+      return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
     return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
                                    M->getMemoryVT(), M->getMemOperand());
   }
@@ -6155,6 +6853,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
     Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
     MemSDNode *M = cast<MemSDNode>(Op);
+
+    // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+    EVT VDataType = VData.getValueType().getScalarType();
+    if (VDataType == MVT::i8 || VDataType == MVT::i16)
+      return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
     return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
                                    M->getMemoryVT(), M->getMemOperand());
   }
@@ -6181,10 +6885,63 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
     Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
     MemSDNode *M = cast<MemSDNode>(Op);
+
+    // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+    EVT VDataType = VData.getValueType().getScalarType();
+    if (VDataType == MVT::i8 || VDataType == MVT::i16)
+      return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
     return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
                                    M->getMemoryVT(), M->getMemOperand());
   }
 
+  case Intrinsic::amdgcn_buffer_atomic_fadd: {
+    unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+    unsigned IdxEn = 1;
+    if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+      IdxEn = Idx->getZExtValue() != 0;
+    SDValue Ops[] = {
+      Chain,
+      Op.getOperand(2), // vdata
+      Op.getOperand(3), // rsrc
+      Op.getOperand(4), // vindex
+      SDValue(),        // voffset -- will be set by setBufferOffsets
+      SDValue(),        // soffset -- will be set by setBufferOffsets
+      SDValue(),        // offset -- will be set by setBufferOffsets
+      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+    };
+    setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+    EVT VT = Op.getOperand(2).getValueType();
+
+    auto *M = cast<MemSDNode>(Op);
+    unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD
+                                    : AMDGPUISD::BUFFER_ATOMIC_FADD;
+
+    return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+                                   M->getMemOperand());
+  }
+
+  case Intrinsic::amdgcn_global_atomic_fadd: {
+    SDValue Ops[] = {
+      Chain,
+      Op.getOperand(2), // ptr
+      Op.getOperand(3)  // vdata
+    };
+    EVT VT = Op.getOperand(3).getValueType();
+
+    auto *M = cast<MemSDNode>(Op);
+    unsigned Opcode = VT.isVector() ? AMDGPUISD::ATOMIC_PK_FADD
+                                    : AMDGPUISD::ATOMIC_FADD;
+
+    return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+                                   M->getMemOperand());
+  }
+
+  case Intrinsic::amdgcn_end_cf:
+    return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
+                                      Op->getOperand(2), Chain), 0);
+
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -6283,6 +7040,38 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
   Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
 }
 
+// Handle 8 bit and 16 bit buffer loads
+SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
+                                                     EVT LoadVT, SDLoc DL,
+                                                     ArrayRef<SDValue> Ops,
+                                                     MemSDNode *M) const {
+  EVT IntVT = LoadVT.changeTypeToInteger();
+  unsigned Opc = (LoadVT.getScalarType() == MVT::i8) ?
+         AMDGPUISD::BUFFER_LOAD_UBYTE : AMDGPUISD::BUFFER_LOAD_USHORT;
+
+  SDVTList ResList = DAG.getVTList(MVT::i32, MVT::Other);
+  SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList,
+                                               Ops, IntVT,
+                                               M->getMemOperand());
+  SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL,
+                                        LoadVT.getScalarType(), BufferLoad);
+  return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL);
+}
+
+// Handle 8 bit and 16 bit buffer stores
+SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
+                                                      EVT VDataType, SDLoc DL,
+                                                      SDValue Ops[],
+                                                      MemSDNode *M) const {
+  SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
+  Ops[1] = BufferStoreExt;
+  unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
+                                 AMDGPUISD::BUFFER_STORE_SHORT;
+  ArrayRef<SDValue> OpsRef = makeArrayRef(&Ops[0], 9);
+  return DAG.getMemIntrinsicNode(Opc, DL, M->getVTList(), OpsRef, VDataType,
+                                     M->getMemOperand());
+}
+
 static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
                                  ISD::LoadExtType ExtType, SDValue Op,
                                  const SDLoc &SL, EVT VT) {
@@ -6395,8 +7184,25 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
                                    BasePtr, RealMemVT, MMO);
 
+    if (!MemVT.isVector()) {
+      SDValue Ops[] = {
+        DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
+        NewLD.getValue(1)
+      };
+
+      return DAG.getMergeValues(Ops, DL);
+    }
+
+    SmallVector<SDValue, 3> Elts;
+    for (unsigned I = 0, N = MemVT.getVectorNumElements(); I != N; ++I) {
+      SDValue Elt = DAG.getNode(ISD::SRL, DL, MVT::i32, NewLD,
+                                DAG.getConstant(I, DL, MVT::i32));
+
+      Elts.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Elt));
+    }
+
     SDValue Ops[] = {
-      DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
+      DAG.getBuildVector(MemVT, DL, Elts),
       NewLD.getValue(1)
     };
 
@@ -6409,15 +7215,21 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
          "Custom lowering for non-i32 vectors hasn't been implemented.");
 
-  unsigned Alignment = Load->getAlignment();
-  unsigned AS = Load->getAddressSpace();
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                          AS, Alignment)) {
+                          *Load->getMemOperand())) {
     SDValue Ops[2];
     std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
     return DAG.getMergeValues(Ops, DL);
   }
 
+  unsigned Alignment = Load->getAlignment();
+  unsigned AS = Load->getAddressSpace();
+  if (Subtarget->hasLDSMisalignedBug() &&
+      AS == AMDGPUAS::FLAT_ADDRESS &&
+      Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
+    return SplitVectorLoad(Op, DAG);
+  }
+
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   // If there is a possibilty that flat instruction access scratch memory
@@ -6430,8 +7242,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
   if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
       AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
-    if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32)
-      return SDValue();
+    if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) {
+      if (MemVT.isPow2VectorType())
+        return SDValue();
+      if (NumElements == 3)
+        return WidenVectorLoad(Op, DAG);
+      return SplitVectorLoad(Op, DAG);
+    }
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
     // loads.
@@ -6443,8 +7260,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
       AS == AMDGPUAS::GLOBAL_ADDRESS) {
     if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
         !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
-        Alignment >= 4 && NumElements < 32)
-      return SDValue();
+        Alignment >= 4 && NumElements < 32) {
+      if (MemVT.isPow2VectorType())
+        return SDValue();
+      if (NumElements == 3)
+        return WidenVectorLoad(Op, DAG);
+      return SplitVectorLoad(Op, DAG);
+    }
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
     // loads.
@@ -6456,7 +7278,10 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
       AS == AMDGPUAS::FLAT_ADDRESS) {
     if (NumElements > 4)
       return SplitVectorLoad(Op, DAG);
-    // v4 loads are supported for private and global memory.
+    // v3 loads not supported on SI.
+    if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+      return WidenVectorLoad(Op, DAG);
+    // v3 and v4 loads are supported for private and global memory.
     return SDValue();
   }
   if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
@@ -6474,11 +7299,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
       // Same as global/flat
       if (NumElements > 4)
         return SplitVectorLoad(Op, DAG);
+      // v3 loads not supported on SI.
+      if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+        return WidenVectorLoad(Op, DAG);
       return SDValue();
     default:
       llvm_unreachable("unsupported private_element_size");
     }
-  } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+  } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
     // Use ds_read_b128 if possible.
     if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
         MemVT.getStoreSize() == 16)
@@ -6794,7 +7622,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue Scale;
 
-  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+  if (!Subtarget->hasUsableDivScaleConditionOutput()) {
     // Workaround a hardware bug on SI where the condition output from div_scale
     // is not usable.
 
@@ -6856,12 +7684,18 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   assert(VT.isVector() &&
          Store->getValue().getValueType().getScalarType() == MVT::i32);
 
-  unsigned AS = Store->getAddressSpace();
   if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                          AS, Store->getAlignment())) {
+                          *Store->getMemOperand())) {
     return expandUnalignedStore(Store, DAG);
   }
 
+  unsigned AS = Store->getAddressSpace();
+  if (Subtarget->hasLDSMisalignedBug() &&
+      AS == AMDGPUAS::FLAT_ADDRESS &&
+      Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
+    return SplitVectorStore(Op, DAG);
+  }
+
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   // If there is a possibilty that flat instruction access scratch memory
@@ -6875,6 +7709,9 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
       AS == AMDGPUAS::FLAT_ADDRESS) {
     if (NumElements > 4)
       return SplitVectorStore(Op, DAG);
+    // v3 stores not supported on SI.
+    if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+      return SplitVectorStore(Op, DAG);
     return SDValue();
   } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
     switch (Subtarget->getMaxPrivateElementSize()) {
@@ -6885,16 +7722,16 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
         return SplitVectorStore(Op, DAG);
       return SDValue();
     case 16:
-      if (NumElements > 4)
+      if (NumElements > 4 || NumElements == 3)
         return SplitVectorStore(Op, DAG);
       return SDValue();
     default:
       llvm_unreachable("unsupported private_element_size");
     }
-  } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+  } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
     // Use ds_write_b128 if possible.
     if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
-        VT.getStoreSize() == 16)
+        VT.getStoreSize() == 16 && NumElements != 3)
       return SDValue();
 
     if (NumElements > 2)
@@ -6905,7 +7742,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     // out-of-bounds even if base + offsets is in bounds. Split vectorized
     // stores here to avoid emitting ds_write2_b32. We may re-combine the
     // store later in the SILoadStoreOptimizer.
-    if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+    if (!Subtarget->hasUsableDSOffset() &&
         NumElements == 2 && VT.getStoreSize() == 8 &&
         Store->getAlignment() < 8) {
       return SplitVectorStore(Op, DAG);
@@ -7614,6 +8451,43 @@ SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performSignExtendInRegCombine(SDNode *N,
+                                                        DAGCombinerInfo &DCI)
+                                                        const {
+  SDValue Src = N->getOperand(0);
+  auto *VTSign = cast<VTSDNode>(N->getOperand(1));
+
+  if (((Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE &&
+      VTSign->getVT() == MVT::i8) ||
+      (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_USHORT &&
+      VTSign->getVT() == MVT::i16)) &&
+      Src.hasOneUse()) {
+    auto *M = cast<MemSDNode>(Src);
+    SDValue Ops[] = {
+      Src.getOperand(0), // Chain
+      Src.getOperand(1), // rsrc
+      Src.getOperand(2), // vindex
+      Src.getOperand(3), // voffset
+      Src.getOperand(4), // soffset
+      Src.getOperand(5), // offset
+      Src.getOperand(6),
+      Src.getOperand(7)
+    };
+    // replace with BUFFER_LOAD_BYTE/SHORT
+    SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
+                                         Src.getOperand(0).getValueType());
+    unsigned Opc = (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE) ?
+                   AMDGPUISD::BUFFER_LOAD_BYTE : AMDGPUISD::BUFFER_LOAD_SHORT;
+    SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc, SDLoc(N),
+                                                          ResList,
+                                                          Ops, M->getMemoryVT(),
+                                                          M->getMemOperand());
+    return DCI.DAG.getMergeValues({BufferLoadSignExt,
+                                  BufferLoadSignExt.getValue(1)}, SDLoc(N));
+  }
+  return SDValue();
+}
+
 SDValue SITargetLowering::performClassCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -8013,9 +8887,12 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
   if (Cmp == APFloat::cmpGreaterThan)
     return SDValue();
 
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
   // TODO: Check IEEE bit enabled?
   EVT VT = Op0.getValueType();
-  if (Subtarget->enableDX10Clamp()) {
+  if (Info->getMode().DX10Clamp) {
     // If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
     // hardware fmed3 behavior converting to a min.
     // FIXME: Should this be allowing -0.0?
@@ -8059,10 +8936,10 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
   // Only do this if the inner op has one use since this will just increases
   // register pressure for no benefit.
 
-
   if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
-      !VT.isVector() && VT != MVT::f64 &&
-      ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
+      !VT.isVector() &&
+      (VT == MVT::i32 || VT == MVT::f32 ||
+       ((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
     // max(max(a, b), c) -> max3(a, b, c)
     // min(min(a, b), c) -> min3(a, b, c)
     if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
@@ -8149,9 +9026,12 @@ SDValue SITargetLowering::performFMed3Combine(SDNode *N,
     return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
   }
 
+  const MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
   // FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
   // handling no dx10-clamp?
-  if (Subtarget->enableDX10Clamp()) {
+  if (Info->getMode().DX10Clamp) {
     // If NaNs is clamped to 0, we are free to reorder the inputs.
 
     if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
@@ -8342,8 +9222,10 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
 
   // Only do this if we are not trying to support denormals. v_mad_f32 does not
   // support denormals ever.
-  if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
-      (VT == MVT::f16 && !Subtarget->hasFP16Denormals()))
+  if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+       (VT == MVT::f16 && !Subtarget->hasFP16Denormals() &&
+        getSubtarget()->hasMadF16())) &&
+       isOperationLegal(ISD::FMAD, VT))
     return ISD::FMAD;
 
   const TargetOptions &Options = DAG.getTarget().Options;
@@ -8357,6 +9239,46 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
   return 0;
 }
 
+// Reassociate op x, (op y, z) -> op (op x, z), y when x and z are uniform
+// (non-divergent) and y is divergent. Returns an empty SDValue otherwise.
+SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
+                                               SelectionDAG &DAG) const {
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  unsigned Opc = N->getOpcode();
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  // Exactly one of the two operands of N must be divergent.
+  if (!(Op0->isDivergent() ^ Op1->isDivergent()))
+    return SDValue();
+  // Canonicalize so that Op0 is the uniform operand and Op1 the divergent one.
+  if (Op0->isDivergent())
+    std::swap(Op0, Op1);
+  // The divergent operand must be the same opcode and have a single use.
+  if (Op1.getOpcode() != Opc || !Op1.hasOneUse())
+    return SDValue();
+
+  SDValue Op2 = Op1.getOperand(1);
+  Op1 = Op1.getOperand(0);
+  if (!(Op1->isDivergent() ^ Op2->isDivergent()))
+    return SDValue();
+  // Canonicalize so that Op1 is uniform and Op2 is divergent.
+  if (Op1->isDivergent())
+    std::swap(Op1, Op2);
+
+  // If either operand is constant this will conflict with
+  // DAGCombiner::ReassociateOps().
+  if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
+      DAG.isConstantIntBuildVectorOrConstantInt(Op1))
+    return SDValue();
+
+  SDLoc SL(N);
+  SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1); // Uniform pair first.
+  return DAG.getNode(Opc, SL, VT, Add1, Op2);
+}
+
 static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
                            EVT VT,
                            SDValue N0, SDValue N1, SDValue N2,
@@ -8405,6 +9327,10 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
     return SDValue();
   }
 
+  if (SDValue V = reassociateScalarOps(N, DAG)) {
+    return V;
+  }
+
   if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
     return SDValue();
 
@@ -8452,14 +9378,10 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
-  unsigned Opc = LHS.getOpcode();
-  if (Opc != ISD::SUBCARRY)
-    std::swap(RHS, LHS);
-
   if (LHS.getOpcode() == ISD::SUBCARRY) {
     // sub (subcarry x, 0, cc), y => subcarry x, y, cc
     auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
-    if (!C || C->getZExtValue() != 0)
+    if (!C || !C->isNullValue())
       return SDValue();
     SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
     return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
@@ -8587,7 +9509,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SDLoc SL(N);
 
-  if (!Subtarget->hasDotInsts() || VT != MVT::f32)
+  if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
     return SDValue();
 
   // FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
@@ -8801,11 +9723,13 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
   if (!CSrc)
     return SDValue();
 
+  const MachineFunction &MF = DCI.DAG.getMachineFunction();
   const APFloat &F = CSrc->getValueAPF();
   APFloat Zero = APFloat::getZero(F.getSemantics());
   APFloat::cmpResult Cmp0 = F.compare(Zero);
   if (Cmp0 == APFloat::cmpLessThan ||
-      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+      (Cmp0 == APFloat::cmpUnordered &&
+       MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
     return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
   }
 
@@ -8822,7 +9746,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
     return SDValue();
-
   switch (N->getOpcode()) {
   default:
     return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -8873,11 +9796,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ATOMIC_LOAD_MAX:
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_FADD:
   case AMDGPUISD::ATOMIC_INC:
   case AMDGPUISD::ATOMIC_DEC:
-  case AMDGPUISD::ATOMIC_LOAD_FADD:
   case AMDGPUISD::ATOMIC_LOAD_FMIN:
-  case AMDGPUISD::ATOMIC_LOAD_FMAX:  // TODO: Target mem intrinsics.
+  case AMDGPUISD::ATOMIC_LOAD_FMAX: // TODO: Target mem intrinsics.
     if (DCI.isBeforeLegalize())
       break;
     return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
@@ -8889,6 +9812,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performXorCombine(N, DCI);
   case ISD::ZERO_EXTEND:
     return performZeroExtendCombine(N, DCI);
+  case ISD::SIGN_EXTEND_INREG:
+    return performSignExtendInRegCombine(N , DCI);
   case AMDGPUISD::FP_CLASS:
     return performClassCombine(N, DCI);
   case ISD::FCANONICALIZE:
@@ -9034,6 +9959,10 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
   // Don't allow 0 dmask, as hardware assumes one channel enabled.
   bool NoChannels = !NewDmask;
   if (NoChannels) {
+    if (!UsesTFC) {
+      // No uses of the result and not using TFC. Then do nothing.
+      return Node;
+    }
     // If the original dmask has one channel - then nothing to do
     if (OldBitsSet == 1)
       return Node;
@@ -9205,7 +10134,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
       break;
 
     MVT VT = Src0.getValueType().getSimpleVT();
-    const TargetRegisterClass *RC = getRegClassFor(VT);
+    const TargetRegisterClass *RC =
+        getRegClassFor(VT, Src0.getNode()->isDivergent());
 
     MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -9238,6 +10168,24 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
     Ops.push_back(ImpDef.getValue(1));
     return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
   }
+  case AMDGPU::V_PERMLANE16_B32:
+  case AMDGPU::V_PERMLANEX16_B32: {
+    ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0));
+    ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2));
+    if (!FI->getZExtValue() && !BC->getZExtValue())
+      break;
+    SDValue VDstIn = Node->getOperand(6);
+    if (VDstIn.isMachineOpcode()
+        && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF)
+      break;
+    MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                               SDLoc(Node), MVT::i32);
+    SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1),
+                                    SDValue(BC, 0), Node->getOperand(3),
+                                    Node->getOperand(4), Node->getOperand(5),
+                                    SDValue(ImpDef, 0), Node->getOperand(7) };
+    return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+  }
   default:
     break;
   }
@@ -9256,6 +10204,36 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
   if (TII->isVOP3(MI.getOpcode())) {
     // Make sure constant bus requirements are respected.
     TII->legalizeOperandsVOP3(MRI, MI);
+
+    // Prefer VGPRs over AGPRs in mAI instructions where possible.
+    // This saves a chain-copy of registers and better balances register
+    // use between vgpr and agpr as agpr tuples tend to be big.
+    if (const MCOperandInfo *OpInfo = MI.getDesc().OpInfo) {
+      unsigned Opc = MI.getOpcode();
+      const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+      for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+                      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) }) {
+        if (I == -1)
+          break;
+        MachineOperand &Op = MI.getOperand(I);
+        if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
+             OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
+            !TargetRegisterInfo::isVirtualRegister(Op.getReg()) ||
+            !TRI->isAGPR(MRI, Op.getReg()))
+          continue;
+        auto *Src = MRI.getUniqueVRegDef(Op.getReg());
+        if (!Src || !Src->isCopy() ||
+            !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
+          continue;
+        auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+        auto *NewRC = TRI->getEquivalentVGPRClass(RC);
+        // All uses of agpr64 and agpr32 can also accept vgpr except for
+        // v_accvgpr_read, but we do not produce agpr reads during selection,
+        // so no use checks are needed.
+        MRI.setRegClass(Op.getReg(), NewRC);
+      }
+    }
+
     return;
   }
 
@@ -9391,9 +10369,15 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       case 64:
         RC = &AMDGPU::SGPR_64RegClass;
         break;
+      case 96:
+        RC = &AMDGPU::SReg_96RegClass;
+        break;
       case 128:
         RC = &AMDGPU::SReg_128RegClass;
         break;
+      case 160:
+        RC = &AMDGPU::SReg_160RegClass;
+        break;
       case 256:
         RC = &AMDGPU::SReg_256RegClass;
         break;
@@ -9419,6 +10403,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       case 128:
         RC = &AMDGPU::VReg_128RegClass;
         break;
+      case 160:
+        RC = &AMDGPU::VReg_160RegClass;
+        break;
       case 256:
         RC = &AMDGPU::VReg_256RegClass;
         break;
@@ -9427,6 +10414,29 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
         break;
       }
       break;
+    case 'a':
+      switch (VT.getSizeInBits()) {
+      default:
+        return std::make_pair(0U, nullptr);
+      case 32:
+      case 16:
+        RC = &AMDGPU::AGPR_32RegClass;
+        break;
+      case 64:
+        RC = &AMDGPU::AReg_64RegClass;
+        break;
+      case 128:
+        RC = &AMDGPU::AReg_128RegClass;
+        break;
+      case 512:
+        RC = &AMDGPU::AReg_512RegClass;
+        break;
+      case 1024:
+        RC = &AMDGPU::AReg_1024RegClass;
+        // v32 types are not legal but we support them here.
+        return std::make_pair(0U, RC);
+      }
+      break;
     }
     // We actually support i128, i16 and f16 as inline parameters
     // even if they are not reported as legal
@@ -9440,6 +10450,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       RC = &AMDGPU::VGPR_32RegClass;
     } else if (Constraint[1] == 's') {
       RC = &AMDGPU::SGPR_32RegClass;
+    } else if (Constraint[1] == 'a') {
+      RC = &AMDGPU::AGPR_32RegClass;
     }
 
     if (RC) {
@@ -9459,6 +10471,7 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
     default: break;
     case 's':
     case 'v':
+    case 'a':
       return C_RegisterClass;
     }
   }
@@ -9471,7 +10484,7 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
 void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
 
   if (Info->isEntryFunction()) {
@@ -9479,31 +10492,45 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
     reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
   }
 
-  // We have to assume the SP is needed in case there are calls in the function
-  // during lowering. Calls are only detected after the function is
-  // lowered. We're about to reserve registers, so don't bother using it if we
-  // aren't really going to use it.
-  bool NeedSP = !Info->isEntryFunction() ||
-    MFI.hasVarSizedObjects() ||
-    MFI.hasCalls();
+  assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+                             Info->getStackPtrOffsetReg()));
+  if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
+    MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
 
-  if (NeedSP) {
-    unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
-    Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+  // We need to worry about replacing the default register with itself in case
+  // of MIR testcases missing the MFI.
+  if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
+    MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
 
-    assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
-    assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
-                               Info->getStackPtrOffsetReg()));
-    MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
-  }
+  if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
+    MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
 
-  MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
-  MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
-  MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
-                     Info->getScratchWaveOffsetReg());
+  if (Info->getScratchWaveOffsetReg() != AMDGPU::SCRATCH_WAVE_OFFSET_REG) {
+    MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
+                       Info->getScratchWaveOffsetReg());
+  }
 
   Info->limitOccupancy(MF);
 
+  if (ST.isWave32() && !MF.empty()) {
+    // Add VCC_HI def because many instructions marked as imp-use VCC where
+    // we may only define VCC_LO. If nothing defines VCC_HI we may end up
+    // having a use of undef.
+
+    const SIInstrInfo *TII = ST.getInstrInfo();
+    DebugLoc DL;
+
+    MachineBasicBlock &MBB = MF.front();
+    MachineBasicBlock::iterator I = MBB.getFirstNonDebugInstr();
+    BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), AMDGPU::VCC_HI);
+
+    for (auto &MBB : MF) {
+      for (auto &MI : MBB) {
+        TII->fixImplicitOperands(MI);
+      }
+    }
+  }
+
   TargetLoweringBase::finalizeLowering(MF);
 }
 
@@ -9515,14 +10542,81 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
   TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
                                                 DAG, Depth);
 
-  if (getSubtarget()->enableHugePrivateBuffer())
-    return;
-
-  // Technically it may be possible to have a dispatch with a single workitem
-  // that uses the full private memory size, but that's not really useful. We
-  // can't use vaddr in MUBUF instructions if we don't know the address
+  // Set the high bits to zero based on the maximum allowed scratch size per
+  // wave. We can't use vaddr in MUBUF instructions if we don't know the address
   // calculation won't overflow, so assume the sign bit is never set.
-  Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
+  Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
+}
+
+unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+  const unsigned PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
+  const unsigned CacheLineAlign = 6; // log2(64)
+  // Returns the preferred log2 alignment for ML; may insert S_INST_PREFETCH.
+  // Pre-GFX10 targets did not benefit from loop alignment.
+  if (!ML || DisableLoopAlignment ||
+      (getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) ||
+      getSubtarget()->hasInstFwdPrefetchBug())
+    return PrefAlign;
+
+  // On GFX10 I$ is 4 x 64 bytes cache lines.
+  // By default prefetcher keeps one cache line behind and reads two ahead.
+  // We can modify it with S_INST_PREFETCH for larger loops to have two lines
+  // behind and one ahead.
+  // Therefore we can benefit from aligning loop headers if loop fits 192 bytes.
+  // If loop fits 64 bytes it always spans no more than two cache lines and
+  // does not need an alignment.
+  // Else if loop is <= 128 bytes we do not need to modify prefetch,
+  // Else if loop is <= 192 bytes we need two lines behind.
+
+  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+  const MachineBasicBlock *Header = ML->getHeader();
+  if (Header->getAlignment() != PrefAlign)
+    return Header->getAlignment(); // Already processed.
+
+  unsigned LoopSize = 0;
+  for (const MachineBasicBlock *MBB : ML->blocks()) {
+    // If inner loop block is aligned assume on average half of the alignment
+    // size to be added as nops.
+    if (MBB != Header)
+      LoopSize += (1 << MBB->getAlignment()) / 2;
+
+    for (const MachineInstr &MI : *MBB) {
+      LoopSize += TII->getInstSizeInBytes(MI);
+      if (LoopSize > 192)
+        return PrefAlign;
+    }
+  }
+
+  if (LoopSize <= 64)
+    return PrefAlign; // Spans at most two cache lines; no alignment needed.
+
+  if (LoopSize <= 128)
+    return CacheLineAlign; // Default prefetch mode is sufficient.
+
+  // If any of parent loops is surrounded by prefetch instructions do not
+  // insert new for inner loop, which would reset parent's settings.
+  for (MachineLoop *P = ML->getParentLoop(); P; P = P->getParentLoop()) {
+    if (MachineBasicBlock *Exit = P->getExitBlock()) {
+      auto I = Exit->getFirstNonDebugInstr();
+      if (I != Exit->end() && I->getOpcode() == AMDGPU::S_INST_PREFETCH)
+        return CacheLineAlign;
+    }
+  }
+
+  MachineBasicBlock *Pre = ML->getLoopPreheader();
+  MachineBasicBlock *Exit = ML->getExitBlock();
+  // Bracket the loop with prefetch-mode changes only if both blocks exist.
+  if (Pre && Exit) {
+    BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
+            TII->get(AMDGPU::S_INST_PREFETCH))
+      .addImm(1); // prefetch 2 lines behind PC
+
+    BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
+            TII->get(AMDGPU::S_INST_PREFETCH))
+      .addImm(2); // prefetch 1 line behind PC
+  }
+
+  return CacheLineAlign;
 }
 
 LLVM_ATTRIBUTE_UNUSED
@@ -9531,7 +10625,8 @@ static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
   do {
     // Follow the chain until we find an INLINEASM node.
     N = N->getOperand(0).getNode();
-    if (N->getOpcode() == ISD::INLINEASM)
+    if (N->getOpcode() == ISD::INLINEASM ||
+        N->getOpcode() == ISD::INLINEASM_BR)
       return true;
   } while (N->getOpcode() == ISD::CopyFromReg);
   return false;
@@ -9616,7 +10711,10 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                     bool SNaN,
                                                     unsigned Depth) const {
   if (Op.getOpcode() == AMDGPUISD::CLAMP) {
-    if (Subtarget->enableDX10Clamp())
+    const MachineFunction &MF = DAG.getMachineFunction();
+    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+    if (Info->getMode().DX10Clamp)
       return true; // Clamped to 0.
     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
   }
@@ -9624,3 +10722,29 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
   return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
                                                             SNaN, Depth);
 }
+
+TargetLowering::AtomicExpansionKind
+SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+  switch (RMW->getOperation()) {
+  case AtomicRMWInst::FAdd: {
+    Type *Ty = RMW->getType();
+
+    // We don't have a way to support 16-bit atomics now, so just leave them
+    // as-is.
+    if (Ty->isHalfTy())
+      return AtomicExpansionKind::None;
+
+    if (!Ty->isFloatTy())
+      return AtomicExpansionKind::CmpXChg; // Non-f32 types use a cmpxchg loop.
+    // f32 is left unexpanded only for LOCAL_ADDRESS with hasLDSFPAtomics().
+    // TODO: Do have these for flat. Older targets also had them for buffers.
+    unsigned AS = RMW->getPointerAddressSpace();
+    return (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) ?
+      AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
+  }
+  default:
+    break;
+  }
+  // All other operations defer to the generic AMDGPU handling.
+  return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index bcef519ee663..21a215e16ce7 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -1,9 +1,8 @@
 //===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,7 +60,7 @@ private:
   SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
                      SelectionDAG &DAG) const;
   SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
-                       SDValue GLC, SelectionDAG &DAG) const;
+                       SDValue GLC, SDValue DLC, SelectionDAG &DAG) const;
 
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -90,11 +89,17 @@ private:
   SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
                               SelectionDAG &DAG, ArrayRef<SDValue> Ops,
                               bool IsIntrinsic = false) const;
 
+  // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
+  // dwordx4 if on SI.
+  SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+                              ArrayRef<SDValue> Ops, EVT MemVT,
+                              MachineMemOperand *MMO, SelectionDAG &DAG) const;
+
   SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
 
   /// Converts \p Op, which must be of floating point type, to the
@@ -116,8 +121,10 @@ private:
                              SelectionDAG &DAG) const;
 
   SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
@@ -141,6 +148,7 @@ private:
   SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
                                  const APFloat &C) const;
@@ -156,6 +164,7 @@ private:
   SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
+  SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
   unsigned getFusedOpcode(const SelectionDAG &DAG,
                           const SDNode *N0, const SDNode *N1) const;
   SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -174,8 +183,6 @@ private:
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
 
-  void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
-
   /// \returns True if fixup needs to be emitted for given global value \p GV,
   /// false otherwise.
   bool shouldEmitFixup(const GlobalValue *GV) const;
@@ -194,6 +201,15 @@ private:
   void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
                         SDValue *Offsets, unsigned Align = 4) const;
 
+  // Handle 8 bit and 16 bit buffer loads
+  SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
+                                     ArrayRef<SDValue> Ops, MemSDNode *M) const;
+
+  // Handle 8 bit and 16 bit buffer stores
+  SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
+                                      SDLoc DL, SDValue Ops[],
+                                      MemSDNode *M) const;
+
 public:
   SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
 
@@ -219,20 +235,21 @@ public:
   bool canMergeStoresTo(unsigned AS, EVT MemVT,
                         const SelectionDAG &DAG) const override;
 
-  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
-                                      unsigned Align,
-                                      bool *IsFast) const override;
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AS, unsigned Align,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *IsFast = nullptr) const override;
 
   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
                           unsigned SrcAlign, bool IsMemset,
                           bool ZeroMemset,
                           bool MemcpyStrSrc,
-                          MachineFunction &MF) const override;
+                          const AttributeList &FuncAttributes) const override;
 
   bool isMemOpUniform(const SDNode *N) const;
   bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
-  bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+  bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
 
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(MVT VT) const override;
@@ -298,6 +315,9 @@ public:
   MachineBasicBlock *splitKillBlock(MachineInstr &MI,
                                     MachineBasicBlock *BB) const;
 
+  MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
+                                            MachineBasicBlock *BB) const;
+
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *BB) const override;
@@ -352,6 +372,9 @@ public:
                                     const SelectionDAG &DAG,
                                     bool SNaN = false,
                                     unsigned Depth = 0) const override;
+  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+  unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
 
 } // End namespace llvm
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba21a5ce1293..87e63fcc4a04 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,9 +1,8 @@
 //===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,15 +92,13 @@ INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
 
 char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
 
-static bool opcodeEmitsNoInsts(unsigned Opc) {
-  switch (Opc) {
-  case TargetOpcode::IMPLICIT_DEF:
-  case TargetOpcode::KILL:
-  case TargetOpcode::BUNDLE:
-  case TargetOpcode::CFI_INSTRUCTION:
-  case TargetOpcode::EH_LABEL:
-  case TargetOpcode::GC_LABEL:
-  case TargetOpcode::DBG_VALUE:
+static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
+  if (MI.isMetaInstruction())
+    return true;
+
+  // Handle target specific opcodes.
+  switch (MI.getOpcode()) {
+  case AMDGPU::SI_MASK_BRANCH:
     return true;
   default:
     return false;
@@ -110,9 +107,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) {
 
 bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
                                const MachineBasicBlock &To) const {
-  if (From.succ_empty())
-    return false;
-
   unsigned NumInstr = 0;
   const MachineFunction *MF = From.getParent();
 
@@ -122,7 +116,7 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
 
     for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
          NumInstr < SkipThreshold && I != E; ++I) {
-      if (opcodeEmitsNoInsts(I->getOpcode()))
+      if (opcodeEmitsNoInsts(*I))
         continue;
 
       // FIXME: Since this is required for correctness, this should be inserted
@@ -138,6 +132,11 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
       if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
         return true;
 
+      // These instructions are potentially expensive even if EXEC = 0.
+      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+          I->getOpcode() == AMDGPU::S_WAITCNT)
+        return true;
+
       ++NumInstr;
       if (NumInstr >= SkipThreshold)
         return true;
@@ -177,7 +176,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
     .addImm(0); // en
 
   // ... and terminate wavefront.
-  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
 
   return true;
 }
@@ -245,6 +244,10 @@ void SIInsertSkips::kill(MachineInstr &MI) {
       llvm_unreachable("invalid ISD:SET cond code");
     }
 
+    const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+    if (ST.hasNoSdstCMPX())
+      Opcode = AMDGPU::getVCMPXNoSDstOp(Opcode);
+
     assert(MI.getOperand(0).isReg());
 
     if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
@@ -254,17 +257,23 @@ void SIInsertSkips::kill(MachineInstr &MI) {
           .add(MI.getOperand(1))
           .add(MI.getOperand(0));
     } else {
-      BuildMI(MBB, &MI, DL, TII->get(Opcode))
-          .addReg(AMDGPU::VCC, RegState::Define)
-          .addImm(0)  // src0 modifiers
-          .add(MI.getOperand(1))
-          .addImm(0)  // src1 modifiers
-          .add(MI.getOperand(0))
-          .addImm(0);  // omod
+      auto I = BuildMI(MBB, &MI, DL, TII->get(Opcode));
+      if (!ST.hasNoSdstCMPX())
+        I.addReg(AMDGPU::VCC, RegState::Define);
+
+      I.addImm(0)  // src0 modifiers
+        .add(MI.getOperand(1))
+        .addImm(0)  // src1 modifiers
+        .add(MI.getOperand(0));
+
+      I.addImm(0);  // omod
     }
     break;
   }
   case AMDGPU::SI_KILL_I1_TERMINATOR: {
+    const MachineFunction *MF = MI.getParent()->getParent();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     const MachineOperand &Op = MI.getOperand(0);
     int64_t KillVal = MI.getOperand(1).getImm();
     assert(KillVal == 0 || KillVal == -1);
@@ -275,14 +284,17 @@ void SIInsertSkips::kill(MachineInstr &MI) {
       assert(Imm == 0 || Imm == -1);
 
       if (Imm == KillVal)
-        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+        BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
+                                                     : AMDGPU::S_MOV_B64), Exec)
           .addImm(0);
       break;
     }
 
     unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
-    BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
-        .addReg(AMDGPU::EXEC)
+    if (ST.isWave32())
+      Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32;
+    BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
+        .addReg(Exec)
         .add(Op);
     break;
   }
@@ -331,9 +343,11 @@ bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
   // S_CBRANCH_EXEC[N]Z
   bool Changed = false;
   MachineBasicBlock &MBB = *MI.getParent();
-  const unsigned CondReg = AMDGPU::VCC;
-  const unsigned ExecReg = AMDGPU::EXEC;
-  const unsigned And = AMDGPU::S_AND_B64;
+  const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+  const bool IsWave32 = ST.isWave32();
+  const unsigned CondReg = TRI->getVCC();
+  const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+  const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
 
   MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
                                       E = MBB.rend();
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index afc0b4467610..c89d5b71ec5c 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1,9 +1,8 @@
 //===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -69,10 +68,10 @@ DEBUG_COUNTER(ForceLgkmCounter, DEBUG_TYPE"-forcelgkm",
 DEBUG_COUNTER(ForceVMCounter, DEBUG_TYPE"-forcevm",
               "Force emit s_waitcnt vmcnt(0) instrs");
 
-static cl::opt<unsigned> ForceEmitZeroFlag(
+static cl::opt<bool> ForceEmitZeroFlag(
   "amdgpu-waitcnt-forcezero",
   cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
-  cl::init(0), cl::Hidden);
+  cl::init(false), cl::Hidden);
 
 namespace {
 
@@ -101,7 +100,7 @@ public:
 
 #define CNT_MASK(t) (1u << (t))
 
-enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, VS_CNT, NUM_INST_CNTS };
 
 iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
   return make_range(enum_iterator<InstCounterType>(VM_CNT),
@@ -114,6 +113,7 @@ struct {
   uint32_t VmcntMax;
   uint32_t ExpcntMax;
   uint32_t LgkmcntMax;
+  uint32_t VscntMax;
   int32_t NumVGPRsMax;
   int32_t NumSGPRsMax;
 } HardwareLimits;
@@ -127,6 +127,8 @@ struct {
 
 enum WaitEventType {
   VMEM_ACCESS,      // vector-memory read & write
+  VMEM_READ_ACCESS, // vector-memory read
+  VMEM_WRITE_ACCESS,// vector-memory write
   LDS_ACCESS,       // lds read & write
   GDS_ACCESS,       // gds read & write
   SQ_MESSAGE,       // send message
@@ -140,11 +142,12 @@ enum WaitEventType {
 };
 
 static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
-  (1 << VMEM_ACCESS),
+  (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
   (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
       (1 << SQ_MESSAGE),
   (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
       (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
+  (1 << VMEM_WRITE_ACCESS)
 };
 
 // The mapping is:
@@ -172,6 +175,9 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
   case LGKM_CNT:
     Wait.LgkmCnt = std::min(Wait.LgkmCnt, Count);
     break;
+  case VS_CNT:
+    Wait.VsCnt = std::min(Wait.VsCnt, Count);
+    break;
   default:
     llvm_unreachable("bad InstCounterType");
   }
@@ -200,6 +206,8 @@ public:
       return HardwareLimits.LgkmcntMax;
     case EXP_CNT:
       return HardwareLimits.ExpcntMax;
+    case VS_CNT:
+      return HardwareLimits.VscntMax;
     default:
       break;
     }
@@ -222,10 +230,12 @@ public:
 
   // Mapping from event to counter.
   InstCounterType eventCounter(WaitEventType E) {
-    if (E == VMEM_ACCESS)
+    if (WaitEventMaskForInst[VM_CNT] & (1 << E))
       return VM_CNT;
     if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
       return LGKM_CNT;
+    if (WaitEventMaskForInst[VS_CNT] & (1 << E))
+      return VS_CNT;
     assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
     return EXP_CNT;
   }
@@ -453,7 +463,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
                                             unsigned OpNo, bool Def) const {
   const MachineOperand &Op = MI->getOperand(OpNo);
   if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
-      (Def && !Op.isDef()))
+      (Def && !Op.isDef()) || TRI->isAGPR(*MRI, Op.getReg()))
     return {-1, -1};
 
   // A use via a PW operand does not need a waitcnt.
@@ -526,20 +536,22 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // Put score on the source vgprs. If this is a store, just use those
     // specific register(s).
     if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+      int AddrOpIdx =
+          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
       // All GDS operations must protect their address register (same as
       // export.)
-      if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
-          Inst.getOpcode() != AMDGPU::DS_CONSUME) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
-            CurrScore);
+      if (AddrOpIdx != -1) {
+        setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
       }
+
       if (Inst.mayStore()) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
-            CurrScore);
+        if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                       AMDGPU::OpName::data0) != -1) {
+          setExpScore(
+              &Inst, TII, TRI, MRI,
+              AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+              CurrScore);
+        }
         if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                        AMDGPU::OpName::data1) != -1) {
           setExpScore(&Inst, TII, TRI, MRI,
@@ -663,6 +675,9 @@ void WaitcntBrackets::print(raw_ostream &OS) {
     case EXP_CNT:
       OS << "    EXP_CNT(" << UB - LB << "): ";
       break;
+    case VS_CNT:
+      OS << "    VS_CNT(" << UB - LB << "): ";
+      break;
     default:
       OS << "    UNKNOWN(" << UB - LB << "): ";
       break;
@@ -702,7 +717,8 @@ void WaitcntBrackets::print(raw_ostream &OS) {
 bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
   return simplifyWaitcnt(VM_CNT, Wait.VmCnt) |
          simplifyWaitcnt(EXP_CNT, Wait.ExpCnt) |
-         simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+         simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt) |
+         simplifyWaitcnt(VS_CNT, Wait.VsCnt);
 }
 
 bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
@@ -745,6 +761,7 @@ void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
   applyWaitcnt(VM_CNT, Wait.VmCnt);
   applyWaitcnt(EXP_CNT, Wait.ExpCnt);
   applyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+  applyWaitcnt(VS_CNT, Wait.VsCnt);
 }
 
 void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
@@ -790,6 +807,21 @@ static bool readsVCCZ(const MachineInstr &MI) {
          !MI.getOperand(1).isUndef();
 }
 
+/// \returns true if the callee inserts an s_waitcnt 0 on function entry.
+static bool callWaitsOnFunctionEntry(const MachineInstr &MI) {
+  // Currently all conventions wait, but this may not always be the case.
+  //
+  // TODO: If IPRA is enabled, and the callee is isSafeForNoCSROpt, it may make
+  // sense to omit the wait and do it in the caller.
+  return true;
+}
+
+/// \returns true if the callee is expected to wait for any outstanding waits
+/// before returning.
+static bool callWaitsOnFunctionReturn(const MachineInstr &MI) {
+  return true;
+}
+
 ///  Generate s_waitcnt instruction to be placed before cur_Inst.
 ///  Instructions of a given type are returned in order,
 ///  but instructions of different types can complete out of order.
@@ -815,7 +847,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   // TODO: Handle other cases of NeedsWaitcntVmBefore()
   if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
       MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
+      MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
+      MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
     Wait.VmCnt = 0;
   }
 
@@ -823,8 +857,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   // NOTE: this could be improved with knowledge of all call sites or
   //   with knowledge of the called routines.
   if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
-      MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
-    Wait = AMDGPU::Waitcnt::allZero();
+      MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+      (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
+    Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
   }
   // Resolve vm waits before gs-done.
   else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -903,91 +938,91 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
       }
     }
 
-#if 0 // TODO: the following code to handle CALL.
-    // The argument passing for CALLs should suffice for VM_CNT and LGKM_CNT.
-    // However, there is a problem with EXP_CNT, because the call cannot
-    // easily tell if a register is used in the function, and if it did, then
-    // the referring instruction would have to have an S_WAITCNT, which is
-    // dependent on all call sites. So Instead, force S_WAITCNT for EXP_CNTs
-    // before the call.
-    if (MI.getOpcode() == SC_CALL) {
-      if (ScoreBrackets->getScoreUB(EXP_CNT) >
-        ScoreBrackets->getScoreLB(EXP_CNT)) {
-        ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
-        EmitWaitcnt |= CNT_MASK(EXP_CNT);
-      }
-    }
-#endif
-
-    // FIXME: Should not be relying on memoperands.
-    // Look at the source operands of every instruction to see if
-    // any of them results from a previous memory operation that affects
-    // its current usage. If so, an s_waitcnt instruction needs to be
-    // emitted.
-    // If the source operand was defined by a load, add the s_waitcnt
-    // instruction.
-    for (const MachineMemOperand *Memop : MI.memoperands()) {
-      unsigned AS = Memop->getAddrSpace();
-      if (AS != AMDGPUAS::LOCAL_ADDRESS)
-        continue;
-      unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
-      // VM_CNT is only relevant to vgpr or LDS.
-      ScoreBrackets.determineWait(
-          VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
-    }
+    if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
+      // Don't bother waiting on anything except the call address. The function
+      // is going to insert a wait on everything in its prolog. This still needs
+      // to be careful if the call target is a load (e.g. a GOT load).
+      Wait = AMDGPU::Waitcnt();
 
-    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-      const MachineOperand &Op = MI.getOperand(I);
-      const MachineRegisterInfo &MRIA = *MRI;
-      RegInterval Interval =
-          ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
+      int CallAddrOpIdx =
+          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+      RegInterval Interval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI,
+                                                          CallAddrOpIdx, false);
       for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
-        if (TRI->isVGPR(MRIA, Op.getReg())) {
-          // VM_CNT is only relevant to vgpr or LDS.
-          ScoreBrackets.determineWait(
-              VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
-        }
         ScoreBrackets.determineWait(
             LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
       }
-    }
-    // End of for loop that looks at all source operands to decide vm_wait_cnt
-    // and lgk_wait_cnt.
-
-    // Two cases are handled for destination operands:
-    // 1) If the destination operand was defined by a load, add the s_waitcnt
-    // instruction to guarantee the right WAW order.
-    // 2) If a destination operand that was used by a recent export/store ins,
-    // add s_waitcnt on exp_cnt to guarantee the WAR order.
-    if (MI.mayStore()) {
+    } else {
       // FIXME: Should not be relying on memoperands.
+      // Look at the source operands of every instruction to see if
+      // any of them results from a previous memory operation that affects
+      // its current usage. If so, an s_waitcnt instruction needs to be
+      // emitted.
+      // If the source operand was defined by a load, add the s_waitcnt
+      // instruction.
       for (const MachineMemOperand *Memop : MI.memoperands()) {
         unsigned AS = Memop->getAddrSpace();
         if (AS != AMDGPUAS::LOCAL_ADDRESS)
           continue;
         unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+        // VM_CNT is only relevant to vgpr or LDS.
         ScoreBrackets.determineWait(
             VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
-        ScoreBrackets.determineWait(
-            EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
       }
-    }
-    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-      MachineOperand &Def = MI.getOperand(I);
-      const MachineRegisterInfo &MRIA = *MRI;
-      RegInterval Interval =
-          ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
-      for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
-        if (TRI->isVGPR(MRIA, Def.getReg())) {
+
+      for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+        const MachineOperand &Op = MI.getOperand(I);
+        const MachineRegisterInfo &MRIA = *MRI;
+        RegInterval Interval =
+            ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
+        for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+          if (TRI->isVGPR(MRIA, Op.getReg())) {
+            // VM_CNT is only relevant to vgpr or LDS.
+            ScoreBrackets.determineWait(
+                VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+          }
+          ScoreBrackets.determineWait(
+              LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+        }
+      }
+      // End of for loop that looks at all source operands to decide vm_wait_cnt
+      // and lgk_wait_cnt.
+
+      // Two cases are handled for destination operands:
+      // 1) If the destination operand was defined by a load, add the s_waitcnt
+      // instruction to guarantee the right WAW order.
+      // 2) If a destination operand that was used by a recent export/store ins,
+      // add s_waitcnt on exp_cnt to guarantee the WAR order.
+      if (MI.mayStore()) {
+        // FIXME: Should not be relying on memoperands.
+        for (const MachineMemOperand *Memop : MI.memoperands()) {
+          unsigned AS = Memop->getAddrSpace();
+          if (AS != AMDGPUAS::LOCAL_ADDRESS)
+            continue;
+          unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
           ScoreBrackets.determineWait(
               VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
           ScoreBrackets.determineWait(
               EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
         }
-        ScoreBrackets.determineWait(
-            LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
       }
-    } // End of for loop that looks at all dest operands.
+      for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+        MachineOperand &Def = MI.getOperand(I);
+        const MachineRegisterInfo &MRIA = *MRI;
+        RegInterval Interval =
+            ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
+        for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+          if (TRI->isVGPR(MRIA, Def.getReg())) {
+            ScoreBrackets.determineWait(
+                VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+            ScoreBrackets.determineWait(
+                EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+          }
+          ScoreBrackets.determineWait(
+              LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+        }
+      } // End of for loop that looks at all dest operands.
+    }
   }
 
   // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
@@ -996,13 +1031,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   // requiring a WAITCNT beforehand.
   if (MI.getOpcode() == AMDGPU::S_BARRIER &&
       !ST->hasAutoWaitcntBeforeBarrier()) {
-    Wait = AMDGPU::Waitcnt::allZero();
+    Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
   }
 
   // TODO: Remove this work-around, enable the assert for Bug 457939
   //       after fixing the scheduler. Also, the Shader Compiler code is
   //       independent of target.
-  if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
     if (ScoreBrackets.getScoreLB(LGKM_CNT) <
             ScoreBrackets.getScoreUB(LGKM_CNT) &&
         ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
@@ -1014,21 +1049,31 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
   if (!ScoreBrackets.simplifyWaitcnt(Wait) && !IsForceEmitWaitcnt) {
     bool Modified = false;
     if (OldWaitcntInstr) {
-      if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
-        TrackedWaitcntSet.erase(OldWaitcntInstr);
-        OldWaitcntInstr->eraseFromParent();
-        Modified = true;
-      } else {
-        int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
-        ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+      for (auto II = OldWaitcntInstr->getIterator(), NextI = std::next(II);
+           &*II != &MI; II = NextI, ++NextI) {
+        if (II->isDebugInstr())
+          continue;
+
+        if (TrackedWaitcntSet.count(&*II)) {
+          TrackedWaitcntSet.erase(&*II);
+          II->eraseFromParent();
+          Modified = true;
+        } else if (II->getOpcode() == AMDGPU::S_WAITCNT) {
+          int64_t Imm = II->getOperand(0).getImm();
+          ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+        } else {
+          assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+          assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+          ScoreBrackets.applyWaitcnt(
+              AMDGPU::Waitcnt(0, 0, 0, II->getOperand(1).getImm()));
+        }
       }
-      Modified = true;
     }
     return Modified;
   }
 
   if (ForceEmitZeroWaitcnts)
-    Wait = AMDGPU::Waitcnt::allZero();
+    Wait = AMDGPU::Waitcnt::allZero(IV);
 
   if (ForceEmitWaitcnt[VM_CNT])
     Wait.VmCnt = 0;
@@ -1036,39 +1081,88 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
     Wait.ExpCnt = 0;
   if (ForceEmitWaitcnt[LGKM_CNT])
     Wait.LgkmCnt = 0;
+  if (ForceEmitWaitcnt[VS_CNT])
+    Wait.VsCnt = 0;
 
   ScoreBrackets.applyWaitcnt(Wait);
 
   AMDGPU::Waitcnt OldWait;
+  bool Modified = false;
+
   if (OldWaitcntInstr) {
-    OldWait =
-        AMDGPU::decodeWaitcnt(IV, OldWaitcntInstr->getOperand(0).getImm());
-  }
-  if (OldWait.dominates(Wait))
-    return false;
+    for (auto II = OldWaitcntInstr->getIterator(), NextI = std::next(II);
+         &*II != &MI; II = NextI, NextI++) {
+      if (II->isDebugInstr())
+        continue;
 
-  if (OldWaitcntInstr && !TrackedWaitcntSet.count(OldWaitcntInstr))
-    Wait = Wait.combined(OldWait);
+      if (II->getOpcode() == AMDGPU::S_WAITCNT) {
+        unsigned IEnc = II->getOperand(0).getImm();
+        AMDGPU::Waitcnt IWait = AMDGPU::decodeWaitcnt(IV, IEnc);
+        OldWait = OldWait.combined(IWait);
+        if (!TrackedWaitcntSet.count(&*II))
+          Wait = Wait.combined(IWait);
+        unsigned NewEnc = AMDGPU::encodeWaitcnt(IV, Wait);
+        if (IEnc != NewEnc) {
+          II->getOperand(0).setImm(NewEnc);
+          Modified = true;
+        }
+        Wait.VmCnt = ~0u;
+        Wait.LgkmCnt = ~0u;
+        Wait.ExpCnt = ~0u;
+      } else {
+        assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+        assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+
+        unsigned ICnt = II->getOperand(1).getImm();
+        OldWait.VsCnt = std::min(OldWait.VsCnt, ICnt);
+        if (!TrackedWaitcntSet.count(&*II))
+          Wait.VsCnt = std::min(Wait.VsCnt, ICnt);
+        if (Wait.VsCnt != ICnt) {
+          II->getOperand(1).setImm(Wait.VsCnt);
+          Modified = true;
+        }
+        Wait.VsCnt = ~0u;
+      }
 
-  unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
-  if (OldWaitcntInstr) {
-    OldWaitcntInstr->getOperand(0).setImm(Enc);
+      LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+                        << "Old Instr: " << MI << '\n'
+                        << "New Instr: " << *II << '\n');
 
-    LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
-                      << "Old Instr: " << MI << '\n'
-                      << "New Instr: " << *OldWaitcntInstr << '\n');
-  } else {
+      if (!Wait.hasWait())
+        return Modified;
+    }
+  }
+
+  if (Wait.VmCnt != ~0u || Wait.LgkmCnt != ~0u || Wait.ExpCnt != ~0u) {
+    unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
     auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
                              MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
                          .addImm(Enc);
     TrackedWaitcntSet.insert(SWaitInst);
+    Modified = true;
 
     LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
                       << "Old Instr: " << MI << '\n'
                       << "New Instr: " << *SWaitInst << '\n');
   }
 
-  return true;
+  if (Wait.VsCnt != ~0u) {
+    assert(ST->hasVscnt());
+
+    auto SWaitInst =
+        BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+                TII->get(AMDGPU::S_WAITCNT_VSCNT))
+            .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+            .addImm(Wait.VsCnt);
+    TrackedWaitcntSet.insert(SWaitInst);
+    Modified = true;
+
+    LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+                      << "Old Instr: " << MI << '\n'
+                      << "New Instr: " << *SWaitInst << '\n');
+  }
+
+  return Modified;
 }
 
 // This is a flat memory operation. Check to see if it has memory
@@ -1093,7 +1187,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   // bracket and the destination operand scores.
   // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
   if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
-    if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
+    if (TII->isAlwaysGDS(Inst.getOpcode()) ||
+        TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
       ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
     } else {
@@ -1102,8 +1197,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
   } else if (TII->isFLAT(Inst)) {
     assert(Inst.mayLoad() || Inst.mayStore());
 
-    if (TII->usesVM_CNT(Inst))
-      ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+    if (TII->usesVM_CNT(Inst)) {
+      if (!ST->hasVscnt())
+        ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+      else if (Inst.mayLoad() &&
+               AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
+        ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
+      else
+        ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
+    }
 
     if (TII->usesLGKM_CNT(Inst)) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
@@ -1118,14 +1220,33 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
              // TODO: get a better carve out.
              Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
              Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
-             Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
-    ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+             Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL &&
+             Inst.getOpcode() != AMDGPU::BUFFER_GL0_INV &&
+             Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
+    if (!ST->hasVscnt())
+      ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+    else if ((Inst.mayLoad() &&
+              AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
+             /* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
+             (TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
+      ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
+    else if (Inst.mayStore())
+      ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
+
     if (ST->vmemWriteNeedsExpWaitcnt() &&
         (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
       ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
     }
   } else if (TII->isSMRD(Inst)) {
     ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+  } else if (Inst.isCall()) {
+    if (callWaitsOnFunctionReturn(Inst)) {
+      // Act as a wait on everything
+      ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZero(IV));
+    } else {
+      // May need to wait for anything.
+      ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
+    }
   } else {
     switch (Inst.getOpcode()) {
     case AMDGPU::S_SENDMSG:
@@ -1236,31 +1357,18 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
   // Walk over the instructions.
   MachineInstr *OldWaitcntInstr = nullptr;
 
-  for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
+  for (MachineBasicBlock::instr_iterator Iter = Block.instr_begin(),
+                                         E = Block.instr_end();
        Iter != E;) {
     MachineInstr &Inst = *Iter;
 
-    // Remove any previously existing waitcnts.
-    if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
-      if (OldWaitcntInstr) {
-        if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
-          TrackedWaitcntSet.erase(OldWaitcntInstr);
-          OldWaitcntInstr->eraseFromParent();
-          OldWaitcntInstr = nullptr;
-        } else if (!TrackedWaitcntSet.count(&Inst)) {
-          // Two successive s_waitcnt's, both of which are pre-existing and
-          // are therefore preserved.
-          int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
-          ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
-        } else {
-          ++Iter;
-          Inst.eraseFromParent();
-          Modified = true;
-          continue;
-        }
-      }
-
-      OldWaitcntInstr = &Inst;
+    // Track pre-existing waitcnts from earlier iterations.
+    if (Inst.getOpcode() == AMDGPU::S_WAITCNT ||
+        (Inst.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+         Inst.getOperand(0).isReg() &&
+         Inst.getOperand(0).getReg() == AMDGPU::SGPR_NULL)) {
+      if (!OldWaitcntInstr)
+        OldWaitcntInstr = &Inst;
       ++Iter;
       continue;
     }
@@ -1299,27 +1407,16 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
       ScoreBrackets.dump();
     });
 
-    // Check to see if this is a GWS instruction. If so, and if this is CI or
-    // VI, then the generated code sequence will include an S_WAITCNT 0.
-    // TODO: Are these the only GWS instructions?
-    if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
-      // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
-      ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
-    }
-
     // TODO: Remove this work-around after fixing the scheduler and enable the
     // assert above.
     if (VCCZBugWorkAround) {
       // Restore the vccz bit.  Any time a value is written to vcc, the vcc
       // bit is updated, so we can restore the bit by reading the value of
       // vcc and then writing it back to the register.
-      BuildMI(Block, Inst, Inst.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
-              AMDGPU::VCC)
-          .addReg(AMDGPU::VCC);
+      BuildMI(Block, Inst, Inst.getDebugLoc(),
+              TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
+              TRI->getVCC())
+          .addReg(TRI->getVCC());
       VCCZBugHandledSet.insert(&Inst);
       Modified = true;
     }
@@ -1345,6 +1442,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
   HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
   HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
   HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
+  HardwareLimits.VscntMax = ST->hasVscnt() ? 63 : 0;
 
   HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs();
   HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs();
@@ -1480,6 +1578,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     // TODO: Could insert earlier and schedule more liberally with operations
     // that only use caller preserved registers.
     MachineBasicBlock &EntryBB = MF.front();
+    if (ST->hasVscnt())
+      BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
+              TII->get(AMDGPU::S_WAITCNT_VSCNT))
+      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+      .addImm(0);
     BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
       .addImm(0);
 
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index 65ffc27b8b60..561a16c3e351 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,19 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-def isGCN : Predicate<"Subtarget->getGeneration() "
-                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
-            AssemblerPredicate<"FeatureGCN">;
-def isSI : Predicate<"Subtarget->getGeneration() "
-                      "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
-           AssemblerPredicate<"FeatureSouthernIslands">;
-
-
 class InstSI <dag outs, dag ins, string asm = "",
               list<dag> pattern = []> :
   AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {
-  let SubtargetPredicate = isGCN;
-
   // Low bits - basic encoding information.
   field bit SALU = 0;
   field bit VALU = 0;
@@ -121,10 +110,20 @@ class InstSI <dag outs, dag ins, string asm = "",
   // This bit indicates that this is a D16 buffer instruction.
   field bit D16Buf = 0;
 
+  // This field indicates that FLAT instruction accesses FLAT_GLBL or
+  // FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions.
+  field bit IsNonFlatSeg = 0;
+
   // This bit indicates that this uses the floating point double precision
   // rounding mode flags
   field bit FPDPRounding = 0;
 
+  // Instruction is FP atomic.
+  field bit FPAtomic = 0;
+
+  // This bit indicates that this is one of MFMA instructions.
+  field bit IsMAI = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -182,7 +181,13 @@ class InstSI <dag outs, dag ins, string asm = "",
 
   let TSFlags{50} = D16Buf;
 
-  let TSFlags{51} = FPDPRounding;
+  let TSFlags{51} = IsNonFlatSeg;
+
+  let TSFlags{52} = FPDPRounding;
+
+  let TSFlags{53} = FPAtomic;
+
+  let TSFlags{54} = IsMAI;
 
   let SchedRW = [Write32Bit];
 
@@ -251,38 +256,59 @@ class VINTRPe <bits<2> op> : Enc32 {
   let Inst{31-26} = 0x32; // encoding
 }
 
-class MIMGe <bits<7> op> : Enc64 {
+class MIMGe : Enc64 {
   bits<8> vdata;
   bits<4> dmask;
   bits<1> unorm;
   bits<1> glc;
-  bits<1> da;
   bits<1> r128;
   bits<1> tfe;
   bits<1> lwe;
   bits<1> slc;
   bit d16;
-  bits<8> vaddr;
   bits<7> srsrc;
   bits<7> ssamp;
 
   let Inst{11-8} = dmask;
   let Inst{12} = unorm;
   let Inst{13} = glc;
-  let Inst{14} = da;
   let Inst{15} = r128;
   let Inst{16} = tfe;
   let Inst{17} = lwe;
-  let Inst{24-18} = op;
   let Inst{25} = slc;
   let Inst{31-26} = 0x3c;
-  let Inst{39-32} = vaddr;
   let Inst{47-40} = vdata;
   let Inst{52-48} = srsrc{6-2};
   let Inst{57-53} = ssamp{6-2};
   let Inst{63} = d16;
 }
 
+class MIMGe_gfx6789 <bits<8> op> : MIMGe { // MIMGe plus the op, da and vaddr bit placements below.
+  bits<8> vaddr;
+  bits<1> da;
+
+  let Inst{0} = op{7}; // Top bit of the 8-bit op is placed in bit 0.
+  let Inst{14} = da;
+  let Inst{24-18} = op{6-0}; // Remaining seven op bits.
+  let Inst{39-32} = vaddr;
+}
+
+class MIMGe_gfx10 <bits<8> op> : MIMGe { // MIMGe plus the op, nsa, dim, dlc, vaddr0 and a16 bit placements below.
+  bits<8> vaddr0;
+  bits<3> dim;
+  bits<2> nsa;
+  bits<1> dlc;
+  bits<1> a16 = 0; // TODO: this should be an operand
+
+  let Inst{0} = op{7}; // Top bit of the 8-bit op is placed in bit 0.
+  let Inst{2-1} = nsa;
+  let Inst{5-3} = dim;
+  let Inst{7} = dlc;
+  let Inst{24-18} = op{6-0}; // Remaining seven op bits.
+  let Inst{39-32} = vaddr0;
+  let Inst{62} = a16; // Fixed to 0 by the field default above.
+}
+
 class EXPe : Enc64 {
   bits<4> en;
   bits<6> tgt;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2370d5fa7b27..ba8ed6993a56 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- SIInstrInfo.cpp - SI Instruction Information  ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 
 #include "SIInstrInfo.h"
 #include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
 #include "SIDefines.h"
@@ -100,12 +98,6 @@ static unsigned getNumOperandsNoGlue(SDNode *Node) {
   return N;
 }
 
-static SDValue findChainOperand(SDNode *Load) {
-  SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
-  assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
-  return LastOp;
-}
-
 /// Returns true if both nodes have the same value for the given
 ///        operand \p Op, or if both nodes do not have this operand.
 static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
@@ -142,7 +134,8 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::V_MOV_B32_e64:
   case AMDGPU::V_MOV_B64_PSEUDO:
-    return true;
+    // No implicit operands.
+    return MI.getNumOperands() == MI.getDesc().getNumOperands();
   default:
     return false;
   }
@@ -168,22 +161,25 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
       return false;
 
     // Check base reg.
-    if (Load0->getOperand(1) != Load1->getOperand(1))
-      return false;
-
-    // Check chain.
-    if (findChainOperand(Load0) != findChainOperand(Load1))
+    if (Load0->getOperand(0) != Load1->getOperand(0))
       return false;
 
     // Skip read2 / write2 variants for simplicity.
     // TODO: We should report true if the used offsets are adjacent (excluded
     // st64 versions).
-    if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
-        AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
+    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+    if (Offset0Idx == -1 || Offset1Idx == -1)
       return false;
 
-    Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
-    Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
+    // XXX - be careful of dataless loads
+    // getNamedOperandIdx returns the index for MachineInstrs.  Since they
+    // include the output in the operand list, but SDNodes don't, we need to
+    // subtract the number of defs from the index.
+    Offset0Idx -= get(Opc0).NumDefs;
+    Offset1Idx -= get(Opc1).NumDefs;
+    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
+    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
     return true;
   }
 
@@ -207,10 +203,6 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
     if (!Load0Offset || !Load1Offset)
       return false;
 
-    // Check chain.
-    if (findChainOperand(Load0) != findChainOperand(Load1))
-      return false;
-
     Offset0 = Load0Offset->getZExtValue();
     Offset1 = Load1Offset->getZExtValue();
     return true;
@@ -221,7 +213,6 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
 
     // MUBUF and MTBUF have vaddr at different indices.
     if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
-        findChainOperand(Load0) != findChainOperand(Load1) ||
         !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
         !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
       return false;
@@ -233,10 +224,10 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
       return false;
 
     // getNamedOperandIdx returns the index for MachineInstrs.  Since they
-    // inlcude the output in the operand list, but SDNodes don't, we need to
+    // include the output in the operand list, but SDNodes don't, we need to
     // subtract the index by one.
-    --OffIdx0;
-    --OffIdx1;
+    OffIdx0 -= get(Opc0).NumDefs;
+    OffIdx1 -= get(Opc1).NumDefs;
 
     SDValue Off0 = Load0->getOperand(OffIdx0);
     SDValue Off1 = Load1->getOperand(OffIdx1);
@@ -265,8 +256,8 @@ static bool isStride64(unsigned Opc) {
   }
 }
 
-bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
-                                          MachineOperand *&BaseOp,
+bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+                                          const MachineOperand *&BaseOp,
                                           int64_t &Offset,
                                           const TargetRegisterInfo *TRI) const {
   unsigned Opc = LdSt.getOpcode();
@@ -277,6 +268,11 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
     if (OffsetImm) {
       // Normal, single offset LDS instruction.
       BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+      // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
+      // report that here?
+      if (!BaseOp)
+        return false;
+
       Offset = OffsetImm->getImm();
       assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
                                 "operands of type register.");
@@ -325,7 +321,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
     if (SOffset && SOffset->isReg())
       return false;
 
-    MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+    const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
     if (!AddrReg)
       return false;
 
@@ -348,7 +344,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
     if (!OffsetImm)
       return false;
 
-    MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+    const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
     BaseOp = SBaseReg;
     Offset = OffsetImm->getImm();
     assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
@@ -357,7 +353,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
   }
 
   if (isFLAT(LdSt)) {
-    MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+    const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
     if (VAddr) {
       // Can't analyze 2 offsets.
       if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
@@ -413,11 +409,11 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
   return Base1 == Base2;
 }
 
-bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
-                                      MachineOperand &BaseOp2,
+bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
+                                      const MachineOperand &BaseOp2,
                                       unsigned NumLoads) const {
-  MachineInstr &FirstLdSt = *BaseOp1.getParent();
-  MachineInstr &SecondLdSt = *BaseOp2.getParent();
+  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
 
   if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
     return false;
@@ -461,7 +457,12 @@ bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
 
   const MachineRegisterInfo &MRI =
       FirstLdSt.getParent()->getParent()->getRegInfo();
-  const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
+
+  const unsigned Reg = FirstDst->getReg();
+
+  const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
+                                         ? MRI.getRegClass(Reg)
+                                         : RI.getPhysRegClass(Reg);
 
   return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
 }
@@ -511,8 +512,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   if (RC == &AMDGPU::VGPR_32RegClass) {
     assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
-           AMDGPU::SReg_32RegClass.contains(SrcReg));
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+           AMDGPU::AGPR_32RegClass.contains(SrcReg));
+    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
+                     AMDGPU::V_ACCVGPR_READ_B32 : AMDGPU::V_MOV_B32_e32;
+    BuildMI(MBB, MI, DL, get(Opc), DestReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
@@ -526,6 +530,21 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       return;
     }
 
+    if (DestReg == AMDGPU::VCC_LO) {
+      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+        BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+      } else {
+        // FIXME: Hack until VReg_1 removed.
+        assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
+        BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
+          .addImm(0)
+          .addReg(SrcReg, getKillRegState(KillSrc));
+      }
+
+      return;
+    }
+
     if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
@@ -570,10 +589,83 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  if (RC == &AMDGPU::AGPR_32RegClass) {
+    assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+           AMDGPU::AGPR_32RegClass.contains(SrcReg));
+    if (!AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
+      // First try to find defining accvgpr_write to avoid temporary registers.
+      for (auto Def = MI, E = MBB.begin(); Def != E; ) {
+        --Def;
+        if (!Def->definesRegister(SrcReg, &RI))
+          continue;
+        if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+          break;
+
+        MachineOperand &DefOp = Def->getOperand(1);
+        assert(DefOp.isReg() || DefOp.isImm());
+
+        if (DefOp.isReg()) {
+          // Check that register source operand is not clobbered before MI.
+          // Immediate operands are always safe to propagate.
+          bool SafeToPropagate = true;
+          for (auto I = Def; I != MI && SafeToPropagate; ++I)
+            if (I->modifiesRegister(DefOp.getReg(), &RI))
+              SafeToPropagate = false;
+
+          if (!SafeToPropagate)
+            break;
+
+          DefOp.setIsKill(false);
+        }
+
+        BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+          .add(DefOp);
+        return;
+      }
+
+      RegScavenger RS;
+      RS.enterBasicBlock(MBB);
+      RS.forward(MI);
+
+      // Ideally we want to have three registers for a long reg_sequence copy
+      // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
+      unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+                                                 *MBB.getParent());
+
+      // Registers in the sequence are allocated contiguously so we can just
+      // use register number to pick one of three round-robin temps.
+      unsigned RegNo = DestReg % 3;
+      unsigned Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+      if (!Tmp)
+        report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
+      RS.setRegUsed(Tmp);
+      // Only loop through if there are any free registers left, otherwise
+      // scavenger may report a fatal error without emergency spill slot
+      // or spill with the slot.
+      while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
+        unsigned Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+        if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
+          break;
+        Tmp = Tmp2;
+        RS.setRegUsed(Tmp);
+      }
+      copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+        .addReg(Tmp, RegState::Kill);
+      return;
+    }
+
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
   unsigned EltSize = 4;
   unsigned Opcode = AMDGPU::V_MOV_B32_e32;
   if (RI.isSGPRClass(RC)) {
-    if (RI.getRegSizeInBits(*RC) > 32) {
+    // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
+    if (!(RI.getRegSizeInBits(*RC) % 64)) {
       Opcode =  AMDGPU::S_MOV_B64;
       EltSize = 8;
     } else {
@@ -585,6 +677,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
+  } else if (RI.hasAGPRs(RC)) {
+    Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
+      AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+  } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+    Opcode = AMDGPU::V_ACCVGPR_READ_B32;
   }
 
   ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
@@ -597,6 +694,12 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     else
       SubIdx = SubIndices[SubIndices.size() - Idx - 1];
 
+    if (Opcode == TargetOpcode::COPY) {
+      copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
+                  RI.getSubReg(SrcReg, SubIdx), KillSrc);
+      continue;
+    }
+
     MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
       get(Opcode), RI.getSubReg(DestReg, SubIdx));
 
@@ -696,38 +799,50 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
                                      unsigned TrueReg,
                                      unsigned FalseReg) const {
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  MachineFunction *MF = MBB.getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const TargetRegisterClass *BoolXExecRC =
+    RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
   assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
          "Not a VGPR32 reg");
 
   if (Cond.size() == 1) {
-    unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+    unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
     BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
       .add(Cond[0]);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addImm(0)
       .addReg(FalseReg)
+      .addImm(0)
       .addReg(TrueReg)
       .addReg(SReg);
   } else if (Cond.size() == 2) {
     assert(Cond[0].isImm() && "Cond[0] is not an immediate");
     switch (Cond[0].getImm()) {
     case SIInstrInfo::SCC_TRUE: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+                                            : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
     }
     case SIInstrInfo::SCC_FALSE: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+                                            : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
@@ -735,11 +850,13 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     case SIInstrInfo::VCCNZ: {
       MachineOperand RegOp = Cond[1];
       RegOp.setImplicit(false);
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+          .addImm(0)
           .addReg(FalseReg)
+          .addImm(0)
           .addReg(TrueReg)
           .addReg(SReg);
       break;
@@ -747,39 +864,49 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
     case SIInstrInfo::VCCZ: {
       MachineOperand RegOp = Cond[1];
       RegOp.setImplicit(false);
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
       BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
         .add(RegOp);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+          .addImm(0)
           .addReg(TrueReg)
+          .addImm(0)
           .addReg(FalseReg)
           .addReg(SReg);
       break;
     }
     case SIInstrInfo::EXECNZ: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
         .addImm(0);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+                                            : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(-1)
         .addImm(0);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       break;
     }
     case SIInstrInfo::EXECZ: {
-      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+      unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+      unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+                                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
         .addImm(0);
-      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+      BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+                                            : AMDGPU::S_CSELECT_B64), SReg)
         .addImm(0)
         .addImm(-1);
       BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addImm(0)
         .addReg(FalseReg)
+        .addImm(0)
         .addReg(TrueReg)
         .addReg(SReg);
       llvm_unreachable("Unhandled branch predicate EXECZ");
@@ -798,7 +925,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
                                const DebugLoc &DL,
                                unsigned SrcReg, int Value) const {
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
   BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
     .addImm(Value)
     .addReg(SrcReg);
@@ -811,7 +938,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
                                const DebugLoc &DL,
                                unsigned SrcReg, int Value) const {
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
   BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
     .addImm(Value)
     .addReg(SrcReg);
@@ -821,6 +948,8 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
 
 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 
+  if (RI.hasAGPRs(DstRC))
+    return AMDGPU::COPY;
   if (RI.getRegSizeInBits(*DstRC) == 32) {
     return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
   } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
@@ -837,12 +966,18 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S32_SAVE;
   case 8:
     return AMDGPU::SI_SPILL_S64_SAVE;
+  case 12:
+    return AMDGPU::SI_SPILL_S96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_S128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_S512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_S1024_SAVE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -858,10 +993,31 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_V128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_V512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_V1024_SAVE;
+  default:
+    llvm_unreachable("unknown register size");
+  }
+}
+
+static unsigned getAGPRSpillSaveOpcode(unsigned Size) { // Selects the SI_SPILL_A*_SAVE pseudo for a spill size.
+  switch (Size) { // Size looks like a byte count: 4 selects the A32 opcode, 128 the A1024 one.
+  case 4:
+    return AMDGPU::SI_SPILL_A32_SAVE;
+  case 8:
+    return AMDGPU::SI_SPILL_A64_SAVE;
+  case 16:
+    return AMDGPU::SI_SPILL_A128_SAVE;
+  case 64: // Note: no 12, 20 or 32 entries here, unlike the VGPR save table.
+    return AMDGPU::SI_SPILL_A512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_A1024_SAVE;
   default:
     llvm_unreachable("unknown register size");
   }
 }
       .addFrameIndex(FrameIndex)               // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
     // Add the scratch resource registers as implicit uses because we may end up
     // needing them, and need to ensure that the reserved registers are
     // correctly handled.
-
-    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
+    if (RI.spillSGPRToVGPR())
+      FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
       Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
@@ -920,17 +1076,22 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
-  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
+  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+                                    : getVGPRSpillSaveOpcode(SpillSize);
   MFI->setHasSpilledVGPRs();
-  BuildMI(MBB, MI, DL, get(Opcode))
-    .addReg(SrcReg, getKillRegState(isKill)) // data
-    .addFrameIndex(FrameIndex)               // addr
-    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
-    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
-    .addImm(0)                               // offset
-    .addMemOperand(MMO);
+
+  auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
+  if (RI.hasAGPRs(RC)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    MIB.addReg(Tmp, RegState::Define);
+  }
+  MIB.addReg(SrcReg, getKillRegState(isKill)) // data
+     .addFrameIndex(FrameIndex)               // addr
+     .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
+     .addReg(MFI->getStackPtrOffsetReg())     // scratch_offset
+     .addImm(0)                               // offset
+     .addMemOperand(MMO);
 }
 
 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -939,12 +1100,18 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S32_RESTORE;
   case 8:
     return AMDGPU::SI_SPILL_S64_RESTORE;
+  case 12:
+    return AMDGPU::SI_SPILL_S96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_S128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_S512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_S1024_RESTORE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -960,10 +1127,31 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_V128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_V512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_V1024_RESTORE;
+  default:
+    llvm_unreachable("unknown register size");
+  }
+}
+
+static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { // Selects the SI_SPILL_A*_RESTORE pseudo for a spill size.
+  switch (Size) { // Size looks like a byte count: 4 selects the A32 opcode, 128 the A1024 one.
+  case 4:
+    return AMDGPU::SI_SPILL_A32_RESTORE;
+  case 8:
+    return AMDGPU::SI_SPILL_A64_RESTORE;
+  case 16:
+    return AMDGPU::SI_SPILL_A128_RESTORE;
+  case 64: // Note: no 12, 20 or 32 entries here, unlike the VGPR restore table.
+    return AMDGPU::SI_SPILL_A512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_A1024_RESTORE;
   default:
     llvm_unreachable("unknown register size");
   }
 }
       MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
     }
 
-    FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
+    if (RI.spillSGPRToVGPR())
+      FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
     MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
       .addFrameIndex(FrameIndex) // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
@@ -1014,15 +1203,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
-  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
-  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-    .addFrameIndex(FrameIndex)        // vaddr
-    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
-    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
-    .addImm(0)                        // offset
-    .addMemOperand(MMO);
+  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+                                    : getVGPRSpillRestoreOpcode(SpillSize);
+  auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
+  if (RI.hasAGPRs(RC)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    MIB.addReg(Tmp, RegState::Define);
+  }
+  MIB.addFrameIndex(FrameIndex)        // vaddr
+     .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+     .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+     .addImm(0)                           // offset
+     .addMemOperand(MMO);
 }
 
 /// \param @Offset Offset in bytes of the FrameIndex being spilled
@@ -1089,7 +1282,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
       // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
       getAddNoCarry(Entry, Insert, DL, TIDReg)
         .addReg(TIDReg)
-        .addReg(TIDIGZReg);
+        .addReg(TIDIGZReg)
+        .addImm(0); // clamp bit
     } else {
       // Get the wave id
       BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
@@ -1114,7 +1308,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
   unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
   getAddNoCarry(MBB, MI, DL, TmpReg)
     .addImm(LDSOffset)
-    .addReg(TIDReg);
+    .addReg(TIDReg)
+    .addImm(0); // clamp bit
 
   return TmpReg;
 }
@@ -1148,13 +1343,17 @@ void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
 
   if (MBB.succ_empty()) {
     bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
-    if (HasNoTerminator)
-      BuildMI(MBB, MBB.end(), DebugLoc(),
-              get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
+    if (HasNoTerminator) {
+      if (Info->returnsVoid()) {
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0);
+      } else {
+        BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG));
+      }
+    }
   }
 }
 
-unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
+unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default: return 1; // FIXME: Do wait states equal cycles?
 
@@ -1174,18 +1373,42 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::S_MOV_B64));
     break;
 
+  case AMDGPU::S_MOV_B32_term:
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(get(AMDGPU::S_MOV_B32));
+    break;
+
   case AMDGPU::S_XOR_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(get(AMDGPU::S_XOR_B64));
     break;
 
+  case AMDGPU::S_XOR_B32_term:
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(get(AMDGPU::S_XOR_B32));
+    break;
+
+  case AMDGPU::S_OR_B32_term:
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(get(AMDGPU::S_OR_B32));
+    break;
+
   case AMDGPU::S_ANDN2_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(get(AMDGPU::S_ANDN2_B64));
     break;
 
+  case AMDGPU::S_ANDN2_B32_term:
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
+    break;
+
   case AMDGPU::V_MOV_B64_PSEUDO: {
     unsigned Dst = MI.getOperand(0).getReg();
     unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -1215,24 +1438,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     break;
   }
   case AMDGPU::V_SET_INACTIVE_B32: {
-    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
-      .addReg(AMDGPU::EXEC);
+    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
+    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+      .addReg(Exec);
     BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
       .add(MI.getOperand(2));
-    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
-      .addReg(AMDGPU::EXEC);
+    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+      .addReg(Exec);
     MI.eraseFromParent();
     break;
   }
   case AMDGPU::V_SET_INACTIVE_B64: {
-    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
-      .addReg(AMDGPU::EXEC);
+    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
+    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+      .addReg(Exec);
     MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
                                  MI.getOperand(0).getReg())
       .add(MI.getOperand(2));
     expandPostRAPseudo(*Copy);
-    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
-      .addReg(AMDGPU::EXEC);
+    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+      .addReg(Exec);
     MI.eraseFromParent();
     break;
   }
@@ -1282,10 +1509,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 
     MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                                   .addReg(RegHi);
-    if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
-      MIB.addImm(0);
-    else
-      MIB.add(MI.getOperand(2));
+    MIB.add(MI.getOperand(2));
 
     Bundler.append(MIB);
     finalizeBundle(MBB, Bundler.begin());
@@ -1293,10 +1517,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.eraseFromParent();
     break;
   }
+  case AMDGPU::ENTER_WWM: {
+    // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
+    // WWM is entered.
+    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+                                 : AMDGPU::S_OR_SAVEEXEC_B64));
+    break;
+  }
   case AMDGPU::EXIT_WWM: {
-    // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
-    // is exited.
-    MI.setDesc(get(AMDGPU::S_MOV_B64));
+    // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
+    // WWM is exited.
+    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
     break;
   }
   case TargetOpcode::BUNDLE: {
@@ -1492,7 +1723,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub0)
       .addReg(PCReg, 0, AMDGPU::sub0)
-      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
+      .addMBB(&DestBB, MO_LONG_BRANCH_FORWARD);
     BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub1)
       .addReg(PCReg, 0, AMDGPU::sub1)
@@ -1502,7 +1733,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub0)
       .addReg(PCReg, 0, AMDGPU::sub0)
-      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
+      .addMBB(&DestBB, MO_LONG_BRANCH_BACKWARD);
     BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub1)
       .addReg(PCReg, 0, AMDGPU::sub1)
@@ -1659,6 +1890,10 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
     case AMDGPU::S_MOV_B64_term:
     case AMDGPU::S_XOR_B64_term:
     case AMDGPU::S_ANDN2_B64_term:
+    case AMDGPU::S_MOV_B32_term:
+    case AMDGPU::S_XOR_B32_term:
+    case AMDGPU::S_OR_B32_term:
+    case AMDGPU::S_ANDN2_B32_term:
       break;
     case AMDGPU::SI_IF:
     case AMDGPU::SI_ELSE:
@@ -1826,7 +2061,7 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
     CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
 
     // Limit to equal cost for branch vs. N v_cndmask_b32s.
-    return !RI.isSGPRClass(RC) && NumInsts <= 6;
+    return RI.hasVGPRs(RC) && NumInsts <= 6;
   }
   case SCC_TRUE:
   case SCC_FALSE: {
@@ -1907,14 +2142,18 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
   const int16_t *SubIndices = Sub0_15;
   int NElts = DstSize / 32;
 
-  // 64-bit select is only avaialble for SALU.
+  // 64-bit select is only available for SALU.
+  // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit.
   if (Pred == SCC_TRUE) {
-    SelOp = AMDGPU::S_CSELECT_B64;
-    EltRC = &AMDGPU::SGPR_64RegClass;
-    SubIndices = Sub0_15_64;
-
-    assert(NElts % 2 == 0);
-    NElts /= 2;
+    if (NElts % 2) {
+      SelOp = AMDGPU::S_CSELECT_B32;
+      EltRC = &AMDGPU::SGPR_32RegClass;
+    } else {
+      SelOp = AMDGPU::S_CSELECT_B64;
+      EltRC = &AMDGPU::SGPR_64RegClass;
+      SubIndices = Sub0_15_64;
+      NElts /= 2;
+    }
   }
 
   MachineInstrBuilder MIB = BuildMI(
@@ -1934,6 +2173,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
       .addReg(FalseReg, 0, SubIdx)
       .addReg(TrueReg, 0, SubIdx);
     preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+    fixImplicitOperands(*Select);
 
     MIB.addReg(DstElt)
        .addImm(SubIdx);
@@ -1955,6 +2195,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
   case AMDGPU::S_MOV_B32:
   case AMDGPU::S_MOV_B64:
   case AMDGPU::COPY:
+  case AMDGPU::V_ACCVGPR_WRITE_B32:
+  case AMDGPU::V_ACCVGPR_READ_B32:
     return true;
   default:
     return false;
@@ -2007,6 +2249,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
   case AMDGPU::V_MOV_B32_e32:
   case AMDGPU::S_MOV_B32:
+  case AMDGPU::V_ACCVGPR_WRITE_B32:
     break;
   }
 
@@ -2020,6 +2263,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   if (Opc == AMDGPU::COPY) {
     bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
     unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+    if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
+      if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
+        return false;
+      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
+    }
     UseMI.setDesc(get(NewOpc));
     UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
     UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
@@ -2027,7 +2275,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   }
 
   if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
-      Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
+      Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64 ||
+      Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+      Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64) {
     // Don't fold if we are using source or output modifiers. The new VOP2
     // instructions don't have them.
     if (hasAnyModifiersSet(UseMI))
@@ -2042,7 +2292,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     if (isInlineConstant(UseMI, *Src0, *ImmOp))
       return false;
 
-    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
+    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
+                 Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64;
+    bool IsFMA = Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+                 Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64;
     MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
 
@@ -2055,6 +2308,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
         return false;
 
+      unsigned NewOpc =
+        IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16)
+              : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
+      if (pseudoToMCOpcode(NewOpc) == -1)
+        return false;
+
       // We need to swap operands 0 and 1 since madmk constant is at operand 1.
 
       const int64_t Imm = ImmOp->getImm();
@@ -2075,14 +2334,16 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Src0->setIsKill(Src1->isKill());
 
       if (Opc == AMDGPU::V_MAC_F32_e64 ||
-          Opc == AMDGPU::V_MAC_F16_e64)
+          Opc == AMDGPU::V_MAC_F16_e64 ||
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
       Src1->ChangeToImmediate(Imm);
 
       removeModOperands(UseMI);
-      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
+      UseMI.setDesc(get(NewOpc));
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -2107,9 +2368,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
           Src0->ChangeToImmediate(Def->getOperand(1).getImm());
           Src0Inlined = true;
         } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
-            RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+            (ST.getConstantBusLimit(Opc) <= 1 &&
+             RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
             (RI.isVirtualRegister(Src0->getReg()) &&
-            RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+            (ST.getConstantBusLimit(Opc) <= 1 &&
+             RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
           return false;
           // VGPR is okay as Src0 - fallthrough
       }
@@ -2130,6 +2393,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
           // VGPR is okay as Src1 - fallthrough
       }
 
+      unsigned NewOpc =
+        IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16)
+              : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
+      if (pseudoToMCOpcode(NewOpc) == -1)
+        return false;
+
       const int64_t Imm = ImmOp->getImm();
 
       // FIXME: This would be a lot easier if we could return a new instruction
@@ -2142,7 +2411,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
           AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
 
       if (Opc == AMDGPU::V_MAC_F32_e64 ||
-          Opc == AMDGPU::V_MAC_F16_e64)
+          Opc == AMDGPU::V_MAC_F16_e64 ||
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
@@ -2151,7 +2422,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       // These come before src2.
       removeModOperands(UseMI);
-      UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
+      UseMI.setDesc(get(NewOpc));
+      // It might happen that UseMI was commuted
+      // and we now have SGPR as SRC1. If so 2 inlined
+      // constant and SGPR are illegal.
+      legalizeOperands(UseMI);
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -2172,9 +2447,9 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
   return LowOffset + LowWidth <= HighOffset;
 }
 
-bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
-                                               MachineInstr &MIb) const {
-  MachineOperand *BaseOp0, *BaseOp1;
+bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
+                                               const MachineInstr &MIb) const {
+  const MachineOperand *BaseOp0, *BaseOp1;
   int64_t Offset0, Offset1;
 
   if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
@@ -2196,8 +2471,8 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
   return false;
 }
 
-bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
-                                                  MachineInstr &MIb,
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                                  const MachineInstr &MIb,
                                                   AliasAnalysis *AA) const {
   assert((MIa.mayLoad() || MIa.mayStore()) &&
          "MIa must load from or modify a memory location");
@@ -2211,17 +2486,6 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
   if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     return false;
 
-  if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
-    const MachineMemOperand *MMOa = *MIa.memoperands_begin();
-    const MachineMemOperand *MMOb = *MIb.memoperands_begin();
-    if (MMOa->getValue() && MMOb->getValue()) {
-      MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
-      MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
-      if (!AA->alias(LocA, LocB))
-        return true;
-    }
-  }
-
   // TODO: Should we check the address space from the MachineMemOperand? That
   // would allow us to distinguish objects we know don't alias based on the
   // underlying address space, even if it was lowered to a different one,
@@ -2275,18 +2539,21 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
                                                  LiveVariables *LV) const {
   unsigned Opc = MI.getOpcode();
   bool IsF16 = false;
-  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
+  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
 
   switch (Opc) {
   default:
     return nullptr;
   case AMDGPU::V_MAC_F16_e64:
+  case AMDGPU::V_FMAC_F16_e64:
     IsF16 = true;
     LLVM_FALLTHROUGH;
   case AMDGPU::V_MAC_F32_e64:
   case AMDGPU::V_FMAC_F32_e64:
     break;
   case AMDGPU::V_MAC_F16_e32:
+  case AMDGPU::V_FMAC_F16_e32:
     IsF16 = true;
     LLVM_FALLTHROUGH;
   case AMDGPU::V_MAC_F32_e32:
@@ -2315,30 +2582,38 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
 
-  if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
+  if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
       // If we have an SGPR input, we will violate the constant bus restriction.
-      (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
+      (ST.getConstantBusLimit(Opc) > 1 ||
+       !Src0->isReg() ||
+       !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
     if (auto Imm = getFoldableImm(Src2)) {
-      return BuildMI(*MBB, MI, MI.getDebugLoc(),
-                     get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
-               .add(*Dst)
-               .add(*Src0)
-               .add(*Src1)
-               .addImm(Imm);
+      unsigned NewOpc =
+         IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
+               : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
+      if (pseudoToMCOpcode(NewOpc) != -1)
+        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                 .add(*Dst)
+                 .add(*Src0)
+                 .add(*Src1)
+                 .addImm(Imm);
     }
+    unsigned NewOpc =
+      IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
+            : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
     if (auto Imm = getFoldableImm(Src1)) {
-      return BuildMI(*MBB, MI, MI.getDebugLoc(),
-                     get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
-               .add(*Dst)
-               .add(*Src0)
-               .addImm(Imm)
-               .add(*Src2);
+      if (pseudoToMCOpcode(NewOpc) != -1)
+        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+                 .add(*Dst)
+                 .add(*Src0)
+                 .addImm(Imm)
+                 .add(*Src2);
     }
     if (auto Imm = getFoldableImm(Src0)) {
-      if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
+      if (pseudoToMCOpcode(NewOpc) != -1 &&
+          isOperandLegal(MI, AMDGPU::getNamedOperandIdx(NewOpc,
                            AMDGPU::OpName::src0), Src1))
-        return BuildMI(*MBB, MI, MI.getDebugLoc(),
-                       get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+        return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
                  .add(*Dst)
                  .add(*Src1)
                  .addImm(Imm)
@@ -2346,9 +2621,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
     }
   }
 
-  assert((!IsFMA || !IsF16) && "fmac only expected with f32");
-  unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
-    (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+  unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16 : AMDGPU::V_FMA_F32)
+                          : (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+  if (pseudoToMCOpcode(NewOpc) == -1)
+    return nullptr;
+
   return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
       .add(*Dst)
       .addImm(Src0Mods ? Src0Mods->getImm() : 0)
@@ -2390,12 +2667,26 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
          changesVGPRIndexingMode(MI);
 }
 
+bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
+  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
+         Opcode == AMDGPU::DS_GWS_INIT ||
+         Opcode == AMDGPU::DS_GWS_SEMA_V ||
+         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+         Opcode == AMDGPU::DS_GWS_SEMA_P ||
+         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+         Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
 bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
   if (MI.mayStore() && isSMRD(MI))
     return true; // scalar store or atomic
 
+  // This will terminate the function when other lanes may need to continue.
+  if (MI.isReturn())
+    return true;
+
   // These instructions cause shader I/O that may cause hardware lockups
   // when executed with an empty EXEC mask.
   //
@@ -2403,10 +2694,12 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
   //       EXEC = 0, but checking for that case here seems not worth it
   //       given the typical code patterns.
   if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
-      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
+      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
+      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
     return true;
 
-  if (MI.isInlineAsm())
+  if (MI.isCall() || MI.isInlineAsm())
     return true; // conservative assumption
 
   // These are like SALU instructions in terms of effects, so it's questionable
@@ -2420,8 +2713,36 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
   return false;
 }
 
+bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI,
+                              const MachineInstr &MI) const {
+  if (MI.isMetaInstruction())
+    return false;
+
+  // This won't read exec if this is an SGPR->SGPR copy.
+  if (MI.isCopyLike()) {
+    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
+      return true;
+
+    // Make sure this isn't copying exec as a normal operand
+    return MI.readsRegister(AMDGPU::EXEC, &RI);
+  }
+
+  // Make a conservative assumption about the callee.
+  if (MI.isCall())
+    return true;
+
+  // Be conservative with any unhandled generic opcodes.
+  if (!isTargetSpecificOpcode(MI.getOpcode()))
+    return true;
+
+  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
+}
+
 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
   switch (Imm.getBitWidth()) {
+  case 1: // This likely will be a condition code mask.
+    return true;
+
   case 32:
     return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
                                         ST.hasInv2PiInlineImm());
@@ -2454,7 +2775,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
   case AMDGPU::OPERAND_REG_IMM_INT32:
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
-  case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32: {
     int32_t Trunc = static_cast<int32_t>(Imm);
     return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
   }
@@ -2467,7 +2790,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
     if (isInt<16>(Imm) || isUInt<16>(Imm)) {
       // A few special case instructions have 16-bit operands on subtargets
       // where 16-bit instructions are not legal.
@@ -2480,19 +2805,14 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
 
     return false;
   }
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
-  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
-    if (isUInt<16>(Imm)) {
-      int16_t Trunc = static_cast<int16_t>(Imm);
-      return ST.has16BitInsts() &&
-             AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
-    }
-    if (!(Imm & 0xffff)) {
-      return ST.has16BitInsts() &&
-             AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
-    }
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
     uint32_t Trunc = static_cast<uint32_t>(Imm);
-    return  AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
+    return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
   }
   default:
     llvm_unreachable("invalid bitwidth");
@@ -2534,9 +2854,10 @@ static bool compareMachineOp(const MachineOperand &Op0,
 
 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                                     const MachineOperand &MO) const {
-  const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
+  const MCInstrDesc &InstDesc = MI.getDesc();
+  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
 
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
 
   if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
     return true;
@@ -2547,7 +2868,15 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
   if (MO.isImm() && isInlineConstant(MO, OpInfo))
     return RI.opCanUseInlineConstant(OpInfo.OperandType);
 
-  return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
+    return false;
+
+  if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
+    return true;
+
+  const MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  return ST.hasVOP3Literal();
 }
 
 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
@@ -2586,7 +2915,8 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
   // Can't shrink instruction with three operands.
   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
   // a special case for it.  It can only be shrunk if the third operand
-  // is vcc.  We should handle this the same way we handle vopc, by addding
+  // is vcc, and src0_modifiers and src1_modifiers are not set.
+  // We should handle this the same way we handle vopc, by adding
   // a register allocation hint pre-regalloc and then do the shrinking
   // post-regalloc.
   if (Src2) {
@@ -2606,6 +2936,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
       case AMDGPU::V_MAC_F32_e64:
       case AMDGPU::V_MAC_F16_e64:
       case AMDGPU::V_FMAC_F32_e64:
+      case AMDGPU::V_FMAC_F16_e64:
         if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
             hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
           return false;
@@ -2662,7 +2993,8 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
     // dst
     Inst32.add(MI.getOperand(0));
   } else {
-    assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
+            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
            "Unexpected case");
   }
 
@@ -2707,19 +3039,19 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
   if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
     return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
 
-  // FLAT_SCR is just an SGPR pair.
-  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
-    return true;
-
-  // EXEC register uses the constant bus.
-  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
-    return true;
+  // Null is free
+  if (MO.getReg() == AMDGPU::SGPR_NULL)
+    return false;
 
   // SGPRs use the constant bus
-  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
-          (!MO.isImplicit() &&
-           (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
-            AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
+  if (MO.isImplicit()) {
+    return MO.getReg() == AMDGPU::M0 ||
+           MO.getReg() == AMDGPU::VCC ||
+           MO.getReg() == AMDGPU::VCC_LO;
+  } else {
+    return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
+           AMDGPU::SReg_64RegClass.contains(MO.getReg());
+  }
 }
 
 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
@@ -2730,6 +3062,8 @@ static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
 
     switch (MO.getReg()) {
     case AMDGPU::VCC:
+    case AMDGPU::VCC_LO:
+    case AMDGPU::VCC_HI:
     case AMDGPU::M0:
     case AMDGPU::FLAT_SCR:
       return MO.getReg();
@@ -2746,10 +3080,12 @@ static bool shouldReadExec(const MachineInstr &MI) {
   if (SIInstrInfo::isVALU(MI)) {
     switch (MI.getOpcode()) {
     case AMDGPU::V_READLANE_B32:
-    case AMDGPU::V_READLANE_B32_si:
+    case AMDGPU::V_READLANE_B32_gfx6_gfx7:
+    case AMDGPU::V_READLANE_B32_gfx10:
     case AMDGPU::V_READLANE_B32_vi:
     case AMDGPU::V_WRITELANE_B32:
-    case AMDGPU::V_WRITELANE_B32_si:
+    case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
+    case AMDGPU::V_WRITELANE_B32_gfx10:
     case AMDGPU::V_WRITELANE_B32_vi:
       return false;
     }
@@ -2830,7 +3166,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
 
     switch (Desc.OpInfo[i].OperandType) {
     case MCOI::OPERAND_REGISTER:
-      if (MI.getOperand(i).isImm()) {
+      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
         ErrInfo = "Illegal immediate value for operand.";
         return false;
       }
@@ -2843,7 +3179,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
-    case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+    case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
       const MachineOperand &MO = MI.getOperand(i);
       if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
         ErrInfo = "Illegal immediate value for operand.";
@@ -3022,9 +3362,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
       ++ConstantBusCount;
 
+    SmallVector<unsigned, 2> SGPRsUsed;
     unsigned SGPRUsed = findImplicitSGPRRead(MI);
-    if (SGPRUsed != AMDGPU::NoRegister)
+    if (SGPRUsed != AMDGPU::NoRegister) {
       ++ConstantBusCount;
+      SGPRsUsed.push_back(SGPRUsed);
+    }
 
     for (int OpIdx : OpIndices) {
       if (OpIdx == -1)
@@ -3032,23 +3375,37 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       const MachineOperand &MO = MI.getOperand(OpIdx);
       if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
         if (MO.isReg()) {
-          if (MO.getReg() != SGPRUsed)
-            ++ConstantBusCount;
           SGPRUsed = MO.getReg();
+          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
+                return !RI.regsOverlap(SGPRUsed, SGPR);
+              })) {
+            ++ConstantBusCount;
+            SGPRsUsed.push_back(SGPRUsed);
+          }
         } else {
           ++ConstantBusCount;
           ++LiteralCount;
         }
       }
     }
-    if (ConstantBusCount > 1) {
-      ErrInfo = "VOP* instruction uses the constant bus more than once";
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+    // v_writelane_b32 is an exception from constant bus restriction:
+    // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
+    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
+        Opcode != AMDGPU::V_WRITELANE_B32) {
+      ErrInfo = "VOP* instruction violates constant bus restriction";
       return false;
     }
 
     if (isVOP3(MI) && LiteralCount) {
-      ErrInfo = "VOP3 instruction uses literal";
-      return false;
+      if (LiteralCount && !ST.hasVOP3Literal()) {
+        ErrInfo = "VOP3 instruction uses literal";
+        return false;
+      }
+      if (LiteralCount > 1) {
+        ErrInfo = "VOP3 instruction uses more than one literal";
+        return false;
+      }
     }
   }
 
@@ -3067,17 +3424,43 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }
 
+  if (isSOP2(MI) || isSOPC(MI)) {
+    const MachineOperand &Src0 = MI.getOperand(Src0Idx);
+    const MachineOperand &Src1 = MI.getOperand(Src1Idx);
+    unsigned Immediates = 0;
+
+    if (!Src0.isReg() &&
+        !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType))
+      Immediates++;
+    if (!Src1.isReg() &&
+        !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType))
+      Immediates++;
+
+    if (Immediates > 1) {
+      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
+      return false;
+    }
+  }
+
   if (isSOPK(MI)) {
-    int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
-    if (sopkIsZext(MI)) {
-      if (!isUInt<16>(Imm)) {
-        ErrInfo = "invalid immediate for SOPK instruction";
+    auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16);
+    if (Desc.isBranch()) {
+      if (!Op->isMBB()) {
+        ErrInfo = "invalid branch target for SOPK instruction";
         return false;
       }
     } else {
-      if (!isInt<16>(Imm)) {
-        ErrInfo = "invalid immediate for SOPK instruction";
-        return false;
+      uint64_t Imm = Op->getImm();
+      if (sopkIsZext(MI)) {
+        if (!isUInt<16>(Imm)) {
+          ErrInfo = "invalid immediate for SOPK instruction";
+          return false;
+        }
+      } else {
+        if (!isInt<16>(Imm)) {
+          ErrInfo = "invalid immediate for SOPK instruction";
+          return false;
+        }
       }
     }
   }
@@ -3155,6 +3538,53 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }
 
+  if (isMIMG(MI)) {
+    const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
+    if (DimOp) {
+      int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
+                                                 AMDGPU::OpName::vaddr0);
+      int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+      const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
+      const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+          AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+      const AMDGPU::MIMGDimInfo *Dim =
+          AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm());
+
+      if (!Dim) {
+        ErrInfo = "dim is out of range";
+        return false;
+      }
+
+      bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
+      unsigned AddrWords = BaseOpcode->NumExtraArgs +
+                           (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+                           (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+                           (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+      unsigned VAddrWords;
+      if (IsNSA) {
+        VAddrWords = SRsrcIdx - VAddr0Idx;
+      } else {
+        const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
+        VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
+        if (AddrWords > 8)
+          AddrWords = 16;
+        else if (AddrWords > 4)
+          AddrWords = 8;
+        else if (AddrWords == 3 && VAddrWords == 4) {
+          // CodeGen uses the V4 variant of instructions for three addresses,
+          // because the selection DAG does not support non-power-of-two types.
+          AddrWords = 4;
+        }
+      }
+
+      if (VAddrWords != AddrWords) {
+        ErrInfo = "bad vaddr size";
+        return false;
+      }
+    }
+  }
+
   const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
   if (DppCt) {
     using namespace AMDGPU::DPP;
@@ -3165,10 +3595,29 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
         (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
         (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
         (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
-        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
+        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
+        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
       ErrInfo = "Invalid dpp_ctrl value";
       return false;
     }
+    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
+        ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      ErrInfo = "Invalid dpp_ctrl value: "
+                "wavefront shifts are not supported on GFX10+";
+      return false;
+    }
+    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
+        ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      ErrInfo = "Invalid dpp_ctrl value: "
+                "broadcasts are not supported on GFX10+";
+      return false;
+    }
+    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
+        ST.getGeneration() < AMDGPUSubtarget::GFX10) {
+      ErrInfo = "Invalid dpp_ctrl value: "
+                "row_share and row_xmask are not supported before GFX10";
+      return false;
+    }
   }
 
   return true;
@@ -3183,9 +3632,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
   case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
   case AMDGPU::WQM: return AMDGPU::WQM;
   case AMDGPU::WWM: return AMDGPU::WWM;
-  case AMDGPU::S_MOV_B32:
-    return MI.getOperand(1).isReg() ?
+  case AMDGPU::S_MOV_B32: {
+    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+    return MI.getOperand(1).isReg() ||
+           RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
            AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
+  }
   case AMDGPU::S_ADD_I32:
     return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
   case AMDGPU::S_ADDC_U32:
@@ -3199,7 +3651,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
   case AMDGPU::S_SUB_U32:
     return AMDGPU::V_SUB_I32_e32;
   case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
-  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
+  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32;
+  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
+  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
   case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
   case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
   case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
@@ -3244,6 +3698,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
   case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
   case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
   }
+  llvm_unreachable(
+      "Unexpected scalar opcode without corresponding vector one!");
 }
 
 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
@@ -3263,30 +3719,21 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
   return RI.getRegClass(RCID);
 }
 
-bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
-  switch (MI.getOpcode()) {
-  case AMDGPU::COPY:
-  case AMDGPU::REG_SEQUENCE:
-  case AMDGPU::PHI:
-  case AMDGPU::INSERT_SUBREG:
-    return RI.hasVGPRs(getOpRegClass(MI, 0));
-  default:
-    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
-  }
-}
-
 void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
   MachineBasicBlock::iterator I = MI;
   MachineBasicBlock *MBB = MI.getParent();
   MachineOperand &MO = MI.getOperand(OpIdx);
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
   unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
   const TargetRegisterClass *RC = RI.getRegClass(RCID);
-  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+  unsigned Size = TRI->getRegSizeInBits(*RC);
+  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
   if (MO.isReg())
     Opcode = AMDGPU::COPY;
   else if (RI.isSGPRClass(RC))
-    Opcode = AMDGPU::S_MOV_B32;
+    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
 
   const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
   if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
@@ -3396,37 +3843,53 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
     return isLegalRegOperand(MRI, OpInfo, MO);
 
   // Handle non-register types that are treated like immediates.
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
   return true;
 }
 
 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                                  const MachineOperand *MO) const {
-  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   const MCInstrDesc &InstDesc = MI.getDesc();
   const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const TargetRegisterClass *DefinedRC =
       OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
   if (!MO)
     MO = &MI.getOperand(OpIdx);
 
+  int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
+  int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
   if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
+    if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
+      return false;
 
-    RegSubRegPair SGPRUsed;
+    SmallDenseSet<RegSubRegPair> SGPRsUsed;
     if (MO->isReg())
-      SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
+      SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg()));
 
     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
       if (i == OpIdx)
         continue;
       const MachineOperand &Op = MI.getOperand(i);
       if (Op.isReg()) {
-        if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
+        RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
+        if (!SGPRsUsed.count(SGPR) &&
             usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
-          return false;
+          if (--ConstantBusLimit <= 0)
+            return false;
+          SGPRsUsed.insert(SGPR);
         }
       } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
-        return false;
+        if (--ConstantBusLimit <= 0)
+          return false;
+      } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
+                 isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
+        if (!VOP3LiteralLimit--)
+          return false;
+        if (--ConstantBusLimit <= 0)
+          return false;
       }
     }
   }
@@ -3437,7 +3900,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
   }
 
   // Handle non-register types that are treated like immediates.
-  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
 
   if (!DefinedRC) {
     // This operand expects an immediate.
@@ -3452,30 +3915,24 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
   unsigned Opc = MI.getOpcode();
   const MCInstrDesc &InstrDesc = get(Opc);
 
+  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+  MachineOperand &Src0 = MI.getOperand(Src0Idx);
+
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
   MachineOperand &Src1 = MI.getOperand(Src1Idx);
 
   // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
-  // we need to only have one constant bus use.
-  //
-  // Note we do not need to worry about literal constants here. They are
-  // disabled for the operand type for instructions because they will always
-  // violate the one constant bus use rule.
+  // we need to only have one constant bus use before GFX10.
   bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
-  if (HasImplicitSGPR) {
-    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
-    MachineOperand &Src0 = MI.getOperand(Src0Idx);
-
-    if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
-      legalizeOpWithMove(MI, Src0Idx);
-  }
+  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 &&
+      Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) ||
+       isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx])))
+    legalizeOpWithMove(MI, Src0Idx);
 
   // Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
   // both the value to write (src0) and lane select (src1).  Fix up non-SGPR
   // src0/src1 with V_READFIRSTLANE.
   if (Opc == AMDGPU::V_WRITELANE_B32) {
-    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
-    MachineOperand &Src0 = MI.getOperand(Src0Idx);
     const DebugLoc &DL = MI.getDebugLoc();
     if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
       unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -3493,6 +3950,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
     return;
   }
 
+  // No VOP2 instructions support AGPRs.
+  if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg()))
+    legalizeOpWithMove(MI, Src0Idx);
+
+  if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg()))
+    legalizeOpWithMove(MI, Src1Idx);
+
   // VOP2 src0 instructions support all operand types, so we don't need to check
   // their legality. If src1 is already legal, we don't need to do anything.
   if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
@@ -3520,9 +3984,6 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
     return;
   }
 
-  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
-  MachineOperand &Src0 = MI.getOperand(Src0Idx);
-
   // If src0 can be used as src1, commuting will make the operands legal.
   // Otherwise we have to give up and insert a move.
   //
@@ -3556,12 +4017,11 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
 
   Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
   Src1.setSubReg(Src0SubReg);
+  fixImplicitOperands(MI);
 }
 
-// Legalize VOP3 operands. Because all operand types are supported for any
-// operand, and since literal constants are not allowed and should never be
-// seen, we only need to worry about inserting copies if we use multiple SGPR
-// operands.
+// Legalize VOP3 operands. All operand types are supported for any operand
+// but only one literal constant and only starting from GFX10.
 void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
                                        MachineInstr &MI) const {
   unsigned Opc = MI.getOpcode();
@@ -3572,8 +4032,35 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
     AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
   };
 
+  if (Opc == AMDGPU::V_PERMLANE16_B32 ||
+      Opc == AMDGPU::V_PERMLANEX16_B32) {
+    // src1 and src2 must be scalar
+    MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]);
+    MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
+    const DebugLoc &DL = MI.getDebugLoc();
+    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+        .add(Src1);
+      Src1.ChangeToRegister(Reg, false);
+    }
+    if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
+      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+        .add(Src2);
+      Src2.ChangeToRegister(Reg, false);
+    }
+  }
+
   // Find the one SGPR operand we are allowed to use.
+  int ConstantBusLimit = ST.getConstantBusLimit(Opc);
+  int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
+  SmallDenseSet<unsigned> SGPRsUsed;
   unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+  if (SGPRReg != AMDGPU::NoRegister) {
+    SGPRsUsed.insert(SGPRReg);
+    --ConstantBusLimit;
+  }
 
   for (unsigned i = 0; i < 3; ++i) {
     int Idx = VOP3Idx[i];
@@ -3581,16 +4068,38 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
       break;
     MachineOperand &MO = MI.getOperand(Idx);
 
-    // We should never see a VOP3 instruction with an illegal immediate operand.
-    if (!MO.isReg())
+    if (!MO.isReg()) {
+      if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx]))
+        continue;
+
+      if (LiteralLimit > 0 && ConstantBusLimit > 0) {
+        --LiteralLimit;
+        --ConstantBusLimit;
+        continue;
+      }
+
+      --LiteralLimit;
+      --ConstantBusLimit;
+      legalizeOpWithMove(MI, Idx);
       continue;
+    }
+
+    if (RI.hasAGPRs(MRI.getRegClass(MO.getReg())) &&
+        !isOperandLegal(MI, Idx, &MO)) {
+      legalizeOpWithMove(MI, Idx);
+      continue;
+    }
 
     if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
       continue; // VGPRs are legal
 
-    if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
-      SGPRReg = MO.getReg();
-      // We can use one SGPR in each VOP3 instruction.
+    // We can use one SGPR in each VOP3 instruction prior to GFX10
+    // and two starting from GFX10.
+    if (SGPRsUsed.count(MO.getReg()))
+      continue;
+    if (ConstantBusLimit > 0) {
+      SGPRsUsed.insert(MO.getReg());
+      --ConstantBusLimit;
       continue;
     }
 
@@ -3607,6 +4116,15 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
   unsigned DstReg = MRI.createVirtualRegister(SRC);
   unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
 
+  if (RI.hasAGPRs(VRC)) {
+    VRC = RI.getEquivalentVGPRClass(VRC);
+    unsigned NewSrcReg = MRI.createVirtualRegister(VRC);
+    BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
+            get(TargetOpcode::COPY), NewSrcReg)
+        .addReg(SrcReg);
+    SrcReg = NewSrcReg;
+  }
+
   if (SubRegs == 1) {
     BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
             get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
@@ -3691,15 +4209,27 @@ static void
 emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
                           MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
                           const DebugLoc &DL, MachineOperand &Rsrc) {
+  MachineFunction &MF = *OrigBB.getParent();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+  unsigned SaveExecOpc =
+      ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+  unsigned XorTermOpc =
+      ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
+  unsigned AndOpc =
+      ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+  const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
   MachineBasicBlock::iterator I = LoopBB.begin();
 
   unsigned VRsrc = Rsrc.getReg();
   unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
 
-  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+  unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+  unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+  unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC);
   unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
   unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
   unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -3737,22 +4267,22 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
   BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
       .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
       .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
-  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
+  BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndCond)
       .addReg(CondReg0)
       .addReg(CondReg1);
 
   MRI.setSimpleHint(SaveExec, AndCond);
 
   // Update EXEC to matching lanes, saving original to SaveExec.
-  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
+  BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec)
       .addReg(AndCond, RegState::Kill);
 
   // The original instruction is here; we insert the terminators after it.
   I = LoopBB.end();
 
   // Update EXEC, switch all done bits to 0 and all todo bits to 1.
-  BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
-      .addReg(AMDGPU::EXEC)
+  BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec)
+      .addReg(Exec)
       .addReg(SaveExec);
   BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
 }
@@ -3763,15 +4293,19 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
                               MachineOperand &Rsrc, MachineDominatorTree *MDT) {
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   MachineBasicBlock::iterator I(&MI);
   const DebugLoc &DL = MI.getDebugLoc();
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+  unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+  const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
 
-  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+  unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
 
   // Save the EXEC mask
-  BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
-      .addReg(AMDGPU::EXEC);
+  BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
 
   // Killed uses in the instruction we are waterfalling around will be
   // incorrect due to the added control-flow.
@@ -3820,8 +4354,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
 
   // Restore the EXEC mask
   MachineBasicBlock::iterator First = RemainderBB->begin();
-  BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
-      .addReg(SaveExec);
+  BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
 }
 
 // Extract pointer from Rsrc and return a zero-value Rsrc replacement.
@@ -3901,7 +4434,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
         continue;
       const TargetRegisterClass *OpRC =
           MRI.getRegClass(MI.getOperand(i).getReg());
-      if (RI.hasVGPRs(OpRC)) {
+      if (RI.hasVectorRegisters(OpRC)) {
         VRC = OpRC;
       } else {
         SRC = OpRC;
@@ -3914,7 +4447,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-        VRC = RI.getEquivalentVGPRClass(SRC);
+        VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC)
+                                                : RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -3983,7 +4517,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
   // Legalize SI_INIT_M0
   if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
     MachineOperand &Src = MI.getOperand(0);
-    if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
+    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
       Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
     return;
   }
@@ -4047,19 +4581,28 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
       unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
       unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
+      const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+      unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+      unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+
       unsigned RsrcPtr, NewSRsrc;
       std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
 
       // NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
-      DebugLoc DL = MI.getDebugLoc();
-      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
-          .addReg(RsrcPtr, 0, AMDGPU::sub0)
-          .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+      const DebugLoc &DL = MI.getDebugLoc();
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo)
+        .addDef(CondReg0)
+        .addReg(RsrcPtr, 0, AMDGPU::sub0)
+        .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+        .addImm(0);
 
       // NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
-      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
-          .addReg(RsrcPtr, 0, AMDGPU::sub1)
-          .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi)
+        .addDef(CondReg1, RegState::Dead)
+        .addReg(RsrcPtr, 0, AMDGPU::sub1)
+        .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
+        .addReg(CondReg0, RegState::Kill)
+        .addImm(0);
 
       // NewVaddr = {NewVaddrHi, NewVaddrLo}
       BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -4106,6 +4649,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
                 getNamedOperand(MI, AMDGPU::OpName::glc)) {
           MIB.addImm(GLC->getImm());
         }
+        if (const MachineOperand *DLC =
+                getNamedOperand(MI, AMDGPU::OpName::dlc)) {
+          MIB.addImm(DLC->getImm());
+        }
 
         MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
 
@@ -4235,37 +4782,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
       continue;
 
     case AMDGPU::S_LSHL_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHR_B32:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHL_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHLREV_B64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_ASHRREV_I64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHR_B64:
-      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+      if (ST.hasOnlyRevVALUShifts()) {
         NewOpcode = AMDGPU::V_LSHRREV_B64;
         swapOperands(Inst);
       }
@@ -4279,10 +4826,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
     case AMDGPU::S_CBRANCH_SCC0:
     case AMDGPU::S_CBRANCH_SCC1:
       // Clear unused bits of vcc
-      BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
-              AMDGPU::VCC)
-          .addReg(AMDGPU::EXEC)
-          .addReg(AMDGPU::VCC);
+      if (ST.isWave32())
+        BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B32),
+                AMDGPU::VCC_LO)
+            .addReg(AMDGPU::EXEC_LO)
+            .addReg(AMDGPU::VCC_LO);
+      else
+        BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
+                AMDGPU::VCC)
+            .addReg(AMDGPU::EXEC)
+            .addReg(AMDGPU::VCC);
       break;
 
     case AMDGPU::S_BFE_U64:
@@ -4339,8 +4892,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
     for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
       MachineOperand &Op = Inst.getOperand(i);
       if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
+        // Only propagate through live-def of SCC.
+        if (Op.isDef() && !Op.isDead())
+          addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
         Inst.RemoveOperand(i);
-        addSCCDefUsersToVALUWorklist(Inst, Worklist);
       }
     }
 
@@ -4358,6 +4913,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
     }
 
     Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
+    fixImplicitOperands(Inst);
 
     if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
       const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
@@ -4445,6 +5001,7 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
     Inst.RemoveOperand(3);
 
     Inst.setDesc(get(NewOpc));
+    Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
     Inst.addImplicitDefUseOperands(*MBB.getParent());
     MRI.replaceRegWith(OldDstReg, ResultReg);
     legalizeOperands(Inst, MDT);
@@ -4514,8 +5071,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
                       RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
     bool Src1IsSGPR = Src1.isReg() &&
                       RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
-    MachineInstr *Not = nullptr;
-    MachineInstr *Xor = nullptr;
+    MachineInstr *Xor;
     unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
@@ -4523,14 +5079,12 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
     // The next iteration over the work list will lower these to the vector
     // unit as necessary.
     if (Src0IsSGPR) {
-      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
-        .add(Src0);
+      BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0);
       Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
       .addReg(Temp)
       .add(Src1);
     } else if (Src1IsSGPR) {
-      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
-        .add(Src1);
+      BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1);
       Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
       .add(Src0)
       .addReg(Temp);
@@ -4538,8 +5092,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
       Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
         .add(Src0)
         .add(Src1);
-      Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
-        .addReg(Temp);
+      MachineInstr *Not =
+          BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp);
       Worklist.insert(Not);
     }
 
@@ -4670,13 +5224,14 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
 
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
 
   unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
   unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
-  unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-  unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+  unsigned CarryReg = MRI.createVirtualRegister(CarryRC);
+  unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC);
 
   MachineOperand &Dest = Inst.getOperand(0);
   MachineOperand &Src0 = Inst.getOperand(1);
@@ -4705,7 +5260,8 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
     BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
     .addReg(CarryReg, RegState::Define)
     .add(SrcReg0Sub0)
-    .add(SrcReg1Sub0);
+    .add(SrcReg1Sub0)
+    .addImm(0); // clamp bit
 
   unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
   MachineInstr *HiHalf =
@@ -4713,7 +5269,8 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
     .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
     .add(SrcReg0Sub1)
     .add(SrcReg1Sub1)
-    .addReg(CarryReg, RegState::Kill);
+    .addReg(CarryReg, RegState::Kill)
+    .addImm(0); // clamp bit
 
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
@@ -4943,7 +5500,23 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
          E = MRI.use_end(); I != E;) {
     MachineInstr &UseMI = *I->getParent();
-    if (!canReadVGPR(UseMI, I.getOperandNo())) {
+
+    unsigned OpNo = 0;
+
+    switch (UseMI.getOpcode()) {
+    case AMDGPU::COPY:
+    case AMDGPU::WQM:
+    case AMDGPU::WWM:
+    case AMDGPU::REG_SEQUENCE:
+    case AMDGPU::PHI:
+    case AMDGPU::INSERT_SUBREG:
+      break;
+    default:
+      OpNo = I.getOperandNo();
+      break;
+    }
+
+    if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) {
       Worklist.insert(&UseMI);
 
       do {
@@ -5017,19 +5590,23 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 }
 
-void SIInstrInfo::addSCCDefUsersToVALUWorklist(
-    MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
+void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+                                               MachineInstr &SCCDefInst,
+                                               SetVectorType &Worklist) const {
+  // Ensure that def inst defines SCC, which is still live.
+  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
+         !Op.isDead() && Op.getParent() == &SCCDefInst);
   // This assumes that all the users of SCC are in the same block
   // as the SCC def.
-  for (MachineInstr &MI :
-       make_range(MachineBasicBlock::iterator(SCCDefInst),
-                      SCCDefInst.getParent()->end())) {
+  for (MachineInstr &MI : // Skip the def inst itself.
+       make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
+                  SCCDefInst.getParent()->end())) {
+    // Check if SCC is used first.
+    if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
+      Worklist.insert(&MI);
     // Exit if we find another SCC def.
     if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
       return;
-
-    if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
-      Worklist.insert(&MI);
   }
 }
 
@@ -5046,14 +5623,26 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::REG_SEQUENCE:
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
-  case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC))
-      return nullptr;
+  case AMDGPU::WWM: {
+    const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
+    if (RI.hasAGPRs(SrcRC)) {
+      if (RI.hasAGPRs(NewDstRC))
+        return nullptr;
+
+      NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+      if (!NewDstRC)
+        return nullptr;
+    } else {
+       if (RI.hasVGPRs(NewDstRC))
+        return nullptr;
+
+      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+      if (!NewDstRC)
+        return nullptr;
+    }
 
-    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
-    if (!NewDstRC)
-      return nullptr;
     return NewDstRC;
+  }
   default:
     return NewDstRC;
   }
@@ -5139,6 +5728,12 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
 }
 
 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
+  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+    return (22ULL << 44) | // IMG_FORMAT_32_FLOAT
+           (1ULL << 56) | // RESOURCE_LEVEL = 1
+           (3ULL << 60); // OOB_SELECT = 3
+  }
+
   uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
   if (ST.isAmdHsaOS()) {
     // Set ATC = 1. GFX9 doesn't have this bit.
@@ -5165,12 +5760,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
     Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
   }
 
-  // IndexStride = 64.
-  Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
+  // IndexStride = 64 (wave64) or 32 (wave32); encoded as 3 or 2.
+  uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2;
+  Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
 
   // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
   // Clear them unless we want a huge stride.
-  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+      ST.getGeneration() <= AMDGPUSubtarget::GFX9)
     Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
 
   return Rsrc23;
@@ -5267,25 +5864,35 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
       return DescSize; // No operands.
 
     if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
-      return DescSize + 4;
+      return isVOP3(MI) ? 12 : (DescSize + 4);
 
     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
     if (Src1Idx == -1)
       return DescSize;
 
     if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
-      return DescSize + 4;
+      return isVOP3(MI) ? 12 : (DescSize + 4);
 
     int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
     if (Src2Idx == -1)
       return DescSize;
 
     if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
-      return DescSize + 4;
+      return isVOP3(MI) ? 12 : (DescSize + 4);
 
     return DescSize;
   }
 
+  // Check whether we have extra NSA words.
+  if (isMIMG(MI)) {
+    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+    if (VAddr0Idx < 0)
+      return 8;
+
+    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
+  }
+
   switch (Opc) {
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
@@ -5294,10 +5901,12 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     return 0;
   case TargetOpcode::BUNDLE:
     return getInstBundleSize(MI);
-  case TargetOpcode::INLINEASM: {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
-    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(),
+                              &MF->getSubtarget());
   }
   default:
     return DescSize;
@@ -5332,7 +5941,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
   MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
 
   if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
-    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
     MachineInstr *SIIF =
         BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
             .add(Branch->getOperand(0))
@@ -5359,8 +5968,8 @@ void SIInstrInfo::convertNonUniformLoopRegion(
 
   if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
 
-    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+    unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
     MachineInstrBuilder HeaderPHIBuilder =
         BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
     for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
@@ -5370,7 +5979,7 @@ void SIInstrInfo::convertNonUniformLoopRegion(
         HeaderPHIBuilder.addReg(BackEdgeReg);
       } else {
         MachineBasicBlock *PMBB = *PI;
-        unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+        unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
         materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
                              ZeroReg, 0);
         HeaderPHIBuilder.addReg(ZeroReg);
@@ -5432,7 +6041,9 @@ SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
     { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
     { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
     { MO_REL32_LO, "amdgpu-rel32-lo" },
-    { MO_REL32_HI, "amdgpu-rel32-hi" }
+    { MO_REL32_HI, "amdgpu-rel32-hi" },
+    { MO_ABS32_LO, "amdgpu-abs32-lo" },
+    { MO_ABS32_HI, "amdgpu-abs32-hi" },
   };
 
   return makeArrayRef(TargetFlags);
@@ -5452,8 +6063,8 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
     return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
 
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
+  unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
+  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
 
   return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
            .addReg(UnusedCarry, RegState::Define | RegState::Dead);
@@ -5480,6 +6091,20 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
   }
 }
 
+void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction *MF = MBB->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  // Nothing to do unless the subtarget is wave32; MI is left untouched.
+  if (!ST.isWave32())
+    return;
+  // Retarget every implicit register operand naming VCC to VCC_LO.
+  for (auto &Op : MI.implicit_operands()) {
+    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
+      Op.setReg(AMDGPU::VCC_LO);
+  }
+}
+
 bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
   if (!isSMRD(MI))
     return false;
@@ -5493,6 +6118,25 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
   return RCID == AMDGPU::SReg_128RegClassID;
 }
 
+bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+                                    bool Signed) const {
+  // TODO: Should 0 be special cased?
+  if (!ST.hasFlatInstOffsets())
+    return false;
+  // With the flat segment offset bug, no offset is legal for FLAT_ADDRESS.
+  if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+    return false;
+  // GFX10 and later: 12-bit signed or 11-bit unsigned offsets.
+  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+    return (Signed && isInt<12>(Offset)) ||
+           (!Signed && isUInt<11>(Offset));
+  }
+  // Earlier generations: 13-bit signed or 12-bit unsigned offsets.
+  return (Signed && isInt<13>(Offset)) ||
+         (!Signed && isUInt<12>(Offset));
+}
+
+
 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
 enum SIEncodingFamily {
   SI = 0,
@@ -5500,7 +6144,9 @@ enum SIEncodingFamily {
   SDWA = 2,
   SDWA9 = 3,
   GFX80 = 4,
-  GFX9 = 5
+  GFX9 = 5,
+  GFX10 = 6,
+  SDWA10 = 7
 };
 
 static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@@ -5513,6 +6159,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
   case AMDGPUSubtarget::VOLCANIC_ISLANDS:
   case AMDGPUSubtarget::GFX9:
     return SIEncodingFamily::VI;
+  case AMDGPUSubtarget::GFX10:
+    return SIEncodingFamily::GFX10;
   }
   llvm_unreachable("Unknown subtarget generation!");
 }
@@ -5521,18 +6169,29 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
 
   if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
-    ST.getGeneration() >= AMDGPUSubtarget::GFX9)
+    ST.getGeneration() == AMDGPUSubtarget::GFX9)
     Gen = SIEncodingFamily::GFX9;
 
-  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
-    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
-                                                      : SIEncodingFamily::SDWA;
   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
   // subtarget has UnpackedD16VMem feature.
   // TODO: remove this when we discard GFX80 encoding.
   if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
     Gen = SIEncodingFamily::GFX80;
 
+  if (get(Opcode).TSFlags & SIInstrFlags::SDWA) {
+    switch (ST.getGeneration()) {
+    default:
+      Gen = SIEncodingFamily::SDWA;
+      break;
+    case AMDGPUSubtarget::GFX9:
+      Gen = SIEncodingFamily::SDWA9;
+      break;
+    case AMDGPUSubtarget::GFX10:
+      Gen = SIEncodingFamily::SDWA10;
+      break;
+    }
+  }
+
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
 
   // -1 means that Opcode is already a native instruction.
@@ -5627,3 +6286,88 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
   }
   return nullptr;
 }
+
+bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                      Register VReg,
+                                      const MachineInstr &DefMI,
+                                      const MachineInstr &UseMI) {
+  assert(MRI.isSSA() && "Must be run on SSA");
+
+  auto *TRI = MRI.getTargetRegisterInfo();
+  auto *DefBB = DefMI.getParent();
+
+  // Don't bother searching between blocks, although it is possible this block
+  // doesn't modify exec. A PHI use reads its value on an incoming edge and may
+  // precede DefMI in the block, so be conservative for PHIs as well.
+  if (UseMI.getParent() != DefBB || UseMI.isPHI())
+    return true;
+
+  const int MaxInstScan = 20;
+  int NumInst = 0;
+
+  // Stop scan at the use.
+  auto E = UseMI.getIterator();
+  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
+    if (I->isDebugInstr())
+      continue;
+
+    if (++NumInst > MaxInstScan)
+      return true;
+
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+      return true;
+  }
+
+  return false;
+}
+
+bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
+                                         Register VReg,
+                                         const MachineInstr &DefMI) {
+  assert(MRI.isSSA() && "Must be run on SSA");
+
+  auto *TRI = MRI.getTargetRegisterInfo();
+  auto *DefBB = DefMI.getParent();
+
+  const int MaxUseInstScan = 10;
+  int NumUseInst = 0;
+
+  for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+    // Don't bother searching between blocks, although it is possible this block
+    // doesn't modify exec. PHI uses may precede DefMI in the block, so the
+    // forward scan below cannot be relied on to find them; be conservative.
+    if (UseInst.getParent() != DefBB || UseInst.isPHI())
+      return true;
+
+    if (++NumUseInst > MaxUseInstScan)
+      return true;
+  }
+
+  // Without any non-debug use there is nothing for an exec change to precede,
+  // and the scan below would never find its stopping point.
+  if (NumUseInst == 0)
+    return false;
+
+  const int MaxInstScan = 20;
+  int NumInst = 0;
+
+  // Stop scan when we have seen all the uses, or at the end of the block.
+  const auto E = DefBB->instr_end();
+  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
+    if (I->isDebugInstr())
+      continue;
+
+    if (++NumInst > MaxInstScan)
+      return true;
+
+    if (I->readsRegister(VReg))
+      if (--NumUseInst == 0)
+        return false;
+
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+      return true;
+  }
+
+  // Ran off the block without seeing every use; be conservative.
+  return true;
+}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 5b1a05f3785e..3ff35da0b963 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1,9 +1,8 @@
 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -121,14 +120,15 @@ private:
   void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
                                     SetVectorType &Worklist) const;
 
-  void
-  addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
-                               SetVectorType &Worklist) const;
+  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+                                    MachineInstr &SCCDefInst,
+                                    SetVectorType &Worklist) const;
 
   const TargetRegisterClass *
   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
 
-  bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const;
+  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
+                                    const MachineInstr &MIb) const;
 
   unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
 
@@ -143,7 +143,7 @@ protected:
 
 public:
   enum TargetOperandFlags {
-    MO_MASK = 0x7,
+    MO_MASK = 0xf,
 
     MO_NONE = 0,
     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
@@ -157,7 +157,13 @@ public:
     MO_REL32 = 4,
     MO_REL32_LO = 4,
     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
-    MO_REL32_HI = 5
+    MO_REL32_HI = 5,
+
+    MO_LONG_BRANCH_FORWARD = 6,
+    MO_LONG_BRANCH_BACKWARD = 7,
+
+    MO_ABS32_LO = 8,
+    MO_ABS32_HI = 9,
   };
 
   explicit SIInstrInfo(const GCNSubtarget &ST);
@@ -173,11 +179,13 @@ public:
                                int64_t &Offset1,
                                int64_t &Offset2) const override;
 
-  bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffset(const MachineInstr &LdSt,
+                               const MachineOperand *&BaseOp,
                                int64_t &Offset,
                                const TargetRegisterInfo *TRI) const final;
 
-  bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
+  bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+                           const MachineOperand &BaseOp2,
                            unsigned NumLoads) const override;
 
   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
@@ -294,7 +302,8 @@ public:
              unsigned Kind) const override;
 
   bool
-  areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const override;
 
   bool isFoldableCopy(const MachineInstr &MI) const;
@@ -376,6 +385,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
   }
 
+  static bool isPacked(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
+  }
+
+  bool isPacked(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
+  }
+
   static bool isVOP1(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
   }
@@ -450,6 +467,8 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::DS;
   }
 
+  bool isAlwaysGDS(uint16_t Opcode) const;
+
   static bool isMIMG(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
   }
@@ -477,6 +496,11 @@ public:
     return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
   }
 
+  // FIXME: Make this more precise
+  static bool isFLATScratch(const MachineInstr &MI) {
+    return isSegmentSpecificFLAT(MI);
+  }
+
   // Any FLAT encoded instruction, including global_* and scratch_*.
   bool isFLAT(uint16_t Opcode) const {
     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
@@ -546,6 +570,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
   }
 
+  static bool isMAI(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
+  }
+
+  bool isMAI(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
+  }
+
   static bool isScalarUnit(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
   }
@@ -612,6 +644,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
   }
 
+  static bool isFPAtomic(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
+  }
+
+  bool isFPAtomic(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
+  }
+
   bool isVGPRCopy(const MachineInstr &MI) const {
     assert(MI.isCopy());
     unsigned Dest = MI.getOperand(0).getReg();
@@ -620,9 +660,21 @@ public:
     return !RI.isSGPRReg(MRI, Dest);
   }
 
+  bool hasVGPRUses(const MachineInstr &MI) const {
+    const MachineFunction &MF = *MI.getParent()->getParent();
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    return llvm::any_of(MI.explicit_uses(),
+                        [&MRI, this](const MachineOperand &MO) {
+      return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
+  }
+
   /// Whether we must prevent this instruction from executing with EXEC = 0.
   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
 
+  /// Returns true if the instruction could potentially depend on the value of
+  /// exec. If false, exec dependencies may safely be ignored.
+  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
+
   bool isInlineConstant(const APInt &Imm) const;
 
   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
@@ -761,10 +813,6 @@ public:
     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
   }
 
-  /// \returns true if it is legal for the operand at index \p OpNo
-  /// to read a VGPR.
-  bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
-
   /// Legalize the \p OpIndex operand of this instruction by inserting
   /// a MOV.  For example:
   /// ADD_I32_e32 VGPR0, 15
@@ -836,7 +884,7 @@ public:
   void insertReturn(MachineBasicBlock &MBB) const;
   /// Return the number of wait states that result from executing this
   /// instruction.
-  unsigned getNumWaitStates(const MachineInstr &MI) const;
+  static unsigned getNumWaitStates(const MachineInstr &MI);
 
   /// Returns the operand named \p Op.  If \p MI does not have an
   /// operand named \c Op, this function returns nullptr.
@@ -922,10 +970,27 @@ public:
     return isUInt<12>(Imm);
   }
 
+  /// Returns true if \p Offset is legal for the subtarget as the offset to a
+  /// FLAT encoded instruction. If \p Signed, this is for an instruction that
+  /// interprets the offset as signed.
+  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+                         bool Signed) const;
+
   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
   /// Return -1 if the target-specific opcode for the pseudo instruction does
   /// not exist. If Opcode is not a pseudo instruction, this is identity.
   int pseudoToMCOpcode(int Opcode) const;
+
+  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+                                         const TargetRegisterInfo *TRI,
+                                         const MachineFunction &MF)
+    const override {
+    if (OpNum >= TID.getNumOperands())
+      return nullptr;
+    return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
+  }
+
+  void fixImplicitOperands(MachineInstr &MI) const;
 };
 
 /// \brief Returns true if a reg:subreg pair P has a TRC class
@@ -956,6 +1021,21 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                                MachineRegisterInfo &MRI);
 
+/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
+/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
+/// attempt to track between blocks.
+bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                Register VReg,
+                                const MachineInstr &DefMI,
+                                const MachineInstr &UseMI);
+
+/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
+/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
+/// track between blocks.
+bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
+                                   Register VReg,
+                                   const MachineInstr &DefMI);
+
 namespace AMDGPU {
 
   LLVM_READONLY
@@ -1003,17 +1083,14 @@ namespace AMDGPU {
   LLVM_READONLY
   int getGlobalSaddrOp(uint16_t Opcode);
 
+  LLVM_READONLY
+  int getVCMPXNoSDstOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
 
-  // For MachineOperands.
-  enum TargetFlags {
-    TF_LONG_BRANCH_FORWARD = 1 << 0,
-    TF_LONG_BRANCH_BACKWARD = 1 << 1
-  };
-
 } // end namespace AMDGPU
 
 namespace SI {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 13afa4d4974b..c382c816e0b4 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1,25 +1,21 @@
 //===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-def isCI : Predicate<"Subtarget->getGeneration() "
-                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isCIOnly : Predicate<"Subtarget->getGeneration() =="
-                         "AMDGPUSubtarget::SEA_ISLANDS">,
-  AssemblerPredicate <"FeatureSeaIslands">;
-def isVIOnly : Predicate<"Subtarget->getGeneration() =="
-                         "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
-  AssemblerPredicate <"FeatureVolcanicIslands">;
+
+def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
+  AssemblerPredicate <"FeatureWavefrontSize32">;
+def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
+  AssemblerPredicate <"FeatureWavefrontSize64">;
 
 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
 
 class GCNPredicateControl : PredicateControl {
-  Predicate SIAssemblerPredicate = isSICI;
-  Predicate VIAssemblerPredicate = isVI;
+  Predicate SIAssemblerPredicate = isGFX6GFX7;
+  Predicate VIAssemblerPredicate = isGFX8GFX9;
 }
 
 // Execpt for the NONE field, this must be kept in sync with the
@@ -32,6 +28,8 @@ def SIEncodingFamily {
   int SDWA9 = 3;
   int GFX80 = 4;
   int GFX9 = 5;
+  int GFX10 = 6;
+  int SDWA10 = 7;
 }
 
 //===----------------------------------------------------------------------===//
@@ -41,10 +39,16 @@ def SIEncodingFamily {
 def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
 
 def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
-  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
+  SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
+                       SDTCisVT<4, i1>]>,
   [SDNPMayLoad, SDNPMemOperand]
 >;
 
+def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
+  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
+  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
+>;
+
 def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
   [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
 >;
@@ -57,10 +61,6 @@ def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
   SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
 ]>;
 
-def SIatomic_fadd : SDNode<"AMDGPUISD::ATOMIC_LOAD_FADD", SDTAtomic2_f32,
-  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
 def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
   [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
 >;
@@ -69,6 +69,13 @@ def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
   [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
 >;
 
+// load_d16_{lo|hi} ptr, tied_input
+def SIload_d16 : SDTypeProfile<1, 2, [
+  SDTCisPtrTy<1>,
+  SDTCisSameAs<0, 2>
+]>;
+
+
 def SDTtbuffer_load : SDTypeProfile<1, 8,
   [                     // vdata
    SDTCisVT<1, v4i32>,  // rsrc
@@ -101,9 +108,6 @@ def SDTtbuffer_store : SDTypeProfile<0, 9,
 
 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                              [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
-                                SDTtbuffer_store,
-                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
 def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -120,6 +124,14 @@ def SDTBufferLoad : SDTypeProfile<1, 7,
 
 def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
+                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
+                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
+                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
+                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
 def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
 def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
@@ -138,6 +150,12 @@ def SDTBufferStore : SDTypeProfile<0, 8,
 
 def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                              [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
+                         SDTBufferStore,
+                         [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
+                           SDTBufferStore,
+                           [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
 def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                             SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -147,9 +165,7 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
 
 class SDBufferAtomic<string opcode> : SDNode <opcode,
   SDTypeProfile<1, 8,
-      [SDTCisVT<0, i32>,   // dst
-       SDTCisVT<1, i32>,   // vdata
-       SDTCisVT<2, v4i32>, // rsrc
+       [SDTCisVT<2, v4i32>, // rsrc
        SDTCisVT<3, i32>,   // vindex(VGPR)
        SDTCisVT<4, i32>,   // voffset(VGPR)
        SDTCisVT<5, i32>,   // soffset(SGPR)
@@ -159,6 +175,19 @@ class SDBufferAtomic<string opcode> : SDNode <opcode,
   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
 >;
 
+class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
+  SDTypeProfile<0, 8,
+      [SDTCisVT<0, ty>,    // vdata
+       SDTCisVT<1, v4i32>, // rsrc
+       SDTCisVT<2, i32>,   // vindex(VGPR)
+       SDTCisVT<3, i32>,   // voffset(VGPR)
+       SDTCisVT<4, i32>,   // soffset(SGPR)
+       SDTCisVT<5, i32>,   // offset(imm)
+       SDTCisVT<6, i32>,   // cachepolicy(imm)
+       SDTCisVT<7, i1>]>,  // idxen(imm)
+  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
+>;
+
 def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
 def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
 def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
@@ -169,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
 def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
 def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
 def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
+def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
+def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
 
 def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
   SDTypeProfile<1, 9,
@@ -185,10 +216,54 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
 >;
 
+class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
+  SDTypeProfile<0, 2,
+      [SDTCisPtrTy<0>,     // vaddr
+       SDTCisVT<1, ty>]>,  // vdata
+  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
+>;
+
+def SIglobal_atomic_fadd    : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_FADD", f32>;
+def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
+
 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
   SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
 >;
 
+def SIlds : SDNode<"AMDGPUISD::LDS",
+  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
+def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
+  SIload_d16,
+  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
 //===----------------------------------------------------------------------===//
 // ValueType helpers
 //===----------------------------------------------------------------------===//
@@ -201,7 +276,8 @@ class isFloatType<ValueType SrcVT> {
     !if(!eq(SrcVT.Value, f32.Value), 1,
     !if(!eq(SrcVT.Value, f64.Value), 1,
     !if(!eq(SrcVT.Value, v2f16.Value), 1,
-    0))));
+    !if(!eq(SrcVT.Value, v4f16.Value), 1,
+    0)))));
 }
 
 class isIntType<ValueType SrcVT> {
@@ -215,8 +291,9 @@ class isIntType<ValueType SrcVT> {
 class isPackedType<ValueType SrcVT> {
   bit ret =
     !if(!eq(SrcVT.Value, v2i16.Value), 1,
-      !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
-    );
+      !if(!eq(SrcVT.Value, v2f16.Value), 1,
+        !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
+    ));
 }
 
 //===----------------------------------------------------------------------===//
@@ -228,7 +305,7 @@ defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
 
 def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>;
 def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>;
-def atomic_load_fadd_local : local_binary_atomic_op<SIatomic_fadd>;
+def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>;
 def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
 def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
 
@@ -250,13 +327,13 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
 >;
 
-def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}]>;
+def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
+  let IsUnindexed = 1;
+}
 
-def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
-}]>;
+def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+  let IsNonExtLoad = 1;
+}
 
 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
   (AMDGPUatomic_ld_glue node:$ptr)> {
@@ -270,35 +347,49 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
   let MemoryVT = i64;
 }
 
-def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
-}]>;
+def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+  let IsLoad = 1;
+  let IsAnyExtLoad = 1;
+}
 
 def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
 }]>;
 
-def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
-}]>;
+def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let IsZeroExtLoad = 1;
+}
 
-def az_extload_glue : AZExtLoadBase <unindexedload_glue>;
+def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
 
-def az_extloadi8_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
 
-def az_extloadi16_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
 
-def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
 
-def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
 
 def load_glue_align8 : Aligned8Bytes <
   (ops node:$ptr), (load_glue node:$ptr)
@@ -311,8 +402,10 @@ def load_glue_align16 : Aligned16Bytes <
 def load_local_m0 : LoadFrag<load_glue>, LocalAddress;
 def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress;
 def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;
-def az_extloadi8_local_m0 : LoadFrag<az_extloadi8_glue>, LocalAddress;
-def az_extloadi16_local_m0 : LoadFrag<az_extloadi16_glue>, LocalAddress;
+def extloadi8_local_m0 : LoadFrag<extloadi8_glue>, LocalAddress;
+def zextloadi8_local_m0 : LoadFrag<zextloadi8_glue>, LocalAddress;
+def extloadi16_local_m0 : LoadFrag<extloadi16_glue>, LocalAddress;
+def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress;
 def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
 def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress;
 def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
@@ -386,6 +479,51 @@ def si_setcc_uniform : PatFrag <
   return true;
 }]>;
 
+//===----------------------------------------------------------------------===//
+// SDNodes PatFrags for d16 loads
+//===----------------------------------------------------------------------===//
+
+class LoadD16Frag <SDPatternOperator op> : PatFrag<(ops node:$ptr, node:$tied_in), (op node:$ptr, node:$tied_in)>;
+class LocalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, LocalAddress;
+class GlobalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, GlobalLoadAddress;
+class PrivateLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, PrivateAddress;
+class FlatLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, FlatLoadAddress;
+
+def load_d16_hi_local : LocalLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_global : GlobalLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_private : PrivateLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_flat : FlatLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_i8>;
+
+
+def load_d16_lo_local : LocalLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_global : GlobalLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_private : PrivateLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_flat : FlatLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_i8>;
+
+
+
 def lshr_rev : PatFrag <
   (ops node:$src1, node:$src0),
   (srl $src0, $src1)
@@ -410,6 +548,7 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
   >;
 
   def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+  def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
 }
 
 defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
@@ -424,7 +563,7 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
 defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
 defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
 defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
-defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 1, SDTAtomic2_f32>;
+defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>;
 defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>;
 defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>;
 
@@ -433,6 +572,7 @@ def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
 >;
 
 def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
+def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
 
 
 def as_i1imm : SDNodeXForm<imm, [{
@@ -482,8 +622,12 @@ class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
 }]>;
 
-def SIMM16bit : PatLeaf <(imm),
-  [{return isInt<16>(N->getSExtValue());}]
+def SIMM16bit : ImmLeaf <i32,
+  [{return isInt<16>(Imm);}]
+>;
+
+def UIMM16bit : ImmLeaf <i32,
+  [{return isUInt<16>(Imm); }]
 >;
 
 class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
@@ -515,6 +659,22 @@ def ShiftAmt32Imm : PatLeaf <(imm), [{
   return N->getZExtValue() < 32;
 }]>;
 
+def getNegV2I16Imm : SDNodeXForm<build_vector, [{
+  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
+}]>;
+
+def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
+  assert(N->getNumOperands() == 2);
+  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
+  SDValue Src0 = N->getOperand(0);
+  SDValue Src1 = N->getOperand(1);
+  if (Src0 == Src1)
+    return isNegInlineImmediate(Src0.getNode());
+
+  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
+         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
+}], getNegV2I16Imm>;
+
 //===----------------------------------------------------------------------===//
 // Custom Operands
 //===----------------------------------------------------------------------===//
@@ -588,6 +748,14 @@ def SwizzleMatchClass : AsmOperandClass {
   let IsOptional = 1;
 }
 
+def EndpgmMatchClass : AsmOperandClass {
+  let Name = "EndpgmImm";
+  let PredicateMethod = "isEndpgm";
+  let ParserMethod = "parseEndpgmOp";
+  let RenderMethod = "addImmOperands";
+  let IsOptional = 1;
+}
+
 def ExpTgtMatchClass : AsmOperandClass {
   let Name = "ExpTgt";
   let PredicateMethod = "isExpTgt";
@@ -605,6 +773,11 @@ def SwizzleImm : Operand<i16> {
   let ParserMatchClass = SwizzleMatchClass;
 }
 
+def EndpgmImm : Operand<i16> {
+  let PrintMethod = "printEndpgm";
+  let ParserMatchClass = EndpgmMatchClass;
+}
+
 def SWaitMatchClass : AsmOperandClass {
   let Name = "SWaitCnt";
   let RenderMethod = "addImmOperands";
@@ -619,11 +792,41 @@ def VReg32OrOffClass : AsmOperandClass {
 def WAIT_FLAG : Operand <i32> {
   let ParserMatchClass = SWaitMatchClass;
   let PrintMethod = "printWaitFlag";
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 include "SIInstrFormats.td"
 include "VIInstrFormats.td"
 
+def BoolReg : AsmOperandClass {
+  let Name = "BoolReg";
+  let ParserMethod = "parseBoolReg";
+  let RenderMethod = "addRegOperands";
+}
+
+class BoolRC : RegisterOperand<SReg_1> {
+  let ParserMatchClass = BoolReg;
+  let DecoderMethod = "decodeBoolReg";
+}
+
+def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
+  let ParserMatchClass = BoolReg;
+  let DecoderMethod = "decodeBoolReg";
+}
+
+def VOPDstS64orS32 : BoolRC {
+  let PrintMethod = "printVOPDst";
+}
+
+// SCSrc_i1 is the operand for pseudo instructions only.
+// Boolean immediates shall not be exposed to codegen instructions.
+def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_IMM_INT32";
+  let ParserMatchClass = BoolReg;
+  let DecoderMethod = "decodeBoolReg";
+}
+
 // ===----------------------------------------------------------------------===//
 // ExpSrc* Special cases for exp src operands which are printed as
 // "off" depending on en operand.
@@ -662,11 +865,12 @@ def SDWASrc_i16 : SDWASrc<i16>;
 def SDWASrc_f32 : SDWASrc<f32>;
 def SDWASrc_f16 : SDWASrc<f16>;
 
-def SDWAVopcDst : VOPDstOperand<SReg_64> {
+def SDWAVopcDst : BoolRC {
   let OperandNamespace = "AMDGPU";
   let OperandType = "OPERAND_SDWA_VOPC_DST";
   let EncoderMethod = "getSDWAVopcDstEncoding";
   let DecoderMethod = "decodeSDWAVopcDst";
+  let PrintMethod = "printVOPDst";
 }
 
 class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@@ -688,21 +892,11 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
   let ParserMatchClass = MatchClass;
 }
 
-class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> {
-  let PrintMethod = "print"#Name;
-  let ParserMatchClass = MatchClass;
-}
-
 class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
   let PrintMethod = "print"#Name;
   let ParserMatchClass = MatchClass;
 }
 
-class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> {
-  let PrintMethod = "print"#Name;
-  let ParserMatchClass = MatchClass;
-}
-
 class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
   let PrintMethod = "print"#Name;
   let ParserMatchClass = MatchClass;
@@ -720,8 +914,7 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
 def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
 def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
 
-def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
-def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>;
+def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
 def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
 def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
 def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
@@ -732,6 +925,7 @@ def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
 
+def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
 def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
@@ -746,11 +940,15 @@ def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
 def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
 
 def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
+def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
+
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
 
 def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
 def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
 def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
 def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
 
 def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
 def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
@@ -762,6 +960,10 @@ def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
 def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
 def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
 
+def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
+def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
+def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
+
 def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
 
 def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -793,9 +995,6 @@ def f32kimm : kimmOperand<i32>;
 def KImmFP16MatchClass : KImmMatchClass<16>;
 def f16kimm : kimmOperand<i16>;
 
-
-def VOPDstS64 : VOPDstOperand <SReg_64>;
-
 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "RegOrImmWithFP"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -863,7 +1062,7 @@ def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
 def FPVRegInputModsMatchClass : AsmOperandClass {
   let Name = "VRegWithFPInputMods";
   let ParserMethod = "parseRegWithFPInputMods";
-  let PredicateMethod = "isVReg";
+  let PredicateMethod = "isVReg32";
 }
 
 def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
@@ -890,7 +1089,7 @@ def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
 def IntVRegInputModsMatchClass : AsmOperandClass {
   let Name = "VRegWithIntInputMods";
   let ParserMethod = "parseRegWithIntInputMods";
-  let PredicateMethod = "isVReg";
+  let PredicateMethod = "isVReg32";
 }
 
 def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
@@ -941,6 +1140,8 @@ def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
 def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+// VOP3Mods, but only allowed for f32 operands.
+def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
 
 def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
 
@@ -995,6 +1196,31 @@ def TRAPID{
   int LLVM_DEBUG_TRAP = 3;
 }
 
+def HWREG {
+  int MODE = 1;
+  int STATUS = 2;
+  int TRAPSTS = 3;
+  int HW_ID = 4;
+  int GPR_ALLOC = 5;
+  int LDS_ALLOC = 6;
+  int IB_STS = 7;
+  int MEM_BASES = 15;
+  int TBA_LO = 16;
+  int TBA_HI = 17;
+  int TMA_LO = 18;
+  int TMA_HI = 19;
+  int FLAT_SCR_LO = 20;
+  int FLAT_SCR_HI = 21;
+  int XNACK_MASK = 22;
+  int POPS_PACKER = 25;
+}
+
+class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
+  int ret = !or(Reg,
+                !or(!shl(Offset, 6),
+                    !shl(!add(Size, -1), 11)));
+}
+
 //===----------------------------------------------------------------------===//
 //
 // SI Instruction multiclass helpers.
@@ -1045,18 +1271,26 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
       def _si : EXP_Helper<done>,
                 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
                 EXPe {
-        let AssemblerPredicates = [isSICI];
-        let DecoderNamespace = "SICI";
+        let AssemblerPredicates = [isGFX6GFX7];
+        let DecoderNamespace = "GFX6GFX7";
         let DisableDecoder = DisableSIDecoder;
       }
 
       def _vi : EXP_Helper<done>,
                 SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
                 EXPe_vi {
-        let AssemblerPredicates = [isVI];
-        let DecoderNamespace = "VI";
+        let AssemblerPredicates = [isGFX8GFX9];
+        let DecoderNamespace = "GFX8";
         let DisableDecoder = DisableVIDecoder;
       }
+
+      def _gfx10 : EXP_Helper<done>,
+                SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
+                EXPe {
+        let AssemblerPredicates = [isGFX10Plus];
+        let DecoderNamespace = "GFX10";
+        let DisableDecoder = DisableSIDecoder;
+      }
     }
   }
 }
@@ -1080,7 +1314,19 @@ class getVALUDstForVT<ValueType VT> {
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                               !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
-                              VOPDstOperand<SReg_64>)))); // else VT == i1
+                              VOPDstS64orS32)))); // else VT == i1
+}
+
+// Returns true if VT is floating point.
+class getIsFP<ValueType VT> {
+  bit ret = !if(!eq(VT.Value, f16.Value), 1,
+            !if(!eq(VT.Value, v2f16.Value), 1,
+            !if(!eq(VT.Value, v4f16.Value), 1,
+            !if(!eq(VT.Value, f32.Value), 1,
+            !if(!eq(VT.Value, v2f32.Value), 1,
+            !if(!eq(VT.Value, f64.Value), 1,
+            !if(!eq(VT.Value, v2f64.Value), 1,
+            0)))))));
 }
 
 // Returns the register class to use for the destination of VOP[12C]
@@ -1094,11 +1340,7 @@ class getSDWADstForVT<ValueType VT> {
 // Returns the register class to use for source 0 of VOP[12C]
 // instructions for the given VT.
 class getVOPSrc0ForVT<ValueType VT> {
-  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
-             !if(!eq(VT.Value, v2f16.Value), 1,
-             !if(!eq(VT.Value, f32.Value), 1,
-             !if(!eq(VT.Value, f64.Value), 1,
-             0))));
+  bit isFP = getIsFP<VT>.ret;
 
   RegisterOperand ret =
     !if(isFP,
@@ -1107,8 +1349,11 @@ class getVOPSrc0ForVT<ValueType VT> {
          !if(!eq(VT.Value, f16.Value),
             VSrc_f16,
             !if(!eq(VT.Value, v2f16.Value),
-               VCSrc_v2f16,
-               VSrc_f32
+               VSrc_v2f16,
+               !if(!eq(VT.Value, v4f16.Value),
+                 AVSrc_64,
+                 VSrc_f32
+               )
             )
          )
        ),
@@ -1117,7 +1362,7 @@ class getVOPSrc0ForVT<ValueType VT> {
           !if(!eq(VT.Value, i16.Value),
              VSrc_b16,
              !if(!eq(VT.Value, v2i16.Value),
-                VCSrc_v2b16,
+                VSrc_v2b16,
                 VSrc_b32
              )
           )
@@ -1132,9 +1377,7 @@ class getVregSrcForVT<ValueType VT> {
 }
 
 class getSDWASrcForVT <ValueType VT> {
-  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
-             !if(!eq(VT.Value, f32.Value), 1,
-             0));
+  bit isFP = getIsFP<VT>.ret;
   RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
   RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
   RegisterOperand ret = !if(isFP, retFlt, retInt);
@@ -1143,33 +1386,32 @@ class getSDWASrcForVT <ValueType VT> {
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
-  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
-             !if(!eq(VT.Value, v2f16.Value), 1,
-             !if(!eq(VT.Value, f32.Value), 1,
-             !if(!eq(VT.Value, f64.Value), 1,
-             0))));
+  bit isFP = getIsFP<VT>.ret;
   RegisterOperand ret =
   !if(!eq(VT.Size, 128),
      VSrc_128,
      !if(!eq(VT.Size, 64),
         !if(isFP,
-           VCSrc_f64,
-           VCSrc_b64),
+           VSrc_f64,
+           VSrc_b64),
         !if(!eq(VT.Value, i1.Value),
-           SCSrc_i1,
+           SSrc_i1,
            !if(isFP,
               !if(!eq(VT.Value, f16.Value),
-                 VCSrc_f16,
+                 VSrc_f16,
                  !if(!eq(VT.Value, v2f16.Value),
-                    VCSrc_v2f16,
-                    VCSrc_f32
+                    VSrc_v2f16,
+                    !if(!eq(VT.Value, v4f16.Value),
+                      AVSrc_64,
+                      VSrc_f32
+                    )
                  )
               ),
               !if(!eq(VT.Value, i16.Value),
-                 VCSrc_b16,
+                 VSrc_b16,
                  !if(!eq(VT.Value, v2i16.Value),
-                    VCSrc_v2b16,
-                    VCSrc_b32
+                    VSrc_v2b16,
+                    VSrc_b32
                  )
               )
            )
@@ -1190,11 +1432,8 @@ class isModifierType<ValueType SrcVT> {
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
-  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
-               !if(!eq(VT.Value, f32.Value), 1,
-               !if(!eq(VT.Value, f64.Value), 1,
-               0)));
+class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
+  bit isFP = getIsFP<VT>.ret;
   bit isPacked = isPackedType<VT>.ret;
   Operand ret =  !if(!eq(VT.Size, 64),
                      !if(isFP, FP64InputMods, Int64InputMods),
@@ -1203,7 +1442,7 @@ class getSrcMod <ValueType VT> {
                             FP16InputMods,
                             FP32InputMods
                           ),
-                         Int32InputMods)
+                         !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
                      );
 }
 
@@ -1213,10 +1452,7 @@ class getOpSelMod <ValueType VT> {
 
 // Return type of input modifiers operand specified input operand for DPP
 class getSrcModExt <ValueType VT> {
-    bit isFP = !if(!eq(VT.Value, f16.Value), 1,
-               !if(!eq(VT.Value, f32.Value), 1,
-               !if(!eq(VT.Value, f64.Value), 1,
-               0)));
+  bit isFP = getIsFP<VT>.ret;
   Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
 }
 
@@ -1238,7 +1474,7 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
 // Returns the input arguments for VOP3 instructions for the given SrcVT.
 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
-                bit HasIntClamp, bit HasModifiers, bit HasOMod,
+                bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
 
   dag ret =
@@ -1276,16 +1512,33 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
       /* endif */ )
     /* NumSrcArgs == 3 */,
       !if (!eq(HasModifiers, 1),
-        // VOP3 with modifiers
-        !if (!eq(HasOMod, 1),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp, omod:$omod),
-          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
-               Src1Mod:$src1_modifiers, Src1RC:$src1,
-               Src2Mod:$src2_modifiers, Src2RC:$src2,
-               clampmod:$clamp))
+        !if (!eq(HasSrc2Mods, 1),
+          // VOP3 with modifiers
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2Mod:$src2_modifiers, Src2RC:$src2,
+                 clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2,
+                   clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2Mod:$src2_modifiers, Src2RC:$src2))),
+          // VOP3 with modifiers except src2
+          !if (!eq(HasOMod, 1),
+            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                 Src1Mod:$src1_modifiers, Src1RC:$src1,
+                 Src2RC:$src2, clampmod:$clamp, omod:$omod),
+            !if (!eq(HasIntClamp, 1),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2, clampmod:$clamp),
+              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                   Src1Mod:$src1_modifiers, Src1RC:$src1,
+                   Src2RC:$src2))))
       /* else */,
         // VOP3 without modifiers
         !if (!eq(HasIntClamp, 1),
@@ -1398,6 +1651,42 @@ class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1
              /* endif */)));
 }
 
+class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+                   int NumSrcArgs, bit HasModifiers,
+                   Operand Src0Mod, Operand Src1Mod> {
+  dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs, // getInsDPP operand list for the same arguments...
+                           HasModifiers, Src0Mod, Src1Mod>.ret,
+                 (ins FI:$fi)); // ...with the FI:$fi operand appended at the end.
+}
+
+class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+                 int NumSrcArgs, bit HasModifiers,
+                 Operand Src0Mod, Operand Src1Mod> {
+  dag ret = !if (!eq(NumSrcArgs, 0), // Operand list is picked by NumSrcArgs, then by HasModifiers.
+                // VOP1 without input operands (V_NOP): only dpp8 and FI
+                (ins dpp8:$dpp8, FI:$fi),
+            !if (!eq(NumSrcArgs, 1), // Every form with sources takes DstRC:$old first and dpp8, FI last.
+              !if (!eq(HasModifiers, 1),
+                // VOP1_DPP with modifiers
+                (ins DstRC:$old, Src0Mod:$src0_modifiers,
+                     Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+              /* else */,
+                // VOP1_DPP without modifiers
+                (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+              /* endif */)
+              /* else: NumSrcArgs == 2 (any value other than 0 or 1 lands here) */,
+              !if (!eq(HasModifiers, 1),
+                // VOP2_DPP with modifiers
+                (ins DstRC:$old,
+                     Src0Mod:$src0_modifiers, Src0RC:$src0,
+                     Src1Mod:$src1_modifiers, Src1RC:$src1,
+                     dpp8:$dpp8, FI:$fi)
+              /* else */,
+                // VOP2_DPP without modifiers
+                (ins DstRC:$old,
+                     Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
+             /* endif */)));
+}
 
 
 // Ins for SDWA
@@ -1556,6 +1845,26 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
   string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
 }
 
+class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi"; // getAsmDPP string with "$fi" appended
+}
+
+class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+  string dst = !if(HasDst,
+                   !if(!eq(DstVT.Size, 1),
+                       "$sdst",
+                       "$vdst"),
+                    ""); // 1-bit DstVT selects $sdst (use $sdst for VOPC); empty when HasDst is 0
+  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); // trailing comma only when more operands follow
+  string src1 = !if(!eq(NumSrcArgs, 1), "",
+                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+                                           " $src1_modifiers,"));
+  string args = !if(!eq(HasModifiers, 0),
+                     getAsm32<0, NumSrcArgs, DstVT>.ret, // no modifiers: reuse the getAsm32 operand string
+                     ", "#src0#src1);                     // modifiers: ", " followed by the $srcN_modifiers operands
+  string ret = dst#args#"$dpp8$fi"; // dst, operands, then $dpp8 and $fi with no literal separator
+}
+
 class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
   string dst = !if(HasDst,
                    !if(!eq(DstVT.Size, 1),
@@ -1650,9 +1959,12 @@ def PatGenMode {
   int Pattern   = 1;
 }
 
-class VOPProfile <list<ValueType> _ArgVT> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
+                  bit _EnableClamp = 0> {
 
   field list<ValueType> ArgVT = _ArgVT;
+  field bit EnableF32SrcMods = _EnableF32SrcMods;
+  field bit EnableClamp = _EnableClamp;
 
   field ValueType DstVT = ArgVT[0];
   field ValueType Src0VT = ArgVT[1];
@@ -1670,9 +1982,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
-  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
-  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
-  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
+  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
+  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
+  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
   field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
   field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
   field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -1688,12 +2000,16 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
 
   // TODO: Modifiers logic is somewhat adhoc here, to be refined later
-  field bit HasModifiers = isModifierType<Src0VT>.ret;
+  // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
+  // enables modifiers for i32 type.
+  field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
 
+  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
   field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
   field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
 
+  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
   field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
   field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
@@ -1702,7 +2018,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
   field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
 
-  field bit HasClamp = HasModifiers;
+  field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
   field bit HasSDWAClamp = EmitDst;
   field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
   field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
@@ -1721,6 +2037,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field bit HasExtSDWA9 = HasExt;
   field int NeedPatGen = PatGenMode.NoPattern;
 
+  field bit IsMAI = 0;
+
   field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
   field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
   field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1732,12 +2050,13 @@ class VOPProfile <list<ValueType> _ArgVT> {
   field dag Outs32 = Outs;
   field dag Outs64 = Outs;
   field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+  field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
   field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                             HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
-                             Src2Mod>.ret;
+                             HasIntClamp, HasModifiers, HasSrc2Mods,
+                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                    NumSrcArgs, HasClamp,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
@@ -1751,6 +2070,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
                          getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
                                    HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
                          (ins));
+  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
+                                 Src0ModDPP, Src1ModDPP>.ret;
   field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                  HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                  DstVT>.ret;
@@ -1766,8 +2089,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
                                               HasSrc2FloatMods>.ret;
   field string AsmDPP = !if(HasExtDPP,
                             getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
+  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
   field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
   field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
+
+  field string TieRegDPP = "$old";
 }
 
 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
@@ -1828,6 +2155,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
 
@@ -1848,6 +2176,19 @@ def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
 def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
 def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
 
+def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
+def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
+def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
+def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
+def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
+def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
+def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
+def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
+def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
+def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
+def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
+def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;
+
 class Commutable_REV <string revOp, bit isOrig> {
   string RevOp = revOp;
   bit IsOrig = isOrig;
@@ -1871,13 +2212,12 @@ class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
   let isCodeGenOnly = 1;
 }
 
+// FIXME-GFX10: WIP.
 class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
-                      string asm> :
+                      string asm, int encodingFamily> :
   VINTRPCommon <outs, ins, asm, []>,
   VINTRPe <op>,
-  SIMCInstr<opName, SIEncodingFamily.SI> {
-  let AssemblerPredicate = SIAssemblerPredicate;
-  let DecoderNamespace = "SICI";
+  SIMCInstr<opName, encodingFamily> {
   let DisableDecoder = DisableSIDecoder;
 }
 
@@ -1887,19 +2227,25 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
   VINTRPe_vi <op>,
   SIMCInstr<opName, SIEncodingFamily.VI> {
   let AssemblerPredicate = VIAssemblerPredicate;
-  let DecoderNamespace = "VI";
+  let DecoderNamespace = "GFX8";
   let DisableDecoder = DisableVIDecoder;
 }
 
+// FIXME-GFX10: WIP.
 multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                      list<dag> pattern = []> {
   def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
 
-  def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
+  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
+  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
 
   def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
-}
 
+  let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
+  } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+}
 //===----------------------------------------------------------------------===//
 // Vector instruction mappings
 //===----------------------------------------------------------------------===//
@@ -1981,7 +2327,9 @@ def getMCOpcodeGen : InstrMapping {
                    // does not actually change the encoding, and thus may be
                    // removed later.
                    [!cast<string>(SIEncodingFamily.GFX80)],
-                   [!cast<string>(SIEncodingFamily.GFX9)]];
+                   [!cast<string>(SIEncodingFamily.GFX9)],
+                   [!cast<string>(SIEncodingFamily.GFX10)],
+                   [!cast<string>(SIEncodingFamily.SDWA10)]];
 }
 
 // Get equivalent SOPK instruction.
@@ -2044,6 +2392,24 @@ def getGlobalSaddrOp : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+// Maps a v_cmpx opcode with sdst (HasSDst = 1) to the opcode without sdst (HasSDst = 0) sharing its NoSDstOp.
+def getVCMPXNoSDstOp : InstrMapping {
+  let FilterClass = "VCMPXNoSDstTable"; // only records of this class take part in the mapping
+  let RowFields = ["NoSDstOp"];         // records with equal NoSDstOp form one row
+  let ColFields = ["HasSDst"];          // column is selected by the HasSDst field
+  let KeyCol = ["1"];                   // look up from the HasSDst = 1 instruction...
+  let ValueCols = [["0"]];              // ...to the HasSDst = 0 instruction
+}
+
+// Maps a SOPP (Size = 4) to the SOPP with S_NOP (Size = 8) that has the same AsmString.
+def getSOPPWithRelaxation : InstrMapping {
+  let FilterClass = "Base_SOPP";  // only records of this class take part in the mapping
+  let RowFields = ["AsmString"];  // records with equal AsmString form one row
+  let ColFields = ["Size"];       // column is selected by the Size field
+  let KeyCol = ["4"];             // look up from the Size = 4 instruction...
+  let ValueCols = [["8"]];        // ...to the Size = 8 instruction
+}
+
 include "SIInstructions.td"
 
 include "DSInstructions.td"
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index b6b00c2e4257..70f20bb69370 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1,9 +1,8 @@
 //===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file was originally auto-generated from a GPU register header file and
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {
-  let SubtargetPredicate = isGCN;
+
 }
 
 include "SOPInstructions.td"
@@ -122,7 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
 
 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
 
-def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
+def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> { // NOTE(review): presumably the counterpart of EXIT_WWM — confirm
+  let Defs = [EXEC]; // implicitly defines EXEC in addition to the explicit $sdst output
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+}
+
+def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
   let hasSideEffects = 0;
   let mayLoad = 0;
   let mayStore = 0;
@@ -155,13 +161,12 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
 >;
 
 def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
-  (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+  (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
 >;
 
 def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
-  (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+  (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
 >;
-
 } // End usesCustomInserter = 1, Defs = [SCC]
 
 let usesCustomInserter = 1 in {
@@ -169,23 +174,30 @@ def GET_GROUPSTATICSIZE : SPseudoInstSI <(outs SReg_32:$sdst), (ins),
   [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
 } // End let usesCustomInserter = 1, SALU = 1
 
-def S_MOV_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
-   (ins SSrc_b64:$src0)> {
-  let isAsCheapAsAMove = 1;
+// Wrap an instruction by duplicating its operand lists and the fields copied below, except for setting isTerminator.
+class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
+      base_inst.OutOperandList,
+      base_inst.InOperandList> {
+  let Uses = base_inst.Uses; // implicit uses/defs are taken from the wrapped instruction
+  let Defs = base_inst.Defs;
+  let isTerminator = 1;      // the one field here that is forced rather than copied from base_inst
+  let isAsCheapAsAMove = base_inst.isAsCheapAsAMove;
+  let hasSideEffects = base_inst.hasSideEffects;
+  let UseNamedOperandTable = base_inst.UseNamedOperandTable;
+  let CodeSize = base_inst.CodeSize;
+}
 
-def S_XOR_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
-   (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
-  let isAsCheapAsAMove = 1;
-  let isTerminator = 1;
-  let Defs = [SCC];
+let WaveSizePredicate = isWave64 in {
+def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
+def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
+def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
 }
 
-def S_ANDN2_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
-   (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
-  let isAsCheapAsAMove = 1;
-  let isTerminator = 1;
+let WaveSizePredicate = isWave32 in {
+def S_MOV_B32_term : WrapTerminatorInst<S_MOV_B32>;
+def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
+def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
+def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
 }
 
 def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
@@ -195,7 +207,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
-  let isBarrier = 1;
   let isConvergent = 1;
   let FixedSize = 1;
   let Size = 0;
@@ -222,30 +233,30 @@ let isTerminator = 1 in {
 let OtherPredicates = [EnableLateCFGStructurize] in {
  def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
   (outs),
-  (ins SReg_64:$vcc, brtarget:$target),
+  (ins SReg_1:$vcc, brtarget:$target),
   [(brcond i1:$vcc, bb:$target)]> {
     let Size = 12;
 }
 }
 
 def SI_IF: CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
-  [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
+  (outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
+  [(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
   let Constraints = "";
   let Size = 12;
   let hasSideEffects = 1;
 }
 
 def SI_ELSE : CFPseudoInstSI <
-  (outs SReg_64:$dst),
-  (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+  (outs SReg_1:$dst),
+  (ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
   let Size = 12;
   let hasSideEffects = 1;
 }
 
 def SI_LOOP : CFPseudoInstSI <
-  (outs), (ins SReg_64:$saved, brtarget:$target),
-  [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
+  (outs), (ins SReg_1:$saved, brtarget:$target),
+  [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
   let Size = 8;
   let isBranch = 1;
   let hasSideEffects = 1;
@@ -254,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI <
 } // End isTerminator = 1
 
 def SI_END_CF : CFPseudoInstSI <
-  (outs), (ins SReg_64:$saved),
-  [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
+  (outs), (ins SReg_1:$saved), [], 1, 1> {
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
@@ -265,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI <
 }
 
 def SI_IF_BREAK : CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
-  [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
+  (outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
@@ -292,7 +301,7 @@ multiclass PseudoInstKill <dag ins> {
   }
 }
 
-defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
+defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
 defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
 
 let Defs = [EXEC,VCC] in
@@ -311,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
 }
 
 def SI_PS_LIVE : PseudoInstSI <
-  (outs SReg_64:$dst), (ins),
+  (outs SReg_1:$dst), (ins),
   [(set i1:$dst, (int_amdgcn_ps_live))]> {
   let SALU = 1;
 }
@@ -340,6 +349,15 @@ def SI_INIT_EXEC : SPseudoInstSI <
   let Defs = [EXEC];
   let usesCustomInserter = 1;
   let isAsCheapAsAMove = 1;
+  let WaveSizePredicate = isWave64;
+}
+
+def SI_INIT_EXEC_LO : SPseudoInstSI <
+  (outs), (ins i32imm:$src), []> { // no explicit outputs, one i32 immediate input, no selection pattern on the def
+  let Defs = [EXEC_LO];            // implicitly defines EXEC_LO only
+  let usesCustomInserter = 1;
+  let isAsCheapAsAMove = 1;
+  let WaveSizePredicate = isWave32; // guarded by the wave32 predicate
 }
 
 def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
@@ -374,11 +392,14 @@ def SI_RETURN : SPseudoInstSI <
 // This version is only needed so we can fill in the output regiter in
 // the custom inserter.
 def SI_CALL_ISEL : SPseudoInstSI <
-  (outs), (ins SSrc_b64:$src0), [(AMDGPUcall i64:$src0)]> {
+  (outs), (ins SSrc_b64:$src0, unknown:$callee),
+  [(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
   let Size = 4;
   let isCall = 1;
   let SchedRW = [WriteBranch];
   let usesCustomInserter = 1;
+  // TODO: Should really base this on the call target
+  let isConvergent = 1;
 }
 
 // Wrapper around s_swappc_b64 with extra $callee parameter to track
@@ -389,23 +410,14 @@ def SI_CALL : SPseudoInstSI <
   let isCall = 1;
   let UseNamedOperandTable = 1;
   let SchedRW = [WriteBranch];
+  // TODO: Should really base this on the call target
+  let isConvergent = 1;
 }
 
 // Tail call handling pseudo
-def SI_TCRETURN_ISEL : SPseudoInstSI<(outs),
-  (ins SSrc_b64:$src0, i32imm:$fpdiff),
-  [(AMDGPUtc_return i64:$src0, i32:$fpdiff)]> {
-  let isCall = 1;
-  let isTerminator = 1;
-  let isReturn = 1;
-  let isBarrier = 1;
-  let SchedRW = [WriteBranch];
-  let usesCustomInserter = 1;
-}
-
-def SI_TCRETURN : SPseudoInstSI <
-  (outs),
-  (ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff)> {
+def SI_TCRETURN : SPseudoInstSI <(outs),
+  (ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff),
+  [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
   let Size = 4;
   let isCall = 1;
   let isTerminator = 1;
@@ -413,6 +425,8 @@ def SI_TCRETURN : SPseudoInstSI <
   let isBarrier = 1;
   let UseNamedOperandTable = 1;
   let SchedRW = [WriteBranch];
+  // TODO: Should really base this on the call target
+  let isConvergent = 1;
 }
 
 
@@ -424,6 +438,8 @@ def ADJCALLSTACKUP : SPseudoInstSI<
   let FixedSize = 1;
   let hasSideEffects = 1;
   let usesCustomInserter = 1;
+  let SchedRW = [WriteSALU];
+  let Defs = [SCC];
 }
 
 def ADJCALLSTACKDOWN : SPseudoInstSI<
@@ -433,6 +449,8 @@ def ADJCALLSTACKDOWN : SPseudoInstSI<
   let Size = 8; // Worst case. (s_add_u32 + constant)
   let hasSideEffects = 1;
   let usesCustomInserter = 1;
+  let SchedRW = [WriteSALU];
+  let Defs = [SCC];
 }
 
 let Defs = [M0, EXEC, SCC],
@@ -490,9 +508,12 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
 // SI_SPILL_32_* instructions.
 defm SI_SPILL_S32  : SI_SPILL_SGPR <SReg_32>;
 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S96  : SI_SPILL_SGPR <SReg_96>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
 
 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
   let UseNamedOperandTable = 1, VGPRSpill = 1,
@@ -504,7 +525,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
       let mayStore = 1;
       let mayLoad = 0;
       // (2 * 4) + (8 * num_subregs) bytes maximum
-      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+      // Size field is unsigned char and cannot fit more.
+      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
     }
 
     def _RESTORE : VPseudoInstSI <
@@ -515,7 +538,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
       let mayLoad = 1;
 
       // (2 * 4) + (8 * num_subregs) bytes maximum
-      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+      // Size field is unsigned char and cannot fit more.
+      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
     }
   } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
 }
@@ -524,21 +549,74 @@ defm SI_SPILL_V32  : SI_SPILL_VGPR <VGPR_32>;
 defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
 defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
+
+multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> { // vgpr_class: the register class spilled (instantiated with AGPR classes)
+  let UseNamedOperandTable = 1, VGPRSpill = 1,
+      Constraints = "@earlyclobber $tmp",
+      SchedRW = [WriteVMEM] in {
+    def _SAVE : VPseudoInstSI <
+      (outs VGPR_32:$tmp),
+      (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+           SReg_32:$soffset, i32imm:$offset)> {
+      let mayStore = 1;
+      let mayLoad = 0;
+      // (2 * 4) + (16 * num_subregs) bytes maximum, with num_subregs = Size / 32
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+      // Size field is unsigned char (max 255) and cannot fit more; clamp to 252.
+      let Size = !if(!le(MaxSize, 255), MaxSize, 252);
+    }
+
+    def _RESTORE : VPseudoInstSI <
+      (outs vgpr_class:$vdata, VGPR_32:$tmp),
+      (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
+           i32imm:$offset)> {
+      let mayStore = 0;
+      let mayLoad = 1;
+
+      // (2 * 4) + (16 * num_subregs) bytes maximum, with num_subregs = Size / 32
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+      // Size field is unsigned char (max 255) and cannot fit more; clamp to 252.
+      let Size = !if(!le(MaxSize, 255), MaxSize, 252);
+    }
+  } // End UseNamedOperandTable = 1, VGPRSpill = 1, Constraints, SchedRW = [WriteVMEM]
+}
+
+defm SI_SPILL_A32  : SI_SPILL_AGPR <AGPR_32>;
+defm SI_SPILL_A64  : SI_SPILL_AGPR <AReg_64>;
+defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
+defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
+defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),
   (ins si_ga:$ptr_lo, si_ga:$ptr_hi),
   [(set SReg_64:$dst,
-   (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> {
+      (i64 (SIpc_add_rel_offset tglobaladdr:$ptr_lo, tglobaladdr:$ptr_hi)))]> {
   let Defs = [SCC];
 }
 
+def : GCNPat <
+  (SIpc_add_rel_offset tglobaladdr:$ptr_lo, 0), // second operand is the constant 0 instead of a global address
+  (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))       // so an explicit i32 0 is passed as the $ptr_hi operand
+>;
+
 def : GCNPat <
   (AMDGPUinit_exec i64:$src),
   (SI_INIT_EXEC (as_i64imm $src))
->;
+> {
+  let WaveSizePredicate = isWave64;
+}
+
+def : GCNPat <
+  (AMDGPUinit_exec i64:$src),
+  (SI_INIT_EXEC_LO (as_i32imm $src)) // wave32 form: the i64 source is passed through as_i32imm
+> {
+  let WaveSizePredicate = isWave32; // only applies under the wave32 predicate
+}
 
 def : GCNPat <
   (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
@@ -551,7 +629,7 @@ def : GCNPat<
 >;
 
 def : GCNPat<
-  (AMDGPUelse i64:$src, bb:$target),
+  (AMDGPUelse i1:$src, bb:$target),
   (SI_ELSE $src, $target, 0)
 >;
 
@@ -584,7 +662,12 @@ def : Pat <
   // TODO: we could add more variants for other types of conditionals
 
 def : Pat <
-  (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+  (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
+  (COPY $src) // Return the SGPRs representing i1 src
+>;
+
+def : Pat <
+  (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
   (COPY $src) // Return the SGPRs representing i1 src
 >;
 
@@ -592,7 +675,7 @@ def : Pat <
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] in {
+let OtherPredicates = [UnsafeFPMath] in {
 
 //def : RcpPat<V_RCP_F64_e32, f64>;
 //defm : RsqPat<V_RSQ_F64_e32, f64>;
@@ -615,7 +698,7 @@ def : GCNPat <
   (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
-} // End SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath]
+} // End OtherPredicates = [UnsafeFPMath]
 
 
 // f16_to_fp patterns
@@ -706,17 +789,18 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
   let SubtargetPredicate = Has16BitInsts;
 }
 
-multiclass SelectPat <ValueType vt, Instruction inst> {
+multiclass SelectPat <ValueType vt> {
   def : GCNPat <
-    (vt (select i1:$src0, vt:$src1, vt:$src2)),
-    (inst $src2, $src1, $src0)
+    (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
+                          (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
+    (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
   >;
 }
 
-defm : SelectPat <i16, V_CNDMASK_B32_e64>;
-defm : SelectPat <i32, V_CNDMASK_B32_e64>;
-defm : SelectPat <f16, V_CNDMASK_B32_e64>;
-defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+defm : SelectPat <i16>;
+defm : SelectPat <i32>;
+defm : SelectPat <f16>;
+defm : SelectPat <f32>;
 
 let AddedComplexity = 1 in {
 def : GCNPat <
@@ -749,6 +833,22 @@ foreach Index = 0-2 in {
   >;
 }
 
+foreach Index = 0-2 in { // Extract_Element/Insert_Element defs for elements 0..2 of v3i32 and v3f32, using sub0..sub2
+  def Extract_Element_v3i32_#Index : Extract_Element <
+    i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v3i32_#Index : Insert_Element <
+    i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v3f32_#Index : Extract_Element <
+    f32, v3f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v3f32_#Index : Insert_Element <
+    f32, v3f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
 foreach Index = 0-3 in {
   def Extract_Element_v4i32_#Index : Extract_Element <
     i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -765,6 +865,22 @@ foreach Index = 0-3 in {
   >;
 }
 
+foreach Index = 0-4 in { // Extract_Element/Insert_Element defs for elements 0..4 of v5i32 and v5f32, using sub0..sub4
+  def Extract_Element_v5i32_#Index : Extract_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5i32_#Index : Insert_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v5f32_#Index : Extract_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5f32_#Index : Insert_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
 foreach Index = 0-7 in {
   def Extract_Element_v8i32_#Index : Extract_Element <
     i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -818,7 +934,23 @@ def : Pat <
   (v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
 >;
 
-let SubtargetPredicate = isGCN in {
+foreach Index = 0-31 in { // Extract_Element/Insert_Element defs for elements 0..31 of v32i32 and v32f32, using sub0..sub31
+  def Extract_Element_v32i32_#Index : Extract_Element <
+    i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Insert_Element_v32i32_#Index : Insert_Element <
+    i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v32f32_#Index : Extract_Element <
+    f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Insert_Element_v32f32_#Index : Insert_Element <
+    f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
 
 // FIXME: Why do only some of these type combinations for SReg and
 // VReg?
@@ -882,6 +1014,10 @@ def : BitConvert <i64, v4f16, VReg_64>;
 def : BitConvert <v4i32, v4f32, VReg_128>;
 def : BitConvert <v4f32, v4i32, VReg_128>;
 
+// 96-bit bitcast
+def : BitConvert <v3i32, v3f32, SGPR_96>;
+def : BitConvert <v3f32, v3i32, SGPR_96>;
+
 // 128-bit bitcast
 def : BitConvert <v2i64, v4i32, SReg_128>;
 def : BitConvert <v4i32, v2i64, SReg_128>;
@@ -892,6 +1028,10 @@ def : BitConvert <v4i32, v2f64, VReg_128>;
 def : BitConvert <v2i64, v2f64, VReg_128>;
 def : BitConvert <v2f64, v2i64, VReg_128>;
 
+// 160-bit bitcast
+def : BitConvert <v5i32, v5f32, SGPR_160>;
+def : BitConvert <v5f32, v5i32, SGPR_160>;
+
 // 256-bit bitcast
 def : BitConvert <v8i32, v8f32, SReg_256>;
 def : BitConvert <v8f32, v8i32, SReg_256>;
@@ -902,7 +1042,9 @@ def : BitConvert <v8f32, v8i32, VReg_256>;
 def : BitConvert <v16i32, v16f32, VReg_512>;
 def : BitConvert <v16f32, v16i32, VReg_512>;
 
-} // End SubtargetPredicate = isGCN
+// 1024-bit bitcast
+def : BitConvert <v32i32, v32f32, VReg_1024>;
+def : BitConvert <v32f32, v32i32, VReg_1024>;
 
 /********** =================== **********/
 /********** Src & Dst modifiers **********/
@@ -1070,6 +1212,16 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
+def : GCNPat <
+  (VGPRImm<(SIlds tglobaladdr:$ga)>),
+  (V_MOV_B32_e32 $ga)
+>;
+
+def : GCNPat <
+  (SIlds tglobaladdr:$ga),
+  (S_MOV_B32 $ga)
+>;
+
 // FIXME: Workaround for ordering issue with peephole optimizer where
 // a register class copy interferes with immediate folding.  Should
 // use s_mov_b32, which can be shrunk to s_movk_i32
@@ -1104,7 +1256,16 @@ def : GCNPat <
 def : GCNPat <
   (i1 imm:$imm),
   (S_MOV_B64 (i64 (as_i64imm $imm)))
->;
+> {
+  let WaveSizePredicate = isWave64;
+}
+
+def : GCNPat <
+  (i1 imm:$imm),
+  (S_MOV_B32 (i32 (as_i32imm $imm)))
+> {
+  let WaveSizePredicate = isWave32;
+}
 
 def : GCNPat <
   (f64 InlineFPImm<f64>:$imm),
@@ -1115,18 +1276,18 @@ def : GCNPat <
 /********** Intrinsic Patterns **********/
 /********** ================== **********/
 
-let SubtargetPredicate = isGCN in {
 def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
-}
 
 def : GCNPat <
   (i32 (sext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
 >;
 
 class Ext32Pat <SDNode ext> : GCNPat <
   (i32 (ext i1:$src0)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
 >;
 
 def : Ext32Pat <zext>;
@@ -1144,8 +1305,6 @@ def : GCNPat <
 // VOP3 Patterns
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isGCN in {
-
 def : IMad24Pat<V_MAD_I32_I24, 1>;
 def : UMad24Pat<V_MAD_U32_U24, 1>;
 
@@ -1153,8 +1312,6 @@ def : UMad24Pat<V_MAD_U32_U24, 1>;
 defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 def : ROTRPattern <V_ALIGNBIT_B32>;
 
-}
-
 def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
           (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
                           (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
@@ -1261,8 +1418,9 @@ def : GCNPat <
 class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
   (i64 (ext i1:$src)),
     (REG_SEQUENCE VReg_64,
-      (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
-      (S_MOV_B32 (i32 0)), sub1)
+      (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                         /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+      sub0, (S_MOV_B32 (i32 0)), sub1)
 >;
 
 
@@ -1280,8 +1438,10 @@ def : GCNPat <
 def : GCNPat <
   (i64 (sext i1:$src)),
   (REG_SEQUENCE VReg_64,
-    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
-    (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                       /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
 >;
 
 class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : GCNPat <
@@ -1296,10 +1456,12 @@ def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
 
 // If we need to perform a logical operation on i1 values, we need to
 // use vector comparisons since there is only one SCC register. Vector
-// comparisons still write to a pair of SGPRs, so treat these as
-// 64-bit comparisons. When legalizing SGPR copies, instructions
-// resulting in the copies from SCC to these instructions will be
-// moved to the VALU.
+// comparisons may write to a pair of SGPRs or a single SGPR, so treat
+// these as 32 or 64-bit comparisons. When legalizing SGPR copies,
+// instructions resulting in the copies from SCC to these instructions
+// will be moved to the VALU.
+
+let WaveSizePredicate = isWave64 in {
 def : GCNPat <
   (i1 (and i1:$src0, i1:$src1)),
   (S_AND_B64 $src0, $src1)
@@ -1336,35 +1498,89 @@ def : GCNPat <
   (S_NOT_B64 $src0)
 >;
 }
+} // end isWave64
+
+let WaveSizePredicate = isWave32 in {
+def : GCNPat <
+  (i1 (and i1:$src0, i1:$src1)),
+  (S_AND_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+  (i1 (or i1:$src0, i1:$src1)),
+  (S_OR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+  (i1 (xor i1:$src0, i1:$src1)),
+  (S_XOR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+  (i1 (add i1:$src0, i1:$src1)),
+  (S_XOR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+  (i1 (sub i1:$src0, i1:$src1)),
+  (S_XOR_B32 $src0, $src1)
+>;
+
+let AddedComplexity = 1 in {
+def : GCNPat <
+  (i1 (add i1:$src0, (i1 -1))),
+  (S_NOT_B32 $src0)
+>;
+
+def : GCNPat <
+  (i1 (sub i1:$src0, (i1 -1))),
+  (S_NOT_B32 $src0)
+>;
+}
+} // end isWave32
 
 def : GCNPat <
   (f16 (sint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f16 (uint_to_fp i1:$src)),
-  (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+  (V_CVT_F16_F32_e32 (
+      V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                        $src))
 >;
 
 def : GCNPat <
   (f32 (sint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+                        $src)
 >;
 
 def : GCNPat <
   (f32 (uint_to_fp i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                        /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+                        $src)
 >;
 
 def : GCNPat <
   (f64 (sint_to_fp i1:$src)),
-  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 -1),
+                                        $src))
 >;
 
 def : GCNPat <
   (f64 (uint_to_fp i1:$src)),
-  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                                        /*src1mod*/(i32 0), /*src1*/(i32 1),
+                                        $src))
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1417,7 +1633,7 @@ def : GCNPat<
 
 def : GCNPat<
   (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
-  (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+  (V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
 >;
 }
 
@@ -1478,6 +1694,14 @@ def : GCNPat <
 >;
 } // End OtherPredicates = [HasDLInsts]
 
+let SubtargetPredicate = isGFX10Plus in
+def : GCNPat <
+  (fma (f16 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+       (f16 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+       (f16 (VOP3NoMods f32:$src2))),
+  (V_FMAC_F16_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+                  SRCMODS.NONE, $src2, $clamp, $omod)
+>;
 
 // Allow integer inputs
 class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
@@ -1568,7 +1792,7 @@ def : GCNPat <
 // Fract Patterns
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isSI in {
+let SubtargetPredicate = isGFX6 in {
 
 // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
 // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
@@ -1595,7 +1819,7 @@ def : GCNPat <
       DSTCLAMP.NONE, DSTOMOD.NONE)
 >;
 
-} // End SubtargetPredicates = isSI
+} // End SubtargetPredicate = isGFX6
 
 //============================================================================//
 // Miscellaneous Optimization Patterns
@@ -1609,6 +1833,13 @@ def : GCNPat<
   (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
 >;
 
+// Avoid pointlessly materializing a constant in VGPR.
+// FIXME: Should also do this for readlane, but tablegen crashes on
+// the ignored src1.
+def : GCNPat<
+  (int_amdgcn_readfirstlane (i32 imm:$src)),
+  (S_MOV_B32 $src)
+>;
 
 multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
   def : GCNPat <
@@ -1622,8 +1853,6 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
   >;
 }
 
-let SubtargetPredicate = isGCN in {
-
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
 
@@ -1633,8 +1862,6 @@ defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
 defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
 defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
 
-}
-
 // This matches 16 permutations of
 // max(min(x, y), min(max(x, y), z))
 class FPMed3Pat<ValueType vt,
@@ -1683,8 +1910,8 @@ multiclass Int16Med3Pat<Instruction med3Inst,
 
 def : FPMed3Pat<f32, V_MED3_F32>;
 
-let OtherPredicates = [isGFX9] in {
+let OtherPredicates = [isGFX9Plus] in {
 def : FP16Med3Pat<f16, V_MED3_F16>;
 defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
 defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
-} // End Predicates = [isGFX9]
+} // End OtherPredicates = [isGFX9Plus]
diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td
deleted file mode 100644
index e51ff4b4bc50..000000000000
--- a/lib/Target/AMDGPU/SIIntrinsics.td
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Backend internal SI Intrinsic Definitions. User code should not
-// directly use these.
-//
-//===----------------------------------------------------------------------===//
-
-
-let TargetPrefix = "SI", isTarget = 1 in {
-  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
-} // End TargetPrefix = "SI", isTarget = 1
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index be291b127301..ae8b967893a2 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- SILoadStoreOptimizer.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -132,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
     bool GLC1;
     bool SLC0;
     bool SLC1;
+    bool DLC0;
+    bool DLC1;
     bool UseST64;
     SmallVector<MachineInstr *, 8> InstsToMove;
   };
@@ -257,13 +258,11 @@ static void addDefsUsesToList(const MachineInstr &MI,
 
 static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                       MachineBasicBlock::iterator B,
-                                      const SIInstrInfo *TII,
                                       AliasAnalysis *AA) {
   // RAW or WAR - cannot reorder
   // WAW - cannot reorder
   // RAR - safe to reorder
-  return !(A->mayStore() || B->mayStore()) ||
-         TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+  return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
 }
 
 // Add MI and its defs to the lists if MI reads one of the defs that are
@@ -282,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
     // registers are in SSA form.
     if (Use.isReg() &&
         ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+         (Use.isDef() && RegDefs.count(Use.getReg())) ||
          (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
           PhysRegUses.count(Use.getReg())))) {
       Insts.push_back(&MI);
@@ -295,13 +295,13 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
 
 static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                                     ArrayRef<MachineInstr *> InstsToMove,
-                                    const SIInstrInfo *TII, AliasAnalysis *AA) {
+                                    AliasAnalysis *AA) {
   assert(MemOp.mayLoadOrStore());
 
   for (MachineInstr *InstToMove : InstsToMove) {
     if (!InstToMove->mayLoadOrStore())
       continue;
-    if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
+    if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA))
       return false;
   }
   return true;
@@ -326,7 +326,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
   if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
     return (EltOffset0 + CI.Width0 == EltOffset1 ||
             EltOffset1 + CI.Width1 == EltOffset0) &&
-           CI.GLC0 == CI.GLC1 &&
+           CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
            (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
   }
 
@@ -567,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
       }
 
       if (MBBI->mayLoadOrStore() &&
-          (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
-           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+          (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+           !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) {
         // We fail condition #1, but we may still be able to satisfy condition
         // #2.  Add this instruction to the move list and then we will check
         // if condition #2 holds once we have selected the matching instruction.
@@ -640,6 +640,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
           CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
           CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
         }
+        CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+        CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
       }
 
       // Check both offsets fit in the reduced range.
@@ -647,7 +649,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
       // move and make sure they are all safe to move down past the merged
       // instruction.
       if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
-        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+        if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
           return true;
     }
 
@@ -656,8 +658,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
     // it was safe to move I and also all the instruction in InstsToMove
     // down past this instruction.
     // check if we can move I across MBBI and if we can move all I's users
-    if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
-        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+    if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+        !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
       break;
   }
   return false;
@@ -726,7 +728,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
 
     TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
         .addReg(ImmReg)
-        .addReg(AddrReg->getReg(), 0, BaseSubReg);
+        .addReg(AddrReg->getReg(), 0, BaseSubReg)
+        .addImm(0); // clamp bit
     BaseSubReg = 0;
   }
 
@@ -819,7 +822,8 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
 
     TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
         .addReg(ImmReg)
-        .addReg(AddrReg->getReg(), 0, BaseSubReg);
+        .addReg(AddrReg->getReg(), 0, BaseSubReg)
+        .addImm(0); // clamp bit
     BaseSubReg = 0;
   }
 
@@ -858,6 +862,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
       .addImm(MergedOffset) // offset
       .addImm(CI.GLC0)      // glc
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -910,6 +915,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
       .addImm(CI.GLC0)      // glc
       .addImm(CI.SLC0)      // slc
       .addImm(0)            // tfe
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1089,9 +1095,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
   MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
       .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
       .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
-      .addImm(CI.GLC0)                          // glc
-      .addImm(CI.SLC0)                          // slc
-      .addImm(0)                                // tfe
+      .addImm(CI.GLC0)      // glc
+      .addImm(CI.SLC0)      // slc
+      .addImm(0)            // tfe
+      .addImm(CI.DLC0)      // dlc
       .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
 
   moveInstsAfter(MIB, CI.InstsToMove);
@@ -1137,9 +1144,10 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
   MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
   MachineOperand OffsetHi =
     createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
-  unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
-  unsigned DeadCarryReg =
-    MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+  const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+  unsigned CarryReg = MRI->createVirtualRegister(CarryRC);
+  unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC);
 
   unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
   unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -1147,7 +1155,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
     BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
       .addReg(CarryReg, RegState::Define)
       .addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
-    .add(OffsetLo);
+      .add(OffsetLo)
+      .addImm(0); // clamp bit
   (void)LoHalf;
   LLVM_DEBUG(dbgs() << "    "; LoHalf->dump(););
 
@@ -1156,7 +1165,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
     .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
     .addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
     .add(OffsetHi)
-    .addReg(CarryReg, RegState::Kill);
+    .addReg(CarryReg, RegState::Kill)
+    .addImm(0); // clamp bit
   (void)HiHalf;
   LLVM_DEBUG(dbgs() << "    "; HiHalf->dump(););
 
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 1aa1feebbdae..78f409cd9555 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -1,9 +1,8 @@
 //===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,16 @@ private:
   LiveIntervals *LIS = nullptr;
   MachineRegisterInfo *MRI = nullptr;
 
+  const TargetRegisterClass *BoolRC = nullptr;
+  unsigned AndOpc;
+  unsigned OrOpc;
+  unsigned XorOpc;
+  unsigned MovTermOpc;
+  unsigned Andn2TermOpc;
+  unsigned XorTermrOpc;
+  unsigned OrSaveExecOpc;
+  unsigned Exec;
+
   void emitIf(MachineInstr &MI);
   void emitElse(MachineInstr &MI);
   void emitIfBreak(MachineInstr &MI);
@@ -176,7 +185,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
   assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
          Cond.getSubReg() == AMDGPU::NoSubRegister);
 
-  unsigned SaveExecReg = SaveExec.getReg();
+  Register SaveExecReg = SaveExec.getReg();
 
   MachineOperand &ImpDefSCC = MI.getOperand(4);
   assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
@@ -188,26 +197,26 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
 
   // Add an implicit def of exec to discourage scheduling VALU after this which
   // will interfere with trying to form s_and_saveexec_b64 later.
-  unsigned CopyReg = SimpleIf ? SaveExecReg
-                       : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register CopyReg = SimpleIf ? SaveExecReg
+                       : MRI->createVirtualRegister(BoolRC);
   MachineInstr *CopyExec =
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
-    .addReg(AMDGPU::EXEC)
-    .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
+    .addReg(Exec)
+    .addReg(Exec, RegState::ImplicitDefine);
 
-  unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned Tmp = MRI->createVirtualRegister(BoolRC);
 
   MachineInstr *And =
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
+    BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
     .addReg(CopyReg)
-    //.addReg(AMDGPU::EXEC)
-    .addReg(Cond.getReg());
+    .add(Cond);
+
   setImpSCCDefDead(*And, true);
 
   MachineInstr *Xor = nullptr;
   if (!SimpleIf) {
     Xor =
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
+      BuildMI(MBB, I, DL, TII->get(XorOpc), SaveExecReg)
       .addReg(Tmp)
       .addReg(CopyReg);
     setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
@@ -216,7 +225,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
   // Use a copy that is a terminator to get correct spill code placement it with
   // fast regalloc.
   MachineInstr *SetExec =
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
+    BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
     .addReg(Tmp, RegState::Kill);
 
   // Insert a pseudo terminator to help keep the verifier happy. This will also
@@ -240,7 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
   LIS->InsertMachineInstrInMaps(*SetExec);
   LIS->InsertMachineInstrInMaps(*NewBr);
 
-  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
+  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
   MI.eraseFromParent();
 
   // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -257,7 +266,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
 
-  unsigned DstReg = MI.getOperand(0).getReg();
+  Register DstReg = MI.getOperand(0).getReg();
   assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
 
   bool ExecModified = MI.getOperand(3).getImm() != 0;
@@ -266,17 +275,17 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   // We are running before TwoAddressInstructions, and si_else's operands are
   // tied. In order to correctly tie the registers, split this into a copy of
   // the src like it does.
-  unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  Register CopyReg = MRI->createVirtualRegister(BoolRC);
   MachineInstr *CopyExec =
     BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
       .add(MI.getOperand(1)); // Saved EXEC
 
   // This must be inserted before phis and any spill code inserted before the
   // else.
-  unsigned SaveReg = ExecModified ?
-    MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
+  Register SaveReg = ExecModified ?
+    MRI->createVirtualRegister(BoolRC) : DstReg;
   MachineInstr *OrSaveExec =
-    BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
+    BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
     .addReg(CopyReg);
 
   MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -285,8 +294,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
 
   if (ExecModified) {
     MachineInstr *And =
-      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
-      .addReg(AMDGPU::EXEC)
+      BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
+      .addReg(Exec)
       .addReg(SaveReg);
 
     if (LIS)
@@ -294,8 +303,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   }
 
   MachineInstr *Xor =
-    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
-    .addReg(AMDGPU::EXEC)
+    BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
+    .addReg(Exec)
     .addReg(DstReg);
 
   MachineInstr *Branch =
@@ -324,7 +333,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
     LIS->createAndComputeVirtRegInterval(SaveReg);
 
   // Let this be recomputed.
-  LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
+  LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
 }
 
 void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -348,14 +357,14 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
   // exit" mask.
   MachineInstr *And = nullptr, *Or = nullptr;
   if (!SkipAnding) {
-    And = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
-             .addReg(AMDGPU::EXEC)
+    And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
+             .addReg(Exec)
              .add(MI.getOperand(1));
-    Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
              .addReg(Dst)
              .add(MI.getOperand(2));
   } else
-    Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+    Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2));
 
@@ -373,8 +382,8 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
   const DebugLoc &DL = MI.getDebugLoc();
 
   MachineInstr *AndN2 =
-      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC)
+      BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
+          .addReg(Exec)
           .add(MI.getOperand(0));
 
   MachineInstr *Branch =
@@ -395,8 +404,8 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
 
   MachineBasicBlock::iterator InsPt = MBB.begin();
   MachineInstr *NewMI =
-      BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC)
+      BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+          .addReg(Exec)
           .add(MI.getOperand(0));
 
   if (LIS)
@@ -428,13 +437,13 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
   // does not really modify exec.
   for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
     if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
-        !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
+        !(I->isCopy() && I->getOperand(0).getReg() != Exec))
       return;
 
   for (const auto &SrcOp : Def->explicit_operands())
     if (SrcOp.isReg() && SrcOp.isUse() &&
         (TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
-        SrcOp.getReg() == AMDGPU::EXEC))
+        SrcOp.getReg() == Exec))
       Src.push_back(SrcOp);
 }
 
@@ -472,6 +481,27 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
   MRI = &MF.getRegInfo();
+  BoolRC = TRI->getBoolRC();
+
+  if (ST.isWave32()) {
+    AndOpc = AMDGPU::S_AND_B32;
+    OrOpc = AMDGPU::S_OR_B32;
+    XorOpc = AMDGPU::S_XOR_B32;
+    MovTermOpc = AMDGPU::S_MOV_B32_term;
+    Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
+    XorTermrOpc = AMDGPU::S_XOR_B32_term;
+    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
+    Exec = AMDGPU::EXEC_LO;
+  } else {
+    AndOpc = AMDGPU::S_AND_B64;
+    OrOpc = AMDGPU::S_OR_B64;
+    XorOpc = AMDGPU::S_XOR_B64;
+    MovTermOpc = AMDGPU::S_MOV_B64_term;
+    Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
+    XorTermrOpc = AMDGPU::S_XOR_B64_term;
+    OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
+    Exec = AMDGPU::EXEC;
+  }
 
   MachineFunction::iterator NextBB;
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -508,6 +538,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
 
       case AMDGPU::S_AND_B64:
       case AMDGPU::S_OR_B64:
+      case AMDGPU::S_AND_B32:
+      case AMDGPU::S_OR_B32:
         // Cleanup bit manipulations on exec mask
         combineMasks(MI);
         Last = I;
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index eb038bb5d5fc..1c0f836f07e6 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -1,15 +1,14 @@
 //===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This pass lowers all occurrences of i1 values (with a vreg_1 register class)
-// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
-// and a wave-level control flow graph.
+// to lane masks (32- or 64-bit scalar registers). The pass assumes machine SSA
+// form and a wave-level control flow graph.
 //
 // Before this pass, values that are semantically i1 and are defined and used
 // within the same basic block are already represented as lane masks in scalar
@@ -51,6 +50,7 @@ public:
   static char ID;
 
 private:
+  bool IsWave32 = false;
   MachineFunction *MF = nullptr;
   MachineDominatorTree *DT = nullptr;
   MachinePostDominatorTree *PDT = nullptr;
@@ -58,6 +58,14 @@ private:
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
 
+  unsigned ExecReg;
+  unsigned MovOp;
+  unsigned AndOp;
+  unsigned OrOp;
+  unsigned XorOp;
+  unsigned AndN2Op;
+  unsigned OrN2Op;
+
   DenseSet<unsigned> ConstrainRegs;
 
 public:
@@ -87,6 +95,11 @@ private:
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
+  bool isVreg1(unsigned Reg) const {
+    return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+  }
+
   bool isLaneMaskReg(unsigned Reg) const {
     return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
            TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -412,8 +425,10 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
 }
 
 static unsigned createLaneMaskReg(MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
+                                                 : &AMDGPU::SReg_64RegClass);
 }
 
 static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
@@ -443,13 +458,32 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
+  IsWave32 = ST->isWave32();
+
+  if (IsWave32) {
+    ExecReg = AMDGPU::EXEC_LO;
+    MovOp = AMDGPU::S_MOV_B32;
+    AndOp = AMDGPU::S_AND_B32;
+    OrOp = AMDGPU::S_OR_B32;
+    XorOp = AMDGPU::S_XOR_B32;
+    AndN2Op = AMDGPU::S_ANDN2_B32;
+    OrN2Op = AMDGPU::S_ORN2_B32;
+  } else {
+    ExecReg = AMDGPU::EXEC;
+    MovOp = AMDGPU::S_MOV_B64;
+    AndOp = AMDGPU::S_AND_B64;
+    OrOp = AMDGPU::S_OR_B64;
+    XorOp = AMDGPU::S_XOR_B64;
+    AndN2Op = AMDGPU::S_ANDN2_B64;
+    OrN2Op = AMDGPU::S_ORN2_B64;
+  }
 
   lowerCopiesFromI1();
   lowerPhis();
   lowerCopiesToI1();
 
   for (unsigned Reg : ConstrainRegs)
-    MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
   ConstrainRegs.clear();
 
   return true;
@@ -465,13 +499,10 @@ void SILowerI1Copies::lowerCopiesFromI1() {
 
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(SrcReg))
         continue;
 
-      if (isLaneMaskReg(DstReg) ||
-          (TargetRegisterInfo::isVirtualRegister(DstReg) &&
-           MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
         continue;
 
       // Copy into a 32-bit vector register.
@@ -483,6 +514,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
 
       ConstrainRegs.insert(SrcReg);
       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+          .addImm(0)
+          .addImm(0)
           .addImm(0)
           .addImm(-1)
           .addReg(SrcReg);
@@ -503,18 +536,22 @@ void SILowerI1Copies::lowerPhis() {
   SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
   SmallVector<unsigned, 4> IncomingRegs;
   SmallVector<unsigned, 4> IncomingUpdated;
+#ifndef NDEBUG
+  DenseSet<unsigned> PhiRegisters;
+#endif
 
   for (MachineBasicBlock &MBB : *MF) {
     LF.initialize(MBB);
 
     for (MachineInstr &MI : MBB.phis()) {
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
 
-      MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+      MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+                                        : &AMDGPU::SReg_64RegClass);
 
       // Collect incoming values.
       for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
@@ -525,18 +562,22 @@ void SILowerI1Copies::lowerPhis() {
 
         if (IncomingDef->getOpcode() == AMDGPU::COPY) {
           IncomingReg = IncomingDef->getOperand(1).getReg();
-          assert(isLaneMaskReg(IncomingReg));
+          assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
           assert(!IncomingDef->getOperand(1).getSubReg());
         } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
           continue;
         } else {
-          assert(IncomingDef->isPHI());
+          assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
         }
 
         IncomingBlocks.push_back(IncomingMBB);
         IncomingRegs.push_back(IncomingReg);
       }
 
+#ifndef NDEBUG
+      PhiRegisters.insert(DstReg);
+#endif
+
       // Phis in a loop that are observed outside the loop receive a simple but
       // conservatively correct treatment.
       MachineBasicBlock *PostDomBound = &MBB;
@@ -629,8 +670,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
         continue;
 
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
-          MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       if (MRI->use_empty(DstReg)) {
@@ -640,7 +680,8 @@ void SILowerI1Copies::lowerCopiesToI1() {
 
       LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
 
-      MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+      MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+                                        : &AMDGPU::SReg_64RegClass);
       if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
         continue;
 
@@ -649,7 +690,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
       assert(!MI.getOperand(1).getSubReg());
 
       if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          !isLaneMaskReg(SrcReg)) {
+          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
         assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
         unsigned TmpReg = createLaneMaskReg(*MF);
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
@@ -699,7 +740,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
       return false;
   }
 
-  if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+  if (MI->getOpcode() != MovOp)
     return false;
 
   if (!MI->getOperand(1).isImm())
@@ -774,10 +815,10 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
     if (PrevVal == CurVal) {
       BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
     } else if (CurVal) {
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
     } else {
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
-          .addReg(AMDGPU::EXEC)
+      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
+          .addReg(ExecReg)
           .addImm(-1);
     }
     return;
@@ -790,9 +831,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
       PrevMaskedReg = PrevReg;
     } else {
       PrevMaskedReg = createLaneMaskReg(*MF);
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
           .addReg(PrevReg)
-          .addReg(AMDGPU::EXEC);
+          .addReg(ExecReg);
     }
   }
   if (!CurConstant) {
@@ -801,9 +842,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
       CurMaskedReg = CurReg;
     } else {
       CurMaskedReg = createLaneMaskReg(*MF);
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
           .addReg(CurReg)
-          .addReg(AMDGPU::EXEC);
+          .addReg(ExecReg);
     }
   }
 
@@ -814,12 +855,12 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
         .addReg(PrevMaskedReg);
   } else if (PrevConstant && PrevVal) {
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
         .addReg(CurMaskedReg)
-        .addReg(AMDGPU::EXEC);
+        .addReg(ExecReg);
   } else {
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
         .addReg(PrevMaskedReg)
-        .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
   }
 }
diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
new file mode 100644
index 000000000000..a82047473370
--- /dev/null
+++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -0,0 +1,323 @@
+//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
+// SGPR spills, so must insert CSR SGPR spills as well as expand them.
+//
+// This pass must never create new SGPR virtual registers.
+//
+// FIXME: Must stop RegScavenger spills in later passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-sgpr-spills"
+
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+
+namespace {
+
+static cl::opt<bool> EnableSpillVGPRToAGPR(
+  "amdgpu-spill-vgpr-to-agpr",
+  cl::desc("Enable spilling VGPRs to AGPRs"),
+  cl::ReallyHidden,
+  cl::init(true));
+
+class SILowerSGPRSpills : public MachineFunctionPass {
+private:
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  VirtRegMap *VRM = nullptr;
+  LiveIntervals *LIS = nullptr;
+
+  // Save and Restore blocks of the current function. Typically there is a
+  // single save block, unless Windows EH funclets are involved.
+  MBBVector SaveBlocks;
+  MBBVector RestoreBlocks;
+
+public:
+  static char ID;
+
+  SILowerSGPRSpills() : MachineFunctionPass(ID) {}
+
+  void calculateSaveRestoreBlocks(MachineFunction &MF);
+  bool spillCalleeSavedRegs(MachineFunction &MF);
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char SILowerSGPRSpills::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
+                      "SI lower SGPR spill instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
+                    "SI lower SGPR spill instructions", false, false)
+
+char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
+
+/// Insert spill code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+                           ArrayRef<CalleeSavedInfo> CSI,
+                           LiveIntervals *LIS) {
+  MachineFunction &MF = *SaveBlock.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+  MachineBasicBlock::iterator I = SaveBlock.begin();
+  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+    for (const CalleeSavedInfo &CS : CSI) {
+      // Insert the spill to the stack frame.
+      unsigned Reg = CS.getReg();
+
+      MachineInstrSpan MIS(I, &SaveBlock);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+      TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+                              TRI);
+
+      if (LIS) {
+        assert(std::distance(MIS.begin(), I) == 1);
+        MachineInstr &Inst = *std::prev(I);
+
+        LIS->InsertMachineInstrInMaps(Inst);
+        LIS->removeAllRegUnitsForPhysReg(Reg);
+      }
+    }
+  }
+}
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+                              std::vector<CalleeSavedInfo> &CSI,
+                              LiveIntervals *LIS) {
+  MachineFunction &MF = *RestoreBlock.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+  // Restore all registers immediately before the return and any
+  // terminators that precede it.
+  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+  // FIXME: Just emit the readlane/writelane directly
+  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+    for (const CalleeSavedInfo &CI : reverse(CSI)) {
+      unsigned Reg = CI.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+      assert(I != RestoreBlock.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
+      // Insert in reverse order.  loadRegFromStackSlot can insert
+      // multiple instructions.
+
+      if (LIS) {
+        MachineInstr &Inst = *std::prev(I);
+        LIS->InsertMachineInstrInMaps(Inst);
+        LIS->removeAllRegUnitsForPhysReg(Reg);
+      }
+    }
+  }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Even when we do not change any CSR, we still want to insert the
+  // prologue and epilogue of the function.
+  // So set the save points for those.
+
+  // Use the points found by shrink-wrapping, if any.
+  if (MFI.getSavePoint()) {
+    SaveBlocks.push_back(MFI.getSavePoint());
+    assert(MFI.getRestorePoint() && "Both restore and save must be set");
+    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
+    // If RestoreBlock does not have any successor and is not a return block
+    // then the end point is unreachable and we do not need to insert any
+    // epilogue.
+    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+      RestoreBlocks.push_back(RestoreBlock);
+    return;
+  }
+
+  // Save refs to entry and return blocks.
+  SaveBlocks.push_back(&MF.front());
+  for (MachineBasicBlock &MBB : MF) {
+    if (MBB.isEHFuncletEntry())
+      SaveBlocks.push_back(&MBB);
+    if (MBB.isReturnBlock())
+      RestoreBlocks.push_back(&MBB);
+  }
+}
+
+bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const Function &F = MF.getFunction();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIFrameLowering *TFI = ST.getFrameLowering();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  RegScavenger *RS = nullptr;
+
+  // Determine which of the registers in the callee save list should be saved.
+  BitVector SavedRegs;
+  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
+
+  // Add the code to save and restore the callee saved registers.
+  if (!F.hasFnAttribute(Attribute::Naked)) {
+    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
+    // necessary for verifier liveness checks.
+    MFI.setCalleeSavedInfoValid(true);
+
+    std::vector<CalleeSavedInfo> CSI;
+    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+
+    for (unsigned I = 0; CSRegs[I]; ++I) {
+      unsigned Reg = CSRegs[I];
+      if (SavedRegs.test(Reg)) {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
+                                           TRI->getSpillAlignment(*RC),
+                                           true);
+
+        CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+      }
+    }
+
+    if (!CSI.empty()) {
+      for (MachineBasicBlock *SaveBlock : SaveBlocks)
+        insertCSRSaves(*SaveBlock, CSI, LIS);
+
+      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+        insertCSRRestores(*RestoreBlock, CSI, LIS);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
+
+  VRM = getAnalysisIfAvailable<VirtRegMap>();
+
+  assert(SaveBlocks.empty() && RestoreBlocks.empty());
+
+  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
+  // does, but somewhat simpler.
+  calculateSaveRestoreBlocks(MF);
+  bool HasCSRs = spillCalleeSavedRegs(MF);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasStackObjects() && !HasCSRs) {
+    SaveBlocks.clear();
+    RestoreBlocks.clear();
+    return false;
+  }
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
+    && EnableSpillVGPRToAGPR;
+
+  bool MadeChange = false;
+
+  const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+
+  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
+  // handled as SpilledToReg in regular PrologEpilogInserter.
+  if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
+      SpillVGPRToAGPR) {
+    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+    // are spilled to VGPRs, in which case we can eliminate the stack usage.
+    //
+    // This operates under the assumption that only other SGPR spills are users
+    // of the frame index.
+    for (MachineBasicBlock &MBB : MF) {
+      MachineBasicBlock::iterator Next;
+      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+        MachineInstr &MI = *I;
+        Next = std::next(I);
+
+        if (SpillToAGPR && TII->isVGPRSpill(MI)) {
+          // Try to eliminate stack used by VGPR spills before frame
+          // finalization.
+          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                                     AMDGPU::OpName::vaddr);
+          int FI = MI.getOperand(FIOp).getIndex();
+          unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
+            ->getReg();
+          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
+                                                TRI->isAGPR(MRI, VReg))) {
+            TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+            continue;
+          }
+        }
+
+        if (!TII->isSGPRSpill(MI))
+          continue;
+
+        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+        if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
+          (void)Spilled;
+          assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+        }
+      }
+    }
+
+    for (MachineBasicBlock &MBB : MF) {
+      for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
+        MBB.addLiveIn(SSpill.VGPR);
+
+      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
+        MBB.addLiveIn(Reg);
+
+      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
+        MBB.addLiveIn(Reg);
+
+      MBB.sortUniqueLiveIns();
+    }
+
+    MadeChange = true;
+  }
+
+  SaveBlocks.clear();
+  RestoreBlocks.clear();
+
+  return MadeChange;
+}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 181cc41bd5ff..46da974a2f45 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,6 +28,7 @@ using namespace llvm;
 
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
+    Mode(MF.getFunction()),
     PrivateSegmentBuffer(false),
     DispatchPtr(false),
     QueuePtr(false),
@@ -46,7 +46,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     ImplicitBufferPtr(false),
     ImplicitArgPtr(false),
     GITPtrHigh(0xffffffff),
-    HighBitsOf32BitAddress(0) {
+    HighBitsOf32BitAddress(0),
+    GDSSize(0) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const Function &F = MF.getFunction();
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -69,8 +70,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     // Non-entry functions have no special inputs for now, other registers
     // required for scratch access.
     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
-    ScratchWaveOffsetReg = AMDGPU::SGPR4;
-    FrameOffsetReg = AMDGPU::SGPR5;
+    ScratchWaveOffsetReg = AMDGPU::SGPR33;
+
+    // TODO: Pick a high register, and shift down, similar to a kernel.
+    FrameOffsetReg = AMDGPU::SGPR34;
     StackPtrOffsetReg = AMDGPU::SGPR32;
 
     ArgInfo.PrivateSegmentBuffer =
@@ -88,33 +91,23 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     }
   }
 
-  if (ST.debuggerEmitPrologue()) {
-    // Enable everything.
+  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
     WorkGroupIDX = true;
-    WorkGroupIDY = true;
-    WorkGroupIDZ = true;
-    WorkItemIDX = true;
-    WorkItemIDY = true;
-    WorkItemIDZ = true;
-  } else {
-    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
-      WorkGroupIDX = true;
 
-    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
-      WorkGroupIDY = true;
+  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+    WorkGroupIDY = true;
 
-    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
-      WorkGroupIDZ = true;
+  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+    WorkGroupIDZ = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
-      WorkItemIDX = true;
+  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+    WorkItemIDX = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
-      WorkItemIDY = true;
+  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+    WorkItemIDY = true;
 
-    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
-      WorkItemIDZ = true;
-  }
+  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+    WorkItemIDZ = true;
 
   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   bool HasStackObjects = FrameInfo.hasStackObjects();
@@ -154,9 +147,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     KernargSegmentPtr = true;
 
   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
+    auto hasNonSpillStackObjects = [&]() {
+      // Avoid expensive checking if there's no stack objects.
+      if (!HasStackObjects)
+        return false;
+      for (auto OI = FrameInfo.getObjectIndexBegin(),
+                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
+        if (!FrameInfo.isSpillSlotObjectIndex(OI))
+          return true;
+      // All stack objects are spill slots.
+      return false;
+    };
     // TODO: This could be refined a lot. The attribute is a poor way of
     // detecting calls that may require it before argument lowering.
-    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
+    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
       FlatScratchInit = true;
   }
 
@@ -169,6 +173,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   S = A.getValueAsString();
   if (!S.empty())
     S.consumeInteger(0, HighBitsOf32BitAddress);
+
+  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
+  if (!S.empty())
+    S.consumeInteger(0, GDSSize);
 }
 
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
@@ -239,6 +247,17 @@ static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
   return false;
 }
 
+/// \returns true if \p NumNeed slots are available in VGPRs already used for
+/// SGPR spilling.
+//
+// FIXME: This only works after processFunctionBeforeFrameFinalized
+bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+                                                      unsigned NumNeed) const {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  unsigned WaveSize = ST.getWavefrontSize();
+  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
+}
+
 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                     int FI) {
@@ -260,7 +279,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 
   int NumLanes = Size / 4;
 
-  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
 
   // Make sure to handle the case where a wide SGPR spill may span between two
   // VGPRs.
@@ -300,26 +319,92 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
   return true;
 }
 
-void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
-  for (auto &R : SGPRToVGPRSpills)
-    MFI.RemoveStackObject(R.first);
+/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
+/// Either an AGPR is spilled to a VGPR or vice versa.
+/// \returns true if \p FI can be eliminated completely.
+bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
+                                                    int FI,
+                                                    bool isAGPRtoVGPR) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();
+
+  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
+
+  auto &Spill = VGPRToAGPRSpills[FI];
+
+  // This has already been allocated.
+  if (!Spill.Lanes.empty())
+    return Spill.FullyAllocated;
+
+  unsigned Size = FrameInfo.getObjectSize(FI);
+  unsigned NumLanes = Size / 4;
+  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
+
+  const TargetRegisterClass &RC =
+      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
+  auto Regs = RC.getRegisters();
+
+  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  Spill.FullyAllocated = true;
+
+  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
+  // once.
+  BitVector OtherUsedRegs;
+  OtherUsedRegs.resize(TRI->getNumRegs());
+
+  const uint32_t *CSRMask =
+      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+  if (CSRMask)
+    OtherUsedRegs.setBitsInMask(CSRMask);
+
+  // TODO: Should include register tuples, but doesn't matter with current
+  // usage.
+  for (MCPhysReg Reg : SpillAGPR)
+    OtherUsedRegs.set(Reg);
+  for (MCPhysReg Reg : SpillVGPR)
+    OtherUsedRegs.set(Reg);
+
+  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
+  for (unsigned I = 0; I < NumLanes; ++I) {
+    NextSpillReg = std::find_if(
+        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
+          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
+                 !OtherUsedRegs[Reg];
+        });
+
+    if (NextSpillReg == Regs.end()) { // Registers exhausted
+      Spill.FullyAllocated = false;
+      break;
+    }
+
+    OtherUsedRegs.set(*NextSpillReg);
+    SpillRegs.push_back(*NextSpillReg);
+    Spill.Lanes[I] = *NextSpillReg++;
+  }
+
+  return Spill.FullyAllocated;
 }
 
+void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
+  // The FP spill hasn't been inserted yet, so keep it around.
+  for (auto &R : SGPRToVGPRSpills) {
+    if (R.first != FramePointerSaveIndex)
+      MFI.RemoveStackObject(R.first);
+  }
 
-/// \returns VGPR used for \p Dim' work item ID.
-unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
-  switch (Dim) {
-  case 0:
-    assert(hasWorkItemIDX());
-    return AMDGPU::VGPR0;
-  case 1:
-    assert(hasWorkItemIDY());
-    return AMDGPU::VGPR1;
-  case 2:
-    assert(hasWorkItemIDZ());
-    return AMDGPU::VGPR2;
+  // All other SGPRs must be allocated on the default stack, so reset the stack
+  // ID.
+  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
+       ++i)
+    if (i != FramePointerSaveIndex)
+      MFI.setStackID(i, TargetStackID::Default);
+
+  for (auto &R : VGPRToAGPRSpills) {
+    if (R.second.FullyAllocated)
+      MFI.RemoveStackObject(R.first);
   }
-  llvm_unreachable("unexpected dimension");
 }
 
 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
@@ -330,3 +415,97 @@ MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 }
+
+static yaml::StringValue regToString(unsigned Reg,
+                                     const TargetRegisterInfo &TRI) {
+  yaml::StringValue Dest;
+  {
+    raw_string_ostream OS(Dest.Value);
+    OS << printReg(Reg, &TRI);
+  }
+  return Dest;
+}
+
+static Optional<yaml::SIArgumentInfo>
+convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
+                    const TargetRegisterInfo &TRI) {
+  yaml::SIArgumentInfo AI;
+
+  auto convertArg = [&](Optional<yaml::SIArgument> &A,
+                        const ArgDescriptor &Arg) {
+    if (!Arg)
+      return false;
+
+    // Create a register or stack argument.
+    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
+    if (Arg.isRegister()) {
+      raw_string_ostream OS(SA.RegisterName.Value);
+      OS << printReg(Arg.getRegister(), &TRI);
+    } else
+      SA.StackOffset = Arg.getStackOffset();
+    // Check and update the optional mask.
+    if (Arg.isMasked())
+      SA.Mask = Arg.getMask();
+
+    A = SA;
+    return true;
+  };
+
+  bool Any = false;
+  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
+  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
+  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
+  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
+  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
+  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
+  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
+  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
+  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
+  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
+  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
+  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
+                    ArgInfo.PrivateSegmentWaveByteOffset);
+  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
+  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
+  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
+  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
+  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
+
+  if (Any)
+    return AI;
+
+  return None;
+}
+
+yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
+  const llvm::SIMachineFunctionInfo& MFI,
+  const TargetRegisterInfo &TRI)
+  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
+    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
+    LDSSize(MFI.getLDSSize()),
+    IsEntryFunction(MFI.isEntryFunction()),
+    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
+    MemoryBound(MFI.isMemoryBound()),
+    WaveLimiter(MFI.needsWaveLimiter()),
+    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
+    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
+    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
+    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
+    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
+    Mode(MFI.getMode()) {}
+
+void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
+}
+
+bool SIMachineFunctionInfo::initializeBaseYamlFields(
+  const yaml::SIMachineFunctionInfo &YamlMFI) {
+  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
+  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
+  LDSSize = YamlMFI.LDSSize;
+  IsEntryFunction = YamlMFI.IsEntryFunction;
+  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
+  MemoryBound = YamlMFI.MemoryBound;
+  WaveLimiter = YamlMFI.WaveLimiter;
+  return false;
+}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef91d1e43075..f19b20ceb5da 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,13 +15,16 @@
 
 #include "AMDGPUArgumentUsageInfo.h"
 #include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -38,12 +40,19 @@ class MachineFrameInfo;
 class MachineFunction;
 class TargetRegisterClass;
 
-class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
+class AMDGPUPseudoSourceValue : public PseudoSourceValue {
 public:
-  // TODO: Is the img rsrc useful?
-  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
+  enum AMDGPUPSVKind : unsigned {
+    PSVBuffer = PseudoSourceValue::TargetCustom,
+    PSVImage,
+    GWSResource
+  };
+
+protected:
+  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+      : PseudoSourceValue(Kind, TII) {}
 
+public:
   bool isConstant(const MachineFrameInfo *) const override {
     // This should probably be true for most images, but we will start by being
     // conservative.
@@ -59,29 +68,250 @@ public:
   }
 };
 
-class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
 public:
-  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
 
-  bool isConstant(const MachineFrameInfo *) const override {
-    // This should probably be true for most images, but we will start by being
-    // conservative.
-    return false;
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVBuffer;
   }
+};
 
+class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  // TODO: Is the img rsrc useful?
+  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == PSVImage;
+  }
+};
+
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GWSResource;
+  }
+
+  // These are inaccessible memory from IR.
   bool isAliased(const MachineFrameInfo *) const override {
-    return true;
+    return false;
   }
 
+  // These are inaccessible memory from IR.
   bool mayAlias(const MachineFrameInfo *) const override {
-    return true;
+    return false;
+  }
+
+  void printCustom(raw_ostream &OS) const override {
+    OS << "GWSResource";
+  }
+};
+
+namespace yaml {
+
+// YAML form of one argument: a register name or a stack offset, optional mask.
+struct SIArgument {
+  bool IsRegister; // True: RegisterName is live; false: StackOffset.
+  union {
+    StringValue RegisterName;
+    unsigned StackOffset;
+  };
+  Optional<unsigned> Mask;
+
+  // Default constructor, which creates a stack argument.
+  SIArgument() : IsRegister(false), StackOffset(0) {}
+  // Copy constructor; only the union member selected by Other is copied.
+  SIArgument(const SIArgument &Other) {
+    IsRegister = Other.IsRegister;
+    if (IsRegister) {
+      ::new ((void *)std::addressof(RegisterName))
+          StringValue(Other.RegisterName);
+    } else
+      StackOffset = Other.StackOffset;
+    Mask = Other.Mask;
+  }
+  // Copy assignment; rebuilds in place so a live RegisterName is not leaked.
+  SIArgument &operator=(const SIArgument &Other) {
+    if (this != &Other) { // Self-assignment would destroy the source.
+      this->~SIArgument(); // Runs ~StringValue() when IsRegister is set.
+      ::new ((void *)this) SIArgument(Other);
+    }
+    return *this;
+  }
+  ~SIArgument() {
+    if (IsRegister)
+      RegisterName.~StringValue();
+  }
+
+  // Helper to create a register or stack argument.
+  static inline SIArgument createArgument(bool IsReg) {
+    if (IsReg)
+      return SIArgument(IsReg);
+    return SIArgument();
+  }
+
+private:
+  // Construct a register argument.
+  SIArgument(bool) : IsRegister(true), RegisterName() {}
+};
+
+template <> struct MappingTraits<SIArgument> {
+  static void mapping(IO &YamlIO, SIArgument &A) {
+    if (YamlIO.outputting()) {
+      if (A.IsRegister)
+        YamlIO.mapRequired("reg", A.RegisterName);
+      else
+        YamlIO.mapRequired("offset", A.StackOffset);
+    } else {
+      auto Keys = YamlIO.keys();
+      if (is_contained(Keys, "reg")) {
+        A = SIArgument::createArgument(true);
+        YamlIO.mapRequired("reg", A.RegisterName);
+      } else if (is_contained(Keys, "offset"))
+        YamlIO.mapRequired("offset", A.StackOffset);
+      else
+        YamlIO.setError("missing required key 'reg' or 'offset'");
+    }
+    YamlIO.mapOptional("mask", A.Mask);
+  }
+  static const bool flow = true;
+};
+
+struct SIArgumentInfo {
+  Optional<SIArgument> PrivateSegmentBuffer;
+  Optional<SIArgument> DispatchPtr;
+  Optional<SIArgument> QueuePtr;
+  Optional<SIArgument> KernargSegmentPtr;
+  Optional<SIArgument> DispatchID;
+  Optional<SIArgument> FlatScratchInit;
+  Optional<SIArgument> PrivateSegmentSize;
+
+  Optional<SIArgument> WorkGroupIDX;
+  Optional<SIArgument> WorkGroupIDY;
+  Optional<SIArgument> WorkGroupIDZ;
+  Optional<SIArgument> WorkGroupInfo;
+  Optional<SIArgument> PrivateSegmentWaveByteOffset;
+
+  Optional<SIArgument> ImplicitArgPtr;
+  Optional<SIArgument> ImplicitBufferPtr;
+
+  Optional<SIArgument> WorkItemIDX;
+  Optional<SIArgument> WorkItemIDY;
+  Optional<SIArgument> WorkItemIDZ;
+};
+
+template <> struct MappingTraits<SIArgumentInfo> {
+  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
+    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
+    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
+    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
+    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
+    YamlIO.mapOptional("dispatchID", AI.DispatchID);
+    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
+    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+
+    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
+    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
+    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
+    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+    YamlIO.mapOptional("privateSegmentWaveByteOffset",
+                       AI.PrivateSegmentWaveByteOffset);
+
+    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
+    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
+
+    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
+    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
+    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
+  }
+};
+
+// Mode bits; the defaults here are the default calling convention's mode.
+struct SIMode {
+  bool IEEE = true;
+  bool DX10Clamp = true;
+
+  SIMode() = default;
+
+  // Copy both bits from an AMDGPU::SIModeRegisterDefaults.
+  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+    IEEE = Mode.IEEE;
+    DX10Clamp = Mode.DX10Clamp;
   }
+
+  bool operator ==(const SIMode Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+  }
+};
+
+template <> struct MappingTraits<SIMode> {
+  static void mapping(IO &YamlIO, SIMode &Mode) {
+    YamlIO.mapOptional("ieee", Mode.IEEE, true);
+    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+  }
+};
+
+struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+  uint64_t ExplicitKernArgSize = 0;
+  unsigned MaxKernArgAlign = 0;
+  unsigned LDSSize = 0;
+  bool IsEntryFunction = false;
+  bool NoSignedZerosFPMath = false;
+  bool MemoryBound = false;
+  bool WaveLimiter = false;
+
+  StringValue ScratchRSrcReg = "$private_rsrc_reg";
+  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
+  StringValue FrameOffsetReg = "$fp_reg";
+  StringValue StackPtrOffsetReg = "$sp_reg";
+
+  Optional<SIArgumentInfo> ArgInfo;
+  SIMode Mode;
+
+  SIMachineFunctionInfo() = default;
+  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
+                        const TargetRegisterInfo &TRI);
+
+  void mappingImpl(yaml::IO &YamlIO) override;
+  ~SIMachineFunctionInfo() = default;
 };
 
+template <> struct MappingTraits<SIMachineFunctionInfo> {
+  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
+    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
+                       UINT64_C(0));
+    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
+    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
+    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
+                       StringValue("$private_rsrc_reg"));
+    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
+                       StringValue("$scratch_wave_offset_reg"));
+    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
+                       StringValue("$fp_reg"));
+    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
+                       StringValue("$sp_reg"));
+    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+  }
+};
+
+} // end namespace yaml
+
 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 /// tells the hardware which interpolation parameters to load.
 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+  friend class GCNTargetMachine;
+
   unsigned TIDReg = AMDGPU::NoRegister;
 
   // Registers that may be reserved for spilling purposes. These may be the same
@@ -99,6 +329,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 
   AMDGPUFunctionArgInfo ArgInfo;
 
+  // State of MODE register, assumed FP mode.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
   // Graphics info.
   unsigned PSInputAddr = 0;
   unsigned PSInputEnable = 0;
@@ -124,16 +357,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // unit. Minimum - first, maximum - second.
   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 
-  // Stack object indices for work group IDs.
-  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
-
-  // Stack object indices for work item IDs.
-  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
-
   DenseMap<const Value *,
            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
   DenseMap<const Value *,
            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 
 private:
   unsigned LDSWaveSpillSize = 0;
@@ -182,6 +410,7 @@ private:
   unsigned GITPtrHigh;
 
   unsigned HighBitsOf32BitAddress;
+  unsigned GDSSize;
 
   // Current recorded maximum possible occupancy.
   unsigned Occupancy;
@@ -213,6 +442,15 @@ public:
     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
   };
 
+  struct VGPRSpillToAGPR {
+    SmallVector<MCPhysReg, 32> Lanes;
+    bool FullyAllocated = false;
+  };
+
+  SparseBitVector<> WWMReservedRegs;
+
+  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+
 private:
   // SGPR->VGPR spilling support.
   using SpillRegMask = std::pair<unsigned, unsigned>;
@@ -223,9 +461,25 @@ private:
   unsigned NumVGPRSpillLanes = 0;
   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
 
+  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+  // AGPRs used for VGPR spills.
+  SmallVector<MCPhysReg, 32> SpillAGPR;
+
+  // VGPRs used for AGPR spills.
+  SmallVector<MCPhysReg, 32> SpillVGPR;
+
+public: // FIXME
+  /// If this is set, an SGPR used for save/restore of the register used for the
+  /// frame pointer.
+  unsigned SGPRForFPSaveRestoreCopy = 0;
+  Optional<int> FramePointerSaveIndex;
+
 public:
   SIMachineFunctionInfo(const MachineFunction &MF);
 
+  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
+
   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
     auto I = SGPRToVGPRSpills.find(FrameIndex);
     return (I == SGPRToVGPRSpills.end()) ?
@@ -236,8 +490,29 @@ public:
     return SpillVGPRs;
   }
 
+  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+    return SpillAGPR;
+  }
+
+  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+    return SpillVGPR;
+  }
+
+  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+    auto I = VGPRToAGPRSpills.find(FrameIndex);
+    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+                                         : I->second.Lanes[Lane];
+  }
+
+  AMDGPU::SIModeRegisterDefaults getMode() const {
+    return Mode;
+  }
+
+  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+                                 unsigned NumLane) const;
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
-  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+  void removeDeadFrameIndices(MachineFrameInfo &MFI);
 
   bool hasCalculatedTID() const { return TIDReg != 0; };
   unsigned getTIDReg() const { return TIDReg; };
@@ -386,8 +661,9 @@ public:
     return ArgInfo.getPreloadedValue(Value);
   }
 
-  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
-    return ArgInfo.getPreloadedValue(Value).first->getRegister();
+  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+    auto Arg = ArgInfo.getPreloadedValue(Value).first;
+    return Arg ? Arg->getRegister() : Register();
   }
 
   unsigned getGITPtrHigh() const {
@@ -398,6 +674,10 @@ public:
     return HighBitsOf32BitAddress;
   }
 
+  unsigned getGDSSize() const {
+    return GDSSize;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -429,6 +709,11 @@ public:
     return FrameOffsetReg;
   }
 
+  void setFrameOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    FrameOffsetReg = Reg;
+  }
+
   void setStackPtrOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     StackPtrOffsetReg = Reg;
@@ -445,8 +730,6 @@ public:
   void setScratchWaveOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     ScratchWaveOffsetReg = Reg;
-    if (isEntryFunction())
-      FrameOffsetReg = ScratchWaveOffsetReg;
   }
 
   unsigned getQueuePtrUserSGPR() const {
@@ -565,30 +848,6 @@ public:
     return WavesPerEU.second;
   }
 
-  /// \returns Stack object index for \p Dim's work group ID.
-  int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkGroupIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
-  void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
-  /// \returns Stack object index for \p Dim's work item ID.
-  int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
-    assert(Dim < 3);
-    return DebuggerWorkItemIDStackObjectIndices[Dim];
-  }
-
-  /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
-  void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
-    assert(Dim < 3);
-    DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
-  }
-
   /// \returns SGPR used for \p Dim's work group ID.
   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     switch (Dim) {
@@ -605,9 +864,6 @@ public:
     llvm_unreachable("unexpected dimension");
   }
 
-  /// \returns VGPR used for \p Dim' work item ID.
-  unsigned getWorkItemIDVGPR(unsigned Dim) const;
-
   unsigned getLDSWaveSpillSize() const {
     return LDSWaveSpillSize;
   }
@@ -630,6 +886,15 @@ public:
     return PSV.first->second.get();
   }
 
+  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+    if (!GWSResourcePSV) {
+      GWSResourcePSV =
+          llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+    }
+
+    return GWSResourcePSV.get();
+  }
+
   unsigned getOccupancy() const {
     return Occupancy;
   }
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index fb7e670068fe..ebbdf80f9567 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1,9 +1,8 @@
 //===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1875,6 +1874,8 @@ void SIScheduleDAGMI::moveLowLatencies() {
       bool CopyForLowLat = false;
       for (SDep& SuccDep : SU->Succs) {
         SUnit *Succ = SuccDep.getSUnit();
+        if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+          continue;
         if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
           CopyForLowLat = true;
         }
@@ -1955,7 +1956,7 @@ void SIScheduleDAGMI::schedule()
 
   for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
     SUnit *SU = &SUnits[i];
-    MachineOperand *BaseLatOp;
+    const MachineOperand *BaseLatOp;
     int64_t OffLatReg;
     if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
       IsLowLatencySU[i] = 1;
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 0ce68ac6a897..c28a7be4d03a 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -1,9 +1,8 @@
 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index b4a4e9e33133..4320e6c957a0 100644
--- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1,9 +1,8 @@
 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -146,7 +145,7 @@ private:
     // only contains a single address space.
     if ((OrderingAddrSpace == InstrAddrSpace) &&
         isPowerOf2_32(uint32_t(InstrAddrSpace)))
-      IsCrossAddressSpaceOrdering = false;
+      this->IsCrossAddressSpaceOrdering = false;
   }
 
 public:
@@ -353,6 +352,40 @@ public:
 
 };
 
+class SIGfx10CacheControl : public SIGfx7CacheControl {
+protected:
+  bool CuMode = false;
+
+  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
+  /// is modified, false otherwise.
+  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
+    return enableNamedBit<AMDGPU::OpName::dlc>(MI);
+  }
+
+public:
+
+  SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
+    SIGfx7CacheControl(ST), CuMode(CuMode) {};
+
+  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+                             SIAtomicScope Scope,
+                             SIAtomicAddrSpace AddrSpace) const override;
+
+  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
+
+  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+                             SIAtomicScope Scope,
+                             SIAtomicAddrSpace AddrSpace,
+                             Position Pos) const override;
+
+  bool insertWait(MachineBasicBlock::iterator &MI,
+                  SIAtomicScope Scope,
+                  SIAtomicAddrSpace AddrSpace,
+                  SIMemOp Op,
+                  bool IsCrossAddrSpaceOrdering,
+                  Position Pos) const override;
+};
+
 class SIMemoryLegalizer final : public MachineFunctionPass {
 private:
 
@@ -418,35 +451,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                                SIAtomicAddrSpace InstrScope) const {
-  /// TODO: For now assume OpenCL memory model which treats each
-  /// address space as having a separate happens-before relation, and
-  /// so an instruction only has ordering with respect to the address
-  /// space it accesses, and if it accesses multiple address spaces it
-  /// does not require ordering of operations in different address
-  /// spaces.
- if (SSID == SyncScope::System)
+  if (SSID == SyncScope::System)
+    return std::make_tuple(SIAtomicScope::SYSTEM,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getAgentSSID())
+    return std::make_tuple(SIAtomicScope::AGENT,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getWorkgroupSSID())
+    return std::make_tuple(SIAtomicScope::WORKGROUP,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getWavefrontSSID())
+    return std::make_tuple(SIAtomicScope::WAVEFRONT,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == SyncScope::SingleThread)
+    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getSystemOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SYSTEM,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getAgentSSID())
+  if (SSID == MMI->getAgentOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::AGENT,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getWorkgroupSSID())
+  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WORKGROUP,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getWavefrontSSID())
+  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WAVEFRONT,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == SyncScope::SingleThread)
+  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  /// TODO: To support HSA Memory Model need to add additional memory
-  /// scopes that specify that do require cross address space
-  /// ordering.
   return None;
 }
 
@@ -613,7 +657,9 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
   GCNSubtarget::Generation Generation = ST.getGeneration();
   if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
     return make_unique<SIGfx6CacheControl>(ST);
-  return make_unique<SIGfx7CacheControl>(ST);
+  if (Generation < AMDGPUSubtarget::GFX10)
+    return make_unique<SIGfx7CacheControl>(ST);
+  return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
 }
 
 bool SIGfx6CacheControl::enableLoadCacheBypass(
@@ -722,13 +768,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
 
   bool VMCnt = false;
   bool LGKMCnt = false;
-  bool EXPCnt = false;
 
   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
-      VMCnt = true;
+      VMCnt |= true;
       break;
     case SIAtomicScope::WORKGROUP:
     case SIAtomicScope::WAVEFRONT:
@@ -752,7 +797,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
       // also synchronizing with global/GDS memory as LDS operations
       // could be reordered with respect to later global/GDS memory
       // operations of the same wave.
-      LGKMCnt = IsCrossAddrSpaceOrdering;
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
       break;
     case SIAtomicScope::WAVEFRONT:
     case SIAtomicScope::SINGLETHREAD:
@@ -774,7 +819,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
       // also synchronizing with global/LDS memory as GDS operations
       // could be reordered with respect to later global/LDS memory
       // operations of the same wave.
-      EXPCnt = IsCrossAddrSpaceOrdering;
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
       break;
     case SIAtomicScope::WORKGROUP:
     case SIAtomicScope::WAVEFRONT:
@@ -787,11 +832,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
     }
   }
 
-  if (VMCnt || LGKMCnt || EXPCnt) {
+  if (VMCnt || LGKMCnt) {
     unsigned WaitCntImmediate =
       AMDGPU::encodeWaitcnt(IV,
                             VMCnt ? 0 : getVmcntBitMask(IV),
-                            EXPCnt ? 0 : getExpcntBitMask(IV),
+                            getExpcntBitMask(IV),
                             LGKMCnt ? 0 : getLgkmcntBitMask(IV));
     BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
     Changed = true;
@@ -851,6 +896,231 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
   return Changed;
 }
 
+bool SIGfx10CacheControl::enableLoadCacheBypass(
+    const MachineBasicBlock::iterator &MI,
+    SIAtomicScope Scope,
+    SIAtomicAddrSpace AddrSpace) const {
+  assert(MI->mayLoad() && !MI->mayStore());
+  bool Changed = false;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    /// TODO Do not set glc for rmw atomic operations as they
+    /// implicitly bypass the L0/L1 caches.
+
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+      Changed |= enableGLCBit(MI);
+      Changed |= enableDLCBit(MI);
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // In WGP mode the waves of a work-group can be executing on either CU of
+      // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
+      // CU mode and all waves of a work-group are on the same CU, and so the
+      // L0 does not need to be bypassed.
+      if (!CuMode) Changed |= enableGLCBit(MI);
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // No cache to bypass.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  /// The scratch address space does not need the global memory caches
+  /// to be bypassed as all memory operations by the same thread are
+  /// sequentially consistent, and no other thread can access scratch
+  /// memory.
+
+  /// Other address spaces do not have a cache.
+
+  return Changed;
+}
+
+bool SIGfx10CacheControl::enableNonTemporal(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->mayLoad() ^ MI->mayStore());
+  bool Changed = false;
+
+  Changed |= enableSLCBit(MI);
+  /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
+
+  return Changed;
+}
+
+bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+                                                SIAtomicScope Scope,
+                                                SIAtomicAddrSpace AddrSpace,
+                                                Position Pos) const {
+  bool Changed = false;
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (Pos == Position::AFTER)
+    ++MI;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
+      Changed = true;
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // In WGP mode the waves of a work-group can be executing on either CU of
+      // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
+      // in CU mode and all waves of a work-group are on the same CU, and so the
+      // L0 does not need to be invalidated.
+      if (!CuMode) {
+        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+        Changed = true;
+      }
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // No cache to invalidate.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  /// The scratch address space does not need the global memory cache
+  /// to be flushed as all memory operations by the same thread are
+  /// sequentially consistent, and no other thread can access scratch
+  /// memory.
+
+  /// Other address spaces do not have a cache.
+
+  if (Pos == Position::AFTER)
+    --MI;
+
+  return Changed;
+}
+
+bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
+                                     SIAtomicScope Scope,
+                                     SIAtomicAddrSpace AddrSpace,
+                                     SIMemOp Op,
+                                     bool IsCrossAddrSpaceOrdering,
+                                     Position Pos) const {
+  bool Changed = false;
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (Pos == Position::AFTER)
+    ++MI;
+
+  bool VMCnt = false;
+  bool VSCnt = false;
+  bool LGKMCnt = false;
+
+  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+        VMCnt |= true;
+      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+        VSCnt |= true;
+      break;
+    case SIAtomicScope::WORKGROUP:
+      // In WGP mode the waves of a work-group can be executing on either CU of
+      // the WGP. Therefore need to wait for operations to complete to ensure
+      // they are visible to waves in the other CU as the L0 is per CU.
+      // Otherwise in CU mode and all waves of a work-group are on the same CU
+      // which shares the same L0.
+      if (!CuMode) {
+        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+          VMCnt |= true;
+        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+          VSCnt |= true;
+      }
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // The L0 cache keeps all memory operations in order for
+      // work-items in the same wavefront.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+    case SIAtomicScope::WORKGROUP:
+      // If no cross address space ordering then an LDS waitcnt is not
+      // needed as LDS operations for all waves are executed in a
+      // total global ordering as observed by all waves. Required if
+      // also synchronizing with global/GDS memory as LDS operations
+      // could be reordered with respect to later global/GDS memory
+      // operations of the same wave.
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
+      break;
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // The LDS keeps all memory operations in order for
+      // the same wavefront.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
+    switch (Scope) {
+    case SIAtomicScope::SYSTEM:
+    case SIAtomicScope::AGENT:
+      // If no cross address space ordering then a GDS waitcnt is not
+      // needed as GDS operations for all waves are executed in a
+      // total global ordering as observed by all waves. Required if
+      // also synchronizing with global/LDS memory as GDS operations
+      // could be reordered with respect to later global/LDS memory
+      // operations of the same wave.
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
+      break;
+    case SIAtomicScope::WORKGROUP:
+    case SIAtomicScope::WAVEFRONT:
+    case SIAtomicScope::SINGLETHREAD:
+      // The GDS keeps all memory operations in order for
+      // the same work-group.
+      break;
+    default:
+      llvm_unreachable("Unsupported synchronization scope");
+    }
+  }
+
+  if (VMCnt || LGKMCnt) {
+    unsigned WaitCntImmediate =
+      AMDGPU::encodeWaitcnt(IV,
+                            VMCnt ? 0 : getVmcntBitMask(IV),
+                            getExpcntBitMask(IV),
+                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
+    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+    Changed = true;
+  }
+
+  if (VSCnt) {
+    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
+      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+      .addImm(0);
+    Changed = true;
+  }
+
+  if (Pos == Position::AFTER)
+    --MI;
+
+  return Changed;
+}
+
 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
   if (AtomicPseudoMIs.empty())
     return false;
diff --git a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp
index 883fd308f2f4..a5edd7b3554a 100644
--- a/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -1,9 +1,8 @@
 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -45,7 +44,7 @@ struct Status {
 
   Status() : Mask(0), Mode(0){};
 
-  Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) {
+  Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
     Mode &= Mask;
   };
 
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index ebcad30a1866..3227bff20513 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -1,9 +1,8 @@
 //===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -57,13 +56,16 @@ char SIOptimizeExecMasking::ID = 0;
 char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
 
 /// If \p MI is a copy from exec, return the register copied to.
-static unsigned isCopyFromExec(const MachineInstr &MI) {
+static unsigned isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
   switch (MI.getOpcode()) {
   case AMDGPU::COPY:
   case AMDGPU::S_MOV_B64:
-  case AMDGPU::S_MOV_B64_term: {
+  case AMDGPU::S_MOV_B64_term:
+  case AMDGPU::S_MOV_B32:
+  case AMDGPU::S_MOV_B32_term: {
     const MachineOperand &Src = MI.getOperand(1);
-    if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
+    if (Src.isReg() &&
+        Src.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC))
       return MI.getOperand(0).getReg();
   }
   }
@@ -72,16 +74,20 @@ static unsigned isCopyFromExec(const MachineInstr &MI) {
 }
 
 /// If \p MI is a copy to exec, return the register copied from.
-static unsigned isCopyToExec(const MachineInstr &MI) {
+static unsigned isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
   switch (MI.getOpcode()) {
   case AMDGPU::COPY:
-  case AMDGPU::S_MOV_B64: {
+  case AMDGPU::S_MOV_B64:
+  case AMDGPU::S_MOV_B32: {
     const MachineOperand &Dst = MI.getOperand(0);
-    if (Dst.isReg() && Dst.getReg() == AMDGPU::EXEC && MI.getOperand(1).isReg())
+    if (Dst.isReg() &&
+        Dst.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC) &&
+        MI.getOperand(1).isReg())
       return MI.getOperand(1).getReg();
     break;
   }
   case AMDGPU::S_MOV_B64_term:
+  case AMDGPU::S_MOV_B32_term:
     llvm_unreachable("should have been replaced");
   }
 
@@ -106,6 +112,23 @@ static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
     const MachineOperand &Src2 = MI.getOperand(2);
     if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
       return MI.getOperand(0).getReg();
+    break;
+  }
+  case AMDGPU::S_AND_B32:
+  case AMDGPU::S_OR_B32:
+  case AMDGPU::S_XOR_B32:
+  case AMDGPU::S_ANDN2_B32:
+  case AMDGPU::S_ORN2_B32:
+  case AMDGPU::S_NAND_B32:
+  case AMDGPU::S_NOR_B32:
+  case AMDGPU::S_XNOR_B32: {
+    const MachineOperand &Src1 = MI.getOperand(1);
+    if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC_LO)
+      return MI.getOperand(0).getReg();
+    const MachineOperand &Src2 = MI.getOperand(2);
+    if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC_LO)
+      return MI.getOperand(0).getReg();
+    break;
   }
   }
 
@@ -130,6 +153,22 @@ static unsigned getSaveExecOp(unsigned Opc) {
     return AMDGPU::S_NOR_SAVEEXEC_B64;
   case AMDGPU::S_XNOR_B64:
     return AMDGPU::S_XNOR_SAVEEXEC_B64;
+  case AMDGPU::S_AND_B32:
+    return AMDGPU::S_AND_SAVEEXEC_B32;
+  case AMDGPU::S_OR_B32:
+    return AMDGPU::S_OR_SAVEEXEC_B32;
+  case AMDGPU::S_XOR_B32:
+    return AMDGPU::S_XOR_SAVEEXEC_B32;
+  case AMDGPU::S_ANDN2_B32:
+    return AMDGPU::S_ANDN2_SAVEEXEC_B32;
+  case AMDGPU::S_ORN2_B32:
+    return AMDGPU::S_ORN2_SAVEEXEC_B32;
+  case AMDGPU::S_NAND_B32:
+    return AMDGPU::S_NAND_SAVEEXEC_B32;
+  case AMDGPU::S_NOR_B32:
+    return AMDGPU::S_NOR_SAVEEXEC_B32;
+  case AMDGPU::S_XNOR_B32:
+    return AMDGPU::S_XNOR_SAVEEXEC_B32;
   default:
     return AMDGPU::INSTRUCTION_LIST_END;
   }
@@ -140,7 +179,8 @@ static unsigned getSaveExecOp(unsigned Opc) {
 // these is expected per block.
 static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
   switch (MI.getOpcode()) {
-  case AMDGPU::S_MOV_B64_term: {
+  case AMDGPU::S_MOV_B64_term:
+  case AMDGPU::S_MOV_B32_term: {
     MI.setDesc(TII.get(AMDGPU::COPY));
     return true;
   }
@@ -150,12 +190,30 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
     MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
     return true;
   }
+  case AMDGPU::S_XOR_B32_term: {
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
+    return true;
+  }
+  case AMDGPU::S_OR_B32_term: {
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(TII.get(AMDGPU::S_OR_B32));
+    return true;
+  }
   case AMDGPU::S_ANDN2_B64_term: {
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
     return true;
   }
+  case AMDGPU::S_ANDN2_B32_term: {
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(TII.get(AMDGPU::S_ANDN2_B32));
+    return true;
+  }
   default:
     return false;
   }
@@ -178,6 +236,7 @@ static MachineBasicBlock::reverse_iterator fixTerminators(
 
 static MachineBasicBlock::reverse_iterator findExecCopy(
   const SIInstrInfo &TII,
+  const GCNSubtarget &ST,
   MachineBasicBlock &MBB,
   MachineBasicBlock::reverse_iterator I,
   unsigned CopyToExec) {
@@ -185,7 +244,7 @@ static MachineBasicBlock::reverse_iterator findExecCopy(
 
   auto E = MBB.rend();
   for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
-    unsigned CopyFromExec = isCopyFromExec(*I);
+    unsigned CopyFromExec = isCopyFromExec(*I, ST);
     if (CopyFromExec != AMDGPU::NoRegister)
       return I;
   }
@@ -194,8 +253,8 @@ static MachineBasicBlock::reverse_iterator findExecCopy(
 }
 
 // XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly
-// repor tthe register as unavailable because a super-register with a lane mask
-// as unavailable.
+// report the register as unavailable because a super-register with a lane mask
+// is unavailable.
 static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
   for (MachineBasicBlock *Succ : MBB.successors()) {
     if (Succ->isLiveIn(Reg))
@@ -212,6 +271,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
 
   // Optimize sequences emitted for control flow lowering. They are originally
   // emitted as the separate operations because spill code may need to be
@@ -230,13 +290,13 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
     if (I == E)
       continue;
 
-    unsigned CopyToExec = isCopyToExec(*I);
+    unsigned CopyToExec = isCopyToExec(*I, ST);
     if (CopyToExec == AMDGPU::NoRegister)
       continue;
 
     // Scan backwards to find the def.
     auto CopyToExecInst = &*I;
-    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
+    auto CopyFromExecInst = findExecCopy(*TII, ST, MBB, I, CopyToExec);
     if (CopyFromExecInst == E) {
       auto PrepareExecInst = std::next(I);
       if (PrepareExecInst == E)
@@ -246,7 +306,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
           isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
         LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
 
-        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+        PrepareExecInst->getOperand(0).setReg(Exec);
 
         LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
 
@@ -269,7 +329,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
     for (MachineBasicBlock::iterator J
            = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
          J != JE; ++J) {
-      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
+      if (SaveExecInst && J->readsRegister(Exec, TRI)) {
         LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
         // Make sure this is inserted after any VALU ops that may have been
         // scheduled in between.
@@ -353,7 +413,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
     CopyToExecInst->eraseFromParent();
 
     for (MachineInstr *OtherInst : OtherUseInsts) {
-      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
+      OtherInst->substituteRegister(CopyToExec, Exec,
                                     AMDGPU::NoSubRegister, *TRI);
     }
   }
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index c671fed34bdf..7e10316eab92 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -1,9 +1,8 @@
 //===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,10 +33,22 @@ using namespace llvm;
 namespace {
 
 class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+private:
+  const SIRegisterInfo *TRI;
+  const SIInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
 public:
-  static char ID;
+  MachineBasicBlock::iterator skipIgnoreExecInsts(
+    MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const;
+
+    MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc(
+      MachineBasicBlock *&MBB,
+      MachineBasicBlock::iterator It) const;
 
 public:
+  static char ID;
+
   SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
     initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
   }
@@ -71,38 +82,93 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
   return new SIOptimizeExecMaskingPreRA();
 }
 
-static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
+static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
+                    const GCNSubtarget &ST) {
+  if (ST.isWave32()) {
+    return MI.getOpcode() == AMDGPU::S_OR_B32 &&
+           MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
+  }
+
   return MI.getOpcode() == AMDGPU::S_OR_B64 &&
          MI.modifiesRegister(AMDGPU::EXEC, TRI);
 }
 
-static bool isFullExecCopy(const MachineInstr& MI) {
-  return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
+static bool isFullExecCopy(const MachineInstr& MI, const GCNSubtarget& ST) {
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+
+  if (MI.isCopy() && MI.getOperand(1).getReg() == Exec) {
+    assert(MI.isFullCopy());
+    return true;
+  }
+
+  return false;
 }
 
 static unsigned getOrNonExecReg(const MachineInstr &MI,
-                                const SIInstrInfo &TII) {
+                                const SIInstrInfo &TII,
+                                const GCNSubtarget& ST) {
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
-  if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+  if (Op->isReg() && Op->getReg() != Exec)
      return Op->getReg();
   Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
-  if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+  if (Op->isReg() && Op->getReg() != Exec)
      return Op->getReg();
   return AMDGPU::NoRegister;
 }
 
 static MachineInstr* getOrExecSource(const MachineInstr &MI,
                                      const SIInstrInfo &TII,
-                                     const MachineRegisterInfo &MRI) {
-  auto SavedExec = getOrNonExecReg(MI, TII);
+                                     const MachineRegisterInfo &MRI,
+                                     const GCNSubtarget& ST) {
+  auto SavedExec = getOrNonExecReg(MI, TII, ST);
   if (SavedExec == AMDGPU::NoRegister)
     return nullptr;
   auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
-  if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
+  if (!SaveExecInst || !isFullExecCopy(*SaveExecInst, ST))
     return nullptr;
   return SaveExecInst;
 }
 
+/// Return the first instruction in [I, E) where mayReadEXEC() is true, or E.
+MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts(
+  MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const {
+  for ( ; I != E; ++I) {
+    if (TII->mayReadEXEC(*MRI, *I))
+      break;
+  }
+
+  return I;
+}
+
+// Skip to the next instruction that mayReadEXEC() accepts, crossing trivial
+// block boundaries (a block whose single successor is its layout successor).
+// MBB is updated to the block that the returned iterator belongs to.
+MachineBasicBlock::iterator
+SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc(
+  MachineBasicBlock *&MBB,
+  MachineBasicBlock::iterator It) const {
+  // NOTE(review): the successor's predecessor count is not checked here.
+  do {
+    It = skipIgnoreExecInsts(It, MBB->end());
+    if (It != MBB->end() || MBB->succ_size() != 1)
+      break;
+
+    // If there is one trivial successor, advance to the next block.
+    MachineBasicBlock *Succ = *MBB->succ_begin();
+
+    // TODO: Is this really necessary?
+    if (!MBB->isLayoutSuccessor(Succ))
+      break;
+
+    It = Succ->begin();
+    MBB = Succ;
+  } while (true);
+
+  return It;
+}
+
+
 // Optimize sequence
 //    %sel = V_CNDMASK_B32_e64 0, 1, %cc
 //    %cmp = V_CMP_NE_U32 1, %1
@@ -125,10 +191,11 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
                                      LiveIntervals *LIS) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
-  const unsigned AndOpc = AMDGPU::S_AND_B64;
-  const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
-  const unsigned CondReg = AMDGPU::VCC;
-  const unsigned ExecReg = AMDGPU::EXEC;
+  bool Wave32 = ST.isWave32();
+  const unsigned AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+  const unsigned Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+  const unsigned CondReg = Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
+  const unsigned ExecReg = Wave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
 
   auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
                            unsigned Opc = MI.getOpcode();
@@ -172,6 +239,10 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
     return AMDGPU::NoRegister;
 
+  if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+      TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+    return AMDGPU::NoRegister;
+
   Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
   Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
   MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
@@ -187,7 +258,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
   MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
                                 TII->get(Andn2Opc), And->getOperand(0).getReg())
                             .addReg(ExecReg)
-                            .addReg(CCReg, CC->getSubReg());
+                            .addReg(CCReg, 0, CC->getSubReg());
   And->eraseFromParent();
   LIS->InsertMachineInstrInMaps(*Andn2);
 
@@ -224,11 +295,14 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
-  const SIInstrInfo *TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
   MachineRegisterInfo &MRI = MF.getRegInfo();
   LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
   DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
+  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
@@ -248,9 +322,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
       // Skip this if the endpgm has any implicit uses, otherwise we would need
       // to be careful to update / remove them.
+      // S_ENDPGM always has a single imm operand that is not used other than to
+      // end up in the encoding
       MachineInstr &Term = MBB.back();
-      if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
-          Term.getNumOperands() != 0)
+      if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1)
         continue;
 
       SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
@@ -304,32 +379,21 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
     }
 
     // Try to collapse adjacent endifs.
-    auto Lead = MBB.begin(), E = MBB.end();
-    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
-      continue;
-
-    const MachineBasicBlock* Succ = *MBB.succ_begin();
-    if (!MBB.isLayoutSuccessor(Succ))
-      continue;
-
-    auto I = std::next(Lead);
-
-    for ( ; I != E; ++I)
-      if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
-        break;
-
-    if (I != E)
+    auto E = MBB.end();
+    auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
+    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
       continue;
 
-    const auto NextLead = Succ->begin();
-    if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
-        !getOrExecSource(*NextLead, *TII, MRI))
+    MachineBasicBlock *TmpMBB = &MBB;
+    auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
+    if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
+        !getOrExecSource(*NextLead, *TII, MRI, ST))
       continue;
 
     LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
 
-    auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
-    unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
+    auto SaveExec = getOrExecSource(*Lead, *TII, MRI, ST);
+    unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII, ST);
     for (auto &Op : Lead->operands()) {
       if (Op.isReg())
         RecalcRegs.insert(Op.getReg());
@@ -363,7 +427,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
     if (SafeToReplace) {
       LIS->RemoveMachineInstrFromMaps(*SaveExec);
       SaveExec->eraseFromParent();
-      MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
+      MRI.replaceRegWith(SavedExec, Exec);
       LIS->removeInterval(SavedExec);
     }
   }
@@ -375,8 +439,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
         if (!MRI.reg_empty(Reg))
           LIS->createAndComputeVirtRegInterval(Reg);
       } else {
-        for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
-          LIS->removeRegUnit(*U);
+        LIS->removeAllRegUnitsForPhysReg(Reg);
       }
     }
   }
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 2d43d5d05ef6..2d71abc0612a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1,9 +1,8 @@
 //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -348,8 +347,8 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
   if (Abs || Neg) {
     assert(!Sext &&
            "Float and integer src modifiers can't be set simulteniously");
-    Mods |= Abs ? SISrcMods::ABS : 0;
-    Mods ^= Neg ? SISrcMods::NEG : 0;
+    Mods |= Abs ? SISrcMods::ABS : 0u;
+    Mods ^= Neg ? SISrcMods::NEG : 0u;
   } else if (Sext) {
     Mods |= SISrcMods::SEXT;
   }
@@ -419,7 +418,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
     }
     assert(Src && Src->isReg());
 
-    if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+    if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+         MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+         MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
          MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
          !isSameReg(*Src, *getReplacedOperand())) {
       // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
@@ -461,7 +462,9 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
 
-  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
        MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
       getDstSel() != AMDGPU::SDWA::DWORD) {
     // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
@@ -951,7 +954,8 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
   if (TII->isVOPC(Opc)) {
     if (!ST.hasSDWASdst()) {
       const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
-      if (SDst && SDst->getReg() != AMDGPU::VCC)
+      if (SDst && (SDst->getReg() != AMDGPU::VCC &&
+                   SDst->getReg() != AMDGPU::VCC_LO))
         return false;
     }
 
@@ -965,10 +969,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
     return false;
   }
 
-  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
+                           Opc == AMDGPU::V_FMAC_F32_e32 ||
+                           Opc == AMDGPU::V_MAC_F16_e32 ||
                            Opc == AMDGPU::V_MAC_F32_e32))
     return false;
 
+  // Check if target supports this SDWA opcode
+  if (TII->pseudoToMCOpcode(Opc) == -1)
+    return false;
+
   // FIXME: has SDWA but require handling of implicit VCC use
   if (Opc == AMDGPU::V_CNDMASK_B32_e32)
     return false;
@@ -1010,7 +1020,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     SDWAInst.add(*Dst);
   } else {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
-    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
+    SDWAInst.addReg(TRI->getVCC(), RegState::Define);
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -1039,7 +1049,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     SDWAInst.add(*Src1);
   }
 
-  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
+      SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
+      SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
       SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
     // v_mac_f16/32 has additional src2 operand tied to vdst
     MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
new file mode 100644
index 000000000000..f9bfe96f65cb
--- /dev/null
+++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -0,0 +1,221 @@
+//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Pass to pre-allocate WWM registers
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
+
+namespace {
+
+class SIPreAllocateWWMRegs : public MachineFunctionPass {
+private:
+  const SIInstrInfo *TII;
+  const SIRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  LiveIntervals *LIS;
+  LiveRegMatrix *Matrix;
+  VirtRegMap *VRM;
+  RegisterClassInfo RegClassInfo;
+  // Virtual registers assigned by processDef(); drained by rewriteRegs().
+  std::vector<unsigned> RegsToRewrite;
+
+public:
+  static char ID;
+
+  SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
+    initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  // Needs LiveIntervals, VirtRegMap and LiveRegMatrix; keeps the CFG intact.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addRequired<VirtRegMap>();
+    AU.addRequired<LiveRegMatrix>();
+    AU.addPreserved<SlotIndexes>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+  // Helpers: assign a register to one def / rewrite all assigned operands.
+private:
+  bool processDef(MachineOperand &MO);
+  void rewriteRegs(MachineFunction &MF);
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
+                "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
+                "SI Pre-allocate WWM Registers", false, false)
+
+char SIPreAllocateWWMRegs::ID = 0;
+
+char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+
+FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { // Pass factory.
+  return new SIPreAllocateWWMRegs();
+}
+
+bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
+  if (!MO.isReg())
+    return false;
+  // Only virtual VGPRs without a VirtRegMap assignment are pre-allocated.
+  unsigned Reg = MO.getReg();
+
+  if (!TRI->isVGPR(*MRI, Reg))
+    return false;
+
+  if (TRI->isPhysicalRegister(Reg))
+    return false;
+
+  if (VRM->hasPhys(Reg))
+    return false;
+
+  LiveInterval &LI = LIS->getInterval(Reg);
+  // Take the first unused, interference-free register in RegClassInfo's order.
+  for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
+    if (!MRI->isPhysRegUsed(PhysReg) &&
+        Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
+      Matrix->assign(LI, PhysReg);
+      assert(PhysReg != 0);
+      RegsToRewrite.push_back(Reg);
+      return true;
+    }
+  }
+  // No candidate passed both checks; this path is not expected to be reached.
+  llvm_unreachable("physreg not found for WWM expression");
+  return false;
+}
+
+void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : MI.operands()) {
+        if (!MO.isReg())
+          continue;
+        // Only virtual registers that the VirtRegMap maps are rewritten.
+        const unsigned VirtReg = MO.getReg();
+        if (TRI->isPhysicalRegister(VirtReg))
+          continue;
+        // Note: this is keyed on the VirtRegMap, not on RegsToRewrite.
+        if (!VRM->hasPhys(VirtReg))
+          continue;
+        // Fold any subregister index into the physical register.
+        unsigned PhysReg = VRM->getPhys(VirtReg);
+        const unsigned SubReg = MO.getSubReg();
+        if (SubReg != 0) {
+          PhysReg = TRI->getSubReg(PhysReg, SubReg);
+          MO.setSubReg(0);
+        }
+
+        MO.setReg(PhysReg);
+        MO.setIsRenamable(false);
+      }
+    }
+  }
+
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  // Drop each assigned vreg's live interval and reserve its physical register.
+  for (unsigned Reg : RegsToRewrite) {
+    LIS->removeInterval(Reg);
+
+    const unsigned PhysReg = VRM->getPhys(Reg);
+    assert(PhysReg != 0);
+    MFI->ReserveWWMRegister(PhysReg);
+  }
+
+  RegsToRewrite.clear();
+
+  // Update the set of reserved registers to include WWM ones.
+  MRI->freezeReservedRegs(MF);
+}
+
+bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
+
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  // Cache per-function target info and the analyses used by the helpers.
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+
+  LIS = &getAnalysis<LiveIntervals>();
+  Matrix = &getAnalysis<LiveRegMatrix>();
+  VRM = &getAnalysis<VirtRegMap>();
+
+  RegClassInfo.runOnMachineFunction(MF);
+
+  bool RegsAssigned = false;
+
+  // We use a reverse post-order traversal of the control-flow graph to
+  // guarantee that we visit definitions in dominance order. Since WWM
+  // expressions are guaranteed to never involve phi nodes, and we can only
+  // escape WWM through the special WWM instruction, this means that this is a
+  // perfect elimination order, so we can never do any better.
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+
+  for (MachineBasicBlock *MBB : RPOT) {
+    bool InWWM = false; // Reset at the start of every block.
+    for (MachineInstr &MI : *MBB) {
+      if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
+          MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
+        RegsAssigned |= processDef(MI.getOperand(0));
+      // The V_SET_INACTIVE operand above is processed regardless of InWWM.
+      if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
+        LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
+        InWWM = true;
+        continue;
+      }
+      // EXIT_WWM clears the flag and is then skipped by the !InWWM check below.
+      if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
+        LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
+        InWWM = false;
+      }
+
+      if (!InWWM)
+        continue;
+
+      LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
+      // Inside a WWM region: try every operand returned by MI.defs().
+      for (MachineOperand &DefOpnd : MI.defs()) {
+        RegsAssigned |= processDef(DefOpnd);
+      }
+    }
+  }
+  // Nothing was assigned, so report the function as unchanged.
+  if (!RegsAssigned)
+    return false;
+
+  rewriteRegs(MF);
+  return true;
+}
diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h
index 383f6b575808..168f05f8fdd6 100644
--- a/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/lib/Target/AMDGPU/SIProgramInfo.h
@@ -1,9 +1,8 @@
 //===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,8 @@ struct SIProgramInfo {
     uint32_t DX10Clamp = 0;
     uint32_t DebugMode = 0;
     uint32_t IEEEMode = 0;
+    uint32_t WgpMode = 0; // GFX10+
+    uint32_t MemOrdered = 0; // GFX10+
     uint64_t ScratchSize = 0;
 
     uint64_t ComputePGMRSrc1 = 0;
@@ -50,18 +51,6 @@ struct SIProgramInfo {
     // Number of VGPRs that meets number of waves per execution unit request.
     uint32_t NumVGPRsForWavesPerEU = 0;
 
-    // Fixed SGPR number used to hold wave scratch offset for entire kernel
-    // execution, or std::numeric_limits<uint16_t>::max() if the register is not
-    // used or not known.
-    uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
-        std::numeric_limits<uint16_t>::max();
-
-    // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
-    // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
-    // is not used or not known.
-    uint16_t DebuggerPrivateSegmentBufferSGPR =
-        std::numeric_limits<uint16_t>::max();
-
     // Whether there is recursion, dynamic allocas, indirect calls or some other
     // reason there may be statically unknown stack usage.
     bool DynamicCallStack = false;
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 97cfde2b2354..f152deb28004 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -63,8 +63,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
   AMDGPURegisterInfo(),
   SGPRPressureSets(getNumRegPressureSets()),
   VGPRPressureSets(getNumRegPressureSets()),
+  AGPRPressureSets(getNumRegPressureSets()),
   SpillSGPRToVGPR(false),
-  SpillSGPRToSMEM(false) {
+  SpillSGPRToSMEM(false),
+  isWave32(ST.isWave32()) {
   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
     SpillSGPRToSMEM = true;
   else if (EnableSpillSGPRToVGPR)
@@ -74,10 +76,12 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
 
   SGPRSetID = NumRegPressureSets;
   VGPRSetID = NumRegPressureSets;
+  AGPRSetID = NumRegPressureSets;
 
   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
+    classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
   }
 
   // Determine the number of reg units for each pressure set.
@@ -89,7 +93,7 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
     }
   }
 
-  unsigned VGPRMax = 0, SGPRMax = 0;
+  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
       VGPRSetID = i;
@@ -100,10 +104,16 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
       SGPRSetID = i;
       SGPRMax = PressureSetRegUnits[i];
     }
+    if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
+      AGPRSetID = i;
+      AGPRMax = PressureSetRegUnits[i];
+      continue;
+    }
   }
 
   assert(SGPRSetID < NumRegPressureSets &&
-         VGPRSetID < NumRegPressureSets);
+         VGPRSetID < NumRegPressureSets &&
+         AGPRSetID < NumRegPressureSets);
 }
 
 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
@@ -139,11 +149,6 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
 }
 
-unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
-  const MachineFunction &MF) const {
-  return AMDGPU::SGPR32;
-}
-
 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
 
@@ -155,15 +160,26 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
   reserveRegisterTuples(Reserved, AMDGPU::M0);
 
+  // Reserve src_vccz, src_execz, src_scc.
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
+
   // Reserve the memory aperture registers.
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
 
+  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
+
   // Reserve xnack_mask registers - support is not implemented in Codegen.
   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
 
+  // Reserve lds_direct register - support is not implemented in Codegen.
+  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
+
   // Reserve Trap Handler registers - support is not implemented in Codegen.
   reserveRegisterTuples(Reserved, AMDGPU::TBA);
   reserveRegisterTuples(Reserved, AMDGPU::TMA);
@@ -176,6 +192,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
 
+  // Reserve null register - it shall never be allocated
+  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
+
+  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
+  // will result in bugs.
+  if (isWave32) {
+    Reserved.set(AMDGPU::VCC);
+    Reserved.set(AMDGPU::VCC_HI);
+  }
+
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
@@ -190,6 +216,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     reserveRegisterTuples(Reserved, Reg);
+    Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+    reserveRegisterTuples(Reserved, Reg);
   }
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -225,9 +253,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
   }
 
+  for (unsigned Reg : MFI->WWMReservedRegs) {
+    reserveRegisterTuples(Reserved, Reg);
+  }
+
+  // FIXME: Stop using reserved registers for this.
+  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
+    reserveRegisterTuples(Reserved, Reg);
+
+  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
+    reserveRegisterTuples(Reserved, Reg);
+
   return Reserved;
 }
 
+bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  // Entry functions never realign: on entry the base address is 0, so it
+  // can't possibly need any more alignment.
+
+  // FIXME: Should be able to specify the entry frame alignment per calling
+  // convention instead.
+  if (Info->isEntryFunction())
+    return false;
+
+  return TargetRegisterInfo::canRealignStack(MF); // non-entry: generic check
+}
+
 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
   if (Info->isEntryFunction()) {
@@ -252,11 +304,20 @@ bool SIRegisterInfo::requiresFrameIndexScavenging(
 
 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
   const MachineFunction &MF) const {
-  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
-  // create a virtual register for it during frame index elimination, so the
-  // scavenger is directly needed.
-  return MF.getFrameInfo().hasStackObjects() &&
-         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasStackObjects())
+    return false;
+
+  // The scavenger is used for large frames which may require finding a free
+  // register for large offsets.
+  if (!isUInt<12>(MFI.getStackSize()))
+    return true;
+
+  // If using scalar stores, for spills, m0 is needed for the scalar store
+  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
+  // register for it during frame index elimination, so the scavenger is
+  // directly needed.
+  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
 }
 
@@ -332,7 +393,8 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 
   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     .addReg(OffsetReg, RegState::Kill)
-    .addReg(FIReg);
+    .addReg(FIReg)
+    .addImm(0); // clamp bit
 }
 
 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -394,21 +456,39 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
 
   switch (Op) {
+  case AMDGPU::SI_SPILL_S1024_SAVE:
+  case AMDGPU::SI_SPILL_S1024_RESTORE:
+  case AMDGPU::SI_SPILL_V1024_SAVE:
+  case AMDGPU::SI_SPILL_V1024_RESTORE:
+  case AMDGPU::SI_SPILL_A1024_SAVE:
+  case AMDGPU::SI_SPILL_A1024_RESTORE:
+    return 32;
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_V512_SAVE:
   case AMDGPU::SI_SPILL_V512_RESTORE:
+  case AMDGPU::SI_SPILL_A512_SAVE:
+  case AMDGPU::SI_SPILL_A512_RESTORE:
     return 16;
   case AMDGPU::SI_SPILL_S256_SAVE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
   case AMDGPU::SI_SPILL_V256_SAVE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S160_SAVE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
+  case AMDGPU::SI_SPILL_V160_SAVE:
+  case AMDGPU::SI_SPILL_V160_RESTORE:
+    return 5;
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_V128_SAVE:
   case AMDGPU::SI_SPILL_V128_RESTORE:
+  case AMDGPU::SI_SPILL_A128_SAVE:
+  case AMDGPU::SI_SPILL_A128_RESTORE:
     return 4;
+  case AMDGPU::SI_SPILL_S96_SAVE:
+  case AMDGPU::SI_SPILL_S96_RESTORE:
   case AMDGPU::SI_SPILL_V96_SAVE:
   case AMDGPU::SI_SPILL_V96_RESTORE:
     return 3;
@@ -416,11 +496,15 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_S64_RESTORE:
   case AMDGPU::SI_SPILL_V64_SAVE:
   case AMDGPU::SI_SPILL_V64_RESTORE:
+  case AMDGPU::SI_SPILL_A64_SAVE:
+  case AMDGPU::SI_SPILL_A64_RESTORE:
     return 2;
   case AMDGPU::SI_SPILL_S32_SAVE:
   case AMDGPU::SI_SPILL_S32_RESTORE:
   case AMDGPU::SI_SPILL_V32_SAVE:
   case AMDGPU::SI_SPILL_V32_RESTORE:
+  case AMDGPU::SI_SPILL_A32_SAVE:
+  case AMDGPU::SI_SPILL_A32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -480,6 +564,35 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
   }
 }
 
+static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
+                                           int Index, // spill frame index
+                                           unsigned Lane, // lane in the slot
+                                           unsigned ValueReg, // spilled value
+                                           bool IsKill) { // kill flag for Src
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction *MF = MI->getParent()->getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
+
+  if (Reg == AMDGPU::NoRegister) // no register recorded for this lane
+    return MachineInstrBuilder(); // empty: callers test getInstr()
+
+  bool IsStore = MI->mayStore(); // save (store) vs. restore (load)
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+
+  unsigned Dst = IsStore ? Reg : ValueReg; // save writes Reg
+  unsigned Src = IsStore ? ValueReg : Reg; // restore reads Reg
+  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
+                                                   : AMDGPU::V_ACCVGPR_READ_B32;
+
+  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+           .addReg(Src, getKillRegState(IsKill));
+}
+
 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
 // need to handle the case where an SGPR may need to be spilled while spilling.
 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
@@ -498,6 +611,9 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     return false;
 
   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
+    return true;
+
   MachineInstrBuilder NewMI =
       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
           .add(*Reg)
@@ -507,6 +623,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
           .addImm(0) // glc
           .addImm(0) // slc
           .addImm(0) // tfe
+          .addImm(0) // dlc
           .cloneMemRefs(*MI);
 
   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -549,6 +666,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
   unsigned Align = MFI.getObjectAlignment(Index);
   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
 
+  Register TmpReg =
+    hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
+                 : Register();
+
   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
 
   if (!isUInt<12>(Offset + Size - EltSize)) {
@@ -562,7 +683,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     // We don't have access to the register scavenger if this function is called
     // during  PEI::scavengeFrameVirtualRegs().
     if (RS)
-      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
+      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
 
     if (SOffset == AMDGPU::NoRegister) {
       // There are no free SGPRs, and since we are in the process of spilling
@@ -597,20 +718,38 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
       SrcDstRegState |= getKillRegState(IsKill);
     }
 
-    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
-    MachineMemOperand *NewMMO
-      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
-                                 EltSize, MinAlign(Align, EltSize * i));
-
-    auto MIB = BuildMI(*MBB, MI, DL, Desc)
-      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
-      .addReg(ScratchRsrcReg)
-      .addReg(SOffset, SOffsetRegState)
-      .addImm(Offset)
-      .addImm(0) // glc
-      .addImm(0) // slc
-      .addImm(0) // tfe
-      .addMemOperand(NewMMO);
+    auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
+
+    if (!MIB.getInstr()) {
+      unsigned FinalReg = SubReg;
+      if (TmpReg != AMDGPU::NoRegister) {
+        if (IsStore)
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
+            .addReg(SubReg, getKillRegState(IsKill));
+        SubReg = TmpReg;
+      }
+
+      MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
+      MachineMemOperand *NewMMO
+        = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
+                                   EltSize, MinAlign(Align, EltSize * i));
+
+      MIB = BuildMI(*MBB, MI, DL, Desc)
+        .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
+        .addReg(ScratchRsrcReg)
+        .addReg(SOffset, SOffsetRegState)
+        .addImm(Offset)
+        .addImm(0) // glc
+        .addImm(0) // slc
+        .addImm(0) // tfe
+        .addImm(0) // dlc
+        .addMemOperand(NewMMO);
+
+      if (!IsStore && TmpReg != AMDGPU::NoRegister)
+        MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
+                      FinalReg)
+          .addReg(TmpReg, RegState::Kill);
+    }
 
     if (NumSubRegs > 1)
       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
@@ -669,6 +808,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
   if (SpillToSMEM && OnlyToVGPR)
     return false;
 
+  Register FrameReg = getFrameRegister(*MF);
+
   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                          SuperReg != MFI->getFrameOffsetReg() &&
                          SuperReg != MFI->getScratchWaveOffsetReg()));
@@ -728,11 +869,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg());
+          .addReg(FrameReg);
       }
 
       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -740,6 +881,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         .addReg(MFI->getScratchRSrcReg())        // sbase
         .addReg(OffsetReg, RegState::Kill)       // soff
         .addImm(0)                               // glc
+        .addImm(0)                               // dlc
         .addMemOperand(MMO);
 
       continue;
@@ -799,11 +941,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                    EltSize, MinAlign(Align, EltSize * i));
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
-        .addReg(TmpReg, RegState::Kill)    // src
-        .addFrameIndex(Index)              // vaddr
-        .addReg(MFI->getScratchRSrcReg())  // srrsrc
-        .addReg(MFI->getFrameOffsetReg())  // soffset
-        .addImm(i * 4)                     // offset
+        .addReg(TmpReg, RegState::Kill)       // src
+        .addFrameIndex(Index)                 // vaddr
+        .addReg(MFI->getScratchRSrcReg())     // srsrc
+        .addReg(MFI->getStackPtrOffsetReg())  // soffset
+        .addImm(i * 4)                        // offset
         .addMemOperand(MMO);
     }
   }
@@ -859,6 +1001,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   unsigned EltSize = 4;
   unsigned ScalarLoadOp;
 
+  Register FrameReg = getFrameRegister(*MF);
+
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
   if (SpillToSMEM && isSGPRClass(RC)) {
     // XXX - if private_element_size is larger than 4 it might be useful to be
@@ -890,18 +1034,19 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg());
+          .addReg(FrameReg);
       }
 
       auto MIB =
         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
-        .addReg(MFI->getScratchRSrcReg()) // sbase
-        .addReg(OffsetReg, RegState::Kill)                // soff
-        .addImm(0)                        // glc
+        .addReg(MFI->getScratchRSrcReg())  // sbase
+        .addReg(OffsetReg, RegState::Kill) // soff
+        .addImm(0)                         // glc
+        .addImm(0)                         // dlc
         .addMemOperand(MMO);
 
       if (NumSubRegs > 1 && i == 0)
@@ -937,10 +1082,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
         MinAlign(Align, EltSize * i));
 
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
-        .addFrameIndex(Index)              // vaddr
-        .addReg(MFI->getScratchRSrcReg())  // srsrc
-        .addReg(MFI->getFrameOffsetReg())  // soffset
-        .addImm(i * 4)                     // offset
+        .addFrameIndex(Index)                 // vaddr
+        .addReg(MFI->getScratchRSrcReg())     // srsrc
+        .addReg(MFI->getStackPtrOffsetReg())  // soffset
+        .addImm(i * 4)                        // offset
         .addMemOperand(MMO);
 
       auto MIB =
@@ -969,15 +1114,21 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   int FI,
   RegScavenger *RS) const {
   switch (MI->getOpcode()) {
+  case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S96_SAVE:
   case AMDGPU::SI_SPILL_S64_SAVE:
   case AMDGPU::SI_SPILL_S32_SAVE:
     return spillSGPR(MI, FI, RS, true);
+  case AMDGPU::SI_SPILL_S1024_RESTORE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_S96_RESTORE:
   case AMDGPU::SI_SPILL_S64_RESTORE:
   case AMDGPU::SI_SPILL_S32_RESTORE:
     return restoreSGPR(MI, FI, RS, true);
@@ -998,14 +1149,21 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   const SIInstrInfo *TII = ST.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
+  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
+
   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
   int Index = MI->getOperand(FIOperandNum).getIndex();
 
+  Register FrameReg = getFrameRegister(*MF);
+
   switch (MI->getOpcode()) {
     // SGPR register spill
+    case AMDGPU::SI_SPILL_S1024_SAVE:
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
+    case AMDGPU::SI_SPILL_S160_SAVE:
     case AMDGPU::SI_SPILL_S128_SAVE:
+    case AMDGPU::SI_SPILL_S96_SAVE:
     case AMDGPU::SI_SPILL_S64_SAVE:
     case AMDGPU::SI_SPILL_S32_SAVE: {
       spillSGPR(MI, Index, RS);
@@ -1013,9 +1171,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     }
 
     // SGPR register restore
+    case AMDGPU::SI_SPILL_S1024_RESTORE:
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
+    case AMDGPU::SI_SPILL_S160_RESTORE:
     case AMDGPU::SI_SPILL_S128_RESTORE:
+    case AMDGPU::SI_SPILL_S96_RESTORE:
     case AMDGPU::SI_SPILL_S64_RESTORE:
     case AMDGPU::SI_SPILL_S32_RESTORE: {
       restoreSGPR(MI, Index, RS);
@@ -1023,19 +1184,29 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     }
 
     // VGPR register spill
+    case AMDGPU::SI_SPILL_V1024_SAVE:
     case AMDGPU::SI_SPILL_V512_SAVE:
     case AMDGPU::SI_SPILL_V256_SAVE:
+    case AMDGPU::SI_SPILL_V160_SAVE:
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
-    case AMDGPU::SI_SPILL_V32_SAVE: {
+    case AMDGPU::SI_SPILL_V32_SAVE:
+    case AMDGPU::SI_SPILL_A1024_SAVE:
+    case AMDGPU::SI_SPILL_A512_SAVE:
+    case AMDGPU::SI_SPILL_A128_SAVE:
+    case AMDGPU::SI_SPILL_A64_SAVE:
+    case AMDGPU::SI_SPILL_A32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
+      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+             MFI->getStackPtrOffsetReg());
+
       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
             Index,
             VData->getReg(), VData->isKill(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+            FrameReg,
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
             *MI->memoperands_begin(),
             RS);
@@ -1047,16 +1218,25 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V96_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
+    case AMDGPU::SI_SPILL_V160_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
-    case AMDGPU::SI_SPILL_V512_RESTORE: {
+    case AMDGPU::SI_SPILL_V512_RESTORE:
+    case AMDGPU::SI_SPILL_V1024_RESTORE:
+    case AMDGPU::SI_SPILL_A32_RESTORE:
+    case AMDGPU::SI_SPILL_A64_RESTORE:
+    case AMDGPU::SI_SPILL_A128_RESTORE:
+    case AMDGPU::SI_SPILL_A512_RESTORE:
+    case AMDGPU::SI_SPILL_A1024_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
+      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+             MFI->getStackPtrOffsetReg());
 
       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
             Index,
             VData->getReg(), VData->isKill(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+            FrameReg,
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
             *MI->memoperands_begin(),
             RS);
@@ -1068,24 +1248,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       const DebugLoc &DL = MI->getDebugLoc();
       bool IsMUBUF = TII->isMUBUF(*MI);
 
-      if (!IsMUBUF &&
-          MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+      if (!IsMUBUF && !MFI->isEntryFunction()) {
         // Convert to an absolute stack address by finding the offset from the
         // scratch wave base and scaling by the wave size.
         //
-        // In an entry function/kernel the stack address is already the
-        // absolute address relative to the scratch wave offset.
+        // In an entry function/kernel the offset is already the absolute
+        // address relative to the frame register.
 
         unsigned DiffReg
           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
-        unsigned ResultReg = IsCopy ?
+        Register ResultReg = IsCopy ?
           MI->getOperand(0).getReg() :
           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addReg(MFI->getScratchWaveOffsetReg());
 
         int64_t Offset = FrameInfo.getObjectOffset(Index);
@@ -1106,7 +1285,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
               .addImm(Offset)
-              .addReg(ScaledReg, RegState::Kill);
+              .addReg(ScaledReg, RegState::Kill)
+              .addImm(0); // clamp bit
           } else {
             unsigned ConstOffsetReg
               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -1115,7 +1295,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
               .addImm(Offset);
             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
               .addReg(ConstOffsetReg, RegState::Kill)
-              .addReg(ScaledReg, RegState::Kill);
+              .addReg(ScaledReg, RegState::Kill)
+              .addImm(0); // clamp bit
           }
         }
 
@@ -1133,8 +1314,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr));
 
-        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
-               == MFI->getFrameOffsetReg());
+        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+               MFI->getStackPtrOffsetReg());
+
+        TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
 
         int64_t Offset = FrameInfo.getObjectOffset(Index);
         int64_t OldImm
@@ -1164,63 +1347,21 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 }
 
 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
-  #define AMDGPU_REG_ASM_NAMES
-  #include "AMDGPURegAsmNames.inc.cpp"
-
-  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
-    if (Reg >= BeginReg && Reg <= EndReg) {                \
-      unsigned Index = Reg - BeginReg;                     \
-      assert(Index < array_lengthof(RegTable));            \
-      return RegTable[Index];                              \
-    }
+  const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
+  unsigned Size = getRegSizeInBits(*RC);
+  unsigned AltName = AMDGPU::NoRegAltName;
 
-  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
-  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
-  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
-  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
-            VGPR96RegNames);
-
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
-            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
-            VGPR128RegNames);
-  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
-            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
-            SGPR128RegNames);
-
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
-            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
-            VGPR256RegNames);
-
-  REG_RANGE(
-    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
-    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
-    VGPR512RegNames);
-
-  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
-            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
-            SGPR256RegNames);
-
-  REG_RANGE(
-    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
-    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
-    SGPR512RegNames
-  );
-
-#undef REG_RANGE
-
-  // FIXME: Rename flat_scr so we don't need to special case this.
-  switch (Reg) {
-  case AMDGPU::FLAT_SCR:
-    return "flat_scratch";
-  case AMDGPU::FLAT_SCR_LO:
-    return "flat_scratch_lo";
-  case AMDGPU::FLAT_SCR_HI:
-    return "flat_scratch_hi";
-  default:
-    // For the special named registers the default is fine.
-    return TargetRegisterInfo::getRegAsmName(Reg);
+  switch (Size) {
+  case 32:   AltName = AMDGPU::Reg32; break;
+  case 64:   AltName = AMDGPU::Reg64; break;
+  case 96:   AltName = AMDGPU::Reg96; break;
+  case 128:  AltName = AMDGPU::Reg128; break;
+  case 160:  AltName = AMDGPU::Reg160; break;
+  case 256:  AltName = AMDGPU::Reg256; break;
+  case 512:  AltName = AMDGPU::Reg512; break;
+  case 1024: AltName = AMDGPU::Reg1024; break;
   }
+  return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
 }
 
 // FIXME: This is very slow. It might be worth creating a map from physreg to
@@ -1231,15 +1372,25 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   static const TargetRegisterClass *const BaseClasses[] = {
     &AMDGPU::VGPR_32RegClass,
     &AMDGPU::SReg_32RegClass,
+    &AMDGPU::AGPR_32RegClass,
     &AMDGPU::VReg_64RegClass,
     &AMDGPU::SReg_64RegClass,
+    &AMDGPU::AReg_64RegClass,
     &AMDGPU::VReg_96RegClass,
+    &AMDGPU::SReg_96RegClass,
     &AMDGPU::VReg_128RegClass,
     &AMDGPU::SReg_128RegClass,
+    &AMDGPU::AReg_128RegClass,
+    &AMDGPU::VReg_160RegClass,
+    &AMDGPU::SReg_160RegClass,
     &AMDGPU::VReg_256RegClass,
     &AMDGPU::SReg_256RegClass,
     &AMDGPU::VReg_512RegClass,
     &AMDGPU::SReg_512RegClass,
+    &AMDGPU::AReg_512RegClass,
+    &AMDGPU::SReg_1024RegClass,
+    &AMDGPU::VReg_1024RegClass,
+    &AMDGPU::AReg_1024RegClass,
     &AMDGPU::SCC_CLASSRegClass,
     &AMDGPU::Pseudo_SReg_32RegClass,
     &AMDGPU::Pseudo_SReg_128RegClass,
@@ -1268,10 +1419,39 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
   case 128:
     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+  case 160:
+    return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
   case 256:
     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
   case 512:
     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+  case 1024:
+    return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
+}
+
+bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
+  unsigned Size = getRegSizeInBits(*RC);
+  if (Size < 32)
+    return false;
+  switch (Size) {
+  case 32:
+    return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
+  case 64:
+    return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
+  case 96:
+    return false;
+  case 128:
+    return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
+  case 160:
+  case 256:
+    return false;
+  case 512:
+    return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
+  case 1024:
+    return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
   default:
     llvm_unreachable("Invalid register class size");
   }
@@ -1288,10 +1468,32 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
     return &AMDGPU::VReg_96RegClass;
   case 128:
     return &AMDGPU::VReg_128RegClass;
+  case 160:
+    return &AMDGPU::VReg_160RegClass;
   case 256:
     return &AMDGPU::VReg_256RegClass;
   case 512:
     return &AMDGPU::VReg_512RegClass;
+  case 1024:
+    return &AMDGPU::VReg_1024RegClass;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
+                                         const TargetRegisterClass *SRC) const {
+  switch (getRegSizeInBits(*SRC)) {
+  case 32:
+    return &AMDGPU::AGPR_32RegClass;
+  case 64:
+    return &AMDGPU::AReg_64RegClass;
+  case 128:
+    return &AMDGPU::AReg_128RegClass;
+  case 512:
+    return &AMDGPU::AReg_512RegClass;
+  case 1024:
+    return &AMDGPU::AReg_1024RegClass;
   default:
     llvm_unreachable("Invalid register class size");
   }
@@ -1304,12 +1506,18 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
     return &AMDGPU::SGPR_32RegClass;
   case 64:
     return &AMDGPU::SReg_64RegClass;
+  case 96:
+    return &AMDGPU::SReg_96RegClass;
   case 128:
     return &AMDGPU::SReg_128RegClass;
+  case 160:
+    return &AMDGPU::SReg_160RegClass;
   case 256:
     return &AMDGPU::SReg_256RegClass;
   case 512:
     return &AMDGPU::SReg_512RegClass;
+  case 1024:
+    return &AMDGPU::SReg_1024RegClass;
   default:
     llvm_unreachable("Invalid register class size");
   }
@@ -1328,11 +1536,31 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
       return &AMDGPU::SGPR_32RegClass;
     case 2:
       return &AMDGPU::SReg_64RegClass;
+    case 3:
+      return &AMDGPU::SReg_96RegClass;
     case 4:
       return &AMDGPU::SReg_128RegClass;
+    case 5:
+      return &AMDGPU::SReg_160RegClass;
     case 8:
       return &AMDGPU::SReg_256RegClass;
-    case 16: /* fall-through */
+    case 16:
+      return &AMDGPU::SReg_512RegClass;
+    case 32: /* fall-through */
+    default:
+      llvm_unreachable("Invalid sub-register class size");
+    }
+  } else if (hasAGPRs(RC)) {
+    switch (Count) {
+    case 1:
+      return &AMDGPU::AGPR_32RegClass;
+    case 2:
+      return &AMDGPU::AReg_64RegClass;
+    case 4:
+      return &AMDGPU::AReg_128RegClass;
+    case 16:
+      return &AMDGPU::AReg_512RegClass;
+    case 32: /* fall-through */
     default:
       llvm_unreachable("Invalid sub-register class size");
     }
@@ -1346,9 +1574,13 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
       return &AMDGPU::VReg_96RegClass;
     case 4:
       return &AMDGPU::VReg_128RegClass;
+    case 5:
+      return &AMDGPU::VReg_160RegClass;
     case 8:
       return &AMDGPU::VReg_256RegClass;
-    case 16: /* fall-through */
+    case 16:
+      return &AMDGPU::VReg_512RegClass;
+    case 32: /* fall-through */
     default:
       llvm_unreachable("Invalid sub-register class size");
     }
@@ -1396,6 +1628,17 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                    unsigned EltSize) const {
   if (EltSize == 4) {
+    static const int16_t Sub0_31[] = {
+      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+      AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+      AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+      AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+      AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
+    };
+
     static const int16_t Sub0_15[] = {
       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
@@ -1408,6 +1651,10 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
     };
 
+    static const int16_t Sub0_4[] = {
+      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+    };
+
     static const int16_t Sub0_3[] = {
       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
     };
@@ -1429,16 +1676,31 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
       return makeArrayRef(Sub0_2);
     case 128:
       return makeArrayRef(Sub0_3);
+    case 160:
+      return makeArrayRef(Sub0_4);
     case 256:
       return makeArrayRef(Sub0_7);
     case 512:
       return makeArrayRef(Sub0_15);
+    case 1024:
+      return makeArrayRef(Sub0_31);
     default:
       llvm_unreachable("unhandled register size");
     }
   }
 
   if (EltSize == 8) {
+    static const int16_t Sub0_31_64[] = {
+      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
+      AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
+      AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
+      AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
+      AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
+    };
+
     static const int16_t Sub0_15_64[] = {
       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
@@ -1465,32 +1727,73 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
       return makeArrayRef(Sub0_7_64);
     case 512:
       return makeArrayRef(Sub0_15_64);
+    case 1024:
+      return makeArrayRef(Sub0_31_64);
     default:
       llvm_unreachable("unhandled register size");
     }
   }
 
-  assert(EltSize == 16 && "unhandled register spill split size");
+  if (EltSize == 16) {
+
+    static const int16_t Sub0_31_128[] = {
+      AMDGPU::sub0_sub1_sub2_sub3,
+      AMDGPU::sub4_sub5_sub6_sub7,
+      AMDGPU::sub8_sub9_sub10_sub11,
+      AMDGPU::sub12_sub13_sub14_sub15,
+      AMDGPU::sub16_sub17_sub18_sub19,
+      AMDGPU::sub20_sub21_sub22_sub23,
+      AMDGPU::sub24_sub25_sub26_sub27,
+      AMDGPU::sub28_sub29_sub30_sub31
+    };
+
+    static const int16_t Sub0_15_128[] = {
+      AMDGPU::sub0_sub1_sub2_sub3,
+      AMDGPU::sub4_sub5_sub6_sub7,
+      AMDGPU::sub8_sub9_sub10_sub11,
+      AMDGPU::sub12_sub13_sub14_sub15
+    };
+
+    static const int16_t Sub0_7_128[] = {
+      AMDGPU::sub0_sub1_sub2_sub3,
+      AMDGPU::sub4_sub5_sub6_sub7
+    };
 
-  static const int16_t Sub0_15_128[] = {
-    AMDGPU::sub0_sub1_sub2_sub3,
-    AMDGPU::sub4_sub5_sub6_sub7,
-    AMDGPU::sub8_sub9_sub10_sub11,
-    AMDGPU::sub12_sub13_sub14_sub15
+    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+    case 128:
+      return {};
+    case 256:
+      return makeArrayRef(Sub0_7_128);
+    case 512:
+      return makeArrayRef(Sub0_15_128);
+    case 1024:
+      return makeArrayRef(Sub0_31_128);
+    default:
+      llvm_unreachable("unhandled register size");
+    }
+  }
+
+  assert(EltSize == 32 && "unhandled elt size");
+
+  static const int16_t Sub0_31_256[] = {
+    AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+    AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
+    AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
+    AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
   };
 
-  static const int16_t Sub0_7_128[] = {
-    AMDGPU::sub0_sub1_sub2_sub3,
-    AMDGPU::sub4_sub5_sub6_sub7
+  static const int16_t Sub0_15_256[] = {
+    AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+    AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
   };
 
   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
-  case 128:
-    return {};
   case 256:
-    return makeArrayRef(Sub0_7_128);
+    return {};
   case 512:
-    return makeArrayRef(Sub0_15_128);
+    return makeArrayRef(Sub0_15_256);
+  case 1024:
+    return makeArrayRef(Sub0_31_256);
   default:
     llvm_unreachable("unhandled register size");
   }
@@ -1512,6 +1815,13 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
   return hasVGPRs(RC);
 }
 
+bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
+                            unsigned Reg) const {
+  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
+  assert(RC && "Register class for the reg not found");
+  return hasAGPRs(RC);
+}
+
 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                     const TargetRegisterClass *SrcRC,
                                     unsigned SubReg,
@@ -1553,7 +1863,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 
 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                 unsigned Idx) const {
-  if (Idx == getVGPRPressureSet())
+  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                                const_cast<MachineFunction &>(MF));
 
@@ -1578,28 +1888,80 @@ unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
 }
 
 const TargetRegisterClass *
-SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
+                                         const RegisterBank &RB,
                                          const MachineRegisterInfo &MRI) const {
-  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
-  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
-  if (!RB)
-    return nullptr;
-
   switch (Size) {
+  case 1: {
+    switch (RB.getID()) {
+    case AMDGPU::VGPRRegBankID:
+      return &AMDGPU::VGPR_32RegClass;
+    case AMDGPU::VCCRegBankID:
+      return isWave32 ?
+        &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
+    case AMDGPU::SGPRRegBankID:
+      return &AMDGPU::SReg_32_XM0RegClass;
+    case AMDGPU::SCCRegBankID:
+      // This needs to return an allocatable class, so don't bother returning
+      // the dummy SCC class.
+      return &AMDGPU::SReg_32_XM0RegClass;
+    default:
+      llvm_unreachable("unknown register bank");
+    }
+  }
   case 32:
-    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
-                                                  &AMDGPU::SReg_32_XM0RegClass;
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+                                                 &AMDGPU::SReg_32_XM0RegClass;
   case 64:
-    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
-                                                   &AMDGPU::SReg_64_XEXECRegClass;
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
+                                                 &AMDGPU::SReg_64_XEXECRegClass;
   case 96:
-    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
-                                                  nullptr;
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
+                                                 &AMDGPU::SReg_96RegClass;
   case 128:
-    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
-                                                  &AMDGPU::SReg_128RegClass;
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
+                                                 &AMDGPU::SReg_128RegClass;
+  case 160:
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
+                                                 &AMDGPU::SReg_160RegClass;
+  case 256:
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
+                                                 &AMDGPU::SReg_256RegClass;
+  case 512:
+    return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
+                                                 &AMDGPU::SReg_512RegClass;
+  default:
+    if (Size < 32)
+      return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+                                                   &AMDGPU::SReg_32_XM0RegClass;
+    return nullptr;
+  }
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+                                         const MachineRegisterInfo &MRI) const {
+  if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
+    return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
+  return nullptr;
+}
+
+unsigned SIRegisterInfo::getVCC() const {
+  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getRegClass(unsigned RCID) const {
+  switch ((int)RCID) {
+  case AMDGPU::SReg_1RegClassID:
+    return getBoolRC();
+  case AMDGPU::SReg_1_XEXECRegClassID:
+    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
+      : &AMDGPU::SReg_64_XEXECRegClass;
+  case -1:
+    return nullptr;
   default:
-    llvm_unreachable("not implemented");
+    return AMDGPURegisterInfo::getRegClass(RCID);
   }
 }
 
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index b82fefde47e1..34487c96e72e 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,10 +29,13 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
 private:
   unsigned SGPRSetID;
   unsigned VGPRSetID;
+  unsigned AGPRSetID;
   BitVector SGPRPressureSets;
   BitVector VGPRPressureSets;
+  BitVector AGPRPressureSets;
   bool SpillSGPRToVGPR;
   bool SpillSGPRToSMEM;
+  bool isWave32;
 
   void classifyPressureSet(unsigned PSetID, unsigned Reg,
                            BitVector &PressureSets) const;
@@ -57,8 +59,6 @@ public:
   unsigned reservedPrivateSegmentWaveByteOffsetReg(
     const MachineFunction &MF) const;
 
-  unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -72,8 +72,9 @@ public:
     return 100;
   }
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
+  bool canRealignStack(const MachineFunction &MF) const override;
   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
 
   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
@@ -130,7 +131,7 @@ public:
 
   /// \returns true if this class contains only SGPR registers
   bool isSGPRClass(const TargetRegisterClass *RC) const {
-    return !hasVGPRs(RC);
+    return !hasVGPRs(RC) && !hasAGPRs(RC);
   }
 
   /// \returns true if this class ID contains only SGPR registers
@@ -150,10 +151,22 @@ public:
   /// \returns true if this class contains VGPR registers.
   bool hasVGPRs(const TargetRegisterClass *RC) const;
 
+  /// \returns true if this class contains AGPR registers.
+  bool hasAGPRs(const TargetRegisterClass *RC) const;
+
+  /// \returns true if this class contains any vector registers.
+  bool hasVectorRegisters(const TargetRegisterClass *RC) const {
+    return hasVGPRs(RC) || hasAGPRs(RC);
+  }
+
   /// \returns A VGPR reg class with the same width as \p SRC
   const TargetRegisterClass *getEquivalentVGPRClass(
                                           const TargetRegisterClass *SRC) const;
 
+  /// \returns An AGPR reg class with the same width as \p SRC
+  const TargetRegisterClass *getEquivalentAGPRClass(
+                                          const TargetRegisterClass *SRC) const;
+
   /// \returns A SGPR reg class with the same width as \p SRC
   const TargetRegisterClass *getEquivalentSGPRClass(
                                            const TargetRegisterClass *VRC) const;
@@ -191,16 +204,32 @@ public:
 
   unsigned getSGPRPressureSet() const { return SGPRSetID; };
   unsigned getVGPRPressureSet() const { return VGPRSetID; };
+  unsigned getAGPRPressureSet() const { return AGPRSetID; };
 
   const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
                                                unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+  bool isAGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+  bool isVectorRegister(const MachineRegisterInfo &MRI, unsigned Reg) const {
+    return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
+  }
+
+  virtual bool
+  isDivergentRegClass(const TargetRegisterClass *RC) const override {
+    return !isSGPRClass(RC);
+  }
 
   bool isSGPRPressureSet(unsigned SetID) const {
-    return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
+    return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID) &&
+           !AGPRPressureSets.test(SetID);
   }
   bool isVGPRPressureSet(unsigned SetID) const {
-    return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
+    return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
+           !AGPRPressureSets.test(SetID);
+  }
+  bool isAGPRPressureSet(unsigned SetID) const {
+    return AGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
+           !VGPRPressureSets.test(SetID);
   }
 
   ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
@@ -224,16 +253,45 @@ public:
 
   unsigned getReturnAddressReg(const MachineFunction &MF) const;
 
+  const TargetRegisterClass *
+  getRegClassForSizeOnBank(unsigned Size,
+                           const RegisterBank &Bank,
+                           const MachineRegisterInfo &MRI) const;
+
+  const TargetRegisterClass *
+  getRegClassForTypeOnBank(LLT Ty,
+                           const RegisterBank &Bank,
+                           const MachineRegisterInfo &MRI) const {
+    return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank, MRI);
+  }
+
   const TargetRegisterClass *
   getConstrainedRegClassForOperand(const MachineOperand &MO,
                                  const MachineRegisterInfo &MRI) const override;
 
+  const TargetRegisterClass *getBoolRC() const {
+    return isWave32 ? &AMDGPU::SReg_32_XM0RegClass
+                    : &AMDGPU::SReg_64RegClass;
+  }
+
+  const TargetRegisterClass *getWaveMaskRegClass() const {
+    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
+                    : &AMDGPU::SReg_64_XEXECRegClass;
+  }
+
+  unsigned getVCC() const;
+
+  const TargetRegisterClass *getRegClass(unsigned RCID) const;
+
   // Find reaching register definition
   MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
                                 MachineInstr &Use,
                                 MachineRegisterInfo &MRI,
                                 LiveIntervals *LIS) const;
 
+  const uint32_t *getAllVGPRRegMask() const;
+  const uint32_t *getAllAllocatableSRegMask() const;
+
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
                            unsigned LoadStoreOp,
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index c625ecc9b750..d5948a7862cc 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,43 +14,86 @@ class getSubRegs<int size> {
   list<SubRegIndex> ret2 = [sub0, sub1];
   list<SubRegIndex> ret3 = [sub0, sub1, sub2];
   list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+  list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
   list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
   list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                              sub4, sub5, sub6, sub7,
                              sub8, sub9, sub10, sub11,
                              sub12, sub13, sub14, sub15];
+  list<SubRegIndex> ret32 = [sub0, sub1, sub2, sub3,
+                             sub4, sub5, sub6, sub7,
+                             sub8, sub9, sub10, sub11,
+                             sub12, sub13, sub14, sub15,
+                             sub16, sub17, sub18, sub19,
+                             sub20, sub21, sub22, sub23,
+                             sub24, sub25, sub26, sub27,
+                             sub28, sub29, sub30, sub31];
 
   list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
                               !if(!eq(size, 3), ret3,
                                   !if(!eq(size, 4), ret4,
-                                      !if(!eq(size, 8), ret8, ret16))));
+                                      !if(!eq(size, 5), ret5,
+                                          !if(!eq(size, 8), ret8,
+                                              !if(!eq(size, 16), ret16, ret32))))));
+}
+
+let Namespace = "AMDGPU" in {
+defset list<RegAltNameIndex> AllRegAltNameIndices = {
+  def Reg32   : RegAltNameIndex;
+  def Reg64   : RegAltNameIndex;
+  def Reg96   : RegAltNameIndex;
+  def Reg128  : RegAltNameIndex;
+  def Reg160  : RegAltNameIndex;
+  def Reg256  : RegAltNameIndex;
+  def Reg512  : RegAltNameIndex;
+  def Reg1024 : RegAltNameIndex;
+}
 }
 
 //===----------------------------------------------------------------------===//
 //  Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
+             int regNo = !cast<int>(regIdx)> :
+  Register<n, !if(!eq(prefix, ""),
+                [ n, n, n, n, n, n, n, n ],
+                [ prefix # regNo,
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
+                ])>,
   DwarfRegNum<[!cast<int>(HWEncoding)]> {
   let Namespace = "AMDGPU";
+  let RegAltNameIndices = AllRegAltNameIndices;
 
   // This is the not yet the complete register encoding. An additional
   // bit is set for VGPRs.
   let HWEncoding = regIdx;
 }
 
+class SIRegisterWithSubRegs<string n, list<Register> subregs> :
+  RegisterWithSubRegs<n, subregs> {
+  let RegAltNameIndices = AllRegAltNameIndices;
+  let AltNames = [ n, n, n, n, n, n, n, n ];
+}
+
 // Special Registers
 def VCC_LO : SIReg<"vcc_lo", 106>;
 def VCC_HI : SIReg<"vcc_hi", 107>;
 
 // Pseudo-registers: Used as placeholders during isel and immediately
 // replaced, never seeing the verifier.
-def PRIVATE_RSRC_REG : SIReg<"", 0>;
-def FP_REG : SIReg<"", 0>;
-def SP_REG : SIReg<"", 0>;
-def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;
+def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>;
+def FP_REG : SIReg<"fp", 0>;
+def SP_REG : SIReg<"sp", 0>;
+def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
 
 // VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
           DwarfRegAlias<VCC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -61,25 +103,38 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;
 
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
            DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
   let HWEncoding = 126;
 }
 
-def SCC : SIReg<"scc", 253>;
+// 32-bit real registers, for MC only.
+// May be used with both 32-bit and 64-bit operands.
+def SRC_VCCZ : SIReg<"src_vccz", 251>;
+def SRC_EXECZ : SIReg<"src_execz", 252>;
+def SRC_SCC : SIReg<"src_scc", 253>;
+
+// 1-bit pseudo register, for codegen only.
+// Should never be emitted.
+def SCC : SIReg<"scc">;
+
 def M0 : SIReg <"m0", 124>;
+def SGPR_NULL : SIReg<"null", 125>;
 
 def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
 def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
 def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
 def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
+
+def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
 
 def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
 def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
 
-def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
                  DwarfRegAlias<XNACK_MASK_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -90,7 +145,7 @@ def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI
 def TBA_LO : SIReg<"tba_lo", 108>;
 def TBA_HI : SIReg<"tba_hi", 109>;
 
-def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
           DwarfRegAlias<TBA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -100,7 +155,7 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
 def TMA_LO : SIReg<"tma_lo", 110>;
 def TMA_HI : SIReg<"tma_hi", 111>;
 
-def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
           DwarfRegAlias<TMA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -108,19 +163,19 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
 }
 
 foreach Index = 0-15 in {
-  def TTMP#Index#_vi   : SIReg<"ttmp"#Index, !add(112, Index)>;
-  def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
-  def TTMP#Index       : SIReg<"", 0>;
+  def TTMP#Index#_vi         : SIReg<"ttmp"#Index, !add(112, Index)>;
+  def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
+  def TTMP#Index             : SIReg<"ttmp"#Index, 0>;
 }
 
 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
   def _ci : SIReg<n, ci_e>;
   def _vi : SIReg<n, vi_e>;
-  def "" : SIReg<"", 0>;
+  def "" : SIReg<n, 0>;
 }
 
 class FlatReg <Register lo, Register hi, bits<16> encoding> :
-    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+    SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
     DwarfRegAlias<lo> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -135,13 +190,20 @@ def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
 def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
 
 // SGPR registers
-foreach Index = 0-103 in {
-  def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+foreach Index = 0-105 in {
+  def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">;
 }
 
 // VGPR registers
 foreach Index = 0-255 in {
-  def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+  def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> {
+    let HWEncoding{8} = 1;
+  }
+}
+
+// AccVGPR registers
+foreach Index = 0-255 in {
+  def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> {
     let HWEncoding{8} = 1;
   }
 }
@@ -164,10 +226,10 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
 
 // SGPR 32-bit registers
 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                            (add (sequence "SGPR%u", 0, 103))> {
+                            (add (sequence "SGPR%u", 0, 105)), Reg32> {
   // Give all SGPR classes higher priority than VGPR classes, because
   // we want to spill SGPRs to VGPRs.
-  let AllocationPriority = 7;
+  let AllocationPriority = 9;
 }
 
 // SGPR 64-bit registers
@@ -175,6 +237,12 @@ def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
                              [(add (decimate SGPR_32, 2)),
                               (add (decimate (shl SGPR_32, 1), 2))]>;
 
+// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
+def SGPR_96Regs : RegisterTuples<getSubRegs<3>.ret,
+                            [(add (decimate SGPR_32, 3)),
+                             (add (decimate (shl SGPR_32, 1), 3)),
+                             (add (decimate (shl SGPR_32, 2), 3))]>;
+
 // SGPR 128-bit registers
 def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
                               [(add (decimate SGPR_32, 4)),
@@ -182,6 +250,14 @@ def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4))]>;
 
+// SGPR 160-bit registers. No operations use these, but they are defined for symmetry with 160-bit VGPRs.
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
+                            [(add (decimate SGPR_32, 4)),
+                             (add (decimate (shl SGPR_32, 1), 4)),
+                             (add (decimate (shl SGPR_32, 2), 4)),
+                             (add (decimate (shl SGPR_32, 3), 4)),
+                             (add (decimate (shl SGPR_32, 4), 4))]>;
+
 // SGPR 256-bit registers
 def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
                               [(add (decimate SGPR_32, 4)),
@@ -212,6 +288,41 @@ def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
                                (add (decimate (shl SGPR_32, 14), 4)),
                                (add (decimate (shl SGPR_32, 15), 4))]>;
 
+// SGPR 1024-bit registers
+def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
+                              [(add (decimate SGPR_32, 4)),
+                               (add (decimate (shl SGPR_32, 1), 4)),
+                               (add (decimate (shl SGPR_32, 2), 4)),
+                               (add (decimate (shl SGPR_32, 3), 4)),
+                               (add (decimate (shl SGPR_32, 4), 4)),
+                               (add (decimate (shl SGPR_32, 5), 4)),
+                               (add (decimate (shl SGPR_32, 6), 4)),
+                               (add (decimate (shl SGPR_32, 7), 4)),
+                               (add (decimate (shl SGPR_32, 8), 4)),
+                               (add (decimate (shl SGPR_32, 9), 4)),
+                               (add (decimate (shl SGPR_32, 10), 4)),
+                               (add (decimate (shl SGPR_32, 11), 4)),
+                               (add (decimate (shl SGPR_32, 12), 4)),
+                               (add (decimate (shl SGPR_32, 13), 4)),
+                               (add (decimate (shl SGPR_32, 14), 4)),
+                               (add (decimate (shl SGPR_32, 15), 4)),
+                               (add (decimate (shl SGPR_32, 16), 4)),
+                               (add (decimate (shl SGPR_32, 17), 4)),
+                               (add (decimate (shl SGPR_32, 18), 4)),
+                               (add (decimate (shl SGPR_32, 19), 4)),
+                               (add (decimate (shl SGPR_32, 20), 4)),
+                               (add (decimate (shl SGPR_32, 21), 4)),
+                               (add (decimate (shl SGPR_32, 22), 4)),
+                               (add (decimate (shl SGPR_32, 23), 4)),
+                               (add (decimate (shl SGPR_32, 24), 4)),
+                               (add (decimate (shl SGPR_32, 25), 4)),
+                               (add (decimate (shl SGPR_32, 26), 4)),
+                               (add (decimate (shl SGPR_32, 27), 4)),
+                               (add (decimate (shl SGPR_32, 28), 4)),
+                               (add (decimate (shl SGPR_32, 29), 4)),
+                               (add (decimate (shl SGPR_32, 30), 4)),
+                               (add (decimate (shl SGPR_32, 31), 4))]>;
+
 // Trap handler TMP 32-bit registers
 def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
                             (add (sequence "TTMP%u", 0, 15))> {
@@ -263,7 +374,7 @@ class TmpRegTuplesBase<int index, int size,
                        list<SubRegIndex> indices = getSubRegs<size>.ret,
                        int index1 = !add(index, !add(size, -1)),
                        string name = "ttmp["#index#":"#index1#"]"> :
-  RegisterWithSubRegs<name, subRegs> {
+  SIRegisterWithSubRegs<name, subRegs> {
   let HWEncoding = subRegs[0].HWEncoding;
   let SubRegIndices = indices;
 }
@@ -293,8 +404,8 @@ class TmpRegTuples<string tgt,
                    getSubRegs<size>.ret>;
 
 foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
-  def TTMP#Index#_TTMP#!add(Index,1)#_vi   : TmpRegTuples<"_vi",   2, Index>;
-  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
+  def TTMP#Index#_TTMP#!add(Index,1)#_vi         : TmpRegTuples<"_vi",   2, Index>;
+  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
 }
 
 foreach Index = {0, 4, 8, 12} in {
@@ -303,7 +414,7 @@ foreach Index = {0, 4, 8, 12} in {
                  _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi",   4, Index>;
   def TTMP#Index#_TTMP#!add(Index,1)#
                  _TTMP#!add(Index,2)#
-                 _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
+                 _TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
 }
 
 foreach Index = {0, 4, 8} in {
@@ -320,7 +431,7 @@ foreach Index = {0, 4, 8} in {
                  _TTMP#!add(Index,4)#
                  _TTMP#!add(Index,5)#
                  _TTMP#!add(Index,6)#
-                 _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
+                 _TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
 }
 
 def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
@@ -330,18 +441,17 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
                     TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
                     TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
 
-def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
   TmpRegTuplesBase<0, 16,
-                   [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
-                    TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
-                    TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
-                    TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
-
+                   [TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
+                    TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
+                    TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
+                    TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
 
 // VGPR 32-bit registers
 // i16/f16 only on VI+
 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                            (add (sequence "VGPR%u", 0, 255))> {
+                            (add (sequence "VGPR%u", 0, 255)), Reg32> {
   let AllocationPriority = 1;
   let Size = 32;
 }
@@ -364,6 +474,14 @@ def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
                                (add (shl VGPR_32, 2)),
                                (add (shl VGPR_32, 3))]>;
 
+// VGPR 160-bit registers
+def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
+                             [(add (trunc VGPR_32, 252)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2)),
+                              (add (shl VGPR_32, 3)),
+                              (add (shl VGPR_32, 4))]>;
+
 // VGPR 256-bit registers
 def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
                               [(add (trunc VGPR_32, 249)),
@@ -394,88 +512,257 @@ def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
                                (add (shl VGPR_32, 14)),
                                (add (shl VGPR_32, 15))]>;
 
+// VGPR 1024-bit registers
+def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
+                              [(add (trunc VGPR_32, 225)),
+                               (add (shl VGPR_32, 1)),
+                               (add (shl VGPR_32, 2)),
+                               (add (shl VGPR_32, 3)),
+                               (add (shl VGPR_32, 4)),
+                               (add (shl VGPR_32, 5)),
+                               (add (shl VGPR_32, 6)),
+                               (add (shl VGPR_32, 7)),
+                               (add (shl VGPR_32, 8)),
+                               (add (shl VGPR_32, 9)),
+                               (add (shl VGPR_32, 10)),
+                               (add (shl VGPR_32, 11)),
+                               (add (shl VGPR_32, 12)),
+                               (add (shl VGPR_32, 13)),
+                               (add (shl VGPR_32, 14)),
+                               (add (shl VGPR_32, 15)),
+                               (add (shl VGPR_32, 16)),
+                               (add (shl VGPR_32, 17)),
+                               (add (shl VGPR_32, 18)),
+                               (add (shl VGPR_32, 19)),
+                               (add (shl VGPR_32, 20)),
+                               (add (shl VGPR_32, 21)),
+                               (add (shl VGPR_32, 22)),
+                               (add (shl VGPR_32, 23)),
+                               (add (shl VGPR_32, 24)),
+                               (add (shl VGPR_32, 25)),
+                               (add (shl VGPR_32, 26)),
+                               (add (shl VGPR_32, 27)),
+                               (add (shl VGPR_32, 28)),
+                               (add (shl VGPR_32, 29)),
+                               (add (shl VGPR_32, 30)),
+                               (add (shl VGPR_32, 31))]>;
+
+// AccVGPR 32-bit registers
+def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+                            (add (sequence "AGPR%u", 0, 255)), Reg32> {
+  let AllocationPriority = 1;
+  let Size = 32;
+}
+
+// AGPR 64-bit registers
+def AGPR_64 : RegisterTuples<getSubRegs<2>.ret,
+                             [(add (trunc AGPR_32, 255)),
+                              (add (shl AGPR_32, 1))]>;
+
+// AGPR 128-bit registers
+def AGPR_128 : RegisterTuples<getSubRegs<4>.ret,
+                              [(add (trunc AGPR_32, 253)),
+                               (add (shl AGPR_32, 1)),
+                               (add (shl AGPR_32, 2)),
+                               (add (shl AGPR_32, 3))]>;
+
+// AGPR 512-bit registers
+def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
+                              [(add (trunc AGPR_32, 241)),
+                               (add (shl AGPR_32, 1)),
+                               (add (shl AGPR_32, 2)),
+                               (add (shl AGPR_32, 3)),
+                               (add (shl AGPR_32, 4)),
+                               (add (shl AGPR_32, 5)),
+                               (add (shl AGPR_32, 6)),
+                               (add (shl AGPR_32, 7)),
+                               (add (shl AGPR_32, 8)),
+                               (add (shl AGPR_32, 9)),
+                               (add (shl AGPR_32, 10)),
+                               (add (shl AGPR_32, 11)),
+                               (add (shl AGPR_32, 12)),
+                               (add (shl AGPR_32, 13)),
+                               (add (shl AGPR_32, 14)),
+                               (add (shl AGPR_32, 15))]>;
+
+// AGPR 1024-bit registers
+def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
+                              [(add (trunc AGPR_32, 225)),
+                               (add (shl AGPR_32, 1)),
+                               (add (shl AGPR_32, 2)),
+                               (add (shl AGPR_32, 3)),
+                               (add (shl AGPR_32, 4)),
+                               (add (shl AGPR_32, 5)),
+                               (add (shl AGPR_32, 6)),
+                               (add (shl AGPR_32, 7)),
+                               (add (shl AGPR_32, 8)),
+                               (add (shl AGPR_32, 9)),
+                               (add (shl AGPR_32, 10)),
+                               (add (shl AGPR_32, 11)),
+                               (add (shl AGPR_32, 12)),
+                               (add (shl AGPR_32, 13)),
+                               (add (shl AGPR_32, 14)),
+                               (add (shl AGPR_32, 15)),
+                               (add (shl AGPR_32, 16)),
+                               (add (shl AGPR_32, 17)),
+                               (add (shl AGPR_32, 18)),
+                               (add (shl AGPR_32, 19)),
+                               (add (shl AGPR_32, 20)),
+                               (add (shl AGPR_32, 21)),
+                               (add (shl AGPR_32, 22)),
+                               (add (shl AGPR_32, 23)),
+                               (add (shl AGPR_32, 24)),
+                               (add (shl AGPR_32, 25)),
+                               (add (shl AGPR_32, 26)),
+                               (add (shl AGPR_32, 27)),
+                               (add (shl AGPR_32, 28)),
+                               (add (shl AGPR_32, 29)),
+                               (add (shl AGPR_32, 30)),
+                               (add (shl AGPR_32, 31))]>;
+
 //===----------------------------------------------------------------------===//
 //  Register classes used as source and destination
 //===----------------------------------------------------------------------===//
 
 def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+  (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
 
 def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
-  (add PRIVATE_RSRC_REG)> {
+  (add PRIVATE_RSRC_REG), Reg128> {
+  let isAllocatable = 0;
+  let CopyCost = -1;
+}
+
+def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+  (add LDS_DIRECT), Reg32> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
 
 // Subset of SReg_32 without M0 for SMRD instructions and alike.
 // See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
-   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
-   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
-  let AllocationPriority = 7;
+   SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+   SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> {
+  let AllocationPriority = 10;
 }
 
-def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
-  let AllocationPriority = 7;
+def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> {
+  let AllocationPriority = 10;
 }
 
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
-  let AllocationPriority = 7;
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> {
+  let AllocationPriority = 10;
 }
 
 // Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
-  let AllocationPriority = 7;
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> {
+  let AllocationPriority = 10;
+}
+
+def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS),
+  Reg32> {
+  let isAllocatable = 0;
 }
 
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+                            (add SGPR_64Regs), Reg64> {
   let CopyCost = 1;
-  let AllocationPriority = 8;
+  let AllocationPriority = 11;
+}
+
+// CCR (call clobbered registers) SGPR 64-bit registers
+def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+                                (add (trunc SGPR_64, 16)), Reg64> {
+  let CopyCost = SGPR_64.CopyCost;
+  let AllocationPriority = SGPR_64.AllocationPriority;
 }
 
-def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
+def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+                            (add TTMP_64Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
   let CopyCost = 1;
-  let AllocationPriority = 8;
+  let AllocationPriority = 13;
 }
 
 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SReg_64_XEXEC, EXEC)> {
+  (add SReg_64_XEXEC, EXEC), Reg64> {
   let CopyCost = 1;
-  let AllocationPriority = 8;
+  let AllocationPriority = 13;
+}
+
+def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32,
+  (add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> {
+  let CopyCost = 1;
+  let isAllocatable = 0;
+}
+
+def SReg_1 : RegisterClass<"AMDGPU", [i1], 32,
+  (add SReg_1_XEXEC, EXEC, EXEC_LO)> {
+  let CopyCost = 1;
+  let isAllocatable = 0;
 }
 
 // Requires 2 s_mov_b64 to copy
 let CopyCost = 2 in {
 
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
-  let AllocationPriority = 10;
+// There are no 3-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
+  (add SGPR_96Regs), Reg96> {
+  let AllocationPriority = 14;
 }
 
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
+def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
+  (add SGPR_96), Reg96> {
+  let AllocationPriority = 14;
+}
+
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+                             (add SGPR_128Regs), Reg128> {
+  let AllocationPriority = 15;
+}
+
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+                             (add TTMP_128Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
-  (add SGPR_128, TTMP_128)> {
-  let AllocationPriority = 10;
+                             (add SGPR_128, TTMP_128), Reg128> {
+  let AllocationPriority = 15;
 }
 
 } // End CopyCost = 2
 
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
-  let AllocationPriority = 11;
+// There are no 5-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add SGPR_160Regs), Reg160> {
+  let AllocationPriority = 16;
+}
+
+def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add SGPR_160), Reg160> {
+  let AllocationPriority = 16;
+}
+
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
+                             Reg256> {
+  let AllocationPriority = 17;
 }
 
 def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
@@ -483,29 +770,48 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
 }
 
 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
-  (add SGPR_256, TTMP_256)> {
+                             (add SGPR_256, TTMP_256), Reg256> {
   // Requires 4 s_mov_b64 to copy
   let CopyCost = 4;
-  let AllocationPriority = 11;
+  let AllocationPriority = 17;
 }
 
-def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
-  let AllocationPriority = 12;
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add SGPR_512Regs), Reg512> {
+  let AllocationPriority = 18;
 }
 
-def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add TTMP_512Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-  (add SGPR_512, TTMP_512)> {
+                             (add SGPR_512, TTMP_512), Reg512> {
   // Requires 8 s_mov_b64 to copy
   let CopyCost = 8;
-  let AllocationPriority = 12;
+  let AllocationPriority = 18;
+}
+
+def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+                                 (add VGPR_32, LDS_DIRECT_CLASS), Reg32> {
+  let isAllocatable = 0;
+}
+
+def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add SGPR_1024Regs), Reg1024> {
+  let AllocationPriority = 19;
+}
+
+def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add SGPR_1024), Reg1024> {
+  let CopyCost = 16;
+  let AllocationPriority = 19;
 }
 
 // Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> {
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+                            (add VGPR_64), Reg64> {
   let Size = 64;
 
   // Requires 2 v_mov_b32 to copy
@@ -513,7 +819,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
   let AllocationPriority = 2;
 }
 
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
   let Size = 96;
 
   // Requires 3 v_mov_b32 to copy
@@ -521,7 +827,8 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
   let AllocationPriority = 3;
 }
 
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+                             (add VGPR_128), Reg128> {
   let Size = 128;
 
   // Requires 4 v_mov_b32 to copy
@@ -529,28 +836,88 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG
   let AllocationPriority = 4;
 }
 
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add VGPR_160), Reg160> {
+  let Size = 160;
+
+  // Requires 5 v_mov_b32 to copy
+  let CopyCost = 5;
+  let AllocationPriority = 5;
+}
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+                             (add VGPR_256), Reg256> {
   let Size = 256;
   let CopyCost = 8;
-  let AllocationPriority = 5;
+  let AllocationPriority = 6;
 }
 
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add VGPR_512), Reg512> {
   let Size = 512;
   let CopyCost = 16;
-  let AllocationPriority = 6;
+  let AllocationPriority = 7;
+}
+
+def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add VGPR_1024), Reg1024> {
+  let Size = 1024;
+  let CopyCost = 32;
+  let AllocationPriority = 8;
 }
 
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+                            (add AGPR_64), Reg64> {
+  let Size = 64;
+
+  let CopyCost = 5;
+  let AllocationPriority = 2;
+}
+
+def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+                             (add AGPR_128), Reg128> {
+  let Size = 128;
+
+  // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
+  let CopyCost = 9;
+  let AllocationPriority = 4;
+}
+
+def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add AGPR_512), Reg512> {
+  let Size = 512;
+  let CopyCost = 33;
+  let AllocationPriority = 7;
+}
+
+def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add AGPR_1024), Reg1024> {
+  let Size = 1024;
+  let CopyCost = 65;
+  let AllocationPriority = 8;
+}
+
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> {
   let Size = 32;
 }
 
 def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                          (add VGPR_32, SReg_32)> {
+                          (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> {
+  let isAllocatable = 0;
+}
+
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
+                          Reg64> {
   let isAllocatable = 0;
 }
 
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+                          (add AGPR_32, VGPR_32), Reg32> {
+  let isAllocatable = 0;
+}
+
+def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
+                          (add AReg_64, VReg_64), Reg64> {
   let isAllocatable = 0;
 }
 
@@ -563,47 +930,40 @@ class RegImmMatcher<string name> : AsmOperandClass {
   let RenderMethod = "addRegOrImmOperands";
 }
 
-multiclass SIRegOperand <string rc, string MatchName, string opType> {
+multiclass SIRegOperand32 <string rc, string MatchName, string opType,
+                           string rc_suffix = "_32"> {
   let OperandNamespace = "AMDGPU" in {
-    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_INT16";
       let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
       let DecoderMethod = "decodeOperand_VSrc16";
     }
 
-    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_FP16";
       let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
-      let DecoderMethod = "decodeOperand_VSrc16";
+      let DecoderMethod = "decodeOperand_" # rc # "_16";
     }
 
-    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_INT32";
       let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
+      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
     }
 
-    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_FP32";
       let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
     }
 
-    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
-      let OperandType = opType#"_INT64";
-      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
-    }
-
-    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
-      let OperandType = opType#"_FP64";
-      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
-    }
-
-    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_V2INT16";
       let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
       let DecoderMethod = "decodeOperand_VSrcV216";
     }
 
-    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
       let OperandType = opType#"_V2FP16";
       let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
       let DecoderMethod = "decodeOperand_VSrcV216";
@@ -611,6 +971,21 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
   }
 }
 
+multiclass SIRegOperand <string rc, string MatchName, string opType> :
+  SIRegOperand32<rc, MatchName, opType> {
+  let OperandNamespace = "AMDGPU" in {
+    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_INT64";
+      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
+    }
+
+    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_FP64";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
+    }
+  }
+}
+
 // FIXME: 64-bit sources can sometimes use 32-bit constants.
 multiclass RegImmOperand <string rc, string MatchName>
   : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
@@ -618,20 +993,32 @@ multiclass RegImmOperand <string rc, string MatchName>
 multiclass RegInlineOperand <string rc, string MatchName>
   : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
 
+multiclass RegInlineOperand32 <string rc, string MatchName,
+                               string rc_suffix = "_32">
+  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;
+
+multiclass RegInlineOperandAC <string rc, string MatchName,
+                               string rc_suffix = "_32">
+  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix>;
+
 //===----------------------------------------------------------------------===//
 //  SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
 defm SSrc : RegImmOperand<"SReg", "SSrc">;
 
+def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_IMM_INT32";
+  let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">;
+}
+
 //===----------------------------------------------------------------------===//
 //  SCSrc_* Operands with an SGPR or a inline constant
 //===----------------------------------------------------------------------===//
 
 defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
 
-def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
-
 //===----------------------------------------------------------------------===//
 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
@@ -653,8 +1040,46 @@ def VRegSrc_32 : RegisterOperand<VGPR_32> {
   let DecoderMethod = "DecodeVS_32RegisterClass";
 }
 
+//===----------------------------------------------------------------------===//
+//  ASrc_* Operands with an AccVGPR
+//===----------------------------------------------------------------------===//
+
+def ARegSrc_32 : RegisterOperand<AGPR_32> {
+  let DecoderMethod = "DecodeAGPR_32RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
 //===----------------------------------------------------------------------===//
 //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
 defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
+
+//===----------------------------------------------------------------------===//
+//  VISrc_* Operands with a VGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
+
+//===----------------------------------------------------------------------===//
+//  AVSrc_* Operands with an AGPR or VGPR
+//===----------------------------------------------------------------------===//
+
+def AVSrc_32 : RegisterOperand<AV_32> {
+  let DecoderMethod = "DecodeAV_32RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVSrc_64 : RegisterOperand<AV_64> {
+  let DecoderMethod = "DecodeAV_64RegisterClass";
+  let EncoderMethod = "getAVOperandEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+//  AISrc_* Operands with an AGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm AISrc      : RegInlineOperandAC<"AGPR", "AISrc">;
+defm AISrc_128  : RegInlineOperandAC<"AReg", "AISrc_128",  "_128">;
+defm AISrc_512  : RegInlineOperandAC<"AReg", "AISrc_512",  "_512">;
+defm AISrc_1024 : RegInlineOperandAC<"AReg", "AISrc_1024", "_1024">;
diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td
index 7af69cb6a46d..824d1aeb0df9 100644
--- a/lib/Target/AMDGPU/SISchedule.td
+++ b/lib/Target/AMDGPU/SISchedule.td
@@ -1,9 +1,8 @@
 //===-- SISchedule.td - SI Scheduling definitons -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,6 +24,9 @@ def WriteSMEM   : SchedWrite;
 def WriteVMEM   : SchedWrite;
 def WriteBarrier : SchedWrite;
 
+def MIVGPRRead  : SchedRead;
+def MIMFMARead  : SchedRead;
+
 // Vector ALU instructions
 def Write32Bit         : SchedWrite;
 def WriteQuarterRate32 : SchedWrite;
@@ -38,9 +40,17 @@ def WriteDouble : SchedWrite;
 // half rate f64 instruction (same as v_add_f64)
 def WriteDoubleAdd  : SchedWrite;
 
+// Conversion to or from f64 instruction
+def WriteDoubleCvt  : SchedWrite;
+
 // Half rate 64-bit instructions.
 def Write64Bit : SchedWrite;
 
+// mAI multipass instructions.
+def Write2PassMAI  : SchedWrite;
+def Write8PassMAI  : SchedWrite;
+def Write16PassMAI : SchedWrite;
+
 // FIXME: Should there be a class for instructions which are VALU
 // instructions and have VALU rates, but write to the SALU (i.e. VOPC
 // instructions)
@@ -62,6 +72,7 @@ class SISchedMachineModel : SchedMachineModel {
 
 def SIFullSpeedModel : SISchedMachineModel;
 def SIQuarterSpeedModel : SISchedMachineModel;
+def GFX10SpeedModel : SISchedMachineModel;
 
 // XXX: Are the resource counts correct?
 def HWBranch : ProcResource<1> {
@@ -82,6 +93,9 @@ def HWVMEM   : ProcResource<1> {
 def HWVALU   : ProcResource<1> {
   let BufferSize = 1;
 }
+def HWRC   : ProcResource<1> { // Register destination cache
+  let BufferSize = 1;
+}
 
 class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
                  int latency> : WriteRes<write, resources> {
@@ -91,6 +105,11 @@ class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
 class HWVALUWriteRes<SchedWrite write, int latency> :
   HWWriteRes<write, [HWVALU], latency>;
 
+def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;
+
+def MIReadVGPR : SchedReadVariant<[
+      SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
+      SchedVar<NoSchedPred, [ReadDefault]>]>;
 
 // The latency numbers are taken from AMD Accelerated Parallel Processing
 // guide. They may not be accurate.
@@ -109,6 +128,24 @@ multiclass SICommonWriteRes {
   def : HWVALUWriteRes<Write32Bit,         1>;
   def : HWVALUWriteRes<Write64Bit,         2>;
   def : HWVALUWriteRes<WriteQuarterRate32, 4>;
+  def : HWVALUWriteRes<Write2PassMAI,      2>;
+  def : HWVALUWriteRes<Write8PassMAI,      8>;
+  def : HWVALUWriteRes<Write16PassMAI,    16>;
+
+  def : ReadAdvance<MIVGPRRead, -2>;
+  def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
+
+  // Technically mfma reads can be from 0 to 4 cycles but that does not make
+  // sense to model because its register setup is huge. In particular if we
+  // properly model read advance as -2 for a vgpr read it will result in a
+  // bad scheduling of acc writes before that mfma. To avoid it we would
+  // need to consume 2 or 4 more vgprs to be initialized before the acc
+  // write sequence. Just assume worst case here.
+  def : ReadAdvance<MIMFMARead, -4>;
+
+  def : InstRW<[Write2PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
+  def : InstRW<[Write8PassMAI,  MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
+  def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;
 }
 
 def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
@@ -125,6 +162,7 @@ defm : SICommonWriteRes;
 def : HWVALUWriteRes<WriteFloatFMA,   1>;
 def : HWVALUWriteRes<WriteDouble,     4>;
 def : HWVALUWriteRes<WriteDoubleAdd,  2>;
+def : HWVALUWriteRes<WriteDoubleCvt,  4>;
 
 def : InstRW<[WriteCopy], (instrs COPY)>;
 
@@ -137,7 +175,32 @@ defm : SICommonWriteRes;
 def : HWVALUWriteRes<WriteFloatFMA, 16>;
 def : HWVALUWriteRes<WriteDouble,   16>;
 def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+def : HWVALUWriteRes<WriteDoubleCvt, 4>;
 
 def : InstRW<[WriteCopy], (instrs COPY)>;
 
 }  // End SchedModel = SIQuarterSpeedModel
+
+let SchedModel = GFX10SpeedModel in {
+
+// The latency values are 1 / (operations / cycle).
+// Add 1 stall cycle for VGPR read.
+def : HWWriteRes<Write32Bit,         [HWVALU, HWRC],   5>;
+def : HWWriteRes<Write64Bit,         [HWVALU, HWRC],   9>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC],   17>;
+def : HWWriteRes<WriteFloatFMA,      [HWVALU, HWRC],   5>;
+def : HWWriteRes<WriteDouble,        [HWVALU, HWRC],   17>;
+def : HWWriteRes<WriteDoubleAdd,     [HWVALU, HWRC],   17>;
+def : HWWriteRes<WriteDoubleCvt,     [HWVALU, HWRC],   17>;
+
+def : HWWriteRes<WriteBranch,        [HWBranch],       32>;
+def : HWWriteRes<WriteExport,        [HWExport, HWRC], 16>;
+def : HWWriteRes<WriteLDS,           [HWLGKM,   HWRC], 20>;
+def : HWWriteRes<WriteSALU,          [HWSALU,   HWRC], 5>;
+def : HWWriteRes<WriteSMEM,          [HWLGKM,   HWRC], 20>;
+def : HWWriteRes<WriteVMEM,          [HWVMEM,   HWRC], 320>;
+def : HWWriteRes<WriteBarrier,       [HWBranch],       2000>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
+}  // End SchedModel = GFX10SpeedModel
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 6ad7dd0e3a7c..7ee178149c7a 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1,9 +1,8 @@
 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// The pass tries to use the 32-bit encoding for instructions when possible.
 //===----------------------------------------------------------------------===//
@@ -39,6 +38,8 @@ class SIShrinkInstructions : public MachineFunctionPass {
 public:
   static char ID;
 
+  void shrinkMIMG(MachineInstr &MI);
+
 public:
   SIShrinkInstructions() : MachineFunctionPass(ID) {
   }
@@ -94,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
           Src0.setSubReg(0);
           Src0.ChangeToFrameIndex(MovSrc.getIndex());
           ConstantFolded = true;
+        } else if (MovSrc.isGlobal()) {
+          Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                          MovSrc.getTargetFlags());
+          ConstantFolded = true;
         }
 
         if (ConstantFolded) {
@@ -212,6 +217,96 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
   }
 }
 
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+  if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+    return;
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int VAddr0Idx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+  unsigned NewAddrDwords = Info->VAddrDwords;
+  const TargetRegisterClass *RC;
+
+  if (Info->VAddrDwords == 2) {
+    RC = &AMDGPU::VReg_64RegClass;
+  } else if (Info->VAddrDwords == 3) {
+    RC = &AMDGPU::VReg_96RegClass;
+  } else if (Info->VAddrDwords == 4) {
+    RC = &AMDGPU::VReg_128RegClass;
+  } else if (Info->VAddrDwords <= 8) {
+    RC = &AMDGPU::VReg_256RegClass;
+    NewAddrDwords = 8;
+  } else {
+    RC = &AMDGPU::VReg_512RegClass;
+    NewAddrDwords = 16;
+  }
+
+  unsigned VgprBase = 0;
+  bool IsUndef = true;
+  bool IsKill = NewAddrDwords == Info->VAddrDwords;
+  for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+    const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+    unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+    if (i == 0) {
+      VgprBase = Vgpr;
+    } else if (VgprBase + i != Vgpr)
+      return;
+
+    if (!Op.isUndef())
+      IsUndef = false;
+    if (!Op.isKill())
+      IsKill = false;
+  }
+
+  if (VgprBase + NewAddrDwords > 256)
+    return;
+
+  // Further check for implicit tied operands - this may be present if TFE is
+  // enabled
+  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+  unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+  unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+  int ToUntie = -1;
+  if (TFEVal || LWEVal) {
+    // TFE/LWE is enabled so we need to deal with an implicit tied operand
+    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+          MI.getOperand(i).isImplicit()) {
+        // This is the tied operand
+        assert(
+            ToUntie == -1 &&
+            "found more than one tied implicit operand when expecting only 1");
+        ToUntie = i;
+        MI.untieRegOperand(ToUntie);
+      }
+    }
+  }
+
+  unsigned NewOpcode =
+      AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+                            Info->VDataDwords, NewAddrDwords);
+  MI.setDesc(TII->get(NewOpcode));
+  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
+  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+  MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
+  for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+    MI.RemoveOperand(VAddr0Idx + 1);
+
+  if (ToUntie >= 0) {
+    MI.tieOperands(
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+        ToUntie - (Info->VAddrDwords - 1));
+  }
+}
+
 /// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
 /// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
 /// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -277,7 +372,9 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
         if (Opc == AMDGPU::S_BITSET0_B32 ||
             Opc == AMDGPU::S_BITSET1_B32) {
           Src0->ChangeToImmediate(NewImm);
-          MI.RemoveOperand(2);
+          // Remove the immediate and add the tied input.
+          MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+          MI.tieOperands(0, 2);
         } else {
           SrcImm->setImm(NewImm);
         }
@@ -458,6 +555,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
   std::vector<unsigned> I1Defs;
 
@@ -596,6 +694,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
           continue;
       }
 
+      if (TII->isMIMG(MI.getOpcode()) &&
+          ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+          MF.getProperties().hasProperty(
+              MachineFunctionProperties::Property::NoVRegs)) {
+        shrinkMIMG(MI);
+        continue;
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
 
@@ -625,10 +731,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
           // So, instead of forcing the instruction to write to VCC, we provide
           // a hint to the register allocator to use VCC and then we will run
           // this pass again after RA and shrink it if it outputs to VCC.
-          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
           continue;
         }
-        if (DstReg != AMDGPU::VCC)
+        if (DstReg != VCCReg)
           continue;
       }
 
@@ -641,10 +747,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
           continue;
         unsigned SReg = Src2->getReg();
         if (TargetRegisterInfo::isVirtualRegister(SReg)) {
-          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
+          MRI.setRegAllocationHint(SReg, 0, VCCReg);
           continue;
         }
-        if (SReg != AMDGPU::VCC)
+        if (SReg != VCCReg)
           continue;
       }
 
@@ -657,20 +763,24 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
                                                         AMDGPU::OpName::src2);
 
       if (SDst) {
-        if (SDst->getReg() != AMDGPU::VCC) {
+        bool Next = false;
+
+        if (SDst->getReg() != VCCReg) {
           if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
-            MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
-          continue;
+            MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
+          Next = true;
         }
 
         // All of the instructions with carry outs also have an SGPR input in
         // src2.
-        if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+        if (Src2 && Src2->getReg() != VCCReg) {
           if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
-            MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+            MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
+          Next = true;
+        }
 
+        if (Next)
           continue;
-        }
       }
 
       // We can shrink this instruction
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 879726b1528c..4e07efff55d8 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1,9 +1,8 @@
 //===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -149,6 +148,7 @@ private:
   CallingConv::ID CallingConv;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
+  const GCNSubtarget *ST;
   MachineRegisterInfo *MRI;
   LiveIntervals *LIS;
 
@@ -201,6 +201,8 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LiveIntervals>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -277,7 +279,7 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
     // for VCC, which can appear as the (implicit) input of a uniform branch,
     // e.g. when a loop counter is stored in a VGPR.
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
-      if (Reg == AMDGPU::EXEC)
+      if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
         continue;
 
       for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
@@ -386,7 +388,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
             unsigned Reg = MO.getReg();
 
             if (!TRI->isVirtualRegister(Reg) &&
-                TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+                TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
               Flags = StateWQM;
               break;
             }
@@ -619,13 +621,16 @@ void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   if (SaveWQM) {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64),
                  SaveWQM)
              .addReg(LiveMaskReg);
   } else {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
-                 AMDGPU::EXEC)
-             .addReg(AMDGPU::EXEC)
+    unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64),
+                 Exec)
+             .addReg(Exec)
              .addReg(LiveMaskReg);
   }
 
@@ -637,13 +642,15 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
                             unsigned SavedWQM) {
   MachineInstr *MI;
 
+  unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   if (SavedWQM) {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), Exec)
              .addReg(SavedWQM);
   } else {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
-                 AMDGPU::EXEC)
-             .addReg(AMDGPU::EXEC);
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+                 Exec)
+             .addReg(Exec);
   }
 
   LIS->InsertMachineInstrInMaps(*MI);
@@ -655,8 +662,7 @@ void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   assert(SaveOrig);
-  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
-               SaveOrig)
+  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::ENTER_WWM), SaveOrig)
            .addImm(-1);
   LIS->InsertMachineInstrInMaps(*MI);
 }
@@ -667,7 +673,8 @@ void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   assert(SavedOrig);
-  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
+  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM),
+               ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC)
            .addReg(SavedOrig);
   LIS->InsertMachineInstrInMaps(*MI);
 }
@@ -693,6 +700,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
   bool WQMFromExec = isEntry;
   char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
   char NonWWMState = 0;
+  const TargetRegisterClass *BoolRC = TRI->getBoolRC();
 
   auto II = MBB.getFirstNonPHI(), IE = MBB.end();
   if (isEntry)
@@ -780,13 +788,13 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
 
       if (Needs == StateWWM) {
         NonWWMState = State;
-        SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+        SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
         toWWM(MBB, Before, SavedNonWWMReg);
         State = StateWWM;
       } else {
         if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
           if (!WQMFromExec && (OutNeeds & StateWQM))
-            SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+            SavedWQMReg = MRI->createVirtualRegister(BoolRC);
 
           toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
           State = StateExact;
@@ -838,7 +846,23 @@ void SIWholeQuadMode::lowerCopyInstrs() {
   for (MachineInstr *MI : LowerToCopyInstrs) {
     for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
       MI->RemoveOperand(i);
-    MI->setDesc(TII->get(AMDGPU::COPY));
+
+    const unsigned Reg = MI->getOperand(0).getReg();
+
+    if (TRI->isVGPR(*MRI, Reg)) {
+      const TargetRegisterClass *regClass =
+          TargetRegisterInfo::isVirtualRegister(Reg)
+              ? MRI->getRegClass(Reg)
+              : TRI->getPhysRegClass(Reg);
+
+      const unsigned MovOp = TII->getMovOpcode(regClass);
+      MI->setDesc(TII->get(MovOp));
+
+      // And make it implicitly depend on exec (like all VALU movs should do).
+      MI->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+    } else {
+      MI->setDesc(TII->get(AMDGPU::COPY));
+    }
   }
 }
 
@@ -849,17 +873,18 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   LowerToCopyInstrs.clear();
   CallingConv = MF.getFunction().getCallingConv();
 
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  ST = &MF.getSubtarget<GCNSubtarget>();
 
-  TII = ST.getInstrInfo();
+  TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
   LIS = &getAnalysis<LiveIntervals>();
 
   char GlobalFlags = analyzeFunction(MF);
   unsigned LiveMaskReg = 0;
+  unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   if (!(GlobalFlags & StateWQM)) {
-    lowerLiveMaskQueries(AMDGPU::EXEC);
+    lowerLiveMaskQueries(Exec);
     if (!(GlobalFlags & StateWWM))
       return !LiveMaskQueries.empty();
   } else {
@@ -868,10 +893,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
 
     if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
-      LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
       MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
                                  TII->get(AMDGPU::COPY), LiveMaskReg)
-                             .addReg(AMDGPU::EXEC);
+                             .addReg(Exec);
       LIS->InsertMachineInstrInMaps(*MI);
     }
 
@@ -879,9 +904,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
 
     if (GlobalFlags == StateWQM) {
       // For a shader that needs only WQM, we can just set it once.
-      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
-              AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC);
+      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
+                AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+              Exec)
+          .addReg(Exec);
 
       lowerCopyInstrs();
       // EntryMI may become invalid here
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 8a063e1a4867..1b410b6b5912 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -1,9 +1,8 @@
 //===---- SMInstructions.td - Scalar Memory Instruction Defintions --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,7 +33,6 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   let hasSideEffects = 0;
   let UseNamedOperandTable = 1;
   let SchedRW = [WriteSMEM];
-  let SubtargetPredicate = isGCN;
 
   string Mnemonic = opName;
   string AsmOperands = asmOps;
@@ -42,6 +40,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   bits<1> has_sbase = 1;
   bits<1> has_sdst = 1;
   bit has_glc = 0;
+  bit has_dlc = 0;
   bits<1> has_offset = 1;
   bits<1> offset_is_imm = 0;
 }
@@ -81,6 +80,7 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
   let mayLoad = 1;
   let mayStore = 0;
   let has_glc = 1;
+  let has_dlc = 1;
 }
 
 class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
@@ -90,6 +90,7 @@ class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern
   let mayLoad = 0;
   let mayStore = 1;
   let has_glc = 1;
+  let has_dlc = 1;
   let ScalarStore = 1;
 }
 
@@ -110,21 +111,23 @@ multiclass SM_Pseudo_Loads<string opName,
                            RegisterClass dstClass> {
   def _IMM  : SM_Load_Pseudo <opName,
                               (outs dstClass:$sdst),
-                              (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
-                              " $sdst, $sbase, $offset$glc", []> {
+                              (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+                              " $sdst, $sbase, $offset$glc$dlc", []> {
     let offset_is_imm = 1;
     let BaseClass = baseClass;
     let PseudoInstr = opName # "_IMM";
     let has_glc = 1;
+    let has_dlc = 1;
   }
 
   def _SGPR  : SM_Load_Pseudo <opName,
                               (outs dstClass:$sdst),
-                              (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
-                              " $sdst, $sbase, $offset$glc", []> {
+                              (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+                              " $sdst, $sbase, $offset$glc$dlc", []> {
     let BaseClass = baseClass;
     let PseudoInstr = opName # "_SGPR";
     let has_glc = 1;
+    let has_dlc = 1;
   }
 }
 
@@ -132,8 +135,8 @@ multiclass SM_Pseudo_Stores<string opName,
                            RegisterClass baseClass,
                            RegisterClass srcClass> {
   def _IMM  : SM_Store_Pseudo <opName,
-    (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
-    " $sdata, $sbase, $offset$glc", []> {
+    (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+    " $sdata, $sbase, $offset$glc$dlc", []> {
     let offset_is_imm = 1;
     let BaseClass = baseClass;
     let SrcClass = srcClass;
@@ -141,8 +144,8 @@ multiclass SM_Pseudo_Stores<string opName,
   }
 
   def _SGPR  : SM_Store_Pseudo <opName,
-    (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
-    " $sdata, $sbase, $offset$glc", []> {
+    (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+    " $sdata, $sbase, $offset$glc$dlc", []> {
     let BaseClass = baseClass;
     let SrcClass = srcClass;
     let PseudoInstr = opName # "_SGPR";
@@ -154,17 +157,25 @@ multiclass SM_Pseudo_Discards<string opName> {
   def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
 }
 
-class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
   opName, (outs SReg_64_XEXEC:$sdst), (ins),
   " $sdst", [(set i64:$sdst, (node))]> {
   let hasSideEffects = 1;
-  let mayStore = 0;
+
+  // FIXME: This should be definitively mayStore = 0. TableGen
+  // brokenly tries to infer these based on the intrinsic properties
+  // corresponding to the IR attributes. The target intrinsics are
+  // considered as writing to memory for IR dependency purposes, but
+  // those can be modeled with hasSideEffects here. These also end up
+  // inferring differently for llvm.readcyclecounter and the amdgcn
+  // intrinsics.
+  let mayStore = ?;
   let mayLoad = 1;
   let has_sbase = 0;
   let has_offset = 0;
 }
 
-class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
+class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
   opName, (outs), (ins), "", [(node)]> {
   let hasSideEffects = 1;
   let mayStore = 1;
@@ -178,6 +189,16 @@ multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
   def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
 }
 
+class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
+  " $sdst", [(set i32:$sdst, (node))]> {
+  let hasSideEffects = 1;
+  let mayStore = 0;
+  let mayLoad = 1;
+  let has_sbase = 0;
+  let has_offset = 0;
+}
+
 //===----------------------------------------------------------------------===//
 // Scalar Atomic Memory Classes
 //===----------------------------------------------------------------------===//
@@ -191,6 +212,7 @@ class SM_Atomic_Pseudo <string opName,
   let mayLoad = 1;
   let mayStore = 1;
   let has_glc = 1;
+  let has_dlc = 1;
 
   // Should these be set?
   let ScalarStore = 1;
@@ -206,9 +228,9 @@ class SM_Pseudo_Atomic<string opName,
   SM_Atomic_Pseudo<opName,
                    !if(isRet, (outs dataClass:$sdst), (outs)),
                    !if(isImm,
-                       (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset),
-                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)),
-                   !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""),
+                       (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
+                       (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
+                   !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
                    isRet> {
   let offset_is_imm = isImm;
   let PseudoInstr = opName # !if(isImm,
@@ -266,6 +288,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
   "s_buffer_load_dwordx16", SReg_128, SReg_512
 >;
 
+let SubtargetPredicate = HasScalarStores in {
 defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
 defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
@@ -281,25 +304,32 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
 defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
   "s_buffer_store_dwordx4", SReg_128, SReg_128
 >;
-
+} // End SubtargetPredicate = HasScalarStores
 
 def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
 def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
 
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7GFX8GFX9 in {
 def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
-} // let SubtargetPredicate = isCIVI
+} // let SubtargetPredicate = isGFX7GFX8GFX9
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
+let OtherPredicates = [HasScalarStores] in {
 def S_DCACHE_WB     : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
 def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End OtherPredicates = [HasScalarStores]
 def S_MEMREALTIME   : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
 
 defm S_ATC_PROBE        : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
 defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
-} // SubtargetPredicate = isVI
+} // SubtargetPredicate = isGFX8Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
+def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
+} // End SubtargetPredicate = isGFX10Plus
 
-let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in {
+let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
 defm S_SCRATCH_LOAD_DWORD    : SM_Pseudo_Loads <"s_scratch_load_dword",   SReg_64, SReg_32_XM0_XEXEC>;
 defm S_SCRATCH_LOAD_DWORDX2  : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_SCRATCH_LOAD_DWORDX4  : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
@@ -307,7 +337,7 @@ defm S_SCRATCH_LOAD_DWORDX4  : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_6
 defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <"s_scratch_store_dword",   SReg_64, SReg_32_XM0_XEXEC>;
 defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
 defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
-} // SubtargetPredicate = HasFlatScratchInsts
+} // SubtargetPredicate = HasScalarFlatScratchInsts
 
 let SubtargetPredicate = HasScalarAtomics in {
 
@@ -369,7 +399,7 @@ defm S_ATOMIC_DEC_X2              : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_6
 
 } // let SubtargetPredicate = HasScalarAtomics
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = HasScalarAtomics in {
 defm S_DCACHE_DISCARD    : SM_Pseudo_Discards <"s_dcache_discard">;
 defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
 }
@@ -387,8 +417,8 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
   , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
   , Enc32 {
 
-  let AssemblerPredicates = [isSICI];
-  let DecoderNamespace = "SICI";
+  let AssemblerPredicates = [isGFX6GFX7];
+  let DecoderNamespace = "GFX6GFX7";
 
   let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, ?);
   let Inst{8}     = imm;
@@ -405,13 +435,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
 
   def _IMM_si : SMRD_Real_si <op, immPs> {
-    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
   }
 
   // FIXME: The operand name $offset is inconsistent with $soff used
   // in the pseudo
   def _SGPR_si : SMRD_Real_si <op, sgprPs> {
-    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 
 }
@@ -441,8 +471,8 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
   , Enc64 {
   bit glc;
 
-  let AssemblerPredicates = [isVI];
-  let DecoderNamespace = "VI";
+  let AssemblerPredicates = [isGFX8GFX9];
+  let DecoderNamespace = "GFX8";
 
   let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
   let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
@@ -458,10 +488,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
                             SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
   def _IMM_vi : SMEM_Real_vi <op, immPs> {
-    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
   }
   def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
-    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 }
 
@@ -479,11 +509,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
   // FIXME: The operand name $offset is inconsistent with $soff used
   // in the pseudo
   def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
-    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
   }
 
   def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
-    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
   }
 }
 
@@ -630,9 +660,9 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
   SM_Real<ps>,
   Enc64 {
 
-  let AssemblerPredicates = [isCIOnly];
-  let DecoderNamespace = "CI";
-  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
+  let AssemblerPredicates = [isGFX7Only];
+  let DecoderNamespace = "GFX7";
+  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
 
   let LGKM_CNT = ps.LGKM_CNT;
   let SMRD = ps.SMRD;
@@ -667,8 +697,8 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
   , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
   , Enc32 {
 
-  let AssemblerPredicates = [isCIOnly];
-  let DecoderNamespace = "CI";
+  let AssemblerPredicates = [isGFX7Only];
+  let DecoderNamespace = "GFX7";
 
   let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, ?);
   let Inst{8}     = imm;
@@ -684,7 +714,22 @@ def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
 // Scalar Memory Patterns
 //===----------------------------------------------------------------------===//
 
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> { // Inline predicate checks N; the GlobalISel predicate below checks MI.
+  let GISelPredicateCode = [{
+    if (!MI.hasOneMemOperand()) // Reject unless MI carries exactly one memory operand.
+      return false;
+    if (!isInstrUniform(MI)) // Reject when isInstrUniform() reports MI as non-uniform.
+      return false;
+
+    // FIXME: We should probably be caching this.
+    SmallVector<GEPInfo, 4> AddrInfo;
+    getAddrModeInfo(MI, MRI, AddrInfo); // Populates AddrInfo from MI (helper is defined outside this file).
+
+    if (hasVgprParts(AddrInfo)) // Presumably: some address component lives in a VGPR - confirm in the helper.
+      return false;
+    return true;
+  }];
+}
 
 def SMRDImm         : ComplexPattern<i64, 2, "SelectSMRDImm">;
 def SMRDImm32       : ComplexPattern<i64, 2, "SelectSMRDImm32">;
@@ -697,41 +742,49 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
   // 1. IMM offset
   def : GCNPat <
     (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
-    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
   >;
 
   // 2. 32-bit IMM offset on CI
   def : GCNPat <
     (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
-    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
-    let OtherPredicates = [isCIOnly];
+    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
+    let OtherPredicates = [isGFX7Only];
   }
 
   // 3. SGPR offset
   def : GCNPat <
     (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
+  >;
+
+  // 4. No offset
+  def : GCNPat <
+    (vt (smrd_load (i64 SReg_64:$sbase))),
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
   >;
 }
 
 multiclass SMLoad_Pattern <string Instr, ValueType vt> {
   // 1. Offset as an immediate
   def : GCNPat <
-    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
-    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
+                                        (as_i1imm $dlc)))
   >;
 
   // 2. 32-bit IMM offset on CI
   def : GCNPat <
-    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
-    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
-    let OtherPredicates = [isCIOnly];
+    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
+    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
+    let OtherPredicates = [isGFX7Only];
   }
 
   // 3. Offset loaded in an 32bit SGPR
   def : GCNPat <
-    (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+    (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
+                                         (as_i1imm $dlc)))
   >;
 }
 
@@ -759,18 +812,202 @@ defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8f32>;
 defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16f32>;
 } // End let AddedComplexity = 100
 
-let OtherPredicates = [isSICI] in {
 def : GCNPat <
   (i64 (readcyclecounter)),
   (S_MEMTIME)
 >;
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> : // 64-bit real encoding of SM pseudo `ps` with 8-bit opcode `op`.
+    SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
+  bit glc;
+  bit dlc;
+
+  let AssemblerPredicates = [isGFX10Plus];
+  let DecoderNamespace = "GFX10";
+
+  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?); // Bit 0 of sbase is not encoded.
+  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
+  let Inst{14}    = !if(ps.has_dlc, dlc, ?); // Left unset when the pseudo has no dlc.
+  let Inst{16}    = !if(ps.has_glc, glc, ?); // Left unset when the pseudo has no glc.
+  let Inst{25-18} = op;
+  let Inst{31-26} = 0x3d; // Constant field, identical for every record of this class.
+  let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?); // 20-bit immediate offset; unset for SGPR-offset forms.
+  let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding), // Immediate forms: SGPR_NULL's hardware encoding.
+                                          !if(ps.has_offset, offset{6-0}, ?)); // SGPR forms: low 7 bits of $offset.
 }
 
-let OtherPredicates = [isVI] in {
+multiclass SM_Real_Loads_gfx10<bits<8> op, string ps, // Emits the _IMM and _SGPR GFX10 reals for the load pseudo named by `ps`.
+                               SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+                               SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+  def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> { // Offset operand is smrd_offset_20.
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+  }
+  def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> { // Offset operand is an SReg_32 register.
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+  }
+}
 
-def : GCNPat <
-  (i64 (readcyclecounter)),
-  (S_MEMREALTIME)
->;
+class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> { // Store variant: bits 12-6 carry sdata instead of sdst.
+  bits<7> sdata;
+
+  let sdst = ?; // sdst is deliberately left unset for this variant.
+  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); // Still keyed on ps.has_sdst, but encodes sdata.
+}
+
+multiclass SM_Real_Stores_gfx10<bits<8> op, string ps, // Emits the _IMM and _SGPR GFX10 reals for the store pseudo named by `ps`.
+                                SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+                                SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+  // FIXME: The operand name $offset is inconsistent with $soff used
+  // in the pseudo
+  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> { // $sdata comes first, then base, smrd_offset_20 offset, glc, dlc.
+    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+  }
+
+  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> { // Same layout, with an SReg_32 register offset.
+    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+  }
+}
+
+defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">; // Loads use opcodes 0x000-0x004.
+defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarFlatScratchInsts in { // Scratch loads use opcodes 0x005-0x007.
+defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
+defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
+defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
+} // End SubtargetPredicate = HasScalarFlatScratchInsts
+
+defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">; // Buffer loads use opcodes 0x008-0x00c.
+defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarStores in { // All store reals are gated on HasScalarStores.
+defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
+let OtherPredicates = [HasScalarFlatScratchInsts] in { // Scratch stores additionally need HasScalarFlatScratchInsts.
+defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
+} // End OtherPredicates = [HasScalarFlatScratchInsts]
+defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
+} // End SubtargetPredicate = HasScalarStores
+
+def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>; // These defs are listed out of opcode order.
+def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
+def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
+def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
+def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
+
+let SubtargetPredicate = HasScalarStores in { // Same gate the S_DCACHE_WB pseudo gets via OtherPredicates.
+def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
+} // End SubtargetPredicate = HasScalarStores
+
+multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> { // Probe reals reuse the store encoding class.
+  def _IMM_gfx10  : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>; // Operand list is not overridden here.
+  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">; // Opcode 0x26.
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">; // Opcode 0x27.
+
+class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps> // GFX10 real encoding for scalar atomic pseudos.
+  : SMEM_Real_gfx10 <op, ps> {
+
+  bits<7> sdata;
+  bit dlc; // NOTE(review): SMEM_Real_gfx10 already declares `bit dlc`; this looks redundant - confirm before removing.
+
+  let Constraints = ps.Constraints; // Copied from the pseudo.
+  let DisableEncoding = ps.DisableEncoding; // Copied from the pseudo.
+
+  let glc = ps.glc; // glc is taken from the pseudo's own glc value.
+
+  let Inst{14} = !if(ps.has_dlc, dlc, 0); // Forced to 0 (not left unset) when the pseudo has no dlc.
+  let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0}); // glc set: encode sdst; otherwise encode sdata.
+}
+
+multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> { // Four reals per atomic, all sharing the one opcode `op`.
+  def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
+  def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+  def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>; // _RTN forms wrap the pseudo's _RTN records.
+  def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+}
+
+let SubtargetPredicate = HasScalarAtomics in {
 
-} // let OtherPredicates = [isVI]
+defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">; // S_BUFFER_ATOMIC_*: opcodes 0x40-0x4c.
+defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
+defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
+defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
+defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
+defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
+defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
+defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
+defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
+defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
+defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
+defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
+defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
+
+defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">; // S_BUFFER_ATOMIC_*_X2: opcodes 0x60-0x6c.
+defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
+defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
+defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
+defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
+defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
+defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
+defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
+defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
+defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
+defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
+defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
+defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
+
+defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">; // S_ATOMIC_*: opcodes 0x80-0x8c.
+defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
+defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
+defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
+defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
+defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
+defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
+defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
+defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
+defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
+defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
+defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
+defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
+
+defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">; // S_ATOMIC_*_X2: opcodes 0xa0-0xac.
+defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
+defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
+defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
+defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
+defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
+defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
+defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
+defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
+defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
+defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
+defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
+defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
+
+multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> { // Discard reals use the plain SMEM_Real_gfx10 encoding.
+  def _IMM_gfx10  : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+  def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">; // Opcode 0x28.
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">; // Opcode 0x29.
+
+} // End SubtargetPredicate = HasScalarAtomics
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index ca5e981ac5c2..dfafdccc05a3 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -1,15 +1,15 @@
 //===-- SOPInstructions.td - SOP Instruction Defintions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 def GPRIdxModeMatchClass : AsmOperandClass {
   let Name = "GPRIdxMode";
   let PredicateMethod = "isGPRIdxMode";
+  let ParserMethod = "parseGPRIdxMode";
   let RenderMethod = "addImmOperands";
 }
 
@@ -26,7 +26,6 @@ class SOP_Pseudo<string opName, dag outs, dag ins, string asmOps,
 
   let isPseudo = 1;
   let isCodeGenOnly = 1;
-  let SubtargetPredicate = isGCN;
 
   string Mnemonic = opName;
   string AsmOperands = asmOps;
@@ -78,10 +77,13 @@ class SOP1_Real<bits<8> op, SOP1_Pseudo ps> :
   let Inst{31-23} = 0x17d; //encoding;
 }
 
-class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0),
-  "$sdst, $src0", pattern
->;
+class SOP1_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo < // 32-bit in, 32-bit out; tied_in=1 adds an $sdst_in input.
+  opName, (outs SReg_32:$sdst),
+  !if(tied_in, (ins SSrc_b32:$src0, SReg_32:$sdst_in), // Tied form: extra SReg_32 input.
+               (ins SSrc_b32:$src0)), // Default form: $src0 only.
+  "$sdst, $src0", pattern> { // Asm string is the same in both forms.
+  let Constraints = !if(tied_in, "$sdst = $sdst_in", ""); // Ties $sdst to $sdst_in; empty when not tied.
+}
 
 // 32-bit input, no output.
 class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
@@ -108,10 +110,13 @@ class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
 >;
 
 // 32-bit input, 64-bit output.
-class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0),
-  "$sdst, $src0", pattern
->;
+class SOP1_64_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo < // tied_in=1 adds an $sdst_in input.
+  opName, (outs SReg_64:$sdst),
+  !if(tied_in, (ins SSrc_b32:$src0, SReg_64:$sdst_in), // Tied form: extra SReg_64 input.
+               (ins SSrc_b32:$src0)), // Default form: $src0 only.
+  "$sdst, $src0", pattern> { // Asm string is the same in both forms.
+  let Constraints = !if(tied_in, "$sdst = $sdst_in", ""); // Ties $sdst to $sdst_in; empty when not tied.
+}
 
 // no input, 64-bit output.
 class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
@@ -120,8 +125,8 @@ class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
 }
 
 // 64-bit input, no output
-class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
+class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins rc:$src0), "$src0", pattern> {
   let has_sdst = 0;
 }
 
@@ -147,12 +152,24 @@ let Defs = [SCC] in {
     [(set i64:$sdst, (not i64:$src0))]
   >;
   def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
-  def S_WQM_B64 : SOP1_64 <"s_wqm_b64",
-    [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))]
-  >;
+  def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
 } // End Defs = [SCC]
 
 
+let WaveSizePredicate = isWave32 in { // Pattern applies only under the isWave32 predicate.
+def : GCNPat <
+  (int_amdgcn_wqm_vote i1:$src0),
+  (S_WQM_B32 $src0) // Selects the 32-bit WQM instruction.
+>;
+} // End WaveSizePredicate = isWave32
+
+let WaveSizePredicate = isWave64 in { // Pattern applies only under the isWave64 predicate.
+def : GCNPat <
+  (int_amdgcn_wqm_vote i1:$src0),
+  (S_WQM_B64 $src0) // Selects the 64-bit WQM instruction.
+>;
+} // End WaveSizePredicate = isWave64
+
 def S_BREV_B32 : SOP1_32 <"s_brev_b32",
   [(set i32:$sdst, (bitreverse i32:$src0))]
 >;
@@ -191,10 +208,10 @@ def S_SEXT_I32_I16 : SOP1_32 <"s_sext_i32_i16",
   [(set i32:$sdst, (sext_inreg i32:$src0, i16))]
 >;
 
-def S_BITSET0_B32 : SOP1_32    <"s_bitset0_b32">;
-def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
-def S_BITSET1_B32 : SOP1_32    <"s_bitset1_b32">;
-def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
+def S_BITSET0_B32 : SOP1_32    <"s_bitset0_b32", [], 1>;
+def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
+def S_BITSET1_B32 : SOP1_32    <"s_bitset1_b32", [], 1>;
+def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
 def S_GETPC_B64 : SOP1_64_0  <"s_getpc_b64",
   [(set i64:$sdst, (int_amdgcn_s_getpc))]
 >;
@@ -207,7 +224,7 @@ def S_SETPC_B64 : SOP1_1  <"s_setpc_b64">;
 
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
 }
 } // End isTerminator = 1, isBarrier = 1
 
@@ -241,8 +258,11 @@ def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
 def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
 } // End Uses = [M0]
 
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
 def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
 def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
+} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9
+
 let Defs = [SCC] in {
 def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
 } // End Defs = [SCC]
@@ -255,7 +275,7 @@ def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
 }
 }
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
   let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
     def S_ANDN1_SAVEEXEC_B64 : SOP1_64<"s_andn1_saveexec_b64">;
     def S_ORN1_SAVEEXEC_B64  : SOP1_64<"s_orn1_saveexec_b64">;
@@ -264,7 +284,28 @@ let SubtargetPredicate = isGFX9 in {
   } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
 
   def S_BITREPLICATE_B64_B32 : SOP1_64_32<"s_bitreplicate_b64_b32">;
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in { // Every SOP1 pseudo in this group is gated on isGFX10Plus.
+  let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in { // Same flags as the _B64 saveexec group above.
+    def S_AND_SAVEEXEC_B32   : SOP1_32<"s_and_saveexec_b32">;
+    def S_OR_SAVEEXEC_B32    : SOP1_32<"s_or_saveexec_b32">;
+    def S_XOR_SAVEEXEC_B32   : SOP1_32<"s_xor_saveexec_b32">;
+    def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
+    def S_ORN2_SAVEEXEC_B32  : SOP1_32<"s_orn2_saveexec_b32">;
+    def S_NAND_SAVEEXEC_B32  : SOP1_32<"s_nand_saveexec_b32">;
+    def S_NOR_SAVEEXEC_B32   : SOP1_32<"s_nor_saveexec_b32">;
+    def S_XNOR_SAVEEXEC_B32  : SOP1_32<"s_xnor_saveexec_b32">;
+    def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
+    def S_ORN1_SAVEEXEC_B32  : SOP1_32<"s_orn1_saveexec_b32">;
+    def S_ANDN1_WREXEC_B32   : SOP1_32<"s_andn1_wrexec_b32">;
+    def S_ANDN2_WREXEC_B32   : SOP1_32<"s_andn2_wrexec_b32">;
+  } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
+
+  let Uses = [M0] in { // Declares an implicit use of M0.
+    def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
+  } // End Uses = [M0]
+} // End SubtargetPredicate = isGFX10Plus
 
 //===----------------------------------------------------------------------===//
 // SOP2 Instructions
@@ -302,6 +343,8 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps> :
   // copy relevant pseudo op flags
   let SubtargetPredicate = ps.SubtargetPredicate;
   let AsmMatchConverter  = ps.AsmMatchConverter;
+  let UseNamedOperandTable = ps.UseNamedOperandTable;
+  let TSFlags = ps.TSFlags;
 
   // encoding
   bits<7> sdst;
@@ -468,22 +511,22 @@ let AddedComplexity = 1 in {
 let Defs = [SCC] in {
 // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
-  [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
-  [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
-  [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
-  [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
-  [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
-  [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 } // End Defs = [SCC]
 
@@ -512,13 +555,14 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo <
   "$src0, $src1"
 > {
   let has_sdst = 0;
+  let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
 }
 
 let Defs = [SCC] in {
 def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
 } // End Defs = [SCC]
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
   def S_RFE_RESTORE_B64 : SOP2_Pseudo <
     "s_rfe_restore_b64", (outs),
     (ins SSrc_b64:$src0, SSrc_b32:$src1),
@@ -529,7 +573,7 @@ let SubtargetPredicate = isVI in {
   }
 }
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
   def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
   def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
   def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
@@ -543,7 +587,7 @@ let SubtargetPredicate = isGFX9 in {
 
   def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
   def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
-}
+} // End SubtargetPredicate = isGFX9Plus
 
 //===----------------------------------------------------------------------===//
 // SOPK Instructions
@@ -555,7 +599,6 @@ class SOPK_Pseudo <string opName, dag outs, dag ins,
   SIMCInstr<opName, SIEncodingFamily.NONE> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
-  let SubtargetPredicate = isGCN;
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -618,6 +661,19 @@ class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
   "$sdst, $simm16",
   pattern>;
 
+class SOPK_32_BR <string opName, list<dag> pattern=[]> : SOPK_Pseudo < // SOPK pseudo marked as a branch terminator.
+  opName,
+  (outs), // No explicit outputs.
+  (ins sopp_brtarget:$simm16, SReg_32:$sdst), // $sdst is listed as an input despite its name.
+  "$sdst, $simm16",
+  pattern> {
+  let Defs = [EXEC]; // EXEC is both implicitly defined and implicitly used.
+  let Uses = [EXEC];
+  let isBranch = 1;
+  let isTerminator = 1;
+  let SchedRW = [WriteBranch];
+}
+
 class SOPK_SCC <string opName, string base_op, bit isSignExt> : SOPK_Pseudo <
   opName,
   (outs),
@@ -684,9 +740,10 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
   def S_MULK_I32 : SOPK_32TIE <"s_mulk_i32">;
 }
 
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
 def S_CBRANCH_I_FORK : SOPK_Pseudo <
   "s_cbranch_i_fork",
-  (outs), (ins SReg_64:$sdst, s16imm:$simm16),
+  (outs), (ins SReg_64:$sdst, sopp_brtarget:$simm16),
   "$sdst, $simm16"
 >;
 
@@ -720,15 +777,46 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo <
 
 } // End hasSideEffects = 1
 
-let SubtargetPredicate = isGFX9 in {
+class SOPK_WAITCNT<string opName, list<dag> pat=[]> : // SOPK pseudo taking a register plus a 16-bit immediate.
+    SOPK_Pseudo<
+        opName,
+        (outs), // No explicit outputs.
+        (ins SReg_32:$sdst, s16imm:$simm16), // $sdst is an input operand here.
+        "$sdst, $simm16",
+        pat> {
+  let hasSideEffects = 1;
+  let mayLoad = 1; // Overrides the SOPK_Pseudo default of 0.
+  let mayStore = 1; // Overrides the SOPK_Pseudo default of 0.
+  let has_sdst = 1; // First source takes place of sdst in encoding
+}
+
+let SubtargetPredicate = isGFX9Plus in {
   def S_CALL_B64 : SOPK_Pseudo<
       "s_call_b64",
       (outs SReg_64:$sdst),
-      (ins s16imm:$simm16),
+      (ins sopp_brtarget:$simm16),
       "$sdst, $simm16"> {
     let isCall = 1;
   }
-}
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in { // Every SOPK pseudo in this group is gated on isGFX10Plus.
+  def S_VERSION : SOPK_Pseudo<
+      "s_version",
+      (outs),
+      (ins s16imm:$simm16), // The only operand is the 16-bit immediate.
+      "$simm16"> {
+    let has_sdst = 0; // No sdst field for this instruction.
+  }
+
+  def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">; // Branch-style SOPK via SOPK_32_BR.
+  def S_SUBVECTOR_LOOP_END   : SOPK_32_BR<"s_subvector_loop_end">;
+
+  def S_WAITCNT_VSCNT   : SOPK_WAITCNT<"s_waitcnt_vscnt">; // The four s_waitcnt_* forms share SOPK_WAITCNT.
+  def S_WAITCNT_VMCNT   : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
+  def S_WAITCNT_EXPCNT  : SOPK_WAITCNT<"s_waitcnt_expcnt">;
+  def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">;
+} // End SubtargetPredicate = isGFX10Plus
 
 //===----------------------------------------------------------------------===//
 // SOPC Instructions
@@ -756,7 +844,6 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm,
   let Defs = [SCC];
   let SchedRW = [WriteSALU];
   let UseNamedOperandTable = 1;
-  let SubtargetPredicate = isGCN;
 }
 
 class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
@@ -811,12 +898,13 @@ def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
 def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
 def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
 def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
 def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
 def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
 def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
-}
+} // End SubtargetPredicate = isGFX8Plus
 
 let SubtargetPredicate = HasVGPRIndexMode in {
 def S_SET_GPR_IDX_ON : SOPC <0x11,
@@ -834,6 +922,10 @@ def S_SET_GPR_IDX_ON : SOPC <0x11,
 // SOPP Instructions
 //===----------------------------------------------------------------------===//
 
+class Base_SOPP <string asm> {
+  string AsmString = asm;
+}
+
 class SOPPe <bits<7> op> : Enc32 {
   bits <16> simm16;
 
@@ -843,7 +935,7 @@ class SOPPe <bits<7> op> : Enc32 {
 }
 
 class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
-  InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+  InstSI <(outs), ins, asm, pattern >, SOPPe <op>, Base_SOPP <asm> {
 
   let mayLoad = 0;
   let mayStore = 0;
@@ -854,92 +946,124 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
   let SchedRW = [WriteSALU];
 
   let UseNamedOperandTable = 1;
-  let SubtargetPredicate = isGCN;
 }
 
-
 def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
 
+class SOPP_w_nop_e <bits<7> op> : Enc64 {
+  bits <16> simm16;
+
+  let Inst{15-0} = simm16;
+  let Inst{22-16} = op;
+  let Inst{31-23} = 0x17f; // encoding
+  let Inst{47-32} = 0x0;
+  let Inst{54-48} = S_NOP.Inst{22-16}; // opcode
+  let Inst{63-55} = S_NOP.Inst{31-23}; // encoding
+}
+
+class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+  InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> {
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let SALU = 1;
+  let SOPP = 1;
+  let Size = 8;
+  let SchedRW = [WriteSALU];
+
+  let UseNamedOperandTable = 1;
+}
+
+multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> {
+  def "" : SOPP <op, ins, asm, pattern>;
+  def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>;
+}
+
 let isTerminator = 1 in {
 
-def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
-  [(AMDGPUendpgm)]> {
-  let simm16 = 0;
+def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> {
   let isBarrier = 1;
   let isReturn = 1;
 }
 
-let SubtargetPredicate = isVI in {
 def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
+  let SubtargetPredicate = isGFX8Plus;
   let simm16 = 0;
   let isBarrier = 1;
   let isReturn = 1;
 }
-}
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
   let isBarrier = 1, isReturn = 1, simm16 = 0 in {
     def S_ENDPGM_ORDERED_PS_DONE :
       SOPP<0x01e, (ins), "s_endpgm_ordered_ps_done">;
   } // End isBarrier = 1, isReturn = 1, simm16 = 0
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+  let isBarrier = 1, isReturn = 1, simm16 = 0 in {
+    def S_CODE_END :
+      SOPP<0x01f, (ins), "s_code_end">;
+  } // End isBarrier = 1, isReturn = 1, simm16 = 0
+} // End SubtargetPredicate = isGFX10Plus
 
 let isBranch = 1, SchedRW = [WriteBranch] in {
-def S_BRANCH : SOPP <
+let isBarrier = 1 in {
+defm S_BRANCH : SOPP_With_Relaxation <
   0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
-  [(br bb:$simm16)]> {
-  let isBarrier = 1;
+  [(br bb:$simm16)]>;
 }
 
 let Uses = [SCC] in {
-def S_CBRANCH_SCC0 : SOPP <
+defm S_CBRANCH_SCC0 : SOPP_With_Relaxation <
   0x00000004, (ins sopp_brtarget:$simm16),
   "s_cbranch_scc0 $simm16"
 >;
-def S_CBRANCH_SCC1 : SOPP <
+defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
   0x00000005, (ins sopp_brtarget:$simm16),
   "s_cbranch_scc1 $simm16"
 >;
 } // End Uses = [SCC]
 
 let Uses = [VCC] in {
-def S_CBRANCH_VCCZ : SOPP <
+defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
   0x00000006, (ins sopp_brtarget:$simm16),
   "s_cbranch_vccz $simm16"
 >;
-def S_CBRANCH_VCCNZ : SOPP <
+defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
   0x00000007, (ins sopp_brtarget:$simm16),
   "s_cbranch_vccnz $simm16"
 >;
 } // End Uses = [VCC]
 
 let Uses = [EXEC] in {
-def S_CBRANCH_EXECZ : SOPP <
+defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
   0x00000008, (ins sopp_brtarget:$simm16),
   "s_cbranch_execz $simm16"
 >;
-def S_CBRANCH_EXECNZ : SOPP <
+defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
   0x00000009, (ins sopp_brtarget:$simm16),
   "s_cbranch_execnz $simm16"
 >;
 } // End Uses = [EXEC]
 
-def S_CBRANCH_CDBGSYS : SOPP <
+defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
   0x00000017, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys $simm16"
 >;
 
-def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
   0x0000001A, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys_and_user $simm16"
 >;
 
-def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
   0x00000019, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbgsys_or_user $simm16"
 >;
 
-def S_CBRANCH_CDBGUSER : SOPP <
+defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
   0x00000018, (ins sopp_brtarget:$simm16),
   "s_cbranch_cdbguser $simm16"
 >;
@@ -957,16 +1081,16 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
   let isConvergent = 1;
 }
 
-let SubtargetPredicate = isVI in {
 def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
+  let SubtargetPredicate = isGFX8Plus;
   let simm16 = 0;
   let mayLoad = 1;
   let mayStore = 1;
 }
-}
 
 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
+    [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
 def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
 def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
 
@@ -994,7 +1118,10 @@ def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $
 >;
 } // End Uses = [EXEC, M0]
 
-def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
+  let isTrap = 1;
+}
+
 def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
 }
@@ -1028,6 +1155,25 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
 }
 }
 
+let SubtargetPredicate = isGFX10Plus in {
+  def S_INST_PREFETCH :
+    SOPP<0x020, (ins s16imm:$simm16), "s_inst_prefetch $simm16">;
+  def S_CLAUSE :
+    SOPP<0x021, (ins s16imm:$simm16), "s_clause $simm16">;
+  def S_WAITCNT_IDLE :
+    SOPP <0x022, (ins), "s_wait_idle"> {
+      let simm16 = 0;
+    }
+  def S_WAITCNT_DEPCTR :
+    SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
+  def S_ROUND_MODE :
+    SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
+  def S_DENORM_MODE :
+    SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">;
+  def S_TTRACEDATA_IMM :
+    SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
+} // End SubtargetPredicate = isGFX10Plus
+
 //===----------------------------------------------------------------------===//
 // S_GETREG_B32 Intrinsic Pattern.
 //===----------------------------------------------------------------------===//
@@ -1040,6 +1186,11 @@ def : GCNPat <
 // SOP1 Patterns
 //===----------------------------------------------------------------------===//
 
+def : GCNPat <
+  (AMDGPUendpgm),
+    (S_ENDPGM (i16 0))
+>;
+
 def : GCNPat <
   (i64 (ctpop i64:$src)),
     (i64 (REG_SEQUENCE SReg_64,
@@ -1097,162 +1248,261 @@ def : GCNPat<
 >;
 
 
+//===----------------------------------------------------------------------===//
+// Target-specific instruction encodings.
+//===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// SOPP Patterns
+// SOP1 - GFX10.
 //===----------------------------------------------------------------------===//
 
-def : GCNPat <
-  (int_amdgcn_s_waitcnt i32:$simm16),
-  (S_WAITCNT (as_i16imm $simm16))
->;
+class Select_gfx10<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX10> {
+  Predicate AssemblerPredicate = isGFX10Plus;
+  string DecoderNamespace      = "GFX10";
+}
+
+multiclass SOP1_Real_gfx10<bits<8> op> {
+  def _gfx10 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+               Select_gfx10<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
 
+defm S_ANDN1_SAVEEXEC_B64   : SOP1_Real_gfx10<0x037>;
+defm S_ORN1_SAVEEXEC_B64    : SOP1_Real_gfx10<0x038>;
+defm S_ANDN1_WREXEC_B64     : SOP1_Real_gfx10<0x039>;
+defm S_ANDN2_WREXEC_B64     : SOP1_Real_gfx10<0x03a>;
+defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
+defm S_AND_SAVEEXEC_B32     : SOP1_Real_gfx10<0x03c>;
+defm S_OR_SAVEEXEC_B32      : SOP1_Real_gfx10<0x03d>;
+defm S_XOR_SAVEEXEC_B32     : SOP1_Real_gfx10<0x03e>;
+defm S_ANDN2_SAVEEXEC_B32   : SOP1_Real_gfx10<0x03f>;
+defm S_ORN2_SAVEEXEC_B32    : SOP1_Real_gfx10<0x040>;
+defm S_NAND_SAVEEXEC_B32    : SOP1_Real_gfx10<0x041>;
+defm S_NOR_SAVEEXEC_B32     : SOP1_Real_gfx10<0x042>;
+defm S_XNOR_SAVEEXEC_B32    : SOP1_Real_gfx10<0x043>;
+defm S_ANDN1_SAVEEXEC_B32   : SOP1_Real_gfx10<0x044>;
+defm S_ORN1_SAVEEXEC_B32    : SOP1_Real_gfx10<0x045>;
+defm S_ANDN1_WREXEC_B32     : SOP1_Real_gfx10<0x046>;
+defm S_ANDN2_WREXEC_B32     : SOP1_Real_gfx10<0x047>;
+defm S_MOVRELSD_2_B32       : SOP1_Real_gfx10<0x049>;
 
 //===----------------------------------------------------------------------===//
-// Real target instructions, move this to the appropriate subtarget TD file
+// SOP1 - GFX6, GFX7.
 //===----------------------------------------------------------------------===//
 
-class Select_si<string opName> :
-  SIMCInstr<opName, SIEncodingFamily.SI> {
-  list<Predicate> AssemblerPredicates = [isSICI];
-  string DecoderNamespace = "SICI";
+class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
+  Predicate AssemblerPredicate = isGFX6GFX7;
+  string DecoderNamespace      = "GFX6GFX7";
 }
 
-class SOP1_Real_si<bits<8> op, SOP1_Pseudo ps> :
-  SOP1_Real<op, ps>,
-  Select_si<ps.Mnemonic>;
+multiclass SOP1_Real_gfx6_gfx7<bits<8> op> {
+  def _gfx6_gfx7 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+                   Select_gfx6_gfx7<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
 
-class SOP2_Real_si<bits<7> op, SOP2_Pseudo ps> :
-  SOP2_Real<op, ps>,
-  Select_si<ps.Mnemonic>;
+multiclass SOP1_Real_gfx6_gfx7_gfx10<bits<8> op> :
+  SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10<op>;
+
+defm S_CBRANCH_JOIN  : SOP1_Real_gfx6_gfx7<0x032>;
+defm S_MOV_REGRD_B32 : SOP1_Real_gfx6_gfx7<0x033>;
+
+defm S_MOV_B32            : SOP1_Real_gfx6_gfx7_gfx10<0x003>;
+defm S_MOV_B64            : SOP1_Real_gfx6_gfx7_gfx10<0x004>;
+defm S_CMOV_B32           : SOP1_Real_gfx6_gfx7_gfx10<0x005>;
+defm S_CMOV_B64           : SOP1_Real_gfx6_gfx7_gfx10<0x006>;
+defm S_NOT_B32            : SOP1_Real_gfx6_gfx7_gfx10<0x007>;
+defm S_NOT_B64            : SOP1_Real_gfx6_gfx7_gfx10<0x008>;
+defm S_WQM_B32            : SOP1_Real_gfx6_gfx7_gfx10<0x009>;
+defm S_WQM_B64            : SOP1_Real_gfx6_gfx7_gfx10<0x00a>;
+defm S_BREV_B32           : SOP1_Real_gfx6_gfx7_gfx10<0x00b>;
+defm S_BREV_B64           : SOP1_Real_gfx6_gfx7_gfx10<0x00c>;
+defm S_BCNT0_I32_B32      : SOP1_Real_gfx6_gfx7_gfx10<0x00d>;
+defm S_BCNT0_I32_B64      : SOP1_Real_gfx6_gfx7_gfx10<0x00e>;
+defm S_BCNT1_I32_B32      : SOP1_Real_gfx6_gfx7_gfx10<0x00f>;
+defm S_BCNT1_I32_B64      : SOP1_Real_gfx6_gfx7_gfx10<0x010>;
+defm S_FF0_I32_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x011>;
+defm S_FF0_I32_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x012>;
+defm S_FF1_I32_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x013>;
+defm S_FF1_I32_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x014>;
+defm S_FLBIT_I32_B32      : SOP1_Real_gfx6_gfx7_gfx10<0x015>;
+defm S_FLBIT_I32_B64      : SOP1_Real_gfx6_gfx7_gfx10<0x016>;
+defm S_FLBIT_I32          : SOP1_Real_gfx6_gfx7_gfx10<0x017>;
+defm S_FLBIT_I32_I64      : SOP1_Real_gfx6_gfx7_gfx10<0x018>;
+defm S_SEXT_I32_I8        : SOP1_Real_gfx6_gfx7_gfx10<0x019>;
+defm S_SEXT_I32_I16       : SOP1_Real_gfx6_gfx7_gfx10<0x01a>;
+defm S_BITSET0_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x01b>;
+defm S_BITSET0_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x01c>;
+defm S_BITSET1_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x01d>;
+defm S_BITSET1_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x01e>;
+defm S_GETPC_B64          : SOP1_Real_gfx6_gfx7_gfx10<0x01f>;
+defm S_SETPC_B64          : SOP1_Real_gfx6_gfx7_gfx10<0x020>;
+defm S_SWAPPC_B64         : SOP1_Real_gfx6_gfx7_gfx10<0x021>;
+defm S_RFE_B64            : SOP1_Real_gfx6_gfx7_gfx10<0x022>;
+defm S_AND_SAVEEXEC_B64   : SOP1_Real_gfx6_gfx7_gfx10<0x024>;
+defm S_OR_SAVEEXEC_B64    : SOP1_Real_gfx6_gfx7_gfx10<0x025>;
+defm S_XOR_SAVEEXEC_B64   : SOP1_Real_gfx6_gfx7_gfx10<0x026>;
+defm S_ANDN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x027>;
+defm S_ORN2_SAVEEXEC_B64  : SOP1_Real_gfx6_gfx7_gfx10<0x028>;
+defm S_NAND_SAVEEXEC_B64  : SOP1_Real_gfx6_gfx7_gfx10<0x029>;
+defm S_NOR_SAVEEXEC_B64   : SOP1_Real_gfx6_gfx7_gfx10<0x02a>;
+defm S_XNOR_SAVEEXEC_B64  : SOP1_Real_gfx6_gfx7_gfx10<0x02b>;
+defm S_QUADMASK_B32       : SOP1_Real_gfx6_gfx7_gfx10<0x02c>;
+defm S_QUADMASK_B64       : SOP1_Real_gfx6_gfx7_gfx10<0x02d>;
+defm S_MOVRELS_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x02e>;
+defm S_MOVRELS_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x02f>;
+defm S_MOVRELD_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x030>;
+defm S_MOVRELD_B64        : SOP1_Real_gfx6_gfx7_gfx10<0x031>;
+defm S_ABS_I32            : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
+defm S_MOV_FED_B32        : SOP1_Real_gfx6_gfx7_gfx10<0x035>;
 
-class SOPK_Real_si<bits<5> op, SOPK_Pseudo ps> :
-  SOPK_Real32<op, ps>,
-  Select_si<ps.Mnemonic>;
-
-def S_MOV_B32_si           : SOP1_Real_si <0x03, S_MOV_B32>;
-def S_MOV_B64_si           : SOP1_Real_si <0x04, S_MOV_B64>;
-def S_CMOV_B32_si          : SOP1_Real_si <0x05, S_CMOV_B32>;
-def S_CMOV_B64_si          : SOP1_Real_si <0x06, S_CMOV_B64>;
-def S_NOT_B32_si           : SOP1_Real_si <0x07, S_NOT_B32>;
-def S_NOT_B64_si           : SOP1_Real_si <0x08, S_NOT_B64>;
-def S_WQM_B32_si           : SOP1_Real_si <0x09, S_WQM_B32>;
-def S_WQM_B64_si           : SOP1_Real_si <0x0a, S_WQM_B64>;
-def S_BREV_B32_si          : SOP1_Real_si <0x0b, S_BREV_B32>;
-def S_BREV_B64_si          : SOP1_Real_si <0x0c, S_BREV_B64>;
-def S_BCNT0_I32_B32_si     : SOP1_Real_si <0x0d, S_BCNT0_I32_B32>;
-def S_BCNT0_I32_B64_si     : SOP1_Real_si <0x0e, S_BCNT0_I32_B64>;
-def S_BCNT1_I32_B32_si     : SOP1_Real_si <0x0f, S_BCNT1_I32_B32>;
-def S_BCNT1_I32_B64_si     : SOP1_Real_si <0x10, S_BCNT1_I32_B64>;
-def S_FF0_I32_B32_si       : SOP1_Real_si <0x11, S_FF0_I32_B32>;
-def S_FF0_I32_B64_si       : SOP1_Real_si <0x12, S_FF0_I32_B64>;
-def S_FF1_I32_B32_si       : SOP1_Real_si <0x13, S_FF1_I32_B32>;
-def S_FF1_I32_B64_si       : SOP1_Real_si <0x14, S_FF1_I32_B64>;
-def S_FLBIT_I32_B32_si     : SOP1_Real_si <0x15, S_FLBIT_I32_B32>;
-def S_FLBIT_I32_B64_si     : SOP1_Real_si <0x16, S_FLBIT_I32_B64>;
-def S_FLBIT_I32_si         : SOP1_Real_si <0x17, S_FLBIT_I32>;
-def S_FLBIT_I32_I64_si     : SOP1_Real_si <0x18, S_FLBIT_I32_I64>;
-def S_SEXT_I32_I8_si       : SOP1_Real_si <0x19, S_SEXT_I32_I8>;
-def S_SEXT_I32_I16_si      : SOP1_Real_si <0x1a, S_SEXT_I32_I16>;
-def S_BITSET0_B32_si       : SOP1_Real_si <0x1b, S_BITSET0_B32>;
-def S_BITSET0_B64_si       : SOP1_Real_si <0x1c, S_BITSET0_B64>;
-def S_BITSET1_B32_si       : SOP1_Real_si <0x1d, S_BITSET1_B32>;
-def S_BITSET1_B64_si       : SOP1_Real_si <0x1e, S_BITSET1_B64>;
-def S_GETPC_B64_si         : SOP1_Real_si <0x1f, S_GETPC_B64>;
-def S_SETPC_B64_si         : SOP1_Real_si <0x20, S_SETPC_B64>;
-def S_SWAPPC_B64_si        : SOP1_Real_si <0x21, S_SWAPPC_B64>;
-def S_RFE_B64_si           : SOP1_Real_si <0x22, S_RFE_B64>;
-def S_AND_SAVEEXEC_B64_si  : SOP1_Real_si <0x24, S_AND_SAVEEXEC_B64>;
-def S_OR_SAVEEXEC_B64_si   : SOP1_Real_si <0x25, S_OR_SAVEEXEC_B64>;
-def S_XOR_SAVEEXEC_B64_si  : SOP1_Real_si <0x26, S_XOR_SAVEEXEC_B64>;
-def S_ANDN2_SAVEEXEC_B64_si: SOP1_Real_si <0x27, S_ANDN2_SAVEEXEC_B64>;
-def S_ORN2_SAVEEXEC_B64_si : SOP1_Real_si <0x28, S_ORN2_SAVEEXEC_B64>;
-def S_NAND_SAVEEXEC_B64_si : SOP1_Real_si <0x29, S_NAND_SAVEEXEC_B64>;
-def S_NOR_SAVEEXEC_B64_si  : SOP1_Real_si <0x2a, S_NOR_SAVEEXEC_B64>;
-def S_XNOR_SAVEEXEC_B64_si : SOP1_Real_si <0x2b, S_XNOR_SAVEEXEC_B64>;
-def S_QUADMASK_B32_si      : SOP1_Real_si <0x2c, S_QUADMASK_B32>;
-def S_QUADMASK_B64_si      : SOP1_Real_si <0x2d, S_QUADMASK_B64>;
-def S_MOVRELS_B32_si       : SOP1_Real_si <0x2e, S_MOVRELS_B32>;
-def S_MOVRELS_B64_si       : SOP1_Real_si <0x2f, S_MOVRELS_B64>;
-def S_MOVRELD_B32_si       : SOP1_Real_si <0x30, S_MOVRELD_B32>;
-def S_MOVRELD_B64_si       : SOP1_Real_si <0x31, S_MOVRELD_B64>;
-def S_CBRANCH_JOIN_si      : SOP1_Real_si <0x32, S_CBRANCH_JOIN>;
-def S_MOV_REGRD_B32_si     : SOP1_Real_si <0x33, S_MOV_REGRD_B32>;
-def S_ABS_I32_si           : SOP1_Real_si <0x34, S_ABS_I32>;
-def S_MOV_FED_B32_si       : SOP1_Real_si <0x35, S_MOV_FED_B32>;
-
-def S_ADD_U32_si           : SOP2_Real_si <0x00, S_ADD_U32>;
-def S_ADD_I32_si           : SOP2_Real_si <0x02, S_ADD_I32>;
-def S_SUB_U32_si           : SOP2_Real_si <0x01, S_SUB_U32>;
-def S_SUB_I32_si           : SOP2_Real_si <0x03, S_SUB_I32>;
-def S_ADDC_U32_si          : SOP2_Real_si <0x04, S_ADDC_U32>;
-def S_SUBB_U32_si          : SOP2_Real_si <0x05, S_SUBB_U32>;
-def S_MIN_I32_si           : SOP2_Real_si <0x06, S_MIN_I32>;
-def S_MIN_U32_si           : SOP2_Real_si <0x07, S_MIN_U32>;
-def S_MAX_I32_si           : SOP2_Real_si <0x08, S_MAX_I32>;
-def S_MAX_U32_si           : SOP2_Real_si <0x09, S_MAX_U32>;
-def S_CSELECT_B32_si       : SOP2_Real_si <0x0a, S_CSELECT_B32>;
-def S_CSELECT_B64_si       : SOP2_Real_si <0x0b, S_CSELECT_B64>;
-def S_AND_B32_si           : SOP2_Real_si <0x0e, S_AND_B32>;
-def S_AND_B64_si           : SOP2_Real_si <0x0f, S_AND_B64>;
-def S_OR_B32_si            : SOP2_Real_si <0x10, S_OR_B32>;
-def S_OR_B64_si            : SOP2_Real_si <0x11, S_OR_B64>;
-def S_XOR_B32_si           : SOP2_Real_si <0x12, S_XOR_B32>;
-def S_XOR_B64_si           : SOP2_Real_si <0x13, S_XOR_B64>;
-def S_ANDN2_B32_si         : SOP2_Real_si <0x14, S_ANDN2_B32>;
-def S_ANDN2_B64_si         : SOP2_Real_si <0x15, S_ANDN2_B64>;
-def S_ORN2_B32_si          : SOP2_Real_si <0x16, S_ORN2_B32>;
-def S_ORN2_B64_si          : SOP2_Real_si <0x17, S_ORN2_B64>;
-def S_NAND_B32_si          : SOP2_Real_si <0x18, S_NAND_B32>;
-def S_NAND_B64_si          : SOP2_Real_si <0x19, S_NAND_B64>;
-def S_NOR_B32_si           : SOP2_Real_si <0x1a, S_NOR_B32>;
-def S_NOR_B64_si           : SOP2_Real_si <0x1b, S_NOR_B64>;
-def S_XNOR_B32_si          : SOP2_Real_si <0x1c, S_XNOR_B32>;
-def S_XNOR_B64_si          : SOP2_Real_si <0x1d, S_XNOR_B64>;
-def S_LSHL_B32_si          : SOP2_Real_si <0x1e, S_LSHL_B32>;
-def S_LSHL_B64_si          : SOP2_Real_si <0x1f, S_LSHL_B64>;
-def S_LSHR_B32_si          : SOP2_Real_si <0x20, S_LSHR_B32>;
-def S_LSHR_B64_si          : SOP2_Real_si <0x21, S_LSHR_B64>;
-def S_ASHR_I32_si          : SOP2_Real_si <0x22, S_ASHR_I32>;
-def S_ASHR_I64_si          : SOP2_Real_si <0x23, S_ASHR_I64>;
-def S_BFM_B32_si           : SOP2_Real_si <0x24, S_BFM_B32>;
-def S_BFM_B64_si           : SOP2_Real_si <0x25, S_BFM_B64>;
-def S_MUL_I32_si           : SOP2_Real_si <0x26, S_MUL_I32>;
-def S_BFE_U32_si           : SOP2_Real_si <0x27, S_BFE_U32>;
-def S_BFE_I32_si           : SOP2_Real_si <0x28, S_BFE_I32>;
-def S_BFE_U64_si           : SOP2_Real_si <0x29, S_BFE_U64>;
-def S_BFE_I64_si           : SOP2_Real_si <0x2a, S_BFE_I64>;
-def S_CBRANCH_G_FORK_si    : SOP2_Real_si <0x2b, S_CBRANCH_G_FORK>;
-def S_ABSDIFF_I32_si       : SOP2_Real_si <0x2c, S_ABSDIFF_I32>;
-
-def S_MOVK_I32_si          : SOPK_Real_si <0x00, S_MOVK_I32>;
-def S_CMOVK_I32_si         : SOPK_Real_si <0x02, S_CMOVK_I32>;
-def S_CMPK_EQ_I32_si       : SOPK_Real_si <0x03, S_CMPK_EQ_I32>;
-def S_CMPK_LG_I32_si       : SOPK_Real_si <0x04, S_CMPK_LG_I32>;
-def S_CMPK_GT_I32_si       : SOPK_Real_si <0x05, S_CMPK_GT_I32>;
-def S_CMPK_GE_I32_si       : SOPK_Real_si <0x06, S_CMPK_GE_I32>;
-def S_CMPK_LT_I32_si       : SOPK_Real_si <0x07, S_CMPK_LT_I32>;
-def S_CMPK_LE_I32_si       : SOPK_Real_si <0x08, S_CMPK_LE_I32>;
-def S_CMPK_EQ_U32_si       : SOPK_Real_si <0x09, S_CMPK_EQ_U32>;
-def S_CMPK_LG_U32_si       : SOPK_Real_si <0x0a, S_CMPK_LG_U32>;
-def S_CMPK_GT_U32_si       : SOPK_Real_si <0x0b, S_CMPK_GT_U32>;
-def S_CMPK_GE_U32_si       : SOPK_Real_si <0x0c, S_CMPK_GE_U32>;
-def S_CMPK_LT_U32_si       : SOPK_Real_si <0x0d, S_CMPK_LT_U32>;
-def S_CMPK_LE_U32_si       : SOPK_Real_si <0x0e, S_CMPK_LE_U32>;
-def S_ADDK_I32_si          : SOPK_Real_si <0x0f, S_ADDK_I32>;
-def S_MULK_I32_si          : SOPK_Real_si <0x10, S_MULK_I32>;
-def S_CBRANCH_I_FORK_si    : SOPK_Real_si <0x11, S_CBRANCH_I_FORK>;
-def S_GETREG_B32_si        : SOPK_Real_si <0x12, S_GETREG_B32>;
-def S_SETREG_B32_si        : SOPK_Real_si <0x13, S_SETREG_B32>;
-//def S_GETREG_REGRD_B32_si  : SOPK_Real_si <0x14, S_GETREG_REGRD_B32>; // see pseudo for comments
-def S_SETREG_IMM32_B32_si  : SOPK_Real64<0x15, S_SETREG_IMM32_B32>,
-                             Select_si<S_SETREG_IMM32_B32.Mnemonic>;
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX10.
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_gfx10<bits<7> op> {
+  def _gfx10 : SOP2_Real<op, !cast<SOP2_Pseudo>(NAME)>,
+               Select_gfx10<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_LSHL1_ADD_U32   : SOP2_Real_gfx10<0x02e>;
+defm S_LSHL2_ADD_U32   : SOP2_Real_gfx10<0x02f>;
+defm S_LSHL3_ADD_U32   : SOP2_Real_gfx10<0x030>;
+defm S_LSHL4_ADD_U32   : SOP2_Real_gfx10<0x031>;
+defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10<0x032>;
+defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10<0x033>;
+defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10<0x034>;
+defm S_MUL_HI_U32      : SOP2_Real_gfx10<0x035>;
+defm S_MUL_HI_I32      : SOP2_Real_gfx10<0x036>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX6, GFX7.
+//===----------------------------------------------------------------------===//
 
+multiclass SOP2_Real_gfx6_gfx7<bits<7> op> {
+  def _gfx6_gfx7 : SOP2_Real<op, !cast<SOP_Pseudo>(NAME)>,
+                   Select_gfx6_gfx7<!cast<SOP_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_gfx6_gfx7_gfx10<bits<7> op> :
+  SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10<op>;
+
+defm S_CBRANCH_G_FORK : SOP2_Real_gfx6_gfx7<0x02b>;
+
+defm S_ADD_U32     : SOP2_Real_gfx6_gfx7_gfx10<0x000>;
+defm S_SUB_U32     : SOP2_Real_gfx6_gfx7_gfx10<0x001>;
+defm S_ADD_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x002>;
+defm S_SUB_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x003>;
+defm S_ADDC_U32    : SOP2_Real_gfx6_gfx7_gfx10<0x004>;
+defm S_SUBB_U32    : SOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm S_MIN_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x006>;
+defm S_MIN_U32     : SOP2_Real_gfx6_gfx7_gfx10<0x007>;
+defm S_MAX_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x008>;
+defm S_MAX_U32     : SOP2_Real_gfx6_gfx7_gfx10<0x009>;
+defm S_CSELECT_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x00a>;
+defm S_CSELECT_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x00b>;
+defm S_AND_B32     : SOP2_Real_gfx6_gfx7_gfx10<0x00e>;
+defm S_AND_B64     : SOP2_Real_gfx6_gfx7_gfx10<0x00f>;
+defm S_OR_B32      : SOP2_Real_gfx6_gfx7_gfx10<0x010>;
+defm S_OR_B64      : SOP2_Real_gfx6_gfx7_gfx10<0x011>;
+defm S_XOR_B32     : SOP2_Real_gfx6_gfx7_gfx10<0x012>;
+defm S_XOR_B64     : SOP2_Real_gfx6_gfx7_gfx10<0x013>;
+defm S_ANDN2_B32   : SOP2_Real_gfx6_gfx7_gfx10<0x014>;
+defm S_ANDN2_B64   : SOP2_Real_gfx6_gfx7_gfx10<0x015>;
+defm S_ORN2_B32    : SOP2_Real_gfx6_gfx7_gfx10<0x016>;
+defm S_ORN2_B64    : SOP2_Real_gfx6_gfx7_gfx10<0x017>;
+defm S_NAND_B32    : SOP2_Real_gfx6_gfx7_gfx10<0x018>;
+defm S_NAND_B64    : SOP2_Real_gfx6_gfx7_gfx10<0x019>;
+defm S_NOR_B32     : SOP2_Real_gfx6_gfx7_gfx10<0x01a>;
+defm S_NOR_B64     : SOP2_Real_gfx6_gfx7_gfx10<0x01b>;
+defm S_XNOR_B32    : SOP2_Real_gfx6_gfx7_gfx10<0x01c>;
+defm S_XNOR_B64    : SOP2_Real_gfx6_gfx7_gfx10<0x01d>;
+defm S_LSHL_B32    : SOP2_Real_gfx6_gfx7_gfx10<0x01e>;
+defm S_LSHL_B64    : SOP2_Real_gfx6_gfx7_gfx10<0x01f>;
+defm S_LSHR_B32    : SOP2_Real_gfx6_gfx7_gfx10<0x020>;
+defm S_LSHR_B64    : SOP2_Real_gfx6_gfx7_gfx10<0x021>;
+defm S_ASHR_I32    : SOP2_Real_gfx6_gfx7_gfx10<0x022>;
+defm S_ASHR_I64    : SOP2_Real_gfx6_gfx7_gfx10<0x023>;
+defm S_BFM_B32     : SOP2_Real_gfx6_gfx7_gfx10<0x024>;
+defm S_BFM_B64     : SOP2_Real_gfx6_gfx7_gfx10<0x025>;
+defm S_MUL_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x026>;
+defm S_BFE_U32     : SOP2_Real_gfx6_gfx7_gfx10<0x027>;
+defm S_BFE_I32     : SOP2_Real_gfx6_gfx7_gfx10<0x028>;
+defm S_BFE_U64     : SOP2_Real_gfx6_gfx7_gfx10<0x029>;
+defm S_BFE_I64     : SOP2_Real_gfx6_gfx7_gfx10<0x02a>;
+defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>;
+
+//===----------------------------------------------------------------------===//
+// SOPK - GFX10.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPK_Real32_gfx10<bits<5> op> {
+  def _gfx10 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+               Select_gfx10<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real64_gfx10<bits<5> op> {
+  def _gfx10 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+               Select_gfx10<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_VERSION              : SOPK_Real32_gfx10<0x001>;
+defm S_CALL_B64             : SOPK_Real32_gfx10<0x016>;
+defm S_WAITCNT_VSCNT        : SOPK_Real32_gfx10<0x017>;
+defm S_WAITCNT_VMCNT        : SOPK_Real32_gfx10<0x018>;
+defm S_WAITCNT_EXPCNT       : SOPK_Real32_gfx10<0x019>;
+defm S_WAITCNT_LGKMCNT      : SOPK_Real32_gfx10<0x01a>;
+defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
+defm S_SUBVECTOR_LOOP_END   : SOPK_Real32_gfx10<0x01c>;
+
+//===----------------------------------------------------------------------===//
+// SOPK - GFX6, GFX7.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPK_Real32_gfx6_gfx7<bits<5> op> {
+  def _gfx6_gfx7 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+                   Select_gfx6_gfx7<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real64_gfx6_gfx7<bits<5> op> {
+  def _gfx6_gfx7 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+                   Select_gfx6_gfx7<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real32_gfx6_gfx7_gfx10<bits<5> op> :
+  SOPK_Real32_gfx6_gfx7<op>, SOPK_Real32_gfx10<op>;
+
+multiclass SOPK_Real64_gfx6_gfx7_gfx10<bits<5> op> :
+  SOPK_Real64_gfx6_gfx7<op>, SOPK_Real64_gfx10<op>;
+
+defm S_CBRANCH_I_FORK : SOPK_Real32_gfx6_gfx7<0x011>;
+
+defm S_MOVK_I32         : SOPK_Real32_gfx6_gfx7_gfx10<0x000>;
+defm S_CMOVK_I32        : SOPK_Real32_gfx6_gfx7_gfx10<0x002>;
+defm S_CMPK_EQ_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x003>;
+defm S_CMPK_LG_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x004>;
+defm S_CMPK_GT_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x005>;
+defm S_CMPK_GE_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x006>;
+defm S_CMPK_LT_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x007>;
+defm S_CMPK_LE_I32      : SOPK_Real32_gfx6_gfx7_gfx10<0x008>;
+defm S_CMPK_EQ_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x009>;
+defm S_CMPK_LG_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x00a>;
+defm S_CMPK_GT_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x00b>;
+defm S_CMPK_GE_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x00c>;
+defm S_CMPK_LT_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x00d>;
+defm S_CMPK_LE_U32      : SOPK_Real32_gfx6_gfx7_gfx10<0x00e>;
+defm S_ADDK_I32         : SOPK_Real32_gfx6_gfx7_gfx10<0x00f>;
+defm S_MULK_I32         : SOPK_Real32_gfx6_gfx7_gfx10<0x010>;
+defm S_GETREG_B32       : SOPK_Real32_gfx6_gfx7_gfx10<0x012>;
+defm S_SETREG_B32       : SOPK_Real32_gfx6_gfx7_gfx10<0x013>;
+defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
+
+//===----------------------------------------------------------------------===//
+// GFX8, GFX9 (VI).
+//===----------------------------------------------------------------------===//
 
 class Select_vi<string opName> :
   SIMCInstr<opName, SIEncodingFamily.VI> {
-  list<Predicate> AssemblerPredicates = [isVI];
-  string DecoderNamespace = "VI";
+  list<Predicate> AssemblerPredicates = [isGFX8GFX9];
+  string DecoderNamespace = "GFX8";
 }
 
 class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index e4c442db3016..30cf12337c6e 100644
--- a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -1,9 +1,8 @@
 //===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPUTargetMachine.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
new file mode 100644
index 000000000000..1e6dbd90b0c1
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
@@ -0,0 +1,29 @@
+//===-- TargetInfo/AMDGPUTargetInfo.h - TargetInfo for AMDGPU ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+/// The target which supports all AMD GPUs.  This will eventually
+/// be deprecated and there will be an R600 target and a GCN target.
+Target &getTheAMDGPUTarget();
+
+/// The target for GCN GPUs
+Target &getTheGCNTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 9eb4c6513cce..075e08986c0c 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -1,9 +1,8 @@
 //===-- AMDGPUAsmUtils.cpp - AsmParser/InstPrinter common -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "AMDGPUAsmUtils.h"
@@ -23,8 +22,8 @@ const char* const IdSymbolic[] = {
   nullptr,
   nullptr,
   nullptr,
-  nullptr,
-  nullptr,
+  "MSG_GS_ALLOC_REQ",
+  "MSG_GET_DOORBELL",
   nullptr,
   nullptr,
   nullptr,
@@ -69,7 +68,17 @@ const char* const IdSymbolic[] = {
   nullptr,
   nullptr,
   nullptr,
-  "HW_REG_SH_MEM_BASES"
+  "HW_REG_SH_MEM_BASES",
+  "HW_REG_TBA_LO",
+  "HW_REG_TBA_HI",
+  "HW_REG_TMA_LO",
+  "HW_REG_TMA_HI",
+  "HW_REG_FLAT_SCR_LO",
+  "HW_REG_FLAT_SCR_HI",
+  "HW_REG_XNACK_MASK",
+  nullptr, // HW_ID1, no predictable values
+  nullptr, // HW_ID2, no predictable values
+  "HW_REG_POPS_PACKER"
 };
 
 } // namespace Hwreg
@@ -86,5 +95,18 @@ const char* const IdSymbolic[] = {
 };
 
 } // namespace Swizzle
+
+namespace VGPRIndexMode {
+// Symbolic names for the gpr_idx(...) syntax.
+// Must be in sync with llvm::AMDGPU::VGPRIndexMode::Id members (SIDefines.h).
+const char* const IdSymbolic[] = {
+  "SRC0",
+  "SRC1",
+  "SRC2",
+  "DST",
+};
+
+} // namespace VGPRIndexMode
+
 } // namespace AMDGPU
 } // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index ebb2be22b487..cd91c5f6edd5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -1,9 +1,8 @@
 //===-- AMDGPUAsmUtils.h - AsmParser/InstPrinter common ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,6 +30,13 @@ namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
 extern const char* const IdSymbolic[];
 
 } // namespace Swizzle
+
+namespace VGPRIndexMode { // Symbolic names for the gpr_idx(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace VGPRIndexMode
+
 } // namespace AMDGPU
 } // namespace llvm
 
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 54c866bdc63c..e90f40e6abea 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #include "AMDGPUTargetTransformInfo.h"
 #include "AMDGPU.h"
 #include "SIDefines.h"
+#include "AMDGPUAsmUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -85,7 +85,9 @@ unsigned getExpcntBitWidth() { return 3; }
 unsigned getLgkmcntBitShift() { return 8; }
 
 /// \returns Lgkmcnt bit width.
-unsigned getLgkmcntBitWidth() { return 4; }
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+  return (VersionMajor >= 10) ? 6 : 4; // Waitcnt[13:8] on gfx10+, else [11:8].
+}
 
 /// \returns Vmcnt bit shift (higher bits).
 unsigned getVmcntBitShiftHi() { return 14; }
@@ -99,18 +101,11 @@ namespace llvm {
 
 namespace AMDGPU {
 
-struct MIMGInfo {
-  uint16_t Opcode;
-  uint16_t BaseOpcode;
-  uint8_t MIMGEncoding;
-  uint8_t VDataDwords;
-  uint8_t VAddrDwords;
-};
-
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -120,6 +115,11 @@ int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
   return Info ? Info->Opcode : -1;
 }
 
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+  const MIMGInfo *Info = getMIMGInfo(Opc); // May be null; checked below.
+  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
   const MIMGInfo *NewInfo =
@@ -230,7 +230,8 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
 
 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                                unsigned FlatWorkGroupSize) {
-  if (!STI->getFeatureBits().test(FeatureGCN))
+  assert(FlatWorkGroupSize != 0);
+  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
     return 8;
   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
   if (N == 1)
@@ -279,6 +280,8 @@ unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
 
 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return getAddressableNumSGPRs(STI);
   if (Version.Major >= 8)
     return 16;
   return 8;
@@ -300,6 +303,8 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 106;
   if (Version.Major >= 8)
     return 102;
   return 104;
@@ -308,6 +313,10 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   assert(WavesPerEU != 0);
 
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return 0;
+
   if (WavesPerEU >= getMaxWavesPerEU())
     return 0;
 
@@ -322,8 +331,10 @@ unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                         bool Addressable) {
   assert(WavesPerEU != 0);
 
-  IsaVersion Version = getIsaVersion(STI->getCPU());
   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return Addressable ? AddressableNumSGPRs : 108;
   if (Version.Major >= 8 && !Addressable)
     AddressableNumSGPRs = 112;
   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
@@ -340,6 +351,9 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
     ExtraSGPRs = 2;
 
   IsaVersion Version = getIsaVersion(STI->getCPU());
+  if (Version.Major >= 10)
+    return ExtraSGPRs;
+
   if (Version.Major < 8) {
     if (FlatScrUsed)
       ExtraSGPRs = 4;
@@ -366,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
 }
 
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
-  return 4;
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+                             Optional<bool> EnableWavefrontSize32) {
+  bool IsWave32 = EnableWavefrontSize32 ?
+      *EnableWavefrontSize32 : // An explicit value takes precedence.
+      STI->getFeatureBits().test(FeatureWavefrontSize32); // Else use STI.
+  return IsWave32 ? 8 : 4; // Granule is 8 VGPRs for wave32, 4 otherwise.
 }
 
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
-  return getVGPRAllocGranule(STI);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+                                Optional<bool> EnableWavefrontSize32) {
+  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
 }
 
 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
@@ -402,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 }
 
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
-  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+                          Optional<bool> EnableWavefrontSize32) {
+  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
+                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
   // VGPRBlocks is actual number of VGPR blocks minus 1.
-  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
+  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
 }
 
 } // end namespace IsaInfo
@@ -423,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.amd_machine_version_minor = Version.Minor;
   Header.amd_machine_version_stepping = Version.Stepping;
   Header.kernel_code_entry_byte_offset = sizeof(Header);
-  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
   Header.wavefront_size = 6;
 
   // If the code object does not support indirect functions, then the value must
@@ -435,11 +455,25 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.kernarg_segment_alignment = 4;
   Header.group_segment_alignment = 4;
   Header.private_segment_alignment = 4;
+
+  if (Version.Major >= 10) {
+    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+      Header.wavefront_size = 5;
+      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+    }
+    Header.compute_pgm_resource_registers |=
+      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+      S_00B848_MEM_ORDERED(1);
+  }
 }
 
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+    const MCSubtargetInfo *STI) {
+  IsaVersion Version = getIsaVersion(STI->getCPU());
+
   amdhsa::kernel_descriptor_t KD;
   memset(&KD, 0, sizeof(KD));
+
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
@@ -449,6 +483,16 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+  if (Version.Major >= 10) {
+    AMDHSA_BITS_SET(KD.kernel_code_properties,
+                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
+    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+  }
   return KD;
 }
 
@@ -523,13 +567,14 @@ unsigned getExpcntBitMask(const IsaVersion &Version) {
 }
 
 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
-  return (1 << getLgkmcntBitWidth()) - 1;
+  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
 }
 
 unsigned getWaitcntBitMask(const IsaVersion &Version) {
   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
-  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+                                getLgkmcntBitWidth(Version.Major));
   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
   if (Version.Major < 9)
     return Waitcnt;
@@ -555,7 +600,8 @@ unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
 }
 
 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
-  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return unpackBits(Waitcnt, getLgkmcntBitShift(),
+                    getLgkmcntBitWidth(Version.Major));
 }
 
 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
@@ -591,7 +637,8 @@ unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
 
 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                        unsigned Lgkmcnt) {
-  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+                                    getLgkmcntBitWidth(Version.Major));
 }
 
 unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -607,6 +654,181 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
   return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
 }
 
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
+
+namespace Hwreg {
+/// \returns the hwreg id whose symbolic name is \p Name, else ID_UNKNOWN_.
+int64_t getHwregId(const StringRef Name) {
+  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
+    if (IdSymbolic[Id] && Name == IdSymbolic[Id]) // Null slots have no name.
+      return Id;
+  }
+  return ID_UNKNOWN_;
+}
+/// \returns one past the last symbolic hwreg id that exists on \p STI.
+static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
+  if (isSI(STI) || isCI(STI) || isVI(STI))
+    return ID_SYMBOLIC_FIRST_GFX9_;
+  else if (isGFX9(STI))
+    return ID_SYMBOLIC_FIRST_GFX10_;
+  else
+    return ID_SYMBOLIC_LAST_;
+}
+/// \returns true if \p Id is in range for \p STI and has a symbolic name.
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
+  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+         IdSymbolic[Id];
+}
+/// \returns true if \p Id is non-negative and fits in ID_WIDTH_ bits.
+bool isValidHwreg(int64_t Id) {
+  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
+}
+/// \returns true if \p Offset is non-negative and fits in OFFSET_WIDTH_ bits.
+bool isValidHwregOffset(int64_t Offset) {
+  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
+}
+/// \returns true if \p Width - 1 is >= 0 and fits in WIDTH_M1_WIDTH_ bits.
+bool isValidHwregWidth(int64_t Width) {
+  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
+}
+/// Packs \p Id, \p Offset and \p Width - 1 into a single encoded value.
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
+  return (Id << ID_SHIFT_) |
+         (Offset << OFFSET_SHIFT_) |
+         ((Width - 1) << WIDTH_M1_SHIFT_);
+}
+/// \returns the symbolic name of \p Id if valid for \p STI, else "".
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
+  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
+}
+/// Extracts \p Id, \p Offset and \p Width (stored as Width - 1) from \p Val.
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
+  Id = (Val & ID_MASK_) >> ID_SHIFT_;
+  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
+  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
+}
+
+} // namespace Hwreg
+
+//===----------------------------------------------------------------------===//
+// SendMsg
+//===----------------------------------------------------------------------===//
+
+namespace SendMsg {
+/// \returns the message id whose symbolic name is \p Name, else ID_UNKNOWN_.
+int64_t getMsgId(const StringRef Name) {
+  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
+    if (IdSymbolic[i] && Name == IdSymbolic[i])
+      return i;
+  }
+  return ID_UNKNOWN_;
+}
+/// \returns true if \p MsgId is in the id range and has a symbolic name.
+static bool isValidMsgId(int64_t MsgId) {
+  return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
+}
+/// Strict mode checks the symbolic table and \p STI; otherwise only bit width.
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
+  if (Strict) {
+    if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
+      return isGFX9(STI) || isGFX10(STI);
+    else
+      return isValidMsgId(MsgId);
+  } else {
+    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+  }
+}
+/// \returns the symbolic name of \p MsgId, or "" if it has none.
+StringRef getMsgName(int64_t MsgId) {
+  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
+}
+/// \returns the SYSMSG or GS op id named \p Name, else OP_UNKNOWN_.
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
+  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
+  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
+  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
+  for (int i = F; i < L; ++i) {
+    if (Name == S[i]) {
+      return i;
+    }
+  }
+  return OP_UNKNOWN_;
+}
+/// \returns true if \p OpId is a legal operation for \p MsgId.
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
+  // Non-strict mode only requires OpId to fit in OP_WIDTH_ bits.
+  if (!Strict)
+    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
+  // Strict mode: the op must belong to the range defined for MsgId.
+  switch(MsgId)
+  {
+  case ID_GS:
+    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
+  case ID_GS_DONE:
+    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
+  case ID_SYSMSG:
+    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
+  default:
+    return OpId == OP_NONE_;
+  }
+}
+/// \returns the symbolic name of \p OpId; it is not range-checked here.
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
+  assert(msgRequiresOp(MsgId));
+  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
+}
+/// \returns true if \p StreamId is a legal stream for \p MsgId and \p OpId.
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
+  // Non-strict mode only requires StreamId to fit in STREAM_ID_WIDTH_ bits.
+  if (!Strict)
+    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
+  // Strict mode: only GS (and GS_DONE with a non-NOP op) take a stream id.
+  switch(MsgId)
+  {
+  case ID_GS:
+    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
+  case ID_GS_DONE:
+    return (OpId == OP_GS_NOP)?
+           (StreamId == STREAM_ID_NONE_) :
+           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
+  default:
+    return StreamId == STREAM_ID_NONE_;
+  }
+}
+/// \returns true if \p MsgId is ID_GS, ID_GS_DONE or ID_SYSMSG.
+bool msgRequiresOp(int64_t MsgId) {
+  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
+}
+/// \returns true for ID_GS / ID_GS_DONE unless \p OpId is OP_GS_NOP.
+bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
+  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
+}
+/// Extracts \p MsgId, \p OpId and \p StreamId from the fields of \p Val.
+void decodeMsg(unsigned Val,
+               uint16_t &MsgId,
+               uint16_t &OpId,
+               uint16_t &StreamId) {
+  MsgId = (Val & ID_MASK_) >> ID_SHIFT_;
+  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
+  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+}
+/// Packs \p MsgId, \p OpId and \p StreamId into one encoded value.
+uint64_t encodeMsg(uint64_t MsgId,
+                   uint64_t OpId,
+                   uint64_t StreamId) {
+  return (MsgId << ID_SHIFT_) |
+         (OpId << OP_SHIFT_) |
+         (StreamId << STREAM_ID_SHIFT_);
+}
+
+} // namespace SendMsg
+
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
 unsigned getInitialPSInputAddr(const Function &F) {
   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
@@ -679,6 +901,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 }
 
+bool isGFX10(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 }
@@ -704,46 +930,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
   CASE_CI_VI(FLAT_SCR) \
   CASE_CI_VI(FLAT_SCR_LO) \
   CASE_CI_VI(FLAT_SCR_HI) \
-  CASE_VI_GFX9(TTMP0) \
-  CASE_VI_GFX9(TTMP1) \
-  CASE_VI_GFX9(TTMP2) \
-  CASE_VI_GFX9(TTMP3) \
-  CASE_VI_GFX9(TTMP4) \
-  CASE_VI_GFX9(TTMP5) \
-  CASE_VI_GFX9(TTMP6) \
-  CASE_VI_GFX9(TTMP7) \
-  CASE_VI_GFX9(TTMP8) \
-  CASE_VI_GFX9(TTMP9) \
-  CASE_VI_GFX9(TTMP10) \
-  CASE_VI_GFX9(TTMP11) \
-  CASE_VI_GFX9(TTMP12) \
-  CASE_VI_GFX9(TTMP13) \
-  CASE_VI_GFX9(TTMP14) \
-  CASE_VI_GFX9(TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1) \
-  CASE_VI_GFX9(TTMP2_TTMP3) \
-  CASE_VI_GFX9(TTMP4_TTMP5) \
-  CASE_VI_GFX9(TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP8_TTMP9) \
-  CASE_VI_GFX9(TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP12_TTMP13) \
-  CASE_VI_GFX9(TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
-  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
-  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
-  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
-  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0) \
+  CASE_VI_GFX9_GFX10(TTMP1) \
+  CASE_VI_GFX9_GFX10(TTMP2) \
+  CASE_VI_GFX9_GFX10(TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4) \
+  CASE_VI_GFX9_GFX10(TTMP5) \
+  CASE_VI_GFX9_GFX10(TTMP6) \
+  CASE_VI_GFX9_GFX10(TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8) \
+  CASE_VI_GFX9_GFX10(TTMP9) \
+  CASE_VI_GFX9_GFX10(TTMP10) \
+  CASE_VI_GFX9_GFX10(TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12) \
+  CASE_VI_GFX9_GFX10(TTMP13) \
+  CASE_VI_GFX9_GFX10(TTMP14) \
+  CASE_VI_GFX9_GFX10(TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
+  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
+  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
+  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
+  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
   }
 
 #define CASE_CI_VI(node) \
   assert(!isSI(STI)); \
   case node: return isCI(STI) ? node##_ci : node##_vi;
 
-#define CASE_VI_GFX9(node) \
-  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
+#define CASE_VI_GFX9_GFX10(node) \
+  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
 
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
   if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -752,17 +978,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 }
 
 #undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
 
 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
-#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
+#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
 
 unsigned mc2PseudoReg(unsigned Reg) {
   MAP_REG2REG
 }
 
 #undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
 #undef MAP_REG2REG
 
 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -779,10 +1005,17 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
     return true;
   default:
     return false;
@@ -802,28 +1035,46 @@ unsigned getRegBitWidth(unsigned RCID) {
   switch (RCID) {
   case AMDGPU::SGPR_32RegClassID:
   case AMDGPU::VGPR_32RegClassID:
+  case AMDGPU::VRegOrLds_32RegClassID:
+  case AMDGPU::AGPR_32RegClassID:
   case AMDGPU::VS_32RegClassID:
+  case AMDGPU::AV_32RegClassID:
   case AMDGPU::SReg_32RegClassID:
   case AMDGPU::SReg_32_XM0RegClassID:
+  case AMDGPU::SRegOrLds_32RegClassID:
     return 32;
   case AMDGPU::SGPR_64RegClassID:
   case AMDGPU::VS_64RegClassID:
+  case AMDGPU::AV_64RegClassID:
   case AMDGPU::SReg_64RegClassID:
   case AMDGPU::VReg_64RegClassID:
+  case AMDGPU::AReg_64RegClassID:
   case AMDGPU::SReg_64_XEXECRegClassID:
     return 64;
+  case AMDGPU::SGPR_96RegClassID:
+  case AMDGPU::SReg_96RegClassID:
   case AMDGPU::VReg_96RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
   case AMDGPU::VReg_128RegClassID:
+  case AMDGPU::AReg_128RegClassID:
     return 128;
+  case AMDGPU::SGPR_160RegClassID:
+  case AMDGPU::SReg_160RegClassID:
+  case AMDGPU::VReg_160RegClassID:
+    return 160;
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
     return 256;
   case AMDGPU::SReg_512RegClassID:
   case AMDGPU::VReg_512RegClassID:
+  case AMDGPU::AReg_512RegClassID:
     return 512;
+  case AMDGPU::SReg_1024RegClassID:
+  case AMDGPU::VReg_1024RegClassID:
+  case AMDGPU::AReg_1024RegClassID:
+    return 1024;
   default:
     llvm_unreachable("Unexpected register class");
   }
@@ -905,6 +1156,13 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   assert(HasInv2Pi);
 
+  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+    int16_t Trunc = static_cast<int16_t>(Literal);
+    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+  }
+  if (!(Literal & 0xffff))
+    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
   int16_t Lo16 = static_cast<int16_t>(Literal);
   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
@@ -936,15 +1194,19 @@ bool isArgPassedInSGPR(const Argument *A) {
   }
 }
 
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+  return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  if (isGCN3Encoding(ST))
+  if (hasSMEMByteOffset(ST))
     return ByteOffset;
   return ByteOffset >> 2;
 }
 
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return isGCN3Encoding(ST) ?
+  return (hasSMEMByteOffset(ST)) ?
     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 }
 
@@ -994,6 +1256,19 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
   return true;
 }
 
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+  *this = getDefaultForCallingConv(F.getCallingConv()); // Start from defaults.
+  // "amdgpu-ieee": any non-empty value overrides the default ("true" enables).
+  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+  if (!IEEEAttr.empty())
+    IEEE = IEEEAttr == "true";
+  // "amdgpu-dx10-clamp" is handled the same way for DX10Clamp.
+  StringRef DX10ClampAttr
+    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+  if (!DX10ClampAttr.empty())
+    DX10Clamp = DX10ClampAttr == "true";
+}
+
 namespace {
 
 struct SourceOfDivergence {
@@ -1009,5 +1284,6 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
   return lookupSourceOfDivergence(IntrID);
 }
+
 } // namespace AMDGPU
 } // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 20123ed4ac81..209ef7eef749 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,9 +1,8 @@
 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,6 +45,7 @@ namespace AMDGPU {
 #define GET_MIMGDim_DECL
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"
 
 namespace IsaInfo {
@@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
 
 /// \returns VGPR allocation granularity for given subtarget \p STI.
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+                             Optional<bool> EnableWavefrontSize32 = None);
 
 /// \returns VGPR encoding granularity for given subtarget \p STI.
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+                                Optional<bool> EnableWavefrontSize32 = None);
 
 /// \returns Total number of VGPRs for given subtarget \p STI.
 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
@@ -171,13 +179,20 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
 
 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
 /// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match the
+/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+                          Optional<bool> EnableWavefrontSize32 = None);
 
 } // end namespace IsaInfo
 
 LLVM_READONLY
 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
 struct MIMGBaseOpcodeInfo {
   MIMGBaseOpcode BaseOpcode;
   bool Store;
@@ -201,19 +216,35 @@ struct MIMGDimInfo {
   uint8_t NumCoords;
   uint8_t NumGradients;
   bool DA;
+  uint8_t Encoding;
+  const char *AsmSuffix;
 };
 
 LLVM_READONLY
-const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
 
 struct MIMGLZMappingInfo {
   MIMGBaseOpcode L;
   MIMGBaseOpcode LZ;
 };
 
+struct MIMGMIPMappingInfo {
+  MIMGBaseOpcode MIP;
+  MIMGBaseOpcode NONMIP;
+};
+
 LLVM_READONLY
 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
 
+LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                   unsigned VDataDwords, unsigned VAddrDwords);
@@ -221,6 +252,17 @@ int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
 LLVM_READONLY
 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
 
+struct MIMGInfo {
+  uint16_t Opcode;
+  uint16_t BaseOpcode;
+  uint8_t MIMGEncoding;
+  uint8_t VDataDwords;
+  uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
 LLVM_READONLY
 int getMUBUFBaseOpcode(unsigned Opc);
 
@@ -245,7 +287,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const MCSubtargetInfo *STI);
 
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+    const MCSubtargetInfo *STI);
 
 bool isGroupSegment(const GlobalValue *GV);
 bool isGlobalSegment(const GlobalValue *GV);
@@ -285,21 +328,30 @@ struct Waitcnt {
   unsigned VmCnt = ~0u;
   unsigned ExpCnt = ~0u;
   unsigned LgkmCnt = ~0u;
+  unsigned VsCnt = ~0u;
 
   Waitcnt() {}
-  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
-      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
+
+  static Waitcnt allZero(const IsaVersion &Version) {
+    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+  }
+  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
 
-  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+  bool hasWait() const {
+    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+  }
 
   bool dominates(const Waitcnt &Other) const {
     return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
-           LgkmCnt <= Other.LgkmCnt;
+           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
   }
 
   Waitcnt combined(const Waitcnt &Other) const {
     return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
-                   std::min(LgkmCnt, Other.LgkmCnt));
+                   std::min(LgkmCnt, Other.LgkmCnt),
+                   std::min(VsCnt, Other.VsCnt));
   }
 };
 
@@ -332,7 +384,8 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
 ///     \p Expcnt = \p Waitcnt[6:4]
-///     \p Lgkmcnt = \p Waitcnt[11:8]
+///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
+///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
 
@@ -357,7 +410,8 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
 ///     Waitcnt[6:4]   = \p Expcnt
-///     Waitcnt[11:8]  = \p Lgkmcnt
+///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
+///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
 ///
 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
@@ -367,6 +421,75 @@ unsigned encodeWaitcnt(const IsaVersion &Version,
 
 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
 
+namespace Hwreg {
+
+LLVM_READONLY
+int64_t getHwregId(const StringRef Name);
+
+LLVM_READONLY // Reads through STI, so it must not be READNONE.
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id);
+
+LLVM_READNONE
+bool isValidHwregOffset(int64_t Offset);
+
+LLVM_READNONE
+bool isValidHwregWidth(int64_t Width);
+
+LLVM_READNONE
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
+
+LLVM_READONLY // Reads through STI, so it must not be READNONE.
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
+
+} // namespace Hwreg
+
+namespace SendMsg {
+
+LLVM_READONLY
+int64_t getMsgId(const StringRef Name);
+
+LLVM_READONLY
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
+
+LLVM_READNONE
+StringRef getMsgName(int64_t MsgId);
+
+LLVM_READNONE
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+
+LLVM_READONLY // Reads through STI, so it must not be READNONE.
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
+
+LLVM_READNONE
+bool msgRequiresOp(int64_t MsgId);
+
+LLVM_READNONE
+bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+
+void decodeMsg(unsigned Val,
+               uint16_t &MsgId,
+               uint16_t &OpId,
+               uint16_t &StreamId);
+
+LLVM_READNONE
+uint64_t encodeMsg(uint64_t MsgId,
+                   uint64_t OpId,
+                   uint64_t StreamId);
+
+} // namespace SendMsg
+
+
 unsigned getInitialPSInputAddr(const Function &F);
 
 LLVM_READNONE
@@ -399,6 +522,7 @@ bool isSI(const MCSubtargetInfo &STI);
 bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
 bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
 
 /// Is Reg - scalar register
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -440,6 +564,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   case AMDGPU::OPERAND_REG_IMM_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
     return 4;
 
   case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -454,6 +580,12 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
     return 2;
 
   default:
@@ -496,6 +628,45 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
 /// \returns true if the intrinsic is divergent
 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
 
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+  /// Floating point opcodes that support exception flag gathering quiet and
+  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+  /// quieting.
+  bool IEEE : 1;
+
+  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+  /// clamp NaN to zero; otherwise, pass NaN through.
+  bool DX10Clamp : 1;
+
+  // TODO: FP mode fields
+
+  SIModeRegisterDefaults() :
+    IEEE(true),
+    DX10Clamp(true) {}
+
+  SIModeRegisterDefaults(const Function &F);
+
+  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+    SIModeRegisterDefaults Mode;
+    Mode.DX10Clamp = true; // Same value the default constructor already set.
+    Mode.IEEE = AMDGPU::isCompute(CC); // IEEE defaults on only for compute.
+    return Mode;
+  }
+
+  bool operator ==(const SIModeRegisterDefaults Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+  }
+
+  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+  // be able to override.
+  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+    return *this == CalleeMode;
+  }
+};
+
 } // end namespace AMDGPU
 } // end namespace llvm
 
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
new file mode 100644
index 000000000000..db20d5ccf5f9
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -0,0 +1,723 @@
+//===-- AMDGPUPALMetadata.cpp - Accumulate and print AMDGPU PAL metadata  -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This class has methods called by AMDGPUAsmPrinter to accumulate and print
+/// the PAL metadata.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUPALMetadata.h"
+#include "AMDGPU.h"
+#include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Read the PAL metadata that the frontend stored in the module's IR metadata.
+void AMDGPUPALMetadata::readFromIR(Module &M) {
+  auto MsgPackMD = M.getNamedMetadata("amdgpu.pal.metadata.msgpack");
+  if (MsgPackMD && MsgPackMD->getNumOperands()) {
+    // New msgpack format: a NamedMD whose first operand is an MDTuple whose
+    // first operand is an MDString holding the msgpack data.
+    BlobType = ELF::NT_AMDGPU_METADATA;
+    auto BlobTuple = dyn_cast<MDTuple>(MsgPackMD->getOperand(0));
+    if (BlobTuple && BlobTuple->getNumOperands())
+      if (auto BlobStr = dyn_cast<MDString>(BlobTuple->getOperand(0)))
+        setFromMsgPackBlob(BlobStr->getString());
+    return;
+  }
+
+  // Old reg=value pair format: a NamedMD whose first operand is an MDTuple of
+  // integer constants, where each consecutive pair of integers is a key=value
+  // pair stored as Registers[key]=value in the map.
+  BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+  auto LegacyMD = M.getNamedMetadata("amdgpu.pal.metadata");
+  if (!LegacyMD || !LegacyMD->getNumOperands())
+    return;
+  auto Pairs = dyn_cast<MDTuple>(LegacyMD->getOperand(0));
+  if (!Pairs)
+    return;
+  // Round the operand count down to even so a dangling key is ignored.
+  const unsigned NumOps = Pairs->getNumOperands() & ~1u;
+  for (unsigned Idx = 0; Idx < NumOps; Idx += 2) {
+    auto Reg = mdconst::dyn_extract<ConstantInt>(Pairs->getOperand(Idx));
+    auto Val = mdconst::dyn_extract<ConstantInt>(Pairs->getOperand(Idx + 1));
+    if (Reg && Val)
+      setRegister(Reg->getZExtValue(), Val->getZExtValue());
+  }
+}
+
+// Set PAL metadata from the binary blob of the applicable .note record,
+// remembering the note type. Returns false if bad format.  Blob must remain
+// valid for the lifetime of the Metadata.
+bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) {
+  BlobType = Type;
+  // Any note type other than the legacy one is handled as msgpack.
+  const bool IsLegacyNote = Type == ELF::NT_AMD_AMDGPU_PAL_METADATA;
+  return IsLegacyNote ? setFromLegacyBlob(Blob) : setFromMsgPackBlob(Blob);
+}
+
+// Set PAL metadata from legacy blob: little-endian uint32 key=value pairs.
+bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) {
+  using support::endian::read32le; // Safe for any alignment and host order.
+  for (size_t Off = 0; Off + 8 <= Blob.size(); Off += 8)
+    setRegister(read32le(Blob.data() + Off), read32le(Blob.data() + Off + 4));
+  return true;
+}
+
+// Set PAL metadata from msgpack blob. Returns false if bad format.
+bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) {
+  // MsgPackDoc parses the blob itself; no separate msgpack::Reader is needed.
+  return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false);
+}
+
+// Map a calling convention to the register number of its rsrc1 register; any
+// convention other than the six listed below maps to the compute register.
+// Fixed values suffice: the number could in principle change in future
+// hardware, but is the same for gfx6-9 (LS and ES don't exist on gfx9).
+static unsigned getRsrc1Reg(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::AMDGPU_PS:
+    return PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS;
+  case CallingConv::AMDGPU_VS:
+    return PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS;
+  case CallingConv::AMDGPU_GS:
+    return PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS;
+  case CallingConv::AMDGPU_ES:
+    return PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES;
+  case CallingConv::AMDGPU_HS:
+    return PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS;
+  case CallingConv::AMDGPU_LS:
+    return PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS;
+  default:
+    return PALMD::R_2E12_COMPUTE_PGM_RSRC1;
+  }
+}
+
+// Calculate the PAL metadata key for *S_SCRATCH_SIZE of the given calling
+// convention (CS_SCRATCH_SIZE unless it is one of the six cases below). Adding
+// a constant offset reaches any non-register shader-specific metadata key.
+static unsigned getScratchSizeKey(CallingConv::ID CC) {
+  switch (CC) {
+  default:
+    return PALMD::Key::CS_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_LS:
+    return PALMD::Key::LS_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_HS:
+    return PALMD::Key::HS_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_ES:
+    return PALMD::Key::ES_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_GS:
+    return PALMD::Key::GS_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_VS:
+    return PALMD::Key::VS_SCRATCH_SIZE;
+  case CallingConv::AMDGPU_PS:
+    return PALMD::Key::PS_SCRATCH_SIZE;
+  }
+}
+
+// OR Val into the rsrc1 register recorded for the shader stage of CC.
+void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) {
+  const unsigned Rsrc1Reg = getRsrc1Reg(CC);
+  setRegister(Rsrc1Reg, Val);
+}
+
+// OR Val into the rsrc2 register, numbered one above the stage's rsrc1.
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) {
+  const unsigned Rsrc2Reg = getRsrc1Reg(CC) + 1;
+  setRegister(Rsrc2Reg, Val);
+}
+
+// OR Val into the SPI_PS_INPUT_ENA register recorded in the metadata.
+void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) {
+  const unsigned InputEnaReg = PALMD::R_A1B3_SPI_PS_INPUT_ENA;
+  setRegister(InputEnaReg, Val);
+}
+
+// OR Val into the SPI_PS_INPUT_ADDR register recorded in the metadata.
+void AMDGPUPALMetadata::setSpiPsInputAddr(unsigned Val) {
+  const unsigned InputAddrReg = PALMD::R_A1B4_SPI_PS_INPUT_ADDR;
+  setRegister(InputAddrReg, Val);
+}
+
+// Get a register from the metadata; 0 if it is unset or not a UInt node.
+unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) {
+  auto RegMap = getRegisters();
+  auto Found = RegMap.find(MsgPackDoc.getNode(Reg));
+  if (Found != RegMap.end()) {
+    auto Node = Found->second;
+    if (Node.getKind() == msgpack::Type::UInt)
+      return Node.getUInt();
+  }
+  return 0;
+}
+
+// Record a register value in the metadata. If the register already holds an
+// unsigned value, the new value is ORed into it rather than replacing it.
+void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
+  // In the new MsgPack format, registers numbered >= 0x10000000 are dropped:
+  // they are PAL ABI pseudo-registers of the old non-MsgPack format.
+  const bool IsPseudoReg = Reg >= 0x10000000;
+  if (IsPseudoReg && !isLegacy())
+    return;
+  auto &Slot = getRegisters()[MsgPackDoc.getNode(Reg)];
+  unsigned Merged = Val;
+  if (Slot.getKind() == msgpack::Type::UInt)
+    Merged |= Slot.getUInt();
+  Slot = Slot.getDocument()->getNode(Merged);
+}
+
+// Set the entry point name for one shader. Has no effect when the metadata
+// is in the legacy format.
+void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
+  // Msgpack format only; the name is copied into the document.
+  if (!isLegacy())
+    getHwStage(CC)[".entry_point"] = MsgPackDoc.getNode(Name, /*Copy=*/true);
+}
+
+// Record how many vgprs the shader of the given calling convention uses.
+// The record is optional and advisory (for logging etc): wave dispatch
+// actually determines the number of vgprs to allocate from the rsrc1
+// register of the shader stage.
+void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
+    return;
+  }
+  // Old non-msgpack format: the key sits at the same distance from the
+  // stage's *_SCRATCH_SIZE key as VS_NUM_USED_VGPRS does from VS_SCRATCH_SIZE.
+  const unsigned KeyOffset =
+      PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE;
+  setRegister(getScratchSizeKey(CC) + KeyOffset, Val);
+}
+
+// Record how many sgprs the shader of the given calling convention uses.
+// The record is optional and advisory (for logging etc): wave dispatch
+// actually takes the sgpr allocation from the stage's rsrc1 register.
+void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
+    return;
+  }
+  // Old non-msgpack format: the key sits at the same distance from the
+  // stage's *_SCRATCH_SIZE key as VS_NUM_USED_SGPRS does from VS_SCRATCH_SIZE.
+  const unsigned KeyOffset =
+      PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE;
+  setRegister(getScratchSizeKey(CC) + KeyOffset, Val);
+}
+
+// Record the scratch size for the shader of the given calling convention.
+void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
+  if (!isLegacy()) {
+    // Msgpack format.
+    getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
+    return;
+  }
+  // Old non-msgpack format.
+  setRegister(getScratchSizeKey(CC), Val);
+}
+
+// Set the hardware register bit in PAL metadata that enables wave32 for the
+// shader of the given calling convention; other conventions are ignored.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+  switch (CC) {
+  case CallingConv::AMDGPU_CS:
+    setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+                S_00B800_CS_W32_EN(1));
+    return;
+  case CallingConv::AMDGPU_PS:
+    setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+    return;
+  case CallingConv::AMDGPU_VS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+    return;
+  case CallingConv::AMDGPU_GS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+    return;
+  case CallingConv::AMDGPU_HS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+    return;
+  }
+}
+
+// Convert a register number to its name, for display by toString().
+// Returns nullptr if the register number is not in the table.
+static const char *getRegisterName(unsigned RegNum) {
+  // Table of registers.
+  static const struct RegInfo {
+    unsigned Num;
+    const char *Name;
+  } RegInfoTable[] = {
+      // Registers that code generation sets/modifies metadata for.
+      {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS, "SPI_SHADER_PGM_RSRC1_VS"},
+      {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS + 1, "SPI_SHADER_PGM_RSRC2_VS"},
+      {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS, "SPI_SHADER_PGM_RSRC1_LS"},
+      {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS + 1, "SPI_SHADER_PGM_RSRC2_LS"},
+      {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS, "SPI_SHADER_PGM_RSRC1_HS"},
+      {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS + 1, "SPI_SHADER_PGM_RSRC2_HS"},
+      {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES, "SPI_SHADER_PGM_RSRC1_ES"},
+      {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES + 1, "SPI_SHADER_PGM_RSRC2_ES"},
+      {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS, "SPI_SHADER_PGM_RSRC1_GS"},
+      {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS + 1, "SPI_SHADER_PGM_RSRC2_GS"},
+      {PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR, "COMPUTE_DISPATCH_INITIATOR"},
+      {PALMD::R_2E12_COMPUTE_PGM_RSRC1, "COMPUTE_PGM_RSRC1"},
+      {PALMD::R_2E12_COMPUTE_PGM_RSRC1 + 1, "COMPUTE_PGM_RSRC2"},
+      {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS, "SPI_SHADER_PGM_RSRC1_PS"},
+      {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS + 1, "SPI_SHADER_PGM_RSRC2_PS"},
+      {PALMD::R_A1B3_SPI_PS_INPUT_ENA, "SPI_PS_INPUT_ENA"},
+      {PALMD::R_A1B4_SPI_PS_INPUT_ADDR, "SPI_PS_INPUT_ADDR"},
+      {PALMD::R_A1B6_SPI_PS_IN_CONTROL, "SPI_PS_IN_CONTROL"},
+      {PALMD::R_A2D5_VGT_SHADER_STAGES_EN, "VGT_SHADER_STAGES_EN"},
+
+      // Registers not known to code generation.
+      {0x2c07, "SPI_SHADER_PGM_RSRC3_PS"},
+      {0x2c46, "SPI_SHADER_PGM_RSRC3_VS"},
+      {0x2c87, "SPI_SHADER_PGM_RSRC3_GS"},
+      {0x2cc7, "SPI_SHADER_PGM_RSRC3_ES"},
+      {0x2d07, "SPI_SHADER_PGM_RSRC3_HS"},
+      {0x2d47, "SPI_SHADER_PGM_RSRC3_LS"},
+
+      {0xa1c3, "SPI_SHADER_POS_FORMAT"},
+      {0xa1b1, "SPI_VS_OUT_CONFIG"},
+      {0xa207, "PA_CL_VS_OUT_CNTL"},
+      {0xa204, "PA_CL_CLIP_CNTL"},
+      {0xa206, "PA_CL_VTE_CNTL"},
+      {0xa2f9, "PA_SU_VTX_CNTL"},
+      {0xa293, "PA_SC_MODE_CNTL_1"},
+      {0xa2a1, "VGT_PRIMITIVEID_EN"},
+      {0x2c81, "SPI_SHADER_PGM_RSRC4_GS"},
+      {0x2e18, "COMPUTE_TMPRING_SIZE"},
+      {0xa1b5, "SPI_INTERP_CONTROL_0"},
+      {0xa1ba, "SPI_TMPRING_SIZE"},
+      {0xa1c4, "SPI_SHADER_Z_FORMAT"},
+      {0xa1c5, "SPI_SHADER_COL_FORMAT"},
+      {0xa203, "DB_SHADER_CONTROL"},
+      {0xa08f, "CB_SHADER_MASK"},
+      {0xa191, "SPI_PS_INPUT_CNTL_0"},
+      {0xa192, "SPI_PS_INPUT_CNTL_1"},
+      {0xa193, "SPI_PS_INPUT_CNTL_2"},
+      {0xa194, "SPI_PS_INPUT_CNTL_3"},
+      {0xa195, "SPI_PS_INPUT_CNTL_4"},
+      {0xa196, "SPI_PS_INPUT_CNTL_5"},
+      {0xa197, "SPI_PS_INPUT_CNTL_6"},
+      {0xa198, "SPI_PS_INPUT_CNTL_7"},
+      {0xa199, "SPI_PS_INPUT_CNTL_8"},
+      {0xa19a, "SPI_PS_INPUT_CNTL_9"},
+      {0xa19b, "SPI_PS_INPUT_CNTL_10"},
+      {0xa19c, "SPI_PS_INPUT_CNTL_11"},
+      {0xa19d, "SPI_PS_INPUT_CNTL_12"},
+      {0xa19e, "SPI_PS_INPUT_CNTL_13"},
+      {0xa19f, "SPI_PS_INPUT_CNTL_14"},
+      {0xa1a0, "SPI_PS_INPUT_CNTL_15"},
+      {0xa1a1, "SPI_PS_INPUT_CNTL_16"},
+      {0xa1a2, "SPI_PS_INPUT_CNTL_17"},
+      {0xa1a3, "SPI_PS_INPUT_CNTL_18"},
+      {0xa1a4, "SPI_PS_INPUT_CNTL_19"},
+      {0xa1a5, "SPI_PS_INPUT_CNTL_20"},
+      {0xa1a6, "SPI_PS_INPUT_CNTL_21"},
+      {0xa1a7, "SPI_PS_INPUT_CNTL_22"},
+      {0xa1a8, "SPI_PS_INPUT_CNTL_23"},
+      {0xa1a9, "SPI_PS_INPUT_CNTL_24"},
+      {0xa1aa, "SPI_PS_INPUT_CNTL_25"},
+      {0xa1ab, "SPI_PS_INPUT_CNTL_26"},
+      {0xa1ac, "SPI_PS_INPUT_CNTL_27"},
+      {0xa1ad, "SPI_PS_INPUT_CNTL_28"},
+      {0xa1ae, "SPI_PS_INPUT_CNTL_29"},
+      {0xa1af, "SPI_PS_INPUT_CNTL_30"},
+      {0xa1b0, "SPI_PS_INPUT_CNTL_31"},
+
+      {0xa2ce, "VGT_GS_MAX_VERT_OUT"},
+      {0xa2ab, "VGT_ESGS_RING_ITEMSIZE"},
+      {0xa290, "VGT_GS_MODE"},
+      {0xa291, "VGT_GS_ONCHIP_CNTL"},
+      {0xa2d7, "VGT_GS_VERT_ITEMSIZE"},
+      {0xa2d8, "VGT_GS_VERT_ITEMSIZE_1"},
+      {0xa2d9, "VGT_GS_VERT_ITEMSIZE_2"},
+      {0xa2da, "VGT_GS_VERT_ITEMSIZE_3"},
+      {0xa298, "VGT_GSVS_RING_OFFSET_1"},
+      {0xa299, "VGT_GSVS_RING_OFFSET_2"},
+      {0xa29a, "VGT_GSVS_RING_OFFSET_3"},
+
+      {0xa2e4, "VGT_GS_INSTANCE_CNT"},
+      {0xa297, "VGT_GS_PER_VS"},
+      {0xa29b, "VGT_GS_OUT_PRIM_TYPE"},
+      {0xa2ac, "VGT_GSVS_RING_ITEMSIZE"},
+
+      {0xa2ad, "VGT_REUSE_OFF"},
+      {0xa1b8, "SPI_BARYC_CNTL"},
+
+      {0x2c4c, "SPI_SHADER_USER_DATA_VS_0"},
+      {0x2c4d, "SPI_SHADER_USER_DATA_VS_1"},
+      {0x2c4e, "SPI_SHADER_USER_DATA_VS_2"},
+      {0x2c4f, "SPI_SHADER_USER_DATA_VS_3"},
+      {0x2c50, "SPI_SHADER_USER_DATA_VS_4"},
+      {0x2c51, "SPI_SHADER_USER_DATA_VS_5"},
+      {0x2c52, "SPI_SHADER_USER_DATA_VS_6"},
+      {0x2c53, "SPI_SHADER_USER_DATA_VS_7"},
+      {0x2c54, "SPI_SHADER_USER_DATA_VS_8"},
+      {0x2c55, "SPI_SHADER_USER_DATA_VS_9"},
+      {0x2c56, "SPI_SHADER_USER_DATA_VS_10"},
+      {0x2c57, "SPI_SHADER_USER_DATA_VS_11"},
+      {0x2c58, "SPI_SHADER_USER_DATA_VS_12"},
+      {0x2c59, "SPI_SHADER_USER_DATA_VS_13"},
+      {0x2c5a, "SPI_SHADER_USER_DATA_VS_14"},
+      {0x2c5b, "SPI_SHADER_USER_DATA_VS_15"},
+      {0x2c5c, "SPI_SHADER_USER_DATA_VS_16"},
+      {0x2c5d, "SPI_SHADER_USER_DATA_VS_17"},
+      {0x2c5e, "SPI_SHADER_USER_DATA_VS_18"},
+      {0x2c5f, "SPI_SHADER_USER_DATA_VS_19"},
+      {0x2c60, "SPI_SHADER_USER_DATA_VS_20"},
+      {0x2c61, "SPI_SHADER_USER_DATA_VS_21"},
+      {0x2c62, "SPI_SHADER_USER_DATA_VS_22"},
+      {0x2c63, "SPI_SHADER_USER_DATA_VS_23"},
+      {0x2c64, "SPI_SHADER_USER_DATA_VS_24"},
+      {0x2c65, "SPI_SHADER_USER_DATA_VS_25"},
+      {0x2c66, "SPI_SHADER_USER_DATA_VS_26"},
+      {0x2c67, "SPI_SHADER_USER_DATA_VS_27"},
+      {0x2c68, "SPI_SHADER_USER_DATA_VS_28"},
+      {0x2c69, "SPI_SHADER_USER_DATA_VS_29"},
+      {0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
+      {0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+
+      {0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
+      {0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
+      {0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
+      {0x2ccf, "SPI_SHADER_USER_DATA_ES_3"},
+      {0x2cd0, "SPI_SHADER_USER_DATA_ES_4"},
+      {0x2cd1, "SPI_SHADER_USER_DATA_ES_5"},
+      {0x2cd2, "SPI_SHADER_USER_DATA_ES_6"},
+      {0x2cd3, "SPI_SHADER_USER_DATA_ES_7"},
+      {0x2cd4, "SPI_SHADER_USER_DATA_ES_8"},
+      {0x2cd5, "SPI_SHADER_USER_DATA_ES_9"},
+      {0x2cd6, "SPI_SHADER_USER_DATA_ES_10"},
+      {0x2cd7, "SPI_SHADER_USER_DATA_ES_11"},
+      {0x2cd8, "SPI_SHADER_USER_DATA_ES_12"},
+      {0x2cd9, "SPI_SHADER_USER_DATA_ES_13"},
+      {0x2cda, "SPI_SHADER_USER_DATA_ES_14"},
+      {0x2cdb, "SPI_SHADER_USER_DATA_ES_15"},
+      {0x2cdc, "SPI_SHADER_USER_DATA_ES_16"},
+      {0x2cdd, "SPI_SHADER_USER_DATA_ES_17"},
+      {0x2cde, "SPI_SHADER_USER_DATA_ES_18"},
+      {0x2cdf, "SPI_SHADER_USER_DATA_ES_19"},
+      {0x2ce0, "SPI_SHADER_USER_DATA_ES_20"},
+      {0x2ce1, "SPI_SHADER_USER_DATA_ES_21"},
+      {0x2ce2, "SPI_SHADER_USER_DATA_ES_22"},
+      {0x2ce3, "SPI_SHADER_USER_DATA_ES_23"},
+      {0x2ce4, "SPI_SHADER_USER_DATA_ES_24"},
+      {0x2ce5, "SPI_SHADER_USER_DATA_ES_25"},
+      {0x2ce6, "SPI_SHADER_USER_DATA_ES_26"},
+      {0x2ce7, "SPI_SHADER_USER_DATA_ES_27"},
+      {0x2ce8, "SPI_SHADER_USER_DATA_ES_28"},
+      {0x2ce9, "SPI_SHADER_USER_DATA_ES_29"},
+      {0x2cea, "SPI_SHADER_USER_DATA_ES_30"},
+      {0x2ceb, "SPI_SHADER_USER_DATA_ES_31"},
+
+      {0x2c0c, "SPI_SHADER_USER_DATA_PS_0"},
+      {0x2c0d, "SPI_SHADER_USER_DATA_PS_1"},
+      {0x2c0e, "SPI_SHADER_USER_DATA_PS_2"},
+      {0x2c0f, "SPI_SHADER_USER_DATA_PS_3"},
+      {0x2c10, "SPI_SHADER_USER_DATA_PS_4"},
+      {0x2c11, "SPI_SHADER_USER_DATA_PS_5"},
+      {0x2c12, "SPI_SHADER_USER_DATA_PS_6"},
+      {0x2c13, "SPI_SHADER_USER_DATA_PS_7"},
+      {0x2c14, "SPI_SHADER_USER_DATA_PS_8"},
+      {0x2c15, "SPI_SHADER_USER_DATA_PS_9"},
+      {0x2c16, "SPI_SHADER_USER_DATA_PS_10"},
+      {0x2c17, "SPI_SHADER_USER_DATA_PS_11"},
+      {0x2c18, "SPI_SHADER_USER_DATA_PS_12"},
+      {0x2c19, "SPI_SHADER_USER_DATA_PS_13"},
+      {0x2c1a, "SPI_SHADER_USER_DATA_PS_14"},
+      {0x2c1b, "SPI_SHADER_USER_DATA_PS_15"},
+      {0x2c1c, "SPI_SHADER_USER_DATA_PS_16"},
+      {0x2c1d, "SPI_SHADER_USER_DATA_PS_17"},
+      {0x2c1e, "SPI_SHADER_USER_DATA_PS_18"},
+      {0x2c1f, "SPI_SHADER_USER_DATA_PS_19"},
+      {0x2c20, "SPI_SHADER_USER_DATA_PS_20"},
+      {0x2c21, "SPI_SHADER_USER_DATA_PS_21"},
+      {0x2c22, "SPI_SHADER_USER_DATA_PS_22"},
+      {0x2c23, "SPI_SHADER_USER_DATA_PS_23"},
+      {0x2c24, "SPI_SHADER_USER_DATA_PS_24"},
+      {0x2c25, "SPI_SHADER_USER_DATA_PS_25"},
+      {0x2c26, "SPI_SHADER_USER_DATA_PS_26"},
+      {0x2c27, "SPI_SHADER_USER_DATA_PS_27"},
+      {0x2c28, "SPI_SHADER_USER_DATA_PS_28"},
+      {0x2c29, "SPI_SHADER_USER_DATA_PS_29"},
+      {0x2c2a, "SPI_SHADER_USER_DATA_PS_30"},
+      {0x2c2b, "SPI_SHADER_USER_DATA_PS_31"},
+
+      {0x2e40, "COMPUTE_USER_DATA_0"},
+      {0x2e41, "COMPUTE_USER_DATA_1"},
+      {0x2e42, "COMPUTE_USER_DATA_2"},
+      {0x2e43, "COMPUTE_USER_DATA_3"},
+      {0x2e44, "COMPUTE_USER_DATA_4"},
+      {0x2e45, "COMPUTE_USER_DATA_5"},
+      {0x2e46, "COMPUTE_USER_DATA_6"},
+      {0x2e47, "COMPUTE_USER_DATA_7"},
+      {0x2e48, "COMPUTE_USER_DATA_8"},
+      {0x2e49, "COMPUTE_USER_DATA_9"},
+      {0x2e4a, "COMPUTE_USER_DATA_10"},
+      {0x2e4b, "COMPUTE_USER_DATA_11"},
+      {0x2e4c, "COMPUTE_USER_DATA_12"},
+      {0x2e4d, "COMPUTE_USER_DATA_13"},
+      {0x2e4e, "COMPUTE_USER_DATA_14"},
+      {0x2e4f, "COMPUTE_USER_DATA_15"},
+
+      {0x2e07, "COMPUTE_NUM_THREAD_X"},
+      {0x2e08, "COMPUTE_NUM_THREAD_Y"},
+      {0x2e09, "COMPUTE_NUM_THREAD_Z"},
+      {0xa2db, "VGT_TF_PARAM"},
+      {0xa2d6, "VGT_LS_HS_CONFIG"},
+      {0xa287, "VGT_HOS_MIN_TESS_LEVEL"},
+      {0xa286, "VGT_HOS_MAX_TESS_LEVEL"},
+      {0xa2f8, "PA_SC_AA_CONFIG"},
+      {0xa310, "PA_SC_SHADER_CONTROL"},
+      {0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
+
+      {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
+      {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
+      {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
+      {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
+      {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
+      {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
+      {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
+      {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
+      {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
+      {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
+      {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
+      {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
+      {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
+      {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
+      {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
+      {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
+      {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
+      {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
+      {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
+      {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
+      {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
+      {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
+      {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
+      {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
+      {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
+      {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
+      {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
+      {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
+      {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
+      {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
+      {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
+      {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+
+      {0xa2aa, "IA_MULTI_VGT_PARAM"},
+      {0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
+      {0xa2e6, "VGT_STRMOUT_BUFFER_CONFIG"},
+      {0xa2e5, "VGT_STRMOUT_CONFIG"},
+      {0xa2b5, "VGT_STRMOUT_VTX_STRIDE_0"},
+      {0xa2b9, "VGT_STRMOUT_VTX_STRIDE_1"},
+      {0xa2bd, "VGT_STRMOUT_VTX_STRIDE_2"},
+      {0xa2c1, "VGT_STRMOUT_VTX_STRIDE_3"},
+      {0xa316, "VGT_VERTEX_REUSE_BLOCK_CNTL"},
+
+      {0, nullptr}};
+  auto Entry = RegInfoTable;
+  for (; Entry->Num && Entry->Num != RegNum; ++Entry)
+    ;
+  return Entry->Name;
+}
+
+// Convert the accumulated PAL metadata into asm directive text in String.
+void AMDGPUPALMetadata::toString(std::string &String) {
+  String.clear();
+  if (!BlobType) // No PAL metadata: leave String empty.
+    return;
+  raw_string_ostream Stream(String);
+  if (isLegacy()) {
+    if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil) // Empty document.
+      return;
+    // Old linear reg=val format: one directive with comma-separated hex pairs.
+    Stream << '\t' << AMDGPU::PALMD::AssemblerDirective << ' ';
+    auto Regs = getRegisters();
+    for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I) {
+      if (I != Regs.begin())
+        Stream << ',';
+      unsigned Reg = I->first.getUInt();
+      unsigned Val = I->second.getUInt();
+      Stream << "0x" << Twine::utohexstr(Reg) << ",0x" << Twine::utohexstr(Val);
+    }
+    Stream << '\n';
+    return;
+  }
+
+  // New msgpack-based format -- output as YAML (with unsigned numbers in hex),
+  // but first rebuild the registers map so known keys read "<number> (<name>)".
+  MsgPackDoc.setHexMode();
+  auto &RegsObj = refRegisters();
+  auto OrigRegs = RegsObj.getMap();
+  RegsObj = MsgPackDoc.getMapNode();
+  for (auto I : OrigRegs) {
+    auto Key = I.first;
+    if (const char *RegName = getRegisterName(Key.getUInt())) {
+      std::string KeyName = Key.toString();
+      KeyName += " (";
+      KeyName += RegName;
+      KeyName += ')';
+      Key = MsgPackDoc.getNode(KeyName, /*Copy=*/true);
+    }
+    RegsObj.getMap()[Key] = I.second;
+  }
+
+  // Output as YAML, bracketed by the begin/end assembler directives.
+  Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveBegin << '\n';
+  MsgPackDoc.toYAML(Stream);
+  Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveEnd << '\n';
+
+  // Restore original (numeric-keyed) registers map.
+  RegsObj = OrigRegs;
+}
+
+// Convert the accumulated PAL metadata into a binary blob for a .note record
+// of the specified AMD type. Blob is left empty if Type is 0 (no metadata).
+void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+  Blob.clear(); // Covers Type == 0, where neither writer below runs.
+  if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+    toLegacyBlob(Blob);
+  else if (Type)
+    toMsgPackBlob(Blob);
+}
+
+// Write the registers map to Blob as little-endian uint32 key/value pairs.
+void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
+  Blob.clear();
+  auto RegMap = getRegisters();
+  raw_string_ostream BlobStream(Blob);
+  support::endian::Writer LEWriter(BlobStream, support::endianness::little);
+  // With no registers the loop body never runs and Blob stays empty.
+  for (auto RegAndVal : RegMap.getMap()) {
+    LEWriter.write(uint32_t(RegAndVal.first.getUInt()));
+    LEWriter.write(uint32_t(RegAndVal.second.getUInt()));
+  }
+}
+
+void AMDGPUPALMetadata::toMsgPackBlob(std::string &Blob) {
+  Blob.clear(); // Discard any previous contents of the caller's string.
+  MsgPackDoc.writeToBlob(Blob); // Serialize the msgpack document into Blob.
+}
+
+// Set PAL metadata from YAML text. Returns false if failed.
+bool AMDGPUPALMetadata::setFromString(StringRef S) {
+  BlobType = ELF::NT_AMDGPU_METADATA;
+  if (!MsgPackDoc.fromYAML(S))
+    return false;
+
+  // In the registers map, some keys may be of the form "0xa191
+  // (SPI_PS_INPUT_CNTL_0)", in which case the YAML input code made it a
+  // string. We need to turn it into a number.
+  auto &RegsObj = refRegisters();
+  auto OrigRegs = RegsObj;
+  RegsObj = MsgPackDoc.getMapNode();
+  Registers = RegsObj.getMap();
+  bool Ok = true;
+  for (auto I : OrigRegs.getMap()) {
+    auto Key = I.first;
+    if (Key.getKind() == msgpack::Type::String) {
+      StringRef S = Key.getString();
+      uint64_t Val;
+      if (S.consumeInteger(0, Val)) {
+        Ok = false;
+        errs() << "Unrecognized PAL metadata register key '" << S << "'\n";
+        continue;
+      }
+      Key = MsgPackDoc.getNode(uint64_t(Val));
+    }
+    Registers.getMap()[Key] = I.second;
+  }
+  return Ok;
+}
+
+// Reference (create if necessary) the node for the registers map.
+msgpack::DocNode &AMDGPUPALMetadata::refRegisters() {
+  auto &Root = MsgPackDoc.getRoot().getMap(/*Convert=*/true);
+  auto &Pipeline0 = Root[MsgPackDoc.getNode("amdpal.pipelines")]
+                        .getArray(/*Convert=*/true)[0]
+                        .getMap(/*Convert=*/true);
+  auto &RegsNode = Pipeline0[MsgPackDoc.getNode(".registers")];
+  RegsNode.getMap(/*Convert=*/true);
+  return RegsNode;
+}
+
+// Get (create if necessary) the registers map, caching it in Registers.
+msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
+  if (Registers.isEmpty()) // First use: locate or create the map node.
+    Registers = refRegisters();
+  return Registers.getMap();
+}
+
+// Return the PAL metadata hardware shader stage name (".cs" by default).
+static const char *getStageName(CallingConv::ID CC) {
+  switch (CC) {
+  default:
+    return ".cs";
+  case CallingConv::AMDGPU_LS:
+    return ".ls";
+  case CallingConv::AMDGPU_HS:
+    return ".hs";
+  case CallingConv::AMDGPU_ES:
+    return ".es";
+  case CallingConv::AMDGPU_GS:
+    return ".gs";
+  case CallingConv::AMDGPU_VS:
+    return ".vs";
+  case CallingConv::AMDGPU_PS:
+    return ".ps";
+  }
+}
+
+// Get (create if necessary) the .hardware_stages entry for calling conv CC.
+msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
+  if (HwStages.isEmpty()) {
+    auto &Pipeline0 = MsgPackDoc.getRoot()
+                          .getMap(/*Convert=*/true)["amdpal.pipelines"]
+                          .getArray(/*Convert=*/true)[0]
+                          .getMap(/*Convert=*/true);
+    HwStages = Pipeline0[".hardware_stages"].getMap(/*Convert=*/true);
+  }
+  return HwStages.getMap()[getStageName(CC)].getMap(/*Convert=*/true);
+}
+
+// Get .note vendor name of the blob to emit: NoteNameV2 if legacy, else V3.
+const char *AMDGPUPALMetadata::getVendor() const {
+  return isLegacy() ? ElfNote::NoteNameV2 : ElfNote::NoteNameV3;
+}
+
+// Get .note record type of metadata blob to be emitted (the stored BlobType):
+// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+// ELF::NT_AMDGPU_METADATA (MsgPack format), or
+// 0 (no PAL metadata).
+unsigned AMDGPUPALMetadata::getType() const {
+  return BlobType;
+}
+
+// Return whether the blob type is the legacy (key=val) PAL metadata note.
+bool AMDGPUPALMetadata::isLegacy() const {
+  return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
+// Select the legacy (key=val) PAL metadata format for the emitted blob.
+void AMDGPUPALMetadata::setLegacy() {
+  BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
new file mode 100644
index 000000000000..0f17c157b206
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -0,0 +1,135 @@
+//===-- AMDGPUPALMetadata.h - PAL metadata handling -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// PAL metadata handling
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include <map>
+
+namespace llvm {
+
+class AMDGPUTargetStreamer;
+class formatted_raw_ostream;
+class MCStreamer;
+class Module;
+
+class AMDGPUPALMetadata { // Builds up PAL metadata for later emission.
+  unsigned BlobType = 0;        // .note record type; 0 means no PAL metadata.
+  msgpack::Document MsgPackDoc; // Document that holds the PAL metadata.
+  msgpack::DocNode Registers;   // Registers map node; see refRegisters().
+  msgpack::DocNode HwStages;    // Cached .hardware_stages; see getHwStage().
+
+public:
+  // Read the amdgpu.pal.metadata supplied by the frontend, ready for
+  // per-function modification.
+  void readFromIR(Module &M);
+
+  // Set PAL metadata from a binary blob from the applicable .note record.
+  // Returns false if bad format.  Blob must remain valid for the lifetime of
+  // the Metadata.
+  bool setFromBlob(unsigned Type, StringRef Blob);
+
+  // Set the rsrc1 register in the metadata for a particular shader stage.
+  // In fact this ORs the value into any previous setting of the register.
+  void setRsrc1(unsigned CC, unsigned Val);
+
+  // Set the rsrc2 register in the metadata for a particular shader stage.
+  // In fact this ORs the value into any previous setting of the register.
+  void setRsrc2(unsigned CC, unsigned Val);
+
+  // Set the SPI_PS_INPUT_ENA register in the metadata.
+  // In fact this ORs the value into any previous setting of the register.
+  void setSpiPsInputEna(unsigned Val);
+
+  // Set the SPI_PS_INPUT_ADDR register in the metadata.
+  // In fact this ORs the value into any previous setting of the register.
+  void setSpiPsInputAddr(unsigned Val);
+
+  // Get a register from the metadata, or 0 if not currently set.
+  unsigned getRegister(unsigned Reg);
+
+  // Set a register in the metadata.
+  // In fact this ORs the value into any previous setting of the register.
+  void setRegister(unsigned Reg, unsigned Val);
+
+  // Set the entry point name for one shader.
+  void setEntryPoint(unsigned CC, StringRef Name);
+
+  // Set the number of used vgprs in the metadata. This is an optional advisory
+  // record for logging etc; wave dispatch actually uses the rsrc1 register for
+  // the shader stage to determine the number of vgprs to allocate.
+  void setNumUsedVgprs(unsigned CC, unsigned Val);
+
+  // Set the number of used sgprs in the metadata. This is an optional advisory
+  // record for logging etc; wave dispatch actually uses the rsrc1 register for
+  // the shader stage to determine the number of sgprs to allocate.
+  void setNumUsedSgprs(unsigned CC, unsigned Val);
+
+  // Set the scratch size in the metadata.
+  void setScratchSize(unsigned CC, unsigned Val);
+
+  // Set the hardware register bit in PAL metadata to enable wave32 on the
+  // shader of the given calling convention.
+  void setWave32(unsigned CC);
+
+  // Emit the accumulated PAL metadata as asm directives.
+  // This is called from AMDGPUTargetAsmStreamer::Finish().
+  void toString(std::string &S);
+
+  // Set PAL metadata from YAML text.
+  bool setFromString(StringRef S);
+
+  // Get .note record vendor name of metadata blob to be emitted.
+  const char *getVendor() const;
+
+  // Get .note record type of metadata blob to be emitted:
+  // ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+  // ELF::NT_AMDGPU_METADATA (MsgPack format), or
+  // 0 (no PAL metadata).
+  unsigned getType() const;
+
+  // Emit the accumulated PAL metadata as a binary blob.
+  // This is called from AMDGPUTargetELFStreamer::Finish().
+  void toBlob(unsigned Type, std::string &S);
+
+  // Get the msgpack::Document for the PAL metadata.
+  msgpack::Document *getMsgPackDoc() { return &MsgPackDoc; }
+
+  // Set legacy PAL metadata format.
+  void setLegacy();
+
+private:
+  // Return whether the blob type is legacy PAL metadata.
+  bool isLegacy() const;
+
+  // Reference (create if necessary) the node for the registers map.
+  msgpack::DocNode &refRegisters();
+
+  // Get (create if necessary) the registers map.
+  msgpack::MapDocNode getRegisters();
+
+  // Get (create if necessary) the .hardware_stages entry for the given calling
+  // convention.
+  msgpack::MapDocNode getHwStage(unsigned CC);
+
+  bool setFromLegacyBlob(StringRef Blob);  // NOTE(review): these four look like
+  bool setFromMsgPackBlob(StringRef Blob); // the per-format halves of
+  void toLegacyBlob(std::string &Blob);    // setFromBlob() and toBlob();
+  void toMsgPackBlob(std::string &Blob);   // confirm against the .cpp.
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 82ffdef8e674..95ad3f35d18f 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -1,9 +1,8 @@
 //===--------------------- AMDKernelCodeTInfo.h ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,9 @@ COMPPGM1(priv,                            compute_pgm_rsrc1_priv,           PRIV
 COMPPGM1(enable_dx10_clamp,               compute_pgm_rsrc1_dx10_clamp,     DX10_CLAMP),
 COMPPGM1(debug_mode,                      compute_pgm_rsrc1_debug_mode,     DEBUG_MODE),
 COMPPGM1(enable_ieee_mode,                compute_pgm_rsrc1_ieee_mode,      IEEE_MODE),
+COMPPGM1(enable_wgp_mode,                 compute_pgm_rsrc1_wgp_mode,       WGP_MODE),
+COMPPGM1(enable_mem_ordered,              compute_pgm_rsrc1_mem_ordered,    MEM_ORDERED),
+COMPPGM1(enable_fwd_progress,             compute_pgm_rsrc1_fwd_progress,   FWD_PROGRESS),
 // TODO: bulky
 // TODO: cdbg_user
 COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
@@ -107,6 +109,7 @@ CODEPROP(enable_sgpr_private_segment_size,    ENABLE_SGPR_PRIVATE_SEGMENT_SIZE),
 CODEPROP(enable_sgpr_grid_workgroup_count_x,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
 CODEPROP(enable_sgpr_grid_workgroup_count_y,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
 CODEPROP(enable_sgpr_grid_workgroup_count_z,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32,             ENABLE_WAVEFRONT_SIZE32),
 CODEPROP(enable_ordered_append_gds,           ENABLE_ORDERED_APPEND_GDS),
 CODEPROP(private_element_size,                PRIVATE_ELEMENT_SIZE),
 CODEPROP(is_ptr64,                            IS_PTR64),
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 20059f4a1ed7..443e2cc45ac0 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -1,9 +1,8 @@
 //===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index ef9f9bdb6bcb..a87325a78df3 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -1,9 +1,8 @@
 //===- AMDGPUKernelCodeTUtils.h - helpers for amd_kernel_code_t -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/VIInstrFormats.td b/lib/Target/AMDGPU/VIInstrFormats.td
index 1fd1c1e21527..bd65a495fa72 100644
--- a/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/lib/Target/AMDGPU/VIInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- VIInstrFormats.td - VI Instruction Encodings ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td
index b45c8fc9c7d5..ec7d8875a746 100644
--- a/lib/Target/AMDGPU/VIInstructions.td
+++ b/lib/Target/AMDGPU/VIInstructions.td
@@ -1,9 +1,8 @@
 //===-- VIInstructions.td - VI Instruction Defintions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Instruction definitions for VI and newer.
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 68446ab79720..6bc416ed7d4b 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1,9 +1,8 @@
 //===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,7 +14,7 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
   bits<8> vdst;
   bits<9> src0;
 
-  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, 0);
+  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
   let Inst{16-9}  = op;
   let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
   let Inst{31-25} = 0x3f; //encoding
@@ -48,7 +47,6 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let SubtargetPredicate = isGCN;
 
   let VOP1 = 1;
   let VALU = 1;
@@ -144,7 +142,7 @@ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
 // TODO: Make profile for this, there is VOP3 encoding also
 def V_READFIRSTLANE_B32 :
   InstSI <(outs SReg_32:$vdst),
-    (ins VGPR_32:$src0),
+    (ins VRegOrLds_32:$src0),
     "v_readfirstlane_b32 $vdst, $src0",
     [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
   Enc32 {
@@ -156,7 +154,6 @@ def V_READFIRSTLANE_B32 :
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let SubtargetPredicate = isGCN;
 
   let VOP1 = 1;
   let VALU = 1;
@@ -172,9 +169,16 @@ def V_READFIRSTLANE_B32 :
   let Inst{31-25} = 0x3f; //encoding
 }
 
-let SchedRW = [WriteQuarterRate32] in {
-defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+let SchedRW = [WriteDoubleCvt] in {
+defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64,  fp_to_sint>;
 defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64,  fpround>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32,  fpextend>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64,  fp_to_uint>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
+} // End SchedRW = [WriteDoubleCvt]
+
+let SchedRW = [WriteQuarterRate32] in {
 defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
 defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
 defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
@@ -186,15 +190,12 @@ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
 defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
 defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
 defm V_CVT_OFF_F32_I4 : VOP1Inst  <"v_cvt_off_f32_i4", VOP1_F32_I32>;
-defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
-defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+} // End SchedRW = [WriteQuarterRate32]
+
 defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
 defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
 defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
 defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
-defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
-} // End SchedRW = [WriteQuarterRate32]
 
 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
 defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
@@ -271,6 +272,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
   let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
 
   let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                      clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
@@ -279,6 +281,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
   let Asm32 = getAsm32<1, 1>.ret;
   let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
   let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
   let AsmSDWA = getAsmSDWA<1, 1>.ret;
   let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
 
@@ -305,41 +308,43 @@ defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
 
 defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
 
-// These instruction only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
-defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
-defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
-defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
-defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
-} // End SchedRW = [WriteDouble]
-
-} // End SubtargetPredicate = isSICI
-
-
-let SubtargetPredicate = isCIVI in {
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
-defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
-defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
-defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
-} // End SchedRW = [WriteDoubleAdd]
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
-defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
-} // End SchedRW = [WriteQuarterRate32]
-
-} // End SubtargetPredicate = isCIVI
-
+let SubtargetPredicate = isGFX6GFX7 in {
+  let SchedRW = [WriteQuarterRate32] in {
+    defm V_LOG_CLAMP_F32 :
+      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
+    defm V_RCP_CLAMP_F32 :
+      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
+    defm V_RCP_LEGACY_F32 :
+      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
+    defm V_RSQ_CLAMP_F32 :
+      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
+    defm V_RSQ_LEGACY_F32 :
+      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+  } // End SchedRW = [WriteQuarterRate32]
+
+  let SchedRW = [WriteDouble] in {
+    defm V_RCP_CLAMP_F64 :
+      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
+    defm V_RSQ_CLAMP_F64 :
+      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
+  } // End SchedRW = [WriteDouble]
+} // End SubtargetPredicate = isGFX6GFX7
+
+let SubtargetPredicate = isGFX7GFX8GFX9 in {
+  let SchedRW = [WriteQuarterRate32] in {
+    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
+    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
+  } // End SchedRW = [WriteQuarterRate32]
+} // End SubtargetPredicate = isGFX7GFX8GFX9
+
+let SubtargetPredicate = isGFX7Plus in {
+  let SchedRW = [WriteDoubleAdd] in {
+    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
+    defm V_CEIL_F64  : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
+    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
+    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
+  } // End SchedRW = [WriteDoubleAdd]
+} // End SubtargetPredicate = isGFX7Plus
 
 let SubtargetPredicate = Has16BitInsts in {
 
@@ -393,125 +398,279 @@ def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
   let Ins64 = (ins);
 }
 
-let SubtargetPredicate = isGFX9 in {
-  let Constraints = "$vdst = $src1, $vdst1 = $src0",
-      DisableEncoding="$vdst1,$src1",
-      SchedRW = [Write64Bit, Write64Bit] in {
-// Never VOP3. Takes as long as 2 v_mov_b32s
-def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
+let SubtargetPredicate = isGFX9Plus in {
+  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
+    let Constraints = "$vdst = $src1, $vdst1 = $src0";
+    let DisableEncoding = "$vdst1,$src1";
+    let SchedRW = [Write64Bit, Write64Bit];
+  }
+
+  defm V_SAT_PK_U8_I16    : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
+  defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
+  defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX9Only in {
+  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
+} // End SubtargetPredicate = isGFX9Only
+
+let SubtargetPredicate = isGFX10Plus in {
+  defm V_PIPEFLUSH        : VOP1Inst<"v_pipeflush", VOP_NONE>;
+
+  let Uses = [M0] in {
+    // FIXME-GFX10: Should V_MOVRELSD_2_B32 be VOP_NO_EXT?
+    defm V_MOVRELSD_2_B32 :
+      VOP1Inst<"v_movrelsd_2_b32", VOP_NO_EXT<VOP_I32_I32>>;
+
+    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
+      let Constraints = "$vdst = $src1, $vdst1 = $src0";
+      let DisableEncoding = "$vdst1,$src1";
+      let SchedRW = [Write64Bit, Write64Bit];
+    }
+  } // End Uses = [M0]
+} // End SubtargetPredicate = isGFX10Plus
+
+//===----------------------------------------------------------------------===//
+// Target-specific instruction encodings.
+//===----------------------------------------------------------------------===//
+
+class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
+    VOP_DPP<ps.OpName, p, isDPP16> {
+  let hasSideEffects = ps.hasSideEffects; // Properties are copied from ps.
+  let Defs = ps.Defs;
+  let SchedRW = ps.SchedRW;
+  let Uses = ps.Uses;
+
+  bits<8> vdst;
+  let Inst{8-0}   = 0xfa; // NOTE(review): presumably the DPP src0 marker; confirm.
+  let Inst{16-9}  = op;
+  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); // Zero when no dst is emitted.
+  let Inst{31-25} = 0x3f; // Same value VOP1e uses (marked "encoding" there).
 }
 
-defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
+class VOP1_DPP16<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+    VOP1_DPP<op, ps, p, 1> { // Same as VOP1_DPP, with isDPP16 set to 1.
+  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); // Off if !HasExt.
+  let SubtargetPredicate = HasDPP16;
+}
 
-defm V_SAT_PK_U8_I16    : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
-defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
-defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+    VOP_DPP8<ps.OpName, p> {
+  let hasSideEffects = ps.hasSideEffects;
+  let Defs = ps.Defs;
+  let SchedRW = ps.SchedRW;
+  let Uses = ps.Uses;
 
-} // End SubtargetPredicate = isGFX9
+  bits<8> vdst;
+  let Inst{8-0}   = fi;
+  let Inst{16-9}  = op;
+  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+  let Inst{31-25} = 0x3f;
+
+  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+  let SubtargetPredicate = HasDPP8;
+}
 
 //===----------------------------------------------------------------------===//
-// Target
+// GFX10.
 //===----------------------------------------------------------------------===//
 
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass VOP1Only_Real_gfx10<bits<9> op> { // Pseudo is NAME, no _e32 suffix.
+    def _gfx10 :
+      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP1_Real_e32_gfx10<bits<9> op> { // VOP1e form of NAME#"_e32".
+    def _e32_gfx10 :
+      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+  }
+  multiclass VOP1_Real_e64_gfx10<bits<9> op> { // VOP3 form of NAME#"_e64".
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+  }
+  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
+    def _sdwa_gfx10 :
+      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+      let DecoderNamespace = "SDWA10"; // Overrides the enclosing "GFX10".
+    }
+  }
+  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
+    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+      let DecoderNamespace = "SDWA10"; // NOTE(review): DPP16 in "SDWA10"; confirm.
+    }
+  }
+  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
+    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+      let DecoderNamespace = "DPP8"; // Overrides the enclosing "GFX10".
+    }
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
+  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+  VOP1_Real_sdwa_gfx10<op>; // e32, e64 and SDWA only: no DPP16, no DPP8.
+
+multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
+  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>; // Adds DPP16; no DPP8.
+
+multiclass VOP1_Real_gfx10<bits<9> op> :
+  VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>; // All five variants.
+
+defm V_PIPEFLUSH         : VOP1_Real_gfx10<0x01b>;
+defm V_MOVRELSD_2_B32    : VOP1_Real_gfx10<0x048>;
+defm V_CVT_F16_U16       : VOP1_Real_gfx10<0x050>;
+defm V_CVT_F16_I16       : VOP1_Real_gfx10<0x051>;
+defm V_CVT_U16_F16       : VOP1_Real_gfx10<0x052>;
+defm V_CVT_I16_F16       : VOP1_Real_gfx10<0x053>;
+defm V_RCP_F16           : VOP1_Real_gfx10<0x054>;
+defm V_SQRT_F16          : VOP1_Real_gfx10<0x055>;
+defm V_RSQ_F16           : VOP1_Real_gfx10<0x056>;
+defm V_LOG_F16           : VOP1_Real_gfx10<0x057>;
+defm V_EXP_F16           : VOP1_Real_gfx10<0x058>;
+defm V_FREXP_MANT_F16    : VOP1_Real_gfx10<0x059>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
+defm V_FLOOR_F16         : VOP1_Real_gfx10<0x05b>;
+defm V_CEIL_F16          : VOP1_Real_gfx10<0x05c>;
+defm V_TRUNC_F16         : VOP1_Real_gfx10<0x05d>;
+defm V_RNDNE_F16         : VOP1_Real_gfx10<0x05e>;
+defm V_FRACT_F16         : VOP1_Real_gfx10<0x05f>;
+defm V_SIN_F16           : VOP1_Real_gfx10<0x060>;
+defm V_COS_F16           : VOP1_Real_gfx10<0x061>;
+defm V_SAT_PK_U8_I16     : VOP1_Real_gfx10<0x062>;
+defm V_CVT_NORM_I16_F16  : VOP1_Real_gfx10<0x063>;
+defm V_CVT_NORM_U16_F16  : VOP1_Real_gfx10<0x064>;
+
+defm V_SWAP_B32    : VOP1Only_Real_gfx10<0x065>;
+defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;
+
 //===----------------------------------------------------------------------===//
-// SI
+// GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-multiclass VOP1_Real_si <bits<9> op> {
-  let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
-    def _e32_si :
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
+    def _e32_gfx7 :
       VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
       VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
-    def _e64_si :
+  }
+  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
+    def _e64_gfx7 :
       VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
-      VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
   }
-}
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
 
-defm V_NOP               : VOP1_Real_si <0x0>;
-defm V_MOV_B32           : VOP1_Real_si <0x1>;
-defm V_CVT_I32_F64       : VOP1_Real_si <0x3>;
-defm V_CVT_F64_I32       : VOP1_Real_si <0x4>;
-defm V_CVT_F32_I32       : VOP1_Real_si <0x5>;
-defm V_CVT_F32_U32       : VOP1_Real_si <0x6>;
-defm V_CVT_U32_F32       : VOP1_Real_si <0x7>;
-defm V_CVT_I32_F32       : VOP1_Real_si <0x8>;
-defm V_MOV_FED_B32       : VOP1_Real_si <0x9>;
-defm V_CVT_F16_F32       : VOP1_Real_si <0xa>;
-defm V_CVT_F32_F16       : VOP1_Real_si <0xb>;
-defm V_CVT_RPI_I32_F32   : VOP1_Real_si <0xc>;
-defm V_CVT_FLR_I32_F32   : VOP1_Real_si <0xd>;
-defm V_CVT_OFF_F32_I4    : VOP1_Real_si <0xe>;
-defm V_CVT_F32_F64       : VOP1_Real_si <0xf>;
-defm V_CVT_F64_F32       : VOP1_Real_si <0x10>;
-defm V_CVT_F32_UBYTE0    : VOP1_Real_si <0x11>;
-defm V_CVT_F32_UBYTE1    : VOP1_Real_si <0x12>;
-defm V_CVT_F32_UBYTE2    : VOP1_Real_si <0x13>;
-defm V_CVT_F32_UBYTE3    : VOP1_Real_si <0x14>;
-defm V_CVT_U32_F64       : VOP1_Real_si <0x15>;
-defm V_CVT_F64_U32       : VOP1_Real_si <0x16>;
-defm V_FRACT_F32         : VOP1_Real_si <0x20>;
-defm V_TRUNC_F32         : VOP1_Real_si <0x21>;
-defm V_CEIL_F32          : VOP1_Real_si <0x22>;
-defm V_RNDNE_F32         : VOP1_Real_si <0x23>;
-defm V_FLOOR_F32         : VOP1_Real_si <0x24>;
-defm V_EXP_F32           : VOP1_Real_si <0x25>;
-defm V_LOG_CLAMP_F32     : VOP1_Real_si <0x26>;
-defm V_LOG_F32           : VOP1_Real_si <0x27>;
-defm V_RCP_CLAMP_F32     : VOP1_Real_si <0x28>;
-defm V_RCP_LEGACY_F32    : VOP1_Real_si <0x29>;
-defm V_RCP_F32           : VOP1_Real_si <0x2a>;
-defm V_RCP_IFLAG_F32     : VOP1_Real_si <0x2b>;
-defm V_RSQ_CLAMP_F32     : VOP1_Real_si <0x2c>;
-defm V_RSQ_LEGACY_F32    : VOP1_Real_si <0x2d>;
-defm V_RSQ_F32           : VOP1_Real_si <0x2e>;
-defm V_RCP_F64           : VOP1_Real_si <0x2f>;
-defm V_RCP_CLAMP_F64     : VOP1_Real_si <0x30>;
-defm V_RSQ_F64           : VOP1_Real_si <0x31>;
-defm V_RSQ_CLAMP_F64     : VOP1_Real_si <0x32>;
-defm V_SQRT_F32          : VOP1_Real_si <0x33>;
-defm V_SQRT_F64          : VOP1_Real_si <0x34>;
-defm V_SIN_F32           : VOP1_Real_si <0x35>;
-defm V_COS_F32           : VOP1_Real_si <0x36>;
-defm V_NOT_B32           : VOP1_Real_si <0x37>;
-defm V_BFREV_B32         : VOP1_Real_si <0x38>;
-defm V_FFBH_U32          : VOP1_Real_si <0x39>;
-defm V_FFBL_B32          : VOP1_Real_si <0x3a>;
-defm V_FFBH_I32          : VOP1_Real_si <0x3b>;
-defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
-defm V_FREXP_MANT_F64    : VOP1_Real_si <0x3d>;
-defm V_FRACT_F64         : VOP1_Real_si <0x3e>;
-defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
-defm V_FREXP_MANT_F32    : VOP1_Real_si <0x40>;
-defm V_CLREXCP           : VOP1_Real_si <0x41>;
-defm V_MOVRELD_B32       : VOP1_Real_si <0x42>;
-defm V_MOVRELS_B32       : VOP1_Real_si <0x43>;
-defm V_MOVRELSD_B32      : VOP1_Real_si <0x44>;
+multiclass VOP1_Real_gfx7<bits<9> op> :
+  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
+
+multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
+  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;
+
+defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
+
+defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
+defm V_CEIL_F64  : VOP1_Real_gfx7_gfx10<0x018>;
+defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
+defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;
 
 //===----------------------------------------------------------------------===//
-// CI
+// GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-multiclass VOP1_Real_ci <bits<9> op> {
-  let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
-    def _e32_ci :
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
+    def _e32_gfx6_gfx7 :
       VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
       VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
-    def _e64_ci :
+  }
+  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
+    def _e64_gfx6_gfx7 :
       VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
-      VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
   }
-}
-
-defm V_TRUNC_F64         : VOP1_Real_ci <0x17>;
-defm V_CEIL_F64          : VOP1_Real_ci <0x18>;
-defm V_FLOOR_F64         : VOP1_Real_ci <0x1A>;
-defm V_RNDNE_F64         : VOP1_Real_ci <0x19>;
-defm V_LOG_LEGACY_F32    : VOP1_Real_ci <0x45>;
-defm V_EXP_LEGACY_F32    : VOP1_Real_ci <0x46>;
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
+  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
+  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
+  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
+  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
+
+defm V_LOG_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x026>;
+defm V_RCP_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x028>;
+defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
+defm V_RSQ_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x02c>;
+defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
+defm V_RCP_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x030>;
+defm V_RSQ_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x032>;
+
+defm V_NOP               : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
+defm V_MOV_B32           : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
+defm V_CVT_I32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_CVT_F64_I32       : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_CVT_F32_I32       : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_CVT_F32_U32       : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_CVT_U32_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_CVT_I32_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_MOV_FED_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_CVT_F16_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_CVT_F32_F16       : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_CVT_RPI_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_CVT_FLR_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
+defm V_CVT_OFF_F32_I4    : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
+defm V_CVT_F32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_CVT_F64_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_CVT_F32_UBYTE0    : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_CVT_F32_UBYTE1    : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_CVT_F32_UBYTE2    : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_CVT_F32_UBYTE3    : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_CVT_U32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
+defm V_CVT_F64_U32       : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_FRACT_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
+defm V_TRUNC_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
+defm V_CEIL_F32          : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
+defm V_RNDNE_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
+defm V_FLOOR_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
+defm V_EXP_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
+defm V_LOG_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
+defm V_RCP_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
+defm V_RCP_IFLAG_F32     : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
+defm V_RSQ_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
+defm V_RCP_F64           : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_RSQ_F64           : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
+defm V_SQRT_F32          : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
+defm V_SQRT_F64          : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
+defm V_SIN_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
+defm V_COS_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
+defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
+defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
+defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
+defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
+defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
+defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
+defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
+defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
+defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
+defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
+defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
+defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;
 
 //===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
 class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
@@ -524,7 +683,7 @@ class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
 }
 
 multiclass VOP1Only_Real_vi <bits<10> op> {
-  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+  let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
     def _vi :
       VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
       VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
@@ -532,7 +691,7 @@ multiclass VOP1Only_Real_vi <bits<10> op> {
 }
 
 multiclass VOP1_Real_e32e64_vi <bits<10> op> {
-  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+  let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
     def _e32_vi :
       VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
       VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
@@ -649,7 +808,7 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
   PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                         getVOPSrc0ForVT<i32>.ret:$src0)> {
   let VOP1 = 1;
-  let SubtargetPredicate = isVI;
+  let SubtargetPredicate = isGFX8GFX9;
 }
 
 // This is a pseudo variant of the v_movreld_b32 instruction in which the
@@ -672,7 +831,7 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
 def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
 def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
 
-let OtherPredicates = [isVI] in {
+let OtherPredicates = [isGFX8GFX9] in {
 
 def : GCNPat <
   (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
@@ -690,6 +849,9 @@ def : GCNPat <
                        (as_i1imm $bound_ctrl))
 >;
 
+} // End OtherPredicates = [isGFX8GFX9]
+
+let OtherPredicates = [isGFX8Plus] in {
 def : GCNPat<
   (i32 (anyext i16:$src)),
   (COPY $src)
@@ -712,14 +874,14 @@ def : GCNPat <
   (EXTRACT_SUBREG $src, sub0)
 >;
 
-} // End OtherPredicates = [isVI]
+} // End OtherPredicates = [isGFX8Plus]
 
 //===----------------------------------------------------------------------===//
 // GFX9
 //===----------------------------------------------------------------------===//
 
 multiclass VOP1_Real_gfx9 <bits<10> op> {
-  let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
+  let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
     defm NAME : VOP1_Real_e32e64_vi <op>;
   }
 
@@ -735,3 +897,30 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
 }
 
 defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
+
+//===----------------------------------------------------------------------===//
+// GFX10
+//===----------------------------------------------------------------------===//
+
+let OtherPredicates = [isGFX10Plus] in {
+def : GCNPat < // mov_dpp8: $src is passed for both leading operands
+  (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+  (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
+>;
+
+def : GCNPat < // mov_dpp: $src is passed for both leading operands
+  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
+                      imm:$bound_ctrl)),
+  (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
+                       (as_i32imm $row_mask), (as_i32imm $bank_mask),
+                       (as_i1imm $bound_ctrl), (i32 0)) // presumably fi = 0; confirm
+>;
+
+def : GCNPat < // update_dpp: a separate $old value precedes $src
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
+                              imm:$bank_mask, imm:$bound_ctrl)),
+  (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
+                       (as_i32imm $row_mask), (as_i32imm $bank_mask),
+                       (as_i1imm $bound_ctrl), (i32 0)) // presumably fi = 0; confirm
+>;
+} // End OtherPredicates = [isGFX10Plus]
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index e3fd7b5f9fad..1b30cd2ed516 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1,9 +1,8 @@
 //===-- VOP2Instructions.td - Vector Instruction Defintions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -69,7 +68,6 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let SubtargetPredicate = isGCN;
 
   let VOP2 = 1;
   let VALU = 1;
@@ -177,7 +175,9 @@ multiclass VOP2bInst <string opName,
     let SchedRW = [Write32Bit, WriteSALU] in {
       let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
         def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
-                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
+          let usesCustomInserter = !eq(P.NumSrcArgs, 2);
+        }
 
         def _sdwa  : VOP2_SDWA_Pseudo <opName, P> {
           let AsmMatchConverter = "cvtSdwaVOP2b";
@@ -192,6 +192,23 @@ multiclass VOP2bInst <string opName,
   }
 }
 
+class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst, // Asm alias mapping to inst.
+                      string OpName, string opnd> :
+  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32), // vcc in Asm32 -> opnd
+             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
+                   ps.Pfl.Src1RC32:$src1)>,
+  PredicateControl {
+}
+
+multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> { // One alias per wave size.
+  let WaveSizePredicate = isWave32 in {
+    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">; // wave32: vcc is spelled vcc_lo
+  }
+  let WaveSizePredicate = isWave64 in {
+    def : VOP2bInstAlias<ps, inst, OpName, "vcc">; // wave64: vcc spelling kept as is
+  }
+}
+
 multiclass VOP2eInst <string opName,
                       VOPProfile P,
                       SDPatternOperator node = null_frag,
@@ -216,6 +233,22 @@ multiclass VOP2eInst <string opName,
   }
 }
 
+class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> : // Asm alias mapping to inst.
+  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd, // Asm32 with opnd appended as a trailing operand
+             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
+                   ps.Pfl.Src1RC32:$src1)>,
+  PredicateControl {
+}
+
+multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> { // One alias per wave size.
+  let WaveSizePredicate = isWave32 in {
+    def : VOP2eInstAlias<ps, inst, "vcc_lo">; // wave32: trailing operand is vcc_lo
+  }
+  let WaveSizePredicate = isWave64 in {
+    def : VOP2eInstAlias<ps, inst, "vcc">; // wave64: trailing operand is vcc
+  }
+}
+
 class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
   field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
   field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
@@ -244,15 +277,22 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
 
 // FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
 // and processing time but it makes it easier to convert to mad.
-class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
   let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
-                       0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+                       0, HasModifiers, HasModifiers, HasOMod,
+                       Src0Mod, Src1Mod, Src2Mod>.ret;
   let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
+  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+                     VGPR_32:$src2, // stub argument
+                     dpp8:$dpp8, FI:$fi);
 
   let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                      Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
@@ -260,11 +300,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
                      clampmod:$clamp, omod:$omod,
                      dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
-  let Asm32 = getAsm32<1, 2, vt>.ret;
-  let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt>.ret;
-  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
-  let AsmSDWA = getAsmSDWA<1, 2, vt>.ret;
-  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
+  let Asm32 = getAsm32<1, 2, vt0>.ret;
+  let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
+  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
+  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
+  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
+  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
+  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
   let HasSrc2 = 0;
   let HasSrc2Mods = 0;
 
@@ -272,38 +314,51 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
   let HasExtDPP = 1;
   let HasExtSDWA = 1;
   let HasExtSDWA9 = 0;
+  let TieRegDPP = "$src2";
 }
 
 def VOP_MAC_F16 : VOP_MAC <f16>;
 def VOP_MAC_F32 : VOP_MAC <f32>;
 
+class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { // VOP_MAC profile over [vt0, vt1, vt1, vt0]
+  let HasClamp = 0;
+  let HasExtSDWA = 0; // overrides the HasExtSDWA = 1 set by VOP_MAC
+  let HasModifiers = 1;
+  let HasOpSel = 0;
+  let IsPacked = 0;
+}
+
+def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
+  let Src0ModDPP = FPVRegInputMods;
+  let Src1ModDPP = FPVRegInputMods;
+}
+def VOP_DOT_ACC_I32_I32   : VOP_DOT_ACC<i32, i32>;
+
 // Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
   let Asm32 = "$vdst, vcc, $src0, $src1";
-  let Asm64 = "$vdst, $sdst, $src0, $src1";
+  let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
   let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
+  let AsmDPP16 = AsmDPP#"$fi";
   let Outs32 = (outs DstRC:$vdst);
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
 }
 
 // Write out to vcc or arbitrary SGPR and read in from vcc or
 // arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
-  // We use VCSrc_b32 to exclude literal constants, even though the
-  // encoding normally allows them since the implicit VCC use means
-  // using one would always violate the constant bus
-  // restriction. SGPRs are still allowed because it should
-  // technically be possible to use VCC again as src0.
-  let Src0RC32 = VCSrc_b32;
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
   let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
-  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
+  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
   let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
+  let AsmDPP16 = AsmDPP#"$fi";
   let Outs32 = (outs DstRC:$vdst);
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
 
   // Suppress src2 implied by type since the 32-bit encoding uses an
   // implicit VCC use.
@@ -320,20 +375,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
                     Src1DPP:$src1,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
   let HasExt = 1;
   let HasExtDPP = 1;
   let HasExtSDWA = 1;
   let HasExtSDWA9 = 1;
 }
 
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
-  let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
-  let Asm32 = "$vdst, $src0, $src1, vcc";
-  let Asm64 = "$vdst, $src0, $src1, $src2";
+// Read in from vcc or arbitrary SGPR.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
+  let Asm32 = "$vdst, $src0, $src1";
+  let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
   let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
+  let AsmDPP16 = AsmDPP#"$fi";
 
   let Outs32 = (outs DstRC:$vdst);
   let Outs64 = (outs DstRC:$vdst);
@@ -349,10 +407,12 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
 
   let InsDPP = (ins DstRCDPP:$old,
-                    Src0DPP:$src0,
-                    Src1DPP:$src1,
+                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
   let HasExt = 1;
   let HasExtDPP = 1;
   let HasExtSDWA = 1;
@@ -362,7 +422,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
 def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
   let Outs32 = (outs SReg_32:$vdst);
   let Outs64 = Outs32;
-  let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
+  let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
   let Ins64 = Ins32;
   let Asm32 = " $vdst, $src0, $src1";
   let Asm64 = Asm32;
@@ -393,8 +453,6 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
 // VOP2 Instructions
 //===----------------------------------------------------------------------===//
 
-let SubtargetPredicate = isGCN, Predicates = [isGCN] in {
-
 defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
 def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
 
@@ -414,9 +472,9 @@ defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
 defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
 defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
 defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
@@ -442,9 +500,9 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
 
 
 let SubtargetPredicate = HasAddNoCarryInsts in {
-defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32, null_frag, "v_add_u32", 1>;
-defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>;
-defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>;
+defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
+defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
+defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
 }
 
 } // End isCommutable = 1
@@ -472,32 +530,20 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16
 defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
 defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
 
-} // End SubtargetPredicate = isGCN, Predicates = [isGCN]
-
-def : GCNPat<
-    (AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
-    (V_ADDC_U32_e64 $src0, $src1, $src2)
->;
-
-def : GCNPat<
-    (AMDGPUsube i32:$src0, i32:$src1, i1:$src2),
-    (V_SUBB_U32_e64 $src0, $src1, $src2)
->;
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
 
+let SubtargetPredicate = isGFX6GFX7 in {
 defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
 defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
+} // End SubtargetPredicate = isGFX6GFX7
 
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
 let isCommutable = 1 in {
 defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
 } // End isCommutable = 1
-
-} // End let SubtargetPredicate = SICI, Predicates = [isSICI]
+} // End SubtargetPredicate = isGFX6GFX7GFX10
 
 class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
   GCNPat<
@@ -508,29 +554,29 @@ class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
       )
   >;
 
-let AddedComplexity = 1 in {
-  def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
-  def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
-  def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
-}
+class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : // Divergent Op -> Inst with an extra 0 operand.
+  GCNPat<
+      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
+      !if(!cast<Commutable_REV>(Inst).IsOrig,
+        (Inst $src0, $src1, 0), // trailing 0 is presumably the clamp operand; confirm
+        (Inst $src1, $src0, 0) // not IsOrig (reversed opcode): sources are swapped
+      )
+  >;
+
+def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
+def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
+def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
 
 let SubtargetPredicate = HasAddNoCarryInsts in {
-  def : DivergentBinOp<add, V_ADD_U32_e32>;
-  def : DivergentBinOp<sub, V_SUB_U32_e32>;
-  def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
+  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
 }
 
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
+def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
+def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
+}
 
-def : DivergentBinOp<add, V_ADD_I32_e32>;
-
-def : DivergentBinOp<add, V_ADD_I32_e64>;
-def : DivergentBinOp<sub, V_SUB_I32_e32>;
-
-def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
-
-def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
-def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
-def : DivergentBinOp<shl, V_LSHLREV_B32_e32>;
 def : DivergentBinOp<adde, V_ADDC_U32_e32>;
 def : DivergentBinOp<sube, V_SUBB_U32_e32>;
 
@@ -604,56 +650,133 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
 
 } // End SubtargetPredicate = HasDLInsts
 
-// Note: 16-bit instructions produce a 0 result in the high 16-bits.
-multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {
+let Constraints = "$vdst = $src2",
+      DisableEncoding="$src2",
+      isConvertibleToThreeAddress = 1,
+      isCommutable = 1 in {
+  let SubtargetPredicate = HasDot5Insts in
+    defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+  let SubtargetPredicate = HasDot6Insts in
+    defm V_DOT4C_I32_I8  : VOP2Inst_e32<"v_dot4c_i32_i8",  VOP_DOT_ACC_I32_I32>;
+
+  let SubtargetPredicate = HasDot4Insts in
+    defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
+  let SubtargetPredicate = HasDot3Insts in
+    defm V_DOT8C_I32_I4  : VOP2Inst_e32<"v_dot8c_i32_i4",  VOP_DOT_ACC_I32_I32>;
+}
+
+let AddedComplexity = 30 in {
+  def : GCNPat<
+    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
+    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
+  > {
+    let SubtargetPredicate = HasDot5Insts;
+  }
+  def : GCNPat<
+    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
+  > {
+    let SubtargetPredicate = HasDot6Insts;
+  }
+  def : GCNPat<
+    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
+  > {
+    let SubtargetPredicate = HasDot4Insts;
+  }
+  def : GCNPat<
+    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
+  > {
+    let SubtargetPredicate = HasDot3Insts;
+  }
+} // End AddedComplexity = 30
+
+let SubtargetPredicate = isGFX10Plus in {
+
+def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
+let FPDPRounding = 1 in
+def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
+
+let isCommutable = 1 in {
+def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
+let FPDPRounding = 1 in
+def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
+} // End isCommutable = 1
+
+let Constraints = "$vdst = $src2",
+    DisableEncoding="$src2",
+    isConvertibleToThreeAddress = 1,
+    isCommutable = 1 in {
+defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
+}
+
+} // End SubtargetPredicate = isGFX10Plus
+
+let SubtargetPredicate = HasPkFmacF16Inst in {
+defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
+} // End SubtargetPredicate = HasPkFmacF16Inst
+
+// Note: 16-bit instructions produce a 0 result in the high 16 bits
+// on GFX8 and GFX9, and preserve the high 16 bits on GFX10+.
+def ClearHI16 : OutPatFrag<(ops node:$op),
+                           (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>;
+
+multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst, // i16 op patterns: plain, zext i32, zext i64.
+                                bit PreservesHI16 = 0> { // 1: wrap each inst result in ClearHI16
 
 def : GCNPat<
   (op i16:$src0, i16:$src1),
-  (inst $src0, $src1)
+  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
 >;
 
 def : GCNPat<
   (i32 (zext (op i16:$src0, i16:$src1))),
-  (inst $src0, $src1)
+  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
 >;
 
 def : GCNPat<
   (i64 (zext (op i16:$src0, i16:$src1))),
    (REG_SEQUENCE VReg_64,
-     (inst $src0, $src1), sub0,
+     !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)),
+     sub0, // result goes in sub0; sub1 below is a constant 0
      (V_MOV_B32_e32 (i32 0)), sub1)
 >;
-
 }
 
-multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst> {
+multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst, // As above, but inst takes ($src1, $src0).
+                                 bit PreservesHI16 = 0> { // 1: wrap each inst result in ClearHI16
 
 def : GCNPat<
   (op i16:$src0, i16:$src1),
-  (inst $src1, $src0)
+  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
 >;
 
 def : GCNPat<
   (i32 (zext (op i16:$src0, i16:$src1))),
-  (inst $src1, $src0)
+  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
 >;
 
 
 def : GCNPat<
   (i64 (zext (op i16:$src0, i16:$src1))),
    (REG_SEQUENCE VReg_64,
-     (inst $src1, $src0), sub0,
+     !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)),
+     sub0, // result goes in sub0; sub1 below is a constant 0
      (V_MOV_B32_e32 (i32 0)), sub1)
 >;
 }
 
 class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
   (i16 (ext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
+                     (i32 0/*src1mod*/), (i32 1/*src1*/),
+                     $src)
 >;
 
 let Predicates = [Has16BitInsts] in {
 
+let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
 defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
 defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
 defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
@@ -661,6 +784,17 @@ defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
 defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
 defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
 defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
+}
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64,    1>;
+defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64, 1>;
+defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64,    1>;
+defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64,   1>;
+defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64,   1>;
+defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64,   1>;
+defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64,   1>;
+}
 
 def : GCNPat <
   (and i16:$src0, i16:$src1),
@@ -677,16 +811,25 @@ def : GCNPat <
   (V_XOR_B32_e64 $src0, $src1)
 >;
 
+let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
 defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
 defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
 defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;
+}
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64, 1>;
+defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64, 1>;
+defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64, 1>;
+}
 
 def : ZExt_i16_i1_Pat<zext>;
 def : ZExt_i16_i1_Pat<anyext>;
 
 def : GCNPat <
   (i16 (sext i1:$src)),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
 >;
 
 // Undo sub x, c -> add x, -c canonicalization since c is more likely
@@ -697,105 +840,334 @@ def : GCNPat<
   (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
 >;
 
-} // End Predicates = [Has16BitInsts]
+} // End Predicates = [Has16BitInsts]
+
 
 //===----------------------------------------------------------------------===//
-// SI
+// Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
 
-let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
+               string opName = ps.OpName, VOPProfile p = ps.Pfl,
+               bit IsDPP16 = 0> :
+    VOP_DPP<opName, p, IsDPP16> {
+  let hasSideEffects = ps.hasSideEffects;
+  let Defs = ps.Defs;
+  let SchedRW = ps.SchedRW;
+  let Uses = ps.Uses;
 
-multiclass VOP2_Real_si <bits<6> op> {
-  def _si :
-    VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
-    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+  bits<8> vdst;
+  bits<8> src1;
+  let Inst{8-0}   = 0xfa;
+  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
+  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+  let Inst{30-25} = op;
+  let Inst{31}    = 0x0;
 }
 
-multiclass VOP2_Real_MADK_si <bits<6> op> {
-  def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
-            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+class VOP2_DPP16<bits<6> op, VOP2_Pseudo ps,
+                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+    VOP2_DPP<op, ps, opName, p, 1> {
+  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+  let SubtargetPredicate = HasDPP16;
 }
 
-multiclass VOP2_Real_e32_si <bits<6> op> {
-  def _e32_si :
-    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
-    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
+                string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+    VOP_DPP8<ps.OpName, p> {
+  let hasSideEffects = ps.hasSideEffects;
+  let Defs = ps.Defs;
+  let SchedRW = ps.SchedRW;
+  let Uses = ps.Uses;
+
+  bits<8> vdst;
+  bits<8> src1;
+
+  let Inst{8-0}   = fi;
+  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
+  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+  let Inst{30-25} = op;
+  let Inst{31}    = 0x0;
+
+  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+  let SubtargetPredicate = HasDPP8;
 }
 
-multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
-  def _e64_si :
-    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
-    VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
-}
-
-multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
-  def _e64_si :
-    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
-    VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
-}
-
-} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
-
-defm V_CNDMASK_B32        : VOP2_Real_e32e64_si <0x0>;
-defm V_ADD_F32            : VOP2_Real_e32e64_si <0x3>;
-defm V_SUB_F32            : VOP2_Real_e32e64_si <0x4>;
-defm V_SUBREV_F32         : VOP2_Real_e32e64_si <0x5>;
-defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_si <0x7>;
-defm V_MUL_F32            : VOP2_Real_e32e64_si <0x8>;
-defm V_MUL_I32_I24        : VOP2_Real_e32e64_si <0x9>;
-defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_si <0xa>;
-defm V_MUL_U32_U24        : VOP2_Real_e32e64_si <0xb>;
-defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_si <0xc>;
-defm V_MIN_F32            : VOP2_Real_e32e64_si <0xf>;
-defm V_MAX_F32            : VOP2_Real_e32e64_si <0x10>;
-defm V_MIN_I32            : VOP2_Real_e32e64_si <0x11>;
-defm V_MAX_I32            : VOP2_Real_e32e64_si <0x12>;
-defm V_MIN_U32            : VOP2_Real_e32e64_si <0x13>;
-defm V_MAX_U32            : VOP2_Real_e32e64_si <0x14>;
-defm V_LSHRREV_B32        : VOP2_Real_e32e64_si <0x16>;
-defm V_ASHRREV_I32        : VOP2_Real_e32e64_si <0x18>;
-defm V_LSHLREV_B32        : VOP2_Real_e32e64_si <0x1a>;
-defm V_AND_B32            : VOP2_Real_e32e64_si <0x1b>;
-defm V_OR_B32             : VOP2_Real_e32e64_si <0x1c>;
-defm V_XOR_B32            : VOP2_Real_e32e64_si <0x1d>;
-defm V_MAC_F32            : VOP2_Real_e32e64_si <0x1f>;
-defm V_MADMK_F32          : VOP2_Real_MADK_si <0x20>;
-defm V_MADAK_F32          : VOP2_Real_MADK_si <0x21>;
-defm V_ADD_I32            : VOP2be_Real_e32e64_si <0x25>;
-defm V_SUB_I32            : VOP2be_Real_e32e64_si <0x26>;
-defm V_SUBREV_I32         : VOP2be_Real_e32e64_si <0x27>;
-defm V_ADDC_U32           : VOP2be_Real_e32e64_si <0x28>;
-defm V_SUBB_U32           : VOP2be_Real_e32e64_si <0x29>;
-defm V_SUBBREV_U32        : VOP2be_Real_e32e64_si <0x2a>;
-
-defm V_READLANE_B32       : VOP2_Real_si <0x01>;
-
-let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
-defm V_WRITELANE_B32      : VOP2_Real_si <0x02>;
-}
-
-defm V_MAC_LEGACY_F32     : VOP2_Real_e32e64_si <0x6>;
-defm V_MIN_LEGACY_F32     : VOP2_Real_e32e64_si <0xd>;
-defm V_MAX_LEGACY_F32     : VOP2_Real_e32e64_si <0xe>;
-defm V_LSHR_B32           : VOP2_Real_e32e64_si <0x15>;
-defm V_ASHR_I32           : VOP2_Real_e32e64_si <0x17>;
-defm V_LSHL_B32           : VOP2_Real_e32e64_si <0x19>;
-
-defm V_BFM_B32            : VOP2_Real_e32e64_si <0x1e>;
-defm V_BCNT_U32_B32       : VOP2_Real_e32e64_si <0x22>;
-defm V_MBCNT_LO_U32_B32   : VOP2_Real_e32e64_si <0x23>;
-defm V_MBCNT_HI_U32_B32   : VOP2_Real_e32e64_si <0x24>;
-defm V_LDEXP_F32          : VOP2_Real_e32e64_si <0x2b>;
-defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
-defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
-defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
-defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e32e64_si <0x2f>;
-defm V_CVT_PK_U16_U32     : VOP2_Real_e32e64_si <0x30>;
-defm V_CVT_PK_I16_I32     : VOP2_Real_e32e64_si <0x31>;
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  //===------------------------------- VOP2 -------------------------------===//
+  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
+    def _gfx10 :
+      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
+                                                string asmName> {
+    def _gfx10 :
+        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
+        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
+      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
+      let AsmString = asmName # ps.AsmOperands;
+    }
+  }
+  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
+    def _e32_gfx10 :
+      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+  }
+  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+  }
+  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
+    def _sdwa_gfx10 :
+      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+      let DecoderNamespace = "SDWA10";
+    }
+  }
+  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
+    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+      let DecoderNamespace = "SDWA10";
+    }
+  }
+  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
+    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+      let DecoderNamespace = "DPP8";
+    }
+  }
+
+  //===------------------------- VOP2 (with name) -------------------------===//
+  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
+                                           string asmName> {
+    def _e32_gfx10 :
+      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
+      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
+        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+        let AsmString = asmName # ps.AsmOperands;
+      }
+  }
+  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
+                                           string asmName> {
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
+                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
+        let AsmString = asmName # ps.AsmOperands;
+      }
+  }
+  let DecoderNamespace = "SDWA10" in {
+    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
+                                              string asmName> {
+      def _sdwa_gfx10 :
+        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+          let AsmString = asmName # ps.AsmOperands;
+        }
+    }
+    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
+                                             string asmName> {
+      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+        let AsmString = asmName # ps.Pfl.AsmDPP16;
+      }
+    }
+    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
+                                              string asmName> {
+      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+        let AsmString = asmName # ps.Pfl.AsmDPP8;
+        let DecoderNamespace = "DPP8";
+      }
+    }
+  } // End DecoderNamespace = "SDWA10"
+
+  //===------------------------------ VOP2be ------------------------------===//
+  multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> {
+    def _e32_gfx10 :
+      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
+      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
+        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
+        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
+      }
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
+                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
+        let AsmString = asmName # Ps.AsmOperands;
+      }
+    def _sdwa_gfx10 :
+      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
+        let DecoderNamespace = "SDWA10";
+      }
+    def _dpp_gfx10 :
+      VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
+        let DecoderNamespace = "SDWA10";
+      }
+    def _dpp8_gfx10 :
+      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
+        let DecoderNamespace = "DPP8";
+      }
+
+    let WaveSizePredicate = isWave32 in {
+      def _sdwa_w32_gfx10 :
+        Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+          let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
+          let isAsmParserOnly = 1;
+          let DecoderNamespace = "SDWA10";
+        }
+      def _dpp_w32_gfx10 :
+        VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+          string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
+          let isAsmParserOnly = 1;
+        }
+      def _dpp8_w32_gfx10 :
+        VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+          string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
+          let isAsmParserOnly = 1;
+        }
+    } // End WaveSizePredicate = isWave32
+
+    let WaveSizePredicate = isWave64 in {
+      def _sdwa_w64_gfx10 :
+        Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+          let AsmString = asmName # Ps.AsmOperands;
+          let isAsmParserOnly = 1;
+          let DecoderNamespace = "SDWA10";
+        }
+      def _dpp_w64_gfx10 :
+        VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+          string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+          let AsmString = asmName # AsmDPP;
+          let isAsmParserOnly = 1;
+        }
+      def _dpp8_w64_gfx10 :
+        VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+          string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+          let AsmString = asmName # AsmDPP8;
+          let isAsmParserOnly = 1;
+        }
+    } // End WaveSizePredicate = isWave64
+  }
 
+  //===----------------------------- VOP3Only -----------------------------===//
+  multiclass VOP3Only_Real_gfx10<bits<10> op> {
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+  }
+
+  //===---------------------------- VOP3beOnly ----------------------------===//
+  multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> {
+    def _e64_gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
+        let AsmString = asmName # Ps.AsmOperands;
+      }
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+multiclass Base_VOP2_Real_gfx10<bits<6> op> :
+  VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>;
+
+multiclass VOP2_Real_gfx10<bits<6> op> :
+  VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
+  VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
+
+multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
+                                     string asmName> :
+  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
+  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
+  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
+  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
+  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
+
+defm V_CNDMASK_B32   : Base_VOP2_Real_gfx10<0x001>;
+defm V_XNOR_B32      : VOP2_Real_gfx10<0x01e>;
+defm V_FMAC_F32      : VOP2_Real_gfx10<0x02b>;
+defm V_FMAMK_F32     : VOP2Only_Real_MADK_gfx10<0x02c>;
+defm V_FMAAK_F32     : VOP2Only_Real_MADK_gfx10<0x02d>;
+defm V_ADD_F16       : VOP2_Real_gfx10<0x032>;
+defm V_SUB_F16       : VOP2_Real_gfx10<0x033>;
+defm V_SUBREV_F16    : VOP2_Real_gfx10<0x034>;
+defm V_MUL_F16       : VOP2_Real_gfx10<0x035>;
+defm V_FMAC_F16      : VOP2_Real_gfx10<0x036>;
+defm V_FMAMK_F16     : VOP2Only_Real_MADK_gfx10<0x037>;
+defm V_FMAAK_F16     : VOP2Only_Real_MADK_gfx10<0x038>;
+defm V_MAX_F16       : VOP2_Real_gfx10<0x039>;
+defm V_MIN_F16       : VOP2_Real_gfx10<0x03a>;
+defm V_LDEXP_F16     : VOP2_Real_gfx10<0x03b>;
+defm V_PK_FMAC_F16   : VOP2_Real_e32_gfx10<0x03c>;
+
+// VOP2 no carry-in, carry-out.
+defm V_ADD_NC_U32 :
+  VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
+defm V_SUB_NC_U32 :
+  VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
+defm V_SUBREV_NC_U32 :
+  VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
+
+// VOP2 carry-in, carry-out.
+defm V_ADD_CO_CI_U32 :
+  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
+defm V_SUB_CO_CI_U32 :
+  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
+defm V_SUBREV_CO_CI_U32 :
+  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
+
+// VOP3 only.
+defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
+defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
+defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
+defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
+defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
+defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
+defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
+defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
+defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;
+
+// VOP3 carry-in, carry-out.
+defm V_ADD_CO_U32 :
+  VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
+defm V_SUB_CO_U32 :
+  VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
+defm V_SUBREV_CO_U32 :
+  VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;
+
+let SubtargetPredicate = isGFX10Plus in {
+  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
+
+  defm : VOP2bInstAliases<
+    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
+  defm : VOP2bInstAliases<
+    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
+  defm : VOP2bInstAliases<
+    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
+} // End SubtargetPredicate = isGFX10Plus
 
 //===----------------------------------------------------------------------===//
-// VI
+// GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
 class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
@@ -809,7 +1181,111 @@ class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
   let Inst{31}    = 0x0; //encoding
 }
 
-let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
+    def _gfx6_gfx7 :
+      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
+    def _gfx6_gfx7 :
+      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
+    def _e32_gfx6_gfx7 :
+      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+  }
+  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
+    def _e64_gfx6_gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+  }
+  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
+    def _e64_gfx6_gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
+  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;
+
+multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
+  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;
+
+multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
+  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;
+
+multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
+  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
+
+defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
+defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
+defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
+defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
+defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
+defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
+defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
+defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
+defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
+defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
+defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
+defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
+defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
+defm V_ADD_I32            : VOP2be_Real_gfx6_gfx7<0x025>;
+defm V_SUB_I32            : VOP2be_Real_gfx6_gfx7<0x026>;
+defm V_SUBREV_I32         : VOP2be_Real_gfx6_gfx7<0x027>;
+defm V_ADDC_U32           : VOP2be_Real_gfx6_gfx7<0x028>;
+defm V_SUBB_U32           : VOP2be_Real_gfx6_gfx7<0x029>;
+defm V_SUBBREV_U32        : VOP2be_Real_gfx6_gfx7<0x02a>;
+
+defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;
+
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+  defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+
+let SubtargetPredicate = isGFX6GFX7 in {
+  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
+} // End SubtargetPredicate = isGFX6GFX7
+
+defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
+defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
+defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
+defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
+defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
+defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
+defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
+defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
+
+//===----------------------------------------------------------------------===//
+// GFX8, GFX9 (VI).
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
 
 multiclass VOP2_Real_MADK_vi <bits<6> op> {
   def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -843,7 +1319,7 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
   VOP2_Real_e32_vi<op>,
   VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
 
-} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
 
 multiclass VOP2_SDWA_Real <bits<6> op> {
   def _sdwa_vi :
@@ -857,7 +1333,7 @@ multiclass VOP2_SDWA9_Real <bits<6> op> {
     VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
 }
 
-let AssemblerPredicates = [isVIOnly] in {
+let AssemblerPredicates = [isGFX8Only] in {
 
 multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
   def _e32_vi :
@@ -865,14 +1341,14 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
     VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
       VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
       let AsmString = AsmName # ps.AsmOperands;
-      let DecoderNamespace = "VI";
+      let DecoderNamespace = "GFX8";
     }
   def _e64_vi :
     VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
     VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
       VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
       let AsmString = AsmName # ps.AsmOperands;
-      let DecoderNamespace = "VI";
+      let DecoderNamespace = "GFX8";
     }
   def _sdwa_vi :
     VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
@@ -890,7 +1366,7 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
 }
 }
 
-let AssemblerPredicates = [isGFX9] in {
+let AssemblerPredicates = [isGFX9Only] in {
 
 multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
   def _e32_gfx9 :
@@ -946,7 +1422,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
       }
 }
 
-} // AssemblerPredicates = [isGFX9]
+} // End AssemblerPredicates = [isGFX9Only]
 
 multiclass VOP2_Real_e32e64_vi <bits<6> op> :
   Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
@@ -1035,7 +1511,7 @@ defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
 defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
 defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
 
 // Aliases to simplify matching of floating-point instructions that
 // are VOP2 on SI and VOP3 on VI.
@@ -1055,7 +1531,20 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
 def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
 
-} // End SubtargetPredicate = isVI
+defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
+
+} // End SubtargetPredicate = isGFX8GFX9
+
+let SubtargetPredicate = isGFX9Only in {
+
+defm : VOP2bInstAliases<V_ADD_I32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
+defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
+defm : VOP2bInstAliases<V_SUB_I32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
+defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
+defm : VOP2bInstAliases<V_SUBREV_I32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
+defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
+
+} // End SubtargetPredicate = isGFX9Only
 
 let SubtargetPredicate = HasDLInsts in {
 
@@ -1063,3 +1552,35 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
 defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
 
 } // End SubtargetPredicate = HasDLInsts
+
+multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
+  def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+}
+
+multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
+  VOP2_Real_e32_gfx10<op>,
+  VOP2_Real_dpp_gfx10<op>,
+  VOP2_Real_dpp8_gfx10<op>;
+
+let SubtargetPredicate = HasDot5Insts in {
+  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
+  // NB: Opcode conflicts with V_DOT8C_I32_I4
+  // This opcode exists in gfx 10.1* only
+  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+}
+
+let SubtargetPredicate = HasDot6Insts in {
+  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
+  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
+}
+
+let SubtargetPredicate = HasDot4Insts in {
+  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
+}
+let SubtargetPredicate = HasDot3Insts in {
+  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
+}
+
+let SubtargetPredicate = HasPkFmacF16Inst in {
+defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
+} // End SubtargetPredicate = HasPkFmacF16Inst
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 4b8c1f208a0e..21dbef9240e1 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1,9 +1,8 @@
 //===-- VOP3Instructions.td - Vector Instruction Defintions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -111,6 +110,11 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
                   ret1));
 }
 
+class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
+  list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
+                                        imm:$cbsz, imm:$abid, imm:$blgp))];
+}
+
 class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
   VOP3_Pseudo<OpName, P,
     !if(P.HasOpSel,
@@ -121,7 +125,9 @@ class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
             getVOP3ModPat<P, node>.ret,
             !if(P.HasIntClamp,
                 getVOP3ClampPat<P, node>.ret,
-                getVOP3Pat<P, node>.ret))),
+                !if (P.IsMAI,
+                    getVOP3MAIPat<P, node>.ret,
+                    getVOP3Pat<P, node>.ret)))),
     VOP3Only, 0, P.HasOpSel> {
 
   let IntClamp = P.HasIntClamp;
@@ -144,33 +150,27 @@ def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
 }
 }
 
-class getVOP3VCC<VOPProfile P, SDPatternOperator node> {
-  list<dag> ret =
-    [(set P.DstVT:$vdst,
-      (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
-            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
-            (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
-            (i1 VCC)))];
-}
-
-class VOP3Features<bit Clamp, bit OpSel, bit Packed> {
+class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
   bit HasClamp = Clamp;
   bit HasOpSel = OpSel;
   bit IsPacked = Packed;
+  bit IsMAI = MAI;
 }
 
-def VOP3_REGULAR : VOP3Features<0, 0, 0>;
-def VOP3_CLAMP   : VOP3Features<1, 0, 0>;
-def VOP3_OPSEL   : VOP3Features<1, 1, 0>;
-def VOP3_PACKED  : VOP3Features<1, 1, 1>;
+def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
+def VOP3_CLAMP   : VOP3Features<1, 0, 0, 0>;
+def VOP3_OPSEL   : VOP3Features<1, 1, 0, 0>;
+def VOP3_PACKED  : VOP3Features<1, 1, 1, 0>;
+def VOP3_MAI     : VOP3Features<0, 0, 0, 1>;
 
 class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
 
   let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
   let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+  let IsMAI    = !if(Features.IsMAI,    1, P.IsMAI);
   let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
 
-  let HasModifiers = !if(Features.IsPacked, 1, P.HasModifiers);
+  let HasModifiers = !if(Features.IsPacked, !if(Features.IsMAI, 0, 1), P.HasModifiers);
 
   // FIXME: Hack to stop printing _e64
   let Outs64 = (outs DstRC.RegClass:$vdst);
@@ -191,8 +191,9 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
+  let HasClamp = 0;
   let HasOMod = 0;
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
 }
 
@@ -212,7 +213,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
   // FIXME: Hack to stop printing _e64
   let DstRC = RegisterOperand<VReg_64>;
 
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
 }
 
@@ -303,7 +304,7 @@ def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_li
 } // End SchedRW = [WriteDoubleAdd]
 
 let SchedRW = [WriteQuarterRate32] in {
-def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
 def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
 def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
 def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
@@ -315,8 +316,7 @@ let Uses = [VCC, EXEC] in {
 //   if (vcc)
 //     result *= 2^32
 //
-def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
-  getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
+def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []> {
   let SchedRW = [WriteFloatFMA];
 }
 // v_div_fmas_f64:
@@ -324,8 +324,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
 //   if (vcc)
 //     result *= 2^64
 //
-def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
-  getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
+def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> {
   let SchedRW = [WriteDouble];
   let FPDPRounding = 1;
 }
@@ -386,22 +385,21 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
 }
 
 let SchedRW = [Write64Bit] in {
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+let SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] in {
 def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
 def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
 def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
 def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
+} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
 
-let SubtargetPredicate = isVI in {
-def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
-} // End SubtargetPredicate = isVI
+let SubtargetPredicate = isGFX8Plus in {
+def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
+def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
+def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
+} // End SubtargetPredicate = isGFX8Plus
 } // End SchedRW = [Write64Bit]
 
-let Predicates = [isVI] in {
+let Predicates = [isGFX8Plus] in {
 def : GCNPat <
  (getDivergentFrag<shl>.ret i64:$x, i32:$y),
  (V_LSHLREV_B64 $y, $x)
@@ -417,7 +415,13 @@ def : AMDGPUPat <
 }
 
 
-let SubtargetPredicate = isCIVI in {
+let SchedRW = [Write32Bit] in {
+let SubtargetPredicate = isGFX8Plus in {
+def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
+} // End SubtargetPredicate = isGFX8Plus
+} // End SchedRW = [Write32Bit]
+
+let SubtargetPredicate = isGFX7Plus in {
 
 let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
 def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
@@ -431,27 +435,27 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
 } // End SchedRW = [WriteDouble, WriteSALU]
 } // End isCommutable = 1
 
-} // End SubtargetPredicate = isCIVI
+} // End SubtargetPredicate = isGFX7Plus
 
 
 def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
-  let Predicates = [Has16BitInsts, isVIOnly];
+  let Predicates = [Has16BitInsts, isGFX8Only];
   let FPDPRounding = 1;
 }
 def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
                                       VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
   let renamedInGFX9 = 1;
-  let Predicates = [Has16BitInsts, isGFX9];
+  let Predicates = [Has16BitInsts, isGFX9Plus];
   let FPDPRounding = 1;
 }
 
 def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
-  let Predicates = [Has16BitInsts, isVIOnly];
+  let Predicates = [Has16BitInsts, isGFX8Only];
   let FPDPRounding = 1;
 }
 def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
   let renamedInGFX9 = 1;
-  let Predicates = [Has16BitInsts, isGFX9];
+  let Predicates = [Has16BitInsts, isGFX9Plus];
   let FPDPRounding = 1;
 }
 
@@ -463,36 +467,58 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CL
 let FPDPRounding = 1 in {
 def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
 let Uses = [M0, EXEC] in {
-def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
+       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
+                                                        (i32 imm:$attr),
+                                                        (i32 imm:$src0_modifiers),
+                                                        (f32 VRegSrc_32:$src2),
+                                                        (i32 imm:$src2_modifiers),
+                                                        (i1 imm:$high),
+                                                        (i1 imm:$clamp)))]>;
 } // End Uses = [M0, EXEC]
 } // End FPDPRounding = 1
 } // End renamedInGFX9 = 1
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Only in {
 def V_MAD_F16_gfx9   : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
   let FPDPRounding = 1;
 }
+} // End SubtargetPredicate = isGFX9Only
+
+let SubtargetPredicate = isGFX9Plus in {
 def V_MAD_U16_gfx9   : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
 def V_MAD_I16_gfx9   : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
 def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
 
 let Uses = [M0, EXEC], FPDPRounding = 1 in {
-def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
-def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
+       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
+                                                          (i32 imm:$attr),
+                                                          (i32 imm:$src0_modifiers),
+                                                          (i1 imm:$high),
+                                                          (i1 imm:$clamp),
+                                                          (i32 imm:$omod)))]>;
+def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
+       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
+                                                          (i32 imm:$attr),
+                                                          (i32 imm:$src0_modifiers),
+                                                          (f32 VRegSrc_32:$src2),
+                                                          (i32 imm:$src2_modifiers),
+                                                          (i1 imm:$high),
+                                                          (i1 imm:$clamp),
+                                                          (i32 imm:$omod)))]>;
 } // End Uses = [M0, EXEC], FPDPRounding = 1
 
 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
 
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
 def V_INTERP_P1_F32_e64  : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
 def V_INTERP_P2_F32_e64  : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
 def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
+} // End SubtargetPredicate = isGFX8GFX9
 
-def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
-} // End SubtargetPredicate = isVI
-
-let Predicates = [Has16BitInsts] in {
+let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
 
 multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
                              Instruction inst, SDPatternOperator op3> {
@@ -506,7 +532,23 @@ def : GCNPat <
 defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
 defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
 
-} // End Predicates = [Has16BitInsts]
+} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+
+multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
+                                 Instruction inst, SDPatternOperator op3> {
+def : GCNPat <
+  (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
+  (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+
+}
+
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9, zext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9, sext>;
+
+} // End Predicates = [Has16BitInsts, isGFX10Plus]
 
 class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
   (ops node:$x, node:$y, node:$z),
@@ -528,7 +570,9 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
       if (!Operands[i]->isDivergent() &&
           !isInlineImmediate(Operands[i].getNode())) {
         ConstantBusUses++;
-        if (ConstantBusUses >= 2)
+        // This uses AMDGPU::V_ADD3_U32, but all three-operand instructions
+        // have the same constant bus limit.
+        if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32))
           return false;
       }
     }
@@ -539,7 +583,7 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
   let PredicateCodeUsesOperands = 1;
 }
 
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
 def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
 def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -589,7 +633,38 @@ def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
 def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
 def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
 
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
+  let Src0RC64 = VRegSrc_32;
+  let Src1RC64 = SCSrc_b32;
+  let Src2RC64 = SCSrc_b32;
+  let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
+                          IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
+                          IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
+                          VGPR_32:$vdst_in, op_sel:$op_sel);
+  let HasClamp = 0;
+  let HasOMod = 0;
+}
+
+let SubtargetPredicate = isGFX10Plus in {
+  def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+  def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;
+
+  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+    def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>;
+    def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
+  } // End Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in"
+
+  def : GCNPat<
+    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
+  >;
+  def : GCNPat<
+    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
+  >;
+} // End SubtargetPredicate = isGFX10Plus
 
 //===----------------------------------------------------------------------===//
 // Integer Clamp Patterns
@@ -631,111 +706,239 @@ def : IntClampPat<V_MQSAD_PK_U16_U8, int_amdgcn_mqsad_pk_u16_u8>;
 def : IntClampPat<V_QSAD_PK_U16_U8, int_amdgcn_qsad_pk_u16_u8>;
 def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
 
+
 //===----------------------------------------------------------------------===//
-// Target
+// Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// SI
+// GFX10.
 //===----------------------------------------------------------------------===//
 
-let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
-
-multiclass VOP3_Real_si<bits<9> op> {
-  def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
-            VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
-}
-
-multiclass VOP3be_Real_si<bits<9> op> {
-  def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
-            VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
-}
-
-} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
-
-defm V_MAD_LEGACY_F32   : VOP3_Real_si <0x140>;
-defm V_MAD_F32          : VOP3_Real_si <0x141>;
-defm V_MAD_I32_I24      : VOP3_Real_si <0x142>;
-defm V_MAD_U32_U24      : VOP3_Real_si <0x143>;
-defm V_CUBEID_F32       : VOP3_Real_si <0x144>;
-defm V_CUBESC_F32       : VOP3_Real_si <0x145>;
-defm V_CUBETC_F32       : VOP3_Real_si <0x146>;
-defm V_CUBEMA_F32       : VOP3_Real_si <0x147>;
-defm V_BFE_U32          : VOP3_Real_si <0x148>;
-defm V_BFE_I32          : VOP3_Real_si <0x149>;
-defm V_BFI_B32          : VOP3_Real_si <0x14a>;
-defm V_FMA_F32          : VOP3_Real_si <0x14b>;
-defm V_FMA_F64          : VOP3_Real_si <0x14c>;
-defm V_LERP_U8          : VOP3_Real_si <0x14d>;
-defm V_ALIGNBIT_B32     : VOP3_Real_si <0x14e>;
-defm V_ALIGNBYTE_B32    : VOP3_Real_si <0x14f>;
-defm V_MULLIT_F32       : VOP3_Real_si <0x150>;
-defm V_MIN3_F32         : VOP3_Real_si <0x151>;
-defm V_MIN3_I32         : VOP3_Real_si <0x152>;
-defm V_MIN3_U32         : VOP3_Real_si <0x153>;
-defm V_MAX3_F32         : VOP3_Real_si <0x154>;
-defm V_MAX3_I32         : VOP3_Real_si <0x155>;
-defm V_MAX3_U32         : VOP3_Real_si <0x156>;
-defm V_MED3_F32         : VOP3_Real_si <0x157>;
-defm V_MED3_I32         : VOP3_Real_si <0x158>;
-defm V_MED3_U32         : VOP3_Real_si <0x159>;
-defm V_SAD_U8           : VOP3_Real_si <0x15a>;
-defm V_SAD_HI_U8        : VOP3_Real_si <0x15b>;
-defm V_SAD_U16          : VOP3_Real_si <0x15c>;
-defm V_SAD_U32          : VOP3_Real_si <0x15d>;
-defm V_CVT_PK_U8_F32    : VOP3_Real_si <0x15e>;
-defm V_DIV_FIXUP_F32    : VOP3_Real_si <0x15f>;
-defm V_DIV_FIXUP_F64    : VOP3_Real_si <0x160>;
-defm V_LSHL_B64         : VOP3_Real_si <0x161>;
-defm V_LSHR_B64         : VOP3_Real_si <0x162>;
-defm V_ASHR_I64         : VOP3_Real_si <0x163>;
-defm V_ADD_F64          : VOP3_Real_si <0x164>;
-defm V_MUL_F64          : VOP3_Real_si <0x165>;
-defm V_MIN_F64          : VOP3_Real_si <0x166>;
-defm V_MAX_F64          : VOP3_Real_si <0x167>;
-defm V_LDEXP_F64        : VOP3_Real_si <0x168>;
-defm V_MUL_LO_U32       : VOP3_Real_si <0x169>;
-defm V_MUL_HI_U32       : VOP3_Real_si <0x16a>;
-defm V_MUL_LO_I32       : VOP3_Real_si <0x16b>;
-defm V_MUL_HI_I32       : VOP3_Real_si <0x16c>;
-defm V_DIV_SCALE_F32    : VOP3be_Real_si <0x16d>;
-defm V_DIV_SCALE_F64    : VOP3be_Real_si <0x16e>;
-defm V_DIV_FMAS_F32     : VOP3_Real_si <0x16f>;
-defm V_DIV_FMAS_F64     : VOP3_Real_si <0x170>;
-defm V_MSAD_U8          : VOP3_Real_si <0x171>;
-defm V_MQSAD_PK_U16_U8  : VOP3_Real_si <0x173>;
-defm V_TRIG_PREOP_F64   : VOP3_Real_si <0x174>;
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass VOP3_Real_gfx10<bits<10> op> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
+                                       string asmName> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
+      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
+        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+        let AsmString = asmName # ps.AsmOperands;
+      }
+  }
+  multiclass VOP3be_Real_gfx10<bits<10> op> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP3Interp_Real_gfx10<bits<10> op> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
+                                            string asmName> {
+    def _gfx10 :
+      VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
+      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
+        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+        let AsmString = asmName # ps.AsmOperands;
+      }
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm V_READLANE_B32  : VOP3_Real_gfx10<0x360>;
+
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+  defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+
+defm V_XOR3_B32           : VOP3_Real_gfx10<0x178>;
+defm V_LSHLREV_B64        : VOP3_Real_gfx10<0x2ff>;
+defm V_LSHRREV_B64        : VOP3_Real_gfx10<0x300>;
+defm V_ASHRREV_I64        : VOP3_Real_gfx10<0x301>;
+defm V_PERM_B32           : VOP3_Real_gfx10<0x344>;
+defm V_XAD_U32            : VOP3_Real_gfx10<0x345>;
+defm V_LSHL_ADD_U32       : VOP3_Real_gfx10<0x346>;
+defm V_ADD_LSHL_U32       : VOP3_Real_gfx10<0x347>;
+defm V_ADD3_U32           : VOP3_Real_gfx10<0x36d>;
+defm V_LSHL_OR_B32        : VOP3_Real_gfx10<0x36f>;
+defm V_AND_OR_B32         : VOP3_Real_gfx10<0x371>;
+defm V_OR3_B32            : VOP3_Real_gfx10<0x372>;
+
+// TODO-GFX10: add MC tests for v_add/sub_nc_i16
+defm V_ADD_NC_I16 :
+  VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 :
+  VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_SUB_NC_I32 :
+  VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">;
+defm V_ADD_NC_I32 :
+  VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
+
+defm V_INTERP_P1LL_F16    : VOP3Interp_Real_gfx10<0x342>;
+defm V_INTERP_P1LV_F16    : VOP3Interp_Real_gfx10<0x343>;
+defm V_INTERP_P2_F16      : VOP3Interp_Real_gfx10<0x35a>;
+
+defm V_PACK_B32_F16       : VOP3OpSel_Real_gfx10<0x311>;
+defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
+defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;
+
+defm V_MIN3_F16           : VOP3OpSel_Real_gfx10<0x351>;
+defm V_MIN3_I16           : VOP3OpSel_Real_gfx10<0x352>;
+defm V_MIN3_U16           : VOP3OpSel_Real_gfx10<0x353>;
+defm V_MAX3_F16           : VOP3OpSel_Real_gfx10<0x354>;
+defm V_MAX3_I16           : VOP3OpSel_Real_gfx10<0x355>;
+defm V_MAX3_U16           : VOP3OpSel_Real_gfx10<0x356>;
+defm V_MED3_F16           : VOP3OpSel_Real_gfx10<0x357>;
+defm V_MED3_I16           : VOP3OpSel_Real_gfx10<0x358>;
+defm V_MED3_U16           : VOP3OpSel_Real_gfx10<0x359>;
+defm V_MAD_U32_U16        : VOP3OpSel_Real_gfx10<0x373>;
+defm V_MAD_I32_I16        : VOP3OpSel_Real_gfx10<0x375>;
+
+defm V_MAD_U16 :
+  VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
+defm V_FMA_F16 :
+  VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_MAD_I16 :
+  VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
+defm V_DIV_FIXUP_F16 :
+  VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+
+// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
+// (they do not support SDWA or DPP).
+defm V_ADD_NC_U16      : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16_e64", "v_add_nc_u16">;
+defm V_SUB_NC_U16      : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16_e64", "v_sub_nc_u16">;
+defm V_MUL_LO_U16      : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_e64", "v_mul_lo_u16">;
+defm V_LSHRREV_B16     : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_e64", "v_lshrrev_b16">;
+defm V_ASHRREV_I16     : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_e64", "v_ashrrev_i16">;
+defm V_MAX_U16         : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16_e64", "v_max_u16">;
+defm V_MAX_I16         : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_max_i16">;
+defm V_MIN_U16         : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">;
+defm V_MIN_I16         : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">;
+defm V_LSHLREV_B16     : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">;
+defm V_PERMLANE16_B32  : VOP3OpSel_Real_gfx10<0x377>;
+defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
 
 //===----------------------------------------------------------------------===//
-// CI
+// GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-multiclass VOP3_Real_ci<bits<9> op> {
-  def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
-            VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
-    let AssemblerPredicates = [isCIOnly];
-    let DecoderNamespace = "CI";
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+  multiclass VOP3_Real_gfx7<bits<10> op> {
+    def _gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
   }
-}
-
-multiclass VOP3be_Real_ci<bits<9> op> {
-  def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
-            VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
-    let AssemblerPredicates = [isCIOnly];
-    let DecoderNamespace = "CI";
+  multiclass VOP3be_Real_gfx7<bits<10> op> {
+    def _gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
   }
-}
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
+  VOP3_Real_gfx7<op>, VOP3_Real_gfx10<op>;
+
+multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
+  VOP3be_Real_gfx7<op>, VOP3be_Real_gfx10<op>;
+
+defm V_QSAD_PK_U16_U8   : VOP3_Real_gfx7_gfx10<0x172>;
+defm V_MQSAD_U32_U8     : VOP3_Real_gfx7_gfx10<0x175>;
+defm V_MAD_U64_U32      : VOP3be_Real_gfx7_gfx10<0x176>;
+defm V_MAD_I64_I32      : VOP3be_Real_gfx7_gfx10<0x177>;
 
-defm V_QSAD_PK_U16_U8   : VOP3_Real_ci <0x172>;
-defm V_MQSAD_U32_U8     : VOP3_Real_ci <0x175>;
-defm V_MAD_U64_U32      : VOP3be_Real_ci <0x176>;
-defm V_MAD_I64_I32      : VOP3be_Real_ci <0x177>;
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
+    def _gfx6_gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+  }
+  multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
+    def _gfx6_gfx7 :
+      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+      VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
+  VOP3_Real_gfx6_gfx7<op>, VOP3_Real_gfx10<op>;
+
+multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
+  VOP3be_Real_gfx6_gfx7<op>, VOP3be_Real_gfx10<op>;
+
+defm V_LSHL_B64        : VOP3_Real_gfx6_gfx7<0x161>;
+defm V_LSHR_B64        : VOP3_Real_gfx6_gfx7<0x162>;
+defm V_ASHR_I64        : VOP3_Real_gfx6_gfx7<0x163>;
+
+defm V_MAD_LEGACY_F32  : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
+defm V_MAD_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
+defm V_MAD_I32_I24     : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
+defm V_MAD_U32_U24     : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
+defm V_CUBEID_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
+defm V_CUBESC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
+defm V_CUBETC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
+defm V_CUBEMA_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
+defm V_BFE_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
+defm V_BFE_I32         : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
+defm V_BFI_B32         : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
+defm V_FMA_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
+defm V_FMA_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
+defm V_LERP_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
+defm V_ALIGNBIT_B32    : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
+defm V_ALIGNBYTE_B32   : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
+defm V_MULLIT_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
+defm V_MIN3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
+defm V_MIN3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
+defm V_MIN3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
+defm V_MAX3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
+defm V_MAX3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
+defm V_MAX3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
+defm V_MED3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
+defm V_MED3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
+defm V_MED3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
+defm V_SAD_U8          : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
+defm V_SAD_HI_U8       : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
+defm V_SAD_U16         : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
+defm V_SAD_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
+defm V_CVT_PK_U8_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
+defm V_DIV_FIXUP_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
+defm V_DIV_FIXUP_F64   : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
+defm V_ADD_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
+defm V_MUL_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
+defm V_MIN_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
+defm V_MAX_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
+defm V_LDEXP_F64       : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
+defm V_MUL_LO_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
+defm V_MUL_HI_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
+defm V_MUL_LO_I32      : VOP3_Real_gfx6_gfx7_gfx10<0x16b>;
+defm V_MUL_HI_I32      : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
+defm V_DIV_FMAS_F32    : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
+defm V_DIV_FMAS_F64    : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
+defm V_MSAD_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
+defm V_TRIG_PREOP_F64  : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
+defm V_DIV_SCALE_F32   : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
+defm V_DIV_SCALE_F64   : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
 
 //===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
-let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
 
 multiclass VOP3_Real_vi<bits<10> op> {
   def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -757,9 +960,9 @@ multiclass VOP3Interp_Real_vi<bits<10> op> {
             VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
 }
 
-} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
 
-let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in {
+let AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8" in {
 
 multiclass VOP3_F16_Real_vi<bits<10> op> {
   def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -771,9 +974,9 @@ multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
             VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
 }
 
-} // End AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8"
 
-let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
+let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
 
 multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
   def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
@@ -807,7 +1010,7 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
             }
 }
 
-} // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"
+} // End AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9"
 
 defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
 defm V_MAD_I64_I32      : VOP3be_Real_vi <0x1E9>;
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 91b45583c848..55ee5f6577cf 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1,9 +1,8 @@
 //===-- VOP3PInstructions.td - Vector Instruction Defintions --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -70,6 +69,16 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I1
 def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
 def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
 
+
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// The constant will be emitted as a mov, and folded later.
+// TODO: We could directly encode the immediate now
+def : GCNPat<
+  (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
+  (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+>;
+
 multiclass MadFmaMixPats<SDPatternOperator fma_like,
                          Instruction mix_inst,
                          Instruction mixlo_inst,
@@ -239,29 +248,39 @@ class UDot2Pat<Instruction Inst> : GCNPat <
        (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                              (and i32:$src1, (i32 65535)))
    ),
-  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
 
 class SDot2Pat<Instruction Inst> : GCNPat <
   (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                          (sra i32:$src1, (i32 16))), i32:$src2),
        (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                              (sext_inreg i32:$src1, i16))),
-  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
 
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
 
 def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
 def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
 def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
-def V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 def V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
-def V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 def V_DOT8_U32_U4  : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
 
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+def V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+} // End SubtargetPredicate = HasDot1Insts
+
 multiclass DotPats<SDPatternOperator dot_op,
                    VOP3PInst dot_inst> {
+  let SubtargetPredicate = dot_inst.SubtargetPredicate in
   def : GCNPat <
     (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
             (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
@@ -281,12 +300,14 @@ def : UDot2Pat<V_DOT2_U32_U16>;
 def : SDot2Pat<V_DOT2_I32_I16>;
 
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                       (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
     (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
 
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                       [1, 2, 3, 4, 5, 6, 7], lhs, y,
@@ -296,19 +317,101 @@ foreach Type = ["U", "I"] in
 // Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
 // in the compile time. Directly handle the pattern generated by the FE here.
 foreach Type = ["U", "I"] in
+  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
   def : GCNPat <
     !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                       [7, 1, 2, 3, 4, 5, 6], lhs, y,
                       (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
     (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
 
-} // End SubtargetPredicate = HasDotInsts
+def ADst_32   : VOPDstOperand<AGPR_32>;
+def ADst_128  : VOPDstOperand<AReg_128>;
+def ADst_512  : VOPDstOperand<AReg_512>;
+def ADst_1024 : VOPDstOperand<AReg_1024>;
+
+def VOPProfileAccRead : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+  let Src0RC64 = ARegSrc_32;
+}
+
+def VOPProfileAccWrite : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+  let DstRC = ADst_32;
+  let Src0RC64 = VISrc_b32;
+}
+
+class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
+                    RegisterOperand SrcABRC = AVSrc_32>
+  : VOP3_Profile<P, VOP3_MAI> {
+  let DstRC = _DstRC;
+  let Src0RC64 = SrcABRC;
+  let Src1RC64 = SrcABRC;
+  let Src2RC64 = _SrcRC;
+  let HasOpSel = 0;
+  let HasClamp = 0;
+  let HasModifiers = 0;
+  let Asm64 = " $vdst, $src0, $src1, $src2$cbsz$abid$blgp";
+  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
+}
+
+def VOPProfileMAI_F32_F32_X4    : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
+def VOPProfileMAI_F32_F32_X16   : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     AISrc_512_f32,  ADst_512>;
+def VOPProfileMAI_F32_F32_X32   : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     AISrc_1024_f32, ADst_1024>;
+def VOPProfileMAI_I32_I32_X4    : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       AISrc_128_b32,  ADst_128>;
+def VOPProfileMAI_I32_I32_X16   : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     AISrc_512_b32,  ADst_512>;
+def VOPProfileMAI_I32_I32_X32   : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     AISrc_1024_b32, ADst_1024>;
+def VOPProfileMAI_F32_V2I16_X4  : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   AISrc_128_b32,  ADst_128>;
+def VOPProfileMAI_F32_V2I16_X16 : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32,  ADst_512>;
+def VOPProfileMAI_F32_V2I16_X32 : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
+def VOPProfileMAI_F32_V4F16_X4  : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
+def VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
+def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
+
+let Predicates = [HasMAIInsts] in {
+def V_ACCVGPR_READ_B32  : VOP3Inst<"v_accvgpr_read_b32",  VOPProfileAccRead>;
+def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
+  let isMoveImm = 1;
+}
+
+let isConvergent = 1 in {
+def V_MFMA_F32_4X4X1F32    : VOP3Inst<"v_mfma_f32_4x4x1f32",    VOPProfileMAI_F32_F32_X4,    int_amdgcn_mfma_f32_4x4x1f32>;
+def V_MFMA_F32_4X4X4F16    : VOP3Inst<"v_mfma_f32_4x4x4f16",    VOPProfileMAI_F32_V4F16_X4,  int_amdgcn_mfma_f32_4x4x4f16>;
+def V_MFMA_I32_4X4X4I8     : VOP3Inst<"v_mfma_i32_4x4x4i8",     VOPProfileMAI_I32_I32_X4,    int_amdgcn_mfma_i32_4x4x4i8>;
+def V_MFMA_F32_4X4X2BF16   : VOP3Inst<"v_mfma_f32_4x4x2bf16",   VOPProfileMAI_F32_V2I16_X4,  int_amdgcn_mfma_f32_4x4x2bf16>;
+def V_MFMA_F32_16X16X1F32  : VOP3Inst<"v_mfma_f32_16x16x1f32",  VOPProfileMAI_F32_F32_X16,   int_amdgcn_mfma_f32_16x16x1f32>;
+def V_MFMA_F32_16X16X4F32  : VOP3Inst<"v_mfma_f32_16x16x4f32",  VOPProfileMAI_F32_F32_X4,    int_amdgcn_mfma_f32_16x16x4f32>;
+def V_MFMA_F32_16X16X4F16  : VOP3Inst<"v_mfma_f32_16x16x4f16",  VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>;
+def V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4,  int_amdgcn_mfma_f32_16x16x16f16>;
+def V_MFMA_I32_16X16X4I8   : VOP3Inst<"v_mfma_i32_16x16x4i8",   VOPProfileMAI_I32_I32_X16,   int_amdgcn_mfma_i32_16x16x4i8>;
+def V_MFMA_I32_16X16X16I8  : VOP3Inst<"v_mfma_i32_16x16x16i8",  VOPProfileMAI_I32_I32_X4,    int_amdgcn_mfma_i32_16x16x16i8>;
+def V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>;
+def V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4,  int_amdgcn_mfma_f32_16x16x8bf16>;
+def V_MFMA_F32_32X32X1F32  : VOP3Inst<"v_mfma_f32_32x32x1f32",  VOPProfileMAI_F32_F32_X32,   int_amdgcn_mfma_f32_32x32x1f32>;
+def V_MFMA_F32_32X32X2F32  : VOP3Inst<"v_mfma_f32_32x32x2f32",  VOPProfileMAI_F32_F32_X16,   int_amdgcn_mfma_f32_32x32x2f32>;
+def V_MFMA_F32_32X32X4F16  : VOP3Inst<"v_mfma_f32_32x32x4f16",  VOPProfileMAI_F32_V4F16_X32, int_amdgcn_mfma_f32_32x32x4f16>;
+def V_MFMA_F32_32X32X8F16  : VOP3Inst<"v_mfma_f32_32x32x8f16",  VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>;
+def V_MFMA_I32_32X32X4I8   : VOP3Inst<"v_mfma_i32_32x32x4i8",   VOPProfileMAI_I32_I32_X32,   int_amdgcn_mfma_i32_32x32x4i8>;
+def V_MFMA_I32_32X32X8I8   : VOP3Inst<"v_mfma_i32_32x32x8i8",   VOPProfileMAI_I32_I32_X16,   int_amdgcn_mfma_i32_32x32x8i8>;
+def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
+def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
+} // End isConvergent = 1
+
+} // End Predicates = [HasMAIInsts]
+
+def : MnemonicAlias<"v_accvgpr_read",  "v_accvgpr_read_b32">;
+def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
 
 multiclass VOP3P_Real_vi<bits<10> op> {
   def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
             VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
     let AssemblerPredicates = [HasVOP3PInsts];
-    let DecoderNamespace = "VI";
+    let DecoderNamespace = "GFX8";
+  }
+}
+
+multiclass VOP3P_Real_MAI<bits<10> op> {
+  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+    let AssemblerPredicates = [HasMAIInsts];
+    let DecoderNamespace = "GFX8";
   }
 }
 
@@ -352,14 +455,97 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
 }
 
 
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
 
 defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
 defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
 defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
-defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x3a8>;
 defm V_DOT4_U32_U8  : VOP3P_Real_vi <0x3a9>;
-defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x3aa>;
 defm V_DOT8_U32_U4  : VOP3P_Real_vi <0x3ab>;
 
-} // End SubtargetPredicate = HasDotInsts
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x3a8>;
+defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x3aa>;
+
+} // End SubtargetPredicate = HasDot1Insts
+
+let SubtargetPredicate = HasMAIInsts in {
+
+defm V_ACCVGPR_READ_B32  : VOP3P_Real_MAI <0x3d8>;
+defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>;
+defm V_MFMA_F32_32X32X1F32  : VOP3P_Real_MAI <0x3c0>;
+defm V_MFMA_F32_16X16X1F32  : VOP3P_Real_MAI <0x3c1>;
+defm V_MFMA_F32_4X4X1F32    : VOP3P_Real_MAI <0x3c2>;
+defm V_MFMA_F32_32X32X2F32  : VOP3P_Real_MAI <0x3c4>;
+defm V_MFMA_F32_16X16X4F32  : VOP3P_Real_MAI <0x3c5>;
+defm V_MFMA_F32_32X32X4F16  : VOP3P_Real_MAI <0x3c8>;
+defm V_MFMA_F32_16X16X4F16  : VOP3P_Real_MAI <0x3c9>;
+defm V_MFMA_F32_4X4X4F16    : VOP3P_Real_MAI <0x3ca>;
+defm V_MFMA_F32_32X32X8F16  : VOP3P_Real_MAI <0x3cc>;
+defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>;
+defm V_MFMA_I32_32X32X4I8   : VOP3P_Real_MAI <0x3d0>;
+defm V_MFMA_I32_16X16X4I8   : VOP3P_Real_MAI <0x3d1>;
+defm V_MFMA_I32_4X4X4I8     : VOP3P_Real_MAI <0x3d2>;
+defm V_MFMA_I32_32X32X8I8   : VOP3P_Real_MAI <0x3d4>;
+defm V_MFMA_I32_16X16X16I8  : VOP3P_Real_MAI <0x3d5>;
+defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>;
+defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>;
+defm V_MFMA_F32_4X4X2BF16   : VOP3P_Real_MAI <0x3eb>;
+defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>;
+defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
+
+} // End SubtargetPredicate = HasMAIInsts
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass VOP3P_Real_gfx10<bits<10> op> {
+    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+                 VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm V_PK_MAD_I16     : VOP3P_Real_gfx10<0x000>;
+defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10<0x001>;
+defm V_PK_ADD_I16     : VOP3P_Real_gfx10<0x002>;
+defm V_PK_SUB_I16     : VOP3P_Real_gfx10<0x003>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
+defm V_PK_MAX_I16     : VOP3P_Real_gfx10<0x007>;
+defm V_PK_MIN_I16     : VOP3P_Real_gfx10<0x008>;
+defm V_PK_MAD_U16     : VOP3P_Real_gfx10<0x009>;
+defm V_PK_ADD_U16     : VOP3P_Real_gfx10<0x00a>;
+defm V_PK_SUB_U16     : VOP3P_Real_gfx10<0x00b>;
+defm V_PK_MAX_U16     : VOP3P_Real_gfx10<0x00c>;
+defm V_PK_MIN_U16     : VOP3P_Real_gfx10<0x00d>;
+defm V_PK_FMA_F16     : VOP3P_Real_gfx10<0x00e>;
+defm V_PK_ADD_F16     : VOP3P_Real_gfx10<0x00f>;
+defm V_PK_MUL_F16     : VOP3P_Real_gfx10<0x010>;
+defm V_PK_MIN_F16     : VOP3P_Real_gfx10<0x011>;
+defm V_PK_MAX_F16     : VOP3P_Real_gfx10<0x012>;
+defm V_FMA_MIX_F32    : VOP3P_Real_gfx10<0x020>;
+defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10<0x021>;
+defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10<0x022>;
+
+let SubtargetPredicate = HasDot2Insts in {
+
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
+defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
+defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
+defm V_DOT4_U32_U8  : VOP3P_Real_gfx10 <0x017>;
+defm V_DOT8_U32_U4  : VOP3P_Real_gfx10 <0x019>;
+
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8  : VOP3P_Real_gfx10 <0x016>;
+defm V_DOT8_I32_I4  : VOP3P_Real_gfx10 <0x018>;
+
+} // End SubtargetPredicate = HasDot1Insts
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index 091cac8cd35c..b3513e383d10 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1,9 +1,8 @@
 //===-- VOPCInstructions.td - Vector Instruction Defintions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,14 +53,29 @@ class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P> {
 // an explicit $dst.
 class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> :
   VOPProfile <[i1, vt0, vt1, untyped]> {
-  let Asm32 = "vcc, $src0, $src1";
+  let Asm32 = "$src0, $src1";
   // The destination for 32-bit encoding is implicit.
   let HasDst32 = 0;
-  let Outs64 = (outs VOPDstS64:$sdst);
+  let Outs64 = (outs VOPDstS64orS32:$sdst);
   list<SchedReadWrite> Schedule = sched;
 }
 
-class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[]> :
+class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
+                          ValueType vt1 = vt0> :
+  VOPC_Profile<sched, vt0, vt1> {
+  let Outs64 = (outs );
+  let OutsSDWA = (outs );
+  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel);
+  let Asm64 = !if(isFloatType<Src0VT>.ret, "$src0_modifiers, $src1_modifiers$clamp",
+                                           "$src0, $src1");
+  let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
+  let EmitDst = 0;
+}
+
+class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[],
+                   bit DefVcc = 1> :
   InstSI<(outs), P.Ins32, "", pattern>,
   VOP <opName>,
   SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
@@ -81,9 +95,7 @@ class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[]> :
   let VALU = 1;
   let VOPC = 1;
   let Uses = [EXEC];
-  let Defs = [VCC];
-
-  let SubtargetPredicate = isGCN;
+  let Defs = !if(DefVcc, [VCC], []);
 
   VOPProfile Pfl = P;
 }
@@ -115,8 +127,9 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
 }
 
 // This class is used only with VOPC instructions. Use $sdst for out operand
-class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
-  InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
+class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
+                     string Asm32 = ps.Pfl.Asm32, VOPProfile p = ps.Pfl> :
+  InstAlias <ps.OpName#" "#Asm32, (inst)>, PredicateControl {
 
   field bit isCompare;
   field bit isCommutable;
@@ -149,6 +162,27 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
   let SubtargetPredicate = AssemblerPredicate;
 }
 
+multiclass VOPCInstAliases <string OpName, string Arch> { // Three aliases, all targeting OpName#"_e32_"#Arch.
+  def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+                       !cast<Instruction>(OpName#"_e32_"#Arch)>; // Uses the default Asm32 string (ps.Pfl.Asm32).
+  let WaveSizePredicate = isWave32 in { // Same alias with "vcc_lo, " prepended to the Asm32 string.
+    def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+                         !cast<Instruction>(OpName#"_e32_"#Arch),
+                         "vcc_lo, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+  }
+  let WaveSizePredicate = isWave64 in { // Same alias with "vcc, " prepended to the Asm32 string.
+    def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+                         !cast<Instruction>(OpName#"_e32_"#Arch),
+                         "vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+  }
+}
+
+multiclass VOPCXInstAliases <string OpName, string Arch> {
+  def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+                       !cast<Instruction>(OpName#"_e32_"#Arch)>;
+}
+
+
 class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
   list<dag> ret = !if(P.HasModifiers,
       [(set i1:$sdst,
@@ -161,6 +195,10 @@ class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
       [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
 }
 
+class VCMPXNoSDstTable <bit has_sdst, string Name> { // NOTE(review): presumably read by a mapping defined elsewhere - confirm.
+  bit HasSDst = has_sdst; // Users in this file pass 1 for the regular pseudos and 0 for the "_nosdst" ones.
+  string NoSDstOp = Name; // Users pass opName#"_e32" / opName#"_e64" for both the regular and "_nosdst" forms.
+}
 
 multiclass VOPC_Pseudos <string opName,
                          VOPC_Profile P,
@@ -169,7 +207,8 @@ multiclass VOPC_Pseudos <string opName,
                          bit DefExec = 0> {
 
   def _e32 : VOPC_Pseudo <opName, P>,
-             Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
+             Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
+             VCMPXNoSDstTable<1, opName#"_e32"> {
     let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
     let SchedRW = P.Schedule;
     let isConvergent = DefExec;
@@ -178,7 +217,8 @@ multiclass VOPC_Pseudos <string opName,
   }
 
   def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
-    Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
+    Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
+    VCMPXNoSDstTable<1, opName#"_e64"> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = P.Schedule;
     let isCompare = 1;
@@ -193,6 +233,44 @@ multiclass VOPC_Pseudos <string opName,
   }
 }
 
+let SubtargetPredicate = HasSdstCMPX in {
+multiclass VOPCX_Pseudos <string opName,
+                          VOPC_Profile P, VOPC_Profile P_NoSDst,
+                          PatLeaf cond = COND_NULL,
+                          string revOp = opName> :
+           VOPC_Pseudos <opName, P, cond, revOp, 1> {
+
+  def _nosdst_e32 : VOPC_Pseudo <opName#"_nosdst", P_NoSDst, [], 0>,
+             Commutable_REV<revOp#"_nosdst_e32", !eq(revOp, opName)>,
+             VCMPXNoSDstTable<0, opName#"_e32"> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let isConvergent = 1;
+    let isCompare = 1;
+    let isCommutable = 1;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+
+  def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+    Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
+    VCMPXNoSDstTable<0, opName#"_e64"> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let isCompare = 1;
+    let isCommutable = 1;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+
+  def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let isConvergent = 1;
+    let isCompare = 1;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+}
+} // End SubtargetPredicate = HasSdstCMPX
+
 def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
 def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>;
 def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>;
@@ -200,6 +278,13 @@ def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>;
 def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>;
 def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>;
 
+def VOPC_F16_F16 : VOPC_NoSdst_Profile<[Write32Bit], f16>;
+def VOPC_F32_F32 : VOPC_NoSdst_Profile<[Write32Bit], f32>;
+def VOPC_F64_F64 : VOPC_NoSdst_Profile<[Write64Bit], f64>;
+def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>;
+def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>;
+def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>;
+
 multiclass VOPC_F16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
   VOPC_Pseudos <opName, VOPC_I1_F16_F16, cond, revOp, 0>;
 
@@ -219,22 +304,22 @@ multiclass VOPC_I64 <string opName, PatLeaf cond = COND_NULL, string revOp = opN
   VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
 
 multiclass VOPCX_F16 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_F16_F16, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_F16_F16, VOPC_F16_F16, COND_NULL, revOp>;
 
 multiclass VOPCX_F32 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_F32_F32, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_F32_F32, VOPC_F32_F32, COND_NULL, revOp>;
 
 multiclass VOPCX_F64 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_F64_F64, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_F64_F64, VOPC_F64_F64, COND_NULL, revOp>;
 
 multiclass VOPCX_I16 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_I16_I16, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_I16_I16, VOPC_I16_I16, COND_NULL, revOp>;
 
 multiclass VOPCX_I32 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_I32_I32, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_I32_I32, VOPC_I32_I32, COND_NULL, revOp>;
 
 multiclass VOPCX_I64 <string opName, string revOp = opName> :
-  VOPC_Pseudos <opName, VOPC_I1_I64_I64, COND_NULL, revOp, 1>;
+  VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
 
 
 //===----------------------------------------------------------------------===//
@@ -309,7 +394,7 @@ defm V_CMPX_NEQ_F64 : VOPCX_F64 <"v_cmpx_neq_f64">;
 defm V_CMPX_NLT_F64 : VOPCX_F64 <"v_cmpx_nlt_f64">;
 defm V_CMPX_TRU_F64 : VOPCX_F64 <"v_cmpx_tru_f64">;
 
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
 
 defm V_CMPS_F_F32 : VOPC_F32 <"v_cmps_f_f32">;
 defm V_CMPS_LT_F32 : VOPC_F32 <"v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
@@ -379,7 +464,7 @@ defm V_CMPSX_NEQ_F64 : VOPCX_F64 <"v_cmpsx_neq_f64">;
 defm V_CMPSX_NLT_F64 : VOPCX_F64 <"v_cmpsx_nlt_f64">;
 defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">;
 
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isGFX6GFX7
 
 let SubtargetPredicate = Has16BitInsts in {
 
@@ -546,6 +631,18 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
   let HasOMod = 0;
 }
 
+class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt> :
+  VOPC_Class_Profile<sched, vt> {
+  let Outs64 = (outs );
+  let OutsSDWA = (outs );
+  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+                     src0_sel:$src0_sel, src1_sel:$src1_sel);
+  let Asm64 = "$src0_modifiers, $src1";
+  let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
+  let EmitDst = 0;
+}
+
 class getVOPCClassPat64 <VOPProfile P> {
   list<dag> ret =
     [(set i1:$sdst,
@@ -556,46 +653,85 @@ class getVOPCClassPat64 <VOPProfile P> {
 
 // Special case for class instructions which only have modifiers on
 // the 1st source operand.
-multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
-  def _e32 : VOPC_Pseudo <opName, p> {
-    let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
+                               bit DefVcc = 1> {
+  def _e32 : VOPC_Pseudo <opName, p>,
+             VCMPXNoSDstTable<1, opName#"_e32"> {
+    let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
+                            !if(DefVcc, [VCC], []));
     let SchedRW = p.Schedule;
     let isConvergent = DefExec;
   }
 
-  def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
+  def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
+             VCMPXNoSDstTable<1, opName#"_e64"> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = p.Schedule;
   }
 
   def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
-    let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+    let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
+                            !if(DefVcc, [VCC], []));
     let SchedRW = p.Schedule;
     let isConvergent = DefExec;
   }
 }
 
+let SubtargetPredicate = HasSdstCMPX in {
+multiclass VOPCX_Class_Pseudos <string opName,
+                                VOPC_Profile P,
+                                VOPC_Profile P_NoSDst> :
+           VOPC_Class_Pseudos <opName, P, 1, 1> {
+
+  def _nosdst_e32 : VOPC_Pseudo <opName#"_nosdst", P_NoSDst, [], 0>,
+                    VCMPXNoSDstTable<0, opName#"_e32"> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let isConvergent = 1;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+
+  def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+                    VCMPXNoSDstTable<0, opName#"_e64"> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+
+  def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
+    let Defs = [EXEC];
+    let SchedRW = P_NoSDst.Schedule;
+    let isConvergent = 1;
+    let SubtargetPredicate = HasNoSdstCMPX;
+  }
+}
+} // End SubtargetPredicate = HasSdstCMPX
+
 def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
 def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
 def VOPC_I1_F64_I32 : VOPC_Class_Profile<[WriteDoubleAdd], f64>;
 
+def VOPC_F16_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f16>;
+def VOPC_F32_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f32>;
+def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
+
 multiclass VOPC_CLASS_F16 <string opName> :
   VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 0>;
 
 multiclass VOPCX_CLASS_F16 <string opName> :
-  VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 1>;
+  VOPCX_Class_Pseudos <opName, VOPC_I1_F16_I32, VOPC_F16_I32>;
 
 multiclass VOPC_CLASS_F32 <string opName> :
   VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
 
 multiclass VOPCX_CLASS_F32 <string opName> :
-  VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+  VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
 
 multiclass VOPC_CLASS_F64 <string opName> :
   VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
 
 multiclass VOPCX_CLASS_F64 <string opName> :
-  VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 1>;
+  VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
 
 defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
 defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
@@ -608,342 +744,471 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
 // V_ICMPIntrinsic Pattern.
 //===----------------------------------------------------------------------===//
 
-class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
-  (AMDGPUsetcc vt:$src0, vt:$src1, cond),
-  (inst $src0, $src1)
->;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
-
-class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
-  (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
-                   (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
-  (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-        DSTCLAMP.NONE)
->;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
-
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+// Use COPY_TO_REGCLASS to work around ReplaceAllUsesWith() complaining that it
+// cannot replace i1 <-> i64/i32 when the node was not morphed in place.
+multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
+  let WaveSizePredicate = isWave64 in
+  def : GCNPat <
+    (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+    (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
+  >;
+
+  let WaveSizePredicate = isWave32 in
+  def : GCNPat <
+    (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+    (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+  >;
+}
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
+multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
+  let WaveSizePredicate = isWave64 in
+  def : GCNPat <
+    (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+    (i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                           DSTCLAMP.NONE), SReg_64))
+  >;
+
+  let WaveSizePredicate = isWave32 in
+  def : GCNPat <
+    (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+    (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                           DSTCLAMP.NONE), SReg_32))
+  >;
+}
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
 
 //===----------------------------------------------------------------------===//
-// Target
+// Target-specific instruction encodings.
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// SI
+// GFX10.
 //===----------------------------------------------------------------------===//
 
-multiclass VOPC_Real_si <bits<9> op> {
-  let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
-    def _e32_si :
-      VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
-      VOPCe<op{7-0}>;
-
-    def _e64_si :
-      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
-      VOP3a_si <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
-      // Encoding used for VOPC instructions encoded as VOP3
-      // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
-      bits<8> sdst;
-      let Inst{7-0} = sdst;
-    }
+let AssemblerPredicate = isGFX10Plus in {
+  multiclass VOPC_Real_gfx10<bits<9> op> {
+    let DecoderNamespace = "GFX10" in {
+      def _e32_gfx10 :
+        VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+        VOPCe<op{7-0}>;
+      def _e64_gfx10 :
+        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+        VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+        // Encoding used for VOPC instructions encoded as VOP3 differs from
+        // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+        bits<8> sdst;
+        let Inst{7-0} = sdst;
+      }
+    } // End DecoderNamespace = "GFX10"
+
+    def _sdwa_gfx10 :
+      VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+      VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+    defm : VOPCInstAliases<NAME, "gfx10">;
   }
-  def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
-                       !cast<Instruction>(NAME#"_e32_si")> {
-    let AssemblerPredicate = isSICI;
+
+  multiclass VOPCX_Real_gfx10<bits<9> op> {
+    let DecoderNamespace = "GFX10" in {
+      def _e32_gfx10 :
+        VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32"), SIEncodingFamily.GFX10>,
+        VOPCe<op{7-0}> {
+          let AsmString = !subst("_nosdst", "", !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").PseudoInstr)
+                          # " " # !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").AsmOperands;
+        }
+
+      def _e64_gfx10 :
+        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_nosdst_e64"), SIEncodingFamily.GFX10>,
+        VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Pfl> {
+          let Inst{7-0} = ?; // sdst
+          let AsmString = !subst("_nosdst", "", !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Mnemonic)
+                          # "{_e64} " # !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").AsmOperands;
+        }
+    } // End DecoderNamespace = "GFX10"
+
+    def _sdwa_gfx10 :
+      VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa")>,
+      VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Pfl> {
+        let AsmString = !subst("_nosdst", "", !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Mnemonic)
+                        # "{_sdwa} " # !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").AsmOperands9;
+      }
+
+    defm : VOPCXInstAliases<NAME, "gfx10">;
   }
-}
+} // End AssemblerPredicate = isGFX10Plus
+
+defm V_CMP_LT_I16     : VOPC_Real_gfx10<0x089>;
+defm V_CMP_EQ_I16     : VOPC_Real_gfx10<0x08a>;
+defm V_CMP_LE_I16     : VOPC_Real_gfx10<0x08b>;
+defm V_CMP_GT_I16     : VOPC_Real_gfx10<0x08c>;
+defm V_CMP_NE_I16     : VOPC_Real_gfx10<0x08d>;
+defm V_CMP_GE_I16     : VOPC_Real_gfx10<0x08e>;
+defm V_CMP_CLASS_F16  : VOPC_Real_gfx10<0x08f>;
+defm V_CMPX_LT_I16    : VOPCX_Real_gfx10<0x099>;
+defm V_CMPX_EQ_I16    : VOPCX_Real_gfx10<0x09a>;
+defm V_CMPX_LE_I16    : VOPCX_Real_gfx10<0x09b>;
+defm V_CMPX_GT_I16    : VOPCX_Real_gfx10<0x09c>;
+defm V_CMPX_NE_I16    : VOPCX_Real_gfx10<0x09d>;
+defm V_CMPX_GE_I16    : VOPCX_Real_gfx10<0x09e>;
+defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx10<0x09f>;
+defm V_CMP_LT_U16     : VOPC_Real_gfx10<0x0a9>;
+defm V_CMP_EQ_U16     : VOPC_Real_gfx10<0x0aa>;
+defm V_CMP_LE_U16     : VOPC_Real_gfx10<0x0ab>;
+defm V_CMP_GT_U16     : VOPC_Real_gfx10<0x0ac>;
+defm V_CMP_NE_U16     : VOPC_Real_gfx10<0x0ad>;
+defm V_CMP_GE_U16     : VOPC_Real_gfx10<0x0ae>;
+defm V_CMPX_LT_U16    : VOPCX_Real_gfx10<0x0b9>;
+defm V_CMPX_EQ_U16    : VOPCX_Real_gfx10<0x0ba>;
+defm V_CMPX_LE_U16    : VOPCX_Real_gfx10<0x0bb>;
+defm V_CMPX_GT_U16    : VOPCX_Real_gfx10<0x0bc>;
+defm V_CMPX_NE_U16    : VOPCX_Real_gfx10<0x0bd>;
+defm V_CMPX_GE_U16    : VOPCX_Real_gfx10<0x0be>;
+defm V_CMP_F_F16      : VOPC_Real_gfx10<0x0c8>;
+defm V_CMP_LT_F16     : VOPC_Real_gfx10<0x0c9>;
+defm V_CMP_EQ_F16     : VOPC_Real_gfx10<0x0ca>;
+defm V_CMP_LE_F16     : VOPC_Real_gfx10<0x0cb>;
+defm V_CMP_GT_F16     : VOPC_Real_gfx10<0x0cc>;
+defm V_CMP_LG_F16     : VOPC_Real_gfx10<0x0cd>;
+defm V_CMP_GE_F16     : VOPC_Real_gfx10<0x0ce>;
+defm V_CMP_O_F16      : VOPC_Real_gfx10<0x0cf>;
+defm V_CMPX_F_F16     : VOPCX_Real_gfx10<0x0d8>;
+defm V_CMPX_LT_F16    : VOPCX_Real_gfx10<0x0d9>;
+defm V_CMPX_EQ_F16    : VOPCX_Real_gfx10<0x0da>;
+defm V_CMPX_LE_F16    : VOPCX_Real_gfx10<0x0db>;
+defm V_CMPX_GT_F16    : VOPCX_Real_gfx10<0x0dc>;
+defm V_CMPX_LG_F16    : VOPCX_Real_gfx10<0x0dd>;
+defm V_CMPX_GE_F16    : VOPCX_Real_gfx10<0x0de>;
+defm V_CMPX_O_F16     : VOPCX_Real_gfx10<0x0df>;
+defm V_CMP_U_F16      : VOPC_Real_gfx10<0x0e8>;
+defm V_CMP_NGE_F16    : VOPC_Real_gfx10<0x0e9>;
+defm V_CMP_NLG_F16    : VOPC_Real_gfx10<0x0ea>;
+defm V_CMP_NGT_F16    : VOPC_Real_gfx10<0x0eb>;
+defm V_CMP_NLE_F16    : VOPC_Real_gfx10<0x0ec>;
+defm V_CMP_NEQ_F16    : VOPC_Real_gfx10<0x0ed>;
+defm V_CMP_NLT_F16    : VOPC_Real_gfx10<0x0ee>;
+defm V_CMP_TRU_F16    : VOPC_Real_gfx10<0x0ef>;
+defm V_CMPX_U_F16     : VOPCX_Real_gfx10<0x0f8>;
+defm V_CMPX_NGE_F16   : VOPCX_Real_gfx10<0x0f9>;
+defm V_CMPX_NLG_F16   : VOPCX_Real_gfx10<0x0fa>;
+defm V_CMPX_NGT_F16   : VOPCX_Real_gfx10<0x0fb>;
+defm V_CMPX_NLE_F16   : VOPCX_Real_gfx10<0x0fc>;
+defm V_CMPX_NEQ_F16   : VOPCX_Real_gfx10<0x0fd>;
+defm V_CMPX_NLT_F16   : VOPCX_Real_gfx10<0x0fe>;
+defm V_CMPX_TRU_F16   : VOPCX_Real_gfx10<0x0ff>;
 
-defm V_CMP_F_F32     : VOPC_Real_si <0x0>;
-defm V_CMP_LT_F32    : VOPC_Real_si <0x1>;
-defm V_CMP_EQ_F32    : VOPC_Real_si <0x2>;
-defm V_CMP_LE_F32    : VOPC_Real_si <0x3>;
-defm V_CMP_GT_F32    : VOPC_Real_si <0x4>;
-defm V_CMP_LG_F32    : VOPC_Real_si <0x5>;
-defm V_CMP_GE_F32    : VOPC_Real_si <0x6>;
-defm V_CMP_O_F32     : VOPC_Real_si <0x7>;
-defm V_CMP_U_F32     : VOPC_Real_si <0x8>;
-defm V_CMP_NGE_F32   : VOPC_Real_si <0x9>;
-defm V_CMP_NLG_F32   : VOPC_Real_si <0xa>;
-defm V_CMP_NGT_F32   : VOPC_Real_si <0xb>;
-defm V_CMP_NLE_F32   : VOPC_Real_si <0xc>;
-defm V_CMP_NEQ_F32   : VOPC_Real_si <0xd>;
-defm V_CMP_NLT_F32   : VOPC_Real_si <0xe>;
-defm V_CMP_TRU_F32   : VOPC_Real_si <0xf>;
-
-defm V_CMPX_F_F32    : VOPC_Real_si <0x10>;
-defm V_CMPX_LT_F32   : VOPC_Real_si <0x11>;
-defm V_CMPX_EQ_F32   : VOPC_Real_si <0x12>;
-defm V_CMPX_LE_F32   : VOPC_Real_si <0x13>;
-defm V_CMPX_GT_F32   : VOPC_Real_si <0x14>;
-defm V_CMPX_LG_F32   : VOPC_Real_si <0x15>;
-defm V_CMPX_GE_F32   : VOPC_Real_si <0x16>;
-defm V_CMPX_O_F32    : VOPC_Real_si <0x17>;
-defm V_CMPX_U_F32    : VOPC_Real_si <0x18>;
-defm V_CMPX_NGE_F32  : VOPC_Real_si <0x19>;
-defm V_CMPX_NLG_F32  : VOPC_Real_si <0x1a>;
-defm V_CMPX_NGT_F32  : VOPC_Real_si <0x1b>;
-defm V_CMPX_NLE_F32  : VOPC_Real_si <0x1c>;
-defm V_CMPX_NEQ_F32  : VOPC_Real_si <0x1d>;
-defm V_CMPX_NLT_F32  : VOPC_Real_si <0x1e>;
-defm V_CMPX_TRU_F32  : VOPC_Real_si <0x1f>;
-
-defm V_CMP_F_F64     : VOPC_Real_si <0x20>;
-defm V_CMP_LT_F64    : VOPC_Real_si <0x21>;
-defm V_CMP_EQ_F64    : VOPC_Real_si <0x22>;
-defm V_CMP_LE_F64    : VOPC_Real_si <0x23>;
-defm V_CMP_GT_F64    : VOPC_Real_si <0x24>;
-defm V_CMP_LG_F64    : VOPC_Real_si <0x25>;
-defm V_CMP_GE_F64    : VOPC_Real_si <0x26>;
-defm V_CMP_O_F64     : VOPC_Real_si <0x27>;
-defm V_CMP_U_F64     : VOPC_Real_si <0x28>;
-defm V_CMP_NGE_F64   : VOPC_Real_si <0x29>;
-defm V_CMP_NLG_F64   : VOPC_Real_si <0x2a>;
-defm V_CMP_NGT_F64   : VOPC_Real_si <0x2b>;
-defm V_CMP_NLE_F64   : VOPC_Real_si <0x2c>;
-defm V_CMP_NEQ_F64   : VOPC_Real_si <0x2d>;
-defm V_CMP_NLT_F64   : VOPC_Real_si <0x2e>;
-defm V_CMP_TRU_F64   : VOPC_Real_si <0x2f>;
-
-defm V_CMPX_F_F64    : VOPC_Real_si <0x30>;
-defm V_CMPX_LT_F64   : VOPC_Real_si <0x31>;
-defm V_CMPX_EQ_F64   : VOPC_Real_si <0x32>;
-defm V_CMPX_LE_F64   : VOPC_Real_si <0x33>;
-defm V_CMPX_GT_F64   : VOPC_Real_si <0x34>;
-defm V_CMPX_LG_F64   : VOPC_Real_si <0x35>;
-defm V_CMPX_GE_F64   : VOPC_Real_si <0x36>;
-defm V_CMPX_O_F64    : VOPC_Real_si <0x37>;
-defm V_CMPX_U_F64    : VOPC_Real_si <0x38>;
-defm V_CMPX_NGE_F64  : VOPC_Real_si <0x39>;
-defm V_CMPX_NLG_F64  : VOPC_Real_si <0x3a>;
-defm V_CMPX_NGT_F64  : VOPC_Real_si <0x3b>;
-defm V_CMPX_NLE_F64  : VOPC_Real_si <0x3c>;
-defm V_CMPX_NEQ_F64  : VOPC_Real_si <0x3d>;
-defm V_CMPX_NLT_F64  : VOPC_Real_si <0x3e>;
-defm V_CMPX_TRU_F64  : VOPC_Real_si <0x3f>;
-
-defm V_CMPS_F_F32    : VOPC_Real_si <0x40>;
-defm V_CMPS_LT_F32   : VOPC_Real_si <0x41>;
-defm V_CMPS_EQ_F32   : VOPC_Real_si <0x42>;
-defm V_CMPS_LE_F32   : VOPC_Real_si <0x43>;
-defm V_CMPS_GT_F32   : VOPC_Real_si <0x44>;
-defm V_CMPS_LG_F32   : VOPC_Real_si <0x45>;
-defm V_CMPS_GE_F32   : VOPC_Real_si <0x46>;
-defm V_CMPS_O_F32    : VOPC_Real_si <0x47>;
-defm V_CMPS_U_F32    : VOPC_Real_si <0x48>;
-defm V_CMPS_NGE_F32  : VOPC_Real_si <0x49>;
-defm V_CMPS_NLG_F32  : VOPC_Real_si <0x4a>;
-defm V_CMPS_NGT_F32  : VOPC_Real_si <0x4b>;
-defm V_CMPS_NLE_F32  : VOPC_Real_si <0x4c>;
-defm V_CMPS_NEQ_F32  : VOPC_Real_si <0x4d>;
-defm V_CMPS_NLT_F32  : VOPC_Real_si <0x4e>;
-defm V_CMPS_TRU_F32  : VOPC_Real_si <0x4f>;
-
-defm V_CMPSX_F_F32   : VOPC_Real_si <0x50>;
-defm V_CMPSX_LT_F32  : VOPC_Real_si <0x51>;
-defm V_CMPSX_EQ_F32  : VOPC_Real_si <0x52>;
-defm V_CMPSX_LE_F32  : VOPC_Real_si <0x53>;
-defm V_CMPSX_GT_F32  : VOPC_Real_si <0x54>;
-defm V_CMPSX_LG_F32  : VOPC_Real_si <0x55>;
-defm V_CMPSX_GE_F32  : VOPC_Real_si <0x56>;
-defm V_CMPSX_O_F32   : VOPC_Real_si <0x57>;
-defm V_CMPSX_U_F32   : VOPC_Real_si <0x58>;
-defm V_CMPSX_NGE_F32 : VOPC_Real_si <0x59>;
-defm V_CMPSX_NLG_F32 : VOPC_Real_si <0x5a>;
-defm V_CMPSX_NGT_F32 : VOPC_Real_si <0x5b>;
-defm V_CMPSX_NLE_F32 : VOPC_Real_si <0x5c>;
-defm V_CMPSX_NEQ_F32 : VOPC_Real_si <0x5d>;
-defm V_CMPSX_NLT_F32 : VOPC_Real_si <0x5e>;
-defm V_CMPSX_TRU_F32 : VOPC_Real_si <0x5f>;
-
-defm V_CMPS_F_F64    : VOPC_Real_si <0x60>;
-defm V_CMPS_LT_F64   : VOPC_Real_si <0x61>;
-defm V_CMPS_EQ_F64   : VOPC_Real_si <0x62>;
-defm V_CMPS_LE_F64   : VOPC_Real_si <0x63>;
-defm V_CMPS_GT_F64   : VOPC_Real_si <0x64>;
-defm V_CMPS_LG_F64   : VOPC_Real_si <0x65>;
-defm V_CMPS_GE_F64   : VOPC_Real_si <0x66>;
-defm V_CMPS_O_F64    : VOPC_Real_si <0x67>;
-defm V_CMPS_U_F64    : VOPC_Real_si <0x68>;
-defm V_CMPS_NGE_F64  : VOPC_Real_si <0x69>;
-defm V_CMPS_NLG_F64  : VOPC_Real_si <0x6a>;
-defm V_CMPS_NGT_F64  : VOPC_Real_si <0x6b>;
-defm V_CMPS_NLE_F64  : VOPC_Real_si <0x6c>;
-defm V_CMPS_NEQ_F64  : VOPC_Real_si <0x6d>;
-defm V_CMPS_NLT_F64  : VOPC_Real_si <0x6e>;
-defm V_CMPS_TRU_F64  : VOPC_Real_si <0x6f>;
-
-defm V_CMPSX_F_F64   : VOPC_Real_si <0x70>;
-defm V_CMPSX_LT_F64  : VOPC_Real_si <0x71>;
-defm V_CMPSX_EQ_F64  : VOPC_Real_si <0x72>;
-defm V_CMPSX_LE_F64  : VOPC_Real_si <0x73>;
-defm V_CMPSX_GT_F64  : VOPC_Real_si <0x74>;
-defm V_CMPSX_LG_F64  : VOPC_Real_si <0x75>;
-defm V_CMPSX_GE_F64  : VOPC_Real_si <0x76>;
-defm V_CMPSX_O_F64   : VOPC_Real_si <0x77>;
-defm V_CMPSX_U_F64   : VOPC_Real_si <0x78>;
-defm V_CMPSX_NGE_F64 : VOPC_Real_si <0x79>;
-defm V_CMPSX_NLG_F64 : VOPC_Real_si <0x7a>;
-defm V_CMPSX_NGT_F64 : VOPC_Real_si <0x7b>;
-defm V_CMPSX_NLE_F64 : VOPC_Real_si <0x7c>;
-defm V_CMPSX_NEQ_F64 : VOPC_Real_si <0x7d>;
-defm V_CMPSX_NLT_F64 : VOPC_Real_si <0x7e>;
-defm V_CMPSX_TRU_F64 : VOPC_Real_si <0x7f>;
-
-defm V_CMP_F_I32     : VOPC_Real_si <0x80>;
-defm V_CMP_LT_I32    : VOPC_Real_si <0x81>;
-defm V_CMP_EQ_I32    : VOPC_Real_si <0x82>;
-defm V_CMP_LE_I32    : VOPC_Real_si <0x83>;
-defm V_CMP_GT_I32    : VOPC_Real_si <0x84>;
-defm V_CMP_NE_I32    : VOPC_Real_si <0x85>;
-defm V_CMP_GE_I32    : VOPC_Real_si <0x86>;
-defm V_CMP_T_I32     : VOPC_Real_si <0x87>;
-
-defm V_CMPX_F_I32    : VOPC_Real_si <0x90>;
-defm V_CMPX_LT_I32   : VOPC_Real_si <0x91>;
-defm V_CMPX_EQ_I32   : VOPC_Real_si <0x92>;
-defm V_CMPX_LE_I32   : VOPC_Real_si <0x93>;
-defm V_CMPX_GT_I32   : VOPC_Real_si <0x94>;
-defm V_CMPX_NE_I32   : VOPC_Real_si <0x95>;
-defm V_CMPX_GE_I32   : VOPC_Real_si <0x96>;
-defm V_CMPX_T_I32    : VOPC_Real_si <0x97>;
-
-defm V_CMP_F_I64     : VOPC_Real_si <0xa0>;
-defm V_CMP_LT_I64    : VOPC_Real_si <0xa1>;
-defm V_CMP_EQ_I64    : VOPC_Real_si <0xa2>;
-defm V_CMP_LE_I64    : VOPC_Real_si <0xa3>;
-defm V_CMP_GT_I64    : VOPC_Real_si <0xa4>;
-defm V_CMP_NE_I64    : VOPC_Real_si <0xa5>;
-defm V_CMP_GE_I64    : VOPC_Real_si <0xa6>;
-defm V_CMP_T_I64     : VOPC_Real_si <0xa7>;
-
-defm V_CMPX_F_I64    : VOPC_Real_si <0xb0>;
-defm V_CMPX_LT_I64   : VOPC_Real_si <0xb1>;
-defm V_CMPX_EQ_I64   : VOPC_Real_si <0xb2>;
-defm V_CMPX_LE_I64   : VOPC_Real_si <0xb3>;
-defm V_CMPX_GT_I64   : VOPC_Real_si <0xb4>;
-defm V_CMPX_NE_I64   : VOPC_Real_si <0xb5>;
-defm V_CMPX_GE_I64   : VOPC_Real_si <0xb6>;
-defm V_CMPX_T_I64    : VOPC_Real_si <0xb7>;
-
-defm V_CMP_F_U32     : VOPC_Real_si <0xc0>;
-defm V_CMP_LT_U32    : VOPC_Real_si <0xc1>;
-defm V_CMP_EQ_U32    : VOPC_Real_si <0xc2>;
-defm V_CMP_LE_U32    : VOPC_Real_si <0xc3>;
-defm V_CMP_GT_U32    : VOPC_Real_si <0xc4>;
-defm V_CMP_NE_U32    : VOPC_Real_si <0xc5>;
-defm V_CMP_GE_U32    : VOPC_Real_si <0xc6>;
-defm V_CMP_T_U32     : VOPC_Real_si <0xc7>;
-
-defm V_CMPX_F_U32    : VOPC_Real_si <0xd0>;
-defm V_CMPX_LT_U32   : VOPC_Real_si <0xd1>;
-defm V_CMPX_EQ_U32   : VOPC_Real_si <0xd2>;
-defm V_CMPX_LE_U32   : VOPC_Real_si <0xd3>;
-defm V_CMPX_GT_U32   : VOPC_Real_si <0xd4>;
-defm V_CMPX_NE_U32   : VOPC_Real_si <0xd5>;
-defm V_CMPX_GE_U32   : VOPC_Real_si <0xd6>;
-defm V_CMPX_T_U32    : VOPC_Real_si <0xd7>;
-
-defm V_CMP_F_U64     : VOPC_Real_si <0xe0>;
-defm V_CMP_LT_U64    : VOPC_Real_si <0xe1>;
-defm V_CMP_EQ_U64    : VOPC_Real_si <0xe2>;
-defm V_CMP_LE_U64    : VOPC_Real_si <0xe3>;
-defm V_CMP_GT_U64    : VOPC_Real_si <0xe4>;
-defm V_CMP_NE_U64    : VOPC_Real_si <0xe5>;
-defm V_CMP_GE_U64    : VOPC_Real_si <0xe6>;
-defm V_CMP_T_U64     : VOPC_Real_si <0xe7>;
-
-defm V_CMPX_F_U64    : VOPC_Real_si <0xf0>;
-defm V_CMPX_LT_U64   : VOPC_Real_si <0xf1>;
-defm V_CMPX_EQ_U64   : VOPC_Real_si <0xf2>;
-defm V_CMPX_LE_U64   : VOPC_Real_si <0xf3>;
-defm V_CMPX_GT_U64   : VOPC_Real_si <0xf4>;
-defm V_CMPX_NE_U64   : VOPC_Real_si <0xf5>;
-defm V_CMPX_GE_U64   : VOPC_Real_si <0xf6>;
-defm V_CMPX_T_U64    : VOPC_Real_si <0xf7>;
-
-defm V_CMP_CLASS_F32  : VOPC_Real_si <0x88>;
-defm V_CMPX_CLASS_F32 : VOPC_Real_si <0x98>;
-defm V_CMP_CLASS_F64  : VOPC_Real_si <0xa8>;
-defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>;
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7 in {
+  multiclass VOPC_Real_gfx6_gfx7<bits<9> op> {
+    let DecoderNamespace = "GFX6GFX7" in {
+      def _e32_gfx6_gfx7 :
+        VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+        VOPCe<op{7-0}>;
+      def _e64_gfx6_gfx7 :
+        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+        VOP3a_gfx6_gfx7<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+        // Encoding used for VOPC instructions encoded as VOP3 differs from
+        // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+        bits<8> sdst;
+        let Inst{7-0} = sdst;
+      }
+    } // End DecoderNamespace = "GFX6GFX7"
+
+    defm : VOPCInstAliases<NAME, "gfx6_gfx7">;
+  }
+} // End AssemblerPredicate = isGFX6GFX7
+
+multiclass VOPC_Real_gfx6_gfx7_gfx10<bits<9> op> :
+  VOPC_Real_gfx6_gfx7<op>, VOPC_Real_gfx10<op>;
+
+multiclass VOPCX_Real_gfx6_gfx7<bits<9> op> :
+  VOPC_Real_gfx6_gfx7<op>;
+
+multiclass VOPCX_Real_gfx6_gfx7_gfx10 <bits<9> op> :
+  VOPC_Real_gfx6_gfx7<op>, VOPCX_Real_gfx10<op>;
+
+defm V_CMP_F_F32      : VOPC_Real_gfx6_gfx7_gfx10<0x000>;
+defm V_CMP_LT_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x001>;
+defm V_CMP_EQ_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x002>;
+defm V_CMP_LE_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_CMP_GT_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_CMP_LG_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_CMP_GE_F32     : VOPC_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_CMP_O_F32      : VOPC_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_CMP_U_F32      : VOPC_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_CMP_NGE_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_CMP_NLG_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_CMP_NGT_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_CMP_NLE_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_CMP_NEQ_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00d>;
+defm V_CMP_NLT_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00e>;
+defm V_CMP_TRU_F32    : VOPC_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_CMPX_F_F32     : VOPCX_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_CMPX_LT_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_CMPX_EQ_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_CMPX_LE_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_CMPX_GT_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_CMPX_LG_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x015>;
+defm V_CMPX_GE_F32    : VOPCX_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_CMPX_O_F32     : VOPCX_Real_gfx6_gfx7_gfx10<0x017>;
+defm V_CMPX_U_F32     : VOPCX_Real_gfx6_gfx7_gfx10<0x018>;
+defm V_CMPX_NGE_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x019>;
+defm V_CMPX_NLG_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01a>;
+defm V_CMPX_NGT_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01b>;
+defm V_CMPX_NLE_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01c>;
+defm V_CMPX_NEQ_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01d>;
+defm V_CMPX_NLT_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01e>;
+defm V_CMPX_TRU_F32   : VOPCX_Real_gfx6_gfx7_gfx10<0x01f>;
+defm V_CMP_F_F64      : VOPC_Real_gfx6_gfx7_gfx10<0x020>;
+defm V_CMP_LT_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x021>;
+defm V_CMP_EQ_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x022>;
+defm V_CMP_LE_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x023>;
+defm V_CMP_GT_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x024>;
+defm V_CMP_LG_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x025>;
+defm V_CMP_GE_F64     : VOPC_Real_gfx6_gfx7_gfx10<0x026>;
+defm V_CMP_O_F64      : VOPC_Real_gfx6_gfx7_gfx10<0x027>;
+defm V_CMP_U_F64      : VOPC_Real_gfx6_gfx7_gfx10<0x028>;
+defm V_CMP_NGE_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x029>;
+defm V_CMP_NLG_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02a>;
+defm V_CMP_NGT_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02b>;
+defm V_CMP_NLE_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02c>;
+defm V_CMP_NEQ_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02d>;
+defm V_CMP_NLT_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02e>;
+defm V_CMP_TRU_F64    : VOPC_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_CMPX_F_F64     : VOPCX_Real_gfx6_gfx7_gfx10<0x030>;
+defm V_CMPX_LT_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x031>;
+defm V_CMPX_EQ_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x032>;
+defm V_CMPX_LE_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x033>;
+defm V_CMPX_GT_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x034>;
+defm V_CMPX_LG_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x035>;
+defm V_CMPX_GE_F64    : VOPCX_Real_gfx6_gfx7_gfx10<0x036>;
+defm V_CMPX_O_F64     : VOPCX_Real_gfx6_gfx7_gfx10<0x037>;
+defm V_CMPX_U_F64     : VOPCX_Real_gfx6_gfx7_gfx10<0x038>;
+defm V_CMPX_NGE_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x039>;
+defm V_CMPX_NLG_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03a>;
+defm V_CMPX_NGT_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03b>;
+defm V_CMPX_NLE_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03c>;
+defm V_CMPX_NEQ_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03d>;
+defm V_CMPX_NLT_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03e>;
+defm V_CMPX_TRU_F64   : VOPCX_Real_gfx6_gfx7_gfx10<0x03f>;
+defm V_CMPS_F_F32     : VOPC_Real_gfx6_gfx7<0x040>;
+defm V_CMPS_LT_F32    : VOPC_Real_gfx6_gfx7<0x041>;
+defm V_CMPS_EQ_F32    : VOPC_Real_gfx6_gfx7<0x042>;
+defm V_CMPS_LE_F32    : VOPC_Real_gfx6_gfx7<0x043>;
+defm V_CMPS_GT_F32    : VOPC_Real_gfx6_gfx7<0x044>;
+defm V_CMPS_LG_F32    : VOPC_Real_gfx6_gfx7<0x045>;
+defm V_CMPS_GE_F32    : VOPC_Real_gfx6_gfx7<0x046>;
+defm V_CMPS_O_F32     : VOPC_Real_gfx6_gfx7<0x047>;
+defm V_CMPS_U_F32     : VOPC_Real_gfx6_gfx7<0x048>;
+defm V_CMPS_NGE_F32   : VOPC_Real_gfx6_gfx7<0x049>;
+defm V_CMPS_NLG_F32   : VOPC_Real_gfx6_gfx7<0x04a>;
+defm V_CMPS_NGT_F32   : VOPC_Real_gfx6_gfx7<0x04b>;
+defm V_CMPS_NLE_F32   : VOPC_Real_gfx6_gfx7<0x04c>;
+defm V_CMPS_NEQ_F32   : VOPC_Real_gfx6_gfx7<0x04d>;
+defm V_CMPS_NLT_F32   : VOPC_Real_gfx6_gfx7<0x04e>;
+defm V_CMPS_TRU_F32   : VOPC_Real_gfx6_gfx7<0x04f>;
+defm V_CMPSX_F_F32    : VOPCX_Real_gfx6_gfx7<0x050>;
+defm V_CMPSX_LT_F32   : VOPCX_Real_gfx6_gfx7<0x051>;
+defm V_CMPSX_EQ_F32   : VOPCX_Real_gfx6_gfx7<0x052>;
+defm V_CMPSX_LE_F32   : VOPCX_Real_gfx6_gfx7<0x053>;
+defm V_CMPSX_GT_F32   : VOPCX_Real_gfx6_gfx7<0x054>;
+defm V_CMPSX_LG_F32   : VOPCX_Real_gfx6_gfx7<0x055>;
+defm V_CMPSX_GE_F32   : VOPCX_Real_gfx6_gfx7<0x056>;
+defm V_CMPSX_O_F32    : VOPCX_Real_gfx6_gfx7<0x057>;
+defm V_CMPSX_U_F32    : VOPCX_Real_gfx6_gfx7<0x058>;
+defm V_CMPSX_NGE_F32  : VOPCX_Real_gfx6_gfx7<0x059>;
+defm V_CMPSX_NLG_F32  : VOPCX_Real_gfx6_gfx7<0x05a>;
+defm V_CMPSX_NGT_F32  : VOPCX_Real_gfx6_gfx7<0x05b>;
+defm V_CMPSX_NLE_F32  : VOPCX_Real_gfx6_gfx7<0x05c>;
+defm V_CMPSX_NEQ_F32  : VOPCX_Real_gfx6_gfx7<0x05d>;
+defm V_CMPSX_NLT_F32  : VOPCX_Real_gfx6_gfx7<0x05e>;
+defm V_CMPSX_TRU_F32  : VOPCX_Real_gfx6_gfx7<0x05f>;
+defm V_CMPS_F_F64     : VOPC_Real_gfx6_gfx7<0x060>;
+defm V_CMPS_LT_F64    : VOPC_Real_gfx6_gfx7<0x061>;
+defm V_CMPS_EQ_F64    : VOPC_Real_gfx6_gfx7<0x062>;
+defm V_CMPS_LE_F64    : VOPC_Real_gfx6_gfx7<0x063>;
+defm V_CMPS_GT_F64    : VOPC_Real_gfx6_gfx7<0x064>;
+defm V_CMPS_LG_F64    : VOPC_Real_gfx6_gfx7<0x065>;
+defm V_CMPS_GE_F64    : VOPC_Real_gfx6_gfx7<0x066>;
+defm V_CMPS_O_F64     : VOPC_Real_gfx6_gfx7<0x067>;
+defm V_CMPS_U_F64     : VOPC_Real_gfx6_gfx7<0x068>;
+defm V_CMPS_NGE_F64   : VOPC_Real_gfx6_gfx7<0x069>;
+defm V_CMPS_NLG_F64   : VOPC_Real_gfx6_gfx7<0x06a>;
+defm V_CMPS_NGT_F64   : VOPC_Real_gfx6_gfx7<0x06b>;
+defm V_CMPS_NLE_F64   : VOPC_Real_gfx6_gfx7<0x06c>;
+defm V_CMPS_NEQ_F64   : VOPC_Real_gfx6_gfx7<0x06d>;
+defm V_CMPS_NLT_F64   : VOPC_Real_gfx6_gfx7<0x06e>;
+defm V_CMPS_TRU_F64   : VOPC_Real_gfx6_gfx7<0x06f>;
+defm V_CMPSX_F_F64    : VOPCX_Real_gfx6_gfx7<0x070>;
+defm V_CMPSX_LT_F64   : VOPCX_Real_gfx6_gfx7<0x071>;
+defm V_CMPSX_EQ_F64   : VOPCX_Real_gfx6_gfx7<0x072>;
+defm V_CMPSX_LE_F64   : VOPCX_Real_gfx6_gfx7<0x073>;
+defm V_CMPSX_GT_F64   : VOPCX_Real_gfx6_gfx7<0x074>;
+defm V_CMPSX_LG_F64   : VOPCX_Real_gfx6_gfx7<0x075>;
+defm V_CMPSX_GE_F64   : VOPCX_Real_gfx6_gfx7<0x076>;
+defm V_CMPSX_O_F64    : VOPCX_Real_gfx6_gfx7<0x077>;
+defm V_CMPSX_U_F64    : VOPCX_Real_gfx6_gfx7<0x078>;
+defm V_CMPSX_NGE_F64  : VOPCX_Real_gfx6_gfx7<0x079>;
+defm V_CMPSX_NLG_F64  : VOPCX_Real_gfx6_gfx7<0x07a>;
+defm V_CMPSX_NGT_F64  : VOPCX_Real_gfx6_gfx7<0x07b>;
+defm V_CMPSX_NLE_F64  : VOPCX_Real_gfx6_gfx7<0x07c>;
+defm V_CMPSX_NEQ_F64  : VOPCX_Real_gfx6_gfx7<0x07d>;
+defm V_CMPSX_NLT_F64  : VOPCX_Real_gfx6_gfx7<0x07e>;
+defm V_CMPSX_TRU_F64  : VOPCX_Real_gfx6_gfx7<0x07f>;
+defm V_CMP_F_I32      : VOPC_Real_gfx6_gfx7_gfx10<0x080>;
+defm V_CMP_LT_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x081>;
+defm V_CMP_EQ_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x082>;
+defm V_CMP_LE_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x083>;
+defm V_CMP_GT_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x084>;
+defm V_CMP_NE_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x085>;
+defm V_CMP_GE_I32     : VOPC_Real_gfx6_gfx7_gfx10<0x086>;
+defm V_CMP_T_I32      : VOPC_Real_gfx6_gfx7_gfx10<0x087>;
+defm V_CMP_CLASS_F32  : VOPC_Real_gfx6_gfx7_gfx10<0x088>;
+defm V_CMPX_F_I32     : VOPCX_Real_gfx6_gfx7_gfx10<0x090>;
+defm V_CMPX_LT_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x091>;
+defm V_CMPX_EQ_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x092>;
+defm V_CMPX_LE_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x093>;
+defm V_CMPX_GT_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x094>;
+defm V_CMPX_NE_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x095>;
+defm V_CMPX_GE_I32    : VOPCX_Real_gfx6_gfx7_gfx10<0x096>;
+defm V_CMPX_T_I32     : VOPCX_Real_gfx6_gfx7_gfx10<0x097>;
+defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x098>;
+defm V_CMP_F_I64      : VOPC_Real_gfx6_gfx7_gfx10<0x0a0>;
+defm V_CMP_LT_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a1>;
+defm V_CMP_EQ_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a2>;
+defm V_CMP_LE_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a3>;
+defm V_CMP_GT_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a4>;
+defm V_CMP_NE_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a5>;
+defm V_CMP_GE_I64     : VOPC_Real_gfx6_gfx7_gfx10<0x0a6>;
+defm V_CMP_T_I64      : VOPC_Real_gfx6_gfx7_gfx10<0x0a7>;
+defm V_CMP_CLASS_F64  : VOPC_Real_gfx6_gfx7_gfx10<0x0a8>;
+defm V_CMPX_F_I64     : VOPCX_Real_gfx6_gfx7_gfx10<0x0b0>;
+defm V_CMPX_LT_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b1>;
+defm V_CMPX_EQ_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b2>;
+defm V_CMPX_LE_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b3>;
+defm V_CMPX_GT_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b4>;
+defm V_CMPX_NE_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b5>;
+defm V_CMPX_GE_I64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0b6>;
+defm V_CMPX_T_I64     : VOPCX_Real_gfx6_gfx7_gfx10<0x0b7>;
+defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b8>;
+defm V_CMP_F_U32      : VOPC_Real_gfx6_gfx7_gfx10<0x0c0>;
+defm V_CMP_LT_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c1>;
+defm V_CMP_EQ_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c2>;
+defm V_CMP_LE_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c3>;
+defm V_CMP_GT_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c4>;
+defm V_CMP_NE_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c5>;
+defm V_CMP_GE_U32     : VOPC_Real_gfx6_gfx7_gfx10<0x0c6>;
+defm V_CMP_T_U32      : VOPC_Real_gfx6_gfx7_gfx10<0x0c7>;
+defm V_CMPX_F_U32     : VOPCX_Real_gfx6_gfx7_gfx10<0x0d0>;
+defm V_CMPX_LT_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d1>;
+defm V_CMPX_EQ_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d2>;
+defm V_CMPX_LE_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d3>;
+defm V_CMPX_GT_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d4>;
+defm V_CMPX_NE_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d5>;
+defm V_CMPX_GE_U32    : VOPCX_Real_gfx6_gfx7_gfx10<0x0d6>;
+defm V_CMPX_T_U32     : VOPCX_Real_gfx6_gfx7_gfx10<0x0d7>;
+defm V_CMP_F_U64      : VOPC_Real_gfx6_gfx7_gfx10<0x0e0>;
+defm V_CMP_LT_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e1>;
+defm V_CMP_EQ_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e2>;
+defm V_CMP_LE_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e3>;
+defm V_CMP_GT_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e4>;
+defm V_CMP_NE_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e5>;
+defm V_CMP_GE_U64     : VOPC_Real_gfx6_gfx7_gfx10<0x0e6>;
+defm V_CMP_T_U64      : VOPC_Real_gfx6_gfx7_gfx10<0x0e7>;
+defm V_CMPX_F_U64     : VOPCX_Real_gfx6_gfx7_gfx10<0x0f0>;
+defm V_CMPX_LT_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f1>;
+defm V_CMPX_EQ_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f2>;
+defm V_CMPX_LE_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f3>;
+defm V_CMPX_GT_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f4>;
+defm V_CMPX_NE_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f5>;
+defm V_CMPX_GE_U64    : VOPCX_Real_gfx6_gfx7_gfx10<0x0f6>;
+defm V_CMPX_T_U64     : VOPCX_Real_gfx6_gfx7_gfx10<0x0f7>;
 
 //===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//
 
 multiclass VOPC_Real_vi <bits<10> op> {
-  let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+  let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
     def _e32_vi :
       VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
       VOPCe<op{7-0}>;
@@ -966,9 +1231,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
     VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
     VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
 
-  def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
-                       !cast<Instruction>(NAME#"_e32_vi")> {
-    let AssemblerPredicate = isVI;
+  let AssemblerPredicate = isGFX8GFX9 in {
+    defm : VOPCInstAliases<NAME, "vi">;
   }
 }
 
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 7de7d90d27b3..677095a354be 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -1,9 +1,8 @@
 //===-- VOPInstructions.td - Vector Instruction Defintions ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -91,6 +90,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
 
   let VOP3_OPSEL = isVop3OpSel;
   let IsPacked = P.IsPacked;
+  let IsMAI = P.IsMAI;
 
   let AsmOperands = !if(isVop3OpSel,
                         P.AsmVOP3OpSel,
@@ -100,7 +100,6 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
-  let SubtargetPredicate = isGCN;
 
   // Because SGPRs may be allowed if there are multiple operands, we
   // need a post-isel hook to insert copies in order to avoid
@@ -190,9 +189,15 @@ class VOP3a<VOPProfile P> : Enc64 {
   let Inst{63}    = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
 }
 
-class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> {
+class VOP3a_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a<p> {
+  let Inst{11}    = !if(p.HasClamp, clamp{0}, 0);
   let Inst{25-17} = op;
-  let Inst{11}    = !if(P.HasClamp, clamp{0}, 0);
+}
+
+class VOP3a_gfx10<bits<10> op, VOPProfile p> : VOP3a<p> { // VOP3a layout with a 10-bit opcode
+  let Inst{15}    = !if(p.HasClamp, clamp{0}, 0); // clamp at bit 15 (VOP3a_gfx6_gfx7 uses bit 11); 0 without HasClamp
+  let Inst{25-16} = op; // 10-bit op (VOP3a_gfx6_gfx7 places a 9-bit op in Inst{25-17})
+  let Inst{31-26} = 0x35; // encoding
+}
 
 class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
@@ -200,9 +205,14 @@ class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
   let Inst{15}    = !if(P.HasClamp, clamp{0}, 0);
 }
 
-class VOP3e_si <bits<9> op, VOPProfile P> : VOP3a_si <op, P> {
+class VOP3e_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a_gfx6_gfx7<op, p> {
   bits<8> vdst;
-  let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+  let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0);
+}
+
+class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> { // VOP3a_gfx10 plus an 8-bit vdst
+  bits<8> vdst;
+  let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0); // low byte holds vdst; zero when the profile does not emit a dst
+}
 
 class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
@@ -217,6 +227,13 @@ class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
   let Inst{14} = !if(P.HasDst,  src0_modifiers{3}, 0);
 }
 
+class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { // fills Inst{14-11} from the modifier operands
+  let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0); // bit 2 of each source's modifiers, gated on that source existing
+  let Inst{12} = !if(p.HasSrc1, src1_modifiers{2}, 0);
+  let Inst{13} = !if(p.HasSrc2, src2_modifiers{2}, 0);
+  let Inst{14} = !if(p.HasDst,  src0_modifiers{3}, 0); // dst slot is read from src0_modifiers{3}, same as VOP3OpSel_gfx9
+}
+
 // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
 class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
   bits<2> attrchan;
@@ -236,6 +253,21 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
   let Inst{49-41} = src0;
 }
 
+class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { // adds attr/attrchan/high on top of VOP3e_gfx10
+  bits<6> attr;
+  bits<2> attrchan;
+  bits<1> high;
+
+  let Inst{8}     = 0; // always zero in this format
+  let Inst{9}     = !if(p.HasSrc0Mods, src0_modifiers{1}, 0); // src0_modifiers{1} -> bit 9; src0_modifiers{0} -> bit 62 below
+  let Inst{37-32} = attr;
+  let Inst{39-38} = attrchan;
+  let Inst{40}    = !if(p.HasHigh, high, 0); // only encoded when the profile has HasHigh
+  let Inst{49-41} = src0; // src0 lands in Inst{49-41}, matching VOP3Interp_vi
+  let Inst{61}    = 0; // always zero in this format
+  let Inst{62}    = !if(p.HasSrc0Mods, src0_modifiers{0}, 0);
+}
+
 class VOP3be <VOPProfile P> : Enc64 {
   bits<8> vdst;
   bits<2> src0_modifiers;
@@ -295,10 +327,51 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
   let Inst{63}    = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
 }
 
-class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
+class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 { // 64-bit encoding carrying cbsz/abid/blgp fields
+  bits<8> vdst;
+  bits<10> src0; // bits 8-0 go to Inst{40-32}, bit 9 to Inst{59}
+  bits<10> src1; // bits 8-0 go to Inst{49-41}, bit 9 to Inst{60}
+  bits<9> src2;
+  bits<3> blgp;
+  bits<3> cbsz;
+  bits<4> abid;
+  bits<1> clamp;
+
+  let Inst{7-0} = vdst; // written unconditionally (no EmitDst check here)
+
+  let Inst{10-8}  = !if(P.HasSrc1, cbsz, 0); // cbsz, abid and blgp are all gated on HasSrc1
+  let Inst{14-11} = !if(P.HasSrc1, abid, 0);
+
+  let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+  let Inst{25-16} = op;
+  let Inst{31-26} = 0x34; //encoding
+  let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
+  let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+  let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+
+  let Inst{59}    = !if(P.HasSrc0, src0{9}, 0); // acc(0)
+  let Inst{60}    = !if(P.HasSrc1, src1{9}, 0); // acc(1)
+
+  let Inst{63-61} = !if(P.HasSrc1, blgp, 0);
+}
+
+
+class VOP3Pe_gfx10 <bits<10> op, VOPProfile P> : VOP3Pe<op, P> { // VOP3Pe with only Inst{31-26} overridden
+  let Inst{31-26} = 0x33; //encoding
+}
+
+class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {
   let Inst{25-17} = op;
 }
 
+class VOP3be_gfx10<bits<10> op, VOPProfile p> : VOP3be<p> { // VOP3be with a 10-bit op and a clamp bit
+  bits<1> clamp; // declared here; VOP3be_gfx6_gfx7 has no clamp field
+  let Inst{15}    = !if(p.HasClamp, clamp{0}, 0); // 0 when the profile has no clamp
+  let Inst{25-16} = op; // 10-bit op (VOP3be_gfx6_gfx7 places a 9-bit op in Inst{25-17})
+  let Inst{31-26} = 0x35; // encoding
+}
+
 class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
   bits<1> clamp;
   let Inst{25-16} = op;
@@ -393,7 +466,7 @@ class VOP_SDWA9Ae<VOPProfile P> : VOP_SDWA9e<P> {
 class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> {
   bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}}
 
-  let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0);
+  let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, ?);
   let Inst{47} = !if(P.EmitDst, sdst{7}, 0);
 }
 
@@ -456,9 +529,8 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
   let TSFlags              = ps.TSFlags;
 }
 
-class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
-  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []>,
-  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9> {
+class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
+  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []> {
 
   let isPseudo = 0;
   let isCodeGenOnly = 0;
@@ -485,7 +557,20 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
   let TSFlags              = ps.TSFlags;
 }
 
-class VOP_DPPe<VOPProfile P> : Enc64 {
+class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> : // Base_VOP_SDWA9_Real tagged with the SDWA9 encoding family
+  Base_VOP_SDWA9_Real <ps >,
+  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9>;
+
+class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> { // reuses the SDWA9 base; overrides predicates and decoder namespace
+  let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst); // keyed on the profile's HasExtSDWA9 flag: HasSDWA10 if set, else DisableInst
+  let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst); // same condition as SubtargetPredicate
+  let DecoderNamespace = "SDWA10";
+}
+
+class VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : // Base_VOP_SDWA10_Real tagged with the SDWA10 encoding family
+  Base_VOP_SDWA10_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SDWA10>;
+
+class VOP_DPPe<VOPProfile P, bit IsDPP16=0> : Enc64 {
   bits<2> src0_modifiers;
   bits<8> src0;
   bits<2> src1_modifiers;
@@ -493,9 +578,11 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
   bits<1> bound_ctrl;
   bits<4> bank_mask;
   bits<4> row_mask;
+  bit     fi;
 
   let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
   let Inst{48-40} = dpp_ctrl;
+  let Inst{50}    = !if(IsDPP16, fi, ?);
   let Inst{51}    = bound_ctrl;
   let Inst{52}    = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
   let Inst{53}    = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
@@ -533,8 +620,8 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
   let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
   let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
                                         AMDGPUAsmVariants.Disable);
-  let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
-  let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
+  let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
+  let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
   let DecoderNamespace = "DPP";
 
   VOPProfile Pfl = P;
@@ -568,6 +655,67 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
   let TSFlags              = ps.TSFlags;
 }
 
+class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16, // IsDPP16 selects the DPP16 ins/asm defaults below
+               dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
+               string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
+  InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []>, // asm string is OpName followed by the chosen AsmDPP; no patterns
+  VOP_DPPe<P, IsDPP16> { // IsDPP16 is forwarded to the encoding class
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let UseNamedOperandTable = 1;
+
+  let VALU = 1;
+  let DPP = 1;
+  let Size = 8;
+
+  let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", ""); // converter is set only when the profile has modifiers
+  let SubtargetPredicate = HasDPP;
+  let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst); // DisableInst when the profile lacks HasExtDPP
+  let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
+                                        AMDGPUAsmVariants.Disable);
+  let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); // tie P.TieRegDPP to $vdst when there are source args
+  let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); // the tied operand is excluded from encoding
+  let DecoderNamespace = "DPP";
+}
+
+class VOP_DPP8e<VOPProfile P> : Enc64 { // encoding fields for the DPP8 form
+  bits<8> src0;
+  bits<24> dpp8;
+  bits<9> fi; // declared but not placed into Inst by this class
+
+  let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); // zero when the profile has no src0
+  let Inst{63-40} = dpp8{23-0}; // all 24 dpp8 bits fill Inst{63-40}
+}
+
+class VOP_DPP8<string OpName, VOPProfile P> : // instruction class built from the profile's DPP8 operand lists
+  InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []>, // asm string is OpName followed by P.AsmDPP8; no patterns
+  VOP_DPP8e<P> {
+
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let UseNamedOperandTable = 1;
+
+  let VALU = 1;
+  let DPP = 1;
+  let Size = 8;
+
+  let AsmMatchConverter = "cvtDPP8"; // set unconditionally (VOP_DPP sets cvtDPP only with HasModifiers)
+  let SubtargetPredicate = HasDPP8;
+  let AssemblerPredicate = !if(P.HasExt, HasDPP8, DisableInst); // gated on P.HasExt (VOP_DPP gates on P.HasExtDPP)
+  let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
+                                     AMDGPUAsmVariants.Disable);
+  let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); // tie P.TieRegDPP to $vdst when there are source args
+  let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); // the tied operand is excluded from encoding
+}
+
+def DPP8Mode { // NOTE(review): presumably the selector values marking a DPP8 encoding with fi=0 / fi=1 -- confirm at use sites
+  int FI_0 = 0xE9;
+  int FI_1 = 0xEA;
+}
+
 class getNumNodeArgs<SDPatternOperator Op> {
   SDNode N = !cast<SDNode>(Op);
   SDTypeProfile TP = N.TypeProfile;
diff --git a/lib/Target/ARC/ARC.h b/lib/Target/ARC/ARC.h
index 65f6ed67eb5b..cbbf0233706d 100644
--- a/lib/Target/ARC/ARC.h
+++ b/lib/Target/ARC/ARC.h
@@ -1,9 +1,8 @@
 //===- ARC.h - Top-level interface for ARC representation -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,6 +25,7 @@ class ARCTargetMachine;
 FunctionPass *createARCISelDag(ARCTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 FunctionPass *createARCExpandPseudosPass();
+FunctionPass *createARCOptAddrMode();
 FunctionPass *createARCBranchFinalizePass();
 
 } // end namespace llvm
diff --git a/lib/Target/ARC/ARC.td b/lib/Target/ARC/ARC.td
index 6635630c62a3..846f1bb6735e 100644
--- a/lib/Target/ARC/ARC.td
+++ b/lib/Target/ARC/ARC.td
@@ -1,9 +1,8 @@
 //===- ARC.td - Describe the ARC Target Machine ------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/ARCAsmPrinter.cpp b/lib/Target/ARC/ARCAsmPrinter.cpp
index 8c13da0484fd..5c3e2c9e773c 100644
--- a/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===- ARCAsmPrinter.cpp - ARC LLVM assembly writer -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,28 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARC.h"
-#include "ARCInstrInfo.h"
 #include "ARCMCInstLower.h"
 #include "ARCSubtarget.h"
 #include "ARCTargetMachine.h"
-#include "ARCTargetStreamer.h"
-#include "InstPrinter/ARCInstPrinter.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "MCTargetDesc/ARCInstPrinter.h"
+#include "TargetInfo/ARCTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include <algorithm>
 
 using namespace llvm;
 
@@ -44,7 +33,6 @@ namespace {
 
 class ARCAsmPrinter : public AsmPrinter {
   ARCMCInstLower MCInstLowering;
-  ARCTargetStreamer &getTargetStreamer();
 
 public:
   explicit ARCAsmPrinter(TargetMachine &TM,
@@ -58,10 +46,6 @@ public:
 
 } // end anonymous namespace
 
-ARCTargetStreamer &ARCAsmPrinter::getTargetStreamer() {
-  return static_cast<ARCTargetStreamer &>(*OutStreamer->getTargetStreamer());
-}
-
 void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
diff --git a/lib/Target/ARC/ARCBranchFinalize.cpp b/lib/Target/ARC/ARCBranchFinalize.cpp
index 3b410fa383b7..633c081b3137 100644
--- a/lib/Target/ARC/ARCBranchFinalize.cpp
+++ b/lib/Target/ARC/ARCBranchFinalize.cpp
@@ -1,9 +1,8 @@
 //===- ARCBranchFinalize.cpp - ARC conditional branches ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCCallingConv.td b/lib/Target/ARC/ARCCallingConv.td
index b7d37bc2a41f..098e03e36bca 100644
--- a/lib/Target/ARC/ARCCallingConv.td
+++ b/lib/Target/ARC/ARCCallingConv.td
@@ -1,9 +1,8 @@
 //===- ARCCallingConv.td - Calling Conventions for ARC -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for ARC architecture.
diff --git a/lib/Target/ARC/ARCExpandPseudos.cpp b/lib/Target/ARC/ARCExpandPseudos.cpp
index 3177735c0529..a1646d17605f 100644
--- a/lib/Target/ARC/ARCExpandPseudos.cpp
+++ b/lib/Target/ARC/ARCExpandPseudos.cpp
@@ -1,9 +1,8 @@
 //===- ARCExpandPseudosPass - ARC expand pseudo loads -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCFrameLowering.cpp b/lib/Target/ARC/ARCFrameLowering.cpp
index ca59cb2baaa7..d8946d97deff 100644
--- a/lib/Target/ARC/ARCFrameLowering.cpp
+++ b/lib/Target/ARC/ARCFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- ARCFrameLowering.cpp - ARC Frame Information -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,6 +64,8 @@ static void generateStackAdjustment(MachineBasicBlock &MBB,
   assert((AbsAmount % 4 == 0) && "Stack adjustments must be 4-byte aligned.");
   if (isUInt<6>(AbsAmount))
     AdjOp = Positive ? ARC::ADD_rru6 : ARC::SUB_rru6;
+  else if (isInt<12>(AbsAmount))
+    AdjOp = Positive ? ARC::ADD_rrs12 : ARC::SUB_rrs12;
   else
     AdjOp = Positive ? ARC::ADD_rrlimm : ARC::SUB_rrlimm;
 
@@ -134,8 +135,12 @@ void ARCFrameLowering::emitPrologue(MachineFunction &MF,
     // Add in the varargs area here first.
     LLVM_DEBUG(dbgs() << "Varargs\n");
     unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
-    BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
-        .addReg(ARC::SP)
+    unsigned Opc = ARC::SUB_rrlimm;
+    if (isUInt<6>(VarArgsBytes))
+      Opc = ARC::SUB_rru6;
+    else if (isInt<12>(VarArgsBytes))
+      Opc = ARC::SUB_rrs12;
+    BuildMI(MBB, MBBI, dl, TII->get(Opc), ARC::SP)
         .addReg(ARC::SP)
         .addImm(VarArgsBytes);
   }
@@ -247,7 +252,10 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
   // Then, replace the frame pointer by (new) [sp,StackSize-4].
   // Then, move the stack pointer the rest of the way (sp = sp + StackSize).
   if (hasFP(MF)) {
-    BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARC::SUB_rru6), ARC::SP)
+    unsigned Opc = ARC::SUB_rrlimm;
+    if (isUInt<6>(StackSize))
+      Opc = ARC::SUB_rru6;
+    BuildMI(MBB, MBBI, DebugLoc(), TII->get(Opc), ARC::SP)
         .addReg(ARC::FP)
         .addImm(StackSize);
     AmountAboveFunclet += 4;
@@ -271,19 +279,28 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   // Move the stack pointer up to the point of the funclet.
-  if (StackSize - AmountAboveFunclet) {
-    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
-        .addReg(ARC::SP)
+  if (unsigned MoveAmount = StackSize - AmountAboveFunclet) {
+    unsigned Opc = ARC::ADD_rrlimm;
+    if (isUInt<6>(MoveAmount))
+      Opc = ARC::ADD_rru6;
+    else if (isInt<12>(MoveAmount))
+      Opc = ARC::ADD_rrs12;
+    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc), ARC::SP)
         .addReg(ARC::SP)
         .addImm(StackSize - AmountAboveFunclet);
   }
 
   if (StackSlotsUsedByFunclet) {
+    // This part of the adjustment will always be < 64 bytes.
     BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::BL))
         .addExternalSymbol(load_funclet_name[Last - ARC::R15])
         .addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
-    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
-        .addReg(ARC::SP)
+    unsigned Opc = ARC::ADD_rrlimm;
+    if (isUInt<6>(4 * StackSlotsUsedByFunclet))
+      Opc = ARC::ADD_rru6;
+    else if (isInt<12>(4 * StackSlotsUsedByFunclet))
+      Opc = ARC::ADD_rrs12;
+    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc), ARC::SP)
         .addReg(ARC::SP)
         .addImm(4 * (StackSlotsUsedByFunclet));
   }
@@ -294,8 +311,8 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
   // Now, pop fp if necessary.
   if (hasFP(MF)) {
     BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9))
-        .addReg(ARC::SP, RegState::Define)
         .addReg(ARC::FP, RegState::Define)
+        .addReg(ARC::SP, RegState::Define)
         .addReg(ARC::SP)
         .addImm(4);
   }
@@ -305,7 +322,12 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
     // Add in the varargs area here first.
     LLVM_DEBUG(dbgs() << "Varargs\n");
     unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
-    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
+    unsigned Opc = ARC::ADD_rrlimm;
+    if (isUInt<6>(VarArgsBytes))
+      Opc = ARC::ADD_rru6;
+    else if (isInt<12>(VarArgsBytes))
+      Opc = ARC::ADD_rrs12;
+    BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc))
         .addReg(ARC::SP)
         .addReg(ARC::SP)
         .addImm(VarArgsBytes);
@@ -431,7 +453,14 @@ static void emitRegUpdate(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                           unsigned Reg, int NumBytes, bool IsAdd,
                           const ARCInstrInfo *TII) {
-  unsigned Opc = IsAdd ? ARC::ADD_rru6 : ARC::SUB_rru6;
+  unsigned Opc;
+  if (isUInt<6>(NumBytes))
+    Opc = IsAdd ? ARC::ADD_rru6 : ARC::SUB_rru6;
+  else if (isInt<12>(NumBytes))
+    Opc = IsAdd ? ARC::ADD_rrs12 : ARC::SUB_rrs12;
+  else
+    Opc = IsAdd ? ARC::ADD_rrlimm : ARC::SUB_rrlimm;
+
   BuildMI(MBB, MBBI, dl, TII->get(Opc), Reg)
       .addReg(Reg, RegState::Kill)
       .addImm(NumBytes);
diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h
index c042bec016ca..41b559d16761 100644
--- a/lib/Target/ARC/ARCFrameLowering.h
+++ b/lib/Target/ARC/ARCFrameLowering.h
@@ -1,9 +1,8 @@
 //===- ARCFrameLowering.h - Define frame lowering for ARC -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCISelDAGToDAG.cpp b/lib/Target/ARC/ARCISelDAGToDAG.cpp
index 8dbd3d5bf036..f639c4e6f0ff 100644
--- a/lib/Target/ARC/ARCISelDAGToDAG.cpp
+++ b/lib/Target/ARC/ARCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===- ARCISelDAGToDAG.cpp - ARC dag to dag inst selector -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCISelLowering.cpp b/lib/Target/ARC/ARCISelLowering.cpp
index bf98af801406..847d23f0abdb 100644
--- a/lib/Target/ARC/ARCISelLowering.cpp
+++ b/lib/Target/ARC/ARCISelLowering.cpp
@@ -1,9 +1,8 @@
 //===- ARCISelLowering.cpp - ARC DAG Lowering Impl --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCISelLowering.h b/lib/Target/ARC/ARCISelLowering.h
index fec01b13a866..4b72bfdaee9c 100644
--- a/lib/Target/ARC/ARCISelLowering.h
+++ b/lib/Target/ARC/ARCISelLowering.h
@@ -1,9 +1,8 @@
 //===- ARCISelLowering.h - ARC DAG Lowering Interface -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCInstrFormats.td b/lib/Target/ARC/ARCInstrFormats.td
index 0a49b83ef16a..e4902a73ed49 100644
--- a/lib/Target/ARC/ARCInstrFormats.td
+++ b/lib/Target/ARC/ARCInstrFormats.td
@@ -1,9 +1,8 @@
 //===- ARCInstrFormats.td - ARC Instruction Formats --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -56,6 +55,44 @@ def GPR32Reduced : Operand<iAny> {
   let DecoderMethod = "DecodeGBR32ShortRegister";
 }
 
+// Load/store mode helpers: encoding Value, def-name suffix and asm suffix.
+class DataSizeMode<bits<2> mode, string instSfx, string asmSfx> { // zz field
+  bits<2> Value = mode;
+  string  InstSuffix = instSfx;
+  string  AsmSuffix  = asmSfx;
+}
+class ExtMode<bit mode, string instSfx, string asmSfx> { // x field
+  bit     Value = mode;
+  string  InstSuffix = instSfx;
+  string  AsmSuffix  = asmSfx;
+}
+
+class AddrMode<bits<2> mode, string instSfx, string asmSfx> { // aa field
+  bits<2> Value = mode;
+  string  InstSuffix = instSfx;
+  string  AsmSuffix  = asmSfx;
+}
+
+class CacheMode<bit mode, string instSfx, string asmSfx> { // di field
+  bit     Value = mode;
+  string  InstSuffix = instSfx;
+  string  AsmSuffix  = asmSfx;
+}
+
+def ByteSM : DataSizeMode<0b01, "B", "b">; // e.g. LDB / "ldb"
+def HalfSM : DataSizeMode<0b10, "H", "h">; // e.g. LDH / "ldh"
+def WordSM : DataSizeMode<0b00,  "",  "">; // no suffix, e.g. LD / "ld"
+
+def NoEM      : ExtMode<0,   "",   "">;
+def SignedEM  : ExtMode<1, "_X", ".x">; // sign-extending form, e.g. LDB_X
+
+def NoAM      : AddrMode<0b00, "", "">;
+def PreIncAM  : AddrMode<0b01, "_AW", ".aw">; // e.g. ST_AW_rs9 / "st.aw"
+def PostIncAM : AddrMode<0b10, "_AB", ".ab">; // e.g. LD_AB_rs9 / "ld.ab"
+
+def NoCC       : CacheMode<0b0,    "",    "">;
+def UncachedCC : CacheMode<0b1, "_DI", ".di">; // uncached, e.g. LD_DI_rs9
+
 class InstARC<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
     : Instruction, Encoding64 {
 
@@ -65,6 +102,18 @@ class InstARC<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
   let AsmString = asmstr;
   let Pattern = pattern;
   let Size = sz;
+
+  // Load/Store instruction properties
+  DataSizeMode ZZ = WordSM;
+  ExtMode X = NoEM;
+  AddrMode AA = NoAM;
+  CacheMode DI = NoCC;
+
+  // Field used for relation models
+  string BaseOpcode = "";
+
+  //TSFlags
+  let TSFlags{1-0} = AA.Value;
 }
 
 // ARC pseudo instructions format
@@ -355,6 +404,8 @@ class F32_LD_RS9<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
   let Inst{8-7} = zz;
   let Inst{6} = x;
   let Inst{5-0} = A;
+
+  let BaseOpcode = "ld_rs9";
 }
 
 class F32_LD_ADDR<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
@@ -364,6 +415,8 @@ class F32_LD_ADDR<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
 
   let B = addr{14-9};
   let S9 = addr{8-0};
+
+  let BaseOpcode = "ld_rs9";
 }
 
 
@@ -388,6 +441,8 @@ class F32_LD_LIMM<bit x, bit di, bits<2> zz, dag outs, dag ins,
   let Inst{6} = x;
   let Inst{5-0} = A;
   let DecoderMethod = "DecodeLdLImmInstruction";
+
+  let BaseOpcode = "ld_limm";
 }
 
 // Register + LImm load.  The 32-bit immediate address is in Inst[63-32].
@@ -416,6 +471,8 @@ class F32_LD_RLIMM<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
   let Inst{11-6} = LImmReg;
   let Inst{5-0} = A;
   let DecoderMethod = "DecodeLdRLImmInstruction";
+
+  let BaseOpcode = "ld_rlimm";
 }
 
 // Register + S9 Store. (B + S9)
@@ -438,6 +495,8 @@ class F32_ST_RS9<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
   let Inst{4-3} = aa;
   let Inst{2-1} = zz;
   let Inst{0} = 0;
+
+  let BaseOpcode = "st_rs9";
 }
 
 class F32_ST_ADDR<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
@@ -447,6 +506,8 @@ class F32_ST_ADDR<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
 
   let B = addr{14-9};
   let S9 = addr{8-0};
+
+  let BaseOpcode = "st_rs9";
 }
 
 // LImm Store.
@@ -470,6 +531,8 @@ class F32_ST_LIMM<bit di, bits<2> zz, dag outs, dag ins,
   let Inst{2-1} = zz;
   let Inst{0} = 0;
   let DecoderMethod = "DecodeStLImmInstruction";
+
+  let BaseOpcode = "st_limm";
 }
 
 // Compact Move/Load.
diff --git a/lib/Target/ARC/ARCInstrInfo.cpp b/lib/Target/ARC/ARCInstrInfo.cpp
index a8084f16893b..2a660e3c4dd1 100644
--- a/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/lib/Target/ARC/ARCInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARCInstrInfo.cpp - ARC Instruction Information -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,19 @@ using namespace llvm;
 #include "ARCGenInstrInfo.inc"
 
 #define DEBUG_TYPE "arc-inst-info"
+
+enum AddrIncType { // values of the address-mode field read from TSFlags
+    NoAddInc = 0, // same value as NoAM (0b00) in ARCInstrFormats.td
+    PreInc   = 1, // same value as PreIncAM (0b01), asm suffix ".aw"
+    PostInc  = 2, // same value as PostIncAM (0b10), asm suffix ".ab"
+    Scaled   = 3  // NOTE(review): no matching AddrMode def is visible here
+};
+
+enum TSFlagsConstants { // how the address mode is packed into TSFlags
+    TSF_AddrModeOff = 0, // shift of the address-mode field (TSFlags{1-0})
+    TSF_AddModeMask = 3  // mask applied after the shift (two bits)
+};
+
 // Pin the vtable to this file.
 void ARCInstrInfo::anchor() {}
 
@@ -389,10 +401,42 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
 }
 
 unsigned ARCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
-  if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+  if (MI.isInlineAsm()) {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
   }
   return MI.getDesc().getSize();
 }
+
+bool ARCInstrInfo::isPostIncrement(const MachineInstr &MI) const {
+  const MCInstrDesc &MID = MI.getDesc();
+  const uint64_t F = MID.TSFlags; // bits 1-0 carry the AddrMode (AA) value
+  return ((F >> TSF_AddrModeOff) & TSF_AddModeMask) == PostInc;
+}
+
+bool ARCInstrInfo::isPreIncrement(const MachineInstr &MI) const {
+  const MCInstrDesc &MID = MI.getDesc();
+  const uint64_t F = MID.TSFlags; // bits 1-0 carry the AddrMode (AA) value
+  return ((F >> TSF_AddrModeOff) & TSF_AddModeMask) == PreInc;
+}
+
+bool ARCInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
+                                        unsigned &BasePos,
+                                        unsigned &OffsetPos) const {
+  if (!MI.mayLoad() && !MI.mayStore())
+    return false;
+  // Non-incrementing forms: base and offset follow the data register.
+  BasePos = 1;
+  OffsetPos = 2;
+  // Pre/post-increment forms also define $addrout, shifting both by one.
+  if (isPostIncrement(MI) || isPreIncrement(MI)) {
+    BasePos++;
+    OffsetPos++;
+  }
+  // Report success only for a register base with an immediate offset.
+  if (!MI.getOperand(BasePos).isReg() || !MI.getOperand(OffsetPos).isImm())
+    return false;
+  // (BasePos/OffsetPos are written even when the check above fails.)
+  return true;
+}
diff --git a/lib/Target/ARC/ARCInstrInfo.h b/lib/Target/ARC/ARCInstrInfo.h
index f965dd4ff7f8..1289b37c37b3 100644
--- a/lib/Target/ARC/ARCInstrInfo.h
+++ b/lib/Target/ARC/ARCInstrInfo.h
@@ -1,9 +1,8 @@
 //===- ARCInstrInfo.h - ARC Instruction Information -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,6 +81,16 @@ public:
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
 
+  // True if the address-mode bits in MI's TSFlags select post-increment.
+  bool isPostIncrement(const MachineInstr &MI) const override;
+
+  // ARC-specific: true if the TSFlags address-mode bits select pre-increment.
+  bool isPreIncrement(const MachineInstr &MI) const;
+
+  virtual bool getBaseAndOffsetPosition(const MachineInstr &MI,
+                                        unsigned &BasePos,
+                                        unsigned &OffsetPos) const override;
+
   // Emit code before MBBI to load immediate value into physical register Reg.
   // Returns an iterator to the new instruction.
   MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARC/ARCInstrInfo.td b/lib/Target/ARC/ARCInstrInfo.td
index 525098c4ff66..311d998f3d86 100644
--- a/lib/Target/ARC/ARCInstrInfo.td
+++ b/lib/Target/ARC/ARCInstrInfo.td
@@ -1,9 +1,8 @@
 //===- ARCInstrInfo.td - Target Description for ARC --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -788,50 +787,47 @@ let isReturn = 1, isTerminator = 1 in {
 // Load/Store instructions.
 //----------------------------------------------------------------------------
 
+// Filter class marking the load/store records used by instruction mappings
+class ArcLdStRel;
+
 // Load instruction variants:
 // Control bits: x, aa, di, zz
 // x - sign extend.
 // aa - incrementing mode. (N/A for LIMM).
 // di - uncached.
 // zz - data size.
-multiclass ArcLdInst<bits<2> zz, string asmop> {
-  let mayLoad = 1 in {
-  def _rs9 : F32_LD_ADDR<0, 0b00, 0, zz,
-                         (outs GPR32:$A), (ins MEMrs9:$addr),
-                         !strconcat(asmop, "\t$A, [$addr]"), []>;
-
-  def _limm : F32_LD_LIMM<0, 0, zz,
-                         (outs GPR32:$A), (ins MEMii:$addr),
-                         !strconcat(asmop, "\t$A, [$addr]"), []>;
-
-  def _rlimm : F32_LD_RLIMM<0, 0b00, 0, zz,
-                           (outs GPR32:$A), (ins MEMrlimm:$addr),
-                           !strconcat(asmop, "\t$A, [$addr]"), []>;
-
-  def _X_rs9 : F32_LD_ADDR<1, 0b00, 0, zz,
-                         (outs GPR32:$A), (ins MEMrs9:$addr),
-                         !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
-  def _X_limm : F32_LD_LIMM<1, 0, zz,
-                         (outs GPR32:$A), (ins MEMii:$addr),
-                         !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
-  def _X_rlimm : F32_LD_RLIMM<1, 0b00, 0, zz,
-                           (outs GPR32:$A), (ins MEMrlimm:$addr),
-                           !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
-  def _AB_rs9 : F32_LD_RS9<0, 0b10, 0, zz,
-                      (outs GPR32:$addrout, GPR32:$A),
-                      (ins GPR32:$B, immS<9>:$S9),
-                      !strconcat(asmop, ".ab\t$A, [$B,$S9]"), []>
-    { let Constraints = "$addrout = $B"; }
+multiclass ArcLdInst<DataSizeMode zz, ExtMode x, CacheMode di, string asmop> {
+  let mayLoad = 1, ZZ = zz, X = x, DI = di in {
+    def _rs9: F32_LD_ADDR<x.Value, NoAM.Value, di.Value, zz.Value,
+			   (outs GPR32:$A), (ins MEMrs9:$addr),
+			   !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+    def _limm: F32_LD_LIMM<x.Value, di.Value, zz.Value,
+			   (outs GPR32:$A), (ins MEMii:$addr),
+			   !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+    def _rlimm: F32_LD_RLIMM<x.Value, NoAM.Value, di.Value, zz.Value,
+			     (outs GPR32:$A), (ins MEMrlimm:$addr),
+			     !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+    foreach aa = [PreIncAM, PostIncAM] in {
+      def aa.InstSuffix#_rs9: F32_LD_RS9<x.Value, aa.Value, di.Value, zz.Value,
+					  (outs GPR32:$A, GPR32:$addrout),
+					  (ins GPR32:$B, immS<9>:$S9),
+					  asmop#aa.AsmSuffix#"\t$A, [$B,$S9]", []>, ArcLdStRel
+			       { let Constraints = "$addrout = $B"; let AA = aa; }
+    }
+  }
+}
+
+foreach di = [NoCC, UncachedCC] in {
+  defm LD#di.InstSuffix : ArcLdInst<WordSM, NoEM, di, "ld"#di.AsmSuffix>;
+  foreach zz = [ByteSM, HalfSM] in {
+    foreach x = [NoEM, SignedEM] in {
+      defm LD#zz.InstSuffix#x.InstSuffix#di.InstSuffix : ArcLdInst<zz, x, di, "ld"#zz.AsmSuffix#x.AsmSuffix#di.AsmSuffix>;
+    }
   }
 }
-                         
-// Load instruction definitions.
-defm LD  : ArcLdInst<0b00, "ld">;
-defm LDH : ArcLdInst<0b10, "ldh">;
-defm LDB : ArcLdInst<0b01, "ldb">;
 
 // Load instruction patterns.
 // 32-bit loads.
@@ -873,25 +869,32 @@ def : Pat<(sextloadi8 AddrModeS9:$addr),(LDB_X_rs9 AddrModeS9:$addr)>;
 // aa - incrementing mode. (N/A for LIMM).
 // di - uncached.
 // zz - data size.
-multiclass ArcStInst<bits<2> zz, string asmop> {
-  let mayStore = 1 in {
-  def _rs9  : F32_ST_ADDR<0b00, 0, zz, (outs), (ins GPR32:$C, MEMrs9:$addr),
-                         !strconcat(asmop, "\t$C, [$addr]"), []>;
-
-  def _limm : F32_ST_LIMM<0, zz, (outs), (ins GPR32:$C, MEMii:$addr),
-                         !strconcat(asmop, "\t$C, [$addr]"), []>;
-
-  def _AW_rs9 : F32_ST_RS9<0b01, 0, zz, (outs GPR32:$addrout),
-                      (ins GPR32:$C, GPR32:$B, immS<9>:$S9),
-                      !strconcat(asmop, ".aw\t$C, [$B,$S9]"), []>
-    { let Constraints = "$addrout = $B"; }
+multiclass ArcStInst<DataSizeMode zz, CacheMode di, string asmop> {
+  let mayStore = 1, ZZ = zz, DI = di in {
+    def _rs9: F32_ST_ADDR<NoAM.Value, di.Value, zz.Value,
+			   (outs), (ins GPR32:$C, MEMrs9:$addr),
+			   !strconcat(asmop, "\t$C, [$addr]"), []>, ArcLdStRel;
+
+    def _limm: F32_ST_LIMM<di.Value, zz.Value,
+			   (outs), (ins GPR32:$C, MEMii:$addr),
+			   !strconcat(asmop, "\t$C, [$addr]"), []>, ArcLdStRel;
+
+
+    foreach aa = [PreIncAM, PostIncAM] in {
+      def aa.InstSuffix#_rs9: F32_ST_RS9<aa.Value, di.Value, zz.Value,
+					  (outs GPR32:$addrout),
+					  (ins GPR32:$C, GPR32:$B, immS<9>:$S9),
+					  asmop#aa.AsmSuffix#"\t$C, [$B,$S9]", []>, ArcLdStRel
+			       { let Constraints = "$addrout = $B"; let AA = aa; }
+    }
   }
 }
 
-// Store instruction definitions.
-defm ST  : ArcStInst<0b00, "st">;
-defm STH : ArcStInst<0b10, "sth">;
-defm STB : ArcStInst<0b01, "stb">;
+foreach di = [NoCC, UncachedCC] in {
+  foreach zz = [ByteSM, HalfSM, WordSM] in {
+      defm ST#zz.InstSuffix#di.InstSuffix : ArcStInst<zz, di, "st"#zz.AsmSuffix#di.AsmSuffix>;
+  }
+}
 
 // Store instruction patterns.
 // 32-bit stores
@@ -912,3 +915,10 @@ def : Pat<(truncstorei8 i32:$C, AddrModeS9:$addr),
 def : Pat<(truncstorei8 i32:$C, AddrModeImm:$addr),
           (STB_limm i32:$C, AddrModeImm:$addr)>;
 
+def getPostIncOpcode : InstrMapping { // plain opcode -> post-increment opcode
+  let FilterClass = "ArcLdStRel"; // only records deriving from this class
+  let RowFields = [ "BaseOpcode", "ZZ", "DI", "X"]; // equal within one row
+  let ColFields = [ "AA" ]; // the field that distinguishes the columns
+  let KeyCol = [ "NoAM" ]; // lookup key: the non-incrementing form
+  let ValueCols = [["PostIncAM"]]; // result: the ".ab" form
+}
diff --git a/lib/Target/ARC/ARCMCInstLower.cpp b/lib/Target/ARC/ARCMCInstLower.cpp
index 43b087a57204..62462b77eccf 100644
--- a/lib/Target/ARC/ARCMCInstLower.cpp
+++ b/lib/Target/ARC/ARCMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===- ARCMCInstLower.cpp - ARC MachineInstr to MCInst ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/ARC/ARCMCInstLower.h b/lib/Target/ARC/ARCMCInstLower.h
index 9a698f26334a..24a7f68c695d 100644
--- a/lib/Target/ARC/ARCMCInstLower.h
+++ b/lib/Target/ARC/ARCMCInstLower.h
@@ -1,9 +1,8 @@
 //===- ARCMCInstLower.h - Lower MachineInstr to MCInst ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.cpp b/lib/Target/ARC/ARCMachineFunctionInfo.cpp
index 7672f8d2c6dd..9cd9661ae245 100644
--- a/lib/Target/ARC/ARCMachineFunctionInfo.cpp
+++ b/lib/Target/ARC/ARCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARCMachineFunctionInfo.cpp - ARC machine func info -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.h b/lib/Target/ARC/ARCMachineFunctionInfo.h
index 95ad294e3668..31aa5b93246c 100644
--- a/lib/Target/ARC/ARCMachineFunctionInfo.h
+++ b/lib/Target/ARC/ARCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===- ARCMachineFunctionInfo.h - ARC machine function info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCOptAddrMode.cpp b/lib/Target/ARC/ARCOptAddrMode.cpp
new file mode 100644
index 000000000000..c922b99c57b0
--- /dev/null
+++ b/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -0,0 +1,507 @@
+//===- ARCOptAddrMode.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass folds LD/ST + ADD pairs into the post-increment form of
+/// load/store instructions; pre-increment forms are not generated yet.
+//===----------------------------------------------------------------------===//
+
+#include "ARC.h"
+#define GET_INSTRMAP_INFO
+#include "ARCInstrInfo.h"
+#include "ARCTargetMachine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define OPTADDRMODE_DESC "ARC load/store address mode"
+#define OPTADDRMODE_NAME "arc-addr-mode"
+#define DEBUG_TYPE "arc-addr-mode"
+
+namespace llvm {
+FunctionPass *createARCOptAddrMode();
+void initializeARCOptAddrModePass(PassRegistry &);
+} // end namespace llvm
+
+namespace {
+class ARCOptAddrMode : public MachineFunctionPass {
+public:
+  static char ID;
+
+  ARCOptAddrMode() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return OPTADDRMODE_DESC; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  const ARCSubtarget *AST = nullptr; // these four are set per function
+  const ARCInstrInfo *AII = nullptr; // in runOnMachineFunction
+  MachineRegisterInfo *MRI = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+
+  // Tries to combine \p Ldst with an increment of its base register to form
+  // a single post-increment instruction.
+  MachineInstr *tryToCombine(MachineInstr &Ldst);
+
+  // Returns true if \p Ldst dominates every use of the result of \p Add
+  bool noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
+                                   const MachineInstr *Ldst);
+
+  // Returns true if load/store instruction \p Ldst can be hoisted up to
+  // instruction \p To
+  bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+  // Returns true if load/store instruction \p Ldst can be sunk down
+  // to instruction \p To
+  bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+  // Checks if instructions \p Ldst and \p Add can be moved to become adjacent.
+  // If they can, returns the instruction which does not need to move.
+  // If \p Uses is not null, fills it with the uses of \p Ldst's base register
+  // that are dominated by the returned instruction.
+  MachineInstr *canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
+                                    SmallVectorImpl<MachineInstr *> *Uses);
+
+  // Returns true if all instructions in the \p Uses array can be adjusted
+  // to accommodate an increment of register \p BaseReg by \p Incr
+  bool canFixPastUses(const ArrayRef<MachineInstr *> &Uses,
+                      MachineOperand &Incr, unsigned BaseReg);
+
+  // Updates all instructions in \p Uses to accommodate an increment
+  // of \p BaseReg by \p Offset
+  void fixPastUses(ArrayRef<MachineInstr *> Uses, unsigned BaseReg,
+                   int64_t Offset);
+
+  // Changes instruction \p Ldst to its post-increment form \p NewOpcode.
+  // \p NewBase is the register that receives the updated base value.
+  // \p NewOffset is the instruction's new offset operand.
+  void changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
+                        unsigned NewBase, MachineOperand &NewOffset);
+
+  bool processBasicBlock(MachineBasicBlock &MBB); // true if \p MBB changed
+};
+
+} // end anonymous namespace
+
+char ARCOptAddrMode::ID = 0;
+INITIALIZE_PASS_BEGIN(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false,
+                    false)
+
+// Return true if \p Off can be used as the immediate offset operand of a
+// load/store instruction, i.e. it fits a signed 9-bit (S9) literal.
+static bool isValidLoadStoreOffset(int64_t Off) { return isInt<9>(Off); }
+
+// Return true if \p Off can be used as the immediate operand of an
+// ADD/SUB instruction, i.e. it fits an unsigned 6-bit (U6) literal.
+static bool isValidIncrementOffset(int64_t Off) { return isUInt<6>(Off); }
+// Match ADD_rru6/SUB_rru6 and report the signed constant in \p Amount.
+static bool isAddConstantOp(const MachineInstr &MI, int64_t &Amount) {
+  int64_t Sign = 1; // flipped to -1 for SUB so Amount is a signed delta
+  switch (MI.getOpcode()) {
+  case ARC::SUB_rru6:
+    Sign = -1;
+    LLVM_FALLTHROUGH;
+  case ARC::ADD_rru6:
+    assert(MI.getOperand(2).isImm() && "Expected immediate operand");
+    Amount = Sign * MI.getOperand(2).getImm();
+    return true;
+  default:
+    return false; // Amount is left untouched for any other opcode
+  }
+}
+
+// Return true if \p MI dominates all uses of virtual register \p VReg
+static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg,
+                               MachineDominatorTree *MDT,
+                               MachineRegisterInfo *MRI) {
+
+  assert(TargetRegisterInfo::isVirtualRegister(VReg) &&
+         "Expected virtual register!");
+  // Debug uses are skipped; a PHI use is tested via its incoming block.
+  for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end();
+       it != end; ++it) {
+    MachineInstr *User = it->getParent();
+    if (User->isPHI()) {
+      unsigned BBOperandIdx = User->getOperandNo(&*it) + 1; // paired MBB
+      MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB();
+      if (MBB->empty()) { // no instruction to test, so compare blocks
+        const MachineBasicBlock *InstBB = MI->getParent();
+        assert(InstBB != MBB && "Instruction found in empty MBB");
+        if (!MDT->dominates(InstBB, MBB))
+          return false;
+        continue;
+      }
+      User = &*MBB->rbegin(); // last instruction of the incoming block
+    }
+
+    if (!MDT->dominates(MI, User))
+      return false;
+  }
+  return true;
+}
+
+// Return true if \p MI is a load/store instruction with an immediate offset
+// which is still a valid load/store offset after adding \p Disp
+static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII,
+                                                 const MachineInstr &MI,
+                                                 int64_t Disp) {
+  unsigned BasePos, OffPos;
+  if (!TII->getBaseAndOffsetPosition(MI, BasePos, OffPos))
+    return false;
+  const MachineOperand &MO = MI.getOperand(OffPos);
+  if (!MO.isImm())
+    return false;
+  int64_t Offset = MO.getImm() + Disp;
+  return isValidLoadStoreOffset(Offset);
+}
+
+bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
+                                                 const MachineInstr *Ldst) {
+  unsigned R = Add->getOperand(0).getReg(); // operand 0 of the Add
+  return dominatesAllUsesOf(Ldst, R, MDT, MRI); // all uses of R follow Ldst
+}
+
+MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) {
+  assert((Ldst.mayLoad() || Ldst.mayStore()) && "LD/ST instruction expected");
+  // Returns nullptr on every path where nothing was folded.
+  unsigned BasePos, OffsetPos;
+
+  LLVM_DEBUG(dbgs() << "[ABAW] tryToCombine " << Ldst);
+  if (!AII->getBaseAndOffsetPosition(Ldst, BasePos, OffsetPos)) {
+    LLVM_DEBUG(dbgs() << "[ABAW] Not a recognized load/store\n");
+    return nullptr;
+  }
+
+  MachineOperand &Base = Ldst.getOperand(BasePos);
+  MachineOperand &Offset = Ldst.getOperand(OffsetPos);
+
+  assert(Base.isReg() && "Base operand must be register");
+  if (!Offset.isImm()) {
+    LLVM_DEBUG(dbgs() << "[ABAW] Offset is not immediate\n");
+    return nullptr;
+  }
+
+  unsigned B = Base.getReg();
+  if (TargetRegisterInfo::isStackSlot(B) ||
+      !TargetRegisterInfo::isVirtualRegister(B)) {
+    LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n");
+    return nullptr;
+  }
+
+  // TODO: try to generate address preincrement
+  if (Offset.getImm() != 0) {
+    LLVM_DEBUG(dbgs() << "[ABAW] Non-zero offset\n");
+    return nullptr;
+  }
+  // Look for a constant ADD/SUB using the base whose amount fits S9.
+  for (auto &Add : MRI->use_nodbg_instructions(B)) {
+    int64_t Incr;
+    if (!isAddConstantOp(Add, Incr))
+      continue;
+    if (!isValidLoadStoreOffset(Incr))
+      continue;
+
+    SmallVector<MachineInstr *, 8> Uses;
+    MachineInstr *MoveTo = canJoinInstructions(&Ldst, &Add, &Uses);
+
+    if (!MoveTo)
+      continue;
+    // NOTE(review): operand 2 is the raw immediate, without the SUB sign.
+    if (!canFixPastUses(Uses, Add.getOperand(2), B))
+      continue;
+
+    LLVM_DEBUG(MachineInstr *First = &Ldst; MachineInstr *Last = &Add;
+               if (MDT->dominates(Last, First)) std::swap(First, Last);
+               dbgs() << "[ABAW] Instructions " << *First << " and " << *Last
+                      << " combined\n";
+
+    );
+    // Remember the instruction after Ldst before Ldst is possibly moved.
+    MachineInstr *Result = Ldst.getNextNode();
+    if (MoveTo == &Add) {
+      Ldst.removeFromParent();
+      Add.getParent()->insertAfter(Add.getIterator(), &Ldst);
+    }
+    if (Result == &Add)
+      Result = Result->getNextNode();
+    // NOTE(review): passes the old base B, not NewBaseReg -- confirm intent.
+    fixPastUses(Uses, B, Incr);
+
+    int NewOpcode = ARC::getPostIncOpcode(Ldst.getOpcode());
+    assert(NewOpcode > 0 && "No postincrement form found");
+    unsigned NewBaseReg = Add.getOperand(0).getReg();
+    changeToAddrMode(Ldst, NewOpcode, NewBaseReg, Add.getOperand(2));
+    Add.eraseFromParent();
+
+    return Result;
+  }
+  return nullptr;
+}
+
+MachineInstr *
+ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
+                                    SmallVectorImpl<MachineInstr *> *Uses) {
+  assert(Ldst && Add && "NULL instruction passed");
+  // Order the pair by dominance; give up if neither dominates the other.
+  MachineInstr *First = Add;
+  MachineInstr *Last = Ldst;
+  if (MDT->dominates(Ldst, Add))
+    std::swap(First, Last);
+  else if (!MDT->dominates(Add, Ldst))
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "canJoinInstructions: " << *First << *Last);
+
+  unsigned BasePos, OffPos;
+
+  if (!AII->getBaseAndOffsetPosition(*Ldst, BasePos, OffPos)) {
+    LLVM_DEBUG(
+        dbgs()
+        << "[canJoinInstructions] Cannot determine base/offset position\n");
+    return nullptr;
+  }
+
+  unsigned BaseReg = Ldst->getOperand(BasePos).getReg();
+
+  // prohibit this:
+  //   v1 = add v0, c
+  //   st v1, [v0, 0]
+  // and this
+  //   st v0, [v0, 0]
+  //   v1 = add v0, c
+  if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) {
+    unsigned StReg = Ldst->getOperand(0).getReg();
+    if (Add->getOperand(0).getReg() == StReg || BaseReg == StReg) {
+      LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n");
+      return nullptr;
+    }
+  }
+  // Every other use of the base must dominate, or be dominated by, Ldst.
+  SmallVector<MachineInstr *, 4> UsesAfterLdst;
+  SmallVector<MachineInstr *, 4> UsesAfterAdd;
+  for (MachineInstr &MI : MRI->use_nodbg_instructions(BaseReg)) {
+    if (&MI == Ldst || &MI == Add)
+      continue;
+    if (&MI != Add && MDT->dominates(Ldst, &MI)) // &MI != Add holds here
+      UsesAfterLdst.push_back(&MI);
+    else if (!MDT->dominates(&MI, Ldst))
+      return nullptr;
+    if (MDT->dominates(Add, &MI))
+      UsesAfterAdd.push_back(&MI);
+  }
+
+  MachineInstr *Result = nullptr;
+
+  if (First == Add) {
+    //  n = add b, i
+    //  ...
+    //  x = ld [b, o] or x = ld [n, o]
+
+    if (noUseOfAddBeforeLoadOrStore(First, Last)) {
+      Result = Last;
+      LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can sink Add down to Ldst\n");
+    } else if (canHoistLoadStoreTo(Ldst, Add)) {
+      Result = First;
+      LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Ldst to Add\n");
+    }
+  } else {
+    // x = ld [b, o]
+    // ...
+    // n = add b, i
+    Result = First;
+    LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Add to Ldst\n");
+  }
+  if (Result && Uses) // report the uses dominated by the instruction kept
+    *Uses = (Result == Ldst) ? UsesAfterLdst : UsesAfterAdd;
+  return Result;
+}
+
+bool ARCOptAddrMode::canFixPastUses(const ArrayRef<MachineInstr *> &Uses,
+                                    MachineOperand &Incr, unsigned BaseReg) {
+  // NOTE(review): ADDs are checked with +Incr but LD/ST with -Incr; confirm.
+  assert(Incr.isImm() && "Expected immediate increment");
+  int64_t NewOffset = Incr.getImm();
+  for (MachineInstr *MI : Uses) {
+    int64_t Dummy; // signed ADD/SUB amount of *MI, if it is one
+    if (isAddConstantOp(*MI, Dummy)) {
+      if (isValidIncrementOffset(Dummy + NewOffset))
+        continue;
+      return false;
+    }
+    if (isLoadStoreThatCanHandleDisplacement(AII, *MI, -NewOffset))
+      continue;
+    LLVM_DEBUG(dbgs() << "Instruction cannot handle displacement " << -NewOffset
+                      << ": " << *MI);
+    return false;
+  }
+  return true; // note: \p BaseReg is not consulted by this function
+}
+
+void ARCOptAddrMode::fixPastUses(ArrayRef<MachineInstr *> Uses,
+                                 unsigned NewBase, int64_t NewOffset) {
+  // NOTE(review): NewOffset is modified in the loop and carries across uses.
+  for (MachineInstr *MI : Uses) {
+    int64_t Amount;
+    unsigned BasePos, OffPos;
+    if (isAddConstantOp(*MI, Amount)) {
+      NewOffset += Amount;
+      assert(isValidIncrementOffset(NewOffset) &&
+             "New offset won't fit into ADD instr");
+      BasePos = 1; // ADD/SUB: operand rewritten to NewBase below
+      OffPos = 2;  // ADD/SUB: immediate operand
+    } else if (AII->getBaseAndOffsetPosition(*MI, BasePos, OffPos)) {
+      MachineOperand &MO = MI->getOperand(OffPos);
+      assert(MO.isImm() && "expected immediate operand");
+      NewOffset += MO.getImm();
+      assert(isValidLoadStoreOffset(NewOffset) &&
+             "New offset won't fit into LD/ST");
+    } else
+      llvm_unreachable("unexpected instruction");
+    // Point the use at NewBase with the recomputed immediate.
+    MI->getOperand(BasePos).setReg(NewBase);
+    MI->getOperand(OffPos).setImm(NewOffset);
+  }
+}
+
+bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+  if (Ldst->getParent() != To->getParent())
+    return false; // only hoist within one basic block
+  MachineBasicBlock::const_iterator MI(To), ME(Ldst),
+      End(Ldst->getParent()->end());
+  // Nothing from To up to Ldst may write memory, call, or have side effects.
+  bool IsStore = Ldst->mayStore();
+  for (; MI != ME && MI != End; ++MI) {
+    if (MI->isDebugValue())
+      continue;
+    if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+        MI->hasUnmodeledSideEffects())
+      return false;
+    if (IsStore && MI->mayLoad()) // a store is not moved above a load
+      return false;
+  }
+  // The definition of every register Ldst reads must dominate To.
+  for (auto &O : Ldst->explicit_operands()) {
+    if (!O.isReg() || !O.isUse())
+      continue;
+    MachineInstr *OpDef = MRI->getVRegDef(O.getReg());
+    if (!OpDef || !MDT->dominates(OpDef, To))
+      return false;
+  }
+  return true;
+}
+
+bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+  // Can only sink load/store within same BB. (No caller in this file.)
+  if (Ldst->getParent() != To->getParent())
+    return false;
+  MachineBasicBlock::const_iterator MI(Ldst), ME(To),
+      End(Ldst->getParent()->end());
+  // NOTE(review): scan starts at Ldst itself, so a store Ldst always fails.
+  bool IsStore = Ldst->mayStore();
+  bool IsLoad = Ldst->mayLoad();
+  // For loads, operand 0 must not be read before the new position.
+  Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
+  for (; MI != ME && MI != End; ++MI) {
+    if (MI->isDebugValue())
+      continue;
+    if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+        MI->hasUnmodeledSideEffects())
+      return false;
+    if (IsStore && MI->mayLoad())
+      return false;
+    if (ValReg && MI->readsVirtualRegister(ValReg))
+      return false;
+  }
+  return true;
+}
+
+void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
+                                      unsigned NewBase,
+                                      MachineOperand &NewOffset) {
+  bool IsStore = Ldst.mayStore();
+  unsigned BasePos, OffPos;
+  MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); // placeholder
+  AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos);
+  // The result of the call above is ignored (not re-checked here).
+  unsigned BaseReg = Ldst.getOperand(BasePos).getReg();
+
+  Ldst.RemoveOperand(OffPos);
+  Ldst.RemoveOperand(BasePos);
+  // For stores, also detach the stored value so it can be re-added below.
+  if (IsStore) {
+    Src = Ldst.getOperand(BasePos - 1);
+    Ldst.RemoveOperand(BasePos - 1);
+  }
+  // New operand order: [loaded value,] NewBase (def), [Src,] BaseReg, offset.
+  Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode));
+  Ldst.addOperand(MachineOperand::CreateReg(NewBase, true));
+  if (IsStore)
+    Ldst.addOperand(Src);
+  Ldst.addOperand(MachineOperand::CreateReg(BaseReg, false));
+  Ldst.addOperand(NewOffset);
+  LLVM_DEBUG(dbgs() << "[ABAW] New Ldst: " << Ldst);
+}
+
+bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) {
+  bool Changed = false;
+  for (auto MI = MBB.begin(), ME = MBB.end(); MI != ME; ++MI) {
+    if (MI->isDebugValue())
+      continue;
+    if (!MI->mayLoad() && !MI->mayStore())
+      continue;
+    if (ARC::getPostIncOpcode(MI->getOpcode()) < 0) // no post-inc variant
+      continue;
+    MachineInstr *Res = tryToCombine(*MI);
+    if (Res) { // NOTE(review): confirm Res cannot be null after a fold
+      Changed = true;
+      // Res is the next instruction to visit; step back so ++MI lands on it.
+      MI = std::prev(Res->getIterator());
+    }
+  }
+  return Changed;
+}
+
+bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+  // Cache the per-function state used by the helper methods.
+  AST = &MF.getSubtarget<ARCSubtarget>();
+  AII = AST->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  // Process every block; report whether any of them changed.
+  bool Changed = false;
+  for (auto &MBB : MF)
+    Changed |= processBasicBlock(MBB);
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+// Returns a newly allocated instance of the ARCOptAddrMode pass.
+FunctionPass *llvm::createARCOptAddrMode() { return new ARCOptAddrMode(); }
diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp
index 38ea3c93a2d4..9c8340ac8f81 100644
--- a/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARCRegisterInfo.cpp - ARC Register Information -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,9 +82,11 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
   switch (MI.getOpcode()) {
   case ARC::LD_rs9:
     assert((Offset % 4 == 0) && "LD needs 4 byte alignment.");
+    LLVM_FALLTHROUGH;
   case ARC::LDH_rs9:
   case ARC::LDH_X_rs9:
     assert((Offset % 2 == 0) && "LDH needs 2 byte alignment.");
+    LLVM_FALLTHROUGH;
   case ARC::LDB_rs9:
   case ARC::LDB_X_rs9:
     LLVM_DEBUG(dbgs() << "Building LDFI\n");
@@ -96,8 +97,10 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
     break;
   case ARC::ST_rs9:
     assert((Offset % 4 == 0) && "ST needs 4 byte alignment.");
+    LLVM_FALLTHROUGH;
   case ARC::STH_rs9:
     assert((Offset % 2 == 0) && "STH needs 2 byte alignment.");
+    LLVM_FALLTHROUGH;
   case ARC::STB_rs9:
     LLVM_DEBUG(dbgs() << "Building STFI\n");
     BuildMI(MBB, II, dl, TII.get(MI.getOpcode()))
@@ -187,7 +190,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Special handling of DBG_VALUE instructions.
   if (MI.isDebugValue()) {
-    unsigned FrameReg = getFrameRegister(MF);
+    Register FrameReg = getFrameRegister(MF);
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
     return;
@@ -220,7 +223,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                     ObjSize, RS, SPAdj);
 }
 
-unsigned ARCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register ARCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const ARCFrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? ARC::FP : ARC::SP;
 }
diff --git a/lib/Target/ARC/ARCRegisterInfo.h b/lib/Target/ARC/ARCRegisterInfo.h
index 53abae3ac7a5..af41234e9dda 100644
--- a/lib/Target/ARC/ARCRegisterInfo.h
+++ b/lib/Target/ARC/ARCRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- ARCRegisterInfo.h - ARC Register Information Impl --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,7 +46,7 @@ public:
                                        CallingConv::ID CC) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   //! Return whether to emit frame moves
   static bool needsFrameMoves(const MachineFunction &MF);
diff --git a/lib/Target/ARC/ARCRegisterInfo.td b/lib/Target/ARC/ARCRegisterInfo.td
index 6d8d1b3dfd25..4b6744ad73da 100644
--- a/lib/Target/ARC/ARCRegisterInfo.td
+++ b/lib/Target/ARC/ARCRegisterInfo.td
@@ -1,9 +1,8 @@
 //===- ARCRegisterInfo.td - ARC Register defs --------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/ARCSubtarget.cpp b/lib/Target/ARC/ARCSubtarget.cpp
index 2107a27bf786..bce2dbd2eaa6 100644
--- a/lib/Target/ARC/ARCSubtarget.cpp
+++ b/lib/Target/ARC/ARCSubtarget.cpp
@@ -1,9 +1,8 @@
 //===- ARCSubtarget.cpp - ARC Subtarget Information -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCSubtarget.h b/lib/Target/ARC/ARCSubtarget.h
index 631d846f3c9c..0be797f753d5 100644
--- a/lib/Target/ARC/ARCSubtarget.h
+++ b/lib/Target/ARC/ARCSubtarget.h
@@ -1,9 +1,8 @@
 //===- ARCSubtarget.h - Define Subtarget for the ARC ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp
index 6f5bbd3b4ef3..9fb45d686c26 100644
--- a/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/lib/Target/ARC/ARCTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===- ARCTargetMachine.cpp - Define TargetMachine for ARC ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 #include "ARCTargetMachine.h"
 #include "ARC.h"
 #include "ARCTargetTransformInfo.h"
+#include "TargetInfo/ARCTargetInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -75,7 +75,10 @@ bool ARCPassConfig::addInstSelector() {
 
 void ARCPassConfig::addPreEmitPass() { addPass(createARCBranchFinalizePass()); }
 
-void ARCPassConfig::addPreRegAlloc() { addPass(createARCExpandPseudosPass()); }
+void ARCPassConfig::addPreRegAlloc() {
+  addPass(createARCExpandPseudosPass()); // Added first.
+  addPass(createARCOptAddrMode());       // Added after pseudo expansion.
+}
 
 // Force static initialization.
 extern "C" void LLVMInitializeARCTarget() {
diff --git a/lib/Target/ARC/ARCTargetMachine.h b/lib/Target/ARC/ARCTargetMachine.h
index 18117e3409af..c5e8c3f2936d 100644
--- a/lib/Target/ARC/ARCTargetMachine.h
+++ b/lib/Target/ARC/ARCTargetMachine.h
@@ -1,9 +1,8 @@
 //===- ARCTargetMachine.h - Define TargetMachine for ARC --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/ARCTargetStreamer.h b/lib/Target/ARC/ARCTargetStreamer.h
index 29fdfda661a4..abe89673316f 100644
--- a/lib/Target/ARC/ARCTargetStreamer.h
+++ b/lib/Target/ARC/ARCTargetStreamer.h
@@ -1,9 +1,8 @@
 //===- ARCTargetStreamer.h - ARC Target Streamer ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/ARCTargetTransformInfo.h b/lib/Target/ARC/ARCTargetTransformInfo.h
index 20a83d5ae4c7..3e34008902b5 100644
--- a/lib/Target/ARC/ARCTargetTransformInfo.h
+++ b/lib/Target/ARC/ARCTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- ARCTargetTransformInfo.h - ARC specific TTI --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // \file
diff --git a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index 3fc5a033dd5d..82da18617b91 100644
--- a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- ARCDisassembler.cpp - Disassembler for ARC ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -15,6 +14,7 @@
 #include "ARC.h"
 #include "ARCRegisterInfo.h"
 #include "MCTargetDesc/ARCMCTargetDesc.h"
+#include "TargetInfo/ARCTargetInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
diff --git a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
deleted file mode 100644
index 9c820c2fc595..000000000000
--- a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- ARCInstPrinter.cpp - ARC MCInst to assembly syntax -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARC MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARCInstPrinter.h"
-#include "MCTargetDesc/ARCInfo.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "ARCGenAsmWriter.inc"
-
-template <class T>
-static const char *BadConditionCode(T cc) {
-  LLVM_DEBUG(dbgs() << "Unknown condition code passed: " << cc << "\n");
-  return "{unknown-cc}";
-}
-
-static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
-  switch (BRCC) {
-  case ARCCC::BREQ:
-    return "eq";
-  case ARCCC::BRNE:
-    return "ne";
-  case ARCCC::BRLT:
-    return "lt";
-  case ARCCC::BRGE:
-    return "ge";
-  case ARCCC::BRLO:
-    return "lo";
-  case ARCCC::BRHS:
-    return "hs";
-  }
-  return BadConditionCode(BRCC);
-}
-
-static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
-  switch (CC) {
-  case ARCCC::EQ:
-    return "eq";
-  case ARCCC::NE:
-    return "ne";
-  case ARCCC::P:
-    return "p";
-  case ARCCC::N:
-    return "n";
-  case ARCCC::HS:
-    return "hs";
-  case ARCCC::LO:
-    return "lo";
-  case ARCCC::GT:
-    return "gt";
-  case ARCCC::GE:
-    return "ge";
-  case ARCCC::VS:
-    return "vs";
-  case ARCCC::VC:
-    return "vc";
-  case ARCCC::LT:
-    return "lt";
-  case ARCCC::LE:
-    return "le";
-  case ARCCC::HI:
-    return "hi";
-  case ARCCC::LS:
-    return "ls";
-  case ARCCC::PNZ:
-    return "pnz";
-  case ARCCC::AL:
-    return "al";
-  case ARCCC::NZ:
-    return "nz";
-  case ARCCC::Z:
-    return "z";
-  }
-  return BadConditionCode(CC);
-}
-
-void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << StringRef(getRegisterName(RegNo)).lower();
-}
-
-void ARCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                               StringRef Annot, const MCSubtargetInfo &STI) {
-  printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
-                      raw_ostream &OS) {
-  int Offset = 0;
-  const MCSymbolRefExpr *SRE;
-
-  if (const auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
-    OS << "0x";
-    OS.write_hex(CE->getValue());
-    return;
-  }
-
-  if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr)) {
-    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
-    const auto *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
-    assert(SRE && CE && "Binary expression must be sym+const.");
-    Offset = CE->getValue();
-  } else {
-    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
-    assert(SRE && "Unexpected MCExpr type.");
-  }
-  assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
-
-  // Symbols are prefixed with '@'
-  OS << '@';
-  SRE->getSymbol().print(OS, MAI);
-
-  if (Offset) {
-    if (Offset > 0)
-      OS << '+';
-    OS << Offset;
-  }
-}
-
-void ARCInstPrinter::printOperand(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  if (Op.isReg()) {
-    printRegName(O, Op.getReg());
-    return;
-  }
-
-  if (Op.isImm()) {
-    O << Op.getImm();
-    return;
-  }
-
-  assert(Op.isExpr() && "unknown operand kind in printOperand");
-  printExpr(Op.getExpr(), &MAI, O);
-}
-
-void ARCInstPrinter::printMemOperandRI(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  const MCOperand &base = MI->getOperand(OpNum);
-  const MCOperand &offset = MI->getOperand(OpNum + 1);
-  assert(base.isReg() && "Base should be register.");
-  assert(offset.isImm() && "Offset should be immediate.");
-  printRegName(O, base.getReg());
-  O << "," << offset.getImm();
-}
-
-void ARCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                           raw_ostream &O) {
-
-  const MCOperand &Op = MI->getOperand(OpNum);
-  assert(Op.isImm() && "Predicate operand is immediate.");
-  O << ARCCondCodeToString((ARCCC::CondCode)Op.getImm());
-}
-
-void ARCInstPrinter::printBRCCPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                               raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  assert(Op.isImm() && "Predicate operand is immediate.");
-  O << ARCBRCondCodeToString((ARCCC::BRCondCode)Op.getImm());
-}
diff --git a/lib/Target/ARC/InstPrinter/ARCInstPrinter.h b/lib/Target/ARC/InstPrinter/ARCInstPrinter.h
deleted file mode 100644
index bb3898a67cef..000000000000
--- a/lib/Target/ARC/InstPrinter/ARCInstPrinter.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===- ARCInstPrinter.h - Convert ARC MCInst to assembly syntax -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the declaration of the ARCInstPrinter class,
-/// which is used to print ARC MCInst to a .s file.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARC_INSTPRINTER_ARCINSTPRINTER_H
-#define LLVM_LIB_TARGET_ARC_INSTPRINTER_ARCINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class ARCInstPrinter : public MCInstPrinter {
-public:
-  ARCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                 const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-private:
-  void printMemOperandRI(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printBRCCPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                 raw_ostream &O);
-};
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_ARC_INSTPRINTER_ARCINSTPRINTER_H
diff --git a/lib/Target/ARC/MCTargetDesc/ARCInfo.h b/lib/Target/ARC/MCTargetDesc/ARCInfo.h
index 401b4c5e6613..57a77631a1fb 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCInfo.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCInfo.h
@@ -1,9 +1,8 @@
 //===- ARCInfo.h - Additional ARC Info --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
new file mode 100644
index 000000000000..e3e0ea489957
--- /dev/null
+++ b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
@@ -0,0 +1,179 @@
+//===- ARCInstPrinter.cpp - ARC MCInst to assembly syntax -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARCInstPrinter.h"
+#include "MCTargetDesc/ARCInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "ARCGenAsmWriter.inc"
+
+template <typename CodeT>
+static const char *BadConditionCode(CodeT Code) {
+  LLVM_DEBUG(dbgs() << "Unknown condition code passed: " << Code << "\n");
+  return "{unknown-cc}"; // Placeholder returned for every unknown code.
+}
+
+static const char *ARCBRCondCodeToString(ARCCC::BRCondCode BRCC) {
+  switch (BRCC) { // Each listed BRCondCode maps to a two-letter name.
+  case ARCCC::BREQ:
+    return "eq";
+  case ARCCC::BRNE:
+    return "ne";
+  case ARCCC::BRLT:
+    return "lt";
+  case ARCCC::BRGE:
+    return "ge";
+  case ARCCC::BRLO:
+    return "lo";
+  case ARCCC::BRHS:
+    return "hs";
+  }
+  return BadConditionCode(BRCC); // Taken only when no case above matched.
+}
+
+static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
+  switch (CC) { // Each listed CondCode maps to its short printed name.
+  case ARCCC::EQ:
+    return "eq";
+  case ARCCC::NE:
+    return "ne";
+  case ARCCC::P:
+    return "p";
+  case ARCCC::N:
+    return "n";
+  case ARCCC::HS:
+    return "hs";
+  case ARCCC::LO:
+    return "lo";
+  case ARCCC::GT:
+    return "gt";
+  case ARCCC::GE:
+    return "ge";
+  case ARCCC::VS:
+    return "vs";
+  case ARCCC::VC:
+    return "vc";
+  case ARCCC::LT:
+    return "lt";
+  case ARCCC::LE:
+    return "le";
+  case ARCCC::HI:
+    return "hi";
+  case ARCCC::LS:
+    return "ls";
+  case ARCCC::PNZ:
+    return "pnz";
+  case ARCCC::AL:
+    return "al";
+  case ARCCC::NZ:
+    return "nz";
+  case ARCCC::Z:
+    return "z";
+  }
+  return BadConditionCode(CC); // Taken only when no case above matched.
+}
+
+void ARCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << StringRef(getRegisterName(RegNo)).lower(); // Emit the name lower-cased.
+}
+
+void ARCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot, const MCSubtargetInfo &STI) {
+  printInstruction(MI, O);   // STI is not used in this body.
+  printAnnotation(O, Annot); // Annot is passed on to printAnnotation.
+}
+
+/// Print a constant as "0x" + hex digits; print a symbol reference as '@' +
+/// symbol name, followed by the signed addend when it is non-zero.
+static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
+                      raw_ostream &OS) {
+  if (const auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
+    OS << "0x";
+    OS.write_hex(CE->getValue());
+    return;
+  }
+
+  // int64_t matches MCConstantExpr::getValue(); int would narrow the addend.
+  int64_t Offset = 0;
+  const MCSymbolRefExpr *SRE;
+  if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+    const auto *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+    assert(SRE && CE && "Binary expression must be sym+const.");
+    Offset = CE->getValue();
+  } else {
+    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+    assert(SRE && "Unexpected MCExpr type.");
+  }
+  assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+  // Symbols are prefixed with '@'
+  OS << '@';
+  SRE->getSymbol().print(OS, MAI);
+  if (Offset > 0)
+    OS << '+';
+  if (Offset)
+    OS << Offset;
+}
+
+void ARCInstPrinter::printOperand(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O) {
+  // Dispatch on the operand kind: register, immediate, or expression.
+  const MCOperand &Operand = MI->getOperand(OpNum);
+  if (Operand.isReg()) {
+    // Registers go through printRegName.
+    printRegName(O, Operand.getReg());
+  } else if (Operand.isImm()) {
+    // Immediates are streamed as plain integers.
+    O << Operand.getImm();
+  } else {
+    // Anything else must be an expression.
+    assert(Operand.isExpr() && "unknown operand kind in printOperand");
+    printExpr(Operand.getExpr(), &MAI, O);
+  }
+}
+
+void ARCInstPrinter::printMemOperandRI(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(OpNum);
+  const MCOperand &OffsetOp = MI->getOperand(OpNum + 1);
+  assert(BaseOp.isReg() && "Base should be register.");
+  assert(OffsetOp.isImm() && "Offset should be immediate.");
+  printRegName(O, BaseOp.getReg());
+  O << "," << OffsetOp.getImm(); // Printed form: <base>,<offset>
+}
+
+void ARCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  // The operand is an immediate that is cast to ARCCC::CondCode for printing.
+  const MCOperand &Pred = MI->getOperand(OpNum);
+  assert(Pred.isImm() && "Predicate operand is immediate.");
+  O << ARCCondCodeToString(static_cast<ARCCC::CondCode>(Pred.getImm()));
+}
+
+void ARCInstPrinter::printBRCCPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  const MCOperand &Pred = MI->getOperand(OpNum);
+  assert(Pred.isImm() && "Predicate operand is immediate.");
+  O << ARCBRCondCodeToString(static_cast<ARCCC::BRCondCode>(Pred.getImm()));
+}
diff --git a/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
new file mode 100644
index 000000000000..5ea58407f9ed
--- /dev/null
+++ b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -0,0 +1,45 @@
+//===- ARCInstPrinter.h - Convert ARC MCInst to assembly syntax -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the ARCInstPrinter class,
+/// which is used to print ARC MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+/// MCInstPrinter subclass used to print ARC MCInst to a .s file.
+class ARCInstPrinter : public MCInstPrinter {
+public:
+  ARCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                 const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+private:
+  void printMemOperandRI(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printBRCCPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                 raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARC_MCTARGETDESC_ARCINSTPRINTER_H
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
index 5d3fb52cfb45..10f93e292e9b 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARCMCAsmInfo.cpp - ARC asm properties --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
index 997a370fee8d..a086bd88d459 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===- ARCMCAsmInfo.h - ARC asm properties ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 17be15f730de..aa4818cd57ac 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===- ARCMCTargetDesc.cpp - ARC Target Descriptions ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARCMCTargetDesc.h"
+#include "ARCInstPrinter.h"
 #include "ARCMCAsmInfo.h"
 #include "ARCTargetStreamer.h"
-#include "InstPrinter/ARCInstPrinter.h"
+#include "TargetInfo/ARCTargetInfo.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
index dd152a6a34f9..ab06ce46d99f 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===- ARCMCTargetDesc.h - ARC Target Descriptions --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,8 +19,6 @@ namespace llvm {
 
 class Target;
 
-Target &getTheARCTarget();
-
 } // end namespace llvm
 
 // Defines symbolic names for ARC registers.  This defines a mapping from
diff --git a/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp b/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
index 460b0a9f3e9b..59b9f806d590 100644
--- a/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
+++ b/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
@@ -1,13 +1,12 @@
 //===- ARCTargetInfo.cpp - ARC Target Implementation ----------- *- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARC.h"
+#include "TargetInfo/ARCTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Target/ARC/TargetInfo/ARCTargetInfo.h b/lib/Target/ARC/TargetInfo/ARCTargetInfo.h
new file mode 100644
index 000000000000..6a9d2685f422
--- /dev/null
+++ b/lib/Target/ARC/TargetInfo/ARCTargetInfo.h
@@ -0,0 +1,20 @@
+//===- ARCTargetInfo.h - ARC Target Implementation ------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
+#define LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheARCTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index be88fe4ddb14..fb238bfc9cbc 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -1,9 +1,8 @@
 //=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b5cc45c5cc94..bf8ed6562fe7 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,9 +1,8 @@
 //===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,7 +35,7 @@ class MachineInstr;
 class MCInst;
 class PassRegistry;
 
-
+FunctionPass *createARMLowOverheadLoopsPass();
 Pass *createARMParallelDSPPass();
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
@@ -47,6 +46,7 @@ FunctionPass *createARMCodeGenPreparePass();
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createMVEVPTBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
 FunctionPass *createThumb2SizeReductionPass(
     std::function<bool(const Function &)> Ftor = nullptr);
@@ -57,11 +57,6 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
-void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
-                      BasicBlockInfo &BBI);
-std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
-
-
 void initializeARMParallelDSPPass(PassRegistry &);
 void initializeARMLoadStoreOptPass(PassRegistry &);
 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
@@ -69,6 +64,9 @@ void initializeARMCodeGenPreparePass(PassRegistry &);
 void initializeARMConstantIslandsPass(PassRegistry &);
 void initializeARMExpandPseudoPass(PassRegistry &);
 void initializeThumb2SizeReducePass(PassRegistry &);
+void initializeThumb2ITBlockPass(PassRegistry &);
+void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeARMLowOverheadLoopsPass(PassRegistry &);
 
 } // end namespace llvm
 
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 3db60f1c16d6..b687db12eaf5 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,9 +1,8 @@
 //===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,12 +32,59 @@ def ModeSoftFloat         : SubtargetFeature<"soft-float","UseSoftFloat",
 //
 
 // Floating Point, HW Division and Neon Support
-def FeatureVFP2           : SubtargetFeature<"vfp2", "HasVFPv2", "true",
-                                             "Enable VFP2 instructions">;
 
-def FeatureVFP3           : SubtargetFeature<"vfp3", "HasVFPv3", "true",
-                                             "Enable VFP3 instructions",
-                                             [FeatureVFP2]>;
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs         : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+                                             "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16       : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+                                             "Enable 16-bit FP registers",
+                                             [FeatureFPRegs]>;
+
+def FeatureFPRegs64       : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+                                             "Enable 64-bit FP registers",
+                                             [FeatureFPRegs]>;
+
+def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
+                                             "Floating point unit supports "
+                                             "double precision",
+                                             [FeatureFPRegs64]>;
+
+def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
+                                             "Extend FP to 32 double registers">;
+
+multiclass VFPver<string name, string query, string description,
+                  list<SubtargetFeature> prev = [],
+                  list<SubtargetFeature> otherimplies = []> {
+  def _D16_SP: SubtargetFeature<
+    name#"d16sp", query#"D16SP", "true",
+    description#" with only 16 d-registers and no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>;
+  def _SP: SubtargetFeature<
+    name#"sp", query#"SP", "true",
+    description#" with no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) #
+      otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def _D16: SubtargetFeature<
+    name#"d16", query#"D16", "true",
+    description#" with only 16 d-registers",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+      otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def "": SubtargetFeature<
+    name, query, "true", description,
+    prev # otherimplies # [
+        !cast<SubtargetFeature>(NAME # "_D16"),
+        !cast<SubtargetFeature>(NAME # "_SP")]>;
+}
+
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
+                         [], [FeatureFPRegs]>;
+
+defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
+                         [FeatureVFP2]>;
 
 def FeatureNEON           : SubtargetFeature<"neon", "HasNEON", "true",
                                              "Enable NEON instructions",
@@ -48,31 +94,22 @@ def FeatureFP16           : SubtargetFeature<"fp16", "HasFP16", "true",
                                              "Enable half-precision "
                                              "floating point">;
 
-def FeatureVFP4           : SubtargetFeature<"vfp4", "HasVFPv4", "true",
-                                             "Enable VFP4 instructions",
-                                             [FeatureVFP3, FeatureFP16]>;
+defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
+                         [FeatureVFP3], [FeatureFP16]>;
 
-def FeatureFPARMv8        : SubtargetFeature<"fp-armv8", "HasFPARMv8",
-                                             "true", "Enable ARMv8 FP",
-                                             [FeatureVFP4]>;
+defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
+                         [FeatureVFP4]>;
 
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                              "Enable full half-precision "
                                              "floating point",
-                                             [FeatureFPARMv8]>;
+                                             [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
 
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                              "Enable full half-precision "
                                              "floating point fml instructions",
                                              [FeatureFullFP16]>;
 
-def FeatureVFPOnlySP      : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
-                                             "Floating point unit supports "
-                                             "single precision only">;
-
-def FeatureD16            : SubtargetFeature<"d16", "HasD16", "true",
-                                             "Restrict FP to 16 double registers">;
-
 def FeatureHWDivThumb     : SubtargetFeature<"hwdiv",
                                              "HasHardwareDivideInThumb", "true",
                                              "Enable divide instructions in Thumb">;
@@ -368,6 +405,12 @@ def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
 def FeatureSB       : SubtargetFeature<"sb", "HasSB", "true",
   "Enable v8.5a Speculation Barrier" >;
 
+// Armv8.1-M extensions
+
+def FeatureLOB            : SubtargetFeature<"lob", "HasLOB", "true",
+                                             "Enable Low Overhead Branch "
+                                             "extensions">;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
@@ -461,6 +504,19 @@ def HasV8_5aOps   : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
                                    "Support ARM v8.5a instructions",
                                    [HasV8_4aOps, FeatureSB]>;
 
+def HasV8_1MMainlineOps : SubtargetFeature<
+               "v8.1m.main", "HasV8_1MMainlineOps", "true",
+               "Support ARM v8-1M Mainline instructions",
+               [HasV8MMainlineOps]>;
+def HasMVEIntegerOps : SubtargetFeature<
+               "mve", "HasMVEIntegerOps", "true",
+               "Support M-Class Vector Extension with integer ops",
+               [HasV8_1MMainlineOps, FeatureDSP, FeatureFPRegs16, FeatureFPRegs64]>;
+def HasMVEFloatOps : SubtargetFeature<
+               "mve.fp", "HasMVEFloatOps", "true",
+               "Support M-Class Vector Extension with integer and floating ops",
+               [HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>;
+
 //===----------------------------------------------------------------------===//
 // ARM Processor subtarget features.
 //
@@ -495,6 +551,8 @@ def ProcA73     : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
                                    "Cortex-A73 ARM processors", []>;
 def ProcA75     : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
                                    "Cortex-A75 ARM processors", []>;
+def ProcA76     : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+                                   "Cortex-A76 ARM processors", []>;
 
 def ProcKrait   : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
                                    "Qualcomm Krait processors", []>;
@@ -744,6 +802,18 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
                                                        FeatureAcquireRelease,
                                                        FeatureMClass]>;
 
+def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline",
+                                                      [HasV8_1MMainlineOps,
+                                                       FeatureNoARM,
+                                                       ModeThumb,
+                                                       FeatureDB,
+                                                       FeatureHWDivThumb,
+                                                       Feature8MSecExt,
+                                                       FeatureAcquireRelease,
+                                                       FeatureMClass,
+                                                       FeatureRAS,
+                                                       FeatureLOB]>;
+
 // Aliases
 def IWMMXT   : Architecture<"iwmmxt",      "ARMv5te",  [ARMv5te]>;
 def IWMMXT2  : Architecture<"iwmmxt2",     "ARMv5te",  [ARMv5te]>;
@@ -757,6 +827,7 @@ def ARMv7s   : Architecture<"armv7s",      "ARMv7a",   [ARMv7a]>;
 // ARM schedules.
 //===----------------------------------------------------------------------===//
 //
+include "ARMPredicates.td"
 include "ARMSchedule.td"
 
 //===----------------------------------------------------------------------===//
@@ -942,14 +1013,12 @@ def : ProcessorModel<"cortex-r4f",  CortexA8Model,      [ARMv7r, ProcR4,
                                                          FeatureHasRetAddrStack,
                                                          FeatureSlowFPBrcc,
                                                          FeatureHasSlowFPVMLx,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureAvoidPartialCPSR]>;
 
 def : ProcessorModel<"cortex-r5",   CortexA8Model,      [ARMv7r, ProcR5,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureSlowFPBrcc,
                                                          FeatureHWDivARM,
                                                          FeatureHasSlowFPVMLx,
@@ -957,8 +1026,7 @@ def : ProcessorModel<"cortex-r5",   CortexA8Model,      [ARMv7r, ProcR5,
 
 def : ProcessorModel<"cortex-r7",   CortexA8Model,      [ARMv7r, ProcR7,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureFP16,
                                                          FeatureMP,
                                                          FeatureSlowFPBrcc,
@@ -968,8 +1036,7 @@ def : ProcessorModel<"cortex-r7",   CortexA8Model,      [ARMv7r, ProcR7,
 
 def : ProcessorModel<"cortex-r8",   CortexA8Model,      [ARMv7r,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureFP16,
                                                          FeatureMP,
                                                          FeatureSlowFPBrcc,
@@ -977,39 +1044,52 @@ def : ProcessorModel<"cortex-r8",   CortexA8Model,      [ARMv7r,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureAvoidPartialCPSR]>;
 
-def : ProcessorModel<"cortex-m3", CortexM3Model,        [ARMv7m,
+def : ProcessorModel<"cortex-m3",   CortexM4Model,      [ARMv7m,
                                                          ProcM3,
                                                          FeaturePrefLoopAlign32,
+                                                         FeatureUseMISched,
+                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
-def : ProcessorModel<"sc300",     CortexM3Model,        [ARMv7m,
+def : ProcessorModel<"sc300",       CortexM4Model,      [ARMv7m,
                                                          ProcM3,
+                                                         FeatureUseMISched,
+                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
-def : ProcessorModel<"cortex-m4", CortexM3Model,        [ARMv7em,
-                                                         FeatureVFP4,
-                                                         FeatureVFPOnlySP,
-                                                         FeatureD16,
+def : ProcessorModel<"cortex-m4", CortexM4Model,        [ARMv7em,
+                                                         FeatureVFP4_D16_SP,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
+                                                         FeatureUseMISched,
+                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcNoItin<"cortex-m7",                           [ARMv7em,
-                                                         FeatureFPARMv8,
-                                                         FeatureD16]>;
+                                                         FeatureFPARMv8_D16]>;
 
 def : ProcNoItin<"cortex-m23",                          [ARMv8mBaseline,
                                                          FeatureNoMovt]>;
 
-def : ProcessorModel<"cortex-m33", CortexM3Model,       [ARMv8mMainline,
+def : ProcessorModel<"cortex-m33", CortexM4Model,       [ARMv8mMainline,
                                                          FeatureDSP,
-                                                         FeatureFPARMv8,
-                                                         FeatureD16,
-                                                         FeatureVFPOnlySP,
+                                                         FeatureFPARMv8_D16_SP,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
+                                                         FeatureUseMISched,
+                                                         FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
+def : ProcessorModel<"cortex-m35p", CortexM4Model,      [ARMv8mMainline,
+                                                         FeatureDSP,
+                                                         FeatureFPARMv8_D16_SP,
+                                                         FeaturePrefLoopAlign32,
+                                                         FeatureHasSlowFPVMLx,
+                                                         FeatureUseMISched,
+                                                         FeatureUseAA,
+                                                         FeatureHasNoBranchPredictor]>;
+
+
 def : ProcNoItin<"cortex-a32",                           [ARMv8a,
                                                          FeatureHWDivThumb,
                                                          FeatureHWDivARM,
@@ -1060,6 +1140,22 @@ def : ProcNoItin<"cortex-a75",                          [ARMv82a, ProcA75,
                                                          FeatureHWDivARM,
                                                          FeatureDotProd]>;
 
+def : ProcNoItin<"cortex-a76",                          [ARMv82a, ProcA76,
+                                                         FeatureHWDivThumb,
+                                                         FeatureHWDivARM,
+                                                         FeatureCrypto,
+                                                         FeatureCRC,
+                                                         FeatureFullFP16,
+                                                         FeatureDotProd]>;
+
+def : ProcNoItin<"cortex-a76ae",                        [ARMv82a, ProcA76,
+                                                         FeatureHWDivThumb,
+                                                         FeatureHWDivARM,
+                                                         FeatureCrypto,
+                                                         FeatureCRC,
+                                                         FeatureFullFP16,
+                                                         FeatureDotProd]>;
+
 def : ProcessorModel<"cyclone",     SwiftModel,         [ARMv8a, ProcSwift,
                                                          FeatureHasRetAddrStack,
                                                          FeatureNEONForFP,
@@ -1081,6 +1177,9 @@ def : ProcNoItin<"exynos-m3",                           [ARMv8a, ProcExynos]>;
 def : ProcNoItin<"exynos-m4",                           [ARMv82a, ProcExynos,
                                                          FeatureFullFP16,
                                                          FeatureDotProd]>;
+def : ProcNoItin<"exynos-m5",                           [ARMv82a, ProcExynos,
+                                                         FeatureFullFP16,
+                                                         FeatureDotProd]>;
 
 def : ProcNoItin<"kryo",                                [ARMv8a, ProcKryo,
                                                          FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index b7cd3a0c2dae..e29077266fcd 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,9 +17,10 @@
 #include "ARMMachineFunctionInfo.h"
 #include "ARMTargetMachine.h"
 #include "ARMTargetObjectFile.h"
-#include "InstPrinter/ARMInstPrinter.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMInstPrinter.h"
 #include "MCTargetDesc/ARMMCExpr.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/BinaryFormat/COFF.h"
@@ -120,13 +120,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   // Calculate this function's optimization goal.
   unsigned OptimizationGoal;
-  if (F.hasFnAttribute(Attribute::OptimizeNone))
+  if (F.hasOptNone())
     // For best debugging illusion, speed and small size sacrificed
     OptimizationGoal = 6;
-  else if (F.optForMinSize())
+  else if (F.hasMinSize())
     // Aggressively for small size, speed and debug illusion sacrificed
     OptimizationGoal = 4;
-  else if (F.optForSize())
+  else if (F.hasOptSize())
     // For small size, but speed and debugging illusion preserved
     OptimizationGoal = 3;
   else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
@@ -184,10 +184,21 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
+void ARMAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+                                       raw_ostream &O) {
+  assert(MO.isGlobal() && "caller should check MO.isGlobal");
+  // Emit an optional :lower16:/:upper16: prefix, then symbol and offset.
+  if (MO.getTargetFlags() & ARMII::MO_LO16)
+    O << ":lower16:";
+  else if (MO.getTargetFlags() & ARMII::MO_HI16)
+    O << ":upper16:";
+  GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags())->print(O, MAI);
+  printOffset(MO.getOffset(), O);
+}
+
 void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
                                  raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(OpNum);
-  unsigned TF = MO.getTargetFlags();
 
   switch (MO.getType()) {
   default: llvm_unreachable("<unknown operand type>");
@@ -204,27 +215,20 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_Immediate: {
-    int64_t Imm = MO.getImm();
     O << '#';
+    unsigned TF = MO.getTargetFlags();
     if (TF == ARMII::MO_LO16)
       O << ":lower16:";
     else if (TF == ARMII::MO_HI16)
       O << ":upper16:";
-    O << Imm;
+    O << MO.getImm();
     break;
   }
   case MachineOperand::MO_MachineBasicBlock:
     MO.getMBB()->getSymbol()->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress: {
-    const GlobalValue *GV = MO.getGlobal();
-    if (TF & ARMII::MO_LO16)
-      O << ":lower16:";
-    else if (TF & ARMII::MO_HI16)
-      O << ":upper16:";
-    GetARMGVSymbol(GV, TF)->print(O, MAI);
-
-    printOffset(MO.getOffset(), O);
+    PrintSymbolOperand(MO, O);
     break;
   }
   case MachineOperand::MO_ConstantPoolIndex:
@@ -256,8 +260,7 @@ GetARMJTIPICJumpTableLabel(unsigned uid) const {
 }
 
 bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                                    unsigned AsmVariant, const char *ExtraCode,
-                                    raw_ostream &O) {
+                                    const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -265,20 +268,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
-    case 'a': // Print as a memory address.
-      if (MI->getOperand(OpNum).isReg()) {
-        O << "["
-          << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
-          << "]";
-        return false;
-      }
-      LLVM_FALLTHROUGH;
-    case 'c': // Don't print "#" before an immediate operand.
-      if (!MI->getOperand(OpNum).isImm())
-        return true;
-      O << MI->getOperand(OpNum).getImm();
-      return false;
+      return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
     case 'P': // Print a VFP double precision register.
     case 'q': // Print a NEON quad precision register.
       printOperand(MI, OpNum, O);
@@ -444,8 +434,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
 }
 
 bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNum, unsigned AsmVariant,
-                                          const char *ExtraCode,
+                                          unsigned OpNum, const char *ExtraCode,
                                           raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -668,7 +657,7 @@ void ARMAsmPrinter::emitAttributes() {
     ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                       ARMBuildAttrs::IEEEDenormals);
   else {
-    if (!STI.hasVFP2()) {
+    if (!STI.hasVFP2Base()) {
       // When the target doesn't have an FPU (by design or
       // intention), the assumptions made on the software support
       // mirror that of the equivalent hardware support *if it
@@ -678,7 +667,7 @@ void ARMAsmPrinter::emitAttributes() {
       if (STI.hasV7Ops())
         ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                           ARMBuildAttrs::PreserveFPSign);
-    } else if (STI.hasVFP3()) {
+    } else if (STI.hasVFP3Base()) {
       // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
       // the sign bit of the zero matches the sign bit of the input or
       // result that is being flushed to zero.
@@ -773,6 +762,14 @@ void ARMAsmPrinter::emitAttributes() {
 
 //===----------------------------------------------------------------------===//
 
+static MCSymbol *getBFLabel(StringRef Prefix, unsigned FunctionNumber,
+                             unsigned LabelId, MCContext &Ctx) {
+  // Returns the symbol named <Prefix>BF<FunctionNumber>_<LabelId>, obtained
+  // from Ctx via getOrCreateSymbol.
+  return Ctx.getOrCreateSymbol(Twine(Prefix) + "BF" + Twine(FunctionNumber) +
+                               "_" + Twine(LabelId));
+}
+
 static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
                              unsigned LabelId, MCContext &Ctx) {
 
@@ -1074,7 +1071,6 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
   const TargetRegisterInfo *TargetRegInfo =
     MF.getSubtarget().getRegisterInfo();
   const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
-  const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
 
   unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
   unsigned Opc = MI->getOpcode();
@@ -1138,7 +1134,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
           Pad += Width;
           continue;
         }
-        RegList.push_back(MO.getReg());
+        // Check for registers that are remapped (for a Thumb1 prologue that
+        // saves high registers).
+        unsigned Reg = MO.getReg();
+        if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg))
+          Reg = RemappedReg;
+        RegList.push_back(Reg);
       }
       break;
     case ARM::STR_PRE_IMM:
@@ -1188,7 +1189,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         unsigned CPI = MI->getOperand(1).getIndex();
         const MachineConstantPool *MCP = MF.getConstantPool();
         if (CPI >= MCP->getConstants().size())
-          CPI = AFI.getOriginalCPIdx(CPI);
+          CPI = AFI->getOriginalCPIdx(CPI);
         assert(CPI != -1U && "Invalid constpool index");
 
         // Derive the actual offset.
@@ -1218,8 +1219,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     } else if (DstReg == ARM::SP) {
       MI->print(errs());
       llvm_unreachable("Unsupported opcode for unwinding information");
-    }
-    else {
+    } else if (Opc == ARM::tMOVr) {
+      // If a Thumb1 function spills r8-r11, we copy the values to low
+      // registers before pushing them. Record the copy so we can emit the
+      // correct ".save" later.
+      AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
+    } else {
       MI->print(errs());
       llvm_unreachable("Unsupported opcode for unwinding information");
     }
@@ -1447,6 +1452,66 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
+  case ARM::t2BFi:
+  case ARM::t2BFic:
+  case ARM::t2BFLi:
+  case ARM::t2BFr:
+  case ARM::t2BFLr: {
+    // This is a Branch Future instruction.
+
+    const MCExpr *BranchLabel = MCSymbolRefExpr::create(
+        getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+                   MI->getOperand(0).getIndex(), OutContext),
+        OutContext);
+
+    auto MCInst = MCInstBuilder(Opc).addExpr(BranchLabel);
+    if (MI->getOperand(1).isReg()) {
+      // For BFr/BFLr
+      MCInst.addReg(MI->getOperand(1).getReg());
+    } else {
+      // For BFi/BFLi/BFic
+      const MCExpr *BranchTarget;
+      if (MI->getOperand(1).isMBB())
+        BranchTarget = MCSymbolRefExpr::create(
+            MI->getOperand(1).getMBB()->getSymbol(), OutContext);
+      else if (MI->getOperand(1).isGlobal()) {
+        const GlobalValue *GV = MI->getOperand(1).getGlobal();
+        BranchTarget = MCSymbolRefExpr::create(
+            GetARMGVSymbol(GV, MI->getOperand(1).getTargetFlags()), OutContext);
+      } else if (MI->getOperand(1).isSymbol()) {
+        BranchTarget = MCSymbolRefExpr::create(
+            GetExternalSymbolSymbol(MI->getOperand(1).getSymbolName()),
+            OutContext);
+      } else
+        llvm_unreachable("Unhandled operand kind in Branch Future instruction");
+
+      MCInst.addExpr(BranchTarget);
+    }
+
+      if (Opc == ARM::t2BFic) {
+        const MCExpr *ElseLabel = MCSymbolRefExpr::create(
+            getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+                       MI->getOperand(2).getIndex(), OutContext),
+            OutContext);
+        MCInst.addExpr(ElseLabel);
+        MCInst.addImm(MI->getOperand(3).getImm());
+      } else {
+        MCInst.addImm(MI->getOperand(2).getImm())
+            .addReg(MI->getOperand(3).getReg());
+      }
+
+    EmitToStreamer(*OutStreamer, MCInst);
+    return;
+  }
+  case ARM::t2BF_LabelPseudo: {
+    // This is a pseudo op for a label used by a branch future instruction
+
+    // Emit the label.
+    OutStreamer->EmitLabel(getBFLabel(DL.getPrivateGlobalPrefix(),
+                                       getFunctionNumber(),
+                                       MI->getOperand(0).getIndex(), OutContext));
+    return;
+  }
   case ARM::tPICADD: {
     // This is a pseudo op for a label + instruction sequence, which looks like:
     // LPC0:
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 0ba4bc05d6f7..a4b37fa2331f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -76,12 +75,11 @@ public:
 
   void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
 
+  void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O) override;
+                             const char *ExtraCode, raw_ostream &O) override;
 
   void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
                         const MCSubtargetInfo *EndInfo) const override;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bbebed59c851..222aa85856a2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -134,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
-  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
     return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
@@ -707,15 +706,7 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   if (MCID.getSize())
     return MCID.getSize();
 
-  // If this machine instr is an inline asm, measure it.
-  if (MI.getOpcode() == ARM::INLINEASM) {
-    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
-    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
-      Size = alignTo(Size, 4);
-    return Size;
-  }
-  unsigned Opc = MI.getOpcode();
-  switch (Opc) {
+  switch (MI.getOpcode()) {
   default:
     // pseudo-instruction sizes are zero.
     return 0;
@@ -752,6 +743,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     return 12;
   case ARM::SPACE:
     return MI.getOperand(1).getImm();
+  case ARM::INLINEASM:
+  case ARM::INLINEASM_BR: {
+    // If this machine instr is an inline asm, measure it.
+    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+      Size = alignTo(Size, 4);
+    return Size;
+  }
   }
 }
 
@@ -806,6 +805,28 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
 }
 
+void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
+  MIB.addImm(ARMVCC::None); // predicate-code immediate: ARMVCC::None
+  MIB.addReg(0);            // register 0 (presumably "no register" - confirm)
+}
+
+void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                      unsigned DestReg) {
+  addUnpredicatedMveVpredNOp(MIB);      // ARMVCC::None immediate + register 0
+  MIB.addReg(DestReg, RegState::Undef); // then DestReg, flagged as undef
+}
+
+void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
+  MIB.addImm(Cond);                         // caller-supplied predicate code
+  MIB.addReg(ARM::VPR, RegState::Implicit); // VPR added as an implicit operand
+}
+
+void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
+                                    unsigned Cond, unsigned Inactive) {
+  addPredicatedMveVpredNOp(MIB, Cond); // Cond immediate + implicit VPR
+  MIB.addReg(Inactive);                // then the Inactive register operand
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DestReg,
@@ -831,17 +852,20 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVRS;
   else if (SPRDest && GPRSrc)
     Opc = ARM::VMOVSR;
-  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
 
   if (Opc) {
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
     MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    if (Opc == ARM::VORRq)
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
       MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    MIB.add(predOps(ARMCC::AL));
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(MIB, DestReg);
+    else
+      MIB.add(predOps(ARMCC::AL));
     return;
   }
 
@@ -852,11 +876,11 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   // Use VORRq when possible.
   if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 2;
   } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
-    Opc = ARM::VORRq;
+    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
     BeginIdx = ARM::qsub_0;
     SubRegs = 4;
   // Fall back to VMOVD.
@@ -891,7 +915,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BeginIdx = ARM::dsub_0;
     SubRegs = 4;
     Spacing = 2;
-  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+             !Subtarget.hasFP64()) {
     Opc = ARM::VMOVS;
     BeginIdx = ARM::ssub_0;
     SubRegs = 2;
@@ -901,6 +926,30 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   } else if (DestReg == ARM::CPSR) {
     copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
     return;
+  } else if (DestReg == ARM::VPR) {
+    assert(ARM::GPRRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::VPR) {
+    assert(ARM::GPRRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (DestReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRRegClass.contains(SrcReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (SrcReg == ARM::FPSCR_NZCV) {
+    assert(ARM::GPRRegClass.contains(DestReg));
+    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
   }
 
   assert(Opc && "Impossible reg-to-reg copy");
@@ -925,10 +974,15 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     DstRegs.insert(Dst);
 #endif
     Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
-    // VORR takes two source operands.
-    if (Opc == ARM::VORRq)
+    // VORR (NEON or MVE) takes two source operands.
+    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
       Mov.addReg(Src);
-    Mov = Mov.add(predOps(ARMCC::AL));
+    }
+    // MVE VORR takes predicate operands in place of an ordinary condition.
+    if (Opc == ARM::MVE_VORR)
+      addUnpredicatedMveVpredROp(Mov, Dst);
+    else
+      Mov = Mov.add(predOps(ARMCC::AL));
     // MOVr can set CC.
     if (Opc == ARM::MOVr)
       Mov = Mov.add(condCodeOp());
@@ -1010,6 +1064,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             .addImm(0)
             .addMemOperand(MMO)
             .add(predOps(ARMCC::AL));
+      } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
+            .addReg(SrcReg, getKillRegState(isKill))
+            .addFrameIndex(FI)
+            .addImm(0)
+            .addMemOperand(MMO)
+            .add(predOps(ARMCC::AL));
       } else
         llvm_unreachable("Unknown reg class!");
       break;
@@ -1042,7 +1103,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
         llvm_unreachable("Unknown reg class!");
       break;
     case 16:
-      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+      if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
         // Use aligned spills if the stack can be realigned.
         if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
           BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
@@ -1058,6 +1119,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
               .addMemOperand(MMO)
               .add(predOps(ARMCC::AL));
         }
+      } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+                 Subtarget.hasMVEIntegerOps()) {
+        auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
+        MIB.addReg(SrcReg, getKillRegState(isKill))
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO);
+        addUnpredicatedMveVpredNOp(MIB);
       } else
         llvm_unreachable("Unknown reg class!");
       break;
@@ -1155,6 +1224,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VSTR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VST1q64:
   case ARM::VST1d64TPseudo:
   case ARM::VST1d64QPseudo:
@@ -1177,7 +1253,8 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
   SmallVector<const MachineMemOperand *, 1> Accesses;
-  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
+      Accesses.size() == 1) {
     FrameIndex =
         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
             ->getFrameIndex();
@@ -1224,6 +1301,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
           .addImm(0)
           .addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
+    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
+          .addFrameIndex(FI)
+          .addImm(0)
+          .addMemOperand(MMO)
+          .add(predOps(ARMCC::AL));
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1260,7 +1343,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
             .addFrameIndex(FI)
@@ -1273,6 +1356,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
             .addMemOperand(MMO)
             .add(predOps(ARMCC::AL));
       }
+    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+               Subtarget.hasMVEIntegerOps()) {
+      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
+      MIB.addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+      addUnpredicatedMveVpredNOp(MIB);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -1369,6 +1459,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
       return MI.getOperand(0).getReg();
     }
     break;
+  case ARM::VLDR_P0_off:
+    if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+        MI.getOperand(1).getImm() == 0) {
+      FrameIndex = MI.getOperand(0).getIndex();
+      return ARM::P0;
+    }
+    break;
   case ARM::VLD1q64:
   case ARM::VLD1d8TPseudo:
   case ARM::VLD1d16TPseudo:
@@ -1397,7 +1494,8 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                      int &FrameIndex) const {
   SmallVector<const MachineMemOperand *, 1> Accesses;
-  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
+      Accesses.size() == 1) {
     FrameIndex =
         cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
             ->getFrameIndex();
@@ -1480,7 +1578,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
   // widened to VMOVD.  We prefer the VMOVD when possible because it may be
   // changed into a VORR that can go down the NEON pipeline.
-  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
     return false;
 
   // Look for a copy between even S-registers.  That is where we keep floats
@@ -1898,24 +1996,15 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
   // If we are optimizing for size, see if the branch in the predecessor can be
   // lowered to cbn?z by the constant island lowering pass, and return false if
   // so. This results in a shorter instruction sequence.
-  if (MBB.getParent()->getFunction().optForSize()) {
+  if (MBB.getParent()->getFunction().hasOptSize()) {
     MachineBasicBlock *Pred = *MBB.pred_begin();
     if (!Pred->empty()) {
       MachineInstr *LastMI = &*Pred->rbegin();
       if (LastMI->getOpcode() == ARM::t2Bcc) {
-        MachineBasicBlock::iterator CmpMI = LastMI;
-        if (CmpMI != Pred->begin()) {
-          --CmpMI;
-          if (CmpMI->getOpcode() == ARM::tCMPi8 ||
-              CmpMI->getOpcode() == ARM::t2CMPri) {
-            unsigned Reg = CmpMI->getOperand(0).getReg();
-            unsigned PredReg = 0;
-            ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
-            if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
-                isARMLowRegister(Reg))
-              return false;
-          }
-        }
+        const TargetRegisterInfo *TRI = &getRegisterInfo();
+        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
+        if (CmpMI)
+          return false;
       }
     }
   }
@@ -1932,6 +2021,15 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
   if (!TCycles)
     return false;
 
+  // In thumb code we often end up trading one branch for an IT block, and
+  // if we are cloning the instructions this can increase code size. Prevent
+  // blocks with multiple predecessors from being ifcvted to avoid this
+  // cloning.
+  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
+    if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
+      return false;
+  }
+
   // Attempt to estimate the relative costs of predication versus branching.
   // Here we scale up each component of UnpredCost to avoid precision issue when
   // scaling TCycles/FCycles by Probability.
@@ -2040,9 +2138,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
 
 /// Identify instructions that can be folded into a MOVCC instruction, and
 /// return the defining instruction.
-static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
-                                      const MachineRegisterInfo &MRI,
-                                      const TargetInstrInfo *TII) {
+MachineInstr *
+ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+                                   const TargetInstrInfo *TII) const {
   if (!TargetRegisterInfo::isVirtualRegister(Reg))
     return nullptr;
   if (!MRI.hasOneNonDBGUse(Reg))
@@ -2050,8 +2148,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
   MachineInstr *MI = MRI.getVRegDef(Reg);
   if (!MI)
     return nullptr;
-  // MI is folded into the MOVCC by predicating it.
-  if (!MI->isPredicable())
+  // Check if MI can be predicated and folded into the MOVCC.
+  if (!isPredicable(*MI))
     return nullptr;
   // Check if MI has any non-dead defs or physreg uses. This also detects
   // predicated instructions which will be reading CPSR.
@@ -2266,7 +2364,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.hasMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2332,6 +2430,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
   for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
        --CurRegEnc) {
     unsigned CurReg = RegClass->getRegister(CurRegEnc);
+    if (IsT1PushPop && CurReg > ARM::R7)
+      continue;
     if (!IsPop) {
       // Pushing any register is completely harmless, mark the register involved
       // as undef since we don't care about its value and must not restore it
@@ -2389,7 +2489,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;
 
   // Memory operands in inline assembly always use AddrMode2.
-  if (Opcode == ARM::INLINEASM)
+  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
     AddrMode = ARMII::AddrMode2;
 
   if (Opcode == ARM::ADDri) {
@@ -2473,6 +2573,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       NumBits = 8;
       Scale = 2;
       break;
+    case ARMII::AddrModeT2_i7:
+    case ARMII::AddrModeT2_i7s2:
+    case ARMII::AddrModeT2_i7s4:
+      ImmIdx = FrameRegIdx+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = 7;
+      Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
+               AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
+      break;
     default:
       llvm_unreachable("Unsupported addressing mode!");
     }
@@ -2543,6 +2652,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
     return true;
   case ARM::CMPrr:
   case ARM::t2CMPrr:
+  case ARM::tCMPr:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = MI.getOperand(1).getReg();
     CmpMask = ~0;
@@ -2619,32 +2729,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
 /// This function can be extended later on.
 inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
                                         unsigned SrcReg, unsigned SrcReg2,
-                                        int ImmValue, const MachineInstr *OI) {
-  if ((CmpI->getOpcode() == ARM::CMPrr ||
-       CmpI->getOpcode() == ARM::t2CMPrr) &&
-      (OI->getOpcode() == ARM::SUBrr ||
-       OI->getOpcode() == ARM::t2SUBrr) &&
+                                        int ImmValue, const MachineInstr *OI,
+                                        bool &IsThumb1) {
+  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
+      (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
       ((OI->getOperand(1).getReg() == SrcReg &&
         OI->getOperand(2).getReg() == SrcReg2) ||
        (OI->getOperand(1).getReg() == SrcReg2 &&
-        OI->getOperand(2).getReg() == SrcReg)))
+        OI->getOperand(2).getReg() == SrcReg))) {
+    IsThumb1 = false;
     return true;
+  }
 
-  if ((CmpI->getOpcode() == ARM::CMPri ||
-       CmpI->getOpcode() == ARM::t2CMPri) &&
-      (OI->getOpcode() == ARM::SUBri ||
-       OI->getOpcode() == ARM::t2SUBri) &&
+  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
+      ((OI->getOperand(2).getReg() == SrcReg &&
+        OI->getOperand(3).getReg() == SrcReg2) ||
+       (OI->getOperand(2).getReg() == SrcReg2 &&
+        OI->getOperand(3).getReg() == SrcReg))) {
+    IsThumb1 = true;
+    return true;
+  }
+
+  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
+      (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
       OI->getOperand(1).getReg() == SrcReg &&
-      OI->getOperand(2).getImm() == ImmValue)
+      OI->getOperand(2).getImm() == ImmValue) {
+    IsThumb1 = false;
+    return true;
+  }
+
+  if (CmpI->getOpcode() == ARM::tCMPi8 &&
+      (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
+      OI->getOperand(2).getReg() == SrcReg &&
+      OI->getOperand(3).getImm() == ImmValue) {
+    IsThumb1 = true;
     return true;
+  }
 
   if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
       (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
        OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
       OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
       OI->getOperand(0).getReg() == SrcReg &&
-      OI->getOperand(1).getReg() == SrcReg2)
+      OI->getOperand(1).getReg() == SrcReg2) {
+    IsThumb1 = false;
+    return true;
+  }
+
+  if (CmpI->getOpcode() == ARM::tCMPr &&
+      (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
+       OI->getOpcode() == ARM::tADDrr) &&
+      OI->getOperand(0).getReg() == SrcReg &&
+      OI->getOperand(2).getReg() == SrcReg2) {
+    IsThumb1 = true;
     return true;
+  }
+
   return false;
 }
 
@@ -2662,6 +2802,17 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
   case ARM::tSUBi3:
   case ARM::tSUBi8:
   case ARM::tMUL:
+  case ARM::tADC:
+  case ARM::tSBC:
+  case ARM::tRSB:
+  case ARM::tAND:
+  case ARM::tORR:
+  case ARM::tEOR:
+  case ARM::tBIC:
+  case ARM::tMVN:
+  case ARM::tASRri:
+  case ARM::tASRrr:
+  case ARM::tROR:
     IsThumb1 = true;
     LLVM_FALLTHROUGH;
   case ARM::RSBrr:
@@ -2761,7 +2912,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
     // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
     // Thus we cannot return here.
     if (CmpInstr.getOpcode() == ARM::CMPri ||
-        CmpInstr.getOpcode() == ARM::t2CMPri)
+        CmpInstr.getOpcode() == ARM::t2CMPri ||
+        CmpInstr.getOpcode() == ARM::tCMPi8)
       MI = nullptr;
     else
       return false;
@@ -2783,20 +2935,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
   // CMP. This peephole works on the vregs, so is still in SSA form. As a
   // consequence, the movs won't redefine/kill the MUL operands which would
   // make this reordering illegal.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
   if (MI && IsThumb1) {
     --I;
-    bool CanReorder = true;
-    const bool HasStmts = I != E;
-    for (; I != E; --I) {
-      if (I->getOpcode() != ARM::tMOVi8) {
-        CanReorder = false;
-        break;
+    if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
+      bool CanReorder = true;
+      for (; I != E; --I) {
+        if (I->getOpcode() != ARM::tMOVi8) {
+          CanReorder = false;
+          break;
+        }
+      }
+      if (CanReorder) {
+        MI = MI->removeFromParent();
+        E = CmpInstr;
+        CmpInstr.getParent()->insert(E, MI);
       }
-    }
-    if (HasStmts && CanReorder) {
-      MI = MI->removeFromParent();
-      E = CmpInstr;
-      CmpInstr.getParent()->insert(E, MI);
     }
     I = CmpInstr;
     E = MI;
@@ -2804,12 +2958,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
 
   // Check that CPSR isn't set between the comparison instruction and the one we
   // want to change. At the same time, search for SubAdd.
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool SubAddIsThumb1 = false;
   do {
     const MachineInstr &Instr = *--I;
 
     // Check whether CmpInstr can be made redundant by the current instruction.
-    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
+    if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
+                             SubAddIsThumb1)) {
       SubAdd = &*I;
       break;
     }
@@ -2824,14 +2979,25 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
       // change. We can't do this transformation.
       return false;
 
-  } while (I != B);
+    if (I == B) {
+      // In some cases, we scan the use-list of an instruction for an AND;
+      // that AND is in the same BB, but may not be scheduled before the
+      // corresponding TST.  In that case, bail out.
+      //
+      // FIXME: We could try to reschedule the AND.
+      return false;
+    }
+  } while (true);
 
   // Return false if no candidates exist.
   if (!MI && !SubAdd)
     return false;
 
-  // The single candidate is called MI.
-  if (!MI) MI = SubAdd;
+  // If we found a SubAdd, use it as it will be closer to the CMP
+  if (SubAdd) {
+    MI = SubAdd;
+    IsThumb1 = SubAddIsThumb1;
+  }
 
   // We can't use a predicated instruction - it doesn't always write the flags.
   if (isPredicated(*MI))
@@ -2899,9 +3065,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
         // operands will be modified.
         unsigned Opc = SubAdd->getOpcode();
         bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
-                     Opc == ARM::SUBri || Opc == ARM::t2SUBri;
-        if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
-                       SubAdd->getOperand(2).getReg() == SrcReg)) {
+                     Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+                     Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+                     Opc == ARM::tSUBi8;
+        unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
+        if (!IsSub ||
+            (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
+             SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
           // VSel doesn't support condition code update.
           if (IsInstrVSel)
             return false;
@@ -2979,9 +3149,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
   ++Next;
   unsigned SrcReg, SrcReg2;
   int CmpMask, CmpValue;
+  bool IsThumb1;
   if (Next != MI.getParent()->end() &&
       analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
-      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
+      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
     return false;
   return true;
 }
@@ -3372,7 +3543,12 @@ unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
        I != E; ++I) {
     Size += (*I)->getSize();
   }
-  return Size / 4;
+  // FIXME: The scheduler currently can't handle values larger than 16. But
+  // the values can actually go up to 32 for floating-point load/store
+  // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
+  // operations isn't right; we could end up with "extra" memory operands for
+  // various reasons, like tail merge merging two memory operations.
+  return std::min(Size / 4, 16U);
 }
 
 static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
@@ -4093,7 +4269,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
     // instructions).
     if (Latency > 0 && Subtarget.isThumb2()) {
       const MachineFunction *MF = DefMI.getParent()->getParent();
-      // FIXME: Use Function::optForSize().
+      // FIXME: Use Function::hasOptSize().
       if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
         --Latency;
     }
@@ -4517,6 +4693,31 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
     return false;
   }
+  if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
+    // Make sure we don't generate a lo-lo mov that isn't supported.
+    if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
+        !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
+      ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
+      return false;
+    }
+  }
+  if (MI.getOpcode() == ARM::tPUSH ||
+      MI.getOpcode() == ARM::tPOP ||
+      MI.getOpcode() == ARM::tPOP_RET) {
+    for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
+      if (MI.getOperand(i).isImplicit() ||
+          !MI.getOperand(i).isReg())
+        continue;
+      unsigned Reg = MI.getOperand(i).getReg();
+      if (Reg < ARM::R0 || Reg > ARM::R7) {
+        if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
+            !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
+          ErrInfo = "Unsupported register in Thumb1 push/pop";
+          return false;
+        }
+      }
+    }
+  }
   return true;
 }
 
@@ -5107,3 +5308,44 @@ ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
       {MO_NONLAZY, "arm-nonlazy"}};
   return makeArrayRef(TargetFlags);
 }
+
+bool llvm::registerDefinedBetween(unsigned Reg,
+                                  MachineBasicBlock::iterator From,
+                                  MachineBasicBlock::iterator To,
+                                  const TargetRegisterInfo *TRI) {
+  for (auto I = From; I != To; ++I)
+    if (I->modifiesRegister(Reg, TRI))
+      return true;
+  return false;
+}
+
+MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
+                                         const TargetRegisterInfo *TRI) {
+  // Search backwards to the instruction that defines CPSR. This may or may
+  // not be a CMP; we check that after this loop. If we find another
+  // instruction that reads CPSR, we return nullptr.
+  MachineBasicBlock::iterator CmpMI = Br;
+  while (CmpMI != Br->getParent()->begin()) {
+    --CmpMI;
+    if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
+      break;
+    if (CmpMI->readsRegister(ARM::CPSR, TRI))
+      break;
+  }
+
+  // Check that this inst is a CMP r[0-7], #0 and that the register
+  // is not redefined between the cmp and the br.
+  if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
+    return nullptr;
+  unsigned Reg = CmpMI->getOperand(0).getReg();
+  unsigned PredReg = 0;
+  ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
+  if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
+    return nullptr;
+  if (!isARMLowRegister(Reg))
+    return nullptr;
+  if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
+    return nullptr;
+
+  return &*CmpMI;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index de1f307083ba..c28983fcc15c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -399,6 +398,11 @@ private:
 
   void expandMEMCPY(MachineBasicBlock::iterator) const;
 
+  /// Identify instructions that can be folded into a MOVCC instruction, and
+  /// return the defining instruction.
+  MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+                                 const TargetInstrInfo *TII) const;
+
 private:
   /// Modeling special VFP / NEON fp MLA / MLS hazards.
 
@@ -478,6 +482,21 @@ bool isUncondBranchOpcode(int Opc) {
   return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
 }
 
+static inline bool isVPTOpcode(int Opc) {
+  return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 ||
+         Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 ||
+         Opc == ARM::MVE_VPTv8u16 || Opc == ARM::MVE_VPTv8s16 ||
+         Opc == ARM::MVE_VPTv4i32 || Opc == ARM::MVE_VPTv4u32 ||
+         Opc == ARM::MVE_VPTv4s32 || Opc == ARM::MVE_VPTv4f32 ||
+         Opc == ARM::MVE_VPTv8f16 || Opc == ARM::MVE_VPTv16i8r ||
+         Opc == ARM::MVE_VPTv16u8r || Opc == ARM::MVE_VPTv16s8r ||
+         Opc == ARM::MVE_VPTv8i16r || Opc == ARM::MVE_VPTv8u16r ||
+         Opc == ARM::MVE_VPTv8s16r || Opc == ARM::MVE_VPTv4i32r ||
+         Opc == ARM::MVE_VPTv4u32r || Opc == ARM::MVE_VPTv4s32r ||
+         Opc == ARM::MVE_VPTv4f32r || Opc == ARM::MVE_VPTv8f16r ||
+         Opc == ARM::MVE_VPST;
+}
+
 static inline
 bool isCondBranchOpcode(int Opc) {
   return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
@@ -505,6 +524,28 @@ static inline bool isPushOpcode(int Opc) {
          Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
 }
 
+/// isValidCoprocessorNumber - decide whether an explicit coprocessor
+/// number is legal in generic instructions like CDP. The answer can
+/// vary with the subtarget.
+static inline bool isValidCoprocessorNumber(unsigned Num,
+                                            const FeatureBitset& featureBits) {
+  // Armv8-A disallows everything *other* than 111x (CP14 and CP15).
+  if (featureBits[ARM::HasV8Ops] && (Num & 0xE) != 0xE)
+    return false;
+
+  // Armv7 disallows 101x (CP10 and CP11), which clash with VFP/NEON.
+  if (featureBits[ARM::HasV7Ops] && (Num & 0xE) == 0xA)
+    return false;
+
+  // Armv8.1-M also disallows 100x (CP8,CP9) and 111x (CP14,CP15)
+  // which clash with MVE.
+  if (featureBits[ARM::HasV8_1MMainlineOps] &&
+      ((Num & 0xE) == 0x8 || (Num & 0xE) == 0xE))
+    return false;
+
+  return true;
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
@@ -512,12 +553,6 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
 
 unsigned getMatchingCondBranchOpcode(unsigned Opc);
 
-/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
-/// opcode of the SSA instruction representing the conditional MI.
-unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
-                                  MachineInstr *&MI,
-                                  const MachineRegisterInfo &MRI);
-
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
 /// the instruction is encoded with an 'S' bit is determined by the optional
 /// CPSR def operand.
@@ -568,6 +603,23 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                          unsigned FrameReg, int &Offset,
                          const ARMBaseInstrInfo &TII);
 
+/// Return true if Reg is defined between From and To.
+bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From,
+                            MachineBasicBlock::iterator To,
+                            const TargetRegisterInfo *TRI);
+
+/// Search backwards from a tBcc to find a tCMPi8 or t2CMPri against 0, meaning
+/// we can convert them to a tCBZ or tCBNZ. Return nullptr if not found.
+MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br,
+                                   const TargetRegisterInfo *TRI);
+
+void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB);
+void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg);
+
+void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
+void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
+                              unsigned Inactive);
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 02b3daf3c6fd..dc99b37742da 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -150,7 +149,7 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
 const uint32_t *
 ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
   const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-  if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+  if (!STI.useSoftFloat() && STI.hasVFP2Base() && !STI.isThumb1Only())
     return CSR_NoRegs_RegMask;
   else
     return CSR_FPRegs_RegMask;
@@ -194,7 +193,7 @@ getReservedRegs(const MachineFunction &MF) const {
   if (STI.isR9Reserved())
     markSuperRegs(Reserved, ARM::R9);
   // Reserve D16-D31 if the subtarget doesn't support them.
-  if (!STI.hasVFP3() || STI.hasD16()) {
+  if (!STI.hasD32()) {
     static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
     for (unsigned R = 0; R < 16; ++R)
       markSuperRegs(Reserved, ARM::D16 + R);
@@ -204,6 +203,8 @@ getReservedRegs(const MachineFunction &MF) const {
     for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
       if (Reserved.test(*SI))
         markSuperRegs(Reserved, Reg);
+  // For v8.1m architecture
+  markSuperRegs(Reserved, ARM::ZR);
 
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
@@ -369,29 +370,35 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const ARMFrameLowering *TFI = getFrameLowering(MF);
 
-  // When outgoing call frames are so large that we adjust the stack pointer
-  // around the call, we can no longer use the stack pointer to reach the
-  // emergency spill slot.
+  // If we have stack realignment and VLAs, we have no pointer to use to
+  // access the stack. If we have stack realignment, and a large call frame,
+  // we have no place to allocate the emergency spill slot.
   if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
     return true;
 
   // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
   // negative range for ldr/str (255), and thumb1 is positive offsets only.
+  //
   // It's going to be better to use the SP or Base Pointer instead. When there
   // are variable sized objects, we can't reference off of the SP, so we
   // reserve a Base Pointer.
-  if (AFI->isThumbFunction() && MFI.hasVarSizedObjects()) {
-    // Conservatively estimate whether the negative offset from the frame
-    // pointer will be sufficient to reach. If a function has a smallish
-    // frame, it's less likely to have lots of spills and callee saved
-    // space, so it's all more likely to be within range of the frame pointer.
-    // If it's wrong, the scavenger will still enable access to work, it just
-    // won't be optimal.
-    if (AFI->isThumb2Function() && MFI.getLocalFrameSize() < 128)
-      return false;
+  //
+  // For Thumb2, estimate whether a negative offset from the frame pointer
+  // will be sufficient to reach the whole stack frame. If a function has a
+  // smallish frame, it's less likely to have lots of spills and callee saved
+  // space, so it's all more likely to be within range of the frame pointer.
+  // If it's wrong, the scavenger will still enable access to work, it just
+  // won't be optimal.  (We should always be able to reach the emergency
+  // spill slot from the frame pointer.)
+  if (AFI->isThumb2Function() && MFI.hasVarSizedObjects() &&
+      MFI.getLocalFrameSize() >= 128)
+    return true;
+  // For Thumb1, if sp moves, nothing is in range, so force a base pointer.
+  // This is necessary for correctness in cases where we need an emergency
+  // spill slot. (In Thumb1, we can't use a negative offset from the frame
+  // pointer.)
+  if (AFI->isThumb1OnlyFunction() && !TFI->hasReservedCallFrame(MF))
     return true;
-  }
-
   return false;
 }
 
@@ -425,7 +432,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
     || needsStackRealignment(MF);
 }
 
-unsigned
+Register
 ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
   const ARMFrameLowering *TFI = getFrameLowering(MF);
@@ -785,7 +792,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int PIdx = MI.findFirstPredOperandIdx();
   ARMCC::CondCodes Pred = (PIdx == -1)
     ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
-  unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+  Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
   if (Offset == 0)
     // Must be addrmode4/6.
     MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 45d29ebc0bd3..7e2c72b4d712 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -174,7 +173,7 @@ public:
   bool cannotEliminateFrame(const MachineFunction &MF) const;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
   unsigned getBaseRegister() const { return BasePtr; }
 
   bool isLowRegister(unsigned Reg) const;
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.cpp b/lib/Target/ARM/ARMBasicBlockInfo.cpp
new file mode 100644
index 000000000000..2de90e816b33
--- /dev/null
+++ b/lib/Target/ARM/ARMBasicBlockInfo.cpp
@@ -0,0 +1,146 @@
+//===--- ARMBasicBlockInfo.cpp - Utilities for block sizes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <vector>
+
+#define DEBUG_TYPE "arm-bb-utils"
+
+using namespace llvm;
+
+namespace llvm {
+
+// mayOptimizeThumb2Instruction - Returns true if MI's opcode is one that the
+// optimizeThumb2* steps named in the case-label comments may shrink.
+static bool
+mayOptimizeThumb2Instruction(const MachineInstr *MI) {
+  switch(MI->getOpcode()) {
+    // optimizeThumb2Instructions.
+    case ARM::t2LEApcrel:
+    case ARM::t2LDRpci:
+    // optimizeThumb2Branches.
+    case ARM::t2B:
+    case ARM::t2Bcc:
+    case ARM::tBcc:
+    // optimizeThumb2JumpTables.
+    case ARM::t2BR_JT:
+    case ARM::tBR_JTr:
+      return true;
+  }
+  return false;
+}
+
+void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) {
+  LLVM_DEBUG(dbgs() << "computeBlockSize: " << MBB->getName() << "\n");
+  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+  BBI.Size = 0;
+  BBI.Unalign = 0;
+  BBI.PostAlign = 0;
+
+  for (MachineInstr &I : *MBB) {
+    BBI.Size += TII->getInstSizeInBytes(I);
+    // For inline asm, getInstSizeInBytes returns a conservative estimate.
+    // The actual size may be smaller, but still a multiple of the instr size.
+    if (I.isInlineAsm())
+      BBI.Unalign = isThumb ? 1 : 2;
+    // Also consider instructions that may be shrunk later.
+    else if (isThumb && mayOptimizeThumb2Instruction(&I))
+      BBI.Unalign = 1;
+  }
+
+  // tBR_JTr contains a .align 2 directive.
+  if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+    BBI.PostAlign = 2;
+    MBB->getParent()->ensureAlignment(2);
+  }
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function.  This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMBasicBlockUtils::getOffsetOf(MachineInstr *MI) const {
+  const MachineBasicBlock *MBB = MI->getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the block
+  // it is in.
+  unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+  // Sum instructions before MI in MBB.
+  for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != MI; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    Offset += TII->getInstSizeInBytes(*I);
+  }
+  return Offset;
+}
+
+/// isBBInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool ARMBasicBlockUtils::isBBInRange(MachineInstr *MI,
+                                     MachineBasicBlock *DestBB,
+                                     unsigned MaxDisp) const {
+  unsigned PCAdj      = isThumb ? 4 : 8;
+  unsigned BrOffset   = getOffsetOf(MI) + PCAdj;
+  unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+  LLVM_DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB)
+                    << " from " << printMBBReference(*MI->getParent())
+                    << " max delta=" << MaxDisp << " from " << getOffsetOf(MI)
+                    << " to " << DestOffset << " offset "
+                    << int(DestOffset - BrOffset) << "\t" << *MI);
+
+  if (BrOffset <= DestOffset) {
+    // Branch before the Dest.
+    if (DestOffset-BrOffset <= MaxDisp)
+      return true;
+  } else {
+    if (BrOffset-DestOffset <= MaxDisp)
+      return true;
+  }
+  return false;
+}
+
+void ARMBasicBlockUtils::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+  assert(BB->getParent() == &MF &&
+         "Basic block is not a child of the current function.\n");
+
+  unsigned BBNum = BB->getNumber();
+  LLVM_DEBUG(dbgs() << "Adjust block:\n"
+             << " - name: " << BB->getName() << "\n"
+             << " - number: " << BB->getNumber() << "\n"
+             << " - function: " << MF.getName() << "\n"
+             << "   - blocks: " << MF.getNumBlockIDs() << "\n");
+
+  for(unsigned i = BBNum + 1, e = MF.getNumBlockIDs(); i < e; ++i) {
+    // Get the offset and known bits at the end of the layout predecessor.
+    // Include the alignment of the current block.
+    unsigned LogAlign = MF.getBlockNumbered(i)->getAlignment();
+    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+    // This is where block i begins.  Stop if the offset is already correct,
+    // and we have updated 2 blocks.  This is the maximum number of blocks
+    // changed before calling this function.
+    if (i > BBNum + 2 &&
+        BBInfo[i].Offset == Offset &&
+        BBInfo[i].KnownBits == KnownBits)
+      break;
+
+    BBInfo[i].Offset = Offset;
+    BBInfo[i].KnownBits = KnownBits;
+  }
+}
+
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h
index e0cb0aa676a6..400bba351cec 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMBasicBlockInfo.h - Basic Block Information -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,12 +13,16 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
 #define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
 
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cstdint>
 
 namespace llvm {
 
+using BBInfoVector = SmallVectorImpl<BasicBlockInfo>;
+
 /// UnknownPadding - Return the worst case padding that could result from
 /// unknown offset bits.  This does not include alignment padding caused by
 /// known offset bits.
@@ -104,6 +107,54 @@ struct BasicBlockInfo {
   }
 };
 
+class ARMBasicBlockUtils {
+
+private:
+  MachineFunction &MF;
+  bool isThumb = false;
+  const ARMBaseInstrInfo *TII = nullptr;
+  SmallVector<BasicBlockInfo, 8> BBInfo;
+
+public:
+  ARMBasicBlockUtils(MachineFunction &MF) : MF(MF) {
+    TII =
+      static_cast<const ARMBaseInstrInfo*>(MF.getSubtarget().getInstrInfo());
+    isThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+  }
+
+  void computeAllBlockSizes() {
+    BBInfo.resize(MF.getNumBlockIDs());
+    for (MachineBasicBlock &MBB : MF)
+      computeBlockSize(&MBB);
+  }
+
+  void computeBlockSize(MachineBasicBlock *MBB);
+
+  unsigned getOffsetOf(MachineInstr *MI) const;
+
+  unsigned getOffsetOf(MachineBasicBlock *MBB) const {
+    return BBInfo[MBB->getNumber()].Offset;
+  }
+
+  void adjustBBOffsetsAfter(MachineBasicBlock *MBB);
+
+  void adjustBBSize(MachineBasicBlock *MBB, int Size) {
+    BBInfo[MBB->getNumber()].Size += Size;
+  }
+
+  bool isBBInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+                   unsigned MaxDisp) const;
+
+  void insert(unsigned BBNum, BasicBlockInfo BBI) {
+    BBInfo.insert(BBInfo.begin() + BBNum, BBI);
+  }
+
+  void clear() { BBInfo.clear(); }
+
+  BBInfoVector &getBBInfo() { return BBInfo; }
+
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 8e80c32bcf89..0cbe6e1871e4 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -1,9 +1,8 @@
 //===- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,7 +55,7 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
 static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
                             Type *T) {
   if (T->isArrayTy())
-    return true;
+    return isSupportedType(DL, TLI, T->getArrayElementType());
 
   if (T->isStructTy()) {
     // For now we only allow homogeneous structs that we can manipulate with
@@ -65,7 +64,7 @@ static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
     for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
       if (StructT->getElementType(i) != StructT->getElementType(0))
         return false;
-    return true;
+    return isSupportedType(DL, TLI, StructT->getElementType(0));
   }
 
   EVT VT = TLI.getValueType(DL, T, true);
@@ -91,27 +90,27 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
                        MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
            "Unsupported size");
 
     LLT p0 = LLT::pointer(0, 32);
     LLT s32 = LLT::scalar(32);
-    unsigned SPReg = MRI.createGenericVirtualRegister(p0);
-    MIRBuilder.buildCopy(SPReg, ARM::SP);
+    Register SPReg = MRI.createGenericVirtualRegister(p0);
+    MIRBuilder.buildCopy(SPReg, Register(ARM::SP));
 
-    unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+    Register OffsetReg = MRI.createGenericVirtualRegister(s32);
     MIRBuilder.buildConstant(OffsetReg, Offset);
 
-    unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+    Register AddrReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 
     MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
     return AddrReg;
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
     assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -119,25 +118,27 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
     assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
     assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
 
-    unsigned ExtReg = extendRegister(ValVReg, VA);
+    Register ExtReg = extendRegister(ValVReg, VA);
     MIRBuilder.buildCopy(PhysReg, ExtReg);
     MIB.addUse(PhysReg, RegState::Implicit);
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
     assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
            "Unsupported size");
 
-    unsigned ExtReg = extendRegister(ValVReg, VA);
+    Register ExtReg = extendRegister(ValVReg, VA);
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
-        /* Alignment */ 0);
+        /* Alignment */ 1);
     MIRBuilder.buildStore(ExtReg, Addr, *MMO);
   }
 
   unsigned assignCustomValue(const CallLowering::ArgInfo &Arg,
                              ArrayRef<CCValAssign> VAs) override {
+    assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");
+
     CCValAssign VA = VAs[0];
     assert(VA.needsCustom() && "Value doesn't need custom handling");
     assert(VA.getValVT() == MVT::f64 && "Unsupported type");
@@ -152,9 +153,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
     assert(VA.isRegLoc() && "Value should be in reg");
     assert(NextVA.isRegLoc() && "Value should be in reg");
 
-    unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+    Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
                           MRI.createGenericVirtualRegister(LLT::scalar(32))};
-    MIRBuilder.buildUnmerge(NewRegs, Arg.Reg);
+    MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);
 
     bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
     if (!IsLittle)
@@ -183,18 +184,17 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
 
 } // end anonymous namespace
 
-void ARMCallLowering::splitToValueTypes(
-    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    MachineFunction &MF, const SplitArgTy &PerformArgSplit) const {
+void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+                                        SmallVectorImpl<ArgInfo> &SplitArgs,
+                                        MachineFunction &MF) const {
   const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
   const DataLayout &DL = MF.getDataLayout();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0);
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
 
   if (SplitVTs.size() == 1) {
     // Even if there is no splitting to do, we still want to replace the
@@ -202,12 +202,12 @@ void ARMCallLowering::splitToValueTypes(
     auto Flags = OrigArg.Flags;
     unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
     Flags.setOrigAlign(OriginalAlignment);
-    SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags,
-                           OrigArg.IsFixed);
+    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+                           Flags, OrigArg.IsFixed);
     return;
   }
 
-  unsigned FirstRegIdx = SplitArgs.size();
+  // Create one ArgInfo for each virtual register.
   for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
     EVT SplitVT = SplitVTs[i];
     Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
@@ -225,19 +225,16 @@ void ARMCallLowering::splitToValueTypes(
         Flags.setInConsecutiveRegsLast();
     }
 
-    SplitArgs.push_back(
-        ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
-                SplitTy, Flags, OrigArg.IsFixed});
+    // FIXME: We also want to split SplitTy further.
+    Register PartReg = OrigArg.Regs[i];
+    SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed);
   }
-
-  for (unsigned i = 0; i < Offsets.size(); ++i)
-    PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
 }
 
 /// Lower the return value for the already existing \p Ret. This assumes that
 /// \p MIRBuilder's insertion point is correct.
 bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
-                                     const Value *Val, ArrayRef<unsigned> VRegs,
+                                     const Value *Val, ArrayRef<Register> VRegs,
                                      MachineInstrBuilder &Ret) const {
   if (!Val)
     // Nothing to do here.
@@ -251,35 +248,22 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
   if (!isSupportedType(DL, TLI, Val->getType()))
     return false;
 
-  SmallVector<EVT, 4> SplitEVTs;
-  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
-  assert(VRegs.size() == SplitEVTs.size() &&
-         "For each split Type there should be exactly one VReg.");
-
-  SmallVector<ArgInfo, 4> SplitVTs;
-  LLVMContext &Ctx = Val->getType()->getContext();
-  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-    ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
-    setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-
-    SmallVector<unsigned, 4> Regs;
-    splitToValueTypes(
-        CurArgInfo, SplitVTs, MF,
-        [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
-    if (Regs.size() > 1)
-      MIRBuilder.buildUnmerge(Regs, VRegs[i]);
-  }
+  ArgInfo OrigRetInfo(VRegs, Val->getType());
+  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+
+  SmallVector<ArgInfo, 4> SplitRetInfos;
+  splitToValueTypes(OrigRetInfo, SplitRetInfos, MF);
 
   CCAssignFn *AssignFn =
       TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
 
   OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
-  return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
+  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
 }
 
 bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                   const Value *Val,
-                                  ArrayRef<unsigned> VRegs) const {
+                                  ArrayRef<Register> VRegs) const {
   assert(!Val == VRegs.empty() && "Return value without a vreg");
 
   auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
@@ -302,7 +286,9 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
                        CCAssignFn AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  bool isArgumentHandler() const override { return true; }
+
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
            "Unsupported size");
@@ -319,7 +305,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     return AddrReg;
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
     assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
            "Unsupported size");
@@ -332,22 +318,22 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
       assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
 
       auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
-      buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO);
+      buildLoad(LoadVReg, Addr, Size, /* Alignment */ 1, MPO);
       MIRBuilder.buildTrunc(ValVReg, LoadVReg);
     } else {
       // If the value is not extended, a simple load will suffice.
-      buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+      buildLoad(ValVReg, Addr, Size, /* Alignment */ 1, MPO);
     }
   }
 
-  void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
+  void buildLoad(Register Val, Register Addr, uint64_t Size, unsigned Alignment,
                  MachinePointerInfo &MPO) {
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOLoad, Size, Alignment);
     MIRBuilder.buildLoad(Val, Addr, *MMO);
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
     assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -376,6 +362,8 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
 
   unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg,
                              ArrayRef<CCValAssign> VAs) override {
+    assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");
+
     CCValAssign VA = VAs[0];
     assert(VA.needsCustom() && "Value doesn't need custom handling");
     assert(VA.getValVT() == MVT::f64 && "Unsupported type");
@@ -390,7 +378,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     assert(VA.isRegLoc() && "Value should be in reg");
     assert(NextVA.isRegLoc() && "Value should be in reg");
 
-    unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+    Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
                           MRI.createGenericVirtualRegister(LLT::scalar(32))};
 
     assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
@@ -400,7 +388,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     if (!IsLittle)
       std::swap(NewRegs[0], NewRegs[1]);
 
-    MIRBuilder.buildMerge(Arg.Reg, NewRegs);
+    MIRBuilder.buildMerge(Arg.Regs[0], NewRegs);
 
     return 1;
   }
@@ -423,9 +411,9 @@ struct FormalArgHandler : public IncomingValueHandler {
 
 } // end anonymous namespace
 
-bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                           const Function &F,
-                                           ArrayRef<unsigned> VRegs) const {
+bool ARMCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   auto &TLI = *getTLI<ARMTargetLowering>();
   auto Subtarget = TLI.getSubtarget();
 
@@ -456,21 +444,13 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
                               AssignFn);
 
-  SmallVector<ArgInfo, 8> ArgInfos;
-  SmallVector<unsigned, 4> SplitRegs;
+  SmallVector<ArgInfo, 8> SplitArgInfos;
   unsigned Idx = 0;
   for (auto &Arg : F.args()) {
-    ArgInfo AInfo(VRegs[Idx], Arg.getType());
-    setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
-
-    SplitRegs.clear();
-
-    splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
-      SplitRegs.push_back(Reg);
-    });
+    ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType());
 
-    if (!SplitRegs.empty())
-      MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
+    setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F);
+    splitToValueTypes(OrigArgInfo, SplitArgInfos, MF);
 
     Idx++;
   }
@@ -478,7 +458,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());
 
-  if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+  if (!handleAssignments(MIRBuilder, SplitArgInfos, ArgHandler))
     return false;
 
   // Move back to the end of the basic block.
@@ -540,19 +520,19 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   // Create the call instruction so we can add the implicit uses of arg
   // registers, but don't insert it yet.
-  bool isDirect = !Callee.isReg();
-  auto CallOpcode = getCallOpcode(STI, isDirect);
+  bool IsDirect = !Callee.isReg();
+  auto CallOpcode = getCallOpcode(STI, IsDirect);
   auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
 
-  bool isThumb = STI.isThumb();
-  if (isThumb)
+  bool IsThumb = STI.isThumb();
+  if (IsThumb)
     MIB.add(predOps(ARMCC::AL));
 
   MIB.add(Callee);
-  if (!isDirect) {
+  if (!IsDirect) {
     auto CalleeReg = Callee.getReg();
     if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
-      unsigned CalleeIdx = isThumb ? 2 : 0;
+      unsigned CalleeIdx = IsThumb ? 2 : 0;
       MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
           MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
           *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
@@ -561,27 +541,22 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
 
+  bool IsVarArg = false;
   SmallVector<ArgInfo, 8> ArgInfos;
   for (auto Arg : OrigArgs) {
     if (!isSupportedType(DL, TLI, Arg.Ty))
       return false;
 
     if (!Arg.IsFixed)
-      return false;
+      IsVarArg = true;
 
     if (Arg.Flags.isByVal())
       return false;
 
-    SmallVector<unsigned, 8> Regs;
-    splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
-      Regs.push_back(Reg);
-    });
-
-    if (Regs.size() > 1)
-      MIRBuilder.buildUnmerge(Regs, Arg.Reg);
+    splitToValueTypes(Arg, ArgInfos, MF);
   }
 
-  auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+  auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg);
   OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
   if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
     return false;
@@ -594,22 +569,11 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
 
     ArgInfos.clear();
-    SmallVector<unsigned, 8> SplitRegs;
-    splitToValueTypes(OrigRet, ArgInfos, MF,
-                      [&](unsigned Reg, uint64_t Offset) {
-                        SplitRegs.push_back(Reg);
-                      });
-
-    auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
+    splitToValueTypes(OrigRet, ArgInfos, MF);
+    auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg);
     CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
     if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
       return false;
-
-    if (!SplitRegs.empty()) {
-      // We have split the value and allocated each individual piece, now build
-      // it up again.
-      MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
-    }
   }
 
   // We now know the size of the stack - update the ADJCALLSTACKDOWN
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 45a988a2f00e..794127b5ebc7 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -1,9 +1,8 @@
 //===- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,10 +33,10 @@ public:
   ARMCallLowering(const ARMTargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<unsigned> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
@@ -45,17 +44,14 @@ public:
 
 private:
   bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
-                      ArrayRef<unsigned> VRegs,
+                      ArrayRef<Register> VRegs,
                       MachineInstrBuilder &Ret) const;
 
-  using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
-
   /// Split an argument into one or more arguments that the CC lowering can cope
-  /// with (e.g. replace pointers with integers).
+  /// with.
   void splitToValueTypes(const ArgInfo &OrigArg,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
-                         MachineFunction &MF,
-                         const SplitArgTy &PerformArgSplit) const;
+                         MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.cpp b/lib/Target/ARM/ARMCallingConv.cpp
new file mode 100644
index 000000000000..5ede7c67f7c2
--- /dev/null
+++ b/lib/Target/ARM/ARMCallingConv.cpp
@@ -0,0 +1,284 @@
+//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen, and includes the table generated implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMCallingConv.h"
+#include "ARMSubtarget.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+// APCS f64 takes two registers out of R0-R3, possibly split to the stack.
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                          CCValAssign::LocInfo &LocInfo,
+                          CCState &State, bool CanFail) {
+  static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  // Try to get the first register.
+  if (unsigned Reg = State.AllocateReg(RegList))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else {
+    // Only the 2nd half of a v2f64 (CanFail == false) must not fail here.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack: 8 bytes, 4-byte aligned.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 4),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  // Try to get the second register; otherwise that half gets a 4-byte slot.
+  if (unsigned Reg = State.AllocateReg(RegList))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(4, 4),
+                                           LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                   CCValAssign::LocInfo &LocInfo,
+                                   ISD::ArgFlagsTy &ArgFlags,
+                                   CCState &State) {
+  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;  // no GPR left for the first f64: not handled
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;  // not reached: the helper cannot fail when !CanFail
+  return true;  // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs: R0/R1 or R2/R3.
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                           CCValAssign::LocInfo &LocInfo,
+                           CCState &State, bool CanFail) {
+  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+  static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+  static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
+  if (Reg == 0) {
+
+    // If only R3 was left unallocated, we still have to waste it.
+    Reg = State.AllocateReg(GPRArgRegs);
+    assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
+    // Only the 2nd half of a v2f64 (CanFail == false) must not fail here.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack: 8 bytes, 8-byte aligned.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 8),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  unsigned i;  // index of Reg within HiRegList
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  unsigned T = State.AllocateReg(LoRegList[i]);
+  (void)T;  // T is only read by the assert below
+  assert(T == LoRegList[i] && "Could not allocate register");
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags,
+                                    CCState &State) {
+  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;  // no aligned GPR pair left for the first f64: not handled
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;  // not reached: the helper cannot fail when !CanFail
+  return true;  // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                         CCValAssign::LocInfo &LocInfo, CCState &State) {
+  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+  if (Reg == 0)
+    return false; // we didn't handle it
+
+  unsigned i;  // index of Reg within HiRegList
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;  // both register locations recorded
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
+  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;  // no R0/R2 available for the first f64: not handled
+  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;  // the second f64 of a v2f64 could not be placed
+  return true;  // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                       CCValAssign::LocInfo &LocInfo,
+                                       ISD::ArgFlagsTy &ArgFlags,
+                                       CCState &State) {
+  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                   State);  // AAPCS reuses the APCS return path
+}
+
+static const MCPhysReg RRegList[] = { ARM::R0,  ARM::R1,  ARM::R2,  ARM::R3 };
+// S/D/Q candidates for f16/f32, v4f16/f64 and v8f16/v2f64 members.
+static const MCPhysReg SRegList[] = { ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
+                                      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
+                                      ARM::S8,  ARM::S9,  ARM::S10, ARM::S11,
+                                      ARM::S12, ARM::S13, ARM::S14,  ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
+                                          MVT &LocVT,
+                                          CCValAssign::LocInfo &LocInfo,
+                                          ISD::ArgFlagsTy &ArgFlags,
+                                          CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // AAPCS HFAs must have 1-4 elements, all of the same type
+  if (PendingMembers.size() > 0)
+    assert(PendingMembers[0].getLocVT() == LocVT);
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // aggregate. Store the type's required alignment as extra info for later: in
+  // the [N x i64] case all trace has been removed by the time we actually get
+  // to do allocation.
+  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
+                                                   ArgFlags.getOrigAlign()));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;  // more members to come; allocate once the last one arrives
+
+  // Try to allocate a contiguous block of registers, each of the correct
+  // size to hold one member.
+  auto &DL = State.getMachineFunction().getDataLayout();
+  unsigned StackAlign = DL.getStackAlignment();
+  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
+
+  ArrayRef<MCPhysReg> RegList;
+  switch (LocVT.SimpleTy) {
+  case MVT::i32: {
+    RegList = RRegList;
+    unsigned RegIdx = State.getFirstUnallocated(RegList);
+
+    // First consume all registers that would give an unaligned object. Whether
+    // we go on stack or in regs, no-one will be using them in future.
+    unsigned RegAlign = alignTo(Align, 4) / 4;
+    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
+      State.AllocateReg(RegList[RegIdx++]);
+
+    break;
+  }
+  case MVT::f16:
+  case MVT::f32:
+    RegList = SRegList;
+    break;
+  case MVT::v4f16:
+  case MVT::f64:
+    RegList = DRegList;
+    break;
+  case MVT::v8f16:
+  case MVT::v2f64:
+    RegList = QRegList;
+    break;
+  default:
+    llvm_unreachable("Unexpected member type for block aggregate");
+    break;
+  }
+
+  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+  if (RegResult) {
+    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
+         It != PendingMembers.end(); ++It) {
+      It->convertToReg(RegResult);
+      State.addLoc(*It);
+      ++RegResult;
+    }
+    PendingMembers.clear();
+    return true;
+  }
+
+  // Register allocation failed, we'll be needing the stack
+  unsigned Size = LocVT.getSizeInBits() / 8;
+  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+    // If nothing else has used the stack until this point, a non-HFA aggregate
+    // can be split between regs and stack.
+    unsigned RegIdx = State.getFirstUnallocated(RegList);
+    for (auto &It : PendingMembers) {
+      if (RegIdx >= RegList.size())
+        It.convertToMem(State.AllocateStack(Size, Size));
+      else
+        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+      State.addLoc(It);
+    }
+    PendingMembers.clear();
+    return true;
+  } else if (LocVT != MVT::i32)
+    RegList = SRegList;  // block every S register, whatever the member type
+
+  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
+  for (auto Reg : RegList)
+    State.AllocateReg(Reg);
+
+  // After the first item has been allocated, the rest are packed as tightly as
+  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+  // be allocating a bunch of i32 slots).
+  unsigned RestAlign = std::min(Align, Size);
+
+  for (auto &It : PendingMembers) {
+    It.convertToMem(State.AllocateStack(Size, Align));
+    State.addLoc(It);
+    Align = RestAlign;
+  }
+
+  // All pending members have now been allocated
+  PendingMembers.clear();
+
+  // This will be allocated by the last member of the aggregate
+  return true;
+}
+
+// Include the table generated calling convention implementations.
+#include "ARMGenCallingConv.inc"
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 543165de38d0..615634551d90 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,292 +1,50 @@
 //=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the custom routines for the ARM Calling Convention that
-// aren't done by tablegen.
+// This file declares the entry points for ARM calling convention analysis.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
 #define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
 
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMSubtarget.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CallingConv.h"
 
 namespace llvm {
 
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          CCState &State, bool CanFail) {
-  static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-  // Try to get the first register.
-  if (unsigned Reg = State.AllocateReg(RegList))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else {
-    // For the 2nd half of a v2f64, do not fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 4),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  // Try to get the second register.
-  if (unsigned Reg = State.AllocateReg(RegList))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(4, 4),
-                                           LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags,
-                                   CCState &State) {
-  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                           CCValAssign::LocInfo &LocInfo,
-                           CCState &State, bool CanFail) {
-  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
-  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
-  static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
-  static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
-  if (Reg == 0) {
-
-    // If we had R3 unallocated only, now we still must to waste it.
-    Reg = State.AllocateReg(GPRArgRegs);
-    assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
-
-    // For the 2nd half of a v2f64, do not just fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 8),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  unsigned T = State.AllocateReg(LoRegList[i]);
-  (void)T;
-  assert(T == LoRegList[i] && "Could not allocate register");
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags,
-                                    CCState &State) {
-  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                         CCValAssign::LocInfo &LocInfo, CCState &State) {
-  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
-  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
-  if (Reg == 0)
-    return false; // we didn't handle it
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  return true;  // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
-  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
-                                   State);
-}
-
-static const MCPhysReg RRegList[] = { ARM::R0,  ARM::R1,  ARM::R2,  ARM::R3 };
-
-static const MCPhysReg SRegList[] = { ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
-                                      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
-                                      ARM::S8,  ARM::S9,  ARM::S10, ARM::S11,
-                                      ARM::S12, ARM::S13, ARM::S14,  ARM::S15 };
-static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
-                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
-
-
-// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
-// has InConsecutiveRegs set, and that the last member also has
-// InConsecutiveRegsLast set. We must process all members of the HA before
-// we can allocate it, as we need to know the total number of registers that
-// will be needed in order to (attempt to) allocate a contiguous block.
-static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
-                                          MVT &LocVT,
-                                          CCValAssign::LocInfo &LocInfo,
-                                          ISD::ArgFlagsTy &ArgFlags,
-                                          CCState &State) {
-  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
-  // AAPCS HFAs must have 1-4 elements, all of the same type
-  if (PendingMembers.size() > 0)
-    assert(PendingMembers[0].getLocVT() == LocVT);
-
-  // Add the argument to the list to be allocated once we know the size of the
-  // aggregate. Store the type's required alignmnent as extra info for later: in
-  // the [N x i64] case all trace has been removed by the time we actually get
-  // to do allocation.
-  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
-                                                   ArgFlags.getOrigAlign()));
-
-  if (!ArgFlags.isInConsecutiveRegsLast())
-    return true;
-
-  // Try to allocate a contiguous block of registers, each of the correct
-  // size to hold one member.
-  auto &DL = State.getMachineFunction().getDataLayout();
-  unsigned StackAlign = DL.getStackAlignment();
-  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
-
-  ArrayRef<MCPhysReg> RegList;
-  switch (LocVT.SimpleTy) {
-  case MVT::i32: {
-    RegList = RRegList;
-    unsigned RegIdx = State.getFirstUnallocated(RegList);
-
-    // First consume all registers that would give an unaligned object. Whether
-    // we go on stack or in regs, no-one will be using them in future.
-    unsigned RegAlign = alignTo(Align, 4) / 4;
-    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
-      State.AllocateReg(RegList[RegIdx++]);
-
-    break;
-  }
-  case MVT::f16:
-  case MVT::f32:
-    RegList = SRegList;
-    break;
-  case MVT::v4f16:
-  case MVT::f64:
-    RegList = DRegList;
-    break;
-  case MVT::v8f16:
-  case MVT::v2f64:
-    RegList = QRegList;
-    break;
-  default:
-    llvm_unreachable("Unexpected member type for block aggregate");
-    break;
-  }
-
-  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
-  if (RegResult) {
-    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
-         It != PendingMembers.end(); ++It) {
-      It->convertToReg(RegResult);
-      State.addLoc(*It);
-      ++RegResult;
-    }
-    PendingMembers.clear();
-    return true;
-  }
-
-  // Register allocation failed, we'll be needing the stack
-  unsigned Size = LocVT.getSizeInBits() / 8;
-  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
-    // If nothing else has used the stack until this point, a non-HFA aggregate
-    // can be split between regs and stack.
-    unsigned RegIdx = State.getFirstUnallocated(RegList);
-    for (auto &It : PendingMembers) {
-      if (RegIdx >= RegList.size())
-        It.convertToMem(State.AllocateStack(Size, Size));
-      else
-        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
-
-      State.addLoc(It);
-    }
-    PendingMembers.clear();
-    return true;
-  } else if (LocVT != MVT::i32)
-    RegList = SRegList;
-
-  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
-  for (auto Reg : RegList)
-    State.AllocateReg(Reg);
-
-  // After the first item has been allocated, the rest are packed as tightly as
-  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
-  // be allocating a bunch of i32 slots).
-  unsigned RestAlign = std::min(Align, Size);
-
-  for (auto &It : PendingMembers) {
-    It.convertToMem(State.AllocateStack(Size, Align));
-    State.addLoc(It);
-    Align = RestAlign;
-  }
-
-  // All pending members have now been allocated
-  PendingMembers.clear();
-
-  // This will be allocated by the last member of the aggregate
-  return true;
-}
-
-} // End llvm namespace
+bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                  CCState &State);
+bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                      CCState &State);
+bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                 CCState &State);
+bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                    CCState &State);
+bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                        CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                        CCState &State);
+
+} // namespace llvm
 
 #endif
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index f173e423f3e4..61d2d83ddc40 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,9 +1,8 @@
 //===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for ARM architecture.
@@ -16,6 +15,7 @@ class CCIfAlign<string Align, CCAction A>:
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention
 //===----------------------------------------------------------------------===//
+let Entry = 1 in
 def CC_ARM_APCS : CallingConv<[
 
   // Handles byval parameters.
@@ -30,8 +30,8 @@ def CC_ARM_APCS : CallingConv<[
   CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
@@ -44,6 +44,7 @@ def CC_ARM_APCS : CallingConv<[
   CCIfType<[v2f64], CCAssignToStack<16, 4>>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_APCS : CallingConv<[
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -55,8 +56,8 @@ def RetCC_ARM_APCS : CallingConv<[
   CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
 
@@ -67,10 +68,11 @@ def RetCC_ARM_APCS : CallingConv<[
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
 //===----------------------------------------------------------------------===//
+let Entry = 1 in
 def FastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -86,10 +88,11 @@ def FastCC_ARM_APCS : CallingConv<[
   CCDelegateTo<CC_ARM_APCS>
 ]>;
 
+let Entry = 1 in
 def RetFastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -102,10 +105,11 @@ def RetFastCC_ARM_APCS : CallingConv<[
 // ARM APCS Calling Convention for GHC
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_APCS_GHC : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
   CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
@@ -152,6 +156,7 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
 // ARM AAPCS (EABI) Calling Convention
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_AAPCS : CallingConv<[
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
@@ -160,8 +165,8 @@ def CC_ARM_AAPCS : CallingConv<[
   CCIfNest<CCAssignToReg<[R12]>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -174,10 +179,11 @@ def CC_ARM_AAPCS : CallingConv<[
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_AAPCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -196,13 +202,14 @@ def RetCC_ARM_AAPCS : CallingConv<[
 // Also used for FastCC (when VFP2 or later is available)
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_AAPCS_VFP : CallingConv<[
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -220,10 +227,11 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp
index b631c2bc687b..2fc5f4aaab50 100644
--- a/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -1,9 +1,8 @@
 //===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,15 +113,20 @@ class IRPromoter {
   SmallPtrSet<Value*, 8> Promoted;
   Module *M = nullptr;
   LLVMContext &Ctx;
+  // The type we promote to: always i32
   IntegerType *ExtTy = nullptr;
+  // The type of the value that the search began from, either i8 or i16.
+  // This defines the max range of the values that we allow in the promoted
+  // tree.
   IntegerType *OrigTy = nullptr;
-  SmallPtrSetImpl<Value*> *Visited;
+  SetVector<Value*> *Visited;
   SmallPtrSetImpl<Value*> *Sources;
   SmallPtrSetImpl<Instruction*> *Sinks;
   SmallPtrSetImpl<Instruction*> *SafeToPromote;
+  SmallPtrSetImpl<Instruction*> *SafeWrap;
 
   void ReplaceAllUsersOfWith(Value *From, Value *To);
-  void PrepareConstants(void);
+  void PrepareWrappingAdds(void);
   void ExtendSources(void);
   void ConvertTruncs(void);
   void PromoteTree(void);
@@ -135,10 +139,11 @@ public:
 
 
   void Mutate(Type *OrigTy,
-              SmallPtrSetImpl<Value*> &Visited,
+              SetVector<Value*> &Visited,
               SmallPtrSetImpl<Value*> &Sources,
               SmallPtrSetImpl<Instruction*> &Sinks,
-              SmallPtrSetImpl<Instruction*> &SafeToPromote);
+              SmallPtrSetImpl<Instruction*> &SafeToPromote,
+              SmallPtrSetImpl<Instruction*> &SafeWrap);
 };
 
 class ARMCodeGenPrepare : public FunctionPass {
@@ -146,8 +151,9 @@ class ARMCodeGenPrepare : public FunctionPass {
   IRPromoter *Promoter = nullptr;
   std::set<Value*> AllVisited;
   SmallPtrSet<Instruction*, 8> SafeToPromote;
+  SmallPtrSet<Instruction*, 4> SafeWrap;
 
-  bool isSafeOverflow(Instruction *I);
+  bool isSafeWrap(Instruction *I);
   bool isSupportedValue(Value *V);
   bool isLegalToPromote(Value *V);
   bool TryToPromote(Value *V);
@@ -172,13 +178,17 @@ public:
 
 }
 
-static bool generateSignBits(Value *V) {
+static bool GenerateSignBits(Value *V) {
+  if (auto *Arg = dyn_cast<Argument>(V))
+    return Arg->hasSExtAttr();
+
   if (!isa<Instruction>(V))
     return false;
 
   unsigned Opc = cast<Instruction>(V)->getOpcode();
   return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
-         Opc == Instruction::SRem;
+         Opc == Instruction::SRem || Opc == Instruction::SExt ||
+         Opc == Instruction::SIToFP;
 }
 
 static bool EqualTypeSize(Value *V) {
@@ -271,19 +281,14 @@ static bool isSink(Value *V) {
   return isa<CallInst>(V);
 }
 
-/// Return whether the instruction can be promoted within any modifications to
-/// its operands or result.
-bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
-  // FIXME Do we need NSW too?
-  if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
-    return true;
-
-  // We can support a, potentially, overflowing instruction (I) if:
+/// Return whether this instruction can safely wrap.
+bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
+  // We can support a, potentially, wrapping instruction (I) if:
   // - It is only used by an unsigned icmp.
   // - The icmp uses a constant.
-  // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
+  // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
   //   around zero to become a larger number than before.
-  // - The underflowing instruction (I) also uses a constant.
+  // - The wrapping instruction (I) also uses a constant.
   //
   // We can then use the two constants to calculate whether the result would
   // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
@@ -327,7 +332,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
   // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
   //
   // To demonstrate why we can't handle increasing values:
-  // 
+  //
   // %add = add i8 %a, 2
   // %cmp = icmp ult i8 %add, 127
   //
@@ -385,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
     return false;
 
   LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+  SafeWrap.insert(I);
   return true;
 }
 
@@ -408,13 +414,16 @@ static bool shouldPromote(Value *V) {
 /// Return whether we can safely mutate V's type to ExtTy without having to be
 /// concerned with zero extending or truncation.
 static bool isPromotedResultSafe(Value *V) {
+  if (GenerateSignBits(V))
+    return false;
+
   if (!isa<Instruction>(V))
     return true;
 
-  if (generateSignBits(V))
-    return false;
+  if (!isa<OverflowingBinaryOperator>(V))
+    return true;
 
-  return !isa<OverflowingBinaryOperator>(V);
+  return cast<Instruction>(V)->hasNoUnsignedWrap();
 }
 
 /// Return the intrinsic for the instruction that can perform the same
@@ -462,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
       InstsToRemove.insert(I);
 }
 
-void IRPromoter::PrepareConstants() {
+void IRPromoter::PrepareWrappingAdds() {
+  LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
   IRBuilder<> Builder{Ctx};
-  // First step is to prepare the instructions for mutation. Most constants
-  // just need to be zero extended into their new type, but complications arise
-  // because:
-  // - For nuw binary operators, negative immediates would need sign extending;
-  //   however, instead we'll change them to positive and zext them. We can do
-  //   this because:
-  //   > The operators that can wrap are: add, sub, mul and shl.
-  //   > shl interprets its second operand as unsigned and if the first operand
-  //     is an immediate, it will need zext to be nuw.
-  //   > I'm assuming mul has to interpret immediates as unsigned for nuw.
-  //   > Which leaves the nuw add and sub to be handled; as with shl, if an
-  //     immediate is used as operand 0, it will need zext to be nuw.
-  // - We also allow add and sub to safely overflow in certain circumstances
-  //   and only when the value (operand 0) is being decreased.
-  //
-  // For adds and subs, that are either nuw or safely wrap and use a negative
-  // immediate as operand 1, we create an equivalent instruction using a
-  // positive immediate. That positive immediate can then be zext along with
-  // all the other immediates later.
-  for (auto *V : *Visited) {
-    if (!isa<Instruction>(V))
-      continue;
-
-    auto *I = cast<Instruction>(V);
-    if (SafeToPromote->count(I)) {
-
-      if (!isa<OverflowingBinaryOperator>(I))
-        continue;
 
-      if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
-        if (!Const->isNegative())
-          break;
+  // For adds that safely wrap and use a negative immediate as operand 1, we
+  // create an equivalent instruction using a positive immediate.
+  // That positive immediate can then be zext along with all the other
+  // immediates later.
+  for (auto *I : *SafeWrap) {
+    if (I->getOpcode() != Instruction::Add)
+      continue;
 
-        unsigned Opc = I->getOpcode();
-        if (Opc != Instruction::Add && Opc != Instruction::Sub)
-          continue;
+    LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+    assert((isa<ConstantInt>(I->getOperand(1)) &&
+            cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
+           "Wrapping should have a negative immediate as the second operand");
 
-        LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
-        auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
-        Builder.SetInsertPoint(I);
-        Value *NewVal = Opc == Instruction::Sub ?
-          Builder.CreateAdd(I->getOperand(0), NewConst) :
-          Builder.CreateSub(I->getOperand(0), NewConst);
-        LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
-
-        if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
-          NewInst->copyIRFlags(I);
-          NewInsts.insert(NewInst);
-        }
-        InstsToRemove.insert(I);
-        I->replaceAllUsesWith(NewVal);
-      }
+    auto Const = cast<ConstantInt>(I->getOperand(1));
+    auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+    Builder.SetInsertPoint(I);
+    Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
+    if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+      NewInst->copyIRFlags(I);
+      NewInsts.insert(NewInst);
     }
+    InstsToRemove.insert(I);
+    I->replaceAllUsesWith(NewVal);
+    LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
   }
   for (auto *I : NewInsts)
     Visited->insert(I);
@@ -605,7 +587,7 @@ void IRPromoter::PromoteTree() {
 
     if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
       continue;
-  
+
     assert(EnableDSP && "DSP intrinisc insertion not enabled!");
 
     // Replace unsafe instructions with appropriate intrinsic calls.
@@ -683,13 +665,14 @@ void IRPromoter::TruncateSinks() {
 }
 
 void IRPromoter::Cleanup() {
+  LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
   // Some zexts will now have become redundant, along with their trunc
   // operands, so remove them
   for (auto V : *Visited) {
-    if (!isa<CastInst>(V))
+    if (!isa<ZExtInst>(V))
       continue;
 
-    auto ZExt = cast<CastInst>(V);
+    auto ZExt = cast<ZExtInst>(V);
     if (ZExt->getDestTy() != ExtTy)
       continue;
 
@@ -701,9 +684,11 @@ void IRPromoter::Cleanup() {
       continue;
     }
 
-    // For any truncs that we insert to handle zexts, we can replace the
-    // result of the zext with the input to the trunc.
-    if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+    // Unless they produce a value that is narrower than ExtTy, we can
+    // replace the result of the zext with the input of a newly inserted
+    // trunc.
+    if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
+        Src->getType() == OrigTy) {
       auto *Trunc = cast<TruncInst>(Src);
       assert(Trunc->getOperand(0)->getType() == ExtTy &&
              "expected inserted trunc to be operating on i32");
@@ -721,9 +706,12 @@ void IRPromoter::Cleanup() {
   NewInsts.clear();
   TruncTysMap.clear();
   Promoted.clear();
+  SafeToPromote->clear();
+  SafeWrap->clear();
 }
 
 void IRPromoter::ConvertTruncs() {
+  LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
   IRBuilder<> Builder{Ctx};
 
   for (auto *V : *Visited) {
@@ -731,12 +719,13 @@ void IRPromoter::ConvertTruncs() {
       continue;
 
     auto *Trunc = cast<TruncInst>(V);
-    assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
-
     Builder.SetInsertPoint(Trunc);
-    unsigned NumBits =
-      cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
-    ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+    IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+    IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+    unsigned NumBits = DestTy->getScalarSizeInBits();
+    ConstantInt *Mask =
+      ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
     Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
 
     if (auto *I = dyn_cast<Instruction>(Masked))
@@ -747,10 +736,11 @@ void IRPromoter::ConvertTruncs() {
 }
 
 void IRPromoter::Mutate(Type *OrigTy,
-                        SmallPtrSetImpl<Value*> &Visited,
+                        SetVector<Value*> &Visited,
                         SmallPtrSetImpl<Value*> &Sources,
                         SmallPtrSetImpl<Instruction*> &Sinks,
-                        SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+                        SmallPtrSetImpl<Instruction*> &SafeToPromote,
+                        SmallPtrSetImpl<Instruction*> &SafeWrap) {
   LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
              << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
 
@@ -763,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy,
   this->Sources = &Sources;
   this->Sinks = &Sinks;
   this->SafeToPromote = &SafeToPromote;
+  this->SafeWrap = &SafeWrap;
 
   // Cache original types of the values that will likely need truncating
   for (auto *I : Sinks) {
@@ -778,22 +769,28 @@ void IRPromoter::Mutate(Type *OrigTy,
         TruncTysMap[I].push_back(I->getOperand(i)->getType());
     }
   }
+  for (auto *V : Visited) {
+    if (!isa<TruncInst>(V) || Sources.count(V))
+      continue;
+    auto *Trunc = cast<TruncInst>(V);
+    TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+  }
 
-  // Convert adds and subs using negative immediates to equivalent instructions
-  // that use positive constants.
-  PrepareConstants();
+  // Convert adds using negative immediates to equivalent instructions that use
+  // positive constants.
+  PrepareWrappingAdds();
 
   // Insert zext instructions between sources and their users.
   ExtendSources();
 
-  // Convert any truncs, that aren't sources, into AND masks.
-  ConvertTruncs();
-
   // Promote visited instructions, mutating their types in place. Also insert
   // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
   // promote.
   PromoteTree();
 
+  // Convert any truncs, that aren't sources, into AND masks.
+  ConvertTruncs();
+
   // Insert trunc instructions for use by calls, stores etc...
   TruncateSinks();
 
@@ -819,6 +816,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
     return EqualTypeSize(I->getOperand(0));
   }
 
+  if (GenerateSignBits(V)) {
+    LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+    return false;
+  }
+
   // Memory instructions
   if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
     return true;
@@ -835,9 +837,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
       isa<LoadInst>(V))
     return isSupportedType(V);
 
-  if (isa<SExtInst>(V))
-    return false;
-
   if (auto *Cast = dyn_cast<CastInst>(V))
     return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
 
@@ -854,10 +853,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
   if (!isSupportedType(V))
     return false;
 
-  if (generateSignBits(V)) {
-    LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
-    return false;
-  }
   return true;
 }
 
@@ -873,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
   if (SafeToPromote.count(I))
    return true;
 
-  if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+  if (isPromotedResultSafe(V) || isSafeWrap(I)) {
     SafeToPromote.insert(I);
     return true;
   }
@@ -911,6 +906,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
     return false;
 
   SafeToPromote.clear();
+  SafeWrap.clear();
 
   if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
     return false;
@@ -921,7 +917,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
   SetVector<Value*> WorkList;
   SmallPtrSet<Value*, 8> Sources;
   SmallPtrSet<Instruction*, 4> Sinks;
-  SmallPtrSet<Value*, 16> CurrentVisited;
+  SetVector<Value*> CurrentVisited;
   WorkList.insert(V);
 
   // Return true if V was added to the worklist as a supported instruction,
@@ -1009,7 +1005,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
   if (ToPromote < 2)
     return false;
 
-  Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
+  Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
+                   SafeWrap);
   return true;
 }
 
diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp
deleted file mode 100644
index b263e9d86c42..000000000000
--- a/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===--- ARMComputeBlockSize.cpp - Compute machine block sizes ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBasicBlockInfo.h"
-#include "ARMMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include <vector>
-
-using namespace llvm;
-
-namespace llvm {
-
-// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
-// below may shrink MI.
-static bool
-mayOptimizeThumb2Instruction(const MachineInstr *MI) {
-  switch(MI->getOpcode()) {
-    // optimizeThumb2Instructions.
-    case ARM::t2LEApcrel:
-    case ARM::t2LDRpci:
-    // optimizeThumb2Branches.
-    case ARM::t2B:
-    case ARM::t2Bcc:
-    case ARM::tBcc:
-    // optimizeThumb2JumpTables.
-    case ARM::t2BR_JT:
-    case ARM::tBR_JTr:
-      return true;
-  }
-  return false;
-}
-
-void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
-                      BasicBlockInfo &BBI) {
-  const ARMBaseInstrInfo *TII =
-    static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
-  bool isThumb = MF->getInfo<ARMFunctionInfo>()->isThumbFunction();
-  BBI.Size = 0;
-  BBI.Unalign = 0;
-  BBI.PostAlign = 0;
-
-  for (MachineInstr &I : *MBB) {
-    BBI.Size += TII->getInstSizeInBytes(I);
-    // For inline asm, getInstSizeInBytes returns a conservative estimate.
-    // The actual size may be smaller, but still a multiple of the instr size.
-    if (I.isInlineAsm())
-      BBI.Unalign = isThumb ? 1 : 2;
-    // Also consider instructions that may be shrunk later.
-    else if (isThumb && mayOptimizeThumb2Instruction(&I))
-      BBI.Unalign = 1;
-  }
-
-  // tBR_JTr contains a .align 2 directive.
-  if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
-    BBI.PostAlign = 2;
-    MBB->getParent()->ensureAlignment(2);
-  }
-}
-
-std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
-  std::vector<BasicBlockInfo> BBInfo;
-  BBInfo.resize(MF->getNumBlockIDs());
-
-  for (MachineBasicBlock &MBB : *MF)
-    computeBlockSize(MF, &MBB, BBInfo[MBB.getNumber()]);
-
-  return BBInfo;
-}
-
-} // end namespace llvm
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 5e97c4cb35e3..60e5d7bf6098 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1,9 +1,8 @@
 //===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -98,7 +97,7 @@ namespace {
   ///   CPE     - A constant pool entry that has been placed somewhere, which
   ///             tracks a list of users.
   class ARMConstantIslands : public MachineFunctionPass {
-    std::vector<BasicBlockInfo> BBInfo;
+    std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
 
     /// WaterList - A sorted list of basic blocks where islands could be placed
     /// (i.e. blocks that don't fall through to the following block, due
@@ -244,7 +243,6 @@ namespace {
     void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
     MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
     void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
-    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
     bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
     unsigned getCombinedIndex(const MachineInstr *CPEMI);
     int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
@@ -260,7 +258,6 @@ namespace {
                           bool DoDump = false);
     bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                         CPUser &U, unsigned &Growth);
-    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
     bool fixupImmediateBr(ImmBranch &Br);
     bool fixupConditionalBr(ImmBranch &Br);
     bool fixupUnconditionalBr(ImmBranch &Br);
@@ -275,7 +272,6 @@ namespace {
     MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
                                                   MachineBasicBlock *JTBB);
 
-    unsigned getOffsetOf(MachineInstr *MI) const;
     unsigned getUserOffset(CPUser&) const;
     void dumpBBs();
     void verify();
@@ -296,9 +292,10 @@ char ARMConstantIslands::ID = 0;
 /// verify - check BBOffsets, BBSizes, alignment of islands
 void ARMConstantIslands::verify() {
 #ifndef NDEBUG
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   assert(std::is_sorted(MF->begin(), MF->end(),
-                        [this](const MachineBasicBlock &LHS,
-                               const MachineBasicBlock &RHS) {
+                        [&BBInfo](const MachineBasicBlock &LHS,
+                                  const MachineBasicBlock &RHS) {
                           return BBInfo[LHS.getNumber()].postOffset() <
                                  BBInfo[RHS.getNumber()].postOffset();
                         }));
@@ -324,6 +321,7 @@ void ARMConstantIslands::verify() {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// print block size and offset information - debugging
 LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   LLVM_DEBUG({
     for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
       const BasicBlockInfo &BBI = BBInfo[J];
@@ -340,6 +338,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
 bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   MCP = mf.getConstantPool();
+  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(mf));
 
   LLVM_DEBUG(dbgs() << "***** ARMConstantIslands: "
                     << MCP->getConstants().size() << " CP entries, aligned to "
@@ -467,7 +466,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
 
   LLVM_DEBUG(dbgs() << '\n'; dumpBBs());
 
-  BBInfo.clear();
+  BBUtils->clear();
   WaterList.clear();
   CPUsers.clear();
   CPEntries.clear();
@@ -684,14 +683,14 @@ void ARMConstantIslands::scanFunctionJumpTables() {
 void ARMConstantIslands::
 initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
 
-  BBInfo = computeAllBlockSizes(MF);
-
+  BBUtils->computeAllBlockSizes();
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   // The known bits of the entry block offset are determined by the function
   // alignment.
   BBInfo.front().KnownBits = MF->getAlignment();
 
   // Compute block offsets and known bits.
-  adjustBBOffsetsAfter(&MF->front());
+  BBUtils->adjustBBOffsetsAfter(&MF->front());
 
   // Now go back through the instructions and build up our data structures.
   for (MachineBasicBlock &MBB : *MF) {
@@ -856,25 +855,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
   }
 }
 
-/// getOffsetOf - Return the current offset of the specified machine instruction
-/// from the start of the function.  This offset changes as stuff is moved
-/// around inside the function.
-unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
-  MachineBasicBlock *MBB = MI->getParent();
-
-  // The offset is composed of two things: the sum of the sizes of all MBB's
-  // before this instruction's block, and the offset from the start of the block
-  // it is in.
-  unsigned Offset = BBInfo[MBB->getNumber()].Offset;
-
-  // Sum instructions before MI in MBB.
-  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
-    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
-    Offset += TII->getInstSizeInBytes(*I);
-  }
-  return Offset;
-}
-
 /// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
 /// ID.
 static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
@@ -891,13 +871,11 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
 
   // Insert an entry into BBInfo to align it properly with the (newly
   // renumbered) block numbers.
-  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());
 
   // Next, update WaterList.  Specifically, we need to add NewMBB as having
   // available water after it.
-  water_iterator IP =
-    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
-                     CompareMBBNumbers);
+  water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
   WaterList.insert(IP, NewBB);
 }
 
@@ -942,15 +920,13 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
 
   // Insert an entry into BBInfo to align it properly with the (newly
   // renumbered) block numbers.
-  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+  BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());
 
   // Next, update WaterList.  Specifically, we need to add OrigMBB as having
   // available water after it (but not if it's already there, which happens
   // when splitting before a conditional branch that is followed by an
   // unconditional branch - in that case we want to insert NewBB).
-  water_iterator IP =
-    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
-                     CompareMBBNumbers);
+  water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
   MachineBasicBlock* WaterBB = *IP;
   if (WaterBB == OrigBB)
     WaterList.insert(std::next(IP), NewBB);
@@ -963,14 +939,14 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
   // the new jump we added.  (It should be possible to do this without
   // recounting everything, but it's very confusing, and this is rarely
   // executed.)
-  computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]);
+  BBUtils->computeBlockSize(OrigBB);
 
   // Figure out how large the NewMBB is.  As the second half of the original
   // block, it may contain a tablejump.
-  computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]);
+  BBUtils->computeBlockSize(NewBB);
 
   // All BBOffsets following these blocks must be modified.
-  adjustBBOffsetsAfter(OrigBB);
+  BBUtils->adjustBBOffsetsAfter(OrigBB);
 
   return NewBB;
 }
@@ -979,7 +955,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
 /// displacement computation.  Update U.KnownAlignment to match its current
 /// basic block location.
 unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
-  unsigned UserOffset = getOffsetOf(U.MI);
+  unsigned UserOffset = BBUtils->getOffsetOf(U.MI);
+
+  SmallVectorImpl<BasicBlockInfo> &BBInfo = BBUtils->getBBInfo();
   const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
   unsigned KnownBits = BBI.internalKnownBits();
 
@@ -1028,6 +1006,7 @@ bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
 bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
                                         MachineBasicBlock* Water, CPUser &U,
                                         unsigned &Growth) {
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
   unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
   unsigned NextBlockOffset, NextBlockAlignment;
@@ -1068,10 +1047,11 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
 bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                                       MachineInstr *CPEMI, unsigned MaxDisp,
                                       bool NegOk, bool DoDump) {
-  unsigned CPEOffset  = getOffsetOf(CPEMI);
+  unsigned CPEOffset = BBUtils->getOffsetOf(CPEMI);
 
   if (DoDump) {
     LLVM_DEBUG({
+        BBInfoVector &BBInfo = BBUtils->getBBInfo();
       unsigned Block = MI->getParent()->getNumber();
       const BasicBlockInfo &BBI = BBInfo[Block];
       dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
@@ -1104,28 +1084,6 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
 }
 #endif // NDEBUG
 
-void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
-  unsigned BBNum = BB->getNumber();
-  for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
-    // Get the offset and known bits at the end of the layout predecessor.
-    // Include the alignment of the current block.
-    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
-    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
-    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
-
-    // This is where block i begins.  Stop if the offset is already correct,
-    // and we have updated 2 blocks.  This is the maximum number of blocks
-    // changed before calling this function.
-    if (i > BBNum + 2 &&
-        BBInfo[i].Offset == Offset &&
-        BBInfo[i].KnownBits == KnownBits)
-      break;
-
-    BBInfo[i].Offset = Offset;
-    BBInfo[i].KnownBits = KnownBits;
-  }
-}
-
 /// decrementCPEReferenceCount - find the constant pool entry with index CPI
 /// and instruction CPEMI, and decrement its refcount.  If the refcount
 /// becomes 0 remove the entry and instruction.  Returns true if we removed
@@ -1241,6 +1199,7 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
   // When a CP access is out of range, BB0 may be used as water. However,
   // inserting islands between BB0 and BB1 makes other accesses out of range.
   MachineBasicBlock *UserBB = U.MI->getParent();
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   unsigned MinNoSplitDisp =
       BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
   if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
@@ -1297,6 +1256,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
   MachineInstr *CPEMI  = U.CPEMI;
   unsigned CPELogAlign = getCPELogAlign(CPEMI);
   MachineBasicBlock *UserMBB = UserMI->getParent();
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
 
   // If the block does not end in an unconditional branch already, and if the
@@ -1328,8 +1288,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
       unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
       ImmBranches.push_back(ImmBranch(&UserMBB->back(),
                                       MaxDisp, false, UncondBr));
-      computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]);
-      adjustBBOffsetsAfter(UserMBB);
+      BBUtils->computeBlockSize(UserMBB);
+      BBUtils->adjustBBOffsetsAfter(UserMBB);
       return;
     }
   }
@@ -1538,8 +1498,8 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
   NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
 
   // Increase the size of the island block to account for the new entry.
-  BBInfo[NewIsland->getNumber()].Size += Size;
-  adjustBBOffsetsAfter(&*--NewIsland->getIterator());
+  BBUtils->adjustBBSize(NewIsland, Size);
+  BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());
 
   // Finally, change the CPI in the instruction operand to be ID.
   for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1550,7 +1510,8 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
 
   LLVM_DEBUG(
       dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
-             << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+             << format(" offset=%#x\n",
+                       BBUtils->getBBInfo()[NewIsland->getNumber()].Offset));
 
   return true;
 }
@@ -1561,7 +1522,8 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
   MachineBasicBlock *CPEBB = CPEMI->getParent();
   unsigned Size = CPEMI->getOperand(2).getImm();
   CPEMI->eraseFromParent();
-  BBInfo[CPEBB->getNumber()].Size -= Size;
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
+  BBUtils->adjustBBSize(CPEBB, -Size);
   // All succeeding offsets have the current size value added in, fix this.
   if (CPEBB->empty()) {
     BBInfo[CPEBB->getNumber()].Size = 0;
@@ -1572,7 +1534,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
     // Entries are sorted by descending alignment, so realign from the front.
     CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
 
-  adjustBBOffsetsAfter(CPEBB);
+  BBUtils->adjustBBOffsetsAfter(CPEBB);
   // An island has only one predecessor BB and one successor BB. Check if
   // this BB's predecessor jumps directly to this BB's successor. This
   // shouldn't happen currently.
@@ -1597,30 +1559,6 @@ bool ARMConstantIslands::removeUnusedCPEntries() {
   return MadeChange;
 }
 
-/// isBBInRange - Returns true if the distance between specific MI and
-/// specific BB can fit in MI's displacement field.
-bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
-                                     unsigned MaxDisp) {
-  unsigned PCAdj      = isThumb ? 4 : 8;
-  unsigned BrOffset   = getOffsetOf(MI) + PCAdj;
-  unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
-
-  LLVM_DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB)
-                    << " from " << printMBBReference(*MI->getParent())
-                    << " max delta=" << MaxDisp << " from " << getOffsetOf(MI)
-                    << " to " << DestOffset << " offset "
-                    << int(DestOffset - BrOffset) << "\t" << *MI);
-
-  if (BrOffset <= DestOffset) {
-    // Branch before the Dest.
-    if (DestOffset-BrOffset <= MaxDisp)
-      return true;
-  } else {
-    if (BrOffset-DestOffset <= MaxDisp)
-      return true;
-  }
-  return false;
-}
 
 /// fixupImmediateBr - Fix up an immediate branch whose destination is too far
 /// away to fit in its displacement field.
@@ -1629,7 +1567,7 @@ bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
   MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
 
   // Check to see if the DestBB is already in-range.
-  if (isBBInRange(MI, DestBB, Br.MaxDisp))
+  if (BBUtils->isBBInRange(MI, DestBB, Br.MaxDisp))
     return false;
 
   if (!Br.isCond)
@@ -1648,11 +1586,15 @@ ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
   if (!isThumb1)
     llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
 
+  if (!AFI->isLRSpilled())
+    report_fatal_error("underestimated function size");
+
   // Use BL to implement far jump.
   Br.MaxDisp = (1 << 21) * 2;
   MI->setDesc(TII->get(ARM::tBfar));
+  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   BBInfo[MBB->getNumber()].Size += 2;
-  adjustBBOffsetsAfter(MBB);
+  BBUtils->adjustBBOffsetsAfter(MBB);
   HasFarJump = true;
   ++NumUBrFixed;
 
@@ -1699,7 +1641,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
       // bne L2
       // b   L1
       MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
-      if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+      if (BBUtils->isBBInRange(MI, NewDest, Br.MaxDisp)) {
         LLVM_DEBUG(
             dbgs() << "  Invert Bcc condition and swap its destination with "
                    << *BMI);
@@ -1716,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
     // No need for the branch to the next block. We're adding an unconditional
     // branch to the destination.
     int delta = TII->getInstSizeInBytes(MBB->back());
-    BBInfo[MBB->getNumber()].Size -= delta;
+    BBUtils->adjustBBSize(MBB, -delta);
     MBB->back().eraseFromParent();
 
     // The conditional successor will be swapped between the BBs after this, so
@@ -1737,21 +1679,21 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
   BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
     .addMBB(NextBB).addImm(CC).addReg(CCReg);
   Br.MI = &MBB->back();
-  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+  BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
   if (isThumb)
     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
         .addMBB(DestBB)
         .add(predOps(ARMCC::AL));
   else
     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
-  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+  BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
   unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
   ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
 
   // Remove the old conditional branch.  It may or may not still be in MBB.
-  BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
+  BBUtils->adjustBBSize(MI->getParent(), -TII->getInstSizeInBytes(*MI));
   MI->eraseFromParent();
-  adjustBBOffsetsAfter(MBB);
+  BBUtils->adjustBBOffsetsAfter(MBB);
   return true;
 }
 
@@ -1826,8 +1768,8 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
       LLVM_DEBUG(dbgs() << "Shrink: " << *U.MI);
       U.MI->setDesc(TII->get(NewOpc));
       MachineBasicBlock *MBB = U.MI->getParent();
-      BBInfo[MBB->getNumber()].Size -= 2;
-      adjustBBOffsetsAfter(MBB);
+      BBUtils->adjustBBSize(MBB, -2);
+      BBUtils->adjustBBOffsetsAfter(MBB);
       ++NumT2CPShrunk;
       MadeChange = true;
     }
@@ -1866,12 +1808,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
     if (NewOpc) {
       unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
       MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
-      if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+      if (BBUtils->isBBInRange(Br.MI, DestBB, MaxOffs)) {
         LLVM_DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
         Br.MI->setDesc(TII->get(NewOpc));
         MachineBasicBlock *MBB = Br.MI->getParent();
-        BBInfo[MBB->getNumber()].Size -= 2;
-        adjustBBOffsetsAfter(MBB);
+        BBUtils->adjustBBSize(MBB, -2);
+        BBUtils->adjustBBOffsetsAfter(MBB);
         ++NumT2BrShrunk;
         MadeChange = true;
       }
@@ -1898,34 +1840,47 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
     MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
     // Check if the distance is within 126. Subtract starting offset by 2
     // because the cmp will be eliminated.
-    unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+    unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
+    BBInfoVector &BBInfo = BBUtils->getBBInfo();
     unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
-    if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
-      MachineBasicBlock::iterator CmpMI = Br.MI;
-      if (CmpMI != Br.MI->getParent()->begin()) {
-        --CmpMI;
-        if (CmpMI->getOpcode() == ARM::tCMPi8) {
-          unsigned Reg = CmpMI->getOperand(0).getReg();
-          Pred = getInstrPredicate(*CmpMI, PredReg);
-          if (Pred == ARMCC::AL &&
-              CmpMI->getOperand(1).getImm() == 0 &&
-              isARMLowRegister(Reg)) {
-            MachineBasicBlock *MBB = Br.MI->getParent();
-            LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
-            MachineInstr *NewBR =
-              BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
-              .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
-            CmpMI->eraseFromParent();
-            Br.MI->eraseFromParent();
-            Br.MI = NewBR;
-            BBInfo[MBB->getNumber()].Size -= 2;
-            adjustBBOffsetsAfter(MBB);
-            ++NumCBZ;
-            MadeChange = true;
-          }
-        }
+    if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
+      continue;
+
+    // Search backwards to find a tCMPi8
+    auto *TRI = STI->getRegisterInfo();
+    MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
+    if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
+      continue;
+
+    unsigned Reg = CmpMI->getOperand(0).getReg();
+
+    // Check for Kill flags on Reg. If they are present remove them and set kill
+    // on the new CBZ.
+    MachineBasicBlock::iterator KillMI = Br.MI;
+    bool RegKilled = false;
+    do {
+      --KillMI;
+      if (KillMI->killsRegister(Reg, TRI)) {
+        KillMI->clearRegisterKills(Reg, TRI);
+        RegKilled = true;
+        break;
       }
-    }
+    } while (KillMI != CmpMI);
+
+    // Create the new CBZ/CBNZ
+    MachineBasicBlock *MBB = Br.MI->getParent();
+    LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+    MachineInstr *NewBR =
+        BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+            .addReg(Reg, getKillRegState(RegKilled))
+            .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+    CmpMI->eraseFromParent();
+    Br.MI->eraseFromParent();
+    Br.MI = NewBR;
+    BBInfo[MBB->getNumber()].Size -= 2;
+    BBUtils->adjustBBOffsetsAfter(MBB);
+    ++NumCBZ;
+    MadeChange = true;
   }
 
   return MadeChange;
@@ -2085,16 +2040,6 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
   DeadSize += 4;
 }
 
-static bool registerDefinedBetween(unsigned Reg,
-                                   MachineBasicBlock::iterator From,
-                                   MachineBasicBlock::iterator To,
-                                   const TargetRegisterInfo *TRI) {
-  for (auto I = From; I != To; ++I)
-    if (I->modifiesRegister(Reg, TRI))
-      return true;
-  return false;
-}
-
 /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
 /// jumptables when it's possible.
 bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -2117,8 +2062,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
 
     bool ByteOk = true;
     bool HalfWordOk = true;
-    unsigned JTOffset = getOffsetOf(MI) + 4;
+    unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+    BBInfoVector &BBInfo = BBUtils->getBBInfo();
     for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
       MachineBasicBlock *MBB = JTBBs[j];
       unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
@@ -2281,7 +2227,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
 
     int Delta = OrigSize - NewSize + DeadSize;
     BBInfo[MBB->getNumber()].Size -= Delta;
-    adjustBBOffsetsAfter(MBB);
+    BBUtils->adjustBBOffsetsAfter(MBB);
 
     ++NumTBs;
     MadeChange = true;
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 236c4fab2a5c..3bdb0e1ef62d 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -1,9 +1,8 @@
 //===- ARMConstantPoolValue.cpp - ARM constantpool value ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 55194ed94532..660b7fc88d82 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -1,9 +1,8 @@
 //===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index eecd0a10dc7d..b32ba3eeea18 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
@@ -423,8 +423,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
   }
 #endif
 
-  auto I = std::lower_bound(std::begin(NEONLdStTable),
-                            std::end(NEONLdStTable), Opcode);
+  auto I = llvm::lower_bound(NEONLdStTable, Opcode);
   if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
     return I;
   return nullptr;
@@ -470,6 +469,7 @@ static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -571,8 +571,8 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
 
   // Transfer memoperands.
   MIB.cloneMemRefs(MI);
-
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
@@ -580,6 +580,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -646,8 +647,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
 
   // Transfer memoperands.
   MIB.cloneMemRefs(MI);
-
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
@@ -655,6 +656,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && "NEONLdStTable lookup failed");
@@ -745,6 +747,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                                  unsigned Opc, bool IsExt) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
   unsigned OpIdx = 0;
@@ -774,6 +777,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
   MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
   TransferImpOps(MI, MIB, MIB);
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 static bool IsAnAddressOperand(const MachineOperand &MO) {
@@ -830,6 +834,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
   bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
   MachineInstrBuilder LO16, HI16;
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   if (!STI->hasV6T2Ops() &&
       (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
@@ -911,6 +916,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
     LO16.add(makeImplicit(MI.getOperand(1)));
   TransferImpOps(MI, LO16, HI16);
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; LO16.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And:       "; HI16.getInstr()->dump(););
 }
 
 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
@@ -1930,11 +1937,16 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   TRI = STI->getRegisterInfo();
   AFI = MF.getInfo<ARMFunctionInfo>();
 
+  LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
+                    << "********** Function: " << MF.getName() << '\n');
+
   bool Modified = false;
   for (MachineBasicBlock &MBB : MF)
     Modified |= ExpandMBB(MBB);
   if (VerifyARMPseudo)
     MF.verify(this, "After expanding ARM pseudo instructions.");
+
+  LLVM_DEBUG(dbgs() << "***************************************************\n");
   return Modified;
 }
 
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index a50abfdbee44..6e274d269bf2 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1,9 +1,8 @@
 //===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -245,8 +244,6 @@ class ARMFastISel final : public FastISel {
 
 } // end anonymous namespace
 
-#include "ARMGenCallingConv.inc"
-
 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
 // we don't care about implicit defs here, just places we'll need to add a
 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
@@ -444,7 +441,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
   }
 
   // Require VFP2 for loading fp constants.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   // MachineConstantPool wants an explicit alignment.
   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -500,7 +497,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
   }
 
   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 
   if (ResultReg)
@@ -558,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
@@ -972,7 +969,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       break;
     case MVT::f32:
-      if (!Subtarget->hasVFP2()) return false;
+      if (!Subtarget->hasVFP2Base()) return false;
       // Unaligned loads need special handling. Floats require word-alignment.
       if (Alignment && Alignment < 4) {
         needVMOV = true;
@@ -985,7 +982,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       }
       break;
     case MVT::f64:
-      if (!Subtarget->hasVFP2()) return false;
+      // Can load and store double precision even without FeatureFP64
+      if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned loads need special handling.  Doublewords require
       // word-alignment.
       if (Alignment && Alignment < 4)
@@ -1110,7 +1108,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
       }
       break;
     case MVT::f32:
-      if (!Subtarget->hasVFP2()) return false;
+      if (!Subtarget->hasVFP2Base()) return false;
       // Unaligned stores need special handling. Floats require word-alignment.
       if (Alignment && Alignment < 4) {
         unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
@@ -1125,7 +1123,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
       }
       break;
     case MVT::f64:
-      if (!Subtarget->hasVFP2()) return false;
+      // Can load and store double precision even without FeatureFP64
+      if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned stores need special handling.  Doublewords require
       // word-alignment.
       if (Alignment && Alignment < 4)
@@ -1356,10 +1355,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   if (!SrcEVT.isSimple()) return false;
   MVT SrcVT = SrcEVT.getSimpleVT();
 
-  if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
     return false;
 
-  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
     return false;
 
   // Check to see if the 2nd operand is a constant that we can encode directly
@@ -1509,7 +1508,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
 
 bool ARMFastISel::SelectFPExt(const Instruction *I) {
   // Make sure we have VFP and that we're extending float to double.
-  if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
 
   Value *V = I->getOperand(0);
   if (!I->getType()->isDoubleTy() ||
@@ -1528,7 +1527,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
 
 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
   // Make sure we have VFP and that we're truncating double to float.
-  if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
 
   Value *V = I->getOperand(0);
   if (!(I->getType()->isFloatTy() &&
@@ -1547,7 +1546,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
 
 bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
   // Make sure we have VFP.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   MVT DstVT;
   Type *Ty = I->getType();
@@ -1579,7 +1578,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
 
   unsigned Opc;
   if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
-  else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
     Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
   else return false;
 
@@ -1592,7 +1591,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
 
 bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   // Make sure we have VFP.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   MVT DstVT;
   Type *RetTy = I->getType();
@@ -1605,7 +1604,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   unsigned Opc;
   Type *OpTy = I->getOperand(0)->getType();
   if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
-  else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
     Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
   else return false;
 
@@ -1811,9 +1810,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
   // if we have them.
   // FIXME: It'd be nice to use NEON instructions.
   Type *Ty = I->getType();
-  if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
     return false;
-  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
     return false;
 
   unsigned Opc;
@@ -1855,7 +1854,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   default:
     report_fatal_error("Unsupported calling convention");
   case CallingConv::Fast:
-    if (Subtarget->hasVFP2() && !isVarArg) {
+    if (Subtarget->hasVFP2Base() && !isVarArg) {
       if (!Subtarget->isAAPCS_ABI())
         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
       // For AAPCS ABI targets, just use VFP variant of the calling convention.
@@ -1866,7 +1865,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
     // Use target triple & subtarget features to do actual dispatch.
     if (Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() &&
+      if (Subtarget->hasVFP2Base() &&
           TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
       else
@@ -1935,11 +1934,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       case MVT::i32:
         break;
       case MVT::f32:
-        if (!Subtarget->hasVFP2())
+        if (!Subtarget->hasVFP2Base())
           return false;
         break;
       case MVT::f64:
-        if (!Subtarget->hasVFP2())
+        if (!Subtarget->hasVFP2Base())
           return false;
         break;
       }
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 8c0df4c2cbf9..5cd7006c22fc 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -1,9 +1,8 @@
 //===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a9d87ced31f3..bedb779bcba0 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,6 +29,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -344,6 +344,10 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
 /// this to produce a conservative estimate that we check in an assert() later.
 static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+  // For Thumb1, push.w isn't available, so the first push will always push
+  // r7 and lr onto the stack first.
+  if (AFI.isThumb1OnlyFunction())
+    return -AFI.getArgRegsSaveSize() - (2 * 4);
   // This is a conservative estimation: Assume the frame pointer being r7 and
   // pc("r15") up to r8 getting spilled before (= 8 registers).
   return -AFI.getArgRegsSaveSize() - (8 * 4);
@@ -954,8 +958,12 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
     }
   }
   // Use the base pointer if we have one.
-  if (RegInfo->hasBasePointer(MF))
+  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
+  // That can happen if we forced a base pointer for a large call frame.
+  if (RegInfo->hasBasePointer(MF)) {
     FrameReg = RegInfo->getBaseRegister();
+    Offset -= SPAdj;
+  }
   return Offset;
 }
 
@@ -1476,13 +1484,17 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 }
 
 // FIXME: Make generic?
-static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
-                                       const ARMBaseInstrInfo &TII) {
+static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
+                                            const ARMBaseInstrInfo &TII) {
   unsigned FnSize = 0;
   for (auto &MBB : MF) {
     for (auto &MI : MBB)
       FnSize += TII.getInstSizeInBytes(MI);
   }
+  if (MF.getJumpTableInfo())
+    for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
+      FnSize += Table.MBBs.size() * 4;
+  FnSize += MF.getConstantPool()->getConstants().size() * 4;
   return FnSize;
 }
 
@@ -1726,7 +1738,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   bool ForceLRSpill = false;
   if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
-    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
     // Force LR to be spilled if the Thumb function size is > 2048. This enables
     // use of BL to implement far jump. If it turns out that it's not needed
     // then the branch fix up path will undo it.
@@ -1771,13 +1783,59 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   }
   EstimatedStackSize += 16; // For possible paddings.
 
-  unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
+  if (AFI->isThumb1OnlyFunction()) {
+    // For Thumb1, don't bother to iterate over the function. The only
+    // instruction that requires an emergency spill slot is a store to a
+    // frame index.
+    //
+    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
+    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
+    // a 5-bit unsigned immediate.
+    //
+    // We could try to check if the function actually contains a tSTRspi
+    // that might need the spill slot, but it's not really important.
+    // Functions with VLAs or extremely large call frames are rare, and
+    // if a function is allocating more than 1KB of stack, an extra 4-byte
+    // slot probably isn't relevant.
+    if (RegInfo->hasBasePointer(MF))
+      EstimatedRSStackSizeLimit = (1U << 5) * 4;
+    else
+      EstimatedRSStackSizeLimit = (1U << 8) * 4;
+    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
+  } else {
+    EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
+  }
+  // Final estimate of whether sp or bp-relative accesses might require
+  // scavenging.
+  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
+
+  // If the stack pointer moves and we don't have a base pointer, the
+  // estimate logic doesn't work. The actual offsets might be larger when
+  // we're constructing a call frame, or we might need to use negative
+  // offsets from fp.
+  bool HasMovingSP = MFI.hasVarSizedObjects() ||
+    (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
+
+  // If we have a frame pointer, we assume arguments will be accessed
+  // relative to the frame pointer. Check whether fp-relative accesses to
+  // arguments require scavenging.
+  //
+  // We could do slightly better on Thumb1; in some cases, an sp-relative
+  // offset would be legal even though an fp-relative offset is not.
   int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
-  bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
-    MFI.hasVarSizedObjects() ||
-    (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
-    // For large argument stacks fp relative addressed may overflow.
-    (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
+  bool HasLargeArgumentList =
+      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
+
+  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
+                         HasLargeArgumentList;
+  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
+                    << "; EstimatedStack" << EstimatedStackSize
+                    << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset
+                    << "; BigFrameOffsets: " << BigFrameOffsets
+                    << "\n");
   if (BigFrameOffsets ||
       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
     AFI->setHasStackFrame(true);
@@ -1802,8 +1860,17 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         CS1Spilled = true;
     }
 
-    // This is true when we inserted a spill for an unused register that can now
-    // be used for register scavenging.
+    // This is true when we inserted a spill for a callee-save GPR which is
+    // not otherwise used by the function. This guarantees it is possible
+    // to scavenge a register to hold the address of a stack slot. On Thumb1,
+    // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
+    // subtargets, this is any GPR, i.e. r4-r11 or lr.
+    //
+    // If we don't insert a spill, we instead allocate an emergency spill
+    // slot, which can be used by scavenging to spill an arbitrary register.
+    //
+    // We currently don't try to figure out whether any specific instruction
+    // requires scavenging an additional register.
     bool ExtraCSSpill = false;
 
     if (AFI->isThumb1OnlyFunction()) {
@@ -1912,7 +1979,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         NumGPRSpills++;
         CS1Spilled = true;
         assert(!MRI.isReserved(Reg) && "Should not be reserved");
-        if (!MRI.isPhysRegUsed(Reg))
+        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
           ExtraCSSpill = true;
         UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
         if (Reg == ARM::LR)
@@ -1937,7 +2004,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         UnspilledCS1GPRs.erase(LRPos);
 
       ForceLRSpill = false;
-      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
+      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
+          !AFI->isThumb1OnlyFunction())
         ExtraCSSpill = true;
     }
 
@@ -1959,7 +2027,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
             SavedRegs.set(Reg);
             LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                               << " to make up alignment\n");
-            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
+            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
+                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
               ExtraCSSpill = true;
             break;
           }
@@ -1988,8 +2057,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
         unsigned Reg = UnspilledCS1GPRs.back();
         UnspilledCS1GPRs.pop_back();
         if (!MRI.isReserved(Reg) &&
-            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
-             Reg == ARM::LR)) {
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
           Extras.push_back(Reg);
           NumExtras--;
         }
@@ -2012,10 +2080,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
             ExtraCSSpill = true;
         }
       }
-      if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
-        // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
-        // closest to SP or frame pointer.
+      if (!ExtraCSSpill) {
+        // Reserve a slot closest to SP or frame pointer.
         assert(RS && "Register scavenging not provided");
+        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
         const TargetRegisterClass &RC = ARM::GPRRegClass;
         unsigned Size = TRI->getSpillSize(RC);
         unsigned Align = TRI->getSpillAlignment(RC);
@@ -2028,6 +2096,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     SavedRegs.set(ARM::LR);
     AFI->setLRIsSpilledForFarJump(true);
   }
+  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
 }
 
 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 2f7e23840e75..7544ca3c38d6 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -1,9 +1,8 @@
 //===- ARMTargetFrameLowering.h - Define frame lowering for ARM -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index d5dacbe08770..0fa32a0abeff 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index ccf09db69937..b5ac694e01f7 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -1,9 +1,8 @@
 //===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8e0e82388251..b349627b67b1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -120,8 +119,7 @@ public:
                        SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                              SDValue &Offset, SDValue &Opc);
-  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
-                         int Lwb, int Upb, bool FP16);
+  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
@@ -131,6 +129,7 @@ public:
 
   // Thumb Addressing Modes:
   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                 SDValue &OffImm);
   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -147,6 +146,9 @@ public:
                             SDValue &OffImm);
   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
+  template<unsigned Shift>
+  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
+                            SDValue &OffImm);
   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                              SDValue &OffReg, SDValue &ShImm);
   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -452,8 +454,10 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
   if (Subtarget->isThumb()) {
     if (Val <= 255) return 1;                               // MOV
     if (Subtarget->hasV6T2Ops() &&
-        (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
-      return 1; // MOVW
+        (Val <= 0xffff ||                                   // MOVW
+         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOV
+         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
+      return 1;
     if (Val <= 510) return 2;                               // MOV + ADDi8
     if (~Val <= 255) return 2;                              // MOV + MVN
     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
@@ -463,7 +467,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
   return 3; // Literal pool load
 }
 
@@ -900,7 +904,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 }
 
 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
-                                        int Lwb, int Upb, bool FP16) {
+                                        bool FP16) {
   if (!CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
@@ -922,7 +926,7 @@ bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offse
   int RHSC;
   const int Scale = FP16 ? 2 : 4;
 
-  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
+  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
     Base = N.getOperand(0);
     if (Base.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -960,16 +964,12 @@ bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offse
 
 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                       SDValue &Base, SDValue &Offset) {
-  int Lwb = -256 + 1;
-  int Upb = 256;
-  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
+  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
 }
 
 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                           SDValue &Base, SDValue &Offset) {
-  int Lwb = -512 + 1;
-  int Upb = 512;
-  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
+  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
 }
 
 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
@@ -1033,8 +1033,22 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
 //                         Thumb Addressing Modes
 //===----------------------------------------------------------------------===//
 
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
-                                            SDValue &Base, SDValue &Offset){
+static bool shouldUseZeroOffsetLdSt(SDValue N) {
+  // Negative numbers are difficult to materialise in thumb1. If we are
+  // selecting the add of a negative, instead try to select ri with a zero
+  // offset, so create the add node directly which will become a sub.
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+
+  // Look for an imm which is not legal for ld/st, but is legal for sub.
+  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
+    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
+                                                SDValue &Offset) {
   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
     if (!NC || !NC->isNullValue())
@@ -1049,9 +1063,22 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
+                                            SDValue &Offset) {
+  if (shouldUseZeroOffsetLdSt(N))
+    return false; // Select ri instead
+  return SelectThumbAddrModeRRSext(N, Base, Offset);
+}
+
 bool
 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                           SDValue &Base, SDValue &OffImm) {
+  if (shouldUseZeroOffsetLdSt(N)) {
+    Base = N;
+    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+    return true;
+  }
+
   if (!CurDAG->isBaseWithConstantOffset(N)) {
     if (N.getOpcode() == ISD::ADD) {
       return false; // We want to select register offset instead
@@ -1117,25 +1144,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
   if (!CurDAG->isBaseWithConstantOffset(N))
     return false;
 
-  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
-  if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
-      (LHSR && LHSR->getReg() == ARM::SP)) {
+  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
     // If the RHS is + imm8 * scale, fold into addr mode.
     int RHSC;
     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
       Base = N.getOperand(0);
-      if (Base.getOpcode() == ISD::FrameIndex) {
-        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      // Make sure the offset is inside the object, or we might fail to
+      // allocate an emergency spill slot. (An out-of-range access is UB, but
+      // it could show up anyway.)
+      MachineFrameInfo &MFI = MF->getFrameInfo();
+      if (RHSC * 4 < MFI.getObjectSize(FI)) {
         // For LHS+RHS to result in an offset that's a multiple of 4 the object
         // indexed by the LHS must be 4-byte aligned.
-        MachineFrameInfo &MFI = MF->getFrameInfo();
-        if (MFI.getObjectAlignment(FI) < 4)
+        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
           MFI.setObjectAlignment(FI, 4);
-        Base = CurDAG->getTargetFrameIndex(
-            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+        if (MFI.getObjectAlignment(FI) >= 4) {
+          Base = CurDAG->getTargetFrameIndex(
+              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+          return true;
+        }
       }
-      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
-      return true;
     }
   }
 
@@ -1248,6 +1278,35 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
   return false;
 }
 
+template<unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
+                                           SDValue &Base, SDValue &OffImm) {
+  if (N.getOpcode() == ISD::SUB ||
+      CurDAG->isBaseWithConstantOffset(N)) {
+    if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (N.getOpcode() == ISD::SUB)
+        RHSC = -RHSC;
+
+      if (isShiftedInt<7, Shift>(RHSC)) {
+        Base = N.getOperand(0);
+        if (Base.getOpcode() == ISD::FrameIndex) {
+          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+          Base = CurDAG->getTargetFrameIndex(
+            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+        }
+        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+        return true;
+      }
+    }
+  }
+
+  // Base only.
+  Base = N;
+  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                             SDValue &Base,
                                             SDValue &OffReg, SDValue &ShImm) {
@@ -2072,10 +2131,12 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
   default: llvm_unreachable("unhandled vld/vst lane type");
     // Double-register operations:
   case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4f16:
   case MVT::v4i16: OpcodeIndex = 1; break;
   case MVT::v2f32:
   case MVT::v2i32: OpcodeIndex = 2; break;
     // Quad-register operations:
+  case MVT::v8f16:
   case MVT::v8i16: OpcodeIndex = 0; break;
   case MVT::v4f32:
   case MVT::v4i32: OpcodeIndex = 1; break;
@@ -2192,7 +2253,10 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
   case MVT::v8i8:
   case MVT::v16i8: OpcodeIndex = 0; break;
   case MVT::v4i16:
-  case MVT::v8i16: OpcodeIndex = 1; break;
+  case MVT::v8i16:
+  case MVT::v4f16:
+  case MVT::v8f16:
+                  OpcodeIndex = 1; break;
   case MVT::v2f32:
   case MVT::v2i32:
   case MVT::v4f32:
@@ -2577,6 +2641,44 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
+  case ISD::STORE: {
+    // For Thumb1, match an sp-relative store in C++. This is a little
+    // unfortunate, but I don't think I can make the chain check work
+    // otherwise.  (The chain of the store has to be the same as the chain
+    // of the CopyFromReg, or else we can't replace the CopyFromReg with
+    // a direct reference to "SP".)
+    //
+    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
+    // a different addressing mode from other four-byte stores.
+    //
+    // This pattern usually comes up with call arguments.
+    StoreSDNode *ST = cast<StoreSDNode>(N);
+    SDValue Ptr = ST->getBasePtr();
+    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
+      int RHSC = 0;
+      if (Ptr.getOpcode() == ISD::ADD &&
+          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
+        Ptr = Ptr.getOperand(0);
+
+      if (Ptr.getOpcode() == ISD::CopyFromReg &&
+          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
+          Ptr.getOperand(0) == ST->getChain()) {
+        SDValue Ops[] = {ST->getValue(),
+                         CurDAG->getRegister(ARM::SP, MVT::i32),
+                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
+                         getAL(CurDAG, dl),
+                         CurDAG->getRegister(0, MVT::i32),
+                         ST->getChain()};
+        MachineSDNode *ResNode =
+            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
+        MachineMemOperand *MemOp = ST->getMemOperand();
+        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
+        ReplaceNode(N, ResNode);
+        return;
+      }
+    }
+    break;
+  }
   case ISD::WRITE_REGISTER:
     if (tryWriteRegister(N))
       return;
@@ -2586,6 +2688,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
       return;
     break;
   case ISD::INLINEASM:
+  case ISD::INLINEASM_BR:
     if (tryInlineAsm(N))
       return;
     break;
@@ -2895,6 +2998,16 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     // Other cases are autogenerated.
     break;
   }
+  case ARMISD::WLS: {
+    SDValue Ops[] = { N->getOperand(1),   // Loop count
+                      N->getOperand(2),   // Exit target
+                      N->getOperand(0) };
+    SDNode *LoopStart =
+      CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
+    ReplaceUses(N, LoopStart);
+    CurDAG->RemoveDeadNode(N);
+    return;
+  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
@@ -2922,6 +3035,36 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
 
     if (InFlag.getOpcode() == ARMISD::CMPZ) {
+      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+        SDValue Int = InFlag.getOperand(0);
+        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+
+        // Handle low-overhead loops.
+        if (ID == Intrinsic::loop_decrement_reg) {
+          SDValue Elements = Int.getOperand(2);
+          SDValue Size = CurDAG->getTargetConstant(
+            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
+                                 MVT::i32);
+
+          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
+          SDNode *LoopDec =
+            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+                                   CurDAG->getVTList(MVT::i32, MVT::Other),
+                                   Args);
+          ReplaceUses(Int.getNode(), LoopDec);
+
+          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
+          SDNode *LoopEnd =
+            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
+
+          ReplaceUses(N, LoopEnd);
+          CurDAG->RemoveDeadNode(N);
+          CurDAG->RemoveDeadNode(InFlag.getNode());
+          CurDAG->RemoveDeadNode(Int.getNode());
+          return;
+        }
+      }
+
       bool SwitchEQNEToPLMI;
       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
       InFlag = N->getOperand(4);
@@ -3979,9 +4122,9 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
 
   // If an opcode was found then we can lower the read to a VFP instruction.
   if (Opcode) {
-    if (!Subtarget->hasVFP2())
+    if (!Subtarget->hasVFP2Base())
       return false;
-    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
       return false;
 
     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
@@ -4090,7 +4233,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
                     .Default(0);
 
   if (Opcode) {
-    if (!Subtarget->hasVFP2())
+    if (!Subtarget->hasVFP2Base())
       return false;
     Ops = { N->getOperand(2), getAL(CurDAG, DL),
             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
@@ -4290,7 +4433,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
   if (!Changed)
     return false;
 
-  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   New->setNodeId(-1);
   ReplaceNode(N, New.getNode());
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 21de0f6a7630..18bb9bf3eccc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1,9 +1,8 @@
 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -80,6 +79,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -113,6 +113,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "arm-isel"
 
@@ -220,6 +221,121 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 }
 
+void ARMTargetLowering::setAllExpand(MVT VT) {
+  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) // Every builtin op.
+    setOperationAction(Opc, VT, Expand);
+
+  // Then re-enable the few really simple operations that we support even on
+  // types where all the actual arithmetic has to be broken down into simpler
+  // operations or turned into library calls.
+  setOperationAction(ISD::BITCAST, VT, Legal);
+  setOperationAction(ISD::LOAD, VT, Legal);
+  setOperationAction(ISD::STORE, VT, Legal);
+  setOperationAction(ISD::UNDEF, VT, Legal);
+}
+
+void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
+                                       LegalizeAction Action) {
+  setLoadExtAction(ISD::EXTLOAD,  From, To, Action); // any-extending loads
+  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action); // zero-extending loads
+  setLoadExtAction(ISD::SEXTLOAD, From, To, Action); // sign-extending loads
+} // NOTE(review): From appears to be the value VT, To the memory VT; confirm.
+
+void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
+  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
+
+  for (auto VT : IntTypes) { // Integer vectors, with or without MVE.fp.
+    addRegisterClass(VT, &ARM::QPRRegClass);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::SHL, VT, Custom);
+    setOperationAction(ISD::SRA, VT, Custom);
+    setOperationAction(ISD::SRL, VT, Custom);
+    setOperationAction(ISD::SMIN, VT, Legal);
+    setOperationAction(ISD::SMAX, VT, Legal);
+    setOperationAction(ISD::UMIN, VT, Legal);
+    setOperationAction(ISD::UMAX, VT, Legal);
+    setOperationAction(ISD::ABS, VT, Legal);
+
+    // No native support for these.
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::SDIV, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+
+    if (!HasMVEFP) { // Without MVE.fp, int<->fp conversions are expanded.
+      setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+      setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+      setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+    }
+  }
+
+  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
+  for (auto VT : FloatTypes) {
+    addRegisterClass(VT, &ARM::QPRRegClass);
+    if (!HasMVEFP)
+      setAllExpand(VT); // Baseline; selected ops are overridden just below.
+
+    // These are legal or custom whether we have MVE.fp or not.
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+
+    if (HasMVEFP) {
+      setOperationAction(ISD::FMINNUM, VT, Legal);
+      setOperationAction(ISD::FMAXNUM, VT, Legal);
+      setOperationAction(ISD::FROUND, VT, Legal);
+
+      // No native support for these.
+      setOperationAction(ISD::FDIV, VT, Expand);
+      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FSQRT, VT, Expand);
+      setOperationAction(ISD::FSIN, VT, Expand);
+      setOperationAction(ISD::FCOS, VT, Expand);
+      setOperationAction(ISD::FPOW, VT, Expand);
+      setOperationAction(ISD::FLOG, VT, Expand);
+      setOperationAction(ISD::FLOG2, VT, Expand);
+      setOperationAction(ISD::FLOG10, VT, Expand);
+      setOperationAction(ISD::FEXP, VT, Expand);
+      setOperationAction(ISD::FEXP2, VT, Expand);
+      setOperationAction(ISD::FNEARBYINT, VT, Expand);
+    }
+  }
+
+  // We 'support' these types up to bitcast/load/store level, regardless of
+  // MVE integer-only / float support. Only doing FP data processing on the FP
+  // vector types is inhibited at integer-only level.
+  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
+  for (auto VT : LongTypes) {
+    addRegisterClass(VT, &ARM::QPRRegClass);
+    setAllExpand(VT);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+  }
+  // We can do bitwise operations on v2i64 vectors.
+  setOperationAction(ISD::AND, MVT::v2i64, Legal);
+  setOperationAction(ISD::OR, MVT::v2i64, Legal);
+  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
+
+  // It is legal to extload v8i8 to v8i16, and v4i16 or v4i8 to v4i32.
+  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
+  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
+  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
+
+  // Some truncating stores are legal too.
+  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+  setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
+  setTruncStoreAction(MVT::v8i16, MVT::v8i8,  Legal);
+}
+
 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
                                      const ARMSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
@@ -240,7 +356,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   if (Subtarget->isTargetMachO()) {
     // Uses VFP for Thumb libfuncs if available.
-    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+    if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
         Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
       static const struct {
         const RTLIB::Libcall Op;
@@ -509,10 +625,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
-      !Subtarget->isThumb1Only()) {
+  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+      Subtarget->hasFPRegs()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    if (!Subtarget->hasVFP2Base())
+      setAllExpand(MVT::f32);
+    if (!Subtarget->hasFP64())
+      setAllExpand(MVT::f64);
   }
 
   if (Subtarget->hasFullFP16()) {
@@ -528,9 +648,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   for (MVT VT : MVT::vector_valuetypes()) {
     for (MVT InnerVT : MVT::vector_valuetypes()) {
       setTruncStoreAction(VT, InnerVT, Expand);
-      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
-      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
-      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+      addAllExtLoads(VT, InnerVT, Expand);
     }
 
     setOperationAction(ISD::MULHS, VT, Expand);
@@ -547,6 +665,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
   setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
 
+  if (Subtarget->hasMVEIntegerOps())
+    addMVEVectorTypes(Subtarget->hasMVEFloatOps());
+
+  // Combine low-overhead loop intrinsics so that we can lower i1 types.
+  if (Subtarget->hasLOB())
+    setTargetDAGCombine(ISD::BRCOND);
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -565,11 +690,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       addQRTypeForNEON(MVT::v8f16);
       addDRTypeForNEON(MVT::v4f16);
     }
+  }
 
+  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
-    // neither Neon nor VFP support any arithmetic operations on it.
-    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
-    // supported for v4f32.
+    // none of Neon, MVE or VFP supports any arithmetic operations on it.
     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
@@ -603,7 +728,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
+  }
 
+  if (Subtarget->hasNEON()) {
+    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+    // supported for v4f32.
     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
@@ -697,7 +826,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
 
     // NEON only has FMA instructions as of VFP4.
-    if (!Subtarget->hasVFP4()) {
+    if (!Subtarget->hasVFP4Base()) {
       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
     }
@@ -711,9 +840,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::ANY_EXTEND);
-    setTargetDAGCombine(ISD::BUILD_VECTOR);
-    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
-    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
     setTargetDAGCombine(ISD::STORE);
     setTargetDAGCombine(ISD::FP_TO_SINT);
     setTargetDAGCombine(ISD::FP_TO_UINT);
@@ -731,7 +857,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget->isFPOnlySP()) {
+  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
+    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+  }
+
+  if (!Subtarget->hasFP64()) {
     // When targeting a floating-point unit with only single-precision
     // operations, f64 is legal for the few double-precision instructions which
     // are present However, no double-precision operations other than moves,
@@ -767,9 +899,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
     setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);
+  }
+
+  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){
     setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);
+    setOperationAction(ISD::FP_ROUND,  MVT::f16, Custom);
   }
 
+  if (!Subtarget->hasFP16())
+    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Custom);
+
+  if (!Subtarget->hasFP64())
+    setOperationAction(ISD::FP_ROUND,  MVT::f32, Custom);
+
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
   // ARM does not have floating-point extending loads.
@@ -832,6 +974,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRA,       MVT::i64, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
 
+  // MVE lowers 64-bit shifts to lsll and lsrl, assuming that ISD::SRL and
+  // ISD::SRA of i64 are already marked Custom (see the lines just above).
+  if (Subtarget->hasMVEIntegerOps())
+    setOperationAction(ISD::SHL, MVT::i64, Custom);
+
   // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
   if (Subtarget->isThumb1Only()) {
     setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -1029,7 +1176,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
       !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
@@ -1079,7 +1226,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
   setOperationAction(ISD::FREM,      MVT::f64, Expand);
   setOperationAction(ISD::FREM,      MVT::f32, Expand);
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
@@ -1087,7 +1234,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 
-  if (!Subtarget->hasVFP4()) {
+  if (!Subtarget->hasVFP4Base()) {
     setOperationAction(ISD::FMA, MVT::f64, Expand);
     setOperationAction(ISD::FMA, MVT::f32, Expand);
   }
@@ -1095,7 +1242,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   // Various VFP goodness
   if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
     // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
-    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+    if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
       setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
       setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
     }
@@ -1115,7 +1262,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
 
   // FP-ARMv8 implements a lot of rounding-like FP operations.
-  if (Subtarget->hasFPARMv8()) {
+  if (Subtarget->hasFPARMv8Base()) {
     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
     setOperationAction(ISD::FCEIL, MVT::f32, Legal);
     setOperationAction(ISD::FROUND, MVT::f32, Legal);
@@ -1124,12 +1271,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FRINT, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+    if (Subtarget->hasNEON()) {
+      setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+      setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+    }
 
-    if (!Subtarget->isFPOnlySP()) {
+    if (Subtarget->hasFP64()) {
       setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
       setOperationAction(ISD::FCEIL, MVT::f64, Legal);
       setOperationAction(ISD::FROUND, MVT::f64, Legal);
@@ -1141,6 +1290,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }
 
+  // FP16 operations often need to be promoted to call lib functions.
+  if (Subtarget->hasFullFP16()) {
+    setOperationAction(ISD::FREM, MVT::f16, Promote);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+    setOperationAction(ISD::FSIN, MVT::f16, Promote);
+    setOperationAction(ISD::FCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+    setOperationAction(ISD::FPOW, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+
+    setOperationAction(ISD::FROUND, MVT::f16, Legal);
+  }
+
   if (Subtarget->hasNEON()) {
     // vmin and vmax aren't available in a scalar form, so we use
     // a NEON instruction with an undef lane instead.
@@ -1177,11 +1344,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   if (Subtarget->hasV6Ops())
     setTargetDAGCombine(ISD::SRL);
+  if (Subtarget->isThumb1Only())
+    setTargetDAGCombine(ISD::SHL);
 
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
   if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
-      !Subtarget->hasVFP2())
+      !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
     setSchedulingPreference(Sched::RegPressure);
   else
     setSchedulingPreference(Sched::Hybrid);
@@ -1204,6 +1373,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
 
   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+
+  if (Subtarget->isThumb() || Subtarget->isThumb2())
+    setTargetDAGCombine(ISD::ABS);
 }
 
 bool ARMTargetLowering::useSoftFloat() const {
@@ -1288,6 +1460,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::SSAT:          return "ARMISD::SSAT";
   case ARMISD::USAT:          return "ARMISD::USAT";
 
+  case ARMISD::ASRL:          return "ARMISD::ASRL";
+  case ARMISD::LSRL:          return "ARMISD::LSRL";
+  case ARMISD::LSLL:          return "ARMISD::LSLL";
+
   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
   case ARMISD::RRX:           return "ARMISD::RRX";
@@ -1332,23 +1508,25 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
   case ARMISD::VTST:          return "ARMISD::VTST";
 
-  case ARMISD::VSHL:          return "ARMISD::VSHL";
-  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
-  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
-  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
-  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
-  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
-  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
-  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
-  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
-  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
-  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
-  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
-  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
-  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
-  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
-  case ARMISD::VSLI:          return "ARMISD::VSLI";
-  case ARMISD::VSRI:          return "ARMISD::VSRI";
+  case ARMISD::VSHLs:         return "ARMISD::VSHLs";
+  case ARMISD::VSHLu:         return "ARMISD::VSHLu";
+  case ARMISD::VSHLIMM:       return "ARMISD::VSHLIMM";
+  case ARMISD::VSHRsIMM:      return "ARMISD::VSHRsIMM";
+  case ARMISD::VSHRuIMM:      return "ARMISD::VSHRuIMM";
+  case ARMISD::VRSHRsIMM:     return "ARMISD::VRSHRsIMM";
+  case ARMISD::VRSHRuIMM:     return "ARMISD::VRSHRuIMM";
+  case ARMISD::VRSHRNIMM:     return "ARMISD::VRSHRNIMM";
+  case ARMISD::VQSHLsIMM:     return "ARMISD::VQSHLsIMM";
+  case ARMISD::VQSHLuIMM:     return "ARMISD::VQSHLuIMM";
+  case ARMISD::VQSHLsuIMM:    return "ARMISD::VQSHLsuIMM";
+  case ARMISD::VQSHRNsIMM:    return "ARMISD::VQSHRNsIMM";
+  case ARMISD::VQSHRNuIMM:    return "ARMISD::VQSHRNuIMM";
+  case ARMISD::VQSHRNsuIMM:   return "ARMISD::VQSHRNsuIMM";
+  case ARMISD::VQRSHRNsIMM:   return "ARMISD::VQRSHRNsIMM";
+  case ARMISD::VQRSHRNuIMM:   return "ARMISD::VQRSHRNuIMM";
+  case ARMISD::VQRSHRNsuIMM:  return "ARMISD::VQRSHRNsuIMM";
+  case ARMISD::VSLIIMM:       return "ARMISD::VSLIIMM";
+  case ARMISD::VSRIIMM:       return "ARMISD::VSRIIMM";
   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
@@ -1410,6 +1588,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
+  case ARMISD::WLS:           return "ARMISD::WLS";
   }
   return nullptr;
 }
@@ -1423,11 +1602,14 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  (void)isDivergent;
   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
-  // load / store 4 to 8 consecutive D registers.
-  if (Subtarget->hasNEON()) {
+  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
+  // MVE Q registers.
+  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
     if (VT == MVT::v4i64)
       return &ARM::QQPRRegClass;
     if (VT == MVT::v8i64)
@@ -1590,8 +1772,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-#include "ARMGenCallingConv.inc"
-
 /// getEffectiveCallingConv - Get the effective calling convention, taking into
 /// account presence of floating point hardware and calling convention
 /// limitations, such as support for variadic functions.
@@ -1613,7 +1793,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::C:
     if (!Subtarget->isAAPCS_ABI())
       return CallingConv::ARM_APCS;
-    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
+    else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
              !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
@@ -1622,10 +1802,11 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::Fast:
   case CallingConv::CXX_FAST_TLS:
     if (!Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
         return CallingConv::Fast;
       return CallingConv::ARM_APCS;
-    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+    } else if (Subtarget->hasVFP2Base() &&
+               !Subtarget->isThumb1Only() && !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
     else
       return CallingConv::ARM_AAPCS;
@@ -1807,29 +1988,42 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool isVarArg                         = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool isThisReturn   = false;
-  bool isSibCall      = false;
+  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+  bool isThisReturn = false;
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
+  bool PreferIndirect = false;
 
   // Disable tail calls if they're not supported.
   if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
     isTailCall = false;
 
+  if (isa<GlobalAddressSDNode>(Callee)) {
+    // If we're optimizing for minimum size and the function is called three or
+    // more times in this block, we can improve codesize by calling indirectly
+    // as BLXr has a 16-bit encoding.
+    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+    if (CLI.CS) {
+      auto *BB = CLI.CS.getParent();
+      PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
+                       count_if(GV->users(), [&BB](const User *U) {
+                         return isa<Instruction>(U) &&
+                                cast<Instruction>(U)->getParent() == BB;
+                       }) > 2;
+    }
+  }
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
-                                                   Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, isStructRet,
+        MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
+        PreferIndirect);
     if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
       report_fatal_error("failed to perform tail call elimination on a call "
                          "site marked musttail");
     // We don't support GuaranteedTailCallOpt for ARM, only automatically
     // detected sibcalls.
-    if (isTailCall) {
+    if (isTailCall)
       ++NumTailCalls;
-      isSibCall = true;
-    }
   }
 
   // Analyze operands of the call, assigning locations to each operand.
@@ -1841,14 +2035,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
-  // For tail calls, memory operands are available in our caller's stack.
-  if (isSibCall)
+  if (isTailCall) {
+    // For tail calls, memory operands are available in our caller's stack.
     NumBytes = 0;
-
-  // Adjust the stack pointer for the new arguments...
-  // These operations are automatically eliminated by the prolog/epilog pass
-  if (!isSibCall)
+  } else {
+    // Adjust the stack pointer for the new arguments...
+    // These operations are automatically eliminated by the prolog/epilog pass
     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  }
 
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -1970,7 +2164,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops));
       }
-    } else if (!isSibCall) {
+    } else if (!isTailCall) {
       assert(VA.isMemLoc());
 
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1984,32 +2178,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
   SDValue InFlag;
-  // Tail call byval lowering might overwrite argument registers so in case of
-  // tail call optimization the copies to registers are lowered later.
-  if (!isTailCall)
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-
-  // For tail calls lower the arguments to the 'real' stack slot.
-  if (isTailCall) {
-    // Force all the incoming stack arguments to be loaded from the stack
-    // before any new outgoing arguments are stored to the stack, because the
-    // outgoing stack slots may alias the incoming argument stack slots, and
-    // the alias isn't otherwise explicit. This is slightly more conservative
-    // than necessary, because it means that each store effectively depends
-    // on every argument instead of just those arguments it would clobber.
-
-    // Do not flag preceding copytoreg stuff together with the following stuff.
-    InFlag = SDValue();
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-    InFlag = SDValue();
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
   }
 
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -2064,17 +2236,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
     }
   } else if (isa<GlobalAddressSDNode>(Callee)) {
-    // If we're optimizing for minimum size and the function is called three or
-    // more times in this block, we can improve codesize by calling indirectly
-    // as BLXr has a 16-bit encoding.
-    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
-    auto *BB = CLI.CS.getParent();
-    bool PreferIndirect =
-        Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
-        count_if(GV->users(), [&BB](const User *U) {
-          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
-        }) > 2;
-
     if (!PreferIndirect) {
       isDirect = true;
       bool isDef = GV->isStrongDefinitionForLinker();
@@ -2098,7 +2259,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         unsigned TargetFlags = GV->hasDLLImportStorageClass()
                                    ? ARMII::MO_DLLIMPORT
                                    : ARMII::MO_NO_FLAG;
-        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
+        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
                                             TargetFlags);
         if (GV->hasDLLImportStorageClass())
           Callee =
@@ -2142,7 +2303,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       CallOpc = ARMISD::CALL_NOLINK;
     else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
              // Emit regular call when code size is the priority
-             !MF.getFunction().optForMinSize())
+             !Subtarget->hasMinSize())
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
@@ -2306,28 +2467,25 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
-bool
-ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
-                                                     CallingConv::ID CalleeCC,
-                                                     bool isVarArg,
-                                                     bool isCalleeStructRet,
-                                                     bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                                     SelectionDAG& DAG) const {
+bool ARMTargetLowering::IsEligibleForTailCallOptimization(
+    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+    bool isCalleeStructRet, bool isCallerStructRet,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+    const bool isIndirect) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
 
   assert(Subtarget->supportsTailCall());
 
-  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+  // Indirect tail calls cannot be optimized for Thumb1 if the args
   // to the call take up r0-r3. The reason is that there are no legal registers
   // left to hold the pointer to the function to be called.
   if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
-      !isa<GlobalAddressSDNode>(Callee.getNode()))
-      return false;
+      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
+    return false;
 
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.
@@ -2756,7 +2914,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
     auto M = const_cast<Module*>(DAG.getMachineFunction().
                                  getFunction().getParent());
     auto GV = new GlobalVariable(
-                    *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+                    *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
                     Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
                     Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
                     Twine(AFI->createPICLabelUId())
@@ -3225,7 +3383,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
     SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3245,7 +3403,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3268,7 +3426,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 
-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3288,7 +3446,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -3309,7 +3467,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
   // FIXME: Once remat is capable of dealing with instructions with register
   // operands, expand this into two nodes.
   Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
-                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
                                                   TargetFlags));
   if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
@@ -3615,7 +3773,8 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   // argument passed via stack.
   int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
                                   CCInfo.getInRegsParamsCount(),
-                                  CCInfo.getNextStackOffset(), 4);
+                                  CCInfo.getNextStackOffset(),
+                                  std::max(4U, TotalArgRegsSaveSize));
   AFI->setVarArgsFrameIndex(FrameIndex);
 }
 
@@ -3891,6 +4050,22 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   }
 
   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+
+  // If the RHS is a constant zero then the V (overflow) flag will never be
+  // set. This can allow us to simplify GE to PL or LT to MI, which can be
+  // simpler for other passes (like the peephole optimiser) to deal with.
+  if (isNullConstant(RHS)) {
+    switch (CondCode) {
+      default: break;
+      case ARMCC::GE:
+        CondCode = ARMCC::PL;
+        break;
+      case ARMCC::LT:
+        CondCode = ARMCC::MI;
+        break;
+    }
+  }
+
   ARMISD::NodeType CompareType;
   switch (CondCode) {
   default:
@@ -3910,7 +4085,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
                                      SelectionDAG &DAG, const SDLoc &dl,
                                      bool InvalidOnQNaN) const {
-  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
+  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
   SDValue Cmp;
   SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
   if (!isFloatingPointZero(RHS))
@@ -4175,18 +4350,18 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
   // Start by selecting the GE condition code for opcodes that return true for
   // 'equality'
   if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
-      CC == ISD::SETULE)
+      CC == ISD::SETULE || CC == ISD::SETGE  || CC == ISD::SETLE)
     CondCode = ARMCC::GE;
 
   // and GT for opcodes that return false for 'equality'.
   else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
-           CC == ISD::SETULT)
+           CC == ISD::SETULT || CC == ISD::SETGT  || CC == ISD::SETLT)
     CondCode = ARMCC::GT;
 
   // Since we are constrained to GE/GT, if the opcode contains 'less', we need
   // to swap the compare operands.
   if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
-      CC == ISD::SETULT)
+      CC == ISD::SETULT || CC == ISD::SETLE  || CC == ISD::SETLT)
     swpCmpOps = true;
 
   // Both GT and GE are ordered comparisons, and return false for 'unordered'.
@@ -4212,8 +4387,9 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
   }
 
   // 'unordered or not equal' is 'anything but equal', so use the EQ condition
-  // code and swap the VSEL operands.
-  if (CC == ISD::SETUNE) {
+  // code and swap the VSEL operands. Also do this if we don't care about the
+  // unordered case.
+  if (CC == ISD::SETUNE || CC == ISD::SETNE) {
     CondCode = ARMCC::EQ;
     swpVselOps = true;
   }
@@ -4222,7 +4398,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
                                    SDValue TrueVal, SDValue ARMcc, SDValue CCR,
                                    SDValue Cmp, SelectionDAG &DAG) const {
-  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+  if (!Subtarget->hasFP64() && VT == MVT::f64) {
     FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                            DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
     TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
@@ -4428,6 +4604,16 @@ static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
   return false;
 }
 
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+  if (VT == MVT::f32)
+    return !Subtarget->hasVFP2Base(); // f32 is unsupported without VFP2 base.
+  if (VT == MVT::f64)
+    return !Subtarget->hasFP64(); // f64 is unsupported without FP64.
+  if (VT == MVT::f16)
+    return !Subtarget->hasFullFP16(); // f16 is unsupported without full FP16.
+  return false; // Every other type is reported as not unsupported.
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4471,9 +4657,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
-  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4494,8 +4680,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     // inverting the compare condition, swapping 'less' and 'greater') and
     // sometimes need to swap the operands to the VSEL (which inverts the
     // condition in the sense of firing whenever the previous condition didn't)
-    if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
-                                    TrueVal.getValueType() == MVT::f64)) {
+    if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
+                                        TrueVal.getValueType() == MVT::f32 ||
+                                        TrueVal.getValueType() == MVT::f64)) {
       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
@@ -4507,6 +4694,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    // Choose GE over PL, which vsel does not support
+    if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
+      ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
     return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
   }
 
@@ -4514,12 +4704,15 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   bool InvalidOnQNaN;
   FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
 
-  // Normalize the fp compare. If RHS is zero we keep it there so we match
-  // CMPFPw0 instead of CMPFP.
-  if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
-     (TrueVal.getValueType() == MVT::f16 ||
-      TrueVal.getValueType() == MVT::f32 ||
-      TrueVal.getValueType() == MVT::f64)) {
+  // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
+  // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
+  // must use VSEL (limited condition codes), due to not having conditional f16
+  // moves.
+  if (Subtarget->hasFPARMv8Base() &&
+      !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
+      (TrueVal.getValueType() == MVT::f16 ||
+       TrueVal.getValueType() == MVT::f32 ||
+       TrueVal.getValueType() == MVT::f64)) {
     bool swpCmpOps = false;
     bool swpVselOps = false;
     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -4708,9 +4901,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
-  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4855,7 +5048,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
-  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::FP_TO_SINT)
       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -4919,7 +5112,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
-  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(VT)) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::SINT_TO_FP)
       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
@@ -4952,7 +5145,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                                DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
     if (VT == MVT::f64)
-      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+      Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                          DAG.getConstant(32, dl, MVT::i32));
     else /*if (VT == MVT::f32)*/
@@ -4960,11 +5153,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
     if (SrcVT == MVT::f32) {
       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
       if (VT == MVT::f64)
-        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+        Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                            DAG.getConstant(32, dl, MVT::i32));
     } else if (VT == MVT::f32)
-      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+      Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                          DAG.getConstant(32, dl, MVT::i32));
     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
@@ -5469,40 +5662,100 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
   return Res;
 }
 
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value (returned in Cnt).
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+  // Look through any bitcasts to reach the node underneath.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN ||
+      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Cnt = SplatBits.getSExtValue(); // Sign-extended, so Cnt may be negative.
+  return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation.  That value must be in the range:
+///   0 <= Value < ElementBits for a left shift; or
+///   0 <= Value <= ElementBits for a long left shift (hence the Cnt - 1 test).
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  int64_t ElementBits = VT.getScalarSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false; // Op is not a usable constant splat.
+  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation.  For a shift opcode, the value
+/// is positive, but for an intrinsic the shift count must be negative. The
+/// absolute value must be in the range:
+///   1 <= |Value| <= ElementBits for a right shift; or
+///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+                         int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  int64_t ElementBits = VT.getScalarSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false; // Op is not a usable constant splat.
+  if (!isIntrinsic)
+    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
+  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
+    Cnt = -Cnt; // Hand the magnitude of the shift back to the caller.
+    return true;
+  }
+  return false;
+}
+
 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                           const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
   SDLoc dl(N);
+  int64_t Cnt;
 
   if (!VT.isVector())
     return SDValue();
 
-  // Lower vector shifts on NEON to use VSHL.
-  assert(ST->hasNEON() && "unexpected vector shift");
+  // We essentially have two forms here. Shift by an immediate and shift by a
+  // vector register (there is also shift by a gpr, but that is just handled
+  // with a tablegen pattern). We cannot easily match shift by an immediate in
+  // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
+  // For shifting by a vector, we don't have VSHR, only VSHL (which can be
+  // signed or unsigned, and a negative shift indicates a shift right).
+  if (N->getOpcode() == ISD::SHL) {
+    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
+                         DAG.getConstant(Cnt, dl, MVT::i32));
+    return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
+                       N->getOperand(1));
+  }
 
-  // Left shifts translate directly to the vshiftu intrinsic.
-  if (N->getOpcode() == ISD::SHL)
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
-                                       MVT::i32),
-                       N->getOperand(0), N->getOperand(1));
+  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
+         "unexpected vector shift opcode");
 
-  assert((N->getOpcode() == ISD::SRA ||
-          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+    unsigned VShiftOpc =
+        (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
+    return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+                       DAG.getConstant(Cnt, dl, MVT::i32));
+  }
 
-  // NEON uses the same intrinsics for both left and right shifts.  For
-  // right shifts, the shift amounts are negative, so negate the vector of
-  // shift amounts.
+  // Other right shifts we don't have operations for (we use a shift left by a
+  // negative number).
   EVT ShiftVT = N->getOperand(1).getValueType();
-  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
-                                     getZeroVector(ShiftVT, DAG, dl),
-                                     N->getOperand(1));
-  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
-                             Intrinsic::arm_neon_vshifts :
-                             Intrinsic::arm_neon_vshiftu);
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                     DAG.getConstant(vshiftInt, dl, MVT::i32),
-                     N->getOperand(0), NegatedCount);
+  SDValue NegatedCount = DAG.getNode(
+      ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
+  unsigned VShiftOpc =
+      (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
+  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
 }
 
 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
@@ -5514,15 +5767,59 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   if (VT != MVT::i64)
     return SDValue();
 
-  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
+          N->getOpcode() == ISD::SHL) &&
          "Unknown shift to lower!");
 
+  unsigned ShOpc = N->getOpcode();
+  if (ST->hasMVEIntegerOps()) {
+    SDValue ShAmt = N->getOperand(1);
+    unsigned ShPartsOpc = ARMISD::LSLL;
+    ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
+
+    // If the shift amount is greater than 32 then do the default optimisation
+    if (Con && Con->getZExtValue() > 32)
+      return SDValue();
+
+    // Extract the lower 32 bits of the shift amount if it's an i64
+    if (ShAmt->getValueType(0) == MVT::i64)
+      ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
+                          DAG.getConstant(0, dl, MVT::i32));
+
+    if (ShOpc == ISD::SRL) {
+      if (!Con)
+        // There is no t2LSRLr instruction so negate and perform an lsll if the
+        // shift amount is in a register, emulating a right shift.
+        ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                            DAG.getConstant(0, dl, MVT::i32), ShAmt);
+      else
+        // Else generate an lsrl on the immediate shift amount
+        ShPartsOpc = ARMISD::LSRL;
+    } else if (ShOpc == ISD::SRA)
+      ShPartsOpc = ARMISD::ASRL;
+
+    // Lower 32 bits of the destination/source
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+                             DAG.getConstant(0, dl, MVT::i32));
+    // Upper 32 bits of the destination/source
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+                             DAG.getConstant(1, dl, MVT::i32));
+
+    // Generate the shift operation as computed above
+    Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
+                     ShAmt);
+    // The upper 32 bits come from the second return value of lsll
+    Hi = SDValue(Lo.getNode(), 1);
+    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+  }
+
   // We only lower SRA, SRL of 1 here, all others use generic lowering.
-  if (!isOneConstant(N->getOperand(1)))
+  if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
     return SDValue();
 
   // If we are in thumb mode, we don't have RRX.
-  if (ST->isThumb1Only()) return SDValue();
+  if (ST->isThumb1Only())
+    return SDValue();
 
   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
@@ -5731,7 +6028,7 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
 }
 
 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
-/// valid vector constant for a NEON instruction with a "modified immediate"
+/// valid vector constant for a NEON or MVE instruction with a "modified immediate"
 /// operand (e.g., VMOV).  If so, return the encoded value.
 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
@@ -5817,6 +6114,10 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
       break;
     }
 
+    // cmode == 0b1101 is not supported for MVE VMVN
+    if (type == MVEVMVNModImm)
+      return SDValue();
+
     if ((SplatBits & ~0xffffff) == 0 &&
         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
       // Value = 0x00nnffff: Op=x, Cmode=1101.
@@ -5902,12 +6203,12 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  if (!ST->hasVFP3())
+  if (!ST->hasVFP3Base())
     return SDValue();
 
   // Use the default (constant pool) lowering for double constants when we have
   // an SP-only FPU
-  if (IsDouble && Subtarget->isFPOnlySP())
+  if (IsDouble && !Subtarget->hasFP64())
     return SDValue();
 
   // Try splatting with a VMOV.f32...
@@ -6383,13 +6684,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     if (SplatUndef.isAllOnesValue())
       return DAG.getUNDEF(VT);
 
-    if (SplatBitSize <= 64) {
+    if ((ST->hasNEON() && SplatBitSize <= 64) ||
+        (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
       // Check if an immediate VMOV works.
       EVT VmovVT;
       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
                                       SplatUndef.getZExtValue(), SplatBitSize,
                                       DAG, dl, VmovVT, VT.is128BitVector(),
                                       VMOVModImm);
+
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -6397,10 +6700,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
       // Try an immediate VMVN.
       uint64_t NegatedImm = (~SplatBits).getZExtValue();
-      Val = isNEONModifiedImm(NegatedImm,
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VmovVT, VT.is128BitVector(),
-                                      VMVNModImm);
+      Val = isNEONModifiedImm(
+          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
+          DAG, dl, VmovVT, VT.is128BitVector(),
+          ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -6515,10 +6818,13 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     }
     if (VT.getVectorElementType().isFloatingPoint()) {
       SmallVector<SDValue, 8> Ops;
+      MVT FVT = VT.getVectorElementType().getSimpleVT();
+      assert(FVT == MVT::f32 || FVT == MVT::f16);
+      MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
       for (unsigned i = 0; i < NumElts; ++i)
-        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
                                   Op.getOperand(i)));
-      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
       SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
       Val = LowerBUILD_VECTOR(Val, DAG, ST);
       if (Val.getNode())
@@ -6544,7 +6850,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       return shuffle;
   }
 
-  if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
+  if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
     // If we haven't found an efficient lowering, try splitting a 128-bit vector
     // into two 64-bit vectors; we might discover a better way to lower it.
     SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
@@ -6799,6 +7105,38 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
   return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
 }
 
+enum ShuffleOpCodes {
+  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+  OP_VREV,
+  OP_VDUP0,
+  OP_VDUP1,
+  OP_VDUP2,
+  OP_VDUP3,
+  OP_VEXT1,
+  OP_VEXT2,
+  OP_VEXT3,
+  OP_VUZPL, // VUZP, left result
+  OP_VUZPR, // VUZP, right result
+  OP_VZIPL, // VZIP, left result
+  OP_VZIPR, // VZIP, right result
+  OP_VTRNL, // VTRN, left result
+  OP_VTRNR  // VTRN, right result
+};
+
+static bool isLegalMVEShuffleOp(unsigned PFEntry) {
+  unsigned OpNum = (PFEntry >> 26) & 0x0F;
+  switch (OpNum) {
+  case OP_COPY:
+  case OP_VREV:
+  case OP_VDUP0:
+  case OP_VDUP1:
+  case OP_VDUP2:
+  case OP_VDUP3:
+    return true;
+  }
+  return false;
+}
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -6820,7 +7158,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
     unsigned Cost = (PFEntry >> 30);
 
-    if (Cost <= 4)
+    if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
       return true;
   }
 
@@ -6828,15 +7166,22 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   unsigned Imm, WhichResult;
 
   unsigned EltSize = VT.getScalarSizeInBits();
-  return (EltSize >= 32 ||
-          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
-          isVREVMask(M, VT, 64) ||
-          isVREVMask(M, VT, 32) ||
-          isVREVMask(M, VT, 16) ||
-          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
-          isVTBLMask(M, VT) ||
-          isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
-          ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
+  if (EltSize >= 32 ||
+      ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+      isVREVMask(M, VT, 64) ||
+      isVREVMask(M, VT, 32) ||
+      isVREVMask(M, VT, 16))
+    return true;
+  else if (Subtarget->hasNEON() &&
+           (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+            isVTBLMask(M, VT) ||
+            isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
+    return true;
+  else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
+           isReverseMask(M, VT))
+    return true;
+  else
+    return false;
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -6848,24 +7193,6 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
 
-  enum {
-    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
-    OP_VREV,
-    OP_VDUP0,
-    OP_VDUP1,
-    OP_VDUP2,
-    OP_VDUP3,
-    OP_VEXT1,
-    OP_VEXT2,
-    OP_VEXT3,
-    OP_VUZPL, // VUZP, left result
-    OP_VUZPR, // VUZP, right result
-    OP_VZIPL, // VZIP, left result
-    OP_VZIPR, // VZIP, right result
-    OP_VTRNL, // VTRN, left result
-    OP_VTRNR  // VTRN, right result
-  };
-
   if (OpNum == OP_COPY) {
     if (LHSID == (1*9+2)*9+3) return LHS;
     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
@@ -6955,7 +7282,8 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
                      DAG.getConstant(ExtractNum, DL, MVT::i32));
 }
 
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+                                   const ARMSubtarget *ST) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   SDLoc dl(Op);
@@ -6999,9 +7327,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
                          DAG.getConstant(Lane, dl, MVT::i32));
     }
 
-    bool ReverseVEXT;
-    unsigned Imm;
-    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+    bool ReverseVEXT = false;
+    unsigned Imm = 0;
+    if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
       if (ReverseVEXT)
         std::swap(V1, V2);
       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
@@ -7015,7 +7343,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     if (isVREVMask(ShuffleMask, VT, 16))
       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
 
-    if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+    if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
                          DAG.getConstant(Imm, dl, MVT::i32));
     }
@@ -7025,14 +7353,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     // source operands and with masks corresponding to both results of one of
     // these operations, DAG memoization will ensure that a single node is
     // used for both shuffles.
-    unsigned WhichResult;
-    bool isV_UNDEF;
-    if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
-            ShuffleMask, VT, WhichResult, isV_UNDEF)) {
-      if (isV_UNDEF)
-        V2 = V1;
-      return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
-          .getValue(WhichResult);
+    unsigned WhichResult = 0;
+    bool isV_UNDEF = false;
+    if (ST->hasNEON()) {
+      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+              ShuffleMask, VT, WhichResult, isV_UNDEF)) {
+        if (isV_UNDEF)
+          V2 = V1;
+        return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
+            .getValue(WhichResult);
+      }
     }
 
     // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
@@ -7050,7 +7380,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     // ->
     //   concat(VZIP(v1, v2):0, :1)
     //
-    if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
+    if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
       SDValue SubV1 = V1->getOperand(0);
       SDValue SubV2 = V1->getOperand(1);
       EVT SubVT = SubV1.getValueType();
@@ -7092,8 +7422,18 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
     unsigned Cost = (PFEntry >> 30);
 
-    if (Cost <= 4)
-      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+    if (Cost <= 4) {
+      if (ST->hasNEON())
+        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+      else if (isLegalMVEShuffleOp(PFEntry)) {
+        unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+        unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
+        unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
+        unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
+        if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
+          return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+      }
+    }
   }
 
   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
@@ -7118,22 +7458,50 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   }
 
-  if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+  if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
 
-  if (VT == MVT::v8i8)
+  if (ST->hasNEON() && VT == MVT::v8i8)
     if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
       return NewOp;
 
   return SDValue();
 }
 
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::
+LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
   // INSERT_VECTOR_ELT is legal only for immediate indexes.
   SDValue Lane = Op.getOperand(2);
   if (!isa<ConstantSDNode>(Lane))
     return SDValue();
 
+  SDValue Elt = Op.getOperand(1);
+  EVT EltVT = Elt.getValueType();
+  if (getTypeAction(*DAG.getContext(), EltVT) ==
+      TargetLowering::TypePromoteFloat) {
+    // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
+    // but the type system will try to do that if we don't intervene.
+    // Reinterpret any such vector-element insertion as one with the
+    // corresponding integer types.
+
+    SDLoc dl(Op);
+
+    EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
+    assert(getTypeAction(*DAG.getContext(), IEltVT) !=
+           TargetLowering::TypePromoteFloat);
+
+    SDValue VecIn = Op.getOperand(0);
+    EVT VecVT = VecIn.getValueType();
+    EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
+                                  VecVT.getVectorNumElements());
+
+    SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
+    SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
+    SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
+                                  IVecIn, IElt, Lane);
+    return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
+  }
+
   return Op;
 }
 
@@ -7809,8 +8177,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
     return SDValue();
 
   const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.hasMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();
 
@@ -8063,7 +8430,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SETCCCARRY:    return LowerSETCCCARRY(Op, DAG);
   case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
-  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
@@ -8149,6 +8516,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     break;
   case ISD::SRL:
   case ISD::SRA:
+  case ISD::SHL:
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
   case ISD::SREM:
@@ -8175,6 +8543,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   case ISD::INTRINSIC_WO_CHAIN:
     return ReplaceLongIntrinsic(N, Results, DAG);
+  case ISD::ABS:
+     lowerABS(N, Results, DAG);
+     return ;
+
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -8980,7 +9352,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
 
   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -9003,18 +9375,23 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
     if (Align == 0)
       Align = MF->getDataLayout().getTypeAllocSize(C->getType());
     unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+    MachineMemOperand *CPMMO =
+        MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+                                 MachineMemOperand::MOLoad, 4, 4);
 
     if (IsThumb)
       BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
           .addReg(varEnd, RegState::Define)
           .addConstantPoolIndex(Idx)
-          .add(predOps(ARMCC::AL));
+          .add(predOps(ARMCC::AL))
+          .addMemOperand(CPMMO);
     else
       BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
           .addReg(varEnd, RegState::Define)
           .addConstantPoolIndex(Idx)
           .addImm(0)
-          .add(predOps(ARMCC::AL));
+          .add(predOps(ARMCC::AL))
+          .addMemOperand(CPMMO);
   }
   BB->addSuccessor(loopMBB);
 
@@ -9262,7 +9639,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .add(MI.getOperand(2))  // Rn
         .add(MI.getOperand(3))  // PredImm
         .add(MI.getOperand(4))  // PredReg
-        .add(MI.getOperand(0)); // Rt
+        .add(MI.getOperand(0))  // Rt
+        .cloneMemRefs(MI);
     MI.eraseFromParent();
     return BB;
   }
@@ -10372,6 +10750,22 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformABSCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  SDValue res;
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
+    return SDValue();
+
+  if (!TLI.expandABS(N, res, DAG))
+      return SDValue();
+
+  return res;
+}
+
 /// PerformADDECombine - Target-specific dag combine transform from
 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@@ -10419,11 +10813,28 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
   if (Level == BeforeLegalizeTypes)
     return true;
 
-  if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+  if (N->getOpcode() != ISD::SHL)
     return true;
 
-  if (N->getOpcode() != ISD::SHL)
+  if (Subtarget->isThumb1Only()) {
+    // Avoid making expensive immediates by commuting shifts. (This logic
+    // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
+    // for free.)
+    if (N->getOpcode() != ISD::SHL)
+      return true;
+    SDValue N1 = N->getOperand(0);
+    if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
+        N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
+      return true;
+    if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
+      if (Const->getAPIntValue().ult(256))
+        return false;
+      if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
+          Const->getAPIntValue().sgt(-256))
+        return false;
+    }
     return true;
+  }
 
   // Turn off commute-with-shift transform after legalization, so it doesn't
   // conflict with PerformSHLSimplify.  (We could try to detect when
@@ -10432,9 +10843,8 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
   return false;
 }
 
-bool
-ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
-                                             CombineLevel Level) const {
+bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
+    const SDNode *N, CombineLevel Level) const {
   if (!Subtarget->isThumb1Only())
     return true;
 
@@ -10444,6 +10854,15 @@ ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
   return false;
 }
 
+bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  if (!Subtarget->hasNEON()) {
+    if (Subtarget->isThumb1Only())
+      return VT.getScalarSizeInBits() <= 32;
+    return true;
+  }
+  return VT.isScalarInteger();
+}
+
 static SDValue PerformSHLSimplify(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *ST) {
@@ -10830,7 +11249,7 @@ static SDValue PerformANDCombine(SDNode *N,
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN &&
+  if (BVN && Subtarget->hasNEON() &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VbicVT;
@@ -11308,7 +11727,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
                                      const ARMSubtarget *Subtarget) {
   // vmovrrd(vmovdrr x, y) -> x,y
   SDValue InDouble = N->getOperand(0);
-  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
+  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
 
   // vmovrrd(load f64) -> (load i32), (load i32)
@@ -11329,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
 
     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, DL, MVT::i32));
-    SDValue NewLD2 = DAG.getLoad(
-        MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
-        std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
+
+    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
+                                 LD->getPointerInfo().getWithOffset(4),
+                                 std::min(4U, LD->getAlignment()),
+                                 LD->getMemOperand()->getFlags());
 
     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
     if (DCI.DAG.getDataLayout().isBigEndian())
@@ -11922,10 +12343,14 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
 
 /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
 static SDValue PerformVDUPCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI) {
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
   SDValue Op = N->getOperand(0);
 
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
   // Match VDUP(LOAD) -> VLD1DUP.
   // We match this pattern here rather than waiting for isel because the
   // transform is only legal for unindexed loads.
@@ -12132,11 +12557,11 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
   MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
   uint32_t IntBits = IntTy.getSizeInBits();
   unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
     // These instructions only exist converting from f32 to i32. We can handle
     // smaller integers by generating an extra truncate, but larger ones would
-    // be lossy. We also can't handle more then 4 lanes, since these intructions
-    // only support v2i32/v4i32 types.
+    // be lossy. We also can't handle anything other than 2 or 4 lanes, since
+    // these instructions only support v2i32/v4i32 types.
     return SDValue();
   }
 
@@ -12190,11 +12615,11 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
   MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
   uint32_t IntBits = IntTy.getSizeInBits();
   unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
     // These instructions only exist converting from i32 to f32. We can handle
     // smaller integers by generating an extra extend, but larger ones would
-    // be lossy. We also can't handle more then 4 lanes, since these intructions
-    // only support v2i32/v4i32 types.
+    // be lossy. We also can't handle anything other than 2 or 4 lanes, since
+    // these instructions only support v2i32/v4i32 types.
     return SDValue();
   }
 
@@ -12220,58 +12645,6 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                      ConvInput, DAG.getConstant(C, dl, MVT::i32));
 }
 
-/// Getvshiftimm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift operation, where all the elements of the
-/// build_vector must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
-  // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BITCAST)
-    Op = Op.getOperand(0);
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
-  APInt SplatBits, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
-                                      HasAnyUndefs, ElementBits) ||
-      SplatBitSize > ElementBits)
-    return false;
-  Cnt = SplatBits.getSExtValue();
-  return true;
-}
-
-/// isVShiftLImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift left operation.  That value must be in the range:
-///   0 <= Value < ElementBits for a left shift; or
-///   0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
-  assert(VT.isVector() && "vector shift count is not a vector type");
-  int64_t ElementBits = VT.getScalarSizeInBits();
-  if (! getVShiftImm(Op, ElementBits, Cnt))
-    return false;
-  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
-}
-
-/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation.  For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-///   1 <= |Value| <= ElementBits for a right shift; or
-///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
-                         int64_t &Cnt) {
-  assert(VT.isVector() && "vector shift count is not a vector type");
-  int64_t ElementBits = VT.getScalarSizeInBits();
-  if (! getVShiftImm(Op, ElementBits, Cnt))
-    return false;
-  if (!isIntrinsic)
-    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
-  if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
-    Cnt = -Cnt;
-    return true;
-  }
-  return false;
-}
-
 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@@ -12307,12 +12680,12 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     case Intrinsic::arm_neon_vshifts:
     case Intrinsic::arm_neon_vshiftu:
       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
-        VShiftOpc = ARMISD::VSHL;
+        VShiftOpc = ARMISD::VSHLIMM;
         break;
       }
       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
-        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
-                     ARMISD::VSHRs : ARMISD::VSHRu);
+        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
+                                                          : ARMISD::VSHRuIMM);
         break;
       }
       return SDValue();
@@ -12357,29 +12730,41 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
       // Opcode already set above.
       break;
     case Intrinsic::arm_neon_vrshifts:
-      VShiftOpc = ARMISD::VRSHRs; break;
+      VShiftOpc = ARMISD::VRSHRsIMM;
+      break;
     case Intrinsic::arm_neon_vrshiftu:
-      VShiftOpc = ARMISD::VRSHRu; break;
+      VShiftOpc = ARMISD::VRSHRuIMM;
+      break;
     case Intrinsic::arm_neon_vrshiftn:
-      VShiftOpc = ARMISD::VRSHRN; break;
+      VShiftOpc = ARMISD::VRSHRNIMM;
+      break;
     case Intrinsic::arm_neon_vqshifts:
-      VShiftOpc = ARMISD::VQSHLs; break;
+      VShiftOpc = ARMISD::VQSHLsIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftu:
-      VShiftOpc = ARMISD::VQSHLu; break;
+      VShiftOpc = ARMISD::VQSHLuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftsu:
-      VShiftOpc = ARMISD::VQSHLsu; break;
+      VShiftOpc = ARMISD::VQSHLsuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftns:
-      VShiftOpc = ARMISD::VQSHRNs; break;
+      VShiftOpc = ARMISD::VQSHRNsIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftnu:
-      VShiftOpc = ARMISD::VQSHRNu; break;
+      VShiftOpc = ARMISD::VQSHRNuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftnsu:
-      VShiftOpc = ARMISD::VQSHRNsu; break;
+      VShiftOpc = ARMISD::VQSHRNsuIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftns:
-      VShiftOpc = ARMISD::VQRSHRNs; break;
+      VShiftOpc = ARMISD::VQRSHRNsIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftnu:
-      VShiftOpc = ARMISD::VQRSHRNu; break;
+      VShiftOpc = ARMISD::VQRSHRNuIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftnsu:
-      VShiftOpc = ARMISD::VQRSHRNsu; break;
+      VShiftOpc = ARMISD::VQRSHRNsuIMM;
+      break;
     }
 
     SDLoc dl(N);
@@ -12393,9 +12778,9 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     unsigned VShiftOpc = 0;
 
     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
-      VShiftOpc = ARMISD::VSLI;
+      VShiftOpc = ARMISD::VSLIIMM;
     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
-      VShiftOpc = ARMISD::VSRI;
+      VShiftOpc = ARMISD::VSRIIMM;
     else {
       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
     }
@@ -12420,8 +12805,10 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 /// combining instead of DAG legalizing because the build_vectors for 64-bit
 /// vector element shift counts are generally not legal, and it is hard to see
 /// their values after they get legalized to loads from a constant pool.
-static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformShiftCombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
                                    const ARMSubtarget *ST) {
+  SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
   if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
     // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
@@ -12436,12 +12823,47 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
+      N->getOperand(0)->getOpcode() == ISD::AND &&
+      N->getOperand(0)->hasOneUse()) {
+    if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+      return SDValue();
+    // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
+    // usually show up because instcombine prefers to canonicalize it to
+    // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
+    // out of GEP lowering in some cases.
+    SDValue N0 = N->getOperand(0);
+    ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!ShiftAmtNode)
+      return SDValue();
+    uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
+    ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (!AndMaskNode)
+      return SDValue();
+    uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
+    // Don't transform uxtb/uxth.
+    if (AndMask == 255 || AndMask == 65535)
+      return SDValue();
+    if (isMask_32(AndMask)) {
+      uint32_t MaskedBits = countLeadingZeros(AndMask);
+      if (MaskedBits > ShiftAmt) {
+        SDLoc DL(N);
+        SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
+                                  DAG.getConstant(MaskedBits, DL, MVT::i32));
+        return DAG.getNode(
+            ISD::SRL, DL, MVT::i32, SHL,
+            DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
+      }
+    }
+  }
+
   // Nothing to be done for scalar shifts.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!VT.isVector() || !TLI.isTypeLegal(VT))
     return SDValue();
+  if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+    return SDValue();
 
-  assert(ST->hasNEON() && "unexpected vector shift");
   int64_t Cnt;
 
   switch (N->getOpcode()) {
@@ -12450,7 +12872,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   case ISD::SHL:
     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
       SDLoc dl(N);
-      return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
                          DAG.getConstant(Cnt, dl, MVT::i32));
     }
     break;
@@ -12458,8 +12880,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   case ISD::SRA:
   case ISD::SRL:
     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
-      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
-                            ARMISD::VSHRs : ARMISD::VSHRu);
+      unsigned VShiftOpc =
+          (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
       SDLoc dl(N);
       return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
                          DAG.getConstant(Cnt, dl, MVT::i32));
@@ -12606,6 +13028,45 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
   return V;
 }
 
+static SDValue PerformHWLoopCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const ARMSubtarget *ST) {
+  // Look for (brcond (xor test.set.loop.iterations, -1))
+  SDValue CC = N->getOperand(1);
+  unsigned Opc = CC->getOpcode();
+  SDValue Int;
+
+  if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
+      (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
+
+    assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
+            cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
+            "Expected to compare against 1");
+
+    Int = CC->getOperand(0);
+  } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
+    Int = CC;
+  else 
+    return SDValue();
+
+  unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
+  if (IntOp != Intrinsic::test_set_loop_iterations)
+    return SDValue();
+
+  SDLoc dl(Int);
+  SDValue Chain = N->getOperand(0);
+  SDValue Elements = Int.getOperand(2);
+  SDValue ExitBlock = N->getOperand(2);
+
+  // TODO: Once we start supporting tail predication, we can add another
+  // operand to WLS for the number of elements processed in a vector loop.
+
+  SDValue Ops[] = { Chain, Elements, ExitBlock };
+  SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+  DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
+  return Res;
+}
+
 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
 SDValue
 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -12779,15 +13240,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   // On Thumb1, the DAG above may be further combined if z is a power of 2
   // (z == 2 ^ K).
   // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
-  //       merge t3, t4
-  // where t1 = (SUBCARRY (SUB x, y), z, 0)
-  //       t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
-  //       t3 = if K != 0 then (SHL t2:0, K) else t2:0
-  //       t4 = (SUB 1, t2:1)   [ we want a carry, not a borrow ]
+  // t1 = (USUBO (SUB x, y), 1)
+  // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
+  // Result = if K != 0 then (SHL t2:0, K) else t2:0
+  //
+  // This also handles the special case of comparing against zero; it's
+  // essentially the same pattern, except there's no SUBS:
+  // CMOV x, z, !=, (CMPZ x, 0) ->
+  // t1 = (USUBO x, 1)
+  // t2 = (SUBCARRY x, t1:0, t1:1)
+  // Result = if K != 0 then (SHL t2:0, K) else t2:0
   const APInt *TrueConst;
   if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
-      (FalseVal.getOpcode() == ARMISD::SUBS) &&
-      (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
+      ((FalseVal.getOpcode() == ARMISD::SUBS &&
+        FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
+       (FalseVal == LHS && isNullConstant(RHS))) &&
       (TrueConst = isPowerOf2Constant(TrueVal))) {
     SDVTList VTs = DAG.getVTList(VT, MVT::i32);
     unsigned ShiftAmount = TrueConst->logBase2();
@@ -12795,10 +13262,6 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
       TrueVal = DAG.getConstant(1, dl, VT);
     SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
     Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
-    // Make it a carry, not a borrow.
-    SDValue Carry = DAG.getNode(
-        ISD::SUB, dl, VT, DAG.getConstant(1, dl, MVT::i32), Res.getValue(1));
-    Res = DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Res, Carry);
 
     if (ShiftAmount)
       Res = DAG.getNode(ISD::SHL, dl, VT, Res,
@@ -12826,6 +13289,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
   default: break;
+  case ISD::ABS:        return PerformABSCombine(N, DCI, Subtarget);
   case ARMISD::ADDE:    return PerformADDECombine(N, DCI, Subtarget);
   case ARMISD::UMLAL:   return PerformUMLALCombine(N, DCI.DAG, Subtarget);
   case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
@@ -12834,6 +13298,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
   case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
+  case ISD::BRCOND:     return PerformHWLoopCombine(N, DCI, Subtarget);
   case ARMISD::ADDC:
   case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);
   case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI, Subtarget);
@@ -12845,7 +13310,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
-  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
+  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return PerformVCVTCombine(N, DCI.DAG, Subtarget);
@@ -12854,7 +13319,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
   case ISD::SRA:
-  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
+  case ISD::SRL:
+    return PerformShiftCombine(N, DCI, Subtarget);
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
@@ -12957,9 +13423,9 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
   return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
 }
 
-bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                       unsigned,
-                                                       unsigned,
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
+                                                       unsigned Alignment,
+                                                       MachineMemOperand::Flags,
                                                        bool *Fast) const {
   // Depends what it gets converted into if the type is weird.
   if (!VT.isSimple())
@@ -12967,23 +13433,18 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
 
   // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
   bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
+  auto Ty = VT.getSimpleVT().SimpleTy;
 
-  switch (VT.getSimpleVT().SimpleTy) {
-  default:
-    return false;
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32: {
+  if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
     // Unaligned access can use (for example) LRDB, LRDH, LDR
     if (AllowsUnaligned) {
       if (Fast)
         *Fast = Subtarget->hasV7Ops();
       return true;
     }
-    return false;
   }
-  case MVT::f64:
-  case MVT::v2f64: {
+
+  if (Ty == MVT::f64 || Ty == MVT::v2f64) {
     // For any little-endian targets with neon, we can support unaligned ld/st
     // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
     // A big-endian target may also explicitly support unaligned accesses
@@ -12992,9 +13453,54 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
         *Fast = true;
       return true;
     }
-    return false;
   }
+
+  if (!Subtarget->hasMVEIntegerOps())
+    return false;
+  if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
+      Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
+      Ty != MVT::v2f64 &&
+      // These are for truncated stores
+      Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
+    return false;
+
+  if (Subtarget->isLittle()) {
+    // In little-endian MVE, the store instructions VSTRB.U8,
+    // VSTRH.U16 and VSTRW.U32 all store the vector register in
+    // exactly the same format, and differ only in the range of
+    // their immediate offset field and the required alignment.
+    //
+    // In particular, VSTRB.U8 can store a vector at byte alignment.
+    // So at this stage we can simply say that loads/stores of all
+    // 128-bit wide vector types are permitted at any alignment,
+    // because we know at least _one_ instruction can manage that.
+    //
+    // Later on we might find that some of those loads are better
+    // generated as VLDRW.U32 if alignment permits, to take
+    // advantage of the larger immediate range. But for the moment,
+    // all that matters is that if we don't lower the load then
+    // _some_ instruction can handle it.
+    if (Fast)
+      *Fast = true;
+    return true;
+  } else {
+    // In big-endian MVE, those instructions aren't so similar
+    // after all, because they reorder the bytes of the vector
+    // differently. So this time we can only store a particular
+    // kind of vector if its alignment is at least the element
+    // size. And we can't store vectors of i64 or f64 at all
+    // without having to do some postprocessing, because there's
+    // no VSTRD.U64.
+    if (Ty == MVT::v16i8 ||
+        ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
+        ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
+      if (Fast)
+        *Fast = true;
+      return true;
+    }
   }
+
+  return false;
 }
 
 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
@@ -13003,24 +13509,24 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
           (DstAlign == 0 || DstAlign % AlignCheck == 0));
 }
 
-EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
-                                           unsigned DstAlign, unsigned SrcAlign,
-                                           bool IsMemset, bool ZeroMemset,
-                                           bool MemcpyStrSrc,
-                                           MachineFunction &MF) const {
-  const Function &F = MF.getFunction();
-
+EVT ARMTargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
   if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
-      !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+      !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
     if (Size >= 16 &&
         (memOpAlign(SrcAlign, DstAlign, 16) ||
-         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
+         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
+                                         MachineMemOperand::MONone, &Fast) &&
+          Fast))) {
       return MVT::v2f64;
     } else if (Size >= 8 &&
                (memOpAlign(SrcAlign, DstAlign, 8) ||
-                (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+                (allowsMisalignedMemoryAccesses(
+                     MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                  Fast))) {
       return MVT::f64;
     }
@@ -13089,6 +13595,46 @@ bool ARMTargetLowering::isFNegFree(EVT VT) const {
   return false;
 }
 
+/// Return true when Ext1 and Ext2 are each a zext or sext whose scalar width
+/// is exactly twice that of its operand; the two extend kinds may differ.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+  auto areExtDoubled = [](Instruction *Ext) {
+    return Ext->getType()->getScalarSizeInBits() ==
+           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+  };
+  // NOTE(review): cast<> below assumes every matched extend is an Instruction.
+  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
+      !match(Ext2, m_ZExtOrSExt(m_Value())) ||
+      !areExtDoubled(cast<Instruction>(Ext1)) ||
+      !areExtDoubled(cast<Instruction>(Ext2)))
+    return false;
+
+  return true;
+}
+
+/// Return true, appending both operand uses to \p Ops, when sinking \p I's
+/// operands into its block is deemed profitable: NEON only, and \p I must be a
+/// vector add/sub of two width-doubling sext/zext (foldable into e.g. vsubl).
+bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
+                                           SmallVectorImpl<Use *> &Ops) const {
+  if (!Subtarget->hasNEON() || !I->getType()->isVectorTy())
+    return false;
+  // Only Add/Sub whose two operands pass areExtractExts report a sink.
+  switch (I->getOpcode()) {
+  case Instruction::Sub:
+  case Instruction::Add: {
+    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+      return false;
+    Ops.push_back(&I->getOperandUse(0));
+    Ops.push_back(&I->getOperandUse(1));
+    return true;
+  }
+  default:
+    return false;
+  }
+  return false; // Not reached: every path through the switch returns.
+}
+
 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   EVT VT = ExtVal.getValueType();
 
@@ -13105,7 +13651,7 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
 
   SDNode *U = *ExtVal->use_begin();
   if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
-       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
     return false;
 
   return true;
@@ -13142,7 +13688,6 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
 
   unsigned Scale = 1;
   switch (VT.getSimpleVT().SimpleTy) {
-  default: return false;
   case MVT::i1:
   case MVT::i8:
     // Scale == 1;
@@ -13151,7 +13696,8 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
     // Scale == 2;
     Scale = 2;
     break;
-  case MVT::i32:
+  default:
+    // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
     // Scale == 4;
     Scale = 4;
     break;
@@ -13159,38 +13705,58 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
 
   if ((V & (Scale - 1)) != 0)
     return false;
-  V /= Scale;
-  return V == (V & ((1LL << 5) - 1));
+  return isUInt<5>(V / Scale);
 }
 
 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
                                       const ARMSubtarget *Subtarget) {
-  bool isNeg = false;
+  if (!VT.isInteger() && !VT.isFloatingPoint())
+    return false;
+  if (VT.isVector() && Subtarget->hasNEON())
+    return false;
+  if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
+      !Subtarget->hasMVEFloatOps())
+    return false;
+
+  bool IsNeg = false;
   if (V < 0) {
-    isNeg = true;
-    V = - V;
+    IsNeg = true;
+    V = -V;
   }
 
-  switch (VT.getSimpleVT().SimpleTy) {
-  default: return false;
-  case MVT::i1:
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32:
-    // + imm12 or - imm8
-    if (isNeg)
-      return V == (V & ((1LL << 8) - 1));
-    return V == (V & ((1LL << 12) - 1));
-  case MVT::f32:
-  case MVT::f64:
-    // Same as ARM mode. FIXME: NEON?
-    if (!Subtarget->hasVFP2())
-      return false;
-    if ((V & 3) != 0)
+  unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
+
+  // MVE: size * imm7
+  if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
+    switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
+    case MVT::i32:
+    case MVT::f32:
+      return isShiftedUInt<7,2>(V);
+    case MVT::i16:
+    case MVT::f16:
+      return isShiftedUInt<7,1>(V);
+    case MVT::i8:
+      return isUInt<7>(V);
+    default:
       return false;
-    V >>= 2;
-    return V == (V & ((1LL << 8) - 1));
+    }
   }
+
+  // half VLDR: 2 * imm8
+  if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
+    return isShiftedUInt<8, 1>(V);
+  // VLDR and LDRD: 4 * imm8
+  if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
+    return isShiftedUInt<8, 2>(V);
+
+  if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
+    // + imm12 or - imm8
+    if (IsNeg)
+      return isUInt<8>(V);
+    return isUInt<12>(V);
+  }
+
+  return false;
 }
 
 /// isLegalAddressImmediate - Return true if the integer value can be used
@@ -13218,18 +13784,15 @@ static bool isLegalAddressImmediate(int64_t V, EVT VT,
   case MVT::i8:
   case MVT::i32:
     // +- imm12
-    return V == (V & ((1LL << 12) - 1));
+    return isUInt<12>(V);
   case MVT::i16:
     // +- imm8
-    return V == (V & ((1LL << 8) - 1));
+    return isUInt<8>(V);
   case MVT::f32:
   case MVT::f64:
-    if (!Subtarget->hasVFP2()) // FIXME: NEON?
+    if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
       return false;
-    if ((V & 3) != 0)
-      return false;
-    V >>= 2;
-    return V == (V & ((1LL << 8) - 1));
+    return isShiftedUInt<8, 2>(V);
   }
 }
 
@@ -13649,13 +14212,13 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     EVT VT = Op.getValueType();
     const unsigned DstSz = VT.getScalarSizeInBits();
     const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+    (void)SrcSz;
     assert(SrcSz == Known.getBitWidth());
     assert(DstSz > SrcSz);
     if (Op.getOpcode() == ARMISD::VGETLANEs)
       Known = Known.sext(DstSz);
     else {
-      Known = Known.zext(DstSz);
-      Known.Zero.setBitsFrom(SrcSz);
+      Known = Known.zext(DstSz, true /* extended bits are known zero */);
     }
     assert(DstSz == Known.getBitWidth());
     break;
@@ -13790,7 +14353,7 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   // Although we are correct (we are free to emit anything, without
   // constraints), we might break use cases that would expect us to be more
   // efficient and emit something else.
-  if (!Subtarget->hasVFP2())
+  if (!Subtarget->hasVFP2Base())
     return "r";
   if (ConstraintVT.isFloatingPoint())
     return "w";
@@ -13822,6 +14385,7 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
   } else if (Constraint.size() == 2) {
     switch (Constraint[0]) {
     default: break;
+    case 'T': return C_RegisterClass;
     // All 'U+' constraints are addresses.
     case 'U': return C_Memory;
     }
@@ -13867,7 +14431,8 @@ using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
 
 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
-  if (Constraint.size() == 1) {
+  switch (Constraint.size()) {
+  case 1:
     // GCC ARM Constraint Letters
     switch (Constraint[0]) {
     case 'l': // Low regs or general regs.
@@ -13913,7 +14478,25 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
         return RCPair(0U, &ARM::QPR_VFP2RegClass);
       break;
     }
+    break;
+
+  case 2:
+    if (Constraint[0] == 'T') {
+      switch (Constraint[1]) {
+      default:
+        break;
+      case 'e':
+        return RCPair(0U, &ARM::tGPREvenRegClass);
+      case 'o':
+        return RCPair(0U, &ARM::tGPROddRegClass);
+      }
+    }
+    break;
+
+  default:
+    break;
   }
+
   if (StringRef("{cc}").equals_lower(Constraint))
     return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
 
@@ -14272,28 +14855,107 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
 }
 
 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+  SDValue SrcVal = Op.getOperand(0);
+  const unsigned DstSz = Op.getValueType().getSizeInBits();
+  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
+  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
          "Unexpected type for custom-lowering FP_EXTEND");
 
+  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+         "With both FP DP and 16, any FP conversion is legal!");
+
+  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
+         "With FP16, 16 to 32 conversion is legal!");
+
+  // Two cases reach this point. A 16 -> 64 conversion (lacking FP16,
+  // double-precision FP, or Armv8 FP) is done in two steps: 16 -> 32, then
+  // 32 -> 64.
+  // A 32 -> 64 conversion without double-precision FP, or a 16 -> 32
+  // conversion without FP16, is done with a library call.
+  SDLoc Loc(Op);
   RTLIB::Libcall LC;
-  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+  if (SrcSz == 16) {
+    // Instruction from 16 -> 32
+    if (Subtarget->hasFP16())
+      SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, SrcVal);
+    // Lib call from 16 -> 32
+    else {
+      LC = RTLIB::getFPEXT(MVT::f16, MVT::f32);
+      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+             "Unexpected type for custom-lowering FP_EXTEND");
+      SrcVal =
+        makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
+    }
+  }
 
-  SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
-                     SDLoc(Op)).first;
+  if (DstSz != 64)
+    return SrcVal;
+  // For sure now SrcVal is 32 bits
+  if (Subtarget->hasFP64()) // Instruction from 32 -> 64
+    return DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, SrcVal);
+
+  LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Unexpected type for custom-lowering FP_EXTEND");
+  return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
 }
 
 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
-         Subtarget->isFPOnlySP() &&
+  SDValue SrcVal = Op.getOperand(0);
+  EVT SrcVT = SrcVal.getValueType();
+  EVT DstVT = Op.getValueType();
+  const unsigned DstSz = Op.getValueType().getSizeInBits();
+  const unsigned SrcSz = SrcVT.getSizeInBits();
+  (void)DstSz;
+  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
          "Unexpected type for custom-lowering FP_ROUND");
 
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+         "With both FP DP and 16, any FP conversion is legal!");
 
-  SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
-                     SDLoc(Op)).first;
+  SDLoc Loc(Op);
+
+  // Instruction from 32 -> 16 if hasFP16 is valid
+  if (SrcSz == 32 && Subtarget->hasFP16())
+    return Op;
+
+  // Lib call from 32 -> 16 / 64 -> [32, 16]
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Unexpected type for custom-lowering FP_ROUND");
+  return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
+}
+
+void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                 SelectionDAG &DAG) const {
+  assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
+  MVT HalfT = MVT::i32;
+  SDLoc dl(N);
+  SDValue Hi, Lo, Tmp;
+  // Expand i64 ABS on i32 halves; without the ops below, Results stays empty.
+  if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
+      !isOperationLegalOrCustom(ISD::UADDO, HalfT))
+    return ;
+
+  unsigned OpTypeBits = HalfT.getScalarSizeInBits();
+  SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+  // Element 0 is taken as Lo and element 1 as Hi of the i64 operand.
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                   DAG.getConstant(0, dl, HalfT));
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                   DAG.getConstant(1, dl, HalfT));
+  // Tmp = Hi >> 31 (arithmetic); each half is (Tmp + half [+ carry]) ^ Tmp.
+  Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
+                    DAG.getConstant(OpTypeBits - 1, dl,
+                    getShiftAmountTy(HalfT, DAG.getDataLayout())));
+  Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+  Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+                   SDValue(Lo.getNode(), 1));
+  Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+  Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+  // Results receive the low half first, then the high half.
+  Results.push_back(Lo);
+  Results.push_back(Hi);
+}
 
 bool
@@ -14314,14 +14976,15 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
 /// isFPImmLegal - Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
-bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  if (!Subtarget->hasVFP3())
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                     bool ForCodeSize) const {
+  if (!Subtarget->hasVFP3Base())
     return false;
   if (VT == MVT::f16 && Subtarget->hasFullFP16())
     return ARM_AM::getFP16Imm(Imm) != -1;
   if (VT == MVT::f32)
     return ARM_AM::getFP32Imm(Imm) != -1;
-  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
+  if (VT == MVT::f64 && Subtarget->hasFP64())
     return ARM_AM::getFP64Imm(Imm) != -1;
   return false;
 }
@@ -14590,6 +15253,9 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
 // and up to 64 bits on the non-M profiles
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
   return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
@@ -14621,6 +15287,36 @@ bool ARMTargetLowering::useLoadStackGuardNode() const {
   return Subtarget->isTargetMachO();
 }
 
+void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
+  if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+    return TargetLowering::insertSSPDeclarations(M);
+  // Everything below applies to the Windows MSVC environment only.
+  // MSVC CRT has a global variable holding security cookie; declared as i8*.
+  M.getOrInsertGlobal("__security_cookie",
+                      Type::getInt8PtrTy(M.getContext()));
+
+  // MSVC CRT has a function to validate security cookie: void(i8*).
+  FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+      "__security_check_cookie", Type::getVoidTy(M.getContext()),
+      Type::getInt8PtrTy(M.getContext()));
+  if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
+    F->addAttribute(1, Attribute::AttrKind::InReg); // presumably the 1st param
+}
+
+Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
+  // MSVC environment: use the module's __security_cookie global as the guard.
+  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+    return M.getGlobalVariable("__security_cookie");
+  return TargetLowering::getSDagStackGuard(M); // otherwise: generic behaviour
+}
+
+Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
+  // MSVC environment: the checker is the module's __security_check_cookie.
+  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+    return M.getFunction("__security_check_cookie");
+  return TargetLowering::getSSPStackGuardCheck(M); // otherwise: generic
+}
+
 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                                   unsigned &Cost) const {
   // If we do not have NEON, vector types are not natively supported.
@@ -14658,6 +15354,10 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget->hasV6T2Ops();
 }
 
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->hasMinSize(); // DAG and N play no part in the answer
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -14850,8 +15550,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
     // If we're generating more than one load, compute the base address of
     // subsequent loads as an offset from the previous.
     if (LoadCount > 0)
-      BaseAddr = Builder.CreateConstGEP1_32(
-          BaseAddr, VecTy->getVectorNumElements() * Factor);
+      BaseAddr =
+          Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
+                                     VecTy->getVectorNumElements() * Factor);
 
     SmallVector<Value *, 2> Ops;
     Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
@@ -14990,7 +15691,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
     // If we generating more than one store, we compute the base address of
     // subsequent stores as an offset from the previous.
     if (StoreCount > 0)
-      BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+      BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+                                            BaseAddr, LaneLen * Factor);
 
     SmallVector<Value *, 6> Ops;
     Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7a9fc739fc13..1675ec59a354 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -1,9 +1,8 @@
 //===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -77,6 +76,10 @@ class VectorType;
 
       PIC_ADD,      // Add with a PC operand and a PIC label.
 
+      ASRL,         // MVE long arithmetic shift right.
+      LSRL,         // MVE long shift right.
+      LSLL,         // MVE long shift left.
+
       CMP,          // ARM compare instructions.
       CMN,          // ARM CMN instructions.
       CMPZ,         // ARM compare that sets only Z flag.
@@ -122,6 +125,8 @@ class VectorType;
       WIN__CHKSTK,  // Windows' __chkstk call to do stack probing.
       WIN__DBZCHK,  // Windows' divide by zero check
 
+      WLS,          // Low-overhead loops, While Loop Start
+
       VCEQ,         // Vector compare equal.
       VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
@@ -134,32 +139,36 @@ class VectorType;
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
 
+      // Vector shift by vector
+      VSHLs,        // ...left/right by signed
+      VSHLu,        // ...left/right by unsigned
+
       // Vector shift by immediate:
-      VSHL,         // ...left
-      VSHRs,        // ...right (signed)
-      VSHRu,        // ...right (unsigned)
+      VSHLIMM,      // ...left
+      VSHRsIMM,     // ...right (signed)
+      VSHRuIMM,     // ...right (unsigned)
 
       // Vector rounding shift by immediate:
-      VRSHRs,       // ...right (signed)
-      VRSHRu,       // ...right (unsigned)
-      VRSHRN,       // ...right narrow
+      VRSHRsIMM,    // ...right (signed)
+      VRSHRuIMM,    // ...right (unsigned)
+      VRSHRNIMM,    // ...right narrow
 
       // Vector saturating shift by immediate:
-      VQSHLs,       // ...left (signed)
-      VQSHLu,       // ...left (unsigned)
-      VQSHLsu,      // ...left (signed to unsigned)
-      VQSHRNs,      // ...right narrow (signed)
-      VQSHRNu,      // ...right narrow (unsigned)
-      VQSHRNsu,     // ...right narrow (signed to unsigned)
+      VQSHLsIMM,    // ...left (signed)
+      VQSHLuIMM,    // ...left (unsigned)
+      VQSHLsuIMM,   // ...left (signed to unsigned)
+      VQSHRNsIMM,   // ...right narrow (signed)
+      VQSHRNuIMM,   // ...right narrow (unsigned)
+      VQSHRNsuIMM,  // ...right narrow (signed to unsigned)
 
       // Vector saturating rounding shift by immediate:
-      VQRSHRNs,     // ...right narrow (signed)
-      VQRSHRNu,     // ...right narrow (unsigned)
-      VQRSHRNsu,    // ...right narrow (signed to unsigned)
+      VQRSHRNsIMM,  // ...right narrow (signed)
+      VQRSHRNuIMM,  // ...right narrow (unsigned)
+      VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
 
       // Vector shift and insert:
-      VSLI,         // ...left
-      VSRI,         // ...right
+      VSLIIMM,      // ...left
+      VSRIIMM,      // ...right
 
       // Vector get lane (VMOV scalar to ARM core register)
       // (These are used for 8- and 16-bit element types only.)
@@ -322,17 +331,21 @@ class VectorType;
     /// is "fast" by reference in the second argument.
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                         unsigned Align,
+                                        MachineMemOperand::Flags Flags,
                                         bool *Fast) const override;
 
     EVT getOptimalMemOpType(uint64_t Size,
                             unsigned DstAlign, unsigned SrcAlign,
                             bool IsMemset, bool ZeroMemset,
                             bool MemcpyStrSrc,
-                            MachineFunction &MF) const override;
+                            const AttributeList &FuncAttributes) const override;
 
     bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
     bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
     bool isZExtFree(SDValue Val, EVT VT2) const override;
+    bool shouldSinkOperands(Instruction *I,
+                            SmallVectorImpl<Use *> &Ops) const override;
+
     bool isFNegFree(EVT VT) const override;
 
     bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
@@ -454,7 +467,8 @@ class VectorType;
 
     /// getRegClassFor - Return the register class that should be used for the
     /// specified value type.
-    const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+    const TargetRegisterClass *
+    getRegClassFor(MVT VT, bool isDivergent = false) const override;
 
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
@@ -479,7 +493,8 @@ class VectorType;
     /// isFPImmLegal - Returns true if the target can instruction select the
     /// specified FP immediate natively. If false, the legalizer will
     /// materialize the FP immediate as a load from a constant pool.
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize = false) const override;
 
     bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                             const CallInst &I,
@@ -544,6 +559,10 @@ class VectorType;
 
     bool useLoadStackGuardNode() const override;
 
+    void insertSSPDeclarations(Module &M) const override;
+    Value *getSDagStackGuard(const Module &M) const override;
+    Function *getSSPStackGuardCheck(const Module &M) const override;
+
     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                    unsigned &Cost) const override;
 
@@ -568,6 +587,8 @@ class VectorType;
       return HasStandaloneRem;
     }
 
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
+
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
 
@@ -593,8 +614,11 @@ class VectorType;
     bool isDesirableToCommuteWithShift(const SDNode *N,
                                        CombineLevel Level) const override;
 
-    bool shouldFoldShiftPairToMask(const SDNode *N,
-                                   CombineLevel Level) const override;
+    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+                                           CombineLevel Level) const override;
+
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -680,6 +704,7 @@ class VectorType;
                             const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
+    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
@@ -693,6 +718,8 @@ class VectorType;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+    void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                  SelectionDAG &DAG) const;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
                                SelectionDAG &DAG) const override;
@@ -755,15 +782,13 @@ class VectorType;
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
     /// optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+        bool isCalleeStructRet, bool isCallerStructRet,
+        const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+        const bool isIndirect) const;
 
     bool CanLowerReturn(CallingConv::ID CallConv,
                         MachineFunction &MF, bool isVarArg,
@@ -781,6 +806,8 @@ class VectorType;
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool isUnsupportedFloatingType(EVT VT) const;
+
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                     SDValue ARMcc, SDValue CCR, SDValue Cmp,
                     SelectionDAG &DAG) const;
@@ -806,11 +833,15 @@ class VectorType;
                                            MachineBasicBlock *MBB) const;
     MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;
+    void addMVEVectorTypes(bool HasMVEFP);
+    void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
+    void setAllExpand(MVT VT);
   };
 
   enum NEONModImmType {
     VMOVModImm,
     VMVNModImm,
+    MVEVMVNModImm,
     OtherModImm
   };
 
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 0df48ba61299..bc93a058720c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -110,6 +109,9 @@ def AddrModeT2_i8s4 : AddrMode<15>;
 def AddrMode_i12    : AddrMode<16>;
 def AddrMode5FP16   : AddrMode<17>;
 def AddrModeT2_ldrex : AddrMode<18>;
+def AddrModeT2_i7s4 : AddrMode<19>;
+def AddrModeT2_i7s2 : AddrMode<20>;
+def AddrModeT2_i7   : AddrMode<21>;
 
 // Load / store index mode.
 class IndexMode<bits<2> val> {
@@ -121,14 +123,15 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<3> val> {
-  bits<3> Value = val;
+class Domain<bits<4> val> {
+  bits<4> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
 def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+def MVEDomain : Domain<8>; // Instructions in MVE and ARMv8.1m
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -185,6 +188,86 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
   let DecoderMethod = "DecodeCCOutOperand";
 }
 
+// VPT predicate
+
+def VPTPredNOperand : AsmOperandClass {
+  let Name = "VPTPredN";
+  let PredicateMethod = "isVPTPred";
+}
+def VPTPredROperand : AsmOperandClass {
+  let Name = "VPTPredR";
+  let PredicateMethod = "isVPTPred";
+}
+def undef_tied_input;
+
+// Operand classes for the cluster of MC operands describing a
+// VPT-predicated MVE instruction.
+//
+// There are two of these classes. Both of them have the same first
+// two operands:
+//
+// $cond (an integer) indicates the instruction's predication status:
+//   * ARMVCC::None means it's unpredicated
+//   * ARMVCC::Then means it's in a VPT block and appears with the T suffix
+//   * ARMVCC::Else means it's in a VPT block and appears with the E suffix.
+// During code generation, unpredicated and predicated instructions
+// are indicated by setting this parameter to 'None' or to 'Then'; the
+// third value 'Else' is only used for assembly and disassembly.
+//
+// $cond_reg (type VCCR) gives the input predicate register. This is
+// always either zero_reg or VPR, but needs to be modelled as an
+// explicit operand so that it can be register-allocated and spilled
+// when these operands are used in code generation.
+//
+// For 'vpred_r', there's an extra operand $inactive, which specifies
+// the vector register which will supply any lanes of the output
+// register that the predication mask prevents from being written by
+// this instruction. It's always tied to the actual output register
+// (i.e. must be allocated into the same physical reg), but again,
+// code generation will need to model it as a separate input value.
+//
+// 'vpred_n' doesn't have that extra operand: it only has $cond and
+// $cond_reg. This variant is used for any instruction that can't, or
+// doesn't want to, tie $inactive to the output register. Sometimes
+// that's because another input parameter is already tied to it (e.g.
+// instructions that both read and write their Qd register even when
+// unpredicated, either because they only partially overwrite it like
+// a narrowing integer conversion, or simply because the instruction
+// encoding doesn't have enough register fields to make the output
+// independent of all inputs). It can also be because the instruction
+// is defined to set disabled output lanes to zero rather than leaving
+// them unchanged (vector loads), or because it doesn't output a
+// vector register at all (stores, compares). In any of these
+// situations it's unnecessary to have an extra operand tied to the
+// output, and inconvenient to leave it there unused.
+
+// Base class for both kinds of vpred.
+class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
+            !con((ops (i32 0), (i32 zero_reg)), extra_op)> {
+  let PrintMethod = "printVPTPredicateOperand";
+  let OperandNamespace = "ARM";
+  let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
+
+  // For convenience, we provide a string value that can be appended
+  // to the constraints string. It's empty for vpred_n, and for
+  // vpred_r it ties the $inactive operand to the output q-register
+  // (which by convention will be called $Qd).
+  string vpred_constraint;
+}
+
+def vpred_r : vpred_ops<(ops (v4i32 undef_tied_input)), (ops MQPR:$inactive)> {
+  let ParserMatchClass = VPTPredROperand;
+  let OperandType = "OPERAND_VPRED_R";
+  let DecoderMethod = "DecodeVpredROperand";
+  let vpred_constraint = ",$Qd = $vp.inactive";
+}
+
+def vpred_n : vpred_ops<(ops), (ops)> {
+  let ParserMatchClass = VPTPredNOperand;
+  let OperandType = "OPERAND_VPRED_N";
+  let vpred_constraint = "";
+}
+
 // ARM special operands for disassembly only.
 //
 def SetEndAsmOperand : ImmAsmOperand<0,1> {
@@ -285,6 +368,8 @@ class VFP3InstAlias<string Asm, dag Result, bit EmitPriority = 0>
       : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP3]>;
 class NEONInstAlias<string Asm, dag Result, bit EmitPriority = 0>
       : InstAlias<Asm, Result, EmitPriority>, Requires<[HasNEON]>;
+class MVEInstAlias<string Asm, dag Result, bit EmitPriority = 1>
+      : InstAlias<Asm, Result, EmitPriority>, Requires<[HasMVEInt, IsThumb]>;
 
 
 class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
@@ -325,8 +410,8 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
   let TSFlags{12-7} = Form;
   let TSFlags{13}    = isUnaryDataProc;
   let TSFlags{14}    = canXformTo16Bit;
-  let TSFlags{17-15} = D.Value;
-  let TSFlags{18}    = thumbArithFlagSetting;
+  let TSFlags{18-15} = D.Value;
+  let TSFlags{19}    = thumbArithFlagSetting;
 
   let Constraints = cstr;
   let Itinerary = itin;
@@ -382,6 +467,8 @@ class VFP2AsmPseudo<string asm, dag iops, dag oops = (outs)>
   : AsmPseudoInst<asm, iops, oops>, Requires<[HasVFP2]>;
 class NEONAsmPseudo<string asm, dag iops, dag oops = (outs)>
   : AsmPseudoInst<asm, iops, oops>, Requires<[HasNEON]>;
+class MVEAsmPseudo<string asm, dag iops, dag oops = (outs)>
+  : AsmPseudoInst<asm, iops, oops>, Requires<[HasMVEInt]>;
 
 // Pseudo instructions for the code generator.
 class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
@@ -1556,6 +1643,8 @@ class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
 
   // Loads & stores operate on both NEON and VFP pipelines.
   let D = VFPNeonDomain;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // VFP Load / store multiple pseudo instructions.
@@ -1903,6 +1992,8 @@ class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, unary, non-predicated
@@ -1931,6 +2022,8 @@ class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{7-6}   = opcod4;
   let Inst{4}     = opcod5;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, binary
@@ -1957,6 +2050,8 @@ class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{6}     = op6;
   let Inst{4}     = op4;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // Half precision, binary, not predicated
@@ -1986,6 +2081,8 @@ class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
   let Inst{11-8}  = 0b1001;   // Half precision
   let Inst{6}     = opcod3;
   let Inst{4}     = 0;
+
+  let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
 }
 
 // VFP conversion instructions
@@ -2494,7 +2591,7 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
 // VFP/NEON Instruction aliases for type suffices.
 // Note: When EmitPriority == 1, the alias will be used for printing
 class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result, bit EmitPriority = 0> :
-  InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasVFP2]>;
+  InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasFPRegs]>;
 
 // Note: When EmitPriority == 1, the alias will be used for printing
 multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> {
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index bcc31f5fa4cc..388c889349b7 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -95,7 +94,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();
 
-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index c87fb97448c9..042b53f0f8c3 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 13abdc9687ec..e35145463852 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1,9 +1,8 @@
 //===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -100,6 +99,18 @@ def SDT_LongMac  : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
                                         SDTCisSameAs<0, 4>,
                                         SDTCisSameAs<0, 5>]>;
 
+// ARMlsll, ARMlsrl, ARMasrl
+def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
+                                              SDTCisSameAs<0, 2>,
+                                              SDTCisSameAs<0, 3>,
+                                              SDTCisInt<0>,
+                                              SDTCisInt<4>]>;
+
+// TODO Add another operand for 'Size' so that we can re-use this node when we
+// start supporting *TP versions.
+def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
+                                            SDTCisVT<1, OtherVT>]>;
+
 def ARMSmlald        : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
 def ARMSmlaldx       : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
 def ARMSmlsld        : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
@@ -172,6 +183,10 @@ def ARMcmpZ          : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
 
 def ARMpic_add       : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
 
+def ARMasrl          : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
+def ARMlsrl          : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
+def ARMlsll          : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
+
 def ARMsrl_flag      : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
 def ARMsra_flag      : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
 def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInGlue ]>;
@@ -214,189 +229,44 @@ def ARMsmlalbt      : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
 def ARMsmlaltb      : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
 def ARMsmlaltt      : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
 
-//===----------------------------------------------------------------------===//
-// ARM Instruction Predicate Definitions.
-//
-def HasV4T           : Predicate<"Subtarget->hasV4TOps()">,
-                                 AssemblerPredicate<"HasV4TOps", "armv4t">;
-def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T           : Predicate<"Subtarget->hasV5TOps()">,
-                                 AssemblerPredicate<"HasV5TOps", "armv5t">;
-def NoV5T            : Predicate<"!Subtarget->hasV5TOps()">;
-def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">,
-                                 AssemblerPredicate<"HasV5TEOps", "armv5te">;
-def HasV6            : Predicate<"Subtarget->hasV6Ops()">,
-                                 AssemblerPredicate<"HasV6Ops", "armv6">;
-def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
-def HasV6M           : Predicate<"Subtarget->hasV6MOps()">,
-                                 AssemblerPredicate<"HasV6MOps",
-                                                    "armv6m or armv6t2">;
-def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
-                                 AssemblerPredicate<"HasV8MBaselineOps",
-                                                    "armv8m.base">;
-def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
-                                 AssemblerPredicate<"HasV8MMainlineOps",
-                                                    "armv8m.main">;
-def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
-                                 AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
-def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV6K           : Predicate<"Subtarget->hasV6KOps()">,
-                                 AssemblerPredicate<"HasV6KOps", "armv6k">;
-def NoV6K            : Predicate<"!Subtarget->hasV6KOps()">;
-def HasV7            : Predicate<"Subtarget->hasV7Ops()">,
-                                 AssemblerPredicate<"HasV7Ops", "armv7">;
-def HasV8            : Predicate<"Subtarget->hasV8Ops()">,
-                                 AssemblerPredicate<"HasV8Ops", "armv8">;
-def PreV8            : Predicate<"!Subtarget->hasV8Ops()">,
-                                 AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
-def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
-                                 AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
-def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
-                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
-def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
-                                 AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
-def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
-                                 AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
-def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
-                                 AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2          : Predicate<"Subtarget->hasVFP2()">,
-                                 AssemblerPredicate<"FeatureVFP2", "VFP2">;
-def HasVFP3          : Predicate<"Subtarget->hasVFP3()">,
-                                 AssemblerPredicate<"FeatureVFP3", "VFP3">;
-def HasVFP4          : Predicate<"Subtarget->hasVFP4()">,
-                                 AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasDPVFP         : Predicate<"!Subtarget->isFPOnlySP()">,
-                                 AssemblerPredicate<"!FeatureVFPOnlySP",
-                                                    "double precision VFP">;
-def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
-                                 AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
-def HasNEON          : Predicate<"Subtarget->hasNEON()">,
-                                 AssemblerPredicate<"FeatureNEON", "NEON">;
-def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
-                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
-def HasAES           : Predicate<"Subtarget->hasAES()">,
-                                 AssemblerPredicate<"FeatureAES", "aes">;
-def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
-                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
-def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
-                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
-def HasCRC           : Predicate<"Subtarget->hasCRC()">,
-                                 AssemblerPredicate<"FeatureCRC", "crc">;
-def HasRAS           : Predicate<"Subtarget->hasRAS()">,
-                                 AssemblerPredicate<"FeatureRAS", "ras">;
-def HasFP16          : Predicate<"Subtarget->hasFP16()">,
-                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
-def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
-                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
-                                 AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
-def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
-                                 AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
-def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
-                                 AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasDSP           : Predicate<"Subtarget->hasDSP()">,
-                                 AssemblerPredicate<"FeatureDSP", "dsp">;
-def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
-                                 AssemblerPredicate<"FeatureDB",
-                                                    "data-barriers">;
-def HasDFB           : Predicate<"Subtarget->hasFullDataBarrier()">,
-                                 AssemblerPredicate<"FeatureDFB",
-                                                    "full-data-barrier">;
-def HasV7Clrex  : Predicate<"Subtarget->hasV7Clrex()">,
-                            AssemblerPredicate<"FeatureV7Clrex",
-                                               "v7 clrex">;
-def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
-                                  AssemblerPredicate<"FeatureAcquireRelease",
-                                                     "acquire/release">;
-def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
-                                 AssemblerPredicate<"FeatureMP",
-                                                    "mp-extensions">;
-def HasVirtualization: Predicate<"false">,
-                                 AssemblerPredicate<"FeatureVirtualization",
-                                                   "virtualization-extensions">;
-def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
-                                 AssemblerPredicate<"FeatureTrustZone",
-                                                    "TrustZone">;
-def Has8MSecExt      : Predicate<"Subtarget->has8MSecExt()">,
-                                 AssemblerPredicate<"Feature8MSecExt",
-                                                    "ARMv8-M Security Extensions">;
-def HasZCZ           : Predicate<"Subtarget->hasZeroCycleZeroing()">;
-def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb          : Predicate<"Subtarget->isThumb()">,
-                                 AssemblerPredicate<"ModeThumb", "thumb">;
-def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2         : Predicate<"Subtarget->isThumb2()">,
-                                 AssemblerPredicate<"ModeThumb,FeatureThumb2",
-                                                    "thumb2">;
-def IsMClass         : Predicate<"Subtarget->isMClass()">,
-                                 AssemblerPredicate<"FeatureMClass", "armv*m">;
-def IsNotMClass      : Predicate<"!Subtarget->isMClass()">,
-                                 AssemblerPredicate<"!FeatureMClass",
-                                                    "!armv*m">;
-def IsARM            : Predicate<"!Subtarget->isThumb()">,
-                                 AssemblerPredicate<"!ModeThumb", "arm-mode">;
-def IsMachO          : Predicate<"Subtarget->isTargetMachO()">;
-def IsNotMachO       : Predicate<"!Subtarget->isTargetMachO()">;
-def IsNaCl           : Predicate<"Subtarget->isTargetNaCl()">;
-def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
-def IsNotWindows     : Predicate<"!Subtarget->isTargetWindows()">;
-def IsReadTPHard     : Predicate<"Subtarget->isReadTPHard()">;
-def IsReadTPSoft     : Predicate<"!Subtarget->isReadTPHard()">;
-def UseNaClTrap      : Predicate<"Subtarget->useNaClTrap()">,
-                                 AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
-def DontUseNaClTrap  : Predicate<"!Subtarget->useNaClTrap()">;
-
-def UseNegativeImmediates :
-  Predicate<"false">,
-            AssemblerPredicate<"!FeatureNoNegativeImmediates",
-                               "NegativeImmediates">;
-
-// FIXME: Eventually this will be just "hasV6T2Ops".
-let RecomputePerFunction = 1 in {
-  def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
-  def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
-  def UseMovtInPic     : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
-
-  def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
-                           "  TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                           "MF->getFunction().optForMinSize())">;
-}
-def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
-
-// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed, and when
-// they are not slower than a mul + add sequence.
-// Do not use them for Darwin platforms.
-def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
-                                 " FPOpFusion::Fast && "
-                                 " Subtarget->hasVFP4()) && "
-                                 "!Subtarget->isTargetDarwin() &&"
-                                 "Subtarget->useFPVMLx()">;
-
-def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
-
-def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
-
-def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
-                          "!Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
-                              "Subtarget->useNEONForSinglePrecisionFP()">;
-
-let RecomputePerFunction = 1 in {
-  def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
-  def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
-}
-
-def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
-
-// Armv8.5-A extensions
-def HasSB            : Predicate<"Subtarget->hasSB()">,
-                       AssemblerPredicate<"FeatureSB", "sb">;
+// Vector operations shared between NEON and MVE
+
+def ARMvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def ARMvduplane  : SDNode<"ARMISD::VDUPLANE",
+                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                               SDTCisVT<2, i32>]>>;
+
+def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def ARMvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def ARMvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def ARMvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+                                         SDTCisVT<2, i32>]>;
+def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def ARMvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def ARMvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+
+
+def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisVT<2, i32>]>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                     SDTCisSameAs<0, 2>,]>;
+def ARMvshlImm   : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def ARMvshrsImm  : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def ARMvshruImm  : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+def ARMvshls     : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def ARMvshlu     : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
+def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
+                    [SDNPHasChain]>;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -552,6 +422,16 @@ def reglist : Operand<i32> {
   let DecoderMethod = "DecodeRegListOperand";
 }
 
+// A list of general purpose registers and APSR separated by comma.
+// Used by CLRM
+def RegListWithAPSRAsmOperand : AsmOperandClass { let Name = "RegListWithAPSR"; }
+def reglist_with_apsr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = RegListWithAPSRAsmOperand;
+  let PrintMethod = "printRegisterList";
+  let DecoderMethod = "DecodeRegListOperand";
+}
+
 def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
 
 def DPRRegListAsmOperand : AsmOperandClass {
@@ -576,6 +456,21 @@ def spr_reglist : Operand<i32> {
   let DecoderMethod = "DecodeSPRRegListOperand";
 }
 
+def FPSRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+    "FPSRegListWithVPR"; }
+def fp_sreglist_with_vpr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = FPSRegListWithVPRAsmOperand;
+  let PrintMethod = "printRegisterList";
+}
+def FPDRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+    "FPDRegListWithVPR"; }
+def fp_dreglist_with_vpr : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = FPDRegListWithVPRAsmOperand;
+  let PrintMethod = "printRegisterList";
+}
+
 // An operand for the CONSTPOOL_ENTRY pseudo-instruction.
 def cpinst_operand : Operand<i32> {
   let PrintMethod = "printCPInstOperand";
@@ -621,6 +516,55 @@ def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
   let ParserMatchClass = RotImmAsmOperand;
 }
 
+// Power-of-two operand for MVE VIDUP and friends, which encode each of
+// {1,2,4,8} as its base-2 logarithm, i.e. as {0,1,2,3} respectively.
+def MVE_VIDUP_imm_asmoperand : AsmOperandClass {
+  let Name = "VIDUP_imm";
+  let PredicateMethod = "isPowerTwoInRange<1,8>";
+  let RenderMethod = "addPowerTwoOperands";
+  let DiagnosticString = "vector increment immediate must be 1, 2, 4 or 8";
+}
+def MVE_VIDUP_imm : Operand<i32> {
+  let EncoderMethod = "getPowerTwoOpValue";
+  let DecoderMethod = "DecodePowerTwoOperand<0,3>";
+  let ParserMatchClass = MVE_VIDUP_imm_asmoperand;
+}
+
+// Pair vector indexing
+class MVEPairVectorIndexOperand<string start, string end> : AsmOperandClass {
+  let Name = "MVEPairVectorIndex"#start;
+  let RenderMethod = "addMVEPairVectorIndexOperands";
+  let PredicateMethod = "isMVEPairVectorIndex<"#start#", "#end#">";
+}
+
+class MVEPairVectorIndex<string opval> : Operand<i32> {
+  let PrintMethod = "printVectorIndex";
+  let EncoderMethod = "getMVEPairVectorIndexOpValue<"#opval#">";
+  let DecoderMethod = "DecodeMVEPairVectorIndexOperand<"#opval#">";
+  let MIOperandInfo = (ops i32imm);
+}
+
+def MVEPairVectorIndex0 : MVEPairVectorIndex<"0"> {
+  let ParserMatchClass = MVEPairVectorIndexOperand<"0", "1">;
+}
+
+def MVEPairVectorIndex2 : MVEPairVectorIndex<"2"> {
+  let ParserMatchClass = MVEPairVectorIndexOperand<"2", "3">;
+}
+
+// Vector indexing
+class MVEVectorIndexOperand<int NumLanes> : AsmOperandClass {
+  let Name = "MVEVectorIndex"#NumLanes;
+  let RenderMethod = "addMVEVectorIndexOperands";
+  let PredicateMethod = "isVectorIndexInRange<"#NumLanes#">";
+}
+
+class MVEVectorIndex<int NumLanes> : Operand<i32> {
+  let PrintMethod = "printVectorIndex";
+  let ParserMatchClass = MVEVectorIndexOperand<NumLanes>;
+  let MIOperandInfo = (ops i32imm);
+}
+
 // shift_imm: An integer that encodes a shift amount and the type of shift
 // (asr or lsl). The 6-bit immediate encodes as:
 //    {5}     0 ==> lsl
@@ -718,24 +662,11 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
 }
 
 /// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
-def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+def arm_i32imm : IntImmLeaf<i32, [{
+  if (Subtarget->useMovt())
     return true;
-  return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]> {
-  // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
-  // the MachineFunction.
-  let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
-      return true;
-
-    const auto &MO = MI.getOperand(1);
-    if (!MO.isCImm())
-      return false;
-    return ARM_AM::isSOImmTwoPartVal(MO.getCImm()->getZExtValue());
-  }];
-}
+  return ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue());
+}]>;
 
 /// imm0_1 predicate - Immediate in the range [0,1].
 def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
@@ -952,6 +883,32 @@ def imm1_16 : Operand<i32>, ImmLeaf<i32, [{
   let ParserMatchClass = Imm1_16AsmOperand;
 }
 
+def MVEShiftImm1_7AsmOperand: ImmAsmOperand<1,7> {
+  let Name = "MVEShiftImm1_7";
+  // The diagnostic below deliberately says [1,8] rather than [1,7]: the
+  // vshll.s8 t1 encoding accepts 1..7 while the t2 encoding accepts 8, so
+  // this gives a better message when an immediate is bigger than the t1/t2
+  // encodings allow between them.
+  let DiagnosticString = "operand must be an immediate in the range [1,8]";
+}
+def mve_shift_imm1_7 : Operand<i32> {
+  let ParserMatchClass = MVEShiftImm1_7AsmOperand;
+  let EncoderMethod = "getMVEShiftImmOpValue";
+}
+
+def MVEShiftImm1_15AsmOperand: ImmAsmOperand<1,15> {
+  let Name = "MVEShiftImm1_15";
+  // The diagnostic below deliberately says [1,16] rather than [1,15]: the
+  // vshll.s16 t1 encoding accepts 1..15 while the t2 encoding accepts 16, so
+  // this gives a better message when an immediate is bigger than the t1/t2
+  // encodings allow between them.
+  let DiagnosticString = "operand must be an immediate in the range [1,16]";
+}
+def mve_shift_imm1_15 : Operand<i32> {
+  let ParserMatchClass = MVEShiftImm1_15AsmOperand;
+  let EncoderMethod = "getMVEShiftImmOpValue";
+}
+
 // Define ARM specific addressing modes.
 // addrmode_imm12 := reg +/- imm12
 //
@@ -1332,6 +1289,15 @@ def addr_offset_none : MemOperand,
   let MIOperandInfo = (ops GPR:$base);
 }
 
+// t_addr_offset_none := reg [r0-r7]
+def MemNoOffsetTAsmOperand : AsmOperandClass { let Name = "MemNoOffsetT"; }
+def t_addr_offset_none : MemOperand {
+  let PrintMethod = "printAddrMode7Operand";
+  let DecoderMethod = "DecodetGPRRegisterClass";
+  let ParserMatchClass = MemNoOffsetTAsmOperand;
+  let MIOperandInfo = (ops tGPR:$base);
+}
+
 def nohash_imm : Operand<i32> {
   let PrintMethod = "printNoHashImmediate";
 }
@@ -5931,6 +5897,12 @@ include "ARMInstrVFP.td"
 
 include "ARMInstrNEON.td"
 
+//===----------------------------------------------------------------------===//
+// MVE Support
+//
+
+include "ARMInstrMVE.td"
+
 //===----------------------------------------------------------------------===//
 // Assembler aliases
 //
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td
new file mode 100644
index 000000000000..3e7ae55c7fc8
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrMVE.td
@@ -0,0 +1,4591 @@
+//===-- ARMInstrMVE.td - MVE support for ARM ---------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM MVE instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class ExpandImmAsmOp<string shift> : AsmOperandClass {
+  let Name = !strconcat("ExpandImm", shift);
+  let PredicateMethod = !strconcat("isExpImm<", shift, ">");
+  let RenderMethod = "addImmOperands";
+}
+class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
+  let Name = !strconcat("InvertedExpandImm", shift, "_", size);
+  let PredicateMethod = !strconcat("isInvertedExpImm<", shift, ",", size, ">");
+  let RenderMethod = "addImmOperands";
+}
+
+class ExpandImm<string shift> : Operand<i32> {
+  let ParserMatchClass = ExpandImmAsmOp<shift>;
+  let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",false>");
+  let DecoderMethod = !strconcat("DecodeExpandedImmOperand<",shift,">");
+  let PrintMethod = "printExpandedImmOperand";
+}
+class InvertedExpandImm<string shift, string size> : Operand<i32> {
+  let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
+  let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",true>");
+  let PrintMethod = "printExpandedImmOperand";
+  // No decoder method needed, because this operand type is only used
+  // by aliases (VAND and VORN)
+}
+
+def expzero00 : ExpandImm<"0">;
+def expzero08 : ExpandImm<"8">;
+def expzero16 : ExpandImm<"16">;
+def expzero24 : ExpandImm<"24">;
+
+def expzero00inv16 : InvertedExpandImm<"0", "16">;
+def expzero08inv16 : InvertedExpandImm<"8", "16">;
+
+def expzero00inv32 : InvertedExpandImm<"0", "32">;
+def expzero08inv32 : InvertedExpandImm<"8", "32">;
+def expzero16inv32 : InvertedExpandImm<"16", "32">;
+def expzero24inv32 : InvertedExpandImm<"24", "32">;
+
+// VPT condition mask
+def vpt_mask : Operand<i32> {
+  let PrintMethod = "printVPTMask";
+  let ParserMatchClass = it_mask_asmoperand;
+  let EncoderMethod = "getVPTMaskOpValue";
+  let DecoderMethod = "DecodeVPTMaskOperand";
+}
+
+// VPT/VCMP restricted predicate for sign invariant types
+def pred_restricted_i_asmoperand : AsmOperandClass {
+  let Name = "CondCodeRestrictedI";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeRestrictedI";
+  let ParserMethod = "parseITCondCode";
+  let DiagnosticString = "condition code for sign-independent integer "#
+                         "comparison must be EQ or NE";
+}
+
+// VPT/VCMP restricted predicate for signed types
+def pred_restricted_s_asmoperand : AsmOperandClass {
+  let Name = "CondCodeRestrictedS";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeRestrictedS";
+  let ParserMethod = "parseITCondCode";
+  let DiagnosticString = "condition code for signed integer "#
+                         "comparison must be EQ, NE, LT, GT, LE or GE";
+}
+
+// VPT/VCMP restricted predicate for unsigned types
+def pred_restricted_u_asmoperand : AsmOperandClass {
+  let Name = "CondCodeRestrictedU";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeRestrictedU";
+  let ParserMethod = "parseITCondCode";
+  let DiagnosticString = "condition code for unsigned integer "#
+                         "comparison must be EQ, NE, HS or HI";
+}
+
+// VPT/VCMP restricted predicate for floating point
+def pred_restricted_fp_asmoperand : AsmOperandClass {
+  let Name = "CondCodeRestrictedFP";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeRestrictedFP";
+  let ParserMethod = "parseITCondCode";
+  let DiagnosticString = "condition code for floating-point "#
+                         "comparison must be EQ, NE, LT, GT, LE or GE";
+}
+
+class VCMPPredicateOperand : Operand<i32>;
+
+def pred_basic_i : VCMPPredicateOperand {
+  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+  let ParserMatchClass = pred_restricted_i_asmoperand;
+  let DecoderMethod = "DecodeRestrictedIPredicateOperand";
+  let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_u : VCMPPredicateOperand {
+  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+  let ParserMatchClass = pred_restricted_u_asmoperand;
+  let DecoderMethod = "DecodeRestrictedUPredicateOperand";
+  let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_s : VCMPPredicateOperand {
+  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+  let ParserMatchClass = pred_restricted_s_asmoperand;
+  let DecoderMethod = "DecodeRestrictedSPredicateOperand";
+  let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_fp : VCMPPredicateOperand {
+  let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+  let ParserMatchClass = pred_restricted_fp_asmoperand;
+  let DecoderMethod = "DecodeRestrictedFPPredicateOperand";
+  let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+// Register list operands for interleaving load/stores
+def VecList2QAsmOperand : AsmOperandClass {
+  let Name = "VecListTwoMQ";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addMVEVecListOperands";
+  let DiagnosticString = "operand must be a list of two consecutive "#
+                         "q-registers in range [q0,q7]";
+}
+
+def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
+  let ParserMatchClass = VecList2QAsmOperand;
+  let PrintMethod = "printMVEVectorList<2>";
+}
+
+def VecList4QAsmOperand : AsmOperandClass {
+  let Name = "VecListFourMQ";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addMVEVecListOperands";
+  let DiagnosticString = "operand must be a list of four consecutive "#
+                         "q-registers in range [q0,q7]";
+}
+
+def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
+  let ParserMatchClass = VecList4QAsmOperand;
+  let PrintMethod = "printMVEVectorList<4>";
+}
+
+// taddrmode_imm7  := reg[r0-r7] +/- (imm7 << shift)
+class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
+  let Name = "TMemImm7Shift"#shift#"Offset";
+  let PredicateMethod = "isMemImm7ShiftedOffset<"#shift#",ARM::tGPRRegClassID>";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
+
+class taddrmode_imm7<int shift> : MemOperand {
+  let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
+  // They are printed the same way as the T2 imm8 version
+  let PrintMethod = "printT2AddrModeImm8Operand<false>";
+  // This can also be the same as the T2 version.
+  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
+  let DecoderMethod = "DecodeTAddrModeImm7<"#shift#">";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm7  := reg +/- (imm7 << shift)
+class MemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
+  let Name = "MemImm7Shift"#shift#"Offset";
+  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
+                        ",ARM::GPRnopcRegClassID>";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemImm7Shift0OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<0>;
+def MemImm7Shift1OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<1>;
+def MemImm7Shift2OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<2>;
+class T2AddrMode_Imm7<int shift> : MemOperand,
+      ComplexPattern<i32, 2, "SelectT2AddrModeImm7<"#shift#">", []> {
+  let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
+  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 0>";
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetAsmOperand");
+  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+class t2addrmode_imm7<int shift> : T2AddrMode_Imm7<shift> {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8Operand<false>";
+}
+
+class MemImm7ShiftOffsetWBAsmOperand<int shift> : AsmOperandClass {
+  let Name = "MemImm7Shift"#shift#"OffsetWB";
+  let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
+                        ",ARM::rGPRRegClassID>";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemImm7Shift0OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<0>;
+def MemImm7Shift1OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<1>;
+def MemImm7Shift2OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<2>;
+
+class t2addrmode_imm7_pre<int shift> : T2AddrMode_Imm7<shift> {
+  // Printed the same way as the imm8 version, with the <true> template flag.
+  let PrintMethod = "printT2AddrModeImm8Operand<true>";
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetWBAsmOperand");
+  let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 1>";
+  let MIOperandInfo = (ops rGPR:$base, i32imm:$offsimm);
+}
+
+class t2am_imm7shiftOffsetAsmOperand<int shift>
+  : AsmOperandClass { let Name = "Imm7Shift"#shift; }
+def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
+def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
+def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
+
+class t2am_imm7_offset<int shift> : MemOperand {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("t2am_imm7shift"#shift#"OffsetAsmOperand");
+  let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">";
+  let DecoderMethod = "DecodeT2Imm7<"#shift#">";
+}
+
+// Operands for gather/scatter loads of the form [Rbase, Qoffsets]
+class MemRegRQOffsetAsmOperand<int shift> : AsmOperandClass {
+  let Name = "MemRegRQS"#shift#"Offset";
+  let PredicateMethod = "isMemRegRQOffset<"#shift#">";
+  let RenderMethod = "addMemRegRQOffsetOperands";
+}
+
+def MemRegRQS0OffsetAsmOperand : MemRegRQOffsetAsmOperand<0>;
+def MemRegRQS1OffsetAsmOperand : MemRegRQOffsetAsmOperand<1>;
+def MemRegRQS2OffsetAsmOperand : MemRegRQOffsetAsmOperand<2>;
+def MemRegRQS3OffsetAsmOperand : MemRegRQOffsetAsmOperand<3>;
+
+// mve_addr_rq_shift  := reg + vreg{ << UXTW #shift}
+class mve_addr_rq_shift<int shift> : MemOperand {
+  let EncoderMethod = "getMveAddrModeRQOpValue";
+  let PrintMethod = "printMveAddrModeRQOperand<"#shift#">";
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("MemRegRQS"#shift#"OffsetAsmOperand");
+  let DecoderMethod = "DecodeMveAddrModeRQ";
+  let MIOperandInfo = (ops GPRnopc:$base, MQPR:$offsreg);
+}
+
+class MemRegQOffsetAsmOperand<int shift> : AsmOperandClass {
+  let Name = "MemRegQS"#shift#"Offset";
+  let PredicateMethod = "isMemRegQOffset<"#shift#">";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemRegQS2OffsetAsmOperand : MemRegQOffsetAsmOperand<2>;
+def MemRegQS3OffsetAsmOperand : MemRegQOffsetAsmOperand<3>;
+
+// mve_addr_q_shift  := vreg {+ #imm7s2/4}
+class mve_addr_q_shift<int shift> : MemOperand {
+  let EncoderMethod = "getMveAddrModeQOpValue<"#shift#">";
+  // Can be printed same way as other reg + imm operands
+  let PrintMethod = "printT2AddrModeImm8Operand<false>";
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");
+  let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";
+  let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
+}
+
+// --------- Start of base classes for the instructions themselves
+
+class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
+             string ops, string cstr, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
+             pattern>,
+    Requires<[HasMVEInt]> {
+  let D = MVEDomain;
+  let DecoderNamespace = "MVE";
+}
+
+// MVE_p is used for most predicated instructions, to add the cluster
+// of input operands that provides the VPT suffix (none, T or E) and
+// the input predicate register.
+class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
+            string suffix, string ops, vpred_ops vpred, string cstr,
+            list<dag> pattern=[]>
+  : MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
+           // If the instruction has a suffix, like vadd.f32, then the
+           // VPT predication suffix goes before the dot, so the full
+           // name has to be "vadd${vp}.f32".
+           !strconcat(iname, "${vp}",
+                      !if(!eq(suffix, ""), "", !strconcat(".", suffix))),
+           ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
+  let Inst{31-29} = 0b111;
+  let Inst{27-26} = 0b11;
+}
+
+class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
+            string suffix, string ops, vpred_ops vpred, string cstr,
+            list<dag> pattern=[]>
+  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
+  let Predicates = [HasMVEFloat];
+}
+
+class MVE_MI_with_pred<dag oops, dag iops, InstrItinClass itin, string asm,
+                       string ops, string cstr, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm, !strconcat("\t", ops), cstr,
+             pattern>,
+    Requires<[HasV8_1MMainline, HasMVEInt]> {
+  let D = MVEDomain;
+  let DecoderNamespace = "MVE";
+}
+
+class MVE_VMOV_lane_base<dag oops, dag iops, InstrItinClass itin, string asm,
+                         string suffix, string ops, string cstr,
+                         list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm,
+            !if(!eq(suffix, ""), "", "." # suffix) # "\t" # ops,
+            cstr, pattern>,
+    Requires<[HasV8_1MMainline, HasMVEInt]> {
+  let D = MVEDomain;
+  let DecoderNamespace = "MVE";
+}
+
+class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
+            list<dag> pattern=[]>
+  : MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
+  let Inst{31-20} = 0b111010100101;
+  let Inst{8} = 0b1;
+
+}
+
+class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
+                    list<dag> pattern=[]>
+  : MVE_ScalarShift<iname, (outs rGPR:$RdaDest), iops, asm, cstr, pattern> {
+  bits<4> RdaDest;
+
+  let Inst{19-16} = RdaDest{3-0};
+}
+
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
+                     "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+  bits<5> imm;
+
+  let Inst{15} = 0b0;
+  let Inst{14-12} = imm{4-2};
+  let Inst{11-8} = 0b1111;
+  let Inst{7-6} = imm{1-0};
+  let Inst{5-4} = op5_4{1-0};
+  let Inst{3-0} = 0b1111;
+}
+
+def MVE_SQSHL : MVE_ScalarShiftSRegImm<"sqshl", 0b11>;
+def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
+def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
+def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
+
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+  : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
+                     "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+  bits<4> Rm;
+
+  let Inst{15-12} = Rm{3-0};
+  let Inst{11-8} = 0b1111;
+  let Inst{7-6} = 0b00;
+  let Inst{5-4} = op5_4{1-0};
+  let Inst{3-0} = 0b1101;
+}
+
+def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
+def MVE_UQRSHL : MVE_ScalarShiftSRegReg<"uqrshl", 0b00>;
+
+class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
+                               string cstr, list<dag> pattern=[]>
+  : MVE_ScalarShift<iname, (outs tGPREven:$RdaLo, tGPROdd:$RdaHi),
+                    iops, asm, cstr, pattern> {
+  bits<4> RdaLo;
+  bits<4> RdaHi;
+
+  let Inst{19-17} = RdaLo{3-1};
+  let Inst{11-9} = RdaHi{3-1};
+}
+
+class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
+                             list<dag> pattern=[]>
+  : MVE_ScalarShiftDoubleReg<
+      iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, long_shift:$imm),
+      "$RdaLo, $RdaHi, $imm", "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+      pattern> {
+  bits<5> imm;
+
+  let Inst{16} = op16;
+  let Inst{15} = 0b0;
+  let Inst{14-12} = imm{4-2};
+  let Inst{7-6} = imm{1-0};
+  let Inst{5-4} = op5_4{1-0};
+  let Inst{3-0} = 0b1111;
+}
+
+class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
+                             list<dag> pattern=[]>
+  : MVE_ScalarShiftDoubleReg<
+     iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
+     "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
+                            "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+     pattern> {
+  bits<4> Rm;
+
+  let Inst{16} = op16;
+  let Inst{15-12} = Rm{3-0};
+  let Inst{7-6} = 0b00;
+  let Inst{5} = op5;
+  let Inst{4} = 0b0;
+  let Inst{3-0} = 0b1101;
+
+  // Custom decoder method because of the following overlapping encodings:
+  // ASRL and SQRSHR
+  // LSLL and UQRSHL
+  // SQRSHRL and SQRSHR
+  // UQRSHLL and UQRSHL
+  let DecoderMethod = "DecodeMVEOverlappingLongShift";
+}
+
+def MVE_ASRLr   : MVE_ScalarShiftDRegReg<"asrl",    0b1,  0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                                        (ARMasrl tGPREven:$RdaLo_src,
+                                        tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_ASRLi   : MVE_ScalarShiftDRegImm<"asrl",    0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                                        (ARMasrl tGPREven:$RdaLo_src,
+                                        tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSLLr   : MVE_ScalarShiftDRegReg<"lsll",    0b0,  0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                                        (ARMlsll tGPREven:$RdaLo_src,
+                                        tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_LSLLi   : MVE_ScalarShiftDRegImm<"lsll",    0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                                        (ARMlsll tGPREven:$RdaLo_src,
+                                        tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSRL    : MVE_ScalarShiftDRegImm<"lsrl",    0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                                        (ARMlsrl tGPREven:$RdaLo_src,
+                                        tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+
+def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1,  0b1>;
+def MVE_SQSHLL  : MVE_ScalarShiftDRegImm<"sqshll",  0b11, 0b1>;
+def MVE_SRSHRL  : MVE_ScalarShiftDRegImm<"srshrl",  0b10, 0b1>;
+
+def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0,  0b1>;
+def MVE_UQSHLL  : MVE_ScalarShiftDRegImm<"uqshll",  0b00, 0b1>;
+def MVE_URSHRL  : MVE_ScalarShiftDRegImm<"urshrl",  0b01, 0b1>;
+
+// start of mve_rDest instructions
+
+class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
+                string iname, string suffix,
+                string ops, string cstr, list<dag> pattern=[]>
+// Always use vpred_n and not vpred_r: with the output register being
+// a GPR and not a vector register, there can't be any question of
+// what to put in its inactive lanes.
+  : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
+
+  let Inst{25-23} = 0b101;
+  let Inst{11-9} = 0b111;
+  let Inst{4} = 0b0;
+}
+
+class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
+              NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
+              pattern> {
+  bits<4> Qm;
+  bits<4> Qn;
+  bits<4> Rda;
+
+  let Inst{28} = U;
+  let Inst{22} = 0b0;
+  let Inst{21-20} = size{1-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{15-12} = Rda{3-0};
+  let Inst{8} = 0b1;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b0;
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b1;
+}
+
+def MVE_VABAVs8  : MVE_VABAV<"s8", 0b0, 0b00>;
+def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
+def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
+def MVE_VABAVu8  : MVE_VABAV<"u8", 0b1, 0b00>;
+def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
+def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+
+class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
+              bit A, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
+              iname, suffix, "$Rda, $Qm", cstr, pattern> {
+  bits<3> Qm;
+  bits<4> Rda;
+
+  let Inst{28} = U;
+  let Inst{22-20} = 0b111;
+  let Inst{19-18} = size{1-0};
+  let Inst{17-16} = 0b01;
+  let Inst{15-13} = Rda{3-1};
+  let Inst{12} = 0b0;
+  let Inst{8-6} = 0b100;
+  let Inst{5} = A;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+
+multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
+                       list<dag> pattern=[]> {
+  def acc    : MVE_VADDV<"vaddva", suffix,
+                         (ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
+                         0b1, U, size, pattern>;
+  def no_acc : MVE_VADDV<"vaddv", suffix,
+                         (ins MQPR:$Qm), "",
+                         0b0, U, size, pattern>;
+}
+
+defm MVE_VADDVs8  : MVE_VADDV_A<"s8",  0b0, 0b00>;
+defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
+defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
+defm MVE_VADDVu8  : MVE_VADDV_A<"u8",  0b1, 0b00>;
+defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
+defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
+
+class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
+               bit A, bit U, list<dag> pattern=[]>
+  : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
+              suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
+  bits<3> Qm;
+  bits<4> RdaLo;
+  bits<4> RdaHi;
+
+  let Inst{28} = U;
+  let Inst{22-20} = RdaHi{3-1};
+  let Inst{19-18} = 0b10;
+  let Inst{17-16} = 0b01;
+  let Inst{15-13} = RdaLo{3-1};
+  let Inst{12} = 0b0;
+  let Inst{8-6} = 0b100;
+  let Inst{5} = A;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+
+multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
+  def acc    : MVE_VADDLV<"vaddlva", suffix,
+                        (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
+                        "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+                        0b1, U, pattern>;
+  def no_acc : MVE_VADDLV<"vaddlv", suffix,
+                        (ins MQPR:$Qm), "",
+                        0b0, U, pattern>;
+}
+
+
+defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
+defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
+
+class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
+                     bit bit_17, bit bit_7, list<dag> pattern=[]>
+  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
+              NoItinerary, iname, suffix, "$RdaSrc, $Qm",
+              "$RdaDest = $RdaSrc", pattern> {
+  bits<3> Qm;
+  bits<4> RdaDest;
+
+  let Inst{28} = sz;
+  let Inst{22-20} = 0b110;
+  let Inst{19-18} = 0b11;
+  let Inst{17} = bit_17;
+  let Inst{16} = 0b0;
+  let Inst{15-12} = RdaDest{3-0};
+  let Inst{8} = 0b1;
+  let Inst{7} = bit_7;
+  let Inst{6-5} = 0b00;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+
+  let Predicates = [HasMVEFloat];
+}
+
+multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
+  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
+  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
+}
+
+defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
+defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;
+
+multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
+  def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
+  def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
+}
+
+defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
+defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
+
+class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
+                 bit bit_17, bit bit_7, list<dag> pattern=[]>
+  : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
+              iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
+  bits<3> Qm;
+  bits<4> RdaDest;
+
+  let Inst{28} = U;
+  let Inst{22-20} = 0b110;
+  let Inst{19-18} = size{1-0};
+  let Inst{17} = bit_17;
+  let Inst{16} = 0b0;
+  let Inst{15-12} = RdaDest{3-0};
+  let Inst{8} = 0b1;
+  let Inst{7} = bit_7;
+  let Inst{6-5} = 0b00;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+
+multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
+  def s8  : MVE_VMINMAXV<iname, "s8",  0b0, 0b00, 0b1, bit_7, pattern>;
+  def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b1, bit_7, pattern>;
+  def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b1, bit_7, pattern>;
+  def u8  : MVE_VMINMAXV<iname, "u8",  0b1, 0b00, 0b1, bit_7, pattern>;
+  def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7, pattern>;
+  def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7, pattern>;
+}
+
+defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+
+multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> { // Signed-suffix records only, with bit_17 = 0. NOTE(review): 'pattern' is accepted but not forwarded below.
+  def s8  : MVE_VMINMAXV<iname, "s8",  0b0, 0b00, 0b0, bit_7>; // args after suffix: U, size, bit_17, bit_7
+  def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
+  def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
+}
+
+defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>; // bit_7 = 1 for vminav
+defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>; // bit_7 = 0 for vmaxav
+
+class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr, // Encoding class behind the vmladav / vmlsdav records defined below.
+                   bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+                   list<dag> pattern=[]>
+  : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
+              "$RdaDest, $Qn, $Qm", cstr, pattern> { // iops and cstr are supplied by the caller (they differ between _acc and _noacc)
+  bits<4> RdaDest;
+  bits<3> Qm;
+  bits<3> Qn;
+
+  let Inst{28} = bit_28;
+  let Inst{22-20} = 0b111;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = sz;
+  let Inst{15-13} = RdaDest{3-1}; // bit 0 of RdaDest is not encoded; operand class is tGPREven (presumably always even - confirm in the register class)
+  let Inst{12} = X; // 1 for the "x" (_exch) mnemonics, 0 for _noexch
+  let Inst{8} = bit_8;
+  let Inst{7-6} = 0b00;
+  let Inst{5} = A; // 1 for the "a" (_acc) mnemonics, 0 for _noacc
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = bit_0; // 0 from MVE_VMLADAV_multi, 1 from MVE_VMLSDAV_multi
+}
+
+multiclass MVE_VMLAMLSDAV_X<string iname, string suffix, dag iops, string cstr, // Expands one mnemonic into a plain and an "x"-suffixed record.
+                          bit sz, bit bit_28, bit A, bit bit_8, bit bit_0,
+                          list<dag> pattern=[]> {
+  def _noexch : MVE_VMLAMLSDAV<iname, suffix, iops, cstr, sz, // X = 0
+                            bit_28, A, 0b0, bit_8, bit_0, pattern>;
+  def _exch   : MVE_VMLAMLSDAV<iname # "x", suffix, iops, cstr, sz, // X = 1, mnemonic gets an "x" appended
+                            bit_28, A, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLAMLSDAV_XA<string iname, string suffix, bit sz, bit bit_28, // Adds the accumulate dimension on top of MVE_VMLAMLSDAV_X (four records in total).
+                           bit bit_8, bit bit_0, list<dag> pattern=[]> {
+  defm _noacc : MVE_VMLAMLSDAV_X<iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", // A = 0: no scalar input, no constraint
+                              sz, bit_28, 0b0, bit_8, bit_0, pattern>;
+  defm _acc   : MVE_VMLAMLSDAV_X<iname # "a", suffix, // A = 1: mnemonic gets an "a" appended
+                             (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
+                             "$RdaDest = $RdaSrc", // passed as cstr: the extra $RdaSrc input is tied to $RdaDest
+                              sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit U, bit bit_8, // All acc/exch variants of "vmladav" for one type suffix.
+                           list<dag> pattern=[]> {
+  defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>; // U goes to the bit_28 slot; bit_0 is fixed to 0
+}
+
+defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>; // args: suffix, sz, U, bit_8
+defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>;
+defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>;
+defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>;
+
+defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>; // the 8-bit records are the ones with bit_8 = 1
+defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>;
+
+// vmlav aliases vmladav
+foreach acc = ["_acc", "_noacc"] in { // one alias per (acc, suffix) pair, always targeting the _noexch record
+  foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
+    def : MVEInstAlias<!strconcat("vmlav", !if(!eq(acc, "_acc"), "a", ""), // alias mnemonic is "vmlava" for _acc, "vmlav" otherwise
+                       "${vp}.", suffix, "\t$RdaDest, $Qn, $Qm"),
+                       (!cast<Instruction>("MVE_VMLADAV"#suffix#acc#"_noexch") // looks the target record up by its generated name
+                        tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+  }
+}
+
+multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28, // All acc/exch variants of "vmlsdav" for one type suffix.
+                           list<dag> pattern=[]> {
+  defm "" : MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>; // bit_8 fixed to 0, bit_0 fixed to 1
+}
+
+defm MVE_VMLSDAVs8  : MVE_VMLSDAV_multi<"s8", 0, 0b1>; // args: suffix, sz, bit_28; only s8 sets bit_28
+defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>;
+defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>;
+
+// Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
+class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr, // Two scalar destinations (lo/hi) instead of MVE_VMLAMLSDAV's single one.
+                       bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+                       list<dag> pattern=[]>
+  : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
+              iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
+  bits<4> RdaLoDest;
+  bits<4> RdaHiDest;
+  bits<3> Qm;
+  bits<3> Qn;
+
+  let Inst{28} = bit_28;
+  let Inst{22-20} = RdaHiDest{3-1}; // bit 0 of the tGPROdd operand is not encoded
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = sz;
+  let Inst{15-13} = RdaLoDest{3-1}; // bit 0 of the tGPREven operand is not encoded
+  let Inst{12} = X; // 1 for the "x" (_exch) mnemonics
+  let Inst{8} = bit_8;
+  let Inst{7-6} = 0b00;
+  let Inst{5} = A; // 1 for the "a" (_acc) mnemonics
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = bit_0;
+}
+
+multiclass MVE_VMLALDAVBase_X<string iname, string suffix, dag iops, // Expands one mnemonic into a plain and an "x"-suffixed record.
+                              string cstr, bit sz, bit bit_28, bit A,
+                              bit bit_8, bit bit_0, list<dag> pattern=[]> {
+  def _noexch : MVE_VMLALDAVBase<iname, suffix, iops, cstr, sz, // X = 0
+                               bit_28, A, 0b0, bit_8, bit_0, pattern>;
+  def _exch   : MVE_VMLALDAVBase<iname # "x", suffix, iops, cstr, sz, // X = 1, mnemonic gets an "x" appended
+                               bit_28, A, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLALDAVBase_XA<string iname, string suffix, bit sz, bit bit_28, // Adds the accumulate dimension on top of MVE_VMLALDAVBase_X.
+                             bit bit_8, bit bit_0, list<dag> pattern=[]> {
+  defm _noacc : MVE_VMLALDAVBase_X<
+     iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", // A = 0: vector inputs only, empty constraint string
+     sz, bit_28, 0b0, bit_8, bit_0, pattern>;
+  defm _acc   : MVE_VMLALDAVBase_X<
+     iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, // A = 1: mnemonic gets an "a" and two scalar inputs
+                               MQPR:$Qn, MQPR:$Qm),
+     "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", // passed as cstr: both scalar inputs are tied to the destinations
+     sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VRMLALDAVH_multi<string suffix, bit U, list<dag> pattern=[]> { // All acc/exch variants of "vrmlaldavh" for one suffix.
+  defm "" : MVE_VMLALDAVBase_XA<
+     "vrmlaldavh", suffix, 0b0, U, 0b1, 0b0, pattern>; // sz = 0, bit_28 = U, bit_8 = 1, bit_0 = 0
+}
+
+defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>; // U = 0
+defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>; // U = 1
+
+// vrmlalvh aliases for vrmlaldavh
+def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", // non-accumulating signed form
+                  (MVE_VRMLALDAVHs32_noacc_noexch
+                   tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                   MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", // accumulating signed form
+                  (MVE_VRMLALDAVHs32_acc_noexch
+                   tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                   MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", // non-accumulating unsigned form
+                  (MVE_VRMLALDAVHu32_noacc_noexch
+                   tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                   MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", // accumulating unsigned form
+                  (MVE_VRMLALDAVHu32_acc_noexch
+                   tGPREven:$RdaLo, tGPROdd:$RdaHi,
+                   MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+
+multiclass MVE_VMLALDAV_multi<string suffix, bit sz, bit U, // All acc/exch variants of "vmlaldav" for one type suffix.
+                              list<dag> pattern=[]> {
+  defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>; // bit_28 = U, bit_8 = 0, bit_0 = 0
+}
+
+defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>; // args: suffix, sz, U
+defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>;
+defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>;
+defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>;
+
+// vmlalv aliases vmlaldav
+foreach acc = ["_acc", "_noacc"] in { // one alias per (acc, suffix) pair, always targeting the _noexch record
+  foreach suffix = ["s16", "s32", "u16", "u32"] in {
+    def : MVEInstAlias<!strconcat("vmlalv", !if(!eq(acc, "_acc"), "a", ""), // alias mnemonic is "vmlalva" for _acc, "vmlalv" otherwise
+                       "${vp}.", suffix, "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm"),
+                       (!cast<Instruction>("MVE_VMLALDAV"#suffix#acc#"_noexch") // looks the target record up by its generated name
+                       tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
+                       MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+  }
+}
+
+multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz, // Shared by vmlsldav and vrmlsldavh; the mnemonic is a parameter here.
+                            bit bit_28, list<dag> pattern=[]> {
+  defm "" : MVE_VMLALDAVBase_XA<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>; // bit_8 fixed to 0, bit_0 fixed to 1
+}
+
+defm MVE_VMLSLDAVs16   : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; // args: iname, suffix, sz, bit_28
+defm MVE_VMLSLDAVs32   : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
+defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; // the only record here with bit_28 = 1
+
+// end of mve_rDest instructions
+
+// start of mve_comp instructions
+
+class MVE_comp<InstrItinClass itin, string iname, string suffix, // Common Qd/Qn/Qm operand encoding for the min/max classes that follow.
+               string cstr, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
+           "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  bits<4> Qm;
+
+  let Inst{22} = Qd{3}; // each 4-bit register number is split: top bit here, low three bits below
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = 0b0;
+  let Inst{10-9} = 0b11;
+  let Inst{7} = Qn{3};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+
+class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21, // Vector vmaxnm/vminnm encoding on top of MVE_comp.
+                list<dag> pattern=[]>
+  : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+
+  let Inst{28} = 0b1;
+  let Inst{25-24} = 0b11;
+  let Inst{23} = 0b0;
+  let Inst{21} = bit_21; // 0 in the vmaxnm records, 1 in the vminnm records
+  let Inst{20} = sz; // 0 in the f32 records, 1 in the f16 records
+  let Inst{11} = 0b1;
+  let Inst{8} = 0b1;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b1;
+
+  let Predicates = [HasMVEFloat]; // overrides whatever predicate list the base classes set
+}
+
+def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>; // args: iname, suffix, sz, bit_21
+def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
+
+let Predicates = [HasMVEFloat] in { // select the fmaxnum node to the records above
+  def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>; // same as above with bit_21 = 1
+def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
+
+let Predicates = [HasMVEFloat] in { // select the fminnum node to the records above
+  def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+
+class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size, // Integer vector vmin/vmax encoding on top of MVE_comp.
+              bit bit_4, list<dag> pattern=[]>
+  : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+
+  let Inst{28} = U; // 0 in the s* records, 1 in the u* records
+  let Inst{25-24} = 0b11;
+  let Inst{23} = 0b0;
+  let Inst{21-20} = size{1-0}; // 0b00 / 0b01 / 0b10 for the 8 / 16 / 32 suffixes
+  let Inst{11} = 0b0;
+  let Inst{8} = 0b0;
+  let Inst{6} = 0b1;
+  let Inst{4} = bit_4; // 0 for vmax, 1 for vmin
+}
+
+multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> { // Six records (s8..u32) of MVE_VMINMAX for one mnemonic.
+  def s8  : MVE_VMINMAX<iname, "s8",  0b0, 0b00, bit_4>; // args after suffix: U, size, bit_4
+  def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
+  def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
+  def u8  : MVE_VMINMAX<iname, "u8",  0b1, 0b00, bit_4>;
+  def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
+  def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
+}
+
+defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>; // bit_4 = 0
+defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>; // bit_4 = 1
+
+let Predicates = [HasMVEInt] in { // select smin/smax/umin/umax on v16i8, v8i16 and v4i32 to the matching MVE_VMIN*/MVE_VMAX* record
+  def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // signed min
+            (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // signed max
+            (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // unsigned min
+            (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // unsigned max
+            (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+// end of mve_comp instructions
+
+// start of mve_bit instructions
+
+class MVE_bit_arith<dag oops, dag iops, string iname, string suffix, // Base for the bitwise records: encodes only Qd and Qm; users add Qn and opcode bits.
+                    string ops, string cstr, list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+
+  let Inst{22} = Qd{3}; // top bit of Qd; low three bits go to Inst{15-13}
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3}; // top bit of Qm; low three bits go to Inst{3-1}
+  let Inst{3-1} = Qm{2-0};
+}
+
+def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), // Register-register vbic; spelled out here rather than via MVE_bit_ops.
+                             "vbic", "", "$Qd, $Qn, $Qm", ""> {
+  bits<4> Qn;
+
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b110;
+  let Inst{21-20} = 0b01; // the field MVE_bit_ops calls bit_21_20
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
+}
+
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7> // One-source encoding used by the vrev64/vrev32/vrev16 records.
+  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
+                  suffix, "$Qd, $Qm", ""> {
+
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size; // 0b00 / 0b01 / 0b10 for the "8" / "16" / "32" suffixes
+  let Inst{17-16} = 0b00;
+  let Inst{12-9} = 0b0000;
+  let Inst{8-7} = bit_8_7; // 0b00 for vrev64, 0b01 for vrev32, 0b10 for vrev16
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VREV64_8  : MVE_VREV<"vrev64", "8", 0b00, 0b00>; // args: iname, suffix, size, bit_8_7
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>;
+
+def MVE_VREV32_8  : MVE_VREV<"vrev32", "8", 0b00, 0b01>; // vrev32 is defined for the 8 and 16 suffixes only
+def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
+
+def MVE_VREV16_8  : MVE_VREV<"vrev16", "8", 0b00, 0b10>; // vrev16 is defined for the 8 suffix only
+
+let Predicates = [HasMVEInt] in { // select the ARMvrev64/32/16 nodes to the MVE_VREV* record of matching element width
+  def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
+            (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
+  def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
+            (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
+  def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
+
+  def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
+            (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
+  def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
+
+  def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
+
+  def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))), // float vectors reuse the record of the same element width
+            (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
+  def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
+            (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
+  def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
+            (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+}
+
+def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), // One-source vmvn with an empty suffix; Qd/Qm bits come from MVE_bit_arith.
+                             "vmvn", "", "$Qd, $Qm", ""> {
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-16} = 0b110000;
+  let Inst{12-6} = 0b0010111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+let Predicates = [HasMVEInt] in { // select vnotq on every integer vector type to the single MVE_VMVN record
+  def : Pat<(v16i8 (vnotq  (v16i8 MQPR:$val1))),
+            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
+  def : Pat<(v8i16 (vnotq  (v8i16 MQPR:$val1))),
+            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
+  def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),
+            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+  def : Pat<(v2i64 (vnotq  (v2i64 MQPR:$val1))),
+            (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+}
+
+class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28> // Two-source bitwise encoding; records differ only in Inst{28} and Inst{21-20}.
+  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+                  iname, "", "$Qd, $Qn, $Qm", ""> {
+  bits<4> Qn;
+
+  let Inst{28} = bit_28;
+  let Inst{25-23} = 0b110;
+  let Inst{21-20} = bit_21_20;
+  let Inst{19-17} = Qn{2-0}; // low three bits of Qn; top bit goes to Inst{7}
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>; // args: iname, bit_21_20, bit_28; veor is the only one with bit_28 = 1
+def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
+def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
+def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
+
+// add ignored suffixes as aliases
+
+foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in { // every suffix maps to the same suffix-less record
+  def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+}
+
+let Predicates = [HasMVEInt] in { // selection patterns for and/or/xor and their not-of-second-operand forms, on all four integer vector types
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // and -> MVE_VAND
+            (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // or -> MVE_VORR
+            (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), // xor -> MVE_VEOR
+            (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), // and with vnotq of the second operand -> MVE_VBIC
+            (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), // or with vnotq of the second operand -> MVE_VORN
+            (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+}
+
+class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps> // Immediate-form bitwise encoding; Inst{5} is left for the subclasses to set.
+  : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
+          iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { // $Qd is tied to the $Qd_src input supplied via inOps
+  bits<8> imm;
+  bits<4> Qd;
+
+  let Inst{28} = imm{7}; // the 8-bit immediate is scattered: bit 7 here, bits 6-4 and 3-0 below
+  let Inst{27-23} = 0b11111;
+  let Inst{22} = Qd{3};
+  let Inst{21-19} = 0b000;
+  let Inst{18-16} = imm{6-4};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = 0b0;
+  let Inst{11-8} = cmode;
+  let Inst{7-6} = 0b01;
+  let Inst{4} = 0b1;
+  let Inst{3-0} = imm{3-0};
+}
+
+class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type> // Immediate vorr; this class shares its name with the register-form record "def MVE_VORR".
+  : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+  let Inst{5} = 0b0; // 0 for vorr; the immediate vbic class sets this bit to 1
+}
+
+def MVE_VORRIZ0v4i32  : MVE_VORR<"i32", 0b0001, expzero00>; // args: suffix, cmode, immediate operand type
+def MVE_VORRIZ0v8i16  : MVE_VORR<"i16", 0b1001, expzero00>;
+def MVE_VORRIZ8v4i32  : MVE_VORR<"i32", 0b0011, expzero08>;
+def MVE_VORRIZ8v8i16  : MVE_VORR<"i16", 0b1011, expzero08>;
+def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
+def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
+
+def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm", // MVEAsmPseudo records for immediate vorn; NOTE(review): presumably rewritten to MVE_VORRIZ* elsewhere - confirm
+    (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm", // register-to-register vmov is an alias, not a separate encoding
+    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>; // expands to the MVE_VORR record with $Qm used for both source operands
+
+class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type> // Immediate vbic; this class shares its name with the register-form record "def MVE_VBIC".
+  : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+  let Inst{5} = 0b1; // 1 for vbic; the immediate vorr class sets this bit to 0
+}
+
+def MVE_VBICIZ0v4i32  : MVE_VBIC<"i32", 0b0001, expzero00>; // args: suffix, cmode, immediate operand type (same table as MVE_VORRIZ*)
+def MVE_VBICIZ0v8i16  : MVE_VBIC<"i16", 0b1001, expzero00>;
+def MVE_VBICIZ8v4i32  : MVE_VBIC<"i32", 0b0011, expzero08>;
+def MVE_VBICIZ8v8i16  : MVE_VBIC<"i16", 0b1011, expzero08>;
+def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
+def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
+
+def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm", // MVEAsmPseudo records for immediate vand; NOTE(review): presumably rewritten to MVE_VBICIZ* elsewhere - confirm
+    (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+
+class MVE_VMOV_lane_direction { // Bundle of per-direction settings consumed by MVE_VMOV_lane.
+  bit bit_20; // written to Inst{20}
+  dag oops; // output operand list
+  dag iops; // input operand list, before the lane index operand is appended
+  string ops; // assembly operand string
+  string cstr; // constraint string
+}
+def MVE_VMOV_from_lane : MVE_VMOV_lane_direction { // Direction record: vector lane -> general-purpose register.
+  let bit_20 = 0b1;
+  let oops = (outs rGPR:$Rt);
+  let iops = (ins MQPR:$Qd); // the vector is a plain input in this direction
+  let ops = "$Rt, $Qd$Idx";
+  let cstr = ""; // nothing is tied
+}
+def MVE_VMOV_to_lane : MVE_VMOV_lane_direction { // Direction record: general-purpose register -> vector lane.
+  let bit_20 = 0b0;
+  let oops = (outs MQPR:$Qd);
+  let iops = (ins MQPR:$Qd_src, rGPR:$Rt); // the previous vector value comes in as $Qd_src
+  let ops = "$Qd$Idx, $Rt";
+  let cstr = "$Qd = $Qd_src"; // ties the result vector to the incoming vector
+}
+
+class MVE_VMOV_lane<string suffix, bit U, dag indexop, // Lane vmov encoding shared by both directions; the index bits are set by the size-specific subclasses.
+                    MVE_VMOV_lane_direction dir>
+  : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary, // !con appends the index operand to the direction's inputs
+                       "vmov", suffix, dir.ops, dir.cstr, []> {
+  bits<4> Qd;
+  bits<4> Rt;
+
+  let Inst{31-24} = 0b11101110;
+  let Inst{23} = U; // 1 only in the from_lane u16/u8 records
+  let Inst{20} = dir.bit_20; // 1 for from_lane, 0 for to_lane
+  let Inst{19-17} = Qd{2-0};
+  let Inst{15-12} = Rt{3-0};
+  let Inst{11-8} = 0b1011;
+  let Inst{7} = Qd{3};
+  let Inst{4-0} = 0b10000;
+}
+
+class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir> // 32-bit lanes: suffix "32", U = 0, index operand MVEVectorIndex<4>.
+    : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
+  bits<2> Idx;
+  let Inst{22} = 0b0;
+  let Inst{6-5} = 0b00;
+  let Inst{16} = Idx{1}; // index high bit
+  let Inst{21} = Idx{0}; // index low bit
+
+  let Predicates = [HasFPRegsV8_1M]; // only this lane size sets its own predicate list
+}
+
+class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir> // 16-bit lanes: index operand MVEVectorIndex<8>.
+  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
+  bits<3> Idx;
+  let Inst{22} = 0b0;
+  let Inst{5} = 0b1;
+  let Inst{16} = Idx{2}; // the 3-bit index is scattered over Inst{16}, Inst{21} and Inst{6}
+  let Inst{21} = Idx{1};
+  let Inst{6} = Idx{0};
+}
+
+class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir> // 8-bit lanes: index operand MVEVectorIndex<16>.
+  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
+  bits<4> Idx;
+  let Inst{22} = 0b1; // the 32- and 16-bit lane classes set this bit to 0
+  let Inst{16} = Idx{3}; // the 4-bit index is scattered over Inst{16}, Inst{21}, Inst{6} and Inst{5}
+  let Inst{21} = Idx{2};
+  let Inst{6} = Idx{1};
+  let Inst{5} = Idx{0};
+}
+
+def MVE_VMOV_from_lane_32  : MVE_VMOV_lane_32<            MVE_VMOV_from_lane>; // 32-bit lanes take only the direction
+def MVE_VMOV_to_lane_32    : MVE_VMOV_lane_32<            MVE_VMOV_to_lane>;
+def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>; // from_lane has separate s/u records (U = 0 / 1)
+def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_16    : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>; // to_lane has one record per size, with an unsigned-less suffix
+def MVE_VMOV_from_lane_s8  : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
+def MVE_VMOV_from_lane_u8  : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_8     : MVE_VMOV_lane_8 <  "8", 0b0, MVE_VMOV_to_lane>;
+
+let Predicates = [HasMVEInt] in { // lane extract / insert / scalar_to_vector selection patterns
+  def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane), // v2f64 lanes are handled as subregister copies, not instructions
+            (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
+  def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
+            (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
+
+  def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane), // v4i32 extract: subregister extract, then copy into rGPR
+            (COPY_TO_REGCLASS
+              (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
+  def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane), // v4i32 insert uses the to_lane instruction
+            (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+
+  def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_8  MQPR:$src1, rGPR:$src2, imm:$lane)>;
+  def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+
+  def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane), // ARMvgetlanes -> the s8/s16 from_lane records
+            (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane), // ARMvgetlaneu -> the u8/u16 from_lane records
+            (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
+
+  def : Pat<(v16i8 (scalar_to_vector GPR:$src)), // scalar_to_vector: write lane 0 of an IMPLICIT_DEF vector
+            (MVE_VMOV_to_lane_8  (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+
+  // Floating point patterns, still enabled under HasMVEInt
+  def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane), // f32 lanes are subregister copies through SPR
+            (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
+  def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
+            (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
+
+  def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane), // f16 lanes go through rGPR and the 16-bit lane instructions
+            (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
+  def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane),
+            (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>;
+
+  def : Pat<(v4f32 (scalar_to_vector SPR:$src)), // FP-register sources are inserted as subregister ssub_0
+            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+  def : Pat<(v4f32 (scalar_to_vector GPR:$src)), // GPR sources use the to_lane instruction at lane 0
+            (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
+            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
+  def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+}
+
+// end of mve_bit instructions
+
+// start of MVE Integer instructions
+
+class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]> // Common Qd/Qn/Qm + size encoding for the integer arithmetic classes below.
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
+          iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  bits<4> Qm;
+
+  let Inst{22} = Qd{3}; // each 4-bit register number is split: top bit here, low three bits below
+  let Inst{21-20} = size; // 0b00 / 0b01 / 0b10 in the 8 / 16 / 32-bit records
+  let Inst{19-17} = Qn{2-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]> // Vector-by-vector "vmul" encoding on top of MVE_int.
+  : MVE_int<"vmul", suffix, size, pattern> {
+
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01001;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VMULt1i8  : MVE_VMULt1<"i8", 0b00>; // args: suffix, size
+def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
+def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in { // select mul on v16i8 / v8i16 / v4i32 to the MVE_VMULt1 record of matching width
+  def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding, // Shared encoding for vqdmulh / vqrdmulh on top of MVE_int.
+                  list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
+
+  let Inst{28} = rounding; // 0 for vqdmulh, 1 for vqrdmulh
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01011;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]> // MVE_VQxDMULH with iname "vqdmulh" and rounding = 0
+  : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
+class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]> // MVE_VQxDMULH with iname "vqrdmulh" and rounding = 1
+  : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
+
+def MVE_VQDMULHi8   : MVE_VQDMULH<"s8",  0b00>; // note: record names end in i8/i16/i32 while the assembly suffix passed is s8/s16/s32
+def MVE_VQDMULHi16  : MVE_VQDMULH<"s16", 0b01>;
+def MVE_VQDMULHi32  : MVE_VQDMULH<"s32", 0b10>;
+
+def MVE_VQRDMULHi8  : MVE_VQRDMULH<"s8",  0b00>;
+def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
+def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+
+class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract, // Shared encoding for integer vadd / vsub on top of MVE_int.
+                    list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
+
+  let Inst{28} = subtract; // 0 for vadd, 1 for vsub
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01000;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
+class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+
+def MVE_VADDi8  : MVE_VADD<"i8",  0b00>;
+def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
+def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+def MVE_VSUBi8  : MVE_VSUB<"i8",  0b00>;
+def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
+def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
+                   bits<2> size, list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
+  // Shared encoding for vqadd/vqsub: bit 28 = U, bit 9 = subtract.
+  let Inst{28} = U;  // 0 for the "s*" defs below, 1 for the "u*" defs
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-10} = 0b000;
+  let Inst{9} = subtract;  // 0 = vqadd, 1 = vqsub
+  let Inst{8} = 0b0;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
+}
+
+class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>;
+class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>;
+
+def MVE_VQADDs8  : MVE_VQADD<"s8",  0b0, 0b00>;
+def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>;
+def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>;
+def MVE_VQADDu8  : MVE_VQADD<"u8",  0b1, 0b00>;
+def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>;
+def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>;
+
+def MVE_VQSUBs8  : MVE_VQSUB<"s8",  0b0, 0b00>;
+def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>;
+def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>;
+def MVE_VQSUBu8  : MVE_VQSUB<"u8",  0b1, 0b00>;
+def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>;
+def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>;
+
+class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_int<"vabd", suffix, size, pattern> {
+  // Integer vabd; bit 28 = U: 0 for the "s*" defs below, 1 for the "u*" defs.
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
+def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
+def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
+def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
+def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
+def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+
+class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_int<"vrhadd", suffix, size, pattern> {
+  // vrhadd; bit 28 = U: 0 for the "s*" defs below, 1 for the "u*" defs.
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VRHADDs8  : MVE_VRHADD<"s8", 0b0, 0b00>;
+def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
+def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
+def MVE_VRHADDu8  : MVE_VRHADD<"u8", 0b1, 0b00>;
+def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
+def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
+
+class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
+                   bits<2> size, list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
+  // Same field layout as MVE_VQADDSUB except that bit 4 is 0 here.
+  let Inst{28} = U;  // 0 for the "s*" defs below, 1 for the "u*" defs
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-10} = 0b000;
+  let Inst{9} = subtract;  // 0 = vhadd, 1 = vhsub
+  let Inst{8} = 0b0;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+class MVE_VHADD<string suffix, bit U, bits<2> size,
+              list<dag> pattern=[]>
+  : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
+class MVE_VHSUB<string suffix, bit U, bits<2> size,
+              list<dag> pattern=[]>
+  : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
+
+def MVE_VHADDs8  : MVE_VHADD<"s8",  0b0, 0b00>;
+def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
+def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
+def MVE_VHADDu8  : MVE_VHADD<"u8",  0b1, 0b00>;
+def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
+def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
+
+def MVE_VHSUBs8  : MVE_VHSUB<"s8",  0b0, 0b00>;
+def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
+def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
+def MVE_VHSUBu8  : MVE_VHSUB<"u8",  0b1, 0b00>;
+def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
+def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
+
+class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
+          "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<4> Rt;
+  // vdup from a GPR; B (bit 22) and E (bit 5) vary per element size, see defs.
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b101;
+  let Inst{22} = B;
+  let Inst{21-20} = 0b10;
+  let Inst{19-17} = Qd{2-0};  // Qd is split: low 3 bits here, top bit in bit 7
+  let Inst{16} = 0b0;
+  let Inst{15-12} = Rt;
+  let Inst{11-8} = 0b1011;
+  let Inst{7} = Qd{3};
+  let Inst{6} = 0b0;
+  let Inst{5} = E;
+  let Inst{4-0} = 0b10000;
+}
+// (B, E): 32-bit = (0, 0), 16-bit = (0, 1), 8-bit = (1, 0).
+def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
+def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
+def MVE_VDUP8  : MVE_VDUP<"8",  0b1, 0b0>;
+
+let Predicates = [HasMVEInt] in {  // Selection patterns for ARMvdup/ARMvduplane
+  def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP8  rGPR:$elem)>;
+  def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP16 rGPR:$elem)>;
+  def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP32 rGPR:$elem)>;
+  // Lane splats: MVE_VMOV_from_lane_* produces the value that VDUP replicates.
+  def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
+            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+  // For the 16-bit and 8-bit vduplanes we don't care about the signedness
+  // of the lane move operation as we only want the lowest 8/16 bits anyway.
+  def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
+            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+  def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
+            (MVE_VDUP8  (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
+  // FP splats: copy the scalar FP register to rGPR and reuse the integer VDUP.
+  def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
+            (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
+  def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
+            (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
+  // FP lane splats use the same lane-move + VDUP sequences as the integer ones.
+  def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
+            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+  def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
+            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+}
+
+
+class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
+                         list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
+          iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // One-source base: Qd in bits 22/15-13, Qm in bits 5/3-1, size in bits 19-18.
+  let Inst{22} = Qd{3};
+  let Inst{19-18} = size{1-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
+                   bit count_zeroes, list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+  // Shared encoding for vcls/vclz; bit 7 = count_zeroes (0 = vcls, 1 = vclz).
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b00;
+  let Inst{12-8} = 0b00100;
+  let Inst{7} = count_zeroes;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
+def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
+def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+// NOTE(review): these def names end in "s*" but their asm suffix is "i*".
+def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
+def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
+def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+
+class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
+                      list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+  // Shared encoding for integer vabs/vneg; bit 7 = negate (0 = vabs, 1 = vneg).
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b01;
+  let Inst{12-8} = 0b00011;
+  let Inst{7} = negate;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
+def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
+def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
+// Select the generic vector 'abs' node to the VABS of matching element width.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VABSs8 $v))>;
+  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VABSs16 $v))>;
+  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VABSs32 $v))>;
+}
+
+def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
+def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
+def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+// Select 'vnegq' (not defined in this section of the file) to the matching VNEG.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VNEGs8 $v))>;
+  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VNEGs16 $v))>;
+  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VNEGs32 $v))>;
+}
+
+class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
+                   bit negate, list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+  // Shared encoding for vqabs/vqneg; bit 7 = negate (0 = vqabs, 1 = vqneg).
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b00;
+  let Inst{12-8} = 0b00111;
+  let Inst{7} = negate;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
+def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
+def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
+
+def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
+def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
+def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+
+class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
+                  dag iops, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
+          vpred_r, "", pattern> {
+  bits<13> imm;
+  bits<4> Qd;
+  // Only imm{7-0} is placed here; bits 11-8 come from the cmode parameter.
+  let Inst{28} = imm{7};
+  let Inst{25-23} = 0b111;
+  let Inst{22} = Qd{3};
+  let Inst{21-19} = 0b000;
+  let Inst{18-16} = imm{6-4};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = 0b0;
+  let Inst{11-8} = cmode{3-0};  // defs passing '?' bits override these from imm
+  let Inst{7-6} = 0b01;
+  let Inst{5} = op;
+  let Inst{4} = 0b1;
+  let Inst{3-0} = imm{3-0};
+
+  let DecoderMethod = "DecodeMVEModImmInstruction";
+}
+
+let isReMaterializable = 1 in {
+let isAsCheapAsAMove = 1 in {
+def MVE_VMOVimmi8  : MVE_mod_imm<"vmov", "i8",  {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
+def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
+  let Inst{9} = imm{9};  // fills the '?' cmode bit from the immediate operand
+}
+def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
+  let Inst{11-8} = imm{11-8};  // whole cmode field comes from the immediate
+}
+def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
+} // let isAsCheapAsAMove = 1
+// The vmvn forms are rematerializable but not marked isAsCheapAsAMove.
+def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
+  let Inst{9} = imm{9};  // fills the '?' cmode bit from the immediate operand
+}
+def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
+  let Inst{11-8} = imm{11-8};  // whole cmode field comes from the immediate
+}
+} // let isReMaterializable = 1
+
+let Predicates = [HasMVEInt] in {  // Select ARMvmovImm/ARMvmvnImm/ARMvmovFPImm
+  def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
+            (v16i8 (MVE_VMOVimmi8  nImmSplatI8:$simm))>;
+  def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
+            (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
+  def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
+            (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+  // Only i16 and i32 vmvn-immediate defs exist above, hence two patterns.
+  def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
+            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
+  def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
+            (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
+
+  def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
+            (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+}
+
+class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
+                   bit bit_12, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+          NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
+          pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // $Qd is tied to $Qd_src, so only $Qd and $Qm appear in the asm string.
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b100;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size;
+  let Inst{17-16} = 0b11;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = bit_12;  // 0 for the vmaxa defs below, 1 for the vmina defs
+  let Inst{11-6} = 0b111010;
+  let Inst{5} = Qm{3};
+  let Inst{4} = 0b0;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b1;
+}
+
+def MVE_VMAXAs8  : MVE_VMINMAXA<"vmaxa", "s8",  0b00, 0b0>;
+def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
+def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
+
+def MVE_VMINAs8  : MVE_VMINMAXA<"vmina", "s8",  0b00, 0b1>;
+def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
+def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
+
+// end of MVE Integer instructions
+
+// start of mve_imm_shift instructions
+
+def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
+                      (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
+                      NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
+                      vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
+  bits<5> imm;
+  bits<4> Qd;
+  bits<4> RdmDest;
+  // Both outputs are tied to inputs: $RdmDest = $RdmSrc and $Qd = $QdSrc.
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b101;
+  let Inst{22} = Qd{3};
+  let Inst{21} = 0b1;
+  let Inst{20-16} = imm{4-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-4} = 0b011111100;
+  let Inst{3-0} = RdmDest{3-0};
+}
+
+class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
+                    string ops, vpred_ops vpred, string cstr,
+                    list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // Encodes only the Qd/Qm register fields; subclasses set the remaining bits.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+              list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+                  iname, suffix, "$Qd, $Qm", vpred_r, "",
+                  pattern> {
+  let Inst{28} = U;  // 0 for the "s*" defms below, 1 for the "u*" defms
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = sz{1-0};
+  let Inst{18-16} = 0b000;
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+// Emits <name>bh (iname + "b", bit 12 = 0) and <name>th (iname + "t", bit 12 = 1).
+multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
+                                list<dag> pattern=[]> {
+  def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
+    let Inst{12} = 0b0;
+  }
+  def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
+    let Inst{12} = 0b1;
+  }
+}
+// sz is 0b01 for the 8-bit source forms and 0b10 for the 16-bit source forms.
+defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
+defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
+defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
+defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+
+let Predicates = [HasMVEInt] in {  // In-register extension via the "bh" VMOVLs
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
+            (MVE_VMOVLs16bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
+            (MVE_VMOVLs8bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),  // 8 -> 32 takes two steps
+            (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+  // NOTE(review): 0xCFF / 0x8FF look like encoded 0xFFFF / 0xFF splats - confirm.
+  // zext_inreg 16 -> 32
+  def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+            (MVE_VMOVLu16bh MQPR:$src)>;
+  // zext_inreg 8 -> 16
+  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+            (MVE_VMOVLu8bh MQPR:$src)>;
+}
+
+
+class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
+                    dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
+  let Inst{28} = U;  // 0 for the "s*" MVE_VSHLL_imm* defs, 1 for the "u*" defs
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b1;
+  let Inst{12} = th;  // 0 for the "vshllb" defs, 1 for the "vshllt" defs
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+// The immediate VSHLL instructions accept shift counts from 1 up to
+// the lane width (8 or 16), but the full-width shifts have an
+// entirely separate encoding, given below with 'lw' in the name.
+
+class MVE_VSHLL_imm8<string iname, string suffix,
+                     bit U, bit th, list<dag> pattern=[]>
+  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+  bits<3> imm;
+  let Inst{20-19} = 0b01;  // fixed bits above the 3-bit shift count
+  let Inst{18-16} = imm;
+}
+
+class MVE_VSHLL_imm16<string iname, string suffix,
+                      bit U, bit th, list<dag> pattern=[]>
+  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+  bits<4> imm;
+  let Inst{20} = 0b1;  // fixed bit above the 4-bit shift count
+  let Inst{19-16} = imm;
+}
+// Def naming: <s|u><8|16> element type, then bh ("vshllb") or th ("vshllt").
+def MVE_VSHLL_imms8bh  : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
+def MVE_VSHLL_imms8th  : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
+def MVE_VSHLL_immu8bh  : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
+def MVE_VSHLL_immu8th  : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
+def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
+def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
+def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
+def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
+
+class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
+                              bit U, string ops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+                  iname, suffix, ops, vpred_r, "", pattern> {
+  let Inst{28} = U;  // 0 for the "s*" defms below, 1 for the "u*" defms
+  let Inst{25-23} = 0b100;
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size{1-0};
+  let Inst{17-16} = 0b01;
+  let Inst{11-6} = 0b111000;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b1;
+}
+// Emits bh (iname#"b", bit 12 = 0) and th (iname#"t", bit 12 = 1) variants.
+multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
+                              string ops, list<dag> pattern=[]> {
+  def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
+    let Inst{12} = 0b0;
+  }
+  def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
+    let Inst{12} = 0b1;
+  }
+}
+// The shift amount is a literal in the asm string, not an instruction operand.
+defm MVE_VSHLL_lws8  : MVE_VSHLL_lw<"vshll", "s8",  0b00, 0b0, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
+defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8",  0b00, 0b1, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+
+class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
+               dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
+  // Each def overrides the top bit(s) of the imm field (Inst{20-19} or Inst{20}).
+  let Inst{28} = bit_28;  // 1 for the vrshrn* defs, 0 for the vshrn* defs
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;  // 0 for the "b" mnemonics, 1 for the "t" mnemonics
+  let Inst{11-6} = 0b111111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b1;
+}
+
+def MVE_VRSHRNi16bh : MVE_VxSHRN<
+    "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi16th : MVE_VxSHRN<
+    "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi32bh : MVE_VxSHRN<
+    "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VRSHRNi32th : MVE_VxSHRN<
+    "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+// vshrn* defs: same arguments as the vrshrn* defs above but with bit_28 = 0.
+def MVE_VSHRNi16bh : MVE_VxSHRN<
+    "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi16th : MVE_VxSHRN<
+    "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi32bh : MVE_VxSHRN<
+    "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VSHRNi32th : MVE_VxSHRN<
+    "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
+                 list<dag> pattern=[]>  // NB: (bit_28, bit_12) order, unlike VxSHRN
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
+  // Same field layout as MVE_VxSHRN, except that Inst{0} is 0 here.
+  let Inst{28} = bit_28;  // 1 for the vqrshrun* defs, 0 for the vqshrun* defs
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;  // 0 for the "b" mnemonics, 1 for the "t" mnemonics
+  let Inst{11-6} = 0b111111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
+    "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
+    "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
+    "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
+    "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+// vqshrun* defs: same arguments as the vqrshrun* defs above but bit_28 = 0.
+def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
+    "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
+    "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
+    "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
+    "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+
+class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
+                   dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
+  // Bit 28 is not set here; each def in MVE_VxQRSHRN_types supplies it.
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
+  let Inst{0} = bit_0;
+}
+// Instantiates s16/u16/s32/u32 variants; bit 28 is 0 for "s*" and 1 for "u*".
+multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
+  def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+    let Inst{28} = 0b0;
+    let Inst{20-19} = 0b01;
+  }
+  def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+    let Inst{28} = 0b1;
+    let Inst{20-19} = 0b01;
+  }
+  def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+    let Inst{28} = 0b0;
+    let Inst{20} = 0b1;
+  }
+  def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+    let Inst{28} = 0b1;
+    let Inst{20} = 0b1;
+  }
+}
+// bit_0: 1 = vqrshrn*, 0 = vqshrn*; bit_12: 0 = "b" forms, 1 = "t" forms.
+defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
+defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
+defm MVE_VQSHRNbh  : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
+defm MVE_VQSHRNth  : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
+
+// end of mve_imm_shift instructions
+
+// start of mve_shift instructions
+
+class MVE_shift_by_vec<string iname, string suffix, bit U,
+                       bits<2> size, bit bit_4, bit bit_8>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
+           iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
+  // Shift instructions which take a vector of shift counts
+  bits<4> Qd;
+  bits<4> Qm;
+  bits<4> Qn;
+  // U, size, bit_4 and bit_8 are the only per-instruction bits; see the defms.
+  let Inst{28} = U;
+  let Inst{25-24} = 0b11;
+  let Inst{23} = 0b0;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = size;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-9} = 0b0010;
+  let Inst{8} = bit_8;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{5} = Qm{3};
+  let Inst{4} = bit_4;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+// Instantiates s8/s16/s32/u8/u16/u32 variants; U is 1 for the "u*" defs.
+multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
+  def s8  : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
+  def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
+  def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
+  def u8  : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
+  def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
+  def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+}
+// Arguments are (bit_4, bit_8): bit_4 = 1 for vq*, bit_8 = 1 for v*rshl.
+defm MVE_VSHL_by_vec   : mve_shift_by_vec_multi<"vshl",   0b0, 0b0>;
+defm MVE_VQSHL_by_vec  : mve_shift_by_vec_multi<"vqshl",  0b1, 0b0>;
+defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
+defm MVE_VRSHL_by_vec  : mve_shift_by_vec_multi<"vrshl",  0b0, 0b1>;
+
+let Predicates = [HasMVEInt] in {  // ARMvshlu -> "u*" defs, ARMvshls -> "s*" defs
+  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+            (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+            (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+            (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+  // Signed variants.
+  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+            (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+            (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+            (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+}
+
+class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
+                         string ops, vpred_ops vpred, string cstr,
+                         list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // Register fields plus the fixed bits shared by the subclasses of this class.
+  let Inst{23} = 0b1;
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-11} = 0b00;
+  let Inst{7-6} = 0b01;
+  let Inst{5} = Qm{3};
+  let Inst{4} = 0b1;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b0;
+}
+
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+  : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+                       "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
+  bits<6> imm;
+  let Inst{28} = 0b1;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-9} = 0b10;
+  let Inst{8} = bit_8;  // 0 for the vsri defs below, 1 for the vsli defs
+}
+// The vsri defs take shr_imm* operands; the vsli defs take imm0_* operands.
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+  let Inst{21} = 0b1;
+}
+
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
+}
+
+class MVE_VQSHL_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+  // Bit 28 is set by each def below: 0 for "s*", 1 for "u*".
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-8} = 0b111;
+}
+// NOTE(review): the defs below are named MVE_VSLIimm* but instantiate "vqshl".
+def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
+
+def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
+}
+
+class MVE_VQSHLU_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+  // Bit 28 is fixed to 1; only "s*" suffixes are instantiated below.
+  let Inst{28} = 0b1;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-8} = 0b110;
+}
+
+def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
+}
+
+class MVE_VRSHR_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+  // Bit 28 is set by each def below: 0 for "s*", 1 for "u*".
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-8} = 0b010;
+}
+
+def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
+
+def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
+}
+
+class MVE_VSHR_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+  // Bit 28 is set by each def below: 0 for "s*", 1 for "u*".
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-8} = 0b000;
+}
+
+def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
+
+def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
+}
+
+class MVE_VSHL_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+  // Bit 28 is fixed to 0; there is one "i*" def per element size below.
+  let Inst{28} = 0b0;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;  // each def overrides the top bit(s) of this field
+  let Inst{10-8} = 0b101;
+}
+
+def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
+}
+// ISel patterns: immediate vector shift nodes select the VSHL/VSHR imm defs.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+  // NOTE(review): the VSHR defs declare shr_immN but these use imm0_(N-1).
+  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+  // ARMvshrsImm selects the "s" VSHR defs; same operand-class question.
+  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+}
+
+// end of mve_shift instructions
+
+// start of MVE Floating Point instructions
+// Shared base for the MVE FP classes: encodes Qm and presets Inst{12,6,0}.
+class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
+                vpred_ops vpred, string cstr, list<dag> pattern=[]>
+  : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qm;
+  // Qm is split across Inst{5} (top bit) and Inst{3-1} (low three bits).
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{12} = 0b0;
+  let Inst{6} = 0b1;
+  let Inst{0} = 0b0;
+}
+// VRINT<rmode> encoding; `op` is stored in Inst{9-7}, `size` in Inst{19-18}.
+class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
+                list<dag> pattern=[]>
+  : MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
+              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  // Operand- and parameter-driven fields.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-18} = size;
+  let Inst{9-7} = op{2-0};
+  // Fixed opcode bits.
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b10;
+  let Inst{11-10} = 0b01;
+  let Inst{4} = 0b0;
+}
+// One def per rounding-mode letter; the 3-bit value is MVE_VRINT's `op`.
+multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
+  def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
+  def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
+  def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
+  def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
+  def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
+  def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
+}
+// f16 uses size 0b01, f32 uses size 0b10.
+defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+// ISel patterns: each rounding node selects one VRINT variant, f32 and f16.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),  // frint -> VRINTX
+            (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))), // fround -> VRINTA
+            (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))), // ftrunc -> VRINTZ
+            (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))), // ffloor -> VRINTM
+            (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),  // fceil -> VRINTP
+            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+}
+// MVE_float plus the `size` bit in Inst{20} and a cleared Inst{16}.
+class MVEFloatArithNeon<string iname, string suffix, bit size,
+                        dag oops, dag iops, string ops,
+                        vpred_ops vpred, string cstr, list<dag> pattern=[]>
+  : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
+  let Inst{16} = 0b0;
+  let Inst{20} = size;
+}
+// VMUL (floating-point), vector by vector.
+class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
+  : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd),
+                      (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
+                      pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  // Register operands.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = 0b1;           // fixed opcode bits from here on
+  let Inst{25-23} = 0b110;
+  let Inst{21} = 0b0;
+  let Inst{12-8} = 0b01101;
+  let Inst{4} = 0b1;
+}
+// Size bit: 0 for f32, 1 for f16.
+def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
+def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>;
+// ISel patterns: vector fmul selects VMUL.f32 / VMUL.f16.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+// VCMLA: $Qd is tied to $Qd_src; the 2-bit `rot` is stored in Inst{24-23}.
+class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
+  : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
+      (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
+      "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  bits<2> rot;
+  // Operand fields.
+  let Inst{24-23} = rot;
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25} = 0b0;
+  let Inst{21} = 0b1;
+  let Inst{12-8} = 0b01000;
+  let Inst{4} = 0b0;
+}
+// Size bit: 0 for f16, 1 for f32.
+def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
+def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
+// Shared encoding for FP vadd/vsub/vfma/vfms; bit_4, bit_8, bit_21 pick the op.
+class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
+                        bit bit_8, bit bit_21, dag iops=(ins),
+                        vpred_ops vpred=vpred_r, string cstr="",
+                        list<dag> pattern=[]>
+  : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+                      !con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
+                      vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  // Register operands.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{21} = bit_21;        // operation selectors
+  let Inst{8} = bit_8;
+  let Inst{4} = bit_4;
+  let Inst{28} = 0b0;           // fixed opcode bits
+  let Inst{25-23} = 0b110;
+  let Inst{11-9} = 0b110;
+}
+// VFMA: bit_4 = 1, bit_8 = 0, bit_21 = 0; $Qd is tied to the extra $Qd_src.
+def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
+    (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
+    (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+// VFMS: identical to VFMA except bit_21 = 1.
+def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
+    (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
+    (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+// VADD: bit_4 = 0, bit_8 = 1, bit_21 = 0; default iops, vpred_r, no constraint.
+def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
+def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+// ISel patterns: vector fadd selects VADD.f32 / VADD.f16.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+// VSUB: identical to VADD except bit_21 = 1.
+def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
+def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+// ISel patterns: vector fsub selects VSUB.f32 / VSUB.f16.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+// VCADD (floating-point); `rot` is a single bit stored in Inst{24}.
+class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]>
+  : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
+      (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
+      "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  bit rot;
+  // Operand fields.
+  let Inst{24} = rot;
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25} = 0b0;
+  let Inst{23} = 0b1;
+  let Inst{21} = 0b0;
+  let Inst{12-8} = 0b01000;
+  let Inst{4} = 0b0;
+}
+// Size bit: 0 for f16, 1 for f32.
+def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
+def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>;
+// VABD (floating-point): derives from MVE_float and sets Inst{20,16} itself.
+class MVE_VABD_fp<string suffix, bit size>
+  : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+              "$Qd, $Qn, $Qm", vpred_r, ""> {
+  bits<4> Qd;
+  bits<4> Qn;
+  // Operand and parameter fields.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{20} = size;
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25-23} = 0b110;
+  let Inst{21} = 0b1;
+  let Inst{16} = 0b0;
+  let Inst{11-8} = 0b1101;
+  let Inst{4} = 0b0;
+}
+// Size bit: 0 for f32, 1 for f16.
+def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
+def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>;
+// VCVT with an immediate operand ($imm6); uses a custom instruction decoder.
+class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
+                   Operand imm_operand_type, list<dag> pattern=[]>
+  : MVE_float<"vcvt", suffix,
+              (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
+              "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<6> imm6;
+  // Only imm6{3-0} is placed here; Inst{20} is not assigned by this class.
+  let Inst{19-16} = imm6{3-0};
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{28} = U;             // class parameters
+  let Inst{9} = fsi;
+  let Inst{8} = op;
+  let Inst{25-23} = 0b111;      // fixed opcode bits
+  let Inst{21} = 0b1;
+  let Inst{11-10} = 0b11;
+  let Inst{7} = 0b0;
+  let Inst{4} = 0b1;
+
+  let DecoderMethod = "DecodeMVEVCVTt1fp";
+}
+// Assembler operand class and operand for the VCVT immediate (1..Bits).
+class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
+  let Name = "MVEVcvtImm" # Bits;
+  let PredicateMethod = "isImmediate<1," # Bits # ">";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticString =
+      "MVE fixed-point immediate operand must be between 1 and " # Bits;
+}
+class MVE_VCVT_imm<int Bits>: Operand<i32> {
+  let EncoderMethod = "getNEONVcvtImm32OpValue";
+  let DecoderMethod = "DecodeVCVTImmOperand";
+  let ParserMatchClass = MVE_VCVT_imm_asmop<Bits>;
+}
+// f32 form: fsi = 1, 1..32 immediate, Inst{20} carries imm6{4}.
+class MVE_VCVT_fix_f32<string suffix, bit U, bit op>
+    : MVE_VCVT_fix<suffix, 0b1, U, op, MVE_VCVT_imm<32>> {
+  let Inst{20} = imm6{4};
+}
+class MVE_VCVT_fix_f16<string suffix, bit U, bit op> // fsi = 0, 1..16 immediate
+    : MVE_VCVT_fix<suffix, 0b0, U, op, MVE_VCVT_imm<16>> {
+  let Inst{20} = 0b1; // constant here, unlike the f32 form
+}
+// Arguments are <suffix, U, op>: U = 1 on the "u" defs, op = 1 when int <- fp.
+def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
+def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
+def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
+def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
+def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
+def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
+def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
+def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
+// VCVT with a rounding-mode letter appended to the mnemonic; rm in Inst{9-8}.
+class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
+                bits<2> rm, list<dag> pattern=[]>
+  : MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
+              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  // Operand and parameter fields.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-18} = size;
+  let Inst{9-8} = rm;
+  let Inst{7} = op;
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b11;
+  let Inst{12-10} = 0b000;
+  let Inst{4} = 0b0;
+}
+// One def per rounding-mode letter (a/n/p/m); `pattern` goes to every def.
+multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
+                                list<dag> pattern=[]> {
+  def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00, pattern>;
+  def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01, pattern>;
+  def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10, pattern>;
+  def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11, pattern>;
+}
+
+// This defines instructions such as MVE_VCVTu16f16a, with an explicit
+// rounding-mode suffix on the mnemonic. The class below will define
+// the unsuffixed form as MVE_VCVTu16f16z (implied rounding toward zero).
+defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>; // op=0
+defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>; // op=1
+defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
+defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
+// Plain "vcvt" between fp and int vectors; the 2-bit `op` sits in Inst{8-7}.
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+                      list<dag> pattern=[]>
+  : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
+              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  // Operand and parameter fields.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-18} = size;
+  let Inst{8-7} = op;
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b11;
+  let Inst{12-9} = 0b0011;
+  let Inst{4} = 0b0;
+}
+
+// The unsuffixed VCVT for float->int implicitly rounds toward zero,
+// which is reflected in the "z" suffix of these LLVM instruction names.
+def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
+def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
+def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
+def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+// Whereas VCVT for int->float rounds to nearest, hence the "n" suffix.
+def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
+def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
+def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
+def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
+// ISel patterns: fp<->int conversion nodes select the unsuffixed VCVT defs.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),  // fp -> int: "z" defs
+            (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
+  def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
+            (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
+  def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
+            (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
+  def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
+            (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
+  def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),  // int -> fp: "n" defs
+            (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
+  def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
+            (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
+  def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
+            (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
+  def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
+            (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
+}
+// Shared encoding for FP vabs/vneg; the `negate` bit is stored in Inst{7}.
+class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
+                   list<dag> pattern=[]>
+  : MVE_float<iname, suffix, (outs MQPR:$Qd),
+              (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+  bits<4> Qd;
+  // Operand and parameter fields.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{19-18} = size;
+  let Inst{7} = negate;
+  let Inst{28} = 0b1;           // fixed opcode bits
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b01;
+  let Inst{11-8} = 0b0111;
+  let Inst{4} = 0b0;
+}
+// VABS: negate = 0; size is 0b01 for f16 and 0b10 for f32.
+def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
+def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
+// ISel patterns: vector fabs selects VABS.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v8f16 (fabs MQPR:$src)),
+            (MVE_VABSf16 MQPR:$src)>;
+  def : Pat<(v4f32 (fabs MQPR:$src)),
+            (MVE_VABSf32 MQPR:$src)>;
+}
+// VNEG: negate = 1.
+def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
+def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+// ISel patterns: vector fneg selects VNEG.
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v8f16 (fneg MQPR:$src)),
+            (MVE_VNEGf16 MQPR:$src)>;
+  def : Pat<(v4f32 (fneg MQPR:$src)),
+            (MVE_VNEGf32 MQPR:$src)>;
+}
+// vmaxnma/vminnma encoding; $Qd is tied to $Qd_src, bit_12 goes in Inst{12}.
+class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
+                     list<dag> pattern=[]>
+  : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+          NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
+          pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // Register operands.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{28} = size;          // class parameters
+  let Inst{12} = bit_12;
+  let Inst{25-23} = 0b100;      // fixed opcode bits
+  let Inst{21-16} = 0b111111;
+  let Inst{11-6} = 0b111010;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b1;
+}
+// vmaxnma uses bit_12 = 0; size is 0 for f32 and 1 for f16.
+def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
+def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;
+// vminnma uses bit_12 = 1.
+def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
+def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
+
+// end of MVE Floating Point instructions
+
+// start of MVE compares
+
+class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
+                 VCMPPredicateOperand predtype, list<dag> pattern=[]>
+  : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
+           NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
+  // Two-vector compare: output is VCCR:$P0, the condition is operand $fc.
+  bits<3> fc;
+  bits<4> Qn;
+  bits<4> Qm;
+  // Registers, then the condition code scattered over Inst{12}, {7}, {0}.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{12} = fc{2};
+  let Inst{7} = fc{0};
+  let Inst{0} = fc{1};
+  // Selector bits supplied by the subclass.
+  let Inst{28} = bit_28;
+  let Inst{21-20} = bits_21_20;
+  // Fixed opcode bits.
+  let Inst{25-22} = 0b1000;
+  let Inst{16-13} = 0b1000;
+  let Inst{11-8} = 0b1111;
+  let Inst{6} = 0b0;
+  let Inst{4} = 0b0;
+
+  let Constraints = "";
+
+  // A custom decoder method is required because the VCCR output
+  // operand is not encoded in any instruction bit (there is only one
+  // choice for it), yet it must appear among the MC operands so that
+  // codegen can track its data flow, spill/reload it, etc. There
+  // seems to be no way to make the Tablegen decoder emit an operand
+  // that no instruction bit affects.
+  let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
+}
+// FP compare: bit_28 carries `size`, bits 21:20 are 0b11; needs HasMVEFloat.
+class MVE_VCMPqqf<string suffix, bit size>
+    : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
+  let Predicates = [HasMVEFloat];
+}
+// "i" compare: replaces the base's fc{2} and fc{1} bits with constant 0, 0.
+class MVE_VCMPqqi<string suffix, bits<2> size>
+    : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b0;
+}
+// "u" compare: Inst{12} = 0, Inst{0} = 1.
+class MVE_VCMPqqu<string suffix, bits<2> size>
+    : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b1;
+}
+// "s" compare: only Inst{12} is pinned (to 1); Inst{0} stays fc{1}.
+class MVE_VCMPqqs<string suffix, bits<2> size>
+    : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
+  let Inst{12} = 0b1;
+}
+// FP defs: size bit 0 for f32, 1 for f16.
+def MVE_VCMPf32 : MVE_VCMPqqf<"f32", 0b0>;
+def MVE_VCMPf16 : MVE_VCMPqqf<"f16", 0b1>;
+// Integer defs: size 0b00 / 0b01 / 0b10 for 8 / 16 / 32-bit elements.
+def MVE_VCMPi8  : MVE_VCMPqqi<"i8",  0b00>;
+def MVE_VCMPi16 : MVE_VCMPqqi<"i16", 0b01>;
+def MVE_VCMPi32 : MVE_VCMPqqi<"i32", 0b10>;
+
+def MVE_VCMPu8  : MVE_VCMPqqu<"u8",  0b00>;
+def MVE_VCMPu16 : MVE_VCMPqqu<"u16", 0b01>;
+def MVE_VCMPu32 : MVE_VCMPqqu<"u32", 0b10>;
+
+def MVE_VCMPs8  : MVE_VCMPqqs<"s8",  0b00>;
+def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
+def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
+
+class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
+                 VCMPPredicateOperand predtype, list<dag> pattern=[]>
+  : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
+           NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
+  // Vector-vs-scalar compare: $Rm is a GPRwithZR, output is VCCR:$P0.
+  bits<3> fc;
+  bits<4> Qn;
+  bits<4> Rm;
+  // Here fc{1} lives in Inst{5}; Inst{3-0} holds the whole of Rm.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{3-0} = Rm{3-0};
+  let Inst{12} = fc{2};
+  let Inst{7} = fc{0};
+  let Inst{5} = fc{1};
+  let Inst{28} = bit_28;        // selector bits supplied by the subclass
+  let Inst{21-20} = bits_21_20;
+  let Inst{25-22} = 0b1000;     // fixed opcode bits
+  let Inst{16-13} = 0b1000;
+  let Inst{11-8} = 0b1111;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+
+  let Constraints = "";
+  // Custom decoder method, needed for the same reason as in MVE_VCMPqq.
+  let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
+}
+// FP compare: bit_28 carries `size`, bits 21:20 are 0b11; needs HasMVEFloat.
+class MVE_VCMPqrf<string suffix, bit size>
+    : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
+  let Predicates = [HasMVEFloat];
+}
+// "i" compare: replaces the base's fc{2} and fc{1} bits with constant 0, 0.
+class MVE_VCMPqri<string suffix, bits<2> size>
+    : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
+  let Inst{12} = 0b0;
+  let Inst{5} = 0b0;
+}
+// "u" compare: Inst{12} = 0, Inst{5} = 1.
+class MVE_VCMPqru<string suffix, bits<2> size>
+    : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
+  let Inst{12} = 0b0;
+  let Inst{5} = 0b1;
+}
+// "s" compare: only Inst{12} is pinned (to 1); Inst{5} stays fc{1}.
+class MVE_VCMPqrs<string suffix, bits<2> size>
+    : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
+  let Inst{12} = 0b1;
+}
+// FP defs: size bit 0 for f32, 1 for f16.
+def MVE_VCMPf32r : MVE_VCMPqrf<"f32", 0b0>;
+def MVE_VCMPf16r : MVE_VCMPqrf<"f16", 0b1>;
+// Integer defs: size 0b00 / 0b01 / 0b10 for 8 / 16 / 32-bit elements.
+def MVE_VCMPi8r  : MVE_VCMPqri<"i8",  0b00>;
+def MVE_VCMPi16r : MVE_VCMPqri<"i16", 0b01>;
+def MVE_VCMPi32r : MVE_VCMPqri<"i32", 0b10>;
+
+def MVE_VCMPu8r  : MVE_VCMPqru<"u8",  0b00>;
+def MVE_VCMPu16r : MVE_VCMPqru<"u16", 0b01>;
+def MVE_VCMPu32r : MVE_VCMPqru<"u32", 0b10>;
+
+def MVE_VCMPs8r  : MVE_VCMPqrs<"s8",  0b00>;
+def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
+def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
+
+// end of MVE compares
+
+// start of MVE_qDest_qSrc
+// Base for vector-destination / vector-source forms: encodes Qd and Qm.
+class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
+                     string ops, vpred_ops vpred, string cstr,
+                     list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix,
+          ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+  // Register operands first, then the fixed opcode bits.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{25-23} = 0b100;
+  let Inst{11-9} = 0b111;
+  let Inst{6} = 0b0;
+  let Inst{4} = 0b0;
+}
+// Shared encoding for vq[r]dml{a,s}dh[x]; $Qd is tied to $Qd_src.
+class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
+                    string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+                   vpred_n, "$Qd = $Qd_src", pattern> {
+  bits<4> Qn;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = subtract;
+  let Inst{21-20} = size;
+  let Inst{12} = exch;
+  let Inst{0} = round;
+  let Inst{16} = 0b0;           // fixed opcode bits
+  let Inst{8} = 0b0;
+}
+// Instantiates the s8/s16/s32 forms (size 0b00/0b01/0b10) of one mnemonic.
+multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
+                               bit round, bit subtract> {
+  def s8  : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8",  0b00>;
+  def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
+  def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10>;
+}
+// Arguments are <iname, exch, round, subtract>.
+defm MVE_VQDMLADH   : MVE_VQxDMLxDH_multi<"vqdmladh",   0b0, 0b0, 0b0>;
+defm MVE_VQDMLADHX  : MVE_VQxDMLxDH_multi<"vqdmladhx",  0b1, 0b0, 0b0>;
+defm MVE_VQRDMLADH  : MVE_VQxDMLxDH_multi<"vqrdmladh",  0b0, 0b1, 0b0>;
+defm MVE_VQRDMLADHX : MVE_VQxDMLxDH_multi<"vqrdmladhx", 0b1, 0b1, 0b0>;
+defm MVE_VQDMLSDH   : MVE_VQxDMLxDH_multi<"vqdmlsdh",   0b0, 0b0, 0b1>;
+defm MVE_VQDMLSDHX  : MVE_VQxDMLxDH_multi<"vqdmlsdhx",  0b1, 0b0, 0b1>;
+defm MVE_VQRDMLSDH  : MVE_VQxDMLxDH_multi<"vqrdmlsdh",  0b0, 0b1, 0b1>;
+defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
+// VCMUL (requires HasMVEFloat); rot{1} goes in Inst{12}, rot{0} in Inst{0}.
+class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
+                   "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+  bits<4> Qn;
+  bits<2> rot;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{12} = rot{1};
+  let Inst{0} = rot{0};
+  let Inst{28} = size;
+  let Inst{21-20} = 0b11;       // fixed opcode bits
+  let Inst{16} = 0b0;
+  let Inst{8} = 0b0;
+
+  let Predicates = [HasMVEFloat];
+}
+// Size bit: 0 for f16, 1 for f32.
+def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
+def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>;
+// vmullb/vmullt encoding; `T` is stored in Inst{12}.
+class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
+                bit T, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+                   vpred_r, "", pattern> {
+  // Qd and Qm are already declared (and encoded) by MVE_qDest_qSrc, so only
+  // Qn is declared here, as in the other classes derived from that base.
+  bits<4> Qn;
+
+  let Inst{28} = bit_28;
+  let Inst{21-20} = bits_21_20;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b1;
+  let Inst{12} = T;
+  let Inst{8} = 0b0;
+  let Inst{7} = Qn{3};
+  let Inst{0} = 0b0;
+}
+// Emits the "bh" def (iname + "b", T = 0) and the "th" def (iname + "t", T = 1).
+multiclass MVE_VMULL_multi<string iname, string suffix,
+                           bit bit_28, bits<2> bits_21_20> {
+  def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0>;
+  def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1>;
+}
+
+// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
+// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
+// bit 28 switches to encoding the size.
+
+defm MVE_VMULLs8  : MVE_VMULL_multi<"vmull", "s8",  0b0, 0b00>;
+defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
+defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>;
+defm MVE_VMULLu8  : MVE_VMULL_multi<"vmull", "u8",  0b1, 0b00>;
+defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
+defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>;
+defm MVE_VMULLp8  : MVE_VMULL_multi<"vmull", "p8",  0b0, 0b11>;
+defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
+// Shared encoding for vmulh/vrmulh; the `round` bit is stored in Inst{12}.
+class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
+                 bit round, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+                   vpred_r, "", pattern> {
+  bits<4> Qn;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = U;
+  let Inst{21-20} = size;
+  let Inst{12} = round;
+  let Inst{16} = 0b1;           // fixed opcode bits
+  let Inst{8} = 0b0;
+  let Inst{0} = 0b1;
+}
+// vmulh: round = 0. Arguments are <iname, suffix, U, size, round>.
+def MVE_VMULHs8   : MVE_VxMULH<"vmulh",  "s8",  0b0, 0b00, 0b0>;
+def MVE_VMULHs16  : MVE_VxMULH<"vmulh",  "s16", 0b0, 0b01, 0b0>;
+def MVE_VMULHs32  : MVE_VxMULH<"vmulh",  "s32", 0b0, 0b10, 0b0>;
+def MVE_VMULHu8   : MVE_VxMULH<"vmulh",  "u8",  0b1, 0b00, 0b0>;
+def MVE_VMULHu16  : MVE_VxMULH<"vmulh",  "u16", 0b1, 0b01, 0b0>;
+def MVE_VMULHu32  : MVE_VxMULH<"vmulh",  "u32", 0b1, 0b10, 0b0>;
+// vrmulh: round = 1.
+def MVE_VRMULHs8  : MVE_VxMULH<"vrmulh", "s8",  0b0, 0b00, 0b1>;
+def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
+def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
+def MVE_VRMULHu8  : MVE_VxMULH<"vrmulh", "u8",  0b1, 0b00, 0b1>;
+def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
+def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
+// Shared encoding for the vmovn/vqmovn/vqmovun defs; $Qd is tied to $Qd_src.
+class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
+                  bits<2> size, bit T, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
+                   vpred_n, "$Qd = $Qd_src", pattern> {
+  // Inst{7} is always the inverse of bit_17.
+  let Inst{17} = bit_17;
+  let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
+  let Inst{28} = bit_28;
+  let Inst{19-18} = size;
+  let Inst{12} = T;
+  let Inst{21-20} = 0b11;       // fixed opcode bits
+  let Inst{16} = 0b1;
+  let Inst{8} = 0b0;
+  let Inst{0} = 0b1;
+}
+// Emits the "bh" def (iname + "b", T = 0) and the "th" def (iname + "t", T = 1).
+multiclass MVE_VxMOVxN_halves<string iname, string suffix,
+                              bit bit_28, bit bit_17, bits<2> size> {
+  def bh : MVE_VxMOVxN<iname # "b", suffix, bit_28, bit_17, size, 0b0>;
+  def th : MVE_VxMOVxN<iname # "t", suffix, bit_28, bit_17, size, 0b1>;
+}
+// Arguments are <iname, suffix, bit_28, bit_17, size>.
+defm MVE_VMOVNi16   : MVE_VxMOVxN_halves<"vmovn",   "i16", 0b1, 0b0, 0b00>;
+defm MVE_VMOVNi32   : MVE_VxMOVxN_halves<"vmovn",   "i32", 0b1, 0b0, 0b01>;
+defm MVE_VQMOVNs16  : MVE_VxMOVxN_halves<"vqmovn",  "s16", 0b0, 0b1, 0b00>;
+defm MVE_VQMOVNs32  : MVE_VxMOVxN_halves<"vqmovn",  "s32", 0b0, 0b1, 0b01>;
+defm MVE_VQMOVNu16  : MVE_VxMOVxN_halves<"vqmovn",  "u16", 0b1, 0b1, 0b00>;
+defm MVE_VQMOVNu32  : MVE_VxMOVxN_halves<"vqmovn",  "u32", 0b1, 0b1, 0b01>;
+defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
+defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
+// vcvtb/vcvtt between f16 and f32 (HasMVEFloat); $Qd is tied to $Qd_src.
+class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
+                  list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+                   "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
+  let Predicates = [HasMVEFloat];
+
+  let Inst{28} = op;            // class parameters
+  let Inst{12} = T;
+  let Inst{21-16} = 0b111111;   // fixed opcode bits
+  let Inst{8-7} = 0b00;
+  let Inst{0} = 0b1;
+}
+// "bh" is vcvtb with T = 0; "th" is vcvtt with T = 1.
+multiclass MVE_VCVT_ff_halves<string suffix, bit op> {
+  def bh : MVE_VCVT_ff<"vcvtb", suffix, op, 0b0>;
+  def th : MVE_VCVT_ff<"vcvtt", suffix, op, 0b1>;
+}
+// op = 0 for f16.f32, op = 1 for f32.f16.
+defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
+defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
+// Integer vcadd/vhcadd encoding; `rot` is a single bit stored in Inst{12}.
+class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
+                 list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
+                   "$Qd, $Qn, $Qm, $rot", vpred_r, "",
+                   pattern> {
+  bits<4> Qn;
+  bit rot;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{12} = rot;
+  let Inst{28} = halve;
+  let Inst{21-20} = size;
+  let Inst{16} = 0b0;           // fixed opcode bits
+  let Inst{8} = 0b1;
+  let Inst{0} = 0b0;
+}
+// vcadd passes halve = 1.
+def MVE_VCADDi8   : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
+def MVE_VCADDi16  : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
+def MVE_VCADDi32  : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>;
+// vhcadd passes halve = 0.
+def MVE_VHCADDs8  : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
+def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
+def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>;
+// vadc/vsbc family (i32 only); has an extra cl_FPSCR_NZCV:$carryout output.
+class MVE_VADCSBC<string iname, bit I, bit subtract,
+                  dag carryin, list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
+                   !con((ins MQPR:$Qn, MQPR:$Qm), carryin),
+                   "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+  bits<4> Qn;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = subtract;
+  let Inst{12} = I;
+  let Inst{21-20} = 0b11;       // fixed opcode bits
+  let Inst{16} = 0b0;
+  let Inst{8} = 0b1;
+  let Inst{0} = 0b0;
+
+  // The FPSCR operand(s) are added by a custom decoder method, since
+  // Tablegen won't do that right.
+  let DecoderMethod = "DecodeMVEVADCInstruction";
+}
+// Plain forms take a $carryin input; the "i" forms set I = 1 and take none.
+def MVE_VADC  : MVE_VADCSBC<"vadc",  0b0, 0b0, (ins cl_FPSCR_NZCV:$carryin)>;
+def MVE_VADCI : MVE_VADCSBC<"vadci", 0b1, 0b0, (ins)>;
+
+def MVE_VSBC  : MVE_VADCSBC<"vsbc",  0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
+def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
+// vqdmullb/vqdmullt (vector by vector); `T` is stored in Inst{12}.
+class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
+                  list<dag> pattern=[]>
+  : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+                   (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+                   vpred_r, "", pattern> {
+  bits<4> Qn;
+  // Operand and parameter fields.
+  let Inst{19-17} = Qn{2-0};
+  let Inst{7} = Qn{3};
+  let Inst{28} = size;
+  let Inst{12} = T;
+  let Inst{21-20} = 0b11;       // fixed opcode bits
+  let Inst{16} = 0b0;
+  let Inst{8} = 0b1;
+  let Inst{0} = 0b1;
+}
+// "bh" is vqdmullb with T = 0; "th" is vqdmullt with T = 1.
+multiclass MVE_VQDMULL_halves<string suffix, bit size> {
+  def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>;
+  def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>;
+}
+// Size bit: 0 for s16, 1 for s32.
+defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
+defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>;
+
+// end of mve_qDest_qSrc
+
+// start of mve_qDest_rSrc
+// Base for vector-by-scalar forms: encodes Qd, Qn and the scalar Rm.
+class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
+                  string suffix, string ops, vpred_ops vpred, string cstr,
+                  list<dag> pattern=[]>
+   : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qn;
+  bits<4> Rm;
+  // `itin` is passed through to MVE_p rather than being dropped.
+  let Inst{25-23} = 0b100;
+  let Inst{22} = Qd{3};
+  let Inst{19-17} = Qn{2-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{11-9} = 0b111;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{3-0} = Rm{3-0};
+}
+
+class MVE_qDest_rSrc<string iname, string suffix, list<dag> pattern=[]>
+  : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
+          NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "",
+           pattern>; // Qd is a pure output here: no tied input, vpred_r predication.
+// Same assembly syntax, but Qd is also an input: $Qd_src is tied to $Qd and vpred_n is used.
+class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
+  : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
+          NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
+           pattern>;
+
+class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
+          suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
+  bits<4> Qd;
+  bits<4> Rm;
+  // "Qd, Rm" form: $Qd_src is tied to $Qd. Only the Qd and Rm fields are encoded in this class.
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{3-0} = Rm{3-0};
+}
+
+class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
+                     bit bit_5, bit bit_12, bit bit_16,
+                     bit bit_28, list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // Each bit_N parameter is written to Inst{N}; 'size' is written to Inst{21-20}.
+  let Inst{28} = bit_28;
+  let Inst{21-20} = size;
+  let Inst{16} = bit_16;
+  let Inst{12} = bit_12;
+  let Inst{8} = 0b1;
+  let Inst{5} = bit_5;
+}
+// Emits the "8", "16" and "32" records (size = 0b00, 0b01, 0b10), appending the width to 'suffix'.
+multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
+                                bit bit_5, bit bit_12, bit bit_16,
+                                bit bit_28, list<dag> pattern=[]> { // NOTE(review): 'pattern' is not forwarded to the defs below.
+  def "8"  : MVE_VADDSUB_qr<iname, suffix#"8",  0b00,
+                            bit_5, bit_12, bit_16, bit_28>;
+  def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
+                            bit_5, bit_12, bit_16, bit_28>;
+  def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
+                            bit_5, bit_12, bit_16, bit_28>;
+}
+// Arguments after the type letter are, in order: bit_5, bit_12, bit_16, bit_28.
+defm MVE_VADD_qr_i  : MVE_VADDSUB_qr_sizes<"vadd",  "i", 0b0, 0b0, 0b1, 0b0>;
+defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
+defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;
+
+defm MVE_VSUB_qr_i  : MVE_VADDSUB_qr_sizes<"vsub",  "i", 0b0, 0b1, 0b1, 0b0>;
+defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
+defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
+
+class MVE_VQDMULL_qr<string iname, string suffix, bit size,
+                     bit T, list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // Vector-by-scalar form: 'size' -> Inst{28}, 'T' -> Inst{12}; the remaining bits set here are fixed.
+  let Inst{28} = size;
+  let Inst{21-20} = 0b11;
+  let Inst{16} = 0b0;
+  let Inst{12} = T;
+  let Inst{8} = 0b1;
+  let Inst{5} = 0b1;
+}
+// Defines the 'bh' (vqdmullb, T = 0) and 'th' (vqdmullt, T = 1) records for one element type.
+multiclass MVE_VQDMULL_qr_halves<string suffix, bit size> {
+  def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>;
+  def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>;
+}
+// size = 0 for s16, size = 1 for s32.
+defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
+defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>;
+
+class MVE_VxADDSUB_qr<string iname, string suffix,
+                      bit bit_28, bits<2> bits_21_20, bit subtract,
+                      list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // bit_28 -> Inst{28}, bits_21_20 -> Inst{21-20}, subtract -> Inst{12}; Inst{16}, Inst{8}, Inst{5} are fixed.
+  let Inst{28} = bit_28;
+  let Inst{21-20} = bits_21_20;
+  let Inst{16} = 0b0;
+  let Inst{12} = subtract;
+  let Inst{8} = 0b1;
+  let Inst{5} = 0b0;
+}
+// vhadd (subtract = 0): bit_28 is 0 for the s-types and 1 for the u-types; bits_21_20 selects 8/16/32.
+def MVE_VHADD_qr_s8   : MVE_VxADDSUB_qr<"vhadd", "s8",  0b0, 0b00, 0b0>;
+def MVE_VHADD_qr_s16  : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
+def MVE_VHADD_qr_s32  : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
+def MVE_VHADD_qr_u8   : MVE_VxADDSUB_qr<"vhadd", "u8",  0b1, 0b00, 0b0>;
+def MVE_VHADD_qr_u16  : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
+def MVE_VHADD_qr_u32  : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;
+// vhsub: the same table with subtract = 1.
+def MVE_VHSUB_qr_s8   : MVE_VxADDSUB_qr<"vhsub", "s8",  0b0, 0b00, 0b1>;
+def MVE_VHSUB_qr_s16  : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
+def MVE_VHSUB_qr_s32  : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
+def MVE_VHSUB_qr_u8   : MVE_VxADDSUB_qr<"vhsub", "u8",  0b1, 0b00, 0b1>;
+def MVE_VHSUB_qr_u16  : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
+def MVE_VHSUB_qr_u32  : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
+// Float add/sub reuse the class with bits_21_20 = 0b11; bit_28 is 0 for f32 and 1 for f16.
+let Predicates = [HasMVEFloat] in {
+  def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd",  "f32", 0b0, 0b11, 0b0>;
+  def MVE_VADD_qr_f16 : MVE_VxADDSUB_qr<"vadd",  "f16", 0b1, 0b11, 0b0>;
+
+  def MVE_VSUB_qr_f32 : MVE_VxADDSUB_qr<"vsub",  "f32", 0b0, 0b11, 0b1>;
+  def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub",  "f16", 0b1, 0b11, 0b1>;
+}
+
+class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
+                   bit bit_7, bit bit_17, list<dag> pattern=[]>
+  : MVE_qDest_single_rSrc<iname, suffix, pattern> {
+  // Shift of Qd by a GPR: U -> Inst{28}, size -> Inst{19-18}, bit_17 -> Inst{17}, bit_7 -> Inst{7}.
+  let Inst{28} = U;
+  let Inst{25-23} = 0b100;
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size;
+  let Inst{17} = bit_17;
+  let Inst{16} = 0b1;
+  let Inst{12-8} = 0b11110;
+  let Inst{7} = bit_7;
+  let Inst{6-4} = 0b110;
+}
+// One record per element type: U = 0 for s8/s16/s32, U = 1 for u8/u16/u32; size = 0b00/0b01/0b10.
+multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
+  def s8  : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
+  def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
+  def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
+  def u8  : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
+  def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
+  def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+}
+// Arguments are (mnemonic, bit_7, bit_17): bit_7 is set for the 'q' forms, bit_17 for the 'r' forms.
+defm MVE_VSHL_qr   : MVE_VxSHL_qr_types<"vshl",   0b0, 0b0>;
+defm MVE_VRSHL_qr  : MVE_VxSHL_qr_types<"vrshl",  0b0, 0b1>;
+defm MVE_VQSHL_qr  : MVE_VxSHL_qr_types<"vqshl",  0b1, 0b0>;
+defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
+
+let Predicates = [HasMVEInt] in { // Unsigned: ARMvshlu by (ARMvdup GPR:$Rm) -> MVE_VSHL_qru*.
+  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+            (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+            (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+            (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+  // Signed: ARMvshls -> MVE_VSHL_qrs*. NOTE(review): $Rm is GPR here but rGPR on the instruction - confirm.
+  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+            (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+            (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+            (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+}
+
+class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // Only 'size' (Inst{21-20}) varies; every other bit assigned here is fixed.
+  let Inst{28} = 0b1;
+  let Inst{21-20} = size;
+  let Inst{16} = 0b1;
+  let Inst{12} = 0b1;
+  let Inst{8} = 0b0;
+  let Inst{5} = 0b1;
+}
+// The suffix is the bare lane width; size = 0b00 / 0b01 / 0b10 for 8 / 16 / 32.
+def MVE_VBRSR8  : MVE_VBRSR<"vbrsr", "8", 0b00>;
+def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
+def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
+
+class MVE_VMUL_qr_int<string iname, string suffix,
+                      bits<2> size, list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // Same fixed bits as MVE_VBRSR except Inst{28}, which is 0 here; 'size' -> Inst{21-20}.
+  let Inst{28} = 0b0;
+  let Inst{21-20} = size;
+  let Inst{16} = 0b1;
+  let Inst{12} = 0b1;
+  let Inst{8} = 0b0;
+  let Inst{5} = 0b1;
+}
+// size = 0b00 / 0b01 / 0b10 for i8 / i16 / i32.
+def MVE_VMUL_qr_i8  : MVE_VMUL_qr_int<"vmul", "i8",  0b00>;
+def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
+def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
+
+class MVE_VxxMUL_qr<string iname, string suffix,
+                    bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
+  : MVE_qDest_rSrc<iname, suffix, pattern> {
+  // bit_28 -> Inst{28}, bits_21_20 -> Inst{21-20}; Inst{16}, Inst{12}, Inst{8}, Inst{5} are fixed.
+  let Inst{28} = bit_28;
+  let Inst{21-20} = bits_21_20;
+  let Inst{16} = 0b1;
+  let Inst{12} = 0b0;
+  let Inst{8} = 0b0;
+  let Inst{5} = 0b1;
+}
+// vqdmulh uses bit_28 = 0; bits_21_20 selects s8 / s16 / s32.
+def MVE_VQDMULH_qr_s8   : MVE_VxxMUL_qr<"vqdmulh",  "s8",  0b0, 0b00>;
+def MVE_VQDMULH_qr_s16  : MVE_VxxMUL_qr<"vqdmulh",  "s16", 0b0, 0b01>;
+def MVE_VQDMULH_qr_s32  : MVE_VxxMUL_qr<"vqdmulh",  "s32", 0b0, 0b10>;
+// vqrdmulh uses bit_28 = 1.
+def MVE_VQRDMULH_qr_s8  : MVE_VxxMUL_qr<"vqrdmulh", "s8",  0b1, 0b00>;
+def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
+def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
+// Float vmul uses bits_21_20 = 0b11; bit_28 is 1 for f16 and 0 for f32.
+let Predicates = [HasMVEFloat] in {
+  def MVE_VMUL_qr_f16   : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
+  def MVE_VMUL_qr_f32   : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
+}
+
+class MVE_VFMAMLA_qr<string iname, string suffix,
+                   bit bit_28, bits<2> bits_21_20, bit S,
+                   list<dag> pattern=[]>
+  : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+  // Qd is read and written (via MVE_qDestSrc_rSrc). S -> Inst{12} selects the 's'-suffixed mnemonics.
+  let Inst{28} = bit_28;
+  let Inst{21-20} = bits_21_20;
+  let Inst{16} = 0b1;
+  let Inst{12} = S;
+  let Inst{8} = 0b0;
+  let Inst{5} = 0b0;
+}
+// vmla (S = 0): bit_28 is 0 for the s-types and 1 for the u-types; bits_21_20 selects 8/16/32.
+def MVE_VMLA_qr_s8     : MVE_VFMAMLA_qr<"vmla",  "s8",  0b0, 0b00, 0b0>;
+def MVE_VMLA_qr_s16    : MVE_VFMAMLA_qr<"vmla",  "s16", 0b0, 0b01, 0b0>;
+def MVE_VMLA_qr_s32    : MVE_VFMAMLA_qr<"vmla",  "s32", 0b0, 0b10, 0b0>;
+def MVE_VMLA_qr_u8     : MVE_VFMAMLA_qr<"vmla",  "u8",  0b1, 0b00, 0b0>;
+def MVE_VMLA_qr_u16    : MVE_VFMAMLA_qr<"vmla",  "u16", 0b1, 0b01, 0b0>;
+def MVE_VMLA_qr_u32    : MVE_VFMAMLA_qr<"vmla",  "u32", 0b1, 0b10, 0b0>;
+// vmlas: the same table with S = 1.
+def MVE_VMLAS_qr_s8    : MVE_VFMAMLA_qr<"vmlas", "s8",  0b0, 0b00, 0b1>;
+def MVE_VMLAS_qr_s16   : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
+def MVE_VMLAS_qr_s32   : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
+def MVE_VMLAS_qr_u8    : MVE_VFMAMLA_qr<"vmlas", "u8",  0b1, 0b00, 0b1>;
+def MVE_VMLAS_qr_u16   : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
+def MVE_VMLAS_qr_u32   : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
+// Float vfma / vfmas use bits_21_20 = 0b11; bit_28 is 1 for f16 and 0 for f32.
+let Predicates = [HasMVEFloat] in {
+  def MVE_VFMA_qr_f16  : MVE_VFMAMLA_qr<"vfma",  "f16", 0b1, 0b11, 0b0>;
+  def MVE_VFMA_qr_f32  : MVE_VFMAMLA_qr<"vfma",  "f32", 0b0, 0b11, 0b0>;
+  def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
+  def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
+}
+
+class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
+                     bit bit_5, bit bit_12, list<dag> pattern=[]>
+  : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+  // U -> Inst{28}, size -> Inst{21-20}, bit_12 -> Inst{12}, bit_5 -> Inst{5}; Inst{16} and Inst{8} are 0.
+  let Inst{28} = U;
+  let Inst{21-20} = size;
+  let Inst{16} = 0b0;
+  let Inst{12} = bit_12;
+  let Inst{8} = 0b0;
+  let Inst{5} = bit_5;
+}
+// Only signed types are instantiated, so U is always 0b0 here.
+multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
+  def s8  : MVE_VQDMLAH_qr<iname, "s8",  0b0, 0b00, bit_5, bit_12>;
+  def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
+  def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
+}
+// Arguments are (mnemonic, bit_5, bit_12): bit_5 is clear for the 'r' forms, bit_12 is set for the '...ash' forms.
+defm MVE_VQDMLAH_qr   : MVE_VQDMLAH_qr_types<"vqdmlah",   0b1, 0b0>;
+defm MVE_VQRDMLAH_qr  : MVE_VQDMLAH_qr_types<"vqrdmlah",  0b0, 0b0>;
+defm MVE_VQDMLASH_qr  : MVE_VQDMLAH_qr_types<"vqdmlash",  0b1, 0b1>;
+defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
+
+class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
+              list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
+          (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
+          iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
+          pattern> {
+  bits<4> Qd;
+  bits<4> Rn;
+  bits<2> imm;
+  // $Rn is both read and written ($Rn = $Rn_src). Only Rn{3-1} is encoded; imm is split over Inst{7} and Inst{0}.
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b100;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = size;
+  let Inst{19-17} = Rn{3-1};
+  let Inst{16} = 0b1;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = bit_12;
+  let Inst{11-8} = 0b1111;
+  let Inst{7} = imm{1};
+  let Inst{6-1} = 0b110111;
+  let Inst{0} = imm{0};
+}
+// vidup: bit_12 = 0.
+def MVE_VIDUPu8  : MVE_VxDUP<"vidup", "u8",  0b00, 0b0>;
+def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
+def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;
+// vddup: bit_12 = 1.
+def MVE_VDDUPu8  : MVE_VxDUP<"vddup", "u8",  0b00, 0b1>;
+def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
+def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
+
+class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
+                 list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
+          (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
+          iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
+          pattern> {
+  bits<4> Qd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<2> imm;
+  // Like MVE_VxDUP plus an extra $Rm input, whose bits Rm{3-1} occupy Inst{3-1}.
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b100;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = size;
+  let Inst{19-17} = Rn{3-1};
+  let Inst{16} = 0b1;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = bit_12;
+  let Inst{11-8} = 0b1111;
+  let Inst{7} = imm{1};
+  let Inst{6-4} = 0b110;
+  let Inst{3-1} = Rm{3-1};
+  let Inst{0} = imm{0};
+}
+// viwdup: bit_12 = 0.
+def MVE_VIWDUPu8  : MVE_VxWDUP<"viwdup", "u8",  0b00, 0b0>;
+def MVE_VIWDUPu16 : MVE_VxWDUP<"viwdup", "u16", 0b01, 0b0>;
+def MVE_VIWDUPu32 : MVE_VxWDUP<"viwdup", "u32", 0b10, 0b0>;
+// vdwdup: bit_12 = 1.
+def MVE_VDWDUPu8  : MVE_VxWDUP<"vdwdup", "u8",  0b00, 0b1>;
+def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
+def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
+
+class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
+          "$Rn", vpred_n, "", pattern> {
+  bits<4> Rn;
+  // "vctp.<suffix> Rn": the result operand is $P0 (VCCR); 'size' -> Inst{21-20}, Rn -> Inst{19-16}.
+  let Inst{28-27} = 0b10;
+  let Inst{26-22} = 0b00000;
+  let Inst{21-20} = size;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{15-11} = 0b11101;
+  let Inst{10-0}  = 0b00000000001;
+  let Unpredictable{10-0} = 0b11111111111; // all of Inst{10-0} is flagged in the Unpredictable mask
+
+  let Constraints = "";
+  let DecoderMethod = "DecodeMveVCTP";
+}
+// size = 0b00 / 0b01 / 0b10 / 0b11 for the 8 / 16 / 32 / 64 suffixes.
+def MVE_VCTP8  : MVE_VCTP<"8",  0b00>;
+def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
+def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
+def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
+
+// end of mve_qDest_rSrc
+
+// start of coproc mov
+
+class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
+  : MVE_VMOV_lane_base<oops, !con(iops, (ins MVEPairVectorIndex2:$idx,
+                                             MVEPairVectorIndex0:$idx2)),
+                       NoItinerary, "vmov", "", ops, cstr, []> {
+  bits<5> Rt;   // only Rt{3-0} is encoded below
+  bits<5> Rt2;  // only Rt2{3-0} is encoded below
+  bits<4> Qd;
+  bit idx;      // declared for the $idx operand; not placed in Inst by this class
+  bit idx2;
+  // to_qreg -> Inst{20} distinguishes the two transfer directions; idx2 -> Inst{4}.
+  let Inst{31-23} = 0b111011000;
+  let Inst{22} = Qd{3};
+  let Inst{21} = 0b0;
+  let Inst{20} = to_qreg;
+  let Inst{19-16} = Rt2{3-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-5} = 0b01111000;
+  let Inst{4} = idx2;
+  let Inst{3-0} = Rt{3-0};
+}
+
+// The assembly syntax for these instructions mentions the vector
+// register name twice, e.g.
+//
+//    vmov q2[2], q2[0], r0, r1
+//    vmov r0, r1, q2[2], q2[0]
+//
+// which needs a bit of juggling with MC operand handling.
+//
+// For the move _into_ a vector register, the MC operand list also has
+// to mention the register name twice: once as the output, and once as
+// an extra input to represent where the unchanged half of the output
+// register comes from (when this instruction is used in code
+// generation). So we arrange that the first mention of the vector reg
+// in the instruction is considered by the AsmMatcher to be the output
+// ($Qd), and the second one is the input ($QdSrc). Binding them
+// together with the existing 'tie' constraint is enough to enforce at
+// register allocation time that they have to be the same register.
+//
+// For the move _from_ a vector register, there's no way to get round
+// the fact that both instances of that register name have to be
+// inputs. They have to be the same register again, but this time, we
+// can't use a tie constraint, because that has to be between an
+// output and an input operand. So this time, we have to arrange that
+// the q-reg appears just once in the MC operand list, in spite of
+// being mentioned twice in the asm syntax - which needs a custom
+// AsmMatchConverter.
+
+def MVE_VMOV_q_rr : MVE_VMOV_64bit<(outs MQPR:$Qd),
+                                   (ins MQPR:$QdSrc, rGPR:$Rt, rGPR:$Rt2),
+                                   0b1, "$Qd$idx, $QdSrc$idx2, $Rt, $Rt2",
+                                   "$Qd = $QdSrc"> { // to_qreg = 1; $QdSrc is tied to the output $Qd
+  let DecoderMethod = "DecodeMVEVMOVDRegtoQ";
+}
+// Reverse direction (to_qreg = 0): $Qd appears twice in the syntax but once in the operand list.
+def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
+                                   0b0, "$Rt, $Rt2, $Qd$idx, $Qd$idx2", ""> {
+  let DecoderMethod = "DecodeMVEVMOVQtoDReg";
+  let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
+}
+
+// end of coproc mov
+
+// start of MVE interleaving load/store
+
+// Base class for the family of interleaving/deinterleaving
+// load/stores with names like VLD20.8 and VST43.32.
+class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
+                       bit load, dag Oops, dag loadIops, dag wbIops,
+                       string iname, string ops,
+                       string cstr, list<dag> pattern=[]>
+  : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
+  bits<4> VQd;  // only VQd{2-0} is encoded below
+  bits<4> Rn;
+  // The input list is loadIops followed by wbIops; every template bit maps to one Inst field.
+  let Inst{31-22} = 0b1111110010;
+  let Inst{21} = writeback;
+  let Inst{20} = load;
+  let Inst{19-16} = Rn;
+  let Inst{15-13} = VQd{2-0};
+  let Inst{12-9} = 0b1111;
+  let Inst{8-7} = size;
+  let Inst{6-5} = stage;
+  let Inst{4-1} = 0b0000;
+  let Inst{0} = fourregs;
+  // Exactly one of mayLoad / mayStore is set, chosen by 'load'.
+  let mayLoad = load;
+  let mayStore = !eq(load,0);
+}
+
+// Parameter bundle capturing everything that distinguishes a writeback
+// VLD2x/VLD4x/VST2x/VST4x variant from its non-writeback counterpart.
+class MVE_vldst24_writeback<bit wbBit, dag extraOuts, dag addrIns,
+                            string asmTail="", string tie="", string nameTail=""> {
+  bit writeback = wbBit;         // value of the writeback encoding bit
+  dag Oops = extraOuts;          // extra outputs contributed by this variant
+  dag Iops = addrIns;            // address input operand(s)
+  string syntax = asmTail;       // text appended to the assembly operand string
+  string cstr = tie;             // operand constraint string
+  string id_suffix = nameTail;   // text appended to the record name
+}
+
+// Parameter bundle for what differs between the two-vector (VLD2x/VST2x)
+// and four-vector (VLD4x/VST4x) instruction families.
+class MVE_vldst24_nvecs<int count, list<int> stageList, bit lowBit, RegisterOperand listOp> {
+  int nvecs = count;                 // number of vectors in the family (2 or 4)
+  list<int> stages = stageList;      // stage numbers that exist for this family
+  bit bit0 = lowBit;                 // passed on as the 'fourregs' bit
+  RegisterOperand VecList = listOp;  // vector-list operand type
+}
+
+// Third parameter bundle: tells the .8, .16 and .32 lane sizes of VLDnn/VSTnn apart.
+class MVE_vldst24_lanesize<int bitsPerLane, bits<2> enc> {
+  int lanesize = bitsPerLane;  // lane width as printed in the mnemonic suffix
+  bits<2> sizebits = enc;      // two-bit size field value
+}
+
+// A base class for each direction of transfer: one for load, one for
+// store. I can't make these a fourth independent parametric tuple
+// class, because they have to take the nvecs tuple class as a
+// parameter, in order to find the right VecList operand type.
+
+class MVE_vld24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
+                     MVE_vldst24_writeback wb, string iname,
+                     list<dag> pattern=[]>
+  : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 1,
+                     !con((outs n.VecList:$VQd), wb.Oops),
+                     (ins n.VecList:$VQdSrc), wb.Iops,
+                     iname, "$VQd, $Rn" # wb.syntax,
+                     wb.cstr # ",$VQdSrc = $VQd", pattern>; // load = 1; the vector list is also a tied input
+// Store counterpart (load = 0): the vector list is a plain input, and only wb contributes outputs.
+class MVE_vst24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
+                     MVE_vldst24_writeback wb, string iname,
+                     list<dag> pattern=[]>
+  : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 0,
+                     wb.Oops, (ins n.VecList:$VQd), wb.Iops,
+                     iname, "$VQd, $Rn" # wb.syntax,
+                     wb.cstr, pattern>;
+
+// Actually define all the interleaving loads and stores, by a series
+// of nested foreaches over number of vectors (VLD2/VLD4); stage
+// within one of those series (VLDx0/VLDx1/VLDx2/VLDx3); size of
+// vector lane; writeback or no writeback.
+foreach n = [MVE_vldst24_nvecs<2, [0,1],     0, VecList2Q>,
+             MVE_vldst24_nvecs<4, [0,1,2,3], 1, VecList4Q>] in
+foreach stage = n.stages in // two stages for the 2-vector family, four for the 4-vector family
+foreach s = [MVE_vldst24_lanesize< 8, 0b00>,
+             MVE_vldst24_lanesize<16, 0b01>,
+             MVE_vldst24_lanesize<32, 0b10>] in
+foreach wb = [MVE_vldst24_writeback<
+                1, (outs rGPR:$wb), (ins t2_nosp_addr_offset_none:$Rn),
+                "!", "$Rn.base = $wb", "_wb">,
+              MVE_vldst24_writeback<0, (outs), (ins t2_addr_offset_none:$Rn)>] in {
+  // (2 + 4 stages) x 3 lane sizes x 2 writeback modes = 36 loads and 36 stores.
+  // For each case within all of those foreaches, define the actual
+  // instructions. The def names are made by gluing together pieces
+  // from all the parameter classes, and will end up being things like
+  // MVE_VLD20_8 and MVE_VST43_16_wb.
+
+  def "MVE_VLD" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
+    : MVE_vld24_base<n, stage, s.sizebits, wb,
+                     "vld" # n.nvecs # stage # "." # s.lanesize>;
+
+  def "MVE_VST" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
+    : MVE_vst24_base<n, stage, s.sizebits, wb,
+                     "vst" # n.nvecs # stage # "." # s.lanesize>;
+}
+
+// end of MVE interleaving load/store
+
+// start of MVE predicable load/store
+
+// Parameter bundle selecting the transfer direction (load vs. store).
+class MVE_ldst_direction<bit isLoad, dag outOps, dag inOps, string constraint=""> {
+  bit load = isLoad;         // 1 for loads, 0 for stores
+  dag Oops = outOps;         // outputs contributed by the direction
+  dag Iops = inOps;          // inputs contributed by the direction
+  string cstr = constraint;  // direction-specific operand constraint
+}
+def MVE_ld : MVE_ldst_direction<1, (outs MQPR:$Qd), (ins), ",@earlyclobber $Qd">;
+def MVE_st : MVE_ldst_direction<0, (outs), (ins MQPR:$Qd)>;
+
+// Parameter bundle describing the size of each memory element in a load or store.
+class MVE_memsz<bits<2> enc, int log2Bytes, AddrMode mode, string mnem,
+                list<string> typeLetters> {
+  bits<2> encoding = enc;       // opcode bit(s) identifying this size
+  int shift = log2Bytes;        // log2 of the element size in bytes
+  AddrMode AM = mode;
+
+  // For instruction aliases: every type suffix valid at this size,
+  // plus the canonical suffixes for loads and for stores.
+  string MnemonicLetter = mnem;
+  int TypeBits = !shl(8, log2Bytes);
+  string CanonLoadSuffix = ".u" # TypeBits;
+  string CanonStoreSuffix = "." # TypeBits;
+  list<string> suffixes = !foreach(letter, typeLetters, "." # letter # TypeBits);
+}
+
+// Instances of MVE_memsz: (encoding, shift, AddrMode, mnemonic letter, type letters).
+//
+// (memD doesn't need an AddrMode, because those are only for
+// contiguous loads and stores, and memD is only used by gather/scatters.)
+def MVE_memB: MVE_memsz<0b00, 0, AddrModeT2_i7,   "b", ["", "u", "s"]>;
+def MVE_memH: MVE_memsz<0b01, 1, AddrModeT2_i7s2, "h", ["", "u", "s", "f"]>;
+def MVE_memW: MVE_memsz<0b10, 2, AddrModeT2_i7s4, "w", ["", "u", "s", "f"]>;
+def MVE_memD: MVE_memsz<0b11, 3, ?,               "d", ["", "u", "s", "f"]>;
+
+// This is the base class for all the MVE loads and stores other than
+// the interleaving ones. All the non-interleaving loads/stores share
+// the characteristic that they operate on just one vector register,
+// so they are VPT-predicable.
+//
+// The predication operand is vpred_n, for both loads and stores. For
+// store instructions, the reason is obvious: if there is no output
+// register, there can't be a need for an input parameter giving the
+// output register's previous value. Load instructions also don't need
+// that input parameter, because unlike MVE data processing
+// instructions, predicated loads are defined to set the inactive
+// lanes of the output register to zero, instead of preserving their
+// input values.
+class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
+                       dag oops, dag iops, string asm, string suffix,
+                       string ops, string cstr, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
+  bits<3> Qd;
+  // U -> Inst{28}, P -> Inst{24}, W -> Inst{21}, dir.load -> Inst{20}, opc -> Inst{12}.
+  let Inst{28} = U;
+  let Inst{25} = 0b0;
+  let Inst{24} = P;
+  let Inst{22} = 0b0;
+  let Inst{21} = W;
+  let Inst{20} = dir.load;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = opc;
+  let Inst{11-9} = 0b111;
+  // Exactly one of mayLoad / mayStore is set, chosen by the direction.
+  let mayLoad = dir.load;
+  let mayStore = !eq(dir.load,0);
+}
+
+// Contiguous load and store instructions. These come in two main
+// categories: same-size loads/stores in which 128 bits of vector
+// register is transferred to or from 128 bits of memory in the most
+// obvious way, and widening loads / narrowing stores, in which the
+// size of memory accessed is less than the size of a vector register,
+// so the load instructions sign- or zero-extend each memory value
+// into a wider vector lane, and the store instructions truncate
+// correspondingly.
+//
+// The instruction mnemonics for these two classes look reasonably
+// similar, but the actual encodings are different enough to need two
+// separate base classes.
+
+// Contiguous, same size
+class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
+                     dag oops, dag iops, string asm, string suffix,
+                     IndexMode im, string ops, string cstr>
+  : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> { // U = 0, opc = 1
+  bits<12> addr; // NOTE(review): 'im' is never assigned to IM here, unlike MVE_VLDRSTR_cw - confirm intended.
+  let Inst{23} = addr{7};
+  let Inst{19-16} = addr{11-8};
+  let Inst{8-7} = memsz.encoding;
+  let Inst{6-0} = addr{6-0};
+}
+
+// Contiguous, widening/narrowing
+class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
+                     bit P, bit W, bits<2> size, dag oops, dag iops,
+                     string asm, string suffix, IndexMode im,
+                     string ops, string cstr>
+  : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> { // opc = 0
+  bits<11> addr; // addr{10-8} -> Inst{18-16}, addr{7} -> Inst{23}, addr{6-0} -> Inst{6-0}
+  let Inst{23} = addr{7};
+  let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
+  let Inst{18-16} = addr{10-8};
+  let Inst{8-7} = size;
+  let Inst{6-0} = addr{6-0};
+  // Record the indexing mode passed in by the multiclass wrapper.
+  let IM = im;
+}
+
+// Multiclass wrapper on each of the _cw and _cs base classes, to
+// generate three writeback modes (none, preindex, postindex).
+
+multiclass MVE_VLDRSTR_cw_m<MVE_ldst_direction dir, MVE_memsz memsz,
+                            string asm, string suffix, bit U, bits<2> size> {
+  let AM = memsz.AM in {
+    def "" : MVE_VLDRSTR_cw<
+        dir, memsz, U, 1, 0, size, // P = 1, W = 0: no writeback
+        dir.Oops, !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
+        asm, suffix, IndexModeNone, "$Qd, $addr", "">;
+    // Pre-indexed: P = 1, W = 1; $wb is an extra output tied to the base of $addr.
+    def _pre : MVE_VLDRSTR_cw<
+        dir, memsz, U, 1, 1, size,
+        !con((outs tGPR:$wb), dir.Oops),
+        !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
+        asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
+      let DecoderMethod = "DecodeMVE_MEM_1_pre<"#memsz.shift#">";
+    }
+    // Post-indexed: P = 0, W = 1; the base ($Rn) and the offset ($addr) are separate operands.
+    def _post : MVE_VLDRSTR_cw<
+        dir, memsz, U, 0, 1, size,
+        !con((outs tGPR:$wb), dir.Oops),
+        !con(dir.Iops, (ins t_addr_offset_none:$Rn,
+                            t2am_imm7_offset<memsz.shift>:$addr)),
+        asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
+      bits<4> Rn;
+      let Inst{18-16} = Rn{2-0}; // overrides the base-register field with the separate $Rn operand
+    }
+  }
+}
+
+multiclass MVE_VLDRSTR_cs_m<MVE_ldst_direction dir, MVE_memsz memsz,
+                            string asm, string suffix> {
+  let AM = memsz.AM in {
+    def "" : MVE_VLDRSTR_cs<
+        dir, memsz, 1, 0, // P = 1, W = 0: no writeback
+        dir.Oops, !con(dir.Iops, (ins t2addrmode_imm7<memsz.shift>:$addr)),
+        asm, suffix, IndexModeNone, "$Qd, $addr", "">;
+    // Pre-indexed: P = 1, W = 1; $wb is an extra output tied to the base of $addr.
+    def _pre : MVE_VLDRSTR_cs<
+        dir, memsz, 1, 1,
+        !con((outs rGPR:$wb), dir.Oops),
+        !con(dir.Iops, (ins t2addrmode_imm7_pre<memsz.shift>:$addr)),
+        asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
+      let DecoderMethod = "DecodeMVE_MEM_2_pre<"#memsz.shift#">";
+    }
+    // Post-indexed: P = 0, W = 1; the base ($Rn) and the offset ($addr) are separate operands.
+    def _post : MVE_VLDRSTR_cs<
+        dir, memsz, 0, 1,
+        !con((outs rGPR:$wb), dir.Oops),
+        // We need an !if here to select the base register class,
+        // because it's legal to write back to SP in a load of this
+        // type, but not in a store.
+        !con(dir.Iops, (ins !if(dir.load, t2_addr_offset_none,
+                                          t2_nosp_addr_offset_none):$Rn,
+                            t2am_imm7_offset<memsz.shift>:$addr)),
+        asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
+      bits<4> Rn;
+      let Inst{19-16} = Rn{3-0}; // overrides the base-register field with the separate $Rn operand
+    }
+  }
+}
+
+// Now actually declare all the contiguous load/stores, via those
+// multiclasses. The instruction ids coming out of this are the bare
+// names shown in the defm, with _pre or _post appended for writeback,
+// e.g. MVE_VLDRBS16, MVE_VSTRB16_pre, MVE_VSTRHU16_post.
+
+defm MVE_VLDRBS16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s16", 0, 0b01>;
+defm MVE_VLDRBS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s32", 0, 0b10>;
+defm MVE_VLDRBU16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u16", 1, 0b01>;
+defm MVE_VLDRBU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u32", 1, 0b10>;
+defm MVE_VLDRHS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "s32", 0, 0b10>;
+defm MVE_VLDRHU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "u32", 1, 0b10>;
+// Same-size loads (the _cs classes): the memory element size equals the lane size.
+defm MVE_VLDRBU8:  MVE_VLDRSTR_cs_m<MVE_ld, MVE_memB, "vldrb", "u8">;
+defm MVE_VLDRHU16: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memH, "vldrh", "u16">;
+defm MVE_VLDRWU32: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memW, "vldrw", "u32">;
+// Narrowing stores (the _cw classes); U is always 0 for stores.
+defm MVE_VSTRB16:  MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "16",  0, 0b01>;
+defm MVE_VSTRB32:  MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "32",  0, 0b10>;
+defm MVE_VSTRH32:  MVE_VLDRSTR_cw_m<MVE_st, MVE_memH, "vstrh", "32",  0, 0b10>;
+// Same-size stores (the _cs classes).
+defm MVE_VSTRBU8 : MVE_VLDRSTR_cs_m<MVE_st, MVE_memB, "vstrb", "8">;
+defm MVE_VSTRHU16: MVE_VLDRSTR_cs_m<MVE_st, MVE_memH, "vstrh", "16">;
+defm MVE_VSTRWU32: MVE_VLDRSTR_cs_m<MVE_st, MVE_memW, "vstrw", "32">;
+
+// Gather loads / scatter stores whose address operand is of the form
+// [Rn,Qm], i.e. a single GPR as the common base address, plus a
+// vector of offsets from it. ('Load/store this sequence of elements of
+// the same array.')
+//
+// Like the contiguous family, these loads and stores can widen the
+// loaded values / truncate the stored ones, or they can just
+// load/store the same size of memory and vector lane. But unlike the
+// contiguous family, there's no particular difference in encoding
+// between those two cases.
+//
+// This family also comes with the option to scale the offset values
+// in Qm by the size of the loaded memory (i.e. to treat them as array
+// indices), or not to scale them (to treat them as plain byte offsets
+// in memory, so that perhaps the loaded values are unaligned). The
+// scaled instructions' address operand in assembly looks like
+// [Rn,Qm,UXTW #2] or similar.
+
+// Base class.
+class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
+                     bits<2> size, bit os, string asm, string suffix, int shift>
+  : MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
+                     !con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
+                     asm, suffix, "$Qd, $addr", dir.cstr> { // P = 0, W = 0, opc = 0; constraint comes from 'dir'
+  bits<7> addr;                     // addr{6-3} -> Inst{19-16}, addr{2-0} -> Inst{3-1}
+  let Inst{23} = 0b1;
+  let Inst{19-16} = addr{6-3};
+  let Inst{8-7} = size;
+  let Inst{6} = memsz.encoding{1};  // memsz.encoding is split across Inst{6} and Inst{4}
+  let Inst{5} = 0;
+  let Inst{4} = memsz.encoding{0};
+  let Inst{3-1} = addr{2-0};
+  let Inst{0} = os;                 // 1 for the scaled-offset variants, 0 otherwise
+}
+
+// Multiclass that defines the scaled and unscaled versions of an
+// instruction, when the memory size is wider than a byte. The scaled
+// version gets the default name like MVE_VLDRHU16_rq; the unscaled /
+// potentially unaligned version gets a "_u" suffix, e.g.
+// MVE_VLDRHU16_rq_u.
+multiclass MVE_VLDRSTR_rq_w<MVE_ldst_direction dir, MVE_memsz memsz,
+                            string asm, string suffix, bit U, bits<2> size> {
+  def _u : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>; // unscaled: os = 0, shift = 0
+  def "" : MVE_VLDRSTR_rq<dir, memsz, U, size, 1, asm, suffix, memsz.shift>; // scaled: os = 1
+}
+
+// Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
+// for use when the memory size is one byte, so there's no 'scaled'
+// version of the instruction at all. (This is encoded as if it were
+// unscaled, but named in the default way with no _u suffix.)
+class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
+                       string asm, string suffix, bit U, bits<2> size>
+  : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>; // os = 0, shift = 0
+
+// Actually define all the loads and stores in this family.
+//
+// Argument order for both MVE_VLDRSTR_rq_b and MVE_VLDRSTR_rq_w is
+// <direction, memory size, mnemonic, suffix, U, size>. Each 'def' (byte
+// memory size) yields a single instruction; each 'defm' (wider memory
+// size) yields the named instruction plus its "_u" unscaled variant.
+
+// Gather loads from byte-sized memory elements.
+def  MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8",  1,0b00>;
+def  MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>;
+def  MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>;
+def  MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>;
+def  MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>;
+
+// Gather loads from halfword-, word- and doubleword-sized memory elements.
+defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>;
+defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>;
+defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>;
+defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>;
+defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>;
+
+// Scatter stores to byte-sized memory elements (U is always 0 for stores).
+def  MVE_VSTRB8_rq  : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8",   0,0b00>;
+def  MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16",  0,0b01>;
+def  MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32",  0,0b10>;
+
+// Scatter stores to halfword-, word- and doubleword-sized memory elements.
+defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16",  0,0b01>;
+defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32",  0,0b10>;
+defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32",  0,0b10>;
+defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64",  0,0b11>;
+
+// Gather loads / scatter stores whose address operand is of the form
+// [Qm,#imm], i.e. a vector containing a full base address for each
+// loaded item, plus an immediate offset applied consistently to all
+// of them. ('Load/store the same field from this vector of pointers
+// to a structure type.')
+//
+// This family requires the vector lane size to be at least 32 bits
+// (so there's room for an address in each lane at all). It has no
+// widening/narrowing variants. But it does support preindex
+// writeback, in which the address vector is updated to hold the
+// addresses actually loaded from.
+
+// Base class.
+//
+// Template arguments, as they are used in the body below:
+//   dir    - direction record; supplies Oops, Iops and cstr.
+//   memsz  - memory-size record; supplies the operand shift and bit 0 of
+//            its encoding (placed in Inst{8}).
+//   W      - forwarded to MVE_VLDRSTR_base; the multiclass below passes 0
+//            for the plain form and 1 for the writeback ("_pre") form.
+//   wbops  - extra output operands, prepended to dir.Oops.
+//   asm, suffix - mnemonic and data-type suffix.
+//   wbAsm  - text appended to the "$Qd, $addr" operand string.
+//   cstr   - constraint text prepended to dir.cstr (empty by default).
+class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
+                     string asm, string wbAsm, string suffix, string cstr = "">
+  : MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
+                     !con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
+                     asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
+  // 11-bit encoded address operand: bit 7 goes to Inst{23}, bits 10-8 to
+  // Inst{19-17} and bits 6-0 to Inst{6-0} (presumably add/subtract flag,
+  // Qm and the immediate respectively -- TODO confirm against the
+  // operand's encoder method).
+  bits<11> addr;
+  let Inst{23} = addr{7};
+  let Inst{19-17} = addr{10-8};
+  let Inst{16} = 0;
+  let Inst{8} = memsz.encoding{0}; // enough to distinguish 32- from 64-bit
+  let Inst{7} = 0;
+  let Inst{6-0} = addr{6-0};
+}
+
+// Multiclass that generates the non-writeback and writeback variants.
+multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
+                            string asm, string suffix> {
+  // Plain form: W = 0, no extra outputs, nothing appended to the operands.
+  def ""   : MVE_VLDRSTR_qi<dir, memsz, 0, (outs),          asm, "",  suffix>;
+  // Writeback form: W = 1, an extra MQPR output $wb tied to the address
+  // base via the constraint string, and "!" appended to the operand string.
+  def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
+                            "$addr.base = $wb"> {
+    // The writeback form uses its own decoder, parameterised by the shift.
+    let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
+  }
+}
+
+// Actual instruction definitions.
+// Each defm produces the named instruction and its "_pre" writeback variant;
+// only word (MVE_memW) and doubleword (MVE_memD) memory sizes are defined.
+defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">;
+defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">;
+defm MVE_VSTRW32_qi:  MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">;
+defm MVE_VSTRD64_qi:  MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
+
+// Define aliases for all the instructions where memory size and
+// vector lane size are the same. These are mnemonic aliases, so they
+// apply consistently across all of the above families - contiguous
+// loads, and both the rq and qi types of gather/scatter.
+//
+// Rationale: As long as you're loading (for example) 16-bit memory
+// values into 16-bit vector lanes, you can think of them as signed or
+// unsigned integers, fp16 or just raw 16-bit blobs and it makes no
+// difference. So we permit all of vldrh.16, vldrh.u16, vldrh.s16,
+// vldrh.f16 and treat them all as equivalent to the canonical
+// spelling (which happens to be .u16 for loads, and just .16 for
+// stores).
+
+// Iterate over every combination of VPT condition suffix (none, "t", "e"),
+// memory size and data-type suffix listed for that memory size.
+foreach vpt_cond = ["", "t", "e"] in
+foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
+foreach suffix = memsz.suffixes in {
+
+  // These foreaches are conceptually ifs, implemented by iterating a
+  // dummy variable over a list with 0 or 1 elements depending on the
+  // condition. The idea is to iterate over _nearly_ all the suffixes
+  // in memsz.suffixes, but omit the one we want all the others to alias.
+
+  // Load alias: "vldr<letter><cond><suffix>" -> canonical load spelling.
+  foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
+  def : MnemonicAlias<
+    "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
+    "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
+
+  // Store alias: "vstr<letter><cond><suffix>" -> canonical store spelling.
+  foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in
+  def : MnemonicAlias<
+    "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
+    "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+}
+
+// end of MVE predicable load/store
+
+// Common base for the integer VPT instructions. The mnemonic is built as
+// "vpt" # "${Mk}" # "." # suffix. This class fixes the opcode bits and
+// places size, Qn, the mask Mk and bits 2 and 0 of the condition fc;
+// Inst{6}, Inst{5} and Inst{3-0} are left for subclasses to fill in.
+class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
+  : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
+  bits<3> fc;
+  bits<4> Mk;
+  bits<3> Qn;
+
+  let Inst{31-23} = 0b111111100;
+  // The 4-bit mask is split: bit 3 here, bits 2-0 in Inst{15-13}.
+  let Inst{22} = Mk{3};
+  let Inst{21-20} = size;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b1;
+  let Inst{15-13} = Mk{2-0};
+  // fc{1} is not placed here; its position depends on the subclass.
+  let Inst{12} = fc{2};
+  let Inst{11-8} = 0b1111;
+  let Inst{7} = fc{0};
+  let Inst{4} = 0b0;
+
+  let Defs = [VPR, P0];
+}
+
+// VPT comparing two vector registers ("$fc, $Qn, $Qm"). Adds the Qm field
+// and places fc{1} in Inst{0}. The operand list is supplied by subclasses.
+class MVE_VPTt1<string suffix, bits<2> size, dag iops>
+  : MVE_VPT<suffix, size, iops, "$fc, $Qn, $Qm"> {
+  bits<4> Qm;
+  // Same name and width as the field already declared in MVE_VPT.
+  bits<4> Mk;
+
+  let Inst{6} = 0b0;
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = fc{1};
+}
+
+// Vector/vector VPT using the pred_basic_i condition operand. Inst{12} and
+// Inst{0} are forced to 0, overriding the fc{2} and fc{1} assignments
+// inherited from the parent classes.
+class MVE_VPTt1i<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+           (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, MQPR:$Qm)> {
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4i32 : MVE_VPTt1i<"i32", 0b10>;
+def MVE_VPTv8i16 : MVE_VPTt1i<"i16", 0b01>;
+def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
+
+// Vector/vector VPT using the pred_basic_u condition operand. Inst{12} is
+// forced to 0 and Inst{0} to 1, overriding the inherited fc{2} and fc{1}
+// assignments.
+class MVE_VPTt1u<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+           (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, MQPR:$Qm)> {
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b1;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4u32 : MVE_VPTt1u<"u32", 0b10>;
+def MVE_VPTv8u16 : MVE_VPTt1u<"u16", 0b01>;
+def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
+
+// Vector/vector VPT using the pred_basic_s condition operand. Only Inst{12}
+// is forced (to 1); Inst{0} keeps the fc{1} assignment from MVE_VPTt1.
+class MVE_VPTt1s<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+           (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, MQPR:$Qm)> {
+  let Inst{12} = 0b1;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4s32 : MVE_VPTt1s<"s32", 0b10>;
+def MVE_VPTv8s16 : MVE_VPTt1s<"s16", 0b01>;
+def MVE_VPTv16s8 : MVE_VPTt1s<"s8", 0b00>;
+
+// VPT comparing a vector register with a general-purpose register
+// ("$fc, $Qn, $Rm"). Adds the Rm field and places fc{1} in Inst{5}. The
+// operand list is supplied by subclasses.
+class MVE_VPTt2<string suffix, bits<2> size, dag iops>
+  : MVE_VPT<suffix, size, iops,
+          "$fc, $Qn, $Rm"> {
+  bits<4> Rm;
+  // fc and Mk repeat the declarations already present in MVE_VPT.
+  bits<3> fc;
+  bits<4> Mk;
+
+  let Inst{6} = 0b1;
+  let Inst{5} = fc{1};
+  let Inst{3-0} = Rm{3-0};
+}
+
+// Vector/scalar VPT using the pred_basic_i condition operand. Inst{12} and
+// Inst{5} are forced to 0, overriding the inherited fc{2} and fc{1}
+// assignments.
+class MVE_VPTt2i<string suffix, bits<2> size>
+  : MVE_VPTt2<suffix, size,
+            (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+  let Inst{12} = 0b0;
+  let Inst{5} = 0b0;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4i32r : MVE_VPTt2i<"i32", 0b10>;
+def MVE_VPTv8i16r : MVE_VPTt2i<"i16", 0b01>;
+def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
+
+// Vector/scalar VPT using the pred_basic_u condition operand. Inst{12} is
+// forced to 0 and Inst{5} to 1, overriding the inherited fc{2} and fc{1}
+// assignments.
+class MVE_VPTt2u<string suffix, bits<2> size>
+  : MVE_VPTt2<suffix, size,
+            (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+  let Inst{12} = 0b0;
+  let Inst{5} = 0b1;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4u32r : MVE_VPTt2u<"u32", 0b10>;
+def MVE_VPTv8u16r : MVE_VPTt2u<"u16", 0b01>;
+def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
+
+// Vector/scalar VPT using the pred_basic_s condition operand. Only Inst{12}
+// is forced (to 1); Inst{5} keeps the fc{1} assignment from MVE_VPTt2.
+class MVE_VPTt2s<string suffix, bits<2> size>
+  : MVE_VPTt2<suffix, size,
+            (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+  let Inst{12} = 0b1;
+}
+
+// One definition per lane size: size field 0b10 / 0b01 / 0b00.
+def MVE_VPTv4s32r : MVE_VPTt2s<"s32", 0b10>;
+def MVE_VPTv8s16r : MVE_VPTt2s<"s16", 0b01>;
+def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
+
+
+// Common base for the floating-point VPT instructions. Differs from the
+// integer MVE_VPT base in that 'size' is a single bit placed in Inst{28},
+// Inst{21-20} is fixed at 0b11, and the HasMVEFloat predicate is required.
+// Inst{6}, Inst{5} and Inst{3-0} are left for subclasses to fill in.
+class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
+  : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
+            "", pattern> {
+  bits<3> fc;
+  bits<4> Mk;
+  bits<3> Qn;
+
+  let Inst{31-29} = 0b111;
+  let Inst{28} = size;
+  let Inst{27-23} = 0b11100;
+  // The 4-bit mask is split: bit 3 here, bits 2-0 in Inst{15-13}.
+  let Inst{22} = Mk{3};
+  let Inst{21-20} = 0b11;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b1;
+  let Inst{15-13} = Mk{2-0};
+  // fc{1} is not placed here; its position depends on the subclass.
+  let Inst{12} = fc{2};
+  let Inst{11-8} = 0b1111;
+  let Inst{7} = fc{0};
+  let Inst{4} = 0b0;
+
+  // NOTE(review): the integer MVE_VPT base lists [VPR, P0] here, whereas
+  // this class lists only P0 -- confirm the difference is intentional.
+  let Defs = [P0];
+  let Predicates = [HasMVEFloat];
+}
+
+// Floating-point VPT comparing two vector registers ("$fc, $Qn, $Qm"),
+// using the pred_basic_fp condition operand. Adds the Qm field and places
+// fc{1} in Inst{0}.
+class MVE_VPTft1<string suffix, bit size>
+  : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, MQPR:$Qm),
+          "$fc, $Qn, $Qm"> {
+  // Repeats the fc declaration already present in MVE_VPTf.
+  bits<3> fc;
+  bits<4> Qm;
+
+  let Inst{6} = 0b0;
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = fc{1};
+}
+
+// The size bit is 0 for the f32 form and 1 for the f16 form.
+def MVE_VPTv4f32         : MVE_VPTft1<"f32", 0b0>;
+def MVE_VPTv8f16         : MVE_VPTft1<"f16", 0b1>;
+
+// Floating-point VPT comparing a vector register with a general-purpose
+// register ("$fc, $Qn, $Rm"), using the pred_basic_fp condition operand.
+// Adds the Rm field and places fc{1} in Inst{5}.
+class MVE_VPTft2<string suffix, bit size>
+  : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, GPRwithZR:$Rm),
+          "$fc, $Qn, $Rm"> {
+  // Repeats the fc declaration already present in MVE_VPTf.
+  bits<3> fc;
+  bits<4> Rm;
+
+  let Inst{6} = 0b1;
+  let Inst{5} = fc{1};
+  let Inst{3-0} = Rm{3-0};
+}
+
+// The size bit is 0 for the f32 form and 1 for the f16 form.
+def MVE_VPTv4f32r        : MVE_VPTft2<"f32", 0b0>;
+def MVE_VPTv8f16r        : MVE_VPTft2<"f16", 0b1>;
+
+// VPST: takes only the mask operand Mk. The mnemonic is "vpst" # "${Mk}"
+// and there is no operand string. Everything except the mask bits is fixed.
+def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
+       !strconcat("vpst", "${Mk}"), "", "", []> {
+  bits<4> Mk;
+
+  let Inst{31-23} = 0b111111100;
+  // The 4-bit mask is split: bit 3 here, bits 2-0 in Inst{15-13}.
+  let Inst{22} = Mk{3};
+  let Inst{21-16} = 0b110001;
+  let Inst{15-13} = Mk{2-0};
+  let Inst{12-0} = 0b0111101001101;
+  // Bits 12, 7 and 5 are flagged in the Unpredictable mask.
+  let Unpredictable{12} = 0b1;
+  let Unpredictable{7} = 0b1;
+  let Unpredictable{5} = 0b1;
+
+  let Defs = [P0];
+}
+
+// VPSEL: three vector register operands (Qd output; Qn and Qm inputs), no
+// data-type suffix, built on MVE_p with the vpred_n predication operand.
+def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
+                      "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
+  bits<4> Qn;
+  bits<4> Qd;
+  bits<4> Qm;
+
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b100;
+  // Each 4-bit register number is split into a high bit and three low bits:
+  // Qd -> Inst{22}, Inst{15-13}; Qn -> Inst{7}, Inst{19-17};
+  // Qm -> Inst{5}, Inst{3-1}.
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = 0b11;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b1;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-9} = 0b0111;
+  let Inst{8} = 0b1;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b0;
+  let Inst{5} = Qm{3};
+  let Inst{4} = 0b0;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b1;
+}
+
+// Accept "vpsel" written with any of these data-type suffixes; every alias
+// maps to the single suffix-less MVE_VPSEL instruction.
+foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
+                  "i8", "i16", "i32",       "f16", "f32"] in
+def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
+                   (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+
+// VPNOT: no explicit operands; the whole 32-bit encoding is a constant.
+// The instruction both reads and writes P0 (listed in Uses and Defs).
+def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
+                      "vpnot", "", "", vpred_n, "", []> {
+  let Inst{31-0} = 0b11111110001100010000111101001101;
+  // Bits 19-17, 12, 7 and 5 are flagged in the Unpredictable mask.
+  let Unpredictable{19-17} = 0b111;
+  let Unpredictable{12} = 0b1;
+  let Unpredictable{7} = 0b1;
+  let Unpredictable{5} = 0b1;
+  let Defs = [P0];
+  let Uses = [P0];
+
+  // Explicitly clear the constraint string (presumably to drop whatever
+  // MVE_p would otherwise set up for vpred_n -- TODO confirm).
+  let Constraints = "";
+}
+
+// Common base for the DLSTP/WLSTP instructions defined below. Built on
+// t2LOL with a single GPRlr output ($LR); requires HasMVEInt. Places the
+// 2-bit size in Inst{21-20} and the 4-bit Rn in Inst{19-16}.
+class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
+  : t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
+  bits<4> Rn;
+  let Predicates = [HasMVEInt];
+  let Inst{22} = 0b0;
+  let Inst{21-20} = size;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{12} = 0b0;
+}
+
+// DLSTP: operands "$LR, $Rn" with no label. Inst{13} is 1 and Inst{11-1}
+// is all zeros; bits 10-1 are flagged in the Unpredictable mask.
+class MVE_DLSTP<string asm, bits<2> size>
+  : MVE_loltp_start<(ins rGPR:$Rn), asm, "$LR, $Rn", size> {
+  let Inst{13} = 0b1;
+  let Inst{11-1} = 0b00000000000;
+  let Unpredictable{10-1} = 0b1111111111;
+}
+
+// WLSTP: operands "$LR, $Rn, $label". Inst{13} is 0 and the 11-bit label
+// operand is encoded in Inst{11-1}.
+class MVE_WLSTP<string asm, bits<2> size>
+  : MVE_loltp_start<(ins rGPR:$Rn, wlslabel_u11:$label),
+                    asm, "$LR, $Rn, $label", size> {
+  bits<11> label;
+  let Inst{13} = 0b0;
+  // Bit 0 of the label lands in Inst{11}; bits 10-1 keep their positions.
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+// One definition per element-size suffix; the size field runs from 0b00
+// for ".8" up to 0b11 for ".64".
+def MVE_DLSTP_8  : MVE_DLSTP<"dlstp.8",  0b00>;
+def MVE_DLSTP_16 : MVE_DLSTP<"dlstp.16", 0b01>;
+def MVE_DLSTP_32 : MVE_DLSTP<"dlstp.32", 0b10>;
+def MVE_DLSTP_64 : MVE_DLSTP<"dlstp.64", 0b11>;
+
+def MVE_WLSTP_8  : MVE_WLSTP<"wlstp.8",  0b00>;
+def MVE_WLSTP_16 : MVE_WLSTP<"wlstp.16", 0b01>;
+def MVE_WLSTP_32 : MVE_WLSTP<"wlstp.32", 0b10>;
+def MVE_WLSTP_64 : MVE_WLSTP<"wlstp.64", 0b11>;
+
+// Common base for the LETP/LCTP instructions defined below. Built on t2LOL;
+// requires HasMVEInt. Fixes Inst{22-21}, Inst{19-16} and Inst{12}; the
+// operands, Inst{20}, Inst{13} and Inst{11-1} are set by each definition.
+class MVE_loltp_end<dag oops, dag iops, string asm, string ops>
+  : t2LOL<oops, iops, asm, ops> {
+  let Predicates = [HasMVEInt];
+  let Inst{22-21} = 0b00;
+  let Inst{19-16} = 0b1111;
+  let Inst{12} = 0b0;
+}
+
+// LETP: takes a GPRlr input ($LRin) and a label, and produces a GPRlr
+// output ($LRout). Inst{20} is 1, Inst{13} is 0, and the 11-bit label is
+// encoded in Inst{11-1}.
+def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
+                             (ins GPRlr:$LRin, lelabel_u11:$label),
+                             "letp", "$LRin, $label"> {
+  bits<11> label;
+  let Inst{20} = 0b1;
+  let Inst{13} = 0b0;
+  // Bit 0 of the label lands in Inst{11}; bits 10-1 keep their positions.
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+// LCTP: no register operands, only the predicate operand $p, which is
+// appended to the mnemonic ("lctp${p}"). Inst{20} is 0, Inst{13} is 1 and
+// Inst{11-1} is all zeros; bits 21-20 and 11-1 are flagged in the
+// Unpredictable mask.
+def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
+  let Inst{20} = 0b0;
+  let Inst{13} = 0b1;
+  let Inst{11-1} = 0b00000000000;
+  let Unpredictable{21-20} = 0b11;
+  let Unpredictable{11-1} = 0b11111111111;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Patterns
+//===----------------------------------------------------------------------===//
+
+// Selection pattern for one unpredicated vector store: a StoreKind of a
+// value of type Ty to a t2addrmode_imm7<shift> address is selected as
+// RegImmInst with the same value and address operands.
+class MVE_unpred_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+                                    PatFrag StoreKind, int shift>
+      : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+           (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+
+// Instantiates the typed store pattern for all seven 128-bit vector types,
+// using the same instruction, store fragment and shift for each.
+multiclass MVE_unpred_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+                                   int shift> {
+  def : MVE_unpred_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+  def : MVE_unpred_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+// Selection pattern for one unpredicated vector load: a LoadKind from a
+// t2addrmode_imm7<shift> address producing type Ty is selected as
+// RegImmInst with the same address operand.
+class MVE_unpred_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+                                   PatFrag LoadKind, int shift>
+      : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+          (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+
+// Instantiates the typed load pattern for all seven 128-bit vector types,
+// using the same instruction, load fragment and shift for each.
+multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+                                  int shift> {
+  def : MVE_unpred_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+  def : MVE_unpred_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
+}
+
+// Little-endian patterns. Each defm covers all seven vector types, so any
+// vector type may be stored/loaded with the byte, halfword or word
+// instruction; the choice is made by the store/load fragment. The shift
+// matches the instruction's element size (0 = byte, 1 = halfword, 2 = word).
+let Predicates = [HasMVEInt, IsLE] in {
+  defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
+  defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
+  defm : MVE_unpred_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
+
+  defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
+  defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
+  defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
+
+  // Loads of the three predicate (i1 vector) types all select VLDR_P0_off
+  // with a shift-2 addressing mode.
+  def  : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
+             (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+  def  : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
+             (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+  def  : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
+             (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+}
+
+// Big-endian patterns. Unlike the little-endian case, each vector type is
+// paired only with the instruction whose element size matches its lanes
+// (byte for v16i8, halfword for v8i16/v8f16, word for v4i32/v4f32), and no
+// patterns are provided here for v2i64/v2f64.
+let Predicates = [HasMVEInt, IsBE] in {
+  def : MVE_unpred_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
+  def : MVE_unpred_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
+  def : MVE_unpred_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
+  def : MVE_unpred_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
+  def : MVE_unpred_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
+
+  def : MVE_unpred_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
+  def : MVE_unpred_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
+  def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
+  def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
+  def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+}
+
+
+// Widening/Narrowing Loads/Stores
+
+// Narrowing (truncating) stores: each lane of the source vector is
+// truncated to a byte or halfword on its way to memory.
+//
+// The addressing-mode shift follows the size of the element written to
+// memory, not the lane size: 0 for the byte stores (VSTRB.16/VSTRB.32) and
+// 1 for the halfword store (VSTRH.32). This agrees with the shifts paired
+// with the byte and halfword instructions in the non-truncating patterns.
+// NOTE(review): a larger shift is expected to let instruction selection
+// accept offsets outside the instruction's immediate range -- confirm
+// against the operand type declared on MVE_VSTRB16/MVE_VSTRB32/MVE_VSTRH32.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(truncstorevi8  (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+             (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+  def : Pat<(truncstorevi8  (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+             (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
+  def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
+             (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
+}
+
+// Patterns for widening (extending) vector loads. All record names are
+// assembled from the string arguments and resolved with !cast:
+//   result type  : "v" # DestLanes # "i" # DestElemBits
+//   load fragment: "extloadvi" / "zextloadvi" / "sextloadvi" # SrcElemBits
+//   instruction  : "MVE_VLDR" # SrcElemType # "U" or "S" # DestElemBits
+// 'am' is the addressing-mode operand used on both sides of each pattern.
+multiclass MVEExtLoad<string DestLanes, string DestElemBits,
+                      string SrcElemBits, string SrcElemType,
+                      Operand am> {
+  // Any-extending load: selected as the unsigned ("U") instruction.
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+                   (!cast<PatFrag>("extloadvi"  # SrcElemBits) am:$addr)),
+                 (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
+                   am:$addr)>;
+  // Zero-extending load: selected as the unsigned ("U") instruction.
+  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+                   (!cast<PatFrag>("zextloadvi"  # SrcElemBits) am:$addr)),
+                 (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
+                   am:$addr)>;
+  // Sign-extending load: selected as the signed ("S") instruction.
+  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+                   (!cast<PatFrag>("sextloadvi"  # SrcElemBits) am:$addr)),
+                 (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
+                   am:$addr)>;
+}
+
+// Widening loads: byte -> 32-bit lanes, byte -> 16-bit lanes and
+// halfword -> 32-bit lanes.
+//
+// The addressing-mode shift follows the size of the element read from
+// memory: 0 for the byte loads (VLDRB) and 1 for the halfword load
+// (VLDRH). This agrees with the shifts paired with the byte and halfword
+// instructions in the non-extending load patterns.
+// NOTE(review): a larger shift is expected to let instruction selection
+// accept offsets outside the instruction's immediate range -- confirm
+// against the operand type declared on the selected MVE_VLDRB*/MVE_VLDRH*
+// instructions.
+let Predicates = [HasMVEInt] in {
+  defm : MVEExtLoad<"4", "32", "8",  "B", t2addrmode_imm7<0>>;
+  defm : MVEExtLoad<"8", "16", "8",  "B", t2addrmode_imm7<0>>;
+  defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<1>>;
+}
+
+
+// Bit convert patterns
+
+// Bitconverts between integer and floating-point vector types with the
+// same lane width. These only require HasMVEInt (no endianness predicate)
+// and select to the source register unchanged.
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+
+  def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+
+  def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
+}
+
+// Bitconverts between vector types with different lane widths. These are
+// restricted to little-endian (IsLE) and select to the source register
+// unchanged. Same-lane-width int<->fp conversions are handled by the
+// endianness-independent patterns, except v8f16 -> v16i8/v8i16 style
+// entries listed explicitly below. Patterns are grouped by result type.
+let Predicates = [IsLE,HasMVEInt] in {
+  // Result type v2f64.
+  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+
+  // Result type v2i64.
+  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+
+  // Result type v4f32.
+  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+
+  // Result type v4i32.
+  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+
+  // Result type v8f16.
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+
+  // Result type v8i16.
+  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+
+  // Result type v16i8 (every other vector type is a valid source).
+  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 96986e74415b..806681df102c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,9 +1,8 @@
 //===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -497,45 +496,30 @@ def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
 // Types for vector shift by immediates.  The "SHX" version is for long and
 // narrow operations where the source and destination vectors have different
 // types.  The "SHINS" version is for shift and insert operations.
-def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisVT<2, i32>]>;
-def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
-                                         SDTCisVT<2, i32>]>;
-def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-
-def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
-def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
-def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
-def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+                                            SDTCisVT<2, i32>]>;
+def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
 
-def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
-def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
-def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
 
-def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
-def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
-def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
-def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
-def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
-def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
+def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
+def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
 
-def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
-def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
-def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
+def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
+def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
+def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
 
-def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
-def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
 
-def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
-                                         SDTCisVT<2, i32>]>;
-def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
-def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
-
-def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
-def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
-def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
-def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
+def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
 
 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                            SDTCisVT<2, i32>]>;
@@ -548,23 +532,10 @@ def NEONvbsl      : SDNode<"ARMISD::VBSL",
                                                 SDTCisSameAs<0, 2>,
                                                 SDTCisSameAs<0, 3>]>>;
 
-def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
-
-// VDUPLANE can produce a quad-register result from a double-register source,
-// so the result is not constrained to match the source.
-def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
-                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
-                                                SDTCisVT<2, i32>]>>;
-
 def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
 def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
 
-def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
-def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
-def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
-def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
-
 def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>,
                                          SDTCisSameAs<0, 3>]>;
@@ -585,14 +556,14 @@ def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
 def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
 
 
-def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
   unsigned EltBits = 0;
   uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
   return (EltBits == 32 && EltVal == 0);
 }]>;
 
-def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
   unsigned EltBits = 0;
   uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
@@ -1118,6 +1089,13 @@ def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
 
+let Predicates = [HasNEON] in {
+def : Pat<(vector_insert (v4f16 DPR:$src),
+                         (f16 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v8f16 QPR:$src),
+                         (f16 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 def : Pat<(vector_insert (v2f32 DPR:$src),
                          (f32 (load addrmode6:$addr)), imm:$lane),
           (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
@@ -1139,6 +1117,7 @@ def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
           (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
 def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+}
 
 
 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
@@ -1404,7 +1383,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
           (ins AddrMode:$Rn),
           IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
           [(set VecListOneDAllLanes:$Vd,
-                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
@@ -1417,8 +1396,10 @@ def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                          addrmode6dupalign32>;
 
-def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+let Predicates = [HasNEON] in {
+def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
           (VLD1DUPd32 addrmode6:$addr)>;
+}
 
 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
                Operand AddrMode>
@@ -1426,7 +1407,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
           (ins AddrMode:$Rn), IIC_VLD1dup,
           "vld1", Dt, "$Vd, $Rn", "",
           [(set VecListDPairAllLanes:$Vd,
-                (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1439,8 +1420,10 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                           addrmode6dupalign32>;
 
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+let Predicates = [HasNEON] in {
+def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
           (VLD1DUPq32 addrmode6:$addr)>;
+}
 
 let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
 // ...with address register writeback:
@@ -2152,11 +2135,11 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
 }
 
 def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
-                       NEONvgetlaneu, addrmode6> {
+                       ARMvgetlaneu, addrmode6> {
   let Inst{7-5} = lane{2-0};
 }
 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
-                       NEONvgetlaneu, addrmode6> {
+                       ARMvgetlaneu, addrmode6> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{4};
 }
@@ -2167,15 +2150,22 @@ def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
   let Inst{5-4} = Rn{5-4};
 }
 
-def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
-def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 
+def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+}
+
 // ...with address register writeback:
 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
@@ -2196,11 +2186,11 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
 }
 
 def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
-                             NEONvgetlaneu, addrmode6> {
+                             ARMvgetlaneu, addrmode6> {
   let Inst{7-5} = lane{2-0};
 }
 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
-                             NEONvgetlaneu, addrmode6> {
+                             ARMvgetlaneu, addrmode6> {
   let Inst{7-6} = lane{1-0};
   let Inst{4}   = Rn{4};
 }
@@ -2210,8 +2200,8 @@ def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
   let Inst{5-4} = Rn{5-4};
 }
 
-def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
-def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
 
 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
@@ -2440,37 +2430,45 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
 } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
 
 // Use vld1/vst1 for unaligned f64 load / store
+let Predicates = [IsLE,HasNEON] in {
 def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
-          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+          (VLD1d16 addrmode6:$addr)>;
 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
-          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+          (VST1d16 addrmode6:$addr, DPR:$value)>;
 def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
-          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+          (VLD1d8 addrmode6:$addr)>;
 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
-          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+          (VST1d8 addrmode6:$addr, DPR:$value)>;
+}
+let Predicates = [IsBE,HasNEON] in {
 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
-          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+          (VLD1d64 addrmode6:$addr)>;
 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
-          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+          (VST1d64 addrmode6:$addr, DPR:$value)>;
+}
 
 // Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
 // load / store if it's legal.
+let Predicates = [HasNEON] in {
 def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
           (VLD1q64 addrmode6:$addr)>;
 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
           (VST1q64 addrmode6:$addr, QPR:$value)>;
+}
+let Predicates = [IsLE,HasNEON] in {
 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
-          (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
+          (VLD1q32 addrmode6:$addr)>;
 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
-          (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+          (VST1q32 addrmode6:$addr, QPR:$value)>;
 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
-          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+          (VLD1q16 addrmode6:$addr)>;
 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
-          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+          (VST1q16 addrmode6:$addr, QPR:$value)>;
 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
-          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+          (VLD1q8 addrmode6:$addr)>;
 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
-          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+          (VST1q8 addrmode6:$addr, QPR:$value)>;
+}
 
 //===----------------------------------------------------------------------===//
 // NEON pattern fragments
@@ -2505,6 +2503,13 @@ def SSubReg_f32_reg : SDNodeXForm<imm, [{
                                    MVT::i32);
 }]>;
 
+// Extract S sub-registers of Q/D registers containing a given f16 lane.
+def SSubReg_f16_reg : SDNodeXForm<imm, [{
+  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 // Translate lane numbers from Q registers to D subregs.
 def SubReg_i8_lane  : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
@@ -2666,7 +2671,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$Vn),
-                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+                        (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
   // All of these have a two-operand InstAlias.
   let TwoOperandAliasConstraint = "$Vn = $Vd";
   let isCommutable = 0;
@@ -2678,7 +2683,7 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
         [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$Vn),
-                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+                        (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
   // All of these have a two-operand InstAlias.
   let TwoOperandAliasConstraint = "$Vn = $Vd";
   let isCommutable = 0;
@@ -2714,7 +2719,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$Vn),
-                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                           (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
   // All of these have a two-operand InstAlias.
   let TwoOperandAliasConstraint = "$Vn = $Vd";
@@ -2727,7 +2732,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
         NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$Vn),
-                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                           (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
   // All of these have a two-operand InstAlias.
   let TwoOperandAliasConstraint = "$Vn = $Vd";
@@ -2762,7 +2767,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (Ty DPR:$Vd),
               (Ty (IntOp (Ty DPR:$Vn),
-                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                            imm:$lane)))))]> {
   let isCommutable = 0;
 }
@@ -2774,7 +2779,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (Ty DPR:$Vd),
               (Ty (IntOp (Ty DPR:$Vn),
-                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
   let isCommutable = 0;
 }
 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2829,7 +2834,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$Vn),
-                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                            (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                  imm:$lane)))))]> {
   let isCommutable = 0;
 }
@@ -2841,7 +2846,7 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$Vn),
-                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                            (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                  imm:$lane)))))]> {
   let isCommutable = 0;
 }
@@ -2877,7 +2882,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
                         (Ty (MulOp DPR:$Vn,
-                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+                                   (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                      imm:$lane)))))))]>;
 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
@@ -2890,7 +2895,7 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
                         (Ty (MulOp DPR:$Vn,
-                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
+                                   (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                      imm:$lane)))))))]>;
 
 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2912,7 +2917,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
                            (ResTy (MulOp QPR:$Vn,
-                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                         imm:$lane)))))))]>;
 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
@@ -2926,7 +2931,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
                            (ResTy (MulOp QPR:$Vn,
-                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                         imm:$lane)))))))]>;
 
 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
@@ -2986,7 +2991,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
         [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
                   (TyQ (MulOp (TyD DPR:$Vn),
-                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+                              (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                  imm:$lane))))))]>;
 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
@@ -2998,7 +3003,7 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
         [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
                   (TyQ (MulOp (TyD DPR:$Vn),
-                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
+                              (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                  imm:$lane))))))]>;
 
 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
@@ -3034,7 +3039,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
                             (OpTy DPR:$Vn),
-                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]>;
 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
@@ -3047,7 +3052,7 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
                             (OpTy DPR:$Vn),
-                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]>;
 
 // Narrowing 3-register intrinsics.
@@ -3080,7 +3085,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set QPR:$Vd,
           (TyQ (OpNode (TyD DPR:$Vn),
-                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+                       (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -3089,7 +3094,7 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set QPR:$Vd,
           (TyQ (OpNode (TyD DPR:$Vn),
-                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+                       (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
 
 // Long 3-register operations with explicitly extended operands.
 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -3145,7 +3150,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (OpTy DPR:$Vn),
-                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]>;
 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
@@ -3155,7 +3160,7 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
         NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
         [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (OpTy DPR:$Vn),
-                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]>;
 
 // Wide 3-register operations.
@@ -4087,72 +4092,72 @@ multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                           string OpcodeStr> {
   // 64-bit vector types.
   def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
   def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                              // imm6 = xxxxxx
 }
 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                           string OpcodeStr> {
   // 64-bit vector types.
   def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
-                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
-                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
-                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
-                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
   def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
-                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
-                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
-                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
-                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                              // imm6 = xxxxxx
 }
 
@@ -4251,12 +4256,14 @@ defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
 defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                             int_arm_neon_vraddhn, 1>;
 
-def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+}
 
 // Vector Multiply Operations.
 
@@ -4287,47 +4294,49 @@ def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                        v4f16, fmul>,
                 Requires<[HasNEON,HasFullFP16]>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
-                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
           (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                               (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                       (DSubReg_i16_reg imm:$lane))),
                               (SubReg_i16_lane imm:$lane)))>;
 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
-                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
           (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                               (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
-                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
           (v4f32 (VMULslfq (v4f32 QPR:$src1),
                            (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
 def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
-                       (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
+                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
           (v8f16 (VMULslhq(v8f16 QPR:$src1),
                            (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
 
-def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
           (VMULslfd DPR:$Rn,
             (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
             (i32 0))>;
-def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
           (VMULslhd DPR:$Rn,
             (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
             (i32 0))>;
-def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
           (VMULslfq QPR:$Rn,
             (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
             (i32 0))>;
-def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
           (VMULslhq QPR:$Rn,
             (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
             (i32 0))>;
+}
 
 //   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
 defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
@@ -4336,20 +4345,23 @@ defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                             IIC_VMULi16Q, IIC_VMULi32Q,
                             "vqdmulh", "s",  int_arm_neon_vqdmulh>;
+
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
-                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
           (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
-                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
           (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
+}
 
 //   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
 defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
@@ -4358,20 +4370,23 @@ defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                               IIC_VMULi16Q, IIC_VMULi32Q,
                               "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
+
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
-                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                              imm:$lane)))),
           (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                   (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                           (DSubReg_i16_reg imm:$lane))),
                                   (SubReg_i16_lane imm:$lane)))>;
 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
-                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                              imm:$lane)))),
           (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                   (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                           (DSubReg_i32_reg imm:$lane))),
                                   (SubReg_i32_lane imm:$lane)))>;
+}
 
 //   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
@@ -4427,9 +4442,10 @@ def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                             v8f16, v4f16, fmul, fadd>,
                 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
-                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
           (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                               (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                       (DSubReg_i16_reg imm:$lane))),
@@ -4437,15 +4453,16 @@ def : Pat<(v8i16 (add (v8i16 QPR:$src1),
 
 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                   (mul (v4i32 QPR:$src2),
-                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
           (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                               (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
+}
 
 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                   (fmul_su (v4f32 QPR:$src2),
-                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
           (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                            (v4f32 QPR:$src2),
                            (v2f32 (EXTRACT_SUBREG QPR:$src3,
@@ -4497,7 +4514,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v4i16 DPR:$src1),
                      (v4i16 (int_arm_neon_vqrdmulh
                               (v4i16 DPR:$Vn),
-                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                    imm:$lane)))))),
             (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                     imm:$lane))>;
@@ -4505,7 +4522,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v2i32 DPR:$src1),
                      (v2i32 (int_arm_neon_vqrdmulh
                               (v2i32 DPR:$Vn),
-                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                    imm:$lane)))))),
             (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                     imm:$lane))>;
@@ -4513,7 +4530,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v8i16 QPR:$src1),
                      (v8i16 (int_arm_neon_vqrdmulh
                               (v8i16 QPR:$src2),
-                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                    imm:$lane)))))),
             (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                     (v8i16 QPR:$src2),
@@ -4525,7 +4542,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqrdmulh 
                               (v4i32 QPR:$src2),
-                              (v4i32 (NEONvduplane (v4i32 QPR:$src3), 
+                              (v4i32 (ARMvduplane (v4i32 QPR:$src3), 
                                                    imm:$lane)))))),
             (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                     (v4i32 QPR:$src2),
@@ -4567,14 +4584,14 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v4i16 DPR:$src1),
                      (v4i16 (int_arm_neon_vqrdmulh
                               (v4i16 DPR:$Vn),
-                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                    imm:$lane)))))),
             (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
   def : Pat<(v2i32 (int_arm_neon_vqsubs
                      (v2i32 DPR:$src1),
                      (v2i32 (int_arm_neon_vqrdmulh
                               (v2i32 DPR:$Vn),
-                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                    imm:$lane)))))),
             (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 
                                     imm:$lane))>;
@@ -4582,7 +4599,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v8i16 QPR:$src1),
                      (v8i16 (int_arm_neon_vqrdmulh
                               (v8i16 QPR:$src2),
-                              (v8i16 (NEONvduplane (v8i16 QPR:$src3), 
+                              (v8i16 (ARMvduplane (v8i16 QPR:$src3), 
                                                    imm:$lane)))))),
             (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                     (v8i16 QPR:$src2),
@@ -4594,7 +4611,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
                      (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqrdmulh
                               (v4i32 QPR:$src2),
-                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                     imm:$lane)))))),
             (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                     (v4i32 QPR:$src2),
@@ -4608,6 +4625,7 @@ defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlal", "s", null_frag>;
 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
@@ -4618,14 +4636,15 @@ def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
           (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
-                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                      imm:$lane)))))),
           (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
 def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                      (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
-                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                      imm:$lane)))))),
           (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+}
 
 //   VMLS     : Vector Multiply Subtract (integer and floating-point)
 defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4657,9 +4676,10 @@ def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                             v8f16, v4f16, fmul, fsub>,
                 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
-                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
           (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                               (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                       (DSubReg_i16_reg imm:$lane))),
@@ -4667,15 +4687,16 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
 
 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                   (mul (v4i32 QPR:$src2),
-                     (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+                     (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
           (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                               (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                       (DSubReg_i32_reg imm:$lane))),
                               (SubReg_i32_lane imm:$lane)))>;
+}
 
 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                   (fmul_su (v4f32 QPR:$src2),
-                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
           (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                            (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                    (DSubReg_i32_reg imm:$lane))),
@@ -4696,6 +4717,7 @@ defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                             "vqdmlsl", "s", null_frag>;
 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
@@ -4706,14 +4728,15 @@ def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
           (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                      (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
-                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                      imm:$lane)))))),
           (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
 def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                      (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
-                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                      imm:$lane)))))),
           (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+}
 
 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
 def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
@@ -4754,16 +4777,16 @@ def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
           Requires<[HasNEON,HasFullFP16]>;
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
           (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasNEON,HasVFP4]>;
 def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
           (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasNEON,HasVFP4]>;
 def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
           (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-      Requires<[HasVFP4]>;
+      Requires<[HasNEON,HasVFP4]>;
 def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
           (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-      Requires<[HasVFP4]>;
+      Requires<[HasNEON,HasVFP4]>;
 
 // ARMv8.2a dot product instructions.
 // We put them in the VFPV8 decoder namespace because the ARM and Thumb
@@ -4808,7 +4831,7 @@ multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
     (AccumType (OpNode (AccumType Ty:$Vd),
                        (InputType Ty:$Vn),
                        (InputType (bitconvert (AccumType
-                                  (NEONvduplane (AccumType Ty:$Vm),
+                                  (ARMvduplane (AccumType Ty:$Vm),
                                                  VectorIndex32:$lane)))))),
     (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
 }
@@ -4991,12 +5014,14 @@ defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
 defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                             int_arm_neon_vrsubhn, 0>;
 
-def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+}
 
 // Vector Comparisons.
 
@@ -5122,10 +5147,11 @@ class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
   : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
            asm, "f16", "$Vd, $Vn, $Vm", "", []>;
 
-class VFMQ0<string opc, bits<2> S>
+// Vd, Vs, Vs[0-15], Idx[0-1]
+class VFMD<string opc, string type, bits<2> S>
   : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
-               (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
-               IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
+               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
   bit idx;
   let Inst{3} = idx;
   let Inst{19-16} = Vn{4-1};
@@ -5134,10 +5160,11 @@ class VFMQ0<string opc, bits<2> S>
   let Inst{2-0}   = Vm{3-1};
 }
 
-class VFMQ1<string opc, bits<2> S>
+// Vq, Vd, Vd[0-7], Idx[0-3]
+class VFMQ<string opc, string type, bits<2> S>
   : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
-               (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
-               IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
+               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
   bits<2> idx;
   let Inst{5} = idx{1};
   let Inst{3} = idx{0};
@@ -5149,10 +5176,10 @@ def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
 def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
 def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
 def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
-def VFMALDI : VFMQ0<"vfmal", 0b00>;
-def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
-def VFMALQI : VFMQ1<"vfmal", 0b00>;
-def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
+def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
+def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
+def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
 }
 } // HasNEON, HasFP16FML
 
@@ -5308,28 +5335,28 @@ let isReMaterializable = 1 in {
 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                          "vmvn", "i16", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                          "vmvn", "i16", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                          "vmvn", "i32", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
   let Inst{11-8} = SIMM{11-8};
 }
 
 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                          "vmvn", "i32", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
   let Inst{11-8} = SIMM{11-8};
 }
 }
@@ -5343,8 +5370,10 @@ def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                      (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                      "vmvn", "$Vd, $Vm", "",
                      [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+let Predicates = [HasNEON] in {
 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+}
 
 //   VBSL     : Vector Bitwise Select
 def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
@@ -5353,36 +5382,31 @@ def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                      [(set DPR:$Vd,
                            (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+let Predicates = [HasNEON] in {
 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                    (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
-          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                     (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
-          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                     (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
-          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                     (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
-          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                     (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
-          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
 
 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                      (and DPR:$Vm, (vnotd DPR:$Vd)))),
-          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
 
 def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                      (and DPR:$Vm, (vnotd DPR:$Vd)))),
-          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+}
 
 def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
@@ -5391,35 +5415,30 @@ def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                      [(set QPR:$Vd,
                            (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
-          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                     (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
-          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                     (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
-          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                     (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
-          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                     (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
-          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
 
 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                      (and QPR:$Vm, (vnotq QPR:$Vd)))),
-          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
 def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                      (and QPR:$Vm, (vnotq QPR:$Vd)))),
-          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
-        Requires<[HasNEON]>;
+          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+}
 
 //   VBIF     : Vector Bitwise Insert if False
 //              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -5479,24 +5498,28 @@ defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
 defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
           (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
 def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
           (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+}
 
 // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
 // shift/xor pattern for ABS.
 
 def abd_shr :
     PatFrag<(ops node:$in1, node:$in2, node:$shift),
-            (NEONvshrs (sub (zext node:$in1),
+            (ARMvshrsImm (sub (zext node:$in1),
                             (zext node:$in2)), (i32 $shift))>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
                (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                    (zext (v2i32 DPR:$opB))),
                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
           (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+}
 
 //   VABA     : Vector Absolute Difference and Accumulate
 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
@@ -5536,22 +5559,22 @@ def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
 
 // VMAXNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v2f32, v2f32, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v4f32, v4f32, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
-                            v4f16, v4f16, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
-  def VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
-                            v8f16, v8f16, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
+                                  v2f32, v2f32, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
+                                  v4f32, v4f32, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                                  v4f16, v4f16, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                                  v8f16, v8f16, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 //   VMIN     : Vector Minimum
@@ -5578,22 +5601,22 @@ def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
 
 // VMINNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v2f32, v2f32, fminnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v4f32, v4f32, fminnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f16",
-                            v4f16, v4f16, fminnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
-  def VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f16",
-                            v8f16, v8f16, fminnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f32",
+                                  v2f32, v2f32, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f32",
+                                  v4f32, v4f32, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f16",
+                                  v4f16, v4f16, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f16",
+                                  v8f16, v8f16, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 // Vector Pairwise Operations.
@@ -5754,20 +5777,57 @@ defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "u", int_arm_neon_vshiftu>;
 
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
+
+def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
+
+}
+
 //   VSHL     : Vector Shift Left (Immediate)
-defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
 
 //   VSHR     : Vector Shift Right (Immediate)
 defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
-                            NEONvshrs>;
+                            ARMvshrsImm>;
 defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
-                            NEONvshru>;
+                            ARMvshruImm>;
 
 //   VSHLL    : Vector Shift Left Long
 defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
 defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
 
 //   VSHLL    : Vector Shift Left Long (with maximum shift count)
 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
@@ -5785,36 +5845,40 @@ def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
 def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                           v2i64, v2i32, imm32>;
 
-def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
+}
 
 //   VSHRN    : Vector Shift Right and Narrow
 defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                            PatFrag<(ops node:$Rn, node:$amt),
-                                   (trunc (NEONvshrs node:$Rn, node:$amt))>>;
+                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
 
-def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
           (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
           (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
           (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
+}
 
 //   VRSHL    : Vector Rounding Shift
 defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
@@ -5825,13 +5889,13 @@ defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                             "vrshl", "u", int_arm_neon_vrshiftu>;
 //   VRSHR    : Vector Rounding Shift Right
 defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
-                            NEONvrshrs>;
+                            NEONvrshrsImm>;
 defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
-                            NEONvrshru>;
+                            NEONvrshruImm>;
 
 //   VRSHRN   : Vector Rounding Shift Right and Narrow
 defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
-                           NEONvrshrn>;
+                           NEONvrshrnImm>;
 
 //   VQSHL    : Vector Saturating Shift
 defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
@@ -5841,21 +5905,21 @@ defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqshl", "u", int_arm_neon_vqshiftu>;
 //   VQSHL    : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
-defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
+defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
 
 //   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
 
 //   VQSHRN   : Vector Saturating Shift Right and Narrow
 defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
-                           NEONvqshrns>;
+                           NEONvqshrnsImm>;
 defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
-                           NEONvqshrnu>;
+                           NEONvqshrnuImm>;
 
 //   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
 defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
-                           NEONvqshrnsu>;
+                           NEONvqshrnsuImm>;
 
 //   VQRSHL   : Vector Saturating Rounding Shift
 defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
@@ -5867,20 +5931,20 @@ defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
 
 //   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
-                           NEONvqrshrns>;
+                           NEONvqrshrnsImm>;
 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
-                           NEONvqrshrnu>;
+                           NEONvqrshrnuImm>;
 
 //   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
-                           NEONvqrshrnsu>;
+                           NEONvqrshrnsuImm>;
 
 //   VSRA     : Vector Shift Right and Accumulate
-defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
-defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
+defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
 //   VRSRA    : Vector Rounding Shift Right and Accumulate
-defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
-defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
+defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
 
 //   VSLI     : Vector Shift Left and Insert
 defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
@@ -5957,12 +6021,14 @@ def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                     [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                 Requires<[HasNEON, HasFullFP16]>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
 def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+}
 
 //   VQNEG    : Vector Saturating Negate
 defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
@@ -6014,57 +6080,57 @@ let isReMaterializable = 1, isAsCheapAsAMove=1 in {
 def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                          "vmov", "i8", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                          "vmov", "i8", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
 
 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                          "vmov", "i16", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                          "vmov", "i16", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
 }
 
 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                          "vmov", "i32", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
   let Inst{11-8} = SIMM{11-8};
 }
 
 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                          "vmov", "i32", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
   let Inst{11-8} = SIMM{11-8};
 }
 
 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                          (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                          (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                          "vmov", "i64", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
 
 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                          (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                          "vmov", "f32", "$Vd, $SIMM", "",
-                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                          (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                          "vmov", "f32", "$Vd, $SIMM", "",
-                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
+                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
 } // isReMaterializable, isAsCheapAsAMove
 
 // Add support for bytes replication feature, so it could be GAS compatible.
@@ -6144,7 +6210,7 @@ let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
 def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                           IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
-                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                            imm:$lane))]> {
   let Inst{21}  = lane{2};
   let Inst{6-5} = lane{1-0};
@@ -6152,7 +6218,7 @@ def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                           IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
-                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                            imm:$lane))]> {
   let Inst{21} = lane{1};
   let Inst{6}  = lane{0};
@@ -6160,7 +6226,7 @@ def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
 def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                           IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
-                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                            imm:$lane))]> {
   let Inst{21}  = lane{2};
   let Inst{6-5} = lane{1-0};
@@ -6168,7 +6234,7 @@ def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                           (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                           IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
-                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                            imm:$lane))]> {
   let Inst{21} = lane{1};
   let Inst{6}  = lane{0};
@@ -6178,26 +6244,28 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                           IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                           [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                            imm:$lane))]>,
-                Requires<[HasVFP2, HasFastVGETLNi32]> {
+                Requires<[HasFPRegs, HasFastVGETLNi32]> {
   let Inst{21} = lane{0};
 }
+let Predicates = [HasNEON] in {
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
-def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
           (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i8_reg imm:$lane))),
                      (SubReg_i8_lane imm:$lane))>;
-def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
           (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                              (DSubReg_i16_reg imm:$lane))),
                      (SubReg_i16_lane imm:$lane))>;
-def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
           (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                            (DSubReg_i8_reg imm:$lane))),
                      (SubReg_i8_lane imm:$lane))>;
-def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
           (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                              (DSubReg_i16_reg imm:$lane))),
                      (SubReg_i16_lane imm:$lane))>;
+}
 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
           (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                              (DSubReg_i32_reg imm:$lane))),
@@ -6211,6 +6279,7 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
           (COPY_TO_REGCLASS
             (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
       Requires<[HasNEON, HasSlowVGETLNi32]>;
+let Predicates = [HasNEON] in {
 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
           (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                           (SSubReg_f32_reg imm:$src2))>;
@@ -6221,7 +6290,36 @@ def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
 //          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
           (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+}
+
+def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>; // bit 0 clear
+def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>; // bit 0 set
+
+let Predicates = [HasNEON] in {
+def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
+            (EXTRACT_SUBREG
+                (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+                (SSubReg_f16_reg imm_even:$lane))>;
 
+def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
+            (COPY_TO_REGCLASS
+              (VMOVH (EXTRACT_SUBREG
+                  (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+                  (SSubReg_f16_reg imm_odd:$lane))),
+              HPR)>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
+            (EXTRACT_SUBREG
+                (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+                (SSubReg_f16_reg imm_even:$lane))>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
+            (COPY_TO_REGCLASS
+              (VMOVH (EXTRACT_SUBREG
+                  (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+                  (SSubReg_f16_reg imm_odd:$lane))),
+              HPR)>;
+}
 
 //   VMOV     : Vector Set Lane (move ARM core register to scalar)
 
@@ -6254,6 +6352,8 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
   let isInsertSubreg = 1;
 }
 }
+
+let Predicates = [HasNEON] in {
 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
           (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
@@ -6280,6 +6380,15 @@ def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
           (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                                 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
 
+def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
+          (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
+def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
+          (v8f16 (INSERT_SUBREG QPR:$src1,
+                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+                                      (DSubReg_i16_reg imm:$lane))),
+                             (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
+                   (DSubReg_i16_reg imm:$lane)))>;
+
 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
 //          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
@@ -6311,17 +6420,18 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                          dsub_0)>;
+}
 
 //   VDUP     : Vector Duplicate (from ARM core register to all elements)
 
 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
   : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
           IIC_VMOVIS, "vdup", Dt, "$V, $R",
-          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
   : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
           IIC_VMOVIS, "vdup", Dt, "$V, $R",
-          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
 
 def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
 def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
@@ -6331,15 +6441,16 @@ def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
 def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
 def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
 
-// NEONvdup patterns for uarchs with fast VDUP.32.
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+// ARMvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
       Requires<[HasNEON,HasFastVDUP32]>;
-def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
+      Requires<[HasNEON]>;
 
-// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
-def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
       Requires<[HasNEON,HasSlowVDUP32]>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
       Requires<[HasNEON,HasSlowVDUP32]>;
 
 //   VDUP     : Vector Duplicate Lane (from scalar to all elements)
@@ -6348,13 +6459,13 @@ class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
               ValueType Ty, Operand IdxTy>
   : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
               IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
-              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
 
 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Operand IdxTy>
   : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
               IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
-              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                       VectorIndex32:$lane)))]>;
 
 // Inst{19-16} is partially specified depending on the element size.
@@ -6384,48 +6495,50 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
   let Inst{19} = lane{0};
 }
 
-def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
+let Predicates = [HasNEON] in {
+def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
           (VDUPLN32d DPR:$Vm, imm:$lane)>;
 
-def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
           (VDUPLN32d DPR:$Vm, imm:$lane)>;
 
-def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
           (VDUPLN32q DPR:$Vm, imm:$lane)>;
 
-def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
           (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                   (DSubReg_i8_reg imm:$lane))),
                            (SubReg_i8_lane imm:$lane)))>;
-def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
           (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                     (DSubReg_i16_reg imm:$lane))),
                             (SubReg_i16_lane imm:$lane)))>;
-def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
+def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
           (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
                                     (DSubReg_i16_reg imm:$lane))),
                             (SubReg_i16_lane imm:$lane)))>;
-def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
           (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                     (DSubReg_i32_reg imm:$lane))),
                             (SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
           (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
 
-def : Pat<(v4f16 (NEONvdup HPR:$src)),
+def : Pat<(v4f16 (ARMvdup HPR:$src)),
           (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                              HPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
+def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
           (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                              SPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
+def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
           (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                              SPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v8f16 (NEONvdup HPR:$src)),
+def : Pat<(v8f16 (ARMvdup HPR:$src)),
           (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
                              HPR:$src, ssub_0), (i32 0)))>;
+}
 
 //   VMOVN    : Vector Narrowing Move
 defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
@@ -6440,9 +6553,12 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
 //   VMOVL    : Vector Lengthening Move
 defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
 defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+let Predicates = [HasNEON] in {
 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
+}
 
 // Vector Conversions.
 
@@ -6621,24 +6737,29 @@ class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
         (ins DPR:$Vm), IIC_VMOVD,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
         (ins QPR:$Vm), IIC_VMOVQ,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
 
 def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
-def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+let Predicates = [HasNEON] in {
+def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+}
 
 def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
-def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
-def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
-def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+
+let Predicates = [HasNEON] in {
+def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+}
 
 //   VREV32   : Vector Reverse elements within 32-bit words
 
@@ -6646,12 +6767,12 @@ class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
         (ins DPR:$Vm), IIC_VMOVD,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
         (ins QPR:$Vm), IIC_VMOVQ,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
 
 def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
@@ -6665,12 +6786,12 @@ class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
         (ins DPR:$Vm), IIC_VMOVD,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
         (ins QPR:$Vm), IIC_VMOVQ,
         OpcodeStr, Dt, "$Vd, $Vm", "",
-        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
 
 def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
 def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
@@ -6681,7 +6802,8 @@ def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
 
 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
       : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
-             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
+        Requires<[HasNEON]>;
 
 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
 
@@ -6693,6 +6815,7 @@ def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
 
 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
 
+def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
 
 //   VEXT     : Vector Extract
 
@@ -6728,15 +6851,19 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
   let Inst{10-9} = index{1-0};
   let Inst{8}    = 0b0;
 }
+let Predicates = [HasNEON] in {
 def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
           (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+}
 
 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
   let Inst{10}     = index{0};
   let Inst{9-8}    = 0b00;
 }
+let Predicates = [HasNEON] in {
 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
           (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+}
 
 def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
   let Inst{11-8} = index{3-0};
@@ -6745,8 +6872,10 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
   let Inst{11-9} = index{2-0};
   let Inst{8}    = 0b0;
 }
+let Predicates = [HasNEON] in {
 def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
           (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+}
 
 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
   let Inst{11-10} = index{1-0};
@@ -6756,8 +6885,10 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
   let Inst{11} = index{0};
   let Inst{10-8}    = 0b000;
 }
+let Predicates = [HasNEON] in {
 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
           (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+}
 
 //   VTRN     : Vector Transpose
 
@@ -6857,6 +6988,7 @@ def  VTBX4Pseudo
                 IIC_VTBX4, "$orig = $dst", []>;
 } // DecoderMethod = "DecodeTBLInstruction"
 
+let Predicates = [HasNEON] in {
 def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
           (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                             v8i8:$Vn1, dsub_1),
@@ -6899,6 +7031,7 @@ def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                                  v8i8:$Vn2, dsub_2,
                                                  v8i8:$Vn3, dsub_3),
                              v8i8:$Vm))>;
+}
 
 // VRINT      : Vector Rounding
 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
@@ -6989,6 +7122,7 @@ def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
 def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
 def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
 
+let Predicates = [HasNEON] in {
 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
           (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
               (SHA1H (SUBREG_TO_REG (i64 0),
@@ -7016,6 +7150,7 @@ def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
                                 (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                 ssub_0),
                  v4i32:$wk)>;
+}
 
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math
@@ -7123,171 +7258,228 @@ def : Pat<(arm_vmovsr GPR:$a),
         Requires<[HasNEON, DontUseVMOVSR]>;
 
 //===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
+// Non-Instruction Patterns or Endianness - Revert Patterns
 //===----------------------------------------------------------------------===//
 
 // bit_convert
-let Predicates = [IsLE] in {
+// 64 bit conversions
+let Predicates = [HasNEON] in {
+def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
+
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+
+def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16  DPR:$src)>;
+def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16  DPR:$src)>;
+
+// 128 bit conversions
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
+def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
+}
+
+let Predicates = [IsLE,HasNEON] in {
+  // 64 bit conversions
+  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
+
+  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
-}
-def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+
+  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
+
+  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
-  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
-}
-def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
-let Predicates = [IsLE] in {
+
+  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;
+
+  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
-  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
-  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+
+  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
+  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
+  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
-  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
-  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
-}
-def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
-  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
-}
-def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
-}
 
-let Predicates = [IsLE] in {
+  // 128 bit conversions
+  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+
+  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
-}
-def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+
+  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+
+  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
-  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
-}
-def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
-let Predicates = [IsLE] in {
+
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+
+  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
-  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
-  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
-  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+
+  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
-  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
-  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
-}
-def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
-}
-def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
-let Predicates = [IsLE] in {
-  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
 }
 
-let Predicates = [IsBE] in {
+let Predicates = [IsBE,HasNEON] in {
   // 64 bit conversions
+  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
+
+  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
   def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
-  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+
+  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
+
+  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
   def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
-  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
+
+  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
+
+  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
   def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
-  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
-  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+
+  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
+  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
+  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
   def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
-  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
-  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
-  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
-  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
 
   // 128 bit conversions
+  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
+
+  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
   def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
-  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+
+  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
+
+  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
-  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+
+  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
+
+  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
   def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
-  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
-  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
-  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+
+  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
+  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
   def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
-  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
-  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
-  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
-  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
 }
 
 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+let Predicates = [IsBE,HasNEON] in {
 def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
-          (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
 def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
-          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
-          (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
-          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
+}
 
 // Fold extracting an element out of a v2i32 into a vfp register.
 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
-          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
+      Requires<[HasNEON]>;
 
 // Vector lengthening move with load, matching extending loads.
 
@@ -7301,17 +7493,20 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
   def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                     (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                   (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
-                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+             Requires<[HasNEON]>;
 
   def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
-                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+           Requires<[HasNEON]>;
 
   def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
-                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+           Requires<[HasNEON]>;
   }
 }
 
@@ -7328,17 +7523,20 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
          (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
 }
 
 // The following class definition is basically a copy of the
@@ -7352,19 +7550,22 @@ multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, strin
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (!cast<Instruction>("VREV32d" # RevLanes)
            (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
          (!cast<Instruction>("VREV32d" # RevLanes)
            (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
          (!cast<Instruction>("VREV32d" # RevLanes)
            (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-         dsub_0)>;
+         dsub_0)>,
+             Requires<[HasNEON]>;
 }
 
 // extload, zextload and sextload for a lengthening load followed by another
@@ -7386,19 +7587,22 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
 }
 
 // The following class definition is basically a copy of the
@@ -7414,21 +7618,24 @@ multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string Sr
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
          (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
              (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
-             dsub_0))>;
+             dsub_0))>,
+             Requires<[HasNEON]>;
 }
 
 // extload, zextload and sextload for a lengthening load followed by another
@@ -7451,21 +7658,24 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+              Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
            (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+             Requires<[HasNEON]>;
 }
 
 // The following class definition is basically a copy of the
@@ -7482,7 +7692,8 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
             (!cast<Instruction>("VREV16d8")
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+             Requires<[HasNEON]>;
   def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
@@ -7490,7 +7701,8 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
             (!cast<Instruction>("VREV16d8")
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+             Requires<[HasNEON]>;
   def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
          (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
@@ -7498,14 +7710,15 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
             (!cast<Instruction>("VREV16d8")
              (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
              dsub_0)),
-          dsub_0)>;
+          dsub_0)>,
+             Requires<[HasNEON]>;
 }
 
 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
 
-let Predicates = [IsLE] in {
+let Predicates = [HasNEON,IsLE] in {
   defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
   defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
 
@@ -7517,7 +7730,7 @@ let Predicates = [IsLE] in {
   defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
 }
 
-let Predicates = [IsBE] in {
+let Predicates = [HasNEON,IsBE] in {
   defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
   defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
 
@@ -7530,7 +7743,7 @@ let Predicates = [IsBE] in {
 }
 
 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-let Predicates = [IsLE] in {
+let Predicates = [HasNEON,IsLE] in {
   def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
         (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
            (VLD1LNd16 addrmode6:$addr,
@@ -7547,7 +7760,7 @@ let Predicates = [IsLE] in {
 // The following patterns are basically a copy of the patterns above, 
 // however with an additional VREV16d instruction to convert data
 // loaded by VLD1LN into proper vector format in big endian mode.
-let Predicates = [IsBE] in {
+let Predicates = [HasNEON,IsBE] in {
   def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
         (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
            (!cast<Instruction>("VREV16d8")
@@ -7565,6 +7778,7 @@ let Predicates = [IsBE] in {
                         (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
 }
 
+let Predicates = [HasNEON] in {
 def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
           (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
 def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
@@ -7575,6 +7789,9 @@ def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
           (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
           (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+}
 
 //===----------------------------------------------------------------------===//
 // Assembler aliases
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index b20b34eaa6a9..cfeb13c6acb6 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,9 +1,8 @@
 //===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -188,6 +187,19 @@ def t_addrmode_rr : MemOperand,
   let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
 }
 
+// t_addrmode_rr_sext := reg + reg
+//
+// This is similar to t_addrmode_rr, but uses different heuristics for
+// ldrsb/ldrsh.
+def t_addrmode_rr_sext : MemOperand,
+                    ComplexPattern<i32, 2, "SelectThumbAddrModeRRSext", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let DecoderMethod = "DecodeThumbAddrModeRR";
+  let ParserMatchClass = t_addrmode_rr_asm_operand;
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
 // t_addrmode_rrs := reg + reg
 //
 // We use separate scaled versions because the Select* functions need
@@ -651,7 +663,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
 def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
                   "ldr", "\t$Rt, $addr",
                   [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
-              T1Encoding<{0,1,0,0,1,?}> {
+              T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> {
   // A6.2 & A8.6.59
   bits<3> Rt;
   bits<8> addr;
@@ -665,7 +677,7 @@ let canFoldAsLoad = 1 in
 def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
                     "ldr", "\t$Rt, $addr",
                     [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
-              T1LdStSP<{1,?,?}> {
+              T1LdStSP<{1,?,?}>, Sched<[WriteLd]> {
   bits<3> Rt;
   bits<8> addr;
   let Inst{10-8} = Rt;
@@ -716,39 +728,39 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
 defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
                                 t_addrmode_is4, AddrModeT1_4,
                                 IIC_iLoad_r, IIC_iLoad_i, "ldr",
-                                load>;
+                                load>, Sched<[WriteLd]>;
 
 // A8.6.64 & A8.6.61
 defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
                                 t_addrmode_is1, AddrModeT1_1,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
-                                zextloadi8>;
+                                zextloadi8>, Sched<[WriteLd]>;
 
 // A8.6.76 & A8.6.73
 defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
                                 t_addrmode_is2, AddrModeT1_2,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
-                                zextloadi16>;
+                                zextloadi16>, Sched<[WriteLd]>;
 
 let AddedComplexity = 10 in
 def tLDRSB :                    // A8.6.80
-  T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+  T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
                  AddrModeT1_1, IIC_iLoad_bh_r,
                  "ldrsb", "\t$Rt, $addr",
-                 [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
+                 [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
 
 let AddedComplexity = 10 in
 def tLDRSH :                    // A8.6.84
-  T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+  T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
                  AddrModeT1_2, IIC_iLoad_bh_r,
                  "ldrsh", "\t$Rt, $addr",
-                 [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
+                 [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
 
 
 def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
                     "str", "\t$Rt, $addr",
                     [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
-              T1LdStSP<{0,?,?}> {
+              T1LdStSP<{0,?,?}>, Sched<[WriteST]> {
   bits<3> Rt;
   bits<8> addr;
   let Inst{10-8} = Rt;
@@ -759,19 +771,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
 defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
                                 t_addrmode_is4, AddrModeT1_4,
                                 IIC_iStore_r, IIC_iStore_i, "str",
-                                store>;
+                                store>, Sched<[WriteST]>;
 
 // A8.6.197 & A8.6.195
 defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
                                 t_addrmode_is1, AddrModeT1_1,
                                 IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
-                                truncstorei8>;
+                                truncstorei8>, Sched<[WriteST]>;
 
 // A8.6.207 & A8.6.205
 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
                                t_addrmode_is2, AddrModeT1_2,
                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
-                               truncstorei16>;
+                               truncstorei16>, Sched<[WriteST]>;
 
 
 //===----------------------------------------------------------------------===//
@@ -799,8 +811,8 @@ def tLDMIA_UPD :
                  "$Rn = $wb", IIC_iLoad_mu>,
     PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
   let Size = 2;
-  let OutOperandList = (outs GPR:$wb);
-  let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+  let OutOperandList = (outs tGPR:$wb);
+  let InOperandList = (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops);
   let Pattern = [];
   let isCodeGenOnly = 1;
   let isPseudo = 1;
@@ -809,7 +821,7 @@ def tLDMIA_UPD :
 
 // There is no non-writeback version of STM for Thumb.
 let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+def tSTMIA_UPD : Thumb1I<(outs tGPR:$wb),
                          (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
                          AddrModeNone, 2, IIC_iStore_mu,
                          "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
@@ -831,7 +843,7 @@ let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
 def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
                IIC_iPop,
                "pop${p}\t$regs", []>,
-           T1Misc<{1,1,0,?,?,?,?}> {
+           T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> {
   bits<16> regs;
   let Inst{8}   = regs{15};
   let Inst{7-0} = regs{7-0};
@@ -841,7 +853,7 @@ let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
 def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
                 IIC_iStore_m,
                 "push${p}\t$regs", []>,
-            T1Misc<{0,1,0,?,?,?,?}> {
+            T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> {
   bits<16> regs;
   let Inst{8}   = regs{14};
   let Inst{7-0} = regs{7-0};
@@ -1202,7 +1214,7 @@ def tMUL :                      // A8.6.105 T1
   Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
            IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
            [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
-      T1DataProcessing<0b1101> {
+      T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
   bits<3> Rd;
   bits<3> Rn;
   let Inst{5-3} = Rn;
@@ -1499,12 +1511,13 @@ def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
 // FIXME: Non-IOS version(s)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
     Defs = [ R7, LR, SP ] in
-def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins tGPR:$src, tGPR:$scratch),
                               AddrModeNone, 0, IndexModeNone,
                               Pseudo, NoItinerary, "", "",
-                              [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                              [(ARMeh_sjlj_longjmp tGPR:$src, tGPR:$scratch)]>,
                              Requires<[IsThumb,IsNotWindows]>;
 
+// (Windows is Thumb2-only)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
     Defs = [ R11, LR, SP ] in
 def tInt_WIN_eh_sjlj_longjmp
@@ -1599,16 +1612,16 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr),  (tLDRHr t_addrmode_rr:$addr)>;
 // and expand it just after ISel.
 let usesCustomInserter = 1, mayLoad =1,
     Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
- def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
-                               (ins rGPR:$Rn, pred:$p),
+ def tLDR_postidx: tPseudoInst<(outs tGPR:$Rt, tGPR:$Rn_wb),
+                               (ins tGPR:$Rn, pred:$p),
                                4, IIC_iStore_ru,
                                []>;
 
 // post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
 // multiple registers) is the same in ISel as MachineInstr, so there's no need
 // for a pseudo.
-def : T1Pat<(post_store rGPR:$Rt, rGPR:$Rn, 4),
-            (tSTMIA_UPD rGPR:$Rn, rGPR:$Rt)>;
+def : T1Pat<(post_store tGPR:$Rt, tGPR:$Rn, 4),
+            (tSTMIA_UPD tGPR:$Rn, tGPR:$Rt)>;
 
 // If it's impossible to use [r,r] address mode for sextload, select to
 // ldr{b|h} + sxt{b|h} instead.
@@ -1677,9 +1690,9 @@ def : T1Pat<(i32 imm256_510:$src),
 // be expanded into two instructions late to allow if-conversion and
 // scheduling.
 let isReMaterializable = 1 in
-def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+def tLDRpci_pic : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
                              NoItinerary,
-               [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+               [(set tGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb, IsThumb1Only]>;
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 7a6673b49d57..7cbfaba7a8eb 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1,9 +1,8 @@
 //===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,6 +25,7 @@ def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
 def it_mask : Operand<i32> {
   let PrintMethod = "printThumbITMask";
   let ParserMatchClass = it_mask_asmoperand;
+  let EncoderMethod = "getITMaskOpValue";
 }
 
 // t2_shift_imm: An integer that encodes a shift amount and the type of shift
@@ -40,6 +40,16 @@ def t2_shift_imm : Operand<i32> {
   let DecoderMethod = "DecodeT2ShifterImmOperand";
 }
 
+def mve_shift_imm : AsmOperandClass {
+  let Name = "MVELongShift";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticString = "operand must be an immediate in the range [1,32]";
+}
+def long_shift : Operand<i32> {
+  let ParserMatchClass = mve_shift_imm;
+  let DecoderMethod = "DecodeLongShiftOperand";
+}
+
 // Shifted operands. No register controlled shifts for Thumb2.
 // Note: We do not support rrx shifted operands yet.
 def t2_so_reg : Operand<i32>,    // reg imm
@@ -151,6 +161,26 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
 
 // Define Thumb2 specific addressing modes.
 
+// t2_addr_offset_none := reg
+def MemNoOffsetT2AsmOperand
+  : AsmOperandClass { let Name = "MemNoOffsetT2"; }
+def t2_addr_offset_none : MemOperand {
+  let PrintMethod = "printAddrMode7Operand";
+  let DecoderMethod = "DecodeGPRnopcRegisterClass";
+  let ParserMatchClass = MemNoOffsetT2AsmOperand;
+  let MIOperandInfo = (ops GPRnopc:$base);
+}
+
+// t2_nosp_addr_offset_none := reg
+def MemNoOffsetT2NoSpAsmOperand
+  : AsmOperandClass { let Name = "MemNoOffsetT2NoSp"; }
+def t2_nosp_addr_offset_none : MemOperand {
+  let PrintMethod = "printAddrMode7Operand";
+  let DecoderMethod = "DecoderGPRRegisterClass";
+  let ParserMatchClass = MemNoOffsetT2NoSpAsmOperand;
+  let MIOperandInfo = (ops rGPR:$base);
+}
+
 // t2addrmode_imm12  := reg + imm12
 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
 def t2addrmode_imm12 : MemOperand,
@@ -182,31 +212,40 @@ def t2adrlabel : Operand<i32> {
 }
 
 // t2addrmode_posimm8  := reg + imm8
-def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {
+  let Name="MemPosImm8Offset";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
 def t2addrmode_posimm8 : MemOperand {
   let PrintMethod = "printT2AddrModeImm8Operand<false>";
-  let EncoderMethod = "getT2AddrModeImm8OpValue";
+  let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
   let DecoderMethod = "DecodeT2AddrModeImm8";
   let ParserMatchClass = MemPosImm8OffsetAsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
 
 // t2addrmode_negimm8  := reg - imm8
-def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {
+  let Name="MemNegImm8Offset";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
 def t2addrmode_negimm8 : MemOperand,
                       ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
   let PrintMethod = "printT2AddrModeImm8Operand<false>";
-  let EncoderMethod = "getT2AddrModeImm8OpValue";
+  let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
   let DecoderMethod = "DecodeT2AddrModeImm8";
   let ParserMatchClass = MemNegImm8OffsetAsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
 
 // t2addrmode_imm8  := reg +/- imm8
-def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
+def MemImm8OffsetAsmOperand : AsmOperandClass {
+  let Name = "MemImm8Offset";
+  let RenderMethod = "addMemImmOffsetOperands";
+}
 class T2AddrMode_Imm8 : MemOperand,
                         ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
-  let EncoderMethod = "getT2AddrModeImm8OpValue";
+  let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
   let DecoderMethod = "DecodeT2AddrModeImm8";
   let ParserMatchClass = MemImm8OffsetAsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
@@ -248,10 +287,38 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
 def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
 def t2am_imm8s4_offset : MemOperand {
   let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
-  let EncoderMethod = "getT2Imm8s4OpValue";
+  let EncoderMethod = "getT2ScaledImmOpValue<8,2>";
   let DecoderMethod = "DecodeT2Imm8S4";
 }
 
+// t2addrmode_imm7s4  := reg +/- (imm7 << 2)
+def MemImm7s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm7s4Offset";}
+class T2AddrMode_Imm7s4 : MemOperand {
+  let EncoderMethod = "getT2AddrModeImm7s4OpValue";
+  let DecoderMethod = "DecodeT2AddrModeImm7<2,0>";
+  let ParserMatchClass = MemImm7s4OffsetAsmOperand;
+  let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+def t2addrmode_imm7s4 : T2AddrMode_Imm7s4 {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4Operand<false>";
+}
+
+def t2addrmode_imm7s4_pre : T2AddrMode_Imm7s4 {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4Operand<true>";
+}
+
+def t2am_imm7s4_offset_asmoperand : AsmOperandClass { let Name = "Imm7s4"; }
+def t2am_imm7s4_offset : MemOperand {
+  // They are printed the same way as the imm8 version
+  let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+  let ParserMatchClass = t2am_imm7s4_offset_asmoperand;
+  let EncoderMethod = "getT2ScaledImmOpValue<7,2>";
+  let DecoderMethod = "DecodeT2Imm7S4";
+}
+
 // t2addrmode_imm0_1020s4  := reg + (imm8 << 2)
 def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
   let Name = "MemImm0_1020s4Offset";
@@ -290,6 +357,75 @@ def addrmode_tbh : MemOperand {
   let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
 }
 
+// Define ARMv8.1-M specific addressing modes.
+
+// Label operands for BF/BFL/WLS/DLS/LE
+class BFLabelOp<string signed, string isNeg, string zeroPermitted, string size,
+                string fixup>
+  : Operand<OtherVT> {
+  let EncoderMethod = !strconcat("getBFTargetOpValue<", isNeg, ", ",
+                                 fixup, ">");
+  let OperandType = "OPERAND_PCREL";
+  let DecoderMethod = !strconcat("DecodeBFLabelOperand<", signed, ", ",
+                                 isNeg, ", ", zeroPermitted, ", ", size, ">");
+}
+def bflabel_u4  : BFLabelOp<"false", "false", "false", "4",  "ARM::fixup_bf_branch">;
+def bflabel_s12 : BFLabelOp<"true",  "false", "true",  "12", "ARM::fixup_bfc_target">;
+def bflabel_s16 : BFLabelOp<"true",  "false", "true",  "16", "ARM::fixup_bf_target">;
+def bflabel_s18 : BFLabelOp<"true",  "false", "true",  "18", "ARM::fixup_bfl_target">;
+
+def wlslabel_u11_asmoperand : AsmOperandClass {
+  let Name = "WLSLabel";
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isUnsignedOffset<11, 1>";
+  let DiagnosticString =
+    "loop end is out of range or not a positive multiple of 2";
+}
+def wlslabel_u11 : BFLabelOp<"false", "false", "true",  "11", "ARM::fixup_wls"> {
+  let ParserMatchClass = wlslabel_u11_asmoperand;
+}
+def lelabel_u11_asmoperand : AsmOperandClass {
+  let Name = "LELabel";
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isLEOffset";
+  let DiagnosticString =
+    "loop start is out of range or not a negative multiple of 2";
+}
+def lelabel_u11 : BFLabelOp<"false", "true",  "true",  "11", "ARM::fixup_le"> {
+  let ParserMatchClass = lelabel_u11_asmoperand;
+}
+
+def bfafter_target : Operand<OtherVT> {
+    let EncoderMethod = "getBFAfterTargetOpValue";
+    let OperandType = "OPERAND_PCREL";
+    let DecoderMethod = "DecodeBFAfterTargetOperand";
+}
+
+// pred operand excluding AL
+def pred_noal_asmoperand : AsmOperandClass {
+  let Name = "CondCodeNoAL";
+  let RenderMethod = "addITCondCodeOperands";
+  let PredicateMethod = "isITCondCodeNoAL";
+  let ParserMethod = "parseITCondCode";
+}
+def pred_noal : Operand<i32> {
+  let PrintMethod = "printMandatoryPredicateOperand";
+  let ParserMatchClass = pred_noal_asmoperand;
+  let DecoderMethod = "DecodePredNoALOperand";
+}
+
+
+// Inverted predicate operand (excluding AL), used by the CSEL aliases
+def pred_noal_inv_asmoperand : AsmOperandClass {
+  let Name = "CondCodeNoALInv";
+  let RenderMethod = "addITCondCodeInvOperands";
+  let PredicateMethod = "isITCondCodeNoAL";
+  let ParserMethod = "parseITCondCode";
+}
+def pred_noal_inv : Operand<i32> {
+  let PrintMethod = "printMandatoryInvertedPredicateOperand";
+  let ParserMatchClass = pred_noal_inv_asmoperand;
+}
 //===----------------------------------------------------------------------===//
 // Multiclass helpers...
 //
@@ -604,6 +740,17 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
+     let Inst{15} = 0b0;
+     // In most of these instructions, and most versions of the Arm
+     // architecture, bit 15 of this encoding is listed as (0) rather
+     // than 0, i.e. setting it to 1 is UNPREDICTABLE or a soft-fail
+     // rather than a hard failure. In v8.1-M, this requirement is
+     // upgraded to a hard one for ORR, so that the encodings with 1
+     // in this bit can be reused for other instructions (such as
+     // CSEL). Setting Unpredictable{15} = 1 unconditionally would reintroduce
+     // that encoding clash in the auto-generated MC decoder, so the bit is
+     // marked unpredictable only when opcod is not 0b0010.
+     let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1);
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
@@ -617,6 +764,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
+     let Inst{15} = 0;
+     let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1); // see above
    }
   // Assembly aliases for optional destination operand when it's the same
   // as the source operand.
@@ -880,6 +1029,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
      let Inst{31-27} = 0b11101;
      let Inst{26-21} = 0b010010;
      let Inst{19-16} = 0b1111; // Rn
+     let Inst{15}    = 0b0;
      let Inst{5-4} = opcod;
    }
    // register
@@ -923,15 +1073,15 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
 /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
 /// patterns. Similar to T2I_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
-multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+multiclass T2I_cmp_irs<bits<4> opcod, string opc, RegisterClass LHSGPR,
                      InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
                      SDPatternOperator opnode> {
 let isCompare = 1, Defs = [CPSR] in {
    // shifted imm
    def ri : T2OneRegCmpImm<
-                (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
+                (outs), (ins LHSGPR:$Rn, t2_so_imm:$imm), iii,
                 opc, ".w\t$Rn, $imm",
-                [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
+                [(opnode LHSGPR:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -941,9 +1091,9 @@ let isCompare = 1, Defs = [CPSR] in {
    }
    // register
    def rr : T2TwoRegCmp<
-                (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+                (outs), (ins LHSGPR:$Rn, rGPR:$Rm), iir,
                 opc, ".w\t$Rn, $Rm",
-                [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
+                [(opnode LHSGPR:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
@@ -955,9 +1105,9 @@ let isCompare = 1, Defs = [CPSR] in {
    }
    // shifted register
    def rs : T2OneRegCmpShiftedReg<
-                (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
+                (outs), (ins LHSGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
                 opc, ".w\t$Rn, $ShiftedRm",
-                [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+                [(opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm)]>,
                 Sched<[WriteCMPsi]> {
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
@@ -971,9 +1121,9 @@ let isCompare = 1, Defs = [CPSR] in {
   // No alias here for 'rr' version as not all instantiations of this
   // multiclass want one (CMP in particular, does not).
   def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
-     (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+     (!cast<Instruction>(NAME#"ri") LHSGPR:$Rn, t2_so_imm:$imm, pred:$p)>;
   def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
-     (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+     (!cast<Instruction>(NAME#"rs") LHSGPR:$Rn, t2_so_reg:$shift, pred:$p)>;
 }
 
 /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -1334,7 +1484,8 @@ def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
 def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                           (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
                           AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
-                          "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+                          "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+                  Sched<[WriteLd]>;
 
 def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
                             (ins t2addrmode_imm8_pre:$addr),
@@ -1872,6 +2023,7 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
   let Inst{19-16} = 0b1111; // Rn
+  let Inst{15} = 0b0;
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0000;
 }
@@ -2148,6 +2300,11 @@ def : T2Pat<(add        GPR:$src, imm0_4095_neg:$imm),
 def : T2Pat<(add        GPR:$src, imm0_65535_neg:$imm),
             (t2SUBrr    GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
 
+// Do the same for v8m targets since they support movw with a 16-bit value.
+def : T1Pat<(add tGPR:$src, imm0_65535_neg:$imm),
+             (tSUBrr tGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>,
+             Requires<[HasV8MBaseline]>;
+
 let AddedComplexity = 1 in
 def : T2Pat<(ARMaddc    rGPR:$src, imm1_255_neg:$imm),
             (t2SUBSri   rGPR:$src, imm1_255_neg:$imm)>;
@@ -2327,14 +2484,14 @@ class T2SatI<dag iops, string opc, string asm>
 
 def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
                    "ssat", "\t$Rd, $sat_imm, $Rn$sh">,
-                   Requires<[IsThumb2]> {
+                   Requires<[IsThumb2]>, Sched<[WriteALU]> {
   let Inst{23-22} = 0b00;
   let Inst{5}  = 0;
 }
 
 def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
                      "ssat16", "\t$Rd, $sat_imm, $Rn">,
-                     Requires<[IsThumb2, HasDSP]> {
+                     Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
   let Inst{23-22} = 0b00;
   let sh = 0b100000;
   let Inst{4} = 0;
@@ -2342,13 +2499,13 @@ def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
 
 def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
                     "usat", "\t$Rd, $sat_imm, $Rn$sh">,
-                    Requires<[IsThumb2]> {
+                    Requires<[IsThumb2]>, Sched<[WriteALU]> {
   let Inst{23-22} = 0b10;
 }
 
 def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
                      "usat16", "\t$Rd, $sat_imm, $Rn">,
-                     Requires<[IsThumb2, HasDSP]> {
+                     Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
   let Inst{23-22} = 0b10;
   let sh = 0b100000;
   let Inst{4} = 0;
@@ -2395,6 +2552,8 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
   let Inst{19-16} = 0b1111; // Rn
+  let Inst{15} = 0b0;
+  let Unpredictable{15} = 0b1;
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0011;
 }
@@ -2472,7 +2631,7 @@ class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
 let Constraints = "$src = $Rd" in
 def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
                 IIC_iUNAsi, "bfc", "\t$Rd, $imm",
-                [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+                [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
   let Inst{31-27} = 0b11110;
   let Inst{26} = 0; // should be 0.
   let Inst{25} = 1;
@@ -2488,7 +2647,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
 
 def t2SBFX: T2TwoRegBitFI<
                 (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
-                 IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+                 IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b10100;
@@ -2497,7 +2656,7 @@ def t2SBFX: T2TwoRegBitFI<
 
 def t2UBFX: T2TwoRegBitFI<
                 (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
-                 IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+                 IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b11100;
@@ -2523,7 +2682,7 @@ let Constraints = "$src = $Rd" in {
                   (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
                   IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
                   [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
-                                   bf_inv_mask_imm:$imm))]> {
+                                   bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
     let Inst{31-27} = 0b11110;
     let Inst{26} = 0; // should be 0.
     let Inst{25} = 1;
@@ -2597,7 +2756,8 @@ def : T2Pat<(and     rGPR:$src, t2_so_imm_not:$imm),
 
 // top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
 def top16Zero: PatLeaf<(i32 rGPR:$src), [{
-  return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+  return !SDValue(N,0)->getValueType(0).isVector() &&
+         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
   }]>;
 
 // so_imm_notSext is needed instead of so_imm_not, as the value of imm
@@ -3054,7 +3214,7 @@ def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
 //===----------------------------------------------------------------------===//
 //  Comparison Instructions...
 //
-defm t2CMP  : T2I_cmp_irs<0b1101, "cmp",
+defm t2CMP  : T2I_cmp_irs<0b1101, "cmp", GPRnopc,
                           IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, ARMcmp>;
 
 def : T2Pat<(ARMcmpZ  GPRnopc:$lhs, t2_so_imm:$imm),
@@ -3122,10 +3282,10 @@ def : T2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
 def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
             (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
 
-defm t2TST  : T2I_cmp_irs<0b0000, "tst",
+defm t2TST  : T2I_cmp_irs<0b0000, "tst", rGPR,
                           IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
                          BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
-defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
+defm t2TEQ  : T2I_cmp_irs<0b0100, "teq", rGPR,
                           IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
                          BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
 
@@ -3277,17 +3437,17 @@ def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "ldrexb", "\t$Rt, $addr", "",
                          [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]>;
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
 def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "ldrexh", "\t$Rt, $addr", "",
                          [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]>;
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
 def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
                        AddrModeT2_ldrex, 4, NoItinerary,
                        "ldrex", "\t$Rt, $addr", "",
                      [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]> {
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> {
   bits<4> Rt;
   bits<12> addr;
   let Inst{31-27} = 0b11101;
@@ -3303,7 +3463,7 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
                          AddrModeNone, 4, NoItinerary,
                          "ldrexd", "\t$Rt, $Rt2, $addr", "",
                          [], {?, ?, ?, ?}>,
-               Requires<[IsThumb2, IsNotMClass]> {
+               Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
 }
@@ -3311,17 +3471,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "ldaexb", "\t$Rt, $addr", "",
                          [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
+               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
 def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "ldaexh", "\t$Rt, $addr", "",
                          [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
+               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
 def t2LDAEX  : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
                        AddrModeNone, 4, NoItinerary,
                        "ldaex", "\t$Rt, $addr", "",
                          [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
+               Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]> {
   bits<4> Rt;
   bits<4> addr;
   let Inst{31-27} = 0b11101;
@@ -3337,7 +3497,7 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
                          AddrModeNone, 4, NoItinerary,
                          "ldaexd", "\t$Rt, $Rt2, $addr", "",
                          [], {?, ?, ?, ?}>, Requires<[IsThumb,
-                         HasAcquireRelease, HasV7Clrex, IsNotMClass]> {
+                         HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
 
@@ -3352,14 +3512,14 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
                          "strexb", "\t$Rd, $Rt, $addr", "",
                          [(set rGPR:$Rd,
                                (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]>;
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
 def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
                          (ins rGPR:$Rt, addr_offset_none:$addr),
                          AddrModeNone, 4, NoItinerary,
                          "strexh", "\t$Rd, $Rt, $addr", "",
                          [(set rGPR:$Rd,
                                (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]>;
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
 
 def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                              t2addrmode_imm0_1020s4:$addr),
@@ -3367,7 +3527,7 @@ def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                   "strex", "\t$Rd, $Rt, $addr", "",
                   [(set rGPR:$Rd,
                         (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
-               Requires<[IsThumb, HasV8MBaseline]> {
+               Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]> {
   bits<4> Rd;
   bits<4> Rt;
   bits<12> addr;
@@ -3384,7 +3544,7 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
                          AddrModeNone, 4, NoItinerary,
                          "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
                          {?, ?, ?, ?}>,
-               Requires<[IsThumb2, IsNotMClass]> {
+               Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
 }
@@ -3395,7 +3555,7 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
                          [(set rGPR:$Rd,
                                (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
                          Requires<[IsThumb, HasAcquireRelease,
-                                   HasV7Clrex]>;
+                                   HasV7Clrex]>, Sched<[WriteST]>;
 
 def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
                          (ins rGPR:$Rt, addr_offset_none:$addr),
@@ -3404,7 +3564,7 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
                          [(set rGPR:$Rd,
                                (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
                          Requires<[IsThumb, HasAcquireRelease,
-                                   HasV7Clrex]>;
+                                   HasV7Clrex]>, Sched<[WriteST]>;
 
 def t2STLEX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                              addr_offset_none:$addr),
@@ -3412,7 +3572,8 @@ def t2STLEX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                   "stlex", "\t$Rd, $Rt, $addr", "",
                   [(set rGPR:$Rd,
                         (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>,
-                  Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
+                  Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>,
+                  Sched<[WriteST]> {
   bits<4> Rd;
   bits<4> Rt;
   bits<4> addr;
@@ -3429,7 +3590,7 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
                          AddrModeNone, 4, NoItinerary,
                          "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
                          {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease,
-                         HasV7Clrex, IsNotMClass]> {
+                         HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> {
   bits<4> Rt2;
   let Inst{11-8} = Rt2;
 }
@@ -4547,9 +4708,9 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
 def : t2InstAlias<"cmn${p} $Rn, $Rm",
                   (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
 def : t2InstAlias<"teq${p} $Rn, $Rm",
-                  (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+                  (t2TEQrr rGPR:$Rn, rGPR:$Rm, pred:$p)>;
 def : t2InstAlias<"tst${p} $Rn, $Rm",
-                  (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+                  (t2TSTrr rGPR:$Rn, rGPR:$Rm, pred:$p)>;
 
 // Memory barriers
 def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
@@ -4888,3 +5049,227 @@ def : t2InstAlias<"pld${p} $addr",
 def : InstAlias<"pli${p} $addr",
                  (t2PLIpci  t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
       Requires<[IsThumb2,HasV7]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1m instructions
+//
+
+class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm,
+             string ops, string cstr, list<dag> pattern>
+  : Thumb2XI<oops, iops, am, 4, itin, !strconcat(asm, "\t", ops), cstr,
+             pattern>,
+    Requires<[HasV8_1MMainline]>;
+
+def t2CLRM : V8_1MI<(outs),
+                    (ins pred:$p, reglist_with_apsr:$regs, variable_ops),
+                    AddrModeNone, NoItinerary, "clrm", "${p}\t$regs", "", []> {
+  bits<16> regs;
+
+  let Inst{31-16} = 0b1110100010011111;
+  let Inst{15-14} = regs{15-14};
+  let Inst{13} = 0b0;
+  let Inst{12-0} = regs{12-0};
+}
+
+class t2BF<dag iops, string asm, string ops>
+  : V8_1MI<(outs ), iops, AddrModeNone, NoItinerary, asm, ops, "", []> {
+
+  let Inst{31-27} = 0b11110;
+  let Inst{15-14} = 0b11;
+  let Inst{12} = 0b0;
+  let Inst{0} = 0b1;
+
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BF_LabelPseudo
+  : t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> {
+  let isTerminator = 1;
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
+                 !strconcat("bf", "${p}"), "$b_label, $label"> {
+  bits<4> b_label;
+  bits<16> label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-21} = 0b10;
+  let Inst{20-16} = label{15-11};
+  let Inst{13} = 0b1;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFic : t2BF<(ins bflabel_u4:$b_label, bflabel_s12:$label,
+                   bfafter_target:$ba_label, pred_noal:$bcond), "bfcsel",
+                  "$b_label, $label, $ba_label, $bcond"> {
+  bits<4> bcond;
+  bits<12> label;
+  bits<1> ba_label;
+  bits<4> b_label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22} = 0b0;
+  let Inst{21-18} = bcond{3-0};
+  let Inst{17} = ba_label{0};
+  let Inst{16} = label{11};
+  let Inst{13} = 0b1;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+                 !strconcat("bfx", "${p}"), "$b_label, $Rn"> {
+  bits<4> b_label;
+  bits<4> Rn;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-20} = 0b110;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+}
+
+def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p),
+                  !strconcat("bfl", "${p}"), "$b_label, $label"> {
+  bits<4> b_label;
+  bits<18> label;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-16} = label{17-11};
+  let Inst{13} = 0b0;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2BFLr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+                  !strconcat("bflx", "${p}"), "$b_label, $Rn"> {
+  bits<4> b_label;
+  bits<4> Rn;
+
+  let Inst{26-23} = b_label{3-0};
+  let Inst{22-20} = 0b111;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+}
+
+class t2LOL<dag oops, dag iops, string asm, string ops>
+  : V8_1MI<oops, iops, AddrModeNone, NoItinerary, asm, ops, "", [] > {
+  let Inst{31-23} = 0b111100000;
+  let Inst{15-14} = 0b11;
+  let Inst{0} = 0b1;
+  let isBranch = 1;
+  let isTerminator = 1;
+  let DecoderMethod = "DecodeLOLoop";
+  let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+let isNotDuplicable = 1 in {
+def t2WLS : t2LOL<(outs GPRlr:$LR),
+                  (ins rGPR:$Rn, wlslabel_u11:$label),
+                  "wls", "$LR, $Rn, $label"> {
+  bits<4> Rn;
+  bits<11> label;
+  let Inst{22-20} = 0b100;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+  let usesCustomInserter = 1;
+}
+
+def t2DLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn),
+                  "dls", "$LR, $Rn"> {
+  bits<4> Rn;
+  let isBranch = 0;
+  let isTerminator = 0;
+  let Inst{22-20} = 0b100;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{13-1} = 0b1000000000000;
+  let usesCustomInserter = 1;
+}
+
+def t2LEUpdate : t2LOL<(outs GPRlr:$LRout),
+                       (ins GPRlr:$LRin, lelabel_u11:$label),
+                       "le", "$LRin, $label"> {
+  bits<11> label;
+  let Inst{22-16} = 0b0001111;
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+  let usesCustomInserter = 1;
+}
+
+def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
+  bits<11> label;
+  let Inst{22-16} = 0b0101111;
+  let Inst{13-12} = 0b00;
+  let Inst{11} = label{0};
+  let Inst{10-1} = label{10-1};
+}
+
+def t2DoLoopStart :
+  t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
+  [(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+
+def t2LoopDec :
+  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
+               4, IIC_Br, []>, Sched<[WriteBr]>;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in {
+def t2WhileLoopStart :
+    t2PseudoInst<(outs),
+                 (ins rGPR:$elts, brtarget:$target),
+                 4, IIC_Br, []>,
+                 Sched<[WriteBr]>;
+
+def t2LoopEnd :
+  t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
+  8, IIC_Br, []>, Sched<[WriteBr]>;
+
+} // end isBranch, isTerminator, hasSideEffects
+
+} // end isNotDuplicable
+
+class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
+  : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond),
+           AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<4> fcond;
+
+  let Inst{31-20} = 0b111010100101;
+  let Inst{19-16} = Rn{3-0};
+  let Inst{15-12} = opcode;
+  let Inst{11-8} = Rd{3-0};
+  let Inst{7-4} = fcond{3-0};
+  let Inst{3-0} = Rm{3-0};
+
+  let Uses = [CPSR];
+}
+
+def t2CSEL  : CS<"csel",  0b1000>;
+def t2CSINC : CS<"csinc", 0b1001>;
+def t2CSINV : CS<"csinv", 0b1010>;
+def t2CSNEG : CS<"csneg", 0b1011>;
+
+
+// CS aliases.
+let Predicates = [HasV8_1MMainline] in {
+  def : InstAlias<"csetm\t$Rd, $fcond",
+                 (t2CSINV rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cset\t$Rd, $fcond",
+                 (t2CSINC rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cinc\t$Rd, $Rn, $fcond",
+                 (t2CSINC rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cinv\t$Rd, $Rn, $fcond",
+                 (t2CSINV rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+
+  def : InstAlias<"cneg\t$Rd, $Rn, $fcond",
+                 (t2CSNEG rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index b58730c452f7..a0dd25de07ee 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,9 +1,8 @@
 //===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,28 +52,50 @@ def vfp_f16imm : Operand<f16>,
   let ParserMatchClass = FPImmOperand;
 }
 
-def vfp_f32imm : Operand<f32>,
-                 PatLeaf<(f32 fpimm), [{
-      return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
-    }], SDNodeXForm<fpimm, [{
+def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
       APFloat InVal = N->getValueAPF();
       uint32_t enc = ARM_AM::getFP32Imm(InVal);
       return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
-    }]>> {
+    }]>;
+
+def gi_vfp_f32imm : GICustomOperandRenderer<"renderVFPF32Imm">,
+                    GISDNodeXFormEquiv<vfp_f32imm_xform>;
+
+def vfp_f32imm : Operand<f32>,
+                 PatLeaf<(f32 fpimm), [{
+      return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+    }], vfp_f32imm_xform> {
   let PrintMethod = "printFPImmOperand";
   let ParserMatchClass = FPImmOperand;
+  let GISelPredicateCode = [{
+      const auto &MO = MI.getOperand(1);
+      if (!MO.isFPImm())
+        return false;
+      return ARM_AM::getFP32Imm(MO.getFPImm()->getValueAPF()) != -1;
+    }];
 }
 
-def vfp_f64imm : Operand<f64>,
-                 PatLeaf<(f64 fpimm), [{
-      return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
-    }], SDNodeXForm<fpimm, [{
+def vfp_f64imm_xform : SDNodeXForm<fpimm, [{
       APFloat InVal = N->getValueAPF();
       uint32_t enc = ARM_AM::getFP64Imm(InVal);
       return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
-    }]>> {
+    }]>;
+
+def gi_vfp_f64imm : GICustomOperandRenderer<"renderVFPF64Imm">,
+                    GISDNodeXFormEquiv<vfp_f64imm_xform>;
+
+def vfp_f64imm : Operand<f64>,
+                 PatLeaf<(f64 fpimm), [{
+      return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+    }], vfp_f64imm_xform> {
   let PrintMethod = "printFPImmOperand";
   let ParserMatchClass = FPImmOperand;
+  let GISelPredicateCode = [{
+      const auto &MO = MI.getOperand(1);
+      if (!MO.isFPImm())
+        return false;
+      return ARM_AM::getFP64Imm(MO.getFPImm()->getValueAPF()) != -1;
+    }];
 }
 
 def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
@@ -120,39 +141,45 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
+            Requires<[HasFPRegs]>;
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
-                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
 }
 
+let isUnpredicable = 1 in
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
+            Requires<[HasFPRegs]>;
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
-                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
 }
 
+let isUnpredicable = 1 in
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
@@ -160,6 +187,7 @@ def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
 
 multiclass vfp_ldst_mult<string asm, bit L_bit,
                          InstrItinClass itin, InstrItinClass itin_upd> {
+  let Predicates = [HasFPRegs] in {
   // Double Precision
   def DIA :
     AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -227,6 +255,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     // VFP pipelines.
     let D = VFPNeonDomain;
   }
+  }
 }
 
 let hasSideEffects = 0 in {
@@ -273,13 +302,13 @@ def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
 }
 
 def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
-                Requires<[HasVFP2]>;
+                Requires<[HasFPRegs]>;
 def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>,
-                Requires<[HasVFP2]>;
+                Requires<[HasFPRegs]>;
 def : InstAlias<"vpop${p} $r",  (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>,
-                Requires<[HasVFP2]>;
+                Requires<[HasFPRegs]>;
 def : InstAlias<"vpop${p} $r",  (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>,
-                Requires<[HasVFP2]>;
+                Requires<[HasFPRegs]>;
 defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
                          (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
 defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
@@ -295,6 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
 // However, there is no UAL syntax for them, so we keep them around for
 // (dis)assembly only.
 multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+  let Predicates = [HasFPRegs] in {
   // Unknown precision
   def XIA :
     AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -317,6 +347,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> {
     let Inst{21}    = 1;            // Writeback
     let Inst{20}    = L_bit;
   }
+  }
 }
 
 defm FLDM : vfp_ldstx_mult<"fldm", 1>;
@@ -452,7 +483,7 @@ def VNMULH : AHbI<0b11100, 0b10, 1, 0,
 
 multiclass vsel_inst<string op, bits<2> opc, int CC> {
   let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
-      Uses = [CPSR], AddedComplexity = 4 in {
+      Uses = [CPSR], AddedComplexity = 4, isUnpredicable = 1 in {
     def H : AHbInp<0b11100, opc, 0,
                    (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
@@ -480,7 +511,8 @@ defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
 defm VSELVS : vsel_inst<"vs", 0b01, 6>;
 
 multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
-  let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+  let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+      isUnpredicable = 1 in {
     def H : AHbInp<0b11101, 0b00, opc,
                    (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
@@ -501,8 +533,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
   }
 }
 
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
+defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
 
 // Match reassociated forms only if not sign dependent rounding.
 def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -571,9 +603,9 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
 }
 
 def VABSH  : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
-                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sm),
                    IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
-                   []>;
+                   [(set HPR:$Sd, (fabs (f16 HPR:$Sm)))]>;
 
 let Defs = [FPSCR_NZCV] in {
 def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
@@ -682,8 +714,8 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
-def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
-                  (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f32 (fpextend HPR:$Sm)),
+              (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
 def : FP16Pat<(f16_to_fp GPR:$a),
               (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
@@ -693,8 +725,8 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
-def : FullFP16Pat<(f16 (fpround SPR:$Sm)),
-                  (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+def : FP16Pat<(f16 (fpround SPR:$Sm)),
+              (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
 def : FP16Pat<(fp_to_f16 SPR:$a),
               (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
 
@@ -825,7 +857,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
 
       let Inst{17-16} = rm;
 
-      // Encode instruction operands
+      // Encode instruction operands.
       let Inst{3-0} = Dm{3-0};
       let Inst{5}   = Dm{4};
       let Inst{8} = 1;
@@ -906,9 +938,9 @@ def VNEGH  : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
 
 multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
   def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
-               (outs SPR:$Sd), (ins SPR:$Sm),
+               (outs HPR:$Sd), (ins HPR:$Sm),
                NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
-               []>,
+               [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                Requires<[HasFullFP16]> {
     let Inst{7} = op2;
     let Inst{16} = op;
@@ -948,11 +980,12 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
 
 multiclass vrint_inst_anpm<string opc, bits<2> rm,
                            SDPatternOperator node = null_frag> {
-  let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+  let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
+      isUnpredicable = 1 in {
     def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
-                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sm),
                    NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
-                   []>,
+                   [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                    Requires<[HasFullFP16]> {
       let Inst{17-16} = rm;
     }
@@ -998,22 +1031,24 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
              Sched<[WriteFPSQRT32]>;
 
 def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sm),
                   IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
-                  []>;
+                  [(set HPR:$Sd, (fsqrt (f16 HPR:$Sm)))]>;
 
 let hasSideEffects = 0 in {
 let isMoveReg = 1 in {
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+             Requires<[HasFPRegs64]>;
 
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+             Requires<[HasFPRegs]>;
 } // isMoveReg
 
-let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
 def VMOVH  : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
                   IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
@@ -1035,6 +1070,7 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
                       (outs GPR:$Rt), (ins SPR:$Sn),
                       IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
                       [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+             Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1058,7 +1094,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
                       (outs SPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
                       [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
-             Requires<[HasVFP2, UseVMOVSR]>,
+             Requires<[HasFPRegs, UseVMOVSR]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -1084,6 +1120,7 @@ def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                         (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                         IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                  [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1112,6 +1149,7 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                  IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   bits<5> src1;
   bits<4> Rt;
@@ -1139,6 +1177,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1183,6 +1222,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> dst1;
@@ -1206,10 +1246,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
 
 // Move H->R, clearing top 16 bits
 def VMOVRH : AVConv2I<0b11100001, 0b1001,
-                      (outs GPR:$Rt), (ins HPR:$Sn),
+                      (outs rGPR:$Rt), (ins HPR:$Sn),
                       IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
-                      [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
-             Requires<[HasFullFP16]>,
+                      [(set rGPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1222,14 +1262,16 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  let isUnpredicable = 1;
 }
 
 // Move R->H, clearing top 16 bits
 def VMOVHR : AVConv4I<0b11100000, 0b1001,
-                      (outs HPR:$Sn), (ins GPR:$Rt),
+                      (outs HPR:$Sn), (ins rGPR:$Rt),
                       IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
-                      [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
-             Requires<[HasFullFP16]>,
+                      [(set HPR:$Sn, (arm_vmovhr rGPR:$Rt))]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -1242,6 +1284,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
 
   let Inst{6-5}   = 0b00;
   let Inst{3-0}   = 0b0000;
+
+  let isUnpredicable = 1;
 }
 
 // FMRDH: SPR -> GPR
@@ -1348,6 +1392,7 @@ def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
                                []>,
              Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // s32
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)),
@@ -1393,6 +1438,7 @@ def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
                                 []>,
              Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // u32
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)),
@@ -1497,6 +1543,7 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // Z bit
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(i32 (fp_to_sint HPR:$a)),
@@ -1543,6 +1590,7 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 1; // Z bit
+  let isUnpredicable = 1;
 }
 
 def : VFPNoNEONPat<(i32 (fp_to_uint HPR:$a)),
@@ -1572,6 +1620,7 @@ def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // Z bit
+  let isUnpredicable = 1;
 }
 
 def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1596,6 +1645,7 @@ def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                                  []>,
               Sched<[WriteFPCVT]> {
   let Inst{7} = 0; // Z bit
+  let isUnpredicable = 1;
 }
 }
 
@@ -1643,6 +1693,8 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
   let Predicates = [HasVFP2, HasDPVFP];
 }
 
+let isUnpredicable = 1 in {
+
 def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
@@ -1667,6 +1719,8 @@ def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
              Requires<[HasFullFP16]>,
              Sched<[WriteFPCVT]>;
 
+} // End of 'let isUnpredicable = 1 in'
+
 def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
@@ -1722,6 +1776,8 @@ def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
 
 // Fixed-Point to FP:
 
+let isUnpredicable = 1 in {
+
 def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
@@ -1746,6 +1802,8 @@ def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
              Requires<[HasFullFP16]>,
              Sched<[WriteFPCVT]>;
 
+} // End of 'let isUnpredicable = 1 in'
+
 def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
@@ -2030,6 +2088,9 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
 def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
           (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
       Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, HPR:$Sdin)),
+          (VFMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+      Requires<[HasFullFP16]>;
 
 def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2208,13 +2269,13 @@ def VMOVDcc  : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
                     IIC_fpUNA64,
                     [(set (f64 DPR:$Dd),
                           (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
-               RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+               RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
 
 def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                     IIC_fpUNA32,
                     [(set (f32 SPR:$Sd),
                           (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
-               RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+               RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
 } // hasSideEffects
 
 //===----------------------------------------------------------------------===//
@@ -2238,15 +2299,16 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
   let Inst{3-0}   = 0b0000;
 }
 
-// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
-// to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
-def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
-                        "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
-
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
+ // APSR is the application level alias of CPSR. This copies the FPSCR N, Z, C,
+ // V flags to APSR.
+ let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+     Rt = 0b1111 /* apsr_nzcv */ in
+ def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
+
  // Application level FPSCR -> GPR
- let hasSideEffects = 1, Uses = [FPSCR] in
+ let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
                         "vmrs", "\t$Rt, fpscr",
                         [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
@@ -2269,6 +2331,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
                                 "vmrs", "\t$Rt, fpinst", []>;
    def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPRnopc:$Rt),
                                  (ins), "vmrs", "\t$Rt, fpinst2", []>;
+   let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+     // System level FPSCR_NZCVQC -> GPR
+     def VMRS_FPSCR_NZCVQC
+       : MovFromVFP<0b0010 /* fpscr_nzcvqc */,
+                    (outs GPR:$Rt), (ins cl_FPSCR_NZCV:$fpscr_in),
+                    "vmrs", "\t$Rt, fpscr_nzcvqc", []>;
+   }
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level FPSCR -> GPR, with context saving for security extensions
+   def VMRS_FPCXTNS : MovFromVFP<0b1110 /* fpcxtns */, (outs GPR:$Rt), (ins),
+                                 "vmrs", "\t$Rt, fpcxtns", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level FPSCR -> GPR, with context saving for security extensions
+   def VMRS_FPCXTS : MovFromVFP<0b1111 /* fpcxts */, (outs GPR:$Rt), (ins),
+                                "vmrs", "\t$Rt, fpcxts", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+   // System level VPR/P0 -> GPR
+   let Uses = [VPR] in
+   def VMRS_VPR : MovFromVFP<0b1100 /* vpr */, (outs GPR:$Rt), (ins),
+                             "vmrs", "\t$Rt, vpr", []>;
+
+   def VMRS_P0  : MovFromVFP<0b1101 /* p0 */, (outs GPR:$Rt), (ins VCCR:$cond),
+                             "vmrs", "\t$Rt, p0", []>;
  }
 }
 
@@ -2291,10 +2380,12 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
+  let Predicates = [HasVFP2];
 }
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  let Defs = [FPSCR] in {
+   let Predicates = [HasFPRegs] in
    // Application level GPR -> FPSCR
    def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
                        "vmsr", "\tfpscr, $src",
@@ -2310,6 +2401,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
    def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpinst2, $src", []>;
  }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level GPR -> FPSCR with context saving for security extensions
+   def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src),
+                               "vmsr", "\tfpcxtns, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+   // System level GPR -> FPSCR with context saving for security extensions
+   def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src),
+                              "vmsr", "\tfpcxts, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+   // System level GPR -> FPSCR_NZCVQC
+   def VMSR_FPSCR_NZCVQC
+     : MovToVFP<0b0010 /* fpscr_nzcvqc */,
+                (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src),
+                "vmsr", "\tfpscr_nzcvqc, $src", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+   // System level GPR -> VPR/P0
+   let Defs = [VPR] in
+   def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src),
+                           "vmsr", "\tvpr, $src", []>;
+
+   def VMSR_P0  : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src),
+                           "vmsr", "\tp0, $src", []>;
+ }
 }
 
 //===----------------------------------------------------------------------===//
@@ -2371,6 +2489,8 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
   let Inst{11-8}  = 0b1001;     // Half precision
   let Inst{7-4}   = 0b0000;
   let Inst{3-0}   = imm{3-0};
+
+  let isUnpredicable = 1;
 }
 }
 
@@ -2426,7 +2546,7 @@ def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
 def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
 
 
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
                     (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
 def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
@@ -2484,3 +2604,126 @@ def : VFP3InstAlias<"fconstd${p} $Dd, $val",
                     (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
 def : VFP3InstAlias<"fconsts${p} $Sd, $val",
                     (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
+
+def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ops),
+                      AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+                      "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+  bits<13> regs;                 // register-list operand encoding
+  let Inst{31-23} = 0b111011001;
+  let Inst{22} = regs{12};       // top bit of the encoding
+  let Inst{21-16} = 0b011111;
+  let Inst{15-12} = regs{11-8};
+  let Inst{11-8} = 0b1011;       // VSCCLRMS uses 0b1010 in this field
+  let Inst{7-0} = regs{7-0};
+  // Decoded by a custom method; VSCCLRMS names the same method.
+  let DecoderMethod = "DecodeVSCCLRM";
+  // Gated on both HasV8_1MMainline and Has8MSecExt.
+  list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+def VSCCLRMS : VFPXI<(outs), (ins pred:$p, fp_sreglist_with_vpr:$regs, variable_ops),
+                      AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+                      "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+  bits<13> regs;                 // register-list operand encoding
+  let Inst{31-23} = 0b111011001;
+  let Inst{22} = regs{8};        // VSCCLRMD puts regs{12} here instead
+  let Inst{21-16} = 0b011111;
+  let Inst{15-12} = regs{12-9};  // VSCCLRMD puts regs{11-8} here instead
+  let Inst{11-8} = 0b1010;       // VSCCLRMD uses 0b1011 in this field
+  let Inst{7-0} = regs{7-0};
+  // Decoded by a custom method; VSCCLRMD names the same method.
+  let DecoderMethod = "DecodeVSCCLRM";
+  // Gated on both HasV8_1MMainline and Has8MSecExt.
+  list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store VFP System Register from/to memory.
+//
+
+class vfp_vstrldr<bit opc, bit P, bit W, bits<4> SysReg, string sysreg,
+                  dag oops, dag iops, IndexMode im, string Dest, string cstr>
+    : VFPI<oops, iops, AddrModeT2_i7s4, 4, im, VFPLdStFrm, IIC_fpSTAT,
+           !if(opc,"vldr","vstr"), !strconcat("\t", sysreg, ", ", Dest), cstr, []>,
+      Sched<[]> {
+  bits<12> addr;                 // split below into Inst{23}, {19-16}, {6-0}
+  let Inst{27-25} = 0b110;
+  let Inst{24} = P;              // P and W are chosen per addressing variant
+  let Inst{23} = addr{7};
+  let Inst{22} = SysReg{3};      // SysReg bit 3; bits 2-0 go to Inst{15-13}
+  let Inst{21} = W;
+  let Inst{20} = opc;            // opc = 1 gives "vldr", opc = 0 gives "vstr"
+  let Inst{19-16} = addr{11-8};
+  let Inst{15-13} = SysReg{2-0};
+  let Inst{12-7} = 0b011111;
+  let Inst{6-0} = addr{6-0};
+  list<Predicate> Predicates = [HasFPRegs, HasV8_1MMainline];
+  let mayLoad = opc;             // load form when opc is set
+  let mayStore = !if(opc, 0b0, 0b1); // store form otherwise (inverse of opc)
+  let hasSideEffects = 1;
+}
+
+multiclass vfp_vstrldr_sysreg<bit opc, bits<4> SysReg, string sysreg,
+                              dag oops=(outs), dag iops=(ins)> {
+  def _off :  // P=1, W=0, no $wb. NOTE(review): uses IndexModePost; intended?
+    vfp_vstrldr<opc, 1, 0, SysReg, sysreg,
+                oops, !con(iops, (ins t2addrmode_imm7s4:$addr)),
+                IndexModePost, "$addr", "" > {
+    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<false>";
+  }
+
+  def _pre :  // P=1, W=1: adds a $wb output tied to $addr.base
+    vfp_vstrldr<opc, 1, 1, SysReg, sysreg,
+                !con(oops, (outs GPRnopc:$wb)),
+                !con(iops, (ins t2addrmode_imm7s4_pre:$addr)),
+                IndexModePre, "$addr!", "$addr.base = $wb"> {
+    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+  }
+
+  def _post :  // P=0, W=1: adds a $wb output tied to $Rn.base
+    vfp_vstrldr<opc, 0, 1, SysReg, sysreg,
+                !con(oops, (outs GPRnopc:$wb)),
+                !con(iops, (ins t2_addr_offset_none:$Rn,
+                                t2am_imm7s4_offset:$addr)),
+                IndexModePost, "$Rn$addr", "$Rn.base = $wb"> {
+   bits<4> Rn;
+   let Inst{19-16} = Rn{3-0};  // replaces the class's addr{11-8} mapping
+   let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+ }
+}
+
+let Defs = [FPSCR] in {  // NOTE(review): Defs here, Uses on VSTR_VPR?
+  defm VSTR_FPSCR          : vfp_vstrldr_sysreg<0b0,0b0001, "fpscr">;
+  defm VSTR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b0,0b0010, "fpscr_nzcvqc">;
+
+  let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+    defm VSTR_FPCXTNS      : vfp_vstrldr_sysreg<0b0,0b1110, "fpcxtns">;
+    defm VSTR_FPCXTS       : vfp_vstrldr_sysreg<0b0,0b1111, "fpcxts">;
+  }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+  let Uses = [VPR] in {  // the vstr form is marked as reading VPR
+    defm VSTR_VPR          : vfp_vstrldr_sysreg<0b0,0b1100, "vpr">;
+  }
+  defm VSTR_P0             : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
+                                                (outs), (ins VCCR:$P0)>;
+}
+
+let Uses = [FPSCR] in {  // NOTE(review): Uses here, Defs on VLDR_VPR?
+  defm VLDR_FPSCR          : vfp_vstrldr_sysreg<0b1,0b0001, "fpscr">;
+  defm VLDR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b1,0b0010, "fpscr_nzcvqc">;
+
+  let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+    defm VLDR_FPCXTNS      : vfp_vstrldr_sysreg<0b1,0b1110, "fpcxtns">;
+    defm VLDR_FPCXTS       : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">;
+  }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+  let Defs = [VPR] in {  // the vldr form is marked as writing VPR
+    defm VLDR_VPR          : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
+  }
+  defm VLDR_P0             : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
+                                                (outs VCCR:$P0), (ins)>;
+}
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 293e734c97cd..4485a474a6df 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- ARMInstructionSelector.cpp ----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -76,6 +75,11 @@ private:
   const ARMRegisterBankInfo &RBI;
   const ARMSubtarget &STI;
 
+  // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel
+  // uses "STI." in the code generated by TableGen. If we want to reuse some of
+  // the custom C++ predicates written for DAGISel, we need to have both around.
+  const ARMSubtarget *Subtarget = &STI;
+
   // Store the opcodes that we might need, so we don't have to check what kind
   // of subtarget (ARM vs Thumb) we have all the time.
   struct OpcodeCache {
@@ -98,6 +102,27 @@ private:
     unsigned STORE8;
     unsigned LOAD8;
 
+    unsigned ADDrr;
+    unsigned ADDri;
+
+    // Used for G_ICMP
+    unsigned CMPrr;
+    unsigned MOVi;
+    unsigned MOVCCi;
+
+    // Used for G_SELECT
+    unsigned MOVCCr;
+
+    unsigned TSTri;
+    unsigned Bcc;
+
+    // Used for G_GLOBAL_VALUE
+    unsigned MOVi32imm;
+    unsigned ConstPoolLoad;
+    unsigned MOV_ga_pcrel;
+    unsigned LDRLIT_ga_pcrel;
+    unsigned LDRLIT_ga_abs;
+
     OpcodeCache(const ARMSubtarget &STI);
   } const Opcodes;
 
@@ -112,6 +137,9 @@ private:
   unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
                                  unsigned Size) const;
 
+  void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+  void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+
 #define GET_GLOBALISEL_PREDICATES_DECL
 #include "ARMGenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATES_DECL
@@ -204,7 +232,7 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
                               MachineRegisterInfo &MRI,
                               const TargetRegisterInfo &TRI,
                               const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+  assert(TII.getSubtarget().hasVFP2Base() && "Can't select merge without VFP");
 
   // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
   // into one DPR.
@@ -235,7 +263,8 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
                                 MachineRegisterInfo &MRI,
                                 const TargetRegisterInfo &TRI,
                                 const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
+  assert(TII.getSubtarget().hasVFP2Base() &&
+         "Can't select unmerge without VFP");
 
   // We only support G_UNMERGE_VALUES as a way to break up one DPR into two
   // GPRs.
@@ -285,6 +314,24 @@ ARMInstructionSelector::OpcodeCache::OpcodeCache(const ARMSubtarget &STI) {
 
   STORE_OPCODE(STORE8, STRBi12);
   STORE_OPCODE(LOAD8, LDRBi12);
+
+  STORE_OPCODE(ADDrr, ADDrr);
+  STORE_OPCODE(ADDri, ADDri);
+
+  STORE_OPCODE(CMPrr, CMPrr);
+  STORE_OPCODE(MOVi, MOVi);
+  STORE_OPCODE(MOVCCi, MOVCCi);
+
+  STORE_OPCODE(MOVCCr, MOVCCr);
+
+  STORE_OPCODE(TSTri, TSTri);
+  STORE_OPCODE(Bcc, Bcc);
+
+  STORE_OPCODE(MOVi32imm, MOVi32imm);
+  ConstPoolLoad = isThumb ? ARM::t2LDRpci : ARM::LDRi12;
+  STORE_OPCODE(MOV_ga_pcrel, MOV_ga_pcrel);
+  LDRLIT_ga_pcrel = isThumb ? ARM::tLDRLIT_ga_pcrel : ARM::LDRLIT_ga_pcrel;
+  LDRLIT_ga_abs = isThumb ? ARM::tLDRLIT_ga_abs : ARM::LDRLIT_ga_abs;
 #undef MAP_OPCODE
 }
 
@@ -408,10 +455,11 @@ getComparePreds(CmpInst::Predicate Pred) {
 }
 
 struct ARMInstructionSelector::CmpConstants {
-  CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank,
-               unsigned OpSize)
+  CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned SelectOpcode,
+               unsigned OpRegBank, unsigned OpSize)
       : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode),
-        OperandRegBankID(OpRegBank), OperandSize(OpSize) {}
+        SelectResultOpcode(SelectOpcode), OperandRegBankID(OpRegBank),
+        OperandSize(OpSize) {}
 
   // The opcode used for performing the comparison.
   const unsigned ComparisonOpcode;
@@ -420,6 +468,9 @@ struct ARMInstructionSelector::CmpConstants {
   // ARM::INSTRUCTION_LIST_END if we don't need to read the flags.
   const unsigned ReadFlagsOpcode;
 
+  // The opcode used for materializing the result of the comparison.
+  const unsigned SelectResultOpcode;
+
   // The assumed register bank ID for the operands.
   const unsigned OperandRegBankID;
 
@@ -439,7 +490,7 @@ struct ARMInstructionSelector::InsertInfo {
 
 void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg,
                                          unsigned Constant) const {
-  (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi))
+  (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Opcodes.MOVi))
       .addDef(DestReg)
       .addImm(Constant)
       .add(predOps(ARMCC::AL))
@@ -542,7 +593,8 @@ bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I,
   }
 
   // Select either 1 or the previous result based on the value of the flags.
-  auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi))
+  auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc,
+                       TII.get(Helper.SelectResultOpcode))
                    .addDef(ResReg)
                    .addUse(PrevRes)
                    .addImm(1)
@@ -569,7 +621,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();
 
-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();
 
   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
@@ -577,7 +629,9 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
   auto addOpsForConstantPoolLoad = [&MF, Alignment,
                                     Size](MachineInstrBuilder &MIB,
                                           const GlobalValue *GV, bool IsSBREL) {
-    assert(MIB->getOpcode() == ARM::LDRi12 && "Unsupported instruction");
+    assert((MIB->getOpcode() == ARM::LDRi12 ||
+            MIB->getOpcode() == ARM::t2LDRpci) &&
+           "Unsupported instruction");
     auto ConstPool = MF.getConstantPool();
     auto CPIndex =
         // For SB relative entries we need a target-specific constant pool.
@@ -587,21 +641,38 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
                   ARMConstantPoolConstant::Create(GV, ARMCP::SBREL), Alignment)
             : ConstPool->getConstantPoolIndex(GV, Alignment);
     MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
-        .addMemOperand(
-            MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
-                                    MachineMemOperand::MOLoad, Size, Alignment))
-        .addImm(0)
-        .add(predOps(ARMCC::AL));
+        .addMemOperand(MF.getMachineMemOperand(
+            MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+            Size, Alignment));
+    if (MIB->getOpcode() == ARM::LDRi12)
+      MIB.addImm(0);
+    MIB.add(predOps(ARMCC::AL));
+  };
+
+  auto addGOTMemOperand = [this, &MF, Alignment](MachineInstrBuilder &MIB) {
+    MIB.addMemOperand(MF.getMachineMemOperand(
+        MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad,
+        TM.getProgramPointerSize(), Alignment));
   };
 
   if (TM.isPositionIndependent()) {
     bool Indirect = STI.isGVIndirectSymbol(GV);
+
+    // For ARM mode, we have different pseudoinstructions for direct accesses
+    // and indirect accesses, and the ones for indirect accesses include the
+    // load from GOT. For Thumb mode, we use the same pseudoinstruction for both
+    // direct and indirect accesses, and we need to manually generate the load
+    // from GOT.
+    bool UseOpcodeThatLoads = Indirect && !STI.isThumb();
+
     // FIXME: Taking advantage of MOVT for ELF is pretty involved, so we don't
     // support it yet. See PR28229.
     unsigned Opc =
         UseMovt && !STI.isTargetELF()
-            ? (Indirect ? ARM::MOV_ga_pcrel_ldr : ARM::MOV_ga_pcrel)
-            : (Indirect ? ARM::LDRLIT_ga_pcrel_ldr : ARM::LDRLIT_ga_pcrel);
+            ? (UseOpcodeThatLoads ? (unsigned)ARM::MOV_ga_pcrel_ldr
+                                  : Opcodes.MOV_ga_pcrel)
+            : (UseOpcodeThatLoads ? (unsigned)ARM::LDRLIT_ga_pcrel_ldr
+                                  : Opcodes.LDRLIT_ga_pcrel);
     MIB->setDesc(TII.get(Opc));
 
     int TargetFlags = ARMII::MO_NO_FLAG;
@@ -611,17 +682,35 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
       TargetFlags |= ARMII::MO_GOT;
     MIB->getOperand(1).setTargetFlags(TargetFlags);
 
-    if (Indirect)
-      MIB.addMemOperand(MF.getMachineMemOperand(
-          MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad,
-          TM.getProgramPointerSize(), Alignment));
+    if (Indirect) {
+      if (!UseOpcodeThatLoads) {
+        auto ResultReg = MIB->getOperand(0).getReg();
+        auto AddressReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+
+        MIB->getOperand(0).setReg(AddressReg);
+
+        auto InsertBefore = std::next(MIB->getIterator());
+        auto MIBLoad = BuildMI(MBB, InsertBefore, MIB->getDebugLoc(),
+                               TII.get(Opcodes.LOAD32))
+                           .addDef(ResultReg)
+                           .addReg(AddressReg)
+                           .addImm(0)
+                           .add(predOps(ARMCC::AL));
+        addGOTMemOperand(MIBLoad);
+
+        if (!constrainSelectedInstRegOperands(*MIBLoad, TII, TRI, RBI))
+          return false;
+      } else {
+        addGOTMemOperand(MIB);
+      }
+    }
 
     return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
   }
 
   bool isReadOnly = STI.getTargetLowering()->isReadOnly(GV);
   if (STI.isROPI() && isReadOnly) {
-    unsigned Opc = UseMovt ? ARM::MOV_ga_pcrel : ARM::LDRLIT_ga_pcrel;
+    unsigned Opc = UseMovt ? Opcodes.MOV_ga_pcrel : Opcodes.LDRLIT_ga_pcrel;
     MIB->setDesc(TII.get(Opc));
     return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
   }
@@ -630,19 +719,19 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
     MachineInstrBuilder OffsetMIB;
     if (UseMovt) {
       OffsetMIB = BuildMI(MBB, *MIB, MIB->getDebugLoc(),
-                          TII.get(ARM::MOVi32imm), Offset);
+                          TII.get(Opcodes.MOVi32imm), Offset);
       OffsetMIB.addGlobalAddress(GV, /*Offset*/ 0, ARMII::MO_SBREL);
     } else {
       // Load the offset from the constant pool.
-      OffsetMIB =
-          BuildMI(MBB, *MIB, MIB->getDebugLoc(), TII.get(ARM::LDRi12), Offset);
+      OffsetMIB = BuildMI(MBB, *MIB, MIB->getDebugLoc(),
+                          TII.get(Opcodes.ConstPoolLoad), Offset);
       addOpsForConstantPoolLoad(OffsetMIB, GV, /*IsSBREL*/ true);
     }
     if (!constrainSelectedInstRegOperands(*OffsetMIB, TII, TRI, RBI))
       return false;
 
     // Add the offset to the SB register.
-    MIB->setDesc(TII.get(ARM::ADDrr));
+    MIB->setDesc(TII.get(Opcodes.ADDrr));
     MIB->RemoveOperand(1);
     MIB.addReg(ARM::R9) // FIXME: don't hardcode R9
         .addReg(Offset)
@@ -654,18 +743,18 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
 
   if (STI.isTargetELF()) {
     if (UseMovt) {
-      MIB->setDesc(TII.get(ARM::MOVi32imm));
+      MIB->setDesc(TII.get(Opcodes.MOVi32imm));
     } else {
       // Load the global's address from the constant pool.
-      MIB->setDesc(TII.get(ARM::LDRi12));
+      MIB->setDesc(TII.get(Opcodes.ConstPoolLoad));
       MIB->RemoveOperand(1);
       addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false);
     }
   } else if (STI.isTargetMachO()) {
     if (UseMovt)
-      MIB->setDesc(TII.get(ARM::MOVi32imm));
+      MIB->setDesc(TII.get(Opcodes.MOVi32imm));
     else
-      MIB->setDesc(TII.get(ARM::LDRLIT_ga_abs));
+      MIB->setDesc(TII.get(Opcodes.LDRLIT_ga_abs));
   } else {
     LLVM_DEBUG(dbgs() << "Object format not supported yet\n");
     return false;
@@ -680,13 +769,13 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
   auto InsertBefore = std::next(MIB->getIterator());
   auto &DbgLoc = MIB->getDebugLoc();
 
-  // Compare the condition to 0.
+  // Compare the condition to 1.
   auto CondReg = MIB->getOperand(1).getReg();
   assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
          "Unsupported types for select operation");
-  auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri))
+  auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(Opcodes.TSTri))
                   .addUse(CondReg)
-                  .addImm(0)
+                  .addImm(1)
                   .add(predOps(ARMCC::AL));
   if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
     return false;
@@ -699,7 +788,7 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
   assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
          validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
          "Unsupported types for select operation");
-  auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr))
+  auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(Opcodes.MOVCCr))
                    .addDef(ResReg)
                    .addUse(TrueReg)
                    .addUse(FalseReg)
@@ -713,12 +802,37 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
 
 bool ARMInstructionSelector::selectShift(unsigned ShiftOpc,
                                          MachineInstrBuilder &MIB) const {
+  assert(!STI.isThumb() && "Unsupported subtarget");
   MIB->setDesc(TII.get(ARM::MOVsr));
   MIB.addImm(ShiftOpc);
   MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
+void ARMInstructionSelector::renderVFPF32Imm(
+    MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+  assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
+         "Expected G_FCONSTANT");
+  // Encode the FP constant (operand 1); -1 is treated as "not encodable".
+  APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
+  int FPImmEncoding = ARM_AM::getFP32Imm(FPImmValue);
+  assert(FPImmEncoding != -1 && "Invalid immediate value");
+  // Append the encoding to the new instruction as an immediate operand.
+  NewInstBuilder.addImm(FPImmEncoding);
+}
+
+void ARMInstructionSelector::renderVFPF64Imm(
+    MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+  assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
+         "Expected G_FCONSTANT");
+  // Encode the FP constant (operand 1); -1 is treated as "not encodable".
+  APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
+  int FPImmEncoding = ARM_AM::getFP64Imm(FPImmValue);
+  assert(FPImmEncoding != -1 && "Invalid immediate value");
+  // Append the encoding to the new instruction as an immediate operand.
+  NewInstBuilder.addImm(FPImmEncoding);
+}
+
 bool ARMInstructionSelector::select(MachineInstr &I,
                                     CodeGenCoverage &CoverageInfo) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
@@ -748,12 +862,8 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     isSExt = true;
     LLVM_FALLTHROUGH;
   case G_ZEXT: {
-    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
-    // FIXME: Smaller destination sizes coming soon!
-    if (DstTy.getSizeInBits() != 32) {
-      LLVM_DEBUG(dbgs() << "Unsupported destination size for extension");
-      return false;
-    }
+    assert(MRI.getType(I.getOperand(0).getReg()).getSizeInBits() <= 32 &&
+           "Unsupported destination size for extension");
 
     LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
     unsigned SrcSize = SrcTy.getSizeInBits();
@@ -869,10 +979,32 @@ bool ARMInstructionSelector::select(MachineInstr &I,
       }
     }
 
+    assert(!STI.isThumb() && "Unsupported subtarget");
     I.setDesc(TII.get(ARM::MOVi));
     MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
     break;
   }
+  case G_FCONSTANT: {
+    // Load from constant pool
+    unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits() / 8;
+    unsigned Alignment = Size;
+
+    assert((Size == 4 || Size == 8) && "Unsupported FP constant type");
+    auto LoadOpcode = Size == 4 ? ARM::VLDRS : ARM::VLDRD;
+
+    auto ConstPool = MF.getConstantPool();
+    auto CPIndex =
+        ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment);
+    MIB->setDesc(TII.get(LoadOpcode));
+    MIB->RemoveOperand(1);
+    MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
+        .addMemOperand(
+            MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+                                    MachineMemOperand::MOLoad, Size, Alignment))
+        .addImm(0)
+        .add(predOps(ARMCC::AL));
+    break;
+  }
   case G_INTTOPTR:
   case G_PTRTOINT: {
     auto SrcReg = I.getOperand(1).getReg();
@@ -900,17 +1032,17 @@ bool ARMInstructionSelector::select(MachineInstr &I,
   case G_SELECT:
     return selectSelect(MIB, MRI);
   case G_ICMP: {
-    CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
-                        ARM::GPRRegBankID, 32);
+    CmpConstants Helper(Opcodes.CMPrr, ARM::INSTRUCTION_LIST_END,
+                        Opcodes.MOVCCi, ARM::GPRRegBankID, 32);
     return selectCmp(Helper, MIB, MRI);
   }
   case G_FCMP: {
-    assert(STI.hasVFP2() && "Can't select fcmp without VFP");
+    assert(STI.hasVFP2Base() && "Can't select fcmp without VFP");
 
     unsigned OpReg = I.getOperand(2).getReg();
     unsigned Size = MRI.getType(OpReg).getSizeInBits();
 
-    if (Size == 64 && STI.isFPOnlySP()) {
+    if (Size == 64 && !STI.hasFP64()) {
       LLVM_DEBUG(dbgs() << "Subtarget only supports single precision");
       return false;
     }
@@ -920,7 +1052,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     }
 
     CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
-                        ARM::FPRRegBankID, Size);
+                        Opcodes.MOVCCi, ARM::FPRRegBankID, Size);
     return selectCmp(Helper, MIB, MRI);
   }
   case G_LSHR:
@@ -931,13 +1063,13 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     return selectShift(ARM_AM::ShiftOpc::lsl, MIB);
   }
   case G_GEP:
-    I.setDesc(TII.get(ARM::ADDrr));
+    I.setDesc(TII.get(Opcodes.ADDrr));
     MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
     break;
   case G_FRAME_INDEX:
     // Add 0 to the given frame index and hope it will eventually be folded into
     // the user(s).
-    I.setDesc(TII.get(ARM::ADDri));
+    I.setDesc(TII.get(Opcodes.ADDri));
     MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
     break;
   case G_GLOBAL_VALUE:
@@ -956,13 +1088,31 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     LLT ValTy = MRI.getType(Reg);
     const auto ValSize = ValTy.getSizeInBits();
 
-    assert((ValSize != 64 || STI.hasVFP2()) &&
+    assert((ValSize != 64 || STI.hasVFP2Base()) &&
            "Don't know how to load/store 64-bit value without VFP");
 
     const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
     if (NewOpc == G_LOAD || NewOpc == G_STORE)
       return false;
 
+    if (ValSize == 1 && NewOpc == Opcodes.STORE8) {
+      // Before storing a 1-bit value, make sure to clear out any unneeded bits.
+      unsigned OriginalValue = I.getOperand(0).getReg();
+
+      unsigned ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass);
+      I.getOperand(0).setReg(ValueToStore);
+
+      auto InsertBefore = I.getIterator();
+      auto AndI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.AND))
+        .addDef(ValueToStore)
+        .addUse(OriginalValue)
+        .addImm(1)
+        .add(predOps(ARMCC::AL))
+        .add(condCodeOp());
+      if (!constrainSelectedInstRegOperands(*AndI, TII, TRI, RBI))
+        return false;
+    }
+
     I.setDesc(TII.get(NewOpc));
 
     if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
@@ -988,17 +1138,19 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     }
 
     // Set the flags.
-    auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
-                    .addReg(I.getOperand(0).getReg())
-                    .addImm(1)
-                    .add(predOps(ARMCC::AL));
+    auto Test =
+        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcodes.TSTri))
+            .addReg(I.getOperand(0).getReg())
+            .addImm(1)
+            .add(predOps(ARMCC::AL));
     if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
       return false;
 
     // Branch conditionally.
-    auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
-                      .add(I.getOperand(1))
-                      .add(predOps(ARMCC::NE, ARM::CPSR));
+    auto Branch =
+        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcodes.Bcc))
+            .add(I.getOperand(1))
+            .add(predOps(ARMCC::NE, ARM::CPSR));
     if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
       return false;
     I.eraseFromParent();
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 4a0c24d58474..73a57b297ad6 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -83,41 +82,29 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
   }
 
   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
-      .legalForCartesianProduct({s32}, {s1, s8, s16});
+      .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16});
 
-  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+  getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
       .legalFor({s32})
       .minScalar(0, s32);
 
-  getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
-  getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
-
-  getActionDefinitionsBuilder(G_CONSTANT)
-      .legalFor({s32, p0})
-      .clampScalar(0, s32, s32);
-
-  // We're keeping these builders around because we'll want to add support for
-  // floating point to them.
-  auto &LoadStoreBuilder =
-      getActionDefinitionsBuilder({G_LOAD, G_STORE})
-          .legalForTypesWithMemSize({
-              {s1, p0, 8},
-              {s8, p0, 8},
-              {s16, p0, 16},
-              {s32, p0, 32},
-              {p0, p0, 32}});
-
-  if (ST.isThumb()) {
-    // FIXME: merge with the code for non-Thumb.
-    computeTables();
-    verify(*ST.getInstrInfo());
-    return;
-  }
+  if (ST.hasNEON())
+    getActionDefinitionsBuilder({G_ADD, G_SUB})
+        .legalFor({s32, s64})
+        .minScalar(0, s32);
+  else
+    getActionDefinitionsBuilder({G_ADD, G_SUB})
+        .legalFor({s32})
+        .minScalar(0, s32);
 
-  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
-  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+  getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
+    .legalFor({{s32, s32}})
+    .minScalar(0, s32)
+    .clampScalar(1, s32, s32);
 
-  if (ST.hasDivideInARMMode())
+  bool HasHWDivide = (!ST.isThumb() && ST.hasDivideInARMMode()) ||
+                     (ST.isThumb() && ST.hasDivideInThumbMode());
+  if (HasHWDivide)
     getActionDefinitionsBuilder({G_SDIV, G_UDIV})
         .legalFor({s32})
         .clampScalar(0, s32, s32);
@@ -128,7 +115,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
 
   for (unsigned Op : {G_SREM, G_UREM}) {
     setLegalizeScalarToDifferentSizeStrategy(Op, 0, widen_8_16);
-    if (ST.hasDivideInARMMode())
+    if (HasHWDivide)
       setAction({Op, s32}, Lower);
     else if (AEABI(ST))
       setAction({Op, s32}, Custom);
@@ -136,46 +123,57 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
       setAction({Op, s32}, Libcall);
   }
 
-  getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
-
-  if (ST.hasV5TOps()) {
-    getActionDefinitionsBuilder(G_CTLZ)
-        .legalFor({s32})
-        .clampScalar(0, s32, s32);
-    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
-        .lowerFor({s32})
-        .clampScalar(0, s32, s32);
-  } else {
-    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
-        .libcallFor({s32})
-        .clampScalar(0, s32, s32);
-    getActionDefinitionsBuilder(G_CTLZ)
-        .lowerFor({s32})
-        .clampScalar(0, s32, s32);
-  }
-
-  getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
-
-  getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},
-                                                                 {s1});
+  getActionDefinitionsBuilder(G_INTTOPTR)
+      .legalFor({{p0, s32}})
+      .minScalar(1, s32);
+  getActionDefinitionsBuilder(G_PTRTOINT)
+      .legalFor({{s32, p0}})
+      .minScalar(0, s32);
 
-  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+  getActionDefinitionsBuilder(G_CONSTANT)
+      .legalFor({s32, p0})
+      .clampScalar(0, s32, s32);
 
   getActionDefinitionsBuilder(G_ICMP)
       .legalForCartesianProduct({s1}, {s32, p0})
       .minScalar(1, s32);
 
+  getActionDefinitionsBuilder(G_SELECT)
+      .legalForCartesianProduct({s32, p0}, {s1})
+      .minScalar(0, s32);
+
   // We're keeping these builders around because we'll want to add support for
   // floating point to them.
+  auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE})
+                               .legalForTypesWithMemDesc({{s1, p0, 8, 8},
+                                                          {s8, p0, 8, 8},
+                                                          {s16, p0, 16, 8},
+                                                          {s32, p0, 32, 8},
+                                                          {p0, p0, 32, 8}})
+                               .unsupportedIfMemSizeNotPow2();
+
+  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+
   auto &PhiBuilder =
-      getActionDefinitionsBuilder(G_PHI).legalFor({s32, p0}).minScalar(0, s32);
+      getActionDefinitionsBuilder(G_PHI)
+          .legalFor({s32, p0})
+          .minScalar(0, s32);
+
+  getActionDefinitionsBuilder(G_GEP)
+      .legalFor({{p0, s32}})
+      .minScalar(1, s32);
 
-  if (!ST.useSoftFloat() && ST.hasVFP2()) {
+  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+
+  if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
     getActionDefinitionsBuilder(
         {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
         .legalFor({s32, s64});
 
-    LoadStoreBuilder.legalFor({{s64, p0}});
+    LoadStoreBuilder
+        .legalForTypesWithMemDesc({{s64, p0, 64, 32}})
+        .maxScalar(0, s32);
     PhiBuilder.legalFor({s64});
 
     getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1},
@@ -219,13 +217,33 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
         .libcallForCartesianProduct({s32, s64}, {s32});
   }
 
-  if (!ST.useSoftFloat() && ST.hasVFP4())
+  if (!ST.useSoftFloat() && ST.hasVFP4Base())
     getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
   else
     getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64});
 
   getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
 
+  if (ST.hasV5TOps()) {
+    getActionDefinitionsBuilder(G_CTLZ)
+        .legalFor({s32, s32})
+        .clampScalar(1, s32, s32)
+        .clampScalar(0, s32, s32);
+    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+        .lowerFor({s32, s32})
+        .clampScalar(1, s32, s32)
+        .clampScalar(0, s32, s32);
+  } else {
+    getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+        .libcallFor({s32, s32})
+        .clampScalar(1, s32, s32)
+        .clampScalar(0, s32, s32);
+    getActionDefinitionsBuilder(G_CTLZ)
+        .lowerFor({s32, s32})
+        .clampScalar(1, s32, s32)
+        .clampScalar(0, s32, s32);
+  }
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
@@ -351,7 +369,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
     return false;
   case G_SREM:
   case G_UREM: {
-    unsigned OriginalResult = MI.getOperand(0).getReg();
+    Register OriginalResult = MI.getOperand(0).getReg();
     auto Size = MRI.getType(OriginalResult).getSizeInBits();
     if (Size != 32)
       return false;
@@ -360,24 +378,17 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
         MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
 
     // Our divmod libcalls return a struct containing the quotient and the
-    // remainder. We need to create a virtual register for it.
+    // remainder. Create a new, unused register for the quotient and use the
+    // destination of the original instruction for the remainder.
     Type *ArgTy = Type::getInt32Ty(Ctx);
     StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
-    auto RetVal = MRI.createGenericVirtualRegister(
-        getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
-
-    auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+    Register RetRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+                          OriginalResult};
+    auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy},
                                 {{MI.getOperand(1).getReg(), ArgTy},
                                  {MI.getOperand(2).getReg(), ArgTy}});
     if (Status != LegalizerHelper::Legalized)
       return false;
-
-    // The remainder is the second result of divmod. Split the return value into
-    // a new, unused register for the quotient and the destination of the
-    // original instruction for the remainder.
-    MIRBuilder.buildUnmerge(
-        {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
-        RetVal);
     break;
   }
   case G_FCMP: {
@@ -405,7 +416,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
     auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
     auto *RetTy = Type::getInt32Ty(Ctx);
 
-    SmallVector<unsigned, 2> Results;
+    SmallVector<Register, 2> Results;
     for (auto Libcall : Libcalls) {
       auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
       auto Status =
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index 527bf87f1093..e95f8cf76103 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -1,9 +1,8 @@
 //===- ARMLegalizerInfo ------------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6da7430a8e51..90a1ce238c3f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -174,12 +173,14 @@ namespace {
         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
-        ArrayRef<std::pair<unsigned, bool>> Regs);
+        ArrayRef<std::pair<unsigned, bool>> Regs,
+        ArrayRef<MachineInstr*> Instrs);
     MachineInstr *CreateLoadStoreDouble(
         MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
         int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
         ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
-        ArrayRef<std::pair<unsigned, bool>> Regs) const;
+        ArrayRef<std::pair<unsigned, bool>> Regs,
+        ArrayRef<MachineInstr*> Instrs) const;
     void FormCandidates(const MemOpQueue &MemOps);
     MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -623,7 +624,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
-    ArrayRef<std::pair<unsigned, bool>> Regs) {
+    ArrayRef<std::pair<unsigned, bool>> Regs,
+    ArrayRef<MachineInstr*> Instrs) {
   unsigned NumRegs = Regs.size();
   assert(NumRegs > 1);
 
@@ -815,6 +817,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
   for (const std::pair<unsigned, bool> &R : Regs)
     MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
 
+  MIB.cloneMergedMemRefs(Instrs);
+
   return MIB.getInstr();
 }
 
@@ -822,7 +826,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
     ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
-    ArrayRef<std::pair<unsigned, bool>> Regs) const {
+    ArrayRef<std::pair<unsigned, bool>> Regs,
+    ArrayRef<MachineInstr*> Instrs) const {
   bool IsLoad = isi32Load(Opcode);
   assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
   unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
@@ -838,6 +843,7 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
        .addReg(Regs[1].first, getKillRegState(Regs[1].second));
   }
   MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+  MIB.cloneMergedMemRefs(Instrs);
   return MIB.getInstr();
 }
 
@@ -895,10 +901,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   MachineInstr *Merged = nullptr;
   if (Cand.CanMergeToLSDouble)
     Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
-                                   Opcode, Pred, PredReg, DL, Regs);
+                                   Opcode, Pred, PredReg, DL, Regs,
+                                   Cand.Instrs);
   if (!Merged && Cand.CanMergeToLSMulti)
     Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
-                                  Opcode, Pred, PredReg, DL, Regs);
+                                  Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
   if (!Merged)
     return nullptr;
 
@@ -1287,7 +1294,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
       // can still change to a writeback form as that will save us 2 bytes
       // of code size. It can create WAW hazards though, so only do it if
       // we're minimizing code size.
-      if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+      if (!STI->hasMinSize() || !BaseKill)
         return false;
 
       bool HighRegsUsed = false;
@@ -1436,14 +1443,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
       .addImm(Pred).addReg(PredReg)
       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
-                            getKillRegState(MO.isKill())));
+                            getKillRegState(MO.isKill())))
+      .cloneMemRefs(*MI);
   } else if (isLd) {
     if (isAM2) {
       // LDR_PRE, LDR_POST
       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
           .addReg(Base, RegState::Define)
-          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
+          .cloneMemRefs(*MI);
       } else {
         int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
@@ -1451,7 +1460,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
             .addReg(Base)
             .addReg(0)
             .addImm(Imm)
-            .add(predOps(Pred, PredReg));
+            .add(predOps(Pred, PredReg))
+            .cloneMemRefs(*MI);
       }
     } else {
       // t2LDR_PRE, t2LDR_POST
@@ -1459,7 +1469,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
           .addReg(Base, RegState::Define)
           .addReg(Base)
           .addImm(Offset)
-          .add(predOps(Pred, PredReg));
+          .add(predOps(Pred, PredReg))
+          .cloneMemRefs(*MI);
     }
   } else {
     MachineOperand &MO = MI->getOperand(0);
@@ -1474,14 +1485,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
           .addReg(Base)
           .addReg(0)
           .addImm(Imm)
-          .add(predOps(Pred, PredReg));
+          .add(predOps(Pred, PredReg))
+          .cloneMemRefs(*MI);
     } else {
       // t2STR_PRE, t2STR_POST
       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
           .addReg(MO.getReg(), getKillRegState(MO.isKill()))
           .addReg(Base)
           .addImm(Offset)
-          .add(predOps(Pred, PredReg));
+          .add(predOps(Pred, PredReg))
+          .cloneMemRefs(*MI);
     }
   }
   MBB.erase(MBBI);
@@ -1541,7 +1554,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
   // Transfer implicit operands.
   for (const MachineOperand &MO : MI.implicit_operands())
     MIB.add(MO);
-  MIB.setMemRefs(MI.memoperands());
+  MIB.cloneMemRefs(MI);
 
   MBB.erase(MBBI);
   return true;
@@ -1581,7 +1594,9 @@ static bool isMemoryOp(const MachineInstr &MI) {
   const MachineMemOperand &MMO = **MI.memoperands_begin();
 
   // Don't touch volatile memory accesses - we may be changing their order.
-  if (MMO.isVolatile())
+  // TODO: We could allow unordered and monotonic atomics here, but we need to
+  // make sure the resulting ldm/stm is correctly marked as atomic. 
+  if (MMO.isVolatile() || MMO.isAtomic())
     return false;
 
   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
@@ -1607,19 +1622,26 @@ static void InsertLDR_STR(MachineBasicBlock &MBB,
                           bool isDef, unsigned NewOpc, unsigned Reg,
                           bool RegDeadKill, bool RegUndef, unsigned BaseReg,
                           bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
-                          unsigned PredReg, const TargetInstrInfo *TII) {
+                          unsigned PredReg, const TargetInstrInfo *TII,
+                          MachineInstr *MI) {
   if (isDef) {
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                       TII->get(NewOpc))
       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+    // FIXME: This is overly conservative; the new instruction accesses 4
+    // bytes, not 8.
+    MIB.cloneMemRefs(*MI);
   } else {
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                       TII->get(NewOpc))
       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+    // FIXME: This is overly conservative; the new instruction accesses 4
+    // bytes, not 8.
+    MIB.cloneMemRefs(*MI);
   }
 }
 
@@ -1677,7 +1699,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
         .addReg(BaseReg, getKillRegState(BaseKill))
         .addImm(Pred).addReg(PredReg)
         .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
-        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill))
+        .cloneMemRefs(*MI);
       ++NumLDRD2LDM;
     } else {
       BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
@@ -1686,7 +1709,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
         .addReg(EvenReg,
                 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
         .addReg(OddReg,
-                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
+                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef))
+        .cloneMemRefs(*MI);
       ++NumSTRD2STM;
     }
   } else {
@@ -1704,9 +1728,10 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
     if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
       assert(!TRI->regsOverlap(OddReg, BaseReg));
       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
-                    false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+                    false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
-                    false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+                    false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+                    MI);
     } else {
       if (OddReg == EvenReg && EvenDeadKill) {
         // If the two source operands are the same, the kill marker is
@@ -1719,9 +1744,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
       if (EvenReg == BaseReg)
         EvenDeadKill = false;
       InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
-                    EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+                    EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
+                    MI);
       InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
-                    OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+                    OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+                    MI);
     }
     if (isLd)
       ++NumLDRD2LDR;
@@ -2048,6 +2075,11 @@ char ARMPreAllocLoadStoreOpt::ID = 0;
 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
                 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
 
+// Limit the number of instructions to be rescheduled.
+// FIXME: tune this limit, and/or come up with some better heuristics.
+static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
+                                          cl::init(8), cl::Hidden);
+
 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
     return false;
@@ -2140,7 +2172,8 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   // At the moment, we ignore the memoryoperand's value.
   // If we want to use AliasAnalysis, we should check it accordingly.
   if (!Op0->hasOneMemOperand() ||
-      (*Op0->memoperands_begin())->isVolatile())
+      (*Op0->memoperands_begin())->isVolatile() ||
+      (*Op0->memoperands_begin())->isAtomic())
     return false;
 
   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
@@ -2223,7 +2256,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
       }
 
       // Don't try to reschedule too many instructions.
-      if (NumMove == 8) // FIXME: Tune this limit.
+      if (NumMove == InstReorderLimit)
         break;
 
       // Found a mergable instruction; save information about it.
@@ -2351,10 +2384,13 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   bool RetVal = false;
 
   DenseMap<MachineInstr*, unsigned> MI2LocMap;
-  DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
-  DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
-  SmallVector<unsigned, 4> LdBases;
-  SmallVector<unsigned, 4> StBases;
+  using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
+  using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
+  using BaseVec = SmallVector<unsigned, 4>;
+  Base2InstMap Base2LdsMap;
+  Base2InstMap Base2StsMap;
+  BaseVec LdBases;
+  BaseVec StBases;
 
   unsigned Loc = 0;
   MachineBasicBlock::iterator MBBI = MBB->begin();
@@ -2381,41 +2417,28 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
       bool isLd = isLoadSingle(Opc);
       unsigned Base = MI.getOperand(1).getReg();
       int Offset = getMemoryOpOffset(MI);
-
       bool StopHere = false;
-      if (isLd) {
-        DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
-          Base2LdsMap.find(Base);
-        if (BI != Base2LdsMap.end()) {
-          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
-            if (Offset == getMemoryOpOffset(*BI->second[i])) {
-              StopHere = true;
-              break;
-            }
-          }
-          if (!StopHere)
-            BI->second.push_back(&MI);
-        } else {
-          Base2LdsMap[Base].push_back(&MI);
-          LdBases.push_back(Base);
+      auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
+        MapIt BI = Base2Ops.find(Base);
+        if (BI == Base2Ops.end()) {
+          Base2Ops[Base].push_back(&MI);
+          Bases.push_back(Base);
+          return;
         }
-      } else {
-        DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
-          Base2StsMap.find(Base);
-        if (BI != Base2StsMap.end()) {
-          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
-            if (Offset == getMemoryOpOffset(*BI->second[i])) {
-              StopHere = true;
-              break;
-            }
+        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+          if (Offset == getMemoryOpOffset(*BI->second[i])) {
+            StopHere = true;
+            break;
           }
-          if (!StopHere)
-            BI->second.push_back(&MI);
-        } else {
-          Base2StsMap[Base].push_back(&MI);
-          StBases.push_back(Base);
         }
-      }
+        if (!StopHere)
+          BI->second.push_back(&MI);
+      };
+
+      if (isLd)
+        FindBases(Base2LdsMap, LdBases);
+      else
+        FindBases(Base2StsMap, StBases);
 
       if (StopHere) {
         // Found a duplicate (a base+offset combination that's seen earlier).
diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp
new file mode 100644
index 000000000000..cedf3bd3c74e
--- /dev/null
+++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -0,0 +1,384 @@
+//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
+/// instructions into machine operations.
+/// The expectation is that the loop contains three pseudo instructions:
+/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
+///   form should be in the preheader, whereas the while form should be in the
+///   preheader's only predecessor. TODO: Could DoLoopStart get moved into the
+///   pre-preheader?
+/// - t2LoopDec - placed within the loop body.
+/// - t2LoopEnd - the loop latch terminator.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-low-overhead-loops"
+#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
+
+namespace {
+  /// Pass that lowers, or reverts, the low-overhead loop pseudo instructions.
+  class ARMLowOverheadLoops : public MachineFunctionPass {
+    const ARMBaseInstrInfo    *TII = nullptr;
+    MachineRegisterInfo       *MRI = nullptr;
+    std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+
+  public:
+    static char ID;
+
+    ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
+    /// Requires MachineLoopInfo and declares the CFG as preserved.
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    /// Entry point; does nothing unless the subtarget reports hasLOB().
+    bool runOnMachineFunction(MachineFunction &MF) override;
+    /// Process \p ML, subloops first; returns true if anything changed.
+    bool ProcessLoop(MachineLoop *ML);
+    /// Replace the WhileLoopStart \p MI with a t2CMPri on LR and a t2Bcc (EQ).
+    void RevertWhile(MachineInstr *MI) const;
+    /// Replace the LoopDec \p MI with a t2SUBri that defines LR.
+    void RevertLoopDec(MachineInstr *MI) const;
+    /// Replace the LoopEnd \p MI with a t2CMPri on LR and a t2Bcc (NE).
+    void RevertLoopEnd(MachineInstr *MI) const;
+    /// Emit t2DLS/t2WLS and t2LEUpdate, or revert all three if \p Revert.
+    void Expand(MachineLoop *ML, MachineInstr *Start,
+                MachineInstr *Dec, MachineInstr *End, bool Revert);
+    /// This pass requires the NoVRegs machine function property.
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return ARM_LOW_OVERHEAD_LOOPS_NAME;
+    }
+  };
+}
+  
+char ARMLowOverheadLoops::ID = 0;
+
+INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
+                false, false)
+
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
+  // Nothing to do unless the subtarget reports hasLOB().
+  if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasLOB())
+    return false;
+
+  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
+
+  // Cache per-function state and compute the block offsets used later on.
+  MRI = &MF.getRegInfo();
+  TII = static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  BBUtils.reset(new ARMBasicBlockUtils(MF));
+  BBUtils->computeAllBlockSizes();
+  BBUtils->adjustBBOffsetsAfter(&MF.front());
+
+  // Start from loops without a parent; ProcessLoop recurses into subloops.
+  bool Changed = false;
+  for (MachineLoop *ML : getAnalysis<MachineLoopInfo>())
+    if (!ML->getParentLoop())
+      Changed |= ProcessLoop(ML);
+  return Changed;
+}
+
+bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
+  // Returns true if this loop, or any of its subloops, was modified.
+  bool Changed = false;
+
+  // Process inner loops first.
+  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+    Changed |= ProcessLoop(*I);
+
+  LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+
+  auto IsLoopStart = [](MachineInstr &MI) {
+    return MI.getOpcode() == ARM::t2DoLoopStart ||
+           MI.getOpcode() == ARM::t2WhileLoopStart;
+  };
+
+  // Search the given block for a loop start instruction. If one isn't found,
+  // and there's only one predecessor block, search that one too.
+  std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
+    [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
+    for (auto &MI : *MBB) {
+      if (IsLoopStart(MI))
+        return &MI;
+    }
+    if (MBB->pred_size() == 1)
+      return SearchForStart(*MBB->pred_begin());
+    return nullptr;
+  };
+
+  MachineInstr *Start = nullptr;
+  MachineInstr *Dec = nullptr;
+  MachineInstr *End = nullptr;
+  bool Revert = false;
+
+  // Search the preheader for the start intrinsic, or look through the
+  // predecessors of the header to find exactly one set.iterations intrinsic.
+  // FIXME: I don't see why we shouldn't be supporting multiple predecessors
+  // with potentially multiple set.loop.iterations, so we need to enable this.
+  if (auto *Preheader = ML->getLoopPreheader()) {
+    Start = SearchForStart(Preheader);
+  } else {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
+               << " - Performing manual predecessor search.\n");
+    MachineBasicBlock *Pred = nullptr;
+    for (auto *MBB : ML->getHeader()->predecessors()) {
+      if (!ML->contains(MBB)) {
+        if (Pred) {
+          LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
+          Start = nullptr;
+          break;
+        }
+        Pred = MBB;
+        Start = SearchForStart(MBB);
+      }
+    }
+  }
+
+  // Find the low-overhead loop components and decide whether or not to fall
+  // back to a normal loop.
+  for (auto *MBB : reverse(ML->getBlocks())) {
+    for (auto &MI : *MBB) {
+      if (MI.getOpcode() == ARM::t2LoopDec)
+        Dec = &MI;
+      else if (MI.getOpcode() == ARM::t2LoopEnd)
+        End = &MI;
+      else if (MI.getDesc().isCall())
+        // TODO: Though the call will require LE to execute again, does this
+        // mean we should revert? Always executing LE hopefully should be
+        // faster than performing a sub,cmp,br or even subs,br.
+        Revert = true;
+
+      if (!Dec)
+        continue;
+
+      // If we find that we load/store LR after LoopDec, expect that the
+      // decremented value has been spilled to the stack. Because this value
+      // isn't actually going to be produced until the latch, by LE, we would
+      // need to generate a real sub. The value is also likely to be reloaded
+      // for use of LoopEnd - in which case we'd need to perform an add. So
+      // request a revert. This must not clear a revert requested earlier
+      // (e.g. by a call), hence Revert is only ever set here, never reset.
+      if ((MI.mayLoad() || MI.mayStore()) && MI.getNumOperands() > 0 &&
+          MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR)
+        Revert = true;
+    }
+
+    if (Dec && End && Revert)
+      break;
+  }
+
+  if (!Start && !Dec && !End) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
+    return Changed;
+  }
+  if (!(Start && Dec && End))
+    report_fatal_error("Failed to find all loop components");
+
+  if (!End->getOperand(1).isMBB() ||
+      End->getOperand(1).getMBB() != ML->getHeader())
+    report_fatal_error("Expected LoopEnd to target Loop Header");
+
+  // The WLS and LE instructions have 12-bits for the label offset. WLS
+  // requires a positive offset, while LE uses negative.
+  if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
+      !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+    Revert = true;
+  }
+  if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+      (BBUtils->getOffsetOf(Start) >
+       BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+       !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
+    Revert = true;
+  }
+
+  LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
+                    << " - Found Loop Dec: " << *Dec
+                    << " - Found Loop End: " << *End);
+
+  Expand(ML, Start, Dec, End, Revert);
+  return true;
+}
+
+// Lower a WhileLoopStart back to ordinary code. The pseudo carries the exit
+// block as operand 1, so emit "cmp lr, #0" followed by a beq to that block.
+// FIXME: Need to check that we're not trashing the CPSR when generating the
+// cmp. We could also try to generate a cbz if the value in LR is also in
+// another low register.
+void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
+  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
+  MachineBasicBlock &Block = *MI->getParent();
+  const auto &Loc = MI->getDebugLoc();
+  BuildMI(Block, MI, Loc, TII->get(ARM::t2CMPri))
+      .addReg(ARM::LR)
+      .addImm(0)
+      .addImm(ARMCC::AL)
+      .addReg(ARM::CPSR);
+
+  // TODO: Try to use tBcc instead
+  BuildMI(Block, MI, Loc, TII->get(ARM::t2Bcc))
+      .add(MI->getOperand(1)) // branch target
+      .addImm(ARMCC::EQ)      // condition code
+      .addReg(ARM::CPSR);
+  MI->eraseFromParent();
+}
+
+// Lower a LoopDec to a t2SUBri that writes LR, reusing the pseudo's operands.
+// TODO: Check flags so that we can possibly generate a tSubs or tSub.
+void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
+  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
+  MachineBasicBlock &Block = *MI->getParent();
+  BuildMI(Block, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri))
+      .addDef(ARM::LR)
+      .add(MI->getOperand(1))
+      .add(MI->getOperand(2))
+      .addImm(ARMCC::AL)
+      .addReg(0)
+      .addReg(0);
+  MI->eraseFromParent();
+}
+
+// Lower a LoopEnd to "cmp lr, #0" plus a bne to the pseudo's target block.
+// FIXME: Need to check that we're not trashing the CPSR when generating
+// the cmp.
+void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
+  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
+  MachineBasicBlock &Block = *MI->getParent();
+  const auto &Loc = MI->getDebugLoc();
+
+  // Create cmp
+  BuildMI(Block, MI, Loc, TII->get(ARM::t2CMPri))
+      .addReg(ARM::LR)
+      .addImm(0)
+      .addImm(ARMCC::AL)
+      .addReg(ARM::CPSR);
+
+  // TODO Try to use tBcc instead.
+  // Create bne
+  BuildMI(Block, MI, Loc, TII->get(ARM::t2Bcc))
+      .add(MI->getOperand(1)) // branch target
+      .addImm(ARMCC::NE)      // condition code
+      .addReg(ARM::CPSR);
+  MI->eraseFromParent();
+}
+
+void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
+                                 MachineInstr *Dec, MachineInstr *End,
+                                 bool Revert) {
+  // Lower the pseudos to DLS/WLS and LE, or revert them if Revert is set.
+  auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
+    // The trip count should already be held in LR since the instructions
+    // within the loop can only read and write to LR. So, there should be a
+    // mov to setup the count. WLS/DLS perform this move, so find the original
+    // and delete it - inserting WLS/DLS in its place.
+    MachineBasicBlock *MBB = Start->getParent();
+    MachineInstr *InsertPt = Start;
+    for (auto &I : MRI->def_instructions(ARM::LR)) {
+      if (I.getParent() != MBB)
+        continue;
+
+      // Only handle a move reg of the trip count. Test this first so that
+      // operands 1 and 2 are only inspected on an instruction that has them.
+      if (!I.getDesc().isMoveReg() || I.getNumOperands() < 3 ||
+          !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
+        continue;
+
+      // Always execute.
+      if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
+        continue;
+      InsertPt = &I;
+      break;
+    }
+
+    unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
+      ARM::t2DLS : ARM::t2WLS;
+    MachineInstrBuilder MIB =
+      BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
+
+    MIB.addDef(ARM::LR);
+    MIB.add(Start->getOperand(0));
+    if (Opc == ARM::t2WLS)
+      MIB.add(Start->getOperand(1));
+
+    if (InsertPt != Start)
+      InsertPt->eraseFromParent();
+    Start->eraseFromParent();
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
+    return &*MIB;
+  };
+
+  // Combine the LoopDec and LoopEnd instructions into LE(TP).
+  auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
+                              MachineInstr *End) {
+    MachineBasicBlock *MBB = End->getParent();
+    MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
+                                      TII->get(ARM::t2LEUpdate));
+    MIB.addDef(ARM::LR);
+    MIB.add(End->getOperand(0));
+    MIB.add(End->getOperand(1));
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
+
+    End->eraseFromParent();
+    Dec->eraseFromParent();
+    return &*MIB;
+  };
+
+  // TODO: We should be able to automatically remove these branches before we
+  // get here - probably by teaching analyzeBranch about the pseudo
+  // instructions.
+  // If there is an unconditional branch, after I, that just branches to the
+  // next block, remove it.
+  auto RemoveDeadBranch = [](MachineInstr *I) {
+    MachineBasicBlock *BB = I->getParent();
+    MachineInstr *Terminator = &BB->instr_back();
+    if (Terminator->isUnconditionalBranch() && I != Terminator) {
+      MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
+      if (BB->isLayoutSuccessor(Succ)) {
+        LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
+        Terminator->eraseFromParent();
+      }
+    }
+  };
+
+  if (Revert) {
+    if (Start->getOpcode() == ARM::t2WhileLoopStart)
+      RevertWhile(Start);
+    else
+      Start->eraseFromParent();
+    RevertLoopDec(Dec);
+    RevertLoopEnd(End);
+  } else {
+    Start = ExpandLoopStart(ML, Start);
+    RemoveDeadBranch(Start);
+    End = ExpandLoopEnd(ML, Dec, End);
+    RemoveDeadBranch(End);
+  }
+}
+
+FunctionPass *llvm::createARMLowOverheadLoopsPass() {
+  return new ARMLowOverheadLoops(); // Newly allocated pass instance.
+}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 48b02d40b246..90c5ad025e56 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index e25d36b57616..3b676ca4c883 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 91310e81e398..90d794cd27b1 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,6 +61,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// enable far jump.
   bool LRSpilledForFarJump = false;
 
+  /// LRSpilled - True if the LR register has been spilled for
+  /// any reason, so it's legal to emit an ARM::tBfar (i.e. "bl").
+  bool LRSpilled = false;
+
   /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
   /// spill stack offset.
   unsigned FramePtrSpillOffset = 0;
@@ -151,6 +154,9 @@ public:
   bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
   void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
 
+  bool isLRSpilled() const { return LRSpilled; }
+  void setLRIsSpilled(bool s) { LRSpilled = s; }
+
   bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
   void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
 
@@ -239,6 +245,8 @@ public:
   void setPromotedConstpoolIncrease(int Sz) {
     PromotedGlobalsIncrease = Sz;
   }
+
+  DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp
index df1da9d8e474..38bf28ba8219 100644
--- a/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/lib/Target/ARM/ARMMacroFusion.cpp
@@ -1,9 +1,8 @@
 //===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h
index b3abd7b593a1..4896a4a2544d 100644
--- a/lib/Target/ARM/ARMMacroFusion.h
+++ b/lib/Target/ARM/ARMMacroFusion.h
@@ -1,9 +1,8 @@
 //===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index cff4a256100d..348895da713f 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -1,10 +1,9 @@
 //===-- ARMOptimizeBarriersPass - two DMBs without a memory access in between,
 //removed one -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===------------------------------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp
index fc3258914f92..5389d09bf7d7 100644
--- a/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/lib/Target/ARM/ARMParallelDSP.cpp
@@ -1,9 +1,8 @@
 //===- ParallelDSP.cpp - Parallel DSP Pass --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,12 +48,12 @@ DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
 namespace {
   struct OpChain;
   struct BinOpChain;
-  struct Reduction;
+  class Reduction;
 
   using OpChainList     = SmallVector<std::unique_ptr<OpChain>, 8>;
   using ReductionList   = SmallVector<Reduction, 8>;
   using ValueList       = SmallVector<Value*, 8>;
-  using MemInstList     = SmallVector<Instruction*, 8>;
+  using MemInstList     = SmallVector<LoadInst*, 8>;
   using PMACPair        = std::pair<BinOpChain*,BinOpChain*>;
   using PMACPairList    = SmallVector<PMACPair, 8>;
   using Instructions    = SmallVector<Instruction*,16>;
@@ -64,31 +63,24 @@ namespace {
     Instruction   *Root;
     ValueList     AllValues;
     MemInstList   VecLd;    // List of all load instructions.
-    MemLocList    MemLocs;  // All memory locations read by this tree.
+    MemInstList   Loads;
     bool          ReadOnly = true;
 
     OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { }
     virtual ~OpChain() = default;
 
-    void SetMemoryLocations() {
-      const auto Size = LocationSize::unknown();
+    void PopulateLoads() {
       for (auto *V : AllValues) {
-        if (auto *I = dyn_cast<Instruction>(V)) {
-          if (I->mayWriteToMemory())
-            ReadOnly = false;
-          if (auto *Ld = dyn_cast<LoadInst>(V))
-            MemLocs.push_back(MemoryLocation(Ld->getPointerOperand(), Size));
-        }
+        if (auto *Ld = dyn_cast<LoadInst>(V))
+          Loads.push_back(Ld);
       }
     }
 
     unsigned size() const { return AllValues.size(); }
   };
 
-  // 'BinOpChain' and 'Reduction' are just some bookkeeping data structures.
-  // 'Reduction' contains the phi-node and accumulator statement from where we
-  // start pattern matching, and 'BinOpChain' the multiplication
-  // instructions that are candidates for parallel execution.
+  // 'BinOpChain' holds the multiplication instructions that are candidates
+  // for parallel execution.
   struct BinOpChain : public OpChain {
     ValueList     LHS;      // List of all (narrow) left hand operands.
     ValueList     RHS;      // List of all (narrow) right hand operands.
@@ -103,15 +95,85 @@ namespace {
     bool AreSymmetrical(BinOpChain *Other);
   };
 
-  struct Reduction {
-    PHINode         *Phi;             // The Phi-node from where we start
-                                      // pattern matching.
-    Instruction     *AccIntAdd;       // The accumulating integer add statement,
-                                      // i.e, the reduction statement.
-    OpChainList     MACCandidates;    // The MAC candidates associated with
-                                      // this reduction statement.
-    PMACPairList    PMACPairs;
-    Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };
+  /// Represent a sequence of multiply-accumulate operations with the aim to
+  /// perform the multiplications in parallel.
+  class Reduction {
+    Instruction     *Root = nullptr;   // Add at the root of the reduction.
+    Value           *Acc = nullptr;    // Incoming accumulator; may be null.
+    OpChainList     Muls;              // BinOpChains rooted at muls.
+    PMACPairList        MulPairs;      // Muls paired for parallel execution.
+    SmallPtrSet<Instruction*, 4> Adds; // Adds that comprise the reduction.
+
+  public:
+    Reduction() = delete;
+    /// Create a reduction rooted at \p Add.
+    Reduction (Instruction *Add) : Root(Add) { }
+
+    /// Record an Add instruction that is a part of this reduction.
+    void InsertAdd(Instruction *I) { Adds.insert(I); }
+
+    /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
+    /// this reduction.
+    void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
+      Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+    }
+
+    /// Add the incoming accumulator value, returns true if a value had not
+    /// already been added. Returning false signals to the user that this
+    /// reduction already has a value to initialise the accumulator.
+    bool InsertAcc(Value *V) {
+      if (Acc)
+        return false;
+      Acc = V;
+      return true;
+    }
+
+    /// Set two BinOpChains, rooted at muls, that can be executed as a single
+    /// parallel operation.
+    void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+      MulPairs.push_back(std::make_pair(Mul0, Mul1));
+    }
+
+    /// Return true if enough mul operations are found that can be executed in
+    /// parallel.
+    bool CreateParallelPairs();
+
+    /// Return the add instruction which is the root of the reduction.
+    Instruction *getRoot() { return Root; }
+
+    /// Return the incoming value to be accumulated. This may be null.
+    Value *getAccumulator() { return Acc; }
+
+    /// Return the set of adds that comprise the reduction.
+    SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
+
+    /// Return the BinOpChains, rooted at mul instructions, that comprise
+    /// the reduction.
+    OpChainList &getMuls() { return Muls; }
+
+    /// Return the BinOpChains, rooted at mul instructions, that have been
+    /// paired for parallel execution.
+    PMACPairList &getMulPairs() { return MulPairs; }
+
+    /// To finalise, replace the uses of the root with the intrinsic call.
+    void UpdateRoot(Instruction *SMLAD) {
+      Root->replaceAllUsesWith(SMLAD);
+    }
+  };
+
+  class WidenedLoad {
+    LoadInst *NewLd = nullptr;        // The load passed in as Wide.
+    SmallVector<LoadInst*, 4> Loads;  // Copy of the loads passed in as Lds.
+
+  public:
+    WidenedLoad(SmallVectorImpl<LoadInst*> &Lds, LoadInst *Wide)
+      : NewLd(Wide) {
+      for (auto *I : Lds)
+        Loads.push_back(I);
+    }
+    LoadInst *getLoad() {  // Returns the load given as Wide at construction.
+      return NewLd;
+    }
   };
 
   class ARMParallelDSP : public LoopPass {
@@ -124,28 +186,37 @@ namespace {
     const DataLayout  *DL;
     Module            *M;
     std::map<LoadInst*, LoadInst*> LoadPairs;
-    std::map<LoadInst*, SmallVector<LoadInst*, 4>> SequentialLoads;
+    SmallPtrSet<LoadInst*, 4> OffsetLoads;
+    std::map<LoadInst*, std::unique_ptr<WidenedLoad>> WideLoads;
+
+    template<unsigned>
+    bool IsNarrowSequence(Value *V, ValueList &VL);
 
-    bool RecordSequentialLoads(BasicBlock *Header);
-    bool InsertParallelMACs(Reduction &Reduction);
+    bool RecordMemoryOps(BasicBlock *BB);
+    void InsertParallelMACs(Reduction &Reduction);
     bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
-    void CreateParallelMACPairs(Reduction &R);
-    Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
-                                 Instruction *Acc, bool Exchange,
-                                 Instruction *InsertAfter);
+    LoadInst* CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
+                             IntegerType *LoadTy);
+    bool CreateParallelPairs(Reduction &R);
 
     /// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
     /// Dual performs two signed 16x16-bit multiplications. It adds the
     /// products to a 32-bit accumulate operand. Optionally, the instruction can
     /// exchange the halfwords of the second operand before performing the
     /// arithmetic.
-    bool MatchSMLAD(Function &F);
+    bool MatchSMLAD(Loop *L);
 
   public:
     static char ID;
 
     ARMParallelDSP() : LoopPass(ID) { }
 
+    bool doInitialization(Loop *L, LPPassManager &LPM) override {
+      LoadPairs.clear();
+      WideLoads.clear();
+      return true;
+    }
+
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       LoopPass::getAnalysisUsage(AU);
       AU.addRequired<AssumptionCacheTracker>();
@@ -183,6 +254,9 @@ namespace {
         return false;
       }
 
+      if (!TheLoop->getLoopPreheader())
+        InsertPreheaderForLoop(L, DT, LI, nullptr, true);
+
       Function &F = *Header->getParent();
       M = F.getParent();
       DL = &M->getDataLayout();
@@ -202,31 +276,62 @@ namespace {
         return false;
       }
 
+      if (!ST->isLittle()) {
+        LLVM_DEBUG(dbgs() << "Only supporting little endian: not running pass "
+                          << "ARMParallelDSP\n");
+        return false;
+      }
+
       LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
-      bool Changes = false;
 
       LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
       LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
 
-      if (!RecordSequentialLoads(Header)) {
+      if (!RecordMemoryOps(Header)) {
         LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
         return false;
       }
 
-      Changes = MatchSMLAD(F);
+      bool Changes = MatchSMLAD(L);
       return Changes;
     }
   };
 }
 
+template<typename MemInst>
+static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
+                                  const DataLayout &DL, ScalarEvolution &SE) {
+  // Thin wrapper: the two accesses count as sequential exactly when
+  // isConsecutiveAccess says so.
+  return isConsecutiveAccess(MemOp0, MemOp1, DL, SE);
+}
+
+bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
+                                        MemInstList &VecMem) {
+  // True only if LoadPairs maps Ld0 to Ld1; VecMem is then set to the pair.
+  if (!Ld0 || !Ld1)
+    return false;
+  auto Pair = LoadPairs.find(Ld0);
+  if (Pair == LoadPairs.end() || Pair->second != Ld1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Loads are sequential and valid:\n";
+    dbgs() << "Ld0:"; Ld0->dump();
+    dbgs() << "Ld1:"; Ld1->dump();
+  );
+  VecMem.clear();
+  VecMem.push_back(Ld0);
+  VecMem.push_back(Ld1);
+  return true;
+}
+
 // MaxBitwidth: the maximum supported bitwidth of the elements in the DSP
 // instructions, which is set to 16. So here we should collect all i8 and i16
 // narrow operations.
 // TODO: we currently only collect i16, and will support i8 later, so that's
 // why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth.
 template<unsigned MaxBitWidth>
-static bool IsNarrowSequence(Value *V, ValueList &VL) {
-  LLVM_DEBUG(dbgs() << "Is narrow sequence? "; V->dump());
+bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) {
   ConstantInt *CInt;
 
   if (match(V, m_ConstantInt(CInt))) {
@@ -236,7 +341,7 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
 
   auto *I = dyn_cast<Instruction>(V);
   if (!I)
-   return false;
+    return false;
 
   Value *Val, *LHS, *RHS;
   if (match(V, m_Trunc(m_Value(Val)))) {
@@ -245,108 +350,253 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
   } else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) {
     // TODO: we need to implement sadd16/sadd8 for this, which enables to
     // also do the rewrite for smlad8.ll, but it is unsupported for now.
-    LLVM_DEBUG(dbgs() << "No, unsupported Op:\t"; I->dump());
     return false;
   } else if (match(V, m_ZExtOrSExt(m_Value(Val)))) {
-    if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) {
-      LLVM_DEBUG(dbgs() << "No, wrong SrcTy size: " <<
-        cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() << "\n");
+    if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
       return false;
-    }
 
     if (match(Val, m_Load(m_Value()))) {
-      LLVM_DEBUG(dbgs() << "Yes, found narrow Load:\t"; Val->dump());
+      auto *Ld = cast<LoadInst>(Val);
+
+      // Check that this load could be paired.
+      if (!LoadPairs.count(Ld) && !OffsetLoads.count(Ld))
+        return false;
+
       VL.push_back(Val);
       VL.push_back(I);
       return true;
     }
   }
-  LLVM_DEBUG(dbgs() << "No, unsupported Op:\t"; I->dump());
   return false;
 }
 
-template<typename MemInst>
-static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
-                                  const DataLayout &DL, ScalarEvolution &SE) {
-  if (!MemOp0->isSimple() || !MemOp1->isSimple()) {
-    LLVM_DEBUG(dbgs() << "No, not touching volatile access\n");
-    return false;
-  }
-  if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE)) {
-    LLVM_DEBUG(dbgs() << "OK: accesses are consecutive.\n");
-    return true;
+/// Iterate through the block and record base, offset pairs of loads which can
+/// be widened into a single load.
+bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
+  SmallVector<LoadInst*, 8> Loads;
+  SmallVector<Instruction*, 8> Writes;
+
+  // Collect loads and instructions that may write to memory. For now we only
+  // record loads which are simple, sign-extended and have a single user.
+  // TODO: Allow zero-extended loads.
+  for (auto &I : *BB) {
+    if (I.mayWriteToMemory())
+      Writes.push_back(&I);
+    auto *Ld = dyn_cast<LoadInst>(&I);
+    if (!Ld || !Ld->isSimple() ||
+        !Ld->hasOneUse() || !isa<SExtInst>(Ld->user_back()))
+      continue;
+    Loads.push_back(Ld);
   }
-  LLVM_DEBUG(dbgs() << "No, accesses aren't consecutive.\n");
-  return false;
-}
 
-bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
-                                        MemInstList &VecMem) {
-  if (!Ld0 || !Ld1)
-    return false;
+  using InstSet = std::set<Instruction*>;
+  using DepMap = std::map<Instruction*, InstSet>;
+  DepMap RAWDeps;
 
-  LLVM_DEBUG(dbgs() << "Are consecutive loads:\n";
-    dbgs() << "Ld0:"; Ld0->dump();
-    dbgs() << "Ld1:"; Ld1->dump();
-  );
+  // Record any writes that may alias a load.
+  const auto Size = LocationSize::unknown();
+  for (auto Read : Loads) {
+    for (auto Write : Writes) {
+      MemoryLocation ReadLoc =
+        MemoryLocation(Read->getPointerOperand(), Size);
 
-  if (!Ld0->hasOneUse() || !Ld1->hasOneUse()) {
-    LLVM_DEBUG(dbgs() << "No, load has more than one use.\n");
-    return false;
+      if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc),
+          ModRefInfo::ModRef)))
+        continue;
+      if (DT->dominates(Write, Read))
+        RAWDeps[Read].insert(Write);
+    }
   }
 
-  if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
-    return false;
+  // Check whether there's not a write between the two loads which would
+  // prevent them from being safely merged.
+  auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) {
+    LoadInst *Dominator = DT->dominates(Base, Offset) ? Base : Offset;
+    LoadInst *Dominated = DT->dominates(Base, Offset) ? Offset : Base;
 
-  VecMem.clear();
-  VecMem.push_back(Ld0);
-  VecMem.push_back(Ld1);
-  return true;
-}
+    if (RAWDeps.count(Dominated)) {
+      InstSet &WritesBefore = RAWDeps[Dominated];
 
-/// Iterate through the block and record base, offset pairs of loads as well as
-/// maximal sequences of sequential loads.
-bool ARMParallelDSP::RecordSequentialLoads(BasicBlock *Header) {
-  SmallVector<LoadInst*, 8> Loads;
-  for (auto &I : *Header) {
-    auto *Ld = dyn_cast<LoadInst>(&I);
-    if (!Ld)
-      continue;
-    Loads.push_back(Ld);
-  }
+      for (auto Before : WritesBefore) {
 
-  std::map<LoadInst*, LoadInst*> BaseLoads;
+        // We can't move the second load backward, past a write, to merge
+        // with the first load.
+        if (DT->dominates(Dominator, Before))
+          return false;
+      }
+    }
+    return true;
+  };
 
-  for (auto *Ld0 : Loads) {
-    for (auto *Ld1 : Loads) {
-      if (Ld0 == Ld1)
+  // Record base, offset load pairs.
+  for (auto *Base : Loads) {
+    for (auto *Offset : Loads) {
+      if (Base == Offset)
         continue;
 
-      if (AreSequentialAccesses<LoadInst>(Ld0, Ld1, *DL, *SE)) {
-        LoadPairs[Ld0] = Ld1;
-        if (BaseLoads.count(Ld0)) {
-          LoadInst *Base = BaseLoads[Ld0];
-          BaseLoads[Ld1] = Base;
-          SequentialLoads[Base].push_back(Ld1);
-        } else {
-          BaseLoads[Ld1] = Ld0;
-          SequentialLoads[Ld0].push_back(Ld1);
-        }
+      if (AreSequentialAccesses<LoadInst>(Base, Offset, *DL, *SE) &&
+          SafeToPair(Base, Offset)) {
+        LoadPairs[Base] = Offset;
+        OffsetLoads.insert(Offset);
+        break;
       }
     }
   }
+
+  LLVM_DEBUG(if (!LoadPairs.empty()) {
+               dbgs() << "Consecutive load pairs:\n";
+               for (auto &MapIt : LoadPairs) {
+                 LLVM_DEBUG(dbgs() << *MapIt.first << ", "
+                            << *MapIt.second << "\n");
+               }
+             });
   return LoadPairs.size() > 1;
 }
 
-void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
-  OpChainList &Candidates = R.MACCandidates;
-  PMACPairList &PMACPairs = R.PMACPairs;
-  const unsigned Elems = Candidates.size();
+// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
+// multiplications.
+// To use SMLAD:
+// 1) we first need to find an integer add, then look for this pattern:
+//
+// acc0 = ...
+// ld0 = load i16
+// sext0 = sext i16 %ld0 to i32
+// ld1 = load i16
+// sext1 = sext i16 %ld1 to i32
+// mul0 = mul %sext0, %sext1
+// ld2 = load i16
+// sext2 = sext i16 %ld2 to i32
+// ld3 = load i16
+// sext3 = sext i16 %ld3 to i32
+// mul1 = mul i32 %sext2, %sext3
+// add0 = add i32 %mul0, %acc0
+// acc1 = add i32 %add0, %mul1
+//
+// Which can be selected to:
+//
+// ldr r0
+// ldr r1
+// smlad r2, r0, r1, r2
+//
+// If constants are used instead of loads, these will need to be hoisted
+// out and into a register.
+//
+// If loop invariants are used instead of loads, these need to be packed
+// before the loop begins.
+//
+bool ARMParallelDSP::MatchSMLAD(Loop *L) {
+  // Search recursively back through the operands to find a tree of values that
+  // form a multiply-accumulate chain. The search records the Add and Mul
+  // instructions that form the reduction and allows us to find a single value
+  // to be used as the initial input to the accumulator.
+  std::function<bool(Value*, Reduction&)> Search = [&]
+    (Value *V, Reduction &R) -> bool {
+
+    // If we find a non-instruction, try to use it as the initial accumulator
+    // value. This may have already been found during the search in which case
+    // this function will return false, signaling a search fail.
+    auto *I = dyn_cast<Instruction>(V);
+    if (!I)
+      return R.InsertAcc(V);
+
+    switch (I->getOpcode()) {
+    default:
+      break;
+    case Instruction::PHI:
+      // Could be the accumulator value.
+      return R.InsertAcc(V);
+    case Instruction::Add: {
+      // Adds should be adding together two muls, or another add and a mul to
+      // be within the mac chain. One of the operands may also be the
+      // accumulator value at which point we should stop searching.
+      bool ValidLHS = Search(I->getOperand(0), R);
+      bool ValidRHS = Search(I->getOperand(1), R);
+      if (!ValidLHS && !ValidRHS) // Neither operand belongs to the chain.
+        return false;
+      else if (ValidLHS && ValidRHS) {
+        R.InsertAdd(I);
+        return true;
+      } else {
+        R.InsertAdd(I);
+        return R.InsertAcc(I);
+      }
+    }
+    case Instruction::Mul: {
+      Value *MulOp0 = I->getOperand(0);
+      Value *MulOp1 = I->getOperand(1);
+      if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1)) {
+        ValueList LHS;
+        ValueList RHS;
+        if (IsNarrowSequence<16>(MulOp0, LHS) &&
+            IsNarrowSequence<16>(MulOp1, RHS)) {
+          R.InsertMul(I, LHS, RHS);
+          return true;
+        }
+      }
+      return false;
+    }
+    case Instruction::SExt:
+      return Search(I->getOperand(0), R);
+    }
+    return false;
+  };
+
+  bool Changed = false;
+  SmallPtrSet<Instruction*, 4> AllAdds;
+  BasicBlock *Latch = L->getLoopLatch();
+
+  for (Instruction &I : reverse(*Latch)) {
+    if (I.getOpcode() != Instruction::Add)
+      continue;
+
+    if (AllAdds.count(&I))
+      continue;
+
+    const auto *Ty = I.getType();
+    if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
+      continue;
+
+    Reduction R(&I);
+    if (!Search(&I, R))
+      continue;
+
+    if (!CreateParallelPairs(R))
+      continue;
+
+    InsertParallelMACs(R);
+    Changed = true;
+    AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+  }
+
+  return Changed;
+}
+
+bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
+
+  // Not enough mul operations to make a pair.
+  if (R.getMuls().size() < 2)
+    return false;
 
-  if (Elems < 2)
-    return;
+  // Check that the muls operate directly upon sign extended loads.
+  for (auto &MulChain : R.getMuls()) {
+    // A mul has 2 operands, and a narrow op consists of sext and a load; thus
+    // we expect at least 4 items in this operand value list.
+    if (MulChain->size() < 4) {
+      LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+      return false;
+    }
+    MulChain->PopulateLoads();
+    ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
+    ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
+
+    // Use +=2 to skip over the expected extend instructions.
+    for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
+      if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
+        return false;
+    }
+  }
 
-  auto CanPair = [&](BinOpChain *PMul0, BinOpChain *PMul1) {
+  auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
     if (!PMul0->AreSymmetrical(PMul1))
       return false;
 
@@ -363,23 +613,22 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
       if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
         return false;
 
-      LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n"
-                 << "\t Ld0: " << *Ld0 << "\n"
-                 << "\t Ld1: " << *Ld1 << "\n"
-                 << "and operands " << x + 2 << ":\n"
-                 << "\t Ld2: " << *Ld2 << "\n"
-                 << "\t Ld3: " << *Ld3 << "\n");
+      LLVM_DEBUG(dbgs() << "Loads:\n"
+                 << " - " << *Ld0 << "\n"
+                 << " - " << *Ld1 << "\n"
+                 << " - " << *Ld2 << "\n"
+                 << " - " << *Ld3 << "\n");
 
       if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
         if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
           LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-          PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+          R.AddMulPair(PMul0, PMul1);
           return true;
         } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
           LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
           LLVM_DEBUG(dbgs() << "    exchanging Ld2 and Ld3\n");
           PMul1->Exchange = true;
-          PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+          R.AddMulPair(PMul0, PMul1);
           return true;
         }
       } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
@@ -389,16 +638,18 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
         LLVM_DEBUG(dbgs() << "    and swapping muls\n");
         PMul0->Exchange = true;
         // Only the second operand can be exchanged, so swap the muls.
-        PMACPairs.push_back(std::make_pair(PMul1, PMul0));
+        R.AddMulPair(PMul1, PMul0);
         return true;
       }
     }
     return false;
   };
 
+  OpChainList &Muls = R.getMuls();
+  const unsigned Elems = Muls.size();
   SmallPtrSet<const Instruction*, 4> Paired;
   for (unsigned i = 0; i < Elems; ++i) {
-    BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
+    BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
     if (Paired.count(PMul0->Root))
       continue;
 
@@ -406,7 +657,7 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
       if (i == j)
         continue;
 
-      BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[j].get());
+      BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
       if (Paired.count(PMul1->Root))
         continue;
 
@@ -417,315 +668,133 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
 
       assert(PMul0 != PMul1 && "expected different chains");
 
-      LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
-                 dbgs() << "- "; Mul0->dump();
-                 dbgs() << "- "; Mul1->dump());
-
-      LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
-      if (CanPair(PMul0, PMul1)) {
+      if (CanPair(R, PMul0, PMul1)) {
         Paired.insert(Mul0);
         Paired.insert(Mul1);
         break;
       }
     }
   }
+  return !R.getMulPairs().empty();
 }
 
-bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
-  Instruction *Acc = Reduction.Phi;
-  Instruction *InsertAfter = Reduction.AccIntAdd;
-
-  for (auto &Pair : Reduction.PMACPairs) {
-    BinOpChain *PMul0 = Pair.first;
-    BinOpChain *PMul1 = Pair.second;
-    LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";
-               dbgs() << "- "; PMul0->Root->dump();
-               dbgs() << "- "; PMul1->Root->dump());
-
-    auto *VecLd0 = cast<LoadInst>(PMul0->VecLd[0]);
-    auto *VecLd1 = cast<LoadInst>(PMul1->VecLd[0]);
-    Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, PMul1->Exchange, InsertAfter);
-    InsertAfter = Acc;
-  }
-
-  if (Acc != Reduction.Phi) {
-    LLVM_DEBUG(dbgs() << "Replace Accumulate: "; Acc->dump());
-    Reduction.AccIntAdd->replaceAllUsesWith(Acc);
-    return true;
-  }
-  return false;
-}
-
-static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
-                            ReductionList &Reductions) {
-  RecurrenceDescriptor RecDesc;
-  const bool HasFnNoNaNAttr =
-    F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
-  const BasicBlock *Latch = TheLoop->getLoopLatch();
-
-  // We need a preheader as getIncomingValueForBlock assumes there is one.
-  if (!TheLoop->getLoopPreheader()) {
-    LLVM_DEBUG(dbgs() << "No preheader found, bailing out\n");
-    return;
-  }
-
-  for (PHINode &Phi : Header->phis()) {
-    const auto *Ty = Phi.getType();
-    if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
-      continue;
-
-    const bool IsReduction =
-      RecurrenceDescriptor::AddReductionVar(&Phi,
-                                            RecurrenceDescriptor::RK_IntegerAdd,
-                                            TheLoop, HasFnNoNaNAttr, RecDesc);
-    if (!IsReduction)
-      continue;
-
-    Instruction *Acc = dyn_cast<Instruction>(Phi.getIncomingValueForBlock(Latch));
-    if (!Acc)
-      continue;
-
-    Reductions.push_back(Reduction(&Phi, Acc));
-  }
-
-  LLVM_DEBUG(
-    dbgs() << "\nAccumulating integer additions (reductions) found:\n";
-    for (auto &R : Reductions) {
-      dbgs() << "-  "; R.Phi->dump();
-      dbgs() << "-> "; R.AccIntAdd->dump();
-    }
-  );
-}
-
-static void AddMACCandidate(OpChainList &Candidates,
-                            Instruction *Mul,
-                            Value *MulOp0, Value *MulOp1) {
-  LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
-  assert(Mul->getOpcode() == Instruction::Mul &&
-         "expected mul instruction");
-  ValueList LHS;
-  ValueList RHS;
-  if (IsNarrowSequence<16>(MulOp0, LHS) &&
-      IsNarrowSequence<16>(MulOp1, RHS)) {
-    LLVM_DEBUG(dbgs() << "OK, found narrow mul: "; Mul->dump());
-    Candidates.push_back(make_unique<BinOpChain>(Mul, LHS, RHS));
-  }
-}
-
-static void MatchParallelMACSequences(Reduction &R,
-                                      OpChainList &Candidates) {
-  Instruction *Acc = R.AccIntAdd;
-  LLVM_DEBUG(dbgs() << "\n- Analysing:\t" << *Acc);
-
-  // Returns false to signal the search should be stopped.
-  std::function<bool(Value*)> Match =
-    [&Candidates, &Match](Value *V) -> bool {
 
-    auto *I = dyn_cast<Instruction>(V);
-    if (!I)
-      return false;
-
-    switch (I->getOpcode()) {
-    case Instruction::Add:
-      if (Match(I->getOperand(0)) || (Match(I->getOperand(1))))
-        return true;
-      break;
-    case Instruction::Mul: {
-      Value *MulOp0 = I->getOperand(0);
-      Value *MulOp1 = I->getOperand(1);
-      if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1))
-        AddMACCandidate(Candidates, I, MulOp0, MulOp1);
-      return false;
-    }
-    case Instruction::SExt:
-      return Match(I->getOperand(0));
-    }
-    return false;
+void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
+  // Emit one SMLAD-family call per mul pair, chaining the accumulator value.
+  auto CreateSMLADCall = [&](SmallVectorImpl<LoadInst*> &VecLd0,
+                             SmallVectorImpl<LoadInst*> &VecLd1,
+                             Value *Acc, bool Exchange,
+                             Instruction *InsertAfter) {
+    // Replace the reduction chain with an intrinsic call
+    IntegerType *Ty = IntegerType::get(M->getContext(), 32);
+    LoadInst *WideLd0 = WideLoads.count(VecLd0[0]) ?
+      WideLoads[VecLd0[0]]->getLoad() : CreateWideLoad(VecLd0, Ty);
+    LoadInst *WideLd1 = WideLoads.count(VecLd1[0]) ?
+      WideLoads[VecLd1[0]]->getLoad() : CreateWideLoad(VecLd1, Ty);
+
+    Value* Args[] = { WideLd0, WideLd1, Acc };
+    Function *SMLAD = nullptr;
+    if (Exchange)
+      SMLAD = Acc->getType()->isIntegerTy(32) ?
+        Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
+        Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
+    else
+      SMLAD = Acc->getType()->isIntegerTy(32) ?
+        Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
+        Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
+
+    IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+                                ++BasicBlock::iterator(InsertAfter));
+    Instruction *Call = Builder.CreateCall(SMLAD, Args);
+    NumSMLAD++;
+    return Call;
   };
 
-  while (Match (Acc));
-  LLVM_DEBUG(dbgs() << "Finished matching MAC sequences, found "
-             << Candidates.size() << " candidates.\n");
-}
-
-// Collects all instructions that are not part of the MAC chains, which is the
-// set of instructions that can potentially alias with the MAC operands.
-static void AliasCandidates(BasicBlock *Header, Instructions &Reads,
-                            Instructions &Writes) {
-  for (auto &I : *Header) {
-    if (I.mayReadFromMemory())
-      Reads.push_back(&I);
-    if (I.mayWriteToMemory())
-      Writes.push_back(&I);
-  }
-}
-
-// Check whether statements in the basic block that write to memory alias with
-// the memory locations accessed by the MAC-chains.
-// TODO: we need the read statements when we accept more complicated chains.
-static bool AreAliased(AliasAnalysis *AA, Instructions &Reads,
-                       Instructions &Writes, OpChainList &MACCandidates) {
-  LLVM_DEBUG(dbgs() << "Alias checks:\n");
-  for (auto &MAC : MACCandidates) {
-    LLVM_DEBUG(dbgs() << "mul: "; MAC->Root->dump());
-
-    // At the moment, we allow only simple chains that only consist of reads,
-    // accumulate their result with an integer add, and thus that don't write
-    // memory, and simply bail if they do.
-    if (!MAC->ReadOnly)
-      return true;
-
-    // Now for all writes in the basic block, check that they don't alias with
-    // the memory locations accessed by our MAC-chain:
-    for (auto *I : Writes) {
-      LLVM_DEBUG(dbgs() << "- "; I->dump());
-      assert(MAC->MemLocs.size() >= 2 && "expecting at least 2 memlocs");
-      for (auto &MemLoc : MAC->MemLocs) {
-        if (isModOrRefSet(intersectModRef(AA->getModRefInfo(I, MemLoc),
-                                          ModRefInfo::ModRef))) {
-          LLVM_DEBUG(dbgs() << "Yes, aliases found\n");
-          return true;
-        }
-      }
-    }
-  }
-
-  LLVM_DEBUG(dbgs() << "OK: no aliases found!\n");
-  return false;
-}
+  Instruction *InsertAfter = R.getRoot();
+  Value *Acc = R.getAccumulator();
+  if (!Acc) // No accumulator found: start from a zero of the root's type.
+    Acc = ConstantInt::get(InsertAfter->getType(), 0);
 
-static bool CheckMACMemory(OpChainList &Candidates) {
-  for (auto &C : Candidates) {
-    // A mul has 2 operands, and a narrow op consist of sext and a load; thus
-    // we expect at least 4 items in this operand value list.
-    if (C->size() < 4) {
-      LLVM_DEBUG(dbgs() << "Operand list too short.\n");
-      return false;
-    }
-    C->SetMemoryLocations();
-    ValueList &LHS = static_cast<BinOpChain*>(C.get())->LHS;
-    ValueList &RHS = static_cast<BinOpChain*>(C.get())->RHS;
+  LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
+             << "Acc: " << *Acc << "\n");
+  for (auto &Pair : R.getMulPairs()) {
+    BinOpChain *PMul0 = Pair.first;
+    BinOpChain *PMul1 = Pair.second;
+    LLVM_DEBUG(dbgs() << "Muls:\n"
+               << "- " << *PMul0->Root << "\n"
+               << "- " << *PMul1->Root << "\n");
 
-    // Use +=2 to skip over the expected extend instructions.
-    for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
-      if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
-        return false;
-    }
+    Acc = CreateSMLADCall(PMul0->VecLd, PMul1->VecLd, Acc, PMul1->Exchange,
+                          InsertAfter);
+    InsertAfter = cast<Instruction>(Acc);
   }
-  return true;
+  R.UpdateRoot(cast<Instruction>(Acc));
 }
 
-// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
-// multiplications.
-// To use SMLAD:
-// 1) we first need to find integer add reduction PHIs,
-// 2) then from the PHI, look for this pattern:
-//
-// acc0 = phi i32 [0, %entry], [%acc1, %loop.body]
-// ld0 = load i16
-// sext0 = sext i16 %ld0 to i32
-// ld1 = load i16
-// sext1 = sext i16 %ld1 to i32
-// mul0 = mul %sext0, %sext1
-// ld2 = load i16
-// sext2 = sext i16 %ld2 to i32
-// ld3 = load i16
-// sext3 = sext i16 %ld3 to i32
-// mul1 = mul i32 %sext2, %sext3
-// add0 = add i32 %mul0, %acc0
-// acc1 = add i32 %add0, %mul1
-//
-// Which can be selected to:
-//
-// ldr.h r0
-// ldr.h r1
-// smlad r2, r0, r1, r2
-//
-// If constants are used instead of loads, these will need to be hoisted
-// out and into a register.
-//
-// If loop invariants are used instead of loads, these need to be packed
-// before the loop begins.
-//
-bool ARMParallelDSP::MatchSMLAD(Function &F) {
-  BasicBlock *Header = L->getHeader();
-  LLVM_DEBUG(dbgs() << "= Matching SMLAD =\n";
-             dbgs() << "Header block:\n"; Header->dump();
-             dbgs() << "Loop info:\n\n"; L->dump());
+LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
+                                         IntegerType *LoadTy) {
+  assert(Loads.size() == 2 && "currently only support widening two loads");
 
-  bool Changed = false;
-  ReductionList Reductions;
-  MatchReductions(F, L, Header, Reductions);
+  LoadInst *Base = Loads[0];
+  LoadInst *Offset = Loads[1];
 
-  for (auto &R : Reductions) {
-    OpChainList MACCandidates;
-    MatchParallelMACSequences(R, MACCandidates);
-    if (!CheckMACMemory(MACCandidates))
-      continue;
+  Instruction *BaseSExt = dyn_cast<SExtInst>(Base->user_back());
+  Instruction *OffsetSExt = dyn_cast<SExtInst>(Offset->user_back());
 
-    R.MACCandidates = std::move(MACCandidates);
+  assert((BaseSExt && OffsetSExt)
+         && "Loads should have a single, extending, user");
 
-    LLVM_DEBUG(dbgs() << "MAC candidates:\n";
-      for (auto &M : R.MACCandidates)
-        M->Root->dump();
-      dbgs() << "\n";);
-  }
+  std::function<void(Value*, Value*)> MoveBefore =
+    [&](Value *A, Value *B) -> void {
+      if (!isa<Instruction>(A) || !isa<Instruction>(B))
+        return;
 
-  // Collect all instructions that may read or write memory. Our alias
-  // analysis checks bail out if any of these instructions aliases with an
-  // instruction from the MAC-chain.
-  Instructions Reads, Writes;
-  AliasCandidates(Header, Reads, Writes);
+      auto *Source = cast<Instruction>(A);
+      auto *Sink = cast<Instruction>(B);
 
-  for (auto &R : Reductions) {
-    if (AreAliased(AA, Reads, Writes, R.MACCandidates))
-      return false;
-    CreateParallelMACPairs(R);
-    Changed |= InsertParallelMACs(R);
-  }
+      if (DT->dominates(Source, Sink) ||
+          Source->getParent() != Sink->getParent() ||
+          isa<PHINode>(Source) || isa<PHINode>(Sink))
+        return;
 
-  LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););
-  return Changed;
-}
+      Source->moveBefore(Sink);
+      for (auto &U : Source->uses())
+        MoveBefore(Source, U.getUser());
+    };
 
-static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
-                               const Type *LoadTy) {
-  const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
+  // Insert the load at the point of the original dominating load.
+  LoadInst *DomLoad = DT->dominates(Base, Offset) ? Base : Offset;
+  IRBuilder<NoFolder> IRB(DomLoad->getParent(),
+                          ++BasicBlock::iterator(DomLoad));
 
-  Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
+  // Bitcast the pointer to a wider type and create the wide load, while making
+  // sure to maintain the original alignment as this prevents ldrd from being
+  // generated when it could be illegal due to memory alignment.
+  const unsigned AddrSpace = DomLoad->getPointerAddressSpace();
+  Value *VecPtr = IRB.CreateBitCast(Base->getPointerOperand(),
                                     LoadTy->getPointerTo(AddrSpace));
-  return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
-}
-
-Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
-                                             Instruction *Acc, bool Exchange,
-                                             Instruction *InsertAfter) {
-  LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n"
-             << "- " << *VecLd0 << "\n"
-             << "- " << *VecLd1 << "\n"
-             << "- " << *Acc << "\n"
-             << "Exchange: " << Exchange << "\n");
-
-  IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                              ++BasicBlock::iterator(InsertAfter));
-
-  // Replace the reduction chain with an intrinsic call
-  const Type *Ty = IntegerType::get(M->getContext(), 32);
-  LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
-  LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
-  Value* Args[] = { NewLd0, NewLd1, Acc };
-  Function *SMLAD = nullptr;
-  if (Exchange)
-    SMLAD = Acc->getType()->isIntegerTy(32) ?
-      Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
-      Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
-  else
-    SMLAD = Acc->getType()->isIntegerTy(32) ?
-      Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
-      Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
-  CallInst *Call = Builder.CreateCall(SMLAD, Args);
-  NumSMLAD++;
-  return Call;
+  LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr,
+                                             Base->getAlignment());
+
+  // Make sure everything is in the correct order in the basic block.
+  MoveBefore(Base->getPointerOperand(), VecPtr);
+  MoveBefore(VecPtr, WideLoad);
+
+  // From the wide load, create two values that equal the original two loads.
+  // Loads[0] needs trunc while Loads[1] needs a lshr and trunc.
+  // TODO: Support big-endian as well.
+  Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
+  BaseSExt->setOperand(0, Bottom);
+
+  IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
+  Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
+  Value *Top = IRB.CreateLShr(WideLoad, ShiftVal);
+  Value *Trunc = IRB.CreateTrunc(Top, OffsetTy);
+  OffsetSExt->setOperand(0, Trunc);
+
+  WideLoads.emplace(std::make_pair(Base,
+                                   make_unique<WidenedLoad>(Loads, WideLoad)));
+  return WideLoad;
 }
 
 // Compare the value lists in Other to this chain.
@@ -741,7 +810,6 @@ bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
     }
 
     const unsigned Pairs = VL0.size();
-    LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
 
     for (unsigned i = 0; i < Pairs; ++i) {
       const Value *V0 = VL0[i];
@@ -749,24 +817,17 @@ bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
       const auto *Inst0 = dyn_cast<Instruction>(V0);
       const auto *Inst1 = dyn_cast<Instruction>(V1);
 
-      LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
-                dbgs() << "mul1: "; V0->dump();
-                dbgs() << "mul2: "; V1->dump());
-
       if (!Inst0 || !Inst1)
         return false;
 
-      if (Inst0->isSameOperationAs(Inst1)) {
-        LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
+      if (Inst0->isSameOperationAs(Inst1))
         continue;
-      }
 
       const APInt *C0, *C1;
       if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
         return false;
     }
 
-    LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
     return true;
   };
 
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 3ff0bee7e5bf..d519490c9c57 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -1,9 +1,8 @@
 //===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMPredicates.td b/lib/Target/ARM/ARMPredicates.td
new file mode 100644
index 000000000000..0b6b40de80dd
--- /dev/null
+++ b/lib/Target/ARM/ARMPredicates.td
@@ -0,0 +1,211 @@
+//===-- ARMPredicates.td - ARM Instruction Predicates ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def HasV4T           : Predicate<"Subtarget->hasV4TOps()">,
+                                 AssemblerPredicate<"HasV4TOps", "armv4t">;
+def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T           : Predicate<"Subtarget->hasV5TOps()">,
+                                 AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T            : Predicate<"!Subtarget->hasV5TOps()">;
+def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">,
+                                 AssemblerPredicate<"HasV5TEOps", "armv5te">;
+def HasV6            : Predicate<"Subtarget->hasV6Ops()">,
+                                 AssemblerPredicate<"HasV6Ops", "armv6">;
+def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M           : Predicate<"Subtarget->hasV6MOps()">,
+                                 AssemblerPredicate<"HasV6MOps",
+                                                    "armv6m or armv6t2">;
+def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
+                                 AssemblerPredicate<"HasV8MBaselineOps",
+                                                    "armv8m.base">;
+def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
+                                 AssemblerPredicate<"HasV8MMainlineOps",
+                                                    "armv8m.main">;
+def HasV8_1MMainline : Predicate<"Subtarget->hasV8_1MMainlineOps()">,
+                                 AssemblerPredicate<"HasV8_1MMainlineOps",
+                                                    "armv8.1m.main">;
+def HasMVEInt        : Predicate<"Subtarget->hasMVEIntegerOps()">,
+                                 AssemblerPredicate<"HasMVEIntegerOps",
+                                                    "mve">;
+def HasMVEFloat      : Predicate<"Subtarget->hasMVEFloatOps()">,
+                                 AssemblerPredicate<"HasMVEFloatOps",
+                                                    "mve.fp">;
+def HasFPRegs        : Predicate<"Subtarget->hasFPRegs()">,
+                                 AssemblerPredicate<"FeatureFPRegs",
+                                                    "fp registers">;
+def HasFPRegs16      : Predicate<"Subtarget->hasFPRegs16()">,
+                                 AssemblerPredicate<"FeatureFPRegs16",
+                                                    "16-bit fp registers">;
+def HasFPRegs64      : Predicate<"Subtarget->hasFPRegs64()">,
+                                 AssemblerPredicate<"FeatureFPRegs64",
+                                                    "64-bit fp registers">;
+def HasFPRegsV8_1M   : Predicate<"Subtarget->hasFPRegs() && Subtarget->hasV8_1MMainlineOps()">,
+                                 AssemblerPredicate<"FeatureFPRegs,HasV8_1MMainlineOps",
+                                                    "armv8.1m.main with FP or MVE">;
+def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
+                                 AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K           : Predicate<"Subtarget->hasV6KOps()">,
+                                 AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K            : Predicate<"!Subtarget->hasV6KOps()">;
+def HasV7            : Predicate<"Subtarget->hasV7Ops()">,
+                                 AssemblerPredicate<"HasV7Ops", "armv7">;
+def HasV8            : Predicate<"Subtarget->hasV8Ops()">,
+                                 AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8            : Predicate<"!Subtarget->hasV8Ops()">,
+                                 AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
+                                 AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
+                                 AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
+                                 AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
+                                 AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def NoVFP            : Predicate<"!Subtarget->hasVFP2Base()">;
+def HasVFP2          : Predicate<"Subtarget->hasVFP2Base()">,
+                                 AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+def HasVFP3          : Predicate<"Subtarget->hasVFP3Base()">,
+                                 AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+def HasVFP4          : Predicate<"Subtarget->hasVFP4Base()">,
+                                 AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+def HasDPVFP         : Predicate<"Subtarget->hasFP64()">,
+                                 AssemblerPredicate<"FeatureFP64",
+                                                    "double precision VFP">;
+def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8Base()">,
+                                 AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
+def HasNEON          : Predicate<"Subtarget->hasNEON()">,
+                                 AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
+                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
+def HasAES           : Predicate<"Subtarget->hasAES()">,
+                                 AssemblerPredicate<"FeatureAES", "aes">;
+def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
+                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
+                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
+def HasCRC           : Predicate<"Subtarget->hasCRC()">,
+                                 AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRAS           : Predicate<"Subtarget->hasRAS()">,
+                                 AssemblerPredicate<"FeatureRAS", "ras">;
+def HasLOB           : Predicate<"Subtarget->hasLOB()">,
+                                 AssemblerPredicate<"FeatureLOB", "lob">;
+def HasFP16          : Predicate<"Subtarget->hasFP16()">,
+                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
+                                 AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+                                 AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
+def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
+                                 AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
+def HasDSP           : Predicate<"Subtarget->hasDSP()">,
+                                 AssemblerPredicate<"FeatureDSP", "dsp">;
+def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
+                                 AssemblerPredicate<"FeatureDB",
+                                                    "data-barriers">;
+def HasDFB           : Predicate<"Subtarget->hasFullDataBarrier()">,
+                                 AssemblerPredicate<"FeatureDFB",
+                                                    "full-data-barrier">;
+def HasV7Clrex  : Predicate<"Subtarget->hasV7Clrex()">,
+                            AssemblerPredicate<"FeatureV7Clrex",
+                                               "v7 clrex">;
+def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
+                                  AssemblerPredicate<"FeatureAcquireRelease",
+                                                     "acquire/release">;
+def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
+                                 AssemblerPredicate<"FeatureMP",
+                                                    "mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+                                 AssemblerPredicate<"FeatureVirtualization",
+                                                   "virtualization-extensions">;
+def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
+                                 AssemblerPredicate<"FeatureTrustZone",
+                                                    "TrustZone">;
+def Has8MSecExt      : Predicate<"Subtarget->has8MSecExt()">,
+                                 AssemblerPredicate<"Feature8MSecExt",
+                                                    "ARMv8-M Security Extensions">;
+def HasZCZ           : Predicate<"Subtarget->hasZeroCycleZeroing()">;
+def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb          : Predicate<"Subtarget->isThumb()">,
+                                 AssemblerPredicate<"ModeThumb", "thumb">;
+def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2         : Predicate<"Subtarget->isThumb2()">,
+                                 AssemblerPredicate<"ModeThumb,FeatureThumb2",
+                                                    "thumb2">;
+def IsMClass         : Predicate<"Subtarget->isMClass()">,
+                                 AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass      : Predicate<"!Subtarget->isMClass()">,
+                                 AssemblerPredicate<"!FeatureMClass",
+                                                    "!armv*m">;
+def IsARM            : Predicate<"!Subtarget->isThumb()">,
+                                 AssemblerPredicate<"!ModeThumb", "arm-mode">;
+def IsMachO          : Predicate<"Subtarget->isTargetMachO()">;
+def IsNotMachO       : Predicate<"!Subtarget->isTargetMachO()">;
+def IsNaCl           : Predicate<"Subtarget->isTargetNaCl()">;
+def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
+def IsNotWindows     : Predicate<"!Subtarget->isTargetWindows()">;
+def IsReadTPHard     : Predicate<"Subtarget->isReadTPHard()">;
+def IsReadTPSoft     : Predicate<"!Subtarget->isReadTPHard()">;
+def UseNaClTrap      : Predicate<"Subtarget->useNaClTrap()">,
+                                 AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap  : Predicate<"!Subtarget->useNaClTrap()">;
+
+def UseNegativeImmediates :
+  Predicate<"false">,
+            AssemblerPredicate<"!FeatureNoNegativeImmediates",
+                               "NegativeImmediates">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+let RecomputePerFunction = 1 in {
+  def UseMovt          : Predicate<"Subtarget->useMovt()">;
+  def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
+  def UseMovtInPic     : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
+
+  def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+                           "  TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+                           "Subtarget->hasMinSize())">;
+}
+def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
+
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
+// Do not use them for Darwin platforms.
+def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
+                                 " FPOpFusion::Fast && "
+                                 " Subtarget->hasVFP4Base()) && "
+                                 "!Subtarget->isTargetDarwin() &&"
+                                 "Subtarget->useFPVMLx()">;
+
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
+
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
+
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
+                          "!Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
+                              "Subtarget->useNEONForSinglePrecisionFP()">;
+
+let RecomputePerFunction = 1 in {
+  def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
+  def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
+}
+
+def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+
+// Armv8.5-A extensions
+def HasSB            : Predicate<"Subtarget->hasSB()">,
+                       AssemblerPredicate<"FeatureSB", "sb">;
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 4f28f2dafc70..b100150175fc 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARMRegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -161,6 +160,10 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
          "Subclass not added?");
   assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
          "Subclass not added?");
+  assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) &&
+         "Subclass not added?");
+  assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
+         "Subclass not added?");
   assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
 
 #ifndef NDEBUG
@@ -182,6 +185,13 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
   case tGPR_and_tcGPRRegClassID:
   case tcGPRRegClassID:
   case tGPRRegClassID:
+  case tGPREvenRegClassID:
+  case tGPROddRegClassID:
+  case tGPR_and_tGPREvenRegClassID:
+  case tGPR_and_tGPROddRegClassID:
+  case tGPREven_and_tcGPRRegClassID:
+  case tGPREven_and_tGPR_and_tcGPRRegClassID:
+  case tGPROdd_and_tcGPRRegClassID:
     return getRegBank(ARM::GPRRegBankID);
   case HPRRegClassID:
   case SPR_8RegClassID:
@@ -218,7 +228,15 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
   switch (Opc) {
   case G_ADD:
-  case G_SUB:
+  case G_SUB: {
+    // Integer operations where the source and destination are in the
+    // same register class.
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    OperandsMapping = Ty.getSizeInBits() == 64
+                          ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+                          : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+    break;
+  }
   case G_MUL:
   case G_AND:
   case G_OR:
@@ -337,6 +355,14 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
                                     &ARM::ValueMappings[ARM::GPR3OpsIdx]});
     break;
   }
+  case G_FCONSTANT: {
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    OperandsMapping = getOperandsMapping(
+        {Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+                                  : &ARM::ValueMappings[ARM::SPR3OpsIdx],
+         nullptr});
+    break;
+  }
   case G_CONSTANT:
   case G_FRAME_INDEX:
   case G_GLOBAL_VALUE:
@@ -424,6 +450,19 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OperandsMapping =
         getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
     break;
+  case DBG_VALUE: {
+    SmallVector<const ValueMapping *, 4> OperandBanks(NumOperands);
+    const MachineOperand &MaybeReg = MI.getOperand(0);
+    if (MaybeReg.isReg() && MaybeReg.getReg()) {
+      unsigned Size = MRI.getType(MaybeReg.getReg()).getSizeInBits();
+      if (Size > 32 && Size != 64)
+        return getInvalidInstructionMapping();
+      OperandBanks[0] = Size == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+                                   : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+    }
+    OperandsMapping = getOperandsMapping(OperandBanks);
+    break;
+  }
   default:
     return getInvalidInstructionMapping();
   }
@@ -433,7 +472,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     for (const auto &Mapping : OperandsMapping[i]) {
       assert(
           (Mapping.RegBank->getID() != ARM::FPRRegBankID ||
-           MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+           MF.getSubtarget<ARMSubtarget>().hasVFP2Base()) &&
           "Trying to use floating point register bank on target without vfp");
     }
   }
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h
index 9650b358f319..1961f7af49bb 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- ARMRegisterBankInfo ---------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/ARM/ARMRegisterBanks.td b/lib/Target/ARM/ARMRegisterBanks.td
index 6e3834da3bb5..e4ebf793f9b0 100644
--- a/lib/Target/ARM/ARMRegisterBanks.td
+++ b/lib/Target/ARM/ARMRegisterBanks.td
@@ -1,9 +1,8 @@
 //=- ARMRegisterBank.td - Describe the AArch64 Banks ---------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index e6e8cdf965e2..6649750bb388 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e2e650e4af93..87c0f322d3b3 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index dc56186cb54a..92ae26b3729d 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,7 +13,8 @@ include "ARMSystemRegister.td"
 //===----------------------------------------------------------------------===//
 
 // Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = [],
+             list<string> altNames = []> : Register<n, altNames> {
   let HWEncoding = Enc;
   let Namespace = "ARM";
   let SubRegs = subregs;
@@ -27,6 +27,11 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> {
   let Namespace = "ARM";
 }
 
+let Namespace = "ARM",
+    FallbackRegAltNameIndex = NoRegAltName in {
+  def RegNamesRaw : RegAltNameIndex;
+}
+
 // Subregister indices.
 let Namespace = "ARM" in {
 def qqsub_0 : SubRegIndex<256>;
@@ -84,9 +89,11 @@ def R9  : ARMReg< 9, "r9">,  DwarfRegNum<[9]>;
 def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
 def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
 def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
-def SP  : ARMReg<13, "sp">,  DwarfRegNum<[13]>;
-def LR  : ARMReg<14, "lr">,  DwarfRegNum<[14]>;
-def PC  : ARMReg<15, "pc">,  DwarfRegNum<[15]>;
+let RegAltNameIndices = [RegNamesRaw] in {
+def SP  : ARMReg<13, "sp", [], ["r13"]>,  DwarfRegNum<[13]>;
+def LR  : ARMReg<14, "lr", [], ["r14"]>,  DwarfRegNum<[14]>;
+def PC  : ARMReg<15, "pc", [], ["r15"]>,  DwarfRegNum<[15]>;
+}
 }
 
 // Float registers
@@ -190,6 +197,17 @@ def MVFR0   : ARMReg<7,  "mvfr0">;
 def FPEXC   : ARMReg<8,  "fpexc">;
 def FPINST  : ARMReg<9,  "fpinst">;
 def FPINST2 : ARMReg<10, "fpinst2">;
+// These encodings aren't actual instruction encodings; the real encoding
+// depends on the instruction they are used in. For VPR, 32 was chosen so that
+// it always comes last in spr_reglist_with_vpr.
+def VPR     : ARMReg<32, "vpr">;
+def FPSCR_NZCVQC
+            : ARMReg<2, "fpscr_nzcvqc">;
+def P0      : ARMReg<13, "p0">;
+def FPCXTNS : ARMReg<14, "fpcxtns">;
+def FPCXTS  : ARMReg<15, "fpcxts">;
+
+def ZR  : ARMReg<15, "zr">,  DwarfRegNum<[15]>;
 
 // Register classes.
 //
@@ -209,9 +227,10 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
   // know how to spill them. If we make our prologue/epilogue code smarter at
   // some point, we can go back to using the above allocation orders for the
   // Thumb1 instructions that know how to use hi regs.
-  let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+  let AltOrders = [(add LR, GPR), (trunc GPR, 8),
+                   (add (trunc GPR, 8), R12, LR, (shl GPR, 8))];
   let AltOrderSelect = [{
-      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+      return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
   }];
   let DiagnosticString = "operand must be a register in range [r0, r15]";
 }
@@ -220,9 +239,10 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
 // certain operand slots, particularly as the destination.  Primarily
 // useful for disassembly.
 def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
-  let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+  let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8),
+                   (add (trunc GPRnopc, 8), R12, LR, (shl GPRnopc, 8))];
   let AltOrderSelect = [{
-      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+      return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
   }];
   let DiagnosticString = "operand must be a register in range [r0, r14]";
 }
@@ -238,6 +258,27 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)
   let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
 }
 
+// GPRs without the PC and SP registers but with APSR. Used by CLRM instruction.
+def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> {
+  let isAllocatable = 0;
+}
+
+def GPRwithZR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), ZR)> {
+  let AltOrders = [(add LR, GPRwithZR), (trunc GPRwithZR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticString = "operand must be a register in range [r0, r14] or zr";
+}
+
+def GPRwithZRnosp : RegisterClass<"ARM", [i32], 32, (sub GPRwithZR, SP)> {
+  let AltOrders = [(add LR, GPRwithZRnosp), (trunc GPRwithZRnosp, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticString = "operand must be a register in range [r0, r12] or r14 or zr";
+}
+
 // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
 // implied SP argument list.
 // FIXME: It would be better to not use this at all and refactor the
@@ -247,14 +288,19 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> {
   let DiagnosticString = "operand must be a register sp";
 }
 
+// GPRlr - Only LR is legal. Used by ARMv8.1-M Low Overhead Loop instructions
+// where LR is the only legal loop counter register.
+def GPRlr : RegisterClass<"ARM", [i32], 32, (add LR)>;
+
 // restricted GPR register class. Many Thumb2 instructions allow the full
 // register range for operands, but have undefined behaviours when PC
 // or SP (R13 or R15) are used. The ARM ISA refers to these operands
 // via the BadReg() pseudo-code description.
 def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
-  let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+  let AltOrders = [(add LR, rGPR), (trunc rGPR, 8),
+                   (add (trunc rGPR, 8), R12, LR, (shl rGPR, 8))];
   let AltOrderSelect = [{
-      return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+      return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
   }];
   let DiagnosticType = "rGPR";
 }
@@ -285,12 +331,38 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
   }];
 }
 
+def tGPROdd : RegisterClass<"ARM", [i32], 32, (add R1, R3, R5, R7, R9, R11)> {
+  let AltOrders = [(and tGPROdd, tGPR)];
+  let AltOrderSelect = [{
+      return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticString =
+    "operand must be an odd-numbered register in range [r1,r11]";
+}
+
+def tGPREven : RegisterClass<"ARM", [i32], 32, (add R0, R2, R4, R6, R8, R10, R12, LR)> {
+  let AltOrders = [(and tGPREven, tGPR)];
+  let AltOrderSelect = [{
+      return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+  }];
+  let DiagnosticString = "operand must be an even-numbered register";
+}
+
 // Condition code registers.
 def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
   let CopyCost = -1;  // Don't allow copying of status registers.
   let isAllocatable = 0;
 }
 
+// MVE Condition code register.
+def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1], 32, (add VPR)> {
+//  let CopyCost = -1;  // Don't allow copying of status registers.
+}
+
+// FPSCR, when the flags at the top of it are used as the input or
+// output to an instruction such as MVE VADC.
+def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
+
 // Scalar single precision floating point register class..
 // FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
 // to avoid partial-write dependencies on D or Q (depending on platform)
@@ -302,7 +374,7 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
                         (decimate (rotl SPR, 1), 4),
                         (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -314,7 +386,7 @@ def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
                         (decimate (rotl HPR, 1), 4),
                         (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -336,11 +408,18 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
 
+// Scalar single and double precision floating point and VPR register class.
+// This is only used for parsing; don't use it anywhere else, as the size and
+// types don't match!
+def FPWithVPR : RegisterClass<"ARM", [f32], 32, (add SPR, DPR, VPR)> {
+    let isAllocatable = 0;
+}
+
 // Subset of DPR that are accessible with VFP2 (and so that also have
 // 32-bit SPR subregs).
 def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
@@ -359,8 +438,10 @@ def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16],
 def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
                         (sequence "Q%u", 0, 15)> {
   // Allocate non-VFP2 aliases Q8-Q15 first.
-  let AltOrders = [(rotl QPR, 8)];
-  let AltOrderSelect = [{ return 1; }];
+  let AltOrders = [(rotl QPR, 8), (trunc QPR, 8)];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
+  }];
   let DiagnosticString = "operand must be a register in range [q0, q15]";
 }
 
@@ -376,6 +457,12 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
   let DiagnosticString = "operand must be a register in range [q0, q3]";
 }
 
+// MVE 128-bit vector register class. This class is only really needed for
+// parsing assembly, since we still have to truncate the register set in the QPR
+// class anyway.
+def MQPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+                         128, (trunc QPR, 8)>;
+
 // Pseudo-registers representing odd-even pairs of D registers. The even-odd
 // pairs are already represented by the Q registers.
 // These are needed by NEON instructions requiring two consecutive D registers.
@@ -390,8 +477,11 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                           128, (interleave QPR, TuplesOE2D)> {
   // Allocate starting at non-VFP2 registers D16-D31 first.
   // Prefer even-odd pairs as they are easier to copy.
-  let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
-  let AltOrderSelect = [{ return 1; }];
+  let AltOrders = [(add (rotl QPR, 8),  (rotl DPair, 16)),
+                   (add (trunc QPR, 8), (trunc DPair, 16))];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
+  }];
 }
 
 // Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index ed5a3a7bb696..ce74d325c4e5 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -1,9 +1,8 @@
 //===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
@@ -425,4 +424,4 @@ include "ARMScheduleA9.td"
 include "ARMScheduleSwift.td"
 include "ARMScheduleR52.td"
 include "ARMScheduleA57.td"
-include "ARMScheduleM3.td"
+include "ARMScheduleM4.td"
diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td
index 63f975ba6e39..a79f3348f338 100644
--- a/lib/Target/ARM/ARMScheduleA57.td
+++ b/lib/Target/ARM/ARMScheduleA57.td
@@ -1,9 +1,8 @@
 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -95,6 +94,9 @@ def CortexA57Model : SchedMachineModel {
 
   // FIXME: Remove when all errors have been fixed.
   let FullInstRWOverlapCheck = 0;
+
+  let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
+                             HasFPRegsV8_1M];
 }
 
 //===----------------------------------------------------------------------===//
@@ -1175,7 +1177,8 @@ def : InstRW<[A57Write_8cyc_1V], (instregex
 
 // ASIMD FP max/min
 def : InstRW<[A57Write_5cyc_1V], (instregex
-  "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+  "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM",
+  "(NEON|VFP)_VMINNM")>;
 
 // ASIMD FP multiply
 def A57WriteVMUL_VecFP  : SchedWriteRes<[A57UnitV]> { let Latency = 5;  }
diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td
index 670717dc7c13..5ba61503686e 100644
--- a/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -1,9 +1,8 @@
 //=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index ba380cba100f..1be0ee4334a8 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1,9 +1,8 @@
 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index fc301c589269..21d32bde4710 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1,9 +1,8 @@
 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td
deleted file mode 100644
index 93f8299f9bd0..000000000000
--- a/lib/Target/ARM/ARMScheduleM3.td
+++ /dev/null
@@ -1,21 +0,0 @@
-//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for the ARM Cortex-M3 processor.
-//
-//===----------------------------------------------------------------------===//
-
-def CortexM3Model : SchedMachineModel {
-  let IssueWidth        = 1; // Only IT can be dual-issued, so assume single-issue
-  let MicroOpBufferSize = 0; // In-order
-  let LoadLatency       = 2; // Latency when not pipelined, not pc-relative
-  let MispredictPenalty = 2; // Best case branch taken cost
-
-  let CompleteModel = 0;
-}
diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td
new file mode 100644
index 000000000000..38c8ea2b4f35
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleM4.td
@@ -0,0 +1,119 @@
+//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM4Model : SchedMachineModel {
+  let IssueWidth        = 1; // Only IT can be dual-issued, so assume single-issue
+  let MicroOpBufferSize = 0; // In-order
+  let LoadLatency       = 2; // Latency when not pipelined, not pc-relative
+  let MispredictPenalty = 2; // Best case branch taken cost
+  let PostRAScheduler   = 1; // Enable the post-RA scheduler pass
+
+  let CompleteModel = 0; // Do not require sched info for every instruction
+}
+
+
+// We model the entire cpu as a single pipeline with a BufferSize = 0 since
+// Cortex-M4 is in-order.
+
+def M4Unit : ProcResource<1> { let BufferSize = 0; }
+
+
+let SchedModel = CortexM4Model in {
+
+// Some definitions of latencies we apply to different instructions
+
+class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; }
+class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; }
+class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; }
+class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; }
+def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
+def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }
+class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
+class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;
+
+
+// Loads, MACs and DIV all get a higher latency of 2
+def : M4UnitL2<WriteLd>;
+def : M4UnitL2<WriteMAC32>;
+def : M4UnitL2<WriteMAC64Hi>;
+def : M4UnitL2<WriteMAC64Lo>;
+def : M4UnitL2<WriteMAC16>;
+def : M4UnitL2<WriteDIV>;
+
+def : M4UnitL2I<(instregex "(t|t2)LDM")>;
+
+
+// For stores we use a latency of 1, as they have no outputs
+
+def : M4UnitL1<WriteST>;
+def : M4UnitL1I<(instregex "(t|t2)STM")>;
+
+
+// Everything else has a Latency of 1
+
+def : M4UnitL1<WriteALU>;
+def : M4UnitL1<WriteALUsi>;
+def : M4UnitL1<WriteALUsr>;
+def : M4UnitL1<WriteALUSsr>;
+def : M4UnitL1<WriteBr>;
+def : M4UnitL1<WriteBrL>;
+def : M4UnitL1<WriteBrTbl>;
+def : M4UnitL1<WriteCMPsi>;
+def : M4UnitL1<WriteCMPsr>;
+def : M4UnitL1<WriteCMP>;
+def : M4UnitL1<WriteMUL32>;
+def : M4UnitL1<WriteMUL64Hi>;
+def : M4UnitL1<WriteMUL64Lo>;
+def : M4UnitL1<WriteMUL16>;
+def : M4UnitL1<WriteNoop>;
+def : M4UnitL1<WritePreLd>;
+def : M4UnitL1I<(instregex "(t|t2)MOV")>;
+def : M4UnitL1I<(instrs COPY)>;
+def : M4UnitL1I<(instregex "t2IT")>;
+def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
+    "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
+
+def : ReadAdvance<ReadALU, 0>;
+def : ReadAdvance<ReadALUsr, 0>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
+
+// Most FP instructions have single-cycle latency, except MACs, divides and
+// square roots. Loads still take 2 cycles.
+
+def : M4UnitL1<WriteFPCVT>;
+def : M4UnitL1<WriteFPMOV>;
+def : M4UnitL1<WriteFPALU32>;
+def : M4UnitL1<WriteFPALU64>;
+def : M4UnitL1<WriteFPMUL32>;
+def : M4UnitL1<WriteFPMUL64>;
+def : M4UnitL2I<(instregex "VLD")>;
+def : M4UnitL1I<(instregex "VST")>;
+def : M4UnitL3<WriteFPMAC32>;
+def : M4UnitL3<WriteFPMAC64>;
+def : M4UnitL14<WriteFPDIV32>;
+def : M4UnitL14<WriteFPDIV64>;
+def : M4UnitL14<WriteFPSQRT32>;
+def : M4UnitL14<WriteFPSQRT64>;
+def : M4UnitL1<WriteVLD1>;
+def : M4UnitL1<WriteVLD2>;
+def : M4UnitL1<WriteVLD3>;
+def : M4UnitL1<WriteVLD4>;
+def : M4UnitL1<WriteVST1>;
+def : M4UnitL1<WriteVST2>;
+def : M4UnitL1<WriteVST3>;
+def : M4UnitL1<WriteVST4>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+}
diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td
index 11bce45161b3..d1cbf754b5a1 100644
--- a/lib/Target/ARM/ARMScheduleR52.td
+++ b/lib/Target/ARM/ARMScheduleR52.td
@@ -1,9 +1,8 @@
 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 87984648139b..00a44599b1b2 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1,9 +1,8 @@
 //=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 57d0bfb65049..9b86097329c0 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,9 +1,8 @@
 //===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 4d685158e258..cade06e8c109 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -171,7 +170,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
 
   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the libary call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
     return SDValue();
   }
 
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 2ddb42c95397..b8a86ae7310f 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b1d0761e3231..978faed776b0 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,10 +92,12 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
 
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -283,6 +284,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   case CortexA72:
   case CortexA73:
   case CortexA75:
+  case CortexA76:
   case CortexR4:
   case CortexR4F:
   case CortexR5:
@@ -359,6 +361,13 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
 }
 
 bool ARMSubtarget::enableMachineScheduler() const {
+  // The MachineScheduler can increase register usage, so we use more high
+  // registers and end up with more T2 instructions that cannot be converted to
+  // T1 instructions. At least until we do better at converting to thumb1
+  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
+  // Machine scheduler, relying on the DAG register pressure scheduler instead.
+  if (isMClass() && hasMinSize())
+    return false;
   // Enable the MachineScheduler before register allocation for subtargets
   // with the use-misched feature.
   return useMachineScheduler();
@@ -374,20 +383,20 @@ bool ARMSubtarget::enablePostRAScheduler() const {
 
 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
 
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }
 
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }
 
 bool ARMSubtarget::useFastISel() const {
@@ -404,3 +413,45 @@ bool ARMSubtarget::useFastISel() const {
          ((isTargetMachO() && !isThumb1Only()) ||
           (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
 }
+
+unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
+  // The GPR register class has multiple possible allocation orders, with
+  // tradeoffs preferred by different sub-architectures and optimisation goals.
+  // The allocation orders are:
+  // 0: (the default tablegen order, not used)
+  // 1: r14, r0-r13
+  // 2: r0-r7
+  // 3: r0-r7, r12, lr, r8-r11
+  // Note that the register allocator will change this order so that
+  // callee-saved registers are used later, as they require extra work in the
+  // prologue/epilogue (though we sometimes override that).
+
+  // For thumb1-only targets, only the low registers are allocatable.
+  if (isThumb1Only())
+    return 2;
+
+  // Allocate low registers first, so we can select more 16-bit instructions.
+  // We also (in ignoreCSRForAllocationOrder) override the default behaviour
+  // with regard to callee-saved registers, because pushing extra registers is
+  // much cheaper (in terms of code size) than using high registers. After
+  // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
+  // can return with the pop, don't need an extra "bx lr") and then the rest of
+  // the high registers.
+  if (isThumb2() && MF.getFunction().hasMinSize())
+    return 3;
+
+  // Otherwise, use order 1, which allocates LR first because saving it
+  // allows a shorter epilogue sequence.
+  return 1;
+}
+
+bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
+                                               unsigned PhysReg) const {
+  // To minimize code size in Thumb2, we prefer low regs (lower cost per use)
+  // so we can use narrow encodings. By default, caller-saved registers (e.g.
+  // lr, r12) are always allocated first, regardless of their cost per use.
+  // When the function has minsize, we prefer the low regs even if they are
+  // CSRs, because usually their push/pop can be folded into existing ones.
+  return isThumb2() && MF.getFunction().hasMinSize() &&
+         ARM::GPRRegClass.contains(PhysReg);
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 11841b4467a2..c2b0f052b843 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -1,9 +1,8 @@
 //===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -60,6 +59,7 @@ protected:
     CortexA72,
     CortexA73,
     CortexA75,
+    CortexA76,
     CortexA8,
     CortexA9,
     CortexM3,
@@ -110,7 +110,8 @@ protected:
     ARMv8a,
     ARMv8mBaseline,
     ARMv8mMainline,
-    ARMv8r
+    ARMv8r,
+    ARMv81mMainline,
   };
 
 public:
@@ -157,6 +158,9 @@ protected:
   bool HasV8_5aOps = false;
   bool HasV8MBaselineOps = false;
   bool HasV8MMainlineOps = false;
+  bool HasV8_1MMainlineOps = false;
+  bool HasMVEIntegerOps = false;
+  bool HasMVEFloatOps = false;
 
   /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
   /// floating point ISAs are supported.
@@ -165,6 +169,24 @@ protected:
   bool HasVFPv4 = false;
   bool HasFPARMv8 = false;
   bool HasNEON = false;
+  bool HasFPRegs = false;
+  bool HasFPRegs16 = false;
+  bool HasFPRegs64 = false;
+
+  /// Versions of the VFP flags restricted to single precision, or to
+  /// 16 d-registers, or both.
+  bool HasVFPv2SP = false;
+  bool HasVFPv3SP = false;
+  bool HasVFPv4SP = false;
+  bool HasFPARMv8SP = false;
+  bool HasVFPv2D16 = false;
+  bool HasVFPv3D16 = false;
+  bool HasVFPv4D16 = false;
+  bool HasFPARMv8D16 = false;
+  bool HasVFPv2D16SP = false;
+  bool HasVFPv3D16SP = false;
+  bool HasVFPv4D16SP = false;
+  bool HasFPARMv8D16SP = false;
 
   /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
   bool HasDotProd = false;
@@ -232,9 +254,9 @@ protected:
   /// HasFP16FML - True if subtarget supports half-precision FP fml operations
   bool HasFP16FML = false;
 
-  /// HasD16 - True if subtarget is limited to 16 double precision
+  /// HasD32 - True if subtarget has the full 32 double precision
   /// FP registers for VFPv3.
-  bool HasD16 = false;
+  bool HasD32 = false;
 
   /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
   bool HasHardwareDivideInThumb = false;
@@ -291,9 +313,9 @@ protected:
   /// extension.
   bool HasVirtualization = false;
 
-  /// FPOnlySP - If true, the floating point unit only supports single
+  /// HasFP64 - If true, the floating point unit supports double
   /// precision.
-  bool FPOnlySP = false;
+  bool HasFP64 = false;
 
   /// If true, the processor supports the Performance Monitor Extensions. These
   /// include a generic cycle-counter as well as more fine-grained (often
@@ -321,6 +343,9 @@ protected:
   /// HasRAS - if true, the processor supports RAS extensions
   bool HasRAS = false;
 
+  /// HasLOB - if true, the processor supports the Low Overhead Branch extension
+  bool HasLOB = false;
+
   /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
   /// particularly effective at zeroing a VFP register.
   bool HasZeroCycleZeroing = false;
@@ -446,6 +471,10 @@ protected:
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;
 
+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;
 
@@ -468,7 +497,8 @@ public:
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -546,6 +576,12 @@ public:
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+  bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
+  bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
+  bool hasMVEFloatOps() const { return HasMVEFloatOps; }
+  bool hasFPRegs() const { return HasFPRegs; }
+  bool hasFPRegs16() const { return HasFPRegs16; }
+  bool hasFPRegs64() const { return HasFPRegs64; }
 
   /// @{
   /// These functions are obsolete, please consider adding subtarget features
@@ -564,10 +600,10 @@ public:
 
   bool hasARMOps() const { return !NoARM; }
 
-  bool hasVFP2() const { return HasVFPv2; }
-  bool hasVFP3() const { return HasVFPv3; }
-  bool hasVFP4() const { return HasVFPv4; }
-  bool hasFPARMv8() const { return HasFPARMv8; }
+  bool hasVFP2Base() const { return HasVFPv2D16SP; }
+  bool hasVFP3Base() const { return HasVFPv3D16SP; }
+  bool hasVFP4Base() const { return HasVFPv4D16SP; }
+  bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
   bool hasNEON() const { return HasNEON;  }
   bool hasSHA2() const { return HasSHA2; }
   bool hasAES() const { return HasAES; }
@@ -575,6 +611,7 @@ public:
   bool hasDotProd() const { return HasDotProd; }
   bool hasCRC() const { return HasCRC; }
   bool hasRAS() const { return HasRAS; }
+  bool hasLOB() const { return HasLOB; }
   bool hasVirtualization() const { return HasVirtualization; }
 
   bool useNEONForSinglePrecisionFP() const {
@@ -596,7 +633,7 @@ public:
   bool useFPVMLx() const { return !SlowFPVMLx; }
   bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
-  bool isFPOnlySP() const { return FPOnlySP; }
+  bool hasFP64() const { return HasFP64; }
   bool hasPerfMon() const { return HasPerfMon; }
   bool hasTrustZone() const { return HasTrustZone; }
   bool has8MSecExt() const { return Has8MSecExt; }
@@ -633,7 +670,7 @@ public:
   bool genExecuteOnly() const { return GenExecuteOnly; }
 
   bool hasFP16() const { return HasFP16; }
-  bool hasD16() const { return HasD16; }
+  bool hasD32() const { return HasD32; }
   bool hasFullFP16() const { return HasFullFP16; }
   bool hasFP16FML() const { return HasFP16FML; }
 
@@ -710,6 +747,7 @@ public:
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool hasMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -736,9 +774,9 @@ public:
            isThumb1Only();
   }
 
-  bool useStride4VFPs(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;
 
-  bool useMovt(const MachineFunction &MF) const;
+  bool useMovt() const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
 
@@ -818,6 +856,10 @@ public:
   unsigned getPrefLoopAlignment() const {
     return PrefLoopAlignment;
   }
+
+  bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+                                   unsigned PhysReg) const override;
+  unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMSystemRegister.td b/lib/Target/ARM/ARMSystemRegister.td
index ad1d37168e08..f21c7f0246f9 100644
--- a/lib/Target/ARM/ARMSystemRegister.td
+++ b/lib/Target/ARM/ARMSystemRegister.td
@@ -1,9 +1,8 @@
 //===-- ARMSystemRegister.td - ARM Register defs -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index ec02c840d5e1..7f0aae1739b3 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "ARMTargetObjectFile.h"
 #include "ARMTargetTransformInfo.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -95,6 +95,8 @@ extern "C" void LLVMInitializeARMTarget() {
   initializeARMExecutionDomainFixPass(Registry);
   initializeARMExpandPseudoPass(Registry);
   initializeThumb2SizeReducePass(Registry);
+  initializeMVEVPTBlockPass(Registry);
+  initializeARMLowOverheadLoopsPass(Registry);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -142,6 +144,10 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // Pointers are 32 bits and aligned to 32 bits.
   Ret += "-p:32:32";
 
+  // Function pointers are aligned to 8 bits (because the LSB stores the
+  // ARM/Thumb state).
+  Ret += "-Fi8";
+
   // ABIs other than APCS have 64 bit integers with natural alignment.
   if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
     Ret += "-i64:64";
@@ -264,13 +270,20 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize to identify the subtarget, but don't use it in the
+  // feature string.
+  std::string Key = CPU + FS;
+  if (F.hasMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.hasMinSize());
 
     if (!I->isThumb() && !I->hasARMOps())
       F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
@@ -351,6 +364,8 @@ public:
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+
+  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
 
 class ARMExecutionDomainFix : public ExecutionDomainFix {
@@ -375,6 +390,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new ARMPassConfig(*this, PM);
 }
 
+std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const {
+  return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
 void ARMPassConfig::addIRPasses() {
   if (TM->Options.ThreadModel == ThreadModel::Single)
     addPass(createLowerAtomicPass());
@@ -393,6 +412,10 @@ void ARMPassConfig::addIRPasses() {
 
   TargetPassConfig::addIRPasses();
 
+  // Run the parallel DSP pass.
+  if (getOptLevel() == CodeGenOpt::Aggressive) 
+    addPass(createARMParallelDSPPass());
+
   // Match interleaved memory accesses to ldN/stN intrinsics.
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createInterleavedAccessPass());
@@ -405,9 +428,6 @@ void ARMPassConfig::addCodeGenPrepare() {
 }
 
 bool ARMPassConfig::addPreISel() {
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createARMParallelDSPPass());
-
   if ((TM->getOptLevel() != CodeGenOpt::None &&
        EnableGlobalMerge == cl::BOU_UNSET) ||
       EnableGlobalMerge == cl::BOU_TRUE) {
@@ -427,6 +447,9 @@ bool ARMPassConfig::addPreISel() {
                                   MergeExternalByDefault));
   }
 
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createHardwareLoopsPass());
+
   return false;
 }
 
@@ -490,6 +513,7 @@ void ARMPassConfig::addPreSched2() {
       return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
     }));
   }
+  addPass(createMVEVPTBlockPass());
   addPass(createThumb2ITBlockPass());
 }
 
@@ -506,4 +530,5 @@ void ARMPassConfig::addPreEmitPass() {
     addPass(createARMOptimizeBarriersPass());
 
   addPass(createARMConstantIslandPass());
+  addPass(createARMLowOverheadLoopsPass());
 }
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 2c791998e702..cb8650d8139b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 9c13359cba71..891329d3f297 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 0dc0882809c0..7b15dcc61f56 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f72bb8632eb7..2a8ec734a05f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -22,6 +21,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Type.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Casting.h"
@@ -36,6 +36,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "armtti"
 
+static cl::opt<bool> DisableLowOverheadLoops(
+  "disable-arm-loloops", cl::Hidden, cl::init(true),
+  cl::desc("Disable the generation of low-overhead loops"));
+
 bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                      const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -107,9 +111,13 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
       Idx == 1)
     return 0;
 
-  if (Opcode == Instruction::And)
-      // Conversion to BIC is free, and means we can use ~Imm instead.
-      return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+  if (Opcode == Instruction::And) {
+    // UXTB/UXTH
+    if (Imm == 255 || Imm == 65535)
+      return 0;
+    // Conversion to BIC is free, and means we can use ~Imm instead.
+    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+  }
 
   if (Opcode == Instruction::Add)
     // Conversion to SUB is free, and means we can use -Imm instead.
@@ -398,6 +406,40 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
   return 1;
 }
 
+/// Estimate the cost of the memcpy \p I, which must be a MemCpyInst.
+int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
+  const MemCpyInst *MI = cast<MemCpyInst>(I);
+  ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
+
+  // To model the cost of a library call, we assume 1 for the call, and
+  // 3 for the argument setup.
+  const unsigned LibCallCost = 4;
+
+  // If 'size' is not a constant, a library call will be generated.
+  if (!C)
+    return LibCallCost;
+
+  const uint64_t Size = C->getZExtValue(); // Keep all 64 bits of the length.
+  const unsigned DstAlign = MI->getDestAlignment();
+  const unsigned SrcAlign = MI->getSourceAlignment();
+  const Function *F = I->getParent()->getParent();
+  const unsigned Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
+  std::vector<EVT> MemOps;
+
+  // MemOps will be populated with a list of data types that need to be
+  // loaded and stored. That's why we multiply the number of elements by 2 to
+  // get the cost for this memcpy.
+  if (getTLI()->findOptimalMemOpLowering(
+          MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
+          false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
+          MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
+          F->getAttributes()))
+    return MemOps.size() * 2;
+
+  // If we can't find an optimal memop lowering, return the default cost
+  return LibCallCost;
+}
+
 int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
   if (Kind == TTI::SK_Broadcast) {
@@ -590,6 +632,222 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                            UseMaskForCond, UseMaskForGaps);
 }
 
+bool ARMTTIImpl::isLoweredToCall(const Function *F) {
+  if (!F->isIntrinsic())
+    return BaseT::isLoweredToCall(F); // Not an intrinsic: defer to the base.
+
+  // Assume all Arm-specific intrinsics map to an instruction.
+  if (F->getName().startswith("llvm.arm"))
+    return false;
+
+  switch (F->getIntrinsicID()) {
+  default: break;
+  case Intrinsic::powi:
+  case Intrinsic::sin:
+  case Intrinsic::cos:
+  case Intrinsic::pow:
+  case Intrinsic::log:
+  case Intrinsic::log10:
+  case Intrinsic::log2:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+    return true; // Reported as a call regardless of the subtarget.
+  case Intrinsic::sqrt:
+  case Intrinsic::fabs:
+  case Intrinsic::copysign:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::round:
+  case Intrinsic::canonicalize:
+  case Intrinsic::lround:
+  case Intrinsic::llround:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint:
+    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
+      return true;
+    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
+      return true;
+    // Some operations can be handled by vector instructions and assume
+    // unsupported vectors will be expanded into supported scalar ones.
+    // TODO Handle scalar operations properly.
+    return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
+  case Intrinsic::masked_store:
+  case Intrinsic::masked_load:
+  case Intrinsic::masked_gather:
+  case Intrinsic::masked_scatter:
+    return !ST->hasMVEIntegerOps(); // A call unless MVE integer ops exist.
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::sadd_sat:
+  case Intrinsic::uadd_sat:
+  case Intrinsic::ssub_sat:
+  case Intrinsic::usub_sat:
+    return false; // Never reported as a call.
+  }
+
+  return BaseT::isLoweredToCall(F);
+}
+
+bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                          AssumptionCache &AC,
+                                          TargetLibraryInfo *LibInfo,
+                                          HardwareLoopInfo &HWLoopInfo) {
+  // Low-overhead branches are only supported in the 'low-overhead branch'
+  // extension of v8.1-m, and can also be switched off from the command line.
+  if (!ST->hasLOB() || DisableLowOverheadLoops)
+    return false;
+
+  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
+    return false;
+
+  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    return false;
+
+  const SCEV *TripCountSCEV =
+    SE.getAddExpr(BackedgeTakenCount,
+                  SE.getOne(BackedgeTakenCount->getType()));
+
+  // We need to store the trip count in LR, a 32-bit register.
+  if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32)
+    return false;
+
+  // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
+  // point in generating a hardware loop if that's going to happen.
+  auto MaybeCall = [this](Instruction &I) {
+    const ARMTargetLowering *TLI = getTLI();
+    unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
+    EVT VT = TLI->getValueType(DL, I.getType(), true);
+    if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
+      return true;
+
+    // Check if an intrinsic will be lowered to a call and assume that any
+    // other CallInst will generate a bl.
+    if (auto *Call = dyn_cast<CallInst>(&I)) {
+      if (isa<IntrinsicInst>(Call)) {
+        if (const Function *F = Call->getCalledFunction())
+          return isLoweredToCall(F);
+      }
+      return true;
+    }
+
+    // FPv5 provides conversions between integer, double-precision,
+    // single-precision, and half-precision formats.
+    switch (I.getOpcode()) {
+    default:
+      break;
+    case Instruction::FPToSI:
+    case Instruction::FPToUI:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+      return !ST->hasFPARMv8Base();
+    }
+
+    // FIXME: Unfortunately the approach of checking the Operation Action does
+    // not catch all cases of Legalization that use library calls. Our
+    // Legalization step categorizes some transformations into library calls as
+    // Custom, Expand or even Legal when doing type legalization. So for now
+    // we have to special case for instance the SDIV of 64bit integers and the
+    // use of floating point emulation.
+    if (VT.isInteger() && VT.getSizeInBits() >= 64) {
+      switch (ISD) {
+      default:
+        break;
+      case ISD::SDIV:
+      case ISD::UDIV:
+      case ISD::SREM:
+      case ISD::UREM:
+      case ISD::SDIVREM:
+      case ISD::UDIVREM:
+        return true;
+      }
+    }
+
+    // Assume all other non-float operations are supported.
+    if (!VT.isFloatingPoint())
+      return false;
+
+    // We'll need a library call to handle most floats when using soft.
+    if (TLI->useSoftFloat()) {
+      switch (I.getOpcode()) {
+      default:
+        return true;
+      case Instruction::Alloca:
+      case Instruction::Load:
+      case Instruction::Store:
+      case Instruction::Select:
+      case Instruction::PHI:
+        return false;
+      }
+    }
+
+    // We'll need a libcall to perform double precision operations on a single
+    // precision only FPU.
+    if (I.getType()->isDoubleTy() && !ST->hasFP64())
+      return true;
+
+    // Likewise for half precision arithmetic.
+    if (I.getType()->isHalfTy() && !ST->hasFullFP16())
+      return true;
+
+    return false;
+  };
+
+  auto IsHardwareLoopIntrinsic = [](Instruction &I) {
+    if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
+      switch (Call->getIntrinsicID()) {
+      default:
+        break;
+      case Intrinsic::set_loop_iterations:
+      case Intrinsic::test_set_loop_iterations:
+      case Intrinsic::loop_decrement:
+      case Intrinsic::loop_decrement_reg:
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // Scan the instructions to see if there are any that we know will turn into
+  // a call, or if this loop already contains hardware-loop intrinsics.
+  auto ScanLoop = [&](Loop *L) {
+    for (auto *BB : L->getBlocks()) {
+      for (auto &I : *BB) {
+        if (MaybeCall(I) || IsHardwareLoopIntrinsic(I))
+          return false; // Found something that rules this loop out.
+      }
+    }
+    return true;
+  };
+
+  // Visit the inner loops first, then L itself.
+  for (auto Inner : *L)
+    if (!ScanLoop(Inner))
+      return false;
+
+  if (!ScanLoop(L))
+    return false;
+
+  // TODO: Check whether the trip count calculation is expensive. If L is the
+  // inner loop but we know it has a low trip count, calculating that trip
+  // count (in the parent loop) may be detrimental.
+
+  LLVMContext &C = L->getHeader()->getContext();
+  HWLoopInfo.CounterInReg = true;
+  HWLoopInfo.IsNestingLegal = false;
+  HWLoopInfo.PerformEntryTest = true;
+  HWLoopInfo.CountType = Type::getInt32Ty(C); // 32 bits, as LR is.
+  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+  return true;
+}
+
 void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   // Only currently enable these preferences for M-Class cores.
@@ -599,7 +857,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   // Disable loop unrolling for Oz and Os.
   UP.OptSizeThreshold = 0;
   UP.PartialOptSizeThreshold = 0;
-  if (L->getHeader()->getParent()->optForSize())
+  if (L->getHeader()->getParent()->hasOptSize())
     return;
 
   // Only enable on Thumb-2 targets.
@@ -645,6 +903,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 
   UP.Partial = true;
   UP.Runtime = true;
+  UP.UpperBound = true;
   UP.UnrollRemainder = true;
   UP.DefaultUnrollRuntimeCount = 4;
   UP.UnrollAndJam = true;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 2dd143d48a15..52f6ea4a6e2f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,7 +48,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   const ARMTargetLowering *TLI;
 
   // Currently the following features are excluded from InlineFeatureWhitelist.
-  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
   // Depending on whether they are set or unset, different
   // instructions/registers are available. For example, inlining a callee with
   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
@@ -94,6 +93,12 @@ public:
 
   bool enableInterleavedAccessVectorization() { return true; }
 
+  bool shouldFavorBackedgeIndex(const Loop *L) const {
+    if (L->getHeader()->getParent()->hasOptSize())
+      return false;
+    return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
+  }
+
   /// Floating-point computation using ARMv8 AArch32 Advanced
   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
   /// is IEEE-754 compliant, but it's not covered in this target.
@@ -143,6 +148,8 @@ public:
     return ST->getMaxInterleaveFactor();
   }
 
+  int getMemcpyCost(const Instruction *I);
+
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
 
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -173,6 +180,12 @@ public:
                                  bool UseMaskForCond = false,
                                  bool UseMaskForGaps = false);
 
+  bool isLoweredToCall(const Function *F);
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo);
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3832b0112b87..1da9452f1d22 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1,19 +1,20 @@
 //===- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "ARMFeatures.h"
-#include "InstPrinter/ARMInstPrinter.h"
+#include "ARMBaseInstrInfo.h"
 #include "Utils/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMInstPrinter.h"
 #include "MCTargetDesc/ARMMCExpr.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/None.h"
@@ -69,6 +70,10 @@
 
 using namespace llvm;
 
+namespace llvm {
+extern const MCInstrDesc ARMInsts[];
+} // end namespace llvm
+
 namespace {
 
 enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly };
@@ -90,6 +95,16 @@ static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
 
 enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
 
+static inline unsigned extractITMaskBit(unsigned Mask, unsigned Position) {
+  // Position==0 means we're not in an IT block at all. Position==1
+  // means we want the first state bit, which is always 0 (Then).
+  // Position==2 means we want the second state bit, stored at bit 3
+  // of Mask, and so on downwards. So (5 - Position) will shift the
+  // right bit down to bit 0, including the always-0 bit at bit 4 for
+  // the mandatory initial Then.
+  return (Mask >> (5 - Position) & 1);
+}
+
 class UnwindContext {
   using Locs = SmallVector<SMLoc, 4>;
 
@@ -165,6 +180,7 @@ public:
   }
 };
 
+
 class ARMAsmParser : public MCTargetAsmParser {
   const MCRegisterInfo *MRI;
   UnwindContext UC;
@@ -225,11 +241,10 @@ class ARMAsmParser : public MCTargetAsmParser {
     }
 
     // Emit the IT instruction
-    unsigned Mask = getITMaskEncoding();
     MCInst ITInst;
     ITInst.setOpcode(ARM::t2IT);
     ITInst.addOperand(MCOperand::createImm(ITState.Cond));
-    ITInst.addOperand(MCOperand::createImm(Mask));
+    ITInst.addOperand(MCOperand::createImm(ITState.Mask));
     Out.EmitInstruction(ITInst, getSTI());
 
     // Emit the conditonal instructions
@@ -287,27 +302,10 @@ class ARMAsmParser : public MCTargetAsmParser {
     return MRI->getSubReg(QReg, ARM::dsub_0);
   }
 
-  // Get the encoding of the IT mask, as it will appear in an IT instruction.
-  unsigned getITMaskEncoding() {
-    assert(inITBlock());
-    unsigned Mask = ITState.Mask;
-    unsigned TZ = countTrailingZeros(Mask);
-    if ((ITState.Cond & 1) == 0) {
-      assert(Mask && TZ <= 3 && "illegal IT mask value!");
-      Mask ^= (0xE << TZ) & 0xF;
-    }
-    return Mask;
-  }
-
   // Get the condition code corresponding to the current IT block slot.
   ARMCC::CondCodes currentITCond() {
-    unsigned MaskBit;
-    if (ITState.CurPosition == 1)
-      MaskBit = 1;
-    else
-      MaskBit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
-
-    return MaskBit ? ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond);
+    unsigned MaskBit = extractITMaskBit(ITState.Mask, ITState.CurPosition);
+    return MaskBit ? ARMCC::getOppositeCondition(ITState.Cond) : ITState.Cond;
   }
 
   // Invert the condition of the current IT block slot without changing any
@@ -337,7 +335,7 @@ class ARMAsmParser : public MCTargetAsmParser {
     // Keep any existing condition bits.
     NewMask |= ITState.Mask & (0xE << TZ);
     // Insert the new condition bit.
-    NewMask |= (Cond == ITState.Cond) << TZ;
+    NewMask |= (Cond != ITState.Cond) << TZ;
     // Move the trailing 1 down one bit.
     NewMask |= 1 << (TZ - 1);
     ITState.Mask = NewMask;
@@ -352,9 +350,10 @@ class ARMAsmParser : public MCTargetAsmParser {
     ITState.IsExplicit = false;
   }
 
-  // Create a new explicit IT block with the given condition and mask. The mask
-  // should be in the parsed format, with a 1 implying 't', regardless of the
-  // low bit of the condition.
+  // Create a new explicit IT block with the given condition and mask.
+  // The mask should be in the format used in ARMOperand and
+  // MCOperand, with a 1 implying 'e', regardless of the low bit of
+  // the condition.
   void startExplicitITBlock(ARMCC::CondCodes Cond, unsigned Mask) {
     assert(!inITBlock());
     ITState.Cond = Cond;
@@ -363,6 +362,18 @@ class ARMAsmParser : public MCTargetAsmParser {
     ITState.IsExplicit = true;
   }
 
+  struct {
+    unsigned Mask : 4;
+    unsigned CurPosition;
+  } VPTState;
+  bool inVPTBlock() { return VPTState.CurPosition != ~0U; }
+  void forwardVPTPosition() {
+    if (!inVPTBlock()) return;
+    unsigned TZ = countTrailingZeros(VPTState.Mask);
+    if (++VPTState.CurPosition == 5 - TZ)
+      VPTState.CurPosition = ~0U;
+  }
+
   void Note(SMLoc L, const Twine &Msg, SMRange Range = None) {
     return getParser().Note(L, Msg, Range);
   }
@@ -383,7 +394,7 @@ class ARMAsmParser : public MCTargetAsmParser {
   int tryParseRegister();
   bool tryParseRegisterWithWriteBack(OperandVector &);
   int tryParseShiftRegister(OperandVector &);
-  bool parseRegisterList(OperandVector &);
+  bool parseRegisterList(OperandVector &, bool EnforceOrder = true);
   bool parseMemory(OperandVector &);
   bool parseOperand(OperandVector &, StringRef Mnemonic);
   bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
@@ -421,12 +432,15 @@ class ARMAsmParser : public MCTargetAsmParser {
   bool parseDirectiveAlign(SMLoc L);
   bool parseDirectiveThumbSet(SMLoc L);
 
-  StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
-                          bool &CarrySetting, unsigned &ProcessorIMod,
-                          StringRef &ITMask);
-  void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
-                             bool &CanAcceptCarrySet,
-                             bool &CanAcceptPredicationCode);
+  bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken);
+  StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken,
+                          unsigned &PredicationCode,
+                          unsigned &VPTPredicationCode, bool &CarrySetting,
+                          unsigned &ProcessorIMod, StringRef &ITMask);
+  void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef ExtraToken,
+                             StringRef FullInst, bool &CanAcceptCarrySet,
+                             bool &CanAcceptPredicationCode,
+                             bool &CanAcceptVPTPredicationCode);
 
   void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
                                      OperandVector &Operands);
@@ -478,7 +492,15 @@ class ARMAsmParser : public MCTargetAsmParser {
   bool hasV8MMainline() const {
     return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps];
   }
-
+  bool hasV8_1MMainline() const {
+    return getSTI().getFeatureBits()[ARM::HasV8_1MMainlineOps];
+  }
+  bool hasMVE() const {
+    return getSTI().getFeatureBits()[ARM::HasMVEIntegerOps];
+  }
+  bool hasMVEFloat() const {
+    return getSTI().getFeatureBits()[ARM::HasMVEFloatOps];
+  }
   bool has8MSecExt() const {
     return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
   }
@@ -491,8 +513,8 @@ class ARMAsmParser : public MCTargetAsmParser {
     return getSTI().getFeatureBits()[ARM::FeatureDSP];
   }
 
-  bool hasD16() const {
-    return getSTI().getFeatureBits()[ARM::FeatureD16];
+  bool hasD32() const {
+    return getSTI().getFeatureBits()[ARM::FeatureD32];
   }
 
   bool hasV8_1aOps() const {
@@ -505,7 +527,7 @@ class ARMAsmParser : public MCTargetAsmParser {
 
   void SwitchMode() {
     MCSubtargetInfo &STI = copySTI();
-    uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+    auto FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
     setAvailableFeatures(FB);
   }
 
@@ -556,11 +578,13 @@ class ARMAsmParser : public MCTargetAsmParser {
   // Asm Match Converter Methods
   void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
   void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+  void cvtMVEVMOVQtoDReg(MCInst &Inst, const OperandVector &);
 
   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
   bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
   bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
   bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+  bool shouldOmitVectorPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
   bool isITBlockTerminator(MCInst &Inst) const;
   void fixupGNULDRDAlias(StringRef Mnemonic, OperandVector &Operands);
   bool validateLDRDSTRD(MCInst &Inst, const OperandVector &Operands,
@@ -597,6 +621,8 @@ public:
     // Not in an ITBlock to start with.
     ITState.CurPosition = ~0U;
 
+    VPTState.CurPosition = ~0U;
+
     NextSymbolIsThumb = false;
   }
 
@@ -642,6 +668,7 @@ public:
 class ARMOperand : public MCParsedAsmOperand {
   enum KindTy {
     k_CondCode,
+    k_VPTPred,
     k_CCOut,
     k_ITCondMask,
     k_CoprocNum,
@@ -659,8 +686,11 @@ class ARMOperand : public MCParsedAsmOperand {
     k_VectorIndex,
     k_Register,
     k_RegisterList,
+    k_RegisterListWithAPSR,
     k_DPRRegisterList,
     k_SPRRegisterList,
+    k_FPSRegisterListWithVPR,
+    k_FPDRegisterListWithVPR,
     k_VectorList,
     k_VectorListAllLanes,
     k_VectorListIndexed,
@@ -681,6 +711,10 @@ class ARMOperand : public MCParsedAsmOperand {
     ARMCC::CondCodes Val;
   };
 
+  struct VCCOp {
+    ARMVCC::VPTCodes Val;
+  };
+
   struct CopOp {
     unsigned Val;
   };
@@ -797,6 +831,7 @@ class ARMOperand : public MCParsedAsmOperand {
 
   union {
     struct CCOp CC;
+    struct VCCOp VCC;
     struct CopOp Cop;
     struct CoprocOptionOp CoprocOption;
     struct MBOptOp MBOpt;
@@ -845,6 +880,11 @@ public:
     return CC.Val;
   }
 
+  ARMVCC::VPTCodes getVPTPred() const {
+    assert(isVPTPred() && "Invalid access!");
+    return VCC.Val;
+  }
+
   unsigned getCoproc() const {
     assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
     return Cop.Val;
@@ -861,8 +901,11 @@ public:
   }
 
   const SmallVectorImpl<unsigned> &getRegList() const {
-    assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
-            Kind == k_SPRRegisterList) && "Invalid access!");
+    assert((Kind == k_RegisterList || Kind == k_RegisterListWithAPSR ||
+            Kind == k_DPRRegisterList || Kind == k_SPRRegisterList ||
+            Kind == k_FPSRegisterListWithVPR ||
+            Kind == k_FPDRegisterListWithVPR) &&
+           "Invalid access!");
     return Registers;
   }
 
@@ -915,6 +958,7 @@ public:
   bool isCoprocReg() const { return Kind == k_CoprocReg; }
   bool isCoprocOption() const { return Kind == k_CoprocOption; }
   bool isCondCode() const { return Kind == k_CondCode; }
+  bool isVPTPred() const { return Kind == k_VPTPred; }
   bool isCCOut() const { return Kind == k_CCOut; }
   bool isITMask() const { return Kind == k_ITCondMask; }
   bool isITCondCode() const { return Kind == k_CondCode; }
@@ -970,6 +1014,18 @@ public:
     return false;
   }
 
+  // Checks whether this operand is usable as the offset of the Arm v8.1M
+  // LE / LETP instructions: a symbol, or an even constant in [-4094, -2].
+  bool isLEOffset() const {
+    if (!isImm()) return false;
+    if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+    const auto *Const = dyn_cast<MCConstantExpr>(Imm.Val);
+    if (!Const) return false;
+    const int64_t Offset = Const->getValue();
+    const bool InRange = Offset >= -4094 && Offset < 0;
+    return InRange && Offset % 2 == 0;
+  }
+
   // checks whether this operand is a memory operand computed as an offset
   // applied to PC. the offset may have 8 bits of magnitude and is represented
   // with two bits of shift. textually it may be either [pc, #imm], #imm or
@@ -982,7 +1038,7 @@ public:
       if (!CE) return false;
       Val = CE->getValue();
     }
-    else if (isMem()) {
+    else if (isGPRMem()) {
       if(!Memory.OffsetImm || Memory.OffsetRegNum) return false;
       if(Memory.BaseRegNum != ARM::PC) return false;
       Val = Memory.OffsetImm->getValue();
@@ -1016,7 +1072,14 @@ public:
     int64_t Value = CE->getValue();
     return ((Value & 3) == 0) && Value >= N && Value <= M;
   }
-
+  /// True for a constant immediate that is even and lies within [N, M].
+  template<int64_t N, int64_t M>
+  bool isImmediateS2() const {
+    const auto *Cst = isImm() ? dyn_cast<MCConstantExpr>(getImm()) : nullptr;
+    if (!Cst) return false;
+    const int64_t V = Cst->getValue();
+    return V >= N && V <= M && (V & 1) == 0;
+  }
   bool isFBits16() const {
     return isImmediate<0, 17>();
   }
@@ -1026,6 +1089,21 @@ public:
   bool isImm8s4() const {
     return isImmediateS4<-1020, 1020>();
   }
+  bool isImm7s4() const {
+    return isImmediateS4<-508, 508>();
+  }
+  bool isImm7Shift0() const {
+    return isImmediate<-127, 127>();
+  }
+  bool isImm7Shift1() const {
+    return isImmediateS2<-255, 255>();
+  }
+  bool isImm7Shift2() const {
+    return isImmediateS4<-511, 511>();
+  }
+  bool isImm7() const {
+    return isImmediate<-127, 127>();
+  }
   bool isImm0_1020s4() const {
     return isImmediateS4<0, 1020>();
   }
@@ -1098,6 +1176,34 @@ public:
     return isImmediate<1, 33>();
   }
 
+  // True if Value is an 8-bit quantity shifted left by exactly `shift` bits.
+  template<int shift>
+  bool isExpImmValue(uint64_t Value) const {
+    // Build the mask in 64 bits so a large `shift` cannot overflow an int.
+    const uint64_t LowBits = (uint64_t(1) << shift) - 1;
+    return (Value & LowBits) == 0 && (Value >> shift) <= 0xff;
+  }
+
+  template<int shift>
+  bool isExpImm() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+
+    return isExpImmValue<shift>(CE->getValue());
+  }
+
+  template<int shift, int size>
+  bool isInvertedExpImm() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+
+    uint64_t OriginalValue = CE->getValue();
+    uint64_t InvertedValue = OriginalValue ^ (((uint64_t)1 << size) - 1);
+    return isExpImmValue<shift>(InvertedValue);
+  }
+
   bool isPKHLSLImm() const {
     return isImmediate<0, 32>();
   }
@@ -1167,13 +1273,34 @@ public:
 
   bool isReg() const override { return Kind == k_Register; }
   bool isRegList() const { return Kind == k_RegisterList; }
+  bool isRegListWithAPSR() const {
+    return Kind == k_RegisterListWithAPSR || Kind == k_RegisterList;
+  }
   bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
   bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+  bool isFPSRegListWithVPR() const { return Kind == k_FPSRegisterListWithVPR; }
+  bool isFPDRegListWithVPR() const { return Kind == k_FPDRegisterListWithVPR; }
   bool isToken() const override { return Kind == k_Token; }
   bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
   bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
   bool isTraceSyncBarrierOpt() const { return Kind == k_TraceSyncBarrierOpt; }
   bool isMem() const override {
+      return isGPRMem() || isMVEMem();
+  }
+  bool isMVEMem() const {
+    if (Kind != k_Memory)
+      return false;
+    if (Memory.BaseRegNum &&
+        !ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Memory.BaseRegNum) &&
+        !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Memory.BaseRegNum))
+      return false;
+    if (Memory.OffsetRegNum &&
+        !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+            Memory.OffsetRegNum))
+      return false;
+    return true;
+  }
+  bool isGPRMem() const {
     if (Kind != k_Memory)
       return false;
     if (Memory.BaseRegNum &&
@@ -1198,6 +1325,16 @@ public:
                RegShiftedImm.SrcReg);
   }
   bool isRotImm() const { return Kind == k_RotateImmediate; }
+
+  template<unsigned Min, unsigned Max>
+  bool isPowerTwoInRange() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+    int64_t Value = CE->getValue();
+    return Value > 0 && countPopulation((uint64_t)Value) == 1 &&
+           Value >= Min && Value <= Max;
+  }
   bool isModImm() const { return Kind == k_ModifiedImmediate; }
 
   bool isModImmNot() const {
@@ -1243,14 +1380,50 @@ public:
     return isPostIdxRegShifted() && PostIdxReg.ShiftTy == ARM_AM::no_shift;
   }
   bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const {
-    if (!isMem())
+    if (!isGPRMem())
+      return false;
+    // No offset of any kind.
+    return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+     (alignOK || Memory.Alignment == Alignment);
+  }
+  bool isMemNoOffsetT2(bool alignOK = false, unsigned Alignment = 0) const {
+    if (!isGPRMem())
+      return false;
+
+    if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+            Memory.BaseRegNum))
+      return false;
+
+    // No offset of any kind.
+    return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+     (alignOK || Memory.Alignment == Alignment);
+  }
+  bool isMemNoOffsetT2NoSp(bool alignOK = false, unsigned Alignment = 0) const {
+    if (!isGPRMem())
+      return false;
+
+    if (!ARMMCRegisterClasses[ARM::rGPRRegClassID].contains(
+            Memory.BaseRegNum))
       return false;
+
+    // No offset of any kind.
+    return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+     (alignOK || Memory.Alignment == Alignment);
+  }
+  bool isMemNoOffsetT(bool alignOK = false, unsigned Alignment = 0) const {
+    if (!isGPRMem())
+      return false;
+
+    if (!ARMMCRegisterClasses[ARM::tGPRRegClassID].contains(
+            Memory.BaseRegNum))
+      return false;
+
     // No offset of any kind.
     return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
      (alignOK || Memory.Alignment == Alignment);
   }
   bool isMemPCRelImm12() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Base register must be PC.
     if (Memory.BaseRegNum != ARM::PC)
@@ -1337,7 +1510,7 @@ public:
   }
 
   bool isAddrMode2() const {
-    if (!isMem() || Memory.Alignment != 0) return false;
+    if (!isGPRMem() || Memory.Alignment != 0) return false;
     // Check for register offset.
     if (Memory.OffsetRegNum) return true;
     // Immediate offset in range [-4095, 4095].
@@ -1362,7 +1535,7 @@ public:
     // and we reject it.
     if (isImm() && !isa<MCConstantExpr>(getImm()))
       return true;
-    if (!isMem() || Memory.Alignment != 0) return false;
+    if (!isGPRMem() || Memory.Alignment != 0) return false;
     // No shifts are legal for AM3.
     if (Memory.ShiftType != ARM_AM::no_shift) return false;
     // Check for register offset.
@@ -1396,7 +1569,7 @@ public:
     // and we reject it.
     if (isImm() && !isa<MCConstantExpr>(getImm()))
       return true;
-    if (!isMem() || Memory.Alignment != 0) return false;
+    if (!isGPRMem() || Memory.Alignment != 0) return false;
     // Check for register offset.
     if (Memory.OffsetRegNum) return false;
     // Immediate offset in range [-1020, 1020] and a multiple of 4.
@@ -1412,7 +1585,7 @@ public:
     // and we reject it.
     if (isImm() && !isa<MCConstantExpr>(getImm()))
       return true;
-    if (!isMem() || Memory.Alignment != 0) return false;
+    if (!isGPRMem() || Memory.Alignment != 0) return false;
     // Check for register offset.
     if (Memory.OffsetRegNum) return false;
     // Immediate offset in range [-510, 510] and a multiple of 2.
@@ -1423,14 +1596,14 @@ public:
   }
 
   bool isMemTBB() const {
-    if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+    if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
         Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
       return false;
     return true;
   }
 
   bool isMemTBH() const {
-    if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+    if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
         Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
         Memory.Alignment != 0 )
       return false;
@@ -1438,13 +1611,13 @@ public:
   }
 
   bool isMemRegOffset() const {
-    if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+    if (!isGPRMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
       return false;
     return true;
   }
 
   bool isT2MemRegOffset() const {
-    if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+    if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
         Memory.Alignment != 0 || Memory.BaseRegNum == ARM::PC)
       return false;
     // Only lsl #{0, 1, 2, 3} allowed.
@@ -1458,7 +1631,7 @@ public:
   bool isMemThumbRR() const {
     // Thumb reg+reg addressing is simple. Just two registers, a base and
     // an offset. No shifts, negations or any other complicating factors.
-    if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+    if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
         Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
       return false;
     return isARMLowRegister(Memory.BaseRegNum) &&
@@ -1466,7 +1639,7 @@ public:
   }
 
   bool isMemThumbRIs4() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 ||
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
         !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
       return false;
     // Immediate offset, multiple of 4 in range [0, 124].
@@ -1476,7 +1649,7 @@ public:
   }
 
   bool isMemThumbRIs2() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 ||
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
         !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
       return false;
     // Immediate offset, multiple of 4 in range [0, 62].
@@ -1486,7 +1659,7 @@ public:
   }
 
   bool isMemThumbRIs1() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 ||
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
         !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
       return false;
     // Immediate offset in range [0, 31].
@@ -1496,7 +1669,7 @@ public:
   }
 
   bool isMemThumbSPI() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 ||
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
         Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
       return false;
     // Immediate offset, multiple of 4 in range [0, 1020].
@@ -1511,7 +1684,7 @@ public:
     // and we reject it.
     if (isImm() && !isa<MCConstantExpr>(getImm()))
       return true;
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Immediate offset a multiple of 4 in range [-1020, 1020].
     if (!Memory.OffsetImm) return true;
@@ -1520,9 +1693,24 @@ public:
     return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) ||
            Val == std::numeric_limits<int32_t>::min();
   }
-
+  bool isMemImm7s4Offset() const {
+    // If we have an immediate that's not a constant, treat it as a label
+    // reference needing a fixup. If it is a constant, it's something else
+    // and we reject it.
+    if (isImm() && !isa<MCConstantExpr>(getImm()))
+      return true;
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0 ||
+        !ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+            Memory.BaseRegNum))
+      return false;
+    // Immediate offset a multiple of 4 in range [-508, 508].
+    if (!Memory.OffsetImm) return true;
+    int64_t Val = Memory.OffsetImm->getValue();
+    // Special case, #-0 is INT32_MIN.
+    return (Val >= -508 && Val <= 508 && (Val & 3) == 0) || Val == INT32_MIN;
+  }
   bool isMemImm0_1020s4Offset() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Immediate offset a multiple of 4 in range [0, 1020].
     if (!Memory.OffsetImm) return true;
@@ -1531,7 +1719,7 @@ public:
   }
 
   bool isMemImm8Offset() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Base reg of PC isn't allowed for these encodings.
     if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1542,8 +1730,81 @@ public:
            (Val > -256 && Val < 256);
   }
 
+  template<unsigned Bits, unsigned RegClassID>
+  bool isMemImm7ShiftedOffset() const {
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0 ||
+        !ARMMCRegisterClasses[RegClassID].contains(Memory.BaseRegNum))
+      return false;
+
+    // Expect an immediate offset equal to an element of the range
+    // [-127, 127], shifted left by Bits.
+
+    if (!Memory.OffsetImm) return true;
+    int64_t Val = Memory.OffsetImm->getValue();
+
+    // INT32_MIN is a special-case value (indicating the encoding with
+    // zero offset and the subtract bit set)
+    if (Val == INT32_MIN)
+      return true;
+
+    unsigned Divisor = 1U << Bits;
+
+    // Check that the low bits are zero
+    if (Val % Divisor != 0)
+      return false;
+
+    // Check that the remaining offset is within range.
+    Val /= Divisor;
+    return (Val >= -127 && Val <= 127);
+  }
+
+  template <int shift> bool isMemRegRQOffset() const {
+    if (!isMVEMem() || Memory.OffsetImm != 0 || Memory.Alignment != 0)
+      return false;
+
+    if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+            Memory.BaseRegNum))
+      return false;
+    if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+            Memory.OffsetRegNum))
+      return false;
+
+    if (shift == 0 && Memory.ShiftType != ARM_AM::no_shift)
+      return false;
+
+    if (shift > 0 &&
+        (Memory.ShiftType != ARM_AM::uxtw || Memory.ShiftImm != shift))
+      return false;
+
+    return true;
+  }
+
+  template <int shift> bool isMemRegQOffset() const {
+    if (!isMVEMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+      return false;
+    // Vector-base addressing: the base must be a register in the MQPR class.
+    if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+            Memory.BaseRegNum))
+      return false;
+
+    if(!Memory.OffsetImm) return true;
+    static_assert(shift < 56,
+                  "Such that we dont shift by a value higher than 62");
+    int64_t Val = Memory.OffsetImm->getValue();
+    // The value must be a multiple of (1 << shift). Shift a 64-bit one: the
+    // static_assert above admits shifts that a 32-bit 1U cannot hold.
+    if ((Val & ((int64_t(1) << shift) - 1)) != 0)
+      return false;
+
+    // And be in the right range, depending on the amount that it is shifted
+    // by.  Shift 0, is equal to 7 unsigned bits, the sign bit is set
+    // separately.
+    int64_t Range = (int64_t(1) << (7+shift)) - 1;
+    return (Val == INT32_MIN) || (Val > -Range && Val < Range);
+  }
+
   bool isMemPosImm8Offset() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Immediate offset in range [0, 255].
     if (!Memory.OffsetImm) return true;
@@ -1552,7 +1813,7 @@ public:
   }
 
   bool isMemNegImm8Offset() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Base reg of PC isn't allowed for these encodings.
     if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1564,7 +1825,7 @@ public:
   }
 
   bool isMemUImm12Offset() const {
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Immediate offset in range [0, 4095].
     if (!Memory.OffsetImm) return true;
@@ -1580,7 +1841,7 @@ public:
     if (isImm() && !isa<MCConstantExpr>(getImm()))
       return true;
 
-    if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+    if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
       return false;
     // Immediate offset in range [-4095, 4095].
     if (!Memory.OffsetImm) return true;
@@ -1631,6 +1892,12 @@ public:
     return VectorList.Count == 1;
   }
 
+  bool isVecListTwoMQ() const {
+    return isSingleSpacedVectorList() && VectorList.Count == 2 &&
+           ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+               VectorList.RegNum);
+  }
+
   bool isVecListDPair() const {
     if (!isSingleSpacedVectorList()) return false;
     return (ARMMCRegisterClasses[ARM::DPairRegClassID]
@@ -1664,6 +1931,12 @@ public:
     return VectorList.Count == 4;
   }
 
+  bool isVecListFourMQ() const {
+    return isSingleSpacedVectorList() && VectorList.Count == 4 &&
+           ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+               VectorList.RegNum);
+  }
+
   bool isSingleSpacedVectorAllLanes() const {
     return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
   }
@@ -1806,23 +2079,24 @@ public:
     return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
   }
 
-  bool isVectorIndex8() const {
-    if (Kind != k_VectorIndex) return false;
-    return VectorIndex.Val < 8;
-  }
+  bool isVectorIndex() const { return Kind == k_VectorIndex; }
 
-  bool isVectorIndex16() const {
+  template <unsigned NumLanes>
+  bool isVectorIndexInRange() const {
     if (Kind != k_VectorIndex) return false;
-    return VectorIndex.Val < 4;
+    return VectorIndex.Val < NumLanes;
   }
 
-  bool isVectorIndex32() const {
-    if (Kind != k_VectorIndex) return false;
-    return VectorIndex.Val < 2;
-  }
-  bool isVectorIndex64() const {
+  bool isVectorIndex8()  const { return isVectorIndexInRange<8>(); }
+  bool isVectorIndex16() const { return isVectorIndexInRange<4>(); }
+  bool isVectorIndex32() const { return isVectorIndexInRange<2>(); }
+  bool isVectorIndex64() const { return isVectorIndexInRange<1>(); }
+
+  template<int PermittedValue, int OtherPermittedValue>
+  bool isMVEPairVectorIndex() const {
     if (Kind != k_VectorIndex) return false;
-    return VectorIndex.Val < 1;
+    return VectorIndex.Val == PermittedValue ||
+           VectorIndex.Val == OtherPermittedValue;
   }
 
   bool isNEONi8splat() const {
@@ -1992,6 +2266,51 @@ public:
     return (Value % Angle == Remainder && Value <= 270);
   }
 
+  bool isMVELongShift() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    // Must be a constant.
+    if (!CE) return false;
+    uint64_t Value = CE->getValue();
+    return Value >= 1 && Value <= 32;
+  }
+
+  bool isITCondCodeNoAL() const {
+    if (!isITCondCode()) return false;
+    ARMCC::CondCodes CC = getCondCode();
+    return CC != ARMCC::AL;
+  }
+
+  bool isITCondCodeRestrictedI() const {
+    if (!isITCondCode())
+      return false;
+    ARMCC::CondCodes CC = getCondCode();
+    return CC == ARMCC::EQ || CC == ARMCC::NE;
+  }
+
+  bool isITCondCodeRestrictedS() const {
+    if (!isITCondCode())
+      return false;
+    ARMCC::CondCodes CC = getCondCode();
+    return CC == ARMCC::LT || CC == ARMCC::GT || CC == ARMCC::LE ||
+           CC == ARMCC::GE;
+  }
+
+  bool isITCondCodeRestrictedU() const {
+    if (!isITCondCode())
+      return false;
+    ARMCC::CondCodes CC = getCondCode();
+    return CC == ARMCC::HS || CC == ARMCC::HI;
+  }
+
+  bool isITCondCodeRestrictedFP() const {
+    if (!isITCondCode())
+      return false;
+    ARMCC::CondCodes CC = getCondCode();
+    return CC == ARMCC::EQ || CC == ARMCC::NE || CC == ARMCC::LT ||
+           CC == ARMCC::GT || CC == ARMCC::LE || CC == ARMCC::GE;
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.  Null MCExpr = 0.
     if (!Expr)
@@ -2019,6 +2338,30 @@ public:
     Inst.addOperand(MCOperand::createReg(RegNum));
   }
 
+  void addVPTPredNOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(unsigned(getVPTPred())));
+    unsigned RegNum = getVPTPred() == ARMVCC::None ? 0: ARM::P0;
+    Inst.addOperand(MCOperand::createReg(RegNum));
+  }
+
+  void addVPTPredROperands(MCInst &Inst, unsigned N) const {
+    assert(N == 3 && "Invalid number of operands!");
+    addVPTPredNOperands(Inst, N-1);
+    unsigned RegNum;
+    if (getVPTPred() == ARMVCC::None) {
+      RegNum = 0;
+    } else {
+      unsigned NextOpIndex = Inst.getNumOperands();
+      const MCInstrDesc &MCID = ARMInsts[Inst.getOpcode()];
+      int TiedOp = MCID.getOperandConstraint(NextOpIndex, MCOI::TIED_TO);
+      assert(TiedOp >= 0 &&
+             "Inactive register in vpred_r is not tied to an output!");
+      RegNum = Inst.getOperand(TiedOp).getReg();
+    }
+    Inst.addOperand(MCOperand::createReg(RegNum));
+  }
+
   void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createImm(getCoproc()));
@@ -2044,6 +2387,11 @@ public:
     Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
   }
 
+  void addITCondCodeInvOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(unsigned(ARMCC::getOppositeCondition(getCondCode()))));
+  }
+
   void addCCOutOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getReg()));
@@ -2089,6 +2437,14 @@ public:
       Inst.addOperand(MCOperand::createReg(*I));
   }
 
+  // Emits one register operand per entry of the parsed register list, in
+  // list order. N is the matcher's operand count and must be 1.
+  void addRegListWithAPSROperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    for (unsigned Reg : getRegList())
+      Inst.addOperand(MCOperand::createReg(Reg));
+  }
+
   void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
     addRegListOperands(Inst, N);
   }
@@ -2097,6 +2453,14 @@ public:
     addRegListOperands(Inst, N);
   }
 
+  void addFPSRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
+  void addFPDRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+    addRegListOperands(Inst, N);
+  }
+
   void addRotImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // Encoded as val>>3. The printer handles display as 8, 16, 24.
@@ -2184,6 +2548,42 @@ public:
     Inst.addOperand(MCOperand::createImm(CE->getValue()));
   }
 
+  void addImm7s4Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // FIXME: Scale the value here once VSTR/VLDR_VSYSR encode it that way.
+    // cast<> asserts on a non-constant instead of dereferencing a null.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
+  void addImm7Shift0Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    assert(CE != nullptr && "Invalid operand type!");
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
+  void addImm7Shift1Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    assert(CE != nullptr && "Invalid operand type!");
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
+  void addImm7Shift2Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    assert(CE != nullptr && "Invalid operand type!");
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
+  void addImm7Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    assert(CE != nullptr && "Invalid operand type!");
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
   void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // The immediate is scaled by four in the encoding and is stored
@@ -2293,7 +2693,7 @@ public:
       return;
     }
 
-    assert(isMem()  && "Unknown value type!");
+    assert(isGPRMem()  && "Unknown value type!");
     assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!");
     Inst.addOperand(MCOperand::createImm(Memory.OffsetImm->getValue()));
   }
@@ -2318,6 +2718,21 @@ public:
     Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
   }
 
+  void addMemNoOffsetT2Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+  }
+
+  void addMemNoOffsetT2NoSpOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+  }
+
+  void addMemNoOffsetTOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+  }
+
   void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     int32_t Imm = Memory.OffsetImm->getValue();
@@ -2535,6 +2950,22 @@ public:
     Inst.addOperand(MCOperand::createImm(Val));
   }
 
+  void addMemImm7s4OffsetOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+    // If we have an immediate that's not a constant, treat it as a label
+    // reference needing a fixup. If it is a constant, it's something else
+    // and we reject it.
+    if (isImm()) {
+      Inst.addOperand(MCOperand::createExpr(getImm()));
+      Inst.addOperand(MCOperand::createImm(0));
+      return;
+    }
+
+    int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+    Inst.addOperand(MCOperand::createImm(Val));
+  }
+
   void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     // The lower two bits are always zero and as such are not encoded.
@@ -2543,19 +2974,17 @@ public:
     Inst.addOperand(MCOperand::createImm(Val));
   }
 
-  void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+  void addMemImmOffsetOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
     Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
     Inst.addOperand(MCOperand::createImm(Val));
   }
 
-  void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
-    addMemImm8OffsetOperands(Inst, N);
-  }
-
-  void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
-    addMemImm8OffsetOperands(Inst, N);
+  void addMemRegRQOffsetOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+    Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
   }
 
   void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2699,6 +3128,12 @@ public:
     Inst.addOperand(MCOperand::createImm(Imm));
   }
 
+  void addPowerTwoOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const auto *CE = cast<MCConstantExpr>(getImm()); // asserts if not constant
+    Inst.addOperand(MCOperand::createImm(CE->getValue()));
+  }
+
   void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask())));
@@ -2719,6 +3154,37 @@ public:
     Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
   }
 
+  void addMVEVecListOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+    // When we come here, the VectorList field will identify a range
+    // of q-registers by its base register and length, and it will
+    // have already been error-checked to be the expected length of
+    // range and contain only q-regs in the range q0-q7. So we can
+    // count on the base register being in the range q0-q6 (for 2
+    // regs) or q0-q4 (for 4)
+    //
+    // The MVE instructions taking a register range of this kind will
+    // need an operand in the QQPR or QQQQPR class, representing the
+    // entire range as a unit. So we must translate into that class,
+    // by finding the index of the base register in the MQPR reg
+    // class, and returning the super-register at the corresponding
+    // index in the target class.
+
+    const MCRegisterClass *RC_in = &ARMMCRegisterClasses[ARM::MQPRRegClassID];
+    const MCRegisterClass *RC_out = (VectorList.Count == 2) ?
+      &ARMMCRegisterClasses[ARM::QQPRRegClassID] :
+      &ARMMCRegisterClasses[ARM::QQQQPRRegClassID];
+
+    unsigned I, E = RC_out->getNumRegs();
+    for (I = 0; I < E; I++)
+      if (RC_in->getRegister(I) == VectorList.RegNum)
+        break;
+    assert(I < E && "Invalid vector list start register!");
+
+    Inst.addOperand(MCOperand::createReg(RC_out->getRegister(I)));
+  }
+
   void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
@@ -2745,6 +3211,16 @@ public:
     Inst.addOperand(MCOperand::createImm(getVectorIndex()));
   }
 
+  void addMVEVectorIndexOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+  }
+
+  void addMVEPairVectorIndexOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+  }
+
   void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     // The immediate encodes the type of constant as well as the value.
@@ -2913,6 +3389,15 @@ public:
     return Op;
   }
 
+  static std::unique_ptr<ARMOperand> CreateVPTPred(ARMVCC::VPTCodes CC,
+                                                   SMLoc S) {
+    auto Op = make_unique<ARMOperand>(k_VPTPred);
+    Op->VCC.Val = CC;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
   static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
     auto Op = make_unique<ARMOperand>(k_CoprocNum);
     Op->Cop.Val = CopVal;
@@ -3044,19 +3529,31 @@ public:
     assert(Regs.size() > 0 && "RegList contains no registers?");
     KindTy Kind = k_RegisterList;
 
-    if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second))
-      Kind = k_DPRRegisterList;
-    else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
-             contains(Regs.front().second))
-      Kind = k_SPRRegisterList;
+    if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+            Regs.front().second)) {
+      if (Regs.back().second == ARM::VPR)
+        Kind = k_FPDRegisterListWithVPR;
+      else
+        Kind = k_DPRRegisterList;
+    } else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(
+                   Regs.front().second)) {
+      if (Regs.back().second == ARM::VPR)
+        Kind = k_FPSRegisterListWithVPR;
+      else
+        Kind = k_SPRRegisterList;
+    }
 
     // Sort based on the register encoding values.
     array_pod_sort(Regs.begin(), Regs.end());
 
+    if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
+      Kind = k_RegisterListWithAPSR;
+
     auto Op = make_unique<ARMOperand>(Kind);
     for (SmallVectorImpl<std::pair<unsigned, unsigned>>::const_iterator
            I = Regs.begin(), E = Regs.end(); I != E; ++I)
       Op->Registers.push_back(I->second);
+
     Op->StartLoc = StartLoc;
     Op->EndLoc = EndLoc;
     return Op;
@@ -3217,15 +3714,18 @@ void ARMOperand::print(raw_ostream &OS) const {
   case k_CondCode:
     OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
     break;
+  case k_VPTPred:
+    OS << "<ARMVCC::" << ARMVPTPredToString(getVPTPred()) << ">";
+    break;
   case k_CCOut:
     OS << "<ccout " << RegName(getReg()) << ">";
     break;
   case k_ITCondMask: {
     static const char *const MaskStr[] = {
-      "(invalid)", "(teee)", "(tee)", "(teet)",
-      "(te)",      "(tete)", "(tet)", "(tett)",
-      "(t)",       "(ttee)", "(tte)", "(ttet)",
-      "(tt)",      "(ttte)", "(ttt)", "(tttt)"
+      "(invalid)", "(tttt)", "(ttt)", "(ttte)",
+      "(tt)",      "(ttet)", "(tte)", "(ttee)",
+      "(t)",       "(tett)", "(tet)", "(tete)",
+      "(te)",      "(teet)", "(tee)", "(teee)",
     };
     assert((ITMask.Mask & 0xf) == ITMask.Mask);
     OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
@@ -3324,8 +3824,11 @@ void ARMOperand::print(raw_ostream &OS) const {
        << ", width: " << Bitfield.Width << ">";
     break;
   case k_RegisterList:
+  case k_RegisterListWithAPSR:
   case k_DPRRegisterList:
-  case k_SPRRegisterList: {
+  case k_SPRRegisterList:
+  case k_FPSRegisterListWithVPR:
+  case k_FPDRegisterListWithVPR: {
     OS << "<register_list ";
 
     const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -3423,7 +3926,7 @@ int ARMAsmParser::tryParseRegister() {
   }
 
   // Some FPUs only have 16 D registers, so D16-D31 are invalid
-  if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+  if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
     return -1;
 
   Parser.Lex(); // Eat identifier token.
@@ -3662,11 +4165,10 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
   if (Tok.isNot(AsmToken::Identifier))
     return MatchOperand_NoMatch;
 
-  int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+  int Num = MatchCoprocessorOperandName(Tok.getString().lower(), 'p');
   if (Num == -1)
     return MatchOperand_NoMatch;
-  // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions
-  if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11))
+  if (!isValidCoprocessorNumber(Num, getSTI().getFeatureBits()))
     return MatchOperand_NoMatch;
 
   Parser.Lex(); // Eat identifier token.
@@ -3685,7 +4187,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
   if (Tok.isNot(AsmToken::Identifier))
     return MatchOperand_NoMatch;
 
-  int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+  int Reg = MatchCoprocessorOperandName(Tok.getString().lower(), 'c');
   if (Reg == -1)
     return MatchOperand_NoMatch;
 
@@ -3752,7 +4254,8 @@ static unsigned getNextRegister(unsigned Reg) {
 }
 
 /// Parse a register list.
-bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
+                                     bool EnforceOrder) {
   MCAsmParser &Parser = getParser();
   if (Parser.getTok().isNot(AsmToken::LCurly))
     return TokError("Token is not a Left Curly Brace");
@@ -3785,6 +4288,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
     RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
   else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
     RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+  else if (ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
+    RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
   else
     return Error(RegLoc, "invalid register in register list");
 
@@ -3838,14 +4343,32 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
       Reg = getDRegFromQReg(Reg);
       isQReg = true;
     }
+    if (!RC->contains(Reg) &&
+        RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() &&
+        ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) {
+      // switch the register classes, as GPRwithAPSRnospRegClassID is a partial
+      // subset of GPRRegClassId except it contains APSR as well.
+      RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
+    }
+    if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] ||
+                            RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) {
+      RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
+      EReg = MRI->getEncodingValue(Reg);
+      Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+      continue;
+    }
     // The register must be in the same register class as the first.
     if (!RC->contains(Reg))
       return Error(RegLoc, "invalid register in register list");
-    // List must be monotonically increasing.
-    if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+    // In most cases, the list must be monotonically increasing. An
+    // exception is CLRM, which is order-independent anyway, so
+    // there's no potential for confusion if you write clrm {r2,r1}
+    // instead of clrm {r1,r2}.
+    if (EnforceOrder &&
+        MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
       if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
         Warning(RegLoc, "register list not in ascending order");
-      else
+      else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
         return Error(RegLoc, "register list not in ascending order");
     }
     if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
@@ -3855,6 +4378,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
     }
     // VFP register lists must also be contiguous.
     if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+        RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] &&
         Reg != OldReg + 1)
       return Error(RegLoc, "non-contiguous register range");
     EReg = MRI->getEncodingValue(Reg);
@@ -3944,7 +4468,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
   // As an extension (to match gas), support a plain D register or Q register
   // (without encosing curly braces) as a single or double entry list,
   // respectively.
-  if (Parser.getTok().is(AsmToken::Identifier)) {
+  if (!hasMVE() && Parser.getTok().is(AsmToken::Identifier)) {
     SMLoc E = Parser.getTok().getEndLoc();
     int Reg = tryParseRegister();
     if (Reg == -1)
@@ -4012,9 +4536,14 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
   unsigned Count = 1;
   int Spacing = 0;
   unsigned FirstReg = Reg;
+
+  if (hasMVE() && !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
+      Error(Parser.getTok().getLoc(), "vector register in range Q0-Q7 expected");
+      return MatchOperand_ParseFail;
+  }
   // The list is of D registers, but we also allow Q regs and just interpret
   // them as the two D sub-registers.
-  if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+  else if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
     FirstReg = Reg = getDRegFromQReg(Reg);
     Spacing = 1; // double-spacing requires explicit D registers, otherwise
                  // it's ambiguous with four-register single spaced.
@@ -4044,14 +4573,17 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
         return MatchOperand_ParseFail;
       }
       // Allow Q regs and just interpret them as the two D sub-registers.
-      if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+      if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
         EndReg = getDRegFromQReg(EndReg) + 1;
       // If the register is the same as the start reg, there's nothing
       // more to do.
       if (Reg == EndReg)
         continue;
       // The register must be in the same register class as the first.
-      if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+      if ((hasMVE() &&
+           !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(EndReg)) ||
+          (!hasMVE() &&
+           !ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg))) {
         Error(AfterMinusLoc, "invalid register in register list");
         return MatchOperand_ParseFail;
       }
@@ -4084,13 +4616,21 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
       Error(RegLoc, "register expected");
       return MatchOperand_ParseFail;
     }
+
+    if (hasMVE()) {
+      if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
+        Error(RegLoc, "vector register in range Q0-Q7 expected");
+        return MatchOperand_ParseFail;
+      }
+      Spacing = 1;
+    }
     // vector register lists must be contiguous.
     // It's OK to use the enumeration values directly here rather, as the
     // VFP register classes have the enum sorted properly.
     //
     // The list is of D registers, but we also allow Q regs and just interpret
     // them as the two D sub-registers.
-    if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+    else if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
       if (!Spacing)
         Spacing = 1; // Register range implies a single spaced list.
       else if (Spacing == 2) {
@@ -4151,30 +4691,20 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
 
   switch (LaneKind) {
   case NoLanes:
+  case AllLanes: {
     // Two-register operands have been converted to the
     // composite register classes.
-    if (Count == 2) {
-      const MCRegisterClass *RC = (Spacing == 1) ?
-        &ARMMCRegisterClasses[ARM::DPairRegClassID] :
-        &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
-      FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
-    }
-    Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
-                                                    (Spacing == 2), S, E));
-    break;
-  case AllLanes:
-    // Two-register operands have been converted to the
-    // composite register classes.
-    if (Count == 2) {
+    if (Count == 2 && !hasMVE()) {
       const MCRegisterClass *RC = (Spacing == 1) ?
         &ARMMCRegisterClasses[ARM::DPairRegClassID] :
         &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
       FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
     }
-    Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
-                                                            (Spacing == 2),
-                                                            S, E));
+    auto Create = (LaneKind == NoLanes ? ARMOperand::CreateVectorList :
+                   ARMOperand::CreateVectorListAllLanes);
+    Operands.push_back(Create(FirstReg, Count, (Spacing == 2), S, E));
     break;
+  }
   case IndexedLane:
     Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
                                                            LaneIndex,
@@ -5061,6 +5591,21 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
   ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
 }
 
+void ARMAsmParser::cvtMVEVMOVQtoDReg(
+  MCInst &Inst, const OperandVector &Operands) {
+
+  // mnemonic, condition code, Rt, Rt2, Qd, idx, Qd again, idx2
+  assert(Operands.size() == 8);
+
+  ((ARMOperand &)*Operands[2]).addRegOperands(Inst, 1); // Rt
+  ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1); // Rt2
+  ((ARMOperand &)*Operands[4]).addRegOperands(Inst, 1); // Qd
+  ((ARMOperand &)*Operands[5]).addMVEPairVectorIndexOperands(Inst, 1); // idx
+  // skip second copy of Qd in Operands[6]
+  ((ARMOperand &)*Operands[7]).addMVEPairVectorIndexOperands(Inst, 1); // idx2
+  ((ARMOperand &)*Operands[1]).addCondCodeOperands(Inst, 2); // condition code
+}
+
 /// Parse an ARM memory expression, return false if successful else return true
 /// or an error.  The first token must be a '[' when called.
 bool ARMAsmParser::parseMemory(OperandVector &Operands) {
@@ -5275,6 +5820,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
     St = ARM_AM::ror;
   else if (ShiftName == "rrx" || ShiftName == "RRX")
     St = ARM_AM::rrx;
+  else if (ShiftName == "uxtw" || ShiftName == "UXTW")
+    St = ARM_AM::uxtw;
   else
     return Error(Loc, "illegal shift operator");
   Parser.Lex(); // Eat shift type token.
@@ -5463,7 +6010,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
   case AsmToken::LBrac:
     return parseMemory(Operands);
   case AsmToken::LCurly:
-    return parseRegisterList(Operands);
+    return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
   case AsmToken::Dollar:
   case AsmToken::Hash:
     // #42 -> immediate.
@@ -5595,6 +6142,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
   case MCObjectFileInfo::IsWasm:
     CurrentFormat = WASM;
     break;
+  case MCObjectFileInfo::IsXCOFF:
+    llvm_unreachable("unexpected object format");
+    break;
   }
 
   if (~Prefix->SupportedFormats & CurrentFormat) {
@@ -5621,11 +6171,14 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
 // FIXME: Would be nice to autogen this.
 // FIXME: This is a bit of a maze of special cases.
 StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+                                      StringRef ExtraToken,
                                       unsigned &PredicationCode,
+                                      unsigned &VPTPredicationCode,
                                       bool &CarrySetting,
                                       unsigned &ProcessorIMod,
                                       StringRef &ITMask) {
   PredicationCode = ARMCC::AL;
+  VPTPredicationCode = ARMVCC::None;
   CarrySetting = false;
   ProcessorIMod = 0;
 
@@ -5649,7 +6202,12 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "bxns"  || Mnemonic == "blxns" ||
       Mnemonic == "vudot" || Mnemonic == "vsdot" ||
       Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
-      Mnemonic == "vfmal" || Mnemonic == "vfmsl")
+      Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+      Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
+      Mnemonic == "csel" || Mnemonic == "csinc" ||
+      Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
+      Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
+      Mnemonic == "csetm")
     return Mnemonic;
 
   // First, split out any predication code. Ignore mnemonics we know aren't
@@ -5657,7 +6215,18 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
   if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
       Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
       Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
-      Mnemonic != "sbcs" && Mnemonic != "rscs") {
+      Mnemonic != "sbcs" && Mnemonic != "rscs" &&
+      !(hasMVE() &&
+        (Mnemonic == "vmine" ||
+         Mnemonic == "vshle" || Mnemonic == "vshlt" || Mnemonic == "vshllt" ||
+         Mnemonic == "vrshle" || Mnemonic == "vrshlt" ||
+         Mnemonic == "vmvne" || Mnemonic == "vorne" ||
+         Mnemonic == "vnege" || Mnemonic == "vnegt" ||
+         Mnemonic == "vmule" || Mnemonic == "vmult" ||
+         Mnemonic == "vrintne" ||
+         Mnemonic == "vcmult" || Mnemonic == "vcmule" ||
+         Mnemonic == "vpsele" || Mnemonic == "vpselt" ||
+         Mnemonic.startswith("vq")))) {
     unsigned CC = ARMCondCodeFromString(Mnemonic.substr(Mnemonic.size()-2));
     if (CC != ~0U) {
       Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
@@ -5677,7 +6246,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
         Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
         Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
         Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" ||
-        Mnemonic == "bxns" || Mnemonic == "blxns" ||
+        Mnemonic == "bxns" || Mnemonic == "blxns" || Mnemonic == "vfmas" ||
+        Mnemonic == "vmlas" ||
         (Mnemonic == "movs" && isThumb()))) {
     Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
     CarrySetting = true;
@@ -5698,12 +6268,36 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
     }
   }
 
+  if (isMnemonicVPTPredicable(Mnemonic, ExtraToken) && Mnemonic != "vmovlt" &&
+      Mnemonic != "vshllt" && Mnemonic != "vrshrnt" && Mnemonic != "vshrnt" &&
+      Mnemonic != "vqrshrunt" && Mnemonic != "vqshrunt" &&
+      Mnemonic != "vqrshrnt" && Mnemonic != "vqshrnt" && Mnemonic != "vmullt" &&
+      Mnemonic != "vqmovnt" && Mnemonic != "vqmovunt" &&
+      Mnemonic != "vqmovnt" && Mnemonic != "vmovnt" && Mnemonic != "vqdmullt" &&
+      Mnemonic != "vpnot" && Mnemonic != "vcvtt" && Mnemonic != "vcvt") {
+    unsigned CC = ARMVectorCondCodeFromString(Mnemonic.substr(Mnemonic.size()-1));
+    if (CC != ~0U) {
+      Mnemonic = Mnemonic.slice(0, Mnemonic.size()-1);
+      VPTPredicationCode = CC;
+    }
+    return Mnemonic;
+  }
+
   // The "it" instruction has the condition mask on the end of the mnemonic.
   if (Mnemonic.startswith("it")) {
     ITMask = Mnemonic.slice(2, Mnemonic.size());
     Mnemonic = Mnemonic.slice(0, 2);
   }
 
+  if (Mnemonic.startswith("vpst")) {
+    ITMask = Mnemonic.slice(4, Mnemonic.size());
+    Mnemonic = Mnemonic.slice(0, 4);
+  }
+  else if (Mnemonic.startswith("vpt")) {
+    ITMask = Mnemonic.slice(3, Mnemonic.size());
+    Mnemonic = Mnemonic.slice(0, 3);
+  }
+
   return Mnemonic;
 }
 
@@ -5711,9 +6305,14 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
 /// inclusion of carry set or predication code operands.
 //
 // FIXME: It would be nice to autogen this.
-void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
+                                         StringRef ExtraToken,
+                                         StringRef FullInst,
                                          bool &CanAcceptCarrySet,
-                                         bool &CanAcceptPredicationCode) {
+                                         bool &CanAcceptPredicationCode,
+                                         bool &CanAcceptVPTPredicationCode) {
+  CanAcceptVPTPredicationCode = isMnemonicVPTPredicable(Mnemonic, ExtraToken);
+
   CanAcceptCarrySet =
       Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
       Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
@@ -5742,7 +6341,18 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
       Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
       Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
       Mnemonic == "sb"    || Mnemonic == "ssbb"  ||
-      Mnemonic == "pssbb") {
+      Mnemonic == "pssbb" ||
+      Mnemonic == "bfcsel" || Mnemonic == "wls" ||
+      Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" ||
+      Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
+      Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
+      Mnemonic == "cset" || Mnemonic == "csetm" ||
+      Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+      (hasMVE() &&
+       (Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
+        Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
+        Mnemonic.startswith("wlstp") || Mnemonic.startswith("dlstp") ||
+        Mnemonic.startswith("letp")))) {
     // These mnemonics are never predicable
     CanAcceptPredicationCode = false;
   } else if (!isThumb()) {
@@ -5976,7 +6586,8 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
                                               OperandVector &Operands) {
   // VRINT{Z, X} have a predicate operand in VFP, but not in NEON
   unsigned RegIdx = 3;
-  if ((Mnemonic == "vrintz" || Mnemonic == "vrintx") &&
+  if ((((Mnemonic == "vrintz" || Mnemonic == "vrintx") && !hasMVE()) ||
+      Mnemonic == "vrintr") &&
       (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
        static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
     if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
@@ -5994,6 +6605,47 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
   return false;
 }
 
+bool ARMAsmParser::shouldOmitVectorPredicateOperand(StringRef Mnemonic,
+                                                    OperandVector &Operands) {
+  if (!hasMVE() || Operands.size() < 3)
+    return true;
+
+  if (Mnemonic.startswith("vld2") || Mnemonic.startswith("vld4") ||
+      Mnemonic.startswith("vst2") || Mnemonic.startswith("vst4"))
+    return true;
+
+  if (Mnemonic.startswith("vctp") || Mnemonic.startswith("vpnot"))
+    return false;
+
+  if (Mnemonic.startswith("vmov") &&
+      !(Mnemonic.startswith("vmovl") || Mnemonic.startswith("vmovn") ||
+        Mnemonic.startswith("vmovx"))) {
+    for (auto &Operand : Operands) {
+      if (static_cast<ARMOperand &>(*Operand).isVectorIndex() ||
+          ((*Operand).isReg() &&
+           (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(
+             (*Operand).getReg()) ||
+            ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+              (*Operand).getReg())))) {
+        return true;
+      }
+    }
+    return false;
+  } else {
+    for (auto &Operand : Operands) {
+      // We check the larger class QPR instead of just the legal class
+      // MQPR, to more accurately report errors when using Q registers
+      // outside of the allowed range.
+      if (static_cast<ARMOperand &>(*Operand).isVectorIndex() ||
+          (Operand->isReg() &&
+           (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+             Operand->getReg()))))
+        return false;
+    }
+    return true;
+  }
+}
+
 static bool isDataTypeToken(StringRef Tok) {
   return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
     Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -6010,7 +6662,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
   return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
 }
 
-static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
+static void applyMnemonicAliases(StringRef &Mnemonic,
+                                 const FeatureBitset &Features,
                                  unsigned VariantID);
 
 // The GNU assembler has aliases of ldrd and strd with the second register
@@ -6033,7 +6686,7 @@ void ARMAsmParser::fixupGNULDRDAlias(StringRef Mnemonic,
 
   if (!Op2.isReg())
     return;
-  if (!Op3.isMem())
+  if (!Op3.isGPRMem())
     return;
 
   const MCRegisterClass &GPR = MRI->getRegClass(ARM::GPRRegClassID);
@@ -6068,7 +6721,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // The generic tblgen'erated code does this later, at the start of
   // MatchInstructionImpl(), but that's too late for aliases that include
   // any sort of suffix.
-  uint64_t AvailableFeatures = getAvailableFeatures();
+  const FeatureBitset &AvailableFeatures = getAvailableFeatures();
   unsigned AssemblerDialect = getParser().getAssemblerDialect();
   applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
 
@@ -6084,14 +6737,16 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // Create the leading tokens for the mnemonic, split by '.' characters.
   size_t Start = 0, Next = Name.find('.');
   StringRef Mnemonic = Name.slice(Start, Next);
+  StringRef ExtraToken = Name.slice(Next, Name.find(' ', Next + 1));
 
   // Split out the predication code and carry setting flag from the mnemonic.
   unsigned PredicationCode;
+  unsigned VPTPredicationCode;
   unsigned ProcessorIMod;
   bool CarrySetting;
   StringRef ITMask;
-  Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
-                           ProcessorIMod, ITMask);
+  Mnemonic = splitMnemonic(Mnemonic, ExtraToken, PredicationCode, VPTPredicationCode,
+                           CarrySetting, ProcessorIMod, ITMask);
 
   // In Thumb1, only the branch (B) instruction can be predicated.
   if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
@@ -6100,15 +6755,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
   Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
 
-  // Handle the IT instruction ITMask. Convert it to a bitmask. This
-  // is the mask as it will be for the IT encoding if the conditional
-  // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
-  // where the conditional bit0 is zero, the instruction post-processing
-  // will adjust the mask accordingly.
-  if (Mnemonic == "it") {
-    SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+  // Handle the mask for IT and VPT instructions. In ARMOperand and
+  // MCOperand, this is stored in a format independent of the
+  // condition code: the lowest set bit indicates the end of the
+  // encoding, and above that, a 1 bit indicates 'else', and an 0
+  // indicates 'then'. E.g.
+  //    IT    -> 1000
+  //    ITx   -> x100    (ITT -> 0100, ITE -> 1100)
+  //    ITxy  -> xy10    (e.g. ITET -> 1010)
+  //    ITxyz -> xyz1    (e.g. ITEET -> 1101)
+  if (Mnemonic == "it" || Mnemonic.startswith("vpt") ||
+      Mnemonic.startswith("vpst")) {
+    SMLoc Loc = Mnemonic == "it"  ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) :
+                Mnemonic == "vpt" ? SMLoc::getFromPointer(NameLoc.getPointer() + 3) :
+                                    SMLoc::getFromPointer(NameLoc.getPointer() + 4);
     if (ITMask.size() > 3) {
-      return Error(Loc, "too many conditions on IT instruction");
+      if (Mnemonic == "it")
+        return Error(Loc, "too many conditions on IT instruction");
+      return Error(Loc, "too many conditions on VPT instruction");
     }
     unsigned Mask = 8;
     for (unsigned i = ITMask.size(); i != 0; --i) {
@@ -6117,7 +6781,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
         return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
       }
       Mask >>= 1;
-      if (ITMask[i - 1] == 't')
+      if (ITMask[i - 1] == 'e')
         Mask |= 8;
     }
     Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
@@ -6133,8 +6797,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // ConditionCode operands to match the mnemonic "as written" and then we let
   // the matcher deal with finding the right instruction or generating an
   // appropriate error.
-  bool CanAcceptCarrySet, CanAcceptPredicationCode;
-  getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
+  bool CanAcceptCarrySet, CanAcceptPredicationCode, CanAcceptVPTPredicationCode;
+  getMnemonicAcceptInfo(Mnemonic, ExtraToken, Name, CanAcceptCarrySet,
+                        CanAcceptPredicationCode, CanAcceptVPTPredicationCode);
 
   // If we had a carry-set on an instruction that can't do that, issue an
   // error.
@@ -6149,6 +6814,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                  "' is not predicable, but condition code specified");
   }
 
+  // If we had a VPT predication code on an instruction that can't do that, issue an
+  // error.
+  if (!CanAcceptVPTPredicationCode && VPTPredicationCode != ARMVCC::None) {
+    return Error(NameLoc, "instruction '" + Mnemonic +
+                 "' is not VPT predicable, but VPT code T/E is specified");
+  }
+
   // Add the carry setting operand, if necessary.
   if (CanAcceptCarrySet) {
     SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
@@ -6161,7 +6833,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
                                       CarrySetting);
     Operands.push_back(ARMOperand::CreateCondCode(
-                         ARMCC::CondCodes(PredicationCode), Loc));
+                       ARMCC::CondCodes(PredicationCode), Loc));
+  }
+
+  // Add the VPT predication code operand, if necessary.
+  // FIXME: We don't add them for the instructions filtered below as these can
+  // have custom operands which need special parsing.  This parsing requires
+  // the operand to be in the same place in the OperandVector as their
+  // definition in tblgen.  Since these instructions may also have the
+  // scalar predication operand we do not add the vector one and leave until
+  // now to fix it up.
+  if (CanAcceptVPTPredicationCode && Mnemonic != "vmov" &&
+      !Mnemonic.startswith("vcmp") &&
+      !(Mnemonic.startswith("vcvt") && Mnemonic != "vcvta" &&
+        Mnemonic != "vcvtn" && Mnemonic != "vcvtp" && Mnemonic != "vcvtm")) {
+    SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+                                      CarrySetting);
+    Operands.push_back(ARMOperand::CreateVPTPred(
+                         ARMVCC::VPTCodes(VPTPredicationCode), Loc));
   }
 
   // Add the processor imod operand, if necessary.
@@ -6177,7 +6866,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   while (Next != StringRef::npos) {
     Start = Next;
     Next = Name.find('.', Start + 1);
-    StringRef ExtraToken = Name.slice(Start, Next);
+    ExtraToken = Name.slice(Start, Next);
 
     // Some NEON instructions have an optional datatype suffix that is
     // completely ignored. Check for that.
@@ -6233,57 +6922,173 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
   // Some instructions have the same mnemonic, but don't always
   // have a predicate. Distinguish them here and delete the
-  // predicate if needed.
+  // appropriate predicate if needed.  This could be either the scalar
+  // predication code or the vector predication code.
   if (PredicationCode == ARMCC::AL &&
       shouldOmitPredicateOperand(Mnemonic, Operands))
     Operands.erase(Operands.begin() + 1);
 
-  // ARM mode 'blx' need special handling, as the register operand version
-  // is predicable, but the label operand version is not. So, we can't rely
-  // on the Mnemonic based checking to correctly figure out when to put
-  // a k_CondCode operand in the list. If we're trying to match the label
-  // version, remove the k_CondCode operand here.
-  if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
-      static_cast<ARMOperand &>(*Operands[2]).isImm())
-    Operands.erase(Operands.begin() + 1);
 
-  // Adjust operands of ldrexd/strexd to MCK_GPRPair.
-  // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
-  // a single GPRPair reg operand is used in the .td file to replace the two
-  // GPRs. However, when parsing from asm, the two GRPs cannot be automatically
-  // expressed as a GPRPair, so we have to manually merge them.
-  // FIXME: We would really like to be able to tablegen'erate this.
-  if (!isThumb() && Operands.size() > 4 &&
-      (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
-       Mnemonic == "stlexd")) {
-    bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
-    unsigned Idx = isLoad ? 2 : 3;
-    ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
-    ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
-
-    const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
-    // Adjust only if Op1 and Op2 are GPRs.
-    if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
-        MRC.contains(Op2.getReg())) {
-      unsigned Reg1 = Op1.getReg();
-      unsigned Reg2 = Op2.getReg();
-      unsigned Rt = MRI->getEncodingValue(Reg1);
-      unsigned Rt2 = MRI->getEncodingValue(Reg2);
-
-      // Rt2 must be Rt + 1 and Rt must be even.
-      if (Rt + 1 != Rt2 || (Rt & 1)) {
-        return Error(Op2.getStartLoc(),
-                     isLoad ? "destination operands must be sequential"
-                            : "source operands must be sequential");
+  if (hasMVE()) {
+    if (!shouldOmitVectorPredicateOperand(Mnemonic, Operands) &&
+        Mnemonic == "vmov" && PredicationCode == ARMCC::LT) {
+      // Very nasty hack to tell apart the vector predicated variant of vmovlt
+      // and the scalar predicated vmov with condition 'lt'.  We cannot do so
+      // until we have parsed their operands.
+      Operands.erase(Operands.begin() + 1);
+      Operands.erase(Operands.begin());
+      SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+      SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+                                         Mnemonic.size() - 1 + CarrySetting);
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateVPTPred(ARMVCC::None, PLoc));
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateToken(StringRef("vmovlt"), MLoc));
+    } else if (Mnemonic == "vcvt" && PredicationCode == ARMCC::NE &&
+               !shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+      // Another nasty hack to deal with the ambiguity between vcvt with scalar
+      // predication 'ne' and vcvtn with vector predication 'e'.  As above we
+      // can only distinguish between the two after we have parsed their
+      // operands.
+      Operands.erase(Operands.begin() + 1);
+      Operands.erase(Operands.begin());
+      SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+      SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+                                         Mnemonic.size() - 1 + CarrySetting);
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateVPTPred(ARMVCC::Else, PLoc));
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateToken(StringRef("vcvtn"), MLoc));
+    } else if (Mnemonic == "vmul" && PredicationCode == ARMCC::LT &&
+               !shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+      // Another hack, this time to distinguish between scalar predicated vmul
+      // with 'lt' predication code and the vector instruction vmullt with
+      // vector predication code "none"
+      Operands.erase(Operands.begin() + 1);
+      Operands.erase(Operands.begin());
+      SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateToken(StringRef("vmullt"), MLoc));
+    }
+    // For vmov, vcmp and vcvt, as mentioned earlier, we did not add the vector
+    // predication code, since these may contain operands that require
+    // special parsing.  So now we have to see if they require vector
+    // predication and replace the scalar one with the vector predication
+    // operand if that is the case.
+    else if (Mnemonic == "vmov" || Mnemonic.startswith("vcmp") ||
+             (Mnemonic.startswith("vcvt") && !Mnemonic.startswith("vcvta") &&
+              !Mnemonic.startswith("vcvtn") && !Mnemonic.startswith("vcvtp") &&
+              !Mnemonic.startswith("vcvtm"))) {
+      if (!shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+        // We could not split the vector predicate off vcvt because it might
+        // have been the scalar vcvtt instruction.  Now we know it's a vector
+        // instruction, we still need to check whether it's the vector
+        // predicated vcvt with 'Then' predication or the vector vcvtt.  We can
+        // distinguish the two based on the suffixes, if it is any of
+        // ".f16.f32", ".f32.f16", ".f16.f64" or ".f64.f16" then it is the vcvtt.
+        if (Mnemonic.startswith("vcvtt") && Operands.size() >= 4) {
+          auto Sz1 = static_cast<ARMOperand &>(*Operands[2]);
+          auto Sz2 = static_cast<ARMOperand &>(*Operands[3]);
+          if (!(Sz1.isToken() && Sz1.getToken().startswith(".f") &&
+              Sz2.isToken() && Sz2.getToken().startswith(".f"))) {
+            Operands.erase(Operands.begin());
+            SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+            VPTPredicationCode = ARMVCC::Then;
+
+            Mnemonic = Mnemonic.substr(0, 4);
+            Operands.insert(Operands.begin(),
+                            ARMOperand::CreateToken(Mnemonic, MLoc));
+          }
+        }
+        Operands.erase(Operands.begin() + 1);
+        SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+                                          Mnemonic.size() + CarrySetting);
+        Operands.insert(Operands.begin() + 1,
+                        ARMOperand::CreateVPTPred(
+                            ARMVCC::VPTCodes(VPTPredicationCode), PLoc));
+      }
+    } else if (CanAcceptVPTPredicationCode) {
+      // For all other instructions, make sure only one of the two
+      // predication operands is left behind, depending on whether we should
+      // use the vector predication.
+      if (shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+        if (CanAcceptPredicationCode)
+          Operands.erase(Operands.begin() + 2);
+        else
+          Operands.erase(Operands.begin() + 1);
+      } else if (CanAcceptPredicationCode && PredicationCode == ARMCC::AL) {
+        Operands.erase(Operands.begin() + 1);
       }
-      unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
-          &(MRI->getRegClass(ARM::GPRPairRegClassID)));
-      Operands[Idx] =
-          ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
-      Operands.erase(Operands.begin() + Idx + 1);
     }
   }
 
+  if (VPTPredicationCode != ARMVCC::None) {
+    bool usedVPTPredicationCode = false;
+    for (unsigned I = 1; I < Operands.size(); ++I)
+      if (static_cast<ARMOperand &>(*Operands[I]).isVPTPred())
+        usedVPTPredicationCode = true;
+    if (!usedVPTPredicationCode) {
+      // If we have a VPT predication code and we haven't just turned it
+      // into an operand, then it was a mistake for splitMnemonic to
+      // separate it from the rest of the mnemonic in the first place,
+      // and this may lead to wrong disassembly (e.g. scalar floating
+      // point VCMPE is actually a different instruction from VCMP, so
+      // we mustn't treat them the same). In that situation, glue it
+      // back on.
+      Mnemonic = Name.slice(0, Mnemonic.size() + 1);
+      Operands.erase(Operands.begin());
+      Operands.insert(Operands.begin(),
+                      ARMOperand::CreateToken(Mnemonic, NameLoc));
+    }
+  }
+
+    // ARM mode 'blx' needs special handling, as the register operand version
+    // is predicable, but the label operand version is not. So, we can't rely
+    // on the Mnemonic based checking to correctly figure out when to put
+    // a k_CondCode operand in the list. If we're trying to match the label
+    // version, remove the k_CondCode operand here.
+    if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+        static_cast<ARMOperand &>(*Operands[2]).isImm())
+      Operands.erase(Operands.begin() + 1);
+
+    // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+    // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+    // a single GPRPair reg operand is used in the .td file to replace the two
+    // GPRs. However, when parsing from asm, the two GPRs cannot be
+    // automatically
+    // expressed as a GPRPair, so we have to manually merge them.
+    // FIXME: We would really like to be able to tablegen'erate this.
+    if (!isThumb() && Operands.size() > 4 &&
+        (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+         Mnemonic == "stlexd")) {
+      bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
+      unsigned Idx = isLoad ? 2 : 3;
+      ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+      ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
+
+      const MCRegisterClass &MRC = MRI->getRegClass(ARM::GPRRegClassID);
+      // Adjust only if Op1 and Op2 are GPRs.
+      if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+          MRC.contains(Op2.getReg())) {
+        unsigned Reg1 = Op1.getReg();
+        unsigned Reg2 = Op2.getReg();
+        unsigned Rt = MRI->getEncodingValue(Reg1);
+        unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+        // Rt2 must be Rt + 1 and Rt must be even.
+        if (Rt + 1 != Rt2 || (Rt & 1)) {
+          return Error(Op2.getStartLoc(),
+                       isLoad ? "destination operands must be sequential"
+                              : "source operands must be sequential");
+        }
+        unsigned NewReg = MRI->getMatchingSuperReg(
+            Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+        Operands[Idx] =
+            ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+        Operands.erase(Operands.begin() + Idx + 1);
+      }
+  }
+
   // GNU Assembler extension (compatibility).
   fixupGNULDRDAlias(Mnemonic, Operands);
 
@@ -6442,6 +7247,17 @@ bool ARMAsmParser::validateLDRDSTRD(MCInst &Inst,
   return false;
 }
 
+static int findFirstVectorPredOperandIdx(const MCInstrDesc &MCID) {
+  for (unsigned i = 0; i < MCID.NumOperands; ++i) {
+    if (ARM::isVpred(MCID.OpInfo[i].OperandType))
+      return i;
+  }
+  return -1;
+}
+
+static bool isVectorPredicable(const MCInstrDesc &MCID) {
+  return findFirstVectorPredOperandIdx(MCID) != -1;
+}
 
 // FIXME: We would really like to be able to tablegen'erate this.
 bool ARMAsmParser::validateInstruction(MCInst &Inst,
@@ -6473,12 +7289,25 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
   } else if (isThumbTwo() && MCID.isPredicable() &&
              Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
              ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
-             Inst.getOpcode() != ARM::t2Bcc) {
+             Inst.getOpcode() != ARM::t2Bcc &&
+             Inst.getOpcode() != ARM::t2BFic) {
     return Error(Loc, "predicated instructions must be in IT block");
   } else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() &&
              Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
                  ARMCC::AL) {
     return Warning(Loc, "predicated instructions should be in IT block");
+  } else if (!MCID.isPredicable()) {
+    // Check the instruction doesn't have a predicate operand anyway
+    // that it's not allowed to use. Sometimes this happens in order
+    // to keep instructions the same shape even though one cannot
+    // legally be predicated, e.g. vmul.f16 vs vmul.f32.
+    for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+      if (MCID.OpInfo[i].isPredicate()) {
+        if (Inst.getOperand(i).getImm() != ARMCC::AL)
+          return Error(Loc, "instruction is not predicable");
+        break;
+      }
+    }
   }
 
   // PC-setting instructions in an IT block, but not the last instruction of
@@ -6487,6 +7316,28 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
     return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block");
   }
 
+  if (inVPTBlock() && !instIsBreakpoint(Inst)) {
+    unsigned Bit = extractITMaskBit(VPTState.Mask, VPTState.CurPosition);
+    if (!isVectorPredicable(MCID))
+      return Error(Loc, "instruction in VPT block must be predicable");
+    unsigned Pred = Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm();
+    unsigned VPTPred = Bit ? ARMVCC::Else : ARMVCC::Then;
+    if (Pred != VPTPred) {
+      SMLoc PredLoc;
+      for (unsigned I = 1; I < Operands.size(); ++I)
+        if (static_cast<ARMOperand &>(*Operands[I]).isVPTPred())
+          PredLoc = Operands[I]->getStartLoc();
+      return Error(PredLoc, "incorrect predication in VPT block; got '" +
+                   StringRef(ARMVPTPredToString(ARMVCC::VPTCodes(Pred))) +
+                   "', but expected '" +
+                   ARMVPTPredToString(ARMVCC::VPTCodes(VPTPred)) + "'");
+    }
+  }
+  else if (isVectorPredicable(MCID) &&
+           Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm() !=
+           ARMVCC::None)
+    return Error(Loc, "VPT predicated instructions must be in VPT block");
+
   const unsigned Opcode = Inst.getOpcode();
   switch (Opcode) {
   case ARM::t2IT: {
@@ -6496,11 +7347,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
     unsigned Cond = Inst.getOperand(0).getImm();
     unsigned Mask = Inst.getOperand(1).getImm();
 
-    // Mask hasn't been modified to the IT instruction encoding yet so
-    // conditions only allowing a 't' are a block of 1s starting at bit 3
-    // followed by all 0s. Easiest way is to just list the 4 possibilities.
-    if (Cond == ARMCC::AL && Mask != 8 && Mask != 12 && Mask != 14 &&
-        Mask != 15)
+    // Conditions only allowing a 't' are those with no set bit except
+    // the lowest-order one that indicates the end of the sequence. In
+    // other words, powers of 2.
+    if (Cond == ARMCC::AL && countPopulation(Mask) != 1)
       return Error(Loc, "unpredictable IT predicate sequence");
     break;
   }
@@ -6609,6 +7459,54 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
                    "destination register and base register can't be identical");
     return false;
   }
+
+  case ARM::MVE_VLDRBU8_rq:
+  case ARM::MVE_VLDRBU16_rq:
+  case ARM::MVE_VLDRBS16_rq:
+  case ARM::MVE_VLDRBU32_rq:
+  case ARM::MVE_VLDRBS32_rq:
+  case ARM::MVE_VLDRHU16_rq:
+  case ARM::MVE_VLDRHU16_rq_u:
+  case ARM::MVE_VLDRHU32_rq:
+  case ARM::MVE_VLDRHU32_rq_u:
+  case ARM::MVE_VLDRHS32_rq:
+  case ARM::MVE_VLDRHS32_rq_u:
+  case ARM::MVE_VLDRWU32_rq:
+  case ARM::MVE_VLDRWU32_rq_u:
+  case ARM::MVE_VLDRDU64_rq:
+  case ARM::MVE_VLDRDU64_rq_u:
+  case ARM::MVE_VLDRWU32_qi:
+  case ARM::MVE_VLDRWU32_qi_pre:
+  case ARM::MVE_VLDRDU64_qi:
+  case ARM::MVE_VLDRDU64_qi_pre: {
+    // Qd must be different from Qm.
+    unsigned QdIdx = 0, QmIdx = 2;
+    bool QmIsPointer = false;
+    switch (Opcode) {
+    case ARM::MVE_VLDRWU32_qi:
+    case ARM::MVE_VLDRDU64_qi:
+      QmIdx = 1;
+      QmIsPointer = true;
+      break;
+    case ARM::MVE_VLDRWU32_qi_pre:
+    case ARM::MVE_VLDRDU64_qi_pre:
+      QdIdx = 1;
+      QmIsPointer = true;
+      break;
+    }
+
+    const unsigned Qd = MRI->getEncodingValue(Inst.getOperand(QdIdx).getReg());
+    const unsigned Qm = MRI->getEncodingValue(Inst.getOperand(QmIdx).getReg());
+
+    if (Qd == Qm) {
+      return Error(Operands[3]->getStartLoc(),
+                   Twine("destination vector register and vector ") +
+                   (QmIsPointer ? "pointer" : "offset") +
+                   " register can't be identical");
+    }
+    return false;
+  }
+
   case ARM::SBFX:
   case ARM::t2SBFX:
   case ARM::UBFX:
@@ -6776,6 +7674,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
     }
     break;
 
+  case ARM::t2ADDri:
+  case ARM::t2ADDri12:
+  case ARM::t2ADDrr:
+  case ARM::t2ADDrs:
+  case ARM::t2SUBri:
+  case ARM::t2SUBri12:
+  case ARM::t2SUBrr:
+  case ARM::t2SUBrs:
+    if (Inst.getOperand(0).getReg() == ARM::SP &&
+        Inst.getOperand(1).getReg() != ARM::SP)
+      return Error(Operands[4]->getStartLoc(),
+                   "source register must be sp if destination is sp");
+    break;
+
   // Final range checking for Thumb unconditional branch instructions.
   case ARM::tB:
     if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
@@ -6845,6 +7757,61 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
                                                "code specified");
     break;
   }
+  case ARM::t2BFi:
+  case ARM::t2BFr:
+  case ARM::t2BFLi:
+  case ARM::t2BFLr: {
+    if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<4, 1>() ||
+        (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+      return Error(Operands[2]->getStartLoc(),
+                   "branch location out of range or not a multiple of 2");
+
+    if (Opcode == ARM::t2BFi) {
+      if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<16, 1>())
+        return Error(Operands[3]->getStartLoc(),
+                     "branch target out of range or not a multiple of 2");
+    } else if (Opcode == ARM::t2BFLi) {
+      if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<18, 1>())
+        return Error(Operands[3]->getStartLoc(),
+                     "branch target out of range or not a multiple of 2");
+    }
+    break;
+  }
+  case ARM::t2BFic: {
+    if (!static_cast<ARMOperand &>(*Operands[1]).isUnsignedOffset<4, 1>() ||
+        (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+      return Error(Operands[1]->getStartLoc(),
+                   "branch location out of range or not a multiple of 2");
+
+    if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<16, 1>())
+      return Error(Operands[2]->getStartLoc(),
+                   "branch target out of range or not a multiple of 2");
+
+    assert(Inst.getOperand(0).isImm() == Inst.getOperand(2).isImm() &&
+           "branch location and else branch target should either both be "
+           "immediates or both labels");
+
+    if (Inst.getOperand(0).isImm() && Inst.getOperand(2).isImm()) {
+      int Diff = Inst.getOperand(2).getImm() - Inst.getOperand(0).getImm();
+      if (Diff != 4 && Diff != 2)
+        return Error(
+            Operands[3]->getStartLoc(),
+            "else branch target must be 2 or 4 greater than the branch location");
+    }
+    break;
+  }
+  case ARM::t2CLRM: {
+    for (unsigned i = 2; i < Inst.getNumOperands(); i++) {
+      if (Inst.getOperand(i).isReg() &&
+          !ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(
+              Inst.getOperand(i).getReg())) {
+        return Error(Operands[2]->getStartLoc(),
+                     "invalid register in register list. Valid registers are "
+                     "r0-r12, lr/r14 and APSR.");
+      }
+    }
+    break;
+  }
   case ARM::DSB:
   case ARM::t2DSB: {
 
@@ -6892,6 +7859,39 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
                    "list of registers must be at least 1 and at most 16");
     break;
   }
+  case ARM::MVE_VQDMULLs32bh:
+  case ARM::MVE_VQDMULLs32th:
+  case ARM::MVE_VCMULf32:
+  case ARM::MVE_VMULLs32bh:
+  case ARM::MVE_VMULLs32th:
+  case ARM::MVE_VMULLu32bh:
+  case ARM::MVE_VMULLu32th: {
+    if (Operands[3]->getReg() == Operands[4]->getReg()) {
+      return Error (Operands[3]->getStartLoc(),
+                    "Qd register and Qn register can't be identical");
+    }
+    if (Operands[3]->getReg() == Operands[5]->getReg()) {
+      return Error (Operands[3]->getStartLoc(),
+                    "Qd register and Qm register can't be identical");
+    }
+    break;
+  }
+  case ARM::MVE_VMOV_rr_q: {
+    if (Operands[4]->getReg() != Operands[6]->getReg())
+      return Error (Operands[4]->getStartLoc(), "Q-registers must be the same");
+    if (static_cast<ARMOperand &>(*Operands[5]).getVectorIndex() !=
+        static_cast<ARMOperand &>(*Operands[7]).getVectorIndex() + 2)
+      return Error (Operands[5]->getStartLoc(), "Q-register indexes must be 2 and 0 or 3 and 1");
+    break;
+  }
+  case ARM::MVE_VMOV_q_rr: {
+    if (Operands[2]->getReg() != Operands[4]->getReg())
+      return Error (Operands[2]->getStartLoc(), "Q-registers must be the same");
+    if (static_cast<ARMOperand &>(*Operands[3]).getVectorIndex() !=
+        static_cast<ARMOperand &>(*Operands[5]).getVectorIndex() + 2)
+      return Error (Operands[3]->getStartLoc(), "Q-register indexes must be 2 and 0 or 3 and 1");
+    break;
+  }
   }
 
   return false;
@@ -7168,6 +8168,50 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
   }
 
   switch (Inst.getOpcode()) {
+  case ARM::MVE_VORNIZ0v4i32:
+  case ARM::MVE_VORNIZ0v8i16:
+  case ARM::MVE_VORNIZ8v4i32:
+  case ARM::MVE_VORNIZ8v8i16:
+  case ARM::MVE_VORNIZ16v4i32:
+  case ARM::MVE_VORNIZ24v4i32:
+  case ARM::MVE_VANDIZ0v4i32:
+  case ARM::MVE_VANDIZ0v8i16:
+  case ARM::MVE_VANDIZ8v4i32:
+  case ARM::MVE_VANDIZ8v8i16:
+  case ARM::MVE_VANDIZ16v4i32:
+  case ARM::MVE_VANDIZ24v4i32: {
+    unsigned Opcode;
+    bool imm16 = false;
+    switch(Inst.getOpcode()) {
+    case ARM::MVE_VORNIZ0v4i32: Opcode = ARM::MVE_VORRIZ0v4i32; break;
+    case ARM::MVE_VORNIZ0v8i16: Opcode = ARM::MVE_VORRIZ0v8i16; imm16 = true; break;
+    case ARM::MVE_VORNIZ8v4i32: Opcode = ARM::MVE_VORRIZ8v4i32; break;
+    case ARM::MVE_VORNIZ8v8i16: Opcode = ARM::MVE_VORRIZ8v8i16; imm16 = true; break;
+    case ARM::MVE_VORNIZ16v4i32: Opcode = ARM::MVE_VORRIZ16v4i32; break;
+    case ARM::MVE_VORNIZ24v4i32: Opcode = ARM::MVE_VORRIZ24v4i32; break;
+    case ARM::MVE_VANDIZ0v4i32: Opcode = ARM::MVE_VBICIZ0v4i32; break;
+    case ARM::MVE_VANDIZ0v8i16: Opcode = ARM::MVE_VBICIZ0v8i16; imm16 = true; break;
+    case ARM::MVE_VANDIZ8v4i32: Opcode = ARM::MVE_VBICIZ8v4i32; break;
+    case ARM::MVE_VANDIZ8v8i16: Opcode = ARM::MVE_VBICIZ8v8i16; imm16 = true; break;
+    case ARM::MVE_VANDIZ16v4i32: Opcode = ARM::MVE_VBICIZ16v4i32; break;
+    case ARM::MVE_VANDIZ24v4i32: Opcode = ARM::MVE_VBICIZ24v4i32; break;
+    default: llvm_unreachable("unexpected opcode");
+    }
+
+    MCInst TmpInst;
+    TmpInst.setOpcode(Opcode);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+
+    // invert immediate
+    unsigned imm = ~Inst.getOperand(2).getImm() & (imm16 ? 0xffff : 0xffffffff);
+    TmpInst.addOperand(MCOperand::createImm(imm));
+
+    TmpInst.addOperand(Inst.getOperand(3));
+    TmpInst.addOperand(Inst.getOperand(4));
+    Inst = TmpInst;
+    return true;
+  }
   // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
   case ARM::LDRT_POST:
   case ARM::LDRBT_POST: {
@@ -8990,15 +10034,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
   }
   case ARM::ITasm:
   case ARM::t2IT: {
-    MCOperand &MO = Inst.getOperand(1);
-    unsigned Mask = MO.getImm();
-    ARMCC::CondCodes Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
-
     // Set up the IT block state according to the IT instruction we just
     // matched.
     assert(!inITBlock() && "nested IT blocks?!");
-    startExplicitITBlock(Cond, Mask);
-    MO.setImm(getITMaskEncoding());
+    startExplicitITBlock(ARMCC::CondCodes(Inst.getOperand(0).getImm()),
+                         Inst.getOperand(1).getImm());
     break;
   }
   case ARM::t2LSLrr:
@@ -9074,6 +10114,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
       return true;
     }
     return false;
+  case ARM::MVE_VPST:
+  case ARM::MVE_VPTv16i8:
+  case ARM::MVE_VPTv8i16:
+  case ARM::MVE_VPTv4i32:
+  case ARM::MVE_VPTv16u8:
+  case ARM::MVE_VPTv8u16:
+  case ARM::MVE_VPTv4u32:
+  case ARM::MVE_VPTv16s8:
+  case ARM::MVE_VPTv8s16:
+  case ARM::MVE_VPTv4s32:
+  case ARM::MVE_VPTv4f32:
+  case ARM::MVE_VPTv8f16:
+  case ARM::MVE_VPTv16i8r:
+  case ARM::MVE_VPTv8i16r:
+  case ARM::MVE_VPTv4i32r:
+  case ARM::MVE_VPTv16u8r:
+  case ARM::MVE_VPTv8u16r:
+  case ARM::MVE_VPTv4u32r:
+  case ARM::MVE_VPTv16s8r:
+  case ARM::MVE_VPTv8s16r:
+  case ARM::MVE_VPTv4s32r:
+  case ARM::MVE_VPTv4f32r:
+  case ARM::MVE_VPTv8f16r: {
+    assert(!inVPTBlock() && "Nested VPT blocks are not allowed");
+    MCOperand &MO = Inst.getOperand(0);
+    VPTState.Mask = MO.getImm();
+    VPTState.CurPosition = 0;
+    break;
+  }
   }
   return false;
 }
@@ -9138,18 +10207,50 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
       return Match_RequiresV8;
   }
 
-  // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
-  // ARMv8-A.
-  if ((Inst.getOpcode() == ARM::VMRS || Inst.getOpcode() == ARM::VMSR) &&
-      Inst.getOperand(0).getReg() == ARM::SP && (isThumb() && !hasV8Ops()))
-    return Match_InvalidOperand;
+  switch (Inst.getOpcode()) {
+  case ARM::VMRS:
+  case ARM::VMSR:
+  case ARM::VMRS_FPCXTS:
+  case ARM::VMRS_FPCXTNS:
+  case ARM::VMSR_FPCXTS:
+  case ARM::VMSR_FPCXTNS:
+  case ARM::VMRS_FPSCR_NZCVQC:
+  case ARM::VMSR_FPSCR_NZCVQC:
+  case ARM::FMSTAT:
+  case ARM::VMRS_VPR:
+  case ARM::VMRS_P0:
+  case ARM::VMSR_VPR:
+  case ARM::VMSR_P0:
+    // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
+    // ARMv8-A.
+    if (Inst.getOperand(0).isReg() && Inst.getOperand(0).getReg() == ARM::SP &&
+        (isThumb() && !hasV8Ops()))
+      return Match_InvalidOperand;
+    break;
+  default:
+    break;
+  }
 
   for (unsigned I = 0; I < MCID.NumOperands; ++I)
     if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
       // rGPRRegClass excludes PC, and also excluded SP before ARMv8
-      if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+      const auto &Op = Inst.getOperand(I);
+      if (!Op.isReg()) {
+        // This can happen in awkward cases with tied operands, e.g. a
+        // writeback load/store with a complex addressing mode in
+        // which there's an output operand corresponding to the
+        // updated written-back base register: the Tablegen-generated
+        // AsmMatcher will have written a placeholder operand to that
+        // slot in the form of an immediate 0, because it can't
+        // generate the register part of the complex addressing-mode
+        // operand ahead of time.
+        continue;
+      }
+
+      unsigned Reg = Op.getReg();
+      if ((Reg == ARM::SP) && !hasV8Ops())
         return Match_RequiresV8;
-      else if (Inst.getOperand(I).getReg() == ARM::PC)
+      else if (Reg == ARM::PC)
         return Match_InvalidOperand;
     }
 
@@ -9268,7 +10369,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
   return PlainMatchResult;
 }
 
-static std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string ARMMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
                                          unsigned VariantID = 0);
 
 static const char *getSubtargetFeatureName(uint64_t Val);
@@ -9296,6 +10397,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       // Still progress the IT block, otherwise one wrong condition causes
       // nasty cascading errors.
       forwardITPosition();
+      forwardVPTPosition();
       return true;
     }
 
@@ -9322,6 +10424,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     // and process gets a consistent answer about whether we're in an IT
     // block.
     forwardITPosition();
+    forwardVPTPosition();
 
     // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
     // doesn't actually encode.
@@ -9341,7 +10444,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     ReportNearMisses(NearMisses, IDLoc, Operands);
     return true;
   case Match_MnemonicFail: {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = ARMMnemonicSpellCheck(
       ((ARMOperand &)*Operands[0]).getToken(), FBS);
     return Error(IDLoc, "invalid instruction" + Suggestion,
@@ -10384,11 +11487,11 @@ ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) {
                       : "operand must be a register in range [r0, r12] or r14";
   // DPR contains 16 registers for some FPUs, and 32 for others.
   case Match_DPR:
-    return hasD16() ? "operand must be a register in range [d0, d15]"
-                    : "operand must be a register in range [d0, d31]";
+    return hasD32() ? "operand must be a register in range [d0, d31]"
+                    : "operand must be a register in range [d0, d15]";
   case Match_DPR_RegList:
-    return hasD16() ? "operand must be a list of registers in range [d0, d15]"
-                    : "operand must be a list of registers in range [d0, d31]";
+    return hasD32() ? "operand must be a list of registers in range [d0, d31]"
+                    : "operand must be a list of registers in range [d0, d15]";
 
   // For all other diags, use the static string from tablegen.
   default:
@@ -10416,7 +11519,7 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
   // variants of an instruction that take 8- and 16-bit immediates, we want
   // to only report the widest one.
   std::multimap<unsigned, unsigned> OperandMissesSeen;
-  SmallSet<uint64_t, 4> FeatureMissesSeen;
+  SmallSet<FeatureBitset, 4> FeatureMissesSeen;
   bool ReportedTooFewOperands = false;
 
   // Process the near-misses in reverse order, so that we see more general ones
@@ -10467,7 +11570,7 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
       break;
     }
     case NearMissInfo::NearMissFeature: {
-      uint64_t MissingFeatures = I.getFeatures();
+      const FeatureBitset &MissingFeatures = I.getFeatures();
       // Don't report the same set of features twice.
       if (FeatureMissesSeen.count(MissingFeatures))
         break;
@@ -10475,20 +11578,21 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
 
       // Special case: don't report a feature set which includes arm-mode for
       // targets that don't have ARM mode.
-      if ((MissingFeatures & Feature_IsARM) && !hasARM())
+      if (MissingFeatures.test(Feature_IsARMBit) && !hasARM())
         break;
       // Don't report any near-misses that both require switching instruction
       // set, and adding other subtarget features.
-      if (isThumb() && (MissingFeatures & Feature_IsARM) &&
-          (MissingFeatures & ~Feature_IsARM))
+      if (isThumb() && MissingFeatures.test(Feature_IsARMBit) &&
+          MissingFeatures.count() > 1)
         break;
-      if (!isThumb() && (MissingFeatures & Feature_IsThumb) &&
-          (MissingFeatures & ~Feature_IsThumb))
+      if (!isThumb() && MissingFeatures.test(Feature_IsThumbBit) &&
+          MissingFeatures.count() > 1)
         break;
-      if (!isThumb() && (MissingFeatures & Feature_IsThumb2) &&
-          (MissingFeatures & ~(Feature_IsThumb2 | Feature_IsThumb)))
+      if (!isThumb() && MissingFeatures.test(Feature_IsThumb2Bit) &&
+          (MissingFeatures & ~FeatureBitset({Feature_IsThumb2Bit,
+                                             Feature_IsThumbBit})).any())
         break;
-      if (isMClass() && (MissingFeatures & Feature_HasNEON))
+      if (isMClass() && MissingFeatures.test(Feature_HasNEONBit))
         break;
 
       NearMissMessage Message;
@@ -10496,14 +11600,10 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
       raw_svector_ostream OS(Message.Message);
 
       OS << "instruction requires:";
-      uint64_t Mask = 1;
-      for (unsigned MaskPos = 0; MaskPos < (sizeof(MissingFeatures) * 8 - 1);
-           ++MaskPos) {
-        if (MissingFeatures & Mask) {
-          OS << " " << getSubtargetFeatureName(MissingFeatures & Mask);
-        }
-        Mask <<= 1;
-      }
+      for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
+        if (MissingFeatures.test(i))
+          OS << ' ' << getSubtargetFeatureName(i);
+
       NearMissesOut.emplace_back(Message);
 
       break;
@@ -10579,38 +11679,44 @@ void ARMAsmParser::ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses,
   }
 }
 
-// FIXME: This structure should be moved inside ARMTargetParser
-// when we start to table-generate them, and we can use the ARM
-// flags below, that were generated by table-gen.
-static const struct {
-  const unsigned Kind;
-  const uint64_t ArchCheck;
-  const FeatureBitset Features;
-} Extensions[] = {
-  { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
-  { ARM::AEK_CRYPTO,  Feature_HasV8,
-    {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
-  { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
-  { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
-    {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
-  { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
-  { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
-  { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
-  // FIXME: Only available in A-class, isel not predicated
-  { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
-  { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
-  { ARM::AEK_RAS, Feature_HasV8, {ARM::FeatureRAS} },
-  // FIXME: Unsupported extensions.
-  { ARM::AEK_OS, Feature_None, {} },
-  { ARM::AEK_IWMMXT, Feature_None, {} },
-  { ARM::AEK_IWMMXT2, Feature_None, {} },
-  { ARM::AEK_MAVERICK, Feature_None, {} },
-  { ARM::AEK_XSCALE, Feature_None, {} },
-};
-
 /// parseDirectiveArchExtension
 ///   ::= .arch_extension [no]feature
 bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
+  // FIXME: This structure should be moved inside ARMTargetParser
+  // when we start to table-generate them, and we can use the ARM
+  // flags below, that were generated by table-gen.
+  static const struct {
+    const unsigned Kind;
+    const FeatureBitset ArchCheck;
+    const FeatureBitset Features;
+  } Extensions[] = {
+    { ARM::AEK_CRC, {Feature_HasV8Bit}, {ARM::FeatureCRC} },
+    { ARM::AEK_CRYPTO,  {Feature_HasV8Bit},
+      {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+    { ARM::AEK_FP, {Feature_HasV8Bit},
+      {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+    { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM),
+      {Feature_HasV7Bit, Feature_IsNotMClassBit},
+      {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
+    { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit},
+      {ARM::FeatureMP} },
+    { ARM::AEK_SIMD, {Feature_HasV8Bit},
+      {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+    { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} },
+    // FIXME: Only available in A-class, isel not predicated
+    { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} },
+    { ARM::AEK_FP16, {Feature_HasV8_2aBit},
+      {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
+    { ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS} },
+    { ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB} },
+    // FIXME: Unsupported extensions.
+    { ARM::AEK_OS, {}, {} },
+    { ARM::AEK_IWMMXT, {}, {} },
+    { ARM::AEK_IWMMXT2, {}, {} },
+    { ARM::AEK_MAVERICK, {}, {} },
+    { ARM::AEK_XSCALE, {}, {} },
+  };
+
   MCAsmParser &Parser = getParser();
 
   if (getLexer().isNot(AsmToken::Identifier))
@@ -10646,12 +11752,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
                                "allowed for the current base architecture");
 
     MCSubtargetInfo &STI = copySTI();
-    FeatureBitset ToggleFeatures = EnableFeature
-      ? (~STI.getFeatureBits() & Extension.Features)
-      : ( STI.getFeatureBits() & Extension.Features);
-
-    uint64_t Features =
-        ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+    if (EnableFeature) {
+      STI.SetFeatureBitsTransitively(Extension.Features);
+    } else {
+      STI.ClearFeatureBitsTransitively(Extension.Features);
+    }
+    FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
     setAvailableFeatures(Features);
     return false;
   }
@@ -10675,6 +11781,18 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
         if (CE->getValue() == 0)
           return Match_Success;
     break;
+  case MCK__35_8:
+    if (Op.isImm())
+      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
+        if (CE->getValue() == 8)
+          return Match_Success;
+    break;
+  case MCK__35_16:
+    if (Op.isImm())
+      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
+        if (CE->getValue() == 16)
+          return Match_Success;
+    break;
   case MCK_ModImm:
     if (Op.isImm()) {
       const MCExpr *SOExpr = Op.getImm();
@@ -10698,3 +11816,76 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
   }
   return Match_InvalidOperand;
 }
+
+/// Decides whether \p Mnemonic is treated as VPT-predicable.
+///
+/// Returns false unless hasMVE() holds. Otherwise returns true when
+/// \p Mnemonic starts with one of the prefixes handled below; a few of those
+/// prefixes are further restricted by the exact mnemonic or by \p ExtraToken.
+bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
+                                           StringRef ExtraToken) {
+  if (!hasMVE())
+    return false;
+
+  // Prefixes accepted without any further condition. Order is irrelevant:
+  // the first match returns true.
+  static const char *const Prefixes[] = {
+      "vabav", "vaddv", "vaddlv",
+      "vminnmv", "vminnmav", "vminv",
+      "vminav", "vmaxnmv", "vmaxnmav",
+      "vmaxv", "vmaxav", "vmladav",
+      "vrmlaldavh", "vrmlalvh", "vmlsdav",
+      "vmlav", "vmlaldav", "vmlalv",
+      "vmaxnm", "vminnm", "vmax",
+      "vmin", "vshlc", "vmovlt",
+      "vmovlb", "vshll", "vrshrn",
+      "vshrn", "vqrshrun", "vqshrun",
+      "vqrshrn", "vqshrn", "vbic",
+      "vrev64", "vrev32", "vrev16",
+      "vmvn", "veor", "vorn",
+      "vorr", "vand", "vmul",
+      "vqrdmulh", "vqdmulh", "vsub",
+      "vadd", "vqsub", "vqadd",
+      "vabd", "vrhadd", "vhsub",
+      "vhadd", "vdup", "vcls",
+      "vclz", "vneg", "vabs",
+      "vqneg", "vqabs", "vcmla",
+      "vfma", "vfms", "vcadd",
+      "vshl", "vqshl", "vqrshl",
+      "vrshl", "vsri", "vsli",
+      "vrshr", "vshr", "vpsel",
+      "vcmp", "vqdmladh", "vqrdmladh",
+      "vqdmlsdh", "vqrdmlsdh", "vcmul",
+      "vrmulh", "vqmovn", "vqmovun",
+      "vmovnt", "vmovnb", "vmaxa",
+      "vmaxnma", "vhcadd", "vadc",
+      "vsbc", "vstrb", "vldrb",
+      "vstrw", "vldrw", "vldrd",
+      "vstrd", "vqdmull", "vbrsr",
+      "vfmas", "vmlas", "vmla",
+      "vqdmlash", "vqdmlah", "vqrdmlash",
+      "vqrdmlah", "viwdup", "vdwdup",
+      "vidup", "vddup", "vctp",
+      "vpnot", "vrmlsldavh", "vmlsldav",
+      "vcvt",
+  };
+
+  for (const char *Prefix : Prefixes)
+    if (Mnemonic.startswith(Prefix))
+      return true;
+
+  // These prefixes are accepted except for one exact mnemonic each.
+  if (Mnemonic.startswith("vrint") && Mnemonic != "vrintr")
+    return true;
+  if (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi")
+    return true;
+  if (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi")
+    return true;
+
+  // Any remaining "vmov" mnemonic is accepted unless ExtraToken is one of
+  // the listed suffixes.
+  if (!Mnemonic.startswith("vmov"))
+    return false;
+  return ExtraToken != ".f16" && ExtraToken != ".32" && ExtraToken != ".16" &&
+         ExtraToken != ".8";
+}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 61bec04678dd..673691ebd93e 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1,15 +1,16 @@
 //===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMBaseInstrInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "Utils/ARMBaseInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -63,22 +64,19 @@ namespace {
         return ITStates.size() == 1;
       }
 
-      // Called when decoding an IT instruction. Sets the IT state for the following
-      // instructions that for the IT block. Firstcond and Mask correspond to the
-      // fields in the IT instruction encoding.
+      // Called when decoding an IT instruction. Sets the IT state for
+      // the following instructions that form the IT block. Firstcond
+      // corresponds to the field in the IT instruction encoding; Mask
+      // is in the MCOperand format in which 1 means 'else' and 0 'then'.
       void setITState(char Firstcond, char Mask) {
         // (3 - the number of trailing zeros) is the number of then / else.
-        unsigned CondBit0 = Firstcond & 1;
         unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
         unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
         assert(NumTZ <= 3 && "Invalid IT mask!");
         // push condition codes onto the stack the correct order for the pops
         for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
-          bool T = ((Mask >> Pos) & 1) == CondBit0;
-          if (T)
-            ITStates.push_back(CCBits);
-          else
-            ITStates.push_back(CCBits ^ 1);
+          unsigned Else = (Mask >> Pos) & 1;
+          ITStates.push_back(CCBits ^ Else);
         }
         ITStates.push_back(CCBits);
       }
@@ -87,6 +85,47 @@ namespace {
       std::vector<unsigned char> ITStates;
   };
 
+  // Stack of the then/else predicates still pending in a VPT block. The back
+  // of VPTStates is the entry that is read and popped next.
+  class VPTStatus
+  {
+    public:
+      // Returns the back entry, or ARMVCC::None when the stack is empty.
+      unsigned getVPTPred() {
+        if (!instrInVPTBlock())
+          return ARMVCC::None;
+        return VPTStates.back();
+      }
+
+      // Removes the back entry.
+      void advanceVPTState() { VPTStates.pop_back(); }
+
+      // True while at least one entry is pending.
+      bool instrInVPTBlock() { return !VPTStates.empty(); }
+
+      // True when exactly one entry is pending.
+      bool instrLastInVPTBlock() { return VPTStates.size() == 1; }
+
+      // Pushes one entry for each Mask bit above its lowest set bit (set means
+      // 'else', clear means 'then'), followed by a final 'then'.
+      void setVPTState(char Mask) {
+        // (3 - the number of trailing zeros) is the number of then / else.
+        const unsigned TrailingZeros = countTrailingZeros<uint8_t>(Mask);
+        assert(TrailingZeros <= 3 && "Invalid VPT mask!");
+        // Push the predicates in the correct order for the later pops.
+        for (unsigned Bit = TrailingZeros + 1; Bit <= 3; ++Bit) {
+          const bool IsElse = ((Mask >> Bit) & 1) != 0;
+          VPTStates.push_back(IsElse ? ARMVCC::Else : ARMVCC::Then);
+        }
+        // This entry ends up at the back, so it is read first.
+        VPTStates.push_back(ARMVCC::Then);
+      }
+
+    private:
+      // Holds ARMVCC::Then / ARMVCC::Else values.
+      SmallVector<unsigned char, 4> VPTStates;
+  };
+
 /// ARM disassembler for all ARM platforms.
 class ARMDisassembler : public MCDisassembler {
 public:
@@ -100,27 +139,23 @@ public:
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
-};
-
-/// Thumb disassembler for all Thumb platforms.
-class ThumbDisassembler : public MCDisassembler {
-public:
-  ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
-    MCDisassembler(STI, Ctx) {
-  }
 
-  ~ThumbDisassembler() override = default;
+private:
+  DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
+                                 ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                 raw_ostream &VStream,
+                                 raw_ostream &CStream) const;
 
-  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
-                              ArrayRef<uint8_t> Bytes, uint64_t Address,
-                              raw_ostream &VStream,
-                              raw_ostream &CStream) const override;
+  DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
+                                   ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                   raw_ostream &VStream,
+                                   raw_ostream &CStream) const;
 
-private:
   mutable ITStatus ITBlock;
+  mutable VPTStatus VPTBlock;
 
   DecodeStatus AddThumbPredicate(MCInst&) const;
-  void UpdateThumbVFPPredicate(MCInst&) const;
+  void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
 };
 
 } // end anonymous namespace
@@ -144,12 +179,23 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
 // Definitions are further down.
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
                                                unsigned RegNo, uint64_t Address,
                                                const void *Decoder);
 static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
                                                unsigned RegNo, uint64_t Address,
                                                const void *Decoder);
+static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst,
+                                               unsigned RegNo, uint64_t Address,
+                                               const void *Decoder);
+static DecodeStatus DecodeGPRwithZRnospRegisterClass(
+    MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder);
 static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -166,12 +212,20 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
                                                 unsigned RegNo,
                                                 uint64_t Address,
                                                 const void *Decoder);
 static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
@@ -262,6 +316,10 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val,
+                               uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
@@ -276,6 +334,11 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
@@ -324,6 +387,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
                                                        unsigned Val,
                                                        uint64_t Address,
@@ -359,14 +424,28 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void* Decoder);
 static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
+                                           uint64_t Address,
+                                           const void *Decoder);
 static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
+template<int shift, int WriteBack>
+static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
@@ -409,6 +488,82 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
                                          uint64_t Address, const void *Decoder);
 
+template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val,
+                                               uint64_t Address,
+                                               const void *Decoder);
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder);
+static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
+                                           uint64_t Address,
+                                           const void *Decoder);
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder);
+static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
+                                         uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val,
+                                                     uint64_t Address,
+                                                     const void *Decoder);
+static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val,
+                                                     uint64_t Address,
+                                                     const void *Decoder);
+static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val,
+                                                     uint64_t Address,
+                                                     const void *Decoder);
+static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst,
+                                                       unsigned Val,
+                                                       uint64_t Address,
+                                                       const void *Decoder);
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder);
+template<unsigned MinLog, unsigned MaxLog>
+static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder);
+template <int shift>
+static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
+                                             uint64_t Address,
+                                             const void *Decoder);
+template<unsigned start>
+static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
+                                                    uint64_t Address,
+                                                    const void *Decoder);
+static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address,
+                                         const void *Decoder);
+static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address,
+                                         const void *Decoder);
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
+                                      uint64_t Address, const void *Decoder);
+typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val,
+                                    uint64_t Address, const void *Decoder);
+template<bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
+                                  uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
+                                  uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
+                                                  uint64_t Address,
+                                                  const void *Decoder);
 #include "ARMGenDisassemblerTables.inc"
 
 static MCDisassembler *createARMDisassembler(const Target &T,
@@ -417,12 +572,6 @@ static MCDisassembler *createARMDisassembler(const Target &T,
   return new ARMDisassembler(STI, Ctx);
 }
 
-static MCDisassembler *createThumbDisassembler(const Target &T,
-                                               const MCSubtargetInfo &STI,
-                                               MCContext &Ctx) {
-  return new ThumbDisassembler(STI, Ctx);
-}
-
 // Post-decoding checks
 static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
                                             uint64_t Address, raw_ostream &OS,
@@ -440,6 +589,18 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
         return MCDisassembler::SoftFail;
       return Result;
     }
+    case ARM::t2ADDri:
+    case ARM::t2ADDri12:
+    case ARM::t2ADDrr:
+    case ARM::t2ADDrs:
+    case ARM::t2SUBri:
+    case ARM::t2SUBri12:
+    case ARM::t2SUBrr:
+    case ARM::t2SUBrs:
+      if (MI.getOperand(0).getReg() == ARM::SP &&
+          MI.getOperand(1).getReg() != ARM::SP)
+        return MCDisassembler::SoftFail;
+      return Result;
     default: return Result;
   }
 }
@@ -448,6 +609,16 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                              ArrayRef<uint8_t> Bytes,
                                              uint64_t Address, raw_ostream &OS,
                                              raw_ostream &CS) const {
+  if (STI.getFeatureBits()[ARM::ModeThumb])
+    return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
+  return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
+}
+
+DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
+                                                ArrayRef<uint8_t> Bytes,
+                                                uint64_t Address,
+                                                raw_ostream &OS,
+                                                raw_ostream &CS) const {
   CommentStream = &CS;
 
   assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -569,12 +740,22 @@ static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
   MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
 }
 
+static bool isVectorPredicable(unsigned Opcode) { // true iff some declared operand of Opcode satisfies ARM::isVpred
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; // per-operand descriptors from the ARMInsts table
+  unsigned short NumOps = ARMInsts[Opcode].NumOperands;
+  for (unsigned i = 0; i < NumOps; ++i) {
+    if (ARM::isVpred(OpInfo[i].OperandType))
+      return true; // first vpred-typed operand is enough
+  }
+  return false; // no operand has a vpred operand type
+}
+
 // Most Thumb instructions don't have explicit predicates in the
 // encoding, but rather get their predicates from IT context.  We need
 // to fix up the predicate operands using this context information as a
 // post-pass.
 MCDisassembler::DecodeStatus
-ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
   MCDisassembler::DecodeStatus S = Success;
 
   const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
@@ -590,6 +771,10 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
     case ARM::t2CPS3p:
     case ARM::t2CPS2p:
     case ARM::t2CPS1p:
+    case ARM::t2CSEL:
+    case ARM::t2CSINC:
+    case ARM::t2CSINV:
+    case ARM::t2CSNEG:
     case ARM::tMOVSr:
     case ARM::tSETEND:
       // Some instructions (mostly conditional branches) are not
@@ -616,37 +801,66 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
       break;
   }
 
-  // If we're in an IT block, base the predicate on that.  Otherwise,
+  // Warn on non-VPT predicable instruction in a VPT block and a VPT
+  // predicable instruction in an IT block
+  if ((!isVectorPredicable(MI.getOpcode()) && VPTBlock.instrInVPTBlock()) ||
+       (isVectorPredicable(MI.getOpcode()) && ITBlock.instrInITBlock()))
+    S = SoftFail;
+
+  // If we're in an IT/VPT block, base the predicate on that.  Otherwise,
   // assume a predicate of AL.
-  unsigned CC;
-  CC = ITBlock.getITCC();
-  if (CC == 0xF)
-    CC = ARMCC::AL;
-  if (ITBlock.instrInITBlock())
+  unsigned CC = ARMCC::AL;
+  unsigned VCC = ARMVCC::None;
+  if (ITBlock.instrInITBlock()) {
+    CC = ITBlock.getITCC();
     ITBlock.advanceITState();
+  } else if (VPTBlock.instrInVPTBlock()) {
+    VCC = VPTBlock.getVPTPred();
+    VPTBlock.advanceVPTState();
+  }
 
   const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
   unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
-  MCInst::iterator I = MI.begin();
-  for (unsigned i = 0; i < NumOps; ++i, ++I) {
-    if (I == MI.end()) break;
-    if (OpInfo[i].isPredicate()) {
-      I = MI.insert(I, MCOperand::createImm(CC));
-      ++I;
-      if (CC == ARMCC::AL)
-        MI.insert(I, MCOperand::createReg(0));
-      else
-        MI.insert(I, MCOperand::createReg(ARM::CPSR));
-      return S;
-    }
+
+  MCInst::iterator CCI = MI.begin();
+  for (unsigned i = 0; i < NumOps; ++i, ++CCI) {
+    if (OpInfo[i].isPredicate() || CCI == MI.end()) break;
   }
 
-  I = MI.insert(I, MCOperand::createImm(CC));
-  ++I;
-  if (CC == ARMCC::AL)
-    MI.insert(I, MCOperand::createReg(0));
-  else
-    MI.insert(I, MCOperand::createReg(ARM::CPSR));
+  if (ARMInsts[MI.getOpcode()].isPredicable()) {
+    CCI = MI.insert(CCI, MCOperand::createImm(CC));
+    ++CCI;
+    if (CC == ARMCC::AL)
+      MI.insert(CCI, MCOperand::createReg(0));
+    else
+      MI.insert(CCI, MCOperand::createReg(ARM::CPSR));
+  } else if (CC != ARMCC::AL) {
+    Check(S, SoftFail);
+  }
+
+  MCInst::iterator VCCI = MI.begin();
+  unsigned VCCPos;
+  for (VCCPos = 0; VCCPos < NumOps; ++VCCPos, ++VCCI) {
+    if (ARM::isVpred(OpInfo[VCCPos].OperandType) || VCCI == MI.end()) break;
+  }
+
+  if (isVectorPredicable(MI.getOpcode())) {
+    VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
+    ++VCCI;
+    if (VCC == ARMVCC::None)
+      MI.insert(VCCI, MCOperand::createReg(0));
+    else
+      MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+    if (OpInfo[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
+      int TiedOp = ARMInsts[MI.getOpcode()].getOperandConstraint(
+        VCCPos + 2, MCOI::TIED_TO);
+      assert(TiedOp >= 0 &&
+             "Inactive register in vpred_r is not tied to an output!");
+      MI.insert(VCCI, MI.getOperand(TiedOp));
+    }
+  } else if (VCC != ARMVCC::None) {
+    Check(S, SoftFail);
+  }
 
   return S;
 }
@@ -656,19 +870,26 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
 // mode, the auto-generated decoder will give them an (incorrect)
 // predicate operand.  We need to rewrite these operands based on the IT
 // context as a post-pass.
-void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+void ARMDisassembler::UpdateThumbVFPPredicate(
+  DecodeStatus &S, MCInst &MI) const {
   unsigned CC;
   CC = ITBlock.getITCC();
   if (CC == 0xF)
     CC = ARMCC::AL;
   if (ITBlock.instrInITBlock())
     ITBlock.advanceITState();
+  else if (VPTBlock.instrInVPTBlock()) {
+    CC = VPTBlock.getVPTPred();
+    VPTBlock.advanceVPTState();
+  }
 
   const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
   MCInst::iterator I = MI.begin();
   unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
   for (unsigned i = 0; i < NumOps; ++i, ++I) {
     if (OpInfo[i].isPredicate() ) {
+      if (CC != ARMCC::AL && !ARMInsts[MI.getOpcode()].isPredicable())
+        Check(S, SoftFail);
       I->setImm(CC);
       ++I;
       if (CC == ARMCC::AL)
@@ -680,11 +901,11 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
   }
 }
 
-DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                               ArrayRef<uint8_t> Bytes,
-                                               uint64_t Address,
-                                               raw_ostream &OS,
-                                               raw_ostream &CS) const {
+DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
+                                                  ArrayRef<uint8_t> Bytes,
+                                                  uint64_t Address,
+                                                  raw_ostream &OS,
+                                                  raw_ostream &CS) const {
   CommentStream = &CS;
 
   assert(STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -751,6 +972,27 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
   uint32_t Insn32 =
       (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
+
+  Result =
+      decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI);
+  if (Result != MCDisassembler::Fail) {
+    Size = 4;
+
+    // Nested VPT blocks are UNPREDICTABLE. Must be checked before we add
+    // the VPT predicate.
+    if (isVPTOpcode(MI.getOpcode()) && VPTBlock.instrInVPTBlock())
+      Result = MCDisassembler::SoftFail;
+
+    Check(Result, AddThumbPredicate(MI));
+
+    if (isVPTOpcode(MI.getOpcode())) {
+      unsigned Mask = MI.getOperand(0).getImm();
+      VPTBlock.setVPTState(Mask);
+    }
+
+    return Result;
+  }
+
   Result =
       decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
   if (Result != MCDisassembler::Fail) {
@@ -766,7 +1008,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   if (Result != MCDisassembler::Fail) {
     Size = 4;
     Check(Result, AddThumbPredicate(MI));
-    return Result;
+    return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn32, Result);
   }
 
   if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
@@ -774,7 +1016,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
         decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
     if (Result != MCDisassembler::Fail) {
       Size = 4;
-      UpdateThumbVFPPredicate(MI);
+      UpdateThumbVFPPredicate(Result, MI);
       return Result;
     }
   }
@@ -861,9 +1103,9 @@ extern "C" void LLVMInitializeARMDisassembler() {
   TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
                                          createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
-                                         createThumbDisassembler);
+                                         createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
-                                         createThumbDisassembler);
+                                         createARMDisassembler);
 }
 
 static const uint16_t GPRDecoderTable[] = {
@@ -873,6 +1115,13 @@ static const uint16_t GPRDecoderTable[] = {
   ARM::R12, ARM::SP, ARM::LR, ARM::PC
 };
 
+static const uint16_t CLRMGPRDecoderTable[] = { // indexed by register number 0-15 in DecodeCLRMGPRRegisterClass
+  ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+  ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+  ARM::R12, 0, ARM::LR, ARM::APSR // slot 13 is 0 (treated as "no register" by the decoder); slot 15 is APSR where GPRDecoderTable has PC
+};
+
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder) {
   if (RegNo > 15)
@@ -883,6 +1132,20 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo, // maps RegNo through CLRMGPRDecoderTable and appends it to Inst
+                                               uint64_t Address,
+                                               const void *Decoder) { // Address and Decoder are not used in this body
+  if (RegNo > 15)
+    return MCDisassembler::Fail; // outside the 16-entry table
+
+  unsigned Register = CLRMGPRDecoderTable[RegNo];
+  if (Register == 0)
+    return MCDisassembler::Fail; // the table stores 0 at index 13, so that register number is rejected
+
+  Inst.addOperand(MCOperand::createReg(Register)); // append the decoded register operand
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus
 DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder) {
@@ -911,6 +1174,34 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
   return S;
 }
 
+static DecodeStatus
+DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, // GPR decode in which register number 15 means ARM::ZR
+                             uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (RegNo == 15) // 15 is emitted as ARM::ZR instead of going through DecodeGPRRegisterClass
+  {
+    Inst.addOperand(MCOperand::createReg(ARM::ZR));
+    return MCDisassembler::Success;
+  }
+
+  if (RegNo == 13) // 13 (ARM::SP in GPRDecoderTable) is flagged SoftFail, yet still decoded below
+    Check(S, MCDisassembler::SoftFail);
+
+  Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); // fold the plain GPR decode status into S
+  return S;
+}
+
+static DecodeStatus
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, // same as DecodeGPRwithZRRegisterClass, except 13 is a hard failure
+                                 uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  if (RegNo == 13)
+    return MCDisassembler::Fail; // rejected before any operand is added
+  Check(S, DecodeGPRwithZRRegisterClass(Inst, RegNo, Address, Decoder)); // all other numbers (including 15 -> ZR) are delegated
+  return S;
+}
+
 static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder) {
   if (RegNo > 7)
@@ -1024,9 +1315,9 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
   const FeatureBitset &featureBits =
     ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
 
-  bool hasD16 = featureBits[ARM::FeatureD16];
+  bool hasD32 = featureBits[ARM::FeatureD32];
 
-  if (RegNo > 31 || (hasD16 && RegNo > 15))
+  if (RegNo > 31 || (!hasD32 && RegNo > 15))
     return MCDisassembler::Fail;
 
   unsigned Register = DPRDecoderTable[RegNo];
@@ -1041,6 +1332,13 @@ static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
   return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
 }
 
+static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo, // range-checked wrapper around DecodeSPRRegisterClass
+                                   uint64_t Address, const void *Decoder) {
+  if (RegNo > 15)
+    return MCDisassembler::Fail; // only register numbers 0-15 are accepted here
+  return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
 static DecodeStatus
 DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
                             uint64_t Address, const void *Decoder) {
@@ -1111,16 +1409,19 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
 
 static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
   if (Val == 0xF) return MCDisassembler::Fail;
   // AL predicate is not allowed on Thumb1 branches.
   if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
     return MCDisassembler::Fail;
+  if (Val != ARMCC::AL && !ARMInsts[Inst.getOpcode()].isPredicable())
+    Check(S, MCDisassembler::SoftFail);
   Inst.addOperand(MCOperand::createImm(Val));
   if (Val == ARMCC::AL) {
     Inst.addOperand(MCOperand::createReg(0));
   } else
     Inst.addOperand(MCOperand::createReg(ARM::CPSR));
-  return MCDisassembler::Success;
+  return S;
 }
 
 static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
@@ -1210,6 +1511,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
 
   bool NeedDisjointWriteback = false;
   unsigned WritebackReg = 0;
+  bool CLRM = false;
   switch (Inst.getOpcode()) {
   default:
     break;
@@ -1224,17 +1526,26 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
     NeedDisjointWriteback = true;
     WritebackReg = Inst.getOperand(0).getReg();
     break;
+  case ARM::t2CLRM:
+    CLRM = true;
+    break;
   }
 
   // Empty register lists are not allowed.
   if (Val == 0) return MCDisassembler::Fail;
   for (unsigned i = 0; i < 16; ++i) {
     if (Val & (1 << i)) {
-      if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
-        return MCDisassembler::Fail;
-      // Writeback not allowed if Rn is in the target list.
-      if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
-        Check(S, MCDisassembler::SoftFail);
+      if (CLRM) {
+        if (!Check(S, DecodeCLRMGPRRegisterClass(Inst, i, Address, Decoder))) {
+          return MCDisassembler::Fail;
+        }
+      } else {
+        if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+          return MCDisassembler::Fail;
+        // Writeback not allowed if Rn is in the target list.
+        if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
+          Check(S, MCDisassembler::SoftFail);
+      }
     }
   }
 
@@ -1327,6 +1638,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
   unsigned imm = fieldFromInstruction(Insn, 0, 8);
   unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   unsigned U = fieldFromInstruction(Insn, 23, 1);
+  const FeatureBitset &featureBits =
+    ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
 
   switch (Inst.getOpcode()) {
     case ARM::LDC_OFFSET:
@@ -1361,15 +1674,42 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
     case ARM::t2STCL_PRE:
     case ARM::t2STCL_POST:
     case ARM::t2STCL_OPTION:
-      if (coproc == 0xA || coproc == 0xB)
+    case ARM::t2LDC2_OFFSET:
+    case ARM::t2LDC2L_OFFSET:
+    case ARM::t2LDC2_PRE:
+    case ARM::t2LDC2L_PRE:
+    case ARM::t2STC2_OFFSET:
+    case ARM::t2STC2L_OFFSET:
+    case ARM::t2STC2_PRE:
+    case ARM::t2STC2L_PRE:
+    case ARM::LDC2_OFFSET:
+    case ARM::LDC2L_OFFSET:
+    case ARM::LDC2_PRE:
+    case ARM::LDC2L_PRE:
+    case ARM::STC2_OFFSET:
+    case ARM::STC2L_OFFSET:
+    case ARM::STC2_PRE:
+    case ARM::STC2L_PRE:
+    case ARM::t2LDC2_OPTION:
+    case ARM::t2STC2_OPTION:
+    case ARM::t2LDC2_POST:
+    case ARM::t2LDC2L_POST:
+    case ARM::t2STC2_POST:
+    case ARM::t2STC2L_POST:
+    case ARM::LDC2_POST:
+    case ARM::LDC2L_POST:
+    case ARM::STC2_POST:
+    case ARM::STC2L_POST:
+      if (coproc == 0xA || coproc == 0xB ||
+          (featureBits[ARM::HasV8_1MMainlineOps] &&
+           (coproc == 0x8 || coproc == 0x9 || coproc == 0xA || coproc == 0xB ||
+            coproc == 0xE || coproc == 0xF)))
         return MCDisassembler::Fail;
       break;
     default:
       break;
   }
 
-  const FeatureBitset &featureBits =
-    ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
   if (featureBits[ARM::HasV8Ops] && (coproc != 14))
     return MCDisassembler::Fail;
 
@@ -3150,6 +3490,60 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
   return S;
 }
 
+static DecodeStatus
+DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn, // adds Qd, a packed 13-bit immediate and three trailing operands
+                           uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) | // Qd = Insn[22]:Insn[15:13], assuming fieldFromInstruction(v, start, len)
+                 fieldFromInstruction(Insn, 13, 3));
+  unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 4); // imm[3:0] = Insn[3:0]
+  imm |= fieldFromInstruction(Insn, 16, 3) << 4; // imm[6:4] = Insn[18:16]
+  imm |= fieldFromInstruction(Insn, 28, 1) << 7; // imm[7] = Insn[28]
+  imm |= cmode                             << 8; // imm[11:8] = cmode
+  imm |= fieldFromInstruction(Insn, 5, 1)  << 12; // imm[12] = Insn[5]
+
+  if (cmode == 0xF && Inst.getOpcode() == ARM::MVE_VMVNimmi32)
+    return MCDisassembler::Fail; // cmode 0xF is not accepted for MVE_VMVNimmi32
+
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createImm(imm)); // the packed value built above, cmode included
+
+  Inst.addOperand(MCOperand::createImm(ARMVCC::None)); // presumably placeholder (unpredicated) vpred operands -- TODO confirm
+  Inst.addOperand(MCOperand::createReg(0));
+  Inst.addOperand(MCOperand::createImm(0));
+
+  return S;
+}
+
+static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn, // operand order: Qd, FPSCR_NZCV, Qn, Qm, [FPSCR_NZCV], imm(Qd)
+                               uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Qd = fieldFromInstruction(Insn, 13, 3); // Qd = Insn[22]:Insn[15:13], assuming fieldFromInstruction(v, start, len)
+  Qd |= fieldFromInstruction(Insn, 22, 1) << 3;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV)); // always added right after Qd
+
+  unsigned Qn = fieldFromInstruction(Insn, 17, 3); // Qn = Insn[7]:Insn[19:17]
+  Qn |= fieldFromInstruction(Insn, 7, 1) << 3;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  unsigned Qm = fieldFromInstruction(Insn, 1, 3); // Qm = Insn[5]:Insn[3:1]
+  Qm |= fieldFromInstruction(Insn, 5, 1) << 3;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!fieldFromInstruction(Insn, 12, 1)) // I bit clear => need input FPSCR
+    Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+  Inst.addOperand(MCOperand::createImm(Qd)); // NOTE(review): the raw Qd number is appended as an immediate; its purpose is not evident from this hunk
+
+  return S;
+}
+
 static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
                                         uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -3706,6 +4100,21 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address, // Val: bit 7 = add flag, bits 6:0 = magnitude; result scaled by 4
+                                   const void *Decoder) {
+  if (Val == 0)
+    Inst.addOperand(MCOperand::createImm(INT32_MIN)); // subtract flag with zero magnitude; presumably the "-0" marker, as in DecodeMveAddrModeQ
+  else {
+    int imm = Val & 0x7F; // 7-bit magnitude
+
+    if (!(Val & 0x80))
+      imm *= -1; // bit 7 clear means the offset is negative
+    Inst.addOperand(MCOperand::createImm(imm * 4));
+  }
+
+  return MCDisassembler::Success; // this decoder never fails
+}
+
 static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -3721,6 +4130,22 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
   return S;
 }
 
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val, // adds a base register, then a signed offset scaled by 4
+                                           uint64_t Address,
+                                           const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Rn = fieldFromInstruction(Val, 8, 4); // Rn = Val[11:8], assuming fieldFromInstruction(v, start, len)
+  unsigned imm = fieldFromInstruction(Val, 0, 8); // Val[7:0]: add flag plus 7-bit magnitude, as consumed by DecodeT2Imm7S4
+
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeT2Imm7S4(Inst, imm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
 static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -3748,8 +4173,23 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
   return MCDisassembler::Success;
 }
 
-static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
-                                 uint64_t Address, const void *Decoder) {
+template<int shift> // shift: the decoded offset is multiplied by 2^shift
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, // Val: bit 7 = add flag, bits 6:0 = magnitude
+                         uint64_t Address, const void *Decoder) {
+  int imm = Val & 0x7F;
+  if (Val == 0)
+    imm = INT32_MIN; // subtract flag with zero magnitude; presumably the "-0" marker, as in DecodeMveAddrModeQ
+  else if (!(Val & 0x80))
+    imm *= -1; // bit 7 clear means the offset is negative
+  if (imm != INT32_MIN)
+    imm *= (1U << shift); // the INT32_MIN sentinel is deliberately left unscaled
+  Inst.addOperand(MCOperand::createImm(imm));
+
+  return MCDisassembler::Success; // this decoder never fails
+}
+
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
+                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
   unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -3794,6 +4234,42 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
   return S;
 }
 
+template<int shift> // shift is forwarded to DecodeT2Imm7<shift>
+static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val, // adds a tGPR base register, then a scaled signed offset
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Rn = fieldFromInstruction(Val, 8, 3); // Rn = Val[10:8] (3 bits), assuming fieldFromInstruction(v, start, len)
+  unsigned imm = fieldFromInstruction(Val, 0, 8); // Val[7:0]: add flag plus 7-bit magnitude
+
+  if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeT2Imm7<shift>(Inst, imm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+template<int shift, int WriteBack> // shift: offset scale (2^shift); WriteBack: selects which base-register decoder is used
+static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val, // adds a base register, then a scaled signed offset
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Rn = fieldFromInstruction(Val, 8, 4); // Rn = Val[11:8], assuming fieldFromInstruction(v, start, len)
+  unsigned imm = fieldFromInstruction(Val, 0, 8); // Val[7:0]: add flag plus 7-bit magnitude
+  if (WriteBack) {
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) // NOTE(review): "Decoder..." prefix; this helper is not defined in this chunk -- confirm it exists
+      return MCDisassembler::Fail;
+  } else if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeT2Imm7<shift>(Inst, imm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
 static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -3941,6 +4417,43 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
   return S;
 }
 
+static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn, // adds a GPR (no PC) base, then an MQPR register
+                             uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  unsigned Rn = fieldFromInstruction(Insn, 3, 4); // Rn = Insn[6:3], assuming fieldFromInstruction(v, start, len)
+  unsigned Qm = fieldFromInstruction(Insn, 0, 3); // Qm = Insn[2:0]
+
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+template<int shift> // shift: the decoded offset is multiplied by 2^shift
+static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn, // adds an MQPR base register, then a scaled signed offset
+                             uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  unsigned Qm = fieldFromInstruction(Insn, 8, 3); // Qm = Insn[10:8], assuming fieldFromInstruction(v, start, len)
+  int imm = fieldFromInstruction(Insn, 0, 7); // 7-bit magnitude, Insn[6:0]
+
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  if(!fieldFromInstruction(Insn, 7, 1)) { // bit 7 clear means the offset is subtracted
+    if (imm == 0)
+      imm = INT32_MIN;                 // indicate -0
+    else
+      imm *= -1;
+  }
+  if (imm != INT32_MIN)
+    imm *= (1U << shift); // the -0 sentinel is deliberately left unscaled
+  Inst.addOperand(MCOperand::createImm(imm));
+
+  return S;
+}
+
 static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   // Val is passed in as S:J1:J2:imm10H:imm10L:'0'
@@ -3973,7 +4486,7 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
   const FeatureBitset &featureBits =
     ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
 
-  if (featureBits[ARM::HasV8Ops] && !(Val == 14 || Val == 15))
+  if (!isValidCoprocessorNumber(Val, featureBits))
     return MCDisassembler::Fail;
 
   Inst.addOperand(MCOperand::createImm(Val));
@@ -4981,6 +5494,16 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
   if (mask == 0x0)
     return MCDisassembler::Fail;
 
+  // IT masks are encoded as a sequence of replacement low-order bits
+  // for the condition code. So if the low bit of the starting
+  // condition code is 1, then we have to flip all the bits above the
+  // terminating bit (which is the lowest 1 bit).
+  if (pred & 1) {
+    unsigned LowBit = mask & -mask;
+    unsigned BitsAboveLowBit = 0xF & (-LowBit << 1);
+    mask ^= BitsAboveLowBit;
+  }
+
   Inst.addOperand(MCOperand::createImm(pred));
   Inst.addOperand(MCOperand::createImm(mask));
   return S;
@@ -5341,14 +5864,37 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
       ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction(Val, 12, 4);
+  // Add explicit operand for the destination sysreg, for cases where
+  // we have to model it for code generation purposes.
+  switch (Inst.getOpcode()) {
+  case ARM::VMSR_FPSCR_NZCVQC:
+    Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+    break;
+  case ARM::VMSR_P0:
+    Inst.addOperand(MCOperand::createReg(ARM::VPR));
+    break;
+  }
 
-  if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
-    if (Rt == 13 || Rt == 15)
-      S = MCDisassembler::SoftFail;
-    Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
-  } else
-    Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+  if (Inst.getOpcode() != ARM::FMSTAT) {
+    unsigned Rt = fieldFromInstruction(Val, 12, 4);
+
+    if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
+      if (Rt == 13 || Rt == 15)
+        S = MCDisassembler::SoftFail;
+      Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
+    } else
+      Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+  }
+
+  // Add explicit operand for the source sysreg, similarly to above.
+  switch (Inst.getOpcode()) {
+  case ARM::VMRS_FPSCR_NZCVQC:
+    Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+    break;
+  case ARM::VMRS_P0:
+    Inst.addOperand(MCOperand::createReg(ARM::VPR));
+    break;
+  }
 
   if (featureBits[ARM::ModeThumb]) {
     Inst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -5361,3 +5907,668 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
 
   return S;
 }
+
+template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  if (Val == 0 && !zeroPermitted)
+    S = MCDisassembler::Fail;
+
+  uint64_t DecVal;
+  if (isSigned)
+    DecVal = SignExtend32<size + 1>(Val << 1);
+  else
+    DecVal = (Val << 1);
+
+  if (!tryAddingSymbolicOperand(Address, Address + DecVal + 4, true, 4, Inst,
+                                Decoder))
+    Inst.addOperand(MCOperand::createImm(isNeg ? -DecVal : DecVal));
+  return S;
+}
+
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+
+  uint64_t LocImm = Inst.getOperand(0).getImm();
+  Val = LocImm + (2 << Val);
+  if (!tryAddingSymbolicOperand(Address, Address + Val + 4, true, 4, Inst,
+                                Decoder))
+    Inst.addOperand(MCOperand::createImm(Val));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  if (Val >= ARMCC::AL)  // also exclude the non-condition NV
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::createImm(Val));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (Inst.getOpcode() == ARM::MVE_LCTP)
+    return S;
+
+  unsigned Imm = fieldFromInstruction(Insn, 11, 1) |
+                 fieldFromInstruction(Insn, 1, 10) << 1;
+  switch (Inst.getOpcode()) {
+  case ARM::t2LEUpdate:
+  case ARM::MVE_LETP:
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    LLVM_FALLTHROUGH;
+  case ARM::t2LE:
+    if (!Check(S, DecodeBFLabelOperand<false, true, true, 11>(
+                   Inst, Imm, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  case ARM::t2WLS:
+  case ARM::MVE_WLSTP_8:
+  case ARM::MVE_WLSTP_16:
+  case ARM::MVE_WLSTP_32:
+  case ARM::MVE_WLSTP_64:
+    Inst.addOperand(MCOperand::createReg(ARM::LR));
+    if (!Check(S,
+               DecoderGPRRegisterClass(Inst, fieldFromInstruction(Insn, 16, 4),
+                                       Address, Decoder)) ||
+        !Check(S, DecodeBFLabelOperand<false, false, true, 11>(
+                   Inst, Imm, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  case ARM::t2DLS:
+  case ARM::MVE_DLSTP_8:
+  case ARM::MVE_DLSTP_16:
+  case ARM::MVE_DLSTP_32:
+  case ARM::MVE_DLSTP_64:
+    unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+    if (Rn == 0xF) {
+      // Enforce all the rest of the instruction bits in LCTP, which
+      // won't have been reliably checked based on LCTP's own tablegen
+      // record, because we came to this decode by a roundabout route.
+      uint32_t CanonicalLCTP = 0xF00FE001, SBZMask = 0x00300FFE;
+      if ((Insn & ~SBZMask) != CanonicalLCTP)
+        return MCDisassembler::Fail;   // a mandatory bit is wrong: hard fail
+      if (Insn != CanonicalLCTP)
+        Check(S, MCDisassembler::SoftFail); // an SBZ bit is wrong: soft fail
+
+      Inst.setOpcode(ARM::MVE_LCTP);
+    } else {
+      Inst.addOperand(MCOperand::createReg(ARM::LR));
+      if (!Check(S, DecoderGPRRegisterClass(Inst,
+                                            fieldFromInstruction(Insn, 16, 4),
+                                            Address, Decoder)))
+        return MCDisassembler::Fail;
+    }
+    break;
+  }
+  return S;
+}
+
+static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
+                                           uint64_t Address,
+                                           const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (Val == 0)
+    Val = 32;
+
+  Inst.addOperand(MCOperand::createImm(Val));
+
+  return S;
+}
+
+static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  if ((RegNo) + 1 > 11)
+    return MCDisassembler::Fail;
+
+  unsigned Register = GPRDecoderTable[(RegNo) + 1];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  if ((RegNo) > 14)
+    return MCDisassembler::Fail;
+
+  unsigned Register = GPRDecoderTable[(RegNo)];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+  Inst.addOperand(MCOperand::createReg(0));
+  if (Inst.getOpcode() == ARM::VSCCLRMD) {
+    unsigned reglist = (fieldFromInstruction(Insn, 1, 7) << 1) |
+                       (fieldFromInstruction(Insn, 12, 4) << 8) |
+                       (fieldFromInstruction(Insn, 22, 1) << 12);
+    if (!Check(S, DecodeDPRRegListOperand(Inst, reglist, Address, Decoder))) {
+      return MCDisassembler::Fail;
+    }
+  } else {
+    unsigned reglist = fieldFromInstruction(Insn, 0, 8) |
+                       (fieldFromInstruction(Insn, 22, 1) << 8) |
+                       (fieldFromInstruction(Insn, 12, 4) << 9);
+    if (!Check(S, DecodeSPRRegListOperand(Inst, reglist, Address, Decoder))) {
+      return MCDisassembler::Fail;
+    }
+  }
+  Inst.addOperand(MCOperand::createReg(ARM::VPR));
+
+  return S;
+}
+
+static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                              uint64_t Address,
+                              const void *Decoder) {
+  if (RegNo > 7)
+    return MCDisassembler::Fail;
+
+  unsigned Register = QPRDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return MCDisassembler::Success;
+}
+
+static const uint16_t QQPRDecoderTable[] = {
+     ARM::Q0_Q1,  ARM::Q1_Q2,  ARM::Q2_Q3,  ARM::Q3_Q4,
+     ARM::Q4_Q5,  ARM::Q5_Q6,  ARM::Q6_Q7
+};
+
+static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                              uint64_t Address,
+                              const void *Decoder) {
+  if (RegNo > 6)
+    return MCDisassembler::Fail;
+
+  unsigned Register = QQPRDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return MCDisassembler::Success;
+}
+
+static const uint16_t QQQQPRDecoderTable[] = {
+     ARM::Q0_Q1_Q2_Q3,  ARM::Q1_Q2_Q3_Q4,  ARM::Q2_Q3_Q4_Q5,
+     ARM::Q3_Q4_Q5_Q6,  ARM::Q4_Q5_Q6_Q7
+};
+
+static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                              uint64_t Address,
+                              const void *Decoder) {
+  if (RegNo > 4)
+    return MCDisassembler::Fail;
+
+  unsigned Register = QQQQPRDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
+                                         uint64_t Address,
+                                         const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  // Parse VPT mask and encode it in the MCInst as an immediate with the same
+  // format as the it_mask.  That is, from the second 'e|t' encode 'e' as 1 and
+  // 't' as 0 and finish with a 1.
+  unsigned Imm = 0;
+  // We always start with a 't'.
+  unsigned CurBit = 0;
+  for (int i = 3; i >= 0; --i) {
+    // If the bit we are looking at is not the same as last one, invert the
+    // CurBit, if it is the same leave it as is.
+    CurBit ^= (Val >> i) & 1U;
+
+    // Encode the CurBit at the right place in the immediate.
+    Imm |= (CurBit << i);
+
+    // If we are done, finish the encoding with a 1.
+    if ((Val & ~(~0U << i)) == 0) {
+      Imm |= 1U << i;
+      break;
+    }
+  }
+
+  Inst.addOperand(MCOperand::createImm(Imm));
+
+  return S;
+}
+
+static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
+                                        uint64_t Address, const void *Decoder) {
+  // The vpred_r operand type includes an MQPR register field derived
+  // from the encoding. But we don't actually want to add an operand
+  // to the MCInst at this stage, because AddThumbPredicate will do it
+  // later, and will infer the register number from the TIED_TO
+  // constraint. So this is a deliberately empty decoder method that
+  // will inhibit the auto-generated disassembly code from adding an
+  // operand at all.
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst,
+                                                      unsigned Val,
+                                                      uint64_t Address,
+                                                      const void *Decoder) {
+  Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
+                                                      unsigned Val,
+                                                      uint64_t Address,
+                                                      const void *Decoder) {
+  unsigned Code;
+  switch (Val & 0x3) {
+  case 0:
+    Code = ARMCC::GE;
+    break;
+  case 1:
+    Code = ARMCC::LT;
+    break;
+  case 2:
+    Code = ARMCC::GT;
+    break;
+  case 3:
+    Code = ARMCC::LE;
+    break;
+  }
+  Inst.addOperand(MCOperand::createImm(Code));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst,
+                                                      unsigned Val,
+                                                      uint64_t Address,
+                                                      const void *Decoder) {
+  Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::HS : ARMCC::HI));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val,
+                                                     uint64_t Address,
+                                                     const void *Decoder) {
+  unsigned Code;
+  switch (Val) {
+  default:
+    return MCDisassembler::Fail;
+  case 0:
+    Code = ARMCC::EQ;
+    break;
+  case 1:
+    Code = ARMCC::NE;
+    break;
+  case 4:
+    Code = ARMCC::GE;
+    break;
+  case 5:
+    Code = ARMCC::LT;
+    break;
+  case 6:
+    Code = ARMCC::GT;
+    break;
+  case 7:
+    Code = ARMCC::LE;
+    break;
+  }
+
+  Inst.addOperand(MCOperand::createImm(Code));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val,
+                                         uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned DecodedVal = 64 - Val;
+
+  switch (Inst.getOpcode()) {
+  case ARM::MVE_VCVTf16s16_fix:
+  case ARM::MVE_VCVTs16f16_fix:
+  case ARM::MVE_VCVTf16u16_fix:
+  case ARM::MVE_VCVTu16f16_fix:
+    if (DecodedVal > 16)
+      return MCDisassembler::Fail;
+    break;
+  case ARM::MVE_VCVTf32s32_fix:
+  case ARM::MVE_VCVTs32f32_fix:
+  case ARM::MVE_VCVTf32u32_fix:
+  case ARM::MVE_VCVTu32f32_fix:
+    if (DecodedVal > 32)
+      return MCDisassembler::Fail;
+    break;
+  }
+
+  Inst.addOperand(MCOperand::createImm(64 - Val));
+
+  return S;
+}
+
+static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) {
+  switch (Opcode) {
+  case ARM::VSTR_P0_off:
+  case ARM::VSTR_P0_pre:
+  case ARM::VSTR_P0_post:
+  case ARM::VLDR_P0_off:
+  case ARM::VLDR_P0_pre:
+  case ARM::VLDR_P0_post:
+    return ARM::P0;
+  default:
+    return 0;
+  }
+}
+
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  switch (Inst.getOpcode()) {
+  case ARM::VSTR_FPSCR_pre:
+  case ARM::VSTR_FPSCR_NZCVQC_pre:
+  case ARM::VLDR_FPSCR_pre:
+  case ARM::VLDR_FPSCR_NZCVQC_pre:
+  case ARM::VSTR_FPSCR_off:
+  case ARM::VSTR_FPSCR_NZCVQC_off:
+  case ARM::VLDR_FPSCR_off:
+  case ARM::VLDR_FPSCR_NZCVQC_off:
+  case ARM::VSTR_FPSCR_post:
+  case ARM::VSTR_FPSCR_NZCVQC_post:
+  case ARM::VLDR_FPSCR_post:
+  case ARM::VLDR_FPSCR_NZCVQC_post:
+    const FeatureBitset &featureBits =
+        ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+
+    if (!featureBits[ARM::HasMVEIntegerOps] && !featureBits[ARM::FeatureVFP2])
+      return MCDisassembler::Fail;
+  }
+
+  DecodeStatus S = MCDisassembler::Success;
+  if (unsigned Sysreg = FixedRegForVSTRVLDR_SYSREG(Inst.getOpcode()))
+    Inst.addOperand(MCOperand::createReg(Sysreg));
+  unsigned Rn = fieldFromInstruction(Val, 16, 4);
+  unsigned addr = fieldFromInstruction(Val, 0, 7) |
+                  (fieldFromInstruction(Val, 23, 1) << 7) | (Rn << 8);
+
+  if (Writeback) {
+    if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+      return MCDisassembler::Fail;
+  }
+  if (!Check(S, DecodeT2AddrModeImm7s4(Inst, addr, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+  Inst.addOperand(MCOperand::createReg(0));
+
+  return S;
+}
+
+static inline DecodeStatus DecodeMVE_MEM_pre(
+  MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder,
+  unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned Qd = fieldFromInstruction(Val, 13, 3);
+  unsigned addr = fieldFromInstruction(Val, 0, 7) |
+                  (fieldFromInstruction(Val, 23, 1) << 7) | (Rn << 8);
+
+  if (!Check(S, RnDecoder(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, AddrDecoder(Inst, addr, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder) {
+  return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+                           fieldFromInstruction(Val, 16, 3),
+                           DecodetGPRRegisterClass,
+                           DecodeTAddrModeImm7<shift>);
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder) {
+  return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+                           fieldFromInstruction(Val, 16, 4),
+                           DecoderGPRRegisterClass,
+                           DecodeT2AddrModeImm7<shift,1>);
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder) {
+  return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+                           fieldFromInstruction(Val, 17, 3),
+                           DecodeMQPRRegisterClass,
+                           DecodeMveAddrModeQ<shift>);
+}
+
+template<unsigned MinLog, unsigned MaxLog>
+static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (Val < MinLog || Val > MaxLog)
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createImm(1LL << Val));
+  return S;
+}
+
+template <int shift>
+static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
+                                             uint64_t Address,
+                                             const void *Decoder) {
+    Val <<= shift;
+
+    Inst.addOperand(MCOperand::createImm(Val));
+    return MCDisassembler::Success;
+}
+
+template<unsigned start>
+static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
+                                                    uint64_t Address,
+                                                    const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  Inst.addOperand(MCOperand::createImm(start + Val));
+
+  return S;
+}
+
+static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+                 fieldFromInstruction(Insn, 13, 3));
+  unsigned index = fieldFromInstruction(Insn, 4, 1);
+
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMVEPairVectorIndexOperand<2>(Inst, index, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
+                                         uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+                 fieldFromInstruction(Insn, 13, 3));
+  unsigned index = fieldFromInstruction(Insn, 4, 1);
+
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMVEPairVectorIndexOperand<2>(Inst, index, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+static DecodeStatus DecodeMVEOverlappingLongShift(
+  MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1;
+  unsigned RdaHi = fieldFromInstruction(Insn, 9, 3) << 1;
+  unsigned Rm = fieldFromInstruction(Insn, 12, 4);
+
+  if (RdaHi == 14) {
+    // This value of RdaHi (really indicating pc, because RdaHi has to
+    // be an odd-numbered register, so the low bit will be set by the
+    // decode function below) indicates that we must decode as SQRSHR
+    // or UQRSHL, which both have a single Rda register field with all
+    // four bits.
+    unsigned Rda = fieldFromInstruction(Insn, 16, 4);
+
+    switch (Inst.getOpcode()) {
+      case ARM::MVE_ASRLr:
+      case ARM::MVE_SQRSHRL:
+        Inst.setOpcode(ARM::MVE_SQRSHR);
+        break;
+      case ARM::MVE_LSLLr:
+      case ARM::MVE_UQRSHLL:
+        Inst.setOpcode(ARM::MVE_UQRSHL);
+        break;
+      default:
+        llvm_unreachable("Unexpected starting opcode!");
+    }
+
+    // Rda as output parameter
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rda, Address, Decoder)))
+      return MCDisassembler::Fail;
+
+    // Rda again as input parameter
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rda, Address, Decoder)))
+      return MCDisassembler::Fail;
+
+    // Rm, the amount to shift by
+    if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+      return MCDisassembler::Fail;
+
+    return S;
+  }
+
+  // Otherwise, we decode as whichever opcode our caller has already
+  // put into Inst. Those all look the same:
+
+  // RdaLo,RdaHi as output parameters
+  if (!Check(S, DecodetGPREvenRegisterClass(Inst, RdaLo, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodetGPROddRegisterClass(Inst, RdaHi, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  // RdaLo,RdaHi again as input parameters
+  if (!Check(S, DecodetGPREvenRegisterClass(Inst, RdaLo, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodetGPROddRegisterClass(Inst, RdaHi, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  // Rm, the amount to shift by
+  if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                      const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+                 fieldFromInstruction(Insn, 13, 3));
+  unsigned Qm = ((fieldFromInstruction(Insn, 5, 1) << 3) |
+                 fieldFromInstruction(Insn, 1, 3));
+  unsigned imm6 = fieldFromInstruction(Insn, 16, 6);
+
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeVCVTImmOperand(Inst, imm6, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  return S;
+}
+
+template<bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  Inst.addOperand(MCOperand::createReg(ARM::VPR));
+  unsigned Qn = fieldFromInstruction(Insn, 17, 3);
+  if (!Check(S, DecodeMQPRRegisterClass(Inst, Qn, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  unsigned fc;
+
+  if (scalar) {
+    fc = fieldFromInstruction(Insn, 12, 1) << 2 |
+         fieldFromInstruction(Insn, 7, 1) |
+         fieldFromInstruction(Insn, 5, 1) << 1;
+    unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+    if (!Check(S, DecodeGPRwithZRRegisterClass(Inst, Rm, Address, Decoder)))
+      return MCDisassembler::Fail;
+  } else {
+    fc = fieldFromInstruction(Insn, 12, 1) << 2 |
+         fieldFromInstruction(Insn, 7, 1) |
+         fieldFromInstruction(Insn, 0, 1) << 1;
+    unsigned Qm = fieldFromInstruction(Insn, 5, 1) << 4 |
+                  fieldFromInstruction(Insn, 1, 3);
+    if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+      return MCDisassembler::Fail;
+  }
+
+  if (!Check(S, predicate_decoder(Inst, fc, Address, Decoder)))
+    return MCDisassembler::Fail;
+
+  Inst.addOperand(MCOperand::createImm(ARMVCC::None));
+  Inst.addOperand(MCOperand::createReg(0));
+  Inst.addOperand(MCOperand::createImm(0));
+
+  return S;
+}
+
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                  const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+  Inst.addOperand(MCOperand::createReg(ARM::VPR));
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  return S;
+}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
deleted file mode 100644
index 2f84719c4c4f..000000000000
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ /dev/null
@@ -1,1571 +0,0 @@
-//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARMInstPrinter.h"
-#include "Utils/ARMBaseInfo.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define PRINT_ALIAS_INSTR
-#include "ARMGenAsmWriter.inc"
-
-/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
-///
-/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
-static unsigned translateShiftImm(unsigned imm) {
-  // lsr #32 and asr #32 exist, but should be encoded as a 0.
-  assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
-
-  if (imm == 0)
-    return 32;
-  return imm;
-}
-
-/// Prints the shift value with an immediate value.
-static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
-                             unsigned ShImm, bool UseMarkup) {
-  if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
-    return;
-  O << ", ";
-
-  assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
-  O << getShiftOpcStr(ShOpc);
-
-  if (ShOpc != ARM_AM::rrx) {
-    O << " ";
-    if (UseMarkup)
-      O << "<imm:";
-    O << "#" << translateShiftImm(ShImm);
-    if (UseMarkup)
-      O << ">";
-  }
-}
-
-ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                               const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
-}
-
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                               StringRef Annot, const MCSubtargetInfo &STI) {
-  unsigned Opcode = MI->getOpcode();
-
-  switch (Opcode) {
-  // Check for MOVs and print canonical forms, instead.
-  case ARM::MOVsr: {
-    // FIXME: Thumb variants?
-    const MCOperand &Dst = MI->getOperand(0);
-    const MCOperand &MO1 = MI->getOperand(1);
-    const MCOperand &MO2 = MI->getOperand(2);
-    const MCOperand &MO3 = MI->getOperand(3);
-
-    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
-    printSBitModifierOperand(MI, 6, STI, O);
-    printPredicateOperand(MI, 4, STI, O);
-
-    O << '\t';
-    printRegName(O, Dst.getReg());
-    O << ", ";
-    printRegName(O, MO1.getReg());
-
-    O << ", ";
-    printRegName(O, MO2.getReg());
-    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  case ARM::MOVsi: {
-    // FIXME: Thumb variants?
-    const MCOperand &Dst = MI->getOperand(0);
-    const MCOperand &MO1 = MI->getOperand(1);
-    const MCOperand &MO2 = MI->getOperand(2);
-
-    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
-    printSBitModifierOperand(MI, 5, STI, O);
-    printPredicateOperand(MI, 3, STI, O);
-
-    O << '\t';
-    printRegName(O, Dst.getReg());
-    O << ", ";
-    printRegName(O, MO1.getReg());
-
-    if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
-      printAnnotation(O, Annot);
-      return;
-    }
-
-    O << ", " << markup("<imm:") << "#"
-      << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  // A8.6.123 PUSH
-  case ARM::STMDB_UPD:
-  case ARM::t2STMDB_UPD:
-    if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
-      // Should only print PUSH if there are at least two registers in the list.
-      O << '\t' << "push";
-      printPredicateOperand(MI, 2, STI, O);
-      if (Opcode == ARM::t2STMDB_UPD)
-        O << ".w";
-      O << '\t';
-      printRegisterList(MI, 4, STI, O);
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  case ARM::STR_PRE_IMM:
-    if (MI->getOperand(2).getReg() == ARM::SP &&
-        MI->getOperand(3).getImm() == -4) {
-      O << '\t' << "push";
-      printPredicateOperand(MI, 4, STI, O);
-      O << "\t{";
-      printRegName(O, MI->getOperand(1).getReg());
-      O << "}";
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  // A8.6.122 POP
-  case ARM::LDMIA_UPD:
-  case ARM::t2LDMIA_UPD:
-    if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
-      // Should only print POP if there are at least two registers in the list.
-      O << '\t' << "pop";
-      printPredicateOperand(MI, 2, STI, O);
-      if (Opcode == ARM::t2LDMIA_UPD)
-        O << ".w";
-      O << '\t';
-      printRegisterList(MI, 4, STI, O);
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  case ARM::LDR_POST_IMM:
-    if (MI->getOperand(2).getReg() == ARM::SP &&
-        MI->getOperand(4).getImm() == 4) {
-      O << '\t' << "pop";
-      printPredicateOperand(MI, 5, STI, O);
-      O << "\t{";
-      printRegName(O, MI->getOperand(0).getReg());
-      O << "}";
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  // A8.6.355 VPUSH
-  case ARM::VSTMSDB_UPD:
-  case ARM::VSTMDDB_UPD:
-    if (MI->getOperand(0).getReg() == ARM::SP) {
-      O << '\t' << "vpush";
-      printPredicateOperand(MI, 2, STI, O);
-      O << '\t';
-      printRegisterList(MI, 4, STI, O);
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  // A8.6.354 VPOP
-  case ARM::VLDMSIA_UPD:
-  case ARM::VLDMDIA_UPD:
-    if (MI->getOperand(0).getReg() == ARM::SP) {
-      O << '\t' << "vpop";
-      printPredicateOperand(MI, 2, STI, O);
-      O << '\t';
-      printRegisterList(MI, 4, STI, O);
-      printAnnotation(O, Annot);
-      return;
-    } else
-      break;
-
-  case ARM::tLDMIA: {
-    bool Writeback = true;
-    unsigned BaseReg = MI->getOperand(0).getReg();
-    for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
-      if (MI->getOperand(i).getReg() == BaseReg)
-        Writeback = false;
-    }
-
-    O << "\tldm";
-
-    printPredicateOperand(MI, 1, STI, O);
-    O << '\t';
-    printRegName(O, BaseReg);
-    if (Writeback)
-      O << "!";
-    O << ", ";
-    printRegisterList(MI, 3, STI, O);
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  // Combine 2 GPRs from disassember into a GPRPair to match with instr def.
-  // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
-  // a single GPRPair reg operand is used in the .td file to replace the two
-  // GPRs. However, when decoding them, the two GRPs cannot be automatically
-  // expressed as a GPRPair, so we have to manually merge them.
-  // FIXME: We would really like to be able to tablegen'erate this.
-  case ARM::LDREXD:
-  case ARM::STREXD:
-  case ARM::LDAEXD:
-  case ARM::STLEXD: {
-    const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
-    bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
-    unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
-    if (MRC.contains(Reg)) {
-      MCInst NewMI;
-      MCOperand NewReg;
-      NewMI.setOpcode(Opcode);
-
-      if (isStore)
-        NewMI.addOperand(MI->getOperand(0));
-      NewReg = MCOperand::createReg(MRI.getMatchingSuperReg(
-          Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
-      NewMI.addOperand(NewReg);
-
-      // Copy the rest operands into NewMI.
-      for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
-        NewMI.addOperand(MI->getOperand(i));
-      printInstruction(&NewMI, STI, O);
-      return;
-    }
-    break;
-  }
-  case ARM::TSB:
-  case ARM::t2TSB:
-    O << "\ttsb\tcsync";
-    return;
-  case ARM::t2DSB:
-    switch (MI->getOperand(0).getImm()) {
-    default:
-      if (!printAliasInstr(MI, STI, O))
-        printInstruction(MI, STI, O);
-      break;
-    case 0:
-      O << "\tssbb";
-      break;
-    case 4:
-      O << "\tpssbb";
-      break;
-    }
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  if (!printAliasInstr(MI, STI, O))
-    printInstruction(MI, STI, O);
-
-  printAnnotation(O, Annot);
-}
-
-void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    printRegName(O, Reg);
-  } else if (Op.isImm()) {
-    O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    const MCExpr *Expr = Op.getExpr();
-    switch (Expr->getKind()) {
-    case MCExpr::Binary:
-      O << '#';
-      Expr->print(O, &MAI);
-      break;
-    case MCExpr::Constant: {
-      // If a symbolic branch target was added as a constant expression then
-      // print that address in hex. And only print 32 unsigned bits for the
-      // address.
-      const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr);
-      int64_t TargetAddress;
-      if (!Constant->evaluateAsAbsolute(TargetAddress)) {
-        O << '#';
-        Expr->print(O, &MAI);
-      } else {
-        O << "0x";
-        O.write_hex(static_cast<uint32_t>(TargetAddress));
-      }
-      break;
-    }
-    default:
-      // FIXME: Should we always treat this as if it is a constant literal and
-      // prefix it with '#'?
-      Expr->print(O, &MAI);
-      break;
-    }
-  }
-}
-
-void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  if (MO1.isExpr()) {
-    MO1.getExpr()->print(O, &MAI);
-    return;
-  }
-
-  O << markup("<mem:") << "[pc, ";
-
-  int32_t OffImm = (int32_t)MO1.getImm();
-  bool isSub = OffImm < 0;
-
-  // Special value for #-0. All others are normal.
-  if (OffImm == INT32_MIN)
-    OffImm = 0;
-  if (isSub) {
-    O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
-  } else {
-    O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-// so_reg is a 4-operand unit corresponding to register forms of the A5.1
-// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
-//    REG 0   0           - e.g. R5
-//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
-//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
-void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-  const MCOperand &MO3 = MI->getOperand(OpNum + 2);
-
-  printRegName(O, MO1.getReg());
-
-  // Print the shift opc.
-  ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
-  O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
-  if (ShOpc == ARM_AM::rrx)
-    return;
-
-  O << ' ';
-  printRegName(O, MO2.getReg());
-  assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
-}
-
-void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  printRegName(O, MO1.getReg());
-
-  // Print the shift opc.
-  printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
-                   ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
-}
-
-//===--------------------------------------------------------------------===//
-// Addressing Mode #2
-//===--------------------------------------------------------------------===//
-
-void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-  const MCOperand &MO3 = MI->getOperand(Op + 2);
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  if (!MO2.getReg()) {
-    if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
-      O << ", " << markup("<imm:") << "#"
-        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
-        << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
-    }
-    O << "]" << markup(">");
-    return;
-  }
-
-  O << ", ";
-  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()));
-  printRegName(O, MO2.getReg());
-
-  printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()),
-                   ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup);
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  O << ", ";
-  printRegName(O, MO2.getReg());
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  O << ", ";
-  printRegName(O, MO2.getReg());
-  O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-
-  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, STI, O);
-    return;
-  }
-
-#ifndef NDEBUG
-  const MCOperand &MO3 = MI->getOperand(Op + 2);
-  unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
-  assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
-#endif
-
-  printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
-}
-
-void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  if (!MO1.getReg()) {
-    unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
-    O << markup("<imm:") << '#'
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
-      << markup(">");
-    return;
-  }
-
-  O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()));
-  printRegName(O, MO1.getReg());
-
-  printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()),
-                   ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup);
-}
-
-//===--------------------------------------------------------------------===//
-// Addressing Mode #3
-//===--------------------------------------------------------------------===//
-
-void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
-                                                raw_ostream &O,
-                                                bool AlwaysPrintImm0) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-  const MCOperand &MO3 = MI->getOperand(Op + 2);
-
-  O << markup("<mem:") << '[';
-  printRegName(O, MO1.getReg());
-
-  if (MO2.getReg()) {
-    O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()));
-    printRegName(O, MO2.getReg());
-    O << ']' << markup(">");
-    return;
-  }
-
-  // If the op is sub we have to print the immediate even if it is 0
-  unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
-  ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
-
-  if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
-    O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
-      << markup(">");
-  }
-  O << ']' << markup(">");
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  if (!MO1.isReg()) { //  For label symbolic references.
-    printOperand(MI, Op, STI, O);
-    return;
-  }
-
-  assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) !=
-             ARMII::IndexModePost &&
-         "unexpected idxmode");
-  printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
-}
-
-void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  if (MO1.getReg()) {
-    O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
-    printRegName(O, MO1.getReg());
-    return;
-  }
-
-  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
-  O << markup("<imm:") << '#'
-    << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
-    << markup(">");
-}
-
-void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
-                                             const MCSubtargetInfo &STI,
-                                             raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  unsigned Imm = MO.getImm();
-  O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
-    << markup(">");
-}
-
-void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  O << (MO2.getImm() ? "" : "-");
-  printRegName(O, MO1.getReg());
-}
-
-void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  unsigned Imm = MO.getImm();
-  O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
-    << markup(">");
-}
-
-void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  ARM_AM::AMSubMode Mode =
-      ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm());
-  O << ARM_AM::getAMSubModeStr(Mode);
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, OpNum, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
-  ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
-  if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
-    O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
-      << ImmOffs * 4 << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
-  if (!MO1.isReg()) {   // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, OpNum, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
-  unsigned Op = ARM_AM::getAM5FP16Op(MO2.getImm());
-  if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
-    O << ", "
-      << markup("<imm:")
-      << "#"
-      << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm()))
-      << ImmOffs * 2
-      << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  if (MO2.getImm()) {
-    O << ":" << (MO2.getImm() << 3);
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  if (MO.getReg() == 0)
-    O << "!";
-  else {
-    O << ", ";
-    printRegName(O, MO.getReg());
-  }
-}
-
-void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
-                                                    unsigned OpNum,
-                                                    const MCSubtargetInfo &STI,
-                                                    raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  uint32_t v = ~MO.getImm();
-  int32_t lsb = countTrailingZeros(v);
-  int32_t width = (32 - countLeadingZeros(v)) - lsb;
-  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
-  O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
-    << '#' << width << markup(">");
-}
-
-void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  unsigned val = MI->getOperand(OpNum).getImm();
-  O << ARM_MB::MemBOptToString(val, STI.getFeatureBits()[ARM::HasV8Ops]);
-}
-
-void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  unsigned val = MI->getOperand(OpNum).getImm();
-  O << ARM_ISB::InstSyncBOptToString(val);
-}
-
-void ARMInstPrinter::printTraceSyncBOption(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  unsigned val = MI->getOperand(OpNum).getImm();
-  O << ARM_TSB::TraceSyncBOptToString(val);
-}
-
-void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  unsigned ShiftOp = MI->getOperand(OpNum).getImm();
-  bool isASR = (ShiftOp & (1 << 5)) != 0;
-  unsigned Amt = ShiftOp & 0x1f;
-  if (isASR) {
-    O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
-      << markup(">");
-  } else if (Amt) {
-    O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
-  }
-}
-
-void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  if (Imm == 0)
-    return;
-  assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
-  O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
-}
-
-void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  // A shift amount of 32 is encoded as 0.
-  if (Imm == 0)
-    Imm = 32;
-  assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
-  O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
-}
-
-void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
-                        [&](const MCOperand &LHS, const MCOperand &RHS) {
-                          return MRI.getEncodingValue(LHS.getReg()) <
-                                 MRI.getEncodingValue(RHS.getReg());
-                        }));
-
-  O << "{";
-  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
-    if (i != OpNum)
-      O << ", ";
-    printRegName(O, MI->getOperand(i).getReg());
-  }
-  O << "}";
-}
-
-void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
-  O << ", ";
-  printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
-}
-
-void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  if (Op.getImm())
-    O << "be";
-  else
-    O << "le";
-}
-
-void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  O << ARM_PROC::IModToString(Op.getImm());
-}
-
-void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  unsigned IFlags = Op.getImm();
-  for (int i = 2; i >= 0; --i)
-    if (IFlags & (1 << i))
-      O << ARM_PROC::IFlagsToString(1 << i);
-
-  if (IFlags == 0)
-    O << "none";
-}
-
-void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  const FeatureBitset &FeatureBits = STI.getFeatureBits();
-  if (FeatureBits[ARM::FeatureMClass]) {
-
-    unsigned SYSm = Op.getImm() & 0xFFF; // 12-bit SYSm
-    unsigned Opcode = MI->getOpcode();
-
-    // For writes, handle extended mask bits if the DSP extension is present.
-    if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) {
-      auto TheReg =ARMSysReg::lookupMClassSysRegBy12bitSYSmValue(SYSm);
-      if (TheReg && TheReg->isInRequiredFeatures({ARM::FeatureDSP})) {
-          O << TheReg->Name;
-          return;
-      }
-    }
-
-    // Handle the basic 8-bit mask.
-    SYSm &= 0xff;
-    if (Opcode == ARM::t2MSR_M && FeatureBits [ARM::HasV7Ops]) {
-      // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
-      // alias for MSR APSR_nzcvq.
-      auto TheReg = ARMSysReg::lookupMClassSysRegAPSRNonDeprecated(SYSm);
-      if (TheReg) {
-          O << TheReg->Name;
-          return;
-      }
-    }
-
-    auto TheReg = ARMSysReg::lookupMClassSysRegBy8bitSYSmValue(SYSm);
-    if (TheReg) {
-      O << TheReg->Name;
-      return;
-    }
-
-    O << SYSm;
-
-    return;
-  }
-
-  // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
-  // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively.
-  unsigned SpecRegRBit = Op.getImm() >> 4;
-  unsigned Mask = Op.getImm() & 0xf;
-
-  if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
-    O << "APSR_";
-    switch (Mask) {
-    default:
-      llvm_unreachable("Unexpected mask value!");
-    case 4:
-      O << "g";
-      return;
-    case 8:
-      O << "nzcvq";
-      return;
-    case 12:
-      O << "nzcvqg";
-      return;
-    }
-  }
-
-  if (SpecRegRBit)
-    O << "SPSR";
-  else
-    O << "CPSR";
-
-  if (Mask) {
-    O << '_';
-    if (Mask & 8)
-      O << 'f';
-    if (Mask & 4)
-      O << 's';
-    if (Mask & 2)
-      O << 'x';
-    if (Mask & 1)
-      O << 'c';
-  }
-}
-
-void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  uint32_t Banked = MI->getOperand(OpNum).getImm();
-  auto TheReg = ARMBankedReg::lookupBankedRegByEncoding(Banked);
-  assert(TheReg && "invalid banked register operand");
-  std::string Name = TheReg->Name;
-
-  uint32_t isSPSR = (Banked & 0x20) >> 5;
-  if (isSPSR)
-    Name.replace(0, 4, "SPSR"); // convert 'spsr_' to 'SPSR_'
-  O << Name;
-}
-
-void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                           const MCSubtargetInfo &STI,
-                                           raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  // Handle the undefined 15 CC value here for printing so we don't abort().
-  if ((unsigned)CC == 15)
-    O << "<und>";
-  else if (CC != ARMCC::AL)
-    O << ARMCondCodeToString(CC);
-}
-
-void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
-                                                    unsigned OpNum,
-                                                    const MCSubtargetInfo &STI,
-                                                    raw_ostream &O) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
-  O << ARMCondCodeToString(CC);
-}
-
-void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  if (MI->getOperand(OpNum).getReg()) {
-    assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
-           "Expect ARM CPSR register!");
-    O << 's';
-  }
-}
-
-void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  O << MI->getOperand(OpNum).getImm();
-}
-
-void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  O << "p" << MI->getOperand(OpNum).getImm();
-}
-
-void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  O << "c" << MI->getOperand(OpNum).getImm();
-}
-
-void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  O << "{" << MI->getOperand(OpNum).getImm() << "}";
-}
-
-void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
-}
-
-template <unsigned scale>
-void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-
-  if (MO.isExpr()) {
-    MO.getExpr()->print(O, &MAI);
-    return;
-  }
-
-  int32_t OffImm = (int32_t)MO.getImm() << scale;
-
-  O << markup("<imm:");
-  if (OffImm == INT32_MIN)
-    O << "#-0";
-  else if (OffImm < 0)
-    O << "#-" << -OffImm;
-  else
-    O << "#" << OffImm;
-  O << markup(">");
-}
-
-void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
-    << markup(">");
-}
-
-void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
-    << markup(">");
-}
-
-void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  // (3 - the number of trailing zeros) is the number of then / else.
-  unsigned Mask = MI->getOperand(OpNum).getImm();
-  unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
-  unsigned CondBit0 = Firstcond & 1;
-  unsigned NumTZ = countTrailingZeros(Mask);
-  assert(NumTZ <= 3 && "Invalid IT mask!");
-  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
-    bool T = ((Mask >> Pos) & 1) == CondBit0;
-    if (T)
-      O << 't';
-    else
-      O << 'e';
-  }
-}
-
-void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-
-  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  if (unsigned RegNum = MO2.getReg()) {
-    O << ", ";
-    printRegName(O, RegNum);
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
-                                                    unsigned Op,
-                                                    const MCSubtargetInfo &STI,
-                                                    raw_ostream &O,
-                                                    unsigned Scale) {
-  const MCOperand &MO1 = MI->getOperand(Op);
-  const MCOperand &MO2 = MI->getOperand(Op + 1);
-
-  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, Op, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  if (unsigned ImmOffs = MO2.getImm()) {
-    O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
-      << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
-                                                     unsigned Op,
-                                                     const MCSubtargetInfo &STI,
-                                                     raw_ostream &O) {
-  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1);
-}
-
-void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
-                                                     unsigned Op,
-                                                     const MCSubtargetInfo &STI,
-                                                     raw_ostream &O) {
-  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2);
-}
-
-void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
-                                                     unsigned Op,
-                                                     const MCSubtargetInfo &STI,
-                                                     raw_ostream &O) {
-  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
-}
-
-void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
-}
-
-// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
-// register with shift forms.
-// REG 0   0           - e.g. R5
-// REG IMM, SH_OPC     - e.g. R5, LSL #3
-void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  unsigned Reg = MO1.getReg();
-  printRegName(O, Reg);
-
-  // Print the shift opc.
-  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
-  printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()),
-                   ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
-    printOperand(MI, OpNum, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm();
-  bool isSub = OffImm < 0;
-  // Special value for #-0. All others are normal.
-  if (OffImm == INT32_MIN)
-    OffImm = 0;
-  if (isSub) {
-    O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
-  } else if (AlwaysPrintImm0 || OffImm > 0) {
-    O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
-                                                unsigned OpNum,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm();
-  bool isSub = OffImm < 0;
-  // Don't print +0.
-  if (OffImm == INT32_MIN)
-    OffImm = 0;
-  if (isSub) {
-    O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
-  } else if (AlwaysPrintImm0 || OffImm > 0) {
-    O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-template <bool AlwaysPrintImm0>
-void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
-                                                  unsigned OpNum,
-                                                  const MCSubtargetInfo &STI,
-                                                  raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  if (!MO1.isReg()) { //  For label symbolic references.
-    printOperand(MI, OpNum, STI, O);
-    return;
-  }
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  int32_t OffImm = (int32_t)MO2.getImm();
-  bool isSub = OffImm < 0;
-
-  assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
-
-  // Don't print +0.
-  if (OffImm == INT32_MIN)
-    OffImm = 0;
-  if (isSub) {
-    O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
-  } else if (AlwaysPrintImm0 || OffImm > 0) {
-    O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-  if (MO2.getImm()) {
-    O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
-      << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  int32_t OffImm = (int32_t)MO1.getImm();
-  O << ", " << markup("<imm:");
-  if (OffImm == INT32_MIN)
-    O << "#-0";
-  else if (OffImm < 0)
-    O << "#-" << -OffImm;
-  else
-    O << "#" << OffImm;
-  O << markup(">");
-}
-
-void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  int32_t OffImm = (int32_t)MO1.getImm();
-
-  assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
-
-  O << ", " << markup("<imm:");
-  if (OffImm == INT32_MIN)
-    O << "#-0";
-  else if (OffImm < 0)
-    O << "#-" << -OffImm;
-  else
-    O << "#" << OffImm;
-  O << markup(">");
-}
-
-void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  const MCOperand &MO1 = MI->getOperand(OpNum);
-  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
-  const MCOperand &MO3 = MI->getOperand(OpNum + 2);
-
-  O << markup("<mem:") << "[";
-  printRegName(O, MO1.getReg());
-
-  assert(MO2.getReg() && "Invalid so_reg load / store address!");
-  O << ", ";
-  printRegName(O, MO2.getReg());
-
-  unsigned ShAmt = MO3.getImm();
-  if (ShAmt) {
-    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
-    O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
-  }
-  O << "]" << markup(">");
-}
-
-void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
-    << markup(">");
-}
-
-void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  unsigned EncodedImm = MI->getOperand(OpNum).getImm();
-  unsigned EltBits;
-  uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
-  O << markup("<imm:") << "#0x";
-  O.write_hex(Val);
-  O << markup(">");
-}
-
-void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
-}
-
-void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNum).getImm();
-  if (Imm == 0)
-    return;
-  assert(Imm <= 3 && "illegal ror immediate!");
-  O << ", ror " << markup("<imm:") << "#" << 8 * Imm << markup(">");
-}
-
-void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  MCOperand Op = MI->getOperand(OpNum);
-
-  // Support for fixups (MCFixup)
-  if (Op.isExpr())
-    return printOperand(MI, OpNum, STI, O);
-
-  unsigned Bits = Op.getImm() & 0xFF;
-  unsigned Rot = (Op.getImm() & 0xF00) >> 7;
-
-  bool PrintUnsigned = false;
-  switch (MI->getOpcode()) {
-  case ARM::MOVi:
-    // Movs to PC should be treated unsigned
-    PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
-    break;
-  case ARM::MSRi:
-    // Movs to special registers should be treated unsigned
-    PrintUnsigned = true;
-    break;
-  }
-
-  int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
-  if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
-    // #rot has the least possible value
-    O << "#" << markup("<imm:");
-    if (PrintUnsigned)
-      O << static_cast<uint32_t>(Rotated);
-    else
-      O << Rotated;
-    O << markup(">");
-    return;
-  }
-
-  // Explicit #bits, #rot implied
-  O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
-    << Rot << markup(">");
-}
-
-void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
-    << markup(">");
-}
-
-void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O) {
-  O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
-    << markup(">");
-}
-
-void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  O << "[" << MI->getOperand(OpNum).getImm() << "]";
-}
-
-void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
-  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
-  O << "{";
-  printRegName(O, Reg0);
-  O << ", ";
-  printRegName(O, Reg1);
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
-  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
-  O << "{";
-  printRegName(O, Reg0);
-  O << ", ";
-  printRegName(O, Reg1);
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 1);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 1);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 3);
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
-                                                unsigned OpNum,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
-                                                unsigned OpNum,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
-  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
-  O << "{";
-  printRegName(O, Reg0);
-  O << "[], ";
-  printRegName(O, Reg1);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
-                                                  unsigned OpNum,
-                                                  const MCSubtargetInfo &STI,
-                                                  raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 1);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
-                                                 unsigned OpNum,
-                                                 const MCSubtargetInfo &STI,
-                                                 raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 1);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 3);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNum).getReg();
-  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
-  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
-  O << "{";
-  printRegName(O, Reg0);
-  O << "[], ";
-  printRegName(O, Reg1);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 4);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListFourSpacedAllLanes(
-    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
-    raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 4);
-  O << "[], ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 6);
-  O << "[]}";
-}
-
-void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
-                                                unsigned OpNum,
-                                                const MCSubtargetInfo &STI,
-                                                raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 4);
-  O << "}";
-}
-
-void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
-                                               const MCSubtargetInfo &STI,
-                                               raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{";
-  printRegName(O, MI->getOperand(OpNum).getReg());
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 2);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 4);
-  O << ", ";
-  printRegName(O, MI->getOperand(OpNum).getReg() + 6);
-  O << "}";
-}
-
-template<int64_t Angle, int64_t Remainder>
-void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  unsigned Val = MI->getOperand(OpNo).getImm();
-  O << "#" << (Val * Angle) + Remainder;
-}
-
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
deleted file mode 100644
index afc8515136bc..000000000000
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ /dev/null
@@ -1,243 +0,0 @@
-//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
-#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class ARMInstPrinter : public MCInstPrinter {
-public:
-  ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                 const MCRegisterInfo &MRI);
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                               raw_ostream &O);
-  virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                                       unsigned PrintMethodIdx,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O);
-
-  void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
-                                  bool AlwaysPrintImm0);
-  void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
-                               const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
-                                 const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
-                                 const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O);
-  void printMemBOption(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printTraceSyncBOption(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-
-  template <unsigned scale>
-  void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbSRImm(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbITMask(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O, unsigned Scale);
-  void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O);
-  void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O);
-  void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O);
-  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printT2SOOperand(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
-                                 const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  template <bool AlwaysPrintImm0>
-  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
-                                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O);
-  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O);
-  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O);
-  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printSetendOperand(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printCPSIMod(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printCPSIFlag(const MCInst *MI, unsigned OpNum,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-  void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPredicateOperand(const MCInst *MI, unsigned OpNum,
-                             const MCSubtargetInfo &STI, raw_ostream &O);
-  void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O);
-  void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
-                                const MCSubtargetInfo &STI, raw_ostream &O);
-  void printRegisterList(const MCInst *MI, unsigned OpNum,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-  void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printPImmediate(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printCImmediate(const MCInst *MI, unsigned OpNum,
-                       const MCSubtargetInfo &STI, raw_ostream &O);
-  void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFPImmOperand(const MCInst *MI, unsigned OpNum,
-                         const MCSubtargetInfo &STI, raw_ostream &O);
-  void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printRotImmOperand(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printModImmOperand(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-
-  void printPCLabel(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                                 const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFBits16(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFBits32(const MCInst *MI, unsigned OpNum,
-                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorIndex(const MCInst *MI, unsigned OpNum,
-                        const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListOne(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListTwo(const MCInst *MI, unsigned OpNum,
-                          const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListThree(const MCInst *MI, unsigned OpNum,
-                            const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListFour(const MCInst *MI, unsigned OpNum,
-                           const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
-                                    const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
-                                   const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O);
-  void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
-                                          const MCSubtargetInfo &STI,
-                                          raw_ostream &O);
-  void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
-                                         const MCSubtargetInfo &STI,
-                                         raw_ostream &O);
-  void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
-                                  const MCSubtargetInfo &STI, raw_ostream &O);
-  void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
-                                 const MCSubtargetInfo &STI, raw_ostream &O);
-  template<int64_t Angle, int64_t Remainder>
-  void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
deleted file mode 100755
index 68afea12ed44..000000000000
--- a/lib/Target/ARM/LICENSE.TXT
+++ /dev/null
@@ -1,47 +0,0 @@
-ARM Limited
-
-Software Grant License Agreement ("Agreement")
-
-Except for the license granted herein to you, ARM Limited ("ARM") reserves all
-right, title, and interest in and to the Software (defined below).
-
-Definition
-
-"Software" means the code and documentation as well as any original work of
-authorship, including any modifications or additions to an existing work, that
-is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
-inclusion in, or documentation of, any of the products owned or managed by LLVM
-(the "Work"). For the purposes of this definition, "submitted" means any form of
-electronic, verbal, or written communication sent to LLVM or its
-representatives, including but not limited to communication on electronic
-mailing lists, source code control systems, and issue tracking systems that are
-managed by, or on behalf of, LLVM for the purpose of discussing and improving
-the Work, but excluding communication that is conspicuously marked otherwise.
-
-1. Grant of Copyright License. Subject to the terms and conditions of this
-   Agreement, ARM hereby grants to you and to recipients of the Software
-   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
-   royalty-free, irrevocable copyright license to reproduce, prepare derivative
-   works of, publicly display, publicly perform, sublicense, and distribute the
-   Software and such derivative works.
-
-2. Grant of Patent License. Subject to the terms and conditions of this
-   Agreement, ARM hereby grants you and to recipients of the Software
-   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
-   royalty-free, irrevocable (except as stated in this section) patent license
-   to make, have made, use, offer to sell, sell, import, and otherwise transfer
-   the Work, where such license applies only to those patent claims licensable
-   by ARM that are necessarily infringed by ARM's Software alone or by
-   combination of the Software with the Work to which such Software was
-   submitted. If any entity institutes patent litigation against ARM or any
-   other entity (including a cross-claim or counterclaim in a lawsuit) alleging
-   that ARM's Software, or the Work to which ARM has contributed constitutes
-   direct or contributory patent infringement, then any patent licenses granted
-   to that entity under this Agreement for the Software or Work shall terminate
-   as of the date such litigation is filed.
-
-Unless required by applicable law or agreed to in writing, the software is
-provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
-either express or implied, including, without limitation, any warranties or
-conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index e1ea5964cf67..7732a6485a85 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -1,9 +1,8 @@
 //===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,7 +30,8 @@ namespace ARM_AM {
     lsl,
     lsr,
     ror,
-    rrx
+    rrx,
+    uxtw
   };
 
   enum AddrOpc {
@@ -49,6 +49,7 @@ namespace ARM_AM {
     case ARM_AM::lsr: return "lsr";
     case ARM_AM::ror: return "ror";
     case ARM_AM::rrx: return "rrx";
+    case ARM_AM::uxtw: return "uxtw";
     }
   }
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index c2a07d4ddcef..aeab5be78ab4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,6 +29,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -47,6 +47,13 @@ public:
 };
 } // end anonymous namespace
 
+Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
+  if (STI.getTargetTriple().isOSBinFormatELF() && Name == "R_ARM_NONE")
+    return FK_NONE;
+
+  return MCAsmBackend::getFixupKind(Name);
+}
+
 const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
   const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
       // This table *must* be in the order that the fixup_* kinds are defined in
@@ -98,6 +105,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movw_lo16", 0, 20, 0},
       {"fixup_arm_mod_imm", 0, 12, 0},
       {"fixup_t2_so_imm", 0, 26, 0},
+      {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfcsel_else_target", 0, 32, 0},
+      {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
   };
   const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
       // This table *must* be in the order that the fixup_* kinds are defined in
@@ -149,6 +163,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movw_lo16", 12, 20, 0},
       {"fixup_arm_mod_imm", 20, 12, 0},
       {"fixup_t2_so_imm", 26, 6, 0},
+      {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_bfcsel_else_target", 0, 32, 0},
+      {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+      {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -203,6 +224,13 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst,
   return false;
 }
 
+static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) {
+  int64_t Offset = int64_t(Value) - 4;
+  if (Offset < Min || Offset > Max)
+    return "out of range pc-relative fixup value";
+  return nullptr;
+}
+
 const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
                                                     uint64_t Value) const {
   switch ((unsigned)Fixup.getKind()) {
@@ -250,6 +278,32 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
       return "will be converted to nop";
     break;
   }
+  case ARM::fixup_bf_branch:
+    return checkPCRelOffset(Value, 0, 30);
+  case ARM::fixup_bf_target:
+    return checkPCRelOffset(Value, -0x10000, +0xfffe);
+  case ARM::fixup_bfl_target:
+    return checkPCRelOffset(Value, -0x40000, +0x3fffe);
+  case ARM::fixup_bfc_target:
+    return checkPCRelOffset(Value, -0x1000, +0xffe);
+  case ARM::fixup_wls:
+    return checkPCRelOffset(Value, 0, +0xffe);
+  case ARM::fixup_le:
+    // The offset field in the LE and LETP instructions is an 11-bit
+    // value shifted left by 1 (i.e. 0,2,4,...,4094), and it is
+    // interpreted as a negative offset from the value read from pc,
+    // i.e. from instruction_address+4.
+    //
+    // So an LE instruction can in principle address the instruction
+    // immediately after itself, or (not very usefully) the address
+    // half way through the 4-byte LE.
+    return checkPCRelOffset(Value, -0xffe, 0);
+  case ARM::fixup_bfcsel_else_target: {
+    if (Value != 2 && Value != 4)
+      return "out of range label-relative fixup value";
+    break;
+  }
+
   default:
     llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
   }
@@ -384,6 +438,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
   default:
     Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
     return 0;
+  case FK_NONE:
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -753,6 +808,60 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
     EncValue |= (Value & 0xff);
     return swapHalfWords(EncValue, Endian == support::little);
   }
+  case ARM::fixup_bf_branch: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = (((Value - 4) >> 1) & 0xf) << 23;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = 0;
+    uint32_t HighBitMask = (Kind == ARM::fixup_bf_target ? 0xf800 :
+                            Kind == ARM::fixup_bfl_target ? 0x3f800 : 0x800);
+    out |= (((Value - 4) >> 1) & 0x1) << 11;
+    out |= (((Value - 4) >> 1) & 0x7fe);
+    out |= (((Value - 4) >> 1) & HighBitMask) << 5;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_bfcsel_else_target: {
+    // If this is a fixup of a branch future's else target then it should be a
+    // constant MCExpr representing the distance between the branch targeted
+    // and the instruction after that same branch.
+    Value = Target.getConstant();
+
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint32_t out = ((Value >> 2) & 1) << 17;
+    return swapHalfWords(out, Endian == support::little);
+  }
+  case ARM::fixup_wls:
+  case ARM::fixup_le: {
+    const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+    if (FixupDiagnostic) {
+      Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+      return 0;
+    }
+    uint64_t real_value = Value - 4;
+    uint32_t out = 0;
+    if (Kind == ARM::fixup_le)
+      real_value = -real_value;
+    out |= ((real_value >> 1) & 0x1) << 11;
+    out |= ((real_value >> 1) & 0x7fe);
+    return swapHalfWords(out, Endian == support::little);
+  }
   }
 }
 
@@ -762,7 +871,9 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
   const MCSymbolRefExpr *A = Target.getSymA();
   const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
   const unsigned FixupKind = Fixup.getKind() ;
-  if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+  if (FixupKind == FK_NONE)
+    return true;
+  if (FixupKind == ARM::fixup_arm_thumb_bl) {
     assert(Sym && "How did we resolve this?");
 
     // If the symbol is external the linker will handle it.
@@ -804,6 +915,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
 
+  case FK_NONE:
+    return 0;
+
   case FK_Data_1:
   case ARM::fixup_arm_thumb_bcc:
   case ARM::fixup_arm_thumb_cp:
@@ -842,6 +956,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_t2_so_imm:
+  case ARM::fixup_bf_branch:
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target:
+  case ARM::fixup_bfcsel_else_target:
+  case ARM::fixup_wls:
+  case ARM::fixup_le:
     return 4;
 
   case FK_SecRel_2:
@@ -858,6 +979,9 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
 
+  case FK_NONE:
+    return 0;
+
   case FK_Data_1:
     return 1;
   case FK_Data_2:
@@ -876,6 +1000,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   case ARM::fixup_arm_pcrel_10_unscaled:
   case ARM::fixup_arm_ldst_pcrel_12:
   case ARM::fixup_arm_pcrel_10:
+  case ARM::fixup_arm_pcrel_9:
   case ARM::fixup_arm_adr_pcrel_12:
   case ARM::fixup_arm_uncondbl:
   case ARM::fixup_arm_condbl:
@@ -895,6 +1020,13 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_arm_mod_imm:
   case ARM::fixup_t2_so_imm:
+  case ARM::fixup_bf_branch:
+  case ARM::fixup_bf_target:
+  case ARM::fixup_bfl_target:
+  case ARM::fixup_bfc_target:
+  case ARM::fixup_bfcsel_else_target:
+  case ARM::fixup_wls:
+  case ARM::fixup_le:
     // Instruction size is 4 bytes.
     return 4;
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 88c476bf65f4..67722a5e5b64 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -1,9 +1,8 @@
 //===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,6 +37,8 @@ public:
   // different.
   bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
 
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
 
   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index de1bfaf203e4..87e56940f46d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -1,9 +1,8 @@
 //===-- ARMAsmBackendDarwin.h   ARM Asm Backend Darwin ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 86a583b19cf7..5d735114d441 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -1,9 +1,8 @@
 //===-- ARMAsmBackendELF.h  ARM Asm Backend ELF -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 553922d20f43..8cd7a4a00ead 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -1,9 +1,8 @@
 //===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 33c32d5464af..c4daafe8ee97 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -203,6 +202,9 @@ namespace ARMII {
     AddrMode_i12    = 16,
     AddrMode5FP16   = 17,  // i8 * 2
     AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
+    AddrModeT2_i7s4 = 19, // i7 * 4
+    AddrModeT2_i7s2 = 20, // i7 * 2
+    AddrModeT2_i7   = 21, // i7 * 1
   };
 
   inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -226,6 +228,9 @@ namespace ARMII {
     case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
     case AddrMode_i12:    return "AddrMode_i12";
     case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
+    case AddrModeT2_i7s4: return "AddrModeT2_i7s4";
+    case AddrModeT2_i7s2: return "AddrModeT2_i7s2";
+    case AddrModeT2_i7:   return "AddrModeT2_i7";
     }
   }
 
@@ -386,16 +391,17 @@ namespace ARMII {
     // instruction. Used by the parser to determine whether to require the 'S'
     // suffix on the mnemonic (when not in an IT block) or preclude it (when
     // in an IT block).
-    ThumbArithFlagSetting = 1 << 18,
+    ThumbArithFlagSetting = 1 << 19,
 
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 15,
-    DomainMask    = 7 << DomainShift,
+    DomainMask    = 15 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
     DomainNEONA8  = 4 << DomainShift,
+    DomainMVE     = 8 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index b8ba7584911b..fda19eea1de6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -138,12 +137,20 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
       default:
         return ELF::R_ARM_THM_CALL;
       }
+    case ARM::fixup_bf_target:
+      return ELF::R_ARM_THM_BF16;
+    case ARM::fixup_bfc_target:
+      return ELF::R_ARM_THM_BF12;
+    case ARM::fixup_bfl_target:
+      return ELF::R_ARM_THM_BF18;
     }
   }
   switch ((unsigned)Fixup.getKind()) {
   default:
     Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
     return ELF::R_ARM_NONE;
+  case FK_NONE:
+    return ELF::R_ARM_NONE;
   case FK_Data_1:
     switch (Modifier) {
     default:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index d3744fffac32..f51fbdcd84da 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -485,8 +484,8 @@ public:
   /// This function is the one used to emit instruction data into the ELF
   /// streamer. We override it to add the appropriate mapping symbol if
   /// necessary.
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool) override {
+  void EmitInstruction(const MCInst &Inst,
+                       const MCSubtargetInfo &STI) override {
     if (IsThumb)
       EmitThumbMappingSymbol();
     else
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 831589ba0581..bdf04a208b24 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -104,6 +103,15 @@ enum Fixups {
   // Fixup for Thumb2 8-bit rotated operand
   fixup_t2_so_imm,
 
+  // Fixups for Branch Future.
+  fixup_bf_branch,
+  fixup_bf_target,
+  fixup_bfl_target,
+  fixup_bfc_target,
+  fixup_bfcsel_else_target,
+  fixup_wls,
+  fixup_le,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
new file mode 100644
index 000000000000..45be1ee96342
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -0,0 +1,1678 @@
+//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstPrinter.h"
+#include "Utils/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#define PRINT_ALIAS_INSTR
+#include "ARMGenAsmWriter.inc"
+
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+  // lsr #32 and asr #32 exist, but should be encoded as a 0.
+  assert((imm & ~0x1f) == 0 && "Invalid shift encoding");
+
+  if (imm == 0)
+    return 32;
+  return imm;
+}
+
+/// Prints the shift value with an immediate value.
+static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
+                             unsigned ShImm, bool UseMarkup) {
+  if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
+    return;
+  O << ", ";
+
+  assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+  O << getShiftOpcStr(ShOpc);
+
+  if (ShOpc != ARM_AM::rrx) {
+    O << " ";
+    if (UseMarkup)
+      O << "<imm:";
+    O << "#" << translateShiftImm(ShImm);
+    if (UseMarkup)
+      O << ">";
+  }
+}
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                               const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
+  if (Opt == "reg-names-std") {
+    DefaultAltIdx = ARM::NoRegAltName;
+    return true;
+  }
+  if (Opt == "reg-names-raw") {
+    DefaultAltIdx = ARM::RegNamesRaw;
+    return true;
+  }
+  return false;
+}
+
+void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << markup("<reg:") << getRegisterName(RegNo, DefaultAltIdx) << markup(">");
+}
+
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot, const MCSubtargetInfo &STI) {
+  unsigned Opcode = MI->getOpcode();
+
+  switch (Opcode) {
+  // Check for MOVs and print canonical forms, instead.
+  case ARM::MOVsr: {
+    // FIXME: Thumb variants?
+    const MCOperand &Dst = MI->getOperand(0);
+    const MCOperand &MO1 = MI->getOperand(1);
+    const MCOperand &MO2 = MI->getOperand(2);
+    const MCOperand &MO3 = MI->getOperand(3);
+
+    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
+    printSBitModifierOperand(MI, 6, STI, O);
+    printPredicateOperand(MI, 4, STI, O);
+
+    O << '\t';
+    printRegName(O, Dst.getReg());
+    O << ", ";
+    printRegName(O, MO1.getReg());
+
+    O << ", ";
+    printRegName(O, MO2.getReg());
+    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  case ARM::MOVsi: {
+    // FIXME: Thumb variants?
+    const MCOperand &Dst = MI->getOperand(0);
+    const MCOperand &MO1 = MI->getOperand(1);
+    const MCOperand &MO2 = MI->getOperand(2);
+
+    O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
+    printSBitModifierOperand(MI, 5, STI, O);
+    printPredicateOperand(MI, 3, STI, O);
+
+    O << '\t';
+    printRegName(O, Dst.getReg());
+    O << ", ";
+    printRegName(O, MO1.getReg());
+
+    if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) {
+      printAnnotation(O, Annot);
+      return;
+    }
+
+    O << ", " << markup("<imm:") << "#"
+      << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // A8.6.123 PUSH
+  case ARM::STMDB_UPD:
+  case ARM::t2STMDB_UPD:
+    if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+      // Should only print PUSH if there are at least two registers in the list.
+      O << '\t' << "push";
+      printPredicateOperand(MI, 2, STI, O);
+      if (Opcode == ARM::t2STMDB_UPD)
+        O << ".w";
+      O << '\t';
+      printRegisterList(MI, 4, STI, O);
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  case ARM::STR_PRE_IMM:
+    if (MI->getOperand(2).getReg() == ARM::SP &&
+        MI->getOperand(3).getImm() == -4) {
+      O << '\t' << "push";
+      printPredicateOperand(MI, 4, STI, O);
+      O << "\t{";
+      printRegName(O, MI->getOperand(1).getReg());
+      O << "}";
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  // A8.6.122 POP
+  case ARM::LDMIA_UPD:
+  case ARM::t2LDMIA_UPD:
+    if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+      // Should only print POP if there are at least two registers in the list.
+      O << '\t' << "pop";
+      printPredicateOperand(MI, 2, STI, O);
+      if (Opcode == ARM::t2LDMIA_UPD)
+        O << ".w";
+      O << '\t';
+      printRegisterList(MI, 4, STI, O);
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  case ARM::LDR_POST_IMM:
+    if (MI->getOperand(2).getReg() == ARM::SP &&
+        MI->getOperand(4).getImm() == 4) {
+      O << '\t' << "pop";
+      printPredicateOperand(MI, 5, STI, O);
+      O << "\t{";
+      printRegName(O, MI->getOperand(0).getReg());
+      O << "}";
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  // A8.6.355 VPUSH
+  case ARM::VSTMSDB_UPD:
+  case ARM::VSTMDDB_UPD:
+    if (MI->getOperand(0).getReg() == ARM::SP) {
+      O << '\t' << "vpush";
+      printPredicateOperand(MI, 2, STI, O);
+      O << '\t';
+      printRegisterList(MI, 4, STI, O);
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  // A8.6.354 VPOP
+  case ARM::VLDMSIA_UPD:
+  case ARM::VLDMDIA_UPD:
+    if (MI->getOperand(0).getReg() == ARM::SP) {
+      O << '\t' << "vpop";
+      printPredicateOperand(MI, 2, STI, O);
+      O << '\t';
+      printRegisterList(MI, 4, STI, O);
+      printAnnotation(O, Annot);
+      return;
+    } else
+      break;
+
+  case ARM::tLDMIA: {
+    bool Writeback = true;
+    unsigned BaseReg = MI->getOperand(0).getReg();
+    for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
+      if (MI->getOperand(i).getReg() == BaseReg)
+        Writeback = false;
+    }
+
+    O << "\tldm";
+
+    printPredicateOperand(MI, 1, STI, O);
+    O << '\t';
+    printRegName(O, BaseReg);
+    if (Writeback)
+      O << "!";
+    O << ", ";
+    printRegisterList(MI, 3, STI, O);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  // Combine 2 GPRs from disassembler into a GPRPair to match with instr def.
+  // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+  // a single GPRPair reg operand is used in the .td file to replace the two
+  // GPRs. However, when decoding them, the two GPRs cannot be automatically
+  // expressed as a GPRPair, so we have to manually merge them.
+  // FIXME: We would really like to be able to tablegen'erate this.
+  case ARM::LDREXD:
+  case ARM::STREXD:
+  case ARM::LDAEXD:
+  case ARM::STLEXD: {
+    const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
+    bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
+    unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+    if (MRC.contains(Reg)) {
+      MCInst NewMI;
+      MCOperand NewReg;
+      NewMI.setOpcode(Opcode);
+
+      if (isStore)
+        NewMI.addOperand(MI->getOperand(0));
+      NewReg = MCOperand::createReg(MRI.getMatchingSuperReg(
+          Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
+      NewMI.addOperand(NewReg);
+
+      // Copy the rest operands into NewMI.
+      for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+        NewMI.addOperand(MI->getOperand(i));
+      printInstruction(&NewMI, STI, O);
+      return;
+    }
+    break;
+  }
+  case ARM::TSB:
+  case ARM::t2TSB:
+    O << "\ttsb\tcsync";
+    return;
+  case ARM::t2DSB:
+    switch (MI->getOperand(0).getImm()) {
+    default:
+      if (!printAliasInstr(MI, STI, O))
+        printInstruction(MI, STI, O);
+      break;
+    case 0:
+      O << "\tssbb";
+      break;
+    case 4:
+      O << "\tpssbb";
+      break;
+    }
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (!printAliasInstr(MI, STI, O))
+    printInstruction(MI, STI, O);
+
+  printAnnotation(O, Annot);
+}
+
+void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
+  if (Op.isImm()) {
+    O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  const MCExpr *E = Op.getExpr();
+  switch (E->getKind()) {
+  case MCExpr::Binary:
+    O << '#';
+    E->print(O, &MAI);
+    return;
+  case MCExpr::Constant: {
+    // A symbolic branch target that was added as a constant expression is
+    // printed as a hex address, limited to its low 32 unsigned bits.
+    int64_t Address;
+    if (cast<MCConstantExpr>(E)->evaluateAsAbsolute(Address)) {
+      O << "0x";
+      O.write_hex(static_cast<uint32_t>(Address));
+      return;
+    }
+    O << '#';
+    E->print(O, &MAI);
+    return;
+  }
+  default:
+    // FIXME: Should we always treat this as if it is a constant literal and
+    // prefix it with '#'?
+    E->print(O, &MAI);
+    return;
+  }
+}
+
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  // Symbolic labels are printed verbatim; anything else is printed as a
+  // pc-relative "[pc, #imm]" memory reference.
+  const MCOperand &Label = MI->getOperand(OpNum);
+  if (Label.isExpr()) {
+    Label.getExpr()->print(O, &MAI);
+    return;
+  }
+
+  O << markup("<mem:") << "[pc, ";
+
+  // INT32_MIN is the special value for #-0: it keeps the minus sign but has
+  // a magnitude of zero. All others are normal.
+  const int32_t RawOffset = (int32_t)Label.getImm();
+  const bool IsNegative = RawOffset < 0;
+  const int32_t OffImm = RawOffset == INT32_MIN ? 0 : RawOffset;
+  const char *Prefix = IsNegative ? "#-" : "#";
+  const int32_t Magnitude = IsNegative ? -OffImm : OffImm;
+
+  O << markup("<imm:") << Prefix << formatImm(Magnitude) << markup(">");
+  O << "]" << markup(">");
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms.  This includes:
+//    REG 0   0           - e.g. R5
+//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
+//    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
+void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  const MCOperand &ShiftRegOp = MI->getOperand(OpNum + 1);
+  const MCOperand &MO3 = MI->getOperand(OpNum + 2);
+
+  printRegName(O, RegOp.getReg());
+
+  // Print the shift mnemonic. rrx has no shift register, so stop after it.
+  const ARM_AM::ShiftOpc Kind = ARM_AM::getSORegShOp(MO3.getImm());
+  O << ", " << ARM_AM::getShiftOpcStr(Kind);
+  if (Kind != ARM_AM::rrx) {
+    O << ' ';
+    printRegName(O, ShiftRegOp.getReg());
+    // The register-shifted form is expected to carry a zero shift amount.
+    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+  }
+}
+
+void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // Operand OpNum is the register; OpNum + 1 packs the shift kind and amount.
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  const MCOperand &ShiftOp = MI->getOperand(OpNum + 1);
+
+  printRegName(O, RegOp.getReg());
+  const int64_t ShiftEnc = ShiftOp.getImm();
+  printRegImmShift(O, ARM_AM::getSORegShOp(ShiftEnc),
+                   ARM_AM::getSORegOffset(ShiftEnc), UseMarkup);
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(Op);
+  const MCOperand &IndexOp = MI->getOperand(Op + 1);
+  const MCOperand &AM2Op = MI->getOperand(Op + 2);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+
+  if (const unsigned IndexReg = IndexOp.getReg()) {
+    // Register offset form.
+    const int64_t Enc = AM2Op.getImm();
+    O << ", " << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(Enc));
+    printRegName(O, IndexReg);
+    printRegImmShift(O, ARM_AM::getAM2ShiftOpc(Enc), ARM_AM::getAM2Offset(Enc),
+                     UseMarkup);
+  } else {
+    // Immediate offset form. Don't print +0.
+    const int64_t Enc = AM2Op.getImm();
+    if (const auto Offset = ARM_AM::getAM2Offset(Enc))
+      O << ", " << markup("<imm:") << "#"
+        << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(Enc)) << Offset
+        << markup(">");
+  }
+
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  // Prints the register operands at Op and Op + 1 as a bracketed, comma
+  // separated pair.
+  O << markup("<mem:") << "[";
+  printRegName(O, MI->getOperand(Op).getReg());
+  O << ", ";
+  printRegName(O, MI->getOperand(Op + 1).getReg());
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  // Prints the registers at Op and Op + 1 with a fixed "lsl #1" after them.
+  O << markup("<mem:") << "[";
+  printRegName(O, MI->getOperand(Op).getReg());
+  O << ", ";
+  printRegName(O, MI->getOperand(Op + 1).getReg());
+  O << ", lsl " << markup("<imm:") << "#1" << markup(">");
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  // FIXME: This is for CP entries, but isn't right.
+  if (!MI->getOperand(Op).isReg()) {
+    printOperand(MI, Op, STI, O);
+    return;
+  }
+
+#ifndef NDEBUG
+  // Post-indexed operands must not be routed through this printer.
+  const int64_t AM2Enc = MI->getOperand(Op + 2).getImm();
+  unsigned IdxMode = ARM_AM::getAM2IdxMode(AM2Enc);
+  assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
+#endif
+
+  printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
+}
+
+void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  // OpNum is the offset register (0 when the offset is an immediate) and
+  // OpNum + 1 is the AM2 encoding holding the add/sub op, offset and shift.
+  const unsigned OffsetReg = MI->getOperand(OpNum).getReg();
+  const int64_t Enc = MI->getOperand(OpNum + 1).getImm();
+  const char *Sign = ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(Enc));
+
+  if (OffsetReg) {
+    O << Sign;
+    printRegName(O, OffsetReg);
+    printRegImmShift(O, ARM_AM::getAM2ShiftOpc(Enc), ARM_AM::getAM2Offset(Enc),
+                     UseMarkup);
+  } else {
+    // The immediate is printed even when it is zero.
+    const unsigned ImmOffs = ARM_AM::getAM2Offset(Enc);
+    O << markup("<imm:") << '#' << Sign << ImmOffs << markup(">");
+  }
+}
+
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+                                                raw_ostream &O,
+                                                bool AlwaysPrintImm0) {
+  const MCOperand &BaseOp = MI->getOperand(Op);
+  const MCOperand &IndexOp = MI->getOperand(Op + 1);
+  const MCOperand &AM3Op = MI->getOperand(Op + 2);
+
+  O << markup("<mem:") << '[';
+  printRegName(O, BaseOp.getReg());
+
+  if (const unsigned IndexReg = IndexOp.getReg()) {
+    const ARM_AM::AddrOpc Dir = ARM_AM::getAM3Op(AM3Op.getImm());
+    O << ", " << ARM_AM::getAddrOpcStr(Dir);
+    printRegName(O, IndexReg);
+  } else {
+    // Immediate offset form. A zero offset is normally left out, but if the
+    // op is sub (or the caller asks for it) it has to be printed anyway.
+    const unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Op.getImm());
+    const ARM_AM::AddrOpc Dir = ARM_AM::getAM3Op(AM3Op.getImm());
+    const bool ShowImm = AlwaysPrintImm0 || ImmOffs != 0 || Dir == ARM_AM::sub;
+    if (ShowImm)
+      O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Dir)
+        << ImmOffs << markup(">");
+  }
+
+  O << ']' << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  // For label symbolic references.
+  if (!MI->getOperand(Op).isReg()) {
+    printOperand(MI, Op, STI, O);
+    return;
+  }
+
+  assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) !=
+             ARMII::IndexModePost &&
+         "unexpected idxmode");
+  printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
+}
+
+void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  const MCOperand &OffsetRegOp = MI->getOperand(OpNum);
+  const MCOperand &AM3Op = MI->getOperand(OpNum + 1);
+
+  // The add/sub marker precedes both the register and the immediate form.
+  const unsigned OffsetReg = OffsetRegOp.getReg();
+  const char *Sign = ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(AM3Op.getImm()));
+  if (OffsetReg) {
+    O << Sign;
+    printRegName(O, OffsetReg);
+  } else {
+    const unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Op.getImm());
+    O << markup("<imm:") << '#' << Sign << ImmOffs << markup(">");
+  }
+}
+
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+                                             const MCSubtargetInfo &STI,
+                                             raw_ostream &O) {
+  // Bit 8 set selects a positive offset; the low eight bits are the magnitude.
+  const unsigned Enc = MI->getOperand(OpNum).getImm();
+  const char *Sign = (Enc & 0x100) ? "" : "-";
+  O << markup("<imm:") << '#' << Sign << (Enc & 0xff) << markup(">");
+}
+
+void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  // A zero immediate in operand OpNum + 1 puts a '-' before the register.
+  const MCOperand &AddFlagOp = MI->getOperand(OpNum + 1);
+  if (!AddFlagOp.getImm())
+    O << "-";
+  printRegName(O, MI->getOperand(OpNum).getReg());
+}
+
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  // Bit 8 set means positive; the magnitude is the low eight bits times four.
+  const unsigned Enc = MI->getOperand(OpNum).getImm();
+  O << markup("<imm:") << '#' << ((Enc & 0x100) ? "" : "-")
+    << ((Enc & 0xff) << 2) << markup(">");
+}
+
+template <int shift>
+void ARMInstPrinter::printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &FirstRegOp = MI->getOperand(OpNum);
+  const MCOperand &SecondRegOp = MI->getOperand(OpNum + 1);
+
+  // Both operands are registers; a positive template 'shift' is appended as
+  // a uxtw shift after the second one.
+  O << markup("<mem:") << "[";
+  printRegName(O, FirstRegOp.getReg());
+  O << ", ";
+  printRegName(O, SecondRegOp.getReg());
+  if (shift > 0)
+    printRegImmShift(O, ARM_AM::uxtw, shift, UseMarkup);
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(OpNum);
+  const MCOperand &OffsetOp = MI->getOperand(OpNum + 1);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+
+  // The immediate offset is omitted when it is zero.
+  if (const int64_t Offset = OffsetOp.getImm())
+    O << ", " << markup("<imm:") << '#' << Offset << markup(">");
+
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  // Print the string for the AM4 sub-mode held in the immediate operand.
+  const int64_t Enc = MI->getOperand(OpNum).getImm();
+  O << ARM_AM::getAMSubModeStr(ARM_AM::getAM4SubMode(Enc));
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(OpNum);
+  const MCOperand &AM5Op = MI->getOperand(OpNum + 1);
+
+  if (!BaseOp.isReg()) { // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, OpNum, STI, O);
+    return;
+  }
+
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+
+  // The encoded offset is printed multiplied by four.
+  const unsigned ImmOffs = ARM_AM::getAM5Offset(AM5Op.getImm());
+  const ARM_AM::AddrOpc Dir = ARM_AM::getAM5Op(AM5Op.getImm());
+  if (AlwaysPrintImm0 || ImmOffs != 0 || Dir == ARM_AM::sub)
+    O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Dir)
+      << ImmOffs * 4 << markup(">");
+  O << "]" << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &MO1 = MI->getOperand(OpNum);
+  const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+  if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, OpNum, STI, O);
+    return;
+  }
+
+  O << markup("<mem:") << "[";
+  printRegName(O, MO1.getReg());
+
+  // Decode the add/sub op once and keep its enum type instead of narrowing it
+  // to 'unsigned' and decoding it again for printing. The offset is emitted
+  // when it is non-zero, when the op is sub, or when AlwaysPrintImm0 is set,
+  // and is printed multiplied by two.
+  const unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
+  const ARM_AM::AddrOpc Op = ARM_AM::getAM5FP16Op(MO2.getImm());
+  if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
+    O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
+      << ImmOffs * 2 << markup(">");
+  }
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(OpNum);
+  const MCOperand &AlignOp = MI->getOperand(OpNum + 1);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+  // A non-zero second operand is printed after ':' multiplied by eight.
+  if (const int64_t Align = AlignOp.getImm())
+    O << ":" << (Align << 3);
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  // Only the register is printed, in brackets; there is no offset part.
+  O << markup("<mem:") << "[";
+  printRegName(O, MI->getOperand(OpNum).getReg());
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  // Register 0 is printed as '!'; any other register as ", <reg>".
+  if (const unsigned OffsetReg = MI->getOperand(OpNum).getReg()) {
+    O << ", ";
+    printRegName(O, OffsetReg);
+  } else {
+    O << "!";
+  }
+}
+
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    const MCSubtargetInfo &STI,
+                                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); // before use
+  uint32_t v = ~MO.getImm(); // the operand holds the inverted mask
+  int32_t lsb = countTrailingZeros(v);
+  int32_t width = (32 - countLeadingZeros(v)) - lsb;
+  O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
+    << '#' << width << markup(">");
+}
+
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  const unsigned Opt = MI->getOperand(OpNum).getImm();
+  O << ARM_MB::MemBOptToString(Opt, STI.getFeatureBits()[ARM::HasV8Ops]);
+}
+
+void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // Print the name that InstSyncBOptToString gives the immediate operand.
+  O << ARM_ISB::InstSyncBOptToString((unsigned)MI->getOperand(OpNum).getImm());
+}
+
+void ARMInstPrinter::printTraceSyncBOption(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const unsigned Opt = MI->getOperand(OpNum).getImm();
+  O << ARM_TSB::TraceSyncBOptToString(Opt);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // Bit 5 selects asr over lsl; the low five bits hold the shift amount.
+  const unsigned Enc = MI->getOperand(OpNum).getImm();
+  const unsigned Amt = Enc & 0x1f;
+  if (Enc & (1 << 5)) {
+    const unsigned Printed = Amt == 0 ? 32 : Amt; // asr #0 is printed as #32
+    O << ", asr " << markup("<imm:") << "#" << Printed << markup(">");
+  } else if (Amt != 0) {
+    O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
+  }
+}
+
+void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const unsigned Imm = MI->getOperand(OpNum).getImm();
+  if (Imm != 0) { // A zero shift amount prints nothing.
+    assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!");
+    O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">");
+  }
+}
+
+void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  // The encoding uses 0 to represent a shift amount of 32.
+  const unsigned Encoded = MI->getOperand(OpNum).getImm();
+  const unsigned Imm = Encoded == 0 ? 32 : Encoded;
+
+  assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!");
+  O << ", asr " << markup("<imm:") << "#" << Imm << markup(">");
+}
+
+void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  // Every list except t2CLRM's is expected to be sorted by encoding value.
+  if (MI->getOpcode() != ARM::t2CLRM) {
+    assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
+                          [&](const MCOperand &LHS, const MCOperand &RHS) {
+                            return MRI.getEncodingValue(LHS.getReg()) <
+                                   MRI.getEncodingValue(RHS.getReg());
+                          }));
+  }
+
+  O << "{";
+  for (unsigned Idx = OpNum, End = MI->getNumOperands(); Idx != End; ++Idx) {
+    O << (Idx == OpNum ? "" : ", ");
+    printRegName(O, MI->getOperand(Idx).getReg());
+  }
+  O << "}";
+}
+
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const unsigned PairReg = MI->getOperand(OpNum).getReg();
+  // The pair is printed as its gsub_0 sub-register, ", ", then gsub_1.
+  printRegName(O, MRI.getSubReg(PairReg, ARM::gsub_0));
+  printRegName(O << ", ", MRI.getSubReg(PairReg, ARM::gsub_1));
+}
+
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // A non-zero immediate prints as "be"; a zero immediate prints as "le".
+  // NOTE(review): presumably big-/little-endian -- confirm against the ISA.
+  const bool IsNonZero = MI->getOperand(OpNum).getImm() != 0;
+
+  O << (IsNonZero ? "be" : "le");
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  // Prints the ARM_PROC::IModToString name for the immediate operand.
+  O << ARM_PROC::IModToString(MI->getOperand(OpNum).getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O) {
+  const unsigned IFlags = MI->getOperand(OpNum).getImm();
+  // Walk bits 2, 1, 0 in that order and print the name of each one set.
+  for (int Bit = 1 << 2; Bit != 0; Bit >>= 1)
+    if (IFlags & Bit)
+      O << ARM_PROC::IFlagsToString(Bit);
+  // An operand with no bits set at all is spelled out as "none".
+  if (IFlags == 0)
+    O << "none";
+}
+
+void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+  const FeatureBitset &FeatureBits = STI.getFeatureBits();
+
+  if (FeatureBits[ARM::FeatureMClass]) {
+    // M-class: the operand is a SYSm value that is translated to a register
+    // name, trying the most specific lookup first.
+    unsigned SYSm = Op.getImm() & 0xFFF; // 12-bit SYSm
+    const bool IsWrite = MI->getOpcode() == ARM::t2MSR_M;
+
+    // For writes, handle extended mask bits if the DSP extension is present.
+    if (IsWrite && FeatureBits[ARM::FeatureDSP]) {
+      auto TheReg = ARMSysReg::lookupMClassSysRegBy12bitSYSmValue(SYSm);
+      if (TheReg && TheReg->isInRequiredFeatures({ARM::FeatureDSP})) {
+        O << TheReg->Name;
+        return;
+      }
+    }
+
+    // Handle the basic 8-bit mask.
+    SYSm &= 0xff;
+    if (IsWrite && FeatureBits[ARM::HasV7Ops]) {
+      // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
+      // alias for MSR APSR_nzcvq.
+      if (auto TheReg = ARMSysReg::lookupMClassSysRegAPSRNonDeprecated(SYSm)) {
+        O << TheReg->Name;
+        return;
+      }
+    }
+
+    if (auto TheReg = ARMSysReg::lookupMClassSysRegBy8bitSYSmValue(SYSm)) {
+      O << TheReg->Name;
+      return;
+    }
+
+    // No lookup produced a name; print the raw SYSm number instead.
+    O << SYSm;
+    return;
+  }
+
+  // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as
+  // APSR_nzcvq, APSR_g and APSR_nzcvqg, respectively.
+  const unsigned SpecRegRBit = Op.getImm() >> 4;
+  const unsigned Mask = Op.getImm() & 0xf;
+
+  if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
+    O << "APSR_";
+    switch (Mask) {
+    default:
+      llvm_unreachable("Unexpected mask value!");
+    case 4:
+      O << "g";
+      return;
+    case 8:
+      O << "nzcvq";
+      return;
+    case 12:
+      O << "nzcvqg";
+      return;
+    }
+  }
+
+  // General form: SPSR when the R bit is set, CPSR otherwise, followed by
+  // '_' and one letter for each mask bit that is set.
+  O << (SpecRegRBit ? "SPSR" : "CPSR");
+
+  if (!Mask)
+    return;
+
+  O << '_';
+  if (Mask & 8)
+    O << 'f';
+  if (Mask & 4)
+    O << 's';
+  if (Mask & 2)
+    O << 'x';
+  if (Mask & 1)
+    O << 'c';
+}
+
+void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const uint32_t Banked = MI->getOperand(OpNum).getImm();
+  auto TheReg = ARMBankedReg::lookupBankedRegByEncoding(Banked);
+  assert(TheReg && "invalid banked register operand");
+  // When bit 5 of the encoding is set, the first four characters of the
+  // looked-up name are replaced, converting 'spsr_' to 'SPSR_'.
+  std::string Name = TheReg->Name;
+  if (Banked & 0x20)
+    Name.replace(0, 4, "SPSR");
+  O << Name;
+}
+
+void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                           const MCSubtargetInfo &STI,
+                                           raw_ostream &O) {
+  const auto CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  if (CC == ARMCC::AL)
+    return; // The AL condition prints nothing.
+  // Handle the undefined 15 CC value here for printing so we don't abort().
+  const bool IsUndefined = (unsigned)CC == 15;
+  O << (IsUndefined ? "<und>" : ARMCondCodeToString(CC));
+}
+
+void ARMInstPrinter::printMandatoryRestrictedPredicateOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // HS is spelled "cs" here; every other condition is printed as usual.
+  if ((ARMCC::CondCodes)MI->getOperand(OpNum).getImm() != ARMCC::HS)
+    return printMandatoryPredicateOperand(MI, OpNum, STI, O);
+  O << "cs";
+}
+
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    const MCSubtargetInfo &STI,
+                                                    raw_ostream &O) {
+  // The condition string is always printed; there is no special case for AL.
+  O << ARMCondCodeToString((ARMCC::CondCodes)MI->getOperand(OpNum).getImm());
+}
+
+void ARMInstPrinter::printMandatoryInvertedPredicateOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // Prints the opposite of the condition held in the operand.
+  const auto CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+  O << ARMCondCodeToString(ARMCC::getOppositeCondition(CC));
+}
+
+void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  if (!MI->getOperand(OpNum).getReg())
+    return; // Register 0: no 's' suffix to print.
+  assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
+         "Expect ARM CPSR register!");
+  O << 's';
+}
+
+void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  O << MI->getOperand(OpNum).getImm(); // bare number, no '#' prefix or markup
+}
+
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  O << "p" << MI->getOperand(OpNum).getImm(); // immediate N prints as "pN"
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  O << "c" << MI->getOperand(OpNum).getImm(); // immediate N prints as "cN"
+}
+
+void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  O << "{" << MI->getOperand(OpNum).getImm() << "}"; // N prints as "{N}"
+}
+
+void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); // no output
+}
+
+template <unsigned scale>
+void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // Prints a symbolic label verbatim, or the immediate shifted left by scale.
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (MO.isExpr()) {
+    MO.getExpr()->print(O, &MAI);
+    return;
+  }
+
+  // Shift in the unsigned domain: left-shifting a negative value is undefined.
+  int32_t OffImm = (int32_t)((uint32_t)MO.getImm() << scale);
+  O << markup("<imm:");
+  if (OffImm == INT32_MIN)
+    O << "#-0";
+  else if (OffImm < 0)
+    O << "#-" << -OffImm;
+  else
+    O << "#" << OffImm;
+  O << markup(">");
+}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  const int64_t Scaled = MI->getOperand(OpNum).getImm() * 4;
+  O << markup("<imm:") << "#" << formatImm(Scaled) << markup(">");
+}
+
+void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  // An encoded value of 0 is printed as 32.
+  const unsigned Imm = MI->getOperand(OpNum).getImm();
+  O << markup("<imm:") << "#" << formatImm(Imm ? Imm : 32u) << markup(">");
+}
+
+void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  // Each bit above the lowest set bit, read from bit 3 downwards, is one
+  // then/else slot ('e' when set, 't' when clear). That gives
+  // (3 - the number of trailing zeros) letters.
+  const unsigned Mask = MI->getOperand(OpNum).getImm();
+  const unsigned NumTZ = countTrailingZeros(Mask);
+  assert(NumTZ <= 3 && "Invalid IT mask!");
+  for (unsigned Pos = 3; Pos > NumTZ; --Pos) {
+    const bool IsElse = ((Mask >> Pos) & 1) != 0;
+    O << (IsElse ? 'e' : 't');
+  }
+}
+
+void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  const MCOperand &BaseOp = MI->getOperand(Op);
+  const MCOperand &OffsetOp = MI->getOperand(Op + 1);
+  if (!BaseOp.isReg()) { // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, STI, O);
+    return;
+  }
+
+  // The second register is only printed when it is non-zero.
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+  if (const unsigned OffsetReg = OffsetOp.getReg()) {
+    O << ", ";
+    printRegName(O, OffsetReg);
+  }
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+                                                    unsigned Op,
+                                                    const MCSubtargetInfo &STI,
+                                                    raw_ostream &O,
+                                                    unsigned Scale) {
+  const MCOperand &BaseOp = MI->getOperand(Op);
+  const MCOperand &OffsetOp = MI->getOperand(Op + 1);
+  if (!BaseOp.isReg()) { // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op, STI, O);
+    return;
+  }
+
+  // A zero offset is omitted; otherwise the encoded offset is printed
+  // multiplied by Scale.
+  O << markup("<mem:") << "[";
+  printRegName(O, BaseOp.getReg());
+  if (const unsigned ImmOffs = OffsetOp.getImm())
+    O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
+      << markup(">");
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     const MCSubtargetInfo &STI,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1); // offset scale of 1
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     const MCSubtargetInfo &STI,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2); // offset scale of 2
+}
+
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+                                                     unsigned Op,
+                                                     const MCSubtargetInfo &STI,
+                                                     raw_ostream &O) {
+  printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); // offset scale of 4
+}
+
+void ARMInstPrinter::printThumbAddrModeSPOperand(
+    const MCInst *MI, unsigned Op, const MCSubtargetInfo &STI, raw_ostream &O) {
+  // Same output as the Imm5S4 form: the stored immediate is multiplied by 4.
+  printThumbAddrModeImm5SOperand(MI, Op, STI, O, /*Scale=*/4);
+}
+
+// Constant shifts: t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0   0           - e.g. R5
+// REG IMM, SH_OPC     - e.g. R5, LSL #3
+void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  const MCOperand &RegOp = MI->getOperand(OpNum);
+  const MCOperand &ShiftOp = MI->getOperand(OpNum + 1);
+
+  printRegName(O, RegOp.getReg());
+
+  // Print the shift opc and amount, both taken from the immediate operand.
+  assert(ShiftOp.isImm() && "Not a valid t2_so_reg value!");
+  const int64_t Packed = ShiftOp.getImm();
+  printRegImmShift(O, ARM_AM::getSORegShOp(Packed),
+                   ARM_AM::getSORegOffset(Packed), UseMarkup);
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(OpNum);
+  const MCOperand &Offset = MI->getOperand(OpNum + 1);
+
+  // FIXME: This is for CP entries, but isn't right.
+  if (!Base.isReg())
+    return printOperand(MI, OpNum, STI, O);
+
+  // Emit "[<base>", an optional signed immediate, then the closing "]".
+  O << markup("<mem:") << "[";
+  printRegName(O, Base.getReg());
+
+  const int32_t Raw = static_cast<int32_t>(Offset.getImm());
+  const bool Negative = Raw < 0;
+  // INT32_MIN is the special value for #-0: it takes the negative branch
+  // with a magnitude of zero. All other values are printed normally.
+  const int32_t OffImm = (Raw == INT32_MIN) ? 0 : Raw;
+  if (Negative) {
+    O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+  } else if (AlwaysPrintImm0 || OffImm > 0) {
+    O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
+  }
+  O << "]" << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
+                                                unsigned OpNum,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(OpNum);
+  const MCOperand &Offset = MI->getOperand(OpNum + 1);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, Base.getReg());
+
+  const int32_t Raw = static_cast<int32_t>(Offset.getImm());
+  const bool Negative = Raw < 0;
+  // INT32_MIN is printed as "#-0" (negative branch, zero magnitude).
+  // A plain zero is printed only when AlwaysPrintImm0 is set.
+  const int32_t OffImm = (Raw == INT32_MIN) ? 0 : Raw;
+  if (Negative) {
+    O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+  } else if (AlwaysPrintImm0 || OffImm > 0) {
+    O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+  }
+  O << "]" << markup(">");
+}
+
+template <bool AlwaysPrintImm0>
+void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
+                                                  unsigned OpNum,
+                                                  const MCSubtargetInfo &STI,
+                                                  raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(OpNum);
+  const MCOperand &Offset = MI->getOperand(OpNum + 1);
+
+  // For label symbolic references.
+  if (!Base.isReg())
+    return printOperand(MI, OpNum, STI, O);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, Base.getReg());
+
+  const int32_t Raw = static_cast<int32_t>(Offset.getImm());
+  const bool Negative = Raw < 0;
+
+  // The two low bits of the offset must be clear.
+  assert(((Raw & 0x3) == 0) && "Not a valid immediate!");
+
+  // INT32_MIN is printed as "#-0" (negative branch, zero magnitude).
+  // A plain zero is printed only when AlwaysPrintImm0 is set.
+  const int32_t OffImm = (Raw == INT32_MIN) ? 0 : Raw;
+  if (Negative) {
+    O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
+  } else if (AlwaysPrintImm0 || OffImm > 0) {
+    O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
+  }
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(OpNum);
+  const MCOperand &Offset = MI->getOperand(OpNum + 1);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, Base.getReg());
+  // The printed offset is the stored immediate times four; zero is omitted.
+  if (const int64_t Words = Offset.getImm())
+    O << ", " << markup("<imm:") << "#" << formatImm(Words * 4)
+      << markup(">");
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const int32_t Imm = static_cast<int32_t>(MI->getOperand(OpNum).getImm());
+  O << ", " << markup("<imm:");
+  // INT32_MIN is printed as negative zero.
+  if (Imm == INT32_MIN)
+    O << "#-0";
+  else if (Imm < 0)
+    O << "#-" << -Imm;
+  else
+    O << "#" << Imm;
+  O << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const int32_t Imm = static_cast<int32_t>(MI->getOperand(OpNum).getImm());
+  // The two low bits of the offset must be clear.
+  assert(((Imm & 0x3) == 0) && "Not a valid immediate!");
+
+  // INT32_MIN is printed as negative zero.
+  O << ", " << markup("<imm:");
+  if (Imm == INT32_MIN)
+    O << "#-0";
+  else if (Imm < 0)
+    O << "#-" << -Imm;
+  else
+    O << "#" << Imm;
+  O << markup(">");
+}
+
+void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(OpNum);
+  const MCOperand &Index = MI->getOperand(OpNum + 1);
+  const MCOperand &Shift = MI->getOperand(OpNum + 2);
+
+  O << markup("<mem:") << "[";
+  printRegName(O, Base.getReg());
+
+  assert(Index.getReg() && "Invalid so_reg load / store address!");
+  O << ", ";
+  printRegName(O, Index.getReg());
+
+  // A non-zero shift amount (at most 3) is printed as ", lsl #n".
+  if (const unsigned ShAmt = Shift.getImm()) {
+    assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+    O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
+  }
+  O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O) {
+  // Print '#' followed by what ARM_AM::getFPImmFloat yields for the operand.
+  const auto Value = ARM_AM::getFPImmFloat(MI->getOperand(OpNum).getImm());
+  O << markup("<imm:") << '#' << Value << markup(">");
+}
+
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  unsigned EltBits; // Second argument of the decoder; not printed.
+  const unsigned Encoded = MI->getOperand(OpNum).getImm();
+  const uint64_t Decoded = ARM_AM::decodeNEONModImm(Encoded, EltBits);
+  O << markup("<imm:") << "#0x";
+  O.write_hex(Decoded);
+  O << markup(">");
+}
+
+void ARMInstPrinter::printImmPlusOneOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const unsigned Stored = MI->getOperand(OpNum).getImm(); // printed value - 1
+  O << markup("<imm:") << "#" << formatImm(Stored + 1) << markup(">");
+}
+
+void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // A zero rotation prints nothing; otherwise print ", ror #<8 * Rot>".
+  if (const unsigned Rot = MI->getOperand(OpNum).getImm()) {
+    assert(Rot <= 3 && "illegal ror immediate!");
+    O << ", ror " << markup("<imm:") << "#" << 8 * Rot << markup(">");
+  }
+}
+
+// Print a modified-immediate operand. When the encoding is the one
+// ARM_AM::getSOImmVal produces for the rotated value, the value itself is
+// printed; otherwise the explicit "#bits, #rot" pair is printed.
+void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNum);
+
+  // Support for fixups (MCFixup)
+  if (Operand.isExpr())
+    return printOperand(MI, OpNum, STI, O);
+
+  // Bits is the low byte; Rot is the next 4-bit field times two (>> 7).
+  const int64_t Encoded = Operand.getImm();
+  const unsigned Bits = Encoded & 0xFF;
+  const unsigned Rot = (Encoded & 0xF00) >> 7;
+
+  // Movs to PC (MOVi) and movs to special registers (MSRi) should be
+  // treated unsigned.
+  const unsigned Opcode = MI->getOpcode();
+  const bool PrintUnsigned =
+      Opcode == ARM::MSRi ||
+      (Opcode == ARM::MOVi && MI->getOperand(OpNum - 1).getReg() == ARM::PC);
+
+  const int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
+  if (ARM_AM::getSOImmVal(Rotated) != Encoded) {
+    // Explicit #bits, #rot implied
+    O << "#" << markup("<imm:") << Bits << markup(">") << ", #"
+      << markup("<imm:") << Rot << markup(">");
+    return;
+  }
+
+  // #rot has the least possible value
+  O << "#" << markup("<imm:");
+  if (PrintUnsigned)
+    O << static_cast<uint32_t>(Rotated);
+  else
+    O << Rotated;
+  O << markup(">");
+}
+
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  const int64_t FBits = 16 - MI->getOperand(OpNum).getImm();
+  O << markup("<imm:") << "#" << FBits << markup(">");
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  const int64_t FBits = 32 - MI->getOperand(OpNum).getImm();
+  O << markup("<imm:") << "#" << FBits << markup(">");
+}
+
+void ARMInstPrinter::printVectorIndex(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  O << "[" << MI->getOperand(OpNum).getImm() << "]"; // e.g. "[1]"
+}
+
+void ARMInstPrinter::printVectorListOne(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  O << "{"; // Prints "{<reg>}".
+  printRegName(O, MI->getOperand(OpNum).getReg());
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // The two list elements are the dsub_0 and dsub_1 sub-registers of the
+  // operand register.
+  const unsigned Pair = MI->getOperand(OpNum).getReg();
+  O << "{";
+  printRegName(O, MRI.getSubReg(Pair, ARM::dsub_0));
+  O << ", ";
+  printRegName(O, MRI.getSubReg(Pair, ARM::dsub_1));
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  // The two list elements are the dsub_0 and dsub_2 sub-registers of the
+  // operand register.
+  const unsigned Tuple = MI->getOperand(OpNum).getReg();
+  O << "{";
+  printRegName(O, MRI.getSubReg(Tuple, ARM::dsub_0));
+  O << ", ";
+  printRegName(O, MRI.getSubReg(Tuple, ARM::dsub_2));
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O) {
+  // Register enum values normally can't be incremented to reach the next
+  // register, but D<n> registers are guaranteed to be sorted in order.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 2; ++Idx) {
+    printRegName(O, First + Idx);
+    O << ", ";
+  }
+  printRegName(O, First + 2);
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get the next register, but for VFP registers, the
+  // sort order is guaranteed because they're all of the form D<n>.
+  // Four registers are listed: the operand's and the next three.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 3; ++Idx) {
+    printRegName(O, First + Idx);
+    O << ", ";
+  }
+  printRegName(O, First + 3);
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // Prints "{<reg>[]}".
+  O << "{";
+  printRegName(O, MI->getOperand(OpNum).getReg());
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+                                                unsigned OpNum,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  // The two list elements are the dsub_0 and dsub_1 sub-registers of the
+  // operand register, each followed by "[]".
+  const unsigned Pair = MI->getOperand(OpNum).getReg();
+  O << "{";
+  printRegName(O, MRI.getSubReg(Pair, ARM::dsub_0));
+  O << "[], ";
+  printRegName(O, MRI.getSubReg(Pair, ARM::dsub_1));
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+                                                  unsigned OpNum,
+                                                  const MCSubtargetInfo &STI,
+                                                  raw_ostream &O) {
+  // Register enum values normally can't be incremented to reach the next
+  // register, but D<n> registers are guaranteed to be sorted in order.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 2; ++Idx) {
+    printRegName(O, First + Idx);
+    O << "[], ";
+  }
+  printRegName(O, First + 2);
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+                                                 unsigned OpNum,
+                                                 const MCSubtargetInfo &STI,
+                                                 raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get the next register, but for VFP registers, the
+  // sort order is guaranteed because they're all of the form D<n>.
+  // Four registers are listed: the operand's and the next three.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 3; ++Idx) {
+    printRegName(O, First + Idx);
+    O << "[], ";
+  }
+  printRegName(O, First + 3);
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // The two list elements are the dsub_0 and dsub_2 sub-registers of the
+  // operand register, each followed by "[]".
+  const unsigned Tuple = MI->getOperand(OpNum).getReg();
+  O << "{";
+  printRegName(O, MRI.getSubReg(Tuple, ARM::dsub_0));
+  O << "[], ";
+  printRegName(O, MRI.getSubReg(Tuple, ARM::dsub_2));
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // Register enum values normally can't be incremented to reach the next
+  // register, but D<n> registers are guaranteed to be sorted in order.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 2; ++Idx) {
+    printRegName(O, First + 2 * Idx);
+    O << "[], ";
+  }
+  printRegName(O, First + 4);
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get the next register, but for VFP registers, the
+  // sort order is guaranteed because they're all of the form D<n>.
+  // Four registers are listed, stepping the enum value by two each time.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 3; ++Idx) {
+    printRegName(O, First + 2 * Idx);
+    O << "[], ";
+  }
+  printRegName(O, First + 6);
+  O << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+                                                unsigned OpNum,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O) {
+  // Register enum values normally can't be incremented to reach the next
+  // register, but D<n> registers are guaranteed to be sorted in order.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 2; ++Idx) {
+    printRegName(O, First + 2 * Idx);
+    O << ", ";
+  }
+  printRegName(O, First + 4);
+  O << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+                                               const MCSubtargetInfo &STI,
+                                               raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get the next register, but for VFP registers, the
+  // sort order is guaranteed because they're all of the form D<n>.
+  // Four registers are listed, stepping the enum value by two each time.
+  const unsigned First = MI->getOperand(OpNum).getReg();
+  O << "{";
+  for (unsigned Idx = 0; Idx != 3; ++Idx) {
+    printRegName(O, First + 2 * Idx);
+    O << ", ";
+  }
+  printRegName(O, First + 6);
+  O << "}";
+}
+
+template<unsigned NumRegs>
+void ARMInstPrinter::printMVEVectorList(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O) {
+  // Each list element is the (qsub_0 + Idx) sub-register of the operand
+  // register; elements are separated by ", " and wrapped in braces.
+  const unsigned Tuple = MI->getOperand(OpNum).getReg();
+  for (unsigned Idx = 0; Idx != NumRegs; ++Idx) {
+    O << (Idx == 0 ? "{" : ", ");
+    printRegName(O, MRI.getSubReg(Tuple, ARM::qsub_0 + Idx));
+  }
+  O << "}";
+}
+
+template<int64_t Angle, int64_t Remainder>
+void ARMInstPrinter::printComplexRotationOp(
+    const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const unsigned Encoded = MI->getOperand(OpNo).getImm();
+  O << "#" << (Encoded * Angle) + Remainder; // Encoded * Angle + Remainder
+}
+
+void ARMInstPrinter::printVPTPredicateOperand(
+    const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+    raw_ostream &O) {
+  const auto CC = static_cast<ARMVCC::VPTCodes>(MI->getOperand(OpNum).getImm());
+  if (CC != ARMVCC::None) // ARMVCC::None prints nothing.
+    O << ARMVPTPredToString(CC);
+}
+
+void ARMInstPrinter::printVPTMask(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI,
+                                  raw_ostream &O) {
+  // (3 - the number of trailing zeroes) is the number of then / else.
+  const unsigned Mask = MI->getOperand(OpNum).getImm();
+  const unsigned NumTZ = countTrailingZeros(Mask);
+  assert(NumTZ <= 3 && "Invalid VPT mask!");
+
+  // Walk from bit 3 down to the bit just above the lowest set bit; a clear
+  // bit prints 't' and a set bit prints 'e'.
+  for (unsigned Pos = 3; Pos > NumTZ; --Pos) {
+    const bool IsThen = ((Mask >> Pos) & 1) == 0;
+    O << (IsThen ? 't' : 'e');
+  }
+}
+
+void ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
+                                             const MCSubtargetInfo &STI,
+                                             raw_ostream &O) {
+  // The operand is narrowed to 32 bits and printed in hexadecimal.
+  const uint32_t Low32 = MI->getOperand(OpNum).getImm();
+  O << markup("<imm:") << "#0x";
+  O.write_hex(Low32) << markup(">");
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
new file mode 100644
index 000000000000..69026956b60e
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -0,0 +1,272 @@
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an ARM MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
+
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class ARMInstPrinter : public MCInstPrinter {
+public:
+  ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                 const MCRegisterInfo &MRI);
+
+  bool applyTargetSpecificCLOption(StringRef Opt) override;
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                               raw_ostream &O);
+  virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                                       unsigned PrintMethodIdx,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = ARM::NoRegAltName);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O);
+
+  void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+                                  bool AlwaysPrintImm0);
+  void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+                               const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O);
+  void printMemBOption(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printTraceSyncBOption(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+
+  template <unsigned scale>
+  void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbSRImm(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbITMask(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O, unsigned Scale);
+  void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O);
+  void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O);
+  void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O);
+  void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printT2SOOperand(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
+  void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
+                                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O);
+  void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O);
+  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O);
+  void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printSetendOperand(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printCPSIMod(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printCPSIFlag(const MCInst *MI, unsigned OpNum,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNum,
+                             const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O);
+  void printMandatoryRestrictedPredicateOperand(const MCInst *MI,
+                                                unsigned OpNum,
+                                                const MCSubtargetInfo &STI,
+                                                raw_ostream &O);
+  void printMandatoryInvertedPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O);
+  void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+                                const MCSubtargetInfo &STI, raw_ostream &O);
+  void printRegisterList(const MCInst *MI, unsigned OpNum,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printPImmediate(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printCImmediate(const MCInst *MI, unsigned OpNum,
+                       const MCSubtargetInfo &STI, raw_ostream &O);
+  void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                         const MCSubtargetInfo &STI, raw_ostream &O);
+  void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printRotImmOperand(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printModImmOperand(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+
+  void printPCLabel(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFBits16(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFBits32(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorIndex(const MCInst *MI, unsigned OpNum,
+                        const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListOne(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListTwo(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+                                const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListThree(const MCInst *MI, unsigned OpNum,
+                            const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListFour(const MCInst *MI, unsigned OpNum,
+                           const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+                                    const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+                                   const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+                                        const MCSubtargetInfo &STI,
+                                        raw_ostream &O);
+  void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+                                          const MCSubtargetInfo &STI,
+                                          raw_ostream &O);
+  void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+                                         const MCSubtargetInfo &STI,
+                                         raw_ostream &O);
+  void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+  template<unsigned NumRegs>
+  void printMVEVectorList(const MCInst *MI, unsigned OpNum,
+                          const MCSubtargetInfo &STI, raw_ostream &O);
+  template<int64_t Angle, int64_t Remainder>
+  void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  // MVE
+  void printVPTPredicateOperand(const MCInst *MI, unsigned OpNum,
+                                const MCSubtargetInfo &STI,
+                                raw_ostream &O);
+  void printVPTMask(const MCInst *MI, unsigned OpNum,
+                    const MCSubtargetInfo &STI, raw_ostream &O);
+  template<int shift>
+  void printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum,
+                                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum,
+                                const MCSubtargetInfo &STI, raw_ostream &O);
+  void printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
+                               const MCSubtargetInfo &STI, raw_ostream &O);
+
+private:
+  unsigned DefaultAltIdx = ARM::NoRegAltName;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 3ee63ac374b3..d30d15df3d00 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 5e548162bec6..55d7b299674d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index b37b8073548f..dca6fe37d49a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,7 +49,7 @@ namespace {
 
 class ARMMCCodeEmitter : public MCCodeEmitter {
   const MCInstrInfo &MCII;
-  const MCContext &CTX;
+  MCContext &CTX;
   bool IsLittleEndian;
 
 public:
@@ -163,6 +162,15 @@ public:
                               SmallVectorImpl<MCFixup> &Fixups,
                               const MCSubtargetInfo &STI) const;
 
+  uint32_t getITMaskOpValue(const MCInst &MI, unsigned OpIdx,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const;
+
+  /// getMVEShiftImmOpValue - Return encoding info for the 'sz:imm5'
+  /// operand.
+  uint32_t getMVEShiftImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
 
   /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
   /// operand.
@@ -181,18 +189,37 @@ public:
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const;
 
+  /// getT2AddrModeImm7s4OpValue - Return encoding info for 'reg +/- imm7<<2'
+  /// operand.
+  uint32_t getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+                                      SmallVectorImpl<MCFixup> &Fixups,
+                                      const MCSubtargetInfo &STI) const;
+
   /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
   /// operand.
   uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const;
 
-  /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+  /// getT2ScaledImmOpValue - Return encoding info for '+/- immX<<Y'
   /// operand.
-  uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
-                              SmallVectorImpl<MCFixup> &Fixups,
-                              const MCSubtargetInfo &STI) const;
+  template<unsigned Bits, unsigned Shift>
+  uint32_t getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
 
+  /// getMveAddrModeRQOpValue - Return encoding info for 'reg, vreg'
+  /// operand.
+  uint32_t getMveAddrModeRQOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups,
+                                   const MCSubtargetInfo &STI) const;
+
+  /// getMveAddrModeQOpValue - Return encoding info for 'reg +/- imm7<<{shift}'
+  /// operand.
+  template<int shift>
+  uint32_t getMveAddrModeQOpValue(const MCInst &MI, unsigned OpIdx,
+                                  SmallVectorImpl<MCFixup> &Fixups,
+                                  const MCSubtargetInfo &STI) const;
 
   /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
   /// operand as needed by load/store instructions.
@@ -224,8 +251,9 @@ public:
     case ARM_AM::asr: return 2;
     case ARM_AM::ror:
     case ARM_AM::rrx: return 3;
+    default:
+      llvm_unreachable("Invalid ShiftOpc!");
     }
-    llvm_unreachable("Invalid ShiftOpc!");
   }
 
   /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
@@ -283,40 +311,6 @@ public:
     return MI.getOperand(Op).getReg() == ARM::CPSR;
   }
 
-  /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
-  unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
-                           SmallVectorImpl<MCFixup> &Fixups,
-                           const MCSubtargetInfo &STI) const {
-    const MCOperand &MO = MI.getOperand(Op);
-
-    // We expect MO to be an immediate or an expression,
-    // if it is an immediate - that's fine, just encode the value.
-    // Otherwise - create a Fixup.
-    if (MO.isExpr()) {
-      const MCExpr *Expr = MO.getExpr();
-      // In instruction code this value always encoded as lowest 12 bits,
-      // so we don't have to perform any specific adjustments.
-      // Due to requirements of relocatable records we have to use FK_Data_4.
-      // See ARMELFObjectWriter::ExplicitRelSym and
-      //     ARMELFObjectWriter::GetRelocTypeInner for more details.
-      MCFixupKind Kind = MCFixupKind(FK_Data_4);
-      Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
-      return 0;
-    }
-
-    unsigned SoImm = MO.getImm();
-    int SoImmVal = ARM_AM::getSOImmVal(SoImm);
-    assert(SoImmVal != -1 && "Not a valid so_imm value!");
-
-    // Encode rotate_imm.
-    unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
-      << ARMII::SoRotImmShift;
-
-    // Encode immed_8.
-    Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
-    return Binary;
-  }
-
   unsigned getModImmOpValue(const MCInst &MI, unsigned Op,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &ST) const {
@@ -358,7 +352,8 @@ public:
   unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
     SmallVectorImpl<MCFixup> &Fixups,
     const MCSubtargetInfo &STI) const;
-  unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+  template<unsigned Bits, unsigned Shift>
+  unsigned getT2AddrModeImmOpValue(const MCInst &MI, unsigned OpNum,
     SmallVectorImpl<MCFixup> &Fixups,
     const MCSubtargetInfo &STI) const;
   unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
@@ -418,6 +413,14 @@ public:
   unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const;
+  template <uint8_t shift, bool invert>
+  unsigned getExpandedImmOpValue(const MCInst &MI, unsigned Op,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const {
+    static_assert(shift <= 32, "Shift count must be less than or equal to 32.");
+    const MCOperand MO = MI.getOperand(Op);
+    return (invert ? (MO.getImm() ^ 0xff) : MO.getImm()) >> shift;
+  }
 
   unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
                                       unsigned EncodedValue,
@@ -436,6 +439,10 @@ public:
                                 unsigned EncodedValue,
                                 const MCSubtargetInfo &STI) const;
 
+  uint32_t getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
   void EmitByte(unsigned char C, raw_ostream &OS) const {
     OS << (char)C;
   }
@@ -451,6 +458,26 @@ public:
   void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const override;
+
+  template <bool isNeg, ARM::Fixups fixup>
+  uint32_t getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  uint32_t getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                   SmallVectorImpl<MCFixup> &Fixups,
+                                   const MCSubtargetInfo &STI) const;
+
+  uint32_t getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+  uint32_t getRestrictedCondCodeOpValue(const MCInst &MI, unsigned OpIdx,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const;
+  template <unsigned size>
+  uint32_t getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const;
 };
 
 } // end anonymous namespace
@@ -537,7 +564,15 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
     unsigned Reg = MO.getReg();
     unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
 
-    // Q registers are encoded as 2x their register number.
+    // In NEON, Q registers are encoded as 2x their register number,
+    // because they're using the same indices as the D registers they
+    // overlap. In MVE, there are no 64-bit vector instructions, so
+    // the encodings all refer to Q-registers by their literal
+    // register number.
+
+    if (STI.getFeatureBits()[ARM::HasMVEIntegerOps])
+      return RegNo;
+
     switch (Reg) {
     default:
       return RegNo;
@@ -849,6 +884,33 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
   return Val;
 }
 
+/// getITMaskOpValue - Return the architectural encoding of an IT
+/// predication mask, given the MCOperand format.
+uint32_t ARMMCCodeEmitter::
+getITMaskOpValue(const MCInst &MI, unsigned OpIdx,
+                 SmallVectorImpl<MCFixup> &Fixups,
+                 const MCSubtargetInfo &STI) const {
+  const MCOperand MaskMO = MI.getOperand(OpIdx);
+  assert(MaskMO.isImm() && "Unexpected operand type!");
+
+  unsigned Mask = MaskMO.getImm();
+
+  // IT masks are encoded as a sequence of replacement low-order bits
+  // for the condition code. So if the low bit of the starting
+  // condition code is 1, then we have to flip all the bits above the
+  // terminating bit (which is the lowest 1 bit).
+  assert(OpIdx > 0 && "IT mask appears first!");
+  const MCOperand CondMO = MI.getOperand(OpIdx-1);
+  assert(CondMO.isImm() && "Unexpected operand type!");
+  if (CondMO.getImm() & 1) {
+    unsigned LowBit = Mask & -Mask;
+    unsigned BitsAboveLowBit = 0xF & (-LowBit << 1);
+    Mask ^= BitsAboveLowBit;
+  }
+
+  return Mask;
+}
+
 /// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
 /// target.
 uint32_t ARMMCCodeEmitter::
@@ -878,6 +940,41 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
   return (Rm << 3) | Rn;
 }
 
+/// getMVEShiftImmOpValue - Return encoding info for the 'sz:imm5'
+/// operand.
+uint32_t
+ARMMCCodeEmitter::getMVEShiftImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  // {4-0} = szimm5
+  // The value we are trying to encode is an immediate between either the
+  // range of [1-7] or [1-15] depending on whether we are dealing with the
+  // u8/s8 or the u16/s16 variants respectively.
+  // This value is encoded as follows, if ShiftImm is the value within those
+  // ranges then the encoding szimm5 = ShiftImm + size, where size is either 8
+  // or 16.
+
+  unsigned Size, ShiftImm;
+  switch(MI.getOpcode()) {
+    case ARM::MVE_VSHLL_imms16bh:
+    case ARM::MVE_VSHLL_imms16th:
+    case ARM::MVE_VSHLL_immu16bh:
+    case ARM::MVE_VSHLL_immu16th:
+      Size = 16;
+      break;
+    case ARM::MVE_VSHLL_imms8bh:
+    case ARM::MVE_VSHLL_imms8th:
+    case ARM::MVE_VSHLL_immu8bh:
+    case ARM::MVE_VSHLL_immu8th:
+      Size = 8;
+      break;
+    default:
+      llvm_unreachable("Use of operand not supported by this instruction");
+  }
+  ShiftImm = MI.getOperand(OpIdx).getImm();
+  return Size + ShiftImm;
+}
+
 /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
 uint32_t ARMMCCodeEmitter::
 getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
@@ -929,12 +1026,11 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
   return Binary;
 }
 
-/// getT2Imm8s4OpValue - Return encoding info for
-/// '+/- imm8<<2' operand.
+template<unsigned Bits, unsigned Shift>
 uint32_t ARMMCCodeEmitter::
-getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
-                   SmallVectorImpl<MCFixup> &Fixups,
-                   const MCSubtargetInfo &STI) const {
+getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+                      SmallVectorImpl<MCFixup> &Fixups,
+                      const MCSubtargetInfo &STI) const {
   // FIXME: The immediate operand should have already been encoded like this
   // before ever getting here. The encoder method should just need to combine
   // the MI operands for the register and the offset into a single
@@ -942,25 +1038,75 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
   // style, unfortunately. As-is, we can't represent the distinct encoding
   // for #-0.
 
-  // {8}    = (U)nsigned (add == '1', sub == '0')
-  // {7-0}  = imm8
-  int32_t Imm8 = MI.getOperand(OpIdx).getImm();
-  bool isAdd = Imm8 >= 0;
+  // {Bits}    = (U)nsigned (add == '1', sub == '0')
+  // {(Bits-1)-0}  = immediate
+  int32_t Imm = MI.getOperand(OpIdx).getImm();
+  bool isAdd = Imm >= 0;
 
   // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
-  if (Imm8 < 0)
-    Imm8 = -(uint32_t)Imm8;
+  if (Imm < 0)
+    Imm = -(uint32_t)Imm;
 
-  // Scaled by 4.
-  Imm8 /= 4;
+  Imm >>= Shift;
 
-  uint32_t Binary = Imm8 & 0xff;
+  uint32_t Binary = Imm & ((1U << Bits) - 1);
   // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
   if (isAdd)
-    Binary |= (1 << 8);
+    Binary |= (1U << Bits);
   return Binary;
 }
 
+/// getMveAddrModeRQOpValue - Return encoding info for 'reg, vreg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getMveAddrModeRQOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups,
+                        const MCSubtargetInfo &STI) const {
+    // {6-3} Rn
+    // {2-0} Qm
+    const MCOperand &M0 = MI.getOperand(OpIdx);
+    const MCOperand &M1 = MI.getOperand(OpIdx + 1);
+
+    unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(M0.getReg());
+    unsigned Qm = CTX.getRegisterInfo()->getEncodingValue(M1.getReg());
+
+    assert(Qm < 8 && "Qm is supposed to be encodable in 3 bits");
+
+    return (Rn << 3) | Qm;
+}
+
+/// getMveAddrModeQOpValue - Return encoding info for 'reg +/- imm7<<{shift}'
+/// operand.
+template<int shift>
+uint32_t ARMMCCodeEmitter::
+getMveAddrModeQOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups,
+                        const MCSubtargetInfo &STI) const {
+    // {10-8} Qm
+    // {7} add flag (1 = add, 0 = sub); {6-0} scaled immediate magnitude
+    const MCOperand &M0 = MI.getOperand(OpIdx);
+    const MCOperand &M1 = MI.getOperand(OpIdx + 1);
+
+    unsigned Qm = CTX.getRegisterInfo()->getEncodingValue(M0.getReg());
+    int32_t Imm = M1.getImm();
+
+    bool isAdd = Imm >= 0;
+
+    Imm >>= shift;
+
+    if (!isAdd)
+      Imm = -(uint32_t)Imm;
+
+    Imm &= 0x7f;
+
+    if (isAdd)
+      Imm |= 0x80;
+
+    assert(Qm < 8 && "Qm is supposed to be encodable in 3 bits");
+
+    return (Qm << 8) | Imm;
+}
+
 /// getT2AddrModeImm8s4OpValue - Return encoding info for
 /// 'reg +/- imm8<<2' operand.
 uint32_t ARMMCCodeEmitter::
@@ -1002,6 +1148,33 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
   return Binary;
 }
 
+/// getT2AddrModeImm7s4OpValue - Return encoding info for
+/// 'reg +/- imm7<<2' operand.
+uint32_t
+ARMMCCodeEmitter::getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+                                             SmallVectorImpl<MCFixup> &Fixups,
+                                             const MCSubtargetInfo &STI) const {
+  // {11-8} = reg
+  // {7}    = (A)dd (add == '1', sub == '0')
+  // {6-0}  = imm7
+  unsigned Reg, Imm7;
+  // If the first operand isn't a register, we have a label reference.
+  bool isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm7, Fixups, STI);
+
+  // FIXME: The immediate operand should have already been encoded like this
+  // before ever getting here. The encoder method should just need to combine
+  // the MI operands for the register and the offset into a single
+  // representation for the complex operand in the .td file. This isn't just
+  // style, unfortunately. As-is, we can't represent the distinct encoding
+  // for #-0.
+  uint32_t Binary = (Imm7 >> 2) & 0x7f;
+  // Immediate is always encoded as positive. The 'A' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 7);
+  Binary |= (Reg << 8);
+  return Binary;
+}
+
 /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
 /// 'reg + imm8<<2' operand.
 uint32_t ARMMCCodeEmitter::
@@ -1434,25 +1607,29 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
   return Value;
 }
 
+template<unsigned Bits, unsigned Shift>
 unsigned ARMMCCodeEmitter::
-getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
-                         SmallVectorImpl<MCFixup> &Fixups,
-                         const MCSubtargetInfo &STI) const {
+getT2AddrModeImmOpValue(const MCInst &MI, unsigned OpNum,
+                        SmallVectorImpl<MCFixup> &Fixups,
+                        const MCSubtargetInfo &STI) const {
   const MCOperand &MO1 = MI.getOperand(OpNum);
   const MCOperand &MO2 = MI.getOperand(OpNum+1);
 
   // FIXME: Needs fixup support.
   unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
 
-  // Even though the immediate is 8 bits long, we need 9 bits in order
+  // If the immediate is B bits long, we need B+1 bits in order
   // to represent the (inverse of the) sign bit.
-  Value <<= 9;
+  Value <<= (Bits + 1);
   int32_t tmp = (int32_t)MO2.getImm();
-  if (tmp < 0)
+  if (tmp == INT32_MIN) { // represents subtracting zero rather than adding it
+    tmp = 0;
+  } else if (tmp < 0) {
     tmp = abs(tmp);
-  else
-    Value |= 256; // Set the ADD bit
-  Value |= tmp & 255;
+  } else {
+    Value |= (1U << Bits); // Set the ADD bit
+  }
+  Value |= (tmp >> Shift) & ((1U << Bits) - 1);
   return Value;
 }
 
@@ -1534,7 +1711,7 @@ unsigned ARMMCCodeEmitter::
 getRegisterListOpValue(const MCInst &MI, unsigned Op,
                        SmallVectorImpl<MCFixup> &Fixups,
                        const MCSubtargetInfo &STI) const {
-  // VLDM/VSTM:
+  // VLDM/VSTM/VSCCLRM:
   //   {12-8} = Vd
   //   {7-0}  = Number of registers
   //
@@ -1543,28 +1720,40 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
   unsigned Reg = MI.getOperand(Op).getReg();
   bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
   bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+  bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM;
 
   unsigned Binary = 0;
 
   if (SPRRegs || DPRRegs) {
-    // VLDM/VSTM
+    // VLDM/VSTM/VSCCLRM
     unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
     unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
     Binary |= (RegNo & 0x1f) << 8;
+
+    // Ignore VPR
+    if (MI.getOpcode() == ARM::VSCCLRMD || MI.getOpcode() == ARM::VSCCLRMS)
+      --NumRegs;
     if (SPRRegs)
       Binary |= NumRegs;
     else
       Binary |= NumRegs * 2;
   } else {
     const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
-    assert(std::is_sorted(MI.begin() + Op, MI.end(),
-                          [&](const MCOperand &LHS, const MCOperand &RHS) {
-                            return MRI.getEncodingValue(LHS.getReg()) <
-                                   MRI.getEncodingValue(RHS.getReg());
-                          }));
+    if (!CLRMRegs) {
+      assert(std::is_sorted(MI.begin() + Op, MI.end(),
+                            [&](const MCOperand &LHS, const MCOperand &RHS) {
+                              return MRI.getEncodingValue(LHS.getReg()) <
+                                     MRI.getEncodingValue(RHS.getReg());
+                            }));
+    }
 
     for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
-      unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+      unsigned RegNo;
+      if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) {
+        RegNo = 15;
+      } else {
+        RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+      }
       Binary |= 1 << RegNo;
     }
   }
@@ -1710,6 +1899,120 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
   ++MCNumEmitted;  // Keep track of the # of mi's emitted.
 }
 
+template <bool isNeg, ARM::Fixups fixup>
+uint32_t
+ARMMCCodeEmitter::getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                     SmallVectorImpl<MCFixup> &Fixups,
+                                     const MCSubtargetInfo &STI) const {
+  const MCOperand MO = MI.getOperand(OpIdx);
+  if (MO.isExpr())
+    return ::getBranchTargetOpValue(MI, OpIdx, fixup, Fixups, STI);
+  return isNeg ? -(MO.getImm() >> 1) : (MO.getImm() >> 1);
+}
+
+uint32_t
+ARMMCCodeEmitter::getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                          SmallVectorImpl<MCFixup> &Fixups,
+                                          const MCSubtargetInfo &STI) const {
+  const MCOperand MO = MI.getOperand(OpIdx);
+  const MCOperand BranchMO = MI.getOperand(0);
+
+  if (MO.isExpr()) {
+    assert(BranchMO.isExpr());
+    const MCExpr *DiffExpr = MCBinaryExpr::createSub(
+        MO.getExpr(), BranchMO.getExpr(), CTX);
+    MCFixupKind Kind = MCFixupKind(ARM::fixup_bfcsel_else_target);
+    Fixups.push_back(llvm::MCFixup::create(0, DiffExpr, Kind, MI.getLoc()));
+    return 0;
+  }
+
+  assert(MO.isImm() && BranchMO.isImm());
+  int Diff = MO.getImm() - BranchMO.getImm();
+  assert(Diff == 4 || Diff == 2);
+
+  return Diff == 4;
+}
+
+uint32_t ARMMCCodeEmitter::getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx,
+                                             SmallVectorImpl<MCFixup> &Fixups,
+                                             const MCSubtargetInfo &STI)const {
+  const MCOperand MO = MI.getOperand(OpIdx);
+  assert(MO.isImm() && "Unexpected operand type!");
+
+  int Value = MO.getImm();
+  int Imm = 0;
+
+  // VPT Masks are actually encoded as a series of invert/don't invert bits,
+  // rather than true/false bits.
+  unsigned PrevBit = 0;
+  for (int i = 3; i >= 0; --i) {
+    unsigned Bit = (Value >> i) & 1;
+
+    // Check if we are at the end of the mask.
+    if ((Value & ~(~0U << i)) == 0) {
+      Imm |= (1 << i);
+      break;
+    }
+
+    // Convert the bit in the mask based on the previous bit.
+    if (Bit != PrevBit)
+      Imm |= (1 << i);
+
+    PrevBit = Bit;
+  }
+
+  return Imm;
+}
+
+uint32_t ARMMCCodeEmitter::getRestrictedCondCodeOpValue(
+    const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+    const MCSubtargetInfo &STI) const {
+
+  const MCOperand MO = MI.getOperand(OpIdx);
+  assert(MO.isImm() && "Unexpected operand type!");
+
+  switch (MO.getImm()) {
+  default:
+    assert(0 && "Unexpected Condition!");
+    return 0;
+  case ARMCC::HS:
+  case ARMCC::EQ:
+    return 0;
+  case ARMCC::HI:
+  case ARMCC::NE:
+    return 1;
+  case ARMCC::GE:
+    return 4;
+  case ARMCC::LT:
+    return 5;
+  case ARMCC::GT:
+    return 6;
+  case ARMCC::LE:
+    return 7;
+  }
+}
+
+uint32_t ARMMCCodeEmitter::
+getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx,
+                   SmallVectorImpl<MCFixup> &Fixups,
+                   const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  assert(MO.isImm() && "Unexpected operand type!");
+  return countTrailingZeros((uint64_t)MO.getImm());
+}
+
+template <unsigned start>
+uint32_t ARMMCCodeEmitter::
+getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const {
+  const MCOperand MO = MI.getOperand(OpIdx);
+  assert(MO.isImm() && "Unexpected operand type!");
+
+  int Value = MO.getImm();
+  return Value - start;
+}
+
 #include "ARMGenMCCodeEmitter.inc"
 
 MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 306f068312f5..fbad05fb1759 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 75dde8008fca..033a43288f3e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -1,9 +1,8 @@
 //===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 46434007a854..90022a8d88a6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,9 @@
 
 #include "ARMMCTargetDesc.h"
 #include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
 #include "ARMMCAsmInfo.h"
-#include "InstPrinter/ARMInstPrinter.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
@@ -277,14 +277,29 @@ class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
 public:
   ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
 
-  bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                      uint64_t Size, uint64_t &Target) const override {
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                      uint64_t &Target) const override {
+    unsigned OpId; // Index of the operand that holds the branch offset.
+    switch (Inst.getOpcode()) {
+    default:
+      OpId = 0; // All other opcodes: check and use operand 0.
+      break;
+    case ARM::t2WLS:
+    case ARM::t2LEUpdate:
+      OpId = 2; // For these opcodes the offset is operand 2.
+      break;
+    case ARM::t2LE:
+      OpId = 1; // For t2LE the offset is operand 1.
+      break;
+    }
+
     // We only handle PCRel branches for now.
-    if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+    if (Info->get(Inst.getOpcode()).OpInfo[OpId].OperandType !=
+        MCOI::OPERAND_PCREL)
       return false;
 
-    int64_t Imm = Inst.getOperand(0).getImm();
-    Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+    // In Thumb mode the PC is always off by 4 bytes.
+    Target = Addr + Inst.getOperand(OpId).getImm() + 4;
     return true;
   }
 };
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 3ee004592ac6..9cbbd56225ef 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include <memory>
 #include <string>
 
@@ -39,11 +39,6 @@ class Triple;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheARMLETarget();
-Target &getTheThumbLETarget();
-Target &getTheARMBETarget();
-Target &getTheThumbBETarget();
-
 namespace ARM_MC {
 std::string ParseARMTriple(const Triple &TT, StringRef CPU);
 
@@ -100,6 +95,20 @@ createARMWinCOFFObjectWriter(bool Is64Bit);
 
 /// Construct ARM Mach-O relocation info.
 MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
+
+namespace ARM {
+enum OperandType { // ARM-specific MC operand types.
+  OPERAND_VPRED_R = MCOI::OPERAND_FIRST_TARGET,
+  OPERAND_VPRED_N,
+};
+inline bool isVpred(OperandType op) { // True for either VPRED kind.
+  return op == OPERAND_VPRED_R || op == OPERAND_VPRED_N;
+}
+inline bool isVpred(uint8_t op) { // Same test for a raw operand-type byte.
+  return isVpred(static_cast<OperandType>(op));
+}
+} // end namespace ARM
+
 } // End llvm namespace
 
 // Defines symbolic names for ARM registers.  This defines a mapping from
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 6259c98321f4..886b7e7bc84e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -1,9 +1,8 @@
 //===- ARMMachORelocationInfo.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 0ced8195790d..c49885023cb2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 91836cff95c8..b863517c0cca 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===- ARMTargetStreamer.cpp - ARMTargetStreamer class --*- C++ -*---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,7 +124,9 @@ static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
     if (STI.hasFeature(ARM::FeatureRClass))
       return ARMBuildAttrs::v8_R;
     return ARMBuildAttrs::v8_A;
-  } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+  } else if (STI.hasFeature(ARM::HasV8_1MMainlineOps))
+    return ARMBuildAttrs::v8_1_M_Main;
+  else if (STI.hasFeature(ARM::HasV8MMainlineOps))
     return ARMBuildAttrs::v8_M_Main;
   else if (STI.hasFeature(ARM::HasV7Ops)) {
     if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
@@ -223,37 +224,37 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
                         ? ARMBuildAttrs::AllowNeonARMv8_1a
                         : ARMBuildAttrs::AllowNeonARMv8);
   } else {
-    if (STI.hasFeature(ARM::FeatureFPARMv8))
+    if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
       // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
       // FPU, but there are two different names for it depending on the CPU.
-      emitFPU(STI.hasFeature(ARM::FeatureD16)
-                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
-                                                           : ARM::FK_FPV5_D16)
-                  : ARM::FK_FP_ARMV8);
-    else if (STI.hasFeature(ARM::FeatureVFP4))
-      emitFPU(STI.hasFeature(ARM::FeatureD16)
-                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
-                                                           : ARM::FK_VFPV4_D16)
-                  : ARM::FK_VFPV4);
-    else if (STI.hasFeature(ARM::FeatureVFP3))
+      emitFPU(STI.hasFeature(ARM::FeatureD32)
+                  ? ARM::FK_FP_ARMV8
+                  : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+                                                      : ARM::FK_FPV5_SP_D16));
+    else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+      emitFPU(STI.hasFeature(ARM::FeatureD32)
+                  ? ARM::FK_VFPV4
+                  : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16
+                                                      : ARM::FK_FPV4_SP_D16));
+    else if (STI.hasFeature(ARM::FeatureVFP3_D16_SP))
       emitFPU(
-          STI.hasFeature(ARM::FeatureD16)
-              // +d16
-              ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
-                     ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
-                                                         : ARM::FK_VFPV3XD)
-                     : (STI.hasFeature(ARM::FeatureFP16)
+          STI.hasFeature(ARM::FeatureD32)
+              // +d32
+              ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+                                                  : ARM::FK_VFPV3)
+              // -d32
+              : (STI.hasFeature(ARM::FeatureFP64)
+                     ? (STI.hasFeature(ARM::FeatureFP16)
                             ? ARM::FK_VFPV3_D16_FP16
-                            : ARM::FK_VFPV3_D16))
-              // -d16
-              : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
-                                                  : ARM::FK_VFPV3));
-    else if (STI.hasFeature(ARM::FeatureVFP2))
+                            : ARM::FK_VFPV3_D16)
+                     : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+                                                         : ARM::FK_VFPV3XD)));
+    else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP))
       emitFPU(ARM::FK_VFPV2);
   }
 
   // ABI_HardFP_use attribute to indicate single precision FP.
-  if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+  if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64))
     emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
                   ARMBuildAttrs::HardFPSinglePrecision);
 
@@ -263,6 +264,11 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(ARM::FeatureMP))
     emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
 
+  if (STI.hasFeature(ARM::HasMVEFloatOps))
+    emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEIntegerAndFloat);
+  else if (STI.hasFeature(ARM::HasMVEIntegerOps))
+    emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEInteger);
+
   // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
   // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
   // It is not possible to produce DisallowDIV: if hwdiv is present in the base
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index d3ab83bbccbc..38667d686b85 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -1,9 +1,8 @@
 //===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index a7bfbdf4938e..c3134c04b33a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -1,9 +1,8 @@
 //===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 30cbde1ca71f..054a95dd1e12 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 32cb3dcdcad8..2e816bea5e91 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 7f03e1463c1d..4b25986b90a7 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,9 +1,8 @@
 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index b0491a4108a6..86cb907abfa3 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -1,13 +1,12 @@
 //===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.h b/lib/Target/ARM/TargetInfo/ARMTargetInfo.h
new file mode 100644
index 000000000000..c217dd5c4612
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.h
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.h - ARM Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
+#define LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
+
+namespace llvm {
+
+class Target; // Forward declaration; only references are used below.
+
+Target &getTheARMLETarget();
+Target &getTheARMBETarget();
+Target &getTheThumbLETarget();
+Target &getTheThumbBETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 5c745e112b2e..426e9a0ed9b8 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,15 +63,52 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
   return !MFI.hasVarSizedObjects();
 }
 
-static void emitSPUpdate(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator &MBBI,
-                         const TargetInstrInfo &TII, const DebugLoc &dl,
-                         const ThumbRegisterInfo &MRI, int NumBytes,
-                         unsigned MIFlags = MachineInstr::NoFlags) {
+static void
+emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MBBI,
+                             const TargetInstrInfo &TII, const DebugLoc &dl,
+                             const ThumbRegisterInfo &MRI, int NumBytes,
+                             unsigned ScratchReg, unsigned MIFlags) {
+  // If it would take more than three instructions to adjust the stack pointer
+  // using tADDspi/tSUBspi, load an immediate instead.
+  if (std::abs(NumBytes) > 508 * 3) {
+    // We use a different codepath here from the normal
+    // emitThumbRegPlusImmediate so we don't have to deal with register
+    // scavenging. (Scavenging could try to use the emergency spill slot
+    // before we've actually finished setting up the stack.)
+    if (ScratchReg == ARM::NoRegister)
+      report_fatal_error("Failed to emit Thumb1 stack adjustment");
+    MachineFunction &MF = *MBB.getParent();
+    const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
+    if (ST.genExecuteOnly()) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
+        .addImm(NumBytes).setMIFlags(MIFlags);
+    } else {
+      MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
+                            0, MIFlags);
+    }
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
+      .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
+      .add(predOps(ARMCC::AL)).setMIFlags(MIFlags); // tag like the load
+    return;
+  }
+  // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
+  // won't change.
   emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
                             MRI, MIFlags);
+
 }
 
+static void emitCallSPUpdate(MachineBasicBlock &MBB, // Call-frame SP adjust.
+                             MachineBasicBlock::iterator &MBBI,
+                             const TargetInstrInfo &TII, const DebugLoc &dl,
+                             const ThumbRegisterInfo &MRI, int NumBytes,
+                             unsigned MIFlags = MachineInstr::NoFlags) {
+  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, MIFlags); // SP = SP + NumBytes
+}
+
+
 MachineBasicBlock::iterator Thumb1FrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -96,10 +132,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // Replace the pseudo instruction with a new instruction...
       unsigned Opc = Old.getOpcode();
       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
-        emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
       } else {
         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
-        emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
       }
     }
   }
@@ -142,8 +178,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   int FramePtrSpillFI = 0;
 
   if (ArgRegsSaveSize) {
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
-                 MachineInstr::FrameSetup);
+    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
+                                 ARM::NoRegister, MachineInstr::FrameSetup);
     CFAOffset -= ArgRegsSaveSize;
     unsigned CFIIndex = MF.addFrameInst(
         MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
@@ -154,8 +190,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0) {
-      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
-                   MachineInstr::FrameSetup);
+      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+                                   -(NumBytes - ArgRegsSaveSize),
+                                   ARM::NoRegister, MachineInstr::FrameSetup);
       CFAOffset -= NumBytes - ArgRegsSaveSize;
       unsigned CFIIndex = MF.addFrameInst(
           MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
@@ -332,8 +369,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
 
   if (NumBytes) {
     // Insert it after all the callee-save spills.
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
-                 MachineInstr::FrameSetup);
+    //
+    // For a large stack frame, we might need a scratch register to store
+    // the size of the frame.  We know all callee-save registers are free
+    // at this point in the prologue, so pick one.
+    unsigned ScratchRegister = ARM::NoRegister;
+    for (auto &I : CSI) {
+      unsigned Reg = I.getReg();
+      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+        ScratchRegister = Reg;
+        break;
+      }
+    }
+    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+                                 ScratchRegister, MachineInstr::FrameSetup);
     if (!HasFP) {
       CFAOffset -= NumBytes;
       unsigned CFIIndex = MF.addFrameInst(
@@ -438,7 +487,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0)
-      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize);
+      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+                                   NumBytes - ArgRegsSaveSize, ARM::NoRegister,
+                                   MachineInstr::NoFlags);
   } else {
     // Unwind MBBI to point to first LDR / VLDRD.
     if (MBBI != MBB.begin()) {
@@ -473,13 +524,27 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
             .addReg(FramePtr)
             .add(predOps(ARMCC::AL));
     } else {
+      // For a large stack frame, we might need a scratch register to store
+      // the size of the frame.  We know all callee-save registers are free
+      // at this point in the epilogue, so pick one.
+      unsigned ScratchRegister = ARM::NoRegister;
+      bool HasFP = hasFP(MF);
+      for (auto &I : MFI.getCalleeSavedInfo()) {
+        unsigned Reg = I.getReg();
+        if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+          ScratchRegister = Reg;
+          break;
+        }
+      }
       if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
           &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
         MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
         if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
-          emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+          emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
+                                       ScratchRegister, MachineInstr::NoFlags);
       } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
-        emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+        emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
+                                     ScratchRegister, MachineInstr::NoFlags);
     }
   }
 
@@ -666,7 +731,9 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
     // Advance past the pop instruction.
     MBBI++;
     // Increment the SP.
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
+    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+                                 ArgRegsSaveSize + 4, ARM::NoRegister,
+                                 MachineInstr::NoFlags);
     return true;
   }
 
@@ -707,7 +774,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
       .add(predOps(ARMCC::AL))
       .addReg(PopReg, RegState::Define);
 
-  emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+  emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
+                               ARM::NoRegister, MachineInstr::NoFlags);
 
   BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
       .addReg(ARM::LR, RegState::Define)
@@ -821,8 +889,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
         findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
 
     // Create the PUSH, but don't insert it yet (the MOVs need to come first).
-    MachineInstrBuilder PushMIB =
-        BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+    MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
+                                      .add(predOps(ARMCC::AL))
+                                      .setMIFlags(MachineInstr::FrameSetup);
 
     SmallVector<unsigned, 4> RegsToPush;
     while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
@@ -835,7 +904,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
         BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
             .addReg(*CopyReg, RegState::Define)
             .addReg(*HiRegToSave, getKillRegState(isKill))
-            .add(predOps(ARMCC::AL));
+            .add(predOps(ARMCC::AL))
+            .setMIFlags(MachineInstr::FrameSetup);
 
         // Record the register that must be added to the PUSH.
         RegsToPush.push_back(*CopyReg);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index a4d6451ccf12..61af48712b6c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -1,9 +1,8 @@
 //===- Thumb1FrameLowering.h - Thumb1-specific frame info stuff ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 11aa285fc939..f57d93a2e83d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9f04a3ed262f..bc433e7a7a93 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,9 +1,8 @@
 //===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index e0a5f7f04fa9..3143eb9840ed 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,9 +1,8 @@
 //===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,13 +31,16 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "thumb2-it"
+#define PASS_NAME "Thumb IT blocks insertion pass"
 
 STATISTIC(NumITs,        "Number of IT blocks inserted");
 STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
 
+using RegisterSet = SmallSet<unsigned, 4>;
+
 namespace {
 
-  class Thumb2ITBlockPass : public MachineFunctionPass {
+  class Thumb2ITBlock : public MachineFunctionPass {
   public:
     static char ID;
 
@@ -47,7 +49,7 @@ namespace {
     const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;
 
-    Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+    Thumb2ITBlock() : MachineFunctionPass(ID) {}
 
     bool runOnMachineFunction(MachineFunction &Fn) override;
 
@@ -57,33 +59,32 @@ namespace {
     }
 
     StringRef getPassName() const override {
-      return "Thumb IT blocks insertion pass";
+      return PASS_NAME;
     }
 
   private:
     bool MoveCopyOutOfITBlock(MachineInstr *MI,
                               ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
-                              SmallSet<unsigned, 4> &Defs,
-                              SmallSet<unsigned, 4> &Uses);
-    bool InsertITInstructions(MachineBasicBlock &MBB);
+                              RegisterSet &Defs, RegisterSet &Uses);
+    bool InsertITInstructions(MachineBasicBlock &Block);
   };
 
-  char Thumb2ITBlockPass::ID = 0;
+  char Thumb2ITBlock::ID = 0;
 
 } // end anonymous namespace
 
+INITIALIZE_PASS(Thumb2ITBlock, DEBUG_TYPE, PASS_NAME, false, false)
+
 /// TrackDefUses - Tracking what registers are being defined and used by
 /// instructions in the IT block. This also tracks "dependencies", i.e. uses
 /// in the IT block that are defined before the IT instruction.
-static void TrackDefUses(MachineInstr *MI,
-                         SmallSet<unsigned, 4> &Defs,
-                         SmallSet<unsigned, 4> &Uses,
+static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses,
                          const TargetRegisterInfo *TRI) {
-  SmallVector<unsigned, 4> LocalDefs;
-  SmallVector<unsigned, 4> LocalUses;
+  using RegList = SmallVector<unsigned, 4>;
+  RegList LocalDefs;
+  RegList LocalUses;
 
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (auto &MO : MI->operands()) {
     if (!MO.isReg())
       continue;
     unsigned Reg = MO.getReg();
@@ -95,27 +96,21 @@ static void TrackDefUses(MachineInstr *MI,
       LocalDefs.push_back(Reg);
   }
 
-  for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
-    unsigned Reg = LocalUses[i];
-    for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
-         Subreg.isValid(); ++Subreg)
-      Uses.insert(*Subreg);
-  }
+  auto InsertUsesDefs = [&](RegList &Regs, RegisterSet &UsesDefs) {
+    for (unsigned Reg : Regs)
+      for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+           Subreg.isValid(); ++Subreg)
+        UsesDefs.insert(*Subreg);
+  };
 
-  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
-    unsigned Reg = LocalDefs[i];
-    for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
-         Subreg.isValid(); ++Subreg)
-      Defs.insert(*Subreg);
-    if (Reg == ARM::CPSR)
-      continue;
-  }
+  InsertUsesDefs(LocalDefs, Defs);
+  InsertUsesDefs(LocalUses, Uses);
 }
 
 /// Clear kill flags for any uses in the given set.  This will likely
 /// conservatively remove more kill flags than are necessary, but removing them
 /// is safer than incorrect kill flags remaining on instructions.
-static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
+static void ClearKillFlags(MachineInstr *MI, RegisterSet &Uses) {
   for (MachineOperand &MO : MI->operands()) {
     if (!MO.isReg() || MO.isDef() || !MO.isKill())
       continue;
@@ -138,10 +133,9 @@ static bool isCopy(MachineInstr *MI) {
 }
 
 bool
-Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
-                                      ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
-                                        SmallSet<unsigned, 4> &Defs,
-                                        SmallSet<unsigned, 4> &Uses) {
+Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI,
+                                    ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+                                    RegisterSet &Defs, RegisterSet &Uses) {
   if (!isCopy(MI))
     return false;
   // llvm models select's as two-address instructions. That means a copy
@@ -181,10 +175,13 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
 
   // Then peek at the next instruction to see if it's predicated on CC or OCC.
   // If not, then there is nothing to be gained by moving the copy.
-  MachineBasicBlock::iterator I = MI; ++I;
+  MachineBasicBlock::iterator I = MI;
+  ++I;
   MachineBasicBlock::iterator E = MI->getParent()->end();
+
   while (I != E && I->isDebugInstr())
     ++I;
+
   if (I != E) {
     unsigned NPredReg = 0;
     ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
@@ -194,12 +191,11 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
   return false;
 }
 
-bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
   bool Modified = false;
-
-  SmallSet<unsigned, 4> Defs;
-  SmallSet<unsigned, 4> Uses;
+  RegisterSet Defs, Uses;
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+
   while (MBBI != E) {
     MachineInstr *MI = &*MBBI;
     DebugLoc dl = MI->getDebugLoc();
@@ -246,7 +242,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
         unsigned NPredReg = 0;
         ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
         if (NCC == CC || NCC == OCC) {
-          Mask |= (NCC & 1) << Pos;
+          Mask |= ((NCC ^ CC) & 1) << Pos;
           // Add implicit use of ITSTATE.
           NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
                                                  true/*isImp*/, false/*isKill*/));
@@ -270,8 +266,6 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
 
     // Finalize IT mask.
     Mask |= (1 << Pos);
-    // Tag along (firstcond[0] << 4) with the mask.
-    Mask |= (CC & 1) << 4;
     MIB.addImm(Mask);
 
     // Last instruction in IT block kills ITSTATE.
@@ -288,7 +282,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
   return Modified;
 }
 
-bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
   const ARMSubtarget &STI =
       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
   if (!STI.isThumb2())
@@ -302,11 +296,8 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
     return false;
 
   bool Modified = false;
-  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
-    MachineBasicBlock &MBB = *MFI;
-    ++MFI;
+  for (auto &MBB : Fn )
     Modified |= InsertITInstructions(MBB);
-  }
 
   if (Modified)
     AFI->setHasITBlocks(true);
@@ -316,6 +307,132 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
 
 /// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
 /// insertion pass.
-FunctionPass *llvm::createThumb2ITBlockPass() {
-  return new Thumb2ITBlockPass();
+FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "arm-mve-vpt"
+
+namespace {
+  class MVEVPTBlock : public MachineFunctionPass {
+  public:
+    static char ID;
+    const Thumb2InstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+
+    MVEVPTBlock() : MachineFunctionPass(ID) {}
+
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return "MVE VPT block insertion pass";
+    }
+
+  private:
+    bool InsertVPTBlocks(MachineBasicBlock &MBB);
+  };
+
+  char MVEVPTBlock::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
+
+enum VPTMaskValue {
+  T     =  8, // 0b1000
+  TT    =  4, // 0b0100
+  TE    = 12, // 0b1100
+  TTT   =  2, // 0b0010
+  TTE   =  6, // 0b0110
+  TEE   = 10, // 0b1010
+  TET   = 14, // 0b1110
+  TTTT  =  1, // 0b0001
+  TTTE  =  3, // 0b0011
+  TTEE  =  5, // 0b0101
+  TTET  =  7, // 0b0111
+  TEEE  =  9, // 0b1001
+  TEET  = 11, // 0b1011
+  TETT  = 13, // 0b1101
+  TETE  = 15  // 0b1111
+};
+
+bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
+  bool Modified = false;
+  MachineBasicBlock::iterator MBIter = Block.begin();
+  MachineBasicBlock::iterator EndIter = Block.end();
+
+  while (MBIter != EndIter) {
+    MachineInstr *MI = &*MBIter;
+    unsigned PredReg = 0;
+    DebugLoc dl = MI->getDebugLoc();
+
+    ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
+
+    // The idea of the predicate is that None, Then and Else are for use when
+    // handling assembly language: they correspond to the three possible
+    // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
+    // from assembly source or disassembled from object code, you expect to see
+    // a mixture whenever there's a long VPT block. But in code generation, we
+    // hope we'll never generate an Else as input to this pass.
+
+    assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
+
+    if (Pred == ARMVCC::None) {
+      ++MBIter;
+      continue;
+    }
+
+    MachineInstrBuilder MIBuilder =
+        BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST));
+    // The mask value for the VPST instruction is T = 0b1000 = 8
+    MIBuilder.addImm(VPTMaskValue::T);
+
+    MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr();
+    int VPTInstCnt = 1;
+    ARMVCC::VPTCodes NextPred;
+
+    do {
+      ++MBIter;
+      NextPred = getVPTInstrPredicate(*MBIter, PredReg);
+    } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4);
+
+    MachineInstr *LastMI = &*MBIter;
+    finalizeBundle(Block, VPSTInsertPos.getInstrIterator(),
+                   ++LastMI->getIterator());
+
+    Modified = true;
+    LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump(););
+
+    ++MBIter;
+  }
+  return Modified;
+}
+
+bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
+  const ARMSubtarget &STI =
+      static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+    return false;
+
+  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+  TRI = STI.getRegisterInfo();
+
+  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
+                    << "********** Function: " << Fn.getName() << '\n');
+
+  bool Modified = false;
+  for (MachineBasicBlock &MBB : Fn)
+    Modified |= InsertVPTBlocks(MBB);
+
+  LLVM_DEBUG(dbgs() << "**************************************\n");
+  return Modified;
 }
+
+/// createMVEVPTBlockPass - Returns an instance of the MVE VPT block
+/// insertion pass.
+FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d567d3339049..5a965f7a6b9b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -162,7 +161,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     // otherwise).
     if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
       MachineRegisterInfo *MRI = &MF.getRegInfo();
-      MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+      MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
     }
 
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
@@ -204,7 +203,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
       MachineRegisterInfo *MRI = &MF.getRegInfo();
       MRI->constrainRegClass(DestReg,
-                             &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+                             &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
     }
 
     MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
@@ -478,7 +477,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;
 
   // Memory operands in inline assembly always use AddrModeT2_i12.
-  if (Opcode == ARM::INLINEASM)
+  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
     AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
 
   if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
@@ -611,9 +610,23 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
         Offset = -Offset;
         isSub = true;
       }
+    } else if (AddrMode == ARMII::AddrModeT2_i7s4 ||
+               AddrMode == ARMII::AddrModeT2_i7s2 ||
+               AddrMode == ARMII::AddrModeT2_i7) {
+      Offset += MI.getOperand(FrameRegIdx + 1).getImm();
+      unsigned OffsetMask;
+      switch (AddrMode) {
+      case ARMII::AddrModeT2_i7s4: NumBits = 9; OffsetMask = 0x3; break;
+      case ARMII::AddrModeT2_i7s2: NumBits = 8; OffsetMask = 0x1; break;
+      default:                     NumBits = 7; OffsetMask = 0x0; break;
+      }
+      // MCInst operand expects already scaled value.
+      Scale = 1;
+      assert((Offset & OffsetMask) == 0 && "Can't encode this offset!");
+      (void)OffsetMask; // squash unused-variable warning at -NDEBUG
     } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
       Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
-      NumBits = 10; // 8 bits scaled by 4
+      NumBits = 8 + 2;
       // MCInst operand expects already scaled value.
       Scale = 1;
       assert((Offset & 3) == 0 && "Can't encode this offset!");
@@ -639,7 +652,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       // Replace the FrameIndex with fp/sp
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       if (isSub) {
-        if (AddrMode == ARMII::AddrMode5)
+        if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
           // FIXME: Not consistent.
           ImmedOffset |= 1 << NumBits;
         else
@@ -653,7 +666,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     // Otherwise, offset doesn't fit. Pull in what we can to simplify
     ImmedOffset = ImmedOffset & Mask;
     if (isSub) {
-      if (AddrMode == ARMII::AddrMode5)
+      if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
         // FIXME: Not consistent.
         ImmedOffset |= 1 << NumBits;
       else {
@@ -678,3 +691,28 @@ ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
     return ARMCC::AL;
   return getInstrPredicate(MI, PredReg);
 }
+
+int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  if (!MCID.OpInfo)
+    return -1;
+
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+    if (ARM::isVpred(MCID.OpInfo[i].OperandType))
+      return i;
+
+  return -1;
+}
+
+ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
+                                            unsigned &PredReg) {
+  int PIdx = findFirstVPTPredOperandIdx(MI);
+  if (PIdx == -1) {
+    PredReg = 0;
+    return ARMVCC::None;
+  }
+
+  PredReg = MI.getOperand(PIdx+1).getReg();
+  return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm();
+}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index c834ba73bfea..a6712d5a0e72 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,9 +1,8 @@
 //===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -69,6 +68,12 @@ private:
 /// to llvm::getInstrPredicate except it returns AL for conditional branch
 /// instructions which are "predicated", but are not in IT blocks.
 ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
+
+// getVPTInstrPredicate: VPT analogue of getITInstrPredicate, plus a helper
+// function corresponding to MachineInstr::findFirstPredOperandIdx.
+int findFirstVPTPredOperandIdx(const MachineInstr &MI);
+ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
+                                      unsigned &PredReg);
 }
 
 #endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 65889fc4e28b..37a85fa38417 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1,9 +1,8 @@
 //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -454,7 +453,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
       return false;
 
     if (!MI->hasOneMemOperand() ||
@@ -1128,8 +1127,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
 
   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
-  OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  OptimizeSize = MF.getFunction().hasOptSize();
+  MinimizeSize = STI->hasMinSize();
 
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index e4bdd40fb743..a96417ffce4d 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ThumbRegisterInfo.cpp - Thumb-1 Register Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -447,63 +446,6 @@ void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
   (void)Done;
 }
 
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool ThumbRegisterInfo::saveScavengerRegister(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-    MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
-    unsigned Reg) const {
-
-  const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
-  if (!STI.isThumb1Only())
-    return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
-
-  // Thumb1 can't use the emergency spill slot on the stack because
-  // ldr/str immediate offsets must be positive, and if we're referencing
-  // off the frame pointer (if, for example, there are alloca() calls in
-  // the function, the offset will be negative. Use R12 instead since that's
-  // a call clobbered register that we know won't be used in Thumb1 mode.
-  const TargetInstrInfo &TII = *STI.getInstrInfo();
-  DebugLoc DL;
-  BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
-      .addReg(ARM::R12, RegState::Define)
-      .addReg(Reg, RegState::Kill)
-      .add(predOps(ARMCC::AL));
-
-  // The UseMI is where we would like to restore the register. If there's
-  // interference with R12 before then, however, we'll need to restore it
-  // before that instead and adjust the UseMI.
-  bool done = false;
-  for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
-    if (II->isDebugInstr())
-      continue;
-    // If this instruction affects R12, adjust our restore point.
-    for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = II->getOperand(i);
-      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
-        UseMI = II;
-        done = true;
-        break;
-      }
-      if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
-          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-        continue;
-      if (MO.getReg() == ARM::R12) {
-        UseMI = II;
-        done = true;
-        break;
-      }
-    }
-  }
-  // Restore the register from R12
-  BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
-      .addReg(Reg, RegState::Define)
-      .addReg(ARM::R12, RegState::Kill)
-      .add(predOps(ARMCC::AL));
-
-  return true;
-}
-
 void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
@@ -619,3 +561,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (MI.isPredicable())
     MIB.add(predOps(ARMCC::AL));
 }
+
+bool
+ThumbRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  if (MF.getSubtarget<ARMSubtarget>().isThumb1Only()) {
+    // For Thumb1, the emergency spill slot must be some small positive
+    // offset from the base/stack pointer.
+    return false;
+  }
+  // For Thumb2, put the emergency spill slot next to FP.
+  return true;
+}
diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 75c3fe9ae8ad..08cf67284d4c 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -52,14 +51,10 @@ public:
                          const ARMBaseInstrInfo &TII) const;
   void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                          int64_t Offset) const override;
-  bool saveScavengerRegister(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator I,
-                             MachineBasicBlock::iterator &UseMI,
-                             const TargetRegisterClass *RC,
-                             unsigned Reg) const override;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
+  bool useFPForScavengingIndex(const MachineFunction &MF) const override;
 };
 }
 
diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/lib/Target/ARM/Utils/ARMBaseInfo.cpp
index 534f78c6d4d2..4ace61cccd0f 100644
--- a/lib/Target/ARM/Utils/ARMBaseInfo.cpp
+++ b/lib/Target/ARM/Utils/ARMBaseInfo.cpp
@@ -1,9 +1,8 @@
 //===-- ARMBaseInfo.cpp - ARM Base encoding information------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.h b/lib/Target/ARM/Utils/ARMBaseInfo.h
index f32d8223f53c..aa3aca359cb8 100644
--- a/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- ARMBaseInfo.h - Top level definitions for ARM ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -67,6 +66,30 @@ inline static CondCodes getOppositeCondition(CondCodes CC) {
 }
 } // end namespace ARMCC
 
+namespace ARMVCC {
+  enum VPTCodes {
+    None = 0,
+    Then,
+    Else
+  };
+}
+
+inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) {
+  switch (CC) {
+  case ARMVCC::None:  return "none";
+  case ARMVCC::Then:  return "t";
+  case ARMVCC::Else:  return "e";
+  }
+  llvm_unreachable("Unknown VPT code");
+}
+
+inline static unsigned ARMVectorCondCodeFromString(StringRef CC) {
+  return StringSwitch<unsigned>(CC.lower())
+    .Case("t", ARMVCC::Then)
+    .Case("e", ARMVCC::Else)
+    .Default(~0U);
+}
+
 inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   switch (CC) {
   case ARMCC::EQ:  return "eq";
diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h
index 48327fd377b2..f0746d73c95f 100644
--- a/lib/Target/AVR/AVR.h
+++ b/lib/Target/AVR/AVR.h
@@ -1,9 +1,8 @@
 //===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVR.td b/lib/Target/AVR/AVR.td
index d03b983aa70b..53768f99df3b 100644
--- a/lib/Target/AVR/AVR.td
+++ b/lib/Target/AVR/AVR.td
@@ -1,9 +1,8 @@
 //===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 // This is the top level entry point for the AVR target.
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index f9a6e77387b2..7586bd7b78fc 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- AVRAsmPrinter.cpp - AVR LLVM assembly writer ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,8 @@
 #include "AVR.h"
 #include "AVRMCInstLower.h"
 #include "AVRSubtarget.h"
-#include "InstPrinter/AVRInstPrinter.h"
+#include "MCTargetDesc/AVRInstPrinter.h"
+#include "TargetInfo/AVRTargetInfo.h"
 
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -43,16 +43,13 @@ public:
 
   StringRef getPassName() const override { return "AVR Assembly Printer"; }
 
-  void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
-                    const char *Modifier = 0);
+  void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
 
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O) override;
+                             const char *ExtraCode, raw_ostream &O) override;
 
   void EmitInstruction(const MachineInstr *MI) override;
 
@@ -61,7 +58,7 @@ private:
 };
 
 void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
-                                 raw_ostream &O, const char *Modifier) {
+                                 raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(OpNo);
 
   switch (MO.getType()) {
@@ -86,11 +83,10 @@ void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                                    unsigned AsmVariant, const char *ExtraCode,
-                                    raw_ostream &O) {
+                                    const char *ExtraCode, raw_ostream &O) {
   // Default asm printer can only deal with some extra codes,
   // so try it first.
-  bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+  bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
 
   if (Error && ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0)
@@ -138,8 +134,7 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
 }
 
 bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNum, unsigned AsmVariant,
-                                          const char *ExtraCode,
+                                          unsigned OpNum, const char *ExtraCode,
                                           raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
     llvm_unreachable("This branch is not implemented yet");
diff --git a/lib/Target/AVR/AVRCallingConv.td b/lib/Target/AVR/AVRCallingConv.td
index 68dbce02706f..213e35fca66d 100644
--- a/lib/Target/AVR/AVRCallingConv.td
+++ b/lib/Target/AVR/AVRCallingConv.td
@@ -1,9 +1,8 @@
 //===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for AVR architecture.
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 536a54759c77..c45b2d0e39c1 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
 //===-- AVRExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -583,8 +582,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
   unsigned TmpReg = 0; // 0 for no temporary register
   unsigned SrcReg = MI.getOperand(1).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
-  OpLo = AVR::LDRdPtrPi;
-  OpHi = AVR::LDRdPtr;
+  OpLo = AVR::LDRdPtr;
+  OpHi = AVR::LDDRdPtrQ;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
 
   // Use a temporary register if src and dst registers are the same.
@@ -597,8 +596,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
   // Load low byte.
   auto MIBLO = buildMI(MBB, MBBI, OpLo)
     .addReg(CurDstLoReg, RegState::Define)
-    .addReg(SrcReg, RegState::Define)
-    .addReg(SrcReg);
+    .addReg(SrcReg, RegState::Define);
 
   // Push low byte onto stack if necessary.
   if (TmpReg)
@@ -607,7 +605,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
   // Load high byte.
   auto MIBHI = buildMI(MBB, MBBI, OpHi)
     .addReg(CurDstHiReg, RegState::Define)
-    .addReg(SrcReg, getKillRegState(SrcIsKill));
+    .addReg(SrcReg, getKillRegState(SrcIsKill))
+    .addImm(1);
 
   if (TmpReg) {
     // Move the high byte into the final destination.
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index 3b7322365772..5e91bb8632c1 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- AVRFrameLowering.cpp - AVR Frame Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -362,13 +361,12 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
     MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator MI) const {
   const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
-  const TargetFrameLowering &TFI = *STI.getFrameLowering();
   const AVRInstrInfo &TII = *STI.getInstrInfo();
 
   // There is nothing to insert when the call frame memory is allocated during
   // function entry. Delete the call frame pseudo and replace all pseudo stores
   // with real store instructions.
-  if (TFI.hasReservedCallFrame(MF)) {
+  if (hasReservedCallFrame(MF)) {
     fixStackStores(MBB, MI, TII, false);
     return MBB.erase(MI);
   }
@@ -382,7 +380,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
   // For adjcallstackdown we convert it into an 'adiw reg, <amt>' handling
   // the read and write of SP in I/O space.
   if (Amount != 0) {
-    assert(TFI.getStackAlignment() == 1 && "Unsupported stack alignment");
+    assert(getStackAlignment() == 1 && "Unsupported stack alignment");
 
     if (Opcode == TII.getCallFrameSetupOpcode()) {
       fixStackStores(MBB, MI, TII, true);
diff --git a/lib/Target/AVR/AVRFrameLowering.h b/lib/Target/AVR/AVRFrameLowering.h
index a0ba6c951276..a7658438232a 100644
--- a/lib/Target/AVR/AVRFrameLowering.h
+++ b/lib/Target/AVR/AVRFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- AVRFrameLowering.h - Define frame lowering for AVR ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 85abf42eaa67..5cb4441c4380 100644
--- a/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- AVRISelDAGToDAG.cpp - A dag to dag inst selector for AVR ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 57fc978b54bb..b6ba5f22fafb 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,19 +25,21 @@
 
 #include "AVR.h"
 #include "AVRMachineFunctionInfo.h"
+#include "AVRSubtarget.h"
 #include "AVRTargetMachine.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
 
 namespace llvm {
 
-AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
-    : TargetLowering(tm) {
+AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
+                                     const AVRSubtarget &STI)
+    : TargetLowering(TM), Subtarget(STI) {
   // Set up the register classes.
   addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
   addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
 
   // Compute derived properties from the register classes.
-  computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrOneBooleanContent);
@@ -88,9 +89,9 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
   setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
 
   setOperationAction(ISD::ROTL, MVT::i8, Custom);
-  setOperationAction(ISD::ROTL, MVT::i16, Custom);
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
   setOperationAction(ISD::ROTR, MVT::i8, Custom);
-  setOperationAction(ISD::ROTR, MVT::i16, Custom);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
 
   setOperationAction(ISD::BR_CC, MVT::i8, Custom);
   setOperationAction(ISD::BR_CC, MVT::i16, Custom);
@@ -163,6 +164,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 
+  // Expand multiplications to libcalls when there is
+  // no hardware MUL.
+  if (!Subtarget.supportsMultiplication()) {
+    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+  }
+
   for (MVT VT : MVT::integer_valuetypes()) {
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
@@ -229,7 +237,7 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
   setLibcallName(RTLIB::COS_F32, "cos");
 
   setMinFunctionAlignment(1);
-  setMinimumJumpTableEntries(INT_MAX);
+  setMinimumJumpTableEntries(UINT_MAX);
 }
 
 const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -935,7 +943,7 @@ static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
                                         AVR::R19R18, AVR::R17R16, AVR::R15R14,
                                         AVR::R13R12, AVR::R11R10, AVR::R9R8};
   if (IsVarArg) {
-    // Variadic functions do not need all the analisys below.
+    // Variadic functions do not need all the analysis below.
     if (IsCall) {
       CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg);
     } else {
@@ -1270,8 +1278,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   }
 
   // Add a register mask operand representing the call-preserved registers.
-  const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const uint32_t *Mask =
       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
@@ -1433,8 +1440,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
   bool HasRepeatedOperand = false;
   MachineFunction *F = BB->getParent();
   MachineRegisterInfo &RI = F->getRegInfo();
-  const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   switch (MI.getOpcode()) {
@@ -1574,8 +1580,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
 // it, but it works for now.
 MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
-  const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock::iterator I(MI);
   ++I; // in any case insert *after* the mul instruction
   if (isCopyMulResult(I))
@@ -1629,6 +1634,15 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
   MachineFunction *MF = MBB->getParent();
   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineBasicBlock *FallThrough = MBB->getFallThrough();
+
+  // If the current basic block falls through to another basic block,
+  // we must insert an unconditional branch to the fallthrough destination
+  // if we are to insert basic blocks at the prior fallthrough point.
+  if (FallThrough != nullptr) {
+    BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
+  }
+
   MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
 
@@ -1838,9 +1852,6 @@ std::pair<unsigned, const TargetRegisterClass *>
 AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                 StringRef Constraint,
                                                 MVT VT) const {
-  auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
-                 .getSubtargetImpl();
-
   // We only support i8 and i16.
   //
   //:FIXME: remove this assert for now since it gets sometimes executed
@@ -1884,8 +1895,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     }
   }
 
-  return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
-                                                      Constraint, VT);
+  return TargetLowering::getRegForInlineAsmConstraint(
+      Subtarget.getRegisterInfo(), Constraint, VT);
 }
 
 void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index c90c65c81f70..ed2d0835903c 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -1,9 +1,8 @@
 //===-- AVRISelLowering.h - AVR DAG Lowering Interface ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,12 +63,14 @@ enum NodeType {
 
 } // end of namespace AVRISD
 
+class AVRSubtarget;
 class AVRTargetMachine;
 
 /// Performs target lowering for the AVR.
 class AVRTargetLowering : public TargetLowering {
 public:
-  explicit AVRTargetLowering(AVRTargetMachine &TM);
+  explicit AVRTargetLowering(const AVRTargetMachine &TM,
+                             const AVRSubtarget &STI);
 
 public:
   MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
@@ -127,6 +128,11 @@ public:
   unsigned getRegisterByName(const char* RegName, EVT VT,
                              SelectionDAG &DAG) const override;
 
+  bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
+    const override {
+    return false;
+  }
+
 private:
   SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
                     SelectionDAG &DAG, SDLoc dl) const;
@@ -164,6 +170,10 @@ private:
                           const SDLoc &dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;
 
+protected:
+
+  const AVRSubtarget &Subtarget;
+
 private:
   MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/lib/Target/AVR/AVRInstrFormats.td b/lib/Target/AVR/AVRInstrFormats.td
index ce5e606f9787..347e683cd47f 100644
--- a/lib/Target/AVR/AVRInstrFormats.td
+++ b/lib/Target/AVR/AVRInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- AVRInstrInfo.td - AVR Instruction Formats ----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp
index 0c32334167f0..ba7a95e92c5c 100644
--- a/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/lib/Target/AVR/AVRInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AVRInstrInfo.cpp - AVR Instruction Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -488,7 +487,8 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   case TargetOpcode::KILL:
   case TargetOpcode::DBG_VALUE:
     return 0;
-  case TargetOpcode::INLINEASM: {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: {
     const MachineFunction &MF = *MI.getParent()->getParent();
     const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
     const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h
index 354edcec3466..ba74af325474 100644
--- a/lib/Target/AVR/AVRInstrInfo.h
+++ b/lib/Target/AVR/AVRInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- AVRInstrInfo.h - AVR Instruction Information ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 5720af7d8df6..caca9b617609 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- AVRInstrInfo.td - AVR Instruction defs -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -90,6 +89,22 @@ def imm0_63_neg : PatLeaf<(imm),
 
 def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
 
+// imm_com8_XFORM - Return the complement of a imm_com8 value
+def imm_com8_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~((uint8_t)N->getZExtValue()), SDLoc(N),
+                                   MVT::i8);
+}]>;
+
+// imm_com8 - Match an immediate that is a complement
+// of a 8-bit immediate.
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use imm_com8.
+def imm_com8_asmoperand : AsmOperandClass { let Name = "ImmCom8"; }
+def imm_com8 : Operand<i8> {
+  let ParserMatchClass = imm_com8_asmoperand;
+}
+
 def ioaddr_XFORM : SDNodeXForm<imm,
 [{
   return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - 0x20, SDLoc(N), MVT::i8);
@@ -157,13 +172,6 @@ def memspi : Operand<iPTR>
   let MIOperandInfo = (ops GPRSP, i16imm);
 }
 
-def imm_com8 : Operand<i8>
-{
-  let EncoderMethod = "encodeComplement";
-
-  let MIOperandInfo = (ops i8imm);
-}
-
 def relbrtarget_7 : Operand<OtherVT>
 {
     let PrintMethod   = "printPCRelImm";
@@ -1151,11 +1159,11 @@ isReMaterializable = 1 in
   // LDW Rd+1:Rd, P
   //
   // Expands to:
-  // ld Rd,   P+
-  // ld Rd+1, P
+  // ld  Rd,   P
+  // ldd Rd+1, P+1
   let Constraints = "@earlyclobber $reg" in
   def LDWRdPtr : Pseudo<(outs DREGS:$reg),
-                        (ins PTRREGS:$ptrreg),
+                        (ins PTRDISPREGS:$ptrreg),
                         "ldw\t$reg, $ptrreg",
                         [(set i16:$reg, (load i16:$ptrreg))]>,
                  Requires<[HasSRAM]>;
@@ -1222,7 +1230,7 @@ isReMaterializable = 1 in
   // ldd Rd,   P+q
   // ldd Rd+1, P+q+1
   let Constraints = "@earlyclobber $dst" in
-  def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
+  def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND:$dst),
                           (ins memri:$memri),
                           "lddw\t$dst, $memri",
                           [(set i16:$dst, (load addr:$memri))]>,
@@ -1729,20 +1737,7 @@ def BLD : FRdB<0b00,
                "bld\t$rd, $b",
                []>;
 
-// Set/clear bit in register operations.
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
-  // CBR Rd, K
-  // Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
-  // FIXME: This uses the 'complement' encoder. We need it to also use the
-  // imm_ldi8 encoder. This will cause no fixups to be created on this instruction.
-  def CBRRdK : FRdK<0b0111,
-                    (outs LD8:$rd),
-                    (ins LD8:$src, imm_com8:$k),
-                    "cbr\t$rd, $k",
-                    []>;
-}
+def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8:$rd, imm_com8:$k), 0>;
 
 // CLR Rd
 // Alias for EOR Rd, Rd
diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp
index dfefd09bc4b8..49a318762b63 100644
--- a/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/lib/Target/AVR/AVRMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- AVRMCInstLower.cpp - Convert AVR MachineInstr to an MCInst --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRMCInstLower.h b/lib/Target/AVR/AVRMCInstLower.h
index 2e2d1014485e..5e0f42ac16a7 100644
--- a/lib/Target/AVR/AVRMCInstLower.h
+++ b/lib/Target/AVR/AVRMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- AVRMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/AVRMachineFunctionInfo.h b/lib/Target/AVR/AVRMachineFunctionInfo.h
index cf0c73576301..5226e30491c3 100644
--- a/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- AVRMachineFuctionInfo.h - AVR machine function info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 808a85e459c1..a6b36f80485d 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AVRRegisterInfo.cpp - AVR Register Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
@@ -233,9 +233,9 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
     // No need to set SREG as dead here otherwise if the next instruction is a
     // cond branch it will be using a dead register.
-    New = BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28)
-              .addReg(AVR::R29R28, RegState::Kill)
-              .addImm(Offset - 63 + 1);
+    BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28)
+        .addReg(AVR::R29R28, RegState::Kill)
+        .addImm(Offset - 63 + 1);
 
     Offset = 62;
   }
@@ -245,7 +245,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-unsigned AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   if (TFI->hasFP(MF)) {
     // The Y pointer register
@@ -273,4 +273,18 @@ void AVRRegisterInfo::splitReg(unsigned Reg,
     HiReg = getSubReg(Reg, AVR::sub_hi);
 }
 
+bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
+                                     const TargetRegisterClass *SrcRC,
+                                     unsigned SubReg,
+                                     const TargetRegisterClass *DstRC,
+                                     unsigned DstSubReg,
+                                     const TargetRegisterClass *NewRC,
+                                     LiveIntervals &LIS) const {
+  if(this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
+    return false;
+  }
+
+  return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg, NewRC, LIS);
+}
+
 } // end of namespace llvm
diff --git a/lib/Target/AVR/AVRRegisterInfo.h b/lib/Target/AVR/AVRRegisterInfo.h
index 104b336b9c48..8e6e63af3d57 100644
--- a/lib/Target/AVR/AVRRegisterInfo.h
+++ b/lib/Target/AVR/AVRRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- AVRRegisterInfo.h - AVR Register Information Impl -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,7 +41,7 @@ public:
                            unsigned FIOperandNum,
                            RegScavenger *RS = NULL) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   const TargetRegisterClass *
   getPointerRegClass(const MachineFunction &MF,
@@ -56,6 +55,13 @@ public:
     return true;
   }
 
+  bool shouldCoalesce(MachineInstr *MI,
+                      const TargetRegisterClass *SrcRC,
+                      unsigned SubReg,
+                      const TargetRegisterClass *DstRC,
+                      unsigned DstSubReg,
+                      const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td
index d55252bcac46..ea38fedd22ce 100644
--- a/lib/Target/AVR/AVRRegisterInfo.td
+++ b/lib/Target/AVR/AVRRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -166,14 +165,14 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
 // cannot use Z; it's simply a workaround a regalloc bug.
 //
 // More information can be found in PR39553.
-def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+def DREGS_WITHOUT_YZ_WORKAROUND : RegisterClass<"AVR", [i16], 8,
   (
     // Return value and arguments.
     add R25R24, R19R18, R21R20, R23R22,
     // Scratch registers.
     R27R26,
     // Callee saved registers.
-    R29R28, R17R16, R15R14, R13R12, R11R10,
+    R17R16, R15R14, R13R12, R11R10,
     R9R8, R7R6, R5R4, R3R2, R1R0
   )>;
 
diff --git a/lib/Target/AVR/AVRRelaxMemOperations.cpp b/lib/Target/AVR/AVRRelaxMemOperations.cpp
index fdb09897eda8..6be901743e82 100644
--- a/lib/Target/AVR/AVRRelaxMemOperations.cpp
+++ b/lib/Target/AVR/AVRRelaxMemOperations.cpp
@@ -1,9 +1,8 @@
 //===-- AVRRelaxMemOperations.cpp - Relax out of range loads/stores -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRSelectionDAGInfo.h b/lib/Target/AVR/AVRSelectionDAGInfo.h
index 6474c8779330..3e7bd57f10cf 100644
--- a/lib/Target/AVR/AVRSelectionDAGInfo.h
+++ b/lib/Target/AVR/AVRSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index 556d69ec5234..6a41036fdd6c 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- AVRSubtarget.cpp - AVR Subtarget Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,9 +28,9 @@
 namespace llvm {
 
 AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
-                           const std::string &FS, AVRTargetMachine &TM)
+                           const std::string &FS, const AVRTargetMachine &TM)
     : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
-      TLInfo(TM), TSInfo(),
+      TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
 
       // Subtarget features
       m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -44,4 +43,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
   ParseSubtargetFeatures(CPU, FS);
 }
 
+AVRSubtarget &
+AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                              const TargetMachine &TM) {
+  // Parse features string. NOTE(review): the ctor also parses them; confirm.
+  ParseSubtargetFeatures(CPU, FS);
+  return *this;
+}
+
 } // end of namespace llvm
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index fa26738da190..da9289af7c8d 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -1,9 +1,8 @@
 //===-- AVRSubtarget.h - Define Subtarget for the AVR -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,7 +36,7 @@ public:
   //! \param FS  The feature string.
   //! \param TM  The target machine.
   AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               AVRTargetMachine &TM);
+               const AVRTargetMachine &TM);
 
   const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
@@ -49,6 +48,9 @@ public:
   /// \note Definition of function is auto generated by `tblgen`.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
+  AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                                const TargetMachine &TM);
+
   // Subtarget feature getters.
   // See AVR.td for details.
   bool hasSRAM() const { return m_hasSRAM; }
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index 9828cdab68c3..a36c8b0f9649 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- AVRTargetMachine.cpp - Define TargetMachine for AVR ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@
 #include "AVR.h"
 #include "AVRTargetObjectFile.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AVR/AVRTargetMachine.h b/lib/Target/AVR/AVRTargetMachine.h
index ffcf4350d45a..f9015c8741ea 100644
--- a/lib/Target/AVR/AVRTargetMachine.h
+++ b/lib/Target/AVR/AVRTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- AVRTargetMachine.h - Define TargetMachine for AVR -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp
index 0cebb0f043f9..980096a09835 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/AVRTargetObjectFile.h b/lib/Target/AVR/AVRTargetObjectFile.h
index ba91036fd64c..53d8510d9a21 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.h
+++ b/lib/Target/AVR/AVRTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index f2bb59265271..aac5644711e2 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -1,9 +1,8 @@
 //===---- AVRAsmParser.cpp - Parse AVR assembly to MCInst instructions ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 #include "MCTargetDesc/AVRMCELFStreamer.h"
 #include "MCTargetDesc/AVRMCExpr.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -160,6 +160,22 @@ public:
     addExpr(Inst, getImm());
   }
 
+  void addImmCom8Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // The operand is actually an imm8, but the source holds its bitwise
+    // negation, so flip it. isImmCom8() requires a constant, hence cast<>.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(~(uint8_t)CE->getValue()));
+  }
+
+  /// True for a constant immediate whose value fits in 8 unsigned bits.
+  bool isImmCom8() const {
+    if (!isImm()) return false;
+    if (const auto *Const = dyn_cast<MCConstantExpr>(getImm()))
+      return isUInt<8>(Const->getValue());
+    return false;
+  }
+
   bool isReg() const { return Kind == k_Register; }
   bool isImm() const { return Kind == k_Immediate; }
   bool isToken() const { return Kind == k_Token; }
diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index e69accfa9393..e203a5069c85 100644
--- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- AVRDisassembler.cpp - Disassembler for AVR ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 #include "AVRRegisterInfo.h"
 #include "AVRSubtarget.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
 
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
deleted file mode 100644
index 0f34b8e18ff9..000000000000
--- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
+++ /dev/null
@@ -1,171 +0,0 @@
-//===-- AVRInstPrinter.cpp - Convert AVR MCInst to assembly syntax --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an AVR MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AVRInstPrinter.h"
-
-#include "MCTargetDesc/AVRMCTargetDesc.h"
-
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-
-#include <cstring>
-
-#define DEBUG_TYPE "asm-printer"
-
-namespace llvm {
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "AVRGenAsmWriter.inc"
-
-void AVRInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                               StringRef Annot, const MCSubtargetInfo &STI) {
-  unsigned Opcode = MI->getOpcode();
-
-  // First handle load and store instructions with postinc or predec
-  // of the form "ld reg, X+".
-  // TODO: We should be able to rewrite this using TableGen data.
-  switch (Opcode) {
-  case AVR::LDRdPtr:
-  case AVR::LDRdPtrPi:
-  case AVR::LDRdPtrPd:
-    O << "\tld\t";
-    printOperand(MI, 0, O);
-    O << ", ";
-
-    if (Opcode == AVR::LDRdPtrPd)
-      O << '-';
-
-    printOperand(MI, 1, O);
-
-    if (Opcode == AVR::LDRdPtrPi)
-      O << '+';
-    break;
-  case AVR::STPtrRr:
-    O << "\tst\t";
-    printOperand(MI, 0, O);
-    O << ", ";
-    printOperand(MI, 1, O);
-    break;
-  case AVR::STPtrPiRr:
-  case AVR::STPtrPdRr:
-    O << "\tst\t";
-
-    if (Opcode == AVR::STPtrPdRr)
-      O << '-';
-
-    printOperand(MI, 1, O);
-
-    if (Opcode == AVR::STPtrPiRr)
-      O << '+';
-
-    O << ", ";
-    printOperand(MI, 2, O);
-    break;
-  default:
-    if (!printAliasInstr(MI, O))
-      printInstruction(MI, O);
-
-    printAnnotation(O, Annot);
-    break;
-  }
-}
-
-const char *AVRInstPrinter::getPrettyRegisterName(unsigned RegNum,
-                                                  MCRegisterInfo const &MRI) {
-  // GCC prints register pairs by just printing the lower register
-  // If the register contains a subregister, print it instead
-  if (MRI.getNumSubRegIndices() > 0) {
-    unsigned RegLoNum = MRI.getSubReg(RegNum, AVR::sub_lo);
-    RegNum = (RegLoNum != AVR::NoRegister) ? RegLoNum : RegNum;
-  }
-
-  return getRegisterName(RegNum);
-}
-
-void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  const MCOperandInfo &MOI = this->MII.get(MI->getOpcode()).OpInfo[OpNo];
-
-  if (Op.isReg()) {
-    bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) ||
-                    (MOI.RegClass == AVR::PTRDISPREGSRegClassID) ||
-                    (MOI.RegClass == AVR::ZREGRegClassID);
-
-    if (isPtrReg) {
-      O << getRegisterName(Op.getReg(), AVR::ptr);
-    } else {
-      O << getPrettyRegisterName(Op.getReg(), MRI);
-    }
-  } else if (Op.isImm()) {
-    O << Op.getImm();
-  } else {
-    assert(Op.isExpr() && "Unknown operand kind in printOperand");
-    O << *Op.getExpr();
-  }
-}
-
-/// This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value.
-void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-
-  if (Op.isImm()) {
-    int64_t Imm = Op.getImm();
-    O << '.';
-
-    // Print a position sign if needed.
-    // Negative values have their sign printed automatically.
-    if (Imm >= 0)
-      O << '+';
-
-    O << Imm;
-  } else {
-    assert(Op.isExpr() && "Unknown pcrel immediate operand");
-    O << *Op.getExpr();
-  }
-}
-
-void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
-                                raw_ostream &O) {
-  assert(MI->getOperand(OpNo).isReg() && "Expected a register for the first operand");
-
-  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
-
-  // Print the register.
-  printOperand(MI, OpNo, O);
-
-  // Print the {+,-}offset.
-  if (OffsetOp.isImm()) {
-    int64_t Offset = OffsetOp.getImm();
-
-    if (Offset >= 0)
-      O << '+';
-
-    O << Offset;
-  } else if (OffsetOp.isExpr()) {
-    O << *OffsetOp.getExpr();
-  } else {
-    llvm_unreachable("unknown type for offset");
-  }
-}
-
-} // end of namespace llvm
-
diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.h b/lib/Target/AVR/InstPrinter/AVRInstPrinter.h
deleted file mode 100644
index c9f65b922745..000000000000
--- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- AVRInstPrinter.h - Convert AVR MCInst to assembly syntax -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an AVR MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AVR_INST_PRINTER_H
-#define LLVM_AVR_INST_PRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-#include "MCTargetDesc/AVRMCTargetDesc.h"
-
-namespace llvm {
-
-/// Prints AVR instructions to a textual stream.
-class AVRInstPrinter : public MCInstPrinter {
-public:
-  AVRInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                 const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  static const char *getPrettyRegisterName(unsigned RegNo,
-                                           MCRegisterInfo const &MRI);
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-private:
-  static const char *getRegisterName(unsigned RegNo,
-                                     unsigned AltIdx = AVR::NoRegAltName);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  // Autogenerated by TableGen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  bool printAliasInstr(const MCInst *MI, raw_ostream &O);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx, raw_ostream &O);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_AVR_INST_PRINTER_H
-
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index f81a57dd71e3..e92b16c8ee9d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- AVRAsmBackend.cpp - AVR Asm Backend  ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index d48077c3ab8e..1e713db38145 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -1,9 +1,8 @@
 //===-- AVRAsmBackend.h - AVR Asm Backend  --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index 4a921a1601a9..6025e4b2437c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- AVRELFObjectWriter.cpp - AVR ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
index e5df6cc34e40..461f1660c952 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
@@ -1,9 +1,8 @@
 //===----- AVRELFStreamer.h - AVR Target Streamer --------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index cdb0b215bc60..b3504b89e4d3 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- AVRFixupKinds.h - AVR Specific Fixup Entries ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
new file mode 100644
index 000000000000..88ce9a25680e
--- /dev/null
+++ b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -0,0 +1,170 @@
+//===-- AVRInstPrinter.cpp - Convert AVR MCInst to assembly syntax --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AVR MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRInstPrinter.h"
+
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+#include <cstring>
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace llvm {
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "AVRGenAsmWriter.inc"
+
+/// Prints MI to O. Loads and stores through a pointer are written by hand so
+/// '-' / '+' can be attached to the pointer; other opcodes use generated code.
+void AVRInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot, const MCSubtargetInfo &STI) {
+  const unsigned Opc = MI->getOpcode();
+
+  // TODO: We should be able to rewrite this using TableGen data.
+  switch (Opc) {
+  case AVR::LDRdPtr:
+  case AVR::LDRdPtrPi:
+  case AVR::LDRdPtrPd:
+    // ld <op0>, [-]<op1>[+]
+    O << "\tld\t";
+    printOperand(MI, 0, O);
+    O << ", ";
+    if (Opc == AVR::LDRdPtrPd)
+      O << '-';
+    printOperand(MI, 1, O);
+    if (Opc == AVR::LDRdPtrPi)
+      O << '+';
+    break;
+
+  case AVR::STPtrRr:
+    // st <op0>, <op1>
+    O << "\tst\t";
+    printOperand(MI, 0, O);
+    O << ", ";
+    printOperand(MI, 1, O);
+    break;
+
+  case AVR::STPtrPiRr:
+  case AVR::STPtrPdRr:
+    // st [-]<op1>[+], <op2>  (operand 0 is not printed)
+    O << "\tst\t";
+    if (Opc == AVR::STPtrPdRr)
+      O << '-';
+    printOperand(MI, 1, O);
+    if (Opc == AVR::STPtrPiRr)
+      O << '+';
+    O << ", ";
+    printOperand(MI, 2, O);
+    break;
+
+  default:
+    // Try the alias printer first and fall back to the regular one. The
+    // annotation is emitted on this path only, not for the cases above.
+    if (!printAliasInstr(MI, O))
+      printInstruction(MI, O);
+    printAnnotation(O, Annot);
+    break;
+  }
+}
+
+const char *AVRInstPrinter::getPrettyRegisterName(unsigned RegNum,
+                                                  MCRegisterInfo const &MRI) {
+  // GCC names a register pair after its low register, so do the same here:
+  // if RegNum has an AVR::sub_lo sub-register, that sub-register's name is
+  // returned; otherwise RegNum's own name is.
+  if (MRI.getNumSubRegIndices() == 0)
+    return getRegisterName(RegNum);
+
+  const unsigned LowReg = MRI.getSubReg(RegNum, AVR::sub_lo);
+  return getRegisterName(LowReg != AVR::NoRegister ? LowReg : RegNum);
+}
+
+/// Prints operand OpNo of MI: registers by name (pointer register classes
+/// use the AVR::ptr alternate name), immediates as integers, and anything
+/// else as an expression.
+void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNo);
+  const MCOperandInfo &Info = this->MII.get(MI->getOpcode()).OpInfo[OpNo];
+
+  if (Operand.isImm()) {
+    O << Operand.getImm();
+  } else if (!Operand.isReg()) {
+    assert(Operand.isExpr() && "Unknown operand kind in printOperand");
+    O << *Operand.getExpr();
+  } else if (Info.RegClass == AVR::PTRREGSRegClassID ||
+             Info.RegClass == AVR::PTRDISPREGSRegClassID ||
+             Info.RegClass == AVR::ZREGRegClassID) {
+    O << getRegisterName(Operand.getReg(), AVR::ptr);
+  } else {
+    // Any other register class goes through the GCC-style pretty name.
+    O << getPrettyRegisterName(Operand.getReg(), MRI);
+  }
+}
+
+/// Prints an immediate that ends up being encoded as a pc-relative value;
+/// operands that are not immediates are printed as expressions.
+void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNo);
+
+  if (!Operand.isImm()) {
+    assert(Operand.isExpr() && "Unknown pcrel immediate operand");
+    O << *Operand.getExpr();
+    return;
+  }
+
+  // Written as '.' followed by a signed offset: non-negative values need an
+  // explicit '+', negative ones have their sign printed automatically.
+  const int64_t Offset = Operand.getImm();
+  O << '.';
+  if (Offset >= 0)
+    O << '+';
+
+  O << Offset;
+}
+
+/// Prints a register followed by a signed offset: the register is operand
+/// OpNo and the offset is operand OpNo + 1.
+void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O) {
+  assert(MI->getOperand(OpNo).isReg() && "Expected a register for the first operand");
+
+  const MCOperand &Disp = MI->getOperand(OpNo + 1);
+  printOperand(MI, OpNo, O);
+
+  if (Disp.isExpr()) {
+    O << *Disp.getExpr();
+    return;
+  }
+
+  if (!Disp.isImm())
+    llvm_unreachable("unknown type for offset");
+
+  // Non-negative offsets get an explicit '+'; negative ones print their '-'.
+  const int64_t Value = Disp.getImm();
+  if (Value >= 0)
+    O << '+';
+  O << Value;
+}
+
+} // end of namespace llvm
+
diff --git a/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
new file mode 100644
index 000000000000..5b758a7503c9
--- /dev/null
+++ b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -0,0 +1,53 @@
+//===- AVRInstPrinter.h - Convert AVR MCInst to assembly syntax -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AVR MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_INST_PRINTER_H
+#define LLVM_AVR_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+namespace llvm {
+
+/// MCInstPrinter that writes AVR MCInsts as assembly text to a raw_ostream.
+class AVRInstPrinter : public MCInstPrinter {
+public:
+  AVRInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                 const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  static const char *getPrettyRegisterName(unsigned RegNo,
+                                           MCRegisterInfo const &MRI);
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+private:
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = AVR::NoRegAltName);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Definitions come from the TableGen-generated AVRGenAsmWriter.inc.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_INST_PRINTER_H
+
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index 535bb012eb07..99b2172c562f 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- AVRMCAsmInfo.cpp - AVR asm properties -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
   PrivateGlobalPrefix = ".L";
   UsesELFSectionDirectiveForBSS = true;
   UseIntegratedAssembler = true;
+  SupportsDebugInformation = true;
 }
 
 } // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
index cc2207a3cfae..b2fa18777bc0 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- AVRMCAsmInfo.h - AVR asm properties ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index 4dbbce8c205e..bc0488778685 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- AVRMCCodeEmitter.cpp - Convert AVR Code to Machine Code -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 883abf8db78a..2e24d885c155 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===-- AVRMCCodeEmitter.h - Convert AVR Code to Machine Code -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
index 861acd47347f..d9169f90a765 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===--------- AVRMCELFStreamer.cpp - AVR subclass of MCELFStreamer -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
index 12e805fc7d13..37a610bc4248 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
@@ -1,9 +1,8 @@
 //===--------- AVRMCELFStreamer.h - AVR subclass of MCELFStreamer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index d4a67973af7f..0a53e5346779 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- AVRMCExpr.cpp - AVR specific MC expression classes ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
index a166b0946749..3b696bab1715 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -1,9 +1,8 @@
 //===-- AVRMCExpr.h - AVR specific MC expression classes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index 8c39b5f4039e..f6607b26a065 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- AVRMCTargetDesc.cpp - AVR Target Descriptions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,11 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AVRELFStreamer.h"
+#include "AVRInstPrinter.h"
 #include "AVRMCAsmInfo.h"
 #include "AVRMCELFStreamer.h"
 #include "AVRMCTargetDesc.h"
 #include "AVRTargetStreamer.h"
-#include "InstPrinter/AVRInstPrinter.h"
+#include "TargetInfo/AVRTargetInfo.h"
 
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCELFStreamer.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index a764f15bd065..470db01ff468 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- AVRMCTargetDesc.h - AVR Target Descriptions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,8 +32,6 @@ class Target;
 class Triple;
 class raw_pwrite_stream;
 
-Target &getTheAVRTarget();
-
 MCInstrInfo *createAVRMCInstrInfo();
 
 /// Creates a machine code emitter for AVR.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
index 2b45d9adc7e9..3487a2bbb864 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- AVRTargetStreamer.cpp - AVR Target Streamer Methods ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
index 815088b0a5de..5c4d1a22f6c6 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- AVRTargetStreamer.h - AVR Target Streamer --------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index abe9cf45fcb3..c62d5cb85bc4 100644
--- a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -1,13 +1,12 @@
 //===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Module.h"
+#include "TargetInfo/AVRTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 namespace llvm {
 Target &getTheAVRTarget() {
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.h b/lib/Target/AVR/TargetInfo/AVRTargetInfo.h
new file mode 100644
index 000000000000..7e0186bbdae1
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.h
@@ -0,0 +1,18 @@
+//===-- AVRTargetInfo.h - AVR Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_TARGET_INFO_H
+#define LLVM_AVR_TARGET_INFO_H
+
+namespace llvm {
+class Target;
+
+Target &getTheAVRTarget();
+} // namespace llvm
+
+#endif // LLVM_AVR_TARGET_INFO_H
diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 8890fb8adf4d..75885fd058a7 100644
--- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -1,13 +1,13 @@
 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCContext.h"
@@ -126,7 +126,7 @@ public:
   bool isMem() const override { return false; }
 
   bool isConstantImm() const {
-    return isImm() && dyn_cast<MCConstantExpr>(getImm());
+    return isImm() && isa<MCConstantExpr>(getImm());
   }
 
   int64_t getConstantImm() const {
diff --git a/lib/Target/BPF/BPF.h b/lib/Target/BPF/BPF.h
index 9749e369c2c1..d311fc154094 100644
--- a/lib/Target/BPF/BPF.h
+++ b/lib/Target/BPF/BPF.h
@@ -1,9 +1,8 @@
 //===-- BPF.h - Top-level interface for BPF representation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,11 +15,16 @@
 namespace llvm {
 class BPFTargetMachine;
 
+ModulePass *createBPFAbstractMemberAccess();
+
 FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
+FunctionPass *createBPFMISimplifyPatchablePass();
 FunctionPass *createBPFMIPeepholePass();
 FunctionPass *createBPFMIPreEmitPeepholePass();
 FunctionPass *createBPFMIPreEmitCheckingPass();
 
+void initializeBPFAbstractMemberAccessPass(PassRegistry&);
+void initializeBPFMISimplifyPatchablePass(PassRegistry&);
 void initializeBPFMIPeepholePass(PassRegistry&);
 void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
 void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
diff --git a/lib/Target/BPF/BPF.td b/lib/Target/BPF/BPF.td
index 877bd15f4f2b..fad966ff5a13 100644
--- a/lib/Target/BPF/BPF.td
+++ b/lib/Target/BPF/BPF.td
@@ -1,9 +1,8 @@
 //===-- BPF.td - Describe the BPF Target Machine -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,6 +20,7 @@ class Proc<string Name, list<SubtargetFeature> Features>
 def : Proc<"generic", []>;
 def : Proc<"v1", []>;
 def : Proc<"v2", []>;
+def : Proc<"v3", []>;
 def : Proc<"probe", []>;
 
 def DummyFeature : SubtargetFeature<"dummy", "isDummyMode",
diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
new file mode 100644
index 000000000000..51d4cbc8a429
--- /dev/null
+++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -0,0 +1,482 @@
+//===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass abstracts struct/union member accesses in order to support
+// compile-once run-everywhere (CO-RE). CO-RE aims to compile a program once
+// so that it can run on different kernels. In particular, if a bpf program
+// accesses a particular kernel data structure member, the details of the
+// intermediate member accesses are recorded so the bpf loader can make the
+// necessary adjustments right before program loading.
+//
+// For example,
+//
+//   struct s {
+//     int a;
+//     int b;
+//   };
+//   struct t {
+//     struct s c;
+//     int d;
+//   };
+//   struct t e;
+//
+// For the member access e.c.b, the compiler will generate code
+//   &e + 4
+//
+// The compile-once run-everywhere instead generates the following code
+//   r = 4
+//   &e + r
+// The "4" in "r = 4" can be changed based on a particular kernel version.
+// For example, on a particular kernel version, if struct s is changed to
+//
+//   struct s {
+//     int new_field;
+//     int a;
+//     int b;
+//   }
+//
+// By repeating the member access on the host, the bpf loader can
+// adjust "r = 4" as "r = 8".
+//
+// This feature relies on the following three intrinsic calls:
+//   addr = preserve_array_access_index(base, dimension, index)
+//   addr = preserve_union_access_index(base, di_index)
+//          !llvm.preserve.access.index <union_ditype>
+//   addr = preserve_struct_access_index(base, gep_index, di_index)
+//          !llvm.preserve.access.index <struct_ditype>
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFTargetMachine.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-abstract-member-access"
+
+namespace llvm {
+const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama";
+const std::string BPFCoreSharedInfo::PatchableExtSecName =
+    ".BPF.patchable_externs";
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+class BPFAbstractMemberAccess final : public ModulePass {
+  StringRef getPassName() const override {
+    return "BPF Abstract Member Access";
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  BPFAbstractMemberAccess() : ModulePass(ID) {}
+
+private:
+  enum : uint32_t {
+    BPFPreserveArrayAI = 1,
+    BPFPreserveUnionAI = 2,
+    BPFPreserveStructAI = 3,
+  };
+
+  std::map<std::string, GlobalVariable *> GEPGlobals;
+  // A map to link preserve_*_access_index intrinsic calls.
+  std::map<CallInst *, std::pair<CallInst *, uint32_t>> AIChain;
+  // A map to hold all the base preserve_*_access_index intrinsic calls.
+  // The base call is not an input of any other preserve_*_access_index
+  // intrinsics.
+  std::map<CallInst *, uint32_t> BaseAICalls;
+
+  bool doTransformation(Module &M);
+
+  void traceAICall(CallInst *Call, uint32_t Kind);
+  void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind);
+  void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind);
+  void collectAICallChains(Module &M, Function &F);
+
+  bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind);
+  bool removePreserveAccessIndexIntrinsic(Module &M);
+  void replaceWithGEP(std::vector<CallInst *> &CallList,
+                      uint32_t NumOfZerosIndex, uint32_t DIIndex);
+
+  Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
+                                 std::string &AccessKey, uint32_t Kind,
+                                 MDNode *&TypeMeta);
+  bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
+  bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
+};
+} // End anonymous namespace
+
+char BPFAbstractMemberAccess::ID = 0;
+INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
+                "abstracting struct/union member accessees", false, false)
+
+ModulePass *llvm::createBPFAbstractMemberAccess() {
+  return new BPFAbstractMemberAccess();
+}
+
+bool BPFAbstractMemberAccess::runOnModule(Module &M) {
+  LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
+
+  // Bail out if no debug info.
+  if (empty(M.debug_compile_units()))
+    return false;
+
+  return doTransformation(M);
+}
+
+/// Check whether a call is a preserve_*_access_index intrinsic call or not.
+bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
+                                                          uint32_t &Kind) {
+  if (!Call)
+    return false;
+
+  const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue());
+  if (!GV)
+    return false;
+  if (GV->getName().startswith("llvm.preserve.array.access.index")) {
+    Kind = BPFPreserveArrayAI;
+    return true;
+  }
+  if (GV->getName().startswith("llvm.preserve.union.access.index")) {
+    Kind = BPFPreserveUnionAI;
+    return true;
+  }
+  if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
+    Kind = BPFPreserveStructAI;
+    return true;
+  }
+
+  return false;
+}
+
+void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
+                                             uint32_t DimensionIndex,
+                                             uint32_t GEPIndex) {
+  for (auto Call : CallList) {
+    uint32_t Dimension = 1;
+    if (DimensionIndex > 0)
+      Dimension = cast<ConstantInt>(Call->getArgOperand(DimensionIndex))
+                      ->getZExtValue();
+
+    Constant *Zero =
+        ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
+    SmallVector<Value *, 4> IdxList;
+    for (unsigned I = 0; I < Dimension; ++I)
+      IdxList.push_back(Zero);
+    IdxList.push_back(Call->getArgOperand(GEPIndex));
+
+    auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0),
+                                                  IdxList, "", Call);
+    Call->replaceAllUsesWith(GEP);
+    Call->eraseFromParent();
+  }
+}
+
+bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
+  std::vector<CallInst *> PreserveArrayIndexCalls;
+  std::vector<CallInst *> PreserveUnionIndexCalls;
+  std::vector<CallInst *> PreserveStructIndexCalls;
+  bool Found = false;
+
+  for (Function &F : M)
+    for (auto &BB : F)
+      for (auto &I : BB) {
+        auto *Call = dyn_cast<CallInst>(&I);
+        uint32_t Kind;
+        if (!IsPreserveDIAccessIndexCall(Call, Kind))
+          continue;
+
+        Found = true;
+        if (Kind == BPFPreserveArrayAI)
+          PreserveArrayIndexCalls.push_back(Call);
+        else if (Kind == BPFPreserveUnionAI)
+          PreserveUnionIndexCalls.push_back(Call);
+        else
+          PreserveStructIndexCalls.push_back(Call);
+      }
+
+  // do the following transformation:
+  // . addr = preserve_array_access_index(base, dimension, index)
+  //   is transformed to
+  //     addr = GEP(base, dimension's zeros, index)
+  // . addr = preserve_union_access_index(base, di_index)
+  //   is transformed to
+  //     addr = base, i.e., all usages of "addr" are replaced by "base".
+  // . addr = preserve_struct_access_index(base, gep_index, di_index)
+  //   is transformed to
+  //     addr = GEP(base, 0, gep_index)
+  replaceWithGEP(PreserveArrayIndexCalls, 1, 2);
+  replaceWithGEP(PreserveStructIndexCalls, 0, 1);
+  for (auto Call : PreserveUnionIndexCalls) {
+    Call->replaceAllUsesWith(Call->getArgOperand(0));
+    Call->eraseFromParent();
+  }
+
+  return Found;
+}
+
+void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) {
+  for (User *U : Call->users()) {
+    Instruction *Inst = dyn_cast<Instruction>(U);
+    if (!Inst)
+      continue;
+
+    if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+      traceBitCast(BI, Call, Kind);
+    } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+      uint32_t CIKind;
+      if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+        AIChain[CI] = std::make_pair(Call, Kind);
+        traceAICall(CI, CIKind);
+      } else {
+        BaseAICalls[Call] = Kind;
+      }
+    } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+      if (GI->hasAllZeroIndices())
+        traceGEP(GI, Call, Kind);
+      else
+        BaseAICalls[Call] = Kind;
+    }
+  }
+}
+
+void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast,
+                                           CallInst *Parent, uint32_t Kind) {
+  for (User *U : BitCast->users()) {
+    Instruction *Inst = dyn_cast<Instruction>(U);
+    if (!Inst)
+      continue;
+
+    if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+      traceBitCast(BI, Parent, Kind);
+    } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+      uint32_t CIKind;
+      if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+        AIChain[CI] = std::make_pair(Parent, Kind);
+        traceAICall(CI, CIKind);
+      } else {
+        BaseAICalls[Parent] = Kind;
+      }
+    } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+      if (GI->hasAllZeroIndices())
+        traceGEP(GI, Parent, Kind);
+      else
+        BaseAICalls[Parent] = Kind;
+    }
+  }
+}
+
+void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
+                                       uint32_t Kind) {
+  for (User *U : GEP->users()) {
+    Instruction *Inst = dyn_cast<Instruction>(U);
+    if (!Inst)
+      continue;
+
+    if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+      traceBitCast(BI, Parent, Kind);
+    } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+      uint32_t CIKind;
+      if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+        AIChain[CI] = std::make_pair(Parent, Kind);
+        traceAICall(CI, CIKind);
+      } else {
+        BaseAICalls[Parent] = Kind;
+      }
+    } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+      if (GI->hasAllZeroIndices())
+        traceGEP(GI, Parent, Kind);
+      else
+        BaseAICalls[Parent] = Kind;
+    }
+  }
+}
+
+void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) {
+  AIChain.clear();
+  BaseAICalls.clear();
+
+  for (auto &BB : F)
+    for (auto &I : BB) {
+      uint32_t Kind;
+      auto *Call = dyn_cast<CallInst>(&I);
+      if (!IsPreserveDIAccessIndexCall(Call, Kind) ||
+          AIChain.find(Call) != AIChain.end())
+        continue;
+
+      traceAICall(Call, Kind);
+    }
+}
+
+/// Get access index from the preserve_*_access_index intrinsic calls.
+bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
+                                             uint64_t &AccessIndex) {
+  const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
+  if (!CV)
+    return false;
+
+  AccessIndex = CV->getValue().getZExtValue();
+  return true;
+}
+
+/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
+/// pointer of the first preserve_*_access_index call, and construct the access
+/// string, which will be the name of a global variable.
+Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
+                                                        std::string &AccessStr,
+                                                        std::string &AccessKey,
+                                                        uint32_t Kind,
+                                                        MDNode *&TypeMeta) {
+  Value *Base = nullptr;
+  std::vector<uint64_t> AccessIndices;
+  uint64_t TypeNameIndex = 0;
+  std::string LastTypeName;
+
+  while (Call) {
+    // Base of original corresponding GEP
+    Base = Call->getArgOperand(0);
+
+    // Type Name
+    std::string TypeName;
+    MDNode *MDN;
+    if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) {
+      MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+      if (!MDN)
+        return nullptr;
+
+      DIType *Ty = dyn_cast<DIType>(MDN);
+      if (!Ty)
+        return nullptr;
+
+      TypeName = Ty->getName();
+    }
+
+    // Access Index
+    uint64_t AccessIndex;
+    uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2;
+    if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex))
+      return nullptr;
+
+    AccessIndices.push_back(AccessIndex);
+    if (TypeName.size()) {
+      TypeNameIndex = AccessIndices.size() - 1;
+      LastTypeName = TypeName;
+      TypeMeta = MDN;
+    }
+
+    Kind = AIChain[Call].second;
+    Call = AIChain[Call].first;
+  }
+
+  // The initial type name is required.
+  // FIXME: if the initial type access is an array index, e.g.,
+  // &a[3].b.c, only one-dimensional arrays are supported.
+  if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
+    return nullptr;
+
+  // Construct the type string AccessStr.
+  for (unsigned I = 0; I < AccessIndices.size(); ++I)
+    AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
+
+  if (TypeNameIndex == AccessIndices.size() - 1)
+    AccessStr = "0:" + AccessStr;
+
+  // Access key is the type name + access string, uniquely identifying
+  // one kernel memory access.
+  AccessKey = LastTypeName + ":" + AccessStr;
+
+  return Base;
+}
+
+/// Call/Kind is the base preserve_*_access_index() call. Attempts to do
+/// transformation to a chain of relocatable GEPs.
+bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
+                                                uint32_t Kind) {
+  std::string AccessStr, AccessKey;
+  MDNode *TypeMeta = nullptr;
+  Value *Base =
+      computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
+  if (!Base)
+    return false;
+
+  // Do the transformation
+  // For any original GEP Call and Base %2 like
+  //   %4 = bitcast %struct.net_device** %dev1 to i64*
+  // it is transformed to:
+  //   %6 = load __BTF_0:sk_buff:0:0:2:0:
+  //   %7 = bitcast %struct.sk_buff* %2 to i8*
+  //   %8 = getelementptr i8, i8* %7, %6
+  //   %9 = bitcast i8* %8 to i64*
+  //   using %9 instead of %4
+  // The original Call inst is removed.
+  BasicBlock *BB = Call->getParent();
+  GlobalVariable *GV;
+
+  if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
+    GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
+                            GlobalVariable::ExternalLinkage, NULL, AccessStr);
+    GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
+    // Set the metadata (debuginfo types) for the global.
+    if (TypeMeta)
+      GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
+    GEPGlobals[AccessKey] = GV;
+  } else {
+    GV = GEPGlobals[AccessKey];
+  }
+
+  // Load the global variable.
+  auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
+  BB->getInstList().insert(Call->getIterator(), LDInst);
+
+  // Generate a BitCast
+  auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext()));
+  BB->getInstList().insert(Call->getIterator(), BCInst);
+
+  // Generate a GetElementPtr
+  auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()),
+                                        BCInst, LDInst);
+  BB->getInstList().insert(Call->getIterator(), GEP);
+
+  // Generate a BitCast
+  auto *BCInst2 = new BitCastInst(GEP, Call->getType());
+  BB->getInstList().insert(Call->getIterator(), BCInst2);
+
+  Call->replaceAllUsesWith(BCInst2);
+  Call->eraseFromParent();
+
+  return true;
+}
+
+bool BPFAbstractMemberAccess::doTransformation(Module &M) {
+  bool Transformed = false;
+
+  for (Function &F : M) {
+    // Collect PreserveDIAccessIndex Intrinsic call chains.
+    // The call chains will be used to generate the access
+    // patterns similar to GEP.
+    collectAICallChains(M, F);
+
+    for (auto &C : BaseAICalls)
+      Transformed = transformGEPChain(M, C.first, C.second) || Transformed;
+  }
+
+  return removePreserveAccessIndexIntrinsic(M) || Transformed;
+}
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index ada5eb923f40..e61e73468057 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- BPFAsmPrinter.cpp - BPF LLVM assembly writer ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,7 +16,8 @@
 #include "BPFMCInstLower.h"
 #include "BPFTargetMachine.h"
 #include "BTFDebug.h"
-#include "InstPrinter/BPFInstPrinter.h"
+#include "MCTargetDesc/BPFInstPrinter.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,27 +38,30 @@ class BPFAsmPrinter : public AsmPrinter {
 public:
   explicit BPFAsmPrinter(TargetMachine &TM,
                          std::unique_ptr<MCStreamer> Streamer)
-      : AsmPrinter(TM, std::move(Streamer)) {}
+      : AsmPrinter(TM, std::move(Streamer)), BTF(nullptr) {}
 
   StringRef getPassName() const override { return "BPF Assembly Printer"; }
   bool doInitialization(Module &M) override;
   void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O) override;
+                             const char *ExtraCode, raw_ostream &O) override;
 
   void EmitInstruction(const MachineInstr *MI) override;
+
+private:
+  BTFDebug *BTF;
 };
 } // namespace
 
 bool BPFAsmPrinter::doInitialization(Module &M) {
   AsmPrinter::doInitialization(M);
 
-  if (MAI->doesSupportDebugInformation()) {
-    Handlers.push_back(HandlerInfo(new BTFDebug(this), "emit",
+  // Only emit BTF when debug info is available.
+  if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) {
+    BTF = new BTFDebug(this);
+    Handlers.push_back(HandlerInfo(std::unique_ptr<BTFDebug>(BTF), "emit",
                                    "Debug Info Emission", "BTF",
                                    "BTF Emission"));
   }
@@ -105,18 +108,16 @@ void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
 }
 
 bool BPFAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned /*AsmVariant*/,
                                     const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
-    return true; // BPF does not have special modifiers
+    return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
 
   printOperand(MI, OpNo, O);
   return false;
 }
 
 bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNum, unsigned AsmVariant,
-                                          const char *ExtraCode,
+                                          unsigned OpNum, const char *ExtraCode,
                                           raw_ostream &O) {
   assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
   const MachineOperand &BaseMO = MI->getOperand(OpNum);
@@ -137,11 +138,12 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 }
 
 void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-
-  BPFMCInstLower MCInstLowering(OutContext, *this);
-
   MCInst TmpInst;
-  MCInstLowering.Lower(MI, TmpInst);
+
+  if (!BTF || !BTF->InstLower(MI, TmpInst)) {
+    BPFMCInstLower MCInstLowering(OutContext, *this);
+    MCInstLowering.Lower(MI, TmpInst);
+  }
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
diff --git a/lib/Target/BPF/BPFCORE.h b/lib/Target/BPF/BPFCORE.h
new file mode 100644
index 000000000000..e0950d95f8d7
--- /dev/null
+++ b/lib/Target/BPF/BPFCORE.h
@@ -0,0 +1,24 @@
+//===- BPFCORE.h - Common info for Compile-Once Run-EveryWhere  -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFCORE_H
+#define LLVM_LIB_TARGET_BPF_BPFCORE_H
+
+#include <string>
+
+namespace llvm {
+class BPFCoreSharedInfo {
+public:
+  /// The attribute attached to globals representing a member offset
+  static const std::string AmaAttr;
+  /// The section name to identify a patchable external global
+  static const std::string PatchableExtSecName;
+};
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/BPF/BPFCallingConv.td b/lib/Target/BPF/BPFCallingConv.td
index 637f9752ec42..ef4ef1930aa8 100644
--- a/lib/Target/BPF/BPFCallingConv.td
+++ b/lib/Target/BPF/BPFCallingConv.td
@@ -1,9 +1,8 @@
 //===-- BPFCallingConv.td - Calling Conventions BPF --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFFrameLowering.cpp b/lib/Target/BPF/BPFFrameLowering.cpp
index c2806c85f24f..8812cfdd86da 100644
--- a/lib/Target/BPF/BPFFrameLowering.cpp
+++ b/lib/Target/BPF/BPFFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- BPFFrameLowering.cpp - BPF Frame Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index b4ffa0713fa6..2dc6277d2244 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- BPFFrameLowering.h - Define frame lowering for BPF -----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 8b9bc08e144f..1bd705c55188 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- BPFISelDAGToDAG.cpp - A dag to dag inst selector for BPF ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 9272cf692dc9..ff69941d26fb 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation  ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -106,7 +105,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   if (STI.getHasAlu32()) {
     setOperationAction(ISD::BSWAP, MVT::i32, Promote);
-    setOperationAction(ISD::BR_CC, MVT::i32, Promote);
+    setOperationAction(ISD::BR_CC, MVT::i32,
+                       STI.getHasJmp32() ? Custom : Promote);
   }
 
   setOperationAction(ISD::CTTZ, MVT::i64, Custom);
@@ -163,6 +163,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
   // CPU/Feature control
   HasAlu32 = STI.getHasAlu32();
+  HasJmp32 = STI.getHasJmp32();
   HasJmpExt = STI.getHasJmpExt();
 }
 
@@ -507,7 +508,7 @@ SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
     NegateCC(LHS, RHS, CC);
 
   return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
-                     DAG.getConstant(CC, DL, MVT::i64), Dest);
+                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
 }
 
 SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -677,36 +678,23 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   int CC = MI.getOperand(3).getImm();
   int NewCC;
   switch (CC) {
-  case ISD::SETGT:
-    NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri;
-    break;
-  case ISD::SETUGT:
-    NewCC = isSelectRROp ? BPF::JUGT_rr : BPF::JUGT_ri;
-    break;
-  case ISD::SETGE:
-    NewCC = isSelectRROp ? BPF::JSGE_rr : BPF::JSGE_ri;
-    break;
-  case ISD::SETUGE:
-    NewCC = isSelectRROp ? BPF::JUGE_rr : BPF::JUGE_ri;
-    break;
-  case ISD::SETEQ:
-    NewCC = isSelectRROp ? BPF::JEQ_rr : BPF::JEQ_ri;
-    break;
-  case ISD::SETNE:
-    NewCC = isSelectRROp ? BPF::JNE_rr : BPF::JNE_ri;
-    break;
-  case ISD::SETLT:
-    NewCC = isSelectRROp ? BPF::JSLT_rr : BPF::JSLT_ri;
-    break;
-  case ISD::SETULT:
-    NewCC = isSelectRROp ? BPF::JULT_rr : BPF::JULT_ri;
-    break;
-  case ISD::SETLE:
-    NewCC = isSelectRROp ? BPF::JSLE_rr : BPF::JSLE_ri;
-    break;
-  case ISD::SETULE:
-    NewCC = isSelectRROp ? BPF::JULE_rr : BPF::JULE_ri;
-    break;
+#define SET_NEWCC(X, Y) \
+  case ISD::X: \
+    if (is32BitCmp && HasJmp32) \
+      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
+    else \
+      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
+    break
+  SET_NEWCC(SETGT, JSGT);
+  SET_NEWCC(SETUGT, JUGT);
+  SET_NEWCC(SETGE, JSGE);
+  SET_NEWCC(SETUGE, JUGE);
+  SET_NEWCC(SETEQ, JEQ);
+  SET_NEWCC(SETNE, JNE);
+  SET_NEWCC(SETLT, JSLT);
+  SET_NEWCC(SETULT, JULT);
+  SET_NEWCC(SETLE, JSLE);
+  SET_NEWCC(SETULE, JULE);
   default:
     report_fatal_error("unimplemented select CondCode " + Twine(CC));
   }
@@ -724,13 +712,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   //
   // We simply do extension for all situations in this method, but we will
   // try to remove those unnecessary in BPFMIPeephole pass.
-  if (is32BitCmp)
+  if (is32BitCmp && !HasJmp32)
     LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
 
   if (isSelectRROp) {
     unsigned RHS = MI.getOperand(2).getReg();
 
-    if (is32BitCmp)
+    if (is32BitCmp && !HasJmp32)
       RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
 
     BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 0aa8b9ac57ac..b81bf4e1320d 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -1,9 +1,8 @@
 //===-- BPFISelLowering.h - BPF DAG Lowering Interface ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,6 +55,7 @@ public:
                               MachineBasicBlock *BB) const override;
 
   bool getHasAlu32() const { return HasAlu32; }
+  bool getHasJmp32() const { return HasJmp32; }
   bool getHasJmpExt() const { return HasJmpExt; }
 
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -66,6 +66,7 @@ public:
 private:
   // Control Instruction Selection Features
   bool HasAlu32;
+  bool HasJmp32;
   bool HasJmpExt;
 
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -100,7 +101,7 @@ private:
 
   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                          MachineFunction &MF) const override {
+                          const AttributeList &FuncAttributes) const override {
     return Size >= 8 ? MVT::i64 : MVT::i32;
   }
 
diff --git a/lib/Target/BPF/BPFInstrFormats.td b/lib/Target/BPF/BPFInstrFormats.td
index 92d4a62fd875..9f00dc85d789 100644
--- a/lib/Target/BPF/BPFInstrFormats.td
+++ b/lib/Target/BPF/BPFInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- BPFInstrFormats.td - BPF Instruction Formats -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +16,7 @@ def BPF_ST    : BPFOpClass<0x2>;
 def BPF_STX   : BPFOpClass<0x3>;
 def BPF_ALU   : BPFOpClass<0x4>;
 def BPF_JMP   : BPFOpClass<0x5>;
+def BPF_JMP32 : BPFOpClass<0x6>;
 def BPF_ALU64 : BPFOpClass<0x7>;
 
 class BPFSrcType<bits<1> val> {
diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp
index 4d47debdaa74..932f718d5490 100644
--- a/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/lib/Target/BPF/BPFInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- BPFInstrInfo.cpp - BPF Instruction Information ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h
index fb65a86a6d18..e4bd757da560 100644
--- a/lib/Target/BPF/BPFInstrInfo.h
+++ b/lib/Target/BPF/BPFInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- BPFInstrInfo.h - BPF Instruction Information ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index aaef5fb706e0..c44702a78ec8 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- BPFInstrInfo.td - Target Description for BPF Target ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,6 +101,26 @@ def BPF_CC_LTU : PatLeaf<(i64 imm),
                          [{return (N->getZExtValue() == ISD::SETULT);}]>;
 def BPF_CC_LEU : PatLeaf<(i64 imm),
                          [{return (N->getZExtValue() == ISD::SETULE);}]>;
+def BPF_CC_EQ_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETEQ);}]>;
+def BPF_CC_NE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETNE);}]>;
+def BPF_CC_GE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETGE);}]>;
+def BPF_CC_GT_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETGT);}]>;
+def BPF_CC_GTU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETUGT);}]>;
+def BPF_CC_GEU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETUGE);}]>;
+def BPF_CC_LE_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETLE);}]>;
+def BPF_CC_LT_32  : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETLT);}]>;
+def BPF_CC_LTU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETULT);}]>;
+def BPF_CC_LEU_32 : PatLeaf<(i32 imm),
+                         [{return (N->getZExtValue() == ISD::SETULE);}]>;
 
 // For arithmetic and jump instructions the 8-bit 'code'
 // field is divided into three parts:
@@ -167,23 +186,57 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
   let BPFClass = BPF_JMP;
 }
 
-multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond> {
+class JMP_RR_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+    : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+                   (outs),
+                   (ins GPR32:$dst, GPR32:$src, brtarget:$BrDst),
+                   "if $dst "#OpcodeStr#" $src goto $BrDst",
+                   [(BPFbrcc i32:$dst, i32:$src, Cond, bb:$BrDst)]> {
+  bits<4> dst;
+  bits<4> src;
+  bits<16> BrDst;
+  // Conditional branch comparing two GPR32 registers; class is BPF_JMP32.
+  let Inst{55-52} = src;
+  let Inst{51-48} = dst;
+  let Inst{47-32} = BrDst;
+  let BPFClass = BPF_JMP32;
+}
+
+class JMP_RI_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+    : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+                   (outs),
+                   (ins GPR32:$dst, i32imm:$imm, brtarget:$BrDst),
+                   "if $dst "#OpcodeStr#" $imm goto $BrDst",
+                   [(BPFbrcc i32:$dst, i32immSExt32:$imm, Cond, bb:$BrDst)]> {
+  bits<4> dst;
+  bits<16> BrDst;
+  bits<32> imm;
+  // Conditional branch comparing a GPR32 register with a 32-bit immediate.
+  let Inst{51-48} = dst;
+  let Inst{47-32} = BrDst;
+  let Inst{31-0} = imm;
+  let BPFClass = BPF_JMP32;
+}
+
+multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond, PatLeaf Cond32> {
   def _rr : JMP_RR<Opc, OpcodeStr, Cond>;
   def _ri : JMP_RI<Opc, OpcodeStr, Cond>;
+  def _rr_32 : JMP_RR_32<Opc, OpcodeStr, Cond32>;
+  def _ri_32 : JMP_RI_32<Opc, OpcodeStr, Cond32>;
 }
 
 let isBranch = 1, isTerminator = 1, hasDelaySlot=0 in {
 // cmp+goto instructions
-defm JEQ  : J<BPF_JEQ, "==",  BPF_CC_EQ>;
-defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU>;
-defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU>;
-defm JNE  : J<BPF_JNE, "!=",  BPF_CC_NE>;
-defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT>;
-defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE>;
-defm JULT : J<BPF_JLT, "<", BPF_CC_LTU>;
-defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU>;
-defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT>;
-defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE>;
+defm JEQ  : J<BPF_JEQ, "==",  BPF_CC_EQ, BPF_CC_EQ_32>;
+defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU, BPF_CC_GTU_32>;
+defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU, BPF_CC_GEU_32>;
+defm JNE  : J<BPF_JNE, "!=",  BPF_CC_NE, BPF_CC_NE_32>;
+defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT, BPF_CC_GT_32>;
+defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE, BPF_CC_GE_32>;
+defm JULT : J<BPF_JLT, "<", BPF_CC_LTU, BPF_CC_LTU_32>;
+defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU, BPF_CC_LEU_32>;
+defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
+defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
 }
 
 // ALU instructions
@@ -561,11 +614,31 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
   let BPFClass = BPF_STX;
 }
 
+class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+    : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
+                 (outs GPR32:$dst),
+                 (ins MEMri:$addr, GPR32:$val),
+                 "lock *("#OpcodeStr#" *)($addr) += $val",
+                 [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
+  bits<4> dst;
+  bits<20> addr;
+  // "lock ... +=" memory add whose value and result operands are GPR32.
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = dst;
+  let Inst{47-32} = addr{15-0}; // offset
+  let BPFClass = BPF_STX;
+}
+
 let Constraints = "$dst = $val" in {
-def XADD32 : XADD<BPF_W, "u32", atomic_load_add_32>;
-def XADD64 : XADD<BPF_DW, "u64", atomic_load_add_64>;
-// undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
-// undefined def XADD8  : XADD<2, "xadd8", atomic_load_add_8>;
+  let Predicates = [BPFNoALU32] in {
+    def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
+  }
+
+  let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
+    def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>;
+  }
+
+  def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>;
 }
 
 // bswap16, bswap32, bswap64
diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp
index c8528e867310..846798a63cb7 100644
--- a/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/lib/Target/BPF/BPFMCInstLower.cpp
@@ -1,9 +1,8 @@
 //=-- BPFMCInstLower.cpp - Convert BPF MachineInstr to an MCInst ------------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index eac811f4cf88..0622d20814d3 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- BPFMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp
index 0a311378e777..4c46289656b4 100644
--- a/lib/Target/BPF/BPFMIChecking.cpp
+++ b/lib/Target/BPF/BPFMIChecking.cpp
@@ -1,9 +1,8 @@
 //===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,14 +61,107 @@ void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
   LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
 }
 
+// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
+// used.
+//
+// NOTE: BPF backend hasn't enabled sub-register liveness track, so when the
+// source and destination operands of XADD are GPR32, there is no sub-register
+// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
+// will raise false alarm on GPR32 Def.
+//
+// To support GPR32 Def, ideally we could just enable sub-register liveness
+// tracking on BPF backend, then allDefsAreDead could work on GPR32 Def. This
+// requires implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
+//
+// However, the sub-register liveness tracking module inside LLVM is actually
+// designed for the situation where one register could be split into more than
+// one sub-register, in which case each sub-register could have its own
+// liveness and killing one of them doesn't kill others. So, tracking liveness
+// for each makes sense.
+//
+// For BPF, each 64-bit register could only have one 32-bit sub-register. This
+// is exactly the case which LLVM thinks brings no benefits for doing
+// sub-register tracking, because the live range of a sub-register must always
+// equal that of its parent register; therefore liveness tracking is disabled
+// even if the back-end has implemented enableSubRegLiveness. The detailed
+// information is at r232695:
+//
+//   Author: Matthias Braun <matze@braunis.de>
+//   Date:   Thu Mar 19 00:21:58 2015 +0000
+//   Do not track subregister liveness when it brings no benefits
+//
+// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given the solo
+// sub-register always has the same liveness as its parent register, LLVM is
+// already attaching an implicit 64-bit register Def whenever there is
+// a sub-register Def. The liveness of the implicit 64-bit Def is available.
+// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
+// be:
+//
+//   $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
+//                        implicit killed $r9, implicit-def dead $r9
+//
+// Even though w9 is not marked as Dead, the parent register r9 is marked as
+// Dead correctly, and it is safe to use such information for our purpose.
+static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+  const MCRegisterClass *GPR64RegClass =
+    &BPFMCRegisterClasses[BPF::GPRRegClassID];
+  std::vector<unsigned> GPR32LiveDefs;
+  std::vector<unsigned> GPR64DeadDefs;
+  // Sort the register Defs of MI into live GPR32 and dead GPR64 lists.
+  for (const MachineOperand &MO : MI.operands()) {
+    bool RegIsGPR64;
+
+    if (!MO.isReg() || MO.isUse())
+      continue;
+
+    RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
+    if (!MO.isDead()) {
+      // It is a GPR64 live Def, we are sure it is live.
+      if (RegIsGPR64)
+        return true;
+      // It is a GPR32 live Def, we are unsure whether it is really dead due to
+      // no sub-register liveness tracking. Push it to vector for deferred
+      // check.
+      GPR32LiveDefs.push_back(MO.getReg());
+      continue;
+    }
+
+    // Record any GPR64 dead Def as some unmarked GPR32 could be alias of its
+    // low 32-bit.
+    if (RegIsGPR64)
+      GPR64DeadDefs.push_back(MO.getReg());
+  }
+
+  // No GPR32 live Def, safe to return false.
+  if (GPR32LiveDefs.empty())
+    return false;
+
+  // No GPR64 dead Def, so all those GPR32 live Def can't have alias, therefore
+  // must be truly live, safe to return true.
+  if (GPR64DeadDefs.empty())
+    return true;
+
+  // Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
+  std::vector<unsigned>::iterator search_begin = GPR64DeadDefs.begin();
+  std::vector<unsigned>::iterator search_end = GPR64DeadDefs.end();
+  for (auto I : GPR32LiveDefs)
+    for (MCSuperRegIterator SR(I, TRI); SR.isValid(); ++SR)
+       if (std::find(search_begin, search_end, *SR) == search_end)
+         return true;
+
+  return false;
+}
+
 void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
-      if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+      if (MI.getOpcode() != BPF::XADDW &&
+          MI.getOpcode() != BPF::XADDD &&
+          MI.getOpcode() != BPF::XADDW32)
         continue;
 
       LLVM_DEBUG(MI.dump());
-      if (!MI.allDefsAreDead()) {
+      if (hasLiveDefs(MI, TRI)) {
         DebugLoc Empty;
         const DebugLoc &DL = MI.getDebugLoc();
         if (DL != Empty)
diff --git a/lib/Target/BPF/BPFMIPeephole.cpp b/lib/Target/BPF/BPFMIPeephole.cpp
index 9e984d0facfb..156ba793e359 100644
--- a/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/lib/Target/BPF/BPFMIPeephole.cpp
@@ -1,9 +1,8 @@
 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
new file mode 100644
index 000000000000..e9114d7187e3
--- /dev/null
+++ b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -0,0 +1,163 @@
+//===----- BPFMISimplifyPatchable.cpp - MI Simplify Patchable Insts -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass targets a subset of instructions like below
+//    ld_imm64 r1, @global
+//    ldd r2, r1, 0
+//    add r3, struct_base_reg, r2
+//
+// Here @global should either represent an AMA (abstract member access) or
+// a patchable extern variable. And these two kinds of accesses
+// are subject to bpf load time patching. After this pass, the
+// code becomes
+//    ld_imm64 r1, @global
+//    add r3, struct_base_reg, r1
+//
+// Eventually, at BTF output stage, a relocation record will be generated
+// for ld_imm64 which should be replaced later by bpf loader:
+//    r1 = <calculated offset> or <to_be_patched_extern_val>
+//    add r3, struct_base_reg, r1
+// or
+//    ld_imm64 r1, <to_be_patched_extern_val>
+//    add r3, struct_base_reg, r1
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-simplify-patchable"
+
+namespace {
+
+struct BPFMISimplifyPatchable : public MachineFunctionPass {
+  // Pass identifier handed to the MachineFunctionPass constructor below.
+  static char ID;
+  const BPFInstrInfo *TII;
+  MachineFunction *MF;
+
+  BPFMISimplifyPatchable() : MachineFunctionPass(ID) {
+    initializeBPFMISimplifyPatchablePass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Initialize class variables (MF and TII) for the function being processed.
+  void initialize(MachineFunction &MFParm);
+  // Remove unneeded Load instructions; returns true if anything changed.
+  bool removeLD(void);
+
+public:
+  // Main entry point for this pass; skipped functions are left untouched.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+    initialize(MF);
+    return removeLD();
+  }
+};
+
+// Cache the function under transformation and its BPF instruction info.
+void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  TII = MFParm.getSubtarget<BPFSubtarget>().getInstrInfo();
+  LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
+}
+
+/// Remove unneeded Load instructions. Returns true if the function changed.
+bool BPFMISimplifyPatchable::removeLD() {
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  MachineInstr *ToErase = nullptr;
+  bool Changed = false;
+  // A matched load is recorded in ToErase and erased on the next iteration.
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      // Ensure the register format is LOAD <reg>, <reg>, 0
+      if (MI.getOpcode() != BPF::LDD && MI.getOpcode() != BPF::LDW &&
+          MI.getOpcode() != BPF::LDH && MI.getOpcode() != BPF::LDB &&
+          MI.getOpcode() != BPF::LDW32 && MI.getOpcode() != BPF::LDH32 &&
+          MI.getOpcode() != BPF::LDB32)
+        continue;
+
+      if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg())
+        continue;
+      // The load offset must be immediate 0, so ImmVal below is always 0.
+      if (!MI.getOperand(2).isImm() || MI.getOperand(2).getImm())
+        continue;
+      unsigned DstReg = MI.getOperand(0).getReg();
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      int64_t ImmVal = MI.getOperand(2).getImm();
+      // The address register must have a unique defining instruction.
+      MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg);
+      if (!DefInst)
+        continue;
+
+      bool IsCandidate = false;
+      if (DefInst->getOpcode() == BPF::LD_imm64) {
+        const MachineOperand &MO = DefInst->getOperand(1);
+        if (MO.isGlobal()) {
+          const GlobalValue *GVal = MO.getGlobal();
+          auto *GVar = dyn_cast<GlobalVariable>(GVal);
+          if (GVar) {
+            // Global variables representing structure offset or
+            // patchable extern globals.
+            if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+              assert(ImmVal == 0);
+              IsCandidate = true;
+            } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() &&
+                       GVar->getSection() ==
+                           BPFCoreSharedInfo::PatchableExtSecName) {
+              if (ImmVal == 0)
+                IsCandidate = true;
+              else
+                errs() << "WARNING: unhandled patchable extern "
+                       << GVar->getName() << " with load offset " << ImmVal
+                       << "\n";
+            }
+          }
+        }
+      }
+
+      if (!IsCandidate)
+        continue;
+      // Redirect every use of the loaded value to the address register.
+      auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+      decltype(End) NextI;
+      for (auto I = Begin; I != End; I = NextI) {
+        NextI = std::next(I);
+        I->setReg(SrcReg);
+      }
+      ToErase = &MI;
+      Changed = true;
+    }
+  }
+  // A load matched on the final iteration has no later iteration to erase it.
+  if (ToErase)
+    ToErase->eraseFromParent();
+  return Changed;
+}
+
+} // namespace
+
+INITIALIZE_PASS(BPFMISimplifyPatchable, DEBUG_TYPE,
+                "BPF PreEmit SimplifyPatchable", false, false)
+// Out-of-class definition of the static pass ID, followed by the factory.
+char BPFMISimplifyPatchable::ID = 0;
+FunctionPass *llvm::createBPFMISimplifyPatchablePass() {
+  return new BPFMISimplifyPatchable();
+}
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 635c11113151..714af06e11d9 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- BPFRegisterInfo.cpp - BPF Register Information ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -122,6 +121,6 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 }
 
-unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return BPF::R10;
 }
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 4202850e9eb9..e7b870b720a4 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- BPFRegisterInfo.h - BPF Register Information Impl -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,7 +32,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 };
 }
 
diff --git a/lib/Target/BPF/BPFRegisterInfo.td b/lib/Target/BPF/BPFRegisterInfo.td
index da1d6b505f84..88dec063be70 100644
--- a/lib/Target/BPF/BPFRegisterInfo.td
+++ b/lib/Target/BPF/BPFRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- BPFRegisterInfo.td - BPF Register defs -------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/lib/Target/BPF/BPFSelectionDAGInfo.cpp
index 24d5f59bbfd7..a711294048ba 100644
--- a/lib/Target/BPF/BPFSelectionDAGInfo.cpp
+++ b/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- BPFSelectionDAGInfo.cpp - BPF SelectionDAG Info -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFSelectionDAGInfo.h b/lib/Target/BPF/BPFSelectionDAGInfo.h
index 19d3c5769573..fb88c32ceb0c 100644
--- a/lib/Target/BPF/BPFSelectionDAGInfo.h
+++ b/lib/Target/BPF/BPFSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- BPFSelectionDAGInfo.h - BPF SelectionDAG Info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BPFSubtarget.cpp b/lib/Target/BPF/BPFSubtarget.cpp
index 56780bd9d46f..ab3452501b95 100644
--- a/lib/Target/BPF/BPFSubtarget.cpp
+++ b/lib/Target/BPF/BPFSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- BPFSubtarget.cpp - BPF Subtarget Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,6 +35,7 @@ BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
 
 void BPFSubtarget::initializeEnvironment() {
   HasJmpExt = false;
+  HasJmp32 = false;
   HasAlu32 = false;
   UseDwarfRIS = false;
 }
@@ -49,6 +49,11 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     HasJmpExt = true;
     return;
   }
+  if (CPU == "v3") {
+    HasJmpExt = true;
+    HasJmp32 = true;
+    return;
+  }
 }
 
 BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/lib/Target/BPF/BPFSubtarget.h b/lib/Target/BPF/BPFSubtarget.h
index 60e56435fe4c..3da6a026ab7e 100644
--- a/lib/Target/BPF/BPFSubtarget.h
+++ b/lib/Target/BPF/BPFSubtarget.h
@@ -1,9 +1,8 @@
 //===-- BPFSubtarget.h - Define Subtarget for the BPF -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,6 +47,10 @@ protected:
   // whether the cpu supports jmp ext
   bool HasJmpExt;
 
+  // whether the cpu supports jmp32 ext.
+  // NOTE: jmp32 is not enabled when alu32 is enabled.
+  bool HasJmp32;
+
   // whether the cpu supports alu32 instructions.
   bool HasAlu32;
 
@@ -66,6 +69,7 @@ public:
   // subtarget options.  Definition of function is auto generated by tblgen.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   bool getHasJmpExt() const { return HasJmpExt; }
+  bool getHasJmp32() const { return HasJmp32; }
   bool getHasAlu32() const { return HasAlu32; }
   bool getUseDwarfRIS() const { return UseDwarfRIS; }
 
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 350465b118ed..24c0ff0f7f15 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- BPFTargetMachine.cpp - Define TargetMachine for BPF ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #include "BPFTargetMachine.h"
 #include "BPF.h"
 #include "MCTargetDesc/BPFMCAsmInfo.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -34,6 +34,7 @@ extern "C" void LLVMInitializeBPFTarget() {
   RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
 
   PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializeBPFAbstractMemberAccessPass(PR);
   initializeBPFMIPeepholePass(PR);
 }
 
@@ -68,6 +69,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
       static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
   MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
 }
+
 namespace {
 // BPF Code Generator Pass Configuration Options.
 class BPFPassConfig : public TargetPassConfig {
@@ -79,6 +81,7 @@ public:
     return getTM<BPFTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addInstSelector() override;
   void addMachineSSAOptimization() override;
   void addPreEmitPass() override;
@@ -89,6 +92,13 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new BPFPassConfig(*this, PM);
 }
 
+void BPFPassConfig::addIRPasses() {
+  // Queue the BPF abstract member access pass before the default IR passes.
+  addPass(createBPFAbstractMemberAccess());
+
+  TargetPassConfig::addIRPasses();
+}
+
 // Install an instruction selector pass using
 // the ISelDag to gen BPF code.
 bool BPFPassConfig::addInstSelector() {
@@ -98,6 +108,8 @@ bool BPFPassConfig::addInstSelector() {
 }
 
 void BPFPassConfig::addMachineSSAOptimization() {
+  addPass(createBPFMISimplifyPatchablePass());
+
   // The default implementation must be called first as we want eBPF
   // Peephole ran at last.
   TargetPassConfig::addMachineSSAOptimization();
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index a560dd27335a..beac7bd862da 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- BPFTargetMachine.h - Define TargetMachine for BPF --- C++ ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/BTF.def b/lib/Target/BPF/BTF.def
index 54c5bc3cf092..2d2e9a04aa6d 100644
--- a/lib/Target/BPF/BTF.def
+++ b/lib/Target/BPF/BTF.def
@@ -1,9 +1,8 @@
 //===- BTF.def - BTF definitions --------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,5 +28,7 @@ HANDLE_BTF_KIND(10, CONST)
 HANDLE_BTF_KIND(11, RESTRICT)
 HANDLE_BTF_KIND(12, FUNC)
 HANDLE_BTF_KIND(13, FUNC_PROTO)
+HANDLE_BTF_KIND(14, VAR)
+HANDLE_BTF_KIND(15, DATASEC)
 
 #undef HANDLE_BTF_KIND
diff --git a/lib/Target/BPF/BTF.h b/lib/Target/BPF/BTF.h
index 1e1680faf1b8..ad56716710a6 100644
--- a/lib/Target/BPF/BTF.h
+++ b/lib/Target/BPF/BTF.h
@@ -1,9 +1,8 @@
 //===-- BTF.h --------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -18,7 +17,7 @@
 ///
 /// The binary layout for .BTF.ext section:
 ///   struct ExtHeader
-///   FuncInfo and LineInfo subsections
+///   FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections
 /// The FuncInfo subsection is defined as below:
 ///   BTFFuncInfo Size
 ///   struct SecFuncInfo for ELF section #1
@@ -33,6 +32,20 @@
 ///   struct SecLineInfo for ELF section #2
 ///   A number of struct BPFLineInfo for ELF section #2
 ///   ...
+/// The OffsetReloc subsection is defined as below:
+///   BPFOffsetReloc Size
+///   struct SecOffsetReloc for ELF section #1
+///   A number of struct BPFOffsetReloc for ELF section #1
+///   struct SecOffsetReloc for ELF section #2
+///   A number of struct BPFOffsetReloc for ELF section #2
+///   ...
+/// The ExternReloc subsection is defined as below:
+///   BPFExternReloc Size
+///   struct SecExternReloc for ELF section #1
+///   A number of struct BPFExternReloc for ELF section #1
+///   struct SecExternReloc for ELF section #2
+///   A number of struct BPFExternReloc for ELF section #2
+///   ...
 ///
 /// The section formats are also defined at
 ///    https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h
@@ -50,16 +63,21 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
 /// Sizes in bytes of various things in the BTF format.
 enum {
   HeaderSize = 24,
-  ExtHeaderSize = 24,
+  ExtHeaderSize = 40,
   CommonTypeSize = 12,
   BTFArraySize = 12,
   BTFEnumSize = 8,
   BTFMemberSize = 12,
   BTFParamSize = 8,
+  BTFDataSecVarSize = 12,
   SecFuncInfoSize = 8,
   SecLineInfoSize = 8,
+  SecOffsetRelocSize = 8,
+  SecExternRelocSize = 8,
   BPFFuncInfoSize = 8,
-  BPFLineInfoSize = 16
+  BPFLineInfoSize = 16,
+  BPFOffsetRelocSize = 12,
+  BPFExternRelocSize = 8,
 };
 
 /// The .BTF section header definition.
@@ -77,7 +95,7 @@ struct Header {
 };
 
 enum : uint32_t {
-  MAX_VLEN = 0xffff         ///< Max # of struct/union/enum members or func args
+  MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
 };
 
 enum TypeKinds : uint8_t {
@@ -104,7 +122,7 @@ struct CommonType {
   /// "Size" tells the size of the type it is describing.
   ///
   /// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-  /// FUNC and FUNC_PROTO.
+  /// FUNC, FUNC_PROTO and VAR.
   /// "Type" is a type_id referring to another type.
   union {
     uint32_t Size;
@@ -122,7 +140,11 @@ struct CommonType {
 // BTF_INT_BITS(VAL) : ((VAL) & 0x000000ff)
 
 /// Attributes stored in the INT_ENCODING.
-enum : uint8_t { INT_SIGNED = (1 << 0), INT_CHAR = (1 << 1), INT_BOOL = (1 << 2) };
+enum : uint8_t {
+  INT_SIGNED = (1 << 0),
+  INT_CHAR = (1 << 1),
+  INT_BOOL = (1 << 2)
+};
 
 /// BTF_KIND_ENUM is followed by multiple "struct BTFEnum".
 /// The exact number of btf_enum is stored in the vlen (of the
@@ -163,6 +185,23 @@ struct BTFParam {
   uint32_t Type;
 };
 
+/// Variable scoping information.
+enum : uint8_t {
+  VAR_STATIC = 0,           ///< Linkage: InternalLinkage
+  VAR_GLOBAL_ALLOCATED = 1, ///< Linkage: ExternalLinkage
+  VAR_GLOBAL_TENTATIVE = 2, ///< Linkage: CommonLinkage
+  VAR_GLOBAL_EXTERNAL = 3,  ///< Linkage: ExternalLinkage
+};
+
+/// BTF_KIND_DATASEC is followed by multiple "struct BTFDataSec".
+/// The exact number of BTFDataSec is stored in the vlen (of the info
+/// in "struct CommonType").
+struct BTFDataSec {
+  uint32_t Type;   ///< A BTF_KIND_VAR type
+  uint32_t Offset; ///< In-section offset
+  uint32_t Size;   ///< Occupied memory size
+};
+
 /// The .BTF.ext section header definition.
 struct ExtHeader {
   uint16_t Magic;
@@ -170,10 +209,14 @@ struct ExtHeader {
   uint8_t Flags;
   uint32_t HdrLen;
 
-  uint32_t FuncInfoOff; ///< Offset of func info section
-  uint32_t FuncInfoLen; ///< Length of func info section
-  uint32_t LineInfoOff; ///< Offset of line info section
-  uint32_t LineInfoLen; ///< Length of line info section
+  uint32_t FuncInfoOff;    ///< Offset of func info section
+  uint32_t FuncInfoLen;    ///< Length of func info section
+  uint32_t LineInfoOff;    ///< Offset of line info section
+  uint32_t LineInfoLen;    ///< Length of line info section
+  uint32_t OffsetRelocOff; ///< Offset of offset reloc section
+  uint32_t OffsetRelocLen; ///< Length of offset reloc section
+  uint32_t ExternRelocOff; ///< Offset of extern reloc section
+  uint32_t ExternRelocLen; ///< Length of extern reloc section
 };
 
 /// Specifying one function info.
@@ -199,10 +242,35 @@ struct BPFLineInfo {
 
 /// Specifying line info's in one section.
 struct SecLineInfo {
-  uint32_t SecNameOff;  ///< Section name index in the .BTF string tble
+  uint32_t SecNameOff;  ///< Section name index in the .BTF string table
   uint32_t NumLineInfo; ///< Number of line info's in this section
 };
 
+/// Specifying one offset relocation.
+struct BPFOffsetReloc {
+  uint32_t InsnOffset;    ///< Byte offset in this section
+  uint32_t TypeID;        ///< TypeID for the relocation
+  uint32_t OffsetNameOff; ///< The string to traverse types
+};
+
+/// Specifying offset relocation's in one section.
+struct SecOffsetReloc {
+  uint32_t SecNameOff;     ///< Section name index in the .BTF string table
+  uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section
+};
+
+/// Specifying one extern relocation.
+struct BPFExternReloc {
+  uint32_t InsnOffset;    ///< Byte offset in this section
+  uint32_t ExternNameOff; ///< The string for external variable
+};
+
+/// Specifying extern relocation's in one section.
+struct SecExternReloc {
+  uint32_t SecNameOff;     ///< Section name index in the .BTF string table
+  uint32_t NumExternReloc; ///< Number of extern reloc's in this section
+};
+
 } // End namespace BTF.
 } // End namespace llvm.
 
diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp
index 96efea4ba8ee..fa35c6619e21 100644
--- a/lib/Target/BPF/BTFDebug.cpp
+++ b/lib/Target/BPF/BTFDebug.cpp
@@ -1,9 +1,8 @@
 //===- BTFDebug.cpp - BTF Generator ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "BTFDebug.h"
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -19,8 +21,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include <fstream>
-#include <sstream>
+#include "llvm/Support/LineIterator.h"
 
 using namespace llvm;
 
@@ -39,8 +40,9 @@ void BTFTypeBase::emitType(MCStreamer &OS) {
   OS.EmitIntValue(BTFType.Size, 4);
 }
 
-BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
-    : DTy(DTy) {
+BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
+                               bool NeedsFixup)
+    : DTy(DTy), NeedsFixup(NeedsFixup) {
   switch (Tag) {
   case dwarf::DW_TAG_pointer_type:
     Kind = BTF::BTF_KIND_PTR;
@@ -64,10 +66,17 @@ BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
 }
 
 void BTFTypeDerived::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(DTy->getName());
 
+  if (NeedsFixup)
+    return;
+
   // The base type for PTR/CONST/VOLATILE could be void.
-  const DIType *ResolvedType = DTy->getBaseType().resolve();
+  const DIType *ResolvedType = DTy->getBaseType();
   if (!ResolvedType) {
     assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
             Kind == BTF::BTF_KIND_VOLATILE) &&
@@ -80,6 +89,10 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) {
 
 void BTFTypeDerived::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
 
+void BTFTypeDerived::setPointeeType(uint32_t PointeeType) {
+  BTFType.Type = PointeeType; // Type id of the type this entry refers to.
+}
+
 /// Represent a struct/union forward declaration.
 BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
   Kind = BTF::BTF_KIND_FWD;
@@ -88,6 +101,10 @@ BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
 }
 
 void BTFTypeFwd::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(Name);
 }
 
@@ -121,6 +138,10 @@ BTFTypeInt::BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits,
 }
 
 void BTFTypeInt::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(Name);
 }
 
@@ -137,6 +158,10 @@ BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
 }
 
 void BTFTypeEnum::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(ETy->getName());
 
   DINodeArray Elements = ETy->getElements();
@@ -159,45 +184,29 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
   }
 }
 
-BTFTypeArray::BTFTypeArray(const DICompositeType *ATy) : ATy(ATy) {
+BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
+                           uint32_t NumElems)
+    : ElemSize(ElemSize) {
   Kind = BTF::BTF_KIND_ARRAY;
+  BTFType.NameOff = 0;
   BTFType.Info = Kind << 24;
+  BTFType.Size = 0;
+
+  ArrayInfo.ElemType = ElemTypeId;
+  ArrayInfo.Nelems = NumElems;
 }
 
-/// Represent a BTF array. BTF does not record array dimensions,
-/// so conceptually a BTF array is a one-dimensional array.
+/// Represent a BTF array.
 void BTFTypeArray::completeType(BTFDebug &BDebug) {
-  BTFType.NameOff = BDebug.addString(ATy->getName());
-  BTFType.Size = 0;
-
-  auto *BaseType = ATy->getBaseType().resolve();
-  ArrayInfo.ElemType = BDebug.getTypeId(BaseType);
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
 
   // The IR does not really have a type for the index.
   // A special type for array index should have been
   // created during initial type traversal. Just
   // retrieve that type id.
   ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
-
-  // Get the number of array elements.
-  // If the array size is 0, set the number of elements as 0.
-  // Otherwise, recursively traverse the base types to
-  // find the element size. The number of elements is
-  // the totoal array size in bits divided by
-  // element size in bits.
-  uint64_t ArraySizeInBits = ATy->getSizeInBits();
-  if (!ArraySizeInBits) {
-    ArrayInfo.Nelems = 0;
-  } else {
-    uint32_t BaseTypeSize = BaseType->getSizeInBits();
-    while (!BaseTypeSize) {
-      const auto *DDTy = cast<DIDerivedType>(BaseType);
-      BaseType = DDTy->getBaseType().resolve();
-      assert(BaseType);
-      BaseTypeSize = BaseType->getSizeInBits();
-    }
-    ArrayInfo.Nelems = ATy->getSizeInBits() / BaseTypeSize;
-  }
 }
 
 void BTFTypeArray::emitType(MCStreamer &OS) {
@@ -207,6 +216,12 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
   OS.EmitIntValue(ArrayInfo.Nelems, 4);
 }
 
+void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
+                              uint32_t &ElementTypeId) {
+  ElementTypeId = ArrayInfo.ElemType; // Type id of the array's elements.
+  LocOffset = Loc * ElemSize;         // Loc elements of ElemSize each.
+}
+
 /// Represent either a struct or a union.
 BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
                              bool HasBitField, uint32_t Vlen)
@@ -217,6 +232,10 @@ BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
 }
 
 void BTFTypeStruct::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(STy->getName());
 
   // Add struct/union members.
@@ -232,7 +251,7 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
     } else {
       BTFMember.Offset = DDTy->getOffsetInBits();
     }
-    BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType().resolve());
+    BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
     Members.push_back(BTFMember);
   }
 }
@@ -247,6 +266,17 @@ void BTFTypeStruct::emitType(MCStreamer &OS) {
   }
 }
 
+std::string BTFTypeStruct::getName() { return STy->getName().str(); }
+
+void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
+                                  uint32_t &MemberType) {
+  const auto &Member = Members[Loc]; // Member at index Loc.
+  MemberType = Member.Type;
+  MemberOffset = HasBitField ? Member.Offset & 0xffffff : Member.Offset;
+}
+
+uint32_t BTFTypeStruct::getStructSize() { return STy->getSizeInBits() / 8; }
+
 /// The Func kind represents both subprogram and pointee of function
 /// pointers. If the FuncName is empty, it represents a pointee of function
 /// pointer. Otherwise, it represents a subprogram. The func arg names
@@ -261,8 +291,12 @@ BTFTypeFuncProto::BTFTypeFuncProto(
 }
 
 void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   DITypeRefArray Elements = STy->getTypeArray();
-  auto RetType = Elements[0].resolve();
+  auto RetType = Elements[0];
   BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
   BTFType.NameOff = 0;
 
@@ -270,7 +304,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
   // to represent the vararg, encode the NameOff/Type to be 0.
   for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
     struct BTF::BTFParam Param;
-    auto Element = Elements[I].resolve();
+    auto Element = Elements[I];
     if (Element) {
       Param.NameOff = BDebug.addString(FuncArgNames[I]);
       Param.Type = BDebug.getTypeId(Element);
@@ -298,11 +332,54 @@ BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
 }
 
 void BTFTypeFunc::completeType(BTFDebug &BDebug) {
+  if (IsCompleted)
+    return;
+  IsCompleted = true;
+
   BTFType.NameOff = BDebug.addString(Name);
 }
 
 void BTFTypeFunc::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
 
+BTFKindVar::BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo)
+    : Name(VarName) {
+  Kind = BTF::BTF_KIND_VAR;
+  BTFType.Info = Kind << 24; // Kind is stored from bit 24 upwards.
+  BTFType.Type = TypeId;
+  Info = VarInfo;
+}
+
+void BTFKindVar::completeType(BTFDebug &BDebug) {
+  BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFKindVar::emitType(MCStreamer &OS) {
+  BTFTypeBase::emitType(OS);
+  OS.EmitIntValue(Info, 4); // Info is emitted as a 4-byte value.
+}
+
+BTFKindDataSec::BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName)
+    : Asm(AsmPrt), Name(SecName) {
+  Kind = BTF::BTF_KIND_DATASEC;
+  BTFType.Info = Kind << 24; // Kind is stored from bit 24 upwards.
+  BTFType.Size = 0;
+}
+
+void BTFKindDataSec::completeType(BTFDebug &BDebug) {
+  BTFType.NameOff = BDebug.addString(Name);
+  BTFType.Info |= Vars.size(); // Merge the variable count into Info.
+}
+
+void BTFKindDataSec::emitType(MCStreamer &OS) {
+  BTFTypeBase::emitType(OS);
+  // Per variable: a 4-byte value, a 4-byte label reference, a 4-byte value.
+  for (const auto &V : Vars) {
+    OS.EmitIntValue(std::get<0>(V), 4);
+    Asm->EmitLabelReference(std::get<1>(V), 4);
+    OS.EmitIntValue(std::get<2>(V), 4);
+  }
+}
+
 uint32_t BTFStringTable::addString(StringRef S) {
   // Check whether the string already exists.
   for (auto &OffsetM : OffsetToIdMap) {
@@ -319,15 +396,18 @@ uint32_t BTFStringTable::addString(StringRef S) {
 
 BTFDebug::BTFDebug(AsmPrinter *AP)
     : DebugHandlerBase(AP), OS(*Asm->OutStreamer), SkipInstruction(false),
-      LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0) {
+      LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0),
+      MapDefNotCollected(true) {
   addString("\0");
 }
 
-void BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
-                       const DIType *Ty) {
+uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
+                           const DIType *Ty) {
   TypeEntry->setId(TypeEntries.size() + 1);
-  DIToIdMap[Ty] = TypeEntry->getId();
+  uint32_t Id = TypeEntry->getId();
+  DIToIdMap[Ty] = Id;
   TypeEntries.push_back(std::move(TypeEntry));
+  return Id;
 }
 
 uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
@@ -337,7 +417,7 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
   return Id;
 }
 
-void BTFDebug::visitBasicType(const DIBasicType *BTy) {
+void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
   // Only int types are supported in BTF.
   uint32_t Encoding = BTy->getEncoding();
   if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
@@ -350,7 +430,7 @@ void BTFDebug::visitBasicType(const DIBasicType *BTy) {
   // DIToIdMap for cross-type reference check.
   auto TypeEntry = llvm::make_unique<BTFTypeInt>(
       Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
-  addType(std::move(TypeEntry), BTy);
+  TypeId = addType(std::move(TypeEntry), BTy);
 }
 
 /// Handle subprogram or subroutine types.
@@ -371,16 +451,17 @@ void BTFDebug::visitSubroutineType(
   if (ForSubprog)
     TypeId = addType(std::move(TypeEntry)); // For subprogram
   else
-    addType(std::move(TypeEntry), STy); // For func ptr
+    TypeId = addType(std::move(TypeEntry), STy); // For func ptr
 
   // Visit return type and func arg types.
   for (const auto Element : Elements) {
-    visitTypeEntry(Element.resolve());
+    visitTypeEntry(Element);
   }
 }
 
 /// Handle structure/union types.
-void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
+void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
+                               uint32_t &TypeId) {
   const DINodeArray Elements = CTy->getElements();
   uint32_t VLen = Elements.size();
   if (VLen > BTF::MAX_VLEN)
@@ -398,16 +479,49 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
 
   auto TypeEntry =
       llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
-  addType(std::move(TypeEntry), CTy);
+  StructTypes.push_back(TypeEntry.get());
+  TypeId = addType(std::move(TypeEntry), CTy);
 
   // Visit all struct members.
   for (const auto *Element : Elements)
     visitTypeEntry(cast<DIDerivedType>(Element));
 }
 
-void BTFDebug::visitArrayType(const DICompositeType *CTy) {
-  auto TypeEntry = llvm::make_unique<BTFTypeArray>(CTy);
-  addType(std::move(TypeEntry), CTy);
+void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
+  // Visit array element type.
+  uint32_t ElemTypeId, ElemSize;
+  const DIType *ElemType = CTy->getBaseType();
+  visitTypeEntry(ElemType, ElemTypeId, false, false);
+  ElemSize = ElemType->getSizeInBits() >> 3;
+
+  if (!CTy->getSizeInBits()) {
+    auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
+    ArrayTypes.push_back(TypeEntry.get());
+    ElemTypeId = addType(std::move(TypeEntry), CTy);
+  } else {
+    // Visit array dimensions.
+    DINodeArray Elements = CTy->getElements();
+    for (int I = Elements.size() - 1; I >= 0; --I) {
+      if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
+        if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
+          const DISubrange *SR = cast<DISubrange>(Element);
+          auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
+          int64_t Count = CI->getSExtValue();
+
+          auto TypeEntry =
+              llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
+          ArrayTypes.push_back(TypeEntry.get());
+          if (I == 0)
+            ElemTypeId = addType(std::move(TypeEntry), CTy);
+          else
+            ElemTypeId = addType(std::move(TypeEntry));
+          ElemSize = ElemSize * Count;
+        }
+    }
+  }
+
+  // The array TypeId is the type id of the outermost dimension.
+  TypeId = ElemTypeId;
 
   // The IR does not have a type for array index while BTF wants one.
   // So create an array index type if there is none.
@@ -416,85 +530,162 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy) {
                                                    0, "__ARRAY_SIZE_TYPE__");
     ArrayIndexTypeId = addType(std::move(TypeEntry));
   }
-
-  // Visit array element type.
-  visitTypeEntry(CTy->getBaseType().resolve());
 }
 
-void BTFDebug::visitEnumType(const DICompositeType *CTy) {
+void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) {
   DINodeArray Elements = CTy->getElements();
   uint32_t VLen = Elements.size();
   if (VLen > BTF::MAX_VLEN)
     return;
 
   auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
-  addType(std::move(TypeEntry), CTy);
+  TypeId = addType(std::move(TypeEntry), CTy);
   // No need to visit base type as BTF does not encode it.
 }
 
 /// Handle structure/union forward declarations.
-void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion) {
+void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
+                                uint32_t &TypeId) {
   auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
-  addType(std::move(TypeEntry), CTy);
+  TypeId = addType(std::move(TypeEntry), CTy);
 }
 
 /// Handle structure, union, array and enumeration types.
-void BTFDebug::visitCompositeType(const DICompositeType *CTy) {
+void BTFDebug::visitCompositeType(const DICompositeType *CTy,
+                                  uint32_t &TypeId) {
   auto Tag = CTy->getTag();
   if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
     // Handle forward declaration differently as it does not have members.
     if (CTy->isForwardDecl())
-      visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type);
+      visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type, TypeId);
     else
-      visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type);
+      visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type, TypeId);
   } else if (Tag == dwarf::DW_TAG_array_type)
-    visitArrayType(CTy);
+    visitArrayType(CTy, TypeId);
   else if (Tag == dwarf::DW_TAG_enumeration_type)
-    visitEnumType(CTy);
+    visitEnumType(CTy, TypeId);
 }
 
 /// Handle pointer, typedef, const, volatile, restrict and member types.
-void BTFDebug::visitDerivedType(const DIDerivedType *DTy) {
+void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
+                                bool CheckPointer, bool SeenPointer) {
   unsigned Tag = DTy->getTag();
 
+  /// Try to avoid chasing pointees, esp. structure pointees which may
+  /// unnecessarily bring in a lot of types.
+  if (CheckPointer && !SeenPointer) {
+    SeenPointer = Tag == dwarf::DW_TAG_pointer_type;
+  }
+
+  if (CheckPointer && SeenPointer) {
+    const DIType *Base = DTy->getBaseType();
+    if (Base) {
+      if (const auto *CTy = dyn_cast<DICompositeType>(Base)) {
+        auto CTag = CTy->getTag();
+        if ((CTag == dwarf::DW_TAG_structure_type ||
+             CTag == dwarf::DW_TAG_union_type) &&
+            !CTy->isForwardDecl()) {
+          /// Find a candidate, generate a fixup. Later on the struct/union
+          /// pointee type will be replaced with either a real type or
+          /// a forward declaration.
+          auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, true);
+          auto &Fixup = FixupDerivedTypes[CTy->getName()];
+          Fixup.first = CTag == dwarf::DW_TAG_union_type;
+          Fixup.second.push_back(TypeEntry.get());
+          TypeId = addType(std::move(TypeEntry), DTy);
+          return;
+        }
+      }
+    }
+  }
+
   if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
       Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
       Tag == dwarf::DW_TAG_restrict_type) {
-    auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag);
-    addType(std::move(TypeEntry), DTy);
+    auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, false);
+    TypeId = addType(std::move(TypeEntry), DTy);
   } else if (Tag != dwarf::DW_TAG_member) {
     return;
   }
 
   // Visit base type of pointer, typedef, const, volatile, restrict or
   // struct/union member.
-  visitTypeEntry(DTy->getBaseType().resolve());
+  uint32_t TempTypeId = 0;
+  if (Tag == dwarf::DW_TAG_member)
+    visitTypeEntry(DTy->getBaseType(), TempTypeId, true, false);
+  else
+    visitTypeEntry(DTy->getBaseType(), TempTypeId, CheckPointer, SeenPointer);
 }
 
-void BTFDebug::visitTypeEntry(const DIType *Ty) {
-  if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end())
+void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
+                              bool CheckPointer, bool SeenPointer) {
+  if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
+    TypeId = DIToIdMap[Ty];
     return;
+  }
 
-  uint32_t TypeId;
   if (const auto *BTy = dyn_cast<DIBasicType>(Ty))
-    visitBasicType(BTy);
+    visitBasicType(BTy, TypeId);
   else if (const auto *STy = dyn_cast<DISubroutineType>(Ty))
     visitSubroutineType(STy, false, std::unordered_map<uint32_t, StringRef>(),
                         TypeId);
   else if (const auto *CTy = dyn_cast<DICompositeType>(Ty))
-    visitCompositeType(CTy);
+    visitCompositeType(CTy, TypeId);
   else if (const auto *DTy = dyn_cast<DIDerivedType>(Ty))
-    visitDerivedType(DTy);
+    visitDerivedType(DTy, TypeId, CheckPointer, SeenPointer);
   else
     llvm_unreachable("Unknown DIType");
 }
 
+void BTFDebug::visitTypeEntry(const DIType *Ty) {
+  uint32_t TypeId;
+  visitTypeEntry(Ty, TypeId, false, false);
+}
+
+void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
+  if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
+    TypeId = DIToIdMap[Ty];
+    return;
+  }
+
+  // MapDef type is a struct type
+  const auto *CTy = dyn_cast<DICompositeType>(Ty);
+  if (!CTy)
+    return;
+
+  auto Tag = CTy->getTag();
+  if (Tag != dwarf::DW_TAG_structure_type || CTy->isForwardDecl())
+    return;
+
+  // Record this type
+  const DINodeArray Elements = CTy->getElements();
+  bool HasBitField = false;
+  for (const auto *Element : Elements) {
+    auto E = cast<DIDerivedType>(Element);
+    if (E->isBitField()) {
+      HasBitField = true;
+      break;
+    }
+  }
+
+  auto TypeEntry =
+      llvm::make_unique<BTFTypeStruct>(CTy, true, HasBitField, Elements.size());
+  StructTypes.push_back(TypeEntry.get());
+  TypeId = addType(std::move(TypeEntry), CTy);
+
+  // Visit all struct members
+  for (const auto *Element : Elements) {
+    const auto *MemberType = cast<DIDerivedType>(Element);
+    visitTypeEntry(MemberType->getBaseType());
+  }
+}
+
 /// Read file contents from the actual file or from the source
 std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
   auto File = SP->getFile();
   std::string FileName;
 
-  if (File->getDirectory().size())
+  if (!File->getFilename().startswith("/") && File->getDirectory().size())
     FileName = File->getDirectory().str() + "/" + File->getFilename().str();
   else
     FileName = File->getFilename();
@@ -507,16 +698,16 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
   std::string Line;
   Content.push_back(Line); // Line 0 for empty string
 
+  std::unique_ptr<MemoryBuffer> Buf;
   auto Source = File->getSource();
-  if (Source) {
-    std::istringstream InputString(Source.getValue());
-    while (std::getline(InputString, Line))
-      Content.push_back(Line);
-  } else {
-    std::ifstream InputFile(FileName);
-    while (std::getline(InputFile, Line))
-      Content.push_back(Line);
-  }
+  if (Source)
+    Buf = MemoryBuffer::getMemBufferCopy(*Source);
+  else if (ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+               MemoryBuffer::getFile(FileName))
+    Buf = std::move(*BufOrErr);
+  if (Buf)
+    for (line_iterator I(*Buf, false), E; I != E; ++I)
+      Content.push_back(*I);
 
   FileContent[FileName] = Content;
   return FileName;
@@ -547,6 +738,10 @@ void BTFDebug::emitCommonHeader() {
 }
 
 void BTFDebug::emitBTFSection() {
+  // Do not emit section if no types and only "" string.
+  if (!TypeEntries.size() && StringTable.getSize() == 1)
+    return;
+
   MCContext &Ctx = OS.getContext();
   OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
 
@@ -579,6 +774,11 @@ void BTFDebug::emitBTFSection() {
 }
 
 void BTFDebug::emitBTFExtSection() {
+  // Do not emit section if the func/line info and reloc tables are all empty.
+  if (!FuncInfoTable.size() && !LineInfoTable.size() &&
+      !OffsetRelocTable.size() && !ExternRelocTable.size())
+    return;
+
   MCContext &Ctx = OS.getContext();
   OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
 
@@ -588,6 +788,8 @@ void BTFDebug::emitBTFExtSection() {
 
   // Account for FuncInfo/LineInfo record size as well.
   uint32_t FuncLen = 4, LineLen = 4;
+  // Do not account for optional OffsetReloc/ExternReloc.
+  uint32_t OffsetRelocLen = 0, ExternRelocLen = 0;
   for (const auto &FuncSec : FuncInfoTable) {
     FuncLen += BTF::SecFuncInfoSize;
     FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
@@ -596,11 +798,28 @@ void BTFDebug::emitBTFExtSection() {
     LineLen += BTF::SecLineInfoSize;
     LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
   }
+  for (const auto &OffsetRelocSec : OffsetRelocTable) {
+    OffsetRelocLen += BTF::SecOffsetRelocSize;
+    OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize;
+  }
+  for (const auto &ExternRelocSec : ExternRelocTable) {
+    ExternRelocLen += BTF::SecExternRelocSize;
+    ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize;
+  }
+
+  if (OffsetRelocLen)
+    OffsetRelocLen += 4;
+  if (ExternRelocLen)
+    ExternRelocLen += 4;
 
   OS.EmitIntValue(0, 4);
   OS.EmitIntValue(FuncLen, 4);
   OS.EmitIntValue(FuncLen, 4);
   OS.EmitIntValue(LineLen, 4);
+  OS.EmitIntValue(FuncLen + LineLen, 4);
+  OS.EmitIntValue(OffsetRelocLen, 4);
+  OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4);
+  OS.EmitIntValue(ExternRelocLen, 4);
 
   // Emit func_info table.
   OS.AddComment("FuncInfo");
@@ -633,6 +852,39 @@ void BTFDebug::emitBTFExtSection() {
       OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
     }
   }
+
+  // Emit offset reloc table.
+  if (OffsetRelocLen) {
+    OS.AddComment("OffsetReloc");
+    OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4);
+    for (const auto &OffsetRelocSec : OffsetRelocTable) {
+      OS.AddComment("Offset reloc section string offset=" +
+                    std::to_string(OffsetRelocSec.first));
+      OS.EmitIntValue(OffsetRelocSec.first, 4);
+      OS.EmitIntValue(OffsetRelocSec.second.size(), 4);
+      for (const auto &OffsetRelocInfo : OffsetRelocSec.second) {
+        Asm->EmitLabelReference(OffsetRelocInfo.Label, 4);
+        OS.EmitIntValue(OffsetRelocInfo.TypeID, 4);
+        OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4);
+      }
+    }
+  }
+
+  // Emit extern reloc table.
+  if (ExternRelocLen) {
+    OS.AddComment("ExternReloc");
+    OS.EmitIntValue(BTF::BPFExternRelocSize, 4);
+    for (const auto &ExternRelocSec : ExternRelocTable) {
+      OS.AddComment("Extern reloc section string offset=" +
+                    std::to_string(ExternRelocSec.first));
+      OS.EmitIntValue(ExternRelocSec.first, 4);
+      OS.EmitIntValue(ExternRelocSec.second.size(), 4);
+      for (const auto &ExternRelocInfo : ExternRelocSec.second) {
+        Asm->EmitLabelReference(ExternRelocInfo.Label, 4);
+        OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4);
+      }
+    }
+  }
 }
 
 void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
@@ -645,18 +897,42 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
   }
   SkipInstruction = false;
 
+  // Collect MapDef types. Map definition needs to collect
+  // pointee types. Do it first. Otherwise, for the following
+  // case:
+  //    struct m { ...};
+  //    struct t {
+  //      struct m *key;
+  //    };
+  //    foo(struct t *arg);
+  //
+  //    struct mapdef {
+  //      ...
+  //      struct m *key;
+  //      ...
+  //    } __attribute__((section(".maps"))) hash_map;
+  //
+  // If subroutine foo is traversed first, a type chain
+  // "ptr->struct m(fwd)" will be created and later on
+  // when traversing mapdef, since "ptr->struct m" exists,
+  // the traversal of "struct m" will be omitted.
+  if (MapDefNotCollected) {
+    processGlobals(true);
+    MapDefNotCollected = false;
+  }
+
   // Collect all types locally referenced in this function.
   // Use RetainedNodes so we can collect all argument names
   // even if the argument is not used.
   std::unordered_map<uint32_t, StringRef> FuncArgNames;
   for (const DINode *DN : SP->getRetainedNodes()) {
     if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
-      visitTypeEntry(DV->getType().resolve());
-
       // Collect function arguments for subprogram func type.
       uint32_t Arg = DV->getArg();
-      if (Arg)
+      if (Arg) {
+        visitTypeEntry(DV->getType());
         FuncArgNames[Arg] = DV->getName();
+      }
     }
   }
 
@@ -669,6 +945,9 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
       llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
   uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
 
+  for (const auto &TypeEntry : TypeEntries)
+    TypeEntry->completeType(*this);
+
   // Construct funcinfo and the first lineinfo for the function.
   MCSymbol *FuncLabel = Asm->getFunctionBegin();
   BTFFuncInfo FuncInfo;
@@ -691,6 +970,133 @@ void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
   SecNameOff = 0;
 }
 
+/// On-demand populate struct types as requested from abstract member
+/// accessing.
+unsigned BTFDebug::populateStructType(const DIType *Ty) {
+  unsigned Id;
+  visitTypeEntry(Ty, Id, false, false);
+  for (const auto &TypeEntry : TypeEntries)
+    TypeEntry->completeType(*this);
+  return Id;
+}
+
+// Find struct/array debuginfo types given a type id.
+void BTFDebug::setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
+                             BTFTypeArray **PrevArrayType) {
+  for (const auto &StructType : StructTypes) {
+    if (StructType->getId() == TypeId) {
+      *PrevStructType = StructType;
+      return;
+    }
+  }
+  for (const auto &ArrayType : ArrayTypes) {
+    if (ArrayType->getId() == TypeId) {
+      *PrevArrayType = ArrayType;
+      return;
+    }
+  }
+}
+
+/// Generate a struct member offset relocation.
+void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
+                                   const MCSymbol *ORSym, DIType *RootTy,
+                                   StringRef AccessPattern) {
+  BTFTypeStruct *PrevStructType = nullptr;
+  BTFTypeArray *PrevArrayType = nullptr;
+  unsigned RootId = populateStructType(RootTy);
+  setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
+  unsigned RootTySize = PrevStructType->getStructSize();
+
+  BTFOffsetReloc OffsetReloc;
+  OffsetReloc.Label = ORSym;
+  OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
+  OffsetReloc.TypeID = RootId;
+
+  uint32_t Start = 0, End = 0, Offset = 0;
+  bool FirstAccess = true;
+  for (auto C : AccessPattern) {
+    if (C != ':') {
+      End++;
+    } else {
+      std::string SubStr = AccessPattern.substr(Start, End - Start);
+      int Loc = std::stoi(SubStr);
+
+      if (FirstAccess) {
+        Offset = Loc * RootTySize;
+        FirstAccess = false;
+      } else if (PrevStructType) {
+        uint32_t MemberOffset, MemberTypeId;
+        PrevStructType->getMemberInfo(Loc, MemberOffset, MemberTypeId);
+
+        Offset += MemberOffset >> 3;
+        PrevStructType = nullptr;
+        setTypeFromId(MemberTypeId, &PrevStructType, &PrevArrayType);
+      } else if (PrevArrayType) {
+        uint32_t LocOffset, ElementTypeId;
+        PrevArrayType->getLocInfo(Loc, LocOffset, ElementTypeId);
+
+        Offset += LocOffset;
+        PrevArrayType = nullptr;
+        setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
+      }
+      Start = End + 1;
+      End = Start;
+    }
+  }
+  AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
+  OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
+}
+
+void BTFDebug::processLDimm64(const MachineInstr *MI) {
+  // If the insn is an LD_imm64, the following two cases
+  // will generate an .BTF.ext record.
+  //
+  // If the insn is "r2 = LD_imm64 @__BTF_...",
+  // add this insn into the .BTF.ext OffsetReloc subsection.
+  // Relocation looks like:
+  //  . SecName:
+  //    . InstOffset
+  //    . TypeID
+  //    . OffsetNameOff
+  // Later, the insn is replaced with "r2 = <offset>"
+  // where "<offset>" equals the offset based on current
+  // type definitions.
+  //
+  // If the insn is "r2 = LD_imm64 @VAR" and VAR is
+  // a patchable external global, add this insn into the .BTF.ext
+  // ExternReloc subsection.
+  // Relocation looks like:
+  //  . SecName:
+  //    . InstOffset
+  //    . ExternNameOff
+  // Later, the insn is replaced with "r2 = <value>" or
+  // "LD_imm64 r2, <value>" where "<value>" = 0.
+
+  // check whether this is a candidate or not
+  const MachineOperand &MO = MI->getOperand(1);
+  if (MO.isGlobal()) {
+    const GlobalValue *GVal = MO.getGlobal();
+    auto *GVar = dyn_cast<GlobalVariable>(GVal);
+    if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+      MCSymbol *ORSym = OS.getContext().createTempSymbol();
+      OS.EmitLabel(ORSym);
+
+      MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
+      DIType *Ty = dyn_cast<DIType>(MDN);
+      generateOffsetReloc(MI, ORSym, Ty, GVar->getName());
+    } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() &&
+               GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
+      MCSymbol *ORSym = OS.getContext().createTempSymbol();
+      OS.EmitLabel(ORSym);
+
+      BTFExternReloc ExternReloc;
+      ExternReloc.Label = ORSym;
+      ExternReloc.ExternNameOff = addString(GVar->getName());
+      ExternRelocTable[SecNameOff].push_back(ExternReloc);
+    }
+  }
+}
+
 void BTFDebug::beginInstruction(const MachineInstr *MI) {
   DebugHandlerBase::beginInstruction(MI);
 
@@ -711,6 +1117,9 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
       return;
   }
 
+  if (MI->getOpcode() == BPF::LD_imm64)
+    processLDimm64(MI);
+
   // Skip this instruction if no DebugLoc or the DebugLoc
   // is the same as the previous instruction.
   const DebugLoc &DL = MI->getDebugLoc();
@@ -739,13 +1148,145 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
   PrevInstLoc = DL;
 }
 
-void BTFDebug::endModule() {
+void BTFDebug::processGlobals(bool ProcessingMapDef) {
   // Collect all types referenced by globals.
   const Module *M = MMI->getModule();
-  for (const DICompileUnit *CUNode : M->debug_compile_units()) {
-    for (const auto *GVE : CUNode->getGlobalVariables()) {
-      DIGlobalVariable *GV = GVE->getVariable();
-      visitTypeEntry(GV->getType().resolve());
+  for (const GlobalVariable &Global : M->globals()) {
+    // Ignore external globals for now.
+    if (!Global.hasInitializer() && Global.hasExternalLinkage())
+      continue;
+
+    // Decide the section name.
+    StringRef SecName;
+    if (Global.hasSection()) {
+      SecName = Global.getSection();
+    } else {
+      // data, bss, or readonly sections
+      if (Global.isConstant())
+        SecName = ".rodata";
+      else
+        SecName = Global.getInitializer()->isZeroValue() ? ".bss" : ".data";
+    }
+
+    if (ProcessingMapDef != SecName.startswith(".maps"))
+      continue;
+
+    SmallVector<DIGlobalVariableExpression *, 1> GVs;
+    Global.getDebugInfo(GVs);
+    uint32_t GVTypeId = 0;
+    for (auto *GVE : GVs) {
+      if (SecName.startswith(".maps"))
+        visitMapDefType(GVE->getVariable()->getType(), GVTypeId);
+      else
+        visitTypeEntry(GVE->getVariable()->getType(), GVTypeId, false, false);
+      break;
+    }
+
+    // Only support the following globals:
+    //  . static variables
+    //  . non-static global variables with section attributes
+    // Essentially means:
+    //  . .bss/.data/.rodata DataSec entities only contain static data
+    //  . Other DataSec entities contain static or initialized global data.
+    //    Initialized global data are mostly used for finding map key/value type
+    //    id's. Whether DataSec is readonly or not can be found from
+    //    corresponding ELF section flags.
+    auto Linkage = Global.getLinkage();
+    if (Linkage != GlobalValue::InternalLinkage &&
+        (Linkage != GlobalValue::ExternalLinkage || !Global.hasSection()))
+      continue;
+
+    uint32_t GVarInfo = Linkage == GlobalValue::ExternalLinkage
+                            ? BTF::VAR_GLOBAL_ALLOCATED
+                            : BTF::VAR_STATIC;
+    auto VarEntry =
+        llvm::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
+    uint32_t VarId = addType(std::move(VarEntry));
+
+    // Find or create a DataSec
+    if (DataSecEntries.find(SecName) == DataSecEntries.end()) {
+      DataSecEntries[SecName] = llvm::make_unique<BTFKindDataSec>(Asm, SecName);
+    }
+
+    // Calculate symbol size
+    const DataLayout &DL = Global.getParent()->getDataLayout();
+    uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
+
+    DataSecEntries[SecName]->addVar(VarId, Asm->getSymbol(&Global), Size);
+  }
+}
+
+/// Emit proper patchable instructions.
+bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
+  if (MI->getOpcode() == BPF::LD_imm64) {
+    const MachineOperand &MO = MI->getOperand(1);
+    if (MO.isGlobal()) {
+      const GlobalValue *GVal = MO.getGlobal();
+      auto *GVar = dyn_cast<GlobalVariable>(GVal);
+      if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+        MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
+        DIType *Ty = dyn_cast<DIType>(MDN);
+        std::string TypeName = Ty->getName();
+        int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
+
+        // Emit "mov ri, <imm>" for abstract member accesses.
+        OutMI.setOpcode(BPF::MOV_ri);
+        OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+        OutMI.addOperand(MCOperand::createImm(Imm));
+        return true;
+      } else if (GVar && !GVar->hasInitializer() &&
+                 GVar->hasExternalLinkage() &&
+                 GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
+        const IntegerType *IntTy = dyn_cast<IntegerType>(GVar->getValueType());
+        assert(IntTy);
+        // For patchable externals, emit "LD_imm64, ri, 0" if the external
+        // variable is 64bit width, emit "mov ri, 0" otherwise.
+        if (IntTy->getBitWidth() == 64)
+          OutMI.setOpcode(BPF::LD_imm64);
+        else
+          OutMI.setOpcode(BPF::MOV_ri);
+        OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+        OutMI.addOperand(MCOperand::createImm(0));
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void BTFDebug::endModule() {
+  // Collect MapDef globals if not collected yet.
+  if (MapDefNotCollected) {
+    processGlobals(true);
+    MapDefNotCollected = false;
+  }
+
+  // Collect global types/variables except MapDef globals.
+  processGlobals(false);
+  for (auto &DataSec : DataSecEntries)
+    addType(std::move(DataSec.second));
+
+  // Fixups
+  for (auto &Fixup : FixupDerivedTypes) {
+    StringRef TypeName = Fixup.first;
+    bool IsUnion = Fixup.second.first;
+
+    // Search through struct types
+    uint32_t StructTypeId = 0;
+    for (const auto &StructType : StructTypes) {
+      if (StructType->getName() == TypeName) {
+        StructTypeId = StructType->getId();
+        break;
+      }
+    }
+
+    if (StructTypeId == 0) {
+      auto FwdTypeEntry = llvm::make_unique<BTFTypeFwd>(TypeName, IsUnion);
+      StructTypeId = addType(std::move(FwdTypeEntry));
+    }
+
+    for (auto &DType : Fixup.second.second) {
+      DType->setPointeeType(StructTypeId);
     }
   }
 
diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h
index afd4ed87f63d..6c0cdde17d9b 100644
--- a/lib/Target/BPF/BTFDebug.h
+++ b/lib/Target/BPF/BTFDebug.h
@@ -1,9 +1,8 @@
 //===- BTFDebug.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -33,10 +32,12 @@ class MachineFunction;
 class BTFTypeBase {
 protected:
   uint8_t Kind;
+  bool IsCompleted;
   uint32_t Id;
   struct BTF::CommonType BTFType;
 
 public:
+  BTFTypeBase() : IsCompleted(false) {}
   virtual ~BTFTypeBase() = default;
   void setId(uint32_t Id) { this->Id = Id; }
   uint32_t getId() { return Id; }
@@ -55,11 +56,13 @@ public:
 /// volatile, typedef and restrict.
 class BTFTypeDerived : public BTFTypeBase {
   const DIDerivedType *DTy;
+  bool NeedsFixup;
 
 public:
-  BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag);
+  BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
   void completeType(BTFDebug &BDebug);
   void emitType(MCStreamer &OS);
+  void setPointeeType(uint32_t PointeeType);
 };
 
 /// Handle struct or union forward declaration.
@@ -101,14 +104,15 @@ public:
 
 /// Handle array type.
 class BTFTypeArray : public BTFTypeBase {
-  const DICompositeType *ATy;
+  uint32_t ElemSize;
   struct BTF::BTFArray ArrayInfo;
 
 public:
-  BTFTypeArray(const DICompositeType *ATy);
+  BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
   uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
   void completeType(BTFDebug &BDebug);
   void emitType(MCStreamer &OS);
+  void getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId);
 };
 
 /// Handle struct/union type.
@@ -125,6 +129,9 @@ public:
   }
   void completeType(BTFDebug &BDebug);
   void emitType(MCStreamer &OS);
+  std::string getName();
+  void getMemberInfo(uint32_t Loc, uint32_t &Offset, uint32_t &MemberType);
+  uint32_t getStructSize();
 };
 
 /// Handle function pointer.
@@ -154,6 +161,37 @@ public:
   void emitType(MCStreamer &OS);
 };
 
+/// Handle variable instances
+class BTFKindVar : public BTFTypeBase {
+  StringRef Name;
+  uint32_t Info;
+
+public:
+  BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo);
+  uint32_t getSize() { return BTFTypeBase::getSize() + 4; }
+  void completeType(BTFDebug &BDebug);
+  void emitType(MCStreamer &OS);
+};
+
+/// Handle data sections
+class BTFKindDataSec : public BTFTypeBase {
+  AsmPrinter *Asm;
+  std::string Name;
+  std::vector<std::tuple<uint32_t, const MCSymbol *, uint32_t>> Vars;
+
+public:
+  BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName);
+  uint32_t getSize() {
+    return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
+  }
+  void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
+    Vars.push_back(std::make_tuple(Id, Sym, Size));
+  }
+  std::string getName() { return Name; }
+  void completeType(BTFDebug &BDebug);
+  void emitType(MCStreamer &OS);
+};
+
 /// String table.
 class BTFStringTable {
   /// String table size in bytes.
@@ -189,6 +227,19 @@ struct BTFLineInfo {
   uint32_t ColumnNum;   ///< the column number
 };
 
+/// Represent one offset relocation.
+struct BTFOffsetReloc {
+  const MCSymbol *Label;  ///< MCSymbol identifying insn for the reloc
+  uint32_t TypeID;        ///< Type ID
+  uint32_t OffsetNameOff; ///< The string to traverse types
+};
+
+/// Represent one extern relocation.
+struct BTFExternReloc {
+  const MCSymbol *Label;  ///< MCSymbol identifying insn for the reloc
+  uint32_t ExternNameOff; ///< The extern variable name
+};
+
 /// Collect and emit BTF information.
 class BTFDebug : public DebugHandlerBase {
   MCStreamer &OS;
@@ -196,17 +247,26 @@ class BTFDebug : public DebugHandlerBase {
   bool LineInfoGenerated;
   uint32_t SecNameOff;
   uint32_t ArrayIndexTypeId;
+  bool MapDefNotCollected;
   BTFStringTable StringTable;
   std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
   std::unordered_map<const DIType *, uint32_t> DIToIdMap;
-  std::unordered_map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
-  std::unordered_map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+  std::map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
+  std::map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+  std::map<uint32_t, std::vector<BTFOffsetReloc>> OffsetRelocTable;
+  std::map<uint32_t, std::vector<BTFExternReloc>> ExternRelocTable;
   StringMap<std::vector<std::string>> FileContent;
+  std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
+  std::vector<BTFTypeStruct *> StructTypes;
+  std::vector<BTFTypeArray *> ArrayTypes;
+  std::map<std::string, int64_t> AccessOffsets;
+  std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
+      FixupDerivedTypes;
 
   /// Add types to TypeEntries.
   /// @{
   /// Add types to TypeEntries and DIToIdMap.
-  void addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
+  uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
   /// Add types to TypeEntries only and return type id.
   uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
   /// @}
@@ -214,17 +274,23 @@ class BTFDebug : public DebugHandlerBase {
   /// IR type visiting functions.
   /// @{
   void visitTypeEntry(const DIType *Ty);
-  void visitBasicType(const DIBasicType *BTy);
+  void visitTypeEntry(const DIType *Ty, uint32_t &TypeId, bool CheckPointer,
+                      bool SeenPointer);
+  void visitBasicType(const DIBasicType *BTy, uint32_t &TypeId);
   void visitSubroutineType(
       const DISubroutineType *STy, bool ForSubprog,
       const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
       uint32_t &TypeId);
-  void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion);
-  void visitCompositeType(const DICompositeType *CTy);
-  void visitStructType(const DICompositeType *STy, bool IsStruct);
-  void visitArrayType(const DICompositeType *ATy);
-  void visitEnumType(const DICompositeType *ETy);
-  void visitDerivedType(const DIDerivedType *DTy);
+  void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
+                        uint32_t &TypeId);
+  void visitCompositeType(const DICompositeType *CTy, uint32_t &TypeId);
+  void visitStructType(const DICompositeType *STy, bool IsStruct,
+                       uint32_t &TypeId);
+  void visitArrayType(const DICompositeType *ATy, uint32_t &TypeId);
+  void visitEnumType(const DICompositeType *ETy, uint32_t &TypeId);
+  void visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
+                        bool CheckPointer, bool SeenPointer);
+  void visitMapDefType(const DIType *Ty, uint32_t &TypeId);
   /// @}
 
   /// Get the file content for the subprogram. Certain lines of the file
@@ -235,6 +301,23 @@ class BTFDebug : public DebugHandlerBase {
   void constructLineInfo(const DISubprogram *SP, MCSymbol *Label, uint32_t Line,
                          uint32_t Column);
 
+  /// Generate types and variables for globals.
+  void processGlobals(bool ProcessingMapDef);
+
+  /// Generate one offset relocation record.
+  void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym,
+                           DIType *RootTy, StringRef AccessPattern);
+
+  /// Set the to-be-traversed Struct/Array Type based on TypeId.
+  void setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
+                     BTFTypeArray **PrevArrayType);
+
+  /// Populating unprocessed struct type.
+  unsigned populateStructType(const DIType *Ty);
+
+  /// Process LD_imm64 instructions.
+  void processLDimm64(const MachineInstr *MI);
+
   /// Emit common header of .BTF and .BTF.ext sections.
   void emitCommonHeader();
 
@@ -254,6 +337,9 @@ protected:
 public:
   BTFDebug(AsmPrinter *AP);
 
+  /// Presumably lowers \p MI into \p OutMI -- TODO document the bool result.
+  bool InstLower(const MachineInstr *MI, MCInst &OutMI);
+
   /// Get the special array index type id.
   uint32_t getArrayIndexTypeId() {
     assert(ArrayIndexTypeId);
diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 9f80b762fe36..c845524ad657 100644
--- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- BPFDisassembler.cpp - Disassembler for BPF ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -40,7 +40,7 @@ public:
     BPF_STX = 0x3,
     BPF_ALU = 0x4,
     BPF_JMP = 0x5,
-    BPF_RES = 0x6,
+    BPF_JMP32 = 0x6,
     BPF_ALU64 = 0x7
   };
 
@@ -172,9 +172,10 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
   if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
 
   uint8_t InstClass = getInstClass(Insn);
+  uint8_t InstMode = getInstMode(Insn);
   if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
       getInstSize(Insn) != BPF_DW &&
-      getInstMode(Insn) == BPF_MEM &&
+      (InstMode == BPF_MEM || InstMode == BPF_XADD) &&
       STI.getFeatureBits()[BPF::ALU32])
     Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
                                this, STI);
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
deleted file mode 100644
index 20627da38817..000000000000
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===-- BPFInstPrinter.cpp - Convert BPF MCInst to asm syntax -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an BPF MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BPFInstPrinter.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#include "BPFGenAsmWriter.inc"
-
-void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                               StringRef Annot, const MCSubtargetInfo &STI) {
-  printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-static void printExpr(const MCExpr *Expr, raw_ostream &O) {
-#ifndef NDEBUG
-  const MCSymbolRefExpr *SRE;
-
-  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
-    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
-  else
-    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
-  assert(SRE && "Unexpected MCExpr type.");
-
-  MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
-
-  assert(Kind == MCSymbolRefExpr::VK_None);
-#endif
-  O << *Expr;
-}
-
-void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O, const char *Modifier) {
-  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    O << getRegisterName(Op.getReg());
-  } else if (Op.isImm()) {
-    O << formatImm((int32_t)Op.getImm());
-  } else {
-    assert(Op.isExpr() && "Expected an expression");
-    printExpr(Op.getExpr(), O);
-  }
-}
-
-void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
-                                     const char *Modifier) {
-  const MCOperand &RegOp = MI->getOperand(OpNo);
-  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
-
-  // register
-  assert(RegOp.isReg() && "Register operand not a register");
-  O << getRegisterName(RegOp.getReg());
-
-  // offset
-  if (OffsetOp.isImm()) {
-    auto Imm = OffsetOp.getImm();
-    if (Imm >= 0)
-      O << " + " << formatImm(Imm);
-    else
-      O << " - " << formatImm(-Imm);
-  } else {
-    assert(0 && "Expected an immediate");
-  }
-}
-
-void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    O << formatImm(Op.getImm());
-  else if (Op.isExpr())
-    printExpr(Op.getExpr(), O);
-  else
-    O << Op;
-}
-
-void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    int16_t Imm = Op.getImm();
-    O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
-  } else if (Op.isExpr()) {
-    printExpr(Op.getExpr(), O);
-  } else {
-    O << Op;
-  }
-}
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
deleted file mode 100644
index bb0b0d71da53..000000000000
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- BPFInstPrinter.h - Convert BPF MCInst to asm syntax -------*- C++ -*--//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a BPF MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
-#define LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-class BPFInstPrinter : public MCInstPrinter {
-public:
-  BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                 const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                    const char *Modifier = nullptr);
-  void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
-                       const char *Modifier = nullptr);
-  void printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBrTargetOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-};
-}
-
-#endif
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 1822d8688fa2..ba35a175b9a7 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- BPFAsmBackend.cpp - BPF Assembler Backend -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -73,12 +72,12 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                                bool IsResolved,
                                const MCSubtargetInfo *STI) const {
   if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
-    if (Value) {
-      MCContext &Ctx = Asm.getContext();
-      Ctx.reportError(Fixup.getLoc(),
-                      "Unsupported relocation: try to compile with -O2 or above, "
-                      "or check your static variable usage");
-    }
+    // The Value is 0 for global variables, and the in-section offset
+    // for static variables. Write to the immediate field of the inst.
+    assert(Value <= UINT32_MAX);
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
+                                     static_cast<uint32_t>(Value),
+                                     Endian);
   } else if (Fixup.getKind() == FK_Data_4) {
     support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
   } else if (Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 32e79d0f527e..057bbf5c3b06 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- BPFELFObjectWriter.cpp - BPF ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -51,21 +50,33 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
   case FK_Data_8:
     return ELF::R_BPF_64_64;
   case FK_Data_4:
-    // .BTF.ext generates FK_Data_4 relocations for
-    // insn offset by creating temporary labels.
-    // The insn offset is within the code section and
-    // already been fulfilled by applyFixup(). No
-    // further relocation is needed.
     if (const MCSymbolRefExpr *A = Target.getSymA()) {
-      if (A->getSymbol().isTemporary()) {
-        MCSection &Section = A->getSymbol().getSection();
+      const MCSymbol &Sym = A->getSymbol();
+
+      if (Sym.isDefined()) {
+        MCSection &Section = Sym.getSection();
         const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
         assert(SectionELF && "Null section for reloc symbol");
 
-        // The reloc symbol should be in text section.
         unsigned Flags = SectionELF->getFlags();
-        if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
-          return ELF::R_BPF_NONE;
+
+        if (Sym.isTemporary()) {
+          // .BTF.ext generates FK_Data_4 relocations for
+          // insn offset by creating temporary labels.
+          // The insn offset is within the code section and
+          // has already been fulfilled by applyFixup(). No
+          // further relocation is needed.
+          // The reloc symbol should be in text section.
+          if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+            return ELF::R_BPF_NONE;
+        } else {
+          // .BTF generates FK_Data_4 relocations for variable
+          // offset in DataSec kind. Similar to the above .BTF.ext
+          // insn offset, no further relocation is needed.
+          // The reloc symbol should be in data section.
+          if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_WRITE))
+            return ELF::R_BPF_NONE;
+        }
       }
     }
     return ELF::R_BPF_64_32;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
new file mode 100644
index 000000000000..079202994c8d
--- /dev/null
+++ b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -0,0 +1,126 @@
+//===-- BPFInstPrinter.cpp - Convert BPF MCInst to asm syntax -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a BPF MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/BPFInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#include "BPFGenAsmWriter.inc"
+
+/// Print \p MI to \p O with the tblgen-generated writer, then pass the
+/// annotation \p Annot to printAnnotation. \p STI is not used.
+void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot, const MCSubtargetInfo &STI) {
+  printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+/// Stream \p Expr to \p O. Builds with assertions enabled first verify that
+/// the expression -- or, for a binary expression, its left-hand side -- is a
+/// symbol reference whose variant kind is VK_None.
+static void printExpr(const MCExpr *Expr, raw_ostream &O) {
+#ifndef NDEBUG
+  // Pick the sub-expression that must be a symbol reference.
+  const MCExpr *SymCandidate = Expr;
+  if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
+    SymCandidate = BE->getLHS();
+
+  const auto *SRE = dyn_cast<MCSymbolRefExpr>(SymCandidate);
+  assert(SRE && "Unexpected MCExpr type.");
+
+  MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+  assert(Kind == MCSymbolRefExpr::VK_None);
+#endif
+  O << *Expr;
+}
+
+/// Print operand \p OpNo of \p MI: a register is printed by name, an immediate
+/// is truncated to int32_t before formatting, and anything else is expected to
+/// be an expression. \p Modifier must be null or empty.
+void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O, const char *Modifier) {
+  assert((!Modifier || Modifier[0] == '\0') && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << formatImm(static_cast<int32_t>(Op.getImm()));
+  } else {
+    assert(Op.isExpr() && "Expected an expression");
+    printExpr(Op.getExpr(), O);
+  }
+}
+
+/// Print a memory operand as "<reg> + <off>" or "<reg> - <off>". The base
+/// register is operand \p OpNo of \p MI and the offset is operand \p OpNo + 1.
+/// \p Modifier is not used.
+void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                                     const char *Modifier) {
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+
+  // register
+  assert(RegOp.isReg() && "Register operand not a register");
+  O << getRegisterName(RegOp.getReg());
+
+  // offset: only immediates are supported; without assertions any other
+  // operand kind prints nothing after the register.
+  if (!OffsetOp.isImm()) {
+    assert(0 && "Expected an immediate");
+    return;
+  }
+
+  // Print the sign as a separate token, then the magnitude.
+  const int64_t Imm = OffsetOp.getImm();
+  if (Imm >= 0)
+    O << " + " << formatImm(Imm);
+  else
+    O << " - " << formatImm(-Imm);
+}
+
+/// Print operand \p OpNo of \p MI as a 64-bit immediate: immediates are
+/// formatted without truncation, expressions go through printExpr, and any
+/// other operand kind is streamed as-is.
+void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << formatImm(Op.getImm());
+  else if (Op.isExpr())
+    printExpr(Op.getExpr(), O);
+  else
+    O << Op;
+}
+
+/// Print branch-target operand \p OpNo of \p MI. An immediate is narrowed to
+/// int16_t and gets an explicit "+" prefix when non-negative; expressions go
+/// through printExpr and any other operand kind is streamed as-is.
+void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm()) {
+    const auto Imm = static_cast<int16_t>(Op.getImm());
+    O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+  } else if (Op.isExpr()) {
+    printExpr(Op.getExpr(), O);
+  } else {
+    O << Op;
+  }
+}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
new file mode 100644
index 000000000000..8c9a0bc94cff
--- /dev/null
+++ b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
@@ -0,0 +1,55 @@
+//===-- BPFInstPrinter.h - Convert BPF MCInst to asm syntax -------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a BPF MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFINSTPRINTER_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+/// Instruction printer for BPF. The operand printers declared here are
+/// defined in BPFInstPrinter.cpp.
+class BPFInstPrinter : public MCInstPrinter {
+public:
+  BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                 const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  /// Print \p MI with printInstruction, then pass \p Annot to printAnnotation.
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  /// Print operand \p OpNo as a register name, an immediate truncated to
+  /// int32_t, or an expression. \p Modifier must be null or empty.
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                    const char *Modifier = nullptr);
+
+  /// Print a base register (operand \p OpNo) followed by " + " or " - " and
+  /// the immediate offset (operand \p OpNo + 1). \p Modifier is not used.
+  void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                       const char *Modifier = nullptr);
+
+  /// Print operand \p OpNo as an untruncated immediate or as an expression;
+  /// any other operand kind is streamed as-is.
+  void printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  /// Print a branch target: an immediate is narrowed to int16_t and gets a
+  /// leading "+" when non-negative; other operands are printed unchanged.
+  void printBrTargetOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFINSTPRINTER_H
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index af3ad5315253..04a6a87cebc9 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- BPFMCAsmInfo.h - BPF asm properties -------------------*- C++ -*--====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 437f658caf6e..f9abe76c976b 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- BPFMCCodeEmitter.cpp - Convert BPF code to machine code -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,9 +63,10 @@ public:
                          const MCSubtargetInfo &STI) const override;
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // end anonymous namespace
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 834b57527882..fa27b335f3a1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- BPFMCTargetDesc.cpp - BPF Target Descriptions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
-#include "BPF.h"
-#include "InstPrinter/BPFInstPrinter.h"
+#include "MCTargetDesc/BPFInstPrinter.h"
 #include "MCTargetDesc/BPFMCAsmInfo.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 6d2f0a1601e6..1a391321f60d 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- BPFMCTargetDesc.h - BPF Target Descriptions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,10 +33,6 @@ class Triple;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheBPFleTarget();
-Target &getTheBPFbeTarget();
-Target &getTheBPFTarget();
-
 MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCRegisterInfo &MRI,
                                       MCContext &Ctx);
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 1f7b8a04d589..5dfa915034ba 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -1,30 +1,28 @@
 //===-- BPFTargetInfo.cpp - BPF Target Implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "BPF.h"
+#include "TargetInfo/BPFTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
+
 using namespace llvm;
 
-namespace llvm {
-Target &getTheBPFleTarget() {
+Target &llvm::getTheBPFleTarget() {
   static Target TheBPFleTarget;
   return TheBPFleTarget;
 }
-Target &getTheBPFbeTarget() {
+Target &llvm::getTheBPFbeTarget() {
   static Target TheBPFbeTarget;
   return TheBPFbeTarget;
 }
-Target &getTheBPFTarget() {
+Target &llvm::getTheBPFTarget() {
   static Target TheBPFTarget;
   return TheBPFTarget;
 }
-} // namespace llvm
 
 extern "C" void LLVMInitializeBPFTargetInfo() {
   TargetRegistry::RegisterTarget(getTheBPFTarget(), "bpf", "BPF (host endian)",
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.h b/lib/Target/BPF/TargetInfo/BPFTargetInfo.h
new file mode 100644
index 000000000000..150526c1a9db
--- /dev/null
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- BPFTargetInfo.h - BPF Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
+#define LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheBPFleTarget();
+Target &getTheBPFbeTarget();
+Target &getTheBPFTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 2eb1f0fc8bd9..0881bf841f90 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -1,15 +1,13 @@
 //===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mcasmparser"
 
-#include "Hexagon.h"
 #include "HexagonTargetStreamer.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCELFStreamer.h"
@@ -17,6 +15,7 @@
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "MCTargetDesc/HexagonShuffler.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1684,8 +1683,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
     int64_t Value;
     MCExpr const &Expr = *Imm.getExpr();
     bool Absolute = Expr.evaluateAsAbsolute(Value);
-    assert(Absolute);
-    (void)Absolute;
+    if (!Absolute)
+      return Match_InvalidOperand;
     if (!HexagonMCInstrInfo::mustExtend(Expr) &&
         ((Value <= -256) || Value >= 256))
       return Match_InvalidOperand;
@@ -1707,8 +1706,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
     MCInst TmpInst;
     int64_t Value;
     bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
-    assert(Absolute);
-    (void)Absolute;
+    if (!Absolute)
+      return Match_InvalidOperand;
     if (Value == 0) { // convert to $Rd = $Rs
       TmpInst.setOpcode(Hexagon::A2_tfr);
       MCOperand &Rd = Inst.getOperand(0);
@@ -1737,8 +1736,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
     MCOperand &Imm = Inst.getOperand(2);
     int64_t Value;
     bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
-    assert(Absolute);
-    (void)Absolute;
+    if (!Absolute)
+      return Match_InvalidOperand;
     if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
       MCInst TmpInst;
       unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
@@ -1861,8 +1860,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
     MCOperand &Imm = Inst.getOperand(2);
     int64_t Value;
     bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
-    assert(Absolute);
-    (void)Absolute;
+    if (!Absolute)
+      return Match_InvalidOperand;
     if (Value == 0)
       Inst.setOpcode(Hexagon::S2_vsathub);
     else {
@@ -1881,8 +1880,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
     MCOperand &Imm = Inst.getOperand(2);
     int64_t Value;
     bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
-    assert(Absolute);
-    (void)Absolute;
+    if (!Absolute)
+      return Match_InvalidOperand;
     if (Value == 0) {
       MCInst TmpInst;
       unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 69529b0d1162..b7e95caf24fb 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -1,9 +1,8 @@
 //===- BitTracker.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 058225c0d812..efb21805b801 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -1,9 +1,8 @@
 //===- BitTracker.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 428b42eba30d..99e3ee871570 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- HexagonDisassembler.cpp - Disassembler for Hexagon ISA -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCContext.h"
@@ -149,7 +149,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
                                     uint64_t /*Address*/, const void *Decoder);
 static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
                                     const void *Decoder);
-#include "HexagonDepDecoders.h"
+#include "HexagonDepDecoders.inc"
 #include "HexagonGenDisassemblerTables.inc"
 
 static MCDisassembler *createHexagonDisassembler(const Target &T,
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index c18492da803b..58dadf012da5 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -1,9 +1,8 @@
 //=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 868353e18832..26869391c7a3 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -1,9 +1,8 @@
 //===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index f44fb16e2d8e..b07d15609ede 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@
 #include "MCTargetDesc/HexagonMCExpr.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
@@ -92,9 +92,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     GetCPISymbol(MO.getIndex())->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress:
-    // Computing the address of a global symbol, not calling it.
-    getSymbol(MO.getGlobal())->print(O, MAI);
-    printOffset(MO.getOffset(), O);
+    PrintSymbolOperand(MO, O);
     return;
   }
 }
@@ -114,7 +112,6 @@ bool HexagonAsmPrinter::isBlockOnlyReachableByFallthrough(
 
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                        unsigned AsmVariant,
                                         const char *ExtraCode,
                                         raw_ostream &OS) {
   // Does this asm operand have a single letter operand modifier?
@@ -125,11 +122,7 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
-    case 'c': // Don't print "$" before a global var name or constant.
-      // Hexagon never has a prefix.
-      printOperand(MI, OpNo, OS);
-      return false;
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
     case 'L':
     case 'H': { // The highest-numbered register of a pair.
       const MachineOperand &MO = MI->getOperand(OpNo);
@@ -161,7 +154,6 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 
 bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                               unsigned OpNo,
-                                              unsigned AsmVariant,
                                               const char *ExtraCode,
                                               raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index d0629d173a65..6c4b664e83f5 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -1,9 +1,8 @@
 //===- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
 
-#include "Hexagon.h"
 #include "HexagonSubtarget.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -53,11 +51,9 @@ class TargetMachine;
 
     void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &OS) override;
+                         const char *ExtraCode, raw_ostream &OS) override;
     bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &OS) override;
+                               const char *ExtraCode, raw_ostream &OS) override;
   };
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 1bdebe557a8c..7b75d251ccd3 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1,9 +1,8 @@
 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 92b6da871a4c..ba50faac2cf9 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -1,9 +1,8 @@
 //===- HexagonBitTracker.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index f0b7c9d91950..02607d50f686 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -1,9 +1,8 @@
 //===- HexagonBitTracker.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index 48a4505458ae..999150fc8c6e 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -1,9 +1,8 @@
 //===- HexagonBlockRanges.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h
index 4da5a970a659..61115e29a708 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -1,9 +1,8 @@
 //===- HexagonBlockRanges.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index 2fa7888dd02b..ee93739b2c7b 100644
--- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -1,9 +1,8 @@
 //===--- HexagonBranchRelaxation.cpp - Identify and relax long jumps ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index a22ac8c9fdf5..11a455ce4347 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- HexagonCFGOptimizer.cpp - CFG optimizations ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
index ed2f87570d6b..5c31a81a1e87 100644
--- a/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -1,9 +1,8 @@
 //===- HexagonCallingConv.td ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index f315e24eba62..cf1b0a0f7daa 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -1,9 +1,8 @@
 //===- HexagonCommonGEP.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -71,7 +71,7 @@ namespace {
   using NodeToValueMap = std::map<GepNode *, Value *>;
   using NodeVect = std::vector<GepNode *>;
   using NodeChildrenMap = std::map<GepNode *, NodeVect>;
-  using UseSet = std::set<Use *>;
+  using UseSet = SetVector<Use *>;
   using NodeToUsesMap = std::map<GepNode *, UseSet>;
 
   // Numbering map for gep nodes. Used to keep track of ordering for
@@ -980,15 +980,13 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
   assert(UF != Uses.end());
   UseSet &Us = UF->second;
   UseSet NewUs;
-  for (UseSet::iterator I = Us.begin(); I != Us.end(); ) {
-    User *S = (*I)->getUser();
-    UseSet::iterator Nx = std::next(I);
-    if (S == R) {
-      NewUs.insert(*I);
-      Us.erase(I);
-    }
-    I = Nx;
+  for (Use *U : Us) {
+    if (U->getUser() == R)
+      NewUs.insert(U);
   }
+  for (Use *U : NewUs)
+    Us.remove(U); // erase takes an iterator.
+
   if (Us.empty()) {
     Node->Flags &= ~GepNode::Used;
     Uses.erase(UF);
diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp
index ba9f638796eb..cfed0ecef272 100644
--- a/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1,9 +1,8 @@
 //===- HexagonConstExtenders.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index fa192391313e..d1fde5da5fe8 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -1,9 +1,8 @@
 //===- HexagonConstPropagation.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -80,18 +79,21 @@ namespace {
 
   // A representation of a register as it can appear in a MachineOperand,
   // i.e. a pair register:subregister.
-  struct Register {
+
+  // FIXME: Use TargetInstrInfo::RegSubRegPair. Also duplicated in
+  // HexagonGenPredicate
+  struct RegisterSubReg {
     unsigned Reg, SubReg;
 
-    explicit Register(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
-    explicit Register(const MachineOperand &MO)
+    explicit RegisterSubReg(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
+    explicit RegisterSubReg(const MachineOperand &MO)
       : Reg(MO.getReg()), SubReg(MO.getSubReg()) {}
 
     void print(const TargetRegisterInfo *TRI = nullptr) const {
       dbgs() << printReg(Reg, TRI, SubReg);
     }
 
-    bool operator== (const Register &R) const {
+    bool operator== (const RegisterSubReg &R) const {
       return (Reg == R.Reg) && (SubReg == R.SubReg);
     }
   };
@@ -301,7 +303,7 @@ namespace {
     using CellMap = MachineConstPropagator::CellMap;
     virtual bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
                           CellMap &Outputs) = 0;
-    virtual bool evaluate(const Register &R, const LatticeCell &SrcC,
+    virtual bool evaluate(const RegisterSubReg &R, const LatticeCell &SrcC,
                           LatticeCell &Result) = 0;
     virtual bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
                           SetVector<const MachineBasicBlock*> &Targets,
@@ -344,17 +346,17 @@ namespace {
 
     // Helper functions.
 
-    bool getCell(const Register &R, const CellMap &Inputs, LatticeCell &RC);
+    bool getCell(const RegisterSubReg &R, const CellMap &Inputs, LatticeCell &RC);
     bool constToInt(const Constant *C, APInt &Val) const;
     bool constToFloat(const Constant *C, APFloat &Val) const;
     const ConstantInt *intToConst(const APInt &Val) const;
 
     // Compares.
-    bool evaluateCMPrr(uint32_t Cmp, const Register &R1, const Register &R2,
+    bool evaluateCMPrr(uint32_t Cmp, const RegisterSubReg &R1, const RegisterSubReg &R2,
           const CellMap &Inputs, bool &Result);
-    bool evaluateCMPri(uint32_t Cmp, const Register &R1, const APInt &A2,
+    bool evaluateCMPri(uint32_t Cmp, const RegisterSubReg &R1, const APInt &A2,
           const CellMap &Inputs, bool &Result);
-    bool evaluateCMPrp(uint32_t Cmp, const Register &R1, uint64_t Props2,
+    bool evaluateCMPrp(uint32_t Cmp, const RegisterSubReg &R1, uint64_t Props2,
           const CellMap &Inputs, bool &Result);
     bool evaluateCMPii(uint32_t Cmp, const APInt &A1, const APInt &A2,
           bool &Result);
@@ -363,52 +365,52 @@ namespace {
     bool evaluateCMPpp(uint32_t Cmp, uint32_t Props1, uint32_t Props2,
           bool &Result);
 
-    bool evaluateCOPY(const Register &R1, const CellMap &Inputs,
+    bool evaluateCOPY(const RegisterSubReg &R1, const CellMap &Inputs,
           LatticeCell &Result);
 
     // Logical operations.
-    bool evaluateANDrr(const Register &R1, const Register &R2,
+    bool evaluateANDrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
           const CellMap &Inputs, LatticeCell &Result);
-    bool evaluateANDri(const Register &R1, const APInt &A2,
+    bool evaluateANDri(const RegisterSubReg &R1, const APInt &A2,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateANDii(const APInt &A1, const APInt &A2, APInt &Result);
-    bool evaluateORrr(const Register &R1, const Register &R2,
+    bool evaluateORrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
           const CellMap &Inputs, LatticeCell &Result);
-    bool evaluateORri(const Register &R1, const APInt &A2,
+    bool evaluateORri(const RegisterSubReg &R1, const APInt &A2,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateORii(const APInt &A1, const APInt &A2, APInt &Result);
-    bool evaluateXORrr(const Register &R1, const Register &R2,
+    bool evaluateXORrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
           const CellMap &Inputs, LatticeCell &Result);
-    bool evaluateXORri(const Register &R1, const APInt &A2,
+    bool evaluateXORri(const RegisterSubReg &R1, const APInt &A2,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateXORii(const APInt &A1, const APInt &A2, APInt &Result);
 
     // Extensions.
-    bool evaluateZEXTr(const Register &R1, unsigned Width, unsigned Bits,
+    bool evaluateZEXTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateZEXTi(const APInt &A1, unsigned Width, unsigned Bits,
           APInt &Result);
-    bool evaluateSEXTr(const Register &R1, unsigned Width, unsigned Bits,
+    bool evaluateSEXTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateSEXTi(const APInt &A1, unsigned Width, unsigned Bits,
           APInt &Result);
 
     // Leading/trailing bits.
-    bool evaluateCLBr(const Register &R1, bool Zeros, bool Ones,
+    bool evaluateCLBr(const RegisterSubReg &R1, bool Zeros, bool Ones,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateCLBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
-    bool evaluateCTBr(const Register &R1, bool Zeros, bool Ones,
+    bool evaluateCTBr(const RegisterSubReg &R1, bool Zeros, bool Ones,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateCTBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
 
     // Bitfield extract.
-    bool evaluateEXTRACTr(const Register &R1, unsigned Width, unsigned Bits,
+    bool evaluateEXTRACTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
           unsigned Offset, bool Signed, const CellMap &Inputs,
           LatticeCell &Result);
     bool evaluateEXTRACTi(const APInt &A1, unsigned Bits, unsigned Offset,
           bool Signed, APInt &Result);
     // Vector operations.
-    bool evaluateSplatr(const Register &R1, unsigned Bits, unsigned Count,
+    bool evaluateSplatr(const RegisterSubReg &R1, unsigned Bits, unsigned Count,
           const CellMap &Inputs, LatticeCell &Result);
     bool evaluateSplati(const APInt &A1, unsigned Bits, unsigned Count,
           APInt &Result);
@@ -620,7 +622,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
   LLVM_DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN);
 
   const MachineOperand &MD = PN.getOperand(0);
-  Register DefR(MD);
+  RegisterSubReg DefR(MD);
   assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg));
 
   bool Changed = false;
@@ -647,7 +649,7 @@ Bottomize:
       continue;
     }
     const MachineOperand &SO = PN.getOperand(i);
-    Register UseR(SO);
+    RegisterSubReg UseR(SO);
     // If the input is not a virtual register, we don't really know what
     // value it holds.
     if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg))
@@ -690,7 +692,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
   for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || !MO.isDef())
       continue;
-    Register DefR(MO);
+    RegisterSubReg DefR(MO);
     // Only track virtual registers.
     if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
       continue;
@@ -1066,7 +1068,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) {
 // --------------------------------------------------------------------
 // Machine const evaluator.
 
-bool MachineConstEvaluator::getCell(const Register &R, const CellMap &Inputs,
+bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs,
       LatticeCell &RC) {
   if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
     return false;
@@ -1092,8 +1094,8 @@ const ConstantInt *MachineConstEvaluator::intToConst(const APInt &Val) const {
   return ConstantInt::get(CX, Val);
 }
 
-bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1,
-      const Register &R2, const CellMap &Inputs, bool &Result) {
+bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const RegisterSubReg &R1,
+      const RegisterSubReg &R2, const CellMap &Inputs, bool &Result) {
   assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
   LatticeCell LS1, LS2;
   if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
@@ -1131,7 +1133,7 @@ bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1,
   return IsTrue || IsFalse;
 }
 
-bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1,
+bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const RegisterSubReg &R1,
       const APInt &A2, const CellMap &Inputs, bool &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS;
@@ -1158,7 +1160,7 @@ bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1,
   return IsTrue || IsFalse;
 }
 
-bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const Register &R1,
+bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const RegisterSubReg &R1,
       uint64_t Props2, const CellMap &Inputs, bool &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS;
@@ -1351,13 +1353,13 @@ bool MachineConstEvaluator::evaluateCMPpp(uint32_t Cmp, uint32_t Props1,
   return false;
 }
 
-bool MachineConstEvaluator::evaluateCOPY(const Register &R1,
+bool MachineConstEvaluator::evaluateCOPY(const RegisterSubReg &R1,
       const CellMap &Inputs, LatticeCell &Result) {
   return getCell(R1, Inputs, Result);
 }
 
-bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
-      const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateANDrr(const RegisterSubReg &R1,
+      const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
   const LatticeCell &L1 = Inputs.get(R2.Reg);
   const LatticeCell &L2 = Inputs.get(R2.Reg);
@@ -1387,7 +1389,7 @@ bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
   return !Result.isBottom();
 }
 
-bool MachineConstEvaluator::evaluateANDri(const Register &R1,
+bool MachineConstEvaluator::evaluateANDri(const RegisterSubReg &R1,
       const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   if (A2 == -1)
@@ -1423,8 +1425,8 @@ bool MachineConstEvaluator::evaluateANDii(const APInt &A1,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateORrr(const Register &R1,
-      const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateORrr(const RegisterSubReg &R1,
+      const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
   const LatticeCell &L1 = Inputs.get(R2.Reg);
   const LatticeCell &L2 = Inputs.get(R2.Reg);
@@ -1454,7 +1456,7 @@ bool MachineConstEvaluator::evaluateORrr(const Register &R1,
   return !Result.isBottom();
 }
 
-bool MachineConstEvaluator::evaluateORri(const Register &R1,
+bool MachineConstEvaluator::evaluateORri(const RegisterSubReg &R1,
       const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   if (A2 == 0)
@@ -1490,8 +1492,8 @@ bool MachineConstEvaluator::evaluateORii(const APInt &A1,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateXORrr(const Register &R1,
-      const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateXORrr(const RegisterSubReg &R1,
+      const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
   LatticeCell LS1, LS2;
   if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
@@ -1519,7 +1521,7 @@ bool MachineConstEvaluator::evaluateXORrr(const Register &R1,
   return !Result.isBottom();
 }
 
-bool MachineConstEvaluator::evaluateXORri(const Register &R1,
+bool MachineConstEvaluator::evaluateXORri(const RegisterSubReg &R1,
       const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS1;
@@ -1552,7 +1554,7 @@ bool MachineConstEvaluator::evaluateXORii(const APInt &A1,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateZEXTr(const Register &R1, unsigned Width,
+bool MachineConstEvaluator::evaluateZEXTr(const RegisterSubReg &R1, unsigned Width,
       unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS1;
@@ -1583,7 +1585,7 @@ bool MachineConstEvaluator::evaluateZEXTi(const APInt &A1, unsigned Width,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateSEXTr(const Register &R1, unsigned Width,
+bool MachineConstEvaluator::evaluateSEXTr(const RegisterSubReg &R1, unsigned Width,
       unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS1;
@@ -1648,7 +1650,7 @@ bool MachineConstEvaluator::evaluateSEXTi(const APInt &A1, unsigned Width,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateCLBr(const Register &R1, bool Zeros,
+bool MachineConstEvaluator::evaluateCLBr(const RegisterSubReg &R1, bool Zeros,
       bool Ones, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS1;
@@ -1683,7 +1685,7 @@ bool MachineConstEvaluator::evaluateCLBi(const APInt &A1, bool Zeros,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateCTBr(const Register &R1, bool Zeros,
+bool MachineConstEvaluator::evaluateCTBr(const RegisterSubReg &R1, bool Zeros,
       bool Ones, const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
   LatticeCell LS1;
@@ -1718,7 +1720,7 @@ bool MachineConstEvaluator::evaluateCTBi(const APInt &A1, bool Zeros,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateEXTRACTr(const Register &R1,
+bool MachineConstEvaluator::evaluateEXTRACTr(const RegisterSubReg &R1,
       unsigned Width, unsigned Bits, unsigned Offset, bool Signed,
       const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
@@ -1776,7 +1778,7 @@ bool MachineConstEvaluator::evaluateEXTRACTi(const APInt &A1, unsigned Bits,
   return true;
 }
 
-bool MachineConstEvaluator::evaluateSplatr(const Register &R1,
+bool MachineConstEvaluator::evaluateSplatr(const RegisterSubReg &R1,
       unsigned Bits, unsigned Count, const CellMap &Inputs,
       LatticeCell &Result) {
   assert(Inputs.has(R1.Reg));
@@ -1833,7 +1835,7 @@ namespace {
 
     bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
           CellMap &Outputs) override;
-    bool evaluate(const Register &R, const LatticeCell &SrcC,
+    bool evaluate(const RegisterSubReg &R, const LatticeCell &SrcC,
           LatticeCell &Result) override;
     bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
           SetVector<const MachineBasicBlock*> &Targets, bool &FallsThru)
@@ -1848,7 +1850,7 @@ namespace {
           const MachineOperand &MO);
     void replaceWithNop(MachineInstr &MI);
 
-    bool evaluateHexRSEQ32(Register RL, Register RH, const CellMap &Inputs,
+    bool evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH, const CellMap &Inputs,
           LatticeCell &Result);
     bool evaluateHexCompare(const MachineInstr &MI, const CellMap &Inputs,
           CellMap &Outputs);
@@ -1922,14 +1924,14 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
     return false;
 
   unsigned Opc = MI.getOpcode();
-  Register DefR(MD);
+  RegisterSubReg DefR(MD);
   assert(!DefR.SubReg);
   if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
     return false;
 
   if (MI.isCopy()) {
     LatticeCell RC;
-    Register SrcR(MI.getOperand(1));
+    RegisterSubReg SrcR(MI.getOperand(1));
     bool Eval = evaluateCOPY(SrcR, Inputs, RC);
     if (!Eval)
       return false;
@@ -1951,7 +1953,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
     const MachineOperand &OpLo = LoIs1 ? MI.getOperand(1) : MI.getOperand(3);
     const MachineOperand &OpHi = LoIs1 ? MI.getOperand(3) : MI.getOperand(1);
     LatticeCell RC;
-    Register SrcRL(OpLo), SrcRH(OpHi);
+    RegisterSubReg SrcRL(OpLo), SrcRH(OpHi);
     bool Eval = evaluateHexRSEQ32(SrcRL, SrcRH, Inputs, RC);
     if (!Eval)
       return false;
@@ -2038,7 +2040,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
       int64_t B = MI.getOperand(2).getImm();
       assert(B >=0 && B < 32);
       APInt A(32, (1ull << B), false);
-      Register R(MI.getOperand(1));
+      RegisterSubReg R(MI.getOperand(1));
       LatticeCell RC = Outputs.get(DefR.Reg);
       bool Eval = evaluateORri(R, A, Inputs, RC);
       if (!Eval)
@@ -2078,7 +2080,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
       using namespace Hexagon;
 
       bool Ones = (Opc == S2_ct1) || (Opc == S2_ct1p);
-      Register R1(MI.getOperand(1));
+      RegisterSubReg R1(MI.getOperand(1));
       assert(Inputs.has(R1.Reg));
       LatticeCell T;
       bool Eval = evaluateCTBr(R1, !Ones, Ones, Inputs, T);
@@ -2110,7 +2112,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
 
       bool OnlyZeros = (Opc == S2_cl0) || (Opc == S2_cl0p);
       bool OnlyOnes =  (Opc == S2_cl1) || (Opc == S2_cl1p);
-      Register R1(MI.getOperand(1));
+      RegisterSubReg R1(MI.getOperand(1));
       assert(Inputs.has(R1.Reg));
       LatticeCell T;
       bool Eval = evaluateCLBr(R1, !OnlyOnes, !OnlyZeros, Inputs, T);
@@ -2138,7 +2140,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
     {
       bool Signed = (Opc == Hexagon::S4_extract) ||
                     (Opc == Hexagon::S4_extractp);
-      Register R1(MI.getOperand(1));
+      RegisterSubReg R1(MI.getOperand(1));
       unsigned BW = getRegBitWidth(R1.Reg);
       unsigned Bits = MI.getOperand(2).getImm();
       unsigned Offset = MI.getOperand(3).getImm();
@@ -2189,7 +2191,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
   return true;
 }
 
-bool HexagonConstEvaluator::evaluate(const Register &R,
+bool HexagonConstEvaluator::evaluate(const RegisterSubReg &R,
       const LatticeCell &Input, LatticeCell &Result) {
   if (!R.SubReg) {
     Result = Input;
@@ -2280,7 +2282,7 @@ Undetermined:
 
   if (SimpleBranch) {
     const MachineOperand &MD = BrI.getOperand(0);
-    Register PR(MD);
+    RegisterSubReg PR(MD);
     // If the condition operand has a subregister, this is not something
     // we currently recognize.
     if (PR.SubReg)
@@ -2502,7 +2504,7 @@ void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) {
     MI.RemoveOperand(0);
 }
 
-bool HexagonConstEvaluator::evaluateHexRSEQ32(Register RL, Register RH,
+bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH,
       const CellMap &Inputs, LatticeCell &Result) {
   assert(Inputs.has(RL.Reg) && Inputs.has(RH.Reg));
   LatticeCell LSL, LSH;
@@ -2571,7 +2573,7 @@ bool HexagonConstEvaluator::evaluateHexCompare(const MachineInstr &MI,
     if (Computed) {
       // Only create a zero/non-zero cell. At this time there isn't really
       // much need for specific values.
-      Register DefR(MI.getOperand(0));
+      RegisterSubReg DefR(MI.getOperand(0));
       LatticeCell L = Outputs.get(DefR.Reg);
       uint32_t P = Result ? ConstantProperties::NonZero
                           : ConstantProperties::Zero;
@@ -2591,9 +2593,9 @@ bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc,
   bool Reg1 = Src1.isReg(), Reg2 = Src2.isReg();
   bool Imm1 = Src1.isImm(), Imm2 = Src2.isImm();
   if (Reg1) {
-    Register R1(Src1);
+    RegisterSubReg R1(Src1);
     if (Reg2) {
-      Register R2(Src2);
+      RegisterSubReg R2(Src2);
       return evaluateCMPrr(Cmp, R1, R2, Inputs, Result);
     } else if (Imm2) {
       APInt A2 = getCmpImm(Opc, 2, Src2);
@@ -2602,7 +2604,7 @@ bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc,
   } else if (Imm1) {
     APInt A1 = getCmpImm(Opc, 1, Src1);
     if (Reg2) {
-      Register R2(Src2);
+      RegisterSubReg R2(Src2);
       uint32_t NegCmp = Comparison::negate(Cmp);
       return evaluateCMPri(NegCmp, R2, A1, Inputs, Result);
     } else if (Imm2) {
@@ -2621,7 +2623,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
     return false;
   const MachineOperand &Src1 = MI.getOperand(1);
   const MachineOperand &Src2 = MI.getOperand(2);
-  Register R1(Src1);
+  RegisterSubReg R1(Src1);
   bool Eval = false;
   LatticeCell RC;
   switch (Opc) {
@@ -2629,7 +2631,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
       return false;
     case Hexagon::A2_and:
     case Hexagon::A2_andp:
-      Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC);
+      Eval = evaluateANDrr(R1, RegisterSubReg(Src2), Inputs, RC);
       break;
     case Hexagon::A2_andir: {
       if (!Src2.isImm())
@@ -2640,7 +2642,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
     }
     case Hexagon::A2_or:
     case Hexagon::A2_orp:
-      Eval = evaluateORrr(R1, Register(Src2), Inputs, RC);
+      Eval = evaluateORrr(R1, RegisterSubReg(Src2), Inputs, RC);
       break;
     case Hexagon::A2_orir: {
       if (!Src2.isImm())
@@ -2651,11 +2653,11 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
     }
     case Hexagon::A2_xor:
     case Hexagon::A2_xorp:
-      Eval = evaluateXORrr(R1, Register(Src2), Inputs, RC);
+      Eval = evaluateXORrr(R1, RegisterSubReg(Src2), Inputs, RC);
       break;
   }
   if (Eval) {
-    Register DefR(MI.getOperand(0));
+    RegisterSubReg DefR(MI.getOperand(0));
     Outputs.update(DefR.Reg, RC);
   }
   return Eval;
@@ -2664,7 +2666,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
 bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
       const CellMap &Inputs, CellMap &Outputs) {
   // Dst0 = Cond1 ? Src2 : Src3
-  Register CR(MI.getOperand(1));
+  RegisterSubReg CR(MI.getOperand(1));
   assert(Inputs.has(CR.Reg));
   LatticeCell LS;
   if (!getCell(CR, Inputs, LS))
@@ -2679,7 +2681,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
     return false;
 
   const MachineOperand &ValOp = MI.getOperand(TakeOp);
-  Register DefR(MI.getOperand(0));
+  RegisterSubReg DefR(MI.getOperand(0));
   LatticeCell RC = Outputs.get(DefR.Reg);
 
   if (ValOp.isImm()) {
@@ -2692,7 +2694,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
     return true;
   }
   if (ValOp.isReg()) {
-    Register R(ValOp);
+    RegisterSubReg R(ValOp);
     const LatticeCell &LR = Inputs.get(R.Reg);
     LatticeCell LSR;
     if (!evaluate(R, LR, LSR))
@@ -2707,7 +2709,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
 bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
       const CellMap &Inputs, CellMap &Outputs) {
   // Dst0 = ext R1
-  Register R1(MI.getOperand(1));
+  RegisterSubReg R1(MI.getOperand(1));
   assert(Inputs.has(R1.Reg));
 
   unsigned Opc = MI.getOpcode();
@@ -2724,6 +2726,8 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
     case Hexagon::A2_sxtw:
       Bits = 32;
       break;
+    default:
+      llvm_unreachable("Unhandled extension opcode");
   }
 
   bool Signed = false;
@@ -2735,7 +2739,7 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
       break;
   }
 
-  Register DefR(MI.getOperand(0));
+  RegisterSubReg DefR(MI.getOperand(0));
   unsigned BW = getRegBitWidth(DefR.Reg);
   LatticeCell RC = Outputs.get(DefR.Reg);
   bool Eval = Signed ? evaluateSEXTr(R1, BW, Bits, Inputs, RC)
@@ -2749,8 +2753,8 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
 bool HexagonConstEvaluator::evaluateHexVector1(const MachineInstr &MI,
       const CellMap &Inputs, CellMap &Outputs) {
   // DefR = op R1
-  Register DefR(MI.getOperand(0));
-  Register R1(MI.getOperand(1));
+  RegisterSubReg DefR(MI.getOperand(0));
+  RegisterSubReg R1(MI.getOperand(1));
   assert(Inputs.has(R1.Reg));
   LatticeCell RC = Outputs.get(DefR.Reg);
   bool Eval;
@@ -2788,7 +2792,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
     for (const MachineOperand &MO : MI.operands()) {
       if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
         continue;
-      Register R(MO);
+      RegisterSubReg R(MO);
       if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
         continue;
       HasUse = true;
@@ -2954,10 +2958,10 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
     //   to   DefR += mpyi(R, #imm),
     //   or   DefR -= mpyi(R, #imm).
     {
-      Register DefR(MI.getOperand(0));
+      RegisterSubReg DefR(MI.getOperand(0));
       assert(!DefR.SubReg);
-      Register R2(MI.getOperand(2));
-      Register R3(MI.getOperand(3));
+      RegisterSubReg R2(MI.getOperand(2));
+      RegisterSubReg R3(MI.getOperand(3));
       assert(Inputs.has(R2.Reg) && Inputs.has(R3.Reg));
       LatticeCell LS2, LS3;
       // It is enough to get one of the input cells, since we will only try
@@ -2971,7 +2975,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
       if (Zero) {
         // DefR == R1 (tied operands).
         MachineOperand &Acc = MI.getOperand(1);
-        Register R1(Acc);
+        RegisterSubReg R1(Acc);
         unsigned NewR = R1.Reg;
         if (R1.SubReg) {
           // Generate COPY. FIXME: Replace with the register:subregister.
@@ -3018,8 +3022,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
 
     case Hexagon::A2_and:
     {
-      Register R1(MI.getOperand(1));
-      Register R2(MI.getOperand(2));
+      RegisterSubReg R1(MI.getOperand(1));
+      RegisterSubReg R2(MI.getOperand(2));
       assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
       LatticeCell LS1, LS2;
       unsigned CopyOf = 0;
@@ -3037,8 +3041,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
       if (!CopyOf)
         return false;
       MachineOperand &SO = MI.getOperand(CopyOf);
-      Register SR(SO);
-      Register DefR(MI.getOperand(0));
+      RegisterSubReg SR(SO);
+      RegisterSubReg DefR(MI.getOperand(0));
       unsigned NewR = SR.Reg;
       if (SR.SubReg) {
         const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
@@ -3054,8 +3058,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
 
     case Hexagon::A2_or:
     {
-      Register R1(MI.getOperand(1));
-      Register R2(MI.getOperand(2));
+      RegisterSubReg R1(MI.getOperand(1));
+      RegisterSubReg R2(MI.getOperand(2));
       assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
       LatticeCell LS1, LS2;
       unsigned CopyOf = 0;
@@ -3069,8 +3073,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
       if (!CopyOf)
         return false;
       MachineOperand &SO = MI.getOperand(CopyOf);
-      Register SR(SO);
-      Register DefR(MI.getOperand(0));
+      RegisterSubReg SR(SO);
+      RegisterSubReg DefR(MI.getOperand(0));
       unsigned NewR = SR.Reg;
       if (SR.SubReg) {
         const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 28965b69e284..a09ccab483cf 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -1,9 +1,8 @@
 //===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass replaces transfer instructions by combine instructions.
@@ -255,8 +254,8 @@ static bool isUnsafeToMoveAcross(MachineInstr &MI, unsigned UseReg,
          MI.isMetaInstruction();
 }
 
-static unsigned UseReg(const MachineOperand& MO) {
-  return MO.isReg() ? MO.getReg() : 0;
+static Register UseReg(const MachineOperand& MO) {
+  return MO.isReg() ? MO.getReg() : Register();
 }
 
 /// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h
index dff2b2f471d0..529be7ef0ac7 100644
--- a/lib/Target/Hexagon/HexagonDepArch.h
+++ b/lib/Target/Hexagon/HexagonDepArch.h
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td
index f1aadae555c8..115cf2383a7a 100644
--- a/lib/Target/Hexagon/HexagonDepArch.td
+++ b/lib/Target/Hexagon/HexagonDepArch.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h
deleted file mode 100644
index 9f78412f45d2..000000000000
--- a/lib/Target/Hexagon/HexagonDepDecoders.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
-//===----------------------------------------------------------------------===//
-
-// clang-format off
-
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<4>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<14>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<8>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<7>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<12>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<3>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<13>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<6>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<9>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<5>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
-    uint64_t, const void *Decoder) {
-  signedDecoder<6>(MI, tmp, Decoder);
-  return MCDisassembler::Success;
-}
-
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
-
-// clang-format on
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.inc b/lib/Target/Hexagon/HexagonDepDecoders.inc
new file mode 100644
index 000000000000..10068abce7ec
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepDecoders.inc
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, please consult code owner before editing.
+//===----------------------------------------------------------------------===//
+
+// clang-format off
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<4>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<14>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<8>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<7>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<12>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<3>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<13>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<6>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<9>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<5>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+    uint64_t, const void *Decoder) {
+  signedDecoder<6>(MI, tmp, Decoder);
+  return MCDisassembler::Success;
+}
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+// clang-format on
diff --git a/lib/Target/Hexagon/HexagonDepIICHVX.td b/lib/Target/Hexagon/HexagonDepIICHVX.td
index 9e3dea9f3e9b..fefbbfd3f1ac 100644
--- a/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td
index 9da25952fb1c..34da0be02d19 100644
--- a/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h
index 81e3971e21d2..358345e027d8 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/lib/Target/Hexagon/HexagonDepITypes.h
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td
index f694062a5232..91c02b84b87c 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/lib/Target/Hexagon/HexagonDepITypes.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td
index ffe212ef9d97..c08d9a388d3e 100644
--- a/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 3ef1c49eb7ee..a49051888c77 100644
--- a/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 2346fa572626..2ce1419e4790 100644
--- a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td
index b3132d41b903..22ee495b25e6 100644
--- a/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/lib/Target/Hexagon/HexagonDepMappings.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td
index ef2d4fa45702..fdba7b971258 100644
--- a/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/lib/Target/Hexagon/HexagonDepOperands.td
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepTimingClasses.h b/lib/Target/Hexagon/HexagonDepTimingClasses.h
index 0fd55e8b7997..b6be74f848bb 100644
--- a/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -1,9 +1,8 @@
 //===----------------------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 8e2f5093038e..c1f32e54e98d 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -1,9 +1,8 @@
 //===- HexagonEarlyIfConv.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 1a762c0c9de7..c343e426ac7d 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1,9 +1,8 @@
 //===- HexagonExpandCondsets.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -734,7 +733,7 @@ bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
     HasDef = true;
   }
   for (auto &Mo : MI->memoperands())
-    if (Mo->isVolatile())
+    if (Mo->isVolatile() || Mo->isAtomic())
       return false;
   return true;
 }
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index e9067e2285a8..f7edc168de4a 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -1,9 +1,8 @@
 //===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // The loop start address in the LOOPn instruction is encoded as a distance
 // from the LOOPn instruction itself. If the start address is too far from
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index f5736546a87c..3368ee4fb3b9 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //
 //===----------------------------------------------------------------------===//
@@ -375,17 +374,17 @@ static bool isRestoreCall(unsigned Opc) {
 }
 
 static inline bool isOptNone(const MachineFunction &MF) {
-    return MF.getFunction().hasFnAttribute(Attribute::OptimizeNone) ||
+    return MF.getFunction().hasOptNone() ||
            MF.getTarget().getOptLevel() == CodeGenOpt::None;
 }
 
 static inline bool isOptSize(const MachineFunction &MF) {
     const Function &F = MF.getFunction();
-    return F.optForSize() && !F.optForMinSize();
+    return F.hasOptSize() && !F.hasMinSize();
 }
 
 static inline bool isMinSize(const MachineFunction &MF) {
-    return MF.getFunction().optForMinSize();
+    return MF.getFunction().hasMinSize();
 }
 
 /// Implements shrink-wrapping of the stack frame. By default, stack frame
@@ -2102,7 +2101,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
         }
         if (!Bad) {
           for (auto *Mo : In.memoperands()) {
-            if (!Mo->isVolatile())
+            if (!Mo->isVolatile() && !Mo->isAtomic())
               continue;
             Bad = true;
             break;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index d65d870750f8..65e8c7686640 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -1,9 +1,8 @@
 //==- HexagonFrameLowering.h - Define frame lowering for Hexagon -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index 08a016b74650..3417c74e359b 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -1,9 +1,8 @@
 //===- HexagonGenExtract.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -211,7 +210,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
   Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
                                    : Intrinsic::hexagon_S2_extractup;
   Module *Mod = BB->getParent()->getParent();
-  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
+  Function *ExtF = Intrinsic::getDeclaration(Mod, IntId);
   Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
   if (SL != 0)
     NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index e3492e7374e9..81025c1c5325 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -1,9 +1,8 @@
 //===- HexagonGenInsert.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -437,7 +436,7 @@ namespace {
 } // end anonymous namespace
 
 void OrderedRegisterList::insert(unsigned VR) {
-  iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+  iterator L = llvm::lower_bound(Seq, VR, Ord);
   if (L == Seq.end())
     Seq.push_back(VR);
   else
@@ -450,7 +449,7 @@ void OrderedRegisterList::insert(unsigned VR) {
 }
 
 void OrderedRegisterList::remove(unsigned VR) {
-  iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+  iterator L = llvm::lower_bound(Seq, VR, Ord);
   if (L != Seq.end())
     Seq.erase(L);
 }
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index e5af96468af1..cdafbc20ab86 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -1,9 +1,8 @@
 //===- HexagonGenMux.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -304,8 +303,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
     std::advance(It2, MaxX);
     MachineInstr &Def1 = *It1, &Def2 = *It2;
     MachineOperand *Src1 = &Def1.getOperand(2), *Src2 = &Def2.getOperand(2);
-    unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
-    unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+    Register SR1 = Src1->isReg() ? Src1->getReg() : Register();
+    Register SR2 = Src2->isReg() ? Src2->getReg() : Register();
     bool Failure = false, CanUp = true, CanDown = true;
     for (unsigned X = MinX+1; X < MaxX; X++) {
       const DefUseInfo &DU = DUM.lookup(X);
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index c0d2de90467a..e991fa8b61c8 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -1,9 +1,8 @@
 //===- HexagonGenPredicate.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -46,17 +45,19 @@ namespace llvm {
 
 namespace {
 
-  struct Register {
+  // FIXME: Use TargetInstrInfo::RegSubRegPair
+  struct RegisterSubReg {
     unsigned R, S;
 
-    Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
-    Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+    RegisterSubReg(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
+    RegisterSubReg(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+    RegisterSubReg(const Register &Reg) : R(Reg), S(0) {}
 
-    bool operator== (const Register &Reg) const {
+    bool operator== (const RegisterSubReg &Reg) const {
       return R == Reg.R && S == Reg.S;
     }
 
-    bool operator< (const Register &Reg) const {
+    bool operator< (const RegisterSubReg &Reg) const {
       return R < Reg.R || (R == Reg.R && S < Reg.S);
     }
   };
@@ -64,10 +65,10 @@ namespace {
   struct PrintRegister {
     friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR);
 
-    PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
+    PrintRegister(RegisterSubReg R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
 
   private:
-    Register Reg;
+    RegisterSubReg Reg;
     const TargetRegisterInfo &TRI;
   };
 
@@ -99,8 +100,8 @@ namespace {
 
   private:
     using VectOfInst = SetVector<MachineInstr *>;
-    using SetOfReg = std::set<Register>;
-    using RegToRegMap = std::map<Register, Register>;
+    using SetOfReg = std::set<RegisterSubReg>;
+    using RegToRegMap = std::map<RegisterSubReg, RegisterSubReg>;
 
     const HexagonInstrInfo *TII = nullptr;
     const HexagonRegisterInfo *TRI = nullptr;
@@ -111,12 +112,12 @@ namespace {
 
     bool isPredReg(unsigned R);
     void collectPredicateGPR(MachineFunction &MF);
-    void processPredicateGPR(const Register &Reg);
+    void processPredicateGPR(const RegisterSubReg &Reg);
     unsigned getPredForm(unsigned Opc);
     bool isConvertibleToPredForm(const MachineInstr *MI);
     bool isScalarCmp(unsigned Opc);
-    bool isScalarPred(Register PredReg);
-    Register getPredRegFor(const Register &Reg);
+    bool isScalarPred(RegisterSubReg PredReg);
+    RegisterSubReg getPredRegFor(const RegisterSubReg &Reg);
     bool convertToPredForm(MachineInstr *MI);
     bool eliminatePredCopies(MachineFunction &MF);
   };
@@ -211,7 +212,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
         case Hexagon::C2_tfrpr:
         case TargetOpcode::COPY:
           if (isPredReg(MI->getOperand(1).getReg())) {
-            Register RD = MI->getOperand(0);
+            RegisterSubReg RD = MI->getOperand(0);
             if (TargetRegisterInfo::isVirtualRegister(RD.R))
               PredGPRs.insert(RD);
           }
@@ -221,7 +222,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
   }
 }
 
-void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
+void HexagonGenPredicate::processPredicateGPR(const RegisterSubReg &Reg) {
   LLVM_DEBUG(dbgs() << __func__ << ": " << printReg(Reg.R, TRI, Reg.S) << "\n");
   using use_iterator = MachineRegisterInfo::use_iterator;
 
@@ -240,7 +241,7 @@ void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
   }
 }
 
-Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
+RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) {
   // Create a predicate register for a given Reg. The newly created register
   // will have its value copied from Reg, so that it can be later used as
   // an operand in other instructions.
@@ -255,7 +256,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
   unsigned Opc = DefI->getOpcode();
   if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) {
     assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
-    Register PR = DefI->getOperand(1);
+    RegisterSubReg PR = DefI->getOperand(1);
     G2P.insert(std::make_pair(Reg, PR));
     LLVM_DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
     return PR;
@@ -272,10 +273,10 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
     MachineBasicBlock::iterator DefIt = DefI;
     BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
       .addReg(Reg.R, 0, Reg.S);
-    G2P.insert(std::make_pair(Reg, Register(NewPR)));
-    LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI)
+    G2P.insert(std::make_pair(Reg, RegisterSubReg(NewPR)));
+    LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(RegisterSubReg(NewPR), *TRI)
                       << '\n');
-    return Register(NewPR);
+    return RegisterSubReg(NewPR);
   }
 
   llvm_unreachable("Invalid argument");
@@ -317,12 +318,12 @@ bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
   return false;
 }
 
-bool HexagonGenPredicate::isScalarPred(Register PredReg) {
-  std::queue<Register> WorkQ;
+bool HexagonGenPredicate::isScalarPred(RegisterSubReg PredReg) {
+  std::queue<RegisterSubReg> WorkQ;
   WorkQ.push(PredReg);
 
   while (!WorkQ.empty()) {
-    Register PR = WorkQ.front();
+    RegisterSubReg PR = WorkQ.front();
     WorkQ.pop();
     const MachineInstr *DefI = MRI->getVRegDef(PR.R);
     if (!DefI)
@@ -351,7 +352,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
         // Add operands to the queue.
         for (const MachineOperand &MO : DefI->operands())
           if (MO.isReg() && MO.isUse())
-            WorkQ.push(Register(MO.getReg()));
+            WorkQ.push(RegisterSubReg(MO.getReg()));
         break;
 
       // All non-vector compares are ok, everything else is bad.
@@ -373,7 +374,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg() || !MO.isUse())
       continue;
-    Register Reg(MO);
+    RegisterSubReg Reg(MO);
     if (Reg.S && Reg.S != Hexagon::isub_lo)
       return false;
     if (!PredGPRs.count(Reg))
@@ -400,7 +401,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
     // If it's a scalar predicate register, then all bits in it are
     // the same. Otherwise, to determine whether all bits are 0 or not
     // we would need to use any8.
-    Register PR = getPredRegFor(MI->getOperand(1));
+    RegisterSubReg PR = getPredRegFor(MI->getOperand(1));
     if (!isScalarPred(PR))
       return false;
     // This will skip the immediate argument when creating the predicate
@@ -411,19 +412,19 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
   // Some sanity: check that def is in operand #0.
   MachineOperand &Op0 = MI->getOperand(0);
   assert(Op0.isDef());
-  Register OutR(Op0);
+  RegisterSubReg OutR(Op0);
 
   // Don't use getPredRegFor, since it will create an association between
   // the argument and a created predicate register (i.e. it will insert a
   // copy if a new predicate register is created).
   const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
-  Register NewPR = MRI->createVirtualRegister(PredRC);
+  RegisterSubReg NewPR = MRI->createVirtualRegister(PredRC);
   MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R);
 
   // Add predicate counterparts of the GPRs.
   for (unsigned i = 1; i < NumOps; ++i) {
-    Register GPR = MI->getOperand(i);
-    Register Pred = getPredRegFor(GPR);
+    RegisterSubReg GPR = MI->getOperand(i);
+    RegisterSubReg Pred = getPredRegFor(GPR);
     MIB.addReg(Pred.R, 0, Pred.S);
   }
   LLVM_DEBUG(dbgs() << "generated: " << *MIB);
@@ -441,7 +442,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
   // then the output will be a predicate register.  Do not visit the
   // users of it.
   if (!isPredReg(NewOutR)) {
-    Register R(NewOutR);
+    RegisterSubReg R(NewOutR);
     PredGPRs.insert(R);
     processPredicateGPR(R);
   }
@@ -468,8 +469,8 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
     for (MachineInstr &MI : MBB) {
       if (MI.getOpcode() != TargetOpcode::COPY)
         continue;
-      Register DR = MI.getOperand(0);
-      Register SR = MI.getOperand(1);
+      RegisterSubReg DR = MI.getOperand(0);
+      RegisterSubReg SR = MI.getOperand(1);
       if (!TargetRegisterInfo::isVirtualRegister(DR.R))
         continue;
       if (!TargetRegisterInfo::isVirtualRegister(SR.R))
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 239cf49ca8a2..cecbaedb6d70 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1,9 +1,8 @@
 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index 44f1f554c662..e45126bec6ef 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonHazardRecognizer.cpp - Hexagon Post RA Hazard Recognizer ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.h b/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 2874d73ce819..53b9cb43b4b6 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -1,9 +1,8 @@
 //===--- HexagonHazardRecognizer.h - Hexagon Post RA Hazard Recognizer ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines the hazard recognizer for scheduling on Hexagon.
diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td
index a804c5a80d03..06e9c83cf306 100644
--- a/lib/Target/Hexagon/HexagonIICHVX.td
+++ b/lib/Target/Hexagon/HexagonIICHVX.td
@@ -1,9 +1,8 @@
 //===--- HexagonIICHVX.td -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,12 +16,14 @@ class HVXItin {
        InstrStage<1, [CVI_XLANE,CVI_SHIFT, CVI_MPY0, CVI_MPY1]>],
       [9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>,
 
-    // Used by Gather Pseudo Instructions which are expanded into
-    // V6_vgather* and V6_vS32b_new_ai. Even though these instructions
-    // use CVI_ST resource, it's not included below to avoid having more than
-    // 4 InstrStages and thus changing 'MaxResTerms' to 5.
+    // Used by gather pseudo-instructions which are expanded into V6_vgather*
+    // and V6_vS32b_new_ai. Even though these instructions use CVI_LD resource,
+    // it's not included below to avoid having more than 4 InstrStages and
+    // thus changing 'MaxResTerms' to 5. Instead, both SLOT0 and SLOT1 are
+    // used, which should be sufficient.
     InstrItinData <CVI_GATHER_PSEUDO,
       [InstrStage<1, [SLOT0], 0>,
-       InstrStage<1, [CVI_LD], 0>, InstrStage<1, [CVI_ST], 0>,
+       InstrStage<1, [SLOT1], 0>,
+       InstrStage<1, [CVI_ST], 0>,
        InstrStage<1, [CVI_MPY01, CVI_XLSHF]>]>];
 }
diff --git a/lib/Target/Hexagon/HexagonIICScalar.td b/lib/Target/Hexagon/HexagonIICScalar.td
index 5fe713346e38..d37cc3a2cc3e 100644
--- a/lib/Target/Hexagon/HexagonIICScalar.td
+++ b/lib/Target/Hexagon/HexagonIICScalar.td
@@ -1,9 +1,8 @@
 //===--- HexagonIICScalar.td ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 470b05bda4c6..605fcfc25559 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -849,6 +848,9 @@ void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
 void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
   const SDLoc &dl(N);
   MVT ResTy = N->getValueType(0).getSimpleVT();
+  // The argument to V2Q should be a single vector.
+  MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
+  assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
 
   SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
@@ -860,6 +862,8 @@ void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
 void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
   const SDLoc &dl(N);
   MVT ResTy = N->getValueType(0).getSimpleVT();
+  // The result of Q2V should be a single vector.
+  assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
 
   SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index f4f09dd4e758..65edb09603b3 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -1,9 +1,8 @@
 //===-- HexagonISelDAGToDAG.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Hexagon specific code to select Hexagon machine instructions for
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index b796e442d4fa..e7f1c345af1d 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1edf3e498dfa..fef5a98cdb00 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -579,7 +578,8 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
   unsigned LR = HRI.getRARegister();
 
-  if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
+  if ((Op.getOpcode() != ISD::INLINEASM &&
+       Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
     return Op;
 
   unsigned NumOps = Op.getNumOperands();
@@ -1292,6 +1292,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
   setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
+  setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
   setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
   setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
   setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
@@ -1324,7 +1325,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   if (EmitJumpTables)
     setMinimumJumpTableEntries(MinimumJumpTables);
   else
-    setMinimumJumpTableEntries(std::numeric_limits<int>::max());
+    setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
   setOperationAction(ISD::ABS, MVT::i32, Legal);
@@ -1333,8 +1334,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
   // but they only operate on i64.
   for (MVT VT : MVT::integer_valuetypes()) {
-    setOperationAction(ISD::UADDO,    VT, Expand);
-    setOperationAction(ISD::USUBO,    VT, Expand);
+    setOperationAction(ISD::UADDO,    VT, Custom);
+    setOperationAction(ISD::USUBO,    VT, Custom);
     setOperationAction(ISD::SADDO,    VT, Expand);
     setOperationAction(ISD::SSUBO,    VT, Expand);
     setOperationAction(ISD::ADDCARRY, VT, Expand);
@@ -2619,7 +2620,6 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
   const SDLoc &dl(Op);
   const DataLayout &DL = DAG.getDataLayout();
   LLVMContext &Ctx = *DAG.getContext();
-  unsigned AS = LN->getAddressSpace();
 
   // If the load aligning is disabled or the load can be broken up into two
   // smaller legal loads, do the default (target-independent) expansion.
@@ -2629,15 +2629,15 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
     DoDefault = true;
 
   if (!AlignLoads) {
-    if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
+    if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), *LN->getMemOperand()))
       return Op;
     DoDefault = true;
   }
-  if (!DoDefault && 2*HaveAlign == NeedAlign) {
+  if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
-    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
+    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
-    DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
+    DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, *LN->getMemOperand());
   }
   if (DoDefault) {
     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
@@ -2691,6 +2691,43 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
   return M;
 }
 
+// Custom lowering for UADDO/USUBO, handling only X +/- 1: the overflow bit is
+// (X+1 == 0) or (X-1 == -1). Any other operand yields an empty SDValue.
+SDValue
+HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
+  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+  auto *CY = dyn_cast<ConstantSDNode>(Y);
+  if (!CY)
+    return SDValue();
+
+  const SDLoc &dl(Op);
+  SDVTList VTs = Op.getNode()->getVTList();
+  assert(VTs.NumVTs == 2);
+  assert(VTs.VTs[1] == MVT::i1);
+  unsigned Opc = Op.getOpcode();
+
+  // Keep all 64 bits: truncating to 32 would make 0x100000001 look like 1.
+  uint64_t VY = CY->getZExtValue();
+  assert(VY != 0 && "This should have been folded");
+  if (VY != 1)
+    return SDValue();
+
+  if (Opc == ISD::UADDO) {
+    SDValue Sum = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
+    SDValue Ov = DAG.getSetCC(dl, MVT::i1, Sum, getZero(dl, ty(Sum), DAG),
+                              ISD::SETEQ);
+    return DAG.getMergeValues({Sum, Ov}, dl);
+  }
+  if (Opc == ISD::USUBO) {
+    SDValue Diff = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
+    SDValue Ov = DAG.getSetCC(dl, MVT::i1, Diff,
+                              DAG.getConstant(-1, dl, ty(Diff)), ISD::SETEQ);
+    return DAG.getMergeValues({Diff, Ov}, dl);
+  }
+
+  return SDValue();
+}
+
 SDValue
 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
   const SDLoc &dl(Op);
@@ -2741,7 +2778,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   unsigned Opc = Op.getOpcode();
 
   // Handle INLINEASM first.
-  if (Opc == ISD::INLINEASM)
+  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
     return LowerINLINEASM(Op, DAG);
 
   if (isHvxOperation(Op)) {
@@ -2768,6 +2805,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
     case ISD::LOAD:                 return LowerLoad(Op, DAG);
     case ISD::STORE:                return LowerStore(Op, DAG);
+    case ISD::UADDO:
+    case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
     case ISD::ADDCARRY:
     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
     case ISD::SRA:
@@ -2923,7 +2962,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
 /// isFPImmLegal - Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
-bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                         bool ForCodeSize) const {
   return true;
 }
 
@@ -3047,7 +3087,7 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
 /// determined using generic target-independent logic.
 EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
       unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
-      bool MemcpyStrSrc, MachineFunction &MF) const {
+      bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
 
   auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
     return (GivenA % MinA) == 0;
@@ -3063,8 +3103,9 @@ EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::Other;
 }
 
-bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-      unsigned AS, unsigned Align, bool *Fast) const {
+bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
   if (Fast)
     *Fast = false;
   return Subtarget.isHVXVectorType(VT.getSimpleVT());
@@ -3111,13 +3152,21 @@ Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
       AtomicOrdering Ord) const {
   BasicBlock *BB = Builder.GetInsertBlock();
   Module *M = BB->getParent()->getParent();
-  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+  auto PT = cast<PointerType>(Addr->getType());
+  Type *Ty = PT->getElementType();
   unsigned SZ = Ty->getPrimitiveSizeInBits();
   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
                                    : Intrinsic::hexagon_L4_loadd_locked;
-  Value *Fn = Intrinsic::getDeclaration(M, IntID);
-  return Builder.CreateCall(Fn, Addr, "larx");
+  Function *Fn = Intrinsic::getDeclaration(M, IntID);
+
+  PointerType *NewPtrTy
+    = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
+  Addr = Builder.CreateBitCast(Addr, NewPtrTy);
+
+  Value *Call = Builder.CreateCall(Fn, Addr, "larx");
+
+  return Builder.CreateBitCast(Call, Ty);
 }
 
 /// Perform a store-conditional operation to Addr. Return the status of the
@@ -3128,10 +3177,17 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
   Module *M = BB->getParent()->getParent();
   Type *Ty = Val->getType();
   unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+  Type *CastTy = Builder.getIntNTy(SZ);
   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
                                    : Intrinsic::hexagon_S4_stored_locked;
-  Value *Fn = Intrinsic::getDeclaration(M, IntID);
+  Function *Fn = Intrinsic::getDeclaration(M, IntID);
+
+  unsigned AS = Addr->getType()->getPointerAddressSpace();
+  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
+  Val = Builder.CreateBitCast(Val, CastTy);
+
   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 265c37e6ae61..4e467cb22727 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -1,9 +1,8 @@
 //===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -168,6 +167,7 @@ namespace HexagonISD {
     SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -285,7 +285,8 @@ namespace HexagonISD {
     /// is legal.  It is frequently not legal in PIC relocation models.
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
@@ -295,10 +296,10 @@ namespace HexagonISD {
 
     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
         unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-        MachineFunction &MF) const override;
+        const AttributeList &FuncAttributes) const override;
 
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
-        unsigned Align, bool *Fast) const override;
+        unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override;
 
     /// Returns relocation base for the given PIC jumptable.
     SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a6400b5d8266..345c657787a0 100644
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -1542,6 +1541,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
       case ISD::SRL:
       case ISD::SETCC:
       case ISD::VSELECT:
+      case ISD::SIGN_EXTEND:
+      case ISD::ZERO_EXTEND:
       case ISD::SIGN_EXTEND_INREG:
         return SplitHvxPairOp(Op, DAG);
     }
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 2236140d5dd7..f156de671059 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -1,9 +1,8 @@
 //==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV5.td b/lib/Target/Hexagon/HexagonInstrFormatsV5.td
index c8de5cbcc1e0..68ef2d2d3a8a 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV5.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@@ -1,9 +1,8 @@
 //==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index 1347a655353f..86a82183a1ad 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -1,9 +1,8 @@
 //==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV65.td b/lib/Target/Hexagon/HexagonInstrFormatsV65.td
index cddb8777b417..eaecffe9c89e 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV65.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV65.td
@@ -1,9 +1,8 @@
 //==- HexagonInstrFormatsV65.td - Hexagon Instruction Formats -*- tablegen -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index de0d6c4d9e4e..a156de5ba128 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- HexagonInstrInfo.cpp - Hexagon Instruction Information -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -698,11 +697,11 @@ bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
 /// Generate code to reduce the loop iteration by one and check if the loop is
 /// finished. Return the value/register of the new loop count. this function
 /// assumes the nth iteration is peeled first.
-unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
-      MachineInstr *IndVar, MachineInstr &Cmp,
-      SmallVectorImpl<MachineOperand> &Cond,
-      SmallVectorImpl<MachineInstr *> &PrevInsts,
-      unsigned Iter, unsigned MaxIter) const {
+unsigned HexagonInstrInfo::reduceLoopCount(
+    MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+    MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+    SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+    unsigned MaxIter) const {
   // We expect a hardware loop currently. This means that IndVar is set
   // to null, and the compare is the ENDLOOP instruction.
   assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
@@ -1314,6 +1313,38 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       return true;
     }
 
+    case Hexagon::PS_crash: {
+      // Generate a misaligned load that is guaranteed to cause a crash.
+      class CrashPseudoSourceValue : public PseudoSourceValue {
+      public:
+        CrashPseudoSourceValue(const TargetInstrInfo &TII)
+          : PseudoSourceValue(TargetCustom, TII) {}
+
+        bool isConstant(const MachineFrameInfo *) const override {
+          return false;
+        }
+        bool isAliased(const MachineFrameInfo *) const override {
+          return false;
+        }
+        bool mayAlias(const MachineFrameInfo *) const override {
+          return false;
+        }
+        void printCustom(raw_ostream &OS) const override {
+          OS << "MisalignedCrash";
+        }
+      };
+
+      static const CrashPseudoSourceValue CrashPSV(*this);
+      MachineMemOperand *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo(&CrashPSV),
+          MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8, 1);
+      BuildMI(MBB, MI, DL, get(Hexagon::PS_loadrdabs), Hexagon::D13)
+        .addImm(0xBADC0FEE)  // Misaligned load.
+        .addMemOperand(MMO);
+      MBB.erase(MI);
+      return true;
+    }
+
     case Hexagon::PS_tailcall_i:
       MI.setDesc(get(Hexagon::J2_jump));
       return true;
@@ -1681,17 +1712,19 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
 /// Hexagon counts the number of ##'s and adjust for that many
 /// constant exenders.
 unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
-      const MCAsmInfo &MAI) const {
+                                              const MCAsmInfo &MAI,
+                                              const TargetSubtargetInfo *STI) const {
   StringRef AStr(Str);
   // Count the number of instructions in the asm.
   bool atInsnStart = true;
   unsigned Length = 0;
+  const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
   for (; *Str; ++Str) {
     if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
                                 strlen(MAI.getSeparatorString())) == 0)
       atInsnStart = true;
     if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
-      Length += MAI.getMaxInstLength();
+      Length += MaxInstLength;
       atInsnStart = false;
     }
     if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
@@ -1823,7 +1856,8 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
 //  S2_storeri_io %r29, 132, killed %r1; flags:  mem:ST4[FixedStack1]
 // Currently AA considers the addresses in these instructions to be aliasing.
 bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
-    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis *AA) const {
   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
     return false;
@@ -2425,7 +2459,7 @@ bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
 
 bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
   const uint64_t F = get(Opcode).TSFlags;
-  return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
+  return (F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
 }
 
 bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
@@ -2894,7 +2928,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
 
 /// Get the base register and byte offset of a load/store instr.
 bool HexagonInstrInfo::getMemOperandWithOffset(
-    MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
     const TargetRegisterInfo *TRI) const {
   unsigned AccessSize = 0;
   BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 9b840762e88a..e0a999d0f4c4 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,9 +1,8 @@
 //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -140,7 +139,7 @@ public:
   /// is finished.  Return the value/register of the new loop count.  We need
   /// this function when peeling off one or more iterations of a loop. This
   /// function assumes the nth iteration is peeled first.
-  unsigned reduceLoopCount(MachineBasicBlock &MBB,
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
                            MachineInstr *IndVar, MachineInstr &Cmp,
                            SmallVectorImpl<MachineOperand> &Cond,
                            SmallVectorImpl<MachineInstr *> &PrevInsts,
@@ -216,7 +215,8 @@ public:
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
   /// Get the base register and byte offset of a load/store instr.
-  bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffset(const MachineInstr &LdSt,
+                               const MachineOperand *&BaseOp,
                                int64_t &Offset,
                                const TargetRegisterInfo *TRI) const override;
 
@@ -264,8 +264,10 @@ public:
 
   /// Measure the specified inline asm to determine an approximation of its
   /// length.
-  unsigned getInlineAsmLength(const char *Str,
-                              const MCAsmInfo &MAI) const override;
+  unsigned getInlineAsmLength(
+    const char *Str,
+    const MCAsmInfo &MAI,
+    const TargetSubtargetInfo *STI = nullptr) const override;
 
   /// Allocate and return a hazard recognizer to use for this target when
   /// scheduling the machine instructions after register allocation.
@@ -296,7 +298,8 @@ public:
   // memory addresses. This function returns true if two MIs access different
   // memory addresses and false otherwise.
   bool
-  areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const override;
 
   /// For instructions with a base and offset, return the position of the
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 9cab5748bef2..cabfd783effa 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1,9 +1,8 @@
 //===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index a852394f2160..44f39a3e9b16 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -1,9 +1,8 @@
 //===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index 5e5c77b38e8e..a60c80beb5d6 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -1,9 +1,8 @@
 //=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 985f41f3a7d9..ac48e1dc30b0 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1,9 +1,8 @@
 //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -1001,6 +1000,7 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
 void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
       IntegerType *DestTy, BasicBlock *LoopB) {
   Type *OrigTy = In->getType();
+  assert(!OrigTy->isVoidTy() && "Invalid instruction to promote");
 
   // Leave boolean values alone.
   if (!In->getType()->isIntegerTy(1))
@@ -1081,7 +1081,8 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
   std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
                  [](Instruction &In) { return &In; });
   for (Instruction *In : LoopIns)
-    promoteTo(In, DestTy, LoopB);
+    if (!In->isTerminator())
+      promoteTo(In, DestTy, LoopB);
 
   // Fix up the PHI nodes in the exit block.
   Instruction *EndI = ExitB->getFirstNonPHI();
@@ -1522,7 +1523,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
       ParsedValues &PV) {
   IRBuilder<> B(&*At);
   Module *M = At->getParent()->getParent()->getParent();
-  Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
+  Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
 
   Value *P = PV.P, *Q = PV.Q, *P0 = P;
   unsigned IC = PV.IterCount;
@@ -2252,10 +2253,8 @@ CleanupAndExit:
       Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
       Type *VoidTy = Type::getVoidTy(Ctx);
       Module *M = Func->getParent();
-      Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
-                                            Int32PtrTy, Int32PtrTy, Int32Ty);
-      Function *Fn = cast<Function>(CF);
-      Fn->setLinkage(Function::ExternalLinkage);
+      FunctionCallee Fn = M->getOrInsertFunction(
+          HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
 
       const SCEV *OneS = SE->getConstant(Int32Ty, 1);
       const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index fb5752ade1de..d1a153920e5e 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
index 9579c8b6df16..aabae009d7c3 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index d83bcbc41553..2961e16cc9dc 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 908ce24136c7..0e6555024303 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -1,9 +1,8 @@
 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -113,6 +112,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::COPY:
   case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR:
     break;
   }
 
@@ -168,6 +168,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
   case TargetOpcode::EH_LABEL:
   case TargetOpcode::COPY:
   case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR:
     break;
   }
   Packet.push_back(SU);
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 585a7858ad2b..fb0a7abd339b 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -1,9 +1,8 @@
 //===- HexagonMachineScheduler.h - Custom Hexagon MI scheduler --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
index b7b0de0efaea..2fcefe6a4ef6 100644
--- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
@@ -1,9 +1,8 @@
 //===--- HexagonMapAsm2IntrinV62.gen.td -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
index c29a75e6fe74..7293075532c6 100644
--- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
@@ -1,9 +1,8 @@
 //===--- HexagonMapAsm2IntrinV65.gen.td -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index f2a6627c99be..db44901ca706 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -1,9 +1,8 @@
 //===- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 232946ec1579..212cf03bee67 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -1,9 +1,8 @@
 //===--- HexagonOperands.td -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index c3a5bd5d57bf..547da9fd598f 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -1,9 +1,8 @@
 //===- HexagonOptAddrMode.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This implements a Hexagon-specific pass to optimize addressing mode for
diff --git a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index 101de3d8fbee..d00fc23102a5 100644
--- a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -1,9 +1,8 @@
 //===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 89177564057e..fb731f56bfbf 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,9 +1,8 @@
 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -279,7 +278,7 @@ class Su_ni1<PatFrag Op>
             if (hasOneUse(N)){
               // Check if Op1 is an immediate operand.
               SDValue Op1 = N->getOperand(1);
-              return !dyn_cast<ConstantSDNode>(Op1);
+              return !isa<ConstantSDNode>(Op1);
             }
             return false;}],
             Op.OperandTransform>;
@@ -3082,7 +3081,7 @@ def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
 def: Pat<(HexagonBARRIER), (Y2_barrier)>;
 
-def: Pat<(trap), (J2_trap0 (i32 0))>;
+def: Pat<(trap), (PS_crash)>;
 
 // Read cycle counter.
 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
diff --git a/lib/Target/Hexagon/HexagonPatternsV65.td b/lib/Target/Hexagon/HexagonPatternsV65.td
index 50b76847b563..4cd45ecbe1a1 100644
--- a/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -1,9 +1,8 @@
 //==- HexagonPatternsV65.td -------------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 3c588a89b0da..8f761d2d4805 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonPeephole.cpp - Hexagon Peephole Optimiztions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // This peephole pass optimizes in the following cases.
 // 1. Optimizes redundant sign extends for the following case
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index b9748c7e189c..7dd25d7d93d5 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -1,9 +1,8 @@
 //===--- HexagonPseudo.td -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -560,3 +559,8 @@ defm PS_storerh : NewCircularStore<IntRegs, HalfWordAccess>;
 defm PS_storerf : NewCircularStore<IntRegs, HalfWordAccess>;
 defm PS_storeri : NewCircularStore<IntRegs, WordAccess>;
 defm PS_storerd : NewCircularStore<DoubleRegs, WordAccess>;
+
+// A pseudo that generates a runtime crash. This is used to implement
+// __builtin_trap.
+let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in
+def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>;
diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 413bc8edf2b6..910a17540e6e 100644
--- a/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -1,9 +1,8 @@
 //===- HexagonRDFOpt.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 9b8f4e07376f..4f5f750e5842 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -287,7 +286,7 @@ unsigned HexagonRegisterInfo::getRARegister() const {
 }
 
 
-unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+Register HexagonRegisterInfo::getFrameRegister(const MachineFunction
                                                &MF) const {
   const HexagonFrameLowering *TFI = getFrameLowering(MF);
   if (TFI->hasFP(MF))
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 3e7b63a462f0..fc166b5a3410 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -1,9 +1,8 @@
 //==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -67,7 +66,7 @@ public:
 
   // Debug information queries.
   unsigned getRARegister() const;
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
   unsigned getFrameRegister() const;
   unsigned getStackRegister() const;
 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index da90911e2c05..f12189052699 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index 1024198e9b3f..0834e9000460 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -1,9 +1,8 @@
 //===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonScheduleV5.td b/lib/Target/Hexagon/HexagonScheduleV5.td
index 9a893f6dde02..ba0da2c196ab 100644
--- a/lib/Target/Hexagon/HexagonScheduleV5.td
+++ b/lib/Target/Hexagon/HexagonScheduleV5.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td
index ca738be5d6ef..f88dd5d2056d 100644
--- a/lib/Target/Hexagon/HexagonScheduleV55.td
+++ b/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td
index 861a8d2b0339..c6539597a9e7 100644
--- a/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonScheduleV62.td b/lib/Target/Hexagon/HexagonScheduleV62.td
index 1c274191277c..782d76760992 100644
--- a/lib/Target/Hexagon/HexagonScheduleV62.td
+++ b/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV62.td - HexagonV62 Scheduling Definitions *- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonScheduleV65.td b/lib/Target/Hexagon/HexagonScheduleV65.td
index 46a79d521795..ac64410e559b 100644
--- a/lib/Target/Hexagon/HexagonScheduleV65.td
+++ b/lib/Target/Hexagon/HexagonScheduleV65.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV65.td - HexagonV65 Scheduling Definitions *- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonScheduleV66.td b/lib/Target/Hexagon/HexagonScheduleV66.td
index 38e3d21d3701..56dc59e2a948 100644
--- a/lib/Target/Hexagon/HexagonScheduleV66.td
+++ b/lib/Target/Hexagon/HexagonScheduleV66.td
@@ -1,9 +1,8 @@
 //=-HexagonScheduleV66.td - HexagonV66 Scheduling Definitions *- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 002e87fb32ce..c5ba7ced4c30 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index a83a8efb7588..af8b8318b059 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 55de25120943..bd4254aea276 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -1,9 +1,8 @@
 //=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index e018785f24d8..013eede2d414 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -1,9 +1,8 @@
 //===- HexagonSplitDouble.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -153,8 +152,8 @@ bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
 }
 
 bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
-  for (auto &I : MI->memoperands())
-    if (I->isVolatile())
+  for (auto &MO : MI->memoperands())
+    if (MO->isVolatile() || MO->isAtomic())
       return true;
   return false;
 }
diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 61c2121163b8..b8b61517ff95 100644
--- a/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -1,9 +1,8 @@
 //===- HexagonStoreWidening.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Replace sequences of "narrow" stores to adjacent memory locations with
@@ -338,8 +337,7 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
     return false;
 
   OG.push_back(FirstMI);
-  MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
-  InstrGroup::iterator I = Begin+1;
+  MachineInstr *S1 = FirstMI;
 
   // Pow2Num will be the largest number of elements in OG such that the sum
   // of sizes of stores 0...Pow2Num-1 will be a power of 2.
@@ -351,8 +349,8 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
   // does not exceed the limit (MaxSize).
   // Keep track of when the total size covered is a power of 2, since
   // this is a size a single store can cover.
-  while (I != End) {
-    S2 = *I;
+  for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
+    MachineInstr *S2 = *I;
     // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
     // any other store to fill the "hole".
     if (!storesAreAdjacent(S1, S2))
@@ -372,7 +370,6 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
       break;
 
     S1 = S2;
-    ++I;
   }
 
   // The stores don't add up to anything that can be widened.  Clean up.
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 9c77135c2f2f..7ec63a642b0c 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -1,9 +1,8 @@
 //===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 3a5acb53682c..007423ef1902 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -1,9 +1,8 @@
 //===- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index ddfda7e27793..80b8480448fe 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "HexagonMachineScheduler.h"
 #include "HexagonTargetObjectFile.h"
 #include "HexagonTargetTransformInfo.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/LegacyPassManager.h"
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index a7c6a3437fbc..7ee4474e90e3 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -1,9 +1,8 @@
 //=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 2185bf8eebc6..fdcc41a4ca41 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonTargetObjectFile.cpp ---------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -239,10 +238,7 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
     return false;
   }
 
-  Type *GType = GVar->getType();
-  if (PointerType *PT = dyn_cast<PointerType>(GType))
-    GType = PT->getElementType();
-
+  Type *GType = GVar->getValueType();
   if (isa<ArrayType>(GType)) {
     LLVM_DEBUG(dbgs() << "no, is an array\n");
     return false;
@@ -342,7 +338,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
 
 MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
-  const Type *GTy = GO->getType()->getElementType();
+  const Type *GTy = GO->getValueType();
   unsigned Size = getSmallestAddressableSize(GTy, GO, TM);
 
   // If we have -ffunction-section or -fdata-section then we should emit the
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 18863630fde2..b36282578950 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- HexagonTargetObjectFile.h -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonTargetStreamer.h b/lib/Target/Hexagon/HexagonTargetStreamer.h
index e19c404450e6..c5200b76933e 100644
--- a/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index c942f645aa88..38062e8e922c 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 /// This file implements a TargetTransformInfo analysis pass specific to the
@@ -161,14 +160,15 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     unsigned VecWidth = VecTy->getBitWidth();
     if (useHVX() && isTypeForHVX(VecTy)) {
       unsigned RegWidth = getRegisterBitWidth(true);
-      Alignment = std::min(Alignment, RegWidth/8);
+      assert(RegWidth && "Non-zero vector register width expected");
       // Cost of HVX loads.
       if (VecWidth % RegWidth == 0)
         return VecWidth / RegWidth;
       // Cost of constructing HVX vector from scalar loads.
+      Alignment = std::min(Alignment, RegWidth / 8);
       unsigned AlignWidth = 8 * std::max(1u, Alignment);
       unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
-      return 3*NumLoads;
+      return 3 * NumLoads;
     }
 
     // Non-HVX vectors.
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 5c6f85584ec2..27e8fc019007 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //==- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// \file
 /// This file implements a TargetTransformInfo analysis pass specific to the
diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp
index 929ac2bd0d93..a9692f42e468 100644
--- a/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -1,9 +1,8 @@
 //===- HexagonVExtract.cpp ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass will replace multiple occurrences of V6_extractw from the same
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 722699907ca0..3619e4c239d7 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -1,9 +1,8 @@
 //===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index ca70cf967a46..daa86b6f5393 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -1,9 +1,8 @@
 //===- HexagonPacketizer.h - VLIW packetizer --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 9d1073346c72..e5df1d456c1e 100644
--- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -1,9 +1,8 @@
 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -239,10 +238,17 @@ namespace {
     // used over the backedge. This is teh value that gets reused from a
     // previous iteration.
     Instruction *BackedgeInst = nullptr;
+    std::map<Instruction *, DepChain *> DepChains;
+    int Iterations = -1;
 
     ReuseValue() = default;
 
-    void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
+    void reset() {
+      Inst2Replace = nullptr;
+      BackedgeInst = nullptr;
+      DepChains.clear();
+      Iterations = -1;
+    }
     bool isDefined() { return Inst2Replace != nullptr; }
   };
 
@@ -289,10 +295,10 @@ namespace {
     void findDepChainFromPHI(Instruction *I, DepChain &D);
     void reuseValue();
     Value *findValueInBlock(Value *Op, BasicBlock *BB);
-    bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
-    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
+    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
     bool isEquivalentOperation(Instruction *I1, Instruction *I2);
     bool canReplace(Instruction *I);
+    bool isCallInstCommutative(CallInst *C);
   };
 
 } // end anonymous namespace
@@ -327,6 +333,70 @@ bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
   return doVLCR();
 }
 
+bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
+  switch (C->getCalledFunction()->getIntrinsicID()) {
+    case Intrinsic::hexagon_V6_vaddb:
+    case Intrinsic::hexagon_V6_vaddb_128B:
+    case Intrinsic::hexagon_V6_vaddh:
+    case Intrinsic::hexagon_V6_vaddh_128B:
+    case Intrinsic::hexagon_V6_vaddw:
+    case Intrinsic::hexagon_V6_vaddw_128B:
+    case Intrinsic::hexagon_V6_vaddubh:
+    case Intrinsic::hexagon_V6_vaddubh_128B:
+    case Intrinsic::hexagon_V6_vadduhw:
+    case Intrinsic::hexagon_V6_vadduhw_128B:
+    case Intrinsic::hexagon_V6_vaddhw:
+    case Intrinsic::hexagon_V6_vaddhw_128B:
+    case Intrinsic::hexagon_V6_vmaxb:
+    case Intrinsic::hexagon_V6_vmaxb_128B:
+    case Intrinsic::hexagon_V6_vmaxh:
+    case Intrinsic::hexagon_V6_vmaxh_128B:
+    case Intrinsic::hexagon_V6_vmaxw:
+    case Intrinsic::hexagon_V6_vmaxw_128B:
+    case Intrinsic::hexagon_V6_vmaxub:
+    case Intrinsic::hexagon_V6_vmaxub_128B:
+    case Intrinsic::hexagon_V6_vmaxuh:
+    case Intrinsic::hexagon_V6_vmaxuh_128B:
+    case Intrinsic::hexagon_V6_vminub:
+    case Intrinsic::hexagon_V6_vminub_128B:
+    case Intrinsic::hexagon_V6_vminuh:
+    case Intrinsic::hexagon_V6_vminuh_128B:
+    case Intrinsic::hexagon_V6_vminb:
+    case Intrinsic::hexagon_V6_vminb_128B:
+    case Intrinsic::hexagon_V6_vminh:
+    case Intrinsic::hexagon_V6_vminh_128B:
+    case Intrinsic::hexagon_V6_vminw:
+    case Intrinsic::hexagon_V6_vminw_128B:
+    case Intrinsic::hexagon_V6_vmpyub:
+    case Intrinsic::hexagon_V6_vmpyub_128B:
+    case Intrinsic::hexagon_V6_vmpyuh:
+    case Intrinsic::hexagon_V6_vmpyuh_128B:
+    case Intrinsic::hexagon_V6_vavgub:
+    case Intrinsic::hexagon_V6_vavgub_128B:
+    case Intrinsic::hexagon_V6_vavgh:
+    case Intrinsic::hexagon_V6_vavgh_128B:
+    case Intrinsic::hexagon_V6_vavguh:
+    case Intrinsic::hexagon_V6_vavguh_128B:
+    case Intrinsic::hexagon_V6_vavgw:
+    case Intrinsic::hexagon_V6_vavgw_128B:
+    case Intrinsic::hexagon_V6_vavgb:
+    case Intrinsic::hexagon_V6_vavgb_128B:
+    case Intrinsic::hexagon_V6_vavguw:
+    case Intrinsic::hexagon_V6_vavguw_128B:
+    case Intrinsic::hexagon_V6_vabsdiffh:
+    case Intrinsic::hexagon_V6_vabsdiffh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffub:
+    case Intrinsic::hexagon_V6_vabsdiffub_128B:
+    case Intrinsic::hexagon_V6_vabsdiffuh:
+    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffw:
+    case Intrinsic::hexagon_V6_vabsdiffw_128B:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
                                                           Instruction *I2) {
   if (!I1->isSameOperationAs(I2))
@@ -361,13 +431,19 @@ bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
 
 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
-  if (II &&
-      (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
-       II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
+  if (!II)
+    return true;
+
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::hexagon_V6_hi:
+  case Intrinsic::hexagon_V6_lo:
+  case Intrinsic::hexagon_V6_hi_128B:
+  case Intrinsic::hexagon_V6_lo_128B:
     LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
     return false;
+  default:
+    return true;
   }
-  return true;
 }
 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
   for (auto *D : Dependences) {
@@ -428,27 +504,85 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
 
         int NumOperands = I->getNumOperands();
 
-        for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
-          Value *Op = I->getOperand(OpNo);
-          Instruction *OpInst = dyn_cast<Instruction>(Op);
-          if (!OpInst)
-            continue;
-
-          Value *BEOp = BEUser->getOperand(OpNo);
-          Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
-
-          if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
-            BEUser = nullptr;
-            break;
+        // Take the operands of the PNUser one by one and try to find a DepChain
+        // between each of them and any operand of the BEUser. As soon as some
+        // BEUser operand has a DepChain with the current PNUser operand, stop
+        // the inner matcher loop and move on to the next PNUser operand. If a
+        // PNUser operand has no DepChain with any BEUser operand, break out of
+        // the outer matcher loop, set BEUser to null and reset ReuseCandidate.
+        // This ensures that a DepChain exists between every PNUser operand and
+        // some BEUser operand, and that the matching is independent of operand
+        // positions in the PNUser and the BEUser.
+        std::map<Instruction *, DepChain *> DepChains;
+        CallInst *C1 = dyn_cast<CallInst>(I);
+        if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
+          bool Found = false;
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            Found = false;
+            for (int T = 0; T < NumOperands; ++T) {
+              Value *BEOp = BEUser->getOperand(T);
+              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+              if (!OpInst && !BEOpInst) {
+                if (Op == BEOp) {
+                  Found = true;
+                  break;
+                }
+              }
+
+              if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
+                continue;
+
+              DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+              if (D) {
+                Found = true;
+                DepChains[OpInst] = D;
+                break;
+              }
+            }
+            if (!Found) {
+              BEUser = nullptr;
+              break;
+            }
+          }
+        } else {
+
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Value *BEOp = BEUser->getOperand(OpNo);
+
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            if (!OpInst) {
+              if (Op == BEOp)
+                continue;
+              // Do not allow reuse to occur when the operands may be different
+              // values.
+              BEUser = nullptr;
+              break;
+            }
+
+            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+            DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+            if (D) {
+              DepChains[OpInst] = D;
+            } else {
+              BEUser = nullptr;
+              break;
+            }
           }
         }
         if (BEUser) {
           LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
           ReuseCandidate.Inst2Replace = I;
           ReuseCandidate.BackedgeInst = BEUser;
+          ReuseCandidate.DepChains = DepChains;
+          ReuseCandidate.Iterations = Iters;
           return;
-        } else
-          ReuseCandidate.reset();
+        }
+        ReuseCandidate.reset();
       }
     }
   }
@@ -468,27 +602,10 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
   Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
   Instruction *BEInst = ReuseCandidate.BackedgeInst;
   int NumOperands = Inst2Replace->getNumOperands();
-  std::map<Instruction *, DepChain *> DepChains;
-  int Iterations = -1;
+  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
+  int Iterations = ReuseCandidate.Iterations;
   BasicBlock *LoopPH = CurLoop->getLoopPreheader();
-
-  for (int i = 0; i < NumOperands; ++i) {
-    Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
-    if(!I)
-      continue;
-    else {
-      Instruction *J = cast<Instruction>(BEInst->getOperand(i));
-      DepChain *D = getDepChainBtwn(I, J);
-
-      assert(D &&
-             "No DepChain between corresponding operands in ReuseCandidate\n");
-      if (Iterations == -1)
-        Iterations = D->iterations();
-      assert(Iterations == D->iterations() && "Iterations mismatch");
-      DepChains[I] = D;
-    }
-  }
-
+  assert(!DepChains.empty() && "No DepChains");
   LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
 
   SmallVector<Instruction *, 4> InstsInPreheader;
@@ -597,20 +714,11 @@ void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
   }
 }
 
-bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
-                                                      Instruction *I2,
-                                                      int Iters) {
-  for (auto *D : Dependences) {
-    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
-      return true;
-  }
-  return false;
-}
-
 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
-                                                            Instruction *I2) {
+                                                         Instruction *I2,
+                                                         int Iters) {
   for (auto *D : Dependences) {
-    if (D->front() == I1 && D->back() == I2)
+    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
       return D;
   }
   return nullptr;
diff --git a/lib/Target/Hexagon/HexagonVectorPrint.cpp b/lib/Target/Hexagon/HexagonVectorPrint.cpp
index 18d2f2f4acde..65a8dcd75bdc 100644
--- a/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ b/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -1,9 +1,8 @@
 //===- HexagonVectorPrint.cpp - Generate vector printing instructions -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index af1e5429d0c2..7c0770926abe 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -1,13 +1,11 @@
 //===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonFixupKinds.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 6543d8313900..3c64893bae45 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -1,9 +1,8 @@
 //===- HexagonBaseInfo.h - Top level definitions for Hexagon ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index e82e6b559f62..f678bf49322e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -1,14 +1,13 @@
 //===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonFixupKinds.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCObjectWriter.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
index 347327669ad9..8b0ddbcb949f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 687e79a7dbab..6b9e63f5ac9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -1,9 +1,8 @@
 //===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "HexagonInstPrinter.h"
-#include "HexagonAsmPrinter.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 17af046ce090..ca32c3c1f50f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -1,9 +1,8 @@
 //===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 446b3b2ce668..f3da67562320 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index efeff2436234..e1f0a26cf858 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 53f3cba052bc..fcd3758600c1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -1,9 +1,8 @@
 //===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCChecker.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index 7577baace20c..bc55ade9ccd7 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -1,9 +1,8 @@
 //===- HexagonMCChecker.h - Instruction bundle checking ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 3382684803aa..95e23c99868a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -1,14 +1,12 @@
 //===- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonFixupKinds.h"
 #include "MCTargetDesc/HexagonMCExpr.h"
@@ -378,7 +376,7 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
   State.Bundle = &MI;
   State.Index = 0;
   size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
-  uint64_t Features = computeAvailableFeatures(STI.getFeatureBits());
+  FeatureBitset Features = computeAvailableFeatures(STI.getFeatureBits());
 
   for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
     MCInst &HMI = const_cast<MCInst &>(*I.getInst());
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index fcea63db23a3..9e86dc8e4989 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===- HexagonMCCodeEmitter.h - Hexagon Target Descriptions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -83,9 +82,10 @@ private:
   // Return parse bits for instruction `MCI' inside bundle `MCB'
   uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
 
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 3eaef9ac7410..ed571188c1e8 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,9 +1,8 @@
 //=== HexagonMCCompound.cpp - Hexagon Compound checker  -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index f0654d612b4b..3cbb8600ce7a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -1,9 +1,8 @@
 //===- HexagonMCDuplexInfo.cpp - Instruction bundle checking --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index f304bc50530f..f2432883af6f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -1,9 +1,8 @@
 //=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -60,7 +59,7 @@ HexagonMCELFStreamer::HexagonMCELFStreamer(
       MCII(createHexagonMCInstrInfo()) {}
 
 void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
-                                           const MCSubtargetInfo &STI, bool) {
+                                           const MCSubtargetInfo &STI) {
   assert(MCB.getOpcode() == Hexagon::BUNDLE);
   assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
   assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index c02bef8f06f7..6248bd25d433 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -1,9 +1,8 @@
 //===- HexagonMCELFStreamer.h - Hexagon subclass of MCElfStreamer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,8 +30,7 @@ public:
                        std::unique_ptr<MCCodeEmitter> Emitter,
                        MCAssembler *Assembler);
 
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool) override;
+  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
   void EmitSymbol(const MCInst &Inst);
   void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                       unsigned ByteAlignment,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index f0689252b396..1e708ba1bcd3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -1,10 +1,9 @@
 //===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes
 //----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
index acfd996ccf82..59b1326adf0c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -1,9 +1,8 @@
 //==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index a11aa92ccbe1..0750bfe74f76 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCExpr.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index d040bea23b6d..829f872c453e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -1,9 +1,8 @@
 //===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 4281144acaee..7d45b4fcfdde 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -1,9 +1,8 @@
 //===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,6 @@
 #define DEBUG_TYPE "hexagon-shuffle"
 
 #include "MCTargetDesc/HexagonMCShuffler.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonShuffler.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 59658999d24d..3410c0ddbd84 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -1,9 +1,8 @@
 //===- HexagonMCShuffler.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 92ce7345f358..9c50b25156c3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,13 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "Hexagon.h"
 #include "HexagonDepArch.h"
 #include "HexagonTargetStreamer.h"
 #include "MCTargetDesc/HexagonInstPrinter.h"
 #include "MCTargetDesc/HexagonMCAsmInfo.h"
 #include "MCTargetDesc/HexagonMCELFStreamer.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/ELF.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index d6ea664222d3..7b42460a2a1c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,7 +63,6 @@ class StringRef;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheHexagonTarget();
 extern cl::opt<bool> HexagonDisableCompound;
 extern cl::opt<bool> HexagonDisableDuplex;
 extern const InstrStage HexagonStages[];
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index f4ee2bbfaaaa..18c7790a17cc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -1,9 +1,8 @@
 //===- HexagonShuffler.cpp - Instruction bundle shuffling -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,6 @@
 #define DEBUG_TYPE "hexagon-shuffle"
 
 #include "MCTargetDesc/HexagonShuffler.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
@@ -23,6 +21,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index ef50c5bebbfb..bf3bad36dfe5 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -1,9 +1,8 @@
 //===- HexagonShuffler.h - Instruction bundle shuffling ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,8 +14,8 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 4339fa2089d9..7702024f87bd 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -1,9 +1,8 @@
 //===- RDFCopy.cpp --------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 7b2e78bdf633..1450ab884849 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -1,9 +1,8 @@
 //===- RDFCopy.h ------------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 8dcd485d65e9..52178931aa6d 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -1,9 +1,8 @@
 //===--- RDFDeadCode.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/RDFDeadCode.h b/lib/Target/Hexagon/RDFDeadCode.h
index 8977e730b855..7f91977e1d6c 100644
--- a/lib/Target/Hexagon/RDFDeadCode.h
+++ b/lib/Target/Hexagon/RDFDeadCode.h
@@ -1,9 +1,8 @@
 //===--- RDFDeadCode.h ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index d8ca08e70505..9d8f706b8a0f 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -1,9 +1,8 @@
 //===- RDFGraph.cpp -------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,7 +54,6 @@ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
   auto &TRI = P.G.getTRI();
   if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
@@ -66,7 +64,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
   auto NA = P.G.addr<NodeBase*>(P.Obj);
   uint16_t Attrs = NA.Addr->getAttrs();
@@ -116,7 +113,6 @@ static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
     OS << '!';
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
   printRefHeader(OS, P.Obj, P.G);
   OS << '(';
@@ -134,7 +130,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
   printRefHeader(OS, P.Obj, P.G);
   OS << '(';
@@ -146,7 +141,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS,
       const Print<NodeAddr<PhiUseNode*>> &P) {
   printRefHeader(OS, P.Obj, P.G);
@@ -162,7 +156,6 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
   switch (P.Obj.Addr->getKind()) {
     case NodeAttrs::Def:
@@ -178,7 +171,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
   unsigned N = P.Obj.size();
   for (auto I : P.Obj) {
@@ -189,7 +181,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
   unsigned N = P.Obj.size();
   for (auto I : P.Obj) {
@@ -224,16 +215,13 @@ namespace {
 
 } // end anonymous namespace
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
   OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
      << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
   return OS;
 }
 
-template<>
-raw_ostream &operator<< (raw_ostream &OS,
-      const Print<NodeAddr<StmtNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
   const MachineInstr &MI = *P.Obj.Addr->getCode();
   unsigned Opc = MI.getOpcode();
   OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
@@ -258,7 +246,6 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS,
       const Print<NodeAddr<InstrNode*>> &P) {
   switch (P.Obj.Addr->getKind()) {
@@ -275,7 +262,6 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS,
       const Print<NodeAddr<BlockNode*>> &P) {
   MachineBasicBlock *BB = P.Obj.Addr->getCode();
@@ -309,9 +295,7 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-template<>
-raw_ostream &operator<< (raw_ostream &OS,
-      const Print<NodeAddr<FuncNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
   OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
      << P.Obj.Addr->getCode()->getName() << '\n';
   for (auto I : P.Obj.Addr->members(P.G))
@@ -320,7 +304,6 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
   OS << '{';
   for (auto I : P.Obj)
@@ -329,13 +312,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
   P.Obj.print(OS);
   return OS;
 }
 
-template<>
 raw_ostream &operator<< (raw_ostream &OS,
       const Print<DataFlowGraph::DefStack> &P) {
   for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index e3abb0e22f76..585f43e116f9 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -1,9 +1,8 @@
 //===- RDFGraph.h -----------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -925,10 +924,6 @@ namespace rdf {
     return MM;
   }
 
-  template <typename T> struct Print;
-  template <typename T>
-  raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P);
-
   template <typename T>
   struct Print {
     Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
@@ -943,6 +938,29 @@ namespace rdf {
       : Print<NodeAddr<T>>(x, g) {}
   };
 
+  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<NodeAddr<PhiUseNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<NodeAddr<StmtNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<NodeAddr<InstrNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<NodeAddr<BlockNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<NodeAddr<FuncNode *>> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
+  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const Print<DataFlowGraph::DefStack> &P);
+
 } // end namespace rdf
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 9ff48d25a026..9cd304aa10bc 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -1,9 +1,8 @@
 //===- RDFLiveness.cpp ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,7 +57,6 @@ static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
 namespace llvm {
 namespace rdf {
 
-  template<>
   raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
     OS << '{';
     for (auto &I : P.Obj) {
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
index eaeb4ea115b3..ea4890271726 100644
--- a/lib/Target/Hexagon/RDFLiveness.h
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -1,9 +1,8 @@
 //===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -143,6 +142,8 @@ namespace rdf {
         unsigned Nest, unsigned MaxNest);
   };
 
+  raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
+
 } // end namespace rdf
 
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
index 9408c5dc3952..6e0f33695f0e 100644
--- a/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -1,9 +1,8 @@
 //===- RDFRegisters.cpp ---------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
index 459850d87df1..646233bacda5 100644
--- a/lib/Target/Hexagon/RDFRegisters.h
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -1,9 +1,8 @@
 //===- RDFRegisters.h -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 78e2f2b2ddb3..d77b235d0077 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/HexagonTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h
new file mode 100644
index 000000000000..902b61cb5b6c
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- HexagonTargetInfo.h - Hexagon Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheHexagonTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index a77b2b8f15ca..9af8a0b35b2f 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1,16 +1,16 @@
 //===-- LanaiAsmParser.cpp - Parse Lanai assembly to MCInst instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
 #include "LanaiAluCode.h"
 #include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
 #include "MCTargetDesc/LanaiMCExpr.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index 609b650e5d32..25ae7c521706 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- LanaiDisassembler.cpp - Disassembler for Lanai -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,10 @@
 
 #include "LanaiDisassembler.h"
 
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index e0c19e8ea644..ae821df303d8 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -1,9 +1,8 @@
 //===- LanaiDisassembler.cpp - Disassembler for Lanai -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp
deleted file mode 100644
index 2fa411fcfd87..000000000000
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp
+++ /dev/null
@@ -1,305 +0,0 @@
-//===-- LanaiInstPrinter.cpp - Convert Lanai MCInst to asm syntax ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Lanai MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LanaiInstPrinter.h"
-#include "Lanai.h"
-#include "MCTargetDesc/LanaiMCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "LanaiGenAsmWriter.inc"
-
-void LanaiInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << StringRef(getRegisterName(RegNo)).lower();
-}
-
-bool LanaiInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                 StringRef Alias, unsigned OpNo0,
-                                 unsigned OpNo1) {
-  OS << "\t" << Alias << " ";
-  printOperand(MI, OpNo0, OS);
-  OS << ", ";
-  printOperand(MI, OpNo1, OS);
-  return true;
-}
-
-static bool usesGivenOffset(const MCInst *MI, int AddOffset) {
-  unsigned AluCode = MI->getOperand(3).getImm();
-  return LPAC::encodeLanaiAluCode(AluCode) == LPAC::ADD &&
-         (MI->getOperand(2).getImm() == AddOffset ||
-          MI->getOperand(2).getImm() == -AddOffset);
-}
-
-static bool isPreIncrementForm(const MCInst *MI, int AddOffset) {
-  unsigned AluCode = MI->getOperand(3).getImm();
-  return LPAC::isPreOp(AluCode) && usesGivenOffset(MI, AddOffset);
-}
-
-static bool isPostIncrementForm(const MCInst *MI, int AddOffset) {
-  unsigned AluCode = MI->getOperand(3).getImm();
-  return LPAC::isPostOp(AluCode) && usesGivenOffset(MI, AddOffset);
-}
-
-static StringRef decIncOperator(const MCInst *MI) {
-  if (MI->getOperand(2).getImm() < 0)
-    return "--";
-  return "++";
-}
-
-bool LanaiInstPrinter::printMemoryLoadIncrement(const MCInst *MI,
-                                                raw_ostream &OS,
-                                                StringRef Opcode,
-                                                int AddOffset) {
-  if (isPreIncrementForm(MI, AddOffset)) {
-    OS << "\t" << Opcode << "\t[" << decIncOperator(MI) << "%"
-       << getRegisterName(MI->getOperand(1).getReg()) << "], %"
-       << getRegisterName(MI->getOperand(0).getReg());
-    return true;
-  }
-  if (isPostIncrementForm(MI, AddOffset)) {
-    OS << "\t" << Opcode << "\t[%"
-       << getRegisterName(MI->getOperand(1).getReg()) << decIncOperator(MI)
-       << "], %" << getRegisterName(MI->getOperand(0).getReg());
-    return true;
-  }
-  return false;
-}
-
-bool LanaiInstPrinter::printMemoryStoreIncrement(const MCInst *MI,
-                                                 raw_ostream &OS,
-                                                 StringRef Opcode,
-                                                 int AddOffset) {
-  if (isPreIncrementForm(MI, AddOffset)) {
-    OS << "\t" << Opcode << "\t%" << getRegisterName(MI->getOperand(0).getReg())
-       << ", [" << decIncOperator(MI) << "%"
-       << getRegisterName(MI->getOperand(1).getReg()) << "]";
-    return true;
-  }
-  if (isPostIncrementForm(MI, AddOffset)) {
-    OS << "\t" << Opcode << "\t%" << getRegisterName(MI->getOperand(0).getReg())
-       << ", [%" << getRegisterName(MI->getOperand(1).getReg())
-       << decIncOperator(MI) << "]";
-    return true;
-  }
-  return false;
-}
-
-bool LanaiInstPrinter::printAlias(const MCInst *MI, raw_ostream &OS) {
-  switch (MI->getOpcode()) {
-  case Lanai::LDW_RI:
-    // ld 4[*%rN], %rX => ld [++imm], %rX
-    // ld -4[*%rN], %rX => ld [--imm], %rX
-    // ld 4[%rN*], %rX => ld [imm++], %rX
-    // ld -4[%rN*], %rX => ld [imm--], %rX
-    return printMemoryLoadIncrement(MI, OS, "ld", 4);
-  case Lanai::LDHs_RI:
-    return printMemoryLoadIncrement(MI, OS, "ld.h", 2);
-  case Lanai::LDHz_RI:
-    return printMemoryLoadIncrement(MI, OS, "uld.h", 2);
-  case Lanai::LDBs_RI:
-    return printMemoryLoadIncrement(MI, OS, "ld.b", 1);
-  case Lanai::LDBz_RI:
-    return printMemoryLoadIncrement(MI, OS, "uld.b", 1);
-  case Lanai::SW_RI:
-    // st %rX, 4[*%rN] => st %rX, [++imm]
-    // st %rX, -4[*%rN] => st %rX, [--imm]
-    // st %rX, 4[%rN*] => st %rX, [imm++]
-    // st %rX, -4[%rN*] => st %rX, [imm--]
-    return printMemoryStoreIncrement(MI, OS, "st", 4);
-  case Lanai::STH_RI:
-    return printMemoryStoreIncrement(MI, OS, "st.h", 2);
-  case Lanai::STB_RI:
-    return printMemoryStoreIncrement(MI, OS, "st.b", 1);
-  default:
-    return false;
-  }
-}
-
-void LanaiInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                 StringRef Annotation,
-                                 const MCSubtargetInfo & /*STI*/) {
-  if (!printAlias(MI, OS) && !printAliasInstr(MI, OS))
-    printInstruction(MI, OS);
-  printAnnotation(OS, Annotation);
-}
-
-void LanaiInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &OS, const char *Modifier) {
-  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg())
-    OS << "%" << getRegisterName(Op.getReg());
-  else if (Op.isImm())
-    OS << formatHex(Op.getImm());
-  else {
-    assert(Op.isExpr() && "Expected an expression");
-    Op.getExpr()->print(OS, &MAI);
-  }
-}
-
-void LanaiInstPrinter::printMemImmOperand(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &OS) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    OS << '[' << formatHex(Op.getImm()) << ']';
-  } else {
-    // Symbolic operand will be lowered to immediate value by linker
-    assert(Op.isExpr() && "Expected an expression");
-    OS << '[';
-    Op.getExpr()->print(OS, &MAI);
-    OS << ']';
-  }
-}
-
-void LanaiInstPrinter::printHi16ImmOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &OS) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    OS << formatHex(Op.getImm() << 16);
-  } else {
-    // Symbolic operand will be lowered to immediate value by linker
-    assert(Op.isExpr() && "Expected an expression");
-    Op.getExpr()->print(OS, &MAI);
-  }
-}
-
-void LanaiInstPrinter::printHi16AndImmOperand(const MCInst *MI, unsigned OpNo,
-                                              raw_ostream &OS) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    OS << formatHex((Op.getImm() << 16) | 0xffff);
-  } else {
-    // Symbolic operand will be lowered to immediate value by linker
-    assert(Op.isExpr() && "Expected an expression");
-    Op.getExpr()->print(OS, &MAI);
-  }
-}
-
-void LanaiInstPrinter::printLo16AndImmOperand(const MCInst *MI, unsigned OpNo,
-                                              raw_ostream &OS) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    OS << formatHex(0xffff0000 | Op.getImm());
-  } else {
-    // Symbolic operand will be lowered to immediate value by linker
-    assert(Op.isExpr() && "Expected an expression");
-    Op.getExpr()->print(OS, &MAI);
-  }
-}
-
-static void printMemoryBaseRegister(raw_ostream &OS, const unsigned AluCode,
-                                    const MCOperand &RegOp) {
-  assert(RegOp.isReg() && "Register operand expected");
-  OS << "[";
-  if (LPAC::isPreOp(AluCode))
-    OS << "*";
-  OS << "%" << LanaiInstPrinter::getRegisterName(RegOp.getReg());
-  if (LPAC::isPostOp(AluCode))
-    OS << "*";
-  OS << "]";
-}
-
-template <unsigned SizeInBits>
-static void printMemoryImmediateOffset(const MCAsmInfo &MAI,
-                                       const MCOperand &OffsetOp,
-                                       raw_ostream &OS) {
-  assert((OffsetOp.isImm() || OffsetOp.isExpr()) && "Immediate expected");
-  if (OffsetOp.isImm()) {
-    assert(isInt<SizeInBits>(OffsetOp.getImm()) && "Constant value truncated");
-    OS << OffsetOp.getImm();
-  } else
-    OffsetOp.getExpr()->print(OS, &MAI);
-}
-
-void LanaiInstPrinter::printMemRiOperand(const MCInst *MI, int OpNo,
-                                         raw_ostream &OS,
-                                         const char * /*Modifier*/) {
-  const MCOperand &RegOp = MI->getOperand(OpNo);
-  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
-  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
-  const unsigned AluCode = AluOp.getImm();
-
-  // Offset
-  printMemoryImmediateOffset<16>(MAI, OffsetOp, OS);
-
-  // Register
-  printMemoryBaseRegister(OS, AluCode, RegOp);
-}
-
-void LanaiInstPrinter::printMemRrOperand(const MCInst *MI, int OpNo,
-                                         raw_ostream &OS,
-                                         const char * /*Modifier*/) {
-  const MCOperand &RegOp = MI->getOperand(OpNo);
-  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
-  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
-  const unsigned AluCode = AluOp.getImm();
-  assert(OffsetOp.isReg() && RegOp.isReg() && "Registers expected.");
-
-  // [ Base OP Offset ]
-  OS << "[";
-  if (LPAC::isPreOp(AluCode))
-    OS << "*";
-  OS << "%" << getRegisterName(RegOp.getReg());
-  if (LPAC::isPostOp(AluCode))
-    OS << "*";
-  OS << " " << LPAC::lanaiAluCodeToString(AluCode) << " ";
-  OS << "%" << getRegisterName(OffsetOp.getReg());
-  OS << "]";
-}
-
-void LanaiInstPrinter::printMemSplsOperand(const MCInst *MI, int OpNo,
-                                           raw_ostream &OS,
-                                           const char * /*Modifier*/) {
-  const MCOperand &RegOp = MI->getOperand(OpNo);
-  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
-  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
-  const unsigned AluCode = AluOp.getImm();
-
-  // Offset
-  printMemoryImmediateOffset<10>(MAI, OffsetOp, OS);
-
-  // Register
-  printMemoryBaseRegister(OS, AluCode, RegOp);
-}
-
-void LanaiInstPrinter::printCCOperand(const MCInst *MI, int OpNo,
-                                      raw_ostream &OS) {
-  LPCC::CondCode CC =
-      static_cast<LPCC::CondCode>(MI->getOperand(OpNo).getImm());
-  // Handle the undefined value here for printing so we don't abort().
-  if (CC >= LPCC::UNKNOWN)
-    OS << "<und>";
-  else
-    OS << lanaiCondCodeToString(CC);
-}
-
-void LanaiInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &OS) {
-  LPCC::CondCode CC =
-      static_cast<LPCC::CondCode>(MI->getOperand(OpNo).getImm());
-  // Handle the undefined value here for printing so we don't abort().
-  if (CC >= LPCC::UNKNOWN)
-    OS << "<und>";
-  else if (CC != LPCC::ICC_T)
-    OS << "." << lanaiCondCodeToString(CC);
-}
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
deleted file mode 100644
index 59904fbaa318..000000000000
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//= LanaiInstPrinter.h - Convert Lanai MCInst to asm syntax -------*- C++ -*--//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a Lanai MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
-#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class LanaiInstPrinter : public MCInstPrinter {
-public:
-  LanaiInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                   const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                    const char *Modifier = nullptr);
-  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
-                         const char *Modifier = nullptr);
-  void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
-                         const char *Modifier = nullptr);
-  void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
-                           const char *Modifier = nullptr);
-  void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
-  void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
-  void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printHi16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printLo16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-
-private:
-  bool printAlias(const MCInst *MI, raw_ostream &Ostream);
-  bool printInst(const MCInst *MI, raw_ostream &Ostream, StringRef Alias,
-                 unsigned OpNo0, unsigned OpnNo1);
-  bool printMemoryLoadIncrement(const MCInst *MI, raw_ostream &Ostream,
-                                StringRef Opcode, int AddOffset);
-  bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
-                                 StringRef Opcode, int AddOffset);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
diff --git a/lib/Target/Lanai/Lanai.h b/lib/Target/Lanai/Lanai.h
index c1fdf793305b..2f06ea91ab03 100644
--- a/lib/Target/Lanai/Lanai.h
+++ b/lib/Target/Lanai/Lanai.h
@@ -1,9 +1,8 @@
 //===-- Lanai.h - Top-level interface for Lanai representation --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,12 +14,7 @@
 #ifndef LLVM_LIB_TARGET_LANAI_LANAI_H
 #define LLVM_LIB_TARGET_LANAI_LANAI_H
 
-#include "LanaiAluCode.h"
-#include "LanaiCondCode.h"
-#include "MCTargetDesc/LanaiBaseInfo.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 class FunctionPass;
@@ -45,7 +39,6 @@ FunctionPass *createLanaiMemAluCombinerPass();
 // operations.
 FunctionPass *createLanaiSetflagAluCombinerPass();
 
-Target &getTheLanaiTarget();
 } // namespace llvm
 
 #endif // LLVM_LIB_TARGET_LANAI_LANAI_H
diff --git a/lib/Target/Lanai/Lanai.td b/lib/Target/Lanai/Lanai.td
index 73d080457034..c6d949f42047 100644
--- a/lib/Target/Lanai/Lanai.td
+++ b/lib/Target/Lanai/Lanai.td
@@ -1,9 +1,8 @@
 //===- Lanai.td - Describe the Lanai Target Machine --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiAluCode.h b/lib/Target/Lanai/LanaiAluCode.h
index d5145694fe46..728332bff00b 100644
--- a/lib/Target/Lanai/LanaiAluCode.h
+++ b/lib/Target/Lanai/LanaiAluCode.h
@@ -1,9 +1,8 @@
 //===-- LanaiAluCode.h - ALU operator encoding ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiAsmPrinter.cpp b/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 607b2a97b29f..64d963475e1a 100644
--- a/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiAsmPrinter.cpp - Lanai LLVM assembly writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,11 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/LanaiInstPrinter.h"
-#include "Lanai.h"
+#include "MCTargetDesc/LanaiInstPrinter.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
 #include "LanaiInstrInfo.h"
 #include "LanaiMCInstLower.h"
 #include "LanaiTargetMachine.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -49,8 +50,7 @@ public:
 
   void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
   void EmitInstruction(const MachineInstr *MI) override;
   bool isBlockOnlyReachableByFallthrough(
       const MachineBasicBlock *MBB) const override;
@@ -109,7 +109,6 @@ void LanaiAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
 
 // PrintAsmOperand - Print out an operand for an inline asm expression.
 bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned /*AsmVariant*/,
                                       const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -139,7 +138,7 @@ bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       return false;
     }
     default:
-      return true; // Unknown modifier.
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     }
   }
   printOperand(MI, OpNo, O);
diff --git a/lib/Target/Lanai/LanaiCallingConv.td b/lib/Target/Lanai/LanaiCallingConv.td
index 056b329c33c5..e2306725290a 100644
--- a/lib/Target/Lanai/LanaiCallingConv.td
+++ b/lib/Target/Lanai/LanaiCallingConv.td
@@ -1,9 +1,8 @@
 //===- LanaiCallingConv.td - Calling Conventions Lanai -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index ea76a1128373..09c63dca23e2 100644
--- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiDelaySlotFiller.cpp - Lanai delay slot filler ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiFrameLowering.cpp b/lib/Target/Lanai/LanaiFrameLowering.cpp
index 0723668c743e..142c09c504cc 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiFrameLowering.cpp - Lanai Frame Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,8 @@
 
 #include "LanaiFrameLowering.h"
 
+#include "LanaiAluCode.h"
 #include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
 #include "LanaiSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h
index ca690d513fc2..5fe4535543ec 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.h
+++ b/lib/Target/Lanai/LanaiFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- LanaiFrameLowering.h - Define frame lowering for Lanai --*- C++-*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
 #define LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
 
-#include "Lanai.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
 namespace llvm {
diff --git a/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index 5081cfbe4922..aadcdc43f560 100644
--- a/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiISelDAGToDAG.cpp - A dag to dag inst selector for Lanai ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "LanaiMachineFunctionInfo.h"
 #include "LanaiRegisterInfo.h"
 #include "LanaiSubtarget.h"
diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp
index 0411704be6fb..1ed078bb433f 100644
--- a/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiISelLowering.cpp - Lanai DAG Lowering Implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h
index 0cde633cb41a..e7b5755e9041 100644
--- a/lib/Target/Lanai/LanaiISelLowering.h
+++ b/lib/Target/Lanai/LanaiISelLowering.h
@@ -1,9 +1,8 @@
 //===-- LanaiISelLowering.h - Lanai DAG Lowering Interface -....-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiInstrFormats.td b/lib/Target/Lanai/LanaiInstrFormats.td
index 1bb6b3d26a49..4101aa912ade 100644
--- a/lib/Target/Lanai/LanaiInstrFormats.td
+++ b/lib/Target/Lanai/LanaiInstrFormats.td
@@ -1,9 +1,8 @@
 //===- LanaiInstrFormats.td - Lanai Instruction Formats ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp
index 196768fdc56a..700a86069102 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiInstrInfo.cpp - Lanai Instruction Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,10 +10,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
 #include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
-#include "LanaiTargetMachine.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -87,7 +86,8 @@ void LanaiInstrInfo::loadRegFromStackSlot(
 }
 
 bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
-    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis * /*AA*/) const {
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
 
@@ -101,7 +101,7 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
   // the width doesn't overlap the offset of a higher memory access,
   // then the memory accesses are different.
   const TargetRegisterInfo *TRI = &getRegisterInfo();
-  MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   int64_t OffsetA = 0, OffsetB = 0;
   unsigned int WidthA = 0, WidthB = 0;
   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
@@ -756,7 +756,7 @@ unsigned LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
 }
 
 bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
-    MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
     unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
   // Handle only loads/stores with base register followed by immediate offset
   // and with add as ALU op.
@@ -794,8 +794,8 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
   return true;
 }
 
-bool LanaiInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
-                                        MachineOperand *&BaseOp,
+bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+                                        const MachineOperand *&BaseOp,
                                         int64_t &Offset,
                                         const TargetRegisterInfo *TRI) const {
   switch (LdSt.getOpcode()) {
diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h
index bdcf9a361b5f..d71424aeb0b1 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/lib/Target/Lanai/LanaiInstrInfo.h
@@ -1,9 +1,8 @@
 //===- LanaiInstrInfo.h - Lanai Instruction Information ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,7 +35,8 @@ public:
     return RegisterInfo;
   }
 
-  bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                       const MachineInstr &MIb,
                                        AliasAnalysis *AA) const override;
 
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
@@ -68,11 +68,13 @@ public:
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
-  bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffset(const MachineInstr &LdSt,
+                               const MachineOperand *&BaseOp,
                                int64_t &Offset,
                                const TargetRegisterInfo *TRI) const override;
 
-  bool getMemOperandWithOffsetWidth(MachineInstr &LdSt, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
                                     const TargetRegisterInfo *TRI) const;
 
diff --git a/lib/Target/Lanai/LanaiInstrInfo.td b/lib/Target/Lanai/LanaiInstrInfo.td
index 66192b4a4704..fcf89a0b52f6 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/lib/Target/Lanai/LanaiInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- LanaiInstrInfo.td - Target Description for Lanai Target -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiMCInstLower.cpp b/lib/Target/Lanai/LanaiMCInstLower.cpp
index 90ede6566acf..743f4f7c6e2f 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -1,9 +1,8 @@
 //=-- LanaiMCInstLower.cpp - Convert Lanai MachineInstr to an MCInst --------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiMCInstLower.h b/lib/Target/Lanai/LanaiMCInstLower.h
index 6d7818d63d87..00d3ebb05045 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.h
+++ b/lib/Target/Lanai/LanaiMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- LanaiMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp b/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
index c72271b67790..7b4e0750ba08 100644
--- a/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
+++ b/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMachineFuctionInfo.cpp - Lanai machine function info ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/lib/Target/Lanai/LanaiMachineFunctionInfo.h
index 3bd9112a9e13..2c97c619c246 100644
--- a/lib/Target/Lanai/LanaiMachineFunctionInfo.h
+++ b/lib/Target/Lanai/LanaiMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===- LanaiMachineFuctionInfo.h - Lanai machine func info -------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 54500b0e52e3..67443b771d3d 100644
--- a/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Simple pass to combine memory and ALU operations
@@ -23,7 +22,7 @@
 // in the same machine basic block into one machine instruction.
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "LanaiTargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -159,7 +158,8 @@ bool isNonVolatileMemoryOp(const MachineInstr &MI) {
   const MachineMemOperand *MemOperand = *MI.memoperands_begin();
 
   // Don't move volatile memory accesses
-  if (MemOperand->isVolatile())
+  // TODO: unclear if we need to be as conservative about atomics
+  if (MemOperand->isVolatile() || MemOperand->isAtomic())
     return false;
 
   return true;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 56a5e0ea2def..d3056a1eba8e 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiRegisterInfo.cpp - Lanai Register Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,8 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "LanaiRegisterInfo.h"
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiFrameLowering.h"
+#include "LanaiInstrInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -257,12 +258,12 @@ bool LanaiRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 
 unsigned LanaiRegisterInfo::getRARegister() const { return Lanai::RCA; }
 
-unsigned
+Register
 LanaiRegisterInfo::getFrameRegister(const MachineFunction & /*MF*/) const {
   return Lanai::FP;
 }
 
-unsigned LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; }
+Register LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; }
 
 const uint32_t *
 LanaiRegisterInfo::getCallPreservedMask(const MachineFunction & /*MF*/,
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h
index 35f4788b2886..4e4da619d366 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- LanaiRegisterInfo.h - Lanai Register Information Impl ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,8 +42,8 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
 
   // Debug information queries.
   unsigned getRARegister() const;
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
-  unsigned getBaseRegister() const;
+  Register getFrameRegister(const MachineFunction &MF) const override;
+  Register getBaseRegister() const;
   bool hasBasePointer(const MachineFunction &MF) const;
 
   int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.td b/lib/Target/Lanai/LanaiRegisterInfo.td
index cf8cfe30cce9..5879dfca8d65 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.td
+++ b/lib/Target/Lanai/LanaiRegisterInfo.td
@@ -1,9 +1,8 @@
 //===- LanaiRegisterInfo.td - Lanai Register defs ------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //  Declarations that describe the Lanai register file
diff --git a/lib/Target/Lanai/LanaiSchedule.td b/lib/Target/Lanai/LanaiSchedule.td
index 7f931c4be8bb..32763c7fdf49 100644
--- a/lib/Target/Lanai/LanaiSchedule.td
+++ b/lib/Target/Lanai/LanaiSchedule.td
@@ -1,9 +1,8 @@
 //=-LanaiSchedule.td - Lanai Scheduling Definitions --*- tablegen -*-=========//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp b/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
index b71c30fe3e05..dff87a3e264d 100644
--- a/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
+++ b/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiSelectionDAGInfo.cpp - Lanai SelectionDAG Info -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiSelectionDAGInfo.h b/lib/Target/Lanai/LanaiSelectionDAGInfo.h
index bfd2be2ede09..c5650a7c1f53 100644
--- a/lib/Target/Lanai/LanaiSelectionDAGInfo.h
+++ b/lib/Target/Lanai/LanaiSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- LanaiSelectionDAGInfo.h - Lanai SelectionDAG Info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiSubtarget.cpp b/lib/Target/Lanai/LanaiSubtarget.cpp
index 0fa5e82a7a66..9a872c789bcc 100644
--- a/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -1,9 +1,8 @@
 //===- LanaiSubtarget.cpp - Lanai Subtarget Information -----------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiSubtarget.h b/lib/Target/Lanai/LanaiSubtarget.h
index 4bfa19920239..116c83a4df91 100644
--- a/lib/Target/Lanai/LanaiSubtarget.h
+++ b/lib/Target/Lanai/LanaiSubtarget.h
@@ -1,9 +1,8 @@
 //=====-- LanaiSubtarget.h - Define Subtarget for the Lanai -----*- C++ -*--==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp
index 10bd9e2c65d2..8ae0225629ab 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiTargetMachine.cpp - Define TargetMachine for Lanai ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #include "Lanai.h"
 #include "LanaiTargetObjectFile.h"
 #include "LanaiTargetTransformInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h
index 0db286ec13e7..d2ac40007e24 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/lib/Target/Lanai/LanaiTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- LanaiTargetMachine.h - Define TargetMachine for Lanai --- C++ ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index 7d165e9c5f8c..b0f7c090bb8e 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -1,8 +1,7 @@
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.h b/lib/Target/Lanai/LanaiTargetObjectFile.h
index 99ec1956da4b..938a1e675b6a 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.h
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- LanaiTargetObjectFile.h - Lanai Object Info -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/LanaiTargetTransformInfo.h b/lib/Target/Lanai/LanaiTargetTransformInfo.h
index 3b5a1b88326b..63cc47dedce3 100644
--- a/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- LanaiTargetTransformInfo.h - Lanai specific TTI ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 82fa93ea5e5e..a6ce3d5eb4ff 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiAsmBackend.cpp - Lanai Assembler Backend ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h b/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
index ce7f83509c9b..1bc84014e736 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- LanaiBaseInfo.h - Top level definitions for Lanai MC ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index 7676891ef981..4313fa5a82b5 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiELFObjectWriter.cpp - Lanai ELF Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,7 +34,7 @@ protected:
 
 LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
     : MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
-                              /*HasRelocationAddend=*/true) {}
+                              /*HasRelocationAddend_=*/true) {}
 
 unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
                                             const MCValue & /*Target*/,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h b/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
index 9ff8340d2922..1e692f8d31cb 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- LanaiFixupKinds.h - Lanai Specific Fixup Entries --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
new file mode 100644
index 000000000000..0d42612824b4
--- /dev/null
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -0,0 +1,307 @@
+//===-- LanaiInstPrinter.cpp - Convert Lanai MCInst to asm syntax ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Lanai MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LanaiInstPrinter.h"
+#include "LanaiMCExpr.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiMCTargetDesc.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "LanaiGenAsmWriter.inc"
+
+void LanaiInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+bool LanaiInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                 StringRef Alias, unsigned OpNo0,
+                                 unsigned OpNo1) {
+  OS << "\t" << Alias << " ";
+  printOperand(MI, OpNo0, OS);
+  OS << ", ";
+  printOperand(MI, OpNo1, OS);
+  return true;
+}
+
+static bool usesGivenOffset(const MCInst *MI, int AddOffset) {
+  unsigned AluCode = MI->getOperand(3).getImm(); // Operand 3: ALU code.
+  return LPAC::encodeLanaiAluCode(AluCode) == LPAC::ADD &&
+         (MI->getOperand(2).getImm() == AddOffset || // Operand 2: offset.
+          MI->getOperand(2).getImm() == -AddOffset);
+}
+
+static bool isPreIncrementForm(const MCInst *MI, int AddOffset) {
+  unsigned AluCode = MI->getOperand(3).getImm();
+  return LPAC::isPreOp(AluCode) && usesGivenOffset(MI, AddOffset);
+}
+
+static bool isPostIncrementForm(const MCInst *MI, int AddOffset) {
+  unsigned AluCode = MI->getOperand(3).getImm();
+  return LPAC::isPostOp(AluCode) && usesGivenOffset(MI, AddOffset);
+}
+
+static StringRef decIncOperator(const MCInst *MI) {
+  // A negative immediate selects "--"; anything else selects "++".
+  const bool IsNegative = MI->getOperand(2).getImm() < 0;
+  return IsNegative ? "--" : "++";
+}
+
+bool LanaiInstPrinter::printMemoryLoadIncrement(const MCInst *MI,
+                                                raw_ostream &OS,
+                                                StringRef Opcode,
+                                                int AddOffset) {
+  if (isPreIncrementForm(MI, AddOffset)) { // <op> [++%rN], %rX (or --)
+    OS << "\t" << Opcode << "\t[" << decIncOperator(MI) << "%"
+       << getRegisterName(MI->getOperand(1).getReg()) << "], %"
+       << getRegisterName(MI->getOperand(0).getReg());
+    return true;
+  }
+  if (isPostIncrementForm(MI, AddOffset)) { // <op> [%rN++], %rX (or --)
+    OS << "\t" << Opcode << "\t[%"
+       << getRegisterName(MI->getOperand(1).getReg()) << decIncOperator(MI)
+       << "], %" << getRegisterName(MI->getOperand(0).getReg());
+    return true;
+  }
+  return false; // Not an increment form: nothing was written to OS.
+}
+
+bool LanaiInstPrinter::printMemoryStoreIncrement(const MCInst *MI,
+                                                 raw_ostream &OS,
+                                                 StringRef Opcode,
+                                                 int AddOffset) {
+  if (isPreIncrementForm(MI, AddOffset)) { // <op> %rX, [++%rN] (or --)
+    OS << "\t" << Opcode << "\t%" << getRegisterName(MI->getOperand(0).getReg())
+       << ", [" << decIncOperator(MI) << "%"
+       << getRegisterName(MI->getOperand(1).getReg()) << "]";
+    return true;
+  }
+  if (isPostIncrementForm(MI, AddOffset)) { // <op> %rX, [%rN++] (or --)
+    OS << "\t" << Opcode << "\t%" << getRegisterName(MI->getOperand(0).getReg())
+       << ", [%" << getRegisterName(MI->getOperand(1).getReg())
+       << decIncOperator(MI) << "]";
+    return true;
+  }
+  return false; // Not an increment form: nothing was written to OS.
+}
+
+bool LanaiInstPrinter::printAlias(const MCInst *MI, raw_ostream &OS) {
+  switch (MI->getOpcode()) {
+  case Lanai::LDW_RI:
+    // ld 4[*%rN], %rX => ld [++%rN], %rX
+    // ld -4[*%rN], %rX => ld [--%rN], %rX
+    // ld 4[%rN*], %rX => ld [%rN++], %rX
+    // ld -4[%rN*], %rX => ld [%rN--], %rX
+    return printMemoryLoadIncrement(MI, OS, "ld", 4);
+  case Lanai::LDHs_RI:
+    return printMemoryLoadIncrement(MI, OS, "ld.h", 2);
+  case Lanai::LDHz_RI:
+    return printMemoryLoadIncrement(MI, OS, "uld.h", 2);
+  case Lanai::LDBs_RI:
+    return printMemoryLoadIncrement(MI, OS, "ld.b", 1);
+  case Lanai::LDBz_RI:
+    return printMemoryLoadIncrement(MI, OS, "uld.b", 1);
+  case Lanai::SW_RI:
+    // st %rX, 4[*%rN] => st %rX, [++%rN]
+    // st %rX, -4[*%rN] => st %rX, [--%rN]
+    // st %rX, 4[%rN*] => st %rX, [%rN++]
+    // st %rX, -4[%rN*] => st %rX, [%rN--]
+    return printMemoryStoreIncrement(MI, OS, "st", 4);
+  case Lanai::STH_RI:
+    return printMemoryStoreIncrement(MI, OS, "st.h", 2);
+  case Lanai::STB_RI:
+    return printMemoryStoreIncrement(MI, OS, "st.b", 1);
+  default:
+    return false;
+  }
+}
+
+void LanaiInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                 StringRef Annotation,
+                                 const MCSubtargetInfo & /*STI*/) {
+  if (!printAlias(MI, OS) && !printAliasInstr(MI, OS))
+    printInstruction(MI, OS); // Fallback when neither alias printer matched.
+  printAnnotation(OS, Annotation);
+}
+
+void LanaiInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &OS, const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg())
+    OS << "%" << getRegisterName(Op.getReg());
+  else if (Op.isImm())
+    OS << formatHex(Op.getImm());
+  else {
+    assert(Op.isExpr() && "Expected an expression");
+    Op.getExpr()->print(OS, &MAI);
+  }
+}
+
+void LanaiInstPrinter::printMemImmOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &OS) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm()) {
+    OS << '[' << formatHex(Op.getImm()) << ']';
+  } else {
+    // Symbolic operand will be lowered to immediate value by linker
+    assert(Op.isExpr() && "Expected an expression");
+    OS << '[';
+    Op.getExpr()->print(OS, &MAI);
+    OS << ']';
+  }
+}
+
+void LanaiInstPrinter::printHi16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &OS) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm()) {
+    OS << formatHex(Op.getImm() << 16); // Printed shifted left by 16 bits.
+  } else {
+    // Symbolic operand will be lowered to immediate value by linker
+    assert(Op.isExpr() && "Expected an expression");
+    Op.getExpr()->print(OS, &MAI);
+  }
+}
+
+void LanaiInstPrinter::printHi16AndImmOperand(const MCInst *MI, unsigned OpNo,
+                                              raw_ostream &OS) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm()) {
+    OS << formatHex((Op.getImm() << 16) | 0xffff); // Low 16 bits all ones.
+  } else {
+    // Symbolic operand will be lowered to immediate value by linker
+    assert(Op.isExpr() && "Expected an expression");
+    Op.getExpr()->print(OS, &MAI);
+  }
+}
+
+void LanaiInstPrinter::printLo16AndImmOperand(const MCInst *MI, unsigned OpNo,
+                                              raw_ostream &OS) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm()) {
+    OS << formatHex(0xffff0000 | Op.getImm()); // Bits 16-31 forced to ones.
+  } else {
+    // Symbolic operand will be lowered to immediate value by linker
+    assert(Op.isExpr() && "Expected an expression");
+    Op.getExpr()->print(OS, &MAI);
+  }
+}
+
+static void printMemoryBaseRegister(raw_ostream &OS, const unsigned AluCode,
+                                    const MCOperand &RegOp) {
+  assert(RegOp.isReg() && "Register operand expected");
+  OS << "[";
+  if (LPAC::isPreOp(AluCode))
+    OS << "*";
+  OS << "%" << LanaiInstPrinter::getRegisterName(RegOp.getReg());
+  if (LPAC::isPostOp(AluCode))
+    OS << "*";
+  OS << "]";
+}
+
+template <unsigned SizeInBits>
+static void printMemoryImmediateOffset(const MCAsmInfo &MAI,
+                                       const MCOperand &OffsetOp,
+                                       raw_ostream &OS) {
+  assert((OffsetOp.isImm() || OffsetOp.isExpr()) && "Immediate expected");
+  if (OffsetOp.isImm()) {
+    assert(isInt<SizeInBits>(OffsetOp.getImm()) && "Constant value truncated");
+    OS << OffsetOp.getImm(); // Decimal, unlike the formatHex() printers.
+  } else
+    OffsetOp.getExpr()->print(OS, &MAI);
+}
+
+void LanaiInstPrinter::printMemRiOperand(const MCInst *MI, int OpNo,
+                                         raw_ostream &OS,
+                                         const char * /*Modifier*/) {
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
+  const unsigned AluCode = AluOp.getImm();
+
+  // Offset first: a signed 16-bit immediate or a symbolic expression.
+  printMemoryImmediateOffset<16>(MAI, OffsetOp, OS);
+
+  // Then the bracketed base register, with '*' marking pre/post-ops.
+  printMemoryBaseRegister(OS, AluCode, RegOp);
+}
+
+void LanaiInstPrinter::printMemRrOperand(const MCInst *MI, int OpNo,
+                                         raw_ostream &OS,
+                                         const char * /*Modifier*/) {
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
+  const unsigned AluCode = AluOp.getImm();
+  assert(OffsetOp.isReg() && RegOp.isReg() && "Registers expected.");
+
+  // Prints "[%base OP %offset]"; '*' before/after %base marks pre/post-ops.
+  OS << "[";
+  if (LPAC::isPreOp(AluCode))
+    OS << "*";
+  OS << "%" << getRegisterName(RegOp.getReg());
+  if (LPAC::isPostOp(AluCode))
+    OS << "*";
+  OS << " " << LPAC::lanaiAluCodeToString(AluCode) << " ";
+  OS << "%" << getRegisterName(OffsetOp.getReg());
+  OS << "]";
+}
+
+void LanaiInstPrinter::printMemSplsOperand(const MCInst *MI, int OpNo,
+                                           raw_ostream &OS,
+                                           const char * /*Modifier*/) {
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+  const MCOperand &AluOp = MI->getOperand(OpNo + 2);
+  const unsigned AluCode = AluOp.getImm();
+
+  // Offset first: a signed 10-bit immediate or a symbolic expression.
+  printMemoryImmediateOffset<10>(MAI, OffsetOp, OS);
+
+  // Then the bracketed base register, with '*' marking pre/post-ops.
+  printMemoryBaseRegister(OS, AluCode, RegOp);
+}
+
+void LanaiInstPrinter::printCCOperand(const MCInst *MI, int OpNo,
+                                      raw_ostream &OS) {
+  LPCC::CondCode CC =
+      static_cast<LPCC::CondCode>(MI->getOperand(OpNo).getImm());
+  // Handle the undefined value here for printing so we don't abort().
+  if (CC >= LPCC::UNKNOWN)
+    OS << "<und>";
+  else
+    OS << lanaiCondCodeToString(CC);
+}
+
+void LanaiInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &OS) {
+  LPCC::CondCode CC =
+      static_cast<LPCC::CondCode>(MI->getOperand(OpNo).getImm());
+  // Handle the undefined value here for printing so we don't abort().
+  if (CC >= LPCC::UNKNOWN)
+    OS << "<und>";
+  else if (CC != LPCC::ICC_T) // ICC_T prints nothing (no ".cc" suffix).
+    OS << "." << lanaiCondCodeToString(CC);
+}
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
new file mode 100644
index 000000000000..721a129a859e
--- /dev/null
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -0,0 +1,65 @@
+//= LanaiInstPrinter.h - Convert Lanai MCInst to asm syntax -------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Lanai MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
+#define LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class LanaiInstPrinter : public MCInstPrinter {
+public:
+  LanaiInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+  // Overrides MCInstPrinter::printInst; the operand printers follow.
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                    const char *Modifier = nullptr);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                         const char *Modifier = nullptr);
+  void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                         const char *Modifier = nullptr);
+  void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                           const char *Modifier = nullptr);
+  void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
+  void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
+  void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printHi16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printLo16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+
+private:
+  bool printAlias(const MCInst *MI, raw_ostream &Ostream);
+  bool printInst(const MCInst *MI, raw_ostream &Ostream, StringRef Alias,
+                 unsigned OpNo0, unsigned OpNo1); // Not the override above.
+  bool printMemoryLoadIncrement(const MCInst *MI, raw_ostream &Ostream,
+                                StringRef Opcode, int AddOffset);
+  bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
+                                 StringRef Opcode, int AddOffset);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
index 7e2705e67b6d..14d3dac26d1f 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMCAsmInfo.cpp - Lanai asm properties -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
index 3eef0592d2fa..265af425d037 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
@@ -1,9 +1,8 @@
 //=====-- LanaiMCAsmInfo.h - Lanai asm properties -----------*- C++ -*--====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index 21f4005aaf83..df4ee297155f 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMCCodeEmitter.cpp - Convert Lanai code to machine code -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "MCTargetDesc/LanaiBaseInfo.h"
 #include "MCTargetDesc/LanaiFixupKinds.h"
 #include "MCTargetDesc/LanaiMCExpr.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
index 201c95de07f4..56d5fbf40360 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMCExpr.cpp - Lanai specific MC expression classes ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
index 5004d541ff70..c99af32d9102 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
@@ -1,9 +1,8 @@
 //===-- LanaiMCExpr.h - Lanai specific MC expression classes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index ddb01cdd2d8f..a9de0416fcac 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- LanaiMCTargetDesc.cpp - Lanai Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,8 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "LanaiMCTargetDesc.h"
-#include "InstPrinter/LanaiInstPrinter.h"
+#include "LanaiInstPrinter.h"
 #include "LanaiMCAsmInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index 2d8828ea4fa9..cf66d3226659 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- LanaiMCTargetDesc.h - Lanai Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,8 +31,6 @@ class Triple;
 class StringRef;
 class raw_pwrite_stream;
 
-Target &getTheLanaiTarget();
-
 MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
                                         const MCRegisterInfo &MRI,
                                         MCContext &Ctx);
diff --git a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index ccf47b08fcff..93deb891dec5 100644
--- a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -1,23 +1,20 @@
 //===-- LanaiTargetInfo.cpp - Lanai Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Module.h"
+#include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
 
-namespace llvm {
-Target &getTheLanaiTarget() {
+Target &llvm::getTheLanaiTarget() {
   static Target TheLanaiTarget;
   return TheLanaiTarget;
 }
-} // namespace llvm
 
 extern "C" void LLVMInitializeLanaiTargetInfo() {
   RegisterTarget<Triple::lanai> X(getTheLanaiTarget(), "lanai", "Lanai",
diff --git a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h
new file mode 100644
index 000000000000..429cf0234a60
--- /dev/null
+++ b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- LanaiTargetInfo.h - Lanai Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
+#define LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheLanaiTarget(); // Returns the single static Lanai Target object.
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
diff --git a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 1ad70ac72c73..a0ec14ae2381 100644
--- a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -1,15 +1,15 @@
 //===- MSP430AsmParser.cpp - Parse MSP430 assembly to MCInst instructions -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MSP430.h"
 #include "MSP430RegisterInfo.h"
 #include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "TargetInfo/MSP430TargetInfo.h"
 
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/StringSwitch.h"
diff --git a/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index e5da130f9bbb..59c12e24e8bf 100644
--- a/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430Disassembler.cpp - Disassembler for MSP430 ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "MSP430.h"
 #include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "TargetInfo/MSP430TargetInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
deleted file mode 100644
index 4d62547bc65b..000000000000
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSP430InstPrinter.h"
-#include "MSP430.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "MSP430GenAsmWriter.inc"
-
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                  StringRef Annot, const MCSubtargetInfo &STI) {
-  if (!printAliasInstr(MI, O))
-    printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm()) {
-    int64_t Imm = Op.getImm() * 2 + 2;
-    O << "$";
-    if (Imm >= 0)
-      O << '+';
-    O << Imm;
-  } else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O, const char *Modifier) {
-  assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    O << getRegisterName(Op.getReg());
-  } else if (Op.isImm()) {
-    O << '#' << Op.getImm();
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << '#';
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MCOperand &Base = MI->getOperand(OpNo);
-  const MCOperand &Disp = MI->getOperand(OpNo+1);
-
-  // Print displacement first
-
-  // If the global address expression is a part of displacement field with a
-  // register base, we should not emit any prefix symbol here, e.g.
-  //   mov.w &foo, r1
-  // vs
-  //   mov.w glb(r1), r2
-  // Otherwise (!) msp430-as will silently miscompile the output :(
-  if (Base.getReg() == MSP430::SR)
-    O << '&';
-
-  if (Disp.isExpr())
-    Disp.getExpr()->print(O, &MAI);
-  else {
-    assert(Disp.isImm() && "Expected immediate in displacement field");
-    O << Disp.getImm();
-  }
-
-  // Print register base field
-  if ((Base.getReg() != MSP430::SR) &&
-      (Base.getReg() != MSP430::PC))
-    O << '(' << getRegisterName(Base.getReg()) << ')';
-}
-
-void MSP430InstPrinter::printIndRegOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O) {
-  const MCOperand &Base = MI->getOperand(OpNo);
-  O << "@" << getRegisterName(Base.getReg());
-}
-
-void MSP430InstPrinter::printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
-                                               raw_ostream &O) {
-  const MCOperand &Base = MI->getOperand(OpNo);
-  O << "@" << getRegisterName(Base.getReg()) << "+";
-}
-
-void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned CC = MI->getOperand(OpNo).getImm();
-
-  switch (CC) {
-  default:
-   llvm_unreachable("Unsupported CC code");
-  case MSP430CC::COND_E:
-   O << "eq";
-   break;
-  case MSP430CC::COND_NE:
-   O << "ne";
-   break;
-  case MSP430CC::COND_HS:
-   O << "hs";
-   break;
-  case MSP430CC::COND_LO:
-   O << "lo";
-   break;
-  case MSP430CC::COND_GE:
-   O << "ge";
-   break;
-  case MSP430CC::COND_L:
-   O << 'l';
-   break;
-  case MSP430CC::COND_N:
-   O << 'n';
-   break;
-  }
-}
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
deleted file mode 100644
index cd02c4fa645a..000000000000
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
-#define LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-  class MSP430InstPrinter : public MCInstPrinter {
-  public:
-    MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                      const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-    void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                   const MCSubtargetInfo &STI) override;
-
-    // Autogenerated by tblgen.
-    void printInstruction(const MCInst *MI, raw_ostream &O);
-    bool printAliasInstr(const MCInst *MI, raw_ostream &O);
-    void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                                 unsigned PrintMethodIdx, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-private:
-    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                      const char *Modifier = nullptr);
-    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                            const char *Modifier = nullptr);
-    void printIndRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-    void printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
-                                raw_ostream &O);
-    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  };
-}
-
-#endif
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index bd69a9d8d795..365e5da74de0 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430AsmBackend.cpp - MSP430 Assembler Backend -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index e47db2400a05..38b7da32c246 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430ELFObjectWriter.cpp - MSP430 ELF Writer ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 9449cb278024..4e054f85ccc3 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430ELFStreamer.cpp - MSP430 ELF Target Streamer Methods --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h b/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
index 1eb6a2759423..68e41b0fb874 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
@@ -1,9 +1,8 @@
 //===-- MSP430FixupKinds.h - MSP430 Specific Fixup Entries ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
new file mode 100644
index 000000000000..2f3c6ed3c17e
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -0,0 +1,137 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430InstPrinter.h"
+#include "MSP430.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                  StringRef Annot, const MCSubtargetInfo &STI) {
+  if (!printAliasInstr(MI, O)) // When the alias printer returns false...
+    printInstruction(MI, O);   // ...the regular printer is used instead.
+  printAnnotation(O, Annot);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  const MCOperand &PCRelOp = MI->getOperand(OpNo);
+  if (!PCRelOp.isImm()) {
+    // Anything that is not an immediate must be an expression.
+    assert(PCRelOp.isExpr() && "unknown pcrel immediate operand");
+    PCRelOp.getExpr()->print(O, &MAI);
+    return;
+  }
+  // Printed as "$" plus a signed value (explicit '+' when non-negative);
+  // the value is the immediate times two, plus two.
+  const int64_t Displacement = PCRelOp.getImm() * 2 + 2;
+  O << "$" << (Displacement >= 0 ? "+" : "") << Displacement;
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Operand = MI->getOperand(OpNo);
+  if (Operand.isImm()) { // Immediates are prefixed with '#'.
+    O << '#' << Operand.getImm();
+  } else if (Operand.isReg()) { // Registers print as their bare name.
+    O << getRegisterName(Operand.getReg());
+  } else { // Anything else must be an expression, also '#'-prefixed.
+    assert(Operand.isExpr() && "unknown operand kind in printOperand");
+    O << '#';
+    Operand.getExpr()->print(O, &MAI);
+  }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  const MCOperand &BaseOp = MI->getOperand(OpNo);
+  const MCOperand &DispOp = MI->getOperand(OpNo + 1);
+  const unsigned BaseReg = BaseOp.getReg();
+  const bool BaseIsSR = BaseReg == MSP430::SR;
+
+  // The displacement is printed before the base register.
+  // An SR base gets the '&' prefix; a displacement that accompanies an
+  // ordinary register base must not get any prefix symbol, e.g.
+  //   mov.w &foo, r1
+  // vs
+  //   mov.w glb(r1), r2
+  // Otherwise (!) msp430-as will silently miscompile the output :(
+  if (BaseIsSR)
+    O << '&';
+
+  if (!DispOp.isExpr()) {
+    assert(DispOp.isImm() && "Expected immediate in displacement field");
+    O << DispOp.getImm();
+  } else {
+    DispOp.getExpr()->print(O, &MAI);
+  }
+
+  // The parenthesized base register is omitted for SR and PC bases.
+  if (!BaseIsSR && BaseReg != MSP430::PC)
+    O << '(' << getRegisterName(BaseReg) << ')';
+}
+
+void MSP430InstPrinter::printIndRegOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  // Emits '@' followed by the name of the register held in operand OpNo.
+  O << "@" << getRegisterName(MI->getOperand(OpNo).getReg());
+}
+
+void MSP430InstPrinter::printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+                                               raw_ostream &O) {
+  // Emits '@', the name of the register held in operand OpNo, then '+'.
+  O << "@" << getRegisterName(MI->getOperand(OpNo).getReg()) << "+";
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  // Translate the condition-code immediate into its textual spelling.
+  const unsigned CondCode = MI->getOperand(OpNo).getImm();
+  switch (CondCode) {
+  case MSP430CC::COND_E:
+    O << "eq";
+    break;
+  case MSP430CC::COND_NE:
+    O << "ne";
+    break;
+  case MSP430CC::COND_HS:
+    O << "hs";
+    break;
+  case MSP430CC::COND_LO:
+    O << "lo";
+    break;
+  case MSP430CC::COND_GE:
+    O << "ge";
+    break;
+  case MSP430CC::COND_L:
+    O << 'l';
+    break;
+  case MSP430CC::COND_N:
+    O << 'n';
+    break;
+  default:
+    llvm_unreachable("Unsupported CC code");
+  }
+}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
new file mode 100644
index 000000000000..25451033236e
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -0,0 +1,49 @@
+//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430INSTPRINTER_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MSP430InstPrinter : public MCInstPrinter {
+  public:
+    MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                      const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+    // Overrides MCInstPrinter::printInst.
+    void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                   const MCSubtargetInfo &STI) override;
+
+    // Autogenerated by tblgen.
+    void printInstruction(const MCInst *MI, raw_ostream &O);
+    bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+    void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                                 unsigned PrintMethodIdx, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+
+private: // Operand printers, defined in MSP430InstPrinter.cpp.
+    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                      const char *Modifier = nullptr);
+    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                            const char *Modifier = nullptr);
+    void printIndRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+    void printPostIndRegOperand(const MCInst *MI, unsigned OpNo,
+                                raw_ostream &O);
+    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  };
+}
+
+#endif
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 36e9a9c31075..db5a49dd22a7 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,4 +23,5 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
 
   AlignmentIsInBytes = false;
   UsesELFSectionDirectiveForBSS = true;
+  UseIntegratedAssembler = true;
 }
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index de486ec4b7bd..93979df037e6 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
index 06f9f307cb1a..cf57e87a073d 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430MCCodeEmitter.cpp - Convert MSP430 code to machine code -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index b21145d3904a..da928733015f 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,8 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MSP430MCTargetDesc.h"
-#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430InstPrinter.h"
 #include "MSP430MCAsmInfo.h"
+#include "TargetInfo/MSP430TargetInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index e484c79c9ee9..02bfbe40c6bf 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,8 +29,6 @@ class MCObjectTargetWriter;
 class MCStreamer;
 class MCTargetStreamer;
 
-Target &getTheMSP430Target();
-
 /// Creates a machine code emitter for MSP430.
 MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
                                          const MCRegisterInfo &MRI,
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 7a5314a10844..67f35b8034d9 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -1,9 +1,8 @@
 //==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 8fa99dc13dd5..38aa30fcf4dd 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -1,9 +1,8 @@
 //===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This is the top level entry point for the MSP430 target.
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index f39c21fc8aa2..3a71a084d1af 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,11 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/MSP430InstPrinter.h"
+#include "MCTargetDesc/MSP430InstPrinter.h"
 #include "MSP430.h"
 #include "MSP430InstrInfo.h"
 #include "MSP430MCInstLower.h"
 #include "MSP430TargetMachine.h"
+#include "TargetInfo/MSP430TargetInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,6 +29,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -44,20 +46,34 @@ namespace {
 
     StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
 
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+    void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
     void printOperand(const MachineInstr *MI, int OpNum,
                       raw_ostream &O, const char* Modifier = nullptr);
     void printSrcMemOperand(const MachineInstr *MI, int OpNum,
                             raw_ostream &O);
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O) override;
-    bool PrintAsmMemoryOperand(const MachineInstr *MI,
-                               unsigned OpNo, unsigned AsmVariant,
+                         const char *ExtraCode, raw_ostream &O) override;
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                                const char *ExtraCode, raw_ostream &O) override;
     void EmitInstruction(const MachineInstr *MI) override;
+
+    void EmitInterruptVectorSection(MachineFunction &ISR);
   };
 } // end of anonymous namespace
 
+void MSP430AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+                                          raw_ostream &O) {
+  int64_t Offset = MO.getOffset(); // Signed: keeps negative offsets readable.
+  if (Offset)
+    O << '(' << Offset << '+';
+  // Print the global's symbol, i.e. "sym" or "(Offset+sym)" if Offset != 0.
+  getSymbol(MO.getGlobal())->print(O, MAI);
+  // Close the parenthesis opened for a non-zero offset.
+  if (Offset)
+    O << ')';
+}
 
 void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
                                     raw_ostream &O, const char *Modifier) {
@@ -76,25 +92,13 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     MO.getMBB()->getSymbol()->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress: {
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
-    uint64_t Offset = MO.getOffset();
-
     // If the global address expression is a part of displacement field with a
     // register base, we should not emit any prefix symbol here, e.g.
-    //   mov.w &foo, r1
-    // vs
     //   mov.w glb(r1), r2
     // Otherwise (!) msp430-as will silently miscompile the output :(
     if (!Modifier || strcmp(Modifier, "nohash"))
-      O << (isMemOp ? '&' : '#');
-    if (Offset)
-      O << '(' << Offset << '+';
-
-    getSymbol(MO.getGlobal())->print(O, MAI);
-
-    if (Offset)
-      O << ')';
-
+      O << '#';
+    PrintSymbolOperand(MO, O);
     return;
   }
   }
@@ -108,12 +112,12 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
   // Print displacement first
 
   // Imm here is in fact global address - print extra modifier.
-  if (Disp.isImm() && !Base.getReg())
+  if (Disp.isImm() && Base.getReg() == MSP430::SR)
     O << '&';
   printOperand(MI, OpNum+1, O, "nohash");
 
   // Print register base field
-  if (Base.getReg()) {
+  if (Base.getReg() != MSP430::SR && Base.getReg() != MSP430::PC) {
     O << '(';
     printOperand(MI, OpNum, O);
     O << ')';
@@ -123,18 +127,17 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
                                        const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+    return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
 
   printOperand(MI, OpNo, O);
   return false;
 }
 
 bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                             unsigned OpNo, unsigned AsmVariant,
+                                             unsigned OpNo,
                                              const char *ExtraCode,
                                              raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
@@ -153,6 +156,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
+  MCSection *Cur = OutStreamer->getCurrentSectionOnly(); // Restored below.
+  const auto *F = &ISR.getFunction();
+  assert(F->hasFnAttribute("interrupt") &&
+         "Functions with MSP430_INTR CC should have 'interrupt' attribute");
+  StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
+  MCSection *IV = OutStreamer->getContext().getELFSection(
+    "__interrupt_vector_" + IVIdx, // Suffix is the attribute's string value.
+    ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+  OutStreamer->SwitchSection(IV);
+  // Emit the ISR's address (program-pointer sized) into the vector section.
+  const MCSymbol *FunctionSymbol = getSymbol(F);
+  OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+  OutStreamer->SwitchSection(Cur);
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  // ISRs (MSP430_INTR calling convention) get their own vector section first.
+  if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
+    EmitInterruptVectorSection(MF);
+  // Then emit the function itself; false is returned unconditionally.
+  SetupMachineFunction(MF);
+  EmitFunctionBody();
+  return false;
+}
+
 // Force static initialization.
 extern "C" void LLVMInitializeMSP430AsmPrinter() {
   RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 2b3495405545..45e7c26e4d30 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index 0434f8abfbf4..49191fa5dd5f 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -1,9 +1,8 @@
 //==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for MSP430 architecture.
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2421f09fbf59..de60ad9bd7e6 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 8807101f37ca..33ce3c70a2a3 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -1,9 +1,8 @@
 //==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7a1998ad355d..23449585505e 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 3e706134afc5..fedfb857bd0f 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation  ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 731bc1406711..ee6b6316d7a9 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -1,9 +1,8 @@
 //===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
index e2e4503db20c..36f40d6fc89d 100644
--- a/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -1,9 +1,8 @@
 //===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c136933a51bc..5c3a3fc69266 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -308,7 +307,8 @@ unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   case TargetOpcode::KILL:
   case TargetOpcode::DBG_VALUE:
     return 0;
-  case TargetOpcode::INLINEASM: {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: {
     const MachineFunction *MF = MI.getParent()->getParent();
     const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
     return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index fee3bea9b8d6..13c50ad23adc 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -1,9 +1,8 @@
 //===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 25c81d94f75b..aaca3504822d 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -1,9 +1,8 @@
 //===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 860c0006f782..1e57f33386e6 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index ebd639744bcc..910ad4bb12d5 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -1,9 +1,8 @@
 //===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index b442fc03b257..1d3a6d118bd6 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index fcaa8a1d6c72..2b2c8967a749 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //=== MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 54e53e19eb54..afbb2f213b45 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -155,7 +154,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const MSP430FrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
 }
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 47a5e147953e..c3eff93f55d2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,7 +37,7 @@ public:
                            RegScavenger *RS = nullptr) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 1e86bdf34a0b..11003dba383f 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 776a9dcb11d4..20168773cd53 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 01a428056377..ab2b71e3bb1a 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -1,9 +1,8 @@
 //===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 9f6ebba75ec6..8c4ca982c966 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "MSP430TargetMachine.h"
 #include "MSP430.h"
+#include "TargetInfo/MSP430TargetInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 4935b80cfdd9..96fbc3ba0377 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -1,9 +1,8 @@
 //===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index dfa21f580cb7..5da7d588079f 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "MSP430.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/MSP430TargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h
new file mode 100644
index 000000000000..17854244f28b
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.h - MSP430 Target Implementation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
+#define LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheMSP430Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d2fed6861477..1f7d095bf49b 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -1,9 +1,8 @@
 //===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "MCTargetDesc/MipsMCExpr.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCSectionELF.h"
@@ -65,10 +66,7 @@ class MCInstrInfo;
 
 } // end namespace llvm
 
-static cl::opt<bool>
-EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
-              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
-              cl::init(true));
+extern cl::opt<bool> EmitJalrReloc;
 
 namespace {
 
@@ -148,6 +146,7 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool IsPicEnabled;
   bool IsCpRestoreSet;
   int CpRestoreOffset;
+  unsigned GPReg;
   unsigned CpSaveLocation;
   /// If true, then CpSaveLocation is a register, otherwise it's an offset.
   bool     CpSaveLocationIsRegister;
@@ -277,6 +276,15 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                  const MCSubtargetInfo *STI);
 
+  bool expandSge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                 const MCSubtargetInfo *STI);
+
+  bool expandSgeImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                    const MCSubtargetInfo *STI);
+
+  bool expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                    const MCSubtargetInfo *STI);
+
   bool expandRotation(MCInst &Inst, SMLoc IDLoc,
                       MCStreamer &Out, const MCSubtargetInfo *STI);
   bool expandRotationImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
@@ -304,6 +312,9 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                              const MCSubtargetInfo *STI, bool IsLoad);
 
+  bool expandStoreDM1Macro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                           const MCSubtargetInfo *STI);
+
   bool expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                  const MCSubtargetInfo *STI);
 
@@ -324,6 +335,7 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool parseSetFeature(uint64_t Feature);
   bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
   bool parseDirectiveCpLoad(SMLoc Loc);
+  bool parseDirectiveCpLocal(SMLoc Loc);
   bool parseDirectiveCpRestore(SMLoc Loc);
   bool parseDirectiveCPSetup();
   bool parseDirectiveCPReturn();
@@ -517,6 +529,7 @@ public:
 
     IsCpRestoreSet = false;
     CpRestoreOffset = -1;
+    GPReg = ABI.GetGlobalPtr();
 
     const Triple &TheTriple = sti.getTargetTriple();
     IsLittleEndian = TheTriple.isLittleEndian();
@@ -895,14 +908,6 @@ private:
         .getRegister(RegIdx.Index);
   }
 
-  /// Coerce the register to FGRH32 and return the real register for the current
-  /// target.
-  unsigned getFGRH32Reg() const {
-    assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
-    return RegIdx.RegInfo->getRegClass(Mips::FGRH32RegClassID)
-        .getRegister(RegIdx.Index);
-  }
-
   /// Coerce the register to FCC and return the real register for the current
   /// target.
   unsigned getFCCReg() const {
@@ -1100,11 +1105,6 @@ public:
                                 "registers");
   }
 
-  void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::createReg(getFGRH32Reg()));
-  }
-
   void addFCCAsmRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getFCCReg()));
@@ -2043,7 +2043,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
         const MCExpr *Lo16RelocExpr =
             MipsMCExpr::create(MipsMCExpr::MEK_LO, JalExpr, getContext());
 
-        TOut.emitRRX(Mips::LW, Mips::T9, Mips::GP,
+        TOut.emitRRX(Mips::LW, Mips::T9, GPReg,
                      MCOperand::createExpr(Got16RelocExpr), IDLoc, STI);
         TOut.emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
                      MCOperand::createExpr(Lo16RelocExpr), IDLoc, STI);
@@ -2057,7 +2057,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
             MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, JalExpr, getContext());
 
         TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9,
-                     Mips::GP, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
+                     GPReg, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
                      STI);
       }
     } else {
@@ -2068,7 +2068,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
       const MCExpr *Call16RelocExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, JalExpr, getContext());
 
-      TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+      TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, GPReg,
                    MCOperand::createExpr(Call16RelocExpr), IDLoc, STI);
     }
 
@@ -2485,6 +2485,19 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
   case Mips::NORImm:
   case Mips::NORImm64:
     return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  case Mips::SGE:
+  case Mips::SGEU:
+    return expandSge(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  case Mips::SGEImm:
+  case Mips::SGEUImm:
+  case Mips::SGEImm64:
+  case Mips::SGEUImm64:
+    return expandSgeImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  case Mips::SGTImm:
+  case Mips::SGTUImm:
+  case Mips::SGTImm64:
+  case Mips::SGTUImm64:
+    return expandSgtImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
   case Mips::SLTImm64:
     if (isInt<16>(Inst.getOperand(2).getImm())) {
       Inst.setOpcode(Mips::SLTi64);
@@ -2553,6 +2566,10 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                                  Inst.getOpcode() == Mips::LDMacro)
                ? MER_Fail
                : MER_Success;
+  case Mips::SDC1_M1:
+    return expandStoreDM1Macro(Inst, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
   case Mips::SEQMacro:
     return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
   case Mips::SEQIMacro:
@@ -2879,8 +2896,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
                ELF::STB_LOCAL))) {
       const MCExpr *CallExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
-      TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(),
-                   MCOperand::createExpr(CallExpr), IDLoc, STI);
+      TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+                   IDLoc, STI);
       return false;
     }
 
@@ -2919,8 +2936,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
       TmpReg = ATReg;
     }
 
-    TOut.emitRRX(Mips::LW, TmpReg, ABI.GetGlobalPtr(),
-                 MCOperand::createExpr(GotExpr), IDLoc, STI);
+    TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+                 STI);
 
     if (LoExpr)
       TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -2955,8 +2972,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
                ELF::STB_LOCAL))) {
       const MCExpr *CallExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
-      TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(),
-                   MCOperand::createExpr(CallExpr), IDLoc, STI);
+      TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+                   IDLoc, STI);
       return false;
     }
 
@@ -2998,8 +3015,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
       TmpReg = ATReg;
     }
 
-    TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(),
-                 MCOperand::createExpr(GotExpr), IDLoc, STI);
+    TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+                 STI);
 
     if (LoExpr)
       TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -3229,10 +3246,10 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
         MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext());
 
     if(isABI_O32() || isABI_N32()) {
-      TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+      TOut.emitRRX(Mips::LW, ATReg, GPReg, MCOperand::createExpr(GotExpr),
                    IDLoc, STI);
     } else { //isABI_N64()
-      TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+      TOut.emitRRX(Mips::LD, ATReg, GPReg, MCOperand::createExpr(GotExpr),
                    IDLoc, STI);
     }
   } else { //!IsPicEnabled
@@ -4293,6 +4310,143 @@ bool MipsAsmParser::expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
   return false;
 }
 
+bool MipsAsmParser::expandSge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                              const MCSubtargetInfo *STI) {
+  MipsTargetStreamer &TOut = getTargetStreamer();
+
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+  unsigned DstReg = Inst.getOperand(0).getReg();
+  unsigned SrcReg = Inst.getOperand(1).getReg();
+  unsigned OpReg = Inst.getOperand(2).getReg();
+  unsigned OpCode;
+
+  warnIfNoMacro(IDLoc);
+
+  switch (Inst.getOpcode()) {
+  case Mips::SGE:
+    OpCode = Mips::SLT;
+    break;
+  case Mips::SGEU:
+    OpCode = Mips::SLTu;
+    break;
+  default:
+    llvm_unreachable("unexpected 'sge' opcode");
+  }
+
+  // $SrcReg >= $OpReg is equal to (not ($SrcReg < $OpReg))
+  TOut.emitRRR(OpCode, DstReg, SrcReg, OpReg, IDLoc, STI);
+  TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+
+  return false;
+}
+
+bool MipsAsmParser::expandSgeImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                                 const MCSubtargetInfo *STI) {
+  MipsTargetStreamer &TOut = getTargetStreamer();
+
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+  unsigned DstReg = Inst.getOperand(0).getReg();
+  unsigned SrcReg = Inst.getOperand(1).getReg();
+  int64_t ImmValue = Inst.getOperand(2).getImm();
+  unsigned OpRegCode, OpImmCode;
+
+  warnIfNoMacro(IDLoc);
+
+  switch (Inst.getOpcode()) {
+  case Mips::SGEImm:
+  case Mips::SGEImm64:
+    OpRegCode = Mips::SLT;
+    OpImmCode = Mips::SLTi;
+    break;
+  case Mips::SGEUImm:
+  case Mips::SGEUImm64:
+    OpRegCode = Mips::SLTu;
+    OpImmCode = Mips::SLTiu;
+    break;
+  default:
+    llvm_unreachable("unexpected 'sge' opcode with immediate");
+  }
+
+  // $SrcReg >= Imm is equal to (not ($SrcReg < Imm))
+  if (isInt<16>(ImmValue)) {
+    // Use immediate version of SLT.
+    TOut.emitRRI(OpImmCode, DstReg, SrcReg, ImmValue, IDLoc, STI);
+    TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+  } else {
+    unsigned ImmReg = DstReg;
+    if (DstReg == SrcReg) {
+      unsigned ATReg = getATReg(Inst.getLoc());
+      if (!ATReg)
+        return true;
+      ImmReg = ATReg;
+    }
+
+    if (loadImmediate(ImmValue, ImmReg, Mips::NoRegister, isInt<32>(ImmValue),
+                      false, IDLoc, Out, STI))
+      return true;
+
+    TOut.emitRRR(OpRegCode, DstReg, SrcReg, ImmReg, IDLoc, STI);
+    TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+  }
+
+  return false;
+}
+
+bool MipsAsmParser::expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                                 const MCSubtargetInfo *STI) {
+  MipsTargetStreamer &TOut = getTargetStreamer();
+
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+  unsigned DstReg = Inst.getOperand(0).getReg();
+  unsigned SrcReg = Inst.getOperand(1).getReg();
+  unsigned ImmReg = DstReg;
+  int64_t ImmValue = Inst.getOperand(2).getImm();
+  unsigned OpCode;
+
+  warnIfNoMacro(IDLoc);
+
+  switch (Inst.getOpcode()) {
+  case Mips::SGTImm:
+  case Mips::SGTImm64:
+    OpCode = Mips::SLT;
+    break;
+  case Mips::SGTUImm:
+  case Mips::SGTUImm64:
+    OpCode = Mips::SLTu;
+    break;
+  default:
+    llvm_unreachable("unexpected 'sgt' opcode with immediate");
+  }
+
+  if (DstReg == SrcReg) {
+    unsigned ATReg = getATReg(Inst.getLoc());
+    if (!ATReg)
+      return true;
+    ImmReg = ATReg;
+  }
+
+  if (loadImmediate(ImmValue, ImmReg, Mips::NoRegister, isInt<32>(ImmValue),
+                    false, IDLoc, Out, STI))
+    return true;
+
+  // $SrcReg > $ImmReg is equal to $ImmReg < $SrcReg
+  TOut.emitRRR(OpCode, DstReg, ImmReg, SrcReg, IDLoc, STI);
+
+  return false;
+}
+
 bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
                                          MCStreamer &Out,
                                          const MCSubtargetInfo *STI) {
@@ -4859,61 +5013,110 @@ bool MipsAsmParser::expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc,
   return false;
 }
 
+
+// Expand 's.d $<reg> offset($reg2)' to 'swc1 $<reg+1>, offset($reg2);
+//                                       swc1 $<reg>, offset+4($reg2)'
+// or if little endian to 'swc1 $<reg>, offset($reg2);
+//                         swc1 $<reg+1>, offset+4($reg2)'
+// for Mips1.
+bool MipsAsmParser::expandStoreDM1Macro(MCInst &Inst, SMLoc IDLoc,
+                                        MCStreamer &Out,
+                                        const MCSubtargetInfo *STI) {
+  if (!isABI_O32())
+    return true;
+
+  warnIfNoMacro(IDLoc);
+
+  MipsTargetStreamer &TOut = getTargetStreamer();
+  unsigned Opcode = Mips::SWC1;
+  unsigned FirstReg = Inst.getOperand(0).getReg();
+  unsigned SecondReg = nextReg(FirstReg);
+  unsigned BaseReg = Inst.getOperand(1).getReg();
+  if (!SecondReg)
+    return true;
+
+  warnIfRegIndexIsAT(FirstReg, IDLoc);
+
+  assert(Inst.getOperand(2).isImm() &&
+         "Offset for macro is not immediate!");
+
+  MCOperand &FirstOffset = Inst.getOperand(2);
+  signed NextOffset = FirstOffset.getImm() + 4;
+  MCOperand SecondOffset = MCOperand::createImm(NextOffset);
+
+  if (!isInt<16>(FirstOffset.getImm()) || !isInt<16>(NextOffset))
+    return true;
+
+  if (!IsLittleEndian)
+    std::swap(FirstReg, SecondReg);
+
+  TOut.emitRRX(Opcode, FirstReg, BaseReg, FirstOffset, IDLoc, STI);
+  TOut.emitRRX(Opcode, SecondReg, BaseReg, SecondOffset, IDLoc, STI);
+
+  return false;
+}
+
 bool MipsAsmParser::expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                               const MCSubtargetInfo *STI) {
+  MipsTargetStreamer &TOut = getTargetStreamer();
+
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+  unsigned DstReg = Inst.getOperand(0).getReg();
+  unsigned SrcReg = Inst.getOperand(1).getReg();
+  unsigned OpReg = Inst.getOperand(2).getReg();
 
   warnIfNoMacro(IDLoc);
-  MipsTargetStreamer &TOut = getTargetStreamer();
 
-  if (Inst.getOperand(1).getReg() != Mips::ZERO &&
-      Inst.getOperand(2).getReg() != Mips::ZERO) {
-    TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
-                 Inst.getOperand(1).getReg(), Inst.getOperand(2).getReg(),
-                 IDLoc, STI);
-    TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
-                 Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+  if (SrcReg != Mips::ZERO && OpReg != Mips::ZERO) {
+    TOut.emitRRR(Mips::XOR, DstReg, SrcReg, OpReg, IDLoc, STI);
+    TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
     return false;
   }
 
-  unsigned Reg = 0;
-  if (Inst.getOperand(1).getReg() == Mips::ZERO) {
-    Reg = Inst.getOperand(2).getReg();
-  } else {
-    Reg = Inst.getOperand(1).getReg();
-  }
-  TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(), Reg, 1, IDLoc, STI);
+  unsigned Reg = SrcReg == Mips::ZERO ? OpReg : SrcReg;
+  TOut.emitRRI(Mips::SLTiu, DstReg, Reg, 1, IDLoc, STI);
   return false;
 }
 
 bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                                const MCSubtargetInfo *STI) {
-  warnIfNoMacro(IDLoc);
   MipsTargetStreamer &TOut = getTargetStreamer();
 
-  unsigned Opc;
+  assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() &&
+         Inst.getOperand(1).isReg() &&
+         Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+  unsigned DstReg = Inst.getOperand(0).getReg();
+  unsigned SrcReg = Inst.getOperand(1).getReg();
   int64_t Imm = Inst.getOperand(2).getImm();
-  unsigned Reg = Inst.getOperand(1).getReg();
+
+  warnIfNoMacro(IDLoc);
 
   if (Imm == 0) {
-    TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
-                 Inst.getOperand(1).getReg(), 1, IDLoc, STI);
+    TOut.emitRRI(Mips::SLTiu, DstReg, SrcReg, 1, IDLoc, STI);
     return false;
-  } else {
+  }
 
-    if (Reg == Mips::ZERO) {
-      Warning(IDLoc, "comparison is always false");
-      TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu,
-                   Inst.getOperand(0).getReg(), Reg, Reg, IDLoc, STI);
-      return false;
-    }
+  if (SrcReg == Mips::ZERO) {
+    Warning(IDLoc, "comparison is always false");
+    TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu,
+                 DstReg, SrcReg, SrcReg, IDLoc, STI);
+    return false;
+  }
 
-    if (Imm > -0x8000 && Imm < 0) {
-      Imm = -Imm;
-      Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
-    } else {
-      Opc = Mips::XORi;
-    }
+  unsigned Opc;
+  if (Imm > -0x8000 && Imm < 0) {
+    Imm = -Imm;
+    Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
+  } else {
+    Opc = Mips::XORi;
   }
+
   if (!isUInt<16>(Imm)) {
     unsigned ATReg = getATReg(IDLoc);
     if (!ATReg)
@@ -4923,17 +5126,13 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
                       Out, STI))
       return true;
 
-    TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
-                 Inst.getOperand(1).getReg(), ATReg, IDLoc, STI);
-    TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
-                 Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+    TOut.emitRRR(Mips::XOR, DstReg, SrcReg, ATReg, IDLoc, STI);
+    TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
     return false;
   }
 
-  TOut.emitRRI(Opc, Inst.getOperand(0).getReg(), Inst.getOperand(1).getReg(),
-               Imm, IDLoc, STI);
-  TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
-               Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+  TOut.emitRRI(Opc, DstReg, SrcReg, Imm, IDLoc, STI);
+  TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
   return false;
 }
 
@@ -6325,7 +6524,7 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
   return false;
 }
 
-static std::string MipsMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string MipsMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
                                           unsigned VariantID = 0);
 
 bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -6338,7 +6537,7 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
   // Check if we have valid mnemonic
   if (!mnemonicIsValid(Name, 0)) {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = MipsMnemonicSpellCheck(Name, FBS);
     return Error(NameLoc, "unknown instruction" + Suggestion);
   }
@@ -6807,7 +7006,6 @@ bool MipsAsmParser::parseSetHardFloatDirective() {
 
 bool MipsAsmParser::parseSetAssignment() {
   StringRef Name;
-  const MCExpr *Value;
   MCAsmParser &Parser = getParser();
 
   if (Parser.parseIdentifier(Name))
@@ -6825,17 +7023,16 @@ bool MipsAsmParser::parseSetAssignment() {
     RegisterSets[Name] = Parser.getTok();
     Parser.Lex(); // Eat identifier.
     getContext().getOrCreateSymbol(Name);
-  } else if (!Parser.parseExpression(Value)) {
-    // Parse assignment of an expression including
-    // symbolic registers:
-    //   .set  $tmp, $BB0-$BB1
-    //   .set  r2, $f2
-    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-    Sym->setVariableValue(Value);
-  } else {
-    return reportParseError("expected valid expression after comma");
+    return false;
   }
 
+  MCSymbol *Sym;
+  const MCExpr *Value;
+  if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+                                               Parser, Sym, Value))
+    return true;
+  Sym->setVariableValue(Value);
+
   return false;
 }
 
@@ -7047,6 +7244,40 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
   return false;
 }
 
+bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) {
+  if (!isABI_N32() && !isABI_N64()) {
+    reportParseError(".cplocal is allowed only in N32 or N64 mode");
+    return false;
+  }
+
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
+  OperandMatchResultTy ResTy = parseAnyRegister(Reg);
+  if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+    reportParseError("expected register containing global pointer");
+    return false;
+  }
+
+  MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+  if (!RegOpnd.isGPRAsmReg()) {
+    reportParseError(RegOpnd.getStartLoc(), "invalid register");
+    return false;
+  }
+
+  // If this is not the end of the statement, report an error.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token, expected end of statement");
+    return false;
+  }
+  getParser().Lex(); // Consume the EndOfStatement.
+
+  unsigned NewReg = RegOpnd.getGPR32Reg();
+  if (IsPicEnabled)
+    GPReg = NewReg;
+
+  getTargetStreamer().emitDirectiveCpLocal(NewReg);
+  return false;
+}
+
 bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) {
   MCAsmParser &Parser = getParser();
 
@@ -7897,6 +8128,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
     parseDirectiveCpRestore(DirectiveID.getLoc());
     return false;
   }
+  if (IDVal == ".cplocal") {
+    parseDirectiveCpLocal(DirectiveID.getLoc());
+    return false;
+  }
   if (IDVal == ".ent") {
     StringRef SymbolName;
 
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 27b27ff1e1e2..ef13507fe63a 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- MipsDisassembler.cpp - Disassembler for Mips -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "Mips.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -541,15 +541,6 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
 static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
                                         uint64_t Address, const void *Decoder);
 
-namespace llvm {
-
-Target &getTheMipselTarget();
-Target &getTheMipsTarget();
-Target &getTheMips64Target();
-Target &getTheMips64elTarget();
-
-} // end namespace llvm
-
 static MCDisassembler *createMipsDisassembler(
                        const Target &T,
                        const MCSubtargetInfo &STI,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
deleted file mode 100644
index 73732a40bb8a..000000000000
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ /dev/null
@@ -1,288 +0,0 @@
-//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Mips MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsInstPrinter.h"
-#include "MCTargetDesc/MipsMCExpr.h"
-#include "MipsInstrInfo.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#define PRINT_ALIAS_INSTR
-#include "MipsGenAsmWriter.inc"
-
-template<unsigned R>
-static bool isReg(const MCInst &MI, unsigned OpNo) {
-  assert(MI.getOperand(OpNo).isReg() && "Register operand expected.");
-  return MI.getOperand(OpNo).getReg() == R;
-}
-
-const char* Mips::MipsFCCToString(Mips::CondCode CC) {
-  switch (CC) {
-  case FCOND_F:
-  case FCOND_T:   return "f";
-  case FCOND_UN:
-  case FCOND_OR:  return "un";
-  case FCOND_OEQ:
-  case FCOND_UNE: return "eq";
-  case FCOND_UEQ:
-  case FCOND_ONE: return "ueq";
-  case FCOND_OLT:
-  case FCOND_UGE: return "olt";
-  case FCOND_ULT:
-  case FCOND_OGE: return "ult";
-  case FCOND_OLE:
-  case FCOND_UGT: return "ole";
-  case FCOND_ULE:
-  case FCOND_OGT: return "ule";
-  case FCOND_SF:
-  case FCOND_ST:  return "sf";
-  case FCOND_NGLE:
-  case FCOND_GLE: return "ngle";
-  case FCOND_SEQ:
-  case FCOND_SNE: return "seq";
-  case FCOND_NGL:
-  case FCOND_GL:  return "ngl";
-  case FCOND_LT:
-  case FCOND_NLT: return "lt";
-  case FCOND_NGE:
-  case FCOND_GE:  return "nge";
-  case FCOND_LE:
-  case FCOND_NLE: return "le";
-  case FCOND_NGT:
-  case FCOND_GT:  return "ngt";
-  }
-  llvm_unreachable("Impossible condition code!");
-}
-
-void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << '$' << StringRef(getRegisterName(RegNo)).lower();
-}
-
-void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                StringRef Annot, const MCSubtargetInfo &STI) {
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case Mips::RDHWR:
-  case Mips::RDHWR64:
-    O << "\t.set\tpush\n";
-    O << "\t.set\tmips32r2\n";
-    break;
-  case Mips::Save16:
-    O << "\tsave\t";
-    printSaveRestore(MI, O);
-    O << " # 16 bit inst\n";
-    return;
-  case Mips::SaveX16:
-    O << "\tsave\t";
-    printSaveRestore(MI, O);
-    O << "\n";
-    return;
-  case Mips::Restore16:
-    O << "\trestore\t";
-    printSaveRestore(MI, O);
-    O << " # 16 bit inst\n";
-    return;
-  case Mips::RestoreX16:
-    O << "\trestore\t";
-    printSaveRestore(MI, O);
-    O << "\n";
-    return;
-  }
-
-  // Try to print any aliases first.
-  if (!printAliasInstr(MI, O) && !printAlias(*MI, O))
-    printInstruction(MI, O);
-  printAnnotation(O, Annot);
-
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case Mips::RDHWR:
-  case Mips::RDHWR64:
-    O << "\n\t.set\tpop";
-  }
-}
-
-void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                   raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    printRegName(O, Op.getReg());
-    return;
-  }
-
-  if (Op.isImm()) {
-    O << formatImm(Op.getImm());
-    return;
-  }
-
-  assert(Op.isExpr() && "unknown operand kind in printOperand");
-  Op.getExpr()->print(O, &MAI, true);
-}
-
-template <unsigned Bits, unsigned Offset>
-void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(opNum);
-  if (MO.isImm()) {
-    uint64_t Imm = MO.getImm();
-    Imm -= Offset;
-    Imm &= (1 << Bits) - 1;
-    Imm += Offset;
-    O << formatImm(Imm);
-    return;
-  }
-
-  printOperand(MI, opNum, O);
-}
-
-void MipsInstPrinter::
-printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
-  // Load/Store memory operands -- imm($reg)
-  // If PIC target the target is loaded as the
-  // pattern lw $25,%call16($28)
-
-  // opNum can be invalid if instruction had reglist as operand.
-  // MemOperand is always last operand of instruction (base + offset).
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case Mips::SWM32_MM:
-  case Mips::LWM32_MM:
-  case Mips::SWM16_MM:
-  case Mips::SWM16_MMR6:
-  case Mips::LWM16_MM:
-  case Mips::LWM16_MMR6:
-    opNum = MI->getNumOperands() - 2;
-    break;
-  }
-
-  printOperand(MI, opNum+1, O);
-  O << "(";
-  printOperand(MI, opNum, O);
-  O << ")";
-}
-
-void MipsInstPrinter::
-printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
-  // when using stack locations for not load/store instructions
-  // print the same way as all normal 3 operand instructions.
-  printOperand(MI, opNum, O);
-  O << ", ";
-  printOperand(MI, opNum+1, O);
-}
-
-void MipsInstPrinter::
-printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
-  const MCOperand& MO = MI->getOperand(opNum);
-  O << MipsFCCToString((Mips::CondCode)MO.getImm());
-}
-
-void MipsInstPrinter::
-printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
-  llvm_unreachable("TODO");
-}
-
-bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
-                                 unsigned OpNo, raw_ostream &OS) {
-  OS << "\t" << Str << "\t";
-  printOperand(&MI, OpNo, OS);
-  return true;
-}
-
-bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
-                                 unsigned OpNo0, unsigned OpNo1,
-                                 raw_ostream &OS) {
-  printAlias(Str, MI, OpNo0, OS);
-  OS << ", ";
-  printOperand(&MI, OpNo1, OS);
-  return true;
-}
-
-bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
-  switch (MI.getOpcode()) {
-  case Mips::BEQ:
-  case Mips::BEQ_MM:
-    // beq $zero, $zero, $L2 => b $L2
-    // beq $r0, $zero, $L2 => beqz $r0, $L2
-    return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
-            printAlias("b", MI, 2, OS)) ||
-           (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
-  case Mips::BEQ64:
-    // beq $r0, $zero, $L2 => beqz $r0, $L2
-    return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
-  case Mips::BNE:
-  case Mips::BNE_MM:
-    // bne $r0, $zero, $L2 => bnez $r0, $L2
-    return isReg<Mips::ZERO>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
-  case Mips::BNE64:
-    // bne $r0, $zero, $L2 => bnez $r0, $L2
-    return isReg<Mips::ZERO_64>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
-  case Mips::BGEZAL:
-    // bgezal $zero, $L1 => bal $L1
-    return isReg<Mips::ZERO>(MI, 0) && printAlias("bal", MI, 1, OS);
-  case Mips::BC1T:
-    // bc1t $fcc0, $L1 => bc1t $L1
-    return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1t", MI, 1, OS);
-  case Mips::BC1F:
-    // bc1f $fcc0, $L1 => bc1f $L1
-    return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1f", MI, 1, OS);
-  case Mips::JALR:
-    // jalr $ra, $r1 => jalr $r1
-    return isReg<Mips::RA>(MI, 0) && printAlias("jalr", MI, 1, OS);
-  case Mips::JALR64:
-    // jalr $ra, $r1 => jalr $r1
-    return isReg<Mips::RA_64>(MI, 0) && printAlias("jalr", MI, 1, OS);
-  case Mips::NOR:
-  case Mips::NOR_MM:
-  case Mips::NOR_MMR6:
-    // nor $r0, $r1, $zero => not $r0, $r1
-    return isReg<Mips::ZERO>(MI, 2) && printAlias("not", MI, 0, 1, OS);
-  case Mips::NOR64:
-    // nor $r0, $r1, $zero => not $r0, $r1
-    return isReg<Mips::ZERO_64>(MI, 2) && printAlias("not", MI, 0, 1, OS);
-  case Mips::OR:
-    // or $r0, $r1, $zero => move $r0, $r1
-    return isReg<Mips::ZERO>(MI, 2) && printAlias("move", MI, 0, 1, OS);
-  default: return false;
-  }
-}
-
-void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) {
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (i != 0) O << ", ";
-    if (MI->getOperand(i).isReg())
-      printRegName(O, MI->getOperand(i).getReg());
-    else
-      printUImm<16>(MI, i, O);
-  }
-}
-
-void MipsInstPrinter::
-printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
-  // - 2 because register List is always first operand of instruction and it is
-  // always followed by memory operand (base + offset).
-  for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
-    if (i != opNum)
-      O << ", ";
-    printRegName(O, MI->getOperand(i).getReg());
-  }
-}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
deleted file mode 100644
index f02443ee21d3..000000000000
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a Mips MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
-#define LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-// These enumeration declarations were originally in MipsInstrInfo.h but
-// had to be moved here to avoid circular dependencies between
-// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
-namespace Mips {
-// Mips Branch Codes
-enum FPBranchCode {
-  BRANCH_F,
-  BRANCH_T,
-  BRANCH_FL,
-  BRANCH_TL,
-  BRANCH_INVALID
-};
-
-// Mips Condition Codes
-enum CondCode {
-  // To be used with float branch True
-  FCOND_F,
-  FCOND_UN,
-  FCOND_OEQ,
-  FCOND_UEQ,
-  FCOND_OLT,
-  FCOND_ULT,
-  FCOND_OLE,
-  FCOND_ULE,
-  FCOND_SF,
-  FCOND_NGLE,
-  FCOND_SEQ,
-  FCOND_NGL,
-  FCOND_LT,
-  FCOND_NGE,
-  FCOND_LE,
-  FCOND_NGT,
-
-  // To be used with float branch False
-  // This conditions have the same mnemonic as the
-  // above ones, but are used with a branch False;
-  FCOND_T,
-  FCOND_OR,
-  FCOND_UNE,
-  FCOND_ONE,
-  FCOND_UGE,
-  FCOND_OGE,
-  FCOND_UGT,
-  FCOND_OGT,
-  FCOND_ST,
-  FCOND_GLE,
-  FCOND_SNE,
-  FCOND_GL,
-  FCOND_NLT,
-  FCOND_GE,
-  FCOND_NLE,
-  FCOND_GT
-};
-
-const char *MipsFCCToString(Mips::CondCode CC);
-} // end namespace Mips
-
-class MipsInstPrinter : public MCInstPrinter {
-public:
-  MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                  const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx, raw_ostream &O);
-
-private:
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  template <unsigned Bits, unsigned Offset = 0>
-  void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
-  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
-  void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
-  void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
-  void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
-
-  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
-                  raw_ostream &OS);
-  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0,
-                  unsigned OpNo1, raw_ostream &OS);
-  bool printAlias(const MCInst &MI, raw_ostream &OS);
-  void printSaveRestore(const MCInst *MI, raw_ostream &O);
-  void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 4a2b75b9ae46..fca1149453c9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -1,9 +1,8 @@
 //===- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 68bf3829aab5..239e55495e9d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -1,9 +1,8 @@
 //===- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 18d7dd99be34..bdd190fc17c9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -1,9 +1,8 @@
 //===---- MipsABIInfo.cpp - Information about MIPS ABI's ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,6 +14,13 @@
 
 using namespace llvm;
 
+// Note: this option is defined here to be visible from libLLVMMipsAsmParser
+//       and libLLVMMipsCodeGen
+cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+              cl::init(true));
+
 namespace {
 static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 9372a3c2bb1f..534e6573b63c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -1,9 +1,8 @@
 //===---- MipsABIInfo.h - Information about MIPS ABI's --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 265d1141cb0b..859f9cbbca07 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- MipsAsmBackend.cpp - Mips Asm Backend  ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -303,7 +302,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
 
 Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
   return StringSwitch<Optional<MCFixupKind>>(Name)
-      .Case("R_MIPS_NONE", (MCFixupKind)Mips::fixup_Mips_NONE)
+      .Case("R_MIPS_NONE", FK_NONE)
       .Case("R_MIPS_32", FK_Data_4)
       .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE)
       .Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16)
@@ -351,7 +350,6 @@ getFixupKindInfo(MCFixupKind Kind) const {
     // MipsFixupKinds.h.
     //
     // name                    offset  bits  flags
-    { "fixup_Mips_NONE",         0,      0,   0 },
     { "fixup_Mips_16",           0,     16,   0 },
     { "fixup_Mips_32",           0,     32,   0 },
     { "fixup_Mips_REL32",        0,     32,   0 },
@@ -431,7 +429,6 @@ getFixupKindInfo(MCFixupKind Kind) const {
     // MipsFixupKinds.h.
     //
     // name                    offset  bits  flags
-    { "fixup_Mips_NONE",         0,      0,   0 },
     { "fixup_Mips_16",          16,     16,   0 },
     { "fixup_Mips_32",           0,     32,   0 },
     { "fixup_Mips_REL32",        0,     32,   0 },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 30359132e92b..4d7e36995ae4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -1,9 +1,8 @@
 //===-- MipsAsmBackend.h - Mips Asm Backend  ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a90db2384c46..6d8cb264158f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,7 +88,10 @@ namespace MipsII {
     MO_GOT_HI16,
     MO_GOT_LO16,
     MO_CALL_HI16,
-    MO_CALL_LO16
+    MO_CALL_LO16,
+
+    /// Helper operand used to generate R_MIPS_JALR
+    MO_JALR
   };
 
   enum {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 8ace2895d681..cf7bae98a27f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -223,7 +222,7 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
   unsigned Kind = (unsigned)Fixup.getKind();
 
   switch (Kind) {
-  case Mips::fixup_Mips_NONE:
+  case FK_NONE:
     return ELF::R_MIPS_NONE;
   case FK_Data_1:
     Ctx.reportError(Fixup.getLoc(),
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 21b01e850967..1b83e9445fb5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-------- MipsELFStreamer.cpp - ELF Object Output ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,7 +34,7 @@ MipsELFStreamer::MipsELFStreamer(MCContext &Context,
 }
 
 void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
-                                      const MCSubtargetInfo &STI, bool) {
+                                      const MCSubtargetInfo &STI) {
   MCELFStreamer::EmitInstruction(Inst, STI);
 
   MCContext &Context = getContext();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 56a0ff96c7bd..2febfbc69b6f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -1,9 +1,8 @@
 //===- MipsELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,8 +41,7 @@ public:
   /// \p Inst is actually emitted. For example, we can inspect the operands and
   /// gather sufficient information that allows us to reason about the register
   /// usage for the translation unit.
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool = false) override;
+  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
 
   /// Overriding this function allows us to record all labels that should be
   /// marked as microMIPS. Based on this data marking is done in
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index eedad16dddc3..b83d822bd8d0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,11 +22,8 @@ namespace Mips {
   // in MipsAsmBackend.cpp.
   //
   enum Fixups {
-    // Branch fixups resulting in R_MIPS_NONE.
-    fixup_Mips_NONE = FirstTargetFixupKind,
-
     // Branch fixups resulting in R_MIPS_16.
-    fixup_Mips_16,
+    fixup_Mips_16 = FirstTargetFixupKind,
 
     // Pure 32 bit data fixup resulting in - R_MIPS_32.
     fixup_Mips_32,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
new file mode 100644
index 000000000000..fb290a8e3f26
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -0,0 +1,287 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#define PRINT_ALIAS_INSTR
+#include "MipsGenAsmWriter.inc"
+
+template<unsigned R>
+static bool isReg(const MCInst &MI, unsigned OpNo) {
+  assert(MI.getOperand(OpNo).isReg() && "Register operand expected.");
+  return MI.getOperand(OpNo).getReg() == R;
+}
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+  switch (CC) {
+  case FCOND_F:
+  case FCOND_T:   return "f";
+  case FCOND_UN:
+  case FCOND_OR:  return "un";
+  case FCOND_OEQ:
+  case FCOND_UNE: return "eq";
+  case FCOND_UEQ:
+  case FCOND_ONE: return "ueq";
+  case FCOND_OLT:
+  case FCOND_UGE: return "olt";
+  case FCOND_ULT:
+  case FCOND_OGE: return "ult";
+  case FCOND_OLE:
+  case FCOND_UGT: return "ole";
+  case FCOND_ULE:
+  case FCOND_OGT: return "ule";
+  case FCOND_SF:
+  case FCOND_ST:  return "sf";
+  case FCOND_NGLE:
+  case FCOND_GLE: return "ngle";
+  case FCOND_SEQ:
+  case FCOND_SNE: return "seq";
+  case FCOND_NGL:
+  case FCOND_GL:  return "ngl";
+  case FCOND_LT:
+  case FCOND_NLT: return "lt";
+  case FCOND_NGE:
+  case FCOND_GE:  return "nge";
+  case FCOND_LE:
+  case FCOND_NLE: return "le";
+  case FCOND_NGT:
+  case FCOND_GT:  return "ngt";
+  }
+  llvm_unreachable("Impossible condition code!");
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << '$' << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                StringRef Annot, const MCSubtargetInfo &STI) {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case Mips::RDHWR:
+  case Mips::RDHWR64:
+    O << "\t.set\tpush\n";
+    O << "\t.set\tmips32r2\n";
+    break;
+  case Mips::Save16:
+    O << "\tsave\t";
+    printSaveRestore(MI, O);
+    O << " # 16 bit inst\n";
+    return;
+  case Mips::SaveX16:
+    O << "\tsave\t";
+    printSaveRestore(MI, O);
+    O << "\n";
+    return;
+  case Mips::Restore16:
+    O << "\trestore\t";
+    printSaveRestore(MI, O);
+    O << " # 16 bit inst\n";
+    return;
+  case Mips::RestoreX16:
+    O << "\trestore\t";
+    printSaveRestore(MI, O);
+    O << "\n";
+    return;
+  }
+
+  // Try to print any aliases first.
+  if (!printAliasInstr(MI, O) && !printAlias(*MI, O))
+    printInstruction(MI, O);
+  printAnnotation(O, Annot);
+
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case Mips::RDHWR:
+  case Mips::RDHWR64:
+    O << "\n\t.set\tpop";
+  }
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << formatImm(Op.getImm());
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  Op.getExpr()->print(O, &MAI, true);
+}
+
+template <unsigned Bits, unsigned Offset>
+void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm()) {
+    uint64_t Imm = MO.getImm();
+    Imm -= Offset;
+    Imm &= (1 << Bits) - 1;
+    Imm += Offset;
+    O << formatImm(Imm);
+    return;
+  }
+
+  printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Load/Store memory operands -- imm($reg)
+  // If PIC target the target is loaded as the
+  // pattern lw $25,%call16($28)
+
+  // opNum can be invalid if instruction had reglist as operand.
+  // MemOperand is always last operand of instruction (base + offset).
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case Mips::SWM32_MM:
+  case Mips::LWM32_MM:
+  case Mips::SWM16_MM:
+  case Mips::SWM16_MMR6:
+  case Mips::LWM16_MM:
+  case Mips::LWM16_MMR6:
+    opNum = MI->getNumOperands() - 2;
+    break;
+  }
+
+  printOperand(MI, opNum+1, O);
+  O << "(";
+  printOperand(MI, opNum, O);
+  O << ")";
+}
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+  // When using stack locations for non-load/store instructions,
+  // print the same way as all normal 3 operand instructions.
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand& MO = MI->getOperand(opNum);
+  O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+void MipsInstPrinter::
+printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
+  llvm_unreachable("TODO");
+}
+
+bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                 unsigned OpNo, raw_ostream &OS) {
+  OS << "\t" << Str << "\t";
+  printOperand(&MI, OpNo, OS);
+  return true;
+}
+
+bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                 unsigned OpNo0, unsigned OpNo1,
+                                 raw_ostream &OS) {
+  printAlias(Str, MI, OpNo0, OS);
+  OS << ", ";
+  printOperand(&MI, OpNo1, OS);
+  return true;
+}
+
+bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
+  switch (MI.getOpcode()) {
+  case Mips::BEQ:
+  case Mips::BEQ_MM:
+    // beq $zero, $zero, $L2 => b $L2
+    // beq $r0, $zero, $L2 => beqz $r0, $L2
+    return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
+            printAlias("b", MI, 2, OS)) ||
+           (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
+  case Mips::BEQ64:
+    // beq $r0, $zero, $L2 => beqz $r0, $L2
+    return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
+  case Mips::BNE:
+  case Mips::BNE_MM:
+    // bne $r0, $zero, $L2 => bnez $r0, $L2
+    return isReg<Mips::ZERO>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
+  case Mips::BNE64:
+    // bne $r0, $zero, $L2 => bnez $r0, $L2
+    return isReg<Mips::ZERO_64>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
+  case Mips::BGEZAL:
+    // bgezal $zero, $L1 => bal $L1
+    return isReg<Mips::ZERO>(MI, 0) && printAlias("bal", MI, 1, OS);
+  case Mips::BC1T:
+    // bc1t $fcc0, $L1 => bc1t $L1
+    return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1t", MI, 1, OS);
+  case Mips::BC1F:
+    // bc1f $fcc0, $L1 => bc1f $L1
+    return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1f", MI, 1, OS);
+  case Mips::JALR:
+    // jalr $ra, $r1 => jalr $r1
+    return isReg<Mips::RA>(MI, 0) && printAlias("jalr", MI, 1, OS);
+  case Mips::JALR64:
+    // jalr $ra, $r1 => jalr $r1
+    return isReg<Mips::RA_64>(MI, 0) && printAlias("jalr", MI, 1, OS);
+  case Mips::NOR:
+  case Mips::NOR_MM:
+  case Mips::NOR_MMR6:
+    // nor $r0, $r1, $zero => not $r0, $r1
+    return isReg<Mips::ZERO>(MI, 2) && printAlias("not", MI, 0, 1, OS);
+  case Mips::NOR64:
+    // nor $r0, $r1, $zero => not $r0, $r1
+    return isReg<Mips::ZERO_64>(MI, 2) && printAlias("not", MI, 0, 1, OS);
+  case Mips::OR:
+    // or $r0, $r1, $zero => move $r0, $r1
+    return isReg<Mips::ZERO>(MI, 2) && printAlias("move", MI, 0, 1, OS);
+  default: return false;
+  }
+}
+
+void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    if (i != 0) O << ", ";
+    if (MI->getOperand(i).isReg())
+      printRegName(O, MI->getOperand(i).getReg());
+    else
+      printUImm<16>(MI, i, O);
+  }
+}
+
+void MipsInstPrinter::
+printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+  // - 2 because register List is always first operand of instruction and it is
+  // always followed by memory operand (base + offset).
+  for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
+    if (i != opNum)
+      O << ", ";
+    printRegName(O, MI->getOperand(i).getReg());
+  }
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
new file mode 100644
index 000000000000..a34a5c1d6418
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -0,0 +1,112 @@
+//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSINSTPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+  BRANCH_F,
+  BRANCH_T,
+  BRANCH_FL,
+  BRANCH_TL,
+  BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+  // To be used with float branch True
+  FCOND_F,
+  FCOND_UN,
+  FCOND_OEQ,
+  FCOND_UEQ,
+  FCOND_OLT,
+  FCOND_ULT,
+  FCOND_OLE,
+  FCOND_ULE,
+  FCOND_SF,
+  FCOND_NGLE,
+  FCOND_SEQ,
+  FCOND_NGL,
+  FCOND_LT,
+  FCOND_NGE,
+  FCOND_LE,
+  FCOND_NGT,
+
+  // To be used with float branch False
+  // These conditions have the same mnemonic as the
+  // above ones, but are used with a branch False;
+  FCOND_T,
+  FCOND_OR,
+  FCOND_UNE,
+  FCOND_ONE,
+  FCOND_UGE,
+  FCOND_OGE,
+  FCOND_UGT,
+  FCOND_OGT,
+  FCOND_ST,
+  FCOND_GLE,
+  FCOND_SNE,
+  FCOND_GL,
+  FCOND_NLT,
+  FCOND_GE,
+  FCOND_NLE,
+  FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+  MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                  const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx, raw_ostream &O);
+
+private:
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  template <unsigned Bits, unsigned Offset = 0>
+  void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+  void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
+
+  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
+                  raw_ostream &OS);
+  bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0,
+                  unsigned OpNo1, raw_ostream &OS);
+  bool printAlias(const MCInst &MI, raw_ostream &OS);
+  void printSaveRestore(const MCInst *MI, raw_ostream &O);
+  void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 1506b4a83649..ec78158d387d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index d4ccf0349c16..867f4d223de4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index f43a4d980f92..759a7fdb32b8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -186,7 +185,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
   // Check for unimplemented opcodes.
   // Unfortunately in MIPS both NOP and SLL will come in with Binary == 0
   // so we have to special check for them.
-  unsigned Opcode = TmpInst.getOpcode();
+  const unsigned Opcode = TmpInst.getOpcode();
   if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) &&
       (Opcode != Mips::SLL_MM) && (Opcode != Mips::SLL_MMR6) && !Binary)
     llvm_unreachable("unimplemented opcode in encodeInstruction()");
@@ -209,7 +208,6 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
       if (Fixups.size() > N)
         Fixups.pop_back();
 
-      Opcode = NewOpcode;
       TmpInst.setOpcode (NewOpcode);
       Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
     }
@@ -614,8 +612,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
       llvm_unreachable("Unhandled fixup kind!");
       break;
     case MipsMCExpr::MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-      break;
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI);
     case MipsMCExpr::MEK_CALL_HI16:
       FixupKind = Mips::fixup_Mips_CALL_HI16;
       break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 09d50d4776ba..ff6e1d62b05f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 99857e083c6c..680806c4deb2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- MipsMCExpr.cpp - Mips specific MC expression classes --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -44,8 +43,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
   case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
+    // MEK_DTPREL is used for marking TLS DIEExpr only
+    // and contains a regular sub-expression.
+    getSubExpr()->print(OS, MAI, true);
+    return;
   case MEK_CALL_HI16:
     OS << "%call_hi";
     break;
@@ -161,7 +162,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
     case MEK_Special:
       llvm_unreachable("MEK_None and MEK_Special are invalid");
     case MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
     case MEK_DTPREL_HI:
     case MEK_DTPREL_LO:
     case MEK_GOT:
@@ -249,9 +252,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case MEK_Special:
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
-  case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
   case MEK_CALL_HI16:
   case MEK_CALL_LO16:
   case MEK_GOT:
@@ -274,6 +274,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
     if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
       E->fixELFSymbolsInTLSFixups(Asm);
     break;
+  case MEK_DTPREL:
   case MEK_DTPREL_HI:
   case MEK_DTPREL_LO:
   case MEK_TLSLDM:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index bf3274ab5d17..edc12e87e9b6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -1,9 +1,8 @@
 //===- MipsMCExpr.h - Mips specific MC expression classes -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 988629ed1bca..ad5aff6552f6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -1,9 +1,8 @@
 //===-- MipsMCNaCl.h - NaCl-related declarations --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index a8cd7b0d9b03..ddeec03ba784 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,12 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsMCTargetDesc.h"
-#include "InstPrinter/MipsInstPrinter.h"
 #include "MipsAsmBackend.h"
 #include "MipsELFStreamer.h"
+#include "MipsInstPrinter.h"
 #include "MipsMCAsmInfo.h"
 #include "MipsMCNaCl.h"
 #include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCELFStreamer.h"
@@ -85,7 +85,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
   MCAsmInfo *MAI = new MipsMCAsmInfo(TT);
 
   unsigned SP = MRI.getDwarfRegNum(Mips::SP, true);
-  MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
+  MCCFIInstruction Inst = MCCFIInstruction::createDefCfaRegister(nullptr, SP);
   MAI->addInitialFrameState(Inst);
 
   return MAI;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 4fc174ab5871..809be99ff3f4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,11 +32,6 @@ class Triple;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheMipsTarget();
-Target &getTheMipselTarget();
-Target &getTheMips64Target();
-Target &getTheMips64elTarget();
-
 MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
                                          const MCRegisterInfo &MRI,
                                          MCContext &Ctx);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 6bf62ea618b4..c050db8a17fd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- MipsNaClELFStreamer.cpp - ELF Object Output for Mips NaCl ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -144,8 +143,8 @@ private:
 public:
   /// This function is the one used to emit instruction data into the ELF
   /// streamer.  We override it to mask dangerous instructions.
-  void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
-                       bool) override {
+  void EmitInstruction(const MCInst &Inst,
+                       const MCSubtargetInfo &STI) override {
     // Sandbox indirect jumps.
     if (isIndirectJump(Inst)) {
       if (PendingCall)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 2d84528e7469..b4ebb9d18b72 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -1,9 +1,8 @@
 //===- MipsOptionRecord.cpp - Abstraction for storing information ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 58f9717e1cc6..e3bdb3b140a8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- MipsTargetStreamer.cpp - Mips Target Streamer Methods -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsTargetStreamer.h"
-#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsInstPrinter.h"
 #include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsELFStreamer.h"
 #include "MipsMCExpr.h"
@@ -36,7 +35,7 @@ static cl::opt<bool> RoundSectionSizes(
 } // end anonymous namespace
 
 MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
-    : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+    : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) {
   GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
 }
 void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
@@ -107,6 +106,23 @@ void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  // .cplocal $reg
+  // This directive selects an alternate register as the context pointer.
+  // For example
+  //   .cplocal $4
+  //   jal foo
+  // expands to
+  //   ld    $25, %call16(foo)($4)
+  //   jalr  $25
+  // The directive is ignored unless the ABI is N32 or N64.
+  if (!getABI().IsN32() && !getABI().IsN64())
+    return;
+
+  GPReg = RegNo;
+
+  forbidModuleDirective();
+}
 bool MipsTargetStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -258,8 +274,7 @@ void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
 /// Emit the $gp restore operation for .cprestore.
 void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc,
                                        const MCSubtargetInfo *STI) {
-  emitLoadWithImmOffset(Mips::LW, Mips::GP, Mips::SP, Offset, Mips::GP, IDLoc,
-                        STI);
+  emitLoadWithImmOffset(Mips::LW, GPReg, Mips::SP, Offset, GPReg, IDLoc, STI);
 }
 
 /// Emit a store instruction with an immediate offset.
@@ -666,6 +681,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   forbidModuleDirective();
 }
 
+void MipsTargetAsmStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  OS << "\t.cplocal\t$" // Register name is printed in lower case.
+     << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+  MipsTargetStreamer::emitDirectiveCpLocal(RegNo); // Then run base handler.
+}
+
 bool MipsTargetAsmStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -700,8 +721,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
 }
 
 void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
-  OS << "\t.module\tfp=";
-  OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
+  MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI();
+  if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT)
+    OS << "\t.module\tsoftfloat\n";
+  else
+    OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n";
 }
 
 void MipsTargetAsmStreamer::emitDirectiveSetFp(
@@ -1133,7 +1157,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
 
   MCInst TmpInst;
   TmpInst.setOpcode(Mips::LUi);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   const MCExpr *HiSym = MipsMCExpr::create(
       MipsMCExpr::MEK_HI,
       MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1145,8 +1169,8 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   TmpInst.clear();
 
   TmpInst.setOpcode(Mips::ADDiu);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   const MCExpr *LoSym = MipsMCExpr::create(
       MipsMCExpr::MEK_LO,
       MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1158,14 +1182,19 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   TmpInst.clear();
 
   TmpInst.setOpcode(Mips::ADDu);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   TmpInst.addOperand(MCOperand::createReg(RegNo));
   getStreamer().EmitInstruction(TmpInst, STI);
 
   forbidModuleDirective();
 }
 
+void MipsTargetELFStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  if (Pic) // When Pic is false the directive is a no-op here.
+    MipsTargetStreamer::emitDirectiveCpLocal(RegNo);
+}
+
 bool MipsTargetELFStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -1182,7 +1211,7 @@ bool MipsTargetELFStreamer::emitDirectiveCpRestore(
     return true;
 
   // Store the $gp on the stack.
-  emitStoreWithImmOffset(Mips::SW, Mips::GP, Mips::SP, Offset, GetATReg, IDLoc,
+  emitStoreWithImmOffset(Mips::SW, GPReg, Mips::SP, Offset, GetATReg, IDLoc,
                          STI);
   return true;
 }
@@ -1203,10 +1232,10 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
   // Either store the old $gp in a register or on the stack
   if (IsReg) {
     // move $save, $gpreg
-    emitRRR(Mips::OR64, RegOrOffset, Mips::GP, Mips::ZERO, SMLoc(), &STI);
+    emitRRR(Mips::OR64, RegOrOffset, GPReg, Mips::ZERO, SMLoc(), &STI);
   } else {
     // sd $gpreg, offset($sp)
-    emitRRI(Mips::SD, Mips::GP, Mips::SP, RegOrOffset, SMLoc(), &STI);
+    emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI);
   }
 
   if (getABI().IsN32()) {
@@ -1219,11 +1248,11 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
         MCA.getContext());
 
     // lui $gp, %hi(__gnu_local_gp)
-    emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+    emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
 
     // addiu  $gp, $gp, %lo(__gnu_local_gp)
-    emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
-            SMLoc(), &STI);
+    emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+            &STI);
 
     return;
   }
@@ -1236,14 +1265,14 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
       MCA.getContext());
 
   // lui $gp, %hi(%neg(%gp_rel(funcSym)))
-  emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+  emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
 
   // addiu  $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
-  emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
-          SMLoc(), &STI);
+  emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+          &STI);
 
   // daddu  $gp, $gp, $funcreg
-  emitRRR(Mips::DADDu, Mips::GP, Mips::GP, RegNo, SMLoc(), &STI);
+  emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI);
 }
 
 void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
@@ -1256,12 +1285,12 @@ void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
   // Either restore the old $gp from a register or on the stack
   if (SaveLocationIsRegister) {
     Inst.setOpcode(Mips::OR);
-    Inst.addOperand(MCOperand::createReg(Mips::GP));
+    Inst.addOperand(MCOperand::createReg(GPReg));
     Inst.addOperand(MCOperand::createReg(SaveLocation));
     Inst.addOperand(MCOperand::createReg(Mips::ZERO));
   } else {
     Inst.setOpcode(Mips::LD);
-    Inst.addOperand(MCOperand::createReg(Mips::GP));
+    Inst.addOperand(MCOperand::createReg(GPReg));
     Inst.addOperand(MCOperand::createReg(Mips::SP));
     Inst.addOperand(MCOperand::createImm(SaveLocation));
   }
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index ed5b8dd71a51..dbff0f6200f2 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -1,9 +1,8 @@
 //=- MicroMips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 814918d25e70..425773dc57f1 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1,9 +1,8 @@
 //=- MicroMips32r6InstrInfo.td - MicroMips r6 Instruction Information -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -246,6 +245,7 @@ class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
 class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
 class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
 class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
+class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
 class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
 class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
 class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
@@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
   let isCall = 1;
   let hasDelaySlot = 0;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
 
@@ -889,6 +890,8 @@ class FMOV_FNEG_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
 }
 class FMOV_S_MMR6_DESC
   : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>;
+class FMOV_D_MMR6_DESC
+  : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>;
 class FNEG_S_MMR6_DESC
   : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>;
 
@@ -1039,7 +1042,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
 class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
                                                     FGR32Opnd, II_TRUNC>;
 class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
-                                                    AFGR64Opnd, II_TRUNC>;
+                                                    FGR64Opnd, II_TRUNC>;
 class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
                                                  II_SQRT_S, fsqrt>;
 class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
@@ -1210,7 +1213,7 @@ class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
 class SWSP_MMR6_DESC
     : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
                       !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>,
-      MMR6Arch<"sw"> {
+      MMR6Arch<"swsp"> {
   let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
   let mayStore = 1;
 }
@@ -1461,6 +1464,8 @@ def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC,
                    ISA_MICROMIPS32R6;
 def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC,
                   ISA_MICROMIPS32R6;
+def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC,
+                  ISA_MICROMIPS32R6;
 def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC,
                   ISA_MICROMIPS32R6;
 def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6;
@@ -1749,6 +1754,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
 def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
 def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
               (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+              (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6;
 
 def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
               (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>,
@@ -1767,6 +1774,19 @@ let AddedComplexity = 41 in {
   def : StoreRegImmPat<SDC1_D64_MMR6, f64>, FGR_64, ISA_MICROMIPS32R6;
 }
 
+let isCall=1, hasDelaySlot=0, isCTI=1, Defs = [RA] in {
+  class JumpLinkMMR6<Instruction JumpInst, DAGOperand Opnd> :
+    PseudoSE<(outs), (ins calltarget:$target), [], II_JAL>,
+    PseudoInstExpansion<(JumpInst Opnd:$target)>;
+}
+
+def JAL_MMR6 : JumpLinkMMR6<BALC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+              (JAL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsJmpLink (iPTR tglobaladdr:$dst)),
+              (JAL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
+
 def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
 
 def TAILCALLREG_MMR6  : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6;
diff --git a/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/lib/Target/Mips/MicroMipsDSPInstrFormats.td
index 0d444dfc9fad..26b6cf8994ca 100644
--- a/lib/Target/Mips/MicroMipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsDSPInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 132de6be750d..5a12568893af 100644
--- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -1,9 +1,8 @@
 //===- MicroMipsDSPInstrInfo.td - Micromips DSP instructions -*- tablegen *-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index 1731afc1961f..5d87068ff407 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -1,9 +1,8 @@
 //==- MicroMipsInstrFPU.td - microMIPS FPU Instruction Info -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,8 +113,7 @@ multiclass ABSS_MMM<string opstr, InstrItinClass Itin,
                 ISA_MICROMIPS, FGR_32 {
     string DecoderNamespace = "MicroMips";
   }
-  // FIXME: This needs to be part of the instruction mapping tables.
-  def _D64_MM : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
+  def _D64_MM : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
                 ISA_MICROMIPS, FGR_64 {
     string DecoderNamespace = "MicroMipsFP64";
   }
@@ -124,7 +122,7 @@ multiclass ABSS_MMM<string opstr, InstrItinClass Itin,
 defm FSQRT : ABSS_MMM<"sqrt.d", II_SQRT_D, fsqrt>, ROUND_W_FM_MM<1, 0x28>;
 defm FABS : ABSS_MMM<"abs.d", II_SQRT_D, fabs>, ABS_FM_MM<1, 0xd>;
 
-let DecoderNamespace = "MicroMips" in {
+let DecoderNamespace = "MicroMips", AdditionalPredicates = [UseAbs] in {
   def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
                   ABS_FM_MM<0, 0xd>, ISA_MICROMIPS;
 }
@@ -266,7 +264,7 @@ let DecoderNamespace = "MicroMips" in {
                      ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_64;
   def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd,
                                   II_RECIP_S>,
-                   ROUND_W_FM_MM<0b0, 0b00001000>;
+                   ROUND_W_FM_MM<0b0, 0b00001000>, ISA_MICROMIPS;
   def RSQRT_D32_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd,
                                   II_RECIP_D>,
                    ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_32 {
@@ -425,6 +423,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
 def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
               (TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
               FGR_32;
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+              (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
+              FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+              (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6;
 
 // Selects
 defm : MovzPats0<GPR32, FGR32, MOVZ_I_S_MM, SLT_MM, SLTu_MM, SLTi_MM, SLTiu_MM>,
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 2a4cc279ef0d..e9fb9b310e3b 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- MicroMipsInstrFormats.td - microMIPS Inst Formats -*- tablegen -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index af380a0ec71e..9b7f7b25fa94 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,9 +1,8 @@
 //===--- MicroMipsInstrFormats.td - microMIPS Inst Defs -*- tablegen -*----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -426,6 +425,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
   let isCall = 1;
   let hasDelaySlot = 1;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 
 // 16-bit Jump Reg
@@ -654,7 +654,7 @@ def LWGP_MM : LoadGPMM16<"lw", GPRMM16Opnd, II_LW, mem_mm_gp_simm7_lsl2>,
                          LOAD_GP_FM_MM16<0x19>, ISA_MICROMIPS;
 def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
               LOAD_STORE_SP_FM_MM16<0x12>, ISA_MICROMIPS;
-def SWSP_MM : StoreSPMM16<"sw", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
+def SWSP_MM : StoreSPMM16<"swsp", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
               LOAD_STORE_SP_FM_MM16<0x32>, ISA_MICROMIPS32_NOT_MIPS32R6;
 def ADDIUR1SP_MM : AddImmUR1SP<"addiur1sp", GPRMM16Opnd>, ADDIUR1SP_FM_MM16,
                    ISA_MICROMIPS;
@@ -694,6 +694,10 @@ def BREAK16_MM : BrkSdbbp16MM<"break16", II_BREAK>, BRKSDBBP16_FM_MM<0x28>,
 def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, BRKSDBBP16_FM_MM<0x2C>,
                  ISA_MICROMIPS32_NOT_MIPS32R6;
 
+class WaitMM<string opstr> :
+  InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
+         II_WAIT, FrmOther, opstr>;
+
 let DecoderNamespace = "MicroMips" in {
   /// Load and Store Instructions - multiple
   def SWM16_MM : StoreMultMM16<"swm16", II_SWM>, LWM_FM_MM16<0x5>,
@@ -706,13 +710,7 @@ let DecoderNamespace = "MicroMips" in {
   def CTC2_MM : InstSE<(outs COP2Opnd:$impl), (ins GPR32Opnd:$rt),
                        "ctc2\t$rt, $impl", [], II_CTC2, FrmFR, "ctc2">,
                 POOL32A_CFTC2_FM_MM<0b1101110100>, ISA_MICROMIPS;
-}
-
-class WaitMM<string opstr> :
-  InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
-         II_WAIT, FrmOther, opstr>;
 
-let DecoderNamespace = "MicroMips" in {
   /// Compact Branch Instructions
   def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
                  COMPACT_BRANCH_FM_MM<0x7>, ISA_MICROMIPS32_NOT_MIPS32R6;
@@ -822,8 +820,7 @@ let DecoderNamespace = "MicroMips" in {
     def SW_MM  : Store<"sw", GPR32Opnd, null_frag, II_SW>, MMRel,
                  LW_FM_MM<0x3e>, ISA_MICROMIPS;
   }
-}
-let DecoderNamespace = "MicroMips" in {
+
   let DecoderMethod = "DecodeMemMMImm9" in {
     def LBE_MM  : MMRel, Load<"lbe", GPR32Opnd, null_frag, II_LBE>,
                   POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>, ISA_MICROMIPS, ASE_EVA;
@@ -881,8 +878,7 @@ let DecoderNamespace = "MicroMips" in {
   def SWR_MM : MMRel, StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12,
                                        II_SWR>, LWL_FM_MM<0x9>,
                ISA_MICROMIPS32_NOT_MIPS32R6;
-}
-let DecoderNamespace = "MicroMips" in {
+
   /// Load and Store Instructions - multiple
   def SWM32_MM  : StoreMultMM<"swm32", II_SWM>, LWM_FM_MM<0xd>, ISA_MICROMIPS;
   def LWM32_MM  : LoadMultMM<"lwm32", II_LWM>, LWM_FM_MM<0x5>, ISA_MICROMIPS;
@@ -1125,7 +1121,8 @@ let AdditionalPredicates = [NotDSP] in {
                        ISA_MICROMIPS32_NOT_MIPS32R6;
 }
 
-def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
+def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>,
+                  ISA_MICROMIPS32_NOT_MIPS32R6;
 
 def TAILCALLREG_MM  : TailCallReg<JRC16_MM, GPR32Opnd>,
                       ISA_MICROMIPS32_NOT_MIPS32R6;
@@ -1139,9 +1136,7 @@ let DecoderNamespace = "MicroMips" in {
   def LWU_MM : MMRel, LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU,
                              mem_simm12>, LL_FM_MM<0xe>,
                ISA_MICROMIPS32_NOT_MIPS32R6;
-}
 
-let DecoderNamespace = "MicroMips" in {
   def MFGC0_MM    : MMRel, MfCop0MM<"mfgc0", GPR32Opnd, COP0Opnd, II_MFGC0>,
                     POOL32A_MFTC0_FM_MM<0b10011, 0b111100>,
                     ISA_MICROMIPS32R5, ASE_VIRT;
@@ -1204,7 +1199,7 @@ def : MipsPat<(atomic_load_32 addr:$a), (LW_MM addr:$a)>, ISA_MICROMIPS;
 def : MipsPat<(i32 immLi16:$imm),
               (LI16_MM immLi16:$imm)>, ISA_MICROMIPS;
 
-defm :  MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>, ISA_MICROMIPS;
+defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>, ISA_MICROMIPS;
 
 def : MipsPat<(not GPRMM16:$in),
               (NOT16_MM GPRMM16:$in)>, ISA_MICROMIPS;
@@ -1453,3 +1448,6 @@ def : MipsInstAlias<"mtgc0 $rt, $rs",
 def : MipsInstAlias<"mthgc0 $rt, $rs",
                     (MTHGC0_MM COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>,
                     ISA_MICROMIPS32R5, ASE_VIRT;
+def : MipsInstAlias<"sw $rt, $offset",
+                    (SWSP_MM GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), 1>,
+                    ISA_MICROMIPS;
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
index f9062cc23da2..70af95592aa5 100644
--- a/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -1,9 +1,8 @@
 //=== MicroMipsSizeReduction.cpp - MicroMips size reduction pass --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///\file
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 6bb7aecc867a..b3faaab436f0 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -1,9 +1,8 @@
 //===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 2f3a1c399d3e..7b83ea8535ae 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,9 +1,8 @@
 //===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This is the top level entry point for the Mips target.
@@ -83,6 +82,8 @@ def FeatureFPXX        : SubtargetFeature<"fpxx", "IsFPXX", "true",
                                 "Support for FPXX">;
 def FeatureNaN2008     : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
                                 "IEEE 754-2008 NaN encoding">;
+def FeatureAbs2008     : SubtargetFeature<"abs2008", "Abs2008", "true",
+                                          "Enable IEEE 754-2008 abs.fmt mode">;
 def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
                                 "true", "Only supports single precision float">;
 def FeatureSoftFloat   : SubtargetFeature<"soft-float", "IsSoftFloat", "true",
@@ -142,7 +143,7 @@ def FeatureMips32r6    : SubtargetFeature<"mips32r6", "MipsArchVersion",
                                 "Mips32r6",
                                 "Mips32r6 ISA Support [experimental]",
                                 [FeatureMips32r5, FeatureFP64Bit,
-                                 FeatureNaN2008]>;
+                                 FeatureNaN2008, FeatureAbs2008]>;
 def FeatureMips64      : SubtargetFeature<"mips64", "MipsArchVersion",
                                 "Mips64", "Mips64 ISA Support",
                                 [FeatureMips5, FeatureMips32]>;
@@ -159,7 +160,7 @@ def FeatureMips64r6    : SubtargetFeature<"mips64r6", "MipsArchVersion",
                                 "Mips64r6",
                                 "Mips64r6 ISA Support [experimental]",
                                 [FeatureMips32r6, FeatureMips64r5,
-                                 FeatureNaN2008]>;
+                                 FeatureNaN2008, FeatureAbs2008]>;
 def FeatureSym32       : SubtargetFeature<"sym32", "HasSym32", "true",
                                           "Symbols are 32 bit on Mips64">;
 
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 122c1f5377b6..5a2a916a6b7a 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- Mips16FrameLowering.cpp - Mips16 Frame Information -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index f7fa4dc3d86d..6b62453f8dfe 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -1,9 +1,8 @@
 //===-- Mips16FrameLowering.h - Mips16 frame lowering  ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index f237bb6d4006..e9a3c7ec4b19 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -1,9 +1,8 @@
 //===- Mips16HardFloat.cpp for Mips16 Hard Float --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -415,7 +414,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
                            Attribute::ReadNone);
         A = A.addAttribute(C, AttributeList::FunctionIndex,
                            Attribute::NoInline);
-        Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T));
+        FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T));
         CallInst::Create(F, Params, "", &I);
       } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
         FunctionType *FT = CI->getFunctionType();
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index 2eb6e5ddd2d9..8a02e8156175 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -1,9 +1,8 @@
 //===---- Mips16HardFloatInfo.cpp for Mips16 Hard Float              -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 7295c287576d..b8c485b7e2e3 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -1,9 +1,8 @@
 //===---- Mips16HardFloatInfo.h for Mips16 Hard Float              --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index a0d5bd9ef305..3ab4f1e064da 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index bbf8cc36f241..1ef194029f50 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -1,9 +1,8 @@
 //===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 79df622241a0..6d8e5aef2a3f 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -156,11 +155,8 @@ llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
   return new Mips16TargetLowering(TM, STI);
 }
 
-bool
-Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   return false;
 }
 
@@ -463,8 +459,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
         }
         // one more look at list of intrinsics
         const Mips16IntrinsicHelperType *Helper =
-            std::lower_bound(std::begin(Mips16IntrinsicHelper),
-                             std::end(Mips16IntrinsicHelper), IntrinsicFind);
+            llvm::lower_bound(Mips16IntrinsicHelper, IntrinsicFind);
         if (Helper != std::end(Mips16IntrinsicHelper) &&
             *Helper == IntrinsicFind) {
           Mips16HelperFunction = Helper->Helper;
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 0ee0b816ef70..200249933577 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -1,9 +1,8 @@
 //===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@ namespace llvm {
 
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                         unsigned Align,
+                                        MachineMemOperand::Flags Flags,
                                         bool *Fast) const override;
 
     MachineBasicBlock *
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 4ff68bef957e..f4ac160c2ba5 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -1,9 +1,8 @@
 //===- Mips16InstrFormats.td - Mips Instruction Formats ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index efebc99b5dae..c234c309d760 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- Mips16InstrInfo.cpp - Mips16 Instruction Information ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 6a802e4cce5d..dadcaa3055b3 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -1,9 +1,8 @@
 //===- Mips16InstrInfo.h - Mips16 Instruction Information -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index b7a1b9ce41bf..36b6c73d1008 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1,9 +1,8 @@
 //===- Mips16InstrInfo.td - Target Description for Mips16  -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -484,13 +483,11 @@ class SelT<string op1, string op2>:
 //
 // 32 bit constant
 //
-def Constant32:
-  MipsPseudo16<(outs), (ins simm32:$imm), "\t.word $imm", []>;
+def Constant32 : MipsPseudo16<(outs), (ins simm32:$imm), "\t.word $imm", []>;
 
-def LwConstant32:
+def LwConstant32 :
   MipsPseudo16<(outs CPU16Regs:$rx), (ins simm32:$imm, simm32:$constid),
-    "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
-
+               "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
 
 //
 // Some general instruction class info
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 751afd5ed369..5703f585a6a2 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index d67a79b64033..fca78b43f96b 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index 623af570a5e6..ccb6d1df777a 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -1,9 +1,8 @@
 //=- Mips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 2bd0cf2d59a6..2c3048411a5c 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1,9 +1,8 @@
 //=- Mips32r6InstrInfo.td - Mips32r6 Instruction Information -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -150,7 +149,6 @@ class SELEQZ_ENC : SPECIAL_3R_FM<0b00000, 0b110101>;
 class SELNEZ_ENC : SPECIAL_3R_FM<0b00000, 0b110111>;
 
 class LWPC_ENC   : PCREL19_FM<OPCODE2_LWPC>;
-class LWUPC_ENC  : PCREL19_FM<OPCODE2_LWUPC>;
 
 class MAX_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>;
 class MAX_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>;
@@ -326,7 +324,6 @@ class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
 class ADDIUPC_DESC : PCREL_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2,
                                      II_ADDIUPC>;
 class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2, II_LWPC>;
-class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>;
 
 class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
                       Operand ImmOpnd, InstrItinClass itin>
@@ -927,7 +924,6 @@ let AdditionalPredicates = [NotInMicroMips] in {
 }
 def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
 let AdditionalPredicates = [NotInMicroMips] in {
-  def LWUPC : R6MMR6Rel, LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
   def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
   def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
   def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
@@ -1105,7 +1101,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
 
 // Pseudo instructions
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
-    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in {
   class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
     PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 5729182deafb..7f35280f7936 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -1,9 +1,8 @@
 //===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -250,7 +249,7 @@ def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64,
 def JR64   : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64;
 }
 
-def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM, PTR_64;
 
 /// Jump and Branch Instructions
 let isCodeGenOnly = 1 in {
@@ -267,14 +266,15 @@ let isCodeGenOnly = 1 in {
   def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>,
                GPR_64;
   let AdditionalPredicates = [NoIndirectJumpGuards] in
-    def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+    def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>,
+                       PTR_64;
 }
 let AdditionalPredicates = [NotInMicroMips],
     DecoderNamespace = "Mips64" in {
-  def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
-  def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2;
+  def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS64_NOT_64R6;
+  def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS64R2;
 }
-def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>, GPR_64;
 
 let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
                             NoIndirectJumpGuards] in {
@@ -290,7 +290,7 @@ let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
                         ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
   def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64,
                                                               GPR64Opnd>,
-                                     ISA_MIPS32R2_NOT_32R6_64R6;
+                                     ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
 }
 
 /// Multiply and Divide Instructions.
@@ -332,17 +332,17 @@ def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>, ISA_MIPS3_NOT_32R6_64R6;
 
 /// Sign Ext In Register Instructions.
 def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
-            ISA_MIPS32R2;
+            ISA_MIPS32R2, GPR_64;
 def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
-            ISA_MIPS32R2;
+            ISA_MIPS32R2, GPR_64;
 }
 
 /// Count Leading
 let AdditionalPredicates = [NotInMicroMips] in {
   def DCLZ : CountLeading0<"dclz", GPR64Opnd, II_DCLZ>, CLO_FM<0x24>,
-             ISA_MIPS64_NOT_64R6;
+             ISA_MIPS64_NOT_64R6, GPR_64;
   def DCLO : CountLeading1<"dclo", GPR64Opnd, II_DCLO>, CLO_FM<0x25>,
-             ISA_MIPS64_NOT_64R6;
+             ISA_MIPS64_NOT_64R6, GPR_64;
 
 /// Double Word Swap Bytes/HalfWords
   def DSBH : SubwordSwap<"dsbh", GPR64Opnd, II_DSBH>, SEB_FM<2, 0x24>,
@@ -417,17 +417,25 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
 // explanation.
 
 // Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
-def LONG_BRANCH_LUi2Op_64 : PseudoSE<(outs GPR64Opnd:$dst),
-  (ins brtarget:$tgt), []>, GPR_64;
+def LONG_BRANCH_LUi2Op_64 :
+    PseudoSE<(outs GPR64Opnd:$dst), (ins brtarget:$tgt), []>, GPR_64 {
+  bit hasNoSchedulingInfo = 1;
+}
 // Expands to: addiu $dst, %highest/%higher/%hi/%lo($tgt)
-def LONG_BRANCH_DADDiu2Op : PseudoSE<(outs GPR64Opnd:$dst),
-  (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64;
-
+def LONG_BRANCH_DADDiu2Op :
+    PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt), []>,
+    GPR_64 {
+  bit hasNoSchedulingInfo = 1;
+}
 // Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
 // where %PART may be %hi or %lo, depending on the relocation kind
 // that $tgt is annotated with.
-def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst),
-  (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64;
+def LONG_BRANCH_DADDiu :
+    PseudoSE<(outs GPR64Opnd:$dst),
+             (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>,
+    GPR_64 {
+  bit hasNoSchedulingInfo = 1;
+}
 
 // Cavium Octeon cnMIPS instructions
 let DecoderNamespace = "CnMips",
@@ -580,15 +588,15 @@ def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd, II_DMTC2>, MFC2OP_FM<0x12, 5>,
 }
 
 /// Move between CPU and coprocessor registers
-let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
+let DecoderNamespace = "Mips64" in {
 def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd, II_DMFC0>,
-            MFC3OP_FM<0x10, 1, 0>, ISA_MIPS3;
+            MFC3OP_FM<0x10, 1, 0>, ISA_MIPS3, GPR_64;
 def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd, II_DMTC0>,
-            MFC3OP_FM<0x10, 5, 0>, ISA_MIPS3;
+            MFC3OP_FM<0x10, 5, 0>, ISA_MIPS3, GPR_64;
 def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd, II_DMFC2>,
-            MFC3OP_FM<0x12, 1, 0>, ISA_MIPS3;
+            MFC3OP_FM<0x12, 1, 0>, ISA_MIPS3, GPR_64;
 def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>,
-            MFC3OP_FM<0x12, 5, 0>, ISA_MIPS3;
+            MFC3OP_FM<0x12, 5, 0>, ISA_MIPS3, GPR_64;
 }
 
 /// Move between CPU and guest coprocessor registers (Virtualization ASE)
@@ -600,7 +608,7 @@ let DecoderNamespace = "Mips64" in {
 }
 
 let AdditionalPredicates = [UseIndirectJumpsHazard] in
-  def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>;
+  def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>, PTR_64;
 
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
@@ -845,7 +853,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))),
               (SUBu GPR32:$src, GPR32:$src2), sub_32)>;
 def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
               (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-              (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6;
+              (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS32_NOT_32R6_64R6;
 def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))),
               (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
               (PseudoMFHI ACC64:$src), sub_32)>;
@@ -1147,5 +1155,33 @@ def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
 def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
                                                   imm64:$imm)>, GPR_64;
 
+def SGEImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                 (ins GPR64Opnd:$rs, imm64:$imm),
+                                 "sge\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sge $rs, $imm", (SGEImm64 GPR64Opnd:$rs,
+                                               GPR64Opnd:$rs,
+                                               imm64:$imm), 0>, GPR_64;
+
+def SGEUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                  (ins GPR64Opnd:$rs, imm64:$imm),
+                                  "sgeu\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgeu $rs, $imm", (SGEUImm64 GPR64Opnd:$rs,
+                                                 GPR64Opnd:$rs,
+                                                 imm64:$imm), 0>, GPR_64;
+
+def SGTImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                 (ins GPR64Opnd:$rs, imm64:$imm),
+                                 "sgt\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgt $rs, $imm", (SGTImm64 GPR64Opnd:$rs,
+                                               GPR64Opnd:$rs,
+                                               imm64:$imm), 0>, GPR_64;
+
+def SGTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                  (ins GPR64Opnd:$rs, imm64:$imm),
+                                  "sgtu\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgtu $rs, $imm", (SGTUImm64 GPR64Opnd:$rs,
+                                                 GPR64Opnd:$rs,
+                                                 imm64:$imm), 0>, GPR_64;
+
 def : MipsInstAlias<"rdhwr $rt, $rs",
                     (RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64;
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index ac223bc77256..d746bb61f824 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -1,9 +1,8 @@
 //=- Mips64r6InstrInfo.td - Mips64r6 Instruction Information -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,6 +36,7 @@ class DMUHU_ENC   : SPECIAL_3R_FM<0b00011, 0b011101>;
 class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>;
 class DMULU_ENC   : SPECIAL_3R_FM<0b00010, 0b011101>;
 class LDPC_ENC    : PCREL18_FM<OPCODE3_LDPC>;
+class LWUPC_ENC   : PCREL19_FM<OPCODE2_LWUPC>;
 class LLD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LLD>;
 class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>;
 class CRC32D_ENC  : SPECIAL3_2R_SZ_CRC<3,0>;
@@ -73,6 +73,7 @@ class DMUHU_DESC   : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, II_DMUHU, mulhu>;
 class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>;
 class DMULU_DESC   : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>;
 class LDPC_DESC    : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>;
+class LWUPC_DESC   : PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>;
 class LLD_R6_DESC   : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>;
 class SCD_R6_DESC   : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>;
 class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
@@ -148,6 +149,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
   def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS64R6;
 }
 def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6;
+def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS64R6;
 def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
 let DecoderNamespace = "Mips32r6_64r6_GP64" in {
   def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 4e17ee327ab6..ae2b83c414db 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -1,9 +1,8 @@
 //===- MipsAnalyzeImmediate.cpp - Analyze Immediates ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 1c520242fb8d..018b9d824526 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -1,9 +1,8 @@
 //===- MipsAnalyzeImmediate.h - Analyze Immediates -------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 362431fd42a6..db83fe49cec0 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsAsmPrinter.h"
-#include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsABIInfo.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsMCNaCl.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "Mips.h"
@@ -24,6 +23,7 @@
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
 #include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
@@ -68,6 +68,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "mips-asm-printer"
 
+extern cl::opt<bool> EmitJalrReloc;
+
 MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
   return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
 }
@@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
   EmitToStreamer(OutStreamer, TmpInst0);
 }
 
+// If there is an MO_JALR operand, insert:
+//
+// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+// tmplabel:
+//
+// This is an optimization hint for the linker which may then replace
+// an indirect call with a direct branch.
+static void emitDirectiveRelocJalr(const MachineInstr &MI,
+                                   MCContext &OutContext,
+                                   TargetMachine &TM,
+                                   MCStreamer &OutStreamer,
+                                   const MipsSubtarget &Subtarget) {
+  // Only the operands past the ones declared by the instruction description
+  // are scanned; the first named MO_JALR symbol wins and ends the search.
+  for (unsigned I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
+       I < E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isMCSymbol() || !(MO.getTargetFlags() & MipsII::MO_JALR))
+      continue;
+    MCSymbol *Callee = MO.getMCSymbol();
+    if (!Callee || Callee->getName().empty())
+      continue;
+    MCSymbol *OffsetLabel = OutContext.createTempSymbol();
+    const MCExpr *OffsetExpr = MCSymbolRefExpr::create(OffsetLabel, OutContext);
+    const MCExpr *CalleeExpr = MCSymbolRefExpr::create(Callee, OutContext);
+    OutStreamer.EmitRelocDirective(
+        *OffsetExpr,
+        Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+        CalleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+    OutStreamer.EmitLabel(OffsetLabel);
+    return;
+  }
+}
+
 void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MipsTargetStreamer &TS = getTargetStreamer();
   unsigned Opc = MI->getOpcode();
@@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  if (EmitJalrReloc &&
+      (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) {
+    emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget);
+  }
+
   MachineBasicBlock::const_instr_iterator I = MI->getIterator();
   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
 
@@ -470,8 +511,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
 
 // Print out an operand for an inline asm expression.
 bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                                     unsigned AsmVariant, const char *ExtraCode,
-                                     raw_ostream &O) {
+                                     const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -480,7 +520,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
+      return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
     case 'X': // hex const int
       if ((MO.getType()) != MachineOperand::MO_Immediate)
         return true;
@@ -576,7 +616,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
 }
 
 bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                           unsigned OpNum, unsigned AsmVariant,
+                                           unsigned OpNum,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
   assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
@@ -653,7 +693,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       return;
 
     case MachineOperand::MO_GlobalAddress:
-      getSymbol(MO.getGlobal())->print(O, MAI);
+      PrintSymbolOperand(MO, O);
       break;
 
     case MachineOperand::MO_BlockAddress: {
@@ -772,7 +812,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   // We should always emit a '.module fp=...' but binutils 2.24 does not accept
   // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
   // -mfp64) and omit it otherwise.
-  if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit()))
+  if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) ||
+      STI.useSoftFloat())
     TS.emitDirectiveModuleFP();
 
   // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index eb58234e3e77..173a1312812e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -1,9 +1,8 @@
 //===- MipsAsmPrinter.h - Mips LLVM Assembly Printer -----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -146,11 +145,9 @@ public:
   bool isBlockOnlyReachableByFallthrough(
                                    const MachineBasicBlock* MBB) const override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O) override;
+                       const char *ExtraCode, raw_ostream &O) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O) override;
+                             const char *ExtraCode, raw_ostream &O) override;
   void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/Mips/MipsBranchExpansion.cpp b/lib/Target/Mips/MipsBranchExpansion.cpp
index e59267c4fd9b..1523a6c020aa 100644
--- a/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -1,9 +1,8 @@
 //===----------------------- MipsBranchExpansion.cpp ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index 90cb3f437bd5..ef48c850a1b8 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -1,9 +1,8 @@
 //===---- MipsCCState.cpp - CCState with Mips specific extensions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 27901699480b..fd2fd97c8f13 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -1,9 +1,8 @@
 //===---- MipsCCState.h - CCState with Mips specific extensions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp
index c550fadf6632..da65689ecff5 100644
--- a/lib/Target/Mips/MipsCallLowering.cpp
+++ b/lib/Target/Mips/MipsCallLowering.cpp
@@ -1,9 +1,8 @@
 //===- MipsCallLowering.cpp -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 
 #include "MipsCallLowering.h"
 #include "MipsCCState.h"
+#include "MipsMachineFunction.h"
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -24,10 +24,10 @@ using namespace llvm;
 MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
     : CallLowering(&TLI) {}
 
-bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
-                                           const CCValAssign &VA) {
+bool MipsCallLowering::MipsHandler::assign(Register VReg, const CCValAssign &VA,
+                                           const EVT &VT) {
   if (VA.isRegLoc()) {
-    assignValueToReg(VReg, VA);
+    assignValueToReg(VReg, VA, VT);
   } else if (VA.isMemLoc()) {
     assignValueToAddress(VReg, VA);
   } else {
@@ -36,24 +36,25 @@ bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
   return true;
 }
 
-bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<unsigned> VRegs,
+bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<Register> VRegs,
                                                 ArrayRef<CCValAssign> ArgLocs,
-                                                unsigned ArgLocsStartIndex) {
+                                                unsigned ArgLocsStartIndex,
+                                                const EVT &VT) {
   for (unsigned i = 0; i < VRegs.size(); ++i)
-    if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i]))
+    if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i], VT))
       return false;
   return true;
 }
 
 void MipsCallLowering::MipsHandler::setLeastSignificantFirst(
-    SmallVectorImpl<unsigned> &VRegs) {
+    SmallVectorImpl<Register> &VRegs) {
   if (!MIRBuilder.getMF().getDataLayout().isLittleEndian())
     std::reverse(VRegs.begin(), VRegs.end());
 }
 
 bool MipsCallLowering::MipsHandler::handle(
     ArrayRef<CCValAssign> ArgLocs, ArrayRef<CallLowering::ArgInfo> Args) {
-  SmallVector<unsigned, 4> VRegs;
+  SmallVector<Register, 4> VRegs;
   unsigned SplitLength;
   const Function &F = MIRBuilder.getMF().getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
@@ -65,6 +66,8 @@ bool MipsCallLowering::MipsHandler::handle(
     EVT VT = TLI.getValueType(DL, Args[ArgsIndex].Ty);
     SplitLength = TLI.getNumRegistersForCallingConv(F.getContext(),
                                                     F.getCallingConv(), VT);
+    assert(Args[ArgsIndex].Regs.size() == 1 && "Can't handle multple regs yet");
+
     if (SplitLength > 1) {
       VRegs.clear();
       MVT RegisterVT = TLI.getRegisterTypeForCallingConv(
@@ -72,10 +75,11 @@ bool MipsCallLowering::MipsHandler::handle(
       for (unsigned i = 0; i < SplitLength; ++i)
         VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT}));
 
-      if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg))
+      if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Regs[0],
+                       VT))
         return false;
     } else {
-      if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex]))
+      if (!assign(Args[ArgsIndex].Regs[0], ArgLocs[ArgLocsIndex], VT))
         return false;
     }
   }
@@ -89,24 +93,25 @@ public:
       : MipsHandler(MIRBuilder, MRI) {}
 
 private:
-  void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+  void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+                        const EVT &VT) override;
 
-  unsigned getStackAddress(const CCValAssign &VA,
+  Register getStackAddress(const CCValAssign &VA,
                            MachineMemOperand *&MMO) override;
 
-  void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
+  void assignValueToAddress(Register ValVReg, const CCValAssign &VA) override;
 
-  bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+  bool handleSplit(SmallVectorImpl<Register> &VRegs,
                    ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
-                   unsigned ArgsReg) override;
+                   Register ArgsReg, const EVT &VT) override;
 
   virtual void markPhysRegUsed(unsigned PhysReg) {
     MIRBuilder.getMBB().addLiveIn(PhysReg);
   }
 
-  void buildLoad(unsigned Val, const CCValAssign &VA) {
+  void buildLoad(Register Val, const CCValAssign &VA) {
     MachineMemOperand *MMO;
-    unsigned Addr = getStackAddress(VA, MMO);
+    Register Addr = getStackAddress(VA, MMO);
     MIRBuilder.buildLoad(Val, Addr, *MMO);
   }
 };
@@ -127,59 +132,88 @@ private:
 
 } // end anonymous namespace
 
-void IncomingValueHandler::assignValueToReg(unsigned ValVReg,
-                                            const CCValAssign &VA) {
-  unsigned PhysReg = VA.getLocReg();
-  switch (VA.getLocInfo()) {
-  case CCValAssign::LocInfo::SExt:
-  case CCValAssign::LocInfo::ZExt:
-  case CCValAssign::LocInfo::AExt: {
-    auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
-    MIRBuilder.buildTrunc(ValVReg, Copy);
-    break;
-  }
-  default:
-    MIRBuilder.buildCopy(ValVReg, PhysReg);
-    break;
+void IncomingValueHandler::assignValueToReg(Register ValVReg,
+                                            const CCValAssign &VA,
+                                            const EVT &VT) {
+  const MipsSubtarget &STI =
+      static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+  Register PhysReg = VA.getLocReg();
+  if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+    // f64 arriving in the integer argument registers: rebuild it from the
+    // GPR pair PhysReg/PhysReg+1. The order of the two source operands
+    // depends on the endianness reported by the subtarget.
+    MIRBuilder
+        .buildInstr(STI.isFP64bit() ? Mips::BuildPairF64_64
+                                    : Mips::BuildPairF64)
+        .addDef(ValVReg)
+        .addUse(PhysReg + (STI.isLittle() ? 0 : 1))
+        .addUse(PhysReg + (STI.isLittle() ? 1 : 0))
+        .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                          *STI.getRegBankInfo());
+    markPhysRegUsed(PhysReg);
+    markPhysRegUsed(PhysReg + 1);
+  } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+    MIRBuilder.buildInstr(Mips::MTC1)
+        .addDef(ValVReg)
+        .addUse(PhysReg)
+        .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                          *STI.getRegBankInfo());
+    markPhysRegUsed(PhysReg);
+  } else {
+    switch (VA.getLocInfo()) {
+    case CCValAssign::LocInfo::SExt:
+    case CCValAssign::LocInfo::ZExt:
+    case CCValAssign::LocInfo::AExt: {
+      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+      MIRBuilder.buildTrunc(ValVReg, Copy);
+      break;
+    }
+    default:
+      MIRBuilder.buildCopy(ValVReg, PhysReg);
+      break;
+    }
+    markPhysRegUsed(PhysReg);
   }
-  markPhysRegUsed(PhysReg);
 }
 
-unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA,
+Register IncomingValueHandler::getStackAddress(const CCValAssign &VA,
                                                MachineMemOperand *&MMO) {
+  MachineFunction &MF = MIRBuilder.getMF();
   unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
   unsigned Offset = VA.getLocMemOffset();
-  MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
 
   int FI = MFI.CreateFixedObject(Size, Offset, true);
   MachinePointerInfo MPO =
       MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
-  MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOLoad,
-                                                Size, /* Alignment */ 0);
 
-  unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
+  const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+  unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
+  MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, Align);
+
+  Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
   MIRBuilder.buildFrameIndex(AddrReg, FI);
 
   return AddrReg;
 }
 
-void IncomingValueHandler::assignValueToAddress(unsigned ValVReg,
+void IncomingValueHandler::assignValueToAddress(Register ValVReg,
                                                 const CCValAssign &VA) {
   if (VA.getLocInfo() == CCValAssign::SExt ||
       VA.getLocInfo() == CCValAssign::ZExt ||
       VA.getLocInfo() == CCValAssign::AExt) {
-    unsigned LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    Register LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
     buildLoad(LoadReg, VA);
     MIRBuilder.buildTrunc(ValVReg, LoadReg);
   } else
     buildLoad(ValVReg, VA);
 }
 
-bool IncomingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+bool IncomingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
                                        ArrayRef<CCValAssign> ArgLocs,
                                        unsigned ArgLocsStartIndex,
-                                       unsigned ArgsReg) {
-  if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+                                       Register ArgsReg, const EVT &VT) {
+  if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
     return false;
   setLeastSignificantFirst(VRegs);
   MIRBuilder.buildMerge(ArgsReg, VRegs);
@@ -194,78 +228,111 @@ public:
       : MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
 
 private:
-  void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+  void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+                        const EVT &VT) override;
 
-  unsigned getStackAddress(const CCValAssign &VA,
+  Register getStackAddress(const CCValAssign &VA,
                            MachineMemOperand *&MMO) override;
 
-  void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
+  void assignValueToAddress(Register ValVReg, const CCValAssign &VA) override;
 
-  bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+  bool handleSplit(SmallVectorImpl<Register> &VRegs,
                    ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
-                   unsigned ArgsReg) override;
+                   Register ArgsReg, const EVT &VT) override;
 
-  unsigned extendRegister(unsigned ValReg, const CCValAssign &VA);
+  Register extendRegister(Register ValReg, const CCValAssign &VA);
 
   MachineInstrBuilder &MIB;
 };
 } // end anonymous namespace
 
-void OutgoingValueHandler::assignValueToReg(unsigned ValVReg,
-                                            const CCValAssign &VA) {
-  unsigned PhysReg = VA.getLocReg();
-  unsigned ExtReg = extendRegister(ValVReg, VA);
-  MIRBuilder.buildCopy(PhysReg, ExtReg);
-  MIB.addUse(PhysReg, RegState::Implicit);
+void OutgoingValueHandler::assignValueToReg(Register ValVReg,
+                                            const CCValAssign &VA,
+                                            const EVT &VT) {
+  Register PhysReg = VA.getLocReg();
+  const MipsSubtarget &STI =
+      static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+
+  if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+    MIRBuilder
+        .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
+                                    : Mips::ExtractElementF64)
+        .addDef(PhysReg + (STI.isLittle() ? 1 : 0))
+        .addUse(ValVReg)
+        .addImm(1)
+        .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                          *STI.getRegBankInfo());
+    MIRBuilder
+        .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
+                                    : Mips::ExtractElementF64)
+        .addDef(PhysReg + (STI.isLittle() ? 0 : 1))
+        .addUse(ValVReg)
+        .addImm(0)
+        .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                          *STI.getRegBankInfo());
+  } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+    MIRBuilder.buildInstr(Mips::MFC1)
+        .addDef(PhysReg)
+        .addUse(ValVReg)
+        .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                          *STI.getRegBankInfo());
+  } else {
+    Register ExtReg = extendRegister(ValVReg, VA);
+    MIRBuilder.buildCopy(PhysReg, ExtReg);
+    MIB.addUse(PhysReg, RegState::Implicit);
+  }
 }
 
-unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
                                                MachineMemOperand *&MMO) {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+
   LLT p0 = LLT::pointer(0, 32);
   LLT s32 = LLT::scalar(32);
-  unsigned SPReg = MRI.createGenericVirtualRegister(p0);
-  MIRBuilder.buildCopy(SPReg, Mips::SP);
+  Register SPReg = MRI.createGenericVirtualRegister(p0);
+  MIRBuilder.buildCopy(SPReg, Register(Mips::SP));
 
-  unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+  Register OffsetReg = MRI.createGenericVirtualRegister(s32);
   unsigned Offset = VA.getLocMemOffset();
   MIRBuilder.buildConstant(OffsetReg, Offset);
 
-  unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+  Register AddrReg = MRI.createGenericVirtualRegister(p0);
   MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 
   MachinePointerInfo MPO =
       MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
   unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
-  MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOStore,
-                                                Size, /* Alignment */ 0);
+  unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
+  MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, Align);
 
   return AddrReg;
 }
 
-void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg,
+void OutgoingValueHandler::assignValueToAddress(Register ValVReg,
                                                 const CCValAssign &VA) {
   MachineMemOperand *MMO;
-  unsigned Addr = getStackAddress(VA, MMO);
-  unsigned ExtReg = extendRegister(ValVReg, VA);
+  Register Addr = getStackAddress(VA, MMO);
+  Register ExtReg = extendRegister(ValVReg, VA);
   MIRBuilder.buildStore(ExtReg, Addr, *MMO);
 }
 
-unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
+Register OutgoingValueHandler::extendRegister(Register ValReg,
                                               const CCValAssign &VA) {
   LLT LocTy{VA.getLocVT()};
   switch (VA.getLocInfo()) {
   case CCValAssign::SExt: {
-    unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+    Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildSExt(ExtReg, ValReg);
     return ExtReg;
   }
   case CCValAssign::ZExt: {
-    unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+    Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildZExt(ExtReg, ValReg);
     return ExtReg;
   }
   case CCValAssign::AExt: {
-    unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+    Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildAnyExt(ExtReg, ValReg);
     return ExtReg;
   }
@@ -278,13 +345,13 @@ unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
   llvm_unreachable("unable to extend register");
 }
 
-bool OutgoingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+bool OutgoingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
                                        ArrayRef<CCValAssign> ArgLocs,
                                        unsigned ArgLocsStartIndex,
-                                       unsigned ArgsReg) {
+                                       Register ArgsReg, const EVT &VT) {
   MIRBuilder.buildUnmerge(VRegs, ArgsReg);
   setLeastSignificantFirst(VRegs);
-  if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+  if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
     return false;
 
   return true;
@@ -295,6 +362,8 @@ static bool isSupportedType(Type *T) {
     return true;
   if (T->isPointerTy())
     return true;
+  if (T->isFloatingPointTy())
+    return true;
   return false;
 }
 
@@ -330,7 +399,7 @@ static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
 
 bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                    const Value *Val,
-                                   ArrayRef<unsigned> VRegs) const {
+                                   ArrayRef<Register> VRegs) const {
 
   MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
 
@@ -376,9 +445,9 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
   return true;
 }
 
-bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                            const Function &F,
-                                            ArrayRef<unsigned> VRegs) const {
+bool MipsCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
 
   // Quick exit if there aren't any args.
   if (F.arg_empty())
@@ -444,7 +513,8 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (Arg.Flags.isByVal() || Arg.Flags.isSRet())
       return false;
   }
-  if (OrigRet.Reg && !isSupportedType(OrigRet.Ty))
+
+  if (OrigRet.Regs[0] && !isSupportedType(OrigRet.Ty))
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
@@ -457,14 +527,22 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MachineInstrBuilder CallSeqStart =
       MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN);
 
-  // FIXME: Add support for pic calling sequences, long call sequences for O32,
-  //       N32 and N64. First handle the case when Callee.isReg().
-  if (Callee.isReg())
-    return false;
+  const bool IsCalleeGlobalPIC =
+      Callee.isGlobal() && TM.isPositionIndependent();
 
-  MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(Mips::JAL);
+  MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(
+      Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL);
   MIB.addDef(Mips::SP, RegState::Implicit);
-  MIB.add(Callee);
+  if (IsCalleeGlobalPIC) {
+    Register CalleeReg =
+        MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32));
+    MachineInstr *CalleeGlobalValue =
+        MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal());
+    if (!Callee.getGlobal()->hasLocalLinkage())
+      CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL);
+    MIB.addUse(CalleeReg);
+  } else
+    MIB.add(Callee);
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
 
@@ -507,10 +585,21 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   NextStackOffset = alignTo(NextStackOffset, StackAlignment);
   CallSeqStart.addImm(NextStackOffset).addImm(0);
 
+  if (IsCalleeGlobalPIC) {
+    MIRBuilder.buildCopy(
+      Register(Mips::GP),
+      MF.getInfo<MipsFunctionInfo>()->getGlobalBaseRegForGlobalISel());
+    MIB.addDef(Mips::GP, RegState::Implicit);
+  }
   MIRBuilder.insertInstr(MIB);
+  if (MIB->getOpcode() == Mips::JALRPseudo) {
+    const MipsSubtarget &STI =
+        static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+    MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                         *STI.getRegBankInfo());
+  }
 
-  if (OrigRet.Reg) {
-
+  if (OrigRet.Regs[0]) {
     ArgInfos.clear();
     SmallVector<unsigned, 8> OrigRetIndices;
 
diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h
index 9916b04ef50c..11c2d53ad35d 100644
--- a/lib/Target/Mips/MipsCallLowering.h
+++ b/lib/Target/Mips/MipsCallLowering.h
@@ -1,9 +1,8 @@
 //===- MipsCallLowering.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,37 +34,39 @@ public:
                 ArrayRef<CallLowering::ArgInfo> Args);
 
   protected:
-    bool assignVRegs(ArrayRef<unsigned> VRegs, ArrayRef<CCValAssign> ArgLocs,
-                     unsigned Index);
+    bool assignVRegs(ArrayRef<Register> VRegs, ArrayRef<CCValAssign> ArgLocs,
+                     unsigned ArgLocsStartIndex, const EVT &VT);
 
-    void setLeastSignificantFirst(SmallVectorImpl<unsigned> &VRegs);
+    void setLeastSignificantFirst(SmallVectorImpl<Register> &VRegs);
 
     MachineIRBuilder &MIRBuilder;
     MachineRegisterInfo &MRI;
 
   private:
-    bool assign(unsigned VReg, const CCValAssign &VA);
+    bool assign(Register VReg, const CCValAssign &VA, const EVT &VT);
 
-    virtual unsigned getStackAddress(const CCValAssign &VA,
+    virtual Register getStackAddress(const CCValAssign &VA,
                                      MachineMemOperand *&MMO) = 0;
 
-    virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0;
+    virtual void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+                                  const EVT &VT) = 0;
 
-    virtual void assignValueToAddress(unsigned ValVReg,
+    virtual void assignValueToAddress(Register ValVReg,
                                       const CCValAssign &VA) = 0;
 
-    virtual bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+    virtual bool handleSplit(SmallVectorImpl<Register> &VRegs,
                              ArrayRef<CCValAssign> ArgLocs,
-                             unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0;
+                             unsigned ArgLocsStartIndex, Register ArgsReg,
+                             const EVT &VT) = 0;
   };
 
   MipsCallLowering(const MipsTargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<unsigned> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index b5df78f89a6b..88236d8e9abd 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,9 +1,8 @@
 //===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for Mips architecture.
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 0d7e3e200b5f..5affbcbc2101 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -1,9 +1,8 @@
 //===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -110,11 +109,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
 
   let isCodeGenOnly = 1 in {
     def MOVZ_I_I64   : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>,
-                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
     def MOVZ_I64_I   : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>,
-                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
     def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, II_MOVZ>,
-                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
   }
 
   def MOVN_I_I       : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
@@ -122,11 +121,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
 
   let isCodeGenOnly = 1 in {
     def MOVN_I_I64   : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
-                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
     def MOVN_I64_I   : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>,
-                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
     def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, II_MOVN>,
-                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+                       ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
   }
   def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>,
                  CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
@@ -156,9 +155,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
                      CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
     let isCodeGenOnly = 1 in {
       def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, II_MOVZ_D>,
-                         CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+                         CMov_I_F_FM<18, 17>,
+                         INSN_MIPS4_32_NOT_32R6_64R6, GPR_64, FGR_64;
       def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, II_MOVN_D>,
-                         CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+                         CMov_I_F_FM<19, 17>,
+                         INSN_MIPS4_32_NOT_32R6_64R6, GPR_64, FGR_64;
     }
   }
 
@@ -262,7 +263,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
 }
 // For targets that don't have conditional-move instructions
 // we have to match SELECT nodes with pseudo instructions.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
   class Select_Pseudo<RegisterOperand RC> :
     PseudoSE<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F),
             [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>,
@@ -297,7 +298,7 @@ def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
 def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
 def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
 
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
 class D_SELECT_CLASS<RegisterOperand RC> :
   PseudoSE<(outs RC:$dst1, RC:$dst2),
            (ins GPR32Opnd:$cond, RC:$a1, RC:$a2, RC:$b1, RC:$b2), []>,
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 744523cc6cb9..eea28df7eda1 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -1,9 +1,8 @@
 //===- MipsConstantIslandPass.cpp - Emit Pc Relative loads ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -842,9 +841,7 @@ void MipsConstantIslands::updateForInsertedWaterBlock
 
   // Next, update WaterList.  Specifically, we need to add NewMBB as having
   // available water after it.
-  water_iterator IP =
-    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
-                     CompareMBBNumbers);
+  water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
   WaterList.insert(IP, NewBB);
 }
 
@@ -894,9 +891,7 @@ MipsConstantIslands::splitBlockBeforeInstr(MachineInstr &MI) {
   // available water after it (but not if it's already there, which happens
   // when splitting before a conditional branch that is followed by an
   // unconditional branch - in that case we want to insert NewBB).
-  water_iterator IP =
-    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
-                     CompareMBBNumbers);
+  water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
   MachineBasicBlock* WaterBB = *IP;
   if (WaterBB == OrigBB)
     WaterList.insert(std::next(IP), NewBB);
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 5f0763f5ea46..6f062d0f3c25 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -1,9 +1,8 @@
 //===- MipsDSPInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index b9824220b558..daca8b907081 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -1,9 +1,8 @@
 //===- MipsDSPInstrInfo.td - DSP ASE instructions -*- tablegen ------------*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -516,6 +515,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin>
 
 class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
   MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
+  bit hasNoSchedulingInfo = 1;
   bit usesCustomInserter = 1;
 }
 
@@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
 def PseudoPICK_PH : PseudoPICK<PICK_PH>;
 def PseudoPICK_QB : PseudoPICK<PICK_QB>;
 
-def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+let AdditionalPredicates = [HasDSP] in {
+  def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+}
 
 // Patterns.
 class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3823e0dfdb8..aa07dac86828 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,9 +1,8 @@
 //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -493,14 +492,12 @@ MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
 
 bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
   bool HasHazard = false;
-  SmallVector<ValueType, 4> Objs;
 
   // Check underlying object list.
+  SmallVector<ValueType, 4> Objs;
   if (getUnderlyingObjects(MI, Objs)) {
-    for (SmallVectorImpl<ValueType>::const_iterator I = Objs.begin();
-         I != Objs.end(); ++I)
-      HasHazard |= updateDefsUses(*I, MI.mayStore());
-
+    for (ValueType VT : Objs)
+      HasHazard |= updateDefsUses(VT, MI.mayStore());
     return HasHazard;
   }
 
@@ -526,33 +523,32 @@ bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
 bool MemDefsUses::
 getUnderlyingObjects(const MachineInstr &MI,
                      SmallVectorImpl<ValueType> &Objects) const {
-  if (!MI.hasOneMemOperand() ||
-      (!(*MI.memoperands_begin())->getValue() &&
-       !(*MI.memoperands_begin())->getPseudoValue()))
+  if (!MI.hasOneMemOperand())
     return false;
 
-  if (const PseudoSourceValue *PSV =
-      (*MI.memoperands_begin())->getPseudoValue()) {
+  const auto &MMO = **MI.memoperands_begin();
+
+  if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) {
     if (!PSV->isAliased(MFI))
       return false;
     Objects.push_back(PSV);
     return true;
   }
 
-  const Value *V = (*MI.memoperands_begin())->getValue();
+  if (const Value *V = MMO.getValue()) {
+    SmallVector<const Value *, 4> Objs;
+    GetUnderlyingObjects(V, Objs, DL);
 
-  SmallVector<Value *, 4> Objs;
-  GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+    for (const Value *UValue : Objs) {
+      if (!isIdentifiedObject(UValue)) // Test the underlying object itself.
+        return false;
 
-  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
-       I != E; ++I) {
-    if (!isIdentifiedObject(V))
-      return false;
-
-    Objects.push_back(*I);
+      Objects.push_back(UValue);
+    }
+    return true;
   }
 
-  return true;
+  return false;
 }
 
 // Replace Branch with the compact branch instruction.
@@ -726,6 +722,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
     // but we don't have enough information to make that decision.
      if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
         (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
+         Opcode == Mips::PseudoIndirectBranch_MM ||
          Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
       continue;
      // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
diff --git a/lib/Target/Mips/MipsEVAInstrFormats.td b/lib/Target/Mips/MipsEVAInstrFormats.td
index 61785d0e891a..9820e4dcfc88 100644
--- a/lib/Target/Mips/MipsEVAInstrFormats.td
+++ b/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -1,9 +1,8 @@
 //===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsEVAInstrInfo.td b/lib/Target/Mips/MipsEVAInstrInfo.td
index ff54b1f17877..73cca8cfa5d9 100644
--- a/lib/Target/Mips/MipsEVAInstrInfo.td
+++ b/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -1,9 +1,8 @@
 //===- MipsEVAInstrInfo.td - EVA ASE instructions -*- tablegen ------------*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index acf66d1fb1b2..65d84a6c44a0 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -1,9 +1,8 @@
 //===-- MipsExpandPseudoInsts.cpp - Expand pseudo instructions ------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 22ade31a72cd..123d3cc242f0 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1,9 +1,8 @@
 //===- MipsFastISel.cpp - Mips FastISel implementation --------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -56,6 +55,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -75,6 +75,8 @@
 
 using namespace llvm;
 
+extern cl::opt<bool> EmitJalrReloc;
+
 namespace {
 
 class MipsFastISel final : public FastISel {
@@ -951,21 +953,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
   //
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
-  // For now, just try the simplest case where it's fed by a compare.
+
+  // Fold the common case of a conditional branch with a comparison
+  // in the same block.
+  unsigned ZExtCondReg = 0;
   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
-    MVT CIMVT =
-        TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
-    if (CIMVT == MVT::i1)
+    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+      ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
+      if (!emitCmp(ZExtCondReg, CI))
+        return false;
+    }
+  }
+
+  // For the general case, we need to mask with 1.
+  if (ZExtCondReg == 0) {
+    unsigned CondReg = getRegForValue(BI->getCondition());
+    if (CondReg == 0)
       return false;
 
-    unsigned CondReg = getRegForValue(CI);
-    BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
-        .addReg(CondReg)
-        .addMBB(TBB);
-    finishCondBranch(BI->getParent(), TBB, FBB);
-    return true;
+    ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true);
+    if (ZExtCondReg == 0)
+      return false;
   }
-  return false;
+
+  BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+      .addReg(ZExtCondReg)
+      .addMBB(TBB);
+  finishCondBranch(BI->getParent(), TBB, FBB);
+  return true;
 }
 
 bool MipsFastISel::selectCmp(const Instruction *I) {
@@ -1551,6 +1566,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   CLI.Call = MIB;
 
+  if (EmitJalrReloc && !Subtarget->inMips16Mode()) {
+    // Attach callee address to the instruction, let asm printer emit
+    // .reloc R_MIPS_JALR.
+    if (Symbol)
+      MIB.addSym(Symbol, MipsII::MO_JALR);
+    else
+      MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol(
+                           Addr.getGlobalValue()->getName()), MipsII::MO_JALR);
+  }
+
   // Finish off the call including any return values.
   return finishCall(CLI, RetVT, NumBytes);
 }
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 27a85970da6f..8d5eabf59b71 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 0ead56eddd2f..0537cfd1cb30 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f99f3a1b3e0a..9ba54d6bb73c 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- MipsISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 09003459d180..bae3bbf71f3b 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -1,9 +1,8 @@
 //===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8c2a364cdfa9..0ff09007da4b 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1,9 +1,8 @@
 //===- MipsISelLowering.cpp - Mips DAG Lowering Implementation ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsISelLowering.h"
-#include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "MipsCCState.h"
 #include "MipsInstrInfo.h"
@@ -57,6 +56,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -91,6 +91,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
                cl::desc("MIPS: Don't trap on integer division by zero."),
                cl::init(false));
 
+extern cl::opt<bool> EmitJalrReloc;
+
 static const MCPhysReg Mips64DPRegs[8] = {
   Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
   Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -362,6 +364,11 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
   setOperationAction(ISD::FCOPYSIGN,          MVT::f64,   Custom);
   setOperationAction(ISD::FP_TO_SINT,         MVT::i32,   Custom);
 
+  if (!(TM.Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())) {
+    setOperationAction(ISD::FABS, MVT::f32, Custom);
+    setOperationAction(ISD::FABS, MVT::f64, Custom);
+  }
+
   if (Subtarget.isGP64bit()) {
     setOperationAction(ISD::GlobalAddress,      MVT::i64,   Custom);
     setOperationAction(ISD::BlockAddress,       MVT::i64,   Custom);
@@ -1183,14 +1190,22 @@ bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget.hasMips32();
 }
 
+bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
+    const SDNode *N, CombineLevel Level) const {
+  if (N->getOperand(0).getValueType().isVector())
+    return false;
+  return true;
+}
+
 void
 MipsTargetLowering::LowerOperationWrapper(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
 
-  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
-    Results.push_back(Res.getValue(I));
+  if (Res)
+    for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+      Results.push_back(Res.getValue(I));
 }
 
 void
@@ -1216,6 +1231,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::VASTART:            return lowerVASTART(Op, DAG);
   case ISD::VAARG:              return lowerVAARG(Op, DAG);
   case ISD::FCOPYSIGN:          return lowerFCOPYSIGN(Op, DAG);
+  case ISD::FABS:               return lowerFABS(Op, DAG);
   case ISD::FRAMEADDR:          return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:         return lowerRETURNADDR(Op, DAG);
   case ISD::EH_RETURN:          return lowerEH_RETURN(Op, DAG);
@@ -1709,7 +1725,7 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
 
   assert((MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I64) &&
-         "Unsupported atomic psseudo for EmitAtomicCmpSwap.");
+         "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
 
   const unsigned Size = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
 
@@ -1735,12 +1751,10 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
   // after fast register allocation, the spills will end up outside of the
   // blocks that their values are defined in, causing livein errors.
 
-  unsigned DestCopy = MRI.createVirtualRegister(MRI.getRegClass(Dest));
   unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
   unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
   unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
 
-  BuildMI(*BB, II, DL, TII->get(Mips::COPY), DestCopy).addReg(Dest);
   BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
   BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal);
   BuildMI(*BB, II, DL, TII->get(Mips::COPY), NewValCopy).addReg(NewVal);
@@ -2293,11 +2307,79 @@ MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert());
 }
 
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
+                           bool HasExtractInsert) {
+  SDLoc DL(Op);
+  SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+
+  // If operand is of type f64, extract the upper 32 bits. Otherwise, bitcast it
+  // to i32.
+  SDValue X = (Op.getValueType() == MVT::f32)
+                  ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0))
+                  : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                                Op.getOperand(0), Const1);
+
+  // Clear MSB.
+  if (HasExtractInsert)
+    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
+                      DAG.getRegister(Mips::ZERO, MVT::i32),
+                      DAG.getConstant(31, DL, MVT::i32), Const1, X);
+  else {
+    // TODO: Provide DAG patterns which transform (and x, cst)
+    // back to a (shl (srl x (clz cst)) (clz cst)) sequence.
+    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+    Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+  }
+
+  if (Op.getValueType() == MVT::f32)
+    return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
+
+  // FIXME: For mips32r2, when Op has one use, we should be able to replace the
+  // sequence (BuildPairF64 (ins (ExtractElementF64 Op 1), $zero, 31 1)
+  // (ExtractElementF64 Op 0)) with an in-place rewrite of the register and
+  // drop the usage of mfc1/mtc1.
+  SDValue LowX =
+      DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+                  DAG.getConstant(0, DL, MVT::i32));
+  return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
+                           bool HasExtractInsert) {
+  SDLoc DL(Op);
+  SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+
+  // Bitcast to integer node.
+  SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
+
+  // Clear MSB.
+  if (HasExtractInsert)
+    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
+                      DAG.getRegister(Mips::ZERO_64, MVT::i64),
+                      DAG.getConstant(63, DL, MVT::i32), Const1, X);
+  else {
+    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
+    Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
+  }
+
+  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
+}
+
+SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
+  if ((ABI.IsN32() || ABI.IsN64()) && (Op.getValueType() == MVT::f64))
+    return lowerFABS64(Op, DAG, Subtarget.hasExtractInsert());
+
+  return lowerFABS32(Op, DAG, Subtarget.hasExtractInsert());
+}
+
 SDValue MipsTargetLowering::
 lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   // check the depth
-  assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
-         "Frame address can only be determined for current frame.");
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+    DAG.getContext()->emitError(
+        "return address can be determined only for current frame");
+    return SDValue();
+  }
 
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   MFI.setFrameAddressIsTaken(true);
@@ -2314,8 +2396,11 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
     return SDValue();
 
   // check the depth
-  assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
-         "Return address can be determined only for current frame.");
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+    DAG.getContext()->emitError(
+        "return address can be determined only for current frame");
+    return SDValue();
+  }
 
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2879,6 +2964,54 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
     Ops.push_back(InFlag);
 }
 
+void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                       SDNode *Node) const {
+  switch (MI.getOpcode()) {
+    default:
+      return;
+    case Mips::JALR:
+    case Mips::JALRPseudo:
+    case Mips::JALR64:
+    case Mips::JALR64Pseudo:
+    case Mips::JALR16_MM:
+    case Mips::JALRC16_MMR6:
+    case Mips::TAILCALLREG:
+    case Mips::TAILCALLREG64:
+    case Mips::TAILCALLR6REG:
+    case Mips::TAILCALL64R6REG:
+    case Mips::TAILCALLREG_MM:
+    case Mips::TAILCALLREG_MMR6: {
+      if (!EmitJalrReloc ||
+          Subtarget.inMips16Mode() ||
+          !isPositionIndependent() ||
+          Node->getNumOperands() < 1 ||
+          Node->getOperand(0).getNumOperands() < 2) {
+        return;
+      }
+      // We are after the callee address, set by LowerCall().
+      // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the
+      // symbol.
+      const SDValue TargetAddr = Node->getOperand(0).getOperand(1);
+      StringRef Sym;
+      if (const GlobalAddressSDNode *G =
+              dyn_cast_or_null<const GlobalAddressSDNode>(TargetAddr)) {
+        Sym = G->getGlobal()->getName();
+      }
+      else if (const ExternalSymbolSDNode *ES =
+                   dyn_cast_or_null<const ExternalSymbolSDNode>(TargetAddr)) {
+        Sym = ES->getSymbol();
+      }
+
+      if (Sym.empty())
+        return;
+
+      MachineFunction *MF = MI.getParent()->getParent();
+      MCSymbol *S = MF->getContext().getOrCreateSymbol(Sym);
+      MI.addOperand(MachineOperand::CreateMCSymbol(S, MipsII::MO_JALR));
+    }
+  }
+}
+
 /// LowerCall - functions arguments are copied from virtual regs to
 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
 SDValue
@@ -2930,7 +3063,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // the maximum out going argument area (including the reserved area), and
   // preallocates the stack space on entrance to the caller.
   //
-  // FIXME: We should do the same for efficency and space.
+  // FIXME: We should do the same for efficiency and space.
 
   // Note: The check on the calling convention below must match
   //       MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
@@ -4007,18 +4140,18 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   return false;
 }
 
-EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                                            unsigned SrcAlign,
-                                            bool IsMemset, bool ZeroMemset,
-                                            bool MemcpyStrSrc,
-                                            MachineFunction &MF) const {
+EVT MipsTargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
   if (Subtarget.hasMips64())
     return MVT::i64;
 
   return MVT::i32;
 }
 
-bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                      bool ForCodeSize) const {
   if (VT != MVT::f32 && VT != MVT::f64)
     return false;
   if (Imm.isNegZero())
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index e043f133a09f..2db60e9801f1 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -1,9 +1,8 @@
 //===- MipsISelLowering.h - Mips DAG Lowering Interface ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -285,6 +284,8 @@ class TargetRegisterClass;
 
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
+    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+                                           CombineLevel Level) const override;
 
     /// Return the register type for a given MVT, ensuring vectors are treated
     /// as a series of gpr sized integers.
@@ -341,6 +342,9 @@ class TargetRegisterClass;
     EmitInstrWithCustomInserter(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const override;
 
+    void AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                       SDNode *Node) const override;
+
     void HandleByVal(CCState *, unsigned &, unsigned) const override;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
@@ -649,9 +653,11 @@ class TargetRegisterClass;
 
     unsigned
     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+      if (ConstraintCode == "o")
+        return InlineAsm::Constraint_o;
       if (ConstraintCode == "R")
         return InlineAsm::Constraint_R;
-      else if (ConstraintCode == "ZC")
+      if (ConstraintCode == "ZC")
         return InlineAsm::Constraint_ZC;
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
@@ -666,12 +672,13 @@ class TargetRegisterClass;
                             unsigned SrcAlign,
                             bool IsMemset, bool ZeroMemset,
                             bool MemcpyStrSrc,
-                            MachineFunction &MF) const override;
+                            const AttributeList &FuncAttributes) const override;
 
     /// isFPImmLegal - Returns true if the target can instruction select the
     /// specified FP immediate natively. If false, the legalizer will
     /// materialize the FP immediate as a load from a constant pool.
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
 
     unsigned getJumpTableEncoding() const override;
     bool useSoftFloat() const override;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 4cb8574e08f6..e94e107e64c2 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,9 +1,8 @@
 //===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -143,7 +142,7 @@ multiclass ABSS_M<string opstr, InstrItinClass Itin,
                   SDPatternOperator OpNode= null_frag> {
   def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
              FGR_32;
-  def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
+  def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
     string DecoderNamespace = "MipsFP64";
   }
 }
@@ -487,7 +486,7 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
   def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>;
 }
 
-let AdditionalPredicates = [NotInMicroMips] in {
+let AdditionalPredicates = [NotInMicroMips, UseAbs] in {
   def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
                ABSS_FM<0x5, 16>, ISA_MIPS1;
   defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>, ISA_MIPS1;
@@ -551,12 +550,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
   let isMoveReg = 1 in {
     def FMOV_S   : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>,
                    ABSS_FM<0x6, 16>, ISA_MIPS1;
-    def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
-                   ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_32;
-    def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>,
-                   ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_64 {
-                     let DecoderNamespace = "MipsFP64";
-    }
+    defm FMOV : ABSS_M<"mov.d", II_MOV_D>, ABSS_FM<0x6, 17>, ISA_MIPS1;
   } // isMoveReg
 }
 
@@ -793,6 +787,11 @@ def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd),
                                          "li.d\t$rd, $fpimm">,
                        FGR_64, HARDFLOAT;
 
+def SDC1_M1 : MipsAsmPseudoInst<(outs AFGR64Opnd:$fd),
+                                (ins mem_simm16:$addr),
+                                "s.d\t$fd, $addr">,
+              FGR_32, ISA_MIPS1, HARDFLOAT;
+
 //===----------------------------------------------------------------------===//
 // InstAliases.
 //===----------------------------------------------------------------------===//
@@ -805,6 +804,9 @@ def : MipsInstAlias
 def : MipsInstAlias
         <"s.d $fd, $addr", (SDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
       FGR_64, ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+        <"s.d $fd, $addr", (SDC1_M1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>,
+      FGR_32, ISA_MIPS1, HARDFLOAT;
 
 def : MipsInstAlias
         <"l.s $fd, $addr", (LWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index ebbdcdf0df89..14f01514f33f 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -146,6 +145,7 @@ class PseudoSE<dag outs, dag ins, list<dag> pattern,
 class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
   MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
   let isPseudo = 1;
+  let hasNoSchedulingInfo = 1;
   let Pattern = [];
 }
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index bfb4c775205d..fbd56206b249 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- MipsInstrInfo.cpp - Mips Instruction Information -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -578,7 +577,8 @@ unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   default:
     return MI.getDesc().getSize();
-  case  TargetOpcode::INLINEASM: {       // Inline Asm: Variable size.
+  case  TargetOpcode::INLINEASM:
+  case  TargetOpcode::INLINEASM_BR: {       // Inline Asm: Variable size.
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
 
     MIB.addImm(0);
 
+    // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR),
+    // add it to the new instruction.
+    for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands();
+         J < E; ++J) {
+      const MachineOperand &MO = I->getOperand(J);
+      if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR))
+        MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR);
+    }
+
+
   } else {
     for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
       if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
@@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
     {MO_GOT_HI16,     "mips-got-hi16"},
     {MO_GOT_LO16,     "mips-got-lo16"},
     {MO_CALL_HI16,    "mips-call-hi16"},
-    {MO_CALL_LO16,    "mips-call-lo16"}
+    {MO_CALL_LO16,    "mips-call-lo16"},
+    {MO_JALR,         "mips-jalr"}
   };
   return makeArrayRef(Flags);
 }
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 9d27b8f66211..a626c0c3fdb8 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -1,9 +1,8 @@
 //===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index d9398b7d6024..a4e85a38ab28 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1,9 +1,8 @@
 //===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -221,6 +220,8 @@ def IsNotN64    :     Predicate<"!Subtarget->isABI_N64()">;
 def RelocNotPIC :     Predicate<"!TM.isPositionIndependent()">;
 def RelocPIC    :     Predicate<"TM.isPositionIndependent()">;
 def NoNaNsFPMath :    Predicate<"TM.Options.NoNaNsFPMath">;
+def UseAbs :          Predicate<"Subtarget->inAbs2008Mode() ||"
+                                "TM.Options.NoNaNsFPMath">;
 def HasStdEnc :       Predicate<"Subtarget->hasStandardEncoding()">,
                       AssemblerPredicate<"!FeatureMips16">;
 def NotDSP :          Predicate<"!Subtarget->hasDSP()">;
@@ -1623,11 +1624,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
   class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
                           Register RetReg, RegisterOperand ResRO = RO>:
     PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
-    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
+    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 
   class JumpLinkReg<string opstr, RegisterOperand RO>:
     InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
-           [], II_JALR, FrmR, opstr>;
+           [], II_JALR, FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }
 
   class BGEZAL_FT<string opstr, DAGOperand opnd,
                   RegisterOperand RO> :
@@ -1646,7 +1651,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
 
   class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
-    PseudoInstExpansion<(JumpInst RO:$rs)>;
+    PseudoInstExpansion<(JumpInst RO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 }
 
 class BAL_BR_Pseudo<Instruction RealInst, DAGOperand opnd> :
@@ -1844,7 +1851,9 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
 // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
 class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
-           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
+           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]> {
+  let hasNoSchedulingInfo = 1;
+}
 
 class Atomic2OpsPostRA<RegisterClass RC> :
   PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> {
@@ -1861,7 +1870,9 @@ class Atomic2OpsSubwordPostRA<RegisterClass RC> :
 // during ISelLowering, which produces the PostRA version of this instruction.
 class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
-           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
+           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]> {
+  let hasNoSchedulingInfo = 1;
+}
 
 class AtomicCmpSwapPostRA<RegisterClass RC> :
   PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> {
@@ -1876,7 +1887,6 @@ class AtomicCmpSwapSubwordPostRA<RegisterClass RC> :
   let mayStore = 1;
 }
 
-
 class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
   InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
          [], II_LL, FrmI, opstr> {
@@ -1928,7 +1938,7 @@ let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in {
   def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>;
 }
 
-let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+let Defs = [SP], Uses = [SP], hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
 def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                                   [(callseq_start timm:$amt1, timm:$amt2)]>;
 def ADJCALLSTACKUP   : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
@@ -2004,17 +2014,25 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
 
 // Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt - $baltgt)
 def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
-  (ins brtarget:$tgt, brtarget:$baltgt), []>;
+  (ins brtarget:$tgt, brtarget:$baltgt), []> {
+  bit hasNoSchedulingInfo = 1;
+}
 // Expands to: lui $dst, highest/%higher/%hi/%lo($tgt)
 def LONG_BRANCH_LUi2Op : PseudoSE<(outs GPR32Opnd:$dst),
-  (ins brtarget:$tgt), []>;
+  (ins brtarget:$tgt), []> {
+  bit hasNoSchedulingInfo = 1;
+}
 
 // Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt - $baltgt)
 def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
-  (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+  (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []> {
+  bit hasNoSchedulingInfo = 1;
+}
 // Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
 def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
-  (ins GPR32Opnd:$src, brtarget:$tgt), []>;
+  (ins GPR32Opnd:$src, brtarget:$tgt), []> {
+  bit hasNoSchedulingInfo = 1;
+}
 
 //===----------------------------------------------------------------------===//
 // Instruction definition
@@ -2117,7 +2135,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
             LW_FM<0x28>, ISA_MIPS1;
   def SH  : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>,
             ISA_MIPS1;
-  def SW  : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>, ISA_MIPS1;
+  def SW  : StdMMR6Rel, Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>, ISA_MIPS1;
 }
 
 /// load/store left/right
@@ -2324,12 +2342,12 @@ def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
 def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
                       [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
-let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in {
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1,
+           isBarrier = 1, isCTI = 1, hasNoSchedulingInfo = 1 in {
   def MIPSeh_return32 : MipsPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst),
-                                [(MIPSehret GPR32:$spoff, GPR32:$dst)]>;
-  def MIPSeh_return64 : MipsPseudo<(outs), (ins GPR64:$spoff,
-                                                GPR64:$dst),
-                                [(MIPSehret GPR64:$spoff, GPR64:$dst)]>;
+                                   [(MIPSehret GPR32:$spoff, GPR32:$dst)]>;
+  def MIPSeh_return64 : MipsPseudo<(outs), (ins GPR64:$spoff, GPR64:$dst),
+                                   [(MIPSehret GPR64:$spoff, GPR64:$dst)]>;
 }
 
 /// Multiply and Divide Instructions.
@@ -2675,18 +2693,64 @@ let AdditionalPredicates = [NotInMicroMips] in {
                       (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS1;
   def : MipsInstAlias<"negu $rt",
                       (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MIPS1;
+
+  def SGE : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                              (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+                              "sge\t$rd, $rs, $rt">, ISA_MIPS1;
+  def : MipsInstAlias<"sge $rs, $rt",
+                      (SGE GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+        ISA_MIPS1;
+  def SGEImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                 (ins GPR32Opnd:$rs, simm32:$imm),
+                                 "sge\t$rd, $rs, $imm">, GPR_32;
+  def : MipsInstAlias<"sge $rs, $imm", (SGEImm GPR32Opnd:$rs,
+                                               GPR32Opnd:$rs,
+                                               simm32:$imm), 0>,
+        GPR_32;
+
+  def SGEU : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                               (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+                               "sgeu\t$rd, $rs, $rt">, ISA_MIPS1;
+  def : MipsInstAlias<"sgeu $rs, $rt",
+                      (SGEU GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+        ISA_MIPS1;
+  def SGEUImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                  (ins GPR32Opnd:$rs, uimm32_coerced:$imm),
+                                  "sgeu\t$rd, $rs, $imm">, GPR_32;
+  def : MipsInstAlias<"sgeu $rs, $imm", (SGEUImm GPR32Opnd:$rs,
+                                                 GPR32Opnd:$rs,
+                                                 uimm32_coerced:$imm), 0>,
+        GPR_32;
+
   def : MipsInstAlias<
           "sgt $rd, $rs, $rt",
           (SLT GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
   def : MipsInstAlias<
           "sgt $rs, $rt",
           (SLT GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
+
+  def SGTImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                 (ins GPR32Opnd:$rs, simm32:$imm),
+                                 "sgt\t$rd, $rs, $imm">, GPR_32;
+  def : MipsInstAlias<"sgt $rs, $imm", (SGTImm GPR32Opnd:$rs,
+                                               GPR32Opnd:$rs,
+                                               simm32:$imm), 0>,
+        GPR_32;
   def : MipsInstAlias<
           "sgtu $rd, $rs, $rt",
           (SLTu GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
   def : MipsInstAlias<
           "sgtu $$rs, $rt",
           (SLTu GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
+
+  def SGTUImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                  (ins GPR32Opnd:$rs, uimm32_coerced:$imm),
+                                  "sgtu\t$rd, $rs, $imm">, GPR_32;
+  def : MipsInstAlias<"sgtu $rs, $imm", (SGTUImm GPR32Opnd:$rs,
+                                                 GPR32Opnd:$rs,
+                                                 uimm32_coerced:$imm), 0>,
+        GPR_32;
+
   def : MipsInstAlias<
           "not $rt, $rs",
           (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>, ISA_MIPS1;
@@ -2737,14 +2801,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
   def : MipsInstAlias<"bnez $rs,$offset",
                       (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
         ISA_MIPS1;
-  def : MipsInstAlias<"bnezl $rs,$offset",
-                      (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+  def : MipsInstAlias<"bnezl $rs, $offset",
+                      (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
         ISA_MIPS2;
   def : MipsInstAlias<"beqz $rs,$offset",
                       (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
         ISA_MIPS1;
-  def : MipsInstAlias<"beqzl $rs,$offset",
-                      (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+  def : MipsInstAlias<"beqzl $rs, $offset",
+                      (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
         ISA_MIPS2;
 
   def : MipsInstAlias<"syscall", (SYSCALL 0), 1>, ISA_MIPS1;
diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp
index b041590ee343..45a47ad3c087 100644
--- a/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- MipsInstructionSelector.cpp ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -12,6 +11,8 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
 #include "MipsRegisterBankInfo.h"
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -37,6 +38,12 @@ public:
 
 private:
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+  bool materialize32BitImm(Register DestReg, APInt Imm,
+                           MachineIRBuilder &B) const;
+  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  const TargetRegisterClass *
+  getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB,
+                           const RegisterBankInfo &RBI) const;
 
   const MipsTargetMachine &TM;
   const MipsSubtarget &STI;
@@ -74,15 +81,24 @@ MipsInstructionSelector::MipsInstructionSelector(
 {
 }
 
-static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
-                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
-                       const RegisterBankInfo &RBI) {
-  unsigned DstReg = I.getOperand(0).getReg();
+bool MipsInstructionSelector::selectCopy(MachineInstr &I,
+                                         MachineRegisterInfo &MRI) const {
+  Register DstReg = I.getOperand(0).getReg();
   if (TargetRegisterInfo::isPhysicalRegister(DstReg))
     return true;
 
-  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
+  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
 
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  if (RegBank->getID() == Mips::FPRBRegBankID) {
+    if (DstSize == 32)
+      RC = &Mips::FGR32RegClass;
+    else if (DstSize == 64)
+      RC = STI.isFP64bit() ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+    else
+      llvm_unreachable("Unsupported destination size");
+  }
   if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                       << " operand\n");
@@ -91,6 +107,102 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   return true;
 }
 
+const TargetRegisterClass *MipsInstructionSelector::getRegClassForTypeOnBank(
+    unsigned OpSize, const RegisterBank &RB,
+    const RegisterBankInfo &RBI) const {
+  if (RB.getID() == Mips::GPRBRegBankID)
+    return &Mips::GPR32RegClass;
+
+  if (RB.getID() == Mips::FPRBRegBankID)
+    return OpSize == 32
+               ? &Mips::FGR32RegClass
+               : STI.hasMips32r6() || STI.isFP64bit() ? &Mips::FGR64RegClass
+                                                      : &Mips::AFGR64RegClass;
+
+  llvm_unreachable("getRegClassForTypeOnBank can't find register class.");
+  return nullptr;
+}
+
+bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
+                                                  MachineIRBuilder &B) const {
+  assert(Imm.getBitWidth() == 32 && "Unsupported immediate size.");
+  // Ori zero extends immediate. Used for values with zeros in high 16 bits.
+  if (Imm.getHiBits(16).isNullValue()) {
+    MachineInstr *Inst = B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
+                             .addImm(Imm.getLoBits(16).getLimitedValue());
+    return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+  }
+  // Lui places immediate in high 16 bits and sets low 16 bits to zero.
+  if (Imm.getLoBits(16).isNullValue()) {
+    MachineInstr *Inst = B.buildInstr(Mips::LUi, {DestReg}, {})
+                             .addImm(Imm.getHiBits(16).getLimitedValue());
+    return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+  }
+  // ADDiu sign extends immediate. Used for values with 1s in high 17 bits.
+  if (Imm.isSignedIntN(16)) {
+    MachineInstr *Inst = B.buildInstr(Mips::ADDiu, {DestReg}, {Register(Mips::ZERO)})
+                             .addImm(Imm.getLoBits(16).getLimitedValue());
+    return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+  }
+  // Values that cannot be materialized with single immediate instruction.
+  Register LUiReg = B.getMRI()->createVirtualRegister(&Mips::GPR32RegClass);
+  MachineInstr *LUi = B.buildInstr(Mips::LUi, {LUiReg}, {})
+                          .addImm(Imm.getHiBits(16).getLimitedValue());
+  MachineInstr *ORi = B.buildInstr(Mips::ORi, {DestReg}, {LUiReg})
+                          .addImm(Imm.getLoBits(16).getLimitedValue());
+  if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+    return false;
+  if (!constrainSelectedInstRegOperands(*ORi, TII, TRI, RBI))
+    return false;
+  return true;
+}
+
+/// Returns \p Opc unchanged if no MIPS load/store opcode could be selected.
+static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes,
+                                      unsigned RegBank, bool isFP64) {
+  bool isStore = Opc == TargetOpcode::G_STORE;
+  if (RegBank == Mips::GPRBRegBankID) {
+    if (isStore)
+      switch (MemSizeInBytes) {
+      case 4:
+        return Mips::SW;
+      case 2:
+        return Mips::SH;
+      case 1:
+        return Mips::SB;
+      default:
+        return Opc;
+      }
+    else
+      // Unspecified extending load is selected into zero-extending load.
+      switch (MemSizeInBytes) {
+      case 4:
+        return Mips::LW;
+      case 2:
+        return Opc == TargetOpcode::G_SEXTLOAD ? Mips::LH : Mips::LHu;
+      case 1:
+        return Opc == TargetOpcode::G_SEXTLOAD ? Mips::LB : Mips::LBu;
+      default:
+        return Opc;
+      }
+  }
+
+  if (RegBank == Mips::FPRBRegBankID) {
+    switch (MemSizeInBytes) {
+    case 4:
+      return isStore ? Mips::SWC1 : Mips::LWC1;
+    case 8:
+      if (isFP64)
+        return isStore ? Mips::SDC164 : Mips::LDC164;
+      else
+        return isStore ? Mips::SDC1 : Mips::LDC1;
+    default:
+      return Opc;
+    }
+  }
+  return Opc;
+}
+
 bool MipsInstructionSelector::select(MachineInstr &I,
                                      CodeGenCoverage &CoverageInfo) const {
 
@@ -100,19 +212,52 @@ bool MipsInstructionSelector::select(MachineInstr &I,
 
   if (!isPreISelGenericOpcode(I.getOpcode())) {
     if (I.isCopy())
-      return selectCopy(I, TII, MRI, TRI, RBI);
+      return selectCopy(I, MRI);
 
     return true;
   }
 
-  if (selectImpl(I, CoverageInfo)) {
+  if (I.getOpcode() == Mips::G_MUL) {
+    MachineInstr *Mul = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MUL))
+                            .add(I.getOperand(0))
+                            .add(I.getOperand(1))
+                            .add(I.getOperand(2));
+    if (!constrainSelectedInstRegOperands(*Mul, TII, TRI, RBI))
+      return false;
+    Mul->getOperand(3).setIsDead(true);
+    Mul->getOperand(4).setIsDead(true);
+
+    I.eraseFromParent();
     return true;
   }
 
+  if (selectImpl(I, CoverageInfo))
+    return true;
+
   MachineInstr *MI = nullptr;
   using namespace TargetOpcode;
 
   switch (I.getOpcode()) {
+  case G_UMULH: {
+    Register PseudoMULTuReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+    MachineInstr *PseudoMULTu, *PseudoMove;
+
+    PseudoMULTu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoMULTu))
+                      .addDef(PseudoMULTuReg)
+                      .add(I.getOperand(1))
+                      .add(I.getOperand(2));
+    if (!constrainSelectedInstRegOperands(*PseudoMULTu, TII, TRI, RBI))
+      return false;
+
+    PseudoMove = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoMFHI))
+                     .addDef(I.getOperand(0).getReg())
+                     .addUse(PseudoMULTuReg);
+    if (!constrainSelectedInstRegOperands(*PseudoMove, TII, TRI, RBI))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
   case G_GEP: {
     MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu))
              .add(I.getOperand(0))
@@ -127,16 +272,46 @@ bool MipsInstructionSelector::select(MachineInstr &I,
              .addImm(0);
     break;
   }
+  case G_BRCOND: {
+    MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::BNE))
+             .add(I.getOperand(0))
+             .addUse(Mips::ZERO)
+             .add(I.getOperand(1));
+    break;
+  }
+  case G_PHI: {
+    const Register DestReg = I.getOperand(0).getReg();
+    const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
+
+    const TargetRegisterClass *DefRC = nullptr;
+    if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+      DefRC = TRI.getRegClass(DestReg);
+    else
+      DefRC = getRegClassForTypeOnBank(OpSize,
+                                       *RBI.getRegBank(DestReg, MRI, TRI), RBI);
+
+    I.setDesc(TII.get(TargetOpcode::PHI));
+    return RBI.constrainGenericRegister(DestReg, *DefRC, MRI);
+  }
   case G_STORE:
-  case G_LOAD: {
-    const unsigned DestReg = I.getOperand(0).getReg();
+  case G_LOAD:
+  case G_ZEXTLOAD:
+  case G_SEXTLOAD: {
+    const Register DestReg = I.getOperand(0).getReg();
     const unsigned DestRegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID();
     const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
+    const unsigned OpMemSizeInBytes = (*I.memoperands_begin())->getSize();
 
-    if (DestRegBank != Mips::GPRBRegBankID || OpSize != 32)
+    if (DestRegBank == Mips::GPRBRegBankID && OpSize != 32)
       return false;
 
-    const unsigned NewOpc = I.getOpcode() == G_STORE ? Mips::SW : Mips::LW;
+    if (DestRegBank == Mips::FPRBRegBankID && OpSize != 32 && OpSize != 64)
+      return false;
+
+    const unsigned NewOpc = selectLoadStoreOpCode(
+        I.getOpcode(), OpMemSizeInBytes, DestRegBank, STI.isFP64bit());
+    if (NewOpc == I.getOpcode())
+      return false;
 
     MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
              .add(I.getOperand(0))
@@ -149,7 +324,7 @@ bool MipsInstructionSelector::select(MachineInstr &I,
   case G_UREM:
   case G_SDIV:
   case G_SREM: {
-    unsigned HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+    Register HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
     bool IsSigned = I.getOpcode() == G_SREM || I.getOpcode() == G_SDIV;
     bool IsDiv = I.getOpcode() == G_UDIV || I.getOpcode() == G_SDIV;
 
@@ -182,58 +357,150 @@ bool MipsInstructionSelector::select(MachineInstr &I,
     break;
   }
   case G_CONSTANT: {
-    int Imm = I.getOperand(1).getCImm()->getValue().getLimitedValue();
-    unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
-    MachineInstr *LUi, *ORi;
+    MachineIRBuilder B(I);
+    if (!materialize32BitImm(I.getOperand(0).getReg(),
+                             I.getOperand(1).getCImm()->getValue(), B))
+      return false;
 
-    LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
-              .addDef(LUiReg)
-              .addImm(Imm >> 16);
+    I.eraseFromParent();
+    return true;
+  }
+  case G_FCONSTANT: {
+    const APFloat &FPimm = I.getOperand(1).getFPImm()->getValueAPF();
+    APInt APImm = FPimm.bitcastToAPInt();
+    unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+
+    if (Size == 32) {
+      Register GPRReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      MachineIRBuilder B(I);
+      if (!materialize32BitImm(GPRReg, APImm, B))
+        return false;
 
-    ORi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ORi))
-              .addDef(I.getOperand(0).getReg())
-              .addUse(LUiReg)
-              .addImm(Imm & 0xFFFF);
+      MachineInstrBuilder MTC1 =
+          B.buildInstr(Mips::MTC1, {I.getOperand(0).getReg()}, {GPRReg});
+      if (!MTC1.constrainAllUses(TII, TRI, RBI))
+        return false;
+    }
+    if (Size == 64) {
+      Register GPRRegHigh = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      Register GPRRegLow = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      MachineIRBuilder B(I);
+      if (!materialize32BitImm(GPRRegHigh, APImm.getHiBits(32).trunc(32), B))
+        return false;
+      if (!materialize32BitImm(GPRRegLow, APImm.getLoBits(32).trunc(32), B))
+        return false;
+
+      MachineInstrBuilder PairF64 = B.buildInstr(
+          STI.isFP64bit() ? Mips::BuildPairF64_64 : Mips::BuildPairF64,
+          {I.getOperand(0).getReg()}, {GPRRegLow, GPRRegHigh});
+      if (!PairF64.constrainAllUses(TII, TRI, RBI))
+        return false;
+    }
 
-    if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+    I.eraseFromParent();
+    return true;
+  }
+  case G_FABS: {
+    unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+    unsigned FABSOpcode =
+        Size == 32 ? Mips::FABS_S
+                   : STI.isFP64bit() ? Mips::FABS_D64 : Mips::FABS_D32;
+    MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(FABSOpcode))
+             .add(I.getOperand(0))
+             .add(I.getOperand(1));
+    break;
+  }
+  case G_FPTOSI: {
+    unsigned FromSize = MRI.getType(I.getOperand(1).getReg()).getSizeInBits();
+    unsigned ToSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+    (void)ToSize;
+    assert((ToSize == 32) && "Unsupported integer size for G_FPTOSI");
+    assert((FromSize == 32 || FromSize == 64) &&
+           "Unsupported floating point size for G_FPTOSI");
+
+    unsigned Opcode;
+    if (FromSize == 32)
+      Opcode = Mips::TRUNC_W_S;
+    else
+      Opcode = STI.isFP64bit() ? Mips::TRUNC_W_D64 : Mips::TRUNC_W_D32;
+    unsigned ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass);
+    MachineInstr *Trunc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+                .addDef(ResultInFPR)
+                .addUse(I.getOperand(1).getReg());
+    if (!constrainSelectedInstRegOperands(*Trunc, TII, TRI, RBI))
       return false;
-    if (!constrainSelectedInstRegOperands(*ORi, TII, TRI, RBI))
+
+    MachineInstr *Move = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MFC1))
+                             .addDef(I.getOperand(0).getReg())
+                             .addUse(ResultInFPR);
+    if (!constrainSelectedInstRegOperands(*Move, TII, TRI, RBI))
       return false;
 
     I.eraseFromParent();
     return true;
   }
   case G_GLOBAL_VALUE: {
-    if (MF.getTarget().isPositionIndependent())
-      return false;
-
     const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
-    unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
-    MachineInstr *LUi, *ADDiu;
+    if (MF.getTarget().isPositionIndependent()) {
+      MachineInstr *LWGOT = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
+                                .addDef(I.getOperand(0).getReg())
+                                .addReg(MF.getInfo<MipsFunctionInfo>()
+                                            ->getGlobalBaseRegForGlobalISel())
+                                .addGlobalAddress(GVal);
+      // Global Values that don't have local linkage are handled differently
+      // when they are part of call sequence. MipsCallLowering::lowerCall
+      // creates G_GLOBAL_VALUE instruction as part of call sequence and adds
+      // MO_GOT_CALL flag when Callee doesn't have local linkage.
+      if (I.getOperand(1).getTargetFlags() == MipsII::MO_GOT_CALL)
+        LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT_CALL);
+      else
+        LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT);
+      LWGOT->addMemOperand(
+          MF, MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
+                                      MachineMemOperand::MOLoad, 4, 4));
+      if (!constrainSelectedInstRegOperands(*LWGOT, TII, TRI, RBI))
+        return false;
 
-    LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
-              .addDef(LUiReg)
-              .addGlobalAddress(GVal);
-    LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+      if (GVal->hasLocalLinkage()) {
+        Register LWGOTDef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+        LWGOT->getOperand(0).setReg(LWGOTDef);
 
-    ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+        MachineInstr *ADDiu =
+            BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
                 .addDef(I.getOperand(0).getReg())
-                .addUse(LUiReg)
+                .addReg(LWGOTDef)
                 .addGlobalAddress(GVal);
-    ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
-
-    if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
-      return false;
-    if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
-      return false;
+        ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+        if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+          return false;
+      }
+    } else {
+      Register LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+
+      MachineInstr *LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+                              .addDef(LUiReg)
+                              .addGlobalAddress(GVal);
+      LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+      if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+        return false;
 
+      MachineInstr *ADDiu =
+          BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+              .addDef(I.getOperand(0).getReg())
+              .addUse(LUiReg)
+              .addGlobalAddress(GVal);
+      ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+      if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+        return false;
+    }
     I.eraseFromParent();
     return true;
   }
   case G_ICMP: {
     struct Instr {
-      unsigned Opcode, Def, LHS, RHS;
-      Instr(unsigned Opcode, unsigned Def, unsigned LHS, unsigned RHS)
+      unsigned Opcode;
+      Register Def, LHS, RHS;
+      Instr(unsigned Opcode, Register Def, Register LHS, Register RHS)
           : Opcode(Opcode), Def(Def), LHS(LHS), RHS(RHS){};
 
       bool hasImm() const {
@@ -244,10 +511,10 @@ bool MipsInstructionSelector::select(MachineInstr &I,
     };
 
     SmallVector<struct Instr, 2> Instructions;
-    unsigned ICMPReg = I.getOperand(0).getReg();
-    unsigned Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
-    unsigned LHS = I.getOperand(2).getReg();
-    unsigned RHS = I.getOperand(3).getReg();
+    Register ICMPReg = I.getOperand(0).getReg();
+    Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LHS = I.getOperand(2).getReg();
+    Register RHS = I.getOperand(3).getReg();
     CmpInst::Predicate Cond =
         static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
 
@@ -309,6 +576,84 @@ bool MipsInstructionSelector::select(MachineInstr &I,
     I.eraseFromParent();
     return true;
   }
+  case G_FCMP: {
+    unsigned MipsFCMPCondCode;
+    bool isLogicallyNegated;
+    switch (CmpInst::Predicate Cond = static_cast<CmpInst::Predicate>(
+                I.getOperand(1).getPredicate())) {
+    case CmpInst::FCMP_UNO: // Unordered
+    case CmpInst::FCMP_ORD: // Ordered (OR)
+      MipsFCMPCondCode = Mips::FCOND_UN;
+      isLogicallyNegated = Cond != CmpInst::FCMP_UNO;
+      break;
+    case CmpInst::FCMP_OEQ: // Equal
+    case CmpInst::FCMP_UNE: // Not Equal (NEQ)
+      MipsFCMPCondCode = Mips::FCOND_OEQ;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OEQ;
+      break;
+    case CmpInst::FCMP_UEQ: // Unordered or Equal
+    case CmpInst::FCMP_ONE: // Ordered or Greater Than or Less Than (OGL)
+      MipsFCMPCondCode = Mips::FCOND_UEQ;
+      isLogicallyNegated = Cond != CmpInst::FCMP_UEQ;
+      break;
+    case CmpInst::FCMP_OLT: // Ordered or Less Than
+    case CmpInst::FCMP_UGE: // Unordered or Greater Than or Equal (UGE)
+      MipsFCMPCondCode = Mips::FCOND_OLT;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OLT;
+      break;
+    case CmpInst::FCMP_ULT: // Unordered or Less Than
+    case CmpInst::FCMP_OGE: // Ordered or Greater Than or Equal (OGE)
+      MipsFCMPCondCode = Mips::FCOND_ULT;
+      isLogicallyNegated = Cond != CmpInst::FCMP_ULT;
+      break;
+    case CmpInst::FCMP_OLE: // Ordered or Less Than or Equal
+    case CmpInst::FCMP_UGT: // Unordered or Greater Than (UGT)
+      MipsFCMPCondCode = Mips::FCOND_OLE;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OLE;
+      break;
+    case CmpInst::FCMP_ULE: // Unordered or Less Than or Equal
+    case CmpInst::FCMP_OGT: // Ordered or Greater Than (OGT)
+      MipsFCMPCondCode = Mips::FCOND_ULE;
+      isLogicallyNegated = Cond != CmpInst::FCMP_ULE;
+      break;
+    default:
+      return false;
+    }
+
+    // Default compare result in gpr register will be `true`.
+    // We will move `false` (Mips::ZERO) to gpr result when fcmp gives false
+    // using MOVF_I. When original predicate (Cond) is the logical negation of
+    // MipsFCMPCondCode, result is inverted i.e. MOVT_I is used.
+    unsigned MoveOpcode = isLogicallyNegated ? Mips::MOVT_I : Mips::MOVF_I;
+
+    unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+        .addDef(TrueInReg)
+        .addUse(Mips::ZERO)
+        .addImm(1);
+
+    unsigned Size = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+    unsigned FCMPOpcode =
+        Size == 32 ? Mips::FCMP_S32
+                   : STI.isFP64bit() ? Mips::FCMP_D64 : Mips::FCMP_D32;
+    MachineInstr *FCMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(FCMPOpcode))
+                             .addUse(I.getOperand(2).getReg())
+                             .addUse(I.getOperand(3).getReg())
+                             .addImm(MipsFCMPCondCode);
+    if (!constrainSelectedInstRegOperands(*FCMP, TII, TRI, RBI))
+      return false;
+
+    MachineInstr *Move = BuildMI(MBB, I, I.getDebugLoc(), TII.get(MoveOpcode))
+                             .addDef(I.getOperand(0).getReg())
+                             .addUse(Mips::ZERO)
+                             .addUse(Mips::FCC0)
+                             .addUse(TrueInReg);
+    if (!constrainSelectedInstRegOperands(*Move, TII, TRI, RBI))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
   default:
     return false;
   }
diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp
index c629f02af00e..e442a81837ed 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -25,35 +24,65 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
   const LLT s64 = LLT::scalar(64);
   const LLT p0 = LLT::pointer(0, 32);
 
-  getActionDefinitionsBuilder(G_ADD)
+  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
       .legalFor({s32})
       .clampScalar(0, s32, s32);
 
-  getActionDefinitionsBuilder(G_UADDE)
+  getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
       .lowerFor({{s32, s1}});
 
+  getActionDefinitionsBuilder(G_UMULH)
+      .legalFor({s32})
+      .maxScalar(0, s32);
+
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
-      .legalForCartesianProduct({p0, s32}, {p0});
+      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+                                 {s32, p0, 16, 8},
+                                 {s32, p0, 32, 8},
+                                 {s64, p0, 64, 8},
+                                 {p0, p0, 32, 8}})
+      .minScalar(0, s32);
+
+  getActionDefinitionsBuilder(G_UNMERGE_VALUES)
+     .legalFor({{s32, s64}});
+
+  getActionDefinitionsBuilder(G_MERGE_VALUES)
+     .legalFor({{s64, s32}});
+
+  getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
+    .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+                               {s32, p0, 16, 8}})
+      .minScalar(0, s32);
 
   getActionDefinitionsBuilder(G_SELECT)
-      .legalForCartesianProduct({p0, s32}, {s32})
+      .legalForCartesianProduct({p0, s32, s64}, {s32})
       .minScalar(0, s32)
       .minScalar(1, s32);
 
+  getActionDefinitionsBuilder(G_BRCOND)
+      .legalFor({s32})
+      .minScalar(0, s32);
+
+  getActionDefinitionsBuilder(G_PHI)
+      .legalFor({p0, s32, s64})
+      .minScalar(0, s32);
+
   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
       .legalFor({s32})
       .clampScalar(0, s32, s32);
 
-  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
-      .legalFor({s32});
-
   getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
       .legalFor({s32})
       .minScalar(0, s32)
       .libcallFor({s64});
 
+  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
+    .legalFor({s32, s32})
+    .minScalar(1, s32);
+
   getActionDefinitionsBuilder(G_ICMP)
-      .legalFor({{s32, s32}})
+      .legalForCartesianProduct({s32}, {s32, p0})
+      .clampScalar(1, s32, s32)
       .minScalar(0, s32);
 
   getActionDefinitionsBuilder(G_CONSTANT)
@@ -69,6 +98,46 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
   getActionDefinitionsBuilder(G_GLOBAL_VALUE)
       .legalFor({p0});
 
+  // FP instructions
+  getActionDefinitionsBuilder(G_FCONSTANT)
+      .legalFor({s32, s64});
+
+  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
+      .legalFor({s32, s64});
+
+  getActionDefinitionsBuilder(G_FCMP)
+      .legalFor({{s32, s32}, {s32, s64}})
+      .minScalar(0, s32);
+
+  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
+      .libcallFor({s32, s64});
+
+  getActionDefinitionsBuilder(G_FPEXT)
+      .legalFor({{s64, s32}});
+
+  getActionDefinitionsBuilder(G_FPTRUNC)
+      .legalFor({{s32, s64}});
+
+  // FP to int conversion instructions
+  getActionDefinitionsBuilder(G_FPTOSI)
+      .legalForCartesianProduct({s32}, {s64, s32})
+      .libcallForCartesianProduct({s64}, {s64, s32})
+      .minScalar(0, s32);
+
+  getActionDefinitionsBuilder(G_FPTOUI)
+      .libcallForCartesianProduct({s64}, {s64, s32})
+      .minScalar(0, s32);
+
+  // Int to FP conversion instructions
+  getActionDefinitionsBuilder(G_SITOFP)
+      .legalForCartesianProduct({s64, s32}, {s32})
+      .libcallForCartesianProduct({s64, s32}, {s64})
+      .minScalar(1, s32);
+
+  getActionDefinitionsBuilder(G_UITOFP)
+      .libcallForCartesianProduct({s64, s32}, {s64})
+      .minScalar(1, s32);
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/lib/Target/Mips/MipsLegalizerInfo.h b/lib/Target/Mips/MipsLegalizerInfo.h
index 75fadd6cf613..e5021e081890 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/lib/Target/Mips/MipsLegalizerInfo.h
@@ -1,9 +1,8 @@
 //===- MipsLegalizerInfo ----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 46b37ceae391..fd984058a2bf 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -117,6 +116,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case MipsII::MO_CALL_LO16:
     TargetKind = MipsMCExpr::MEK_CALL_LO16;
     break;
+  case MipsII::MO_JALR:
+    return MCOperand();
   }
 
   switch (MOTy) {
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index e19f21c98839..29af6f21de82 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -1,9 +1,8 @@
 //===- MipsMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index d4e225678184..2bfc92c85e96 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -1,9 +1,8 @@
 //===- MipsMSAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index eecc7c573df1..907ed9ef746f 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1,9 +1,8 @@
 //===- MipsMSAInstrInfo.td - MSA ASE instructions -*- tablegen ------------*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1240,6 +1239,7 @@ class MSA_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
       MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, ImmOp:$n),
                 [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), Imm:$n))]> {
   bit usesCustomInserter = 1;
+  bit hasNoSchedulingInfo = 1;
 }
 
 class MSA_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -1447,6 +1447,7 @@ class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
                 [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
                                         ROIdx:$n))]> {
   bit usesCustomInserter = 1;
+  bit hasNoSchedulingInfo = 1;
   string Constraints = "$wd = $wd_in";
 }
 
@@ -2044,7 +2045,7 @@ class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w,
 // 1.0 when we only need to match ISD::FEXP2.
 class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", mul_fexp2, MSA128WOpnd>;
 class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", mul_fexp2, MSA128DOpnd>;
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
   class FEXP2_W_1_PSEUDO_DESC :
       MSAPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws),
                 [(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>;
@@ -3738,6 +3739,7 @@ class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
              (ins RCWS:$ws),
              [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
   bit usesCustomInserter = 1;
+  bit hasNoSchedulingInfo = 1;
 }
 
 def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
@@ -3765,52 +3767,38 @@ def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
 // Pseudoes used to implement transparent fp16 support.
 
 let ASEPredicate = [HasMSA] in {
- def ST_F16 : MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr),
-                          [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]> {
-   let usesCustomInserter = 1;
- }
-
- def LD_F16 : MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr),
-                         [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]> {
-   let usesCustomInserter = 1;
- }
-
- def MSA_FP_EXTEND_W_PSEUDO : MipsPseudo<(outs FGR32Opnd:$fd),
-                                         (ins MSA128F16:$ws),
-                              [(set FGR32Opnd:$fd,
-                                    (f32 (fpextend MSA128F16:$ws)))]> {
-  let usesCustomInserter = 1;
- }
-
- def MSA_FP_ROUND_W_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
-                                        (ins FGR32Opnd:$fs),
-                              [(set MSA128F16:$wd,
-                                    (f16 (fpround FGR32Opnd:$fs)))]> {
-  let usesCustomInserter = 1;
- }
-
- def MSA_FP_EXTEND_D_PSEUDO : MipsPseudo<(outs FGR64Opnd:$fd),
-                                         (ins MSA128F16:$ws),
-                              [(set FGR64Opnd:$fd,
-                                    (f64 (fpextend MSA128F16:$ws)))]> {
-  let usesCustomInserter = 1;
- }
-
- def MSA_FP_ROUND_D_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
-                                        (ins FGR64Opnd:$fs),
-                              [(set MSA128F16:$wd,
-                                    (f16 (fpround FGR64Opnd:$fs)))]> {
-  let usesCustomInserter = 1;
- }
-
- def : MipsPat<(MipsTruncIntFP MSA128F16:$ws),
-               (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>, ISA_MIPS1,
-               ASE_MSA;
-
- def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond),
-               (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws),
-                         (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>,
-       ISA_MIPS1_NOT_32R6_64R6, ASE_MSA;
+  let usesCustomInserter = 1 in {
+    def ST_F16 :
+        MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr),
+                   [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]>;
+    def LD_F16 :
+        MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr),
+                   [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]>;
+  }
+
+  let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+    def MSA_FP_EXTEND_W_PSEUDO :
+        MipsPseudo<(outs FGR32Opnd:$fd), (ins MSA128F16:$ws),
+                   [(set FGR32Opnd:$fd, (f32 (fpextend MSA128F16:$ws)))]>;
+    def MSA_FP_ROUND_W_PSEUDO :
+        MipsPseudo<(outs MSA128F16:$wd), (ins FGR32Opnd:$fs),
+                   [(set MSA128F16:$wd, (f16 (fpround FGR32Opnd:$fs)))]>;
+    def MSA_FP_EXTEND_D_PSEUDO :
+        MipsPseudo<(outs FGR64Opnd:$fd), (ins MSA128F16:$ws),
+                   [(set FGR64Opnd:$fd, (f64 (fpextend MSA128F16:$ws)))]>;
+    def MSA_FP_ROUND_D_PSEUDO :
+        MipsPseudo<(outs MSA128F16:$wd), (ins FGR64Opnd:$fs),
+                   [(set MSA128F16:$wd, (f16 (fpround FGR64Opnd:$fs)))]>;
+  }
+
+  def : MipsPat<(MipsTruncIntFP MSA128F16:$ws),
+                (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>,
+        ISA_MIPS1, ASE_MSA;
+
+  def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond),
+                (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws),
+                          (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>,
+        ISA_MIPS1_NOT_32R6_64R6, ASE_MSA;
 }
 
 def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td
index c2c22e2ad61c..22c290b1c114 100644
--- a/lib/Target/Mips/MipsMTInstrFormats.td
+++ b/lib/Target/Mips/MipsMTInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td
index 72e626cbec40..3edeb57b1876 100644
--- a/lib/Target/Mips/MipsMTInstrInfo.td
+++ b/lib/Target/Mips/MipsMTInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 81b4352670c0..85b20fc58231 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -1,9 +1,8 @@
 //===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -45,13 +44,109 @@ static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) {
   return Mips::GPR32RegClass;
 }
 
-unsigned MipsFunctionInfo::getGlobalBaseReg() {
+Register MipsFunctionInfo::getGlobalBaseReg() {
   if (!GlobalBaseReg)
     GlobalBaseReg =
         MF.getRegInfo().createVirtualRegister(&getGlobalBaseRegClass(MF));
   return GlobalBaseReg;
 }
 
+Register MipsFunctionInfo::getGlobalBaseRegForGlobalISel() {
+  if (!GlobalBaseReg) {
+    getGlobalBaseReg();
+    initGlobalBaseReg();
+  }
+  return GlobalBaseReg;
+}
+
+void MipsFunctionInfo::initGlobalBaseReg() {
+  if (!GlobalBaseReg)
+    return;
+
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator I = MBB.begin();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  DebugLoc DL;
+  unsigned V0, V1;
+  const TargetRegisterClass *RC;
+  const MipsABIInfo &ABI =
+      static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
+  RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+
+  V0 = RegInfo.createVirtualRegister(RC);
+  V1 = RegInfo.createVirtualRegister(RC);
+
+  if (ABI.IsN64()) {
+    MF.getRegInfo().addLiveIn(Mips::T9_64);
+    MBB.addLiveIn(Mips::T9_64);
+
+    // lui $v0, %hi(%neg(%gp_rel(fname)))
+    // daddu $v1, $v0, $t9
+    // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+    const GlobalValue *FName = &MF.getFunction();
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+        .addReg(Mips::T9_64);
+    BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+    return;
+  }
+
+  if (!MF.getTarget().isPositionIndependent()) {
+    // Set global register to __gnu_local_gp.
+    //
+    // lui   $v0, %hi(__gnu_local_gp)
+    // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+        .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+        .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+    return;
+  }
+
+  MF.getRegInfo().addLiveIn(Mips::T9);
+  MBB.addLiveIn(Mips::T9);
+
+  if (ABI.IsN32()) {
+    // lui $v0, %hi(%neg(%gp_rel(fname)))
+    // addu $v1, $v0, $t9
+    // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+    const GlobalValue *FName = &MF.getFunction();
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+    return;
+  }
+
+  assert(ABI.IsO32());
+
+  // For O32 ABI, the following instruction sequence is emitted to initialize
+  // the global base register:
+  //
+  //  0. lui   $2, %hi(_gp_disp)
+  //  1. addiu $2, $2, %lo(_gp_disp)
+  //  2. addu  $globalbasereg, $2, $t9
+  //
+  // We emit only the last instruction here.
+  //
+  // GNU linker requires that the first two instructions appear at the beginning
+  // of a function and no instructions be inserted before or between them.
+  // The two instructions are emitted during lowering to MC layer in order to
+  // avoid any reordering.
+  //
+  // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+  // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+  // reads it.
+  MF.getRegInfo().addLiveIn(Mips::V0);
+  MBB.addLiveIn(Mips::V0);
+  BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+      .addReg(Mips::V0).addReg(Mips::T9);
+}
+
 void MipsFunctionInfo::createEhDataRegsFI() {
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   for (int I = 0; I < 4; ++I) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 553a66703b26..aaa1e0e18441 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -1,9 +1,8 @@
 //===- MipsMachineFunctionInfo.h - Private data used for Mips ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,7 +32,12 @@ public:
   void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
 
   bool globalBaseRegSet() const;
-  unsigned getGlobalBaseReg();
+  Register getGlobalBaseReg();
+  Register getGlobalBaseRegForGlobalISel();
+
+  // Insert instructions to initialize the global base register in the
+  // first MBB of the function.
+  void initGlobalBaseReg();
 
   int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
   void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 27bc4843f410..5ef07a2d283e 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -1,9 +1,8 @@
 //===- MipsOptimizePICCall.cpp - Optimize PIC Calls -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 4708784063d3..7897095ef894 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -1,9 +1,8 @@
 //===- MipsOptionRecord.h - Abstraction for storing information -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 4edcb3132ada..ac4e55f8a1f5 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -1,9 +1,8 @@
 //===---- MipsOs16.cpp for Mips Option -Os16                       --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 1cff1c8396ea..85076590d407 100644
--- a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -1,9 +1,8 @@
 //=== lib/CodeGen/GlobalISel/MipsPreLegalizerCombiner.cpp --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -35,6 +35,16 @@ public:
 bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                            MachineInstr &MI,
                                            MachineIRBuilder &B) const {
+  CombinerHelper Helper(Observer, B);
+
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_SEXTLOAD:
+  case TargetOpcode::G_ZEXTLOAD:
+    return Helper.tryCombineExtendingLoads(MI);
+  }
   return false;
 }
 
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1f10189df..d8bcf16afd50 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- MipsRegisterBankInfo.cpp ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -11,36 +10,55 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
-#include "MipsInstrInfo.h"
 #include "MipsRegisterBankInfo.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 #define GET_TARGET_REGBANK_IMPL
 
-#define DEBUG_TYPE "registerbankinfo"
-
 #include "MipsGenRegisterBank.inc"
 
 namespace llvm {
 namespace Mips {
 enum PartialMappingIdx {
   PMI_GPR,
+  PMI_SPR,
+  PMI_DPR,
   PMI_Min = PMI_GPR,
 };
 
 RegisterBankInfo::PartialMapping PartMappings[]{
-    {0, 32, GPRBRegBank}
+    {0, 32, GPRBRegBank},
+    {0, 32, FPRBRegBank},
+    {0, 64, FPRBRegBank}
 };
 
-enum ValueMappingIdx { InvalidIdx = 0, GPRIdx = 1 };
+enum ValueMappingIdx {
+    InvalidIdx = 0,
+    GPRIdx = 1,
+    SPRIdx = 4,
+    DPRIdx = 7
+};
 
 RegisterBankInfo::ValueMapping ValueMappings[] = {
     // invalid
     {nullptr, 0},
-    // 3 operands in GPRs
+    // up to 3 operands in GPRs
     {&PartMappings[PMI_GPR - PMI_Min], 1},
     {&PartMappings[PMI_GPR - PMI_Min], 1},
-    {&PartMappings[PMI_GPR - PMI_Min], 1}};
+    {&PartMappings[PMI_GPR - PMI_Min], 1},
+    // up to 3 operands in FPRs - single precision
+    {&PartMappings[PMI_SPR - PMI_Min], 1},
+    {&PartMappings[PMI_SPR - PMI_Min], 1},
+    {&PartMappings[PMI_SPR - PMI_Min], 1},
+    // up to 3 operands in FPRs - double precision
+    {&PartMappings[PMI_DPR - PMI_Min], 1},
+    {&PartMappings[PMI_DPR - PMI_Min], 1},
+    {&PartMappings[PMI_DPR - PMI_Min], 1}
+};
 
 } // end namespace Mips
 } // end namespace llvm
@@ -62,30 +80,313 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
   case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
   case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
   case Mips::SP32RegClassID:
+  case Mips::GP32RegClassID:
     return getRegBank(Mips::GPRBRegBankID);
+  case Mips::FGRCCRegClassID:
+  case Mips::FGR32RegClassID:
+  case Mips::FGR64RegClassID:
+  case Mips::AFGR64RegClassID:
+    return getRegBank(Mips::FPRBRegBankID);
   default:
     llvm_unreachable("Register class not supported");
   }
 }
 
+// Instructions where all register operands are floating point.
+static bool isFloatingPointOpcode(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_FCONSTANT:
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FABS:
+  case TargetOpcode::G_FSQRT:
+  case TargetOpcode::G_FCEIL:
+  case TargetOpcode::G_FFLOOR:
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTRUNC:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Instructions where use operands are floating point registers.
+// Def operands are general purpose.
+static bool isFloatingPointOpcodeUse(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FCMP:
+  case Mips::MFC1:
+  case Mips::ExtractElementF64:
+  case Mips::ExtractElementF64_64:
+    return true;
+  default:
+    return isFloatingPointOpcode(Opc);
+  }
+}
+
+// Instructions where def operands are floating point registers.
+// Use operands are general purpose.
+static bool isFloatingPointOpcodeDef(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+  case Mips::MTC1:
+  case Mips::BuildPairF64:
+  case Mips::BuildPairF64_64:
+    return true;
+  default:
+    return isFloatingPointOpcode(Opc);
+  }
+}
+
+static bool isAmbiguous(unsigned Opc) {
+  switch (Opc) {
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE:
+  case TargetOpcode::G_PHI:
+  case TargetOpcode::G_SELECT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addDefUses(
+    Register Reg, const MachineRegisterInfo &MRI) {
+  assert(!MRI.getType(Reg).isPointer() &&
+         "Pointers are gprb, they should not be considered as ambiguous.\n");
+  for (MachineInstr &UseMI : MRI.use_instructions(Reg)) {
+    MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI);
+    // COPY to a vreg that does not have exactly one use: recurse on its uses.
+    if (NonCopyInstr->getOpcode() == TargetOpcode::COPY &&
+        !TargetRegisterInfo::isPhysicalRegister(
+            NonCopyInstr->getOperand(0).getReg()))
+      addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI);
+    else
+      DefUses.push_back(NonCopyInstr); // Reuse the result computed above.
+  }
+}
+
+void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addUseDef(
+    Register Reg, const MachineRegisterInfo &MRI) {
+  assert(!MRI.getType(Reg).isPointer() &&
+         "Pointers are gprb, they should not be considered as ambiguous.\n");
+  MachineInstr *DefMI = MRI.getVRegDef(Reg);
+  UseDefs.push_back(skipCopiesIncoming(DefMI));
+}
+
+MachineInstr *
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesOutgoing(
+    MachineInstr *MI) const {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineInstr *Ret = MI;
+  while (Ret->getOpcode() == TargetOpcode::COPY &&
+         !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(0).getReg()) &&
+         MRI.hasOneUse(Ret->getOperand(0).getReg())) {
+    Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg()));
+  }
+  return Ret;
+}
+
+MachineInstr *
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesIncoming(
+    MachineInstr *MI) const {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineInstr *Ret = MI;
+  while (Ret->getOpcode() == TargetOpcode::COPY &&
+         !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(1).getReg()))
+    Ret = MRI.getVRegDef(Ret->getOperand(1).getReg());
+  return Ret;
+}
+
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer(
+    const MachineInstr *MI) {
+  assert(isAmbiguous(MI->getOpcode()) &&
+         "Not implemented for non Ambiguous opcode.\n");
+
+  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+
+  if (MI->getOpcode() == TargetOpcode::G_LOAD)
+    addDefUses(MI->getOperand(0).getReg(), MRI);
+
+  if (MI->getOpcode() == TargetOpcode::G_STORE)
+    addUseDef(MI->getOperand(0).getReg(), MRI);
+
+  if (MI->getOpcode() == TargetOpcode::G_PHI) {
+    addDefUses(MI->getOperand(0).getReg(), MRI);
+
+    for (unsigned i = 1; i < MI->getNumOperands(); i += 2)
+      addUseDef(MI->getOperand(i).getReg(), MRI);
+  }
+
+  if (MI->getOpcode() == TargetOpcode::G_SELECT) {
+    addDefUses(MI->getOperand(0).getReg(), MRI);
+
+    addUseDef(MI->getOperand(2).getReg(), MRI);
+    addUseDef(MI->getOperand(3).getReg(), MRI);
+  }
+}
+
+bool MipsRegisterBankInfo::TypeInfoForMF::visit(
+    const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI) {
+  assert(isAmbiguous(MI->getOpcode()) && "Visiting non-Ambiguous opcode.\n");
+  if (wasVisited(MI))
+    return true; // InstType has already been determined for MI.
+
+  startVisit(MI);
+  AmbiguousRegDefUseContainer DefUseContainer(MI);
+
+  // Visit instructions where MI's DEF operands are USED.
+  if (visitAdjacentInstrs(MI, DefUseContainer.getDefUses(), true))
+    return true;
+
+  // Visit instructions that DEFINE MI's USE operands.
+  if (visitAdjacentInstrs(MI, DefUseContainer.getUseDefs(), false))
+    return true;
+
+  // All of MI's adjacent instructions are ambiguous.
+  if (!WaitingForTypeOfMI) {
+    // This is a chain of ambiguous instructions.
+    setTypes(MI, InstType::Ambiguous);
+    return true;
+  }
+  // Excluding WaitingForTypeOfMI, MI is either connected to chains of ambiguous
+  // instructions or has no other adjacent instructions. Either way, InstType
+  // could not be determined. There could be an unexplored path from some of
+  // WaitingForTypeOfMI's adjacent instructions to an instruction with only one
+  // mapping available.
+  // We are done with this branch; add MI to WaitingForTypeOfMI's WaitingQueue,
+  // so that when WaitingForTypeOfMI figures out its InstType, the same InstType
+  // will be assigned to all instructions in this branch.
+  addToWaitingQueue(WaitingForTypeOfMI, MI);
+  return false;
+}
+
+bool MipsRegisterBankInfo::TypeInfoForMF::visitAdjacentInstrs(
+    const MachineInstr *MI, SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
+    bool isDefUse) {
+  while (!AdjacentInstrs.empty()) {
+    MachineInstr *AdjMI = AdjacentInstrs.pop_back_val();
+
+    if (isDefUse ? isFloatingPointOpcodeUse(AdjMI->getOpcode())
+                 : isFloatingPointOpcodeDef(AdjMI->getOpcode())) {
+      setTypes(MI, InstType::FloatingPoint);
+      return true;
+    }
+
+    // Determine InstType from register bank of phys register that is
+    // 'isDefUse ? def : use' of this copy.
+    if (AdjMI->getOpcode() == TargetOpcode::COPY) {
+      setTypesAccordingToPhysicalRegister(MI, AdjMI, isDefUse ? 0 : 1);
+      return true;
+    }
+
+    // Defaults to integer instruction. Includes G_MERGE_VALUES and
+    // G_UNMERGE_VALUES.
+    if (!isAmbiguous(AdjMI->getOpcode())) {
+      setTypes(MI, InstType::Integer);
+      return true;
+    }
+
+    // When AdjMI was visited first, MI has to continue to explore remaining
+    // adjacent instructions and determine InstType without visiting AdjMI.
+    if (!wasVisited(AdjMI) ||
+        getRecordedTypeForInstr(AdjMI) != InstType::NotDetermined) {
+      if (visit(AdjMI, MI)) {
+        // InstType is successfully determined and is same as for AdjMI.
+        setTypes(MI, getRecordedTypeForInstr(AdjMI));
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::setTypes(const MachineInstr *MI,
+                                                   InstType InstTy) {
+  changeRecordedTypeForInstr(MI, InstTy);
+  for (const MachineInstr *WaitingInstr : getWaitingQueueFor(MI)) {
+    setTypes(WaitingInstr, InstTy);
+  }
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister(
+    const MachineInstr *MI, const MachineInstr *CopyInst, unsigned Op) {
+  assert((TargetRegisterInfo::isPhysicalRegister(
+             CopyInst->getOperand(Op).getReg())) &&
+         "Copies of non physical registers should not be considered here.\n");
+
+  const MachineFunction &MF = *CopyInst->getMF();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+  const RegisterBankInfo &RBI =
+      *CopyInst->getMF()->getSubtarget().getRegBankInfo();
+  const RegisterBank *Bank =
+      RBI.getRegBank(CopyInst->getOperand(Op).getReg(), MRI, TRI);
+
+  if (Bank == &Mips::FPRBRegBank)
+    setTypes(MI, InstType::FloatingPoint);
+  else if (Bank == &Mips::GPRBRegBank)
+    setTypes(MI, InstType::Integer);
+  else
+    llvm_unreachable("Unsupported register bank.\n");
+}
+
+MipsRegisterBankInfo::InstType
+MipsRegisterBankInfo::TypeInfoForMF::determineInstType(const MachineInstr *MI) {
+  visit(MI, nullptr);
+  return getRecordedTypeForInstr(MI);
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction(
+    llvm::StringRef FunctionName) {
+  if (MFName != FunctionName) {
+    MFName = FunctionName;
+    WaitingQueues.clear();
+    Types.clear();
+  }
+}
+
 const RegisterBankInfo::InstructionMapping &
 MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
+  static TypeInfoForMF TI;
+
+  // Reset TI internal data when MF changes.
+  TI.cleanupIfNewFunction(MI.getMF()->getName());
+
   unsigned Opc = MI.getOpcode();
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
-  if (Mapping.isValid())
-    return Mapping;
+  if (MI.getOpcode() != TargetOpcode::G_PHI) {
+    const RegisterBankInfo::InstructionMapping &Mapping =
+        getInstrMappingImpl(MI);
+    if (Mapping.isValid())
+      return Mapping;
+  }
 
   using namespace TargetOpcode;
 
   unsigned NumOperands = MI.getNumOperands();
   const ValueMapping *OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
+  unsigned MappingID = DefaultMappingID;
+  const unsigned CustomMappingID = 1;
 
   switch (Opc) {
+  case G_TRUNC:
   case G_ADD:
-  case G_LOAD:
-  case G_STORE:
+  case G_SUB:
+  case G_MUL:
+  case G_UMULH:
+  case G_ZEXTLOAD:
+  case G_SEXTLOAD:
   case G_GEP:
   case G_AND:
   case G_OR:
@@ -99,9 +400,183 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case G_UREM:
     OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
     break;
+  case G_LOAD: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    InstType InstTy = InstType::Integer;
+    if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+      InstTy = TI.determineInstType(&MI);
+    }
+
+    if (InstTy == InstType::FloatingPoint ||
+        (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+      OperandsMapping =
+          getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                                         : &Mips::ValueMappings[Mips::DPRIdx],
+                              &Mips::ValueMappings[Mips::GPRIdx]});
+      break;
+    } else { // gprb
+      OperandsMapping =
+          getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+                                         : &Mips::ValueMappings[Mips::DPRIdx],
+                              &Mips::ValueMappings[Mips::GPRIdx]});
+      if (Size == 64)
+        MappingID = CustomMappingID;
+    }
+
+    break;
+  }
+  case G_STORE: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    InstType InstTy = InstType::Integer;
+    if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+      InstTy = TI.determineInstType(&MI);
+    }
+
+    if (InstTy == InstType::FloatingPoint ||
+        (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+      OperandsMapping =
+          getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                                         : &Mips::ValueMappings[Mips::DPRIdx],
+                              &Mips::ValueMappings[Mips::GPRIdx]});
+      break;
+    } else { // gprb
+      OperandsMapping =
+          getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+                                         : &Mips::ValueMappings[Mips::DPRIdx],
+                              &Mips::ValueMappings[Mips::GPRIdx]});
+      if (Size == 64)
+        MappingID = CustomMappingID;
+    }
+    break;
+  }
+  case G_PHI: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    InstType InstTy = InstType::Integer;
+    if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+      InstTy = TI.determineInstType(&MI);
+    }
+
+    // PHI is copylike and should have one regbank in mapping for def register.
+    if (InstTy == InstType::Integer && Size == 64) { // fprb
+      OperandsMapping =
+          getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]});
+      return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping,
+                                   /*NumOperands=*/1);
+    }
+    // Use default handling for PHI, i.e. set reg bank of def operand to match
+    // register banks of use operands.
+    const RegisterBankInfo::InstructionMapping &Mapping =
+        getInstrMappingImpl(MI);
+    return Mapping;
+  }
+  case G_SELECT: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    InstType InstTy = InstType::Integer;
+    if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+      InstTy = TI.determineInstType(&MI);
+    }
+
+    if (InstTy == InstType::FloatingPoint ||
+        (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+      const RegisterBankInfo::ValueMapping *Bank =
+          Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                     : &Mips::ValueMappings[Mips::DPRIdx];
+      OperandsMapping = getOperandsMapping(
+          {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
+      break;
+    } else { // gprb
+      const RegisterBankInfo::ValueMapping *Bank =
+          Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+                     : &Mips::ValueMappings[Mips::DPRIdx];
+      OperandsMapping = getOperandsMapping(
+          {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
+      if (Size == 64)
+        MappingID = CustomMappingID;
+    }
+    break;
+  }
+  case G_UNMERGE_VALUES: {
+    OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
+                                          &Mips::ValueMappings[Mips::GPRIdx],
+                                          &Mips::ValueMappings[Mips::DPRIdx]});
+    MappingID = CustomMappingID;
+    break;
+  }
+  case G_MERGE_VALUES: {
+    OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
+                                          &Mips::ValueMappings[Mips::GPRIdx],
+                                          &Mips::ValueMappings[Mips::GPRIdx]});
+    MappingID = CustomMappingID;
+    break;
+  }
+  case G_FADD:
+  case G_FSUB:
+  case G_FMUL:
+  case G_FDIV:
+  case G_FABS:
+  case G_FSQRT:{
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+    OperandsMapping = Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                                 : &Mips::ValueMappings[Mips::DPRIdx];
+    break;
+  }
+  case G_FCONSTANT: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+    const RegisterBankInfo::ValueMapping *FPRValueMapping =
+        Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                   : &Mips::ValueMappings[Mips::DPRIdx];
+    OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr});
+    break;
+  }
+  case G_FCMP: {
+    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+    const RegisterBankInfo::ValueMapping *FPRValueMapping =
+        Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                   : &Mips::ValueMappings[Mips::DPRIdx];
+    OperandsMapping =
+        getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+                            FPRValueMapping, FPRValueMapping});
+    break;
+  }
+  case G_FPEXT:
+    OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
+                                          &Mips::ValueMappings[Mips::SPRIdx]});
+    break;
+  case G_FPTRUNC:
+    OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::SPRIdx],
+                                          &Mips::ValueMappings[Mips::DPRIdx]});
+    break;
+  case G_FPTOSI: {
+    unsigned SizeFP = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    assert((MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 32) &&
+           "Unsupported integer size");
+    assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
+    OperandsMapping = getOperandsMapping({
+        &Mips::ValueMappings[Mips::GPRIdx],
+        SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                     : &Mips::ValueMappings[Mips::DPRIdx],
+    });
+    break;
+  }
+  case G_SITOFP: {
+    unsigned SizeInt = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned SizeFP = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    (void)SizeInt;
+    assert((SizeInt == 32) && "Unsupported integer size");
+    assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
+    OperandsMapping =
+        getOperandsMapping({SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                                         : &Mips::ValueMappings[Mips::DPRIdx],
+                            &Mips::ValueMappings[Mips::GPRIdx]});
+    break;
+  }
   case G_CONSTANT:
   case G_FRAME_INDEX:
   case G_GLOBAL_VALUE:
+  case G_BRCOND:
     OperandsMapping =
         getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
     break;
@@ -111,17 +586,92 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
                             &Mips::ValueMappings[Mips::GPRIdx],
                             &Mips::ValueMappings[Mips::GPRIdx]});
     break;
-  case G_SELECT:
-    OperandsMapping =
-        getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
-                            &Mips::ValueMappings[Mips::GPRIdx],
-                            &Mips::ValueMappings[Mips::GPRIdx],
-                            &Mips::ValueMappings[Mips::GPRIdx]});
-    break;
   default:
     return getInvalidInstructionMapping();
   }
 
-  return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping,
+  return getInstructionMapping(MappingID, /*Cost=*/1, OperandsMapping,
                                NumOperands);
 }
+
+using InstListTy = GISelWorkList<4>;
+namespace {
+class InstManager : public GISelChangeObserver {
+  InstListTy &InstList;
+
+public:
+  InstManager(InstListTy &Insts) : InstList(Insts) {}
+
+  void createdInstr(MachineInstr &MI) override { InstList.insert(&MI); }
+  void erasingInstr(MachineInstr &MI) override {}
+  void changingInstr(MachineInstr &MI) override {}
+  void changedInstr(MachineInstr &MI) override {}
+};
+} // end anonymous namespace
+
+/// Here we have to narrowScalar s64 operands to s32, combine away
+/// G_MERGE/G_UNMERGE and erase instructions that became dead in the process.
+/// We manually assign 32 bit gprb to register operands of all new instructions
+/// that got created in the process since they will not end up in RegBankSelect
+/// loop. Careful not to delete instruction after MI i.e. MI.getIterator()++.
+void MipsRegisterBankInfo::applyMappingImpl(
+    const OperandsMapper &OpdMapper) const {
+  MachineInstr &MI = OpdMapper.getMI();
+  InstListTy NewInstrs;
+  MachineIRBuilder B(MI);
+  MachineFunction *MF = MI.getMF();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+  InstManager NewInstrObserver(NewInstrs);
+  GISelObserverWrapper WrapperObserver(&NewInstrObserver);
+  LegalizerHelper Helper(*MF, WrapperObserver, B);
+  LegalizationArtifactCombiner ArtCombiner(
+      B, MF->getRegInfo(), *MF->getSubtarget().getLegalizerInfo());
+
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_STORE:
+  case TargetOpcode::G_PHI:
+  case TargetOpcode::G_SELECT: {
+    Helper.narrowScalar(MI, 0, LLT::scalar(32));
+    // Handle new instructions.
+    while (!NewInstrs.empty()) {
+      MachineInstr *NewMI = NewInstrs.pop_back_val();
+      // This is new G_UNMERGE that was created during narrowScalar and will
+      // not be considered for regbank selection. RegBankSelect for mips
+      // visits/makes corresponding G_MERGE first. Combine them here.
+      if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+        SmallVector<MachineInstr *, 2> DeadInstrs;
+        ArtCombiner.tryCombineMerges(*NewMI, DeadInstrs);
+        for (MachineInstr *DeadMI : DeadInstrs)
+          DeadMI->eraseFromParent();
+      }
+      // This G_MERGE will be combined away when its corresponding G_UNMERGE
+      // gets regBankSelected.
+      else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
+        continue;
+      else
+        // Manually set register banks for all register operands to 32 bit gprb.
+        for (auto Op : NewMI->operands()) {
+          if (Op.isReg()) {
+            assert(MRI.getType(Op.getReg()).getSizeInBits() == 32 &&
+                   "Only 32 bit gprb is handled here.\n");
+            MRI.setRegBank(Op.getReg(), getRegBank(Mips::GPRBRegBankID));
+          }
+        }
+    }
+    return;
+  }
+  case TargetOpcode::G_UNMERGE_VALUES: {
+    SmallVector<MachineInstr *, 2> DeadInstrs;
+    ArtCombiner.tryCombineMerges(MI, DeadInstrs);
+    for (MachineInstr *DeadMI : DeadInstrs)
+      DeadMI->eraseFromParent();
+    return;
+  }
+  default:
+    break;
+  }
+
+  return applyDefaultMapping(OpdMapper);
+}
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.h b/lib/Target/Mips/MipsRegisterBankInfo.h
index 64a79abaa74d..176813c031ed 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- MipsRegisterBankInfo.h -----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -38,6 +37,131 @@ public:
 
   const InstructionMapping &
   getInstrMapping(const MachineInstr &MI) const override;
+
+  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+private:
+  /// Some instructions are used with both floating point and integer operands.
+  /// We assign InstType to such instructions as it helps us to avoid cross bank
+  /// copies. InstType depends on context.
+  enum InstType {
+    /// Temporary type. When visit(..., nullptr) finishes it becomes one of
+    /// the remaining types: Integer, FloatingPoint or Ambiguous.
+    NotDetermined,
+    /// Connected with instruction that interprets 'bags of bits' as integers.
+    /// Select gprb to avoid cross bank copies.
+    Integer,
+    /// Connected with instruction that interprets 'bags of bits' as floating
+    /// point numbers. Select fprb to avoid cross bank copies.
+    FloatingPoint,
+    /// Represents moving 'bags of bits' around. Select same bank for entire
+    /// chain to avoid cross bank copies. Currently we select fprb for s64 and
+    /// gprb for s32 Ambiguous operands.
+    Ambiguous
+  };
+
+  /// Some generic instructions have operands that can be mapped to either fprb
+  /// or gprb e.g. for G_LOAD we consider only operand 0 as ambiguous, operand 1
+  /// is always gprb since it is a pointer.
+  /// This class provides containers for MI's ambiguous:
+  /// DefUses : MachineInstrs that use one of MI's ambiguous def operands.
+  /// UseDefs : MachineInstrs that define MI's ambiguous use operands.
+  class AmbiguousRegDefUseContainer {
+    SmallVector<MachineInstr *, 2> DefUses;
+    SmallVector<MachineInstr *, 2> UseDefs;
+
+    void addDefUses(Register Reg, const MachineRegisterInfo &MRI);
+    void addUseDef(Register Reg, const MachineRegisterInfo &MRI);
+
+    /// Skip copy instructions until we get to a non-copy instruction or to a
+    /// copy with phys register as def. Used during search for DefUses.
+    /// MI :  %5 = COPY %4
+    ///       %6 = COPY %5
+    ///       $v0 = COPY %6 <- we want this one.
+    MachineInstr *skipCopiesOutgoing(MachineInstr *MI) const;
+
+    /// Skip copy instructions until we get to a non-copy instruction or to a
+    /// copy with phys register as use. Used during search for UseDefs.
+    ///       %1 = COPY $a1 <- we want this one.
+    ///       %2 = COPY %1
+    /// MI =  %3 = COPY %2
+    MachineInstr *skipCopiesIncoming(MachineInstr *MI) const;
+
+  public:
+    AmbiguousRegDefUseContainer(const MachineInstr *MI);
+    SmallVectorImpl<MachineInstr *> &getDefUses() { return DefUses; }
+    SmallVectorImpl<MachineInstr *> &getUseDefs() { return UseDefs; }
+  };
+
+  class TypeInfoForMF {
+    /// MachineFunction name is used to recognise when MF changes.
+    std::string MFName = "";
+    /// <key, value> : value is vector of all MachineInstrs that are waiting for
+    /// key to figure out type of some of its ambiguous operands.
+    DenseMap<const MachineInstr *, SmallVector<const MachineInstr *, 2>>
+        WaitingQueues;
+    /// Recorded InstTypes for visited instructions.
+    DenseMap<const MachineInstr *, InstType> Types;
+
+    /// Recursively visit MI's adjacent instructions and find MI's InstType.
+    bool visit(const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI);
+
+    /// Visit MI's adjacent UseDefs or DefUses.
+    bool visitAdjacentInstrs(const MachineInstr *MI,
+                             SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
+                             bool isDefUse);
+
+    /// Set type for MI, and recursively for all instructions that are
+    /// waiting for MI's type.
+    void setTypes(const MachineInstr *MI, InstType ITy);
+
+    /// InstType for MI is determined, set it to InstType that corresponds to
+    /// the physical register that is operand number Op in CopyInst.
+    void setTypesAccordingToPhysicalRegister(const MachineInstr *MI,
+                                             const MachineInstr *CopyInst,
+                                             unsigned Op);
+
+    /// Set default values for MI in order to start visit.
+    void startVisit(const MachineInstr *MI) {
+      Types.try_emplace(MI, InstType::NotDetermined);
+      WaitingQueues.try_emplace(MI);
+    }
+
+    /// Returns true if instruction was already visited. Type might not be
+    /// determined at this point but will be when visit(..., nullptr) finishes.
+    bool wasVisited(const MachineInstr *MI) const { return Types.count(MI); };
+
+    /// Returns recorded type for instruction.
+    const InstType &getRecordedTypeForInstr(const MachineInstr *MI) const {
+      assert(wasVisited(MI) && "Instruction was not visited!");
+      return Types.find(MI)->getSecond();
+    };
+
+    /// Change recorded type for instruction.
+    void changeRecordedTypeForInstr(const MachineInstr *MI, InstType InstTy) {
+      assert(wasVisited(MI) && "Instruction was not visited!");
+      Types.find(MI)->getSecond() = InstTy;
+    };
+
+    /// Returns WaitingQueue for instruction.
+    const SmallVectorImpl<const MachineInstr *> &
+    getWaitingQueueFor(const MachineInstr *MI) const {
+      assert(WaitingQueues.count(MI) && "Instruction was not visited!");
+      return WaitingQueues.find(MI)->getSecond();
+    };
+
+    /// Add WaitingForMI to MI's WaitingQueue.
+    void addToWaitingQueue(const MachineInstr *MI,
+                           const MachineInstr *WaitingForMI) {
+      assert(WaitingQueues.count(MI) && "Instruction was not visited!");
+      WaitingQueues.find(MI)->getSecond().push_back(WaitingForMI);
+    };
+
+  public:
+    InstType determineInstType(const MachineInstr *MI);
+
+    void cleanupIfNewFunction(llvm::StringRef FunctionName);
+  };
 };
 } // end namespace llvm
 #endif
diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td
index 5f1687048fac..14a0181f8f11 100644
--- a/lib/Target/Mips/MipsRegisterBanks.td
+++ b/lib/Target/Mips/MipsRegisterBanks.td
@@ -1,9 +1,8 @@
 //===- MipsRegisterBank.td ---------------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,3 +10,5 @@
 //===----------------------------------------------------------------------===//
 
 def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>;
+
+def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>;
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 3c108c2ba9b7..7b02d126eb28 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- MipsRegisterInfo.cpp - MIPS Register Information -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -160,8 +159,6 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
 
-  using RegIter = TargetRegisterClass::const_iterator;
-
   for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
     Reserved.set(ReservedGPR32[I]);
 
@@ -183,14 +180,12 @@ getReservedRegs(const MachineFunction &MF) const {
 
   if (Subtarget.isFP64bit()) {
     // Reserve all registers in AFGR64.
-    for (RegIter Reg = Mips::AFGR64RegClass.begin(),
-         EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
-      Reserved.set(*Reg);
+    for (MCPhysReg Reg : Mips::AFGR64RegClass)
+      Reserved.set(Reg);
   } else {
     // Reserve all registers in FGR64.
-    for (RegIter Reg = Mips::FGR64RegClass.begin(),
-         EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg)
-      Reserved.set(*Reg);
+    for (MCPhysReg Reg : Mips::FGR64RegClass)
+      Reserved.set(Reg);
   }
   // Reserve FP if this function should have a dedicated frame pointer register.
   if (Subtarget.getFrameLowering()->hasFP(MF)) {
@@ -222,14 +217,8 @@ getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(Mips::DSPOutFlag);
 
   // Reserve MSA control registers.
-  Reserved.set(Mips::MSAIR);
-  Reserved.set(Mips::MSACSR);
-  Reserved.set(Mips::MSAAccess);
-  Reserved.set(Mips::MSASave);
-  Reserved.set(Mips::MSAModify);
-  Reserved.set(Mips::MSARequest);
-  Reserved.set(Mips::MSAMap);
-  Reserved.set(Mips::MSAUnmap);
+  for (MCPhysReg Reg : Mips::MSACtrlRegClass)
+    Reserved.set(Reg);
 
   // Reserve RA if in mips16 mode.
   if (Subtarget.inMips16Mode()) {
@@ -248,11 +237,6 @@ getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(Mips::GP_64);
   }
 
-  if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) {
-    for (const auto &Reg : Mips::OddSPRegClass)
-      Reserved.set(Reg);
-  }
-
   return Reserved;
 }
 
@@ -293,7 +277,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
 }
 
-unsigned MipsRegisterInfo::
+Register MipsRegisterInfo::
 getFrameRegister(const MachineFunction &MF) const {
   const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
@@ -322,8 +306,8 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
   unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64;
   unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64;
 
-  // Support dynamic stack realignment only for targets with standard encoding.
-  if (!Subtarget.hasStandardEncoding())
+  // Support dynamic stack realignment for all targets except Mips16.
+  if (Subtarget.inMips16Mode())
     return false;
 
   // We can't perform dynamic stack realignment if we can't reserve the
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b84aaad05eb5..4ed32b09718b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -70,7 +69,7 @@ public:
   bool canRealignStack(const MachineFunction &MF) const override;
 
   /// Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   /// Return GPR register class.
   virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index a943a0ad4094..8a6279da46b7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -259,6 +258,11 @@ let Namespace = "Mips" in {
   def MSARequest : MipsReg<5, "5">;
   def MSAMap     : MipsReg<6, "6">;
   def MSAUnmap   : MipsReg<7, "7">;
+  // MSA-ASE fake control registers.
+  // These registers do not exist, but instructions like `cfcmsa`
+  // and `ctcmsa` allow them to be specified.
+  foreach I = 8-31 in
+  def MSA#I : MipsReg<#I, ""#I>;
 
   // Octeon multiplier and product registers
   def MPL0 : MipsReg<0, "mpl0">;
@@ -383,10 +387,14 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
 // 32bit fp:
 // * FGR32 - 16 32-bit even registers
 // * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
-
-def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>,
-             Unallocatable;
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)> {
+  // Do not allocate odd registers when given -mattr=+nooddspreg.
+  let AltOrders = [(decimate FGR32, 2)];
+  let AltOrderSelect = [{
+    const auto & S = MF.getSubtarget<MipsSubtarget>();
+    return S.isABI_O32() && !S.useOddSPReg();
+  }];
+}
 
 def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
   // Return Values and Arguments
@@ -400,16 +408,14 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
   // Callee save
   D10, D11, D12, D13, D14, D15)>;
 
-def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
-
-// Used to reserve odd registers when given -mattr=+nooddspreg
-// FIXME: Remove double precision registers from this set.
-def OddSP : RegisterClass<"Mips", [f32], 32,
-                          (add (decimate (sequence "F%u", 1, 31), 2),
-                               (decimate (sequence "F_HI%u", 1, 31), 2),
-                               (decimate (sequence "D%u", 1, 15), 2),
-                               (decimate (sequence "D%u_64", 1, 31), 2))>,
-            Unallocatable;
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
+  // Do not allocate odd registers when given -mattr=+nooddspreg.
+  let AltOrders = [(decimate FGR64, 2)];
+  let AltOrderSelect = [{
+    const auto & S = MF.getSubtarget<MipsSubtarget>();
+    return S.isABI_O32() && !S.useOddSPReg();
+  }];
+}
 
 // FP control registers.
 def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
@@ -437,7 +443,8 @@ def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
                                 (decimate (sequence "W%u", 0, 31), 2)>;
 
 def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
-  MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
+  MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap,
+  (sequence "MSA%u", 8, 31))>, Unallocatable;
 
 // Hi/Lo Registers
 def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>;
@@ -591,11 +598,6 @@ def StrictlyFGR32AsmOperand : MipsAsmRegOperand {
   let PredicateMethod = "isStrictlyFGRAsmReg";
 }
 
-def FGRH32AsmOperand : MipsAsmRegOperand {
-  let Name = "FGRH32AsmReg";
-  let PredicateMethod = "isFGRAsmReg";
-}
-
 def FCCRegsAsmOperand : MipsAsmRegOperand {
   let Name = "FCCAsmReg";
 }
@@ -703,10 +705,6 @@ def FGRCCOpnd : RegisterOperand<FGRCC> {
   let ParserMatchClass = FGR32AsmOperand;
 }
 
-def FGRH32Opnd : RegisterOperand<FGRH32> {
-  let ParserMatchClass = FGRH32AsmOperand;
-}
-
 def FCCRegsOpnd : RegisterOperand<FCC> {
   let ParserMatchClass = FCCRegsAsmOperand;
 }
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index ef1b3c09bdc4..4c6cc1ef771c 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===- MipsSEFrameLowering.cpp - Mips32/64 Frame Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index cb2119d6880b..78ffe161d9c6 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -1,9 +1,8 @@
 //===- MipsSEFrameLowering.h - Mips32/64 frame lowering ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cf196b597278..703f99f37dd1 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,18 +75,8 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
 }
 
 unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const {
-  switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) {
-  default:
-    llvm_unreachable("Could not map int to register");
-  case 0: return Mips::MSAIR;
-  case 1: return Mips::MSACSR;
-  case 2: return Mips::MSAAccess;
-  case 3: return Mips::MSASave;
-  case 4: return Mips::MSAModify;
-  case 5: return Mips::MSARequest;
-  case 6: return Mips::MSAMap;
-  case 7: return Mips::MSAUnmap;
-  }
+  const auto *RegIdxNode = cast<ConstantSDNode>(RegIdx);
+  return Mips::MSACtrlRegClass.getRegister(RegIdxNode->getZExtValue());
 }
 
 bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
@@ -135,97 +124,8 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
   return true;
 }
 
-void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
-  if (!MipsFI->globalBaseRegSet())
-    return;
-
-  MachineBasicBlock &MBB = MF.front();
-  MachineBasicBlock::iterator I = MBB.begin();
-  MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL;
-  unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
-  const TargetRegisterClass *RC;
-  const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
-  RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
-
-  V0 = RegInfo.createVirtualRegister(RC);
-  V1 = RegInfo.createVirtualRegister(RC);
-
-  if (ABI.IsN64()) {
-    MF.getRegInfo().addLiveIn(Mips::T9_64);
-    MBB.addLiveIn(Mips::T9_64);
-
-    // lui $v0, %hi(%neg(%gp_rel(fname)))
-    // daddu $v1, $v0, $t9
-    // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
-    const GlobalValue *FName = &MF.getFunction();
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
-      .addReg(Mips::T9_64);
-    BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
-    return;
-  }
-
-  if (!MF.getTarget().isPositionIndependent()) {
-    // Set global register to __gnu_local_gp.
-    //
-    // lui   $v0, %hi(__gnu_local_gp)
-    // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
-      .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
-      .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
-    return;
-  }
-
-  MF.getRegInfo().addLiveIn(Mips::T9);
-  MBB.addLiveIn(Mips::T9);
-
-  if (ABI.IsN32()) {
-    // lui $v0, %hi(%neg(%gp_rel(fname)))
-    // addu $v1, $v0, $t9
-    // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
-    const GlobalValue *FName = &MF.getFunction();
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
-    return;
-  }
-
-  assert(ABI.IsO32());
-
-  // For O32 ABI, the following instruction sequence is emitted to initialize
-  // the global base register:
-  //
-  //  0. lui   $2, %hi(_gp_disp)
-  //  1. addiu $2, $2, %lo(_gp_disp)
-  //  2. addu  $globalbasereg, $2, $t9
-  //
-  // We emit only the last instruction here.
-  //
-  // GNU linker requires that the first two instructions appear at the beginning
-  // of a function and no instructions be inserted before or between them.
-  // The two instructions are emitted during lowering to MC layer in order to
-  // avoid any reordering.
-  //
-  // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
-  // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
-  // reads it.
-  MF.getRegInfo().addLiveIn(Mips::V0);
-  MBB.addLiveIn(Mips::V0);
-  BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
-    .addReg(Mips::V0).addReg(Mips::T9);
-}
-
 void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
-  initGlobalBaseReg(MF);
+  MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg();
 
   MachineRegisterInfo *MRI = &MF.getRegInfo();
 
@@ -1337,6 +1237,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
     OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
     return false;
   case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_o:
     if (selectAddrRegImm16(Op, Base, Offset)) {
       OutOps.push_back(Base);
       OutOps.push_back(Offset);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index eb3657aae050..ce594e1fb4fa 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -1,9 +1,8 @@
 //===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -131,10 +130,6 @@ private:
 
   void processFunctionAfterISel(MachineFunction &MF) override;
 
-  // Insert instructions to initialize the global base register in the
-  // first MBB of the function.
-  void initGlobalBaseReg(MachineFunction &MF);
-
   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index a78e544c35f0..edf57a3840d1 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1,9 +1,8 @@
 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -214,6 +213,11 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
+  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
+      !Subtarget.hasMips64()) {
+    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+  }
+
   if (NoDPLoadStore) {
     setOperationAction(ISD::LOAD, MVT::f64, Custom);
     setOperationAction(ISD::STORE, MVT::f64, Custom);
@@ -415,11 +419,8 @@ SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
                      Op->getOperand(2));
 }
 
-bool
-MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                     unsigned,
-                                                     unsigned,
-                                                     bool *Fast) const {
+bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
   if (Subtarget.systemSupportsUnalignedAccess()) {
@@ -463,6 +464,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
   case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SELECT:             return lowerSELECT(Op, DAG);
+  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
   }
 
   return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -714,8 +716,31 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                                SelectionDAG &DAG,
                                                const MipsSubtarget &Subtarget) {
   // Estimate the number of operations the below transform will turn a
-  // constant multiply into. The number is approximately how many powers
-  // of two summed together that the constant can be broken down into.
+  // constant multiply into. The number is approximately equal to the minimal
+  // number of powers of two that the constant can be broken down into by
+  // adding or subtracting them.
+  //
+  // If we have taken more than 12[1] / 8[2] steps to attempt the
+  // optimization for a native sized value, it is more than likely that this
+  // optimization will make things worse.
+  //
+  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+  //     multiplication requires at least 4 cycles, but another cycle (or two)
+  //     to retrieve the result from the HI/LO registers.
+  //
+  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+  //     materialized in 2 instructions, multiplication requires at least 4
+  //     cycles, but another cycle (or two) to retrieve the result from the
+  //     HI/LO registers.
+  //
+  // TODO:
+  // - MaxSteps needs to consider the `VT` of the constant for the current
+  //   target.
+  // - Consider performing this optimization after type legalization.
+  //   That would allow removing a workaround for types not supported natively.
+  // - Take into account the `-Os` and `-Oz` flags because this optimization
+  //   increases code size.
+  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
 
   SmallVector<APInt, 16> WorkStack(1, C);
   unsigned Steps = 0;
@@ -727,6 +752,9 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
     if (Val == 0 || Val == 1)
       continue;
 
+    if (Steps >= MaxSteps)
+      return false;
+
     if (Val.isPowerOf2()) {
       ++Steps;
       continue;
@@ -735,36 +763,15 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
     APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
     APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                   : APInt(BitWidth, 1) << C.ceilLogBase2();
-
     if ((Val - Floor).ule(Ceil - Val)) {
       WorkStack.push_back(Floor);
       WorkStack.push_back(Val - Floor);
-      ++Steps;
-      continue;
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
     }
 
-    WorkStack.push_back(Ceil);
-    WorkStack.push_back(Ceil - Val);
     ++Steps;
-
-    // If we have taken more than 12[1] / 8[2] steps to attempt the
-    // optimization for a native sized value, it is more than likely that this
-    // optimization will make things worse.
-    //
-    // [1] MIPS64 requires 6 instructions at most to materialize any constant,
-    //     multiplication requires at least 4 cycles, but another cycle (or two)
-    //     to retrieve the result from the HI/LO registers.
-    //
-    // [2] For MIPS32, more than 8 steps is expensive as the constant could be
-    //     materialized in 2 instructions, multiplication requires at least 4
-    //     cycles, but another cycle (or two) to retrieve the result from the
-    //     HI/LO registers.
-
-    if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
-      return false;
-
-    if (Steps > 8 && Subtarget.isABI_O32())
-      return false;
   }
 
   // If the value being multiplied is not supported natively, we have to pay
@@ -1221,6 +1228,36 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
                       Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
 }
 
+SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  // Custom-lower only the i64 <-> f64 bitcasts; anything else is left alone.
+  const SDLoc DL(Op);
+  const SDValue In = Op.getOperand(0);
+  const MVT FromVT = In.getValueType().getSimpleVT();
+  const MVT ToVT = Op.getValueType().getSimpleVT();
+
+  if (FromVT == MVT::i64 && ToVT == MVT::f64) {
+    // i64 -> f64: split the integer into 32-bit halves and pair them up.
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+                             DAG.getIntPtrConstant(0, DL));
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+                             DAG.getIntPtrConstant(1, DL));
+    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+  }
+
+  if (FromVT == MVT::f64 && ToVT == MVT::i64) {
+    // f64 -> i64: pull out each 32-bit half and rebuild the integer.
+    SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, In,
+                             DAG.getConstant(0, DL, MVT::i32));
+    SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, In,
+                             DAG.getConstant(1, DL, MVT::i32));
+    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+  }
+
+  // An empty SDValue tells the caller to fall back to the default lowering.
+  return SDValue();
+}
+
 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                           bool HasLo, bool HasHi,
                                           SelectionDAG &DAG) const {
@@ -1379,9 +1416,10 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
 
 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                 bool IsSigned = false) {
+  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
   return DAG.getConstant(
       APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
-            Op->getConstantOperandVal(ImmOp), IsSigned),
+            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
       SDLoc(Op), Op->getValueType(0));
 }
 
@@ -3725,8 +3763,8 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
 
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
-  unsigned Fd = MI.getOperand(0).getReg();
-  unsigned Ws = MI.getOperand(1).getReg();
+  Register Fd = MI.getOperand(0).getReg();
+  Register Ws = MI.getOperand(1).getReg();
 
   MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
   const TargetRegisterClass *GPRRC =
@@ -3734,10 +3772,10 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
   unsigned MTC1Opc = IsFGR64onMips64
                          ? Mips::DMTC1
                          : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
-  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
+  unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
 
-  unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
-  unsigned WPHI = Wtemp;
+  Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+  Register WPHI = Wtemp;
 
   BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
   if (IsFGR64) {
@@ -3746,15 +3784,15 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
   }
 
   // Perform the safety regclass copy mentioned above.
-  unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
-  unsigned FPRPHI = IsFGR64onMips32
+  Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
+  Register FPRPHI = IsFGR64onMips32
                         ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
                         : Fd;
   BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
   BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
 
   if (IsFGR64onMips32) {
-    unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+    Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
     BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
         .addReg(WPHI)
         .addImm(1);
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 761ff3b1fa4d..433d019332cf 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -1,9 +1,8 @@
 //===- MipsSEISelLowering.h - MipsSE DAG Lowering Interface -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,9 +40,10 @@ class TargetRegisterClass;
     void addMSAFloatType(MVT::SimpleValueType Ty,
                          const TargetRegisterClass *RC);
 
-    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AS = 0, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
@@ -73,6 +73,7 @@ class TargetRegisterClass;
 
     SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
                         SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index c7ab90ed2a3b..4e49f5e7d9d1 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsSEInstrInfo.h"
-#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsMachineFunction.h"
 #include "MipsTargetMachine.h"
@@ -447,6 +446,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case Mips::PseudoMTLOHI_DSP:
     expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
     break;
+  case Mips::PseudoMTLOHI_MM:
+    expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false);
+    break;
   case Mips::PseudoCVT_S_W:
     expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
     break;
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index fce0fe5f58ad..3111d1c21a0a 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index e7d720a4b769..f4b164d5c0ab 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index ebae1909d233..82ddf40f56a7 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 410fa655a225..0c0ddeab22c4 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,9 +1,8 @@
 //===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td
index 80ffe7ada7c8..e8a0a30b8e9b 100644
--- a/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/lib/Target/Mips/MipsScheduleGeneric.td
@@ -1,9 +1,8 @@
 //=- MipsScheduleGeneric.td - Generic Scheduling Definitions -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,11 +24,11 @@ def MipsGenericModel : SchedMachineModel {
   int HighLatency = 37;
   list<Predicate> UnsupportedFeatures = [];
 
-  let CompleteModel = 0;
+  let CompleteModel = 1;
   let PostRAScheduler = 1;
 
   // FIXME: Remove when all errors have been fixed.
-  let FullInstRWOverlapCheck = 0;
+  let FullInstRWOverlapCheck = 1;
 }
 
 let SchedModel = MipsGenericModel in {
@@ -42,35 +41,122 @@ def GenericIssueALU : ProcResource<1> { let Super = GenericALU; }
 
 def GenericWriteALU : SchedWriteRes<[GenericIssueALU]>;
 
-// and, lui, nor, or, slti, sltiu, sub, subu, xor
-// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
-// xori
-def : ItinRW<[GenericWriteALU], [II_ADD, II_ADDU, II_ADDI, II_ADDIU, II_ANDI,
-                                 II_AND, II_ANDI, II_CLO, II_CLZ, II_EXT,
-                                 II_INS, II_LUI, II_MULT, II_MULTU, II_NOR,
-                                 II_ORI, II_OR, II_ROTR, II_ROTRV, II_SEB,
-                                 II_SEH, II_SLTI_SLTIU, II_SLT_SLTU, II_SLL,
-                                 II_SRA, II_SRL, II_SLLV, II_SRAV, II_SRLV,
-                                 II_SSNOP, II_SUB, II_SUBU, II_WSBH, II_XOR,
-                                 II_XORI]>;
+// add, addi, addiu, addu, and, andi, clo, clz, ext, ins, lui, nor, or, ori,
+// rotr, rotrv, seb, seh, sll, sllv, slt, slti, sltiu, sltu, sra, srav, srl,
+// srlv, ssnop, sub, subu, wsbh, xor, xori
+def : InstRW<[GenericWriteALU], (instrs ADD, ADDi, ADDiu, ADDu, AND, ANDi,
+                                 CLO, CLZ, EXT, INS, LEA_ADDiu, LUi, NOP,
+                                 NOR, OR, ORi, ROTR, ROTRV, SEB, SEH, SLL,
+                                 SLLV, SLT, SLTi, SLTiu, SLTu, SRA, SRAV, SRL,
+                                 SRLV, SSNOP, SUB, SUBu, WSBH, XOR, XORi)>;
 
 def : InstRW<[GenericWriteALU], (instrs COPY)>;
 
+// MIPSR6
+// ======
+
+// addiupc, align, aluipc, aui, auipc, bitswap, clo, clz, lsa, seleqz, selnez
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC, ALIGN, ALUIPC, AUI,
+                                 AUIPC, BITSWAP, CLO_R6, CLZ_R6, LSA_R6,
+                                 SELEQZ, SELNEZ)>;
+
+// MIPS16e
+// =======
+
+def : InstRW<[GenericWriteALU], (instrs AddiuRxImmX16, AddiuRxRxImm16,
+                                 AddiuRxRxImmX16, AddiuRxRyOffMemX16,
+                                 AddiuRxPcImmX16, AddiuSpImm16, AddiuSpImmX16,
+                                 AdduRxRyRz16, AndRxRxRy16, CmpRxRy16,
+                                 CmpiRxImm16, CmpiRxImmX16, LiRxImm16,
+                                 LiRxImmX16, LiRxImmAlignX16, Move32R16,
+                                 MoveR3216, Mfhi16, Mflo16, NegRxRy16,
+                                 NotRxRy16, OrRxRxRy16, SebRx16, SehRx16,
+                                 SllX16, SllvRxRy16, SltiRxImm16,
+                                 SltiRxImmX16, SltiCCRxImmX16,
+                                 SltiuRxImm16, SltiuRxImmX16, SltiuCCRxImmX16,
+                                 SltRxRy16, SltCCRxRy16, SltuRxRy16,
+                                 SltuRxRyRz16, SltuCCRxRy16, SravRxRy16,
+                                 SraX16, SrlvRxRy16, SrlX16, SubuRxRyRz16,
+                                 XorRxRxRy16)>;
+
+def : InstRW<[GenericWriteALU], (instrs Constant32, LwConstant32,
+                                 GotPrologue16, CONSTPOOL_ENTRY)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC_MM, ADDIUR1SP_MM, ADDIUR2_MM,
+                                 ADDIUS5_MM, ADDIUSP_MM, ADDU16_MM, ADD_MM,
+                                 ADDi_MM, ADDiu_MM, ADDu_MM, AND16_MM,
+                                 ANDI16_MM, AND_MM, ANDi_MM, CLO_MM, CLZ_MM,
+                                 EXT_MM, INS_MM, LEA_ADDiu_MM, LI16_MM,
+                                 LUi_MM, MOVE16_MM, MOVEP_MM, NOR_MM,
+                                 NOT16_MM, OR16_MM, OR_MM, ORi_MM, ROTRV_MM,
+                                 ROTR_MM, SEB_MM, SEH_MM, SLL16_MM, SLLV_MM,
+                                 SLL_MM, SLT_MM, SLTi_MM, SLTiu_MM, SLTu_MM,
+                                 SRAV_MM, SRA_MM, SRL16_MM, SRLV_MM, SRL_MM,
+                                 SSNOP_MM, SUBU16_MM, SUB_MM, SUBu_MM,
+                                 WSBH_MM, XOR16_MM, XOR_MM, XORi_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC_MMR6, ADDIU_MMR6, ADDU16_MMR6,
+                                 ADDU_MMR6, ADD_MMR6, ALIGN_MMR6, ALUIPC_MMR6,
+                                 AND16_MMR6, ANDI16_MMR6, ANDI_MMR6, AND_MMR6,
+                                 AUIPC_MMR6, AUI_MMR6, BITSWAP_MMR6, CLO_MMR6,
+                                 CLZ_MMR6, EXT_MMR6, INS_MMR6, LI16_MMR6,
+                                 LSA_MMR6, LUI_MMR6, MOVE16_MMR6, NOR_MMR6,
+                                 NOT16_MMR6, OR16_MMR6, ORI_MMR6, OR_MMR6,
+                                 SELEQZ_MMR6, SELNEZ_MMR6, SLL16_MMR6,
+                                 SLL_MMR6, SRL16_MMR6, SSNOP_MMR6, SUBU16_MMR6,
+                                 SUBU_MMR6, SUB_MMR6, WSBH_MMR6, XOR16_MMR6,
+                                 XORI_MMR6, XOR_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteALU], (instrs AND64, ANDi64, DEXT64_32, DSLL64_32,
+                                 ORi64, SEB64, SEH64, SLL64_32, SLL64_64,
+                                 SLT64, SLTi64, SLTiu64, SLTu64, XOR64,
+                                 XORi64)>;
+
+def : InstRW<[GenericWriteALU], (instrs DADD, DADDi, DADDiu, DADDu, DCLO,
+                                 DCLZ, DEXT, DEXTM, DEXTU, DINS, DINSM, DINSU,
+                                 DROTR, DROTR32, DROTRV, DSBH, DSHD, DSLL,
+                                 DSLL32, DSLLV, DSRA, DSRA32, DSRAV, DSRL,
+                                 DSRL32, DSRLV, DSUB, DSUBu, LEA_ADDiu64,
+                                 LUi64, NOR64, OR64)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteALU], (instrs DALIGN, DAHI, DATI, DAUI, DCLO_R6,
+                                 DCLZ_R6, DBITSWAP, DLSA, DLSA_R6, SELEQZ64,
+                                 SELNEZ64)>;
+
+
 def GenericMDU : ProcResource<1> { let BufferSize = 1; }
 def GenericIssueMDU : ProcResource<1> { let Super = GenericALU; }
 def GenericIssueDIV : ProcResource<1> { let Super = GenericMDU; }
 def GenericWriteHILO : SchedWriteRes<[GenericIssueMDU]>;
 def GenericWriteALULong : SchedWriteRes<[GenericIssueALU]> { let Latency = 5; }
 def GenericWriteMove : SchedWriteRes<[GenericIssueALU]> { let Latency = 2; }
+def GenericWriteMul : SchedWriteRes<[GenericIssueMDU]> { let Latency = 4; }
+
+def : InstRW<[GenericWriteHILO], (instrs MADD, MADDU, MSUB, MSUBU)>;
 
-def : ItinRW<[GenericWriteHILO], [II_MADD, II_MADDU, II_MSUB, II_MSUBU]>;
+def : InstRW<[GenericWriteHILO], (instrs PseudoMADD_MM, PseudoMADDU_MM,
+                                  PseudoMSUB_MM, PseudoMSUBU_MM,
+                                  PseudoMULT_MM, PseudoMULTu_MM)>;
+
+def : InstRW<[GenericWriteHILO], (instrs PseudoMADD, PseudoMADDU, PseudoMSUB,
+                                  PseudoMSUBU, PseudoMULT, PseudoMULTu)>;
 
 def GenericWriteMDUtoGPR : SchedWriteRes<[GenericIssueMDU]> {
   let Latency = 5;
 }
 
-def : ItinRW<[GenericWriteMDUtoGPR], [II_MUL]>;
-
 def GenericWriteDIV : SchedWriteRes<[GenericIssueDIV]> {
   // Estimated worst case
   let Latency = 33;
@@ -82,63 +168,105 @@ def GenericWriteDIVU : SchedWriteRes<[GenericIssueDIV]> {
   let ResourceCycles = [31];
 }
 
-def : ItinRW<[GenericWriteDIV], [II_DIV]>;
+// mul
+def : InstRW<[GenericWriteMDUtoGPR], (instrs MUL)>;
 
-def : ItinRW<[GenericWriteDIVU], [II_DIVU]>;
+// mult, multu
+def : InstRW<[GenericWriteMul], (instrs MULT, MULTu)>;
 
-// MIPS64
-// ======
+// div, sdiv
+def : InstRW<[GenericWriteDIV], (instrs PseudoSDIV, SDIV)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs PseudoUDIV, UDIV)>;
+
+// mfhi, mflo, movn, mthi, mtlo, rdhwr
+def : InstRW<[GenericWriteALULong], (instrs MFHI, MFLO, PseudoMFHI,
+                                     PseudoMFLO)>;
+
+def : InstRW<[GenericWriteALULong], (instrs PseudoMFHI_MM, PseudoMFLO_MM)>;
 
-def : ItinRW<[GenericWriteALU], [II_DADDIU, II_DADDU, II_DADDI, II_DADD,
-                                 II_DCLO, II_DCLZ, II_DROTR, II_DROTR32,
-                                 II_DROTRV, II_DSBH, II_DSHD, II_DSLL,
-                                 II_DSLL32, II_DSLLV, II_DSRA, II_DSRA32,
-                                 II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV,
-                                 II_DSUBU, II_DSUB]>;
+def : InstRW<[GenericWriteMove], (instrs MTHI, MTLO, RDHWR, PseudoMTLOHI)>;
+def : InstRW<[GenericWriteMove], (instrs PseudoMTLOHI_MM)>;
 
-def : ItinRW<[GenericWriteDIV], [II_DDIV]>;
+def : InstRW<[GenericWriteALU], (instrs MOVN_I_I, MOVZ_I_I)>;
 
-def : ItinRW<[GenericWriteDIVU], [II_DDIVU]>;
+// MIPSR6
+// ======
 
-def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUL]>;
+// muh, muhu, mulu, mul
+def : InstRW<[GenericWriteMul], (instrs MUH, MUHU, MULU, MUL_R6)>;
+
+// mod, modu, div, divu
+def : InstRW<[GenericWriteDIV], (instrs MOD, MODU, DIV, DIVU)>;
 
-def : ItinRW<[GenericWriteHILO], [II_DMULU, II_DMULT, II_DMULTU]>;
 
 // MIPS16e
 // =======
 
-def : ItinRW<[GenericWriteALU], [IIM16Alu, IIPseudo]>;
+def : InstRW<[GenericWriteHILO], (instrs MultRxRy16, MultuRxRy16,
+                                  MultRxRyRz16, MultuRxRyRz16)>;
+
+def : InstRW<[GenericWriteDIV], (instrs DivRxRy16)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs DivuRxRy16)>;
 
 // microMIPS
 // =========
 
-def : ItinRW<[GenericWriteALU], [II_MOVE, II_LI, II_NOT]>;
+def : InstRW<[GenericWriteMul], (instrs MULT_MM, MULTu_MM, MADD_MM, MADDU_MM,
+                                 MSUB_MM, MSUBU_MM)>;
 
-// MIPSR6
+def : InstRW<[GenericWriteALULong], (instrs MUL_MM)>;
+
+def : InstRW<[GenericWriteDIV], (instrs SDIV_MM, SDIV_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs UDIV_MM, UDIV_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteMove], (instrs MFHI16_MM, MFLO16_MM, MOVF_I_MM,
+                                  MOVT_I_MM, MFHI_MM, MFLO_MM, MTHI_MM,
+                                  MTLO_MM)>;
+
+def : InstRW<[GenericWriteMove], (instrs RDHWR_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteMul], (instrs MUHU_MMR6, MUH_MMR6, MULU_MMR6,
+                                 MUL_MMR6)>;
+
+def : InstRW<[GenericWriteDIV], (instrs MODU_MMR6, MOD_MMR6, DIVU_MMR6,
+                                 DIV_MMR6)>;
+
+def : InstRW<[GenericWriteMove], (instrs RDHWR_MMR6)>;
+
+// MIPS64
 // ======
 
-def GenericWriteMul : SchedWriteRes<[GenericIssueMDU]> { let Latency = 4; }
-def : ItinRW<[GenericWriteMul], [II_MUH, II_MUHU, II_MULU]>;
+def : InstRW<[GenericWriteHILO], (instrs DMULU, DMULT, DMULTu, PseudoDMULT,
+                                  PseudoDMULTu)>;
+
+def : InstRW<[GenericWriteDIV], (instrs DSDIV, PseudoDSDIV)>;
 
-def : ItinRW<[GenericWriteDIV], [II_MOD, II_MODU]>;
+def : InstRW<[GenericWriteDIVU], (instrs DUDIV, PseudoDUDIV)>;
+
+def : InstRW<[GenericWriteALULong], (instrs MFHI64, MFLO64, PseudoMFHI64,
+                                     PseudoMFLO64, PseudoMTLOHI64)>;
+
+def : InstRW<[GenericWriteMove], (instrs MTHI64, MTLO64, RDHWR64)>;
+
+// mov[zn]
+def : InstRW<[GenericWriteALU], (instrs MOVN_I_I64, MOVN_I64_I, MOVN_I64_I64,
+                                 MOVZ_I_I64, MOVZ_I64_I, MOVZ_I64_I64)>;
 
-def : ItinRW<[GenericWriteALU], [II_ADDIUPC, II_ALIGN, II_ALUIPC, II_AUI,
-                                 II_AUIPC, II_BITSWAP, II_LSA, II_SELCCZ]>;
 
 // MIPS64R6
 // ========
 
-def : ItinRW<[GenericWriteALU], [II_DALIGN, II_DAHI, II_DATI, II_DAUI,
-                               II_DBITSWAP, II_DLSA]>;
-
-def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUH, II_DMUHU]>;
-def : ItinRW<[GenericWriteDIV], [II_DMOD, II_DMODU]>;
+def : InstRW<[GenericWriteMDUtoGPR], (instrs DMUH, DMUHU, DMUL_R6)>;
 
-// clo, clz, di, mfhi, mflo
-def : ItinRW<[GenericWriteALULong], [II_MFHI_MFLO]>;
-def : ItinRW<[GenericWriteALU], [II_MOVN, II_MOVZ]>;
-def : ItinRW<[GenericWriteMove], [II_MTHI_MTLO, II_RDHWR]>;
+def : InstRW<[GenericWriteDIV], (instrs DDIV, DMOD)>;
 
+def : InstRW<[GenericWriteDIVU], (instrs DDIVU, DMODU)>;
 
 // CTISTD Pipeline
 // ---------------
@@ -155,31 +283,150 @@ def GenericWriteJumpAndLink : SchedWriteRes<[GenericIssueCTISTD]> {
 
 // b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
 // jalr, jr.hb, jr, jalr.hb, jarlc, jialc
-def : ItinRW<[GenericWriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J,
-                                  II_JR, II_JR_HB, II_ERET, II_ERETNC,
-                                  II_DERET]>;
+def : InstRW<[GenericWriteJump], (instrs B, BAL, BAL_BR, BEQ, BNE, BGTZ, BGEZ,
+                                  BLEZ, BLTZ, BLTZAL, J, JALX, JR, JR_HB, ERET,
+                                  ERet, ERETNC, DERET)>;
+
+def : InstRW<[GenericWriteJump], (instrs BEQL, BNEL, BGEZL, BGTZL, BLEZL,
+                                  BLTZL)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALL, TAILCALLREG,
+                                  TAILCALLREGHB, PseudoIndirectBranch,
+                                  PseudoIndirectHazardBranch, PseudoReturn,
+                                  RetRA)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZAL, JAL, JALR, JALR_HB,
+                                         JALRHBPseudo, JALRPseudo)>;
 
-def : ItinRW<[GenericWriteJumpAndLink], [II_JAL, II_JALR, II_JALR_HB,
-                                         II_BC2CCZ]>;
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZALL, BLTZALL)>;
 
-def : ItinRW<[GenericWriteJump], [II_JRC, II_JRADDIUSP]>;
+def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
 
-def : ItinRW<[GenericWriteJumpAndLink], [II_BCCZALS, II_JALS, II_JALRS]>;
+def : InstRW<[GenericWriteTrap], (instrs BREAK, SYSCALL, TEQ, TEQI,
+                                  TGE, TGEI, TGEIU, TGEU, TNE,
+                                  TNEI, TLT, TLTI, TLTU, TTLTIU,
+                                  TRAP, SDBBP)>;
 
 // MIPSR6
 // ======
 
-def : ItinRW<[GenericWriteJumpAndLink], [II_BALC, II_JALRC, II_JIALC]>;
+def : InstRW<[GenericWriteJumpAndLink], (instrs BALC, BEQZALC, BGEZALC,
+                                         BGTZALC, BLEZALC, BLTZALC,
+                                         BNEZALC,
+                                         JIALC)>;
 
-def : ItinRW<[GenericWriteJump], [II_JIC, II_BC, II_BCCC, II_BCCZC]>;
+def : InstRW<[GenericWriteJump], (instrs BC, BC2EQZ, BC2NEZ, BEQC, BEQZC, BGEC,
+                                  BGEUC, BGEZC, BGTZC, BLEZC, BLTC, BLTUC,
+                                  BLTZC, BNEC, BNEZC, BNVC, BOVC, JIC, JR_HB_R6,
+                                  SIGRIE, PseudoIndirectBranchR6,
+                                  PseudoIndrectHazardBranchR6)>;
 
+def : InstRW<[GenericWriteJump], (instrs TAILCALLR6REG, TAILCALLHBR6REG)>;
 
-def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
+def : InstRW<[GenericWriteTrap], (instrs SDBBP_R6)>;
+
+// MIPS16e
+// =======
+
+def : InstRW<[GenericWriteJump], (instrs Bimm16, BimmX16, BeqzRxImm16,
+                                  BeqzRxImmX16, BnezRxImm16, BnezRxImmX16,
+                                  Bteqz16, BteqzX16, BteqzT8CmpX16,
+                                  BteqzT8CmpiX16, BteqzT8SltX16,
+                                  BteqzT8SltuX16, BteqzT8SltiX16,
+                                  BteqzT8SltiuX16, Btnez16, BtnezX16,
+                                  BtnezT8CmpX16, BtnezT8CmpiX16,
+                                  BtnezT8SltX16, BtnezT8SltuX16,
+                                  BtnezT8SltiX16, BtnezT8SltiuX16, JrRa16,
+                                  JrcRa16, JrcRx16, RetRA16)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs Jal16, JalB16, JumpLinkReg16)>;
+
+def : InstRW<[GenericWriteTrap], (instrs Break16)>;
+
+def : InstRW<[GenericWriteALULong], (instrs SelBeqZ, SelTBteqZCmp,
+                                     SelTBteqZCmpi, SelTBteqZSlt,
+                                     SelTBteqZSlti, SelTBteqZSltu,
+                                     SelTBteqZSltiu, SelBneZ, SelTBtneZCmp,
+                                     SelTBtneZCmpi, SelTBtneZSlt,
+                                     SelTBtneZSlti, SelTBtneZSltu,
+                                     SelTBtneZSltiu)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteJump], (instrs B16_MM, BAL_BR_MM, BC1F_MM, BC1T_MM,
+                                  BEQZ16_MM, BEQZC_MM, BEQ_MM, BGEZ_MM,
+                                  BGTZ_MM, BLEZ_MM, BLTZ_MM, BNEZ16_MM,
+                                  BNEZC_MM, BNE_MM, B_MM, DERET_MM, ERET_MM,
+                                  JR16_MM, JR_MM, J_MM, B_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZALS_MM, BGEZAL_MM,
+                                         BLTZALS_MM, BLTZAL_MM, JALR16_MM,
+                                         JALRS16_MM, JALRS_MM, JALR_MM,
+                                         JALS_MM, JALX_MM, JAL_MM)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALLREG_MM, TAILCALL_MM,
+                                  PseudoIndirectBranch_MM)>;
+
+def : InstRW<[GenericWriteTrap], (instrs BREAK16_MM, BREAK_MM, SDBBP16_MM,
+                                  SDBBP_MM, SYSCALL_MM, TEQI_MM, TEQ_MM,
+                                  TGEIU_MM, TGEI_MM, TGEU_MM, TGE_MM, TLTIU_MM,
+                                  TLTI_MM, TLTU_MM, TLT_MM, TNEI_MM, TNE_MM,
+                                  TRAP_MM)>;
+
+// microMIPS32r6
+// =============
 
-def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
-                                  II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
-                                  II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
-                                  II_TRAP, II_SDBBP, II_SIGRIE]>;
+def : InstRW<[GenericWriteJump], (instrs BC16_MMR6, BC1EQZC_MMR6, BC1NEZC_MMR6,
+                                  BC2EQZC_MMR6, BC2NEZC_MMR6, BC_MMR6,
+                                  BEQC_MMR6, BEQZC16_MMR6, BEQZC_MMR6,
+                                  BGEC_MMR6, BGEUC_MMR6, BGEZC_MMR6,
+                                  BGTZC_MMR6, BLEZC_MMR6, BLTC_MMR6,
+                                  BLTUC_MMR6, BLTZC_MMR6, BNEC_MMR6,
+                                  BNEZC16_MMR6, BNEZC_MMR6, BNVC_MMR6,
+                                  BOVC_MMR6, DERET_MMR6, ERETNC_MMR6, JAL_MMR6,
+                                  ERET_MMR6, JIC_MMR6, JRADDIUSP, JRC16_MM,
+                                  JRC16_MMR6, JRCADDIUSP_MMR6, SIGRIE_MMR6,
+                                  B_MMR6_Pseudo, PseudoIndirectBranch_MMR6)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BALC_MMR6, BEQZALC_MMR6,
+                                         BGEZALC_MMR6, BGTZALC_MMR6,
+                                         BLEZALC_MMR6, BLTZALC_MMR6,
+                                         BNEZALC_MMR6, JALRC16_MMR6,
+                                         JALRC_HB_MMR6, JALRC_MMR6,
+                                         JIALC_MMR6)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALLREG_MMR6, TAILCALL_MMR6)>;
+
+def : InstRW<[GenericWriteTrap], (instrs BREAK16_MMR6, BREAK_MMR6, SDBBP_MMR6,
+                                  SDBBP16_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteJump], (instrs BEQ64, BGEZ64, BGTZ64, BLEZ64,
+                                  BLTZ64, BNE64, JR64)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs JALR64, JALR64Pseudo,
+                                         JALRHB64Pseudo, JALR_HB64)>;
+
+def : InstRW<[GenericWriteJump], (instrs JR_HB64, TAILCALLREG64,
+                                  TAILCALLREGHB64, PseudoReturn64)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteJump], (instrs BEQC64, BEQZC64, BGEC64, BGEUC64,
+                                  BGEZC64, BGTZC64, BLEZC64, BLTC64, BLTUC64,
+                                  BLTZC64, BNEC64, BNEZC64, JIC64,
+                                  PseudoIndirectBranch64,
+                                  PseudoIndirectHazardBranch64)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs JIALC64)>;
+
+def : InstRW<[GenericWriteJump], (instrs JR_HB64_R6, TAILCALL64R6REG,
+                                  TAILCALLHB64R6REG, PseudoIndirectBranch64R6,
+                                  PseudoIndrectHazardBranch64R6)>;
 
 // COP0 Pipeline
 // =============
@@ -196,35 +443,100 @@ def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> {
 }
 def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>;
 
-def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>;
-def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>;
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBP, TLBR, TLBWI, TLBWR)>;
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBINV, TLBINVF)>;
 
-def : ItinRW<[GenericReadCOP0], [II_MFC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_MTC0]>;
+def : InstRW<[GenericReadCOP0], (instrs MFC0)>;
+def : InstRW<[GenericWriteCOP0], (instrs MTC0)>;
 
-def : ItinRW<[GenericWriteCOP0], [II_EVP, II_DVP]>;
+def : InstRW<[GenericWriteCOP0], (instrs EVP, DVP)>;
 
-// MIPSR5
-// ======
-def : ItinRW<[GenericReadCOP0], [II_MFHC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_MTHC0]>;
+def : InstRW<[GenericWriteCOP0], (instrs DI, EI)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB, PAUSE, WAIT)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBP_MM, TLBR_MM, TLBWI_MM,
+                                     TLBWR_MM)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs DI_MM, EI_MM)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB_MM, PAUSE_MM, WAIT_MM)>;
+
+
+// microMIPS32R6
+// =============
+
+def : InstRW<[GenericWriteCOP0], (instrs RDPGPR_MMR6, WRPGPR_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBINV_MMR6, TLBINVF_MMR6)>;
+
+def : InstRW<[GenericReadCOP0], (instrs MFHC0_MMR6, MFC0_MMR6, MFHC2_MMR6,
+                                 MFC2_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs MTHC0_MMR6, MTC0_MMR6, MTHC2_MMR6,
+                                  MTC2_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EVP_MMR6, DVP_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs DI_MMR6, EI_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB_MMR6, PAUSE_MMR6, WAIT_MMR6)>;
 
 // MIPS64
 // ======
 
-def : ItinRW<[GenericReadCOP0], [II_DMFC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_DMTC0]>;
+def : InstRW<[GenericReadCOP0], (instrs DMFC0)>;
 
-def : ItinRW<[GenericWriteCOP0], [II_RDPGPR, II_WRPGPR]>;
+def : InstRW<[GenericWriteCOP0], (instrs DMTC0)>;
 
-def : ItinRW<[GenericWriteCOP0], [II_DI, II_EI]>;
-
-def : ItinRW<[GenericWriteCOP0], [II_EHB, II_PAUSE, II_WAIT]>;
 
 def GenericCOP2 : ProcResource<1> { let BufferSize = 1; }
 def GenericWriteCOPOther : SchedWriteRes<[GenericCOP2]>;
 
-def : ItinRW<[GenericWriteCOPOther], [II_MFC2, II_MTC2, II_DMFC2, II_DMTC2]>;
+def : InstRW<[GenericWriteCOPOther], (instrs MFC2, MTC2)>;
+
+def : InstRW<[GenericWriteCOPOther], (instrs DMFC2, DMTC2)>;
+
+// microMIPS32R6
+// =============
+
+// The latency and repeat rate of these instructions are implementation
+// dependent.
+def : InstRW<[GenericWriteMove], (instrs CFC2_MM, CTC2_MM)>;
+
+
+// MIPS MT ASE - hasMT
+// ====================
+
+def : InstRW<[GenericWriteMove], (instrs DMT, DVPE, EMT, EVPE, MFTR,
+                                  MTTR)>;
+
+def : InstRW<[GenericReadWriteCOP0Long], (instrs YIELD)>;
+
+def : InstRW<[GenericWriteCOP0Short], (instrs FORK)>;
+
+// MIPS Virtualization ASE
+// =======================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs HYPCALL, TLBGINV, TLBGINVF, TLBGP,
+                                       TLBGR, TLBGWI, TLBGWR, MFGC0, MFHGC0,
+                                       MTGC0, MTHGC0)>;
+
+// MIPS64 Virtualization ASE
+// =========================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs DMFGC0, DMTGC0)>;
+
+// microMIPS virtualization ASE
+// ============================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs HYPCALL_MM, TLBGINVF_MM,
+                                       TLBGINV_MM, TLBGP_MM, TLBGR_MM,
+                                       TLBGWI_MM, TLBGWR_MM, MFGC0_MM,
+                                       MFHGC0_MM, MTGC0_MM, MTHGC0_MM)>;
 
 // LDST Pipeline
 // -------------
@@ -250,97 +562,168 @@ def GenericWriteLoadToOtherUnits : SchedWriteRes<[GenericIssueLDST]> {
 }
 
 // l[bhw], l[bh]u, ll
-def : ItinRW<[GenericWriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LL,
-                                  II_LWC2, II_LWC3, II_LDC2, II_LDC3]>;
+def : InstRW<[GenericWriteLoad], (instrs LB, LBu, LH, LHu, LW, LL,
+                                  LWC2, LWC3, LDC2, LDC3)>;
 
 // lw[lr]
-def : ItinRW<[GenericWriteLoad], [II_LWL, II_LWR]>;
+def : InstRW<[GenericWriteLoad], (instrs LWL, LWR)>;
 
-// MIPS64 loads
-def : ItinRW<[GenericWriteLoad], [II_LD, II_LLD, II_LWU]>;
+// s[bhw], sc, s[dw]c[23]
+def : InstRW<[GenericWriteStore], (instrs SB, SH, SW, SWC2, SWC3,
+                                   SDC2, SDC3)>;
 
-// ld[lr]
-def : ItinRW<[GenericWriteLoad], [II_LDL, II_LDR]>;
+// PreMIPSR6 sw[lr]
+def : InstRW<[GenericWriteStore], (instrs SWL, SWR)>;
 
-// MIPS32 EVA
-def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE,
-                                  II_LLE]>;
+def : InstRW<[GenericWriteStoreSC], (instrs SC, SC_MMR6)>;
 
-def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>;
+// pref
+def : InstRW<[GenericWritePref], (instrs PREF)>;
+// cache
+def : InstRW<[GenericWriteCache], (instrs CACHE)>;
 
-// MIPS MT instructions
-// ====================
+// sync
+def : InstRW<[GenericWriteSync], (instrs SYNC, SYNCI)>;
 
-def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE, II_MFTR,
-                                  II_MTTR]>;
+// MIPSR6
+// ======
 
-def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>;
+def : InstRW<[GenericWriteLoad], (instrs LDC2_R6, LL_R6, LWC2_R6, LWPC)>;
 
-def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>;
+def : InstRW<[GenericWriteStore], (instrs SWC2_R6,  SDC2_R6)>;
 
-// MIPS32R6 and MIPS16e
-// ====================
+def : InstRW<[GenericWriteStoreSC], (instrs SC_R6)>;
 
-def : ItinRW<[GenericWriteLoad], [II_LWPC]>;
+def : InstRW<[GenericWritePref], (instrs PREF_R6)>;
 
-// MIPS64R6
-// ====================
+def : InstRW<[GenericWriteCache], (instrs CACHE_R6)>;
+
+def : InstRW<[GenericWriteSync], (instrs GINVI, GINVT)>;
 
-def : ItinRW<[GenericWriteLoad], [II_LWUPC, II_LDPC]>;
+// MIPS32 EVA
+// ==========
 
+def : InstRW<[GenericWriteLoad], (instrs LBE, LBuE, LHE, LHuE, LWE,
+                                  LLE)>;
 
-// s[bhw], sc, s[dw]c[23]
-def : ItinRW<[GenericWriteStore], [II_SB, II_SH, II_SW, II_SWC2, II_SWC3,
-                                   II_SDC2, II_SDC3]>;
+def : InstRW<[GenericWriteStore], (instrs SBE, SHE, SWE, SCE)>;
 
-def : ItinRW<[GenericWriteStoreSC], [II_SC]>;
+def : InstRW<[GenericWriteLoad], (instrs LWLE, LWRE)>;
 
-// PreMIPSR6 sw[lr]
-def : ItinRW<[GenericWriteStore], [II_SWL, II_SWR]>;
+def : InstRW<[GenericWriteStore], (instrs SWLE, SWRE)>;
 
-// EVA ASE stores
-def : ItinRW<[GenericWriteStore], [II_SBE, II_SHE, II_SWE, II_SCE]>;
+def : InstRW<[GenericWritePref], (instrs PREFE)>;
 
-def : ItinRW<[GenericWriteStore], [II_SWLE, II_SWRE]>;
+def : InstRW<[GenericWriteCache], (instrs CACHEE)>;
 
-// MIPS64
-// ======
+// microMIPS EVA ASE - InMicroMipsMode, hasEVA
+// ===========================================
 
-def : ItinRW<[GenericWriteStore], [II_SD, II_SCD]>;
+def : InstRW<[GenericWriteLoad], (instrs LBE_MM, LBuE_MM, LHE_MM, LHuE_MM,
+                                  LWE_MM, LWLE_MM, LWRE_MM, LLE_MM)>;
 
-// PreMIPSR6 stores
-// ================
+def : InstRW<[GenericWriteStore], (instrs SBE_MM, SB_MM, SHE_MM, SWE_MM,
+                                   SWLE_MM, SWRE_MM, SCE_MM)>;
+
+def : InstRW<[GenericWritePref], (instrs PREFE_MM)>;
+def : InstRW<[GenericWriteCache], (instrs CACHEE_MM)>;
 
-def : ItinRW<[GenericWriteStore], [II_SDL, II_SDR]>;
 
 // MIPS16e
 // =======
 
-def : ItinRW<[GenericWriteLoad], [II_RESTORE]>;
+def : InstRW<[GenericWriteLoad], (instrs Restore16, RestoreX16,
+                                  LbRxRyOffMemX16,
+                                  LbuRxRyOffMemX16, LhRxRyOffMemX16,
+                                  LhuRxRyOffMemX16, LwRxRyOffMemX16,
+                                  LwRxSpImmX16, LwRxPcTcp16, LwRxPcTcpX16)>;
 
-def : ItinRW<[GenericWriteStore], [II_SAVE]>;
+def : InstRW<[GenericWriteStore], (instrs Save16, SaveX16, SbRxRyOffMemX16,
+                                   ShRxRyOffMemX16, SwRxRyOffMemX16,
+                                   SwRxSpImmX16)>;
 
 // microMIPS
 // =========
 
-def : ItinRW<[GenericWriteLoad], [II_LWM, II_LWP, II_LWXS]>;
+def : InstRW<[GenericWriteLoad], (instrs LBU16_MM, LB_MM, LBu_MM, LHU16_MM,
+                                  LH_MM, LHu_MM, LL_MM, LW16_MM, LWGP_MM,
+                                  LWL_MM, LWM16_MM, LWM32_MM, LWP_MM, LWR_MM,
+                                  LWSP_MM, LWU_MM, LWXS_MM, LW_MM)>;
 
-def : ItinRW<[GenericWriteStore], [II_SWM, II_SWP]>;
+def : InstRW<[GenericWriteStore], (instrs SB16_MM, SC_MM, SH16_MM, SH_MM,
+                                   SW16_MM, SWL_MM, SWM16_MM, SWM32_MM, SWM_MM,
+                                   SWP_MM, SWR_MM, SWSP_MM, SW_MM)>;
 
-// pref
-def : ItinRW<[GenericWritePref], [II_PREF]>;
 
-def : ItinRW<[GenericWritePref], [II_PREFE]>;
+def : InstRW<[GenericWritePref], (instrs PREF_MM, PREFX_MM)>;
 
-// cache
-def : ItinRW<[GenericWriteCache], [II_CACHE]>;
+def : InstRW<[GenericWriteCache], (instrs CACHE_MM)>;
 
-def : ItinRW<[GenericWriteCache], [II_CACHEE]>;
+def : InstRW<[GenericWriteSync], (instrs SYNC_MM, SYNCI_MM)>;
+def : InstRW<[GenericWriteSync], (instrs GINVI_MMR6, GINVT_MMR6)>;
 
-// sync
-def : ItinRW<[GenericWriteSync], [II_SYNC]>;
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteLoad], (instrs LBU_MMR6, LB_MMR6, LDC2_MMR6, LL_MMR6,
+                                  LWM16_MMR6, LWC2_MMR6, LWPC_MMR6, LW_MMR6)>;
+
+def : InstRW<[GenericWriteStore], (instrs SB16_MMR6, SB_MMR6, SDC2_MMR6,
+                                   SH16_MMR6, SH_MMR6, SW16_MMR6, SWC2_MMR6,
+                                   SWM16_MMR6, SWSP_MMR6, SW_MMR6)>;
+
+def : InstRW<[GenericWriteSync], (instrs SYNC_MMR6, SYNCI_MMR6)>;
+
+def : InstRW<[GenericWritePref], (instrs PREF_MMR6)>;
 
-def : ItinRW<[GenericWriteSync], [II_SYNCI]>;
+def : InstRW<[GenericWriteCache], (instrs CACHE_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteLoad], (instrs LD, LL64, LLD, LWu, LB64, LBu64,
+                                  LH64, LHu64, LW64)>;
+
+// l[dw][lr]
+def : InstRW<[GenericWriteLoad], (instrs LWL64, LWR64, LDL, LDR)>;
+
+def : InstRW<[GenericWriteStore], (instrs SD, SC64, SCD, SB64, SH64, SW64,
+                                   SWL64, SWR64)>;
+
+def : InstRW<[GenericWriteStore], (instrs SDL, SDR)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteLoad], (instrs LWUPC, LDPC)>;
+
+def : InstRW<[GenericWriteLoad], (instrs LLD_R6, LL64_R6)>;
+
+def : InstRW<[GenericWriteStoreSC], (instrs SC64_R6, SCD_R6)>;
+
+// MIPSR6 CRC ASE - hasCRC
+// =======================
+
+def : InstRW<[GenericWriteALU], (instrs CRC32B, CRC32H, CRC32W, CRC32CB,
+                                 CRC32CH, CRC32CW)>;
+
+// MIPS64R6 CRC ASE - hasCRC
+// -------------------------
+
+def : InstRW<[GenericWriteALU], (instrs CRC32D, CRC32CD)>;
+
+
+// Cavium Networks MIPS (cnMIPS) - Octeon, HasCnMips
+// =================================================
+
+def : InstRW<[GenericWriteALU], (instrs BADDu, BBIT0, BBIT032, BBIT1, BBIT132,
+                                 CINS, CINS32, CINS64_32, CINS_i32,
+                                 DMFC2_OCTEON, DMTC2_OCTEON, DPOP, EXTS,
+                                 EXTS32, MTM0, MTM1, MTM2, MTP0, MTP1, MTP2,
+                                 POP, SEQ, SEQi, SNE, SNEi, V3MULU, VMM0,
+                                 VMULU)>;
+
+def : InstRW<[GenericWriteMDUtoGPR], (instrs DMUL)>;
 
 // FPU Pipelines
 // =============
@@ -408,10 +791,10 @@ def GenericWriteFPUSqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
 // ---------------------------------
 //
 // c.<cc>.[ds], bc1[tf], bc1[tf]l
-def : ItinRW<[GenericWriteFPUCmp], [II_C_CC_D, II_C_CC_S, II_BC1F, II_BC1T,
-                                    II_BC1FL, II_BC1TL]>;
+def : InstRW<[GenericWriteFPUCmp], (instrs FCMP_D32, FCMP_D64, FCMP_S32, BC1F,
+                                    BC1T, BC1FL, BC1TL)>;
 
-def : ItinRW<[GenericWriteFPUCmp], [II_CMP_CC_D, II_CMP_CC_S]>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "C_[A-Z]+_(S|D32|D64)$")>;
 
 // Short Pipe
 // ----------
@@ -419,21 +802,10 @@ def : ItinRW<[GenericWriteFPUCmp], [II_CMP_CC_D, II_CMP_CC_S]>;
 // abs.[ds], abs.ps, add.[ds], neg.[ds], neg.ps, madd.s, msub.s, nmadd,s
 // nmsub.s, sub.[ds], mul.s
 
-def : ItinRW<[GenericWriteFPUS], [II_ABS, II_ADD_D, II_ADD_S, II_MADD_S,
-                                  II_MSUB_S, II_MUL_S, II_NEG, II_NMADD_S,
-                                  II_NMSUB_S, II_SUB_S, II_SUB_D]>;
-// mov[tf].[ds]
-
-def : ItinRW<[GenericWriteFPUS], [II_MOVF_S, II_MOVF_D, II_MOVT_S, II_MOVT_D]>;
-
-// MIPSR6
-// ------
-//
-// sel(eq|ne).[ds], max.[ds], maxa.[ds], min.[ds], mina.[ds], class.[ds]
-def : ItinRW<[GenericWriteFPUS], [II_SELCCZ_S, II_SELCCZ_D, II_MAX_S,
-                                  II_MAX_D, II_MAXA_S, II_MAXA_D, II_MIN_S,
-                                  II_MIN_D, II_MINA_S, II_MINA_D, II_CLASS_S,
-                                  II_CLASS_D]>;
+def : InstRW<[GenericWriteFPUS], (instrs FABS_S, FABS_D32, FABS_D64, FADD_D32,
+                                  FADD_D64, FADD_S, MADD_S, MSUB_S, FMUL_S,
+                                  FNEG_S, FNEG_D32, FNEG_D64, NMADD_S, NMSUB_S,
+                                  FSUB_S, FSUB_D32, FSUB_D64)>;
 
 // Long Pipe
 // ----------
@@ -445,71 +817,211 @@ def : ItinRW<[GenericWriteFPUS], [II_SELCCZ_S, II_SELCCZ_D, II_MAX_S,
 // madd.d, msub.dm mul.d, mul.ps, nmadd.d, nmsub.d, ceil.[wl].[sd], cvt.d.[sw],
 // cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, round.[lw].[ds], floor.[lw].ds,
 // trunc.w.[ds], trunc.w.ps,
-def : ItinRW<[GenericWriteFPUL], [II_MADD_D, II_MSUB_D, II_MUL_D, II_NMADD_D,
-                                  II_NMSUB_D, II_CEIL, II_CVT,
-                                  II_FLOOR, II_ROUND, II_TRUNC]>;
+def : InstRW<[GenericWriteFPUL], (instrs CEIL_L_D64, CEIL_L_S, CEIL_W_D32,
+                                  CEIL_W_D64, CEIL_W_S, CVT_D32_S, CVT_D32_W,
+                                  CVT_D64_L, CVT_D64_S, CVT_D64_W, CVT_L_D64,
+                                  CVT_L_S, CVT_S_D32, CVT_S_D64, CVT_S_L,
+                                  CVT_S_W, CVT_W_D32, CVT_W_D64, CVT_W_S,
+                                  CVT_PS_S64, CVT_S_PL64, CVT_S_PU64,
+                                  FLOOR_L_D64, FLOOR_L_S, FLOOR_W_D32,
+                                  FLOOR_W_D64, FLOOR_W_S, FMUL_D32, FMUL_D64,
+                                  MADD_D32, MADD_D64, MSUB_D32, MSUB_D64,
+                                  NMADD_D32, NMADD_D64, NMSUB_D32, NMSUB_D64,
+                                  PLL_PS64, PLU_PS64,
+                                  ROUND_L_D64, ROUND_L_S, ROUND_W_D32,
+                                  ROUND_W_D64, ROUND_W_S, TRUNC_L_D64,
+                                  TRUNC_L_S, TRUNC_W_D32, TRUNC_W_D64,
+                                  TRUNC_W_S, PseudoTRUNC_W_D,
+                                  PseudoTRUNC_W_D32, PseudoTRUNC_W_S)>;
+
+// Pseudo convert instruction
+def : InstRW<[GenericWriteFPUL], (instrs PseudoCVT_D32_W, PseudoCVT_D64_L,
+                                  PseudoCVT_D64_W, PseudoCVT_S_L,
+                                  PseudoCVT_S_W)>;
 
 // div.[ds], div.ps
-def : ItinRW<[GenericWriteFPUDivS], [II_DIV_S]>;
-def : ItinRW<[GenericWriteFPUDivD], [II_DIV_D]>;
+def : InstRW<[GenericWriteFPUDivS], (instrs FDIV_S)>;
+def : InstRW<[GenericWriteFPUDivD], (instrs FDIV_D32, FDIV_D64)>;
 
 // sqrt.[ds], sqrt.ps
-def : ItinRW<[GenericWriteFPUSqrtS], [II_SQRT_S]>;
-def : ItinRW<[GenericWriteFPUSqrtD], [II_SQRT_D]>;
+def : InstRW<[GenericWriteFPUSqrtS], (instrs FSQRT_S)>;
+def : InstRW<[GenericWriteFPUSqrtD], (instrs FSQRT_D32, FSQRT_D64)>;
 
 // rsqrt.[ds], recip.[ds]
-def : ItinRW<[GenericWriteFPURcpS], [II_RECIP_S, II_RSQRT_S]>;
-def : ItinRW<[GenericWriteFPURcpD], [II_RECIP_D, II_RSQRT_D]>;
+def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S, RSQRT_S)>;
+def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32, RECIP_D64,
+                                     RSQRT_D32, RSQRT_D64)>;
 
-// MIPSR6
-// ======
-//
-// rint.[ds]
-def : ItinRW<[GenericWriteFPUL], [II_RINT_S, II_RINT_D]>;
 
 // Load Pipe
 // ---------
 
 // ctc1, mtc1, mthc1, cfc1, mfc1, mfhc1
-def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_CFC1, II_CTC1, II_MFC1, II_MFHC1,
-                                           II_MTC1, II_MTHC1]>;
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs BuildPairF64,
+                                           BuildPairF64_64, ExtractElementF64,
+                                           ExtractElementF64_64, CFC1, CTC1,
+                                           MFC1, MFC1_D64, MFHC1_D32,
+                                           MFHC1_D64, MTC1, MTC1_D64,
+                                           MTHC1_D32, MTHC1_D64)>;
 
 // swc1, swxc1
-def : ItinRW<[GenericWriteFPUStore], [II_SDC1, II_SDXC1, II_SUXC1, II_SWC1,
-                                      II_SWXC1]>;
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1, SDC164, SDXC1, SDXC164,
+                                      SUXC1, SUXC164, SWC1, SWXC1)>;
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs FMOV_D32, FMOV_D64, FMOV_S)>;
+
 
 // movn.[ds], movz.[ds]
-def : ItinRW<[GenericWriteFPUMoveFP], [II_MOV_D, II_MOV_S, II_MOVF, II_MOVT,
-                                       II_MOVN_D, II_MOVN_S, II_MOVZ_D,
-                                       II_MOVZ_S]>;
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVF_I, MOVF_D32, MOVF_D64,
+                                       MOVF_S, MOVT_I, MOVT_D32, MOVT_D64,
+                                       MOVT_S, MOVN_I_D32, MOVN_I_D64,
+                                       MOVN_I_S, MOVZ_I_D32, MOVZ_I_D64,
+                                       MOVZ_I_S)>;
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVT_I64, MOVF_I64, MOVZ_I64_S,
+                                       MOVN_I64_D64, MOVN_I64_S,
+                                       MOVZ_I64_D64)>;
 
 // l[dw]x?c1
-def : ItinRW<[GenericWriteFPULoad], [II_LDC1, II_LDXC1, II_LUXC1, II_LWC1,
-                                     II_LWXC1]>;
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1, LDC164, LDXC1, LDXC164,
+                                     LUXC1, LUXC164, LWC1, LWXC1)>;
 
-// MIPS64
+// MIPSR6
 // ======
 
-def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_DMFC1, II_DMTC1]>;
+// sel(eq|ne).[ds], max.[ds], maxa.[ds], min.[ds], mina.[ds], class.[ds]
+def : InstRW<[GenericWriteFPUS], (instrs SELEQZ_S, SELNEZ_S, SELEQZ_D, SELNEZ_D,
+                                  MAX_S, MAX_D, MAXA_S, MAXA_D, MIN_S, MIN_D,
+                                  MINA_S, MINA_D, CLASS_S, CLASS_D)>;
 
-// MIPSR6
-// ======
+def : InstRW<[GenericWriteFPUL], (instrs RINT_S, RINT_D)>;
 
-def : ItinRW<[GenericWriteFPUS], [II_MADDF_S, II_MSUBF_S]>;
+def : InstRW<[GenericWriteFPUCmp], (instrs BC1EQZ, BC1NEZ, SEL_D, SEL_S)>;
 
-def : ItinRW<[GenericWriteFPUS], [II_MADDF_D, II_MSUBF_D]>;
+def : InstRW<[GenericWriteFPUS], (instrs MADDF_S, MSUBF_S, MADDF_D, MSUBF_D)>;
 
-def : ItinRW<[GenericWriteFPUCmp], [II_BC1CCZ, II_SEL_D, II_SEL_S]>;
 
-// Cavium Networks MIPS (cnMIPS) - Octeon, HasCnMips
-// =================================================
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVF_D32_MM, MOVF_S_MM,
+                                       MOVN_I_D32_MM, MOVN_I_S_MM,
+                                       MOVT_D32_MM, MOVT_S_MM, MOVZ_I_D32_MM,
+                                       MOVZ_I_S_MM)>;
+
+
+// cvt.?.?, ceil.?, floor.?, round.?, trunc.?, (n)madd.?, (n)msub.?
+def : InstRW<[GenericWriteFPUL], (instrs CVT_D32_S_MM, CVT_D32_W_MM,
+                                  CVT_D64_S_MM, CVT_D64_W_MM, CVT_L_D64_MM,
+                                  CVT_L_S_MM, CVT_S_D32_MM, CVT_S_D64_MM,
+                                  CVT_S_W_MM, CVT_W_D32_MM, CVT_W_D64_MM,
+                                  CVT_W_S_MM, CEIL_W_MM, CEIL_W_S_MM,
+                                  FLOOR_W_MM, FLOOR_W_S_MM, NMADD_S_MM,
+                                  NMADD_D32_MM, NMSUB_S_MM, NMSUB_D32_MM,
+                                  MADD_S_MM, MADD_D32_MM, ROUND_W_MM,
+                                  ROUND_W_S_MM, TRUNC_W_MM, TRUNC_W_S_MM)>;
+
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z][A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z][A-Z][A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_NGLE_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instrs FCMP_S32_MM, FCMP_D32_MM)>;
+
+def : InstRW<[GenericWriteFPUS], (instrs MFC1_MM, MFHC1_D32_MM, MFHC1_D64_MM,
+                                  MTC1_MM, MTC1_D64_MM,
+                                  MTHC1_D32_MM, MTHC1_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUS], (instrs FABS_D32_MM, FABS_D64_MM, FABS_S_MM,
+                                  FNEG_D32_MM, FNEG_D64_MM, FNEG_S_MM,
+                                  FADD_D32_MM, FADD_D64_MM, FADD_S_MM,
+                                  FMOV_D32_MM, FMOV_D64_MM, FMOV_S_MM,
+                                  FMUL_D32_MM, FMUL_D64_MM, FMUL_S_MM,
+                                  FSUB_D32_MM, FSUB_D64_MM, FSUB_S_MM,
+                                  MSUB_S_MM, MSUB_D32_MM)>;
+
+def : InstRW<[GenericWriteFPUDivS], (instrs FDIV_S_MM)>;
+def : InstRW<[GenericWriteFPUDivD], (instrs FDIV_D32_MM, FDIV_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUSqrtS], (instrs FSQRT_S_MM)>;
+def : InstRW<[GenericWriteFPUSqrtD], (instrs FSQRT_D32_MM, FSQRT_D64_MM)>;
+
+def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S_MM, RSQRT_S_MM)>;
+def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32_MM, RECIP_D64_MM,
+                                     RSQRT_D32_MM, RSQRT_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM, SWC1_MM, SUXC1_MM,
+                                      SWXC1_MM)>;
+
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs CFC1_MM, CTC1_MM)>;
+
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM, LUXC1_MM, LWC1_MM,
+                                     LWXC1_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteFPUS], (instrs FNEG_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUCmp], (instregex "CMP_[A-Z][A-Z]_(S|D)_MMR6")>;
+def : InstRW<[GenericWriteFPUCmp],
+             (instregex "CMP_[A-Z][A-Z][A-Z]_(S|D)_MMR6")>;
+def : InstRW<[GenericWriteFPUCmp],
+             (instregex "CMP_[A-Z][A-Z][A-Z][A-Z]_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+             (instregex "CVT_(L|D|S|W)_(L|D|S|W)_MMR6")>;
 
-def : ItinRW<[GenericWriteALU], [II_SEQ_SNE, II_SEQI_SNEI, II_POP, II_BADDU,
-                                 II_BBIT]>;
+def : InstRW<[GenericWriteFPUL],
+             (instregex "TRUNC_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+             (instregex "ROUND_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+             (instregex "FLOOR_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+             (instregex "CEIL_(L|W)_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS],
+             (instrs MFC1_MMR6, MTC1_MMR6, CLASS_S_MMR6, CLASS_D_MMR6,
+              FADD_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(IN|AX)_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(IN|AX)A_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "SEL(EQ|NE)Z_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "SEL_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL], (instrs RINT_S_MMR6, RINT_D_MMR6)>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(ADD|SUB)F_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instrs FMOV_S_MMR6, FMUL_S_MMR6,
+                                  FSUB_S_MMR6, FMOV_D_MMR6)>;
+
+def : InstRW<[GenericWriteFPUL], (instrs FDIV_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1_D64_MMR6)>;
+
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1_D64_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs DMFC1, DMTC1)>;
 
 // MIPS DSP ASE, HasDSP
 // ====================
 
+def : InstRW<[GenericWriteStore], (instrs SWDSP)>;
+
+def : InstRW<[GenericWriteLoad], (instrs LWDSP)>;
+
+def : InstRW<[GenericWriteMove], (instrs PseudoMTLOHI_DSP)>;
+
 def GenericDSP : ProcResource<1> { let BufferSize = 1; }
 def GenericDSPShort : SchedWriteRes<[GenericDSP]> { let Latency = 2; }
 def GenericDSPLong : SchedWriteRes<[GenericDSP]> { let Latency = 6; }
@@ -634,6 +1146,11 @@ def : InstRW<[GenericDSPShort], (instregex "^SUBU_QB$")>;
 def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_QB$")>;
 def : InstRW<[GenericDSPShort], (instregex "^WRDSP$")>;
 
+def : InstRW<[GenericDSPShort],
+             (instregex "^Pseudo(CMP|CMPU)_(EQ|LE|LT)_(PH|QB)$")>;
+def : InstRW<[GenericDSPShort],
+             (instregex "^PseudoPICK_(PH|QB)$")>;
+
 // MIPS DSP R2 - hasDSP, HasDSPR2, InMicroMips
 // ===========================================
 
@@ -687,6 +1204,10 @@ def : InstRW<[GenericDSPShort], (instregex "^SUBUH_R_QB$")>;
 // microMIPS DSP R1 - HasDSP, InMicroMips
 // ======================================
 
+def : InstRW<[GenericWriteLoad], (instrs LWDSP_MM)>;
+
+def : InstRW<[GenericWriteStore], (instrs SWDSP_MM)>;
+
 def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_PH_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_W_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^ADDQ_PH_MM$")>;
@@ -740,7 +1261,6 @@ def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHR_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MODSUB_MM$")>;
-def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MMR6$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MOVN_I_MM$")>;
 def : InstRW<[GenericDSPShort], (instregex "^MOVZ_I_MM$")>;
@@ -902,12 +1422,14 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
 
-// and.v, andi.b, move.v, ldi.[bhwd], xor.v, nor.v, xori.b, nori.b
+// and.v, andi.b, move.v, ldi.[bhwd], xor.v, nor.v, xori.b, nori.b, lsa
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^MOVE_V$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instrs LSA)>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
-def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+def : InstRW<[GenericWriteMSAShortLogic],
+             (instregex "^(AND|OR|[XN]OR)_V_[DHW]_PSEUDO$")>;
 
 // vshf.[bhwd], binsl.[bhwd], binsr.[bhwd], insert.[bhwd], sld?.[bhwd],
 // bset.[bhwd], bclr.[bhwd], bneg.[bhwd], bsel_v, bseli_b
@@ -921,8 +1443,10 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+def : InstRW<[GenericWriteMSAShortInt],
+             (instregex "^BSEL_(H|W|D|FW|FD)_PSEUDO$")>;
 
-// pcnt.[bhwd], sat_s.[bhwd], sat_u.bhwd]
+// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
 def : InstRW<[GenericWriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
 
@@ -935,10 +1459,6 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^SHF_[BHW]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^FILL_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^(SPLAT|SPLATI)_[BHWD]$")>;
 
-// pcnt.[bhwd], sat_s.[bhwd], sat_u.bhwd]
-def : InstRW<[GenericWriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
-def : InstRW<[GenericWriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
-
 // fexp2_w, fexp2_d
 def : InstRW<[GenericWriteFPUS], (instregex "^FEXP2_(W|D)$")>;
 
@@ -953,6 +1473,15 @@ def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LT_(S|D)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULT_(S|D)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LE_(S|D)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULE_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_F_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SAF_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SEQ_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SLE_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SLT_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SUEQ_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SULE_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SULT_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SUN_(D|S)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^FS(AF|EQ|LT|LE|NE|OR)_(W|D)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^FSUEQ_(W|D)$")>;
 def : InstRW<[GenericWriteFPUS], (instregex "^FSULE_(W|D)$")>;
@@ -995,7 +1524,6 @@ def : InstRW<[GenericWriteFPUS], (instregex "^FLOG2_(W|D)$")>;
 // interleave right/left, interleave even/odd, insert
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVR|ILVL)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVEV|ILVOD)_[BHWD]$")>;
-def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
 
 // subs_?.[bhwd], subsus_?.[bhwd], subsuu_?.[bhwd], subvi.[bhwd], subv.[bhwd],
 def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBS_(S|U)_[BHWD]$")>;
@@ -1027,6 +1555,8 @@ def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SLL|SLLI)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(PCKEV|PCKOD)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
 def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSERT_F(D|W)_PSEUDO$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^FILL_F(D|W)_PSEUDO$")>;
 
 // dpadd_?.[bhwd], dpsub_?.[bhwd], dotp_?.[bhwd], msubv.[bhwd], maddv.[bhwd]
 // mulv.[bhwd].
@@ -1062,5 +1592,23 @@ def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_U_[BHW]$")>;
 def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_S_[BHWD]$")>;
 
 def : InstRW<[GenericWriteFPUStore], (instregex "^ST_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUStore], (instrs ST_F16)>;
 def : InstRW<[GenericWriteFPULoad], (instregex "^LD_[BHWD]$")>;
+def : InstRW<[GenericWriteFPULoad], (instrs LD_F16)>;
+
+// Atomic instructions
+
+// FIXME: Define `WriteAtomic` in MipsSchedule.td and
+// attach it to the Atomic2OpsPostRA, AtomicCmpSwapPostRA, ...
+// classes. Then just define resources for `WriteAtomic` in each
+// machine model.
+def GenericAtomic : ProcResource<1> { let BufferSize = 1; }
+def GenericWriteAtomic : SchedWriteRes<[GenericAtomic]> { let Latency = 2; }
+
+def : InstRW<[GenericWriteAtomic],
+    (instregex "^ATOMIC_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[GenericWriteAtomic],
+    (instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[GenericWriteAtomic],
+    (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
 }
diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td
index 846fa11494c7..f97b03bff08e 100644
--- a/lib/Target/Mips/MipsScheduleP5600.td
+++ b/lib/Target/Mips/MipsScheduleP5600.td
@@ -1,9 +1,8 @@
 //==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,12 +12,13 @@ def MipsP5600Model : SchedMachineModel {
   int LoadLatency = 4;
   int MispredictPenalty = 8; // TODO: Estimated
 
-  let CompleteModel = 0;
+  let CompleteModel = 1;
   let FullInstRWOverlapCheck = 1;
 
-  list<Predicate> UnsupportedFeatures = [HasMips32r6, HasMips64r6,
-                                         HasMips3, HasMips64r2, HasCnMips,
-                                         InMicroMips, InMips16Mode,
+  list<Predicate> UnsupportedFeatures = [HasMips3, HasMips32r6, HasMips64,
+                                         HasMips64r2, HasMips64r5, HasMips64r6,
+                                         IsGP64bit, IsPTR64bit,
+                                         InMicroMips, InMips16Mode, HasCnMips,
                                          HasDSP, HasDSPR2, HasMT, HasCRC];
 }
 
@@ -59,15 +59,21 @@ def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> {
   let Latency = 2;
 }
 
+def P5600Nop : SchedWriteRes<[P5600IssueCTISTD]> {
+  let Latency = 0;
+}
+
+def : InstRW<[P5600Nop], (instrs SSNOP, NOP)>;
+
 // b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal,
 // jalr, jr.hb, jr
 def : InstRW<[P5600WriteJump], (instrs B, BAL, BAL_BR, BEQ, BEQL, BGEZ, BGEZAL,
                                 BGEZALL, BGEZL, BGTZ, BGTZL, BLEZ, BLEZL, BLTZ,
                                 BLTZAL, BLTZALL, BLTZL, BNE, BNEL, BREAK,
-                                DERET, ERET, ERETNC, J, JR, JR_HB,
+                                DERET, ERET, ERet, ERETNC, J, JR, JR_HB,
                                 PseudoIndirectBranch,
                                 PseudoIndirectHazardBranch, PseudoReturn,
-                                SDBBP, SSNOP, SYSCALL, TAILCALL, TAILCALLREG,
+                                SDBBP, SYSCALL, RetRA, TAILCALL, TAILCALLREG,
                                 TAILCALLREGHB, TEQ, TEQI, TGE, TGEI, TGEIU,
                                 TGEU, TLT, TLTI, TLTU, TNE, TNEI, TRAP,
                                 TTLTIU, WAIT, PAUSE)>;
@@ -90,6 +96,11 @@ def : InstRW<[P5600COP2], (instrs MFC2, MTC2)> {
   let Unsupported = 1;
 }
 
+// MIPS Virtualization ASE
+// =======================
+def : InstRW<[P5600COP0], (instrs HYPCALL, MFGC0, MFHGC0, MTGC0, MTHGC0,
+                           TLBGINV, TLBGINVF, TLBGP, TLBGR, TLBGWI, TLBGWR)>;
+
 // LDST Pipeline
 // -------------
 
@@ -288,6 +299,8 @@ def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
 def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
 def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
 def : InstRW<[P5600WriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+def : InstRW<[P5600WriteMSAShortInt],
+             (instregex "^BSEL_(H|W|D|FW|FD)_PSEUDO$")>;
 
 // pcnt.[bhwd], sat_s.[bhwd], sat_u.bhwd]
 def : InstRW<[P5600WriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
@@ -335,6 +348,10 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>;
 def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
 def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
 def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+def : InstRW<[P5600WriteMSAShortLogic],
+             (instregex "^(AND|OR|[XN]OR)_V_[DHW]_PSEUDO$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^FILL_F(D|W)_PSEUDO$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^INSERT_F(D|W)_PSEUDO$")>;
 
 // fexp2_w, fexp2_d
 def : InstRW<[P5600WriteFPUS], (instregex "^FEXP2_(W|D)$")>;
@@ -427,17 +444,19 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
 // ----------
 //
 // add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps,
-// cvt.ps.[sw], c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps, sub.[ds], sub.ps,
-// trunc.w.[ds], trunc.w.ps
+// cvt.ps.[sw], cvt.s.(pl|pu), c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps,
+// pl[lu].ps, sub.[ds], sub.ps, trunc.w.[ds], trunc.w.ps
 def : InstRW<[P5600WriteFPUL],
              (instrs FADD_D32, FADD_D64, FADD_S, FMUL_D32, FMUL_D64, FMUL_S,
               FSUB_D32, FSUB_D64, FSUB_S)>;
 def : InstRW<[P5600WriteFPUL], (instregex "^TRUNC_(L|W)_(S|D32|D64)$")>;
 def : InstRW<[P5600WriteFPUL],
              (instregex "^CVT_(S|D32|D64|L|W)_(S|D32|D64|L|W)$")>;
+def : InstRW<[P5600WriteFPUL], (instrs CVT_PS_S64, CVT_S_PL64, CVT_S_PU64)>;
 def : InstRW<[P5600WriteFPUL], (instregex "^C_[A-Z]+_(S|D32|D64)$")>;
 def : InstRW<[P5600WriteFPUL], (instregex "^FCMP_(S32|D32|D64)$")>;
 def : InstRW<[P5600WriteFPUL], (instregex "^PseudoCVT_(S|D32|D64)_(L|W)$")>;
+def : InstRW<[P5600WriteFPUL], (instrs PLL_PS64, PLU_PS64)>;
 
 // div.[ds], div.ps
 def : InstRW<[P5600WriteFPUDivS], (instrs FDIV_S)>;
@@ -555,16 +574,20 @@ def : InstRW<[P5600WriteMoveFPUToGPR], (instrs BC1F, BC1FL, BC1T, BC1TL, CFC1,
                                         ExtractElementF64_64)>;
 
 // swc1, swxc1, st.[bhwd]
-def : InstRW<[P5600WriteStoreFPUS], (instrs SDC1, SDXC1, SUXC1, SWC1, SWXC1)>;
+def : InstRW<[P5600WriteStoreFPUS], (instrs SDC1, SDC164, SDXC1, SDXC164,
+                                     SWC1, SWXC1, SUXC1, SUXC164)>;
 def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>;
+def : InstRW<[P5600WriteStoreFPUS], (instrs ST_F16)>;
 
 // movn.[ds], movz.[ds]
 def : InstRW<[P5600WriteStoreFPUL], (instrs MOVN_I_D32, MOVN_I_D64, MOVN_I_S,
                                      MOVZ_I_D32, MOVZ_I_D64, MOVZ_I_S)>;
 
 // l[dw]x?c1, ld.[bhwd]
-def : InstRW<[P5600WriteLoadFPU], (instrs LDC1, LDXC1, LWC1, LWXC1, LUXC1)>;
+def : InstRW<[P5600WriteLoadFPU], (instrs LDC1, LDC164, LDXC1, LDXC164,
+                                   LWC1, LWXC1, LUXC1, LUXC164)>;
 def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
+def : InstRW<[P5600WriteLoadFPU], (instrs LD_F16)>;
 
 // Unsupported Instructions
 // ========================
@@ -593,4 +616,20 @@ def : InstRW<[P5600WriteFPUL], (instregex "^ROUND_(L|W)_(S|D32|D64)$")>;
 // Reason behind guess: rotr is in the same category and the two register forms
 //                      generally follow the immediate forms in this category
 def : InstRW<[P5600WriteEitherALU], (instrs ROTRV)>;
+
+// Atomic instructions
+
+// FIXME: Define `WriteAtomic` in MipsSchedule.td and
+// attach it to the Atomic2OpsPostRA, AtomicCmpSwapPostRA, ...
+// classes. Then just define resources for `WriteAtomic` in each
+// machine model.
+def P5600Atomic : ProcResource<1> { let BufferSize = 1; }
+def P5600WriteAtomic : SchedWriteRes<[P5600Atomic]> { let Latency = 2; }
+
+def : InstRW<[P5600WriteAtomic],
+    (instregex "^ATOMIC_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[P5600WriteAtomic],
+    (instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[P5600WriteAtomic],
+    (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
 }
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 0c39a45467c4..d021b3d021b1 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -73,7 +72,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
                              unsigned StackAlignOverride)
     : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
       IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
-      NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+      NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
       IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
       HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
       HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
@@ -109,6 +108,11 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
                        "See -mattr=+fp64.",
                        false);
 
+  if (isFP64bit() && !hasMips64() && hasMips32() && !hasMips32r2())
+    report_fatal_error(
+        "FPU with 64-bit registers is not available on MIPS32 pre revision 2. "
+        "Use -mcpu=mips32r2 or greater.");
+
   if (!isABI_O32() && !useOddSPReg())
     report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false);
 
@@ -129,11 +133,18 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
       report_fatal_error(
           "indirect jumps with hazard barriers requires MIPS32R2 or later");
   }
+  if (inAbs2008Mode() && hasMips32() && !hasMips32r2()) {
+    report_fatal_error("IEEE 754-2008 abs.fmt is not supported for the given "
+                       "architecture.",
+                       false);
+  }
+
   if (hasMips32r6()) {
     StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
 
     assert(isFP64bit());
     assert(isNaN2008());
+    assert(inAbs2008Mode());
     if (hasDSP())
       report_fatal_error(ISA + " is not compatible with the DSP ASE", false);
   }
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ad8f4848b870..aa1200579fc8 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -1,9 +1,8 @@
 //===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -87,6 +86,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
   // NoABICalls - Disable SVR4-style position-independent code.
   bool NoABICalls;
 
+  // Abs2008 - Use IEEE 754-2008 abs.fmt instruction.
+  bool Abs2008;
+
   // IsFP64bit - The target processor has 64-bit floating point registers.
   bool IsFP64bit;
 
@@ -273,6 +275,7 @@ public:
   bool useOddSPReg() const { return UseOddSPReg; }
   bool noOddSPReg() const { return !UseOddSPReg; }
   bool isNaN2008() const { return IsNaN2008bit; }
+  bool inAbs2008Mode() const { return Abs2008; }
   bool isGP64bit() const { return IsGP64bit; }
   bool isGP32bit() const { return !IsGP64bit; }
   unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8466298cf36f..c878abb042e4 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "MipsSEISelDAGToDAG.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetObjectFile.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -205,8 +205,7 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
 void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
   LLVM_DEBUG(dbgs() << "resetSubtarget\n");
 
-  Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(MF->getFunction()));
-  MF->setSubtarget(Subtarget);
+  Subtarget = &MF->getSubtarget<MipsSubtarget>();
 }
 
 namespace {
@@ -240,6 +239,8 @@ public:
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   bool addGlobalInstructionSelect() override;
+
+  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
 
 } // end anonymous namespace
@@ -248,6 +249,10 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new MipsPassConfig(*this, PM);
 }
 
+std::unique_ptr<CSEConfigBase> MipsPassConfig::getCSEConfig() const {
+  return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
 void MipsPassConfig::addIRPasses() {
   TargetPassConfig::addIRPasses();
   addPass(createAtomicExpandPass());
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index d9b73d151119..25300504a02d 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,9 +1,8 @@
 //===- MipsTargetMachine.h - Define TargetMachine for Mips ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,7 +29,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
   // Selected ABI
   MipsABIInfo ABI;
-  MipsSubtarget *Subtarget;
+  const MipsSubtarget *Subtarget;
   MipsSubtarget DefaultSubtarget;
   MipsSubtarget NoMips16Subtarget;
   MipsSubtarget Mips16Subtarget;
@@ -66,10 +65,6 @@ public:
 
   bool isLittleEndian() const { return isLittle; }
   const MipsABIInfo &getABI() const { return ABI; }
-
-  bool isMachineVerifierClean() const override {
-    return false;
-  }
 };
 
 /// Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index f53ee0631b5e..0852b5a18c68 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index a37ec154ff79..bdf485f83260 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index a282366f6d40..1fa8ebadd643 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- MipsTargetStreamer.h - Mips Target Streamer ------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -92,6 +91,7 @@ public:
 
   // PIC support
   virtual void emitDirectiveCpLoad(unsigned RegNo);
+  virtual void emitDirectiveCpLocal(unsigned RegNo);
   virtual bool emitDirectiveCpRestore(int Offset,
                                       function_ref<unsigned()> GetATReg,
                                       SMLoc IDLoc, const MCSubtargetInfo *STI);
@@ -200,6 +200,7 @@ protected:
   bool FrameInfoSet;
   int FrameOffset;
   unsigned FrameReg;
+  unsigned GPReg;
   unsigned ReturnReg;
 
 private:
@@ -275,6 +276,7 @@ public:
 
   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpLocal(unsigned RegNo) override;
 
   /// Emit a .cprestore directive.  If the offset is out of range then it will
   /// be synthesized using the assembler temporary.
@@ -346,6 +348,7 @@ public:
 
   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpLocal(unsigned RegNo) override;
   bool emitDirectiveCpRestore(int Offset, function_ref<unsigned()> GetATReg,
                               SMLoc IDLoc, const MCSubtargetInfo *STI) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 22be564b6502..0082ca34cdbd 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Mips.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/MipsTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.h b/lib/Target/Mips/TargetInfo/MipsTargetInfo.h
new file mode 100644
index 000000000000..d91a2719108d
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.h
@@ -0,0 +1,23 @@
+//===-- MipsTargetInfo.h - Mips Target Implementation -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
+#define LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheMipsTarget();
+Target &getTheMipselTarget();
+Target &getTheMips64Target();
+Target &getTheMips64elTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
deleted file mode 100644
index b774fe169d71..000000000000
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-//===-- NVPTXInstPrinter.cpp - PTX assembly instruction printing ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Print MCInst instructions to .ptx format.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InstPrinter/NVPTXInstPrinter.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
-#include "NVPTX.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include <cctype>
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "NVPTXGenAsmWriter.inc"
-
-NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                                   const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  // Decode the virtual register
-  // Must be kept in sync with NVPTXAsmPrinter::encodeVirtualRegister
-  unsigned RCId = (RegNo >> 28);
-  switch (RCId) {
-  default: report_fatal_error("Bad virtual register encoding");
-  case 0:
-    // This is actually a physical register, so defer to the autogenerated
-    // register printer
-    OS << getRegisterName(RegNo);
-    return;
-  case 1:
-    OS << "%p";
-    break;
-  case 2:
-    OS << "%rs";
-    break;
-  case 3:
-    OS << "%r";
-    break;
-  case 4:
-    OS << "%rd";
-    break;
-  case 5:
-    OS << "%f";
-    break;
-  case 6:
-    OS << "%fd";
-    break;
-  case 7:
-    OS << "%h";
-    break;
-  case 8:
-    OS << "%hh";
-    break;
-  }
-
-  unsigned VReg = RegNo & 0x0FFFFFFF;
-  OS << VReg;
-}
-
-void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                 StringRef Annot, const MCSubtargetInfo &STI) {
-  printInstruction(MI, OS);
-
-  // Next always print the annotation.
-  printAnnotation(OS, Annot);
-}
-
-void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    printRegName(O, Reg);
-  } else if (Op.isImm()) {
-    O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
-  } else {
-    assert(Op.isExpr() && "Unknown operand kind in printOperand");
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                                    const char *Modifier) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  int64_t Imm = MO.getImm();
-
-  if (strcmp(Modifier, "ftz") == 0) {
-    // FTZ flag
-    if (Imm & NVPTX::PTXCvtMode::FTZ_FLAG)
-      O << ".ftz";
-  } else if (strcmp(Modifier, "sat") == 0) {
-    // SAT flag
-    if (Imm & NVPTX::PTXCvtMode::SAT_FLAG)
-      O << ".sat";
-  } else if (strcmp(Modifier, "base") == 0) {
-    // Default operand
-    switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) {
-    default:
-      return;
-    case NVPTX::PTXCvtMode::NONE:
-      break;
-    case NVPTX::PTXCvtMode::RNI:
-      O << ".rni";
-      break;
-    case NVPTX::PTXCvtMode::RZI:
-      O << ".rzi";
-      break;
-    case NVPTX::PTXCvtMode::RMI:
-      O << ".rmi";
-      break;
-    case NVPTX::PTXCvtMode::RPI:
-      O << ".rpi";
-      break;
-    case NVPTX::PTXCvtMode::RN:
-      O << ".rn";
-      break;
-    case NVPTX::PTXCvtMode::RZ:
-      O << ".rz";
-      break;
-    case NVPTX::PTXCvtMode::RM:
-      O << ".rm";
-      break;
-    case NVPTX::PTXCvtMode::RP:
-      O << ".rp";
-      break;
-    }
-  } else {
-    llvm_unreachable("Invalid conversion modifier");
-  }
-}
-
-void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                                    const char *Modifier) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  int64_t Imm = MO.getImm();
-
-  if (strcmp(Modifier, "ftz") == 0) {
-    // FTZ flag
-    if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
-      O << ".ftz";
-  } else if (strcmp(Modifier, "base") == 0) {
-    switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
-    default:
-      return;
-    case NVPTX::PTXCmpMode::EQ:
-      O << ".eq";
-      break;
-    case NVPTX::PTXCmpMode::NE:
-      O << ".ne";
-      break;
-    case NVPTX::PTXCmpMode::LT:
-      O << ".lt";
-      break;
-    case NVPTX::PTXCmpMode::LE:
-      O << ".le";
-      break;
-    case NVPTX::PTXCmpMode::GT:
-      O << ".gt";
-      break;
-    case NVPTX::PTXCmpMode::GE:
-      O << ".ge";
-      break;
-    case NVPTX::PTXCmpMode::LO:
-      O << ".lo";
-      break;
-    case NVPTX::PTXCmpMode::LS:
-      O << ".ls";
-      break;
-    case NVPTX::PTXCmpMode::HI:
-      O << ".hi";
-      break;
-    case NVPTX::PTXCmpMode::HS:
-      O << ".hs";
-      break;
-    case NVPTX::PTXCmpMode::EQU:
-      O << ".equ";
-      break;
-    case NVPTX::PTXCmpMode::NEU:
-      O << ".neu";
-      break;
-    case NVPTX::PTXCmpMode::LTU:
-      O << ".ltu";
-      break;
-    case NVPTX::PTXCmpMode::LEU:
-      O << ".leu";
-      break;
-    case NVPTX::PTXCmpMode::GTU:
-      O << ".gtu";
-      break;
-    case NVPTX::PTXCmpMode::GEU:
-      O << ".geu";
-      break;
-    case NVPTX::PTXCmpMode::NUM:
-      O << ".num";
-      break;
-    case NVPTX::PTXCmpMode::NotANumber:
-      O << ".nan";
-      break;
-    }
-  } else {
-    llvm_unreachable("Empty Modifier");
-  }
-}
-
-void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
-                                     raw_ostream &O, const char *Modifier) {
-  if (Modifier) {
-    const MCOperand &MO = MI->getOperand(OpNum);
-    int Imm = (int) MO.getImm();
-    if (!strcmp(Modifier, "volatile")) {
-      if (Imm)
-        O << ".volatile";
-    } else if (!strcmp(Modifier, "addsp")) {
-      switch (Imm) {
-      case NVPTX::PTXLdStInstCode::GLOBAL:
-        O << ".global";
-        break;
-      case NVPTX::PTXLdStInstCode::SHARED:
-        O << ".shared";
-        break;
-      case NVPTX::PTXLdStInstCode::LOCAL:
-        O << ".local";
-        break;
-      case NVPTX::PTXLdStInstCode::PARAM:
-        O << ".param";
-        break;
-      case NVPTX::PTXLdStInstCode::CONSTANT:
-        O << ".const";
-        break;
-      case NVPTX::PTXLdStInstCode::GENERIC:
-        break;
-      default:
-        llvm_unreachable("Wrong Address Space");
-      }
-    } else if (!strcmp(Modifier, "sign")) {
-      if (Imm == NVPTX::PTXLdStInstCode::Signed)
-        O << "s";
-      else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
-        O << "u";
-      else if (Imm == NVPTX::PTXLdStInstCode::Untyped)
-        O << "b";
-      else if (Imm == NVPTX::PTXLdStInstCode::Float)
-        O << "f";
-      else
-        llvm_unreachable("Unknown register type");
-    } else if (!strcmp(Modifier, "vec")) {
-      if (Imm == NVPTX::PTXLdStInstCode::V2)
-        O << ".v2";
-      else if (Imm == NVPTX::PTXLdStInstCode::V4)
-        O << ".v4";
-    } else
-      llvm_unreachable("Unknown Modifier");
-  } else
-    llvm_unreachable("Empty Modifier");
-}
-
-void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
-                                       raw_ostream &O, const char *Modifier) {
-  printOperand(MI, OpNum, O);
-
-  if (Modifier && !strcmp(Modifier, "add")) {
-    O << ", ";
-    printOperand(MI, OpNum + 1, O);
-  } else {
-    if (MI->getOperand(OpNum + 1).isImm() &&
-        MI->getOperand(OpNum + 1).getImm() == 0)
-      return; // don't print ',0' or '+0'
-    O << "+";
-    printOperand(MI, OpNum + 1, O);
-  }
-}
-
-void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
-                                       raw_ostream &O, const char *Modifier) {
-  const MCOperand &Op = MI->getOperand(OpNum);
-  assert(Op.isExpr() && "Call prototype is not an MCExpr?");
-  const MCExpr *Expr = Op.getExpr();
-  const MCSymbol &Sym = cast<MCSymbolRefExpr>(Expr)->getSymbol();
-  O << Sym.getName();
-}
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
deleted file mode 100644
index f0f223aa057b..000000000000
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//= NVPTXInstPrinter.h - Convert NVPTX MCInst to assembly syntax --*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an NVPTX MCInst to .ptx file syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
-#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class MCSubtargetInfo;
-
-class NVPTXInstPrinter : public MCInstPrinter {
-public:
-  NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                   const MCRegisterInfo &MRI);
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-  // End
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                    const char *Modifier = nullptr);
-  void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                    const char *Modifier = nullptr);
-  void printLdStCode(const MCInst *MI, int OpNum,
-                     raw_ostream &O, const char *Modifier = nullptr);
-  void printMemOperand(const MCInst *MI, int OpNum,
-                       raw_ostream &O, const char *Modifier = nullptr);
-  void printProtoIdent(const MCInst *MI, int OpNum,
-                       raw_ostream &O, const char *Modifier = nullptr);
-};
-
-}
-
-#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 1cb92005979d..815b600fe93a 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
new file mode 100644
index 000000000000..b6eefe206268
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -0,0 +1,309 @@
+//===-- NVPTXInstPrinter.cpp - PTX assembly instruction printing ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Print MCInst instructions to .ptx format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/NVPTXInstPrinter.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "NVPTXGenAsmWriter.inc"
+
+NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                                   const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // Decode the virtual register; the bits above the low 28 select the prefix.
+  // Must be kept in sync with NVPTXAsmPrinter::encodeVirtualRegister.
+  unsigned RCId = (RegNo >> 28);
+  switch (RCId) {
+  default: report_fatal_error("Bad virtual register encoding");
+  case 0:
+    // Class 0 is actually a physical register, so defer to the autogenerated
+    // register printer and skip the numeric suffix appended below.
+    OS << getRegisterName(RegNo);
+    return;
+  case 1:
+    OS << "%p";
+    break;
+  case 2:
+    OS << "%rs";
+    break;
+  case 3:
+    OS << "%r";
+    break;
+  case 4:
+    OS << "%rd";
+    break;
+  case 5:
+    OS << "%f";
+    break;
+  case 6:
+    OS << "%fd";
+    break;
+  case 7:
+    OS << "%h";
+    break;
+  case 8:
+    OS << "%hh";
+    break;
+  }
+
+  unsigned VReg = RegNo & 0x0FFFFFFF;
+  OS << VReg;
+}
+
+void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                 StringRef Annot, const MCSubtargetInfo &STI) {
+  printInstruction(MI, OS);
+
+  // Then always append the annotation; STI is unused in this function.
+  printAnnotation(OS, Annot);
+}
+
+void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNo);
+  if (Operand.isReg())
+    return printRegName(O, Operand.getReg());
+  if (Operand.isImm()) {
+    O << markup("<imm:") << formatImm(Operand.getImm()) << markup(">");
+    return;
+  }
+  // Neither a register nor an immediate, so it has to be an expression.
+  assert(Operand.isExpr() && "Unknown operand kind in printOperand");
+  Operand.getExpr()->print(O, &MAI);
+}
+
+/// Print the part of the conversion-mode operand that Modifier selects: the
+/// ".ftz" flag, the ".sat" flag, or ("base") the mode suffix such as ".rni".
+void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
+                                    const char *Modifier) {
+  assert(Modifier && "Empty Modifier"); // declared with a nullptr default
+  const int64_t Imm = MI->getOperand(OpNum).getImm();
+
+  if (strcmp(Modifier, "ftz") == 0) {
+    if (Imm & NVPTX::PTXCvtMode::FTZ_FLAG)
+      O << ".ftz";
+  } else if (strcmp(Modifier, "sat") == 0) {
+    if (Imm & NVPTX::PTXCvtMode::SAT_FLAG)
+      O << ".sat";
+  } else if (strcmp(Modifier, "base") == 0) {
+    // NONE and unrecognized base values print nothing.
+    switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) {
+    default:
+      return;
+    case NVPTX::PTXCvtMode::NONE:
+      break;
+    case NVPTX::PTXCvtMode::RNI:
+      O << ".rni";
+      break;
+    case NVPTX::PTXCvtMode::RZI:
+      O << ".rzi";
+      break;
+    case NVPTX::PTXCvtMode::RMI:
+      O << ".rmi";
+      break;
+    case NVPTX::PTXCvtMode::RPI:
+      O << ".rpi";
+      break;
+    case NVPTX::PTXCvtMode::RN:
+      O << ".rn";
+      break;
+    case NVPTX::PTXCvtMode::RZ:
+      O << ".rz";
+      break;
+    case NVPTX::PTXCvtMode::RM:
+      O << ".rm";
+      break;
+    case NVPTX::PTXCvtMode::RP:
+      O << ".rp";
+      break;
+    }
+  } else {
+    llvm_unreachable("Invalid conversion modifier");
+  }
+}
+
+/// Print the ".ftz" flag or the "base" comparison suffix, as Modifier selects.
+void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
+                                    const char *Modifier) {
+  assert(Modifier && "Empty Modifier"); // declared with a nullptr default
+  const int64_t Imm = MI->getOperand(OpNum).getImm();
+
+  if (strcmp(Modifier, "ftz") == 0) {
+    if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
+      O << ".ftz";
+  } else if (strcmp(Modifier, "base") == 0) {
+    switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
+    default:
+      return;
+    case NVPTX::PTXCmpMode::EQ:
+      O << ".eq";
+      break;
+    case NVPTX::PTXCmpMode::NE:
+      O << ".ne";
+      break;
+    case NVPTX::PTXCmpMode::LT:
+      O << ".lt";
+      break;
+    case NVPTX::PTXCmpMode::LE:
+      O << ".le";
+      break;
+    case NVPTX::PTXCmpMode::GT:
+      O << ".gt";
+      break;
+    case NVPTX::PTXCmpMode::GE:
+      O << ".ge";
+      break;
+    case NVPTX::PTXCmpMode::LO:
+      O << ".lo";
+      break;
+    case NVPTX::PTXCmpMode::LS:
+      O << ".ls";
+      break;
+    case NVPTX::PTXCmpMode::HI:
+      O << ".hi";
+      break;
+    case NVPTX::PTXCmpMode::HS:
+      O << ".hs";
+      break;
+    case NVPTX::PTXCmpMode::EQU:
+      O << ".equ";
+      break;
+    case NVPTX::PTXCmpMode::NEU:
+      O << ".neu";
+      break;
+    case NVPTX::PTXCmpMode::LTU:
+      O << ".ltu";
+      break;
+    case NVPTX::PTXCmpMode::LEU:
+      O << ".leu";
+      break;
+    case NVPTX::PTXCmpMode::GTU:
+      O << ".gtu";
+      break;
+    case NVPTX::PTXCmpMode::GEU:
+      O << ".geu";
+      break;
+    case NVPTX::PTXCmpMode::NUM:
+      O << ".num";
+      break;
+    case NVPTX::PTXCmpMode::NotANumber:
+      O << ".nan";
+      break;
+    }
+  } else {
+    llvm_unreachable("Unknown Modifier");
+  }
+}
+
+void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
+                                     raw_ostream &O, const char *Modifier) {
+  // Reject a missing modifier up front, before the operand is even looked at.
+  if (!Modifier)
+    llvm_unreachable("Empty Modifier");
+  const int Code = static_cast<int>(MI->getOperand(OpNum).getImm());
+  if (strcmp(Modifier, "volatile") == 0) {
+    if (Code != 0)
+      O << ".volatile";
+  } else if (strcmp(Modifier, "addsp") == 0) {
+    switch (Code) {
+    case NVPTX::PTXLdStInstCode::GENERIC:
+      break;
+    case NVPTX::PTXLdStInstCode::GLOBAL:
+      O << ".global";
+      break;
+    case NVPTX::PTXLdStInstCode::SHARED:
+      O << ".shared";
+      break;
+    case NVPTX::PTXLdStInstCode::LOCAL:
+      O << ".local";
+      break;
+    case NVPTX::PTXLdStInstCode::PARAM:
+      O << ".param";
+      break;
+    case NVPTX::PTXLdStInstCode::CONSTANT:
+      O << ".const";
+      break;
+    default:
+      llvm_unreachable("Wrong Address Space");
+    }
+  } else if (strcmp(Modifier, "sign") == 0) {
+    if (Code == NVPTX::PTXLdStInstCode::Signed)
+      O << "s";
+    else if (Code == NVPTX::PTXLdStInstCode::Unsigned)
+      O << "u";
+    else if (Code == NVPTX::PTXLdStInstCode::Untyped)
+      O << "b";
+    else if (Code == NVPTX::PTXLdStInstCode::Float)
+      O << "f";
+    else
+      llvm_unreachable("Unknown register type");
+  } else if (strcmp(Modifier, "vec") == 0) {
+    if (Code == NVPTX::PTXLdStInstCode::V2)
+      O << ".v2";
+    else if (Code == NVPTX::PTXLdStInstCode::V4)
+      O << ".v4";
+  } else {
+    llvm_unreachable("Unknown Modifier");
+  }
+}
+
+void NVPTXInstPrinter::printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
+                                    const char *Modifier) {
+  // The operand is the PTX version as an integer; Modifier picks what to print.
+  const int PTXVersion = static_cast<int>(MI->getOperand(OpNum).getImm());
+  if (!Modifier || !strcmp(Modifier, "version")) {
+    O << PTXVersion; // no modifier or "version": the number itself
+  } else if (!strcmp(Modifier, "aligned")) {
+    // PTX63 and later require '.aligned' in the name of the instruction.
+    if (PTXVersion >= 63)
+      O << ".aligned";
+  } else
+    llvm_unreachable("Unknown Modifier");
+}
+
+/// Print operand OpNum followed by operand OpNum + 1: comma-separated when
+/// Modifier is "add", otherwise joined by '+' with a zero immediate left out.
+void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
+                                       raw_ostream &O, const char *Modifier) {
+  printOperand(MI, OpNum, O);
+
+  const bool UseComma = Modifier && strcmp(Modifier, "add") == 0;
+  // Only the '+' form drops a zero immediate; the ", " form always prints it.
+  if (!UseComma && MI->getOperand(OpNum + 1).isImm() &&
+      MI->getOperand(OpNum + 1).getImm() == 0)
+    return;
+
+  O << (UseComma ? ", " : "+");
+  printOperand(MI, OpNum + 1, O);
+}
+
+void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
+                                       raw_ostream &O, const char *Modifier) {
+  const MCOperand &ProtoOp = MI->getOperand(OpNum);
+  assert(ProtoOp.isExpr() && "Call prototype is not an MCExpr?");
+  // Print just the name of the symbol the prototype expression refers to.
+  const auto *SymRef = cast<MCSymbolRefExpr>(ProtoOp.getExpr());
+  O << SymRef->getSymbol().getName();
+}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
new file mode 100644
index 000000000000..c38472925a29
--- /dev/null
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -0,0 +1,53 @@
+//= NVPTXInstPrinter.h - Convert NVPTX MCInst to assembly syntax --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an NVPTX MCInst to .ptx file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXINSTPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCSubtargetInfo;
+
+class NVPTXInstPrinter : public MCInstPrinter {
+public:
+  NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  // Autogenerated by tblgen (presumably via NVPTXGenAsmWriter.inc; confirm).
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+  // End of the autogenerated members.
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = nullptr);
+  void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = nullptr);
+  void printLdStCode(const MCInst *MI, int OpNum,
+                     raw_ostream &O, const char *Modifier = nullptr);
+  void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = nullptr);
+  void printMemOperand(const MCInst *MI, int OpNum,
+                       raw_ostream &O, const char *Modifier = nullptr);
+  void printProtoIdent(const MCInst *MI, int OpNum,
+                       raw_ostream &O, const char *Modifier = nullptr);
+};
+
+}
+
+#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index f6cbd23f01c4..556745825a15 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -38,12 +37,11 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
   HiddenDeclarationVisibilityAttr = HiddenVisibilityAttr = MCSA_Invalid;
   ProtectedVisibilityAttr = MCSA_Invalid;
 
-  // FIXME: remove comment once debug info is properly supported.
-  Data8bitsDirective = "// .b8 ";
+  Data8bitsDirective = ".b8 ";
   Data16bitsDirective = nullptr; // not supported
-  Data32bitsDirective = "// .b32 ";
-  Data64bitsDirective = "// .b64 ";
-  ZeroDirective = "// .b8";
+  Data32bitsDirective = ".b32 ";
+  Data64bitsDirective = ".b64 ";
+  ZeroDirective = ".b8";
   AsciiDirective = nullptr; // not supported
   AscizDirective = nullptr; // not supported
   SupportsQuotedNames = false;
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 9fd7600cf67f..e888526da898 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index b1a77a17ec15..c8b85b2718a6 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,10 +10,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/NVPTXInstPrinter.h"
+#include "NVPTXInstPrinter.h"
 #include "NVPTXMCAsmInfo.h"
 #include "NVPTXMCTargetDesc.h"
 #include "NVPTXTargetStreamer.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 0c9ad977e7ec..e1691d2384e6 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,9 +18,6 @@
 namespace llvm {
 class Target;
 
-Target &getTheNVPTXTarget32();
-Target &getTheNVPTXTarget64();
-
 } // End llvm namespace
 
 // Defines symbolic names for PTX registers.
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index f7b4cf3a0f72..17f5ba7d900b 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //=====- NVPTXTargetStreamer.cpp - NVPTXTargetStreamer class ------------=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,6 +30,11 @@ void NVPTXTargetStreamer::outputDwarfFileDirectives() {
   DwarfFiles.clear();
 }
 
+void NVPTXTargetStreamer::closeLastSection() {
+  if (HasSections)
+    getStreamer().EmitRawText("\t}");
+}
+
 void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
   DwarfFiles.emplace_back(Directive);
 }
@@ -82,22 +86,27 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
                                         raw_ostream &OS) {
   assert(!SubSection && "SubSection is not null!");
   const MCObjectFileInfo *FI = getStreamer().getContext().getObjectFileInfo();
-  // FIXME: remove comment once debug info is properly supported.
   // Emit closing brace for DWARF sections only.
   if (isDwarfSection(FI, CurSection))
-    OS << "//\t}\n";
+    OS << "\t}\n";
   if (isDwarfSection(FI, Section)) {
     // Emit DWARF .file directives in the outermost scope.
     outputDwarfFileDirectives();
-    OS << "//\t.section";
+    OS << "\t.section";
     Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
                                   FI->getTargetTriple(), OS, SubSection);
     // DWARF sections are enclosed into braces - emit the open one.
-    OS << "//\t{\n";
+    OS << "\t{\n";
+    HasSections = true;
   }
 }
 
 void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
+  MCTargetStreamer::emitRawBytes(Data);
+  // TODO: enable this once the bug in the ptxas with the packed bytes is
+  // resolved. Currently, (it is confirmed by NVidia) it causes a crash in
+  // ptxas.
+#if 0
   const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
   const char *Directive = MAI->getData8bitsDirective();
   unsigned NumElements = Data.size();
@@ -121,5 +130,6 @@ void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
     }
     Streamer.EmitRawText(OS.str());
   }
+#endif
 }
 
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
index f18e61cdca57..8185efadefdb 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
@@ -1,9 +1,8 @@
 //=====-- NVPTXTargetStreamer.h - NVPTX Target Streamer ------*- C++ -*--=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +18,7 @@ class MCSection;
 class NVPTXTargetStreamer : public MCTargetStreamer {
 private:
   SmallVector<std::string, 4> DwarfFiles;
+  bool HasSections = false;
 
 public:
   NVPTXTargetStreamer(MCStreamer &S);
@@ -26,6 +26,8 @@ public:
 
   /// Outputs the list of the DWARF '.file' directives to the streamer.
   void outputDwarfFileDirectives();
+  /// Close last section.
+  void closeLastSection();
 
   /// Record DWARF file directives for later output.
   /// According to PTX ISA, CUDA Toolkit documentation, 11.5.3. Debugging
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index 7fc0156216f5..bbcbb4598040 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -1,9 +1,8 @@
 //===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 07bfc58a8da7..6530c40ea100 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -1,9 +1,8 @@
 //===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,14 +14,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTX_H
 
-#include "MCTargetDesc/NVPTXBaseInfo.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <iosfwd>
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
 
 namespace llvm {
 class NVPTXTargetMachine;
@@ -55,9 +48,6 @@ BasicBlockPass *createNVPTXLowerAllocaPass();
 MachineFunctionPass *createNVPTXPeephole();
 MachineFunctionPass *createNVPTXProxyRegErasurePass();
 
-Target &getTheNVPTXTarget32();
-Target &getTheNVPTXTarget64();
-
 namespace NVPTX {
 enum DrvInterface {
   NVCL,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 3731b2f37f6c..1d947ef1ce62 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -1,9 +1,8 @@
 //===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This is the top level entry point for the NVPTX target.
@@ -76,6 +75,8 @@ def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
                              "Use PTX version 6.1">;
 def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
                              "Use PTX version 6.3">;
+def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
+                             "Use PTX version 6.4">;
 
 //===----------------------------------------------------------------------===//
 // NVPTX supported processors.
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index bf922eb8a195..f2c7751df1df 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -1,9 +1,8 @@
 //===-- AllocaHoisting.cpp - Hoist allocas to the entry block --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 7a6fc7d9b14d..d7de8e3a2f46 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -1,9 +1,8 @@
 //===-- AllocaHoisting.h - Hosist allocas to the entry block ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 6284ad8b82e8..5f38b4a3c4c5 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXAsmPrinter.h"
-#include "InstPrinter/NVPTXInstPrinter.h"
 #include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "MCTargetDesc/NVPTXInstPrinter.h"
 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
 #include "MCTargetDesc/NVPTXTargetStreamer.h"
 #include "NVPTX.h"
@@ -24,6 +23,7 @@
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXUtilities.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
 #include "cl_common_defines.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
@@ -473,6 +473,9 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   // Emit open brace for function body.
   OutStreamer->EmitRawText(StringRef("{\n"));
   setAndEmitFunctionVirtualRegisters(*MF);
+  // Emit initial .loc debug directive for correct relocation symbol data.
+  if (MMI && MMI->hasDebugInfo())
+    emitInitialRawDwarfLocDirective(*MF);
 }
 
 bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
@@ -597,36 +600,6 @@ void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
   O << getVirtualRegisterName(vr);
 }
 
-void NVPTXAsmPrinter::printVecModifiedImmediate(
-    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
-  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
-  int Imm = (int) MO.getImm();
-  if (0 == strcmp(Modifier, "vecelem"))
-    O << "_" << vecelem[Imm];
-  else if (0 == strcmp(Modifier, "vecv4comm1")) {
-    if ((Imm < 0) || (Imm > 3))
-      O << "//";
-  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
-    if ((Imm < 4) || (Imm > 7))
-      O << "//";
-  } else if (0 == strcmp(Modifier, "vecv4pos")) {
-    if (Imm < 0)
-      Imm = 0;
-    O << "_" << vecelem[Imm % 4];
-  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
-    if ((Imm < 0) || (Imm > 1))
-      O << "//";
-  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
-    if ((Imm < 2) || (Imm > 3))
-      O << "//";
-  } else if (0 == strcmp(Modifier, "vecv2pos")) {
-    if (Imm < 0)
-      Imm = 0;
-    O << "_" << vecelem[Imm % 2];
-  } else
-    llvm_unreachable("Unknown Modifier on immediate operand");
-}
-
 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
   emitLinkageDirective(F, O);
   if (isKernelFunction(*F))
@@ -899,9 +872,8 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
     if (HasFullDebugInfo)
       break;
   }
-  // FIXME: remove comment once debug info is properly supported.
   if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
-    O << "//, debug";
+    O << ", debug";
 
   O << "\n";
 
@@ -952,10 +924,13 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
   clearAnnotationCache(&M);
 
   delete[] gv_array;
-  // FIXME: remove comment once debug info is properly supported.
   // Close the last emitted section
-  if (HasDebugInfo)
-    OutStreamer->EmitRawText("//\t}");
+  if (HasDebugInfo) {
+    static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
+        ->closeLastSection();
+    // Emit empty .debug_loc section for better support of the empty files.
+    OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
+  }
 
   // Output last DWARF .file directives, if any.
   static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
@@ -2199,7 +2174,6 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,
                                       const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
     if (ExtraCode[1] != 0)
@@ -2208,7 +2182,7 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     case 'r':
       break;
     }
@@ -2219,9 +2193,10 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
-    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
-    const char *ExtraCode, raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                            unsigned OpNo,
+                                            const char *ExtraCode,
+                                            raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
     return true; // Unknown modifier
 
@@ -2233,7 +2208,7 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
 }
 
 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
-                                   raw_ostream &O, const char *Modifier) {
+                                   raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(opNum);
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
@@ -2245,29 +2220,23 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     } else {
       emitVirtualRegister(MO.getReg(), O);
     }
-    return;
+    break;
 
   case MachineOperand::MO_Immediate:
-    if (!Modifier)
-      O << MO.getImm();
-    else if (strstr(Modifier, "vec") == Modifier)
-      printVecModifiedImmediate(MO, Modifier, O);
-    else
-      llvm_unreachable(
-          "Don't know how to handle modifier on immediate operand");
-    return;
+    O << MO.getImm();
+    break;
 
   case MachineOperand::MO_FPImmediate:
     printFPConstant(MO.getFPImm(), O);
     break;
 
   case MachineOperand::MO_GlobalAddress:
-    getSymbol(MO.getGlobal())->print(O, MAI);
+    PrintSymbolOperand(MO, O);
     break;
 
   case MachineOperand::MO_MachineBasicBlock:
     MO.getMBB()->getSymbol()->print(O, MAI);
-    return;
+    break;
 
   default:
     llvm_unreachable("Operand type not supported.");
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 44a09f5fe513..43ae57ac1262 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -213,8 +212,6 @@ private:
   MCOperand GetSymbolRef(const MCSymbol *Symbol);
   unsigned encodeVirtualRegister(unsigned Reg);
 
-  void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
-                                 raw_ostream &O);
   void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                        const char *Modifier = nullptr);
   void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
@@ -231,13 +228,10 @@ private:
   void printReturnValStr(const Function *, raw_ostream &O);
   void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &) override;
-  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                    const char *Modifier = nullptr);
+                       const char *ExtraCode, raw_ostream &) override;
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &) override;
+                             const char *ExtraCode, raw_ostream &) override;
 
   const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
   void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 41e9ae827180..a8a43cee9ab7 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXAssignValidGlobalNames.cpp - Assign valid names to globals ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index e5e6637967b2..46f08b23d31a 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -1,9 +1,8 @@
 //=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0a7856b9d5de..40269f58f06e 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -1,9 +1,8 @@
 //===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index fd63fdbaced6..b36d9b2e240a 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -1,9 +1,8 @@
 //===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ffc6a59cd6c8..3d2447d75c77 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "NVPTXISelDAGToDAG.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
@@ -702,11 +702,11 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
   // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
   // because the former looks through phi nodes while the latter does not. We
   // need to look through phi nodes to handle pointer induction variables.
-  SmallVector<Value *, 8> Objs;
-  GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+  SmallVector<const Value *, 8> Objs;
+  GetUnderlyingObjects(N->getMemOperand()->getValue(),
                        Objs, F->getDataLayout());
 
-  return all_of(Objs, [&](Value *V) {
+  return all_of(Objs, [&](const Value *V) {
     if (auto *A = dyn_cast<const Argument>(V))
       return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
     if (auto *GV = dyn_cast<const GlobalVariable>(V))
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index e911ba0c167d..e4e5069b7a80 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -1,9 +1,8 @@
 //===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "NVPTXISelLowering.h"
 #include "NVPTXRegisterInfo.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bec8ece29050..ae1aa98da0e8 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -547,13 +546,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
 
   // These map to conversion instructions for scalar FP types.
   for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
-                         ISD::FROUND, ISD::FTRUNC}) {
+                         ISD::FTRUNC}) {
     setOperationAction(Op, MVT::f16, Legal);
     setOperationAction(Op, MVT::f32, Legal);
     setOperationAction(Op, MVT::f64, Legal);
     setOperationAction(Op, MVT::v2f16, Expand);
   }
 
+  setOperationAction(ISD::FROUND, MVT::f16, Promote);
+  setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
+  setOperationAction(ISD::FROUND, MVT::f32, Custom);
+  setOperationAction(ISD::FROUND, MVT::f64, Custom);
+
+
   // 'Expand' implements FCOPYSIGN without calling an external library.
   setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
@@ -1503,7 +1508,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
         // New store.
         if (VectorInfo[j] & PVF_FIRST) {
-          assert(StoreOperands.empty() && "Unfinished preceeding store.");
+          assert(StoreOperands.empty() && "Unfinished preceding store.");
           StoreOperands.push_back(Chain);
           StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
           StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
@@ -2069,6 +2074,100 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   }
 }
 
+SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
+  // Dispatch on the result type; only f32 and f64 are lowered here.
+  const EVT ResultVT = Op.getValueType();
+  if (ResultVT == MVT::f64)
+    return LowerFROUND64(Op, DAG);
+  if (ResultVT == MVT::f32)
+    return LowerFROUND32(Op, DAG);
+
+  // Any other type reaching this point is not handled.
+  llvm_unreachable("unhandled type");
+}
+
+// This is the rounding method used in CUDA libdevice in C like code:
+// float roundf(float A)
+// {
+//   float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
+//   RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
+//   return abs(A) < 0.5 ? (float)(int)A : RoundedA;
+// }
+SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue A = Op.getOperand(0);
+  EVT VT = Op.getValueType();
+
+  SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
+
+  // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
+  SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
+  const unsigned SignBitMask = 0x80000000; // unsigned: does not fit in int
+  SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
+                             DAG.getConstant(SignBitMask, SL, MVT::i32));
+  const unsigned PointFiveInBits = 0x3F000000; // bit pattern of 0.5f
+  SDValue PointFiveWithSignRaw =
+      DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
+                  DAG.getConstant(PointFiveInBits, SL, MVT::i32));
+  SDValue PointFiveWithSign =
+      DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
+  SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
+  SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
+
+  // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
+  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue IsLarge =
+      DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
+                   ISD::SETOGT);
+  RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
+
+  // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
+  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
+                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
+  SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
+  return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
+}
+
+// The implementation of round(double) is similar to that of round(float) in
+// that they both separate the value range into three regions and use a method
+// specific to the region to round the values. However, round(double) first
+// calculates the round of the absolute value and then adds the sign back while
+// round(float) directly rounds the value with sign.
+SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue A = Op.getOperand(0);
+  EVT VT = Op.getValueType();
+
+  SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
+
+  // double RoundedA = (double) (int) (abs(A) + 0.5);
+  SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
+                                  DAG.getConstantFP(0.5, SL, VT));
+  SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
+
+  // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
+  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
+                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
+  RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
+                         DAG.getConstantFP(0, SL, VT),
+                         RoundedA);
+
+  // The rounding above worked on abs(A); copy the sign of A back onto the
+  // result.
+  RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
+
+  // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
+  SDValue IsLarge =
+      DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
+                   ISD::SETOGT);
+  return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
+}
+
+
+
 SDValue
 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -2099,6 +2198,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerShiftRightParts(Op, DAG);
   case ISD::SELECT:
     return LowerSelect(Op, DAG);
+  case ISD::FROUND:
+    return LowerFROUND(Op, DAG);
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
@@ -2130,7 +2231,7 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     LoadSDNode *Load = cast<LoadSDNode>(Op);
     EVT MemVT = Load->getMemoryVT();
     if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
-                            Load->getAddressSpace(), Load->getAlignment())) {
+                            *Load->getMemOperand())) {
       SDValue Ops[2];
       std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
       return DAG.getMergeValues(Ops, SDLoc(Op));
@@ -2173,7 +2274,7 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // stores and have to handle it here.
   if (VT == MVT::v2f16 &&
       !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                          Store->getAddressSpace(), Store->getAlignment()))
+                          *Store->getMemOperand()))
     return expandUnalignedStore(Store, DAG);
 
   if (VT.isVector())
@@ -3399,6 +3500,94 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     Info.align = 16;
     return true;
   }
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::v2i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOLoad;
+    Info.align = 8;
+    return true;
+  }
+
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
+
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::v4i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOLoad;
+    Info.align = 16;
+    return true;
+  }
+
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
+
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOLoad;
+    Info.align = 4;
+    return true;
+  }
 
   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
   case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
@@ -3442,6 +3631,44 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     return true;
   }
 
+  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
+  case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
+  case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
+  case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::v8i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOLoad;
+    Info.align = 16;
+    return true;
+  }
+
+  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
+  case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
+  case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::v2i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOLoad;
+    Info.align = 8;
+    return true;
+  }
+
   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
   case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
@@ -3484,8 +3711,44 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     return true;
   }
 
-  case Intrinsic::nvvm_atomic_load_add_f32:
-  case Intrinsic::nvvm_atomic_load_add_f64:
+  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
+  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
+  case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
+  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
+  case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
+  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
+  case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::v8i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOStore;
+    Info.align = 16;
+    return true;
+  }
+
+  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
+  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
+  case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
+  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
+  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
+  case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::v2i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.flags = MachineMemOperand::MOStore;
+    Info.align = 8;
+    return true;
+  }
+
   case Intrinsic::nvvm_atomic_load_inc_32:
   case Intrinsic::nvvm_atomic_load_dec_32:
 
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 66fab2b6f480..ef645fc1e541 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -1,9 +1,8 @@
 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -557,6 +556,10 @@ private:
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index ad1d7cbb52fc..74ab2f7b8453 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
index ffcb5d5273a2..77961c386827 100644
--- a/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -1,9 +1,8 @@
 //===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 50815bff6c67..f928b44c91e0 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 4ab1bb481958..7c0912808f7b 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -1,9 +1,8 @@
 //===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the niversity of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 02a40b9f5262..62da3c79f465 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1,9 +1,8 @@
 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -143,9 +142,12 @@ def true : Predicate<"true">;
 def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
 def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
 def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
+def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
 
 def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
 def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
+def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
+def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">;
 
 def useShortPtr : Predicate<"useShortPointers()">;
 def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
@@ -1549,6 +1551,10 @@ def LdStCode : Operand<i32> {
   let PrintMethod = "printLdStCode";
 }
 
+def MmaCode : Operand<i32> {
+  let PrintMethod = "printMmaCode";
+}
+
 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
 def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
 
@@ -3003,15 +3009,6 @@ def : Pat<(ffloor Float32Regs:$a),
 def : Pat<(ffloor Float64Regs:$a),
           (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
 
-def : Pat<(f16 (fround Float16Regs:$a)),
-          (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
-def : Pat<(fround Float32Regs:$a),
-          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f32 (fround Float32Regs:$a)),
-          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
-def : Pat<(f64 (fround Float64Regs:$a)),
-          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
-
 def : Pat<(ftrunc Float16Regs:$a),
           (CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
 def : Pat<(ftrunc Float32Regs:$a),
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 47dcdcf6e0bd..1752d3e0575e 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1,9 +1,8 @@
 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,7 +26,35 @@ def immDouble1 : PatLeaf<(fpimm), [{
     return (d==1.0);
 }]>;
 
+def AS_match {  // PatFrag predicate code snippets, one per address space.
+  code generic = [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
+  }];
+  code shared = [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
+  }];
+  code global = [{
+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
+  }];
+}
 
+// A node that will be replaced with the current PTX version.
+class PTX {
+  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
+    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
+  }]>;
+  // (i32 0) will be XForm'ed to the currently used PTX version.
+  dag version = (PTXVerXform (i32 0));
+}
+def ptx : PTX;
+
+// Generates a list of n sequential register names (empty list when n is 0).
+// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2"]
+class RegSeq<int n, string prefix> {
+  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
+                                        [prefix # !add(n, -1)]),
+                            []);
+}
 
 //-----------------------------------
 // Synchronization and shuffle functions
@@ -1007,17 +1034,11 @@ def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$
 //-----------------------------------
 
 class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-}]>;
+ : PatFrag<ops, frag, AS_match.global>;
 class ATOMIC_SHARED_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-}]>;
+ : PatFrag<ops, frag, AS_match.shared>;
 class ATOMIC_GENERIC_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-}]>;
+ : PatFrag<ops, frag, AS_match.generic>;
 
 multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
@@ -1113,18 +1134,12 @@ def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   (atomic_load_add_64 node:$a, node:$b)>;
 def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   (atomic_load_add_64 node:$a, node:$b)>;
-def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
-  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (atomic_load_fadd node:$a, node:$b)>;
 
 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
   atomic_load_add_32_g, i32imm, imm>;
@@ -1145,18 +1160,18 @@ defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
   ".add", atomic_load_add_64_gen, i64imm, imm>;
 
 defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
-  atomic_load_add_f32_g, f32imm, fpimm>;
+  atomic_load_add_g, f32imm, fpimm>;
 defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
-  atomic_load_add_f32_s, f32imm, fpimm>;
+  atomic_load_add_s, f32imm, fpimm>;
 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
-  atomic_load_add_f32_gen, f32imm, fpimm>;
+  atomic_load_add_gen, f32imm, fpimm>;
 
 defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
-  atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
+  atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
 defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
-  atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
+  atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
 defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
-  atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
+  atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
 
 // atom_sub
 
@@ -7381,383 +7396,258 @@ def INT_PTX_SREG_WARPSIZE :
     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
 
+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
+// In addition to target-independent fields provided by WMMA_REGS, it adds
+// the fields commonly used to implement specific PTX instruction -- register
+// types and names, constraints, parts of assembly, etc.
+class WMMA_REGINFO<WMMA_REGS r>
+      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
+  // NVPTX register types used to carry fragment data.
+  NVPTXRegClass regclass = !cond(
+    !eq(ptx_elt_type, "f16") : Float16x2Regs,
+    !eq(ptx_elt_type, "f32") : Float32Regs,
+    !eq(ptx_elt_type, "s32") : Int32Regs,
+    !eq(ptx_elt_type, "s8") : Int32Regs,
+    !eq(ptx_elt_type, "u8") : Int32Regs,
+    !eq(ptx_elt_type, "s4") : Int32Regs,
+    !eq(ptx_elt_type, "u4") : Int32Regs,
+    !eq(ptx_elt_type, "b1") : Int32Regs);
+
+  // Instruction input/output arguments for the fragment.
+  list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
+
+  // List of register names for the fragment -- ["ra0", "ra1",...]
+  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
+
+  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
+  string regstring = "{{$" # !head(reg_names)
+                           # !foldl("", !tail(reg_names), a, b,
+                                    !strconcat(a, ", $", b))
+                     # "}}";
+
+  // Predicates for particular fragment variant. Technically those are
+  // per-instruction predicates, but currently all fragments that can be used in
+  // a given instruction are subject to the same constraints, so an instruction
+  // can use predicates from any of its fragments. If/when this is no
+  // longer the case, we can concat all per-fragment predicates to enforce that
+  // all fragments of the instruction are viable.
+  list<Predicate> Predicates = !cond(
+    // fp16 -> fp16/fp32 @ m16n16k16
+    !and(!eq(geom, "m16n16k16"),
+         !or(!eq(ptx_elt_type, "f16"),
+             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
+
+    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
+    !and(!or(!eq(geom, "m8n32k16"),
+             !eq(geom, "m32n8k16")),
+         !or(!eq(ptx_elt_type, "f16"),
+             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
+
+    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
+    !and(!or(!eq(geom,"m16n16k16"),
+             !eq(geom,"m8n32k16"),
+             !eq(geom,"m32n8k16")),
+         !or(!eq(ptx_elt_type, "u8"),
+             !eq(ptx_elt_type, "s8"),
+             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
+
+    // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
+    !or(!eq(geom,"m8n8k128"),
+        !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
+
+  // template DAGs for instruction inputs/output.
+  dag Outs = !dag(outs, ptx_regs, reg_names);
+  dag Ins = !dag(ins, ptx_regs, reg_names);
+}
+
+// Convert dag of arguments into a dag to match given intrinsic.
+class BuildPatternI<Intrinsic Intr, dag Ins> {
+  // Build a dag pattern that matches the intrinsic call.
+  dag ret = !foreach(tmp, Ins,
+                          !subst(imem, ADDRvar,
+                          !subst(MEMri64, ADDRri64,
+                          !subst(MEMri, ADDRri,
+                          !subst(ins, Intr, tmp)))));
+}
+
+// Same as above, but uses PatFrag instead of an Intrinsic.
+class BuildPatternPF<PatFrag Intr, dag Ins> {
+  // Build a dag pattern that matches the intrinsic call.
+  dag ret = !foreach(tmp, Ins,
+                          !subst(imem, ADDRvar,
+                          !subst(MEMri64, ADDRri64,
+                          !subst(MEMri, ADDRri,
+                          !subst(ins, Intr, tmp)))));
+}
+
+// Common WMMA-related fields used for building patterns for all MMA instructions.
+class WMMA_INSTR<string _Intr, list<dag> _Args>
+  : NVPTXInst<(outs), (ins), "?", []> {
+  Intrinsic Intr = !cast<Intrinsic>(_Intr);
+  // Concatenate all arguments into a single dag.
+  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
+  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
+  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
+}
+
 //
 // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
 //
 
-class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
-
-class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
-                        string Space, string Type, NVPTXRegClass regclass,
-                        DAGOperand SrcOp, bit WithStride>
-  : EmptyNVPTXInst,
-    Requires<[!if(!eq(Geometry, "m16n16k16"),
-                  hasPTX60,
-                  hasPTX61),
-              hasSM70]> {
-  // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
-  // for this function.
-  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
-                                       # Geometry # "_load_"
-                                       # !subst("c", "c_" # Type, Abc)
-                                       # "_" # Layout
-                                       # !subst(".", "_", Space)
-                                       # !if(WithStride,"_stride", "")
-                                       # "_Intr");
-  dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
-  dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
-  dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
-
-  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
-  dag Ins = !con((ins SrcOp:$src), StrideArg);
-
-  // Build a dag pattern that matches the intrinsic call.
-  // We want a dag that looks like this:
-  // (set <output args>, (intrinsic <input arguments>)) where input and
-  // output arguments are named patterns that would match corresponding
-  // input/output arguments of the instruction.
-  //
-  // First we construct (set <output arguments>) from instruction's outs dag by
-  // replacing dag operator 'outs' with 'set'.
-  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
-  // Similarly, construct (intrinsic <input arguments>) sub-dag from
-  // instruction's input arguments, only now we also need to replace operands
-  // with patterns that would match them and the operator 'ins' with the
-  // intrinsic.
-  dag PatArgs = !foreach(tmp, Ins,
-                              !subst(imem, ADDRvar,
-                              !subst(MEMri64, ADDRri64,
-                              !subst(MEMri, ADDRri,
-                              !subst(ins, IntrMatcher, tmp)))));
-  // Finally, consatenate both parts together. !con() requires both dags to have
-  // the same operator, so we wrap PatArgs in a (set ...) dag.
-  let Pattern = [!con(PatOuts, (set PatArgs))];
-  let OutOperandList = Outs;
-  let InOperandList = Ins;
+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
+                DAGOperand SrcOp>
+  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
+                              [!con((ins SrcOp:$src),
+                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+    Requires<Frag.Predicates> {
+  // Load/store intrinsics are overloaded on pointer's address space.
+  // To match the right intrinsic, we need to build AS-constrained PatFrag.
+  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
+  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
+  // Build PatFrag that only matches particular address space.
+  PatFrag IntrFrag = PatFrag<PFOperands,
+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+                             !cond(!eq(Space, ".shared"): AS_match.shared,
+                                   !eq(Space, ".global"): AS_match.global,
+                                   1: AS_match.generic)>;
+  // Build AS-constrained pattern.
+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+  let OutOperandList = Frag.Outs;
+  let InOperandList = !con(Args, (ins MmaCode:$ptx));
   let AsmString = "wmma.load."
-                  # Abc
+                  # Frag.frag
                   # ".sync"
+                  # "${ptx:aligned}"
                   # "." # Layout
-                  # "." # Geometry
+                  # "." # Frag.geom
                   # Space
-                  # "." # Type # " \t"
-                  # !if(!eq(Abc#Type, "cf16"),
-                        "{{$r0, $r1, $r2, $r3}}",
-                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
+                  # "." # Frag.ptx_elt_type # " \t"
+                  # Frag.regstring
                   # ", [$src]"
                   # !if(WithStride, ", $ldm", "")
                   # ";";
 }
 
-class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
-                            string Space, string Type, bit WithStride>
-                           : PatFrag <(ops),(ops)> {
-  // Intrinsic that matches this instruction.
-  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
-                                    # "_" # Geometry # "_load_"
-                                    # Abc # "_" # Type # "_" # Layout
-                                    # !if(WithStride,"_stride", ""));
-  code match_generic = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-  }];
-  code match_shared = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-  }];
-  code match_global = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-  }];
-
-  let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
-  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
-  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
-                      !if(!eq(Space, ".global"), match_global, match_generic));
-}
-
-multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
-                            string Space, string Type, NVPTXRegClass regclass,
-                            bit WithStride> {
-  def _avar:  WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
-                                imem, WithStride>;
-  def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
-                                Int32Regs, WithStride>;
-  def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
-                                Int64Regs, WithStride>;
-  def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
-                                MEMri, WithStride>;
-  def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
-                                MEMri64, WithStride>;
-}
-
-multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
-                             string Space, string Type, NVPTXRegClass regclass,
-                             bit WithStride> {
-  // Define a PatFrag that matches appropriate intrinsic that loads from the
-  // given address space.
-  def _Intr:  WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
-                                    WithStride>;
-  defm NAME:  WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
-                               WithStride>;
-}
-
-multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
-                           string Space, string Type, NVPTXRegClass regclass> {
-  defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
-  defm NAME:    WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
-}
-
-multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
-                          string Type, NVPTXRegClass regclass> {
-  defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
-                                Type, regclass>;
-  defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
-                                Type, regclass>;
-  defm NAME:    WMMA_LOAD_GALST<Geometry, Abc, Layout,        "",
-                                Type, regclass>;
-}
-
-multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
-                         string Type, NVPTXRegClass regclass> {
-  defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
-  defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
-}
-
-multiclass WMMA_LOAD_G<string Geometry> {
-  defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
-  defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
-  defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
-  defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
-}
-
-defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
-defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
-defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
-
 //
 // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
 //
-class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
-                          string Type, NVPTXRegClass regclass,
-                          bit WithStride, DAGOperand DstOp>
-  : EmptyNVPTXInst,
-    Requires<[!if(!eq(Geometry, "m16n16k16"),
-                  hasPTX60,
-                  hasPTX61),
-              hasSM70]> {
-  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
-                                       # "_" # Geometry # "_store_d"
-                                       # "_" # Type
-                                       # "_" # Layout
-                                       # !subst(".", "_", Space)
-                                       # !if(WithStride,"_stride", "")
-                                       # "_Intr");
-  dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
-                                regclass:$r2, regclass:$r3);
-  dag InsR47 = (ins regclass:$r4, regclass:$r5,
-                    regclass:$r6, regclass:$r7);
-  dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
-  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
-  dag Ins = !con(InsR, StrideArg);
-
-  // Construct the pattern to match corresponding intrinsic call. See the
-  // details in the comments in WMMA_LOAD_ALSTOS.
-  dag PatArgs = !foreach(tmp, Ins,
-                              !subst(imem, ADDRvar,
-                              !subst(MEMri64, ADDRri64,
-                              !subst(MEMri, ADDRri,
-                              !subst(ins, IntrMatcher, tmp)))));
-  let Pattern = [PatArgs];
+class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
+                   bit WithStride, DAGOperand DstOp>
+  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
+               [!con((ins DstOp:$dst),
+                     Frag.Ins,
+                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+    Requires<Frag.Predicates> {
+
+  // Load/store intrinsics are overloaded on the pointer's address space.
+  // To match the right intrinsic, we need to build an AS-constrained PatFrag.
+  // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
+  dag PFOperands = !con((ops node:$dst),
+                        !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
+                        !if(WithStride, (ops node:$ldm), (ops)));
+  // Build PatFrag that only matches particular address space.
+  PatFrag IntrFrag = PatFrag<PFOperands,
+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+                             !cond(!eq(Space, ".shared"): AS_match.shared,
+                                   !eq(Space, ".global"): AS_match.global,
+                                   1: AS_match.generic)>;
+  // Build AS-constrained pattern.
+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
   let OutOperandList = (outs);
-  let InOperandList = Ins;
-  let AsmString = "wmma.store.d.sync."
-                  # Layout
-                  # "." # Geometry
+  let AsmString = "wmma.store.d.sync"
+                  # "${ptx:aligned}"
+                  # "." # Layout
+                  # "." # Frag.geom
                   # Space
-                  # "." # Type
-                  # " \t[$src],"
-                  # !if(!eq(Type,"f16"),
-                        "{{$r0, $r1, $r2, $r3}}",
-                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
+                  # "." # Frag.ptx_elt_type
+                  # " \t[$dst],"
+                  # Frag.regstring
                   # !if(WithStride, ", $ldm", "")
                   # ";";
-
-}
-
-class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
-                             string Type, bit WithStride>
-                            : PatFrag <(ops),(ops)> {
-  // Intrinsic that matches this instruction.
-  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
-                                    # Geometry
-                                    # "_store_d"
-                                    # "_" # Type
-                                    # "_" # Layout
-                                    # !if(WithStride, "_stride", ""));
-  code match_generic = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-  }];
-  code match_shared = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-  }];
-  code match_global = [{
-   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-  }];
-
-  dag Args = !if(!eq(Type,"f16"),
-                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
-                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
-                                 node:$r4, node:$r5, node:$r6, node:$r7));
-  dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
-  let Operands = !con(Args, StrideArg);
-  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
-  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
-                      !if(!eq(Space, ".global"), match_global, match_generic));
-}
-
-multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
-                              string Type, NVPTXRegClass regclass,
-                              bit WithStride> {
-  def _avar:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
-                                   WithStride, imem>;
-  def _areg:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
-                                   WithStride, Int32Regs>;
-  def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
-                                   WithStride, Int64Regs>;
-  def _ari:    WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
-                                   WithStride, MEMri>;
-  def _ari64:  WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
-                                   WithStride, MEMri64>;
 }
 
-multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
-                               string Type, NVPTXRegClass regclass,
-                               bit WithStride> {
-  // Define a PatFrag that matches appropriate intrinsic that loads from the
-  // given address space.
-  def _Intr:    WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
-                                       WithStride>;
-  defm NAME:    WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
-                                   WithStride>;
-}
-
-multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
-                             string Type, NVPTXRegClass regclass > {
-  defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
-  defm NAME:    WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
-}
-
-multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
-                           string Type, NVPTXRegClass regclass> {
-  defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
-  defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
-  defm NAME:    WMMA_STORE_D_GLST<Geometry, Layout,        "", Type, regclass>;
-}
-
-multiclass WMMA_STORE_D_GT<string Geometry, string Type,
-                           NVPTXRegClass regclass> {
-  defm _row:    WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
-  defm _col:    WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
-}
-
-multiclass WMMA_STORE_D_G<string Geometry> {
-  defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
-  defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
-}
-
-defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
-defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
-defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
+// Create all load/store variants
+defset list<WMMA_INSTR> MMA_LDSTs  = {
+  foreach layout = ["row", "col"] in {
+    foreach stride = [0, 1] in {
+      foreach space = [".global", ".shared", ""] in {
+        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
+          foreach frag = NVVM_MMA_OPS.all_ld_ops in
+            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+              def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
+          foreach frag = NVVM_MMA_OPS.all_st_ops in
+            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+              def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
+        } // addr
+      } // space
+    } // stride
+  } // layout
+} // defset
 
 // WMMA.MMA
-class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
-                     string DType, NVPTXRegClass d_reg,
-                     string CType, NVPTXRegClass c_reg,
-                     NVPTXRegClass ab_reg,
-                     string Satfinite = "">
-  : EmptyNVPTXInst,
-    Requires<[!if(!eq(Geometry, "m16n16k16"),
-                  hasPTX60,
-                  hasPTX61),
-              hasSM70]> {
-  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
-                                    # Geometry
-                                    # "_mma"
-                                    # "_" # ALayout
-                                    # "_" # BLayout
-                                    # "_" # DType
-                                    # "_" # CType
-                                    # !subst(".", "_", Satfinite));
-  dag Outs = !if(!eq(DType,"f16"),
-                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
-                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
-                       d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
-  dag InsExtraCArgs = !if(!eq(CType,"f16"),
-                          (ins),
-                          (ins c_reg:$c4,  c_reg:$c5,  c_reg:$c6,  c_reg:$c7));
-  dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
-                      ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
-                      ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
-                      ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
-                      c_reg:$c0,  c_reg:$c1,  c_reg:$c2,  c_reg:$c3),
-                  InsExtraCArgs);
-
-  // Construct the pattern to match corresponding intrinsic call. See the
-  // details in the comments in WMMA_LOAD_ALSTOS.
-  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
-  dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
-  let Pattern = [!con(PatOuts, (set PatArgs))];
-  let OutOperandList = Outs;
-  let InOperandList  = Ins;
-  let AsmString = "wmma.mma.sync."
-                  # ALayout
+class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
+               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
+               string ALayout, string BLayout, int Satfinite>
+  : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
+                             [FragA.Ins, FragB.Ins, FragC.Ins]>,
+    // Requires does not seem to have any effect on an Instruction w/o Patterns.
+    // We set it here anyway and propagate it to the Pat<> we construct below.
+    Requires<FragA.Predicates> {
+  let OutOperandList = FragD.Outs;
+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
+  string TypeList = !cond(
+    !eq(FragD.ptx_elt_type, "s32") : ".s32"
+                                     # "." # FragA.ptx_elt_type
+                                     # "." # FragB.ptx_elt_type
+                                     # ".s32",
+    1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
+  );
+  let AsmString = "wmma.mma"
+                  # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
+                  # ".sync"
+                  # "${ptx:aligned}"
+                  # "." # ALayout
                   # "." # BLayout
-                  # "." # Geometry
-                  # "." # DType
-                  # "." # CType
-                  # Satfinite # "\n\t\t"
-                  # !if(!eq(DType,"f16"),
-                        "{{$d0, $d1, $d2, $d3}}, \n\t\t",
-                        "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
-                  # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
-                  # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
-                  # !if(!eq(CType,"f16"),
-                        "{{$c0, $c1, $c2, $c3}};",
-                        "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
-}
-
-multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
-                         string DType, NVPTXRegClass d_reg,
-                         string CType, NVPTXRegClass c_reg> {
-  def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
-                                 DType, d_reg, CType, c_reg,
-                                 Float16x2Regs, ".satfinite">;
-  def NAME:       WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
-                                 DType, d_reg, CType, c_reg,
-                                 Float16x2Regs>;
-}
-
-multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
-                        string DType, NVPTXRegClass d_reg> {
-  defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
-                            "f16", Float16x2Regs>;
-  defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
-                            "f32", Float32Regs>;
-}
-
-multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
-  defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
-  defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
-}
-
-multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
-  defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
-  defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
-}
-
-multiclass WMMA_MMA_G<string Geometry> {
-  defm _col: WMMA_MMA_GA<Geometry, "col">;
-  defm _row: WMMA_MMA_GA<Geometry, "row">;
+                  # "." # FragA.geom
+                  # TypeList
+                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
+                  # FragD.regstring # ",\n\t\t"
+                  # FragA.regstring # ",\n\t\t"
+                  # FragB.regstring # ",\n\t\t"
+                  # FragC.regstring # ";";
 }
 
-defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
-defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
-defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;
+defset list<WMMA_INSTR> MMAs  = {
+  foreach layout_a = ["row", "col"] in {
+    foreach layout_b = ["row", "col"] in {
+      foreach satf = [0, 1] in {
+        foreach op = NVVM_MMA_OPS.all_mma_ops in {
+          foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+            def : WMMA_MMA<WMMA_REGINFO<op[0]>,
+                           WMMA_REGINFO<op[1]>,
+                           WMMA_REGINFO<op[2]>,
+                           WMMA_REGINFO<op[3]>,
+                           layout_a, layout_b, satf>;
+          }
+        } // op
+      } // satf
+    } // layout_b
+  } // layout_a
+} // defset
+
+
+// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
+// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
+// the instruction record.
+class WMMA_PAT<WMMA_INSTR wi>
+      : Pat<wi.IntrinsicPattern,
+            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
+                 (wi ptx.version))>,
+        Requires<wi.Predicates>;
+
+// Build intrinsic->instruction patterns for all MMA instructions.
+foreach mma = !listconcat(MMAs, MMA_LDSTs) in
+  def : WMMA_PAT<mma>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 52ced266b91c..0743a2986718 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -1,9 +1,8 @@
 //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 3c39f53eb30a..59d5ef40e9ac 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -1,9 +1,8 @@
 //===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index e94c1914029d..76fb9f3fa692 100644
--- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXLowerAlloca.cpp - Make alloca to use local memory =====--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@
 
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 139dc7fbeeda..c5e02e34e25e 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -92,6 +91,7 @@
 #include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
@@ -170,7 +170,8 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
   Value *ArgInParam = new AddrSpaceCastInst(
       Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
       FirstInst);
-  LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
+  LoadInst *LI =
+      new LoadInst(StructType, ArgInParam, Arg->getName(), FirstInst);
   new StoreInst(LI, AllocA, FirstInst);
 }
 
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index a754a6a36dab..5ec1b2425e68 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXMCExpr.cpp - NVPTX specific MC expression classes ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 95741d9b0451..440fa1310003 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -1,9 +1,8 @@
 //===-- NVPTXMCExpr.h - NVPTX specific MC expression classes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 5a9115f6f7f1..cf63fc33e621 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info  --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 02c32c68ee2c..629757db8707 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 2ca0ccf2dfa7..4c5a9adf1f65 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXPrologEpilogPass.cpp - NVPTX prolog/epilog inserter ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -73,8 +72,8 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
               TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
           MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
           MI.getOperand(0).setIsDebug();
-          auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
-                                               DIExpression::NoDeref, Offset);
+          auto *DIExpr = DIExpression::prepend(
+              MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
           MI.getOperand(3).setMetadata(DIExpr);
           continue;
         }
diff --git a/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index f60d841c1683..af50a7465d1a 100644
--- a/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -1,9 +1,8 @@
 //===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 755738329881..5cdec0925b26 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -127,6 +126,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return NVPTX::VRFrame;
 }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 6185a0b54cac..9ef6940daf86 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -1,9 +1,8 @@
 //===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,7 +42,7 @@ public:
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   ManagedStringPool *getStrPool() const {
     return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index f04764a9e9a3..4b755dcb55ff 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 82befe4b101b..e213089e4085 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index acbee86ae386..357826c2d19c 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -1,9 +1,8 @@
 //===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index b02822a099d9..0e9fa1fd3e56 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -1,9 +1,8 @@
 //=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8ec0ddb9b3d5..11b3fe2fa3d3 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -167,8 +167,16 @@ public:
   void addMachineSSAOptimization() override;
 
   FunctionPass *createTargetRegisterAllocator(bool) override;
-  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
-  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+  void addFastRegAlloc() override;
+  void addOptimizedRegAlloc() override;
+
+  bool addRegAssignmentFast() override {
+    llvm_unreachable("should not be used");
+  }
+
+  bool addRegAssignmentOptimized() override {
+    llvm_unreachable("should not be used");
+  }
 
 private:
   // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
@@ -323,15 +331,12 @@ FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
   return nullptr; // No reg alloc
 }
 
-void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "NVPTX uses no regalloc!");
+void NVPTXPassConfig::addFastRegAlloc() {
   addPass(&PHIEliminationID);
   addPass(&TwoAddressInstructionPassID);
 }
 
-void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "NVPTX uses no regalloc!");
-
+void NVPTXPassConfig::addOptimizedRegAlloc() {
   addPass(&ProcessImplicitDefsID);
   addPass(&LiveVariablesID);
   addPass(&MachineLoopInfoID);
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index ca540b8e0389..d84600c74e29 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index c706b053ab8f..ab2a93b75922 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 307654aed37f..be0416f90fca 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -39,7 +38,6 @@ static bool readsLaneId(const IntrinsicInst *II) {
 static bool isNVVMAtomic(const IntrinsicInst *II) {
   switch (II->getIntrinsicID()) {
     default: return false;
-    case Intrinsic::nvvm_atomic_load_add_f32:
     case Intrinsic::nvvm_atomic_load_inc_32:
     case Intrinsic::nvvm_atomic_load_dec_32:
 
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 14e93f7447dd..b179a28fa713 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -17,8 +16,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
 
-#include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/TargetLowering.h"
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index e464f474b1d5..665eb1383253 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -1,13 +1,13 @@
 //===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file contains miscellaneous utility functions
+//
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXUtilities.h"
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index a0cc4e78ac21..bf1524194cfb 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -1,9 +1,8 @@
 //===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp
index 11277f5ba596..5cf7b6691e63 100644
--- a/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -1,9 +1,8 @@
 //===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 64c262664fda..634a052e2ee7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -1,9 +1,8 @@
 //===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 803d643844f8..2c71ec58ec42 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "NVPTX.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h
new file mode 100644
index 000000000000..5c5691349ae9
--- /dev/null
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- NVPTXTargetInfo.h - NVPTX Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
+#define LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheNVPTXTarget32();
+Target &getTheNVPTXTarget64();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 8b3480f772e9..c9524da93acd 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1,15 +1,15 @@
 //===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
@@ -147,8 +147,7 @@ public:
     : MCTargetAsmParser(Options, STI, MII) {
     // Check for 64-bit vs. 32-bit pointer mode.
     const Triple &TheTriple = STI.getTargetTriple();
-    IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
-               TheTriple.getArch() == Triple::ppc64le);
+    IsPPC64 = TheTriple.isPPC64();
     IsDarwin = TheTriple.isMacOSX();
     // Initialize the set of available features.
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1129,7 +1128,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
   }
 }
 
-static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
                                          unsigned VariantID = 0);
 
 bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1148,7 +1147,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_MissingFeature:
     return Error(IDLoc, "instruction use requires an option to be enabled");
   case Match_MnemonicFail: {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = PPCMnemonicSpellCheck(
         ((PPCOperand &)*Operands[0]).getToken(), FBS);
     return Error(IDLoc, "invalid instruction" + Suggestion,
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 26869f250823..7a8af57961cb 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -1,13 +1,13 @@
 //===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
@@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
                                          createPPCLEDisassembler);
 }
 
+static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  int32_t Offset = SignExtend32<24>(Imm);
+  Inst.addOperand(MCOperand::createImm(Offset));
+  return MCDisassembler::Success;
+}
+
 // FIXME: These can be generated by TableGen from the existing register
 // encoding values!
 
@@ -78,12 +86,6 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
   return decodeRegisterClass(Inst, RegNo, CRRegs);
 }
 
-static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
-                                            uint64_t Address,
-                                            const void *Decoder) {
-  return decodeRegisterClass(Inst, RegNo, CRRegs);
-}
-
 static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
deleted file mode 100644
index fc29e4effbb1..000000000000
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ /dev/null
@@ -1,532 +0,0 @@
-//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an PPC MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCInstrInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// FIXME: Once the integrated assembler supports full register names, tie this
-// to the verbose-asm setting.
-static cl::opt<bool>
-FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
-             cl::desc("Use full register names when printing assembly"));
-
-// Useful for testing purposes. Prints vs{31-63} as v{0-31} respectively.
-static cl::opt<bool>
-ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false),
-             cl::desc("Prints full register names with vs{31-63} as v{0-31}"));
-
-// Prints full register names with percent symbol.
-static cl::opt<bool>
-FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
-                        cl::init(false),
-                        cl::desc("Prints full register names with percent"));
-
-#define PRINT_ALIAS_INSTR
-#include "PPCGenAsmWriter.inc"
-
-void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  const char *RegName = getRegisterName(RegNo);
-  if (RegName[0] == 'q' /* QPX */) {
-    // The system toolchain on the BG/Q does not understand QPX register names
-    // in .cfi_* directives, so print the name of the floating-point
-    // subregister instead.
-    std::string RN(RegName);
-
-    RN[0] = 'f';
-    OS << RN;
-
-    return;
-  }
-
-  OS << RegName;
-}
-
-void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                               StringRef Annot, const MCSubtargetInfo &STI) {
-  // Check for slwi/srwi mnemonics.
-  if (MI->getOpcode() == PPC::RLWINM) {
-    unsigned char SH = MI->getOperand(2).getImm();
-    unsigned char MB = MI->getOperand(3).getImm();
-    unsigned char ME = MI->getOperand(4).getImm();
-    bool useSubstituteMnemonic = false;
-    if (SH <= 31 && MB == 0 && ME == (31-SH)) {
-      O << "\tslwi "; useSubstituteMnemonic = true;
-    }
-    if (SH <= 31 && MB == (32-SH) && ME == 31) {
-      O << "\tsrwi "; useSubstituteMnemonic = true;
-      SH = 32-SH;
-    }
-    if (useSubstituteMnemonic) {
-      printOperand(MI, 0, O);
-      O << ", ";
-      printOperand(MI, 1, O);
-      O << ", " << (unsigned int)SH;
-
-      printAnnotation(O, Annot);
-      return;
-    }
-  }
-
-  if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
-      MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
-    O << "\tmr ";
-    printOperand(MI, 0, O);
-    O << ", ";
-    printOperand(MI, 1, O);
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  if (MI->getOpcode() == PPC::RLDICR ||
-      MI->getOpcode() == PPC::RLDICR_32) {
-    unsigned char SH = MI->getOperand(2).getImm();
-    unsigned char ME = MI->getOperand(3).getImm();
-    // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
-    if (63-SH == ME) {
-      O << "\tsldi ";
-      printOperand(MI, 0, O);
-      O << ", ";
-      printOperand(MI, 1, O);
-      O << ", " << (unsigned int)SH;
-      printAnnotation(O, Annot);
-      return;
-    }
-  }
-
-  // dcbt[st] is printed manually here because:
-  //  1. The assembly syntax is different between embedded and server targets
-  //  2. We must print the short mnemonics for TH == 0 because the
-  //     embedded/server syntax default will not be stable across assemblers
-  //  The syntax for dcbt is:
-  //    dcbt ra, rb, th [server]
-  //    dcbt th, ra, rb [embedded]
-  //  where th can be omitted when it is 0. dcbtst is the same.
-  if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
-    unsigned char TH = MI->getOperand(0).getImm();
-    O << "\tdcbt";
-    if (MI->getOpcode() == PPC::DCBTST)
-      O << "st";
-    if (TH == 16)
-      O << "t";
-    O << " ";
-
-    bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
-    if (IsBookE && TH != 0 && TH != 16)
-      O << (unsigned int) TH << ", ";
-
-    printOperand(MI, 1, O);
-    O << ", ";
-    printOperand(MI, 2, O);
-
-    if (!IsBookE && TH != 0 && TH != 16)
-      O << ", " << (unsigned int) TH;
-
-    printAnnotation(O, Annot);
-    return;
-  }
-
-  if (MI->getOpcode() == PPC::DCBF) {
-    unsigned char L = MI->getOperand(0).getImm();
-    if (!L || L == 1 || L == 3) {
-      O << "\tdcbf";
-      if (L == 1 || L == 3)
-        O << "l";
-      if (L == 3)
-        O << "p";
-      O << " ";
-
-      printOperand(MI, 1, O);
-      O << ", ";
-      printOperand(MI, 2, O);
-
-      printAnnotation(O, Annot);
-      return;
-    }
-  }
-
-  if (!printAliasInstr(MI, O))
-    printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-
-void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  unsigned Code = MI->getOperand(OpNo).getImm();
-
-  if (StringRef(Modifier) == "cc") {
-    switch ((PPC::Predicate)Code) {
-    case PPC::PRED_LT_MINUS:
-    case PPC::PRED_LT_PLUS:
-    case PPC::PRED_LT:
-      O << "lt";
-      return;
-    case PPC::PRED_LE_MINUS:
-    case PPC::PRED_LE_PLUS:
-    case PPC::PRED_LE:
-      O << "le";
-      return;
-    case PPC::PRED_EQ_MINUS:
-    case PPC::PRED_EQ_PLUS:
-    case PPC::PRED_EQ:
-      O << "eq";
-      return;
-    case PPC::PRED_GE_MINUS:
-    case PPC::PRED_GE_PLUS:
-    case PPC::PRED_GE:
-      O << "ge";
-      return;
-    case PPC::PRED_GT_MINUS:
-    case PPC::PRED_GT_PLUS:
-    case PPC::PRED_GT:
-      O << "gt";
-      return;
-    case PPC::PRED_NE_MINUS:
-    case PPC::PRED_NE_PLUS:
-    case PPC::PRED_NE:
-      O << "ne";
-      return;
-    case PPC::PRED_UN_MINUS:
-    case PPC::PRED_UN_PLUS:
-    case PPC::PRED_UN:
-      O << "un";
-      return;
-    case PPC::PRED_NU_MINUS:
-    case PPC::PRED_NU_PLUS:
-    case PPC::PRED_NU:
-      O << "nu";
-      return;
-    case PPC::PRED_BIT_SET:
-    case PPC::PRED_BIT_UNSET:
-      llvm_unreachable("Invalid use of bit predicate code");
-    }
-    llvm_unreachable("Invalid predicate code");
-  }
-
-  if (StringRef(Modifier) == "pm") {
-    switch ((PPC::Predicate)Code) {
-    case PPC::PRED_LT:
-    case PPC::PRED_LE:
-    case PPC::PRED_EQ:
-    case PPC::PRED_GE:
-    case PPC::PRED_GT:
-    case PPC::PRED_NE:
-    case PPC::PRED_UN:
-    case PPC::PRED_NU:
-      return;
-    case PPC::PRED_LT_MINUS:
-    case PPC::PRED_LE_MINUS:
-    case PPC::PRED_EQ_MINUS:
-    case PPC::PRED_GE_MINUS:
-    case PPC::PRED_GT_MINUS:
-    case PPC::PRED_NE_MINUS:
-    case PPC::PRED_UN_MINUS:
-    case PPC::PRED_NU_MINUS:
-      O << "-";
-      return;
-    case PPC::PRED_LT_PLUS:
-    case PPC::PRED_LE_PLUS:
-    case PPC::PRED_EQ_PLUS:
-    case PPC::PRED_GE_PLUS:
-    case PPC::PRED_GT_PLUS:
-    case PPC::PRED_NE_PLUS:
-    case PPC::PRED_UN_PLUS:
-    case PPC::PRED_NU_PLUS:
-      O << "+";
-      return;
-    case PPC::PRED_BIT_SET:
-    case PPC::PRED_BIT_UNSET:
-      llvm_unreachable("Invalid use of bit predicate code");
-    }
-    llvm_unreachable("Invalid predicate code");
-  }
-
-  assert(StringRef(Modifier) == "reg" &&
-         "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
-  printOperand(MI, OpNo+1, O);
-}
-
-void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned Code = MI->getOperand(OpNo).getImm();
-  if (Code == 2)
-    O << "-";
-  else if (Code == 3)
-    O << "+";
-}
-
-void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 1 && "Invalid u1imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 3 && "Invalid u2imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 8 && "Invalid u3imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 15 && "Invalid u4imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  int Value = MI->getOperand(OpNo).getImm();
-  Value = SignExtend32<5>(Value);
-  O << (int)Value;
-}
-
-void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 31 && "Invalid u5imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 63 && "Invalid u6imm argument!");
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned int Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 127 && "Invalid u7imm argument!");
-  O << (unsigned int)Value;
-}
-
-// Operands of BUILD_VECTOR are signed and we use this to print operands
-// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
-// print as unsigned.
-void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned char Value = MI->getOperand(OpNo).getImm();
-  O << (unsigned int)Value;
-}
-
-void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  unsigned short Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 1023 && "Invalid u10imm argument!");
-  O << (unsigned short)Value;
-}
-
-void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  unsigned short Value = MI->getOperand(OpNo).getImm();
-  assert(Value <= 4095 && "Invalid u12imm argument!");
-  O << (unsigned short)Value;
-}
-
-void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    O << (short)MI->getOperand(OpNo).getImm();
-  else
-    printOperand(MI, OpNo, O);
-}
-
-void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  if (MI->getOperand(OpNo).isImm())
-    O << (unsigned short)MI->getOperand(OpNo).getImm();
-  else
-    printOperand(MI, OpNo, O);
-}
-
-void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O) {
-  if (!MI->getOperand(OpNo).isImm())
-    return printOperand(MI, OpNo, O);
-
-  // Branches can take an immediate operand.  This is used by the branch
-  // selection pass to print .+8, an eight byte displacement from the PC.
-  O << ".+";
-  printAbsBranchOperand(MI, OpNo, O);
-}
-
-void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O) {
-  if (!MI->getOperand(OpNo).isImm())
-    return printOperand(MI, OpNo, O);
-
-  O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
-}
-
-
-void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
-                                 raw_ostream &O) {
-  unsigned CCReg = MI->getOperand(OpNo).getReg();
-  unsigned RegNo;
-  switch (CCReg) {
-  default: llvm_unreachable("Unknown CR register");
-  case PPC::CR0: RegNo = 0; break;
-  case PPC::CR1: RegNo = 1; break;
-  case PPC::CR2: RegNo = 2; break;
-  case PPC::CR3: RegNo = 3; break;
-  case PPC::CR4: RegNo = 4; break;
-  case PPC::CR5: RegNo = 5; break;
-  case PPC::CR6: RegNo = 6; break;
-  case PPC::CR7: RegNo = 7; break;
-  }
-  O << (0x80 >> RegNo);
-}
-
-void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &O) {
-  printS16ImmOperand(MI, OpNo, O);
-  O << '(';
-  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
-    O << "0";
-  else
-    printOperand(MI, OpNo+1, O);
-  O << ')';
-}
-
-void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &O) {
-  // When used as the base register, r0 reads constant zero rather than
-  // the value contained in the register.  For this reason, the darwin
-  // assembler requires that we print r0 as 0 (no r) when used as the base.
-  if (MI->getOperand(OpNo).getReg() == PPC::R0)
-    O << "0";
-  else
-    printOperand(MI, OpNo, O);
-  O << ", ";
-  printOperand(MI, OpNo+1, O);
-}
-
-void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O) {
-  // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
-  // come at the _end_ of the expression.
-  const MCOperand &Op = MI->getOperand(OpNo);
-  const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
-  O << refExp.getSymbol().getName();
-  O << '(';
-  printOperand(MI, OpNo+1, O);
-  O << ')';
-  if (refExp.getKind() != MCSymbolRefExpr::VK_None)
-    O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
-}
-
-/// showRegistersWithPercentPrefix - Check if this register name should be
-/// printed with a percentage symbol as prefix.
-bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
-  if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX)
-    return false;
-
-  switch (RegName[0]) {
-  default:
-    return false;
-  case 'r':
-  case 'f':
-  case 'q':
-  case 'v':
-  case 'c':
-    return true;
-  }
-}
-
-/// getVerboseConditionalRegName - This method expands the condition register
-/// when requested explicitly or targetting Darwin.
-const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
-                                                       unsigned RegEncoding)
-                                                       const {
-  if (!TT.isOSDarwin() && !FullRegNames)
-    return nullptr;
-  if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN)
-    return nullptr;
-  const char *CRBits[] = {
-    "lt", "gt", "eq", "un",
-    "4*cr1+lt", "4*cr1+gt", "4*cr1+eq", "4*cr1+un",
-    "4*cr2+lt", "4*cr2+gt", "4*cr2+eq", "4*cr2+un",
-    "4*cr3+lt", "4*cr3+gt", "4*cr3+eq", "4*cr3+un",
-    "4*cr4+lt", "4*cr4+gt", "4*cr4+eq", "4*cr4+un",
-    "4*cr5+lt", "4*cr5+gt", "4*cr5+eq", "4*cr5+un",
-    "4*cr6+lt", "4*cr6+gt", "4*cr6+eq", "4*cr6+un",
-    "4*cr7+lt", "4*cr7+gt", "4*cr7+eq", "4*cr7+un"
-  };
-  return CRBits[RegEncoding];
-}
-
-// showRegistersWithPrefix - This method determines whether registers
-// should be number-only or include the prefix.
-bool PPCInstPrinter::showRegistersWithPrefix() const {
-  if (TT.getOS() == Triple::AIX)
-    return false;
-  return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
-}
-
-void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                  raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    if (!ShowVSRNumsAsVR)
-      Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
-                                              Reg, OpNo);
-
-    const char *RegName;
-    RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
-    if (RegName == nullptr)
-     RegName = getRegisterName(Reg);
-    if (showRegistersWithPercentPrefix(RegName))
-      O << "%";
-    if (!showRegistersWithPrefix())
-      RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
-
-    O << RegName;
-    return;
-  }
-
-  if (Op.isImm()) {
-    O << Op.getImm();
-    return;
-  }
-
-  assert(Op.isExpr() && "unknown operand kind in printOperand");
-  Op.getExpr()->print(O, &MAI);
-}
-
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
deleted file mode 100644
index 351ccefa2da2..000000000000
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an PPC MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
-#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
-
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class PPCInstPrinter : public MCInstPrinter {
-  Triple TT;
-private:
-  bool showRegistersWithPercentPrefix(const char *RegName) const;
-  bool showRegistersWithPrefix() const;
-  const char *getVerboseConditionRegName(unsigned RegNum,
-                                         unsigned RegEncoding) const;
-
-public:
-  PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                 const MCRegisterInfo &MRI, Triple T)
-    : MCInstPrinter(MAI, MII, MRI), TT(T) {}
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx,
-                               raw_ostream &OS);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printPredicateOperand(const MCInst *MI, unsigned OpNo,
-                             raw_ostream &O, const char *Modifier = nullptr);
-  void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index a405dd70c307..8778e916f7e4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,6 +28,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case FK_NONE:
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -52,6 +52,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case FK_NONE:
+    return 0;
   case FK_Data_1:
     return 1;
   case FK_Data_2:
@@ -74,10 +76,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
 namespace {
 
 class PPCAsmBackend : public MCAsmBackend {
-  const Target &TheTarget;
+protected:
+  Triple TT;
 public:
-  PPCAsmBackend(const Target &T, support::endianness Endian)
-      : MCAsmBackend(Endian), TheTarget(T) {}
+  PPCAsmBackend(const Target &T, const Triple &TT)
+      : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+        TT(TT) {}
 
   unsigned getNumFixupKinds() const override {
     return PPC::NumTargetFixupKinds;
@@ -136,9 +140,11 @@ public:
 
   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                              const MCValue &Target) override {
-    switch ((PPC::Fixups)Fixup.getKind()) {
+    switch ((unsigned)Fixup.getKind()) {
     default:
       return false;
+    case FK_NONE:
+      return true;
     case PPC::fixup_ppc_br24:
     case PPC::fixup_ppc_br24abs:
       // If the target symbol has a local entry point we must not attempt
@@ -187,59 +193,76 @@ public:
 
     return true;
   }
-
-  unsigned getPointerSize() const {
-    StringRef Name = TheTarget.getName();
-    if (Name == "ppc64" || Name == "ppc64le") return 8;
-    assert(Name == "ppc32" && "Unknown target name!");
-    return 4;
-  }
 };
 } // end anonymous namespace
 
 
 // FIXME: This should be in a separate file.
 namespace {
-  class DarwinPPCAsmBackend : public PPCAsmBackend {
-  public:
-    DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, support::big) { }
-
-    std::unique_ptr<MCObjectTargetWriter>
-    createObjectTargetWriter() const override {
-      bool is64 = getPointerSize() == 8;
-      return createPPCMachObjectWriter(
-          /*Is64Bit=*/is64,
-          (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
-          MachO::CPU_SUBTYPE_POWERPC_ALL);
-    }
-  };
-
-  class ELFPPCAsmBackend : public PPCAsmBackend {
-    uint8_t OSABI;
-  public:
-    ELFPPCAsmBackend(const Target &T, support::endianness Endian,
-                     uint8_t OSABI)
-        : PPCAsmBackend(T, Endian), OSABI(OSABI) {}
-
-    std::unique_ptr<MCObjectTargetWriter>
-    createObjectTargetWriter() const override {
-      bool is64 = getPointerSize() == 8;
-      return createPPCELFObjectWriter(is64, OSABI);
-    }
-  };
+
+class DarwinPPCAsmBackend : public PPCAsmBackend {
+public:
+  DarwinPPCAsmBackend(const Target &T, const Triple &TT)
+      : PPCAsmBackend(T, TT) {}
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    bool Is64 = TT.isPPC64();
+    return createPPCMachObjectWriter(
+        /*Is64Bit=*/Is64,
+        (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+        MachO::CPU_SUBTYPE_POWERPC_ALL);
+  }
+};
+
+class ELFPPCAsmBackend : public PPCAsmBackend {
+public:
+  ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+    bool Is64 = TT.isPPC64();
+    return createPPCELFObjectWriter(Is64, OSABI);
+  }
+
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+};
+
+class XCOFFPPCAsmBackend : public PPCAsmBackend {
+public:
+  XCOFFPPCAsmBackend(const Target &T, const Triple &TT)
+      : PPCAsmBackend(T, TT) {}
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override {
+    return createPPCXCOFFObjectWriter(TT.isArch64Bit());
+  }
+};
 
 } // end anonymous namespace
 
+Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
+  if (TT.isPPC64()) {
+    if (Name == "R_PPC64_NONE")
+      return FK_NONE;
+  } else {
+    if (Name == "R_PPC_NONE")
+      return FK_NONE;
+  }
+  return MCAsmBackend::getFixupKind(Name);
+}
+
 MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
                                         const MCSubtargetInfo &STI,
                                         const MCRegisterInfo &MRI,
                                         const MCTargetOptions &Options) {
   const Triple &TT = STI.getTargetTriple();
   if (TT.isOSDarwin())
-    return new DarwinPPCAsmBackend(T);
+    return new DarwinPPCAsmBackend(T, TT);
+
+  if (TT.isOSBinFormatXCOFF())
+    return new XCOFFPPCAsmBackend(T, TT);
 
-  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
-  bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
-  return new ELFPPCAsmBackend(
-      T, IsLittleEndian ? support::little : support::big, OSABI);
+  return new ELFPPCAsmBackend(T, TT);
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a3caf9a7a5ee..042ddf48d5df 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -134,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
   } else {
     switch ((unsigned)Fixup.getKind()) {
       default: llvm_unreachable("invalid fixup kind!");
+    case FK_NONE:
+      Type = ELF::R_PPC_NONE;
+      break;
     case PPC::fixup_ppc_br24abs:
       Type = ELF::R_PPC_ADDR24;
       break;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index dce443997ea5..845489788c86 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..0e64ae55ab1c
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -0,0 +1,543 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+             cl::desc("Use full register names when printing assembly"));
+
+// Useful for testing purposes. Prints vs{32-63} as v{0-31} respectively.
+static cl::opt<bool>
+ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false),
+             cl::desc("Prints full register names with vs{32-63} as v{0-31}"));
+
+// Prints full register names with percent symbol.
+static cl::opt<bool>
+FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
+                        cl::init(false),
+                        cl::desc("Prints full register names with percent"));
+
+#define PRINT_ALIAS_INSTR
+#include "PPCGenAsmWriter.inc"
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  const char *RegName = getRegisterName(RegNo);
+  if (RegName[0] == 'q' /* QPX */) {
+    // The system toolchain on the BG/Q does not understand QPX register names
+    // in .cfi_* directives, so print the name of the floating-point
+    // subregister instead.
+    std::string RN(RegName);
+
+    RN[0] = 'f';
+    OS << RN;
+
+    return;
+  }
+
+  OS << RegName;
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot, const MCSubtargetInfo &STI) {
+  // Check for slwi/srwi mnemonics.
+  if (MI->getOpcode() == PPC::RLWINM) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char MB = MI->getOperand(3).getImm();
+    unsigned char ME = MI->getOperand(4).getImm();
+    bool useSubstituteMnemonic = false;
+    if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+      O << "\tslwi "; useSubstituteMnemonic = true;
+    }
+    if (SH <= 31 && MB == (32-SH) && ME == 31) {
+      O << "\tsrwi "; useSubstituteMnemonic = true;
+      SH = 32-SH;
+    }
+    if (useSubstituteMnemonic) {
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+      MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+    O << "\tmr ";
+    printOperand(MI, 0, O);
+    O << ", ";
+    printOperand(MI, 1, O);
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (MI->getOpcode() == PPC::RLDICR ||
+      MI->getOpcode() == PPC::RLDICR_32) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char ME = MI->getOperand(3).getImm();
+    // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+    if (63-SH == ME) {
+      O << "\tsldi ";
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  // dcbt[st] is printed manually here because:
+  //  1. The assembly syntax is different between embedded and server targets
+  //  2. We must print the short mnemonics for TH == 0 because the
+  //     embedded/server syntax default will not be stable across assemblers
+  //  The syntax for dcbt is:
+  //    dcbt ra, rb, th [server]
+  //    dcbt th, ra, rb [embedded]
+  //  where th can be omitted when it is 0. dcbtst is the same.
+  if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+    unsigned char TH = MI->getOperand(0).getImm();
+    O << "\tdcbt";
+    if (MI->getOpcode() == PPC::DCBTST)
+      O << "st";
+    if (TH == 16)
+      O << "t";
+    O << " ";
+
+    bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
+    if (IsBookE && TH != 0 && TH != 16)
+      O << (unsigned int) TH << ", ";
+
+    printOperand(MI, 1, O);
+    O << ", ";
+    printOperand(MI, 2, O);
+
+    if (!IsBookE && TH != 0 && TH != 16)
+      O << ", " << (unsigned int) TH;
+
+    printAnnotation(O, Annot);
+    return;
+  }
+
+  if (MI->getOpcode() == PPC::DCBF) {
+    unsigned char L = MI->getOperand(0).getImm();
+    if (!L || L == 1 || L == 3) {
+      O << "\tdcbf";
+      if (L == 1 || L == 3)
+        O << "l";
+      if (L == 3)
+        O << "p";
+      O << " ";
+
+      printOperand(MI, 1, O);
+      O << ", ";
+      printOperand(MI, 2, O);
+
+      printAnnotation(O, Annot);
+      return;
+    }
+  }
+
+  if (!printAliasInstr(MI, O))
+    printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  unsigned Code = MI->getOperand(OpNo).getImm();
+
+  if (StringRef(Modifier) == "cc") {
+    switch ((PPC::Predicate)Code) {
+    case PPC::PRED_LT_MINUS:
+    case PPC::PRED_LT_PLUS:
+    case PPC::PRED_LT:
+      O << "lt";
+      return;
+    case PPC::PRED_LE_MINUS:
+    case PPC::PRED_LE_PLUS:
+    case PPC::PRED_LE:
+      O << "le";
+      return;
+    case PPC::PRED_EQ_MINUS:
+    case PPC::PRED_EQ_PLUS:
+    case PPC::PRED_EQ:
+      O << "eq";
+      return;
+    case PPC::PRED_GE_MINUS:
+    case PPC::PRED_GE_PLUS:
+    case PPC::PRED_GE:
+      O << "ge";
+      return;
+    case PPC::PRED_GT_MINUS:
+    case PPC::PRED_GT_PLUS:
+    case PPC::PRED_GT:
+      O << "gt";
+      return;
+    case PPC::PRED_NE_MINUS:
+    case PPC::PRED_NE_PLUS:
+    case PPC::PRED_NE:
+      O << "ne";
+      return;
+    case PPC::PRED_UN_MINUS:
+    case PPC::PRED_UN_PLUS:
+    case PPC::PRED_UN:
+      O << "un";
+      return;
+    case PPC::PRED_NU_MINUS:
+    case PPC::PRED_NU_PLUS:
+    case PPC::PRED_NU:
+      O << "nu";
+      return;
+    case PPC::PRED_BIT_SET:
+    case PPC::PRED_BIT_UNSET:
+      llvm_unreachable("Invalid use of bit predicate code");
+    }
+    llvm_unreachable("Invalid predicate code");
+  }
+
+  if (StringRef(Modifier) == "pm") {
+    switch ((PPC::Predicate)Code) {
+    case PPC::PRED_LT:
+    case PPC::PRED_LE:
+    case PPC::PRED_EQ:
+    case PPC::PRED_GE:
+    case PPC::PRED_GT:
+    case PPC::PRED_NE:
+    case PPC::PRED_UN:
+    case PPC::PRED_NU:
+      return;
+    case PPC::PRED_LT_MINUS:
+    case PPC::PRED_LE_MINUS:
+    case PPC::PRED_EQ_MINUS:
+    case PPC::PRED_GE_MINUS:
+    case PPC::PRED_GT_MINUS:
+    case PPC::PRED_NE_MINUS:
+    case PPC::PRED_UN_MINUS:
+    case PPC::PRED_NU_MINUS:
+      O << "-";
+      return;
+    case PPC::PRED_LT_PLUS:
+    case PPC::PRED_LE_PLUS:
+    case PPC::PRED_EQ_PLUS:
+    case PPC::PRED_GE_PLUS:
+    case PPC::PRED_GT_PLUS:
+    case PPC::PRED_NE_PLUS:
+    case PPC::PRED_UN_PLUS:
+    case PPC::PRED_NU_PLUS:
+      O << "+";
+      return;
+    case PPC::PRED_BIT_SET:
+    case PPC::PRED_BIT_UNSET:
+      llvm_unreachable("Invalid use of bit predicate code");
+    }
+    llvm_unreachable("Invalid predicate code");
+  }
+
+  assert(StringRef(Modifier) == "reg" &&
+         "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
+  printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned Code = MI->getOperand(OpNo).getImm();
+  if (Code == 2)
+    O << "-";
+  else if (Code == 3)
+    O << "+";
+}
+
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 1 && "Invalid u1imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 3 && "Invalid u2imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 7 && "Invalid u3imm argument!"); // 3-bit field: 0..7
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 15 && "Invalid u4imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  int Value = MI->getOperand(OpNo).getImm();
+  Value = SignExtend32<5>(Value);
+  O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 31 && "Invalid u5imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 63 && "Invalid u6imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 127 && "Invalid u7imm argument!");
+  O << (unsigned int)Value;
+}
+
+// Operands of BUILD_VECTOR are signed and we use this to print operands
+// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
+// print as unsigned.
+void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned char Value = MI->getOperand(OpNo).getImm();
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  unsigned short Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 1023 && "Invalid u10imm argument!");
+  O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  unsigned short Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 4095 && "Invalid u12imm argument!");
+  O << (unsigned short)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    O << (short)MI->getOperand(OpNo).getImm();
+  else
+    printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    O << (unsigned short)MI->getOperand(OpNo).getImm();
+  else
+    printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (!MI->getOperand(OpNo).isImm())
+    return printOperand(MI, OpNo, O);
+
+  // Branches can take an immediate operand.  This is used by the branch
+  // selection pass to print .+8, an eight byte displacement from the PC.
+  O << ".";
+  int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+  if (Imm >= 0)
+    O << "+";
+  O << Imm;
+}
+
+void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  if (!MI->getOperand(OpNo).isImm())
+    return printOperand(MI, OpNo, O);
+
+  O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  unsigned CCReg = MI->getOperand(OpNo).getReg();
+  unsigned RegNo;
+  switch (CCReg) {
+  default: llvm_unreachable("Unknown CR register");
+  case PPC::CR0: RegNo = 0; break;
+  case PPC::CR1: RegNo = 1; break;
+  case PPC::CR2: RegNo = 2; break;
+  case PPC::CR3: RegNo = 3; break;
+  case PPC::CR4: RegNo = 4; break;
+  case PPC::CR5: RegNo = 5; break;
+  case PPC::CR6: RegNo = 6; break;
+  case PPC::CR7: RegNo = 7; break;
+  }
+  O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  printS16ImmOperand(MI, OpNo, O);
+  O << '(';
+  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo+1, O);
+  O << ')';
+}
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  // When used as the base register, r0 reads constant zero rather than
+  // the value contained in the register.  For this reason, the darwin
+  // assembler requires that we print r0 as 0 (no r) when used as the base.
+  if (MI->getOperand(OpNo).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo, O);
+  O << ", ";
+  printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+  // come at the _end_ of the expression.
+  const MCOperand &Op = MI->getOperand(OpNo);
+  const MCSymbolRefExpr *RefExp = nullptr;
+  const MCConstantExpr *ConstExp = nullptr;
+  if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Op.getExpr())) {
+    RefExp = cast<MCSymbolRefExpr>(BinExpr->getLHS());
+    ConstExp = cast<MCConstantExpr>(BinExpr->getRHS());
+  } else
+    RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
+
+  O << RefExp->getSymbol().getName();
+  O << '(';
+  printOperand(MI, OpNo+1, O);
+  O << ')';
+  if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+    O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
+  if (ConstExp != nullptr)
+    O << '+' << ConstExp->getValue();
+}
+
+/// showRegistersWithPercentPrefix - Check if this register name should be
+/// printed with a percentage symbol as prefix.
+bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const {
+  if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX)
+    return false;
+
+  switch (RegName[0]) {
+  default:
+    return false;
+  case 'r':
+  case 'f':
+  case 'q':
+  case 'v':
+  case 'c':
+    return true;
+  }
+}
+
+/// getVerboseConditionRegName - This method expands the condition register
+/// when requested explicitly or when targeting Darwin.
+const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
+                                                       unsigned RegEncoding)
+                                                       const {
+  if (!TT.isOSDarwin() && !FullRegNames)
+    return nullptr;
+  if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN)
+    return nullptr;
+  const char *CRBits[] = {
+    "lt", "gt", "eq", "un",
+    "4*cr1+lt", "4*cr1+gt", "4*cr1+eq", "4*cr1+un",
+    "4*cr2+lt", "4*cr2+gt", "4*cr2+eq", "4*cr2+un",
+    "4*cr3+lt", "4*cr3+gt", "4*cr3+eq", "4*cr3+un",
+    "4*cr4+lt", "4*cr4+gt", "4*cr4+eq", "4*cr4+un",
+    "4*cr5+lt", "4*cr5+gt", "4*cr5+eq", "4*cr5+un",
+    "4*cr6+lt", "4*cr6+gt", "4*cr6+eq", "4*cr6+un",
+    "4*cr7+lt", "4*cr7+gt", "4*cr7+eq", "4*cr7+un"
+  };
+  return CRBits[RegEncoding];
+}
+
+// showRegistersWithPrefix - This method determines whether registers
+// should be number-only or include the prefix.
+bool PPCInstPrinter::showRegistersWithPrefix() const {
+  if (TT.getOS() == Triple::AIX)
+    return false;
+  return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    unsigned Reg = Op.getReg();
+    if (!ShowVSRNumsAsVR)
+      Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()),
+                                              Reg, OpNo);
+
+    const char *RegName;
+    RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg));
+    if (RegName == nullptr)
+     RegName = getRegisterName(Reg);
+    if (showRegistersWithPercentPrefix(RegName))
+      O << "%";
+    if (!showRegistersWithPrefix())
+      RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
+
+    O << RegName;
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  Op.getExpr()->print(O, &MAI);
+}
+
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
new file mode 100644
index 000000000000..725ae2a7081b
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -0,0 +1,76 @@
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class PPCInstPrinter : public MCInstPrinter {
+  Triple TT;
+private:
+  bool showRegistersWithPercentPrefix(const char *RegName) const;
+  bool showRegistersWithPrefix() const;
+  const char *getVerboseConditionRegName(unsigned RegNum,
+                                         unsigned RegEncoding) const;
+
+public:
+  PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                 const MCRegisterInfo &MRI, Triple T)
+    : MCInstPrinter(MAI, MII, MRI), TT(T) {}
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx,
+                               raw_ostream &OS);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                             raw_ostream &O, const char *Modifier = nullptr);
+  void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index fb7bf23509c7..5f0005ea1d7b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,3 +81,9 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
   UseIntegratedAssembler = true;
 }
 
+void PPCXCOFFMCAsmInfo::anchor() {}
+
+PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
+  assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+  CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index e252ac944d40..42cb62ad26a4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,13 +1,12 @@
 //===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the declaration of the MCAsmInfoDarwin class.
+// This file contains the declarations of the PowerPC MCAsmInfo classes.
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,6 +15,7 @@
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
 #include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoXCOFF.h"
 
 namespace llvm {
 class Triple;
@@ -34,6 +34,13 @@ public:
   explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
 };
 
+class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
+  virtual void anchor();
+
+public:
+  explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
+};
+
 } // namespace llvm
 
 #endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 8c15ade6f9c4..676efc500455 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -217,7 +216,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
   Fixups.push_back(MCFixup::create(0, MO.getExpr(),
                                    (MCFixupKind)PPC::fixup_ppc_nofixup));
   const Triple &TT = STI.getTargetTriple();
-  bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+  bool isPPC64 = TT.isPPC64();
   return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
 }
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index a4bcff4b9450..1324faa12553 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -1,9 +1,8 @@
 //===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,9 +98,10 @@ public:
   unsigned getInstSizeInBytes(const MCInst &MI) const;
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 32e6a0bdd65f..d467f5c4a439 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 8bb4791d13dd..449e2c34f74d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -1,9 +1,8 @@
 //===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a1e4e07b25af..90c3c8d20edb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCAsmInfo.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -47,9 +48,9 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "PPCGenRegisterInfo.inc"
 
-// Pin the vtable to this file.
 PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
 
+// Pin the vtable to this file.
 PPCTargetStreamer::~PPCTargetStreamer() = default;
 
 static MCInstrInfo *createPPCMCInstrInfo() {
@@ -82,6 +83,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
   MCAsmInfo *MAI;
   if (TheTriple.isOSDarwin())
     MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+  else if (TheTriple.isOSBinFormatXCOFF())
+    MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
   else
     MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
 
@@ -182,16 +185,33 @@ public:
 
   void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
     auto *Symbol = cast<MCSymbolELF>(S);
+
     // When encoding an assignment to set symbol A to symbol B, also copy
     // the st_other bits encoding the local entry point offset.
-    if (Value->getKind() != MCExpr::SymbolRef)
-      return;
-    const auto &RhsSym = cast<MCSymbolELF>(
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
-    unsigned Other = Symbol->getOther();
+    if (copyLocalEntry(Symbol, Value))
+      UpdateOther.insert(Symbol);
+    else
+      UpdateOther.erase(Symbol);
+  }
+
+  void finish() override {
+    for (auto *Sym : UpdateOther)
+      copyLocalEntry(Sym, Sym->getVariableValue());
+  }
+
+private:
+  SmallPtrSet<MCSymbolELF *, 32> UpdateOther;
+
+  bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) {
+    auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
+    if (!Ref)
+      return false;
+    const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+    unsigned Other = D->getOther();
     Other &= ~ELF::STO_PPC64_LOCAL_MASK;
     Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
-    Symbol->setOther(Other);
+    D->setOther(Other);
+    return true;
   }
 };
 
@@ -217,6 +237,27 @@ public:
   }
 };
 
+class PPCTargetXCOFFStreamer : public PPCTargetStreamer {
+public:
+  PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
+  void emitTCEntry(const MCSymbol &S) override {
+    report_fatal_error("TOC entries not supported yet.");
+  }
+
+  void emitMachine(StringRef CPU) override {
+    llvm_unreachable("Machine pseudo-ops are invalid for XCOFF.");
+  }
+
+  void emitAbiVersion(int AbiVersion) override {
+    llvm_unreachable("ABI-version pseudo-ops are invalid for XCOFF.");
+  }
+
+  void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+    llvm_unreachable("Local-entry pseudo-ops are invalid for XCOFF.");
+  }
+};
+
 } // end anonymous namespace
 
 static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -231,6 +272,8 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
   const Triple &TT = STI.getTargetTriple();
   if (TT.isOSBinFormatELF())
     return new PPCTargetELFStreamer(S);
+  if (TT.isOSBinFormatXCOFF())
+    return new PPCTargetXCOFFStreamer(S);
   return new PPCTargetMachOStreamer(S);
 }
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index d6e450cba0d7..74b67bd2e928 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@ class Triple;
 class StringRef;
 class raw_pwrite_stream;
 
-Target &getThePPC32Target();
-Target &getThePPC64Target();
-Target &getThePPC64LETarget();
-
 MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCRegisterInfo &MRI,
                                       MCContext &Ctx);
@@ -56,6 +51,9 @@ std::unique_ptr<MCObjectTargetWriter> createPPCELFObjectWriter(bool Is64Bit,
 std::unique_ptr<MCObjectTargetWriter>
 createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype);
 
+/// Construct a PPC XCOFF object writer.
+std::unique_ptr<MCObjectTargetWriter> createPPCXCOFFObjectWriter(bool Is64Bit);
+
 /// Returns true iff Val consists of one contiguous run of 1s with any number of
 /// 0s on either side.  The 1s are allowed to wrap from LSB to MSB, so
 /// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is not,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index ff6cf584da23..4cf7fd15fa75 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index c2987b641c04..284e52c298a2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -1,9 +1,8 @@
 //===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 481ba3f09cc7..d686a8ea2a22 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -1,9 +1,8 @@
 //===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..9c661286d455
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -0,0 +1,29 @@
+//===-- PPCXCOFFObjectWriter.cpp - PowerPC XCOFF Writer -------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+
+public:
+  PPCXCOFFObjectWriter(bool Is64Bit);
+};
+} // end anonymous namespace
+
+PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
+    : MCXCOFFObjectTargetWriter(Is64Bit) {}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
+  return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+}
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index 17c37964c562..2a10322d3f49 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -1,22 +1,21 @@
-//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-===//
+//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the resources required by P9 instructions. This is part
-// P9 processor model used for instruction scheduling. This file should contain
-// all of the instructions that may be used on Power 9. This is not just
-// instructions that are new on Power 9 but also instructions that were
+// This file defines the resources required by P9 instructions. This is part of
+// the P9 processor model used for instruction scheduling. This file should
+// contain all the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
 // available on earlier architectures and are still used in Power 9.
 //
 // The makeup of the P9 CPU is modeled as follows:
 //   - Each CPU is made up of two superslices.
 //   - Each superslice is made up of two slices. Therefore, there are 4 slices
-//      for each CPU.
+//   for each CPU.
 //   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
 //   - Each CPU has:
 //     - One CY (Crypto) unit P9_CY_*
@@ -33,9 +32,8 @@
 
 // Two cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "VADDU(B|H|W|D)M$"),
     (instregex "VAND(C)?$"),
@@ -85,9 +83,9 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
 )>;
 
 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
+// single slice. However, since it is Restricted, it requires all 3 dispatches
 // (DISP) for that superslice.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "TABORT(D|W)C(I)?$"),
     (instregex "MTFSB(0|1)$"),
@@ -103,7 +101,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "XSMAX(C|J)?DP$"),
     (instregex "XSMIN(C|J)?DP$"),
@@ -120,11 +118,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
 )>;
 
 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "S(L|R)D$"),
     (instregex "SRAD(I)?$"),
-    (instregex "EXTSWSLI$"),
+    (instregex "EXTSWSLI_32_64$"),
     (instregex "MFV(S)?RD$"),
     (instregex "MTVSRD$"),
     (instregex "MTVSRW(A|Z)$"),
@@ -160,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
     XSNEGDP,
     XSCPSGNDP,
     MFVSRWZ,
+    EXTSWSLI,
     SRADI_32,
     RLDIC,
     RFEBB,
@@ -171,9 +170,9 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
 )>;
 
 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
-//  slingle slice. However, since it is Restricted it requires all 3 dispatches
-//  (DISP) for that superslice.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// single slice. However, since it is Restricted, it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "RLDC(L|R)$"),
     (instregex "RLWIMI(8)?$"),
@@ -200,9 +199,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 
 // Three cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "M(T|F)VSCR$"),
     (instregex "VCMPNEZ(B|H|W)$"),
@@ -285,10 +283,9 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
 )>;
 
 // 7 cycle DP vector operation that uses an entire superslice.
-//  Uses both DP units (the even DPE and odd DPO units), two pipelines
-//  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
+// EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     VADDFP,
     VCTSXS,
@@ -395,18 +392,17 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
     VSUMSWS
 )>;
 
-
 // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-//  dispatch units for the superslice.
-def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
-    (instregex "MADD(HD|HDU|LD)$"),
+    (instregex "MADD(HD|HDU|LD|LD8)$"),
     (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
 )>;
 
 // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-//  dispatch units for the superslice.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FRSP,
     (instregex "FRI(N|P|Z|M)(D|S)$"),
@@ -448,26 +444,26 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations can be done in parallel.
-//  The DP is restricted so we need a full 5 dispatches.
+// These operations can be done in parallel. The DP is restricted so we need a
+// full 4 dispatches.
 def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FSEL(D|S)o$")
 )>;
 
 // 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
 def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "MUL(H|L)(D|W)(U)?o$")
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations must be done sequentially.
-//  The DP is restricted so we need a full 5 dispatches.
+// These operations must be done sequentially. The DP is restricted so we need
+// a full 4 dispatches.
 def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FRI(N|P|Z|M)(D|S)o$"),
     (instregex "FRE(S)?o$"),
@@ -483,8 +479,8 @@ def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
     FRSPo
 )>;
 
-// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
       (instrs
     XSADDDP,
     XSADDSP,
@@ -520,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
 )>;
 
 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "LVS(L|R)$"),
     (instregex "VSPLTIS(W|H|B)$"),
@@ -628,9 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDSRo,
     XSADDQP,
@@ -652,17 +648,17 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCTSQo
 )>;
 
 // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSMADDQP,
     XSMADDQPO,
@@ -677,39 +673,39 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCFSQo
 )>;
 
 // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSDIVQP,
     XSDIVQPO
 )>;
 
 // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSSQRTQP,
     XSSQRTQPO
 )>;
 
 // 6 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LXVL(L)?")
 )>;
 
 // 5 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LVE(B|H|W)X$"),
     (instregex "LVX(L)?"),
@@ -728,7 +724,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
 )>;
 
 // 4 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "DCB(F|T|ST)(EP)?$"),
     (instregex "DCBZ(L)?(EP)?$"),
@@ -757,8 +753,8 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
 )>;
 
 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
-//  superslice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWZX,
     LFDX,
@@ -768,7 +764,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
 // Cracked Load Instructions.
 // Load instructions that can be done in parallel.
 def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C],
       (instrs
     SLBIA,
     SLBIE,
@@ -782,17 +778,26 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
 // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
 // operations can be run in parallel.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
+      (instrs
+    (instregex "L(W|H)ZU(X)?(8)?$")
+)>;
+
+// Cracked TEND Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C],
       (instrs
-    (instregex "L(W|H)ZU(X)?(8)?$"),
     TEND
 )>;
 
+
 // Cracked Store Instruction
 // Consecutive Store and ALU instructions. The store is restricted and requires
 // three dispatches.
 def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(B|H|W|D)CX$")
 )>;
@@ -800,16 +805,16 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
 // Cracked Load Instruction.
 // Two consecutive load operations for a total of 8 cycles.
 def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LDMX
 )>;
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "LHA(X)?(8)?$"),
     (instregex "CP_PASTE(8)?o$"),
@@ -819,20 +824,19 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
 
 // Cracked Restricted Load instruction.
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 6 dispatches are required as this is both cracked and restricted.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWAX
 )>;
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSIWAX,
     LIWAX
@@ -844,7 +848,7 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
 // their latencies are added.
 // Full 6 dispatches are required as this is a restricted instruction.
 def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFSX,
     LFS
@@ -852,10 +856,9 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
 
 // Cracked Load instruction.
 // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
-//  operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSSP,
     LXSSPX,
@@ -866,7 +869,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
 // Cracked 3-Way Load Instruction
 // Load with two ALU operations that depend on each other
 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
       (instrs
     (instregex "LHAU(X)?(8)?$"),
     LWAUX
@@ -874,12 +877,11 @@ def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
 
 // Cracked Load that requires the PM resource.
 // Since the Load and the PM cannot be done at the same time the latencies are
-//  added. Requires 8 cycles.
-// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
-//  as well as 3 dispatches for the PM. The Load requires the remaining 2
-//  dispatches.
+// added. Requires 8 cycles. Since the PM requires the full superslice we need
+// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
+// requires the remaining 1 dispatch.
 def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LXVH8X,
     LXVDSX,
@@ -887,8 +889,8 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
 )>;
 
 // Single slice Restricted store operation. The restricted operation requires
-//  all three dispatches for the superslice.
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// all three dispatches for the superslice.
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "STF(S|D|IWX|SX|DX)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
@@ -905,10 +907,9 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // Vector Store Instruction
-// Requires the whole superslice and therefore requires all three dispatches
+// Requires the whole superslice and therefore requires one dispatch
 // as well as both the Even and Odd exec pipelines.
-def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "STVE(B|H|W)X$"),
     (instregex "STVX(L)?$"),
@@ -916,18 +917,18 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
 )>;
 
 // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "MTCTR(8)?(loop)?$"),
     (instregex "MTLR(8)?$")
 )>;
 
 // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "M(T|F)VRSAVE(v)?$"),
     (instregex "M(T|F)PMR$"),
@@ -938,10 +939,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVW,
     DIVWU,
@@ -949,10 +949,9 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
 )>;
 
 // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVWE,
     DIVD,
@@ -964,29 +963,28 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
 )>;
 
 // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVDE,
     DIVDEU
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     (instregex "DIVW(U)?(O)?o$")
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDo,
     DIVDUo,
@@ -995,10 +993,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
 )>;
 
 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-//  and one full superslice for the DIV operation since there is only one DIV
-//  per superslice. Latency of DIV plus ALU is 42.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 42.
 def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDEo,
     DIVDEUo
@@ -1008,11 +1006,11 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
 
 // Cracked, restricted, ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 6 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches. ALU ops are
+// 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     MTCRF,
     MTCRF8
@@ -1020,11 +1018,11 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
 
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches. ALU ops are
+// 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "ADDC(8)?o$"),
     (instregex "SUBFC(8)?o$")
@@ -1036,7 +1034,7 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
 // One of the ALU ops is restricted the other is not so we have a total of
 // 5 dispatches.
 def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "F(N)?ABS(D|S)o$"),
     (instregex "FCPSGN(D|S)o$"),
@@ -1046,22 +1044,22 @@ def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
 
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     MCRFS
 )>;
 
 // Cracked Restricted ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
-//  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 6 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MTFSF(b|o)?$"),
     (instregex "MTFSFI(o)?$")
@@ -1071,7 +1069,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
 // The two ops cannot be done in parallel.
 // One of the ALU ops is restricted and takes 3 dispatches.
 def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "RLD(I)?C(R|L)o$"),
     (instregex "RLW(IMI|INM|NM)(8)?o$"),
@@ -1086,7 +1084,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
 // The two ops cannot be done in parallel.
 // Both of the ALU ops are restricted and take 3 dispatches.
 def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFFS(L|CE|o)?$")
 )>;
@@ -1095,143 +1093,141 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
 // total of 6 cycles. All of the ALU operations are also restricted so each
 // takes 3 dispatches for a total of 9.
 def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFCR(8)?$")
 )>;
 
 // Cracked instruction made of two ALU ops.
 // The two ops cannot be done in parallel.
-def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
-    (instregex "EXTSWSLIo$"),
+    (instregex "EXTSWSLI_32_64o$"),
     (instregex "SRAD(I)?o$"),
+    EXTSWSLIo,
     SLDo,
     SRDo,
     RLDICo
 )>;
 
 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIV
 )>;
 
 // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVo
 )>;
 
 // 36 Cycle DP Instruction.
 // Instruction can be done on a single slice.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTDP
 )>;
 
 // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRT
 )>;
 
 // 36 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTDP
 )>;
 
 // 27 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTSP
 )>;
 
 // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTo
 )>;
 
 // 26 Cycle DP Instruction.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTSP
 )>;
 
 // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRTS
 )>;
 
 // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTSo
 )>;
 
-// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVDP
 )>;
 
 // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIVS
 )>;
 
 // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVSo
 )>;
 
-// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVSP
 )>;
 
 // 24 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-//  superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVSP
 )>;
 
 // 33 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-//  superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVDP
 )>;
 
 // Instruction cracked into three pieces. One Load and two ALU operations.
 // The Load and one of the ALU ops cannot be run at the same time and so the
-//  latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
+// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
 // Both the load and the ALU that depends on it are restricted and so they take
-//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// a total of 6 dispatches. The final dispatch comes from the second ALU op.
 // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
 def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(SU|SUX)$")
 )>;
@@ -1240,7 +1236,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
 // the store and so it can be run at the same time as the store. The store is
 // also restricted.
 def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "STF(S|D)U(X)?$"),
     (instregex "ST(B|H|W|D)U(X)?(8)?$")
@@ -1249,20 +1245,19 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
 // the load and so it can be run at the same time as the load.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
       (instrs
     (instregex "LBZU(X)?(8)?$"),
     (instregex "LDU(X)?$")
 )>;
 
-
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
-//  the load and so it can be run at the same time as the load. The load is also
-//  restricted. 3 dispatches are from the restricted load while the other two
-//  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
-//  is required for the ALU.
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the other one
+// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+// is required for the ALU.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(DU|DUX)$")
 )>;
@@ -1270,9 +1265,9 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
 // Crypto Instructions
 
 // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "VPMSUM(B|H|W|D)$"),
     (instregex "V(N)?CIPHER(LAST)?$"),
@@ -1282,14 +1277,14 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
 // Branch Instructions
 
 // Two Cycle Branch
-def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BR_2C, DISP_BR_1C],
       (instrs
   (instregex "BCCCTR(L)?(8)?$"),
   (instregex "BCCL(A|R|RL)?$"),
   (instregex "BCCTR(L)?(8)?(n)?$"),
   (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
   (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
-  (instregex "BL(_TLS)?$"),
+  (instregex "BL(_TLS|_NOP)?$"),
   (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
   (instregex "BLA(8|8_NOP)?$"),
   (instregex "BLR(8|L)?$"),
@@ -1313,8 +1308,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
 
 // Five Cycle Branch with a 2 Cycle ALU Op
 // Operations must be done consecutively and not in parallel.
-def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
       (instrs
     ADDPCIS
 )>;
@@ -1324,17 +1318,15 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
 // Atomic Load
 def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
               IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, 
+              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "L(D|W)AT$")
 )>;
 
 // Atomic Store
 def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(D|W)AT$")
 )>;
@@ -1406,6 +1398,7 @@ def : InstRW<[],
   MBAR,
   MSYNC,
   SLBSYNC,
+  SLBFEEo,
   NAP,
   STOP,
   TRAP,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index bfc613af3dc0..c6951ab67b08 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -1,9 +1,8 @@
 //===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,6 @@
 #define LLVM_LIB_TARGET_POWERPC_PPC_H
 
 #include "llvm/Support/CodeGen.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
@@ -57,12 +55,26 @@ namespace llvm {
                                          MCOperand &OutMO, AsmPrinter &AP,
                                          bool isDarwin);
 
+  void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+  void initializePPCLoopPreIncPrepPass(PassRegistry&);
+  void initializePPCTOCRegDepsPass(PassRegistry&);
+  void initializePPCEarlyReturnPass(PassRegistry&);
+  void initializePPCVSXCopyPass(PassRegistry&);
   void initializePPCVSXFMAMutatePass(PassRegistry&);
+  void initializePPCVSXSwapRemovalPass(PassRegistry&);
+  void initializePPCReduceCRLogicalsPass(PassRegistry&);
+  void initializePPCBSelPass(PassRegistry&);
+  void initializePPCBranchCoalescingPass(PassRegistry&);
+  void initializePPCQPXLoadSplatPass(PassRegistry&);
   void initializePPCBoolRetToIntPass(PassRegistry&);
   void initializePPCExpandISELPass(PassRegistry &);
   void initializePPCPreEmitPeepholePass(PassRegistry &);
   void initializePPCTLSDynamicCallPass(PassRegistry &);
   void initializePPCMIPeepholePass(PassRegistry&);
+
   extern char &PPCVSXFMAMutateID;
 
   namespace PPCII {
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 98e6e98e6974..8e94a2ae15e0 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -1,9 +1,8 @@
 //===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -136,6 +135,9 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                         "Enable VSX instructions",
                                         [FeatureAltivec]>;
+def FeatureTwoConstNR :
+  SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+                   "Requires two constant Newton-Raphson computation">;
 def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
                                         "Enable POWER8 Altivec instructions",
                                         [FeatureAltivec]>;
@@ -162,8 +164,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
                                   "Enable Hardware Transactional Memory instructions">;
 def FeatureMFTB   : SubtargetFeature<"", "FeatureMFTB", "true",
                                         "Implement mftb using the mfspr instruction">;
-def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
-                                     "Target supports add/load integer fusion.">;
+def FeaturePPCPreRASched:
+  SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
+                   "Use PowerPC pre-RA scheduling strategy">;
+def FeaturePPCPostRASched:
+  SubtargetFeature<"ppc-postra-sched", "UsePPCPostRASchedStrategy", "true",
+                   "Use PowerPC post-RA scheduling strategy">;
 def FeatureFloat128 :
   SubtargetFeature<"float128", "HasFloat128", "true",
                    "Enable the __float128 data type for IEEE-754R Binary128.",
@@ -191,6 +197,13 @@ def FeatureP9Vector  : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
                                         "Enable POWER9 vector instructions",
                                         [FeatureISA3_0, FeatureP8Vector,
                                          FeatureP9Altivec]>;
+// A separate feature for this even though it is equivalent to P9Vector
+// because this is a feature of the implementation rather than the architecture
+// and may go away with future CPUs.
+def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
+                                                 "VectorsUseTwoUnits",
+                                                 "true",
+                                                 "Vectors use two units">;
 
 // Since new processors generally contain a superset of features of those that
 // came before them, the idea is to make implementations of new processors
@@ -215,15 +228,15 @@ def ProcessorFeatures {
        FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
        Feature64Bit /*, Feature64BitRegs */,
        FeatureBPERMD, FeatureExtDiv,
-       FeatureMFTB, DeprecatedDST];
+       FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
   list<SubtargetFeature> Power8SpecificFeatures =
       [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
-       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
-       FeatureFusion];
+       FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
   list<SubtargetFeature> Power8FeatureList =
       !listconcat(Power7FeatureList, Power8SpecificFeatures);
   list<SubtargetFeature> Power9SpecificFeatures =
-      [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+      [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
+       FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
   list<SubtargetFeature> Power9FeatureList =
       !listconcat(Power8FeatureList, Power9SpecificFeatures);
 }
@@ -279,10 +292,9 @@ def getNonRecordFormOpcode : InstrMapping {
 
 def getAltVSXFMAOpcode : InstrMapping {
   let FilterClass = "AltVSXFMARel";
-  // Instructions with the same BaseName and Interpretation64Bit values
-  // form a row.
+  // Instructions with the same BaseName value form a row.
   let RowFields = ["BaseName"];
-  // Instructions with the same RC value form a column.
+  // Instructions with the same IsVSXFMAAlt value form a column.
   let ColFields = ["IsVSXFMAAlt"];
   // The key column are the (default) addend-killing instructions.
   let KeyCol = ["0"];
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 04aa3c9b1e22..bd87ce06b4fb 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,7 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
@@ -26,6 +25,7 @@
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
 #include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
@@ -95,68 +95,102 @@ public:
     return AsmPrinter::doInitialization(M);
   }
 
-    void EmitInstruction(const MachineInstr *MI) override;
+  void EmitInstruction(const MachineInstr *MI) override;
+
+  /// This function is for PrintAsmOperand and PrintAsmMemoryOperand,
+  /// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only.
+  /// \p MI is therefore always an INLINEASM instruction.
+  void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+  void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &O) override;
+
+  void EmitEndOfAsmFile(Module &M) override;
+
+  void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+  void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
+  void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    Subtarget = &MF.getSubtarget<PPCSubtarget>();
+    bool Changed = AsmPrinter::runOnMachineFunction(MF);
+    emitXRayTable();
+    return Changed;
+  }
+};
 
-    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+public:
+  explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+                              std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O) override;
-    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O) override;
+  StringRef getPassName() const override {
+    return "Linux PPC Assembly Printer";
+  }
 
-    void EmitEndOfAsmFile(Module &M) override;
+  bool doFinalization(Module &M) override;
+  void EmitStartOfAsmFile(Module &M) override;
 
-    void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
-    void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
-    void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
-    bool runOnMachineFunction(MachineFunction &MF) override {
-      Subtarget = &MF.getSubtarget<PPCSubtarget>();
-      bool Changed = AsmPrinter::runOnMachineFunction(MF);
-      emitXRayTable();
-      return Changed;
-    }
-  };
+  void EmitFunctionEntryLabel() override;
 
-  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
-  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCLinuxAsmPrinter(TargetMachine &TM,
-                                std::unique_ptr<MCStreamer> Streamer)
-        : PPCAsmPrinter(TM, std::move(Streamer)) {}
+  void EmitFunctionBodyStart() override;
+  void EmitFunctionBodyEnd() override;
+  void EmitInstruction(const MachineInstr *MI) override;
+};
 
-    StringRef getPassName() const override {
-      return "Linux PPC Assembly Printer";
-    }
+/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+/// OS X
+class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+public:
+  explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+                               std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
-    bool doFinalization(Module &M) override;
-    void EmitStartOfAsmFile(Module &M) override;
+  StringRef getPassName() const override {
+    return "Darwin PPC Assembly Printer";
+  }
 
-    void EmitFunctionEntryLabel() override;
+  bool doFinalization(Module &M) override;
+  void EmitStartOfAsmFile(Module &M) override;
+};
 
-    void EmitFunctionBodyStart() override;
-    void EmitFunctionBodyEnd() override;
-    void EmitInstruction(const MachineInstr *MI) override;
-  };
+class PPCAIXAsmPrinter : public PPCAsmPrinter {
+public:
+  PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
-  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
-  /// OS X
-  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
-  public:
-    explicit PPCDarwinAsmPrinter(TargetMachine &TM,
-                                 std::unique_ptr<MCStreamer> Streamer)
-        : PPCAsmPrinter(TM, std::move(Streamer)) {}
+  StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+};
 
-    StringRef getPassName() const override {
-      return "Darwin PPC Assembly Printer";
-    }
+} // end anonymous namespace
 
-    bool doFinalization(Module &M) override;
-    void EmitStartOfAsmFile(Module &M) override;
-  };
+void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+                                       raw_ostream &O) {
+  // Computing the address of a global symbol, not calling it.
+  const GlobalValue *GV = MO.getGlobal();
+  MCSymbol *SymToPrint;
+
+  // External or weakly linked global variables need non-lazily-resolved stubs
+  if (Subtarget->hasLazyResolverStub(GV)) {
+    SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+        MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+            SymToPrint);
+    if (!StubSym.getPointer())
+      StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+                                                   !GV->hasInternalLinkage());
+  } else {
+    SymToPrint = getSymbol(GV);
+  }
 
-} // end anonymous namespace
+  SymToPrint->print(O, MAI);
+
+  printOffset(MO.getOffset(), O);
+}
 
 void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                  raw_ostream &O) {
@@ -165,10 +199,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
-    unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
-                                                     MO.getReg(), OpNo);
-
-    const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+    // The MI is INLINEASM ONLY and UseVSXReg is always false.
+    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
 
     // Linux assembler (Others?) does not take register mnemonics.
     // FIXME - What about special registers used in mfspr/mtspr?
@@ -192,26 +224,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress: {
-    // Computing the address of a global symbol, not calling it.
-    const GlobalValue *GV = MO.getGlobal();
-    MCSymbol *SymToPrint;
-
-    // External or weakly linked global variables need non-lazily-resolved stubs
-    if (Subtarget->hasLazyResolverStub(GV)) {
-      SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      MachineModuleInfoImpl::StubValueTy &StubSym =
-          MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
-              SymToPrint);
-      if (!StubSym.getPointer())
-        StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
-                                                     !GV->hasInternalLinkage());
-    } else {
-      SymToPrint = getSymbol(GV);
-    }
-
-    SymToPrint->print(O, MAI);
-
-    printOffset(MO.getOffset(), O);
+    PrintSymbolOperand(MO, O);
     return;
   }
 
@@ -224,7 +237,6 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned AsmVariant,
                                     const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -233,9 +245,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
-    case 'c': // Don't print "$" before a global var name or constant.
-      break; // PPC never has a prefix.
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     case 'L': // Write second word of DImode reference.
       // Verify that this operand has two consecutive registers.
       if (!MI->getOperand(OpNo).isReg() ||
@@ -277,7 +287,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 // assembler operand.
 
 bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                                          unsigned AsmVariant,
                                           const char *ExtraCode,
                                           raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
@@ -460,6 +469,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
   StringRef Name = "__tls_get_addr";
   MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
   MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+  const Module *M = MF->getFunction().getParent();
 
   assert(MI->getOperand(0).isReg() &&
          ((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
@@ -473,8 +483,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
   if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
       isPositionIndependent())
     Kind = MCSymbolRefExpr::VK_PLT;
-  const MCSymbolRefExpr *TlsRef =
+  const MCExpr *TlsRef =
     MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
+
+  // Add a 32768 offset to the symbol so we follow the latest GOT/PLT ABI.
+  if (Kind == MCSymbolRefExpr::VK_PLT && Subtarget->isSecurePlt() &&
+      M->getPICLevel() == PICLevel::BigPIC)
+    TlsRef = MCBinaryExpr::createAdd(
+        TlsRef, MCConstantExpr::create(32768, OutContext), OutContext);
   const MachineOperand &MO = MI->getOperand(2);
   const GlobalValue *GValue = MO.getGlobal();
   MCSymbol *MOSymbol = getSymbol(GValue);
@@ -576,34 +592,30 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // Into: lwz %rt, .L0$poff - .L0$pb(%ri)
     //       add %rd, %rt, %ri
     // or into (if secure plt mode is on):
-    //       addis r30, r30, .LTOC - .L0$pb@ha
-    //       addi r30, r30, .LTOC - .L0$pb@l
+    //       addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE_} - .L0$pb@ha
+    //       addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE_} - .L0$pb@l
     // Get the offset from the GOT Base Register to the GOT
     LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
     if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
       unsigned PICR = TmpInst.getOperand(0).getReg();
-      MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC"));
+      MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
+          M->getPICLevel() == PICLevel::SmallPIC ? "_GLOBAL_OFFSET_TABLE_"
+                                                 : ".LTOC");
       const MCExpr *PB =
-        MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
-                                OutContext);
+          MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
 
-      const MCExpr *LTOCDeltaExpr =
-        MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext),
-                                PB, OutContext);
+      const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
+          MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
 
-      const MCExpr *LTOCDeltaHi =
-        PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext);
-      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
-                                   .addReg(PICR)
-                                   .addReg(PICR)
-                                   .addExpr(LTOCDeltaHi));
+      const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+      EmitToStreamer(
+          *OutStreamer,
+          MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
 
-      const MCExpr *LTOCDeltaLo =
-        PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext);
-      EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
-                                   .addReg(PICR)
-                                   .addReg(PICR)
-                                   .addExpr(LTOCDeltaLo));
+      const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+      EmitToStreamer(
+          *OutStreamer,
+          MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
       return;
     } else {
       MCSymbol *PICOffset =
@@ -1640,6 +1652,9 @@ createPPCAsmPrinterPass(TargetMachine &tm,
                         std::unique_ptr<MCStreamer> &&Streamer) {
   if (tm.getTargetTriple().isMacOSX())
     return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+  if (tm.getTargetTriple().isOSAIX())
+    return new PPCAIXAsmPrinter(tm, std::move(Streamer));
+
   return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
 }
 
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 55e105dad0e5..104cf2ba3c00 100644
--- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -1,9 +1,8 @@
 //===- PPCBoolRetToInt.cpp ------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index bbb977f090c5..5e9a661f8f0b 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -1,9 +1,8 @@
 //===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -34,10 +33,6 @@ STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
 STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
 STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
 
-namespace llvm {
-    void initializePPCBranchCoalescingPass(PassRegistry&);
-}
-
 //===----------------------------------------------------------------------===//
 //                               PPCBranchCoalescing
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 0d1bb9297bcb..793d690baec3 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,9 +1,8 @@
 //===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,16 +25,13 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "ppc-branch-select"
 
 STATISTIC(NumExpanded, "Number of branches expanded to long format");
 
-namespace llvm {
-  void initializePPCBSelPass(PassRegistry&);
-}
-
 namespace {
   struct PPCBSel : public MachineFunctionPass {
     static char ID;
@@ -48,6 +44,17 @@ namespace {
     // size that is due to potential padding.
     std::vector<std::pair<unsigned, unsigned>> BlockSizes;
 
+    // The first block number which has imprecise instruction address.
+    int FirstImpreciseBlock = -1;
+
+    unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);
+    unsigned ComputeBlockSizes(MachineFunction &Fn);
+    void modifyAdjustment(MachineFunction &Fn);
+    int computeBranchSize(MachineFunction &Fn,
+                          const MachineBasicBlock *Src,
+                          const MachineBasicBlock *Dest,
+                          unsigned BrOffset);
+
     bool runOnMachineFunction(MachineFunction &Fn) override;
 
     MachineFunctionProperties getRequiredProperties() const override {
@@ -70,43 +77,47 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
   return new PPCBSel();
 }
 
-bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
-  const PPCInstrInfo *TII =
-      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
-  // Give the blocks of the function a dense, in-order, numbering.
-  Fn.RenumberBlocks();
-  BlockSizes.resize(Fn.getNumBlockIDs());
-
-  auto GetAlignmentAdjustment =
-    [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
-    unsigned Align = MBB.getAlignment();
-    if (!Align)
-      return 0;
-
-    unsigned AlignAmt = 1 << Align;
-    unsigned ParentAlign = MBB.getParent()->getAlignment();
-
-    if (Align <= ParentAlign)
-      return OffsetToAlignment(Offset, AlignAmt);
-
-    // The alignment of this MBB is larger than the function's alignment, so we
-    // can't tell whether or not it will insert nops. Assume that it will.
-    return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
-  };
+/// In order to make MBB aligned, we need to add an adjustment value to the
+/// original Offset.
+unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
+                                         unsigned Offset) {
+  unsigned Align = MBB.getAlignment();
+  if (!Align)
+    return 0;
+
+  unsigned AlignAmt = 1 << Align;
+  unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+  if (Align <= ParentAlign)
+    return OffsetToAlignment(Offset, AlignAmt);
+
+  // The alignment of this MBB is larger than the function's alignment, so we
+  // can't tell whether or not it will insert nops. Assume that it will.
+  if (FirstImpreciseBlock < 0)
+    FirstImpreciseBlock = MBB.getNumber();
+  return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+}
 
-  // We need to be careful about the offset of the first block in the function
-  // because it might not have the function's alignment. This happens because,
-  // under the ELFv2 ABI, for functions which require a TOC pointer, we add a
-  // two-instruction sequence to the start of the function.
-  // Note: This needs to be synchronized with the check in
-  // PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+/// We need to be careful about the offset of the first block in the function
+/// because it might not have the function's alignment. This happens because,
+/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a
+/// two-instruction sequence to the start of the function.
+/// Note: This needs to be synchronized with the check in
+/// PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+static inline unsigned GetInitialOffset(MachineFunction &Fn) {
   unsigned InitialOffset = 0;
   if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
       !Fn.getRegInfo().use_empty(PPC::X2))
     InitialOffset = 8;
+  return InitialOffset;
+}
+
+/// Measure each MBB and compute a size for the entire function.
+unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
+  const PPCInstrInfo *TII =
+      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+  unsigned FuncSize = GetInitialOffset(Fn);
 
-  // Measure each MBB and compute a size for the entire function.
-  unsigned FuncSize = InitialOffset;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
        ++MFI) {
     MachineBasicBlock *MBB = &*MFI;
@@ -124,13 +135,145 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
     }
 
     unsigned BlockSize = 0;
-    for (MachineInstr &MI : *MBB)
+    for (MachineInstr &MI : *MBB) {
       BlockSize += TII->getInstSizeInBytes(MI);
+      if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
+        FirstImpreciseBlock = MBB->getNumber();
+    }
 
     BlockSizes[MBB->getNumber()].first = BlockSize;
     FuncSize += BlockSize;
   }
 
+  return FuncSize;
+}
+
+/// Recompute the alignment padding recorded for each basic block.
+void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
+  unsigned Offset = GetInitialOffset(Fn);
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock *MBB = &*MFI;
+
+    if (MBB->getNumber() > 0) {
+      auto &BS = BlockSizes[MBB->getNumber()-1];
+      BS.first -= BS.second;
+      Offset -= BS.second;
+
+      unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+
+      BS.first += AlignExtra;
+      BS.second = AlignExtra;
+
+      Offset += AlignExtra;
+    }
+
+    Offset += BlockSizes[MBB->getNumber()].first;
+  }
+}
+
+/// Determine the offset from the branch in Src block to the Dest block.
+/// BrOffset is the offset of the branch instruction inside Src block.
+int PPCBSel::computeBranchSize(MachineFunction &Fn,
+                               const MachineBasicBlock *Src,
+                               const MachineBasicBlock *Dest,
+                               unsigned BrOffset) {
+  int BranchSize;
+  unsigned MaxAlign = 2;
+  bool NeedExtraAdjustment = false;
+  if (Dest->getNumber() <= Src->getNumber()) {
+    // If this is a backwards branch, the delta is the offset from the
+    // start of this block to this branch, plus the sizes of all blocks
+    // from this block to the dest.
+    BranchSize = BrOffset;
+    MaxAlign = std::max(MaxAlign, Src->getAlignment());
+
+    int DestBlock = Dest->getNumber();
+    BranchSize += BlockSizes[DestBlock].first;
+    for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
+      BranchSize += BlockSizes[i].first;
+      MaxAlign = std::max(MaxAlign,
+                          Fn.getBlockNumbered(i)->getAlignment());
+    }
+
+    NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                          (DestBlock >= FirstImpreciseBlock);
+  } else {
+    // Otherwise, add the size of the blocks between this block and the
+    // dest to the number of bytes left in this block.
+    unsigned StartBlock = Src->getNumber();
+    BranchSize = BlockSizes[StartBlock].first - BrOffset;
+
+    MaxAlign = std::max(MaxAlign, Dest->getAlignment());
+    for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
+      BranchSize += BlockSizes[i].first;
+      MaxAlign = std::max(MaxAlign,
+                          Fn.getBlockNumbered(i)->getAlignment());
+    }
+
+    NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+                          (Src->getNumber() >= FirstImpreciseBlock);
+  }
+
+  // We tend to overestimate code size due to large alignment and
+  // inline assembly. Usually this makes the computed branch offset larger.
+  // But sometimes it may also make the computed branch offset smaller
+  // than the actual branch offset. If the offset is close to the limit of
+  // the encoding, this may cause problems at run time.
+  // Following is a simplified example.
+  //
+  //              actual        estimated
+  //              address        address
+  //    ...
+  //   bne Far      100            10c
+  //   .p2align 4
+  //   Near:        110            110
+  //    ...
+  //   Far:        8108           8108
+  //
+  //   Actual offset:    0x8108 - 0x100 = 0x8008
+  //   Computed offset:  0x8108 - 0x10c = 0x7ffc
+  //
+  // This example also shows when we can get the largest gap between
+  // the estimated offset and the actual offset. If there is an aligned
+  // block ABB between the branch and the target, assume its alignment is
+  // <align> bits. Now consider the accumulated function size FSIZE up to
+  // the end of the previous block PBB. If the estimated FSIZE is a multiple
+  // of 2^<align>, we don't need any padding for the estimated address of
+  // ABB. If the actual FSIZE at the end of PBB is 4 bytes more than a
+  // multiple of 2^<align>, then we need (2^<align> - 4) bytes of
+  // padding. It also means the actual branch offset is (2^<align> - 4)
+  // larger than the computed offset. Any other actual FSIZE needs fewer
+  // padding bytes, so it causes a smaller gap between the two offsets.
+  //
+  // On the other hand, if the inline asm or large alignment occurs
+  // between the branch block and destination block, the estimated address
+  // can be <delta> larger than actual address. If padding bytes are
+  // needed for a later aligned block, the actual number of padding bytes
+  // is at most <delta> more than estimated padding bytes. So the actual
+  // aligned block address is less than or equal to the estimated aligned
+  // block address. So the actual branch offset is less than or equal to
+  // computed branch offset.
+  //
+  // The computed offset is at most ((1 << alignment) - 4) bytes smaller
+  // than actual offset. So we add this number to the offset for safety.
+  if (NeedExtraAdjustment)
+    BranchSize += (1 << MaxAlign) - 4;
+
+  return BranchSize;
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+  const PPCInstrInfo *TII =
+      static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+  // Give the blocks of the function a dense, in-order, numbering.
+  Fn.RenumberBlocks();
+  BlockSizes.resize(Fn.getNumBlockIDs());
+  FirstImpreciseBlock = -1;
+
+  // Measure each MBB and compute a size for the entire function.
+  unsigned FuncSize = ComputeBlockSizes(Fn);
+
   // If the entire function is smaller than the displacement of a branch field,
   // we know we don't need to shrink any branches in this function.  This is a
   // common case.
@@ -178,23 +321,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
 
         // Determine the offset from the current branch to the destination
         // block.
-        int BranchSize;
-        if (Dest->getNumber() <= MBB.getNumber()) {
-          // If this is a backwards branch, the delta is the offset from the
-          // start of this block to this branch, plus the sizes of all blocks
-          // from this block to the dest.
-          BranchSize = MBBStartOffset;
-
-          for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
-            BranchSize += BlockSizes[i].first;
-        } else {
-          // Otherwise, add the size of the blocks between this block and the
-          // dest to the number of bytes left in this block.
-          BranchSize = -MBBStartOffset;
-
-          for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
-            BranchSize += BlockSizes[i].first;
-        }
+        int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);
 
         // If this branch is in range, ignore it.
         if (isInt<16>(BranchSize)) {
@@ -253,26 +380,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
     if (MadeChange) {
       // If we're going to iterate again, make sure we've updated our
       // padding-based contributions to the block sizes.
-      unsigned Offset = InitialOffset;
-      for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
-           ++MFI) {
-        MachineBasicBlock *MBB = &*MFI;
-
-        if (MBB->getNumber() > 0) {
-          auto &BS = BlockSizes[MBB->getNumber()-1];
-          BS.first -= BS.second;
-          Offset -= BS.second;
-
-          unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
-
-          BS.first += AlignExtra;
-          BS.second = AlignExtra;
-
-          Offset += AlignExtra;
-        }
-
-        Offset += BlockSizes[MBB->getNumber()].first;
-      }
+      modifyAdjustment(Fn);
     }
 
     EverMadeChange |= MadeChange;
diff --git a/lib/Target/PowerPC/PPCCCState.cpp b/lib/Target/PowerPC/PPCCCState.cpp
index 5510a95430f5..5116f0d121f4 100644
--- a/lib/Target/PowerPC/PPCCCState.cpp
+++ b/lib/Target/PowerPC/PPCCCState.cpp
@@ -1,9 +1,8 @@
 //===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCCCState.h b/lib/Target/PowerPC/PPCCCState.h
index 9be9f11dbea3..e3499597474c 100644
--- a/lib/Target/PowerPC/PPCCCState.h
+++ b/lib/Target/PowerPC/PPCCCState.h
@@ -1,9 +1,8 @@
 //===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6b9e2383e36f..2b8d9b87724f 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,9 +1,8 @@
 //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -72,70 +71,7 @@ using namespace llvm;
 static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
 #endif
 
-// The latency of mtctr is only justified if there are more than 4
-// comparisons that will be removed as a result.
-static cl::opt<unsigned>
-SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
-                      cl::desc("Loops with a constant trip count smaller than "
-                               "this value will not use the count register."));
-
-STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
-
-namespace llvm {
-  void initializePPCCTRLoopsPass(PassRegistry&);
-#ifndef NDEBUG
-  void initializePPCCTRLoopsVerifyPass(PassRegistry&);
-#endif
-}
-
 namespace {
-  struct PPCCTRLoops : public FunctionPass {
-
-#ifndef NDEBUG
-    static int Counter;
-#endif
-
-  public:
-    static char ID;
-
-    PPCCTRLoops() : FunctionPass(ID) {
-      initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool runOnFunction(Function &F) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<LoopInfoWrapperPass>();
-      AU.addPreserved<LoopInfoWrapperPass>();
-      AU.addRequired<DominatorTreeWrapperPass>();
-      AU.addPreserved<DominatorTreeWrapperPass>();
-      AU.addRequired<ScalarEvolutionWrapperPass>();
-      AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetTransformInfoWrapperPass>();
-    }
-
-  private:
-    bool mightUseCTR(BasicBlock *BB);
-    bool convertToCTRLoop(Loop *L);
-
-  private:
-    const PPCTargetMachine *TM;
-    const PPCSubtarget *STI;
-    const PPCTargetLowering *TLI;
-    const DataLayout *DL;
-    const TargetLibraryInfo *LibInfo;
-    const TargetTransformInfo *TTI;
-    LoopInfo *LI;
-    ScalarEvolution *SE;
-    DominatorTree *DT;
-    bool PreserveLCSSA;
-    TargetSchedModel SchedModel;
-  };
-
-  char PPCCTRLoops::ID = 0;
-#ifndef NDEBUG
-  int PPCCTRLoops::Counter = 0;
-#endif
 
 #ifndef NDEBUG
   struct PPCCTRLoopsVerify : public MachineFunctionPass {
@@ -161,16 +97,6 @@ namespace {
 #endif // NDEBUG
 } // end anonymous namespace
 
-INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
-                    false, false)
-
-FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
-
 #ifndef NDEBUG
 INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                       "PowerPC CTR Loops Verify", false, false)
@@ -183,511 +109,6 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
 }
 #endif // NDEBUG
 
-bool PPCCTRLoops::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
-  if (!TPC)
-    return false;
-
-  TM = &TPC->getTM<PPCTargetMachine>();
-  STI = TM->getSubtargetImpl(F);
-  TLI = STI->getTargetLowering();
-
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  DL = &F.getParent()->getDataLayout();
-  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
-  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
-  bool MadeChange = false;
-
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end();
-       I != E; ++I) {
-    Loop *L = *I;
-    if (!L->getParentLoop())
-      MadeChange |= convertToCTRLoop(L);
-  }
-
-  return MadeChange;
-}
-
-static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
-  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
-    return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
-
-  return false;
-}
-
-// Determining the address of a TLS variable results in a function call in
-// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
-  const auto *GV = dyn_cast<GlobalValue>(MemAddr);
-  if (!GV) {
-    // Recurse to check for constants that refer to TLS global variables.
-    if (const auto *CV = dyn_cast<Constant>(MemAddr))
-      for (const auto &CO : CV->operands())
-        if (memAddrUsesCTR(TM, CO))
-          return true;
-
-    return false;
-  }
-
-  if (!GV->isThreadLocal())
-    return false;
-  TLSModel::Model Model = TM.getTLSModel(GV);
-  return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
-}
-
-// Loop through the inline asm constraints and look for something that clobbers
-// ctr.
-static bool asmClobbersCTR(InlineAsm *IA) {
-  InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
-  for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
-    InlineAsm::ConstraintInfo &C = CIV[i];
-    if (C.Type != InlineAsm::isInput)
-      for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
-        if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
-          return true;
-  }
-  return false;
-}
-
-bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
-  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
-       J != JE; ++J) {
-    if (CallInst *CI = dyn_cast<CallInst>(J)) {
-      // Inline ASM is okay, unless it clobbers the ctr register.
-      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
-        if (asmClobbersCTR(IA))
-          return true;
-        continue;
-      }
-
-      if (Function *F = CI->getCalledFunction()) {
-        // Most intrinsics don't become function calls, but some might.
-        // sin, cos, exp and log are always calls.
-        unsigned Opcode = 0;
-        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
-          switch (F->getIntrinsicID()) {
-          default: continue;
-          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
-          // we're definitely using CTR.
-          case Intrinsic::ppc_is_decremented_ctr_nonzero:
-          case Intrinsic::ppc_mtctr:
-            return true;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
-                       !defined(setjmp_undefined_for_msvc)
-#  pragma push_macro("setjmp")
-#  undef setjmp
-#  define setjmp_undefined_for_msvc
-#endif
-
-          case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-#  pragma pop_macro("setjmp")
-#  undef setjmp_undefined_for_msvc
-#endif
-
-          case Intrinsic::longjmp:
-
-          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
-          // because, although it does clobber the counter register, the
-          // control can't then return to inside the loop unless there is also
-          // an eh_sjlj_setjmp.
-          case Intrinsic::eh_sjlj_setjmp:
-
-          case Intrinsic::memcpy:
-          case Intrinsic::memmove:
-          case Intrinsic::memset:
-          case Intrinsic::powi:
-          case Intrinsic::log:
-          case Intrinsic::log2:
-          case Intrinsic::log10:
-          case Intrinsic::exp:
-          case Intrinsic::exp2:
-          case Intrinsic::pow:
-          case Intrinsic::sin:
-          case Intrinsic::cos:
-            return true;
-          case Intrinsic::copysign:
-            if (CI->getArgOperand(0)->getType()->getScalarType()->
-                isPPC_FP128Ty())
-              return true;
-            else
-              continue; // ISD::FCOPYSIGN is never a library call.
-          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
-          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
-          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
-          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
-          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
-          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
-          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
-          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
-          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
-          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
-          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
-          }
-        }
-
-        // PowerPC does not use [US]DIVREM or other library calls for
-        // operations on regular types which are not otherwise library calls
-        // (i.e. soft float or atomics). If adapting for targets that do,
-        // additional care is required here.
-
-        LibFunc Func;
-        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
-            LibInfo->getLibFunc(F->getName(), Func) &&
-            LibInfo->hasOptimizedCodeGen(Func)) {
-          // Non-read-only functions are never treated as intrinsics.
-          if (!CI->onlyReadsMemory())
-            return true;
-
-          // Conversion happens only for FP calls.
-          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
-            return true;
-
-          switch (Func) {
-          default: return true;
-          case LibFunc_copysign:
-          case LibFunc_copysignf:
-            continue; // ISD::FCOPYSIGN is never a library call.
-          case LibFunc_copysignl:
-            return true;
-          case LibFunc_fabs:
-          case LibFunc_fabsf:
-          case LibFunc_fabsl:
-            continue; // ISD::FABS is never a library call.
-          case LibFunc_sqrt:
-          case LibFunc_sqrtf:
-          case LibFunc_sqrtl:
-            Opcode = ISD::FSQRT; break;
-          case LibFunc_floor:
-          case LibFunc_floorf:
-          case LibFunc_floorl:
-            Opcode = ISD::FFLOOR; break;
-          case LibFunc_nearbyint:
-          case LibFunc_nearbyintf:
-          case LibFunc_nearbyintl:
-            Opcode = ISD::FNEARBYINT; break;
-          case LibFunc_ceil:
-          case LibFunc_ceilf:
-          case LibFunc_ceill:
-            Opcode = ISD::FCEIL; break;
-          case LibFunc_rint:
-          case LibFunc_rintf:
-          case LibFunc_rintl:
-            Opcode = ISD::FRINT; break;
-          case LibFunc_round:
-          case LibFunc_roundf:
-          case LibFunc_roundl:
-            Opcode = ISD::FROUND; break;
-          case LibFunc_trunc:
-          case LibFunc_truncf:
-          case LibFunc_truncl:
-            Opcode = ISD::FTRUNC; break;
-          case LibFunc_fmin:
-          case LibFunc_fminf:
-          case LibFunc_fminl:
-            Opcode = ISD::FMINNUM; break;
-          case LibFunc_fmax:
-          case LibFunc_fmaxf:
-          case LibFunc_fmaxl:
-            Opcode = ISD::FMAXNUM; break;
-          }
-        }
-
-        if (Opcode) {
-          EVT EVTy =
-              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
-
-          if (EVTy == MVT::Other)
-            return true;
-
-          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
-            continue;
-          else if (EVTy.isVector() &&
-                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
-            continue;
-
-          return true;
-        }
-      }
-
-      return true;
-    } else if (isa<BinaryOperator>(J) &&
-               J->getType()->getScalarType()->isPPC_FP128Ty()) {
-      // Most operations on ppc_f128 values become calls.
-      return true;
-    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
-               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
-      CastInst *CI = cast<CastInst>(J);
-      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
-          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
-          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
-          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
-        return true;
-    } else if (isLargeIntegerTy(!TM->isPPC64(),
-                                J->getType()->getScalarType()) &&
-               (J->getOpcode() == Instruction::UDiv ||
-                J->getOpcode() == Instruction::SDiv ||
-                J->getOpcode() == Instruction::URem ||
-                J->getOpcode() == Instruction::SRem)) {
-      return true;
-    } else if (!TM->isPPC64() &&
-               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
-               (J->getOpcode() == Instruction::Shl ||
-                J->getOpcode() == Instruction::AShr ||
-                J->getOpcode() == Instruction::LShr)) {
-      // Only on PPC32, for 128-bit integers (specifically not 64-bit
-      // integers), these might be runtime calls.
-      return true;
-    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
-      // On PowerPC, indirect jumps use the counter register.
-      return true;
-    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
-      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
-        return true;
-    }
-
-    // FREM is always a call.
-    if (J->getOpcode() == Instruction::FRem)
-      return true;
-
-    if (STI->useSoftFloat()) {
-      switch(J->getOpcode()) {
-      case Instruction::FAdd:
-      case Instruction::FSub:
-      case Instruction::FMul:
-      case Instruction::FDiv:
-      case Instruction::FPTrunc:
-      case Instruction::FPExt:
-      case Instruction::FPToUI:
-      case Instruction::FPToSI:
-      case Instruction::UIToFP:
-      case Instruction::SIToFP:
-      case Instruction::FCmp:
-        return true;
-      }
-    }
-
-    for (Value *Operand : J->operands())
-      if (memAddrUsesCTR(*TM, Operand))
-        return true;
-  }
-
-  return false;
-}
-bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
-  bool MadeChange = false;
-
-  // Do not convert small short loops to CTR loop.
-  unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
-  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
-    SmallPtrSet<const Value *, 32> EphValues;
-    auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
-        *L->getHeader()->getParent());
-    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
-    CodeMetrics Metrics;
-    for (BasicBlock *BB : L->blocks())
-      Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
-    // 6 is an approximate latency for the mtctr instruction.
-    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
-      return false;
-  }
-
-  // Process nested loops first.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
-    MadeChange |= convertToCTRLoop(*I);
-    LLVM_DEBUG(dbgs() << "Nested loop converted\n");
-  }
-
-  // If a nested loop has been converted, then we can't convert this loop.
-  if (MadeChange)
-    return MadeChange;
-
-  // Bail out if the loop has irreducible control flow.
-  LoopBlocksRPO RPOT(L);
-  RPOT.perform(LI);
-  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
-    return false;
-
-#ifndef NDEBUG
-  // Stop trying after reaching the limit (if any).
-  int Limit = CTRLoopLimit;
-  if (Limit >= 0) {
-    if (Counter >= CTRLoopLimit)
-      return false;
-    Counter++;
-  }
-#endif
-
-  // We don't want to spill/restore the counter register, and so we don't
-  // want to use the counter register if the loop contains calls.
-  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
-       I != IE; ++I)
-    if (mightUseCTR(*I))
-      return MadeChange;
-
-  SmallVector<BasicBlock*, 4> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
-
-  // If there is an exit edge known to be frequently taken,
-  // we should not transform this loop.
-  for (auto &BB : ExitingBlocks) {
-    Instruction *TI = BB->getTerminator();
-    if (!TI) continue;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      uint64_t TrueWeight = 0, FalseWeight = 0;
-      if (!BI->isConditional() ||
-          !BI->extractProfMetadata(TrueWeight, FalseWeight))
-        continue;
-
-      // If the exit path is more frequent than the loop path,
-      // we return here without further analysis for this loop.
-      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
-      if (( TrueIsExit && FalseWeight < TrueWeight) ||
-          (!TrueIsExit && FalseWeight > TrueWeight))
-        return MadeChange;
-    }
-  }
-
-  BasicBlock *CountedExitBlock = nullptr;
-  const SCEV *ExitCount = nullptr;
-  BranchInst *CountedExitBranch = nullptr;
-  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
-       IE = ExitingBlocks.end(); I != IE; ++I) {
-    const SCEV *EC = SE->getExitCount(L, *I);
-    LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
-                      << (*I)->getName() << ": " << *EC << "\n");
-    if (isa<SCEVCouldNotCompute>(EC))
-      continue;
-    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
-      if (ConstEC->getValue()->isZero())
-        continue;
-    } else if (!SE->isLoopInvariant(EC, L))
-      continue;
-
-    if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
-      continue;
-
-    // If this exiting block is contained in a nested loop, it is not eligible
-    // for insertion of the branch-and-decrement since the inner loop would
-    // end up messing up the value in the CTR.
-    if (LI->getLoopFor(*I) != L)
-      continue;
-
-    // We now have a loop-invariant count of loop iterations (which is not the
-    // constant zero) for which we know that this loop will not exit via this
-    // existing block.
-
-    // We need to make sure that this block will run on every loop iteration.
-    // For this to be true, we must dominate all blocks with backedges. Such
-    // blocks are in-loop predecessors to the header block.
-    bool NotAlways = false;
-    for (pred_iterator PI = pred_begin(L->getHeader()),
-         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
-      if (!L->contains(*PI))
-        continue;
-
-      if (!DT->dominates(*I, *PI)) {
-        NotAlways = true;
-        break;
-      }
-    }
-
-    if (NotAlways)
-      continue;
-
-    // Make sure this blocks ends with a conditional branch.
-    Instruction *TI = (*I)->getTerminator();
-    if (!TI)
-      continue;
-
-    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-      if (!BI->isConditional())
-        continue;
-
-      CountedExitBranch = BI;
-    } else
-      continue;
-
-    // Note that this block may not be the loop latch block, even if the loop
-    // has a latch block.
-    CountedExitBlock = *I;
-    ExitCount = EC;
-    break;
-  }
-
-  if (!CountedExitBlock)
-    return MadeChange;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-
-  // If we don't have a preheader, then insert one. If we already have a
-  // preheader, then we can use it (except if the preheader contains a use of
-  // the CTR register because some such uses might be reordered by the
-  // selection DAG after the mtctr instruction).
-  if (!Preheader || mightUseCTR(Preheader))
-    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
-  if (!Preheader)
-    return MadeChange;
-
-  LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
-                    << "\n");
-
-  // Insert the count into the preheader and replace the condition used by the
-  // selected branch.
-  MadeChange = true;
-
-  SCEVExpander SCEVE(*SE, *DL, "loopcnt");
-  LLVMContext &C = SE->getContext();
-  Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
-  if (!ExitCount->getType()->isPointerTy() &&
-      ExitCount->getType() != CountType)
-    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
-  ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
-  Value *ECValue =
-      SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
-
-  IRBuilder<> CountBuilder(Preheader->getTerminator());
-  Module *M = Preheader->getParent()->getParent();
-  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
-                                               CountType);
-  CountBuilder.CreateCall(MTCTRFunc, ECValue);
-
-  IRBuilder<> CondBuilder(CountedExitBranch);
-  Value *DecFunc =
-    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
-  Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
-  Value *OldCond = CountedExitBranch->getCondition();
-  CountedExitBranch->setCondition(NewCond);
-
-  // The false branch must exit the loop.
-  if (!L->contains(CountedExitBranch->getSuccessor(0)))
-    CountedExitBranch->swapSuccessors();
-
-  // The old condition may be dead now, and may have even created a dead PHI
-  // (the original induction variable).
-  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
-  // Run through the basic blocks of the loop and see if any of them have dead
-  // PHIs that can be removed.
-  for (auto I : L->blocks())
-    DeleteDeadPHIs(I);
-
-  ++NumCTRLoops;
-  return MadeChange;
-}
-
 #ifndef NDEBUG
 static bool clobbersCTR(const MachineInstr &MI) {
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/lib/Target/PowerPC/PPCCallingConv.cpp b/lib/Target/PowerPC/PPCCallingConv.cpp
new file mode 100644
index 000000000000..77cdf5c939dc
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -0,0 +1,162 @@
+//===-- PPCCallingConv.cpp - ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPCCallingConv.h"
+#include "PPCSubtarget.h"
+#include "PPCCCState.h"
+using namespace llvm;
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the " \
+                   "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to PPC C calling convention on Release builds.
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                       CCValAssign::LocInfo &LocInfo,
+                                       ISD::ArgFlagsTy &ArgFlags,
+                                       CCState &State) {
+  return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
+                                              CCValAssign::LocInfo &LocInfo,
+                                              ISD::ArgFlagsTy &ArgFlags,
+                                              CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+  // Skip one register if the first unallocated register has an even register
+  // number and there are still argument registers available which have not been
+  // allocated yet. RegNum is actually an index into ArgRegs, which means we
+  // need to skip a register if RegNum is odd.
+  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+
+  // Always return false here, as this function only makes sure that the first
+  // unallocated register has an odd register number and does not actually
+  // allocate a register for the current argument.
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
+    unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+    ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+  int RegsLeft = NumArgRegs - RegNum;
+
+  // Skip if there are not enough registers left for long double type (4 gpr
+  // regs in soft float mode) and put long double argument on the stack.
+  if (RegNum != NumArgRegs && RegsLeft < 4) {
+    for (int i = 0; i < RegsLeft; i++) {
+      State.AllocateReg(ArgRegs[RegNum + i]);
+    }
+  }
+
+  return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                                MVT &LocVT,
+                                                CCValAssign::LocInfo &LocInfo,
+                                                ISD::ArgFlagsTy &ArgFlags,
+                                                CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
+    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+    PPC::F8
+  };
+
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+  // If there is only one Floating-point register left we need to put both f64
+  // values of a split ppc_fp128 value on the stack.
+  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+
+  // Always return false here, as this function only makes sure that the two f64
+  // values a ppc_fp128 value is split into are both passed in registers or both
+  // passed on the stack and does not actually allocate a register for the
+  // current argument.
+  return false;
+}
+
+// Split F64 arguments into two 32-bit consecutive registers.
+static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
+                                        MVT &LocVT,
+                                        CCValAssign::LocInfo &LocInfo,
+                                        ISD::ArgFlagsTy &ArgFlags,
+                                        CCState &State) {
+  static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
+  static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
+
+  // Try to get the first register.
+  unsigned Reg = State.AllocateReg(HiRegList);
+  if (!Reg)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  unsigned T = State.AllocateReg(LoRegList[i]);
+  (void)T;
+  assert(T == LoRegList[i] && "Could not allocate register");
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+// Same as above, but for return values, so only allocate for R3 and R4
+static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
+                               MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags,
+                               CCState &State) {
+  static const MCPhysReg HiRegList[] = { PPC::R3 };
+  static const MCPhysReg LoRegList[] = { PPC::R4 };
+
+  // Try to get the first register.
+  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+  if (!Reg)
+    return false;
+
+  unsigned i;
+  for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+#include "PPCGenCallingConv.inc"
diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h
index eb904a858592..03d9be0a73d9 100644
--- a/lib/Target/PowerPC/PPCCallingConv.h
+++ b/lib/Target/PowerPC/PPCCallingConv.h
@@ -1,9 +1,8 @@
 //=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,14 +19,27 @@
 
 namespace llvm {
 
-inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
-                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
-                                CCState &) {
-  llvm_unreachable("The AnyReg calling convention is only supported by the " \
-                   "stackmap and patchpoint intrinsics.");
-  // gracefully fallback to PPC C calling convention on Release builds.
-  return false;
-}
+bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT,
+               CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+               CCState &State);
+bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT,
+                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                    CCState &State);
+bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT,
+                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                   CCState &State);
+bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                      CCState &State);
+bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                         CCState &State);
+bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State);
 
 } // End llvm namespace
 
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 22842d516e7d..369b9ce1a711 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,9 +1,8 @@
 //===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,7 @@ def RetCC_PPC64_AnyReg : CallingConv<[
 ]>;
 
 // Return-value convention for PowerPC coldcc.
+let Entry = 1 in
 def RetCC_PPC_Cold : CallingConv<[
   // Use the same return registers as RetCC_PPC, but limited to only
   // one return value. The remaining return values will be saved to
@@ -70,6 +70,7 @@ def RetCC_PPC_Cold : CallingConv<[
 ]>;
 
 // Return-value convention for PowerPC
+let Entry = 1 in
 def RetCC_PPC : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
 
@@ -90,7 +91,7 @@ def RetCC_PPC : CallingConv<[
   CCIfSubtarget<"hasSPE()",
        CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
   CCIfSubtarget<"hasSPE()",
-       CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+       CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>,
 
   // For P9, f128 are passed in vector registers.
   CCIfType<[f128],
@@ -126,6 +127,7 @@ def CC_PPC64_AnyReg : CallingConv<[
 // Simple calling convention for 64-bit ELF PowerPC fast isel.
 // Only handle ints and floats.  All ints are promoted to i64.
 // Vector types and quadword ints are not handled.
+let Entry = 1 in
 def CC_PPC64_ELF_FIS : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
 
@@ -141,6 +143,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[
 // All small ints are promoted to i64.  Vector types, quadword ints,
 // and multiple register returns are "supported" to avoid compile
 // errors, but none are handled by the fast selector.
+let Entry = 1 in
 def RetCC_PPC64_ELF_FIS : CallingConv<[
   CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
 
@@ -179,6 +182,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
   CCIfType<[i32],
   CCIfSplit<CCIfNotSubtarget<"useSoftFloat()", 
                             CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
+  CCIfType<[f64],
+  CCIfSubtarget<"hasSPE()",
+                CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
   CCIfSplit<CCIfSubtarget<"useSoftFloat()",
                           CCIfOrigArgWasPPCF128<CCCustom<
                           "CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
@@ -199,7 +205,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
                             CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
   CCIfType<[f64],
            CCIfSubtarget<"hasSPE()",
-                         CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+                         CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>,
   CCIfType<[f32],
            CCIfSubtarget<"hasSPE()",
                          CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
@@ -228,12 +234,14 @@ def CC_PPC32_SVR4_Common : CallingConv<[
 // This calling convention puts vector arguments always on the stack. It is used
 // to assign vector arguments which belong to the variable portion of the
 // parameter list of a variable argument function.
+let Entry = 1 in
 def CC_PPC32_SVR4_VarArg : CallingConv<[
   CCDelegateTo<CC_PPC32_SVR4_Common>
 ]>;
 
 // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
 // put vector arguments in vector registers before putting them on the stack.
+let Entry = 1 in
 def CC_PPC32_SVR4 : CallingConv<[
   // QPX vectors mirror the scalar FP convention.
   CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
@@ -265,6 +273,7 @@ def CC_PPC32_SVR4 : CallingConv<[
 // The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
 // not passed by value.
  
+let Entry = 1 in
 def CC_PPC32_SVR4_ByVal : CallingConv<[
   CCIfByVal<CCPassByVal<4, 4>>,
   
@@ -300,6 +309,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
 
 def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
 
+def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+                                     R21, R22, R23, R24, R25, R26, R27, R28,
+                                     R29, R30, R31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
                                         X21, X22, X23, X24, X25, X26, X27, X28,
                                         X29, X30, X31, F14, F15, F16, F17, F18,
@@ -316,6 +332,13 @@ def CSR_SVR464   : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                    )>;
 
+def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+                                     X21, X22, X23, X24, X25, X26, X27, X28,
+                                     X29, X30, X31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 // CSRs that are handled by prologue, epilogue.
 def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
 
@@ -343,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
 // and value may be altered by inter-library calls.
 // Do not include r12 as it is used as a scratch register.
 // Do not include return registers r3, f1, v2.
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
-                                          (sequence "R%u", 14, 31),
-                                          F0, (sequence "F%u", 2, 31),
-                                          (sequence "CR%u", 0, 7))>;
+def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
+                                                (sequence "R%u", 14, 31),
+                                                (sequence "CR%u", 0, 7))>;
+
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                          F0, (sequence "F%u", 2, 31))>;
+
 
 def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
                                             (sequence "V%u", 0, 1),
                                             (sequence "V%u", 3, 31))>;
 
+def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                            (sequence "S%u", 4, 10),
+                                            (sequence "S%u", 14, 31))>;
+
 def CSR_SVR64_ColdCC : CalleeSavedRegs<(add  (sequence "X%u", 4, 10),
                                              (sequence "X%u", 14, 31),
                                              F0, (sequence "F%u", 2, 31),
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ac931f7d0ec0..aa5d830b549e 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -1,9 +1,8 @@
 //===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@ using namespace llvm;
 STATISTIC(NumBCLR, "Number of early conditional returns");
 STATISTIC(NumBLR,  "Number of early returns");
 
-namespace llvm {
-  void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
 namespace {
   // PPCEarlyReturn pass - For simple functions without epilogue code, move
   // returns up, and create conditional returns, to avoid unnecessary
@@ -184,11 +179,11 @@ public:
       // nothing to do.
       if (MF.size() < 2)
         return Changed;
-
-      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+      
+      // We can't use a range-based for loop due to clobbering the iterator.
+      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
         MachineBasicBlock &B = *I++;
-        if (processBlock(B))
-          Changed = true;
+        Changed |= processBlock(B);
       }
 
       return Changed;
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
index a03e691ef5bb..e8ef451c7ec9 100644
--- a/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -1,9 +1,8 @@
 //===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 3b2d92db78b9..264d6b590f95 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1,9 +1,8 @@
 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -152,6 +151,14 @@ class PPCFastISel final : public FastISel {
     bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
       return RC->getID() == PPC::VSSRCRegClassID;
     }
+    unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
+                               unsigned SrcReg, unsigned Flag = 0,
+                               unsigned SubReg = 0) {
+      unsigned TmpReg = createResultReg(ToRC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
+      return TmpReg;
+    }
     bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                     bool isZExt, unsigned DestReg,
                     const PPC::Predicate Pred);
@@ -187,7 +194,6 @@ class PPCFastISel final : public FastISel {
                          unsigned &NumBytes,
                          bool IsVarArg);
     bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
-    LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
 
   private:
   #include "PPCGenFastISel.inc"
@@ -196,23 +202,6 @@ class PPCFastISel final : public FastISel {
 
 } // end anonymous namespace
 
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
-  if (Flag == 1)
-    return CC_PPC32_SVR4;
-  else if (Flag == 2)
-    return CC_PPC32_SVR4_ByVal;
-  else if (Flag == 3)
-    return CC_PPC32_SVR4_VarArg;
-  else if (Flag == 4)
-    return RetCC_PPC_Cold;
-  else
-    return RetCC_PPC;
-}
-
 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
   switch (Pred) {
     // These are not representable with any single compare.
@@ -874,7 +863,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
 
   unsigned CmpOpc;
   bool NeedsExt = false;
-  auto RC = MRI.getRegClass(SrcReg1);
+
+  auto RC1 = MRI.getRegClass(SrcReg1);
+  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
+
   switch (SrcVT.SimpleTy) {
     default: return false;
     case MVT::f32:
@@ -893,12 +885,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
         }
       } else {
         CmpOpc = PPC::FCMPUS;
-        if (isVSSRCRegClass(RC)) {
-          unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
-          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                  TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
-          SrcReg1 = TmpReg;
-        }
+        if (isVSSRCRegClass(RC1))
+          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
+        if (RC2 && isVSSRCRegClass(RC2))
+          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
       }
       break;
     case MVT::f64:
@@ -915,7 +905,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
             CmpOpc = PPC::EFDCMPGT;
             break;
         }
-      } else if (isVSFRCRegClass(RC)) {
+      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
         CmpOpc = PPC::XSCMPUDP;
       } else {
         CmpOpc = PPC::FCMPUD;
@@ -997,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
 
   // Round the result to single precision.
   unsigned DestReg;
-
+  auto RC = MRI.getRegClass(SrcReg);
   if (PPCSubTarget->hasSPE()) {
     DestReg = createResultReg(&PPC::SPE4RCRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
       TII.get(PPC::EFSCFD), DestReg)
       .addReg(SrcReg);
+  } else if (isVSFRCRegClass(RC)) {
+    DestReg = createResultReg(&PPC::VSSRCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+      TII.get(PPC::XSRSP), DestReg)
+      .addReg(SrcReg);
   } else {
     DestReg = createResultReg(&PPC::F4RCRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1217,21 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   if (SrcReg == 0)
     return false;
 
-  // Convert f32 to f64 if necessary.  This is just a meaningless copy
-  // to get the register class right.
+  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
+  // meaningless copy to get the register class right.
   const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
-  if (InRC == &PPC::F4RCRegClass) {
-    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY), TmpReg)
-      .addReg(SrcReg);
-    SrcReg = TmpReg;
-  }
+  if (InRC == &PPC::F4RCRegClass)
+    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
+  else if (InRC == &PPC::VSSRCRegClass)
+    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
 
   // Determine the opcode for the conversion, which takes place
-  // entirely within FPRs.
+  // entirely within FPRs or VSRs.
   unsigned DestReg;
   unsigned Opc;
+  auto RC = MRI.getRegClass(SrcReg);
 
   if (PPCSubTarget->hasSPE()) {
     DestReg = createResultReg(&PPC::GPRCRegClass);
@@ -1239,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
       Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
     else
       Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+  } else if (isVSFRCRegClass(RC)) {
+    DestReg = createResultReg(&PPC::VSFRCRegClass);
+    if (DstVT == MVT::i32) 
+      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
+    else
+      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
   } else {
     DestReg = createResultReg(&PPC::F8RCRegClass);
     if (DstVT == MVT::i32)
@@ -1520,11 +1519,7 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
 
     if (RetVT == CopyVT) {
       const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
-      ResultReg = createResultReg(CpyRC);
-
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), ResultReg)
-        .addReg(SourcePhysReg);
+      ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
 
     // If necessary, round the floating result to single precision.
     } else if (CopyVT == MVT::f64) {
@@ -1537,12 +1532,9 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
     // used along the fast-isel path (not lowered), and downstream logic
     // also doesn't like a direct subreg copy on a physical reg.)
     } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
-      ResultReg = createResultReg(&PPC::GPRCRegClass);
       // Convert physical register from G8RC to GPRC.
       SourcePhysReg -= PPC::X0 - PPC::R0;
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::COPY), ResultReg)
-        .addReg(SourcePhysReg);
+      ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
     }
 
     assert(ResultReg && "ResultReg unset!");
@@ -1894,13 +1886,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
     return false;
 
   // The only interesting case is when we need to switch register classes.
-  if (SrcVT == MVT::i64) {
-    unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(SrcReg, 0, PPC::sub_32);
-    SrcReg = ResultReg;
-  }
+  if (SrcVT == MVT::i64)
+    SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
 
   updateValueMap(I, SrcReg);
   return true;
@@ -1977,6 +1964,13 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
     case Instruction::Sub:
       return SelectBinaryIntOp(I, ISD::SUB);
     case Instruction::Call:
+      // On AIX, call lowering uses the DAG-ISEL path currently so that the
+      // callee of the direct function call instruction will be mapped to the
+      // symbol for the function's entry point, which is distinct from the
+      // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+      // name is the C-linkage name of the source level function.
+      if (TM.getTargetTriple().isOSAIX())
+        break;
       return selectCall(I);
     case Instruction::Ret:
       return SelectRet(I);
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 8263954994d2..ebfb1ef7f49b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,7 +29,6 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "framelowering"
-STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
 
@@ -73,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
 }
 
 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
-  if (STI.isDarwinABI() || STI.isPPC64())
+  if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
 
-  // SVR4 ABI:
+  // 32-bit SVR4 ABI:
   return 8;
 }
 
@@ -446,12 +444,27 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
 }
 
+/// determineFrameLayoutAndUpdate - Compute the frame size and maximum call
+/// frame size, and record both in the MachineFunction's frame info.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+                                                bool UseEstimate) const {
+  unsigned NewMaxCallFrameSize = 0;
+  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+                                            &NewMaxCallFrameSize);
+  MF.getFrameInfo().setStackSize(FrameSize);
+  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+  return FrameSize;
+}
+
 /// determineFrameLayout - Determine the size of the frame and maximum call
 /// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
-                                                bool UpdateMF,
-                                                bool UseEstimate) const {
-  MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+                                       bool UseEstimate,
+                                       unsigned *NewMaxCallFrameSize) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
 
   // Get the number of bytes to allocate from the FrameInfo
   unsigned FrameSize =
@@ -469,6 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                        !MFI.adjustsStack() &&       // No calls.
                        !MustSaveLR(MF, LR) &&       // No need to save LR.
+                       !FI->mustSaveTOC() &&        // No need to save TOC.
                        !RegInfo->hasBasePointer(MF); // No special alignment.
 
   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
@@ -477,10 +491,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
 
   // Check whether we can skip adjusting the stack pointer (by using red zone)
   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
-    NumNoNeedForFrame++;
     // No need for frame
-    if (UpdateMF)
-      MFI.setStackSize(0);
     return 0;
   }
 
@@ -496,9 +507,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
   if (MFI.hasVarSizedObjects())
     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
 
-  // Update maximum call frame size.
-  if (UpdateMF)
-    MFI.setMaxCallFrameSize(maxCallFrameSize);
+  // Update the new max call frame size if the caller passes in a valid pointer.
+  if (NewMaxCallFrameSize)
+    *NewMaxCallFrameSize = maxCallFrameSize;
 
   // Include call frame size in total.
   FrameSize += maxCallFrameSize;
@@ -506,10 +517,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
   // Make sure the frame is aligned.
   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
 
-  // Update frame info.
-  if (UpdateMF)
-    MFI.setStackSize(FrameSize);
-
   return FrameSize;
 }
 
@@ -690,7 +697,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   MachineFunction &MF = *(MBB->getParent());
   bool HasBP = RegInfo->hasBasePointer(MF);
-  unsigned FrameSize = determineFrameLayout(MF, false);
+  unsigned FrameSize = determineFrameLayout(MF);
   int NegFrameSize = -FrameSize;
   bool IsLargeFrame = !isInt<16>(NegFrameSize);
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -713,6 +720,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
   return findScratchRegister(TmpMBB, true);
 }
 
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+  // Abort if there is no register info or function info.
+  if (!RegInfo || !FI)
+    return false;
+
+  // Only move the stack update on ELFv2 ABI and PPC64.
+  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+    return false;
+
+  // Check the frame size first and return false if it does not fit the
+  // requirements.
+  // We need a non-zero frame size as well as a frame that will fit in the red
+  // zone. This is because by moving the stack pointer update we are now storing
+  // to the red zone until the stack pointer is updated. If we get an interrupt
+  // inside the prologue but before the stack update we now have a number of
+  // stores to the red zone and those stores must all fit.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned FrameSize = MFI.getStackSize();
+  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+    return false;
+
+  // Frame pointers and base pointers complicate matters so don't do anything
+  // if we have them. For example having a frame pointer will sometimes require
+  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+  // difficult.
+  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+    return false;
+
+  // Calls to fast_cc functions use different rules for passing parameters on
+  // the stack from the ABI and using PIC base in the function imposes
+  // similar restrictions to using the base pointer. It is not generally safe
+  // to move the stack pointer update in these situations.
+  if (FI->hasFastCall() || FI->usesPICBase())
+    return false;
+
+  // Finally we can move the stack update if we do not require register
+  // scavenging. Register scavenging can introduce more spills and so
+  // may make the frame size larger than we have computed.
+  return !RegInfo->requiresFrameIndexScavenging(MF);
+}
+
 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -748,7 +799,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   MBBI = MBB.begin();
 
   // Work out frame sizes.
-  unsigned FrameSize = determineFrameLayout(MF);
+  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
   int NegFrameSize = -FrameSize;
   if (!isInt<32>(NegFrameSize))
     llvm_unreachable("Unhandled stack size!");
@@ -759,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  bool MustSaveTOC = FI->mustSaveTOC();
   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
   bool MustSaveCR = !MustSaveCRs.empty();
   // Do we have a frame pointer and/or base pointer for this function?
@@ -770,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned BPReg       = RegInfo->getBaseRegister(MF);
   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
+  unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
   unsigned ScratchReg  = 0;
   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
@@ -855,6 +908,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   assert((isPPC64 || !MustSaveCR) &&
          "Prologue CR saving supported only in 64-bit mode");
 
+  // Check if we can move the stack update instruction (stdu) down the prologue
+  // past the callee saves. Hopefully this will avoid the situation where the
+  // saves are waiting for the update on the store with update to complete.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  bool MovingStackUpdateDown = false;
+
+  // Check if we can move the stack update.
+  if (stackUpdateCanBeMoved(MF)) {
+    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+    for (CalleeSavedInfo CSI : Info) {
+      int FrIdx = CSI.getFrameIdx();
+      // If the frame index is not negative the callee saved info belongs to a
+      // stack object that is not a fixed stack object. We ignore non-fixed
+      // stack objects because we won't move the stack pointer update past them.
+      if (FrIdx >= 0)
+        continue;
+
+      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+        StackUpdateLoc++;
+        MovingStackUpdateDown = true;
+      } else {
+        // We need all of the Frame Indices to meet these conditions.
+        // If they do not, abort the whole operation.
+        StackUpdateLoc = MBBI;
+        MovingStackUpdateDown = false;
+        break;
+      }
+    }
+
+    // If the operation was not aborted then update the object offset.
+    if (MovingStackUpdateDown) {
+      for (CalleeSavedInfo CSI : Info) {
+        int FrIdx = CSI.getFrameIdx();
+        if (FrIdx < 0)
+          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+      }
+    }
+  }
+
   // If we need to spill the CR and the LR but we don't have two separate
   // registers available, we must spill them one at a time
   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -918,7 +1010,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, StoreInst)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
       .addReg(ScratchReg, getKillRegState(true))
       .addImm(LROffset)
       .addReg(SPReg);
@@ -986,7 +1078,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
     HasSTUX = true;
 
   } else if (!isLargeFrame) {
-    BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+    BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
       .addReg(SPReg)
       .addImm(NegFrameSize)
       .addReg(SPReg);
@@ -1004,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
     HasSTUX = true;
   }
 
+  // Save the TOC register after the stack pointer update if a prologue TOC
+  // save is required for the function.
+  if (MustSaveTOC) {
+    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+      .addReg(TOCReg, getKillRegState(true))
+      .addImm(TOCSaveOffset)
+      .addReg(SPReg);
+  }
+
   if (!HasRedZone) {
     assert(!isPPC64 && "A red zone is always available on PPC64");
     if (HasSTUX) {
@@ -1205,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
       if (PPC::CRBITRCRegClass.contains(Reg))
         continue;
 
+      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+        continue;
+
       // For SVR4, don't emit a move for the CR spill slot if we haven't
       // spilled CRs.
       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1234,6 +1339,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
           .addCFIIndex(CFIRegister);
       } else {
         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+        // We have changed the object offset above but we do not want to change
+        // the actual offsets in the CFI instruction so we have to undo the
+        // offset change here.
+        if (MovingStackUpdateDown)
+          Offset -= NegFrameSize;
+
         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1380,6 +1491,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   unsigned RBReg = SPReg;
   unsigned SPAdd = 0;
 
+  // Check if we can move the stack update instruction up the epilogue
+  // past the callee saves. This will allow the move to LR instruction
+  // to be executed before the restores of the callee saves which means
+  // that the callee saves can hide the latency from the MTLR instruction.
+  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+  if (stackUpdateCanBeMoved(MF)) {
+    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+    for (CalleeSavedInfo CSI : Info) {
+      int FrIdx = CSI.getFrameIdx();
+      // If the frame index is not negative the callee saved info belongs to a
+      // stack object that is not a fixed stack object. We ignore non-fixed
+      // stack objects because we won't move the update of the stack pointer
+      // past them.
+      if (FrIdx >= 0)
+        continue;
+
+      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+        StackUpdateLoc--;
+      else {
+        // Abort the operation as we can't update all CSR restores.
+        StackUpdateLoc = MBBI;
+        break;
+      }
+    }
+  }
+
   if (FrameSize) {
     // In the prologue, the loaded (or persistent) stack pointer value is
     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1409,7 +1546,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       }
     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
       if (HasRedZone) {
-        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
           .addReg(SPReg)
           .addImm(FrameSize);
       } else {
@@ -1433,7 +1570,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
             .addReg(FPReg);
         RBReg = FPReg;
       }
-      BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
         .addImm(0)
         .addReg(SPReg);
     }
@@ -1466,7 +1603,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   // a base register anyway, because it may happen to be R0.
   bool LoadedLR = false;
   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
-    BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
       .addImm(LROffset+SPAdd)
       .addReg(RBReg);
     LoadedLR = true;
@@ -1538,7 +1675,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
         .addReg(TempReg, getKillRegState(i == e-1));
 
   if (MustSaveLR)
-    BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
 
   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
   // call optimization
@@ -1732,6 +1869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
 
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
+    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
+            (Reg != PPC::X2 && Reg != PPC::R2)) &&
+           "Not expecting to try to spill R2 in a function that must save TOC");
     if (PPC::GPRCRegClass.contains(Reg) ||
         PPC::SPE4RCRegClass.contains(Reg)) {
       HasGPSaveArea = true;
@@ -1947,7 +2087,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
   // the 16-bit immediate. We don't know the complete frame size here
   // because we've not yet computed callee-saved register spills or the
   // needed alignment padding.
-  unsigned StackSize = determineFrameLayout(MF, false, true);
+  unsigned StackSize = determineFrameLayout(MF, true);
   MachineFrameInfo &MFI = MF.getFrameInfo();
   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
@@ -2041,6 +2181,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   DebugLoc DL;
   bool CRSpilled = false;
   MachineInstrBuilder CRMIB;
@@ -2071,6 +2213,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
       continue;
     }
 
+    // The actual spill will happen in the prologue.
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     // Insert the spill to the stack frame.
     if (IsCRField) {
       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
@@ -2198,6 +2344,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction *MF = MBB.getParent();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  bool MustSaveTOC = FI->mustSaveTOC();
   bool CR2Spilled = false;
   bool CR3Spilled = false;
   bool CR4Spilled = false;
@@ -2220,6 +2368,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
       continue;
 
+    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+      continue;
+
     if (Reg == PPC::CR2) {
       CR2Spilled = true;
       // The spill slot is associated only with CR2, which is the
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 69bd1484d6e5..d116e9fd22e1 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 
-#include "PPC.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -73,12 +71,29 @@ class PPCFrameLowering: public TargetFrameLowering {
    */
   void createTailCallBranchInstr(MachineBasicBlock &MBB) const;
 
+  /**
+    * Check if the conditions are correct to allow for the stack update
+    * to be moved past the CSR save/restore code.
+    */
+  bool stackUpdateCanBeMoved(MachineFunction &MF) const;
+
 public:
   PPCFrameLowering(const PPCSubtarget &STI);
 
-  unsigned determineFrameLayout(MachineFunction &MF,
-                                bool UpdateMF = true,
-                                bool UseEstimate = false) const;
+  /**
+   * Determine the frame layout and update the machine function.
+   */
+  unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+                                         bool UseEstimate = false) const;
+
+  /**
+   * Determine the frame layout but do not update the machine function.
+   * The MachineFunction object can be const in this case as it is not
+   * modified.
+   */
+  unsigned determineFrameLayout(const MachineFunction &MF,
+                                bool UseEstimate = false,
+                                unsigned *NewMaxCallFrameSize = nullptr) const;
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 5f6966cecd61..391ebcc1a143 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -1,9 +1,8 @@
 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCHazardRecognizers.h"
-#include "PPC.h"
 #include "PPCInstrInfo.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 4b502147ca63..5b32147ca88d 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -1,9 +1,8 @@
 //===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 31acd0ff870f..543cac075f55 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -219,13 +218,6 @@ namespace {
     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                      const SDLoc &dl);
 
-    /// SelectAddrImm - Returns true if the address N can be represented by
-    /// a base register plus a signed 16-bit displacement [r+imm].
-    bool SelectAddrImm(SDValue N, SDValue &Disp,
-                       SDValue &Base) {
-      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
-    }
-
     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
     /// immediate field.  Note that the operand at this point is already the
     /// result of a prior SelectAddressRegImm call.
@@ -239,26 +231,61 @@ namespace {
       return false;
     }
 
-    /// SelectAddrIdx - Given the specified addressed, check to see if it can be
-    /// represented as an indexed [r+r] operation.  Returns false if it can
-    /// be represented by [r+imm], which are preferred.
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is D.
+    /// The last parameter \p 0 means the associated D form imposes no alignment
+    /// requirement on its 16-bit signed displacement.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
-      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+    }
+
+    /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is DS.
+    /// The last parameter \p 4 means associated DS form 16 bit signed
+    /// displacement must be a multiple of 4.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
+    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+    }
+
+    /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation.
+    /// This is for xform instructions whose associated displacement form is DQ.
+    /// The last parameter \p 16 means associated DQ form 16 bit signed
+    /// displacement must be a multiple of 16.
+    /// Returns false if it can be represented by [r+imm], which are preferred.
+    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
+      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
     }
 
-    /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
     /// represented as an indexed [r+r] operation.
     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
     }
+    
+    /// SelectAddrImm - Returns true if the address N can be represented by
+    /// a base register plus a signed 16-bit displacement [r+imm].
+    /// The last parameter \p 0 means the D form has no alignment requirement
+    /// on its 16-bit signed displacement.
+    bool SelectAddrImm(SDValue N, SDValue &Disp,
+                       SDValue &Base) {
+      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+    }
 
     /// SelectAddrImmX4 - Returns true if the address N can be represented by
-    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
-    /// Suitable for use by STD and friends.
+    /// a base register plus a signed 16-bit displacement that is a multiple of
+    /// 4 (last parameter). Suitable for use by STD and friends.
     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
     }
 
+    /// SelectAddrImmX16 - Returns true if the address N can be represented by
+    /// a base register plus a signed 16-bit displacement that is a multiple of
+    /// 16 (last parameter). Suitable for use by STXV and friends.
     bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
     }
@@ -412,7 +439,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
       if (PPCSubTarget->isTargetELF()) {
         GlobalBaseReg = PPC::R30;
-        if (M->getPICLevel() == PICLevel::SmallPIC) {
+        if (!PPCSubTarget->isSecurePlt() &&
+            M->getPICLevel() == PICLevel::SmallPIC) {
           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
@@ -2373,7 +2401,7 @@ public:
 
   // Here we try to match complex bit permutations into a set of
   // rotate-and-shift/shift/and/or instructions, using a set of heuristics
-  // known to produce optimial code for common cases (like i32 byte swapping).
+  // known to produce optimal code for common cases (like i32 byte swapping).
   SDNode *Select(SDNode *N) {
     Memoizer.clear();
     auto Result =
@@ -4214,12 +4242,12 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
 
   // Without this setb optimization, the outer SELECT_CC will be manually
   // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
-  // transforms pseduo instruction to isel instruction. When there are more than
+  // transforms pseudo instruction to isel instruction. When there are more than
   // one use for result like zext/sext, with current optimization we only see
   // isel is replaced by setb but can't see any significant gain. Since
   // setb has longer latency than original isel, we should avoid this. Another
   // point is that setb requires comparison always kept, it can break the
-  // oppotunity to get the comparison away if we have in future.
+  // opportunity to get the comparison away if we have in future.
   if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
     return false;
 
@@ -4354,13 +4382,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     if (trySETCC(N))
       return;
     break;
-
-  case PPCISD::CALL: {
-    const Module *M = MF->getFunction().getParent();
-
+  // These nodes will be transformed into GETtlsADDR32 node, which
+  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+  case PPCISD::ADDI_TLSLD_L_ADDR:
+  case PPCISD::ADDI_TLSGD_L_ADDR: {
+    const Module *Mod = MF->getFunction().getParent();
     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
         !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
-        M->getPICLevel() == PICLevel::SmallPIC)
+        Mod->getPICLevel() == PICLevel::SmallPIC)
+      break;
+    // Attach global base pointer on GETtlsADDR32 node in order to
+    // generate secure plt code for TLS symbols.
+    getGlobalBaseReg();
+  } break;
+  case PPCISD::CALL: {
+    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
+        !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
+        !PPCSubTarget->isTargetELF())
       break;
 
     SDValue Op = N->getOperand(1);
@@ -5305,7 +5343,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
     SDValue V = Queue.pop_back_val();
 
     for (const SDValue &O : V.getNode()->ops()) {
-      unsigned b;
+      unsigned b = 0;
       uint64_t M = 0, A = 0;
       SDValue OLHS, ORHS;
       if (O.getOpcode() == ISD::OR) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 39608cb74bee..24d50074860d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,6 +44,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -70,8 +70,10 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Casting.h"
@@ -111,6 +113,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
 static cl::opt<bool> DisableSCO("disable-ppc-sco",
 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
 
+static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
+cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
+
 static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
 cl::desc("enable quad precision float support on ppc"), cl::Hidden);
 
@@ -119,6 +124,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls");
 
 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
 
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
+
 // FIXME: Remove this once the bug has been fixed!
 extern cl::opt<bool> ANDIGlueBug;
 
@@ -550,7 +557,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       // add/sub are legal for all supported vector VT's.
       setOperationAction(ISD::ADD, VT, Legal);
       setOperationAction(ISD::SUB, VT, Legal);
-      setOperationAction(ISD::ABS, VT, Custom);
+
+      // For v2i64, these are only valid with P8Vector. This is corrected after
+      // the loop.
+      setOperationAction(ISD::SMAX, VT, Legal);
+      setOperationAction(ISD::SMIN, VT, Legal);
+      setOperationAction(ISD::UMAX, VT, Legal);
+      setOperationAction(ISD::UMIN, VT, Legal);
+
+      if (Subtarget.hasVSX()) {
+        setOperationAction(ISD::FMAXNUM, VT, Legal);
+        setOperationAction(ISD::FMINNUM, VT, Legal);
+      }
 
       // Vector instructions introduced in P8
       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -635,11 +653,28 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
       }
     }
+    if (!Subtarget.hasP8Vector()) {
+      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
+      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
+      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
+      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
+    }
+
+    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
+      setOperationAction(ISD::ABS, VT, Custom);
 
     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
     // with merges, splats, etc.
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 
+    // Vector truncates to sub-word integers that fit in an Altivec/VSX register
+    // are cheap, so handle them before they get expanded to scalar.
+    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+
     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
@@ -804,6 +839,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
 
       if (Subtarget.hasDirectMove())
         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
@@ -866,6 +903,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
         setOperationAction(ISD::FPOWI, MVT::f128, Expand);
         setOperationAction(ISD::FREM, MVT::f128, Expand);
       }
+      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
 
     }
 
@@ -1060,6 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   if (Subtarget.hasFPCVT())
@@ -1232,22 +1271,6 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
   return Align;
 }
 
-unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
-                                                          CallingConv:: ID CC,
-                                                          EVT VT) const {
-  if (Subtarget.hasSPE() && VT == MVT::f64)
-    return 2;
-  return PPCTargetLowering::getNumRegisters(Context, VT);
-}
-
-MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
-                                                     CallingConv:: ID CC,
-                                                     EVT VT) const {
-  if (Subtarget.hasSPE() && VT == MVT::f64)
-    return MVT::i32;
-  return PPCTargetLowering::getRegisterType(Context, VT);
-}
-
 bool PPCTargetLowering::useSoftFloat() const {
   return Subtarget.useSoftFloat();
 }
@@ -1256,6 +1279,10 @@ bool PPCTargetLowering::hasSPE() const {
   return Subtarget.hasSPE();
 }
 
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  return VT.isScalarInteger();
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
@@ -1365,7 +1392,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
+  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
+  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
+  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
+  case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
   }
   return nullptr;
 }
@@ -2202,16 +2233,43 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+                                               SDValue &Index,
+                                               SelectionDAG &DAG) const {
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+      UI != E; ++UI) {
+    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+      if (Memop->getMemoryVT() == MVT::f64) {
+          Base = N.getOperand(0);
+          Index = N.getOperand(1);
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation.  Returns false if it
-/// can be more efficiently represented with [r+imm].
+/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
+/// non-zero and N can be represented by a base register plus a signed 16-bit
+/// displacement, make a more precise judgement by checking (displacement % \p
+/// EncodingAlignment).
 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
-                                            SDValue &Index,
-                                            SelectionDAG &DAG) const {
+                                            SDValue &Index, SelectionDAG &DAG,
+                                            unsigned EncodingAlignment) const {
   int16_t imm = 0;
   if (N.getOpcode() == ISD::ADD) {
-    if (isIntS16Immediate(N.getOperand(1), imm))
-      return false;    // r+i
+    // Is there any SPE load/store (f64), which can't handle 16bit offset?
+    // SPE load/store can only handle 8-bit offsets.
+    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+        return true;
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!EncodingAlignment || !(imm % EncodingAlignment)))
+      return false; // r+i
     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
       return false;    // r+i
 
@@ -2219,8 +2277,9 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
     Index = N.getOperand(1);
     return true;
   } else if (N.getOpcode() == ISD::OR) {
-    if (isIntS16Immediate(N.getOperand(1), imm))
-      return false;    // r+i can fold it if we can.
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!EncodingAlignment || !(imm % EncodingAlignment)))
+      return false; // r+i can fold it if we can.
 
     // If this is an or of disjoint bitfields, we can codegen this as an add
     // (for better address arithmetic) if the LHS and RHS of the OR are provably
@@ -2284,22 +2343,22 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
 
 /// Returns true if the address N can be represented by a base register plus
 /// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.  If \p Alignment is non-zero, only accept
+/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
 /// displacements that are multiples of that value.
 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                             SDValue &Base,
                                             SelectionDAG &DAG,
-                                            unsigned Alignment) const {
+                                            unsigned EncodingAlignment) const {
   // FIXME dl should come from parent load or store, not from address
   SDLoc dl(N);
   // If this can be more profitably realized as r+r, fail.
-  if (SelectAddressRegReg(N, Disp, Base, DAG))
+  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
     return false;
 
   if (N.getOpcode() == ISD::ADD) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Alignment || (imm % Alignment) == 0)) {
+        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2323,7 +2382,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
   } else if (N.getOpcode() == ISD::OR) {
     int16_t imm = 0;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
-        (!Alignment || (imm % Alignment) == 0)) {
+        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
       // If this is an or of disjoint bitfields, we can codegen this as an add
       // (for better address arithmetic) if the LHS and RHS of the OR are
       // provably disjoint.
@@ -2349,7 +2408,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     // If this address fits entirely in a 16-bit sext immediate field, codegen
     // this as "d, 0"
     int16_t Imm;
-    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
+    if (isIntS16Immediate(CN, Imm) &&
+        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                              CN->getValueType(0));
@@ -2359,7 +2419,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
     // Handle 32-bit sext immediates with LIS + addr mode.
     if ((CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
-        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
+        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
       int Addr = (int)CN->getZExtValue();
 
       // Otherwise, break this down into an LIS + disp.
@@ -2416,24 +2476,45 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
 
 /// Returns true if we should use a direct load into vector instruction
 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
-static bool usePartialVectorLoads(SDNode *N) {
-  if (!N->hasOneUse())
-    return false;
+static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
 
   // If there are any other uses other than scalar to vector, then we should
   // keep it as a scalar load -> direct move pattern to prevent multiple
-  // loads.  Currently, only check for i64 since we have lxsd/lfd to do this
-  // efficiently, but no update equivalent.
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
-    EVT MemVT = LD->getMemoryVT();
-    if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
-      SDNode *User = *(LD->use_begin());
-      if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
-        return true;
-    }
+  // loads.
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+  if (!LD)
+    return false;
+
+  EVT MemVT = LD->getMemoryVT();
+  if (!MemVT.isSimple())
+    return false;
+  switch(MemVT.getSimpleVT().SimpleTy) {
+  case MVT::i64:
+    break;
+  case MVT::i32:
+    if (!ST.hasP8Vector())
+      return false;
+    break;
+  case MVT::i16:
+  case MVT::i8:
+    if (!ST.hasP9Vector())
+      return false;
+    break;
+  default:
+    return false;
   }
 
-  return false;
+  SDValue LoadedVal(N, 0);
+  if (!LoadedVal.hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
+       UI != UE; ++UI)
+    if (UI.getUse().get().getResNo() == 0 &&
+        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+      return false;
+
+  return true;
 }
 
 /// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -2464,7 +2545,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
   // instructions because we can fold these into a more efficient instruction
   // instead, (such as LXSD).
-  if (isLoad && usePartialVectorLoads(N)) {
+  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
     return false;
   }
 
@@ -2745,7 +2826,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   const Module *M = DAG.getMachineFunction().getFunction().getParent();
   PICLevel::Level picLevel = M->getPICLevel();
 
-  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+  const TargetMachine &TM = getTargetMachine();
+  TLSModel::Model Model = TM.getTLSModel(GV);
 
   if (Model == TLSModel::LocalExec) {
     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -2769,8 +2851,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                            PtrVT, GOTReg, TGA);
-    } else
-      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+    } else {
+      if (!TM.isPositionIndependent())
+        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+      else if (picLevel == PICLevel::SmallPIC)
+        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+      else
+        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+    }
     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                    PtrVT, TGA, GOTPtr);
     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
@@ -3147,101 +3235,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
                       MachinePointerInfo(SV, nextOffset));
 }
 
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
-  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  return true;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                             MVT &LocVT,
-                                             CCValAssign::LocInfo &LocInfo,
-                                             ISD::ArgFlagsTy &ArgFlags,
-                                             CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
-    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
-  };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
-  // Skip one register if the first unallocated register has an even register
-  // number and there are still argument registers available which have not been
-  // allocated yet. RegNum is actually an index into ArgRegs, which means we
-  // need to skip a register if RegNum is odd.
-  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
-    State.AllocateReg(ArgRegs[RegNum]);
-  }
-
-  // Always return false here, as this function only makes sure that the first
-  // unallocated register has an odd register number and does not actually
-  // allocate a register for the current argument.
-  return false;
-}
-
-bool
-llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                                  MVT &LocVT,
-                                                  CCValAssign::LocInfo &LocInfo,
-                                                  ISD::ArgFlagsTy &ArgFlags,
-                                                  CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
-    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
-  };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-  int RegsLeft = NumArgRegs - RegNum;
-
-  // Skip if there is not enough registers left for long double type (4 gpr regs
-  // in soft float mode) and put long double argument on the stack.
-  if (RegNum != NumArgRegs && RegsLeft < 4) {
-    for (int i = 0; i < RegsLeft; i++) {
-      State.AllocateReg(ArgRegs[RegNum + i]);
-    }
-  }
-
-  return false;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                               MVT &LocVT,
-                                               CCValAssign::LocInfo &LocInfo,
-                                               ISD::ArgFlagsTy &ArgFlags,
-                                               CCState &State) {
-  static const MCPhysReg ArgRegs[] = {
-    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
-    PPC::F8
-  };
-
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
-  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
-  // If there is only one Floating-point register left we need to put both f64
-  // values of a split ppc_fp128 value on the stack.
-  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
-    State.AllocateReg(ArgRegs[RegNum]);
-  }
-
-  // Always return false here, as this function only makes sure that the two f64
-  // values a ppc_fp128 value is split into are both passed in registers or both
-  // passed on the stack and does not actually allocate a register for the
-  // current argument.
-  return false;
-}
-
 /// FPR - The set of FP registers that should be allocated for arguments,
 /// on Darwin.
 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
@@ -3449,7 +3442,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
   // Reserve space for the linkage area on the stack.
   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
-  if (useSoftFloat() || hasSPE())
+  if (useSoftFloat())
     CCInfo.PreAnalyzeFormalArguments(Ins);
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3482,7 +3475,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
           if (Subtarget.hasVSX())
             RC = &PPC::VSFRCRegClass;
           else if (Subtarget.hasSPE())
-            RC = &PPC::SPERCRegClass;
+            // SPE passes doubles in GPR pairs.
+            RC = &PPC::GPRCRegClass;
           else
             RC = &PPC::F8RCRegClass;
           break;
@@ -3506,13 +3500,26 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
           break;
       }
 
-      // Transform the arguments stored in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
-                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
-
-      if (ValVT == MVT::i1)
-        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+      SDValue ArgValue;
+      // Transform the arguments stored in physical registers into
+      // virtual ones.
+      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
+        assert(i + 1 < e && "No second half of double precision argument");
+        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
+        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
+        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
+        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
+        if (!Subtarget.isLittleEndian())
+          std::swap (ArgValueLo, ArgValueHi);
+        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
+                               ArgValueHi);
+      } else {
+        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
+        if (ValVT == MVT::i1)
+          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+      }
 
       InVals.push_back(ArgValue);
     } else {
@@ -4448,24 +4455,27 @@ static bool isFunctionGlobalAddress(SDValue Callee);
 static bool
 callsShareTOCBase(const Function *Caller, SDValue Callee,
                     const TargetMachine &TM) {
-  // If !G, Callee can be an external symbol.
-  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-  if (!G)
-    return false;
-
+   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+   // don't have enough information to determine if the caller and callee
+   // share the same TOC base, so we have to pessimistically assume they
+   // don't for correctness.
+   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+   if (!G)
+     return false;
+
+   const GlobalValue *GV = G->getGlobal();
   // The medium and large code models are expected to provide a sufficiently
   // large TOC to provide all data addressing needs of a module with a
   // single TOC. Since each module will be addressed with a single TOC then we
   // only need to check that caller and callee don't cross dso boundaries.
   if (CodeModel::Medium == TM.getCodeModel() ||
       CodeModel::Large == TM.getCodeModel())
-    return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
+    return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
 
   // Otherwise we need to ensure callee and caller are in the same section,
   // since the linker may allocate multiple TOCs, and we don't know which
   // sections will belong to the same TOC base.
 
-  const GlobalValue *GV = G->getGlobal();
   if (!GV->isStrongDefinitionForLinker())
     return false;
 
@@ -4917,6 +4927,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
   bool isPPC64 = Subtarget.isPPC64();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
   bool isELFv2ABI = Subtarget.isELFv2ABI();
+  bool isAIXABI = Subtarget.isAIXABI();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
   NodeTys.push_back(MVT::Other);   // Returns a chain
@@ -4943,17 +4954,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
   bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
 
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+  // every direct call is) turn it into a TargetGlobalAddress /
+  // TargetExternalSymbol node so that legalize doesn't hack it.
   if (isFunctionGlobalAddress(Callee)) {
     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+
     // A call to a TLS address is actually an indirect call to a
     // thread-specific pointer.
     unsigned OpFlags = 0;
     if (UsePlt)
       OpFlags = PPCII::MO_PLT;
 
-    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
-    // every direct call is) turn it into a TargetGlobalAddress /
-    // TargetExternalSymbol node so that legalize doesn't hack it.
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                         Callee.getValueType(), 0, OpFlags);
     needIndirectCall = false;
@@ -5095,17 +5107,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                   RegsToPass[i].second.getValueType()));
 
-  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
-  // into the call.
-  // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
-  if (isSVR4ABI && isPPC64) {
+  // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
+  // live into the call.
+  // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
+  if ((isSVR4ABI && isPPC64) || isAIXABI) {
     setUsesTOCBasePtr(DAG);
 
-    // We cannot add X2 as an operand here for PATCHPOINT, because there is no
-    // way to mark dependencies as implicit here. We will add the X2 dependency
-    // in EmitInstrWithCustomInserter.
-    if (!isPatchPoint) 
-      Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+    // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+    // no way to mark dependencies as implicit here.
+    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+    if (!isPatchPoint)
+      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
+                                            : PPC::R2, PtrVT));
   }
 
   return CallOpc;
@@ -5129,10 +5142,27 @@ SDValue PPCTargetLowering::LowerCallResult(
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    SDValue Val = DAG.getCopyFromReg(Chain, dl,
-                                     VA.getLocReg(), VA.getLocVT(), InFlag);
-    Chain = Val.getValue(1);
-    InFlag = Val.getValue(2);
+    SDValue Val;
+
+    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+                                      InFlag);
+      Chain = Lo.getValue(1);
+      InFlag = Lo.getValue(2);
+      VA = RVLocs[++i]; // skip ahead to next loc
+      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+                                      InFlag);
+      Chain = Hi.getValue(1);
+      InFlag = Hi.getValue(2);
+      if (!Subtarget.isLittleEndian())
+        std::swap (Lo, Hi);
+      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
+    } else {
+      Val = DAG.getCopyFromReg(Chain, dl,
+                               VA.getLocReg(), VA.getLocVT(), InFlag);
+      Chain = Val.getValue(1);
+      InFlag = Val.getValue(2);
+    }
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -5206,18 +5236,24 @@ SDValue PPCTargetLowering::FinishCall(
   }
 
   // Add a NOP immediately after the branch instruction when using the 64-bit
-  // SVR4 ABI. At link time, if caller and callee are in a different module and
+  // SVR4 or the AIX ABI.
+  // At link time, if caller and callee are in a different module and
   // thus have a different TOC, the call will be replaced with a call to a stub
   // function which saves the current TOC, loads the TOC of the callee and
   // branches to the callee. The NOP will be replaced with a load instruction
   // which restores the TOC of the caller from the TOC save slot of the current
   // stack frame. If caller and callee belong to the same module (and have the
-  // same TOC), the NOP will remain unchanged.
+  // same TOC), the NOP will remain unchanged, or become some other NOP.
 
   MachineFunction &MF = DAG.getMachineFunction();
-  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
-      !isPatchPoint) {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  if (!isTailCall && !isPatchPoint &&
+      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
+       Subtarget.isAIXABI())) {
     if (CallOpc == PPCISD::BCTRL) {
+      if (Subtarget.isAIXABI())
+        report_fatal_error("Indirect call on AIX is not implemented.");
+
       // This is a call through a function pointer.
       // Restore the caller TOC from the save area into R2.
       // See PrepareCall() for more information about calls through function
@@ -5229,7 +5265,6 @@ SDValue PPCTargetLowering::FinishCall(
       // allocated and an unnecessary move instruction being generated.
       CallOpc = PPCISD::BCTRL_LOAD_TOC;
 
-      EVT PtrVT = getPointerTy(DAG.getDataLayout());
       SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
@@ -5245,6 +5280,19 @@ SDValue PPCTargetLowering::FinishCall(
     }
   }
 
+  if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
+    // On AIX, direct function calls reference the symbol for the function's
+    // entry point, which is named by inserting a "." before the function's
+    // C-linkage name.
+    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+    auto &Context = DAG.getMachineFunction().getMMI().getContext();
+    MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
+                                            Twine(G->getGlobal()->getName()));
+    Callee = DAG.getMCSymbol(S, PtrVT);
+    // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
+    Ops[1] = Callee;
+  }
+
   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
   InFlag = Chain.getValue(1);
 
@@ -5314,16 +5362,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       !isTailCall)
     Callee = LowerGlobalAddress(Callee, DAG);
 
-  if (Subtarget.isSVR4ABI()) {
-    if (Subtarget.isPPC64())
-      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-    else
-      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-  }
+  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+    return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isSVR4ABI())
+    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isAIXABI())
+    return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
+                         isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                         dl, DAG, InVals, CS);
 
   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
@@ -5444,12 +5496,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
 
   bool seenFloatArg = false;
   // Walk the register/memloc assignments, inserting copies/loads.
-  for (unsigned i = 0, j = 0, e = ArgLocs.size();
+  // i - Tracks the index into the list of registers allocated for the call
+  // RealArgIdx - Tracks the index into the list of actual function arguments
+  // j - Tracks the index into the list of byval arguments
+  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
        i != e;
-       ++i) {
+       ++i, ++RealArgIdx) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = OutVals[i];
-    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    SDValue Arg = OutVals[RealArgIdx];
+    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
 
     if (Flags.isByVal()) {
       // Argument is an aggregate which is passed by value, thus we need to
@@ -5498,7 +5553,17 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
     if (VA.isRegLoc()) {
       seenFloatArg |= VA.getLocVT().isFloatingPoint();
       // Put argument in a physical register.
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
+        bool IsLE = Subtarget.isLittleEndian();
+        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
+        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
+        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
+                             SVal.getValue(0)));
+      } else
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else {
       // Put argument in the parameter list area of the current stack frame.
       assert(VA.isMemLoc());
@@ -6613,6 +6678,128 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
                     NumBytes, Ins, InVals, CS);
 }
 
+
+SDValue PPCTargetLowering::LowerCall_AIX(
+    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
+    bool isTailCall, bool isPatchPoint,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    ImmutableCallSite CS) const {
+
+  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
+         "Unimplemented calling convention!");
+  if (isVarArg || isPatchPoint)
+    report_fatal_error("This call type is unimplemented on AIX.");
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  bool isPPC64 = PtrVT == MVT::i64;
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+  unsigned NumOps = Outs.size();
+
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, parameter list area.
+  // On XCOFF, we start with 24/48, which is reserved space for
+  // [SP][CR][LR][2 x reserved][TOC].
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if the callee
+  // is variadic.
+  // Because we cannot tell if this is needed on the caller side, we have to
+  // conservatively assume that it is needed.  As such, make sure we have at
+  // least enough stack space for the callee to store the 8 GPRs.
+  unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog
+  // inserter pass.
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  SDValue CallSeqStart = Chain;
+
+  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10
+  };
+  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
+    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+    PPC::X7, PPC::X8, PPC::X9, PPC::X10
+  };
+
+  const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
+                                   : array_lengthof(GPR_32);
+  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+  unsigned GPR_idx = 0;
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  if (isTailCall)
+    report_fatal_error("Handling of tail call is unimplemented!");
+  int SPDiff = 0;
+
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Arg = OutVals[i];
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+    // Promote integers if needed.
+    if (Arg.getValueType() == MVT::i1 ||
+        (isPPC64 && Arg.getValueType() == MVT::i32)) {
+      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+      Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+    }
+
+    // Note: "by value" is code for passing a structure by value, not
+    // basic types.
+    if (Flags.isByVal())
+      report_fatal_error("Passing structure by value is unimplemented!");
+
+    switch (Arg.getSimpleValueType().SimpleTy) {
+    default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
+    case MVT::i32:
+    case MVT::i64:
+      if (GPR_idx != NumGPRs)
+        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+      else
+        report_fatal_error("Handling of placing parameters on the stack is "
+                           "unimplemented!");
+      break;
+    case MVT::f32:
+    case MVT::f64:
+    case MVT::v4f32:
+    case MVT::v4i32:
+    case MVT::v8i16:
+    case MVT::v16i8:
+    case MVT::v2f64:
+    case MVT::v2i64:
+    case MVT::v1i128:
+    case MVT::f128:
+    case MVT::v4f64:
+    case MVT::v4i1:
+      report_fatal_error("Handling of this parameter type is unimplemented!");
+    }
+  }
+
+  if (!isFunctionGlobalAddress(Callee) &&
+      !isa<ExternalSymbolSDNode>(Callee))
+    report_fatal_error("Handling of indirect call is unimplemented!");
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (auto Reg : RegsToPass) {
+    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+                    /* unused except on PPC64 ELFv1 */ false, DAG,
+                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+                    NumBytes, Ins, InVals, CS);
+}
+
 bool
 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                   MachineFunction &MF, bool isVarArg,
@@ -6644,11 +6831,11 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   SmallVector<SDValue, 4> RetOps(1, Chain);
 
   // Copy the result values into the output registers.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    SDValue Arg = OutVals[i];
+    SDValue Arg = OutVals[RealResIdx];
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
@@ -6663,8 +6850,21 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     }
-
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+      bool isLittleEndian = Subtarget.isLittleEndian();
+      // Legalize ret f64 -> ret 2 x i32.
+      SDValue SVal =
+          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
+      Flag = Chain.getValue(1);
+      VA = RVLocs[++i]; // skip ahead to next loc
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+    } else
+      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
@@ -6890,6 +7090,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
                      Op.getOperand(0));
 }
 
+SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
+                                               SelectionDAG &DAG) const {
+
+  // Implements a vector truncate that fits in a vector register as a shuffle.
+  // We want to legalize vector truncates down to where the source fits in
+  // a vector register (and target is therefore smaller than vector register
+  // size).  At that point legalization will try to custom lower the sub-legal
+  // result and get here - where we can contain the truncate as a single target
+  // operation.
+
+  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
+  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
+  //
+  // We will implement it for big-endian ordering as this (where u denotes
+  // undefined):
+  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
+  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
+  //
+  // The same operation in little-endian ordering will be:
+  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
+  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
+
+  assert(Op.getValueType().isVector() && "Vector type expected.");
+
+  SDLoc DL(Op);
+  SDValue N1 = Op.getOperand(0);
+  unsigned SrcSize = N1.getValueType().getSizeInBits();
+  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
+  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+
+  EVT TrgVT = Op.getValueType();
+  unsigned TrgNumElts = TrgVT.getVectorNumElements();
+  EVT EltVT = TrgVT.getVectorElementType();
+  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+  // First list the elements we want to keep.
+  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
+  SmallVector<int, 16> ShuffV;
+  if (Subtarget.isLittleEndian())
+    for (unsigned i = 0; i < TrgNumElts; ++i)
+      ShuffV.push_back(i * SizeMult);
+  else
+    for (unsigned i = 1; i <= TrgNumElts; ++i)
+      ShuffV.push_back(i * SizeMult - 1);
+
+  // Populate the remaining elements with undefs.
+  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
+    // ShuffV.push_back(i + WideNumElts);
+    ShuffV.push_back(WideNumElts + 1);
+
+  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
+  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+}
+
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
 /// possible.
 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -9604,10 +9859,63 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
     BifID = Intrinsic::ppc_altivec_vmaxsh;
   else if (VT == MVT::v16i8)
     BifID = Intrinsic::ppc_altivec_vmaxsb;
-  
+
   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
 }
 
+// Custom lowering for fpext v2f32 to v2f64
+SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+
+  assert(Op.getOpcode() == ISD::FP_EXTEND &&
+         "Should only be called for ISD::FP_EXTEND");
+
+  // We only want to custom lower an extend from v2f32 to v2f64.
+  if (Op.getValueType() != MVT::v2f64 ||
+      Op.getOperand(0).getValueType() != MVT::v2f32)
+    return SDValue();
+
+  SDLoc dl(Op);
+  SDValue Op0 = Op.getOperand(0);
+
+  switch (Op0.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::FADD:
+  case ISD::FMUL:
+  case ISD::FSUB: {
+    SDValue NewLoad[2];
+    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
+      // Ensure both inputs are loads.
+      SDValue LdOp = Op0.getOperand(i);
+      if (LdOp.getOpcode() != ISD::LOAD)
+        return SDValue();
+      // Generate new load node.
+      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
+      SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+      NewLoad[i] =
+        DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+                                DAG.getVTList(MVT::v4f32, MVT::Other),
+                                LoadOps, LD->getMemoryVT(),
+                                LD->getMemOperand());
+    }
+    SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
+                              NewLoad[0], NewLoad[1],
+                              Op0.getNode()->getFlags());
+    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+  }
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Op0);
+    SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue NewLd =
+      DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+                              DAG.getVTList(MVT::v4f32, MVT::Other),
+                              LoadOps, LD->getMemoryVT(), LD->getMemOperand());
+    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+  }
+  }
+  llvm_unreachable("ERROR:Should return for all cases within swtich.");
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9661,6 +9969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
   case ISD::ABS:                return LowerABS(Op, DAG);
+  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
 
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
@@ -9701,7 +10010,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
   }
   case ISD::INTRINSIC_W_CHAIN: {
     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
-        Intrinsic::ppc_is_decremented_ctr_nonzero)
+        Intrinsic::loop_decrement)
       break;
 
     assert(N->getValueType(0) == MVT::i1 &&
@@ -9737,6 +10046,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
+  case ISD::TRUNCATE: {
+    EVT TrgVT = N->getValueType(0);
+    if (TrgVT.isVector() &&
+        isOperationCustom(N->getOpcode(), TrgVT) &&
+        N->getOperand(0).getValueType().getSizeInBits() <= 128)
+      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+    return;
+  }
   case ISD::BITCAST:
     // Don't handle bitcast here.
     return;
@@ -9822,10 +10139,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   MachineFunction *F = BB->getParent();
   MachineFunction::iterator It = ++BB->getIterator();
 
-  unsigned dest = MI.getOperand(0).getReg();
-  unsigned ptrA = MI.getOperand(1).getReg();
-  unsigned ptrB = MI.getOperand(2).getReg();
-  unsigned incr = MI.getOperand(3).getReg();
+  Register dest = MI.getOperand(0).getReg();
+  Register ptrA = MI.getOperand(1).getReg();
+  Register ptrB = MI.getOperand(2).getReg();
+  Register incr = MI.getOperand(3).getReg();
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -9841,7 +10158,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   MachineRegisterInfo &RegInfo = F->getRegInfo();
-  unsigned TmpReg = (!BinOpcode) ? incr :
+  Register TmpReg = (!BinOpcode) ? incr :
     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass);
 
@@ -9949,20 +10266,20 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
 
-  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
-  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned ShiftReg =
+  Register PtrReg = RegInfo.createVirtualRegister(RC);
+  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+  Register ShiftReg =
       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
-  unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
-  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
-  unsigned Ptr1Reg;
-  unsigned TmpReg =
+  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+  Register Ptr1Reg;
+  Register TmpReg =
       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
 
   //  thisMBB:
@@ -10764,23 +11081,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
 
-    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
-    unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned ShiftReg =
+    Register PtrReg = RegInfo.createVirtualRegister(RC);
+    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+    Register ShiftReg =
         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
-    unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
-    unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned Ptr1Reg;
-    unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
-    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+    Register Ptr1Reg;
+    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
+    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
     //  thisMBB:
     //   ...
     //   fallthrough --> loopMBB
@@ -10968,7 +11285,147 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     MachineRegisterInfo &RegInfo = F->getRegInfo();
     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
-    return BB;
+    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+            MI.getOperand(0).getReg())
+        .addReg(CRReg);
+  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
+    DebugLoc Dl = MI.getDebugLoc();
+    unsigned Imm = MI.getOperand(1).getImm();
+    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
+    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+            MI.getOperand(0).getReg())
+        .addReg(PPC::CR0EQ);
+  } else if (MI.getOpcode() == PPC::SETRNDi) {
+    DebugLoc dl = MI.getDebugLoc();
+    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+    // The floating point rounding mode is in bits 62:63 of FPSCR, and has
+    // the following settings:
+    //   00 Round to nearest
+    //   01 Round to 0
+    //   10 Round to +inf
+    //   11 Round to -inf
+
+    // When the operand is an immediate, use the two least significant bits of
+    // the immediate to set bits 62:63 of FPSCR.
+    unsigned Mode = MI.getOperand(1).getImm();
+    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
+      .addImm(31);
+
+    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
+      .addImm(30);
+  } else if (MI.getOpcode() == PPC::SETRND) {
+    DebugLoc dl = MI.getDebugLoc();
+
+    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
+    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
+    // If the target doesn't have DirectMove, we should use the stack to do
+    // the conversion, because the target doesn't have instructions like mtvsrd
+    // or mfvsrd to do this conversion directly.
+    auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
+      if (Subtarget.hasDirectMove()) {
+        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
+          .addReg(SrcReg);
+      } else {
+        // Use stack to do the register copy.
+        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
+        MachineRegisterInfo &RegInfo = F->getRegInfo();
+        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
+        if (RC == &PPC::F8RCRegClass) {
+          // Copy register from F8RCRegClass to G8RCRegClass.
+          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
+                 "Unsupported RegClass.");
+
+          StoreOp = PPC::STFD;
+          LoadOp = PPC::LD;
+        } else {
+          // Copy register from G8RCRegClass to F8RCRegClass.
+          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
+                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
+                 "Unsupported RegClass.");
+        }
+
+        MachineFrameInfo &MFI = F->getFrameInfo();
+        int FrameIdx = MFI.CreateStackObject(8, 8, false);
+
+        MachineMemOperand *MMOStore = F->getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+          MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+          MFI.getObjectAlignment(FrameIdx));
+
+        // Store the SrcReg into the stack.
+        BuildMI(*BB, MI, dl, TII->get(StoreOp))
+          .addReg(SrcReg)
+          .addImm(0)
+          .addFrameIndex(FrameIdx)
+          .addMemOperand(MMOStore);
+
+        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+          MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+          MFI.getObjectAlignment(FrameIdx));
+
+        // Load from the stack where SrcReg is stored, and save to DestReg,
+        // so we have done the RegClass conversion from RegClass::SrcReg to
+        // RegClass::DestReg.
+        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
+          .addImm(0)
+          .addFrameIndex(FrameIdx)
+          .addMemOperand(MMOLoad);
+      }
+    };
+
+    unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+    // When the operand is a gprc register, use the two least significant bits
+    // of the register and the mtfsf instruction to set bits 62:63 of FPSCR.
+    //
+    // copy OldFPSCRTmpReg, OldFPSCRReg
+    // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
+    // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRTmpReg, 0, 62
+    // copy NewFPSCRReg, NewFPSCRTmpReg
+    // mtfsf 255, NewFPSCRReg
+    MachineOperand SrcOp = MI.getOperand(1);
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
+
+    unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+    unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+    // The first operand of INSERT_SUBREG should be a register which has
+    // subregisters. We only care about its RegClass, so we use an
+    // IMPLICIT_DEF register.
+    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
+    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
+      .addReg(ImDefReg)
+      .add(SrcOp)
+      .addImm(1);
+
+    unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
+      .addReg(OldFPSCRTmpReg)
+      .addReg(ExtSrcReg)
+      .addImm(0)
+      .addImm(62);
+
+    unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
+
+    // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
+    // 32:63 of FPSCR.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
+      .addImm(255)
+      .addReg(NewFPSCRReg)
+      .addImm(0)
+      .addImm(0);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
@@ -11006,7 +11463,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
     if (RefinementSteps == ReciprocalEstimate::Unspecified)
       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
 
-    UseOneConstNR = true;
+    // The Newton-Raphson computation with a single constant does not provide
+    // enough accuracy on some CPUs.
+    UseOneConstNR = !Subtarget.needsTwoConstNR();
     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
   return SDValue();
@@ -12062,9 +12521,14 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
          "Should be called with a BUILD_VECTOR node");
 
   SDLoc dl(N);
+
+  // Return early for non byte-sized types, as they can't be consecutive.
+  if (!N->getValueType(0).getVectorElementType().isByteSized())
+    return SDValue();
+
   bool InputsAreConsecutiveLoads = true;
   bool InputsAreReverseConsecutive = true;
-  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
   SDValue FirstInput = N->getOperand(0);
   bool IsRoundOfExtLoad = false;
 
@@ -12332,9 +12796,8 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
   if (!Ext1Op || !Ext2Op)
     return SDValue();
-  if (Ext1.getValueType() != MVT::i32 ||
-      Ext2.getValueType() != MVT::i32)
-  if (Ext1.getOperand(0) != Ext2.getOperand(0))
+  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
+      Ext1.getOperand(0) != Ext2.getOperand(0))
     return SDValue();
 
   int FirstElem = Ext1Op->getZExtValue();
@@ -12664,6 +13127,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     return combineSRA(N, DCI);
   case ISD::SRL:
     return combineSRL(N, DCI);
+  case ISD::MUL:
+    return combineMUL(N, DCI);
   case PPCISD::SHL:
     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
         return N->getOperand(0);
@@ -13246,7 +13711,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
 
     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero) {
+          Intrinsic::loop_decrement) {
 
       // We now need to make the intrinsic dead (it cannot be instruction
       // selected).
@@ -13272,14 +13737,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     if (LHS.getOpcode() == ISD::AND &&
         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+          Intrinsic::loop_decrement &&
         isa<ConstantSDNode>(LHS.getOperand(1)) &&
         !isNullConstant(LHS.getOperand(1)))
       LHS = LHS.getOperand(0);
 
     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
-          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+          Intrinsic::loop_decrement &&
         isa<ConstantSDNode>(RHS)) {
       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
              "Counter decrement comparison is not EQ or NE");
@@ -13355,9 +13820,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   }
   case ISD::BUILD_VECTOR:
     return DAGCombineBuildVector(N, DCI);
-  case ISD::ABS: 
+  case ISD::ABS:
     return combineABS(N, DCI);
-  case ISD::VSELECT: 
+  case ISD::VSELECT:
     return combineVSelect(N, DCI);
   }
 
@@ -13453,6 +13918,15 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
     if (!ML)
       break;
 
+    if (!DisableInnermostLoopAlign32) {
+      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
+      // so that we can decrease cache misses and branch-prediction misses.
+      // Actual alignment of the loop will depend on the hotness check and other
+      // logic in alignBlocks.
+      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
+        return 5;
+    }
+
     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
 
     // For small loops (between 5 and 8 instructions), align to a 32-byte
@@ -13502,7 +13976,7 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
     return C_RegisterClass;
   } else if (Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "ws" ||
-             Constraint == "wi") {
+             Constraint == "wi" || Constraint == "ww") {
     return C_RegisterClass; // VSX registers.
   }
   return TargetLowering::getConstraintType(Constraint);
@@ -13530,10 +14004,12 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
             StringRef(constraint) == "wf") &&
            type->isVectorTy())
     return CW_Register;
-  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
-    return CW_Register;
   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
     return CW_Register; // just hold 64-bit integers data.
+  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+    return CW_Register;
+  else if (StringRef(constraint) == "ww" && type->isFloatTy())
+    return CW_Register;
 
   switch (*constraint) {
   default:
@@ -13619,7 +14095,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
              Constraint == "wf" || Constraint == "wi") &&
              Subtarget.hasVSX()) {
     return std::make_pair(0U, &PPC::VSRCRegClass);
-  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
     if (VT == MVT::f32 && Subtarget.hasP8Vector())
       return std::make_pair(0U, &PPC::VSSRCRegClass);
     else
@@ -13865,7 +14341,7 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
     return true;
 
-  // JumpTable and BlockAddress are accessed as got-indirect. 
+  // JumpTable and BlockAddress are accessed as got-indirect.
   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
     return true;
 
@@ -14082,18 +14558,16 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 /// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
-                                           unsigned DstAlign, unsigned SrcAlign,
-                                           bool IsMemset, bool ZeroMemset,
-                                           bool MemcpyStrSrc,
-                                           MachineFunction &MF) const {
+EVT PPCTargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
-    const Function &F = MF.getFunction();
     // When expanding a memset, require at least two QPX instructions to cover
     // the cost of loading the value to be stored from the constant pool.
     if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
-        !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
       return MVT::v4f64;
     }
 
@@ -14178,6 +14652,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned,
                                                        unsigned,
+                                                       MachineMemOperand::Flags,
                                                        bool *Fast) const {
   if (DisablePPCUnaligned)
     return false;
@@ -14324,7 +14799,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
       .addReg(*I);
 
-    // Insert the copy-back instructions right before the terminator
+    // Insert the copy-back instructions right before the terminator.
     for (auto *Exit : Exits)
       BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
               TII->get(TargetOpcode::COPY), *I)
@@ -14345,7 +14820,8 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
     return TargetLowering::insertSSPDeclarations(M);
 }
 
-bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                     bool ForCodeSize) const {
   if (!VT.isSimple() || !Subtarget.hasVSX())
     return false;
 
@@ -14585,6 +15061,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
   return SDValue();
 }
 
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
+  if (!ConstOpOrElement)
+    return SDValue();
+
+  // An imul is usually smaller than the alternative sequence for a legal type.
+  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+      isOperationLegal(ISD::MUL, N->getValueType(0)))
+    return SDValue();
+
+  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
+    switch (this->Subtarget.getDarwinDirective()) {
+    default:
+      // TODO: enhance the condition for subtarget before pwr8
+      return false;
+    case PPC::DIR_PWR8:
+      //  type        mul     add    shl
+      // scalar        4       1      1
+      // vector        7       2      2
+      return true;
+    case PPC::DIR_PWR9:
+      //  type        mul     add    shl
+      // scalar        5       2      2
+      // vector        7       2      2
+
+      // The cycle ratios of the related operations are shown in the table
+      // above: mul is 5 (scalar) / 7 (vector), while add/sub/shl are all 2
+      // for both scalar and vector types. A 2-instruction pattern (add/sub +
+      // shl) costs 4, so it is always profitable; the 3-instruction pattern
+      // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
+      // so we should only do it for vector types.
+      return IsAddOne && IsNeg ? VT.isVector() : true;
+    }
+  };
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
+  bool IsNeg = MulAmt.isNegative();
+  APInt MulAmtAbs = MulAmt.abs();
+
+  if ((MulAmtAbs - 1).isPowerOf2()) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+
+    if (!IsProfitable(IsNeg, true, VT))
+      return SDValue();
+
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 =
+        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
+    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+
+    if (!IsNeg)
+      return Res;
+
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+  } else if ((MulAmtAbs + 1).isPowerOf2()) {
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+
+    if (!IsProfitable(IsNeg, false, VT))
+      return SDValue();
+
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 =
+        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
+
+    if (!IsNeg)
+      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
+    else
+      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+
+  } else {
+    return SDValue();
+  }
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
   if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 30acd60eba6f..97422c6eda36 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
 //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
 
-#include "PPC.h"
 #include "PPCInstrInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -41,7 +39,7 @@ namespace llvm {
     // the enum. The order of elements in this enum matters!
     // Values that are added after this entry:
     //     STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
-    // are considerd memory opcodes and are treated differently than entries
+    // are considered memory opcodes and are treated differently than entries
     // that come before it. For example, ADD or MUL should be placed before
     // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
     // after it.
@@ -161,7 +159,7 @@ namespace llvm {
 
       /// CALL - A direct function call.
       /// CALL_NOP is a call with the special NOP which follows 64-bit
-      /// SVR4 calls.
+      /// SVR4 calls and 32-bit/64-bit AIX calls.
       CALL, CALL_NOP,
 
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -193,9 +191,18 @@ namespace llvm {
       /// Direct move from a GPR to a VSX register (zero)
       MTVSRZ,
 
-      /// Direct move of 2 consective GPR to a VSX register.
+      /// Direct move of 2 consecutive GPR to a VSX register.
       BUILD_FP128,
 
+      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+      /// unsupported for this target.
+      /// Merge 2 GPRs to a single SPE register.
+      BUILD_SPE64,
+
+      /// Extract SPE register component, second argument is high or low.
+      EXTRACT_SPE,
+
       /// Extract a subvector from signed integer vector and convert to FP.
       /// It is primarily used to convert a (widened) illegal integer vector
       /// type to a legal floating point vector type.
@@ -265,11 +272,11 @@ namespace llvm {
       CR6UNSET,
 
       /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
-      /// on PPC32.
+      /// for non-position independent code on PPC32.
       PPC32_GOT,
 
       /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-      /// local dynamic TLS on PPC32.
+      /// local dynamic TLS and position independent code on PPC32.
       PPC32_PICGOT,
 
       /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
@@ -405,6 +412,9 @@ namespace llvm {
       /// representation.
       QBFLT,
 
+      /// Custom extend v4f32 to v2f64.
+      FP_EXTEND_LH,
+
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
       /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -446,6 +456,10 @@ namespace llvm {
       /// an xxswapd.
       LXVD2X,
 
+      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+      /// v2f32 value into the lower half of a VSR register.
+      LD_VSX_LH,
+
       /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
       /// Maps directly to an stxvd2x instruction that will be preceded by
       /// an xxswapd.
@@ -620,6 +634,8 @@ namespace llvm {
       return true;
     }
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }
@@ -653,18 +669,27 @@ namespace llvm {
                                    ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be more efficiently represented as [r+imm].
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
-    /// can be represented as an indexed [r+r] operation.  Returns false if it
-    /// can be more efficiently represented with [r+imm].
+    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+    /// is non-zero, only accept displacement which is not suitable for [r+imm].
+    /// Returns false if it can be represented by [r+imm], which are preferred.
     bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
-                             SelectionDAG &DAG) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment = 0) const;
 
     /// SelectAddressRegImm - Returns true if the address N can be represented
     /// by a base register plus a signed 16-bit displacement [r+imm], and if it
-    /// is not better represented as reg+reg.  If Aligned is true, only accept
-    /// displacements suitable for STD and friends, i.e. multiples of 4.
+    /// is not better represented as reg+reg. If \p EncodingAlignment is
+    /// non-zero, only accept displacements suitable for instruction encoding
+    /// requirement, i.e. multiples of 4 for DS form.
     bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
-                             SelectionDAG &DAG, unsigned Alignment) const;
+                             SelectionDAG &DAG,
+                             unsigned EncodingAlignment) const;
 
     /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
     /// represented as an indexed [r+r] operation.
@@ -833,14 +858,14 @@ namespace llvm {
     EVT
     getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                        MachineFunction &MF) const override;
+                        const AttributeList &FuncAttributes) const override;
 
     /// Is unaligned memory access allowed for the given type, and is it fast
     /// relative to software emulation.
-    bool allowsMisalignedMemoryAccesses(EVT VT,
-                                        unsigned AddrSpace,
-                                        unsigned Align = 1,
-                                        bool *Fast = nullptr) const override;
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AddrSpace, unsigned Align = 1,
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
 
     /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -888,7 +913,8 @@ namespace llvm {
     bool useLoadStackGuardNode() const override;
     void insertSSPDeclarations(Module &M) const override;
 
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
 
     unsigned getJumpTableEncoding() const override;
     bool isJumpTableRelative() const override;
@@ -898,14 +924,6 @@ namespace llvm {
                                                unsigned JTI,
                                                MCContext &Ctx) const override;
 
-    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
-                                           CallingConv:: ID CC,
-                                           EVT VT) const override;
-
-    MVT getRegisterTypeForCallingConv(LLVMContext &Context,
-                                      CallingConv:: ID CC,
-                                      EVT VT) const override;
-
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
@@ -953,6 +971,8 @@ namespace llvm {
     SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                  const SDLoc &dl) const;
 
+    SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 
@@ -1019,6 +1039,7 @@ namespace llvm {
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1106,6 +1127,15 @@ namespace llvm {
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &InVals,
                              ImmutableCallSite CS) const;
+    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+                          CallingConv::ID CallConv, bool isVarArg,
+                          bool isTailCall, bool isPatchPoint,
+                          const SmallVectorImpl<ISD::OutputArg> &Outs,
+                          const SmallVectorImpl<SDValue> &OutVals,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals,
+                          ImmutableCallSite CS) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1119,6 +1149,7 @@ namespace llvm {
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -1137,8 +1168,6 @@ namespace llvm {
                              int &RefinementSteps) const override;
     unsigned combineRepeatedFPDivisors() const override;
 
-    CCAssignFn *useFastISelCCs(unsigned Flag) const;
-
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
@@ -1169,30 +1198,6 @@ namespace llvm {
 
   } // end namespace PPC
 
-  bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                  CCValAssign::LocInfo &LocInfo,
-                                  ISD::ArgFlagsTy &ArgFlags,
-                                  CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State);
-
-  bool
-  CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
-                                                 MVT &LocVT,
-                                                 CCValAssign::LocInfo &LocInfo,
-                                                 ISD::ArgFlagsTy &ArgFlags,
-                                                 CCState &State);
-
-  bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                           MVT &LocVT,
-                                           CCValAssign::LocInfo &LocInfo,
-                                           ISD::ArgFlagsTy &ArgFlags,
-                                           CCState &State);
-
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 2ce6ad3293eb..d598567f8e4e 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,9 +1,8 @@
 //===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -168,7 +167,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
     XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
                               (ins memrix:$src),
                               "bctrl\n\tld 2, $src", IIC_BrB,
-                              [(PPCbctrl_load_toc ixaddr:$src)]>,
+                              [(PPCbctrl_load_toc iaddrX4:$src)]>,
     Requires<[In64BitMode]>;
 }
 
@@ -193,6 +192,12 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
           (BL8_NOP texternalsym:$dst)>;
 
+// Calls for AIX
+def : Pat<(PPCcall (i64 mcsym:$dst)),
+          (BL8 mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
+          (BL8_NOP mcsym:$dst)>;
+
 // Atomic operations
 // FIXME: some of these might be used with constant operands. This will result
 // in constant materialization instructions that may be redundant. We currently
@@ -383,7 +388,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let hasSideEffects = 1, Defs = [CTR8] in {
-let Pattern = [(int_ppc_mtctr i64:$rS)] in
+let Pattern = [(int_set_loop_iterations i64:$rS)] in
 def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
                                "mtctr $rS", IIC_SprMTSPR>,
                  PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -720,10 +725,17 @@ defm SRADI  : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
                          "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
                          [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
 
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
-                          "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
-                          [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
-                          isPPC64, Requires<[IsISA3_0]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
+                                (ins gprc:$rS, u6imm:$SH),
+                                "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+                                [(set i64:$rA,
+                                      (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+                                isPPC64, Requires<[IsISA3_0]>;
+
+defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+                           "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+                           []>, isPPC64, Requires<[IsISA3_0]>;
 
 // For fast-isel:
 let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,13 +785,21 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
 let Predicates = [IsISA3_0] in {
 def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
                        "maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+def MADDHDU : VAForm_1a<49, 
+                       (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
                        "maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
-                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
+def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
+                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+                       [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
+                       isPPC64;
 def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
                        "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+  def MADDLD8 : VAForm_1a<51, 
+                       (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+                       "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+                       [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
+                       isPPC64;
   def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
                        "setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
 }
@@ -911,7 +931,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
 def LWA  : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
                     "lwa $rD, $src", IIC_LdStLWA,
                     [(set i64:$rD,
-                          (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+                          (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
                     PPC970_DGroup_Cracked;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -920,7 +940,7 @@ def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
                         PPC970_DGroup_Cracked;
 def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
                         "lwax $rD, $src", IIC_LdStLHA,
-                        [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+                        [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
                         PPC970_DGroup_Cracked;
 // For fast-isel:
 let isCodeGenOnly = 1, mayLoad = 1 in {
@@ -1022,7 +1042,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
 let PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                     "ld $rD, $src", IIC_LdStLD,
-                    [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+                    [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
 // The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1045,7 +1065,7 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
 
 def LDX  : XForm_1_memOp<31,  21, (outs g8rc:$rD), (ins memrr:$src),
                         "ldx $rD, $src", IIC_LdStLD,
-                        [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+                        [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
 def LDBRX : XForm_1_memOp<31,  532, (outs g8rc:$rD), (ins memrr:$src),
                           "ldbrx $rD, $src", IIC_LdStLoad,
                           [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
@@ -1214,10 +1234,10 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
 // Normal 8-byte stores.
 def STD  : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
                     "std $rS, $dst", IIC_LdStSTD,
-                    [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+                    [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
 def STDX  : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
                           "stdx $rS, $dst", IIC_LdStSTD,
-                          [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+                          [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
                           PPC970_DGroup_Cracked;
 def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
                           "stdbrx $rS, $dst", IIC_LdStStore,
@@ -1433,11 +1453,11 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
           (STDX $rS, xoaddr:$dst)>;
 
 // 64-bits atomic loads and stores
-def : Pat<(atomic_load_64 ixaddr:$src), (LD  memrix:$src)>;
-def : Pat<(atomic_load_64 xaddr:$src),  (LDX memrr:$src)>;
+def : Pat<(atomic_load_64 iaddrX4:$src), (LD  memrix:$src)>;
+def : Pat<(atomic_load_64 xaddrX4:$src),  (LDX memrr:$src)>;
 
-def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD  g8rc:$val, memrix:$ptr)>;
-def : Pat<(atomic_store_64 xaddr:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD  g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddrX4:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
 
 let Predicates = [IsISA3_0] in {
 
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 69b19e45c3e9..8176c5120a83 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -822,7 +821,9 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
 def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
 def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
 
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+    isReMaterializable = 1 in {
+
 def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
                       "vxor $vD, $vD, $vD", IIC_VecFP,
                       [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
@@ -899,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
 def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
 def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
 
+// Max/Min
+def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMAXUB $src1, $src2))>;
+def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMAXSB $src1, $src2))>;
+def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMAXUH $src1, $src2))>;
+def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMAXSH $src1, $src2))>;
+def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMAXUW $src1, $src2))>;
+def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMAXSW $src1, $src2))>;
+def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMINUB $src1, $src2))>;
+def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VMINSB $src1, $src2))>;
+def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMINUH $src1, $src2))>;
+def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VMINSH $src1, $src2))>;
+def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMINUW $src1, $src2))>;
+def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VMINSW $src1, $src2))>;
+
 // Shuffles.
 
 // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index cf71b1c59869..323f7e39adf7 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -1,9 +1,8 @@
 //===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 2fe765dd99e1..a48eb1690695 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,9 +1,8 @@
 //===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,14 +37,6 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
 
-  /// Indicate that the VSX instruction is to use VSX numbering/encoding.
-  /// Since ISA 3.0, there are scalar instructions that use the upper
-  /// half of the VSX register set only. Rather than adding further complexity
-  /// to the register class set, the VSX registers just include the Altivec
-  /// registers and this flag decides the numbering to be used for them.
-  bits<1> UseVSXReg = 0;
-  let TSFlags{6}   = UseVSXReg;
-
   // Indicate that this instruction is of type X-Form Load or Store
   bits<1> XFormMemOp = 0;
   let TSFlags{7}  = XFormMemOp;
@@ -74,7 +65,6 @@ class PPC970_Unit_VALU     { bits<3> PPC970_Unit = 5;   }
 class PPC970_Unit_VPERM    { bits<3> PPC970_Unit = 6;   }
 class PPC970_Unit_BRU      { bits<3> PPC970_Unit = 7;   }
 
-class UseVSXReg { bits<1> UseVSXReg = 1; }
 class XFormMemOp { bits<1> XFormMemOp = 1; }
 
 // Two joined instructions; used to emit two adjacent instructions as one.
@@ -730,6 +720,7 @@ class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
   : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
 }
 
+// [PO RT /// RB XO RC]
 class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
@@ -1193,9 +1184,9 @@ class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2,
   let Inst{11-15} = DCMX{4-0};
   let Inst{16-20} = XB{4-0};
   let Inst{21-24} = xo1;
-  let Inst{25}    = DCMX{5};
+  let Inst{25}    = DCMX{6};
   let Inst{26-28} = xo2;
-  let Inst{29}    = DCMX{6};
+  let Inst{29}    = DCMX{5};
   let Inst{30}    = XB{5};
   let Inst{31}    = XT{5};
 }
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
index 0efe797c765d..104b57a70a2e 100644
--- a/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory  -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,55 +20,53 @@ def HTM_get_imm : SDNodeXForm<imm, [{
 }]>;
 
 let hasSideEffects = 1 in {
-def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+def TCHECK_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>;
+def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>;
 }
 
 
 let Predicates = [HasHTM] in {
 
+let Defs = [CR0] in {
 def TBEGIN : XForm_htm0 <31, 654,
-                         (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+                         (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
 
 def TEND : XForm_htm1 <31, 686,
-                       (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+                       (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
 
 def TABORT : XForm_base_r3xo <31, 910,
-                              (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+                              (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
                               []>, isDOT {
   let RST = 0;
   let B = 0;
 }
 
 def TABORTWC : XForm_base_r3xo <31, 782,
-                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                                 "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                 isDOT;
 
 def TABORTWCI : XForm_base_r3xo <31, 846,
-                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                                  "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                  isDOT;
 
 def TABORTDC : XForm_base_r3xo <31, 814,
-                                (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+                                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                                 "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                 isDOT;
 
 def TABORTDCI : XForm_base_r3xo <31, 878,
-                                 (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+                                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                                  "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
                                  isDOT;
 
 def TSR : XForm_htm2 <31, 750,
-                      (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+                      (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
                       isDOT;
 
-def TCHECK : XForm_htm3 <31, 718,
-                        (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
-
-
 def TRECLAIM : XForm_base_r3xo <31, 942,
-                                (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+                                (outs), (ins gprc:$A), "treclaim. $A",
                                 IIC_SprMTSPR, []>,
                                 isDOT {
   let RST = 0;
@@ -77,13 +74,17 @@ def TRECLAIM : XForm_base_r3xo <31, 942,
 }
 
 def TRECHKPT : XForm_base_r3xo <31, 1006,
-                                (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+                                (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
                                 isDOT {
   let RST = 0;
   let A = 0;
   let B = 0;
 }
 
+}
+
+def TCHECK : XForm_htm3 <31, 718,
+                        (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
 // Builtins
 
 // All HTM instructions, with the exception of tcheck, set CR0 with the
@@ -94,15 +95,11 @@ def TRECHKPT : XForm_base_r3xo <31, 1006,
 // tbegin builtin API which defines a return value of 1 as success.
 
 def : Pat<(int_ppc_tbegin i32:$R),
-           (XORI
-             (EXTRACT_SUBREG (
-               TBEGIN (HTM_get_imm imm:$R)), sub_eq),
-            1)>;
+           (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>;
 
 def : Pat<(int_ppc_tend i32:$R),
           (TEND (HTM_get_imm imm:$R))>;
 
-
 def : Pat<(int_ppc_tabort i32:$R),
           (TABORT $R)>;
 
@@ -167,6 +164,8 @@ def : Pat<(int_ppc_tsuspend),
           (TSR 0)>;
 
 def : Pat<(i64 (int_ppc_ttest)),
-          (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+          (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                      (TABORTWCI 0, (LI 0), 0), sub_32)),
+                   36, 28)>;
 
 } // [HasHTM]
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d754ce2990d2..a787bdd56b9d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -333,6 +332,17 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case PPC::ADDIStocHA:
   case PPC::ADDItocL:
   case PPC::LOAD_STACK_GUARD:
+  case PPC::XXLXORz:
+  case PPC::XXLXORspz:
+  case PPC::XXLXORdpz:
+  case PPC::V_SET0B:
+  case PPC::V_SET0H:
+  case PPC::V_SET0:
+  case PPC::V_SETALLONESB:
+  case PPC::V_SETALLONESH:
+  case PPC::V_SETALLONES:
+  case PPC::CRSET:
+  case PPC::CRUNSET:
     return true;
   }
   return false;
@@ -381,9 +391,9 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   // Swap op1/op2
   assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
          "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
-  unsigned Reg0 = MI.getOperand(0).getReg();
-  unsigned Reg1 = MI.getOperand(1).getReg();
-  unsigned Reg2 = MI.getOperand(2).getReg();
+  Register Reg0 = MI.getOperand(0).getReg();
+  Register Reg1 = MI.getOperand(1).getReg();
+  Register Reg2 = MI.getOperand(2).getReg();
   unsigned SubReg1 = MI.getOperand(1).getSubReg();
   unsigned SubReg2 = MI.getOperand(2).getSubReg();
   bool Reg1IsKill = MI.getOperand(1).isKill();
@@ -411,7 +421,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
 
   if (NewMI) {
     // Create a new instruction.
-    unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
+    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
     bool Reg0IsDead = MI.getOperand(0).isDead();
     return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
         .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
@@ -942,12 +952,16 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   } else if (PPC::G8RCRegClass.contains(SrcReg) &&
              PPC::VSFRCRegClass.contains(DestReg)) {
+    assert(Subtarget.hasDirectMove() &&
+           "Subtarget doesn't support directmove, don't know how to copy.");
     BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
     NumGPRtoVSRSpill++;
     getKillRegState(KillSrc);
     return;
   } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
              PPC::G8RCRegClass.contains(DestReg)) {
+    assert(Subtarget.hasDirectMove() &&
+           "Subtarget doesn't support directmove, don't know how to copy.");
     BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
     getKillRegState(KillSrc);
     return;
@@ -963,7 +977,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-
   unsigned Opc;
   if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::OR;
@@ -996,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = PPC::QVFMRb;
   else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::CROR;
+  else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::OR;
   else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::EVOR;
   else
@@ -1066,6 +1081,10 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
       OpcodeIndex = SOK_Float8Spill;
     } else if (PPC::F4RCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_Float4Spill;
+    } else if (PPC::SPERCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPESpill;
+    } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPE4Spill;
     } else if (PPC::CRRCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_CRSpill;
     } else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1152,6 +1171,10 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
       OpcodeIndex = SOK_Float8Spill;
     } else if (PPC::F4RCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_Float4Spill;
+    } else if (PPC::SPERCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPESpill;
+    } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+      OpcodeIndex = SOK_SPE4Spill;
     } else if (PPC::CRRCRegClass.contains(Reg)) {
       OpcodeIndex = SOK_CRSpill;
     } else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1632,6 +1655,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
     return false;
 
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
   // The record forms set the condition register based on a signed comparison
   // with zero (so says the ISA manual). This is not as straightforward as it
   // seems, however, because this is always a 64-bit comparison on PPC64, even
@@ -1645,6 +1669,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
   bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
 
+  // Look through copies unless that gets us to a physical register.
+  unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+  if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+    SrcReg = ActualSrc;
+
   // Get the unique definition of SrcReg.
   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   if (!MI) return false;
@@ -1745,7 +1774,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
       return false;
 
     PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
-    PPC::Predicate NewPred = Pred;
     unsigned PredCond = PPC::getPredicateCondition(Pred);
     unsigned PredHint = PPC::getPredicateHint(Pred);
     int16_t Immed = (int16_t)Value;
@@ -1755,25 +1783,23 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
     if (Immed == -1 && PredCond == PPC::PRED_GT)
       // We convert "greater than -1" into "greater than or equal to 0",
       // since we are assuming signed comparison by !equalityOnly
-      NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
     else if (Immed == -1 && PredCond == PPC::PRED_LE)
       // We convert "less than or equal to -1" into "less than 0".
-      NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
     else if (Immed == 1 && PredCond == PPC::PRED_LT)
       // We convert "less than 1" into "less than or equal to 0".
-      NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
     else if (Immed == 1 && PredCond == PPC::PRED_GE)
       // We convert "greater than or equal to 1" into "greater than 0".
-      NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint);
+      Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
     else
       return false;
 
-    PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
-                                            NewPred));
+    PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred));
   }
 
   // Search for Sub.
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
   --I;
 
   // Get ready to iterate backward from CmpInstr.
@@ -1992,7 +2018,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
 unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
-  if (Opcode == PPC::INLINEASM) {
+  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -2358,13 +2384,6 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
         MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
         It++;
         unsigned Reg = MI.getOperand(i).getReg();
-        // MachineInstr::readsRegister only returns true if the machine
-        // instruction reads the exact register or its super-register. It
-        // does not consider uses of sub-registers which seems like strange
-        // behaviour. Nonetheless, if we end up with a 64-bit register here,
-        // get the corresponding 32-bit register to check.
-        if (PPC::G8RCRegClass.contains(Reg))
-          Reg = Reg - PPC::X0 + PPC::R0;
 
         // Is this register defined by some form of add-immediate (including
         // load-immediate) within this basic block?
@@ -2381,7 +2400,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
               return &*It;
             }
             break;
-          } else if (It->readsRegister(Reg, &getRegisterInfo())) 
+          } else if (It->readsRegister(Reg, &getRegisterInfo()))
             // If we see another use of this reg between the def and the MI,
             // we want to flat it so the def isn't deleted.
             SeenIntermediateUse = true;
@@ -2424,6 +2443,83 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
   return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
 }
 
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+                                     unsigned RegNo) const {
+  const MachineRegisterInfo &MRI =
+      StartMI.getParent()->getParent()->getRegInfo();
+  if (MRI.isSSA())
+    return;
+
+  // Instructions between [StartMI, EndMI] should be in same basic block.
+  assert((StartMI.getParent() == EndMI.getParent()) &&
+         "Instructions are not in same basic block");
+
+  bool IsKillSet = false;
+
+  auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
+    MachineOperand &MO = MI.getOperand(Index);
+    if (MO.isReg() && MO.isUse() && MO.isKill() &&
+        getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
+      MO.setIsKill(false);
+  };
+
+  // Set killed flag for EndMI.
+  // No need to do anything if EndMI defines RegNo.
+  int UseIndex =
+      EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+  if (UseIndex != -1) {
+    EndMI.getOperand(UseIndex).setIsKill(true);
+    IsKillSet = true;
+    // Clear killed flag for other EndMI operands related to RegNo. In some
+    // unexpected cases, killed may be set multiple times for same register
+    // operand in same MI.
+    for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+      if (i != UseIndex)
+        clearOperandKillInfo(EndMI, i);
+  }
+
+  // Walking the inst in reverse order (EndMI -> StartMI].
+  MachineBasicBlock::reverse_iterator It = EndMI;
+  MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+  // EndMI has been handled above, skip it here.
+  It++;
+  MachineOperand *MO = nullptr;
+  for (; It != E; ++It) {
+    // Skip instructions which could not be a def/use of RegNo.
+    if (It->isDebugInstr() || It->isPosition())
+      continue;
+
+    // Clear killed flag for all It operands related to RegNo. In some
+    // unexpected cases, killed may be set multiple times for same register
+    // operand in same MI.
+    for (int i = 0, e = It->getNumOperands(); i != e; ++i)
+        clearOperandKillInfo(*It, i);
+
+    // If killed is not set, set killed for its last use or set dead for its def
+    // if no use found.
+    if (!IsKillSet) {
+      if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
+        // Use found, set it killed.
+        IsKillSet = true;
+        MO->setIsKill(true);
+        continue;
+      } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
+                                                  &getRegisterInfo()))) {
+        // No use found, set dead for its def.
+        assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+        MO->setIsDead(true);
+        break;
+      }
+    }
+
+    if ((&*It) == &StartMI)
+      break;
+  }
+  // Ensure RegNo liveness is killed after EndMI.
+  assert((IsKillSet || (MO && MO->isDead())) &&
+         "RegNo should be killed or dead");
+}
+
 // If this instruction has an immediate form and one of its operands is a
 // result of a load-immediate or an add-immediate, convert it to
 // the immediate form if the constant is in range.
@@ -2440,8 +2536,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
     return false;
   assert(ForwardingOperand < MI.getNumOperands() &&
          "The forwarding operand needs to be valid at this point");
-  bool KillFwdDefMI = !SeenIntermediateUse &&
-    MI.getOperand(ForwardingOperand).isKill();
+  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
+  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
+  unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
   if (KilledDef && KillFwdDefMI)
     *KilledDef = DefMI;
 
@@ -2450,8 +2547,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   // If this is a reg+reg instruction that has a reg+imm form,
   // and one of the operands is produced by an add-immediate,
   // try to convert it.
-  if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
-                                               *DefMI, KillFwdDefMI))
+  if (HasImmForm &&
+      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
+                                 KillFwdDefMI))
     return true;
 
   if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
@@ -2466,7 +2564,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   // If this is a reg+reg instruction that has a reg+imm form,
   // and one of the operands is produced by LI, convert it now.
   if (HasImmForm)
-    return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
+    return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
 
   bool ReplaceWithLI = false;
   bool Is64BitLI = false;
@@ -2486,6 +2584,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   case PPC::CMPLDI: {
     // Doing this post-RA would require dataflow analysis to reliably find uses
     // of the CR register set by the compare.
+    // No need to fixup killed/dead flag since this transformation is only valid
+    // before RA.
     if (PostRA)
       return false;
     // If a compare-immediate is fed by an immediate and is itself an input of
@@ -2662,6 +2762,14 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
     if (KilledDef && SetCR)
       *KilledDef = nullptr;
     replaceInstrWithLI(MI, LII);
+
+    // Fixup killed/dead flag after transformation.
+    // Pattern:
+    // ForwardingOperandReg = LI imm1
+    // y = op2 imm2, ForwardingOperandReg(killed)
+    if (IsForwardingOperandKilled)
+      fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+
     LLVM_DEBUG(dbgs() << "With:\n");
     LLVM_DEBUG(MI.dump());
     return true;
@@ -2669,10 +2777,6 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
   return false;
 }
 
-static bool isVFReg(unsigned Reg) {
-  return PPC::VFRCRegClass.contains(Reg);
-}
-
 bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
                                    ImmInstrInfo &III, bool PostRA) const {
   unsigned Opc = MI.getOpcode();
@@ -3007,7 +3111,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
       break;
     case PPC::LXSSPX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::LXSSP;
         else {
           III.ImmOpcode = PPC::LFS;
@@ -3021,7 +3125,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
       break;
     case PPC::LXSDX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::LXSD;
         else {
           III.ImmOpcode = PPC::LFD;
@@ -3039,7 +3143,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
       break;
     case PPC::STXSSPX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::STXSSP;
         else {
           III.ImmOpcode = PPC::STFS;
@@ -3053,7 +3157,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
       break;
     case PPC::STXSDX:
       if (PostRA) {
-        if (isVFReg(MI.getOperand(0).getReg()))
+        if (isVFRegister(MI.getOperand(0).getReg()))
           III.ImmOpcode = PPC::STXSD;
         else {
           III.ImmOpcode = PPC::STFD;
@@ -3110,7 +3214,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
   }
 }
 
-// Check if the 'MI' that has the index OpNoForForwarding 
+// Check if the 'MI' that has the index OpNoForForwarding
 // meets the requirement described in the ImmInstrInfo.
 bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
                                                const ImmInstrInfo &III,
@@ -3156,7 +3260,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
                                                MachineOperand *&RegMO) const {
   unsigned Opc = DefMI.getOpcode();
   if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
-    return false; 
+    return false;
 
   assert(DefMI.getNumOperands() >= 3 &&
          "Add inst must have at least three operands");
@@ -3169,11 +3273,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
   return isAnImmediateOperand(*ImmMO);
 }
 
-bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
-                                             const MachineInstr &DefMI,
-                                             const MachineInstr &MI,
-                                             bool KillDefMI
-                                             ) const {
+bool PPCInstrInfo::isRegElgibleForForwarding(
+    const MachineOperand &RegMO, const MachineInstr &DefMI,
+    const MachineInstr &MI, bool KillDefMI,
+    bool &IsFwdFeederRegKilled) const {
   // x = addi y, imm
   // ...
   // z = lfdx 0, x   -> z = lfd imm(y)
@@ -3184,14 +3287,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
   if (MRI.isSSA())
     return false;
 
-  // MachineInstr::readsRegister only returns true if the machine
-  // instruction reads the exact register or its super-register. It
-  // does not consider uses of sub-registers which seems like strange
-  // behaviour. Nonetheless, if we end up with a 64-bit register here,
-  // get the corresponding 32-bit register to check.
   unsigned Reg = RegMO.getReg();
-  if (PPC::G8RCRegClass.contains(Reg))
-    Reg = Reg - PPC::X0 + PPC::R0;
 
   // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
   MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3200,15 +3296,17 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
   for (; It != E; ++It) {
     if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
       return false;
+    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+      IsFwdFeederRegKilled = true;
     // Made it to DefMI without encountering a clobber.
     if ((&*It) == &DefMI)
       break;
   }
   assert((&*It) == &DefMI && "DefMI is missing");
 
-  // If DefMI also uses the register to be forwarded, we can only forward it
+  // If DefMI also defines the register to be forwarded, we can only forward it
   // if DefMI is being erased.
-  if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
     return KillDefMI;
 
   return true;
@@ -3271,11 +3369,9 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
 // is the literal zero, attempt to forward the source of the add-immediate to
 // the corresponding D-Form instruction with the displacement coming from
 // the immediate being added.
-bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
-                                              const ImmInstrInfo &III,
-                                              unsigned OpNoForForwarding,
-                                              MachineInstr &DefMI,
-                                              bool KillDefMI) const {
+bool PPCInstrInfo::transformToImmFormFedByAdd(
+    MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
+    MachineInstr &DefMI, bool KillDefMI) const {
   //         RegMO ImmMO
   //           |    |
   // x = addi reg, imm  <----- DefMI
@@ -3300,10 +3396,19 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
   if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
     return false;
 
+  bool IsFwdFeederRegKilled = false;
   // Check if the RegMO can be forwarded to MI.
-  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
+                                 IsFwdFeederRegKilled))
     return false;
 
+  // Get killed info in case fixup needed after transformation.
+  unsigned ForwardKilledOperandReg = ~0U;
+  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  bool PostRA = !MRI.isSSA();
+  if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
+    ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
   // We know that, the MI and DefMI both meet the pattern, and
   // the Imm also meet the requirement with the new Imm-form.
   // It is safe to do the transformation now.
@@ -3327,7 +3432,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
     // Otherwise, it is Constant Pool Index(CPI) or Global,
     // which is relocation in fact. We need to replace the special zero
     // register with ImmMO.
-    // Before that, we need to fixup the target flags for imm. 
+    // Before that, we need to fixup the target flags for imm.
     // For some reason, we miss to set the flag for the ImmMO if it is CPI.
     if (DefMI.getOpcode() == PPC::ADDItocL)
       ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
@@ -3354,6 +3459,22 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
   // Update the opcode.
   MI.setDesc(get(III.ImmOpcode));
 
+  // Fix up killed/dead flag after transformation.
+  // Pattern 1:
+  // x = ADD KilledFwdFeederReg, imm
+  // n = opn KilledFwdFeederReg(killed), regn
+  // y = XOP 0, x
+  // Pattern 2:
+  // x = ADD reg(killed), imm
+  // y = XOP 0, x
+  if (IsFwdFeederRegKilled || RegMO->isKill())
+    fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+  // Pattern 3:
+  // ForwardKilledOperandReg = ADD reg, imm
+  // y = XOP 0, ForwardKilledOperandReg(killed)
+  if (ForwardKilledOperandReg != ~0U)
+    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+
   LLVM_DEBUG(dbgs() << "With:\n");
   LLVM_DEBUG(MI.dump());
 
@@ -3363,6 +3484,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
 bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
                                              const ImmInstrInfo &III,
                                              unsigned ConstantOpNo,
+                                             MachineInstr &DefMI,
                                              int64_t Imm) const {
   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
   bool PostRA = !MRI.isSSA();
@@ -3401,6 +3523,11 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
       return false;
   }
 
+  // Get killed info in case fixup needed after transformation.
+  unsigned ForwardKilledOperandReg = ~0U;
+  if (PostRA && MI.getOperand(ConstantOpNo).isKill())
+    ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
+
   unsigned Opc = MI.getOpcode();
   bool SpecialShift32 =
     Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
@@ -3483,6 +3610,13 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
       }
     }
   }
+
+  // Fix up killed/dead flag after transformation.
+  // Pattern:
+  // ForwardKilledOperandReg = LI imm
+  // y = XOP reg, ForwardKilledOperandReg(killed)
+  if (ForwardKilledOperandReg != ~0U)
+    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
   return true;
 }
 
@@ -3784,3 +3918,133 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
   }
   return false;
 }
+
+bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { // BDNZ8 on PPC64, else BDNZ
+  return Subtarget.isPPC64() ? Opcode == PPC::BDNZ8 : Opcode == PPC::BDNZ;
+}
+
+bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                               MachineInstr *&CmpInst) const {
+  // Only CTR loops, whose bottom block ends in a BDNZ terminator, are handled.
+  MachineBasicBlock *Bottom = L.getBottomBlock();
+  MachineBasicBlock::iterator Term = Bottom->getFirstTerminator();
+  if (Term == Bottom->end() || !isBDNZ(Term->getOpcode()))
+    return true; // Not analyzable: both out-parameters are left untouched.
+  // Hardware loop: no induction variable, the BDNZ is reported as the compare.
+  IndVarInst = nullptr;
+  CmpInst = &*Term;
+  return false;
+}
+
+// Return the CTR set-up instruction (MTCTRloop/MTCTR8loop) found in
+// \p PreHeader, or nullptr if the preheader does not contain one.
+MachineInstr *
+PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+  const unsigned SetupOpc =
+      Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop;
+  for (MachineInstr &Candidate : PreHeader.instrs())
+    if (Candidate.getOpcode() == SetupOpc)
+      return &Candidate;
+  return nullptr;
+}
+
+unsigned PPCInstrInfo::reduceLoopCount(
+    MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+    MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+    SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+    unsigned MaxIter) const {
+  // We expect a hardware loop currently. This means that IndVar is set
+  // to null, and the compare is the BDNZ/BDNZ8 instruction.
+  assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr *Loop = findLoopInstr(PreHeader);
+  // Without a CTR set-up instruction in the preheader, report a count of 0.
+  if (!Loop)
+    return 0;
+  unsigned LoopCountReg = Loop->getOperand(0).getReg();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+
+  if (!LoopCount)
+    return 0;
+  // If the loop trip count is a compile-time value (loaded by LI/LI8), just
+  // decrement the immediate in place.
+  if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
+    int64_t Offset = LoopCount->getOperand(1).getImm();
+    // A count of at most one: erase both the LI and the CTR set-up instruction.
+    if (Offset <= 1) {
+      LoopCount->eraseFromParent();
+      Loop->eraseFromParent();
+      return 0;
+    }
+    LoopCount->getOperand(1).setImm(Offset - 1);
+    return Offset - 1;
+  }
+
+  // The loop trip count is a run-time value. We need to subtract one from the
+  // trip count and insert a branch later to check if we're done with the loop.
+  //
+  // The BDZ/BDZ8 that will be inserted also decrements the CTR by one, so
+  // there is nothing to generate here; only describe the condition in Cond.
+  Cond.push_back(MachineOperand::CreateImm(0));
+  Cond.push_back(MachineOperand::CreateReg(
+      Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
+  return LoopCountReg;
+}
+
+// Decompose the memory access \p LdSt into base operand, immediate byte offset
+// and access width (taken from its single memory operand); false if it can't.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+  const MachineInstr &LdSt,
+  const MachineOperand *&BaseReg,
+  int64_t &Offset,
+  unsigned &Width,
+  const TargetRegisterInfo *TRI) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+  // Only the three-operand shape is understood: operand 1 must be the
+  // immediate displacement and operand 2 the base register.
+  const bool HasDispAndBase = LdSt.getNumExplicitOperands() == 3 &&
+                              LdSt.getOperand(1).isImm() &&
+                              LdSt.getOperand(2).isReg();
+  // The width is read from the memory operand, so exactly one is required.
+  if (!HasDispAndBase || !LdSt.hasOneMemOperand())
+    return false;
+
+  BaseReg = &LdSt.getOperand(2);
+  Offset = LdSt.getOperand(1).getImm();
+  Width = (*LdSt.memoperands_begin())->getSize();
+  return true;
+}
+
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
+  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, offset from the base register and width. Width
+  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4).  If the
+  // base registers are identical and the lower access ends (offset + width) at
+  // or before the offset of the higher access, the accesses are disjoint. The
+  // arithmetic is done in int64_t so that no offset is truncated.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  int64_t OffsetA = 0, OffsetB = 0;
+  unsigned int WidthA = 0, WidthB = 0;
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+      int64_t LowOffset = std::min(OffsetA, OffsetB);
+      int64_t HighOffset = std::max(OffsetA, OffsetB);
+      int64_t LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowOffset + LowWidth <= HighOffset)
+        return true;
+    }
+  }
+  return false;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7ed558b835af..70fb757e8f1e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 
-#include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
@@ -66,9 +64,6 @@ enum {
   /// Shift count to bypass PPC970 flags
   NewDef_Shift = 6,
 
-  /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
-  /// register (v0-v31).
-  UseVSXReg = 0x1 << NewDef_Shift,
   /// This instruction is an X-Form memory operation.
   XFormMemOp = 0x1 << (NewDef_Shift+1)
 };
@@ -129,12 +124,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   // If the inst has imm-form and one of its operand is produced by a LI,
   // put the imm into the inst directly and remove the LI if possible.
   bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
-                                 unsigned ConstantOpNo, int64_t Imm) const;
+                                 unsigned ConstantOpNo, MachineInstr &DefMI,
+                                 int64_t Imm) const;
   // If the inst has imm-form and one of its operand is produced by an
   // add-immediate, try to transform it when possible.
   bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
-                                  unsigned ConstantOpNo,
-                                  MachineInstr &DefMI,
+                                  unsigned ConstantOpNo, MachineInstr &DefMI,
                                   bool KillDefMI) const;
   // Try to find that, if the instruction 'MI' contains any operand that
   // could be forwarded from some inst that feeds it. If yes, return the
@@ -159,8 +154,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                                  int64_t &Imm) const;
   bool isRegElgibleForForwarding(const MachineOperand &RegMO,
                                  const MachineInstr &DefMI,
-                                 const MachineInstr &MI,
-                                 bool KillDefMI) const;
+                                 const MachineInstr &MI, bool KillDefMI,
+                                 bool &IsFwdFeederRegKilled) const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
   virtual void anchor();
@@ -362,6 +357,22 @@ public:
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
 
+
+  /// Return true if the base operand, byte offset and memory width of \p LdSt
+  /// could be determined. Width is the size of memory that is being
+  /// loaded/stored (e.g. 1, 2, 4, 8).
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
+                                    int64_t &Offset, unsigned &Width,
+                                    const TargetRegisterInfo *TRI) const;
+
+  /// Return true if two MIs access different memory addresses and false
+  /// otherwise
+  bool
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
@@ -412,6 +423,18 @@ public:
 
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
+
+  /// Fixup killed/dead flag for register \p RegNo between instructions [\p
+  /// StartMI, \p EndMI]. Some PostRA transformations may violate register
+  /// killed/dead flags semantics, this function can be called to fix up. Before
+  /// calling this function,
+  /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
+  /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
+  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI.
+  /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same
+  ///    basic block.
+  void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+                         unsigned RegNo) const;
   void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
   void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
                                   int64_t Imm) const;
@@ -429,14 +452,55 @@ public:
   /// operands).
   static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
                                       unsigned OpNo) {
-    if (Desc.TSFlags & PPCII::UseVSXReg) {
-      if (isVRRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::V0);
-      else if (isVFRegister(Reg))
-        Reg = PPC::VSX32 + (Reg - PPC::VF0);
+    int16_t regClass = Desc.OpInfo[OpNo].RegClass;
+    switch (regClass) {
+      // We store F0-F31, VF0-VF31 in MCOperand and it should be F0-F31,
+      // VSX32-VSX63 during encoding/disassembling
+      case PPC::VSSRCRegClassID:
+      case PPC::VSFRCRegClassID:
+        if (isVFRegister(Reg))
+          return PPC::VSX32 + (Reg - PPC::VF0);
+        break;
+      // We store VSL0-VSL31, V0-V31 in MCOperand and it should be VSL0-VSL31,
+      // VSX32-VSX63 during encoding/disassembling
+      case PPC::VSRCRegClassID:
+        if (isVRRegister(Reg))
+          return PPC::VSX32 + (Reg - PPC::V0);
+        break;
+      // Other RegClass doesn't need mapping
+      default:
+        break;
     }
     return Reg;
   }
+
+  /// Check \p Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
+  bool isBDNZ(unsigned Opcode) const;
+
+  /// Find the hardware loop instruction used to set-up the specified loop.
+  /// On PPC, we have two instructions used to set-up the hardware loop
+  /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
+  /// instructions to indicate the end of a loop.
+  MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
+
+  /// Analyze the loop code to find the loop induction variable and compare used
+  /// to compute the number of iterations. Currently, we analyze loop that are
+  /// controlled using hardware loops.  In this case, the induction variable
+  /// instruction is null.  For all other cases, this function returns true,
+  /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
+  /// return new values when we can analyze the readonly loop \p L, otherwise,
+  /// nothing got changed
+  bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                   MachineInstr *&CmpInst) const override;
+  /// Generate code to reduce the loop iteration by one and check if the loop
+  /// is finished.  Return the value/register of the new loop count.  We need
+  /// this function when peeling off one or more iterations of a loop. This
+  /// function assumes the last iteration is peeled first.
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
+                           MachineInstr *IndVar, MachineInstr &Cmp,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           SmallVectorImpl<MachineInstr *> &PrevInsts,
+                           unsigned Iter, unsigned MaxIter) const override;
 };
 
 }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index dd3f1ac79089..c313337047f0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -231,6 +230,18 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
                               SDTCisSameAs<1,2>]>,
                            []>;
 
+// Builds an f64 from two i32 operands (result 0 is f64; operands 1, 2 are i32).
+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
+                           SDTypeProfile<1, 2,
+                             [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
+                              SDTCisVT<2,i32>]>, []>;
+
+def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
+                            SDTypeProfile<1, 2,
+                              [SDTCisVT<0, i32>, SDTCisVT<1, f64>,
+                              SDTCisPtrTy<2>]>,
+                              []>;
+
 // These are target-independent nodes, but have target-specific formats.
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
                            [SDNPHasChain, SDNPOutGlue]>;
@@ -458,6 +469,17 @@ def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
   return !isOffsetMultipleOf(N, 16);
 }]>;
 
+// PatFrag for a binary operation where neither operand is a simm16 immediate
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+  PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+    int16_t Imm;
+    return !isIntS16Immediate(N->getOperand(0), Imm)
+             && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC Flag Definitions.
 
@@ -546,10 +568,6 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
 def crrc : RegisterOperand<CRRC> {
   let ParserMatchClass = PPCRegCRRCAsmOperand;
 }
-def crrc0 : RegisterOperand<CRRC0> {
-  let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-
 def PPCRegSPERCAsmOperand : AsmOperandClass {
   let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
 }
@@ -737,7 +755,9 @@ def abscondbrtarget : Operand<OtherVT> {
 def calltarget : Operand<iPTR> {
   let PrintMethod = "printBranchOperand";
   let EncoderMethod = "getDirectBrEncoding";
+  let DecoderMethod = "DecodePCRel24BranchTarget";
   let ParserMatchClass = PPCDirectBrAsmOperand;
+  let OperandType = "OPERAND_PCREL";
 }
 def abscalltarget : Operand<iPTR> {
   let PrintMethod = "printAbsBranchOperand";
@@ -881,11 +901,24 @@ def pred : Operand<OtherVT> {
 }
 
 // Define PowerPC specific addressing mode.
-def iaddr  : ComplexPattern<iPTR, 2, "SelectAddrImm",    [], []>;
-def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",    [], []>;
+
+// d-form
+def iaddr    : ComplexPattern<iPTR, 2, "SelectAddrImm",     [], []>;  // "stb"
+// ds-form
+def iaddrX4  : ComplexPattern<iPTR, 2, "SelectAddrImmX4",   [], []>;  // "std"
+// dq-form
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>;  // "stxv"
+
+// The forms below are all x-form addressing modes; we use three different ones
+// so we can make an accurate check for x-form instructions in ISEL.
+// x-form addressing mode whose associated displacement form is D.
+def xaddr  : ComplexPattern<iPTR, 2, "SelectAddrIdx",     [], []>;    // "stbx"
+// x-form addressing mode whose associated displacement form is DS.
+def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4",    [], []>;  // "stdx"
+// x-form addressing mode whose associated displacement form is DQ.
+def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16",   [], []>; // "stxvx"
+
 def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4",  [], []>;  // "std"
-def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16",  [], []>; // "stxv"
 
 // The address in a single register. This is used with the SjLj
 // pseudo-instructions.
@@ -1309,6 +1342,15 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
   }
 }
 
+// Set the float rounding mode.
+let Uses = [RM], Defs = [RM] in { 
+def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
+                    "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
+
+def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
+                    "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+}
+
 let Defs = [LR] in
   def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
                    PPC970_Unit_BRU;
@@ -1435,6 +1477,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
       def BCLn : BForm_4<16, 4, 0, 1, (outs),
                          (ins crbitrc:$bi, condbrtarget:$dst),
                          "bcl 4, $bi, $dst">;
+      def BL_NOP  : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                           (outs), (ins calltarget:$func),
+                                           "bl $func\n\tnop", IIC_BrB, []>;
     }
   }
   let Uses = [CTR, RM] in {
@@ -2512,6 +2557,7 @@ def CRORC  : XLForm_1<19, 417, (outs crbitrc:$CRD),
                       [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
 
 let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
               "creqv $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 1)]>;
@@ -2519,6 +2565,7 @@ def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
 def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
               "crxor $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 0)]>;
+}
 
 let Defs = [CR1EQ], CRD = 6 in {
 def CR6SET  : XLForm_1_ext<19, 289, (outs), (ins),
@@ -2566,7 +2613,7 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
-let Pattern = [(int_ppc_mtctr i32:$rS)] in
+let Pattern = [(int_set_loop_iterations i32:$rS)] in
 def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                               "mtctr $rS", IIC_SprMTSPR>,
                 PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -2993,9 +3040,16 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
 // Calls
 def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
           (BL tglobaladdr:$dst)>;
+
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
           (BL texternalsym:$dst)>;
 
+// Calls for AIX only
+def : Pat<(PPCcall (i32 mcsym:$dst)),
+          (BL mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
+          (BL_NOP mcsym:$dst)>;
+
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
 
@@ -4071,6 +4125,10 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
 
 def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
 
+let Defs = [CR0] in
+def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+                         "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+
 def TLBIA : XForm_0<31, 370, (outs), (ins),
                         "tlbia", IIC_SprTLBIA, []>;
 
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index ef589ad01fd7..d67041d46d9f 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,8 @@
 //===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
 // 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // 
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
index 9f5891a45f22..935c3044ae47 100644
--- a/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -1,9 +1,8 @@
 //=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -512,7 +511,7 @@ def EVLWWSPLATX    : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src),
 
 def EVMERGEHI      : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
                                "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
-def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+def EVMERGELO      : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
                                "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
 def EVMERGEHILO    : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
                                "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
@@ -887,4 +886,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
           (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
 def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
           (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+
+def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)),
+          (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>;
+
+def : Pat<(i32 (PPCextract_spe f64:$rA, 1)),
+          (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>;
+def : Pat<(i32 (PPCextract_spe f64:$rA, 0)),
+          (i32 (EXTRACT_SUBREG $rA, sub_32))>;
+
 }
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 0f073388dc74..07f38a61d098 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,8 @@
 //===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
 // 
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // 
 //===----------------------------------------------------------------------===//
 //
@@ -54,6 +53,15 @@ def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
 def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
   let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
 }
+
+def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
+  SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
+  SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+]>;
+
 // Little-endian-specific nodes.
 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -85,6 +93,10 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
 def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
 def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
 
+def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                     string asmstr, InstrItinClass itin, Intrinsic Int,
                     ValueType OutTy, ValueType InTy> {
@@ -124,7 +136,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
 
 let Predicates = [HasVSX] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let UseVSXReg = 1 in {
 let hasSideEffects = 0 in { // VSX instructions don't have side effects.
 let Uses = [RM] in {
 
@@ -841,12 +852,12 @@ let Uses = [RM] in {
                        "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
                        [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
   } // isCommutable
-  let isCodeGenOnly = 1 in
-  def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+
+  let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+      isReMaterializable = 1 in {
+    def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
                        "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                        [(set v4i32:$XT, (v4i32 immAllZerosV))]>;
-
-  let isCodeGenOnly = 1 in {
     def XXLXORdpz : XX3Form_SetZero<60, 154,
                          (outs vsfrc:$XT), (ins),
                          "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
@@ -895,11 +906,10 @@ let Uses = [RM] in {
                              (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
   let isCodeGenOnly = 1 in
   def XXSPLTWs : XX2Form_2<60, 164,
-                       (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+                       (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
                        "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
 
 } // hasSideEffects
-} // UseVSXReg = 1
 
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.
@@ -961,6 +971,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
 
 def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
           (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+                     (and v4i32:$B, v4i32:$C))),
+          (v4i32 (XXSEL $A, $B, $C))>;
+
 let Predicates = [IsBigEndian] in {
 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
           (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -1063,6 +1077,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
 def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
           (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
 
+def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+
 // Loads.
 let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
   def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1176,6 +1192,15 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
 def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
           (XXSEL $vC, $vB, $vA)>;
 
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMINDP $src1, $src2))>;
+
 let Predicates = [IsLittleEndian] in {
 def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
           (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1248,7 +1273,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
 def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
 let Predicates = [HasP8Vector] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-  let isCommutable = 1, UseVSXReg = 1 in {
+  let isCommutable = 1 in {
     def XXLEQV : XX3Form<60, 186,
                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                          "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1258,12 +1283,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                           "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
                           [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                     v4i32:$XB)))]>;
-  } // isCommutable, UseVSXReg
+  } // isCommutable
 
   def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
             (XXLEQV $A, $B)>;
 
-  let UseVSXReg = 1 in {
   def XXLORC : XX3Form<60, 170,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1312,7 +1336,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                        "#STIWX",
                       [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
   } // mayStore
-  } // UseVSXReg = 1
 
   def : Pat<(f64 (extloadf32 xoaddr:$src)),
             (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
@@ -1342,7 +1365,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
   def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
             (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
 
-  let UseVSXReg = 1 in {
   // VSX Elementary Scalar FP arithmetic (SP)
   let isCommutable = 1 in {
     def XSADDSP : XX3Form<60, 0,
@@ -1354,7 +1376,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                           "xsmulsp $XT, $XA, $XB", IIC_VecFP,
                           [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
   } // isCommutable
-
+  def XSSUBSP : XX3Form<60, 8,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
   def XSDIVSP : XX3Form<60, 24,
                         (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                         "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
@@ -1374,10 +1399,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                            (outs vssrc:$XT), (ins vssrc:$XB),
                            "xsrsqrtesp $XT, $XB", IIC_VecFP,
                            [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
-  def XSSUBSP : XX3Form<60, 8,
-                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
-                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
-                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
 
   // FMA Instructions
   let BaseName = "XSMADDASP" in {
@@ -1470,7 +1491,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                           "xscvdpspn $XT, $XB", IIC_VecFP, []>;
   def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
-  } // UseVSXReg = 1
 
   let Predicates = [IsLittleEndian] in {
   def : Pat<DWToSPExtractConv.El0SS1,
@@ -1514,10 +1534,22 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
             (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
 
+  def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
+  def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+            (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+                           (COPY_TO_REGCLASS $src2, VRRC)))>;
 } // AddedComplexity = 400
 } // HasP8Vector
 
-let UseVSXReg = 1, AddedComplexity = 400 in {
+let AddedComplexity = 400 in {
 let Predicates = [HasDirectMove] in {
   // VSX direct move instructions
   def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1525,7 +1557,7 @@ let Predicates = [HasDirectMove] in {
                               [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
       Requires<[In64BitMode]>;
   let isCodeGenOnly = 1 in
-  def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+  def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
                              "mfvsrd $rA, $XT", IIC_VecGeneral,
                              []>,
       Requires<[In64BitMode]>;
@@ -1557,7 +1589,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
                               []>, Requires<[In64BitMode]>;
 
 } // IsISA3_0, HasDirectMove
-} // UseVSXReg = 1
+} // AddedComplexity = 400
 
 // We want to parse this from asm, but we don't want to emit this as it would
 // be emitted with a VSX reg. So leave Emit = 0 here.
@@ -2415,7 +2447,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
                          list<dag> pattern>
     : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
 
-  let UseVSXReg = 1 in {
   // [PO T XO B XO BX /]
   class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                         list<dag> pattern>
@@ -2434,7 +2465,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
                   InstrItinClass itin, list<dag> pattern>
     : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
               !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
-  } // UseVSXReg = 1
 
   // [PO VRT VRA VRB XO /]
   class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -2482,69 +2512,70 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   let isCommutable = 1 in {
   def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp",
                                    [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
+  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp",
+                                   [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
+  }
+  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" ,
+                                   [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
+  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp",
+                                   [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
+  // Square-Root
+  def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp",
+                                   [(set f128:$vT, (fsqrt f128:$vB))]>;
+  // (Negative) Multiply-{Add/Subtract}
+  def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
+                                    [(set f128:$vT,
+                                          (fma f128:$vA, f128:$vB,
+                                               f128:$vTi))]>;
+  def XSMSUBQP  : X_VT5_VA5_VB5_FMA   <63, 420, "xsmsubqp"  ,
+                                       [(set f128:$vT,
+                                             (fma f128:$vA, f128:$vB,
+                                                  (fneg f128:$vTi)))]>;
+  def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+                                     [(set f128:$vT,
+                                           (fneg (fma f128:$vA, f128:$vB,
+                                                      f128:$vTi)))]>;
+  def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+                                     [(set f128:$vT,
+                                           (fneg (fma f128:$vA, f128:$vB,
+                                                      (fneg f128:$vTi))))]>;
+
+  let isCommutable = 1 in {
   def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
                                   [(set f128:$vT,
                                   (int_ppc_addf128_round_to_odd
                                   f128:$vA, f128:$vB))]>;
-  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp",
-                                   [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
   def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
                                   [(set f128:$vT,
                                   (int_ppc_mulf128_round_to_odd
                                   f128:$vA, f128:$vB))]>;
   }
-
-  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" ,
-                                   [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
   def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
                                   [(set f128:$vT,
                                   (int_ppc_subf128_round_to_odd
                                   f128:$vA, f128:$vB))]>;
-  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp",
-                                   [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
   def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
                                   [(set f128:$vT,
                                   (int_ppc_divf128_round_to_odd
                                   f128:$vA, f128:$vB))]>;
-
-  // Square-Root
-  def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp",
-                                   [(set f128:$vT, (fsqrt f128:$vB))]>;
   def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
                                   [(set f128:$vT,
                                   (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
 
-  // (Negative) Multiply-{Add/Subtract}
-  def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
-                                    [(set f128:$vT,
-                                          (fma f128:$vA, f128:$vB,
-                                               f128:$vTi))]>;
 
   def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
                                       [(set f128:$vT,
                                       (int_ppc_fmaf128_round_to_odd
                                       f128:$vA,f128:$vB,f128:$vTi))]>;
 
-  def XSMSUBQP  : X_VT5_VA5_VB5_FMA   <63, 420, "xsmsubqp"  ,
-                                       [(set f128:$vT,
-                                             (fma f128:$vA, f128:$vB,
-                                                  (fneg f128:$vTi)))]>;
   def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
                                       [(set f128:$vT,
                                       (int_ppc_fmaf128_round_to_odd
                                       f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
-  def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
-                                     [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      f128:$vTi)))]>;
   def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
                                       [(set f128:$vT,
                                       (fneg (int_ppc_fmaf128_round_to_odd
                                       f128:$vA, f128:$vB, f128:$vTi)))]>;
-  def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
-                                     [(set f128:$vT,
-                                           (fneg (fma f128:$vA, f128:$vB,
-                                                      (fneg f128:$vTi))))]>;
   def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
                                       [(set f128:$vT,
                                       (fneg (int_ppc_fmaf128_round_to_odd
@@ -2572,8 +2603,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   // DP/QP Compare Exponents
   def XSCMPEXPDP : XX3Form_1<60, 59,
                              (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
-                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
-                   UseVSXReg;
+                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
   def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
 
   // DP Compare ==, >=, >, !=
@@ -2631,7 +2661,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
             (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
 
-  let UseVSXReg = 1 in {
   //===--------------------------------------------------------------------===//
   // Round to Floating-Point Integer Instructions
 
@@ -2648,8 +2677,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
                                  [(set v4f32:$XT,
                                      (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
 
-  } // UseVSXReg = 1
-
   // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
   // separate pattern so that it can convert the input register class from
   // VRRC(v8i16) to VSRC.
@@ -2691,7 +2718,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   // Insert Exponent DP/QP
   // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
   def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
-                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
+                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
   // vB NOTE: only vB.dword[0] is used, that's why we don't use
   //          X_VT5_VA5_VB5 form
   def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2712,7 +2739,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
                            (v2i64 (XSXEXPQP $vA)), sub_64)))>;
 
   // Vector Insert Word
-  let UseVSXReg = 1 in {
   // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
   def XXINSERTW   :
     XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2726,7 +2752,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
                                   (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
                                   "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
-  } // UseVSXReg = 1
 
   // Vector Insert Exponent DP/SP
   def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -2759,20 +2784,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   //===--------------------------------------------------------------------===//
 
   // Test Data Class SP/DP/QP
-  let UseVSXReg = 1 in {
   def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
   def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                               "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
-  } // UseVSXReg = 1
   def XSTSTDCQP : X_BF3_DCMX7_RS5  <63, 708,
                               (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
                               "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
 
   // Vector Test Data Class SP/DP
-  let UseVSXReg = 1 in {
   def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
                               (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                               "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
@@ -2783,7 +2805,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
                               "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
                               [(set v2i64: $XT,
                                (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
-  } // UseVSXReg = 1
 
   //===--------------------------------------------------------------------===//
 
@@ -2824,7 +2845,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Vector Splat Immediate Byte
   def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
-                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
+                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
 
   //===--------------------------------------------------------------------===//
   // Vector/Scalar Load/Store Instructions
@@ -2834,7 +2855,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   let mayLoad = 1, mayStore = 0 in {
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
-                            "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
+                            "lxv $XT, $src", IIC_LdStLFD, []>;
   // Load DWord
   def LXSD  : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
                        "lxsd $vD, $src", IIC_LdStLFD, []>;
@@ -2847,7 +2868,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
-              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
+              !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
 
   // Load as Integer Byte/Halfword & Zero Indexed
   def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
@@ -2861,16 +2882,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Load Vector Indexed
   def LXVX    : X_XT6_RA5_RB5<31, 268, "lxvx"   , vsrc,
-                [(set v2f64:$XT, (load xaddr:$src))]>;
+                [(set v2f64:$XT, (load xaddrX16:$src))]>;
   // Load Vector (Left-justified) with Length
   def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
                    "lxvl $XT, $src, $rB", IIC_LdStLoad,
-                   [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>,
-                    UseVSXReg;
+                   [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
   def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
                    "lxvll $XT, $src, $rB", IIC_LdStLoad,
-                   [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>,
-                    UseVSXReg;
+                   [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
 
   // Load Vector Word & Splat Indexed
   def LXVWSX  : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
@@ -2881,7 +2900,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   let mayStore = 1, mayLoad = 0 in {
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
-                             "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
+                             "stxv $XT, $dst", IIC_LdStSTFD, []>;
   // Store DWord
   def STXSD  : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxsd $vS, $dst", IIC_LdStSTFD, []>;
@@ -2893,7 +2912,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                       RegisterOperand vtype, list<dag> pattern>
     : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
-              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
+              !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
 
   // Store as Integer Byte/Halfword Indexed
   def STXSIBX  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsfrc,
@@ -2901,8 +2920,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def STXSIHX  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsfrc,
                                [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
   let isCodeGenOnly = 1 in {
-    def STXSIBXv  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vrrc, []>;
-    def STXSIHXv  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vrrc, []>;
+    def STXSIBXv  : X_XS6_RA5_RB5<31,  909, "stxsibx" , vsrc, []>;
+    def STXSIHXv  : X_XS6_RA5_RB5<31,  941, "stxsihx" , vsrc, []>;
   }
 
   // Store Vector Halfword*8/Byte*16 Indexed
@@ -2911,21 +2930,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Store Vector Indexed
   def STXVX    : X_XS6_RA5_RB5<31,  396, "stxvx"   , vsrc,
-                 [(store v2f64:$XT, xaddr:$dst)]>;
+                 [(store v2f64:$XT, xaddrX16:$dst)]>;
 
   // Store Vector (Left-justified) with Length
   def STXVL : XX1Form_memOp<31, 397, (outs),
                             (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                             "stxvl $XT, $dst, $rB", IIC_LdStLoad,
                             [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
-                              i64:$rB)]>,
-                            UseVSXReg;
+                              i64:$rB)]>;
   def STXVLL : XX1Form_memOp<31, 429, (outs),
                             (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                             "stxvll $XT, $dst, $rB", IIC_LdStLoad,
                             [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
-                              i64:$rB)]>,
-                            UseVSXReg;
+                              i64:$rB)]>;
   } // mayStore
 
   let Predicates = [IsLittleEndian] in {
@@ -3045,24 +3062,24 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   } // IsLittleEndian, HasP9Vector
 
   // D-Form Load/Store
-  def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(f128  (quadwOffsetLoad iqaddr:$src)),
+  def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(f128  (quadwOffsetLoad iaddrX16:$src)),
             (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
 
-  def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore  f128:$rS, iqaddr:$dst),
+  def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(quadwOffsetStore  f128:$rS, iaddrX16:$dst),
             (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
-  def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+  def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
             (STXV $rS, memrix16:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
             (STXV $rS, memrix16:$dst)>;
 
 
@@ -3159,109 +3176,109 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   let Predicates = [IsBigEndian, HasP9Vector] in {
   // Scalar stores of i8
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
-            (STXSIBXv $S, xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
 
   // Scalar stores of i16
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
-            (STXSIHXv $S, xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   } // IsBigEndian, HasP9Vector
 
   let Predicates = [IsLittleEndian, HasP9Vector] in {
   // Scalar stores of i8
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
-            (STXSIBXv $S, xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
-            (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+            (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
 
   // Scalar stores of i16
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
-            (STXSIHXv $S, xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
   def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
-            (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+            (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
   } // IsLittleEndian, HasP9Vector
 
 
@@ -3273,53 +3290,97 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   def DFLOADf32  : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
                           "#DFLOADf32",
-                          [(set f32:$XT, (load ixaddr:$src))]>;
+                          [(set f32:$XT, (load iaddrX4:$src))]>;
   def DFLOADf64  : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
                           "#DFLOADf64",
-                          [(set f64:$XT, (load ixaddr:$src))]>;
+                          [(set f64:$XT, (load iaddrX4:$src))]>;
   def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
                           "#DFSTOREf32",
-                          [(store f32:$XT, ixaddr:$dst)]>;
+                          [(store f32:$XT, iaddrX4:$dst)]>;
   def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
                           "#DFSTOREf64",
-                          [(store f64:$XT, ixaddr:$dst)]>;
+                          [(store f64:$XT, iaddrX4:$dst)]>;
 
-  def : Pat<(f64 (extloadf32 ixaddr:$src)),
-            (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
-  def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
-            (f32 (DFLOADf32 ixaddr:$src))>;
+  def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+            (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+            (f32 (DFLOADf32 iaddrX4:$src))>;
 
+  def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+            (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+  def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+            (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
 
   let AddedComplexity = 400 in {
   // The following pseudoinstructions are used to ensure the utilization
   // of all 64 VSX registers.
     let Predicates = [IsLittleEndian, HasP9Vector] in {
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
                 (v2i64 (XXPERMDIs
-                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+                (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
                 (v2i64 (XXPERMDIs
-		(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+                (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
 
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
                 (v2f64 (XXPERMDIs
-                (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+                (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
                 (v2f64 (XXPERMDIs
-                (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
-    }
+                (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$dst)>; // elt 0 via XXPERMDI
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$dst)>; // elt 0 via XXPERMDI
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$dst)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$dst)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$dst)>; // elt 0 via XXPERMDI
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$dst)>; // elt 0 via XXPERMDI
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$dst)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$dst)>;
+    } // IsLittleEndian, HasP9Vector
 
     let Predicates = [IsBigEndian, HasP9Vector] in {
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
-                (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
-      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
-                (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
-                (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
-      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
-                (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-    }
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+      def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+                (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+      def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+                (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$dst)>; // elt 1 via XXPERMDI
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), xaddrX4:$dst)>; // elt 1 via XXPERMDI
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$dst)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$dst),
+                (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$dst)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$dst)>; // elt 1 via XXPERMDI
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                             sub_64), iaddrX4:$dst)>; // elt 1 via XXPERMDI
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$dst)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$dst),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$dst)>;
+    } // IsBigEndian, HasP9Vector
   }
 
   let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3455,14 +3516,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   } // IsLittleEndian, HasP9Vector
 
   // Convert (Un)Signed DWord in memory -> QP
-  def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
-            (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
-  def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
-            (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
-  def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
-            (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
-  def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
-            (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+  def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+            (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+  def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+            (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+  def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+            (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+  def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+            (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
 
   // Convert Unsigned HWord in memory -> QP
   def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
@@ -3483,13 +3544,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   // Instructions for store(fptosi).
   // The 8-byte version is repeated here due to availability of D-Form STXSD.
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
             (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
-                    xaddr:$dst)>;
+                    xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
             (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
-                   ixaddr:$dst)>;
+                   iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
             (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
@@ -3500,11 +3561,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
               (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
             (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8),
-            (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
+              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+            (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+            (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3514,13 +3575,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Instructions for store(fptoui).
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
             (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
-                    xaddr:$dst)>;
+                    xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8),
+              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
             (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
-                   ixaddr:$dst)>;
+                   iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
             (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
@@ -3531,11 +3592,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
               (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
             (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8),
-            (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
+              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+            (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
-              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+            (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
@@ -3668,13 +3729,13 @@ def FltToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToLongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A)))));
 }
 def FltToULongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
 }
 def FltToULongLoadP9 {
-  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A)))));
 }
 def FltToLong {
   dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
@@ -3704,13 +3765,13 @@ def DblToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
 }
 def DblToIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A)))));
 }
 def DblToUIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
 }
 def DblToUIntLoadP9 {
-  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A)))));
 }
 def DblToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3834,8 +3895,38 @@ let AddedComplexity = 400 in {
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
                               (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+    // The store instructions store the second word from the left.
+    // So to align element zero, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 2 and 3.
+    foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+   }
+
   // Big endian, available on all targets with VSX
   let Predicates = [IsBigEndian, HasVSX] in {
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3871,8 +3962,38 @@ let AddedComplexity = 400 in {
     def : Pat<DWToSPExtractConv.BVS,
               (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
                               (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+    def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+    // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+    // The store instructions store the second word from the left.
+    // So to align element 3, we need to modulo-left-shift by 3 words.
+    // Similar logic applies for elements 0 and 1.
+    foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                       sub_64), xoaddr:$src)>;
+    }
   }
 
+  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                          xoaddr:$src)>;
+    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+   }
+
   let Predicates = [IsLittleEndian, HasVSX] in {
   // Little endian, available on all targets with VSX
     def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3969,17 +4090,17 @@ let AddedComplexity = 400 in {
               (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
     def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
               (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                                (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+                                (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
     def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
               (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                                (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+                                (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
     def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
               (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
-                                              (DFLOADf32 ixaddr:$A),
+                                              (DFLOADf32 iaddrX4:$A),
                                               VSFRC)), 0))>;
     def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
               (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
-                                              (DFLOADf32 ixaddr:$A),
+                                              (DFLOADf32 iaddrX4:$A),
                                               VSFRC)), 0))>;
   }
 
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 0b57dd9b618d..4d45d96d4479 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -1,9 +1,8 @@
 //===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -65,12 +64,6 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
 
 STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
 
-namespace llvm {
-
-  void initializePPCLoopPreIncPrepPass(PassRegistry&);
-
-} // end namespace llvm
-
 namespace {
 
   class PPCLoopPreIncPrep : public FunctionPass {
@@ -338,7 +331,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
   // iteration space), insert a new preheader for the loop.
   if (!LoopPredecessor ||
       !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
-    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
     if (LoopPredecessor)
       MadeChange = true;
   }
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index e731c0bc0c23..027e6bd1ba06 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,16 +110,16 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
     RefKind = MCSymbolRefExpr::VK_PLT;
 
   const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+  const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
   const TargetMachine &TM = Printer.TM;
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
-  // -msecure-plt option works only in PIC mode. If secure plt mode
-  // is on add 32768 to symbol.
+  // If -msecure-plt -fPIC, add 32768 to symbol.
   if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
+      M->getPICLevel() == PICLevel::BigPIC &&
       MO.getTargetFlags() == PPCII::MO_PLT)
-    Expr = MCBinaryExpr::createAdd(Expr,
-                                   MCConstantExpr::create(32768, Ctx),
-                                   Ctx);
+    Expr =
+        MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx);
 
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::createAdd(Expr,
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 0068df19f0c8..446246358e96 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1,9 +1,8 @@
 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -22,9 +21,12 @@
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -38,6 +40,7 @@ using namespace llvm;
 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
 STATISTIC(MultiTOCSaves,
           "Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
@@ -48,6 +51,10 @@ STATISTIC(NumFunctionsEnteredInMIPeephole,
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+          "Number of pairs of rotate left, clear left/right collapsed");
+STATISTIC(NumEXTSWAndSLDICombined,
+          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -83,6 +90,9 @@ struct PPCMIPeephole : public MachineFunctionPass {
 
 private:
   MachineDominatorTree *MDT;
+  MachinePostDominatorTree *MPDT;
+  MachineBlockFrequencyInfo *MBFI;
+  uint64_t EntryFreq;
 
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
@@ -93,6 +103,8 @@ private:
   // Perform peepholes.
   bool eliminateRedundantCompare(void);
   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
+  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
+  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                       MachineInstr *MI);
 
@@ -100,7 +112,11 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    AU.addRequired<MachineBlockFrequencyInfo>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addPreserved<MachinePostDominatorTree>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -118,6 +134,9 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
   MDT = &getAnalysis<MachineDominatorTree>();
+  MPDT = &getAnalysis<MachinePostDominatorTree>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  EntryFreq = MBFI->getEntryFreq();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
   LLVM_DEBUG(MF->dump());
@@ -198,6 +217,30 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
 void PPCMIPeephole::UpdateTOCSaves(
   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+         "TOC-save removal only supported on ELFv2");
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+
+  MachineBasicBlock *Entry = &MF->front();
+  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+  // If the TOC save resides in a block that post-dominates Entry, or in a
+  // block that is hotter than Entry (keep in mind that early MachineLICM has
+  // already run, so the TOC save won't be hoisted), we can just do the save
+  // in the prologue.
+  if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+    FI->setMustSaveTOC(true);
+
+  // If we are saving the TOC in the prologue, all the TOC saves can be removed
+  // from the code.
+  if (FI->mustSaveTOC()) {
+    for (auto &TOCSave : TOCSaves)
+      TOCSave.second = false;
+    // Add new instruction to map.
+    TOCSaves[MI] = false;
+    return;
+  }
+
   bool Keep = true;
   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
     MachineInstr *CurrInst = It->first;
@@ -758,6 +801,11 @@ bool PPCMIPeephole::simplifyCode(void) {
         NumOptADDLIs++;
         break;
       }
+      case PPC::RLDICR: {
+        Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+                      combineSEXTAndSHL(MI, ToErase);
+        break;
+      }
       }
     }
 
@@ -771,6 +819,10 @@ bool PPCMIPeephole::simplifyCode(void) {
 
   // Eliminate all the TOC save instructions which are redundant.
   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+  if (FI->mustSaveTOC())
+    NumTOCSavesInPrologue++;
+
   // We try to eliminate redundant compare instruction.
   Simplified |= eliminateRedundantCompare();
 
@@ -1275,10 +1327,136 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
   return Simplified;
 }
 
+// ISEL sees only one basic block at a time, so it misses the chance to emit
+// an RLDIC when lowering jump tables: the clear (RLDICL) and the shift left
+// (RLDICR) end up in different blocks. Fold such a pair into one RLDIC here.
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
+  if (MI.getOpcode() != PPC::RLDICR)
+    return false;
+
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+  if (SrcMI->getOpcode() != PPC::RLDICL)
+    return false;
+
+  MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+  MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+  MachineOperand MOpSHMI = MI.getOperand(2);
+  MachineOperand MOpMEMI = MI.getOperand(3);
+  if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
+        MOpMEMI.isImm()))
+    return false;
+
+  uint64_t SHSrc = MOpSHSrc.getImm();
+  uint64_t MBSrc = MOpMBSrc.getImm();
+  uint64_t SHMI = MOpSHMI.getImm();
+  uint64_t MEMI = MOpMEMI.getImm();
+  uint64_t NewSH = SHSrc + SHMI;
+  uint64_t NewMB = MBSrc - SHMI; // Wraps if SHMI > MBSrc; rejected below.
+  if (NewMB > 63 || NewSH > 63)
+    return false;
+
+  // The bits cleared with RLDICL are [0, MBSrc).
+  // The bits cleared with RLDICR are (MEMI, 63].
+  // After the sequence, the bits cleared are:
+  // [0, MBSrc-SHMI) and (MEMI, 63].
+  //
+  // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+  if ((63 - NewSH) != MEMI)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Converting pair: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  MI.setDesc(TII->get(PPC::RLDIC));
+  MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+  MI.getOperand(2).setImm(NewSH);
+  MI.getOperand(3).setImm(NewMB);
+
+  LLVM_DEBUG(dbgs() << "To: ");
+  LLVM_DEBUG(MI.dump());
+  NumRotatesCollapsed++;
+  return true;
+}
+
+// For LLVM IR such as:
+// entry:
+//   %iconv = sext i32 %index to i64
+//   br i1 undef label %true, label %false
+// true:
+//   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
+// ...
+// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
+// different BBs when conducting instruction selection. We can do a peephole
+// optimization to combine these two instructions into extswsli after
+// instruction selection.
+bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
+                                      MachineInstr *&ToErase) {
+  if (MI.getOpcode() != PPC::RLDICR)
+    return false;
+
+  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
+    return false;
+
+  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
+
+  MachineOperand MOpSHMI = MI.getOperand(2);
+  MachineOperand MOpMEMI = MI.getOperand(3);
+  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
+    return false;
+
+  uint64_t SHMI = MOpSHMI.getImm();
+  uint64_t MEMI = MOpMEMI.getImm();
+  if (SHMI + MEMI != 63) // Require ME == 63 - SH.
+    return false;
+
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+  if (SrcMI->getOpcode() != PPC::EXTSW &&
+      SrcMI->getOpcode() != PPC::EXTSW_32_64)
+    return false;
+
+  // If the register defined by extsw has more than one use, combination is not
+  // needed.
+  if (!MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Combining pair: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  MachineInstr *NewInstr =
+      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
+                                               : TII->get(PPC::EXTSWSLI_32_64),
+              MI.getOperand(0).getReg())
+          .add(SrcMI->getOperand(1))
+          .add(MOpSHMI);
+  (void)NewInstr; // Only read by LLVM_DEBUG below.
+
+  LLVM_DEBUG(dbgs() << "TO: ");
+  LLVM_DEBUG(NewInstr->dump());
+  ++NumEXTSWAndSLDICombined;
+  ToErase = &MI; // MI is handed back to the caller for deletion.
+  // SrcMI, which is extsw, is of no use now, erase it.
+  SrcMI->eraseFromParent();
+  return true;
+}
+
 } // end default namespace
 
 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                       "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                     "PowerPC MI Peephole Optimization", false, false)
 
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 3923417257e8..2f65d6a2855b 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 8a3f50aa9565..dfae19804d94 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,6 +44,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// PEI.
   bool MustSaveLR;
 
+  /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+  /// prologue of the function. This is typically the case when there are
+  /// indirect calls in the function and it is more profitable to save the
+  /// TOC pointer in the prologue than in the block(s) containing the call(s).
+  bool MustSaveTOC = false;
+
   /// Do we have to disable shrink-wrapping? This has to be set if we emit any
   /// instructions that clobber LR in the entry block because discovering this
   /// in PEI is too late (happens after shrink-wrapping);
@@ -152,6 +157,9 @@ public:
   void setMustSaveLR(bool U) { MustSaveLR = U; }
   bool mustSaveLR() const    { return MustSaveLR; }
 
+  void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+  bool mustSaveTOC() const    { return MustSaveTOC; }
+
   /// We certainly don't want to shrink wrap functions if we've emitted a
   /// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
   /// has to go into the entry block.
diff --git a/lib/Target/PowerPC/PPCMachineScheduler.cpp b/lib/Target/PowerPC/PPCMachineScheduler.cpp
new file mode 100644
index 000000000000..a38c8f475066
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
+using namespace llvm;
+
+// Hidden option that switches off the ADDI/load bias in biasAddiLoadCandidate.
+static cl::opt<bool> DisableAddiLoadHeuristic(
+    "disable-ppc-sched-addi-load", cl::Hidden,
+    cl::desc("Disable scheduling addi instruction before load for ppc"));
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary &Zone) const {
+  if (DisableAddiLoadHeuristic)
+    return false;
+
+  auto IsAddi = [](const MachineInstr *Inst) {
+    const unsigned Opc = Inst->getOpcode();
+    return Opc == PPC::ADDI || Opc == PPC::ADDI8;
+  };
+
+  // When Zone.isTop(), TryCand is taken as the first instruction of the pair
+  // and Cand as the second; otherwise the roles are swapped.
+  const bool Top = Zone.isTop();
+  const MachineInstr *First = (Top ? TryCand : Cand).SU->getInstr();
+  const MachineInstr *Second = (Top ? Cand : TryCand).SU->getInstr();
+
+  // ADDI then load: tag TryCand with Stall. Load then ADDI: reset to NoCand.
+  const bool AddiFirst = IsAddi(First) && Second->mayLoad();
+  const bool LoadFirst = !AddiFirst && First->mayLoad() && IsAddi(Second);
+  if (!AddiFirst && !LoadFirst)
+    return false;
+  TryCand.Reason = AddiFirst ? Stall : NoCand;
+  return true;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                         SchedCandidate &TryCand,
+                                         SchedBoundary *Zone) const {
+  // Let the generic heuristics rank the two candidates first.
+  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  // The PowerPC heuristic needs a valid incumbent and a scheduling zone, and
+  // is consulted only when the generic pass either left TryCand unselected
+  // (NoCand) or selected it purely on node order.
+  const bool GenericUndecided =
+      TryCand.Reason == NodeOrder || TryCand.Reason == NoCand;
+  if (Zone == nullptr || !Cand.isValid() || !GenericUndecided)
+    return;
+
+  // Scheduling the ADDI ahead of the load can hide latency, because RA may
+  // create a true dependency between the load and the addi.
+  (void)biasAddiLoadCandidate(Cand, TryCand, *Zone);
+}
+
+void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
+  // No PPC-specific PostRA behavior yet; this just forwards to the base.
+  PostGenericScheduler::enterMBB(MBB);
+}
+
+void PPCPostRASchedStrategy::leaveMBB() {
+  // No PPC-specific PostRA behavior yet; this just forwards to the base.
+  PostGenericScheduler::leaveMBB();
+}
+
+void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
+  // No PPC-specific PostRA initialization yet; forwards to the base class.
+  PostGenericScheduler::initialize(Dag);
+}
+
+SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
+  // No PPC-specific PostRA node selection yet; defers to the base class.
+  return PostGenericScheduler::pickNode(IsTopNode);
+}
+
diff --git a/lib/Target/PowerPC/PPCMachineScheduler.h b/lib/Target/PowerPC/PPCMachineScheduler.h
new file mode 100644
index 000000000000..93532d9545a6
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -0,0 +1,49 @@
+//===- PPCMachineScheduler.h - Custom PowerPC MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom PowerPC MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// MachineSchedStrategy for PowerPC pre-RA scheduling; overrides tryCandidate.
+class PPCPreRASchedStrategy : public GenericScheduler {
+public:
+  PPCPreRASchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C) {}
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+private:
+  bool biasAddiLoadCandidate(SchedCandidate &Cand,
+                             SchedCandidate &TryCand,
+                             SchedBoundary &Zone) const;
+};
+
+/// MachineSchedStrategy for PowerPC post-RA scheduling (PostGenericScheduler).
+class PPCPostRASchedStrategy : public PostGenericScheduler {
+public:
+  PPCPostRASchedStrategy(const MachineSchedContext *C) :
+    PostGenericScheduler(C) {}
+
+protected:
+  void initialize(ScheduleDAGMI *Dag) override;
+  SUnit *pickNode(bool &IsTopNode) override;
+  void enterMBB(MachineBasicBlock *MBB) override;
+  void leaveMBB() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 8a1d68011c5f..d0d84efdbd20 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,9 +1,8 @@
 //===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCPfmCounters.td b/lib/Target/PowerPC/PPCPfmCounters.td
index d2a09f30c0f3..20b9efdc9df9 100644
--- a/lib/Target/PowerPC/PPCPfmCounters.td
+++ b/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,9 +1,8 @@
 //===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4458b92ceb5e..d83c92276800 100644
--- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -1,9 +1,8 @@
 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 25b2b54cbe98..3a83cc27439c 100644
--- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -1,9 +1,8 @@
 //===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,10 +30,6 @@ using namespace llvm;
 
 STATISTIC(NumSimplified, "Number of QPX load splats simplified");
 
-namespace llvm {
-  void initializePPCQPXLoadSplatPass(PassRegistry&);
-}
-
 namespace {
   struct PPCQPXLoadSplat : public MachineFunctionPass {
     static char ID;
diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 173fc18b9ebf..8eaa6dfe2bf7 100644
--- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -1,9 +1,8 @@
 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -49,10 +48,6 @@ STATISTIC(NumNotSplitChainCopies,
 STATISTIC(NumNotSplitWrongOpcode,
           "Number of blocks not split due to the wrong opcode.");
 
-namespace llvm {
-  void initializePPCReduceCRLogicalsPass(PassRegistry&);
-}
-
 /// Given a basic block \p Successor that potentially contains PHIs, this
 /// function will look for any incoming values in the PHIs that are supposed to
 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
@@ -171,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
                                            : *ThisMBB->succ_begin();
   MachineBasicBlock *NewBRTarget =
       BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
-  BranchProbability ProbToNewTarget =
-      !BSI.MBPI ? BranchProbability::getUnknown()
-                : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+  // It's impossible to know the precise branch probability after the split.
+  // But it still needs to be reasonable: the total probability of reaching
+  // the original targets should not be changed.
+  // After split NewBRTarget will get two incoming edges. Assume P0 is the
+  // original branch probability to NewBRTarget, P1 and P2 are new branch
+  // probabilities to NewBRTarget after split. If the two edge frequencies are
+  // same, then
+  //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
+  //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
+  BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
+  BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
+  ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+  ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+  if (BSI.MBPI) {
+    if (BSI.BranchToFallThrough) {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbOrigFallThrough.getCompl();
+    } else {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbOrigTarget.getCompl();
+    }
+  }
 
   // Create a new basic block.
   MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -185,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
   // Move everything after SplitBefore into the new block.
   NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
   NewMBB->transferSuccessors(ThisMBB);
+  if (!ProbOrigTarget.isUnknown()) {
+    auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+    NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+    MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+    NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+  }
 
-  // Add the two successors to ThisMBB. The probabilities come from the
-  // existing blocks if available.
+  // Add the two successors to ThisMBB.
   ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
-  ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+  ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
 
   // Add the branches to ThisMBB.
   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3d067aa8e621..12554ea8d079 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCRegisterInfo.h"
-#include "PPC.h"
 #include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
@@ -71,6 +69,14 @@ StackPtrConst("ppc-stack-ptr-caller-preserved",
                          "caller preserved registers can be LICM candidates"),
                 cl::init(true), cl::Hidden);
 
+static cl::opt<unsigned>
+MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
+                  cl::desc("Maximum search distance for definition of CR bit "
+                           "spill on ppc"),
+                  cl::Hidden, cl::init(100));
+
+static unsigned offsetMinAlignForOpcode(unsigned OpC);
+
 PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
   : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
                        TM.isPPC64() ? 0 : 1,
@@ -153,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
     return CSR_SRV464_TLS_PE_SaveList;
 
-  if (Subtarget.hasSPE())
-    return CSR_SVR432_SPE_SaveList;
-
   // On PPC64, we might need to save r2 (but only if it is not reserved).
   bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
 
+  // Cold calling convention CSRs.
   if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
-    return TM.isPPC64()
-               ? (Subtarget.hasAltivec()
-                      ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
-                                : CSR_SVR64_ColdCC_Altivec_SaveList)
-                      : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
-                                : CSR_SVR64_ColdCC_SaveList))
-               : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
-                                         : CSR_SVR32_ColdCC_SaveList);
+    if (TM.isPPC64()) {
+      if (Subtarget.hasAltivec())
+        return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
+                      : CSR_SVR64_ColdCC_Altivec_SaveList;
+      return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
+                    : CSR_SVR64_ColdCC_SaveList;
+    }
+    // 32-bit targets.
+    if (Subtarget.hasAltivec())
+      return CSR_SVR32_ColdCC_Altivec_SaveList;
+    else if (Subtarget.hasSPE())
+      return CSR_SVR32_ColdCC_SPE_SaveList;
+    return CSR_SVR32_ColdCC_SaveList;
   }
-
-  return TM.isPPC64()
-             ? (Subtarget.hasAltivec()
-                    ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
-                              : CSR_SVR464_Altivec_SaveList)
-                    : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
-             : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
-                                       : CSR_SVR432_SaveList);
+  // Standard calling convention CSRs.
+  if (TM.isPPC64()) {
+    if (Subtarget.hasAltivec())
+      return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+                    : CSR_SVR464_Altivec_SaveList;
+    return SaveR2 ? CSR_SVR464_R2_SaveList
+                  : CSR_SVR464_SaveList;
+  }
+  // 32-bit targets.
+  if (Subtarget.hasAltivec())
+    return CSR_SVR432_Altivec_SaveList;
+  else if (Subtarget.hasSPE())
+    return CSR_SVR432_SPE_SaveList;
+  return CSR_SVR432_SaveList;
 }
 
 const MCPhysReg *
@@ -221,18 +236,26 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                   : CSR_Darwin64_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
                                                   : CSR_Darwin32_RegMask);
+  if (Subtarget.isAIXABI()) {
+    assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
+    return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+  }
 
   if (CC == CallingConv::Cold) {
     return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
                                                   : CSR_SVR64_ColdCC_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
-                                                  : CSR_SVR32_ColdCC_RegMask);
+                                                  : (Subtarget.hasSPE()
+                                                  ? CSR_SVR32_ColdCC_SPE_RegMask
+                                                  : CSR_SVR32_ColdCC_RegMask));
   }
 
   return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
                                                 : CSR_SVR464_RegMask)
                       : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
-                                                : CSR_SVR432_RegMask);
+                                                : (Subtarget.hasSPE()
+                                                  ? CSR_SVR432_SPE_RegMask
+                                                  : CSR_SVR432_RegMask));
 }
 
 const uint32_t*
@@ -288,6 +311,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
   }
 
+  // Always reserve r2 on AIX for now.
+  // TODO: Make r2 allocatable on AIX/XCOFF for some leaf functions.
+  if (Subtarget.isAIXABI())
+    markSuperRegs(Reserved, PPC::R2);  // System-reserved register
+
   // On PPC64, r13 is the thread pointer. Never allocate this register.
   if (TM.isPPC64())
     markSuperRegs(Reserved, PPC::R13);
@@ -316,6 +344,51 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const PPCInstrInfo *InstrInfo =  Subtarget.getInstrInfo();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+  // If the callee saved info is invalid we have to default to true for safety.
+  if (!MFI.isCalleeSavedInfoValid())
+    return true;
+
+  // A frame too large for the signed 16-bit immediate of a D-Form forces the
+  // use of X-Forms, and an X-Form needs a register to hold the address
+  // offset, so report that scavenging is required.
+  unsigned FrameSize = MFI.getStackSize();
+  // Signed 16 bits means that the FrameSize cannot be more than 15 bits.
+  if (FrameSize & ~0x7FFF)
+    return true;
+
+  // The callee saved info is valid so it can be traversed.
+  // Look for registers to be saved whose spill store opcode cannot take the
+  // address offset as an immediate.
+  for (unsigned i = 0; i < Info.size(); i++) {
+    int FrIdx = Info[i].getFrameIdx();
+    unsigned Reg = Info[i].getReg();
+
+    unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+    if (!MFI.isFixedObjectIndex(FrIdx)) {
+      // This is not a fixed object. If its opcode requires an aligned offset
+      // then we may still need to use the XForm.
+      if (offsetMinAlignForOpcode(Opcode) > 1)
+        return true;
+    }
+
+    // This is either:
+    // 1) A fixed frame index object, which we know is aligned, so as long
+    // as we have a valid DForm/DSForm/DQForm (non XForm) we don't
+    // need to consider the alignment here.
+    // 2) A non-fixed object; in that case we now know that the min required
+    // alignment is no more than 1 based on the previous check.
+    if (InstrInfo->isXFormMemOp(Opcode))
+      return true;
+  }
+  return false;
+}
+
 bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
                                                const MachineFunction &MF) const {
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
@@ -664,6 +737,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
   MachineFunction &MF = *MBB.getParent();
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  const TargetRegisterInfo* TRI = Subtarget.getRegisterInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   bool LP64 = TM.isPPC64();
@@ -673,27 +747,59 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
   unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
   unsigned SrcReg = MI.getOperand(0).getReg();
 
-  // We need to move the CR field that contains the CR bit we are spilling.
-  // The super register may not be explicitly defined (i.e. it can be defined
-  // by a CR-logical that only defines the subreg) so we state that the CR
-  // field is undef. Also, in order to preserve the kill flag on the CR bit,
-  // we add it as an implicit use.
-  BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+  // Search up the BB to find the definition of the CR bit.
+  MachineBasicBlock::reverse_iterator Ins;
+  unsigned CRBitSpillDistance = 0;
+  for (Ins = MI; Ins != MBB.rend(); Ins++) {
+    // Definition found.
+    if (Ins->modifiesRegister(SrcReg, TRI))
+      break;
+    // Unable to find CR bit definition within maximum search distance.
+    if (CRBitSpillDistance == MaxCRBitSpillDist) {
+      Ins = MI;
+      break;
+    }
+    // Skip debug instructions when counting CR bit spill distance.
+    if (!Ins->isDebugInstr())
+      CRBitSpillDistance++;
+  }
+
+  // Unable to find the definition of the CR bit in the MBB.
+  if (Ins == MBB.rend())
+    Ins = MI;
+
+  // There is no need to extract the CR bit if its value is already known.
+  switch (Ins->getOpcode()) {
+  case PPC::CRUNSET:
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
+      .addImm(0);
+    break;
+  case PPC::CRSET:
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
+      .addImm(-32768);
+    break;
+  default:
+    // We need to move the CR field that contains the CR bit we are spilling.
+    // The super register may not be explicitly defined (i.e. it can be defined
+    // by a CR-logical that only defines the subreg) so we state that the CR
+    // field is undef. Also, in order to preserve the kill flag on the CR bit,
+    // we add it as an implicit use.
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
       .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
       .addReg(SrcReg,
               RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
 
-  // If the saved register wasn't CR0LT, shift the bits left so that the bit to
-  // store is the first one. Mask all but that bit.
-  unsigned Reg1 = Reg;
-  Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
-
-  // rlwinm rA, rA, ShiftBits, 0, 0.
-  BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
-    .addReg(Reg1, RegState::Kill)
-    .addImm(getEncodingValue(SrcReg))
-    .addImm(0).addImm(0);
+    // If the saved register wasn't CR0LT, shift the bits left so that the bit
+    // to store is the first one. Mask all but that bit.
+    unsigned Reg1 = Reg;
+    Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
 
+    // rlwinm rA, rA, ShiftBits, 0, 0.
+    BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+      .addReg(Reg1, RegState::Kill)
+      .addImm(getEncodingValue(SrcReg))
+      .addImm(0).addImm(0);
+  }
   addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
                     .addReg(Reg, RegState::Kill),
                     FrameIndex);
@@ -826,9 +932,7 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
 }
 
 // If the offset must be a multiple of some value, return what that value is.
-static unsigned offsetMinAlign(const MachineInstr &MI) {
-  unsigned OpC = MI.getOpcode();
-
+static unsigned offsetMinAlignForOpcode(unsigned OpC) {
   switch (OpC) {
   default:
     return 1;
@@ -847,12 +951,21 @@ static unsigned offsetMinAlign(const MachineInstr &MI) {
   case PPC::STXSD:
   case PPC::STXSSP:
     return 4;
+  case PPC::EVLDD:
+  case PPC::EVSTDD:
+    return 8;
   case PPC::LXV:
   case PPC::STXV:
     return 16;
   }
 }
 
+// Return the value MI's offset must be a multiple of, based on MI's opcode.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
+  unsigned OpC = MI.getOpcode();
+  return offsetMinAlignForOpcode(OpC);
+}
+
 // Return the OffsetOperandNo given the FIOperandNum (and the instruction).
 static unsigned getOffsetONFromFION(const MachineInstr &MI,
                                     unsigned FIOperandNum) {
@@ -963,7 +1076,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) &&
+  bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+                            isUInt<8>(Offset) :
+                            isInt<16>(Offset);
+  if (!noImmForm && ((OffsetFitsMnemonic &&
                       ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
@@ -1001,7 +1117,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   if (noImmForm)
     OperandBase = 1;
-  else if (OpC != TargetOpcode::INLINEASM) {
+  else if (OpC != TargetOpcode::INLINEASM &&
+           OpC != TargetOpcode::INLINEASM_BR) {
     assert(ImmToIdxMap.count(OpC) &&
            "No indexed form of load or store available!");
     unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -1016,7 +1133,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
 }
 
-unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const PPCFrameLowering *TFI = getFrameLowering(MF);
 
   if (!TM.isPPC64())
@@ -1025,7 +1142,7 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
     return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
 }
 
-unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
   if (!hasBasePointer(MF))
     return getFrameRegister(MF);
@@ -1080,7 +1197,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
   MachineBasicBlock &MBB = *MI->getParent();
   MachineFunction &MF = *MBB.getParent();
   const PPCFrameLowering *TFI = getFrameLowering(MF);
-  unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+  unsigned StackEst = TFI->determineFrameLayout(MF, true);
 
   // If we likely don't need a stack frame, then we probably don't need a
   // virtual base register either.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index e93fe4ce3453..a50e05920cd4 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,13 +14,14 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
 
 #define GET_REGINFO_HEADER
 #include "PPCGenRegisterInfo.inc"
 
 namespace llvm {
+class PPCTargetMachine;
 
 inline static unsigned getCRFromCRBit(unsigned SrcReg) {
   unsigned Reg = 0;
@@ -90,9 +90,7 @@ public:
     return true;
   }
 
-  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
-    return true;
-  }
+  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
 
   bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
     return true;
@@ -134,10 +132,10 @@ public:
                           int64_t Offset) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   // Base pointer (stack realignment) support.
-  unsigned getBaseRegister(const MachineFunction &MF) const;
+  Register getBaseRegister(const MachineFunction &MF) const;
   bool hasBasePointer(const MachineFunction &MF) const;
 
   /// stripRegisterPrefix - This method strips the character prefix from a
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index d0d29b6d2c7d..af0dff6347a6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -375,8 +374,6 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
 def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
                                                 CR7, CR2, CR3, CR4)>;
 
-def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
-
 // The CTR registers are not allocatable because they're used by the
 // decrement-and-branch instructions, and thus need to stay live across
 // multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index c8fe7d7eea78..4fa29d96ca14 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -1,9 +1,8 @@
 //===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -106,6 +105,7 @@ def IIC_VecVSL       : InstrItinClass;
 def IIC_VecVSR       : InstrItinClass;
 def IIC_SprMTMSRD    : InstrItinClass;
 def IIC_SprSLIE      : InstrItinClass;
+def IIC_SprSLBFEE    : InstrItinClass;
 def IIC_SprSLBIE     : InstrItinClass;
 def IIC_SprSLBIEG    : InstrItinClass;
 def IIC_SprSLBMTE    : InstrItinClass;
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 646822eedbe0..708261fc7cc8 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -1,9 +1,8 @@
 //===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index f34c1accc0fd..c2b298524e00 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -1,9 +1,8 @@
 //===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCScheduleE500.td b/lib/Target/PowerPC/PPCScheduleE500.td
index 479a970b2537..74744dda54f7 100644
--- a/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/lib/Target/PowerPC/PPCScheduleE500.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index d8bda073833f..1a1c041565b6 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index 3e50803955c4..4480d7fba4fb 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 0995b7200d93..8f1907f2c016 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 1b15c7b3c7ad..0eabc49d7841 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 0044c3c6a449..9c84aec638d7 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index c802b80170fb..087073537796 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
index 1d6e509819da..5a8c1eb2b837 100644
--- a/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index ff39dfda7016..70a58f42a98a 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td
index a1e625c855e0..6a79cca89194 100644
--- a/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/lib/Target/PowerPC/PPCScheduleP9.td
@@ -1,9 +1,8 @@
 //===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,8 +50,21 @@ let SchedModel = P9Model in {
 
   // ***************** Processor Resources *****************
 
-  //Dispatcher:
-  def DISPATCHER : ProcResource<12>;
+  // Dispatcher slots:
+  // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each
+  // corresponds to one of the four execution slices.
+  def DISPx02 : ProcResource<2>;
+  def DISPx13 : ProcResource<2>;
+  // The xa and xb ports can be used to send an iop to either of the two slices
+  // of the superslice, but are restricted to iops with only two primary sources.
+  def DISPxab : ProcResource<2>;
+  // b0 and b1 are dedicated dispatch ports into the branch slice.
+  def DISPb01 : ProcResource<2>;
+
+  // Any non BR dispatch ports
+  def DISP_NBR
+      : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>;
+  def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>;
 
   // Issue Ports
   // An instruction can go down one of two issue queues.
@@ -117,8 +129,37 @@ let SchedModel = P9Model in {
 
   // ***************** SchedWriteRes Definitions *****************
 
-  //Dispatcher
-  def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+  // Dispatcher
+  // Dispatch Rules: '-' or 'V'
+  // Vector ('V') - vector iops (128-bit operand) take only one decode and
+  // dispatch slot but are dispatched to both the even and odd slices of a
+  // superslice.
+  def DISP_1C : SchedWriteRes<[DISP_NBR]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'E'
+  // Even slice ('E') - certain operations must be sent only to an even slice.
+  // Also consumes odd dispatch slice slot of the same superslice at dispatch.
+  def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'P'
+  // Paired ('P') - certain cracked and expanded iops are paired such that they
+  // must dispatch together to the same superslice.
+  def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Tuple Restricted ('R') - certain iops preclude dispatching more than one
+  // operation per slice for the superslice to which they are dispatched.
+  def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Each execution and branch slice can receive up to two iops per cycle
+  def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> {
     let NumMicroOps = 0;
     let Latency = 1;
   }
@@ -148,7 +189,7 @@ let SchedModel = P9Model in {
 
   // ALU Units
   // An ALU may take either 2 or 3 cycles to complete the operation.
-  // However, the ALU unit is only every busy for 1 cycle at a time and may
+  // However, the ALU unit is only ever busy for 1 cycle at a time and may
   // receive new instructions each cycle.
   def P9_ALU_2C : SchedWriteRes<[ALU]> {
     let Latency = 2;
@@ -203,10 +244,6 @@ let SchedModel = P9Model in {
   // DP Unit
   // A DP unit may take from 2 to 36 cycles to complete.
   // Some DP operations keep the unit busy for up to 10 cycles.
-  def P9_DP_2C : SchedWriteRes<[DP]> {
-    let Latency = 2;
-  }
-
   def P9_DP_5C : SchedWriteRes<[DP]> {
     let Latency = 5;
   }
@@ -228,11 +265,6 @@ let SchedModel = P9Model in {
     let Latency = 22;
   }
 
-  def P9_DP_24C_8 : SchedWriteRes<[DP]> {
-    let ResourceCycles = [8];
-    let Latency = 24;
-  }
-
   def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
     let ResourceCycles = [8];
     let Latency = 24;
@@ -248,11 +280,6 @@ let SchedModel = P9Model in {
     let Latency = 22;
   }
 
-  def P9_DP_27C_7 : SchedWriteRes<[DP]> {
-    let ResourceCycles = [7];
-    let Latency = 27;
-  }
-
   def P9_DPE_27C_10 : SchedWriteRes<[DP]> {
     let ResourceCycles = [10];
     let Latency = 27;
@@ -383,16 +410,12 @@ let SchedModel = P9Model in {
   def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
   def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
   def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>;
-  def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
   def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
   def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>;
   def P9_ALUOpAndALUOpAndALUOp_6C :
     WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>;
   def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>;
-  def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
   def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
-  def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
-  def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
   def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
   def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
   def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c0cbfd779cb9..6aa7528634d3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,6 +39,11 @@ static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
   cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
   cl::Hidden);
 
+static cl::opt<bool>
+    EnableMachinePipeliner("ppc-enable-pipeliner",
+                           cl::desc("Enable Machine Pipeliner for PPC"),
+                           cl::init(false), cl::Hidden);
+
 PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                             StringRef FS) {
   initializeEnvironment();
@@ -68,6 +72,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasFPU = false;
   HasQPX = false;
   HasVSX = false;
+  NeedsTwoConstNR = false;
   HasP8Vector = false;
   HasP8Altivec = false;
   HasP8Crypto = false;
@@ -103,11 +108,13 @@ void PPCSubtarget::initializeEnvironment() {
   HasDirectMove = false;
   IsQPXStackUnaligned = false;
   HasHTM = false;
-  HasFusion = false;
   HasFloat128 = false;
   IsISA3_0 = false;
   UseLongCalls = false;
   SecurePlt = false;
+  VectorsUseTwoUnits = false;
+  UsePPCPreRASchedStrategy = false;
+  UsePPCPostRASchedStrategy = false;
 
   HasPOPCNTD = POPCNTD_Unavailable;
 }
@@ -138,6 +145,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if (isDarwin())
     HasLazyResolverStubs = true;
 
+  if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+      TargetTriple.isMusl())
+    SecurePlt = true;
+
   if (HasSPE && IsPPC64)
     report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
   if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
@@ -175,10 +186,14 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
   return false;
 }
 
-bool PPCSubtarget::enableMachineScheduler() const {
-  return true;
+bool PPCSubtarget::enableMachineScheduler() const { return true; }
+
+bool PPCSubtarget::enableMachinePipeliner() const {
+  return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
 }
 
+bool PPCSubtarget::useDFAforSMS() const { return false; }
+
 // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
 bool PPCSubtarget::enablePostRAScheduler() const { return true; }
 
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index c56f254d6bec..55fec1cb6d99 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,9 +1,8 @@
 //===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,6 +98,7 @@ protected:
   bool HasSPE;
   bool HasQPX;
   bool HasVSX;
+  bool NeedsTwoConstNR;
   bool HasP8Vector;
   bool HasP8Altivec;
   bool HasP8Crypto;
@@ -131,11 +131,13 @@ protected:
   bool HasPartwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
-  bool HasFusion;
   bool HasFloat128;
   bool IsISA3_0;
   bool UseLongCalls;
   bool SecurePlt;
+  bool VectorsUseTwoUnits;
+  bool UsePPCPreRASchedStrategy;
+  bool UsePPCPostRASchedStrategy;
 
   POPCNTDKind HasPOPCNTD;
 
@@ -244,6 +246,7 @@ public:
   bool hasFPU() const { return HasFPU; }
   bool hasQPX() const { return HasQPX; }
   bool hasVSX() const { return HasVSX; }
+  bool needsTwoConstNR() const { return NeedsTwoConstNR; }
   bool hasP8Vector() const { return HasP8Vector; }
   bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasP8Crypto() const { return HasP8Crypto; }
@@ -260,6 +263,7 @@ public:
   bool isPPC4xx() const { return IsPPC4xx; }
   bool isPPC6xx() const { return IsPPC6xx; }
   bool isSecurePlt() const {return SecurePlt; }
+  bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
   bool isE500() const { return IsE500; }
   bool isFeatureMFTB() const { return FeatureMFTB; }
   bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -267,6 +271,8 @@ public:
   bool hasInvariantFunctionDescriptors() const {
     return HasInvariantFunctionDescriptors;
   }
+  bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
+  bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
   bool hasPartwordAtomics() const { return HasPartwordAtomics; }
   bool hasDirectMove() const { return HasDirectMove; }
 
@@ -285,7 +291,6 @@ public:
   }
 
   bool hasHTM() const { return HasHTM; }
-  bool hasFusion() const { return HasFusion; }
   bool hasFloat128() const { return HasFloat128; }
   bool isISA3_0() const { return IsISA3_0; }
   bool useLongCalls() const { return UseLongCalls; }
@@ -307,16 +312,21 @@ public:
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
 
   bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
-  bool isSVR4ABI() const { return !isDarwinABI(); }
+  bool isAIXABI() const { return TargetTriple.isOSAIX(); }
+  bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
   bool isELFv2ABI() const;
 
   /// Originally, this function return hasISEL(). Now we always enable it,
   /// but may expand the ISEL instruction later.
   bool enableEarlyIfConversion() const override { return true; }
 
-  // Scheduling customization.
+  /// Scheduling customization.
   bool enableMachineScheduler() const override;
-  // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  /// Pipeliner customization.
+  bool enableMachinePipeliner() const override;
+  /// Machine Pipeliner customization.
+  bool useDFAforSMS() const override;
+  /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
   bool enablePostRAScheduler() const override;
   AntiDepBreakMode getAntiDepBreakMode() const override;
   void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ac36abbe8439..fb826c4a32f1 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -1,9 +1,8 @@
 //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,10 +34,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-tls-dynamic-call"
 
-namespace llvm {
-  void initializePPCTLSDynamicCallPass(PassRegistry&);
-}
-
 namespace {
   struct PPCTLSDynamicCall : public MachineFunctionPass {
     static char ID;
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 17345b6ca8d3..3eb0569fb955 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,10 +82,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-toc-reg-deps"
 
-namespace llvm {
-  void initializePPCTOCRegDepsPass(PassRegistry&);
-}
-
 namespace {
   // PPCTOCRegDeps pass - For simple functions without epilogue code, move
   // returns up, and create conditional returns, to avoid unnecessary
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 580d057602f5..ce00f848dd72 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,9 +13,11 @@
 #include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPC.h"
+#include "PPCMachineScheduler.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetObjectFile.h"
 #include "PPCTargetTransformInfo.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -100,6 +101,19 @@ extern "C" void LLVMInitializePowerPCTarget() {
   RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
 
   PassRegistry &PR = *PassRegistry::getPassRegistry();
+#ifndef NDEBUG
+  initializePPCCTRLoopsVerifyPass(PR);
+#endif
+  initializePPCLoopPreIncPrepPass(PR);
+  initializePPCTOCRegDepsPass(PR);
+  initializePPCEarlyReturnPass(PR);
+  initializePPCVSXCopyPass(PR);
+  initializePPCVSXFMAMutatePass(PR);
+  initializePPCVSXSwapRemovalPass(PR);
+  initializePPCReduceCRLogicalsPass(PR);
+  initializePPCBSelPass(PR);
+  initializePPCBranchCoalescingPass(PR);
+  initializePPCQPXLoadSplatPass(PR);
   initializePPCBoolRetToIntPass(PR);
   initializePPCExpandISELPass(PR);
   initializePPCPreEmitPeepholePass(PR);
@@ -199,6 +213,8 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
   case Triple::ppc64le:
     return PPCTargetMachine::PPC_ABI_ELFv2;
   case Triple::ppc64:
+    if (TT.getEnvironment() == llvm::Triple::ELFv2)
+      return PPCTargetMachine::PPC_ABI_ELFv2;
     return PPCTargetMachine::PPC_ABI_ELFv1;
   default:
     return PPCTargetMachine::PPC_ABI_UNKNOWN;
@@ -227,9 +243,9 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
                                                  bool JIT) {
   if (CM) {
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     if (*CM == CodeModel::Kernel)
-      report_fatal_error("Target does not support the kernel CodeModel");
+      report_fatal_error("Target does not support the kernel CodeModel", false);
     return *CM;
   }
   if (!TT.isOSDarwin() && !JIT &&
@@ -238,6 +254,29 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
   return CodeModel::Small;
 }
 
+
+static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
+  const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+  ScheduleDAGMILive *DAG =
+    new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
+                          llvm::make_unique<PPCPreRASchedStrategy>(C) :
+                          llvm::make_unique<GenericScheduler>(C));
+  // add DAG Mutations here.
+  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+  return DAG;
+}
+
+static ScheduleDAGInstrs *createPPCPostMachineScheduler(
+  MachineSchedContext *C) {
+  const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+  ScheduleDAGMI *DAG =
+    new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
+                      llvm::make_unique<PPCPostRASchedStrategy>(C) :
+                      llvm::make_unique<PostGenericScheduler>(C), true);
+  // add DAG Mutations here.
+  return DAG;
+}
+
 // The FeatureString here is a little subtle. We are modifying the feature
 // string with what are (currently) non-function specific overrides as it goes
 // into the LLVMTargetMachine constructor and then using the stored value in the
@@ -331,6 +370,14 @@ public:
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    return createPPCMachineScheduler(C);
+  }
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    return createPPCPostMachineScheduler(C);
+  }
 };
 
 } // end anonymous namespace
@@ -374,7 +421,7 @@ bool PPCPassConfig::addPreISel() {
     addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
 
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createPPCCTRLoops());
+    addPass(createHardwareLoopsPass());
 
   return false;
 }
@@ -441,6 +488,9 @@ void PPCPassConfig::addPreRegAlloc() {
   }
   if (EnableExtraTOCRegDeps)
     addPass(createPPCTOCRegDepsPass());
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&MachinePipelinerID);
 }
 
 void PPCPassConfig::addPreSched2() {
@@ -469,3 +519,13 @@ TargetTransformInfo
 PPCTargetMachine::getTargetTransformInfo(const Function &F) {
   return TargetTransformInfo(PPCTTIImpl(this, F));
 }
+
+static MachineSchedRegistry
+PPCPreRASchedRegistry("ppc-prera",
+                      "Run PowerPC PreRA specific scheduler",
+                      createPPCMachineScheduler);
+
+static MachineSchedRegistry
+PPCPostRASchedRegistry("ppc-postra",
+                       "Run PowerPC PostRA specific scheduler",
+                       createPPCPostMachineScheduler);
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 75b98a815ab4..fd1d14ae32d4 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -59,10 +58,6 @@ public:
     const Triple &TT = getTargetTriple();
     return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
   };
-
-  bool isMachineVerifierClean() const override {
-    return false;
-  }
 };
 } // end namespace llvm
 
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index a049dc3fda93..e237fab1b267 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 417b8ed0d612..78a5840c87c7 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 310fea9ef09f..e17361d997fd 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,9 +1,8 @@
 //===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index bc9bcab83a0a..ff3dfbfaca05 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,17 +1,18 @@
 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
@@ -32,6 +33,13 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                 cl::desc("Enable using coldcc calling conv for cold "
                          "internal functions"));
 
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+                      cl::desc("Loops with a constant trip count smaller than "
+                               "this value will not use the count register."));
+
 //===----------------------------------------------------------------------===//
 //
 // PPC cost model.
@@ -205,6 +213,341 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
   return BaseT::getUserCost(U, Operands);
 }
 
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
+                             TargetLibraryInfo *LibInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+
+  // Loop through the inline asm constraints and look for something that
+  // clobbers ctr.
+  auto asmClobbersCTR = [](InlineAsm *IA) {
+    InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+      InlineAsm::ConstraintInfo &C = CIV[i];
+      if (C.Type != InlineAsm::isInput)
+        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+          if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+            return true;
+    }
+    return false;
+  };
+
+  // Determining the address of a TLS variable results in a function call in
+  // certain TLS models.
+  std::function<bool(const Value*)> memAddrUsesCTR =
+    [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+    const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+    if (!GV) {
+      // Recurse to check for constants that refer to TLS global variables.
+      if (const auto *CV = dyn_cast<Constant>(MemAddr))
+        for (const auto &CO : CV->operands())
+          if (memAddrUsesCTR(CO))
+            return true;
+
+      return false;
+    }
+
+    if (!GV->isThreadLocal())
+      return false;
+    TLSModel::Model Model = TM.getTLSModel(GV);
+    return Model == TLSModel::GeneralDynamic ||
+      Model == TLSModel::LocalDynamic;
+  };
+
+  auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+      return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+    return false;
+  };
+
+  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+       J != JE; ++J) {
+    if (CallInst *CI = dyn_cast<CallInst>(J)) {
+      // Inline ASM is okay, unless it clobbers the ctr register.
+      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+        if (asmClobbersCTR(IA))
+          return true;
+        continue;
+      }
+
+      if (Function *F = CI->getCalledFunction()) {
+        // Most intrinsics don't become function calls, but some might.
+        // sin, cos, exp and log are always calls.
+        unsigned Opcode = 0;
+        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+          switch (F->getIntrinsicID()) {
+          default: continue;
+          // If we have a call to set_loop_iterations or loop_decrement,
+          // we're definitely using CTR.
+          case Intrinsic::set_loop_iterations:
+          case Intrinsic::loop_decrement:
+            return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                       !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+#  pragma pop_macro("setjmp")
+#  undef setjmp_undefined_for_msvc
+#endif
+
+          case Intrinsic::longjmp:
+
+          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+          // because, although it does clobber the counter register, the
+          // control can't then return to inside the loop unless there is also
+          // an eh_sjlj_setjmp.
+          case Intrinsic::eh_sjlj_setjmp:
+
+          case Intrinsic::memcpy:
+          case Intrinsic::memmove:
+          case Intrinsic::memset:
+          case Intrinsic::powi:
+          case Intrinsic::log:
+          case Intrinsic::log2:
+          case Intrinsic::log10:
+          case Intrinsic::exp:
+          case Intrinsic::exp2:
+          case Intrinsic::pow:
+          case Intrinsic::sin:
+          case Intrinsic::cos:
+            return true;
+          case Intrinsic::copysign:
+            if (CI->getArgOperand(0)->getType()->getScalarType()->
+                isPPC_FP128Ty())
+              return true;
+            else
+              continue; // ISD::FCOPYSIGN is never a library call.
+          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
+          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
+          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
+          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
+          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
+          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
+          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
+          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
+          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
+          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
+          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
+          }
+        }
+
+        // PowerPC does not use [US]DIVREM or other library calls for
+        // operations on regular types which are not otherwise library calls
+        // (i.e. soft float or atomics). If adapting for targets that do,
+        // additional care is required here.
+
+        LibFunc Func;
+        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+            LibInfo->getLibFunc(F->getName(), Func) &&
+            LibInfo->hasOptimizedCodeGen(Func)) {
+          // Non-read-only functions are never treated as intrinsics.
+          if (!CI->onlyReadsMemory())
+            return true;
+
+          // Conversion happens only for FP calls.
+          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+            return true;
+
+          switch (Func) {
+          default: return true;
+          case LibFunc_copysign:
+          case LibFunc_copysignf:
+            continue; // ISD::FCOPYSIGN is never a library call.
+          case LibFunc_copysignl:
+            return true;
+          case LibFunc_fabs:
+          case LibFunc_fabsf:
+          case LibFunc_fabsl:
+            continue; // ISD::FABS is never a library call.
+          case LibFunc_sqrt:
+          case LibFunc_sqrtf:
+          case LibFunc_sqrtl:
+            Opcode = ISD::FSQRT; break;
+          case LibFunc_floor:
+          case LibFunc_floorf:
+          case LibFunc_floorl:
+            Opcode = ISD::FFLOOR; break;
+          case LibFunc_nearbyint:
+          case LibFunc_nearbyintf:
+          case LibFunc_nearbyintl:
+            Opcode = ISD::FNEARBYINT; break;
+          case LibFunc_ceil:
+          case LibFunc_ceilf:
+          case LibFunc_ceill:
+            Opcode = ISD::FCEIL; break;
+          case LibFunc_rint:
+          case LibFunc_rintf:
+          case LibFunc_rintl:
+            Opcode = ISD::FRINT; break;
+          case LibFunc_round:
+          case LibFunc_roundf:
+          case LibFunc_roundl:
+            Opcode = ISD::FROUND; break;
+          case LibFunc_trunc:
+          case LibFunc_truncf:
+          case LibFunc_truncl:
+            Opcode = ISD::FTRUNC; break;
+          case LibFunc_fmin:
+          case LibFunc_fminf:
+          case LibFunc_fminl:
+            Opcode = ISD::FMINNUM; break;
+          case LibFunc_fmax:
+          case LibFunc_fmaxf:
+          case LibFunc_fmaxl:
+            Opcode = ISD::FMAXNUM; break;
+          }
+        }
+
+        if (Opcode) {
+          EVT EVTy =
+              TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
+
+          if (EVTy == MVT::Other)
+            return true;
+
+          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
+            continue;
+          else if (EVTy.isVector() &&
+                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
+            continue;
+
+          return true;
+        }
+      }
+
+      return true;
+    } else if (isa<BinaryOperator>(J) &&
+               J->getType()->getScalarType()->isPPC_FP128Ty()) {
+      // Most operations on ppc_f128 values become calls.
+      return true;
+    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+      CastInst *CI = cast<CastInst>(J);
+      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+          isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+        return true;
+    } else if (isLargeIntegerTy(!TM.isPPC64(),
+                                J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::UDiv ||
+                J->getOpcode() == Instruction::SDiv ||
+                J->getOpcode() == Instruction::URem ||
+                J->getOpcode() == Instruction::SRem)) {
+      return true;
+    } else if (!TM.isPPC64() &&
+               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+               (J->getOpcode() == Instruction::Shl ||
+                J->getOpcode() == Instruction::AShr ||
+                J->getOpcode() == Instruction::LShr)) {
+      // Only on PPC32, for 128-bit integers (specifically not 64-bit
+      // integers), these might be runtime calls.
+      return true;
+    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+      // On PowerPC, indirect jumps use the counter register.
+      return true;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+        return true;
+    }
+
+    // FREM is always a call.
+    if (J->getOpcode() == Instruction::FRem)
+      return true;
+
+    if (ST->useSoftFloat()) {
+      switch(J->getOpcode()) {
+      case Instruction::FAdd:
+      case Instruction::FSub:
+      case Instruction::FMul:
+      case Instruction::FDiv:
+      case Instruction::FPTrunc:
+      case Instruction::FPExt:
+      case Instruction::FPToUI:
+      case Instruction::FPToSI:
+      case Instruction::UIToFP:
+      case Instruction::SIToFP:
+      case Instruction::FCmp:
+        return true;
+      }
+    }
+
+    for (Value *Operand : J->operands())
+      if (memAddrUsesCTR(Operand))
+        return true;
+  }
+
+  return false;
+}
+
+bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                          AssumptionCache &AC,
+                                          TargetLibraryInfo *LibInfo,
+                                          HardwareLoopInfo &HWLoopInfo) {
+  const PPCTargetMachine &TM = ST->getTargetMachine();
+  TargetSchedModel SchedModel;
+  SchedModel.init(ST);
+
+  // Do not convert small short loops to CTR loop.
+  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+    CodeMetrics Metrics;
+    for (BasicBlock *BB : L->blocks())
+      Metrics.analyzeBasicBlock(BB, *this, EphValues);
+    // 6 is an approximate latency for the mtctr instruction.
+    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+      return false;
+  }
+
+  // We don't want to spill/restore the counter register, and so we don't
+  // want to use the counter register if the loop contains calls.
+  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+       I != IE; ++I)
+    if (mightUseCTR(*I, LibInfo))
+      return false;
+
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // If there is an exit edge known to be frequently taken,
+  // we should not transform this loop.
+  for (auto &BB : ExitingBlocks) {
+    Instruction *TI = BB->getTerminator();
+    if (!TI) continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      uint64_t TrueWeight = 0, FalseWeight = 0;
+      if (!BI->isConditional() ||
+          !BI->extractProfMetadata(TrueWeight, FalseWeight))
+        continue;
+
+      // If the exit path is more frequent than the loop path,
+      // we return here without further analysis for this loop.
+      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+      if (( TrueIsExit && FalseWeight < TrueWeight) ||
+          (!TrueIsExit && FalseWeight > TrueWeight))
+        return false;
+    }
+  }
+
+  LLVMContext &C = L->getHeader()->getContext();
+  HWLoopInfo.CountType = TM.isPPC64() ?
+    Type::getInt64Ty(C) : Type::getInt32Ty(C);
+  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+  return true;
+}
+
 void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   if (ST->getDarwinDirective() == PPC::DIR_A2) {
@@ -239,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
   return LoopHasReductions;
 }
 
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
-  static const auto Options = []() {
-    TTI::MemCmpExpansionOptions Options;
-    Options.LoadSizes.push_back(8);
-    Options.LoadSizes.push_back(4);
-    Options.LoadSizes.push_back(2);
-    Options.LoadSizes.push_back(1);
-    return Options;
-  }();
-  return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  Options.LoadSizes = {8, 4, 2, 1};
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  return Options;
 }
 
 bool PPCTTIImpl::enableInterleavedAccessVectorization() {
@@ -324,6 +662,33 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 2;
 }
 
+// Adjust the cost of vector instructions on targets where there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code wrt. scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+                                     Type *Ty2) {
+  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+    return Cost;
+
+  std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+  // If type legalization involves splitting the vector, we don't want to
+  // double the cost at every step - only the last step.
+  if (LT1.first != 1 || !LT1.second.isVector())
+    return Cost;
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  if (TLI->isOperationExpand(ISD, LT1.second))
+    return Cost;
+
+  if (Ty2) {
+    std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+    if (LT2.first != 1 || !LT2.second.isVector())
+      return Cost;
+  }
+
+  return Cost * 2;
+}
+
 int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -331,8 +696,9 @@ int PPCTTIImpl::getArithmeticInstrCost(
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
   // Fallback to the default implementation.
-  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
-                                       Opd1PropInfo, Opd2PropInfo);
+  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+                                           Opd1PropInfo, Opd2PropInfo);
+  return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
 }
 
 int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -345,19 +711,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   // instruction). We need one such shuffle instruction for each actual
   // register (this is not true for arbitrary shuffles, but is true for the
   // structured types of shuffles covered by TTI::ShuffleKind).
-  return LT.first;
+  return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+                              nullptr);
 }
 
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                  const Instruction *I) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 
-  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+  int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+  return vectorCostAdjustment(Cost, Opcode, Dst, Src);
 }
 
 int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                    const Instruction *I) {
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
 }
 
 int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -366,18 +735,23 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+  Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
-    // Double-precision scalars are already located in index #0.
-    if (Index == 0)
+    // Double-precision scalars are already located in index #0 (or #1 if LE).
+    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
+        Index == (ST->isLittleEndian() ? 1 : 0))
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
+
   } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
     // Floating point scalars are already located in index #0.
     if (Index == 0)
       return 0;
 
-    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return Cost;
   }
 
   // Estimated cost of a load-hit-store delay.  This was obtained
@@ -394,9 +768,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   // these need to be estimated as very costly.
   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
       ISD == ISD::INSERT_VECTOR_ELT)
-    return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+    return LHSPenalty + Cost;
 
-  return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  return Cost;
 }
 
 int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -407,6 +781,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
          "Invalid Opcode");
 
   int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
 
   bool IsAltivecType = ST->hasAltivec() &&
                        (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -500,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   return Cost;
 }
 
+bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+                            LoopInfo *LI, DominatorTree *DT,
+                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
+  // Process nested loops first.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+      return false; // Stop search.
+
+  HardwareLoopInfo HWLoopInfo(L);
+
+  if (!HWLoopInfo.canAnalyze(*LI))
+    return false;
+
+  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
+    return false;
+
+  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
+    return false;
+
+  *BI = HWLoopInfo.ExitBranch;
+  return true;
+}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 9221a910288a..5d76ee418b69 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -17,7 +16,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
 
-#include "PPC.h"
 #include "PPCTargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
@@ -35,6 +33,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
 
   const PPCSubtarget *getST() const { return ST; }
   const PPCTargetLowering *getTLI() const { return TLI; }
+  bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
 
 public:
   explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -54,6 +53,13 @@ public:
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+                                AssumptionCache &AC,
+                                TargetLibraryInfo *LibInfo,
+                                HardwareLoopInfo &HWLoopInfo);
+  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+                  DominatorTree *DT, AssumptionCache *AC,
+                  TargetLibraryInfo *LibInfo);
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
@@ -63,14 +69,15 @@ public:
   /// @{
   bool useColdCCForColdCall(Function &F);
   bool enableAggressiveInterleaving(bool LoopHasReductions);
-  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const;
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const;
   bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getCacheLineSize();
   unsigned getPrefetchDistance();
   unsigned getMaxInterleaveFactor(unsigned VF);
+  int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 93fe3230ab81..719ed7b63878 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -1,9 +1,8 @@
 //===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,10 +36,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-vsx-copy"
 
-namespace llvm {
-  void initializePPCVSXCopyPass(PassRegistry&);
-}
-
 namespace {
   // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
   // (Altivec and scalar floating-point registers), we need to transform the
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 6586f503a7b8..ce78239df0a8 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -1,9 +1,8 @@
 //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1be193e08c01..44175af7f9b6 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -1,9 +1,8 @@
 //===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
@@ -60,10 +59,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-vsx-swaps"
 
-namespace llvm {
-  void initializePPCVSXSwapRemovalPass(PassRegistry&);
-}
-
 namespace {
 
 // A PPCVSXSwapEntry is created for each machine instruction that
@@ -427,6 +422,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       // of opcodes having a common attribute in TableGen.  Should this
       // change, this is a prime candidate to use such a mechanism.
       case PPC::INLINEASM:
+      case PPC::INLINEASM_BR:
       case PPC::EXTRACT_SUBREG:
       case PPC::INSERT_SUBREG:
       case PPC::COPY_TO_REGCLASS:
diff --git a/lib/Target/PowerPC/README_P9.txt b/lib/Target/PowerPC/README_P9.txt
index d56f7cca7b21..c9984b7604bd 100644
--- a/lib/Target/PowerPC/README_P9.txt
+++ b/lib/Target/PowerPC/README_P9.txt
@@ -512,8 +512,8 @@ Fixed Point Facility:
                         "lxsdx $XT, $src", IIC_LdStLFD,
                         [(set f64:$XT, (load xoaddr:$src))]>;
 
-  . (set f64:$XT, (load ixaddr:$src))
-    (set f64:$XT, (store ixaddr:$dst))
+  . (set f64:$XT, (load iaddrX4:$src))
+    (set f64:$XT, (store iaddrX4:$dst))
 
 - Load/Store SP, with conversion from/to DP: lxssp stxssp
   . Similar to lxsspx/stxsspx:
@@ -521,8 +521,8 @@ Fixed Point Facility:
                          "lxsspx $XT, $src", IIC_LdStLFD,
                          [(set f32:$XT, (load xoaddr:$src))]>;
 
-  . (set f32:$XT, (load ixaddr:$src))
-    (set f32:$XT, (store ixaddr:$dst))
+  . (set f32:$XT, (load iaddrX4:$src))
+    (set f32:$XT, (store iaddrX4:$dst))
 
 - Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
   . Similar to lxsiwzx:
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 979595264472..99b5dec74668 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
new file mode 100644
index 000000000000..2d0afbfb1be0
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- PowerPCTargetInfo.h - PowerPC Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+#define LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 1d1112cc5124..0172c6298772 100644
--- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #include "MCTargetDesc/RISCVMCExpr.h"
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "Utils/RISCVBaseInfo.h"
 #include "Utils/RISCVMatInt.h"
 #include "llvm/ADT/STLExtras.h"
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -47,6 +48,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
 
   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
   bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
+  bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); }
 
   RISCVTargetStreamer &getTargetStreamer() {
     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -79,14 +81,42 @@ class RISCVAsmParser : public MCTargetAsmParser {
   // synthesize the desired immedate value into the destination register.
   void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
 
+  // Helper to emit a combination of AUIPC and SecondOpcode. Used to implement
+  // helpers such as emitLoadLocalAddress and emitLoadAddress.
+  void emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
+                         const MCExpr *Symbol, RISCVMCExpr::VariantKind VKHi,
+                         unsigned SecondOpcode, SMLoc IDLoc, MCStreamer &Out);
+
   // Helper to emit pseudo instruction "lla" used in PC-rel addressing.
   void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
 
+  // Helper to emit pseudo instruction "la" used in GOT/PC-rel addressing.
+  void emitLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+  // Helper to emit pseudo instruction "la.tls.ie" used in initial-exec TLS
+  // addressing.
+  void emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+  // Helper to emit pseudo instruction "la.tls.gd" used in global-dynamic TLS
+  // addressing.
+  void emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+  // Helper to emit pseudo load/store instruction with a symbol.
+  void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
+                           MCStreamer &Out, bool HasTmpReg);
+
+  // Checks that a PseudoAddTPRel is using x4/tp in its second input operand.
+  // Enforcing this using a restricted register class for the second input
+  // operand of PseudoAddTPRel results in a poor diagnostic due to the fact
+  // 'add' is an overloaded mnemonic.
+  bool checkPseudoAddTPRel(MCInst &Inst, OperandVector &Operands);
+
   /// Helper for processing MC instructions that have been successfully matched
   /// by MatchAndEmitInstruction. Modifications to the emitted instructions,
   /// like the expansion of pseudo instructions (e.g., "li"), can be performed
   /// in this method.
-  bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+  bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
+                          MCStreamer &Out);
 
 // Auto-generated instruction matching functions
 #define GET_ASSEMBLER_HEADER
@@ -99,6 +129,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
   OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
   OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
   OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
+  OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
   OperandMatchResultTy parseJALOffset(OperandVector &Operands);
 
   bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -269,6 +300,27 @@ public:
            VK == RISCVMCExpr::VK_RISCV_None;
   }
 
+  bool isCallSymbol() const {
+    int64_t Offset;
+    RISCVMCExpr::VariantKind Kind;
+    // Reject non-immediates, constants, and unclassifiable expressions.
+    if (!isImm() || evaluateConstantImm(getImm(), Offset, Kind) ||
+        !RISCVAsmParser::classifySymbolRef(getImm(), Kind, Offset))
+      return false;
+    return Kind == RISCVMCExpr::VK_RISCV_CALL ||
+           Kind == RISCVMCExpr::VK_RISCV_CALL_PLT;
+  }
+
+  bool isTPRelAddSymbol() const {
+    int64_t Offset;
+    RISCVMCExpr::VariantKind Kind;
+    // Reject non-immediates, constants, and unclassifiable expressions.
+    if (!isImm() || evaluateConstantImm(getImm(), Offset, Kind) ||
+        !RISCVAsmParser::classifySymbolRef(getImm(), Kind, Offset))
+      return false;
+    return Kind == RISCVMCExpr::VK_RISCV_TPREL_ADD;
+  }
+
   bool isCSRSystemRegister() const { return isSystemRegister(); }
 
   /// Return true if the operand is a valid for the fence instruction e.g.
@@ -463,7 +515,8 @@ public:
       IsValid = isInt<12>(Imm);
     return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
                        VK == RISCVMCExpr::VK_RISCV_LO ||
-                       VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
+                       VK == RISCVMCExpr::VK_RISCV_PCREL_LO ||
+                       VK == RISCVMCExpr::VK_RISCV_TPREL_LO);
   }
 
   bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
@@ -489,10 +542,12 @@ public:
     bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
     if (!IsConstantImm) {
       IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
-      return IsValid && VK == RISCVMCExpr::VK_RISCV_HI;
+      return IsValid && (VK == RISCVMCExpr::VK_RISCV_HI ||
+                         VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
     } else {
       return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
-                                 VK == RISCVMCExpr::VK_RISCV_HI);
+                                 VK == RISCVMCExpr::VK_RISCV_HI ||
+                                 VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
     }
   }
 
@@ -505,10 +560,16 @@ public:
     bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
     if (!IsConstantImm) {
       IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
-      return IsValid && VK == RISCVMCExpr::VK_RISCV_PCREL_HI;
+      return IsValid && (VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
+                         VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
+                         VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
+                         VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
     } else {
       return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
-                                 VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+                                 VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
+                                 VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
+                                 VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
+                                 VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
     }
   }
 
@@ -753,7 +814,7 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   default:
     break;
   case Match_Success:
-    return processInstruction(Inst, IDLoc, Out);
+    return processInstruction(Inst, IDLoc, Operands, Out);
   case Match_MissingFeature:
     return Error(IDLoc, "instruction use requires an option to be enabled");
   case Match_MnemonicFail:
@@ -844,8 +905,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSImm12:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
-        "operand must be a symbol with %lo/%pcrel_lo modifier or an integer in "
-        "the range");
+        "operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an "
+        "integer in the range");
   case Match_InvalidSImm12Lsb0:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
@@ -856,13 +917,15 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         "immediate must be a multiple of 2 bytes in the range");
   case Match_InvalidUImm20LUI:
     return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
-                                      "operand must be a symbol with %hi() "
-                                      "modifier or an integer in the range");
+                                      "operand must be a symbol with "
+                                      "%hi/%tprel_hi modifier or an integer in "
+                                      "the range");
   case Match_InvalidUImm20AUIPC:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, 0, (1 << 20) - 1,
-        "operand must be a symbol with %pcrel_hi() modifier or an integer in "
-        "the range");
+        "operand must be a symbol with a "
+        "%pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or "
+        "an integer in the range");
   case Match_InvalidSImm21Lsb0JAL:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
@@ -888,11 +951,33 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
     return Error(ErrorLoc, "operand must be a bare symbol name");
   }
+  case Match_InvalidCallSymbol: {
+    SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+    return Error(ErrorLoc, "operand must be a bare symbol name");
+  }
+  case Match_InvalidTPRelAddSymbol: {
+    SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+    return Error(ErrorLoc, "operand must be a symbol with %tprel_add modifier");
+  }
   }
 
   llvm_unreachable("Unknown match type detected!");
 }
 
+// Looks Name up first as a primary register name and then as an alternative
+// (ABI) name, storing the result in RegNo. Returns true on failure, in which
+// case RegNo is 0. With IsRV32E set, x16-x31 are treated as unknown names and
+// therefore also count as failures.
+static bool matchRegisterNameHelper(bool IsRV32E, unsigned &RegNo,
+                                    StringRef Name) {
+  unsigned Matched = MatchRegisterName(Name);
+  if (!Matched)
+    Matched = MatchRegisterAltName(Name);
+  const bool IsUpperGPR = Matched >= RISCV::X16 && Matched <= RISCV::X31;
+  RegNo = (IsRV32E && IsUpperGPR) ? 0 : Matched;
+  return RegNo == 0;
+}
+
 bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
   const AsmToken &Tok = getParser().getTok();
@@ -901,42 +986,45 @@ bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
   RegNo = 0;
   StringRef Name = getLexer().getTok().getIdentifier();
 
-  if (!MatchRegisterName(Name) || !MatchRegisterAltName(Name)) {
-    getParser().Lex(); // Eat identifier token.
-    return false;
-  }
+  if (matchRegisterNameHelper(isRV32E(), RegNo, Name))
+    return Error(StartLoc, "invalid register name");
 
-  return Error(StartLoc, "invalid register name");
+  getParser().Lex(); // Eat identifier token.
+  return false;
 }
 
 OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
                                                    bool AllowParens) {
   SMLoc FirstS = getLoc();
   bool HadParens = false;
-  AsmToken Buf[2];
+  AsmToken LParen;
 
-  // If this a parenthesised register name is allowed, parse it atomically
+  // If this is an LParen and a parenthesised register name is allowed, parse it
+  // atomically.
   if (AllowParens && getLexer().is(AsmToken::LParen)) {
+    AsmToken Buf[2];
     size_t ReadCount = getLexer().peekTokens(Buf);
     if (ReadCount == 2 && Buf[1].getKind() == AsmToken::RParen) {
       HadParens = true;
+      LParen = getParser().getTok();
       getParser().Lex(); // Eat '('
     }
   }
 
   switch (getLexer().getKind()) {
   default:
+    if (HadParens)
+      getLexer().UnLex(LParen);
     return MatchOperand_NoMatch;
   case AsmToken::Identifier:
     StringRef Name = getLexer().getTok().getIdentifier();
-    unsigned RegNo = MatchRegisterName(Name);
+    unsigned RegNo;
+    matchRegisterNameHelper(isRV32E(), RegNo, Name);
+
     if (RegNo == 0) {
-      RegNo = MatchRegisterAltName(Name);
-      if (RegNo == 0) {
-        if (HadParens)
-          getLexer().UnLex(Buf[0]);
-        return MatchOperand_NoMatch;
-      }
+      if (HadParens)
+        getLexer().UnLex(LParen);
+      return MatchOperand_NoMatch;
     }
     if (HadParens)
       Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64()));
@@ -965,6 +1053,8 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
   case AsmToken::LParen:
   case AsmToken::Minus:
   case AsmToken::Plus:
+  case AsmToken::Exclaim:
+  case AsmToken::Tilde:
   case AsmToken::Integer:
   case AsmToken::String: {
     if (getParser().parseExpression(Res))
@@ -1029,8 +1119,11 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
   default:
     return MatchOperand_NoMatch;
   case AsmToken::LParen:
+  case AsmToken::Dot:
   case AsmToken::Minus:
   case AsmToken::Plus:
+  case AsmToken::Exclaim:
+  case AsmToken::Tilde:
   case AsmToken::Integer:
   case AsmToken::String:
   case AsmToken::Identifier:
@@ -1093,12 +1186,55 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
   if (getLexer().getKind() != AsmToken::Identifier)
     return MatchOperand_NoMatch;
 
+  StringRef Identifier;
+  AsmToken Tok = getLexer().getTok();
+
+  if (getParser().parseIdentifier(Identifier))
+    return MatchOperand_ParseFail;
+
+  if (Identifier.consume_back("@plt")) {
+    Error(getLoc(), "'@plt' operand not valid for instruction");
+    return MatchOperand_ParseFail;
+  }
+
+  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+
+  if (Sym->isVariable()) {
+    const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+    if (!isa<MCSymbolRefExpr>(V)) {
+      getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+      return MatchOperand_NoMatch;
+    }
+    Res = V;
+  } else
+    Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+  Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+  return MatchOperand_Success;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+  const MCExpr *Res;
+
+  if (getLexer().getKind() != AsmToken::Identifier)
+    return MatchOperand_NoMatch;
+
+  // Avoid parsing the register in `call rd, foo` as a call symbol.
+  if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
+    return MatchOperand_NoMatch;
+
   StringRef Identifier;
   if (getParser().parseIdentifier(Identifier))
     return MatchOperand_ParseFail;
 
+  RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL;
+  if (Identifier.consume_back("@plt"))
+    Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
+
   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
   Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+  Res = RISCVMCExpr::create(Res, Kind, getContext());
   Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
   return MatchOperand_Success;
 }
@@ -1408,42 +1544,144 @@ void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
   }
 }
 
-void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
-                                          MCStreamer &Out) {
-  // The local load address pseudo-instruction "lla" is used in PC-relative
-  // addressing of symbols:
-  //   lla rdest, symbol
-  // expands to
-  //   TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
-  //             ADDI rdest, %pcrel_lo(TmpLabel)
+void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
+                                       const MCExpr *Symbol,
+                                       RISCVMCExpr::VariantKind VKHi,
+                                       unsigned SecondOpcode, SMLoc IDLoc,
+                                       MCStreamer &Out) {
+  // A pair of instructions for PC-relative addressing; expands to
+  //   TmpLabel: AUIPC TmpReg, VKHi(symbol)
+  //             OP DestReg, TmpReg, %pcrel_lo(TmpLabel)
   MCContext &Ctx = getContext();
 
   MCSymbol *TmpLabel = Ctx.createTempSymbol(
       "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
   Out.EmitLabel(TmpLabel);
 
-  MCOperand DestReg = Inst.getOperand(0);
-  const RISCVMCExpr *Symbol = RISCVMCExpr::create(
-      Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
-
+  const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx);
   emitToStreamer(
-      Out, MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol));
+      Out, MCInstBuilder(RISCV::AUIPC).addOperand(TmpReg).addExpr(SymbolHi));
 
   const MCExpr *RefToLinkTmpLabel =
       RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
                           RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
 
-  emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
-                          .addOperand(DestReg)
+  emitToStreamer(Out, MCInstBuilder(SecondOpcode)
                           .addOperand(DestReg)
+                          .addOperand(TmpReg)
                           .addExpr(RefToLinkTmpLabel));
 }
 
+void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
+                                          MCStreamer &Out) {
+  // "lla rdest, symbol" forms the PC-relative address of a local symbol:
+  //   TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+  //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+  // rdest doubles as the AUIPC scratch register, hence it is passed as both
+  // DestReg and TmpReg.
+  const MCOperand &Rd = Inst.getOperand(0);
+  const MCExpr *Target = Inst.getOperand(1).getExpr();
+  emitAuipcInstPair(/*DestReg=*/Rd, /*TmpReg=*/Rd, Target,
+                    RISCVMCExpr::VK_RISCV_PCREL_HI,
+                    /*SecondOpcode=*/RISCV::ADDI, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
+                                     MCStreamer &Out) {
+  // "la rdest, symbol" loads the address of a global symbol; the expansion
+  // depends on whether the object file is position independent.
+  //
+  // Non-PIC (PC-relative):
+  //   TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+  //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+  // PIC (GOT-indirect), with Lx being LD on RV64 and LW otherwise:
+  //   TmpLabel: AUIPC rdest, %got_pcrel_hi(symbol)
+  //             Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+  const MCOperand &Rd = Inst.getOperand(0);
+  const MCExpr *Target = Inst.getOperand(1).getExpr();
+
+  // FIXME: Should check .option (no)pic when implemented
+  const bool IsPIC = getContext().getObjectFileInfo()->isPositionIndependent();
+  const unsigned SecondOpc =
+      IsPIC ? (isRV64() ? RISCV::LD : RISCV::LW) : RISCV::ADDI;
+  const RISCVMCExpr::VariantKind HiKind =
+      IsPIC ? RISCVMCExpr::VK_RISCV_GOT_HI : RISCVMCExpr::VK_RISCV_PCREL_HI;
+
+  // rdest also serves as the AUIPC scratch register.
+  emitAuipcInstPair(/*DestReg=*/Rd, /*TmpReg=*/Rd, Target, HiKind, SecondOpc,
+                    IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc,
+                                          MCStreamer &Out) {
+  // "la.tls.ie rdest, symbol" (initial-exec TLS model) becomes
+  //   TmpLabel: AUIPC rdest, %tls_ie_pcrel_hi(symbol)
+  //             Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+  // where Lx is LD on RV64 and LW otherwise. rdest doubles as the AUIPC
+  // scratch register, hence it is passed as both DestReg and TmpReg.
+  const MCOperand &Rd = Inst.getOperand(0);
+  const MCExpr *Target = Inst.getOperand(1).getExpr();
+  const unsigned LoadOpc = isRV64() ? RISCV::LD : RISCV::LW;
+  emitAuipcInstPair(/*DestReg=*/Rd, /*TmpReg=*/Rd, Target,
+                    RISCVMCExpr::VK_RISCV_TLS_GOT_HI,
+                    /*SecondOpcode=*/LoadOpc, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc,
+                                          MCStreamer &Out) {
+  // "la.tls.gd rdest, symbol" (global-dynamic TLS model) becomes
+  //   TmpLabel: AUIPC rdest, %tls_gd_pcrel_hi(symbol)
+  //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+  // rdest doubles as the AUIPC scratch register, hence it is passed as both
+  // DestReg and TmpReg.
+  const MCOperand &Rd = Inst.getOperand(0);
+  const MCExpr *Target = Inst.getOperand(1).getExpr();
+  emitAuipcInstPair(/*DestReg=*/Rd, /*TmpReg=*/Rd, Target,
+                    RISCVMCExpr::VK_RISCV_TLS_GD_HI,
+                    /*SecondOpcode=*/RISCV::ADDI, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
+                                         SMLoc IDLoc, MCStreamer &Out,
+                                         bool HasTmpReg) {
+  // Expands a load/store pseudo that names a symbol into a PC-relative pair:
+  //
+  //   TmpLabel: AUIPC tmp, %pcrel_hi(symbol)
+  //             [S|L]X    rd, %pcrel_lo(TmpLabel)(tmp)
+  //
+  // With HasTmpReg the operands are read as (rd, tmp, symbol); without it
+  // they are read as (rd, symbol) and operand 0 is also used as tmp.
+  const unsigned TmpIdx = HasTmpReg ? 1 : 0;
+  const unsigned SymIdx = TmpIdx + 1;
+  const MCOperand &Rd = Inst.getOperand(0);
+  const MCOperand &Scratch = Inst.getOperand(TmpIdx);
+  const MCExpr *Target = Inst.getOperand(SymIdx).getExpr();
+  emitAuipcInstPair(Rd, Scratch, Target, RISCVMCExpr::VK_RISCV_PCREL_HI,
+                    Opcode, IDLoc, Out);
+}
+
+bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
+                                         OperandVector &Operands) {
+  assert(Inst.getOpcode() == RISCV::PseudoAddTPRel && "Invalid instruction");
+  const MCOperand &SecondInput = Inst.getOperand(2);
+  assert(SecondInput.isReg() && "Unexpected second operand kind");
+  if (SecondInput.getReg() == RISCV::X4)
+    return false; // tp/x4 is the only accepted register.
+  // Report at Operands[3], presumably the parsed form of MCInst operand 2.
+  SMLoc ErrorLoc = ((RISCVOperand &)*Operands[3]).getStartLoc();
+  return Error(ErrorLoc, "the second input operand must be tp/x4 when using "
+                         "%tprel_add modifier");
+}
+
 bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                        OperandVector &Operands,
                                         MCStreamer &Out) {
   Inst.setLoc(IDLoc);
 
-  if (Inst.getOpcode() == RISCV::PseudoLI) {
+  switch (Inst.getOpcode()) {
+  default:
+    break;
+  case RISCV::PseudoLI: {
     unsigned Reg = Inst.getOperand(0).getReg();
     const MCOperand &Op1 = Inst.getOperand(1);
     if (Op1.isExpr()) {
@@ -1463,9 +1701,68 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
       Imm = SignExtend64<32>(Imm);
     emitLoadImm(Reg, Imm, Out);
     return false;
-  } else if (Inst.getOpcode() == RISCV::PseudoLLA) {
+  }
+  case RISCV::PseudoLLA:
     emitLoadLocalAddress(Inst, IDLoc, Out);
     return false;
+  case RISCV::PseudoLA:
+    emitLoadAddress(Inst, IDLoc, Out);
+    return false;
+  case RISCV::PseudoLA_TLS_IE:
+    emitLoadTLSIEAddress(Inst, IDLoc, Out);
+    return false;
+  case RISCV::PseudoLA_TLS_GD:
+    emitLoadTLSGDAddress(Inst, IDLoc, Out);
+    return false;
+  case RISCV::PseudoLB:
+    emitLoadStoreSymbol(Inst, RISCV::LB, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLBU:
+    emitLoadStoreSymbol(Inst, RISCV::LBU, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLH:
+    emitLoadStoreSymbol(Inst, RISCV::LH, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLHU:
+    emitLoadStoreSymbol(Inst, RISCV::LHU, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLW:
+    emitLoadStoreSymbol(Inst, RISCV::LW, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLWU:
+    emitLoadStoreSymbol(Inst, RISCV::LWU, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoLD:
+    emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
+    return false;
+  case RISCV::PseudoFLW:
+    emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoFLD:
+    emitLoadStoreSymbol(Inst, RISCV::FLD, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoSB:
+    emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoSH:
+    emitLoadStoreSymbol(Inst, RISCV::SH, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoSW:
+    emitLoadStoreSymbol(Inst, RISCV::SW, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoSD:
+    emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoFSW:
+    emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoFSD:
+    emitLoadStoreSymbol(Inst, RISCV::FSD, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
+  case RISCV::PseudoAddTPRel:
+    if (checkPseudoAddTPRel(Inst, Operands))
+      return true;
+    break;
   }
 
   emitToStreamer(Out, Inst);
diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index eafa09d56315..36200c03f703 100644
--- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVDisassembler.cpp - Disassembler for RISCV --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "Utils/RISCVBaseInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -70,7 +70,13 @@ static const unsigned GPRDecoderTable[] = {
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
                                            uint64_t Address,
                                            const void *Decoder) {
-  if (RegNo > sizeof(GPRDecoderTable))
+  const FeatureBitset &FeatureBits =
+      static_cast<const MCDisassembler *>(Decoder)
+          ->getSubtargetInfo()
+          .getFeatureBits();
+  bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+  // RegNo must be a valid index into GPRDecoderTable (0 .. length - 1).
+  if (RegNo >= array_lengthof(GPRDecoderTable) || (IsRV32E && RegNo > 15))
     return MCDisassembler::Fail;
 
   // We must define our own mapping from RegNo to register identifier.
@@ -95,7 +101,7 @@ static const unsigned FPR32DecoderTable[] = {
 static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
-  if (RegNo > sizeof(FPR32DecoderTable))
+  if (RegNo >= array_lengthof(FPR32DecoderTable)) // index must be < length
     return MCDisassembler::Fail;
 
   // We must define our own mapping from RegNo to register identifier.
@@ -131,7 +137,7 @@ static const unsigned FPR64DecoderTable[] = {
 static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
-  if (RegNo > sizeof(FPR64DecoderTable))
+  if (RegNo >= array_lengthof(FPR64DecoderTable)) // index must be < length
     return MCDisassembler::Fail;
 
   // We must define our own mapping from RegNo to register identifier.
diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
deleted file mode 100644
index 979c8f4e2fa7..000000000000
--- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- RISCVInstPrinter.cpp - Convert RISCV MCInst to asm syntax ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an RISCV MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "RISCVInstPrinter.h"
-#include "MCTargetDesc/RISCVMCExpr.h"
-#include "Utils/RISCVBaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "RISCVGenAsmWriter.inc"
-
-// Include the auto-generated portion of the compress emitter.
-#define GEN_UNCOMPRESS_INSTR
-#include "RISCVGenCompressInstEmitter.inc"
-
-static cl::opt<bool>
-    NoAliases("riscv-no-aliases",
-              cl::desc("Disable the emission of assembler pseudo instructions"),
-              cl::init(false), cl::Hidden);
-
-void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                 StringRef Annot, const MCSubtargetInfo &STI) {
-  bool Res = false;
-  const MCInst *NewMI = MI;
-  MCInst UncompressedMI;
-  if (!NoAliases)
-    Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
-  if (Res)
-    NewMI = const_cast<MCInst *>(&UncompressedMI);
-  if (NoAliases || !printAliasInstr(NewMI, STI, O))
-    printInstruction(NewMI, STI, O);
-  printAnnotation(O, Annot);
-}
-
-void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
-  O << getRegisterName(RegNo);
-}
-
-void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                    const MCSubtargetInfo &STI, raw_ostream &O,
-                                    const char *Modifier) {
-  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &MO = MI->getOperand(OpNo);
-
-  if (MO.isReg()) {
-    printRegName(O, MO.getReg());
-    return;
-  }
-
-  if (MO.isImm()) {
-    O << MO.getImm();
-    return;
-  }
-
-  assert(MO.isExpr() && "Unknown operand kind in printOperand");
-  MO.getExpr()->print(O, &MAI);
-}
-
-void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
-                                              const MCSubtargetInfo &STI,
-                                              raw_ostream &O) {
-  unsigned Imm = MI->getOperand(OpNo).getImm();
-  auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
-  if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
-    O << SysReg->Name;
-  else
-    O << Imm;
-}
-
-void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
-                                     const MCSubtargetInfo &STI,
-                                     raw_ostream &O) {
-  unsigned FenceArg = MI->getOperand(OpNo).getImm();
-  assert (((FenceArg >> 4) == 0) && "Invalid immediate in printFenceArg");
-
-  if ((FenceArg & RISCVFenceField::I) != 0)
-    O << 'i';
-  if ((FenceArg & RISCVFenceField::O) != 0)
-    O << 'o';
-  if ((FenceArg & RISCVFenceField::R) != 0)
-    O << 'r';
-  if ((FenceArg & RISCVFenceField::W) != 0)
-    O << 'w';
-  if (FenceArg == 0)
-    O << "unknown";
-}
-
-void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
-                                   const MCSubtargetInfo &STI, raw_ostream &O) {
-  auto FRMArg =
-      static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
-  O << RISCVFPRndMode::roundingModeToString(FRMArg);
-}
diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
deleted file mode 100644
index 0f9bed184996..000000000000
--- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- RISCVInstPrinter.h - Convert RISCV MCInst to asm syntax ---*- C++ -*--//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a RISCV MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_RISCV_INSTPRINTER_RISCVINSTPRINTER_H
-#define LLVM_LIB_TARGET_RISCV_INSTPRINTER_RISCVINSTPRINTER_H
-
-#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-class MCOperand;
-
-class RISCVInstPrinter : public MCInstPrinter {
-public:
-  RISCVInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                   const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  void printRegName(raw_ostream &O, unsigned RegNo) const override;
-
-  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                    raw_ostream &O, const char *Modifier = nullptr);
-  void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
-                              const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFenceArg(const MCInst *MI, unsigned OpNo,
-                     const MCSubtargetInfo &STI, raw_ostream &O);
-  void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &O);
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                       raw_ostream &O);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx,
-                               const MCSubtargetInfo &STI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo,
-                                     unsigned AltIdx = RISCV::ABIRegAltName);
-};
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 7672fea5d95b..ee5f760ebcb0 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,6 +16,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -33,6 +33,10 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
   switch ((unsigned)Fixup.getKind()) {
   default:
     break;
+  case RISCV::fixup_riscv_got_hi20:
+  case RISCV::fixup_riscv_tls_got_hi20:
+  case RISCV::fixup_riscv_tls_gd_hi20:
+    return true;
   case RISCV::fixup_riscv_pcrel_lo12_i:
   case RISCV::fixup_riscv_pcrel_lo12_s:
     // For pcrel_lo12, force a relocation if the target of the corresponding
@@ -48,6 +52,11 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
     default:
       llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
       break;
+    case RISCV::fixup_riscv_got_hi20:
+    case RISCV::fixup_riscv_tls_got_hi20:
+    case RISCV::fixup_riscv_tls_gd_hi20:
+      ShouldForce = true;
+      break;
     case RISCV::fixup_riscv_pcrel_hi20:
       ShouldForce = T->getValue()->findAssociatedFragment() !=
                     Fixup.getValue()->findAssociatedFragment();
@@ -153,16 +162,12 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
     return false;
 
   // The canonical nop on RISC-V is addi x0, x0, 0.
-  uint64_t Nop32Count = Count / 4;
-  for (uint64_t i = Nop32Count; i != 0; --i)
+  for (; Count >= 4; Count -= 4)
     OS.write("\x13\0\0\0", 4);
 
   // The canonical nop on RVC is c.nop.
-  if (HasStdExtC) {
-    uint64_t Nop16Count = (Count - Nop32Count * 4) / 2;
-    for (uint64_t i = Nop16Count; i != 0; --i)
-      OS.write("\x01\0", 2);
-  }
+  if (Count && HasStdExtC)
+    OS.write("\x01\0", 2);
 
   return true;
 }
@@ -173,6 +178,10 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
   switch (Kind) {
   default:
     llvm_unreachable("Unknown fixup kind!");
+  case RISCV::fixup_riscv_got_hi20:
+  case RISCV::fixup_riscv_tls_got_hi20:
+  case RISCV::fixup_riscv_tls_gd_hi20:
+    llvm_unreachable("Relocation should be unconditionally forced\n");
   case FK_Data_1:
   case FK_Data_2:
   case FK_Data_4:
@@ -180,12 +189,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
     return Value;
   case RISCV::fixup_riscv_lo12_i:
   case RISCV::fixup_riscv_pcrel_lo12_i:
+  case RISCV::fixup_riscv_tprel_lo12_i:
     return Value & 0xfff;
   case RISCV::fixup_riscv_lo12_s:
   case RISCV::fixup_riscv_pcrel_lo12_s:
+  case RISCV::fixup_riscv_tprel_lo12_s:
     return (((Value >> 5) & 0x7f) << 25) | ((Value & 0x1f) << 7);
   case RISCV::fixup_riscv_hi20:
   case RISCV::fixup_riscv_pcrel_hi20:
+  case RISCV::fixup_riscv_tprel_hi20:
     // Add 1 if bit 11 is 1, to compensate for low 12 bits being negative.
     return ((Value + 0x800) >> 12) & 0xfffff;
   case RISCV::fixup_riscv_jal: {
@@ -223,7 +235,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
     Value = (Sbit << 31) | (Mid6 << 25) | (Lo4 << 8) | (Hi1 << 7);
     return Value;
   }
-  case RISCV::fixup_riscv_call: {
+  case RISCV::fixup_riscv_call:
+  case RISCV::fixup_riscv_call_plt: {
     // Jalr will add UpperImm with the sign-extended 12-bit LowerImm,
     // we need to add 0x800ULL before extract upper bits to reflect the
     // effect of the sign extension.
@@ -287,6 +300,60 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
   }
 }
 
+// Linker relaxation may change code size, so when it is enabled we insert
+// extra Nops for the .align directive; the linker can then satisfy the
+// alignment by removing Nops.
+// The function stores the total Nops size we need to insert in Size.
+bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
+    const MCAlignFragment &AF, unsigned &Size) {
+  // Calculate Nops Size only when linker relaxation is enabled.
+  if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+    return false;
+
+  bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
+  unsigned MinNopLen = HasStdExtC ? 2 : 4;
+
+  if (AF.getAlignment() <= MinNopLen) {
+    return false;
+  } else {
+    Size = AF.getAlignment() - MinNopLen;
+    return true;
+  }
+}
+
+// When linker relaxation is enabled, we need to insert an R_RISCV_ALIGN
+// relocation to indicate the position of the Nops and the total number of
+// Nop bytes that have been inserted.
+// The function inserts a fixup_riscv_align fixup, which is eventually
+// converted to an R_RISCV_ALIGN relocation.
+bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+                                                    const MCAsmLayout &Layout,
+                                                    MCAlignFragment &AF) {
+  // Insert the fixup only when linker relaxation is enabled.
+  if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+    return false;
+
+  // Calculate total Nops we need to insert. If there are none to insert
+  // then simply return.
+  unsigned Count;
+  if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0))
+    return false;
+
+  MCContext &Ctx = Asm.getContext();
+  const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
+  // Create fixup_riscv_align fixup.
+  MCFixup Fixup =
+      MCFixup::create(0, Dummy, MCFixupKind(RISCV::fixup_riscv_align), SMLoc());
+
+  uint64_t FixedValue = 0;
+  MCValue NopBytes = MCValue::get(Count);
+
+  Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, NopBytes,
+                                   FixedValue);
+
+  return true;
+}
+
 std::unique_ptr<MCObjectTargetWriter>
 RISCVAsmBackend::createObjectTargetWriter() const {
   return createRISCVELFObjectWriter(OSABI, Is64Bit);
@@ -298,5 +365,5 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
                                           const MCTargetOptions &Options) {
   const Triple &TT = STI.getTargetTriple();
   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
-  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit());
+  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
 }
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index b98e45f4053f..254249c87dc8 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -1,9 +1,8 @@
 //===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 
 #include "MCTargetDesc/RISCVFixupKinds.h"
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -26,21 +26,45 @@ class RISCVAsmBackend : public MCAsmBackend {
   uint8_t OSABI;
   bool Is64Bit;
   bool ForceRelocs = false;
+  const MCTargetOptions &TargetOptions;
+  RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
 
 public:
-  RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
-      : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
-        Is64Bit(Is64Bit) {}
+  RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
+                  const MCTargetOptions &Options)
+      : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
+        TargetOptions(Options) {
+    TargetABI = RISCVABI::computeTargetABI(
+        STI.getTargetTriple(), STI.getFeatureBits(), Options.getABIName());
+    RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
+  }
   ~RISCVAsmBackend() override {}
 
   void setForceRelocs() { ForceRelocs = true; }
 
+  // Returns true if relocations will be forced for shouldForceRelocation by
+  // default. This will be true if relaxation is enabled or had previously
+  // been enabled.
+  bool willForceRelocations() const {
+    return ForceRelocs || STI.getFeatureBits()[RISCV::FeatureRelax];
+  }
+
   // Generate diff expression relocations if the relax feature is enabled or had
   // previously been enabled, otherwise it is safe for the assembler to
   // calculate these internally.
   bool requiresDiffExpressionRelocations() const override {
-    return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+    return willForceRelocations();
   }
+
+  // Return Size with extra Nop Bytes for alignment directive in code section.
+  bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
+                                             unsigned &Size) override;
+
+  // Insert target specific fixup type for alignment directive in code section.
+  bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+                                     const MCAsmLayout &Layout,
+                                     MCAlignFragment &AF) override;
+
   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                   const MCValue &Target, MutableArrayRef<char> Data,
                   uint64_t Value, bool IsResolved,
@@ -80,12 +104,21 @@ public:
       { "fixup_riscv_pcrel_hi20",   12,     20,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_pcrel_lo12_i", 20,     12,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_pcrel_lo12_s",  0,     32,  MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_riscv_got_hi20",     12,     20,  MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_riscv_tprel_hi20",   12,     20,  0 },
+      { "fixup_riscv_tprel_lo12_i", 20,     12,  0 },
+      { "fixup_riscv_tprel_lo12_s",  0,     32,  0 },
+      { "fixup_riscv_tprel_add",     0,      0,  0 },
+      { "fixup_riscv_tls_got_hi20", 12,     20,  MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_riscv_tls_gd_hi20",  12,     20,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_jal",          12,     20,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_branch",        0,     32,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_rvc_jump",      2,     11,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_rvc_branch",    0,     16,  MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_riscv_call",          0,     64,  MCFixupKindInfo::FKF_IsPCRel },
-      { "fixup_riscv_relax",         0,      0,  0 }
+      { "fixup_riscv_call_plt",      0,     64,  MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_riscv_relax",         0,      0,  0 },
+      { "fixup_riscv_align",         0,      0,  0 }
     };
     static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
                   "Not all fixup kinds added to Infos array");
@@ -107,6 +140,9 @@ public:
 
 
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+
+  const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
+  RISCVABI::ABI getTargetABI() const { return TargetABI; }
 };
 }
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 9b88614aa693..3ccbc86d2619 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVELFObjectWriter.cpp - RISCV ELF Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -49,7 +48,42 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
                                             const MCFixup &Fixup,
                                             bool IsPCRel) const {
   // Determine the type of the relocation
-  switch ((unsigned)Fixup.getKind()) {
+  unsigned Kind = Fixup.getKind();
+  if (IsPCRel) {
+    switch (Kind) {
+    default:
+      llvm_unreachable("invalid fixup kind!");
+    case FK_Data_4:
+    case FK_PCRel_4:
+      return ELF::R_RISCV_32_PCREL;
+    case RISCV::fixup_riscv_pcrel_hi20:
+      return ELF::R_RISCV_PCREL_HI20;
+    case RISCV::fixup_riscv_pcrel_lo12_i:
+      return ELF::R_RISCV_PCREL_LO12_I;
+    case RISCV::fixup_riscv_pcrel_lo12_s:
+      return ELF::R_RISCV_PCREL_LO12_S;
+    case RISCV::fixup_riscv_got_hi20:
+      return ELF::R_RISCV_GOT_HI20;
+    case RISCV::fixup_riscv_tls_got_hi20:
+      return ELF::R_RISCV_TLS_GOT_HI20;
+    case RISCV::fixup_riscv_tls_gd_hi20:
+      return ELF::R_RISCV_TLS_GD_HI20;
+    case RISCV::fixup_riscv_jal:
+      return ELF::R_RISCV_JAL;
+    case RISCV::fixup_riscv_branch:
+      return ELF::R_RISCV_BRANCH;
+    case RISCV::fixup_riscv_rvc_jump:
+      return ELF::R_RISCV_RVC_JUMP;
+    case RISCV::fixup_riscv_rvc_branch:
+      return ELF::R_RISCV_RVC_BRANCH;
+    case RISCV::fixup_riscv_call:
+      return ELF::R_RISCV_CALL;
+    case RISCV::fixup_riscv_call_plt:
+      return ELF::R_RISCV_CALL_PLT;
+    }
+  }
+
+  switch (Kind) {
   default:
     llvm_unreachable("invalid fixup kind!");
   case FK_Data_4:
@@ -78,24 +112,18 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_RISCV_LO12_I;
   case RISCV::fixup_riscv_lo12_s:
     return ELF::R_RISCV_LO12_S;
-  case RISCV::fixup_riscv_pcrel_hi20:
-    return ELF::R_RISCV_PCREL_HI20;
-  case RISCV::fixup_riscv_pcrel_lo12_i:
-    return ELF::R_RISCV_PCREL_LO12_I;
-  case RISCV::fixup_riscv_pcrel_lo12_s:
-    return ELF::R_RISCV_PCREL_LO12_S;
-  case RISCV::fixup_riscv_jal:
-    return ELF::R_RISCV_JAL;
-  case RISCV::fixup_riscv_branch:
-    return ELF::R_RISCV_BRANCH;
-  case RISCV::fixup_riscv_rvc_jump:
-    return ELF::R_RISCV_RVC_JUMP;
-  case RISCV::fixup_riscv_rvc_branch:
-    return ELF::R_RISCV_RVC_BRANCH;
-  case RISCV::fixup_riscv_call:
-    return ELF::R_RISCV_CALL;
+  case RISCV::fixup_riscv_tprel_hi20:
+    return ELF::R_RISCV_TPREL_HI20;
+  case RISCV::fixup_riscv_tprel_lo12_i:
+    return ELF::R_RISCV_TPREL_LO12_I;
+  case RISCV::fixup_riscv_tprel_lo12_s:
+    return ELF::R_RISCV_TPREL_LO12_S;
+  case RISCV::fixup_riscv_tprel_add:
+    return ELF::R_RISCV_TPREL_ADD;
   case RISCV::fixup_riscv_relax:
     return ELF::R_RISCV_RELAX;
+  case RISCV::fixup_riscv_align:
+    return ELF::R_RISCV_ALIGN;
   }
 }
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index a6ba1e41e964..40fa195f3790 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVELFStreamer.cpp - RISCV ELF Target Streamer Methods ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVELFStreamer.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
 #include "RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
@@ -23,14 +24,35 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
                                                const MCSubtargetInfo &STI)
     : RISCVTargetStreamer(S) {
   MCAssembler &MCA = getStreamer().getAssembler();
-
   const FeatureBitset &Features = STI.getFeatureBits();
+  auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
+  RISCVABI::ABI ABI = MAB.getTargetABI();
+  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
 
   unsigned EFlags = MCA.getELFHeaderEFlags();
 
   if (Features[RISCV::FeatureStdExtC])
     EFlags |= ELF::EF_RISCV_RVC;
 
+  switch (ABI) {
+  case RISCVABI::ABI_ILP32:
+  case RISCVABI::ABI_LP64:
+    break;
+  case RISCVABI::ABI_ILP32F:
+  case RISCVABI::ABI_LP64F:
+    EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE;
+    break;
+  case RISCVABI::ABI_ILP32D:
+  case RISCVABI::ABI_LP64D:
+    EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE;
+    break;
+  case RISCVABI::ABI_ILP32E:
+    EFlags |= ELF::EF_RISCV_RVE;
+    break;
+  case RISCVABI::ABI_Unknown:
+    llvm_unreachable("Improperly initialised target ABI");
+  }
+
   MCA.setELFHeaderEFlags(EFlags);
 }
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 1f36bbc43882..138df786eaf3 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -1,9 +1,8 @@
 //===-- RISCVELFStreamer.h - RISCV ELF Target Streamer ---------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 6a1224be774e..6c7933340608 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- RISCVFixupKinds.h - RISCV Specific Fixup Entries --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,6 +34,27 @@ enum Fixups {
   // fixup_riscv_pcrel_lo12_s - 12-bit fixup corresponding to pcrel_lo(foo) for
   // the S-type store instructions
   fixup_riscv_pcrel_lo12_s,
+  // fixup_riscv_got_hi20 - 20-bit fixup corresponding to got_pcrel_hi(foo) for
+  // instructions like auipc
+  fixup_riscv_got_hi20,
+  // fixup_riscv_tprel_hi20 - 20-bit fixup corresponding to tprel_hi(foo) for
+  // instructions like lui
+  fixup_riscv_tprel_hi20,
+  // fixup_riscv_tprel_lo12_i - 12-bit fixup corresponding to tprel_lo(foo) for
+  // instructions like addi
+  fixup_riscv_tprel_lo12_i,
+  // fixup_riscv_tprel_lo12_s - 12-bit fixup corresponding to tprel_lo(foo) for
+  // the S-type store instructions
+  fixup_riscv_tprel_lo12_s,
+  // fixup_riscv_tprel_add - A fixup corresponding to %tprel_add(foo) for the
+  // add_tls instruction. Used to provide a hint to the linker.
+  fixup_riscv_tprel_add,
+  // fixup_riscv_tls_got_hi20 - 20-bit fixup corresponding to
+  // tls_ie_pcrel_hi(foo) for instructions like auipc
+  fixup_riscv_tls_got_hi20,
+  // fixup_riscv_tls_gd_hi20 - 20-bit fixup corresponding to
+  // tls_gd_pcrel_hi(foo) for instructions like auipc
+  fixup_riscv_tls_gd_hi20,
   // fixup_riscv_jal - 20-bit fixup for symbol references in the jal
   // instruction
   fixup_riscv_jal,
@@ -50,9 +70,17 @@ enum Fixups {
   // fixup_riscv_call - A fixup representing a call attached to the auipc
   // instruction in a pair composed of adjacent auipc+jalr instructions.
   fixup_riscv_call,
+  // fixup_riscv_call_plt - A fixup representing a procedure linkage table call
+  // attached to the auipc instruction in a pair composed of adjacent auipc+jalr
+  // instructions.
+  fixup_riscv_call_plt,
   // fixup_riscv_relax - Used to generate an R_RISCV_RELAX relocation type,
   // which indicates the linker may relax the instruction pair.
   fixup_riscv_relax,
+  // fixup_riscv_align - Used to generate an R_RISCV_ALIGN relocation type,
+  // which indicates the linker should fix up the alignment after linker
+  // relaxation.
+  fixup_riscv_align,
 
   // fixup_riscv_invalid - used as a sentinel and a marker, must be last fixup
   fixup_riscv_invalid,
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
new file mode 100644
index 000000000000..fe37b70811d8
--- /dev/null
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -0,0 +1,114 @@
+//===-- RISCVInstPrinter.cpp - Convert RISCV MCInst to asm syntax ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a RISCV MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstPrinter.h"
+#include "MCTargetDesc/RISCVMCExpr.h"
+#include "Utils/RISCVBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "RISCVGenAsmWriter.inc"
+
+// Include the auto-generated portion of the compress emitter.
+#define GEN_UNCOMPRESS_INSTR
+#include "RISCVGenCompressInstEmitter.inc"
+
+static cl::opt<bool>
+    NoAliases("riscv-no-aliases",
+              cl::desc("Disable the emission of assembler pseudo instructions"),
+              cl::init(false), cl::Hidden);
+
+void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                 StringRef Annot, const MCSubtargetInfo &STI) {
+  bool Res = false;
+  const MCInst *NewMI = MI;
+  MCInst UncompressedMI;
+  if (!NoAliases)
+    Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
+  if (Res)
+    NewMI = const_cast<MCInst *>(&UncompressedMI);
+  if (NoAliases || !printAliasInstr(NewMI, STI, O))
+    printInstruction(NewMI, STI, O);
+  printAnnotation(O, Annot);
+}
+
+void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+  O << getRegisterName(RegNo);
+}
+
+void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                    const MCSubtargetInfo &STI, raw_ostream &O,
+                                    const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &MO = MI->getOperand(OpNo);
+
+  if (MO.isReg()) {
+    printRegName(O, MO.getReg());
+    return;
+  }
+
+  if (MO.isImm()) {
+    O << MO.getImm();
+    return;
+  }
+
+  assert(MO.isExpr() && "Unknown operand kind in printOperand");
+  MO.getExpr()->print(O, &MAI);
+}
+
+void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+                                              const MCSubtargetInfo &STI,
+                                              raw_ostream &O) {
+  unsigned Imm = MI->getOperand(OpNo).getImm();
+  auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
+  if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+    O << SysReg->Name;
+  else
+    O << Imm;
+}
+
+void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
+                                     const MCSubtargetInfo &STI,
+                                     raw_ostream &O) {
+  unsigned FenceArg = MI->getOperand(OpNo).getImm();
+  assert (((FenceArg >> 4) == 0) && "Invalid immediate in printFenceArg");
+
+  if ((FenceArg & RISCVFenceField::I) != 0)
+    O << 'i';
+  if ((FenceArg & RISCVFenceField::O) != 0)
+    O << 'o';
+  if ((FenceArg & RISCVFenceField::R) != 0)
+    O << 'r';
+  if ((FenceArg & RISCVFenceField::W) != 0)
+    O << 'w';
+  if (FenceArg == 0)
+    O << "unknown";
+}
+
+void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
+                                   const MCSubtargetInfo &STI, raw_ostream &O) {
+  auto FRMArg =
+      static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
+  O << RISCVFPRndMode::roundingModeToString(FRMArg);
+}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
new file mode 100644
index 000000000000..5ca1d3fa20fe
--- /dev/null
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -0,0 +1,54 @@
+//===-- RISCVInstPrinter.h - Convert RISCV MCInst to asm syntax ---*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a RISCV MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVINSTPRINTER_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVINSTPRINTER_H
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+class MCOperand;
+
+class RISCVInstPrinter : public MCInstPrinter {
+public:
+  RISCVInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  void printRegName(raw_ostream &O, unsigned RegNo) const override;
+
+  void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                    raw_ostream &O, const char *Modifier = nullptr);
+  void printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFenceArg(const MCInst *MI, unsigned OpNo,
+                     const MCSubtargetInfo &STI, raw_ostream &O);
+  void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                       raw_ostream &O);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx,
+                               const MCSubtargetInfo &STI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = RISCV::ABIRegAltName);
+};
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 780dae410cd0..983629692883 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVMCAsmInfo.cpp - RISCV Asm properties -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
   CommentString = "#";
   AlignmentIsInBytes = false;
   SupportsDebugInformation = true;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
   Data16bitsDirective = "\t.half\t";
   Data32bitsDirective = "\t.word\t";
 }
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index 901a1eba8af2..043fdb7c08c0 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- RISCVMCAsmInfo.h - RISCV Asm Info ----------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index c5a4ffc0e360..0fc775f63ed4 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVMCCodeEmitter.cpp - Convert RISCV code to machine code -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,6 +56,10 @@ public:
                           SmallVectorImpl<MCFixup> &Fixups,
                           const MCSubtargetInfo &STI) const;
 
+  void expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+                      SmallVectorImpl<MCFixup> &Fixups,
+                      const MCSubtargetInfo &STI) const;
+
   /// TableGen'erated function for getting the binary encoding for an
   /// instruction.
   uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -85,28 +88,34 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
   return new RISCVMCCodeEmitter(Ctx, MCII);
 }
 
-// Expand PseudoCALL and PseudoTAIL to AUIPC and JALR with relocation types.
-// We expand PseudoCALL and PseudoTAIL while encoding, meaning AUIPC and JALR
-// won't go through RISCV MC to MC compressed instruction transformation. This
-// is acceptable because AUIPC has no 16-bit form and C_JALR have no immediate
-// operand field.  We let linker relaxation deal with it. When linker
-// relaxation enabled, AUIPC and JALR have chance relax to JAL. If C extension
-// is enabled, JAL has chance relax to C_JAL.
+// Expand PseudoCALL(Reg) and PseudoTAIL to AUIPC and JALR with relocation
+// types. We expand PseudoCALL(Reg) and PseudoTAIL while encoding, meaning AUIPC
+// and JALR won't go through RISCV MC to MC compressed instruction
+// transformation. This is acceptable because AUIPC has no 16-bit form and
+// C_JALR has no immediate operand field.  We let linker relaxation deal with
+// it. When linker relaxation is enabled, AUIPC and JALR may be relaxed to JAL.
+// If the C extension is enabled, JAL may in turn be relaxed to C_JAL.
 void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
                                             SmallVectorImpl<MCFixup> &Fixups,
                                             const MCSubtargetInfo &STI) const {
   MCInst TmpInst;
-  MCOperand Func = MI.getOperand(0);
-  unsigned Ra = (MI.getOpcode() == RISCV::PseudoTAIL) ? RISCV::X6 : RISCV::X1;
+  MCOperand Func;
+  unsigned Ra;
+  if (MI.getOpcode() == RISCV::PseudoTAIL) {
+    Func = MI.getOperand(0);
+    Ra = RISCV::X6;
+  } else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
+    Func = MI.getOperand(1);
+    Ra = MI.getOperand(0).getReg();
+  } else {
+    Func = MI.getOperand(0);
+    Ra = RISCV::X1;
+  }
   uint32_t Binary;
 
   assert(Func.isExpr() && "Expected expression");
 
-  const MCExpr *Expr = Func.getExpr();
-
-  // Create function call expression CallExpr for AUIPC.
-  const MCExpr *CallExpr =
-      RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL, Ctx);
+  const MCExpr *CallExpr = Func.getExpr();
 
   // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
   TmpInst = MCInstBuilder(RISCV::AUIPC)
@@ -119,12 +128,50 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
     // Emit JALR X0, X6, 0
     TmpInst = MCInstBuilder(RISCV::JALR).addReg(RISCV::X0).addReg(Ra).addImm(0);
   else
-    // Emit JALR X1, X1, 0
+    // Emit JALR Ra, Ra, 0
     TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0);
   Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
   support::endian::write(OS, Binary, support::little);
 }
 
+// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
+void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  MCOperand DestReg = MI.getOperand(0);
+  MCOperand SrcReg = MI.getOperand(1);
+  MCOperand TPReg = MI.getOperand(2);
+  assert(TPReg.isReg() && TPReg.getReg() == RISCV::X4 &&
+         "Expected thread pointer as second input to TP-relative add");
+
+  MCOperand SrcSymbol = MI.getOperand(3);
+  assert(SrcSymbol.isExpr() &&
+         "Expected expression as third input to TP-relative add");
+
+  const RISCVMCExpr *Expr = dyn_cast<RISCVMCExpr>(SrcSymbol.getExpr());
+  assert(Expr && Expr->getKind() == RISCVMCExpr::VK_RISCV_TPREL_ADD &&
+         "Expected tprel_add relocation on TP-relative symbol");
+
+  // Emit the correct tprel_add relocation for the symbol.
+  Fixups.push_back(MCFixup::create(
+      0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));
+
+  // Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
+  if (STI.getFeatureBits()[RISCV::FeatureRelax]) {
+    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+    Fixups.push_back(MCFixup::create(
+        0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
+  }
+
+  // Emit a normal ADD instruction with the given operands.
+  MCInst TmpInst = MCInstBuilder(RISCV::ADD)
+                       .addOperand(DestReg)
+                       .addOperand(SrcReg)
+                       .addOperand(TPReg);
+  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+  support::endian::write(OS, Binary, support::little);
+}
+
 void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
@@ -132,13 +179,20 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
   // Get byte count of instruction.
   unsigned Size = Desc.getSize();
 
-  if (MI.getOpcode() == RISCV::PseudoCALL ||
+  if (MI.getOpcode() == RISCV::PseudoCALLReg ||
+      MI.getOpcode() == RISCV::PseudoCALL ||
       MI.getOpcode() == RISCV::PseudoTAIL) {
     expandFunctionCall(MI, OS, Fixups, STI);
     MCNumEmitted += 2;
     return;
   }
 
+  if (MI.getOpcode() == RISCV::PseudoAddTPRel) {
+    expandAddTPRel(MI, OS, Fixups, STI);
+    MCNumEmitted += 1;
+    return;
+  }
+
   switch (Size) {
   default:
     llvm_unreachable("Unhandled encodeInstruction length!");
@@ -205,6 +259,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
   const MCExpr *Expr = MO.getExpr();
   MCExpr::ExprKind Kind = Expr->getKind();
   RISCV::Fixups FixupKind = RISCV::fixup_riscv_invalid;
+  bool RelaxCandidate = false;
   if (Kind == MCExpr::Target) {
     const RISCVMCExpr *RVExpr = cast<RISCVMCExpr>(Expr);
 
@@ -212,6 +267,13 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
     case RISCVMCExpr::VK_RISCV_None:
     case RISCVMCExpr::VK_RISCV_Invalid:
       llvm_unreachable("Unhandled fixup kind!");
+    case RISCVMCExpr::VK_RISCV_TPREL_ADD:
+      // tprel_add is only used to indicate that a relocation should be emitted
+      // for an add instruction used in TP-relative addressing. It should not be
+      // expanded as if representing an actual instruction operand and so to
+      // encounter it here is an error.
+      llvm_unreachable(
+          "VK_RISCV_TPREL_ADD should not represent an instruction operand");
     case RISCVMCExpr::VK_RISCV_LO:
       if (MIFrm == RISCVII::InstFormatI)
         FixupKind = RISCV::fixup_riscv_lo12_i;
@@ -219,9 +281,11 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
         FixupKind = RISCV::fixup_riscv_lo12_s;
       else
         llvm_unreachable("VK_RISCV_LO used with unexpected instruction format");
+      RelaxCandidate = true;
       break;
     case RISCVMCExpr::VK_RISCV_HI:
       FixupKind = RISCV::fixup_riscv_hi20;
+      RelaxCandidate = true;
       break;
     case RISCVMCExpr::VK_RISCV_PCREL_LO:
       if (MIFrm == RISCVII::InstFormatI)
@@ -231,12 +295,42 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
       else
         llvm_unreachable(
             "VK_RISCV_PCREL_LO used with unexpected instruction format");
+      RelaxCandidate = true;
       break;
     case RISCVMCExpr::VK_RISCV_PCREL_HI:
       FixupKind = RISCV::fixup_riscv_pcrel_hi20;
+      RelaxCandidate = true;
+      break;
+    case RISCVMCExpr::VK_RISCV_GOT_HI:
+      FixupKind = RISCV::fixup_riscv_got_hi20;
+      break;
+    case RISCVMCExpr::VK_RISCV_TPREL_LO:
+      if (MIFrm == RISCVII::InstFormatI)
+        FixupKind = RISCV::fixup_riscv_tprel_lo12_i;
+      else if (MIFrm == RISCVII::InstFormatS)
+        FixupKind = RISCV::fixup_riscv_tprel_lo12_s;
+      else
+        llvm_unreachable(
+            "VK_RISCV_TPREL_LO used with unexpected instruction format");
+      RelaxCandidate = true;
+      break;
+    case RISCVMCExpr::VK_RISCV_TPREL_HI:
+      FixupKind = RISCV::fixup_riscv_tprel_hi20;
+      RelaxCandidate = true;
+      break;
+    case RISCVMCExpr::VK_RISCV_TLS_GOT_HI:
+      FixupKind = RISCV::fixup_riscv_tls_got_hi20;
+      break;
+    case RISCVMCExpr::VK_RISCV_TLS_GD_HI:
+      FixupKind = RISCV::fixup_riscv_tls_gd_hi20;
       break;
     case RISCVMCExpr::VK_RISCV_CALL:
       FixupKind = RISCV::fixup_riscv_call;
+      RelaxCandidate = true;
+      break;
+    case RISCVMCExpr::VK_RISCV_CALL_PLT:
+      FixupKind = RISCV::fixup_riscv_call_plt;
+      RelaxCandidate = true;
       break;
     }
   } else if (Kind == MCExpr::SymbolRef &&
@@ -258,13 +352,15 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
       MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc()));
   ++MCNumFixups;
 
-  if (EnableRelax) {
-    if (FixupKind == RISCV::fixup_riscv_call) {
-      Fixups.push_back(
-      MCFixup::create(0, Expr, MCFixupKind(RISCV::fixup_riscv_relax),
-                      MI.getLoc()));
-      ++MCNumFixups;
-    }
+  // Ensure an R_RISCV_RELAX relocation will be emitted if linker relaxation is
+  // enabled and the current fixup will result in a relocation that may be
+  // relaxed.
+  if (EnableRelax && RelaxCandidate) {
+    const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+    Fixups.push_back(
+    MCFixup::create(0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax),
+                    MI.getLoc()));
+    ++MCNumFixups;
   }
 
   return 0;
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 53648a5922c8..ae25ec818171 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVMCExpr.cpp - RISCV specific MC expression classes ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "RISCV.h"
 #include "RISCVMCExpr.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
+#include "RISCV.h"
 #include "RISCVFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
@@ -32,11 +34,15 @@ const RISCVMCExpr *RISCVMCExpr::create(const MCExpr *Expr, VariantKind Kind,
 }
 
 void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-  bool HasVariant =
-      ((getKind() != VK_RISCV_None) && (getKind() != VK_RISCV_CALL));
+  VariantKind Kind = getKind();
+  bool HasVariant = ((Kind != VK_RISCV_None) && (Kind != VK_RISCV_CALL) &&
+                     (Kind != VK_RISCV_CALL_PLT));
+
   if (HasVariant)
     OS << '%' << getVariantKindName(getKind()) << '(';
   Expr->print(OS, MAI);
+  if (Kind == VK_RISCV_CALL_PLT)
+    OS << "@plt";
   if (HasVariant)
     OS << ')';
 }
@@ -50,19 +56,30 @@ const MCFixup *RISCVMCExpr::getPCRelHiFixup() const {
   if (!AUIPCSRE)
     return nullptr;
 
-  const auto *DF =
-      dyn_cast_or_null<MCDataFragment>(AUIPCSRE->findAssociatedFragment());
+  const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+  const auto *DF = dyn_cast_or_null<MCDataFragment>(AUIPCSymbol->getFragment());
+
   if (!DF)
     return nullptr;
 
-  const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+  uint64_t Offset = AUIPCSymbol->getOffset();
+  if (DF->getContents().size() == Offset) {
+    DF = dyn_cast_or_null<MCDataFragment>(DF->getNextNode());
+    if (!DF)
+      return nullptr;
+    Offset = 0;
+  }
+
   for (const MCFixup &F : DF->getFixups()) {
-    if (F.getOffset() != AUIPCSymbol->getOffset())
+    if (F.getOffset() != Offset)
       continue;
 
     switch ((unsigned)F.getKind()) {
     default:
       continue;
+    case RISCV::fixup_riscv_got_hi20:
+    case RISCV::fixup_riscv_tls_got_hi20:
+    case RISCV::fixup_riscv_tls_gd_hi20:
     case RISCV::fixup_riscv_pcrel_hi20:
       return &F;
     }
@@ -79,6 +96,16 @@ bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
   // (<real target> + <offset from this fixup to the auipc fixup>).  The Fixup
   // is pcrel relative to the VK_RISCV_PCREL_LO fixup, so we need to add the
   // offset to the VK_RISCV_PCREL_HI Fixup from VK_RISCV_PCREL_LO to correct.
+
+  // Don't try to evaluate if the fixup will be forced as a relocation (e.g.
+  // as linker relaxation is enabled). If we evaluated pcrel_lo in this case,
+  // the modified fixup will be converted into a relocation that no longer
+  // points to the pcrel_hi as the linker requires.
+  auto &RAB =
+      static_cast<RISCVAsmBackend &>(Layout->getAssembler().getBackend());
+  if (RAB.willForceRelocations())
+    return false;
+
   MCValue AUIPCLoc;
   if (!getSubExpr()->evaluateAsValue(AUIPCLoc, *Layout))
     return false;
@@ -137,6 +164,12 @@ bool RISCVMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
     case VK_RISCV_HI:
     case VK_RISCV_PCREL_LO:
     case VK_RISCV_PCREL_HI:
+    case VK_RISCV_GOT_HI:
+    case VK_RISCV_TPREL_LO:
+    case VK_RISCV_TPREL_HI:
+    case VK_RISCV_TPREL_ADD:
+    case VK_RISCV_TLS_GOT_HI:
+    case VK_RISCV_TLS_GD_HI:
       return false;
     }
   }
@@ -154,6 +187,12 @@ RISCVMCExpr::VariantKind RISCVMCExpr::getVariantKindForName(StringRef name) {
       .Case("hi", VK_RISCV_HI)
       .Case("pcrel_lo", VK_RISCV_PCREL_LO)
       .Case("pcrel_hi", VK_RISCV_PCREL_HI)
+      .Case("got_pcrel_hi", VK_RISCV_GOT_HI)
+      .Case("tprel_lo", VK_RISCV_TPREL_LO)
+      .Case("tprel_hi", VK_RISCV_TPREL_HI)
+      .Case("tprel_add", VK_RISCV_TPREL_ADD)
+      .Case("tls_ie_pcrel_hi", VK_RISCV_TLS_GOT_HI)
+      .Case("tls_gd_pcrel_hi", VK_RISCV_TLS_GD_HI)
       .Default(VK_RISCV_Invalid);
 }
 
@@ -169,14 +208,71 @@ StringRef RISCVMCExpr::getVariantKindName(VariantKind Kind) {
     return "pcrel_lo";
   case VK_RISCV_PCREL_HI:
     return "pcrel_hi";
+  case VK_RISCV_GOT_HI:
+    return "got_pcrel_hi";
+  case VK_RISCV_TPREL_LO:
+    return "tprel_lo";
+  case VK_RISCV_TPREL_HI:
+    return "tprel_hi";
+  case VK_RISCV_TPREL_ADD:
+    return "tprel_add";
+  case VK_RISCV_TLS_GOT_HI:
+    return "tls_ie_pcrel_hi";
+  case VK_RISCV_TLS_GD_HI:
+    return "tls_gd_pcrel_hi";
   }
 }
 
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    llvm_unreachable("Can't handle nested target expression");
+    break;
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+    break;
+  }
+
+  case MCExpr::SymbolRef: {
+    // We're known to be under a TLS fixup, so any symbol should be
+    // modified. There should be only one.
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+    break;
+  }
+
+  case MCExpr::Unary:
+    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  }
+}
+
+void RISCVMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+  switch (getKind()) {
+  default:
+    return;
+  case VK_RISCV_TPREL_HI:
+  case VK_RISCV_TLS_GOT_HI:
+  case VK_RISCV_TLS_GD_HI:
+    break;
+  }
+
+  fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
 bool RISCVMCExpr::evaluateAsConstant(int64_t &Res) const {
   MCValue Value;
 
   if (Kind == VK_RISCV_PCREL_HI || Kind == VK_RISCV_PCREL_LO ||
-      Kind == VK_RISCV_CALL)
+      Kind == VK_RISCV_GOT_HI || Kind == VK_RISCV_TPREL_HI ||
+      Kind == VK_RISCV_TPREL_LO || Kind == VK_RISCV_TPREL_ADD ||
+      Kind == VK_RISCV_TLS_GOT_HI || Kind == VK_RISCV_TLS_GD_HI ||
+      Kind == VK_RISCV_CALL || Kind == VK_RISCV_CALL_PLT)
     return false;
 
   if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr))
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index 4eafcc08b51f..b5a292dc1b1a 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -1,9 +1,8 @@
 //===-- RISCVMCExpr.h - RISCV specific MC expression classes ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,7 +28,14 @@ public:
     VK_RISCV_HI,
     VK_RISCV_PCREL_LO,
     VK_RISCV_PCREL_HI,
+    VK_RISCV_GOT_HI,
+    VK_RISCV_TPREL_LO,
+    VK_RISCV_TPREL_HI,
+    VK_RISCV_TPREL_ADD,
+    VK_RISCV_TLS_GOT_HI,
+    VK_RISCV_TLS_GD_HI,
     VK_RISCV_CALL,
+    VK_RISCV_CALL_PLT,
     VK_RISCV_Invalid
   };
 
@@ -53,11 +59,11 @@ public:
 
   const MCExpr *getSubExpr() const { return Expr; }
 
-  /// Get the MCExpr of the VK_RISCV_PCREL_HI Fixup that the
-  /// VK_RISCV_PCREL_LO points to.
+  /// Get the corresponding PC-relative HI fixup that a VK_RISCV_PCREL_LO
+  /// points to.
   ///
   /// \returns nullptr if this isn't a VK_RISCV_PCREL_LO pointing to a
-  /// VK_RISCV_PCREL_HI.
+  /// known PC-relative HI fixup.
   const MCFixup *getPCRelHiFixup() const;
 
   void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
@@ -68,8 +74,7 @@ public:
     return getSubExpr()->findAssociatedFragment();
   }
 
-  // There are no TLS RISCVMCExprs at the moment.
-  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
 
   bool evaluateAsConstant(int64_t &Res) const;
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 133f3cd3d39a..bc45262ab2de 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVMCTargetDesc.cpp - RISCV Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -12,10 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCVMCTargetDesc.h"
-#include "InstPrinter/RISCVInstPrinter.h"
 #include "RISCVELFStreamer.h"
+#include "RISCVInstPrinter.h"
 #include "RISCVMCAsmInfo.h"
 #include "RISCVTargetStreamer.h"
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -50,7 +50,13 @@ static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
 
 static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
                                        const Triple &TT) {
-  return new RISCVMCAsmInfo(TT);
+  MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
+
+  unsigned SP = MRI.getDwarfRegNum(RISCV::X2, true);
+  MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
+  MAI->addInitialFrameState(Inst);
+
+  return MAI;
 }
 
 static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index 0228253c08cb..b30997533ddf 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- RISCVMCTargetDesc.h - RISCV Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,9 +32,6 @@ class Triple;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheRISCV32Target();
-Target &getTheRISCV64Target();
-
 MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
                                         const MCRegisterInfo &MRI,
                                         MCContext &Ctx);
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 8d5ef3dbd17f..913e1f744192 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVTargetStreamer.cpp - RISCV Target Streamer Methods -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 74ec9e303933..1becc134b2a2 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- RISCVTargetStreamer.h - RISCV Target Streamer ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h
index b25aee46200d..834a1d171143 100644
--- a/lib/Target/RISCV/RISCV.h
+++ b/lib/Target/RISCV/RISCV.h
@@ -1,9 +1,8 @@
 //===-- RISCV.h - Top-level interface for RISCV -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td
index 0e86e2bc5e98..e19b70b8e709 100644
--- a/lib/Target/RISCV/RISCV.td
+++ b/lib/Target/RISCV/RISCV.td
@@ -1,9 +1,8 @@
 //===-- RISCV.td - Describe the RISCV Target Machine -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,23 +54,29 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
 def RV64           : HwMode<"+64bit">;
 def RV32           : HwMode<"-64bit">;
 
+def FeatureRV32E
+    : SubtargetFeature<"e", "IsRV32E", "true",
+                       "Implements RV32E (provides 16 rather than 32 GPRs)">;
+def IsRV32E : Predicate<"Subtarget->isRV32E()">,
+                        AssemblerPredicate<"FeatureRV32E">;
+
 def FeatureRelax
     : SubtargetFeature<"relax", "EnableLinkerRelax", "true",
                        "Enable Linker relaxation.">;
 
 //===----------------------------------------------------------------------===//
-// Registers, calling conventions, instruction descriptions.
+// Named operands for CSR instructions.
 //===----------------------------------------------------------------------===//
 
-include "RISCVRegisterInfo.td"
-include "RISCVCallingConv.td"
-include "RISCVInstrInfo.td"
+include "RISCVSystemOperands.td"
 
 //===----------------------------------------------------------------------===//
-// Named operands for CSR instructions.
+// Registers, calling conventions, instruction descriptions.
 //===----------------------------------------------------------------------===//
 
-include "RISCVSystemOperands.td"
+include "RISCVRegisterInfo.td"
+include "RISCVCallingConv.td"
+include "RISCVInstrInfo.td"
 
 //===----------------------------------------------------------------------===//
 // RISC-V processors supported.
diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp
index bdf8e5d840b3..57631dcb5115 100644
--- a/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVAsmPrinter.cpp - RISCV LLVM assembly writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,9 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "RISCV.h"
-#include "InstPrinter/RISCVInstPrinter.h"
+#include "MCTargetDesc/RISCVInstPrinter.h"
 #include "MCTargetDesc/RISCVMCExpr.h"
 #include "RISCVTargetMachine.h"
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,11 +43,9 @@ public:
   void EmitInstruction(const MachineInstr *MI) override;
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &OS) override;
+                       const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &OS) override;
+                             const char *ExtraCode, raw_ostream &OS) override;
 
   void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
   bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -84,39 +82,50 @@ void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 }
 
 bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,
                                       const char *ExtraCode, raw_ostream &OS) {
-  if (AsmVariant != 0)
-    report_fatal_error("There are no defined alternate asm variants");
-
   // First try the generic code, which knows about modifiers like 'c' and 'n'.
-  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
     return false;
 
-  if (!ExtraCode) {
-    const MachineOperand &MO = MI->getOperand(OpNo);
-    switch (MO.getType()) {
-    case MachineOperand::MO_Immediate:
-      OS << MO.getImm();
-      return false;
-    case MachineOperand::MO_Register:
-      OS << RISCVInstPrinter::getRegisterName(MO.getReg());
-      return false;
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
     default:
+      return true; // Unknown modifier.
+    case 'z':      // Print zero register if zero, regular printing otherwise.
+      if (MO.isImm() && MO.getImm() == 0) {
+        OS << RISCVInstPrinter::getRegisterName(RISCV::X0);
+        return false;
+      }
       break;
+    case 'i': // Literal 'i' if operand is not a register.
+      if (!MO.isReg())
+        OS << 'i';
+      return false;
     }
   }
 
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate:
+    OS << MO.getImm();
+    return false;
+  case MachineOperand::MO_Register:
+    OS << RISCVInstPrinter::getRegisterName(MO.getReg());
+    return false;
+  default:
+    break;
+  }
+
   return true;
 }
 
 bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo, unsigned AsmVariant,
+                                            unsigned OpNo,
                                             const char *ExtraCode,
                                             raw_ostream &OS) {
-  if (AsmVariant != 0)
-    report_fatal_error("There are no defined alternate asm variants");
-
   if (!ExtraCode) {
     const MachineOperand &MO = MI->getOperand(OpNo);
     // For now, we only support register memory operands in registers and
@@ -128,7 +137,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
     return false;
   }
 
-  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
 }
 
 // Force static initialization.
diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td
index ef146258c383..db13e6e8beca 100644
--- a/lib/Target/RISCV/RISCVCallingConv.td
+++ b/lib/Target/RISCV/RISCVCallingConv.td
@@ -1,9 +1,8 @@
 //===-- RISCVCallingConv.td - Calling Conventions RISCV ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,7 +13,16 @@
 // The RISC-V calling convention is handled with custom code in
 // RISCVISelLowering.cpp (CC_RISCV).
 
-def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+def CSR_ILP32_LP64
+    : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+
+def CSR_ILP32F_LP64F
+    : CalleeSavedRegs<(add CSR_ILP32_LP64,
+                       F8_32, F9_32, (sequence "F%u_32", 18, 27))>;
+
+def CSR_ILP32D_LP64D
+    : CalleeSavedRegs<(add CSR_ILP32_LP64,
+                       F8_64, F9_64, (sequence "F%u_64", 18, 27))>;
 
 // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 35c185aa5edd..1c5171a7b7a4 100644
--- a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,22 @@ private:
   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, bool IsMasked,
                            int Width, MachineBasicBlock::iterator &NextMBBI);
+  bool expandAuipcInstPair(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           MachineBasicBlock::iterator &NextMBBI,
+                           unsigned FlagsHi, unsigned SecondOpcode);
+  bool expandLoadLocalAddress(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddress(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI,
+                         MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadTLSIEAddress(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadTLSGDAddress(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
 };
 
 char RISCVExpandPseudo::ID = 0;
@@ -87,6 +102,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case RISCV::PseudoAtomicLoadNand32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                              NextMBBI);
+  case RISCV::PseudoAtomicLoadNand64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
+                             NextMBBI);
   case RISCV::PseudoMaskedAtomicSwap32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                              NextMBBI);
@@ -111,8 +129,18 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                 NextMBBI);
   case RISCV::PseudoCmpXchg32:
     return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+  case RISCV::PseudoCmpXchg64:
+    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
   case RISCV::PseudoMaskedCmpXchg32:
     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+  case RISCV::PseudoLLA:
+    return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
+  case RISCV::PseudoLA:
+    return expandLoadAddress(MBB, MBBI, NextMBBI);
+  case RISCV::PseudoLA_TLS_IE:
+    return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI);
+  case RISCV::PseudoLA_TLS_GD:
+    return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
   }
 
   return false;
@@ -152,12 +180,61 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
   }
 }
 
+static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::LR_D;
+  case AtomicOrdering::Acquire:
+    return RISCV::LR_D_AQ;
+  case AtomicOrdering::Release:
+    return RISCV::LR_D;
+  case AtomicOrdering::AcquireRelease:
+    return RISCV::LR_D_AQ;
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::LR_D_AQ_RL;
+  }
+}
+
+static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+  switch (Ordering) {
+  default:
+    llvm_unreachable("Unexpected AtomicOrdering");
+  case AtomicOrdering::Monotonic:
+    return RISCV::SC_D;
+  case AtomicOrdering::Acquire:
+    return RISCV::SC_D;
+  case AtomicOrdering::Release:
+    return RISCV::SC_D_RL;
+  case AtomicOrdering::AcquireRelease:
+    return RISCV::SC_D_RL;
+  case AtomicOrdering::SequentiallyConsistent:
+    return RISCV::SC_D_AQ_RL;
+  }
+}
+
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+  if (Width == 32)
+    return getLRForRMW32(Ordering);
+  if (Width == 64)
+    return getLRForRMW64(Ordering);
+  llvm_unreachable("Unexpected LR width\n");
+}
+
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+  if (Width == 32)
+    return getSCForRMW32(Ordering);
+  if (Width == 64)
+    return getSCForRMW64(Ordering);
+  llvm_unreachable("Unexpected SC width\n");
+}
+
 static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                    DebugLoc DL, MachineBasicBlock *ThisMBB,
                                    MachineBasicBlock *LoopMBB,
                                    MachineBasicBlock *DoneMBB,
                                    AtomicRMWInst::BinOp BinOp, int Width) {
-  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
   unsigned DestReg = MI.getOperand(0).getReg();
   unsigned ScratchReg = MI.getOperand(1).getReg();
   unsigned AddrReg = MI.getOperand(2).getReg();
@@ -166,11 +243,11 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
       static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
 
   // .loop:
-  //   lr.w dest, (addr)
+  //   lr.[w|d] dest, (addr)
   //   binop scratch, dest, val
-  //   sc.w scratch, scratch, (addr)
+  //   sc.[w|d] scratch, scratch, (addr)
   //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
       .addReg(AddrReg);
   switch (BinOp) {
   default:
@@ -184,7 +261,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
         .addImm(-1);
     break;
   }
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
       .addReg(AddrReg)
       .addReg(ScratchReg);
   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -219,7 +296,7 @@ static void doMaskedAtomicBinOpExpansion(
     const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
     MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
     MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
-  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
   unsigned DestReg = MI.getOperand(0).getReg();
   unsigned ScratchReg = MI.getOperand(1).getReg();
   unsigned AddrReg = MI.getOperand(2).getReg();
@@ -333,7 +410,7 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp(
     MachineBasicBlock::iterator &NextMBBI) {
   assert(IsMasked == true &&
          "Should only need to expand masked atomic max/min");
-  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
 
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
@@ -451,7 +528,6 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp(
 bool RISCVExpandPseudo::expandAtomicCmpXchg(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
     int Width, MachineBasicBlock::iterator &NextMBBI) {
-  assert(Width == 32 && "RV64 atomic expansion currently unsupported");
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
   MachineFunction *MF = MBB.getParent();
@@ -483,18 +559,18 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
 
   if (!IsMasked) {
     // .loophead:
-    //   lr.w dest, (addr)
+    //   lr.[w|d] dest, (addr)
     //   bne dest, cmpval, done
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
         .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
         .addReg(DestReg)
         .addReg(CmpValReg)
         .addMBB(DoneMBB);
     // .looptail:
-    //   sc.w scratch, newval, (addr)
+    //   sc.[w|d] scratch, newval, (addr)
     //   bnez scratch, loophead
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
         .addReg(AddrReg)
         .addReg(NewValReg);
     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -507,7 +583,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
     //   and scratch, dest, mask
     //   bne scratch, cmpval, done
     unsigned MaskReg = MI.getOperand(5).getReg();
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
         .addReg(AddrReg);
     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
         .addReg(DestReg)
@@ -525,7 +601,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
     //   bnez scratch, loophead
     insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                       MaskReg, ScratchReg);
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
         .addReg(AddrReg)
         .addReg(ScratchReg);
     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -545,6 +621,90 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
   return true;
 }
 
+bool RISCVExpandPseudo::expandAuipcInstPair(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi,
+    unsigned SecondOpcode) {
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  unsigned DestReg = MI.getOperand(0).getReg();
+  const MachineOperand &Symbol = MI.getOperand(1);
+
+  MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Tell AsmPrinter that we unconditionally want the symbol of this label to be
+  // emitted.
+  NewMBB->setLabelMustBeEmitted();
+
+  MF->insert(++MBB.getIterator(), NewMBB);
+
+  BuildMI(NewMBB, DL, TII->get(RISCV::AUIPC), DestReg)
+      .addDisp(Symbol, 0, FlagsHi);
+  BuildMI(NewMBB, DL, TII->get(SecondOpcode), DestReg)
+      .addReg(DestReg)
+      .addMBB(NewMBB, RISCVII::MO_PCREL_LO);
+
+  // Move all the rest of the instructions to NewMBB.
+  NewMBB->splice(NewMBB->end(), &MBB, std::next(MBBI), MBB.end());
+  // Update machine-CFG edges.
+  NewMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+  // Make the original basic block fall-through to the new.
+  MBB.addSuccessor(NewMBB);
+
+  // Make sure live-ins are correctly attached to this new basic block.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *NewMBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+bool RISCVExpandPseudo::expandLoadLocalAddress(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_PCREL_HI,
+                             RISCV::ADDI);
+}
+
+bool RISCVExpandPseudo::expandLoadAddress(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineFunction *MF = MBB.getParent();
+
+  unsigned SecondOpcode;
+  unsigned FlagsHi;
+  if (MF->getTarget().isPositionIndependent()) {
+    const auto &STI = MF->getSubtarget<RISCVSubtarget>();
+    SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+    FlagsHi = RISCVII::MO_GOT_HI;
+  } else {
+    SecondOpcode = RISCV::ADDI;
+    FlagsHi = RISCVII::MO_PCREL_HI;
+  }
+  return expandAuipcInstPair(MBB, MBBI, NextMBBI, FlagsHi, SecondOpcode);
+}
+
+bool RISCVExpandPseudo::expandLoadTLSIEAddress(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineFunction *MF = MBB.getParent();
+
+  const auto &STI = MF->getSubtarget<RISCVSubtarget>();
+  unsigned SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+  return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GOT_HI,
+                             SecondOpcode);
+}
+
+bool RISCVExpandPseudo::expandLoadTLSGDAddress(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GD_HI,
+                             RISCV::ADDI);
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp
index 74417899c8da..32c3b9684d2c 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVFrameLowering.cpp - RISCV Frame Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCDwarf.h"
 
 using namespace llvm;
 
@@ -97,6 +97,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+  const RISCVInstrInfo *TII = STI.getInstrInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
 
   unsigned FPReg = getFPReg(STI);
@@ -120,6 +122,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   // Allocate space on the stack if necessary.
   adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
 
+  // Emit ".cfi_def_cfa_offset StackSize"
+  unsigned CFIIndex = MF.addFrameInst(
+      MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
+
   // The frame pointer is callee-saved, and code has been generated for us to
   // save it to the stack. We need to skip over the storing of callee-saved
   // registers as the frame pointer must be modified after it has been saved
@@ -129,10 +137,28 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   std::advance(MBBI, CSI.size());
 
+  // Iterate over list of callee-saved registers and emit .cfi_offset
+  // directives.
+  for (const auto &Entry : CSI) {
+    int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+    unsigned Reg = Entry.getReg();
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, RI->getDwarfRegNum(Reg, true), Offset));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
+
   // Generate new FP.
-  if (hasFP(MF))
+  if (hasFP(MF)) {
     adjustReg(MBB, MBBI, DL, FPReg, SPReg,
               StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup);
+
+    // Emit ".cfi_def_cfa $fp, 0"
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+        nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
 }
 
 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -142,6 +168,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
   DebugLoc DL = MBBI->getDebugLoc();
+  const RISCVInstrInfo *TII = STI.getInstrInfo();
   unsigned FPReg = getFPReg(STI);
   unsigned SPReg = getSPReg(STI);
 
@@ -151,19 +178,58 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
   auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
 
   uint64_t StackSize = MFI.getStackSize();
+  uint64_t FPOffset = StackSize - RVFI->getVarArgsSaveSize();
 
   // Restore the stack pointer using the value of the frame pointer. Only
   // necessary if the stack pointer was modified, meaning the stack size is
   // unknown.
   if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) {
     assert(hasFP(MF) && "frame pointer should not have been eliminated");
-    adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
-              -StackSize + RVFI->getVarArgsSaveSize(),
+    adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset,
               MachineInstr::FrameDestroy);
   }
 
+  if (hasFP(MF)) {
+    // Find the instruction that restores FP from the stack.
+    for (auto &I = LastFrameDestroy; I != MBBI; ++I) {
+      if (I->mayLoad() && I->getOperand(0).isReg()) {
+        unsigned DestReg = I->getOperand(0).getReg();
+        if (DestReg == FPReg) {
+          // If there is a frame pointer, after restoring $fp we need to
+          // adjust the CFA to ($sp - FPOffset).
+          // Emit ".cfi_def_cfa $sp, -FPOffset"
+          unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+              nullptr, RI->getDwarfRegNum(SPReg, true), -FPOffset));
+          BuildMI(MBB, std::next(I), DL,
+                  TII->get(TargetOpcode::CFI_INSTRUCTION))
+              .addCFIIndex(CFIIndex);
+          break;
+        }
+      }
+    }
+  }
+
+  // Add CFI directives for callee-saved registers.
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  // Iterate over list of callee-saved registers and emit .cfi_restore
+  // directives.
+  for (const auto &Entry : CSI) {
+    unsigned Reg = Entry.getReg();
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
+        nullptr, RI->getDwarfRegNum(Reg, true)));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+  }
+
   // Deallocate stack
   adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+
+  // After restoring $sp, we need to adjust CFA to ($sp + 0)
+  // Emit ".cfi_def_cfa_offset 0"
+  unsigned CFIIndex =
+      MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex);
 }
 
 int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h
index ca653c2b9f17..0e045c3ff853 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/lib/Target/RISCV/RISCVFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- RISCVFrameLowering.h - Define frame lowering for RISCV -*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index aa80365feb83..d0a3af375a6d 100644
--- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -156,7 +155,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         return;
       }
     }
+    break;
   }
+  case RISCVISD::READ_CYCLE_WIDE:
+    assert(!Subtarget->is64Bit() && "READ_CYCLE_WIDE is only used on riscv32");
+
+    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ReadCycleWide, DL, MVT::i32,
+                                             MVT::i32, MVT::Other,
+                                             Node->getOperand(0)));
+    return;
   }
 
   // Select the default instruction.
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 508dcbd009ed..ce7b85911ab6 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,8 @@
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
 #include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -43,6 +44,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
 
+  if (Subtarget.isRV32E())
+    report_fatal_error("Codegen not yet implemented for RV32E");
+
+  RISCVABI::ABI ABI = Subtarget.getTargetABI();
+  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
+
+  switch (ABI) {
+  default:
+    report_fatal_error("Don't know how to lower this ABI");
+  case RISCVABI::ABI_ILP32:
+  case RISCVABI::ABI_ILP32F:
+  case RISCVABI::ABI_ILP32D:
+  case RISCVABI::ABI_LP64:
+  case RISCVABI::ABI_LP64F:
+  case RISCVABI::ABI_LP64D:
+    break;
+  }
+
   MVT XLenVT = Subtarget.getXLenVT();
 
   // Set up the register classes.
@@ -81,10 +100,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::SHL);
-    setTargetDAGCombine(ISD::SRL);
-    setTargetDAGCombine(ISD::SRA);
-    setTargetDAGCombine(ISD::ANY_EXTEND);
+    setOperationAction(ISD::SHL, MVT::i32, Custom);
+    setOperationAction(ISD::SRA, MVT::i32, Custom);
+    setOperationAction(ISD::SRL, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -97,14 +115,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UREM, XLenVT, Expand);
   }
 
+  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::SDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
 
-  setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
-  setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
-  setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
+  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
+  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
+  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
   setOperationAction(ISD::ROTL, XLenVT, Expand);
   setOperationAction(ISD::ROTR, XLenVT, Expand);
@@ -114,9 +138,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTPOP, XLenVT, Expand);
 
   ISD::CondCode FPCCToExtend[] = {
-      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
-      ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
-      ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
+      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
+      ISD::SETGE,  ISD::SETNE};
 
   ISD::NodeType FPOpToExtend[] = {
       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
@@ -133,6 +157,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(Op, MVT::f32, Expand);
   }
 
+  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
+    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+
   if (Subtarget.hasStdExtD()) {
     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -151,6 +178,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
   setOperationAction(ISD::ConstantPool, XLenVT, Custom);
 
+  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
+
+  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
+  // Unfortunately this can't be determined just from the ISA naming string.
+  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+                     Subtarget.is64Bit() ? Legal : Custom);
+
   if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
@@ -276,6 +310,11 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
 }
 
+bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
+         (VT == MVT::f64 && Subtarget.hasStdExtD());
+}
+
 // Changes the condition code and swaps operands if necessary, so the SetCC
 // operation matches one of the comparisons supported directly in the RISC-V
 // ISA.
@@ -326,6 +365,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
     return lowerConstantPool(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(Op, DAG);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
   case ISD::VASTART:
@@ -334,6 +375,81 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::SHL_PARTS:
+    return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return lowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:
+    return lowerShiftRightParts(Op, DAG, false);
+  case ISD::BITCAST: {
+    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
+           "Unexpected custom legalisation");
+    SDLoc DL(Op);
+    SDValue Op0 = Op.getOperand(0);
+    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
+      return SDValue();
+    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+    return FPConv;
+  }
+  }
+}
+
+static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
+}
+
+static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+                                   Flags);
+}
+
+static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
+                             SelectionDAG &DAG, unsigned Flags) {
+  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+                                   N->getOffset(), Flags);
+}
+
+template <class NodeTy>
+SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
+                                     bool IsLocal) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+
+  if (isPositionIndependent()) {
+    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+    if (IsLocal)
+      // Use PC-relative addressing to access the symbol. This generates the
+      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
+      // %pcrel_lo(auipc)).
+      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+
+    // Use PC-relative addressing to access the GOT for this symbol, then load
+    // the address from the GOT. This generates the pattern (PseudoLA sym),
+    // which expands to (ld (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc)).
+    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
+  }
+
+  switch (getTargetMachine().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model for lowering");
+  case CodeModel::Small: {
+    // Generate a sequence for accessing addresses within the first 2 GiB of
+    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
+    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
+    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
+    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
+  }
+  case CodeModel::Medium: {
+    // Generate a sequence for accessing addresses within any 2GiB range within
+    // the address space. This generates the pattern (PseudoLLA sym), which
+    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
+    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+  }
   }
 }
 
@@ -342,67 +458,145 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
-  const GlobalValue *GV = N->getGlobal();
   int64_t Offset = N->getOffset();
   MVT XLenVT = Subtarget.getXLenVT();
 
-  if (isPositionIndependent())
-    report_fatal_error("Unable to lowerGlobalAddress");
+  const GlobalValue *GV = N->getGlobal();
+  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+  SDValue Addr = getAddr(N, DAG, IsLocal);
+
   // In order to maximise the opportunity for common subexpression elimination,
   // emit a separate ADD node for the global address offset instead of folding
   // it in the global address node. Later peephole optimisations may choose to
   // fold it back in when profitable.
-  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
-  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
-  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
-  SDValue MNLo =
-    SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
   if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
+    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                        DAG.getConstant(Offset, DL, XLenVT));
-  return MNLo;
+  return Addr;
 }
 
 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  EVT Ty = Op.getValueType();
   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
-  const BlockAddress *BA = N->getBlockAddress();
-  int64_t Offset = N->getOffset();
-
-  if (isPositionIndependent())
-    report_fatal_error("Unable to lowerBlockAddress");
 
-  SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
-  SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
-  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
-  SDValue MNLo =
-    SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
-  return MNLo;
+  return getAddr(N, DAG);
 }
 
 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                                SelectionDAG &DAG) const {
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+
+  return getAddr(N, DAG);
+}
+
+SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+                                              SelectionDAG &DAG,
+                                              bool UseGOT) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  const GlobalValue *GV = N->getGlobal();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  if (UseGOT) {
+    // Use PC-relative addressing to access the GOT for this TLS symbol, then
+    // load the address from the GOT and add the thread pointer. This generates
+    // the pattern (PseudoLA_TLS_IE sym), which expands to
+    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
+    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+    SDValue Load =
+        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
+
+    // Add the thread pointer.
+    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
+    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
+  }
+
+  // Generate a sequence for accessing the address relative to the thread
+  // pointer, with the appropriate adjustment for the thread pointer offset.
+  // This generates the pattern
+  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
+  SDValue AddrHi =
+      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
+  SDValue AddrAdd =
+      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
+  SDValue AddrLo =
+      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
+
+  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
+  SDValue MNAdd = SDValue(
+      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
+      0);
+  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
+}
+
+SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+  const GlobalValue *GV = N->getGlobal();
+
+  // Use a PC-relative addressing mode to access the global dynamic GOT address.
+  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
+  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
+  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+  SDValue Load =
+      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
+
+  // Prepare argument list to generate call.
+  ArgListTy Args;
+  ArgListEntry Entry;
+  Entry.Node = Load;
+  Entry.Ty = CallTy;
+  Args.push_back(Entry);
+
+  // Setup call to __tls_get_addr.
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(DAG.getEntryNode())
+      .setLibCallee(CallingConv::C, CallTy,
+                    DAG.getExternalSymbol("__tls_get_addr", Ty),
+                    std::move(Args));
+
+  return LowerCallTo(CLI).first;
+}
+
+SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
+                                                   SelectionDAG &DAG) const {
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
-  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
-  const Constant *CPA = N->getConstVal();
+  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
   int64_t Offset = N->getOffset();
-  unsigned Alignment = N->getAlignment();
-
-  if (!isPositionIndependent()) {
-    SDValue CPAHi =
-        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
-    SDValue CPALo =
-        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
-    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
-    SDValue MNLo =
-        SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
-    return MNLo;
-  } else {
-    report_fatal_error("Unable to lowerConstantPool");
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // Non-PIC TLS lowering should always use the LocalExec model.
+  TLSModel::Model Model = isPositionIndependent()
+                              ? getTargetMachine().getTLSModel(N->getGlobal())
+                              : TLSModel::LocalExec;
+
+  SDValue Addr;
+  switch (Model) {
+  case TLSModel::LocalExec:
+    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
+    break;
+  case TLSModel::InitialExec:
+    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
+    break;
+  case TLSModel::LocalDynamic:
+  case TLSModel::GeneralDynamic:
+    Addr = getDynamicTLSAddr(N, DAG);
+    break;
   }
+
+  // In order to maximise the opportunity for common subexpression elimination,
+  // emit a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  if (Offset != 0)
+    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+                       DAG.getConstant(Offset, DL, XLenVT));
+  return Addr;
 }
 
 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -513,29 +707,184 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
 }
 
-// Return true if the given node is a shift with a non-constant shift amount.
-static bool isVariableShift(SDValue Val) {
-  switch (Val.getOpcode()) {
+SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+
+  // if Shamt-XLEN < 0: // Shamt < XLEN
+  //   Lo = Lo << Shamt
+  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
+  // else:
+  //   Lo = 0
+  //   Hi = Lo << (Shamt-XLEN)
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
+  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
+  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+  SDValue ShiftRightLo =
+      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
+  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
+
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi};
+  return DAG.getMergeValues(Parts, DL);
+}
+
+SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+                                                  bool IsSRA) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+
+  // SRA expansion:
+  //   if Shamt-XLEN < 0: // Shamt < XLEN
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+  //     Hi = Hi >>s Shamt
+  //   else:
+  //     Lo = Hi >>s (Shamt-XLEN);
+  //     Hi = Hi >>s (XLEN-1)
+  //
+  // SRL expansion:
+  //   if Shamt-XLEN < 0: // Shamt < XLEN
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+  //     Hi = Hi >>u Shamt
+  //   else:
+  //     Lo = Hi >>u (Shamt-XLEN);
+  //     Hi = 0;
+
+  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
+  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
+  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+  SDValue ShiftLeftHi =
+      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
+  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
+  SDValue HiFalse =
+      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
+
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi};
+  return DAG.getMergeValues(Parts, DL);
+}
+
+// Returns the opcode of the target-specific SDNode that implements the 32-bit
+// form of the given Opcode.
+static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
+  switch (Opcode) {
   default:
-    return false;
+    llvm_unreachable("Unexpected opcode");
   case ISD::SHL:
+    return RISCVISD::SLLW;
   case ISD::SRA:
+    return RISCVISD::SRAW;
   case ISD::SRL:
-    return Val.getOperand(1).getOpcode() != ISD::Constant;
+    return RISCVISD::SRLW;
+  case ISD::SDIV:
+    return RISCVISD::DIVW;
+  case ISD::UDIV:
+    return RISCVISD::DIVUW;
+  case ISD::UREM:
+    return RISCVISD::REMUW;
   }
 }
 
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
-  switch (Val.getOpcode()) {
+// Converts the given 32-bit operation to a target-specific SelectionDAG node.
+// Because i32 isn't a legal type for RV64, these operations would otherwise
+// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
+// later on because the fact that the operation was originally of type i32 is
+// lost.
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
+  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+  // ReplaceNodeResults requires we maintain the same type for the return value.
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+}
+
+void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) const {
+  SDLoc DL(N);
+  switch (N->getOpcode()) {
   default:
-    return false;
+    llvm_unreachable("Don't know how to custom type legalize this operation!");
+  case ISD::READCYCLECOUNTER: {
+    assert(!Subtarget.is64Bit() &&
+           "READCYCLECOUNTER only has custom type legalization on riscv32");
+
+    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+    SDValue RCW =
+        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
+
+    Results.push_back(RCW);
+    Results.push_back(RCW.getValue(1));
+    Results.push_back(RCW.getValue(2));
+    break;
+  }
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    if (N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::UREM:
-    return Val.getOperand(0).getOpcode() != ISD::Constant &&
-           Val.getOperand(1).getOpcode() != ISD::Constant;
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+    if (N->getOperand(0).getOpcode() == ISD::Constant ||
+        N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
+  case ISD::BITCAST: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
+    SDLoc DL(N);
+    SDValue Op0 = N->getOperand(0);
+    if (Op0.getValueType() != MVT::f32)
+      return;
+    SDValue FPConv =
+        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+    break;
+  }
   }
 }
 
@@ -546,51 +895,225 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::SHL:
-  case ISD::SRL:
-  case ISD::SRA: {
-    assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
-    if (!DCI.isBeforeLegalize())
-      break;
-    SDValue RHS = N->getOperand(1);
-    if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
-        (RHS->getOpcode() == ISD::AssertZext &&
-         cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
-      break;
-    SDValue LHS = N->getOperand(0);
-    SDLoc DL(N);
-    SDValue NewRHS =
-        DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
-                    DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
-    return DCI.CombineTo(
-        N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
-  }
-  case ISD::ANY_EXTEND: {
-    // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
-    // then instead sign-extend in order to increase the chance of being able
-    // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
-    SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
-      break;
-    if (!isVariableShift(Src) &&
-        !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
-      break;
-    SDLoc DL(N);
-    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
-  }
   case RISCVISD::SplitF64: {
+    SDValue Op0 = N->getOperand(0);
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
+    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
+      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+
+    SDLoc DL(N);
+
+    // It's cheaper to materialise two 32-bit integers than to load a double
+    // from the constant pool and transfer it to integer registers through the
+    // stack.
+    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
+      APInt V = C->getValueAPF().bitcastToAPInt();
+      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
+      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
+      return DCI.CombineTo(N, Lo, Hi);
+    }
+
+    // This is a target-specific version of a DAGCombine performed in
+    // DAGCombiner::visitBITCAST. It performs the equivalent of:
+    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
+        !Op0.getNode()->hasOneUse())
+      break;
+    SDValue NewSplitF64 =
+        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
+                    Op0.getOperand(0));
+    SDValue Lo = NewSplitF64.getValue(0);
+    SDValue Hi = NewSplitF64.getValue(1);
+    APInt SignBit = APInt::getSignMask(32);
+    if (Op0.getOpcode() == ISD::FNEG) {
+      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
+                                  DAG.getConstant(SignBit, DL, MVT::i32));
+      return DCI.CombineTo(N, Lo, NewHi);
+    }
+    assert(Op0.getOpcode() == ISD::FABS);
+    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
+                                DAG.getConstant(~SignBit, DL, MVT::i32));
+    return DCI.CombineTo(N, Lo, NewHi);
+  }
+  case RISCVISD::SLLW:
+  case RISCVISD::SRAW:
+  case RISCVISD::SRLW: {
+    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
+    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
+    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
+        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
+      return SDValue();
+    break;
+  }
+  case RISCVISD::FMV_X_ANYEXTW_RV64: {
+    SDLoc DL(N);
     SDValue Op0 = N->getOperand(0);
-    if (Op0->getOpcode() != RISCVISD::BuildPairF64)
+    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
+    // conversion is unnecessary and can be replaced with an ANY_EXTEND
+    // of the FMV_W_X_RV64 operand.
+    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
+      SDValue AExtOp =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
+      return DCI.CombineTo(N, AExtOp);
+    }
+
+    // This is a target-specific version of a DAGCombine performed in
+    // DAGCombiner::visitBITCAST. It performs the equivalent of:
+    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
+        !Op0.getNode()->hasOneUse())
       break;
-    return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
+                                 Op0.getOperand(0));
+    APInt SignBit = APInt::getSignMask(32).sext(64);
+    if (Op0.getOpcode() == ISD::FNEG) {
+      return DCI.CombineTo(N,
+                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
+                                       DAG.getConstant(SignBit, DL, MVT::i64)));
+    }
+    assert(Op0.getOpcode() == ISD::FABS);
+    return DCI.CombineTo(N,
+                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
+                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
   }
   }
 
   return SDValue();
 }
 
+bool RISCVTargetLowering::isDesirableToCommuteWithShift(
+    const SDNode *N, CombineLevel Level) const {
+  // The following folds are only desirable if `(OP _, c1 << c2)` can be
+  // materialised in fewer instructions than `(OP _, c1)`:
+  //
+  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  SDValue N0 = N->getOperand(0);
+  EVT Ty = N0.getValueType();
+  if (Ty.isScalarInteger() &&
+      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
+    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (C1 && C2) {
+      APInt C1Int = C1->getAPIntValue();
+      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
+
+      // We can materialise `c1 << c2` into an add immediate, so it's "free",
+      // and the combine should happen, to potentially allow further combines
+      // later.
+      if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
+        return true;
+
+      // We can materialise `c1` in an add immediate, so it's "free", and the
+      // combine should be prevented.
+      if (isLegalAddImmediate(C1Int.getSExtValue()))
+        return false;
+
+      // Neither constant will fit into an immediate, so find materialisation
+      // costs.
+      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
+                                              Subtarget.is64Bit());
+      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
+          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+
+      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
+      // combine should be prevented.
+      if (C1Cost < ShiftedC1Cost)
+        return false;
+    }
+  }
+  return true;
+}
+
+unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  default:
+    break;
+  case RISCVISD::SLLW:
+  case RISCVISD::SRAW:
+  case RISCVISD::SRLW:
+  case RISCVISD::DIVW:
+  case RISCVISD::DIVUW:
+  case RISCVISD::REMUW:
+    // TODO: As the result is sign-extended, this is conservatively correct. A
+    // more precise answer could be calculated for SRAW depending on known
+    // bits in the shift amount.
+    return 33;
+  }
+
+  return 1;
+}
+
+MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
+                                           MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
+
+  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
+  // Should the count have wrapped while it was being read, we need to try
+  // again.
+  // ...
+  // read:
+  // rdcycleh x3 # load high word of cycle
+  // rdcycle  x2 # load low word of cycle
+  // rdcycleh x4 # load high word of cycle
+  // bne x3, x4, read # check if high word reads match, otherwise try again
+  // ...
+
+  MachineFunction &MF = *BB->getParent();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(It, LoopMBB);
+
+  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(It, DoneMBB);
+
+  // Transfer the remainder of BB and its successor edges to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), BB,
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  BB->addSuccessor(LoopMBB);
+
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+  unsigned LoReg = MI.getOperand(0).getReg();
+  unsigned HiReg = MI.getOperand(1).getReg();
+  DebugLoc DL = MI.getDebugLoc();
+
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
+      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
+      .addReg(RISCV::X0);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
+      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
+      .addReg(RISCV::X0);
+  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
+      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
+      .addReg(RISCV::X0);
+
+  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+      .addReg(HiReg)
+      .addReg(ReadAgainReg)
+      .addMBB(LoopMBB);
+
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  MI.eraseFromParent();
+
+  return DoneMBB;
+}
+
 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                              MachineBasicBlock *BB) {
   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -655,24 +1178,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
   return BB;
 }
 
-MachineBasicBlock *
-RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
-                                                 MachineBasicBlock *BB) const {
+static bool isSelectPseudo(MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default:
-    llvm_unreachable("Unexpected instr type to insert");
+    return false;
   case RISCV::Select_GPR_Using_CC_GPR:
   case RISCV::Select_FPR32_Using_CC_GPR:
   case RISCV::Select_FPR64_Using_CC_GPR:
-    break;
-  case RISCV::BuildPairF64Pseudo:
-    return emitBuildPairF64Pseudo(MI, BB);
-  case RISCV::SplitF64Pseudo:
-    return emitSplitF64Pseudo(MI, BB);
+    return true;
   }
+}
 
-  // To "insert" a SELECT instruction, we actually have to insert the triangle
-  // control-flow pattern.  The incoming instruction knows the destination vreg
+static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
+                                           MachineBasicBlock *BB) {
+  // To "insert" Select_* instructions, we actually have to insert the triangle
+  // control-flow pattern.  The incoming instructions know the destination vreg
   // to set, the condition code register to branch on, the true/false values to
   // select between, and the condcode to use to select the appropriate branch.
   //
@@ -682,6 +1202,54 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   //     |  IfFalseMBB
   //     | /
   //    TailMBB
+  //
+  // When we find a sequence of selects we attempt to optimize their emission
+  // by sharing the control flow. Currently we only handle cases where we have
+  // multiple selects with the exact same condition (same LHS, RHS and CC).
+  // The selects may be interleaved with other instructions if the other
+  // instructions meet some requirements we deem safe:
+  // - They are debug instructions. Otherwise,
+  // - They do not have side-effects, do not access memory and their inputs do
+  //   not depend on the results of the select pseudo-instructions.
+  // The TrueV/FalseV operands of the selects cannot depend on the result of
+  // previous selects in the sequence.
+  // These conditions could be further relaxed. See the X86 target for a
+  // related approach and more information.
+  unsigned LHS = MI.getOperand(1).getReg();
+  unsigned RHS = MI.getOperand(2).getReg();
+  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+
+  SmallVector<MachineInstr *, 4> SelectDebugValues;
+  SmallSet<unsigned, 4> SelectDests;
+  SelectDests.insert(MI.getOperand(0).getReg());
+
+  MachineInstr *LastSelectPseudo = &MI;
+
+  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
+       SequenceMBBI != E; ++SequenceMBBI) {
+    if (SequenceMBBI->isDebugInstr())
+      continue;
+    else if (isSelectPseudo(*SequenceMBBI)) {
+      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
+          SequenceMBBI->getOperand(2).getReg() != RHS ||
+          SequenceMBBI->getOperand(3).getImm() != CC ||
+          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
+          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
+        break;
+      LastSelectPseudo = &*SequenceMBBI;
+      SequenceMBBI->collectDebugValues(SelectDebugValues);
+      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
+    } else {
+      if (SequenceMBBI->hasUnmodeledSideEffects() ||
+          SequenceMBBI->mayLoadOrStore())
+        break;
+      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
+            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
+          }))
+        break;
+    }
+  }
+
   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   DebugLoc DL = MI.getDebugLoc();
@@ -694,20 +1262,23 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
   F->insert(I, IfFalseMBB);
   F->insert(I, TailMBB);
-  // Move all remaining instructions to TailMBB.
-  TailMBB->splice(TailMBB->begin(), HeadMBB,
-                  std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
+
+  // Transfer debug instructions associated with the selects to TailMBB.
+  for (MachineInstr *DebugInstr : SelectDebugValues) {
+    TailMBB->push_back(DebugInstr->removeFromParent());
+  }
+
+  // Move all instructions after the sequence to TailMBB.
+  TailMBB->splice(TailMBB->end(), HeadMBB,
+                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
   // Update machine-CFG edges by transferring all successors of the current
-  // block to the new block which will contain the Phi node for the select.
+  // block to the new block which will contain the Phi nodes for the selects.
   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
   // Set the successors for HeadMBB.
   HeadMBB->addSuccessor(IfFalseMBB);
   HeadMBB->addSuccessor(TailMBB);
 
   // Insert appropriate branch.
-  unsigned LHS = MI.getOperand(1).getReg();
-  unsigned RHS = MI.getOperand(2).getReg();
-  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
 
   BuildMI(HeadMBB, DL, TII.get(Opcode))
@@ -718,18 +1289,50 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   // IfFalseMBB just falls through to TailMBB.
   IfFalseMBB->addSuccessor(TailMBB);
 
-  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
-  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
-          MI.getOperand(0).getReg())
-      .addReg(MI.getOperand(4).getReg())
-      .addMBB(HeadMBB)
-      .addReg(MI.getOperand(5).getReg())
-      .addMBB(IfFalseMBB);
+  // Create PHIs for all of the select pseudo-instructions.
+  auto SelectMBBI = MI.getIterator();
+  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
+  auto InsertionPoint = TailMBB->begin();
+  while (SelectMBBI != SelectEnd) {
+    auto Next = std::next(SelectMBBI);
+    if (isSelectPseudo(*SelectMBBI)) {
+      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
+      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
+              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
+          .addReg(SelectMBBI->getOperand(4).getReg())
+          .addMBB(HeadMBB)
+          .addReg(SelectMBBI->getOperand(5).getReg())
+          .addMBB(IfFalseMBB);
+      SelectMBBI->eraseFromParent();
+    }
+    SelectMBBI = Next;
+  }
 
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
   return TailMBB;
 }
 
+MachineBasicBlock *
+RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  switch (MI.getOpcode()) { // Hand each pseudo to its dedicated expander.
+  default:
+    llvm_unreachable("Unexpected instr type to insert");
+  case RISCV::ReadCycleWide: // Guarded below: must not appear when is64Bit().
+    assert(!Subtarget.is64Bit() &&
+           "ReadCycleWide is only to be used on riscv32");
+    return emitReadCycleWidePseudo(MI, BB);
+  case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_FPR32_Using_CC_GPR:
+  case RISCV::Select_FPR64_Using_CC_GPR: // All select flavours share one path.
+    return emitSelectPseudo(MI, BB);
+  case RISCV::BuildPairF64Pseudo:
+    return emitBuildPairF64Pseudo(MI, BB);
+  case RISCV::SplitF64Pseudo:
+    return emitSplitF64Pseudo(MI, BB);
+  }
+}
+
 // Calling Convention Implementation.
 // The expectations for frontend ABI lowering vary from target to target.
 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
@@ -759,6 +1362,14 @@ static const MCPhysReg ArgGPRs[] = {
   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
 };
+// F10_32-F17_32: the list CC_RISCV hands to AllocateReg for an f32 in an FPR.
+static const MCPhysReg ArgFPR32s[] = {
+    RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
+    RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32};
+// F10_64-F17_64: the list CC_RISCV hands to AllocateReg for an f64 in an FPR.
+static const MCPhysReg ArgFPR64s[] = {
+    RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
+    RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64};
 
 // Pass a 2*XLEN argument that has been split into two XLEN values through
 // registers or the stack as necessary.
@@ -799,22 +1410,59 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
 }
 
 // Implements the RISC-V calling convention. Returns true upon failure.
-static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
-                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
+static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
+                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
+                     bool IsRet, Type *OrigTy) {
   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
   assert(XLen == 32 || XLen == 64);
   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
-  if (ValVT == MVT::f32) {
-    LocVT = MVT::i32;
-    LocInfo = CCValAssign::BCvt;
-  }
 
   // Any return value split in to more than two values can't be returned
   // directly.
   if (IsRet && ValNo > 1)
     return true;
 
+  // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
+  // variadic argument, or if no F32 argument registers are available.
+  bool UseGPRForF32 = true;
+  // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
+  // variadic argument, or if no F64 argument registers are available.
+  bool UseGPRForF64 = true;
+
+  switch (ABI) {
+  default:
+    llvm_unreachable("Unexpected ABI");
+  case RISCVABI::ABI_ILP32:
+  case RISCVABI::ABI_LP64:
+    break;
+  case RISCVABI::ABI_ILP32F:
+  case RISCVABI::ABI_LP64F:
+    UseGPRForF32 = !IsFixed;
+    break;
+  case RISCVABI::ABI_ILP32D:
+  case RISCVABI::ABI_LP64D:
+    UseGPRForF32 = !IsFixed;
+    UseGPRForF64 = !IsFixed;
+    break;
+  }
+
+  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
+    UseGPRForF32 = true;
+  if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
+    UseGPRForF64 = true;
+
+  // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
+  // variables rather than directly checking against the target ABI.
+
+  if (UseGPRForF32 && ValVT == MVT::f32) {
+    LocVT = XLenVT;
+    LocInfo = CCValAssign::BCvt;
+  } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
+    LocVT = MVT::i64;
+    LocInfo = CCValAssign::BCvt;
+  }
+
   // If this is a variadic argument, the RISC-V calling convention requires
   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
@@ -838,8 +1486,9 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
   assert(PendingLocs.size() == PendingArgFlags.size() &&
          "PendingLocs and PendingArgFlags out of sync");
 
-  // Handle passing f64 on RV32D with a soft float ABI.
-  if (XLen == 32 && ValVT == MVT::f64) {
+  // Handle passing f64 on RV32D with a soft float ABI or when floating point
+  // registers are exhausted.
+  if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
            "Can't lower f64 if it is split");
     // Depending on available argument GPRS, f64 may be passed in a pair of
@@ -888,7 +1537,13 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
   }
 
   // Allocate to a register if possible, or else a stack slot.
-  unsigned Reg = State.AllocateReg(ArgGPRs);
+  unsigned Reg;
+  if (ValVT == MVT::f32 && !UseGPRForF32)
+    Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
+  else if (ValVT == MVT::f64 && !UseGPRForF64)
+    Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
+  else
+    Reg = State.AllocateReg(ArgGPRs);
   unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
 
   // If we reach this point and PendingLocs is non-empty, we must be at the
@@ -909,15 +1564,17 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
     return false;
   }
 
-  assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
+  assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
+         "Expected an XLenVT at this stage");
 
   if (Reg) {
     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
     return false;
   }
 
-  if (ValVT == MVT::f32) {
-    LocVT = MVT::f32;
+  // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
+  if (ValVT == MVT::f32 || ValVT == MVT::f64) {
+    LocVT = ValVT;
     LocInfo = CCValAssign::Full;
   }
   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
@@ -940,7 +1597,8 @@ void RISCVTargetLowering::analyzeInputArgs(
     else if (Ins[i].isOrigArg())
       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
 
-    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
+    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                  ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                         << EVT(ArgVT).getEVTString() << '\n');
@@ -960,7 +1618,8 @@ void RISCVTargetLowering::analyzeOutputArgs(
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
 
-    if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
+    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+    if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                         << EVT(ArgVT).getEVTString() << "\n");
@@ -979,6 +1638,10 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
   case CCValAssign::Full:
     break;
   case CCValAssign::BCvt:
+    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+      break;
+    }
     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
     break;
   }
@@ -993,8 +1656,24 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   EVT LocVT = VA.getLocVT();
   SDValue Val;
+  const TargetRegisterClass *RC;
+
+  switch (LocVT.getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("Unexpected register type");
+  case MVT::i32:
+  case MVT::i64:
+    RC = &RISCV::GPRRegClass;
+    break;
+  case MVT::f32:
+    RC = &RISCV::FPR32RegClass;
+    break;
+  case MVT::f64:
+    RC = &RISCV::FPR64RegClass;
+    break;
+  }
 
-  unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+  unsigned VReg = RegInfo.createVirtualRegister(RC);
   RegInfo.addLiveIn(VA.getLocReg(), VReg);
   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
 
@@ -1014,6 +1693,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
   case CCValAssign::Full:
     break;
   case CCValAssign::BCvt:
+    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+      Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+      break;
+    }
     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
     break;
   }
@@ -1040,6 +1723,7 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   case CCValAssign::Full:
   case CCValAssign::Indirect:
+  case CCValAssign::BCvt:
     ExtType = ISD::NON_EXTLOAD;
     break;
   }
@@ -1227,12 +1911,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   return Chain;
 }
 
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// isEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization.
 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
-bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
-  CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
-  const SmallVector<CCValAssign, 16> &ArgLocs) const {
+bool RISCVTargetLowering::isEligibleForTailCallOptimization(
+    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+    const SmallVector<CCValAssign, 16> &ArgLocs) const {
 
   auto &Callee = CLI.Callee;
   auto CalleeCC = CLI.CallConv;
@@ -1335,8 +2019,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Check if it's really possible to do a tail call.
   if (IsTailCall)
-    IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
-                                                   ArgLocs);
+    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
 
   if (IsTailCall)
     ++NumTailCalls;
@@ -1482,9 +2165,21 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
   // split it and then direct call can be matched by PseudoCALL.
   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
+    const GlobalValue *GV = S->getGlobal();
+
+    unsigned OpFlags = RISCVII::MO_CALL;
+    if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
+      OpFlags = RISCVII::MO_PLT;
+
+    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
+    unsigned OpFlags = RISCVII::MO_CALL;
+
+    if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
+                                                 nullptr))
+      OpFlags = RISCVII::MO_PLT;
+
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
   }
 
   // The first call operand is the chain and the second is the target address.
@@ -1567,8 +2262,9 @@ bool RISCVTargetLowering::CanLowerReturn(
   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
     MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-    if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
-                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
+    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
+                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
       return false;
   }
   return true;
@@ -1679,6 +2375,24 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "RISCVISD::SplitF64";
   case RISCVISD::TAIL:
     return "RISCVISD::TAIL";
+  case RISCVISD::SLLW:
+    return "RISCVISD::SLLW";
+  case RISCVISD::SRAW:
+    return "RISCVISD::SRAW";
+  case RISCVISD::SRLW:
+    return "RISCVISD::SRLW";
+  case RISCVISD::DIVW:
+    return "RISCVISD::DIVW";
+  case RISCVISD::DIVUW:
+    return "RISCVISD::DIVUW";
+  case RISCVISD::REMUW:
+    return "RISCVISD::REMUW";
+  case RISCVISD::FMV_W_X_RV64:
+    return "RISCVISD::FMV_W_X_RV64";
+  case RISCVISD::FMV_X_ANYEXTW_RV64:
+    return "RISCVISD::FMV_X_ANYEXTW_RV64";
+  case RISCVISD::READ_CYCLE_WIDE:
+    return "RISCVISD::READ_CYCLE_WIDE";
   }
   return nullptr;
 }
@@ -1701,6 +2415,44 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
 
+void RISCVTargetLowering::LowerAsmOperandForConstraint(
+    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+    SelectionDAG &DAG) const {
+  // Only the single-letter immediate constraints 'I', 'J' and 'K' are handled
+  // here; everything else is forwarded to the generic TargetLowering hook.
+  const char Letter = Constraint.length() == 1 ? Constraint[0] : '\0';
+  if (Letter != 'I' && Letter != 'J' && Letter != 'K') {
+    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+    return;
+  }
+
+  // An operand that is not a ConstantSDNode cannot match any of the three
+  // constraints, so Ops is left untouched for it.
+  auto *Imm = dyn_cast<ConstantSDNode>(Op);
+  if (!Imm)
+    return;
+
+  bool InRange = false;
+  uint64_t ImmVal = 0;
+  switch (Letter) {
+  case 'I': // 12-bit signed immediate.
+    ImmVal = Imm->getSExtValue();
+    InRange = isInt<12>(ImmVal);
+    break;
+  case 'J': // Integer zero.
+    InRange = Imm->getZExtValue() == 0;
+    break;
+  case 'K': // 5-bit unsigned immediate.
+    ImmVal = Imm->getZExtValue();
+    InRange = isUInt<5>(ImmVal);
+    break;
+  }
+  // Emit the accepted value as a target constant of the subtarget's XLenVT.
+  if (InRange)
+    Ops.push_back(
+        DAG.getTargetConstant(ImmVal, SDLoc(Op), Subtarget.getXLenVT()));
+}
+
 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
@@ -1721,6 +2473,12 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
 
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
+  // point operations can't be used in an lr/sc sequence without breaking the
+  // forward-progress guarantee.
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
@@ -1728,37 +2486,74 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 }
 
 static Intrinsic::ID
-getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
-  switch (BinOp) {
-  default:
-    llvm_unreachable("Unexpected AtomicRMW BinOp");
-  case AtomicRMWInst::Xchg:
-    return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
-  case AtomicRMWInst::Add:
-    return Intrinsic::riscv_masked_atomicrmw_add_i32;
-  case AtomicRMWInst::Sub:
-    return Intrinsic::riscv_masked_atomicrmw_sub_i32;
-  case AtomicRMWInst::Nand:
-    return Intrinsic::riscv_masked_atomicrmw_nand_i32;
-  case AtomicRMWInst::Max:
-    return Intrinsic::riscv_masked_atomicrmw_max_i32;
-  case AtomicRMWInst::Min:
-    return Intrinsic::riscv_masked_atomicrmw_min_i32;
-  case AtomicRMWInst::UMax:
-    return Intrinsic::riscv_masked_atomicrmw_umax_i32;
-  case AtomicRMWInst::UMin:
-    return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
+  if (XLen == 32) { // XLen 32 selects the *_i32 masked intrinsics.
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unexpected AtomicRMW BinOp");
+    case AtomicRMWInst::Xchg:
+      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+    case AtomicRMWInst::Add:
+      return Intrinsic::riscv_masked_atomicrmw_add_i32;
+    case AtomicRMWInst::Sub:
+      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+    case AtomicRMWInst::Nand:
+      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+    case AtomicRMWInst::Max:
+      return Intrinsic::riscv_masked_atomicrmw_max_i32;
+    case AtomicRMWInst::Min:
+      return Intrinsic::riscv_masked_atomicrmw_min_i32;
+    case AtomicRMWInst::UMax:
+      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+    case AtomicRMWInst::UMin:
+      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+    }
+  }
+
+  if (XLen == 64) { // XLen 64 selects the *_i64 masked intrinsics.
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unexpected AtomicRMW BinOp");
+    case AtomicRMWInst::Xchg:
+      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
+    case AtomicRMWInst::Add:
+      return Intrinsic::riscv_masked_atomicrmw_add_i64;
+    case AtomicRMWInst::Sub:
+      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
+    case AtomicRMWInst::Nand:
+      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
+    case AtomicRMWInst::Max:
+      return Intrinsic::riscv_masked_atomicrmw_max_i64;
+    case AtomicRMWInst::Min:
+      return Intrinsic::riscv_masked_atomicrmw_min_i64;
+    case AtomicRMWInst::UMax:
+      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
+    case AtomicRMWInst::UMin:
+      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
+    }
   }
+
+  llvm_unreachable("Unexpected XLen\n"); // XLen was neither 32 nor 64.
 }
 
 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
-  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+  unsigned XLen = Subtarget.getXLen();
+  Value *Ordering =
+      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
   Type *Tys[] = {AlignedAddr->getType()};
   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
       AI->getModule(),
-      getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
+
+  if (XLen == 64) {
+    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
+  }
+
+  Value *Result;
 
   // Must pass the shift amount needed to sign extend the loaded value prior
   // to performing a signed comparison for min/max. ShiftAmt is the number of
@@ -1770,13 +2565,18 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
     const DataLayout &DL = AI->getModule()->getDataLayout();
     unsigned ValWidth =
         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
-    Value *SextShamt = Builder.CreateSub(
-        Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
-    return Builder.CreateCall(LrwOpScwLoop,
-                              {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+    Value *SextShamt =
+        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
+    Result = Builder.CreateCall(LrwOpScwLoop,
+                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+  } else {
+    Result =
+        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
   }
 
-  return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+  if (XLen == 64)
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  return Result;
 }
 
 TargetLowering::AtomicExpansionKind
@@ -1791,10 +2591,31 @@ RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
-  Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+  unsigned XLen = Subtarget.getXLen(); // Also sets the ordering operand width.
+  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
+  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
+  if (XLen == 64) { // Widen the operands and pick the _i64 intrinsic.
+    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
+  }
   Type *Tys[] = {AlignedAddr->getType()};
-  Function *MaskedCmpXchg = Intrinsic::getDeclaration(
-      CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
-  return Builder.CreateCall(MaskedCmpXchg,
-                            {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+  Function *MaskedCmpXchg =
+      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
+  Value *Result = Builder.CreateCall(
+      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+  if (XLen == 64) // Truncate the call result to i32 before returning it.
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  return Result;
+}
+
+unsigned RISCVTargetLowering::getExceptionPointerRegister(
+    const Constant *PersonalityFn) const {
+  return RISCV::X10; // Always X10; PersonalityFn is not consulted.
+}
+
+unsigned RISCVTargetLowering::getExceptionSelectorRegister(
+    const Constant *PersonalityFn) const {
+  return RISCV::X11; // Always X11; PersonalityFn is not consulted.
 }
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 6970900bb062..17db03bbb69e 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -1,9 +1,8 @@
 //===-- RISCVISelLowering.h - RISCV DAG Lowering Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,7 +31,27 @@ enum NodeType : unsigned {
   SELECT_CC,
   BuildPairF64,
   SplitF64,
-  TAIL
+  TAIL,
+  // RV64I shifts, directly matching the semantics of the named RISC-V
+  // instructions.
+  SLLW,
+  SRAW,
+  SRLW,
+  // 32-bit operations from RV64M that can't be simply matched with a pattern
+  // at instruction selection time.
+  DIVW,
+  DIVUW,
+  REMUW,
+  // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
+  // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
+  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
+  // This is a more convenient semantic for producing dagcombines that remove
+  // unnecessary GPR->FPR->GPR moves.
+  FMV_W_X_RV64,
+  FMV_X_ANYEXTW_RV64,
+  // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
+  // (returns (Lo, Hi)). It takes a chain operand.
+  READ_CYCLE_WIDE
 };
 }
 
@@ -56,11 +75,20 @@ public:
   bool isZExtFree(SDValue Val, EVT VT2) const override;
   bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
 
+  bool hasBitPreservingFPLogic(EVT VT) const override;
+
   // Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedElts,
+                                           const SelectionDAG &DAG,
+                                           unsigned Depth) const override;
+
   // This method returns the name of a target specific DAG node.
   const char *getTargetNodeName(unsigned Opcode) const override;
 
@@ -68,6 +96,10 @@ public:
   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                StringRef Constraint, MVT VT) const override;
 
+  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const override;
+
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *BB) const override;
@@ -75,6 +107,10 @@ public:
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
 
+  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+    return VT.isScalarInteger(); // True for scalar integer VTs only.
+  }
+
   bool shouldInsertFencesForAtomic(const Instruction *I) const override {
     return isa<LoadInst>(I) || isa<StoreInst>(I);
   }
@@ -83,6 +119,28 @@ public:
   Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
 
+  ISD::NodeType getExtendForAtomicOps() const override {
+    return ISD::SIGN_EXTEND; // Unconditionally reports sign extension.
+  }
+
+  // Answers true unless the enclosing function reports hasMinSize(); neither
+  // the node N nor anything else about the DAG is inspected.
+  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+    return !DAG.getMachineFunction().getFunction().hasMinSize();
+  }
+  bool isDesirableToCommuteWithShift(const SDNode *N,
+                                     CombineLevel Level) const override;
+
+  /// If a physical register, this returns the register that receives the
+  /// exception address on entry to an EH pad.
+  unsigned
+  getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+  /// If a physical register, this returns the register that receives the
+  /// exception typeid on entry to a landing pad.
+  unsigned
+  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -110,17 +168,29 @@ private:
                                          Type *Ty) const override {
     return true;
   }
+
+  template <class NodeTy>
+  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
+
+  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+                           bool UseGOT) const;
+  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
+
+  bool shouldConsiderGEPOffsetSplit() const override { return true; }
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
 
-  bool IsEligibleForTailCallOptimization(CCState &CCInfo,
-    CallLoweringInfo &CLI, MachineFunction &MF,
-    const SmallVector<CCValAssign, 16> &ArgLocs) const;
+  bool isEligibleForTailCallOptimization(
+      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+      const SmallVector<CCValAssign, 16> &ArgLocs) const;
 
   TargetLowering::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td
index ebd676a6056e..7229ebfe1db0 100644
--- a/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/lib/Target/RISCV/RISCVInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrFormats.td - RISCV Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -109,6 +108,35 @@ class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string
   let isCodeGenOnly = 1;
 }
 
+// Pseudo load of a bare symbol into $rd (asm operands: "$rd, $addr").
+class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
+    : Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
+  let hasSideEffects = 0;
+  let mayLoad = 1; // Flagged as a load only; mayStore stays 0.
+  let mayStore = 0;
+  let isCodeGenOnly = 0; // Overrides the value 1 set by the Pseudo base class.
+  let isAsmParserOnly = 1;
+}
+
+class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
+    : Pseudo<(outs rdty:$rd, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
+  let hasSideEffects = 0; // Same flags as PseudoLoad; adds a GPR:$tmp output.
+  let mayLoad = 1;
+  let mayStore = 0;
+  let isCodeGenOnly = 0; // Overrides the value 1 set by the Pseudo base class.
+  let isAsmParserOnly = 1;
+}
+
+// Pseudo store instructions. NOTE(review): $rs is listed in outs - intended?
+class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
+    : Pseudo<(outs rsty:$rs, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 1; // Flagged as a store only; mayLoad stays 0.
+  let isCodeGenOnly = 0; // Overrides the value 1 set by the Pseudo base class.
+  let isAsmParserOnly = 1;
+}
+
 // Instruction formats are listed in the order they appear in the RISC-V
 // instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4,
 // RVInstRAtomic) sorted alphabetically.
diff --git a/lib/Target/RISCV/RISCVInstrFormatsC.td b/lib/Target/RISCV/RISCVInstrFormatsC.td
index bda8bbb558eb..690bec5181e2 100644
--- a/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrFormatsC.td - RISCV C Instruction Formats --*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp
index 76c74368ca11..99c8d2ef73de 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -291,9 +290,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 0;
 
   // Remove the branch.
-  I->eraseFromParent();
   if (BytesRemoved)
     *BytesRemoved += getInstSizeInBytes(*I);
+  I->eraseFromParent();
 
   I = MBB.end();
 
@@ -304,9 +303,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 1;
 
   // Remove the branch.
-  I->eraseFromParent();
   if (BytesRemoved)
     *BytesRemoved += getInstSizeInBytes(*I);
+  I->eraseFromParent();
   return 2;
 }
 
@@ -383,8 +382,8 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
       .addMBB(&DestBB, RISCVII::MO_LO);
 
   RS->enterBasicBlockEnd(MBB);
-  unsigned Scav = RS->scavengeRegisterBackwards(
-      RISCV::GPRRegClass, MachineBasicBlock::iterator(LuiMI), false, 0);
+  unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
+                                                LuiMI.getIterator(), false, 0);
   MRI.replaceRegWith(ScratchReg, Scav);
   MRI.clearVirtRegs();
   RS->setRegUsed(Scav);
@@ -437,10 +436,16 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   case TargetOpcode::KILL:
   case TargetOpcode::DBG_VALUE:
     return 0;
+  case RISCV::PseudoCALLReg:
   case RISCV::PseudoCALL:
   case RISCV::PseudoTAIL:
+  case RISCV::PseudoLLA:
+  case RISCV::PseudoLA:
+  case RISCV::PseudoLA_TLS_IE:
+  case RISCV::PseudoLA_TLS_GD:
     return 8;
-  case TargetOpcode::INLINEASM: {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: {
     const MachineFunction &MF = *MI.getParent()->getParent();
     const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
     return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
@@ -448,3 +453,16 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   }
   }
 }
+
+bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
+  const unsigned Opcode = MI.getOpcode();
+  switch(Opcode) {
+    default: // Any other opcode: defer to the instruction's own flag below.
+      break;
+    case RISCV::ADDI:
+    case RISCV::ORI:
+    case RISCV::XORI: // These three count as cheap only when operand 1 is X0.
+      return (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0);
+  }
+  return MI.isAsCheapAsAMove(); // Generic MachineInstr query.
+}
diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h
index 1d3279c3d31e..ff098e660d19 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/lib/Target/RISCV/RISCVInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfo.h - RISCV Instruction Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -79,6 +78,8 @@ public:
 
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
+
+  bool isAsCheapAsAMove(const MachineInstr &MI) const override;
 };
 }
 #endif
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index d7cc13d4fabd..69bde15f1218 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfo.td - Target Description for RISCV ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,42 +10,48 @@
 //
 //===----------------------------------------------------------------------===//
 
-include "RISCVInstrFormats.td"
-
 //===----------------------------------------------------------------------===//
 // RISC-V specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def SDT_RISCVCall         : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
-def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
-                                            SDTCisVT<1, i32>]>;
-def SDT_RISCVCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
-                                          SDTCisVT<1, i32>]>;
-def SDT_RISCVSelectCC     : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
-                                                 SDTCisSameAs<0, 4>,
-                                                 SDTCisSameAs<4, 5>]>;
-
-
-def Call         : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
-                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                           SDNPVariadic]>;
-def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_RISCVCallSeqStart,
-                          [SDNPHasChain, SDNPOutGlue]>;
-def CallSeqEnd   : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd,
-                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def RetFlag      : SDNode<"RISCVISD::RET_FLAG", SDTNone,
-                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def URetFlag     : SDNode<"RISCVISD::URET_FLAG", SDTNone,
-                          [SDNPHasChain, SDNPOptInGlue]>;
-def SRetFlag     : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
-                          [SDNPHasChain, SDNPOptInGlue]>;
-def MRetFlag     : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
-                          [SDNPHasChain, SDNPOptInGlue]>;
-def SelectCC     : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
-                          [SDNPInGlue]>;
-def Tail         : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
-                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
-                           SDNPVariadic]>;
+// Target-independent type requirements, but with target-specific formats.
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+                                       SDTCisVT<1, i32>]>;
+def SDT_CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
+                                     SDTCisVT<1, i32>]>;
+
+// Target-dependent type requirements.
+def SDT_RISCVCall     : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
+def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+                                             SDTCisSameAs<0, 4>,
+                                             SDTCisSameAs<4, 5>]>;
+
+// Target-independent nodes, but with target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+                           [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Target-dependent nodes.
+def riscv_call      : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
+def riscv_ret_flag  : SDNode<"RISCVISD::RET_FLAG", SDTNone,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def riscv_uret_flag : SDNode<"RISCVISD::URET_FLAG", SDTNone,
+                             [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_sret_flag : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
+                             [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_mret_flag : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
+                             [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_selectcc  : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
+                             [SDNPInGlue]>;
+def riscv_tail      : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
+def riscv_sllw      : SDNode<"RISCVISD::SLLW", SDTIntShiftOp>;
+def riscv_sraw      : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>;
+def riscv_srlw      : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -185,6 +190,30 @@ def bare_symbol : Operand<XLenVT> {
   let ParserMatchClass = BareSymbol;
 }
 
+def CallSymbol : AsmOperandClass {
+  let Name = "CallSymbol";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "InvalidCallSymbol";
+  let ParserMethod = "parseCallSymbol";
+}
+
+// A bare symbol used in call/tail only.
+def call_symbol : Operand<XLenVT> {
+  let ParserMatchClass = CallSymbol;
+}
+
+def TPRelAddSymbol : AsmOperandClass {
+  let Name = "TPRelAddSymbol";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "InvalidTPRelAddSymbol";
+  let ParserMethod = "parseOperandWithModifier";
+}
+
+// A bare symbol with the %tprel_add variant.
+def tprel_add_symbol : Operand<XLenVT> {
+  let ParserMatchClass = TPRelAddSymbol;
+}
+
 def CSRSystemRegister : AsmOperandClass {
   let Name = "CSRSystemRegister";
   let ParserMethod = "parseCSRSystemRegister";
@@ -233,6 +262,12 @@ def HI20 : SDNodeXForm<imm, [{
                                    SDLoc(N), N->getValueType(0));
 }]>;
 
+//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrFormats.td"
+
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
 //===----------------------------------------------------------------------===//
@@ -307,7 +342,8 @@ class Priv<string opcodestr, bits<7> funct7>
 // Instructions
 //===----------------------------------------------------------------------===//
 
-let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
                   "lui", "$rd, $imm20">;
 
@@ -321,7 +357,7 @@ def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
 let isCall = 1 in
 def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
                    (ins GPR:$rs1, simm12:$imm12),
-                   "jalr", "$rd, $rs1, $imm12">;
+                   "jalr", "$rd, ${imm12}(${rs1})">;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
 def BEQ  : BranchCC_rri<0b000, "beq">;
@@ -343,13 +379,17 @@ def SW : Store_rri<0b010, "sw">;
 
 // ADDI isn't always rematerializable, but isReMaterializable will be used as
 // a hint which is verified in isReallyTriviallyReMaterializable.
-let isReMaterializable = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def ADDI  : ALU_ri<0b000, "addi">;
 
 def SLTI  : ALU_ri<0b010, "slti">;
 def SLTIU : ALU_ri<0b011, "sltiu">;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def XORI  : ALU_ri<0b100, "xori">;
 def ORI   : ALU_ri<0b110, "ori">;
+}
+
 def ANDI  : ALU_ri<0b111, "andi">;
 
 def SLLI : Shift_ri<0, 0b001, "slli">;
@@ -485,12 +525,6 @@ def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
 // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
 //===----------------------------------------------------------------------===//
 
-// TODO la
-// TODO lb lh lw
-// TODO RV64I: ld
-// TODO sb sh sw
-// TODO RV64I: sd
-
 def : InstAlias<"nop",           (ADDI      X0,      X0,       0)>;
 
 // Note that the size is 32 because up to 8 32-bit instructions are needed to
@@ -502,6 +536,22 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
 def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm_li:$imm), [],
                       "li", "$rd, $imm">;
 
+def PseudoLB  : PseudoLoad<"lb">;
+def PseudoLBU : PseudoLoad<"lbu">;
+def PseudoLH  : PseudoLoad<"lh">;
+def PseudoLHU : PseudoLoad<"lhu">;
+def PseudoLW  : PseudoLoad<"lw">;
+
+def PseudoSB  : PseudoStore<"sb">;
+def PseudoSH  : PseudoStore<"sh">;
+def PseudoSW  : PseudoStore<"sw">;
+
+let Predicates = [IsRV64] in {
+def PseudoLWU : PseudoLoad<"lwu">;
+def PseudoLD  : PseudoLoad<"ld">;
+def PseudoSD  : PseudoStore<"sd">;
+} // Predicates = [IsRV64]
+
 def : InstAlias<"mv $rd, $rs",   (ADDI GPR:$rd, GPR:$rs,       0)>;
 def : InstAlias<"not $rd, $rs",  (XORI GPR:$rd, GPR:$rs,      -1)>;
 def : InstAlias<"neg $rd, $rs",  (SUB  GPR:$rd,      X0, GPR:$rs)>;
@@ -547,27 +597,36 @@ def : InstAlias<"bgtu $rs, $rt, $offset",
 def : InstAlias<"bleu $rs, $rt, $offset",
                 (BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
 
-// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset",   (JAL  X0, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jal $offset", (JAL  X1, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jr $rs",      (JALR X0, GPR:$rs, 0)>;
-def : InstAlias<"jalr $rs",    (JALR X1, GPR:$rs, 0)>;
-def : InstAlias<"ret",         (JALR X0,      X1, 0), 2>;
+def : InstAlias<"j $offset",   (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
+
+// Non-zero offset aliases of "jalr" are the lowest weight, followed by the
+// two-register form, then the one-register forms and finally "ret".
+def : InstAlias<"jr $rs",                (JALR      X0, GPR:$rs, 0), 3>;
+def : InstAlias<"jr ${offset}(${rs})",   (JALR      X0, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rs",              (JALR      X1, GPR:$rs, 0), 3>;
+def : InstAlias<"jalr ${offset}(${rs})", (JALR      X1, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rd, $rs",         (JALR GPR:$rd, GPR:$rs, 0), 2>;
+def : InstAlias<"ret",                   (JALR      X0,      X1, 0), 4>;
+
+// Non-canonical forms for jump targets also accepted by the assembler.
+def : InstAlias<"jr $rs, $offset",        (JALR      X0, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rs, $offset",      (JALR      X1, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>;
+
 // TODO call
 // TODO tail
 
 def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw
 
-// CSR Addresses: 0xC00 == cycle,  0xC01 == time,  0xC02 == instret
-//                0xC80 == cycleh, 0xC81 == timeh, 0xC82 == instreth
-def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, 0xC02, X0)>;
-def : InstAlias<"rdcycle $rd",   (CSRRS GPR:$rd, 0xC00, X0)>;
-def : InstAlias<"rdtime $rd",    (CSRRS GPR:$rd, 0xC01, X0)>;
+def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>;
+def : InstAlias<"rdcycle $rd",   (CSRRS GPR:$rd, CYCLE.Encoding, X0)>;
+def : InstAlias<"rdtime $rd",    (CSRRS GPR:$rd, TIME.Encoding, X0)>;
 
 let Predicates = [IsRV32] in {
-def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, 0xC82, X0)>;
-def : InstAlias<"rdcycleh $rd",   (CSRRS GPR:$rd, 0xC80, X0)>;
-def : InstAlias<"rdtimeh $rd",    (CSRRS GPR:$rd, 0xC81, X0)>;
+def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0)>;
+def : InstAlias<"rdcycleh $rd",   (CSRRS GPR:$rd, CYCLEH.Encoding, X0)>;
+def : InstAlias<"rdtimeh $rd",    (CSRRS GPR:$rd, TIMEH.Encoding, X0)>;
 } // Predicates = [IsRV32]
 
 def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr,      X0)>;
@@ -593,6 +652,24 @@ def : InstAlias<"sfence.vma",     (SFENCE_VMA      X0, X0)>;
 def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
 
 let EmitPriority = 0 in {
+def : InstAlias<"lb $rd, (${rs1})",
+                (LB  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lh $rd, (${rs1})",
+                (LH  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lw $rd, (${rs1})",
+                (LW  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lbu $rd, (${rs1})",
+                (LBU  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lhu $rd, (${rs1})",
+                (LHU  GPR:$rd, GPR:$rs1, 0)>;
+
+def : InstAlias<"sb $rs2, (${rs1})",
+                (SB  GPR:$rs2, GPR:$rs1, 0)>;
+def : InstAlias<"sh $rs2, (${rs1})",
+                (SH  GPR:$rs2, GPR:$rs1, 0)>;
+def : InstAlias<"sw $rs2, (${rs1})",
+                (SW  GPR:$rs2, GPR:$rs1, 0)>;
+
 def : InstAlias<"add $rd, $rs1, $imm12",
                 (ADDI  GPR:$rd, GPR:$rs1, simm12:$imm12)>;
 def : InstAlias<"and $rd, $rs1, $imm12",
@@ -608,6 +685,13 @@ def : InstAlias<"srl $rd, $rs1, $shamt",
 def : InstAlias<"sra $rd, $rs1, $shamt",
                 (SRAI  GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
 let Predicates = [IsRV64] in {
+def : InstAlias<"lwu $rd, (${rs1})",
+                (LWU  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"ld $rd, (${rs1})",
+                (LD  GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"sd $rs2, (${rs1})",
+                (SD  GPR:$rs2, GPR:$rs1, 0)>;
+
 def : InstAlias<"addw $rd, $rs1, $imm12",
                 (ADDIW  GPR:$rd, GPR:$rs1, simm12:$imm12)>;
 def : InstAlias<"sllw $rd, $rs1, $shamt",
@@ -663,21 +747,9 @@ def sexti32 : PatFrags<(ops node:$src),
 def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
   return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
 }]>;
-def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
-  return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
-}]>;
 def zexti32 : PatFrags<(ops node:$src),
                        [(and node:$src, 0xffffffff),
                         (assertzexti32 node:$src)]>;
-// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
-// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
-// upper 32 bits.
-def shiftwamt_mask : ImmLeaf<XLenVT, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
-}]>;
-def shiftwamt : PatFrags<(ops node:$src),
-                         [(assertzexti5 (and node:$src, shiftwamt_mask)),
-                          (assertzexti5 node:$src)]>;
 
 /// Immediates
 
@@ -714,6 +786,15 @@ def : PatGprGpr<shiftop<shl>, SLL>;
 def : PatGprGpr<shiftop<srl>, SRL>;
 def : PatGprGpr<shiftop<sra>, SRA>;
 
+// This is a special case of the ADD instruction used to facilitate the use of a
+// fourth operand to emit a relocation on a symbol relating to this instruction.
+// The relocation does not affect any bits of the instruction itself but is used
+// as a hint to the linker.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0 in
+def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
+                            (ins GPR:$rs1, GPR:$rs2, tprel_add_symbol:$src), [],
+                            "add", "$rd, $rs1, $rs2, $src">;
+
 /// FrameIndex calculations
 
 def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
@@ -732,8 +813,12 @@ def : PatGprSimm12<setult, SLTIU>;
 // handled by a RISC-V instruction.
 def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
 def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(seteq GPR:$rs1, simm12:$imm12),
+          (SLTIU (XORI GPR:$rs1, simm12:$imm12), 1)>;
 def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
 def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
+def : Pat<(setne GPR:$rs1, simm12:$imm12),
+          (SLTU X0, (XORI GPR:$rs1, simm12:$imm12))>;
 def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
 def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
 def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
@@ -746,7 +831,7 @@ class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
     : Pseudo<(outs valty:$dst),
              (ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
               valty:$truev, valty:$falsev),
-             [(set valty:$dst, (SelectCC cmpty:$lhs, cmpty:$rhs,
+             [(set valty:$dst, (riscv_selectcc cmpty:$lhs, cmpty:$rhs,
               (XLenVT imm:$imm), valty:$truev, valty:$falsev))]>;
 
 def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
@@ -794,6 +879,17 @@ def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
 def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
           (PseudoBRIND GPR:$rs1, simm12:$imm12)>;
 
+// PseudoCALLReg is a generic pseudo instruction for calls which will eventually
+// expand to auipc and jalr while encoding, with any given register used as the
+// destination.
+// Define AsmString to print "call" when compiling with the -S flag.
+// Define isCodeGenOnly = 0 to support parsing the assembly "call" instruction.
+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0,
+    mayStore = 0, mayLoad = 0 in
+def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
+  let AsmString = "call\t$rd, $func";
+}
+
 // PseudoCALL is a pseudo instruction which will eventually expand to auipc
 // and jalr while encoding. This is desirable, as an auipc+jalr pair with
 // R_RISCV_CALL and R_RISCV_RELAX relocations can be be relaxed by the linker
@@ -801,23 +897,24 @@ def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
 // Define AsmString to print "call" when compile with -S flag.
 // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
 let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
-def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func),
-                        [(Call tglobaladdr:$func)]> {
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
   let AsmString = "call\t$func";
 }
 
-def : Pat<(Call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
+def : Pat<(riscv_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
+def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
 
-def : Pat<(URetFlag), (URET X0, X0)>;
-def : Pat<(SRetFlag), (SRET X0, X0)>;
-def : Pat<(MRetFlag), (MRET X0, X0)>;
+def : Pat<(riscv_uret_flag), (URET X0, X0)>;
+def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
+def : Pat<(riscv_mret_flag), (MRET X0, X0)>;
 
 let isCall = 1, Defs = [X1] in
-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>,
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1),
+                                [(riscv_call GPR:$rs1)]>,
                          PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
 
 let isBarrier = 1, isReturn = 1, isTerminator = 1 in
-def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
+def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
                 PseudoInstExpansion<(JALR X0, X1, 0)>;
 
 // PseudoTAIL is a pseudo instruction similar to PseudoCALL and will eventually
@@ -825,17 +922,18 @@ def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
 // Define AsmString to print "tail" when compile with -S flag.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
     isCodeGenOnly = 0 in
-def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst), []> {
+def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> {
   let AsmString = "tail\t$dst";
 }
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2] in
-def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), [(Tail GPRTC:$rs1)]>,
+def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1),
+                                [(riscv_tail GPRTC:$rs1)]>,
                          PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
 
-def : Pat<(Tail (iPTR tglobaladdr:$dst)),
+def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
           (PseudoTAIL texternalsym:$dst)>;
-def : Pat<(Tail (iPTR texternalsym:$dst)),
+def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
           (PseudoTAIL texternalsym:$dst)>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
@@ -843,6 +941,21 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
 def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
                        "lla", "$dst, $src">;
 
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in
+def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                      "la", "$dst, $src">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in
+def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                             "la.tls.ie", "$dst, $src">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in
+def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                             "la.tls.gd", "$dst, $src">;
+
 /// Loads
 
 multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -906,9 +1019,9 @@ def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
 // Pessimistically assume the stack pointer will be clobbered
 let Defs = [X2], Uses = [X2] in {
 def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                              [(CallSeqStart timm:$amt1, timm:$amt2)]>;
+                              [(callseq_start timm:$amt1, timm:$amt2)]>;
 def ADJCALLSTACKUP   : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                              [(CallSeqEnd timm:$amt1, timm:$amt2)]>;
+                              [(callseq_end timm:$amt1, timm:$amt2)]>;
 } // Defs = [X2], Uses = [X2]
 
 /// RV64 patterns
@@ -935,28 +1048,9 @@ def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
 def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
           (SRAIW GPR:$rs1, uimm5:$shamt)>;
 
-// For variable-length shifts, we rely on assertzexti5 being inserted during
-// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
-// guarantee that selecting a 32-bit variable shift is legal (as the variable
-// shift is known to be <= 32). We must also be careful not to create
-// semantically incorrect patterns. For instance, selecting SRLW for
-// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
-// is not guaranteed to be safe, as we don't know whether the upper 32-bits of
-// the result are used or not (in the case where rs2=0, this is a
-// sign-extension operation).
-
-def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
-          (SLLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
-          (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
-          (SRLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
-          (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
-          (SRAW GPR:$rs1, GPR:$rs2)>;
+def : PatGprGpr<riscv_sllw, SLLW>;
+def : PatGprGpr<riscv_srlw, SRLW>;
+def : PatGprGpr<riscv_sraw, SRAW>;
 
 /// Loads
 
@@ -971,6 +1065,16 @@ defm : StPat<truncstorei32, SW, GPR>;
 defm : StPat<store, SD, GPR>;
 } // Predicates = [IsRV64]
 
+/// readcyclecounter
+// On RV64, we can directly read the 64-bit "cycle" CSR.
+let Predicates = [IsRV64] in
+def : Pat<(readcyclecounter), (CSRRS CYCLE.Encoding, X0)>;
+// On RV32, ReadCycleWide will be expanded to the suggested loop reading both
+// halves of the 64-bit "cycle" CSR.
+let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 0,
+mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in
+def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "", "">;
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td
index 9cb1d2f0b627..b768c9347b38 100644
--- a/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfoA.td - RISC-V 'A' instructions -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,7 +84,7 @@ defm AMOMIN_D   : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">;
 defm AMOMAX_D   : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">;
 defm AMOMINU_D  : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">;
 defm AMOMAXU_D  : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">;
-} // Predicates = [HasStedExtA, IsRV64]
+} // Predicates = [HasStdExtA, IsRV64]
 
 //===----------------------------------------------------------------------===//
 // Pseudo-instructions and codegen patterns
@@ -235,7 +234,7 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
 
 class PseudoCmpXchg
     : Pseudo<(outs GPR:$res, GPR:$scratch),
-             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, ixlenimm:$ordering), []> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -263,7 +262,7 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
 def PseudoMaskedCmpXchg32
     : Pseudo<(outs GPR:$res, GPR:$scratch),
              (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
-              i32imm:$ordering), []> {
+              ixlenimm:$ordering), []> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -276,3 +275,79 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
 
 } // Predicates = [HasStdExtA]
+
+let Predicates = [HasStdExtA, IsRV64] in {
+
+/// 64-bit atomic loads and stores
+
+// Fences will be inserted for atomic load/stores according to the logic in
+// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+defm : LdPat<atomic_load_64, LD>;
+defm : AtomicStPat<atomic_store_64, SD, GPR>;
+
+/// 64-bit AMOs
+
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D">;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D">;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D">;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D">;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D">;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D">;
+
+def : Pat<(atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr),
+          (AMOADD_D GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acquire GPR:$addr, GPR:$incr),
+          (AMOADD_D_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_release GPR:$addr, GPR:$incr),
+          (AMOADD_D_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr),
+          (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr),
+          (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// 64-bit pseudo AMOs
+
+def PseudoAtomicLoadNand64 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_64_acquire GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_64_release GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr),
+          (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
+// The i64 masked atomicrmw intrinsics select the *32 masked pseudos.
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
+                         PseudoMaskedAtomicSwap32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
+                         PseudoMaskedAtomicLoadAdd32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
+                         PseudoMaskedAtomicLoadSub32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
+                         PseudoMaskedAtomicLoadNand32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
+                               PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
+                               PseudoMaskedAtomicLoadMin32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
+                         PseudoMaskedAtomicLoadUMax32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
+                         PseudoMaskedAtomicLoadUMin32>;
+
+/// 64-bit compare and exchange
+
+def PseudoCmpXchg64 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
+
+def : Pat<(int_riscv_masked_cmpxchg_i64
+            GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+          (PseudoMaskedCmpXchg32
+            GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+} // Predicates = [HasStdExtA, IsRV64]
diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td
index ad68b5a7dc97..94477341eea7 100644
--- a/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -1,9 +1,8 @@
 //===- RISCVInstrInfoC.td - Compressed RISCV instructions -*- tblgen-*-----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -523,6 +522,56 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
 
 } // Predicates = [HasStdExtC]
 
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let EmitPriority = 0 in { // Parse-only aliases: never printed.
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fld $rd, (${rs1})", (C_FLD FPR64C:$rd, GPRC:$rs1, 0)>;
+
+def : InstAlias<"c.lw $rd, (${rs1})", (C_LW GPRC:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.ld $rd, (${rs1})", (C_LD GPRC:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fsd $rs2, (${rs1})", (C_FSD FPR64C:$rs2, GPRC:$rs1, 0)>;
+
+def : InstAlias<"c.sw $rs2, (${rs1})", (C_SW GPRC:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.sd $rs2, (${rs1})", (C_SD GPRC:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fldsp $rd, (${rs1})", (C_FLDSP FPR64C:$rd, SP:$rs1, 0)>;
+
+def : InstAlias<"c.lwsp $rd, (${rs1})", (C_LWSP GPRC:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.ldsp $rd, (${rs1})", (C_LDSP GPRC:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fsdsp $rs2, (${rs1})", (C_FSDSP FPR64C:$rs2, SP:$rs1, 0)>;
+
+def : InstAlias<"c.swsp $rs2, (${rs1})", (C_SWSP GPRC:$rs2, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.fswsp $rs2, (${rs1})", (C_FSWSP FPR32C:$rs2, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
+} // EmitPriority = 0
+
 //===----------------------------------------------------------------------===//
 // Compress Instruction tablegen backend.
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/RISCVInstrInfoD.td b/lib/Target/RISCV/RISCVInstrInfoD.td
index 9f1cd50de595..fe38c4ff02d3 100644
--- a/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfoD.td - RISC-V 'D' instructions -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -179,8 +178,8 @@ def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtD] in {
-// TODO fld
-// TODO fsd
+def : InstAlias<"fld $rd, (${rs1})",  (FLD FPR64:$rd,  GPR:$rs1, 0), 0>;
+def : InstAlias<"fsd $rs2, (${rs1})", (FSD FPR64:$rs2, GPR:$rs1, 0), 0>;
 
 def : InstAlias<"fmv.d $rd, $rs",  (FSGNJ_D  FPR64:$rd, FPR64:$rs, FPR64:$rs)>;
 def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>;
@@ -192,6 +191,9 @@ def : InstAlias<"fgt.d $rd, $rs, $rt",
                 (FLT_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
 def : InstAlias<"fge.d $rd, $rs, $rt",
                 (FLE_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
+
+def PseudoFLD  : PseudoFloatLoad<"fld", FPR64>;
+def PseudoFSD  : PseudoStore<"fsd", FPR64>;
 } // Predicates = [HasStdExtD]
 
 //===----------------------------------------------------------------------===//
@@ -268,6 +270,10 @@ def : PatFpr64Fpr64<setole, FLE_D>;
 // handled by a RISC-V instruction and aren't expanded in the SelectionDAG
 // Legalizer.
 
+def : Pat<(seto FPR64:$rs1, FPR64:$rs2),
+          (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
+               (FEQ_D FPR64:$rs2, FPR64:$rs2))>;
+
 def : Pat<(setuo FPR64:$rs1, FPR64:$rs2),
           (SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
                       (FEQ_D FPR64:$rs2, FPR64:$rs2)),
@@ -308,3 +314,26 @@ def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
 def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
 def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
 } // Predicates = [HasStdExtD, IsRV32]
+
+let Predicates = [HasStdExtD, IsRV64] in {
+def : Pat<(bitconvert GPR:$rs1), (FMV_D_X GPR:$rs1)>;
+def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produce poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.d sign-extends its result,
+// so it is cheaper than fcvt.lu.d+sext.w.
+def : Pat<(sext_inreg (zexti32 (fp_to_uint FPR64:$rs1)), i32),
+          (FCVT_WU_D $rs1, 0b001)>;
+
+// [u]int32->fp
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_D_W $rs1)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_D_WU $rs1)>;
+
+def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_L_D FPR64:$rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_LU_D FPR64:$rs1, 0b001)>;
+
+// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_L GPR:$rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_LU GPR:$rs1, 0b111)>;
+} // Predicates = [HasStdExtD, IsRV64]
diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td
index 03bdac45873d..032642942f2b 100644
--- a/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfoF.td - RISC-V 'F' instructions -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFMV_W_X_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; // i64 -> f32
+def SDT_RISCVFMV_X_ANYEXTW_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; // f32 -> i64
+
+def riscv_fmv_w_x_rv64
+    : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
+def riscv_fmv_x_anyextw_rv64
+    : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
+
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
@@ -193,8 +206,8 @@ def           : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtF] in {
-// TODO flw
-// TODO fsw
+def : InstAlias<"flw $rd, (${rs1})",  (FLW FPR32:$rd,  GPR:$rs1, 0), 0>;
+def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>;
 
 def : InstAlias<"fmv.s $rd, $rs",  (FSGNJ_S  FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
 def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
@@ -209,28 +222,30 @@ def : InstAlias<"fge.s $rd, $rs, $rt",
 
 // The following csr instructions actually alias instructions from the base ISA.
 // However, it only makes sense to support them when the F extension is enabled.
-// CSR Addresses: 0x003 == fcsr, 0x002 == frm, 0x001 == fflags
 // NOTE: "frcsr", "frrm", and "frflags" are more specialized version of "csrr".
-def : InstAlias<"frcsr $rd",      (CSRRS GPR:$rd, 0x003, X0), 2>;
-def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, 0x003, GPR:$rs)>;
-def : InstAlias<"fscsr $rs",      (CSRRW      X0, 0x003, GPR:$rs), 2>;
-
-def : InstAlias<"frrm $rd",        (CSRRS  GPR:$rd, 0x002, X0), 2>;
-def : InstAlias<"fsrm $rd, $rs",   (CSRRW  GPR:$rd, 0x002, GPR:$rs)>;
-def : InstAlias<"fsrm $rs",        (CSRRW       X0, 0x002, GPR:$rs), 2>;
-def : InstAlias<"fsrmi $rd, $imm", (CSRRWI GPR:$rd, 0x002, uimm5:$imm)>;
-def : InstAlias<"fsrmi $imm",      (CSRRWI      X0, 0x002, uimm5:$imm), 2>;
-
-def : InstAlias<"frflags $rd",        (CSRRS  GPR:$rd, 0x001, X0), 2>;
-def : InstAlias<"fsflags $rd, $rs",   (CSRRW  GPR:$rd, 0x001, GPR:$rs)>;
-def : InstAlias<"fsflags $rs",        (CSRRW       X0, 0x001, GPR:$rs), 2>;
-def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, 0x001, uimm5:$imm)>;
-def : InstAlias<"fsflagsi $imm",      (CSRRWI      X0, 0x001, uimm5:$imm), 2>;
+def : InstAlias<"frcsr $rd",      (CSRRS GPR:$rd, FCSR.Encoding, X0), 2>;
+def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs)>;
+def : InstAlias<"fscsr $rs",      (CSRRW      X0, FCSR.Encoding, GPR:$rs), 2>;
+
+def : InstAlias<"frrm $rd",        (CSRRS  GPR:$rd, FRM.Encoding, X0), 2>;
+def : InstAlias<"fsrm $rd, $rs",   (CSRRW  GPR:$rd, FRM.Encoding, GPR:$rs)>;
+def : InstAlias<"fsrm $rs",        (CSRRW       X0, FRM.Encoding, GPR:$rs), 2>;
+def : InstAlias<"fsrmi $rd, $imm", (CSRRWI GPR:$rd, FRM.Encoding, uimm5:$imm)>;
+def : InstAlias<"fsrmi $imm",      (CSRRWI      X0, FRM.Encoding, uimm5:$imm), 2>;
+
+def : InstAlias<"frflags $rd",        (CSRRS  GPR:$rd, FFLAGS.Encoding, X0), 2>;
+def : InstAlias<"fsflags $rd, $rs",   (CSRRW  GPR:$rd, FFLAGS.Encoding, GPR:$rs)>;
+def : InstAlias<"fsflags $rs",        (CSRRW       X0, FFLAGS.Encoding, GPR:$rs), 2>;
+def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, FFLAGS.Encoding, uimm5:$imm)>;
+def : InstAlias<"fsflagsi $imm",      (CSRRWI      X0, FFLAGS.Encoding, uimm5:$imm), 2>;
 
 // fmv.w.x and fmv.x.w were previously known as fmv.s.x and fmv.x.s. Both
 // spellings should be supported by standard tools.
 def : MnemonicAlias<"fmv.s.x", "fmv.w.x">;
 def : MnemonicAlias<"fmv.x.s", "fmv.x.w">;
+
+def PseudoFLW  : PseudoFloatLoad<"flw", FPR32>;
+def PseudoFSW  : PseudoStore<"fsw", FPR32>;
 } // Predicates = [HasStdExtF]
 
 //===----------------------------------------------------------------------===//
@@ -308,6 +323,10 @@ def : PatFpr32Fpr32<setole, FLE_S>;
 // handled by a RISC-V instruction and aren't expanded in the SelectionDAG
 // Legalizer.
 
+def : Pat<(seto FPR32:$rs1, FPR32:$rs2),
+          (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
+               (FEQ_S FPR32:$rs2, FPR32:$rs2))>;
+
 def : Pat<(setuo FPR32:$rs1, FPR32:$rs2),
           (SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
                       (FEQ_S FPR32:$rs2, FPR32:$rs2)),
@@ -334,3 +353,37 @@ def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
 def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
 def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
 } // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// FP->[u]int. Round-to-zero must be used. NOTE(review): repeats block above.
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV64] in {
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
+def : Pat<(sexti32 (riscv_fmv_x_anyextw_rv64 FPR32:$src)),
+          (FMV_X_W FPR32:$src)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.s sign-extends its result,
+// so it is cheaper than fcvt.lu.s+sext.w.
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR32:$rs1)), i32),
+          (FCVT_WU_S $rs1, 0b001)>;
+
+// FP->[u]int64
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_LU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_L $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_LU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV64]
diff --git a/lib/Target/RISCV/RISCVInstrInfoM.td b/lib/Target/RISCV/RISCVInstrInfoM.td
index 05dd3311ad54..e75151ba99c7 100644
--- a/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -1,9 +1,8 @@
 //===-- RISCVInstrInfoM.td - RISC-V 'M' instructions -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw  : SDNode<"RISCVISD::DIVW",  SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -53,18 +60,19 @@ def : PatGprGpr<urem, REMU>;
 let Predicates = [HasStdExtM, IsRV64] in {
 def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
           (MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
-                            (sexti32 GPR:$rs2)), i32),
-          (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
-                         (sexti32 GPR:$rs2))),
-          (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (REMU GPR:$rs1, GPR:$rs2)>;
+
 // Although the sexti32 operands may not have originated from an i32 srem,
 // this pattern is safe as it is impossible for two sign extended inputs to
 // produce a result where res[63:32]=0 and res[31]=1.
@@ -73,10 +81,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
 def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
                             (sexti32 GPR:$rs2)), i32),
           (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
 } // Predicates = [HasStdExtM, IsRV64]
diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp
index e0100b1679be..b1dbcfa7f738 100644
--- a/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVMCInstLower.cpp - Convert RISCV MachineInstr to an MCInst ------=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,12 +36,42 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
   case RISCVII::MO_None:
     Kind = RISCVMCExpr::VK_RISCV_None;
     break;
+  case RISCVII::MO_CALL:
+    Kind = RISCVMCExpr::VK_RISCV_CALL;
+    break;
+  case RISCVII::MO_PLT:
+    Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
+    break;
   case RISCVII::MO_LO:
     Kind = RISCVMCExpr::VK_RISCV_LO;
     break;
   case RISCVII::MO_HI:
     Kind = RISCVMCExpr::VK_RISCV_HI;
     break;
+  case RISCVII::MO_PCREL_LO:
+    Kind = RISCVMCExpr::VK_RISCV_PCREL_LO;
+    break;
+  case RISCVII::MO_PCREL_HI:
+    Kind = RISCVMCExpr::VK_RISCV_PCREL_HI;
+    break;
+  case RISCVII::MO_GOT_HI:
+    Kind = RISCVMCExpr::VK_RISCV_GOT_HI;
+    break;
+  case RISCVII::MO_TPREL_LO:
+    Kind = RISCVMCExpr::VK_RISCV_TPREL_LO;
+    break;
+  case RISCVII::MO_TPREL_HI:
+    Kind = RISCVMCExpr::VK_RISCV_TPREL_HI;
+    break;
+  case RISCVII::MO_TPREL_ADD:
+    Kind = RISCVMCExpr::VK_RISCV_TPREL_ADD;
+    break;
+  case RISCVII::MO_TLS_GOT_HI:
+    Kind = RISCVMCExpr::VK_RISCV_TLS_GOT_HI;
+    break;
+  case RISCVII::MO_TLS_GD_HI:
+    Kind = RISCVMCExpr::VK_RISCV_TLS_GD_HI;
+    break;
   }
 
   const MCExpr *ME =
diff --git a/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 2fea3a1bdd2f..585bff2bc20a 100644
--- a/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //=- RISCVMachineFunctionInfo.h - RISCV machine function info -----*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,8 +32,6 @@ private:
   int MoveF64FrameIndex = -1;
 
 public:
-  //  RISCVMachineFunctionInfo() = default;
-
   RISCVMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
 
   int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index cea009c5447d..82b1209cb8e7 100644
--- a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -1,9 +1,8 @@
 //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations  ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 3ed1dec434ce..e6a126e3e513 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVRegisterInfo.cpp - RISCV Register Information ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,17 +32,32 @@ RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
 
 const MCPhysReg *
 RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  auto &Subtarget = MF->getSubtarget<RISCVSubtarget>();
   if (MF->getFunction().hasFnAttribute("interrupt")) {
-    if (MF->getSubtarget<RISCVSubtarget>().hasStdExtD())
+    if (Subtarget.hasStdExtD())
       return CSR_XLEN_F64_Interrupt_SaveList;
-    if (MF->getSubtarget<RISCVSubtarget>().hasStdExtF())
+    if (Subtarget.hasStdExtF())
       return CSR_XLEN_F32_Interrupt_SaveList;
     return CSR_Interrupt_SaveList;
   }
-  return CSR_SaveList;
+  // Not an interrupt handler: the save list is chosen by the target ABI.
+  switch (Subtarget.getTargetABI()) {
+  default:
+    llvm_unreachable("Unrecognized ABI");
+  case RISCVABI::ABI_ILP32:
+  case RISCVABI::ABI_LP64:
+    return CSR_ILP32_LP64_SaveList;
+  case RISCVABI::ABI_ILP32F:
+  case RISCVABI::ABI_LP64F:
+    return CSR_ILP32F_LP64F_SaveList;
+  case RISCVABI::ABI_ILP32D:
+  case RISCVABI::ABI_LP64D:
+    return CSR_ILP32D_LP64D_SaveList;
+  }
 }
 
 BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = getFrameLowering(MF);
   BitVector Reserved(getNumRegs());
 
   // Use markSuperRegs to ensure any register aliases are also reserved
@@ -52,7 +66,8 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   markSuperRegs(Reserved, RISCV::X2); // sp
   markSuperRegs(Reserved, RISCV::X3); // gp
   markSuperRegs(Reserved, RISCV::X4); // tp
-  markSuperRegs(Reserved, RISCV::X8); // fp
+  if (TFI->hasFP(MF))
+    markSuperRegs(Reserved, RISCV::X8); // fp
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
@@ -109,7 +124,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
 }
@@ -117,12 +132,26 @@ unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
 const uint32_t *
 RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
                                         CallingConv::ID /*CC*/) const {
+  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
   if (MF.getFunction().hasFnAttribute("interrupt")) {
-    if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD())
+    if (Subtarget.hasStdExtD())
       return CSR_XLEN_F64_Interrupt_RegMask;
-    if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF())
+    if (Subtarget.hasStdExtF())
       return CSR_XLEN_F32_Interrupt_RegMask;
     return CSR_Interrupt_RegMask;
   }
-  return CSR_RegMask;
+  // Not an interrupt handler: the preserved mask is chosen by the target ABI.
+  switch (Subtarget.getTargetABI()) {
+  default:
+    llvm_unreachable("Unrecognized ABI");
+  case RISCVABI::ABI_ILP32:
+  case RISCVABI::ABI_LP64:
+    return CSR_ILP32_LP64_RegMask;
+  case RISCVABI::ABI_ILP32F:
+  case RISCVABI::ABI_LP64F:
+    return CSR_ILP32F_LP64F_RegMask;
+  case RISCVABI::ABI_ILP32D:
+  case RISCVABI::ABI_LP64D:
+    return CSR_ILP32D_LP64D_RegMask;
+  }
 }
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h
index cbbb70079dd1..4f339475508f 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- RISCVRegisterInfo.h - RISCV Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,7 +39,7 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td
index 4be8ff9200e9..79f8ab12f6c0 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- RISCVRegisterInfo.td - RISC-V Register defs --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -56,7 +55,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
   def X6  : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>;
   def X7  : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>;
   }
-  def X8  : RISCVReg<8, "x8", ["s0"]>, DwarfRegNum<[8]>;
+  def X8  : RISCVReg<8, "x8", ["s0", "fp"]>, DwarfRegNum<[8]>;
   def X9  : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>;
   def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>;
   def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>;
diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp
index b221ea84a33c..6902ed75d852 100644
--- a/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVSubtarget.cpp - RISCV Subtarget Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,10 +25,10 @@ using namespace llvm;
 
 void RISCVSubtarget::anchor() {}
 
-RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(StringRef CPU,
-                                                                StringRef FS,
-                                                                bool Is64Bit) {
+RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
+    const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName) {
   // Determine default and user-specified characteristics
+  bool Is64Bit = TT.isArch64Bit();
   std::string CPUName = CPU;
   if (CPUName.empty())
     CPUName = Is64Bit ? "generic-rv64" : "generic-rv32";
@@ -38,11 +37,14 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(StringRef CPU,
     XLenVT = MVT::i64;
     XLen = 64;
   }
+
+  TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
+  RISCVFeatures::validate(TT, getFeatureBits());
   return *this;
 }
 
-RISCVSubtarget::RISCVSubtarget(const Triple &TT, const std::string &CPU,
-                               const std::string &FS, const TargetMachine &TM)
+RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+                               StringRef ABIName, const TargetMachine &TM)
     : RISCVGenSubtargetInfo(TT, CPU, FS),
-      FrameLowering(initializeSubtargetDependencies(CPU, FS, TT.isArch64Bit())),
+      FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)),
       InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {}
diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h
index 0e09391e7829..106ff49f021a 100644
--- a/lib/Target/RISCV/RISCVSubtarget.h
+++ b/lib/Target/RISCV/RISCVSubtarget.h
@@ -1,9 +1,8 @@
 //===-- RISCVSubtarget.h - Define Subtarget for the RISCV -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "RISCVFrameLowering.h"
 #include "RISCVISelLowering.h"
 #include "RISCVInstrInfo.h"
+#include "Utils/RISCVBaseInfo.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -36,9 +36,11 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtD = false;
   bool HasStdExtC = false;
   bool HasRV64 = false;
+  bool IsRV32E = false;
   bool EnableLinkerRelax = false;
   unsigned XLen = 32;
   MVT XLenVT = MVT::i32;
+  RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
   RISCVFrameLowering FrameLowering;
   RISCVInstrInfo InstrInfo;
   RISCVRegisterInfo RegInfo;
@@ -47,13 +49,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
 
   /// Initializes using the passed in CPU and feature strings so that we can
   /// use initializer lists for subtarget initialization.
-  RISCVSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
-                                                  bool Is64Bit);
+  RISCVSubtarget &initializeSubtargetDependencies(const Triple &TT,
+                                                  StringRef CPU, StringRef FS,
+                                                  StringRef ABIName);
 
 public:
   // Initializes the data members to match that of the specified triple.
-  RISCVSubtarget(const Triple &TT, const std::string &CPU,
-                 const std::string &FS, const TargetMachine &TM);
+  RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+                 StringRef ABIName, const TargetMachine &TM);
 
   // Parses features string setting specified subtarget options. The
   // definition of this function is auto-generated by tblgen.
@@ -78,9 +81,11 @@ public:
   bool hasStdExtD() const { return HasStdExtD; }
   bool hasStdExtC() const { return HasStdExtC; }
   bool is64Bit() const { return HasRV64; }
+  bool isRV32E() const { return IsRV32E; }
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
+  RISCVABI::ABI getTargetABI() const { return TargetABI; }
 };
 } // End llvm namespace
 
diff --git a/lib/Target/RISCV/RISCVSystemOperands.td b/lib/Target/RISCV/RISCVSystemOperands.td
index f1b7984ffe6b..a46a32c4e7f2 100644
--- a/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/lib/Target/RISCV/RISCVSystemOperands.td
@@ -1,9 +1,8 @@
 //===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -72,18 +71,16 @@ def : SysReg<"uip", 0x044>;
 // User Floating-Point CSRs
 //===--------------------------
 
-let FeaturesRequired = [{ {RISCV::FeatureStdExtF} }] in {
-def : SysReg<"fflags", 0x001>;
-def : SysReg<"frm", 0x002>;
-def : SysReg<"fcsr", 0x003>;
-}
+def FFLAGS : SysReg<"fflags", 0x001>;
+def FRM    : SysReg<"frm", 0x002>;
+def FCSR   : SysReg<"fcsr", 0x003>;
 
 //===--------------------------
 // User Counter/Timers
 //===--------------------------
-def : SysReg<"cycle", 0xC00>;
-def : SysReg<"time", 0xC01>;
-def : SysReg<"instret", 0xC02>;
+def CYCLE   : SysReg<"cycle", 0xC00>;
+def TIME    : SysReg<"time", 0xC01>;
+def INSTRET : SysReg<"instret", 0xC02>;
 
 def : SysReg<"hpmcounter3", 0xC03>;
 def : SysReg<"hpmcounter4", 0xC04>;
@@ -116,9 +113,9 @@ def : SysReg<"hpmcounter30", 0xC1E>;
 def : SysReg<"hpmcounter31", 0xC1F>;
 
 let isRV32Only = 1 in {
-def: SysReg<"cycleh", 0xC80>;
-def: SysReg<"timeh", 0xC81>;
-def: SysReg<"instreth", 0xC82>;
+def CYCLEH   : SysReg<"cycleh", 0xC80>;
+def TIMEH    : SysReg<"timeh", 0xC81>;
+def INSTRETH : SysReg<"instreth", 0xC82>;
 
 def: SysReg<"hpmcounter3h", 0xC83>;
 def: SysReg<"hpmcounter4h", 0xC84>;
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index 8937ec200bd7..f4e6ed9f6284 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- RISCVTargetMachine.cpp - Define TargetMachine for RISCV -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,10 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "RISCV.h"
 #include "RISCVTargetMachine.h"
+#include "RISCV.h"
 #include "RISCVTargetObjectFile.h"
+#include "RISCVTargetTransformInfo.h"
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -31,7 +33,7 @@ extern "C" void LLVMInitializeRISCVTarget() {
   initializeRISCVExpandPseudoPass(*PR);
 }
 
-static std::string computeDataLayout(const Triple &TT) {
+static StringRef computeDataLayout(const Triple &TT) {
   if (TT.isArch64Bit()) {
     return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
   } else {
@@ -57,10 +59,15 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
                         getEffectiveRelocModel(TT, RM),
                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
       TLOF(make_unique<RISCVELFTargetObjectFile>()),
-      Subtarget(TT, CPU, FS, *this) {
+      Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) {
   initAsmInfo();
 }
 
+TargetTransformInfo
+RISCVTargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(RISCVTTIImpl(this, F));
+}
+
 namespace {
 class RISCVPassConfig : public TargetPassConfig {
 public:
diff --git a/lib/Target/RISCV/RISCVTargetMachine.h b/lib/Target/RISCV/RISCVTargetMachine.h
index 02361dddebf7..ebf3f3c07955 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/lib/Target/RISCV/RISCVTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- RISCVTargetMachine.h - Define TargetMachine for RISCV ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,6 +39,8 @@ public:
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
   }
+
+  TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 };
 }
 
diff --git a/lib/Target/RISCV/RISCVTargetObjectFile.cpp b/lib/Target/RISCV/RISCVTargetObjectFile.cpp
index 46e81b628b65..bbd45c970d3d 100644
--- a/lib/Target/RISCV/RISCVTargetObjectFile.cpp
+++ b/lib/Target/RISCV/RISCVTargetObjectFile.cpp
@@ -1,14 +1,16 @@
 //===-- RISCVTargetObjectFile.cpp - RISCV Object Info -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
 
 using namespace llvm;
 
@@ -16,4 +18,97 @@ void RISCVELFTargetObjectFile::Initialize(MCContext &Ctx,
                                           const TargetMachine &TM) {
   TargetLoweringObjectFileELF::Initialize(Ctx, TM);
   InitializeELF(TM.Options.UseInitArray);
+
+  SmallDataSection = getContext().getELFSection(
+      ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+  SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+                                               ELF::SHF_WRITE | ELF::SHF_ALLOC);
+}
+
+// An address must be loaded from a small section if its size is non-zero and
+// no larger than the small section size threshold. Data in this section could
+// be addressed by using the gp_rel operator.
+bool RISCVELFTargetObjectFile::isInSmallSection(uint64_t Size) const {
+  // gcc has traditionally not treated zero-sized objects as small data, so this
+  // is effectively part of the ABI.
+  return Size > 0 && Size <= SSThreshold;
+}
+
+// Return true if this global address should be placed into small data/bss
+// section.
+bool RISCVELFTargetObjectFile::isGlobalInSmallSection(
+    const GlobalObject *GO, const TargetMachine &TM) const {
+  // Only global variables, not functions.
+  const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GO);
+  if (!GVA)
+    return false;
+
+  // If the variable has an explicit section, it is placed in that section.
+  if (GVA->hasSection()) {
+    StringRef Section = GVA->getSection();
+
+    // Explicitly placing any variable in the small data section overrides
+    // the global -G value.
+    if (Section == ".sdata" || Section == ".sbss")
+      return true;
+
+    // Otherwise reject putting the variable to small section if it has an
+    // explicit section name.
+    return false;
+  }
+
+  if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) ||
+       GVA->hasCommonLinkage()))
+    return false;
+
+  Type *Ty = GVA->getValueType();
+  // It is possible that the type of the global is unsized, i.e. a declaration
+  // of an extern struct. In this case don't presume it is in the small data
+  // section. This happens e.g. when building the FreeBSD kernel.
+  if (!Ty->isSized())
+    return false;
+
+  return isInSmallSection(
+      GVA->getParent()->getDataLayout().getTypeAllocSize(Ty));
+}
+
+MCSection *RISCVELFTargetObjectFile::SelectSectionForGlobal(
+    const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+  // Handle Small Section classification here.
+  if (Kind.isBSS() && isGlobalInSmallSection(GO, TM))
+    return SmallBSSSection;
+  if (Kind.isData() && isGlobalInSmallSection(GO, TM))
+    return SmallDataSection;
+
+  // Otherwise, we work the same as ELF.
+  return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
+}
+
+void RISCVELFTargetObjectFile::getModuleMetadata(Module &M) {
+  SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+  M.getModuleFlagsMetadata(ModuleFlags);
+
+  for (const auto &MFE : ModuleFlags) {
+    StringRef Key = MFE.Key->getString();
+    if (Key == "SmallDataLimit") {
+      SSThreshold = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+      break;
+    }
+  }
+}
+
+/// Return true if this constant should be placed into small data section.
+bool RISCVELFTargetObjectFile::isConstantInSmallSection(
+    const DataLayout &DL, const Constant *CN) const {
+  return isInSmallSection(DL.getTypeAllocSize(CN->getType()));
+}
+
+MCSection *RISCVELFTargetObjectFile::getSectionForConstant(
+    const DataLayout &DL, SectionKind Kind, const Constant *C,
+    unsigned &Align) const {
+  if (isConstantInSmallSection(DL, C))
+    return SmallDataSection;
+
+  // Otherwise, we work the same as ELF.
+  return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
 }
diff --git a/lib/Target/RISCV/RISCVTargetObjectFile.h b/lib/Target/RISCV/RISCVTargetObjectFile.h
index 5467220301c1..b2daaaa9d364 100644
--- a/lib/Target/RISCV/RISCVTargetObjectFile.h
+++ b/lib/Target/RISCV/RISCVTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- RISCVTargetObjectFile.h - RISCV Object Info -*- C++ ---------*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,7 +16,31 @@ class RISCVTargetMachine;
 
 /// This implementation is used for RISCV ELF targets.
 class RISCVELFTargetObjectFile : public TargetLoweringObjectFileELF {
+  MCSection *SmallDataSection;
+  MCSection *SmallBSSSection;
+  unsigned SSThreshold = 8;
+
+public:
   void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+  /// Return true if this global address should be placed into small data/bss
+  /// section.
+  bool isGlobalInSmallSection(const GlobalObject *GO,
+                              const TargetMachine &TM) const;
+
+  MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+                                    const TargetMachine &TM) const override;
+
+  /// Return true if this constant should be placed into small data section.
+  bool isConstantInSmallSection(const DataLayout &DL, const Constant *CN) const;
+
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+                                   const Constant *C,
+                                   unsigned &Align) const override;
+
+  void getModuleMetadata(Module &M) override;
+
+  bool isInSmallSection(uint64_t Size) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2c6400cbb1eb
--- /dev/null
+++ b/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -0,0 +1,92 @@
+//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVTargetTransformInfo.h"
+#include "Utils/RISCVMatInt.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscvtti"
+
+int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+  assert(Ty->isIntegerTy() &&
+         "getIntImmCost can only estimate cost of materialising integers");
+
+  // We have a Zero register, so 0 is always free.
+  if (Imm == 0)
+    return TTI::TCC_Free;
+
+  // Otherwise, we check how many instructions it will take to materialise.
+  const DataLayout &DL = getDataLayout();
+  return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
+                                    getST()->is64Bit());
+}
+
+int RISCVTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                                Type *Ty) {
+  assert(Ty->isIntegerTy() &&
+         "getIntImmCost can only estimate cost of materialising integers");
+
+  // We have a Zero register, so 0 is always free.
+  if (Imm == 0)
+    return TTI::TCC_Free;
+
+  // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
+  // commutative, in others the immediate comes from a specific argument index.
+  bool Takes12BitImm = false;
+  unsigned ImmArgIdx = ~0U;
+
+  switch (Opcode) {
+  case Instruction::GetElementPtr:
+    // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
+    // split up large offsets in GEP into better parts than ConstantHoisting
+    // can.
+    return TTI::TCC_Free;
+  case Instruction::Add:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+  case Instruction::Mul:
+    Takes12BitImm = true;
+    break;
+  case Instruction::Sub:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    Takes12BitImm = true;
+    ImmArgIdx = 1;
+    break;
+  default:
+    break;
+  }
+
+  if (Takes12BitImm) {
+    // Check immediate is the correct argument...
+    if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
+      // ... and fits into the 12-bit immediate.
+      if (Imm.getMinSignedBits() <= 64 &&
+          getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
+        return TTI::TCC_Free;
+      }
+    }
+
+    // Otherwise, use the full materialisation cost.
+    return getIntImmCost(Imm, Ty);
+  }
+
+  // By default, prevent hoisting.
+  return TTI::TCC_Free;
+}
+
+int RISCVTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                const APInt &Imm, Type *Ty) {
+  // Prevent hoisting in unknown cases.
+  return TTI::TCC_Free;
+}
diff --git a/lib/Target/RISCV/RISCVTargetTransformInfo.h b/lib/Target/RISCV/RISCVTargetTransformInfo.h
new file mode 100644
index 000000000000..f361b25a0c70
--- /dev/null
+++ b/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -0,0 +1,52 @@
+//===- RISCVTargetTransformInfo.h - RISC-V specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the RISC-V target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
+
+#include "RISCVSubtarget.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
+  using BaseT = BasicTTIImplBase<RISCVTTIImpl>;
+  using TTI = TargetTransformInfo;
+
+  friend BaseT;
+
+  const RISCVSubtarget *ST;
+  const RISCVTargetLowering *TLI;
+
+  const RISCVSubtarget *getST() const { return ST; }
+  const RISCVTargetLowering *getTLI() const { return TLI; }
+
+public:
+  explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+
+  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                    Type *Ty);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
\ No newline at end of file
diff --git a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index 0f369d960fe1..e44984a3fcc5 100644
--- a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -1,26 +1,24 @@
 //===-- RISCVTargetInfo.cpp - RISCV Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
+#include "TargetInfo/RISCVTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
-namespace llvm {
-Target &getTheRISCV32Target() {
+Target &llvm::getTheRISCV32Target() {
   static Target TheRISCV32Target;
   return TheRISCV32Target;
 }
 
-Target &getTheRISCV64Target() {
+Target &llvm::getTheRISCV64Target() {
   static Target TheRISCV64Target;
   return TheRISCV64Target;
 }
-}
 
 extern "C" void LLVMInitializeRISCVTargetInfo() {
   RegisterTarget<Triple::riscv32> X(getTheRISCV32Target(), "riscv32",
diff --git a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
new file mode 100644
index 000000000000..ef3d9d116efa
--- /dev/null
+++ b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- RISCVTargetInfo.h - RISCV Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
+#define LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheRISCV32Target();
+Target &getTheRISCV64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
index 964af1f74cec..bc5395768ca1 100644
--- a/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -1,9 +1,80 @@
 #include "RISCVBaseInfo.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 namespace RISCVSysReg {
 #define GET_SysRegsList_IMPL
 #include "RISCVGenSystemOperands.inc"
 } // namespace RISCVSysReg
+
+namespace RISCVABI {
+ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+                     StringRef ABIName) {
+  auto TargetABI = StringSwitch<ABI>(ABIName)
+                       .Case("ilp32", ABI_ILP32)
+                       .Case("ilp32f", ABI_ILP32F)
+                       .Case("ilp32d", ABI_ILP32D)
+                       .Case("ilp32e", ABI_ILP32E)
+                       .Case("lp64", ABI_LP64)
+                       .Case("lp64f", ABI_LP64F)
+                       .Case("lp64d", ABI_LP64D)
+                       .Default(ABI_Unknown);
+
+  bool IsRV64 = TT.isArch64Bit();
+  bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+
+  if (!ABIName.empty() && TargetABI == ABI_Unknown) {
+    errs()
+        << "'" << ABIName
+        << "' is not a recognized ABI for this target (ignoring target-abi)\n";
+  } else if (ABIName.startswith("ilp32") && IsRV64) {
+    errs() << "32-bit ABIs are not supported for 64-bit targets (ignoring "
+              "target-abi)\n";
+    TargetABI = ABI_Unknown;
+  } else if (ABIName.startswith("lp64") && !IsRV64) {
+    errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
+              "target-abi)\n";
+    TargetABI = ABI_Unknown;
+  } else if (ABIName.endswith("f") && !FeatureBits[RISCV::FeatureStdExtF]) {
+    errs() << "Hard-float 'f' ABI can't be used for a target that "
+              "doesn't support the F instruction set extension (ignoring "
+              "target-abi)\n";
+    TargetABI = ABI_Unknown;
+  } else if (ABIName.endswith("d") && !FeatureBits[RISCV::FeatureStdExtD]) {
+    errs() << "Hard-float 'd' ABI can't be used for a target that "
+              "doesn't support the D instruction set extension (ignoring "
+              "target-abi)\n";
+    TargetABI = ABI_Unknown;
+  } else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) {
+    errs()
+        << "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n";
+    TargetABI = ABI_Unknown;
+  }
+
+  if (TargetABI != ABI_Unknown)
+    return TargetABI;
+
+  // For now, default to the ilp32/ilp32e/lp64 ABI if no explicit ABI is given
+  // or an invalid/unrecognised string is given. In the future, it might be
+  // worth changing this to default to ilp32f/lp64f and ilp32d/lp64d when
+  // hardware support for floating point is present.
+  if (IsRV32E)
+    return ABI_ILP32E;
+  if (IsRV64)
+    return ABI_LP64;
+  return ABI_ILP32;
+}
+} // namespace RISCVABI
+
+namespace RISCVFeatures {
+
+void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
+  if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E])
+    report_fatal_error("RV32E can't be enabled for an RV64 target");
+}
+
+} // namespace RISCVFeatures
+
 } // namespace llvm
diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index 372e0e80bbaf..c33c72f24319 100644
--- a/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -1,9 +1,8 @@
 //===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,9 +48,18 @@ enum {
 
 enum {
   MO_None,
+  MO_CALL,
+  MO_PLT,
   MO_LO,
   MO_HI,
+  MO_PCREL_LO,
   MO_PCREL_HI,
+  MO_GOT_HI,
+  MO_TPREL_LO,
+  MO_TPREL_HI,
+  MO_TPREL_ADD,
+  MO_TLS_GOT_HI,
+  MO_TLS_GD_HI,
 };
 } // namespace RISCVII
 
@@ -153,6 +161,34 @@ struct SysReg {
 #include "RISCVGenSystemOperands.inc"
 } // end namespace RISCVSysReg
 
+namespace RISCVABI {
+
+enum ABI {
+  ABI_ILP32,
+  ABI_ILP32F,
+  ABI_ILP32D,
+  ABI_ILP32E,
+  ABI_LP64,
+  ABI_LP64F,
+  ABI_LP64D,
+  ABI_Unknown
+};
+
+// Returns the target ABI. An ABIName that is unrecognised or unsupported for
+// the given TT and FeatureBits is reported on errs() and a default is used.
+ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+                     StringRef ABIName);
+
+} // namespace RISCVABI
+
+namespace RISCVFeatures {
+
+// Checks that the given combination of features is valid for the target
+// triple. Exits with report_fatal_error if not.
+void validate(const Triple &TT, const FeatureBitset &FeatureBits);
+
+} // namespace RISCVFeatures
+
 } // namespace llvm
 
 #endif
diff --git a/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/lib/Target/RISCV/Utils/RISCVMatInt.cpp
index 3dc298246bc5..f390ddb89e3c 100644
--- a/lib/Target/RISCV/Utils/RISCVMatInt.cpp
+++ b/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -1,9 +1,8 @@
 //===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -17,7 +16,7 @@
 namespace llvm {
 
 namespace RISCVMatInt {
-void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
   if (isInt<32>(Val)) {
     // Depending on the active bits in the immediate Value v, the following
     // instruction sequences are emitted:
@@ -33,13 +32,13 @@ void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
       Res.push_back(Inst(RISCV::LUI, Hi20));
 
     if (Lo12 || Hi20 == 0) {
-      unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
       Res.push_back(Inst(AddiOpc, Lo12));
     }
     return;
   }
 
-  assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
 
   // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
   // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
@@ -65,15 +64,30 @@ void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
   // performed when the recursion returns.
 
   int64_t Lo12 = SignExtend64<12>(Val);
-  int64_t Hi52 = (Val + 0x800) >> 12;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
   int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
   Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
 
-  generateInstSeq(Hi52, Is64Bit, Res);
+  generateInstSeq(Hi52, IsRV64, Res);
 
   Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
   if (Lo12)
     Res.push_back(Inst(RISCV::ADDI, Lo12));
 }
+
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+  int PlatRegSize = IsRV64 ? 64 : 32;
+
+  // Split the constant into platform register sized chunks, and calculate cost
+  // of each chunk.
+  int Cost = 0;
+  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
+    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
+    InstSeq MatSeq;
+    generateInstSeq(Chunk.getSExtValue(), IsRV64, MatSeq);
+    Cost += MatSeq.size();
+  }
+  return std::max(1, Cost);
+}
 } // namespace RISCVMatInt
 } // namespace llvm
diff --git a/lib/Target/RISCV/Utils/RISCVMatInt.h b/lib/Target/RISCV/Utils/RISCVMatInt.h
index 49d1d89adc7a..b12ae2eade99 100644
--- a/lib/Target/RISCV/Utils/RISCVMatInt.h
+++ b/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -1,15 +1,15 @@
 //===- RISCVMatInt.h - Immediate materialisation ---------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
 #define LLVM_LIB_TARGET_RISCV_MATINT_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MachineValueType.h"
 #include <cstdint>
@@ -31,6 +31,14 @@ using InstSeq = SmallVector<Inst, 8>;
 // order to allow this helper to be used from both the MC layer and during
 // instruction selection.
 void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+
+// Helper to estimate the number of instructions required to materialise the
+// given immediate value into a register. This estimate does not account for
+// `Val` possibly fitting into an immediate, and so may over-estimate.
+//
+// This will attempt to produce instructions to materialise `Val` as a
+// `Size`-bit immediate. `IsRV64` should match the target architecture.
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
 } // namespace RISCVMatInt
 } // namespace llvm
 #endif
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 691421e533ea..15453ae59a4f 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -1,14 +1,14 @@
 //===-- SparcAsmParser.cpp - Parse Sparc assembly to MCInst instructions --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/SparcMCExpr.h"
 #include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -646,7 +646,8 @@ bool SparcAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
   return Error(StartLoc, "invalid register name");
 }
 
-static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
+static void applyMnemonicAliases(StringRef &Mnemonic,
+                                 const FeatureBitset &Features,
                                  unsigned VariantID);
 
 bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 6290e5a15a8b..f1ca8e18c228 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -1,9 +1,8 @@
 //===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 0045e63a824e..bee331874e96 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- SparcDisassembler.cpp - Disassembler for Sparc -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -41,12 +41,6 @@ public:
 };
 }
 
-namespace llvm {
-Target &getTheSparcTarget();
-Target &getTheSparcV9Target();
-Target &getTheSparcelTarget();
-}
-
 static MCDisassembler *createSparcDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
deleted file mode 100644
index d152efae6d1f..000000000000
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-//===-- SparcInstPrinter.cpp - Convert Sparc MCInst to assembly syntax -----==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Sparc MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SparcInstPrinter.h"
-#include "Sparc.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// The generated AsmMatcher SparcGenAsmWriter uses "Sparc" as the target
-// namespace. But SPARC backend uses "SP" as its namespace.
-namespace llvm {
-namespace Sparc {
-  using namespace SP;
-}
-}
-
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "SparcGenAsmWriter.inc"
-
-bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
-  return (STI.getFeatureBits()[Sparc::FeatureV9]) != 0;
-}
-
-void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
-{
-  OS << '%' << StringRef(getRegisterName(RegNo)).lower();
-}
-
-void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                 StringRef Annot, const MCSubtargetInfo &STI) {
-  if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
-    printInstruction(MI, STI, O);
-  printAnnotation(O, Annot);
-}
-
-bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI,
-                                            const MCSubtargetInfo &STI,
-                                            raw_ostream &O) {
-  switch (MI->getOpcode()) {
-  default: return false;
-  case SP::JMPLrr:
-  case SP::JMPLri: {
-    if (MI->getNumOperands() != 3)
-      return false;
-    if (!MI->getOperand(0).isReg())
-      return false;
-    switch (MI->getOperand(0).getReg()) {
-    default: return false;
-    case SP::G0: // jmp $addr | ret | retl
-      if (MI->getOperand(2).isImm() &&
-          MI->getOperand(2).getImm() == 8) {
-        switch(MI->getOperand(1).getReg()) {
-        default: break;
-        case SP::I7: O << "\tret"; return true;
-        case SP::O7: O << "\tretl"; return true;
-        }
-      }
-      O << "\tjmp "; printMemOperand(MI, 1, STI, O);
-      return true;
-    case SP::O7: // call $addr
-      O << "\tcall "; printMemOperand(MI, 1, STI, O);
-      return true;
-    }
-  }
-  case SP::V9FCMPS:  case SP::V9FCMPD:  case SP::V9FCMPQ:
-  case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: {
-    if (isV9(STI)
-        || (MI->getNumOperands() != 3)
-        || (!MI->getOperand(0).isReg())
-        || (MI->getOperand(0).getReg() != SP::FCC0))
-      return false;
-    // if V8, skip printing %fcc0.
-    switch(MI->getOpcode()) {
-    default:
-    case SP::V9FCMPS:  O << "\tfcmps "; break;
-    case SP::V9FCMPD:  O << "\tfcmpd "; break;
-    case SP::V9FCMPQ:  O << "\tfcmpq "; break;
-    case SP::V9FCMPES: O << "\tfcmpes "; break;
-    case SP::V9FCMPED: O << "\tfcmped "; break;
-    case SP::V9FCMPEQ: O << "\tfcmpeq "; break;
-    }
-    printOperand(MI, 1, STI, O);
-    O << ", ";
-    printOperand(MI, 2, STI, O);
-    return true;
-  }
-  }
-}
-
-void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
-                                    const MCSubtargetInfo &STI,
-                                    raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand (opNum);
-
-  if (MO.isReg()) {
-    printRegName(O, MO.getReg());
-    return ;
-  }
-
-  if (MO.isImm()) {
-    switch (MI->getOpcode()) {
-      default:
-        O << (int)MO.getImm();
-        return;
-
-      case SP::TICCri: // Fall through
-      case SP::TICCrr: // Fall through
-      case SP::TRAPri: // Fall through
-      case SP::TRAPrr: // Fall through
-      case SP::TXCCri: // Fall through
-      case SP::TXCCrr: // Fall through
-        // Only seven-bit values up to 127.
-        O << ((int) MO.getImm() & 0x7f);
-        return;
-    }
-  }
-
-  assert(MO.isExpr() && "Unknown operand kind in printOperand");
-  MO.getExpr()->print(O, &MAI);
-}
-
-void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
-                                       const MCSubtargetInfo &STI,
-                                       raw_ostream &O, const char *Modifier) {
-  printOperand(MI, opNum, STI, O);
-
-  // If this is an ADD operand, emit it like normal operands.
-  if (Modifier && !strcmp(Modifier, "arith")) {
-    O << ", ";
-    printOperand(MI, opNum+1, STI, O);
-    return;
-  }
-  const MCOperand &MO = MI->getOperand(opNum+1);
-
-  if (MO.isReg() && MO.getReg() == SP::G0)
-    return;   // don't print "+%g0"
-  if (MO.isImm() && MO.getImm() == 0)
-    return;   // don't print "+0"
-
-  O << "+";
-
-  printOperand(MI, opNum+1, STI, O);
-}
-
-void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  int CC = (int)MI->getOperand(opNum).getImm();
-  switch (MI->getOpcode()) {
-  default: break;
-  case SP::FBCOND:
-  case SP::FBCONDA:
-  case SP::BPFCC:
-  case SP::BPFCCA:
-  case SP::BPFCCNT:
-  case SP::BPFCCANT:
-  case SP::MOVFCCrr:  case SP::V9MOVFCCrr:
-  case SP::MOVFCCri:  case SP::V9MOVFCCri:
-  case SP::FMOVS_FCC: case SP::V9FMOVS_FCC:
-  case SP::FMOVD_FCC: case SP::V9FMOVD_FCC:
-  case SP::FMOVQ_FCC: case SP::V9FMOVQ_FCC:
-    // Make sure CC is a fp conditional flag.
-    CC = (CC < 16) ? (CC + 16) : CC;
-    break;
-  case SP::CBCOND:
-  case SP::CBCONDA:
-    // Make sure CC is a cp conditional flag.
-    CC = (CC < 32) ? (CC + 32) : CC;
-    break;
-  }
-  O << SPARCCondCodeToString((SPCC::CondCodes)CC);
-}
-
-bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
-                                   const MCSubtargetInfo &STI,
-                                   raw_ostream &O) {
-  llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
-  return true;
-}
-
-void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
-                                      const MCSubtargetInfo &STI,
-                                      raw_ostream &O) {
-  static const char *const TagNames[] = {
-      "#LoadLoad",  "#StoreLoad", "#LoadStore", "#StoreStore",
-      "#Lookaside", "#MemIssue",  "#Sync"};
-
-  unsigned Imm = MI->getOperand(opNum).getImm();
-
-  if (Imm > 127) {
-    O << Imm;
-    return;
-  }
-
-  bool First = true;
-  for (unsigned i = 0; i < sizeof(TagNames) / sizeof(char *); i++) {
-    if (Imm & (1 << i)) {
-      O << (First ? "" : " | ") << TagNames[i];
-      First = false;
-    }
-  }
-}
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
deleted file mode 100644
index 89015eb137c2..000000000000
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- SparcInstPrinter.h - Convert Sparc MCInst to assembly syntax ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an Sparc MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
-#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class SparcInstPrinter : public MCInstPrinter {
-public:
-  SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                   const MCRegisterInfo &MRI)
-      : MCInstPrinter(MAI, MII, MRI) {}
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-  bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                            raw_ostream &OS);
-  bool isV9(const MCSubtargetInfo &STI) const;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
-                        raw_ostream &O);
-  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
-                       raw_ostream &O);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx,
-                               const MCSubtargetInfo &STI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
-                    raw_ostream &OS);
-  void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
-                       raw_ostream &OS, const char *Modifier = nullptr);
-  void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
-                      raw_ostream &OS);
-  bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
-                   raw_ostream &OS);
-  void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
-                      raw_ostream &O);
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Sparc/LeonFeatures.td b/lib/Target/Sparc/LeonFeatures.td
index 61e5f16e0a1e..e0ea4e9c7645 100755
--- a/lib/Target/Sparc/LeonFeatures.td
+++ b/lib/Target/Sparc/LeonFeatures.td
@@ -1,9 +1,8 @@
 //===-- LeonFeatures.td - Describe the Leon Features -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/LeonPasses.cpp b/lib/Target/Sparc/LeonPasses.cpp
index 5ce00db365ab..e9d3aaeb9cfe 100755
--- a/lib/Target/Sparc/LeonPasses.cpp
+++ b/lib/Target/Sparc/LeonPasses.cpp
@@ -1,9 +1,8 @@
 //===------ LeonPasses.cpp - Define passes specific to LEON ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/LeonPasses.h b/lib/Target/Sparc/LeonPasses.h
index 1b3d9a7a32f9..154a2b467e16 100755
--- a/lib/Target/Sparc/LeonPasses.h
+++ b/lib/Target/Sparc/LeonPasses.h
@@ -1,9 +1,8 @@
 //===------- LeonPasses.h - Define passes specific to LEON ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index d7f1e3a1ab1d..2e8fa0dbaf4c 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- SparcAsmBackend.cpp - Sparc Assembler Backend ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 5a730947796e..88547075c5ae 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- SparcELFObjectWriter.cpp - Sparc ELF Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 99aa63fe2290..b5fac0264019 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -1,9 +1,8 @@
 //===-- SparcFixupKinds.h - Sparc Specific Fixup Entries --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
new file mode 100644
index 000000000000..c479459786d7
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -0,0 +1,219 @@
+//===-- SparcInstPrinter.cpp - Convert Sparc MCInst to assembly syntax -----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Sparc MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstPrinter.h"
+#include "Sparc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// The generated asm writer (SparcGenAsmWriter.inc) uses "Sparc" as the target
+// namespace, but the SPARC backend uses "SP" as its namespace.
+namespace llvm {
+namespace Sparc {
+  using namespace SP;
+}
+}
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "SparcGenAsmWriter.inc"
+
+bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
+  return (STI.getFeatureBits()[Sparc::FeatureV9]) != 0;
+}
+
+void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
+{
+  OS << '%' << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                 StringRef Annot, const MCSubtargetInfo &STI) {
+  if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
+    printInstruction(MI, STI, O);
+  printAnnotation(O, Annot);
+}
+
+bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  switch (MI->getOpcode()) {
+  default: return false;
+  case SP::JMPLrr:
+  case SP::JMPLri: {
+    if (MI->getNumOperands() != 3)
+      return false;
+    if (!MI->getOperand(0).isReg())
+      return false;
+    switch (MI->getOperand(0).getReg()) {
+    default: return false;
+    case SP::G0: // jmp $addr | ret | retl
+      if (MI->getOperand(2).isImm() &&
+          MI->getOperand(2).getImm() == 8) {
+        switch(MI->getOperand(1).getReg()) {
+        default: break;
+        case SP::I7: O << "\tret"; return true;
+        case SP::O7: O << "\tretl"; return true;
+        }
+      }
+      O << "\tjmp "; printMemOperand(MI, 1, STI, O);
+      return true;
+    case SP::O7: // call $addr
+      O << "\tcall "; printMemOperand(MI, 1, STI, O);
+      return true;
+    }
+  }
+  case SP::V9FCMPS:  case SP::V9FCMPD:  case SP::V9FCMPQ:
+  case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: {
+    if (isV9(STI)
+        || (MI->getNumOperands() != 3)
+        || (!MI->getOperand(0).isReg())
+        || (MI->getOperand(0).getReg() != SP::FCC0))
+      return false;
+    // if V8, skip printing %fcc0.
+    switch(MI->getOpcode()) {
+    default:
+    case SP::V9FCMPS:  O << "\tfcmps "; break;
+    case SP::V9FCMPD:  O << "\tfcmpd "; break;
+    case SP::V9FCMPQ:  O << "\tfcmpq "; break;
+    case SP::V9FCMPES: O << "\tfcmpes "; break;
+    case SP::V9FCMPED: O << "\tfcmped "; break;
+    case SP::V9FCMPEQ: O << "\tfcmpeq "; break;
+    }
+    printOperand(MI, 1, STI, O);
+    O << ", ";
+    printOperand(MI, 2, STI, O);
+    return true;
+  }
+  }
+}
+
+void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
+                                    const MCSubtargetInfo &STI,
+                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand (opNum);
+
+  if (MO.isReg()) {
+    printRegName(O, MO.getReg());
+    return ;
+  }
+
+  if (MO.isImm()) {
+    switch (MI->getOpcode()) {
+      default:
+        O << (int)MO.getImm();
+        return;
+
+      case SP::TICCri: // Fall through
+      case SP::TICCrr: // Fall through
+      case SP::TRAPri: // Fall through
+      case SP::TRAPrr: // Fall through
+      case SP::TXCCri: // Fall through
+      case SP::TXCCrr: // Fall through
+        // Only seven-bit values up to 127.
+        O << ((int) MO.getImm() & 0x7f);
+        return;
+    }
+  }
+
+  assert(MO.isExpr() && "Unknown operand kind in printOperand");
+  MO.getExpr()->print(O, &MAI);
+}
+
+void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
+                                       const MCSubtargetInfo &STI,
+                                       raw_ostream &O, const char *Modifier) {
+  printOperand(MI, opNum, STI, O);
+
+  // If this is an ADD operand, emit it like normal operands.
+  if (Modifier && !strcmp(Modifier, "arith")) {
+    O << ", ";
+    printOperand(MI, opNum+1, STI, O);
+    return;
+  }
+  const MCOperand &MO = MI->getOperand(opNum+1);
+
+  if (MO.isReg() && MO.getReg() == SP::G0)
+    return;   // don't print "+%g0"
+  if (MO.isImm() && MO.getImm() == 0)
+    return;   // don't print "+0"
+
+  O << "+";
+
+  printOperand(MI, opNum+1, STI, O);
+}
+
+void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  int CC = (int)MI->getOperand(opNum).getImm();
+  switch (MI->getOpcode()) {
+  default: break;
+  case SP::FBCOND:
+  case SP::FBCONDA:
+  case SP::BPFCC:
+  case SP::BPFCCA:
+  case SP::BPFCCNT:
+  case SP::BPFCCANT:
+  case SP::MOVFCCrr:  case SP::V9MOVFCCrr:
+  case SP::MOVFCCri:  case SP::V9MOVFCCri:
+  case SP::FMOVS_FCC: case SP::V9FMOVS_FCC:
+  case SP::FMOVD_FCC: case SP::V9FMOVD_FCC:
+  case SP::FMOVQ_FCC: case SP::V9FMOVQ_FCC:
+    // Make sure CC is a fp conditional flag.
+    CC = (CC < 16) ? (CC + 16) : CC;
+    break;
+  case SP::CBCOND:
+  case SP::CBCONDA:
+    // Make sure CC is a cp conditional flag.
+    CC = (CC < 32) ? (CC + 32) : CC;
+    break;
+  }
+  O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
+                                   const MCSubtargetInfo &STI,
+                                   raw_ostream &O) {
+  llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
+  return true;
+}
+
+void SparcInstPrinter::printMembarTag(const MCInst *MI, int opNum,
+                                      const MCSubtargetInfo &STI,
+                                      raw_ostream &O) {
+  static const char *const TagNames[] = {
+      "#LoadLoad",  "#StoreLoad", "#LoadStore", "#StoreStore",
+      "#Lookaside", "#MemIssue",  "#Sync"};
+
+  unsigned Imm = MI->getOperand(opNum).getImm();
+
+  if (Imm > 127) {
+    O << Imm;
+    return;
+  }
+
+  bool First = true;
+  for (unsigned i = 0; i < sizeof(TagNames) / sizeof(char *); i++) {
+    if (Imm & (1 << i)) {
+      O << (First ? "" : " | ") << TagNames[i];
+      First = false;
+    }
+  }
+}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
new file mode 100644
index 000000000000..499bcadb0d4d
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -0,0 +1,56 @@
+//===-- SparcInstPrinter.h - Convert Sparc MCInst to assembly syntax ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Sparc MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class SparcInstPrinter : public MCInstPrinter {
+public:
+  SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                   const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                            raw_ostream &OS);
+  bool isV9(const MCSubtargetInfo &STI) const;
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+                        raw_ostream &O);
+  bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+                       raw_ostream &O);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx,
+                               const MCSubtargetInfo &STI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+                    raw_ostream &OS);
+  void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+                       raw_ostream &OS, const char *Modifier = nullptr);
+  void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+                      raw_ostream &OS);
+  bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                   raw_ostream &OS);
+  void printMembarTag(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+                      raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 50e8825b15e8..1a2a040990ae 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===- SparcMCAsmInfo.cpp - Sparc asm properties --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 5e8d0cb50312..c9162f2dc8a5 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===- SparcMCAsmInfo.h - Sparc asm properties -----------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 647be159a151..7e908011bd50 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- SparcMCCodeEmitter.cpp - Convert Sparc code to machine code -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -84,9 +83,10 @@ public:
                                        const MCSubtargetInfo &STI) const;
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // end anonymous namespace
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 4ddb72643a91..00f319fc37e1 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -1,9 +1,8 @@
 //===-- SparcMCExpr.cpp - Sparc specific MC expression classes --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index cf2db067749c..c2467faca257 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -1,9 +1,8 @@
 //====- SparcMCExpr.h - Sparc specific MC expression classes --*- C++ -*-=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index bd6596faee5d..ce593bb66770 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcMCTargetDesc.h"
-#include "InstPrinter/SparcInstPrinter.h"
+#include "SparcInstPrinter.h"
 #include "SparcMCAsmInfo.h"
 #include "SparcTargetStreamer.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 3cd24104c443..e5699bb1c133 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,10 +32,6 @@ class StringRef;
 class raw_pwrite_stream;
 class raw_ostream;
 
-Target &getTheSparcTarget();
-Target &getTheSparcV9Target();
-Target &getTheSparcelTarget();
-
 MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
                                         const MCRegisterInfo &MRI,
                                         MCContext &Ctx);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
index 94af791e0e75..a322d49adb87 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- SparcTargetStreamer.cpp - Sparc Target Streamer Methods -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcTargetStreamer.h"
-#include "InstPrinter/SparcInstPrinter.h"
+#include "SparcInstPrinter.h"
 #include "llvm/Support/FormattedStream.h"
 
 using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
index 8bb418e39ab4..9f729a6c2cf4 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- SparcTargetStreamer.h - Sparc Target Streamer ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 0cea53b359eb..967c463f5281 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -1,9 +1,8 @@
 //===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 0412215be8ab..ca6147edc46b 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,9 +1,8 @@
 //===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 5f0e359a3b00..4d5cbfbadc9d 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/SparcInstPrinter.h"
+#include "MCTargetDesc/SparcInstPrinter.h"
 #include "MCTargetDesc/SparcMCExpr.h"
 #include "MCTargetDesc/SparcTargetStreamer.h"
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -60,11 +60,9 @@ namespace {
     }
 
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O) override;
+                         const char *ExtraCode, raw_ostream &O) override;
     bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O) override;
+                               const char *ExtraCode, raw_ostream &O) override;
 
     void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
                                    const MCSubtargetInfo &STI);
@@ -360,7 +358,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     MO.getMBB()->getSymbol()->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress:
-    getSymbol(MO.getGlobal())->print(O, MAI);
+    PrintSymbolOperand(MO, O);
     break;
   case MachineOperand::MO_BlockAddress:
     O <<  GetBlockAddressSymbol(MO.getBlockAddress())->getName();
@@ -406,7 +404,6 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,
                                       const char *ExtraCode,
                                       raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
@@ -415,7 +412,7 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     case 'f':
     case 'r':
      break;
@@ -428,7 +425,7 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo, unsigned AsmVariant,
+                                            unsigned OpNo,
                                             const char *ExtraCode,
                                             raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index 0aa29d186dc1..4be432211f1d 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -1,9 +1,8 @@
 //===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 9f6c7d65592d..1834a6fd861d 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 6098afa68985..8e6001da05db 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index f845c41ede45..8cff50d19ed4 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -313,7 +312,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
 
   SelectInlineAsmMemoryOperands(AsmNodeOperands, SDLoc(N));
 
-  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   New->setNodeId(-1);
   ReplaceNode(N, New.getNode());
@@ -329,7 +328,8 @@ void SparcDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
-  case ISD::INLINEASM: {
+  case ISD::INLINEASM:
+  case ISD::INLINEASM_BR: {
     if (tryInlineAsm(N))
       return;
     break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index ae2257618a55..a6d440fa8aa2 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "SparcRegisterInfo.h"
 #include "SparcTargetMachine.h"
 #include "SparcTargetObjectFile.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -3258,6 +3258,8 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     case 'r':
       if (VT == MVT::v2i32)
         return std::make_pair(0U, &SP::IntPairRegClass);
+      else if (Subtarget->is64Bit())
+        return std::make_pair(0U, &SP::I64RegsRegClass);
       else
         return std::make_pair(0U, &SP::IntRegsRegClass);
     case 'f':
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 718851db25bf..8d557a4225e5 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -1,9 +1,8 @@
 //===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index 0b94c6b614eb..2d4f687f72d2 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -1,9 +1,8 @@
 //===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td
index 35987390d7ba..d4d056ea0af6 100644
--- a/lib/Target/Sparc/SparcInstrAliases.td
+++ b/lib/Target/Sparc/SparcInstrAliases.td
@@ -1,9 +1,8 @@
 //===-- SparcInstrAliases.td - Instruction Aliases for Sparc Target -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 76366c6695f4..fbf08b49d60c 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 47b42444b94d..ad343fe6f80a 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 524b5d054163..b587b28c25fc 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 558b37aeebcb..8474c7abffb3 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcInstrVIS.td b/lib/Target/Sparc/SparcInstrVIS.td
index d9adf3e8b0f5..bdefc70869d7 100644
--- a/lib/Target/Sparc/SparcInstrVIS.td
+++ b/lib/Target/Sparc/SparcInstrVIS.td
@@ -1,9 +1,8 @@
 //===---- SparcInstrVIS.td - Visual Instruction Set extensions (VIS) -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index a784124ff688..8ea317fdd453 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- SparcMCInstLower.cpp - Convert Sparc MachineInstr to MCInst -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
index e7442826e78b..7c36c4ab865f 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 104744279d9d..fe5705878693 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 33caa66154ff..ce11a423d10e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -189,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       MachineInstr *StMI =
         BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
         .addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
-      replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+      replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg);
       MI.setDesc(TII.get(SP::STDFri));
       MI.getOperand(2).setReg(SrcOddReg);
       Offset += 8;
@@ -198,10 +197,10 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       unsigned DestReg     = MI.getOperand(0).getReg();
       unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
       unsigned DestOddReg  = getSubReg(DestReg, SP::sub_odd64);
-      MachineInstr *StMI =
+      MachineInstr *LdMI =
         BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
         .addReg(FrameReg).addImm(0);
-      replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+      replaceFI(MF, *LdMI, *LdMI, dl, 1, Offset, FrameReg);
 
       MI.setDesc(TII.get(SP::LDDFri));
       MI.getOperand(0).setReg(DestOddReg);
@@ -213,7 +212,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
 }
 
-unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return SP::I6;
 }
 
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 8dd2569d10de..118ef9d80fae 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,7 +38,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 6625eaafd992..98959d512955 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/SparcSchedule.td b/lib/Target/Sparc/SparcSchedule.td
index f243546b029b..31e43c9bd95d 100755
--- a/lib/Target/Sparc/SparcSchedule.td
+++ b/lib/Target/Sparc/SparcSchedule.td
@@ -1,9 +1,8 @@
 //===-- SparcSchedule.td - Describe the Sparc Itineries ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 5301fc30a006..075a002a358d 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 24ea41a266e7..db19f99e3c9c 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -1,9 +1,8 @@
 //===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 5b467235f809..195cff79de03 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 #include "LeonPasses.h"
 #include "Sparc.h"
 #include "SparcTargetObjectFile.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -75,9 +75,9 @@ getEffectiveSparcCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
                            bool Is64Bit, bool JIT) {
   if (CM) {
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     if (*CM == CodeModel::Kernel)
-      report_fatal_error("Target does not support the kernel CodeModel");
+      report_fatal_error("Target does not support the kernel CodeModel", false);
     return *CM;
   }
   if (Is64Bit) {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index d1eb1d329a4c..4083f61433b1 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index d0db854f7849..e6ad4d2d67aa 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===------- SparcTargetObjectFile.cpp - Sparc Object Info Impl -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h
index 3b1b345c3b19..9bbe602b32b3 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- SparcTargetObjectFile.h - Sparc Object Info -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index d030bd9f232d..eafa2b4b2f13 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "Sparc.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/SparcTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h
new file mode 100644
index 000000000000..e02ff59fdac3
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- SparcTargetInfo.h - Sparc Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
+#define LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSparcTarget();
+Target &getTheSparcV9Target();
+Target &getTheSparcelTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 91959b4151b3..a259ba3433d6 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -1,14 +1,14 @@
 //===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "MCTargetDesc/SystemZInstPrinter.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "TargetInfo/SystemZTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -651,7 +651,6 @@ static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
 
 void SystemZOperand::print(raw_ostream &OS) const {
   switch (Kind) {
-    break;
   case KindToken:
     OS << "Token:" << getToken();
     break;
@@ -1181,8 +1180,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
   // features to be available during the operand check, or else we will fail to
   // find the custom parser, and then we will later get an InvalidOperand error
   // instead of a MissingFeature errror.
-  uint64_t AvailableFeatures = getAvailableFeatures();
-  setAvailableFeatures(~(uint64_t)0);
+  FeatureBitset AvailableFeatures = getAvailableFeatures();
+  FeatureBitset All;
+  All.set();
+  setAvailableFeatures(All);
   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
   setAvailableFeatures(AvailableFeatures);
   if (ResTy == MatchOperand_Success)
@@ -1233,7 +1234,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
   return false;
 }
 
-static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string SystemZMnemonicSpellCheck(StringRef S,
+                                             const FeatureBitset &FBS,
                                              unsigned VariantID = 0);
 
 bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1244,8 +1246,9 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   MCInst Inst;
   unsigned MatchResult;
 
+  FeatureBitset MissingFeatures;
   MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
-                                     MatchingInlineAsm);
+                                     MissingFeatures, MatchingInlineAsm);
   switch (MatchResult) {
   case Match_Success:
     Inst.setLoc(IDLoc);
@@ -1253,17 +1256,15 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return false;
 
   case Match_MissingFeature: {
-    assert(ErrorInfo && "Unknown missing feature!");
+    assert(MissingFeatures.any() && "Unknown missing feature!");
     // Special case the error message for the very common case where only
     // a single subtarget feature is missing
     std::string Msg = "instruction requires:";
-    uint64_t Mask = 1;
-    for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
-      if (ErrorInfo & Mask) {
+    for (unsigned I = 0, E = MissingFeatures.size(); I != E; ++I) {
+      if (MissingFeatures[I]) {
         Msg += " ";
-        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+        Msg += getSubtargetFeatureName(I);
       }
-      Mask <<= 1;
     }
     return Error(IDLoc, Msg);
   }
@@ -1282,7 +1283,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   }
 
   case Match_MnemonicFail: {
-    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
     std::string Suggestion = SystemZMnemonicSpellCheck(
       ((SystemZOperand &)*Operands[0]).getToken(), FBS);
     return Error(IDLoc, "invalid instruction" + Suggestion,
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 8903b57ffd0b..70c26db33ced 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -1,14 +1,14 @@
 //===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "SystemZ.h"
+#include "TargetInfo/SystemZTargetInfo.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
deleted file mode 100644
index 6cd12e13e220..000000000000
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZInstPrinter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "SystemZGenAsmWriter.inc"
-
-void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
-                                      unsigned Index, raw_ostream &O) {
-  O << Disp;
-  if (Base || Index) {
-    O << '(';
-    if (Index) {
-      O << '%' << getRegisterName(Index);
-      if (Base)
-        O << ',';
-    }
-    if (Base)
-      O << '%' << getRegisterName(Base);
-    O << ')';
-  }
-}
-
-void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
-                                      raw_ostream &O) {
-  if (MO.isReg())
-    O << '%' << getRegisterName(MO.getReg());
-  else if (MO.isImm())
-    O << MO.getImm();
-  else if (MO.isExpr())
-    MO.getExpr()->print(O, MAI);
-  else
-    llvm_unreachable("Invalid operand");
-}
-
-void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                   StringRef Annot,
-                                   const MCSubtargetInfo &STI) {
-  printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
-  O << '%' << getRegisterName(RegNo);
-}
-
-template <unsigned N>
-static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
-  int64_t Value = MI->getOperand(OpNum).getImm();
-  assert(isUInt<N>(Value) && "Invalid uimm argument");
-  O << Value;
-}
-
-template <unsigned N>
-static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
-  int64_t Value = MI->getOperand(OpNum).getImm();
-  assert(isInt<N>(Value) && "Invalid simm argument");
-  O << Value;
-}
-
-void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<1>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<2>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<3>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<4>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<6>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printSImmOperand<8>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  printUImmOperand<8>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printUImmOperand<12>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printSImmOperand<16>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printUImmOperand<16>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printSImmOperand<32>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printUImmOperand<32>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printUImmOperand<48>(MI, OpNum, O);
-}
-
-void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  if (MO.isImm()) {
-    O << "0x";
-    O.write_hex(MO.getImm());
-  } else
-    MO.getExpr()->print(O, &MAI);
-}
-
-void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
-                                              raw_ostream &O) {
-  // Output the PC-relative operand.
-  printPCRelOperand(MI, OpNum, O);
-
-  // Output the TLS marker if present.
-  if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
-    const MCOperand &MO = MI->getOperand(OpNum + 1);
-    const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
-    switch (refExp.getKind()) {
-      case MCSymbolRefExpr::VK_TLSGD:
-        O << ":tls_gdcall:";
-        break;
-      case MCSymbolRefExpr::VK_TLSLDM:
-        O << ":tls_ldcall:";
-        break;
-      default:
-        llvm_unreachable("Unexpected symbol kind");
-    }
-    O << refExp.getSymbol().getName();
-  }
-}
-
-void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
-                                      raw_ostream &O) {
-  printOperand(MI->getOperand(OpNum), &MAI, O);
-}
-
-void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-  printAddress(MI->getOperand(OpNum).getReg(),
-               MI->getOperand(OpNum + 1).getImm(), 0, O);
-}
-
-void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
-  printAddress(MI->getOperand(OpNum).getReg(),
-               MI->getOperand(OpNum + 1).getImm(),
-               MI->getOperand(OpNum + 2).getReg(), O);
-}
-
-void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
-  unsigned Base = MI->getOperand(OpNum).getReg();
-  uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
-  uint64_t Length = MI->getOperand(OpNum + 2).getImm();
-  O << Disp << '(' << Length;
-  if (Base)
-    O << ",%" << getRegisterName(Base);
-  O << ')';
-}
-
-void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
-  unsigned Base = MI->getOperand(OpNum).getReg();
-  uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
-  unsigned Length = MI->getOperand(OpNum + 2).getReg();
-  O << Disp << "(%" << getRegisterName(Length);
-  if (Base)
-    O << ",%" << getRegisterName(Base);
-  O << ')';
-}
-
-void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
-  printAddress(MI->getOperand(OpNum).getReg(),
-               MI->getOperand(OpNum + 1).getImm(),
-               MI->getOperand(OpNum + 2).getReg(), O);
-}
-
-void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
-                                           raw_ostream &O) {
-  static const char *const CondNames[] = {
-    "o", "h", "nle", "l", "nhe", "lh", "ne",
-    "e", "nlh", "he", "nl", "le", "nh", "no"
-  };
-  uint64_t Imm = MI->getOperand(OpNum).getImm();
-  assert(Imm > 0 && Imm < 15 && "Invalid condition");
-  O << CondNames[Imm - 1];
-}
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
deleted file mode 100644
index d65c661545eb..000000000000
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a SystemZ MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
-#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-#include <cstdint>
-
-namespace llvm {
-
-class MCOperand;
-
-class SystemZInstPrinter : public MCInstPrinter {
-public:
-  SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                     const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-  // Automatically generated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  // Print an address with the given base, displacement and index.
-  static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
-                           raw_ostream &O);
-
-  // Print the given operand.
-  static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
-                           raw_ostream &O);
-
-  // Override MCInstPrinter.
-  void printRegName(raw_ostream &O, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-private:
-  // Print various types of operand.
-  void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-  void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-
-  // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
-  // This forms part of the instruction name rather than the operand list.
-  void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
new file mode 100644
index 000000000000..91cb35dd72f2
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -0,0 +1,233 @@
+//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
+                                      unsigned Index, raw_ostream &O) {
+  O << Disp;
+  if (Base || Index) {
+    O << '(';
+    if (Index) {
+      O << '%' << getRegisterName(Index);
+      if (Base)
+        O << ',';
+    }
+    if (Base)
+      O << '%' << getRegisterName(Base);
+    O << ')';
+  }
+}
+
+void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+                                      raw_ostream &O) {
+  if (MO.isReg())
+    O << '%' << getRegisterName(MO.getReg());
+  else if (MO.isImm())
+    O << MO.getImm();
+  else if (MO.isExpr())
+    MO.getExpr()->print(O, MAI);
+  else
+    llvm_unreachable("Invalid operand");
+}
+
+void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                   StringRef Annot,
+                                   const MCSubtargetInfo &STI) {
+  printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+  O << '%' << getRegisterName(RegNo);
+}
+
+template <unsigned N>
+static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<N>(Value) && "Invalid uimm argument");
+  O << Value;
+}
+
+template <unsigned N>
+static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isInt<N>(Value) && "Invalid simm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<1>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<2>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<3>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<4>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<6>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printSImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  printUImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printUImmOperand<12>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printSImmOperand<16>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printUImmOperand<16>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printSImmOperand<32>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printUImmOperand<32>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printUImmOperand<48>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  if (MO.isImm()) {
+    O << "0x";
+    O.write_hex(MO.getImm());
+  } else
+    MO.getExpr()->print(O, &MAI);
+}
+
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+                                              raw_ostream &O) {
+  // Output the PC-relative operand.
+  printPCRelOperand(MI, OpNum, O);
+
+  // Output the TLS marker if present.
+  if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
+    const MCOperand &MO = MI->getOperand(OpNum + 1);
+    const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
+    switch (refExp.getKind()) {
+      case MCSymbolRefExpr::VK_TLSGD:
+        O << ":tls_gdcall:";
+        break;
+      case MCSymbolRefExpr::VK_TLSLDM:
+        O << ":tls_ldcall:";
+        break;
+      default:
+        llvm_unreachable("Unexpected symbol kind");
+    }
+    O << refExp.getSymbol().getName();
+  }
+}
+
+void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
+                                      raw_ostream &O) {
+  printOperand(MI->getOperand(OpNum), &MAI, O);
+}
+
+void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printAddress(MI->getOperand(OpNum).getReg(),
+               MI->getOperand(OpNum + 1).getImm(), 0, O);
+}
+
+void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  printAddress(MI->getOperand(OpNum).getReg(),
+               MI->getOperand(OpNum + 1).getImm(),
+               MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  unsigned Base = MI->getOperand(OpNum).getReg();
+  uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+  uint64_t Length = MI->getOperand(OpNum + 2).getImm();
+  O << Disp << '(' << Length;
+  if (Base)
+    O << ",%" << getRegisterName(Base);
+  O << ')';
+}
+
+void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  unsigned Base = MI->getOperand(OpNum).getReg();
+  uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+  unsigned Length = MI->getOperand(OpNum + 2).getReg();
+  O << Disp << "(%" << getRegisterName(Length);
+  if (Base)
+    O << ",%" << getRegisterName(Base);
+  O << ')';
+}
+
+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  printAddress(MI->getOperand(OpNum).getReg(),
+               MI->getOperand(OpNum + 1).getImm(),
+               MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  static const char *const CondNames[] = {
+    "o", "h", "nle", "l", "nhe", "lh", "ne",
+    "e", "nlh", "he", "nl", "le", "nh", "no"
+  };
+  uint64_t Imm = MI->getOperand(OpNum).getImm();
+  assert(Imm > 0 && Imm < 15 && "Invalid condition");
+  O << CondNames[Imm - 1];
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
new file mode 100644
index 000000000000..4235d4e21792
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -0,0 +1,77 @@
+//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SystemZ MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include <cstdint>
+
+namespace llvm {
+
+class MCOperand;
+
+class SystemZInstPrinter : public MCInstPrinter {
+public:
+  SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                     const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Declared here; the definitions are automatically generated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  // Print an address with the given base, displacement and index.
+  static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
+                           raw_ostream &O);
+
+  // Print the given operand.  Static, so no printer instance is needed.
+  static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
+                           raw_ostream &O);
+
+  // Override MCInstPrinter.
+  void printRegName(raw_ostream &O, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+private:
+  // Print various types of operand; OpNum is the (first) operand index in MI.
+  void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+
+  // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
+  // This forms part of the instruction name rather than the operand list.
+  void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 2146832f7794..23d8585095cc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 6e00981939b6..d6cdacfcab92 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 800f89232063..b8818a65f9e3 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -1,9 +1,8 @@
 //====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index d188f56512ab..a5ccf4f68ffd 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -144,9 +143,10 @@ private:
   }
 
 private:
-  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
-  void verifyInstructionPredicates(const MCInst &MI,
-                                   uint64_t AvailableFeatures) const;
+  FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+  void
+  verifyInstructionPredicates(const MCInst &MI,
+                              const FeatureBitset &AvailableFeatures) const;
 };
 
 } // end anonymous namespace
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index c012accc14dd..14f6198183b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -1,9 +1,8 @@
 //===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 888be519fb16..8d8ba5644e10 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,8 +36,8 @@ protected:
 } // end anonymous namespace
 
 SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
-  : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
-                            /*HasRelocationAddend=*/ true) {}
+  : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390,
+                            /*HasRelocationAddend_=*/ true) {}
 
 // Return the relocation type for an absolute value of MCFixupKind Kind.
 static unsigned getAbsoluteReloc(unsigned Kind) {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 05688ed8efbb..3c0300cfd8f0 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -1,15 +1,16 @@
 //===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "SystemZMCTargetDesc.h"
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZInstPrinter.h"
 #include "SystemZMCAsmInfo.h"
+#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 1617a807e65a..8f720c5abb34 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,8 +29,6 @@ class Triple;
 class raw_pwrite_stream;
 class raw_ostream;
 
-Target &getTheSystemZTarget();
-
 namespace SystemZMC {
 // How many bytes are in the ABI-defined, caller-allocated part of
 // a stack frame.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index fdbde3d8dbc3..2b0f90182d7f 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -1,9 +1,8 @@
 //==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -195,6 +194,7 @@ FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZTDCPass();
 } // end namespace llvm
 
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 3800f7a26b79..ebbc6ffd2f1e 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -1,9 +1,8 @@
 //===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index e2de721be568..ef378e4ade7a 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,9 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZAsmPrinter.h"
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "MCTargetDesc/SystemZInstPrinter.h"
 #include "SystemZConstantPoolValue.h"
 #include "SystemZMCInstLower.h"
+#include "TargetInfo/SystemZTargetInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Mangler.h"
@@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
                                  Context);
 }
 
+// MI is an instruction that accepts an optional alignment hint,
+// and which was already lowered to LoweredMI.  If the alignment of
+// the original memory operand is at least 8, switch LoweredMI to
+// Opcode and append the corresponding hint as an immediate operand.
+static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
+                               unsigned Opcode) {
+  if (!MI->hasOneMemOperand())
+    return; // Leave LoweredMI untouched.
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  unsigned AlignmentHint = 0; // 0 means no hint is emitted.
+  if (MMO->getAlignment() >= 16)
+    AlignmentHint = 4; // Alignment of at least 16.
+  else if (MMO->getAlignment() >= 8)
+    AlignmentHint = 3; // Alignment of at least 8 but below 16.
+  if (AlignmentHint == 0)
+    return; // Alignment below 8: keep the instruction as lowered.
+
+  LoweredMI.setOpcode(Opcode); // Replace the opcode chosen by the lowering.
+  LoweredMI.addOperand(MCOperand::createImm(AlignmentHint)); // Hint goes last.
+}
+
 // MI loads the high part of a vector from memory.  Return an instruction
 // that uses replicating vector load Opcode to do the same thing.
 static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
@@ -351,6 +372,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
     break;
 
+  case SystemZ::VL:
+    Lower.lower(MI, LoweredMI);
+    lowerAlignmentHint(MI, LoweredMI, SystemZ::VLAlign);
+    break;
+
+  case SystemZ::VST:
+    Lower.lower(MI, LoweredMI);
+    lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTAlign);
+    break;
+
+  case SystemZ::VLM:
+    Lower.lower(MI, LoweredMI);
+    lowerAlignmentHint(MI, LoweredMI, SystemZ::VLMAlign);
+    break;
+
+  case SystemZ::VSTM:
+    Lower.lower(MI, LoweredMI);
+    lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTMAlign);
+    break;
+
   case SystemZ::VL32:
     LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
     break;
@@ -618,26 +659,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
   OutStreamer->EmitValue(Expr, Size);
 }
 
-bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
-                                        unsigned OpNo,
-                                        unsigned AsmVariant,
+bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                         const char *ExtraCode,
                                         raw_ostream &OS) {
-  if (ExtraCode && *ExtraCode == 'n') {
-    if (!MI->getOperand(OpNo).isImm())
-      return true;
-    OS << -int64_t(MI->getOperand(OpNo).getImm());
-  } else {
-    SystemZMCInstLower Lower(MF->getContext(), *this);
-    MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
-    SystemZInstPrinter::printOperand(MO, MAI, OS);
-  }
+  if (ExtraCode)
+    return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
+  SystemZMCInstLower Lower(MF->getContext(), *this);
+  MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
+  SystemZInstPrinter::printOperand(MO, MAI, OS);
   return false;
 }
 
 bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                               unsigned OpNo,
-                                              unsigned AsmVariant,
                                               const char *ExtraCode,
                                               raw_ostream &OS) {
   SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index cb88ec32f83a..aa5d3ca78e61 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -37,11 +36,9 @@ public:
   void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
   void EmitEndOfAsmFile(Module &M) override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &OS) override;
+                       const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &OS) override;
+                             const char *ExtraCode, raw_ostream &OS) override;
 
   bool doInitialization(Module &M) override {
     SM.reset();
diff --git a/lib/Target/SystemZ/SystemZCallingConv.cpp b/lib/Target/SystemZ/SystemZCallingConv.cpp
index 72da51f74b10..91c7fae17a75 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index b5523e586f4c..82f29b6361f1 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -1,9 +1,8 @@
 //===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index deba27fee7fe..bbd51546ac9f 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -1,9 +1,8 @@
 //=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for the SystemZ ABI.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 4a6beb67f182..ffeee4da95cc 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index a71b595560d2..6cb7710abdfe 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -1,9 +1,8 @@
 //===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index 668a77ac014f..9cbf6b320504 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -147,6 +146,9 @@ static bool resultTests(MachineInstr &MI, unsigned Reg) {
 // Describe the references to Reg or any of its aliases in MI.
 Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
   Reference Ref;
+  if (MI.isDebugInstr())
+    return Ref;
+
   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
     const MachineOperand &MO = MI.getOperand(I);
     if (MO.isReg()) {
@@ -523,9 +525,9 @@ bool SystemZElimCompare::fuseCompareOperations(
   // SrcReg2 is the register if the source operand is a register,
   // 0 if the source operand is immediate, and the base register
   // if the source operand is memory (index is not supported).
-  unsigned SrcReg = Compare.getOperand(0).getReg();
-  unsigned SrcReg2 =
-      Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
+  Register SrcReg = Compare.getOperand(0).getReg();
+  Register SrcReg2 =
+    Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : Register();
   MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
   for (++MBBI; MBBI != MBBE; ++MBBI)
     if (MBBI->modifiesRegister(SrcReg, TRI) ||
diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
index 67c80899d491..09708fb4241c 100644
--- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp
+++ b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -1,9 +1,8 @@
 //==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index beff45dba81d..dae795e845b0 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -1,9 +1,8 @@
 //===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -240,6 +239,51 @@ def Arch12NewFeatures : SystemZFeatureList<[
     FeatureInsertReferenceBitsMultiple
 ]>;
 
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Thirteenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureMiscellaneousExtensions3 : SystemZFeature<
+  "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+  "Assume that the miscellaneous-extensions facility 3 is installed"
+>;
+
+def FeatureMessageSecurityAssist9 : SystemZFeature<
+  "message-security-assist-extension9", "MessageSecurityAssist9",
+  "Assume that the message-security-assist extension facility 9 is installed"
+>;
+
+def FeatureVectorEnhancements2 : SystemZFeature<
+  "vector-enhancements-2", "VectorEnhancements2",
+  "Assume that the vector enhancements facility 2 is installed"
+>;
+
+def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
+  "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+  "Assume that the vector packed decimal enhancement facility is installed"
+>;
+
+def FeatureEnhancedSort : SystemZFeature<
+  "enhanced-sort", "EnhancedSort",
+  "Assume that the enhanced-sort facility is installed"
+>;
+
+def FeatureDeflateConversion : SystemZFeature<
+  "deflate-conversion", "DeflateConversion",
+  "Assume that the deflate-conversion facility is installed"
+>;
+
+def Arch13NewFeatures : SystemZFeatureList<[ // Feeds the cumulative sets below.
+    FeatureMiscellaneousExtensions3,
+    FeatureMessageSecurityAssist9,
+    FeatureVectorEnhancements2,
+    FeatureVectorPackedDecimalEnhancement,
+    FeatureEnhancedSort,
+    FeatureDeflateConversion
+]>;
+
 //===----------------------------------------------------------------------===//
 //
 // Cumulative supported and unsupported feature sets
@@ -256,9 +300,13 @@ def Arch11SupportedFeatures
   : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
 def Arch12SupportedFeatures
   : SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
+def Arch13SupportedFeatures
+  : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;
 
-def Arch12UnsupportedFeatures
+def Arch13UnsupportedFeatures
   : SystemZFeatureList<[]>;
+def Arch12UnsupportedFeatures
+  : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>;
 def Arch11UnsupportedFeatures
   : SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
 def Arch10UnsupportedFeatures
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 565299c90139..da28faebb326 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 08c84c785cc0..71ef3e4dc240 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index 8726b56bc94f..e2af02227999 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -1,9 +1,8 @@
 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 6292feefbfea..38bf41ebe96a 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -1,9 +1,8 @@
 //=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5bc2ab0ef2d8..9dc4512255cc 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "SystemZISelLowering.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
@@ -304,6 +304,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
   void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                            uint64_t UpperVal, uint64_t LowerVal);
 
+  void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
+                          SDNode *Node);
+
   // Try to use gather instruction Opcode to implement vector insertion N.
   bool tryGather(SDNode *N, unsigned Opcode);
 
@@ -1132,6 +1135,35 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
   SelectCode(Or.getNode());
 }
 
+void SystemZDAGToDAGISel::loadVectorConstant(
+    const SystemZVectorConstantInfo &VCI, SDNode *Node) {
+  assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
+          VCI.Opcode == SystemZISD::REPLICATE ||
+          VCI.Opcode == SystemZISD::ROTATE_MASK) &&
+         "Bad opcode!");
+  assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");
+  EVT VT = Node->getValueType(0);
+  SDLoc DL(Node);
+  SmallVector<SDValue, 2> Ops;
+  for (unsigned OpVal : VCI.OpVals)
+    Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+  SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
+
+  if (VCI.VecVT == VT.getSimpleVT())
+    ReplaceNode(Node, Op.getNode());
+  else if (VT.getSizeInBits() == 128) {
+    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+    ReplaceNode(Node, BitCast.getNode());
+    SelectCode(BitCast.getNode());
+  } else { // float or double
+    unsigned SubRegIdx =
+        (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
+    ReplaceNode(
+        Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());
+  }
+  SelectCode(Op.getNode());
+}
+
 bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
   SDValue ElemV = N->getOperand(2);
   auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
@@ -1243,6 +1275,9 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
     InputChain = LoadNode->getChain();
   } else if (Chain.getOpcode() == ISD::TokenFactor) {
     SmallVector<SDValue, 4> ChainOps;
+    SmallVector<const SDNode *, 4> LoopWorklist;
+    SmallPtrSet<const SDNode *, 16> Visited;
+    const unsigned int Max = 1024;
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
       SDValue Op = Chain.getOperand(i);
       if (Op == Load.getValue(1)) {
@@ -1251,28 +1286,26 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
         ChainOps.push_back(Load.getOperand(0));
         continue;
       }
-
-      // Make sure using Op as part of the chain would not cause a cycle here.
-      // In theory, we could check whether the chain node is a predecessor of
-      // the load. But that can be very expensive. Instead visit the uses and
-      // make sure they all have smaller node id than the load.
-      int LoadId = LoadNode->getNodeId();
-      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
-             UE = UI->use_end(); UI != UE; ++UI) {
-        if (UI.getUse().getResNo() != 0)
-          continue;
-        if (UI->getNodeId() > LoadId)
-          return false;
-      }
-
+      LoopWorklist.push_back(Op.getNode());
       ChainOps.push_back(Op);
     }
 
-    if (ChainCheck)
+    if (ChainCheck) {
+      // Add the other operand of StoredVal to worklist.
+      for (SDValue Op : StoredVal->ops())
+        if (Op.getNode() != LoadNode)
+          LoopWorklist.push_back(Op.getNode());
+
+      // Check if Load is reachable from any of the nodes in the worklist.
+      if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
+                                       true))
+        return false;
+
       // Make a new TokenFactor with all the other input chains except
       // for the load.
       InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
                                    MVT::Other, ChainOps);
+    }
   }
   if (!ChainCheck)
     return false;
@@ -1447,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
         Node->getOperand(0).getOpcode() != ISD::Constant)
       if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
         uint64_t Val = Op1->getZExtValue();
+        // Don't split the operation if we can match one of the combined
+        // logical operations provided by miscellaneous-extensions-3.
+        if (Subtarget->hasMiscellaneousExtensions3()) {
+          unsigned ChildOpcode = Node->getOperand(0).getOpcode();
+          // Check whether this expression matches NAND/NOR/NXOR.
+          if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
+            if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
+                ChildOpcode == ISD::XOR)
+              break;
+          // Check whether this expression matches OR-with-complement.
+          if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
+            auto Op0 = Node->getOperand(0);
+            if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
+              if (Op0Op1->getZExtValue() == (uint64_t)-1)
+                break;
+          }
+        }
         if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
           splitLargeImmediate(Opcode, Node, Node->getOperand(0),
                               Val - uint32_t(Val), uint32_t(Val));
@@ -1527,6 +1577,27 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
     break;
   }
 
+  case ISD::BUILD_VECTOR: {
+    auto *BVN = cast<BuildVectorSDNode>(Node);
+    SystemZVectorConstantInfo VCI(BVN);
+    if (VCI.isVectorConstantLegal(*Subtarget)) {
+      loadVectorConstant(VCI, Node);
+      return;
+    }
+    break;
+  }
+
+  case ISD::ConstantFP: {
+    APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();
+    if (Imm.isZero() || Imm.isNegZero())
+      break;
+    SystemZVectorConstantInfo VCI(Imm);
+    bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;
+    assert(Success && "Expected legal FP immediate");
+    loadVectorConstant(VCI, Node);
+    return;
+  }
+
   case ISD::STORE: {
     if (tryFoldLoadStoreIntoMemOperand(Node))
       return;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2a825c1316f3..78820f511ab4 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -250,8 +249,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
 
   // We have native support for a 64-bit CTLZ, via FLOGR.
   setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
   setOperationAction(ISD::CTLZ, MVT::i64, Legal);
 
+  // On arch13 we have native support for a 64-bit CTPOP.
+  if (Subtarget.hasMiscellaneousExtensions3()) {
+    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+  }
+
   // Give LowerOperation the chance to replace 64-bit ORs with subregs.
   setOperationAction(ISD::OR, MVT::i64, Custom);
 
@@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
   }
 
+  if (Subtarget.hasVectorEnhancements2()) {
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
+  }
+
   // Handle floating-point types.
   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
        I <= MVT::LAST_FP_VALUETYPE;
@@ -401,6 +418,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FSINCOS, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
+
+      // Handle constrained floating-point operations.
+      setOperationAction(ISD::STRICT_FADD, VT, Legal);
+      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+      setOperationAction(ISD::STRICT_FMA, VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
+      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+      if (Subtarget.hasFPExtension()) {
+        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+      }
     }
   }
 
@@ -432,6 +467,20 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+    // Handle constrained floating-point operations.
+    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
   }
 
   // The vector enhancements facility 1 has instructions for these.
@@ -475,6 +524,25 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
     setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
+
+    // Handle constrained floating-point operations.
+    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
+                     MVT::v4f32, MVT::v2f64 }) {
+      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+    }
   }
 
   // We have fused multiply-addition for f32 and f64 but not f128.
@@ -525,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::FP_EXTEND);
@@ -577,9 +646,127 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   return false;
 }
 
-bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+// Return true if the constant can be generated with a vector instruction,
+// such as VGM, VGMB or VREPI.
+bool SystemZVectorConstantInfo::isVectorConstantLegal(
+    const SystemZSubtarget &Subtarget) {
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  if (!Subtarget.hasVector() ||
+      (isFP128 && !Subtarget.hasVectorEnhancements1()))
+    return false;
+
+  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
+  // preferred way of creating all-zero and all-one vectors so give it
+  // priority over other methods below.
+  unsigned Mask = 0;
+  unsigned I = 0;
+  for (; I < SystemZ::VectorBytes; ++I) {
+    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
+    if (Byte == 0xff)
+      Mask |= 1ULL << I;
+    else if (Byte != 0)
+      break;
+  }
+  if (I == SystemZ::VectorBytes) {
+    Opcode = SystemZISD::BYTE_MASK;
+    OpVals.push_back(Mask);
+    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
+    return true;
+  }
+
+  if (SplatBitSize > 64)
+    return false;
+
+  auto tryValue = [&](uint64_t Value) -> bool {
+    // Try VECTOR REPLICATE IMMEDIATE
+    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
+    if (isInt<16>(SignedValue)) {
+      OpVals.push_back(((unsigned) SignedValue));
+      Opcode = SystemZISD::REPLICATE;
+      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+                               SystemZ::VectorBits / SplatBitSize);
+      return true;
+    }
+    // Try VECTOR GENERATE MASK
+    unsigned Start, End;
+    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
+      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
+      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
+      // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
+      OpVals.push_back(Start - (64 - SplatBitSize));
+      OpVals.push_back(End - (64 - SplatBitSize));
+      Opcode = SystemZISD::ROTATE_MASK;
+      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+                               SystemZ::VectorBits / SplatBitSize);
+      return true;
+    }
+    return false;
+  };
+
+  // First try assuming that any undefined bits above the highest set bit
+  // and below the lowest set bit are 1s.  This increases the likelihood of
+  // being able to use a sign-extended element value in VECTOR REPLICATE
+  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+  uint64_t SplatBitsZ = SplatBits.getZExtValue();
+  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+  uint64_t Lower =
+      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+  uint64_t Upper =
+      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+  if (tryValue(SplatBitsZ | Upper | Lower))
+    return true;
+
+  // Now try assuming that any undefined bits between the first and
+  // last defined set bits are set.  This increases the chances of
+  // using a non-wraparound mask.
+  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+  return tryValue(SplatBitsZ | Middle);
+}
+
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
+  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
+  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
+
+  // Find the smallest splat.
+  SplatBits = FPImm.bitcastToAPInt();
+  unsigned Width = SplatBits.getBitWidth();
+  while (Width > 8) {
+    unsigned HalfSize = Width / 2;
+    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
+    APInt LowValue = SplatBits.trunc(HalfSize);
+
+    // If the two halves do not match, stop here.
+    if (HighValue != LowValue || 8 > HalfSize)
+      break;
+
+    SplatBits = HighValue;
+    Width = HalfSize;
+  }
+  SplatUndef = 0;
+  SplatBitSize = Width;
+}
+
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
+  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
+  bool HasAnyUndefs;
+
+  // Get IntBits by finding the 128 bit splat.
+  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
+                       true);
+
+  // Get SplatBits by finding the 8 bit or greater splat.
+  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
+                       true);
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                         bool ForCodeSize) const {
   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
-  return Imm.isZero() || Imm.isNegZero();
+  if (Imm.isZero() || Imm.isNegZero())
+    return true;
+
+  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
 }
 
 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
@@ -592,10 +779,8 @@ bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
   return isUInt<32>(Imm) || isUInt<32>(-Imm);
 }
 
-bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                           unsigned,
-                                                           unsigned,
-                                                           bool *Fast) const {
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
   // Unaligned accesses should never be slower than the expanded version.
   // We check specifically for aligned accesses in the few cases where
   // they are required.
@@ -1642,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
     CCValid = SystemZ::CCMASK_ANY;
     return true;
 
+  case Intrinsic::s390_vstrsb:
+  case Intrinsic::s390_vstrsh:
+  case Intrinsic::s390_vstrsf:
+    Opcode = SystemZISD::VSTRS_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vstrszb:
+  case Intrinsic::s390_vstrszh:
+  case Intrinsic::s390_vstrszf:
+    Opcode = SystemZISD::VSTRSZ_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
   case Intrinsic::s390_vfcedbs:
   case Intrinsic::s390_vfcesbs:
     Opcode = SystemZISD::VFCMPES;
@@ -2511,9 +2710,8 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
     break;
   }
   if (Invert) {
-    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                               DAG.getConstant(65535, DL, MVT::i32));
-    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+    SDValue Mask =
+      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
   }
   return Cmp;
@@ -3261,6 +3459,18 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
 }
 
+static bool isAddCarryChain(SDValue Carry) {
+  while (Carry.getOpcode() == ISD::ADDCARRY)
+    Carry = Carry.getOperand(2);
+  return Carry.getOpcode() == ISD::UADDO;
+}
+
+static bool isSubBorrowChain(SDValue Carry) {
+  while (Carry.getOpcode() == ISD::SUBCARRY)
+    Carry = Carry.getOperand(2);
+  return Carry.getOpcode() == ISD::USUBO;
+}
+
 // Lower ADDCARRY/SUBCARRY nodes.
 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
                                                 SelectionDAG &DAG) const {
@@ -3283,11 +3493,17 @@ SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Unknown instruction!");
   case ISD::ADDCARRY:
+    if (!isAddCarryChain(Carry))
+      return SDValue();
+
     BaseOp = SystemZISD::ADDCARRY;
     CCValid = SystemZ::CCMASK_LOGICAL;
     CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
     break;
   case ISD::SUBCARRY:
+    if (!isSubBorrowChain(Carry))
+      return SDValue();
+
     BaseOp = SystemZISD::SUBCARRY;
     CCValid = SystemZ::CCMASK_LOGICAL;
     CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
@@ -3331,14 +3547,14 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
       break;
     }
     case 32: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
       break;
     }
     case 64: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
       Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
       break;
@@ -3602,6 +3818,27 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
   return SDValue();
 }
 
+MachineMemOperand::Flags
+SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
+  // Because of how we convert atomic_load and atomic_store to normal loads and
+  // stores in the DAG, we need to ensure that the MMOs are marked volatile
+  // since DAGCombine hasn't been updated to account for atomic but
+  // non-volatile loads.  (See D57601)
+  if (auto *SI = dyn_cast<StoreInst>(&I))
+    if (SI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *LI = dyn_cast<LoadInst>(&I))
+    if (LI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
+    if (AI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+    if (AI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  return MachineMemOperand::MONone;
+}
+
 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                               SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
@@ -4260,78 +4497,6 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
 }
 
-// Try to represent constant BUILD_VECTOR node BVN using a
-// SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
-// on success.
-static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
-  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
-  unsigned BytesPerElement = ElemVT.getStoreSize();
-  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
-    SDValue Op = BVN->getOperand(I);
-    if (!Op.isUndef()) {
-      uint64_t Value;
-      if (Op.getOpcode() == ISD::Constant)
-        Value = cast<ConstantSDNode>(Op)->getZExtValue();
-      else if (Op.getOpcode() == ISD::ConstantFP)
-        Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
-                 .getZExtValue());
-      else
-        return false;
-      for (unsigned J = 0; J < BytesPerElement; ++J) {
-        uint64_t Byte = (Value >> (J * 8)) & 0xff;
-        if (Byte == 0xff)
-          Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
-        else if (Byte != 0)
-          return false;
-      }
-    }
-  }
-  return true;
-}
-
-// Try to load a vector constant in which BitsPerElement-bit value Value
-// is replicated to fill the vector.  VT is the type of the resulting
-// constant, which may have elements of a different size from BitsPerElement.
-// Return the SDValue of the constant on success, otherwise return
-// an empty value.
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
-                                       const SystemZInstrInfo *TII,
-                                       const SDLoc &DL, EVT VT, uint64_t Value,
-                                       unsigned BitsPerElement) {
-  // Signed 16-bit values can be replicated using VREPI.
-  // Mark the constants as opaque or DAGCombiner will convert back to
-  // BUILD_VECTOR.
-  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
-  if (isInt<16>(SignedValue)) {
-    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-                                 SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(
-        SystemZISD::REPLICATE, DL, VecVT,
-        DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
-    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-  }
-  // See whether rotating the constant left some N places gives a value that
-  // is one less than a power of 2 (i.e. all zeros followed by all ones).
-  // If so we can use VGM.
-  unsigned Start, End;
-  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
-    // isRxSBGMask returns the bit numbers for a full 64-bit value,
-    // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
-    // bit numbers for an BitsPerElement value, so that 0 denotes
-    // 1 << (BitsPerElement-1).
-    Start -= 64 - BitsPerElement;
-    End -= 64 - BitsPerElement;
-    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-                                 SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(
-        SystemZISD::ROTATE_MASK, DL, VecVT,
-        DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
-        DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
-    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-  }
-  return SDValue();
-}
-
 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
 // the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
@@ -4385,9 +4550,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
   return GS.getNode(DAG, SDLoc(BVN));
 }
 
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
+  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
+    return true;
+  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
+    return true;
+  return false;
+}
+
 // Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
-                           SmallVectorImpl<SDValue> &Elems) {
+SDValue
+SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+                                   SmallVectorImpl<SDValue> &Elems) const {
   // See whether there is a single replicated value.
   SDValue Single;
   unsigned int NumElements = Elems.size();
@@ -4416,13 +4590,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
   //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
   //   This is only a win if the single defined element is used more than once.
   //   In other cases we're better off using a single VLVGx.
-  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
 
   // If all elements are loads, use VLREP/VLEs (below).
   bool AllLoads = true;
   for (auto Elem : Elems)
-    if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+    if (!isVectorElementLoad(Elem)) {
       AllLoads = false;
       break;
     }
@@ -4494,8 +4668,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
     std::map<const SDNode*, unsigned> UseCounts;
     SDNode *LoadMaxUses = nullptr;
     for (unsigned I = 0; I < NumElements; ++I)
-      if (Elems[I].getOpcode() == ISD::LOAD &&
-          cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+      if (isVectorElementLoad(Elems[I])) {
         SDNode *Ld = Elems[I].getNode();
         UseCounts[Ld]++;
         if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -4532,56 +4705,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
 
 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  const SystemZInstrInfo *TII =
-    static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
 
   if (BVN->isConstant()) {
-    // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
-    // preferred way of creating all-zero and all-one vectors so give it
-    // priority over other methods below.
-    uint64_t Mask = 0;
-    if (tryBuildVectorByteMask(BVN, Mask)) {
-      SDValue Op = DAG.getNode(
-          SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-          DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
-      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-    }
-
-    // Try using some form of replication.
-    APInt SplatBits, SplatUndef;
-    unsigned SplatBitSize;
-    bool HasAnyUndefs;
-    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
-                             8, true) &&
-        SplatBitSize <= 64) {
-      // First try assuming that any undefined bits above the highest set bit
-      // and below the lowest set bit are 1s.  This increases the likelihood of
-      // being able to use a sign-extended element value in VECTOR REPLICATE
-      // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
-      uint64_t SplatBitsZ = SplatBits.getZExtValue();
-      uint64_t SplatUndefZ = SplatUndef.getZExtValue();
-      uint64_t Lower = (SplatUndefZ
-                        & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
-      uint64_t Upper = (SplatUndefZ
-                        & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
-      uint64_t Value = SplatBitsZ | Upper | Lower;
-      SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
-                                           SplatBitSize);
-      if (Op.getNode())
-        return Op;
-
-      // Now try assuming that any undefined bits between the first and
-      // last defined set bits are set.  This increases the chances of
-      // using a non-wraparound mask.
-      uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
-      Value = SplatBitsZ | Middle;
-      Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
-      if (Op.getNode())
-        return Op;
-    }
+    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
+      return Op;
 
     // Fall back to loading it from memory.
     return SDValue();
@@ -5074,6 +5204,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VISTR_CC);
     OPCODE(VSTRC_CC);
     OPCODE(VSTRCZ_CC);
+    OPCODE(VSTRS_CC);
+    OPCODE(VSTRSZ_CC);
     OPCODE(TDC);
     OPCODE(ATOMIC_SWAPW);
     OPCODE(ATOMIC_LOADW_ADD);
@@ -5093,6 +5225,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(ATOMIC_CMP_SWAP_128);
     OPCODE(LRV);
     OPCODE(STRV);
+    OPCODE(VLER);
+    OPCODE(VSTER);
     OPCODE(PREFETCH);
   }
   return nullptr;
@@ -5340,8 +5474,7 @@ SDValue SystemZTargetLowering::combineMERGE(
   SDValue Op1 = N->getOperand(1);
   if (Op0.getOpcode() == ISD::BITCAST)
     Op0 = Op0.getOperand(0);
-  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
     // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
     // for v4f32.
     if (Op1 == N->getOperand(0))
@@ -5407,6 +5540,31 @@ SDValue SystemZTargetLowering::combineLOAD(
   return SDValue(N, 0);
 }
 
+bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
+  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
+    return true;
+  if (Subtarget.hasVectorEnhancements2())
+    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
+      return true;
+  return false;
+}
+
+static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
+  if (!VT.isVector() || !VT.isSimple() ||
+      VT.getSizeInBits() != 128 ||
+      VT.getScalarSizeInBits() % 8 != 0)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != NumElts - 1 - i)
+      return false;
+  }
+
+  return true;
+}
+
 SDValue SystemZTargetLowering::combineSTORE(
     SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -5428,13 +5586,11 @@ SDValue SystemZTargetLowering::combineSTORE(
                                SN->getMemOperand());
     }
   }
-  // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
   if (!SN->isTruncatingStore() &&
       Op1.getOpcode() == ISD::BSWAP &&
       Op1.getNode()->hasOneUse() &&
-      (Op1.getValueType() == MVT::i16 ||
-       Op1.getValueType() == MVT::i32 ||
-       Op1.getValueType() == MVT::i64)) {
+      canLoadStoreByteSwapped(Op1.getValueType())) {
 
       SDValue BSwapOp = Op1.getOperand(0);
 
@@ -5449,15 +5605,97 @@ SDValue SystemZTargetLowering::combineSTORE(
         DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                                 Ops, MemVT, SN->getMemOperand());
     }
+  // Combine STORE (element-swap) into VSTER
+  if (!SN->isTruncatingStore() &&
+      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+      Op1.getNode()->hasOneUse() &&
+      Subtarget.hasVectorEnhancements2()) {
+    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
+    ArrayRef<int> ShuffleMask = SVN->getMask();
+    if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
+      SDValue Ops[] = {
+        N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
+      };
+
+      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
+                                     DAG.getVTList(MVT::Other),
+                                     Ops, MemVT, SN->getMemOperand());
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  // Combine element-swap (LOAD) into VLER
+  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+      N->getOperand(0).hasOneUse() &&
+      Subtarget.hasVectorEnhancements2()) {
+    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    ArrayRef<int> ShuffleMask = SVN->getMask();
+    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
+      SDValue Load = N->getOperand(0);
+      LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+      // Create the element-swapping load.
+      SDValue Ops[] = {
+        LD->getChain(),    // Chain
+        LD->getBasePtr()   // Ptr
+      };
+      SDValue ESLoad =
+        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
+                                DAG.getVTList(LD->getValueType(0), MVT::Other),
+                                Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
+      // by the load dead.
+      DCI.CombineTo(N, ESLoad);
+
+      // Next, combine the load away, we give it a bogus result value but a real
+      // chain result.  The result value is dead because the shuffle is dead.
+      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
+
+      // Return N so it doesn't get rechecked!
+      return SDValue(N, 0);
+    }
+  }
+
   return SDValue();
 }
 
 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
     SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
 
   if (!Subtarget.hasVector())
     return SDValue();
 
+  // Look through bitcasts that retain the number of vector elements.
+  SDValue Op = N->getOperand(0);
+  if (Op.getOpcode() == ISD::BITCAST &&
+      Op.getValueType().isVector() &&
+      Op.getOperand(0).getValueType().isVector() &&
+      Op.getValueType().getVectorNumElements() ==
+      Op.getOperand(0).getValueType().getVectorNumElements())
+    Op = Op.getOperand(0);
+
+  // Pull BSWAP out of a vector extraction.
+  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
+    EVT VecVT = Op.getValueType();
+    EVT EltVT = VecVT.getVectorElementType();
+    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
+                     Op.getOperand(0), N->getOperand(1));
+    DCI.AddToWorklist(Op.getNode());
+    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
+    if (EltVT != N->getValueType(0)) {
+      DCI.AddToWorklist(Op.getNode());
+      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
+    }
+    return Op;
+  }
+
   // Try to simplify a vector extraction.
   if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
     SDValue Op0 = N->getOperand(0);
@@ -5480,6 +5718,10 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
 
 SDValue SystemZTargetLowering::combineFP_ROUND(
     SDNode *N, DAGCombinerInfo &DCI) const {
+
+  if (!Subtarget.hasVector())
+    return SDValue();
+
   // (fpround (extract_vector_elt X 0))
   // (fpround (extract_vector_elt X 1)) ->
   // (extract_vector_elt (VROUND X) 0)
@@ -5527,6 +5769,10 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
 
 SDValue SystemZTargetLowering::combineFP_EXTEND(
     SDNode *N, DAGCombinerInfo &DCI) const {
+
+  if (!Subtarget.hasVector())
+    return SDValue();
+
   // (fpextend (extract_vector_elt X 0))
   // (fpextend (extract_vector_elt X 2)) ->
   // (extract_vector_elt (VEXTEND X) 0)
@@ -5575,11 +5821,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
 SDValue SystemZTargetLowering::combineBSWAP(
     SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
-  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
   if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
       N->getOperand(0).hasOneUse() &&
-      (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
-       N->getValueType(0) == MVT::i64)) {
+      canLoadStoreByteSwapped(N->getValueType(0))) {
       SDValue Load = N->getOperand(0);
       LoadSDNode *LD = cast<LoadSDNode>(Load);
 
@@ -5612,61 +5857,170 @@ SDValue SystemZTargetLowering::combineBSWAP(
       // Return N so it doesn't get rechecked!
       return SDValue(N, 0);
     }
+
+  // Look through bitcasts that retain the number of vector elements.
+  SDValue Op = N->getOperand(0);
+  if (Op.getOpcode() == ISD::BITCAST &&
+      Op.getValueType().isVector() &&
+      Op.getOperand(0).getValueType().isVector() &&
+      Op.getValueType().getVectorNumElements() ==
+      Op.getOperand(0).getValueType().getVectorNumElements())
+    Op = Op.getOperand(0);
+
+  // Push BSWAP into a vector insertion if at least one side then simplifies.
+  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Elt = Op.getOperand(1);
+    SDValue Idx = Op.getOperand(2);
+
+    if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
+        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
+        DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
+        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
+        (canLoadStoreByteSwapped(N->getValueType(0)) &&
+         ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
+      EVT VecVT = N->getValueType(0);
+      EVT EltVT = N->getValueType(0).getVectorElementType();
+      if (VecVT != Vec.getValueType()) {
+        Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
+        DCI.AddToWorklist(Vec.getNode());
+      }
+      if (EltVT != Elt.getValueType()) {
+        Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
+        DCI.AddToWorklist(Elt.getNode());
+      }
+      Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
+      DCI.AddToWorklist(Vec.getNode());
+      Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
+      DCI.AddToWorklist(Elt.getNode());
+      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
+                         Vec, Elt, Idx);
+    }
+  }
+
+  // Push BSWAP into a vector shuffle if at least one side then simplifies.
+  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
+  if (SV && Op.hasOneUse()) {
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+
+    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
+        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
+        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
+        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
+      EVT VecVT = N->getValueType(0);
+      if (VecVT != Op0.getValueType()) {
+        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
+        DCI.AddToWorklist(Op0.getNode());
+      }
+      if (VecVT != Op1.getValueType()) {
+        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
+        DCI.AddToWorklist(Op1.getNode());
+      }
+      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
+      DCI.AddToWorklist(Op0.getNode());
+      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
+      DCI.AddToWorklist(Op1.getNode());
+      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
+    }
+  }
+
   return SDValue();
 }
 
 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks,
-  // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing
-  // the condition code set by some other instruction, see whether we
-  // can directly use that condition code.
-  bool Invert = false;
+  // If the CCReg instruction is itself an ICMP testing the condition
+  // code set by some other instruction, see whether we can directly
+  // use that condition code.
 
-  // Verify that we have an appropriate mask for a EQ or NE comparison.
+  // Verify that we have an ICMP against some constant.
   if (CCValid != SystemZ::CCMASK_ICMP)
     return false;
-  if (CCMask == SystemZ::CCMASK_CMP_NE)
-    Invert = !Invert;
-  else if (CCMask != SystemZ::CCMASK_CMP_EQ)
-    return false;
-
-  // Verify that we have an ICMP that is the user of a SELECT_CCMASK.
-  SDNode *ICmp = CCReg.getNode();
+  auto *ICmp = CCReg.getNode();
   if (ICmp->getOpcode() != SystemZISD::ICMP)
     return false;
-  SDNode *Select = ICmp->getOperand(0).getNode();
-  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+  auto *CompareLHS = ICmp->getOperand(0).getNode();
+  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
+  if (!CompareRHS)
     return false;
 
-  // Verify that the ICMP compares against one of select values.
-  auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
-  if (!CompareVal)
-    return false;
-  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
-  if (!TrueVal)
-    return false;
-  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
-  if (!FalseVal)
-    return false;
-  if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
-    Invert = !Invert;
-  else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
-    return false;
+  // Optimize the case where CompareLHS is a SELECT_CCMASK.
+  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
+    // Verify that we have an appropriate mask for an EQ or NE comparison.
+    bool Invert = false;
+    if (CCMask == SystemZ::CCMASK_CMP_NE)
+      Invert = !Invert;
+    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+      return false;
 
-  // Compute the effective CC mask for the new branch or select.
-  auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
-  auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
-  if (!NewCCValid || !NewCCMask)
-    return false;
-  CCValid = NewCCValid->getZExtValue();
-  CCMask = NewCCMask->getZExtValue();
-  if (Invert)
-    CCMask ^= CCValid;
+    // Verify that the ICMP compares against one of select values.
+    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
+    if (!TrueVal)
+      return false;
+    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+    if (!FalseVal)
+      return false;
+    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
+      Invert = !Invert;
+    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
+      return false;
 
-  // Return the updated CCReg link.
-  CCReg = Select->getOperand(4);
-  return true;
+    // Compute the effective CC mask for the new branch or select.
+    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
+    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
+    if (!NewCCValid || !NewCCMask)
+      return false;
+    CCValid = NewCCValid->getZExtValue();
+    CCMask = NewCCMask->getZExtValue();
+    if (Invert)
+      CCMask ^= CCValid;
+
+    // Return the updated CCReg link.
+    CCReg = CompareLHS->getOperand(4);
+    return true;
+  }
+
+  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
+  if (CompareLHS->getOpcode() == ISD::SRA) {
+    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+    if (!SRACount || SRACount->getZExtValue() != 30)
+      return false;
+    auto *SHL = CompareLHS->getOperand(0).getNode();
+    if (SHL->getOpcode() != ISD::SHL)
+      return false;
+    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
+    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
+      return false;
+    auto *IPM = SHL->getOperand(0).getNode();
+    if (IPM->getOpcode() != SystemZISD::IPM)
+      return false;
+
+    // Avoid introducing CC spills (because SRA would clobber CC).
+    if (!CompareLHS->hasOneUse())
+      return false;
+    // Verify that the ICMP compares against zero.
+    if (CompareRHS->getZExtValue() != 0)
+      return false;
+
+    // Compute the effective CC mask for the new branch or select.
+    switch (CCMask) {
+    case SystemZ::CCMASK_CMP_EQ: break;
+    case SystemZ::CCMASK_CMP_NE: break;
+    case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
+    case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
+    case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
+    case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
+    default: return false;
+    }
+
+    // Return the updated CCReg link.
+    CCReg = IPM->getOperand(0);
+    return true;
+  }
+
+  return false;
 }
 
 SDValue SystemZTargetLowering::combineBR_CCMASK(
@@ -5770,12 +6124,18 @@ SDValue SystemZTargetLowering::combineIntDIVREM(
   // since it is not Legal but Custom it can only happen before
   // legalization. Therefore we must scalarize this early before Combine
   // 1. For widened vectors, this is already the result of type legalization.
-  if (VT.isVector() && isTypeLegal(VT) &&
+  if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
       DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
     return DAG.UnrollVectorOp(N);
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
+  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
+    return N->getOperand(0);
+  return N;
+}
+
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
@@ -5787,6 +6147,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
   case ISD::LOAD:               return combineLOAD(N, DCI);
   case ISD::STORE:              return combineSTORE(N, DCI);
+  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
@@ -5977,12 +6338,10 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     case Intrinsic::s390_vuplhw:
     case Intrinsic::s390_vuplf: {
       SDValue SrcOp = Op.getOperand(1);
-      unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
       APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
       Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
       if (IsLogical) {
-        Known = Known.zext(BitWidth);
-        Known.Zero.setBitsFrom(SrcBitWidth);
+        Known = Known.zext(BitWidth, true);
       } else
         Known = Known.sext(BitWidth);
       break;
@@ -6011,7 +6370,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   // Known has the width of the source operand(s). Adjust if needed to match
   // the passed bitwidth.
   if (Known.getBitWidth() != BitWidth)
-    Known = Known.zextOrTrunc(BitWidth);
+    Known = Known.zextOrTrunc(BitWidth, false);
 }
 
 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
@@ -6125,7 +6484,7 @@ static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
 }
 
 // Force base value Base into a register before MI.  Return the register.
-static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
+static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                          const SystemZInstrInfo *TII) {
   if (Base.isReg())
     return Base.getReg();
@@ -6134,7 +6493,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
   BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
       .add(Base)
       .addImm(0)
@@ -6213,7 +6572,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
   // destination registers, and the registers that went into the PHI.
   DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
 
-  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;
+       MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) {
     unsigned DestReg = MIIt->getOperand(0).getReg();
     unsigned TrueReg = MIIt->getOperand(1).getReg();
     unsigned FalseReg = MIIt->getOperand(2).getReg();
@@ -6237,6 +6597,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
     // Add this PHI to the rewrite table.
     RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
   }
+
+  MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
 }
 
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
@@ -6254,8 +6616,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   // same condition code value, we want to expand all of them into
   // a single pair of basic blocks using the same condition.
   MachineInstr *LastMI = &MI;
-  MachineBasicBlock::iterator NextMIIt =
-      std::next(MachineBasicBlock::iterator(MI));
+  MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward(
+      std::next(MachineBasicBlock::iterator(MI)), MBB->end());
 
   if (isSelectPseudo(MI))
     while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
@@ -6263,7 +6625,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
            (NextMIIt->getOperand(4).getImm() == CCMask ||
             NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
       LastMI = &*NextMIIt;
-      ++NextMIIt;
+      NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end());
     }
 
   MachineBasicBlock *StartMBB = MBB;
@@ -6296,8 +6658,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   //  ...
   MBB = JoinMBB;
   MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
-  MachineBasicBlock::iterator MIItEnd =
-      std::next(MachineBasicBlock::iterator(LastMI));
+  MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward(
+      std::next(MachineBasicBlock::iterator(LastMI)), MBB->end());
   createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);
 
   StartMBB->erase(MIItBegin, MIItEnd);
@@ -6415,8 +6777,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
   int64_t Disp = MI.getOperand(2).getImm();
   MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
-  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
+  Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
   DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
     BitSize = MI.getOperand(6).getImm();
@@ -6434,12 +6796,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
   assert(LOpcode && CSOpcode && "Displacement out of range");
 
   // Create virtual registers for temporary results.
-  unsigned OrigVal       = MRI.createVirtualRegister(RC);
-  unsigned OldVal        = MRI.createVirtualRegister(RC);
-  unsigned NewVal        = (BinOpcode || IsSubWord ?
+  Register OrigVal       = MRI.createVirtualRegister(RC);
+  Register OldVal        = MRI.createVirtualRegister(RC);
+  Register NewVal        = (BinOpcode || IsSubWord ?
                             MRI.createVirtualRegister(RC) : Src2.getReg());
-  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
-  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
 
   // Insert a basic block for the main loop.
   MachineBasicBlock *StartMBB = MBB;
@@ -6532,9 +6894,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
   unsigned Dest = MI.getOperand(0).getReg();
   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
   int64_t Disp = MI.getOperand(2).getImm();
-  unsigned Src2 = MI.getOperand(3).getReg();
-  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
-  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+  Register Src2 = MI.getOperand(3).getReg();
+  Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
+  Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
   DebugLoc DL = MI.getDebugLoc();
   if (IsSubWord)
     BitSize = MI.getOperand(6).getImm();
@@ -6552,12 +6914,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
   assert(LOpcode && CSOpcode && "Displacement out of range");
 
   // Create virtual registers for temporary results.
-  unsigned OrigVal       = MRI.createVirtualRegister(RC);
-  unsigned OldVal        = MRI.createVirtualRegister(RC);
-  unsigned NewVal        = MRI.createVirtualRegister(RC);
-  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
-  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
-  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+  Register OrigVal       = MRI.createVirtualRegister(RC);
+  Register OldVal        = MRI.createVirtualRegister(RC);
+  Register NewVal        = MRI.createVirtualRegister(RC);
+  Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+  Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
+  Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
 
   // Insert 3 basic blocks for the loop.
   MachineBasicBlock *StartMBB  = MBB;
@@ -6840,22 +7202,22 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
   if (MI.getNumExplicitOperands() > 5) {
     bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
 
-    uint64_t StartCountReg = MI.getOperand(5).getReg();
-    uint64_t StartSrcReg   = forceReg(MI, SrcBase, TII);
-    uint64_t StartDestReg  = (HaveSingleBase ? StartSrcReg :
+    Register StartCountReg = MI.getOperand(5).getReg();
+    Register StartSrcReg   = forceReg(MI, SrcBase, TII);
+    Register StartDestReg  = (HaveSingleBase ? StartSrcReg :
                               forceReg(MI, DestBase, TII));
 
     const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
-    uint64_t ThisSrcReg  = MRI.createVirtualRegister(RC);
-    uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+    Register ThisSrcReg  = MRI.createVirtualRegister(RC);
+    Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                             MRI.createVirtualRegister(RC));
-    uint64_t NextSrcReg  = MRI.createVirtualRegister(RC);
-    uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+    Register NextSrcReg  = MRI.createVirtualRegister(RC);
+    Register NextDestReg = (HaveSingleBase ? NextSrcReg :
                             MRI.createVirtualRegister(RC));
 
     RC = &SystemZ::GR64BitRegClass;
-    uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
-    uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+    Register ThisCountReg = MRI.createVirtualRegister(RC);
+    Register NextCountReg = MRI.createVirtualRegister(RC);
 
     MachineBasicBlock *StartMBB = MBB;
     MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 622da32e418d..23cdcc72bc42 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -1,9 +1,8 @@
 //===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
 
 #include "SystemZ.h"
+#include "SystemZInstrInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -281,6 +281,8 @@ enum NodeType : unsigned {
   VISTR_CC,
   VSTRC_CC,
   VSTRCZ_CC,
+  VSTRS_CC,
+  VSTRSZ_CC,
 
   // Test Data Class.
   //
@@ -340,6 +342,9 @@ enum NodeType : unsigned {
   // Byte swapping load/store.  Same operands as regular load/store.
   LRV, STRV,
 
+  // Element swapping load/store.  Same operands as regular load/store.
+  VLER, VSTER,
+
   // Prefetch from the second operand using the 4-bit control code in
   // the first operand.  The code is 1 for a load prefetch and 2 for
   // a store prefetch.
@@ -396,10 +401,12 @@ public:
       return TypeWidenVector;
     return TargetLoweringBase::getPreferredVectorAction(VT);
   }
+  bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
-  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+  bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                    bool ForCodeSize) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -407,6 +414,7 @@ public:
                              Instruction *I = nullptr) const override;
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                       unsigned Align,
+                                      MachineMemOperand::Flags Flags,
                                       bool *Fast) const override;
   bool isTruncateFree(Type *, Type *) const override;
   bool isTruncateFree(EVT, EVT) const override;
@@ -568,6 +576,9 @@ private:
   SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  bool isVectorElementLoad(SDValue Op) const;
+  SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+                      SmallVectorImpl<SDValue> &Elems) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -587,8 +598,10 @@ private:
   SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+  bool canLoadStoreByteSwapped(EVT VT) const;
   SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -599,6 +612,8 @@ private:
   SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
 
+  SDValue unwrapAddress(SDValue N) const override;
+
   // If the last instruction before MBBI in MBB was some form of COMPARE,
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
   // CCMask and Target are the BRC-like operands for the branch.
@@ -639,8 +654,27 @@ private:
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
 
+  MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
   const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
 };
+
+struct SystemZVectorConstantInfo {
+private:
+  APInt IntBits;             // The 128 bits as an integer.
+  APInt SplatBits;           // Smallest splat value.
+  APInt SplatUndef;          // Bits corresponding to undef operands of the BVN.
+  unsigned SplatBitSize = 0;
+  bool isFP128 = false;
+
+public:
+  unsigned Opcode = 0;
+  SmallVector<unsigned, 2> OpVals;
+  MVT VecVT;
+  SystemZVectorConstantInfo(APFloat FPImm);
+  SystemZVectorConstantInfo(BuildVectorSDNode *BVN);
+  bool isVectorConstantLegal(const SystemZSubtarget &Subtarget);
+};
+
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 896b665d25eb..ec7639e71f81 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -1,9 +1,8 @@
 //===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZInstrDFP.td b/lib/Target/SystemZ/SystemZInstrDFP.td
index 08ab2d7bbc52..8d7a773ff4d9 100644
--- a/lib/Target/SystemZ/SystemZInstrDFP.td
+++ b/lib/Target/SystemZ/SystemZInstrDFP.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,7 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 // Load and test.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64,  FP64>;
   def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
 }
@@ -32,25 +31,31 @@ let Defs = [CC] in {
 
 // Convert floating-point values to narrower representations.  The destination
 // of LDXTR is a 128-bit value, but only the first register of the pair is used.
-def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32,  FP64>;
-def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+let Uses = [FPC] in {
+  def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32,  FP64>;
+  def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+}
 
 // Extend floating-point values to wider representations.
-def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64,  FP32>;
-def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+let Uses = [FPC] in {
+  def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64,  FP32>;
+  def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+}
 
 // Convert a signed integer value to a floating-point one.
-def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64,  GR64>;
-def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
-let Predicates = [FeatureFPExtension] in {
-  def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64,  GR64>;
-  def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
-  def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64,  GR32>;
-  def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+let Uses = [FPC] in {
+  def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64,  GR64>;
+  def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+  let Predicates = [FeatureFPExtension] in {
+    def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64,  GR64>;
+    def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+    def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64,  GR32>;
+    def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+  }
 }
 
 // Convert an unsigned integer value to a floating-point one.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], Predicates = [FeatureFPExtension] in {
   def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64,  GR64>;
   def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
   def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64,  GR32>;
@@ -58,7 +63,7 @@ let Predicates = [FeatureFPExtension] in {
 }
 
 // Convert a floating-point value to a signed integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
   def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
   let Predicates = [FeatureFPExtension] in {
@@ -70,7 +75,7 @@ let Defs = [CC] in {
 }
 
 // Convert a floating-point value to an unsigned integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   let Predicates = [FeatureFPExtension] in {
     def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
     def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
@@ -108,7 +113,7 @@ let Predicates = [FeatureDFPPackedConversion] in {
 }
 
 // Perform floating-point operation.
-let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in
   def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
 
 
@@ -118,8 +123,10 @@ let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
 
 // Round to an integer, with the second operand (M3) specifying the rounding
 // mode.  M4 can be set to 4 to suppress detection of inexact conditions.
-def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64,  FP64>;
-def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+let Uses = [FPC] in {
+  def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64,  FP64>;
+  def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+}
 
 // Extract biased exponent.
 def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64,  FP64>;
@@ -135,7 +142,7 @@ def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>;
 //===----------------------------------------------------------------------===//
 
 // Addition.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   let isCommutable = 1 in {
     def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64,  FP64,  FP64>;
     def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
@@ -147,7 +154,7 @@ let Defs = [CC] in {
 }
 
 // Subtraction.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64,  FP64,  FP64>;
   def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
   let Predicates = [FeatureFPExtension] in {
@@ -157,30 +164,38 @@ let Defs = [CC] in {
 }
 
 // Multiplication.
-let isCommutable = 1 in {
-  def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64,  FP64,  FP64>;
-  def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
-}
-let Predicates = [FeatureFPExtension] in {
-  def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64,  FP64,  FP64>;
-  def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  let isCommutable = 1 in {
+    def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64,  FP64,  FP64>;
+    def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+  }
+  let Predicates = [FeatureFPExtension] in {
+    def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64,  FP64,  FP64>;
+    def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+  }
 }
 
 // Division.
-def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64,  FP64,  FP64>;
-def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
-let Predicates = [FeatureFPExtension] in {
-  def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64,  FP64,  FP64>;
-  def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64,  FP64,  FP64>;
+  def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+  let Predicates = [FeatureFPExtension] in {
+    def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64,  FP64,  FP64>;
+    def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+  }
 }
 
 // Quantize.
-def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64,  FP64,  FP64>;
-def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64,  FP64,  FP64>;
+  def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+}
 
 // Reround.
-def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64,  FP64,  FP64>;
-def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64,  FP64,  FP64>;
+  def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+}
 
 // Shift significand left/right.
 def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64,  FP64,  null_frag, 0>;
@@ -198,13 +213,13 @@ def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>;
 //===----------------------------------------------------------------------===//
 
 // Compare.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64,  FP64>;
   def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
 }
 
 // Compare and signal.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64,  FP64>;
   def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
 }
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 1374ee91fa29..19c7ec58ed3d 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -53,7 +52,8 @@ let isCodeGenOnly = 1 in
 
 // Moves between two floating-point registers that also set the condition
 // codes.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+    Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
   defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
   defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
@@ -69,7 +69,8 @@ let Predicates = [FeatureNoVector] in {
 
 // Use a normal load-and-test for compare against zero in case of
 // vector support (via a pseudo to simplify instruction selection).
-let Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+let Uses = [FPC], mayRaiseFPException = 1,
+    Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
   def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
   def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
   def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
@@ -174,56 +175,64 @@ let SimpleBDXStore = 1, mayStore = 1 in {
 // Convert floating-point values to narrower representations, rounding
 // according to the current mode.  The destination of LEXBR and LDXBR
 // is a 128-bit value, but only the first register of the pair is used.
-def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround,    FP32,  FP64>;
-def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
-def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
-
-def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32,  FP64>,
-             Requires<[FeatureFPExtension]>;
-def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
-             Requires<[FeatureFPExtension]>;
-def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
-             Requires<[FeatureFPExtension]>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>;
+  def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+  def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+  def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32,  FP64>,
+               Requires<[FeatureFPExtension]>;
+  def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
+               Requires<[FeatureFPExtension]>;
+  def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
+               Requires<[FeatureFPExtension]>;
+}
 
 let Predicates = [FeatureNoVectorEnhancements1] in {
-  def : Pat<(f32 (fpround FP128:$src)),
+  def : Pat<(f32 (any_fpround FP128:$src)),
             (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
-  def : Pat<(f64 (fpround FP128:$src)),
+  def : Pat<(f64 (any_fpround FP128:$src)),
             (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
 }
 
 // Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend,  FP64,  FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>;
+  def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
+  def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+}
 let Predicates = [FeatureNoVectorEnhancements1] in {
-  def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
-  def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
+  def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
+  def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
 }
 
 // Extend memory floating-point values to wider representations.
-def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64,  4>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag,  FP128, 4>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag,  FP128, 8>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>;
+  def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
+  def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
+}
 let Predicates = [FeatureNoVectorEnhancements1] in {
-  def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
+  def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)),
             (LXEB bdxaddr12only:$src)>;
-  def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
+  def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)),
             (LXDB bdxaddr12only:$src)>;
 }
 
 // Convert a signed integer register value to a floating-point one.
-def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
-def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
-def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
-
-def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
-def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
-def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
+  def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
+  def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+  def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
+  def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
+  def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+}
 
 // The FP extension feature provides versions of the above that allow
 // specifying rounding mode and inexact-exception suppression flags.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in {
   def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32,  GR32>;
   def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64,  GR32>;
   def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
@@ -235,13 +244,15 @@ let Predicates = [FeatureFPExtension] in {
 
 // Convert am unsigned integer register value to a floating-point one.
 let Predicates = [FeatureFPExtension] in {
-  def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32,  GR32>;
-  def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64,  GR32>;
-  def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
-
-  def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32,  GR64>;
-  def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64,  GR64>;
-  def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32,  GR32>;
+    def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64,  GR32>;
+    def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
+
+    def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32,  GR64>;
+    def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64,  GR64>;
+    def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+  }
 
   def : Pat<(f32  (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
   def : Pat<(f64  (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
@@ -254,7 +265,7 @@ let Predicates = [FeatureFPExtension] in {
 
 // Convert a floating-point register value to a signed integer value,
 // with the second operand (modifier M3) specifying the rounding mode.
-let Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
   def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
   def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
   def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
@@ -275,7 +286,8 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
 
 // The FP extension feature provides versions of the above that allow
 // also specifying the inexact-exception suppression flag.
-let Predicates = [FeatureFPExtension], Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1,
+    Predicates = [FeatureFPExtension], Defs = [CC] in {
   def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
   def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
   def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
@@ -287,7 +299,7 @@ let Predicates = [FeatureFPExtension], Defs = [CC] in {
 
 // Convert a floating-point register value to an unsigned integer value.
 let Predicates = [FeatureFPExtension] in {
-  let Defs = [CC] in {
+  let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
     def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
     def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
     def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
@@ -353,59 +365,65 @@ let isCodeGenOnly = 1 in
   def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32,  FP32>;
 
 // Square root.
-def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32,  FP32>;
-def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64,  FP64>;
-def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32,  FP32>;
+  def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64,  FP64>;
+  def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
 
-def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
-def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
+  def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
+  def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
+}
 
 // Round to an integer, with the second operand (modifier M3) specifying
 // the rounding mode.  These forms always check for inexact conditions.
-def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32,  FP32>;
-def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64,  FP64>;
-def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32,  FP32>;
+  def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64,  FP64>;
+  def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+}
 
 // frint rounds according to the current mode (modifier 0) and detects
 // inexact conditions.
-def : Pat<(frint FP32:$src),  (FIEBR 0, FP32:$src)>;
-def : Pat<(frint FP64:$src),  (FIDBR 0, FP64:$src)>;
-def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+def : Pat<(any_frint FP32:$src),  (FIEBR 0, FP32:$src)>;
+def : Pat<(any_frint FP64:$src),  (FIDBR 0, FP64:$src)>;
+def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
 
 let Predicates = [FeatureFPExtension] in {
   // Extended forms of the FIxBR instructions.  M4 can be set to 4
   // to suppress detection of inexact conditions.
-  def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32,  FP32>;
-  def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64,  FP64>;
-  def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32,  FP32>;
+    def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64,  FP64>;
+    def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+  }
 
   // fnearbyint is like frint but does not detect inexact conditions.
-  def : Pat<(fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
-  def : Pat<(fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
-  def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+  def : Pat<(any_fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
+  def : Pat<(any_fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
+  def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
 
   // floor is no longer allowed to raise an inexact condition,
   // so restrict it to the cases where the condition can be suppressed.
   // Mode 7 is round towards -inf.
-  def : Pat<(ffloor FP32:$src),  (FIEBRA 7, FP32:$src,  4)>;
-  def : Pat<(ffloor FP64:$src),  (FIDBRA 7, FP64:$src,  4)>;
-  def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+  def : Pat<(any_ffloor FP32:$src),  (FIEBRA 7, FP32:$src,  4)>;
+  def : Pat<(any_ffloor FP64:$src),  (FIDBRA 7, FP64:$src,  4)>;
+  def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
 
   // Same idea for ceil, where mode 6 is round towards +inf.
-  def : Pat<(fceil FP32:$src),  (FIEBRA 6, FP32:$src,  4)>;
-  def : Pat<(fceil FP64:$src),  (FIDBRA 6, FP64:$src,  4)>;
-  def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+  def : Pat<(any_fceil FP32:$src),  (FIEBRA 6, FP32:$src,  4)>;
+  def : Pat<(any_fceil FP64:$src),  (FIDBRA 6, FP64:$src,  4)>;
+  def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
 
   // Same idea for trunc, where mode 5 is round towards zero.
-  def : Pat<(ftrunc FP32:$src),  (FIEBRA 5, FP32:$src,  4)>;
-  def : Pat<(ftrunc FP64:$src),  (FIDBRA 5, FP64:$src,  4)>;
-  def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+  def : Pat<(any_ftrunc FP32:$src),  (FIEBRA 5, FP32:$src,  4)>;
+  def : Pat<(any_ftrunc FP64:$src),  (FIDBRA 5, FP64:$src,  4)>;
+  def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
 
   // Same idea for round, where mode 1 is round towards nearest with
   // ties away from zero.
-  def : Pat<(fround FP32:$src),  (FIEBRA 1, FP32:$src,  4)>;
-  def : Pat<(fround FP64:$src),  (FIDBRA 1, FP64:$src,  4)>;
-  def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+  def : Pat<(any_fround FP32:$src),  (FIEBRA 1, FP32:$src,  4)>;
+  def : Pat<(any_fround FP64:$src),  (FIDBRA 1, FP64:$src,  4)>;
+  def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -413,87 +431,103 @@ let Predicates = [FeatureFPExtension] in {
 //===----------------------------------------------------------------------===//
 
 // Addition.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+    Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   let isCommutable = 1 in {
-    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32,  FP32>;
-    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64,  FP64>;
-    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+    def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32,  FP32>;
+    def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64,  FP64>;
+    def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
   }
-  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
-  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
+  def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+  def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
 }
 
 // Subtraction.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32,  FP32>;
-  def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64,  FP64>;
-  def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
-
-  def SEB : BinaryRXE<"seb",  0xED0B, fsub, FP32, load, 4>;
-  def SDB : BinaryRXE<"sdb",  0xED1B, fsub, FP64, load, 8>;
+let Uses = [FPC], mayRaiseFPException = 1,
+    Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+  def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32,  FP32>;
+  def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64,  FP64>;
+  def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
+
+  def SEB : BinaryRXE<"seb",  0xED0B, any_fsub, FP32, load, 4>;
+  def SDB : BinaryRXE<"sdb",  0xED1B, any_fsub, FP64, load, 8>;
 }
 
 // Multiplication.
-let isCommutable = 1 in {
-  def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32,  FP32>;
-  def MDBR  : BinaryRRE<"mdbr",  0xB31C, fmul, FP64,  FP64>;
-  def MXBR  : BinaryRRE<"mxbr",  0xB34C, fmul, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  let isCommutable = 1 in {
+    def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32,  FP32>;
+    def MDBR  : BinaryRRE<"mdbr",  0xB31C, any_fmul, FP64,  FP64>;
+    def MXBR  : BinaryRRE<"mxbr",  0xB34C, any_fmul, FP128, FP128>;
+  }
+  def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+  def MDB  : BinaryRXE<"mdb",  0xED1C, any_fmul, FP64, load, 8>;
 }
-def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
-def MDB  : BinaryRXE<"mdb",  0xED1C, fmul, FP64, load, 8>;
 
 // f64 multiplication of two FP32 registers.
-def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
+let Uses = [FPC], mayRaiseFPException = 1 in
+  def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+                    (f64 (fpextend FP32:$src2))),
           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                 FP32:$src1, subreg_h32), FP32:$src2)>;
 
 // f64 multiplication of an FP32 register and an f32 memory.
-def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)),
-                (f64 (extloadf32 bdxaddr12only:$addr))),
+let Uses = [FPC], mayRaiseFPException = 1 in
+  def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+                    (f64 (extloadf32 bdxaddr12only:$addr))),
           (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
                 bdxaddr12only:$addr)>;
 
 // f128 multiplication of two FP64 registers.
-def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in
+  def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+                      (f128 (fpextend FP64:$src2))),
             (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
                                   FP64:$src1, subreg_h64), FP64:$src2)>;
 
 // f128 multiplication of an FP64 register and an f64 memory.
-def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
+let Uses = [FPC], mayRaiseFPException = 1 in
+  def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fmul (f128 (fpextend FP64:$src1)),
-                  (f128 (extloadf64 bdxaddr12only:$addr))),
+  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+                      (f128 (extloadf64 bdxaddr12only:$addr))),
             (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
                   bdxaddr12only:$addr)>;
 
 // Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
+  def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
 
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+  def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+  def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+}
 
 // Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
+  def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
 
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+  def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+  def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+}
 
 // Division.
-def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32,  FP32>;
-def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64,  FP64>;
-def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+  def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32,  FP32>;
+  def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64,  FP64>;
+  def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
 
-def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
-def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+  def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+  def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+}
 
 // Divide to integer.
-let Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
   def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
   def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
 }
@@ -502,7 +536,7 @@ let Defs = [CC] in {
 // Comparisons
 //===----------------------------------------------------------------------===//
 
-let Defs = [CC], CCValues = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
   def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32,  FP32>;
   def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64,  FP64>;
   def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
@@ -532,20 +566,28 @@ let Defs = [CC], CCValues = 0xC in {
 let hasSideEffects = 1 in {
   let mayLoad = 1, mayStore = 1 in {
     // TODO: EFPC and SFPC do not touch memory at all
-    def EFPC  : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
-    def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
-
-    def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
-    def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+    let Uses = [FPC] in {
+      def EFPC  : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+      def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+    }
+
+    let Defs = [FPC] in {
+      def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+      def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+    }
   }
 
-  def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
-  def LFAS  : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+  let Defs = [FPC], mayRaiseFPException = 1 in {
+    def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
+    def LFAS  : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+  }
 
-  def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
-              Requires<[FeatureFPExtension]>;
-  def SRNM  : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
-  def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+  let Uses = [FPC], Defs = [FPC] in {
+    def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
+                Requires<[FeatureFPExtension]>;
+    def SRNM  : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
+    def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+  }
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 1e904a86ea79..2a1d14de3ddf 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -38,6 +37,12 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
   string OpKey = "";
   string OpType = "none";
 
+  // MemKey identifies a target reg-mem opcode, while MemType can be either
+  // "pseudo" or "target". This is used to map a pseudo memory instruction to
+  // its corresponding target opcode. See comment at MemFoldPseudo.
+  string MemKey = "";
+  string MemType = "none";
+
   // Many distinct-operands instructions have older 2-operand equivalents.
   // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
   // with NumOpsValue being "2" or "3" as appropriate.
@@ -121,7 +126,8 @@ def getDisp20Opcode : InstrMapping {
   let ValueCols = [["20"]];
 }
 
-// Return the memory form of a register instruction.
+// Return the memory form of a register instruction. Note that this may
+// return a MemFoldPseudo instruction (see below).
 def getMemOpcode : InstrMapping {
   let FilterClass = "InstSystemZ";
   let RowFields = ["OpKey"];
@@ -130,13 +136,22 @@ def getMemOpcode : InstrMapping {
   let ValueCols = [["mem"]];
 }
 
-// Return the 3-operand form of a 2-operand instruction.
-def getThreeOperandOpcode : InstrMapping {
+// Return the target memory instruction for a MemFoldPseudo.
+def getTargetMemOpcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["MemKey"];
+  let ColFields = ["MemType"];
+  let KeyCol = ["pseudo"];
+  let ValueCols = [["target"]];
+}
+
+// Return the 2-operand form of a 3-operand instruction.
+def getTwoOperandOpcode : InstrMapping {
   let FilterClass = "InstSystemZ";
   let RowFields = ["NumOpsKey"];
   let ColFields = ["NumOpsValue"];
-  let KeyCol = ["2"];
-  let ValueCols = [["3"]];
+  let KeyCol = ["3"];
+  let ValueCols = [["2"]];
 }
 
 //===----------------------------------------------------------------------===//
@@ -1399,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   bits<4> R1;
   bits<5> V2;
   bits<4> M3;
+  bits<4> M4;
 
   let Inst{47-40} = op{15-8};
   let Inst{39-36} = R1;
   let Inst{35-32} = V2{3-0};
   let Inst{31-24} = 0;
   let Inst{23-20} = M3;
-  let Inst{19-12} = 0;
+  let Inst{19-16} = M4;
+  let Inst{15-12} = 0;
   let Inst{11}    = 0;
   let Inst{10}    = V2{4};
   let Inst{9-8}   = 0;
@@ -2410,11 +2427,16 @@ class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls>
   let mayLoad = 1;
 }
 
-class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
-  : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
-             mnemonic#"\t$V1, $V3, $BD2", []> {
-  let M4 = 0;
-  let mayLoad = 1;
+multiclass LoadMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
+  let mayLoad = 1 in {
+    def Align : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
+                        (ins bdaddr12only:$BD2, imm32zx4:$M4),
+                        mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+    let M4 = 0 in
+      def "" : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
+                        (ins bdaddr12only:$BD2),
+                        mnemonic#"\t$V1, $V3, $BD2", []>;
+  }
 }
 
 class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
@@ -2469,12 +2491,29 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                TypedReg tr, bits<5> bytes, bits<4> type = 0>
   : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
             mnemonic#"\t$V1, $XBD2",
-            [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
+            [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
   let M3 = type;
   let mayStore = 1;
   let AccessBytes = bytes;
 }
 
+class StoreVRXGeneric<string mnemonic, bits<16> opcode>
+  : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+            mnemonic#"\t$V1, $XBD2, $M3", []> {
+  let mayStore = 1;
+}
+
+multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
+  let mayStore = 1, AccessBytes = 16 in {
+    def Align : InstVRX<opcode, (outs),
+                        (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+                        mnemonic#"\t$V1, $XBD2, $M3", []>;
+    let M3 = 0 in
+      def "" : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2),
+                       mnemonic#"\t$V1, $XBD2", []>;
+  }
+}
+
 class StoreLengthVRSb<string mnemonic, bits<16> opcode,
                       SDPatternOperator operator, bits<5> bytes>
   : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
@@ -2527,11 +2566,16 @@ multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
   }
 }
 
-class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
-  : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
-             mnemonic#"\t$V1, $V3, $BD2", []> {
-  let M4 = 0;
-  let mayStore = 1;
+multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
+  let mayStore = 1 in {
+    def Align : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
+                                              bdaddr12only:$BD2, imm32zx4:$M4),
+                         mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+    let M4 = 0 in
+      def "" : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
+                                             bdaddr12only:$BD2),
+                        mnemonic#"\t$V1, $V3, $BD2", []>;
+  }
 }
 
 // StoreSI* instructions are used to store an integer to memory, but the
@@ -2925,6 +2969,17 @@ class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
   let mayLoad = 1;
 }
 
+multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {
+  let mayLoad = 1, AccessBytes = 16 in {
+    def Align : InstVRX<opcode, (outs VR128:$V1),
+                        (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+                        mnemonic#"\t$V1, $XBD2, $M3", []>;
+    let M3 = 0 in
+      def "" : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2),
+                       mnemonic#"\t$V1, $XBD2", []>;
+  }
+}
+
 class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
                          RegisterOperand cls>
   : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
@@ -3067,6 +3122,8 @@ class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
              mnemonic#"\t$R1, $R2, $R3",
              [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
   let M4 = 0;
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
@@ -3074,9 +3131,9 @@ multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
                         RegisterOperand cls2> {
   let NumOpsKey = mnemonic in {
     let NumOpsValue = "3" in
-      def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+      def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
               Requires<[FeatureDistinctOps]>;
-    let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+    let NumOpsValue = "2" in
       def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
   }
 }
@@ -3086,9 +3143,9 @@ multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
                          RegisterOperand cls2> {
   let NumOpsKey = mnemonic in {
     let NumOpsValue = "3" in
-      def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+      def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
               Requires<[FeatureDistinctOps]>;
-    let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+    let NumOpsValue = "2" in
       def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
   }
 }
@@ -3102,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
   let M4 = 0;
 }
 
+class BinaryRRFc<string mnemonic, bits<16> opcode,
+                 RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3),
+             mnemonic#"\t$R1, $R2, $M3", []>;
+
 class BinaryMemRRFc<string mnemonic, bits<16> opcode,
                     RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
   : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
@@ -3169,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
   def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
 }
 
+class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                    RegisterOperand cls2, RegisterOperand cls3>
+  : InstRRFa<opcode, (outs cls1:$R1),
+             (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+             mnemonic#"$M4\t$R1, $R2, $R3",
+             [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+                                              cond4:$valid, cond4:$M4))]> {
+  let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRFa, but used for the raw assembly form.  The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                        RegisterOperand cls2, RegisterOperand cls3>
+  : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4),
+             mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+// Like CondBinaryRRFa, but with a fixed CC mask.
+class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode,
+                         RegisterOperand cls1, RegisterOperand cls2,
+                         RegisterOperand cls3>
+  : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2),
+             mnemonic#V.suffix#"\t$R1, $R2, $R3", []> {
+  let isAsmParserOnly = V.alternate;
+  let M4 = V.ccmask;
+}
+
+multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
+                             RegisterOperand cls1, RegisterOperand cls2,
+                             RegisterOperand cls3> {
+  let isCodeGenOnly = 1 in
+    def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+  def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
 class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
                RegisterOperand cls, Immediate imm>
   : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@@ -3189,9 +3286,9 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
                         Immediate imm> {
   let NumOpsKey = mnemonic in {
     let NumOpsValue = "3" in
-      def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>,
+      def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
               Requires<[FeatureDistinctOps]>;
-    let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+    let NumOpsValue = "2" in
       def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
   }
 }
@@ -3266,9 +3363,9 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
                         SDPatternOperator operator, RegisterOperand cls> {
   let NumOpsKey = mnemonic in {
     let NumOpsValue = "3" in
-      def K  : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+      def K  : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
                Requires<[FeatureDistinctOps]>;
-    let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+    let NumOpsValue = "2" in
       def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
   }
 }
@@ -3563,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
 
 class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
   : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
-             mnemonic#"\t$R1, $V2, $M3", []>;
+             mnemonic#"\t$R1, $V2, $M3", []> {
+  let M4 = 0;
+}
 
 class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                  TypedReg tr1, TypedReg tr2, bits<4> type>
@@ -3941,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
   let M4 = 0;
 }
 
+class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode,
+                                  RegisterOperand cls1, RegisterOperand cls2,
+                                  RegisterOperand cls3>
+  : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2),
+             (ins cls1:$R1src, cls2:$R2src, cls3:$R3),
+             mnemonic#"\t$R1, $R2, $R3", []> {
+  let Constraints = "$R1 = $R1src, $R2 = $R2src";
+  let DisableEncoding = "$R1src, $R2src";
+  let M4 = 0;
+}
+
 class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
                             RegisterOperand cls1, RegisterOperand cls2,
                             RegisterOperand cls3>
@@ -4229,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode>
              mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
 
 class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-                  TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+                  TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0>
   : InstVRRd<opcode, (outs tr1.op:$V1),
              (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
              mnemonic#"\t$V1, $V2, $V3, $V4",
@@ -4237,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                                                   (tr2.vt tr2.op:$V3),
                                                   (tr1.vt tr1.op:$V4)))]> {
   let M5 = type;
-  let M6 = 0;
+  let M6 = m6;
 }
 
 class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
@@ -4247,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
   let M6 = 0;
 }
 
+// Ternary operation where the assembler mnemonic has an extra operand to
+// optionally allow specifying arbitrary M6 values.
+multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
+                             SDPatternOperator operator,
+                             TypedReg tr1, TypedReg tr2, bits<4> type> {
+  let M5 = type, Defs = [CC] in
+    def "" : InstVRRd<opcode, (outs tr1.op:$V1),
+                      (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6),
+                      mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>;
+  def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3),
+                      (tr1.vt tr1.op:$V4)),
+            (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>;
+  def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+                  (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+                                            tr2.op:$V3, tr1.op:$V4, 0)>;
+}
+
+multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
+  let Defs = [CC] in
+    def "" : InstVRRd<opcode, (outs VR128:$V1),
+                      (ins VR128:$V2, VR128:$V3, VR128:$V4,
+                       imm32zx4:$M5, imm32zx4:$M6),
+                      mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+  def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
+                  (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+                                            VR128:$V4, imm32zx4:$M5, 0)>;
+}
+
 class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                   TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
   : InstVRRe<opcode, (outs tr1.op:$V1),
@@ -4277,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
   let M4 = type;
 }
 
+class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2,
+                                      imm32zx4:$M3, imm32zx4:$M4),
+             mnemonic#"\t$R1, $V2, $M3, $M4", []>;
+
 class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
   : InstVRSb<opcode, (outs VR128:$V1),
              (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
@@ -4594,14 +4737,31 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
                               RegisterOperand cls, Immediate imm> {
   let NumOpsKey = key in {
     let NumOpsValue = "3" in
-      def K : BinaryRIEPseudo<null_frag, cls, imm>,
+      def K : BinaryRIEPseudo<operator, cls, imm>,
               Requires<[FeatureHighWord, FeatureDistinctOps]>;
-    let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+    let NumOpsValue = "2" in
       def "" : BinaryRIPseudo<operator, cls, imm>,
                Requires<[FeatureHighWord]>;
   }
 }
 
+// A pseudo that is used during register allocation when folding a memory
+// operand. The 3-address register instruction with a spilled source cannot
+// be converted directly to a target 2-address reg/mem instruction.
+// Mapping:  <INSN>R  ->  MemFoldPseudo  ->  <INSN>
+class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
+                    AddressingMode mode>
+  : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
+    let OpKey = mnemonic#"rk"#cls;
+    let OpType = "mem";
+    let MemKey = mnemonic#cls;
+    let MemType = "pseudo";
+    let mayLoad = 1;
+    let AccessBytes = bytes;
+    let HasIndex = 1;
+    let hasNoSchedulingInfo = 1;
+}
+
 // Like CompareRI, but expanded after RA depending on the choice of register.
 class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
                       Immediate imm>
@@ -4639,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
   let CCMaskLast = 1;
 }
 
+// Like CondBinaryRRFa, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
+                           RegisterOperand cls3>
+  : Pseudo<(outs cls1:$R1),
+           (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+           [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+                                            cond4:$valid, cond4:$M4))]> {
+  let CCMaskLast = 1;
+}
+
 // Like CondBinaryRIE, but expanded after RA depending on the choice of
 // register.
 class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
@@ -4776,58 +4947,6 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
 class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
   : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
 
-// Define an instruction that operates on two fixed-length blocks of memory,
-// and associated pseudo instructions for operating on blocks of any size.
-// The Sequence form uses a straight-line sequence of instructions and
-// the Loop form uses a loop of length-256 instructions followed by
-// another instruction to handle the excess.
-multiclass MemorySS<string mnemonic, bits<8> opcode,
-                    SDPatternOperator sequence, SDPatternOperator loop> {
-  def "" : SideEffectBinarySSa<mnemonic, opcode>;
-  let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
-    def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
-                                       imm64:$length),
-                           [(sequence bdaddr12only:$dest, bdaddr12only:$src,
-                                      imm64:$length)]>;
-    def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
-                                   imm64:$length, GR64:$count256),
-                      [(loop bdaddr12only:$dest, bdaddr12only:$src,
-                             imm64:$length, GR64:$count256)]>;
-  }
-}
-
-// The same, but setting a CC result as comparion operator.
-multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
-                          SDPatternOperator sequence, SDPatternOperator loop> {
-  def "" : SideEffectBinarySSa<mnemonic, opcode>;
-  let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
-    def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
-                                       imm64:$length),
-                           [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
-                                               imm64:$length))]>;
-    def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
-                                   imm64:$length, GR64:$count256),
-                      [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
-                                      imm64:$length, GR64:$count256))]>;
-  }
-}
-
-// Define an instruction that operates on two strings, both terminated
-// by the character in R0.  The instruction processes a CPU-determinated
-// number of bytes at a time and sets CC to 3 if the instruction needs
-// to be repeated.  Also define a pseudo instruction that represents
-// the full loop (the main instruction plus the branch on CC==3).
-multiclass StringRRE<string mnemonic, bits<16> opcode,
-                     SDPatternOperator operator> {
-  let Uses = [R0L] in
-    def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
-  let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
-    def Loop : Pseudo<(outs GR64:$end),
-                      (ins GR64:$start1, GR64:$start2, GR32:$char),
-                      [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
-                                                 GR32:$char))]>;
-}
-
 // A pseudo instruction that is a direct alias of a real instruction.
 // These aliases are used in cases where a particular register operand is
 // fixed or where the same instruction is used with different register sizes.
@@ -4893,3 +5012,90 @@ class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
                imm32zx6:$I5), []> {
   let Constraints = "$R1 = $R1src";
 }
+
+//===----------------------------------------------------------------------===//
+// Multiclasses that emit both real and pseudo instructions
+//===----------------------------------------------------------------------===//
+
+multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
+                              SDPatternOperator operator, RegisterOperand cls,
+                              SDPatternOperator load, bits<5> bytes,
+                              AddressingMode mode = bdxaddr20only> {
+
+  def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
+    let MemKey = mnemonic#cls;
+    let MemType = "target";
+  }
+  let Has20BitOffset = 1 in
+    def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>;
+}
+
+multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
+                                 bits<16> rxyOpcode, SDPatternOperator operator,
+                                 RegisterOperand cls,
+                                 SDPatternOperator load, bits<5> bytes> {
+  let DispKey = mnemonic ## #cls in {
+    def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
+                      bdxaddr12pair> {
+      let DispSize = "12";
+      let MemKey = mnemonic#cls;
+      let MemType = "target";
+    }
+    let DispSize = "20" in
+      def Y  : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+                         bytes, bdxaddr20pair>;
+  }
+  def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
+}
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of length-256 instructions followed by
+// another instruction to handle the excess.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+                    SDPatternOperator sequence, SDPatternOperator loop> {
+  def "" : SideEffectBinarySSa<mnemonic, opcode>;
+  let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
+    def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                       imm64:$length),
+                           [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+                                      imm64:$length)]>;
+    def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                   imm64:$length, GR64:$count256),
+                      [(loop bdaddr12only:$dest, bdaddr12only:$src,
+                             imm64:$length, GR64:$count256)]>;
+  }
+}
+
+// The same, but setting a CC result as comparison operator.
+multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
+                          SDPatternOperator sequence, SDPatternOperator loop> {
+  def "" : SideEffectBinarySSa<mnemonic, opcode>;
+  let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+    def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                       imm64:$length),
+                           [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
+                                               imm64:$length))]>;
+    def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                   imm64:$length, GR64:$count256),
+                      [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
+                                      imm64:$length, GR64:$count256))]>;
+  }
+}
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0.  The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated.  Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+                     SDPatternOperator operator> {
+  let Uses = [R0L] in
+    def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
+  let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+    def Loop : Pseudo<(outs GR64:$end),
+                      (ins GR64:$start1, GR64:$start2, GR32:$char),
+                      [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+                                                 GR32:$char))]>;
+}
diff --git a/lib/Target/SystemZ/SystemZInstrHFP.td b/lib/Target/SystemZ/SystemZInstrHFP.td
index 6d5b4b92f650..2e3c9932d621 100644
--- a/lib/Target/SystemZ/SystemZInstrHFP.td
+++ b/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b03b4edaa4ab..57c1cf4ec70a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -169,11 +168,13 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
   if (!DestIsHigh && !SrcIsHigh)
     MI.setDesc(get(LowOpcodeK));
   else {
-    emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
-                  SystemZ::LR, 32, MI.getOperand(1).isKill(),
-                  MI.getOperand(1).isUndef());
+    if (DestReg != SrcReg) {
+      emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+                    SystemZ::LR, 32, MI.getOperand(1).isKill(),
+                    MI.getOperand(1).isUndef());
+      MI.getOperand(1).setReg(DestReg);
+    }
     MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
-    MI.getOperand(1).setReg(DestReg);
     MI.tieOperands(0, 1);
   }
 }
@@ -222,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
   // correctly.  This change is defered to the SystemZExpandPseudo pass.
 }
 
+// MI is a select pseudo instruction.  Replace it with LowOpcode if both
+// sources and the destination are all low GR32s, and with HighOpcode if they
+// are all high GR32s.  Otherwise, use the two-operand (tied) MixedOpcode.
+void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+                                        unsigned HighOpcode,
+                                        unsigned MixedOpcode) const {
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned Src1Reg = MI.getOperand(1).getReg();
+  unsigned Src2Reg = MI.getOperand(2).getReg();
+  bool DestIsHigh = isHighReg(DestReg);
+  bool Src1IsHigh = isHighReg(Src1Reg);
+  bool Src2IsHigh = isHighReg(Src2Reg);
+
+  // If sources and destination aren't all high or all low, we may be able to
+  // simplify the operation by moving one of the sources to the destination
+  // first.  But only if this doesn't clobber the other source.
+  if (DestReg != Src1Reg && DestReg != Src2Reg) {
+    if (DestIsHigh != Src1IsHigh) {
+      emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
+                    SystemZ::LR, 32, MI.getOperand(1).isKill(),
+                    MI.getOperand(1).isUndef());
+      MI.getOperand(1).setReg(DestReg);
+      Src1Reg = DestReg;
+      Src1IsHigh = DestIsHigh;
+    } else if (DestIsHigh != Src2IsHigh) {
+      emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
+                    SystemZ::LR, 32, MI.getOperand(2).isKill(),
+                    MI.getOperand(2).isUndef());
+      MI.getOperand(2).setReg(DestReg);
+      Src2Reg = DestReg;
+      Src2IsHigh = DestIsHigh;
+    }
+  }
+
+  // If the destination (now) matches one source, prefer this to be first.
+  if (DestReg != Src1Reg && DestReg == Src2Reg) {
+    commuteInstruction(MI, false, 1, 2);
+    std::swap(Src1Reg, Src2Reg);
+    std::swap(Src1IsHigh, Src2IsHigh);
+  }
+
+  if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
+    MI.setDesc(get(LowOpcode));
+  else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
+    MI.setDesc(get(HighOpcode));
+  else {
+    // Given the simplification above, we must already have a two-operand case.
+    assert (DestReg == Src1Reg);
+    MI.setDesc(get(MixedOpcode));
+    MI.tieOperands(0, 1);
+    LOCRMuxJumps++;
+  }
+
+  // If we were unable to implement the pseudo with a single instruction, we
+  // need to convert it back into a branch sequence.  This cannot be done here
+  // since the caller of expandPostRAPseudo does not handle changes to the CFG
+  // correctly.  This change is deferred to the SystemZExpandPseudo pass.
+}
+
 // MI is an RR-style pseudo instruction that zero-extends the low Size bits
 // of one GRX32 into another.  Replace it with LowOpcode if both operands
 // are low registers, otherwise use RISB[LH]G.
@@ -311,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
   };
 
   switch (MI.getOpcode()) {
+  case SystemZ::SELRMux:
+  case SystemZ::SELFHR:
+  case SystemZ::SELR:
+  case SystemZ::SELGR:
   case SystemZ::LOCRMux:
   case SystemZ::LOCFHR:
   case SystemZ::LOCR:
@@ -557,80 +621,6 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
   return false;
 }
 
-// If Reg is a virtual register, return its definition, otherwise return null.
-static MachineInstr *getDef(unsigned Reg,
-                            const MachineRegisterInfo *MRI) {
-  if (TargetRegisterInfo::isPhysicalRegister(Reg))
-    return nullptr;
-  return MRI->getUniqueVRegDef(Reg);
-}
-
-// Return true if MI is a shift of type Opcode by Imm bits.
-static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
-  return (MI->getOpcode() == Opcode &&
-          !MI->getOperand(2).getReg() &&
-          MI->getOperand(3).getImm() == Imm);
-}
-
-// If the destination of MI has no uses, delete it as dead.
-static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
-  if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
-    MI->eraseFromParent();
-}
-
-// Compare compares SrcReg against zero.  Check whether SrcReg contains
-// the result of an IPM sequence whose input CC survives until Compare,
-// and whether Compare is therefore redundant.  Delete it and return
-// true if so.
-static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
-                                  const MachineRegisterInfo *MRI,
-                                  const TargetRegisterInfo *TRI) {
-  MachineInstr *LGFR = nullptr;
-  MachineInstr *RLL = getDef(SrcReg, MRI);
-  if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
-    LGFR = RLL;
-    RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
-  }
-  if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
-    return false;
-
-  MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
-  if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
-    return false;
-
-  MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
-  if (!IPM || IPM->getOpcode() != SystemZ::IPM)
-    return false;
-
-  // Check that there are no assignments to CC between the IPM and Compare,
-  if (IPM->getParent() != Compare.getParent())
-    return false;
-  MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
-  for (++MBBI; MBBI != MBBE; ++MBBI) {
-    MachineInstr &MI = *MBBI;
-    if (MI.modifiesRegister(SystemZ::CC, TRI))
-      return false;
-  }
-
-  Compare.eraseFromParent();
-  if (LGFR)
-    eraseIfDead(LGFR, MRI);
-  eraseIfDead(RLL, MRI);
-  eraseIfDead(SRL, MRI);
-  eraseIfDead(IPM, MRI);
-
-  return true;
-}
-
-bool SystemZInstrInfo::optimizeCompareInstr(
-    MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
-    int Value, const MachineRegisterInfo *MRI) const {
-  assert(!SrcReg2 && "Only optimizing constant comparisons so far");
-  bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
-  return Value == 0 && !IsLogical &&
-         removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
-}
-
 bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                        ArrayRef<MachineOperand> Pred,
                                        unsigned TrueReg, unsigned FalseReg,
@@ -679,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
 
   unsigned Opc;
   if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
-    if (STI.hasLoadStoreOnCond2())
+    if (STI.hasMiscellaneousExtensions3())
+      Opc = SystemZ::SELRMux;
+    else if (STI.hasLoadStoreOnCond2())
       Opc = SystemZ::LOCRMux;
     else {
       Opc = SystemZ::LOCR;
@@ -691,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
       TrueReg = TReg;
       FalseReg = FReg;
     }
-  } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
-    Opc = SystemZ::LOCGR;
-  else
+  } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+    if (STI.hasMiscellaneousExtensions3())
+      Opc = SystemZ::SELGR;
+    else
+      Opc = SystemZ::LOCGR;
+  } else
     llvm_unreachable("Invalid register class");
 
   BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -716,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   unsigned NewUseOpc;
   unsigned UseIdx;
   int CommuteIdx = -1;
+  bool TieOps = false;
   switch (UseOpc) {
+  case SystemZ::SELRMux:
+    TieOps = true;
+    LLVM_FALLTHROUGH;
   case SystemZ::LOCRMux:
     if (!STI.hasLoadStoreOnCond2())
       return false;
@@ -728,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     else
       return false;
     break;
+  case SystemZ::SELGR:
+    TieOps = true;
+    LLVM_FALLTHROUGH;
   case SystemZ::LOCGR:
     if (!STI.hasLoadStoreOnCond2())
       return false;
@@ -749,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
   bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
   UseMI.setDesc(get(NewUseOpc));
+  if (TieOps)
+    UseMI.tieOperands(0, 1);
   UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
   if (DeleteDef)
     DefMI.eraseFromParent();
@@ -1032,73 +1036,13 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
   }
 }
 
-// Used to return from convertToThreeAddress after replacing two-address
-// instruction OldMI with three-address instruction NewMI.
-static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
-                                                 MachineInstr *NewMI,
-                                                 LiveVariables *LV) {
-  if (LV) {
-    unsigned NumOps = OldMI->getNumOperands();
-    for (unsigned I = 1; I < NumOps; ++I) {
-      MachineOperand &Op = OldMI->getOperand(I);
-      if (Op.isReg() && Op.isKill())
-        LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
-    }
-  }
-  transferDeadCC(OldMI, NewMI);
-  return NewMI;
-}
-
 MachineInstr *SystemZInstrInfo::convertToThreeAddress(
     MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
   MachineBasicBlock *MBB = MI.getParent();
-  MachineFunction *MF = MBB->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-
-  unsigned Opcode = MI.getOpcode();
-  unsigned NumOps = MI.getNumOperands();
-
-  // Try to convert something like SLL into SLLK, if supported.
-  // We prefer to keep the two-operand form where possible both
-  // because it tends to be shorter and because some instructions
-  // have memory forms that can be used during spilling.
-  if (STI.hasDistinctOps()) {
-    MachineOperand &Dest = MI.getOperand(0);
-    MachineOperand &Src = MI.getOperand(1);
-    unsigned DestReg = Dest.getReg();
-    unsigned SrcReg = Src.getReg();
-    // AHIMux is only really a three-operand instruction when both operands
-    // are low registers.  Try to constrain both operands to be low if
-    // possible.
-    if (Opcode == SystemZ::AHIMux &&
-        TargetRegisterInfo::isVirtualRegister(DestReg) &&
-        TargetRegisterInfo::isVirtualRegister(SrcReg) &&
-        MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
-        MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
-      MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
-      MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
-    }
-    int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
-    if (ThreeOperandOpcode >= 0) {
-      // Create three address instruction without adding the implicit
-      // operands. Those will instead be copied over from the original
-      // instruction by the loop below.
-      MachineInstrBuilder MIB(
-          *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
-                                      /*NoImplicit=*/true));
-      MIB.add(Dest);
-      // Keep the kill state, but drop the tied flag.
-      MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
-      // Keep the remaining operands as-is.
-      for (unsigned I = 2; I < NumOps; ++I)
-        MIB.add(MI.getOperand(I));
-      MBB->insert(MI, MIB);
-      return finishConvertToThreeAddress(&MI, MIB, LV);
-    }
-  }
 
   // Try to convert an AND into an RISBG-type instruction.
-  if (LogicOp And = interpretAndImmediate(Opcode)) {
+  // TODO: It might be beneficial to select RISBG and shorten to AND instead.
+  if (LogicOp And = interpretAndImmediate(MI.getOpcode())) {
     uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
     // AND IMMEDIATE leaves the other bits of the register unchanged.
     Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
@@ -1126,7 +1070,16 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
               .addImm(Start)
               .addImm(End + 128)
               .addImm(0);
-      return finishConvertToThreeAddress(&MI, MIB, LV);
+      if (LV) {
+        unsigned NumOps = MI.getNumOperands();
+        for (unsigned I = 1; I < NumOps; ++I) {
+          MachineOperand &Op = MI.getOperand(I);
+          if (Op.isReg() && Op.isKill())
+            LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+        }
+      }
+      transferDeadCC(&MI, MIB);
+      return MIB;
     }
   }
   return nullptr;
@@ -1135,7 +1088,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
 MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, int FrameIndex,
-    LiveIntervals *LIS) const {
+    LiveIntervals *LIS, VirtRegMap *VRM) const {
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Size = MFI.getObjectSize(FrameIndex);
@@ -1263,7 +1216,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
   // MVCs that turn out to be redundant.
   if (OpNum == 0 && MI.hasOneMemOperand()) {
     MachineMemOperand *MMO = *MI.memoperands_begin();
-    if (MMO->getSize() == Size && !MMO->isVolatile()) {
+    if (MMO->getSize() == Size && !MMO->isVolatile() && !MMO->isAtomic()) {
       // Handle conversion of loads.
       if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) {
         return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
@@ -1289,12 +1242,37 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
     }
   }
 
-  // If the spilled operand is the final one, try to change <INSN>R
-  // into <INSN>.
+  // If the spilled operand is the final one or the instruction is
+  // commutable, try to change <INSN>R into <INSN>.
+  unsigned NumOps = MI.getNumExplicitOperands();
   int MemOpcode = SystemZ::getMemOpcode(Opcode);
+
+  // See if this is a 3-address instruction that is convertible to 2-address
+  // and suitable for folding below.  Only try this with virtual registers
+  // and a provided VRM (during regalloc).
+  bool NeedsCommute = false;
+  if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+    if (VRM == nullptr)
+      MemOpcode = -1;
+    else {
+      assert(NumOps == 3 && "Expected two source registers.");
+      Register DstReg = MI.getOperand(0).getReg();
+      Register DstPhys =
+        (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
+      Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
+                                    : ((OpNum == 1 && MI.isCommutable())
+                                           ? MI.getOperand(2).getReg()
+                                         : Register()));
+      if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
+          TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
+        NeedsCommute = (OpNum == 1);
+      else
+        MemOpcode = -1;
+    }
+  }
+
   if (MemOpcode >= 0) {
-    unsigned NumOps = MI.getNumExplicitOperands();
-    if (OpNum == NumOps - 1) {
+    if ((OpNum == NumOps - 1) || NeedsCommute) {
       const MCInstrDesc &MemDesc = get(MemOpcode);
       uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
       assert(AccessBytes != 0 && "Size of access should be known");
@@ -1302,8 +1280,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
       uint64_t Offset = Size - AccessBytes;
       MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
                                         MI.getDebugLoc(), get(MemOpcode));
-      for (unsigned I = 0; I < OpNum; ++I)
-        MIB.add(MI.getOperand(I));
+      MIB.add(MI.getOperand(0));
+      if (NeedsCommute)
+        MIB.add(MI.getOperand(2));
+      else
+        for (unsigned I = 1; I < OpNum; ++I)
+          MIB.add(MI.getOperand(I));
       MIB.addFrameIndex(FrameIndex).addImm(Offset);
       if (MemDesc.TSFlags & SystemZII::HasIndex)
         MIB.addReg(0);
@@ -1380,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
     return true;
 
+  case SystemZ::SELRMux:
+    expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
+                         SystemZ::LOCRMux);
+    return true;
+
   case SystemZ::STCMux:
     expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
     return true;
@@ -1506,7 +1493,7 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 }
 
 unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
-  if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+  if (MI.isInlineAsm()) {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -1857,7 +1844,8 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
 }
 
 bool SystemZInstrInfo::
-areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                const MachineInstr &MIb,
                                 AliasAnalysis *AA) const {
 
   if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand())
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 216139eb7c79..2edde175542e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -142,6 +141,11 @@ enum FusedCompareType {
 
 } // end namespace SystemZII
 
+namespace SystemZ {
+int getTwoOperandOpcode(uint16_t Opcode);
+int getTargetMemOpcode(uint16_t Opcode);
+}
+
 class SystemZInstrInfo : public SystemZGenInstrInfo {
   const SystemZRegisterInfo RI;
   SystemZSubtarget &STI;
@@ -158,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                        unsigned HighOpcode) const;
   void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned HighOpcode) const;
+  void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+                        unsigned HighOpcode, unsigned MixedOpcode) const;
   void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
   void expandLoadStackGuard(MachineInstr *MI) const;
@@ -208,9 +214,6 @@ public:
                         int *BytesAdded = nullptr) const override;
   bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
-                            unsigned SrcReg2, int Mask, int Value,
-                            const MachineRegisterInfo *MRI) const override;
   bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
                        unsigned, unsigned, int&, int&, int&) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -252,7 +255,8 @@ public:
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                         ArrayRef<unsigned> Ops,
                         MachineBasicBlock::iterator InsertPt, int FrameIndex,
-                        LiveIntervals *LIS = nullptr) const override;
+                        LiveIntervals *LIS = nullptr,
+                        VirtRegMap *VRM = nullptr) const override;
   MachineInstr *foldMemoryOperandImpl(
       MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
       MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
@@ -317,7 +321,8 @@ public:
   // addresses. This function returns true if two MIs access different
   // memory addresses and false otherwise.
   bool
-  areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const override;
 };
 
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 8d3b1011d0a7..91856893e3bd 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -256,7 +255,7 @@ let isCall = 1, Defs = [CC] in {
 }
 
 // Regular calls.
-let isCall = 1, Defs = [R14D, CC] in {
+let isCall = 1, Defs = [R14D, CC], Uses = [FPC] in {
   def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
                         [(z_call pcrel32:$I2)]>;
   def CallBASR  : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
@@ -362,9 +361,6 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
 //===----------------------------------------------------------------------===//
 
 // Register moves.
-// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
-def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
-            Requires<[FeatureHighWord]>;
 def LR  : UnaryRR <"lr",  0x18,   null_frag, GR32, GR32>;
 def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
 
@@ -478,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
   def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
 }
 
+// Move right.
+let Predicates = [FeatureMiscellaneousExtensions3],
+    mayLoad = 1, mayStore = 1, Uses = [R0L] in
+  def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>;
+
 // String moves.
 let mayLoad = 1, mayStore = 1, Defs = [CC] in
   defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
@@ -486,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
 // Conditional move instructions
 //===----------------------------------------------------------------------===//
 
+let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
+  // Select.
+  let isCommutable = 1 in {
+    // Expands to SELR or SELFHR or a branch-and-move sequence,
+    // depending on the choice of registers.
+    def  SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
+    defm SELFHR  : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
+    defm SELR    : CondBinaryRRFaPair<"selr",   0xB9F0, GR32, GR32, GR32>;
+    defm SELGR   : CondBinaryRRFaPair<"selgr",  0xB9E3, GR64, GR64, GR64>;
+  }
+
+  // Define AsmParser extended mnemonics for each general condition-code mask.
+  foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+                "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+    def SELRAsm#V   : FixedCondBinaryRRFa<CV<V>, "selr",   0xB9F0,
+                                          GR32, GR32, GR32>;
+    def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0,
+                                          GRH32, GRH32, GRH32>;
+    def SELGRAsm#V  : FixedCondBinaryRRFa<CV<V>, "selgr",  0xB9E3,
+                                          GR64, GR64, GR64>;
+  }
+}
+
 let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
   // Load immediate on condition.  Matched via DAG pattern and created
   // by the PeepholeOptimizer via FoldImmediate.
@@ -920,11 +944,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
 
   // Addition of memory.
   defm AH  : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
-  defm A   : BinaryRXPair<"a",  0x5A, 0xE35A, z_sadd, GR32, load, 4>;
+  defm A   : BinaryRXPairAndPseudo<"a",  0x5A, 0xE35A, z_sadd, GR32, load, 4>;
   def  AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
              Requires<[FeatureMiscellaneousExtensions2]>;
   def  AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
-  def  AG  : BinaryRXY<"ag",  0xE308, z_sadd, GR64, load, 8>;
+  defm AG  : BinaryRXYAndPseudo<"ag",  0xE308, z_sadd, GR64, load, 8>;
 
   // Addition to memory.
   def ASI  : BinarySIY<"asi",  0xEB6A, add, imm32sx8>;
@@ -962,9 +986,9 @@ let Defs = [CC] in {
               Requires<[FeatureHighWord]>;
 
   // Addition of memory.
-  defm AL   : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
+  defm AL   : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
   def  ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
-  def  ALG  : BinaryRXY<"alg",  0xE30A, z_uadd, GR64, load, 8>;
+  defm ALG  : BinaryRXYAndPseudo<"alg",  0xE30A, z_uadd, GR64, load, 8>;
 
   // Addition to memory.
   def ALSI  : BinarySIY<"alsi",  0xEB6E, null_frag, imm32sx8>;
@@ -1007,11 +1031,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
 
   // Subtraction of memory.
   defm SH  : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
-  defm S   : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
+  defm S   : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
   def  SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
              Requires<[FeatureMiscellaneousExtensions2]>;
   def  SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
-  def  SG  : BinaryRXY<"sg",  0xE309, z_ssub, GR64, load, 8>;
+  defm SG  : BinaryRXYAndPseudo<"sg",  0xE309, z_ssub, GR64, load, 8>;
 }
 defm : SXB<z_ssub, GR64, SGFR>;
 
@@ -1033,6 +1057,14 @@ let AddedComplexity = 1 in {
             (AGFI GR64:$src1, imm64sx32n:$src2)>;
 }
 
+// And vice versa in one special case, where we need to load a
+// constant into a register in any case, but the negated constant
+// requires fewer instructions to load.
+def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2),
+          (SGR GR64:$src1, (LLILH imm64lh16n:$src2))>;
+def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
+          (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
+
 // Subtraction producing a carry.
 let Defs = [CC] in {
   // Subtraction of a register.
@@ -1051,9 +1083,9 @@ let Defs = [CC] in {
   def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
 
   // Subtraction of memory.
-  defm SL   : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
+  defm SL   : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
   def  SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
-  def  SLG  : BinaryRXY<"slg",  0xE30B, z_usub, GR64, load, 8>;
+  defm SLG  : BinaryRXYAndPseudo<"slg",  0xE30B, z_usub, GR64, load, 8>;
 }
 defm : ZXB<z_usub, GR64, SLGFR>;
 
@@ -1128,8 +1160,8 @@ let Defs = [CC] in {
 
   // ANDs of memory.
   let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
-    defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
-    def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+    defm N  : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
+    defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
   }
 
   // AND to memory
@@ -1185,8 +1217,8 @@ let Defs = [CC] in {
 
   // ORs of memory.
   let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
-    defm O  : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
-    def  OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+    defm O  : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
+    defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
   }
 
   // OR to memory
@@ -1225,8 +1257,8 @@ let Defs = [CC] in {
 
   // XORs of memory.
   let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
-    defm X  : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
-    def  XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+    defm X  : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
+    defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
   }
 
   // XOR to memory
@@ -1239,6 +1271,43 @@ let Defs = [CC] in {
 defm : RMWIByte<xor, bdaddr12pair, XI>;
 defm : RMWIByte<xor, bdaddr20pair, XIY>;
 
+//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureMiscellaneousExtensions3],
+    Defs = [CC] in {
+  // AND with complement.
+  let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+    def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>;
+    def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>;
+  }
+
+  // OR with complement.
+  let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+    def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>;
+    def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>;
+  }
+
+  // NAND.
+  let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+    def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>;
+    def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>;
+  }
+
+  // NOR.
+  let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+    def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>;
+    def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>;
+  }
+
+  // NXOR.
+  let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+    def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>;
+    def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Multiplication
 //===----------------------------------------------------------------------===//
@@ -1833,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
   let Predicates = [FeatureMessageSecurityAssist8] in
     def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
                                               GR128, GR128, GR128>;
+
+  let Predicates = [FeatureMessageSecurityAssist9] in
+    def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2013,7 +2085,12 @@ let Defs = [CC] in
 def : Pat<(ctlz GR64:$src),
           (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
 
-// Population count.  Counts bits set per byte.
+// Population count.  Counts bits set per byte or doubleword.
+let Predicates = [FeatureMiscellaneousExtensions3] in {
+  let Defs = [CC] in
+    def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>;
+  def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>;
+}
 let Predicates = [FeaturePopulationCount], Defs = [CC] in
   def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
 
@@ -2044,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in
 let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
   def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
 
+// Sort lists.
+let Predicates = [FeatureEnhancedSort],
+    mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+  def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>;
+
+// Deflate conversion call.
+let Predicates = [FeatureDeflateConversion],
+    mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+  def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939,
+                                           GR128, GR128, GR64>;
+
 // Execute.
 let hasSideEffects = 1 in {
   def EX   : SideEffectBinaryRX<"ex", 0x44, GR64>;
@@ -2186,6 +2274,22 @@ let AddedComplexity = 4 in {
             (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 }
 
+// Substitute a shift amount of (x*64-s) with (-s): shift/rotate instructions
+// only use the low 6 bits of the amount register, i.e. the amount modulo 64.
+let AddedComplexity = 4 in {
+  def : Pat<(shl GR64:$val, (sub imm32mod64,  GR32:$shift)),
+            (SLLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(sra GR64:$val, (sub imm32mod64,  GR32:$shift)),
+            (SRAG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(srl GR64:$val, (sub imm32mod64,  GR32:$shift)),
+            (SRLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+  def : Pat<(rotl GR64:$val, (sub imm32mod64,  GR32:$shift)),
+            (RLLG GR64:$val, (LCR GR32:$shift), 0)>;
+}
+
 // Peepholes for turning scalar operations into block operations.
 defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
                       XCSequence, 1>;
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
index c351577fa5bd..ecce16c9cd73 100644
--- a/lib/Target/SystemZ/SystemZInstrSystem.td
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 6c97b85277c3..261727f89058 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1,9 +1,8 @@
 //==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -104,7 +103,7 @@ let Predicates = [FeatureVector] in {
 
 let Predicates = [FeatureVector] in {
   // Load.
-  def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
+  defm VL : UnaryVRXAlign<"vl", 0xE706>;
 
   // Load to block boundary.  The number of loaded bytes is only known
   // at run time.  The instruction is really polymorphic, but v128b matches
@@ -123,7 +122,7 @@ let Predicates = [FeatureVector] in {
   def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
 
   // Load multiple.
-  def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
+  defm VLM : LoadMultipleVRSaAlign<"vlm", 0xE736>;
 
   // Load and replicate
   def VLREP  : UnaryVRXGeneric<"vlrep", 0xE705>;
@@ -208,13 +207,13 @@ defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
 
 let Predicates = [FeatureVector] in {
   // Store.
-  def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
+  defm VST : StoreVRXAlign<"vst", 0xE70E>;
 
   // Store with length.  The number of stored bytes is only known at run time.
   def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
 
   // Store multiple.
-  def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
+  defm VSTM : StoreMultipleVRSaAlign<"vstm", 0xE73E>;
 
   // Store element.
   def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8,  v128b, 1, imm32zx4>;
@@ -249,6 +248,81 @@ let Predicates = [FeatureVectorPackedDecimal] in {
   def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
 }
 
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements2] in {
+  // Load byte-reversed elements.  VLBRQ is matched by the v16i8 pattern below.
+  def VLBR  : UnaryVRXGeneric<"vlbr", 0xE606>;
+  def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
+  def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
+  def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
+  def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;
+
+  // Load elements reversed.  FP vector types reuse the integer-typed forms.
+  def VLER  : UnaryVRXGeneric<"vler", 0xE607>;
+  def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
+  def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
+  def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
+  def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
+            (VLERF bdxaddr12only:$addr)>;
+  def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
+            (VLERG bdxaddr12only:$addr)>;
+  def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
+            (VLBRQ bdxaddr12only:$addr)>;
+
+  // Load byte-reversed element (immediate is presumably the element index).
+  def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
+  def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
+  def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;
+
+  // Load byte-reversed element and zero.  lerv/ldrv alias VLLEBRZE/VLLEBRZG.
+  def VLLEBRZ  : UnaryVRXGeneric<"vllebrz", 0xE604>;
+  def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>;
+  def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>;
+  def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>;
+  def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>;
+  def : InstAlias<"lerv\t$V1, $XBD2",
+                  (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>;
+  def : InstAlias<"ldrv\t$V1, $XBD2",
+                  (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>;
+
+  // Load byte-reversed element and replicate.
+  def VLBRREP  : UnaryVRXGeneric<"vlbrrep", 0xE605>;
+  def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>;
+  def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>;
+  def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>;
+
+  // Store byte-reversed elements.  VSTBRQ is matched by the v16i8 pattern below.
+  def VSTBR  : StoreVRXGeneric<"vstbr", 0xE60E>;
+  def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
+  def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
+  def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
+  def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;
+
+  // Store elements reversed.  FP vector types reuse the integer-typed forms.
+  def VSTER  : StoreVRXGeneric<"vster", 0xE60F>;
+  def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>;
+  def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>;
+  def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>;
+  def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr),
+            (VSTERF VR128:$val, bdxaddr12only:$addr)>;
+  def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr),
+            (VSTERG VR128:$val, bdxaddr12only:$addr)>;
+  def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr),
+            (VSTBRQ VR128:$val, bdxaddr12only:$addr)>;
+
+  // Store byte-reversed element.  sterv/stdrv alias VSTEBRF/VSTEBRG with immediate 0.
+  def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>;
+  def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>;
+  def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>;
+  def : InstAlias<"sterv\t$V1, $XBD2",
+                  (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+  def : InstAlias<"stdrv\t$V1, $XBD2",
+                  (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+}
+
 //===----------------------------------------------------------------------===//
 // Selects and permutes
 //===----------------------------------------------------------------------===//
@@ -707,6 +781,10 @@ let Predicates = [FeatureVector] in {
   def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
             (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
 
+  // Shift left double by bit.
+  let Predicates = [FeatureVectorEnhancements2] in
+    def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>;
+
   // Shift right arithmetic.
   def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
 
@@ -719,6 +797,10 @@ let Predicates = [FeatureVector] in {
   // Shift right logical by byte.
   def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
 
+  // Shift right double by bit.
+  let Predicates = [FeatureVectorEnhancements2] in
+    def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>;
+
   // Subtract.
   def VS  : BinaryVRRcGeneric<"vs", 0xE7F7>;
   def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
@@ -925,126 +1007,190 @@ let Predicates = [FeatureVector] in {
 // See comments in SystemZInstrFP.td for the suppression flags and
 // rounding modes.
 multiclass VectorRounding<Instruction insn, TypedReg tr> {
-  def : FPConversion<insn, frint,      tr, tr, 0, 0>;
-  def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
-  def : FPConversion<insn, ffloor,     tr, tr, 4, 7>;
-  def : FPConversion<insn, fceil,      tr, tr, 4, 6>;
-  def : FPConversion<insn, ftrunc,     tr, tr, 4, 5>;
-  def : FPConversion<insn, fround,     tr, tr, 4, 1>;
+  def : FPConversion<insn, any_frint,      tr, tr, 0, 0>;
+  def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
+  def : FPConversion<insn, any_ffloor,     tr, tr, 4, 7>;
+  def : FPConversion<insn, any_fceil,      tr, tr, 4, 6>;
+  def : FPConversion<insn, any_ftrunc,     tr, tr, 4, 5>;
+  def : FPConversion<insn, any_fround,     tr, tr, 4, 1>;
 }
 
 let Predicates = [FeatureVector] in {
   // Add.
-  def VFA   : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
-  def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
-  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
-    def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
-    def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFA   : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
+    def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
+    def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
+      def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+      def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
+    }
   }
 
-  // Convert from fixed 64-bit.
-  def VCDG  : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
-  def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
-  def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+  // Convert from fixed.
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VCDG  : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
+    def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+    def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+  }
   def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+  let Predicates = [FeatureVectorEnhancements2] in {
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in
+        def VCFPS  : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>;
+      def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
+      def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
+    }
+    def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
+  }
 
-  // Convert from logical 64-bit.
-  def VCDLG  : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
-  def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
-  def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+  // Convert from logical.
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VCDLG  : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
+    def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+    def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+  }
   def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+  let Predicates = [FeatureVectorEnhancements2] in {
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in
+        def VCFPL  : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>;
+      def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
+      def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
+    }
+    def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
+  }
 
-  // Convert to fixed 64-bit.
-  def VCGD  : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
-  def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
-  def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+  // Convert to fixed.
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VCGD  : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
+    def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+    def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+  }
   // Rounding mode should agree with SystemZInstrFP.td.
   def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+  let Predicates = [FeatureVectorEnhancements2] in {
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in
+        def VCSFP  : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>;
+      def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>;
+      def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
+    }
+    // Rounding mode should agree with SystemZInstrFP.td.
+    def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
+  }
 
-  // Convert to logical 64-bit.
-  def VCLGD  : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
-  def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
-  def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+  // Convert to logical.
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VCLGD  : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
+    def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+    def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+  }
   // Rounding mode should agree with SystemZInstrFP.td.
   def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+  let Predicates = [FeatureVectorEnhancements2] in {
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in
+        def VCLFP  : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>;
+      def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>;
+      def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
+    }
+    // Rounding mode should agree with SystemZInstrFP.td.
+    def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
+  }
 
   // Divide.
-  def VFD   : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
-  def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
-  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
-    def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
-    def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFD   : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
+    def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
+    def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
+      def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+      def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Load FP integer.
-  def VFI   : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
-  def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
-  def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFI   : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
+    def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+    def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+  }
   defm : VectorRounding<VFIDB, v128db>;
   defm : VectorRounding<WFIDB, v64db>;
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
-    def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
-    def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
+      def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+      def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+    }
     defm : VectorRounding<VFISB, v128sb>;
     defm : VectorRounding<WFISB, v32sb>;
     defm : VectorRounding<WFIXB, v128xb>;
   }
 
   // Load lengthened.
-  def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
-  def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
-  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
+    def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+    def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
+  }
   let Predicates = [FeatureVectorEnhancements1] in {
-    let isAsmParserOnly = 1 in {
-      def VFLL  : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
-      def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
-      def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in {
+        def VFLL  : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
+        def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
+        def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+      }
+      def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>;
     }
-    def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
-    def : Pat<(f128 (fpextend (f32 VR32:$src))),
+    def : Pat<(f128 (any_fpextend (f32 VR32:$src))),
               (WFLLD (WLDEB VR32:$src))>;
   }
 
   // Load rounded.
-  def VLED  : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
-  def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
-  def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VLED  : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
+    def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+    def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+  }
   def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
-  def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
+  def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
   let Predicates = [FeatureVectorEnhancements1] in {
-    let isAsmParserOnly = 1 in {
-      def VFLR  : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
-      def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
-      def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      let isAsmParserOnly = 1 in {
+        def VFLR  : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
+        def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+        def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+      }
+      def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
     }
-    def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
-    def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
-    def : Pat<(f32 (fpround (f128 VR128:$src))),
+    def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>;
+    def : Pat<(f32 (any_fpround (f128 VR128:$src))),
               (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
   }
 
   // Maximum.
   multiclass VectorMax<Instruction insn, TypedReg tr> {
-    def : FPMinMax<insn, fmaxnum, tr, 4>;
+    def : FPMinMax<insn, any_fmaxnum, tr, 4>;
     def : FPMinMax<insn, fmaximum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMAX   : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
-    def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
-                                   v128db, v128db, 3, 0>;
-    def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
-                                   v64db, v64db, 3, 8>;
-    def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
-                                   v128sb, v128sb, 2, 0>;
-    def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
-                                   v32sb, v32sb, 2, 8>;
-    def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
-                                   v128xb, v128xb, 4, 8>;
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      def VFMAX   : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
+      def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
+                                     v128db, v128db, 3, 0>;
+      def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
+                                     v64db, v64db, 3, 8>;
+      def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
+                                     v128sb, v128sb, 2, 0>;
+      def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
+                                     v32sb, v32sb, 2, 8>;
+      def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
+                                     v128xb, v128xb, 4, 8>;
+    }
     defm : VectorMax<VFMAXDB, v128db>;
     defm : VectorMax<WFMAXDB, v64db>;
     defm : VectorMax<VFMAXSB, v128sb>;
@@ -1054,21 +1200,23 @@ let Predicates = [FeatureVector] in {
 
   // Minimum.
   multiclass VectorMin<Instruction insn, TypedReg tr> {
-    def : FPMinMax<insn, fminnum, tr, 4>;
+    def : FPMinMax<insn, any_fminnum, tr, 4>;
     def : FPMinMax<insn, fminimum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMIN   : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
-    def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
-                                   v128db, v128db, 3, 0>;
-    def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
-                                   v64db, v64db, 3, 8>;
-    def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
-                                   v128sb, v128sb, 2, 0>;
-    def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
-                                   v32sb, v32sb, 2, 8>;
-    def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
-                                   v128xb, v128xb, 4, 8>;
+    let Uses = [FPC], mayRaiseFPException = 1 in {
+      def VFMIN   : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
+      def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
+                                     v128db, v128db, 3, 0>;
+      def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
+                                     v64db, v64db, 3, 8>;
+      def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
+                                     v128sb, v128sb, 2, 0>;
+      def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
+                                     v32sb, v32sb, 2, 8>;
+      def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
+                                     v128xb, v128xb, 4, 8>;
+    }
     defm : VectorMin<VFMINDB, v128db>;
     defm : VectorMin<WFMINDB, v64db>;
     defm : VectorMin<VFMINSB, v128sb>;
@@ -1077,53 +1225,61 @@ let Predicates = [FeatureVector] in {
   }
 
   // Multiply.
-  def VFM   : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
-  def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
-  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
-    def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
-    def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFM   : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
+    def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
+    def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
+      def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+      def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Multiply and add.
-  def VFMA   : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
-  def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
-  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
-    def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
-    def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFMA   : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
+    def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
+    def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
+      def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+      def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
+    }
   }
 
   // Multiply and subtract.
-  def VFMS   : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
-  def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
-  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
-    def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
-    def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFMS   : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
+    def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
+    def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
+      def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+      def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
+    }
   }
 
   // Negative multiply and add.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayRaiseFPException = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     def VFNMA   : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
-    def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
-    def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
-    def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
-    def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
-    def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
+    def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
+    def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
+    def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
+    def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
+    def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
   }
 
   // Negative multiply and subtract.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayRaiseFPException = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     def VFNMS   : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
-    def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
-    def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
-    def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
-    def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
-    def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
+    def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
+    def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
+    def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
+    def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
+    def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
   }
 
   // Perform sign operation.
@@ -1164,23 +1320,27 @@ let Predicates = [FeatureVector] in {
   }
 
   // Square root.
-  def VFSQ   : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
-  def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
-  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
-    def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
-    def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFSQ   : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
+    def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
+    def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
+      def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+      def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Subtract.
-  def VFS   : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
-  def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
-  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
-    def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
-    def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def VFS   : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
+    def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
+    def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
+      def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+      def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Test data class immediate.
@@ -1202,7 +1362,7 @@ let Predicates = [FeatureVector] in {
 
 let Predicates = [FeatureVector] in {
   // Compare scalar.
-  let Defs = [CC] in {
+  let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
     def WFC   : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
     def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
@@ -1212,7 +1372,7 @@ let Predicates = [FeatureVector] in {
   }
 
   // Compare and signal scalar.
-  let Defs = [CC] in {
+  let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
     def WFK   : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
     def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
@@ -1222,22 +1382,25 @@ let Predicates = [FeatureVector] in {
   }
 
   // Compare equal.
-  def  VFCE   : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
-  defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
-                                v128g, v128db, 3, 0>;
-  defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
-                                v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
-                                  v128f, v128sb, 2, 0>;
-    defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
-                                  v32f, v32sb, 2, 8>;
-    defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
-                                  v128q, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def  VFCE   : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
+    defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+                                  v128g, v128db, 3, 0>;
+    defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+                                  v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+                                    v128f, v128sb, 2, 0>;
+      defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
+                                    v32f, v32sb, 2, 8>;
+      defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
+                                    v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal equal.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayRaiseFPException = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
@@ -1251,22 +1414,25 @@ let Predicates = [FeatureVector] in {
   }
 
   // Compare high.
-  def  VFCH   : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
-  defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
-                                v128g, v128db, 3, 0>;
-  defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
-                                v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
-                                  v128f, v128sb, 2, 0>;
-    defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
-                                  v32f, v32sb, 2, 8>;
-    defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
-                                  v128q, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def  VFCH   : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
+    defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+                                  v128g, v128db, 3, 0>;
+    defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+                                  v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+                                    v128f, v128sb, 2, 0>;
+      defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
+                                    v32f, v32sb, 2, 8>;
+      defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
+                                    v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal high.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayRaiseFPException = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
@@ -1280,22 +1446,25 @@ let Predicates = [FeatureVector] in {
   }
 
   // Compare high or equal.
-  def  VFCHE   : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
-  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
-                                 v128g, v128db, 3, 0>;
-  defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
-                                 v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
-                                   v128f, v128sb, 2, 0>;
-    defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
-                                   v32f, v32sb, 2, 8>;
-    defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
-                                   v128q, v128xb, 4, 8>;
+  let Uses = [FPC], mayRaiseFPException = 1 in {
+    def  VFCHE   : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
+    defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+                                   v128g, v128db, 3, 0>;
+    defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+                                   v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+                                     v128f, v128sb, 2, 0>;
+      defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
+                                     v32f, v32sb, 2, 8>;
+      defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
+                                     v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal high or equal.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayRaiseFPException = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
                                    v128g, v128db, 3, 4>;
     defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
@@ -1520,6 +1689,24 @@ let Predicates = [FeatureVector] in {
                                         z_vstrcz_cc, v128f, v128f, 2, 2>;
 }
 
+let Predicates = [FeatureVectorEnhancements2] in {
+  defm VSTRS  : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>;
+  defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B,
+                                 z_vstrs_cc, v128b, v128b, 0>;
+  defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B,
+                                 z_vstrs_cc, v128b, v128h, 1>;
+  defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B,
+                                 z_vstrs_cc, v128b, v128f, 2>;
+  let Defs = [CC] in {
+    def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B,
+                              z_vstrsz_cc, v128b, v128b, 0, 2>;
+    def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B,
+                              z_vstrsz_cc, v128b, v128h, 1, 2>;
+    def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B,
+                              z_vstrsz_cc, v128b, v128f, 2, 2>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Packed-decimal instructions
 //===----------------------------------------------------------------------===//
@@ -1531,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in {
   def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;
 
   let Defs = [CC] in {
+    let Predicates = [FeatureVectorPackedDecimalEnhancement] in {
+      def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>;
+      def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>;
+    }
     def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>;
     def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
     def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index f532e9e23b1f..06d893d043e9 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 802962bd4db0..95d7e22dec32 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index 2655e4866b20..ef39f80a94ef 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index 7173cfa42959..14ad06488312 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 1a7c0d7f687a..9b6aa3593ce0 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 4f64f4c65f1d..9eec3f37bc28 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 98e761ef87fe..0becfaa1d49c 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -1,9 +1,8 @@
 //-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index ab820e5d3e63..0d5cc2e03e8d 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -1,9 +1,8 @@
 //==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 7bf32bf19a4a..56632e1529a2 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -1,9 +1,8 @@
 //===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -189,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
 }]>;
 
+// Negated variants.
+def NEGLH16 : SDNodeXForm<imm, [{
+  uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+def NEGLF32 : SDNodeXForm<imm, [{
+  uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL;
+  return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
 // Truncate an immediate to a 8-bit signed quantity.
 def SIMM8 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
@@ -431,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{
   return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
 }], HF32, "U32Imm">;
 
+// Negated immediates that fit LF32 or LH16.
+def imm64lh16n : Immediate<i64, [{
+  return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
+}], NEGLH16, "U16Imm">;
+
+def imm64lf32n : Immediate<i64, [{
+  return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
+}], NEGLF32, "U32Imm">;
+
 // Short immediates.
 def imm64sx8 : Immediate<i64, [{
   return isInt<8>(N->getSExtValue());
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 626675bfb70c..15bd12bc98a4 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -1,9 +1,8 @@
 //===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -192,6 +191,12 @@ def SDT_ZVecTernary         : SDTypeProfile<1, 3,
                                              SDTCisSameAs<0, 1>,
                                              SDTCisSameAs<0, 2>,
                                              SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryConvCC   : SDTypeProfile<2, 3,
+                                            [SDTCisVec<0>,
+                                             SDTCisVT<1, i32>,
+                                             SDTCisVec<2>,
+                                             SDTCisSameAs<2, 3>,
+                                             SDTCisSameAs<0, 4>]>;
 def SDT_ZVecTernaryInt      : SDTypeProfile<1, 3,
                                             [SDTCisVec<0>,
                                              SDTCisSameAs<0, 1>,
@@ -279,6 +284,10 @@ def z_loadbswap        : SDNode<"SystemZISD::LRV", SDTLoad,
                                  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def z_storebswap       : SDNode<"SystemZISD::STRV", SDTStore,
                                  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def z_loadeswap        : SDNode<"SystemZISD::VLER", SDTLoad,
+                                 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storeeswap       : SDNode<"SystemZISD::VSTER", SDTStore,
+                                 [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def z_tdc               : SDNode<"SystemZISD::TDC", SDT_ZTest>;
 
@@ -338,6 +347,10 @@ def z_vstrc_cc          : SDNode<"SystemZISD::VSTRC_CC",
                                  SDT_ZVecQuaternaryIntCC>;
 def z_vstrcz_cc         : SDNode<"SystemZISD::VSTRCZ_CC",
                                  SDT_ZVecQuaternaryIntCC>;
+def z_vstrs_cc          : SDNode<"SystemZISD::VSTRS_CC",
+                                 SDT_ZVecTernaryConvCC>;
+def z_vstrsz_cc         : SDNode<"SystemZISD::VSTRSZ_CC",
+                                 SDT_ZVecTernaryConvCC>;
 def z_vftci             : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
 
 class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
@@ -662,22 +675,34 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2),
                       [(z_usubo node:$src1, node:$src2),
                        (sub node:$src1, node:$src2)]>;
 
+// Combined logical operations.
+def andc : PatFrag<(ops node:$src1, node:$src2),
+                   (and node:$src1, (not node:$src2))>;
+def orc  : PatFrag<(ops node:$src1, node:$src2),
+                   (or node:$src1, (not node:$src2))>;
+def nand : PatFrag<(ops node:$src1, node:$src2),
+                   (not (and node:$src1, node:$src2))>;
+def nor  : PatFrag<(ops node:$src1, node:$src2),
+                   (not (or node:$src1, node:$src2))>;
+def nxor : PatFrag<(ops node:$src1, node:$src2),
+                   (not (xor node:$src1, node:$src2))>;
+
 // Fused multiply-subtract, using the natural operand order.
-def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (fma node:$src1, node:$src2, (fneg node:$src3))>;
+def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                      (any_fma node:$src1, node:$src2, (fneg node:$src3))>;
 
 // Fused multiply-add and multiply-subtract, but with the order of the
 // operands matching SystemZ's MA and MS instructions.
-def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                    (fma node:$src2, node:$src3, node:$src1)>;
-def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                    (fma node:$src2, node:$src3, (fneg node:$src1))>;
+def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                        (any_fma node:$src2, node:$src3, node:$src1)>;
+def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                        (any_fma node:$src2, node:$src3, (fneg node:$src1))>;
 
 // Negative fused multiply-add and multiply-subtract.
-def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                   (fneg (fma node:$src1, node:$src2, node:$src3))>;
-def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                   (fneg (fms node:$src1, node:$src2, node:$src3))>;
+def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                       (fneg (any_fma node:$src1, node:$src2, node:$src3))>;
+def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                       (fneg (any_fms node:$src1, node:$src2, node:$src3))>;
 
 // Floating-point negative absolute.
 def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
@@ -709,9 +734,9 @@ class shiftop<SDPatternOperator operator>
              [(operator node:$val, node:$count),
               (operator node:$val, (and node:$count, imm32bottom6set))]>;
 
-// Vector representation of all-zeros and all-ones.
-def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
+def imm32mod64  : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() % 64 == 0);
+}]>;
 
 // Load a scalar and replicate it in all elements of a vector.
 class z_replicate_load<ValueType scalartype, SDPatternOperator load>
@@ -723,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>;
 def z_replicate_loadi64 : z_replicate_load<i64, load>;
 def z_replicate_loadf32 : z_replicate_load<f32, load>;
 def z_replicate_loadf64 : z_replicate_load<f64, load>;
+// Byte-swapped replicated vector element loads.
+def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
+def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
+def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>;
 
 // Load a scalar and insert it into a single element of a vector.
 class z_vle<ValueType scalartype, SDPatternOperator load>
@@ -735,18 +764,22 @@ def z_vlei32 : z_vle<i32, load>;
 def z_vlei64 : z_vle<i64, load>;
 def z_vlef32 : z_vle<f32, load>;
 def z_vlef64 : z_vle<f64, load>;
+// Byte-swapped vector element loads.
+def z_vlebri16 : z_vle<i32, z_loadbswap16>;
+def z_vlebri32 : z_vle<i32, z_loadbswap32>;
+def z_vlebri64 : z_vle<i64, z_loadbswap64>;
 
 // Load a scalar and insert it into the low element of the high i64 of a
 // zeroed vector.
 class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
   : PatFrag<(ops node:$addr),
-            (z_vector_insert (z_vzero),
+            (z_vector_insert immAllZerosV,
                              (scalartype (load node:$addr)), (i32 index))>;
 def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
 def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
 def z_vllezi32 : z_vllez<i32, load, 1>;
 def z_vllezi64 : PatFrags<(ops node:$addr),
-                          [(z_vector_insert (z_vzero),
+                          [(z_vector_insert immAllZerosV,
                                             (i64 (load node:$addr)), (i32 0)),
                            (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
 // We use high merges to form a v4f32 from four f32s.  Propagating zero
@@ -759,11 +792,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr),
                              (bitconvert
                               (v4f32 (scalar_to_vector
                                       (f32 (load node:$addr)))))))),
-                          (v2i64 (z_vzero)))>;
+                          (v2i64
+                           (bitconvert (v4f32 immAllZerosV))))>;
 def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
                           (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
-                          (z_vzero))>;
+                          immAllZerosV)>;
 
 // Similarly for the high element of a zeroed vector.
 def z_vllezli32 : z_vllez<i32, load, 0>;
@@ -774,8 +808,21 @@ def z_vllezlf32 : PatFrag<(ops node:$addr),
                              (z_merge_high
                               (v4f32 (scalar_to_vector
                                       (f32 (load node:$addr)))),
-                              (v4f32 (z_vzero))))),
-                           (v2i64 (z_vzero)))>;
+                              (v4f32 immAllZerosV)))),
+                           (v2i64
+                            (bitconvert (v4f32 immAllZerosV))))>;
+
+// Byte-swapped variants.
+def z_vllebrzi16  : z_vllez<i32, z_loadbswap16, 3>;
+def z_vllebrzi32  : z_vllez<i32, z_loadbswap32, 1>;
+def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>;
+def z_vllebrzi64  : PatFrags<(ops node:$addr),
+                             [(z_vector_insert immAllZerosV,
+                                               (i64 (z_loadbswap64 node:$addr)),
+                                               (i32 0)),
+                              (z_join_dwords (i64 (z_loadbswap64 node:$addr)),
+                                             (i64 0))]>;
+
 
 // Store one element of a vector.
 class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -788,18 +835,22 @@ def z_vstei32 : z_vste<i32, store>;
 def z_vstei64 : z_vste<i64, store>;
 def z_vstef32 : z_vste<f32, store>;
 def z_vstef64 : z_vste<f64, store>;
+// Byte-swapped vector element stores.
+def z_vstebri16 : z_vste<i32, z_storebswap16>;
+def z_vstebri32 : z_vste<i32, z_storebswap32>;
+def z_vstebri64 : z_vste<i64, z_storebswap64>;
 
 // Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;
 
 // Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>;
 
 // Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
 
 // Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>;
 
 // Integer absolute on vectors.
 class z_viabs<int shift>
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index 152521fb66a8..beaf4de285a3 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -1,9 +1,8 @@
 //===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/SystemZPostRewrite.cpp b/lib/Target/SystemZ/SystemZPostRewrite.cpp
new file mode 100644
index 000000000000..8e4060eac74c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -0,0 +1,124 @@
+//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that is run immediately after VirtRegRewriter
+// but before MachineCopyPropagation. The purpose is to lower pseudos to
+// target instructions before any later pass might substitute a register for
+// another.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
+
+#define DEBUG_TYPE "systemz-postrewrite"
+STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
+
+namespace llvm {
+  void initializeSystemZPostRewritePass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZPostRewrite : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZPostRewrite() : MachineFunctionPass(ID) {
+    initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry());
+  }
+
+  const SystemZInstrInfo *TII;
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool selectMBB(MachineBasicBlock &MBB);
+};
+
+char SystemZPostRewrite::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
+                SYSTEMZ_POSTREWRITE_NAME, false, false)
+
+/// Returns an instance of the Post Rewrite pass.
+FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
+  return new SystemZPostRewrite();
+}
+
+/// If MBBI references a pseudo instruction that should be selected here,
+/// do it and return true.  Otherwise return false.
+bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+
+  // Note: If this could be done during regalloc in foldMemoryOperandImpl()
+  // while also updating the LiveIntervals, there would be no need for the
+  // MemFoldPseudo to begin with.
+  int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode);
+  if (TargetMemOpcode != -1) {
+    MI.setDesc(TII->get(TargetMemOpcode));
+    MI.tieOperands(0, 1);
+    unsigned DstReg = MI.getOperand(0).getReg();
+    MachineOperand &SrcMO = MI.getOperand(1);
+    if (DstReg != SrcMO.getReg()) {
+      BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg)
+        .addReg(SrcMO.getReg());
+      SrcMO.setReg(DstReg);
+      MemFoldCopies++;
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions.  Return true if anything was modified.
+bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= selectMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= selectMBB(MBB);
+
+  return Modified;
+}
+
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 0dca4582dc0d..b27c25beb58c 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -1,9 +1,8 @@
 //===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
 def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
 def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
 
+def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;
+
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index e9f9188048da..e7cd6871dbb4 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,6 +53,26 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO,
   return RC;
 }
 
+// Rebuild Hints from the non-reserved registers of RC, visited in the order
+// given by Order.  Registers that were already in Hints on entry (the copy
+// hints) are placed first, followed by every other candidate.
+static void addHints(ArrayRef<MCPhysReg> Order,
+                     SmallVectorImpl<MCPhysReg> &Hints,
+                     const TargetRegisterClass *RC,
+                     const MachineRegisterInfo *MRI) {
+  SmallSet<unsigned, 4> Incoming;
+  Incoming.insert(Hints.begin(), Hints.end());
+  Hints.clear();
+  auto AppendGroup = [&](bool FromIncoming) {
+    for (MCPhysReg PhysReg : Order)
+      if ((Incoming.count(PhysReg) != 0) == FromIncoming &&
+          RC->contains(PhysReg) && !MRI->isReserved(PhysReg))
+        Hints.push_back(PhysReg);
+  };
+  AppendGroup(true);  // Former copy hints go first.
+  AppendGroup(false); // Then the remaining registers of RC.
+}
+
 bool
 SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                            ArrayRef<MCPhysReg> Order,
@@ -62,7 +81,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo *MRI = &MF.getRegInfo();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
@@ -76,31 +96,23 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
       if (!DoneRegs.insert(Reg).second)
         continue;
 
-      for (auto &Use : MRI->use_instructions(Reg))
+      for (auto &Use : MRI->reg_instructions(Reg)) {
         // For LOCRMux, see if the other operand is already a high or low
-        // register, and in that case give the correpsonding hints for
+        // register, and in that case give the corresponding hints for
         // VirtReg. LOCR instructions need both operands in either high or
-        // low parts.
-        if (Use.getOpcode() == SystemZ::LOCRMux) {
+        // low parts. Same handling for SELRMux.
+        if (Use.getOpcode() == SystemZ::LOCRMux ||
+            Use.getOpcode() == SystemZ::SELRMux) {
           MachineOperand &TrueMO = Use.getOperand(1);
           MachineOperand &FalseMO = Use.getOperand(2);
           const TargetRegisterClass *RC =
             TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
                                    getRC32(TrueMO, VRM, MRI));
+          if (Use.getOpcode() == SystemZ::SELRMux)
+            RC = TRI->getCommonSubClass(RC,
+                                        getRC32(Use.getOperand(0), VRM, MRI));
           if (RC && RC != &SystemZ::GRX32BitRegClass) {
-            // Pass the registers of RC as hints while making sure that if
-            // any of these registers are copy hints, hint them first.
-            SmallSet<unsigned, 4> CopyHints;
-            CopyHints.insert(Hints.begin(), Hints.end());
-            Hints.clear();
-            for (MCPhysReg Reg : Order)
-              if (CopyHints.count(Reg) &&
-                  RC->contains(Reg) && !MRI->isReserved(Reg))
-                Hints.push_back(Reg);
-            for (MCPhysReg Reg : Order)
-              if (!CopyHints.count(Reg) &&
-                  RC->contains(Reg) && !MRI->isReserved(Reg))
-                Hints.push_back(Reg);
+            addHints(Order, Hints, RC, MRI);
             // Return true to make these hints the only regs available to
             // RA. This may mean extra spilling but since the alternative is
             // a jump sequence expansion of the LOCRMux, it is preferred.
@@ -112,10 +124,70 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
             (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
           if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
             Worklist.push_back(OtherReg);
-        }
+        } // end LOCRMux
+        else if (Use.getOpcode() == SystemZ::CHIMux ||
+                 Use.getOpcode() == SystemZ::CFIMux) {
+          if (Use.getOperand(1).getImm() == 0) {
+            bool OnlyLMuxes = true;
+            for (MachineInstr &DefMI : MRI->def_instructions(VirtReg))
+              if (DefMI.getOpcode() != SystemZ::LMux)
+                OnlyLMuxes = false;
+            if (OnlyLMuxes) {
+              addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+              // Return false to make these hints preferred but not obligatory.
+              return false;
+            }
+          }
+        } // end CHIMux / CFIMux
+      }
     }
   }
 
+  if (VRM == nullptr)
+    return BaseImplRetVal;
+
+  // Add any two-address hints after any copy hints.
+  SmallSet<unsigned, 4> TwoAddrHints;
+  for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
+    if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
+      const MachineOperand *VRRegMO = nullptr;
+      const MachineOperand *OtherMO = nullptr;
+      const MachineOperand *CommuMO = nullptr;
+      if (VirtReg == Use.getOperand(0).getReg()) {
+        VRRegMO = &Use.getOperand(0);
+        OtherMO = &Use.getOperand(1);
+        if (Use.isCommutable())
+          CommuMO = &Use.getOperand(2);
+      } else if (VirtReg == Use.getOperand(1).getReg()) {
+        VRRegMO = &Use.getOperand(1);
+        OtherMO = &Use.getOperand(0);
+      } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
+        VRRegMO = &Use.getOperand(2);
+        OtherMO = &Use.getOperand(0);
+      } else
+        continue;
+
+      auto tryAddHint = [&](const MachineOperand *MO) -> void {
+        Register Reg = MO->getReg();
+        Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+        if (PhysReg) {
+          if (MO->getSubReg())
+            PhysReg = getSubReg(PhysReg, MO->getSubReg());
+          if (VRRegMO->getSubReg())
+            PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
+                                          MRI->getRegClass(VirtReg));
+          if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+            TwoAddrHints.insert(PhysReg);
+        }
+      };
+      tryAddHint(OtherMO);
+      if (CommuMO)
+        tryAddHint(CommuMO);
+    }
+  for (MCPhysReg OrderReg : Order)
+    if (TwoAddrHints.count(OrderReg))
+      Hints.push_back(OrderReg);
+
   return BaseImplRetVal;
 }
 
@@ -169,6 +241,9 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(SystemZ::A0);
   Reserved.set(SystemZ::A1);
 
+  // FPC is the floating-point control register.
+  Reserved.set(SystemZ::FPC);
+
   return Reserved;
 }
 
@@ -328,7 +403,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
   return true;
 }
 
-unsigned
+Register
 SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const SystemZFrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 9fd2e4ae4f00..4f721ec23e53 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -84,7 +83,7 @@ public:
                       const TargetRegisterClass *NewRC,
                       LiveIntervals &LIS) const override;
 
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index cea88c088b86..3567b0f3acf8 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -1,9 +1,8 @@
 //==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -296,6 +295,13 @@ def CC : SystemZReg<"cc">;
 let isAllocatable = 0, CopyCost = -1 in
   def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
 
+// The floating-point control register.
+// Note: We only model the current rounding modes and the IEEE masks.
+// IEEE flags and DXC are not modeled here.
+def FPC : SystemZReg<"fpc">;
+let isAllocatable = 0 in  // Not available to the register allocator.
+  def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>;
+
 // Access registers.
 class ACR32<bits<16> num, string n> : SystemZReg<n> {
   let HWEncoding = num;
diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td
index 83bf97e6841a..98eca2802242 100644
--- a/lib/Target/SystemZ/SystemZSchedule.td
+++ b/lib/Target/SystemZ/SystemZSchedule.td
@@ -1,9 +1,8 @@
 //==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -60,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit
 
 def MCD : SchedWrite; // Millicode
 
+include "SystemZScheduleArch13.td"
 include "SystemZScheduleZ14.td"
 include "SystemZScheduleZ13.td"
 include "SystemZScheduleZEC12.td"
diff --git a/lib/Target/SystemZ/SystemZScheduleArch13.td b/lib/Target/SystemZ/SystemZScheduleArch13.td
new file mode 100644
index 000000000000..9f82f24d0e8f
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZScheduleArch13.td
@@ -0,0 +1,1695 @@
+//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Arch13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Arch13Model : SchedMachineModel {
+    // Machine model selected by the "arch13" ProcessorModel.
+    let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
+
+    let IssueWidth = 6;             // Number of instructions decoded per cycle.
+    let MicroOpBufferSize = 60;     // Issue queues (buffered micro-ops).
+    let LoadLatency = 1;            // Optimistic load latency.
+
+    let PostRAScheduler = 1;        // Also schedule after register allocation.
+
+    // Extra cycles for a mispredicted branch.
+    let MispredictPenalty = 20;
+}
+
+let SchedModel = Arch13Model in  {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget-common include file, but it seems the include system in TableGen
+// currently (2016) rejects multiple includes of the same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+  def : WriteRes<NormalGr, []>;
+  def : WriteRes<BeginGroup, []> { let BeginGroup  = 1; }
+  def : WriteRes<EndGroup, []>   { let EndGroup    = 1; }
+}
+def : WriteRes<Cracked, []> {
+  let NumMicroOps = 2;
+  let BeginGroup  = 1;
+}
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 3;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+  let NumMicroOps = 6;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+  let NumMicroOps = 9;
+  let BeginGroup  = 1;
+  let EndGroup    = 1;
+}
+
+// Incoming latency removed from the register operand which is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
+
+let NumMicroOps = 0 in {
+  foreach L = 1-30 in
+    def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Arch13_FXaUnit     : ProcResource<2>;
+def Arch13_FXbUnit     : ProcResource<2>;
+def Arch13_LSUnit      : ProcResource<2>;
+def Arch13_VecUnit     : ProcResource<2>;
+def Arch13_VecFPdUnit  : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Arch13_VBUnit      : ProcResource<2>;
+def Arch13_MCD         : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in {
+  def : WriteRes<FXa, [Arch13_FXaUnit]>;
+  def : WriteRes<FXb, [Arch13_FXbUnit]>;
+  def : WriteRes<LSU, [Arch13_LSUnit]>;
+  def : WriteRes<VecBF,  [Arch13_VecUnit]>;
+  def : WriteRes<VecDF,  [Arch13_VecUnit]>;
+  def : WriteRes<VecDFX, [Arch13_VecUnit]>;
+  def : WriteRes<VecMul,  [Arch13_VecUnit]>;
+  def : WriteRes<VecStr,  [Arch13_VecUnit]>;
+  def : WriteRes<VecXsPm, [Arch13_VecUnit]>;
+  foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+    def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>;
+    def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>;
+    def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>;
+    def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>;
+  }}
+
+  def : WriteRes<VecFPd,  [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; }
+
+  def : WriteRes<VBU,     [Arch13_VBUnit]>; // Virtual Branching Unit
+}
+
+def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3;
+                                    let BeginGroup  = 1;
+                                    let EndGroup    = 1; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+// Pseudo -> LA / LAY
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
+             (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb2, GroupAlone],
+             (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>;
+
+// Pseudo -> reg move
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>;
+
+// Loads
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr],
+             (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>;
+
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>;
+
+// Store multiple
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>;
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "IC32(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr],
+             (instregex "ICM(H|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "A(Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AL(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "ALG(F)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+             (instregex "ALC(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "AG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "S(G|Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "SH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+             (instregex "SLB(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "SG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND: N*, NI* and NC opcode names (only NC is Cracked)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "N(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR: O*, OI* and OC opcode names (only OC is Cracked)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "O(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR: X*, XI* and XC opcode names (only XC is Cracked)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "X(G|Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Combined logical operations: (NC|OC|NN|NO|NX)(G)?RK, all WLat1 on FXa
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication: M* names (FXa entries are NormalGr, FXa2 are GroupAlone)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "MS(GF|Y)?$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>;
+def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>;
+def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>;
+def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>;
+def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone],
+             (instregex "M(FY|L)?$")>;
+def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>;
+def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "MSC$")>;
+def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr],
+             (instregex "MSGC$")>;
+def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>;
+def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder: D* names, latency WLat20 or WLat30, never NormalGr
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
+def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
+             (instregex "DSG(F)?$")>;
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+             (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts and rotates: SLL/SRL/SRA/SLA, S(L|R)D(A|L), RLL, RISB*, R(N|O|X)SBG
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
+             (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate: RLL, RLLG
+def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>;
+
+// Rotate and insert: RISBG(N|32)?, RISBH(G|H|L), RISBL(G|H|L), RISBMux
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>;
+
+// Rotate and select: RNSBG, ROSBG, RXSBG (Cracked)
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison: C*/CL* compares, TM* tests, CLM* (every non-MCD entry uses FXb)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+             (instregex "C(G|Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+             (instregex "CL(Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+
+// Compare halfword: CH, CHY, CHRL, CGH, CGHRL, CHHSI
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>;
+def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64): CGF, CGFRL, CGFR
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>;
+
+// Compare logical character: CLC (Cracked); CLCL, CLCLE, CLCLU, CLST (MCD)
+def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>;
+
+// Test under mask: TM, TMY, TM(H|L)Mux, TM(HH|HL|LH|LL)(64)?
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask: CLM, CLMH, CLMY
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr],
+             (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint (BPRP is listed without a latency write)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations (Serialize, LA*, TS, CS*, CDS*, CSST, PLO, LPQ, STPQ, LPD*)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>;
+
+// Test and set: TS
+def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>;
+
+// Compare and swap: CS, CSG, CSY
+def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
+             (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap: CDS, CDSY, CDSG
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
+             (instregex "CDS(Y)?$")>;
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
+              GroupAlone3], (instregex "CDSG$")>;
+
+// Compare and swap and store: CSST (MCD)
+def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
+
+// Perform locked operation: PLO (MCD)
+def : InstRW<[WLat30, MCD], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword: LPQ, STPQ
+def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint: LPD, LPDG
+def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert: TR* and CU* names; all but TR and TRT are MCD
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
+             (instregex "TRT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist: K*, PCC, PPNO and PRNO opcode names (all MCD)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD],
+             (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "(KIMD|KLMD|KMAC|KDSA)$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+             (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage: LGG, LLGFSG (LSULatency) and LGSC, STGSC (MCD)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>;
+def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic: CVB*/CVD*, MV(N|O|Z), PACK/PK*/UNPK*, *P ops, ED(MK)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
+             (instregex "CVBG$")>;
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
+             (instregex "CVB(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
+
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
+             (instregex "(A|S|ZA)P$")>;
+def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
+def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
+def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
+def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register: EAR, SAR, CPYA
+def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended: LAE, LAEY
+def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely): LAM(Y), STAM(Y)
+def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask: IPM
+def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask: SPM
+def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link: BAL, BALR
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode: TAM
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>;
+
+// Set addressing mode: SAM24, SAM31, SAM64
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode: BSM, BASSM
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin: TBEGIN, TBEGINC
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
+
+// Transaction end: TEND
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort: TABORT (MCD)
+def : InstRW<[WLat30, MCD], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth: ETND
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>;
+
+// Nontransactional store: NTSTG
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist: PPA (GroupAlone)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one: FLOGR
+def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count: POPCNT, POPCNTOpt
+def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>;
+
+// String instructions: SRST, SRSTU, CUSE (all MCD)
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>;
+
+// Various complex instructions, all MCD: CFC, UPT, CKSM, CMPSC, SORTL, DFLTCC
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD],
+             (instregex "UPT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>;
+
+// Execute: EX, EXRL
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions (opcode names beginning with "Insn")
+//===----------------------------------------------------------------------===//
+
+// The empty write list assigns an "empty" sched-class instead of the "invalid
+// sched-class", so that getNumDecoderSlots() returns 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero: LZDR, LZER, LZXR
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>;
+
+// Load: LER, LDR, LDR32, LDGR, LGDR, LXR
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>;
+def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test: LT(E|D|X)BR and LT(E|D|X)BRCompare
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
+             (instregex "LTXBR(Compare)?$")>;
+
+// Copy sign: CPSDR(d|s)(d|s)
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions (LE, LEY, LD, LDY, LDE32, LX)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions (STE, STEY, STD, STDY, STX)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded: LEDBR(A), LEXBR(A), LDXBR(A)
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>;
+
+// Load lengthened: LDEB(R), LXEB(R), LXDB(R)
+def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
+
+// Convert from fixed / logical: C(E|D|X)(F|G)BR(A), C(E|D|X)L(F|G)BR
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical: C(F|G)(E|D|X)BR(A), CL(F|G)(E|D|X)BR
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked],
+             (instregex "C(F|G)(E|D)BR(A)?$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked],
+             (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive: L(C|N|P)(E|D|X)BR, L(C|N|P)DFR(_32)?
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root: SQEB, SQDB, SQEBR, SQDBR, SQXBR (all WLat30 on VecFPd)
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer: FIEBR(A), FIDBR(A), FIXBR(A)
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition: AEB, ADB, AEBR, ADBR, AXBR
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "A(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction: SEB, SDB, SEBR, SDBR, SXBR
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "S(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply: M(D|DE|EE)B, M(D|DE|EE)BR, MXDB, MXDBR, MXBR
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MXDB$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract: M(A|S)EB, M(A|S)EBR, M(A|S)DB, M(A|S)DBR
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)EB$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>;
+
+// Division: DEB, DDB, DEBR, DDBR, DXBR (all WLat30 on VecFPd)
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr],
+             (instregex "D(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer: DIEBR, DIDBR (MCD)
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare: (K|C)(E|D)B, (K|C)(E|D)BR, (K|C)XBR
+def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class: TCEB, TCDB, TCXB
+def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions (none is NormalGr)
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[WLat30, MCD], (instregex "SFASR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LFAS$")>;
+def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test: LTER, LTDR, LTXR
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded: LEDR, LRER, LEXR, LDXR, LRDR
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened: LDE, LDER, LXE, LXD, LXER, LXDR
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
+
+// Convert from fixed: C(E|D|X)(F|G)R
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed: C(F|G)(E|D|X)R
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP: THDR, THDER, TBDR, TBEDR
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive: L(C|N|P)(E|D|X)R
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve: HER, HDR
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>;
+
+// Square root: SQE, SQD, SQER, SQDR, SQXR (all WLat30 on VecFPd)
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer: FIER, FIDR, FIXR
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition: A(E|D|U|W), A(E|D|U|W)R, AXR
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "A(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction: S(E|D|U|W), S(E|D|U|W)R, SXR
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "S(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>;
+
+// Multiply: M(D|DE|E|EE), MXD, MXR, MY, MY(H|L) and their ...R counterparts
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MXD$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "MY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract: M(A|S)(E|D), MAY, MAY(H|L) and ...R forms
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "M(A|S)(E|D)$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone],
+             (instregex "MAY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+             (instregex "MAY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division: DE, DD, DER, DDR, DXR (all WLat30 on VecFPd)
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare: CE, CD, CER, CDR, CXR
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+             (instregex "C(E|D)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>;
+def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test: LTDTR, LTXTR
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded: LEDTR, LDXTR
+def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>;
+def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>;
+
+// Load lengthened: LDETR, LXDTR
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical: C(D|X)(F|G)TR(A), C(D|X)L(F|G)TR
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical: C(F|G)(D|X)TR(A), CL(F|G)(D|X)TR (all WLat30)
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
+             (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked],
+             (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed: C(D|X)(S|U)TR, C(S|U)(D|X)TR
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned: CDZT, CXZT, CZDT, CZXT
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>;
+def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>;
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
+             (instregex "VGE(F|G)$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+             (instregex "VLM(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
+def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+             (instregex "VLEBR(H|F|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+             (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+             (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>;
+def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+             (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+             (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
+def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>;
+def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone],
+             (instregex "VCVB(G)?(Opt)?$")>;
+def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone],
+             (instregex "VCVD(G)?$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "ISKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "IVSK$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>;
+def : InstRW<[WLat30, MCD], (instregex "IRBM$")>;
+def : InstRW<[WLat30, MCD], (instregex "PFMF$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGIN$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTLB$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STRAG$")>;
+def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "LASP$")>;
+def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PC$")>;
+def : InstRW<[WLat30, MCD], (instregex "PR$")>;
+def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RP$")>;
+def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "BAKR$")>;
+def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
+def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "STAP$")>;
+def : InstRW<[WLat30, MCD], (instregex "STIDP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "ECAG$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTF$")>;
+def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "SVC$")>;
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[WLat30, MCD], (instregex "DIAG$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "RCHP$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCHM$")>;
+def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPI$")>;
+def : InstRW<[WLat30, MCD], (instregex "SAL$")>;
+
+}
+
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 74e1dad87908..b3266051da4e 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -1,9 +1,8 @@
 //-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1192,8 +1191,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
 // Vector: Loads
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>;
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H)?$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
@@ -1201,16 +1200,17 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
              (instregex "VLE(B|F|G|H)$")>;
 def : InstRW<[WLat6LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
              (instregex "VGE(F|G)$")>;
-def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+             (instregex "VLM(Align)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Vector: Stores
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
 def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
 def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
 def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td
index 1962fdf3a1d1..df7282a2961b 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -1,9 +1,8 @@
 //-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1210,8 +1209,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
 // Vector: Loads
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>;
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
@@ -1219,17 +1218,18 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
              (instregex "VLE(B|F|G|H)$")>;
 def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
              (instregex "VGE(F|G)$")>;
-def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+             (instregex "VLM(Align)?$")>;
 def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
 
 //===----------------------------------------------------------------------===//
 // Vector: Stores
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
 def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
 def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
 def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
 def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
 
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 7535739f813a..ca714ef1a702 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -1,9 +1,8 @@
 //=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index a21d2c4cef70..fb226be678da 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -1,9 +1,8 @@
 //=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index e0d7bca9a94b..a50e6aa59711 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -164,17 +163,17 @@ static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
 }
 
 // Convert the current CC value into an integer that is 0 if CC == 0,
-// less than zero if CC == 1 and greater than zero if CC >= 2.
+// greater than zero if CC == 1 and less than zero if CC >= 2.
 // The sequence starts with IPM, which puts CC into bits 29 and 28
 // of an integer and clears bits 30 and 31.
 static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg,
                               SelectionDAG &DAG) {
   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
-  SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
-                            DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
-  SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
-                             DAG.getConstant(31, DL, MVT::i32));
-  return ROTL;
+  SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM,
+                            DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32));
+  SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL,
+                            DAG.getConstant(30, DL, MVT::i32));
+  return SRA;
 }
 
 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
@@ -184,7 +183,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
   if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
     uint64_t Bytes = CSize->getZExtValue();
     assert(Bytes > 0 && "Caller should have handled 0-size case");
-    SDValue CCReg = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+    // Swap operands to invert CC == 1 vs. CC == 2 cases.
+    SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
     Chain = CCReg.getValue(1);
     return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
   }
@@ -232,7 +232,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
     SDValue Src2, MachinePointerInfo Op1PtrInfo,
     MachinePointerInfo Op2PtrInfo) const {
   SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other);
-  SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+  // Swap operands to invert CC == 1 vs. CC == 2 cases.
+  SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1,
                                DAG.getConstant(0, DL, MVT::i32));
   SDValue CCReg = Unused.getValue(1);
   Chain = Unused.getValue(2);
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 93cd970c30c6..7d63bae83cf3 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 195fa20a2c90..e79dfc5b4b9e 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,6 +46,7 @@ private:
   bool shortenOn001(MachineInstr &MI, unsigned Opcode);
   bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
   bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+  bool shortenSelect(MachineInstr &MI, unsigned Opcode);
 
   const SystemZInstrInfo *TII;
   const TargetRegisterInfo *TRI;
@@ -176,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
   return false;
 }
 
+// MI is a three-operand select instruction.  If one of the sources matches
+// the destination, convert to the equivalent load-on-condition.
+bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
+  if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+    MI.setDesc(TII->get(Opcode));
+    MI.tieOperands(0, 1);
+    return true;
+  }
+  if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+    TII->commuteInstruction(MI, false, 1, 2);
+    MI.setDesc(TII->get(Opcode));
+    MI.tieOperands(0, 1);
+    return true;
+  }
+  return false;
+}
+
 // Process all instructions in MBB.  Return true if something changed.
 bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
@@ -196,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
       Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
       break;
 
+    case SystemZ::SELR:
+      Changed |= shortenSelect(MI, SystemZ::LOCR);
+      break;
+
+    case SystemZ::SELFHR:
+      Changed |= shortenSelect(MI, SystemZ::LOCFHR);
+      break;
+
+    case SystemZ::SELGR:
+      Changed |= shortenSelect(MI, SystemZ::LOCGR);
+      break;
+
     case SystemZ::WFADB:
       Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
       break;
@@ -300,6 +329,31 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
     case SystemZ::VST64:
       Changed |= shortenOn0(MI, SystemZ::STD);
       break;
+
+    default: {
+      int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode());
+      if (TwoOperandOpcode == -1)
+        break;
+
+      if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) &&
+          (!MI.isCommutable() ||
+           MI.getOperand(0).getReg() != MI.getOperand(2).getReg() ||
+           !TII->commuteInstruction(MI, false, 1, 2)))
+          break;
+
+      MI.setDesc(TII->get(TwoOperandOpcode));
+      MI.tieOperands(0, 1);
+      if (TwoOperandOpcode == SystemZ::SLL ||
+          TwoOperandOpcode == SystemZ::SLA ||
+          TwoOperandOpcode == SystemZ::SRL ||
+          TwoOperandOpcode == SystemZ::SRA) {
+        // Shifts use only the low 6 count bits, so masking to 12 bits is safe.
+        MachineOperand &ImmMO = MI.getOperand(3);
+        ImmMO.setImm(ImmMO.getImm() & 0xfff);
+      }
+      Changed = true;
+      break;
+    }
     }
 
     LiveRegs.stepBackward(MI);
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index fb030a207bc7..5e8af81842c4 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -56,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
       HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
       HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
       HasInsertReferenceBitsMultiple(false),
+      HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
+      HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
+      HasEnhancedSort(false), HasDeflateConversion(false),
       TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
       TLInfo(TM, *this), TSInfo(), FrameLowering() {}
 
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index cb6b21a1d465..fa3f65d93c91 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -1,9 +1,8 @@
 //===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,6 +62,12 @@ protected:
   bool HasVectorEnhancements1;
   bool HasVectorPackedDecimal;
   bool HasInsertReferenceBitsMultiple;
+  bool HasMiscellaneousExtensions3;
+  bool HasMessageSecurityAssist9;
+  bool HasVectorEnhancements2;
+  bool HasVectorPackedDecimalEnhancement;
+  bool HasEnhancedSort;
+  bool HasDeflateConversion;
 
 private:
   Triple TargetTriple;
@@ -210,6 +215,30 @@ public:
     return HasInsertReferenceBitsMultiple;
   }
 
+  // Return true if the target has the miscellaneous-extensions facility 3.
+  bool hasMiscellaneousExtensions3() const {
+    return HasMiscellaneousExtensions3;
+  }
+
+  // Return true if the target has the message-security-assist
+  // extension facility 9.
+  bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; }
+
+  // Return true if the target has the vector-enhancements facility 2.
+  bool hasVectorEnhancements2() const { return HasVectorEnhancements2; }
+
+  // Return true if the target has the vector-packed-decimal
+  // enhancement facility.
+  bool hasVectorPackedDecimalEnhancement() const {
+    return HasVectorPackedDecimalEnhancement;
+  }
+
+  // Return true if the target has the enhanced-sort facility.
+  bool hasEnhancedSort() const { return HasEnhancedSort; }
+
+  // Return true if the target has the deflate-conversion facility.
+  bool hasDeflateConversion() const { return HasDeflateConversion; }
+
   // Return true if GV can be accessed using LARL for reloc model RM
   // and code model CM.
   bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
index 5dbd23d420a3..478848c30701 100644
--- a/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -356,8 +355,8 @@ bool SystemZTDCPass::runOnFunction(Function &F) {
       if (!Worthy)
         continue;
       // Call the intrinsic, compare result with 0.
-      Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
-                                                 V->getType());
+      Function *TDCFunc =
+          Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
       IRBuilder<> IRB(I);
       Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
       Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 9596a2b6388d..5c49e6eff0bf 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 #include "SystemZ.h"
 #include "SystemZMachineScheduler.h"
 #include "SystemZTargetTransformInfo.h"
+#include "TargetInfo/SystemZTargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -133,9 +133,9 @@ getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
                              bool JIT) {
   if (CM) {
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     if (*CM == CodeModel::Kernel)
-      report_fatal_error("Target does not support the kernel CodeModel");
+      report_fatal_error("Target does not support the kernel CodeModel", false);
     return *CM;
   }
   if (JIT)
@@ -183,6 +183,7 @@ public:
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
+  void addPostRewrite() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
 };
@@ -212,7 +213,16 @@ bool SystemZPassConfig::addILPOpts() {
   return true;
 }
 
+void SystemZPassConfig::addPostRewrite() {
+  addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+}
+
 void SystemZPassConfig::addPreSched2() {
+  // PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
+  // is not called).
+  if (getOptLevel() == CodeGenOpt::None)
+    addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+
   addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
 
   if (getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 52bf8bba55de..ac04a080f580 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,9 +1,8 @@
 //=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 129610fe095b..145cf87ef9f5 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -467,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost(
     if (Opcode == Instruction::FRem)
       return LIBCALL_COST;
 
+    // Give discount for some combined logical operations if supported.
+    if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+      if (Opcode == Instruction::Xor) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() &&
+                (I->getOpcode() == Instruction::And ||
+                 I->getOpcode() == Instruction::Or ||
+                 I->getOpcode() == Instruction::Xor))
+              return 0;
+        }
+      }
+      else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+        for (const Value *A : Args) {
+          if (const Instruction *I = dyn_cast<Instruction>(A))
+            if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+              return 0;
+        }
+      }
+    }
+
     // Or requires one instruction, although it has custom handling for i64.
     if (Opcode == Instruction::Or)
       return 1;
@@ -687,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       // TODO: Fix base implementation which could simplify things a bit here
       // (seems to miss on differentiating on scalar/vector types).
 
-      // Only 64 bit vector conversions are natively supported.
-      if (DstScalarBits == 64) {
-        if (SrcScalarBits == 64)
+      // Only 64 bit vector conversions are natively supported before arch13.
+      if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
+        if (SrcScalarBits == DstScalarBits)
           return NumDstVectors;
 
         if (SrcScalarBits == 1)
@@ -857,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
     case Instruction::Select:
       if (ValTy->isFloatingPointTy())
         return 4; // No load on condition for FP - costs a conditional jump.
-      return 1; // Load On Condition.
+      return 1; // Load On Condition / Select Register.
     }
   }
 
@@ -1010,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
 
   // Store/Load reversed saves one instruction.
-  if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+  if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
+      I != nullptr) {
     if (Opcode == Instruction::Load && I->hasOneUse()) {
       const Instruction *LdUser = cast<Instruction>(*I->user_begin());
       // In case of load -> bswap -> store, return normal cost for the load.
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index e79bee1ea3a8..16ce2ef1d7a0 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index e2b9efd35d3e..713a55ee8400 100644
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -1,13 +1,12 @@
 //===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "SystemZ.h"
+#include "TargetInfo/SystemZTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h
new file mode 100644
index 000000000000..cad141c81e6b
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- SystemZTargetInfo.h - SystemZ target implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSystemZTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index f23ea72eb513..8a46c77492c5 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -1,9 +1,8 @@
 //===-- Target.cpp --------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index e8b71924e0d9..256514c8c22d 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -1,9 +1,8 @@
 //===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index bb937923b47e..17274e1c2c6e 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,6 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
 
   // Reset various EH DWARF encodings.
   PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
+  CallSiteEncoding = dwarf::DW_EH_PE_uleb128;
 }
 
 TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 39d5705b2a53..634866d93570 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- TargetMachine.cpp - General Target Information ---------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -145,6 +144,12 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
       isa<GlobalVariable>(GV))
     return false;
 
+  // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
+  // remain unresolved in the link, they can be resolved to zero, which is
+  // outside the current DSO.
+  if (TT.isOSBinFormatCOFF() && GV && GV->hasExternalWeakLinkage())
+    return false;
+
   // Every other GV is local on COFF.
   // Make an exception for windows OS in the triple: Some firmware builds use
   // *-win32-macho triples. This (accidentally?) produced windows relocations
@@ -168,7 +173,12 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
     return GV && GV->isStrongDefinitionForLinker();
   }
 
-  assert(TT.isOSBinFormatELF());
+  // Due to the AIX linkage model, any global with default visibility is
+  // considered non-local.
+  if (TT.isOSBinFormatXCOFF())
+    return false;
+
+  assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
   assert(RM != Reloc::DynamicNoPIC);
 
   bool IsExecutable =
@@ -196,7 +206,7 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
       return true;
   }
 
-  // ELF supports preemption of other symbols.
+  // ELF & wasm support preemption of other symbols.
   return false;
 }
 
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index bae45ae28c45..5d9029682fdd 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -1,9 +1,8 @@
 //===-- TargetMachine.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 0a5908f43790..09628e872dd5 100644
--- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -1,9 +1,8 @@
 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,12 +15,15 @@
 
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "WebAssembly.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -87,9 +89,8 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
   }
 
   bool isToken() const override { return Kind == Token; }
-  bool isImm() const override {
-    return Kind == Integer || Kind == Float || Kind == Symbol;
-  }
+  bool isImm() const override { return Kind == Integer || Kind == Symbol; }
+  bool isFPImm() const { return Kind == Float; }
   bool isMem() const override { return false; }
   bool isReg() const override { return false; }
   bool isBrList() const { return Kind == BrList; }
@@ -116,12 +117,18 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
     assert(N == 1 && "Invalid number of operands!");
     if (Kind == Integer)
       Inst.addOperand(MCOperand::createImm(Int.Val));
-    else if (Kind == Float)
-      Inst.addOperand(MCOperand::createFPImm(Flt.Val));
     else if (Kind == Symbol)
       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
     else
-      llvm_unreachable("Should be immediate or symbol!");
+      llvm_unreachable("Should be integer immediate or symbol!");
+  }
+
+  void addFPImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == Float)
+      Inst.addOperand(MCOperand::createFPImm(Flt.Val));
+    else
+      llvm_unreachable("Should be float immediate!");
   }
 
   void addBrListOperands(MCInst &Inst, unsigned N) const {
@@ -170,6 +177,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
     FunctionStart,
     FunctionLocals,
     Instructions,
+    EndFunction,
+    DataSection,
   } CurrentState = FileStart;
 
   // For ensuring blocks are properly nested.
@@ -187,6 +196,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
   // We track this to see if a .functype following a label is the same,
   // as this is how we recognize the start of a function.
   MCSymbol *LastLabel = nullptr;
+  MCSymbol *LastFunctionLabel = nullptr;
 
 public:
   WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -250,13 +260,13 @@ public:
   }
 
   bool ensureEmptyNestingStack() {
-    auto err = !NestingStack.empty();
+    auto Err = !NestingStack.empty();
     while (!NestingStack.empty()) {
       error(Twine("Unmatched block construct(s) at function end: ") +
             nestingString(NestingStack.back()).first);
       NestingStack.pop_back();
     }
-    return err;
+    return Err;
   }
 
   bool isNext(AsmToken::TokenKind Kind) {
@@ -298,6 +308,8 @@ public:
         Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
         Type == "f64x2")
       return wasm::ValType::V128;
+    if (Type == "exnref")
+      return wasm::ValType::EXNREF;
     return Optional<wasm::ValType>();
   }
 
@@ -308,7 +320,7 @@ public:
         .Case("f32", WebAssembly::ExprType::F32)
         .Case("f64", WebAssembly::ExprType::F64)
         .Case("v128", WebAssembly::ExprType::V128)
-        .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+        .Case("exnref", WebAssembly::ExprType::Exnref)
         .Case("void", WebAssembly::ExprType::Void)
         .Default(WebAssembly::ExprType::Invalid);
   }
@@ -317,7 +329,7 @@ public:
     while (Lexer.is(AsmToken::Identifier)) {
       auto Type = parseType(Lexer.getTok().getString());
       if (!Type)
-        return true;
+        return error("unknown type: ", Lexer.getTok());
       Types.push_back(Type.getValue());
       Parser.Lex();
       if (!isNext(AsmToken::Comma))
@@ -337,27 +349,67 @@ public:
     Parser.Lex();
   }
 
-  bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
-                                       StringRef InstName) {
-    parseSingleInteger(IsNegative, Operands);
+  bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
+    auto &Flt = Lexer.getTok();
+    double Val;
+    if (Flt.getString().getAsDouble(Val, false))
+      return error("Cannot parse real: ", Flt);
+    if (IsNegative)
+      Val = -Val;
+    Operands.push_back(make_unique<WebAssemblyOperand>(
+        WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
+        WebAssemblyOperand::FltOp{Val}));
+    Parser.Lex();
+    return false;
+  }
+
+  bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
+    if (Lexer.isNot(AsmToken::Identifier))
+      return true;
+    auto &Flt = Lexer.getTok();
+    auto S = Flt.getString();
+    double Val;
+    if (S.compare_lower("infinity") == 0) {
+      Val = std::numeric_limits<double>::infinity();
+    } else if (S.compare_lower("nan") == 0) {
+      Val = std::numeric_limits<double>::quiet_NaN();
+    } else {
+      return true;
+    }
+    if (IsNegative)
+      Val = -Val;
+    Operands.push_back(make_unique<WebAssemblyOperand>(
+        WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
+        WebAssemblyOperand::FltOp{Val}));
+    Parser.Lex();
+    return false;
+  }
+
+  bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
     // FIXME: there is probably a cleaner way to do this.
-    auto IsLoadStore = InstName.startswith("load") ||
-                       InstName.startswith("store") ||
-                       InstName.startswith("atomic_load") ||
-                       InstName.startswith("atomic_store");
-    if (IsLoadStore) {
-      // Parse load/store operands of the form: offset align
-      auto &Offset = Lexer.getTok();
-      if (Offset.is(AsmToken::Integer)) {
+    auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
+                       InstName.find(".store") != StringRef::npos;
+    auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
+    if (IsLoadStore || IsAtomic) {
+      // Parse load/store operands of the form: offset:p2align=align
+      if (IsLoadStore && isNext(AsmToken::Colon)) {
+        auto Id = expectIdent();
+        if (Id != "p2align")
+          return error("Expected p2align, instead got: " + Id);
+        if (expect(AsmToken::Equal, "="))
+          return true;
+        if (!Lexer.is(AsmToken::Integer))
+          return error("Expected integer constant");
         parseSingleInteger(false, Operands);
       } else {
-        // Alignment not specified.
-        // FIXME: correctly derive a default from the instruction.
+        // Alignment not specified (or atomics, must use default alignment).
         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
-        // an opcode until after the assembly matcher.
+        // an opcode until after the assembly matcher, so set a default to fix
+        // up later.
+        auto Tok = Lexer.getTok();
         Operands.push_back(make_unique<WebAssemblyOperand>(
-            WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
-            WebAssemblyOperand::IntOp{0}));
+            WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
+            WebAssemblyOperand::IntOp{-1}));
       }
     }
     return false;
@@ -400,51 +452,45 @@ public:
     Operands.push_back(make_unique<WebAssemblyOperand>(
         WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
         WebAssemblyOperand::TokOp{Name}));
-    auto NamePair = Name.split('.');
-    // If no '.', there is no type prefix.
-    auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
 
     // If this instruction is part of a control flow structure, ensure
     // proper nesting.
     bool ExpectBlockType = false;
-    if (BaseName == "block") {
+    if (Name == "block") {
       push(Block);
       ExpectBlockType = true;
-    } else if (BaseName == "loop") {
+    } else if (Name == "loop") {
       push(Loop);
       ExpectBlockType = true;
-    } else if (BaseName == "try") {
+    } else if (Name == "try") {
       push(Try);
       ExpectBlockType = true;
-    } else if (BaseName == "if") {
+    } else if (Name == "if") {
       push(If);
       ExpectBlockType = true;
-    } else if (BaseName == "else") {
-      if (pop(BaseName, If))
+    } else if (Name == "else") {
+      if (pop(Name, If))
         return true;
       push(Else);
-    } else if (BaseName == "catch") {
-      if (pop(BaseName, Try))
-        return true;
-      push(Try);
-    } else if (BaseName == "catch_all") {
-      if (pop(BaseName, Try))
+    } else if (Name == "catch") {
+      if (pop(Name, Try))
         return true;
       push(Try);
-    } else if (BaseName == "end_if") {
-      if (pop(BaseName, If, Else))
+    } else if (Name == "end_if") {
+      if (pop(Name, If, Else))
         return true;
-    } else if (BaseName == "end_try") {
-      if (pop(BaseName, Try))
+    } else if (Name == "end_try") {
+      if (pop(Name, Try))
         return true;
-    } else if (BaseName == "end_loop") {
-      if (pop(BaseName, Loop))
+    } else if (Name == "end_loop") {
+      if (pop(Name, Loop))
         return true;
-    } else if (BaseName == "end_block") {
-      if (pop(BaseName, Block))
+    } else if (Name == "end_block") {
+      if (pop(Name, Block))
         return true;
-    } else if (BaseName == "end_function") {
-      if (pop(BaseName, Function) || ensureEmptyNestingStack())
+    } else if (Name == "end_function") {
+      CurrentState = EndFunction;
+      if (pop(Name, Function) || ensureEmptyNestingStack())
         return true;
     }
 
@@ -452,6 +498,8 @@ public:
       auto &Tok = Lexer.getTok();
       switch (Tok.getKind()) {
       case AsmToken::Identifier: {
+        if (!parseSpecialFloatMaybe(false, Operands))
+          break;
         auto &Id = Lexer.getTok();
         if (ExpectBlockType) {
           // Assume this identifier is a block_type.
@@ -464,33 +512,39 @@ public:
           // Assume this identifier is a label.
           const MCExpr *Val;
           SMLoc End;
-          if (Parser.parsePrimaryExpr(Val, End))
+          if (Parser.parseExpression(Val, End))
             return error("Cannot parse symbol: ", Lexer.getTok());
           Operands.push_back(make_unique<WebAssemblyOperand>(
               WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
               WebAssemblyOperand::SymOp{Val}));
+          if (checkForP2AlignIfLoadStore(Operands, Name))
+            return true;
         }
         break;
       }
       case AsmToken::Minus:
         Parser.Lex();
-        if (Lexer.isNot(AsmToken::Integer))
-          return error("Expected integer instead got: ", Lexer.getTok());
-        if (parseOperandStartingWithInteger(true, Operands, BaseName))
-          return true;
+        if (Lexer.is(AsmToken::Integer)) {
+          parseSingleInteger(true, Operands);
+          if (checkForP2AlignIfLoadStore(Operands, Name))
+            return true;
+        } else if(Lexer.is(AsmToken::Real)) {
+          if (parseSingleFloat(true, Operands))
+            return true;
+        } else if (!parseSpecialFloatMaybe(true, Operands)) {
+        } else {
+          return error("Expected numeric constant instead got: ",
+                       Lexer.getTok());
+        }
         break;
       case AsmToken::Integer:
-        if (parseOperandStartingWithInteger(false, Operands, BaseName))
+        parseSingleInteger(false, Operands);
+        if (checkForP2AlignIfLoadStore(Operands, Name))
           return true;
         break;
       case AsmToken::Real: {
-        double Val;
-        if (Tok.getString().getAsDouble(Val, false))
-          return error("Cannot parse real: ", Tok);
-        Operands.push_back(make_unique<WebAssemblyOperand>(
-            WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
-            WebAssemblyOperand::FltOp{Val}));
-        Parser.Lex();
+        if (parseSingleFloat(false, Operands))
+          return true;
         break;
       }
       case AsmToken::LCurly: {
@@ -547,6 +601,17 @@ public:
     return false;
   }
 
+  bool CheckDataSection() {
+    if (CurrentState != DataSection) {
+      auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
+      if (WS && WS->getKind().isText())
+        return error("data directive must occur in a data segment: ",
+                     Lexer.getTok());
+    }
+    CurrentState = DataSection;
+    return false;
+  }
+
   // This function processes wasm-specific directives streamed to
   // WebAssemblyTargetStreamer, all others go to the generic parser
   // (see WasmAsmParser).
@@ -561,6 +626,7 @@ public:
     auto &Out = getStreamer();
     auto &TOut =
         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+    auto &Ctx = Out.getContext();
 
     // TODO: any time we return an error, at least one token must have been
     // consumed, otherwise this will not signal an error to the caller.
@@ -578,8 +644,7 @@ public:
       if (!Type)
         return error("Unknown type in .globaltype directive: ", TypeTok);
       // Now set this symbol with the correct type.
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
       WasmSym->setGlobalType(
           wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
@@ -597,13 +662,13 @@ public:
       auto SymName = expectIdent();
       if (SymName.empty())
         return true;
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       if (CurrentState == Label && WasmSym == LastLabel) {
         // This .functype indicates a start of a function.
         if (ensureEmptyNestingStack())
           return true;
         CurrentState = FunctionStart;
+        LastFunctionLabel = LastLabel;
         push(Function);
       }
       auto Signature = make_unique<wasm::WasmSignature>();
@@ -621,8 +686,7 @@ public:
       auto SymName = expectIdent();
       if (SymName.empty())
         return true;
-      auto WasmSym = cast<MCSymbolWasm>(
-          TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
       auto Signature = make_unique<wasm::WasmSignature>();
       if (parseRegTypeList(Signature->Params))
         return true;
@@ -646,6 +710,30 @@ public:
       return expect(AsmToken::EndOfStatement, "EOL");
     }
 
+    if (DirectiveID.getString() == ".int8" ||
+        DirectiveID.getString() == ".int16" ||
+        DirectiveID.getString() == ".int32" ||
+        DirectiveID.getString() == ".int64") {
+      if (CheckDataSection()) return true;
+      const MCExpr *Val;
+      SMLoc End;
+      if (Parser.parseExpression(Val, End))
+        return error("Cannot parse .int expression: ", Lexer.getTok());
+      size_t NumBits = 0;
+      DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
+      Out.EmitValue(Val, NumBits / 8, End);
+      return expect(AsmToken::EndOfStatement, "EOL");
+    }
+
+    if (DirectiveID.getString() == ".asciz") {
+      if (CheckDataSection()) return true;
+      std::string S;
+      if (Parser.parseEscapedString(S))
+        return error("Cannot parse string constant: ", Lexer.getTok());
+      Out.EmitBytes(StringRef(S.c_str(), S.length() + 1));
+      return expect(AsmToken::EndOfStatement, "EOL");
+    }
+
     return true; // We didn't process this directive.
   }
 
@@ -667,8 +755,19 @@ public:
             *Out.getTargetStreamer());
         TOut.emitLocal(SmallVector<wasm::ValType, 0>());
       }
-      CurrentState = Instructions;
+      // Fix unknown p2align operands.
+      auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
+      if (Align != -1U) {
+        auto &Op0 = Inst.getOperand(0);
+        if (Op0.getImm() == -1)
+          Op0.setImm(Align);
+      }
       Out.EmitInstruction(Inst, getSTI());
+      if (CurrentState == EndFunction) {
+        onEndOfFunction();
+      } else {
+        CurrentState = Instructions;
+      }
       return false;
     }
     case Match_MissingFeature:
@@ -694,6 +793,35 @@ public:
     llvm_unreachable("Implement any new match types added!");
   }
 
+  void doBeforeLabelEmit(MCSymbol *Symbol) override {
+    // Start a new section for the next function automatically, since our
+    // object writer expects each function to have its own section. This way
+    // the user can't forget this "convention".
+    auto SymName = Symbol->getName();
+    if (SymName.startswith(".L"))
+      return; // Local Symbol.
+    // Only create a new text section if we're already in one.
+    auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
+    if (!CWS || !CWS->getKind().isText())
+      return;
+    auto SecName = ".text." + SymName;
+    auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
+    getStreamer().SwitchSection(WS);
+  }
+
+  void onEndOfFunction() {
+    // Automatically output a .size directive, so it becomes optional for the
+    // user.
+    if (!LastFunctionLabel) return;
+    auto TempSym = getContext().createLinkerPrivateTempSymbol();
+    getStreamer().EmitLabel(TempSym);
+    auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
+    auto End = MCSymbolRefExpr::create(TempSym, getContext());
+    auto Expr =
+        MCBinaryExpr::create(MCBinaryExpr::Sub, End, Start, getContext());
+    getStreamer().emitELFSize(LastFunctionLabel, Expr);
+  }
+
   void onEndOfFile() override { ensureEmptyNestingStack(); }
 };
 } // end anonymous namespace
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 6acc9b20eed2..f9bf3f85d30f 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -1,9 +1,8 @@
 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -15,7 +14,9 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
@@ -45,6 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
+  DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+                             ArrayRef<uint8_t> Bytes, uint64_t Address,
+                             raw_ostream &VStream,
+                             raw_ostream &CStream) const override;
 
 public:
   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -77,7 +82,7 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
 }
 
 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
-                    bool Signed = false) {
+                    bool Signed) {
   unsigned N = 0;
   const char *Error = nullptr;
   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
@@ -104,9 +109,8 @@ template <typename T>
 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
   if (Size + sizeof(T) > Bytes.size())
     return false;
-  T Val;
-  memcpy(&Val, Bytes.data() + Size, sizeof(T));
-  support::endian::byte_swap<T, support::endianness::little>(Val);
+  T Val = support::endian::read<T, support::endianness::little, 1>(
+      Bytes.data() + Size);
   Size += sizeof(T);
   if (std::is_floating_point<T>::value) {
     MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
@@ -116,6 +120,41 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
   return true;
 }
 
+MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+    raw_ostream &VStream, raw_ostream &CStream) const {
+  Size = 0;
+  if (Address == 0) {
+    // Start of a code section: we're parsing only the function count.
+    int64_t FunctionCount;
+    if (!nextLEB(FunctionCount, Bytes, Size, false))
+      return MCDisassembler::Fail;
+    outs() << "        # " << FunctionCount << " functions in section.";
+  } else {
+    // Parse the start of a single function.
+    int64_t BodySize, LocalEntryCount;
+    if (!nextLEB(BodySize, Bytes, Size, false) ||
+        !nextLEB(LocalEntryCount, Bytes, Size, false))
+      return MCDisassembler::Fail;
+    if (LocalEntryCount) {
+      outs() << "        .local ";
+      for (int64_t I = 0; I < LocalEntryCount; I++) {
+        int64_t Count, Type;
+        if (!nextLEB(Count, Bytes, Size, false) ||
+            !nextLEB(Type, Bytes, Size, false))
+          return MCDisassembler::Fail;
+        for (int64_t J = 0; J < Count; J++) {
+          if (I || J)
+            outs() << ", ";
+          outs() << WebAssembly::anyTypeToString(Type);
+        }
+      }
+    }
+  }
+  outs() << "\n";
+  return MCDisassembler::Success;
+}
+
 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
     raw_ostream & /*OS*/, raw_ostream &CS) const {
@@ -138,7 +177,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     if (!WasmInst)
       return MCDisassembler::Fail;
     int64_t PrefixedOpc;
-    if (!nextLEB(PrefixedOpc, Bytes, Size))
+    if (!nextLEB(PrefixedOpc, Bytes, Size, false))
       return MCDisassembler::Fail;
     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
       return MCDisassembler::Fail;
@@ -161,6 +200,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
     case WebAssembly::OPERAND_OFFSET32:
     case WebAssembly::OPERAND_P2ALIGN:
     case WebAssembly::OPERAND_TYPEINDEX:
+    case WebAssembly::OPERAND_EVENT:
     case MCOI::OPERAND_IMMEDIATE: {
       if (!parseLEBImmediate(MI, Size, Bytes, false))
         return MCDisassembler::Fail;
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
deleted file mode 100644
index 15532d7ff1a6..000000000000
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Print MCInst instructions to wasm format.
-///
-//===----------------------------------------------------------------------===//
-
-#include "InstPrinter/WebAssemblyInstPrinter.h"
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "WebAssemblyGenAsmWriter.inc"
-
-WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
-                                               const MCInstrInfo &MII,
-                                               const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
-                                          unsigned RegNo) const {
-  assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
-  // Note that there's an implicit local.get/local.set here!
-  OS << "$" << RegNo;
-}
-
-void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                       StringRef Annot,
-                                       const MCSubtargetInfo &STI) {
-  // Print the instruction (this uses the AsmStrings from the .td files).
-  printInstruction(MI, OS);
-
-  // Print any additional variadic operands.
-  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
-  if (Desc.isVariadic())
-    for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) {
-      // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
-      // we have an extra flags operand which is not currently printed, for
-      // compatiblity reasons.
-      if (i != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
-                      MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
-                     i != Desc.getNumOperands()))
-        OS << ", ";
-      printOperand(MI, i, OS);
-    }
-
-  // Print any added annotation.
-  printAnnotation(OS, Annot);
-
-  if (CommentStream) {
-    // Observe any effects on the control flow stack, for use in annotating
-    // control flow label references.
-    unsigned Opc = MI->getOpcode();
-    switch (Opc) {
-    default:
-      break;
-
-    case WebAssembly::LOOP:
-    case WebAssembly::LOOP_S:
-      printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
-      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
-      break;
-
-    case WebAssembly::BLOCK:
-    case WebAssembly::BLOCK_S:
-      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
-      break;
-
-    case WebAssembly::TRY:
-    case WebAssembly::TRY_S:
-      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
-      EHPadStack.push_back(EHPadStackCounter++);
-      LastSeenEHInst = TRY;
-      break;
-
-    case WebAssembly::END_LOOP:
-    case WebAssembly::END_LOOP_S:
-      if (ControlFlowStack.empty()) {
-        printAnnotation(OS, "End marker mismatch!");
-      } else {
-        ControlFlowStack.pop_back();
-      }
-      break;
-
-    case WebAssembly::END_BLOCK:
-    case WebAssembly::END_BLOCK_S:
-      if (ControlFlowStack.empty()) {
-        printAnnotation(OS, "End marker mismatch!");
-      } else {
-        printAnnotation(
-            OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
-      }
-      break;
-
-    case WebAssembly::END_TRY:
-    case WebAssembly::END_TRY_S:
-      if (ControlFlowStack.empty()) {
-        printAnnotation(OS, "End marker mismatch!");
-      } else {
-        printAnnotation(
-            OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
-        LastSeenEHInst = END_TRY;
-      }
-      break;
-
-    case WebAssembly::CATCH_I32:
-    case WebAssembly::CATCH_I32_S:
-    case WebAssembly::CATCH_I64:
-    case WebAssembly::CATCH_I64_S:
-    case WebAssembly::CATCH_ALL:
-    case WebAssembly::CATCH_ALL_S:
-      // There can be multiple catch instructions for one try instruction, so we
-      // print a label only for the first 'catch' label.
-      if (LastSeenEHInst != CATCH) {
-        if (EHPadStack.empty()) {
-          printAnnotation(OS, "try-catch mismatch!");
-        } else {
-          printAnnotation(OS,
-                          "catch" + utostr(EHPadStack.pop_back_val()) + ':');
-        }
-      }
-      LastSeenEHInst = CATCH;
-      break;
-    }
-
-    // Annotate any control flow label references.
-    unsigned NumFixedOperands = Desc.NumOperands;
-    SmallSet<uint64_t, 8> Printed;
-    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
-      // See if this operand denotes a basic block target.
-      if (i < NumFixedOperands) {
-        // A non-variable_ops operand, check its type.
-        if (Desc.OpInfo[i].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
-          continue;
-      } else {
-        // A variable_ops operand, which currently can be immediates (used in
-        // br_table) which are basic block targets, or for call instructions
-        // when using -wasm-keep-registers (in which case they are registers,
-        // and should not be processed).
-        if (!MI->getOperand(i).isImm())
-          continue;
-      }
-      uint64_t Depth = MI->getOperand(i).getImm();
-      if (!Printed.insert(Depth).second)
-        continue;
-
-      if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
-        if (Depth > EHPadStack.size()) {
-          printAnnotation(OS, "Invalid depth argument!");
-        } else if (Depth == EHPadStack.size()) {
-          // This can happen when rethrow instruction breaks out of all nests
-          // and throws up to the current function's caller.
-          printAnnotation(OS, utostr(Depth) + ": " + "to caller");
-        } else {
-          uint64_t CatchNo = EHPadStack.rbegin()[Depth];
-          printAnnotation(OS, utostr(Depth) + ": " + "down to catch" +
-                                  utostr(CatchNo));
-        }
-
-      } else {
-        if (Depth >= ControlFlowStack.size()) {
-          printAnnotation(OS, "Invalid depth argument!");
-        } else {
-          const auto &Pair = ControlFlowStack.rbegin()[Depth];
-          printAnnotation(OS, utostr(Depth) + ": " +
-                                  (Pair.second ? "up" : "down") + " to label" +
-                                  utostr(Pair.first));
-        }
-      }
-    }
-  }
-}
-
-static std::string toString(const APFloat &FP) {
-  // Print NaNs with custom payloads specially.
-  if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
-      !FP.bitwiseIsEqual(
-          APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
-    APInt AI = FP.bitcastToAPInt();
-    return std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
-           utohexstr(AI.getZExtValue() &
-                         (AI.getBitWidth() == 32 ? INT64_C(0x007fffff)
-                                                 : INT64_C(0x000fffffffffffff)),
-                     /*LowerCase=*/true);
-  }
-
-  // Use C99's hexadecimal floating-point representation.
-  static const size_t BufBytes = 128;
-  char buf[BufBytes];
-  auto Written = FP.convertToHexString(
-      buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven);
-  (void)Written;
-  assert(Written != 0);
-  assert(Written < BufBytes);
-  return buf;
-}
-
-void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                          raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned WAReg = Op.getReg();
-    if (int(WAReg) >= 0)
-      printRegName(O, WAReg);
-    else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
-      O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
-    else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
-      O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
-    else
-      O << "$drop";
-    // Add a '=' suffix if this is a def.
-    if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
-      O << '=';
-  } else if (Op.isImm()) {
-    O << Op.getImm();
-  } else if (Op.isFPImm()) {
-    const MCInstrDesc &Desc = MII.get(MI->getOpcode());
-    const MCOperandInfo &Info = Desc.OpInfo[OpNo];
-    if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
-      // TODO: MC converts all floating point immediate operands to double.
-      // This is fine for numeric values, but may cause NaNs to change bits.
-      O << ::toString(APFloat(float(Op.getFPImm())));
-    } else {
-      assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
-      O << ::toString(APFloat(Op.getFPImm()));
-    }
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo,
-                                         raw_ostream &O) {
-  O << "{";
-  for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) {
-    if (I != OpNo)
-      O << ", ";
-    O << MI->getOperand(I).getImm();
-  }
-  O << "}";
-}
-
-void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
-                                                            unsigned OpNo,
-                                                            raw_ostream &O) {
-  int64_t Imm = MI->getOperand(OpNo).getImm();
-  if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
-    return;
-  O << ":p2align=" << Imm;
-}
-
-void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
-                                                              unsigned OpNo,
-                                                              raw_ostream &O) {
-  auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
-  if (Imm != wasm::WASM_TYPE_NORESULT)
-    O << WebAssembly::anyTypeToString(Imm);
-}
-
-// We have various enums representing a subset of these types, use this
-// function to convert any of them to text.
-const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
-  switch (Ty) {
-  case wasm::WASM_TYPE_I32:
-    return "i32";
-  case wasm::WASM_TYPE_I64:
-    return "i64";
-  case wasm::WASM_TYPE_F32:
-    return "f32";
-  case wasm::WASM_TYPE_F64:
-    return "f64";
-  case wasm::WASM_TYPE_V128:
-    return "v128";
-  case wasm::WASM_TYPE_FUNCREF:
-    return "funcref";
-  case wasm::WASM_TYPE_FUNC:
-    return "func";
-  case wasm::WASM_TYPE_EXCEPT_REF:
-    return "except_ref";
-  case wasm::WASM_TYPE_NORESULT:
-    return "void";
-  default:
-    return "invalid_type";
-  }
-}
-
-const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
-  return anyTypeToString(static_cast<unsigned>(Ty));
-}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
deleted file mode 100644
index 5ad45c7d5c7f..000000000000
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ /dev/null
@@ -1,66 +0,0 @@
-// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This class prints an WebAssembly MCInst to wasm file syntax.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
-#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/BinaryFormat/Wasm.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/MachineValueType.h"
-
-namespace llvm {
-
-class MCSubtargetInfo;
-
-class WebAssemblyInstPrinter final : public MCInstPrinter {
-  uint64_t ControlFlowCounter = 0;
-  uint64_t EHPadStackCounter = 0;
-  SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
-  SmallVector<uint64_t, 4> EHPadStack;
-
-  enum EHInstKind { TRY, CATCH, END_TRY };
-  EHInstKind LastSeenEHInst = END_TRY;
-
-public:
-  WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                         const MCRegisterInfo &MRI);
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  // Used by tblegen code.
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
-                                      raw_ostream &O);
-  void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
-                                        raw_ostream &O);
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-};
-
-namespace WebAssembly {
-
-const char *typeToString(wasm::ValType Ty);
-const char *anyTypeToString(unsigned Ty);
-
-} // end namespace WebAssembly
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 0726dd481174..70b409cf4a90 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -36,7 +35,6 @@ class WebAssemblyAsmBackend final : public MCAsmBackend {
 public:
   explicit WebAssemblyAsmBackend(bool Is64Bit)
       : MCAsmBackend(support::little), Is64Bit(Is64Bit) {}
-  ~WebAssemblyAsmBackend() override {}
 
   unsigned getNumFixupKinds() const override {
     return WebAssembly::NumTargetFixupKinds;
@@ -77,9 +75,9 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       // WebAssemblyFixupKinds.h.
       //
       // Name                     Offset (bits) Size (bits)     Flags
-      {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
-      {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
-      {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
+      {"fixup_sleb128_i32", 0, 5 * 8, 0},
+      {"fixup_sleb128_i64", 0, 10 * 8, 0},
+      {"fixup_uleb128_i32", 0, 5 * 8, 0},
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -92,7 +90,7 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
 
 bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS,
                                          uint64_t Count) const {
-  for (uint64_t i = 0; i < Count; ++i)
+  for (uint64_t I = 0; I < Count; ++I)
     OS << char(WebAssembly::Nop);
 
   return true;
@@ -119,8 +117,8 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm,
 
   // For each byte of the fragment that the fixup touches, mask in the
   // bits from the fixup value.
-  for (unsigned i = 0; i != NumBytes; ++i)
-    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+  for (unsigned I = 0; I != NumBytes; ++I)
+    Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
 }
 
 std::unique_ptr<MCObjectTargetWriter>
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index c2fac5f93a2f..33e8de282955 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -1,9 +1,8 @@
 //=- WebAssemblyFixupKinds.h - WebAssembly Specific Fixup Entries -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,9 +14,9 @@
 namespace llvm {
 namespace WebAssembly {
 enum Fixups {
-  fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
-  fixup_code_sleb128_i64,                        // 64-bit signed
-  fixup_code_uleb128_i32,                        // 32-bit unsigned
+  fixup_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+  fixup_sleb128_i64,                        // 64-bit signed
+  fixup_uleb128_i32,                        // 32-bit unsigned
 
   // Marker
   LastTargetFixupKind,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
new file mode 100644
index 000000000000..b5d4d369b726
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -0,0 +1,296 @@
+//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Print MCInst instructions to wasm format.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "WebAssemblyGenAsmWriter.inc"
+
+WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
+                                               const MCInstrInfo &MII,
+                                               const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
+                                          unsigned RegNo) const {
+  assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
+  // Note that there's an implicit local.get/local.set here!
+  OS << "$" << RegNo;
+}
+
+void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                       StringRef Annot,
+                                       const MCSubtargetInfo &STI) {
+  // Print the instruction (this uses the AsmStrings from the .td files).
+  printInstruction(MI, OS);
+
+  // Print any additional variadic operands.
+  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+  if (Desc.isVariadic())
+    for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) {
+      // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
+      // we have an extra flags operand which is not currently printed, for
+      // compatibility reasons.
+      if (I != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
+                      MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
+                     I != Desc.getNumOperands()))
+        OS << ", ";
+      printOperand(MI, I, OS);
+    }
+
+  // Print any added annotation.
+  printAnnotation(OS, Annot);
+
+  if (CommentStream) {
+    // Observe any effects on the control flow stack, for use in annotating
+    // control flow label references.
+    unsigned Opc = MI->getOpcode();
+    switch (Opc) {
+    default:
+      break;
+
+    case WebAssembly::LOOP:
+    case WebAssembly::LOOP_S:
+      printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
+      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
+      break;
+
+    case WebAssembly::BLOCK:
+    case WebAssembly::BLOCK_S:
+      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+      break;
+
+    case WebAssembly::TRY:
+    case WebAssembly::TRY_S:
+      ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+      EHPadStack.push_back(EHPadStackCounter++);
+      LastSeenEHInst = TRY;
+      break;
+
+    case WebAssembly::END_LOOP:
+    case WebAssembly::END_LOOP_S:
+      if (ControlFlowStack.empty()) {
+        printAnnotation(OS, "End marker mismatch!");
+      } else {
+        ControlFlowStack.pop_back();
+      }
+      break;
+
+    case WebAssembly::END_BLOCK:
+    case WebAssembly::END_BLOCK_S:
+      if (ControlFlowStack.empty()) {
+        printAnnotation(OS, "End marker mismatch!");
+      } else {
+        printAnnotation(
+            OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+      }
+      break;
+
+    case WebAssembly::END_TRY:
+    case WebAssembly::END_TRY_S:
+      if (ControlFlowStack.empty()) {
+        printAnnotation(OS, "End marker mismatch!");
+      } else {
+        printAnnotation(
+            OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+        LastSeenEHInst = END_TRY;
+      }
+      break;
+
+    case WebAssembly::CATCH:
+    case WebAssembly::CATCH_S:
+      if (EHPadStack.empty()) {
+        printAnnotation(OS, "try-catch mismatch!");
+      } else {
+        printAnnotation(OS, "catch" + utostr(EHPadStack.pop_back_val()) + ':');
+      }
+      break;
+    }
+
+    // Annotate any control flow label references.
+
+    // rethrow instruction does not take any depth argument and rethrows to the
+    // nearest enclosing catch scope, if any. If there's no enclosing catch
+    // scope, it throws up to the caller.
+    if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+      if (EHPadStack.empty()) {
+        printAnnotation(OS, "to caller");
+      } else {
+        printAnnotation(OS, "down to catch" + utostr(EHPadStack.back()));
+      }
+
+    } else {
+      unsigned NumFixedOperands = Desc.NumOperands;
+      SmallSet<uint64_t, 8> Printed;
+      for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) {
+        // See if this operand denotes a basic block target.
+        if (I < NumFixedOperands) {
+          // A non-variable_ops operand, check its type.
+          if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
+            continue;
+        } else {
+          // A variable_ops operand, which currently can be immediates (used in
+          // br_table) which are basic block targets, or for call instructions
+          // when using -wasm-keep-registers (in which case they are registers,
+          // and should not be processed).
+          if (!MI->getOperand(I).isImm())
+            continue;
+        }
+        uint64_t Depth = MI->getOperand(I).getImm();
+        if (!Printed.insert(Depth).second)
+          continue;
+        if (Depth >= ControlFlowStack.size()) {
+          printAnnotation(OS, "Invalid depth argument!");
+        } else {
+          const auto &Pair = ControlFlowStack.rbegin()[Depth];
+          printAnnotation(OS, utostr(Depth) + ": " +
+                                  (Pair.second ? "up" : "down") + " to label" +
+                                  utostr(Pair.first));
+        }
+      }
+    }
+  }
+}
+
+static std::string toString(const APFloat &FP) {
+  // Print NaNs with custom payloads specially.
+  if (FP.isNaN() && !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
+      !FP.bitwiseIsEqual(
+          APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
+    APInt AI = FP.bitcastToAPInt();
+    return std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
+           utohexstr(AI.getZExtValue() &
+                         (AI.getBitWidth() == 32 ? INT64_C(0x007fffff)
+                                                 : INT64_C(0x000fffffffffffff)),
+                     /*LowerCase=*/true);
+  }
+
+  // Use C99's hexadecimal floating-point representation.
+  static const size_t BufBytes = 128;
+  char Buf[BufBytes];
+  auto Written = FP.convertToHexString(
+      Buf, /*HexDigits=*/0, /*UpperCase=*/false, APFloat::rmNearestTiesToEven);
+  (void)Written;
+  assert(Written != 0);
+  assert(Written < BufBytes);
+  return Buf;
+}
+
+void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    unsigned WAReg = Op.getReg();
+    if (int(WAReg) >= 0)
+      printRegName(O, WAReg);
+    else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
+      O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
+    else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
+      O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
+    else
+      O << "$drop";
+    // Add a '=' suffix if this is a def.
+    if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
+      O << '=';
+  } else if (Op.isImm()) {
+    O << Op.getImm();
+  } else if (Op.isFPImm()) {
+    const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+    const MCOperandInfo &Info = Desc.OpInfo[OpNo];
+    if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
+      // TODO: MC converts all floating point immediate operands to double.
+      // This is fine for numeric values, but may cause NaNs to change bits.
+      O << ::toString(APFloat(float(Op.getFPImm())));
+    } else {
+      assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
+      O << ::toString(APFloat(Op.getFPImm()));
+    }
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    Op.getExpr()->print(O, &MAI);
+  }
+}
+
+void WebAssemblyInstPrinter::printBrList(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  O << "{";
+  for (unsigned I = OpNo, E = MI->getNumOperands(); I != E; ++I) {
+    if (I != OpNo)
+      O << ", ";
+    O << MI->getOperand(I).getImm();
+  }
+  O << "}";
+}
+
+void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
+                                                            unsigned OpNo,
+                                                            raw_ostream &O) {
+  int64_t Imm = MI->getOperand(OpNo).getImm();
+  if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
+    return;
+  O << ":p2align=" << Imm;
+}
+
+void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
+                                                              unsigned OpNo,
+                                                              raw_ostream &O) {
+  auto Imm = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+  if (Imm != wasm::WASM_TYPE_NORESULT)
+    O << WebAssembly::anyTypeToString(Imm);
+}
+
+// We have various enums representing a subset of these types, use this
+// function to convert any of them to text.
+const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
+  switch (Ty) {
+  case wasm::WASM_TYPE_I32:
+    return "i32";
+  case wasm::WASM_TYPE_I64:
+    return "i64";
+  case wasm::WASM_TYPE_F32:
+    return "f32";
+  case wasm::WASM_TYPE_F64:
+    return "f64";
+  case wasm::WASM_TYPE_V128:
+    return "v128";
+  case wasm::WASM_TYPE_FUNCREF:
+    return "funcref";
+  case wasm::WASM_TYPE_FUNC:
+    return "func";
+  case wasm::WASM_TYPE_EXNREF:
+    return "exnref";
+  case wasm::WASM_TYPE_NORESULT:
+    return "void";
+  default:
+    return "invalid_type";
+  }
+}
+
+const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) {
+  return anyTypeToString(static_cast<unsigned>(Ty));
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
new file mode 100644
index 000000000000..b979de5028bf
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -0,0 +1,65 @@
+// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This class prints a WebAssembly MCInst to wasm file syntax.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/MachineValueType.h"
+
+namespace llvm {
+
+class MCSubtargetInfo;
+
+class WebAssemblyInstPrinter final : public MCInstPrinter {
+  uint64_t ControlFlowCounter = 0;
+  uint64_t EHPadStackCounter = 0;
+  SmallVector<std::pair<uint64_t, bool>, 4> ControlFlowStack;
+  SmallVector<uint64_t, 4> EHPadStack;
+
+  enum EHInstKind { TRY, CATCH, END_TRY };
+  EHInstKind LastSeenEHInst = END_TRY;
+
+public:
+  WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                         const MCRegisterInfo &MRI);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  // Used by tblgen code.
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
+                                      raw_ostream &O);
+  void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+};
+
+namespace WebAssembly {
+
+const char *typeToString(wasm::ValType Ty);
+const char *anyTypeToString(unsigned Ty);
+
+} // end namespace WebAssembly
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 44fcc129c39e..8f6531563e1b 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -20,7 +19,7 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-mc-asm-info"
 
-WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor.
 
 WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
   CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index 8627a6e40c6a..9efbbf881f59 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 065a4dc94ca6..44b6d6a968a9 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -49,7 +48,7 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
                          const MCSubtargetInfo &STI) const override;
 
 public:
-  WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+  WebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) : MCII(MCII) {}
 };
 } // end anonymous namespace
 
@@ -82,14 +81,14 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
     encodeULEB128(MI.getNumOperands() - 2, OS);
 
   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
-  for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
-    const MCOperand &MO = MI.getOperand(i);
+  for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+    const MCOperand &MO = MI.getOperand(I);
     if (MO.isReg()) {
       /* nothing to encode */
 
     } else if (MO.isImm()) {
-      if (i < Desc.getNumOperands()) {
-        const MCOperandInfo &Info = Desc.OpInfo[i];
+      if (I < Desc.getNumOperands()) {
+        const MCOperandInfo &Info = Desc.OpInfo[I];
         LLVM_DEBUG(dbgs() << "Encoding immediate: type="
                           << int(Info.OperandType) << "\n");
         switch (Info.OperandType) {
@@ -127,28 +126,28 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
       }
 
     } else if (MO.isFPImm()) {
-      const MCOperandInfo &Info = Desc.OpInfo[i];
+      const MCOperandInfo &Info = Desc.OpInfo[I];
       if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
         // TODO: MC converts all floating point immediate operands to double.
         // This is fine for numeric values, but may cause NaNs to change bits.
-        float f = float(MO.getFPImm());
-        support::endian::write<float>(OS, f, support::little);
+        auto F = float(MO.getFPImm());
+        support::endian::write<float>(OS, F, support::little);
       } else {
         assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
-        double d = MO.getFPImm();
-        support::endian::write<double>(OS, d, support::little);
+        double D = MO.getFPImm();
+        support::endian::write<double>(OS, D, support::little);
       }
 
     } else if (MO.isExpr()) {
-      const MCOperandInfo &Info = Desc.OpInfo[i];
+      const MCOperandInfo &Info = Desc.OpInfo[I];
       llvm::MCFixupKind FixupKind;
       size_t PaddedSize = 5;
       switch (Info.OperandType) {
       case WebAssembly::OPERAND_I32IMM:
-        FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
+        FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32);
         break;
       case WebAssembly::OPERAND_I64IMM:
-        FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
+        FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64);
         PaddedSize = 10;
         break;
       case WebAssembly::OPERAND_FUNCTION32:
@@ -156,7 +155,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
       case WebAssembly::OPERAND_TYPEINDEX:
       case WebAssembly::OPERAND_GLOBAL:
       case WebAssembly::OPERAND_EVENT:
-        FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
+        FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
         break;
       default:
         llvm_unreachable("unexpected symbolic operand kind");
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 390f367c2978..9c8ca1f13b18 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -12,10 +11,11 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssemblyMCTargetDesc.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
-#include "WebAssemblyMCAsmInfo.h"
-#include "WebAssemblyTargetStreamer.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCAsmInfo.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -40,13 +40,13 @@ static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
 }
 
 static MCInstrInfo *createMCInstrInfo() {
-  MCInstrInfo *X = new MCInstrInfo();
+  auto *X = new MCInstrInfo();
   InitWebAssemblyMCInstrInfo(X);
   return X;
 }
 
 static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) {
-  MCRegisterInfo *X = new MCRegisterInfo();
+  auto *X = new MCRegisterInfo();
   InitWebAssemblyMCRegisterInfo(X, 0);
   return X;
 }
@@ -146,8 +146,8 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) {
   case MVT::v4f32:
   case MVT::v2f64:
     return wasm::ValType::V128;
-  case MVT::ExceptRef:
-    return wasm::ValType::EXCEPT_REF;
+  case MVT::exnref:
+    return wasm::ValType::EXNREF;
   default:
     llvm_unreachable("unexpected type");
   }
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index a01517fb90c3..7a9f59b1a4f2 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -1,9 +1,8 @@
 //==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -15,6 +14,7 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
 
+#include "../WebAssemblySubtarget.h"
 #include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/DataTypes.h"
@@ -33,9 +33,6 @@ class Target;
 class Triple;
 class raw_pwrite_stream;
 
-Target &getTheWebAssemblyTarget32();
-Target &getTheWebAssemblyTarget64();
-
 MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
 
 MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
@@ -90,12 +87,23 @@ namespace WebAssemblyII {
 enum TOF {
   MO_NO_FLAG = 0,
 
-  // Flags to indicate the type of the symbol being referenced
-  MO_SYMBOL_FUNCTION = 0x1,
-  MO_SYMBOL_GLOBAL = 0x2,
-  MO_SYMBOL_EVENT = 0x4,
-  MO_SYMBOL_MASK = 0x7,
+  // On a symbol operand this indicates that the immediate is a wasm global
+  // index.  The value of the wasm global will be set to the symbol address at
+  // runtime.  This adds a level of indirection similar to the GOT on native
+  // platforms.
+  MO_GOT,
+
+  // On a symbol operand this indicates that the immediate is the symbol
+  // address relative to the __memory_base wasm global.
+  // Only applicable to data symbols.
+  MO_MEMORY_BASE_REL,
+
+  // On a symbol operand this indicates that the immediate is the symbol
+  // address relative to the __table_base wasm global.
+  // Only applicable to function symbols.
+  MO_TABLE_BASE_REL,
 };
+
 } // end namespace WebAssemblyII
 
 } // end namespace llvm
@@ -111,15 +119,30 @@ enum TOF {
 #define GET_INSTRINFO_ENUM
 #include "WebAssemblyGenInstrInfo.inc"
 
-#define GET_SUBTARGETINFO_ENUM
-#include "WebAssemblyGenSubtargetInfo.inc"
-
 namespace llvm {
 namespace WebAssembly {
 
+/// This is used to indicate block signatures.
+enum class ExprType : unsigned {
+  Void = 0x40,
+  I32 = 0x7F,
+  I64 = 0x7E,
+  F32 = 0x7D,
+  F64 = 0x7C,
+  V128 = 0x7B,
+  Exnref = 0x68,
+  Invalid = 0x00
+};
+
+/// Instruction opcodes emitted via means other than CodeGen.
+static const unsigned Nop = 0x01;
+static const unsigned End = 0x0b;
+
+wasm::ValType toValType(const MVT &Ty);
+
 /// Return the default p2align value for a load or store with the given opcode.
-inline unsigned GetDefaultP2Align(unsigned Opcode) {
-  switch (Opcode) {
+inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
+  switch (Opc) {
   case WebAssembly::LOAD8_S_I32:
   case WebAssembly::LOAD8_S_I32_S:
   case WebAssembly::LOAD8_U_I32:
@@ -328,35 +351,238 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
   case WebAssembly::STORE_v2f64_S:
     return 4;
   default:
+    return -1;
+  }
+}
+
+inline unsigned GetDefaultP2Align(unsigned Opc) { // Strict form: Opc must have a default p2align.
+  auto Align = GetDefaultP2AlignAny(Opc);
+  if (Align == -1U) { // -1U is GetDefaultP2AlignAny's "no p2align for this opcode" result.
     llvm_unreachable("Only loads and stores have p2align values");
   }
+  return Align;
 }
 
-/// The operand number of the load or store address in load/store instructions.
-static const unsigned LoadAddressOperandNo = 3;
-static const unsigned StoreAddressOperandNo = 2;
+inline bool isArgument(unsigned Opc) { // True iff Opc is one of the ARGUMENT_* opcodes below.
+  switch (Opc) {
+  case WebAssembly::ARGUMENT_i32:
+  case WebAssembly::ARGUMENT_i32_S:
+  case WebAssembly::ARGUMENT_i64:
+  case WebAssembly::ARGUMENT_i64_S:
+  case WebAssembly::ARGUMENT_f32:
+  case WebAssembly::ARGUMENT_f32_S:
+  case WebAssembly::ARGUMENT_f64:
+  case WebAssembly::ARGUMENT_f64_S:
+  case WebAssembly::ARGUMENT_v16i8:
+  case WebAssembly::ARGUMENT_v16i8_S:
+  case WebAssembly::ARGUMENT_v8i16:
+  case WebAssembly::ARGUMENT_v8i16_S:
+  case WebAssembly::ARGUMENT_v4i32:
+  case WebAssembly::ARGUMENT_v4i32_S:
+  case WebAssembly::ARGUMENT_v2i64:
+  case WebAssembly::ARGUMENT_v2i64_S:
+  case WebAssembly::ARGUMENT_v4f32:
+  case WebAssembly::ARGUMENT_v4f32_S:
+  case WebAssembly::ARGUMENT_v2f64:
+  case WebAssembly::ARGUMENT_v2f64_S:
+  case WebAssembly::ARGUMENT_exnref:
+  case WebAssembly::ARGUMENT_exnref_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-/// The operand number of the load or store p2align in load/store instructions.
-static const unsigned LoadP2AlignOperandNo = 1;
-static const unsigned StoreP2AlignOperandNo = 0;
+inline bool isCopy(unsigned Opc) { // True iff Opc is one of the COPY_* opcodes below.
+  switch (Opc) {
+  case WebAssembly::COPY_I32:
+  case WebAssembly::COPY_I32_S:
+  case WebAssembly::COPY_I64:
+  case WebAssembly::COPY_I64_S:
+  case WebAssembly::COPY_F32:
+  case WebAssembly::COPY_F32_S:
+  case WebAssembly::COPY_F64:
+  case WebAssembly::COPY_F64_S:
+  case WebAssembly::COPY_V128:
+  case WebAssembly::COPY_V128_S:
+  case WebAssembly::COPY_EXNREF:
+  case WebAssembly::COPY_EXNREF_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-/// This is used to indicate block signatures.
-enum class ExprType : unsigned {
-  Void = 0x40,
-  I32 = 0x7F,
-  I64 = 0x7E,
-  F32 = 0x7D,
-  F64 = 0x7C,
-  V128 = 0x7B,
-  ExceptRef = 0x68,
-  Invalid = 0x00
-};
+inline bool isTee(unsigned Opc) { // True iff Opc is one of the TEE_* opcodes below.
+  switch (Opc) {
+  case WebAssembly::TEE_I32:
+  case WebAssembly::TEE_I32_S:
+  case WebAssembly::TEE_I64:
+  case WebAssembly::TEE_I64_S:
+  case WebAssembly::TEE_F32:
+  case WebAssembly::TEE_F32_S:
+  case WebAssembly::TEE_F64:
+  case WebAssembly::TEE_F64_S:
+  case WebAssembly::TEE_V128:
+  case WebAssembly::TEE_V128_S:
+  case WebAssembly::TEE_EXNREF:
+  case WebAssembly::TEE_EXNREF_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-/// Instruction opcodes emitted via means other than CodeGen.
-static const unsigned Nop = 0x01;
-static const unsigned End = 0x0b;
+inline bool isCallDirect(unsigned Opc) { // True for the non-INDIRECT CALL_* opcodes and RET_CALL.
+  switch (Opc) {
+  case WebAssembly::CALL_VOID:
+  case WebAssembly::CALL_VOID_S:
+  case WebAssembly::CALL_i32:
+  case WebAssembly::CALL_i32_S:
+  case WebAssembly::CALL_i64:
+  case WebAssembly::CALL_i64_S:
+  case WebAssembly::CALL_f32:
+  case WebAssembly::CALL_f32_S:
+  case WebAssembly::CALL_f64:
+  case WebAssembly::CALL_f64_S:
+  case WebAssembly::CALL_v16i8:
+  case WebAssembly::CALL_v16i8_S:
+  case WebAssembly::CALL_v8i16:
+  case WebAssembly::CALL_v8i16_S:
+  case WebAssembly::CALL_v4i32:
+  case WebAssembly::CALL_v4i32_S:
+  case WebAssembly::CALL_v2i64:
+  case WebAssembly::CALL_v2i64_S:
+  case WebAssembly::CALL_v4f32:
+  case WebAssembly::CALL_v4f32_S:
+  case WebAssembly::CALL_v2f64:
+  case WebAssembly::CALL_v2f64_S:
+  case WebAssembly::CALL_exnref:
+  case WebAssembly::CALL_exnref_S:
+  case WebAssembly::RET_CALL:
+  case WebAssembly::RET_CALL_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-wasm::ValType toValType(const MVT &Ty);
+inline bool isCallIndirect(unsigned Opc) { // True for CALL_INDIRECT_* and RET_CALL_INDIRECT opcodes.
+  switch (Opc) {
+  case WebAssembly::CALL_INDIRECT_VOID:
+  case WebAssembly::CALL_INDIRECT_VOID_S:
+  case WebAssembly::CALL_INDIRECT_i32:
+  case WebAssembly::CALL_INDIRECT_i32_S:
+  case WebAssembly::CALL_INDIRECT_i64:
+  case WebAssembly::CALL_INDIRECT_i64_S:
+  case WebAssembly::CALL_INDIRECT_f32:
+  case WebAssembly::CALL_INDIRECT_f32_S:
+  case WebAssembly::CALL_INDIRECT_f64:
+  case WebAssembly::CALL_INDIRECT_f64_S:
+  case WebAssembly::CALL_INDIRECT_v16i8:
+  case WebAssembly::CALL_INDIRECT_v16i8_S:
+  case WebAssembly::CALL_INDIRECT_v8i16:
+  case WebAssembly::CALL_INDIRECT_v8i16_S:
+  case WebAssembly::CALL_INDIRECT_v4i32:
+  case WebAssembly::CALL_INDIRECT_v4i32_S:
+  case WebAssembly::CALL_INDIRECT_v2i64:
+  case WebAssembly::CALL_INDIRECT_v2i64_S:
+  case WebAssembly::CALL_INDIRECT_v4f32:
+  case WebAssembly::CALL_INDIRECT_v4f32_S:
+  case WebAssembly::CALL_INDIRECT_v2f64:
+  case WebAssembly::CALL_INDIRECT_v2f64_S:
+  case WebAssembly::CALL_INDIRECT_exnref:
+  case WebAssembly::CALL_INDIRECT_exnref_S:
+  case WebAssembly::RET_CALL_INDIRECT:
+  case WebAssembly::RET_CALL_INDIRECT_S:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Returns the operand number of the callee for the call opcode \p Opc; any
+/// opcode that is not listed below reaches llvm_unreachable.
+inline unsigned getCalleeOpNo(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::CALL_VOID:
+  case WebAssembly::CALL_VOID_S:
+  case WebAssembly::CALL_INDIRECT_VOID:
+  case WebAssembly::CALL_INDIRECT_VOID_S:
+  case WebAssembly::RET_CALL:
+  case WebAssembly::RET_CALL_S:
+  case WebAssembly::RET_CALL_INDIRECT:
+  case WebAssembly::RET_CALL_INDIRECT_S:
+    return 0; // *_VOID and RET_CALL* opcodes: the callee is operand 0.
+  case WebAssembly::CALL_i32:
+  case WebAssembly::CALL_i32_S:
+  case WebAssembly::CALL_i64:
+  case WebAssembly::CALL_i64_S:
+  case WebAssembly::CALL_f32:
+  case WebAssembly::CALL_f32_S:
+  case WebAssembly::CALL_f64:
+  case WebAssembly::CALL_f64_S:
+  case WebAssembly::CALL_v16i8:
+  case WebAssembly::CALL_v16i8_S:
+  case WebAssembly::CALL_v8i16:
+  case WebAssembly::CALL_v8i16_S:
+  case WebAssembly::CALL_v4i32:
+  case WebAssembly::CALL_v4i32_S:
+  case WebAssembly::CALL_v2i64:
+  case WebAssembly::CALL_v2i64_S:
+  case WebAssembly::CALL_v4f32:
+  case WebAssembly::CALL_v4f32_S:
+  case WebAssembly::CALL_v2f64:
+  case WebAssembly::CALL_v2f64_S:
+  case WebAssembly::CALL_exnref:
+  case WebAssembly::CALL_exnref_S:
+  case WebAssembly::CALL_INDIRECT_i32:
+  case WebAssembly::CALL_INDIRECT_i32_S:
+  case WebAssembly::CALL_INDIRECT_i64:
+  case WebAssembly::CALL_INDIRECT_i64_S:
+  case WebAssembly::CALL_INDIRECT_f32:
+  case WebAssembly::CALL_INDIRECT_f32_S:
+  case WebAssembly::CALL_INDIRECT_f64:
+  case WebAssembly::CALL_INDIRECT_f64_S:
+  case WebAssembly::CALL_INDIRECT_v16i8:
+  case WebAssembly::CALL_INDIRECT_v16i8_S:
+  case WebAssembly::CALL_INDIRECT_v8i16:
+  case WebAssembly::CALL_INDIRECT_v8i16_S:
+  case WebAssembly::CALL_INDIRECT_v4i32:
+  case WebAssembly::CALL_INDIRECT_v4i32_S:
+  case WebAssembly::CALL_INDIRECT_v2i64:
+  case WebAssembly::CALL_INDIRECT_v2i64_S:
+  case WebAssembly::CALL_INDIRECT_v4f32:
+  case WebAssembly::CALL_INDIRECT_v4f32_S:
+  case WebAssembly::CALL_INDIRECT_v2f64:
+  case WebAssembly::CALL_INDIRECT_v2f64_S:
+  case WebAssembly::CALL_INDIRECT_exnref:
+  case WebAssembly::CALL_INDIRECT_exnref_S:
+    return 1; // Typed calls; presumably operand 0 is the result def - TODO confirm.
+  default:
+    llvm_unreachable("Not a call instruction");
+  }
+}
+
+inline bool isMarker(unsigned Opc) { // True for BLOCK/LOOP/TRY and their END_* opcodes.
+  switch (Opc) {
+  case WebAssembly::BLOCK:
+  case WebAssembly::BLOCK_S:
+  case WebAssembly::END_BLOCK:
+  case WebAssembly::END_BLOCK_S:
+  case WebAssembly::LOOP:
+  case WebAssembly::LOOP_S:
+  case WebAssembly::END_LOOP:
+  case WebAssembly::END_LOOP_S:
+  case WebAssembly::TRY:
+  case WebAssembly::TRY_S:
+  case WebAssembly::END_TRY:
+  case WebAssembly::END_TRY_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
 } // end namespace WebAssembly
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 50143fb0ece3..e05efef7201b 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -13,9 +12,9 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssemblyTargetStreamer.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
-#include "WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -113,8 +112,15 @@ void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
 }
 
 void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
-                                                    StringRef ModuleName) {
-  OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
+                                                    StringRef ImportModule) {
+  OS << "\t.import_module\t" << Sym->getName() << ", "
+                             << ImportModule << '\n';
+}
+
+void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym,
+                                                  StringRef ImportName) {
+  OS << "\t.import_name\t" << Sym->getName() << ", "
+                           << ImportName << '\n';
 }
 
 void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 3073938118b4..5ea62b179d22 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -1,9 +1,8 @@
 //==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -45,7 +44,10 @@ public:
   virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
   /// .import_module
   virtual void emitImportModule(const MCSymbolWasm *Sym,
-                                StringRef ModuleName) = 0;
+                                StringRef ImportModule) = 0;
+  /// .import_name
+  virtual void emitImportName(const MCSymbolWasm *Sym,
+                              StringRef ImportName) = 0;
 
 protected:
   void emitValueType(wasm::ValType Type);
@@ -67,7 +69,8 @@ public:
   void emitIndIdx(const MCExpr *Value) override;
   void emitGlobalType(const MCSymbolWasm *Sym) override;
   void emitEventType(const MCSymbolWasm *Sym) override;
-  void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
+  void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override;
+  void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
 };
 
 /// This part is for Wasm object output
@@ -82,7 +85,9 @@ public:
   void emitGlobalType(const MCSymbolWasm *Sym) override {}
   void emitEventType(const MCSymbolWasm *Sym) override {}
   void emitImportModule(const MCSymbolWasm *Sym,
-                        StringRef ModuleName) override {}
+                        StringRef ImportModule) override {}
+  void emitImportName(const MCSymbolWasm *Sym,
+                      StringRef ImportName) override {}
 };
 
 /// This part is for null output
@@ -98,6 +103,7 @@ public:
   void emitGlobalType(const MCSymbolWasm *) override {}
   void emitEventType(const MCSymbolWasm *) override {}
   void emitImportModule(const MCSymbolWasm *, StringRef) override {}
+  void emitImportName(const MCSymbolWasm *, StringRef) override {}
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 763e30be8e02..a1cc3e268e8f 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyWasmObjectWriter.cpp - WebAssembly Wasm Writer ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -43,26 +42,7 @@ private:
 WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
     : MCWasmObjectTargetWriter(Is64Bit) {}
 
-// Test whether the given expression computes a function address.
-static bool IsFunctionExpr(const MCExpr *Expr) {
-  if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr))
-    return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction();
-
-  if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr))
-    return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS());
-
-  if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr))
-    return IsFunctionExpr(UnOp->getSubExpr());
-
-  return false;
-}
-
-static bool IsFunctionType(const MCValue &Target) {
-  const MCSymbolRefExpr *RefA = Target.getSymA();
-  return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX;
-}
-
-static const MCSection *GetFixupSection(const MCExpr *Expr) {
+static const MCSection *getFixupSection(const MCExpr *Expr) {
   if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr)) {
     if (SyExp->getSymbol().isInSection())
       return &SyExp->getSymbol().getSection();
@@ -70,63 +50,66 @@ static const MCSection *GetFixupSection(const MCExpr *Expr) {
   }
 
   if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr)) {
-    auto SectionLHS = GetFixupSection(BinOp->getLHS());
-    auto SectionRHS = GetFixupSection(BinOp->getRHS());
+    auto SectionLHS = getFixupSection(BinOp->getLHS());
+    auto SectionRHS = getFixupSection(BinOp->getRHS());
     return SectionLHS == SectionRHS ? nullptr : SectionLHS;
   }
 
   if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr))
-    return GetFixupSection(UnOp->getSubExpr());
+    return getFixupSection(UnOp->getSubExpr());
 
   return nullptr;
 }
 
-static bool IsGlobalType(const MCValue &Target) {
-  const MCSymbolRefExpr *RefA = Target.getSymA();
-  return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_GLOBAL;
-}
-
-static bool IsEventType(const MCValue &Target) {
-  const MCSymbolRefExpr *RefA = Target.getSymA();
-  return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_EVENT;
-}
-
 unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
                                                    const MCFixup &Fixup) const {
-  // WebAssembly functions are not allocated in the data address space. To
-  // resolve a pointer to a function, we must use a special relocation type.
-  bool IsFunction = IsFunctionExpr(Fixup.getValue());
+  const MCSymbolRefExpr *RefA = Target.getSymA();
+  assert(RefA);
+  auto& SymA = cast<MCSymbolWasm>(RefA->getSymbol());
+
+  MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+
+  switch (Modifier) {
+    case MCSymbolRefExpr::VK_GOT:
+      return wasm::R_WASM_GLOBAL_INDEX_LEB;
+    case MCSymbolRefExpr::VK_WASM_TBREL:
+      assert(SymA.isFunction());
+      return wasm::R_WASM_TABLE_INDEX_REL_SLEB;
+    case MCSymbolRefExpr::VK_WASM_MBREL:
+      assert(SymA.isData());
+      return wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
+    case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
+      return wasm::R_WASM_TYPE_INDEX_LEB;
+    default:
+      break;
+  }
 
   switch (unsigned(Fixup.getKind())) {
-  case WebAssembly::fixup_code_sleb128_i32:
-    if (IsFunction)
-      return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
-    return wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB;
-  case WebAssembly::fixup_code_sleb128_i64:
+  case WebAssembly::fixup_sleb128_i32:
+    if (SymA.isFunction())
+      return wasm::R_WASM_TABLE_INDEX_SLEB;
+    return wasm::R_WASM_MEMORY_ADDR_SLEB;
+  case WebAssembly::fixup_sleb128_i64:
     llvm_unreachable("fixup_sleb128_i64 not implemented yet");
-  case WebAssembly::fixup_code_uleb128_i32:
-    if (IsGlobalType(Target))
-      return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
-    if (IsFunctionType(Target))
-      return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
-    if (IsFunction)
-      return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
-    if (IsEventType(Target))
-      return wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB;
-    return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB;
+  case WebAssembly::fixup_uleb128_i32:
+    if (SymA.isGlobal())
+      return wasm::R_WASM_GLOBAL_INDEX_LEB;
+    if (SymA.isFunction())
+      return wasm::R_WASM_FUNCTION_INDEX_LEB;
+    if (SymA.isEvent())
+      return wasm::R_WASM_EVENT_INDEX_LEB;
+    return wasm::R_WASM_MEMORY_ADDR_LEB;
   case FK_Data_4:
-    if (IsFunction)
-      return wasm::R_WEBASSEMBLY_TABLE_INDEX_I32;
+    if (SymA.isFunction())
+      return wasm::R_WASM_TABLE_INDEX_I32;
     if (auto Section = static_cast<const MCSectionWasm *>(
-            GetFixupSection(Fixup.getValue()))) {
+            getFixupSection(Fixup.getValue()))) {
       if (Section->getKind().isText())
-        return wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32;
+        return wasm::R_WASM_FUNCTION_OFFSET_I32;
       else if (!Section->isWasmData())
-        return wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32;
+        return wasm::R_WASM_SECTION_OFFSET_I32;
     }
-    return wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32;
-  case FK_Data_8:
-    llvm_unreachable("FK_Data_8 not implemented yet");
+    return wasm::R_WASM_MEMORY_ADDR_I32;
   default:
     llvm_unreachable("unimplemented fixup kind");
   }
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index a154b4bf7ea8..ef3f5aaf7d33 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -14,7 +14,7 @@ can run in browsers and other environments. For more information, see the
 Emscripten documentation in general, and this page in particular:
 
   * https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend
- 
+
 Rust provides WebAssembly support integrated into Cargo. There are two
 main options:
  - wasm32-unknown-unknown, which provides a relatively minimal environment
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index f7a417c0ed49..e4afe2bb2830 100644
--- a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -12,8 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "llvm/ADT/Triple.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
new file mode 100644
index 000000000000..a7427f78c72c
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
@@ -0,0 +1,26 @@
+//===-- WebAssemblyTargetInfo.h - WebAssembly Target Impl -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the accessor functions for the WebAssembly targets.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheWebAssemblyTarget32();
+Target &getTheWebAssemblyTarget64();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
index 45145c0a6527..fcbd0a5082ff 100644
--- a/lib/Target/WebAssembly/WebAssembly.h
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -1,9 +1,8 @@
 //===-- WebAssembly.h - Top-level interface for WebAssembly  ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -39,18 +38,17 @@ FunctionPass *createWebAssemblyArgumentMove();
 FunctionPass *createWebAssemblySetP2AlignOperands();
 
 // Late passes.
-FunctionPass *createWebAssemblyEHRestoreStackPointer();
 FunctionPass *createWebAssemblyReplacePhysRegs();
 FunctionPass *createWebAssemblyPrepareForLiveIntervals();
 FunctionPass *createWebAssemblyOptimizeLiveIntervals();
 FunctionPass *createWebAssemblyMemIntrinsicResults();
 FunctionPass *createWebAssemblyRegStackify();
 FunctionPass *createWebAssemblyRegColoring();
-FunctionPass *createWebAssemblyExplicitLocals();
 FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
 FunctionPass *createWebAssemblyLateEHPrepare();
 FunctionPass *createWebAssemblyCFGSort();
 FunctionPass *createWebAssemblyCFGStackify();
+FunctionPass *createWebAssemblyExplicitLocals();
 FunctionPass *createWebAssemblyLowerBrUnless();
 FunctionPass *createWebAssemblyRegNumbering();
 FunctionPass *createWebAssemblyPeephole();
@@ -64,19 +62,18 @@ void initializeFixFunctionBitcastsPass(PassRegistry &);
 void initializeOptimizeReturnedPass(PassRegistry &);
 void initializeWebAssemblyArgumentMovePass(PassRegistry &);
 void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
-void initializeWebAssemblyEHRestoreStackPointerPass(PassRegistry &);
 void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
 void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
 void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
 void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
 void initializeWebAssemblyRegStackifyPass(PassRegistry &);
 void initializeWebAssemblyRegColoringPass(PassRegistry &);
-void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
 void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &);
 void initializeWebAssemblyLateEHPreparePass(PassRegistry &);
 void initializeWebAssemblyExceptionInfoPass(PassRegistry &);
 void initializeWebAssemblyCFGSortPass(PassRegistry &);
 void initializeWebAssemblyCFGStackifyPass(PassRegistry &);
+void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
 void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &);
 void initializeWebAssemblyRegNumberingPass(PassRegistry &);
 void initializeWebAssemblyPeepholePass(PassRegistry &);
diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td
index 6b218f8aa880..b0b8a9b996a3 100644
--- a/lib/Target/WebAssembly/WebAssembly.td
+++ b/lib/Target/WebAssembly/WebAssembly.td
@@ -1,9 +1,8 @@
 //- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -34,6 +33,7 @@ def FeatureUnimplementedSIMD128 :
 
 def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
                                       "Enable Atomics">;
+
 def FeatureNontrappingFPToInt :
       SubtargetFeature<"nontrapping-fptoint",
                        "HasNontrappingFPToInt", "true",
@@ -44,10 +44,28 @@ def FeatureSignExt :
                        "HasSignExt", "true",
                        "Enable sign extension operators">;
 
+def FeatureTailCall :
+      SubtargetFeature<"tail-call",
+                       "HasTailCall", "true",
+                       "Enable tail call instructions">;
+
 def FeatureExceptionHandling :
       SubtargetFeature<"exception-handling", "HasExceptionHandling", "true",
                        "Enable Wasm exception handling">;
 
+def FeatureBulkMemory :
+      SubtargetFeature<"bulk-memory", "HasBulkMemory", "true",
+                       "Enable bulk memory operations">;
+
+def FeatureMultivalue :
+      SubtargetFeature<"multivalue",
+                       "HasMultivalue", "true",
+                       "Enable multivalue blocks, instructions, and functions">;
+
+def FeatureMutableGlobals :
+      SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true",
+                       "Enable mutable globals">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //===----------------------------------------------------------------------===//
@@ -79,7 +97,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
 // Latest and greatest experimental version of WebAssembly. Bugs included!
 def : ProcessorModel<"bleeding-edge", NoSchedModel,
                       [FeatureSIMD128, FeatureAtomics,
-                       FeatureNontrappingFPToInt, FeatureSignExt]>;
+                       FeatureNontrappingFPToInt, FeatureSignExt,
+                       FeatureMutableGlobals]>;
 
 //===----------------------------------------------------------------------===//
 // Target Declaration
diff --git a/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index e49e2b67f435..b7a701f15782 100644
--- a/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyAddMissingPrototypes.cpp - Fix prototypeless functions -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -79,32 +78,33 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
       report_fatal_error(
           "Functions with 'no-prototype' attribute must take varargs: " +
           F.getName());
-    if (F.getFunctionType()->getNumParams() != 0)
-      report_fatal_error(
-          "Functions with 'no-prototype' attribute should not have params: " +
-          F.getName());
+    unsigned NumParams = F.getFunctionType()->getNumParams();
+    if (NumParams != 0) {
+      if (!(NumParams == 1 && F.arg_begin()->hasStructRetAttr()))
+        report_fatal_error("Functions with 'no-prototype' attribute should "
+                           "not have params: " +
+                           F.getName());
+    }
 
     // Create a function prototype based on the first call site (first bitcast)
     // that we find.
     FunctionType *NewType = nullptr;
-    Function *NewF = nullptr;
     for (Use &U : F.uses()) {
       LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
+      LLVM_DEBUG(dbgs() << *U.getUser() << "\n");
       if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
         if (auto *DestType = dyn_cast<FunctionType>(
                 BC->getDestTy()->getPointerElementType())) {
           if (!NewType) {
             // Create a new function with the correct type
             NewType = DestType;
-            NewF = Function::Create(NewType, F.getLinkage(), F.getName());
-            NewF->setAttributes(F.getAttributes());
-            NewF->removeFnAttr("no-prototype");
-          } else {
-            if (NewType != DestType) {
-              report_fatal_error("Prototypeless function used with "
-                                 "conflicting signatures: " +
-                                 F.getName());
-            }
+            LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
+          } else if (NewType != DestType) {
+            errs() << "warning: prototype-less function used with "
+                      "conflicting signatures: "
+                   << F.getName() << "\n";
+            LLVM_DEBUG(dbgs() << "  " << *DestType << "\n");
+            LLVM_DEBUG(dbgs() << "  "<<  *NewType << "\n");
           }
         }
       }
@@ -114,47 +114,30 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
       LLVM_DEBUG(
           dbgs() << "could not derive a function prototype from usage: " +
                         F.getName() + "\n");
-      continue;
+      // We could not derive a type for this function.  In this case strip
+      // the isVarArg and make it a simple zero-arg function.  This has more
+      // chance of being correct.  The current signature of (...) is illegal in
+      // C since it doesn't have any arguments before the "...", but this at
+      // least makes it possible for this symbol to be resolved by the linker.
+      NewType = FunctionType::get(F.getFunctionType()->getReturnType(), false);
     }
 
-    SmallVector<Instruction *, 4> DeadInsts;
-
-    for (Use &US : F.uses()) {
-      User *U = US.getUser();
-      if (auto *BC = dyn_cast<BitCastOperator>(U)) {
-        if (auto *Inst = dyn_cast<BitCastInst>(U)) {
-          // Replace with a new bitcast
-          IRBuilder<> Builder(Inst);
-          Value *NewCast = Builder.CreatePointerCast(NewF, BC->getDestTy());
-          Inst->replaceAllUsesWith(NewCast);
-          DeadInsts.push_back(Inst);
-        } else if (auto *Const = dyn_cast<ConstantExpr>(U)) {
-          Constant *NewConst =
-              ConstantExpr::getPointerCast(NewF, BC->getDestTy());
-          Const->replaceAllUsesWith(NewConst);
-        } else {
-          dbgs() << *U->getType() << "\n";
-#ifndef NDEBUG
-          U->dump();
-#endif
-          report_fatal_error("unexpected use of prototypeless function: " +
-                             F.getName() + "\n");
-        }
-      }
-    }
-
-    for (auto I : DeadInsts)
-      I->eraseFromParent();
+    Function *NewF =
+        Function::Create(NewType, F.getLinkage(), F.getName() + ".fixed_sig");
+    NewF->setAttributes(F.getAttributes());
+    NewF->removeFnAttr("no-prototype");
     Replacements.emplace_back(&F, NewF);
   }
 
-
-  // Finally replace the old function declarations with the new ones
   for (auto &Pair : Replacements) {
-    Function *Old = Pair.first;
-    Function *New = Pair.second;
-    Old->eraseFromParent();
-    M.getFunctionList().push_back(New);
+    Function *OldF = Pair.first;
+    Function *NewF = Pair.second;
+    std::string Name = OldF->getName();
+    M.getFunctionList().push_back(NewF);
+    OldF->replaceAllUsesWith(
+        ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewF, OldF->getType()));
+    OldF->eraseFromParent();
+    NewF->setName(Name);
   }
 
   return !Replacements.empty();
diff --git a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
index 7c8a631cde8a..02f5cc6da77c 100644
--- a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -79,7 +78,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
 
   // Look for the first NonArg instruction.
   for (MachineInstr &MI : EntryMBB) {
-    if (!WebAssembly::isArgument(MI)) {
+    if (!WebAssembly::isArgument(MI.getOpcode())) {
       InsertPt = MI;
       break;
     }
@@ -88,7 +87,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
   // Now move any argument instructions later in the block
   // to before our first NonArg instruction.
   for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) {
-    if (WebAssembly::isArgument(MI)) {
+    if (WebAssembly::isArgument(MI.getOpcode())) {
       EntryMBB.insert(InsertPt, MI.removeFromParent());
       Changed = true;
     }
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index c4f03dfa7f9e..7f9d41da3978 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -15,21 +14,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "WebAssemblyAsmPrinter.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "WebAssembly.h"
 #include "WebAssemblyMCInstLower.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblyRegisterInfo.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionWasm.h"
 #include "llvm/MC/MCStreamer.h"
@@ -38,10 +43,13 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
 
+extern cl::opt<bool> WasmKeepRegisters;
+
 //===----------------------------------------------------------------------===//
 // Helpers.
 //===----------------------------------------------------------------------===//
@@ -92,11 +100,11 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
     if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
       SmallVector<MVT, 4> Results;
       SmallVector<MVT, 4> Params;
-      ComputeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+      computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
       auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
       Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
       if (!Sym->getSignature()) {
-        auto Signature = SignatureFromMVTs(Results, Params);
+        auto Signature = signatureFromMVTs(Results, Params);
         Sym->setSignature(Signature.get());
         addSignature(std::move(Signature));
       }
@@ -111,9 +119,16 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
           F.hasFnAttribute("wasm-import-module")) {
         StringRef Name =
             F.getFnAttribute("wasm-import-module").getValueAsString();
-        Sym->setModuleName(Name);
+        Sym->setImportModule(Name);
         getTargetStreamer()->emitImportModule(Sym, Name);
       }
+      if (TM.getTargetTriple().isOSBinFormatWasm() &&
+          F.hasFnAttribute("wasm-import-name")) {
+        StringRef Name =
+            F.getFnAttribute("wasm-import-name").getValueAsString();
+        Sym->setImportName(Name);
+        getTargetStreamer()->emitImportName(Sym, Name);
+      }
     }
   }
 
@@ -129,7 +144,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
 
   if (const NamedMDNode *Named = M.getNamedMetadata("wasm.custom_sections")) {
     for (const Metadata *MD : Named->operands()) {
-      const MDTuple *Tuple = dyn_cast<MDTuple>(MD);
+      const auto *Tuple = dyn_cast<MDTuple>(MD);
       if (!Tuple || Tuple->getNumOperands() != 2)
         continue;
       const MDString *Name = dyn_cast<MDString>(Tuple->getOperand(0));
@@ -139,13 +154,117 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
 
       OutStreamer->PushSection();
       std::string SectionName = (".custom_section." + Name->getString()).str();
-      MCSectionWasm *mySection =
+      MCSectionWasm *MySection =
           OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
-      OutStreamer->SwitchSection(mySection);
+      OutStreamer->SwitchSection(MySection);
       OutStreamer->EmitBytes(Contents->getString());
       OutStreamer->PopSection();
     }
   }
+
+  EmitProducerInfo(M);
+  EmitTargetFeatures(M);
+}
+
+void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
+  // Emits the "producers" custom section (languages and tools), if non-empty.
+  llvm::SmallVector<std::pair<std::string, std::string>, 4> Languages;
+  if (const NamedMDNode *Debug = M.getNamedMetadata("llvm.dbg.cu")) {
+    llvm::SmallSet<StringRef, 4> SeenLanguages;
+    for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) {
+      const auto *CU = cast<DICompileUnit>(Debug->getOperand(I));
+      StringRef Language = dwarf::LanguageString(CU->getSourceLanguage());
+      Language.consume_front("DW_LANG_"); // Drop the DW_LANG_ prefix, if any.
+      if (SeenLanguages.insert(Language).second) // Record each language once.
+        Languages.emplace_back(Language.str(), "");
+    }
+  }
+
+  llvm::SmallVector<std::pair<std::string, std::string>, 4> Tools;
+  if (const NamedMDNode *Ident = M.getNamedMetadata("llvm.ident")) {
+    llvm::SmallSet<StringRef, 4> SeenTools;
+    for (size_t I = 0, E = Ident->getNumOperands(); I < E; ++I) {
+      const auto *S = cast<MDString>(Ident->getOperand(I)->getOperand(0));
+      std::pair<StringRef, StringRef> Field = S->getString().split("version");
+      StringRef Name = Field.first.trim(); // Text before "version".
+      if (SeenTools.insert(Name).second) // Keep the first entry per tool.
+        Tools.emplace_back(Name.str(), Field.second.trim().str());
+    }
+  }
+
+  int FieldCount = int(!Languages.empty()) + int(!Tools.empty());
+  if (FieldCount != 0) {
+    MCSectionWasm *ProducersSection = OutContext.getWasmSection(
+        ".custom_section.producers", SectionKind::getMetadata());
+    OutStreamer->PushSection();
+    OutStreamer->SwitchSection(ProducersSection);
+    OutStreamer->EmitULEB128IntValue(FieldCount);
+    for (auto &Producers : {std::make_pair("language", &Languages),
+            std::make_pair("processed-by", &Tools)}) {
+      if (Producers.second->empty()) // Empty fields are not emitted.
+        continue;
+      OutStreamer->EmitULEB128IntValue(strlen(Producers.first));
+      OutStreamer->EmitBytes(Producers.first);
+      OutStreamer->EmitULEB128IntValue(Producers.second->size());
+      for (auto &Producer : *Producers.second) {
+        OutStreamer->EmitULEB128IntValue(Producer.first.size());
+        OutStreamer->EmitBytes(Producer.first);
+        OutStreamer->EmitULEB128IntValue(Producer.second.size());
+        OutStreamer->EmitBytes(Producer.second);
+      }
+    }
+    OutStreamer->PopSection();
+  }
+}
+
+void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
+  // One record of the emitted section: a policy byte plus the feature name.
+  struct FeatureEntry {
+    uint8_t Prefix;
+    StringRef Name;
+  };
+
+  // Read target features and linkage policies from module metadata
+  SmallVector<FeatureEntry, 4> EmittedFeatures;
+  for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+    std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+    Metadata *Policy = M.getModuleFlag(MDKey);
+    if (Policy == nullptr)
+      continue;
+
+    // Prefix stays 0 unless the flag is a constant integer; dyn_cast<> (not
+    // cast<>, which asserts on a type mismatch) lets the check below drop it.
+    FeatureEntry Entry{0, KV.Key};
+    if (auto *MD = dyn_cast<ConstantAsMetadata>(Policy))
+      if (auto *I = dyn_cast<ConstantInt>(MD->getValue()))
+        Entry.Prefix = I->getZExtValue();
+
+    // Silently ignore invalid metadata
+    if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
+        Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
+        Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
+      continue;
+
+    EmittedFeatures.push_back(Entry);
+  }
+
+  if (EmittedFeatures.empty())
+    return;
+
+  // Emit features and linkage policies into the "target_features" section
+  MCSectionWasm *FeaturesSection = OutContext.getWasmSection(
+      ".custom_section.target_features", SectionKind::getMetadata());
+  OutStreamer->PushSection();
+  OutStreamer->SwitchSection(FeaturesSection);
+
+  OutStreamer->EmitULEB128IntValue(EmittedFeatures.size());
+  for (auto &F : EmittedFeatures) {
+    OutStreamer->EmitIntValue(F.Prefix, 1);
+    OutStreamer->EmitULEB128IntValue(F.Name.size());
+    OutStreamer->EmitBytes(F.Name);
+  }
+
+  OutStreamer->PopSection();
+}
 
 void WebAssemblyAsmPrinter::EmitConstantPool() {
@@ -161,8 +280,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
   const Function &F = MF->getFunction();
   SmallVector<MVT, 1> ResultVTs;
   SmallVector<MVT, 4> ParamVTs;
-  ComputeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
-  auto Signature = SignatureFromMVTs(ResultVTs, ParamVTs);
+  computeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+  auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
   auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
   WasmSym->setSignature(Signature.get());
   addSignature(std::move(Signature));
@@ -180,7 +299,7 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
   }
 
   SmallVector<wasm::ValType, 16> Locals;
-  ValTypesFromMVTs(MFI->getLocals(), Locals);
+  valTypesFromMVTs(MFI->getLocals(), Locals);
   getTargetStreamer()->emitLocal(Locals);
 
   AsmPrinter::EmitFunctionBodyStart();
@@ -250,34 +369,34 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       OutStreamer->AddBlankLine();
     }
     break;
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
+  case WebAssembly::EXTRACT_EXCEPTION_I32:
+  case WebAssembly::EXTRACT_EXCEPTION_I32_S:
+    // These are pseudo instructions that simulate popping values from stack.
+    // We print these only when we have -wasm-keep-registers on for assembly
+    // readability.
+    if (!WasmKeepRegisters)
+      break;
+    LLVM_FALLTHROUGH;
   default: {
     WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
     MCInst TmpInst;
-    MCInstLowering.Lower(MI, TmpInst);
+    MCInstLowering.lower(MI, TmpInst);
     EmitToStreamer(*OutStreamer, TmpInst);
     break;
   }
   }
 }
 
-const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    if (GV->getValueType()->isFunctionTy()) {
-      return MCSymbolRefExpr::create(
-          getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
-    }
-  return AsmPrinter::lowerConstant(CV);
-}
-
 bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
-                                            unsigned OpNo, unsigned AsmVariant,
+                                            unsigned OpNo,
                                             const char *ExtraCode,
                                             raw_ostream &OS) {
-  if (AsmVariant != 0)
-    report_fatal_error("There are no defined alternate asm variants");
-
   // First try the generic code, which knows about modifiers like 'c' and 'n'.
-  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
     return false;
 
   if (!ExtraCode) {
@@ -293,8 +412,7 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
       OS << regToString(MO);
       return false;
     case MachineOperand::MO_GlobalAddress:
-      getSymbol(MO.getGlobal())->print(OS, MAI);
-      printOffset(MO.getOffset(), OS);
+      PrintSymbolOperand(MO, OS);
       return false;
     case MachineOperand::MO_ExternalSymbol:
       GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
@@ -313,19 +431,15 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
 
 bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                                   unsigned OpNo,
-                                                  unsigned AsmVariant,
                                                   const char *ExtraCode,
                                                   raw_ostream &OS) {
-  if (AsmVariant != 0)
-    report_fatal_error("There are no defined alternate asm variants");
-
   // The current approach to inline asm is that "r" constraints are expressed
   // as local indices, rather than values on the operand stack. This simplifies
   // using "r" as it eliminates the need to push and pop the values in a
   // particular order, however it also makes it impossible to have an "m"
   // constraint. So we don't support it.
 
-  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+  return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
 }
 
 // Force static initialization.
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index f6cb5610bad3..4e55c81dec38 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -1,9 +1,8 @@
 // WebAssemblyAsmPrinter.h - WebAssembly implementation of AsmPrinter-*- C++ -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -59,17 +58,16 @@ public:
   //===------------------------------------------------------------------===//
 
   void EmitEndOfAsmFile(Module &M) override;
+  void EmitProducerInfo(Module &M);
+  void EmitTargetFeatures(Module &M);
   void EmitJumpTableInfo() override;
   void EmitConstantPool() override;
   void EmitFunctionBodyStart() override;
   void EmitInstruction(const MachineInstr *MI) override;
-  const MCExpr *lowerConstant(const Constant *CV) override;
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &OS) override;
+                       const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &OS) override;
+                             const char *ExtraCode, raw_ostream &OS) override;
 
   MVT getRegType(unsigned RegNo) const;
   std::string regToString(const MachineOperand &MO);
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index fc827e9d5780..4c5d0192fc28 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyCFGSort.cpp - CFG Sorting ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -35,6 +34,14 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-cfg-sort"
 
+// Option to disable EH pad first sorting. Only for testing unwind destination
+// mismatches in CFGStackify.
+static cl::opt<bool> WasmDisableEHPadSort(
+    "wasm-disable-ehpad-sort", cl::ReallyHidden,
+    cl::desc(
+        "WebAssembly: Disable EH pad-first sort order. Testing purpose only."),
+    cl::init(false));
+
 namespace {
 
 // Wrapper for loops and exceptions
@@ -133,7 +140,7 @@ FunctionPass *llvm::createWebAssemblyCFGSort() {
   return new WebAssemblyCFGSort();
 }
 
-static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
+static void maybeUpdateTerminator(MachineBasicBlock *MBB) {
 #ifndef NDEBUG
   bool AnyBarrier = false;
 #endif
@@ -188,10 +195,12 @@ namespace {
 struct CompareBlockNumbers {
   bool operator()(const MachineBasicBlock *A,
                   const MachineBasicBlock *B) const {
-    if (A->isEHPad() && !B->isEHPad())
-      return false;
-    if (!A->isEHPad() && B->isEHPad())
-      return true;
+    if (!WasmDisableEHPadSort) {
+      if (A->isEHPad() && !B->isEHPad())
+        return false;
+      if (!A->isEHPad() && B->isEHPad())
+        return true;
+    }
 
     return A->getNumber() > B->getNumber();
   }
@@ -200,11 +209,12 @@ struct CompareBlockNumbers {
 struct CompareBlockNumbersBackwards {
   bool operator()(const MachineBasicBlock *A,
                   const MachineBasicBlock *B) const {
-    // We give a higher priority to an EH pad
-    if (A->isEHPad() && !B->isEHPad())
-      return false;
-    if (!A->isEHPad() && B->isEHPad())
-      return true;
+    if (!WasmDisableEHPadSort) {
+      if (A->isEHPad() && !B->isEHPad())
+        return false;
+      if (!A->isEHPad() && B->isEHPad())
+        return true;
+    }
 
     return A->getNumber() < B->getNumber();
   }
@@ -228,7 +238,7 @@ struct Entry {
 /// interrupted by blocks not dominated by their header.
 /// TODO: There are many opportunities for improving the heuristics here.
 /// Explore them.
-static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
                        const WebAssemblyExceptionInfo &WEI,
                        const MachineDominatorTree &MDT) {
   // Prepare for a topological sort: Record the number of predecessors each
@@ -260,10 +270,10 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
                 CompareBlockNumbersBackwards>
       Ready;
 
-  RegionInfo SUI(MLI, WEI);
+  RegionInfo RI(MLI, WEI);
   SmallVector<Entry, 4> Entries;
   for (MachineBasicBlock *MBB = &MF.front();;) {
-    const Region *R = SUI.getRegionFor(MBB);
+    const Region *R = RI.getRegionFor(MBB);
     if (R) {
       // If MBB is a region header, add it to the active region list. We can't
       // put any blocks that it doesn't dominate until we see the end of the
@@ -320,7 +330,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
     if (!Next) {
       // If there are no more blocks to process, we're done.
       if (Ready.empty()) {
-        MaybeUpdateTerminator(MBB);
+        maybeUpdateTerminator(MBB);
         break;
       }
       for (;;) {
@@ -338,7 +348,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
     }
     // Move the next block into place and iterate.
     Next->moveAfter(MBB);
-    MaybeUpdateTerminator(MBB);
+    maybeUpdateTerminator(MBB);
     MBB = Next;
   }
   assert(Entries.empty() && "Active sort region list not finished");
@@ -354,7 +364,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
 
   for (auto &MBB : MF) {
     assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
-    const Region *Region = SUI.getRegionFor(&MBB);
+    const Region *Region = RI.getRegionFor(&MBB);
 
     if (Region && &MBB == Region->getHeader()) {
       if (Region->isLoop()) {
@@ -379,7 +389,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
       for (auto Pred : MBB.predecessors())
         assert(Pred->getNumber() < MBB.getNumber() &&
                "Non-loop-header predecessors should be topologically sorted");
-      assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
+      assert(OnStack.count(RI.getRegionFor(&MBB)) &&
              "Blocks must be nested in their regions");
     }
     while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
@@ -404,7 +414,7 @@ bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
   MF.getRegInfo().invalidateLiveness();
 
   // Sort the blocks, with contiguous sort regions.
-  SortBlocks(MF, MLI, WEI, MDT);
+  sortBlocks(MF, MLI, WEI, MDT);
 
   return true;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index f8f5f4040c86..e6bfc5226e2e 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -22,26 +21,21 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssembly.h"
 #include "WebAssemblyExceptionInfo.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/WasmEHFuncInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "wasm-cfg-stackify"
 
+STATISTIC(NumUnwindMismatches, "Number of EH pad unwind mismatches found");
+
 namespace {
 class WebAssemblyCFGStackify final : public MachineFunctionPass {
   StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
@@ -60,10 +54,13 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
   // over scoped regions when walking blocks.
   SmallVector<MachineBasicBlock *, 8> ScopeTops;
 
+  // Placing markers.
   void placeMarkers(MachineFunction &MF);
   void placeBlockMarker(MachineBasicBlock &MBB);
   void placeLoopMarker(MachineBasicBlock &MBB);
   void placeTryMarker(MachineBasicBlock &MBB);
+  void removeUnnecessaryInstrs(MachineFunction &MF);
+  bool fixUnwindMismatches(MachineFunction &MF);
   void rewriteDepthImmediates(MachineFunction &MF);
   void fixEndsAtEndOfFunction(MachineFunction &MF);
 
@@ -75,16 +72,28 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
   DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad;
   // <EH pad, TRY marker> map
   DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry;
-  // <LOOP|TRY marker, Loop/exception bottom BB> map
-  DenseMap<const MachineInstr *, MachineBasicBlock *> BeginToBottom;
 
-  // Helper functions to register scope information created by marker
-  // instructions.
+  // There can be an appendix block at the end of each function, shared for:
+  // - creating a correct signature for fallthrough returns
+  // - target for rethrows that need to unwind to the caller, but are trapped
+  //   inside another try/catch
+  MachineBasicBlock *AppendixBB = nullptr;
+  MachineBasicBlock *getAppendixBlock(MachineFunction &MF) {
+    if (!AppendixBB) {
+      AppendixBB = MF.CreateMachineBasicBlock();
+      // Give it a fake predecessor so that AsmPrinter prints its label.
+      AppendixBB->addSuccessor(AppendixBB);
+      MF.push_back(AppendixBB);
+    }
+    return AppendixBB;
+  }
+
+  // Helper functions to register / unregister scope information created by
+  // marker instructions.
   void registerScope(MachineInstr *Begin, MachineInstr *End);
   void registerTryScope(MachineInstr *Begin, MachineInstr *End,
                         MachineBasicBlock *EHPad);
-
-  MachineBasicBlock *getBottom(const MachineInstr *Begin);
+  void unregisterScope(MachineInstr *Begin);
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -96,7 +105,7 @@ public:
 
 char WebAssemblyCFGStackify::ID = 0;
 INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
-                "Insert BLOCK and LOOP markers for WebAssembly scopes", false,
+                "Insert BLOCK/LOOP/TRY markers for WebAssembly scopes", false,
                 false)
 
 FunctionPass *llvm::createWebAssemblyCFGStackify() {
@@ -108,14 +117,12 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
 /// code) for a branch instruction to both branch to a block and fallthrough
 /// to it, so we check the actual branch operands to see if there are any
 /// explicit mentions.
-static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
+static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
                                  MachineBasicBlock *MBB) {
   for (MachineInstr &MI : Pred->terminators())
-    // Even if a rethrow takes a BB argument, it is not a branch
-    if (!WebAssembly::isRethrow(MI))
-      for (MachineOperand &MO : MI.explicit_operands())
-        if (MO.isMBB() && MO.getMBB() == MBB)
-          return true;
+    for (MachineOperand &MO : MI.explicit_operands())
+      if (MO.isMBB() && MO.getMBB() == MBB)
+        return true;
   return false;
 }
 
@@ -125,7 +132,7 @@ static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
 // ones that should go after the marker. In this function, AfterSet is only
 // used for sanity checking.
 static MachineBasicBlock::iterator
-GetEarliestInsertPos(MachineBasicBlock *MBB,
+getEarliestInsertPos(MachineBasicBlock *MBB,
                      const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
                      const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
   auto InsertPos = MBB->end();
@@ -149,7 +156,7 @@ GetEarliestInsertPos(MachineBasicBlock *MBB,
 // ones that should go after the marker. In this function, BeforeSet is only
 // used for sanity checking.
 static MachineBasicBlock::iterator
-GetLatestInsertPos(MachineBasicBlock *MBB,
+getLatestInsertPos(MachineBasicBlock *MBB,
                    const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
                    const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
   auto InsertPos = MBB->begin();
@@ -181,33 +188,25 @@ void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
   EHPadToTry[EHPad] = Begin;
 }
 
-// Given a LOOP/TRY marker, returns its bottom BB. Use cached information if any
-// to prevent recomputation.
-MachineBasicBlock *
-WebAssemblyCFGStackify::getBottom(const MachineInstr *Begin) {
-  const auto &MLI = getAnalysis<MachineLoopInfo>();
-  const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
-  if (BeginToBottom.count(Begin))
-    return BeginToBottom[Begin];
-  if (Begin->getOpcode() == WebAssembly::LOOP) {
-    MachineLoop *L = MLI.getLoopFor(Begin->getParent());
-    assert(L);
-    BeginToBottom[Begin] = WebAssembly::getBottom(L);
-  } else if (Begin->getOpcode() == WebAssembly::TRY) {
-    WebAssemblyException *WE = WEI.getExceptionFor(TryToEHPad[Begin]);
-    assert(WE);
-    BeginToBottom[Begin] = WebAssembly::getBottom(WE);
-  } else
-    assert(false);
-  return BeginToBottom[Begin];
+void WebAssemblyCFGStackify::unregisterScope(MachineInstr *Begin) {
+  assert(BeginToEnd.count(Begin));
+  MachineInstr *End = BeginToEnd[Begin];
+  assert(EndToBegin.count(End));
+  BeginToEnd.erase(Begin);
+  EndToBegin.erase(End);
+  MachineBasicBlock *EHPad = TryToEHPad.lookup(Begin);
+  if (EHPad) {
+    assert(EHPadToTry.count(EHPad));
+    TryToEHPad.erase(Begin);
+    EHPadToTry.erase(EHPad);
+  }
 }
 
 /// Insert a BLOCK marker for branches to MBB (if needed).
+// TODO Consider a more generalized way of handling block (and also loop and
+// try) signatures when we implement the multi-value proposal later.
 void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
-  // This should have been handled in placeTryMarker.
-  if (MBB.isEHPad())
-    return;
-
+  assert(!MBB.isEHPad());
   MachineFunction &MF = *MBB.getParent();
   auto &MDT = getAnalysis<MachineDominatorTree>();
   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -218,12 +217,20 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
   // which reduces overall stack height.
   MachineBasicBlock *Header = nullptr;
   bool IsBranchedTo = false;
+  bool IsBrOnExn = false;
+  MachineInstr *BrOnExn = nullptr;
   int MBBNumber = MBB.getNumber();
   for (MachineBasicBlock *Pred : MBB.predecessors()) {
     if (Pred->getNumber() < MBBNumber) {
       Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
-      if (ExplicitlyBranchesTo(Pred, &MBB))
+      if (explicitlyBranchesTo(Pred, &MBB)) {
         IsBranchedTo = true;
+        if (Pred->getFirstTerminator()->getOpcode() == WebAssembly::BR_ON_EXN) {
+          IsBrOnExn = true;
+          assert(!BrOnExn && "There should be only one br_on_exn per block");
+          BrOnExn = &*Pred->getFirstTerminator();
+        }
+      }
     }
   }
   if (!Header)
@@ -232,7 +239,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
     return;
 
   assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
-  MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(&MBB));
+  MachineBasicBlock *LayoutPred = MBB.getPrevNode();
 
   // If the nearest common dominator is inside a more deeply nested context,
   // walk out to the nearest scope which isn't more deeply nested.
@@ -240,7 +247,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
     if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
       if (ScopeTop->getNumber() > Header->getNumber()) {
         // Skip over an intervening scope.
-        I = std::next(MachineFunction::iterator(ScopeTop));
+        I = std::next(ScopeTop->getIterator());
       } else {
         // We found a scope level at an appropriate depth.
         Header = ScopeTop;
@@ -256,13 +263,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
   // Instructions that should go after the BLOCK.
   SmallPtrSet<const MachineInstr *, 4> AfterSet;
   for (const auto &MI : *Header) {
-    // If there is a previously placed LOOP/TRY marker and the bottom block of
-    // the loop/exception is above MBB, it should be after the BLOCK, because
-    // the loop/exception is nested in this block. Otherwise it should be before
-    // the BLOCK.
-    if (MI.getOpcode() == WebAssembly::LOOP ||
-        MI.getOpcode() == WebAssembly::TRY) {
-      if (MBB.getNumber() > getBottom(&MI)->getNumber())
+    // If there is a previously placed LOOP marker and the bottom block of the
+    // loop is above MBB, it should be after the BLOCK, because the loop is
+    // nested in this BLOCK. Otherwise it should be before the BLOCK.
+    if (MI.getOpcode() == WebAssembly::LOOP) {
+      auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+      if (MBB.getNumber() > LoopBottom->getNumber())
         AfterSet.insert(&MI);
 #ifndef NDEBUG
       else
@@ -270,9 +276,10 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
 #endif
     }
 
-    // All previously inserted BLOCK markers should be after the BLOCK because
-    // they are all nested blocks.
-    if (MI.getOpcode() == WebAssembly::BLOCK)
+    // All previously inserted BLOCK/TRY markers should be after the BLOCK
+    // because they are all nested blocks.
+    if (MI.getOpcode() == WebAssembly::BLOCK ||
+        MI.getOpcode() == WebAssembly::TRY)
       AfterSet.insert(&MI);
 
 #ifndef NDEBUG
@@ -300,11 +307,27 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
   }
 
   // Add the BLOCK.
-  auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+
+  // 'br_on_exn' extracts exnref object and pushes variable number of values
+  // depending on its tag. For C++ exception, it's a single i32 value, and the
+  // generated code will be in the form of:
+  // block i32
+  //   br_on_exn 0, $__cpp_exception
+  //   rethrow
+  // end_block
+  WebAssembly::ExprType ReturnType = WebAssembly::ExprType::Void;
+  if (IsBrOnExn) {
+    const char *TagName = BrOnExn->getOperand(1).getSymbolName();
+    if (std::strcmp(TagName, "__cpp_exception") != 0)
+      llvm_unreachable("Only C++ exception is supported");
+    ReturnType = WebAssembly::ExprType::I32;
+  }
+
+  auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
   MachineInstr *Begin =
       BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
               TII.get(WebAssembly::BLOCK))
-          .addImm(int64_t(WebAssembly::ExprType::Void));
+          .addImm(int64_t(ReturnType));
 
   // Decide where in Header to put the END_BLOCK.
   BeforeSet.clear();
@@ -333,7 +356,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
   }
 
   // Mark the end of the block.
-  InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+  InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
   MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
                               TII.get(WebAssembly::END_BLOCK));
   registerScope(Begin, End);
@@ -358,13 +381,10 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
   // The operand of a LOOP is the first block after the loop. If the loop is the
   // bottom of the function, insert a dummy block at the end.
   MachineBasicBlock *Bottom = WebAssembly::getBottom(Loop);
-  auto Iter = std::next(MachineFunction::iterator(Bottom));
+  auto Iter = std::next(Bottom->getIterator());
   if (Iter == MF.end()) {
-    MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
-    // Give it a fake predecessor so that AsmPrinter prints its label.
-    Label->addSuccessor(Label);
-    MF.push_back(Label);
-    Iter = std::next(MachineFunction::iterator(Bottom));
+    getAppendixBlock(MF);
+    Iter = std::next(Bottom->getIterator());
   }
   MachineBasicBlock *AfterLoop = &*Iter;
 
@@ -383,7 +403,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
   }
 
   // Mark the beginning of the loop.
-  auto InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+  auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
   MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
                                 TII.get(WebAssembly::LOOP))
                             .addImm(int64_t(WebAssembly::ExprType::Void));
@@ -400,8 +420,10 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
 
   // Mark the end of the loop (using arbitrary debug location that branched to
   // the loop end as its location).
-  InsertPos = GetEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
-  DebugLoc EndDL = (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
+  InsertPos = getEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
+  DebugLoc EndDL = AfterLoop->pred_empty()
+                       ? DebugLoc()
+                       : (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
   MachineInstr *End =
       BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
   registerScope(Begin, End);
@@ -414,14 +436,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
 }
 
 void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
-  if (!MBB.isEHPad())
-    return;
-
-  // catch_all terminate pad is grouped together with catch terminate pad and
-  // does not need a separate TRY and END_TRY marker.
-  if (WebAssembly::isCatchAllTerminatePad(MBB))
-    return;
-
+  assert(MBB.isEHPad());
   MachineFunction &MF = *MBB.getParent();
   auto &MDT = getAnalysis<MachineDominatorTree>();
   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -434,7 +449,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
   for (auto *Pred : MBB.predecessors()) {
     if (Pred->getNumber() < MBBNumber) {
       Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
-      assert(!ExplicitlyBranchesTo(Pred, &MBB) &&
+      assert(!explicitlyBranchesTo(Pred, &MBB) &&
              "Explicit branch to an EH pad!");
     }
   }
@@ -447,19 +462,15 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
   assert(WE);
   MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
 
-  auto Iter = std::next(MachineFunction::iterator(Bottom));
+  auto Iter = std::next(Bottom->getIterator());
   if (Iter == MF.end()) {
-    MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
-    // Give it a fake predecessor so that AsmPrinter prints its label.
-    Label->addSuccessor(Label);
-    MF.push_back(Label);
-    Iter = std::next(MachineFunction::iterator(Bottom));
+    getAppendixBlock(MF);
+    Iter = std::next(Bottom->getIterator());
   }
-  MachineBasicBlock *AfterTry = &*Iter;
+  MachineBasicBlock *Cont = &*Iter;
 
-  assert(AfterTry != &MF.front());
-  MachineBasicBlock *LayoutPred =
-      &*std::prev(MachineFunction::iterator(AfterTry));
+  assert(Cont != &MF.front());
+  MachineBasicBlock *LayoutPred = Cont->getPrevNode();
 
   // If the nearest common dominator is inside a more deeply nested context,
   // walk out to the nearest scope which isn't more deeply nested.
@@ -467,7 +478,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
     if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
       if (ScopeTop->getNumber() > Header->getNumber()) {
         // Skip over an intervening scope.
-        I = std::next(MachineFunction::iterator(ScopeTop));
+        I = std::next(ScopeTop->getIterator());
       } else {
         // We found a scope level at an appropriate depth.
         Header = ScopeTop;
@@ -478,16 +489,17 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
 
   // Decide where in Header to put the TRY.
 
-  // Instructions that should go before the BLOCK.
+  // Instructions that should go before the TRY.
   SmallPtrSet<const MachineInstr *, 4> BeforeSet;
-  // Instructions that should go after the BLOCK.
+  // Instructions that should go after the TRY.
   SmallPtrSet<const MachineInstr *, 4> AfterSet;
   for (const auto &MI : *Header) {
-    // If there is a previously placed LOOP marker and the bottom block of
-    // the loop is above MBB, the LOOP should be after the TRY, because the
-    // loop is nested in this try. Otherwise it should be before the TRY.
+    // If there is a previously placed LOOP marker and the bottom block of the
+    // loop is above MBB, it should be after the TRY, because the loop is nested
+    // in this TRY. Otherwise it should be before the TRY.
     if (MI.getOpcode() == WebAssembly::LOOP) {
-      if (MBB.getNumber() > Bottom->getNumber())
+      auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+      if (MBB.getNumber() > LoopBottom->getNumber())
         AfterSet.insert(&MI);
 #ifndef NDEBUG
       else
@@ -495,14 +507,16 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
 #endif
     }
 
-    // All previously inserted TRY markers should be after the TRY because they
-    // are all nested trys.
-    if (MI.getOpcode() == WebAssembly::TRY)
+    // All previously inserted BLOCK/TRY markers should be after the TRY because
+    // they are all nested trys.
+    if (MI.getOpcode() == WebAssembly::BLOCK ||
+        MI.getOpcode() == WebAssembly::TRY)
       AfterSet.insert(&MI);
 
 #ifndef NDEBUG
-    // All END_(LOOP/TRY) markers should be before the TRY.
-    if (MI.getOpcode() == WebAssembly::END_LOOP ||
+    // All END_(BLOCK/LOOP/TRY) markers should be before the TRY.
+    if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+        MI.getOpcode() == WebAssembly::END_LOOP ||
         MI.getOpcode() == WebAssembly::END_TRY)
       BeforeSet.insert(&MI);
 #endif
@@ -530,10 +544,16 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
   // throw.
   if (MBB.isPredecessor(Header)) {
     auto TermPos = Header->getFirstTerminator();
-    if (TermPos == Header->end() || !WebAssembly::isRethrow(*TermPos)) {
+    if (TermPos == Header->end() ||
+        TermPos->getOpcode() != WebAssembly::RETHROW) {
       for (const auto &MI : reverse(*Header)) {
         if (MI.isCall()) {
           AfterSet.insert(&MI);
+          // Possibly throwing calls are usually wrapped by EH_LABEL
+          // instructions. We don't want to split them and the call.
+          if (MI.getIterator() != Header->begin() &&
+              std::prev(MI.getIterator())->isEHLabel())
+            AfterSet.insert(&*std::prev(MI.getIterator()));
           break;
         }
       }
@@ -541,7 +561,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
   }
 
   // Add the TRY.
-  auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+  auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
   MachineInstr *Begin =
       BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
               TII.get(WebAssembly::TRY))
@@ -550,10 +570,11 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
   // Decide where in Header to put the END_TRY.
   BeforeSet.clear();
   AfterSet.clear();
-  for (const auto &MI : *AfterTry) {
+  for (const auto &MI : *Cont) {
 #ifndef NDEBUG
-    // END_TRY should precede existing LOOP markers.
-    if (MI.getOpcode() == WebAssembly::LOOP)
+    // END_TRY should precede existing LOOP and BLOCK markers.
+    if (MI.getOpcode() == WebAssembly::LOOP ||
+        MI.getOpcode() == WebAssembly::BLOCK)
       AfterSet.insert(&MI);
 
     // All END_TRY markers placed earlier belong to exceptions that contains
@@ -567,31 +588,595 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
     // the END_TRY marker should go after that. Otherwise, the whole try-catch
     // is contained within this loop, so the END_TRY should go before that.
     if (MI.getOpcode() == WebAssembly::END_LOOP) {
-      if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+      // For a LOOP to be after TRY, LOOP's BB should be after TRY's BB; if they
+      // are in the same BB, LOOP is always before TRY.
+      if (EndToBegin[&MI]->getParent()->getNumber() > Header->getNumber())
         BeforeSet.insert(&MI);
 #ifndef NDEBUG
       else
         AfterSet.insert(&MI);
 #endif
     }
+
+    // It is not possible for an END_BLOCK to be already in this block.
   }
 
   // Mark the end of the TRY.
-  InsertPos = GetEarliestInsertPos(AfterTry, BeforeSet, AfterSet);
+  InsertPos = getEarliestInsertPos(Cont, BeforeSet, AfterSet);
   MachineInstr *End =
-      BuildMI(*AfterTry, InsertPos, Bottom->findBranchDebugLoc(),
+      BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(),
               TII.get(WebAssembly::END_TRY));
   registerTryScope(Begin, End, &MBB);
 
-  // Track the farthest-spanning scope that ends at this point.
-  int Number = AfterTry->getNumber();
-  if (!ScopeTops[Number] ||
-      ScopeTops[Number]->getNumber() > Header->getNumber())
-    ScopeTops[Number] = Header;
+  // Track the farthest-spanning scope that ends at this point. We create two
+  // mappings: (BB with 'end_try' -> BB with 'try') and (BB with 'catch' -> BB
+  // with 'try'). We need to create 'catch' -> 'try' mapping here too because
+  // markers should not span across 'catch'. For example, this should not
+  // happen:
+  //
+  // try
+  //   block     --|  (X)
+  // catch         |
+  //   end_block --|
+  // end_try
+  for (int Number : {Cont->getNumber(), MBB.getNumber()}) {
+    if (!ScopeTops[Number] ||
+        ScopeTops[Number]->getNumber() > Header->getNumber())
+      ScopeTops[Number] = Header;
+  }
+}
+
+void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+  // When there is an unconditional branch right before a catch instruction and
+  // it branches to the end of end_try marker, we don't need the branch, because
+  // if there is no exception, the control flow transfers to that point anyway.
+  // bb0:
+  //   try
+  //     ...
+  //     br bb2      <- Not necessary
+  // bb1:
+  //   catch
+  //     ...
+  // bb2:
+  //   end
+  for (auto &MBB : MF) {
+    if (!MBB.isEHPad())
+      continue;
+
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();
+    MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent();
+    bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+    if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) ||
+                       (!Cond.empty() && FBB && FBB == Cont)))
+      TII.removeBranch(*EHPadLayoutPred);
+  }
+
+  // When there are block / end_block markers that overlap with try / end_try
+  // markers, and the block and try markers' return types are the same, the
+  // block / end_block markers are not necessary, because try / end_try markers
+  // also can serve as boundaries for branches.
+  // block         <- Not necessary
+  //   try
+  //     ...
+  //   catch
+  //     ...
+  //   end
+  // end           <- Not necessary
+  SmallVector<MachineInstr *, 32> ToDelete;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.getOpcode() != WebAssembly::TRY)
+        continue;
+
+      MachineInstr *Try = &MI, *EndTry = BeginToEnd[Try];
+      MachineBasicBlock *TryBB = Try->getParent();
+      MachineBasicBlock *Cont = EndTry->getParent();
+      int64_t RetType = Try->getOperand(0).getImm();
+      for (auto B = Try->getIterator(), E = std::next(EndTry->getIterator());
+           B != TryBB->begin() && E != Cont->end() &&
+           std::prev(B)->getOpcode() == WebAssembly::BLOCK &&
+           E->getOpcode() == WebAssembly::END_BLOCK &&
+           std::prev(B)->getOperand(0).getImm() == RetType;
+           --B, ++E) {
+        ToDelete.push_back(&*std::prev(B));
+        ToDelete.push_back(&*E);
+      }
+    }
+  }
+  for (auto *MI : ToDelete) {
+    if (MI->getOpcode() == WebAssembly::BLOCK)
+      unregisterScope(MI);
+    MI->eraseFromParent();
+  }
+}
+
+bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Linearizing the control flow by placing TRY / END_TRY markers can create
+  // mismatches in unwind destinations. There are two kinds of mismatches we
+  // try to solve here.
+
+  // 1. When an instruction may throw, but the EH pad it will unwind to can be
+  //    different from the original CFG.
+  //
+  // Example: we have the following CFG:
+  // bb0:
+  //   call @foo (if it throws, unwind to bb2)
+  // bb1:
+  //   call @bar (if it throws, unwind to bb3)
+  // bb2 (ehpad):
+  //   catch
+  //   ...
+  // bb3 (ehpad)
+  //   catch
+  //   handler body
+  //
+  // And the CFG is sorted in this order. Then after placing TRY markers, it
+  // will look like: (BB markers are omitted)
+  // try $label1
+  //   try
+  //     call @foo
+  //     call @bar   (if it throws, unwind to bb3)
+  //   catch         <- ehpad (bb2)
+  //     ...
+  //   end_try
+  // catch           <- ehpad (bb3)
+  //   handler body
+  // end_try
+  //
+  // Now if bar() throws, it is going to end up in bb2, not bb3, where it
+  // is supposed to end up. We solve this problem by
+  // a. Split the target unwind EH pad (here bb3) so that the handler body is
+  //    right after 'end_try', which means we extract the handler body out of
+  //    the catch block. We do this because this handler body should be
+  //    somewhere reachable by a branch from the inner scope.
+  // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+  //    here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branches to the handler body.
+  // c. Place a branch after the newly inserted nested end_try so it can bypass
+  //    the handler body, which is now outside of a catch block.
+  //
+  // The result will look as follows. (new: a) means this instruction is newly
+  // created in the process of doing 'a' above.
+  //
+  // block $label0                 (new: placeBlockMarker)
+  //   try $label1
+  //     try
+  //       call @foo
+  //       try                     (new: b)
+  //         call @bar
+  //       catch                   (new: b)
+  //         local.set n / drop    (new: b)
+  //         br $label1            (new: b)
+  //       end_try                 (new: b)
+  //     catch                     <- ehpad (bb2)
+  //     end_try
+  //     br $label0                (new: c)
+  //   catch                       <- ehpad (bb3)
+  //   end_try                     (hoisted: a)
+  //   handler body
+  // end_block                     (new: placeBlockMarker)
+  //
+  // Note that the new wrapping block/end_block will be generated later in
+  // placeBlockMarker.
+  //
+  // TODO Currently local.set and local.gets are generated to move exnref value
+  // created by catches. That's because we don't support yielding values from a
+  // block in LLVM machine IR yet, even though it is supported by wasm. Delete
+  // unnecessary local.get/local.sets once yielding values from a block is
+  // supported. The full EH spec requires multi-value support to do this, but
+  // for C++ we don't yet need it because we only throw a single i32.
+  //
+  // ---
+  // 2. The same as 1, but in this case an instruction unwinds to a caller
+  //    function and not another EH pad.
+  //
+  // Example: we have the following CFG:
+  // bb0:
+  //   call @foo (if it throws, unwind to bb2)
+  // bb1:
+  //   call @bar (if it throws, unwind to caller)
+  // bb2 (ehpad):
+  //   catch
+  //   ...
+  //
+  // And the CFG is sorted in this order. Then after placing TRY markers, it
+  // will look like:
+  // try
+  //   call @foo
+  //   call @bar   (if it throws, unwind to caller)
+  // catch         <- ehpad (bb2)
+  //   ...
+  // end_try
+  //
+  // Now if bar() throws, it is going to end up in bb2, when it is supposed
+  // to throw up to the caller.
+  // We solve this problem by
+  // a. Create a new 'appendix' BB at the end of the function and put a single
+  //    'rethrow' instruction (+ local.get) in there.
+  // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+  //    here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branches to the new appendix block.
+  //
+  // block $label0          (new: placeBlockMarker)
+  //   try
+  //     call @foo
+  //     try                (new: b)
+  //       call @bar
+  //     catch              (new: b)
+  //       local.set n      (new: b)
+  //       br $label0       (new: b)
+  //     end_try            (new: b)
+  //   catch                <- ehpad (bb2)
+  //     ...
+  //   end_try
+  // ...
+  // end_block              (new: placeBlockMarker)
+  // local.get n            (new: a)  <- appendix block
+  // rethrow                (new: a)
+  //
+  // In case there are multiple calls in a BB that may throw to the caller, they
+  // can be wrapped together in one nested try scope. (In 1, this couldn't
+  // happen, because may-throwing instruction there had an unwind destination,
+  // i.e., it was an invoke before, and there could be only one invoke within a
+  // BB.)
+
+  SmallVector<const MachineBasicBlock *, 8> EHPadStack;
+  // Range of instructions to be wrapped in a new nested try/catch
+  using TryRange = std::pair<MachineInstr *, MachineInstr *>;
+  // In original CFG, <unwind destination BB, a vector of try ranges>
+  DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
+  // In new CFG, <destination to branch to, a vector of try ranges>
+  DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
+  // In new CFG, <destination to branch to, register containing exnref>
+  DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
+
+  // Gather possibly throwing calls (i.e., previously invokes) whose current
+  // unwind destination is not the same as the original CFG.
+  for (auto &MBB : reverse(MF)) {
+    bool SeenThrowableInstInBB = false;
+    for (auto &MI : reverse(MBB)) {
+      if (MI.getOpcode() == WebAssembly::TRY)
+        EHPadStack.pop_back();
+      else if (MI.getOpcode() == WebAssembly::CATCH)
+        EHPadStack.push_back(MI.getParent());
+
+      // In this loop we only gather calls that have an EH pad to unwind. So
+      // there will be at most 1 such call (= invoke) in a BB, so after we've
+      // seen one, we can skip the rest of BB. Also if MBB has no EH pad
+      // successor or MI does not throw, this is not an invoke.
+      if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() ||
+          !WebAssembly::mayThrow(MI))
+        continue;
+      SeenThrowableInstInBB = true;
+
+      // If the EH pad on the stack top is where this instruction should unwind
+      // next, we're good.
+      MachineBasicBlock *UnwindDest = nullptr;
+      for (auto *Succ : MBB.successors()) {
+        if (Succ->isEHPad()) {
+          UnwindDest = Succ;
+          break;
+        }
+      }
+      if (EHPadStack.back() == UnwindDest)
+        continue;
+
+      // If not, record the range.
+      UnwindDestToTryRanges[UnwindDest].push_back(TryRange(&MI, &MI));
+    }
+  }
+
+  assert(EHPadStack.empty());
+
+  // Gather possibly throwing calls that are supposed to unwind up to the caller
+  // if they throw, but currently unwind to an incorrect destination. Unlike the
+  // loop above, there can be multiple calls within a BB that unwind to the
+  // caller, which we should group together in a range.
+  bool NeedAppendixBlock = false;
+  for (auto &MBB : reverse(MF)) {
+    MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive
+    for (auto &MI : reverse(MBB)) {
+      if (MI.getOpcode() == WebAssembly::TRY)
+        EHPadStack.pop_back();
+      else if (MI.getOpcode() == WebAssembly::CATCH)
+        EHPadStack.push_back(MI.getParent());
+
+      // If MBB has an EH pad successor, this inst does not unwind to caller.
+      if (MBB.hasEHPadSuccessor())
+        continue;
+
+      // We wrap up the current range when we see a marker even if we haven't
+      // finished a BB.
+      if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) {
+        NeedAppendixBlock = true;
+        // Record the range. nullptr here means the unwind destination is the
+        // caller.
+        UnwindDestToTryRanges[nullptr].push_back(
+            TryRange(RangeBegin, RangeEnd));
+        RangeBegin = RangeEnd = nullptr; // Reset range pointers
+      }
+
+      // If EHPadStack is empty, that means it correctly unwinds to the caller
+      // if it throws, so we're good. If MI does not throw, we're good too.
+      if (EHPadStack.empty() || !WebAssembly::mayThrow(MI))
+        continue;
+
+      // We found an instruction that unwinds to the caller but currently has an
+      // incorrect unwind destination. Create a new range or increment the
+      // currently existing range.
+      if (!RangeEnd)
+        RangeBegin = RangeEnd = &MI;
+      else
+        RangeBegin = &MI;
+    }
+
+    if (RangeEnd) {
+      NeedAppendixBlock = true;
+      // Record the range. nullptr here means the unwind destination is the
+      // caller.
+      UnwindDestToTryRanges[nullptr].push_back(TryRange(RangeBegin, RangeEnd));
+      RangeBegin = RangeEnd = nullptr; // Reset range pointers
+    }
+  }
+
+  assert(EHPadStack.empty());
+  // We don't have any unwind destination mismatches to resolve.
+  if (UnwindDestToTryRanges.empty())
+    return false;
+
+  // If we found instructions that should unwind to the caller but currently
+  // have incorrect unwind destination, we create an appendix block at the end
+  // of the function with a local.get and a rethrow instruction.
+  if (NeedAppendixBlock) {
+    auto *AppendixBB = getAppendixBlock(MF);
+    unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+    BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
+        .addReg(ExnReg);
+    // These instruction ranges should branch to this appendix BB.
+    for (auto Range : UnwindDestToTryRanges[nullptr])
+      BrDestToTryRanges[AppendixBB].push_back(Range);
+    BrDestToExnReg[AppendixBB] = ExnReg;
+  }
+
+  // We loop through unwind destination EH pads that are targeted from some
+  // inner scopes. Because these EH pads are destination of more than one scope
+  // now, we split them so that the handler body is after 'end_try'.
+  // - Before
+  // ehpad:
+  //   catch
+  //   local.set n / drop
+  //   handler body
+  // ...
+  // cont:
+  //   end_try
+  //
+  // - After
+  // ehpad:
+  //   catch
+  //   local.set n / drop
+  // brdest:               (new)
+  //   end_try             (hoisted from 'cont' BB)
+  //   handler body        (taken from 'ehpad')
+  // ...
+  // cont:
+  for (auto &P : UnwindDestToTryRanges) {
+    NumUnwindMismatches++;
+
+    // This means the destination is the appendix BB, which was separately
+    // handled above.
+    if (!P.first)
+      continue;
+
+    MachineBasicBlock *EHPad = P.first;
+
+    // Find 'catch' and 'local.set' or 'drop' instruction that follows the
+    // 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should be
+    // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is
+    // generated after 'catch' in LateEHPrepare and we don't support blocks
+    // taking values yet.
+    MachineInstr *Catch = nullptr;
+    unsigned ExnReg = 0;
+    for (auto &MI : *EHPad) {
+      switch (MI.getOpcode()) {
+      case WebAssembly::CATCH:
+        Catch = &MI;
+        ExnReg = Catch->getOperand(0).getReg();
+        break;
+      }
+    }
+    assert(Catch && "EH pad does not have a catch");
+    assert(ExnReg != 0 && "Invalid register");
+
+    auto SplitPos = std::next(Catch->getIterator());
+
+    // Create a new BB that's gonna be the destination for branches from the
+    // inner mismatched scope.
+    MachineInstr *BeginTry = EHPadToTry[EHPad];
+    MachineInstr *EndTry = BeginToEnd[BeginTry];
+    MachineBasicBlock *Cont = EndTry->getParent();
+    auto *BrDest = MF.CreateMachineBasicBlock();
+    MF.insert(std::next(EHPad->getIterator()), BrDest);
+    // Hoist up the existing 'end_try'.
+    BrDest->insert(BrDest->end(), EndTry->removeFromParent());
+    // Take out the handler body from EH pad to the new branch destination BB.
+    BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
+    // Fix predecessor-successor relationship.
+    BrDest->transferSuccessors(EHPad);
+    EHPad->addSuccessor(BrDest);
+
+    // All try ranges that were supposed to unwind to this EH pad now have to
+    // branch to this new branch dest BB.
+    for (auto Range : UnwindDestToTryRanges[EHPad])
+      BrDestToTryRanges[BrDest].push_back(Range);
+    BrDestToExnReg[BrDest] = ExnReg;
+
+    // In case we fall through to the continuation BB after the catch block, we
+    // now have to add a branch to it.
+    // - Before
+    // try
+    //   ...
+    //   (falls through to 'cont')
+    // catch
+    //   handler body
+    // end
+    //               <-- cont
+    //
+    // - After
+    // try
+    //   ...
+    //   br %cont    (new)
+    // catch
+    // end
+    // handler body
+    //               <-- cont
+    MachineBasicBlock *EHPadLayoutPred = &*std::prev(EHPad->getIterator());
+    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+    SmallVector<MachineOperand, 4> Cond;
+    bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+    if (Analyzable && !TBB && !FBB) {
+      DebugLoc DL = EHPadLayoutPred->empty()
+                        ? DebugLoc()
+                        : EHPadLayoutPred->rbegin()->getDebugLoc();
+      BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont);
+    }
+  }
+
+  // For possibly throwing calls whose unwind destinations are currently
+  // incorrect because of CFG linearization, we wrap them with a nested
+  // try/catch/end_try, and within the new catch block, we branch to the correct
+  // handler.
+  // - Before
+  // mbb:
+  //   call @foo       <- Unwind destination mismatch!
+  // ehpad:
+  //   ...
+  //
+  // - After
+  // mbb:
+  //   try                (new)
+  //   call @foo
+  // nested-ehpad:        (new)
+  //   catch              (new)
+  //   local.set n / drop (new)
+  //   br %brdest         (new)
+  // nested-end:          (new)
+  //   end_try            (new)
+  // ehpad:
+  //   ...
+  for (auto &P : BrDestToTryRanges) {
+    MachineBasicBlock *BrDest = P.first;
+    auto &TryRanges = P.second;
+    unsigned ExnReg = BrDestToExnReg[BrDest];
+
+    for (auto Range : TryRanges) {
+      MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr;
+      std::tie(RangeBegin, RangeEnd) = Range;
+      auto *MBB = RangeBegin->getParent();
+
+      // Include possible EH_LABELs in the range
+      if (RangeBegin->getIterator() != MBB->begin() &&
+          std::prev(RangeBegin->getIterator())->isEHLabel())
+        RangeBegin = &*std::prev(RangeBegin->getIterator());
+      if (std::next(RangeEnd->getIterator()) != MBB->end() &&
+          std::next(RangeEnd->getIterator())->isEHLabel())
+        RangeEnd = &*std::next(RangeEnd->getIterator());
+
+      MachineBasicBlock *EHPad = nullptr;
+      for (auto *Succ : MBB->successors()) {
+        if (Succ->isEHPad()) {
+          EHPad = Succ;
+          break;
+        }
+      }
+
+      // Create the nested try instruction.
+      MachineInstr *NestedTry =
+          BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(),
+                  TII.get(WebAssembly::TRY))
+              .addImm(int64_t(WebAssembly::ExprType::Void));
+
+      // Create the nested EH pad and fill instructions in.
+      MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock();
+      MF.insert(std::next(MBB->getIterator()), NestedEHPad);
+      NestedEHPad->setIsEHPad();
+      NestedEHPad->setIsEHScopeEntry();
+      BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::CATCH),
+              ExnReg);
+      BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::BR))
+          .addMBB(BrDest);
+
+      // Create the nested continuation BB and end_try instruction.
+      MachineBasicBlock *NestedCont = MF.CreateMachineBasicBlock();
+      MF.insert(std::next(NestedEHPad->getIterator()), NestedCont);
+      MachineInstr *NestedEndTry =
+          BuildMI(*NestedCont, NestedCont->begin(), RangeEnd->getDebugLoc(),
+                  TII.get(WebAssembly::END_TRY));
+      // In case MBB has more instructions after the try range, move them to the
+      // new nested continuation BB.
+      NestedCont->splice(NestedCont->end(), MBB,
+                         std::next(RangeEnd->getIterator()), MBB->end());
+      registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
+
+      // Fix predecessor-successor relationship.
+      NestedCont->transferSuccessors(MBB);
+      if (EHPad)
+        NestedCont->removeSuccessor(EHPad);
+      MBB->addSuccessor(NestedEHPad);
+      MBB->addSuccessor(NestedCont);
+      NestedEHPad->addSuccessor(BrDest);
+    }
+  }
+
+  // Renumber BBs and recalculate ScopeTop info because new BBs might have been
+  // created and inserted above.
+  MF.RenumberBlocks();
+  ScopeTops.clear();
+  ScopeTops.resize(MF.getNumBlockIDs());
+  for (auto &MBB : reverse(MF)) {
+    for (auto &MI : reverse(MBB)) {
+      if (ScopeTops[MBB.getNumber()])
+        break;
+      switch (MI.getOpcode()) {
+      case WebAssembly::END_BLOCK:
+      case WebAssembly::END_LOOP:
+      case WebAssembly::END_TRY:
+        ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent();
+        break;
+      case WebAssembly::CATCH:
+        ScopeTops[MBB.getNumber()] = EHPadToTry[&MBB]->getParent();
+        break;
+      }
+    }
+  }
+
+  // Recompute the dominator tree.
+  getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
+
+  // Place block markers for newly added branches.
+  SmallVector <MachineBasicBlock *, 8> BrDests;
+  for (auto &P : BrDestToTryRanges)
+    BrDests.push_back(P.first);
+  llvm::sort(BrDests,
+             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+               auto ANum = A->getNumber();
+               auto BNum = B->getNumber();
+               return ANum < BNum;
+             });
+  for (auto *Dest : BrDests)
+    placeBlockMarker(*Dest);
+
+  return true;
 }
 
 static unsigned
-GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+getDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
          const MachineBasicBlock *MBB) {
   unsigned Depth = 0;
   for (auto X : reverse(Stack)) {
@@ -617,19 +1202,19 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
   if (MFI.getResults().empty())
     return;
 
-  WebAssembly::ExprType retType;
+  WebAssembly::ExprType RetType;
   switch (MFI.getResults().front().SimpleTy) {
   case MVT::i32:
-    retType = WebAssembly::ExprType::I32;
+    RetType = WebAssembly::ExprType::I32;
     break;
   case MVT::i64:
-    retType = WebAssembly::ExprType::I64;
+    RetType = WebAssembly::ExprType::I64;
     break;
   case MVT::f32:
-    retType = WebAssembly::ExprType::F32;
+    RetType = WebAssembly::ExprType::F32;
     break;
   case MVT::f64:
-    retType = WebAssembly::ExprType::F64;
+    RetType = WebAssembly::ExprType::F64;
     break;
   case MVT::v16i8:
   case MVT::v8i16:
@@ -637,10 +1222,10 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
   case MVT::v2i64:
   case MVT::v4f32:
   case MVT::v2f64:
-    retType = WebAssembly::ExprType::V128;
+    RetType = WebAssembly::ExprType::V128;
     break;
-  case MVT::ExceptRef:
-    retType = WebAssembly::ExprType::ExceptRef;
+  case MVT::exnref:
+    RetType = WebAssembly::ExprType::Exnref;
     break;
   default:
     llvm_unreachable("unexpected return type");
@@ -651,11 +1236,11 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
       if (MI.isPosition() || MI.isDebugInstr())
         continue;
       if (MI.getOpcode() == WebAssembly::END_BLOCK) {
-        EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
+        EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
         continue;
       }
       if (MI.getOpcode() == WebAssembly::END_LOOP) {
-        EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
+        EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
         continue;
       }
       // Something other than an `end`. We're done.
@@ -666,7 +1251,7 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
 
 // WebAssembly functions end with an end instruction, as if the function body
 // were a block.
-static void AppendEndToFunction(MachineFunction &MF,
+static void appendEndToFunction(MachineFunction &MF,
                                 const WebAssemblyInstrInfo &TII) {
   BuildMI(MF.back(), MF.back().end(),
           MF.back().findPrevDebugLoc(MF.back().end()),
@@ -675,66 +1260,42 @@ static void AppendEndToFunction(MachineFunction &MF,
 
 /// Insert LOOP/TRY/BLOCK markers at appropriate places.
 void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
-  const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
   // We allocate one more than the number of blocks in the function to
   // accommodate for the possible fake block we may insert at the end.
   ScopeTops.resize(MF.getNumBlockIDs() + 1);
   // Place the LOOP for MBB if MBB is the header of a loop.
   for (auto &MBB : MF)
     placeLoopMarker(MBB);
-  // Place the TRY for MBB if MBB is the EH pad of an exception.
-  if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
-      MF.getFunction().hasPersonalityFn())
-    for (auto &MBB : MF)
-      placeTryMarker(MBB);
-  // Place the BLOCK for MBB if MBB is branched to from above.
-  for (auto &MBB : MF)
-    placeBlockMarker(MBB);
+
+  const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+  for (auto &MBB : MF) {
+    if (MBB.isEHPad()) {
+      // Place the TRY for MBB if MBB is the EH pad of an exception.
+      if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+          MF.getFunction().hasPersonalityFn())
+        placeTryMarker(MBB);
+    } else {
+      // Place the BLOCK for MBB if MBB is branched to from above.
+      placeBlockMarker(MBB);
+    }
+  }
+  // Fix mismatches in unwind destinations induced by linearizing the code.
+  fixUnwindMismatches(MF);
 }
 
 void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
-  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
   // Now rewrite references to basic blocks to be depth immediates.
-  // We need two stacks: one for normal scopes and the other for EH pad scopes.
-  // EH pad stack is used to rewrite depths in rethrow instructions.
   SmallVector<const MachineBasicBlock *, 8> Stack;
-  SmallVector<const MachineBasicBlock *, 8> EHPadStack;
   for (auto &MBB : reverse(MF)) {
     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
       MachineInstr &MI = *I;
       switch (MI.getOpcode()) {
       case WebAssembly::BLOCK:
-        assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
-                   MBB.getNumber() &&
-               "Block/try should be balanced");
-        Stack.pop_back();
-        break;
-
       case WebAssembly::TRY:
         assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
                    MBB.getNumber() &&
                "Block/try marker should be balanced");
         Stack.pop_back();
-        EHPadStack.pop_back();
-        break;
-
-      case WebAssembly::CATCH_I32:
-      case WebAssembly::CATCH_I64:
-      case WebAssembly::CATCH_ALL:
-        // Currently the only case there are more than one catch for a try is
-        // for catch terminate pad, in the form of
-        //   try
-        //   catch
-        //     call @__clang_call_terminate
-        //     unreachable
-        //   catch_all
-        //     call @std::terminate
-        //     unreachable
-        //   end
-        // So we shouldn't push the current BB for the second catch_all block
-        // here.
-        if (!WebAssembly::isCatchAllTerminatePad(MBB))
-          EHPadStack.push_back(&MBB);
         break;
 
       case WebAssembly::LOOP:
@@ -751,23 +1312,6 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
         Stack.push_back(EndToBegin[&MI]->getParent());
         break;
 
-      case WebAssembly::RETHROW: {
-        // Rewrite MBB operands to be depth immediates.
-        unsigned EHPadDepth = GetDepth(EHPadStack, MI.getOperand(0).getMBB());
-        MI.RemoveOperand(0);
-        MI.addOperand(MF, MachineOperand::CreateImm(EHPadDepth));
-        break;
-      }
-
-      case WebAssembly::RETHROW_TO_CALLER: {
-        MachineInstr *Rethrow =
-            BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(WebAssembly::RETHROW))
-                .addImm(EHPadStack.size());
-        MI.eraseFromParent();
-        I = MachineBasicBlock::reverse_iterator(Rethrow);
-        break;
-      }
-
       default:
         if (MI.isTerminator()) {
           // Rewrite MBB operands to be depth immediates.
@@ -776,7 +1320,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
             MI.RemoveOperand(MI.getNumOperands() - 1);
           for (auto MO : Ops) {
             if (MO.isMBB())
-              MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
+              MO = MachineOperand::CreateImm(getDepth(Stack, MO.getMBB()));
             MI.addOperand(MF, MO);
           }
         }
@@ -793,13 +1337,14 @@ void WebAssemblyCFGStackify::releaseMemory() {
   EndToBegin.clear();
   TryToEHPad.clear();
   EHPadToTry.clear();
-  BeginToBottom.clear();
+  AppendixBB = nullptr;
 }
 
 bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "********** CFG Stackifying **********\n"
                        "********** Function: "
                     << MF.getName() << '\n');
+  const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
 
   releaseMemory();
 
@@ -809,6 +1354,11 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
   // Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
   placeMarkers(MF);
 
+  // Remove unnecessary instructions possibly introduced by try/end_trys.
+  if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+      MF.getFunction().hasPersonalityFn())
+    removeUnnecessaryInstrs(MF);
+
   // Convert MBB operands in terminators to relative depth immediates.
   rewriteDepthImmediates(MF);
 
@@ -821,7 +1371,8 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
   if (!MF.getSubtarget<WebAssemblySubtarget>()
            .getTargetTriple()
            .isOSBinFormatELF())
-    AppendEndToFunction(MF, TII);
+    appendEndToFunction(MF, TII);
 
+  MF.getInfo<WebAssemblyFunctionInfo>()->setCFGStackified();
   return true;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index aaa6d286598f..2537e6042b1e 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -61,19 +60,19 @@ FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
   return new WebAssemblyCallIndirectFixup();
 }
 
-static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
+static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
     using namespace WebAssembly;
   case PCALL_INDIRECT_VOID:
     return CALL_INDIRECT_VOID;
-  case PCALL_INDIRECT_I32:
-    return CALL_INDIRECT_I32;
-  case PCALL_INDIRECT_I64:
-    return CALL_INDIRECT_I64;
-  case PCALL_INDIRECT_F32:
-    return CALL_INDIRECT_F32;
-  case PCALL_INDIRECT_F64:
-    return CALL_INDIRECT_F64;
+  case PCALL_INDIRECT_i32:
+    return CALL_INDIRECT_i32;
+  case PCALL_INDIRECT_i64:
+    return CALL_INDIRECT_i64;
+  case PCALL_INDIRECT_f32:
+    return CALL_INDIRECT_f32;
+  case PCALL_INDIRECT_f64:
+    return CALL_INDIRECT_f64;
   case PCALL_INDIRECT_v16i8:
     return CALL_INDIRECT_v16i8;
   case PCALL_INDIRECT_v8i16:
@@ -86,13 +85,17 @@ static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
     return CALL_INDIRECT_v4f32;
   case PCALL_INDIRECT_v2f64:
     return CALL_INDIRECT_v2f64;
+  case PCALL_INDIRECT_exnref:
+    return CALL_INDIRECT_exnref;
+  case PRET_CALL_INDIRECT:
+    return RET_CALL_INDIRECT;
   default:
     return INSTRUCTION_LIST_END;
   }
 }
 
-static bool IsPseudoCallIndirect(const MachineInstr &MI) {
-  return GetNonPseudoCallIndirectOpcode(MI) !=
+static bool isPseudoCallIndirect(const MachineInstr &MI) {
+  return getNonPseudoCallIndirectOpcode(MI) !=
          WebAssembly::INSTRUCTION_LIST_END;
 }
 
@@ -106,11 +109,11 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
-      if (IsPseudoCallIndirect(MI)) {
+      if (isPseudoCallIndirect(MI)) {
         LLVM_DEBUG(dbgs() << "Found call_indirect: " << MI << '\n');
 
         // Rewrite pseudo to non-pseudo
-        const MCInstrDesc &Desc = TII->get(GetNonPseudoCallIndirectOpcode(MI));
+        const MCInstrDesc &Desc = TII->get(getNonPseudoCallIndirectOpcode(MI));
         MI.setDesc(Desc);
 
         // Rewrite argument order
diff --git a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 8ecc159951ad..579377c9a5d7 100644
--- a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
index 73f317214058..06e8805b5ad0 100644
--- a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
+++ b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -1,9 +1,8 @@
 // WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp b/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
deleted file mode 100644
index c86260ba408c..000000000000
--- a/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- WebAssemblyEHRestoreStackPointer.cpp - __stack_pointer restoration ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// After the stack is unwound due to a thrown exception, the __stack_pointer
-/// global can point to an invalid address. This inserts instructions that
-/// restore __stack_pointer global.
-///
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
-#include "WebAssemblySubtarget.h"
-#include "WebAssemblyUtilities.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "wasm-eh-restore-stack-pointer"
-
-namespace {
-class WebAssemblyEHRestoreStackPointer final : public MachineFunctionPass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  WebAssemblyEHRestoreStackPointer() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "WebAssembly Restore Stack Pointer for Exception Handling";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // end anonymous namespace
-
-char WebAssemblyEHRestoreStackPointer::ID = 0;
-INITIALIZE_PASS(WebAssemblyEHRestoreStackPointer, DEBUG_TYPE,
-                "Restore Stack Pointer for Exception Handling", true, false)
-
-FunctionPass *llvm::createWebAssemblyEHRestoreStackPointer() {
-  return new WebAssemblyEHRestoreStackPointer();
-}
-
-bool WebAssemblyEHRestoreStackPointer::runOnMachineFunction(
-    MachineFunction &MF) {
-  LLVM_DEBUG(dbgs() << "********** EH Restore Stack Pointer **********\n"
-                       "********** Function: "
-                    << MF.getName() << '\n');
-
-  const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
-      MF.getSubtarget().getFrameLowering());
-  if (!FrameLowering->needsPrologForEH(MF))
-    return false;
-  bool Changed = false;
-
-  for (auto &MBB : MF) {
-    if (!MBB.isEHPad())
-      continue;
-    Changed = true;
-
-    // Insert __stack_pointer restoring instructions at the beginning of each EH
-    // pad, after the catch instruction. (Catch instructions may have been
-    // reordered, and catch_all instructions have not been inserted yet, but
-    // those cases are handled in LateEHPrepare).
-    //
-    // Here it is safe to assume that SP32 holds the latest value of
-    // __stack_pointer, because the only exception for this case is when a
-    // function uses the red zone, but that only happens with leaf functions,
-    // and we don't restore __stack_pointer in leaf functions anyway.
-    auto InsertPos = MBB.begin();
-    if (WebAssembly::isCatch(*MBB.begin()))
-      InsertPos++;
-    FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
-                                   MBB.begin()->getDebugLoc());
-  }
-  return Changed;
-}
diff --git a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 6b3a3e765786..0387957b14c2 100644
--- a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -1,9 +1,8 @@
 //===--- WebAssemblyExceptionInfo.cpp - Exception Infomation --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -51,10 +50,6 @@ void WebAssemblyExceptionInfo::recalculate(
     MachineBasicBlock *EHPad = DomNode->getBlock();
     if (!EHPad->isEHPad())
       continue;
-    // We group catch & catch-all terminate pads together, so skip the second
-    // one
-    if (WebAssembly::isCatchAllTerminatePad(*EHPad))
-      continue;
     auto *WE = new WebAssemblyException(EHPad);
     discoverAndMapException(WE, MDT, MDF);
     Exceptions.push_back(WE);
@@ -105,16 +100,6 @@ void WebAssemblyExceptionInfo::discoverAndMapException(
 
   // Map blocks that belong to a catchpad / cleanuppad
   MachineBasicBlock *EHPad = WE->getEHPad();
-
-  // We group catch & catch-all terminate pads together within an exception
-  if (WebAssembly::isCatchTerminatePad(*EHPad)) {
-    assert(EHPad->succ_size() == 1 &&
-           "Catch terminate pad has more than one successors");
-    changeExceptionFor(EHPad, WE);
-    changeExceptionFor(*(EHPad->succ_begin()), WE);
-    return;
-  }
-
   SmallVector<MachineBasicBlock *, 8> WL;
   WL.push_back(EHPad);
   while (!WL.empty()) {
diff --git a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index fcd7e2366e03..9a90d7df7d47 100644
--- a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -1,9 +1,8 @@
 //===-- WebAssemblyExceptionInfo.h - WebAssembly Exception Info -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 27aabe6ba0bd..dbd62179f055 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyExplicitLocals.cpp - Make Locals Explicit --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -91,13 +90,13 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::DROP_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::DROP_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::DROP_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::DROP_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
 /// Get the appropriate local.get opcode for the given register class.
-static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) {
   if (RC == &WebAssembly::I32RegClass)
     return WebAssembly::LOCAL_GET_I32;
   if (RC == &WebAssembly::I64RegClass)
@@ -108,13 +107,13 @@ static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_GET_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_GET_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_GET_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_GET_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
 /// Get the appropriate local.set opcode for the given register class.
-static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) {
   if (RC == &WebAssembly::I32RegClass)
     return WebAssembly::LOCAL_SET_I32;
   if (RC == &WebAssembly::I64RegClass)
@@ -125,13 +124,13 @@ static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_SET_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_SET_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_SET_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_SET_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
 /// Get the appropriate local.tee opcode for the given register class.
-static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) {
   if (RC == &WebAssembly::I32RegClass)
     return WebAssembly::LOCAL_TEE_I32;
   if (RC == &WebAssembly::I64RegClass)
@@ -142,8 +141,8 @@ static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_TEE_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_TEE_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_TEE_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_TEE_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
@@ -159,8 +158,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
     return MVT::f64;
   if (RC == &WebAssembly::V128RegClass)
     return MVT::v16i8;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return MVT::ExceptRef;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return MVT::exnref;
   llvm_unreachable("unrecognized register class");
 }
 
@@ -206,7 +205,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
                                    E = MF.begin()->end();
        I != E;) {
     MachineInstr &MI = *I++;
-    if (!WebAssembly::isArgument(MI))
+    if (!WebAssembly::isArgument(MI.getOpcode()))
       break;
     unsigned Reg = MI.getOperand(0).getReg();
     assert(!MFI.isVRegStackified(Reg));
@@ -228,7 +227,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF) {
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
       MachineInstr &MI = *I++;
-      assert(!WebAssembly::isArgument(MI));
+      assert(!WebAssembly::isArgument(MI.getOpcode()));
 
       if (MI.isDebugInstr() || MI.isLabel())
         continue;
@@ -236,7 +235,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
       // Replace tee instructions with local.tee. The difference is that tee
       // instructions have two defs, while local.tee instructions have one def
       // and an index of a local to write to.
-      if (WebAssembly::isTee(MI)) {
+      if (WebAssembly::isTee(MI.getOpcode())) {
         assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
         assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
         unsigned OldReg = MI.getOperand(2).getReg();
@@ -246,7 +245,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
         if (!MFI.isVRegStackified(OldReg)) {
           unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
           unsigned NewReg = MRI.createVirtualRegister(RC);
-          unsigned Opc = getGetLocalOpcode(RC);
+          unsigned Opc = getLocalGetOpcode(RC);
           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
               .addImm(LocalId);
           MI.getOperand(2).setReg(NewReg);
@@ -256,7 +255,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
         // Replace the TEE with a LOCAL_TEE.
         unsigned LocalId =
             getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
-        unsigned Opc = getTeeLocalOpcode(RC);
+        unsigned Opc = getLocalTeeOpcode(RC);
         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc),
                 MI.getOperand(0).getReg())
             .addImm(LocalId)
@@ -275,7 +274,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
         if (!MFI.isVRegStackified(OldReg)) {
           const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
           unsigned NewReg = MRI.createVirtualRegister(RC);
-          auto InsertPt = std::next(MachineBasicBlock::iterator(&MI));
+          auto InsertPt = std::next(MI.getIterator());
           if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
             MI.eraseFromParent();
             Changed = true;
@@ -290,7 +289,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
             Drop->getOperand(0).setIsKill();
           } else {
             unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
-            unsigned Opc = getSetLocalOpcode(RC);
+            unsigned Opc = getLocalSetOpcode(RC);
             BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
                 .addImm(LocalId)
                 .addReg(NewReg);
@@ -317,7 +316,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
         // with inline asm register operands is to provide local indices as
         // immediates.
         if (MO.isDef()) {
-          assert(MI.getOpcode() == TargetOpcode::INLINEASM);
+          assert(MI.isInlineAsm());
           unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
           // If this register operand is tied to another operand, we can't
           // change it to an immediate. Untie it first.
@@ -335,7 +334,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
 
         // Our contract with inline asm register operands is to provide local
         // indices as immediates.
-        if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+        if (MI.isInlineAsm()) {
           unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
           // Untie it first if this reg operand is tied to another operand.
           MI.untieRegOperand(MI.getOperandNo(&MO));
@@ -347,7 +346,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
         unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
         const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
         unsigned NewReg = MRI.createVirtualRegister(RC);
-        unsigned Opc = getGetLocalOpcode(RC);
+        unsigned Opc = getLocalGetOpcode(RC);
         InsertPt =
             BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
                 .addImm(LocalId);
@@ -357,7 +356,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // Coalesce and eliminate COPY instructions.
-      if (WebAssembly::isCopy(MI)) {
+      if (WebAssembly::isCopy(MI.getOpcode())) {
         MRI.replaceRegWith(MI.getOperand(1).getReg(),
                            MI.getOperand(0).getReg());
         MI.eraseFromParent();
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 3856700cca94..2552e9150833 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -50,22 +49,22 @@ class WebAssemblyFastISel final : public FastISel {
   // All possible address modes.
   class Address {
   public:
-    typedef enum { RegBase, FrameIndexBase } BaseKind;
+    using BaseKind = enum { RegBase, FrameIndexBase };
 
   private:
-    BaseKind Kind;
+    BaseKind Kind = RegBase;
     union {
       unsigned Reg;
       int FI;
     } Base;
 
-    int64_t Offset;
+    int64_t Offset = 0;
 
-    const GlobalValue *GV;
+    const GlobalValue *GV = nullptr;
 
   public:
     // Innocuous defaults for our address.
-    Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
+    Address() { Base.Reg = 0; }
     void setKind(BaseKind K) {
       assert(!isSet() && "Can't change kind with non-zero base");
       Kind = K;
@@ -92,9 +91,9 @@ class WebAssemblyFastISel final : public FastISel {
       return Base.FI;
     }
 
-    void setOffset(int64_t Offset_) {
-      assert(Offset_ >= 0 && "Offsets must be non-negative");
-      Offset = Offset_;
+    void setOffset(int64_t NewOffset) {
+      assert(NewOffset >= 0 && "Offsets must be non-negative");
+      Offset = NewOffset;
     }
     int64_t getOffset() const { return Offset; }
     void setGlobalValue(const GlobalValue *G) { GV = G; }
@@ -116,7 +115,7 @@ class WebAssemblyFastISel final : public FastISel {
 private:
   // Utility helper routines
   MVT::SimpleValueType getSimpleType(Type *Ty) {
-    EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+    EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
     return VT.isSimple() ? VT.getSimpleVT().SimpleTy
                          : MVT::INVALID_SIMPLE_VALUE_TYPE;
   }
@@ -130,7 +129,7 @@ private:
     case MVT::i64:
     case MVT::f32:
     case MVT::f64:
-    case MVT::ExceptRef:
+    case MVT::exnref:
       return VT;
     case MVT::f16:
       return MVT::f32;
@@ -208,10 +207,9 @@ public:
 } // end anonymous namespace
 
 bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
-
   const User *U = nullptr;
   unsigned Opcode = Instruction::UserOp1;
-  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+  if (const auto *I = dyn_cast<Instruction>(Obj)) {
     // Don't walk into other basic blocks unless the object is an alloca from
     // another block, otherwise it may not have a virtual register assigned.
     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
@@ -219,7 +217,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
       Opcode = I->getOpcode();
       U = I;
     }
-  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+  } else if (const auto *C = dyn_cast<ConstantExpr>(Obj)) {
     Opcode = C->getOpcode();
     U = C;
   }
@@ -230,9 +228,13 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
       // address spaces.
       return false;
 
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+  if (const auto *GV = dyn_cast<GlobalValue>(Obj)) {
+    if (TLI.isPositionIndependent())
+      return false;
     if (Addr.getGlobalValue())
       return false;
+    if (GV->isThreadLocal())
+      return false;
     Addr.setGlobalValue(GV);
     return true;
   }
@@ -275,7 +277,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
       } else {
         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
         for (;;) {
-          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+          if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
             // Constant-offset addressing.
             TmpOffset += CI->getSExtValue() * S;
             break;
@@ -290,8 +292,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
           }
           if (canFoldAddIntoGEP(U, Op)) {
             // A compatible add with a constant operand. Fold the constant.
-            ConstantInt *CI =
-                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+            auto *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
             TmpOffset += CI->getSExtValue() * S;
             // Iterate on the other operand.
             Op = cast<AddOperator>(Op)->getOperand(0);
@@ -315,7 +316,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
     break;
   }
   case Instruction::Alloca: {
-    const AllocaInst *AI = cast<AllocaInst>(Obj);
+    const auto *AI = cast<AllocaInst>(Obj);
     DenseMap<const AllocaInst *, int>::iterator SI =
         FuncInfo.StaticAllocaMap.find(AI);
     if (SI != FuncInfo.StaticAllocaMap.end()) {
@@ -336,7 +337,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
     if (isa<ConstantInt>(LHS))
       std::swap(LHS, RHS);
 
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
       uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue();
       if (int64_t(TmpOffset) >= 0) {
         Addr.setOffset(TmpOffset);
@@ -356,7 +357,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
     const Value *LHS = U->getOperand(0);
     const Value *RHS = U->getOperand(1);
 
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
       int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue();
       if (TmpOffset >= 0) {
         Addr.setOffset(TmpOffset);
@@ -416,7 +417,7 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
 }
 
 unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
-  if (const ICmpInst *ICmp = dyn_cast<ICmpInst>(V))
+  if (const auto *ICmp = dyn_cast<ICmpInst>(V))
     if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
       if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
         Not = ICmp->isTrueWhenEqual();
@@ -524,7 +525,10 @@ unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V,
     return Result;
   }
 
-  return zeroExtendToI32(Reg, V, From);
+  if (To == MVT::i32)
+    return zeroExtendToI32(Reg, V, From);
+
+  return 0;
 }
 
 unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
@@ -543,7 +547,10 @@ unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
     return Result;
   }
 
-  return signExtendToI32(Reg, V, From);
+  if (To == MVT::i32)
+    return signExtendToI32(Reg, V, From);
+
+  return 0;
 }
 
 unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
@@ -607,6 +614,10 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 
 unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+    if (TLI.isPositionIndependent())
+      return 0;
+    if (GV->isThreadLocal())
+      return 0;
     unsigned ResultReg =
         createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
                                                : &WebAssembly::I32RegClass);
@@ -629,14 +640,14 @@ bool WebAssemblyFastISel::fastLowerArguments() {
   if (F->isVarArg())
     return false;
 
-  unsigned i = 0;
+  unsigned I = 0;
   for (auto const &Arg : F->args()) {
     const AttributeList &Attrs = F->getAttributes();
-    if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
-        Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
-        Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
-        Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
-        Attrs.hasParamAttribute(i, Attribute::Nest))
+    if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
+        Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
+        Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
+        Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
+        Attrs.hasParamAttribute(I, Attribute::Nest))
       return false;
 
     Type *ArgTy = Arg.getType();
@@ -691,19 +702,19 @@ bool WebAssemblyFastISel::fastLowerArguments() {
       Opc = WebAssembly::ARGUMENT_v2f64;
       RC = &WebAssembly::V128RegClass;
       break;
-    case MVT::ExceptRef:
-      Opc = WebAssembly::ARGUMENT_ExceptRef;
-      RC = &WebAssembly::EXCEPT_REFRegClass;
+    case MVT::exnref:
+      Opc = WebAssembly::ARGUMENT_exnref;
+      RC = &WebAssembly::EXNREFRegClass;
       break;
     default:
       return false;
     }
     unsigned ResultReg = createResultReg(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
-        .addImm(i);
+        .addImm(I);
     updateValueMap(&Arg, ResultReg);
 
-    ++i;
+    ++I;
   }
 
   MRI.addLiveIn(WebAssembly::ARGUMENTS);
@@ -732,8 +743,9 @@ bool WebAssemblyFastISel::fastLowerArguments() {
 }
 
 bool WebAssemblyFastISel::selectCall(const Instruction *I) {
-  const CallInst *Call = cast<CallInst>(I);
+  const auto *Call = cast<CallInst>(I);
 
+  // TODO: Support tail calls in FastISel
   if (Call->isMustTailCall() || Call->isInlineAsm() ||
       Call->getFunctionType()->isVarArg())
     return false;
@@ -762,19 +774,19 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
     case MVT::i8:
     case MVT::i16:
     case MVT::i32:
-      Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::PCALL_INDIRECT_I32;
+      Opc = IsDirect ? WebAssembly::CALL_i32 : WebAssembly::PCALL_INDIRECT_i32;
       ResultReg = createResultReg(&WebAssembly::I32RegClass);
       break;
     case MVT::i64:
-      Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::PCALL_INDIRECT_I64;
+      Opc = IsDirect ? WebAssembly::CALL_i64 : WebAssembly::PCALL_INDIRECT_i64;
       ResultReg = createResultReg(&WebAssembly::I64RegClass);
       break;
     case MVT::f32:
-      Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::PCALL_INDIRECT_F32;
+      Opc = IsDirect ? WebAssembly::CALL_f32 : WebAssembly::PCALL_INDIRECT_f32;
       ResultReg = createResultReg(&WebAssembly::F32RegClass);
       break;
     case MVT::f64:
-      Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::PCALL_INDIRECT_F64;
+      Opc = IsDirect ? WebAssembly::CALL_f64 : WebAssembly::PCALL_INDIRECT_f64;
       ResultReg = createResultReg(&WebAssembly::F64RegClass);
       break;
     case MVT::v16i8:
@@ -807,10 +819,10 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
                      : WebAssembly::PCALL_INDIRECT_v2f64;
       ResultReg = createResultReg(&WebAssembly::V128RegClass);
       break;
-    case MVT::ExceptRef:
-      Opc = IsDirect ? WebAssembly::CALL_EXCEPT_REF
-                     : WebAssembly::PCALL_INDIRECT_EXCEPT_REF;
-      ResultReg = createResultReg(&WebAssembly::EXCEPT_REFRegClass);
+    case MVT::exnref:
+      Opc = IsDirect ? WebAssembly::CALL_exnref
+                     : WebAssembly::PCALL_INDIRECT_exnref;
+      ResultReg = createResultReg(&WebAssembly::EXNREFRegClass);
       break;
     default:
       return false;
@@ -818,25 +830,25 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
   }
 
   SmallVector<unsigned, 8> Args;
-  for (unsigned i = 0, e = Call->getNumArgOperands(); i < e; ++i) {
-    Value *V = Call->getArgOperand(i);
+  for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
+    Value *V = Call->getArgOperand(I);
     MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
     if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
       return false;
 
     const AttributeList &Attrs = Call->getAttributes();
-    if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
-        Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
-        Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
-        Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
-        Attrs.hasParamAttribute(i, Attribute::Nest))
+    if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
+        Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
+        Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
+        Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
+        Attrs.hasParamAttribute(I, Attribute::Nest))
       return false;
 
     unsigned Reg;
 
-    if (Attrs.hasParamAttribute(i, Attribute::SExt))
+    if (Attrs.hasParamAttribute(I, Attribute::SExt))
       Reg = getRegForSignedValue(V);
-    else if (Attrs.hasParamAttribute(i, Attribute::ZExt))
+    else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
       Reg = getRegForUnsignedValue(V);
     else
       Reg = getRegForValue(V);
@@ -847,6 +859,13 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
     Args.push_back(Reg);
   }
 
+  unsigned CalleeReg = 0;
+  if (!IsDirect) {
+    CalleeReg = getRegForValue(Call->getCalledValue());
+    if (!CalleeReg)
+      return false;
+  }
+
   auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
 
   if (!IsVoid)
@@ -854,12 +873,8 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
 
   if (IsDirect)
     MIB.addGlobalAddress(Func);
-  else {
-    unsigned Reg = getRegForValue(Call->getCalledValue());
-    if (Reg == 0)
-      return false;
-    MIB.addReg(Reg);
-  }
+  else
+    MIB.addReg(CalleeReg);
 
   for (unsigned ArgReg : Args)
     MIB.addReg(ArgReg);
@@ -870,7 +885,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
-  const SelectInst *Select = cast<SelectInst>(I);
+  const auto *Select = cast<SelectInst>(I);
 
   bool Not;
   unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
@@ -910,9 +925,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
     Opc = WebAssembly::SELECT_F64;
     RC = &WebAssembly::F64RegClass;
     break;
-  case MVT::ExceptRef:
-    Opc = WebAssembly::SELECT_EXCEPT_REF;
-    RC = &WebAssembly::EXCEPT_REFRegClass;
+  case MVT::exnref:
+    Opc = WebAssembly::SELECT_EXNREF;
+    RC = &WebAssembly::EXNREFRegClass;
     break;
   default:
     return false;
@@ -929,7 +944,7 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
-  const TruncInst *Trunc = cast<TruncInst>(I);
+  const auto *Trunc = cast<TruncInst>(I);
 
   unsigned Reg = getRegForValue(Trunc->getOperand(0));
   if (Reg == 0)
@@ -948,7 +963,7 @@ bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
-  const ZExtInst *ZExt = cast<ZExtInst>(I);
+  const auto *ZExt = cast<ZExtInst>(I);
 
   const Value *Op = ZExt->getOperand(0);
   MVT::SimpleValueType From = getSimpleType(Op->getType());
@@ -965,7 +980,7 @@ bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
-  const SExtInst *SExt = cast<SExtInst>(I);
+  const auto *SExt = cast<SExtInst>(I);
 
   const Value *Op = SExt->getOperand(0);
   MVT::SimpleValueType From = getSimpleType(Op->getType());
@@ -982,11 +997,11 @@ bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
-  const ICmpInst *ICmp = cast<ICmpInst>(I);
+  const auto *ICmp = cast<ICmpInst>(I);
 
   bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64;
   unsigned Opc;
-  bool isSigned = false;
+  bool IsSigned = false;
   switch (ICmp->getPredicate()) {
   case ICmpInst::ICMP_EQ:
     Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64;
@@ -1008,29 +1023,29 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
     break;
   case ICmpInst::ICMP_SGT:
     Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64;
-    isSigned = true;
+    IsSigned = true;
     break;
   case ICmpInst::ICMP_SGE:
     Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64;
-    isSigned = true;
+    IsSigned = true;
     break;
   case ICmpInst::ICMP_SLT:
     Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64;
-    isSigned = true;
+    IsSigned = true;
     break;
   case ICmpInst::ICMP_SLE:
     Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
-    isSigned = true;
+    IsSigned = true;
     break;
   default:
     return false;
   }
 
-  unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned);
+  unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), IsSigned);
   if (LHS == 0)
     return false;
 
-  unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), isSigned);
+  unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), IsSigned);
   if (RHS == 0)
     return false;
 
@@ -1043,7 +1058,7 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
-  const FCmpInst *FCmp = cast<FCmpInst>(I);
+  const auto *FCmp = cast<FCmpInst>(I);
 
   unsigned LHS = getRegForValue(FCmp->getOperand(0));
   if (LHS == 0)
@@ -1139,7 +1154,7 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
-  const LoadInst *Load = cast<LoadInst>(I);
+  const auto *Load = cast<LoadInst>(I);
   if (Load->isAtomic())
     return false;
   if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy())
@@ -1196,7 +1211,7 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectStore(const Instruction *I) {
-  const StoreInst *Store = cast<StoreInst>(I);
+  const auto *Store = cast<StoreInst>(I);
   if (Store->isAtomic())
     return false;
   if (!Subtarget->hasSIMD128() &&
@@ -1252,7 +1267,7 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
 }
 
 bool WebAssemblyFastISel::selectBr(const Instruction *I) {
-  const BranchInst *Br = cast<BranchInst>(I);
+  const auto *Br = cast<BranchInst>(I);
   if (Br->isUnconditional()) {
     MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)];
     fastEmitBranch(MSucc, Br->getDebugLoc());
@@ -1283,7 +1298,7 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
   if (!FuncInfo.CanLowerReturn)
     return false;
 
-  const ReturnInst *Ret = cast<ReturnInst>(I);
+  const auto *Ret = cast<ReturnInst>(I);
 
   if (Ret->getNumOperands() == 0) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1330,8 +1345,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
   case MVT::v2f64:
     Opc = WebAssembly::RETURN_v2f64;
     break;
-  case MVT::ExceptRef:
-    Opc = WebAssembly::RETURN_EXCEPT_REF;
+  case MVT::exnref:
+    Opc = WebAssembly::RETURN_EXNREF;
     break;
   default:
     return false;
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 1a416520f97d..b7fc65401fc4 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -36,11 +35,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-fix-function-bitcasts"
 
-static cl::opt<bool>
-    TemporaryWorkarounds("wasm-temporary-workarounds",
-                         cl::desc("Apply certain temporary workarounds"),
-                         cl::init(true), cl::Hidden);
-
 namespace {
 class FixFunctionBitcasts final : public ModulePass {
   StringRef getPassName() const override {
@@ -70,12 +64,12 @@ ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
 
 // Recursively descend the def-use lists from V to find non-bitcast users of
 // bitcasts of V.
-static void FindUses(Value *V, Function &F,
+static void findUses(Value *V, Function &F,
                      SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
                      SmallPtrSetImpl<Constant *> &ConstantBCs) {
   for (Use &U : V->uses()) {
-    if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
-      FindUses(BC, F, Uses, ConstantBCs);
+    if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
+      findUses(BC, F, Uses, ConstantBCs);
     else if (U.get()->getType() != F.getType()) {
       CallSite CS(U.getUser());
       if (!CS)
@@ -87,8 +81,8 @@ static void FindUses(Value *V, Function &F,
         continue;
       if (isa<Constant>(U.get())) {
         // Only add constant bitcasts to the list once; they get RAUW'd
-        auto c = ConstantBCs.insert(cast<Constant>(U.get()));
-        if (!c.second)
+        auto C = ConstantBCs.insert(cast<Constant>(U.get()));
+        if (!C.second)
           continue;
       }
       Uses.push_back(std::make_pair(&U, &F));
@@ -119,7 +113,7 @@ static void FindUses(Value *V, Function &F,
 // For bitcasts that involve struct types we don't know at this stage if they
 // would be equivalent at the wasm level and so we can't know if we need to
 // generate a wrapper.
-static Function *CreateWrapper(Function *F, FunctionType *Ty) {
+static Function *createWrapper(Function *F, FunctionType *Ty) {
   Module *M = F->getParent();
 
   Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
@@ -157,11 +151,11 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
         BB->getInstList().push_back(PtrCast);
         Args.push_back(PtrCast);
       } else if (ArgType->isStructTy() || ParamType->isStructTy()) {
-        LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
+        LLVM_DEBUG(dbgs() << "createWrapper: struct param type in bitcast: "
                           << F->getName() << "\n");
         WrapperNeeded = false;
       } else {
-        LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
+        LLVM_DEBUG(dbgs() << "createWrapper: arg type mismatch calling: "
                           << F->getName() << "\n");
         LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
                           << *ParamType << " Got: " << *ArgType << "\n");
@@ -197,11 +191,11 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
       BB->getInstList().push_back(Cast);
       ReturnInst::Create(M->getContext(), Cast, BB);
     } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
-      LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
+      LLVM_DEBUG(dbgs() << "createWrapper: struct return type in bitcast: "
                         << F->getName() << "\n");
       WrapperNeeded = false;
     } else {
-      LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
+      LLVM_DEBUG(dbgs() << "createWrapper: return type mismatch calling: "
                         << F->getName() << "\n");
       LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
                         << " Got: " << *RtnType << "\n");
@@ -218,15 +212,26 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
     new UnreachableInst(M->getContext(), BB);
     Wrapper->setName(F->getName() + "_bitcast_invalid");
   } else if (!WrapperNeeded) {
-    LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
+    LLVM_DEBUG(dbgs() << "createWrapper: no wrapper needed: " << F->getName()
                       << "\n");
     Wrapper->eraseFromParent();
     return nullptr;
   }
-  LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
+  LLVM_DEBUG(dbgs() << "createWrapper: " << F->getName() << "\n");
   return Wrapper;
 }
 
+// Test whether a main function with type FuncTy should be rewritten to have
+// type MainTy.
+static bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) {
+  // Only fix the main function if it's the standard zero-arg form. That way,
+  // the standard cases will work as expected, and users will see signature
+  // mismatches from the linker for non-standard cases.
+  return FuncTy->getReturnType() == MainTy->getReturnType() &&
+         FuncTy->getNumParams() == 0 &&
+         !FuncTy->isVarArg();
+}
+
 bool FixFunctionBitcasts::runOnModule(Module &M) {
   LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
 
@@ -237,27 +242,27 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
 
   // Collect all the places that need wrappers.
   for (Function &F : M) {
-    FindUses(&F, F, Uses, ConstantBCs);
+    findUses(&F, F, Uses, ConstantBCs);
 
     // If we have a "main" function, and its type isn't
     // "int main(int argc, char *argv[])", create an artificial call with it
     // bitcasted to that type so that we generate a wrapper for it, so that
     // the C runtime can call it.
-    if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
+    if (F.getName() == "main") {
       Main = &F;
       LLVMContext &C = M.getContext();
       Type *MainArgTys[] = {Type::getInt32Ty(C),
                             PointerType::get(Type::getInt8PtrTy(C), 0)};
       FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
                                                /*isVarArg=*/false);
-      if (F.getFunctionType() != MainTy) {
+      if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
         LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
                           << *F.getFunctionType() << "\n");
         Value *Args[] = {UndefValue::get(MainArgTys[0]),
                          UndefValue::get(MainArgTys[1])};
         Value *Casted =
             ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
-        CallMain = CallInst::Create(Casted, Args, "call_main");
+        CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
         Use *UseMain = &CallMain->getOperandUse(2);
         Uses.push_back(std::make_pair(UseMain, &F));
       }
@@ -269,8 +274,8 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
   for (auto &UseFunc : Uses) {
     Use *U = UseFunc.first;
     Function *F = UseFunc.second;
-    PointerType *PTy = cast<PointerType>(U->get()->getType());
-    FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());
+    auto *PTy = cast<PointerType>(U->get()->getType());
+    auto *Ty = dyn_cast<FunctionType>(PTy->getElementType());
 
     // If the function is casted to something like i8* as a "generic pointer"
     // to be later casted to something else, we can't generate a wrapper for it.
@@ -280,7 +285,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
 
     auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
     if (Pair.second)
-      Pair.first->second = CreateWrapper(F, Ty);
+      Pair.first->second = createWrapper(F, Ty);
 
     Function *Wrapper = Pair.first->second;
     if (!Wrapper)
@@ -296,14 +301,20 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
   // one that gets called from startup.
   if (CallMain) {
     Main->setName("__original_main");
-    Function *MainWrapper =
+    auto *MainWrapper =
         cast<Function>(CallMain->getCalledValue()->stripPointerCasts());
-    MainWrapper->setName("main");
-    MainWrapper->setLinkage(Main->getLinkage());
-    MainWrapper->setVisibility(Main->getVisibility());
-    Main->setLinkage(Function::PrivateLinkage);
-    Main->setVisibility(Function::DefaultVisibility);
     delete CallMain;
+    if (Main->isDeclaration()) {
+      // The wrapper is not needed in this case as we don't need to export
+      // it to anyone else.
+      MainWrapper->eraseFromParent();
+    } else {
+      // Otherwise give the wrapper the same linkage as the original main
+      // function, so that it can be called from the same places.
+      MainWrapper->setName("main");
+      MainWrapper->setLinkage(Main->getLinkage());
+      MainWrapper->setVisibility(Main->getVisibility());
+    }
   }
 
   return true;
diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 108f2879a071..7d8e86d9b2c0 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -1,46 +1,48 @@
 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file implements a pass that transforms irreducible control flow into
-/// reducible control flow. Irreducible control flow means multiple-entry
-/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
-/// due to being unnatural.
+/// This file implements a pass that removes irreducible control flow.
+/// Irreducible control flow means multiple-entry loops, which this pass
+/// transforms to have a single entry.
 ///
 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
 /// it linearizes control flow, turning diamonds into two triangles, which is
 /// both unnecessary and undesirable for WebAssembly.
 ///
-/// The big picture: Ignoring natural loops (seeing them monolithically), we
-/// find all the blocks which can return to themselves ("loopers"). Loopers
-/// reachable from the non-loopers are loop entries: if there are 2 or more,
-/// then we have irreducible control flow. We fix that as follows: a new block
-/// is created that can dispatch to each of the loop entries, based on the
-/// value of a label "helper" variable, and we replace direct branches to the
-/// entries with assignments to the label variable and a branch to the dispatch
-/// block. Then the dispatch block is the single entry in a new natural loop.
+/// The big picture: We recursively process each "region", defined as a group
+/// of blocks with a single entry and no branches back to that entry. A region
+/// may be the entire function body, or the inner part of a loop, i.e., the
+/// loop's body without branches back to the loop entry. In each region we fix
+/// up multi-entry loops by adding a new block that can dispatch to each of the
+/// loop entries, based on the value of a label "helper" variable, and we
+/// replace direct branches to the entries with assignments to the label
+/// variable and a branch to the dispatch block. Then the dispatch block is the
+/// single entry in the loop containing the previous multiple entries. After
+/// ensuring all the loops in a region are reducible, we recurse into them. The
+/// total time complexity of this pass is:
+///
+///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
+///     NumLoops * NumLoops)
 ///
-/// This is similar to what the Relooper [1] does, both identify looping code
-/// that requires multiple entries, and resolve it in a similar way. In
-/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// This pass is similar to what the Relooper [1] does. Both identify looping
+/// code that requires multiple entries, and resolve it in a similar way (in
+/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
 /// also that like the Relooper, we implement a "minimal" intervention: we only
 /// use the "label" helper for the blocks we absolutely must and no others. We
-/// also prioritize code size and do not perform node splitting (i.e. we don't
-/// duplicate code in order to resolve irreducibility).
+/// also prioritize code size and do not duplicate code in order to resolve
+/// irreducibility. The graph algorithms for finding loops and entries and so
+/// forth are also similar to the Relooper. The main differences between this
+/// pass and the Relooper are:
 ///
-/// The difference between this code and the Relooper is that the Relooper also
-/// generates ifs and loops and works in a recursive manner, knowing at each
-/// point what the entries are, and recursively breaks down the problem. Here
-/// we just want to resolve irreducible control flow, and we also want to use
-/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
-/// identify natural loops, etc., and we start with the whole CFG and must
-/// identify both the looping code and its entries.
+///  * We just care about irreducibility, so we just look at loops.
+///  * The Relooper emits structured control flow (with ifs etc.), while we
+///    emit a CFG.
 ///
 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
 /// Proceedings of the ACM international conference companion on Object oriented
@@ -52,200 +54,277 @@
 
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
-#include "llvm/ADT/PriorityQueue.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
 
 namespace {
 
-class LoopFixer {
+using BlockVector = SmallVector<MachineBasicBlock *, 4>;
+using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+
+// Calculates reachability in a region. Ignores branches to blocks outside of
+// the region, and ignores branches to the region entry (for the case where
+// the region is the inner part of a loop).
+class ReachabilityGraph {
 public:
-  LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
-      : MF(MF), MLI(MLI), Loop(Loop) {}
+  ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
+      : Entry(Entry), Blocks(Blocks) {
+#ifndef NDEBUG
+    // The region must have a single entry.
+    for (auto *MBB : Blocks) {
+      if (MBB != Entry) {
+        for (auto *Pred : MBB->predecessors()) {
+          assert(inRegion(Pred));
+        }
+      }
+    }
+#endif
+    calculate();
+  }
+
+  bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
+    assert(inRegion(From) && inRegion(To));
+    auto I = Reachable.find(From);
+    if (I == Reachable.end())
+      return false;
+    return I->second.count(To);
+  }
+
+  // "Loopers" are blocks that are in a loop. We detect these by finding blocks
+  // that can reach themselves.
+  const BlockSet &getLoopers() const { return Loopers; }
+
+  // Get all blocks that are loop entries.
+  const BlockSet &getLoopEntries() const { return LoopEntries; }
 
-  // Run the fixer on the given inputs. Returns whether changes were made.
-  bool run();
+  // Get all blocks that enter a particular loop from outside.
+  const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
+    assert(inRegion(LoopEntry));
+    auto I = LoopEnterers.find(LoopEntry);
+    assert(I != LoopEnterers.end());
+    return I->second;
+  }
 
 private:
-  MachineFunction &MF;
-  MachineLoopInfo &MLI;
-  MachineLoop *Loop;
+  MachineBasicBlock *Entry;
+  const BlockSet &Blocks;
+
+  BlockSet Loopers, LoopEntries;
+  DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
 
-  MachineBasicBlock *Header;
-  SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+  bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
 
-  using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+  // Maps a block to all the other blocks it can reach.
   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
 
-  // The worklist contains pairs of recent additions, (a, b), where we just
-  // added a link a => b.
-  using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
-  SmallVector<BlockPair, 4> WorkList;
-
-  // Get a canonical block to represent a block or a loop: the block, or if in
-  // an inner loop, the loop header, of it in an outer loop scope, we can
-  // ignore it. We need to call this on all blocks we work on.
-  MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
-    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
-    if (InnerLoop == Loop) {
-      return MBB;
-    } else {
-      // This is either in an outer or an inner loop, and not in ours.
-      if (!LoopBlocks.count(MBB)) {
-        // It's in outer code, ignore it.
-        return nullptr;
+  void calculate() {
+    // Reachability computation work list. Contains pairs of recent additions
+    // (A, B) where we just added a link A => B.
+    using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+    SmallVector<BlockPair, 4> WorkList;
+
+    // Add all relevant direct branches.
+    for (auto *MBB : Blocks) {
+      for (auto *Succ : MBB->successors()) {
+        if (Succ != Entry && inRegion(Succ)) {
+          Reachable[MBB].insert(Succ);
+          WorkList.emplace_back(MBB, Succ);
+        }
       }
-      assert(InnerLoop);
-      // It's in an inner loop, canonicalize it to the header of that loop.
-      return InnerLoop->getHeader();
     }
-  }
 
-  // For a successor we can additionally ignore it if it's a branch back to a
-  // natural loop top, as when we are in the scope of a loop, we just care
-  // about internal irreducibility, and can ignore the loop we are in. We need
-  // to call this on all blocks in a context where they are a successor.
-  MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
-    if (Loop && MBB == Loop->getHeader()) {
-      // Ignore branches going to the loop's natural header.
-      return nullptr;
+    while (!WorkList.empty()) {
+      MachineBasicBlock *MBB, *Succ;
+      std::tie(MBB, Succ) = WorkList.pop_back_val();
+      assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
+      if (MBB != Entry) {
+        // We recently added MBB => Succ, and that means we may have enabled
+        // Pred => MBB => Succ.
+        for (auto *Pred : MBB->predecessors()) {
+          if (Reachable[Pred].insert(Succ).second) {
+            WorkList.emplace_back(Pred, Succ);
+          }
+        }
+      }
     }
-    return canonicalize(MBB);
-  }
 
-  // Potentially insert a new reachable edge, and if so, note it as further
-  // work.
-  void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
-    assert(MBB == canonicalize(MBB));
-    assert(Succ);
-    // Succ may not be interesting as a sucessor.
-    Succ = canonicalizeSuccessor(Succ);
-    if (!Succ)
-      return;
-    if (Reachable[MBB].insert(Succ).second) {
-      // For there to be further work, it means that we have
-      //   X => MBB => Succ
-      // for some other X, and in that case X => Succ would be a new edge for
-      // us to discover later. However, if we don't care about MBB as a
-      // successor, then we don't care about that anyhow.
-      if (canonicalizeSuccessor(MBB)) {
-        WorkList.emplace_back(MBB, Succ);
+    // Blocks that can return to themselves are in a loop.
+    for (auto *MBB : Blocks) {
+      if (canReach(MBB, MBB)) {
+        Loopers.insert(MBB);
+      }
+    }
+    assert(!Loopers.count(Entry));
+
+    // Find the loop entries - loopers reachable from blocks not in that loop -
+    // and those outside blocks that reach them, the "loop enterers".
+    for (auto *Looper : Loopers) {
+      for (auto *Pred : Looper->predecessors()) {
+        // Pred can reach Looper. If Looper can reach Pred, Pred is in the loop;
+        // otherwise, Pred is a block that enters into the loop.
+        if (!canReach(Looper, Pred)) {
+          LoopEntries.insert(Looper);
+          LoopEnterers[Looper].insert(Pred);
+        }
       }
     }
   }
 };
 
-bool LoopFixer::run() {
-  Header = Loop ? Loop->getHeader() : &*MF.begin();
-
-  // Identify all the blocks in this loop scope.
-  if (Loop) {
-    for (auto *MBB : Loop->getBlocks()) {
-      LoopBlocks.insert(MBB);
-    }
-  } else {
-    for (auto &MBB : MF) {
-      LoopBlocks.insert(&MBB);
-    }
+// Finds the blocks in a single-entry loop, given the loop entry and the
+// list of blocks that enter the loop.
+class LoopBlocks {
+public:
+  LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
+      : Entry(Entry), Enterers(Enterers) {
+    calculate();
   }
 
-  // Compute which (canonicalized) blocks each block can reach.
-
-  // Add all the initial work.
-  for (auto *MBB : LoopBlocks) {
-    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+  BlockSet &getBlocks() { return Blocks; }
 
-    if (InnerLoop == Loop) {
-      for (auto *Succ : MBB->successors()) {
-        maybeInsert(MBB, Succ);
-      }
-    } else {
-      // It can't be in an outer loop - we loop on LoopBlocks - and so it must
-      // be an inner loop.
-      assert(InnerLoop);
-      // Check if we are the canonical block for this loop.
-      if (canonicalize(MBB) != MBB) {
-        continue;
-      }
-      // The successors are those of the loop.
-      SmallVector<MachineBasicBlock *, 2> ExitBlocks;
-      InnerLoop->getExitBlocks(ExitBlocks);
-      for (auto *Succ : ExitBlocks) {
-        maybeInsert(MBB, Succ);
+private:
+  MachineBasicBlock *Entry;
+  const BlockSet &Enterers;
+
+  BlockSet Blocks;
+
+  void calculate() {
+    // Going backwards from the loop entry, if we ignore the blocks entering
+    // from outside, we will traverse all the blocks in the loop.
+    BlockVector WorkList;
+    BlockSet AddedToWorkList;
+    Blocks.insert(Entry);
+    for (auto *Pred : Entry->predecessors()) {
+      if (!Enterers.count(Pred)) {
+        WorkList.push_back(Pred);
+        AddedToWorkList.insert(Pred);
       }
     }
-  }
 
-  // Do work until we are all done.
-  while (!WorkList.empty()) {
-    MachineBasicBlock *MBB;
-    MachineBasicBlock *Succ;
-    std::tie(MBB, Succ) = WorkList.pop_back_val();
-    // The worklist item is an edge we just added, so it must have valid blocks
-    // (and not something canonicalized to nullptr).
-    assert(MBB);
-    assert(Succ);
-    // The successor in that pair must also be a valid successor.
-    assert(MBB == canonicalizeSuccessor(MBB));
-    // We recently added MBB => Succ, and that means we may have enabled
-    // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
-    // is correct for both a block and a block representing a loop, as the loop
-    // is natural and so the predecessors are all predecessors of the loop
-    // header, which is the block we have here.
-    for (auto *Pred : MBB->predecessors()) {
-      // Canonicalize, make sure it's relevant, and check it's not the same
-      // block (an update to the block itself doesn't help compute that same
-      // block).
-      Pred = canonicalize(Pred);
-      if (Pred && Pred != MBB) {
-        maybeInsert(Pred, Succ);
+    while (!WorkList.empty()) {
+      auto *MBB = WorkList.pop_back_val();
+      assert(!Enterers.count(MBB));
+      if (Blocks.insert(MBB).second) {
+        for (auto *Pred : MBB->predecessors()) {
+          if (!AddedToWorkList.count(Pred)) {
+            WorkList.push_back(Pred);
+            AddedToWorkList.insert(Pred);
+          }
+        }
       }
     }
   }
+};
 
-  // It's now trivial to identify the loopers.
-  SmallPtrSet<MachineBasicBlock *, 4> Loopers;
-  for (auto MBB : LoopBlocks) {
-    if (Reachable[MBB].count(MBB)) {
-      Loopers.insert(MBB);
-    }
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+  StringRef getPassName() const override {
+    return "WebAssembly Fix Irreducible Control Flow";
   }
-  // The header cannot be a looper. At the toplevel, LLVM does not allow the
-  // entry to be in a loop, and in a natural loop we should ignore the header.
-  assert(Loopers.count(Header) == 0);
-
-  // Find the entries, loopers reachable from non-loopers.
-  SmallPtrSet<MachineBasicBlock *, 4> Entries;
-  SmallVector<MachineBasicBlock *, 4> SortedEntries;
-  for (auto *Looper : Loopers) {
-    for (auto *Pred : Looper->predecessors()) {
-      Pred = canonicalize(Pred);
-      if (Pred && !Loopers.count(Pred)) {
-        Entries.insert(Looper);
-        SortedEntries.push_back(Looper);
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
+                     MachineFunction &MF);
+
+  void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
+                           MachineFunction &MF, const ReachabilityGraph &Graph);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+
+bool WebAssemblyFixIrreducibleControlFlow::processRegion(
+    MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
+  bool Changed = false;
+
+  // Remove irreducibility before processing child loops, which may take
+  // multiple iterations.
+  while (true) {
+    ReachabilityGraph Graph(Entry, Blocks);
+
+    bool FoundIrreducibility = false;
+
+    for (auto *LoopEntry : Graph.getLoopEntries()) {
+      // Find mutual entries - all entries which can reach this one, and
+      // are reached by it (that always includes LoopEntry itself). All mutual
+      // entries must be in the same loop, so if we have more than one, then we
+      // have irreducible control flow.
+      //
+      // Note that irreducibility may involve inner loops, e.g. imagine A
+      // starts one loop, and it has B inside it which starts an inner loop.
+      // If we add a branch from all the way on the outside to B, then in a
+      // sense B is no longer an "inner" loop, semantically speaking. We will
+      // fix that irreducibility by adding a block that dispatches to either
+      // A or B, so B will no longer be an inner loop in our output.
+      // (A fancier approach might try to keep it as such.)
+      //
+      // Note that we still need to recurse into inner loops later, to handle
+      // the case where the irreducibility is entirely nested - we would not
+      // be able to identify that at this point, since the enclosing loop is
+      // a group of blocks all of whom can reach each other. (We'll see the
+      // irreducibility after removing branches to the top of that enclosing
+      // loop.)
+      BlockSet MutualLoopEntries;
+      MutualLoopEntries.insert(LoopEntry);
+      for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
+        if (OtherLoopEntry != LoopEntry &&
+            Graph.canReach(LoopEntry, OtherLoopEntry) &&
+            Graph.canReach(OtherLoopEntry, LoopEntry)) {
+          MutualLoopEntries.insert(OtherLoopEntry);
+        }
+      }
+
+      if (MutualLoopEntries.size() > 1) {
+        makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
+        FoundIrreducibility = true;
+        Changed = true;
         break;
       }
     }
+    // Only go on to actually process the inner loops when we are done
+    // removing irreducible control flow and changing the graph. Modifying
+    // the graph as we go is possible, and that might let us avoid looking at
+    // the already-fixed loops again if we are careful, but all that is
+    // complex and bug-prone. Since irreducible loops are rare, just starting
+    // another iteration is best.
+    if (FoundIrreducibility) {
+      continue;
+    }
+
+    for (auto *LoopEntry : Graph.getLoopEntries()) {
+      LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
+      // Each of these calls to processRegion may change the graph, but they are
+      // guaranteed not to interfere with each other. The only changes we make
+      // to the graph are to add blocks on the way to a loop entry. As the
+      // loops are disjoint, that means we may only alter branches that exit
+      // another loop, which are ignored when recursing into that other loop
+      // anyhow.
+      if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
+        Changed = true;
+      }
+    }
+
+    return Changed;
   }
+}
 
-  // Check if we found irreducible control flow.
-  if (LLVM_LIKELY(Entries.size() <= 1))
-    return false;
+// Given a set of entries to a single loop, create a single entry for that
+// loop by creating a dispatch block for them, routing control flow using
+// a helper variable. Also updates Blocks with any new blocks created, so
+// that we properly track all the blocks in the region. But this does not update
+// ReachabilityGraph; this will be updated in the caller of this function as
+// needed.
+void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
+    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
+    const ReachabilityGraph &Graph) {
+  assert(Entries.size() >= 2);
 
   // Sort the entries to ensure a deterministic build.
+  BlockVector SortedEntries(Entries.begin(), Entries.end());
   llvm::sort(SortedEntries,
              [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
                auto ANum = A->getNumber();
@@ -257,8 +336,8 @@ bool LoopFixer::run() {
   for (auto Block : SortedEntries)
     assert(Block->getNumber() != -1);
   if (SortedEntries.size() > 1) {
-    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
-         I != E; ++I) {
+    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
+         ++I) {
       auto ANum = (*I)->getNumber();
       auto BNum = (*(std::next(I)))->getNumber();
       assert(ANum != BNum);
@@ -269,12 +348,12 @@ bool LoopFixer::run() {
   // Create a dispatch block which will contain a jump table to the entries.
   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
   MF.insert(MF.end(), Dispatch);
-  MLI.changeLoopFor(Dispatch, Loop);
+  Blocks.insert(Dispatch);
 
   // Add the jump table.
   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
-                                    TII.get(WebAssembly::BR_TABLE_I32));
+  MachineInstrBuilder MIB =
+      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
 
   // Add the register which will be used to tell the jump table which block to
   // jump to.
@@ -285,112 +364,110 @@ bool LoopFixer::run() {
   // Compute the indices in the superheader, one for each bad block, and
   // add them as successors.
   DenseMap<MachineBasicBlock *, unsigned> Indices;
-  for (auto *MBB : SortedEntries) {
-    auto Pair = Indices.insert(std::make_pair(MBB, 0));
-    if (!Pair.second) {
-      continue;
-    }
+  for (auto *Entry : SortedEntries) {
+    auto Pair = Indices.insert(std::make_pair(Entry, 0));
+    assert(Pair.second);
 
     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
     Pair.first->second = Index;
 
-    MIB.addMBB(MBB);
-    Dispatch->addSuccessor(MBB);
+    MIB.addMBB(Entry);
+    Dispatch->addSuccessor(Entry);
   }
 
-  // Rewrite the problematic successors for every block that wants to reach the
-  // bad blocks. For simplicity, we just introduce a new block for every edge
-  // we need to rewrite. (Fancier things are possible.)
+  // Rewrite the problematic successors for every block that wants to reach
+  // the bad blocks. For simplicity, we just introduce a new block for every
+  // edge we need to rewrite. (Fancier things are possible.)
 
-  SmallVector<MachineBasicBlock *, 4> AllPreds;
-  for (auto *MBB : SortedEntries) {
-    for (auto *Pred : MBB->predecessors()) {
+  BlockVector AllPreds;
+  for (auto *Entry : SortedEntries) {
+    for (auto *Pred : Entry->predecessors()) {
       if (Pred != Dispatch) {
         AllPreds.push_back(Pred);
       }
     }
   }
 
-  for (MachineBasicBlock *MBB : AllPreds) {
-    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
-    for (auto *Succ : MBB->successors()) {
-      if (!Entries.count(Succ)) {
+  // This set stores predecessors within this loop.
+  DenseSet<MachineBasicBlock *> InLoop;
+  for (auto *Pred : AllPreds) {
+    for (auto *Entry : Pred->successors()) {
+      if (!Entries.count(Entry))
         continue;
+      if (Graph.canReach(Entry, Pred)) {
+        InLoop.insert(Pred);
+        break;
       }
+    }
+  }
+
+  // Record if each entry has a layout predecessor. This map stores
+  // <<Predecessor is within the loop?, loop entry>, layout predecessor>
+  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
+      EntryToLayoutPred;
+  for (auto *Pred : AllPreds)
+    for (auto *Entry : Pred->successors())
+      if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
+        EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
+
+  // We need to create at most two routing blocks per entry: one for
+  // predecessors outside the loop and one for predecessors inside the loop.
+  // This map stores
+  // <<Predecessor is within the loop?, loop entry>, routing block>
+  std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
+  for (auto *Pred : AllPreds) {
+    bool PredInLoop = InLoop.count(Pred);
+    for (auto *Entry : Pred->successors()) {
+      if (!Entries.count(Entry) ||
+          Map.count(std::make_pair(InLoop.count(Pred), Entry)))
+        continue;
+      // If this entry has a layout predecessor and this predecessor is not it,
+      // we would rather create the routing block after that layout predecessor
+      // to save a branch.
+      if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
+          EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
+        continue;
 
       // This is a successor we need to rewrite.
-      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
-      MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
-                                             : MF.end(),
-                Split);
-      MLI.changeLoopFor(Split, Loop);
+      MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
+      MF.insert(Pred->isLayoutSuccessor(Entry)
+                    ? MachineFunction::iterator(Entry)
+                    : MF.end(),
+                Routing);
+      Blocks.insert(Routing);
 
       // Set the jump table's register of the index of the block we wish to
       // jump to, and jump to the jump table.
-      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
-              Reg)
-          .addImm(Indices[Succ]);
-      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
-          .addMBB(Dispatch);
-      Split->addSuccessor(Dispatch);
-      Map[Succ] = Split;
+      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
+          .addImm(Indices[Entry]);
+      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
+      Routing->addSuccessor(Dispatch);
+      Map[std::make_pair(PredInLoop, Entry)] = Routing;
     }
+  }
+
+  for (auto *Pred : AllPreds) {
+    bool PredInLoop = InLoop.count(Pred);
     // Remap the terminator operands and the successor list.
-    for (MachineInstr &Term : MBB->terminators())
+    for (MachineInstr &Term : Pred->terminators())
       for (auto &Op : Term.explicit_uses())
         if (Op.isMBB() && Indices.count(Op.getMBB()))
-          Op.setMBB(Map[Op.getMBB()]);
-    for (auto Rewrite : Map)
-      MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
+          Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
+
+    for (auto *Succ : Pred->successors()) {
+      if (!Entries.count(Succ))
+        continue;
+      auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
+      Pred->replaceSuccessor(Succ, Routing);
+    }
   }
 
   // Create a fake default label, because br_table requires one.
   MIB.addMBB(MIB.getInstr()
                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                  .getMBB());
-
-  return true;
 }
 
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
-  StringRef getPassName() const override {
-    return "WebAssembly Fix Irreducible Control Flow";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<MachineDominatorTree>();
-    AU.addPreserved<MachineDominatorTree>();
-    AU.addRequired<MachineLoopInfo>();
-    AU.addPreserved<MachineLoopInfo>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
-    // Visit the function body, which is identified as a null loop.
-    if (LoopFixer(MF, MLI, nullptr).run()) {
-      return true;
-    }
-
-    // Visit all the loops.
-    SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
-    while (!Worklist.empty()) {
-      MachineLoop *Loop = Worklist.pop_back_val();
-      Worklist.append(Loop->begin(), Loop->end());
-      if (LoopFixer(MF, MLI, Loop).run()) {
-        return true;
-      }
-    }
-
-    return false;
-  }
-
-public:
-  static char ID; // Pass identification, replacement for typeid
-  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
 } // end anonymous namespace
 
 char WebAssemblyFixIrreducibleControlFlow::ID = 0;
@@ -407,23 +484,18 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
                        "********** Function: "
                     << MF.getName() << '\n');
 
-  bool Changed = false;
-  auto &MLI = getAnalysis<MachineLoopInfo>();
-
-  // When we modify something, bail out and recompute MLI, then start again, as
-  // we create a new natural loop when we resolve irreducible control flow, and
-  // other loops may become nested in it, etc. In practice this is not an issue
-  // because irreducible control flow is rare, only very few cycles are needed
-  // here.
-  while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
-    // We rewrote part of the function; recompute MLI and start again.
-    LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
+  // Start the recursive process on the entire function body.
+  BlockSet AllBlocks;
+  for (auto &MBB : MF) {
+    AllBlocks.insert(&MBB);
+  }
+
+  if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
+    // We rewrote part of the function; recompute relevant things.
     MF.getRegInfo().invalidateLiveness();
     MF.RenumberBlocks();
-    getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
-    MLI.runOnMachineFunction(MF);
-    Changed = true;
+    return true;
   }
 
-  return Changed;
+  return false;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 2d5aff28d27b..5299068efdd4 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -131,7 +130,7 @@ void WebAssemblyFrameLowering::writeSPToGlobal(
   const char *ES = "__stack_pointer";
   auto *SPSymbol = MF.createExternalSymbolName(ES);
   BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
-      .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL)
+      .addExternalSymbol(SPSymbol)
       .addReg(SrcReg);
 }
 
@@ -165,7 +164,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
   auto &MRI = MF.getRegInfo();
 
   auto InsertPt = MBB.begin();
-  while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
+  while (InsertPt != MBB.end() &&
+         WebAssembly::isArgument(InsertPt->getOpcode()))
     ++InsertPt;
   DebugLoc DL;
 
@@ -178,7 +178,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
   const char *ES = "__stack_pointer";
   auto *SPSymbol = MF.createExternalSymbolName(ES);
   BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
-      .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL);
+      .addExternalSymbol(SPSymbol);
 
   bool HasBP = hasBP(MF);
   if (HasBP) {
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index c6fa8261b03f..daddd4ca16ff 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -1,9 +1,8 @@
 // WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def
index e987d7f7f43a..77217f16a727 100644
--- a/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -1,9 +1,8 @@
 //- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,9 +15,14 @@
 
 HANDLE_NODETYPE(CALL1)
 HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RET_CALL)
 HANDLE_NODETYPE(RETURN)
 HANDLE_NODETYPE(ARGUMENT)
+// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
 HANDLE_NODETYPE(Wrapper)
+// A special wrapper used in PIC code for __memory_base/__table_base relative
+// access.
+HANDLE_NODETYPE(WrapperPIC)
 HANDLE_NODETYPE(BR_IF)
 HANDLE_NODETYPE(BR_TABLE)
 HANDLE_NODETYPE(SHUFFLE)
@@ -26,5 +30,7 @@ HANDLE_NODETYPE(VEC_SHL)
 HANDLE_NODETYPE(VEC_SHR_S)
 HANDLE_NODETYPE(VEC_SHR_U)
 HANDLE_NODETYPE(THROW)
+HANDLE_NODETYPE(MEMORY_COPY)
+HANDLE_NODETYPE(MEMORY_FILL)
 
 // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 0a7464cedc90..26339eaef37d 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,6 +15,7 @@
 #include "WebAssembly.h"
 #include "WebAssemblyTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
@@ -38,9 +38,9 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
   bool ForCodeSize;
 
 public:
-  WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &tm,
+  WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
                           CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
   }
 
   StringRef getPassName() const override {
@@ -52,8 +52,7 @@ public:
                          "********** Function: "
                       << MF.getName() << '\n');
 
-    ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) ||
-                  MF.getFunction().hasFnAttribute(Attribute::MinSize);
+    ForCodeSize = MF.getFunction().hasOptSize();
     Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
@@ -79,14 +78,159 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
     return;
   }
 
-  // Few custom selection stuff. If we need WebAssembly-specific selection,
-  // uncomment this block add corresponding case statements.
-  /*
+  // A few custom selection cases.
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread: {
+      // We lower a single-thread fence to a pseudo compiler barrier instruction
+      // preventing instruction reordering. This will not be emitted in final
+      // binary.
+      MachineSDNode *Fence =
+          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                 DL,                 // debug loc
+                                 MVT::Other,         // outchain type
+                                 Node->getOperand(0) // inchain
+          );
+      ReplaceNode(Node, Fence);
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+
+    case SyncScope::System: {
+      // For non-emscripten systems, we have not decided on what we should
+      // translate fences to yet.
+      if (!Subtarget->getTargetTriple().isOSEmscripten())
+        report_fatal_error(
+            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+
+      // Wasm does not have a fence instruction, but because all atomic
+      // instructions in wasm are sequentially consistent, we translate a
+      // fence to an idempotent atomic RMW instruction to a linear memory
+      // address. All atomic instructions in wasm are sequentially consistent,
+      // but this is to ensure a fence also prevents reordering of non-atomic
+      // instructions in the VM. Even though LLVM IR's fence instruction does
+      // not say anything about its relationship with non-atomic instructions,
+      // we think this is more user-friendly.
+      //
+      // While any address can work, here we use a value stored in
+      // __stack_pointer wasm global because there's high chance that area is
+      // in cache.
+      //
+      // So the selected instructions will be in the form of:
+      //   %addr = get_global $__stack_pointer
+      //   %0 = i32.const 0
+      //   i32.atomic.rmw.or %addr, %0
+      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
+          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+      MachineSDNode *GetGlobal =
+          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
+                                 DL,                          // debug loc
+                                 MVT::i32,                    // result type
+                                 StackPtrSym // __stack_pointer symbol
+          );
+
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      auto *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getUnknownStack(MF),
+          // FIXME Volatile isn't really correct, but currently all LLVM
+          // atomic instructions are treated as volatiles in the backend, so
+          // we should be consistent.
+          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+              MachineMemOperand::MOStore,
+          4, 4, AAMDNodes(), nullptr, SyncScope::System,
+          AtomicOrdering::SequentiallyConsistent);
+      MachineSDNode *Const0 =
+          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
+      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+          DL,                             // debug loc
+          MVT::i32,                       // result type
+          MVT::Other,                     // outchain type
+          {
+              Zero,                  // alignment
+              Zero,                  // offset
+              SDValue(GetGlobal, 0), // __stack_pointer
+              SDValue(Const0, 0),    // OR with 0 to make it idempotent
+              Node->getOperand(0)    // inchain
+          });
+
+      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+  }
+
+  case ISD::GlobalTLSAddress: {
+    const auto *GA = cast<GlobalAddressSDNode>(Node);
+
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+      report_fatal_error("cannot use thread-local storage without bulk memory",
+                         false);
+
+    // Currently Emscripten does not support dynamic linking with threads.
+    // Therefore, if we have thread-local storage, only the local-exec model
+    // is possible.
+    // TODO: remove this and implement proper TLS models once Emscripten
+    // supports dynamic linking with threads.
+    if (GA->getGlobal()->getThreadLocalMode() !=
+            GlobalValue::LocalExecTLSModel &&
+        !Subtarget->getTargetTriple().isOSEmscripten()) {
+      report_fatal_error("only -ftls-model=local-exec is supported for now on "
+                         "non-Emscripten OSes: variable " +
+                             GA->getGlobal()->getName(),
+                         false);
+    }
+
+    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+    SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
+    SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
+        GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
+
+    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
+                                                    DL, MVT::i32, TLSBaseSym);
+    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
+        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
+    MachineSDNode *TLSAddress =
+        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
+                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    ReplaceNode(Node, TLSAddress);
+    return;
+  }
+
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::wasm_tls_size: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSSize = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+      ReplaceNode(Node, TLSSize);
+      return;
+    }
+    }
+    break;
+  }
+
   default:
     break;
   }
-  */
 
   // Select the default instruction.
   SelectCode(Node);
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 003848e34227..4064a983099c 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1,9 +1,8 @@
 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -46,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   setBooleanContents(ZeroOrOneBooleanContent);
   // Except in SIMD vectors
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-  // WebAssembly does not produce floating-point exceptions on normal floating
-  // point operations.
-  setHasFloatingPointExceptions(false);
   // We don't know the microarchitecture here, so just reduce register pressure.
   setSchedulingPreference(Sched::RegPressure);
   // Tell ISel that we have a stack pointer.
@@ -64,10 +60,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
-      addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
-    }
+  }
+  if (Subtarget->hasUnimplementedSIMD128()) {
+    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
   }
   // Compute derived properties from the register classes.
   computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -111,56 +107,62 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     setTruncStoreAction(T, MVT::f16, Expand);
   }
 
-  // Support saturating add for i8x16 and i16x8
-  if (Subtarget->hasSIMD128())
-    for (auto T : {MVT::v16i8, MVT::v8i16})
-      for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
-        setOperationAction(Op, T, Legal);
-
   // Expand unavailable integer operations.
   for (auto Op :
        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
-    for (auto T : {MVT::i32, MVT::i64}) {
+    for (auto T : {MVT::i32, MVT::i64})
       setOperationAction(Op, T, Expand);
-    }
-    if (Subtarget->hasSIMD128()) {
-      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+    if (Subtarget->hasSIMD128())
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
         setOperationAction(Op, T, Expand);
-      }
-      if (Subtarget->hasUnimplementedSIMD128()) {
-        setOperationAction(Op, MVT::v2i64, Expand);
-      }
-    }
+    if (Subtarget->hasUnimplementedSIMD128())
+      setOperationAction(Op, MVT::v2i64, Expand);
   }
 
-  // There is no i64x2.mul instruction
-  setOperationAction(ISD::MUL, MVT::v2i64, Expand);
-
-  // We have custom shuffle lowering to expose the shuffle mask
+  // SIMD-specific configuration
   if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+    // Support saturating add for i8x16 and i16x8
+    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+      for (auto T : {MVT::v16i8, MVT::v8i16})
+        setOperationAction(Op, T, Legal);
+
+    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
+    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+    if (Subtarget->hasUnimplementedSIMD128())
+      for (auto T : {MVT::v2i64, MVT::v2f64})
+        setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+
+    // We have custom shuffle lowering to expose the shuffle mask
+    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
-    }
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
-      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
-    }
-  }
+    if (Subtarget->hasUnimplementedSIMD128())
+      for (auto T: {MVT::v2i64, MVT::v2f64})
+        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
 
-  // Custom lowering since wasm shifts must have a scalar shift amount
-  if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
-      for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+    // Custom lowering since wasm shifts must have a scalar shift amount
+    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
         setOperationAction(Op, T, Custom);
-    if (Subtarget->hasUnimplementedSIMD128())
-      for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+      if (Subtarget->hasUnimplementedSIMD128())
         setOperationAction(Op, MVT::v2i64, Custom);
-  }
+    }
 
-  // There are no select instructions for vectors
-  if (Subtarget->hasSIMD128())
+    // Custom lower lane accesses to expand out variable indices
+    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+        setOperationAction(Op, T, Custom);
+      if (Subtarget->hasUnimplementedSIMD128())
+        for (auto T : {MVT::v2i64, MVT::v2f64})
+          setOperationAction(Op, T, Custom);
+    }
+
+    // There is no i64x2.mul instruction
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+    // There are no vector select instructions
     for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
         setOperationAction(Op, T, Expand);
@@ -169,6 +171,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
           setOperationAction(Op, T, Expand);
     }
 
+    // Expand integer operations supported for scalars but not SIMD
+    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
+                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
+      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+        setOperationAction(Op, T, Expand);
+      if (Subtarget->hasUnimplementedSIMD128())
+        setOperationAction(Op, MVT::v2i64, Expand);
+    }
+
+    // Expand float operations supported for scalars but not SIMD
+    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
+                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
+      setOperationAction(Op, MVT::v4f32, Expand);
+      if (Subtarget->hasUnimplementedSIMD128())
+        setOperationAction(Op, MVT::v2f64, Expand);
+    }
+
+    // Expand additional SIMD ops that V8 hasn't implemented yet
+    if (!Subtarget->hasUnimplementedSIMD128()) {
+      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+    }
+  }
+
   // As a special case, these operators use the type to mean the type to
   // sign-extend from.
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -220,25 +247,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     }
   }
 
-  // Expand additional SIMD ops that V8 hasn't implemented yet
-  if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
-    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
-    setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
-  }
-
-  // Custom lower lane accesses to expand out variable indices
-  if (Subtarget->hasSIMD128()) {
-    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
-    }
-    if (Subtarget->hasUnimplementedSIMD128()) {
-      for (auto T : {MVT::v2i64, MVT::v2f64}) {
-        setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
-        setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
-      }
-    }
-  }
+  // Don't do anything clever with build_pairs
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 
   // Trap lowers to wasm unreachable
   setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -248,6 +258,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   setMaxAtomicSizeInBitsSupported(64);
+
+  if (Subtarget->hasBulkMemory()) {
+    // Use memory.copy and friends over multiple loads and stores
+    MaxStoresPerMemcpy = 1;
+    MaxStoresPerMemcpyOptSize = 1;
+    MaxStoresPerMemmove = 1;
+    MaxStoresPerMemmoveOptSize = 1;
+    MaxStoresPerMemset = 1;
+    MaxStoresPerMemsetOptSize = 1;
+  }
+
+  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
+  // consistent with the f64 and f128 names.
+  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+  // Define the emscripten name for return address helper.
+  // TODO: when implementing other WASM backends, make this generic or only do
+  // this on emscripten depending on what they end up doing.
+  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
+
+  // Always convert switches to br_tables unless there is only one case, which
+  // is equivalent to a simple branch. This reduces code size for wasm, and we
+  // defer possible jump table optimizations to the VM.
+  setMinimumJumpTableEntries(2);
 }
 
 TargetLowering::AtomicExpansionKind
@@ -272,12 +307,6 @@ FastISel *WebAssemblyTargetLowering::createFastISel(
   return WebAssembly::createFastISel(FuncInfo, LibInfo);
 }
 
-bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
-    const GlobalAddressSDNode * /*GA*/) const {
-  // All offsets can be folded.
-  return true;
-}
-
 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                       EVT VT) const {
   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
@@ -324,11 +353,11 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   auto &Context = BB->getParent()->getFunction().getContext();
   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
 
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  const BasicBlock *LLVMBB = BB->getBasicBlock();
   MachineFunction *F = BB->getParent();
-  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
+  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
+  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
 
   MachineFunction::iterator It = ++BB->getIterator();
   F->insert(It, FalseMBB);
@@ -336,8 +365,7 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   F->insert(It, DoneMBB);
 
   // Transfer the remainder of BB and its successor edges to DoneMBB.
-  DoneMBB->splice(DoneMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   BB->addSuccessor(TrueMBB);
@@ -502,7 +530,8 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
 }
 
 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
-    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
+    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
+    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
   // WebAssembly supports unaligned accesses, though it should be declared
   // with the p2align attribute on loads and stores which do so, and there
   // may be a performance impact. We tell LLVM they're "fast" because
@@ -578,14 +607,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 // Lowering Code
 //===----------------------------------------------------------------------===//
 
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
   MachineFunction &MF = DAG.getMachineFunction();
   DAG.getContext()->diagnose(
-      DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
+      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
 }
 
 // Test whether the given calling convention is supported.
-static bool CallingConvSupported(CallingConv::ID CallConv) {
+static bool callingConvSupported(CallingConv::ID CallConv) {
   // We currently support the language-independent target-independent
   // conventions. We don't yet have a way to annotate calls with properties like
   // "cold", and we don't have any call-clobbered registers, so these are mostly
@@ -608,20 +637,21 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   auto Layout = MF.getDataLayout();
 
   CallingConv::ID CallConv = CLI.CallConv;
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG,
          "WebAssembly doesn't support language-specific or target-specific "
          "calling conventions yet");
   if (CLI.IsPatchPoint)
     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
 
-  // WebAssembly doesn't currently support explicit tail calls. If they are
-  // required, fail. Otherwise, just disable them.
-  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
-       MF.getTarget().Options.GuaranteedTailCallOpt) ||
-      (CLI.CS && CLI.CS.isMustTailCall()))
-    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
-  CLI.IsTailCall = false;
+  // Fail if tail calls are required but not enabled
+  if (!Subtarget->hasTailCall()) {
+    if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+         MF.getTarget().Options.GuaranteedTailCallOpt) ||
+        (CLI.CS && CLI.CS.isMustTailCall()))
+      fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+    CLI.IsTailCall = false;
+  }
 
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   if (Ins.size() > 1)
@@ -630,9 +660,9 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
   unsigned NumFixedArgs = 0;
-  for (unsigned i = 0; i < Outs.size(); ++i) {
-    const ISD::OutputArg &Out = Outs[i];
-    SDValue &OutVal = OutVals[i];
+  for (unsigned I = 0; I < Outs.size(); ++I) {
+    const ISD::OutputArg &Out = Outs[I];
+    SDValue &OutVal = OutVals[I];
     if (Out.Flags.isNest())
       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
     if (Out.Flags.isInAlloca())
@@ -669,13 +699,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (IsVarArg) {
     // Outgoing non-fixed arguments are placed in a buffer. First
     // compute their offsets and the total amount of buffer space needed.
-    for (SDValue Arg :
-         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
+      const ISD::OutputArg &Out = Outs[I];
+      SDValue &Arg = OutVals[I];
       EVT VT = Arg.getValueType();
       assert(VT != MVT::iPTR && "Legalized args should be concrete");
       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+      unsigned Align = std::max(Out.Flags.getOrigAlign(),
+                                Layout.getABITypeAlignment(Ty));
       unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
-                                             Layout.getABITypeAlignment(Ty));
+                                             Align);
       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                         Offset, VT.getSimpleVT(),
                                         CCValAssign::Full));
@@ -711,6 +744,18 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
     FINode = DAG.getIntPtrConstant(0, DL);
   }
 
+  if (Callee->getOpcode() == ISD::GlobalAddress) {
+    // If the callee is a GlobalAddress node (quite common, every direct call
+    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
+    // doesn't add MO_GOT, which is not needed for direct calls.
+    GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
+    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+                                        getPointerTy(DAG.getDataLayout()),
+                                        GA->getOffset());
+    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
+                         getPointerTy(DAG.getDataLayout()), Callee);
+  }
+
   // Compute the operands for the CALLn node.
   SmallVector<SDValue, 16> Ops;
   Ops.push_back(Chain);
@@ -739,6 +784,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
     // registers.
     InTys.push_back(In.VT);
   }
+
+  if (CLI.IsTailCall) {
+    // ret_calls do not return values to the current frame
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
+  }
+
   InTys.push_back(MVT::Other);
   SDVTList InTyList = DAG.getVTList(InTys);
   SDValue Res =
@@ -768,7 +820,7 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
     SelectionDAG &DAG) const {
   assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
 
   SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -795,7 +847,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-  if (!CallingConvSupported(CallConv))
+  if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
 
   MachineFunction &MF = DAG.getMachineFunction();
@@ -842,7 +894,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   // Record the number and types of arguments and results.
   SmallVector<MVT, 4> Params;
   SmallVector<MVT, 4> Results;
-  ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                       DAG.getTarget(), Params, Results);
   for (MVT VT : Results)
     MFI->addResult(VT);
@@ -855,6 +907,21 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   return Chain;
 }
 
+void WebAssemblyTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  case ISD::SIGN_EXTEND_INREG:
+    // Do not add any results, signifying that N should not be custom lowered
+    // after all. This happens because simd128 turns on custom lowering for
+    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
+    // illegal type.
+    break;
+  default:
+    llvm_unreachable(
+        "ReplaceNodeResults not implemented for this op for WebAssembly!");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //  Custom lowering hooks.
 //===----------------------------------------------------------------------===//
@@ -882,22 +949,23 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
   case ISD::BRIND:
     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
     return SDValue();
-  case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
-    fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
-    return SDValue();
+  case ISD::RETURNADDR:
+    return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:
     return LowerFRAMEADDR(Op, DAG);
   case ISD::CopyToReg:
     return LowerCopyToReg(Op, DAG);
-  case ISD::INTRINSIC_WO_CHAIN:
-    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
   case ISD::INSERT_VECTOR_ELT:
     return LowerAccessVectorElement(Op, DAG);
   case ISD::INTRINSIC_VOID:
-    return LowerINTRINSIC_VOID(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerIntrinsic(Op, DAG);
   case ISD::SIGN_EXTEND_INREG:
     return LowerSIGN_EXTEND_INREG(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return LowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SHL:
@@ -939,6 +1007,26 @@ SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
   return DAG.getTargetFrameIndex(FI, Op.getValueType());
 }
 
+SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
+    fail(DL, DAG,
+         "Non-Emscripten WebAssembly hasn't implemented "
+         "__builtin_return_address");
+    return SDValue();
+  }
+
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
+                     {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+      .first;
+}
+
 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
   // Non-zero depths are not supported by WebAssembly currently. Use the
@@ -963,9 +1051,40 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
          "Unexpected target flags on generic GlobalAddressSDNode");
   if (GA->getAddressSpace() != 0)
     fail(DL, DAG, "WebAssembly only expects the 0 address space");
-  return DAG.getNode(
-      WebAssemblyISD::Wrapper, DL, VT,
-      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+
+  unsigned OperandFlags = 0;
+  if (isPositionIndependent()) {
+    const GlobalValue *GV = GA->getGlobal();
+    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      MVT PtrVT = getPointerTy(MF.getDataLayout());
+      const char *BaseName;
+      if (GV->getValueType()->isFunctionTy()) {
+        BaseName = MF.createExternalSymbolName("__table_base");
+        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
+      }
+      else {
+        BaseName = MF.createExternalSymbolName("__memory_base");
+        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
+      }
+      SDValue BaseAddr =
+          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
+
+      SDValue SymAddr = DAG.getNode(
+          WebAssemblyISD::WrapperPIC, DL, VT,
+          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
+                                     OperandFlags));
+
+      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
+    } else {
+      OperandFlags = WebAssemblyII::MO_GOT;
+    }
+  }
+
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
+                                                GA->getOffset(), OperandFlags));
 }
 
 SDValue
@@ -976,15 +1095,8 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
   EVT VT = Op.getValueType();
   assert(ES->getTargetFlags() == 0 &&
          "Unexpected target flags on generic ExternalSymbolSDNode");
-  // Set the TargetFlags to 0x1 which indicates that this is a "function"
-  // symbol rather than a data symbol. We do this unconditionally even though
-  // we don't know anything about the symbol other than its name, because all
-  // external symbols used in target-independent SelectionDAG code are for
-  // functions.
-  return DAG.getNode(
-      WebAssemblyISD::Wrapper, DL, VT,
-      DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
-                                  WebAssemblyII::MO_SYMBOL_FUNCTION));
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
 }
 
 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -1038,17 +1150,28 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                       MachinePointerInfo(SV), 0);
 }
 
-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                   SelectionDAG &DAG) const {
-  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned IntNo;
+  switch (Op.getOpcode()) {
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    break;
+  default:
+    llvm_unreachable("Invalid intrinsic");
+  }
   SDLoc DL(Op);
+
   switch (IntNo) {
   default:
-    return {}; // Don't custom lower most intrinsics.
+    return SDValue(); // Don't custom lower most intrinsics.
 
   case Intrinsic::wasm_lsda: {
-    MachineFunction &MF = DAG.getMachineFunction();
     EVT VT = Op.getValueType();
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
@@ -1058,43 +1181,24 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                        DAG.getMCSymbol(S, PtrVT));
   }
-  }
-}
-
-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
-                                               SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-  SDLoc DL(Op);
-
-  switch (IntNo) {
-  default:
-    return {}; // Don't custom lower most intrinsics.
 
   case Intrinsic::wasm_throw: {
+    // We only support C++ exceptions for now
     int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
-    switch (Tag) {
-    case CPP_EXCEPTION: {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
-      const char *SymName = MF.createExternalSymbolName("__cpp_exception");
-      SDValue SymNode =
-          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
-                      DAG.getTargetExternalSymbol(
-                          SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
-      return DAG.getNode(WebAssemblyISD::THROW, DL,
-                         MVT::Other, // outchain type
-                         {
-                             Op.getOperand(0), // inchain
-                             SymNode,          // exception symbol
-                             Op.getOperand(3)  // thrown value
-                         });
-    }
-    default:
+    if (Tag != CPP_EXCEPTION)
       llvm_unreachable("Invalid tag!");
-    }
-    break;
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
+    return DAG.getNode(WebAssemblyISD::THROW, DL,
+                       MVT::Other, // outchain type
+                       {
+                           Op.getOperand(0), // inchain
+                           SymNode,          // exception symbol
+                           Op.getOperand(3)  // thrown value
+                       });
   }
   }
 }
@@ -1102,6 +1206,7 @@ WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 SDValue
 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   // If sign extension operations are disabled, allow sext_inreg only if operand
   // is a vector extract. SIMD does not depend on sign extension operations, but
   // allowing sext_inreg in this context lets us have simple patterns to select
@@ -1109,12 +1214,136 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
   // simpler in this file, but would necessitate large and brittle patterns to
   // undo the expansion and select extract_lane_s instructions.
   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
-  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
-    return Op;
+  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    const SDValue &Extract = Op.getOperand(0);
+    MVT VecT = Extract.getOperand(0).getSimpleValueType();
+    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
+                             ->getVT()
+                             .getSimpleVT();
+    MVT ExtractedVecT =
+        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+    if (ExtractedVecT == VecT)
+      return Op;
+    // Bitcast vector to appropriate type to ensure ISel pattern coverage
+    const SDValue &Index = Extract.getOperand(1);
+    unsigned IndexVal =
+        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
+    unsigned Scale =
+        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+    assert(Scale > 1);
+    SDValue NewIndex =
+        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
+    SDValue NewExtract = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
+                       NewExtract, Op.getOperand(1));
+  }
   // Otherwise expand
   return SDValue();
 }
 
+SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const EVT VecT = Op.getValueType();
+  const EVT LaneT = Op.getOperand(0).getValueType();
+  const size_t Lanes = Op.getNumOperands();
+  auto IsConstant = [](const SDValue &V) {
+    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
+  };
+
+  // Find the most common operand, which is approximately the best to splat
+  using Entry = std::pair<SDValue, size_t>;
+  SmallVector<Entry, 16> ValueCounts;
+  size_t NumConst = 0, NumDynamic = 0;
+  for (const SDValue &Lane : Op->op_values()) {
+    if (Lane.isUndef()) {
+      continue;
+    } else if (IsConstant(Lane)) {
+      NumConst++;
+    } else {
+      NumDynamic++;
+    }
+    auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
+                                [&Lane](Entry A) { return A.first == Lane; });
+    if (CountIt == ValueCounts.end()) {
+      ValueCounts.emplace_back(Lane, 1);
+    } else {
+      CountIt->second++;
+    }
+  }
+  auto CommonIt =
+      std::max_element(ValueCounts.begin(), ValueCounts.end(),
+                       [](Entry A, Entry B) { return A.second < B.second; });
+  assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
+  SDValue SplatValue = CommonIt->first;
+  size_t NumCommon = CommonIt->second;
+
+  // If v128.const is available, consider using it instead of a splat
+  if (Subtarget->hasUnimplementedSIMD128()) {
+    // {i32,i64,f32,f64}.const opcode, and value
+    const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
+    // SIMD prefix and opcode
+    const size_t SplatBytes = 2;
+    const size_t SplatConstBytes = SplatBytes + ConstBytes;
+    // SIMD prefix, opcode, and lane index
+    const size_t ReplaceBytes = 3;
+    const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
+    // SIMD prefix, v128.const opcode, and 128-bit value
+    const size_t VecConstBytes = 18;
+    // Initial v128.const and a replace_lane for each non-const operand
+    const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
+    // Initial splat and all necessary replace_lanes
+    const size_t SplatInitBytes =
+        IsConstant(SplatValue)
+            // Initial constant splat
+            ? (SplatConstBytes +
+               // Constant replace_lanes
+               (NumConst - NumCommon) * ReplaceConstBytes +
+               // Dynamic replace_lanes
+               (NumDynamic * ReplaceBytes))
+            // Initial dynamic splat
+            : (SplatBytes +
+               // Constant replace_lanes
+               (NumConst * ReplaceConstBytes) +
+               // Dynamic replace_lanes
+               (NumDynamic - NumCommon) * ReplaceBytes);
+    if (ConstInitBytes < SplatInitBytes) {
+      // Create build_vector that will lower to initial v128.const
+      SmallVector<SDValue, 16> ConstLanes;
+      for (const SDValue &Lane : Op->op_values()) {
+        if (IsConstant(Lane)) {
+          ConstLanes.push_back(Lane);
+        } else if (LaneT.isFloatingPoint()) {
+          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
+        } else {
+          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
+        }
+      }
+      SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+      // Add replace_lane instructions for non-const lanes
+      for (size_t I = 0; I < Lanes; ++I) {
+        const SDValue &Lane = Op->getOperand(I);
+        if (!Lane.isUndef() && !IsConstant(Lane))
+          Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+                               DAG.getConstant(I, DL, MVT::i32));
+      }
+      return Result;
+    }
+  }
+  // Use a splat for the initial vector
+  SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+  // Add replace_lane instructions for other values
+  for (size_t I = 0; I < Lanes; ++I) {
+    const SDValue &Lane = Op->getOperand(I);
+    if (Lane != SplatValue)
+      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+                           DAG.getConstant(I, DL, MVT::i32));
+  }
+  return Result;
+}
+
 SDValue
 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -1131,11 +1360,10 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   Ops[OpIdx++] = Op.getOperand(1);
 
   // Expand mask indices to byte indices and materialize them as operands
-  for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+  for (int M : Mask) {
     for (size_t J = 0; J < LaneBytes; ++J) {
       // Lower undefs (represented by -1 in mask) to zero
-      uint64_t ByteIndex =
-          Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
     }
   }
@@ -1155,7 +1383,7 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
     return SDValue();
 }
 
-static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
   // 32-bit and 64-bit unrolled shifts will have proper semantics
   if (LaneT.bitsGE(MVT::i32))
@@ -1190,17 +1418,17 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
   // Expand all vector shifts until V8 fixes its implementation
   // TODO: remove this once V8 is fixed
   if (!Subtarget->hasUnimplementedSIMD128())
-    return UnrollVectorShift(Op, DAG);
+    return unrollVectorShift(Op, DAG);
 
   // Unroll non-splat vector shifts
   BuildVectorSDNode *ShiftVec;
   SDValue SplatVal;
   if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
       !(SplatVal = ShiftVec->getSplatValue()))
-    return UnrollVectorShift(Op, DAG);
+    return unrollVectorShift(Op, DAG);
 
   // All splats except i64x2 const splats are handled by patterns
-  ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
   if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
     return Op;
 
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 59f4230ed889..b3c7f3defd5f 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -1,9 +1,8 @@
 //- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -47,7 +46,6 @@ private:
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
   FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                            const TargetLibraryInfo *LibInfo) const override;
-  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -62,6 +60,7 @@ private:
                              unsigned AS,
                              Instruction *I = nullptr) const override;
   bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
+                                      MachineMemOperand::Flags Flags,
                                       bool *Fast) const override;
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
 
@@ -87,9 +86,17 @@ private:
                                const SDLoc &DL, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
 
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
+
+  const char *getClearCacheBuiltinName() const override {
+    report_fatal_error("llvm.clear_cache is not supported on wasm");
+  }
+
   // Custom lowering hooks.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
@@ -97,9 +104,9 @@ private:
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerIntrinsic(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 5fb8ef90bc43..e85aa57efc42 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -12,20 +11,132 @@
 ///
 //===----------------------------------------------------------------------===//
 
+let UseNamedOperandTable = 1 in
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                    list<dag> pattern_r, string asmstr_r = "",
+                    string asmstr_s = "", bits<32> atomic_op = -1> {
+  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+              !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
+                      bits<32> atomic_op = -1> {
+  defm "" : NRI<oops, iops, pattern, asmstr,
+                !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+           "atomic.notify \t${off}${p2align}", 0x00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp,
+                I64:$timeout),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+           "i32.atomic.wait \t${off}${p2align}", 0x01>;
+defm ATOMIC_WAIT_I64 :
+  ATOMIC_I<(outs I32:$dst),
+           (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
+                I64:$timeout),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+           "i64.atomic.wait \t${off}${p2align}", 0x02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
+
+let Predicates = [HasAtomics] in {
+// Select notify operations with no constant offset.
+def NotifyPatNoOffset :
+  Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)),
+      (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+
+// Select notify operations with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<PatFrag operand> :
+  Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), I32:$count)),
+      (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<regPlusImm>;
+def : NotifyPatImmOff<or_is_add>;
+
+def NotifyPatGlobalAddr :
+  Pat<(i32 (int_wasm_atomic_notify (regPlusGA I32:$addr,
+                                    (WebAssemblywrapper tglobaladdr:$off)),
+                                   I32:$count)),
+      (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
+
+// Select notify operations with just a constant offset.
+def NotifyPatOffsetOnly :
+  Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
+      (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+
+def NotifyPatGlobalAddrOffOnly :
+  Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+                                   I32:$count)),
+      (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+      (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+  Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+      (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+                 ty:$exp, I64:$timeout)),
+      (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+      (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+  Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
+
 //===----------------------------------------------------------------------===//
 // Atomic loads
 //===----------------------------------------------------------------------===//
 
-multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
-                    list<dag> pattern_r, string asmstr_r = "",
-                    string asmstr_s = "", bits<32> inst = -1> {
-  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
-              inst>,
+multiclass AtomicLoad<WebAssemblyRegClass rc, string name, int atomic_op> {
+  defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
             Requires<[HasAtomics]>;
 }
 
-defm ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
-defm ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
+defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
+defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
 
 // Select loads with no constant offset.
 let Predicates = [HasAtomics] in {
@@ -43,9 +154,6 @@ def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
 def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>;
 def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>;
 
-def : LoadPatExternalSym<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatExternalSym<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-
 // Select loads with just a constant offset.
 def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
 def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
@@ -53,18 +161,15 @@ def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
 def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
 def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
 
-def : LoadPatExternSymOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-
 } // Predicates = [HasAtomics]
 
 // Extending loads. Note that there are only zero-extending atomic loads, no
 // sign-extending loads.
-defm ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
-defm ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
-defm ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
-defm ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
-defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
+defm ATOMIC_LOAD8_U_I32 : AtomicLoad<I32, "i32.atomic.load8_u", 0x12>;
+defm ATOMIC_LOAD16_U_I32 : AtomicLoad<I32, "i32.atomic.load16_u", 0x13>;
+defm ATOMIC_LOAD8_U_I64 : AtomicLoad<I64, "i64.atomic.load8_u", 0x14>;
+defm ATOMIC_LOAD16_U_I64 : AtomicLoad<I64, "i64.atomic.load16_u", 0x15>;
+defm ATOMIC_LOAD32_U_I64 : AtomicLoad<I64, "i64.atomic.load32_u", 0x16>;
 
 // Fragments for extending loads. These are different from regular loads because
 // the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
@@ -149,16 +254,6 @@ def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
 def : LoadPatGlobalAddr<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
 def : LoadPatGlobalAddr<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
 
-def : LoadPatExternalSym<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatExternalSym<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
 // Extending loads with just a constant offset
 def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
 def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
@@ -180,24 +275,19 @@ def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
 def : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
 def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
 
-def : LoadPatExternSymOffOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatExternSymOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
 } // Predicates = [HasAtomics]
 
 //===----------------------------------------------------------------------===//
 // Atomic stores
 //===----------------------------------------------------------------------===//
 
-defm ATOMIC_STORE_I32 : WebAssemblyStore<I32, "i32.atomic.store", 0xfe17>;
-defm ATOMIC_STORE_I64 : WebAssemblyStore<I64, "i64.atomic.store", 0xfe18>;
+multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
+  defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
+            Requires<[HasAtomics]>;
+}
+
+defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
+defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
 
 // We need an 'atomic' version of store patterns because store and atomic_store
 // nodes have different operand orders:
@@ -230,12 +320,6 @@ class AStorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
 def : AStorePatGlobalAddr<i32, atomic_store_32, ATOMIC_STORE_I32>;
 def : AStorePatGlobalAddr<i64, atomic_store_64, ATOMIC_STORE_I64>;
 
-class AStorePatExternalSym<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)), ty:$val),
-      (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-def : AStorePatExternalSym<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatExternalSym<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
 // Select stores with just a constant offset.
 class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -248,20 +332,14 @@ class AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
 def : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
 def : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
 
-class AStorePatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind (WebAssemblywrapper texternalsym:$off), ty:$val),
-      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatExternSymOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatExternSymOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
 } // Predicates = [HasAtomics]
 
 // Truncating stores.
-defm ATOMIC_STORE8_I32 : WebAssemblyStore<I32, "i32.atomic.store8", 0xfe19>;
-defm ATOMIC_STORE16_I32 : WebAssemblyStore<I32, "i32.atomic.store16", 0xfe1a>;
-defm ATOMIC_STORE8_I64 : WebAssemblyStore<I64, "i64.atomic.store8", 0xfe1b>;
-defm ATOMIC_STORE16_I64 : WebAssemblyStore<I64, "i64.atomic.store16", 0xfe1c>;
-defm ATOMIC_STORE32_I64 : WebAssemblyStore<I64, "i64.atomic.store32", 0xfe1d>;
+defm ATOMIC_STORE8_I32 : AtomicStore<I32, "i32.atomic.store8", 0x19>;
+defm ATOMIC_STORE16_I32 : AtomicStore<I32, "i32.atomic.store16", 0x1a>;
+defm ATOMIC_STORE8_I64 : AtomicStore<I64, "i64.atomic.store8", 0x1b>;
+defm ATOMIC_STORE16_I64 : AtomicStore<I64, "i64.atomic.store16", 0x1c>;
+defm ATOMIC_STORE32_I64 : AtomicStore<I64, "i64.atomic.store32", 0x1d>;
 
 // Fragments for truncating stores.
 
@@ -302,12 +380,6 @@ def : AStorePatGlobalAddr<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
 def : AStorePatGlobalAddr<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
 def : AStorePatGlobalAddr<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
 
-def : AStorePatExternalSym<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatExternalSym<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatExternalSym<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatExternalSym<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatExternalSym<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
 // Truncating stores with just a constant offset
 def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
 def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
@@ -321,105 +393,101 @@ def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
 def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
 def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
 
-def : AStorePatExternSymOffOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatExternSymOffOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
 } // Predicates = [HasAtomics]
 
 //===----------------------------------------------------------------------===//
 // Atomic binary read-modify-writes
 //===----------------------------------------------------------------------===//
 
-multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
-  defm "" : I<(outs rc:$dst),
-              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
-              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-              !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $val"),
-              !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name,
+                             int atomic_op> {
+  defm "" :
+    ATOMIC_I<(outs rc:$dst),
+             (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
+             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+             !strconcat(name, "\t${off}${p2align}"), atomic_op>;
 }
 
-defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
-defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
+defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>;
+defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0x1f>;
 defm ATOMIC_RMW8_U_ADD_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0xfe20>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0x20>;
 defm ATOMIC_RMW16_U_ADD_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0xfe21>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0x21>;
 defm ATOMIC_RMW8_U_ADD_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0xfe22>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0x22>;
 defm ATOMIC_RMW16_U_ADD_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0xfe23>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0x23>;
 defm ATOMIC_RMW32_U_ADD_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0xfe24>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0x24>;
 
-defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
-defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
+defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0x25>;
+defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0x26>;
 defm ATOMIC_RMW8_U_SUB_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0xfe27>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0x27>;
 defm ATOMIC_RMW16_U_SUB_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0xfe28>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0x28>;
 defm ATOMIC_RMW8_U_SUB_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0xfe29>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0x29>;
 defm ATOMIC_RMW16_U_SUB_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0xfe2a>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0x2a>;
 defm ATOMIC_RMW32_U_SUB_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0xfe2b>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0x2b>;
 
-defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
-defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
+defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0x2c>;
+defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0x2d>;
 defm ATOMIC_RMW8_U_AND_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0xfe2e>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0x2e>;
 defm ATOMIC_RMW16_U_AND_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0xfe2f>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0x2f>;
 defm ATOMIC_RMW8_U_AND_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0xfe30>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0x30>;
 defm ATOMIC_RMW16_U_AND_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0xfe31>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0x31>;
 defm ATOMIC_RMW32_U_AND_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0xfe32>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0x32>;
 
-defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
-defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
+defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0x33>;
+defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0x34>;
 defm ATOMIC_RMW8_U_OR_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0xfe35>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0x35>;
 defm ATOMIC_RMW16_U_OR_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0xfe36>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0x36>;
 defm ATOMIC_RMW8_U_OR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0xfe37>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0x37>;
 defm ATOMIC_RMW16_U_OR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0xfe38>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0x38>;
 defm ATOMIC_RMW32_U_OR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0xfe39>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0x39>;
 
-defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
-defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
+defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0x3a>;
+defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0x3b>;
 defm ATOMIC_RMW8_U_XOR_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0xfe3c>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0x3c>;
 defm ATOMIC_RMW16_U_XOR_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0xfe3d>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0x3d>;
 defm ATOMIC_RMW8_U_XOR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0xfe3e>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0x3e>;
 defm ATOMIC_RMW16_U_XOR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0xfe3f>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0x3f>;
 defm ATOMIC_RMW32_U_XOR_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0xfe40>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0x40>;
 
 defm ATOMIC_RMW_XCHG_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0x41>;
 defm ATOMIC_RMW_XCHG_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0x42>;
 defm ATOMIC_RMW8_U_XCHG_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0xfe43>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0x43>;
 defm ATOMIC_RMW16_U_XCHG_I32 :
-  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0xfe44>;
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0x44>;
 defm ATOMIC_RMW8_U_XCHG_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0xfe45>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0x45>;
 defm ATOMIC_RMW16_U_XCHG_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0xfe46>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0x46>;
 defm ATOMIC_RMW32_U_XCHG_I64 :
-  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0xfe47>;
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0x47>;
 
 // Select binary RMWs with no constant offset.
 class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -437,11 +505,6 @@ class BinRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
                 ty:$val)),
       (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
 
-class BinRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
-                ty:$val)),
-      (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-
 // Select binary RMWs with just a constant offset.
 class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind imm:$off, ty:$val)),
@@ -451,10 +514,6 @@ class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
       (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
 
-class BinRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$val)),
-      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-
 // Patterns for various addressing modes.
 multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
                          NI inst_64> {
@@ -469,17 +528,11 @@ multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
   def : BinRMWPatGlobalAddr<i32, rmw_32, inst_32>;
   def : BinRMWPatGlobalAddr<i64, rmw_64, inst_64>;
 
-  def : BinRMWPatExternalSym<i32, rmw_32, inst_32>;
-  def : BinRMWPatExternalSym<i64, rmw_64, inst_64>;
-
   def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
   def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
 
   def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
   def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
-
-  def : BinRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
-  def : BinRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
 }
 
 let Predicates = [HasAtomics] in {
@@ -580,17 +633,6 @@ multiclass BinRMWTruncExtPattern<
   def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
   def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
 
-  def : BinRMWPatExternalSym<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatExternalSym<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatExternalSym<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatExternalSym<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
-  def : BinRMWPatExternalSym<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : BinRMWPatExternalSym<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatExternalSym<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatExternalSym<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatExternalSym<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
   // Truncating-extending binary RMWs with just a constant offset
   def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
   def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
@@ -613,17 +655,6 @@ multiclass BinRMWTruncExtPattern<
   def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
   def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
   def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
-  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
-  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
-  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
-  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
-  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
 }
 
 let Predicates = [HasAtomics] in {
@@ -663,29 +694,31 @@ defm : BinRMWTruncExtPattern<
 // Consider adding a pass after instruction selection that optimizes this case
 // if it is frequent.
 
-multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
-  defm "" : I<(outs rc:$dst),
-              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
-                   rc:$new),
-              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-              !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
-              !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
+                             int atomic_op> {
+  defm "" :
+    ATOMIC_I<(outs rc:$dst),
+             (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
+                  rc:$new_),
+             (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+             !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
+             !strconcat(name, "\t${off}${p2align}"), atomic_op>;
 }
 
 defm ATOMIC_RMW_CMPXCHG_I32 :
-  WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0xfe48>;
+  WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0x48>;
 defm ATOMIC_RMW_CMPXCHG_I64 :
-  WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0xfe49>;
+  WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0x49>;
 defm ATOMIC_RMW8_U_CMPXCHG_I32 :
-  WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0xfe4a>;
+  WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0x4a>;
 defm ATOMIC_RMW16_U_CMPXCHG_I32 :
-  WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0xfe4b>;
+  WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0x4b>;
 defm ATOMIC_RMW8_U_CMPXCHG_I64 :
-  WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0xfe4c>;
+  WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0x4c>;
 defm ATOMIC_RMW16_U_CMPXCHG_I64 :
-  WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0xfe4d>;
+  WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0x4d>;
 defm ATOMIC_RMW32_U_CMPXCHG_I64 :
-  WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0xfe4e>;
+  WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0x4e>;
 
 // Select ternary RMWs with no constant offset.
 class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -704,11 +737,6 @@ class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
                 ty:$exp, ty:$new)),
       (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
 
-class TerRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
-                ty:$exp, ty:$new)),
-      (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>;
-
 // Select ternary RMWs with just a constant offset.
 class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
@@ -718,10 +746,6 @@ class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
       (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
 
-class TerRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)),
-      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
-
 // Patterns for various addressing modes.
 multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
                          NI inst_64> {
@@ -736,23 +760,16 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
   def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
   def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
 
-  def : TerRMWPatExternalSym<i32, rmw_32, inst_32>;
-  def : TerRMWPatExternalSym<i64, rmw_64, inst_64>;
-
   def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
   def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
 
   def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
   def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
-
-  def : TerRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
-  def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
 }
 
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
 defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
                      ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
-} // Predicates = [HasAtomics]
 
 // Truncating & zero-extending ternary RMW patterns.
 // DAG legalization & optimization before instruction selection may introduce
@@ -840,17 +857,6 @@ multiclass TerRMWTruncExtPattern<
   def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
   def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
 
-  def : TerRMWPatExternalSym<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatExternalSym<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatExternalSym<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatExternalSym<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
-  def : TerRMWPatExternalSym<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : TerRMWPatExternalSym<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatExternalSym<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatExternalSym<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatExternalSym<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
   // Truncating-extending ternary RMWs with just a constant offset
   def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
   def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
@@ -873,147 +879,21 @@ multiclass TerRMWTruncExtPattern<
   def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
   def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
   def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
-  def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
-  def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
-  def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
-  def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
-  def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
-  def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
 }
 
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
 defm : TerRMWTruncExtPattern<
   atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
   ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
   ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
   ATOMIC_RMW32_U_CMPXCHG_I64>;
-}
 
 //===----------------------------------------------------------------------===//
-// Atomic wait / notify
+// Atomic fences
 //===----------------------------------------------------------------------===//
 
-let hasSideEffects = 1 in {
-defm ATOMIC_NOTIFY :
-  I<(outs I32:$dst),
-    (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
-    (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-    "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
-    "atomic.notify \t${off}, ${p2align}", 0xfe00>;
-let mayLoad = 1 in {
-defm ATOMIC_WAIT_I32 :
-  I<(outs I32:$dst),
-    (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout),
-    (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-    "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
-    "i32.atomic.wait \t${off}, ${p2align}", 0xfe01>;
-defm ATOMIC_WAIT_I64 :
-  I<(outs I32:$dst),
-    (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout),
-    (outs), (ins P2Align:$p2align, offset32_op:$off), [],
-    "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
-    "i64.atomic.wait \t${off}, ${p2align}", 0xfe02>;
-} // mayLoad = 1
-} // hasSideEffects = 1
-
-let Predicates = [HasAtomics] in {
-// Select notifys with no constant offset.
-class NotifyPatNoOffset<Intrinsic kind> :
-  Pat<(i32 (kind I32:$addr, I32:$count)),
-      (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
-def : NotifyPatNoOffset<int_wasm_atomic_notify>;
-
-// Select notifys with a constant offset.
-
-// Pattern with address + immediate offset
-class NotifyPatImmOff<Intrinsic kind, PatFrag operand> :
-  Pat<(i32 (kind (operand I32:$addr, imm:$off), I32:$count)),
-      (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
-def : NotifyPatImmOff<int_wasm_atomic_notify, regPlusImm>;
-def : NotifyPatImmOff<int_wasm_atomic_notify, or_is_add>;
-
-class NotifyPatGlobalAddr<Intrinsic kind> :
-  Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
-                 I32:$count)),
-      (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
-def : NotifyPatGlobalAddr<int_wasm_atomic_notify>;
-
-class NotifyPatExternalSym<Intrinsic kind> :
-  Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
-                 I32:$count)),
-      (ATOMIC_NOTIFY 0, texternalsym:$off, I32:$addr, I32:$count)>;
-def : NotifyPatExternalSym<int_wasm_atomic_notify>;
-
-// Select notifys with just a constant offset.
-class NotifyPatOffsetOnly<Intrinsic kind> :
-  Pat<(i32 (kind imm:$off, I32:$count)),
-      (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatOffsetOnly<int_wasm_atomic_notify>;
-
-class NotifyPatGlobalAddrOffOnly<Intrinsic kind> :
-  Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), I32:$count)),
-      (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatGlobalAddrOffOnly<int_wasm_atomic_notify>;
-
-class NotifyPatExternSymOffOnly<Intrinsic kind> :
-  Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), I32:$count)),
-      (ATOMIC_NOTIFY 0, texternalsym:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatExternSymOffOnly<int_wasm_atomic_notify>;
-
-// Select waits with no constant offset.
-class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
-      (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-// Select waits with a constant offset.
-
-// Pattern with address + immediate offset
-class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
-  Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
-      (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
-
-class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
-                 ty:$exp, I64:$timeout)),
-      (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatExternalSym<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
-                 ty:$exp, I64:$timeout)),
-      (inst 0, texternalsym:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatExternalSym<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatExternalSym<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-// Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset.
-class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
-      (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatExternSymOffOnly<ValueType ty, Intrinsic kind, NI inst> :
-  Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), ty:$exp,
-                 I64:$timeout)),
-      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-} // Predicates = [HasAtomics]
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
new file mode 100644
index 000000000000..f4352e3d12ec
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -0,0 +1,71 @@
+// WebAssemblyInstrBulkMemory.td - bulk memory codegen support --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly bulk memory codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+// Instruction requiring HasBulkMemory and the bulk memory prefix byte
+multiclass BULK_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+                  list<dag> pattern_r, string asmstr_r = "",
+                  string asmstr_s = "", bits<32> simdop = -1> {
+  defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+              !or(0xfc00, !and(0xff, simdop))>,
+            Requires<[HasBulkMemory]>;
+}
+
+// Bespoke types and nodes for bulk memory ops
+def wasm_memcpy_t : SDTypeProfile<0, 5,
+  [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisInt<4>]
+>;
+def wasm_memcpy : SDNode<"WebAssemblyISD::MEMORY_COPY", wasm_memcpy_t,
+                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def wasm_memset_t : SDTypeProfile<0, 4,
+  [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]
+>;
+def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t,
+                         [SDNPHasChain, SDNPMayStore]>;
+
+let mayStore = 1, hasSideEffects = 1 in
+defm MEMORY_INIT :
+  BULK_I<(outs),
+         (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
+              I32:$offset, I32:$size),
+         (outs), (ins i32imm_op:$seg, i32imm_op:$idx),
+         [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
+            I32:$offset, I32:$size
+          )],
+         "memory.init\t$seg, $idx, $dest, $offset, $size",
+         "memory.init\t$seg, $idx", 0x08>;
+
+let hasSideEffects = 1 in
+defm DATA_DROP :
+  BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
+         [(int_wasm_data_drop (i32 imm:$seg))],
+         "data.drop\t$seg", "data.drop\t$seg", 0x09>;
+
+let mayLoad = 1, mayStore = 1 in
+defm MEMORY_COPY :
+  BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx,
+                      I32:$dst, I32:$src, I32:$len),
+         (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx),
+         [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx),
+           I32:$dst, I32:$src, I32:$len
+         )],
+         "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len",
+         "memory.copy\t$src_idx, $dst_idx", 0x0a>;
+
+let mayStore = 1 in
+defm MEMORY_FILL :
+  BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size),
+         (outs), (ins i32imm_op:$idx),
+         [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)],
+         "memory.fill\t$idx, $dst, $value, $size",
+         "memory.fill\t$idx", 0x0b>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 07839b790114..703c15d58c93 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -1,9 +1,8 @@
 //===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -22,109 +21,112 @@ defm ADJCALLSTACKDOWN : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
                             [(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>;
 defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
                           [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
-} // isCodeGenOnly = 1
+} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1
 
-multiclass CALL<WebAssemblyRegClass vt, string prefix> {
-  defm CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops),
-                    (outs), (ins function32_op:$callee),
-                    [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
-                    !strconcat(prefix, "call\t$dst, $callee"),
-                    !strconcat(prefix, "call\t$callee"),
-                    0x10>;
+multiclass CALL<ValueType vt, WebAssemblyRegClass rt, string prefix,
+                list<Predicate> preds = []> {
+  defm CALL_#vt :
+    I<(outs rt:$dst), (ins function32_op:$callee, variable_ops),
+      (outs), (ins function32_op:$callee),
+      [(set (vt rt:$dst), (WebAssemblycall1 (i32 imm:$callee)))],
+      !strconcat(prefix, "call\t$dst, $callee"),
+      !strconcat(prefix, "call\t$callee"),
+      0x10>,
+    Requires<preds>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
-                                (outs), (ins I32:$callee),
-                               [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
-                               "PSEUDO CALL INDIRECT\t$callee",
-                               "PSEUDO CALL INDIRECT\t$callee">;
-  } // isCodeGenOnly = 1
+  let isCodeGenOnly = 1 in
+  defm PCALL_INDIRECT_#vt :
+    I<(outs rt:$dst), (ins I32:$callee, variable_ops),
+      (outs), (ins I32:$callee),
+      [(set (vt rt:$dst), (WebAssemblycall1 I32:$callee))],
+      "PSEUDO CALL INDIRECT\t$callee",
+      "PSEUDO CALL INDIRECT\t$callee">,
+    Requires<preds>;
 
-  defm CALL_INDIRECT_#vt : I<(outs vt:$dst),
-                             (ins TypeIndex:$type, i32imm:$flags, variable_ops),
-                             (outs), (ins TypeIndex:$type, i32imm:$flags),
-                             [],
-                             !strconcat(prefix, "call_indirect\t$dst"),
-                             !strconcat(prefix, "call_indirect\t$type"),
-                             0x11>;
+  defm CALL_INDIRECT_#vt :
+    I<(outs rt:$dst),
+      (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+      (outs), (ins TypeIndex:$type, i32imm:$flags),
+      [],
+      !strconcat(prefix, "call_indirect\t$dst"),
+      !strconcat(prefix, "call_indirect\t$type"),
+      0x11>,
+    Requires<preds>;
 }
 
-multiclass SIMD_CALL<ValueType vt, string prefix> {
+let Uses = [SP32, SP64], isCall = 1 in {
+defm "" : CALL<i32, I32, "i32.">;
+defm "" : CALL<i64, I64, "i64.">;
+defm "" : CALL<f32, F32, "f32.">;
+defm "" : CALL<f64, F64, "f64.">;
+defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>;
+defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v2i64, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v4f32, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v2f64, V128, "v128.", [HasSIMD128]>;
 
-  defm CALL_#vt : I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
-                    (outs), (ins function32_op:$callee),
-                    [(set (vt V128:$dst),
-                      (WebAssemblycall1 (i32 imm:$callee)))],
-                    !strconcat(prefix, "call\t$dst, $callee"),
-                    !strconcat(prefix, "call\t$callee"),
-                    0x10>,
-                  Requires<[HasSIMD128]>;
+let IsCanonical = 1 in {
+defm CALL_VOID :
+  I<(outs), (ins function32_op:$callee, variable_ops),
+    (outs), (ins function32_op:$callee),
+    [(WebAssemblycall0 (i32 imm:$callee))],
+    "call    \t$callee", "call\t$callee", 0x10>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
-                                (ins I32:$callee, variable_ops),
-                                (outs), (ins I32:$callee),
-                                [(set (vt V128:$dst),
-                                      (WebAssemblycall1 I32:$callee))],
-                                "PSEUDO CALL INDIRECT\t$callee",
-                                "PSEUDO CALL INDIRECT\t$callee">,
-                              Requires<[HasSIMD128]>;
-  } // isCodeGenOnly = 1
+let isReturn = 1 in
+defm RET_CALL :
+  I<(outs), (ins function32_op:$callee, variable_ops),
+    (outs), (ins function32_op:$callee),
+    [(WebAssemblyretcall (i32 imm:$callee))],
+    "return_call    \t$callee", "return_call\t$callee", 0x12>,
+  Requires<[HasTailCall]>;
 
-  defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
-                             (ins TypeIndex:$type, i32imm:$flags, variable_ops),
-                             (outs), (ins TypeIndex:$type, i32imm:$flags),
-                             [],
-                             !strconcat(prefix, "call_indirect\t$dst"),
-                             !strconcat(prefix, "call_indirect\t$type"),
-                             0x11>,
-                           Requires<[HasSIMD128]>;
-}
+let isCodeGenOnly = 1 in
+defm PCALL_INDIRECT_VOID :
+  I<(outs), (ins I32:$callee, variable_ops),
+    (outs), (ins I32:$callee),
+    [(WebAssemblycall0 I32:$callee)],
+    "PSEUDO CALL INDIRECT\t$callee",
+    "PSEUDO CALL INDIRECT\t$callee">;
 
-let Uses = [SP32, SP64], isCall = 1 in {
-  defm "" : CALL<I32, "i32.">;
-  defm "" : CALL<I64, "i64.">;
-  defm "" : CALL<F32, "f32.">;
-  defm "" : CALL<F64, "f64.">;
-  defm "" : CALL<EXCEPT_REF, "except_ref.">;
-  defm "" : SIMD_CALL<v16i8, "v128.">;
-  defm "" : SIMD_CALL<v8i16, "v128.">;
-  defm "" : SIMD_CALL<v4i32, "v128.">;
-  defm "" : SIMD_CALL<v2i64, "v128.">;
-  defm "" : SIMD_CALL<v4f32, "v128.">;
-  defm "" : SIMD_CALL<v2f64, "v128.">;
+defm CALL_INDIRECT_VOID :
+  I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+    (outs), (ins TypeIndex:$type, i32imm:$flags),
+    [],
+    "call_indirect\t", "call_indirect\t$type",
+    0x11>;
 
-  defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
-                     (outs), (ins function32_op:$callee),
-                     [(WebAssemblycall0 (i32 imm:$callee))],
-                     "call    \t$callee", "call\t$callee", 0x10>;
+let isReturn = 1 in
+defm RET_CALL_INDIRECT :
+  I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+    (outs), (ins TypeIndex:$type, i32imm:$flags),
+    [],
+    "return_call_indirect\t", "return_call_indirect\t$type",
+    0x13>,
+  Requires<[HasTailCall]>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
-                                 (outs), (ins I32:$callee),
-                                 [(WebAssemblycall0 I32:$callee)],
-                                 "PSEUDO CALL INDIRECT\t$callee",
-                                 "PSEUDO CALL INDIRECT\t$callee">;
-  } // isCodeGenOnly = 1
+let isCodeGenOnly = 1, isReturn = 1 in
+defm PRET_CALL_INDIRECT:
+    I<(outs), (ins I32:$callee, variable_ops),
+      (outs), (ins I32:$callee),
+      [(WebAssemblyretcall I32:$callee)],
+      "PSEUDO RET_CALL INDIRECT\t$callee",
+      "PSEUDO RET_CALL INDIRECT\t$callee">,
+    Requires<[HasTailCall]>;
 
-  defm CALL_INDIRECT_VOID : I<(outs),
-                              (ins TypeIndex:$type, i32imm:$flags,
-                                variable_ops),
-                              (outs), (ins TypeIndex:$type, i32imm:$flags),
-                              [],
-                              "call_indirect\t", "call_indirect\t$type",
-                              0x11>;
+} // IsCanonical = 1
 } // Uses = [SP32,SP64], isCall = 1
 
 // Patterns for matching a direct call to a global address.
 def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_I32 tglobaladdr:$callee)>;
+          (CALL_i32 tglobaladdr:$callee)>;
 def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_I64 tglobaladdr:$callee)>;
+          (CALL_i64 tglobaladdr:$callee)>;
 def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_F32 tglobaladdr:$callee)>;
+          (CALL_f32 tglobaladdr:$callee)>;
 def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_F64 tglobaladdr:$callee)>;
+          (CALL_f64 tglobaladdr:$callee)>;
 def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
           (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
@@ -137,21 +139,23 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
           (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
           (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
-           (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_EXCEPT_REF tglobaladdr:$callee)>;
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_exnref tglobaladdr:$callee)>,
+      Requires<[HasExceptionHandling]>;
 def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
           (CALL_VOID tglobaladdr:$callee)>;
+def : Pat<(WebAssemblyretcall (WebAssemblywrapper tglobaladdr:$callee)),
+          (RET_CALL tglobaladdr:$callee)>, Requires<[HasTailCall]>;
 
 // Patterns for matching a direct call to an external symbol.
 def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_I32 texternalsym:$callee)>;
+          (CALL_i32 texternalsym:$callee)>;
 def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_I64 texternalsym:$callee)>;
+          (CALL_i64 texternalsym:$callee)>;
 def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_F32 texternalsym:$callee)>;
+          (CALL_f32 texternalsym:$callee)>;
 def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_F64 texternalsym:$callee)>;
+          (CALL_f64 texternalsym:$callee)>;
 def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
           (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
@@ -164,8 +168,10 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
           (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
           (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
-           (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_EXCEPT_REF texternalsym:$callee)>;
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_exnref texternalsym:$callee)>,
+      Requires<[HasExceptionHandling]>;
 def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
           (CALL_VOID texternalsym:$callee)>;
+def : Pat<(WebAssemblyretcall (WebAssemblywrapper texternalsym:$callee)),
+          (RET_CALL texternalsym:$callee)>, Requires<[HasTailCall]>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 7eb6cbf4d249..1870c5bc34b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -1,9 +1,8 @@
 //===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -21,11 +20,10 @@ defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
 let isCodeGenOnly = 1 in
 defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond),
                    (outs), (ins bb_op:$dst), []>;
-let isBarrier = 1 in {
+let isBarrier = 1 in
 defm BR   : NRI<(outs), (ins bb_op:$dst),
                 [(br bb:$dst)],
                 "br      \t$dst", 0x0c>;
-} // isBarrier = 1
 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
 
 def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
@@ -36,14 +34,11 @@ def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
 // A list of branch targets enclosed in {} and separated by comma.
 // Used by br_table only.
 def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
-let OperandNamespace = "WebAssembly" in {
-let OperandType = "OPERAND_BRLIST" in {
+let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in
 def brlist : Operand<i32> {
   let ParserMatchClass = BrListAsmOperand;
   let PrintMethod = "printBrList";
 }
-} // OPERAND_BRLIST
-} // OperandNamespace = "WebAssembly"
 
 // TODO: SelectionDAG's lowering insists on using a pointer as the index for
 // jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
@@ -82,6 +77,9 @@ defm ELSE : NRI<(outs), (ins), [], "else", 0x05>;
 defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
 defm END_LOOP  : NRI<(outs), (ins), [], "end_loop", 0x0b>;
 defm END_IF    : NRI<(outs), (ins), [], "end_if", 0x0b>;
+// Generic instruction, for disassembler.
+let IsCanonical = 1 in
+defm END       : NRI<(outs), (ins), [], "end", 0x0b>;
 let isTerminator = 1, isBarrier = 1 in
 defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
 } // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -106,7 +104,7 @@ multiclass SIMD_RETURN<ValueType vt> {
   let isCodeGenOnly = 1 in
   defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
                                   []>,
-                                Requires<[HasSIMD128]>;
+                                  Requires<[HasSIMD128]>;
 }
 
 let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -116,7 +114,7 @@ let isReturn = 1 in {
   defm "": RETURN<I64>;
   defm "": RETURN<F32>;
   defm "": RETURN<F64>;
-  defm "": RETURN<EXCEPT_REF>;
+  defm "": RETURN<EXNREF>;
   defm "": SIMD_RETURN<v16i8>;
   defm "": SIMD_RETURN<v8i16>;
   defm "": SIMD_RETURN<v4i32>;
@@ -142,23 +140,17 @@ let Predicates = [HasExceptionHandling] in {
 
 // Throwing an exception: throw / rethrow
 let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-defm THROW_I32 : I<(outs), (ins event_op:$tag, I32:$val),
-                   (outs), (ins event_op:$tag),
-                   [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
-                                      I32:$val)],
-                   "throw   \t$tag, $val", "throw   \t$tag",
-                   0x08>;
-defm THROW_I64 : I<(outs), (ins event_op:$tag, I64:$val),
-                   (outs), (ins event_op:$tag),
-                   [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
-                                      I64:$val)],
-                   "throw   \t$tag, $val", "throw   \t$tag",
-                   0x08>;
-defm RETHROW : NRI<(outs), (ins bb_op:$dst), [], "rethrow \t$dst", 0x09>;
-let isCodeGenOnly = 1 in
-// This is used when the destination for rethrow is the caller function. This
-// will be converted to a rethrow in CFGStackify.
-defm RETHROW_TO_CALLER : NRI<(outs), (ins), [], "rethrow">;
+defm THROW : I<(outs), (ins event_op:$tag, variable_ops),
+               (outs), (ins event_op:$tag),
+               [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
+               "throw   \t$tag", "throw   \t$tag", 0x08>;
+defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [],
+                 "rethrow \t$exn", "rethrow", 0x09>;
+// Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch
+// intrinsic. Will be converted to the real rethrow instruction later.
+let isPseudo = 1 in
+defm RETHROW_IN_CATCH : NRI<(outs), (ins), [(int_wasm_rethrow_in_catch)],
+                            "rethrow_in_catch", 0>;
 } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
 
 // Region within which an exception is caught: try / end_try
@@ -167,24 +159,33 @@ defm TRY     : NRI<(outs), (ins Signature:$sig), [], "try     \t$sig", 0x06>;
 defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
 } // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
 
-// Catching an exception: catch / catch_all
-let hasCtrlDep = 1, hasSideEffects = 1 in {
-defm CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag),
-                   (outs), (ins i32imm:$tag),
-                   [(set I32:$dst, (int_wasm_catch imm:$tag))],
-                   "i32.catch   \t$dst, $tag", "i32.catch   \t$tag", 0x07>;
-defm CATCH_I64 : I<(outs I64:$dst), (ins i32imm:$tag),
-                   (outs), (ins i32imm:$tag),
-                   [(set I64:$dst, (int_wasm_catch imm:$tag))],
-                   "i64.catch   \t$dst, $tag", "i64.catch   \t$tag", 0x07>;
-defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
-}
+// Catching an exception: catch / extract_exception
+let hasCtrlDep = 1, hasSideEffects = 1 in
+defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [],
+               "catch   \t$dst", "catch", 0x07>;
+
+// Querying / extracting exception: br_on_exn
+// br_on_exn queries an exnref to see if it matches the corresponding exception
+// tag index. If true it branches to the given label and pushes the
+// corresponding argument values of the exception onto the stack.
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in
+defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn),
+                   (outs), (ins bb_op:$dst, event_op:$tag), [],
+                   "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag",
+                   0x0a>;
+// This is a pseudo instruction that simulates popping a value from the stack,
+// which has been pushed by br_on_exn.
+let isCodeGenOnly = 1, hasSideEffects = 1 in
+defm EXTRACT_EXCEPTION_I32 : NRI<(outs I32:$dst), (ins),
+                                 [(set I32:$dst, (int_wasm_extract_exception))],
+                                 "extract_exception\t$dst">;
 
 // Pseudo instructions: cleanupret / catchret
 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
-    isCodeGenOnly = 1, isEHScopeReturn = 1 in {
-  defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "", 0>;
+    isPseudo = 1, isEHScopeReturn = 1 in {
+  defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "cleanupret", 0>;
   defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
-                   [(catchret bb:$dst, bb:$from)], "", 0>;
-}
-}
+                      [(catchret bb:$dst, bb:$from)], "catchret", 0>;
+} // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+  // isPseudo = 1, isEHScopeReturn = 1
+} // Predicates = [HasExceptionHandling]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index e128656a142c..661fee2715ba 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -1,9 +1,8 @@
 //===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-=
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
deleted file mode 100644
index a251d60b89ee..000000000000
--- a/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ /dev/null
@@ -1,27 +0,0 @@
-// WebAssemblyInstrExceptRef.td-WebAssembly except_ref codegen --*- tablegen -*-
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// WebAssembly except_ref operand code-gen constructs.
-///
-//===----------------------------------------------------------------------===//
-
-defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
-                           (ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
-                           (outs), (ins),
-                           [(set EXCEPT_REF:$dst,
-                            (select I32:$cond, EXCEPT_REF:$lhs,
-                             EXCEPT_REF:$rhs))],
-                           "except_ref.select\t$dst, $lhs, $rhs, $cond",
-                           "except_ref.select", 0x1b>;
-
-def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
-          (SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
-          (SELECT_EXCEPT_REF EXCEPT_REF:$rhs, EXCEPT_REF:$lhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index c5290f00b431..5c9b34f44734 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 15a9714a55a1..aff4d20d8d82 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,9 +1,8 @@
 //=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -23,6 +22,9 @@ class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
   let Namespace   = "WebAssembly";
   let Pattern     = [];
   let AsmString   = asmstr;
+  // When there are multiple instructions that map to the same encoding (in
+  // e.g. the disassembler use case) prefer the one where IsCanonical == 1.
+  bit IsCanonical = 0;
 }
 
 // Normal instructions. Default instantiation of a WebAssemblyInst.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 5efff32d6167..a86c9af28f0d 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -28,6 +27,10 @@ using namespace llvm;
 #define GET_INSTRINFO_CTOR_DTOR
 #include "WebAssemblyGenInstrInfo.inc"
 
+// defines WebAssembly::getNamedOperandIdx
+#define GET_INSTRINFO_NAMED_OPS
+#include "WebAssemblyGenInstrInfo.inc"
+
 WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
     : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
                               WebAssembly::ADJCALLSTACKUP,
@@ -72,6 +75,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     CopyOpcode = WebAssembly::COPY_F64;
   else if (RC == &WebAssembly::V128RegClass)
     CopyOpcode = WebAssembly::COPY_V128;
+  else if (RC == &WebAssembly::EXNREFRegClass)
+    CopyOpcode = WebAssembly::COPY_EXNREF;
   else
     llvm_unreachable("Unexpected register class");
 
@@ -98,6 +103,13 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&FBB,
                                          SmallVectorImpl<MachineOperand> &Cond,
                                          bool /*AllowModify*/) const {
+  const auto &MFI = *MBB.getParent()->getInfo<WebAssemblyFunctionInfo>();
+  // WebAssembly has control flow that doesn't have explicit branches or direct
+  // fallthrough (e.g. try/catch), which can't be modeled by analyzeBranch. Such
+  // control flow exists once CFGStackify has run, so give up in that case.
+  if (MFI.isCFGStackified())
+    return true;
+
   bool HaveCond = false;
   for (MachineInstr &MI : MBB.terminators()) {
     switch (MI.getOpcode()) {
@@ -107,9 +119,6 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     case WebAssembly::BR_IF:
       if (HaveCond)
         return true;
-      // If we're running after CFGStackify, we can't optimize further.
-      if (!MI.getOperand(0).isMBB())
-        return true;
       Cond.push_back(MachineOperand::CreateImm(true));
       Cond.push_back(MI.getOperand(1));
       TBB = MI.getOperand(0).getMBB();
@@ -118,23 +127,25 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
     case WebAssembly::BR_UNLESS:
       if (HaveCond)
         return true;
-      // If we're running after CFGStackify, we can't optimize further.
-      if (!MI.getOperand(0).isMBB())
-        return true;
       Cond.push_back(MachineOperand::CreateImm(false));
       Cond.push_back(MI.getOperand(1));
       TBB = MI.getOperand(0).getMBB();
       HaveCond = true;
       break;
     case WebAssembly::BR:
-      // If we're running after CFGStackify, we can't optimize further.
-      if (!MI.getOperand(0).isMBB())
-        return true;
       if (!HaveCond)
         TBB = MI.getOperand(0).getMBB();
       else
         FBB = MI.getOperand(0).getMBB();
       break;
+    case WebAssembly::BR_ON_EXN:
+      if (HaveCond)
+        return true;
+      Cond.push_back(MachineOperand::CreateImm(true));
+      Cond.push_back(MI.getOperand(2));
+      TBB = MI.getOperand(0).getMBB();
+      HaveCond = true;
+      break;
     }
     if (MI.isBarrier())
       break;
@@ -180,9 +191,22 @@ unsigned WebAssemblyInstrInfo::insertBranch(
 
   assert(Cond.size() == 2 && "Expected a flag and a successor block");
 
+  MachineFunction &MF = *MBB.getParent();
+  auto &MRI = MF.getRegInfo();
+  bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) ==
+                                          &WebAssembly::EXNREFRegClass;
+
   if (Cond[0].getImm()) {
-    BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
+    if (IsBrOnExn) {
+      const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
+      BuildMI(&MBB, DL, get(WebAssembly::BR_ON_EXN))
+          .addMBB(TBB)
+          .addExternalSymbol(CPPExnSymbol)
+          .add(Cond[1]);
+    } else
+      BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
   } else {
+    assert(!IsBrOnExn && "br_on_exn does not have a reversed condition");
     BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]);
   }
   if (!FBB)
@@ -194,7 +218,15 @@ unsigned WebAssemblyInstrInfo::insertBranch(
 
 bool WebAssemblyInstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
-  assert(Cond.size() == 2 && "Expected a flag and a successor block");
+  assert(Cond.size() == 2 && "Expected a flag and a condition expression");
+
+  // br_on_exn's condition cannot be reversed
+  MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent();
+  auto &MRI = MF.getRegInfo();
+  if (Cond[1].isReg() &&
+      MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass)
+    return true;
+
   Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
   return false;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index 4a3763c345b0..df1051b4f42c 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -1,9 +1,8 @@
 //=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -22,8 +21,17 @@
 #define GET_INSTRINFO_HEADER
 #include "WebAssemblyGenInstrInfo.inc"
 
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
 namespace llvm {
 
+namespace WebAssembly {
+
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+
+}
+
 class WebAssemblySubtarget;
 
 class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index e3d795f2aab1..73ddbe85d551 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,41 +15,52 @@
 // WebAssembly Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
 
+def IsPIC     : Predicate<"TM.isPositionIndependent()">;
+def IsNotPIC  : Predicate<"!TM.isPositionIndependent()">;
+
 def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
+
 def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
-def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
-                           AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
+def HasSIMD128 :
+    Predicate<"Subtarget->hasSIMD128()">,
+    AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
 def HasUnimplementedSIMD128 :
     Predicate<"Subtarget->hasUnimplementedSIMD128()">,
     AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
-def HasAtomics : Predicate<"Subtarget->hasAtomics()">,
-                           AssemblerPredicate<"FeatureAtomics", "atomics">;
+
+def HasAtomics :
+    Predicate<"Subtarget->hasAtomics()">,
+    AssemblerPredicate<"FeatureAtomics", "atomics">;
+
+def HasMultivalue :
+    Predicate<"Subtarget->hasMultivalue()">,
+    AssemblerPredicate<"FeatureMultivalue", "multivalue">;
+
 def HasNontrappingFPToInt :
     Predicate<"Subtarget->hasNontrappingFPToInt()">,
-              AssemblerPredicate<"FeatureNontrappingFPToInt",
-                                 "nontrapping-fptoint">;
+    AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+
 def NotHasNontrappingFPToInt :
     Predicate<"!Subtarget->hasNontrappingFPToInt()">,
-              AssemblerPredicate<"!FeatureNontrappingFPToInt",
-                                 "nontrapping-fptoint">;
+    AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+
 def HasSignExt :
     Predicate<"Subtarget->hasSignExt()">,
-              AssemblerPredicate<"FeatureSignExt",
-                                 "sign-ext">;
-def NotHasSignExt :
-    Predicate<"!Subtarget->hasSignExt()">,
-              AssemblerPredicate<"!FeatureSignExt",
-                                 "sign-ext">;
+    AssemblerPredicate<"FeatureSignExt", "sign-ext">;
+
+def HasTailCall :
+    Predicate<"Subtarget->hasTailCall()">,
+    AssemblerPredicate<"FeatureTailCall", "tail-call">;
 
 def HasExceptionHandling :
     Predicate<"Subtarget->hasExceptionHandling()">,
-              AssemblerPredicate<"FeatureExceptionHandling",
-                                 "exception-handling">;
+    AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">;
 
-def NotHasExceptionHandling :
-    Predicate<"!Subtarget->hasExceptionHandling()">,
-              AssemblerPredicate<"!FeatureExceptionHandling",
-                                 "exception-handling">;
+def HasBulkMemory :
+    Predicate<"Subtarget->hasBulkMemory()">,
+    AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">;
 
 //===----------------------------------------------------------------------===//
 // WebAssembly-specific DAG Node Types.
@@ -60,14 +70,16 @@ def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>,
                                                   SDTCisVT<1, iPTR>]>;
 def SDT_WebAssemblyCallSeqEnd :
     SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_WebAssemblyCall0    : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyCall1    : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
-def SDT_WebAssemblyBrTable  : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
-def SDT_WebAssemblyReturn   : SDTypeProfile<0, -1, []>;
-def SDT_WebAssemblyWrapper  : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
-                                                   SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyThrow    : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall0      : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall1      : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
+def SDT_WebAssemblyBrTable    : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyArgument   : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyReturn     : SDTypeProfile<0, -1, []>;
+def SDT_WebAssemblyWrapper    : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                                     SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                                     SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow      : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
 
 //===----------------------------------------------------------------------===//
 // WebAssembly-specific DAG Nodes.
@@ -85,6 +97,9 @@ def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
 def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
                               SDT_WebAssemblyCall1,
                               [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyretcall : SDNode<"WebAssemblyISD::RET_CALL",
+                                SDT_WebAssemblyCall0,
+                                [SDNPHasChain, SDNPVariadic]>;
 def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE",
                                  SDT_WebAssemblyBrTable,
                                  [SDNPHasChain, SDNPVariadic]>;
@@ -94,13 +109,26 @@ def WebAssemblyreturn   : SDNode<"WebAssemblyISD::RETURN",
                                  SDT_WebAssemblyReturn, [SDNPHasChain]>;
 def WebAssemblywrapper  : SDNode<"WebAssemblyISD::Wrapper",
                                  SDT_WebAssemblyWrapper>;
+def WebAssemblywrapperPIC  : SDNode<"WebAssemblyISD::WrapperPIC",
+                                     SDT_WebAssemblyWrapperPIC>;
 def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
-                              [SDNPHasChain]>;
+                              [SDNPHasChain, SDNPVariadic]>;
 
 //===----------------------------------------------------------------------===//
 // WebAssembly-specific Operands.
 //===----------------------------------------------------------------------===//
 
+// Default Operand has AsmOperandClass "Imm" which is for integers (and
+// symbols), so specialize one for floats:
+def FPImmAsmOperand : AsmOperandClass {
+  let Name = "FPImm";
+  let PredicateMethod = "isFPImm";
+}
+
+class FPOperand<ValueType ty> : Operand<ty> {
+  AsmOperandClass ParserMatchClass = FPImmAsmOperand;
+}
+
 let OperandNamespace = "WebAssembly" in {
 
 let OperandType = "OPERAND_BASIC_BLOCK" in
@@ -119,10 +147,10 @@ let OperandType = "OPERAND_I64IMM" in
 def i64imm_op : Operand<i64>;
 
 let OperandType = "OPERAND_F32IMM" in
-def f32imm_op : Operand<f32>;
+def f32imm_op : FPOperand<f32>;
 
 let OperandType = "OPERAND_F64IMM" in
-def f64imm_op : Operand<f64>;
+def f64imm_op : FPOperand<f64>;
 
 let OperandType = "OPERAND_VEC_I8IMM" in
 def vec_i8imm_op : Operand<i32>;
@@ -152,11 +180,10 @@ def event_op : Operand<i32>;
 
 } // OperandType = "OPERAND_P2ALIGN"
 
-let OperandType = "OPERAND_SIGNATURE" in {
+let OperandType = "OPERAND_SIGNATURE" in
 def Signature : Operand<i32> {
   let PrintMethod = "printWebAssemblySignatureOperand";
 }
-} // OperandType = "OPERAND_SIGNATURE"
 
 let OperandType = "OPERAND_TYPEINDEX" in
 def TypeIndex : Operand<i32>;
@@ -187,8 +214,8 @@ include "WebAssemblyInstrFormats.td"
 //===----------------------------------------------------------------------===//
 
 multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
-  let hasSideEffects = 1, isCodeGenOnly = 1,
-      Defs = []<Register>, Uses = [ARGUMENTS] in
+  let hasSideEffects = 1, isCodeGenOnly = 1, Defs = []<Register>,
+      Uses = [ARGUMENTS] in
   defm ARGUMENT_#vt :
     I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
       [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
@@ -197,12 +224,12 @@ defm "": ARGUMENT<I32, i32>;
 defm "": ARGUMENT<I64, i64>;
 defm "": ARGUMENT<F32, f32>;
 defm "": ARGUMENT<F64, f64>;
-defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+defm "": ARGUMENT<EXNREF, exnref>;
 
 // local.get and local.set are not generated by instruction selection; they
 // are implied by virtual register uses and defs.
 multiclass LOCAL<WebAssemblyRegClass vt> {
-let hasSideEffects = 0 in {
+  let hasSideEffects = 0 in {
   // COPY is not an actual instruction in wasm, but since we allow local.get and
   // local.set to be implicit during most of codegen, we can have a COPY which
   // is actually a no-op because all the work is done in the implied local.get
@@ -267,7 +294,7 @@ defm "" : LOCAL<I64>;
 defm "" : LOCAL<F32>;
 defm "" : LOCAL<F64>;
 defm "" : LOCAL<V128>, Requires<[HasSIMD128]>;
-defm "" : LOCAL<EXCEPT_REF>, Requires<[HasExceptionHandling]>;
+defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>;
 
 let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
 defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm),
@@ -289,9 +316,20 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
 } // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
 
 def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
-          (CONST_I32 tglobaladdr:$addr)>;
+          (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+          (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+          (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+
 def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
-          (CONST_I32 texternalsym:$addr)>;
+          (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+          (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>;
+
 def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
 def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
 
@@ -307,4 +345,5 @@ include "WebAssemblyInstrConv.td"
 include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrAtomics.td"
 include "WebAssemblyInstrSIMD.td"
-include "WebAssemblyInstrExceptRef.td"
+include "WebAssemblyInstrRef.td"
+include "WebAssemblyInstrBulkMemory.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index d5b63d643697..18250cf8ef85 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -122,10 +121,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
           (SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
 def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
           (SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
-
-// The legalizer inserts an unnecessary `and 1` to make input conform
-// to getBooleanContents, which we can lower away.
-def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
-          (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
-          (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 518f81c61dc4..6916b165f970 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -53,7 +52,7 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
 
 // Defines atomic and non-atomic loads, regular and extending.
 multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
-  let mayLoad = 1 in
+  let mayLoad = 1, UseNamedOperandTable = 1 in
   defm "": I<(outs rc:$dst),
              (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
              (outs), (ins P2Align:$p2align, offset32_op:$off),
@@ -96,22 +95,13 @@ def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>;
 
 class LoadPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))),
-      (inst 0, tglobaladdr:$off, I32:$addr)>;
+      (inst 0, tglobaladdr:$off, I32:$addr)>, Requires<[IsNotPIC]>;
 
 def : LoadPatGlobalAddr<i32, load, LOAD_I32>;
 def : LoadPatGlobalAddr<i64, load, LOAD_I64>;
 def : LoadPatGlobalAddr<f32, load, LOAD_F32>;
 def : LoadPatGlobalAddr<f64, load, LOAD_F64>;
 
-class LoadPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-      (inst 0, texternalsym:$off, I32:$addr)>;
-def : LoadPatExternalSym<i32, load, LOAD_I32>;
-def : LoadPatExternalSym<i64, load, LOAD_I64>;
-def : LoadPatExternalSym<f32, load, LOAD_F32>;
-def : LoadPatExternalSym<f64, load, LOAD_F64>;
-
-
 // Select loads with just a constant offset.
 class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>;
@@ -123,21 +113,13 @@ def : LoadPatOffsetOnly<f64, load, LOAD_F64>;
 
 class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0))>;
+      (inst 0, tglobaladdr:$off, (CONST_I32 0))>, Requires<[IsNotPIC]>;
 
 def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>;
 def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>;
 def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>;
 def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>;
 
-class LoadPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(ty (kind (WebAssemblywrapper texternalsym:$off))),
-      (inst 0, texternalsym:$off, (CONST_I32 0))>;
-def : LoadPatExternSymOffOnly<i32, load, LOAD_I32>;
-def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>;
-def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>;
-def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
-
 // Extending load.
 defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
 defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
@@ -197,18 +179,6 @@ def : LoadPatGlobalAddr<i64, zextloadi16, LOAD16_U_I64>;
 def : LoadPatGlobalAddr<i64, sextloadi32, LOAD32_S_I64>;
 def : LoadPatGlobalAddr<i64, zextloadi32, LOAD32_U_I64>;
 
-def : LoadPatExternalSym<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatExternalSym<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatExternalSym<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi32, LOAD32_U_I64>;
-
-
 // Select extending loads with just a constant offset.
 def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>;
 def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>;
@@ -233,17 +203,6 @@ def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>;
 def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>;
 def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>;
 
-def : LoadPatExternSymOffOnly<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatExternSymOffOnly<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatExternSymOffOnly<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi32, LOAD32_U_I64>;
-
 // Resolve "don't care" extending loads to zero-extending loads. This is
 // somewhat arbitrary, but zero-extending is conceptually simpler.
 
@@ -270,11 +229,6 @@ def : LoadPatGlobalAddr<i32, extloadi16, LOAD16_U_I32>;
 def : LoadPatGlobalAddr<i64, extloadi8, LOAD8_U_I64>;
 def : LoadPatGlobalAddr<i64, extloadi16, LOAD16_U_I64>;
 def : LoadPatGlobalAddr<i64, extloadi32, LOAD32_U_I64>;
-def : LoadPatExternalSym<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, extloadi32, LOAD32_U_I64>;
 
 // Select "don't care" extending loads with just a constant offset.
 def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>;
@@ -287,15 +241,10 @@ def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>;
 def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>;
 def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>;
 def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>;
-def : LoadPatExternSymOffOnly<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
 
 // Defines atomic and non-atomic stores, regular and truncating
 multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
-  let mayStore = 1 in
+  let mayStore = 1, UseNamedOperandTable = 1 in
   defm "" : I<(outs),
               (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
               (outs),
@@ -336,20 +285,12 @@ def : StorePatImmOff<f64, store, or_is_add, STORE_F64>;
 class StorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
   Pat<(kind ty:$val,
             (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))),
-      (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
+      (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>, Requires<[IsNotPIC]>;
 def : StorePatGlobalAddr<i32, store, STORE_I32>;
 def : StorePatGlobalAddr<i64, store, STORE_I64>;
 def : StorePatGlobalAddr<f32, store, STORE_F32>;
 def : StorePatGlobalAddr<f64, store, STORE_F64>;
 
-class StorePatExternalSym<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind ty:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))),
-      (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-def : StorePatExternalSym<i32, store, STORE_I32>;
-def : StorePatExternalSym<i64, store, STORE_I64>;
-def : StorePatExternalSym<f32, store, STORE_F32>;
-def : StorePatExternalSym<f64, store, STORE_F64>;
-
 // Select stores with just a constant offset.
 class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -360,20 +301,12 @@ def : StorePatOffsetOnly<f64, store, STORE_F64>;
 
 class StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
   Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
-      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, Requires<[IsNotPIC]>;
 def : StorePatGlobalAddrOffOnly<i32, store, STORE_I32>;
 def : StorePatGlobalAddrOffOnly<i64, store, STORE_I64>;
 def : StorePatGlobalAddrOffOnly<f32, store, STORE_F32>;
 def : StorePatGlobalAddrOffOnly<f64, store, STORE_F64>;
 
-class StorePatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
-  Pat<(kind ty:$val, (WebAssemblywrapper texternalsym:$off)),
-      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-def : StorePatExternSymOffOnly<i32, store, STORE_I32>;
-def : StorePatExternSymOffOnly<i64, store, STORE_I64>;
-def : StorePatExternSymOffOnly<f32, store, STORE_F32>;
-def : StorePatExternSymOffOnly<f64, store, STORE_F64>;
-
 // Truncating store.
 defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
 defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>;
@@ -405,11 +338,6 @@ def : StorePatGlobalAddr<i32, truncstorei16, STORE16_I32>;
 def : StorePatGlobalAddr<i64, truncstorei8, STORE8_I64>;
 def : StorePatGlobalAddr<i64, truncstorei16, STORE16_I64>;
 def : StorePatGlobalAddr<i64, truncstorei32, STORE32_I64>;
-def : StorePatExternalSym<i32, truncstorei8, STORE8_I32>;
-def : StorePatExternalSym<i32, truncstorei16, STORE16_I32>;
-def : StorePatExternalSym<i64, truncstorei8, STORE8_I64>;
-def : StorePatExternalSym<i64, truncstorei16, STORE16_I64>;
-def : StorePatExternalSym<i64, truncstorei32, STORE32_I64>;
 
 // Select truncating stores with just a constant offset.
 def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>;
@@ -422,11 +350,6 @@ def : StorePatGlobalAddrOffOnly<i32, truncstorei16, STORE16_I32>;
 def : StorePatGlobalAddrOffOnly<i64, truncstorei8, STORE8_I64>;
 def : StorePatGlobalAddrOffOnly<i64, truncstorei16, STORE16_I64>;
 def : StorePatGlobalAddrOffOnly<i64, truncstorei32, STORE32_I64>;
-def : StorePatExternSymOffOnly<i32, truncstorei8, STORE8_I32>;
-def : StorePatExternSymOffOnly<i32, truncstorei16, STORE16_I32>;
-def : StorePatExternSymOffOnly<i64, truncstorei8, STORE8_I64>;
-def : StorePatExternSymOffOnly<i64, truncstorei16, STORE16_I64>;
-def : StorePatExternSymOffOnly<i64, truncstorei32, STORE32_I64>;
 
 // Current memory size.
 defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/lib/Target/WebAssembly/WebAssemblyInstrRef.td
new file mode 100644
index 000000000000..afe89de60b36
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -0,0 +1,25 @@
+// WebAssemblyInstrRef.td - WebAssembly reference type codegen --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly reference type operand codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm SELECT_EXNREF : I<(outs EXNREF:$dst),
+                       (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond),
+                       (outs), (ins),
+                       [(set EXNREF:$dst,
+                         (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))],
+                       "exnref.select\t$dst, $lhs, $rhs, $cond",
+                       "exnref.select", 0x1b>;
+
+def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 587515c5b299..dd8930f079b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1,9 +1,8 @@
 // WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -31,7 +30,7 @@ defm "" : ARGUMENT<V128, v2f64>;
 // Constrained immediate argument types
 foreach SIZE = [8, 16] in
 def ImmI#SIZE : ImmLeaf<i32,
-  "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
+  "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
 >;
 foreach SIZE = [2, 4, 8, 16, 32] in
 def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
@@ -42,12 +41,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
 
 // Load: v128.load
 multiclass SIMDLoad<ValueType vec_t> {
-  let mayLoad = 1 in
+  let mayLoad = 1, UseNamedOperandTable = 1 in
   defm LOAD_#vec_t :
-    SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
-           (outs), (ins P2Align:$align, offset32_op:$off), [],
-           "v128.load\t$dst, ${off}(${addr})$align",
-           "v128.load\t$off$align", 0>;
+    SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "v128.load\t$dst, ${off}(${addr})$p2align",
+           "v128.load\t$off$p2align", 0>;
 }
 
 foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@@ -58,20 +57,18 @@ def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
 def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
 def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
 def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
 def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
 def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
 }
 
 // Store: v128.store
 multiclass SIMDStore<ValueType vec_t> {
-  let mayStore = 1 in
+  let mayStore = 1, UseNamedOperandTable = 1 in
   defm STORE_#vec_t :
-    SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
-           (outs), (ins P2Align:$align, offset32_op:$off), [],
-           "v128.store\t${off}(${addr})$align, $vec",
-           "v128.store\t$off$align", 1>;
+    SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+           (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+           "v128.store\t${off}(${addr})$p2align, $vec",
+           "v128.store\t$off$p2align", 1>;
 }
 
 foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@@ -82,10 +79,8 @@ def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
 def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
 def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -95,7 +90,7 @@ def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 // Constant: v128.const
 multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
   let isMoveImm = 1, isReMaterializable = 1,
-    Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+      Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
   defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                   [(set V128:$dst, (vec_t pat))],
                                   "v128.const\t$dst, "#args,
@@ -126,6 +121,7 @@ defm "" : ConstVec<v8i16,
                      ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                      ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                    "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
+let IsCanonical = 1 in
 defm "" : ConstVec<v4i32,
                    (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                         vec_i32imm_op:$i2, vec_i32imm_op:$i3),
@@ -231,6 +227,19 @@ defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
 defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
 defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
 
+// scalar_to_vector leaves high lanes undefined, so can be a splat
+class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
+                     WebAssemblyRegClass reg_t> :
+  Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
+      (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;
+
+def : ScalarSplatPat<v16i8, i32, I32>;
+def : ScalarSplatPat<v8i16, i32, I32>;
+def : ScalarSplatPat<v4i32, i32, I32>;
+def : ScalarSplatPat<v2i64, i64, I64>;
+def : ScalarSplatPat<v4f32, f32, F32>;
+def : ScalarSplatPat<v2f64, f64, F64>;
+
 //===----------------------------------------------------------------------===//
 // Accessing lanes
 //===----------------------------------------------------------------------===//
@@ -347,118 +356,6 @@ def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
 def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
           (REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
 
-// Arbitrary other BUILD_VECTOR patterns
-def : Pat<(v16i8 (build_vector
-            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
-            (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
-            (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
-            (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
-          )),
-          (v16i8 (REPLACE_LANE_v16i8
-            (v16i8 (REPLACE_LANE_v16i8
-              (v16i8 (REPLACE_LANE_v16i8
-                (v16i8 (REPLACE_LANE_v16i8
-                  (v16i8 (REPLACE_LANE_v16i8
-                    (v16i8 (REPLACE_LANE_v16i8
-                      (v16i8 (REPLACE_LANE_v16i8
-                        (v16i8 (REPLACE_LANE_v16i8
-                          (v16i8 (REPLACE_LANE_v16i8
-                            (v16i8 (REPLACE_LANE_v16i8
-                              (v16i8 (REPLACE_LANE_v16i8
-                                (v16i8 (REPLACE_LANE_v16i8
-                                  (v16i8 (REPLACE_LANE_v16i8
-                                    (v16i8 (REPLACE_LANE_v16i8
-                                      (v16i8 (REPLACE_LANE_v16i8
-                                        (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
-                                        1, I32:$x1
-                                      )),
-                                      2, I32:$x2
-                                    )),
-                                    3, I32:$x3
-                                  )),
-                                  4, I32:$x4
-                                )),
-                                5, I32:$x5
-                              )),
-                              6, I32:$x6
-                            )),
-                            7, I32:$x7
-                          )),
-                          8, I32:$x8
-                        )),
-                        9, I32:$x9
-                      )),
-                      10, I32:$x10
-                    )),
-                    11, I32:$x11
-                  )),
-                  12, I32:$x12
-                )),
-                13, I32:$x13
-              )),
-              14, I32:$x14
-            )),
-            15, I32:$x15
-          ))>;
-def : Pat<(v8i16 (build_vector
-            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
-            (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
-          )),
-          (v8i16 (REPLACE_LANE_v8i16
-            (v8i16 (REPLACE_LANE_v8i16
-              (v8i16 (REPLACE_LANE_v8i16
-                (v8i16 (REPLACE_LANE_v8i16
-                  (v8i16 (REPLACE_LANE_v8i16
-                    (v8i16 (REPLACE_LANE_v8i16
-                      (v8i16 (REPLACE_LANE_v8i16
-                        (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
-                        1, I32:$x1
-                      )),
-                      2, I32:$x2
-                    )),
-                    3, I32:$x3
-                  )),
-                  4, I32:$x4
-                )),
-                5, I32:$x5
-              )),
-              6, I32:$x6
-            )),
-            7, I32:$x7
-          ))>;
-def : Pat<(v4i32 (build_vector
-            (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
-          )),
-          (v4i32 (REPLACE_LANE_v4i32
-            (v4i32 (REPLACE_LANE_v4i32
-              (v4i32 (REPLACE_LANE_v4i32
-                (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
-                1, I32:$x1
-              )),
-              2, I32:$x2
-            )),
-            3, I32:$x3
-          ))>;
-def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
-          (v2i64 (REPLACE_LANE_v2i64
-            (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
-def : Pat<(v4f32 (build_vector
-            (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
-          )),
-          (v4f32 (REPLACE_LANE_v4f32
-            (v4f32 (REPLACE_LANE_v4f32
-              (v4f32 (REPLACE_LANE_v4f32
-                (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
-                1, F32:$x1
-              )),
-              2, F32:$x2
-            )),
-            3, F32:$x3
-          ))>;
-def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
-          (v2f64 (REPLACE_LANE_v2f64
-            (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;
-
 //===----------------------------------------------------------------------===//
 // Comparisons
 //===----------------------------------------------------------------------===//
@@ -520,16 +417,18 @@ defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
 defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
 
 // Lower float comparisons that don't care about NaN to standard WebAssembly
-// float comparisons. These instructions are generated in the target-independent
-// expansion of unordered comparisons and ordered ne.
-def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
-          (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
-def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
-          (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
-def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
-          (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
-def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
-          (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+// float comparisons. These instructions are generated with nnan and in the
+// target-independent expansion of unordered comparisons and ordered ne.
+foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
+                 [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
+def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+          (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+
+foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
+                 [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
+def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+          (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+
 
 //===----------------------------------------------------------------------===//
 // Bitwise operations
@@ -628,6 +527,28 @@ defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
 // All lanes true: all_true
 defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
 
+// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
+// can be folded out
+foreach reduction =
+  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
+foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
+def : Pat<(i32 (and
+            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+            (i32 1)
+          )),
+          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+def : Pat<(i32 (setne
+            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+            (i32 0)
+          )),
+          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+def : Pat<(i32 (seteq
+            (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+            (i32 1)
+          )),
+          (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+}
+
 //===----------------------------------------------------------------------===//
 // Bit shifts
 //===----------------------------------------------------------------------===//
@@ -658,10 +579,16 @@ defm SHL : SIMDShiftInt<shl, "shl", 84>;
 defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
 defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
 
-// Truncate i64 shift operands to i32s
-foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+// Truncate i64 shift operands to i32s, except if they are already i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in {
+def : Pat<(v2i64 (shifts[0]
+            (v2i64 V128:$vec),
+            (v2i64 (splat2 (i64 (sext I32:$x))))
+          )),
+          (v2i64 (shifts[1] (v2i64 V128:$vec), (i32 I32:$x)))>;
 def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
           (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+}
 
 // 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
 def wasm_shift_t : SDTypeProfile<1, 2,
diff --git a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index ad838dfb574a..e92b34430272 100644
--- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -1,9 +1,8 @@
 //=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -16,29 +15,26 @@
 #include "WebAssembly.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/WasmEHFuncInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 using namespace llvm;
 
-#define DEBUG_TYPE "wasm-exception-prepare"
+#define DEBUG_TYPE "wasm-late-eh-prepare"
 
 namespace {
 class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
   StringRef getPassName() const override {
-    return "WebAssembly Prepare Exception";
+    return "WebAssembly Late Prepare Exception";
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
-
-  bool removeUnnecessaryUnreachables(MachineFunction &MF);
+  bool addCatches(MachineFunction &MF);
   bool replaceFuncletReturns(MachineFunction &MF);
-  bool hoistCatches(MachineFunction &MF);
-  bool addCatchAlls(MachineFunction &MF);
-  bool addRethrows(MachineFunction &MF);
-  bool ensureSingleBBTermPads(MachineFunction &MF);
-  bool mergeTerminatePads(MachineFunction &MF);
-  bool addCatchAllTerminatePads(MachineFunction &MF);
+  bool removeUnnecessaryUnreachables(MachineFunction &MF);
+  bool addExceptionExtraction(MachineFunction &MF);
+  bool restoreStackPointer(MachineFunction &MF);
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -112,48 +108,40 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   bool Changed = false;
+  if (MF.getFunction().hasPersonalityFn()) {
+    Changed |= addCatches(MF);
+    Changed |= replaceFuncletReturns(MF);
+  }
   Changed |= removeUnnecessaryUnreachables(MF);
-  Changed |= addRethrows(MF);
-  if (!MF.getFunction().hasPersonalityFn())
-    return Changed;
-  Changed |= replaceFuncletReturns(MF);
-  Changed |= hoistCatches(MF);
-  Changed |= addCatchAlls(MF);
-  Changed |= ensureSingleBBTermPads(MF);
-  Changed |= mergeTerminatePads(MF);
-  Changed |= addCatchAllTerminatePads(MF);
+  if (MF.getFunction().hasPersonalityFn()) {
+    Changed |= addExceptionExtraction(MF);
+    Changed |= restoreStackPointer(MF);
+  }
   return Changed;
 }
 
-bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
-    MachineFunction &MF) {
+// Add catch instruction to beginning of catchpads and cleanuppads.
+bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
   bool Changed = false;
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   for (auto &MBB : MF) {
-    for (auto &MI : MBB) {
-      if (!WebAssembly::isThrow(MI))
-        continue;
+    if (MBB.isEHPad()) {
       Changed = true;
-
-      // The instruction after the throw should be an unreachable or a branch to
-      // another BB that should eventually lead to an unreachable. Delete it
-      // because throw itself is a terminator, and also delete successors if
-      // any.
-      MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
-      SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
-                                                MBB.succ_end());
-      for (auto *Succ : Succs)
-        MBB.removeSuccessor(Succ);
-      eraseDeadBBsAndChildren(Succs);
+      auto InsertPos = MBB.begin();
+      if (InsertPos->isEHLabel()) // EH pad starts with an EH label
+        ++InsertPos;
+      unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+      BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
+              TII.get(WebAssembly::CATCH), DstReg);
     }
   }
-
   return Changed;
 }
 
 bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
   bool Changed = false;
   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-  auto *EHInfo = MF.getWasmEHFuncInfo();
 
   for (auto &MBB : MF) {
     auto Pos = MBB.getFirstTerminator();
@@ -172,15 +160,17 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
       Changed = true;
       break;
     }
-    case WebAssembly::CLEANUPRET: {
-      // Replace a cleanupret with a rethrow
-      if (EHInfo->hasThrowUnwindDest(&MBB))
-        BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
-            .addMBB(EHInfo->getThrowUnwindDest(&MBB));
-      else
-        BuildMI(MBB, TI, TI->getDebugLoc(),
-                TII.get(WebAssembly::RETHROW_TO_CALLER));
-
+    case WebAssembly::CLEANUPRET:
+    case WebAssembly::RETHROW_IN_CATCH: {
+      // Replace a cleanupret/rethrow_in_catch with a rethrow
+      auto *EHPad = getMatchingEHPad(TI);
+      auto CatchPos = EHPad->begin();
+      if (CatchPos->isEHLabel()) // EH pad starts with an EH label
+        ++CatchPos;
+      MachineInstr *Catch = &*CatchPos;
+      unsigned ExnReg = Catch->getOperand(0).getReg();
+      BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
+          .addReg(ExnReg);
       TI->eraseFromParent();
       Changed = true;
       break;
@@ -190,233 +180,208 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
   return Changed;
 }
 
-// Hoist catch instructions to the beginning of their matching EH pad BBs in
-// case,
-// (1) catch instruction is not the first instruction in EH pad.
-// ehpad:
-//   some_other_instruction
-//   ...
-//   %exn = catch 0
-// (2) catch instruction is in a non-EH pad BB. For example,
-// ehpad:
-//   br bb0
-// bb0:
-//   %exn = catch 0
-bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
-  bool Changed = false;
-  SmallVector<MachineInstr *, 16> Catches;
-  for (auto &MBB : MF)
-    for (auto &MI : MBB)
-      if (WebAssembly::isCatch(MI))
-        Catches.push_back(&MI);
-
-  for (auto *Catch : Catches) {
-    MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
-    assert(EHPad && "No matching EH pad for catch");
-    if (EHPad->begin() == Catch)
-      continue;
-    Changed = true;
-    EHPad->insert(EHPad->begin(), Catch->removeFromParent());
-  }
-  return Changed;
-}
-
-// Add catch_all to beginning of cleanup pads.
-bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+    MachineFunction &MF) {
   bool Changed = false;
-  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-
   for (auto &MBB : MF) {
-    if (!MBB.isEHPad())
-      continue;
-    // This runs after hoistCatches(), so we assume that if there is a catch,
-    // that should be the first instruction in an EH pad.
-    if (!WebAssembly::isCatch(*MBB.begin())) {
-      Changed = true;
-      BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
-              TII.get(WebAssembly::CATCH_ALL));
-    }
-  }
-  return Changed;
-}
-
-// Add a 'rethrow' instruction after __cxa_rethrow() call
-bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
-  bool Changed = false;
-  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-  auto *EHInfo = MF.getWasmEHFuncInfo();
-
-  for (auto &MBB : MF)
     for (auto &MI : MBB) {
-      // Check if it is a call to __cxa_rethrow()
-      if (!MI.isCall())
+      if (MI.getOpcode() != WebAssembly::THROW &&
+          MI.getOpcode() != WebAssembly::RETHROW)
         continue;
-      MachineOperand &CalleeOp = MI.getOperand(0);
-      if (!CalleeOp.isGlobal() ||
-          CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
-        continue;
-
-      // Now we have __cxa_rethrow() call
       Changed = true;
-      auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
-      while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
-        ++InsertPt;
-      MachineInstr *Rethrow = nullptr;
-      if (EHInfo->hasThrowUnwindDest(&MBB))
-        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
-                          TII.get(WebAssembly::RETHROW))
-                      .addMBB(EHInfo->getThrowUnwindDest(&MBB));
-      else
-        Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
-                          TII.get(WebAssembly::RETHROW_TO_CALLER));
 
-      // Because __cxa_rethrow does not return, the instruction after the
-      // rethrow should be an unreachable or a branch to another BB that should
-      // eventually lead to an unreachable. Delete it because rethrow itself is
-      // a terminator, and also delete non-EH pad successors if any.
-      MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
-      SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
-      for (auto *Succ : MBB.successors())
+      // The instruction after the throw/rethrow should be an unreachable or a
+      // branch to another BB that should eventually lead to an unreachable.
+      // Delete it because throw/rethrow is itself a terminator, and also
+      // delete non-EH pad successors if any.
+      MBB.erase(std::next(MI.getIterator()), MBB.end());
+      SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+                                                MBB.succ_end());
+      for (auto *Succ : Succs)
         if (!Succ->isEHPad())
-          NonPadSuccessors.push_back(Succ);
-      for (auto *Succ : NonPadSuccessors)
-        MBB.removeSuccessor(Succ);
-      eraseDeadBBsAndChildren(NonPadSuccessors);
+          MBB.removeSuccessor(Succ);
+      eraseDeadBBsAndChildren(Succs);
     }
+  }
+
   return Changed;
 }
 
-// Terminate pads are an single-BB EH pad in the form of
-// termpad:
-//   %exn = catch 0
-//   call @__clang_call_terminate(%exn)
-//   unreachable
-// (There can be local.set and local.gets before the call if we didn't run
-// RegStackify)
-// But code transformations can change or add more control flow, so the call to
-// __clang_call_terminate() function may not be in the original EH pad anymore.
-// This ensures every terminate pad is a single BB in the form illustrated
-// above.
-bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
+// Wasm uses 'br_on_exn' instruction to check the tag of an exception. It takes
+// exnref type object returned by 'catch', and branches to the destination if it
+// matches a given tag. We currently use __cpp_exception symbol to represent the
+// tag for all C++ exceptions.
+//
+// block $l (result i32)
+//   ...
+//   ;; exnref $e is on the stack at this point
+//   br_on_exn $l $e ;; branch to $l with $e's arguments
+//   ...
+// end
+// ;; Here we expect the extracted values are on top of the wasm value stack
+// ... Handle exception using values ...
+//
+// br_on_exn takes an exnref object and branches if it matches the given tag.
+// There can be multiple br_on_exn instructions if we want to match for another
+// tag, but for now we only test for __cpp_exception tag, and if it does not
+// match, i.e., it is a foreign exception, we rethrow it.
+//
+// In the destination BB that's the target of br_on_exn, extracted exception
+// values (in C++'s case a single i32, which represents an exception pointer)
+// are placed on top of the wasm stack. Because we can't model wasm stack in
+// LLVM instruction, we use 'extract_exception' pseudo instruction to retrieve
+// it. The pseudo instruction will be deleted later.
+bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  auto *EHInfo = MF.getWasmEHFuncInfo();
+  SmallVector<MachineInstr *, 16> ExtractInstrs;
+  SmallVector<MachineInstr *, 8> ToDelete;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
+        if (MI.getOperand(0).isDead())
+          ToDelete.push_back(&MI);
+        else
+          ExtractInstrs.push_back(&MI);
+      }
+    }
+  }
+  bool Changed = !ToDelete.empty() || !ExtractInstrs.empty();
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+  if (ExtractInstrs.empty())
+    return Changed;
 
-  // Find calls to __clang_call_terminate()
-  SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
-  for (auto &MBB : MF)
-    for (auto &MI : MBB)
+  // Find terminate pads.
+  SmallSet<MachineBasicBlock *, 8> TerminatePads;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
       if (MI.isCall()) {
         const MachineOperand &CalleeOp = MI.getOperand(0);
         if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
                                        WebAssembly::ClangCallTerminateFn)
-          ClangCallTerminateCalls.push_back(&MI);
+          TerminatePads.insert(getMatchingEHPad(&MI));
       }
-
-  bool Changed = false;
-  for (auto *Call : ClangCallTerminateCalls) {
-    MachineBasicBlock *EHPad = getMatchingEHPad(Call);
-    assert(EHPad && "No matching EH pad for catch");
-
-    // If it is already the form we want, skip it
-    if (Call->getParent() == EHPad &&
-        Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
-      continue;
-
-    // In case the __clang_call_terminate() call is not in its matching EH pad,
-    // move the call to the end of EH pad and add an unreachable instruction
-    // after that. Delete all successors and their children if any, because here
-    // the program terminates.
-    Changed = true;
-    MachineInstr *Catch = &*EHPad->begin();
-    // This runs after hoistCatches(), so catch instruction should be at the top
-    assert(WebAssembly::isCatch(*Catch));
-    // Takes the result register of the catch instruction as argument. There may
-    // have been some other local.set/local.gets in between, but at this point
-    // we don't care.
-    Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
-    auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
-    EHPad->insert(InsertPos, Call->removeFromParent());
-    BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
-            TII.get(WebAssembly::UNREACHABLE));
-    EHPad->erase(InsertPos, EHPad->end());
-    SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
-                                              EHPad->succ_end());
-    for (auto *Succ : Succs)
-      EHPad->removeSuccessor(Succ);
-    eraseDeadBBsAndChildren(Succs);
+    }
   }
-  return Changed;
-}
 
-// In case there are multiple terminate pads, merge them into one for code size.
-// This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
-// single BB.
-// In principle this violates EH scope relationship because it can merge
-// multiple inner EH scopes, each of which is in different outer EH scope. But
-// getEHScopeMembership() function will not be called after this, so it is fine.
-bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
-  SmallVector<MachineBasicBlock *, 8> TermPads;
-  for (auto &MBB : MF)
-    if (WebAssembly::isCatchTerminatePad(MBB))
-      TermPads.push_back(&MBB);
-  if (TermPads.empty())
-    return false;
-
-  MachineBasicBlock *UniqueTermPad = TermPads.front();
-  for (auto *TermPad :
-       llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
-    SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
-                                              TermPad->pred_end());
-    for (auto *Pred : Preds)
-      Pred->replaceSuccessor(TermPad, UniqueTermPad);
-    TermPad->eraseFromParent();
+  for (auto *Extract : ExtractInstrs) {
+    MachineBasicBlock *EHPad = getMatchingEHPad(Extract);
+    assert(EHPad && "No matching EH pad for extract_exception");
+    auto CatchPos = EHPad->begin();
+    if (CatchPos->isEHLabel()) // EH pad starts with an EH label
+      ++CatchPos;
+    MachineInstr *Catch = &*CatchPos;
+
+    if (Catch->getNextNode() != Extract)
+      EHPad->insert(Catch->getNextNode(), Extract->removeFromParent());
+
+    // - Before:
+    // ehpad:
+    //   %exnref:exnref = catch
+    //   %exn:i32 = extract_exception
+    //   ... use exn ...
+    //
+    // - After:
+    // ehpad:
+    //   %exnref:exnref = catch
+    //   br_on_exn %thenbb, $__cpp_exception, %exnref
+    //   br %elsebb
+    // elsebb:
+    //   rethrow
+    // thenbb:
+    //   %exn:i32 = extract_exception
+    //   ... use exn ...
+    unsigned ExnReg = Catch->getOperand(0).getReg();
+    auto *ThenMBB = MF.CreateMachineBasicBlock();
+    auto *ElseMBB = MF.CreateMachineBasicBlock();
+    MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB);
+    MF.insert(std::next(MachineFunction::iterator(ElseMBB)), ThenMBB);
+    ThenMBB->splice(ThenMBB->end(), EHPad, Extract, EHPad->end());
+    ThenMBB->transferSuccessors(EHPad);
+    EHPad->addSuccessor(ThenMBB);
+    EHPad->addSuccessor(ElseMBB);
+
+    DebugLoc DL = Extract->getDebugLoc();
+    const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
+    BuildMI(EHPad, DL, TII.get(WebAssembly::BR_ON_EXN))
+        .addMBB(ThenMBB)
+        .addExternalSymbol(CPPExnSymbol)
+        .addReg(ExnReg);
+    BuildMI(EHPad, DL, TII.get(WebAssembly::BR)).addMBB(ElseMBB);
+
+    // When this is a terminate pad with a __clang_call_terminate() call, we
+    // don't rethrow the exception; instead we call __clang_call_terminate()
+    // with a nullptr argument, which will call std::terminate().
+    //
+    // - Before:
+    // ehpad:
+    //   %exnref:exnref = catch
+    //   %exn:i32 = extract_exception
+    //   call @__clang_call_terminate(%exn)
+    //   unreachable
+    //
+    // - After:
+    // ehpad:
+    //   %exnref:exnref = catch
+    //   br_on_exn %thenbb, $__cpp_exception, %exnref
+    //   br %elsebb
+    // elsebb:
+    //   call @__clang_call_terminate(0)
+    //   unreachable
+    // thenbb:
+    //   %exn:i32 = extract_exception
+    //   call @__clang_call_terminate(%exn)
+    //   unreachable
+    if (TerminatePads.count(EHPad)) {
+      Function *ClangCallTerminateFn =
+          MF.getFunction().getParent()->getFunction(
+              WebAssembly::ClangCallTerminateFn);
+      assert(ClangCallTerminateFn &&
+             "There is no __clang_call_terminate() function");
+      BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID))
+          .addGlobalAddress(ClangCallTerminateFn)
+          .addImm(0);
+      BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE));
+
+    } else {
+      BuildMI(ElseMBB, DL, TII.get(WebAssembly::RETHROW)).addReg(ExnReg);
+      if (EHInfo->hasEHPadUnwindDest(EHPad))
+        ElseMBB->addSuccessor(EHInfo->getEHPadUnwindDest(EHPad));
+    }
   }
+
   return true;
 }
 
-// Terminate pads are cleanup pads, so they should start with a 'catch_all'
-// instruction. But in the Itanium model, when we have a C++ exception object,
-// we pass them to __clang_call_terminate function, which calls __cxa_end_catch
-// with the passed exception pointer and then std::terminate. This is the reason
-// that terminate pads are generated with not a catch_all but a catch
-// instruction in clang and earlier llvm passes. Here we append a terminate pad
-// with a catch_all after each existing terminate pad so we can also catch
-// foreign exceptions. For every terminate pad:
-//   %exn = catch 0
-//   call @__clang_call_terminate(%exn)
-//   unreachable
-// We append this BB right after that:
-//   catch_all
-//   call @std::terminate()
-//   unreachable
-bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
-  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-  SmallVector<MachineBasicBlock *, 8> TermPads;
-  for (auto &MBB : MF)
-    if (WebAssembly::isCatchTerminatePad(MBB))
-      TermPads.push_back(&MBB);
-  if (TermPads.empty())
+// After the stack is unwound due to a thrown exception, the __stack_pointer
+// global can point to an invalid address. This inserts instructions that
+// restore __stack_pointer global.
+bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
+  const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
+      MF.getSubtarget().getFrameLowering());
+  if (!FrameLowering->needsPrologForEH(MF))
     return false;
+  bool Changed = false;
 
-  Function *StdTerminateFn =
-      MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
-  assert(StdTerminateFn && "There is no std::terminate() function");
-  for (auto *CatchTermPad : TermPads) {
-    DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
-    auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
-    MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
-              CatchAllTermPad);
-    CatchAllTermPad->setIsEHPad();
-    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
-    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
-        .addGlobalAddress(StdTerminateFn);
-    BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
+  for (auto &MBB : MF) {
+    if (!MBB.isEHPad())
+      continue;
+    Changed = true;
 
-    // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
-    // a successor of an existing terminate pad. CatchAllTermPad should have all
-    // predecessors CatchTermPad has instead. This is a hack to force
-    // CatchAllTermPad be always sorted right after CatchTermPad; the correct
-    // predecessor-successor relationships will be restored in CFGStackify pass.
-    CatchTermPad->addSuccessor(CatchAllTermPad);
+    // Insert __stack_pointer restoring instructions at the beginning of each EH
+    // pad, after the catch instruction. Here it is safe to assume that SP32
+    // holds the latest value of __stack_pointer, because the only exception for
+    // this case is when a function uses the red zone, but that only happens
+    // with leaf functions, and we don't restore __stack_pointer in leaf
+    // functions anyway.
+    auto InsertPos = MBB.begin();
+    if (InsertPos->isEHLabel()) // EH pad starts with an EH label
+      ++InsertPos;
+    if (InsertPos->getOpcode() == WebAssembly::CATCH)
+      ++InsertPos;
+    FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
+                                   MBB.begin()->getDebugLoc());
   }
-  return true;
+  return Changed;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index c9a3527d3fbd..34a8195ac4b4 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 0491f71cea7f..960d5134f6e9 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -1,9 +1,8 @@
 //=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -240,16 +239,16 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
   bool EnableEH;   // Enable exception handling
   bool EnableSjLj; // Enable setjmp/longjmp handling
 
-  GlobalVariable *ThrewGV;
-  GlobalVariable *ThrewValueGV;
-  Function *GetTempRet0Func;
-  Function *SetTempRet0Func;
-  Function *ResumeF;
-  Function *EHTypeIDF;
-  Function *EmLongjmpF;
-  Function *EmLongjmpJmpbufF;
-  Function *SaveSetjmpF;
-  Function *TestSetjmpF;
+  GlobalVariable *ThrewGV = nullptr;
+  GlobalVariable *ThrewValueGV = nullptr;
+  Function *GetTempRet0Func = nullptr;
+  Function *SetTempRet0Func = nullptr;
+  Function *ResumeF = nullptr;
+  Function *EHTypeIDF = nullptr;
+  Function *EmLongjmpF = nullptr;
+  Function *EmLongjmpJmpbufF = nullptr;
+  Function *SaveSetjmpF = nullptr;
+  Function *TestSetjmpF = nullptr;
 
   // __cxa_find_matching_catch_N functions.
   // Indexed by the number of clauses in an original landingpad instruction.
@@ -282,11 +281,7 @@ public:
   static char ID;
 
   WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
-      : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
-        ThrewGV(nullptr), ThrewValueGV(nullptr), GetTempRet0Func(nullptr),
-        SetTempRet0Func(nullptr), ResumeF(nullptr), EHTypeIDF(nullptr),
-        EmLongjmpF(nullptr), EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr),
-        TestSetjmpF(nullptr) {
+      : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
     EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
   }
   bool runOnModule(Module &M) override;
@@ -339,11 +334,12 @@ static bool canThrow(const Value *V) {
 // which will generate an import and asssumes that it will exist at link time.
 static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
                                             const char *Name) {
-  if (M.getNamedGlobal(Name))
-    report_fatal_error(Twine("variable name is reserved: ") + Name);
 
-  return new GlobalVariable(M, IRB.getInt32Ty(), false,
-                            GlobalValue::ExternalLinkage, nullptr, Name);
+  auto* GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
+  if (!GV)
+    report_fatal_error(Twine("unable to create global: ") + Name);
+
+  return GV;
 }
 
 // Simple function name mangler.
@@ -433,8 +429,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
   // No attributes for the callee pointer.
   ArgAttributes.push_back(AttributeSet());
   // Copy the argument attributes from the original
-  for (unsigned i = 0, e = CI->getNumArgOperands(); i < e; ++i)
-    ArgAttributes.push_back(InvokeAL.getParamAttributes(i));
+  for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
+    ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
 
   // Reconstruct the AttributesList based on the vector we constructed.
   AttributeList NewCallAL =
@@ -446,7 +442,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
 
   // Post-invoke
   // %__THREW__.val = __THREW__; __THREW__ = 0;
-  Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
+  Value *Threw =
+      IRB.CreateLoad(IRB.getInt32Ty(), ThrewGV, ThrewGV->getName() + ".val");
   IRB.CreateStore(IRB.getInt32(0), ThrewGV);
   return Threw;
 }
@@ -488,6 +485,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
     if (CalleeF->isIntrinsic())
       return false;
 
+  // Attempting to transform inline assembly will result in something like:
+  //     call void @__invoke_void(void ()* asm ...)
+  // which is invalid because inline assembly blocks do not have addresses
+  // and can't be passed by pointer. The result is a crash with illegal IR.
+  if (isa<InlineAsm>(Callee))
+    return false;
+
   // The reason we include malloc/free here is to exclude the malloc/free
   // calls generated in setjmp prep / cleanup routines.
   Function *SetjmpF = M.getFunction("setjmp");
@@ -549,8 +553,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
   BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
   BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
   Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0));
-  Value *ThrewValue =
-      IRB.CreateLoad(ThrewValueGV, ThrewValueGV->getName() + ".val");
+  Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
+                                     ThrewValueGV->getName() + ".val");
   Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
   Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
   IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
@@ -562,8 +566,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
   BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
   Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C),
                                        Threw->getName() + ".i32p");
-  Value *LoadedThrew =
-      IRB.CreateLoad(ThrewInt, ThrewInt->getName() + ".loaded");
+  Value *LoadedThrew = IRB.CreateLoad(IRB.getInt32Ty(), ThrewInt,
+                                      ThrewInt->getName() + ".loaded");
   Value *ThenLabel = IRB.CreateCall(
       TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
   Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
@@ -606,11 +610,11 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
         ++UI;
         SSA.Initialize(I.getType(), I.getName());
         SSA.AddAvailableValue(&BB, &I);
-        Instruction *User = cast<Instruction>(U.getUser());
+        auto *User = cast<Instruction>(U.getUser());
         if (User->getParent() == &BB)
           continue;
 
-        if (PHINode *UserPN = dyn_cast<PHINode>(User))
+        if (auto *UserPN = dyn_cast<PHINode>(User))
           if (UserPN->getIncomingBlock(U) == &BB)
             continue;
 
@@ -769,7 +773,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
       // This can't throw, and we don't need this invoke, just replace it with a
       // call+branch
       SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
-      CallInst *NewCall = IRB.CreateCall(II->getCalledValue(), Args);
+      CallInst *NewCall =
+          IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args);
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setDebugLoc(II->getDebugLoc());
@@ -836,15 +841,15 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
   for (LandingPadInst *LPI : LandingPads) {
     IRB.SetInsertPoint(LPI);
     SmallVector<Value *, 16> FMCArgs;
-    for (unsigned i = 0, e = LPI->getNumClauses(); i < e; ++i) {
-      Constant *Clause = LPI->getClause(i);
+    for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
+      Constant *Clause = LPI->getClause(I);
       // As a temporary workaround for the lack of aggregate varargs support
       // in the interface between JS and wasm, break out filter operands into
       // their component elements.
-      if (LPI->isFilter(i)) {
+      if (LPI->isFilter(I)) {
         auto *ATy = cast<ArrayType>(Clause->getType());
-        for (unsigned j = 0, e = ATy->getNumElements(); j < e; ++j) {
-          Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(j), "filter");
+        for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
+          Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
           FMCArgs.push_back(EV);
         }
       } else
@@ -954,8 +959,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
     BBs.push_back(&BB);
 
   // BBs.size() will change within the loop, so we query it every time
-  for (unsigned i = 0; i < BBs.size(); i++) {
-    BasicBlock *BB = BBs[i];
+  for (unsigned I = 0; I < BBs.size(); I++) {
+    BasicBlock *BB = BBs[I];
     for (Instruction &I : *BB) {
       assert(!isa<InvokeInst>(&I));
       auto *CI = dyn_cast<CallInst>(&I);
@@ -1028,9 +1033,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
       // switch case). 0 means a longjmp that is not ours to handle, needs a
       // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
       // 0).
-      for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) {
-        SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent());
-        SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB);
+      for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
+        SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
+        SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB);
       }
 
       // We are splitting the block here, and must continue to find other calls
@@ -1077,7 +1082,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
     Use &U = *UI;
     // Increment the iterator before removing the use from the list.
     ++UI;
-    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+    if (auto *I = dyn_cast<Instruction>(U.getUser()))
       if (I->getParent() != &EntryBB)
         SetjmpTableSSA.RewriteUse(U);
   }
@@ -1085,7 +1090,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
        UI != UE;) {
     Use &U = *UI;
     ++UI;
-    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+    if (auto *I = dyn_cast<Instruction>(U.getUser()))
       if (I->getParent() != &EntryBB)
         SetjmpTableSizeSSA.RewriteUse(U);
   }
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 84c877cb8d02..494d3fadbc8c 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -62,7 +61,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
   LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
 
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
-  if (!GV)
+  if (!GV || !GV->hasInitializer())
     return false;
 
   const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
@@ -70,7 +69,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
     return false;
 
   // Sanity-check @llvm.global_dtor's type.
-  StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+  auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
   if (!ETy || ETy->getNumElements() != 3 ||
       !ETy->getTypeAtIndex(0U)->isIntegerTy() ||
       !ETy->getTypeAtIndex(1U)->isPointerTy() ||
@@ -81,11 +80,11 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
   // associated symbol.
   std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
   for (Value *O : InitList->operands()) {
-    ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
+    auto *CS = dyn_cast<ConstantStruct>(O);
     if (!CS)
       continue; // Malformed.
 
-    ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+    auto *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
     if (!Priority)
       continue; // Malformed.
     uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
@@ -110,10 +109,11 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
       FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
                         /*isVarArg=*/false);
 
-  Type *AtExitArgs[] = {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar};
-  FunctionType *AtExitTy = FunctionType::get(Type::getInt32Ty(C), AtExitArgs,
-                                             /*isVarArg=*/false);
-  Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
+  FunctionCallee AtExit = M.getOrInsertFunction(
+      "__cxa_atexit",
+      FunctionType::get(Type::getInt32Ty(C),
+                        {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
+                        /*isVarArg=*/false));
 
   // Declare __dso_local.
   Constant *DsoHandle = M.getNamedValue("__dso_handle");
@@ -143,13 +143,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
                                           : Twine()),
           &M);
       BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
+      FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
+                                                 /*isVarArg=*/false);
 
       for (auto Dtor : AssociatedAndMore.second)
-        CallInst::Create(Dtor, "", BB);
+        CallInst::Create(VoidVoid, Dtor, "", BB);
       ReturnInst::Create(C, BB);
 
-      FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
-                                                 /*isVarArg=*/false);
       Function *RegisterCallDtors = Function::Create(
           VoidVoid, Function::PrivateLinkage,
           "register_call_dtors" +
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index fa862fbaa634..288b991ae2c5 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -1,9 +1,8 @@
 // WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst //
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -17,7 +16,7 @@
 #include "WebAssemblyAsmPrinter.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblyRuntimeLibcallSignatures.h"
-#include "WebAssemblyUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/Constants.h"
@@ -37,7 +36,7 @@ using namespace llvm;
 
 // This disables the removal of registers when lowering into MC, as required
 // by some current tests.
-static cl::opt<bool>
+cl::opt<bool>
     WasmKeepRegisters("wasm-keep-registers", cl::Hidden,
                       cl::desc("WebAssembly: output stack registers in"
                                " instruction output for test purposes only."),
@@ -48,7 +47,7 @@ static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
 MCSymbol *
 WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
   const GlobalValue *Global = MO.getGlobal();
-  MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
+  auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
 
   if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) {
     const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
@@ -57,9 +56,9 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
 
     SmallVector<MVT, 1> ResultMVTs;
     SmallVector<MVT, 4> ParamMVTs;
-    ComputeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
+    computeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
 
-    auto Signature = SignatureFromMVTs(ResultMVTs, ParamMVTs);
+    auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs);
     WasmSym->setSignature(Signature.get());
     Printer.addSignature(std::move(Signature));
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
@@ -71,20 +70,23 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
 MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
     const MachineOperand &MO) const {
   const char *Name = MO.getSymbolName();
-  MCSymbolWasm *WasmSym =
-      cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
+  auto *WasmSym = cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
   const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
 
-  // Except for the two exceptions (__stack_pointer and __cpp_exception), all
-  // other external symbols used by CodeGen are functions. It's OK to hardcode
-  // knowledge of specific symbols here; this method is precisely there for
-  // fetching the signatures of known Clang-provided symbols.
-  if (strcmp(Name, "__stack_pointer") == 0) {
+  // Except for certain known symbols, all symbols used by CodeGen are
+  // functions. It's OK to hardcode knowledge of specific symbols here; this
+  // method is precisely there for fetching the signatures of known
+  // Clang-provided symbols.
+  if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 ||
+      strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 ||
+      strcmp(Name, "__tls_size") == 0) {
+    bool Mutable =
+        strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
     WasmSym->setGlobalType(wasm::WasmGlobalType{
         uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
                                       : wasm::WASM_TYPE_I32),
-        true});
+        Mutable});
     return WasmSym;
   }
 
@@ -110,7 +112,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
                                            : wasm::ValType::I32);
   } else { // Function symbols
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
-    GetLibcallSignature(Subtarget, Name, Returns, Params);
+    getLibcallSignature(Subtarget, Name, Returns, Params);
   }
   auto Signature =
       make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
@@ -120,27 +122,42 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
   return WasmSym;
 }
 
-MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
-                                                     int64_t Offset,
-                                                     bool IsFunc, bool IsGlob,
-                                                     bool IsEvent) const {
-  MCSymbolRefExpr::VariantKind VK =
-      IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
-             : IsGlob ? MCSymbolRefExpr::VK_WebAssembly_GLOBAL
-                      : IsEvent ? MCSymbolRefExpr::VK_WebAssembly_EVENT
-                                : MCSymbolRefExpr::VK_None;
+MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+                                                     MCSymbol *Sym) const {
+  MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+  unsigned TargetFlags = MO.getTargetFlags();
+
+  switch (TargetFlags) {
+    case WebAssemblyII::MO_NO_FLAG:
+      break;
+    case WebAssemblyII::MO_GOT:
+      Kind = MCSymbolRefExpr::VK_GOT;
+      break;
+    case WebAssemblyII::MO_MEMORY_BASE_REL:
+      Kind = MCSymbolRefExpr::VK_WASM_MBREL;
+      break;
+    case WebAssemblyII::MO_TABLE_BASE_REL:
+      Kind = MCSymbolRefExpr::VK_WASM_TBREL;
+      break;
+    default:
+      llvm_unreachable("Unknown target flag on GV operand");
+  }
 
-  const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
+  const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Kind, Ctx);
 
-  if (Offset != 0) {
-    if (IsFunc)
+  if (MO.getOffset() != 0) {
+    const auto *WasmSym = cast<MCSymbolWasm>(Sym);
+    if (TargetFlags == WebAssemblyII::MO_GOT)
+      report_fatal_error("GOT symbol references do not support offsets");
+    if (WasmSym->isFunction())
       report_fatal_error("Function addresses with offsets not supported");
-    if (IsGlob)
+    if (WasmSym->isGlobal())
       report_fatal_error("Global indexes with offsets not supported");
-    if (IsEvent)
+    if (WasmSym->isEvent())
       report_fatal_error("Event indexes with offsets not supported");
-    Expr =
-        MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
+
+    Expr = MCBinaryExpr::createAdd(
+        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
   }
 
   return MCOperand::createExpr(Expr);
@@ -161,13 +178,13 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
   llvm_unreachable("Unexpected register class");
 }
 
-void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
+void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
                                    MCInst &OutMI) const {
   OutMI.setOpcode(MI->getOpcode());
 
   const MCInstrDesc &Desc = MI->getDesc();
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
 
     MCOperand MCOp;
     switch (MO.getType()) {
@@ -188,8 +205,8 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
       break;
     }
     case MachineOperand::MO_Immediate:
-      if (i < Desc.NumOperands) {
-        const MCOperandInfo &Info = Desc.OpInfo[i];
+      if (I < Desc.NumOperands) {
+        const MCOperandInfo &Info = Desc.OpInfo[I];
         if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
           MCSymbol *Sym = Printer.createTempSymbol("typeindex");
 
@@ -206,10 +223,10 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
 
           // call_indirect instructions have a callee operand at the end which
           // doesn't count as a param.
-          if (WebAssembly::isCallIndirect(*MI))
+          if (WebAssembly::isCallIndirect(MI->getOpcode()))
             Params.pop_back();
 
-          MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+          auto *WasmSym = cast<MCSymbolWasm>(Sym);
           auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
                                                             std::move(Params));
           WasmSym->setSignature(Signature.get());
@@ -217,7 +234,7 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
           WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
 
           const MCExpr *Expr = MCSymbolRefExpr::create(
-              WasmSym, MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX, Ctx);
+              WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
           MCOp = MCOperand::createExpr(Expr);
           break;
         }
@@ -237,30 +254,21 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
       break;
     }
     case MachineOperand::MO_GlobalAddress:
-      assert(MO.getTargetFlags() == WebAssemblyII::MO_NO_FLAG &&
-             "WebAssembly does not use target flags on GlobalAddresses");
-      MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
-                                MO.getGlobal()->getValueType()->isFunctionTy(),
-                                false, false);
+      MCOp = lowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
       break;
     case MachineOperand::MO_ExternalSymbol:
       // The target flag indicates whether this is a symbol for a
       // variable or a function.
-      assert((MO.getTargetFlags() & ~WebAssemblyII::MO_SYMBOL_MASK) == 0 &&
+      assert(MO.getTargetFlags() == 0 &&
              "WebAssembly uses only symbol flags on ExternalSymbols");
-      MCOp = LowerSymbolOperand(
-          GetExternalSymbolSymbol(MO), /*Offset=*/0,
-          (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_FUNCTION) != 0,
-          (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_GLOBAL) != 0,
-          (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_EVENT) != 0);
+      MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
       break;
     case MachineOperand::MO_MCSymbol:
       // This is currently used only for LSDA symbols (GCC_except_table),
       // because global addresses or other external symbols are handled above.
       assert(MO.getTargetFlags() == 0 &&
              "WebAssembly does not use target flags on MCSymbol");
-      MCOp = LowerSymbolOperand(MO.getMCSymbol(), /*Offset=*/0, false, false,
-                                false);
+      MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
       break;
     }
 
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index fa7a0ea61b3b..2c375a01a7f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -33,13 +32,12 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
 
   MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
   MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
-  MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, bool IsFunc,
-                               bool IsGlob, bool IsEvent) const;
+  MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
 
 public:
   WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
       : Ctx(ctx), Printer(printer) {}
-  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
 };
 } // end namespace llvm
 
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 0157af0f8510..d31c1226bfdb 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -19,7 +18,7 @@
 #include "llvm/CodeGen/Analysis.h"
 using namespace llvm;
 
-WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
+WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
 
 void WebAssemblyFunctionInfo::initWARegs() {
   assert(WARegs.empty());
@@ -27,7 +26,7 @@ void WebAssemblyFunctionInfo::initWARegs() {
   WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
 }
 
-void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
                                 Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
   const DataLayout &DL(F.getParent()->getDataLayout());
   const WebAssemblyTargetLowering &TLI =
@@ -38,16 +37,16 @@ void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
   for (EVT VT : VTs) {
     unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
     MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
-    for (unsigned i = 0; i != NumRegs; ++i)
+    for (unsigned I = 0; I != NumRegs; ++I)
       ValueVTs.push_back(RegisterVT);
   }
 }
 
-void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
                                const TargetMachine &TM,
                                SmallVectorImpl<MVT> &Params,
                                SmallVectorImpl<MVT> &Results) {
-  ComputeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+  computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
 
   MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
   if (Results.size() > 1) {
@@ -59,22 +58,35 @@ void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
   }
 
   for (auto *Param : Ty->params())
-    ComputeLegalValueVTs(F, TM, Param, Params);
+    computeLegalValueVTs(F, TM, Param, Params);
   if (Ty->isVarArg())
     Params.push_back(PtrVT);
 }
 
-void llvm::ValTypesFromMVTs(const ArrayRef<MVT> &In,
+void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
                             SmallVectorImpl<wasm::ValType> &Out) {
   for (MVT Ty : In)
     Out.push_back(WebAssembly::toValType(Ty));
 }
 
 std::unique_ptr<wasm::WasmSignature>
-llvm::SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+llvm::signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
                         const SmallVectorImpl<MVT> &Params) {
   auto Sig = make_unique<wasm::WasmSignature>();
-  ValTypesFromMVTs(Results, Sig->Returns);
-  ValTypesFromMVTs(Params, Sig->Params);
+  valTypesFromMVTs(Results, Sig->Returns);
+  valTypesFromMVTs(Params, Sig->Params);
   return Sig;
 }
+
+yaml::WebAssemblyFunctionInfo::WebAssemblyFunctionInfo(
+    const llvm::WebAssemblyFunctionInfo &MFI)
+    : CFGStackified(MFI.isCFGStackified()) {}
+
+void yaml::WebAssemblyFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+  MappingTraits<WebAssemblyFunctionInfo>::mapping(YamlIO, *this);
+}
+
+void WebAssemblyFunctionInfo::initializeBaseYamlFields(
+    const yaml::WebAssemblyFunctionInfo &YamlMFI) {
+  CFGStackified = YamlMFI.CFGStackified;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 4be4beb85d04..4b9ba491dee6 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 // WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -18,11 +17,16 @@
 
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCSymbolWasm.h"
 
 namespace llvm {
 
+namespace yaml {
+struct WebAssemblyFunctionInfo;
+}
+
 /// This class is derived from MachineFunctionInfo and contains private
 /// WebAssembly-specific information for each MachineFunction.
 class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
@@ -52,9 +56,13 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
   // overaligned values on the user stack.
   unsigned BasePtrVreg = -1U;
 
+  // Function properties.
+  bool CFGStackified = false;
+
 public:
   explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
   ~WebAssemblyFunctionInfo() override;
+  void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI);
 
   void addParam(MVT VT) { Params.push_back(VT); }
   const std::vector<MVT> &getParams() const { return Params; }
@@ -118,24 +126,47 @@ public:
     assert(Reg & INT32_MIN);
     return Reg & INT32_MAX;
   }
+
+  bool isCFGStackified() const { return CFGStackified; }
+  void setCFGStackified(bool Value = true) { CFGStackified = Value; }
 };
 
-void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
                           SmallVectorImpl<MVT> &ValueVTs);
 
 // Compute the signature for a given FunctionType (Ty). Note that it's not the
 // signature for F (F is just used to get varous context)
-void ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+void computeSignatureVTs(const FunctionType *Ty, const Function &F,
                          const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
                          SmallVectorImpl<MVT> &Results);
 
-void ValTypesFromMVTs(const ArrayRef<MVT> &In,
+void valTypesFromMVTs(const ArrayRef<MVT> &In,
                       SmallVectorImpl<wasm::ValType> &Out);
 
 std::unique_ptr<wasm::WasmSignature>
-SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
                   const SmallVectorImpl<MVT> &Params);
 
+namespace yaml {
+
+struct WebAssemblyFunctionInfo final : public yaml::MachineFunctionInfo {
+  bool CFGStackified = false;
+
+  WebAssemblyFunctionInfo() = default;
+  WebAssemblyFunctionInfo(const llvm::WebAssemblyFunctionInfo &MFI);
+
+  void mappingImpl(yaml::IO &YamlIO) override;
+  ~WebAssemblyFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<WebAssemblyFunctionInfo> {
+  static void mapping(IO &YamlIO, WebAssemblyFunctionInfo &MFI) {
+    YamlIO.mapOptional("isCFGStackified", MFI.CFGStackified, false);
+  }
+};
+
+} // end namespace yaml
+
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index c4b5e96db0c7..7ac0511c28b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -1,9 +1,8 @@
 //== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -82,7 +81,7 @@ FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() {
 }
 
 // Replace uses of FromReg with ToReg if they are dominated by MI.
-static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
+static bool replaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
                                  unsigned FromReg, unsigned ToReg,
                                  const MachineRegisterInfo &MRI,
                                  MachineDominatorTree &MDT,
@@ -157,10 +156,10 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
     return false;
 
   StringRef Name(Op1.getSymbolName());
-  bool callReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
+  bool CallReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
                           Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
                           Name == TLI.getLibcallName(RTLIB::MEMSET);
-  if (!callReturnsInput)
+  if (!CallReturnsInput)
     return false;
 
   LibFunc Func;
@@ -172,7 +171,7 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
   if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
     report_fatal_error("Memory Intrinsic results: call to builtin function "
                        "with wrong signature, from/to mismatch");
-  return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
+  return replaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
 }
 
 bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
@@ -182,11 +181,11 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
   });
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
   const WebAssemblyTargetLowering &TLI =
       *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
   const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  auto &LIS = getAnalysis<LiveIntervals>();
   bool Changed = false;
 
   // We don't preserve SSA form.
@@ -201,8 +200,8 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
       switch (MI.getOpcode()) {
       default:
         break;
-      case WebAssembly::CALL_I32:
-      case WebAssembly::CALL_I64:
+      case WebAssembly::CALL_i32:
+      case WebAssembly::CALL_i64:
         Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
         break;
       }
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 3d0a15244ee0..8c7c3305c201 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -1,9 +1,8 @@
 //===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -72,7 +71,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
                     << MF.getName() << '\n');
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  auto &LIS = getAnalysis<LiveIntervals>();
 
   // We don't preserve SSA form.
   MRI.leaveSSA();
@@ -81,8 +80,8 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
 
   // Split multiple-VN LiveIntervals into multiple LiveIntervals.
   SmallVector<LiveInterval *, 4> SplitLIs;
-  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
     if (MRI.reg_nodbg_empty(Reg))
       continue;
 
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 2c018d0785a7..d20352259e07 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -37,11 +36,11 @@ class OptimizeReturned final : public FunctionPass,
 
   bool runOnFunction(Function &F) override;
 
-  DominatorTree *DT;
+  DominatorTree *DT = nullptr;
 
 public:
   static char ID;
-  OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+  OptimizeReturned() : FunctionPass(ID) {}
 
   void visitCallSite(CallSite CS);
 };
@@ -57,10 +56,10 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
 }
 
 void OptimizeReturned::visitCallSite(CallSite CS) {
-  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
-    if (CS.paramHasAttr(i, Attribute::Returned)) {
+  for (unsigned I = 0, E = CS.getNumArgOperands(); I < E; ++I)
+    if (CS.paramHasAttr(I, Attribute::Returned)) {
       Instruction *Inst = CS.getInstruction();
-      Value *Arg = CS.getArgOperand(i);
+      Value *Arg = CS.getArgOperand(I);
       // Ignore constants, globals, undef, etc.
       if (isa<Constant>(Arg))
         continue;
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 2dfd85953f14..e11cdeaa0e79 100644
--- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimiztions -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -58,7 +57,7 @@ FunctionPass *llvm::createWebAssemblyPeephole() {
 }
 
 /// If desirable, rewrite NewReg to a drop register.
-static bool MaybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
+static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
                                MachineOperand &MO, WebAssemblyFunctionInfo &MFI,
                                MachineRegisterInfo &MRI) {
   bool Changed = false;
@@ -72,7 +71,7 @@ static bool MaybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
   return Changed;
 }
 
-static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
+static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
                                       const MachineFunction &MF,
                                       WebAssemblyFunctionInfo &MFI,
                                       MachineRegisterInfo &MRI,
@@ -129,8 +128,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
       switch (MI.getOpcode()) {
       default:
         break;
-      case WebAssembly::CALL_I32:
-      case WebAssembly::CALL_I64: {
+      case WebAssembly::CALL_i32:
+      case WebAssembly::CALL_i64: {
         MachineOperand &Op1 = MI.getOperand(1);
         if (Op1.isSymbol()) {
           StringRef Name(Op1.getSymbolName());
@@ -150,7 +149,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
               if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
                 report_fatal_error("Peephole: call to builtin function with "
                                    "wrong signature, from/to mismatch");
-              Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI);
+              Changed |= maybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI);
             }
           }
         }
@@ -158,57 +157,57 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
       }
       // Optimize away an explicit void return at the end of the function.
       case WebAssembly::RETURN_I32:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32,
             WebAssembly::COPY_I32);
         break;
       case WebAssembly::RETURN_I64:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64,
             WebAssembly::COPY_I64);
         break;
       case WebAssembly::RETURN_F32:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32,
             WebAssembly::COPY_F32);
         break;
       case WebAssembly::RETURN_F64:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64,
             WebAssembly::COPY_F64);
         break;
       case WebAssembly::RETURN_v16i8:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_v8i16:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_v4i32:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_v2i64:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_v4f32:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_v2f64:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
             WebAssembly::COPY_V128);
         break;
       case WebAssembly::RETURN_VOID:
-        Changed |= MaybeRewriteToFallthrough(
+        Changed |= maybeRewriteToFallthrough(
             MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
             WebAssembly::INSTRUCTION_LIST_END);
         break;
diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 0be0ba657830..3bfbf607344d 100644
--- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -1,9 +1,8 @@
 //===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -63,9 +62,9 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
 }
 
 // Test whether the given register has an ARGUMENT def.
-static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
+static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
   for (const auto &Def : MRI.def_instructions(Reg))
-    if (WebAssembly::isArgument(Def))
+    if (WebAssembly::isArgument(Def.getOpcode()))
       return true;
   return false;
 }
@@ -95,15 +94,15 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
   //
   // TODO: This is fairly heavy-handed; find a better approach.
   //
-  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
 
     // Skip unused registers.
     if (MRI.use_nodbg_empty(Reg))
       continue;
 
     // Skip registers that have an ARGUMENT definition.
-    if (HasArgumentDef(Reg, MRI))
+    if (hasArgumentDef(Reg, MRI))
       continue;
 
     BuildMI(Entry, Entry.begin(), DebugLoc(),
@@ -115,7 +114,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
   // liveness reflects the fact that these really are live-in values.
   for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
     MachineInstr &MI = *MII++;
-    if (WebAssembly::isArgument(MI)) {
+    if (WebAssembly::isArgument(MI.getOpcode())) {
       MI.removeFromParent();
       Entry.insert(Entry.begin(), &MI);
     }
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index d97b13a8d699..6f09c45b6642 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -66,11 +65,11 @@ FunctionPass *llvm::createWebAssemblyRegColoring() {
 static float computeWeight(const MachineRegisterInfo *MRI,
                            const MachineBlockFrequencyInfo *MBFI,
                            unsigned VReg) {
-  float weight = 0.0f;
+  float Weight = 0.0f;
   for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
-    weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+    Weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
                                             *MO.getParent());
-  return weight;
+  return Weight;
 }
 
 bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
@@ -98,8 +97,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
   SortedIntervals.reserve(NumVRegs);
 
   LLVM_DEBUG(dbgs() << "Interesting register intervals:\n");
-  for (unsigned i = 0; i < NumVRegs; ++i) {
-    unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+  for (unsigned I = 0; I < NumVRegs; ++I) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
     if (MFI.isVRegStackified(VReg))
       continue;
     // Skip unused registers, which can use $drop.
@@ -134,10 +133,10 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
       SortedIntervals.size());
   BitVector UsedColors(SortedIntervals.size());
   bool Changed = false;
-  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
-    LiveInterval *LI = SortedIntervals[i];
+  for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
+    LiveInterval *LI = SortedIntervals[I];
     unsigned Old = LI->reg;
-    size_t Color = i;
+    size_t Color = I;
     const TargetRegisterClass *RC = MRI->getRegClass(Old);
 
     // Check if it's possible to reuse any of the used colors.
@@ -154,7 +153,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
       }
 
     unsigned New = SortedIntervals[Color]->reg;
-    SlotMapping[i] = New;
+    SlotMapping[I] = New;
     Changed |= Old != New;
     UsedColors.set(Color);
     Assignments[Color].push_back(LI);
@@ -166,9 +165,9 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   // Rewrite register operands.
-  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
-    unsigned Old = SortedIntervals[i]->reg;
-    unsigned New = SlotMapping[i];
+  for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
+    unsigned Old = SortedIntervals[I]->reg;
+    unsigned New = SlotMapping[I];
     if (Old != New)
       MRI->replaceRegWith(Old, New);
   }
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 1e2a248f097e..cdca23f55b29 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -73,7 +72,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
   // variables. Assign the numbers for them first.
   MachineBasicBlock &EntryMBB = MF.front();
   for (MachineInstr &MI : EntryMBB) {
-    if (!WebAssembly::isArgument(MI))
+    if (!WebAssembly::isArgument(MI.getOpcode()))
       break;
 
     int64_t Imm = MI.getOperand(1).getImm();
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 1eb32ed64494..a120a6471014 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -80,7 +79,7 @@ FunctionPass *llvm::createWebAssemblyRegStackify() {
 // Decorate the given instruction with implicit operands that enforce the
 // expression stack ordering constraints for an instruction which is on
 // the expression stack.
-static void ImposeStackOrdering(MachineInstr *MI) {
+static void imposeStackOrdering(MachineInstr *MI) {
   // Write the opaque VALUE_STACK register.
   if (!MI->definesRegister(WebAssembly::VALUE_STACK))
     MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
@@ -96,7 +95,7 @@ static void ImposeStackOrdering(MachineInstr *MI) {
 
 // Convert an IMPLICIT_DEF instruction into an instruction which defines
 // a constant zero value.
-static void ConvertImplicitDefToConstZero(MachineInstr *MI,
+static void convertImplicitDefToConstZero(MachineInstr *MI,
                                           MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII,
                                           MachineFunction &MF,
@@ -112,12 +111,12 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
     MI->addOperand(MachineOperand::CreateImm(0));
   } else if (RegClass == &WebAssembly::F32RegClass) {
     MI->setDesc(TII->get(WebAssembly::CONST_F32));
-    ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+    auto *Val = cast<ConstantFP>(Constant::getNullValue(
         Type::getFloatTy(MF.getFunction().getContext())));
     MI->addOperand(MachineOperand::CreateFPImm(Val));
   } else if (RegClass == &WebAssembly::F64RegClass) {
     MI->setDesc(TII->get(WebAssembly::CONST_F64));
-    ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+    auto *Val = cast<ConstantFP>(Constant::getNullValue(
         Type::getDoubleTy(MF.getFunction().getContext())));
     MI->addOperand(MachineOperand::CreateFPImm(Val));
   } else if (RegClass == &WebAssembly::V128RegClass) {
@@ -136,7 +135,7 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
 // Determine whether a call to the callee referenced by
 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
 // effects.
-static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
+static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
                         bool &Write, bool &Effects, bool &StackPointer) {
   // All calls can use the stack pointer.
   StackPointer = true;
@@ -144,11 +143,11 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
   const MachineOperand &MO = MI.getOperand(CalleeOpNo);
   if (MO.isGlobal()) {
     const Constant *GV = MO.getGlobal();
-    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+    if (const auto *GA = dyn_cast<GlobalAlias>(GV))
       if (!GA->isInterposable())
         GV = GA->getAliasee();
 
-    if (const Function *F = dyn_cast<Function>(GV)) {
+    if (const auto *F = dyn_cast<Function>(GV)) {
       if (!F->doesNotThrow())
         Effects = true;
       if (F->doesNotAccessMemory())
@@ -168,7 +167,7 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
 
 // Determine whether MI reads memory, writes memory, has side effects,
 // and/or uses the stack pointer value.
-static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
+static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
                   bool &Write, bool &Effects, bool &StackPointer) {
   assert(!MI.isTerminator());
 
@@ -253,13 +252,13 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
 
   // Analyze calls.
   if (MI.isCall()) {
-    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
-    QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
+    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
+    queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
   }
 }
 
 // Test whether Def is safe and profitable to rematerialize.
-static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
+static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
                                 const WebAssemblyInstrInfo *TII) {
   return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
 }
@@ -267,7 +266,7 @@ static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
 // Identify the definition for this register at this point. This is a
 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
 // LiveIntervals to handle complex cases.
-static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
+static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
                                 const MachineRegisterInfo &MRI,
                                 const LiveIntervals &LIS) {
   // Most registers are in SSA form here so we try a quick MRI query first.
@@ -285,7 +284,7 @@ static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
 // Test whether Reg, as defined at Def, has exactly one use. This is a
 // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
 // to handle complex cases.
-static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
+static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
                       MachineDominatorTree &MDT, LiveIntervals &LIS) {
   // Most registers are in SSA form here so we try a quick MRI query first.
   if (MRI.hasOneUse(Reg))
@@ -314,10 +313,22 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
 // walking the block.
 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
 // more precise.
-static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                          AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
   assert(Def->getParent() == Insert->getParent());
 
+  // 'catch' and 'extract_exception' should be the first instruction of a BB and
+  // cannot move.
+  if (Def->getOpcode() == WebAssembly::CATCH ||
+      Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
+    const MachineBasicBlock *MBB = Def->getParent();
+    auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
+    for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
+      ;
+    if (NextI != Insert)
+      return false;
+  }
+
   // Check for register dependencies.
   SmallVector<unsigned, 4> MutableRegisters;
   for (const MachineOperand &MO : Def->operands()) {
@@ -350,7 +361,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
   }
 
   bool Read = false, Write = false, Effects = false, StackPointer = false;
-  Query(*Def, AA, Read, Write, Effects, StackPointer);
+  query(*Def, AA, Read, Write, Effects, StackPointer);
 
   // If the instruction does not access memory and has no side effects, it has
   // no additional dependencies.
@@ -365,7 +376,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
     bool InterveningWrite = false;
     bool InterveningEffects = false;
     bool InterveningStackPointer = false;
-    Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
+    query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
           InterveningStackPointer);
     if (Effects && InterveningEffects)
       return false;
@@ -386,7 +397,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
 }
 
 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
-static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
+static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                      const MachineBasicBlock &MBB,
                                      const MachineRegisterInfo &MRI,
                                      const MachineDominatorTree &MDT,
@@ -445,7 +456,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
 }
 
 /// Get the appropriate tee opcode for the given register class.
-static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
+static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
   if (RC == &WebAssembly::I32RegClass)
     return WebAssembly::TEE_I32;
   if (RC == &WebAssembly::I64RegClass)
@@ -460,7 +471,7 @@ static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
 }
 
 // Shrink LI to its uses, cleaning up LI.
-static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
+static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
   if (LIS.shrinkToUses(&LI)) {
     SmallVector<LiveInterval *, 4> SplitLIs;
     LIS.splitSeparateComponents(LI, SplitLIs);
@@ -469,7 +480,7 @@ static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
 
 /// A single-use def in the same block with no intervening memory or register
 /// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
                                       MachineInstr *Def, MachineBasicBlock &MBB,
                                       MachineInstr *Insert, LiveIntervals &LIS,
                                       WebAssemblyFunctionInfo &MFI,
@@ -508,13 +519,13 @@ static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
     LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
   }
 
-  ImposeStackOrdering(Def);
+  imposeStackOrdering(Def);
   return Def;
 }
 
 /// A trivially cloneable instruction; clone it and nest the new copy with the
 /// current instruction.
-static MachineInstr *RematerializeCheapDef(
+static MachineInstr *rematerializeCheapDef(
     unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
     MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
     WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
@@ -531,7 +542,7 @@ static MachineInstr *RematerializeCheapDef(
   LIS.InsertMachineInstrInMaps(*Clone);
   LIS.createAndComputeVirtRegInterval(NewReg);
   MFI.stackifyVReg(NewReg);
-  ImposeStackOrdering(Clone);
+  imposeStackOrdering(Clone);
 
   LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
 
@@ -539,7 +550,7 @@ static MachineInstr *RematerializeCheapDef(
   bool IsDead = MRI.use_empty(Reg);
   if (!IsDead) {
     LiveInterval &LI = LIS.getInterval(Reg);
-    ShrinkToUses(LI, LIS);
+    shrinkToUses(LI, LIS);
     IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
   }
 
@@ -582,7 +593,7 @@ static MachineInstr *RematerializeCheapDef(
 ///
 /// with DefReg and TeeReg stackified. This eliminates a local.get from the
 /// resulting code.
-static MachineInstr *MoveAndTeeForMultiUse(
+static MachineInstr *moveAndTeeForMultiUse(
     unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
     MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
     MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
@@ -600,7 +611,7 @@ static MachineInstr *MoveAndTeeForMultiUse(
   unsigned DefReg = MRI.createVirtualRegister(RegClass);
   MachineOperand &DefMO = Def->getOperand(0);
   MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
-                              TII->get(GetTeeOpcode(RegClass)), TeeReg)
+                              TII->get(getTeeOpcode(RegClass)), TeeReg)
                           .addReg(Reg, RegState::Define)
                           .addReg(DefReg, getUndefRegState(DefMO.isDead()));
   Op.setReg(TeeReg);
@@ -616,15 +627,15 @@ static MachineInstr *MoveAndTeeForMultiUse(
   VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
   I->start = TeeIdx;
   ValNo->def = TeeIdx;
-  ShrinkToUses(LI, LIS);
+  shrinkToUses(LI, LIS);
 
   // Finish stackifying the new regs.
   LIS.createAndComputeVirtRegInterval(TeeReg);
   LIS.createAndComputeVirtRegInterval(DefReg);
   MFI.stackifyVReg(DefReg);
   MFI.stackifyVReg(TeeReg);
-  ImposeStackOrdering(Def);
-  ImposeStackOrdering(Tee);
+  imposeStackOrdering(Def);
+  imposeStackOrdering(Tee);
 
   DefDIs.clone(Tee, DefReg);
   DefDIs.clone(Insert, TeeReg);
@@ -638,9 +649,9 @@ namespace {
 /// A stack for walking the tree of instructions being built, visiting the
 /// MachineOperands in DFS order.
 class TreeWalkerState {
-  typedef MachineInstr::mop_iterator mop_iterator;
-  typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
-  typedef iterator_range<mop_reverse_iterator> RangeTy;
+  using mop_iterator = MachineInstr::mop_iterator;
+  using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
+  using RangeTy = iterator_range<mop_reverse_iterator>;
   SmallVector<RangeTy, 4> Worklist;
 
 public:
@@ -650,9 +661,9 @@ public:
       Worklist.push_back(reverse(Range));
   }
 
-  bool Done() const { return Worklist.empty(); }
+  bool done() const { return Worklist.empty(); }
 
-  MachineOperand &Pop() {
+  MachineOperand &pop() {
     RangeTy &Range = Worklist.back();
     MachineOperand &Op = *Range.begin();
     Range = drop_begin(Range, 1);
@@ -665,7 +676,7 @@ public:
   }
 
   /// Push Instr's operands onto the stack to be visited.
-  void PushOperands(MachineInstr *Instr) {
+  void pushOperands(MachineInstr *Instr) {
     const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
     if (Range.begin() != Range.end())
       Worklist.push_back(reverse(Range));
@@ -673,8 +684,8 @@ public:
 
   /// Some of Instr's operands are on the top of the stack; remove them and
   /// re-insert them starting from the beginning (because we've commuted them).
-  void ResetTopOperands(MachineInstr *Instr) {
-    assert(HasRemainingOperands(Instr) &&
+  void resetTopOperands(MachineInstr *Instr) {
+    assert(hasRemainingOperands(Instr) &&
            "Reseting operands should only be done when the instruction has "
            "an operand still on the stack");
     Worklist.back() = reverse(Instr->explicit_uses());
@@ -682,7 +693,7 @@ public:
 
   /// Test whether Instr has operands remaining to be visited at the top of
   /// the stack.
-  bool HasRemainingOperands(const MachineInstr *Instr) const {
+  bool hasRemainingOperands(const MachineInstr *Instr) const {
     if (Worklist.empty())
       return false;
     const RangeTy &Range = Worklist.back();
@@ -695,7 +706,7 @@ public:
   ///
   /// This is needed as a consequence of using implicit local.gets for
   /// uses and implicit local.sets for defs.
-  bool IsOnStack(unsigned Reg) const {
+  bool isOnStack(unsigned Reg) const {
     for (const RangeTy &Range : Worklist)
       for (const MachineOperand &MO : Range)
         if (MO.isReg() && MO.getReg() == Reg)
@@ -712,20 +723,18 @@ class CommutingState {
   /// state where we've commuted the operands of the current instruction and are
   /// revisiting it, and the declined state where we've reverted the operands
   /// back to their original order and will no longer commute it further.
-  bool TentativelyCommuting;
-  bool Declined;
+  bool TentativelyCommuting = false;
+  bool Declined = false;
 
   /// During the tentative state, these hold the operand indices of the commuted
   /// operands.
   unsigned Operand0, Operand1;
 
 public:
-  CommutingState() : TentativelyCommuting(false), Declined(false) {}
-
   /// Stackification for an operand was not successful due to ordering
   /// constraints. If possible, and if we haven't already tried it and declined
   /// it, commute Insert's operands and prepare to revisit it.
-  void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
+  void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                     const WebAssemblyInstrInfo *TII) {
     if (TentativelyCommuting) {
       assert(!Declined &&
@@ -734,13 +743,13 @@ public:
       TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
       TentativelyCommuting = false;
       Declined = true;
-    } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
+    } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) {
       Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
       Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
       if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
         // Tentatively commute the operands and try again.
         TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
-        TreeWalker.ResetTopOperands(Insert);
+        TreeWalker.resetTopOperands(Insert);
         TentativelyCommuting = true;
         Declined = false;
       }
@@ -749,7 +758,7 @@ public:
 
   /// Stackification for some operand was successful. Reset to the default
   /// state.
-  void Reset() {
+  void reset() {
     TentativelyCommuting = false;
     Declined = false;
   }
@@ -767,8 +776,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
   const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
-  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
-  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  auto &MDT = getAnalysis<MachineDominatorTree>();
+  auto &LIS = getAnalysis<LiveIntervals>();
 
   // Walk the instructions from the bottom up. Currently we don't look past
   // block boundaries, and the blocks aren't ordered so the block visitation
@@ -780,19 +789,19 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr *Insert = &*MII;
       // Don't nest anything inside an inline asm, because we don't have
       // constraints for $push inputs.
-      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+      if (Insert->isInlineAsm())
         continue;
 
       // Ignore debugging intrinsics.
-      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
+      if (Insert->isDebugValue())
         continue;
 
       // Iterate through the inputs in reverse order, since we'll be pulling
       // operands off the stack in LIFO order.
       CommutingState Commuting;
       TreeWalkerState TreeWalker(Insert);
-      while (!TreeWalker.Done()) {
-        MachineOperand &Op = TreeWalker.Pop();
+      while (!TreeWalker.done()) {
+        MachineOperand &Op = TreeWalker.pop();
 
         // We're only interested in explicit virtual register operands.
         if (!Op.isReg())
@@ -806,18 +815,36 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
           continue;
 
         // Identify the definition for this register at this point.
-        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
+        MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS);
         if (!Def)
           continue;
 
         // Don't nest an INLINE_ASM def into anything, because we don't have
         // constraints for $pop outputs.
-        if (Def->getOpcode() == TargetOpcode::INLINEASM)
+        if (Def->isInlineAsm())
           continue;
 
         // Argument instructions represent live-in registers and not real
         // instructions.
-        if (WebAssembly::isArgument(*Def))
+        if (WebAssembly::isArgument(Def->getOpcode()))
+          continue;
+
+        // Currently catch's return value register cannot be stackified, because
+        // the wasm LLVM backend does not yet support live-in values entering
+        // blocks, which is part of the multi-value proposal.
+        //
+        // Once we support live-in values of wasm blocks, this can be:
+        // catch                           ; push exnref value onto stack
+        // block exnref -> i32
+        // br_on_exn $__cpp_exception      ; pop the exnref value
+        // end_block
+        //
+        // But because we don't support it yet, the catch instruction's dst
+        // register should be assigned to a local to be propagated across
+        // the 'block' boundary for now.
+        //
+        // TODO: Fix this once we support the multi-value proposal.
+        if (Def->getOpcode() == WebAssembly::CATCH)
           continue;
 
         // Decide which strategy to take. Prefer to move a single-use value
@@ -827,23 +854,23 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         // supports intra-block moves) and it's MachineSink's job to catch all
         // the sinking opportunities anyway.
         bool SameBlock = Def->getParent() == &MBB;
-        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
-                       !TreeWalker.IsOnStack(Reg);
-        if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
-          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
-        } else if (ShouldRematerialize(*Def, AA, TII)) {
+        bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) &&
+                       !TreeWalker.isOnStack(Reg);
+        if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) {
+          Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
+        } else if (shouldRematerialize(*Def, AA, TII)) {
           Insert =
-              RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
+              rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
                                     LIS, MFI, MRI, TII, TRI);
         } else if (CanMove &&
-                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
-          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
+                   oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
+          Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                          MRI, TII);
         } else {
           // We failed to stackify the operand. If the problem was ordering
           // constraints, Commuting may be able to help.
           if (!CanMove && SameBlock)
-            Commuting.MaybeCommute(Insert, TreeWalker, TII);
+            Commuting.maybeCommute(Insert, TreeWalker, TII);
           // Proceed to the next operand.
           continue;
         }
@@ -852,18 +879,18 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         // to a constant 0 so that the def is explicit, and the push/pop
         // correspondence is maintained.
         if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
-          ConvertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
+          convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
 
         // We stackified an operand. Add the defining instruction's operands to
         // the worklist stack now to continue to build an ever deeper tree.
-        Commuting.Reset();
-        TreeWalker.PushOperands(Insert);
+        Commuting.reset();
+        TreeWalker.pushOperands(Insert);
       }
 
       // If we stackified any operands, skip over the tree to start looking for
       // the next instruction we can build a tree on.
       if (Insert != &*MII) {
-        ImposeStackOrdering(&*MII);
+        imposeStackOrdering(&*MII);
         MII = MachineBasicBlock::iterator(Insert).getReverse();
         Changed = true;
       }
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 1f0870865b06..ea9cfc00adfd 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -67,19 +66,22 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
   assert(MFI.getObjectSize(FrameIndex) != 0 &&
          "We assume that variable-sized objects have already been lowered, "
          "and don't use FrameIndex operands.");
-  unsigned FrameRegister = getFrameRegister(MF);
+  Register FrameRegister = getFrameRegister(MF);
 
   // If this is the address operand of a load or store, make it relative to SP
   // and fold the frame offset directly in.
-  if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
-      (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
-    assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
-    int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
+  unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx(
+      MI.getOpcode(), WebAssembly::OpName::addr);
+  if (AddrOperandNum == FIOperandNum) {
+    unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
+        MI.getOpcode(), WebAssembly::OpName::off);
+    assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
+    int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
 
     if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
-      MI.getOperand(FIOperandNum - 1).setImm(Offset);
+      MI.getOperand(OffsetOperandNum).setImm(Offset);
       MI.getOperand(FIOperandNum)
-          .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+          .ChangeToRegister(FrameRegister, /*isDef=*/false);
       return;
     }
   }
@@ -100,7 +102,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
           MachineOperand &ImmMO = Def->getOperand(1);
           ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
           MI.getOperand(FIOperandNum)
-              .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+              .ChangeToRegister(FrameRegister, /*isDef=*/false);
           return;
         }
       }
@@ -125,10 +127,10 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
         .addReg(FrameRegister)
         .addReg(OffsetOp);
   }
-  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
+  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false);
 }
 
-unsigned
+Register
 WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   static const unsigned Regs[2][2] = {
       /*            !isArch64Bit       isArch64Bit      */
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index 2a73dfd4b065..7880eb217dbf 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -1,9 +1,8 @@
 // WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -40,7 +39,7 @@ public:
                            RegScavenger *RS = nullptr) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   const TargetRegisterClass *
   getPointerRegClass(const MachineFunction &MF,
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index a7c3d177724d..6d3d6c723277 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -1,9 +1,8 @@
 //WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -44,7 +43,7 @@ def F64_0 : WebAssemblyReg<"%f64.0">;
 
 def V128_0: WebAssemblyReg<"%v128">;
 
-def EXCEPT_REF_0 : WebAssemblyReg<"%except_ref.0">;
+def EXNREF_0 : WebAssemblyReg<"%exnref.0">;
 
 // The value stack "register". This is an opaque entity which serves to order
 // uses and defs that must remain in LIFO order.
@@ -65,4 +64,4 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
 def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
 def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
                                (add V128_0)>;
-def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
+def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index e5a3e47a3bcd..5eafd6c54e78 100644
--- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 6cf81a9d77b3..7b9ae90326f0 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -1,9 +1,8 @@
 // CodeGen/RuntimeLibcallSignatures.cpp - R.T. Lib. Call Signatures -*- C++ -*--
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -52,6 +51,8 @@ enum RuntimeLibcallSignature {
   f64_func_f64_i32,
   f64_func_i64_i64,
   i16_func_f32,
+  i16_func_f64,
+  i16_func_i64_i64,
   i8_func_i8_i8,
   func_f32_iPTR_iPTR,
   func_f64_iPTR_iPTR,
@@ -85,6 +86,9 @@ enum RuntimeLibcallSignature {
   func_iPTR_i64_i64_i64_i64_i64_i64,
   i32_func_i64_i64,
   i32_func_i64_i64_i64_i64,
+  iPTR_func_f32,
+  iPTR_func_f64,
+  iPTR_func_i64_i64,
   unsupported
 };
 
@@ -215,6 +219,18 @@ struct RuntimeLibcallSignatureTable {
     Table[RTLIB::ROUND_F32] = f32_func_f32;
     Table[RTLIB::ROUND_F64] = f64_func_f64;
     Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64;
+    Table[RTLIB::LROUND_F32] = iPTR_func_f32;
+    Table[RTLIB::LROUND_F64] = iPTR_func_f64;
+    Table[RTLIB::LROUND_F128] = iPTR_func_i64_i64;
+    Table[RTLIB::LLROUND_F32] = i64_func_f32;
+    Table[RTLIB::LLROUND_F64] = i64_func_f64;
+    Table[RTLIB::LLROUND_F128] = i64_func_i64_i64;
+    Table[RTLIB::LRINT_F32] = iPTR_func_f32;
+    Table[RTLIB::LRINT_F64] = iPTR_func_f64;
+    Table[RTLIB::LRINT_F128] = iPTR_func_i64_i64;
+    Table[RTLIB::LLRINT_F32] = i64_func_f32;
+    Table[RTLIB::LLRINT_F64] = i64_func_f64;
+    Table[RTLIB::LLRINT_F128] = i64_func_i64_i64;
     Table[RTLIB::FLOOR_F32] = f32_func_f32;
     Table[RTLIB::FLOOR_F64] = f64_func_f64;
     Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64;
@@ -229,13 +245,15 @@ struct RuntimeLibcallSignatureTable {
     Table[RTLIB::FMAX_F128] = func_iPTR_i64_i64_i64_i64;
 
     // Conversion
-    // All F80 and PPCF128 routines are unspported.
+    // All F80 and PPCF128 routines are unsupported.
     Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64;
     Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32;
     Table[RTLIB::FPEXT_F32_F64] = f64_func_f32;
     Table[RTLIB::FPEXT_F16_F32] = f32_func_i16;
     Table[RTLIB::FPROUND_F32_F16] = i16_func_f32;
+    Table[RTLIB::FPROUND_F64_F16] = i16_func_f64;
     Table[RTLIB::FPROUND_F64_F32] = f32_func_f64;
+    Table[RTLIB::FPROUND_F128_F16] = i16_func_i64_i64;
     Table[RTLIB::FPROUND_F128_F32] = f32_func_i64_i64;
     Table[RTLIB::FPROUND_F128_F64] = f64_func_i64_i64;
     Table[RTLIB::FPTOSINT_F32_I32] = i32_func_f32;
@@ -310,6 +328,12 @@ struct RuntimeLibcallSignatureTable {
     Table[RTLIB::MEMSET] = iPTR_func_iPTR_i32_iPTR;
     Table[RTLIB::MEMMOVE] = iPTR_func_iPTR_iPTR_iPTR;
 
+    // __stack_chk_fail
+    Table[RTLIB::STACKPROTECTOR_CHECK_FAIL] = func;
+
+    // Return address handling
+    Table[RTLIB::RETURN_ADDRESS] = i32_func_i32;
+
     // Element-wise Atomic memory
     // TODO: Fix these when we implement atomic support
     Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = unsupported;
@@ -480,19 +504,25 @@ struct StaticLibcallNameMap {
         Map[NameLibcall.first] = NameLibcall.second;
       }
     }
+    // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
+    // consistent with the f64 and f128 names.
+    Map["__extendhfsf2"] = RTLIB::FPEXT_F16_F32;
+    Map["__truncsfhf2"] = RTLIB::FPROUND_F32_F16;
+
+    Map["emscripten_return_address"] = RTLIB::RETURN_ADDRESS;
   }
 };
 
 } // end anonymous namespace
 
-void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
                                RTLIB::Libcall LC,
                                SmallVectorImpl<wasm::ValType> &Rets,
                                SmallVectorImpl<wasm::ValType> &Params) {
   assert(Rets.empty());
   assert(Params.empty());
 
-  wasm::ValType iPTR =
+  wasm::ValType PtrTy =
       Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
 
   auto &Table = RuntimeLibcallSignatures->Table;
@@ -593,6 +623,15 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::F32);
     break;
+  case i16_func_f64:
+    Rets.push_back(wasm::ValType::I32);
+    Params.push_back(wasm::ValType::F64);
+    break;
+  case i16_func_i64_i64:
+    Rets.push_back(wasm::ValType::I32);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
   case i8_func_i8_i8:
     Rets.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::I32);
@@ -600,13 +639,13 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     break;
   case func_f32_iPTR_iPTR:
     Params.push_back(wasm::ValType::F32);
-    Params.push_back(iPTR);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
     break;
   case func_f64_iPTR_iPTR:
     Params.push_back(wasm::ValType::F64);
-    Params.push_back(iPTR);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
     break;
   case i16_func_i16_i16:
     Rets.push_back(wasm::ValType::I32);
@@ -632,7 +671,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::I32);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     break;
   case i64_func_i64_i64:
     Rets.push_back(wasm::ValType::I64);
@@ -643,14 +682,14 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     break;
   case i64_i64_func_f32:
 #if 0 // TODO: Enable this when wasm gets multiple-return-value support.
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::F32);
     break;
@@ -659,7 +698,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::F64);
     break;
@@ -668,7 +707,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I32);
     Rets.push_back(wasm::ValType::I32);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::I32);
@@ -678,7 +717,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I32);
     Rets.push_back(wasm::ValType::I32);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I32);
     Params.push_back(wasm::ValType::I32);
@@ -688,7 +727,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
@@ -698,7 +737,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
@@ -710,13 +749,13 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     break;
   case i64_i64_i64_i64_func_i64_i64_i64_i64:
 #if 0 // TODO: Enable this when wasm gets multiple-return-value support.
@@ -725,7 +764,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
@@ -739,23 +778,23 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Rets.push_back(wasm::ValType::I64);
     Rets.push_back(wasm::ValType::I64);
 #else
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
 #endif
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I32);
     break;
   case iPTR_func_iPTR_i32_iPTR:
-    Rets.push_back(iPTR);
-    Params.push_back(iPTR);
+    Rets.push_back(PtrTy);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I32);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     break;
   case iPTR_func_iPTR_iPTR_iPTR:
-    Rets.push_back(iPTR);
-    Params.push_back(iPTR);
-    Params.push_back(iPTR);
-    Params.push_back(iPTR);
+    Rets.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
     break;
   case f32_func_f32_f32_f32:
     Rets.push_back(wasm::ValType::F32);
@@ -772,39 +811,39 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
   case func_i64_i64_iPTR_iPTR:
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
-    Params.push_back(iPTR);
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
+    Params.push_back(PtrTy);
     break;
   case func_iPTR_f32:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::F32);
     break;
   case func_iPTR_f64:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::F64);
     break;
   case func_iPTR_i32:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I32);
     break;
   case func_iPTR_i64:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I64);
     break;
   case func_iPTR_i64_i64:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     break;
   case func_iPTR_i64_i64_i64_i64:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     break;
   case func_iPTR_i64_i64_i64_i64_i64_i64:
-    Params.push_back(iPTR);
+    Params.push_back(PtrTy);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
@@ -824,6 +863,19 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
     Params.push_back(wasm::ValType::I64);
     Params.push_back(wasm::ValType::I64);
     break;
+  case iPTR_func_f32:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F32);
+    break;
+  case iPTR_func_f64:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::F64);
+    break;
+  case iPTR_func_i64_i64:
+    Rets.push_back(PtrTy);
+    Params.push_back(wasm::ValType::I64);
+    Params.push_back(wasm::ValType::I64);
+    break;
   case unsupported:
     llvm_unreachable("unsupported runtime library signature");
   }
@@ -832,12 +884,17 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
 static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
 // TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unsed
 // other than here, just roll its logic into this version.
-void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
                                const char *Name,
                                SmallVectorImpl<wasm::ValType> &Rets,
                                SmallVectorImpl<wasm::ValType> &Params) {
   auto &Map = LibcallNameMap->Map;
-  auto val = Map.find(Name);
-  assert(val != Map.end() && "unexpected runtime library name");
-  return GetLibcallSignature(Subtarget, val->second, Rets, Params);
+  auto Val = Map.find(Name);
+  if (Val == Map.end()) {
+    // Checked in every build mode: an NDEBUG build would otherwise reach the
+    // call below and dereference the end iterator.
+    report_fatal_error(std::string("unexpected runtime library name: ") +
+                       Name);
+  }
+  return getLibcallSignature(Subtarget, Val->second, Rets, Params);
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index 7fa70bea96de..6ae8aaaba59c 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -1,9 +1,8 @@
 // CodeGen/RuntimeLibcallSignatures.h - R.T. Lib. Call Signatures -*- C++ -*--//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -23,12 +22,12 @@ namespace llvm {
 
 class WebAssemblySubtarget;
 
-extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
                                 RTLIB::Libcall LC,
                                 SmallVectorImpl<wasm::ValType> &Rets,
                                 SmallVectorImpl<wasm::ValType> &Params);
 
-extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
                                 const char *Name,
                                 SmallVectorImpl<wasm::ValType> &Rets,
                                 SmallVectorImpl<wasm::ValType> &Params);
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index bec72049258a..890e4b8e4e2a 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -17,4 +16,44 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-selectiondag-info"
 
-WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {}
+WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  const MachineFunction &MF = DAG.getMachineFunction();
+  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+    return SDValue(); // No custom node without the bulk memory feature.
+
+  // The same i32 zero is used for both operands that precede Dst and Src.
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+  SDValue Len = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
+  SDValue Ops[] = {Chain, Zero, Zero, Dst, Src, Len};
+  return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, Ops);
+}
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
+    SDValue Op3, unsigned Align, bool IsVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  return EmitTargetCodeForMemcpy(
+      DAG, DL, Chain, /*Dst=*/Op1, /*Src=*/Op2, /*Size=*/Op3, Align, IsVolatile,
+      /*AlwaysInline=*/false, DstPtrInfo, SrcPtrInfo);
+}
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val,
+    SDValue Size, unsigned Align, bool IsVolatile,
+    MachinePointerInfo DstPtrInfo) const {
+  const MachineFunction &MF = DAG.getMachineFunction();
+  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+    return SDValue(); // No custom node without the bulk memory feature.
+  // Build operands in separate statements: argument evaluation order is
+  // unspecified. Only the low byte of Val matters, hence the any-extend.
+  SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+  SDValue Fill = DAG.getAnyExtOrTrunc(Val, DL, MVT::i32);
+  SDValue Len = DAG.getZExtOrTrunc(Size, DL, MVT::i32);
+  return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx,
+                     Dst, Fill, Len);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index 31d150eded67..0b90ece27dff 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -23,6 +22,26 @@ namespace llvm {
 class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo {
 public:
   ~WebAssemblySelectionDAGInfo() override;
+  /// Emits a WebAssemblyISD::MEMORY_COPY node when the subtarget has bulk
+  /// memory; otherwise returns an empty SDValue.
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
+                                  SDValue Chain, SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align, bool IsVolatile,
+                                  bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const override;
+  /// Forwards to EmitTargetCodeForMemcpy, passing false for AlwaysInline.
+  SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &DL,
+                                   SDValue Chain, SDValue Op1, SDValue Op2,
+                                   SDValue Op3, unsigned Align, bool IsVolatile,
+                                   MachinePointerInfo DstPtrInfo,
+                                   MachinePointerInfo SrcPtrInfo) const override;
+  /// Emits a WebAssemblyISD::MEMORY_FILL node when the subtarget has bulk
+  /// memory; otherwise returns an empty SDValue.
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
+                                  SDValue Chain, SDValue Dst, SDValue Val,
+                                  SDValue Size, unsigned Align, bool IsVolatile,
+                                  MachinePointerInfo DstPtrInfo) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index c95af88c6f43..a249ccf17638 100644
--- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -1,9 +1,8 @@
 //=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -14,6 +13,7 @@
 
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssembly.h"
+#include "WebAssemblyInstrInfo.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblySetP2AlignOperands() {
   return new WebAssemblySetP2AlignOperands();
 }
 
-static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
+static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
   assert(MI.getOperand(OperandNo).getImm() == 0 &&
          "ISel should set p2align operands to 0");
   assert(MI.hasOneMemOperand() &&
@@ -84,114 +84,11 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
 
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
-      switch (MI.getOpcode()) {
-      case WebAssembly::LOAD_I32:
-      case WebAssembly::LOAD_I64:
-      case WebAssembly::LOAD_F32:
-      case WebAssembly::LOAD_F64:
-      case WebAssembly::LOAD_v16i8:
-      case WebAssembly::LOAD_v8i16:
-      case WebAssembly::LOAD_v4i32:
-      case WebAssembly::LOAD_v2i64:
-      case WebAssembly::LOAD_v4f32:
-      case WebAssembly::LOAD_v2f64:
-      case WebAssembly::LOAD8_S_I32:
-      case WebAssembly::LOAD8_U_I32:
-      case WebAssembly::LOAD16_S_I32:
-      case WebAssembly::LOAD16_U_I32:
-      case WebAssembly::LOAD8_S_I64:
-      case WebAssembly::LOAD8_U_I64:
-      case WebAssembly::LOAD16_S_I64:
-      case WebAssembly::LOAD16_U_I64:
-      case WebAssembly::LOAD32_S_I64:
-      case WebAssembly::LOAD32_U_I64:
-      case WebAssembly::ATOMIC_LOAD_I32:
-      case WebAssembly::ATOMIC_LOAD8_U_I32:
-      case WebAssembly::ATOMIC_LOAD16_U_I32:
-      case WebAssembly::ATOMIC_LOAD_I64:
-      case WebAssembly::ATOMIC_LOAD8_U_I64:
-      case WebAssembly::ATOMIC_LOAD16_U_I64:
-      case WebAssembly::ATOMIC_LOAD32_U_I64:
-      case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
-      case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
-      case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW8_U_AND_I32:
-      case WebAssembly::ATOMIC_RMW8_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW8_U_OR_I32:
-      case WebAssembly::ATOMIC_RMW8_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
-      case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
-      case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
-      case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW16_U_AND_I32:
-      case WebAssembly::ATOMIC_RMW16_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW16_U_OR_I32:
-      case WebAssembly::ATOMIC_RMW16_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
-      case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW_ADD_I32:
-      case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
-      case WebAssembly::ATOMIC_RMW_SUB_I32:
-      case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
-      case WebAssembly::ATOMIC_RMW_AND_I32:
-      case WebAssembly::ATOMIC_RMW32_U_AND_I64:
-      case WebAssembly::ATOMIC_RMW_OR_I32:
-      case WebAssembly::ATOMIC_RMW32_U_OR_I64:
-      case WebAssembly::ATOMIC_RMW_XOR_I32:
-      case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
-      case WebAssembly::ATOMIC_RMW_XCHG_I32:
-      case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
-      case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_RMW_ADD_I64:
-      case WebAssembly::ATOMIC_RMW_SUB_I64:
-      case WebAssembly::ATOMIC_RMW_AND_I64:
-      case WebAssembly::ATOMIC_RMW_OR_I64:
-      case WebAssembly::ATOMIC_RMW_XOR_I64:
-      case WebAssembly::ATOMIC_RMW_XCHG_I64:
-      case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
-      case WebAssembly::ATOMIC_NOTIFY:
-      case WebAssembly::ATOMIC_WAIT_I32:
-      case WebAssembly::ATOMIC_WAIT_I64:
-        RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
-        break;
-      case WebAssembly::STORE_I32:
-      case WebAssembly::STORE_I64:
-      case WebAssembly::STORE_F32:
-      case WebAssembly::STORE_F64:
-      case WebAssembly::STORE_v16i8:
-      case WebAssembly::STORE_v8i16:
-      case WebAssembly::STORE_v4i32:
-      case WebAssembly::STORE_v2i64:
-      case WebAssembly::STORE_v4f32:
-      case WebAssembly::STORE_v2f64:
-      case WebAssembly::STORE8_I32:
-      case WebAssembly::STORE16_I32:
-      case WebAssembly::STORE8_I64:
-      case WebAssembly::STORE16_I64:
-      case WebAssembly::STORE32_I64:
-      case WebAssembly::ATOMIC_STORE_I32:
-      case WebAssembly::ATOMIC_STORE8_I32:
-      case WebAssembly::ATOMIC_STORE16_I32:
-      case WebAssembly::ATOMIC_STORE_I64:
-      case WebAssembly::ATOMIC_STORE8_I64:
-      case WebAssembly::ATOMIC_STORE16_I64:
-      case WebAssembly::ATOMIC_STORE32_I64:
-        RewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
-        break;
-      default:
-        break;
+      int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx(
+          MI.getOpcode(), WebAssembly::OpName::p2align);
+      if (P2AlignOpNum != -1) {
+        rewriteP2Align(MI, P2AlignOpNum);
+        Changed = true;
       }
     }
   }
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 98133e2153a0..196a74565285 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -45,6 +44,11 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
       InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
       TLInfo(TM, *this) {}
 
+bool WebAssemblySubtarget::enableAtomicExpand() const {
+  // Without the atomics feature, atomic ops are lowered rather than expanded.
+  return hasAtomics();
+}
+
 bool WebAssemblySubtarget::enableMachineScheduler() const {
   // Disable the MachineScheduler for now. Even with ShouldTrackPressure set and
   // enableMachineSchedDefaultSched overridden, it appears to have an overall
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 0a0c04609ac4..8db2120f9834 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -1,9 +1,8 @@
 //=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -23,11 +22,16 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_ENUM
 #define GET_SUBTARGETINFO_HEADER
 #include "WebAssemblyGenSubtargetInfo.inc"
 
 namespace llvm {
 
+// Defined in WebAssemblyGenSubtargetInfo.inc.
+extern const SubtargetFeatureKV
+    WebAssemblyFeatureKV[WebAssembly::NumSubtargetFeatures];
+
 class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
   enum SIMDEnum {
     NoSIMD,
@@ -39,6 +43,10 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
   bool HasNontrappingFPToInt = false;
   bool HasSignExt = false;
   bool HasExceptionHandling = false;
+  bool HasBulkMemory = false;
+  bool HasMultivalue = false;
+  bool HasMutableGlobals = false;
+  bool HasTailCall = false;
 
   /// String name of used CPU.
   std::string CPUString;
@@ -77,6 +85,8 @@ public:
     return &getInstrInfo()->getRegisterInfo();
   }
   const Triple &getTargetTriple() const { return TargetTriple; }
+  bool enableAtomicExpand() const override;
+  bool enableIndirectBrExpand() const override { return true; }
   bool enableMachineScheduler() const override;
   bool useAA() const override;
 
@@ -90,6 +100,10 @@ public:
   bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
   bool hasSignExt() const { return HasSignExt; }
   bool hasExceptionHandling() const { return HasExceptionHandling; }
+  bool hasBulkMemory() const { return HasBulkMemory; }
+  bool hasMultivalue() const { return HasMultivalue; }
+  bool hasMutableGlobals() const { return HasMutableGlobals; }
+  bool hasTailCall() const { return HasTailCall; }
 
   /// Parses features string setting specified subtarget options. Definition of
   /// function is auto generated by tblgen.
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 3bf8dd40892c..7e65368e671a 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -14,9 +13,12 @@
 
 #include "WebAssemblyTargetMachine.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
 #include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblyTargetObjectFile.h"
 #include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
@@ -25,6 +27,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
 
@@ -58,19 +61,18 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
   initializeOptimizeReturnedPass(PR);
   initializeWebAssemblyArgumentMovePass(PR);
   initializeWebAssemblySetP2AlignOperandsPass(PR);
-  initializeWebAssemblyEHRestoreStackPointerPass(PR);
   initializeWebAssemblyReplacePhysRegsPass(PR);
   initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
   initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
   initializeWebAssemblyMemIntrinsicResultsPass(PR);
   initializeWebAssemblyRegStackifyPass(PR);
   initializeWebAssemblyRegColoringPass(PR);
-  initializeWebAssemblyExplicitLocalsPass(PR);
   initializeWebAssemblyFixIrreducibleControlFlowPass(PR);
   initializeWebAssemblyLateEHPreparePass(PR);
   initializeWebAssemblyExceptionInfoPass(PR);
   initializeWebAssemblyCFGSortPass(PR);
   initializeWebAssemblyCFGStackifyPass(PR);
+  initializeWebAssemblyExplicitLocalsPass(PR);
   initializeWebAssemblyLowerBrUnlessPass(PR);
   initializeWebAssemblyRegNumberingPass(PR);
   initializeWebAssemblyPeepholePass(PR);
@@ -81,13 +83,22 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
 // WebAssembly Lowering public interface.
 //===----------------------------------------------------------------------===//
 
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM,
+                                           const Triple &TT) {
   if (!RM.hasValue()) {
     // Default to static relocation model.  This should always be more optimial
     // than PIC since the static linker can determine all global addresses and
     // assume direct function calls.
     return Reloc::Static;
   }
+
+  if (!TT.isOSEmscripten()) {
+    // Relocation modes other than static are currently implemented in a way
+    // that only works for Emscripten, so disable them if we aren't targeting
+    // Emscripten.
+    return Reloc::Static;
+  }
+
   return *RM;
 }
 
@@ -100,7 +111,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
     : LLVMTargetMachine(T,
                         TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
                                          : "e-m:e-p:32:32-i64:64-n32:64-S128",
-                        TT, CPU, FS, Options, getEffectiveRelocModel(RM),
+                        TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
                         getEffectiveCodeModel(CM, CodeModel::Large), OL),
       TLOF(new WebAssemblyTargetObjectFile()) {
   // WebAssembly type-checks instructions, but a noreturn function with a return
@@ -122,7 +133,17 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
   // splitting and tail merging.
 }
 
-WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {}
+WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
+                                           std::string FS) const {
+  auto &I = SubtargetMap[CPU + FS];
+  if (!I) {
+    I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+  }
+  return I.get();
+}
 
 const WebAssemblySubtarget *
 WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -136,33 +157,141 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
                        ? FSAttr.getValueAsString().str()
                        : TargetFS;
 
-  auto &I = SubtargetMap[CPU + FS];
-  if (!I) {
-    // This needs to be done before we create a new subtarget since any
-    // creation will depend on the TM and the code generation flags on the
-    // function that reside in TargetOptions.
-    resetTargetOptions(F);
-    I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
-  }
-  return I.get();
+  // This needs to be done before we create a new subtarget since any
+  // creation will depend on the TM and the code generation flags on the
+  // function that reside in TargetOptions.
+  resetTargetOptions(F);
+
+  return getSubtargetImpl(CPU, FS);
 }
 
 namespace {
-class StripThreadLocal final : public ModulePass {
-  // The default thread model for wasm is single, where thread-local variables
-  // are identical to regular globals and should be treated the same. So this
-  // pass just converts all GlobalVariables to NotThreadLocal
+
+class CoalesceFeaturesAndStripAtomics final : public ModulePass {
+  // Take the union of all features used in the module and use it for each
+  // function individually, since having multiple feature sets in one module
+  // currently does not make sense for WebAssembly. If atomics are not enabled,
+  // also strip atomic operations and thread local storage.
   static char ID;
+  WebAssemblyTargetMachine *WasmTM;
 
 public:
-  StripThreadLocal() : ModulePass(ID) {}
+  CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM)
+      : ModulePass(ID), WasmTM(WasmTM) {}
+
   bool runOnModule(Module &M) override {
-    for (auto &GV : M.globals())
-      GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
+    FeatureBitset Features = coalesceFeatures(M);
+
+    std::string FeatureStr = getFeatureString(Features);
+    for (auto &F : M)
+      replaceFeatures(F, FeatureStr);
+
+    bool StrippedAtomics = false;
+    bool StrippedTLS = false;
+
+    if (!Features[WebAssembly::FeatureAtomics])
+      StrippedAtomics = stripAtomics(M);
+
+    if (!Features[WebAssembly::FeatureBulkMemory])
+      StrippedTLS = stripThreadLocals(M);
+
+    if (StrippedAtomics && !StrippedTLS)
+      stripThreadLocals(M);
+    else if (StrippedTLS && !StrippedAtomics)
+      stripAtomics(M);
+
+    recordFeatures(M, Features, StrippedAtomics || StrippedTLS);
+
+    // Conservatively assume we have made some change
+    return true;
+  }
+
+private:
+  FeatureBitset coalesceFeatures(const Module &M) {
+    FeatureBitset Features =
+        WasmTM
+            ->getSubtargetImpl(WasmTM->getTargetCPU(),
+                               WasmTM->getTargetFeatureString())
+            ->getFeatureBits();
+    for (auto &F : M)
+      Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
+    return Features;
+  }
+
+  std::string getFeatureString(const FeatureBitset &Features) {
+    std::string Ret;
+    for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+      if (Features[KV.Value])
+        Ret += (StringRef("+") + KV.Key + ",").str();
+    }
+    return Ret;
+  }
+
+  void replaceFeatures(Function &F, const std::string &Features) {
+    F.removeFnAttr("target-features");
+    F.removeFnAttr("target-cpu");
+    F.addFnAttr("target-features", Features);
+  }
+
+  bool stripAtomics(Module &M) {
+    // Detect whether any atomics will be lowered, since there is no way to tell
+    // whether the LowerAtomic pass lowers e.g. stores.
+    bool Stripped = false;
+    for (auto &F : M) {
+      for (auto &B : F) {
+        for (auto &I : B) {
+          if (I.isAtomic()) {
+            Stripped = true;
+            goto done;
+          }
+        }
+      }
+    }
+
+  done:
+    if (!Stripped)
+      return false;
+
+    LowerAtomicPass Lowerer;
+    FunctionAnalysisManager FAM;
+    for (auto &F : M)
+      Lowerer.run(F, FAM);
+
     return true;
   }
+
+  bool stripThreadLocals(Module &M) {
+    bool Stripped = false;
+    for (auto &GV : M.globals()) {
+      if (GV.getThreadLocalMode() !=
+          GlobalValue::ThreadLocalMode::NotThreadLocal) {
+        Stripped = true;
+        GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
+      }
+    }
+    return Stripped;
+  }
+
+  void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) {
+    for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+      std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+      if (KV.Value == WebAssembly::FeatureAtomics && Stripped) {
+        // "atomics" is special: code compiled without atomics may have had its
+        // atomics lowered to nonatomic operations. In that case, atomics is
+        // disallowed to prevent unsafe linking with atomics-enabled objects.
+        assert(!Features[WebAssembly::FeatureAtomics] ||
+               !Features[WebAssembly::FeatureBulkMemory]);
+        M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
+                        wasm::WASM_FEATURE_PREFIX_DISALLOWED);
+      } else if (Features[KV.Value]) {
+        // Otherwise features are marked Used or not mentioned
+        M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
+                        wasm::WASM_FEATURE_PREFIX_USED);
+      }
+    }
+  }
 };
-char StripThreadLocal::ID = 0;
+char CoalesceFeaturesAndStripAtomics::ID = 0;
 
 /// WebAssembly Code Generator Pass Configuration Options.
 class WebAssemblyPassConfig final : public TargetPassConfig {
@@ -181,6 +310,12 @@ public:
   void addPostRegAlloc() override;
   bool addGCPasses() override { return false; }
   void addPreEmitPass() override;
+
+  // No reg alloc
+  bool addRegAssignmentFast() override { return false; }
+
+  // No reg alloc
+  bool addRegAssignmentOptimized() override { return false; }
 };
 } // end anonymous namespace
 
@@ -204,15 +339,11 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
 //===----------------------------------------------------------------------===//
 
 void WebAssemblyPassConfig::addIRPasses() {
-  if (TM->Options.ThreadModel == ThreadModel::Single) {
-    // In "single" mode, atomics get lowered to non-atomics.
-    addPass(createLowerAtomicPass());
-    addPass(new StripThreadLocal());
-  } else {
-    // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
-    // control specifically what gets lowered.
-    addPass(createAtomicExpandPass());
-  }
+  // Runs LowerAtomicPass if necessary
+  addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
+
+  // This is a no-op if atomics are not used in the module
+  addPass(createAtomicExpandPass());
 
   // Add signatures to prototype-less function declarations
   addPass(createWebAssemblyAddMissingPrototypes());
@@ -246,6 +377,9 @@ void WebAssemblyPassConfig::addIRPasses() {
     addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
                                                    EnableEmSjLj));
 
+  // Expand indirectbr instructions to switches.
+  addPass(createIndirectBrExpandPass());
+
   TargetPassConfig::addIRPasses();
 }
 
@@ -279,20 +413,16 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
   disablePass(&PatchableFunctionID);
   disablePass(&ShrinkWrapID);
 
+  // This pass hurts code size for wasm because it can generate irreducible
+  // control flow.
+  disablePass(&MachineBlockPlacementID);
+
   TargetPassConfig::addPostRegAlloc();
 }
 
 void WebAssemblyPassConfig::addPreEmitPass() {
   TargetPassConfig::addPreEmitPass();
 
-  // Restore __stack_pointer global after an exception is thrown.
-  addPass(createWebAssemblyEHRestoreStackPointer());
-
-  // Now that we have a prologue and epilogue and all frame indices are
-  // rewritten, eliminate SP and FP. This allows them to be stackified,
-  // colored, and numbered with the rest of the registers.
-  addPass(createWebAssemblyReplacePhysRegs());
-
   // Rewrite pseudo call_indirect instructions as real instructions.
   // This needs to run before register stackification, because we change the
   // order of the arguments.
@@ -302,8 +432,15 @@ void WebAssemblyPassConfig::addPreEmitPass() {
   addPass(createWebAssemblyFixIrreducibleControlFlow());
 
   // Do various transformations for exception handling.
+  // All CFG-changing optimizations should come before this.
   addPass(createWebAssemblyLateEHPrepare());
 
+  // Now that we have a prologue and epilogue and all frame indices are
+  // rewritten, eliminate SP and FP. This allows them to be stackified,
+  // colored, and numbered with the rest of the registers.
+  addPass(createWebAssemblyReplacePhysRegs());
+
+  // Preparations and optimizations related to register stackification.
   if (getOptLevel() != CodeGenOpt::None) {
     // LiveIntervals isn't commonly run this late. Re-establish preconditions.
     addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -327,9 +464,6 @@ void WebAssemblyPassConfig::addPreEmitPass() {
     addPass(createWebAssemblyRegColoring());
   }
 
-  // Insert explicit local.get and local.set operators.
-  addPass(createWebAssemblyExplicitLocals());
-
   // Sort the blocks of the CFG into topological order, a prerequisite for
   // BLOCK and LOOP markers.
   addPass(createWebAssemblyCFGSort());
@@ -337,6 +471,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
   // Insert BLOCK and LOOP markers.
   addPass(createWebAssemblyCFGStackify());
 
+  // Insert explicit local.get and local.set operators.
+  addPass(createWebAssemblyExplicitLocals());
+
   // Lower br_unless into br_if.
   addPass(createWebAssemblyLowerBrUnless());
 
@@ -347,3 +484,24 @@ void WebAssemblyPassConfig::addPreEmitPass() {
   // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
   addPass(createWebAssemblyRegNumbering());
 }
+
+yaml::MachineFunctionInfo *
+WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
+  return new yaml::WebAssemblyFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
+    const MachineFunction &MF) const {
+  const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+  return new yaml::WebAssemblyFunctionInfo(*MFI);
+}
+
+bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
+    const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+    SMDiagnostic &Error, SMRange &SourceRange) const {
+  const auto &YamlMFI =
+      reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
+  MachineFunction &MF = PFS.MF;
+  MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+  return false;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 41001e7a0cc7..850e6b9a9e9e 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -1,9 +1,8 @@
 // WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*-
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -33,6 +32,9 @@ public:
                            bool JIT);
 
   ~WebAssemblyTargetMachine() override;
+
+  const WebAssemblySubtarget *getSubtargetImpl(std::string CPU,
+                                               std::string FS) const;
   const WebAssemblySubtarget *
   getSubtargetImpl(const Function &F) const override;
 
@@ -46,6 +48,14 @@ public:
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
 
   bool usesPhysRegsForPEI() const override { return false; }
+
+  yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+  yaml::MachineFunctionInfo *
+  convertFuncInfoToYAML(const MachineFunction &MF) const override;
+  bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+                                PerFunctionMIParsingState &PFS,
+                                SMDiagnostic &Error,
+                                SMRange &SourceRange) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
index 0459bfca418d..ad57c600db10 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ce744ba8b8e8..f46bb2040a7d 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 4a2777cc3a9f..46ef765ce0f4 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -51,7 +50,7 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
   unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
       Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
 
-  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
     switch (Opcode) {
     case Instruction::LShr:
     case Instruction::AShr:
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 4300ca3defbf..1b11b4b631eb 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index ada6fb9a96d7..e9d88d4818a5 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -1,9 +1,8 @@
 //===-- WebAssemblyUtilities.cpp - WebAssembly Utility Functions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -25,70 +24,6 @@ const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev";
 const char *const WebAssembly::PersonalityWrapperFn =
     "_Unwind_Wasm_CallPersonality";
 
-bool WebAssembly::isArgument(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::ARGUMENT_i32:
-  case WebAssembly::ARGUMENT_i32_S:
-  case WebAssembly::ARGUMENT_i64:
-  case WebAssembly::ARGUMENT_i64_S:
-  case WebAssembly::ARGUMENT_f32:
-  case WebAssembly::ARGUMENT_f32_S:
-  case WebAssembly::ARGUMENT_f64:
-  case WebAssembly::ARGUMENT_f64_S:
-  case WebAssembly::ARGUMENT_v16i8:
-  case WebAssembly::ARGUMENT_v16i8_S:
-  case WebAssembly::ARGUMENT_v8i16:
-  case WebAssembly::ARGUMENT_v8i16_S:
-  case WebAssembly::ARGUMENT_v4i32:
-  case WebAssembly::ARGUMENT_v4i32_S:
-  case WebAssembly::ARGUMENT_v2i64:
-  case WebAssembly::ARGUMENT_v2i64_S:
-  case WebAssembly::ARGUMENT_v4f32:
-  case WebAssembly::ARGUMENT_v4f32_S:
-  case WebAssembly::ARGUMENT_v2f64:
-  case WebAssembly::ARGUMENT_v2f64_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::COPY_I32:
-  case WebAssembly::COPY_I32_S:
-  case WebAssembly::COPY_I64:
-  case WebAssembly::COPY_I64_S:
-  case WebAssembly::COPY_F32:
-  case WebAssembly::COPY_F32_S:
-  case WebAssembly::COPY_F64:
-  case WebAssembly::COPY_F64_S:
-  case WebAssembly::COPY_V128:
-  case WebAssembly::COPY_V128_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isTee(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::TEE_I32:
-  case WebAssembly::TEE_I32_S:
-  case WebAssembly::TEE_I64:
-  case WebAssembly::TEE_I64_S:
-  case WebAssembly::TEE_F32:
-  case WebAssembly::TEE_F32_S:
-  case WebAssembly::TEE_F64:
-  case WebAssembly::TEE_F64_S:
-  case WebAssembly::TEE_V128:
-  case WebAssembly::TEE_V128_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
 /// Test whether MI is a child of some other node in an expression tree.
 bool WebAssembly::isChild(const MachineInstr &MI,
                           const WebAssemblyFunctionInfo &MFI) {
@@ -102,201 +37,20 @@ bool WebAssembly::isChild(const MachineInstr &MI,
          MFI.isVRegStackified(Reg);
 }
 
-bool WebAssembly::isCallDirect(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_I32:
-  case WebAssembly::CALL_I32_S:
-  case WebAssembly::CALL_I64:
-  case WebAssembly::CALL_I64_S:
-  case WebAssembly::CALL_F32:
-  case WebAssembly::CALL_F32_S:
-  case WebAssembly::CALL_F64:
-  case WebAssembly::CALL_F64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_EXCEPT_REF:
-  case WebAssembly::CALL_EXCEPT_REF_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-  case WebAssembly::CALL_INDIRECT_I32:
-  case WebAssembly::CALL_INDIRECT_I32_S:
-  case WebAssembly::CALL_INDIRECT_I64:
-  case WebAssembly::CALL_INDIRECT_I64_S:
-  case WebAssembly::CALL_INDIRECT_F32:
-  case WebAssembly::CALL_INDIRECT_F32_S:
-  case WebAssembly::CALL_INDIRECT_F64:
-  case WebAssembly::CALL_INDIRECT_F64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
-  case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-    return 0;
-  case WebAssembly::CALL_I32:
-  case WebAssembly::CALL_I32_S:
-  case WebAssembly::CALL_I64:
-  case WebAssembly::CALL_I64_S:
-  case WebAssembly::CALL_F32:
-  case WebAssembly::CALL_F32_S:
-  case WebAssembly::CALL_F64:
-  case WebAssembly::CALL_F64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_EXCEPT_REF:
-  case WebAssembly::CALL_EXCEPT_REF_S:
-  case WebAssembly::CALL_INDIRECT_I32:
-  case WebAssembly::CALL_INDIRECT_I32_S:
-  case WebAssembly::CALL_INDIRECT_I64:
-  case WebAssembly::CALL_INDIRECT_I64_S:
-  case WebAssembly::CALL_INDIRECT_F32:
-  case WebAssembly::CALL_INDIRECT_F32_S:
-  case WebAssembly::CALL_INDIRECT_F64:
-  case WebAssembly::CALL_INDIRECT_F64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
-  case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
-    return 1;
-  default:
-    llvm_unreachable("Not a call instruction");
-  }
-}
-
-bool WebAssembly::isMarker(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::BLOCK:
-  case WebAssembly::BLOCK_S:
-  case WebAssembly::END_BLOCK:
-  case WebAssembly::END_BLOCK_S:
-  case WebAssembly::LOOP:
-  case WebAssembly::LOOP_S:
-  case WebAssembly::END_LOOP:
-  case WebAssembly::END_LOOP_S:
-  case WebAssembly::TRY:
-  case WebAssembly::TRY_S:
-  case WebAssembly::END_TRY:
-  case WebAssembly::END_TRY_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isThrow(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::THROW_I32:
-  case WebAssembly::THROW_I32_S:
-  case WebAssembly::THROW_I64:
-  case WebAssembly::THROW_I64_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isRethrow(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::RETHROW:
-  case WebAssembly::RETHROW_S:
-  case WebAssembly::RETHROW_TO_CALLER:
-  case WebAssembly::RETHROW_TO_CALLER_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isCatch(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CATCH_I32:
-  case WebAssembly::CATCH_I32_S:
-  case WebAssembly::CATCH_I64:
-  case WebAssembly::CATCH_I64_S:
-  case WebAssembly::CATCH_ALL:
-  case WebAssembly::CATCH_ALL_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
 bool WebAssembly::mayThrow(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
-  case WebAssembly::THROW_I32:
-  case WebAssembly::THROW_I32_S:
-  case WebAssembly::THROW_I64:
-  case WebAssembly::THROW_I64_S:
+  case WebAssembly::THROW:
+  case WebAssembly::THROW_S:
   case WebAssembly::RETHROW:
   case WebAssembly::RETHROW_S:
     return true;
   }
-  if (isCallIndirect(MI))
+  if (isCallIndirect(MI.getOpcode()))
     return true;
   if (!MI.isCall())
     return false;
 
-  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI));
+  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
   assert(MO.isGlobal());
   const auto *F = dyn_cast<Function>(MO.getGlobal());
   if (!F)
@@ -307,43 +61,8 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
   if (F->getName() == CxaBeginCatchFn || F->getName() == PersonalityWrapperFn ||
       F->getName() == ClangCallTerminateFn || F->getName() == StdTerminateFn)
     return false;
-  return true;
-}
-
-bool WebAssembly::isCatchTerminatePad(const MachineBasicBlock &MBB) {
-  if (!MBB.isEHPad())
-    return false;
-  bool SeenCatch = false;
-  for (auto &MI : MBB) {
-    if (MI.getOpcode() == WebAssembly::CATCH_I32 ||
-        MI.getOpcode() == WebAssembly::CATCH_I64 ||
-        MI.getOpcode() == WebAssembly::CATCH_I32_S ||
-        MI.getOpcode() == WebAssembly::CATCH_I64_S)
-      SeenCatch = true;
-    if (SeenCatch && MI.isCall()) {
-      const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
-      if (CalleeOp.isGlobal() &&
-          CalleeOp.getGlobal()->getName() == ClangCallTerminateFn)
-        return true;
-    }
-  }
-  return false;
-}
 
-bool WebAssembly::isCatchAllTerminatePad(const MachineBasicBlock &MBB) {
-  if (!MBB.isEHPad())
-    return false;
-  bool SeenCatchAll = false;
-  for (auto &MI : MBB) {
-    if (MI.getOpcode() == WebAssembly::CATCH_ALL ||
-        MI.getOpcode() == WebAssembly::CATCH_ALL_S)
-      SeenCatchAll = true;
-    if (SeenCatchAll && MI.isCall()) {
-      const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
-      if (CalleeOp.isGlobal() &&
-          CalleeOp.getGlobal()->getName() == StdTerminateFn)
-        return true;
-    }
-  }
-  return false;
+  // TODO Can we exclude call instructions that are marked as 'nounwind' in the
+  // original LLVM IR? (Even when the callee may throw)
+  return true;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.h b/lib/Target/WebAssembly/WebAssemblyUtilities.h
index cdb7873e9013..26cf84de89b9 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -1,9 +1,8 @@
 //===-- WebAssemblyUtilities - WebAssembly Utility Functions ---*- C++ -*-====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -24,29 +23,9 @@ class WebAssemblyFunctionInfo;
 
 namespace WebAssembly {
 
-bool isArgument(const MachineInstr &MI);
-bool isCopy(const MachineInstr &MI);
-bool isTee(const MachineInstr &MI);
 bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
-bool isCallDirect(const MachineInstr &MI);
-bool isCallIndirect(const MachineInstr &MI);
-bool isMarker(const MachineInstr &MI);
-bool isThrow(const MachineInstr &MI);
-bool isRethrow(const MachineInstr &MI);
-bool isCatch(const MachineInstr &MI);
 bool mayThrow(const MachineInstr &MI);
 
-/// Returns the operand number of a callee, assuming the argument is a call
-/// instruction.
-unsigned getCalleeOpNo(const MachineInstr &MI);
-
-/// Returns if the given BB is a single BB terminate pad which starts with a
-/// 'catch' instruction.
-bool isCatchTerminatePad(const MachineBasicBlock &MBB);
-/// Returns if the given BB is a single BB terminate pad which starts with a
-/// 'catch_all' insrtruction.
-bool isCatchAllTerminatePad(const MachineBasicBlock &MBB);
-
 // Exception-related function names
 extern const char *const ClangCallTerminateFn;
 extern const char *const CxaBeginCatchFn;
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 364c871f61b0..701b347bcbd7 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -6,21 +6,13 @@
 # error). The format is
 # <name> <attributes> # comment
 
-# Computed gotos are not supported (Cannot select BlockAddress/BRIND)
-20071220-1.c
+# blockaddress without an indirectbr still can't be supported
+20071220-1.c O2 # Relocation against a BB address
 20071220-2.c
-20040302-1.c
-20041214-1.c O0
-20071210-1.c
-920501-4.c
-920501-5.c
-comp-goto-1.c
-980526-1.c
 990208-1.c
 label13.C O0
 label13a.C O0
 label3.C
-pr42462.C O0
 
 # WebAssembly hasn't implemented (will never?) __builtin_return_address
 20010122-1.c
@@ -75,7 +67,6 @@ pr41935.c
 920501-3.c
 920728-1.c
 pr28865.c
-widechar-2.c
 attr-alias-1.C
 attr-alias-2.C
 attr-ifunc-1.C
@@ -86,7 +77,6 @@ complit12.C
 va-arg-pack-1.C
 va-arg-pack-len-1.C
 builtin-line1.C
-builtin-location.C
 devirt-6.C  # bad main signature
 devirt-13.C  # bad main signature
 devirt-14.C  # bad main signature
@@ -94,11 +84,22 @@ devirt-21.C  # bad main signature
 devirt-23.C  # bad main signature
 lifetime2.C  # violates C++ DR1696
 
+# WASI doesn't have setjmp.h yet
+pr56982.c
+simd-2.C
+
+# WASI doesn't have pthread.h yet
+thread_local3.C
+thread_local3g.C
+thread_local4.C
+thread_local4g.C
+thread_local5.C
+thread_local5g.C
+
 # Untriaged C++ failures
 spec5.C
 addr1.C
 ef_test.C
-friend18.C
 member2.C
 new39.C
 new40.C
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
deleted file mode 100644
index 2c376fd062ca..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ /dev/null
@@ -1,1089 +0,0 @@
-//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86AsmInstrumentation.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "X86Operand.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SMLoc.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <vector>
-
-// Following comment describes how assembly instrumentation works.
-// Currently we have only AddressSanitizer instrumentation, but we're
-// planning to implement MemorySanitizer for inline assembly too. If
-// you're not familiar with AddressSanitizer algorithm, please, read
-// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
-//
-// When inline assembly is parsed by an instance of X86AsmParser, all
-// instructions are emitted via EmitInstruction method. That's the
-// place where X86AsmInstrumentation analyzes an instruction and
-// decides, whether the instruction should be emitted as is or
-// instrumentation is required. The latter case happens when an
-// instruction reads from or writes to memory. Now instruction opcode
-// is explicitly checked, and if an instruction has a memory operand
-// (for instance, movq (%rsi, %rcx, 8), %rax) - it should be
-// instrumented.  There're also exist instructions that modify
-// memory but don't have an explicit memory operands, for instance,
-// movs.
-//
-// Let's consider at first 8-byte memory accesses when an instruction
-// has an explicit memory operand. In this case we need two registers -
-// AddressReg to compute address of a memory cells which are accessed
-// and ShadowReg to compute corresponding shadow address. So, we need
-// to spill both registers before instrumentation code and restore them
-// after instrumentation. Thus, in general, instrumentation code will
-// look like this:
-// PUSHF  # Store flags, otherwise they will be overwritten
-// PUSH AddressReg  # spill AddressReg
-// PUSH ShadowReg   # spill ShadowReg
-// LEA MemOp, AddressReg  # compute address of the memory operand
-// MOV AddressReg, ShadowReg
-// SHR ShadowReg, 3
-// # ShadowOffset(AddressReg >> 3) contains address of a shadow
-// # corresponding to MemOp.
-// CMP ShadowOffset(ShadowReg), 0  # test shadow value
-// JZ .Done  # when shadow equals to zero, everything is fine
-// MOV AddressReg, RDI
-// # Call __asan_report function with AddressReg as an argument
-// CALL __asan_report
-// .Done:
-// POP ShadowReg  # Restore ShadowReg
-// POP AddressReg  # Restore AddressReg
-// POPF  # Restore flags
-//
-// Memory accesses with different size (1-, 2-, 4- and 16-byte) are
-// handled in a similar manner, but small memory accesses (less than 8
-// byte) require an additional ScratchReg, which is used for shadow value.
-//
-// If, suppose, we're instrumenting an instruction like movs, only
-// contents of RDI, RDI + AccessSize * RCX, RSI, RSI + AccessSize *
-// RCX are checked.  In this case there're no need to spill and restore
-// AddressReg , ShadowReg or flags four times, they're saved on stack
-// just once, before instrumentation of these four addresses, and restored
-// at the end of the instrumentation.
-//
-// There exist several things which complicate this simple algorithm.
-// * Instrumented memory operand can have RSP as a base or an index
-//   register.  So we need to add a constant offset before computation
-//   of memory address, since flags, AddressReg, ShadowReg, etc. were
-//   already stored on stack and RSP was modified.
-// * Debug info (usually, DWARF) should be adjusted, because sometimes
-//   RSP is used as a frame register. So, we need to select some
-//   register as a frame register and temprorary override current CFA
-//   register.
-
-using namespace llvm;
-
-static cl::opt<bool> ClAsanInstrumentAssembly(
-    "asan-instrument-assembly",
-    cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
-    cl::init(false));
-
-static const int64_t MinAllowedDisplacement =
-    std::numeric_limits<int32_t>::min();
-static const int64_t MaxAllowedDisplacement =
-    std::numeric_limits<int32_t>::max();
-
-static int64_t ApplyDisplacementBounds(int64_t Displacement) {
-  return std::max(std::min(MaxAllowedDisplacement, Displacement),
-                  MinAllowedDisplacement);
-}
-
-static void CheckDisplacementBounds(int64_t Displacement) {
-  assert(Displacement >= MinAllowedDisplacement &&
-         Displacement <= MaxAllowedDisplacement);
-}
-
-static bool IsStackReg(unsigned Reg) {
-  return Reg == X86::RSP || Reg == X86::ESP;
-}
-
-static bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
-
-namespace {
-
-class X86AddressSanitizer : public X86AsmInstrumentation {
-public:
-  struct RegisterContext {
-  private:
-    enum RegOffset {
-      REG_OFFSET_ADDRESS = 0,
-      REG_OFFSET_SHADOW,
-      REG_OFFSET_SCRATCH
-    };
-
-  public:
-    RegisterContext(unsigned AddressReg, unsigned ShadowReg,
-                    unsigned ScratchReg) {
-      BusyRegs.push_back(convReg(AddressReg, 64));
-      BusyRegs.push_back(convReg(ShadowReg, 64));
-      BusyRegs.push_back(convReg(ScratchReg, 64));
-    }
-
-    unsigned AddressReg(unsigned Size) const {
-      return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
-    }
-
-    unsigned ShadowReg(unsigned Size) const {
-      return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
-    }
-
-    unsigned ScratchReg(unsigned Size) const {
-      return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
-    }
-
-    void AddBusyReg(unsigned Reg) {
-      if (Reg != X86::NoRegister)
-        BusyRegs.push_back(convReg(Reg, 64));
-    }
-
-    void AddBusyRegs(const X86Operand &Op) {
-      AddBusyReg(Op.getMemBaseReg());
-      AddBusyReg(Op.getMemIndexReg());
-    }
-
-    unsigned ChooseFrameReg(unsigned Size) const {
-      static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
-                                              X86::RCX, X86::RDX, X86::RDI,
-                                              X86::RSI };
-      for (unsigned Reg : Candidates) {
-        if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
-          return convReg(Reg, Size);
-      }
-      return X86::NoRegister;
-    }
-
-  private:
-    unsigned convReg(unsigned Reg, unsigned Size) const {
-      return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
-    }
-
-    std::vector<unsigned> BusyRegs;
-  };
-
-  X86AddressSanitizer(const MCSubtargetInfo *&STI)
-      : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
-
-  ~X86AddressSanitizer() override = default;
-
-  // X86AsmInstrumentation implementation:
-  void InstrumentAndEmitInstruction(const MCInst &Inst, OperandVector &Operands,
-                                    MCContext &Ctx, const MCInstrInfo &MII,
-                                    MCStreamer &Out,
-                                    /* unused */ bool) override {
-    InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
-    if (RepPrefix)
-      EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
-
-    InstrumentMOV(Inst, Operands, Ctx, MII, Out);
-
-    RepPrefix = (Inst.getOpcode() == X86::REP_PREFIX);
-    if (!RepPrefix)
-      EmitInstruction(Out, Inst);
-  }
-
-  // Adjusts up stack and saves all registers used in instrumentation.
-  virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) = 0;
-
-  // Restores all registers used in instrumentation and adjusts stack.
-  virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) = 0;
-
-  virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx, MCStreamer &Out) = 0;
-  virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx, MCStreamer &Out) = 0;
-
-  virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
-                                  MCStreamer &Out) = 0;
-
-  void InstrumentMemOperand(X86Operand &Op, unsigned AccessSize, bool IsWrite,
-                            const RegisterContext &RegCtx, MCContext &Ctx,
-                            MCStreamer &Out);
-  void InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, unsigned CntReg,
-                          unsigned AccessSize, MCContext &Ctx, MCStreamer &Out);
-
-  void InstrumentMOVS(const MCInst &Inst, OperandVector &Operands,
-                      MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
-  void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
-                     MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
-
-protected:
-  void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
-
-  void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
-    assert(Size == 32 || Size == 64);
-    MCInst Inst;
-    Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
-    Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
-    Op.addMemOperands(Inst, 5);
-    EmitInstruction(Out, Inst);
-  }
-
-  void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
-                                unsigned Reg, MCContext &Ctx, MCStreamer &Out);
-
-  // Creates new memory operand with Displacement added to an original
-  // displacement. Residue will contain a residue which could happen when the
-  // total displacement exceeds 32-bit limitation.
-  std::unique_ptr<X86Operand> AddDisplacement(X86Operand &Op,
-                                              int64_t Displacement,
-                                              MCContext &Ctx, int64_t *Residue);
-
-  bool is64BitMode() const {
-    return STI->getFeatureBits()[X86::Mode64Bit];
-  }
-
-  bool is32BitMode() const {
-    return STI->getFeatureBits()[X86::Mode32Bit];
-  }
-
-  bool is16BitMode() const {
-    return STI->getFeatureBits()[X86::Mode16Bit];
-  }
-
-  unsigned getPointerWidth() {
-    if (is16BitMode()) return 16;
-    if (is32BitMode()) return 32;
-    if (is64BitMode()) return 64;
-    llvm_unreachable("invalid mode");
-  }
-
-  // True when previous instruction was actually REP prefix.
-  bool RepPrefix;
-
-  // Offset from the original SP register.
-  int64_t OrigSPOffset;
-};
-
-void X86AddressSanitizer::InstrumentMemOperand(
-    X86Operand &Op, unsigned AccessSize, bool IsWrite,
-    const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  assert(Op.isMem() && "Op should be a memory operand.");
-  assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
-         "AccessSize should be a power of two, less or equal than 16.");
-  // FIXME: take into account load/store alignment.
-  if (IsSmallMemAccess(AccessSize))
-    InstrumentMemOperandSmall(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
-  else
-    InstrumentMemOperandLarge(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
-                                             unsigned CntReg,
-                                             unsigned AccessSize,
-                                             MCContext &Ctx, MCStreamer &Out) {
-  // FIXME: check whole ranges [DstReg .. DstReg + AccessSize * (CntReg - 1)]
-  // and [SrcReg .. SrcReg + AccessSize * (CntReg - 1)].
-  RegisterContext RegCtx(X86::RDX /* AddressReg */, X86::RAX /* ShadowReg */,
-                         IsSmallMemAccess(AccessSize)
-                             ? X86::RBX
-                             : X86::NoRegister /* ScratchReg */);
-  RegCtx.AddBusyReg(DstReg);
-  RegCtx.AddBusyReg(SrcReg);
-  RegCtx.AddBusyReg(CntReg);
-
-  InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
-
-  // Test (%SrcReg)
-  {
-    const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
-    std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
-        getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
-    InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
-                         Out);
-  }
-
-  // Test -1(%SrcReg, %CntReg, AccessSize)
-  {
-    const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
-    std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
-        getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
-        SMLoc()));
-    InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
-                         Out);
-  }
-
-  // Test (%DstReg)
-  {
-    const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
-    std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
-        getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
-    InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
-  }
-
-  // Test -1(%DstReg, %CntReg, AccessSize)
-  {
-    const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
-    std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
-        getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(),
-        SMLoc()));
-    InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
-  }
-
-  InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOVS(const MCInst &Inst,
-                                         OperandVector &Operands,
-                                         MCContext &Ctx, const MCInstrInfo &MII,
-                                         MCStreamer &Out) {
-  // Access size in bytes.
-  unsigned AccessSize = 0;
-
-  switch (Inst.getOpcode()) {
-  case X86::MOVSB:
-    AccessSize = 1;
-    break;
-  case X86::MOVSW:
-    AccessSize = 2;
-    break;
-  case X86::MOVSL:
-    AccessSize = 4;
-    break;
-  case X86::MOVSQ:
-    AccessSize = 8;
-    break;
-  default:
-    return;
-  }
-
-  InstrumentMOVSImpl(AccessSize, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
-                                        OperandVector &Operands, MCContext &Ctx,
-                                        const MCInstrInfo &MII,
-                                        MCStreamer &Out) {
-  // Access size in bytes.
-  unsigned AccessSize = 0;
-
-  switch (Inst.getOpcode()) {
-  case X86::MOV8mi:
-  case X86::MOV8mr:
-  case X86::MOV8rm:
-    AccessSize = 1;
-    break;
-  case X86::MOV16mi:
-  case X86::MOV16mr:
-  case X86::MOV16rm:
-    AccessSize = 2;
-    break;
-  case X86::MOV32mi:
-  case X86::MOV32mr:
-  case X86::MOV32rm:
-    AccessSize = 4;
-    break;
-  case X86::MOV64mi32:
-  case X86::MOV64mr:
-  case X86::MOV64rm:
-    AccessSize = 8;
-    break;
-  case X86::MOVAPDmr:
-  case X86::MOVAPSmr:
-  case X86::MOVAPDrm:
-  case X86::MOVAPSrm:
-    AccessSize = 16;
-    break;
-  default:
-    return;
-  }
-
-  const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
-
-  for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
-    assert(Operands[Ix]);
-    MCParsedAsmOperand &Op = *Operands[Ix];
-    if (Op.isMem()) {
-      X86Operand &MemOp = static_cast<X86Operand &>(Op);
-      RegisterContext RegCtx(
-          X86::RDI /* AddressReg */, X86::RAX /* ShadowReg */,
-          IsSmallMemAccess(AccessSize) ? X86::RCX
-                                       : X86::NoRegister /* ScratchReg */);
-      RegCtx.AddBusyRegs(MemOp);
-      InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
-      InstrumentMemOperand(MemOp, AccessSize, IsWrite, RegCtx, Ctx, Out);
-      InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
-    }
-  }
-}
-
-void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
-                                                   unsigned Size,
-                                                   unsigned Reg, MCContext &Ctx,
-                                                   MCStreamer &Out) {
-  int64_t Displacement = 0;
-  if (IsStackReg(Op.getMemBaseReg()))
-    Displacement -= OrigSPOffset;
-  if (IsStackReg(Op.getMemIndexReg()))
-    Displacement -= OrigSPOffset * Op.getMemScale();
-
-  assert(Displacement >= 0);
-
-  // Emit Op as is.
-  if (Displacement == 0) {
-    EmitLEA(Op, Size, Reg, Out);
-    return;
-  }
-
-  int64_t Residue;
-  std::unique_ptr<X86Operand> NewOp =
-      AddDisplacement(Op, Displacement, Ctx, &Residue);
-  EmitLEA(*NewOp, Size, Reg, Out);
-
-  while (Residue != 0) {
-    const MCConstantExpr *Disp =
-        MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx);
-    std::unique_ptr<X86Operand> DispOp =
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
-                              SMLoc());
-    EmitLEA(*DispOp, Size, Reg, Out);
-    Residue -= Disp->getValue();
-  }
-}
-
-std::unique_ptr<X86Operand>
-X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
-                                     MCContext &Ctx, int64_t *Residue) {
-  assert(Displacement >= 0);
-
-  if (Displacement == 0 ||
-      (Op.getMemDisp() && Op.getMemDisp()->getKind() != MCExpr::Constant)) {
-    *Residue = Displacement;
-    return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(),
-                                 Op.getMemDisp(), Op.getMemBaseReg(),
-                                 Op.getMemIndexReg(), Op.getMemScale(),
-                                 SMLoc(), SMLoc());
-  }
-
-  int64_t OrigDisplacement =
-      static_cast<const MCConstantExpr *>(Op.getMemDisp())->getValue();
-  CheckDisplacementBounds(OrigDisplacement);
-  Displacement += OrigDisplacement;
-
-  int64_t NewDisplacement = ApplyDisplacementBounds(Displacement);
-  CheckDisplacementBounds(NewDisplacement);
-
-  *Residue = Displacement - NewDisplacement;
-  const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx);
-  return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp,
-                               Op.getMemBaseReg(), Op.getMemIndexReg(),
-                               Op.getMemScale(), SMLoc(), SMLoc());
-}
-
-class X86AddressSanitizer32 : public X86AddressSanitizer {
-public:
-  static const long kShadowOffset = 0x20000000;
-
-  X86AddressSanitizer32(const MCSubtargetInfo *&STI)
-      : X86AddressSanitizer(STI) {}
-
-  ~X86AddressSanitizer32() override = default;
-
-  unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
-    unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
-    if (FrameReg == X86::NoRegister)
-      return FrameReg;
-    return getX86SubSuperRegister(FrameReg, 32);
-  }
-
-  void SpillReg(MCStreamer &Out, unsigned Reg) {
-    EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(Reg));
-    OrigSPOffset -= 4;
-  }
-
-  void RestoreReg(MCStreamer &Out, unsigned Reg) {
-    EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(Reg));
-    OrigSPOffset += 4;
-  }
-
-  void StoreFlags(MCStreamer &Out) {
-    EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
-    OrigSPOffset -= 4;
-  }
-
-  void RestoreFlags(MCStreamer &Out) {
-    EmitInstruction(Out, MCInstBuilder(X86::POPF32));
-    OrigSPOffset += 4;
-  }
-
-  void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
-                                    MCContext &Ctx,
-                                    MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
-    assert(LocalFrameReg != X86::NoRegister);
-
-    const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
-    unsigned FrameReg = GetFrameReg(Ctx, Out);
-    if (MRI && FrameReg != X86::NoRegister) {
-      SpillReg(Out, LocalFrameReg);
-      if (FrameReg == X86::ESP) {
-        Out.EmitCFIAdjustCfaOffset(4 /* byte size of the LocalFrameReg */);
-        Out.EmitCFIRelOffset(
-            MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
-      }
-      EmitInstruction(
-          Out,
-          MCInstBuilder(X86::MOV32rr).addReg(LocalFrameReg).addReg(FrameReg));
-      Out.EmitCFIRememberState();
-      Out.EmitCFIDefCfaRegister(
-          MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
-    }
-
-    SpillReg(Out, RegCtx.AddressReg(32));
-    SpillReg(Out, RegCtx.ShadowReg(32));
-    if (RegCtx.ScratchReg(32) != X86::NoRegister)
-      SpillReg(Out, RegCtx.ScratchReg(32));
-    StoreFlags(Out);
-  }
-
-  void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
-                                    MCContext &Ctx,
-                                    MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
-    assert(LocalFrameReg != X86::NoRegister);
-
-    RestoreFlags(Out);
-    if (RegCtx.ScratchReg(32) != X86::NoRegister)
-      RestoreReg(Out, RegCtx.ScratchReg(32));
-    RestoreReg(Out, RegCtx.ShadowReg(32));
-    RestoreReg(Out, RegCtx.AddressReg(32));
-
-    unsigned FrameReg = GetFrameReg(Ctx, Out);
-    if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
-      RestoreReg(Out, LocalFrameReg);
-      Out.EmitCFIRestoreState();
-      if (FrameReg == X86::ESP)
-        Out.EmitCFIAdjustCfaOffset(-4 /* byte size of the LocalFrameReg */);
-    }
-  }
-
-  void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
-                                 bool IsWrite,
-                                 const RegisterContext &RegCtx,
-                                 MCContext &Ctx,
-                                 MCStreamer &Out) override;
-  void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
-                                 bool IsWrite,
-                                 const RegisterContext &RegCtx,
-                                 MCContext &Ctx,
-                                 MCStreamer &Out) override;
-  void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
-                          MCStreamer &Out) override;
-
-private:
-  void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
-                          MCStreamer &Out, const RegisterContext &RegCtx) {
-    EmitInstruction(Out, MCInstBuilder(X86::CLD));
-    EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
-
-    EmitInstruction(Out, MCInstBuilder(X86::AND32ri8)
-                             .addReg(X86::ESP)
-                             .addReg(X86::ESP)
-                             .addImm(-16));
-    EmitInstruction(
-        Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
-
-    MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
-                                            (IsWrite ? "store" : "load") +
-                                            Twine(AccessSize));
-    const MCSymbolRefExpr *FnExpr =
-        MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
-    EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
-  }
-};
-
-void X86AddressSanitizer32::InstrumentMemOperandSmall(
-    X86Operand &Op, unsigned AccessSize, bool IsWrite,
-    const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
-  unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
-
-  assert(RegCtx.ScratchReg(32) != X86::NoRegister);
-  unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
-
-  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
-                           AddressRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
-                           .addReg(ShadowRegI32)
-                           .addReg(ShadowRegI32)
-                           .addImm(3));
-
-  {
-    MCInst Inst;
-    Inst.setOpcode(X86::MOV8rm);
-    Inst.addOperand(MCOperand::createReg(ShadowRegI8));
-    const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
-                              SMLoc(), SMLoc()));
-    Op->addMemOperands(Inst, 5);
-    EmitInstruction(Out, Inst);
-  }
-
-  EmitInstruction(
-      Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
-                           AddressRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
-                           .addReg(ScratchRegI32)
-                           .addReg(ScratchRegI32)
-                           .addImm(7));
-
-  switch (AccessSize) {
-  default: llvm_unreachable("Incorrect access size");
-  case 1:
-    break;
-  case 2: {
-    const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
-                              SMLoc(), SMLoc()));
-    EmitLEA(*Op, 32, ScratchRegI32, Out);
-    break;
-  }
-  case 4:
-    EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
-                             .addReg(ScratchRegI32)
-                             .addReg(ScratchRegI32)
-                             .addImm(3));
-    break;
-  }
-
-  EmitInstruction(
-      Out,
-      MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
-  EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
-                           ShadowRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
-
-  EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
-  EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer32::InstrumentMemOperandLarge(
-    X86Operand &Op, unsigned AccessSize, bool IsWrite,
-    const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
-
-  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
-                           AddressRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
-                           .addReg(ShadowRegI32)
-                           .addReg(ShadowRegI32)
-                           .addImm(3));
-  {
-    MCInst Inst;
-    switch (AccessSize) {
-    default: llvm_unreachable("Incorrect access size");
-    case 8:
-      Inst.setOpcode(X86::CMP8mi);
-      break;
-    case 16:
-      Inst.setOpcode(X86::CMP16mi);
-      break;
-    }
-    const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
-                              SMLoc(), SMLoc()));
-    Op->addMemOperands(Inst, 5);
-    Inst.addOperand(MCOperand::createImm(0));
-    EmitInstruction(Out, Inst);
-  }
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
-  EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
-                                               MCContext &Ctx,
-                                               MCStreamer &Out) {
-  StoreFlags(Out);
-
-  // No need to test when ECX is equals to zero.
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(
-      Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  // Instrument first and last elements in src and dst range.
-  InstrumentMOVSBase(X86::EDI /* DstReg */, X86::ESI /* SrcReg */,
-                     X86::ECX /* CntReg */, AccessSize, Ctx, Out);
-
-  EmitLabel(Out, DoneSym);
-  RestoreFlags(Out);
-}
-
-class X86AddressSanitizer64 : public X86AddressSanitizer {
-public:
-  static const long kShadowOffset = 0x7fff8000;
-
-  X86AddressSanitizer64(const MCSubtargetInfo *&STI)
-      : X86AddressSanitizer(STI) {}
-
-  ~X86AddressSanitizer64() override = default;
-
-  unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
-    unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
-    if (FrameReg == X86::NoRegister)
-      return FrameReg;
-    return getX86SubSuperRegister(FrameReg, 64);
-  }
-
-  void SpillReg(MCStreamer &Out, unsigned Reg) {
-    EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(Reg));
-    OrigSPOffset -= 8;
-  }
-
-  void RestoreReg(MCStreamer &Out, unsigned Reg) {
-    EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(Reg));
-    OrigSPOffset += 8;
-  }
-
-  void StoreFlags(MCStreamer &Out) {
-    EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
-    OrigSPOffset -= 8;
-  }
-
-  void RestoreFlags(MCStreamer &Out) {
-    EmitInstruction(Out, MCInstBuilder(X86::POPF64));
-    OrigSPOffset += 8;
-  }
-
-  void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
-                                    MCContext &Ctx,
-                                    MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
-    assert(LocalFrameReg != X86::NoRegister);
-
-    const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
-    unsigned FrameReg = GetFrameReg(Ctx, Out);
-    if (MRI && FrameReg != X86::NoRegister) {
-      SpillReg(Out, X86::RBP);
-      if (FrameReg == X86::RSP) {
-        Out.EmitCFIAdjustCfaOffset(8 /* byte size of the LocalFrameReg */);
-        Out.EmitCFIRelOffset(
-            MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
-      }
-      EmitInstruction(
-          Out,
-          MCInstBuilder(X86::MOV64rr).addReg(LocalFrameReg).addReg(FrameReg));
-      Out.EmitCFIRememberState();
-      Out.EmitCFIDefCfaRegister(
-          MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
-    }
-
-    EmitAdjustRSP(Ctx, Out, -128);
-    SpillReg(Out, RegCtx.ShadowReg(64));
-    SpillReg(Out, RegCtx.AddressReg(64));
-    if (RegCtx.ScratchReg(64) != X86::NoRegister)
-      SpillReg(Out, RegCtx.ScratchReg(64));
-    StoreFlags(Out);
-  }
-
-  void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
-                                    MCContext &Ctx,
-                                    MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
-    assert(LocalFrameReg != X86::NoRegister);
-
-    RestoreFlags(Out);
-    if (RegCtx.ScratchReg(64) != X86::NoRegister)
-      RestoreReg(Out, RegCtx.ScratchReg(64));
-    RestoreReg(Out, RegCtx.AddressReg(64));
-    RestoreReg(Out, RegCtx.ShadowReg(64));
-    EmitAdjustRSP(Ctx, Out, 128);
-
-    unsigned FrameReg = GetFrameReg(Ctx, Out);
-    if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
-      RestoreReg(Out, LocalFrameReg);
-      Out.EmitCFIRestoreState();
-      if (FrameReg == X86::RSP)
-        Out.EmitCFIAdjustCfaOffset(-8 /* byte size of the LocalFrameReg */);
-    }
-  }
-
-  void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
-                                 bool IsWrite,
-                                 const RegisterContext &RegCtx,
-                                 MCContext &Ctx,
-                                 MCStreamer &Out) override;
-  void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
-                                 bool IsWrite,
-                                 const RegisterContext &RegCtx,
-                                 MCContext &Ctx,
-                                 MCStreamer &Out) override;
-  void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
-                          MCStreamer &Out) override;
-
-private:
-  void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
-    const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
-                              SMLoc(), SMLoc()));
-    EmitLEA(*Op, 64, X86::RSP, Out);
-    OrigSPOffset += Offset;
-  }
-
-  void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
-                          MCStreamer &Out, const RegisterContext &RegCtx) {
-    EmitInstruction(Out, MCInstBuilder(X86::CLD));
-    EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
-
-    EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
-                             .addReg(X86::RSP)
-                             .addReg(X86::RSP)
-                             .addImm(-16));
-
-    if (RegCtx.AddressReg(64) != X86::RDI) {
-      EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
-                               RegCtx.AddressReg(64)));
-    }
-    MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
-                                            (IsWrite ? "store" : "load") +
-                                            Twine(AccessSize));
-    const MCSymbolRefExpr *FnExpr =
-        MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
-    EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
-  }
-};
-
-} // end anonymous namespace
-
-void X86AddressSanitizer64::InstrumentMemOperandSmall(
-    X86Operand &Op, unsigned AccessSize, bool IsWrite,
-    const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI64 = RegCtx.AddressReg(64);
-  unsigned AddressRegI32 = RegCtx.AddressReg(32);
-  unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
-  unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
-
-  assert(RegCtx.ScratchReg(32) != X86::NoRegister);
-  unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
-
-  ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
-                           AddressRegI64));
-  EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
-                           .addReg(ShadowRegI64)
-                           .addReg(ShadowRegI64)
-                           .addImm(3));
-  {
-    MCInst Inst;
-    Inst.setOpcode(X86::MOV8rm);
-    Inst.addOperand(MCOperand::createReg(ShadowRegI8));
-    const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
-                              SMLoc(), SMLoc()));
-    Op->addMemOperands(Inst, 5);
-    EmitInstruction(Out, Inst);
-  }
-
-  EmitInstruction(
-      Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
-                           AddressRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
-                           .addReg(ScratchRegI32)
-                           .addReg(ScratchRegI32)
-                           .addImm(7));
-
-  switch (AccessSize) {
-  default: llvm_unreachable("Incorrect access size");
-  case 1:
-    break;
-  case 2: {
-    const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
-                              SMLoc(), SMLoc()));
-    EmitLEA(*Op, 32, ScratchRegI32, Out);
-    break;
-  }
-  case 4:
-    EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
-                             .addReg(ScratchRegI32)
-                             .addReg(ScratchRegI32)
-                             .addImm(3));
-    break;
-  }
-
-  EmitInstruction(
-      Out,
-      MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
-  EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
-                           ShadowRegI32));
-  EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
-
-  EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
-  EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer64::InstrumentMemOperandLarge(
-    X86Operand &Op, unsigned AccessSize, bool IsWrite,
-    const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI64 = RegCtx.AddressReg(64);
-  unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
-
-  ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
-
-  EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
-                           AddressRegI64));
-  EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
-                           .addReg(ShadowRegI64)
-                           .addReg(ShadowRegI64)
-                           .addImm(3));
-  {
-    MCInst Inst;
-    switch (AccessSize) {
-    default: llvm_unreachable("Incorrect access size");
-    case 8:
-      Inst.setOpcode(X86::CMP8mi);
-      break;
-    case 16:
-      Inst.setOpcode(X86::CMP16mi);
-      break;
-    }
-    const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
-    std::unique_ptr<X86Operand> Op(
-        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
-                              SMLoc(), SMLoc()));
-    Op->addMemOperands(Inst, 5);
-    Inst.addOperand(MCOperand::createImm(0));
-    EmitInstruction(Out, Inst);
-  }
-
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
-  EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
-                                               MCContext &Ctx,
-                                               MCStreamer &Out) {
-  StoreFlags(Out);
-
-  // No need to test when RCX is equals to zero.
-  MCSymbol *DoneSym = Ctx.createTempSymbol();
-  const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
-  EmitInstruction(
-      Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
-  EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
-  // Instrument first and last elements in src and dst range.
-  InstrumentMOVSBase(X86::RDI /* DstReg */, X86::RSI /* SrcReg */,
-                     X86::RCX /* CntReg */, AccessSize, Ctx, Out);
-
-  EmitLabel(Out, DoneSym);
-  RestoreFlags(Out);
-}
-
-X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
-    : STI(STI) {}
-
-X86AsmInstrumentation::~X86AsmInstrumentation() = default;
-
-void X86AsmInstrumentation::InstrumentAndEmitInstruction(
-    const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
-    const MCInstrInfo &MII, MCStreamer &Out, bool PrintSchedInfoEnabled) {
-  EmitInstruction(Out, Inst, PrintSchedInfoEnabled);
-}
-
-void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst,
-                                            bool PrintSchedInfoEnabled) {
-  Out.EmitInstruction(Inst, *STI, PrintSchedInfoEnabled);
-}
-
-unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
-                                                   MCStreamer &Out) {
-  if (!Out.getNumFrameInfos()) // No active dwarf frame
-    return X86::NoRegister;
-  const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back();
-  if (Frame.End) // Active dwarf frame is closed
-    return X86::NoRegister;
-  const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
-  if (!MRI) // No register info
-    return X86::NoRegister;
-
-  if (InitialFrameReg) {
-    // FrameReg is set explicitly, we're instrumenting a MachineFunction.
-    return InitialFrameReg;
-  }
-
-  return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */);
-}
-
-X86AsmInstrumentation *
-llvm::CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
-                                  const MCContext &Ctx,
-                                  const MCSubtargetInfo *&STI) {
-  Triple T(STI->getTargetTriple());
-  const bool hasCompilerRTSupport = T.isOSLinux();
-  if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
-      MCOptions.SanitizeAddress) {
-    if (STI->getFeatureBits()[X86::Mode32Bit] != 0)
-      return new X86AddressSanitizer32(STI);
-    if (STI->getFeatureBits()[X86::Mode64Bit] != 0)
-      return new X86AddressSanitizer64(STI);
-  }
-  return new X86AsmInstrumentation(STI);
-}
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
deleted file mode 100644
index 42a9dc3ba26a..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- X86AsmInstrumentation.h - Instrument X86 inline assembly -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
-#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
-
-#include "llvm/ADT/SmallVector.h"
-#include <memory>
-
-namespace llvm {
-
-class MCContext;
-class MCInst;
-class MCInstrInfo;
-class MCParsedAsmOperand;
-class MCStreamer;
-class MCSubtargetInfo;
-class MCTargetOptions;
-class X86AsmInstrumentation;
-
-X86AsmInstrumentation *
-CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
-                            const MCContext &Ctx,
-                            const MCSubtargetInfo *&STI);
-
-class X86AsmInstrumentation {
-public:
-  virtual ~X86AsmInstrumentation();
-
-  // Sets frame register corresponding to a current frame.
-  void SetInitialFrameRegister(unsigned RegNo) {
-    InitialFrameReg = RegNo;
-  }
-
-  // Tries to instrument and emit instruction.
-  virtual void InstrumentAndEmitInstruction(
-      const MCInst &Inst,
-      SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
-      MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out,
-      bool PrintSchedInfoEnabled);
-
-protected:
-  friend X86AsmInstrumentation *
-  CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
-                              const MCContext &Ctx,
-                              const MCSubtargetInfo *&STI);
-
-  X86AsmInstrumentation(const MCSubtargetInfo *&STI);
-
-  unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
-
-  void EmitInstruction(MCStreamer &Out, const MCInst &Inst,
-                       bool PrintSchedInfoEnabled = false);
-
-  const MCSubtargetInfo *&STI;
-
-  unsigned InitialFrameReg = 0;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 899b50d0f78f..95cbf46d37ed 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1,17 +1,16 @@
 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/X86IntelInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86IntelInstPrinter.h"
 #include "MCTargetDesc/X86MCExpr.h"
 #include "MCTargetDesc/X86TargetStreamer.h"
-#include "X86AsmInstrumentation.h"
+#include "TargetInfo/X86TargetInfo.h"
 #include "X86AsmParserCommon.h"
 #include "X86Operand.h"
 #include "llvm/ADT/STLExtras.h"
@@ -71,9 +70,17 @@ static const char OpPrecedence[] = {
 
 class X86AsmParser : public MCTargetAsmParser {
   ParseInstructionInfo *InstInfo;
-  std::unique_ptr<X86AsmInstrumentation> Instrumentation;
   bool Code16GCC;
 
+  enum VEXEncoding {
+    VEXEncoding_Default,
+    VEXEncoding_VEX2,
+    VEXEncoding_VEX3,
+    VEXEncoding_EVEX,
+  };
+
+  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
+
 private:
   SMLoc consumeToken() {
     MCAsmParser &Parser = getParser();
@@ -90,13 +97,14 @@ private:
   }
 
   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
-                            uint64_t &ErrorInfo, bool matchingInlineAsm,
-                            unsigned VariantID = 0) {
+                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
+                            bool matchingInlineAsm, unsigned VariantID = 0) {
     // In Code16GCC mode, match as 32-bit.
     if (Code16GCC)
       SwitchMode(X86::Mode32Bit);
     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
-                                       matchingInlineAsm, VariantID);
+                                       MissingFeatures, matchingInlineAsm,
+                                       VariantID);
     if (Code16GCC)
       SwitchMode(X86::Mode16Bit);
     return rv;
@@ -840,6 +848,8 @@ private:
                                               const SMLoc &StartLoc,
                                               SMLoc &EndLoc);
 
+  X86::CondCode ParseConditionCode(StringRef CCode);
+
   bool ParseIntelMemoryOperandSize(unsigned &Size);
   std::unique_ptr<X86Operand>
   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
@@ -860,6 +870,8 @@ private:
   bool parseDirectiveFPOEndProc(SMLoc L);
   bool parseDirectiveFPOData(SMLoc L);
 
+  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
 
@@ -875,7 +887,7 @@ private:
   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                          MCStreamer &Out, bool MatchingInlineAsm);
 
-  bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                            bool MatchingInlineAsm);
 
   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -914,7 +926,7 @@ private:
     MCSubtargetInfo &STI = copySTI();
     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
-    uint64_t FB = ComputeAvailableFeatures(
+    FeatureBitset FB = ComputeAvailableFeatures(
       STI.ToggleFeature(OldMode.flip(mode)));
     setAvailableFeatures(FB);
 
@@ -941,6 +953,9 @@ private:
   /// }
 
 public:
+  enum X86MatchResultTy {
+    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
+  };
 
   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
                const MCInstrInfo &mii, const MCTargetOptions &Options)
@@ -951,14 +966,10 @@ public:
 
     // Initialize the set of available features.
     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
-    Instrumentation.reset(
-        CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
   }
 
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
 
-  void SetFrameRegister(unsigned RegNo) override;
-
   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
 
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -1115,8 +1126,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
   }
 
   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
-  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
-    RegNo = X86::ST0;
+  if (RegNo == X86::ST0) {
     Parser.Lex(); // Eat 'st'
 
     // Check to see if we have '(4)' after %st.
@@ -1194,10 +1204,6 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
   return false;
 }
 
-void X86AsmParser::SetFrameRegister(unsigned RegNo) {
-  Instrumentation->SetInitialFrameRegister(RegNo);
-}
-
 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
   bool Parse32 = is32BitMode() || Code16GCC;
   unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
@@ -1656,6 +1662,8 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
   const AsmToken &Tok = Parser.getTok();
   // Eat "{" and mark the current place.
   const SMLoc consumedToken = consumeToken();
+  if (Tok.isNot(AsmToken::Identifier))
+    return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
   if (Tok.getIdentifier().startswith("r")){
     int rndMode = StringSwitch<int>(Tok.getIdentifier())
       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
@@ -1999,6 +2007,29 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
   }
 }
 
+// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
+// otherwise the EFLAGS Condition Code enumerator.
+X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
+  return StringSwitch<X86::CondCode>(CC)
+      .Case("o", X86::COND_O)          // Overflow
+      .Case("no", X86::COND_NO)        // No Overflow
+      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
+      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
+      .Cases("e", "z", X86::COND_E)    // Equal/Zero
+      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
+      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
+      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
+      .Case("s", X86::COND_S)          // Sign
+      .Case("ns", X86::COND_NS)        // No Sign
+      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
+      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
+      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
+      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
+      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
+      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
+      .Default(X86::COND_INVALID);
+}
+
 // true on failure, false otherwise
 // If no {z} mark was found - Parser doesn't advance
 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
@@ -2305,18 +2336,64 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                     SMLoc NameLoc, OperandVector &Operands) {
   MCAsmParser &Parser = getParser();
   InstInfo = &Info;
+
+  // Reset the forced VEX encoding.
+  ForcedVEXEncoding = VEXEncoding_Default;
+
+  // Parse pseudo prefixes.
+  while (1) {
+    if (Name == "{") {
+      if (getLexer().isNot(AsmToken::Identifier))
+        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
+      std::string Prefix = Parser.getTok().getString().lower();
+      Parser.Lex(); // Eat identifier.
+      if (getLexer().isNot(AsmToken::RCurly))
+        return Error(Parser.getTok().getLoc(), "Expected '}'");
+      Parser.Lex(); // Eat curly.
+
+      if (Prefix == "vex2")
+        ForcedVEXEncoding = VEXEncoding_VEX2;
+      else if (Prefix == "vex3")
+        ForcedVEXEncoding = VEXEncoding_VEX3;
+      else if (Prefix == "evex")
+        ForcedVEXEncoding = VEXEncoding_EVEX;
+      else
+        return Error(NameLoc, "unknown prefix");
+
+      NameLoc = Parser.getTok().getLoc();
+      if (getLexer().is(AsmToken::LCurly)) {
+        Parser.Lex();
+        Name = "{";
+      } else {
+        if (getLexer().isNot(AsmToken::Identifier))
+          return Error(Parser.getTok().getLoc(), "Expected identifier");
+        // FIXME: The mnemonic won't match correctly if its not in lower case.
+        Name = Parser.getTok().getString();
+        Parser.Lex();
+      }
+      continue;
+    }
+
+    break;
+  }
+
   StringRef PatchedName = Name;
 
-  if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
-      isParsingIntelSyntax() && isParsingInlineAsm()) {
+  // Hack to skip "short" following Jcc.
+  if (isParsingIntelSyntax() &&
+      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
+       PatchedName == "jcxz" || PatchedName == "jexcz" ||
+       (PatchedName.startswith("j") &&
+        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
     StringRef NextTok = Parser.getTok().getString();
     if (NextTok == "short") {
       SMLoc NameEndLoc =
           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
-      // Eat the short keyword
+      // Eat the short keyword.
       Parser.Lex();
-      // MS ignores the short keyword, it determines the jmp type based
-      // on the distance of the label
+      // MS and GAS ignore the short keyword; they both determine the jmp type
+      // based on the distance of the label. (NASM does emit different code with
+      // and without "short," though.)
       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                           NextTok.size() + 1);
     }
@@ -2327,13 +2404,15 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       PatchedName != "setb" && PatchedName != "setnb")
     PatchedName = PatchedName.substr(0, Name.size()-1);
 
+  unsigned ComparisonPredicate = ~0U;
+
   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
     bool IsVCMP = PatchedName[0] == 'v';
     unsigned CCIdx = IsVCMP ? 4 : 3;
-    unsigned ComparisonCode = StringSwitch<unsigned>(
+    unsigned CC = StringSwitch<unsigned>(
       PatchedName.slice(CCIdx, PatchedName.size() - 2))
       .Case("eq",       0x00)
       .Case("eq_oq",    0x00)
@@ -2383,26 +2462,29 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       .Case("gt_oq",    0x1E)
       .Case("true_us",  0x1F)
       .Default(~0U);
-    if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
-
-      Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
-                                                 NameLoc));
-
-      const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
-                                                   getParser().getContext());
-      Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+    if (CC != ~0U && (IsVCMP || CC < 8)) {
+      if (PatchedName.endswith("ss"))
+        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+      else if (PatchedName.endswith("sd"))
+        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+      else if (PatchedName.endswith("ps"))
+        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+      else if (PatchedName.endswith("pd"))
+        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+      else
+        llvm_unreachable("Unexpected suffix!");
 
-      PatchedName = PatchedName.substr(PatchedName.size() - 2);
+      ComparisonPredicate = CC;
     }
   }
 
   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
   if (PatchedName.startswith("vpcmp") &&
-      (PatchedName.endswith("b") || PatchedName.endswith("w") ||
-       PatchedName.endswith("d") || PatchedName.endswith("q"))) {
-    unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
-    unsigned ComparisonCode = StringSwitch<unsigned>(
-      PatchedName.slice(5, PatchedName.size() - CCIdx))
+      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
+       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
+    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+    unsigned CC = StringSwitch<unsigned>(
+      PatchedName.slice(5, PatchedName.size() - SuffixSize))
       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
       .Case("lt",    0x1)
       .Case("le",    0x2)
@@ -2412,24 +2494,26 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       .Case("nle",   0x6)
       //.Case("true",  0x7) // Not a documented alias.
       .Default(~0U);
-    if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
-      Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
-
-      const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
-                                                   getParser().getContext());
-      Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
-
-      PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
+    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
+      switch (PatchedName.back()) {
+      default: llvm_unreachable("Unexpected character!");
+      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
+      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
+      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
+      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
+      }
+      // Set up the immediate to push into the operands later.
+      ComparisonPredicate = CC;
     }
   }
 
   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
   if (PatchedName.startswith("vpcom") &&
-      (PatchedName.endswith("b") || PatchedName.endswith("w") ||
-       PatchedName.endswith("d") || PatchedName.endswith("q"))) {
-    unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
-    unsigned ComparisonCode = StringSwitch<unsigned>(
-      PatchedName.slice(5, PatchedName.size() - CCIdx))
+      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
+       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
+    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+    unsigned CC = StringSwitch<unsigned>(
+      PatchedName.slice(5, PatchedName.size() - SuffixSize))
       .Case("lt",    0x0)
       .Case("le",    0x1)
       .Case("gt",    0x2)
@@ -2439,14 +2523,16 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       .Case("false", 0x6)
       .Case("true",  0x7)
       .Default(~0U);
-    if (ComparisonCode != ~0U) {
-      Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
-
-      const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
-                                                   getParser().getContext());
-      Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
-
-      PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
+    if (CC != ~0U) {
+      switch (PatchedName.back()) {
+      default: llvm_unreachable("Unexpected character!");
+      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
+      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
+      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
+      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
+      }
+      // Set up the immediate to push into the operands later.
+      ComparisonPredicate = CC;
     }
   }
 
@@ -2489,6 +2575,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       Flags = X86::IP_NO_PREFIX;
       break;
     }
+    // FIXME: The mnemonic won't match correctly if its not in lower case.
     Name = Parser.getTok().getString();
     Parser.Lex(); // eat the prefix
     // Hack: we could have something like "rep # some comment" or
@@ -2496,6 +2583,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     while (Name.startswith(";") || Name.startswith("\n") ||
            Name.startswith("#") || Name.startswith("\t") ||
            Name.startswith("/")) {
+      // FIXME: The mnemonic won't match correctly if it's not in lower case.
       Name = Parser.getTok().getString();
       Parser.Lex(); // go to next prefix or instr
     }
@@ -2519,6 +2607,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
+  // Push the immediate if we extracted one from the mnemonic.
+  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
+    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
+                                                 getParser().getContext());
+    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+  }
+
   // This does the actual operand parsing.  Don't parse any more if we have a
   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
   // just want to parse the "lock" as the first instruction and the "incl" as
@@ -2553,6 +2648,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
       return TokError("unexpected token in argument list");
   }
 
+  // Push the immediate if we extracted one from the mnemonic.
+  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
+    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
+                                                 getParser().getContext());
+    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+  }
+
   // Consume the EndOfStatement or the prefix separator Slash
   if (getLexer().is(AsmToken::EndOfStatement) ||
       (isPrefix && getLexer().is(AsmToken::Slash)))
@@ -2576,13 +2678,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
   }
 
-  // Moving a 32 or 16 bit value into a segment register has the same
-  // behavior. Modify such instructions to always take shorter form.
   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
       (Operands.size() == 3)) {
     X86Operand &Op1 = (X86Operand &)*Operands[1];
     X86Operand &Op2 = (X86Operand &)*Operands[2];
     SMLoc Loc = Op1.getEndLoc();
+    // Moving a 32 or 16 bit value into a segment register has the same
+    // behavior. Modify such instructions to always take shorter form.
     if (Op1.isReg() && Op2.isReg() &&
         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
             Op2.getReg()) &&
@@ -2759,7 +2861,69 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 }
 
 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
-  return false;
+  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+
+  switch (Inst.getOpcode()) {
+  default: return false;
+  case X86::VMOVZPQILo2PQIrr:
+  case X86::VMOVAPDrr:
+  case X86::VMOVAPDYrr:
+  case X86::VMOVAPSrr:
+  case X86::VMOVAPSYrr:
+  case X86::VMOVDQArr:
+  case X86::VMOVDQAYrr:
+  case X86::VMOVDQUrr:
+  case X86::VMOVDQUYrr:
+  case X86::VMOVUPDrr:
+  case X86::VMOVUPDYrr:
+  case X86::VMOVUPSrr:
+  case X86::VMOVUPSYrr: {
+    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+    // the registers is extended, but the other isn't.
+    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+        MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
+      return false;
+
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
+    case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
+    case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
+    case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
+    case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
+    case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
+    case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
+    case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
+    case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
+    case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
+    case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
+    case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
+    case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
+  case X86::VMOVSDrr:
+  case X86::VMOVSSrr: {
+    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+    // the registers is extended, but the other isn't.
+    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+        MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
+      return false;
+
+    unsigned NewOpc;
+    switch (Inst.getOpcode()) {
+    default: llvm_unreachable("Invalid opcode");
+    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+    }
+    Inst.setOpcode(NewOpc);
+    return true;
+  }
+  }
 }
 
 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
@@ -2865,9 +3029,7 @@ static const char *getSubtargetFeatureName(uint64_t Val);
 
 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
                                    MCStreamer &Out) {
-  Instrumentation->InstrumentAndEmitInstruction(
-      Inst, Operands, getContext(), MII, Out,
-      getParser().shouldPrintSchedInfo());
+  Out.EmitInstruction(Inst, getSTI());
 }
 
 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -2907,17 +3069,16 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
   }
 }
 
-bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
+                                       const FeatureBitset &MissingFeatures,
                                        bool MatchingInlineAsm) {
-  assert(ErrorInfo && "Unknown missing feature!");
+  assert(MissingFeatures.any() && "Unknown missing feature!");
   SmallString<126> Msg;
   raw_svector_ostream OS(Msg);
   OS << "instruction requires:";
-  uint64_t Mask = 1;
-  for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
-    if (ErrorInfo & Mask)
-      OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
-    Mask <<= 1;
+  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+    if (MissingFeatures[i])
+      OS << ' ' << getSubtargetFeatureName(i);
   }
   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
 }
@@ -2932,30 +3093,70 @@ static unsigned getPrefixes(OperandVector &Operands) {
   return Result;
 }
 
+unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+  unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &MCID = MII.get(Opc);
+
+  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
+      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
+    return Match_Unsupported;
+
+  if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
+       ForcedVEXEncoding == VEXEncoding_VEX3) &&
+      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
+    return Match_Unsupported;
+
+  // These instructions match ambiguously with their VEX encoded counterparts
+  // and appear first in the matching table. Reject them unless we're forcing
+  // EVEX encoding.
+  // FIXME: We really need a way to break the ambiguity.
+  switch (Opc) {
+  case X86::VCVTSD2SIZrm_Int:
+  case X86::VCVTSD2SI64Zrm_Int:
+  case X86::VCVTSS2SIZrm_Int:
+  case X86::VCVTSS2SI64Zrm_Int:
+  case X86::VCVTTSD2SIZrm:   case X86::VCVTTSD2SIZrm_Int:
+  case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
+  case X86::VCVTTSS2SIZrm:   case X86::VCVTTSS2SIZrm_Int:
+  case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
+    if (ForcedVEXEncoding != VEXEncoding_EVEX)
+      return Match_Unsupported;
+  }
+
+  return Match_Success;
+}
+
 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                               OperandVector &Operands,
                                               MCStreamer &Out,
                                               uint64_t &ErrorInfo,
                                               bool MatchingInlineAsm) {
   assert(!Operands.empty() && "Unexpect empty operand list!");
-  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
-  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
   SMRange EmptyRange = None;
 
   // First, handle aliases that expand to multiple instructions.
-  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
-
-  bool WasOriginallyInvalidOperand = false;
+  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
+                    Out, MatchingInlineAsm);
+  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
   unsigned Prefixes = getPrefixes(Operands);
 
   MCInst Inst;
 
+  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
+  // encoder.
+  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+    Prefixes |= X86::IP_USE_VEX3;
+
   if (Prefixes)
     Inst.setFlags(Prefixes);
 
   // First, try a direct match.
-  switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
-                           isParsingIntelSyntax())) {
+  FeatureBitset MissingFeatures;
+  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
+                                            MissingFeatures, MatchingInlineAsm,
+                                            isParsingIntelSyntax());
+  switch (OriginalError) {
   default: llvm_unreachable("Unexpected match result!");
   case Match_Success:
     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
@@ -2973,13 +3174,17 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
     Opcode = Inst.getOpcode();
     return false;
   case Match_MissingFeature:
-    return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
+    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
   case Match_InvalidOperand:
-    WasOriginallyInvalidOperand = true;
-    break;
   case Match_MnemonicFail:
+  case Match_Unsupported:
     break;
   }
+  if (Op.getToken().empty()) {
+    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
+          MatchingInlineAsm);
+    return true;
+  }
 
   // FIXME: Ideally, we would only attempt suffix matches for things which are
   // valid prefixes, and we could just infer the right unambiguous
@@ -3003,16 +3208,17 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
 
   // Check for the various suffix matches.
   uint64_t ErrorInfoIgnore;
-  uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+  FeatureBitset ErrorInfoMissingFeatures; // Set on a missing-feature match.
   unsigned Match[4];
 
   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
     Tmp.back() = Suffixes[I];
     Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
-                                MatchingInlineAsm, isParsingIntelSyntax());
+                                MissingFeatures, MatchingInlineAsm,
+                                isParsingIntelSyntax());
     // If this returned as a missing feature failure, remember that.
     if (Match[I] == Match_MissingFeature)
-      ErrorInfoMissingFeature = ErrorInfoIgnore;
+      ErrorInfoMissingFeatures = MissingFeatures;
   }
 
   // Restore the old token.
@@ -3062,11 +3268,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   // If all of the instructions reported an invalid mnemonic, then the original
   // mnemonic was invalid.
   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
-    if (!WasOriginallyInvalidOperand) {
+    if (OriginalError == Match_MnemonicFail)
       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                    Op.getLocRange(), MatchingInlineAsm);
-    }
 
+    if (OriginalError == Match_Unsupported)
+      return Error(IDLoc, "unsupported instruction", EmptyRange,
+                   MatchingInlineAsm);
+
+    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
     // Recover location info for the operand if we know which was the problem.
     if (ErrorInfo != ~0ULL) {
       if (ErrorInfo >= Operands.size())
@@ -3085,12 +3295,19 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                  MatchingInlineAsm);
   }
 
+  // If one instruction matched as unsupported, report this as unsupported.
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_Unsupported) == 1) {
+    return Error(IDLoc, "unsupported instruction", EmptyRange,
+                 MatchingInlineAsm);
+  }
+
   // If one instruction matched with a missing feature, report this as a
   // missing feature.
   if (std::count(std::begin(Match), std::end(Match),
                  Match_MissingFeature) == 1) {
-    ErrorInfo = ErrorInfoMissingFeature;
-    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+    ErrorInfo = Match_MissingFeature;
+    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                                MatchingInlineAsm);
   }
 
@@ -3114,18 +3331,23 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                 uint64_t &ErrorInfo,
                                                 bool MatchingInlineAsm) {
   assert(!Operands.empty() && "Unexpect empty operand list!");
-  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
-  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
-  StringRef Mnemonic = Op.getToken();
+  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
+  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
   SMRange EmptyRange = None;
-  StringRef Base = Op.getToken();
+  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
   unsigned Prefixes = getPrefixes(Operands);
 
   // First, handle aliases that expand to multiple instructions.
-  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
+  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
+  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
 
   MCInst Inst;
 
+  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
+  // encoder.
+  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+    Prefixes |= X86::IP_USE_VEX3;
+
   if (Prefixes)
     Inst.setFlags(Prefixes);
 
@@ -3154,7 +3376,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
   }
 
   SmallVector<unsigned, 8> Match;
-  uint64_t ErrorInfoMissingFeature = 0;
+  FeatureBitset ErrorInfoMissingFeatures;
+  FeatureBitset MissingFeatures;
 
   // If unsized push has immediate operand we should default the default pointer
   // size for the size.
@@ -3174,7 +3397,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
         Op.setTokenValue(Tmp);
         // Do match in ATT mode to allow explicit suffix usage.
         Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
-                                         MatchingInlineAsm,
+                                         MissingFeatures, MatchingInlineAsm,
                                          false /*isParsingIntelSyntax()*/));
         Op.setTokenValue(Base);
       }
@@ -3191,13 +3414,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
       uint64_t ErrorInfoIgnore;
       unsigned LastOpcode = Inst.getOpcode();
       unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
-                                    MatchingInlineAsm, isParsingIntelSyntax());
+                                    MissingFeatures, MatchingInlineAsm,
+                                    isParsingIntelSyntax());
       if (Match.empty() || LastOpcode != Inst.getOpcode())
         Match.push_back(M);
 
       // If this returned as a missing feature failure, remember that.
       if (Match.back() == Match_MissingFeature)
-        ErrorInfoMissingFeature = ErrorInfoIgnore;
+        ErrorInfoMissingFeatures = MissingFeatures;
     }
 
     // Restore the size of the unsized memory operand if we modified it.
@@ -3209,10 +3433,11 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
   // matching with the unsized operand.
   if (Match.empty()) {
     Match.push_back(MatchInstruction(
-        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
+        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
+        isParsingIntelSyntax()));
     // If this returned as a missing feature failure, remember that.
     if (Match.back() == Match_MissingFeature)
-      ErrorInfoMissingFeature = ErrorInfo;
+      ErrorInfoMissingFeatures = MissingFeatures;
   }
 
   // Restore the size of the unsized memory operand if we modified it.
@@ -3234,7 +3459,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
       UnsizedMemOp->getMemFrontendSize()) {
     UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
     unsigned M = MatchInstruction(
-        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
+        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
+        isParsingIntelSyntax());
     if (M == Match_Success)
       NumSuccessfulMatches = 1;
 
@@ -3270,12 +3496,19 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                  UnsizedMemOp->getLocRange());
   }
 
+  // If one instruction matched as unsupported, report this as unsupported.
+  if (std::count(std::begin(Match), std::end(Match),
+                 Match_Unsupported) == 1) {
+    return Error(IDLoc, "unsupported instruction", EmptyRange,
+                 MatchingInlineAsm);
+  }
+
   // If one instruction matched with a missing feature, report this as a
   // missing feature.
   if (std::count(std::begin(Match), std::end(Match),
                  Match_MissingFeature) == 1) {
-    ErrorInfo = ErrorInfoMissingFeature;
-    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+    ErrorInfo = Match_MissingFeature;
+    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                                MatchingInlineAsm);
   }
 
diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index c45a3f14ef11..5bc979d1f18c 100644
--- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -1,9 +1,8 @@
 //===-- X86AsmParserCommon.h - Common functions for X86AsmParser ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 4d4aae0a1c6a..a771ba366318 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -1,16 +1,15 @@
 //===- X86Operand.h - Parsed X86 machine instruction ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
 #define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
 
-#include "InstPrinter/X86IntelInstPrinter.h"
+#include "MCTargetDesc/X86IntelInstPrinter.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86AsmParserCommon.h"
 #include "llvm/ADT/STLExtras.h"
@@ -452,6 +451,31 @@ struct X86Operand final : public MCParsedAsmOperand {
       X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
   }
 
+  bool isVK1Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
+  }
+
+  bool isVK2Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg());
+  }
+
+  bool isVK4Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg());
+  }
+
+  bool isVK8Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg());
+  }
+
+  bool isVK16Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg());
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.
     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -483,6 +507,30 @@ struct X86Operand final : public MCParsedAsmOperand {
     addExpr(Inst, getImm());
   }
 
+  void addMaskPairOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    unsigned Reg = getReg();
+    switch (Reg) {
+    case X86::K0:
+    case X86::K1:
+      Reg = X86::K0_K1;
+      break;
+    case X86::K2:
+    case X86::K3:
+      Reg = X86::K2_K3;
+      break;
+    case X86::K4:
+    case X86::K5:
+      Reg = X86::K4_K5;
+      break;
+    case X86::K6:
+    case X86::K7:
+      Reg = X86::K6_K7;
+      break;
+    }
+    Inst.addOperand(MCOperand::createReg(Reg));
+  }
+
   void addMemOperands(MCInst &Inst, unsigned N) const {
     assert((N == 5) && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62312777318e..9a635bbe5f85 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1,9 +1,8 @@
 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,6 +75,7 @@
 
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
 #include "X86DisassemblerDecoder.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -446,211 +446,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
     case ENCODING_IO:
       break;
     }
-  } else if (type == TYPE_IMM3) {
-    // Check for immediates that printSSECC can't handle.
-    if (immediate >= 8) {
-      unsigned NewOpc;
-      switch (mcInst.getOpcode()) {
-      default: llvm_unreachable("unexpected opcode");
-      case X86::CMPPDrmi:  NewOpc = X86::CMPPDrmi_alt;  break;
-      case X86::CMPPDrri:  NewOpc = X86::CMPPDrri_alt;  break;
-      case X86::CMPPSrmi:  NewOpc = X86::CMPPSrmi_alt;  break;
-      case X86::CMPPSrri:  NewOpc = X86::CMPPSrri_alt;  break;
-      case X86::CMPSDrm:   NewOpc = X86::CMPSDrm_alt;   break;
-      case X86::CMPSDrr:   NewOpc = X86::CMPSDrr_alt;   break;
-      case X86::CMPSSrm:   NewOpc = X86::CMPSSrm_alt;   break;
-      case X86::CMPSSrr:   NewOpc = X86::CMPSSrr_alt;   break;
-      case X86::VPCOMBri:  NewOpc = X86::VPCOMBri_alt;  break;
-      case X86::VPCOMBmi:  NewOpc = X86::VPCOMBmi_alt;  break;
-      case X86::VPCOMWri:  NewOpc = X86::VPCOMWri_alt;  break;
-      case X86::VPCOMWmi:  NewOpc = X86::VPCOMWmi_alt;  break;
-      case X86::VPCOMDri:  NewOpc = X86::VPCOMDri_alt;  break;
-      case X86::VPCOMDmi:  NewOpc = X86::VPCOMDmi_alt;  break;
-      case X86::VPCOMQri:  NewOpc = X86::VPCOMQri_alt;  break;
-      case X86::VPCOMQmi:  NewOpc = X86::VPCOMQmi_alt;  break;
-      case X86::VPCOMUBri: NewOpc = X86::VPCOMUBri_alt; break;
-      case X86::VPCOMUBmi: NewOpc = X86::VPCOMUBmi_alt; break;
-      case X86::VPCOMUWri: NewOpc = X86::VPCOMUWri_alt; break;
-      case X86::VPCOMUWmi: NewOpc = X86::VPCOMUWmi_alt; break;
-      case X86::VPCOMUDri: NewOpc = X86::VPCOMUDri_alt; break;
-      case X86::VPCOMUDmi: NewOpc = X86::VPCOMUDmi_alt; break;
-      case X86::VPCOMUQri: NewOpc = X86::VPCOMUQri_alt; break;
-      case X86::VPCOMUQmi: NewOpc = X86::VPCOMUQmi_alt; break;
-      }
-      // Switch opcode to the one that doesn't get special printing.
-      mcInst.setOpcode(NewOpc);
-    }
-  } else if (type == TYPE_IMM5) {
-    // Check for immediates that printAVXCC can't handle.
-    if (immediate >= 32) {
-      unsigned NewOpc;
-      switch (mcInst.getOpcode()) {
-      default: llvm_unreachable("unexpected opcode");
-      case X86::VCMPPDrmi:   NewOpc = X86::VCMPPDrmi_alt;   break;
-      case X86::VCMPPDrri:   NewOpc = X86::VCMPPDrri_alt;   break;
-      case X86::VCMPPSrmi:   NewOpc = X86::VCMPPSrmi_alt;   break;
-      case X86::VCMPPSrri:   NewOpc = X86::VCMPPSrri_alt;   break;
-      case X86::VCMPSDrm:    NewOpc = X86::VCMPSDrm_alt;    break;
-      case X86::VCMPSDrr:    NewOpc = X86::VCMPSDrr_alt;    break;
-      case X86::VCMPSSrm:    NewOpc = X86::VCMPSSrm_alt;    break;
-      case X86::VCMPSSrr:    NewOpc = X86::VCMPSSrr_alt;    break;
-      case X86::VCMPPDYrmi:  NewOpc = X86::VCMPPDYrmi_alt;  break;
-      case X86::VCMPPDYrri:  NewOpc = X86::VCMPPDYrri_alt;  break;
-      case X86::VCMPPSYrmi:  NewOpc = X86::VCMPPSYrmi_alt;  break;
-      case X86::VCMPPSYrri:  NewOpc = X86::VCMPPSYrri_alt;  break;
-      case X86::VCMPPDZrmi:  NewOpc = X86::VCMPPDZrmi_alt;  break;
-      case X86::VCMPPDZrri:  NewOpc = X86::VCMPPDZrri_alt;  break;
-      case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
-      case X86::VCMPPSZrmi:  NewOpc = X86::VCMPPSZrmi_alt;  break;
-      case X86::VCMPPSZrri:  NewOpc = X86::VCMPPSZrri_alt;  break;
-      case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
-      case X86::VCMPPDZ128rmi:  NewOpc = X86::VCMPPDZ128rmi_alt;  break;
-      case X86::VCMPPDZ128rri:  NewOpc = X86::VCMPPDZ128rri_alt;  break;
-      case X86::VCMPPSZ128rmi:  NewOpc = X86::VCMPPSZ128rmi_alt;  break;
-      case X86::VCMPPSZ128rri:  NewOpc = X86::VCMPPSZ128rri_alt;  break;
-      case X86::VCMPPDZ256rmi:  NewOpc = X86::VCMPPDZ256rmi_alt;  break;
-      case X86::VCMPPDZ256rri:  NewOpc = X86::VCMPPDZ256rri_alt;  break;
-      case X86::VCMPPSZ256rmi:  NewOpc = X86::VCMPPSZ256rmi_alt;  break;
-      case X86::VCMPPSZ256rri:  NewOpc = X86::VCMPPSZ256rri_alt;  break;
-      case X86::VCMPSDZrm_Int:  NewOpc = X86::VCMPSDZrmi_alt;  break;
-      case X86::VCMPSDZrr_Int:  NewOpc = X86::VCMPSDZrri_alt;  break;
-      case X86::VCMPSDZrrb_Int: NewOpc = X86::VCMPSDZrrb_alt;  break;
-      case X86::VCMPSSZrm_Int:  NewOpc = X86::VCMPSSZrmi_alt;  break;
-      case X86::VCMPSSZrr_Int:  NewOpc = X86::VCMPSSZrri_alt;  break;
-      case X86::VCMPSSZrrb_Int: NewOpc = X86::VCMPSSZrrb_alt;  break;
-      }
-      // Switch opcode to the one that doesn't get special printing.
-      mcInst.setOpcode(NewOpc);
-    }
-  } else if (type == TYPE_AVX512ICC) {
-    if (immediate >= 8 || ((immediate & 0x3) == 3)) {
-      unsigned NewOpc;
-      switch (mcInst.getOpcode()) {
-      default: llvm_unreachable("unexpected opcode");
-      case X86::VPCMPBZ128rmi:    NewOpc = X86::VPCMPBZ128rmi_alt;    break;
-      case X86::VPCMPBZ128rmik:   NewOpc = X86::VPCMPBZ128rmik_alt;   break;
-      case X86::VPCMPBZ128rri:    NewOpc = X86::VPCMPBZ128rri_alt;    break;
-      case X86::VPCMPBZ128rrik:   NewOpc = X86::VPCMPBZ128rrik_alt;   break;
-      case X86::VPCMPBZ256rmi:    NewOpc = X86::VPCMPBZ256rmi_alt;    break;
-      case X86::VPCMPBZ256rmik:   NewOpc = X86::VPCMPBZ256rmik_alt;   break;
-      case X86::VPCMPBZ256rri:    NewOpc = X86::VPCMPBZ256rri_alt;    break;
-      case X86::VPCMPBZ256rrik:   NewOpc = X86::VPCMPBZ256rrik_alt;   break;
-      case X86::VPCMPBZrmi:       NewOpc = X86::VPCMPBZrmi_alt;       break;
-      case X86::VPCMPBZrmik:      NewOpc = X86::VPCMPBZrmik_alt;      break;
-      case X86::VPCMPBZrri:       NewOpc = X86::VPCMPBZrri_alt;       break;
-      case X86::VPCMPBZrrik:      NewOpc = X86::VPCMPBZrrik_alt;      break;
-      case X86::VPCMPDZ128rmi:    NewOpc = X86::VPCMPDZ128rmi_alt;    break;
-      case X86::VPCMPDZ128rmib:   NewOpc = X86::VPCMPDZ128rmib_alt;   break;
-      case X86::VPCMPDZ128rmibk:  NewOpc = X86::VPCMPDZ128rmibk_alt;  break;
-      case X86::VPCMPDZ128rmik:   NewOpc = X86::VPCMPDZ128rmik_alt;   break;
-      case X86::VPCMPDZ128rri:    NewOpc = X86::VPCMPDZ128rri_alt;    break;
-      case X86::VPCMPDZ128rrik:   NewOpc = X86::VPCMPDZ128rrik_alt;   break;
-      case X86::VPCMPDZ256rmi:    NewOpc = X86::VPCMPDZ256rmi_alt;    break;
-      case X86::VPCMPDZ256rmib:   NewOpc = X86::VPCMPDZ256rmib_alt;   break;
-      case X86::VPCMPDZ256rmibk:  NewOpc = X86::VPCMPDZ256rmibk_alt;  break;
-      case X86::VPCMPDZ256rmik:   NewOpc = X86::VPCMPDZ256rmik_alt;   break;
-      case X86::VPCMPDZ256rri:    NewOpc = X86::VPCMPDZ256rri_alt;    break;
-      case X86::VPCMPDZ256rrik:   NewOpc = X86::VPCMPDZ256rrik_alt;   break;
-      case X86::VPCMPDZrmi:       NewOpc = X86::VPCMPDZrmi_alt;       break;
-      case X86::VPCMPDZrmib:      NewOpc = X86::VPCMPDZrmib_alt;      break;
-      case X86::VPCMPDZrmibk:     NewOpc = X86::VPCMPDZrmibk_alt;     break;
-      case X86::VPCMPDZrmik:      NewOpc = X86::VPCMPDZrmik_alt;      break;
-      case X86::VPCMPDZrri:       NewOpc = X86::VPCMPDZrri_alt;       break;
-      case X86::VPCMPDZrrik:      NewOpc = X86::VPCMPDZrrik_alt;      break;
-      case X86::VPCMPQZ128rmi:    NewOpc = X86::VPCMPQZ128rmi_alt;    break;
-      case X86::VPCMPQZ128rmib:   NewOpc = X86::VPCMPQZ128rmib_alt;   break;
-      case X86::VPCMPQZ128rmibk:  NewOpc = X86::VPCMPQZ128rmibk_alt;  break;
-      case X86::VPCMPQZ128rmik:   NewOpc = X86::VPCMPQZ128rmik_alt;   break;
-      case X86::VPCMPQZ128rri:    NewOpc = X86::VPCMPQZ128rri_alt;    break;
-      case X86::VPCMPQZ128rrik:   NewOpc = X86::VPCMPQZ128rrik_alt;   break;
-      case X86::VPCMPQZ256rmi:    NewOpc = X86::VPCMPQZ256rmi_alt;    break;
-      case X86::VPCMPQZ256rmib:   NewOpc = X86::VPCMPQZ256rmib_alt;   break;
-      case X86::VPCMPQZ256rmibk:  NewOpc = X86::VPCMPQZ256rmibk_alt;  break;
-      case X86::VPCMPQZ256rmik:   NewOpc = X86::VPCMPQZ256rmik_alt;   break;
-      case X86::VPCMPQZ256rri:    NewOpc = X86::VPCMPQZ256rri_alt;    break;
-      case X86::VPCMPQZ256rrik:   NewOpc = X86::VPCMPQZ256rrik_alt;   break;
-      case X86::VPCMPQZrmi:       NewOpc = X86::VPCMPQZrmi_alt;       break;
-      case X86::VPCMPQZrmib:      NewOpc = X86::VPCMPQZrmib_alt;      break;
-      case X86::VPCMPQZrmibk:     NewOpc = X86::VPCMPQZrmibk_alt;     break;
-      case X86::VPCMPQZrmik:      NewOpc = X86::VPCMPQZrmik_alt;      break;
-      case X86::VPCMPQZrri:       NewOpc = X86::VPCMPQZrri_alt;       break;
-      case X86::VPCMPQZrrik:      NewOpc = X86::VPCMPQZrrik_alt;      break;
-      case X86::VPCMPUBZ128rmi:   NewOpc = X86::VPCMPUBZ128rmi_alt;   break;
-      case X86::VPCMPUBZ128rmik:  NewOpc = X86::VPCMPUBZ128rmik_alt;  break;
-      case X86::VPCMPUBZ128rri:   NewOpc = X86::VPCMPUBZ128rri_alt;   break;
-      case X86::VPCMPUBZ128rrik:  NewOpc = X86::VPCMPUBZ128rrik_alt;  break;
-      case X86::VPCMPUBZ256rmi:   NewOpc = X86::VPCMPUBZ256rmi_alt;   break;
-      case X86::VPCMPUBZ256rmik:  NewOpc = X86::VPCMPUBZ256rmik_alt;  break;
-      case X86::VPCMPUBZ256rri:   NewOpc = X86::VPCMPUBZ256rri_alt;   break;
-      case X86::VPCMPUBZ256rrik:  NewOpc = X86::VPCMPUBZ256rrik_alt;  break;
-      case X86::VPCMPUBZrmi:      NewOpc = X86::VPCMPUBZrmi_alt;      break;
-      case X86::VPCMPUBZrmik:     NewOpc = X86::VPCMPUBZrmik_alt;     break;
-      case X86::VPCMPUBZrri:      NewOpc = X86::VPCMPUBZrri_alt;      break;
-      case X86::VPCMPUBZrrik:     NewOpc = X86::VPCMPUBZrrik_alt;     break;
-      case X86::VPCMPUDZ128rmi:   NewOpc = X86::VPCMPUDZ128rmi_alt;   break;
-      case X86::VPCMPUDZ128rmib:  NewOpc = X86::VPCMPUDZ128rmib_alt;  break;
-      case X86::VPCMPUDZ128rmibk: NewOpc = X86::VPCMPUDZ128rmibk_alt; break;
-      case X86::VPCMPUDZ128rmik:  NewOpc = X86::VPCMPUDZ128rmik_alt;  break;
-      case X86::VPCMPUDZ128rri:   NewOpc = X86::VPCMPUDZ128rri_alt;   break;
-      case X86::VPCMPUDZ128rrik:  NewOpc = X86::VPCMPUDZ128rrik_alt;  break;
-      case X86::VPCMPUDZ256rmi:   NewOpc = X86::VPCMPUDZ256rmi_alt;   break;
-      case X86::VPCMPUDZ256rmib:  NewOpc = X86::VPCMPUDZ256rmib_alt;  break;
-      case X86::VPCMPUDZ256rmibk: NewOpc = X86::VPCMPUDZ256rmibk_alt; break;
-      case X86::VPCMPUDZ256rmik:  NewOpc = X86::VPCMPUDZ256rmik_alt;  break;
-      case X86::VPCMPUDZ256rri:   NewOpc = X86::VPCMPUDZ256rri_alt;   break;
-      case X86::VPCMPUDZ256rrik:  NewOpc = X86::VPCMPUDZ256rrik_alt;  break;
-      case X86::VPCMPUDZrmi:      NewOpc = X86::VPCMPUDZrmi_alt;      break;
-      case X86::VPCMPUDZrmib:     NewOpc = X86::VPCMPUDZrmib_alt;     break;
-      case X86::VPCMPUDZrmibk:    NewOpc = X86::VPCMPUDZrmibk_alt;    break;
-      case X86::VPCMPUDZrmik:     NewOpc = X86::VPCMPUDZrmik_alt;     break;
-      case X86::VPCMPUDZrri:      NewOpc = X86::VPCMPUDZrri_alt;      break;
-      case X86::VPCMPUDZrrik:     NewOpc = X86::VPCMPUDZrrik_alt;     break;
-      case X86::VPCMPUQZ128rmi:   NewOpc = X86::VPCMPUQZ128rmi_alt;   break;
-      case X86::VPCMPUQZ128rmib:  NewOpc = X86::VPCMPUQZ128rmib_alt;  break;
-      case X86::VPCMPUQZ128rmibk: NewOpc = X86::VPCMPUQZ128rmibk_alt; break;
-      case X86::VPCMPUQZ128rmik:  NewOpc = X86::VPCMPUQZ128rmik_alt;  break;
-      case X86::VPCMPUQZ128rri:   NewOpc = X86::VPCMPUQZ128rri_alt;   break;
-      case X86::VPCMPUQZ128rrik:  NewOpc = X86::VPCMPUQZ128rrik_alt;  break;
-      case X86::VPCMPUQZ256rmi:   NewOpc = X86::VPCMPUQZ256rmi_alt;   break;
-      case X86::VPCMPUQZ256rmib:  NewOpc = X86::VPCMPUQZ256rmib_alt;  break;
-      case X86::VPCMPUQZ256rmibk: NewOpc = X86::VPCMPUQZ256rmibk_alt; break;
-      case X86::VPCMPUQZ256rmik:  NewOpc = X86::VPCMPUQZ256rmik_alt;  break;
-      case X86::VPCMPUQZ256rri:   NewOpc = X86::VPCMPUQZ256rri_alt;   break;
-      case X86::VPCMPUQZ256rrik:  NewOpc = X86::VPCMPUQZ256rrik_alt;  break;
-      case X86::VPCMPUQZrmi:      NewOpc = X86::VPCMPUQZrmi_alt;      break;
-      case X86::VPCMPUQZrmib:     NewOpc = X86::VPCMPUQZrmib_alt;     break;
-      case X86::VPCMPUQZrmibk:    NewOpc = X86::VPCMPUQZrmibk_alt;    break;
-      case X86::VPCMPUQZrmik:     NewOpc = X86::VPCMPUQZrmik_alt;     break;
-      case X86::VPCMPUQZrri:      NewOpc = X86::VPCMPUQZrri_alt;      break;
-      case X86::VPCMPUQZrrik:     NewOpc = X86::VPCMPUQZrrik_alt;     break;
-      case X86::VPCMPUWZ128rmi:   NewOpc = X86::VPCMPUWZ128rmi_alt;   break;
-      case X86::VPCMPUWZ128rmik:  NewOpc = X86::VPCMPUWZ128rmik_alt;  break;
-      case X86::VPCMPUWZ128rri:   NewOpc = X86::VPCMPUWZ128rri_alt;   break;
-      case X86::VPCMPUWZ128rrik:  NewOpc = X86::VPCMPUWZ128rrik_alt;  break;
-      case X86::VPCMPUWZ256rmi:   NewOpc = X86::VPCMPUWZ256rmi_alt;   break;
-      case X86::VPCMPUWZ256rmik:  NewOpc = X86::VPCMPUWZ256rmik_alt;  break;
-      case X86::VPCMPUWZ256rri:   NewOpc = X86::VPCMPUWZ256rri_alt;   break;
-      case X86::VPCMPUWZ256rrik:  NewOpc = X86::VPCMPUWZ256rrik_alt;  break;
-      case X86::VPCMPUWZrmi:      NewOpc = X86::VPCMPUWZrmi_alt;      break;
-      case X86::VPCMPUWZrmik:     NewOpc = X86::VPCMPUWZrmik_alt;     break;
-      case X86::VPCMPUWZrri:      NewOpc = X86::VPCMPUWZrri_alt;      break;
-      case X86::VPCMPUWZrrik:     NewOpc = X86::VPCMPUWZrrik_alt;     break;
-      case X86::VPCMPWZ128rmi:    NewOpc = X86::VPCMPWZ128rmi_alt;    break;
-      case X86::VPCMPWZ128rmik:   NewOpc = X86::VPCMPWZ128rmik_alt;   break;
-      case X86::VPCMPWZ128rri:    NewOpc = X86::VPCMPWZ128rri_alt;    break;
-      case X86::VPCMPWZ128rrik:   NewOpc = X86::VPCMPWZ128rrik_alt;   break;
-      case X86::VPCMPWZ256rmi:    NewOpc = X86::VPCMPWZ256rmi_alt;    break;
-      case X86::VPCMPWZ256rmik:   NewOpc = X86::VPCMPWZ256rmik_alt;   break;
-      case X86::VPCMPWZ256rri:    NewOpc = X86::VPCMPWZ256rri_alt;    break;
-      case X86::VPCMPWZ256rrik:   NewOpc = X86::VPCMPWZ256rrik_alt;   break;
-      case X86::VPCMPWZrmi:       NewOpc = X86::VPCMPWZrmi_alt;       break;
-      case X86::VPCMPWZrmik:      NewOpc = X86::VPCMPWZrmik_alt;      break;
-      case X86::VPCMPWZrri:       NewOpc = X86::VPCMPWZrri_alt;       break;
-      case X86::VPCMPWZrrik:      NewOpc = X86::VPCMPWZrrik_alt;      break;
-      }
-      // Switch opcode to the one that doesn't get special printing.
-      mcInst.setOpcode(NewOpc);
-    }
   }
 
   switch (type) {
@@ -899,6 +694,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_XMM:
   case TYPE_YMM:
   case TYPE_ZMM:
+  case TYPE_VK_PAIR:
   case TYPE_VK:
   case TYPE_DEBUGREG:
   case TYPE_CONTROLREG:
@@ -987,6 +783,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
   case ENCODING_Rv:
     translateRegister(mcInst, insn.opcodeRegister);
     return false;
+  case ENCODING_CC:
+    mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
+    return false;
   case ENCODING_FP:
     translateFPRegister(mcInst, insn.modRM & 7);
     return false;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 54d550b60652..a241362a271d 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1,9 +1,8 @@
 //===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -377,8 +376,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
       if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
                            nextByte == 0xc6 || nextByte == 0xc7)) {
         insn->xAcquireRelease = true;
-        if (nextByte != 0x90) // PAUSE instruction support
-          break;
+        break;
       }
       if (isREX(insn, nextByte)) {
         uint8_t nnextByte;
@@ -884,7 +882,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
       if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
         attrMask |= ATTR_EVEXK;
       if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
-        attrMask |= ATTR_EVEXL;
+        attrMask |= ATTR_VEXL;
       if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
         attrMask |= ATTR_EVEXL2;
     } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
@@ -1470,6 +1468,10 @@ static int readModRM(struct InternalInstruction* insn) {
       if (index > 7)                                      \
         *valid = 0;                                       \
       return prefix##_K0 + index;                         \
+    case TYPE_VK_PAIR:                                    \
+      if (index > 7)                                      \
+        *valid = 0;                                       \
+      return prefix##_K0_K1 + (index / 2);                \
     case TYPE_MM64:                                       \
       return prefix##_MM0 + (index & 0x7);                \
     case TYPE_SEGMENTREG:                                 \
@@ -1847,6 +1849,9 @@ static int readOperands(struct InternalInstruction* insn) {
       if (readOpcodeRegister(insn, 0))
         return -1;
       break;
+    case ENCODING_CC:
+      insn->immediates[1] = insn->opcode & 0xf;
+      break;
     case ENCODING_FP:
       break;
     case ENCODING_VVVV:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 3b8a4f732eed..7c0a42c019e3 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -1,9 +1,8 @@
 //===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -325,6 +324,12 @@ namespace X86Disassembler {
   ENTRY(K6)        \
   ENTRY(K7)
 
+#define REGS_MASK_PAIRS \
+  ENTRY(K0_K1)     \
+  ENTRY(K2_K3)     \
+  ENTRY(K4_K5)     \
+  ENTRY(K6_K7)
+
 #define REGS_SEGMENT \
   ENTRY(ES)          \
   ENTRY(CS)          \
@@ -394,6 +399,7 @@ namespace X86Disassembler {
   REGS_YMM            \
   REGS_ZMM            \
   REGS_MASKS          \
+  REGS_MASK_PAIRS     \
   REGS_SEGMENT        \
   REGS_DEBUG          \
   REGS_CONTROL        \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
deleted file mode 100644
index 0e861d5ddbc9..000000000000
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as AT&T-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86ATTInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cinttypes>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "X86GenAsmWriter.inc"
-
-void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
-}
-
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                  StringRef Annot, const MCSubtargetInfo &STI) {
-  // If verbose assembly is enabled, we can print some informative comments.
-  if (CommentStream)
-    HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
-
-  printInstFlags(MI, OS);
-
-  // Output CALLpcrel32 as "callq" in 64-bit mode.
-  // In Intel annotation it's always emitted as "call".
-  //
-  // TODO: Probably this hack should be redesigned via InstAlias in
-  // InstrInfo.td as soon as Requires clause is supported properly
-  // for InstAlias.
-  if (MI->getOpcode() == X86::CALLpcrel32 &&
-      (STI.getFeatureBits()[X86::Mode64Bit])) {
-    OS << "\tcallq\t";
-    printPCRelImm(MI, 0, OS);
-  }
-  // data16 and data32 both have the same encoding of 0x66. While data32 is
-  // valid only in 16 bit systems, data16 is valid in the rest.
-  // There seems to be some lack of support of the Requires clause that causes
-  // 0x66 to be interpreted as "data16" by the asm printer.
-  // Thus we add an adjustment here in order to print the "right" instruction.
-  else if (MI->getOpcode() == X86::DATA16_PREFIX &&
-           STI.getFeatureBits()[X86::Mode16Bit]) {
-   OS << "\tdata32";
-  }
-  // Try to print any aliases first.
-  else if (!printAliasInstr(MI, OS))
-    printInstruction(MI, OS);
-
-  // Next always print the annotation.
-  printAnnotation(OS, Annot);
-}
-
-void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    printRegName(O, Op.getReg());
-  } else if (Op.isImm()) {
-    // Print immediates as signed values.
-    int64_t Imm = Op.getImm();
-    O << markup("<imm:") << '$' << formatImm(Imm) << markup(">");
-
-    // TODO: This should be in a helper function in the base class, so it can
-    // be used by other printers.
-
-    // If there are no instruction-specific comments, add a comment clarifying
-    // the hex value of the immediate operand when it isn't in the range
-    // [-256,255].
-    if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
-      // Don't print unnecessary hex sign bits.
-      if (Imm == (int16_t)(Imm))
-        *CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
-      else if (Imm == (int32_t)(Imm))
-        *CommentStream << format("imm = 0x%" PRIX32 "\n", (uint32_t)Imm);
-      else
-        *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Imm);
-    }
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << markup("<imm:") << '$';
-    Op.getExpr()->print(O, &MAI);
-    O << markup(">");
-  }
-}
-
-void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
-                                          raw_ostream &O) {
-  const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
-  const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
-  const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
-
-  O << markup("<mem:");
-
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
-
-  if (DispSpec.isImm()) {
-    int64_t DispVal = DispSpec.getImm();
-    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
-      O << formatImm(DispVal);
-  } else {
-    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
-    DispSpec.getExpr()->print(O, &MAI);
-  }
-
-  if (IndexReg.getReg() || BaseReg.getReg()) {
-    O << '(';
-    if (BaseReg.getReg())
-      printOperand(MI, Op + X86::AddrBaseReg, O);
-
-    if (IndexReg.getReg()) {
-      O << ',';
-      printOperand(MI, Op + X86::AddrIndexReg, O);
-      unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
-      if (ScaleVal != 1) {
-        O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
-          << markup(">");
-      }
-    }
-    O << ')';
-  }
-
-  O << markup(">");
-}
-
-void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
-                                    raw_ostream &O) {
-  O << markup("<mem:");
-
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + 1, O);
-
-  O << "(";
-  printOperand(MI, Op, O);
-  O << ")";
-
-  O << markup(">");
-}
-
-void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
-                                    raw_ostream &O) {
-  O << markup("<mem:");
-
-  O << "%es:(";
-  printOperand(MI, Op, O);
-  O << ")";
-
-  O << markup(">");
-}
-
-void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
-                                       raw_ostream &O) {
-  const MCOperand &DispSpec = MI->getOperand(Op);
-
-  O << markup("<mem:");
-
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + 1, O);
-
-  if (DispSpec.isImm()) {
-    O << formatImm(DispSpec.getImm());
-  } else {
-    assert(DispSpec.isExpr() && "non-immediate displacement?");
-    DispSpec.getExpr()->print(O, &MAI);
-  }
-
-  O << markup(">");
-}
-
-void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
-                                   raw_ostream &O) {
-  if (MI->getOperand(Op).isExpr())
-    return printOperand(MI, Op, O);
-
-  O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
-    << markup(">");
-}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
deleted file mode 100644
index 57422bc9a0b2..000000000000
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ /dev/null
@@ -1,138 +0,0 @@
-//=- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax --*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an X86 MCInst to AT&T style .s file syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
-
-#include "X86InstPrinterCommon.h"
-
-namespace llvm {
-
-class X86ATTInstPrinter final : public X86InstPrinterCommon {
-public:
-  X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                    const MCRegisterInfo &MRI)
-      : X86InstPrinterCommon(MAI, MII, MRI) {}
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  // Autogenerated by tblgen, returns true if we successfully printed an
-  // alias.
-  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
-  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
-                               unsigned PrintMethodIdx, raw_ostream &O);
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &OS);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS) override;
-  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
-  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-  void printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
-
-  void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-
-  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printi512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-  void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-
-  void printSrcIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printDstIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printDstIdx(MI, OpNo, O);
-  }
-  void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemOffset(MI, OpNo, O);
-  }
-
-private:
-  bool HasCustomInstComment;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
deleted file mode 100644
index 37bed37b0994..000000000000
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ /dev/null
@@ -1,1310 +0,0 @@
-//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines functionality used to emit comments about X86 instructions to
-// an output stream for -fverbose-asm.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86InstComments.h"
-#include "X86ATTInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "Utils/X86ShuffleDecode.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define CASE_SSE_INS_COMMON(Inst, src)            \
-  case X86::Inst##src:
-
-#define CASE_AVX_INS_COMMON(Inst, Suffix, src)    \
-  case X86::V##Inst##Suffix##src:
-
-#define CASE_MASK_INS_COMMON(Inst, Suffix, src)   \
-  case X86::V##Inst##Suffix##src##k:
-
-#define CASE_MASKZ_INS_COMMON(Inst, Suffix, src)  \
-  case X86::V##Inst##Suffix##src##kz:
-
-#define CASE_AVX512_INS_COMMON(Inst, Suffix, src) \
-  CASE_AVX_INS_COMMON(Inst, Suffix, src)          \
-  CASE_MASK_INS_COMMON(Inst, Suffix, src)         \
-  CASE_MASKZ_INS_COMMON(Inst, Suffix, src)
-
-#define CASE_MOVDUP(Inst, src)                    \
-  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
-  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
-  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
-  CASE_AVX_INS_COMMON(Inst, , r##src)             \
-  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
-  CASE_SSE_INS_COMMON(Inst, r##src)
-
-#define CASE_MASK_MOVDUP(Inst, src)               \
-  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
-  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
-  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_MASKZ_MOVDUP(Inst, src)              \
-  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
-  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_PMOVZX(Inst, src)                    \
-  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
-  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
-  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
-  CASE_AVX_INS_COMMON(Inst, , r##src)             \
-  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
-  CASE_SSE_INS_COMMON(Inst, r##src)
-
-#define CASE_MASK_PMOVZX(Inst, src)               \
-  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
-  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
-  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_MASKZ_PMOVZX(Inst, src)              \
-  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
-  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_UNPCK(Inst, src)                     \
-  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
-  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
-  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
-  CASE_AVX_INS_COMMON(Inst, , r##src)             \
-  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
-  CASE_SSE_INS_COMMON(Inst, r##src)
-
-#define CASE_MASK_UNPCK(Inst, src)                \
-  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
-  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
-  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_MASKZ_UNPCK(Inst, src)               \
-  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
-  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
-
-#define CASE_SHUF(Inst, suf)                      \
-  CASE_AVX512_INS_COMMON(Inst, Z, suf)            \
-  CASE_AVX512_INS_COMMON(Inst, Z256, suf)         \
-  CASE_AVX512_INS_COMMON(Inst, Z128, suf)         \
-  CASE_AVX_INS_COMMON(Inst, , suf)                \
-  CASE_AVX_INS_COMMON(Inst, Y, suf)               \
-  CASE_SSE_INS_COMMON(Inst, suf)
-
-#define CASE_MASK_SHUF(Inst, src)                 \
-  CASE_MASK_INS_COMMON(Inst, Z, r##src##i)        \
-  CASE_MASK_INS_COMMON(Inst, Z256, r##src##i)     \
-  CASE_MASK_INS_COMMON(Inst, Z128, r##src##i)
-
-#define CASE_MASKZ_SHUF(Inst, src)                \
-  CASE_MASKZ_INS_COMMON(Inst, Z, r##src##i)       \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src##i)    \
-  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src##i)
-
-#define CASE_VPERMILPI(Inst, src)                 \
-  CASE_AVX512_INS_COMMON(Inst, Z, src##i)         \
-  CASE_AVX512_INS_COMMON(Inst, Z256, src##i)      \
-  CASE_AVX512_INS_COMMON(Inst, Z128, src##i)      \
-  CASE_AVX_INS_COMMON(Inst, , src##i)             \
-  CASE_AVX_INS_COMMON(Inst, Y, src##i)
-
-#define CASE_MASK_VPERMILPI(Inst, src)            \
-  CASE_MASK_INS_COMMON(Inst, Z, src##i)           \
-  CASE_MASK_INS_COMMON(Inst, Z256, src##i)        \
-  CASE_MASK_INS_COMMON(Inst, Z128, src##i)
-
-#define CASE_MASKZ_VPERMILPI(Inst, src)           \
-  CASE_MASKZ_INS_COMMON(Inst, Z, src##i)          \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, src##i)       \
-  CASE_MASKZ_INS_COMMON(Inst, Z128, src##i)
-
-#define CASE_VPERM(Inst, src)                     \
-  CASE_AVX512_INS_COMMON(Inst, Z, src##i)         \
-  CASE_AVX512_INS_COMMON(Inst, Z256, src##i)      \
-  CASE_AVX_INS_COMMON(Inst, Y, src##i)
-
-#define CASE_MASK_VPERM(Inst, src)                \
-  CASE_MASK_INS_COMMON(Inst, Z, src##i)           \
-  CASE_MASK_INS_COMMON(Inst, Z256, src##i)
-
-#define CASE_MASKZ_VPERM(Inst, src)               \
-  CASE_MASKZ_INS_COMMON(Inst, Z, src##i)          \
-  CASE_MASKZ_INS_COMMON(Inst, Z256, src##i)
-
-#define CASE_VSHUF(Inst, src)                          \
-  CASE_AVX512_INS_COMMON(SHUFF##Inst, Z, r##src##i)    \
-  CASE_AVX512_INS_COMMON(SHUFI##Inst, Z, r##src##i)    \
-  CASE_AVX512_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
-  CASE_AVX512_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
-
-#define CASE_MASK_VSHUF(Inst, src)                    \
-  CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i)     \
-  CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i)     \
-  CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i)  \
-  CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
-
-#define CASE_MASKZ_VSHUF(Inst, src)                   \
-  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z, r##src##i)    \
-  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i)    \
-  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
-  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
-
-#define CASE_AVX512_FMA(Inst, suf)                \
-  CASE_AVX512_INS_COMMON(Inst, Z, suf)            \
-  CASE_AVX512_INS_COMMON(Inst, Z256, suf)         \
-  CASE_AVX512_INS_COMMON(Inst, Z128, suf)
-
-#define CASE_FMA(Inst, suf)                       \
-  CASE_AVX512_FMA(Inst, suf)                      \
-  CASE_AVX_INS_COMMON(Inst, , suf)                \
-  CASE_AVX_INS_COMMON(Inst, Y, suf)
-
-#define CASE_FMA_PACKED_REG(Inst)                 \
-  CASE_FMA(Inst##PD, r)                           \
-  CASE_FMA(Inst##PS, r)
-
-#define CASE_FMA_PACKED_MEM(Inst)                 \
-  CASE_FMA(Inst##PD, m)                           \
-  CASE_FMA(Inst##PS, m)                           \
-  CASE_AVX512_FMA(Inst##PD, mb)                   \
-  CASE_AVX512_FMA(Inst##PS, mb)
-
-#define CASE_FMA_SCALAR_REG(Inst)                 \
-  CASE_AVX_INS_COMMON(Inst##SD, , r)              \
-  CASE_AVX_INS_COMMON(Inst##SS, , r)              \
-  CASE_AVX_INS_COMMON(Inst##SD, , r_Int)          \
-  CASE_AVX_INS_COMMON(Inst##SS, , r_Int)          \
-  CASE_AVX_INS_COMMON(Inst##SD, Z, r)             \
-  CASE_AVX_INS_COMMON(Inst##SS, Z, r)             \
-  CASE_AVX512_INS_COMMON(Inst##SD, Z, r_Int)      \
-  CASE_AVX512_INS_COMMON(Inst##SS, Z, r_Int)
-
-#define CASE_FMA_SCALAR_MEM(Inst)                 \
-  CASE_AVX_INS_COMMON(Inst##SD, , m)              \
-  CASE_AVX_INS_COMMON(Inst##SS, , m)              \
-  CASE_AVX_INS_COMMON(Inst##SD, , m_Int)          \
-  CASE_AVX_INS_COMMON(Inst##SS, , m_Int)          \
-  CASE_AVX_INS_COMMON(Inst##SD, Z, m)             \
-  CASE_AVX_INS_COMMON(Inst##SS, Z, m)             \
-  CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int)      \
-  CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
-
-static unsigned getVectorRegSize(unsigned RegNo) {
-  if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
-    return 512;
-  if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
-    return 256;
-  if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
-    return 128;
-  if (X86::MM0 <= RegNo && RegNo <= X86::MM7)
-    return 64;
-
-  llvm_unreachable("Unknown vector reg!");
-}
-
-static unsigned getRegOperandNumElts(const MCInst *MI, unsigned ScalarSize,
-                                     unsigned OperandIndex) {
-  unsigned OpReg = MI->getOperand(OperandIndex).getReg();
-  return getVectorRegSize(OpReg) / ScalarSize;
-}
-
-static const char *getRegName(unsigned Reg) {
-  return X86ATTInstPrinter::getRegisterName(Reg);
-}
-
-/// Wraps the destination register name with AVX512 mask/maskz filtering.
-static void printMasking(raw_ostream &OS, const MCInst *MI,
-                         const MCInstrInfo &MCII) {
-  const MCInstrDesc &Desc = MCII.get(MI->getOpcode());
-  uint64_t TSFlags = Desc.TSFlags;
-
-  if (!(TSFlags & X86II::EVEX_K))
-    return;
-
-  bool MaskWithZero = (TSFlags & X86II::EVEX_Z);
-  unsigned MaskOp = Desc.getNumDefs();
-
-  if (Desc.getOperandConstraint(MaskOp, MCOI::TIED_TO) != -1)
-    ++MaskOp;
-
-  const char *MaskRegName = getRegName(MI->getOperand(MaskOp).getReg());
-
-  // MASK: zmmX {%kY}
-  OS << " {%" << MaskRegName << "}";
-
-  // MASKZ: zmmX {%kY} {z}
-  if (MaskWithZero)
-    OS << " {z}";
-}
-
-static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
-  const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
-  unsigned NumOperands = MI->getNumOperands();
-  bool RegForm = false;
-  bool Negate = false;
-  StringRef AccStr = "+";
-
-  // The operands for FMA instructions without rounding fall into two forms.
-  //  dest, src1, src2, src3
-  //  dest, src1, mask, src2, src3
-  // Where src3 is either a register or 5 memory address operands. So to find
-  // dest and src1 we can index from the front. To find src2 and src3 we can
-  // index from the end by taking into account memory vs register form when
-  // finding src2.
-
-  switch (MI->getOpcode()) {
-  default:
-    return false;
-  CASE_FMA_PACKED_REG(FMADD132)
-  CASE_FMA_SCALAR_REG(FMADD132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADD132)
-  CASE_FMA_SCALAR_MEM(FMADD132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    break;
-
-  CASE_FMA_PACKED_REG(FMADD213)
-  CASE_FMA_SCALAR_REG(FMADD213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADD213)
-  CASE_FMA_SCALAR_MEM(FMADD213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    break;
-
-  CASE_FMA_PACKED_REG(FMADD231)
-  CASE_FMA_SCALAR_REG(FMADD231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADD231)
-  CASE_FMA_SCALAR_MEM(FMADD231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUB132)
-  CASE_FMA_SCALAR_REG(FMSUB132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUB132)
-  CASE_FMA_SCALAR_MEM(FMSUB132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUB213)
-  CASE_FMA_SCALAR_REG(FMSUB213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUB213)
-  CASE_FMA_SCALAR_MEM(FMSUB213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUB231)
-  CASE_FMA_SCALAR_REG(FMSUB231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUB231)
-  CASE_FMA_SCALAR_MEM(FMSUB231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    break;
-
-  CASE_FMA_PACKED_REG(FNMADD132)
-  CASE_FMA_SCALAR_REG(FNMADD132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMADD132)
-  CASE_FMA_SCALAR_MEM(FNMADD132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FNMADD213)
-  CASE_FMA_SCALAR_REG(FNMADD213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMADD213)
-  CASE_FMA_SCALAR_MEM(FNMADD213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FNMADD231)
-  CASE_FMA_SCALAR_REG(FNMADD231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMADD231)
-  CASE_FMA_SCALAR_MEM(FNMADD231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FNMSUB132)
-  CASE_FMA_SCALAR_REG(FNMSUB132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMSUB132)
-  CASE_FMA_SCALAR_MEM(FNMSUB132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FNMSUB213)
-  CASE_FMA_SCALAR_REG(FNMSUB213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMSUB213)
-  CASE_FMA_SCALAR_MEM(FNMSUB213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FNMSUB231)
-  CASE_FMA_SCALAR_REG(FNMSUB231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FNMSUB231)
-  CASE_FMA_SCALAR_MEM(FNMSUB231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-";
-    Negate = true;
-    break;
-
-  CASE_FMA_PACKED_REG(FMADDSUB132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADDSUB132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "+/-";
-    break;
-
-  CASE_FMA_PACKED_REG(FMADDSUB213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADDSUB213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "+/-";
-    break;
-
-  CASE_FMA_PACKED_REG(FMADDSUB231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMADDSUB231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    AccStr = "+/-";
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUBADD132)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUBADD132)
-    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul1Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-/+";
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUBADD213)
-    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUBADD213)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    Mul2Name = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-/+";
-    break;
-
-  CASE_FMA_PACKED_REG(FMSUBADD231)
-    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-  CASE_FMA_PACKED_MEM(FMSUBADD231)
-    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    AccName = getRegName(MI->getOperand(1).getReg());
-    AccStr = "-/+";
-    break;
-  }
-
-  const char *DestName = getRegName(MI->getOperand(0).getReg());
-
-  if (!Mul1Name) Mul1Name = "mem";
-  if (!Mul2Name) Mul2Name = "mem";
-  if (!AccName)  AccName = "mem";
-
-  OS << DestName << " = ";
-  // TODO: Print masking information?
-
-  if (Negate)
-    OS << '-';
-
-  OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
-     << AccName;
-
-  return true;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Top Level Entrypoint
-//===----------------------------------------------------------------------===//
-
-/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
-/// newline terminated strings to the specified string if desired.  This
-/// information is shown in disassembly dumps when verbose assembly is enabled.
-bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
-                                  const MCInstrInfo &MCII) {
-  // If this is a shuffle operation, the switch should fill in this state.
-  SmallVector<int, 8> ShuffleMask;
-  const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
-  unsigned NumOperands = MI->getNumOperands();
-  bool RegForm = false;
-
-  if (printFMA3Comments(MI, OS))
-    return true;
-
-  switch (MI->getOpcode()) {
-  default:
-    // Not an instruction for which we can decode comments.
-    return false;
-
-  case X86::BLENDPDrri:
-  case X86::VBLENDPDrri:
-  case X86::VBLENDPDYrri:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::BLENDPDrmi:
-  case X86::VBLENDPDrmi:
-  case X86::VBLENDPDYrmi:
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeBLENDMask(getRegOperandNumElts(MI, 64, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::BLENDPSrri:
-  case X86::VBLENDPSrri:
-  case X86::VBLENDPSYrri:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::BLENDPSrmi:
-  case X86::VBLENDPSrmi:
-  case X86::VBLENDPSYrmi:
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::PBLENDWrri:
-  case X86::VPBLENDWrri:
-  case X86::VPBLENDWYrri:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::PBLENDWrmi:
-  case X86::VPBLENDWrmi:
-  case X86::VPBLENDWYrmi:
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeBLENDMask(getRegOperandNumElts(MI, 16, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::VPBLENDDrri:
-  case X86::VPBLENDDYrri:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::VPBLENDDrmi:
-  case X86::VPBLENDDYrmi:
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::INSERTPSrr:
-  case X86::VINSERTPSrr:
-  case X86::VINSERTPSZrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::INSERTPSrm:
-  case X86::VINSERTPSrm:
-  case X86::VINSERTPSZrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(),
-                         ShuffleMask);
-    break;
-
-  case X86::MOVLHPSrr:
-  case X86::VMOVLHPSrr:
-  case X86::VMOVLHPSZrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVLHPSMask(2, ShuffleMask);
-    break;
-
-  case X86::MOVHLPSrr:
-  case X86::VMOVHLPSrr:
-  case X86::VMOVHLPSZrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVHLPSMask(2, ShuffleMask);
-    break;
-
-  case X86::MOVHPDrm:
-  case X86::VMOVHPDrm:
-  case X86::VMOVHPDZ128rm:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeInsertElementMask(2, 1, 1, ShuffleMask);
-    break;
-
-  case X86::MOVHPSrm:
-  case X86::VMOVHPSrm:
-  case X86::VMOVHPSZ128rm:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeInsertElementMask(4, 2, 2, ShuffleMask);
-    break;
-
-  case X86::MOVLPDrm:
-  case X86::VMOVLPDrm:
-  case X86::VMOVLPDZ128rm:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeInsertElementMask(2, 0, 1, ShuffleMask);
-    break;
-
-  case X86::MOVLPSrm:
-  case X86::VMOVLPSrm:
-  case X86::VMOVLPSZ128rm:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeInsertElementMask(4, 0, 2, ShuffleMask);
-    break;
-
-  CASE_MOVDUP(MOVSLDUP, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_MOVDUP(MOVSLDUP, m)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVSLDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask);
-    break;
-
-  CASE_MOVDUP(MOVSHDUP, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_MOVDUP(MOVSHDUP, m)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVSHDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask);
-    break;
-
-  CASE_MOVDUP(MOVDDUP, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_MOVDUP(MOVDDUP, m)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeMOVDDUPMask(getRegOperandNumElts(MI, 64, 0), ShuffleMask);
-    break;
-
-  case X86::PSLLDQri:
-  case X86::VPSLLDQri:
-  case X86::VPSLLDQYri:
-  case X86::VPSLLDQZ128rr:
-  case X86::VPSLLDQZ256rr:
-  case X86::VPSLLDQZrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::VPSLLDQZ128rm:
-  case X86::VPSLLDQZ256rm:
-  case X86::VPSLLDQZrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0),
-                       MI->getOperand(NumOperands - 1).getImm(),
-                       ShuffleMask);
-    break;
-
-  case X86::PSRLDQri:
-  case X86::VPSRLDQri:
-  case X86::VPSRLDQYri:
-  case X86::VPSRLDQZ128rr:
-  case X86::VPSRLDQZ256rr:
-  case X86::VPSRLDQZrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-  case X86::VPSRLDQZ128rm:
-  case X86::VPSRLDQZ256rm:
-  case X86::VPSRLDQZrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0),
-                       MI->getOperand(NumOperands - 1).getImm(),
-                       ShuffleMask);
-    break;
-
-  CASE_SHUF(PALIGNR, rri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(PALIGNR, rmi)
-    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePALIGNRMask(getRegOperandNumElts(MI, 8, 0),
-                        MI->getOperand(NumOperands - 1).getImm(),
-                        ShuffleMask);
-    break;
-
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z, rri)
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rri)
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z, rmi)
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rmi)
-  CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rmi)
-    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeVALIGNMask(getRegOperandNumElts(MI, 64, 0),
-                       MI->getOperand(NumOperands - 1).getImm(),
-                       ShuffleMask);
-    break;
-
-  CASE_AVX512_INS_COMMON(ALIGND, Z, rri)
-  CASE_AVX512_INS_COMMON(ALIGND, Z256, rri)
-  CASE_AVX512_INS_COMMON(ALIGND, Z128, rri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_AVX512_INS_COMMON(ALIGND, Z, rmi)
-  CASE_AVX512_INS_COMMON(ALIGND, Z256, rmi)
-  CASE_AVX512_INS_COMMON(ALIGND, Z128, rmi)
-    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeVALIGNMask(getRegOperandNumElts(MI, 32, 0),
-                       MI->getOperand(NumOperands - 1).getImm(),
-                       ShuffleMask);
-    break;
-
-  CASE_SHUF(PSHUFD, ri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(PSHUFD, mi)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32,
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    break;
-
-  CASE_SHUF(PSHUFHW, ri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(PSHUFHW, mi)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFHWMask(getRegOperandNumElts(MI, 16, 0),
-                        MI->getOperand(NumOperands - 1).getImm(),
-                        ShuffleMask);
-    break;
-
-  CASE_SHUF(PSHUFLW, ri)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(PSHUFLW, mi)
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFLWMask(getRegOperandNumElts(MI, 16, 0),
-                        MI->getOperand(NumOperands - 1).getImm(),
-                        ShuffleMask);
-    break;
-
-  case X86::MMX_PSHUFWri:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::MMX_PSHUFWmi:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFMask(4, 16, MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    break;
-
-  case X86::PSWAPDrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::PSWAPDrm:
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodePSWAPMask(2, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKHBW, r)
-  case X86::MMX_PUNPCKHBWirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKHBW, m)
-  case X86::MMX_PUNPCKHBWirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKHWD, r)
-  case X86::MMX_PUNPCKHWDirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKHWD, m)
-  case X86::MMX_PUNPCKHWDirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKHDQ, r)
-  case X86::MMX_PUNPCKHDQirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKHDQ, m)
-  case X86::MMX_PUNPCKHDQirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKHQDQ, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKHQDQ, m)
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKLBW, r)
-  case X86::MMX_PUNPCKLBWirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKLBW, m)
-  case X86::MMX_PUNPCKLBWirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKLWD, r)
-  case X86::MMX_PUNPCKLWDirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKLWD, m)
-  case X86::MMX_PUNPCKLWDirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKLDQ, r)
-  case X86::MMX_PUNPCKLDQirr:
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKLDQ, m)
-  case X86::MMX_PUNPCKLDQirm:
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
-    break;
-
-  CASE_UNPCK(PUNPCKLQDQ, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(PUNPCKLQDQ, m)
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
-    break;
-
-  CASE_SHUF(SHUFPD, rri)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(SHUFPD, rmi)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeSHUFPMask(getRegOperandNumElts(MI, 64, 0), 64,
-                      MI->getOperand(NumOperands - 1).getImm(), ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_SHUF(SHUFPS, rri)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_SHUF(SHUFPS, rmi)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeSHUFPMask(getRegOperandNumElts(MI, 32, 0), 32,
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VSHUF(64X2, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_VSHUF(64X2, m)
-    decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 64, 0), 64,
-                              MI->getOperand(NumOperands - 1).getImm(),
-                              ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VSHUF(32X4, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_VSHUF(32X4, m)
-    decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 32, 0), 32,
-                              MI->getOperand(NumOperands - 1).getImm(),
-                              ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_UNPCK(UNPCKLPD, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(UNPCKLPD, m)
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_UNPCK(UNPCKLPS, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(UNPCKLPS, m)
-    DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_UNPCK(UNPCKHPD, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(UNPCKHPD, m)
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_UNPCK(UNPCKHPS, r)
-    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    RegForm = true;
-    LLVM_FALLTHROUGH;
-
-  CASE_UNPCK(UNPCKHPS, m)
-    DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VPERMILPI(PERMILPS, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_VPERMILPI(PERMILPS, m)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32,
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VPERMILPI(PERMILPD, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_VPERMILPI(PERMILPD, m)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodePSHUFMask(getRegOperandNumElts(MI, 64, 0), 64,
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::VPERM2F128rr:
-  case X86::VPERM2I128rr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::VPERM2F128rm:
-  case X86::VPERM2I128rm:
-    // For instruction comments purpose, assume the 256-bit vector is v4i64.
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeVPERM2X128Mask(4, MI->getOperand(NumOperands - 1).getImm(),
-                           ShuffleMask);
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VPERM(PERMPD, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_VPERM(PERMPD, m)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_VPERM(PERMQ, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
-    LLVM_FALLTHROUGH;
-
-  CASE_VPERM(PERMQ, m)
-    if (MI->getOperand(NumOperands - 1).isImm())
-      DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0),
-                      MI->getOperand(NumOperands - 1).getImm(),
-                      ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::MOVSDrr:
-  case X86::VMOVSDrr:
-  case X86::VMOVSDZrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::MOVSDrm:
-  case X86::VMOVSDrm:
-  case X86::VMOVSDZrm:
-    DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::MOVSSrr:
-  case X86::VMOVSSrr:
-  case X86::VMOVSSZrr:
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::MOVSSrm:
-  case X86::VMOVSSrm:
-  case X86::VMOVSSZrm:
-    DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::MOVPQI2QIrr:
-  case X86::MOVZPQILo2PQIrr:
-  case X86::VMOVPQI2QIrr:
-  case X86::VMOVPQI2QIZrr:
-  case X86::VMOVZPQILo2PQIrr:
-  case X86::VMOVZPQILo2PQIZrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    LLVM_FALLTHROUGH;
-
-  case X86::MOVQI2PQIrm:
-  case X86::VMOVQI2PQIrm:
-  case X86::VMOVQI2PQIZrm:
-    DecodeZeroMoveLowMask(2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::MOVDI2PDIrm:
-  case X86::VMOVDI2PDIrm:
-  case X86::VMOVDI2PDIZrm:
-    DecodeZeroMoveLowMask(4, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  case X86::EXTRQI:
-    if (MI->getOperand(2).isImm() &&
-        MI->getOperand(3).isImm())
-      DecodeEXTRQIMask(16, 8, MI->getOperand(2).getImm(),
-                       MI->getOperand(3).getImm(), ShuffleMask);
-
-    DestName = getRegName(MI->getOperand(0).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    break;
-
-  case X86::INSERTQI:
-    if (MI->getOperand(3).isImm() &&
-        MI->getOperand(4).isImm())
-      DecodeINSERTQIMask(16, 8, MI->getOperand(3).getImm(),
-                         MI->getOperand(4).getImm(), ShuffleMask);
-
-    DestName = getRegName(MI->getOperand(0).getReg());
-    Src1Name = getRegName(MI->getOperand(1).getReg());
-    Src2Name = getRegName(MI->getOperand(2).getReg());
-    break;
-
-  case X86::VBROADCASTF128:
-  case X86::VBROADCASTI128:
-  CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
-    DecodeSubVectorBroadcast(4, 2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
-    DecodeSubVectorBroadcast(8, 2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
-    DecodeSubVectorBroadcast(8, 4, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z256, rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z256, rm)
-    DecodeSubVectorBroadcast(8, 4, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
-    DecodeSubVectorBroadcast(16, 4, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
-    DecodeSubVectorBroadcast(16, 8, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m)
-    DecodeSubVectorBroadcast(4, 2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
-    DecodeSubVectorBroadcast(8, 2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
-  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
-    DecodeSubVectorBroadcast(16, 2, ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXBW, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXBW, m)
-    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXBD, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXBD, m)
-    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXBQ, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXBQ, m)
-    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXWD, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXWD, m)
-    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXWQ, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXWQ, m)
-    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-
-  CASE_PMOVZX(PMOVZXDQ, r)
-    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
-    LLVM_FALLTHROUGH;
-  CASE_PMOVZX(PMOVZXDQ, m)
-    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
-    DestName = getRegName(MI->getOperand(0).getReg());
-    break;
-  }
-
-  // The only comments we decode are shuffles, so give up if we were unable to
-  // decode a shuffle mask.
-  if (ShuffleMask.empty())
-    return false;
-
-  if (!DestName) DestName = Src1Name;
-  if (DestName) {
-    OS << DestName;
-    printMasking(OS, MI, MCII);
-  } else
-    OS << "mem";
-
-  OS << " = ";
-
-  // If the two sources are the same, canonicalize the input elements to be
-  // from the first src so that we get larger element spans.
-  if (Src1Name == Src2Name) {
-    for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
-      if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
-          ShuffleMask[i] >= (int)e)   // From second mask.
-        ShuffleMask[i] -= e;
-    }
-  }
-
-  // The shuffle mask specifies which elements of the src1/src2 fill in the
-  // destination, with a few sentinel values.  Loop through and print them
-  // out.
-  for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
-    if (i != 0)
-      OS << ',';
-    if (ShuffleMask[i] == SM_SentinelZero) {
-      OS << "zero";
-      continue;
-    }
-
-    // Otherwise, it must come from src1 or src2.  Print the span of elements
-    // that comes from this src.
-    bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
-    const char *SrcName = isSrc1 ? Src1Name : Src2Name;
-    OS << (SrcName ? SrcName : "mem") << '[';
-    bool IsFirst = true;
-    while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
-           (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
-      if (!IsFirst)
-        OS << ',';
-      else
-        IsFirst = false;
-      if (ShuffleMask[i] == SM_SentinelUndef)
-        OS << "u";
-      else
-        OS << ShuffleMask[i] % ShuffleMask.size();
-      ++i;
-    }
-    OS << ']';
-    --i; // For loop increments element #.
-  }
-
-  // We successfully added a comment to this instruction.
-  return true;
-}
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
deleted file mode 100644
index 40dffa5fbb8a..000000000000
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines functionality used to emit comments about X86 instructions to
-// an output stream for -fverbose-asm.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
-
-namespace llvm {
-
-  class MCInst;
-  class MCInstrInfo;
-  class raw_ostream;
-  bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
-                              const MCInstrInfo &MCII);
-}
-
-#endif
diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
deleted file mode 100644
index 432cd47ae499..000000000000
--- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===--- X86InstPrinterCommon.cpp - X86 assembly instruction printing -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes common code for rendering MCInst instances as Intel-style
-// and Intel-style assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86InstPrinterCommon.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Casting.h"
-#include <cstdint>
-#include <cassert>
-
-using namespace llvm;
-
-void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op,
-                                         raw_ostream &O) {
-  int64_t Imm = MI->getOperand(Op).getImm();
-  switch (Imm) {
-  default: llvm_unreachable("Invalid ssecc/avxcc argument!");
-  case    0: O << "eq"; break;
-  case    1: O << "lt"; break;
-  case    2: O << "le"; break;
-  case    3: O << "unord"; break;
-  case    4: O << "neq"; break;
-  case    5: O << "nlt"; break;
-  case    6: O << "nle"; break;
-  case    7: O << "ord"; break;
-  case    8: O << "eq_uq"; break;
-  case    9: O << "nge"; break;
-  case  0xa: O << "ngt"; break;
-  case  0xb: O << "false"; break;
-  case  0xc: O << "neq_oq"; break;
-  case  0xd: O << "ge"; break;
-  case  0xe: O << "gt"; break;
-  case  0xf: O << "true"; break;
-  case 0x10: O << "eq_os"; break;
-  case 0x11: O << "lt_oq"; break;
-  case 0x12: O << "le_oq"; break;
-  case 0x13: O << "unord_s"; break;
-  case 0x14: O << "neq_us"; break;
-  case 0x15: O << "nlt_uq"; break;
-  case 0x16: O << "nle_uq"; break;
-  case 0x17: O << "ord_s"; break;
-  case 0x18: O << "eq_us"; break;
-  case 0x19: O << "nge_uq"; break;
-  case 0x1a: O << "ngt_uq"; break;
-  case 0x1b: O << "false_os"; break;
-  case 0x1c: O << "neq_os"; break;
-  case 0x1d: O << "ge_oq"; break;
-  case 0x1e: O << "gt_oq"; break;
-  case 0x1f: O << "true_us"; break;
-  }
-}
-
-void X86InstPrinterCommon::printXOPCC(const MCInst *MI, unsigned Op,
-                                      raw_ostream &O) {
-  int64_t Imm = MI->getOperand(Op).getImm();
-  switch (Imm) {
-  default: llvm_unreachable("Invalid xopcc argument!");
-  case 0: O << "lt"; break;
-  case 1: O << "le"; break;
-  case 2: O << "gt"; break;
-  case 3: O << "ge"; break;
-  case 4: O << "eq"; break;
-  case 5: O << "neq"; break;
-  case 6: O << "false"; break;
-  case 7: O << "true"; break;
-  }
-}
-
-void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
-                                                raw_ostream &O) {
-  int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
-  switch (Imm) {
-  case 0: O << "{rn-sae}"; break;
-  case 1: O << "{rd-sae}"; break;
-  case 2: O << "{ru-sae}"; break;
-  case 3: O << "{rz-sae}"; break;
-  }
-}
-
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value (e.g. for jumps and calls).  In
-/// Intel-style these print slightly differently than normal immediates.
-/// for example, a $ is not emitted.
-void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
-                                         raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    O << formatImm(Op.getImm());
-  else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    // If a symbolic branch target was added as a constant expression then print
-    // that address in hex.
-    const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
-    int64_t Address;
-    if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
-      O << formatHex((uint64_t)Address);
-    } else {
-      // Otherwise, just print the expression.
-      Op.getExpr()->print(O, &MAI);
-    }
-  }
-}
-
-void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo,
-                                               raw_ostream &O) {
-  if (MI->getOperand(OpNo).getReg()) {
-    printOperand(MI, OpNo, O);
-    O << ':';
-  }
-}
-
-void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
-  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
-  uint64_t TSFlags = Desc.TSFlags;
-  unsigned Flags = MI->getFlags();
-
-  if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK))
-    O << "\tlock\t";
-
-  if ((TSFlags & X86II::NOTRACK) || (Flags & X86::IP_HAS_NOTRACK))
-    O << "\tnotrack\t";
-
-  if (Flags & X86::IP_HAS_REPEAT_NE)
-    O << "\trepne\t";
-  else if (Flags & X86::IP_HAS_REPEAT)
-    O << "\trep\t";
-}
diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h
deleted file mode 100644
index f2875e71f22c..000000000000
--- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- X86InstPrinterCommon.cpp - X86 assembly instruction printing ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code common for rendering MCInst instances as AT&T-style
-// and Intel-style assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTPRINTERCOMMON_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTPRINTERCOMMON_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class X86InstPrinterCommon : public MCInstPrinter {
-public:
-  using MCInstPrinter::MCInstPrinter;
-
-  virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0;
-  void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
-  void printXOPCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
-  void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-protected:
-  void printInstFlags(const MCInst *MI, raw_ostream &O);
-  void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
deleted file mode 100644
index 044b71564152..000000000000
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as Intel-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86IntelInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "X86GenAsmWriter1.inc"
-
-void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << getRegisterName(RegNo);
-}
-
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                    StringRef Annot,
-                                    const MCSubtargetInfo &STI) {
-  printInstFlags(MI, OS);
-
-  // In 16-bit mode, print data16 as data32.
-  if (MI->getOpcode() == X86::DATA16_PREFIX &&
-      STI.getFeatureBits()[X86::Mode16Bit]) {
-    OS << "\tdata32";
-  } else
-    printInstruction(MI, OS);
-
-  // Next always print the annotation.
-  printAnnotation(OS, Annot);
-
-  // If verbose assembly is enabled, we can print some informative comments.
-  if (CommentStream)
-    EmitAnyX86InstComments(MI, *CommentStream, MII);
-}
-
-void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    printRegName(O, Op.getReg());
-  } else if (Op.isImm()) {
-    O << formatImm((int64_t)Op.getImm());
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << "offset ";
-    Op.getExpr()->print(O, &MAI);
-  }
-}
-
-void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
-                                            raw_ostream &O) {
-  const MCOperand &BaseReg  = MI->getOperand(Op+X86::AddrBaseReg);
-  unsigned ScaleVal         = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
-  const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
-  const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
-
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
-
-  O << '[';
-
-  bool NeedPlus = false;
-  if (BaseReg.getReg()) {
-    printOperand(MI, Op+X86::AddrBaseReg, O);
-    NeedPlus = true;
-  }
-
-  if (IndexReg.getReg()) {
-    if (NeedPlus) O << " + ";
-    if (ScaleVal != 1)
-      O << ScaleVal << '*';
-    printOperand(MI, Op+X86::AddrIndexReg, O);
-    NeedPlus = true;
-  }
-
-  if (!DispSpec.isImm()) {
-    if (NeedPlus) O << " + ";
-    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
-    DispSpec.getExpr()->print(O, &MAI);
-  } else {
-    int64_t DispVal = DispSpec.getImm();
-    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
-      if (NeedPlus) {
-        if (DispVal > 0)
-          O << " + ";
-        else {
-          O << " - ";
-          DispVal = -DispVal;
-        }
-      }
-      O << formatImm(DispVal);
-    }
-  }
-
-  O << ']';
-}
-
-void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
-                                      raw_ostream &O) {
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + 1, O);
-  O << '[';
-  printOperand(MI, Op, O);
-  O << ']';
-}
-
-void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
-                                      raw_ostream &O) {
-  // DI accesses are always ES-based.
-  O << "es:[";
-  printOperand(MI, Op, O);
-  O << ']';
-}
-
-void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
-                                         raw_ostream &O) {
-  const MCOperand &DispSpec = MI->getOperand(Op);
-
-  // If this has a segment register, print it.
-  printOptionalSegReg(MI, Op + 1, O);
-
-  O << '[';
-
-  if (DispSpec.isImm()) {
-    O << formatImm(DispSpec.getImm());
-  } else {
-    assert(DispSpec.isExpr() && "non-immediate displacement?");
-    DispSpec.getExpr()->print(O, &MAI);
-  }
-
-  O << ']';
-}
-
-void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
-                                     raw_ostream &O) {
-  if (MI->getOperand(Op).isExpr())
-    return MI->getOperand(Op).getExpr()->print(O, &MAI);
-
-  O << formatImm(MI->getOperand(Op).getImm() & 0xff);
-}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
deleted file mode 100644
index 3b34a8052bec..000000000000
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ /dev/null
@@ -1,157 +0,0 @@
-//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an X86 MCInst to Intel style .s file syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
-
-#include "X86InstPrinterCommon.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-class X86IntelInstPrinter final : public X86InstPrinterCommon {
-public:
-  X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                      const MCRegisterInfo &MRI)
-    : X86InstPrinterCommon(MAI, MII, MRI) {}
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) override;
-  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
-
-  void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-
-  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    printMemReference(MI, OpNo, O);
-  }
-
-  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "byte ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "word ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "dword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "qword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "xmmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "ymmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printi512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "zmmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "dword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "qword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "tbyte ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "xmmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "ymmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-  void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "zmmword ptr ";
-    printMemReference(MI, OpNo, O);
-  }
-
-
-  void printSrcIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "byte ptr ";
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "word ptr ";
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "dword ptr ";
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printSrcIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "qword ptr ";
-    printSrcIdx(MI, OpNo, O);
-  }
-  void printDstIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "byte ptr ";
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "word ptr ";
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "dword ptr ";
-    printDstIdx(MI, OpNo, O);
-  }
-  void printDstIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "qword ptr ";
-    printDstIdx(MI, OpNo, O);
-  }
-  void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "byte ptr ";
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "word ptr ";
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "dword ptr ";
-    printMemOffset(MI, OpNo, O);
-  }
-  void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-    O << "qword ptr ";
-    printMemOffset(MI, OpNo, O);
-  }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..ed2ee55ff2a5
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -0,0 +1,487 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ATTInstPrinter.h"
+#include "X86BaseInfo.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                  StringRef Annot, const MCSubtargetInfo &STI) {
+  // If verbose assembly is enabled, we can print some informative comments.
+  if (CommentStream)
+    HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
+
+  printInstFlags(MI, OS);
+
+  // Output CALLpcrel32 as "callq" in 64-bit mode.
+  // In Intel annotation it's always emitted as "call".
+  //
+  // TODO: Probably this hack should be redesigned via InstAlias in
+  // InstrInfo.td as soon as Requires clause is supported properly
+  // for InstAlias.
+  if (MI->getOpcode() == X86::CALLpcrel32 &&
+      (STI.getFeatureBits()[X86::Mode64Bit])) {
+    OS << "\tcallq\t";
+    printPCRelImm(MI, 0, OS);
+  }
+  // data16 and data32 both have the same encoding of 0x66. While data32 is
+  // valid only in 16 bit systems, data16 is valid in the rest.
+  // There seems to be some lack of support of the Requires clause that causes
+  // 0x66 to be interpreted as "data16" by the asm printer.
+  // Thus we add an adjustment here in order to print the "right" instruction.
+  else if (MI->getOpcode() == X86::DATA16_PREFIX &&
+           STI.getFeatureBits()[X86::Mode16Bit]) {
+   OS << "\tdata32";
+  }
+  // Try to print any aliases first.
+  else if (!printAliasInstr(MI, OS) &&
+           !printVecCompareInstr(MI, OS))
+    printInstruction(MI, OS);
+
+  // Next always print the annotation.
+  printAnnotation(OS, Annot);
+}
+
+bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
+                                             raw_ostream &OS) {
+  if (MI->getNumOperands() == 0 ||
+      !MI->getOperand(MI->getNumOperands() - 1).isImm())
+    return false;
+
+  int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+
+  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+
+  // Custom print the vector compare instructions to get the immediate
+  // translated into the mnemonic.
+  switch (MI->getOpcode()) {
+  case X86::CMPPDrmi:    case X86::CMPPDrri:
+  case X86::CMPPSrmi:    case X86::CMPPSrri:
+  case X86::CMPSDrm:     case X86::CMPSDrr:
+  case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
+  case X86::CMPSSrm:     case X86::CMPSSrr:
+  case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
+    if (Imm >= 0 && Imm <= 7) {
+      OS << '\t';
+      printCMPMnemonic(MI, /*IsVCMP*/false, OS);
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+          printdwordmem(MI, 2, OS);
+        else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+          printqwordmem(MI, 2, OS);
+        else
+          printxmmwordmem(MI, 2, OS);
+      } else
+        printOperand(MI, 2, OS);
+
+      // Skip operand 1 as its tied to the dest.
+
+      OS << ", ";
+      printOperand(MI, 0, OS);
+      return true;
+    }
+    break;
+
+  case X86::VCMPPDrmi:      case X86::VCMPPDrri:
+  case X86::VCMPPDYrmi:     case X86::VCMPPDYrri:
+  case X86::VCMPPDZ128rmi:  case X86::VCMPPDZ128rri:
+  case X86::VCMPPDZ256rmi:  case X86::VCMPPDZ256rri:
+  case X86::VCMPPDZrmi:     case X86::VCMPPDZrri:
+  case X86::VCMPPSrmi:      case X86::VCMPPSrri:
+  case X86::VCMPPSYrmi:     case X86::VCMPPSYrri:
+  case X86::VCMPPSZ128rmi:  case X86::VCMPPSZ128rri:
+  case X86::VCMPPSZ256rmi:  case X86::VCMPPSZ256rri:
+  case X86::VCMPPSZrmi:     case X86::VCMPPSZrri:
+  case X86::VCMPSDrm:       case X86::VCMPSDrr:
+  case X86::VCMPSDZrm:      case X86::VCMPSDZrr:
+  case X86::VCMPSDrm_Int:   case X86::VCMPSDrr_Int:
+  case X86::VCMPSDZrm_Int:  case X86::VCMPSDZrr_Int:
+  case X86::VCMPSSrm:       case X86::VCMPSSrr:
+  case X86::VCMPSSZrm:      case X86::VCMPSSZrr:
+  case X86::VCMPSSrm_Int:   case X86::VCMPSSrr_Int:
+  case X86::VCMPSSZrm_Int:  case X86::VCMPSSZrr_Int:
+  case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+  case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+  case X86::VCMPPDZrmik:    case X86::VCMPPDZrrik:
+  case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+  case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+  case X86::VCMPPSZrmik:    case X86::VCMPPSZrrik:
+  case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+  case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+  case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+  case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+  case X86::VCMPPDZrmbi:    case X86::VCMPPDZrmbik:
+  case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+  case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+  case X86::VCMPPSZrmbi:    case X86::VCMPPSZrmbik:
+  case X86::VCMPPDZrrib:    case X86::VCMPPDZrribk:
+  case X86::VCMPPSZrrib:    case X86::VCMPPSZrribk:
+  case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+  case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+    if (Imm >= 0 && Imm <= 31) {
+      OS << '\t';
+      printCMPMnemonic(MI, /*IsVCMP*/true, OS);
+
+      unsigned CurOp = (Desc.TSFlags & X86II::EVEX_K) ? 3 : 2;
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if (Desc.TSFlags & X86II::EVEX_B) {
+          // Broadcast form.
+          // Load size is based on W-bit.
+          if (Desc.TSFlags & X86II::VEX_W)
+            printqwordmem(MI, CurOp--, OS);
+          else
+            printdwordmem(MI, CurOp--, OS);
+
+          // Print the number of elements broadcasted.
+          unsigned NumElts;
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+          else if (Desc.TSFlags & X86II::VEX_L)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+          else
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+          OS << "{1to" << NumElts << "}";
+        } else {
+          if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+            printdwordmem(MI, CurOp--, OS);
+          else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+            printqwordmem(MI, CurOp--, OS);
+          else if (Desc.TSFlags & X86II::EVEX_L2)
+            printzmmwordmem(MI, CurOp--, OS);
+          else if (Desc.TSFlags & X86II::VEX_L)
+            printymmwordmem(MI, CurOp--, OS);
+          else
+            printxmmwordmem(MI, CurOp--, OS);
+        }
+      } else {
+        if (Desc.TSFlags & X86II::EVEX_B)
+          OS << "{sae}, ";
+        printOperand(MI, CurOp--, OS);
+      }
+
+      OS << ", ";
+      printOperand(MI, CurOp--, OS);
+      OS << ", ";
+      printOperand(MI, 0, OS);
+      if (CurOp > 0) {
+        // Print mask operand.
+        OS << " {";
+        printOperand(MI, CurOp--, OS);
+        OS << "}";
+      }
+
+      return true;
+    }
+    break;
+
+  case X86::VPCOMBmi:  case X86::VPCOMBri:
+  case X86::VPCOMDmi:  case X86::VPCOMDri:
+  case X86::VPCOMQmi:  case X86::VPCOMQri:
+  case X86::VPCOMUBmi: case X86::VPCOMUBri:
+  case X86::VPCOMUDmi: case X86::VPCOMUDri:
+  case X86::VPCOMUQmi: case X86::VPCOMUQri:
+  case X86::VPCOMUWmi: case X86::VPCOMUWri:
+  case X86::VPCOMWmi:  case X86::VPCOMWri:
+    if (Imm >= 0 && Imm <= 7) {
+      OS << '\t';
+      printVPCOMMnemonic(MI, OS);
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem)
+        printxmmwordmem(MI, 2, OS);
+      else
+        printOperand(MI, 2, OS);
+
+      OS << ", ";
+      printOperand(MI, 1, OS);
+      OS << ", ";
+      printOperand(MI, 0, OS);
+      return true;
+    }
+    break;
+
+  case X86::VPCMPBZ128rmi:   case X86::VPCMPBZ128rri:
+  case X86::VPCMPBZ256rmi:   case X86::VPCMPBZ256rri:
+  case X86::VPCMPBZrmi:      case X86::VPCMPBZrri:
+  case X86::VPCMPDZ128rmi:   case X86::VPCMPDZ128rri:
+  case X86::VPCMPDZ256rmi:   case X86::VPCMPDZ256rri:
+  case X86::VPCMPDZrmi:      case X86::VPCMPDZrri:
+  case X86::VPCMPQZ128rmi:   case X86::VPCMPQZ128rri:
+  case X86::VPCMPQZ256rmi:   case X86::VPCMPQZ256rri:
+  case X86::VPCMPQZrmi:      case X86::VPCMPQZrri:
+  case X86::VPCMPUBZ128rmi:  case X86::VPCMPUBZ128rri:
+  case X86::VPCMPUBZ256rmi:  case X86::VPCMPUBZ256rri:
+  case X86::VPCMPUBZrmi:     case X86::VPCMPUBZrri:
+  case X86::VPCMPUDZ128rmi:  case X86::VPCMPUDZ128rri:
+  case X86::VPCMPUDZ256rmi:  case X86::VPCMPUDZ256rri:
+  case X86::VPCMPUDZrmi:     case X86::VPCMPUDZrri:
+  case X86::VPCMPUQZ128rmi:  case X86::VPCMPUQZ128rri:
+  case X86::VPCMPUQZ256rmi:  case X86::VPCMPUQZ256rri:
+  case X86::VPCMPUQZrmi:     case X86::VPCMPUQZrri:
+  case X86::VPCMPUWZ128rmi:  case X86::VPCMPUWZ128rri:
+  case X86::VPCMPUWZ256rmi:  case X86::VPCMPUWZ256rri:
+  case X86::VPCMPUWZrmi:     case X86::VPCMPUWZrri:
+  case X86::VPCMPWZ128rmi:   case X86::VPCMPWZ128rri:
+  case X86::VPCMPWZ256rmi:   case X86::VPCMPWZ256rri:
+  case X86::VPCMPWZrmi:      case X86::VPCMPWZrri:
+  case X86::VPCMPBZ128rmik:  case X86::VPCMPBZ128rrik:
+  case X86::VPCMPBZ256rmik:  case X86::VPCMPBZ256rrik:
+  case X86::VPCMPBZrmik:     case X86::VPCMPBZrrik:
+  case X86::VPCMPDZ128rmik:  case X86::VPCMPDZ128rrik:
+  case X86::VPCMPDZ256rmik:  case X86::VPCMPDZ256rrik:
+  case X86::VPCMPDZrmik:     case X86::VPCMPDZrrik:
+  case X86::VPCMPQZ128rmik:  case X86::VPCMPQZ128rrik:
+  case X86::VPCMPQZ256rmik:  case X86::VPCMPQZ256rrik:
+  case X86::VPCMPQZrmik:     case X86::VPCMPQZrrik:
+  case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+  case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+  case X86::VPCMPUBZrmik:    case X86::VPCMPUBZrrik:
+  case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+  case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+  case X86::VPCMPUDZrmik:    case X86::VPCMPUDZrrik:
+  case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+  case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+  case X86::VPCMPUQZrmik:    case X86::VPCMPUQZrrik:
+  case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+  case X86::VPCMPUWZ256rmik: case X86::VPCMPUWZ256rrik:
+  case X86::VPCMPUWZrmik:    case X86::VPCMPUWZrrik:
+  case X86::VPCMPWZ128rmik:  case X86::VPCMPWZ128rrik:
+  case X86::VPCMPWZ256rmik:  case X86::VPCMPWZ256rrik:
+  case X86::VPCMPWZrmik:     case X86::VPCMPWZrrik:
+  case X86::VPCMPDZ128rmib:  case X86::VPCMPDZ128rmibk:
+  case X86::VPCMPDZ256rmib:  case X86::VPCMPDZ256rmibk:
+  case X86::VPCMPDZrmib:     case X86::VPCMPDZrmibk:
+  case X86::VPCMPQZ128rmib:  case X86::VPCMPQZ128rmibk:
+  case X86::VPCMPQZ256rmib:  case X86::VPCMPQZ256rmibk:
+  case X86::VPCMPQZrmib:     case X86::VPCMPQZrmibk:
+  case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+  case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+  case X86::VPCMPUDZrmib:    case X86::VPCMPUDZrmibk:
+  case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+  case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+  case X86::VPCMPUQZrmib:    case X86::VPCMPUQZrmibk:
+    if ((Imm >= 0 && Imm <= 2) || (Imm >= 4 && Imm <= 6)) {
+      OS << '\t';
+      printVPCMPMnemonic(MI, OS);
+
+      unsigned CurOp = (Desc.TSFlags & X86II::EVEX_K) ? 3 : 2;
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if (Desc.TSFlags & X86II::EVEX_B) {
+          // Broadcast form.
+          // Load size is based on W-bit as only D and Q are supported.
+          if (Desc.TSFlags & X86II::VEX_W)
+            printqwordmem(MI, CurOp--, OS);
+          else
+            printdwordmem(MI, CurOp--, OS);
+
+          // Print the number of elements broadcasted.
+          unsigned NumElts;
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+          else if (Desc.TSFlags & X86II::VEX_L)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+          else
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+          OS << "{1to" << NumElts << "}";
+        } else {
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            printzmmwordmem(MI, CurOp--, OS);
+          else if (Desc.TSFlags & X86II::VEX_L)
+            printymmwordmem(MI, CurOp--, OS);
+          else
+            printxmmwordmem(MI, CurOp--, OS);
+        }
+      } else {
+        printOperand(MI, CurOp--, OS);
+      }
+
+      OS << ", ";
+      printOperand(MI, CurOp--, OS);
+      OS << ", ";
+      printOperand(MI, 0, OS);
+      if (CurOp > 0) {
+        // Print mask operand.
+        OS << " {";
+        printOperand(MI, CurOp--, OS);
+        OS << "}";
+      }
+
+      return true;
+    }
+    break;
+  }
+
+  return false;
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) { // Register operand: printRegName emits it.
+    printRegName(O, Op.getReg());
+  } else if (Op.isImm()) {
+    // Immediate operand: printed as a signed value with a '$' prefix.
+    int64_t Imm = Op.getImm();
+    O << markup("<imm:") << '$' << formatImm(Imm) << markup(">");
+
+    // TODO: This should be in a helper function in the base class, so it can
+    // be used by other printers.
+
+    // If there are no instruction-specific comments, add a comment clarifying
+    // the hex value of the immediate operand when it isn't in the range
+    // [-256,255].
+    if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
+      // Don't print unnecessary hex sign bits: pick the narrowest width.
+      if (Imm == (int16_t)(Imm))
+        *CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
+      else if (Imm == (int32_t)(Imm))
+        *CommentStream << format("imm = 0x%" PRIX32 "\n", (uint32_t)Imm);
+      else
+        *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Imm);
+    }
+  } else { // Otherwise it must be an expression, printed with a '$' prefix.
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << markup("<imm:") << '$';
+    Op.getExpr()->print(O, &MAI);
+    O << markup(">");
+  }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+                                          raw_ostream &O) {
+  const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
+  const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
+
+  O << markup("<mem:");
+
+  // If this has a segment register, print it.
+  printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
+  // Displacement: an immediate 0 is omitted if a base or index is present.
+  if (DispSpec.isImm()) {
+    int64_t DispVal = DispSpec.getImm();
+    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+      O << formatImm(DispVal);
+  } else {
+    assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+    DispSpec.getExpr()->print(O, &MAI);
+  }
+  // Print "(base,index,scale)" when a base or index register is present.
+  if (IndexReg.getReg() || BaseReg.getReg()) {
+    O << '(';
+    if (BaseReg.getReg())
+      printOperand(MI, Op + X86::AddrBaseReg, O);
+
+    if (IndexReg.getReg()) {
+      O << ',';
+      printOperand(MI, Op + X86::AddrIndexReg, O);
+      unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
+      if (ScaleVal != 1) { // A scale of 1 is left implicit.
+        O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
+          << markup(">");
+      }
+    }
+    O << ')';
+  }
+
+  O << markup(">");
+}
+
+void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
+                                    raw_ostream &O) {
+  O << markup("<mem:");
+
+  // If this has a segment register (operand Op + 1), print it.
+  printOptionalSegReg(MI, Op + 1, O);
+  // The operand at Op itself is printed inside parentheses.
+  O << "(";
+  printOperand(MI, Op, O);
+  O << ")";
+
+  O << markup(">");
+}
+
+void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
+                                    raw_ostream &O) {
+  O << markup("<mem:");
+  // The segment is always printed as %es; only operand Op is read.
+  O << "%es:(";
+  printOperand(MI, Op, O);
+  O << ")";
+
+  O << markup(">");
+}
+
+void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+                                       raw_ostream &O) {
+  const MCOperand &DispSpec = MI->getOperand(Op);
+
+  O << markup("<mem:");
+
+  // If this has a segment register (operand Op + 1), print it.
+  printOptionalSegReg(MI, Op + 1, O);
+  // The offset at Op is either an immediate or an expression.
+  if (DispSpec.isImm()) {
+    O << formatImm(DispSpec.getImm());
+  } else {
+    assert(DispSpec.isExpr() && "non-immediate displacement?");
+    DispSpec.getExpr()->print(O, &MAI);
+  }
+
+  O << markup(">");
+}
+
+void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+                                   raw_ostream &O) {
+  if (MI->getOperand(Op).isExpr()) // Expressions use the generic printer.
+    return printOperand(MI, Op, O);
+  // Otherwise print only the low 8 bits of the immediate.
+  O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
+    << markup(">");
+}
+
+void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &OS) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  unsigned Reg = Op.getReg();
+  // Override the default printing to print st(0) instead of st.
+  if (Reg == X86::ST0)
+    OS << markup("<reg:") << "%st(0)" << markup(">");
+  else
+    printRegName(OS, Reg);
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
new file mode 100644
index 000000000000..747ddd30a2d9
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -0,0 +1,124 @@
+//=- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to AT&T style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
+
+#include "X86InstPrinterCommon.h"
+
+namespace llvm {
+
+class X86ATTInstPrinter final : public X86InstPrinterCommon {
+public:
+  X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                    const MCRegisterInfo &MRI)
+      : X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {}
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
+
+  // Autogenerated by tblgen, returns true if we successfully printed an
+  // alias.
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx, raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &OS);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS) override;
+  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+  void printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+  // The memory printers below all forward to printMemReference.
+  void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+
+  void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printdwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printqwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printxmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printymmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printzmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  void printtbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+  // The sized SrcIdx/DstIdx/MemOffs printers forward to the unsized ones.
+  void printSrcIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printDstIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printDstIdx(MI, OpNo, O);
+  }
+  void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemOffset(MI, OpNo, O);
+  }
+
+private:
+  bool HasCustomInstComment; // When set, printOperand adds no hex comment.
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 64e6fb9f0375..54413fa1a02f 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1,9 +1,8 @@
 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -13,6 +12,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
@@ -26,18 +26,20 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-static unsigned getFixupKindLog2Size(unsigned Kind) {
+static unsigned getFixupKindSize(unsigned Kind) {
   switch (Kind) {
   default:
     llvm_unreachable("invalid fixup kind!");
+  case FK_NONE:
+    return 0;
   case FK_PCRel_1:
   case FK_SecRel_1:
   case FK_Data_1:
-    return 0;
+    return 1;
   case FK_PCRel_2:
   case FK_SecRel_2:
   case FK_Data_2:
-    return 1;
+    return 2;
   case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
   case X86::reloc_riprel_4byte_relax:
@@ -49,12 +51,12 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
   case X86::reloc_branch_4byte_pcrel:
   case FK_SecRel_4:
   case FK_Data_4:
-    return 2;
+    return 4;
   case FK_PCRel_8:
   case FK_SecRel_8:
   case FK_Data_8:
   case X86::reloc_global_offset_table8:
-    return 3;
+    return 8;
   }
 }
 
@@ -77,6 +79,8 @@ public:
     return X86::NumTargetFixupKinds;
   }
 
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
     const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
         {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -99,11 +103,14 @@ public:
     return Infos[Kind - FirstTargetFixupKind];
   }
 
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override;
+
   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                   const MCValue &Target, MutableArrayRef<char> Data,
                   uint64_t Value, bool IsResolved,
                   const MCSubtargetInfo *STI) const override {
-    unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+    unsigned Size = getFixupKindSize(Fixup.getKind());
 
     assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
 
@@ -111,7 +118,7 @@ public:
     // Specifically ignore overflow/underflow as long as the leakage is
     // limited to the lower bits. This is to remain compatible with
     // other assemblers.
-    assert(isIntN(Size * 8 + 1, Value) &&
+    assert((Size == 0 || isIntN(Size * 8 + 1, Value)) &&
            "Value does not fit in the Fixup field");
 
     for (unsigned i = 0; i != Size; ++i)
@@ -137,40 +144,10 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) {
   switch (Op) {
   default:
     return Op;
-  case X86::JAE_1:
-    return (is16BitMode) ? X86::JAE_2 : X86::JAE_4;
-  case X86::JA_1:
-    return (is16BitMode) ? X86::JA_2 : X86::JA_4;
-  case X86::JBE_1:
-    return (is16BitMode) ? X86::JBE_2 : X86::JBE_4;
-  case X86::JB_1:
-    return (is16BitMode) ? X86::JB_2 : X86::JB_4;
-  case X86::JE_1:
-    return (is16BitMode) ? X86::JE_2 : X86::JE_4;
-  case X86::JGE_1:
-    return (is16BitMode) ? X86::JGE_2 : X86::JGE_4;
-  case X86::JG_1:
-    return (is16BitMode) ? X86::JG_2 : X86::JG_4;
-  case X86::JLE_1:
-    return (is16BitMode) ? X86::JLE_2 : X86::JLE_4;
-  case X86::JL_1:
-    return (is16BitMode) ? X86::JL_2 : X86::JL_4;
+  case X86::JCC_1:
+    return (is16BitMode) ? X86::JCC_2 : X86::JCC_4;
   case X86::JMP_1:
     return (is16BitMode) ? X86::JMP_2 : X86::JMP_4;
-  case X86::JNE_1:
-    return (is16BitMode) ? X86::JNE_2 : X86::JNE_4;
-  case X86::JNO_1:
-    return (is16BitMode) ? X86::JNO_2 : X86::JNO_4;
-  case X86::JNP_1:
-    return (is16BitMode) ? X86::JNP_2 : X86::JNP_4;
-  case X86::JNS_1:
-    return (is16BitMode) ? X86::JNS_2 : X86::JNS_4;
-  case X86::JO_1:
-    return (is16BitMode) ? X86::JO_2 : X86::JO_4;
-  case X86::JP_1:
-    return (is16BitMode) ? X86::JP_2 : X86::JP_4;
-  case X86::JS_1:
-    return (is16BitMode) ? X86::JS_2 : X86::JS_4;
   }
 }
 
@@ -266,6 +243,25 @@ static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
   return getRelaxedOpcodeBranch(Inst, is16BitMode);
 }
 
+Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
+  if (STI.getTargetTriple().isOSBinFormatELF()) { // Only ELF names match here.
+    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
+      if (Name == "R_X86_64_NONE")
+        return FK_NONE;
+    } else {
+      if (Name == "R_386_NONE")
+        return FK_NONE;
+    }
+  }
+  return MCAsmBackend::getFixupKind(Name); // Defer to the base class.
+}
+
+bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
+                                          const MCFixup &Fixup,
+                                          const MCValue &) {
+  return Fixup.getKind() == FK_NONE; // True only for FK_NONE fixups.
+}
+
 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
                                       const MCSubtargetInfo &STI) const {
   // Branches can always be relaxed in either mode.
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c85ce9bbd5a4..6bd6c6cac7df 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -1,9 +1,8 @@
 //===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,7 +48,8 @@ namespace X86 {
     TO_NEG_INF = 1,
     TO_POS_INF = 2,
     TO_ZERO = 3,
-    CUR_DIRECTION = 4
+    CUR_DIRECTION = 4,
+    NO_EXC = 8
   };
 
   /// The constants to describe instr prefixes if there are
@@ -60,9 +60,46 @@ namespace X86 {
     IP_HAS_REPEAT_NE = 4,
     IP_HAS_REPEAT = 8,
     IP_HAS_LOCK = 16,
-    NO_SCHED_INFO = 32, // Don't add sched comment to the current instr because
-                        // it was already added
-    IP_HAS_NOTRACK = 64
+    IP_HAS_NOTRACK = 32,
+    IP_USE_VEX3 = 64,
+  };
+
+  enum OperandType : unsigned {
+    /// AVX512 embedded rounding control. This should only have values 0-3.
+    OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET,
+    OPERAND_COND_CODE, // NOTE(review): presumably an X86::CondCode; confirm.
+  };
+
+  // X86 specific condition code. These correspond to X86_*_COND in
+  // X86InstrInfo.td. They must be kept in synch.
+  enum CondCode {
+    COND_O = 0,
+    COND_NO = 1,
+    COND_B = 2,
+    COND_AE = 3,
+    COND_E = 4,
+    COND_NE = 5,
+    COND_BE = 6,
+    COND_A = 7,
+    COND_S = 8,
+    COND_NS = 9,
+    COND_P = 10,
+    COND_NP = 11,
+    COND_L = 12,
+    COND_GE = 13,
+    COND_LE = 14,
+    COND_G = 15,
+    LAST_VALID_COND = COND_G, // Codes past this are artificial/invalid.
+
+    // Artificial condition codes. These are used by AnalyzeBranch
+    // to indicate a block terminated with two conditional branches that together
+    // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
+    // which can't be represented on x86 with a single condition. These
+    // are never used in MachineInstrs and are inverses of one another.
+    COND_NE_OR_P,
+    COND_E_AND_NP,
+
+    COND_INVALID
+  };
 } // end namespace X86;
 
@@ -285,6 +322,10 @@ namespace X86II {
     /// manual, this operand is described as pntr16:32 and pntr16:16
     RawFrmImm16 = 8,
 
+    /// AddCCFrm - This form is used for Jcc that encode the condition code
+    /// in the lower 4 bits of the opcode.
+    AddCCFrm = 9,
+
     /// MRM[0-7][rm] - These forms are used to represent instructions that use
     /// a Mod/RM byte, and use the middle field to hold extended opcode
     /// information.  In the intel manual these are represented as /0, /1, ...
@@ -310,10 +351,21 @@ namespace X86II {
     ///
     MRMSrcMemOp4   = 35,
 
+    /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
+    /// byte to specify the operands and also encodes a condition code.
+    ///
+    MRMSrcMemCC    = 36,
+
+    /// MRMXmCC - This form is used for instructions that use the Mod/RM byte
+    /// to specify a memory source, but doesn't use the middle field. And has
+    /// a condition code.
+    ///
+    MRMXmCC = 38,
+
     /// MRMXm - This form is used for instructions that use the Mod/RM byte
     /// to specify a memory source, but doesn't use the middle field.
     ///
-    MRMXm = 39, // Instruction that uses Mod/RM but not the middle field.
+    MRMXm = 39,
 
     // Next, instructions that operate on a memory r/m operand...
     MRM0m = 40,  MRM1m = 41,  MRM2m = 42,  MRM3m = 43, // Format /0 /1 /2 /3
@@ -339,10 +391,21 @@ namespace X86II {
     ///
     MRMSrcRegOp4   = 51,
 
+    /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
+    /// byte to specify the operands and also encodes a condition code
+    ///
+    MRMSrcRegCC    = 52,
+
+    /// MRMXrCC - This form is used for instructions that use the Mod/RM byte
+    /// to specify a register source, but doesn't use the middle field. And has
+    /// a condition code.
+    ///
+    MRMXrCC = 54,
+
     /// MRMXr - This form is used for instructions that use the Mod/RM byte
     /// to specify a register source, but doesn't use the middle field.
     ///
-    MRMXr = 55, // Instruction that uses Mod/RM but not the middle field.
+    MRMXr = 55,
 
     // Instructions that operate on a register r/m operand...
     MRM0r = 56,  MRM1r = 57,  MRM2r = 58,  MRM3r = 59, // Format /0 /1 /2 /3
@@ -681,8 +744,7 @@ namespace X86II {
       // has it as the last op.
       if (NumOps == 9 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
           (Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1 ||
-           Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1) &&
-          "Instruction with 2 defs isn't gather?")
+           Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1))
         return 2;
       return 0;
     }
@@ -711,6 +773,7 @@ namespace X86II {
     case X86II::RawFrmSrc:
     case X86II::RawFrmDst:
     case X86II::RawFrmDstSrc:
+    case X86II::AddCCFrm:
       return -1;
     case X86II::MRMDestMem:
       return 0;
@@ -724,16 +787,23 @@ namespace X86II {
     case X86II::MRMSrcMemOp4:
       // Skip registers encoded in reg, VEX_VVVV, and I8IMM.
       return 3;
+    case X86II::MRMSrcMemCC:
+      // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
+      // mask register.
+      return 1;
     case X86II::MRMDestReg:
     case X86II::MRMSrcReg:
     case X86II::MRMSrcReg4VOp3:
     case X86II::MRMSrcRegOp4:
+    case X86II::MRMSrcRegCC:
+    case X86II::MRMXrCC:
     case X86II::MRMXr:
     case X86II::MRM0r: case X86II::MRM1r:
     case X86II::MRM2r: case X86II::MRM3r:
     case X86II::MRM4r: case X86II::MRM5r:
     case X86II::MRM6r: case X86II::MRM7r:
       return -1;
+    case X86II::MRMXmCC:
     case X86II::MRMXm:
     case X86II::MRM0m: case X86II::MRM1m:
     case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index b724a89f81d2..232a06593238 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -45,7 +44,7 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
                               (EMachine != ELF::EM_386) &&
                                   (EMachine != ELF::EM_IAMCU)) {}
 
-enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
+enum X86_64RelType { RT64_NONE, RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
 
 static X86_64RelType getType64(unsigned Kind,
                                MCSymbolRefExpr::VariantKind &Modifier,
@@ -53,6 +52,8 @@ static X86_64RelType getType64(unsigned Kind,
   switch (Kind) {
   default:
     llvm_unreachable("Unimplemented");
+  case FK_NONE:
+    return RT64_NONE;
   case X86::reloc_global_offset_table8:
     Modifier = MCSymbolRefExpr::VK_GOT;
     IsPCRel = true;
@@ -103,6 +104,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
   case MCSymbolRefExpr::VK_None:
   case MCSymbolRefExpr::VK_X86_ABS8:
     switch (Type) {
+    case RT64_NONE:
+      if (Modifier == MCSymbolRefExpr::VK_None)
+        return ELF::R_X86_64_NONE;
+      llvm_unreachable("Unimplemented");
     case RT64_64:
       return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
     case RT64_32:
@@ -114,6 +119,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case RT64_8:
       return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8;
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_GOT:
     switch (Type) {
     case RT64_64:
@@ -123,8 +129,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case RT64_32S:
     case RT64_16:
     case RT64_8:
+    case RT64_NONE:
       llvm_unreachable("Unimplemented");
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_GOTOFF:
     assert(Type == RT64_64);
     assert(!IsPCRel);
@@ -139,8 +147,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case RT64_32S:
     case RT64_16:
     case RT64_8:
+    case RT64_NONE:
       llvm_unreachable("Unimplemented");
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_DTPOFF:
     assert(!IsPCRel);
     switch (Type) {
@@ -151,8 +161,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case RT64_32S:
     case RT64_16:
     case RT64_8:
+    case RT64_NONE:
       llvm_unreachable("Unimplemented");
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_SIZE:
     assert(!IsPCRel);
     switch (Type) {
@@ -163,8 +175,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case RT64_32S:
     case RT64_16:
     case RT64_8:
+    case RT64_NONE:
       llvm_unreachable("Unimplemented");
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_TLSCALL:
     return ELF::R_X86_64_TLSDESC_CALL;
   case MCSymbolRefExpr::VK_TLSDESC:
@@ -197,13 +211,16 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
     case X86::reloc_riprel_4byte_movq_load:
       return ELF::R_X86_64_REX_GOTPCRELX;
     }
+    llvm_unreachable("unexpected relocation type!");
   }
 }
 
-enum X86_32RelType { RT32_32, RT32_16, RT32_8 };
+enum X86_32RelType { RT32_NONE, RT32_32, RT32_16, RT32_8 };
 
 static X86_32RelType getType32(X86_64RelType T) {
   switch (T) {
+  case RT64_NONE:
+    return RT32_NONE;
   case RT64_64:
     llvm_unreachable("Unimplemented");
   case RT64_32:
@@ -227,6 +244,10 @@ static unsigned getRelocType32(MCContext &Ctx,
   case MCSymbolRefExpr::VK_None:
   case MCSymbolRefExpr::VK_X86_ABS8:
     switch (Type) {
+    case RT32_NONE:
+      if (Modifier == MCSymbolRefExpr::VK_None)
+        return ELF::R_386_NONE;
+      llvm_unreachable("Unimplemented");
     case RT32_32:
       return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
     case RT32_16:
@@ -234,6 +255,7 @@ static unsigned getRelocType32(MCContext &Ctx,
     case RT32_8:
       return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8;
     }
+    llvm_unreachable("unexpected relocation type!");
   case MCSymbolRefExpr::VK_GOT:
     assert(Type == RT32_32);
     if (IsPCRel)
@@ -249,6 +271,10 @@ static unsigned getRelocType32(MCContext &Ctx,
     assert(Type == RT32_32);
     assert(!IsPCRel);
     return ELF::R_386_GOTOFF;
+  case MCSymbolRefExpr::VK_TLSCALL:
+    return ELF::R_386_TLS_DESC_CALL;
+  case MCSymbolRefExpr::VK_TLSDESC:
+    return ELF::R_386_TLS_GOTDESC;
   case MCSymbolRefExpr::VK_TPOFF:
     assert(Type == RT32_32);
     assert(!IsPCRel);
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 3c04b13e002e..2d5217115d07 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -1,9 +1,8 @@
 //===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
new file mode 100644
index 000000000000..73b1969b4e82
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -0,0 +1,1322 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "X86ATTInstPrinter.h"
+#include "X86BaseInfo.h"
+#include "X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define CASE_SSE_INS_COMMON(Inst, src)            \
+  case X86::Inst##src: // label X86::<Inst><src>
+
+#define CASE_AVX_INS_COMMON(Inst, Suffix, src)    \
+  case X86::V##Inst##Suffix##src: // label X86::V<Inst><Suffix><src>
+
+#define CASE_MASK_INS_COMMON(Inst, Suffix, src)   \
+  case X86::V##Inst##Suffix##src##k: // same name with "k" appended
+
+#define CASE_MASKZ_INS_COMMON(Inst, Suffix, src)  \
+  case X86::V##Inst##Suffix##src##kz: // same name with "kz" appended
+
+#define CASE_AVX512_INS_COMMON(Inst, Suffix, src) \
+  CASE_AVX_INS_COMMON(Inst, Suffix, src)          \
+  CASE_MASK_INS_COMMON(Inst, Suffix, src)         \
+  CASE_MASKZ_INS_COMMON(Inst, Suffix, src) // three labels: plain, k and kz
+
+#define CASE_MOVDUP(Inst, src)                    \
+  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
+  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
+  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
+  CASE_AVX_INS_COMMON(Inst, , r##src)             \
+  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
+  CASE_SSE_INS_COMMON(Inst, r##src) // ends with the non-V <Inst>r<src>
+
+#define CASE_MASK_MOVDUP(Inst, src)               \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_MASKZ_MOVDUP(Inst, src)              \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_PMOVZX(Inst, src)                    \
+  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
+  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
+  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
+  CASE_AVX_INS_COMMON(Inst, , r##src)             \
+  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
+  CASE_SSE_INS_COMMON(Inst, r##src) // same expansion as CASE_MOVDUP
+
+#define CASE_MASK_PMOVZX(Inst, src)               \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_MASKZ_PMOVZX(Inst, src)              \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_UNPCK(Inst, src)                     \
+  CASE_AVX512_INS_COMMON(Inst, Z, r##src)         \
+  CASE_AVX512_INS_COMMON(Inst, Z256, r##src)      \
+  CASE_AVX512_INS_COMMON(Inst, Z128, r##src)      \
+  CASE_AVX_INS_COMMON(Inst, , r##src)             \
+  CASE_AVX_INS_COMMON(Inst, Y, r##src)            \
+  CASE_SSE_INS_COMMON(Inst, r##src) // same expansion as CASE_MOVDUP
+
+#define CASE_MASK_UNPCK(Inst, src)                \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)           \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src)        \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_MASKZ_UNPCK(Inst, src)               \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src)          \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src)       \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
+
+#define CASE_SHUF(Inst, suf)                      \
+  CASE_AVX512_INS_COMMON(Inst, Z, suf)            \
+  CASE_AVX512_INS_COMMON(Inst, Z256, suf)         \
+  CASE_AVX512_INS_COMMON(Inst, Z128, suf)         \
+  CASE_AVX_INS_COMMON(Inst, , suf)                \
+  CASE_AVX_INS_COMMON(Inst, Y, suf)               \
+  CASE_SSE_INS_COMMON(Inst, suf) // suf is pasted as-is (no "r" prefix added)
+
+#define CASE_MASK_SHUF(Inst, src)                 \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src##i)        \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src##i)     \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) // unlike CASE_SHUF, builds r<src>i
+
+#define CASE_MASKZ_SHUF(Inst, src)                \
+  CASE_MASKZ_INS_COMMON(Inst, Z, r##src##i)       \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, r##src##i)    \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, r##src##i)
+
+#define CASE_VPERMILPI(Inst, src)                 \
+  CASE_AVX512_INS_COMMON(Inst, Z, src##i)         \
+  CASE_AVX512_INS_COMMON(Inst, Z256, src##i)      \
+  CASE_AVX512_INS_COMMON(Inst, Z128, src##i)      \
+  CASE_AVX_INS_COMMON(Inst, , src##i)             \
+  CASE_AVX_INS_COMMON(Inst, Y, src##i) // no non-V case in this list
+
+#define CASE_MASK_VPERMILPI(Inst, src)            \
+  CASE_MASK_INS_COMMON(Inst, Z, src##i)           \
+  CASE_MASK_INS_COMMON(Inst, Z256, src##i)        \
+  CASE_MASK_INS_COMMON(Inst, Z128, src##i)
+
+#define CASE_MASKZ_VPERMILPI(Inst, src)           \
+  CASE_MASKZ_INS_COMMON(Inst, Z, src##i)          \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, src##i)       \
+  CASE_MASKZ_INS_COMMON(Inst, Z128, src##i)
+
+#define CASE_VPERM(Inst, src)                     \
+  CASE_AVX512_INS_COMMON(Inst, Z, src##i)         \
+  CASE_AVX512_INS_COMMON(Inst, Z256, src##i)      \
+  CASE_AVX_INS_COMMON(Inst, Y, src##i) // only Z, Z256 and Y cases
+
+#define CASE_MASK_VPERM(Inst, src)                \
+  CASE_MASK_INS_COMMON(Inst, Z, src##i)           \
+  CASE_MASK_INS_COMMON(Inst, Z256, src##i)
+
+#define CASE_MASKZ_VPERM(Inst, src)               \
+  CASE_MASKZ_INS_COMMON(Inst, Z, src##i)          \
+  CASE_MASKZ_INS_COMMON(Inst, Z256, src##i)
+
+#define CASE_VSHUF(Inst, src)                          \
+  CASE_AVX512_INS_COMMON(SHUFF##Inst, Z, r##src##i)    \
+  CASE_AVX512_INS_COMMON(SHUFI##Inst, Z, r##src##i)    \
+  CASE_AVX512_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+  CASE_AVX512_INS_COMMON(SHUFI##Inst, Z256, r##src##i) // SHUFF/SHUFI, Z and Z256
+
+#define CASE_MASK_VSHUF(Inst, src)                    \
+  CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i)     \
+  CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i)     \
+  CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i)  \
+  CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
+
+#define CASE_MASKZ_VSHUF(Inst, src)                   \
+  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z, r##src##i)    \
+  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z, r##src##i)    \
+  CASE_MASKZ_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+  CASE_MASKZ_INS_COMMON(SHUFI##Inst, Z256, r##src##i)
+
+#define CASE_AVX512_FMA(Inst, suf)                \
+  CASE_AVX512_INS_COMMON(Inst, Z, suf)            \
+  CASE_AVX512_INS_COMMON(Inst, Z256, suf)         \
+  CASE_AVX512_INS_COMMON(Inst, Z128, suf)
+
+#define CASE_FMA(Inst, suf)                       \
+  CASE_AVX512_FMA(Inst, suf)                      \
+  CASE_AVX_INS_COMMON(Inst, , suf)                \
+  CASE_AVX_INS_COMMON(Inst, Y, suf) // adds V<Inst><suf> and V<Inst>Y<suf>
+
+#define CASE_FMA_PACKED_REG(Inst)                 \
+  CASE_FMA(Inst##PD, r)                           \
+  CASE_FMA(Inst##PS, r)
+
+#define CASE_FMA_PACKED_MEM(Inst)                 \
+  CASE_FMA(Inst##PD, m)                           \
+  CASE_FMA(Inst##PS, m)                           \
+  CASE_AVX512_FMA(Inst##PD, mb)                   \
+  CASE_AVX512_FMA(Inst##PS, mb) // "mb" labels only for Z/Z256/Z128
+
+#define CASE_FMA_SCALAR_REG(Inst)                 \
+  CASE_AVX_INS_COMMON(Inst##SD, , r)              \
+  CASE_AVX_INS_COMMON(Inst##SS, , r)              \
+  CASE_AVX_INS_COMMON(Inst##SD, , r_Int)          \
+  CASE_AVX_INS_COMMON(Inst##SS, , r_Int)          \
+  CASE_AVX_INS_COMMON(Inst##SD, Z, r)             \
+  CASE_AVX_INS_COMMON(Inst##SS, Z, r)             \
+  CASE_AVX512_INS_COMMON(Inst##SD, Z, r_Int)      \
+  CASE_AVX512_INS_COMMON(Inst##SS, Z, r_Int) // k/kz labels only for Z r_Int
+
+#define CASE_FMA_SCALAR_MEM(Inst)                 \
+  CASE_AVX_INS_COMMON(Inst##SD, , m)              \
+  CASE_AVX_INS_COMMON(Inst##SS, , m)              \
+  CASE_AVX_INS_COMMON(Inst##SD, , m_Int)          \
+  CASE_AVX_INS_COMMON(Inst##SS, , m_Int)          \
+  CASE_AVX_INS_COMMON(Inst##SD, Z, m)             \
+  CASE_AVX_INS_COMMON(Inst##SS, Z, m)             \
+  CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int)      \
+  CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int) // k/kz labels only for Z m_Int
+
+static unsigned getVectorRegSize(unsigned RegNo) { // size of register RegNo
+  if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31) // assumes contiguous numbering
+    return 512;
+  if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
+    return 256;
+  if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
+    return 128;
+  if (X86::MM0 <= RegNo && RegNo <= X86::MM7) // only MM0..MM7 are checked
+    return 64;
+
+  llvm_unreachable("Unknown vector reg!"); // RegNo is in none of the ranges
+}
+
+static unsigned getRegOperandNumElts(const MCInst *MI, unsigned ScalarSize,
+                                     unsigned OperandIndex) {
+  unsigned OpReg = MI->getOperand(OperandIndex).getReg(); // must be a register
+  return getVectorRegSize(OpReg) / ScalarSize; // elements of ScalarSize each
+}
+
+static const char *getRegName(unsigned Reg) { // shorthand used by the decoders
+  return X86ATTInstPrinter::getRegisterName(Reg); // X86ATTInstPrinter's name
+}
+
+/// For EVEX_K instructions, appends " {%<mask reg>}" and, if EVEX_Z, " {z}".
+static void printMasking(raw_ostream &OS, const MCInst *MI,
+                         const MCInstrInfo &MCII) {
+  const MCInstrDesc &Desc = MCII.get(MI->getOpcode());
+  uint64_t TSFlags = Desc.TSFlags;
+
+  if (!(TSFlags & X86II::EVEX_K)) // EVEX_K clear: nothing to print
+    return;
+
+  bool MaskWithZero = (TSFlags & X86II::EVEX_Z);
+  unsigned MaskOp = Desc.getNumDefs(); // first operand after the defs
+
+  if (Desc.getOperandConstraint(MaskOp, MCOI::TIED_TO) != -1) // tied operand:
+    ++MaskOp; // the mask is taken from the operand after it
+
+  const char *MaskRegName = getRegName(MI->getOperand(MaskOp).getReg());
+
+  // MASK: zmmX {%kY}
+  OS << " {%" << MaskRegName << "}";
+
+  // MASKZ: zmmX {%kY} {z}
+  if (MaskWithZero)
+    OS << " {z}";
+}
+
+static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
+  const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
+  unsigned NumOperands = MI->getNumOperands();
+  bool RegForm = false; // set by the register-form cases before falling through
+  bool Negate = false;
+  StringRef AccStr = "+"; // operator printed in front of the accumulator
+
+  // The operands for FMA instructions without rounding fall into two forms.
+  //  dest, src1, src2, src3
+  //  dest, src1, mask, src2, src3
+  // Where src3 is either a register or 5 memory address operands. So to find
+  // dest and src1 we can index from the front. To find src2 and src3 we can
+  // index from the end by taking into account memory vs register form when
+  // finding src2.
+
+  switch (MI->getOpcode()) {
+  default:
+    return false; // opcode matches none of the FMA3 cases below
+  CASE_FMA_PACKED_REG(FMADD132)
+  CASE_FMA_SCALAR_REG(FMADD132)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); // src3
+    RegForm = true; // src2 is then 2 from the end instead of 6
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADD132)
+  CASE_FMA_SCALAR_MEM(FMADD132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg()); // src1
+    break;
+
+  CASE_FMA_PACKED_REG(FMADD213)
+  CASE_FMA_SCALAR_REG(FMADD213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADD213)
+  CASE_FMA_SCALAR_MEM(FMADD213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    break;
+
+  CASE_FMA_PACKED_REG(FMADD231)
+  CASE_FMA_SCALAR_REG(FMADD231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADD231)
+  CASE_FMA_SCALAR_MEM(FMADD231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUB132)
+  CASE_FMA_SCALAR_REG(FMSUB132)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUB132)
+  CASE_FMA_SCALAR_MEM(FMSUB132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUB213)
+  CASE_FMA_SCALAR_REG(FMSUB213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUB213)
+  CASE_FMA_SCALAR_MEM(FMSUB213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUB231)
+  CASE_FMA_SCALAR_REG(FMSUB231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUB231)
+  CASE_FMA_SCALAR_MEM(FMSUB231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    break;
+
+  CASE_FMA_PACKED_REG(FNMADD132)
+  CASE_FMA_SCALAR_REG(FNMADD132)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMADD132)
+  CASE_FMA_SCALAR_MEM(FNMADD132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FNMADD213)
+  CASE_FMA_SCALAR_REG(FNMADD213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMADD213)
+  CASE_FMA_SCALAR_MEM(FNMADD213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FNMADD231)
+  CASE_FMA_SCALAR_REG(FNMADD231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMADD231)
+  CASE_FMA_SCALAR_MEM(FNMADD231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FNMSUB132)
+  CASE_FMA_SCALAR_REG(FNMSUB132)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMSUB132)
+  CASE_FMA_SCALAR_MEM(FNMSUB132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FNMSUB213)
+  CASE_FMA_SCALAR_REG(FNMSUB213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMSUB213)
+  CASE_FMA_SCALAR_MEM(FNMSUB213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FNMSUB231)
+  CASE_FMA_SCALAR_REG(FNMSUB231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FNMSUB231)
+  CASE_FMA_SCALAR_MEM(FNMSUB231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-";
+    Negate = true;
+    break;
+
+  CASE_FMA_PACKED_REG(FMADDSUB132) // ADDSUB/SUBADD: packed cases only
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADDSUB132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "+/-";
+    break;
+
+  CASE_FMA_PACKED_REG(FMADDSUB213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADDSUB213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "+/-";
+    break;
+
+  CASE_FMA_PACKED_REG(FMADDSUB231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMADDSUB231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    AccStr = "+/-";
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUBADD132)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUBADD132)
+    AccName = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul1Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-/+";
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUBADD213)
+    AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUBADD213)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    Mul2Name = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-/+";
+    break;
+
+  CASE_FMA_PACKED_REG(FMSUBADD231)
+    Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+  CASE_FMA_PACKED_MEM(FMSUBADD231)
+    Mul1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    AccName = getRegName(MI->getOperand(1).getReg());
+    AccStr = "-/+";
+    break;
+  }
+
+  const char *DestName = getRegName(MI->getOperand(0).getReg());
+
+  if (!Mul1Name) Mul1Name = "mem"; // a name still null here is the memory src3
+  if (!Mul2Name) Mul2Name = "mem";
+  if (!AccName)  AccName = "mem";
+
+  OS << DestName << " = ";
+  // TODO: Print masking information?
+
+  if (Negate) // set by the FNMADD/FNMSUB cases: prefix the product with '-'
+    OS << '-';
+
+  OS << '(' << Mul1Name << " * " << Mul2Name << ") " << AccStr << ' '
+     << AccName;
+
+  return true; // a comment was written to OS
+}
+
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired.  This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+                                  const MCInstrInfo &MCII) {
+  // If this is a shuffle operation, the switch should fill in this state.
+  SmallVector<int, 8> ShuffleMask;
+  const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
+  unsigned NumOperands = MI->getNumOperands();
+  bool RegForm = false;
+
+  if (printFMA3Comments(MI, OS))
+    return true;
+
+  switch (MI->getOpcode()) {
+  default:
+    // Not an instruction for which we can decode comments.
+    return false;
+
+  case X86::BLENDPDrri:
+  case X86::VBLENDPDrri:
+  case X86::VBLENDPDYrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::BLENDPDrmi:
+  case X86::VBLENDPDrmi:
+  case X86::VBLENDPDYrmi:
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeBLENDMask(getRegOperandNumElts(MI, 64, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::BLENDPSrri:
+  case X86::VBLENDPSrri:
+  case X86::VBLENDPSYrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::BLENDPSrmi:
+  case X86::VBLENDPSrmi:
+  case X86::VBLENDPSYrmi:
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::PBLENDWrri:
+  case X86::VPBLENDWrri:
+  case X86::VPBLENDWYrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::PBLENDWrmi:
+  case X86::VPBLENDWrmi:
+  case X86::VPBLENDWYrmi:
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeBLENDMask(getRegOperandNumElts(MI, 16, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::VPBLENDDrri:
+  case X86::VPBLENDDYrri:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::VPBLENDDrmi:
+  case X86::VPBLENDDYrmi:
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeBLENDMask(getRegOperandNumElts(MI, 32, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::INSERTPSrr:
+  case X86::VINSERTPSrr:
+  case X86::VINSERTPSZrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::INSERTPSrm:
+  case X86::VINSERTPSrm:
+  case X86::VINSERTPSZrm:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeINSERTPSMask(MI->getOperand(NumOperands - 1).getImm(),
+                         ShuffleMask);
+    break;
+
+  case X86::MOVLHPSrr:
+  case X86::VMOVLHPSrr:
+  case X86::VMOVLHPSZrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVLHPSMask(2, ShuffleMask);
+    break;
+
+  case X86::MOVHLPSrr:
+  case X86::VMOVHLPSrr:
+  case X86::VMOVHLPSZrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVHLPSMask(2, ShuffleMask);
+    break;
+
+  case X86::MOVHPDrm:
+  case X86::VMOVHPDrm:
+  case X86::VMOVHPDZ128rm:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeInsertElementMask(2, 1, 1, ShuffleMask);
+    break;
+
+  case X86::MOVHPSrm:
+  case X86::VMOVHPSrm:
+  case X86::VMOVHPSZ128rm:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeInsertElementMask(4, 2, 2, ShuffleMask);
+    break;
+
+  case X86::MOVLPDrm:
+  case X86::VMOVLPDrm:
+  case X86::VMOVLPDZ128rm:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeInsertElementMask(2, 0, 1, ShuffleMask);
+    break;
+
+  case X86::MOVLPSrm:
+  case X86::VMOVLPSrm:
+  case X86::VMOVLPSZ128rm:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeInsertElementMask(4, 0, 2, ShuffleMask);
+    break;
+
+  CASE_MOVDUP(MOVSLDUP, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_MOVDUP(MOVSLDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVSLDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    break;
+
+  CASE_MOVDUP(MOVSHDUP, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_MOVDUP(MOVSHDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVSHDUPMask(getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    break;
+
+  CASE_MOVDUP(MOVDDUP, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_MOVDUP(MOVDDUP, m)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeMOVDDUPMask(getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    break;
+
+  case X86::PSLLDQri:
+  case X86::VPSLLDQri:
+  case X86::VPSLLDQYri:
+  case X86::VPSLLDQZ128rr:
+  case X86::VPSLLDQZ256rr:
+  case X86::VPSLLDQZrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::VPSLLDQZ128rm:
+  case X86::VPSLLDQZ256rm:
+  case X86::VPSLLDQZrm:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0),
+                       MI->getOperand(NumOperands - 1).getImm(),
+                       ShuffleMask);
+    break;
+
+  case X86::PSRLDQri:
+  case X86::VPSRLDQri:
+  case X86::VPSRLDQYri:
+  case X86::VPSRLDQZ128rr:
+  case X86::VPSRLDQZ256rr:
+  case X86::VPSRLDQZrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+  case X86::VPSRLDQZ128rm:
+  case X86::VPSRLDQZ256rm:
+  case X86::VPSRLDQZrm:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0),
+                       MI->getOperand(NumOperands - 1).getImm(),
+                       ShuffleMask);
+    break;
+
+  CASE_SHUF(PALIGNR, rri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(PALIGNR, rmi)
+    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePALIGNRMask(getRegOperandNumElts(MI, 8, 0),
+                        MI->getOperand(NumOperands - 1).getImm(),
+                        ShuffleMask);
+    break;
+
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z, rri)
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rri)
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z, rmi)
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rmi)
+  CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rmi)
+    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeVALIGNMask(getRegOperandNumElts(MI, 64, 0),
+                       MI->getOperand(NumOperands - 1).getImm(),
+                       ShuffleMask);
+    break;
+
+  CASE_AVX512_INS_COMMON(ALIGND, Z, rri)
+  CASE_AVX512_INS_COMMON(ALIGND, Z256, rri)
+  CASE_AVX512_INS_COMMON(ALIGND, Z128, rri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_AVX512_INS_COMMON(ALIGND, Z, rmi)
+  CASE_AVX512_INS_COMMON(ALIGND, Z256, rmi)
+  CASE_AVX512_INS_COMMON(ALIGND, Z128, rmi)
+    Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeVALIGNMask(getRegOperandNumElts(MI, 32, 0),
+                       MI->getOperand(NumOperands - 1).getImm(),
+                       ShuffleMask);
+    break;
+
+  CASE_SHUF(PSHUFD, ri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(PSHUFD, mi)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32,
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    break;
+
+  CASE_SHUF(PSHUFHW, ri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(PSHUFHW, mi)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFHWMask(getRegOperandNumElts(MI, 16, 0),
+                        MI->getOperand(NumOperands - 1).getImm(),
+                        ShuffleMask);
+    break;
+
+  CASE_SHUF(PSHUFLW, ri)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(PSHUFLW, mi)
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFLWMask(getRegOperandNumElts(MI, 16, 0),
+                        MI->getOperand(NumOperands - 1).getImm(),
+                        ShuffleMask);
+    break;
+
+  case X86::MMX_PSHUFWri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::MMX_PSHUFWmi:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFMask(4, 16, MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    break;
+
+  case X86::PSWAPDrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::PSWAPDrm:
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodePSWAPMask(2, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKHBW, r)
+  case X86::MMX_PUNPCKHBWirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKHBW, m)
+  case X86::MMX_PUNPCKHBWirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKHWD, r)
+  case X86::MMX_PUNPCKHWDirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKHWD, m)
+  case X86::MMX_PUNPCKHWDirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKHDQ, r)
+  case X86::MMX_PUNPCKHDQirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKHDQ, m)
+  case X86::MMX_PUNPCKHDQirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKHQDQ, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKHQDQ, m)
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKLBW, r)
+  case X86::MMX_PUNPCKLBWirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKLBW, m)
+  case X86::MMX_PUNPCKLBWirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 8, 0), 8, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKLWD, r)
+  case X86::MMX_PUNPCKLWDirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKLWD, m)
+  case X86::MMX_PUNPCKLWDirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 16, 0), 16, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKLDQ, r)
+  case X86::MMX_PUNPCKLDQirr:
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKLDQ, m)
+  case X86::MMX_PUNPCKLDQirm:
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
+    break;
+
+  CASE_UNPCK(PUNPCKLQDQ, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(PUNPCKLQDQ, m)
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
+    break;
+
+  CASE_SHUF(SHUFPD, rri)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(SHUFPD, rmi)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeSHUFPMask(getRegOperandNumElts(MI, 64, 0), 64,
+                      MI->getOperand(NumOperands - 1).getImm(), ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_SHUF(SHUFPS, rri)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_SHUF(SHUFPS, rmi)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeSHUFPMask(getRegOperandNumElts(MI, 32, 0), 32,
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VSHUF(64X2, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_VSHUF(64X2, m)
+    decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 64, 0), 64,
+                              MI->getOperand(NumOperands - 1).getImm(),
+                              ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VSHUF(32X4, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_VSHUF(32X4, m)
+    decodeVSHUF64x2FamilyMask(getRegOperandNumElts(MI, 32, 0), 32,
+                              MI->getOperand(NumOperands - 1).getImm(),
+                              ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_UNPCK(UNPCKLPD, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(UNPCKLPD, m)
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_UNPCK(UNPCKLPS, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(UNPCKLPS, m)
+    DecodeUNPCKLMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_UNPCK(UNPCKHPD, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(UNPCKHPD, m)
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 64, 0), 64, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_UNPCK(UNPCKHPS, r)
+    Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    RegForm = true;
+    LLVM_FALLTHROUGH;
+
+  CASE_UNPCK(UNPCKHPS, m)
+    DecodeUNPCKHMask(getRegOperandNumElts(MI, 32, 0), 32, ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VPERMILPI(PERMILPS, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_VPERMILPI(PERMILPS, m)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFMask(getRegOperandNumElts(MI, 32, 0), 32,
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VPERMILPI(PERMILPD, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_VPERMILPI(PERMILPD, m)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodePSHUFMask(getRegOperandNumElts(MI, 64, 0), 64,
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::VPERM2F128rr:
+  case X86::VPERM2I128rr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::VPERM2F128rm:
+  case X86::VPERM2I128rm:
+    // For instruction comments purpose, assume the 256-bit vector is v4i64.
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeVPERM2X128Mask(4, MI->getOperand(NumOperands - 1).getImm(),
+                           ShuffleMask);
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VPERM(PERMPD, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_VPERM(PERMPD, m)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_VPERM(PERMQ, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+    LLVM_FALLTHROUGH;
+
+  CASE_VPERM(PERMQ, m)
+    if (MI->getOperand(NumOperands - 1).isImm())
+      DecodeVPERMMask(getRegOperandNumElts(MI, 64, 0),
+                      MI->getOperand(NumOperands - 1).getImm(),
+                      ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::MOVSDrr:
+  case X86::VMOVSDrr:
+  case X86::VMOVSDZrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::MOVSDrm_alt:
+  case X86::MOVSDrm:
+  case X86::VMOVSDrm_alt:
+  case X86::VMOVSDrm:
+  case X86::VMOVSDZrm:
+  case X86::VMOVSDZrm_alt:
+    DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::MOVSSrr:
+  case X86::VMOVSSrr:
+  case X86::VMOVSSZrr:
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::MOVSSrm:
+  case X86::MOVSSrm_alt:
+  case X86::VMOVSSrm:
+  case X86::VMOVSSrm_alt:
+  case X86::VMOVSSZrm:
+  case X86::VMOVSSZrm_alt:
+    DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::MOVPQI2QIrr:
+  case X86::MOVZPQILo2PQIrr:
+  case X86::VMOVPQI2QIrr:
+  case X86::VMOVPQI2QIZrr:
+  case X86::VMOVZPQILo2PQIrr:
+  case X86::VMOVZPQILo2PQIZrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    LLVM_FALLTHROUGH;
+
+  case X86::MOVQI2PQIrm:
+  case X86::VMOVQI2PQIrm:
+  case X86::VMOVQI2PQIZrm:
+    DecodeZeroMoveLowMask(2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::MOVDI2PDIrm:
+  case X86::VMOVDI2PDIrm:
+  case X86::VMOVDI2PDIZrm:
+    DecodeZeroMoveLowMask(4, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::EXTRQI:
+    if (MI->getOperand(2).isImm() &&
+        MI->getOperand(3).isImm())
+      DecodeEXTRQIMask(16, 8, MI->getOperand(2).getImm(),
+                       MI->getOperand(3).getImm(), ShuffleMask);
+
+    DestName = getRegName(MI->getOperand(0).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    break;
+
+  case X86::INSERTQI:
+    if (MI->getOperand(3).isImm() &&
+        MI->getOperand(4).isImm())
+      DecodeINSERTQIMask(16, 8, MI->getOperand(3).getImm(),
+                         MI->getOperand(4).getImm(), ShuffleMask);
+
+    DestName = getRegName(MI->getOperand(0).getReg());
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    Src2Name = getRegName(MI->getOperand(2).getReg());
+    break;
+
+  case X86::VBROADCASTF128:
+  case X86::VBROADCASTI128:
+  CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
+    DecodeSubVectorBroadcast(4, 2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
+    DecodeSubVectorBroadcast(8, 2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
+    DecodeSubVectorBroadcast(8, 4, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z256, rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z256, rm)
+    DecodeSubVectorBroadcast(8, 4, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
+    DecodeSubVectorBroadcast(16, 4, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
+    DecodeSubVectorBroadcast(16, 8, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m)
+    DecodeSubVectorBroadcast(4, 2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
+    DecodeSubVectorBroadcast(8, 2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
+  CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
+    DecodeSubVectorBroadcast(16, 2, ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXBW, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXBW, m)
+    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXBD, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXBD, m)
+    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXBQ, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXBQ, m)
+    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXWD, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXWD, m)
+    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXWQ, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXWQ, m)
+    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  CASE_PMOVZX(PMOVZXDQ, r)
+    Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+    LLVM_FALLTHROUGH;
+  CASE_PMOVZX(PMOVZXDQ, m)
+    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+  }
+
+  // The only comments we decode are shuffles, so give up if we were unable to
+  // decode a shuffle mask.
+  if (ShuffleMask.empty())
+    return false;
+
+  if (!DestName) DestName = Src1Name;
+  if (DestName) {
+    OS << DestName;
+    printMasking(OS, MI, MCII);
+  } else
+    OS << "mem";
+
+  OS << " = ";
+
+  // If the two sources are the same, canonicalize the input elements to be
+  // from the first src so that we get larger element spans.
+  if (Src1Name == Src2Name) {
+    for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+      if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+          ShuffleMask[i] >= (int)e)   // From second mask.
+        ShuffleMask[i] -= e;
+    }
+  }
+
+  // The shuffle mask specifies which elements of the src1/src2 fill in the
+  // destination, with a few sentinel values.  Loop through and print them
+  // out.
+  for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+    if (i != 0)
+      OS << ',';
+    if (ShuffleMask[i] == SM_SentinelZero) {
+      OS << "zero";
+      continue;
+    }
+
+    // Otherwise, it must come from src1 or src2.  Print the span of elements
+    // that comes from this src.
+    bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
+    const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+    OS << (SrcName ? SrcName : "mem") << '[';
+    bool IsFirst = true;
+    while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
+           (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
+      if (!IsFirst)
+        OS << ',';
+      else
+        IsFirst = false;
+      if (ShuffleMask[i] == SM_SentinelUndef)
+        OS << "u";
+      else
+        OS << ShuffleMask[i] % ShuffleMask.size();
+      ++i;
+    }
+    OS << ']';
+    --i; // For loop increments element #.
+  }
+  OS << '\n';
+
+  // We successfully added a comment to this instruction.
+  return true;
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86InstComments.h b/lib/Target/X86/MCTargetDesc/X86InstComments.h
new file mode 100644
index 000000000000..96760664012a
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86InstComments.h
@@ -0,0 +1,26 @@
+//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTCOMMENTS_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTCOMMENTS_H
+
+namespace llvm {
+
+  class MCInst;      // Forward declarations only: the prototype below uses
+  class MCInstrInfo; // these types purely by pointer/reference, so no MC
+  class raw_ostream; // headers need to be included from this header.
+  bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+                              const MCInstrInfo &MCII); // returns true once a comment has been written to OS
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
new file mode 100644
index 000000000000..a21555076976
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -0,0 +1,362 @@
+//===--- X86InstPrinterCommon.cpp - X86 assembly instruction printing -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes common code for rendering MCInst instances as AT&T-style
+// and Intel-style assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstPrinterCommon.h"
+#include "X86BaseInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
+#include <cassert>
+
+using namespace llvm;
+
+void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
+                                         raw_ostream &O) { // Writes the condition-code suffix selected by operand Op of MI.
+  int64_t Imm = MI->getOperand(Op).getImm(); // operand Op is read as an immediate
+  switch (Imm) {
+  default: llvm_unreachable("Invalid condcode argument!"); // only 0..0xf are mapped below
+  case    0: O << "o";  break;
+  case    1: O << "no"; break;
+  case    2: O << "b";  break;
+  case    3: O << "ae"; break;
+  case    4: O << "e";  break;
+  case    5: O << "ne"; break;
+  case    6: O << "be"; break;
+  case    7: O << "a";  break;
+  case    8: O << "s";  break;
+  case    9: O << "ns"; break;
+  case  0xa: O << "p";  break;
+  case  0xb: O << "np"; break;
+  case  0xc: O << "l";  break;
+  case  0xd: O << "ge"; break;
+  case  0xe: O << "le"; break;
+  case  0xf: O << "g";  break;
+  }
+}
+
+void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op,
+                                         raw_ostream &O) { // Writes the comparison-predicate name selected by operand Op of MI.
+  int64_t Imm = MI->getOperand(Op).getImm(); // operand Op is read as an immediate
+  switch (Imm) {
+  default: llvm_unreachable("Invalid ssecc/avxcc argument!"); // only 0..0x1f are mapped below
+  case    0: O << "eq"; break;
+  case    1: O << "lt"; break;
+  case    2: O << "le"; break;
+  case    3: O << "unord"; break;
+  case    4: O << "neq"; break;
+  case    5: O << "nlt"; break;
+  case    6: O << "nle"; break;
+  case    7: O << "ord"; break;
+  case    8: O << "eq_uq"; break;
+  case    9: O << "nge"; break;
+  case  0xa: O << "ngt"; break;
+  case  0xb: O << "false"; break;
+  case  0xc: O << "neq_oq"; break;
+  case  0xd: O << "ge"; break;
+  case  0xe: O << "gt"; break;
+  case  0xf: O << "true"; break;
+  case 0x10: O << "eq_os"; break;
+  case 0x11: O << "lt_oq"; break;
+  case 0x12: O << "le_oq"; break;
+  case 0x13: O << "unord_s"; break;
+  case 0x14: O << "neq_us"; break;
+  case 0x15: O << "nlt_uq"; break;
+  case 0x16: O << "nle_uq"; break;
+  case 0x17: O << "ord_s"; break;
+  case 0x18: O << "eq_us"; break;
+  case 0x19: O << "nge_uq"; break;
+  case 0x1a: O << "ngt_uq"; break;
+  case 0x1b: O << "false_os"; break;
+  case 0x1c: O << "neq_os"; break;
+  case 0x1d: O << "ge_oq"; break;
+  case 0x1e: O << "gt_oq"; break;
+  case 0x1f: O << "true_us"; break;
+  }
+}
+
+void X86InstPrinterCommon::printVPCOMMnemonic(const MCInst *MI,
+                                              raw_ostream &OS) { // Writes vpcom + predicate name + element suffix + tab for MI.
+  OS << "vpcom";
+
+  int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm(); // the predicate is taken from the last operand
+  switch (Imm) {
+  default: llvm_unreachable("Invalid vpcom argument!"); // only 0..7 are mapped below
+  case 0: OS << "lt"; break;
+  case 1: OS << "le"; break;
+  case 2: OS << "gt"; break;
+  case 3: OS << "ge"; break;
+  case 4: OS << "eq"; break;
+  case 5: OS << "neq"; break;
+  case 6: OS << "false"; break;
+  case 7: OS << "true"; break;
+  }
+
+  switch (MI->getOpcode()) { // the suffix depends only on the opcode; each mi/ri pair shares one suffix
+  default: llvm_unreachable("Unexpected opcode!");
+  case X86::VPCOMBmi:  case X86::VPCOMBri:  OS << "b\t";  break;
+  case X86::VPCOMDmi:  case X86::VPCOMDri:  OS << "d\t";  break;
+  case X86::VPCOMQmi:  case X86::VPCOMQri:  OS << "q\t";  break;
+  case X86::VPCOMUBmi: case X86::VPCOMUBri: OS << "ub\t"; break;
+  case X86::VPCOMUDmi: case X86::VPCOMUDri: OS << "ud\t"; break;
+  case X86::VPCOMUQmi: case X86::VPCOMUQri: OS << "uq\t"; break;
+  case X86::VPCOMUWmi: case X86::VPCOMUWri: OS << "uw\t"; break;
+  case X86::VPCOMWmi:  case X86::VPCOMWri:  OS << "w\t";  break;
+  }
+}
+
+void X86InstPrinterCommon::printVPCMPMnemonic(const MCInst *MI,
+                                              raw_ostream &OS) { // Writes vpcmp + predicate name + element suffix + tab for MI.
+  OS << "vpcmp";
+
+  printSSEAVXCC(MI, MI->getNumOperands() - 1, OS); // predicate comes from the last operand. NOTE(review): this reuses the SSE/AVX name table; presumably callers only pass immediates whose names apply to vpcmp -- confirm.
+
+  switch (MI->getOpcode()) { // the suffix depends only on the opcode; an unlisted opcode is treated as unreachable
+  default: llvm_unreachable("Unexpected opcode!");
+  case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rri:
+  case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rri:
+  case X86::VPCMPBZrmi:     case X86::VPCMPBZrri:
+  case X86::VPCMPBZ128rmik: case X86::VPCMPBZ128rrik:
+  case X86::VPCMPBZ256rmik: case X86::VPCMPBZ256rrik:
+  case X86::VPCMPBZrmik:    case X86::VPCMPBZrrik:
+    OS << "b\t";
+    break;
+  case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rri:
+  case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rri:
+  case X86::VPCMPDZrmi:     case X86::VPCMPDZrri:
+  case X86::VPCMPDZ128rmik: case X86::VPCMPDZ128rrik:
+  case X86::VPCMPDZ256rmik: case X86::VPCMPDZ256rrik:
+  case X86::VPCMPDZrmik:    case X86::VPCMPDZrrik:
+  case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+  case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+  case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
+    OS << "d\t";
+    break;
+  case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rri:
+  case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rri:
+  case X86::VPCMPQZrmi:     case X86::VPCMPQZrri:
+  case X86::VPCMPQZ128rmik: case X86::VPCMPQZ128rrik:
+  case X86::VPCMPQZ256rmik: case X86::VPCMPQZ256rrik:
+  case X86::VPCMPQZrmik:    case X86::VPCMPQZrrik:
+  case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+  case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+  case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
+    OS << "q\t";
+    break;
+  case X86::VPCMPUBZ128rmi:  case X86::VPCMPUBZ128rri:
+  case X86::VPCMPUBZ256rmi:  case X86::VPCMPUBZ256rri:
+  case X86::VPCMPUBZrmi:     case X86::VPCMPUBZrri:
+  case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+  case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+  case X86::VPCMPUBZrmik:    case X86::VPCMPUBZrrik:
+    OS << "ub\t";
+    break;
+  case X86::VPCMPUDZ128rmi:  case X86::VPCMPUDZ128rri:
+  case X86::VPCMPUDZ256rmi:  case X86::VPCMPUDZ256rri:
+  case X86::VPCMPUDZrmi:     case X86::VPCMPUDZrri:
+  case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+  case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+  case X86::VPCMPUDZrmik:    case X86::VPCMPUDZrrik:
+  case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+  case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+  case X86::VPCMPUDZrmib:    case X86::VPCMPUDZrmibk:
+    OS << "ud\t";
+    break;
+  case X86::VPCMPUQZ128rmi:  case X86::VPCMPUQZ128rri:
+  case X86::VPCMPUQZ256rmi:  case X86::VPCMPUQZ256rri:
+  case X86::VPCMPUQZrmi:     case X86::VPCMPUQZrri:
+  case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+  case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+  case X86::VPCMPUQZrmik:    case X86::VPCMPUQZrrik:
+  case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+  case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+  case X86::VPCMPUQZrmib:    case X86::VPCMPUQZrmibk:
+    OS << "uq\t";
+    break;
+  case X86::VPCMPUWZ128rmi:  case X86::VPCMPUWZ128rri:
+  case X86::VPCMPUWZ256rri:  case X86::VPCMPUWZ256rmi:
+  case X86::VPCMPUWZrmi:     case X86::VPCMPUWZrri:
+  case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+  case X86::VPCMPUWZ256rrik: case X86::VPCMPUWZ256rmik:
+  case X86::VPCMPUWZrmik:    case X86::VPCMPUWZrrik:
+    OS << "uw\t";
+    break;
+  case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rri:
+  case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rri:
+  case X86::VPCMPWZrmi:     case X86::VPCMPWZrri:
+  case X86::VPCMPWZ128rmik: case X86::VPCMPWZ128rrik:
+  case X86::VPCMPWZ256rmik: case X86::VPCMPWZ256rrik:
+  case X86::VPCMPWZrmik:    case X86::VPCMPWZrrik:
+    OS << "w\t";
+    break;
+  }
+}
+
+void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
+                                            raw_ostream &OS) { // Writes (v)cmp + predicate name + pd/ps/sd/ss suffix + tab for MI.
+  OS << (IsVCmp ? "vcmp" : "cmp"); // the caller chooses the spelling via IsVCmp; it is not derived from the opcode here
+
+  printSSEAVXCC(MI, MI->getNumOperands() - 1, OS); // the predicate is taken from the last operand
+
+  switch (MI->getOpcode()) { // the suffix depends only on the opcode; an unlisted opcode is treated as unreachable
+  default: llvm_unreachable("Unexpected opcode!");
+  case X86::CMPPDrmi:       case X86::CMPPDrri:
+  case X86::VCMPPDrmi:      case X86::VCMPPDrri:
+  case X86::VCMPPDYrmi:     case X86::VCMPPDYrri:
+  case X86::VCMPPDZ128rmi:  case X86::VCMPPDZ128rri:
+  case X86::VCMPPDZ256rmi:  case X86::VCMPPDZ256rri:
+  case X86::VCMPPDZrmi:     case X86::VCMPPDZrri:
+  case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+  case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+  case X86::VCMPPDZrmik:    case X86::VCMPPDZrrik:
+  case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+  case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+  case X86::VCMPPDZrmbi:    case X86::VCMPPDZrmbik:
+  case X86::VCMPPDZrrib:    case X86::VCMPPDZrribk:
+    OS << "pd\t";
+    break;
+  case X86::CMPPSrmi:       case X86::CMPPSrri:
+  case X86::VCMPPSrmi:      case X86::VCMPPSrri:
+  case X86::VCMPPSYrmi:     case X86::VCMPPSYrri:
+  case X86::VCMPPSZ128rmi:  case X86::VCMPPSZ128rri:
+  case X86::VCMPPSZ256rmi:  case X86::VCMPPSZ256rri:
+  case X86::VCMPPSZrmi:     case X86::VCMPPSZrri:
+  case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+  case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+  case X86::VCMPPSZrmik:    case X86::VCMPPSZrrik:
+  case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+  case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+  case X86::VCMPPSZrmbi:    case X86::VCMPPSZrmbik:
+  case X86::VCMPPSZrrib:    case X86::VCMPPSZrribk:
+    OS << "ps\t";
+    break;
+  case X86::CMPSDrm:        case X86::CMPSDrr:
+  case X86::CMPSDrm_Int:    case X86::CMPSDrr_Int:
+  case X86::VCMPSDrm:       case X86::VCMPSDrr:
+  case X86::VCMPSDrm_Int:   case X86::VCMPSDrr_Int:
+  case X86::VCMPSDZrm:      case X86::VCMPSDZrr:
+  case X86::VCMPSDZrm_Int:  case X86::VCMPSDZrr_Int:
+  case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+  case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+    OS << "sd\t";
+    break;
+  case X86::CMPSSrm:        case X86::CMPSSrr:
+  case X86::CMPSSrm_Int:    case X86::CMPSSrr_Int:
+  case X86::VCMPSSrm:       case X86::VCMPSSrr:
+  case X86::VCMPSSrm_Int:   case X86::VCMPSSrr_Int:
+  case X86::VCMPSSZrm:      case X86::VCMPSSZrr:
+  case X86::VCMPSSZrm_Int:  case X86::VCMPSSZrr_Int:
+  case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+  case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+    OS << "ss\t";
+    break;
+  }
+}
+
+void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
+                                                raw_ostream &O) { // Writes the braced rounding-mode text selected by operand Op of MI.
+  int64_t Imm = MI->getOperand(Op).getImm(); // operand Op is read as an immediate
+  switch (Imm) {
+  default: // any value other than the four X86::TO_* constants below
+    llvm_unreachable("Invalid rounding control!");
+  case X86::TO_NEAREST_INT:
+    O << "{rn-sae}";
+    break;
+  case X86::TO_NEG_INF:
+    O << "{rd-sae}";
+    break;
+  case X86::TO_POS_INF:
+    O << "{ru-sae}";
+    break;
+  case X86::TO_ZERO:
+    O << "{rz-sae}";
+    break;
+  }
+}
+
+/// Print operand \p OpNo as a value that ends up being encoded pc-relative
+/// (e.g. for jumps and calls).  These print slightly differently than normal
+/// immediates: for example, a $ is not emitted.  Immediates go through
+/// formatImm(); expressions are printed in hex or symbolically (see below).
+void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << formatImm(Op.getImm());
+  else {
+    assert(Op.isExpr() && "unknown pcrel immediate operand"); // anything that is not an immediate must be an expression
+    // If a symbolic branch target was added as a constant expression then print
+    // that address in hex.
+    const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); // null when the expression is not an MCConstantExpr
+    int64_t Address; // only read after evaluateAsAbsolute() has returned true
+    if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
+      O << formatHex((uint64_t)Address);
+    } else {
+      // Otherwise, just print the expression.
+      Op.getExpr()->print(O, &MAI);
+    }
+  }
+}
+
+void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo,
+                                               raw_ostream &O) { // Writes operand OpNo followed by a colon, or nothing at all.
+  if (MI->getOperand(OpNo).getReg()) { // a register value of zero means there is nothing to print
+    printOperand(MI, OpNo, O); // formatting is delegated to the pure-virtual printOperand()
+    O << ':';
+  }
+}
+
+void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) { // Writes the lock / notrack / rep(ne) prefix text that applies to MI.
+  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+  uint64_t TSFlags = Desc.TSFlags; // flags taken from the opcode's descriptor
+  unsigned Flags = MI->getFlags(); // flags carried by this particular MCInst
+
+  if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK)) // either source of the flag is enough
+    O << "\tlock\t";
+
+  if ((TSFlags & X86II::NOTRACK) || (Flags & X86::IP_HAS_NOTRACK)) // independent of lock: both may be printed
+    O << "\tnotrack\t";
+
+  if (Flags & X86::IP_HAS_REPEAT_NE) // tested first, so repne wins when both repeat bits are set
+    O << "\trepne\t";
+  else if (Flags & X86::IP_HAS_REPEAT)
+    O << "\trep\t";
+}
+
+void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &OS) { // Writes one register name standing for the mask-register pair in operand OpNo.
+  // In assembly listings, a pair is represented by one of its members, any
+  // of the two.  Here, we pick k0, k2, k4, k6, but we could as well
+  // print K2_K3 as "k3".  It would probably make a lot more sense, if
+  // the assembly would look something like:
+  // "vp2intersect %zmm5, %zmm7, {%k2, %k3}"
+  // but this can work too.
+  switch (MI->getOperand(OpNo).getReg()) {
+  case X86::K0_K1:
+    printRegName(OS, X86::K0);
+    return;
+  case X86::K2_K3:
+    printRegName(OS, X86::K2);
+    return;
+  case X86::K4_K5:
+    printRegName(OS, X86::K4);
+    return;
+  case X86::K6_K7:
+    printRegName(OS, X86::K6);
+    return;
+  }
+  llvm_unreachable("Unknown mask pair register name"); // reached only when the register is none of the four pairs above
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
new file mode 100644
index 000000000000..8e28f24b619a
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -0,0 +1,41 @@
+//===-- X86InstPrinterCommon.h - X86 assembly instruction printing --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code common for rendering MCInst instances as AT&T-style
+// and Intel-style assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class X86InstPrinterCommon : public MCInstPrinter {
+public:
+  using MCInstPrinter::MCInstPrinter; // Inherit the base-class constructors.
+
+  virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0;
+  void printCondCode(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+  void printVPCOMMnemonic(const MCInst *MI, raw_ostream &OS);
+  void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS);
+  void printCMPMnemonic(const MCInst *MI, bool IsVCmp, raw_ostream &OS);
+  void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+protected:
+  void printInstFlags(const MCInst *MI, raw_ostream &O);
+  void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
diff --git a/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..ea28bef42569
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -0,0 +1,445 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86IntelInstPrinter.h"
+#include "X86BaseInfo.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << getRegisterName(RegNo); // Name comes from the tblgen-generated table.
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                    StringRef Annot,
+                                    const MCSubtargetInfo &STI) {
+  printInstFlags(MI, OS);
+
+  // In 16-bit mode data16 is printed as data32.  Otherwise try the alias and
+  // vector-compare printers before falling back to the generated printer.
+  const bool IsData16 = MI->getOpcode() == X86::DATA16_PREFIX;
+  if (IsData16 && STI.getFeatureBits()[X86::Mode16Bit])
+    OS << "\tdata32";
+  else if (!(printAliasInstr(MI, OS) || printVecCompareInstr(MI, OS)))
+    printInstruction(MI, OS);
+
+  // The annotation always comes next.
+  printAnnotation(OS, Annot);
+
+  // When verbose assembly is enabled, add some informative comments.
+  if (CommentStream)
+    EmitAnyX86InstComments(MI, *CommentStream, MII);
+}
+
+bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS) {
+  if (MI->getNumOperands() == 0 ||
+      !MI->getOperand(MI->getNumOperands() - 1).isImm())
+    return false; // No trailing immediate, so nothing to translate.
+
+  int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+
+  const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+
+  // Custom print the vector compare instructions so that the immediate is
+  // translated into the mnemonic.
+  switch (MI->getOpcode()) {
+  case X86::CMPPDrmi:    case X86::CMPPDrri:
+  case X86::CMPPSrmi:    case X86::CMPPSrri:
+  case X86::CMPSDrm:     case X86::CMPSDrr:
+  case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
+  case X86::CMPSSrm:     case X86::CMPSSrr:
+  case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
+    if (Imm >= 0 && Imm <= 7) {
+      OS << '\t';
+      printCMPMnemonic(MI, /*IsVCmp=*/false, OS);
+      printOperand(MI, 0, OS);
+      OS << ", ";
+      // Skip operand 1 as it's tied to the dest.
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+          printdwordmem(MI, 2, OS);
+        else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+          printqwordmem(MI, 2, OS);
+        else
+          printxmmwordmem(MI, 2, OS);
+      } else
+        printOperand(MI, 2, OS);
+
+      return true;
+    }
+    break;
+
+  case X86::VCMPPDrmi:      case X86::VCMPPDrri:
+  case X86::VCMPPDYrmi:     case X86::VCMPPDYrri:
+  case X86::VCMPPDZ128rmi:  case X86::VCMPPDZ128rri:
+  case X86::VCMPPDZ256rmi:  case X86::VCMPPDZ256rri:
+  case X86::VCMPPDZrmi:     case X86::VCMPPDZrri:
+  case X86::VCMPPSrmi:      case X86::VCMPPSrri:
+  case X86::VCMPPSYrmi:     case X86::VCMPPSYrri:
+  case X86::VCMPPSZ128rmi:  case X86::VCMPPSZ128rri:
+  case X86::VCMPPSZ256rmi:  case X86::VCMPPSZ256rri:
+  case X86::VCMPPSZrmi:     case X86::VCMPPSZrri:
+  case X86::VCMPSDrm:       case X86::VCMPSDrr:
+  case X86::VCMPSDZrm:      case X86::VCMPSDZrr:
+  case X86::VCMPSDrm_Int:   case X86::VCMPSDrr_Int:
+  case X86::VCMPSDZrm_Int:  case X86::VCMPSDZrr_Int:
+  case X86::VCMPSSrm:       case X86::VCMPSSrr:
+  case X86::VCMPSSZrm:      case X86::VCMPSSZrr:
+  case X86::VCMPSSrm_Int:   case X86::VCMPSSrr_Int:
+  case X86::VCMPSSZrm_Int:  case X86::VCMPSSZrr_Int:
+  case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+  case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+  case X86::VCMPPDZrmik:    case X86::VCMPPDZrrik:
+  case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+  case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+  case X86::VCMPPSZrmik:    case X86::VCMPPSZrrik:
+  case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+  case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+  case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+  case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+  case X86::VCMPPDZrmbi:    case X86::VCMPPDZrmbik:
+  case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+  case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+  case X86::VCMPPSZrmbi:    case X86::VCMPPSZrmbik:
+  case X86::VCMPPDZrrib:    case X86::VCMPPDZrribk:
+  case X86::VCMPPSZrrib:    case X86::VCMPPSZrribk:
+  case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+  case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+    if (Imm >= 0 && Imm <= 31) {
+      OS << '\t';
+      printCMPMnemonic(MI, /*IsVCmp=*/true, OS);
+
+      unsigned CurOp = 0;
+      printOperand(MI, CurOp++, OS);
+
+      if (Desc.TSFlags & X86II::EVEX_K) {
+        // Print the mask operand in braces after the destination.
+        OS << " {";
+        printOperand(MI, CurOp++, OS);
+        OS << "}";
+      }
+      OS << ", ";
+      printOperand(MI, CurOp++, OS);
+      OS << ", ";
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if (Desc.TSFlags & X86II::EVEX_B) {
+          // Broadcast form.
+          // Load size is based on W-bit.
+          if (Desc.TSFlags & X86II::VEX_W)
+            printqwordmem(MI, CurOp++, OS);
+          else
+            printdwordmem(MI, CurOp++, OS);
+
+          // Print the number of elements broadcast.
+          unsigned NumElts;
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+          else if (Desc.TSFlags & X86II::VEX_L)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+          else
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+          OS << "{1to" << NumElts << "}";
+        } else {
+          if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+            printdwordmem(MI, CurOp++, OS);
+          else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+            printqwordmem(MI, CurOp++, OS);
+          else if (Desc.TSFlags & X86II::EVEX_L2)
+            printzmmwordmem(MI, CurOp++, OS);
+          else if (Desc.TSFlags & X86II::VEX_L)
+            printymmwordmem(MI, CurOp++, OS);
+          else
+            printxmmwordmem(MI, CurOp++, OS);
+        }
+      } else {
+        printOperand(MI, CurOp++, OS);
+        if (Desc.TSFlags & X86II::EVEX_B)
+          OS << ", {sae}";
+      }
+
+      return true;
+    }
+    break;
+
+  case X86::VPCOMBmi:  case X86::VPCOMBri:
+  case X86::VPCOMDmi:  case X86::VPCOMDri:
+  case X86::VPCOMQmi:  case X86::VPCOMQri:
+  case X86::VPCOMUBmi: case X86::VPCOMUBri:
+  case X86::VPCOMUDmi: case X86::VPCOMUDri:
+  case X86::VPCOMUQmi: case X86::VPCOMUQri:
+  case X86::VPCOMUWmi: case X86::VPCOMUWri:
+  case X86::VPCOMWmi:  case X86::VPCOMWri:
+    if (Imm >= 0 && Imm <= 7) {
+      OS << '\t';
+      printVPCOMMnemonic(MI, OS);
+      printOperand(MI, 0, OS);
+      OS << ", ";
+      printOperand(MI, 1, OS);
+      OS << ", ";
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem)
+        printxmmwordmem(MI, 2, OS);
+      else
+        printOperand(MI, 2, OS);
+      return true;
+    }
+    break;
+
+  case X86::VPCMPBZ128rmi:   case X86::VPCMPBZ128rri:
+  case X86::VPCMPBZ256rmi:   case X86::VPCMPBZ256rri:
+  case X86::VPCMPBZrmi:      case X86::VPCMPBZrri:
+  case X86::VPCMPDZ128rmi:   case X86::VPCMPDZ128rri:
+  case X86::VPCMPDZ256rmi:   case X86::VPCMPDZ256rri:
+  case X86::VPCMPDZrmi:      case X86::VPCMPDZrri:
+  case X86::VPCMPQZ128rmi:   case X86::VPCMPQZ128rri:
+  case X86::VPCMPQZ256rmi:   case X86::VPCMPQZ256rri:
+  case X86::VPCMPQZrmi:      case X86::VPCMPQZrri:
+  case X86::VPCMPUBZ128rmi:  case X86::VPCMPUBZ128rri:
+  case X86::VPCMPUBZ256rmi:  case X86::VPCMPUBZ256rri:
+  case X86::VPCMPUBZrmi:     case X86::VPCMPUBZrri:
+  case X86::VPCMPUDZ128rmi:  case X86::VPCMPUDZ128rri:
+  case X86::VPCMPUDZ256rmi:  case X86::VPCMPUDZ256rri:
+  case X86::VPCMPUDZrmi:     case X86::VPCMPUDZrri:
+  case X86::VPCMPUQZ128rmi:  case X86::VPCMPUQZ128rri:
+  case X86::VPCMPUQZ256rmi:  case X86::VPCMPUQZ256rri:
+  case X86::VPCMPUQZrmi:     case X86::VPCMPUQZrri:
+  case X86::VPCMPUWZ128rmi:  case X86::VPCMPUWZ128rri:
+  case X86::VPCMPUWZ256rmi:  case X86::VPCMPUWZ256rri:
+  case X86::VPCMPUWZrmi:     case X86::VPCMPUWZrri:
+  case X86::VPCMPWZ128rmi:   case X86::VPCMPWZ128rri:
+  case X86::VPCMPWZ256rmi:   case X86::VPCMPWZ256rri:
+  case X86::VPCMPWZrmi:      case X86::VPCMPWZrri:
+  case X86::VPCMPBZ128rmik:  case X86::VPCMPBZ128rrik:
+  case X86::VPCMPBZ256rmik:  case X86::VPCMPBZ256rrik:
+  case X86::VPCMPBZrmik:     case X86::VPCMPBZrrik:
+  case X86::VPCMPDZ128rmik:  case X86::VPCMPDZ128rrik:
+  case X86::VPCMPDZ256rmik:  case X86::VPCMPDZ256rrik:
+  case X86::VPCMPDZrmik:     case X86::VPCMPDZrrik:
+  case X86::VPCMPQZ128rmik:  case X86::VPCMPQZ128rrik:
+  case X86::VPCMPQZ256rmik:  case X86::VPCMPQZ256rrik:
+  case X86::VPCMPQZrmik:     case X86::VPCMPQZrrik:
+  case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+  case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+  case X86::VPCMPUBZrmik:    case X86::VPCMPUBZrrik:
+  case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+  case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+  case X86::VPCMPUDZrmik:    case X86::VPCMPUDZrrik:
+  case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+  case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+  case X86::VPCMPUQZrmik:    case X86::VPCMPUQZrrik:
+  case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+  case X86::VPCMPUWZ256rmik: case X86::VPCMPUWZ256rrik:
+  case X86::VPCMPUWZrmik:    case X86::VPCMPUWZrrik:
+  case X86::VPCMPWZ128rmik:  case X86::VPCMPWZ128rrik:
+  case X86::VPCMPWZ256rmik:  case X86::VPCMPWZ256rrik:
+  case X86::VPCMPWZrmik:     case X86::VPCMPWZrrik:
+  case X86::VPCMPDZ128rmib:  case X86::VPCMPDZ128rmibk:
+  case X86::VPCMPDZ256rmib:  case X86::VPCMPDZ256rmibk:
+  case X86::VPCMPDZrmib:     case X86::VPCMPDZrmibk:
+  case X86::VPCMPQZ128rmib:  case X86::VPCMPQZ128rmibk:
+  case X86::VPCMPQZ256rmib:  case X86::VPCMPQZ256rmibk:
+  case X86::VPCMPQZrmib:     case X86::VPCMPQZrmibk:
+  case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+  case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+  case X86::VPCMPUDZrmib:    case X86::VPCMPUDZrmibk:
+  case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+  case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+  case X86::VPCMPUQZrmib:    case X86::VPCMPUQZrmibk:
+    if ((Imm >= 0 && Imm <= 2) || (Imm >= 4 && Imm <= 6)) {
+      OS << '\t';
+      printVPCMPMnemonic(MI, OS);
+
+      unsigned CurOp = 0;
+      printOperand(MI, CurOp++, OS);
+
+      if (Desc.TSFlags & X86II::EVEX_K) {
+        // Print the mask operand in braces after the destination.
+        OS << " {";
+        printOperand(MI, CurOp++, OS);
+        OS << "}";
+      }
+      OS << ", ";
+      printOperand(MI, CurOp++, OS);
+      OS << ", ";
+
+      if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+        if (Desc.TSFlags & X86II::EVEX_B) {
+          // Broadcast form.
+          // Load size is based on W-bit as only D and Q are supported.
+          if (Desc.TSFlags & X86II::VEX_W)
+            printqwordmem(MI, CurOp++, OS);
+          else
+            printdwordmem(MI, CurOp++, OS);
+
+          // Print the number of elements broadcast.
+          unsigned NumElts;
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+          else if (Desc.TSFlags & X86II::VEX_L)
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+          else
+            NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+          OS << "{1to" << NumElts << "}";
+        } else {
+          if (Desc.TSFlags & X86II::EVEX_L2)
+            printzmmwordmem(MI, CurOp++, OS);
+          else if (Desc.TSFlags & X86II::VEX_L)
+            printymmwordmem(MI, CurOp++, OS);
+          else
+            printxmmwordmem(MI, CurOp++, OS);
+        }
+      } else {
+        printOperand(MI, CurOp++, OS);
+      }
+
+      return true;
+    }
+    break;
+  }
+
+  return false; // Nothing was printed here.
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(OpNo);
+  if (Operand.isReg())
+    return printRegName(O, Operand.getReg());
+  if (Operand.isImm()) {
+    O << formatImm(static_cast<int64_t>(Operand.getImm()));
+    return;
+  }
+  assert(Operand.isExpr() && "unknown operand kind in printOperand");
+  O << "offset "; // Expression operands get an "offset" keyword in front.
+  Operand.getExpr()->print(O, &MAI);
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+                                            raw_ostream &O) {
+  const MCOperand &Base = MI->getOperand(Op + X86::AddrBaseReg);
+  const unsigned Scale = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
+  const MCOperand &Index = MI->getOperand(Op + X86::AddrIndexReg);
+  const MCOperand &Disp = MI->getOperand(Op + X86::AddrDisp);
+
+  // An optional "seg:" override goes in front of the bracketed address.
+  printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
+  O << '[';
+
+  // Set once any term has been printed, so later terms know to add a sign.
+  bool HaveTerm = false;
+  if (Base.getReg()) {
+    printOperand(MI, Op + X86::AddrBaseReg, O);
+    HaveTerm = true;
+  }
+
+  if (Index.getReg()) {
+    if (HaveTerm)
+      O << " + ";
+    if (Scale != 1) // A scale of 1 is left implicit.
+      O << Scale << '*';
+    printOperand(MI, Op + X86::AddrIndexReg, O);
+    HaveTerm = true;
+  }
+
+  if (Disp.isImm()) {
+    int64_t Offset = Disp.getImm();
+    // A zero displacement is printed only when it is the sole term.
+    if (Offset != 0 || (!Index.getReg() && !Base.getReg())) {
+      if (HaveTerm) {
+        O << (Offset > 0 ? " + " : " - ");
+        if (Offset < 0)
+          Offset = -Offset; // The sign is already in the separator.
+      }
+      O << formatImm(Offset);
+    }
+  } else {
+    if (HaveTerm)
+      O << " + ";
+    assert(Disp.isExpr() && "non-immediate displacement for LEA?");
+    Disp.getExpr()->print(O, &MAI);
+  }
+
+  O << ']';
+}
+
+void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
+                                      raw_ostream &O) {
+  // Operand Op + 1 is the segment register; it is printed only if present.
+  printOptionalSegReg(MI, Op + 1, O);
+  O << '[';
+  printOperand(MI, Op, O);
+  O << ']';
+}
+
+void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
+                                      raw_ostream &O) {
+  // DI accesses are always ES-based, so the segment is written literally.
+  O << "es:[";
+  printOperand(MI, Op, O);
+  O << ']';
+}
+
+void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+                                         raw_ostream &O) {
+  const MCOperand &Offset = MI->getOperand(Op);
+
+  // Operand Op + 1 is the segment register; it is printed only if present.
+  printOptionalSegReg(MI, Op + 1, O);
+
+  O << '[';
+
+  if (!Offset.isImm()) {
+    assert(Offset.isExpr() && "non-immediate displacement?");
+    Offset.getExpr()->print(O, &MAI);
+  } else {
+    O << formatImm(Offset.getImm());
+  }
+
+  O << ']';
+}
+
+void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+                                     raw_ostream &O) {
+  const MCOperand &Operand = MI->getOperand(Op);
+  if (Operand.isExpr())
+    return Operand.getExpr()->print(O, &MAI);
+  O << formatImm(Operand.getImm() & 0xff); // Keep only the low 8 bits.
+}
+
+void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+                                            raw_ostream &OS) {
+  const unsigned StackReg = MI->getOperand(OpNo).getReg();
+  // Override the default printing so that ST0 is spelled st(0) instead of st.
+  if (StackReg != X86::ST0) {
+    printRegName(OS, StackReg);
+    return;
+  }
+  OS << "st(0)";
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
new file mode 100644
index 000000000000..f32f49f7c417
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -0,0 +1,144 @@
+//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an X86 MCInst to Intel style .s file syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
+
+#include "X86InstPrinterCommon.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class X86IntelInstPrinter final : public X86InstPrinterCommon {
+public:
+  X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                      const MCRegisterInfo &MRI)
+    : X86InstPrinterCommon(MAI, MII, MRI) {}
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+  bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
+
+  // Autogenerated by tblgen; returns true if an alias was successfully
+  // printed.
+  bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+                               unsigned PrintMethodIdx, raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) override;
+  void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
+  void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
+  void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+
+  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    printMemReference(MI, OpNo, O);
+  }
+
+  void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "byte ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "word ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printdwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "dword ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printqwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "qword ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printxmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "xmmword ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printymmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "ymmword ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printzmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "zmmword ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+  void printtbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "tbyte ptr ";
+    printMemReference(MI, OpNo, O);
+  }
+
+  // Variants that print a size keyword before the index/offset operand.
+  void printSrcIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "byte ptr ";
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "word ptr ";
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "dword ptr ";
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printSrcIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "qword ptr ";
+    printSrcIdx(MI, OpNo, O);
+  }
+  void printDstIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "byte ptr ";
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "word ptr ";
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "dword ptr ";
+    printDstIdx(MI, OpNo, O);
+  }
+  void printDstIdx64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "qword ptr ";
+    printDstIdx(MI, OpNo, O);
+  }
+  void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "byte ptr ";
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "word ptr ";
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "dword ptr ";
+    printMemOffset(MI, OpNo, O);
+  }
+  void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+    O << "qword ptr ";
+    printMemOffset(MI, OpNo, O);
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index fa7c352a1b63..e1125c176b25 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 30d5c802d1ed..b2369647a40f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index f5371db9e77a..31d26d08a63f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
 //===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -525,9 +524,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
     // indirect register encoding, this handles addresses like [EAX].  The
     // encoding for [EBP] with no displacement means [disp32] so we handle it
     // by emitting a displacement of 0 below.
-    if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
-      EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
-      return;
+    if (BaseRegNo != N86::EBP) {
+      if (Disp.isImm() && Disp.getImm() == 0) {
+        EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+        return;
+      }
+
+      // If the displacement is @tlscall, treat it as a zero.
+      if (Disp.isExpr()) {
+        auto *Sym = dyn_cast<MCSymbolRefExpr>(Disp.getExpr());
+        if (Sym && Sym->getKind() == MCSymbolRefExpr::VK_TLSCALL) {
+          // This is exclusively used by call *a@tlscall(base). The relocation
+          // (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
+          Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
+          EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+          return;
+        }
+      }
     }
 
     // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
@@ -880,7 +893,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
       if (HasEVEX_RC) {
         unsigned RcOperand = NumOps-1;
         assert(RcOperand >= CurOp);
-        EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
+        EVEX_rc = MI.getOperand(RcOperand).getImm();
+        assert(EVEX_rc <= 3 && "Invalid rounding control!");
       }
       EncodeRC = true;
     }
@@ -979,7 +993,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
     uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
 
     // Can we use the 2 byte VEX prefix?
-    if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
+    if (!(MI.getFlags() & X86::IP_USE_VEX3) &&
+        Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
       EmitByte(0xC5, CurByte, OS);
       EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
       return;
@@ -1060,16 +1075,17 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
     REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
     break;
   case X86II::MRMSrcReg:
+  case X86II::MRMSrcRegCC:
     REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
     REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
     break;
-  case X86II::MRMSrcMem: {
+  case X86II::MRMSrcMem:
+  case X86II::MRMSrcMemCC:
     REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
     REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
     REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
     CurOp += X86::AddrNumOperands;
     break;
-  }
   case X86II::MRMDestReg:
     REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
     REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
@@ -1080,7 +1096,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
     CurOp += X86::AddrNumOperands;
     REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
     break;
-  case X86II::MRMXm:
+  case X86II::MRMXmCC: case X86II::MRMXm:
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
   case X86II::MRM4m: case X86II::MRM5m:
@@ -1088,7 +1104,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
     REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
     REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
     break;
-  case X86II::MRMXr:
+  case X86II::MRMXrCC: case X86II::MRMXr:
   case X86II::MRM0r: case X86II::MRM1r:
   case X86II::MRM2r: case X86II::MRM3r:
   case X86II::MRM4r: case X86II::MRM5r:
@@ -1272,6 +1288,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
     BaseOpcode = 0x0F;   // Weird 3DNow! encoding.
 
+  unsigned OpcodeOffset = 0;
+
   uint64_t Form = TSFlags & X86II::FormMask;
   switch (Form) {
   default: errs() << "FORM: " << Form << "\n";
@@ -1318,8 +1336,14 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
     EmitByte(BaseOpcode, CurByte, OS);
     break;
   }
-  case X86II::RawFrm: {
-    EmitByte(BaseOpcode, CurByte, OS);
+  case X86II::AddCCFrm: {
+    // This will be added to the opcode in the fallthrough.
+    OpcodeOffset = MI.getOperand(NumOps - 1).getImm();
+    assert(OpcodeOffset < 16 && "Unexpected opcode offset!");
+    --NumOps; // Drop the operand from the end.
+    LLVM_FALLTHROUGH;
+  case X86II::RawFrm:
+    EmitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
 
     if (!is64BitMode(STI) || !isPCRel32Branch(MI))
       break;
@@ -1436,6 +1460,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
     CurOp = SrcRegNum + 1;
     break;
   }
+  case X86II::MRMSrcRegCC: {
+    unsigned FirstOp = CurOp++;
+    unsigned SecondOp = CurOp++;
+
+    unsigned CC = MI.getOperand(CurOp++).getImm();
+    EmitByte(BaseOpcode + CC, CurByte, OS);
+
+    EmitRegModRMByte(MI.getOperand(SecondOp),
+                     GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
+    break;
+  }
   case X86II::MRMSrcMem: {
     unsigned FirstMemOp = CurOp+1;
 
@@ -1481,6 +1516,27 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
     CurOp = FirstMemOp + X86::AddrNumOperands;
     break;
   }
+  case X86II::MRMSrcMemCC: {
+    unsigned RegOp = CurOp++;
+    unsigned FirstMemOp = CurOp;
+    CurOp = FirstMemOp + X86::AddrNumOperands;
+
+    unsigned CC = MI.getOperand(CurOp++).getImm();
+    EmitByte(BaseOpcode + CC, CurByte, OS);
+
+    emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)),
+                     TSFlags, Rex, CurByte, OS, Fixups, STI);
+    break;
+  }
+
+  case X86II::MRMXrCC: {
+    unsigned RegOp = CurOp++;
+
+    unsigned CC = MI.getOperand(CurOp++).getImm();
+    EmitByte(BaseOpcode + CC, CurByte, OS);
+    EmitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
+    break;
+  }
 
   case X86II::MRMXr:
   case X86II::MRM0r: case X86II::MRM1r:
@@ -1497,6 +1553,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
                      CurByte, OS);
     break;
 
+  case X86II::MRMXmCC: {
+    unsigned FirstMemOp = CurOp;
+    CurOp = FirstMemOp + X86::AddrNumOperands;
+
+    unsigned CC = MI.getOperand(CurOp++).getImm();
+    EmitByte(BaseOpcode + CC, CurByte, OS);
+
+    emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI);
+    break;
+  }
+
   case X86II::MRMXm:
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/MCTargetDesc/X86MCExpr.h b/lib/Target/X86/MCTargetDesc/X86MCExpr.h
index 1070f70468fa..532fecd9951b 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCExpr.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCExpr.h
@@ -1,9 +1,8 @@
 //=--- X86MCExpr.h - X86 specific MC expression classes ---*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,7 +14,7 @@
 #ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCEXPR_H
 #define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCEXPR_H
 
-#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86ATTInstPrinter.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index ea4aaf14223d..ce05ad974507 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,13 +11,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86MCTargetDesc.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
+#include "TargetInfo/X86TargetInfo.h"
+#include "X86ATTInstPrinter.h"
 #include "X86BaseInfo.h"
+#include "X86IntelInstPrinter.h"
 #include "X86MCAsmInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -117,6 +118,15 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
       {codeview::RegisterId::ST6, X86::FP6},
       {codeview::RegisterId::ST7, X86::FP7},
 
+      {codeview::RegisterId::MM0, X86::MM0},
+      {codeview::RegisterId::MM1, X86::MM1},
+      {codeview::RegisterId::MM2, X86::MM2},
+      {codeview::RegisterId::MM3, X86::MM3},
+      {codeview::RegisterId::MM4, X86::MM4},
+      {codeview::RegisterId::MM5, X86::MM5},
+      {codeview::RegisterId::MM6, X86::MM6},
+      {codeview::RegisterId::MM7, X86::MM7},
+
       {codeview::RegisterId::XMM0, X86::XMM0},
       {codeview::RegisterId::XMM1, X86::XMM1},
       {codeview::RegisterId::XMM2, X86::XMM2},
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 4e9f5ba60d2e..00dd5908cbf5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -35,9 +34,6 @@ class StringRef;
 class raw_ostream;
 class raw_pwrite_stream;
 
-Target &getTheX86_32Target();
-Target &getTheX86_64Target();
-
 /// Flavour of dwarf regnumbers
 ///
 namespace DWARFFlavour {
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 883278b7bc1f..fc7e99f61e5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
index 10a282dd2962..3b1e9e7c34fb 100644
--- a/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
+++ b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
@@ -1,9 +1,8 @@
 //===- X86TargetStreamer.h ------------------------------*- C++ -*---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 2aec695b2dbf..3baab9da1c41 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
 //===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 0085787e576a..796a27a17255 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- X86WinCOFFStreamer.cpp - X86 Target WinCOFF Streamer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index bee9b7046338..e9987d1f62bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -1,9 +1,8 @@
 //===-- X86WinCOFFTargetStreamer.cpp ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/ShadowCallStack.cpp b/lib/Target/X86/ShadowCallStack.cpp
deleted file mode 100644
index ab2cebcb58ee..000000000000
--- a/lib/Target/X86/ShadowCallStack.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ShadowCallStack pass instruments function prologs/epilogs to check that
-// the return address has not been corrupted during the execution of the
-// function. The return address is stored in a 'shadow call stack' addressed
-// using the %gs segment register.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-class ShadowCallStack : public MachineFunctionPass {
-public:
-  static char ID;
-
-  ShadowCallStack() : MachineFunctionPass(ID) {
-    initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &Fn) override;
-
-private:
-  // Do not instrument leaf functions with this many or fewer instructions. The
-  // shadow call stack instrumented prolog/epilog are slightly race-y reading
-  // and checking the saved return address, so it is better to not instrument
-  // functions that have fewer instructions than the instrumented prolog/epilog
-  // race.
-  static const size_t SkipLeafInstructions = 3;
-};
-
-char ShadowCallStack::ID = 0;
-} // end anonymous namespace.
-
-static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
-                      MachineBasicBlock &MBB, const DebugLoc &DL);
-static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
-                          MachineBasicBlock &MBB, const DebugLoc &DL,
-                          MCPhysReg FreeRegister);
-
-static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                      MachineInstr &MI, MachineBasicBlock &TrapBB);
-static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                          MachineInstr &MI, MachineBasicBlock &TrapBB,
-                          MCPhysReg FreeRegister);
-// Generate a longer epilog that only uses r10 when a tailcall branches to r11.
-static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                             MachineInstr &MI, MachineBasicBlock &TrapBB);
-
-// Helper function to add ModR/M references for [Seg: Reg + Offset] memory
-// accesses
-static inline const MachineInstrBuilder &
-addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
-                int Offset = 0) {
-  return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
-}
-
-static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
-                      MachineBasicBlock &MBB, const DebugLoc &DL) {
-  const MCPhysReg ReturnReg = X86::R10;
-  const MCPhysReg OffsetReg = X86::R11;
-
-  auto MBBI = MBB.begin();
-  // mov r10, [rsp]
-  addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
-               X86::RSP);
-  // xor r11, r11
-  BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
-      .addDef(OffsetReg)
-      .addReg(OffsetReg, RegState::Undef)
-      .addReg(OffsetReg, RegState::Undef);
-  // add QWORD [gs:r11], 8
-  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
-                  OffsetReg)
-      .addImm(8);
-  // mov r11, [gs:r11]
-  addSegmentedMem(
-      BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
-      OffsetReg);
-  // mov [gs:r11], r10
-  addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
-                  OffsetReg)
-      .addReg(ReturnReg);
-}
-
-static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
-                          MachineBasicBlock &MBB, const DebugLoc &DL,
-                          MCPhysReg FreeRegister) {
-  // mov REG, [rsp]
-  addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
-                   .addDef(FreeRegister),
-               X86::RSP);
-}
-
-static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                      MachineInstr &MI, MachineBasicBlock &TrapBB) {
-  const DebugLoc &DL = MI.getDebugLoc();
-
-  // xor r11, r11
-  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
-      .addDef(X86::R11)
-      .addReg(X86::R11, RegState::Undef)
-      .addReg(X86::R11, RegState::Undef);
-  // mov r10, [gs:r11]
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
-                  X86::GS, X86::R11);
-  // mov r10, [gs:r10]
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
-                  X86::GS, X86::R10);
-  // sub QWORD [gs:r11], 8
-  // This instruction should not be moved up to avoid a signal race.
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
-                  X86::GS, X86::R11)
-      .addImm(8);
-  // cmp [rsp], r10
-  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
-      .addReg(X86::R10);
-  // jne trap
-  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
-  MBB.addSuccessor(&TrapBB);
-}
-
-static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                          MachineInstr &MI, MachineBasicBlock &TrapBB,
-                          MCPhysReg FreeRegister) {
-  const DebugLoc &DL = MI.getDebugLoc();
-
-  // cmp [rsp], REG
-  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
-      .addReg(FreeRegister);
-  // jne trap
-  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
-  MBB.addSuccessor(&TrapBB);
-}
-
-static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
-                             MachineInstr &MI, MachineBasicBlock &TrapBB) {
-  const DebugLoc &DL = MI.getDebugLoc();
-
-  // xor r10, r10
-  BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
-      .addDef(X86::R10)
-      .addReg(X86::R10, RegState::Undef)
-      .addReg(X86::R10, RegState::Undef);
-  // mov r10, [gs:r10]
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
-                  X86::GS, X86::R10);
-  // mov r10, [gs:r10]
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
-                  X86::GS, X86::R10);
-  // sub QWORD [gs:0], 8
-  // This instruction should not be moved up to avoid a signal race.
-  addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
-      .addImm(8);
-  // cmp [rsp], r10
-  addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
-      .addReg(X86::R10);
-  // jne trap
-  BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
-  MBB.addSuccessor(&TrapBB);
-}
-
-bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
-  if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
-      Fn.getFunction().hasFnAttribute(Attribute::Naked))
-    return false;
-
-  if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
-    return false;
-
-  // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
-  // on entry for parameters with the nest attribute.)
-  if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
-    return false;
-
-  // FIXME: Skip functions with conditional and r10 tail calls for now.
-  bool HasReturn = false;
-  for (auto &MBB : Fn) {
-    if (MBB.empty())
-      continue;
-
-    const MachineInstr &MI = MBB.instr_back();
-    if (MI.isReturn())
-      HasReturn = true;
-
-    if (MI.isReturn() && MI.isCall()) {
-      if (MI.findRegisterUseOperand(X86::EFLAGS))
-        return false;
-      // This should only be possible on Windows 64 (see GR64_TC versus
-      // GR64_TCW64.)
-      if (MI.findRegisterUseOperand(X86::R10) ||
-          MI.hasRegisterImplicitUseOperand(X86::R10))
-        return false;
-    }
-  }
-
-  if (!HasReturn)
-    return false;
-
-  // For leaf functions:
-  // 1. Do not instrument very short functions where it would not improve that
-  //    function's security.
-  // 2. Detect if there is an unused caller-saved register we can reserve to
-  //    hold the return address instead of writing/reading it from the shadow
-  //    call stack.
-  MCPhysReg LeafFuncRegister = X86::NoRegister;
-  if (!Fn.getFrameInfo().adjustsStack()) {
-    size_t InstructionCount = 0;
-    std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
-    for (auto &MBB : Fn) {
-      for (auto &LiveIn : MBB.liveins())
-        UsedRegs.set(LiveIn.PhysReg);
-      for (auto &MI : MBB) {
-        if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
-          InstructionCount++;
-        for (auto &Op : MI.operands())
-          if (Op.isReg() && Op.isDef())
-            UsedRegs.set(Op.getReg());
-      }
-    }
-
-    if (InstructionCount <= SkipLeafInstructions)
-      return false;
-
-    std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
-    const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
-    for (size_t i = 0; CSRegs[i]; i++)
-      CalleeSavedRegs.set(CSRegs[i]);
-
-    const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
-    for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
-      // FIXME: Optimization opportunity: spill/restore a callee-saved register
-      // if a caller-saved register is unavailable.
-      if (CalleeSavedRegs.test(Reg))
-        continue;
-
-      bool Used = false;
-      for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
-        if ((Used = UsedRegs.test(*SR)))
-          break;
-
-      if (!Used) {
-        LeafFuncRegister = Reg;
-        break;
-      }
-    }
-  }
-
-  const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
-  if (LeafFuncOptimization)
-    // Mark the leaf function register live-in for all MBBs except the entry MBB
-    for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
-      I->addLiveIn(LeafFuncRegister);
-
-  MachineBasicBlock &MBB = Fn.front();
-  const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
-  const DebugLoc &DL = NonEmpty->front().getDebugLoc();
-
-  const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
-  if (LeafFuncOptimization)
-    addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
-  else
-    addProlog(Fn, TII, MBB, DL);
-
-  MachineBasicBlock *Trap = nullptr;
-  for (auto &MBB : Fn) {
-    if (MBB.empty())
-      continue;
-
-    MachineInstr &MI = MBB.instr_back();
-    if (MI.isReturn()) {
-      if (!Trap) {
-        Trap = Fn.CreateMachineBasicBlock();
-        BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
-        Fn.push_back(Trap);
-      }
-
-      if (LeafFuncOptimization)
-        addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
-      else if (MI.findRegisterUseOperand(X86::R11))
-        addEpilogOnlyR10(TII, MBB, MI, *Trap);
-      else
-        addEpilog(TII, MBB, MI, *Trap);
-    }
-  }
-
-  return true;
-}
-
-INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
-                false, false)
-
-FunctionPass *llvm::createShadowCallStackPass() {
-  return new ShadowCallStack();
-}
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 16c2b56c48b5..47c41626a666 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -1,13 +1,12 @@
 //===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.h b/lib/Target/X86/TargetInfo/X86TargetInfo.h
new file mode 100644
index 000000000000..caf6b8d424fc
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.h
@@ -0,0 +1,21 @@
+//===-- X86TargetInfo.h - X86 Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
+#define LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheX86_32Target();
+Target &getTheX86_64Target();
+
+}
+
+#endif // LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index bed940d0d0e9..48fd3e0b7ab9 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -1,9 +1,8 @@
 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -300,7 +299,7 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
     unsigned HalfMask = Imm >> (l * 4);
     unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
     for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
-      ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
+      ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
   }
 }
 
@@ -384,7 +383,8 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
 }
 
 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts, SmallVectorImpl<int> &Mask) {
+                          unsigned NumDstElts, bool IsAnyExtend,
+                          SmallVectorImpl<int> &Mask) {
   unsigned Scale = DstScalarBits / SrcScalarBits;
   assert(SrcScalarBits < DstScalarBits &&
          "Expected zero extension mask to increase scalar size");
@@ -392,7 +392,7 @@ void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
   for (unsigned i = 0; i != NumDstElts; i++) {
     Mask.push_back(i);
     for (unsigned j = 1; j != Scale; j++)
-      Mask.push_back(SM_SentinelZero);
+      Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero);
   }
 }
 
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 85cde14a3241..f52785063071 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -1,9 +1,8 @@
 //===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -137,7 +136,7 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
 
 /// Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts,
+                          unsigned NumDstElts, bool IsAnyExtend,
                           SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a move lower and zero upper instruction as a shuffle mask.
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1c8813815b86..a95f68434d12 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -1,9 +1,8 @@
 //===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
 /// transition penalty between functions encoded with AVX and SSE.
 FunctionPass *createX86IssueVZeroUpperPass();
 
-/// This pass instruments the function prolog to save the return address to a
-/// 'shadow call stack' and the function epilog to check that the return address
-/// did not change during function execution.
-FunctionPass *createShadowCallStackPass();
-
 /// This pass inserts ENDBR instructions before indirect jump/call
 /// destinations as part of CET IBT mechanism.
 FunctionPass *createX86IndirectBranchTrackingPass();
@@ -138,11 +132,12 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
-void initializeShadowCallStackPass(PassRegistry &);
+void initializeFPSPass(PassRegistry &);
 void initializeWinEHStatePassPass(PassRegistry &);
 void initializeX86AvoidSFBPassPass(PassRegistry &);
 void initializeX86CallFrameOptimizationPass(PassRegistry &);
 void initializeX86CmovConverterPassPass(PassRegistry &);
+void initializeX86ExpandPseudoPass(PassRegistry&);
 void initializeX86CondBrFoldingPassPass(PassRegistry &);
 void initializeX86DomainReassignmentPass(PassRegistry &);
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6b1749fc7500..3112f00c91f2 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,9 +1,8 @@
 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -40,6 +39,9 @@ def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
                                       "Enable conditional move instructions">;
 
+def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
+                                        "Support CMPXCHG8B instructions">;
+
 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                        "Support POPCNT instruction">;
 
@@ -165,9 +167,16 @@ def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                           "Enable AVX-512 Vector Neural Network Instructions",
                                       [FeatureAVX512]>;
+def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
+                           "Support bfloat16 floating point",
+                                      [FeatureBWI]>;
 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                        "Enable AVX-512 Bit Algorithms",
                         [FeatureBWI]>;
+def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
+                                            "HasVP2INTERSECT", "true",
+                                            "Enable AVX-512 vp2intersect",
+                                            [FeatureAVX512]>;
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                          "Enable packed carry-less multiplication instructions",
                                [FeatureSSE2]>;
@@ -258,6 +267,8 @@ def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                     "Support RDPID instructions">;
 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                       "Wait and pause enhancements">;
+def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
+                                     "Has ENQCMD instructions">;
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
@@ -274,7 +285,7 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                    "INC and DEC instructions are slower than ADD and SUB">;
 def FeatureSoftFloat
     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
-                       "Use software floating point features.">;
+                       "Use software floating point features">;
 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
                                      "HasPOPCNTFalseDeps", "true",
                                      "POPCNT has a false dependency on dest register">;
@@ -342,6 +353,12 @@ def FeatureERMSB
           "ermsb", "HasERMSB", "true",
           "REP MOVS/STOS are fast">;
 
+// Bulldozer and newer processors can merge CMP/TEST (but not other
+// instructions) with conditional branches.
+def FeatureBranchFusion
+    : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
+                 "CMP/TEST can be fused with conditional branches">;
+
 // Sandy Bridge and newer processors have many instructions that can be
 // fused with conditional branches and pass through the CPU as a single
 // operation.
@@ -355,7 +372,7 @@ def FeatureMacroFusion
 // similar to Skylake Server (AVX-512).
 def FeatureHasFastGather
     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
-                       "Indicates if gather is reasonably fast.">;
+                       "Indicates if gather is reasonably fast">;
 
 def FeaturePrefer256Bit
     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
@@ -366,7 +383,7 @@ def FeaturePrefer256Bit
 def FeatureRetpolineIndirectCalls
     : SubtargetFeature<
           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
-          "Remove speculation of indirect calls from the generated code.">;
+          "Remove speculation of indirect calls from the generated code">;
 
 // Lower indirect branches and switches either using conditional branch trees
 // or using a special construct called a `retpoline` to mitigate potential
@@ -374,7 +391,7 @@ def FeatureRetpolineIndirectCalls
 def FeatureRetpolineIndirectBranches
     : SubtargetFeature<
           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
-          "Remove speculation of indirect branches from the generated code.">;
+          "Remove speculation of indirect branches from the generated code">;
 
 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
 // `retpoline-indirect-branches` above.
@@ -382,7 +399,7 @@ def FeatureRetpoline
     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
                        "Remove speculation of indirect branches from the "
                        "generated code, either by avoiding them entirely or "
-                       "lowering them with a speculation blocking construct.",
+                       "lowering them with a speculation blocking construct",
                        [FeatureRetpolineIndirectCalls,
                         FeatureRetpolineIndirectBranches]>;
 
@@ -395,7 +412,7 @@ def FeatureRetpolineExternalThunk
           "When lowering an indirect call or branch using a `retpoline`, rely "
           "on the specified user provided thunk rather than emitting one "
           "ourselves. Only has effect when combined with some other retpoline "
-          "feature.", [FeatureRetpolineIndirectCalls]>;
+          "feature", [FeatureRetpolineIndirectCalls]>;
 
 // Direct Move instructions.
 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
@@ -405,7 +422,7 @@ def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
 
 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
           "Indicates that the BEXTR instruction is implemented as a single uop "
-          "with good throughput.">;
+          "with good throughput">;
 
 // Combine vector math operations with shuffles into horizontal math
 // instructions if a CPU implements horizontal operations (introduced with
@@ -416,12 +433,33 @@ def FeatureFastHorizontalOps
         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
         "normal vector instructions with shuffles", [FeatureSSE3]>;
 
+def FeatureFastScalarShiftMasks
+    : SubtargetFeature<
+        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
+        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
+
+def FeatureFastVectorShiftMasks
+    : SubtargetFeature<
+        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
+        "Prefer a left/right vector logical shift pair over a shift+and pair">;
+
 // Merge branches using three-way conditional code.
 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
                                         "ThreewayBranchProfitable", "true",
                                         "Merge branches to a three-way "
                                         "conditional branch">;
 
+// Bonnell
+def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
+// Silvermont
+def ProcIntelSLM  : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
+// Goldmont
+def ProcIntelGLM  : SubtargetFeature<"", "X86ProcFamily", "IntelGLM", "">;
+// Goldmont Plus
+def ProcIntelGLP  : SubtargetFeature<"", "X86ProcFamily", "IntelGLP", "">;
+// Tremont
+def ProcIntelTRM  : SubtargetFeature<"", "X86ProcFamily", "IntelTRM", "">;
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
@@ -440,7 +478,7 @@ include "X86SchedPredicates.td"
 def X86InstrInfo : InstrInfo;
 
 //===----------------------------------------------------------------------===//
-// X86 processors supported.
+// X86 Scheduler Models
 //===----------------------------------------------------------------------===//
 
 include "X86ScheduleAtom.td"
@@ -454,37 +492,468 @@ include "X86ScheduleBtVer2.td"
 include "X86SchedSkylakeClient.td"
 include "X86SchedSkylakeServer.td"
 
-def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
-                    "Intel Atom processors">;
-def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
-                    "Intel Silvermont processors">;
-def ProcIntelGLM  : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
-                    "Intel Goldmont processors">;
-def ProcIntelGLP  : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
-                    "Intel Goldmont Plus processors">;
-def ProcIntelTRM  : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
-                    "Intel Tremont processors">;
+//===----------------------------------------------------------------------===//
+// X86 Processor Feature Lists
+//===----------------------------------------------------------------------===//
+
+def ProcessorFeatures {
+  // Nehalem
+  list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
+                                                   FeatureCMPXCHG8B,
+                                                   FeatureCMOV,
+                                                   FeatureMMX,
+                                                   FeatureSSE42,
+                                                   FeatureFXSR,
+                                                   FeatureNOPL,
+                                                   Feature64Bit,
+                                                   FeatureCMPXCHG16B,
+                                                   FeaturePOPCNT,
+                                                   FeatureLAHFSAHF,
+                                                   FeatureMacroFusion];
+  list<SubtargetFeature> NHMSpecificFeatures = [];
+  list<SubtargetFeature> NHMFeatures =
+    !listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
+
+  // Westmere
+  list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
+  list<SubtargetFeature> WSMSpecificFeatures = [];
+  list<SubtargetFeature> WSMInheritableFeatures =
+    !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures);
+  list<SubtargetFeature> WSMFeatures =
+    !listconcat(WSMInheritableFeatures, WSMSpecificFeatures);
+
+  // Sandybridge
+  list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
+                                                  FeatureSlowDivide64,
+                                                  FeatureXSAVE,
+                                                  FeatureXSAVEOPT,
+                                                  FeatureSlow3OpsLEA,
+                                                  FeatureFastScalarFSQRT,
+                                                  FeatureFastSHLDRotate,
+                                                  FeatureMergeToThreeWayBranch];
+  list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> SNBInheritableFeatures =
+    !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures);
+  list<SubtargetFeature> SNBFeatures =
+    !listconcat(SNBInheritableFeatures, SNBSpecificFeatures);
+
+  // Ivybridge
+  list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
+                                                  FeatureF16C,
+                                                  FeatureFSGSBase];
+  list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> IVBInheritableFeatures =
+    !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures);
+  list<SubtargetFeature> IVBFeatures =
+    !listconcat(IVBInheritableFeatures, IVBSpecificFeatures);
+
+  // Haswell
+  list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
+                                                  FeatureBMI,
+                                                  FeatureBMI2,
+                                                  FeatureERMSB,
+                                                  FeatureFMA,
+                                                  FeatureINVPCID,
+                                                  FeatureLZCNT,
+                                                  FeatureMOVBE,
+                                                  FeatureFastVariableShuffle];
+  list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps,
+                                                FeatureLZCNTFalseDeps];
+  list<SubtargetFeature> HSWInheritableFeatures =
+    !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures);
+  list<SubtargetFeature> HSWFeatures =
+    !listconcat(HSWInheritableFeatures, HSWSpecificFeatures);
+
+  // Broadwell
+  list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
+                                                  FeatureRDSEED,
+                                                  FeaturePRFCHW];
+  list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps,
+                                                FeatureLZCNTFalseDeps];
+  list<SubtargetFeature> BDWInheritableFeatures =
+    !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures);
+  list<SubtargetFeature> BDWFeatures =
+    !listconcat(BDWInheritableFeatures, BDWSpecificFeatures);
+
+  // Skylake
+  list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
+                                                  FeatureMPX,
+                                                  FeatureXSAVEC,
+                                                  FeatureXSAVES,
+                                                  FeatureCLFLUSHOPT,
+                                                  FeatureFastVectorFSQRT];
+  list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather,
+                                                FeaturePOPCNTFalseDeps,
+                                                FeatureSGX];
+  list<SubtargetFeature> SKLInheritableFeatures =
+    !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures);
+  list<SubtargetFeature> SKLFeatures =
+    !listconcat(SKLInheritableFeatures, SKLSpecificFeatures);
+
+  // Skylake-AVX512
+  list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
+                                                  FeatureCDI,
+                                                  FeatureDQI,
+                                                  FeatureBWI,
+                                                  FeatureVLX,
+                                                  FeaturePKU,
+                                                  FeatureCLWB];
+  list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> SKXInheritableFeatures =
+    !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures);
+  list<SubtargetFeature> SKXFeatures =
+    !listconcat(SKXInheritableFeatures, SKXSpecificFeatures);
+
+  // Cascadelake
+  list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
+  list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> CLXInheritableFeatures =
+    !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures);
+  list<SubtargetFeature> CLXFeatures =
+    !listconcat(CLXInheritableFeatures, CLXSpecificFeatures);
+
+  // Cooperlake
+  list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
+  list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> CPXInheritableFeatures =
+    !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
+  list<SubtargetFeature> CPXFeatures =
+    !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);
+
+  // Cannonlake
+  list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+                                                  FeatureCDI,
+                                                  FeatureDQI,
+                                                  FeatureBWI,
+                                                  FeatureVLX,
+                                                  FeaturePKU,
+                                                  FeatureVBMI,
+                                                  FeatureIFMA,
+                                                  FeatureSHA,
+                                                  FeatureSGX];
+  list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
+  list<SubtargetFeature> CNLInheritableFeatures =
+    !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
+  list<SubtargetFeature> CNLFeatures =
+    !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);
+
+  // Icelake
+  list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
+                                                  FeatureVAES,
+                                                  FeatureVBMI2,
+                                                  FeatureVNNI,
+                                                  FeatureVPCLMULQDQ,
+                                                  FeatureVPOPCNTDQ,
+                                                  FeatureGFNI,
+                                                  FeatureCLWB,
+                                                  FeatureRDPID];
+  list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
+  list<SubtargetFeature> ICLInheritableFeatures =
+    !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
+  list<SubtargetFeature> ICLFeatures =
+    !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);
+
+  // Icelake Server
+  list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG,
+                                                FeatureWBNOINVD,
+                                                FeatureHasFastGather];
+  list<SubtargetFeature> ICXFeatures =
+    !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
+
+  // Atom
+  list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
+                                                    FeatureCMPXCHG8B,
+                                                    FeatureCMOV,
+                                                    FeatureMMX,
+                                                    FeatureSSSE3,
+                                                    FeatureFXSR,
+                                                    FeatureNOPL,
+                                                    Feature64Bit,
+                                                    FeatureCMPXCHG16B,
+                                                    FeatureMOVBE,
+                                                    FeatureSlowTwoMemOps,
+                                                    FeatureLAHFSAHF];
+  list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
+                                                 FeatureSlowUAMem16,
+                                                 FeatureLEAForSP,
+                                                 FeatureSlowDivide32,
+                                                 FeatureSlowDivide64,
+                                                 FeatureLEAUsesAG,
+                                                 FeaturePadShortFunctions];
+  list<SubtargetFeature> AtomFeatures =
+    !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);
+
+  // Silvermont
+  list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
+                                                  FeaturePOPCNT,
+                                                  FeaturePCLMUL,
+                                                  FeaturePRFCHW,
+                                                  FeatureSlowLEA,
+                                                  FeatureSlowIncDec,
+                                                  FeatureRDRAND];
+  list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
+                                                FeatureSlowDivide64,
+                                                FeatureSlowPMULLD,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> SLMInheritableFeatures =
+    !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
+  list<SubtargetFeature> SLMFeatures =
+    !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);
+
+  // Goldmont
+  list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
+                                                  FeatureMPX,
+                                                  FeatureSHA,
+                                                  FeatureRDSEED,
+                                                  FeatureXSAVE,
+                                                  FeatureXSAVEOPT,
+                                                  FeatureXSAVEC,
+                                                  FeatureXSAVES,
+                                                  FeatureCLFLUSHOPT,
+                                                  FeatureFSGSBase];
+  list<SubtargetFeature> GLMSpecificFeatures = [ProcIntelGLM,
+                                                FeaturePOPCNTFalseDeps];
+  list<SubtargetFeature> GLMInheritableFeatures =
+    !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
+  list<SubtargetFeature> GLMFeatures =
+    !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);
+
+  // Goldmont Plus
+  list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
+                                                  FeatureRDPID,
+                                                  FeatureSGX];
+  list<SubtargetFeature> GLPSpecificFeatures = [ProcIntelGLP];
+  list<SubtargetFeature> GLPInheritableFeatures =
+    !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
+  list<SubtargetFeature> GLPFeatures =
+    !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
+
+  // Tremont
+  list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE,
+                                                  FeatureGFNI,
+                                                  FeatureMOVDIRI,
+                                                  FeatureMOVDIR64B,
+                                                  FeatureWAITPKG];
+  list<SubtargetFeature> TRMSpecificFeatures = [ProcIntelTRM];
+  list<SubtargetFeature> TRMFeatures =
+    !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
+                TRMSpecificFeatures);
+
+  // Knights Landing
+  list<SubtargetFeature> KNLFeatures = [FeatureX87,
+                                        FeatureCMPXCHG8B,
+                                        FeatureCMOV,
+                                        FeatureMMX,
+                                        FeatureFXSR,
+                                        FeatureNOPL,
+                                        Feature64Bit,
+                                        FeatureCMPXCHG16B,
+                                        FeaturePOPCNT,
+                                        FeatureSlowDivide64,
+                                        FeaturePCLMUL,
+                                        FeatureXSAVE,
+                                        FeatureXSAVEOPT,
+                                        FeatureLAHFSAHF,
+                                        FeatureSlow3OpsLEA,
+                                        FeatureSlowIncDec,
+                                        FeatureAES,
+                                        FeatureRDRAND,
+                                        FeatureF16C,
+                                        FeatureFSGSBase,
+                                        FeatureAVX512,
+                                        FeatureERI,
+                                        FeatureCDI,
+                                        FeaturePFI,
+                                        FeaturePREFETCHWT1,
+                                        FeatureADX,
+                                        FeatureRDSEED,
+                                        FeatureMOVBE,
+                                        FeatureLZCNT,
+                                        FeatureBMI,
+                                        FeatureBMI2,
+                                        FeatureFMA,
+                                        FeaturePRFCHW,
+                                        FeatureSlowTwoMemOps,
+                                        FeatureFastPartialYMMorZMMWrite,
+                                        FeatureHasFastGather,
+                                        FeatureSlowPMADDWD];
+  // Knights Mill (TODO: add AVX5124FMAPS/AVX5124VNNIW features)
+  list<SubtargetFeature> KNMFeatures =
+    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
+
+
+  // Bobcat
+  list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
+                                                      FeatureCMPXCHG8B,
+                                                      FeatureCMOV,
+                                                      FeatureMMX,
+                                                      FeatureSSSE3,
+                                                      FeatureSSE4A,
+                                                      FeatureFXSR,
+                                                      FeatureNOPL,
+                                                      Feature64Bit,
+                                                      FeatureCMPXCHG16B,
+                                                      FeaturePRFCHW,
+                                                      FeatureLZCNT,
+                                                      FeaturePOPCNT,
+                                                      FeatureSlowSHLD,
+                                                      FeatureLAHFSAHF,
+                                                      FeatureFast15ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
+                                                      FeatureFastVectorShiftMasks];
+  list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
+
+  // Jaguar
+  list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
+                                                     FeatureAES,
+                                                     FeaturePCLMUL,
+                                                     FeatureBMI,
+                                                     FeatureF16C,
+                                                     FeatureMOVBE,
+                                                     FeatureXSAVE,
+                                                     FeatureXSAVEOPT];
+  list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
+                                                   FeatureFastBEXTR,
+                                                   FeatureFastPartialYMMorZMMWrite,
+                                                   FeatureFastHorizontalOps];
+  list<SubtargetFeature> BtVer2InheritableFeatures =
+    !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
+  list<SubtargetFeature> BtVer2Features =
+    !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);
+
+  // Bulldozer
+  list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
+                                                      FeatureCMPXCHG8B,
+                                                      FeatureCMOV,
+                                                      FeatureXOP,
+                                                      Feature64Bit,
+                                                      FeatureCMPXCHG16B,
+                                                      FeatureAES,
+                                                      FeaturePRFCHW,
+                                                      FeaturePCLMUL,
+                                                      FeatureMMX,
+                                                      FeatureFXSR,
+                                                      FeatureNOPL,
+                                                      FeatureLZCNT,
+                                                      FeaturePOPCNT,
+                                                      FeatureXSAVE,
+                                                      FeatureLWP,
+                                                      FeatureSlowSHLD,
+                                                      FeatureLAHFSAHF,
+                                                      FeatureFast11ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
+                                                      FeatureBranchFusion];
+  list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
+
+  // PileDriver
+  list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
+                                                     FeatureBMI,
+                                                     FeatureTBM,
+                                                     FeatureFMA,
+                                                     FeatureFastBEXTR];
+  list<SubtargetFeature> BdVer2InheritableFeatures =
+    !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
+  list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;
+
+  // Steamroller
+  list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
+                                                     FeatureFSGSBase];
+  list<SubtargetFeature> BdVer3InheritableFeatures =
+    !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
+  list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;
+
+  // Excavator
+  list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
+                                                     FeatureBMI2,
+                                                     FeatureMWAITX];
+  list<SubtargetFeature> BdVer4InheritableFeatures =
+    !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
+  list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;
+
+
+  // AMD Zen Processors common ISAs
+  list<SubtargetFeature> ZNFeatures = [FeatureADX,
+                                       FeatureAES,
+                                       FeatureAVX2,
+                                       FeatureBMI,
+                                       FeatureBMI2,
+                                       FeatureCLFLUSHOPT,
+                                       FeatureCLZERO,
+                                       FeatureCMOV,
+                                       Feature64Bit,
+                                       FeatureCMPXCHG16B,
+                                       FeatureF16C,
+                                       FeatureFMA,
+                                       FeatureFSGSBase,
+                                       FeatureFXSR,
+                                       FeatureNOPL,
+                                       FeatureFastLZCNT,
+                                       FeatureLAHFSAHF,
+                                       FeatureLZCNT,
+                                       FeatureFastBEXTR,
+                                       FeatureFast15ByteNOP,
+                                       FeatureBranchFusion,
+                                       FeatureFastScalarShiftMasks,
+                                       FeatureMMX,
+                                       FeatureMOVBE,
+                                       FeatureMWAITX,
+                                       FeaturePCLMUL,
+                                       FeaturePOPCNT,
+                                       FeaturePRFCHW,
+                                       FeatureRDRAND,
+                                       FeatureRDSEED,
+                                       FeatureSHA,
+                                       FeatureSSE4A,
+                                       FeatureSlowSHLD,
+                                       FeatureX87,
+                                       FeatureXSAVE,
+                                       FeatureXSAVEC,
+                                       FeatureXSAVEOPT,
+                                       FeatureXSAVES];
+  list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
+                                                  FeatureRDPID,
+                                                  FeatureWBNOINVD];
+  list<SubtargetFeature> ZN2Features =
+    !listconcat(ZNFeatures, ZN2AdditionalFeatures);
+}
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
 
 class Proc<string Name, list<SubtargetFeature> Features>
  : ProcessorModel<Name, GenericModel, Features>;
 
-def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
+// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
+// if i386/i486 is specifically requested.
+def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-
-def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
-                          FeatureNOPL]>;
-
-def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
+def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
+def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
+def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B, FeatureMMX]>;
+
+def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    FeatureCMOV]>;
+def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                          FeatureCMOV, FeatureNOPL]>;
+
+def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               FeatureMMX, FeatureCMOV, FeatureFXSR,
+                               FeatureNOPL]>;
 
 foreach P = ["pentium3", "pentium3m"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
-                 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
+                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 }
 
 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -498,13 +967,15 @@ foreach P = ["pentium3", "pentium3m"] in {
 // changes slightly.
 
 def : ProcessorModel<"pentium-m", GenericPostRAModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                      FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
 
 foreach P = ["pentium4", "pentium4m"] in {
   def : ProcessorModel<P, GenericPostRAModel,
-                       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                        FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                       [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                        FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+                        FeatureCMOV]>;
 }
 
 // Intel Quark.
@@ -512,16 +983,19 @@ def : Proc<"lakemont",        []>;
 
 // Intel Core Duo.
 def : ProcessorModel<"yonah", SandyBridgeModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
 
 // NetBurst.
 def : ProcessorModel<"prescott", GenericPostRAModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
 def : ProcessorModel<"nocona", GenericPostRAModel, [
   FeatureX87,
   FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
   FeatureCMOV,
   FeatureMMX,
   FeatureSSE3,
@@ -535,6 +1009,7 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
 def : ProcessorModel<"core2", SandyBridgeModel, [
   FeatureX87,
   FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
   FeatureCMOV,
   FeatureMMX,
   FeatureSSSE3,
@@ -548,6 +1023,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
 def : ProcessorModel<"penryn", SandyBridgeModel, [
   FeatureX87,
   FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
   FeatureCMOV,
   FeatureMMX,
   FeatureSSE41,
@@ -560,638 +1036,131 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
 ]>;
 
 // Atom CPUs.
-class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
-  ProcIntelAtom,
-  FeatureX87,
-  FeatureSlowUAMem16,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSSE3,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureMOVBE,
-  FeatureLEAForSP,
-  FeatureSlowDivide32,
-  FeatureSlowDivide64,
-  FeatureSlowTwoMemOps,
-  FeatureLEAUsesAG,
-  FeaturePadShortFunctions,
-  FeatureLAHFSAHF
-]>;
-def : BonnellProc<"bonnell">;
-def : BonnellProc<"atom">; // Pin the generic name to the baseline.
-
-class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
-  ProcIntelSLM,
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSE42,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureMOVBE,
-  FeaturePOPCNT,
-  FeaturePCLMUL,
-  FeatureSlowDivide64,
-  FeatureSlowTwoMemOps,
-  FeaturePRFCHW,
-  FeatureSlowLEA,
-  FeatureSlowIncDec,
-  FeatureSlowPMULLD,
-  FeatureRDRAND,
-  FeatureLAHFSAHF,
-  FeaturePOPCNTFalseDeps
-]>;
-def : SilvermontProc<"silvermont">;
-def : SilvermontProc<"slm">; // Legacy alias.
-
-class ProcessorFeatures<list<SubtargetFeature> Inherited,
-                        list<SubtargetFeature> NewFeatures> {
-  list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
+foreach P = ["bonnell", "atom"] in {
+  def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
 }
 
-class ProcModel<string Name, SchedMachineModel Model,
-                list<SubtargetFeature> ProcFeatures,
-                list<SubtargetFeature> OtherFeatures> :
-  ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
-
-def GLMFeatures : ProcessorFeatures<[], [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSE42,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureMOVBE,
-  FeaturePOPCNT,
-  FeaturePCLMUL,
-  FeatureAES,
-  FeaturePRFCHW,
-  FeatureSlowTwoMemOps,
-  FeatureSlowLEA,
-  FeatureSlowIncDec,
-  FeatureLAHFSAHF,
-  FeatureMPX,
-  FeatureSHA,
-  FeatureRDRAND,
-  FeatureRDSEED,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureXSAVEC,
-  FeatureXSAVES,
-  FeatureCLFLUSHOPT,
-  FeatureFSGSBase
-]>;
+foreach P = ["silvermont", "slm"] in {
+  def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
+}
 
-class GoldmontProc<string Name> : ProcModel<Name, SLMModel,
-      GLMFeatures.Value, [
-  ProcIntelGLM,
-  FeaturePOPCNTFalseDeps
-]>;
-def : GoldmontProc<"goldmont">;
-
-def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [
-  FeaturePTWRITE,
-  FeatureRDPID,
-  FeatureSGX
-]>;
-
-class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,
-      GLPFeatures.Value, [
-  ProcIntelGLP
-]>;
-def : GoldmontPlusProc<"goldmont-plus">;
-
-class TremontProc<string Name> : ProcModel<Name, SLMModel,
-      GLPFeatures.Value, [
-  ProcIntelTRM,
-  FeatureCLDEMOTE,
-  FeatureGFNI,
-  FeatureMOVDIRI,
-  FeatureMOVDIR64B,
-  FeatureWAITPKG
-]>;
-def : TremontProc<"tremont">;
+def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
+def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>;
+def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;
 
 // "Arrandale" along with corei3 and corei5
-class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSE42,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePOPCNT,
-  FeatureLAHFSAHF,
-  FeatureMacroFusion
-]>;
-def : NehalemProc<"nehalem">;
-def : NehalemProc<"corei7">;
+foreach P = ["nehalem", "corei7"] in {
+  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
+}
 
-// Westmere is a similar machine to nehalem with some additional features.
 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSE42,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePOPCNT,
-  FeaturePCLMUL,
-  FeatureLAHFSAHF,
-  FeatureMacroFusion
-]>;
-def : WestmereProc<"westmere">;
-
-// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
-// rather than a superset.
-def SNBFeatures : ProcessorFeatures<[], [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePOPCNT,
-  FeatureSlowDivide64,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureLAHFSAHF,
-  FeatureSlow3OpsLEA,
-  FeatureFastScalarFSQRT,
-  FeatureFastSHLDRotate,
-  FeatureSlowIncDec,
-  FeatureMergeToThreeWayBranch,
-  FeatureMacroFusion
-]>;
-
-class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
-                                               SNBFeatures.Value, [
-  FeatureSlowUAMem32,
-  FeaturePOPCNTFalseDeps
-]>;
-def : SandyBridgeProc<"sandybridge">;
-def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
-
-def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase
-]>;
-
-class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
-                                             IVBFeatures.Value, [
-  FeatureSlowUAMem32,
-  FeaturePOPCNTFalseDeps
-]>;
-def : IvyBridgeProc<"ivybridge">;
-def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
-
-def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
-  FeatureAVX2,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureERMSB,
-  FeatureFMA,
-  FeatureINVPCID,
-  FeatureLZCNT,
-  FeatureMOVBE,
-  FeatureFastVariableShuffle
-]>;
-
-class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
-                                           HSWFeatures.Value, [
-  FeaturePOPCNTFalseDeps,
-  FeatureLZCNTFalseDeps
-]>;
-def : HaswellProc<"haswell">;
-def : HaswellProc<"core-avx2">; // Legacy alias.
+def : ProcessorModel<"westmere", SandyBridgeModel,
+                     ProcessorFeatures.WSMFeatures>;
 
-def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
-  FeatureADX,
-  FeatureRDSEED,
-  FeaturePRFCHW
-]>;
-class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
-                                             BDWFeatures.Value, [
-  FeaturePOPCNTFalseDeps,
-  FeatureLZCNTFalseDeps
-]>;
-def : BroadwellProc<"broadwell">;
-
-def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
-  FeatureAES,
-  FeatureMPX,
-  FeatureXSAVEC,
-  FeatureXSAVES,
-  FeatureCLFLUSHOPT,
-  FeatureFastVectorFSQRT
-]>;
-
-class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
-                                                 SKLFeatures.Value, [
-  FeatureHasFastGather,
-  FeaturePOPCNTFalseDeps,
-  FeatureSGX
-]>;
-def : SkylakeClientProc<"skylake">;
+foreach P = ["sandybridge", "corei7-avx"] in {
+  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
+}
 
-def KNLFeatures : ProcessorFeatures<[], [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePOPCNT,
-  FeatureSlowDivide64,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureLAHFSAHF,
-  FeatureSlow3OpsLEA,
-  FeatureSlowIncDec,
-  FeatureAES,
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase,
-  FeatureAVX512,
-  FeatureERI,
-  FeatureCDI,
-  FeaturePFI,
-  FeaturePREFETCHWT1,
-  FeatureADX,
-  FeatureRDSEED,
-  FeatureMOVBE,
-  FeatureLZCNT,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureFMA,
-  FeaturePRFCHW
-]>;
+foreach P = ["ivybridge", "core-avx-i"] in {
+  def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
+}
 
-// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
-                                                  KNLFeatures.Value, [
-  FeatureSlowTwoMemOps,
-  FeatureFastPartialYMMorZMMWrite,
-  FeatureHasFastGather,
-  FeatureSlowPMADDWD
-]>;
-def : KnightsLandingProc<"knl">;
-
-class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
-                                               KNLFeatures.Value, [
-  FeatureSlowTwoMemOps,
-  FeatureFastPartialYMMorZMMWrite,
-  FeatureHasFastGather,
-  FeatureSlowPMADDWD,
-  FeatureVPOPCNTDQ
-]>;
-def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
-
-def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
-  FeatureAVX512,
-  FeatureCDI,
-  FeatureDQI,
-  FeatureBWI,
-  FeatureVLX,
-  FeaturePKU,
-  FeatureCLWB
-]>;
+foreach P = ["haswell", "core-avx2"] in {
+  def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
+}
 
-class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
-                                                 SKXFeatures.Value, [
-  FeatureHasFastGather,
-  FeaturePOPCNTFalseDeps
-]>;
-def : SkylakeServerProc<"skylake-avx512">;
-def : SkylakeServerProc<"skx">; // Legacy alias.
+def : ProcessorModel<"broadwell", BroadwellModel,
+                     ProcessorFeatures.BDWFeatures>;
 
-def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
-  FeatureVNNI
-]>;
+def : ProcessorModel<"skylake", SkylakeClientModel,
+                     ProcessorFeatures.SKLFeatures>;
 
-class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
-                                              CLXFeatures.Value, [
-  FeatureHasFastGather,
-  FeaturePOPCNTFalseDeps
-]>;
-def : CascadelakeProc<"cascadelake">;
-
-def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
-  FeatureAVX512,
-  FeatureCDI,
-  FeatureDQI,
-  FeatureBWI,
-  FeatureVLX,
-  FeaturePKU,
-  FeatureVBMI,
-  FeatureIFMA,
-  FeatureSHA,
-  FeatureSGX
-]>;
+// FIXME: define KNL scheduler model
+def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
+def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;
 
-class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
-                                              CNLFeatures.Value, [
-  FeatureHasFastGather
-]>;
-def : CannonlakeProc<"cannonlake">;
-
-def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
-  FeatureBITALG,
-  FeatureVAES,
-  FeatureVBMI2,
-  FeatureVNNI,
-  FeatureVPCLMULQDQ,
-  FeatureVPOPCNTDQ,
-  FeatureGFNI,
-  FeatureCLWB,
-  FeatureRDPID
-]>;
-
-class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
-                                                 ICLFeatures.Value, [
-  FeatureHasFastGather
-]>;
-def : IcelakeClientProc<"icelake-client">;
+foreach P = ["skylake-avx512", "skx"] in {
+  def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
+}
 
-class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
-                                                 ICLFeatures.Value, [
-  FeaturePCONFIG,
-  FeatureWBNOINVD,
-  FeatureHasFastGather
-]>;
-def : IcelakeServerProc<"icelake-server">;
+def : ProcessorModel<"cascadelake", SkylakeServerModel,
+                     ProcessorFeatures.CLXFeatures>;
+def : ProcessorModel<"cooperlake", SkylakeServerModel,
+                     ProcessorFeatures.CPXFeatures>;
+def : ProcessorModel<"cannonlake", SkylakeServerModel,
+                     ProcessorFeatures.CNLFeatures>;
+def : ProcessorModel<"icelake-client", SkylakeServerModel,
+                     ProcessorFeatures.ICLFeatures>;
+def : ProcessorModel<"icelake-server", SkylakeServerModel,
+                     ProcessorFeatures.ICXFeatures>;
 
 // AMD CPUs.
 
-def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6",   [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    FeatureMMX]>;
+def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    Feature3DNow]>;
+def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               Feature3DNow]>;
 
 foreach P = ["athlon", "athlon-tbird"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
-                 FeatureNOPL, FeatureSlowSHLD]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+                 Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>;
 }
 
 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
-                 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+                 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+                 FeatureSlowSHLD]>;
 }
 
 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
-                 FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
-                 FeatureCMOV]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                 FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
-                 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
-                 FeatureCMOV, Feature64Bit]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
+                 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
+                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["amdfam10", "barcelona"] in {
-  def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
-                 FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
-                 FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
+  def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
+                 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
+                 FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
+                 Feature64Bit, FeatureFastScalarShiftMasks]>;
 }
 
 // Bobcat
-def : Proc<"btver1", [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureSSSE3,
-  FeatureSSE4A,
-  FeatureFXSR,
-  FeatureNOPL,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePRFCHW,
-  FeatureLZCNT,
-  FeaturePOPCNT,
-  FeatureSlowSHLD,
-  FeatureLAHFSAHF,
-  FeatureFast15ByteNOP
-]>;
-
+def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
 // Jaguar
-def : ProcessorModel<"btver2", BtVer2Model, [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureSSE4A,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeaturePRFCHW,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureBMI,
-  FeatureF16C,
-  FeatureMOVBE,
-  FeatureLZCNT,
-  FeatureFastLZCNT,
-  FeaturePOPCNT,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureSlowSHLD,
-  FeatureLAHFSAHF,
-  FeatureFast15ByteNOP,
-  FeatureFastBEXTR,
-  FeatureFastPartialYMMorZMMWrite,
-  FeatureFastHorizontalOps
-]>;
+def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;
 
 // Bulldozer
-def : ProcessorModel<"bdver1", BdVer2Model, [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureXOP,
-  FeatureFMA4,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureAES,
-  FeaturePRFCHW,
-  FeaturePCLMUL,
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureSSE4A,
-  FeatureLZCNT,
-  FeaturePOPCNT,
-  FeatureXSAVE,
-  FeatureLWP,
-  FeatureSlowSHLD,
-  FeatureLAHFSAHF,
-  FeatureFast11ByteNOP,
-  FeatureMacroFusion
-]>;
+def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
 // Piledriver
-def : ProcessorModel<"bdver2", BdVer2Model, [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureXOP,
-  FeatureFMA4,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureAES,
-  FeaturePRFCHW,
-  FeaturePCLMUL,
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureSSE4A,
-  FeatureF16C,
-  FeatureLZCNT,
-  FeaturePOPCNT,
-  FeatureXSAVE,
-  FeatureBMI,
-  FeatureTBM,
-  FeatureLWP,
-  FeatureFMA,
-  FeatureSlowSHLD,
-  FeatureLAHFSAHF,
-  FeatureFast11ByteNOP,
-  FeatureFastBEXTR,
-  FeatureMacroFusion
-]>;
-
+def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
 // Steamroller
-def : Proc<"bdver3", [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureXOP,
-  FeatureFMA4,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureAES,
-  FeaturePRFCHW,
-  FeaturePCLMUL,
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureSSE4A,
-  FeatureF16C,
-  FeatureLZCNT,
-  FeaturePOPCNT,
-  FeatureXSAVE,
-  FeatureBMI,
-  FeatureTBM,
-  FeatureLWP,
-  FeatureFMA,
-  FeatureXSAVEOPT,
-  FeatureSlowSHLD,
-  FeatureFSGSBase,
-  FeatureLAHFSAHF,
-  FeatureFast11ByteNOP,
-  FeatureFastBEXTR,
-  FeatureMacroFusion
-]>;
-
+def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
 // Excavator
-def : Proc<"bdver4", [
-  FeatureX87,
-  FeatureCMOV,
-  FeatureMMX,
-  FeatureAVX2,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureXOP,
-  FeatureFMA4,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureAES,
-  FeaturePRFCHW,
-  FeaturePCLMUL,
-  FeatureF16C,
-  FeatureLZCNT,
-  FeaturePOPCNT,
-  FeatureXSAVE,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureTBM,
-  FeatureLWP,
-  FeatureFMA,
-  FeatureXSAVEOPT,
-  FeatureSlowSHLD,
-  FeatureFSGSBase,
-  FeatureLAHFSAHF,
-  FeatureFastBEXTR,
-  FeatureFast11ByteNOP,
-  FeatureMWAITX,
-  FeatureMacroFusion
-]>;
+def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
-  FeatureADX,
-  FeatureAES,
-  FeatureAVX2,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureCLFLUSHOPT,
-  FeatureCLZERO,
-  FeatureCMOV,
-  Feature64Bit,
-  FeatureCMPXCHG16B,
-  FeatureF16C,
-  FeatureFMA,
-  FeatureFSGSBase,
-  FeatureFXSR,
-  FeatureNOPL,
-  FeatureFastLZCNT,
-  FeatureLAHFSAHF,
-  FeatureLZCNT,
-  FeatureFastBEXTR,
-  FeatureFast15ByteNOP,
-  FeatureMacroFusion,
-  FeatureMMX,
-  FeatureMOVBE,
-  FeatureMWAITX,
-  FeaturePCLMUL,
-  FeaturePOPCNT,
-  FeaturePRFCHW,
-  FeatureRDRAND,
-  FeatureRDSEED,
-  FeatureSHA,
-  FeatureSSE4A,
-  FeatureSlowSHLD,
-  FeatureX87,
-  FeatureXSAVE,
-  FeatureXSAVEC,
-  FeatureXSAVEOPT,
-  FeatureXSAVES]>;
+def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
+def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>;
 
-def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
+def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
+def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               FeatureMMX, FeatureSSE1, FeatureFXSR,
+                               FeatureCMOV]>;
 
 // We also provide a generic 64-bit specific x86 processor model which tries to
 // be good for modern chips without enabling instruction set encodings past the
@@ -1205,6 +1174,7 @@ def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 // forming a common base for them.
 def : ProcessorModel<"x86-64", SandyBridgeModel, [
   FeatureX87,
+  FeatureCMPXCHG8B,
   FeatureCMOV,
   FeatureMMX,
   FeatureSSE2,
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 36cef98a1ef5..80120722e0e6 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,9 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
+#include "MCTargetDesc/X86ATTInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86TargetStreamer.h"
+#include "TargetInfo/X86TargetInfo.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "llvm/BinaryFormat/COFF.h"
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -104,16 +105,16 @@ void X86AsmPrinter::EmitFunctionBodyEnd() {
   }
 }
 
-/// printSymbolOperand - Print a raw symbol reference operand.  This handles
+/// PrintSymbolOperand - Print a raw symbol reference operand.  This handles
 /// jump tables, constant pools, global address and external symbols, all of
 /// which print to a label with various suffixes for relocation types etc.
-static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
-                               raw_ostream &O) {
+void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+                                       raw_ostream &O) {
   switch (MO.getType()) {
   default: llvm_unreachable("unknown symbol type!");
   case MachineOperand::MO_ConstantPoolIndex:
-    P.GetCPISymbol(MO.getIndex())->print(O, P.MAI);
-    P.printOffset(MO.getOffset(), O);
+    GetCPISymbol(MO.getIndex())->print(O, MAI);
+    printOffset(MO.getOffset(), O);
     break;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
@@ -121,38 +122,37 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
     MCSymbol *GVSym;
     if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE)
-      GVSym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+      GVSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
     else
-      GVSym = P.getSymbol(GV);
+      GVSym = getSymbol(GV);
 
     // Handle dllimport linkage.
     if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
-      GVSym =
-          P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+      GVSym = OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
     else if (MO.getTargetFlags() == X86II::MO_COFFSTUB)
       GVSym =
-          P.OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
+          OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
 
     if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
-      MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+      MCSymbol *Sym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
       MachineModuleInfoImpl::StubValueTy &StubSym =
-          P.MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+          MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
       if (!StubSym.getPointer())
-        StubSym = MachineModuleInfoImpl::
-          StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage());
+        StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+                                                     !GV->hasInternalLinkage());
     }
 
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
     // to avoid having it look like an integer immediate to the assembler.
     if (GVSym->getName()[0] != '$')
-      GVSym->print(O, P.MAI);
+      GVSym->print(O, MAI);
     else {
       O << '(';
-      GVSym->print(O, P.MAI);
+      GVSym->print(O, MAI);
       O << ')';
     }
-    P.printOffset(MO.getOffset(), O);
+    printOffset(MO.getOffset(), O);
     break;
   }
   }
@@ -169,13 +169,13 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
     break;
   case X86II::MO_GOT_ABSOLUTE_ADDRESS:
     O << " + [.-";
-    P.MF->getPICBaseSymbol()->print(O, P.MAI);
+    MF->getPICBaseSymbol()->print(O, MAI);
     O << ']';
     break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
     O << '-';
-    P.MF->getPICBaseSymbol()->print(O, P.MAI);
+    MF->getPICBaseSymbol()->print(O, MAI);
     break;
   case X86II::MO_TLSGD:     O << "@TLSGD";     break;
   case X86II::MO_TLSLD:     O << "@TLSLD";     break;
@@ -193,76 +193,91 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
   case X86II::MO_TLVP:      O << "@TLVP";      break;
   case X86II::MO_TLVP_PIC_BASE:
     O << "@TLVP" << '-';
-    P.MF->getPICBaseSymbol()->print(O, P.MAI);
+    MF->getPICBaseSymbol()->print(O, MAI);
     break;
   case X86II::MO_SECREL:    O << "@SECREL32";  break;
   }
 }
 
-static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
-                         unsigned OpNo, raw_ostream &O,
-                         const char *Modifier = nullptr, unsigned AsmVariant = 0);
-
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value.  These print slightly differently, for
-/// example, a $ is not emitted.
-static void printPCRelImm(X86AsmPrinter &P, const MachineInstr *MI,
-                          unsigned OpNo, raw_ostream &O) {
+void X86AsmPrinter::PrintOperand(const MachineInstr *MI, unsigned OpNo,
+                                 raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(OpNo);
+  const bool IsATT = MI->getInlineAsmDialect() == InlineAsm::AD_ATT;
   switch (MO.getType()) {
-  default: llvm_unreachable("Unknown pcrel immediate operand");
-  case MachineOperand::MO_Register:
-    // pc-relativeness was handled when computing the value in the reg.
-    printOperand(P, MI, OpNo, O);
+  default: llvm_unreachable("unknown operand type!");
+  case MachineOperand::MO_Register: {
+    if (IsATT)
+      O << '%';
+    O << X86ATTInstPrinter::getRegisterName(MO.getReg());
     return;
+  }
+
   case MachineOperand::MO_Immediate:
+    if (IsATT)
+      O << '$';
     O << MO.getImm();
     return;
-  case MachineOperand::MO_GlobalAddress:
-    printSymbolOperand(P, MO, O);
-    return;
+
+  case MachineOperand::MO_GlobalAddress: {
+    if (IsATT)
+      O << '$';
+    PrintSymbolOperand(MO, O);
+    break;
+  }
+  case MachineOperand::MO_BlockAddress: {
+    MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+    Sym->print(O, MAI);
+    break;
+  }
   }
 }
 
-static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
-                         unsigned OpNo, raw_ostream &O, const char *Modifier,
-                         unsigned AsmVariant) {
+/// PrintModifiedOperand - Print subregisters based on supplied modifier,
+/// deferring to PrintOperand() if no modifier was supplied or if operand is not
+/// a register.
+void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
+                                         raw_ostream &O, const char *Modifier) {
   const MachineOperand &MO = MI->getOperand(OpNo);
-  switch (MO.getType()) {
-  default: llvm_unreachable("unknown operand type!");
-  case MachineOperand::MO_Register: {
-    // FIXME: Enumerating AsmVariant, so we can remove magic number.
-    if (AsmVariant == 0) O << '%';
-    unsigned Reg = MO.getReg();
-    if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
-      unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
-                      (strcmp(Modifier+6,"32") == 0) ? 32 :
-                      (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
-      Reg = getX86SubSuperRegister(Reg, Size);
-    }
-    O << X86ATTInstPrinter::getRegisterName(Reg);
-    return;
+  if (!Modifier || MO.getType() != MachineOperand::MO_Register)
+    return PrintOperand(MI, OpNo, O);
+  if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
+    O << '%';
+  unsigned Reg = MO.getReg();
+  if (strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+    unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
+        (strcmp(Modifier+6,"32") == 0) ? 32 :
+        (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
+    Reg = getX86SubSuperRegister(Reg, Size);
   }
+  O << X86ATTInstPrinter::getRegisterName(Reg);
+}
 
+/// PrintPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.  These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::PrintPCRelImm(const MachineInstr *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  switch (MO.getType()) {
+  default: llvm_unreachable("Unknown pcrel immediate operand");
+  case MachineOperand::MO_Register:
+    // pc-relativeness was handled when computing the value in the reg.
+    PrintOperand(MI, OpNo, O);
+    return;
   case MachineOperand::MO_Immediate:
-    if (AsmVariant == 0) O << '$';
     O << MO.getImm();
     return;
-
-  case MachineOperand::MO_GlobalAddress: {
-    if (AsmVariant == 0) O << '$';
-    printSymbolOperand(P, MO, O);
-    break;
-  }
+  case MachineOperand::MO_GlobalAddress:
+    PrintSymbolOperand(MO, O);
+    return;
   }
 }
 
-static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
-                                 unsigned Op, raw_ostream &O,
-                                 const char *Modifier = nullptr) {
-  const MachineOperand &BaseReg  = MI->getOperand(Op+X86::AddrBaseReg);
-  const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
-  const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
+void X86AsmPrinter::PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
+                                         raw_ostream &O, const char *Modifier) {
+  const MachineOperand &BaseReg = MI->getOperand(OpNo + X86::AddrBaseReg);
+  const MachineOperand &IndexReg = MI->getOperand(OpNo + X86::AddrIndexReg);
+  const MachineOperand &DispSpec = MI->getOperand(OpNo + X86::AddrDisp);
 
   // If we really don't want to print out (rip), don't.
   bool HasBaseReg = BaseReg.getReg() != 0;
@@ -284,7 +299,8 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
   }
   case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ConstantPoolIndex:
-    printSymbolOperand(P, DispSpec, O);
+    PrintSymbolOperand(DispSpec, O);
+    break;
   }
 
   if (Modifier && strcmp(Modifier, "H") == 0)
@@ -296,12 +312,12 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
 
     O << '(';
     if (HasBaseReg)
-      printOperand(P, MI, Op+X86::AddrBaseReg, O, Modifier);
+      PrintModifiedOperand(MI, OpNo + X86::AddrBaseReg, O, Modifier);
 
     if (IndexReg.getReg()) {
       O << ',';
-      printOperand(P, MI, Op+X86::AddrIndexReg, O, Modifier);
-      unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+      PrintModifiedOperand(MI, OpNo + X86::AddrIndexReg, O, Modifier);
+      unsigned ScaleVal = MI->getOperand(OpNo + X86::AddrScaleAmt).getImm();
       if (ScaleVal != 1)
         O << ',' << ScaleVal;
     }
@@ -309,31 +325,28 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
   }
 }
 
-static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI,
-                              unsigned Op, raw_ostream &O,
-                              const char *Modifier = nullptr) {
-  assert(isMem(*MI, Op) && "Invalid memory reference!");
-  const MachineOperand &Segment = MI->getOperand(Op+X86::AddrSegmentReg);
+void X86AsmPrinter::PrintMemReference(const MachineInstr *MI, unsigned OpNo,
+                                      raw_ostream &O, const char *Modifier) {
+  assert(isMem(*MI, OpNo) && "Invalid memory reference!");
+  const MachineOperand &Segment = MI->getOperand(OpNo + X86::AddrSegmentReg);
   if (Segment.getReg()) {
-    printOperand(P, MI, Op+X86::AddrSegmentReg, O, Modifier);
+    PrintModifiedOperand(MI, OpNo + X86::AddrSegmentReg, O, Modifier);
     O << ':';
   }
-  printLeaMemReference(P, MI, Op, O, Modifier);
+  PrintLeaMemReference(MI, OpNo, O, Modifier);
 }
 
-static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
-                                   unsigned Op, raw_ostream &O,
-                                   const char *Modifier = nullptr,
-                                   unsigned AsmVariant = 1) {
-  const MachineOperand &BaseReg  = MI->getOperand(Op+X86::AddrBaseReg);
-  unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
-  const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
-  const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
-  const MachineOperand &SegReg   = MI->getOperand(Op+X86::AddrSegmentReg);
+void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
+                                           unsigned OpNo, raw_ostream &O) {
+  const MachineOperand &BaseReg = MI->getOperand(OpNo + X86::AddrBaseReg);
+  unsigned ScaleVal = MI->getOperand(OpNo + X86::AddrScaleAmt).getImm();
+  const MachineOperand &IndexReg = MI->getOperand(OpNo + X86::AddrIndexReg);
+  const MachineOperand &DispSpec = MI->getOperand(OpNo + X86::AddrDisp);
+  const MachineOperand &SegReg = MI->getOperand(OpNo + X86::AddrSegmentReg);
 
   // If this has a segment register, print it.
   if (SegReg.getReg()) {
-    printOperand(P, MI, Op+X86::AddrSegmentReg, O, Modifier, AsmVariant);
+    PrintOperand(MI, OpNo + X86::AddrSegmentReg, O);
     O << ':';
   }
 
@@ -341,7 +354,7 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
 
   bool NeedPlus = false;
   if (BaseReg.getReg()) {
-    printOperand(P, MI, Op+X86::AddrBaseReg, O, Modifier, AsmVariant);
+    PrintOperand(MI, OpNo + X86::AddrBaseReg, O);
     NeedPlus = true;
   }
 
@@ -349,13 +362,13 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
     if (NeedPlus) O << " + ";
     if (ScaleVal != 1)
       O << ScaleVal << '*';
-    printOperand(P, MI, Op+X86::AddrIndexReg, O, Modifier, AsmVariant);
+    PrintOperand(MI, OpNo + X86::AddrIndexReg, O);
     NeedPlus = true;
   }
 
   if (!DispSpec.isImm()) {
     if (NeedPlus) O << " + ";
-    printOperand(P, MI, Op+X86::AddrDisp, O, Modifier, AsmVariant);
+    PrintOperand(MI, OpNo + X86::AddrDisp, O);
   } else {
     int64_t DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
@@ -418,7 +431,6 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                    unsigned AsmVariant,
                                     const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -429,7 +441,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default:
       // See if this is a generic print operand
-      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
     case 'a': // This is an address.  Currently only 'i' and 'r' are expected.
       switch (MO.getType()) {
       default:
@@ -442,13 +454,13 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       case MachineOperand::MO_ExternalSymbol:
         llvm_unreachable("unexpected operand type!");
       case MachineOperand::MO_GlobalAddress:
-        printSymbolOperand(*this, MO, O);
+        PrintSymbolOperand(MO, O);
         if (Subtarget->isPICStyleRIPRel())
           O << "(%rip)";
         return false;
       case MachineOperand::MO_Register:
         O << '(';
-        printOperand(*this, MI, OpNo, O);
+        PrintOperand(MI, OpNo, O);
         O << ')';
         return false;
       }
@@ -456,7 +468,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     case 'c': // Don't print "$" before a global var name or constant.
       switch (MO.getType()) {
       default:
-        printOperand(*this, MI, OpNo, O);
+        PrintOperand(MI, OpNo, O);
         break;
       case MachineOperand::MO_Immediate:
         O << MO.getImm();
@@ -466,7 +478,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       case MachineOperand::MO_ExternalSymbol:
         llvm_unreachable("unexpected operand type!");
       case MachineOperand::MO_GlobalAddress:
-        printSymbolOperand(*this, MO, O);
+        PrintSymbolOperand(MO, O);
         break;
       }
       return false;
@@ -474,7 +486,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     case 'A': // Print '*' before a register (it must be a register)
       if (MO.isReg()) {
         O << '*';
-        printOperand(*this, MI, OpNo, O);
+        PrintOperand(MI, OpNo, O);
         return false;
       }
       return true;
@@ -487,11 +499,11 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     case 'V': // Print native register without '%'
       if (MO.isReg())
         return printAsmMRegister(*this, MO, ExtraCode[0], O);
-      printOperand(*this, MI, OpNo, O);
+      PrintOperand(MI, OpNo, O);
       return false;
 
     case 'P': // This is the operand of a call, treat specially.
-      printPCRelImm(*this, MI, OpNo, O);
+      PrintPCRelImm(MI, OpNo, O);
       return false;
 
     case 'n': // Negate the immediate or print a '-' before the operand.
@@ -505,16 +517,15 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     }
   }
 
-  printOperand(*this, MI, OpNo, O, /*Modifier*/ nullptr, AsmVariant);
+  PrintOperand(MI, OpNo, O);
   return false;
 }
 
-bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNo, unsigned AsmVariant,
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                                           const char *ExtraCode,
                                           raw_ostream &O) {
-  if (AsmVariant) {
-    printIntelMemReference(*this, MI, OpNo, O);
+  if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
+    PrintIntelMemReference(MI, OpNo, O);
     return false;
   }
 
@@ -531,14 +542,14 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
       // These only apply to registers, ignore on mem.
       break;
     case 'H':
-      printMemReference(*this, MI, OpNo, O, "H");
+      PrintMemReference(MI, OpNo, O, "H");
       return false;
     case 'P': // Don't print @PLT, but do print as memory.
-      printMemReference(*this, MI, OpNo, O, "no-rip");
+      PrintMemReference(MI, OpNo, O, "no-rip");
       return false;
     }
   }
-  printMemReference(*this, MI, OpNo, O);
+  PrintMemReference(MI, OpNo, O, nullptr);
   return false;
 }
 
@@ -683,26 +694,31 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
     // stripping. Since LLVM never generates code that does this, it is always
     // safe to set.
     OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
-    return;
-  }
-
-  if (TT.isKnownWindowsMSVCEnvironment() && MMI->usesVAFloatArgument()) {
-    StringRef SymbolName =
-        (TT.getArch() == Triple::x86_64) ? "_fltused" : "__fltused";
-    MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
-    OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
-    return;
-  }
-
-  if (TT.isOSBinFormatCOFF()) {
+  } else if (TT.isOSBinFormatCOFF()) {
+    if (MMI->usesMSVCFloatingPoint()) {
+      // In Windows' libcmt.lib, there is a file which is linked in only if the
+      // symbol _fltused is referenced. Linking this in causes some
+      // side-effects:
+      //
+      // 1. For x86-32, it will set the x87 rounding mode to 53-bit instead of
+      // 64-bit mantissas at program start.
+      //
+      // 2. It links in support routines for floating-point in scanf and printf.
+      //
+      // MSVC emits an undefined reference to _fltused when there are any
+      // floating point operations in the program (including calls). A program
+      // that only has: `scanf("%f", &global_float);` may fail to trigger this,
+      // but oh well...that's a documented issue.
+      StringRef SymbolName =
+          (TT.getArch() == Triple::x86) ? "__fltused" : "_fltused";
+      MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
+      OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+      return;
+    }
     emitStackMaps(SM);
-    return;
-  }
-
-  if (TT.isOSBinFormatELF()) {
+  } else if (TT.isOSBinFormatELF()) {
     emitStackMaps(SM);
     FM.serializeToFaultMapSection();
-    return;
   }
 }
 
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 55abdf2ba601..a011310970b3 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -1,9 +1,8 @@
 //===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -103,6 +102,18 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
   // Choose between emitting .seh_ directives and .cv_fpo_ directives.
   void EmitSEHInstruction(const MachineInstr *MI);
 
+  void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+  void PrintOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+  void PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
+                            raw_ostream &O, const char *Modifier);
+  void PrintPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+  void PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
+                            raw_ostream &O, const char *Modifier);
+  void PrintMemReference(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+                         const char *Modifier);
+  void PrintIntelMemReference(const MachineInstr *MI, unsigned OpNo,
+                              raw_ostream &O);
+
 public:
   X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
 
@@ -124,11 +135,9 @@ public:
   }
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &OS) override;
+                       const char *ExtraCode, raw_ostream &OS) override;
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &OS) override;
+                             const char *ExtraCode, raw_ostream &OS) override;
 
   bool doInitialization(Module &M) override {
     SMShadowTracker.reset(0);
diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 627a6cb14514..3dcc1015dc7c 100644
--- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -1,9 +1,8 @@
 //===- X86AvoidStoreForwardingBlockis.cpp - Avoid HW Store Forward Block --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -69,9 +68,7 @@ using DisplacementSizeMap = std::map<int64_t, unsigned>;
 class X86AvoidSFBPass : public MachineFunctionPass {
 public:
   static char ID;
-  X86AvoidSFBPass() : MachineFunctionPass(ID) {
-    initializeX86AvoidSFBPassPass(*PassRegistry::getPassRegistry());
-  }
+  X86AvoidSFBPass() : MachineFunctionPass(ID) { }
 
   StringRef getPassName() const override {
     return "X86 Avoid Store Forwarding Blocks";
@@ -343,6 +340,8 @@ findPotentialBlockers(MachineInstr *LoadInst) {
   for (auto PBInst = std::next(MachineBasicBlock::reverse_iterator(LoadInst)),
             E = LoadInst->getParent()->rend();
        PBInst != E; ++PBInst) {
+    if (PBInst->isMetaInstruction())
+      continue;
     BlockCount++;
     if (BlockCount >= InspectionLimit)
       break;
@@ -366,6 +365,8 @@ findPotentialBlockers(MachineInstr *LoadInst) {
       for (MachineBasicBlock::reverse_iterator PBInst = PMBB->rbegin(),
                                                PME = PMBB->rend();
            PBInst != PME; ++PBInst) {
+        if (PBInst->isMetaInstruction())
+          continue;
         PredCount++;
         if (PredCount >= LimitLeft)
           break;
@@ -407,7 +408,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
   // If the load and store are consecutive, use the loadInst location to
   // reduce register pressure.
   MachineInstr *StInst = StoreInst;
-  if (StoreInst->getPrevNode() == LoadInst)
+  auto PrevInstrIt = skipDebugInstructionsBackward(
+      std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+      MBB->instr_begin());
+  if (PrevInstrIt.getNodePtr() == LoadInst)
     StInst = LoadInst;
   MachineInstr *NewStore =
       BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
@@ -492,19 +496,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
 static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
   MachineOperand &LoadBase = getBaseOperand(LoadInst);
   MachineOperand &StoreBase = getBaseOperand(StoreInst);
+  auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
+          std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+          LoadInst->getParent()->instr_begin()).getNodePtr();
   if (LoadBase.isReg()) {
     MachineInstr *LastLoad = LoadInst->getPrevNode();
     // If the original load and store to xmm/ymm were consecutive
     // then the partial copies were also created in
     // a consecutive order to reduce register pressure,
     // and the location of the last load is before the last store.
-    if (StoreInst->getPrevNode() == LoadInst)
+    if (StorePrevNonDbgInstr == LoadInst)
       LastLoad = LoadInst->getPrevNode()->getPrevNode();
     getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
   }
   if (StoreBase.isReg()) {
     MachineInstr *StInst = StoreInst;
-    if (StoreInst->getPrevNode() == LoadInst)
+    if (StorePrevNonDbgInstr == LoadInst)
       StInst = LoadInst;
     getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
   }
@@ -531,7 +538,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
       if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
         continue;
       int DefVR = MI.getOperand(0).getReg();
-      if (!MRI->hasOneUse(DefVR))
+      if (!MRI->hasOneNonDBGUse(DefVR))
         continue;
       for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
            UI != UE;) {
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 24d7a219e751..4df849a2e14c 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -1,9 +1,8 @@
 //===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -60,10 +59,7 @@ namespace {
 
 class X86CallFrameOptimization : public MachineFunctionPass {
 public:
-  X86CallFrameOptimization() : MachineFunctionPass(ID) {
-    initializeX86CallFrameOptimizationPass(
-        *PassRegistry::getPassRegistry());
-  }
+  X86CallFrameOptimization() : MachineFunctionPass(ID) { }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 1dc83b76595d..b16b3839c85a 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -1,9 +1,8 @@
 //===- llvm/lib/Target/X86/X86CallLowering.cpp - Call lowering ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,8 +47,6 @@
 
 using namespace llvm;
 
-#include "X86GenCallingConv.inc"
-
 X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
     : CallLowering(&TLI) {}
 
@@ -64,6 +61,7 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
   SmallVector<EVT, 4> SplitVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");
 
   if (OrigArg.Ty->isVoidTy())
     return true;
@@ -73,12 +71,12 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
 
   if (NumParts == 1) {
     // replace the original type ( pointer -> GPR ).
-    SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
+    SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Context),
                            OrigArg.Flags, OrigArg.IsFixed);
     return true;
   }
 
-  SmallVector<unsigned, 8> SplitRegs;
+  SmallVector<Register, 8> SplitRegs;
 
   EVT PartVT = TLI.getRegisterType(Context, VT);
   Type *PartTy = PartVT.getTypeForEVT(Context);
@@ -88,7 +86,7 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
         ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
                 PartTy, OrigArg.Flags};
     SplitArgs.push_back(Info);
-    SplitRegs.push_back(Info.Reg);
+    SplitRegs.push_back(Info.Regs[0]);
   }
 
   PerformArgSplit(SplitRegs);
@@ -104,28 +102,28 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
         DL(MIRBuilder.getMF().getDataLayout()),
         STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
     LLT SType = LLT::scalar(DL.getPointerSizeInBits(0));
-    unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+    Register SPReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildCopy(SPReg, STI.getRegisterInfo()->getStackRegister());
 
-    unsigned OffsetReg = MRI.createGenericVirtualRegister(SType);
+    Register OffsetReg = MRI.createGenericVirtualRegister(SType);
     MIRBuilder.buildConstant(OffsetReg, Offset);
 
-    unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+    Register AddrReg = MRI.createGenericVirtualRegister(p0);
     MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
 
     MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
     return AddrReg;
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     MIB.addUse(PhysReg, RegState::Implicit);
 
-    unsigned ExtReg;
+    Register ExtReg;
     // If we are copying the value to a physical register with the
     // size larger than the size of the value itself - build AnyExt
     // to the size of the register first and only then do the copy.
@@ -146,12 +144,12 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
     MIRBuilder.buildCopy(PhysReg, ExtReg);
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
-    unsigned ExtReg = extendRegister(ValVReg, VA);
+    Register ExtReg = extendRegister(ValVReg, VA);
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
-        /* Alignment */ 0);
+        /* Alignment */ 1);
     MIRBuilder.buildStore(ExtReg, Addr, *MMO);
   }
 
@@ -185,7 +183,7 @@ protected:
 
 bool X86CallLowering::lowerReturn(
     MachineIRBuilder &MIRBuilder, const Value *Val,
-    ArrayRef<unsigned> VRegs) const {
+    ArrayRef<Register> VRegs) const {
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
   auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
@@ -208,7 +206,7 @@ bool X86CallLowering::lowerReturn(
       ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
       setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
       if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
-                             [&](ArrayRef<unsigned> Regs) {
+                             [&](ArrayRef<Register> Regs) {
                                MIRBuilder.buildUnmerge(Regs, VRegs[i]);
                              }))
         return false;
@@ -231,7 +229,9 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
       : ValueHandler(MIRBuilder, MRI, AssignFn),
         DL(MIRBuilder.getMF().getDataLayout()) {}
 
-  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+  bool isArgumentHandler() const override { return true; }
+
+  Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
     auto &MFI = MIRBuilder.getMF().getFrameInfo();
     int FI = MFI.CreateFixedObject(Size, Offset, true);
@@ -243,15 +243,15 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     return AddrReg;
   }
 
-  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                             MachinePointerInfo &MPO, CCValAssign &VA) override {
     auto MMO = MIRBuilder.getMF().getMachineMemOperand(
         MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
-        0);
+        1);
     MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
   }
 
-  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+  void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
     markPhysRegUsed(PhysReg);
 
@@ -320,9 +320,9 @@ protected:
 
 } // end anonymous namespace
 
-bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                           const Function &F,
-                                           ArrayRef<unsigned> VRegs) const {
+bool X86CallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   if (F.arg_empty())
     return true;
 
@@ -344,14 +344,14 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
-        Arg.hasAttribute(Attribute::Nest))
+        Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
       return false;
 
     ArgInfo OrigArg(VRegs[Idx], Arg.getType());
     setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
     if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
-                           [&](ArrayRef<unsigned> Regs) {
-                             MIRBuilder.buildMerge(VRegs[Idx], Regs);
+                           [&](ArrayRef<Register> Regs) {
+                             MIRBuilder.buildMerge(VRegs[Idx][0], Regs);
                            }))
       return false;
     Idx++;
@@ -409,9 +409,12 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (OrigArg.Flags.isByVal())
       return false;
 
+    if (OrigArg.Regs.size() > 1)
+      return false;
+
     if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
-                           [&](ArrayRef<unsigned> Regs) {
-                             MIRBuilder.buildUnmerge(Regs, OrigArg.Reg);
+                           [&](ArrayRef<Register> Regs) {
+                             MIRBuilder.buildUnmerge(Regs, OrigArg.Regs[0]);
                            }))
       return false;
   }
@@ -451,12 +454,15 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // symmetry with the arguments, the physical register must be an
   // implicit-define of the call instruction.
 
-  if (OrigRet.Reg) {
+  if (!OrigRet.Ty->isVoidTy()) {
+    if (OrigRet.Regs.size() > 1)
+      return false;
+
     SplitArgs.clear();
-    SmallVector<unsigned, 8> NewRegs;
+    SmallVector<Register, 8> NewRegs;
 
     if (!splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
-                           [&](ArrayRef<unsigned> Regs) {
+                           [&](ArrayRef<Register> Regs) {
                              NewRegs.assign(Regs.begin(), Regs.end());
                            }))
       return false;
@@ -466,7 +472,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
 
     if (!NewRegs.empty())
-      MIRBuilder.buildMerge(OrigRet.Reg, NewRegs);
+      MIRBuilder.buildMerge(OrigRet.Regs[0], NewRegs);
   }
 
   CallSeqStart.addImm(Handler.getStackSize())
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index f5f8f9a3ef6d..0445331bc3ff 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -1,9 +1,8 @@
 //===- llvm/lib/Target/X86/X86CallLowering.h - Call lowering ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,10 +29,10 @@ public:
   X86CallLowering(const X86TargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<unsigned> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
@@ -41,7 +40,7 @@ public:
 
 private:
   /// A function of this type is used to perform value split action.
-  using SplitArgTy = std::function<void(ArrayRef<unsigned>)>;
+  using SplitArgTy = std::function<void(ArrayRef<Register>)>;
 
   bool splitToValueTypes(const ArgInfo &OrigArgInfo,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
diff --git a/lib/Target/X86/X86CallingConv.cpp b/lib/Target/X86/X86CallingConv.cpp
index 59dde982f512..aee344a26764 100644
--- a/lib/Target/X86/X86CallingConv.cpp
+++ b/lib/Target/X86/X86CallingConv.cpp
@@ -1,9 +1,8 @@
 //=== X86CallingConv.cpp - X86 Custom Calling Convention Impl   -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,16 +11,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86CallingConv.h"
 #include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/IR/CallingConv.h"
 
-namespace llvm {
-
-bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+using namespace llvm;
+
+/// When the regcall calling convention is compiled for a 32-bit arch, special
+/// treatment is required for 64-bit masks.
+/// The value should be assigned to two GPRs.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
+                                          MVT &LocVT,
+                                          CCValAssign::LocInfo &LocInfo,
+                                          ISD::ArgFlagsTy &ArgFlags,
+                                          CCState &State) {
   // List of GPR registers that are available to store values in regcall
   // calling convention.
   static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
@@ -113,9 +119,15 @@ static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
   return false;
 }
 
-bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+/// Vectorcall calling convention has special handling for vector types or
+/// HVA for 64 bit arch.
+/// For HVAs shadow registers might be allocated on the first pass
+/// and actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                 CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
   // On the second pass, go through the HVAs only.
   if (ArgFlags.isSecArgPass()) {
     if (ArgFlags.isHva())
@@ -150,7 +162,10 @@ bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
       // created on top of the basic 32 bytes of win64.
       // It can happen if the fifth or sixth argument is vector type or HVA.
       // At that case for each argument a shadow stack of 8 bytes is allocated.
-      if (Reg == X86::XMM4 || Reg == X86::XMM5)
+      const TargetRegisterInfo *TRI =
+          State.getMachineFunction().getSubtarget().getRegisterInfo();
+      if (TRI->regsOverlap(Reg, X86::XMM4) ||
+          TRI->regsOverlap(Reg, X86::XMM5))
         State.AllocateStack(8, 8);
 
       if (!ArgFlags.isHva()) {
@@ -165,9 +180,14 @@ bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return ArgFlags.isHva();
 }
 
-bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+/// Vectorcall calling convention has special handling for vector types or
+/// HVA for 32 bit arch.
+/// For HVAs actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                 CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
   // On the second pass, go through the HVAs only.
   if (ArgFlags.isSecArgPass()) {
     if (ArgFlags.isHva())
@@ -205,4 +225,110 @@ bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return false; // No register was assigned - Continue the search.
 }
 
-} // End llvm namespace
+static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
+                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+                                CCState &) {
+  llvm_unreachable("The AnyReg calling convention is only supported by the "
+                   "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the X86 C calling convention on Release builds.
+  return false;
+}
+
+static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                               CCValAssign::LocInfo &LocInfo,
+                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+  // not to split i64 and double between a register and the stack.
+  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+  static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]);
+
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // If this is the first part of a double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return; otherwise go on to process the pending
+  // list.
+  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+    PendingMembers.push_back(
+        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+    if (!ArgFlags.isSplitEnd())
+      return true;
+  }
+
+  // If there are no pending members, we are not in the middle of a split,
+  // so try to allocate a single register from RegList (false if none).
+  if (PendingMembers.empty()) {
+    if (unsigned Reg = State.AllocateReg(RegList)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return true;
+    }
+    return false;
+  }
+
+  assert(ArgFlags.isSplitEnd());
+
+  // We now have the entire original argument in PendingMembers, so decide
+  // whether to use registers or the stack.
+  // Per the MCU ABI:
+  // a) To use registers, we need to have enough of them free to contain
+  //    the entire argument.
+  // b) We never want to use more than 2 registers for a single argument.
+
+  unsigned FirstFree = State.getFirstUnallocated(RegList);
+  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+  for (auto &It : PendingMembers) {
+    if (UseRegs)
+      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+    else
+      It.convertToMem(State.AllocateStack(4, 4));
+    State.addLoc(It);
+  }
+
+  PendingMembers.clear();
+
+  return true;
+}
+
+/// X86 interrupt handlers can only take one or two stack arguments, but if
+/// there are two arguments, they are in the opposite order from the standard
+/// convention. Therefore, we have to look at the argument count up front before
+/// allocating stack for each argument. Unsupported prototypes are fatal.
+static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                        CCValAssign::LocInfo &LocInfo,
+                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  const MachineFunction &MF = State.getMachineFunction();
+  size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
+  bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
+  unsigned SlotSize = Is64Bit ? 8 : 4;
+  unsigned Offset;
+  if (ArgCount == 1 && ValNo == 0) {
+    // If we have one argument, the argument is five stack slots big, at fixed
+    // offset zero.
+    Offset = State.AllocateStack(5 * SlotSize, 4);
+  } else if (ArgCount == 2 && ValNo == 0) {
+    // If we have two arguments, the stack slot is *after* the error code
+    // argument. Pretend it doesn't consume stack space, and account for it when
+    // we assign the second argument.
+    Offset = SlotSize;
+  } else if (ArgCount == 2 && ValNo == 1) {
+    // If this is the second of two arguments, it must be the error code. It
+    // appears first on the stack, and is then followed by the five-slot
+    // interrupt struct, so reserve six slots in total here.
+    Offset = 0;
+    (void)State.AllocateStack(6 * SlotSize, 4);
+  } else {
+    report_fatal_error("unsupported x86 interrupt prototype");
+  }
+
+  // FIXME: This 64-bit two-argument adjustment should be accounted for in
+  // X86FrameLowering::getFrameIndexReference, not here.
+  if (Is64Bit && ArgCount == 2)
+    Offset += SlotSize;
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
+// Provides entry points of CC_X86 and RetCC_X86.
+#include "X86GenCallingConv.inc"
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index d0fcbd313312..191e0fa619b2 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -1,9 +1,8 @@
 //=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,99 +20,12 @@
 
 namespace llvm {
 
-/// When regcall calling convention compiled to 32 bit arch, special treatment
-/// is required for 64 bit masks.
-/// The value should be assigned to two GPRs.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-/// Vectorcall calling convention has special handling for vector types or
-/// HVA for 64 bit arch.
-/// For HVAs shadow registers might be allocated on the first pass
-/// and actual XMM registers are allocated on the second pass.
-/// For vector types, actual XMM registers are allocated on the first pass.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-/// Vectorcall calling convention has special handling for vector types or
-/// HVA for 32 bit arch.
-/// For HVAs actual XMM registers are allocated on the second pass.
-/// For vector types, actual XMM registers are allocated on the first pass.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
-                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
-                                CCState &) {
-  llvm_unreachable("The AnyReg calling convention is only supported by the " \
-                   "stackmap and patchpoint intrinsics.");
-  // gracefully fallback to X86 C calling convention on Release builds.
-  return false;
-}
-
-inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
-                                         MVT &LocVT,
-                                         CCValAssign::LocInfo &LocInfo,
-                                         ISD::ArgFlagsTy &ArgFlags,
-                                         CCState &State) {
-  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
-  // not to split i64 and double between a register and stack
-  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
-  static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-
-  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
-  // If this is the first part of an double/i64/i128, or if we're already
-  // in the middle of a split, add to the pending list. If this is not
-  // the end of the split, return, otherwise go on to process the pending
-  // list
-  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
-    PendingMembers.push_back(
-        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-    if (!ArgFlags.isSplitEnd())
-      return true;
-  }
-
-  // If there are no pending members, we are not in the middle of a split,
-  // so do the usual inreg stuff.
-  if (PendingMembers.empty()) {
-    if (unsigned Reg = State.AllocateReg(RegList)) {
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-      return true;
-    }
-    return false;
-  }
-
-  assert(ArgFlags.isSplitEnd());
-
-  // We now have the entire original argument in PendingMembers, so decide
-  // whether to use registers or the stack.
-  // Per the MCU ABI:
-  // a) To use registers, we need to have enough of them free to contain
-  // the entire argument.
-  // b) We never want to use more than 2 registers for a single argument.
-
-  unsigned FirstFree = State.getFirstUnallocated(RegList);
-  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
-
-  for (auto &It : PendingMembers) {
-    if (UseRegs)
-      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
-    else
-      It.convertToMem(State.AllocateStack(4, 4));
-    State.addLoc(It);
-  }
-
-  PendingMembers.clear();
+bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT,
+               CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+               CCState &State);
 
-  return true;
-}
+bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+            ISD::ArgFlagsTy ArgFlags, CCState &State);
 
 } // End llvm namespace
 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fe49c9ffbd95..1c3034a5116a 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1,9 +1,8 @@
 //===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,7 +147,8 @@ def CC_#NAME : CallingConv<[
       CCAssignToStack<32, 32>>,
 
     // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
-    CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+      CCAssignToStack<64, 64>>
 ]>;
 
 def RetCC_#NAME : CallingConv<[
@@ -477,6 +477,7 @@ def RetCC_X86_64 : CallingConv<[
 ]>;
 
 // This is the return-value convention used for the entire X86 backend.
+let Entry = 1 in
 def RetCC_X86 : CallingConv<[
 
   // Check if this is the Intel OpenCL built-ins calling convention
@@ -567,7 +568,7 @@ def CC_X86_64_C : CallingConv<[
            CCAssignToStack<32, 32>>,
 
   // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
-  CCIfType<[v16i32, v8i64, v16f32, v8f64],
+  CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
            CCAssignToStack<64, 64>>
 ]>;
 
@@ -612,7 +613,7 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
 
   // 512 bit vectors are passed by pointer
-  CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+  CCIfType<[v64i8, v32i16, v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
 
   // Long doubles are passed by pointer
   CCIfType<[f80], CCPassIndirect<i64>>,
@@ -985,14 +986,6 @@ def CC_Intel_OCL_BI : CallingConv<[
   CCDelegateTo<CC_X86_32_C>
 ]>;
 
-def CC_X86_32_Intr : CallingConv<[
-  CCAssignToStack<4, 4>
-]>;
-
-def CC_X86_64_Intr : CallingConv<[
-  CCAssignToStack<8, 8>
-]>;
-
 //===----------------------------------------------------------------------===//
 // X86 Root Argument Calling Conventions
 //===----------------------------------------------------------------------===//
@@ -1001,7 +994,7 @@ def CC_X86_64_Intr : CallingConv<[
 def CC_X86_32 : CallingConv<[
   // X86_INTR calling convention is valid in MCU target and should override the
   // MCU calling convention. Thus, this should be checked before isTargetMCU().
-  CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
+  CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
   CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
   CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
   CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
@@ -1029,7 +1022,7 @@ def CC_X86_64 : CallingConv<[
   CCIfCC<"CallingConv::X86_RegCall",
     CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
   CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
-  CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
+  CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
 
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -1039,6 +1032,7 @@ def CC_X86_64 : CallingConv<[
 ]>;
 
 // This is the argument convention used for the entire X86 backend.
+let Entry = 1 in
 def CC_X86 : CallingConv<[
   CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
   CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index c3e76fd2a856..a61fa3246f09 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -1,9 +1,8 @@
 //====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,9 +101,7 @@ namespace {
 /// Converts X86 cmov instructions into branches when profitable.
 class X86CmovConverterPass : public MachineFunctionPass {
 public:
-  X86CmovConverterPass() : MachineFunctionPass(ID) {
-    initializeX86CmovConverterPassPass(*PassRegistry::getPassRegistry());
-  }
+  X86CmovConverterPass() : MachineFunctionPass(ID) { }
 
   StringRef getPassName() const override { return "X86 cmov Conversion"; }
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -281,7 +278,8 @@ bool X86CmovConverterPass::collectCmovCandidates(
     Group.clear();
     // Condition code of first CMOV instruction current processed range and its
     // opposite condition code.
-    X86::CondCode FirstCC, FirstOppCC, MemOpCC;
+    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
+                  MemOpCC = X86::COND_INVALID;
     // Indicator of a non CMOVrr instruction in the current processed range.
     bool FoundNonCMOVInst = false;
     // Indicator for current processed CMOV-group if it should be skipped.
@@ -291,7 +289,7 @@ bool X86CmovConverterPass::collectCmovCandidates(
       // Skip debug instructions.
       if (I.isDebugInstr())
         continue;
-      X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode());
+      X86::CondCode CC = X86::getCondFromCMov(I);
       // Check if we found a X86::CMOVrr instruction.
       if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
         if (Group.empty()) {
@@ -546,7 +544,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
       }
 
       unsigned CondCost =
-          DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
+          DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
       unsigned ValCost = getDepthOfOptCmov(
           DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
           DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
@@ -594,7 +592,7 @@ static bool checkEFLAGSLive(MachineInstr *MI) {
 /// move all debug instructions to after the last CMOV instruction, making the
 /// CMOV group consecutive.
 static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
-  assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID &&
+  assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
          "Last instruction in a CMOV group must be a CMOV instruction");
 
   SmallVector<MachineInstr *, 2> DBGInstructions;
@@ -652,14 +650,14 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
   MachineInstr *LastCMOV = Group.back();
   DebugLoc DL = MI.getDebugLoc();
 
-  X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
+  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
   X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
   // Potentially swap the condition codes so that any memory operand to a CMOV
   // is in the *false* position instead of the *true* position. We can invert
   // any non-memory operand CMOV instructions to cope with this and we ensure
   // memory operand CMOVs are only included with a single condition code.
   if (llvm::any_of(Group, [&](MachineInstr *I) {
-        return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC;
+        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
       }))
     std::swap(CC, OppCC);
 
@@ -690,7 +688,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
   MBB->addSuccessor(SinkMBB);
 
   // Create the conditional branch instruction.
-  BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);
+  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
 
   // Add the sink block to the false block successors.
   FalseMBB->addSuccessor(SinkMBB);
@@ -713,8 +711,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
     if (!MI.mayLoad()) {
       // Remember the false-side register input.
       unsigned FalseReg =
-          MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2)
-              .getReg();
+          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
       // Walk back through any intermediate cmovs referenced.
       while (true) {
         auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
@@ -729,7 +726,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
     // The condition must be the *opposite* of the one we've decided to branch
     // on as the branch will go *around* the load and the load should happen
     // when the CMOV condition is false.
-    assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC &&
+    assert(X86::getCondFromCMov(MI) == OppCC &&
            "Can only handle memory-operand cmov instructions with a condition "
            "opposite to the selected branch direction.");
 
@@ -768,7 +765,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
     // Move the new CMOV to just before the old one and reset any impacted
     // iterator.
     auto *NewCMOV = NewMIs.pop_back_val();
-    assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC &&
+    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
            "Last new instruction isn't the expected CMOV!");
     LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
     MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
@@ -820,7 +817,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
     // If this CMOV we are processing is the opposite condition from the jump we
     // generated, then we have to swap the operands for the PHI that is going to
     // be generated.
-    if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
+    if (X86::getCondFromCMov(*MIIt) == OppCC)
       std::swap(Op1Reg, Op2Reg);
 
     auto Op1Itr = RegRewriteTable.find(Op1Reg);
diff --git a/lib/Target/X86/X86CondBrFolding.cpp b/lib/Target/X86/X86CondBrFolding.cpp
index 7ce443c4656a..9dea94f1368d 100644
--- a/lib/Target/X86/X86CondBrFolding.cpp
+++ b/lib/Target/X86/X86CondBrFolding.cpp
@@ -1,9 +1,8 @@
 //===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines a pass that optimizes condition branches on x86 by taking
@@ -62,9 +61,7 @@ STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
 namespace {
 class X86CondBrFoldingPass : public MachineFunctionPass {
 public:
-  X86CondBrFoldingPass() : MachineFunctionPass(ID) {
-    initializeX86CondBrFoldingPassPass(*PassRegistry::getPassRegistry());
-  }
+  X86CondBrFoldingPass() : MachineFunctionPass(ID) { }
   StringRef getPassName() const override { return "X86 CondBr Folding"; }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -226,10 +223,9 @@ void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
   MachineInstr *BrMI;
   if (MBBInfo->TBB == OrigDest) {
     BrMI = MBBInfo->BrInstr;
-    unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
     MachineInstrBuilder MIB =
-        BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
-            .addMBB(NewDest);
+        BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(X86::JCC_1))
+            .addMBB(NewDest).addImm(MBBInfo->BranchCode);
     MBBInfo->TBB = NewDest;
     MBBInfo->BrInstr = MIB.getInstr();
   } else { // Should be the unconditional jump stmt.
@@ -255,8 +251,8 @@ void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
   MachineInstr *BrMI = MBBInfo->BrInstr;
   X86::CondCode CC = MBBInfo->BranchCode;
   MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
-                                    TII->get(GetCondBranchFromCond(CC)))
-                                .addMBB(MBBInfo->TBB);
+                                    TII->get(X86::JCC_1))
+                                .addMBB(MBBInfo->TBB).addImm(CC);
   BrMI->eraseFromParent();
   MBBInfo->BrInstr = MIB.getInstr();
 
@@ -324,8 +320,8 @@ void X86CondBrFolding::optimizeCondBr(
       llvm_unreachable("unexpected condtional code.");
     }
     BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
-            TII->get(GetCondBranchFromCond(NewCC)))
-        .addMBB(RootMBBInfo->FBB);
+            TII->get(X86::JCC_1))
+        .addMBB(RootMBBInfo->FBB).addImm(NewCC);
 
     // RootMBB: Jump to TargetMBB
     BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
@@ -513,7 +509,7 @@ X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
     if (I->isBranch()) {
       if (TBB)
         return nullptr;
-      CC = X86::getCondFromBranchOpc(I->getOpcode());
+      CC = X86::getCondFromBranch(*I);
       switch (CC) {
       default:
         return nullptr;
diff --git a/lib/Target/X86/X86DiscriminateMemOps.cpp b/lib/Target/X86/X86DiscriminateMemOps.cpp
index 3654bf04f4e9..7051550d52e6 100644
--- a/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -1,9 +1,8 @@
 //===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -27,6 +26,22 @@ using namespace llvm;
 
 #define DEBUG_TYPE "x86-discriminate-memops"
 
+static cl::opt<bool> EnableDiscriminateMemops(
+    DEBUG_TYPE, cl::init(false),
+    cl::desc("Generate unique debug info for each instruction with a memory "
+             "operand. Should be enabled for profile-driven cache prefetching, "
+             "both in the build of the binary being profiled, as well as in "
+             "the build of the binary consuming the profile."),
+    cl::Hidden);
+
+static cl::opt<bool> BypassPrefetchInstructions(
+    "x86-bypass-prefetch-instructions", cl::init(true),
+    cl::desc("When discriminating instructions with memory operands, ignore "
+             "prefetch instructions. This ensures the other memory operand "
+             "instructions have the same identifiers after inserting "
+             "prefetches, allowing for successive insertions."),
+    cl::Hidden);
+
 namespace {
 
 using Location = std::pair<StringRef, unsigned>;
@@ -55,6 +70,10 @@ public:
   X86DiscriminateMemOps();
 };
 
+bool IsPrefetchOpcode(unsigned Opcode) {
+  return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 ||
+         Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2;
+}
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -67,6 +86,9 @@ char X86DiscriminateMemOps::ID = 0;
 X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
 
 bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableDiscriminateMemops)
+    return false;
+
   DISubprogram *FDI = MF.getFunction().getSubprogram();
   if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
     return false;
@@ -75,7 +97,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
   // have any debug info.
   const DILocation *ReferenceDI =
       DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
-
+  assert(ReferenceDI && "ReferenceDI should not be nullptr");
   DenseMap<Location, unsigned> MemOpDiscriminators;
   MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
 
@@ -88,6 +110,8 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
       const auto &DI = MI.getDebugLoc();
       if (!DI)
         continue;
+      if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode))
+        continue;
       Location Loc = diToLocation(DI);
       MemOpDiscriminators[Loc] =
           std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
@@ -104,15 +128,18 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
     for (auto &MI : MBB) {
       if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
         continue;
+      if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode))
+        continue;
       const DILocation *DI = MI.getDebugLoc();
-      if (!DI) {
+      bool HasDebug = DI;
+      if (!HasDebug) {
         DI = ReferenceDI;
       }
       Location L = diToLocation(DI);
       DenseSet<unsigned> &Set = Seen[L];
       const std::pair<DenseSet<unsigned>::iterator, bool> TryInsert =
           Set.insert(DI->getBaseDiscriminator());
-      if (!TryInsert.second) {
+      if (!TryInsert.second || !HasDebug) {
         unsigned BF, DF, CI = 0;
         DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI);
         Optional<unsigned> EncodedDiscriminator = DILocation::encodeDiscriminator(
@@ -133,6 +160,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
         // Since we were able to encode, bump the MemOpDiscriminators.
         ++MemOpDiscriminators[L];
         DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+        assert(DI && "DI should not be nullptr");
         updateDebugInfo(&MI, DI);
         Changed = true;
         std::pair<DenseSet<unsigned>::iterator, bool> MustInsert =
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index d9ebbb506ca4..18bbfa32e11b 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -1,9 +1,8 @@
 //===--- X86DomainReassignment.cpp - Selectively switch register classes---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -387,9 +386,7 @@ class X86DomainReassignment : public MachineFunctionPass {
 public:
   static char ID;
 
-  X86DomainReassignment() : MachineFunctionPass(ID) {
-    initializeX86DomainReassignmentPass(*PassRegistry::getPassRegistry());
-  }
+  X86DomainReassignment() : MachineFunctionPass(ID) { }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -557,6 +554,7 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
     // Register already in this closure.
     if (!C.insertEdge(CurReg))
       continue;
+    EnclosedEdges.insert(Reg);
 
     MachineInstr *DefMI = MRI->getVRegDef(CurReg);
     encloseInstr(C, DefMI);
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index 80674c7251fe..58680f1815bb 100755
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -1,10 +1,9 @@
 //===- X86EvexToVex.cpp ---------------------------------------------------===//
 // Compress EVEX instructions to VEX encoding when possible to reduce code size
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,15 +12,15 @@
 /// are encoded using the EVEX prefix and if possible replaces them by their
 /// corresponding VEX encoding which is usually shorter by 2 bytes.
 /// EVEX instructions may be encoded via the VEX prefix when the AVX-512
-/// instruction has a corresponding AVX/AVX2 opcode and when it does not
-/// use the xmm or the mask registers or xmm/ymm registers with indexes
-/// higher than 15.
+/// instruction has a corresponding AVX/AVX2 opcode, when the vector length
+/// accessed by the instruction is less than 512 bits and when it does not use
+/// the zmm or the mask registers or xmm/ymm registers with indexes higher than 15.
 /// The pass applies code reduction on the generated code for AVX-512 instrs.
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/X86InstComments.h"
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86InstComments.h"
 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
@@ -69,9 +68,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
 public:
   static char ID;
 
-  EvexToVexInstPass() : MachineFunctionPass(ID) {
-    initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry());
-  }
+  EvexToVexInstPass() : MachineFunctionPass(ID) { }
 
   StringRef getPassName() const override { return EVEX2VEX_DESC; }
 
@@ -255,7 +252,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
     (Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable)
                                   : makeArrayRef(X86EvexToVex128CompressTable);
 
-  auto I = std::lower_bound(Table.begin(), Table.end(), MI.getOpcode());
+  auto I = llvm::lower_bound(Table, MI.getOpcode());
   if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
     return false;
 
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index 1dd73163080b..b8624b40f2f7 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -1,9 +1,8 @@
 //===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "x86-pseudo"
+#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"
 
 namespace {
 class X86ExpandPseudo : public MachineFunctionPass {
@@ -66,8 +66,12 @@ private:
   bool ExpandMBB(MachineBasicBlock &MBB);
 };
 char X86ExpandPseudo::ID = 0;
+
 } // End anonymous namespace.
 
+INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
+                false)
+
 void X86ExpandPseudo::ExpandICallBranchFunnel(
     MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
   MachineBasicBlock *JTMBB = MBB;
@@ -83,6 +87,8 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
   const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
 
   auto CmpTarget = [&](unsigned Target) {
+    if (Selector.isReg())
+      MBB->addLiveIn(Selector.getReg());
     BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
         .addReg(X86::RIP)
         .addImm(1)
@@ -98,11 +104,13 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
   auto CreateMBB = [&]() {
     auto *NewMBB = MF->CreateMachineBasicBlock(BB);
     MBB->addSuccessor(NewMBB);
+    if (!MBB->isLiveIn(X86::EFLAGS))
+      MBB->addLiveIn(X86::EFLAGS);
     return NewMBB;
   };
 
-  auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) {
-    BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB);
+  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
+    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);
 
     auto *ElseMBB = CreateMBB();
     MF->insert(InsPt, ElseMBB);
@@ -110,10 +118,10 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
     MBBI = MBB->end();
   };
 
-  auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) {
+  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
     auto *ThenMBB = CreateMBB();
     TargetMBBs.push_back({ThenMBB, Target});
-    EmitCondJump(Opcode, ThenMBB);
+    EmitCondJump(CC, ThenMBB);
   };
 
   auto EmitTailCall = [&](unsigned Target) {
@@ -130,23 +138,23 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
 
     if (NumTargets == 2) {
       CmpTarget(FirstTarget + 1);
-      EmitCondJumpTarget(X86::JB_1, FirstTarget);
+      EmitCondJumpTarget(X86::COND_B, FirstTarget);
       EmitTailCall(FirstTarget + 1);
       return;
     }
 
     if (NumTargets < 6) {
       CmpTarget(FirstTarget + 1);
-      EmitCondJumpTarget(X86::JB_1, FirstTarget);
-      EmitCondJumpTarget(X86::JE_1, FirstTarget + 1);
+      EmitCondJumpTarget(X86::COND_B, FirstTarget);
+      EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
       EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
       return;
     }
 
     auto *ThenMBB = CreateMBB();
     CmpTarget(FirstTarget + (NumTargets / 2));
-    EmitCondJump(X86::JB_1, ThenMBB);
-    EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2));
+    EmitCondJump(X86::COND_B, ThenMBB);
+    EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
     EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                   NumTargets - (NumTargets / 2) - 1);
 
@@ -254,16 +262,19 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       for (unsigned i = 0; i != 5; ++i)
         MIB.add(MBBI->getOperand(i));
     } else if (Opcode == X86::TCRETURNri64) {
+      JumpTarget.setIsKill();
       BuildMI(MBB, MBBI, DL,
               TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
-          .addReg(JumpTarget.getReg(), RegState::Kill);
+          .add(JumpTarget);
     } else {
+      JumpTarget.setIsKill();
       BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
-          .addReg(JumpTarget.getReg(), RegState::Kill);
+          .add(JumpTarget);
     }
 
     MachineInstr &NewMI = *std::prev(MBBI);
     NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
+    MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI);
 
     // Delete the pseudo instruction TCRETURN.
     MBB.erase(MBBI);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9dd3f2652543..7b9ce0271205 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1,9 +1,8 @@
 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,7 +84,7 @@ private:
   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                           const DebugLoc &DL);
 
-  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                        unsigned &ResultReg, unsigned Alignment = 1);
 
   bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
@@ -290,7 +289,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
 }
 
 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
-  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
   if (evt == MVT::Other || !evt.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
     return false;
@@ -312,12 +311,10 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
 }
 
-#include "X86GenCallingConv.inc"
-
 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
 /// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
                                   MachineMemOperand *MMO, unsigned &ResultReg,
                                   unsigned Alignment) {
   bool HasSSE41 = Subtarget->hasSSE41();
@@ -327,46 +324,42 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   bool HasVLX = Subtarget->hasVLX();
   bool IsNonTemporal = MMO && MMO->isNonTemporal();
 
+  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
+  if (VT == MVT::i1)
+    VT = MVT::i8;
+
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
-  const TargetRegisterClass *RC = nullptr;
-  switch (VT.getSimpleVT().SimpleTy) {
+  switch (VT.SimpleTy) {
   default: return false;
-  case MVT::i1:
   case MVT::i8:
     Opc = X86::MOV8rm;
-    RC  = &X86::GR8RegClass;
     break;
   case MVT::i16:
     Opc = X86::MOV16rm;
-    RC  = &X86::GR16RegClass;
     break;
   case MVT::i32:
     Opc = X86::MOV32rm;
-    RC  = &X86::GR32RegClass;
     break;
   case MVT::i64:
     // Must be in x86-64 mode.
     Opc = X86::MOV64rm;
-    RC  = &X86::GR64RegClass;
     break;
   case MVT::f32:
-    if (X86ScalarSSEf32) {
-      Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
-      RC  = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
-    } else {
+    if (X86ScalarSSEf32)
+      Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
+            HasAVX    ? X86::VMOVSSrm_alt :
+                        X86::MOVSSrm_alt;
+    else
       Opc = X86::LD_Fp32m;
-      RC  = &X86::RFP32RegClass;
-    }
     break;
   case MVT::f64:
-    if (X86ScalarSSEf64) {
-      Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
-      RC  = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
-    } else {
+    if (X86ScalarSSEf64)
+      Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
+            HasAVX    ? X86::VMOVSDrm_alt :
+                        X86::MOVSDrm_alt;
+    else
       Opc = X86::LD_Fp64m;
-      RC  = &X86::RFP64RegClass;
-    }
     break;
   case MVT::f80:
     // No f80 support yet.
@@ -381,7 +374,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     else
       Opc = HasVLX ? X86::VMOVUPSZ128rm :
             HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
-    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
     break;
   case MVT::v2f64:
     if (IsNonTemporal && Alignment >= 16 && HasSSE41)
@@ -393,13 +385,12 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     else
       Opc = HasVLX ? X86::VMOVUPDZ128rm :
             HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
-    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
     break;
   case MVT::v4i32:
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    if (IsNonTemporal && Alignment >= 16)
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
       Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
             HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
@@ -408,7 +399,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     else
       Opc = HasVLX ? X86::VMOVDQU64Z128rm :
             HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
-    RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
     break;
   case MVT::v8f32:
     assert(HasAVX);
@@ -420,7 +410,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
     else
       Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
-    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
     break;
   case MVT::v4f64:
     assert(HasAVX);
@@ -432,7 +421,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
     else
       Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
-    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
     break;
   case MVT::v8i32:
   case MVT::v4i64:
@@ -447,7 +435,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
     else
       Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
-    RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
     break;
   case MVT::v16f32:
     assert(HasAVX512);
@@ -455,7 +442,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = X86::VMOVNTDQAZrm;
     else
       Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
-    RC  = &X86::VR512RegClass;
     break;
   case MVT::v8f64:
     assert(HasAVX512);
@@ -463,7 +449,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = X86::VMOVNTDQAZrm;
     else
       Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
-    RC  = &X86::VR512RegClass;
     break;
   case MVT::v8i64:
   case MVT::v16i32:
@@ -476,10 +461,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
       Opc = X86::VMOVNTDQAZrm;
     else
       Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
-    RC  = &X86::VR512RegClass;
     break;
   }
 
+  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
   ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB =
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
@@ -1483,8 +1469,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
 
   // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
   static const uint16_t SETFOpcTable[2][3] = {
-    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
-    { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
+    { X86::COND_E,  X86::COND_NP, X86::AND8rr },
+    { X86::COND_NE, X86::COND_P,  X86::OR8rr  }
   };
   const uint16_t *SETFOpc = nullptr;
   switch (Predicate) {
@@ -1500,10 +1486,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
 
     unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
     unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
-            FlagReg1);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
-            FlagReg2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+            FlagReg1).addImm(SETFOpc[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+            FlagReg2).addImm(SETFOpc[1]);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
             ResultReg).addReg(FlagReg1).addReg(FlagReg2);
     updateValueMap(I, ResultReg);
@@ -1514,7 +1500,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   bool SwapArgs;
   std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
-  unsigned Opc = X86::getSETFromCond(CC);
 
   if (SwapArgs)
     std::swap(LHS, RHS);
@@ -1523,7 +1508,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
     return false;
 
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+          ResultReg).addImm(CC);
   updateValueMap(I, ResultReg);
   return true;
 }
@@ -1693,11 +1679,9 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       }
 
       bool SwapArgs;
-      unsigned BranchOpc;
       std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
       assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
 
-      BranchOpc = X86::GetCondBranchFromCond(CC);
       if (SwapArgs)
         std::swap(CmpLHS, CmpRHS);
 
@@ -1705,14 +1689,14 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
       if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
         return false;
 
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
-        .addMBB(TrueMBB);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+        .addMBB(TrueMBB).addImm(CC);
 
       // X86 requires a second branch to handle UNE (and OEQ, which is mapped
       // to UNE above).
       if (NeedExtraBranch) {
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
-          .addMBB(TrueMBB);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+          .addMBB(TrueMBB).addImm(X86::COND_P);
       }
 
       finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -1739,14 +1723,14 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
           .addReg(OpReg).addImm(1);
 
-        unsigned JmpOpc = X86::JNE_1;
+        unsigned JmpCond = X86::COND_NE;
         if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
           std::swap(TrueMBB, FalseMBB);
-          JmpOpc = X86::JE_1;
+          JmpCond = X86::COND_E;
         }
 
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
-          .addMBB(TrueMBB);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+          .addMBB(TrueMBB).addImm(JmpCond);
 
         finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
         return true;
@@ -1759,10 +1743,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
     if (TmpReg == 0)
       return false;
 
-    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
-
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
-      .addMBB(TrueMBB);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+      .addMBB(TrueMBB).addImm(CC);
     finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
     return true;
   }
@@ -1786,8 +1768,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
       .addReg(OpReg)
       .addImm(1);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
-    .addMBB(TrueMBB);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+    .addMBB(TrueMBB).addImm(X86::COND_NE);
   finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
   return true;
 }
@@ -2050,8 +2032,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
 
     // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
     static const uint16_t SETFOpcTable[2][3] = {
-      { X86::SETNPr, X86::SETEr , X86::TEST8rr },
-      { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
+      { X86::COND_NP, X86::COND_E,  X86::TEST8rr },
+      { X86::COND_P,  X86::COND_NE, X86::OR8rr   }
     };
     const uint16_t *SETFOpc = nullptr;
     switch (Predicate) {
@@ -2083,10 +2065,10 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
     if (SETFOpc) {
       unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
       unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
-              FlagReg1);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
-              FlagReg2);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+              FlagReg1).addImm(SETFOpc[0]);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+              FlagReg2).addImm(SETFOpc[1]);
       auto const &II = TII.get(SETFOpc[2]);
       if (II.getNumDefs()) {
         unsigned TmpReg = createResultReg(&X86::GR8RegClass);
@@ -2147,9 +2129,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
     return false;
 
   const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
-  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
-  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
-                                       LHSReg, LHSIsKill);
+  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
+  unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
+                                        LHSReg, LHSIsKill, CC);
   updateValueMap(I, ResultReg);
   return true;
 }
@@ -2194,19 +2176,6 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
   if (NeedSwap)
     std::swap(CmpLHS, CmpRHS);
 
-  // Choose the SSE instruction sequence based on data type (float or double).
-  static const uint16_t OpcTable[2][4] = {
-    { X86::CMPSSrr,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
-    { X86::CMPSDrr,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
-  };
-
-  const uint16_t *Opc = nullptr;
-  switch (RetVT.SimpleTy) {
-  default: return false;
-  case MVT::f32: Opc = &OpcTable[0][0]; break;
-  case MVT::f64: Opc = &OpcTable[1][0]; break;
-  }
-
   const Value *LHS = I->getOperand(1);
   const Value *RHS = I->getOperand(2);
 
@@ -2277,6 +2246,19 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
   } else {
+    // Choose the SSE instruction sequence based on data type (float or double).
+    static const uint16_t OpcTable[2][4] = {
+      { X86::CMPSSrr,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
+      { X86::CMPSDrr,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
+    };
+
+    const uint16_t *Opc = nullptr;
+    switch (RetVT.SimpleTy) {
+    default: return false;
+    case MVT::f32: Opc = &OpcTable[0][0]; break;
+    case MVT::f64: Opc = &OpcTable[1][0]; break;
+    }
+
     const TargetRegisterClass *VR128 = &X86::VR128RegClass;
     unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
                                        CmpRHSReg, CmpRHSIsKill, CC);
@@ -2303,8 +2285,10 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
   case MVT::i8:  Opc = X86::CMOV_GR8;  break;
   case MVT::i16: Opc = X86::CMOV_GR16; break;
   case MVT::i32: Opc = X86::CMOV_GR32; break;
-  case MVT::f32: Opc = X86::CMOV_FR32; break;
-  case MVT::f64: Opc = X86::CMOV_FR64; break;
+  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
+                                              : X86::CMOV_FR32; break;
+  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
+                                              : X86::CMOV_FR64; break;
   }
 
   const Value *Cond = I->getOperand(0);
@@ -2485,13 +2469,14 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
   assert((I->getOpcode() == Instruction::FPExt ||
           I->getOpcode() == Instruction::FPTrunc) &&
          "Instruction must be an FPExt or FPTrunc!");
+  bool HasAVX = Subtarget->hasAVX();
 
   unsigned OpReg = getRegForValue(I->getOperand(0));
   if (OpReg == 0)
     return false;
 
   unsigned ImplicitDefReg;
-  if (Subtarget->hasAVX()) {
+  if (HasAVX) {
     ImplicitDefReg = createResultReg(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
@@ -2503,7 +2488,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
                 ResultReg);
 
-  if (Subtarget->hasAVX())
+  if (HasAVX)
     MIB.addReg(ImplicitDefReg);
 
   MIB.addReg(OpReg);
@@ -2519,8 +2504,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
     unsigned Opc =
         HasAVX512 ? X86::VCVTSS2SDZrr
                   : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
-    return X86SelectFPExtOrFPTrunc(
-        I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
+    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
   }
 
   return false;
@@ -2534,8 +2518,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
     unsigned Opc =
         HasAVX512 ? X86::VCVTSD2SSZrr
                   : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
-    return X86SelectFPExtOrFPTrunc(
-        I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
+    return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
   }
 
   return false;
@@ -2900,21 +2883,21 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
         isCommutativeIntrinsic(II))
       std::swap(LHS, RHS);
 
-    unsigned BaseOpc, CondOpc;
+    unsigned BaseOpc, CondCode;
     switch (II->getIntrinsicID()) {
     default: llvm_unreachable("Unexpected intrinsic!");
     case Intrinsic::sadd_with_overflow:
-      BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+      BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
     case Intrinsic::uadd_with_overflow:
-      BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
+      BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
     case Intrinsic::ssub_with_overflow:
-      BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+      BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
     case Intrinsic::usub_with_overflow:
-      BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
+      BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
     case Intrinsic::smul_with_overflow:
-      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
+      BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
     case Intrinsic::umul_with_overflow:
-      BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
+      BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
     }
 
     unsigned LHSReg = getRegForValue(LHS);
@@ -2931,7 +2914,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
       };
 
       if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
-          CondOpc == X86::SETOr) {
+          CondCode == X86::COND_O) {
         // We can use INC/DEC.
         ResultReg = createResultReg(TLI.getRegClassFor(VT));
         bool IsDec = BaseOpc == ISD::SUB;
@@ -2990,8 +2973,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // Assign to a GPR since the overflow return value is lowered to a SETcc.
     unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
-            ResultReg2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+            ResultReg2).addImm(CondCode);
 
     updateValueMap(II, ResultReg, 2);
     return true;
@@ -3509,8 +3492,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
     // This will be a direct call, or an indirect call through memory for
     // NonLazyBind calls or dllimport calls.
-    bool NeedLoad =
-        OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
+    bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
+                    OpFlags == X86II::MO_GOTPCREL ||
+                    OpFlags == X86II::MO_COFFSTUB;
     unsigned CallOpc = NeedLoad
                            ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
                            : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
@@ -3595,7 +3579,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                 TII.get(Opc)), FI)
         .addReg(CopyReg);
-      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+      Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                 TII.get(Opc), ResultReg + i), FI);
     }
@@ -3662,24 +3646,19 @@ X86FastISel::fastSelectInstruction(const Instruction *I)  {
     return true;
   }
   case Instruction::BitCast: {
-    // Select SSE2/AVX bitcasts between 128/256 bit vector types.
+    // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
     if (!Subtarget->hasSSE2())
       return false;
 
-    EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
-    EVT DstVT = TLI.getValueType(DL, I->getType());
-
-    if (!SrcVT.isSimple() || !DstVT.isSimple())
+    MVT SrcVT, DstVT;
+    if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
+        !isTypeLegal(I->getType(), DstVT))
       return false;
 
-    MVT SVT = SrcVT.getSimpleVT();
-    MVT DVT = DstVT.getSimpleVT();
-
-    if (!SVT.is128BitVector() &&
-        !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
-        !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
-          (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
-                                   DVT.getScalarSizeInBits() >= 32))))
+    // Only allow vectors that use xmm/ymm/zmm.
+    if (!SrcVT.isVector() || !DstVT.isVector() ||
+        SrcVT.getVectorElementType() == MVT::i1 ||
+        DstVT.getVectorElementType() == MVT::i1)
       return false;
 
     unsigned Reg = getRegForValue(I->getOperand(0));
@@ -3757,30 +3736,25 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
 
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
-  const TargetRegisterClass *RC = nullptr;
+  bool HasAVX = Subtarget->hasAVX();
+  bool HasAVX512 = Subtarget->hasAVX512();
   switch (VT.SimpleTy) {
   default: return 0;
   case MVT::f32:
-    if (X86ScalarSSEf32) {
-      Opc = Subtarget->hasAVX512()
-                ? X86::VMOVSSZrm
-                : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
-      RC  = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
-    } else {
+    if (X86ScalarSSEf32)
+      Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
+            HasAVX    ? X86::VMOVSSrm_alt :
+                        X86::MOVSSrm_alt;
+    else
       Opc = X86::LD_Fp32m;
-      RC  = &X86::RFP32RegClass;
-    }
     break;
   case MVT::f64:
-    if (X86ScalarSSEf64) {
-      Opc = Subtarget->hasAVX512()
-                ? X86::VMOVSDZrm
-                : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
-      RC  = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
-    } else {
+    if (X86ScalarSSEf64)
+      Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
+            HasAVX    ? X86::VMOVSDrm_alt :
+                        X86::MOVSDrm_alt;
+    else
       Opc = X86::LD_Fp64m;
-      RC  = &X86::RFP64RegClass;
-    }
     break;
   case MVT::f80:
     // No f80 support yet.
@@ -3806,7 +3780,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
 
   // Create the load from the constant pool.
   unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
-  unsigned ResultReg = createResultReg(RC);
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
 
   if (CM == CodeModel::Large) {
     unsigned AddrReg = createResultReg(&X86::GR64RegClass);
@@ -3916,33 +3890,26 @@ unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
   // Get opcode and regclass for the given zero.
   bool HasAVX512 = Subtarget->hasAVX512();
   unsigned Opc = 0;
-  const TargetRegisterClass *RC = nullptr;
   switch (VT.SimpleTy) {
   default: return 0;
   case MVT::f32:
-    if (X86ScalarSSEf32) {
+    if (X86ScalarSSEf32)
       Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
-      RC  = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
-    } else {
+    else
       Opc = X86::LD_Fp032;
-      RC  = &X86::RFP32RegClass;
-    }
     break;
   case MVT::f64:
-    if (X86ScalarSSEf64) {
+    if (X86ScalarSSEf64)
       Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
-      RC  = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
-    } else {
+    else
       Opc = X86::LD_Fp064;
-      RC  = &X86::RFP64RegClass;
-    }
     break;
   case MVT::f80:
     // No f80 support yet.
     return 0;
   }
 
-  unsigned ResultReg = createResultReg(RC);
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
   return ResultReg;
 }
@@ -3992,6 +3959,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   }
 
   Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
+  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
   MachineBasicBlock::iterator I(MI);
   removeDeadCode(I, std::next(I));
   return true;
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index ed297e678203..bf541d933790 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -1,9 +1,8 @@
 //===-- X86FixupBWInsts.cpp - Fixup Byte or Word instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -103,9 +102,7 @@ public:
 
   StringRef getPassName() const override { return FIXUPBW_DESC; }
 
-  FixupBWInstPass() : MachineFunctionPass(ID) {
-    initializeFixupBWInstPassPass(*PassRegistry::getPassRegistry());
-  }
+  FixupBWInstPass() : MachineFunctionPass(ID) { }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to
@@ -151,7 +148,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
 
   this->MF = &MF;
   TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
-  OptForSize = MF.getFunction().optForSize();
+  OptForSize = MF.getFunction().hasOptSize();
   MLI = &getAnalysis<MachineLoopInfo>();
   LiveRegs.init(TII->getRegisterInfo());
 
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index a346085a52cb..041529a0be68 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -1,15 +1,14 @@
 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines the pass that finds instructions that can be
 // re-written as LEA instructions in order to reduce pipeline delays.
-// When optimizing for size it replaces suitable LEAs with INC or DEC.
+// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
 //
 //===----------------------------------------------------------------------===//
 
@@ -36,31 +35,25 @@ namespace {
 class FixupLEAPass : public MachineFunctionPass {
   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
 
-  /// Loop over all of the instructions in the basic block
-  /// replacing applicable instructions with LEA instructions,
-  /// where appropriate.
-  bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
-                         bool IsSlowLEA, bool IsSlow3OpsLEA);
-
   /// Given a machine register, look for the instruction
   /// which writes it in the current basic block. If found,
   /// try to replace it with an equivalent LEA instruction.
   /// If replacement succeeds, then also process the newly created
   /// instruction.
   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
-                    MachineFunction::iterator MFI);
+                    MachineBasicBlock &MBB);
 
   /// Given a memory access or LEA instruction
   /// whose address mode uses a base and/or index register, look for
   /// an opportunity to replace the instruction which sets the base or index
   /// register with an equivalent LEA instruction.
   void processInstruction(MachineBasicBlock::iterator &I,
-                          MachineFunction::iterator MFI);
+                          MachineBasicBlock &MBB);
 
   /// Given a LEA instruction which is unprofitable
   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
-                                    MachineFunction::iterator MFI);
+                                    MachineBasicBlock &MBB);
 
   /// Given a LEA instruction which is unprofitable
   /// on SNB+ try to replace it with other instructions.
@@ -75,12 +68,13 @@ class FixupLEAPass : public MachineFunctionPass {
   /// - LEA that uses 16-bit addressing mode "
   /// This function currently handles the first 2 cases only.
   MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
-                                          MachineFunction::iterator MFI);
+                                          MachineBasicBlock &MBB);
 
-  /// Look for LEAs that add 1 to reg or subtract 1 from reg
-  /// and convert them to INC or DEC respectively.
-  bool fixupIncDec(MachineBasicBlock::iterator &I,
-                   MachineFunction::iterator MFI) const;
+  /// Look for LEAs that are really two-address LEAs that we might be able to
+  /// turn into regular ADD, INC, or DEC instructions.
+  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
+                     MachineBasicBlock &MBB, bool OptIncDec,
+                     bool UseLEAForSP) const;
 
   /// Determine if an instruction references a machine register
   /// and, if so, whether it reads or writes the register.
@@ -91,12 +85,12 @@ class FixupLEAPass : public MachineFunctionPass {
   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                               MachineBasicBlock::iterator &I,
-                                              MachineFunction::iterator MFI);
+                                              MachineBasicBlock &MBB);
 
   /// if an instruction can be converted to an
   /// equivalent LEA, insert the new instruction into the basic block
   /// and return a pointer to it. Otherwise, return zero.
-  MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI,
+  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI) const;
 
 public:
@@ -104,9 +98,7 @@ public:
 
   StringRef getPassName() const override { return FIXUPLEA_DESC; }
 
-  FixupLEAPass() : MachineFunctionPass(ID) {
-    initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
-  }
+  FixupLEAPass() : MachineFunctionPass(ID) { }
 
   /// Loop over all of the basic blocks,
   /// replacing instructions by equivalent LEA instructions
@@ -121,10 +113,8 @@ public:
 
 private:
   TargetSchedModel TSM;
-  MachineFunction *MF;
-  const X86InstrInfo *TII; // Machine instruction info.
-  bool OptIncDec;
-  bool OptLEA;
+  const X86InstrInfo *TII;
+  const X86RegisterInfo *TRI;
 };
 }
 
@@ -133,7 +123,7 @@ char FixupLEAPass::ID = 0;
 INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
 
 MachineInstr *
-FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator &MBBI) const {
   MachineInstr &MI = *MBBI;
   switch (MI.getOpcode()) {
@@ -142,7 +132,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
     const MachineOperand &Src = MI.getOperand(1);
     const MachineOperand &Dest = MI.getOperand(0);
     MachineInstr *NewMI =
-        BuildMI(*MF, MI.getDebugLoc(),
+        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                         : X86::LEA64r))
             .add(Dest)
@@ -151,9 +141,17 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
             .addReg(0)
             .addImm(0)
             .addReg(0);
-    MFI->insert(MBBI, NewMI); // Insert the new inst
     return NewMI;
   }
+  }
+
+  if (!MI.isConvertibleTo3Addr())
+    return nullptr;
+
+  switch (MI.getOpcode()) {
+  default:
+    // Only convert instructions that we've verified are safe.
+    return nullptr;
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
@@ -162,52 +160,80 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
   case X86::ADD32ri8:
   case X86::ADD32ri_DB:
   case X86::ADD32ri8_DB:
-  case X86::ADD16ri:
-  case X86::ADD16ri8:
-  case X86::ADD16ri_DB:
-  case X86::ADD16ri8_DB:
     if (!MI.getOperand(2).isImm()) {
       // convertToThreeAddress will call getImm()
       // which requires isImm() to be true
       return nullptr;
     }
     break;
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB:
-    if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg()) {
-      // if src1 != src2, then convertToThreeAddress will
-      // need to create a Virtual register, which we cannot do
-      // after register allocation.
-      return nullptr;
-    }
+  case X86::SHL64ri:
+  case X86::SHL32ri:
+  case X86::INC64r:
+  case X86::INC32r:
+  case X86::DEC64r:
+  case X86::DEC32r:
+  case X86::ADD64rr:
+  case X86::ADD64rr_DB:
+  case X86::ADD32rr:
+  case X86::ADD32rr_DB:
+    // These instructions are all fine to convert.
+    break;
   }
+  MachineFunction::iterator MFI = MBB.getIterator();
   return TII->convertToThreeAddress(MFI, MI, nullptr);
 }
 
 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
 
-bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
-  if (skipFunction(Func.getFunction()))
+static bool isLEA(unsigned Opcode) {
+  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
+         Opcode == X86::LEA64_32r;
+}
+
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
     return false;
 
-  MF = &Func;
-  const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   bool IsSlowLEA = ST.slowLEA();
   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
+  bool LEAUsesAG = ST.LEAusesAG();
 
-  OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
-  OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
-
-  if (!OptLEA && !OptIncDec)
-    return false;
+  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
+  bool UseLEAForSP = ST.useLeaForSP();
 
-  TSM.init(&Func.getSubtarget());
+  TSM.init(&ST);
   TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
 
   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
-  // Process all basic blocks.
-  for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
-    processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
+  for (MachineBasicBlock &MBB : MF) {
+    // First pass. Try to remove or optimize existing LEAs.
+    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+      if (!isLEA(I->getOpcode()))
+        continue;
+
+      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
+        continue;
+
+      if (IsSlowLEA) {
+        processInstructionForSlowLEA(I, MBB);
+      } else if (IsSlow3OpsLEA) {
+        if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
+          MBB.erase(I);
+          I = NewMI;
+        }
+      }
+    }
+
+    // Second pass for creating LEAs. This may reverse some of the
+    // transformations above.
+    if (LEAUsesAG) {
+      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+        processInstruction(I, MBB);
+    }
+  }
+
   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
 
   return true;
@@ -218,7 +244,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
   RegUsageState RegUsage = RU_NotUsed;
   MachineInstr &MI = *I;
 
-  for (unsigned int i = 0; i < MI.getNumOperands(); ++i) {
+  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
     MachineOperand &opnd = MI.getOperand(i);
     if (opnd.isReg() && opnd.getReg() == p.getReg()) {
       if (opnd.isDef())
@@ -234,10 +260,10 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
 /// wrapping around to the last instruction of the block if the block
 /// branches to itself.
 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
-                                    MachineFunction::iterator MFI) {
-  if (I == MFI->begin()) {
-    if (MFI->isPredecessor(&*MFI)) {
-      I = --MFI->end();
+                                    MachineBasicBlock &MBB) {
+  if (I == MBB.begin()) {
+    if (MBB.isPredecessor(&MBB)) {
+      I = --MBB.end();
       return true;
     } else
       return false;
@@ -248,14 +274,14 @@ static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
 
 MachineBasicBlock::iterator
 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
-                              MachineFunction::iterator MFI) {
+                              MachineBasicBlock &MBB) {
   int InstrDistance = 1;
   MachineBasicBlock::iterator CurInst;
   static const int INSTR_DISTANCE_THRESHOLD = 5;
 
   CurInst = I;
   bool Found;
-  Found = getPreviousInstr(CurInst, MFI);
+  Found = getPreviousInstr(CurInst, MBB);
   while (Found && I != CurInst) {
     if (CurInst->isCall() || CurInst->isInlineAsm())
       break;
@@ -265,17 +291,12 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
       return CurInst;
     }
     InstrDistance += TSM.computeInstrLatency(&*CurInst);
-    Found = getPreviousInstr(CurInst, MFI);
+    Found = getPreviousInstr(CurInst, MBB);
   }
   return MachineBasicBlock::iterator();
 }
 
-static inline bool isLEA(const int Opcode) {
-  return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
-         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
-}
-
-static inline bool isInefficientLEAReg(unsigned int Reg) {
+static inline bool isInefficientLEAReg(unsigned Reg) {
   return Reg == X86::EBP || Reg == X86::RBP ||
          Reg == X86::R13D || Reg == X86::R13;
 }
@@ -298,27 +319,24 @@ static inline bool hasLEAOffset(const MachineOperand &Offset) {
   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
 }
 
-static inline int getADDrrFromLEA(int LEAOpcode) {
+static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return X86::ADD16rr;
   case X86::LEA32r:
-    return X86::ADD32rr;
   case X86::LEA64_32r:
+    return X86::ADD32rr;
   case X86::LEA64r:
     return X86::ADD64rr;
   }
 }
 
-static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
+static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
+                                       const MachineOperand &Offset) {
   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
   switch (LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
   case X86::LEA32r:
   case X86::LEA64_32r:
     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
@@ -327,56 +345,110 @@ static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
   }
 }
 
-/// isLEASimpleIncOrDec - Does this LEA have one these forms:
-/// lea  %reg, 1(%reg)
-/// lea  %reg, -1(%reg)
-static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
-  unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
-  unsigned DstReg = LEA.getOperand(0).getReg();
-  const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
-  return SrcReg == DstReg &&
-         LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
-         LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
-         AddrDisp.isImm() &&
-         (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
+static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
+  switch (LEAOpcode) {
+  default:
+    llvm_unreachable("Unexpected LEA instruction");
+  case X86::LEA32r:
+  case X86::LEA64_32r:
+    return IsINC ? X86::INC32r : X86::DEC32r;
+  case X86::LEA64r:
+    return IsINC ? X86::INC64r : X86::DEC64r;
+  }
 }
 
-bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
-                               MachineFunction::iterator MFI) const {
+bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
+                                 MachineBasicBlock &MBB, bool OptIncDec,
+                                 bool UseLEAForSP) const {
   MachineInstr &MI = *I;
-  int Opcode = MI.getOpcode();
-  if (!isLEA(Opcode))
+
+  const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
+  const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
+  const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
+  const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
+  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
+
+  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
+      !TII->isSafeToClobberEFLAGS(MBB, I))
     return false;
 
-  if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
-    int NewOpcode;
-    bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
-    switch (Opcode) {
-    case X86::LEA16r:
-      NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
-      break;
-    case X86::LEA32r:
-    case X86::LEA64_32r:
-      NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
-      break;
-    case X86::LEA64r:
-      NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
-      break;
-    }
+  unsigned DestReg  = MI.getOperand(0).getReg();
+  unsigned BaseReg  = Base.getReg();
+  unsigned IndexReg = Index.getReg();
 
-    MachineInstr *NewMI =
-        BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
-            .add(MI.getOperand(0))
-            .add(MI.getOperand(1 + X86::AddrBaseReg));
-    MFI->erase(I);
-    I = static_cast<MachineBasicBlock::iterator>(NewMI);
-    return true;
+  // Don't change stack adjustment LEAs.
+  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
+    return false;
+
+  // LEA64_32 has 64-bit operands but 32-bit result.
+  if (MI.getOpcode() == X86::LEA64_32r) {
+    if (BaseReg != 0)
+      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+    if (IndexReg != 0)
+      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
   }
-  return false;
+
+  MachineInstr *NewMI = nullptr;
+
+  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
+  // which can be turned into add %reg2, %reg1
+  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
+      (DestReg == BaseReg || DestReg == IndexReg)) {
+    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
+    if (DestReg != BaseReg)
+      std::swap(BaseReg, IndexReg);
+
+    if (MI.getOpcode() == X86::LEA64_32r) {
+      // TODO: Do we need the super register implicit use?
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+        .addReg(BaseReg).addReg(IndexReg)
+        .addReg(Base.getReg(), RegState::Implicit)
+        .addReg(Index.getReg(), RegState::Implicit);
+    } else {
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+        .addReg(BaseReg).addReg(IndexReg);
+    }
+  } else if (DestReg == BaseReg && IndexReg == 0) {
+    // This is an LEA with only a base register and a displacement, so
+    // we can use ADDri or INC/DEC.
+
+    // Does this LEA have one of these forms:
+    // lea  %reg, 1(%reg)
+    // lea  %reg, -1(%reg)
+    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
+      bool IsINC = Disp.getImm() == 1;
+      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
+
+      if (MI.getOpcode() == X86::LEA64_32r) {
+        // TODO: Do we need the super register implicit use?
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
+      } else {
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg);
+      }
+    } else {
+      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
+      if (MI.getOpcode() == X86::LEA64_32r) {
+        // TODO: Do we need the super register implicit use?
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addImm(Disp.getImm())
+          .addReg(Base.getReg(), RegState::Implicit);
+      } else {
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addImm(Disp.getImm());
+      }
+    }
+  } else
+    return false;
+
+  MBB.erase(I);
+  I = NewMI;
+  return true;
 }
 
 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
-                                      MachineFunction::iterator MFI) {
+                                      MachineBasicBlock &MBB) {
   // Process a load, store, or LEA instruction.
   MachineInstr &MI = *I;
   const MCInstrDesc &Desc = MI.getDesc();
@@ -385,40 +457,38 @@ void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
     AddrOffset += X86II::getOperandBias(Desc);
     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
     if (p.isReg() && p.getReg() != X86::ESP) {
-      seekLEAFixup(p, I, MFI);
+      seekLEAFixup(p, I, MBB);
     }
     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
     if (q.isReg() && q.getReg() != X86::ESP) {
-      seekLEAFixup(q, I, MFI);
+      seekLEAFixup(q, I, MBB);
     }
   }
 }
 
 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                 MachineBasicBlock::iterator &I,
-                                MachineFunction::iterator MFI) {
-  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
+                                MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
   if (MBI != MachineBasicBlock::iterator()) {
-    MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI);
+    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
     if (NewMI) {
       ++NumLEAs;
       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
       // now to replace with an equivalent LEA...
       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
-      MFI->erase(MBI);
+      MBB.erase(MBI);
       MachineBasicBlock::iterator J =
           static_cast<MachineBasicBlock::iterator>(NewMI);
-      processInstruction(J, MFI);
+      processInstruction(J, MBB);
     }
   }
 }
 
 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
-                                                MachineFunction::iterator MFI) {
+                                                MachineBasicBlock &MBB) {
   MachineInstr &MI = *I;
-  const int Opcode = MI.getOpcode();
-  if (!isLEA(Opcode))
-    return;
+  const unsigned Opcode = MI.getOpcode();
 
   const MachineOperand &Dst =     MI.getOperand(0);
   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
@@ -428,7 +498,7 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 
   if (Segment.getReg() != 0 || !Offset.isImm() ||
-      !TII->isSafeToClobberEFLAGS(*MFI, I))
+      !TII->isSafeToClobberEFLAGS(MBB, I))
     return;
   const unsigned DstR = Dst.getReg();
   const unsigned SrcR1 = Base.getReg();
@@ -445,7 +515,7 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
     NewMI =
-        BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
+        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
     LLVM_DEBUG(NewMI->dump(););
   }
   // Make ADD instruction for immediate
@@ -453,24 +523,21 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
     const MCInstrDesc &ADDri =
         TII->get(getADDriFromLEA(Opcode, Offset));
     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
-    NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
+    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                 .add(SrcR)
                 .addImm(Offset.getImm());
     LLVM_DEBUG(NewMI->dump(););
   }
   if (NewMI) {
-    MFI->erase(I);
+    MBB.erase(I);
     I = NewMI;
   }
 }
 
 MachineInstr *
 FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
-                                        MachineFunction::iterator MFI) {
-
-  const int LEAOpcode = MI.getOpcode();
-  if (!isLEA(LEAOpcode))
-    return nullptr;
+                                        MachineBasicBlock &MBB) {
+  const unsigned LEAOpcode = MI.getOpcode();
 
   const MachineOperand &Dst =     MI.getOperand(0);
   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
@@ -481,13 +548,13 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
 
   if (!(TII->isThreeOperandsLEA(MI) ||
         hasInefficientLEABaseReg(Base, Index)) ||
-      !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
+      !TII->isSafeToClobberEFLAGS(MBB, MI) ||
       Segment.getReg() != X86::NoRegister)
     return nullptr;
 
-  unsigned int DstR = Dst.getReg();
-  unsigned int BaseR = Base.getReg();
-  unsigned int IndexR = Index.getReg();
+  unsigned DstR = Dst.getReg();
+  unsigned BaseR = Base.getReg();
+  unsigned IndexR = Index.getReg();
   unsigned SSDstR =
       (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
   bool IsScale1 = Scale.getImm() == 1;
@@ -516,11 +583,11 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
   if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
     const MachineOperand &Src = DstR == BaseR ? Index : Base;
     MachineInstr *NewMI =
-        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
+        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
     LLVM_DEBUG(NewMI->dump(););
     // Create ADD instruction for the Offset in case of 3-Ops LEA.
     if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
       LLVM_DEBUG(NewMI->dump(););
     }
     return NewMI;
@@ -530,7 +597,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
   // lea offset(%base,%index,scale),%dst =>
   // lea (%base,%index,scale); add offset,%dst
   if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
-    MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+    MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
                               .add(Dst)
                               .add(IsInefficientBase ? Index : Base)
                               .add(Scale)
@@ -540,7 +607,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
     LLVM_DEBUG(NewMI->dump(););
     // Create ADD instruction for the Offset in case of 3-Ops LEA.
     if (hasLEAOffset(Offset)) {
-      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+      NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
       LLVM_DEBUG(NewMI->dump(););
     }
     return NewMI;
@@ -552,17 +619,17 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
   if (IsScale1 && !hasLEAOffset(Offset)) {
     bool BIK = Base.isKill() && BaseR != IndexR;
-    TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, BIK);
+    TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
     LLVM_DEBUG(MI.getPrevNode()->dump(););
 
     MachineInstr *NewMI =
-        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
+        BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
     LLVM_DEBUG(NewMI->dump(););
     return NewMI;
   }
   // lea offset(%base,%index,scale), %dst =>
   // lea offset( ,%index,scale), %dst; add %base,%dst
-  MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+  MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
                             .add(Dst)
                             .addReg(0)
                             .add(Scale)
@@ -571,35 +638,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
                             .add(Segment);
   LLVM_DEBUG(NewMI->dump(););
 
-  NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
+  NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
   LLVM_DEBUG(NewMI->dump(););
   return NewMI;
 }
-
-bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
-                                     MachineFunction::iterator MFI,
-                                     bool IsSlowLEA, bool IsSlow3OpsLEA) {
-  for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
-    if (OptIncDec)
-      if (fixupIncDec(I, MFI))
-        continue;
-
-    if (OptLEA) {
-      if (IsSlowLEA) {
-        processInstructionForSlowLEA(I, MFI);
-        continue;
-      }
-      
-      if (IsSlow3OpsLEA) {
-        if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
-          MFI->erase(I);
-          I = NewMI;
-        }
-        continue;
-      }
-
-      processInstruction(I, MFI);
-    }
-  }
-  return false;
-}
diff --git a/lib/Target/X86/X86FixupSetCC.cpp b/lib/Target/X86/X86FixupSetCC.cpp
index a86eb997635e..e2d4d1ede6f3 100644
--- a/lib/Target/X86/X86FixupSetCC.cpp
+++ b/lib/Target/X86/X86FixupSetCC.cpp
@@ -1,9 +1,8 @@
 //===---- X86FixupSetCC.cpp - optimize usage of LEA instructions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,30 +67,6 @@ char X86FixupSetCCPass::ID = 0;
 
 FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
 
-bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case X86::SETOr:
-  case X86::SETNOr:
-  case X86::SETBr:
-  case X86::SETAEr:
-  case X86::SETEr:
-  case X86::SETNEr:
-  case X86::SETBEr:
-  case X86::SETAr:
-  case X86::SETSr:
-  case X86::SETNSr:
-  case X86::SETPr:
-  case X86::SETNPr:
-  case X86::SETLr:
-  case X86::SETGEr:
-  case X86::SETLEr:
-  case X86::SETGr:
-    return true;
-  }
-}
-
 // We expect the instruction *immediately* before the setcc to imp-def
 // EFLAGS (because of scheduling glue). To make this less brittle w.r.t
 // scheduling, look backwards until we hit the beginning of the
@@ -103,7 +78,7 @@ X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
   auto MBBStart = MBB->rend();
   for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
     for (auto &Op : MI->implicit_operands())
-      if ((Op.getReg() == X86::EFLAGS) && (Op.isDef()))
+      if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isDef())
         return &*MI;
 
   return nullptr;
@@ -111,7 +86,7 @@ X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
 
 bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) {
   for (auto &Op : MI->implicit_operands())
-    if ((Op.getReg() == X86::EFLAGS) && (Op.isUse()))
+    if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse())
       return true;
 
   return false;
@@ -129,7 +104,7 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
       // Find a setcc that is used by a zext.
       // This doesn't have to be the only use, the transformation is safe
       // regardless.
-      if (!isSetCCr(MI.getOpcode()))
+      if (MI.getOpcode() != X86::SETCCr)
         continue;
 
       MachineInstr *ZExt = nullptr;
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index 778aa505b2d9..5ce3255ea96a 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -1,9 +1,8 @@
 //====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -71,12 +70,6 @@ STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
 STATISTIC(NumTestsInserted, "Number of test instructions inserted");
 STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
 
-namespace llvm {
-
-void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-
-} // end namespace llvm
-
 namespace {
 
 // Convenient array type for storing registers associated with each condition.
@@ -84,9 +77,7 @@ using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
 
 class X86FlagsCopyLoweringPass : public MachineFunctionPass {
 public:
-  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
-    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
-  }
+  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { }
 
   StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -252,13 +243,13 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
          "Split instruction must be in the split block!");
   assert(SplitI.isBranch() &&
          "Only designed to split a tail of branch instructions!");
-  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
+  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
          "Must split on an actual jCC instruction!");
 
   // Dig out the previous instruction to the split point.
   MachineInstr &PrevI = *std::prev(SplitI.getIterator());
   assert(PrevI.isBranch() && "Must split after a branch!");
-  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
+  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
          "Must split after an actual jCC instruction!");
   assert(!std::prev(PrevI.getIterator())->isTerminator() &&
          "Must only have this one terminator prior to the split!");
@@ -588,22 +579,21 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
         // branch folding or black placement. As a consequence, we get to deal
         // with the simpler formulation of conditional branches followed by tail
         // calls.
-        if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
           auto JmpIt = MI.getIterator();
           do {
             JmpIs.push_back(&*JmpIt);
             ++JmpIt;
           } while (JmpIt != UseMBB.instr_end() &&
-                   X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+                   X86::getCondFromBranch(*JmpIt) !=
                        X86::COND_INVALID);
           break;
         }
 
         // Otherwise we can just rewrite in-place.
-        if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
+        if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
           rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
-        } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
-                   X86::COND_INVALID) {
+        } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
           rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
         } else if (MI.getOpcode() == TargetOpcode::COPY) {
           rewriteCopy(MI, *FlagUse, CopyDefI);
@@ -730,7 +720,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
   // Scan backwards across the range of instructions with live EFLAGS.
   for (MachineInstr &MI :
        llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
-    X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
+    X86::CondCode Cond = X86::getCondFromSETCC(MI);
     if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
         TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
       assert(MI.getOperand(0).isDef() &&
@@ -751,7 +741,7 @@ unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
     DebugLoc TestLoc, X86::CondCode Cond) {
   unsigned Reg = MRI->createVirtualRegister(PromoteRC);
   auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
-                      TII->get(X86::getSETFromCond(Cond)), Reg);
+                      TII->get(X86::SETCCr), Reg).addImm(Cond);
   (void)SetI;
   LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
   ++NumSetCCsInserted;
@@ -842,7 +832,7 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
   // First get the register containing this specific condition.
-  X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
+  X86::CondCode Cond = X86::getCondFromCMov(CMovI);
   unsigned CondReg;
   bool Inverted;
   std::tie(CondReg, Inverted) =
@@ -853,12 +843,10 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
   // Insert a direct test of the saved register.
   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
 
-  // Rewrite the CMov to use the !ZF flag from the test (but match register
-  // size and memory operand), and then kill its use of the flags afterward.
-  auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
-  CMovI.setDesc(TII->get(X86::getCMovFromCond(
-      Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
-      !CMovI.memoperands_empty())));
+  // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
+  // of the flags afterward.
+  CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
+      .setImm(Inverted ? X86::COND_E : X86::COND_NE);
   FlagUse.setIsKill(true);
   LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
 }
@@ -867,7 +855,7 @@ void X86FlagsCopyLoweringPass::rewriteCondJmp(
     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
     DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
   // First get the register containing this specific condition.
-  X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
+  X86::CondCode Cond = X86::getCondFromBranch(JmpI);
   unsigned CondReg;
   bool Inverted;
   std::tie(CondReg, Inverted) =
@@ -880,10 +868,8 @@ void X86FlagsCopyLoweringPass::rewriteCondJmp(
 
   // Rewrite the jump to use the !ZF flag from the test, and kill its use of
   // flags afterward.
-  JmpI.setDesc(TII->get(
-      X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
-  const int ImplicitEFLAGSOpIdx = 1;
-  JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
+  JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
+  JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
   LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
 }
 
@@ -1026,7 +1012,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                             MachineInstr &SetCCI,
                                             MachineOperand &FlagUse,
                                             CondRegArray &CondRegs) {
-  X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
+  X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
   // Note that we can't usefully rewrite this to the inverse without complex
   // analysis of the users of the setCC. Largely we rely on duplicates which
   // could have been avoided already being avoided here.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index f330acff61a1..074cf21d03f5 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1,9 +1,8 @@
 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -60,7 +59,6 @@ namespace {
   struct FPS : public MachineFunctionPass {
     static char ID;
     FPS() : MachineFunctionPass(ID) {
-      initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
       // This is really only to keep valgrind quiet.
       // The logic in isLive() is too much for it.
       memset(Stack, 0, sizeof(Stack));
@@ -299,9 +297,16 @@ namespace {
 
     void setKillFlags(MachineBasicBlock &MBB) const;
   };
-  char FPS::ID = 0;
 }
 
+char FPS::ID = 0;
+
+INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
+                    false, false)
+
 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
 
 /// getFPReg - Return the X86::FPx register number for the specified operand.
@@ -591,7 +596,7 @@ namespace {
 }
 
 static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
-  const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode);
+  const TableEntry *I = llvm::lower_bound(Table, Opcode);
   if (I != Table.end() && I->from == Opcode)
     return I->to;
   return -1;
@@ -1096,6 +1101,8 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
   // Change from the pseudo instruction to the concrete instruction.
   MI.RemoveOperand(0); // Remove the explicit ST(0) operand
   MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
+  MI.addOperand(
+      MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
 
   // Result gets pushed on the stack.
   pushReg(DestReg);
@@ -1140,6 +1147,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
   // Convert from the pseudo instruction to the concrete instruction.
   MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand
   MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
+  MI.addOperand(
+      MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
 
   if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
       MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
@@ -1369,8 +1378,6 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
 /// register arguments and no explicit destinations.
 ///
 void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
-  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
-  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
   MachineInstr &MI = *I;
 
   unsigned NumOperands = MI.getDesc().getNumOperands();
@@ -1475,7 +1482,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
     break;
   }
 
-  case TargetOpcode::INLINEASM: {
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR: {
     // The inline asm MachineInstr currently only *uses* FP registers for the
     // 'f' constraint.  These should be turned into the current ST(x) register
     // in the machine instr.
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 984db12201ed..e310fe069117 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -585,23 +584,23 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
   // registers. For the prolog expansion we use RAX, RCX and RDX.
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const TargetRegisterClass *RegClass = &X86::GR64RegClass;
-  const unsigned SizeReg = InProlog ? (unsigned)X86::RAX
+  const Register SizeReg = InProlog ? X86::RAX
                                     : MRI.createVirtualRegister(RegClass),
-                 ZeroReg = InProlog ? (unsigned)X86::RCX
+                 ZeroReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass),
-                 CopyReg = InProlog ? (unsigned)X86::RDX
+                 CopyReg = InProlog ? X86::RDX
                                     : MRI.createVirtualRegister(RegClass),
-                 TestReg = InProlog ? (unsigned)X86::RDX
+                 TestReg = InProlog ? X86::RDX
                                     : MRI.createVirtualRegister(RegClass),
-                 FinalReg = InProlog ? (unsigned)X86::RDX
+                 FinalReg = InProlog ? X86::RDX
                                      : MRI.createVirtualRegister(RegClass),
-                 RoundedReg = InProlog ? (unsigned)X86::RDX
+                 RoundedReg = InProlog ? X86::RDX
                                        : MRI.createVirtualRegister(RegClass),
-                 LimitReg = InProlog ? (unsigned)X86::RCX
+                 LimitReg = InProlog ? X86::RCX
                                      : MRI.createVirtualRegister(RegClass),
-                 JoinReg = InProlog ? (unsigned)X86::RCX
+                 JoinReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass),
-                 ProbeReg = InProlog ? (unsigned)X86::RCX
+                 ProbeReg = InProlog ? X86::RCX
                                      : MRI.createVirtualRegister(RegClass);
 
   // SP-relative offsets where we can save RCX and RDX.
@@ -654,9 +653,10 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
   BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
       .addReg(CopyReg)
       .addReg(SizeReg);
-  BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg)
+  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
       .addReg(TestReg)
-      .addReg(ZeroReg);
+      .addReg(ZeroReg)
+      .addImm(X86::COND_B);
 
   // FinalReg now holds final stack pointer value, or zero if
   // allocation would overflow. Compare against the current stack
@@ -673,7 +673,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
       .addReg(X86::GS);
   BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
   // Jump if the desired stack pointer is at or above the stack limit.
-  BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
+  BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
 
   // Add code to roundMBB to round the final stack pointer to a page boundary.
   RoundMBB->addLiveIn(FinalReg);
@@ -710,7 +710,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
   BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
       .addReg(RoundedReg)
       .addReg(ProbeReg);
-  BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB);
+  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
 
   MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
 
@@ -794,8 +794,8 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
         .addExternalSymbol(MF.createExternalSymbolName(Symbol));
   }
 
-  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
-  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
+  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
+  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
   CI.addReg(AX, RegState::Implicit)
       .addReg(SP, RegState::Implicit)
       .addReg(AX, RegState::Define | RegState::Implicit)
@@ -809,7 +809,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
     // adjusting %rsp.
     // All other platforms do not specify a particular ABI for the stack probe
     // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
-    BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP)
+    BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
         .addReg(SP)
         .addReg(AX);
   }
@@ -872,6 +872,17 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
   MI->getOperand(3).setIsDead();
 }
 
+bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
+  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
+  // clobbered by any interrupt handler.
+  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
+         "MF used frame lowering for wrong subtarget");
+  const Function &Fn = MF.getFunction();
+  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
+  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
+}
+
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
@@ -976,7 +987,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
   bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
   bool HasFP = hasFP(MF);
-  bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
   bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
   bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
   // FIXME: Emit FPO data for EH funclets.
@@ -1030,12 +1040,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone). We also check that we don't
   // push and pop from the stack.
-  if (Is64Bit && !Fn.hasFnAttribute(Attribute::NoRedZone) &&
+  if (has128ByteRedZone(MF) &&
       !TRI->needsStackRealignment(MF) &&
       !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
       !MFI.adjustsStack() &&                   // No calls.
       !UseStackProbe &&                        // No stack probes.
-      !IsWin64CC &&                            // Win64 has no Red Zone
       !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
       !MF.shouldSplitStack()) {                // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -1774,6 +1783,15 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
   int64_t FPDelta = 0;
 
+  // In an x86 interrupt, remove the offset we added to account for the return
+  // address from any stack object allocated in the caller's frame. Interrupts
+  // do not have a standard return address. Fixed objects in the current frame,
+  // such as SSE register spills, should not get this treatment.
+  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
+      Offset >= 0) {
+    Offset += getOffsetOfLocalArea();
+  }
+
   if (IsWin64Prologue) {
     assert(!MFI.hasCalls() || (StackSize % 16) == 8);
 
@@ -1888,8 +1906,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
   // If !hasReservedCallFrame the function might have SP adjustement in the
   // body.  So, even though the offset is statically known, it depends on where
   // we are in the function.
-  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  if (!IgnoreSPUpdates && !TFI->hasReservedCallFrame(MF))
+  if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
     return getFrameIndexReference(MF, FI, FrameReg);
 
   // We don't handle tail calls, and shouldn't be seeing them either.
@@ -2407,7 +2424,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
 
   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
   // It jumps to normal execution of the function body.
-  BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
+  BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
 
   // On 32 bit we first push the arguments size and then the frame size. On 64
   // bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -2637,7 +2654,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
     // SPLimitOffset is in a fixed heap location (pointed by BP).
     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
-    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
+    BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
 
     // Create new MBB for IncStack:
     BuildMI(incStackMBB, DL, TII.get(CALLop)).
@@ -2646,7 +2663,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
                  SPReg, false, -MaxStack);
     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
-    BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
+    BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
 
     stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
     stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
@@ -2802,7 +2819,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
 
       if (StackAdjustment) {
-        if (!(F.optForMinSize() &&
+        if (!(F.hasMinSize() &&
               adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
           BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                                /*InEpilogue=*/false);
@@ -3079,8 +3096,7 @@ void X86FrameLowering::orderFrameObjects(
 
   // Sort the objects using X86FrameSortingAlgorithm (see its comment for
   // info).
-  std::stable_sort(SortingObjects.begin(), SortingObjects.end(),
-                   X86FrameSortingComparator());
+  llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
 
   // Now modify the original list to represent the final order that
   // we want. The order will depend on whether we're going to access them
@@ -3154,7 +3170,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
   MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
   int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
   int UnwindHelpFI =
-      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
   EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
 
   // Store -2 into UnwindHelp on function entry. We have to scan forwards past
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 3bd805aae123..d32746e3a36e 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -1,9 +1,8 @@
 //===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -172,6 +171,10 @@ public:
 
   unsigned getInitialCFARegister(const MachineFunction &MF) const override;
 
+  /// Return true if the function has a redzone (accessible bytes past the
+  /// frame of the top of stack function) as part of its ABI.
+  bool has128ByteRedZone(const MachineFunction& MF) const;
+
 private:
   uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
 
diff --git a/lib/Target/X86/X86GenRegisterBankInfo.def b/lib/Target/X86/X86GenRegisterBankInfo.def
index 9cd3f96f83ac..0fdea9071c29 100644
--- a/lib/Target/X86/X86GenRegisterBankInfo.def
+++ b/lib/Target/X86/X86GenRegisterBankInfo.def
@@ -1,9 +1,8 @@
 //===- X86GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5ac153244df9..95d31e62cafc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,6 +73,7 @@ namespace {
     int JT;
     unsigned Align;    // CP alignment.
     unsigned char SymbolFlags;  // X86II::MO_*
+    bool NegateIndex = false;
 
     X86ISelAddressMode()
         : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
@@ -116,6 +116,8 @@ namespace {
         dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
       dbgs() << " Scale " << Scale << '\n'
              << "IndexReg ";
+      if (NegateIndex)
+        dbgs() << "negate ";
       if (IndexReg.getNode())
         IndexReg.getNode()->dump(DAG);
       else
@@ -170,8 +172,8 @@ namespace {
 
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
-        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
-          OptForMinSize(false) {}
+        : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
+          OptForMinSize(false), IndirectTlsSegRefs(false) {}
 
     StringRef getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -182,6 +184,13 @@ namespace {
       Subtarget = &MF.getSubtarget<X86Subtarget>();
       IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
                              "indirect-tls-seg-refs");
+
+      // OptFor[Min]Size are used in pattern predicates that isel is matching.
+      OptForSize = MF.getFunction().hasOptSize();
+      OptForMinSize = MF.getFunction().hasMinSize();
+      assert((!OptForMinSize || OptForSize) &&
+             "OptForMinSize implies OptForSize");
+
       SelectionDAGISel::runOnMachineFunction(MF);
       return true;
     }
@@ -204,7 +213,7 @@ namespace {
     bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool matchAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
-    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+    bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
     bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
     bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
@@ -252,16 +261,32 @@ namespace {
     void emitSpecialCodeForMain();
 
     inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
-                                   SDValue &Base, SDValue &Scale,
+                                   MVT VT, SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
-      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
-                 ? CurDAG->getTargetFrameIndex(
-                       AM.Base_FrameIndex,
-                       TLI->getPointerTy(CurDAG->getDataLayout()))
-                 : AM.Base_Reg;
+      if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+        Base = CurDAG->getTargetFrameIndex(
+            AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
+      else if (AM.Base_Reg.getNode())
+        Base = AM.Base_Reg;
+      else
+        Base = CurDAG->getRegister(0, VT);
+
       Scale = getI8Imm(AM.Scale, DL);
-      Index = AM.IndexReg;
+
+      // Negate the index if needed.
+      if (AM.NegateIndex) {
+        unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
+        SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
+                                                     AM.IndexReg), 0);
+        AM.IndexReg = Neg;
+      }
+
+      if (AM.IndexReg.getNode())
+        Index = AM.IndexReg;
+      else
+        Index = CurDAG->getRegister(0, VT);
+
       // These are 32-bit even in 64-bit mode since RIP-relative offset
       // is 32-bit.
       if (AM.GV)
@@ -290,7 +315,7 @@ namespace {
       if (AM.Segment.getNode())
         Segment = AM.Segment;
       else
-        Segment = CurDAG->getRegister(0, MVT::i32);
+        Segment = CurDAG->getRegister(0, MVT::i16);
     }
 
     // Utility function to determine whether we should avoid selecting
@@ -400,6 +425,19 @@ namespace {
       return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
     }
 
+    // Helper to detect unneeded AND instructions on shift amounts. Called
+    // from PatFrags in tablegen.
+    bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
+      assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
+      const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+
+      if (Val.countTrailingOnes() >= Width)
+        return true;
+
+      APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
+      return Mask.countTrailingOnes() >= Width;
+    }
+
     /// Return an SDNode that returns the value of the global base register.
     /// Output instructions required to initialize the global base register,
     /// if necessary.
@@ -464,6 +502,8 @@ namespace {
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
     bool tryShiftAmountMod(SDNode *N);
+    bool tryShrinkShlLogicImm(SDNode *N);
+    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -485,7 +525,7 @@ namespace {
 static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
   unsigned Opcode = N->getOpcode();
   if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
-      Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
+      Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
     // We can get 256-bit 8 element types here without VLX being enabled. When
     // this happens we will use 512-bit operations and the mask will not be
     // zero extended.
@@ -497,7 +537,7 @@ static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
   }
   // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
   if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
-      Opcode == X86ISD::FSETCCM_RND)
+      Opcode == X86ISD::FSETCCM_SAE)
     return true;
 
   return false;
@@ -571,6 +611,21 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
             Imm->getAPIntValue().getBitWidth() == 64 &&
             Imm->getAPIntValue().isIntN(32))
           return false;
+
+        // If this is really a zext_inreg that can be represented with a movzx
+        // instruction, prefer that.
+        // TODO: We could shrink the load and fold if it is non-volatile.
+        if (U->getOpcode() == ISD::AND &&
+            (Imm->getAPIntValue() == UINT8_MAX ||
+             Imm->getAPIntValue() == UINT16_MAX ||
+             Imm->getAPIntValue() == UINT32_MAX))
+          return false;
+
+        // ADD/SUB can negate the immediate and use the opposite operation
+        // to fit 128 into a sign extended 8 bit immediate.
+        if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
+            (-Imm->getAPIntValue()).isSignedIntN(8))
+          return false;
       }
 
       // If the other operand is a TLS address, we should fold it instead.
@@ -720,11 +775,6 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
 }
 
 void X86DAGToDAGISel::PreprocessISelDAG() {
-  // OptFor[Min]Size are used in pattern predicates that isel is matching.
-  OptForSize = MF->getFunction().optForSize();
-  OptForMinSize = MF->getFunction().optForMinSize();
-  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
-
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
@@ -741,6 +791,143 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       continue;
     }
 
+    switch (N->getOpcode()) {
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT: {
+      // Replace vector fp_to_s/uint with their X86 specific equivalent so we
+      // don't need 2 sets of patterns.
+      if (!N->getSimpleValueType(0).isVector())
+        break;
+
+      unsigned NewOpc;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
+      case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
+      }
+      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+                                    N->getOperand(0));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL: {
+      // Replace vector shifts with their X86 specific equivalent so we don't
+      // need 2 sets of patterns.
+      if (!N->getValueType(0).isVector())
+        break;
+
+      unsigned NewOpc;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
+      case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
+      case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
+      }
+      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+                                    N->getOperand(0), N->getOperand(1));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    case ISD::ANY_EXTEND:
+    case ISD::ANY_EXTEND_VECTOR_INREG: {
+      // Replace vector any extend with the zero extend equivalents so we don't
+      // need 2 sets of patterns. Ignore vXi1 extensions.
+      if (!N->getValueType(0).isVector() ||
+          N->getOperand(0).getScalarValueSizeInBits() == 1)
+        break;
+
+      unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
+                            ? ISD::ZERO_EXTEND
+                            : ISD::ZERO_EXTEND_VECTOR_INREG;
+
+      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+                                    N->getOperand(0));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    case ISD::FCEIL:
+    case ISD::FFLOOR:
+    case ISD::FTRUNC:
+    case ISD::FNEARBYINT:
+    case ISD::FRINT: {
+      // Replace fp rounding with their X86 specific equivalent so we don't
+      // need 2 sets of patterns.
+      unsigned Imm;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::FCEIL:      Imm = 0xA; break;
+      case ISD::FFLOOR:     Imm = 0x9; break;
+      case ISD::FTRUNC:     Imm = 0xB; break;
+      case ISD::FNEARBYINT: Imm = 0xC; break;
+      case ISD::FRINT:      Imm = 0x4; break;
+      }
+      SDLoc dl(N);
+      SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                    N->getValueType(0),
+                                    N->getOperand(0),
+                                    CurDAG->getConstant(Imm, dl, MVT::i8));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    case X86ISD::FANDN:
+    case X86ISD::FAND:
+    case X86ISD::FOR:
+    case X86ISD::FXOR: {
+      // Widen scalar fp logic ops to vector to reduce isel patterns.
+      // FIXME: Can we do this during lowering/combine.
+      MVT VT = N->getSimpleValueType(0);
+      if (VT.isVector() || VT == MVT::f128)
+        break;
+
+      MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+      SDLoc dl(N);
+      SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+                                    N->getOperand(0));
+      SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+                                    N->getOperand(1));
+
+      SDValue Res;
+      if (Subtarget->hasSSE2()) {
+        EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
+        Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
+        Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
+        unsigned Opc;
+        switch (N->getOpcode()) {
+        default: llvm_unreachable("Unexpected opcode!");
+        case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
+        case X86ISD::FAND:  Opc = ISD::AND;      break;
+        case X86ISD::FOR:   Opc = ISD::OR;       break;
+        case X86ISD::FXOR:  Opc = ISD::XOR;      break;
+        }
+        Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
+        Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
+      } else {
+        Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
+      }
+      Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
+                            CurDAG->getIntPtrConstant(0, dl));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    }
+
     if (OptLevel != CodeGenOpt::None &&
         // Only do this when the target can fold the load into the call or
         // jmp.
@@ -786,65 +973,135 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     // and the node legalization.  As such this pass basically does "really
     // late" legalization of these inline with the X86 isel pass.
     // FIXME: This should only happen when not compiled with -O0.
-    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
-      continue;
+    switch (N->getOpcode()) {
+    default: continue;
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(0).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
+        continue;
 
-    MVT SrcVT = N->getOperand(0).getSimpleValueType();
-    MVT DstVT = N->getSimpleValueType(0);
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
+        continue;
 
-    // If any of the sources are vectors, no fp stack involved.
-    if (SrcVT.isVector() || DstVT.isVector())
-      continue;
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(1))
+          continue;
+      }
 
-    // If the source and destination are SSE registers, then this is a legal
-    // conversion that should not be lowered.
-    const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(TLI);
-    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
-    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
-    if (SrcIsSSE && DstIsSSE)
-      continue;
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
 
-    if (!SrcIsSSE && !DstIsSSE) {
-      // If this is an FPStack extension, it is a noop.
-      if (N->getOpcode() == ISD::FP_EXTEND)
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+                                          MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havoc on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      break;
+    }
+
+    //The sequence of events for lowering STRICT_FP versions of these nodes requires
+    //dealing with the chain differently, as there is already a preexisting chain.
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(1).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
         continue;
-      // If this is a value-preserving FPStack truncation, it is a noop.
-      if (N->getConstantOperandVal(1))
+
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
         continue;
-    }
 
-    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
-    // FPStack has extload and truncstore.  SSE can fold direct loads into other
-    // operations.  Based on this, decide what we want to do.
-    MVT MemVT;
-    if (N->getOpcode() == ISD::FP_ROUND)
-      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
-    else
-      MemVT = SrcIsSSE ? SrcVT : DstVT;
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(2))
+          continue;
+      }
+
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
 
-    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
-    SDLoc dl(N);
+      //Since the operation is StrictFP, use the preexisting chain.
+      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+                                MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
 
-    // FIXME: optimize the case where the src/dest is a load or store?
-    SDValue Store =
-        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
-                              MemTmp, MachinePointerInfo(), MemVT);
-    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
-                                        MachinePointerInfo(), MemVT);
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havoc on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+      break;
+    }
+    }
 
-    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
-    // extload we created.  This will cause general havok on the dag because
-    // anything below the conversion could be folded into other existing nodes.
-    // To avoid invalidating 'I', back it up to the convert node.
-    --I;
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
 
     // Now that we did that, the node is dead.  Increment the iterator to the
     // next node to process, then delete N.
     ++I;
     CurDAG->DeleteNode(N);
   }
+
+  // The load+call transform above can leave some dead nodes in the graph. Make
+  // sure we remove them. It's possible some of the other transforms do too, so
+  // just remove dead nodes unconditionally.
+  CurDAG->RemoveDeadNodes();
 }
 
 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
@@ -1138,15 +1395,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
   if (AM.hasSymbolicDisplacement())
     return true;
 
+  bool IsRIPRelTLS = false;
   bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
+  if (IsRIPRel) {
+    SDValue Val = N.getOperand(0);
+    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+      IsRIPRelTLS = true;
+  }
 
-  // We can't use an addressing mode in the 64-bit large code model. In the
-  // medium code model, we use can use an mode when RIP wrappers are present.
-  // That signifies access to globals that are known to be "near", such as the
-  // GOT itself.
+  // We can't use an addressing mode in the 64-bit large code model.
+  // Global TLS addressing is an exception. In the medium code model,
+  // we can use a mode when RIP wrappers are present.
+  // That signifies access to globals that are known to be "near",
+  // such as the GOT itself.
   CodeModel::Model M = TM.getCodeModel();
   if (Subtarget->is64Bit() &&
-      (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
+      ((M == CodeModel::Large && !IsRIPRelTLS) ||
+       (M == CodeModel::Medium && !IsRIPRel)))
     return true;
 
   // Base and index reg must be 0 in order to use %rip as base.
@@ -1212,20 +1477,25 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
   // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
   // because it has a smaller encoding.
   // TODO: Which other code models can use this?
-  if (TM.getCodeModel() == CodeModel::Small &&
-      Subtarget->is64Bit() &&
-      AM.Scale == 1 &&
-      AM.BaseType == X86ISelAddressMode::RegBase &&
-      AM.Base_Reg.getNode() == nullptr &&
-      AM.IndexReg.getNode() == nullptr &&
-      AM.SymbolFlags == X86II::MO_NO_FLAG &&
-      AM.hasSymbolicDisplacement())
-    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+  switch (TM.getCodeModel()) {
+    default: break;
+    case CodeModel::Small:
+    case CodeModel::Kernel:
+      if (Subtarget->is64Bit() &&
+          AM.Scale == 1 &&
+          AM.BaseType == X86ISelAddressMode::RegBase &&
+          AM.Base_Reg.getNode() == nullptr &&
+          AM.IndexReg.getNode() == nullptr &&
+          AM.SymbolFlags == X86II::MO_NO_FLAG &&
+          AM.hasSymbolicDisplacement())
+        AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+      break;
+  }
 
   return false;
 }
 
-bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
+bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
                                unsigned Depth) {
   // Add an artificial use to this node so that we can keep track of
   // it if it gets CSE'd with a different node.
@@ -1317,6 +1587,7 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
   insertDAGNode(DAG, N, ShlCount);
   insertDAGNode(DAG, N, Shl);
   DAG.ReplaceAllUsesWith(N, Shl);
+  DAG.RemoveDeadNode(N.getNode());
   AM.IndexReg = And;
   AM.Scale = (1 << ScaleLog);
   return false;
@@ -1326,13 +1597,31 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
 // allows us to fold the shift into this addressing mode. Returns false if the
 // transform succeeded.
 static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
-                                        uint64_t Mask,
-                                        SDValue Shift, SDValue X,
                                         X86ISelAddressMode &AM) {
+  SDValue Shift = N.getOperand(0);
+
+  // Use a signed mask so that shifting right will insert sign bits. These
+  // bits will be removed when we shift the result left so it doesn't matter
+  // what we use. This might allow a smaller immediate encoding.
+  int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
+
+  // If we have an any_extend feeding the AND, look through it to see if there
+  // is a shift behind it. But only if the AND doesn't use the extended bits.
+  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+  bool FoundAnyExtend = false;
+  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+      isUInt<32>(Mask)) {
+    FoundAnyExtend = true;
+    Shift = Shift.getOperand(0);
+  }
+
   if (Shift.getOpcode() != ISD::SHL ||
       !isa<ConstantSDNode>(Shift.getOperand(1)))
     return true;
 
+  SDValue X = Shift.getOperand(0);
+
   // Not likely to be profitable if either the AND or SHIFT node has more
   // than one use (unless all uses are for address computation). Besides,
   // isel mechanism requires their node ids to be reused.
@@ -1346,6 +1635,12 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
 
   MVT VT = N.getSimpleValueType();
   SDLoc DL(N);
+  if (FoundAnyExtend) {
+    SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
+    insertDAGNode(DAG, N, NewX);
+    X = NewX;
+  }
+
   SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
   SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -1359,6 +1654,7 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
   insertDAGNode(DAG, N, NewAnd);
   insertDAGNode(DAG, N, NewShift);
   DAG.ReplaceAllUsesWith(N, NewShift);
+  DAG.RemoveDeadNode(N.getNode());
 
   AM.Scale = 1 << ShiftAmt;
   AM.IndexReg = NewAnd;
@@ -1469,6 +1765,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
   insertDAGNode(DAG, N, NewSHLAmt);
   insertDAGNode(DAG, N, NewSHL);
   DAG.ReplaceAllUsesWith(N, NewSHL);
+  DAG.RemoveDeadNode(N.getNode());
 
   AM.Scale = 1 << AMShiftAmt;
   AM.IndexReg = NewSRL;
@@ -1527,6 +1824,7 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
   insertDAGNode(DAG, N, NewSHLAmt);
   insertDAGNode(DAG, N, NewSHL);
   DAG.ReplaceAllUsesWith(N, NewSHL);
+  DAG.RemoveDeadNode(N.getNode());
 
   AM.Scale = 1 << AMShiftAmt;
   AM.IndexReg = NewAnd;
@@ -1634,14 +1932,15 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // Scale must not be used already.
     if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
 
+    // We only handle up to 64-bit values here as those are what matter for
+    // addressing mode optimizations.
+    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
+           "Unexpected value size!");
+
     SDValue And = N.getOperand(0);
     if (And.getOpcode() != ISD::AND) break;
     SDValue X = And.getOperand(0);
 
-    // We only handle up to 64-bit values here as those are what matter for
-    // addressing mode optimizations.
-    if (X.getSimpleValueType().getSizeInBits() > 64) break;
-
     // The mask used for the transform is expected to be post-shift, but we
     // found the shift first so just apply the shift to the mask before passing
     // it down.
@@ -1712,9 +2011,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // Test if the LHS of the sub can be folded.
     X86ISelAddressMode Backup = AM;
     if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
+      N = Handle.getValue();
       AM = Backup;
       break;
     }
+    N = Handle.getValue();
     // Test if the index field is free for use.
     if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
       AM = Backup;
@@ -1722,7 +2023,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     }
 
     int Cost = 0;
-    SDValue RHS = Handle.getValue().getOperand(1);
+    SDValue RHS = N.getOperand(1);
     // If the RHS involves a register with multiple uses, this
     // transformation incurs an extra mov, due to the neg instruction
     // clobbering its operand.
@@ -1735,9 +2036,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       ++Cost;
     // If the base is a register with multiple uses, this
     // transformation may save a mov.
-    // FIXME: Don't rely on DELETED_NODEs.
     if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
-         AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
          !AM.Base_Reg.getNode()->hasOneUse()) ||
         AM.BaseType == X86ISelAddressMode::FrameIndexBase)
       --Cost;
@@ -1754,14 +2053,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     }
 
     // Ok, the transformation is legal and appears profitable. Go for it.
-    SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
-    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
-    AM.IndexReg = Neg;
+    // Negation will be emitted later to avoid creating dangling nodes if this
+    // was an unprofitable LEA.
+    AM.IndexReg = RHS;
+    AM.NegateIndex = true;
     AM.Scale = 1;
-
-    // Insert the new nodes into the topological ordering.
-    insertDAGNode(*CurDAG, Handle.getValue(), Zero);
-    insertDAGNode(*CurDAG, Handle.getValue(), Neg);
     return false;
   }
 
@@ -1789,37 +2085,77 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // Scale must not be used already.
     if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
 
-    SDValue Shift = N.getOperand(0);
-    if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
-    SDValue X = Shift.getOperand(0);
-
     // We only handle up to 64-bit values here as those are what matter for
     // addressing mode optimizations.
-    if (X.getSimpleValueType().getSizeInBits() > 64) break;
+    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
+           "Unexpected value size!");
 
     if (!isa<ConstantSDNode>(N.getOperand(1)))
       break;
-    uint64_t Mask = N.getConstantOperandVal(1);
 
-    // Try to fold the mask and shift into an extract and scale.
-    if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+    if (N.getOperand(0).getOpcode() == ISD::SRL) {
+      SDValue Shift = N.getOperand(0);
+      SDValue X = Shift.getOperand(0);
 
-    // Try to fold the mask and shift directly into the scale.
-    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
+      uint64_t Mask = N.getConstantOperandVal(1);
+
+      // Try to fold the mask and shift into an extract and scale.
+      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift directly into the scale.
+      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+        return false;
+
+      // Try to fold the mask and shift into BEXTR and scale.
+      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+        return false;
+    }
 
     // Try to swap the mask and shift to place shifts which can be done as
     // a scale on the outside of the mask.
-    if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
-      return false;
-
-    // Try to fold the mask and shift into BEXTR and scale.
-    if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+    if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
       return false;
 
     break;
   }
+  case ISD::ZERO_EXTEND: {
+    // Try to widen a zexted shift left to the same size as its use, so we can
+    // match the shift as a scale factor.
+    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
+      break;
+    if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
+      break;
+
+    // Give up if the shift is not a valid scale factor [1,2,3].
+    SDValue Shl = N.getOperand(0);
+    auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
+    if (!ShAmtC || ShAmtC->getZExtValue() > 3)
+      break;
+
+    // The narrow shift must only shift out zero bits (it must be 'nuw').
+    // That makes it safe to widen to the destination type.
+    APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
+                                            ShAmtC->getZExtValue());
+    if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
+      break;
+
+    // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
+    MVT VT = N.getSimpleValueType();
+    SDLoc DL(N);
+    SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
+    SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
+
+    // Convert the shift to scale factor.
+    AM.Scale = 1 << ShAmtC->getZExtValue();
+    AM.IndexReg = Zext;
+
+    insertDAGNode(*CurDAG, N, Zext);
+    insertDAGNode(*CurDAG, N, NewShl);
+    CurDAG->ReplaceAllUsesWith(N, NewShl);
+    CurDAG->RemoveDeadNode(N.getNode());
+    return false;
+  }
   }
 
   return matchAddressBase(N, AM);
@@ -1885,17 +2221,14 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
   if (AddrSpace == 258)
     AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
 
+  SDLoc DL(N);
+  MVT VT = N.getSimpleValueType();
+
   // Try to match into the base and displacement fields.
   if (matchVectorAddress(N, AM))
     return false;
 
-  MVT VT = N.getSimpleValueType();
-  if (AM.BaseType == X86ISelAddressMode::RegBase) {
-    if (!AM.Base_Reg.getNode())
-      AM.Base_Reg = CurDAG->getRegister(0, VT);
-  }
-
-  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
   return true;
 }
 
@@ -1917,6 +2250,8 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
       Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+      Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
+      Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
       Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
       Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
     unsigned AddrSpace =
@@ -1930,19 +2265,14 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
   }
 
-  if (matchAddress(N, AM))
-    return false;
-
+  // Save the DL and VT before calling matchAddress; it can invalidate N.
+  SDLoc DL(N);
   MVT VT = N.getSimpleValueType();
-  if (AM.BaseType == X86ISelAddressMode::RegBase) {
-    if (!AM.Base_Reg.getNode())
-      AM.Base_Reg = CurDAG->getRegister(0, VT);
-  }
 
-  if (!AM.IndexReg.getNode())
-    AM.IndexReg = CurDAG->getRegister(0, VT);
+  if (matchAddress(N, AM))
+    return false;
 
-  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
   return true;
 }
 
@@ -1974,12 +2304,14 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
   if (!hasSingleUsesFromRoot(Root, Parent))
     return false;
 
-  // We can allow a full vector load here since narrowing a load is ok.
+  // We can allow a full vector load here since narrowing a load is ok unless
+  // it's volatile.
   if (ISD::isNON_EXTLoad(N.getNode())) {
-    PatternNodeWithChain = N;
-    if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
-      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    if (!LD->isVolatile() &&
+        IsProfitableToFold(N, LD, Root) &&
+        IsLegalToFold(N, Parent, Root, OptLevel)) {
+      PatternNodeWithChain = N;
       return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
                         Segment);
     }
@@ -2010,23 +2342,6 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
     }
   }
 
-  // Also handle the case where we explicitly require zeros in the top
-  // elements.  This is a vector shuffle from the zero vector.
-  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
-      // Check to see if the top elements are all zeros (or bitcast of zeros).
-      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
-      N.getOperand(0).getNode()->hasOneUse()) {
-    PatternNodeWithChain = N.getOperand(0).getOperand(0);
-    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
-        IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
-        IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
-      // Okay, this is a zero extending load.  Fold it.
-      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
-      return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
-                        Segment);
-    }
-  }
-
   return false;
 }
 
@@ -2077,14 +2392,12 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
   RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
   if (RN && RN->getReg() == 0)
     Base = CurDAG->getRegister(0, MVT::i64);
-  else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
+  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
     // Base could already be %rip, particularly in the x32 ABI.
-    Base = SDValue(CurDAG->getMachineNode(
-                       TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
-                       CurDAG->getTargetConstant(0, DL, MVT::i64),
-                       Base,
-                       CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
-                   0);
+    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
+                                                     MVT::i64), 0);
+    Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
+                                         Base);
   }
 
   RN = dyn_cast<RegisterSDNode>(Index);
@@ -2093,13 +2406,10 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
   else {
     assert(Index.getValueType() == MVT::i32 &&
            "Expect to be extending 32-bit registers for use in LEA");
-    Index = SDValue(CurDAG->getMachineNode(
-                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
-                        CurDAG->getTargetConstant(0, DL, MVT::i64),
-                        Index,
-                        CurDAG->getTargetConstant(X86::sub_32bit, DL,
-                                                  MVT::i32)),
-                    0);
+    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
+                                                     MVT::i64), 0);
+    Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
+                                          Index);
   }
 
   return true;
@@ -2128,18 +2438,13 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
   AM.Segment = Copy;
 
   unsigned Complexity = 0;
-  if (AM.BaseType == X86ISelAddressMode::RegBase)
-    if (AM.Base_Reg.getNode())
-      Complexity = 1;
-    else
-      AM.Base_Reg = CurDAG->getRegister(0, VT);
+  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
+    Complexity = 1;
   else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
     Complexity = 4;
 
   if (AM.IndexReg.getNode())
     Complexity++;
-  else
-    AM.IndexReg = CurDAG->getRegister(0, VT);
 
   // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
   // a simple shift.
@@ -2159,14 +2464,14 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
       Complexity += 2;
   }
 
-  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+  if (AM.Disp)
     Complexity++;
 
   // If it isn't worth using an LEA, reject it.
   if (Complexity <= 2)
     return false;
 
-  getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
+  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
   return true;
 }
 
@@ -2180,17 +2485,15 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
   X86ISelAddressMode AM;
   AM.GV = GA->getGlobal();
   AM.Disp += GA->getOffset();
-  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
   AM.SymbolFlags = GA->getTargetFlags();
 
-  if (N.getValueType() == MVT::i32) {
+  MVT VT = N.getSimpleValueType();
+  if (VT == MVT::i32) {
     AM.Scale = 1;
     AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
-  } else {
-    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
   }
 
-  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
   return true;
 }
 
@@ -2274,14 +2577,22 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
          CR->getSignedMax().slt(1ull << Width);
 }
 
-static X86::CondCode getCondFromOpc(unsigned Opc) {
+static X86::CondCode getCondFromNode(SDNode *N) {
+  assert(N->isMachineOpcode() && "Unexpected node");
   X86::CondCode CC = X86::COND_INVALID;
-  if (CC == X86::COND_INVALID)
-    CC = X86::getCondFromBranchOpc(Opc);
-  if (CC == X86::COND_INVALID)
-    CC = X86::getCondFromSETOpc(Opc);
-  if (CC == X86::COND_INVALID)
-    CC = X86::getCondFromCMovOpc(Opc);
+  unsigned Opc = N->getMachineOpcode();
+  if (Opc == X86::JCC_1)
+    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
+  else if (Opc == X86::SETCCr)
+    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
+  else if (Opc == X86::SETCCm)
+    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
+  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
+           Opc == X86::CMOV64rr)
+    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
+  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
+           Opc == X86::CMOV64rm)
+    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
 
   return CC;
 }
@@ -2307,7 +2618,7 @@ bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
       // Anything unusual: assume conservatively.
       if (!FlagUI->isMachineOpcode()) return false;
       // Examine the condition code of the user.
-      X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+      X86::CondCode CC = getCondFromNode(*FlagUI);
 
       switch (CC) {
       // Comparisons which only use the zero flag.
@@ -2343,7 +2654,7 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
       // Anything unusual: assume conservatively.
       if (!FlagUI->isMachineOpcode()) return false;
       // Examine the condition code of the user.
-      X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+      X86::CondCode CC = getCondFromNode(*FlagUI);
 
       switch (CC) {
       // Comparisons which don't examine the SF flag.
@@ -2404,7 +2715,7 @@ static bool mayUseCarryFlag(X86::CondCode CC) {
         if (!FlagUI->isMachineOpcode())
           return false;
         // Examine the condition code of the user.
-        X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+        X86::CondCode CC = getCondFromNode(*FlagUI);
 
         if (mayUseCarryFlag(CC))
           return false;
@@ -2582,10 +2893,13 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
     return false;
 
   bool IsCommutable = false;
+  bool IsNegate = false;
   switch (Opc) {
   default:
     return false;
   case X86ISD::SUB:
+    IsNegate = isNullConstant(StoredVal.getOperand(0));
+    break;
   case X86ISD::SBB:
     break;
   case X86ISD::ADD:
@@ -2597,7 +2911,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
     break;
   }
 
-  unsigned LoadOpNo = 0;
+  unsigned LoadOpNo = IsNegate ? 1 : 0;
   LoadSDNode *LoadNode = nullptr;
   SDValue InputChain;
   if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
@@ -2635,11 +2949,20 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
 
   MachineSDNode *Result;
   switch (Opc) {
-  case X86ISD::ADD:
   case X86ISD::SUB:
+    // Handle negate.
+    if (IsNegate) {
+      unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
+                                     X86::NEG8m);
+      const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+                                      MVT::Other, Ops);
+      break;
+    }
+   LLVM_FALLTHROUGH;
+  case X86ISD::ADD:
     // Try to match inc/dec.
-    if (!Subtarget->slowIncDec() ||
-        CurDAG->getMachineFunction().getFunction().optForSize()) {
+    if (!Subtarget->slowIncDec() || OptForSize) {
       bool IsOne = isOneConstant(StoredVal.getOperand(1));
       bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
       // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
@@ -2740,16 +3063,15 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
     // See if the operand is a constant that we can fold into an immediate
     // operand.
     if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
-      auto OperandV = OperandC->getAPIntValue();
+      int64_t OperandV = OperandC->getSExtValue();
 
       // Check if we can shrink the operand enough to fit in an immediate (or
       // fit into a smaller immediate) by negating it and switching the
       // operation.
       if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
-          ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
-            (-OperandV).getMinSignedBits() <= 8) ||
-           (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
-            (-OperandV).getMinSignedBits() <= 32)) &&
+          ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
+           (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
+            isInt<32>(-OperandV))) &&
           hasNoCarryFlagUses(StoredVal.getValue(1))) {
         OperandV = -OperandV;
         Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
@@ -2757,11 +3079,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
 
       // First try to fit this into an Imm8 operand. If it doesn't fit, then try
       // the larger immediate operand.
-      if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
+      if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
         Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
         NewOpc = SelectImm8Opcode(Opc);
-      } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
-                 (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
+      } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
         Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
         NewOpc = SelectImmOpcode(Opc);
       }
@@ -2821,8 +3142,6 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
   if (NVT != MVT::i32 && NVT != MVT::i64)
     return false;
 
-  unsigned Size = NVT.getSizeInBits();
-
   SDValue NBits;
 
   // If we have BMI2's BZHI, we are ok with muti-use patterns.
@@ -2835,16 +3154,27 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
   auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
   auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
 
+  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
+    if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
+      assert(V.getSimpleValueType() == MVT::i32 &&
+             V.getOperand(0).getSimpleValueType() == MVT::i64 &&
+             "Expected i64 -> i32 truncation");
+      V = V.getOperand(0);
+    }
+    return V;
+  };
+
   // a) x & ((1 << nbits) + (-1))
-  auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
+                        &NBits](SDValue Mask) -> bool {
     // Match `add`. Must only have one use!
     if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
       return false;
     // We should be adding all-ones constant (i.e. subtracting one.)
     if (!isAllOnesConstant(Mask->getOperand(1)))
       return false;
-    // Match `1 << nbits`. Must only have one use!
-    SDValue M0 = Mask->getOperand(0);
+    // Match `1 << nbits`. Might be truncated. Must only have one use!
+    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
     if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
       return false;
     if (!isOneConstant(M0->getOperand(0)))
@@ -2853,23 +3183,36 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
     return true;
   };
 
+  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
+    V = peekThroughOneUseTruncation(V);
+    return CurDAG->MaskedValueIsAllOnes(
+        V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
+                                NVT.getSizeInBits()));
+  };
+
   // b) x & ~(-1 << nbits)
-  auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
+                        &NBits](SDValue Mask) -> bool {
     // Match `~()`. Must only have one use!
-    if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
+    if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
       return false;
-    // Match `-1 << nbits`. Must only have one use!
-    SDValue M0 = Mask->getOperand(0);
+    // The -1 only has to be all-ones for the final Node's NVT.
+    if (!isAllOnes(Mask->getOperand(1)))
+      return false;
+    // Match `-1 << nbits`. Might be truncated. Must only have one use!
+    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
     if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
       return false;
-    if (!isAllOnesConstant(M0->getOperand(0)))
+    // The -1 only has to be all-ones for the final Node's NVT.
+    if (!isAllOnes(M0->getOperand(0)))
       return false;
     NBits = M0->getOperand(1);
     return true;
   };
 
   // Match potentially-truncated (bitwidth - y)
-  auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
+  auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
+                                             unsigned Bitwidth) {
     // Skip over a truncate of the shift amount.
     if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
       ShiftAmt = ShiftAmt.getOperand(0);
@@ -2881,52 +3224,56 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
     if (ShiftAmt.getOpcode() != ISD::SUB)
       return false;
     auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
-    if (!V0 || V0->getZExtValue() != Size)
+    if (!V0 || V0->getZExtValue() != Bitwidth)
       return false;
     NBits = ShiftAmt.getOperand(1);
     return true;
   };
 
   // c) x &  (-1 >> (32 - y))
-  auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
+  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
+                        matchShiftAmt](SDValue Mask) -> bool {
+    // The mask itself may be truncated.
+    Mask = peekThroughOneUseTruncation(Mask);
+    unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
     // Match `l>>`. Must only have one use!
     if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
       return false;
-    // We should be shifting all-ones constant.
+    // We should be shifting a truly all-ones constant.
     if (!isAllOnesConstant(Mask.getOperand(0)))
       return false;
     SDValue M1 = Mask.getOperand(1);
     // The shift amount should not be used externally.
     if (!checkOneUse(M1))
       return false;
-    return matchShiftAmt(M1);
+    return matchShiftAmt(M1, Bitwidth);
   };
 
   SDValue X;
 
   // d) x << (32 - y) >> (32 - y)
-  auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
+  auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
                         &X](SDNode *Node) -> bool {
     if (Node->getOpcode() != ISD::SRL)
       return false;
     SDValue N0 = Node->getOperand(0);
     if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
       return false;
+    unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
     SDValue N1 = Node->getOperand(1);
     SDValue N01 = N0->getOperand(1);
     // Both of the shifts must be by the exact same value.
     // There should not be any uses of the shift amount outside of the pattern.
     if (N1 != N01 || !checkTwoUse(N1))
       return false;
-    if (!matchShiftAmt(N1))
+    if (!matchShiftAmt(N1, Bitwidth))
       return false;
     X = N0->getOperand(0);
     return true;
   };
 
-  auto matchLowBitMask = [&matchPatternA, &matchPatternB,
-                          &matchPatternC](SDValue Mask) -> bool {
-    // FIXME: pattern c.
+  auto matchLowBitMask = [matchPatternA, matchPatternB,
+                          matchPatternC](SDValue Mask) -> bool {
     return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
   };
 
@@ -2946,42 +3293,46 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
 
   SDLoc DL(Node);
 
-  // If we do *NOT* have BMI2, let's find out if the if the 'X' is *logically*
-  // shifted (potentially with one-use trunc inbetween),
-  // and if so look past one-use truncation.
-  MVT XVT = NVT;
-  if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
-      X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
-    assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
-    X = X.getOperand(0);
-    XVT = X.getSimpleValueType();
-    assert(XVT == MVT::i64 && "Expected truncation from i64");
-  }
+  // Truncate the shift amount.
+  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
+  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
 
-  SDValue OrigNBits = NBits;
-  if (NBits.getValueType() != XVT) {
-    // Truncate the shift amount.
-    NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
-    insertDAGNode(*CurDAG, OrigNBits, NBits);
-
-    // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
-    // register. All the other bits are undefined, we do not care about them.
-    SDValue ImplDef =
-        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
-    insertDAGNode(*CurDAG, OrigNBits, ImplDef);
-    NBits =
-        CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
-    insertDAGNode(*CurDAG, OrigNBits, NBits);
-  }
+  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
+  // All the other bits are undefined, we do not care about them.
+  SDValue ImplDef = SDValue(
+      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
+  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
+  NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
+                                        NBits);
+  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
 
   if (Subtarget->hasBMI2()) {
     // Great, just emit the the BZHI..
-    SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+    if (NVT != MVT::i32) {
+      // But have to place the bit count into the wide-enough register first.
+      NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
+      insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+    }
+
+    SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
     ReplaceNode(Node, Extract.getNode());
     SelectCode(Extract.getNode());
     return true;
   }
 
+  // Else, if we do *NOT* have BMI2, let's find out if the 'X' is
+  // *logically* shifted (potentially with one-use trunc in between),
+  // and the truncation was the only use of the shift,
+  // and if so look past one-use truncation.
+  {
+    SDValue RealX = peekThroughOneUseTruncation(X);
+    // FIXME: only if the shift is one-use?
+    if (RealX != X && RealX.getOpcode() == ISD::SRL)
+      X = RealX;
+  }
+
+  MVT XVT = X.getSimpleValueType();
+
   // Else, emitting BEXTR requires one more step.
   // The 'control' of BEXTR has the pattern of:
   // [15...8 bit][ 7...0 bit] location
@@ -2991,10 +3342,11 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
   // Shift NBits left by 8 bits, thus producing 'control'.
   // This makes the low 8 bits to be zero.
   SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
-  SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
-  insertDAGNode(*CurDAG, OrigNBits, Control);
+  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
+  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
 
   // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+  // FIXME: only if the shift is one-use?
   if (X.getOpcode() == ISD::SRL) {
     SDValue ShiftAmt = X.getOperand(1);
     X = X.getOperand(0);
@@ -3003,13 +3355,20 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
            "Expected shift amount to be i8");
 
     // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
+    // We could zext to i16 in some form, but we intentionally don't do that.
     SDValue OrigShiftAmt = ShiftAmt;
-    ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
+    ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
     insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
 
     // And now 'or' these low 8 bits of shift amount into the 'control'.
-    Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
-    insertDAGNode(*CurDAG, OrigNBits, Control);
+    Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
+    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
+  }
+
+  // But have to place the 'control' into the wide-enough register first.
+  if (XVT != MVT::i32) {
+    Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
+    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
   }
 
   // And finally, form the BEXTR itself.
@@ -3017,7 +3376,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
 
   // The 'X' was originally truncated. Do that now.
   if (XVT != NVT) {
-    insertDAGNode(*CurDAG, OrigNBits, Extract);
+    insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
     Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
   }
 
@@ -3098,14 +3457,14 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
   SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
   if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
-    SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
     NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
     // Update the chain.
-    ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+    ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
     // Record the mem-refs
     CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
   } else {
-    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+    NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
   }
 
   return NewNode;
@@ -3263,6 +3622,119 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
   return true;
 }
 
+bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
+  MVT NVT = N->getSimpleValueType(0);
+  unsigned Opcode = N->getOpcode();
+  SDLoc dl(N);
+
+  // For operations of the form (x << C1) op C2, check if we can use a smaller
+  // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+  SDValue Shift = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+  if (!Cst)
+    return false;
+
+  int64_t Val = Cst->getSExtValue();
+
+  // If we have an any_extend feeding the AND, look through it to see if there
+  // is a shift behind it. But only if the AND doesn't use the extended bits.
+  // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+  bool FoundAnyExtend = false;
+  if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+      Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+      isUInt<32>(Val)) {
+    FoundAnyExtend = true;
+    Shift = Shift.getOperand(0);
+  }
+
+  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
+    return false;
+
+  // i8 is unshrinkable, i16 should be promoted to i32.
+  if (NVT != MVT::i32 && NVT != MVT::i64)
+    return false;
+
+  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+  if (!ShlCst)
+    return false;
+
+  uint64_t ShAmt = ShlCst->getZExtValue();
+
+  // Make sure that we don't change the operation by removing bits.
+  // This only matters for OR and XOR, AND is unaffected.
+  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
+  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+    return false;
+
+  // Check the minimum bitwidth for the new constant.
+  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
+    if (Opcode == ISD::AND) {
+      // AND32ri is the same as AND64ri32 with zext imm.
+      // Try this before sign extended immediates below.
+      ShiftedVal = (uint64_t)Val >> ShAmt;
+      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+        return true;
+      // Also swap order when the AND can become MOVZX.
+      if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
+        return true;
+    }
+    ShiftedVal = Val >> ShAmt;
+    if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
+        (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
+      return true;
+    if (Opcode != ISD::AND) {
+      // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
+      ShiftedVal = (uint64_t)Val >> ShAmt;
+      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+        return true;
+    }
+    return false;
+  };
+
+  int64_t ShiftedVal;
+  if (!CanShrinkImmediate(ShiftedVal))
+    return false;
+
+  // Ok, we can reorder to get a smaller immediate.
+
+  // But, it's possible the original immediate allowed an AND to become MOVZX.
+  // Do this check late to delay the MaskedValueIsZero call for as long as
+  // possible.
+  if (Opcode == ISD::AND) {
+    // Find the smallest zext this could possibly be.
+    unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
+    ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
+
+    // Figure out which bits need to be zero to achieve that mask.
+    APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
+                                            ZExtWidth);
+    NeededMask &= ~Cst->getAPIntValue();
+
+    if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
+      return false;
+  }
+
+  SDValue X = Shift.getOperand(0);
+  if (FoundAnyExtend) {
+    SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
+    insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
+    X = NewX;
+  }
+
+  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
+  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
+  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
+  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
+  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
+                                   Shift.getOperand(1));
+  ReplaceNode(N, NewSHL.getNode());
+  SelectCode(NewSHL.getNode());
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -3333,6 +3805,347 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
   return true;
 }
 
+static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
+                              bool FoldedBCast, bool Masked) {
+  if (Masked) {
+    if (FoldedLoad) {
+      switch (TestVT.SimpleTy) {
+      default: llvm_unreachable("Unexpected VT!");
+      case MVT::v16i8:
+        return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
+      case MVT::v8i16:
+        return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
+      case MVT::v4i32:
+        return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
+      case MVT::v2i64:
+        return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
+      case MVT::v32i8:
+        return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
+      case MVT::v16i16:
+        return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
+      case MVT::v8i32:
+        return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
+      case MVT::v4i64:
+        return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
+      case MVT::v64i8:
+        return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
+      case MVT::v32i16:
+        return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
+      case MVT::v16i32:
+        return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
+      case MVT::v8i64:
+        return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
+      }
+    }
+
+    if (FoldedBCast) {
+      switch (TestVT.SimpleTy) {
+      default: llvm_unreachable("Unexpected VT!");
+      case MVT::v4i32:
+        return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
+      case MVT::v2i64:
+        return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
+      case MVT::v8i32:
+        return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
+      case MVT::v4i64:
+        return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
+      case MVT::v16i32:
+        return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
+      case MVT::v8i64:
+        return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
+      }
+    }
+
+    switch (TestVT.SimpleTy) {
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::v16i8:
+      return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
+    case MVT::v8i16:
+      return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
+    case MVT::v4i32:
+      return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
+    case MVT::v2i64:
+      return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
+    case MVT::v32i8:
+      return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
+    case MVT::v16i16:
+      return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
+    case MVT::v8i32:
+      return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
+    case MVT::v4i64:
+      return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
+    case MVT::v64i8:
+      return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
+    case MVT::v32i16:
+      return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
+    case MVT::v16i32:
+      return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
+    case MVT::v8i64:
+      return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
+    }
+  }
+
+  if (FoldedLoad) {
+    switch (TestVT.SimpleTy) {
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::v16i8:
+      return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
+    case MVT::v8i16:
+      return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
+    case MVT::v4i32:
+      return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
+    case MVT::v2i64:
+      return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
+    case MVT::v32i8:
+      return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
+    case MVT::v16i16:
+      return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
+    case MVT::v8i32:
+      return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
+    case MVT::v4i64:
+      return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
+    case MVT::v64i8:
+      return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
+    case MVT::v32i16:
+      return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
+    case MVT::v16i32:
+      return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
+    case MVT::v8i64:
+      return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
+    }
+  }
+
+  if (FoldedBCast) {
+    switch (TestVT.SimpleTy) {
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::v4i32:
+      return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
+    case MVT::v2i64:
+      return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
+    case MVT::v8i32:
+      return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
+    case MVT::v4i64:
+      return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
+    case MVT::v16i32:
+      return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
+    case MVT::v8i64:
+      return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
+    }
+  }
+
+  switch (TestVT.SimpleTy) {
+  default: llvm_unreachable("Unexpected VT!");
+  case MVT::v16i8:
+    return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
+  case MVT::v8i16:
+    return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
+  case MVT::v4i32:
+    return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
+  case MVT::v2i64:
+    return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
+  case MVT::v32i8:
+    return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
+  case MVT::v16i16:
+    return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
+  case MVT::v8i32:
+    return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
+  case MVT::v4i64:
+    return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
+  case MVT::v64i8:
+    return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
+  case MVT::v32i16:
+    return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
+  case MVT::v16i32:
+    return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
+  case MVT::v8i64:
+    return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
+  }
+}
+
+// Try to create VPTESTM instruction. If InMask is not null, it will be used
+// to form a masked operation.
+bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
+                                 SDValue InMask) {
+  assert(Subtarget->hasAVX512() && "Expected AVX512!");
+  assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+         "Unexpected VT!");
+
+  // Look for equal and not equal compares.
+  ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
+  if (CC != ISD::SETEQ && CC != ISD::SETNE)
+    return false;
+
+  // See if we're comparing against zero. This should have been canonicalized
+  // to RHS during lowering.
+  if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
+    return false;
+
+  SDValue N0 = Setcc.getOperand(0);
+
+  MVT CmpVT = N0.getSimpleValueType();
+  MVT CmpSVT = CmpVT.getVectorElementType();
+
+  // Start with both operands the same. We'll try to refine this.
+  SDValue Src0 = N0;
+  SDValue Src1 = N0;
+
+  {
+    // Look through single use bitcasts.
+    SDValue N0Temp = N0;
+    if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
+      N0Temp = N0.getOperand(0);
+
+    // Look for single use AND.
+    if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
+      Src0 = N0Temp.getOperand(0);
+      Src1 = N0Temp.getOperand(1);
+    }
+  }
+
+  // Without VLX we need to widen the load.
+  bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
+
+  // We can only fold loads if the sources are unique.
+  bool CanFoldLoads = Src0 != Src1;
+
+  // Try to fold loads unless we need to widen.
+  bool FoldedLoad = false;
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
+  if (!Widen && CanFoldLoads) {
+    Load = Src1;
+    FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
+                             Tmp4);
+    if (!FoldedLoad) {
+      // AND is commutative.
+      Load = Src0;
+      FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
+                               Tmp3, Tmp4);
+      if (FoldedLoad)
+        std::swap(Src0, Src1);
+    }
+  }
+
+  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
+    // Look through single use bitcasts.
+    if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
+      Src = Src.getOperand(0);
+
+    if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
+      Parent = Src.getNode();
+      Src = Src.getOperand(0);
+      if (Src.getSimpleValueType() == CmpSVT)
+        return Src;
+    }
+
+    return SDValue();
+  };
+
+  // If we didn't fold a load, try to match broadcast. No widening limitation
+  // for this. But only 32 and 64 bit types are supported.
+  bool FoldedBCast = false;
+  if (!FoldedLoad && CanFoldLoads &&
+      (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
+    SDNode *ParentNode = nullptr;
+    if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
+      FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
+                                Tmp1, Tmp2, Tmp3, Tmp4);
+    }
+
+    // Try the other operand.
+    if (!FoldedBCast) {
+      if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
+        FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
+                                  Tmp1, Tmp2, Tmp3, Tmp4);
+        if (FoldedBCast)
+          std::swap(Src0, Src1);
+      }
+    }
+  }
+
+  auto getMaskRC = [](MVT MaskVT) {
+    switch (MaskVT.SimpleTy) {
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::v2i1:  return X86::VK2RegClassID;
+    case MVT::v4i1:  return X86::VK4RegClassID;
+    case MVT::v8i1:  return X86::VK8RegClassID;
+    case MVT::v16i1: return X86::VK16RegClassID;
+    case MVT::v32i1: return X86::VK32RegClassID;
+    case MVT::v64i1: return X86::VK64RegClassID;
+    }
+  };
+
+  bool IsMasked = InMask.getNode() != nullptr;
+
+  SDLoc dl(Root);
+
+  MVT ResVT = Setcc.getSimpleValueType();
+  MVT MaskVT = ResVT;
+  if (Widen) {
+    // Widen the inputs using insert_subreg or copy_to_regclass.
+    unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
+    unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
+    unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
+    CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
+    MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
+                                                     CmpVT), 0);
+    Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
+
+    assert(!FoldedLoad && "Shouldn't have folded the load");
+    if (!FoldedBCast)
+      Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
+
+    if (IsMasked) {
+      // Widen the mask.
+      unsigned RegClass = getMaskRC(MaskVT);
+      SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
+      InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                              dl, MaskVT, InMask, RC), 0);
+    }
+  }
+
+  bool IsTestN = CC == ISD::SETEQ;
+  unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
+                               IsMasked);
+
+  MachineSDNode *CNode;
+  if (FoldedLoad || FoldedBCast) {
+    SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
+
+    if (IsMasked) {
+      SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
+                        Load.getOperand(0) };
+      CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+    } else {
+      SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
+                        Load.getOperand(0) };
+      CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+    }
+
+    // Update the chain.
+    ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
+    // Record the mem-refs
+    CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
+  } else {
+    if (IsMasked)
+      CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
+    else
+      CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
+  }
+
+  // If we widened, we need to shrink the mask VT.
+  if (Widen) {
+    unsigned RegClass = getMaskRC(ResVT);
+    SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
+    CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                   dl, ResVT, SDValue(CNode, 0), RC);
+  }
+
+  ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
+  CurDAG->RemoveDeadNode(Root);
+  return true;
+}
+
 void X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opcode = Node->getOpcode();
@@ -3346,6 +4159,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
 
   switch (Opcode) {
   default: break;
+  case ISD::INTRINSIC_VOID: {
+    unsigned IntNo = Node->getConstantOperandVal(1);
+    switch (IntNo) {
+    default: break;
+    case Intrinsic::x86_sse3_monitor:
+    case Intrinsic::x86_monitorx:
+    case Intrinsic::x86_clzero: {
+      bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
+
+      unsigned Opc = 0;
+      switch (IntNo) {
+      case Intrinsic::x86_sse3_monitor:
+        if (!Subtarget->hasSSE3())
+          break;
+        Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
+        break;
+      case Intrinsic::x86_monitorx:
+        if (!Subtarget->hasMWAITX())
+          break;
+        Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
+        break;
+      case Intrinsic::x86_clzero:
+        if (!Subtarget->hasCLZERO())
+          break;
+        Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
+        break;
+      }
+
+      if (Opc) {
+        unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
+        SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
+                                             Node->getOperand(2), SDValue());
+        SDValue InFlag = Chain.getValue(1);
+
+        if (IntNo == Intrinsic::x86_sse3_monitor ||
+            IntNo == Intrinsic::x86_monitorx) {
+          // Copy the other two operands to ECX and EDX.
+          Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
+                                       InFlag);
+          InFlag = Chain.getValue(1);
+          Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
+                                       InFlag);
+          InFlag = Chain.getValue(1);
+        }
+
+        MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+                                                      { Chain, InFlag});
+        ReplaceNode(Node, CNode);
+        return;
+      }
+    }
+    }
+
+    break;
+  }
   case ISD::BRIND: {
     if (Subtarget->isTargetNaCl())
       // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
@@ -3381,13 +4249,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     }
     break;
 
-  case X86ISD::BLENDV: {
-    // BLENDV selects like a regular VSELECT.
-    SDValue VSelect = CurDAG->getNode(
-        ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+  case ISD::VSELECT: {
+    // Replace VSELECT with non-mask conditions with BLENDV.
+    if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+      break;
+
+    assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
+    SDValue Blendv = CurDAG->getNode(
+        X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
         Node->getOperand(1), Node->getOperand(2));
-    ReplaceNode(Node, VSelect.getNode());
-    SelectCode(VSelect.getNode());
+    ReplaceNode(Node, Blendv.getNode());
+    SelectCode(Blendv.getNode());
     // We already called ReplaceUses.
     return;
   }
@@ -3403,6 +4275,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     break;
 
   case ISD::AND:
+    if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
+      // Try to form a masked VPTESTM. Operands can be in either order.
+      SDValue N0 = Node->getOperand(0);
+      SDValue N1 = Node->getOperand(1);
+      if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
+          tryVPTESTM(Node, N0, N1))
+        return;
+      if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
+          tryVPTESTM(Node, N1, N0))
+        return;
+    }
+
     if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
       ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
       CurDAG->RemoveDeadNode(Node);
@@ -3415,89 +4299,113 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
 
     LLVM_FALLTHROUGH;
   case ISD::OR:
-  case ISD::XOR: {
-
-    // For operations of the form (x << C1) op C2, check if we can use a smaller
-    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
-    SDValue N0 = Node->getOperand(0);
-    SDValue N1 = Node->getOperand(1);
+  case ISD::XOR:
+    if (tryShrinkShlLogicImm(Node))
+      return;
 
-    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+    LLVM_FALLTHROUGH;
+  case ISD::ADD:
+  case ISD::SUB: {
+    // Try to avoid folding immediates with multiple uses for optsize.
+    // This code tries to select to register form directly to avoid going
+    // through the isel table which might fold the immediate. We can't change
+    // the patterns on the add/sub/and/or/xor with immediate patterns in the
+    // tablegen files to check immediate use count without making the patterns
+    // unavailable to the fast-isel table.
+    if (!OptForSize)
       break;
 
-    // i8 is unshrinkable, i16 should be promoted to i32.
-    if (NVT != MVT::i32 && NVT != MVT::i64)
+    // Only handle i8/i16/i32/i64.
+    if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
       break;
 
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
     ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
-    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
-    if (!Cst || !ShlCst)
+    if (!Cst)
       break;
 
     int64_t Val = Cst->getSExtValue();
-    uint64_t ShlVal = ShlCst->getZExtValue();
 
-    // Make sure that we don't change the operation by removing bits.
-    // This only matters for OR and XOR, AND is unaffected.
-    uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
-    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+    // Make sure it's an immediate that is considered foldable.
+    // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
+    if (!isInt<8>(Val) && !isInt<32>(Val))
       break;
 
-    unsigned ShlOp, AddOp, Op;
-    MVT CstVT = NVT;
-
-    // Check the minimum bitwidth for the new constant.
-    // TODO: AND32ri is the same as AND64ri32 with zext imm.
-    // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
-    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
-    if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
-      CstVT = MVT::i8;
-    else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
-      CstVT = MVT::i32;
-
-    // Bail if there is no smaller encoding.
-    if (NVT == CstVT)
+    // Check if we should avoid folding this immediate.
+    if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
       break;
 
+    // We should not fold the immediate. So we need a register form instead.
+    unsigned ROpc, MOpc;
     switch (NVT.SimpleTy) {
-    default: llvm_unreachable("Unsupported VT!");
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::i8:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
+      case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
+      case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
+      case ISD::OR:  ROpc = X86::OR8rr;  MOpc = X86::OR8rm;  break;
+      case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+      }
+      break;
+    case MVT::i16:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
+      case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
+      case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
+      case ISD::OR:  ROpc = X86::OR16rr;  MOpc = X86::OR16rm;  break;
+      case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+      }
+      break;
     case MVT::i32:
-      assert(CstVT == MVT::i8);
-      ShlOp = X86::SHL32ri;
-      AddOp = X86::ADD32rr;
-
       switch (Opcode) {
-      default: llvm_unreachable("Impossible opcode");
-      case ISD::AND: Op = X86::AND32ri8; break;
-      case ISD::OR:  Op =  X86::OR32ri8; break;
-      case ISD::XOR: Op = X86::XOR32ri8; break;
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
+      case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
+      case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
+      case ISD::OR:  ROpc = X86::OR32rr;  MOpc = X86::OR32rm;  break;
+      case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
       }
       break;
     case MVT::i64:
-      assert(CstVT == MVT::i8 || CstVT == MVT::i32);
-      ShlOp = X86::SHL64ri;
-      AddOp = X86::ADD64rr;
-
       switch (Opcode) {
-      default: llvm_unreachable("Impossible opcode");
-      case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
-      case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
-      case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
+      case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
+      case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
+      case ISD::OR:  ROpc = X86::OR64rr;  MOpc = X86::OR64rm;  break;
+      case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
       }
       break;
     }
 
-    // Emit the smaller op and the shift.
-    SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
-    SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
-    if (ShlVal == 1)
-      CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
-                           SDValue(New, 0));
-    else
-      CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
-                           getI8Imm(ShlVal, dl));
+    // Ok this is an AND/OR/XOR/ADD/SUB with constant.
+
+    // If this is not a subtract, we can still try to fold a load.
+    if (Opcode != ISD::SUB) {
+      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+      if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+        SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+        SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+        MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        // Update the chain.
+        ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
+        // Record the mem-refs
+        CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
+        ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+        CurDAG->RemoveDeadNode(Node);
+        return;
+      }
+    }
+
+    CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
     return;
   }
+
   case X86ISD::SMUL:
     // i16/i32/i64 are handled with isel patterns.
     if (NVT != MVT::i8)
@@ -3895,7 +4803,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           unsigned TrailingZeros = countTrailingZeros(Mask);
           SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
           SDValue Shift =
-            SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
+            SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
                                            N0.getOperand(0), Imm), 0);
           MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
                                                        MVT::i32, Shift, Shift);
@@ -3906,7 +4814,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           unsigned LeadingZeros = countLeadingZeros(Mask);
           SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
           SDValue Shift =
-            SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
+            SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
                                            N0.getOperand(0), Imm), 0);
           MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
                                                        MVT::i32, Shift, Shift);
@@ -3964,8 +4872,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
         break;
       }
 
-      // FIXME: We should be able to fold loads here.
-
       SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
       SDValue Reg = N0.getOperand(0);
 
@@ -4058,10 +4964,46 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     return;
   }
 
+  case ISD::SETCC: {
+    if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
+      return;
+
+    break;
+  }
+
   case ISD::STORE:
     if (foldLoadStoreIntoMemOperand(Node))
       return;
     break;
+  case ISD::FCEIL:
+  case ISD::FFLOOR:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FRINT: {
+    // Replace fp rounding with their X86 specific equivalent so we don't
+    // need 2 sets of patterns.
+    // FIXME: This can only happen when the nodes started as STRICT_* and have
+    // been mutated into their non-STRICT equivalents. Eventually this
+    // mutation will be removed and we should switch the STRICT_ nodes to a
+    // strict version of RNDSCALE in PreProcessISelDAG.
+    unsigned Imm;
+    switch (Node->getOpcode()) {
+    default: llvm_unreachable("Unexpected opcode!");
+    case ISD::FCEIL:      Imm = 0xA; break;
+    case ISD::FFLOOR:     Imm = 0x9; break;
+    case ISD::FTRUNC:     Imm = 0xB; break;
+    case ISD::FNEARBYINT: Imm = 0xC; break;
+    case ISD::FRINT:      Imm = 0x4; break;
+    }
+    SDLoc dl(Node);
+    SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                  Node->getValueType(0),
+                                  Node->getOperand(0),
+                                  CurDAG->getConstant(Imm, dl, MVT::i8));
+    ReplaceNode(Node, Res.getNode());
+    SelectCode(Res.getNode());
+    return;
+  }
   }
 
   SelectCode(Node);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b6a692ee187d..0b4bf687e6cf 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -131,7 +130,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       addBypassSlowDiv(64, 32);
   }
 
-  if (Subtarget.isTargetKnownWindowsMSVC() ||
+  if (Subtarget.isTargetWindowsMSVC() ||
       Subtarget.isTargetWindowsItanium()) {
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
@@ -159,6 +158,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setUseUnderscoreLongJmp(true);
   }
 
+  // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size
+  // to 32 bits so AtomicExpandPass will expand it and we don't need cmpxchg8b.
+  // FIXME: Should we be limiting the atomic size on other configs? Default is
+  // 1024.
+  if (!Subtarget.hasCmpxchg8b())
+    setMaxAtomicSizeInBitsSupported(32);
+
   // Set up the register classes.
   addRegisterClass(MVT::i8, &X86::GR8RegClass);
   addRegisterClass(MVT::i16, &X86::GR16RegClass);
@@ -190,10 +196,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // Integer absolute.
   if (Subtarget.hasCMov()) {
     setOperationAction(ISD::ABS            , MVT::i16  , Custom);
-    setOperationAction(ISD::ABS            , MVT::i32  , Custom);
-    if (Subtarget.is64Bit())
-      setOperationAction(ISD::ABS          , MVT::i64  , Custom);
+    setOperationAction(ISD::ABS            , MVT::i32  , Custom); 
   }
+  setOperationAction(ISD::ABS              , MVT::i64  , Custom);
 
   // Funnel shifts.
   for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
@@ -258,14 +263,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
     setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
 
-    if (X86ScalarSSEf32) {
-      setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
-      // f32 and f64 cases are Legal, f80 case is not
-      setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
-    } else {
-      setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
-      setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
-    }
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
+    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
   } else {
     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Expand);
@@ -415,6 +414,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
     if (Subtarget.is64Bit())
       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
+    else
+      setOperationAction(ISD::CTPOP        , MVT::i64  , Custom);
   }
 
   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
@@ -486,6 +487,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
   }
 
+  if (!Subtarget.is64Bit())
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+
   if (Subtarget.hasCmpxchg16b()) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
   }
@@ -530,6 +534,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                      : &X86::FR64RegClass);
 
+    // Disable f32->f64 extload as we can only generate this in one instruction
+    // under optsize. So it's easier to pattern match (fpext (load)) for that
+    // case instead of needing to emit 2 instructions for extload in the
+    // non-optsize case.
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
     for (auto VT : { MVT::f32, MVT::f64 }) {
       // Use ANDPD to simulate FABS.
       setOperationAction(ISD::FABS, VT, Custom);
@@ -668,6 +678,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
     setOperationAction(ISD::FMA, MVT::f80, Expand);
+    setOperationAction(ISD::LROUND, MVT::f80, Expand);
+    setOperationAction(ISD::LLROUND, MVT::f80, Expand);
+    setOperationAction(ISD::LRINT, MVT::f80, Expand);
+    setOperationAction(ISD::LLRINT, MVT::f80, Expand);
   }
 
   // Always use a library call for pow.
@@ -780,6 +794,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
+
+    setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
+    setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -841,6 +858,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SADDSAT,            MVT::v8i16, Legal);
     setOperationAction(ISD::USUBSAT,            MVT::v8i16, Legal);
     setOperationAction(ISD::SSUBSAT,            MVT::v8i16, Legal);
+    setOperationAction(ISD::UADDSAT,            MVT::v4i32, Custom);
+    setOperationAction(ISD::USUBSAT,            MVT::v4i32, Custom);
+    setOperationAction(ISD::UADDSAT,            MVT::v2i64, Custom);
+    setOperationAction(ISD::USUBSAT,            MVT::v2i64, Custom);
 
     if (!ExperimentalVectorWideningLegalization) {
       // Use widening instead of promotion.
@@ -950,17 +971,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
-
     // We want to legalize this to an f64 load rather than an i64 load on
     // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
     // store.
-    setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
     setOperationAction(ISD::LOAD,               MVT::v2i32, Custom);
     setOperationAction(ISD::LOAD,               MVT::v4i16, Custom);
     setOperationAction(ISD::LOAD,               MVT::v8i8,  Custom);
-    setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
     setOperationAction(ISD::STORE,              MVT::v2i32, Custom);
     setOperationAction(ISD::STORE,              MVT::v4i16, Custom);
     setOperationAction(ISD::STORE,              MVT::v8i8,  Custom);
@@ -1128,14 +1144,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
 
     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
-    setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
 
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
-
     // In the customized shift lowering, the legal v8i32/v4i64 cases
     // in AVX2 will be recognized.
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1144,13 +1156,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SRA, VT, Custom);
     }
 
-    if (ExperimentalVectorWideningLegalization) {
-      // These types need custom splitting if their input is a 128-bit vector.
-      setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i64,  Custom);
-      setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i32, Custom);
-      setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i64,  Custom);
-      setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i32, Custom);
-    }
+    // These types need custom splitting if their input is a 128-bit vector.
+    setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i64,  Custom);
+    setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i32, Custom);
+    setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i64,  Custom);
+    setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i32, Custom);
 
     setOperationAction(ISD::ROTL,              MVT::v8i32,  Custom);
     setOperationAction(ISD::ROTL,              MVT::v16i16, Custom);
@@ -1182,9 +1192,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::CTPOP,           VT, Custom);
       setOperationAction(ISD::CTLZ,            VT, Custom);
 
-      // TODO - remove this once 256-bit X86ISD::ANDNP correctly split.
-      setOperationAction(ISD::CTTZ,  VT, HasInt256 ? Expand : Custom);
-
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
       setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1260,7 +1267,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
-      setOperationAction(ISD::MLOAD,  VT, Legal);
+      setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
       setOperationAction(ISD::MSTORE, VT, Legal);
     }
 
@@ -1282,6 +1289,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);
       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
+      setOperationAction(ISD::STORE,              VT, Custom);
     }
 
     if (HasInt256)
@@ -1352,19 +1360,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SSUBSAT,          VT, Custom);
 
       setOperationAction(ISD::BUILD_VECTOR,     VT, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS,   VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,   VT,  Custom);
       setOperationAction(ISD::VSELECT,          VT,  Expand);
     }
 
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Custom);
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1,  Custom);
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v2i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v16i1, Custom);
     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   }
@@ -1378,9 +1381,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
-
     for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8,  Legal);
       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
@@ -1413,10 +1413,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
     setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
 
+    // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
+    // to 512-bit rather than use the AVX2 instructions so that we can use
+    // k-masks.
     if (!Subtarget.hasVLX()) {
-      // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
-      // to 512-bit rather than use the AVX2 instructions so that we can use
-      // k-masks.
       for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
            MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
         setOperationAction(ISD::MLOAD,  VT, Custom);
@@ -1446,6 +1446,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FTRUNC,           VT, Legal);
       setOperationAction(ISD::FRINT,            VT, Legal);
       setOperationAction(ISD::FNEARBYINT,       VT, Legal);
+
+      setOperationAction(ISD::SELECT,           VT, Custom);
     }
 
     // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
@@ -1465,13 +1467,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MULHU,              MVT::v16i32,  Custom);
     setOperationAction(ISD::MULHS,              MVT::v16i32,  Custom);
 
-    setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
-    setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
-    setOperationAction(ISD::SELECT,             MVT::v16i32, Custom);
-    setOperationAction(ISD::SELECT,             MVT::v32i16, Custom);
-    setOperationAction(ISD::SELECT,             MVT::v64i8, Custom);
-    setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
-
     for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
       setOperationAction(ISD::SMAX,             VT, Legal);
       setOperationAction(ISD::UMAX,             VT, Legal);
@@ -1485,6 +1480,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::ROTL,             VT, Custom);
       setOperationAction(ISD::ROTR,             VT, Custom);
       setOperationAction(ISD::SETCC,            VT, Custom);
+      setOperationAction(ISD::SELECT,           VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1705,6 +1701,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SADDSAT,      VT, Legal);
       setOperationAction(ISD::USUBSAT,      VT, Legal);
       setOperationAction(ISD::SSUBSAT,      VT, Legal);
+      setOperationAction(ISD::SELECT,       VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1788,7 +1785,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   if (!Subtarget.is64Bit()) {
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
-    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
   }
 
   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
@@ -1842,8 +1838,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // is. We should promote the value to 64-bits to solve this.
   // This is what the CRT headers do - `fmodf` is an inline header
   // function casting to f64 and calling `fmod`.
-  if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
-                              Subtarget.isTargetWindowsItanium()))
+  if (Subtarget.is32Bit() &&
+      (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
     for (ISD::NodeType Op :
          {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
           ISD::FLOG10, ISD::FPOW, ISD::FSIN})
@@ -1854,6 +1850,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  setTargetDAGCombine(ISD::CONCAT_VECTORS);
   setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
   setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
   setTargetDAGCombine(ISD::BITCAST);
@@ -1881,6 +1878,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setTargetDAGCombine(ISD::ANY_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+  setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
+  setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::SETCC);
@@ -2050,20 +2050,19 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
 /// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
-EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size,
-                                       unsigned DstAlign, unsigned SrcAlign,
-                                       bool IsMemset, bool ZeroMemset,
-                                       bool MemcpyStrSrc,
-                                       MachineFunction &MF) const {
-  const Function &F = MF.getFunction();
-  if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
-    if (Size >= 16 &&
-        (!Subtarget.isUnalignedMem16Slow() ||
-         ((DstAlign == 0 || DstAlign >= 16) &&
-          (SrcAlign == 0 || SrcAlign >= 16)))) {
+/// For vector ops we check that the overall size isn't larger than our
+/// preferred vector width.
+EVT X86TargetLowering::getOptimalMemOpType(
+    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+    bool ZeroMemset, bool MemcpyStrSrc,
+    const AttributeList &FuncAttributes) const {
+  if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+    if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
+                       ((DstAlign == 0 || DstAlign >= 16) &&
+                        (SrcAlign == 0 || SrcAlign >= 16)))) {
       // FIXME: Check if unaligned 32-byte accesses are slow.
-      if (Size >= 32 && Subtarget.hasAVX()) {
+      if (Size >= 32 && Subtarget.hasAVX() &&
+          (Subtarget.getPreferVectorWidth() >= 256)) {
         // Although this isn't a well-supported type for AVX1, we'll let
         // legalization and shuffle lowering produce the optimal codegen. If we
         // choose an optimal type with a vector element larger than a byte,
@@ -2071,11 +2070,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
         // multiply) before we splat as a vector.
         return MVT::v32i8;
       }
-      if (Subtarget.hasSSE2())
+      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
         return MVT::v16i8;
       // TODO: Can SSE1 handle a byte vector?
       // If we have SSE1 registers we should be able to use them.
-      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
+      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
+          (Subtarget.getPreferVectorWidth() >= 128))
         return MVT::v4f32;
     } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
                !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
@@ -2104,11 +2104,9 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
   return true;
 }
 
-bool
-X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                  unsigned,
-                                                  unsigned,
-                                                  bool *Fast) const {
+bool X86TargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
   if (Fast) {
     switch (VT.getSizeInBits()) {
     default:
@@ -2124,6 +2122,16 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
     // TODO: What about AVX-512 (512-bit) accesses?
     }
   }
+  // NonTemporal vector memory ops must be aligned.
+  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
+    // NT loads can only be vector aligned, so if it's less aligned than the
+    // minimum vector size (which we can split the vector down to), we might as
+    // well use a regular unaligned vector load.
+    // We don't have any NT loads pre-SSE41.
+    if (!!(Flags & MachineMemOperand::MOLoad))
+      return (Align < 16 || !Subtarget.hasSSE41());
+    return false;
+  }
   // Misaligned accesses of any size are always allowed.
   return true;
 }
@@ -2281,12 +2289,13 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
                         Type::getInt8PtrTy(M.getContext()));
 
     // MSVC CRT has a function to validate security cookie.
-    auto *SecurityCheckCookie = cast<Function>(
-        M.getOrInsertFunction("__security_check_cookie",
-                              Type::getVoidTy(M.getContext()),
-                              Type::getInt8PtrTy(M.getContext())));
-    SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
-    SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+        "__security_check_cookie", Type::getVoidTy(M.getContext()),
+        Type::getInt8PtrTy(M.getContext()));
+    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
+      F->setCallingConv(CallingConv::X86_FastCall);
+      F->addAttribute(1, Attribute::AttrKind::InReg);
+    }
     return;
   }
   // glibc, bionic, and Fuchsia have a special slot for the stack guard.
@@ -2304,7 +2313,7 @@ Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
   return TargetLowering::getSDagStackGuard(M);
 }
 
-Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
   // MSVC CRT has a function to validate security cookie.
   if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
       Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
@@ -2347,8 +2356,6 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
 //               Return Value Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-#include "X86GenCallingConv.inc"
-
 bool X86TargetLowering::CanLowerReturn(
     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
@@ -2703,7 +2710,6 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
          "The values should reside in two registers");
 
   SDValue Lo, Hi;
-  unsigned Reg;
   SDValue ArgValueLo, ArgValueHi;
 
   MachineFunction &MF = DAG.getMachineFunction();
@@ -2713,7 +2719,7 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
   if (nullptr == InFlag) {
     // When no physical register is present,
     // create an intermediate virtual register.
-    Reg = MF.addLiveIn(VA.getLocReg(), RC);
+    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
     ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
@@ -2934,6 +2940,8 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   case CallingConv::X86_StdCall:
   case CallingConv::X86_VectorCall:
   case CallingConv::X86_FastCall:
+  // Swift:
+  case CallingConv::Swift:
     return true;
   default:
     return canGuaranteeTCO(CC);
@@ -2986,22 +2994,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
   else
     ValVT = VA.getValVT();
 
-  // Calculate SP offset of interrupt parameter, re-arrange the slot normally
-  // taken by a return address.
-  int Offset = 0;
-  if (CallConv == CallingConv::X86_INTR) {
-    // X86 interrupts may take one or two arguments.
-    // On the stack there will be no return address as in regular call.
-    // Offset of last argument need to be set to -4/-8 bytes.
-    // Where offset of the first argument out of two, should be set to 0 bytes.
-    Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
-    if (Subtarget.is64Bit() && Ins.size() == 2) {
-      // The stack pointer needs to be realigned for 64 bit handlers with error
-      // code, so the argument offset changes by 8 bytes.
-      Offset += 8;
-    }
-  }
-
   // FIXME: For now, all byval parameter objects are marked mutable. This can be
   // changed with more analysis.
   // In case of tail call optimization mark all arguments mutable. Since they
@@ -3014,15 +3006,15 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
     // can be improved with deeper analysis.
     int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
                                    /*isAliased=*/true);
-    // Adjust SP offset of interrupt parameter.
-    if (CallConv == CallingConv::X86_INTR) {
-      MFI.setObjectOffset(FI, Offset);
-    }
     return DAG.getFrameIndex(FI, PtrVT);
   }
 
   // This is an argument in memory. We might be able to perform copy elision.
-  if (Flags.isCopyElisionCandidate()) {
+  // If the argument is passed directly in memory without any extension, then we
+  // can perform copy elision. Large vector types, for example, may be passed
+  // indirectly by pointer.
+  if (Flags.isCopyElisionCandidate() &&
+      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
     EVT ArgVT = Ins[i].ArgVT;
     SDValue PartAddr;
     if (Ins[i].PartOffset == 0) {
@@ -3031,7 +3023,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
       // load from our portion of it. This assumes that if the first part of an
       // argument is in memory, the rest will also be in memory.
       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
-                                     /*Immutable=*/false);
+                                     /*IsImmutable=*/false);
       PartAddr = DAG.getFrameIndex(FI, PtrVT);
       return DAG.getLoad(
           ValVT, dl, Chain, PartAddr,
@@ -3072,11 +3064,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
     MFI.setObjectSExt(FI, true);
   }
 
-  // Adjust SP offset of interrupt parameter.
-  if (CallConv == CallingConv::X86_INTR) {
-    MFI.setObjectOffset(FI, Offset);
-  }
-
   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   SDValue Val = DAG.getLoad(
       ValVT, dl, Chain, FIN,
@@ -3166,14 +3153,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
       !(isVarArg && canGuaranteeTCO(CallConv)) &&
       "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
 
-  if (CallConv == CallingConv::X86_INTR) {
-    bool isLegal = Ins.size() == 1 ||
-                   (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
-                                        (!Is64Bit && Ins[1].VT == MVT::i32)));
-    if (!isLegal)
-      report_fatal_error("X86 interrupts may take one or two arguments");
-  }
-
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
@@ -3454,11 +3433,11 @@ SDValue X86TargetLowering::LowerFormalArguments(
     }
 
     // Copy all forwards from physical to virtual registers.
-    for (ForwardedRegister &F : Forwards) {
+    for (ForwardedRegister &FR : Forwards) {
       // FIXME: Can we use a less constrained schedule?
-      SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
-      F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
-      Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
+      SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
+      FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
+      Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
     }
   }
 
@@ -3610,6 +3589,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   const Module *M = MF.getMMI().getModule();
   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
 
+  MachineFunction::CallSiteInfo CSInfo;
+
   if (CallConv == CallingConv::X86_INTR)
     report_fatal_error("X86 interrupts may not be called directly");
 
@@ -3805,6 +3786,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                          Subtarget);
     } else if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      const TargetOptions &Options = DAG.getTarget().Options;
+      if (Options.EnableDebugEntryValues)
+        CSInfo.emplace_back(VA.getLocReg(), I);
       if (isVarArg && IsWin64) {
         // Win64 ABI requires argument XMM reg to be copied to the corresponding
         // shadow reg if callee is a varargs function.
@@ -3975,46 +3959,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // through a register, since the call instruction's 32-bit
     // pc-relative offset may not be large enough to hold the whole
     // address.
-  } else if (Callee->getOpcode() == ISD::GlobalAddress) {
-    // If the callee is a GlobalAddress node (quite common, every direct call
-    // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
-    // it.
-    GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
-
-    // We should use extra load for direct calls to dllimported functions in
-    // non-JIT mode.
-    const GlobalValue *GV = G->getGlobal();
-    if (!GV->hasDLLImportStorageClass()) {
-      unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
-
-      Callee = DAG.getTargetGlobalAddress(
-          GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
-
-      if (OpFlags == X86II::MO_GOTPCREL) {
-        // Add a wrapper.
-        Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
-          getPointerTy(DAG.getDataLayout()), Callee);
-        // Add extra indirection
-        Callee = DAG.getLoad(
-            getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
-            MachinePointerInfo::getGOT(DAG.getMachineFunction()));
-      }
-    }
-  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
-    unsigned char OpFlags =
-        Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
-
-    Callee = DAG.getTargetExternalSymbol(
-        S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
-
-    if (OpFlags == X86II::MO_GOTPCREL) {
-      Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
-          getPointerTy(DAG.getDataLayout()), Callee);
-      Callee = DAG.getLoad(
-          getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
-          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
-    }
+  } else if (Callee->getOpcode() == ISD::GlobalAddress ||
+             Callee->getOpcode() == ISD::ExternalSymbol) {
+    // Lower direct calls to global addresses and external symbols. Setting
+    // ForCall to true here has the effect of removing WrapperRIP when possible
+    // to allow direct calls to be selected without first materializing the
+    // address into a register.
+    Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
   } else if (Subtarget.isTarget64BitILP32() &&
              Callee->getValueType(0) == MVT::i32) {
     // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -4105,7 +4056,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // should be computed from returns not tail calls.  Consider a void
     // function making a tail call to a function returning int.
     MF.getFrameInfo().setHasTailCall();
-    return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
+    SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
+    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+    return Ret;
   }
 
   if (HasNoCfCheck && IsCFProtectionSupported) {
@@ -4114,6 +4067,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
   }
   InFlag = Chain.getValue(1);
+  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
 
   // Create the CALLSEQ_END node.
   unsigned NumBytesForCalleeToPop;
@@ -4787,7 +4741,6 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   if (!IntrData)
     return false;
 
-  Info.opc = ISD::INTRINSIC_W_CHAIN;
   Info.flags = MachineMemOperand::MONone;
   Info.offset = 0;
 
@@ -4795,6 +4748,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case TRUNCATE_TO_MEM_VI8:
   case TRUNCATE_TO_MEM_VI16:
   case TRUNCATE_TO_MEM_VI32: {
+    Info.opc = ISD::INTRINSIC_VOID;
     Info.ptrVal = I.getArgOperand(0);
     MVT VT  = MVT::getVT(I.getArgOperand(1)->getType());
     MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
@@ -4810,6 +4764,31 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags |= MachineMemOperand::MOStore;
     break;
   }
+  case GATHER:
+  case GATHER_AVX2: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.ptrVal = nullptr;
+    MVT DataVT = MVT::getVT(I.getType());
+    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
+    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+                                IndexVT.getVectorNumElements());
+    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
+    Info.align = 1;
+    Info.flags |= MachineMemOperand::MOLoad;
+    break;
+  }
+  case SCATTER: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.ptrVal = nullptr;
+    MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
+    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
+    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+                                IndexVT.getVectorNumElements());
+    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
+    Info.align = 1;
+    Info.flags |= MachineMemOperand::MOStore;
+    break;
+  }
   default:
     return false;
   }
@@ -4820,7 +4799,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 /// Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
-bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                     bool ForCodeSize) const {
   for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
     if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
       return true;
@@ -4837,6 +4817,26 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
   if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
     if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
       return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
+
+  // If this is (1) an AVX vector load with (2) multiple uses and (3) all of
+  // those uses are extracted directly into a store, then the extract + store
+  // can be store-folded. Therefore, it's probably not worth splitting the load.
+  EVT VT = Load->getValueType(0);
+  if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
+    for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
+      // Skip uses of the chain value. Result 0 of the node is the load value.
+      if (UI.getUse().getResNo() != 0)
+        continue;
+
+      // If this use is not an extract + store, it's probably worth splitting.
+      if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
+          UI->use_begin()->getOpcode() != ISD::STORE)
+        return true;
+    }
+    // All non-chain uses are extract + store.
+    return false;
+  }
+
   return true;
 }
 
@@ -4909,15 +4909,29 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
 }
 
 bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+  unsigned Opc = VecOp.getOpcode();
+
+  // Assume target opcodes can't be scalarized.
+  // TODO - do we have any exceptions?
+  if (Opc >= ISD::BUILTIN_OP_END)
+    return false;
+
   // If the vector op is not supported, try to convert to scalar.
   EVT VecVT = VecOp.getValueType();
-  if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
     return true;
 
   // If the vector op is supported, but the scalar op is not, the transform may
   // not be worthwhile.
   EVT ScalarVT = VecVT.getScalarType();
-  return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
+bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+  // Vector types are always rejected for now. TODO: Allow vectors?
+  if (VT.isVector())
+    return false;
+  return VT.isSimple() || !isOperationExpand(Opcode, VT);
 }
 
 bool X86TargetLowering::isCheapToSpeculateCttz() const {
@@ -4930,8 +4944,9 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget.hasLZCNT();
 }
 
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
-                                                EVT BitcastVT) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+                                                const SelectionDAG &DAG,
+                                                const MachineMemOperand &MMO) const {
   if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
       BitcastVT.getVectorElementType() == MVT::i1)
     return false;
@@ -4939,7 +4954,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
   if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
     return false;
 
-  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
+  // If both types are legal vectors, it's always ok to convert them.
+  if (LoadVT.isVector() && BitcastVT.isVector() &&
+      isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+    return true;
+
+  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
 }
 
 bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
@@ -4953,6 +4973,10 @@ bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
     unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
     return (MemVT.getSizeInBits() <= MaxIntSize);
   }
+  // Make sure we don't merge greater than our preferred vector
+  // width.
+  if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
+    return false;
   return true;
 }
 
@@ -4998,7 +5022,25 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
   return Subtarget.hasSSE2();
 }
 
-bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
+bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
+    const SDNode *N, CombineLevel Level) const {
+  assert(((N->getOpcode() == ISD::SHL &&
+           N->getOperand(0).getOpcode() == ISD::SRL) ||
+          (N->getOpcode() == ISD::SRL &&
+           N->getOperand(0).getOpcode() == ISD::SHL)) &&
+         "Expected shift-shift mask");
+  EVT VT = N->getValueType(0);  // result type of the outer shift
+  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
+      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
+    // Only fold if both shift amounts are equal, so the pair folds to an AND.
+    // TODO - we should fold if either is a non-uniform vector but we don't do
+    // the fold for non-splats yet.
+    return N->getOperand(1) == N->getOperand(0).getOperand(1);
+  }
+  return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
+}
+
+bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
   EVT VT = Y.getValueType();
 
   // For vectors, we don't have a preference, but we probably want a mask.
@@ -5048,8 +5090,8 @@ static bool isUndefOrZero(int Val) {
   return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
 }
 
-/// Return true if every element in Mask, beginning
-/// from position Pos and ending in Pos+Size is the undef sentinel value.
+/// Return true if every element in Mask, beginning from position Pos and ending
+/// in Pos+Size is the undef sentinel value.
 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
   for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
     if (Mask[i] != SM_SentinelUndef)
@@ -5057,6 +5099,18 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
   return true;
 }
 
+/// Return true if every mask element in the lower half is SM_SentinelUndef.
+static bool isUndefLowerHalf(ArrayRef<int> Mask) {
+  unsigned NumElts = Mask.size();
+  return isUndefInRange(Mask, 0, NumElts / 2);  // elements [0, NumElts / 2)
+}
+
+/// Return true if every mask element in the upper half is SM_SentinelUndef.
+static bool isUndefUpperHalf(ArrayRef<int> Mask) {
+  unsigned NumElts = Mask.size();
+  return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
+}
+
 /// Return true if Val falls within the specified range (L, H].
 static bool isInRange(int Val, int Low, int Hi) {
   return (Val >= Low && Val < Hi);
@@ -5409,6 +5463,53 @@ static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
                      DAG.getIntPtrConstant(0, dl));
 }
 
+/// Widen Vec to WideSizeInBits bits with the same scalar type, with the new
+/// elements either zero or undef. Forwards to the MVT-typed overload.
+static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
+                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                              const SDLoc &dl, unsigned WideSizeInBits) {
+  assert(Vec.getValueSizeInBits() < WideSizeInBits &&
+         (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
+         "Unsupported vector widening type");
+  unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
+  MVT SVT = Vec.getSimpleValueType().getScalarType();
+  MVT VT = MVT::getVectorVT(SVT, WideNumElts);  // widened vector type
+  return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
+}
+
+// Helper function to collect subvector ops that are concatenated together,
+// either by ISD::CONCAT_VECTORS or an ISD::INSERT_SUBVECTOR series.
+// The subvectors in Ops are guaranteed to be the same type.
+static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
+  assert(Ops.empty() && "Expected an empty ops vector");
+
+  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
+    Ops.append(N->op_begin(), N->op_end());
+    return true;
+  }
+
+  if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
+      isa<ConstantSDNode>(N->getOperand(2))) {
+    SDValue Src = N->getOperand(0);
+    SDValue Sub = N->getOperand(1);
+    const APInt &Idx = N->getConstantOperandAPInt(2);
+    EVT VT = Src.getValueType();
+    EVT SubVT = Sub.getValueType();
+
+    // TODO - Handle more general insert_subvector chains.
+    if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
+        Idx == (VT.getVectorNumElements() / 2) &&
+        Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        isNullConstant(Src.getOperand(2))) {
+      Ops.push_back(Src.getOperand(1));  // inserted at index 0
+      Ops.push_back(Sub);  // inserted at NumElts / 2
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // Helper for splitting operands of an operation to legal target size and
 // apply a function on each part.
 // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
@@ -5457,19 +5558,6 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
 }
 
-// Return true if the instruction zeroes the unused upper part of the
-// destination and accepts mask.
-static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case X86ISD::CMPM:
-  case X86ISD::CMPM_RND:
-  case ISD::SETCC:
-    return true;
-  }
-}
-
 /// Insert i1-subvector to i1-vector.
 static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
@@ -5626,10 +5714,29 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
   return DAG.getBitcast(VT, Vec);
 }
 
-static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
-                              SelectionDAG &DAG) {
+// Map *_EXTEND (or an already-INREG opcode) to its *_EXTEND_VECTOR_INREG form.
+static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::ANY_EXTEND:
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+    return ISD::ANY_EXTEND_VECTOR_INREG;
+  case ISD::ZERO_EXTEND:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    return ISD::ZERO_EXTEND_VECTOR_INREG;
+  case ISD::SIGN_EXTEND:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+    return ISD::SIGN_EXTEND_VECTOR_INREG;
+  }
+  llvm_unreachable("Unknown opcode");  // any opcode not listed above
+}
+
+static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
+                              SDValue In, SelectionDAG &DAG) {
   EVT InVT = In.getValueType();
   assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
+  assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
+          ISD::ZERO_EXTEND == Opcode) &&
+         "Unknown extension opcode");
 
   // For 256-bit vectors, we only need the lower (128-bit) input half.
   // For 512-bit vectors, we only need the lower input half or quarter.
@@ -5642,13 +5749,10 @@ static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
     InVT = In.getValueType();
   }
 
-  if (VT.getVectorNumElements() == InVT.getVectorNumElements())
-    return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
-                       DL, VT, In);
+  if (VT.getVectorNumElements() != InVT.getVectorNumElements())
+    Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
 
-  return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
-                            : ISD::ZERO_EXTEND_VECTOR_INREG,
-                     DL, VT, In);
+  return DAG.getNode(Opcode, DL, VT, In);
 }
 
 /// Returns a vector_shuffle node for an unpackl operation.
@@ -5686,18 +5790,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
 }
 
-// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
-static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
-  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
-    V = V.getOperand(0);
-  return V;
-}
-
-static const Constant *getTargetConstantFromNode(SDValue Op) {
-  Op = peekThroughBitcasts(Op);
-
-  auto *Load = dyn_cast<LoadSDNode>(Op);
-  if (!Load)
+static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
+  if (!Load || !ISD::isNormalLoad(Load))
     return nullptr;
 
   SDValue Ptr = Load->getBasePtr();
@@ -5712,6 +5806,17 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
   return CNode->getConstVal();
 }
 
+static const Constant *getTargetConstantFromNode(SDValue Op) {
+  Op = peekThroughBitcasts(Op);  // a non-load Op then yields nullptr below
+  return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
+}
+
+const Constant *
+X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
+  assert(LD && "Unexpected null LoadSDNode");
+  return getTargetConstantFromNode(LD);  // file-local LoadSDNode overload
+}
+
 // Extract raw constant bits from constant pools.
 static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                           APInt &UndefElts,
@@ -5778,8 +5883,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
       if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
         return false;
 
-      APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
-      EltBits[i] = Bits.getZExtValue();
+      EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset);
     }
     return true;
   };
@@ -5899,6 +6003,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     }
   }
 
+  // Extract constant bits from a subvector broadcast.
+  if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
+    SmallVector<APInt, 16> SubEltBits;
+    if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+                                      UndefElts, SubEltBits, AllowWholeUndefs,
+                                      AllowPartialUndefs)) {
+      UndefElts = APInt::getSplat(NumElts, UndefElts);
+      while (EltBits.size() < NumElts)
+        EltBits.append(SubEltBits.begin(), SubEltBits.end());
+      return true;
+    }
+  }
+
   // Extract a rematerialized scalar constant insertion.
   if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
       Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -5914,6 +6031,29 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     return CastBitData(UndefSrcElts, SrcEltBits);
   }
 
+  // Insert constant bits from a base and sub vector sources.
+  if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      isa<ConstantSDNode>(Op.getOperand(2))) {
+    // TODO - support insert_subvector through bitcasts.
+    if (EltSizeInBits != VT.getScalarSizeInBits())
+      return false;
+
+    APInt UndefSubElts;
+    SmallVector<APInt, 32> EltSubBits;
+    if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+                                      UndefSubElts, EltSubBits,
+                                      AllowWholeUndefs, AllowPartialUndefs) &&
+        getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+                                      UndefElts, EltBits, AllowWholeUndefs,
+                                      AllowPartialUndefs)) {
+      unsigned BaseIdx = Op.getConstantOperandVal(2);
+      UndefElts.insertBits(UndefSubElts, BaseIdx);
+      for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
+        EltBits[BaseIdx + i] = EltSubBits[i];
+      return true;
+    }
+  }
+
   // Extract constant bits from a subvector's source.
   if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -6068,6 +6208,34 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
   }
 }
 
+// Split the demanded elts of a HADD/HSUB node between its two operands.
+static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
+                                 APInt &DemandedLHS, APInt &DemandedRHS) {
+  int NumLanes = VT.getSizeInBits() / 128;  // number of 128-bit lanes
+  int NumElts = DemandedElts.getBitWidth();
+  int NumEltsPerLane = NumElts / NumLanes;
+  int HalfEltsPerLane = NumEltsPerLane / 2;
+
+  DemandedLHS = APInt::getNullValue(NumElts);
+  DemandedRHS = APInt::getNullValue(NumElts);
+
+  // Result elt i of a lane's low/high half reads elts 2i,2i+1 of LHS/RHS.
+  for (int Idx = 0; Idx != NumElts; ++Idx) {
+    if (!DemandedElts[Idx])
+      continue;
+    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
+    int LocalIdx = Idx % NumEltsPerLane;
+    if (LocalIdx < HalfEltsPerLane) {
+      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
+      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
+    } else {
+      LocalIdx -= HalfEltsPerLane;  // index within the lane's upper half
+      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
+      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
+    }
+  }
+}
+
 /// Calculates the shuffle mask corresponding to the target-specific opcode.
 /// If the mask could be calculated, returns it in \p Mask, returns the shuffle
 /// operands in \p Ops, and returns true.
@@ -6468,14 +6636,15 @@ static bool setTargetShuffleZeroElements(SDValue N,
 static bool resolveTargetShuffleInputs(SDValue Op,
                                        SmallVectorImpl<SDValue> &Inputs,
                                        SmallVectorImpl<int> &Mask,
-                                       const SelectionDAG &DAG);
+                                       SelectionDAG &DAG);
 
 // Attempt to decode ops that could be represented as a shuffle mask.
 // The decoded shuffle mask may contain a different number of elements to the
 // destination value type.
-static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
+static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
+                               SmallVectorImpl<int> &Mask,
                                SmallVectorImpl<SDValue> &Ops,
-                               const SelectionDAG &DAG) {
+                               SelectionDAG &DAG) {
   Mask.clear();
   Ops.clear();
 
@@ -6483,8 +6652,9 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
   unsigned NumElts = VT.getVectorNumElements();
   unsigned NumSizeInBits = VT.getSizeInBits();
   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
-  assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
-         "Expected byte aligned value types");
+  if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
+    return false;
+  assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
 
   unsigned Opcode = N.getOpcode();
   switch (Opcode) {
@@ -6524,6 +6694,40 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::OR: {
+    // Inspect each operand at the byte level. We can merge these into a
+    // blend shuffle mask if for each byte at least one is masked out (zero).
+    KnownBits Known0 = DAG.computeKnownBits(N.getOperand(0), DemandedElts);
+    KnownBits Known1 = DAG.computeKnownBits(N.getOperand(1), DemandedElts);
+    if (Known0.One.isNullValue() && Known1.One.isNullValue()) {
+      bool IsByteMask = true;
+      unsigned NumSizeInBytes = NumSizeInBits / 8;
+      unsigned NumBytesPerElt = NumBitsPerElt / 8;
+      APInt ZeroMask = APInt::getNullValue(NumBytesPerElt);
+      APInt SelectMask = APInt::getNullValue(NumBytesPerElt);
+      for (unsigned i = 0; i != NumBytesPerElt && IsByteMask; ++i) {
+        unsigned LHS = Known0.Zero.extractBits(8, i * 8).getZExtValue();
+        unsigned RHS = Known1.Zero.extractBits(8, i * 8).getZExtValue();
+        if (LHS == 255 && RHS == 0)
+          SelectMask.setBit(i);
+        else if (LHS == 255 && RHS == 255)
+          ZeroMask.setBit(i);
+        else if (!(LHS == 0 && RHS == 255))
+          IsByteMask = false;
+      }
+      if (IsByteMask) {
+        for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) {
+          for (unsigned j = 0; j != NumBytesPerElt; ++j) {
+            unsigned Ofs = (SelectMask[j] ? NumSizeInBytes : 0);
+            int Idx = (ZeroMask[j] ? (int)SM_SentinelZero : (i + j + Ofs));
+            Mask.push_back(Idx);
+          }
+        }
+        Ops.push_back(N.getOperand(0));
+        Ops.push_back(N.getOperand(1));
+        return true;
+      }
+    }
+
     // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
     // is a valid shuffle index.
     SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
@@ -6558,9 +6762,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::INSERT_SUBVECTOR: {
-    // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1)) where
-    // SRC0/SRC1 are both of the same valuetype VT.
-    // TODO - add peekThroughOneUseBitcasts support.
     SDValue Src = N.getOperand(0);
     SDValue Sub = N.getOperand(1);
     EVT SubVT = Sub.getValueType();
@@ -6568,28 +6769,57 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     if (!isa<ConstantSDNode>(N.getOperand(2)) ||
         !N->isOnlyUserOf(Sub.getNode()))
       return false;
+    uint64_t InsertIdx = N.getConstantOperandVal(2);
+    // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
+    if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        Sub.getOperand(0).getValueType() == VT &&
+        isa<ConstantSDNode>(Sub.getOperand(1))) {
+      uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
+      for (int i = 0; i != (int)NumElts; ++i)
+        Mask.push_back(i);
+      for (int i = 0; i != (int)NumSubElts; ++i)
+        Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
+      Ops.push_back(Src);
+      Ops.push_back(Sub.getOperand(0));
+      return true;
+    }
+    // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
     SmallVector<int, 64> SubMask;
     SmallVector<SDValue, 2> SubInputs;
-    if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) ||
-        SubMask.size() != NumSubElts)
+    if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
+                                    SubMask, DAG))
       return false;
+    if (SubMask.size() != NumSubElts) {
+      assert(((SubMask.size() % NumSubElts) == 0 ||
+              (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
+      if ((NumSubElts % SubMask.size()) == 0) {
+        int Scale = NumSubElts / SubMask.size();
+        SmallVector<int,64> ScaledSubMask;
+        scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+        SubMask = ScaledSubMask;
+      } else {
+        int Scale = SubMask.size() / NumSubElts;
+        NumSubElts = SubMask.size();
+        NumElts *= Scale;
+        InsertIdx *= Scale;
+      }
+    }
     Ops.push_back(Src);
     for (SDValue &SubInput : SubInputs) {
-      if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
-          SubInput.getOperand(0).getValueType() != VT ||
-          !isa<ConstantSDNode>(SubInput.getOperand(1)))
-        return false;
-      Ops.push_back(SubInput.getOperand(0));
+      EVT SubSVT = SubInput.getValueType().getScalarType();
+      EVT AltVT = EVT::getVectorVT(*DAG.getContext(), SubSVT,
+                                   NumSizeInBits / SubSVT.getSizeInBits());
+      Ops.push_back(DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), AltVT,
+                                DAG.getUNDEF(AltVT), SubInput,
+                                DAG.getIntPtrConstant(0, SDLoc(N))));
     }
-    int InsertIdx = N.getConstantOperandVal(2);
     for (int i = 0; i != (int)NumElts; ++i)
       Mask.push_back(i);
     for (int i = 0; i != (int)NumSubElts; ++i) {
       int M = SubMask[i];
       if (0 <= M) {
         int InputIdx = M / NumSubElts;
-        int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
-        M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts);
+        M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
       }
       Mask[i + InsertIdx] = M;
     }
@@ -6674,16 +6904,21 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
            N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
            "Unexpected input value type");
 
+    APInt EltsLHS, EltsRHS;
+    getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);
+
     // If we know input saturation won't happen we can treat this
     // as a truncation shuffle.
     if (Opcode == X86ISD::PACKSS) {
-      if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
-          (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
+      if ((!N0.isUndef() &&
+           DAG.ComputeNumSignBits(N0, EltsLHS) <= NumBitsPerElt) ||
+          (!N1.isUndef() &&
+           DAG.ComputeNumSignBits(N1, EltsRHS) <= NumBitsPerElt))
         return false;
     } else {
       APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
-      if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
-          (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
+      if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS)) ||
+          (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS)))
         return false;
     }
 
@@ -6728,15 +6963,54 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     }
     return true;
   }
-  case ISD::ZERO_EXTEND_VECTOR_INREG:
-  case ISD::ZERO_EXTEND: {
-    // TODO - add support for VPMOVZX with smaller input vector types.
+  case X86ISD::VBROADCAST: {
     SDValue Src = N.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
-    if (NumSizeInBits != SrcVT.getSizeInBits())
-      break;
-    DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+    if (!SrcVT.isVector())
+      return false;
+
+    if (NumSizeInBits != SrcVT.getSizeInBits()) {
+      assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+             "Illegal broadcast type");
+      SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                               NumSizeInBits / SrcVT.getScalarSizeInBits());
+      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+                        DAG.getUNDEF(SrcVT), Src,
+                        DAG.getIntPtrConstant(0, SDLoc(N)));
+    }
+
+    Ops.push_back(Src);
+    Mask.append(NumElts, 0);
+    return true;
+  }
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG: {
+    SDValue Src = N.getOperand(0);
+    EVT SrcVT = Src.getValueType();
+
+    // Extended source must be a simple vector.
+    if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
+        (SrcVT.getScalarSizeInBits() % 8) != 0)
+      return false;
+
+    unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
+    bool IsAnyExtend =
+        (ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
+    DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
                          Mask);
+
+    if (NumSizeInBits != SrcVT.getSizeInBits()) {
+      assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+             "Illegal zero-extension type");
+      SrcVT = MVT::getVectorVT(SrcVT.getSimpleVT().getScalarType(),
+                               NumSizeInBits / NumSrcBitsPerElt);
+      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+                        DAG.getUNDEF(SrcVT), Src,
+                        DAG.getIntPtrConstant(0, SDLoc(N)));
+    }
+
     Ops.push_back(Src);
     return true;
   }
@@ -6745,7 +7019,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
   return false;
 }
 
-/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
+/// Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
 static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
                                               SmallVectorImpl<int> &Mask) {
   int MaskWidth = Mask.size();
@@ -6761,13 +7035,28 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
           M = SM_SentinelUndef;
 
     // Check for unused inputs.
-    if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
-      UsedInputs.push_back(Inputs[i]);
+    if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
+      for (int &M : Mask)
+        if (lo <= M)
+          M -= MaskWidth;
       continue;
     }
-    for (int &M : Mask)
-      if (lo <= M)
-        M -= MaskWidth;
+
+    // Check for repeated inputs.
+    bool IsRepeat = false;
+    for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
+      if (UsedInputs[j] != Inputs[i])
+        continue;
+      for (int &M : Mask)
+        if (lo <= M)
+          M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
+      IsRepeat = true;
+      break;
+    }
+    if (IsRepeat)
+      continue;
+
+    UsedInputs.push_back(Inputs[i]);
   }
   Inputs = UsedInputs;
 }
@@ -6780,9 +7069,11 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
 static bool resolveTargetShuffleInputs(SDValue Op,
                                        SmallVectorImpl<SDValue> &Inputs,
                                        SmallVectorImpl<int> &Mask,
-                                       const SelectionDAG &DAG) {
+                                       SelectionDAG &DAG) {
+  unsigned NumElts = Op.getValueType().getVectorNumElements();
+  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
   if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
-    if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
+    if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG))
       return false;
 
   resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -6838,6 +7129,28 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
                                Depth+1);
   }
 
+  // Recurse into insert_subvector base/sub vector to find scalars.
+  if (Opcode == ISD::INSERT_SUBVECTOR &&
+      isa<ConstantSDNode>(N->getOperand(2))) {
+    SDValue Vec = N->getOperand(0);
+    SDValue Sub = N->getOperand(1);
+    EVT SubVT = Sub.getValueType();
+    unsigned NumSubElts = SubVT.getVectorNumElements();
+    uint64_t SubIdx = N->getConstantOperandVal(2);
+
+    if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
+      return getShuffleScalarElt(Sub.getNode(), Index - SubIdx, DAG, Depth + 1);
+    return getShuffleScalarElt(Vec.getNode(), Index, DAG, Depth + 1);
+  }
+
+  // Recurse into extract_subvector src vector to find scalars.
+  if (Opcode == ISD::EXTRACT_SUBVECTOR &&
+      isa<ConstantSDNode>(N->getOperand(1))) {
+    SDValue Src = N->getOperand(0);
+    uint64_t SrcIdx = N->getConstantOperandVal(1);
+    return getShuffleScalarElt(Src.getNode(), Index + SrcIdx, DAG, Depth + 1);
+  }
+
   // Actual nodes that may contain scalar elements
   if (Opcode == ISD::BITCAST) {
     V = V.getOperand(0);
@@ -6880,7 +7193,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
 
     // If the build vector contains zeros or our first insertion is not the
     // first index then insert into zero vector to break any register
-    // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
+    // dependency else use SCALAR_TO_VECTOR.
     if (First) {
       First = false;
       if (NumZero || 0 != i)
@@ -6889,7 +7202,6 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
         assert(0 == i && "Expected insertion into zero-index");
         V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
         V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
-        V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
         V = DAG.getBitcast(VT, V);
         continue;
       }
@@ -6916,50 +7228,51 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
 
   SDLoc dl(Op);
   SDValue V;
-  bool First = true;
 
   // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
-  for (unsigned i = 0; i < 16; ++i) {
+  for (unsigned i = 0; i < 16; i += 2) {
     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
-    if (ThisIsNonZero && First) {
-      if (NumZero)
-        V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+    bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
+    if (!ThisIsNonZero && !NextIsNonZero)
+      continue;
+
+    // FIXME: Investigate combining the first 4 bytes as an i32 instead.
+    SDValue Elt;
+    if (ThisIsNonZero) {
+      if (NumZero || NextIsNonZero)
+        Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
       else
-        V = DAG.getUNDEF(MVT::v8i16);
-      First = false;
+        Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
     }
 
-    if ((i & 1) != 0) {
-      // FIXME: Investigate extending to i32 instead of just i16.
-      // FIXME: Investigate combining the first 4 bytes as a i32 instead.
-      SDValue ThisElt, LastElt;
-      bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
-      if (LastIsNonZero) {
-        LastElt =
-            DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
-      }
-      if (ThisIsNonZero) {
-        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
-        ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
-                              DAG.getConstant(8, dl, MVT::i8));
-        if (LastIsNonZero)
-          ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
-      } else
-        ThisElt = LastElt;
-
-      if (ThisElt) {
-        if (1 == i) {
-          V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
-                      : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
-          V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
-          V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
-          V = DAG.getBitcast(MVT::v8i16, V);
-        } else {
-          V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
-                          DAG.getIntPtrConstant(i / 2, dl));
-        }
+    if (NextIsNonZero) {
+      SDValue NextElt = Op.getOperand(i + 1);
+      if (i == 0 && NumZero)
+        NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32);
+      else
+        NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32);
+      NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt,
+                            DAG.getConstant(8, dl, MVT::i8));
+      if (ThisIsNonZero)
+        Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt);
+      else
+        Elt = NextElt;
+    }
+
+    // If our first insertion is not at index 0 then insert into a zero vector
+    // to break any register dependency, else use SCALAR_TO_VECTOR.
+    if (!V) {
+      if (i != 0)
+        V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+      else {
+        V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
+        V = DAG.getBitcast(MVT::v8i16, V);
+        continue;
       }
     }
+    Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt);
+    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt,
+                    DAG.getIntPtrConstant(i / 2, dl));
   }
 
   return DAG.getBitcast(MVT::v16i8, V);
@@ -7002,9 +7315,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
   }
 
   // Find all zeroable elements.
-  std::bitset<4> Zeroable;
-  for (int i=0; i < 4; ++i) {
-    SDValue Elt = Op->getOperand(i);
+  std::bitset<4> Zeroable, Undefs;
+  for (int i = 0; i < 4; ++i) {
+    SDValue Elt = Op.getOperand(i);
+    Undefs[i] = Elt.isUndef();
     Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
   }
   assert(Zeroable.size() - Zeroable.count() > 1 &&
@@ -7014,10 +7328,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
   // zeroable or extract_vector_elt with constant index.
   SDValue FirstNonZero;
   unsigned FirstNonZeroIdx;
-  for (unsigned i=0; i < 4; ++i) {
+  for (unsigned i = 0; i < 4; ++i) {
     if (Zeroable[i])
       continue;
-    SDValue Elt = Op->getOperand(i);
+    SDValue Elt = Op.getOperand(i);
     if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
         !isa<ConstantSDNode>(Elt.getOperand(1)))
       return SDValue();
@@ -7056,10 +7370,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
 
   if (EltIdx == 4) {
     // Let the shuffle legalizer deal with blend operations.
-    SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
+    SDValue VZeroOrUndef = (Zeroable == Undefs)
+                               ? DAG.getUNDEF(VT)
+                               : getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
     if (V1.getSimpleValueType() != VT)
       V1 = DAG.getBitcast(VT, V1);
-    return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
+    return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);
   }
 
   // See if we can lower this build_vector to a INSERTPS.
@@ -7079,7 +7395,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
     SDValue SrcVector = Current->getOperand(0);
     if (!V1.getNode())
       V1 = SrcVector;
-    CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
+    CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i);
   }
 
   if (!CanFold)
@@ -7200,9 +7516,11 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   unsigned NumElems = Elts.size();
 
   int LastLoadedElt = -1;
-  SmallBitVector LoadMask(NumElems, false);
-  SmallBitVector ZeroMask(NumElems, false);
-  SmallBitVector UndefMask(NumElems, false);
+  APInt LoadMask = APInt::getNullValue(NumElems);
+  APInt ZeroMask = APInt::getNullValue(NumElems);
+  APInt UndefMask = APInt::getNullValue(NumElems);
+
+  SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
 
   // For each element in the initializer, see if we've found a load, zero or an
   // undef.
@@ -7210,38 +7528,52 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     SDValue Elt = peekThroughBitcasts(Elts[i]);
     if (!Elt.getNode())
       return SDValue();
+    if (Elt.isUndef()) {
+      UndefMask.setBit(i);
+      continue;
+    }
+    if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) {
+      ZeroMask.setBit(i);
+      continue;
+    }
 
-    if (Elt.isUndef())
-      UndefMask[i] = true;
-    else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
-      ZeroMask[i] = true;
-    else if (ISD::isNON_EXTLoad(Elt.getNode())) {
-      LoadMask[i] = true;
-      LastLoadedElt = i;
-      // Each loaded element must be the correct fractional portion of the
-      // requested vector load.
-      if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
-        return SDValue();
-    } else
+    // Each loaded element must be the correct fractional portion of the
+    // requested vector load.
+    if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
       return SDValue();
+
+    if (!ISD::isNON_EXTLoad(Elt.getNode()))
+      return SDValue();
+
+    Loads[i] = cast<LoadSDNode>(Elt);
+    LoadMask.setBit(i);
+    LastLoadedElt = i;
   }
-  assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
+  assert((ZeroMask.countPopulation() + UndefMask.countPopulation() +
+          LoadMask.countPopulation()) == NumElems &&
          "Incomplete element masks");
 
   // Handle Special Cases - all undef or undef/zero.
-  if (UndefMask.count() == NumElems)
+  if (UndefMask.countPopulation() == NumElems)
     return DAG.getUNDEF(VT);
 
   // FIXME: Should we return this as a BUILD_VECTOR instead?
-  if ((ZeroMask | UndefMask).count() == NumElems)
+  if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
     return VT.isInteger() ? DAG.getConstant(0, DL, VT)
                           : DAG.getConstantFP(0.0, DL, VT);
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  int FirstLoadedElt = LoadMask.find_first();
+  int FirstLoadedElt = LoadMask.countTrailingZeros();
   SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
-  LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
-  EVT LDBaseVT = EltBase.getValueType();
+  EVT EltBaseVT = EltBase.getValueType();
+  assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
+         "Register/Memory size mismatch");
+  LoadSDNode *LDBase = Loads[FirstLoadedElt];
+  assert(LDBase && "Did not find base load for merging consecutive loads");
+  unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
+  unsigned BaseSizeInBytes = BaseSizeInBits / 8;
+  int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
+  assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
 
   // Consecutive loads can contain UNDEFS but not ZERO elements.
   // Consecutive loads with UNDEFs and ZEROs elements require a
@@ -7250,11 +7582,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   bool IsConsecutiveLoadWithZeros = true;
   for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
     if (LoadMask[i]) {
-      SDValue Elt = peekThroughBitcasts(Elts[i]);
-      LoadSDNode *LD = cast<LoadSDNode>(Elt);
-      if (!DAG.areNonVolatileConsecutiveLoads(
-              LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
-              i - FirstLoadedElt)) {
+      if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes,
+                                              i - FirstLoadedElt)) {
         IsConsecutiveLoad = false;
         IsConsecutiveLoadWithZeros = false;
         break;
@@ -7264,11 +7593,6 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
-  SmallVector<LoadSDNode *, 8> Loads;
-  for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
-    if (LoadMask[i])
-      Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
-
   auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
     auto MMOFlags = LDBase->getMemOperand()->getFlags();
     assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
@@ -7277,23 +7601,23 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
         DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                     LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
     for (auto *LD : Loads)
-      DAG.makeEquivalentMemoryOrdering(LD, NewLd);
+      if (LD)
+        DAG.makeEquivalentMemoryOrdering(LD, NewLd);
     return NewLd;
   };
 
-  // LOAD - all consecutive load/undefs (must start/end with a load).
-  // If we have found an entire vector of loads and undefs, then return a large
-  // load of the entire vector width starting at the base pointer.
-  // If the vector contains zeros, then attempt to shuffle those elements.
-  if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
+  // Check if the base load is entirely dereferenceable.
+  bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable(
+      VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout());
+
+  // LOAD - all consecutive load/undefs (must start/end with a load or be
+  // entirely dereferenceable). If we have found an entire vector of loads and
+  // undefs, then return a large load of the entire vector width starting at the
+  // base pointer. If the vector contains zeros, then attempt to shuffle those
+  // elements.
+  if (FirstLoadedElt == 0 &&
+      (LastLoadedElt == (int)(NumElems - 1) || IsDereferenceable) &&
       (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
-    assert(LDBase && "Did not find base load for merging consecutive loads");
-    EVT EltVT = LDBase->getValueType(0);
-    // Ensure that the input vector size for the merged loads matches the
-    // cumulative size of the input elements.
-    if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
-      return SDValue();
-
     if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
       return SDValue();
 
@@ -7303,12 +7627,15 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
         VT.is256BitVector() && !Subtarget.hasInt256())
       return SDValue();
 
-    if (IsConsecutiveLoad)
+    if (NumElems == 1)
+      return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
+
+    if (!ZeroMask)
       return CreateLoad(VT, LDBase);
 
     // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
     // vector and a zero vector to clear out the zero elements.
-    if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
+    if (!isAfterLegalize && VT.isVector()) {
       SmallVector<int, 4> ClearMask(NumElems, -1);
       for (unsigned i = 0; i < NumElems; ++i) {
         if (ZeroMask[i])
@@ -7323,16 +7650,28 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
-  int LoadSize =
-      (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
+  // If the upper half of a ymm/zmm load is undef then just load the lower half.
+  if (VT.is256BitVector() || VT.is512BitVector()) {
+    unsigned HalfNumElems = NumElems / 2;
+    if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
+      EVT HalfVT =
+          EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
+      SDValue HalfLD =
+          EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
+                                   DAG, Subtarget, isAfterLegalize);
+      if (HalfLD)
+        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                           HalfLD, DAG.getIntPtrConstant(0, DL));
+    }
+  }
 
   // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
   if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
-      (LoadSize == 32 || LoadSize == 64) &&
+      (LoadSizeInBits == 32 || LoadSizeInBits == 64) &&
       ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
-    MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
-                                      : MVT::getIntegerVT(LoadSize);
-    MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
+    MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
+                                      : MVT::getIntegerVT(LoadSizeInBits);
+    MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
     if (TLI.isTypeLegal(VecVT)) {
       SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
       SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
@@ -7342,14 +7681,85 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
                                   LDBase->getAlignment(),
                                   MachineMemOperand::MOLoad);
       for (auto *LD : Loads)
-        DAG.makeEquivalentMemoryOrdering(LD, ResNode);
+        if (LD)
+          DAG.makeEquivalentMemoryOrdering(LD, ResNode);
       return DAG.getBitcast(VT, ResNode);
     }
   }
 
+  // BROADCAST - match the smallest possible repetition pattern, load that
+  // scalar/subvector element and then broadcast to the entire vector.
+  if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
+      (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
+    for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
+      unsigned RepeatSize = SubElems * BaseSizeInBits;
+      unsigned ScalarSize = std::min(RepeatSize, 64u);
+      if (!Subtarget.hasAVX2() && ScalarSize < 32)
+        continue;
+
+      bool Match = true;
+      SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
+      for (unsigned i = 0; i != NumElems && Match; ++i) {
+        if (!LoadMask[i])
+          continue;
+        SDValue Elt = peekThroughBitcasts(Elts[i]);
+        if (RepeatedLoads[i % SubElems].isUndef())
+          RepeatedLoads[i % SubElems] = Elt;
+        else
+          Match &= (RepeatedLoads[i % SubElems] == Elt);
+      }
+
+      // We must have loads at both ends of the repetition.
+      Match &= !RepeatedLoads.front().isUndef();
+      Match &= !RepeatedLoads.back().isUndef();
+      if (!Match)
+        continue;
+
+      EVT RepeatVT =
+          VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
+              ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
+              : EVT::getFloatingPointVT(ScalarSize);
+      if (RepeatSize > ScalarSize)
+        RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
+                                    RepeatSize / ScalarSize);
+      EVT BroadcastVT =
+          EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
+                           VT.getSizeInBits() / ScalarSize);
+      if (TLI.isTypeLegal(BroadcastVT)) {
+        if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
+                RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) {
+          unsigned Opcode = RepeatSize > ScalarSize ? X86ISD::SUBV_BROADCAST
+                                                    : X86ISD::VBROADCAST;
+          SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad);
+          return DAG.getBitcast(VT, Broadcast);
+        }
+      }
+    }
+  }
+
   return SDValue();
 }
 
+// Combine vector ops (shuffles etc.) that are equal to build_vector load1,
+// load2, load3, load4, <0, 1, 2, 3> into a vector load if the load addresses
+// are consecutive, non-overlapping, and in the right order.
+static SDValue combineToConsecutiveLoads(EVT VT, SDNode *N, const SDLoc &DL,
+                                         SelectionDAG &DAG,
+                                         const X86Subtarget &Subtarget,
+                                         bool isAfterLegalize) {
+  SmallVector<SDValue, 64> Elts;
+  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+    if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
+      Elts.push_back(Elt);
+      continue;
+    }
+    return SDValue();
+  }
+  assert(Elts.size() == VT.getVectorNumElements());
+  return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget,
+                                  isAfterLegalize);
+}
+
 static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
                                    unsigned SplatBitSize, LLVMContext &C) {
   unsigned ScalarSize = VT.getScalarSizeInBits();
@@ -7373,12 +7783,20 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
   return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
 }
 
-static bool isUseOfShuffle(SDNode *N) {
+static bool isFoldableUseOfShuffle(SDNode *N) {
   for (auto *U : N->uses()) {
-    if (isTargetShuffle(U->getOpcode()))
+    unsigned Opc = U->getOpcode();
+    // VPERMV/VPERMV3 shuffles can never fold their index operands.
+    if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N)
+      return false;
+    if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N)
+      return false;
+    if (isTargetShuffle(Opc))
+      return true;
+    if (Opc == ISD::BITCAST) // Ignore bitcasts
+      return isFoldableUseOfShuffle(U);
+    if (N->hasOneUse())
       return true;
-    if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
-      return isUseOfShuffle(U);
   }
   return false;
 }
@@ -7486,7 +7904,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
         SplatBitSize < VT.getSizeInBits()) {
       // Avoid replacing with broadcast when it's a use of a shuffle
       // instruction to preserve the present custom lowering of shuffles.
-      if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
+      if (isFoldableUseOfShuffle(BVOp))
         return SDValue();
       // replace BUILD_VECTOR with broadcast of the repeated constants.
       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7581,7 +7999,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
   // TODO: If multiple splats are generated to load the same constant,
   // it may be detrimental to overall size. There needs to be a way to detect
   // that condition to know if this is truly a size win.
-  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
 
   // Handle broadcasting a single constant scalar from the constant pool
   // into a vector.
@@ -8330,6 +8748,22 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
   else if (V1.getValueSizeInBits() < Width)
     V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
 
+  unsigned NumElts = VT.getVectorNumElements();
+  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i)
+    if (BV->getOperand(i).isUndef())
+      DemandedElts.clearBit(i);
+
+  // If we don't need the upper xmm, then perform as an xmm hop.
+  unsigned HalfNumElts = NumElts / 2;
+  if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
+    MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts);
+    V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
+    V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
+    SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
+    return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
+  }
+
   return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
 }
 
@@ -8338,11 +8772,8 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
   // We need at least 2 non-undef elements to make this worthwhile by default.
-  unsigned NumNonUndefs = 0;
-  for (const SDValue &V : BV->op_values())
-    if (!V.isUndef())
-      ++NumNonUndefs;
-
+  unsigned NumNonUndefs =
+      count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
   if (NumNonUndefs < 2)
     return SDValue();
 
@@ -8350,23 +8781,15 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
   // int/FP at 128-bit/256-bit. Each type was introduced with a different
   // subtarget feature. Try to match those "native" patterns first.
   MVT VT = BV->getSimpleValueType(0);
-  unsigned HOpcode;
-  SDValue V0, V1;
-  if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3())
-    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
-  if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3())
-    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
-  if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX())
-    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
-      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
-  if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())
+  if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
+      ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
+      ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
+      ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
+    unsigned HOpcode;
+    SDValue V0, V1;
     if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
       return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+  }
 
   // Try harder to match 256-bit ops by using extract/concat.
   if (!Subtarget.hasAVX() || !VT.is256BitVector())
@@ -8481,9 +8904,15 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
       return SDValue();
 
   // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+  bool IsShift = false;
   switch (Opcode) {
   default:
     return SDValue();
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    IsShift = true;
+    break;
   case ISD::AND:
   case ISD::XOR:
   case ISD::OR:
@@ -8504,10 +8933,24 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
     // We expect the canonicalized RHS operand to be the constant.
     if (!isa<ConstantSDNode>(RHS))
       return SDValue();
+
+    // Extend shift amounts.
+    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+      if (!IsShift)
+        return SDValue();
+      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+    }
+
     LHSElts.push_back(LHS);
     RHSElts.push_back(RHS);
   }
 
+  // Limit to shifts by uniform immediates.
+  // TODO: Only accept vXi8/vXi64 special cases?
+  // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+    return SDValue();
+
   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
   return DAG.getNode(Opcode, DL, VT, LHS, RHS);
@@ -9288,60 +9731,9 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
   return Vec;
 }
 
-// Return true if all the operands of the given CONCAT_VECTORS node are zeros
-// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
-static bool isExpandWithZeros(const SDValue &Op) {
-  assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
-         "Expand with zeros only possible in CONCAT_VECTORS nodes!");
-
-  for (unsigned i = 1; i < Op.getNumOperands(); i++)
-    if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
-      return false;
-
-  return true;
-}
-
 // Returns true if the given node is a type promotion (by concatenating i1
 // zeros) of the result of a node that already zeros all upper bits of
 // k-register.
-static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
-  unsigned Opc = Op.getOpcode();
-
-  assert(Opc == ISD::CONCAT_VECTORS &&
-         Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
-         "Unexpected node to check for type promotion!");
-
-  // As long as we are concatenating zeros to the upper part of a previous node
-  // result, climb up the tree until a node with different opcode is
-  // encountered
-  while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
-    if (Opc == ISD::INSERT_SUBVECTOR) {
-      if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
-          Op.getConstantOperandVal(2) == 0)
-        Op = Op.getOperand(1);
-      else
-        return SDValue();
-    } else { // Opc == ISD::CONCAT_VECTORS
-      if (isExpandWithZeros(Op))
-        Op = Op.getOperand(0);
-      else
-        return SDValue();
-    }
-    Opc = Op.getOpcode();
-  }
-
-  // Check if the first inserted node zeroes the upper bits, or an 'and' result
-  // of a node that zeros the upper bits (its masked version).
-  if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
-      (Op.getOpcode() == ISD::AND &&
-       (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
-        isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
-    return Op;
-  }
-
-  return SDValue();
-}
-
 // TODO: Merge this with LowerAVXCONCAT_VECTORS?
 static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
                                        const X86Subtarget &Subtarget,
@@ -9353,13 +9745,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
   assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
          "Unexpected number of operands in CONCAT_VECTORS");
 
-  // If this node promotes - by concatenating zeroes - the type of the result
-  // of a node with instruction that zeroes all upper (irrelevant) bits of the
-  // output register, mark it as legal and catch the pattern in instruction
-  // selection to avoid emitting extra instructions (for zeroing upper bits).
-  if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op))
-    return widenSubVector(ResVT, Promoted, true, Subtarget, DAG, dl);
-
   unsigned NumZero = 0;
   unsigned NumNonZero = 0;
   uint64_t NonZeros = 0;
@@ -9618,6 +10003,8 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
   int Size = Mask.size();
   if (Size != (int)ExpectedMask.size())
     return false;
+  assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
+         "Illegal target shuffle mask");
 
   for (int i = 0; i < Size; ++i)
     if (Mask[i] == SM_SentinelUndef)
@@ -9687,6 +10074,40 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
   return IsUnpackwdMask;
 }
 
+static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
+  // Create 128-bit vector type based on mask size.
+  MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
+  MVT VT = MVT::getVectorVT(EltVT, Mask.size());
+
+  // We can't assume a canonical shuffle mask, so try the commuted version too.
+  SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
+  ShuffleVectorSDNode::commuteMask(CommutedMask);
+
+  // Match any of unary/binary or low/high.
+  for (unsigned i = 0; i != 4; ++i) {
+    SmallVector<int, 16> UnpackMask;
+    createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
+    if (isTargetShuffleEquivalent(Mask, UnpackMask) ||
+        isTargetShuffleEquivalent(CommutedMask, UnpackMask))
+      return true;
+  }
+  return false;
+}
+
+/// Return true if a shuffle mask chooses elements identically in its top and
+/// bottom halves. For example, any splat mask has the same top and bottom
+/// halves. If an element is undefined in only one half of the mask, the halves
+/// are not considered identical.
+static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) {
+  assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
+  unsigned HalfSize = Mask.size() / 2;
+  for (unsigned i = 0; i != HalfSize; ++i) {
+    if (Mask[i] != Mask[i + HalfSize])
+      return false;
+  }
+  return true;
+}
+
 /// Get a 4-lane 8-bit shuffle immediate for a mask.
 ///
 /// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -9826,12 +10247,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
 }
 
 /// Try to lower a shuffle with a single PSHUFB of V1 or V2.
-static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2,
-                                            const APInt &Zeroable,
-                                            const X86Subtarget &Subtarget,
-                                            SelectionDAG &DAG) {
+static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
+                                      ArrayRef<int> Mask, SDValue V1,
+                                      SDValue V2, const APInt &Zeroable,
+                                      const X86Subtarget &Subtarget,
+                                      SelectionDAG &DAG) {
   int Size = Mask.size();
   int LaneSize = 128 / VT.getScalarSizeInBits();
   const int NumBytes = VT.getSizeInBits() / 8;
@@ -9885,11 +10305,11 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                            const SDLoc &dl);
 
 // X86 has dedicated shuffle that can be lowered to VEXPAND
-static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
-                                          const APInt &Zeroable,
-                                          ArrayRef<int> Mask, SDValue &V1,
-                                          SDValue &V2, SelectionDAG &DAG,
-                                          const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
+                                    const APInt &Zeroable,
+                                    ArrayRef<int> Mask, SDValue &V1,
+                                    SDValue &V2, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
   bool IsLeftZeroSide = true;
   if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                 IsLeftZeroSide))
@@ -9905,9 +10325,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
                               Subtarget, DAG, DL);
   SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
   SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
-  return DAG.getSelect(DL, VT, VMask,
-                       DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
-                       ZeroVector);
+  return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
 }
 
 static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
@@ -9997,9 +10415,9 @@ static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
 
 // X86 has dedicated unpack instructions that can handle specific blend
 // operations: UNPCKH and UNPCKL.
-static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
-                                           ArrayRef<int> Mask, SDValue V1,
-                                           SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
+                                     ArrayRef<int> Mask, SDValue V1, SDValue V2,
+                                     SelectionDAG &DAG) {
   SmallVector<int, 8> Unpckl;
   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
   if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
@@ -10061,10 +10479,10 @@ static bool matchVectorShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
 //
 // But when avx512vl is available, one can just use a single vpmovdw
 // instruction.
-static SDValue lowerVectorShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
-                                           MVT VT, SDValue V1, SDValue V2,
-                                           SelectionDAG &DAG,
-                                           const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
+                                     MVT VT, SDValue V1, SDValue V2,
+                                     SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
   if (VT != MVT::v16i8 && VT != MVT::v8i16)
     return SDValue();
 
@@ -10169,10 +10587,9 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
   return false;
 }
 
-static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
-                                          ArrayRef<int> Mask, SDValue V1,
-                                          SDValue V2, SelectionDAG &DAG,
-                                          const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
+                                    SDValue V1, SDValue V2, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
   MVT PackVT;
   unsigned PackOpcode;
   if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
@@ -10187,14 +10604,32 @@ static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
 ///
 /// This handles cases where we can model a blend exactly as a bitmask due to
 /// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
-                                           SDValue V2, ArrayRef<int> Mask,
-                                           const APInt &Zeroable,
-                                           SelectionDAG &DAG) {
-  assert(!VT.isFloatingPoint() && "Floating point types are not supported");
+static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
+                                     SDValue V2, ArrayRef<int> Mask,
+                                     const APInt &Zeroable,
+                                     const X86Subtarget &Subtarget,
+                                     SelectionDAG &DAG) {
+  MVT MaskVT = VT;
   MVT EltVT = VT.getVectorElementType();
-  SDValue Zero = DAG.getConstant(0, DL, EltVT);
-  SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+  SDValue Zero, AllOnes;
+  // Use f64 if i64 isn't legal.
+  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+    EltVT = MVT::f64;
+    MaskVT = MVT::getVectorVT(EltVT, Mask.size());
+  }
+
+  MVT LogicVT = VT;
+  if (EltVT == MVT::f32 || EltVT == MVT::f64) {
+    Zero = DAG.getConstantFP(0.0, DL, EltVT);
+    AllOnes = DAG.getConstantFP(
+        APFloat::getAllOnesValue(EltVT.getSizeInBits(), true), DL, EltVT);
+    LogicVT =
+        MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
+  } else {
+    Zero = DAG.getConstant(0, DL, EltVT);
+    AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+  }
+
   SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
   SDValue V;
   for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -10212,8 +10647,11 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
   if (!V)
     return SDValue(); // No non-zeroable elements!
 
-  SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
-  return DAG.getNode(ISD::AND, DL, VT, V, VMask);
+  SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
+  VMask = DAG.getBitcast(LogicVT, VMask);
+  V = DAG.getBitcast(LogicVT, V);
+  SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
+  return DAG.getBitcast(VT, And);
 }
 
 /// Try to emit a blend instruction for a shuffle using bit math.
@@ -10221,9 +10659,9 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
 /// This is used as a fallback approach when first class blend instructions are
 /// unavailable. Currently it is only suitable for integer vectors, but could
 /// be generalized for floating point vectors if desirable.
-static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
-                                            SDValue V2, ArrayRef<int> Mask,
-                                            SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
+                                      SDValue V2, ArrayRef<int> Mask,
+                                      SelectionDAG &DAG) {
   assert(VT.isInteger() && "Only supports integer vector types!");
   MVT EltVT = VT.getVectorElementType();
   SDValue Zero = DAG.getConstant(0, DL, EltVT);
@@ -10305,11 +10743,11 @@ static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
 /// these values. It relies on the availability of the X86ISD::BLENDI pattern to
 /// be matched in the backend with the type given. What it does check for is
 /// that the shuffle mask is a blend, or convertible into a blend with zero.
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
-                                         SDValue V2, ArrayRef<int> Original,
-                                         const APInt &Zeroable,
-                                         const X86Subtarget &Subtarget,
-                                         SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
+                                   SDValue V2, ArrayRef<int> Original,
+                                   const APInt &Zeroable,
+                                   const X86Subtarget &Subtarget,
+                                   SelectionDAG &DAG) {
   SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
 
   uint64_t BlendMask = 0;
@@ -10325,45 +10763,24 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     V2 = getZeroVector(VT, Subtarget, DAG, DL);
 
   switch (VT.SimpleTy) {
-  case MVT::v2f64:
-  case MVT::v4f32:
-  case MVT::v4f64:
-  case MVT::v8f32:
-    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
-                       DAG.getConstant(BlendMask, DL, MVT::i8));
   case MVT::v4i64:
   case MVT::v8i32:
     assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
     LLVM_FALLTHROUGH;
+  case MVT::v4f64:
+  case MVT::v8f32:
+    assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
+    LLVM_FALLTHROUGH;
+  case MVT::v2f64:
   case MVT::v2i64:
+  case MVT::v4f32:
   case MVT::v4i32:
-    // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
-    // that instruction.
-    if (Subtarget.hasAVX2()) {
-      // Scale the blend by the number of 32-bit dwords per element.
-      int Scale =  VT.getScalarSizeInBits() / 32;
-      BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
-      MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
-      V1 = DAG.getBitcast(BlendVT, V1);
-      V2 = DAG.getBitcast(BlendVT, V2);
-      return DAG.getBitcast(
-          VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
-                          DAG.getConstant(BlendMask, DL, MVT::i8)));
-    }
-    LLVM_FALLTHROUGH;
-  case MVT::v8i16: {
-    // For integer shuffles we need to expand the mask and cast the inputs to
-    // v8i16s prior to blending.
-    int Scale = 8 / VT.getVectorNumElements();
-    BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
-    V1 = DAG.getBitcast(MVT::v8i16, V1);
-    V2 = DAG.getBitcast(MVT::v8i16, V2);
-    return DAG.getBitcast(VT,
-                          DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
-                                      DAG.getConstant(BlendMask, DL, MVT::i8)));
-  }
+  case MVT::v8i16:
+    assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
+    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+                       DAG.getConstant(BlendMask, DL, MVT::i8));
   case MVT::v16i16: {
-    assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
+    assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
     SmallVector<int, 8> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
       // We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -10391,14 +10808,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     }
     LLVM_FALLTHROUGH;
   }
-  case MVT::v16i8:
-  case MVT::v32i8: {
-    assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
-           "256-bit byte-blends require AVX2 support!");
+  case MVT::v32i8:
+    assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
+    LLVM_FALLTHROUGH;
+  case MVT::v16i8: {
+    assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
 
     // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
-    if (SDValue Masked =
-            lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+    if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+                                               Subtarget, DAG))
       return Masked;
 
     if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
@@ -10456,6 +10874,16 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
   case MVT::v16i32:
   case MVT::v32i16:
   case MVT::v64i8: {
+    // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
+    bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+    if (!OptForSize) {
+      if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+                                                 Subtarget, DAG))
+        return Masked;
+    }
+
+    // Otherwise load an immediate into a GPR, cast to k-register, and use a
+    // masked move.
     MVT IntegerType =
         MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
     SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
@@ -10471,11 +10899,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
 ///
 /// This matches the pattern where we can blend elements from two inputs and
 /// then reduce the shuffle to a single-input permutation.
-static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
-                                                   SDValue V1, SDValue V2,
-                                                   ArrayRef<int> Mask,
-                                                   SelectionDAG &DAG,
-                                                   bool ImmBlends = false) {
+static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
+                                             SDValue V1, SDValue V2,
+                                             ArrayRef<int> Mask,
+                                             SelectionDAG &DAG,
+                                             bool ImmBlends = false) {
   // We build up the blend mask while checking whether a blend is a viable way
   // to reduce the shuffle.
   SmallVector<int, 32> BlendMask(Mask.size(), -1);
@@ -10510,10 +10938,10 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
 ///
 /// This matches the pattern where we can unpack elements from two inputs and
 /// then reduce the shuffle to a single-input (wider) permutation.
-static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
-                                                   SDValue V1, SDValue V2,
-                                                   ArrayRef<int> Mask,
-                                                   SelectionDAG &DAG) {
+static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
+                                             SDValue V1, SDValue V2,
+                                             ArrayRef<int> Mask,
+                                             SelectionDAG &DAG) {
   int NumElts = Mask.size();
   int NumLanes = VT.getSizeInBits() / 128;
   int NumLaneElts = NumElts / NumLanes;
@@ -10573,7 +11001,7 @@ static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
 
 /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
 /// permuting the elements of the result in place.
-static SDValue lowerVectorShuffleAsByteRotateAndPermute(
+static SDValue lowerShuffleAsByteRotateAndPermute(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
@@ -10664,7 +11092,7 @@ static SDValue lowerVectorShuffleAsByteRotateAndPermute(
 /// shuffle+blend operations on newer X86 ISAs where we have very fast blend
 /// operations. It will try to pick the best arrangement of shuffles and
 /// blends.
-static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
+static SDValue lowerShuffleAsDecomposedShuffleBlend(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   // Shuffle the input elements into the desired positions in V1 and V2 and
@@ -10688,18 +11116,18 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
   // pre-shuffle first is a better strategy.
   if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
     // Only prefer immediate blends to unpack/rotate.
-    if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
-            DL, VT, V1, V2, Mask, DAG, true))
+    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
+                                                          DAG, true))
       return BlendPerm;
-    if (SDValue UnpackPerm =
-            lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+    if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
+                                                           DAG))
       return UnpackPerm;
-    if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
+    if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
             DL, VT, V1, V2, Mask, Subtarget, DAG))
       return RotatePerm;
     // Unpack/rotate failed - try again with variable blends.
-    if (SDValue BlendPerm =
-            lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
+    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
+                                                          DAG))
       return BlendPerm;
   }
 
@@ -10711,8 +11139,7 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
 /// Try to lower a vector shuffle as a rotation.
 ///
 /// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
-static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
-                                      ArrayRef<int> Mask) {
+static int matchShuffleAsRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
   int NumElts = Mask.size();
 
   // We need to detect various ways of spelling a rotation:
@@ -10796,8 +11223,8 @@ static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
 /// elements, and takes the low elements as the result. Note that while this is
 /// specified as a *right shift* because x86 is little-endian, it is a *left
 /// rotate* of the vector lanes.
-static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
-                                          ArrayRef<int> Mask) {
+static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+                                    ArrayRef<int> Mask) {
   // Don't accept any shuffles with zero elements.
   if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
     return -1;
@@ -10807,7 +11234,7 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
   if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
     return -1;
 
-  int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
+  int Rotation = matchShuffleAsRotate(V1, V2, RepeatedMask);
   if (Rotation <= 0)
     return -1;
 
@@ -10818,15 +11245,14 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
   return Rotation * Scale;
 }
 
-static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
-                                              SDValue V1, SDValue V2,
-                                              ArrayRef<int> Mask,
-                                              const X86Subtarget &Subtarget,
-                                              SelectionDAG &DAG) {
+static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
+                                        SDValue V2, ArrayRef<int> Mask,
+                                        const X86Subtarget &Subtarget,
+                                        SelectionDAG &DAG) {
   assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
 
   SDValue Lo = V1, Hi = V2;
-  int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
+  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
   if (ByteRotation <= 0)
     return SDValue();
 
@@ -10874,11 +11300,10 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
 /// elements, and takes the low elements as the result. Note that while this is
 /// specified as a *right shift* because x86 is little-endian, it is a *left
 /// rotate* of the vector lanes.
-static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
-                                          SDValue V1, SDValue V2,
-                                          ArrayRef<int> Mask,
-                                          const X86Subtarget &Subtarget,
-                                          SelectionDAG &DAG) {
+static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
+                                    SDValue V2, ArrayRef<int> Mask,
+                                    const X86Subtarget &Subtarget,
+                                    SelectionDAG &DAG) {
   assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
          "Only 32-bit and 64-bit elements are supported!");
 
@@ -10887,7 +11312,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
          && "VLX required for 128/256-bit vectors");
 
   SDValue Lo = V1, Hi = V2;
-  int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
+  int Rotation = matchShuffleAsRotate(Lo, Hi, Mask);
   if (Rotation <= 0)
     return SDValue();
 
@@ -10895,6 +11320,69 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
                      DAG.getConstant(Rotation, DL, MVT::i8));
 }
 
+/// Try to lower a 128-bit shuffle as a VSHLDQ/VSRLDQ byte shift sequence.
+static SDValue lowerVectorShuffleAsByteShiftMask(
+    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+    const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+  assert(VT.is128BitVector() && "Only 128-bit vectors supported");
+
+  // Require zeroable elements at one/both ends of the mask, with a sequential
+  // (or undef) run of elements from a single source in between.
+  unsigned ZeroLo = Zeroable.countTrailingOnes();
+  unsigned ZeroHi = Zeroable.countLeadingOnes();
+  if (!ZeroLo && !ZeroHi)
+    return SDValue();
+
+  unsigned NumElts = Mask.size();
+  unsigned Len = NumElts - (ZeroLo + ZeroHi);
+  if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
+    return SDValue();
+
+  unsigned Scale = VT.getScalarSizeInBits() / 8;
+  ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
+  if (!isUndefOrInRange(StubMask, 0, NumElts) &&
+      !isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
+    return SDValue();
+
+  SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
+  Res = DAG.getBitcast(MVT::v16i8, Res);
+
+  // Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an
+  // inner sequential set of elements, possibly offset:
+  // 01234567 --> zzzzzz01 --> 1zzzzzzz
+  // 01234567 --> 4567zzzz --> zzzzz456
+  // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz
+  if (ZeroLo == 0) {
+    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
+    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
+  } else if (ZeroHi == 0) {
+    unsigned Shift = Mask[ZeroLo] % NumElts;
+    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
+    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+  } else if (!Subtarget.hasSSSE3()) {
+    // If we don't have PSHUFB then it's worth avoiding an AND constant mask
+    // by performing 3 byte shifts. Shuffle combining can kick in above that.
+    // TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
+    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
+    Shift += Mask[ZeroLo] % NumElts;
+    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * Shift, DL, MVT::i8));
+    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+                      DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+  } else
+    return SDValue();
+
+  return DAG.getBitcast(VT, Res);
+}
+
 /// Try to lower a vector shuffle as a bit shift (shifts in zeros).
 ///
 /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -10918,11 +11406,10 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
 /// [  5, 6,  7, zz, zz, zz, zz, zz]
 /// [ -1, 5,  6,  7, zz, zz, zz, zz]
 /// [  1, 2, -1, -1, -1, -1, zz, zz]
-static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
-                                     unsigned ScalarSizeInBits,
-                                     ArrayRef<int> Mask, int MaskOffset,
-                                     const APInt &Zeroable,
-                                     const X86Subtarget &Subtarget) {
+static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
+                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
+                               int MaskOffset, const APInt &Zeroable,
+                               const X86Subtarget &Subtarget) {
   int Size = Mask.size();
   unsigned SizeInBits = Size * ScalarSizeInBits;
 
@@ -10981,11 +11468,11 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
   return -1;
 }
 
-static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
-                                         SDValue V2, ArrayRef<int> Mask,
-                                         const APInt &Zeroable,
-                                         const X86Subtarget &Subtarget,
-                                         SelectionDAG &DAG) {
+static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
+                                   SDValue V2, ArrayRef<int> Mask,
+                                   const APInt &Zeroable,
+                                   const X86Subtarget &Subtarget,
+                                   SelectionDAG &DAG) {
   int Size = Mask.size();
   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
 
@@ -10994,14 +11481,13 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
   unsigned Opcode;
 
   // Try to match shuffle against V1 shift.
-  int ShiftAmt = matchVectorShuffleAsShift(
-      ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget);
+  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+                                     Mask, 0, Zeroable, Subtarget);
 
   // If V1 failed, try to match shuffle against V2 shift.
   if (ShiftAmt < 0) {
-    ShiftAmt =
-        matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
-                                  Mask, Size, Zeroable, Subtarget);
+    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+                                   Mask, Size, Zeroable, Subtarget);
     V = V2;
   }
 
@@ -11018,16 +11504,16 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
 
 // EXTRQ: Extract Len elements from lower half of source, starting at Idx.
 // Remainder of lower half result is zero and upper half is all undef.
-static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
-                                      ArrayRef<int> Mask, uint64_t &BitLen,
-                                      uint64_t &BitIdx, const APInt &Zeroable) {
+static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
+                                ArrayRef<int> Mask, uint64_t &BitLen,
+                                uint64_t &BitIdx, const APInt &Zeroable) {
   int Size = Mask.size();
   int HalfSize = Size / 2;
   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
   assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
 
   // Upper half must be undefined.
-  if (!isUndefInRange(Mask, HalfSize, HalfSize))
+  if (!isUndefUpperHalf(Mask))
     return false;
 
   // Determine the extraction length from the part of the
@@ -11074,15 +11560,15 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
 // INSERTQ: Extract lowest Len elements from lower half of second source and
 // insert over first source, starting at Idx.
 // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
-static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
-                                        ArrayRef<int> Mask, uint64_t &BitLen,
-                                        uint64_t &BitIdx) {
+static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
+                                  ArrayRef<int> Mask, uint64_t &BitLen,
+                                  uint64_t &BitIdx) {
   int Size = Mask.size();
   int HalfSize = Size / 2;
   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
 
   // Upper half must be undefined.
-  if (!isUndefInRange(Mask, HalfSize, HalfSize))
+  if (!isUndefUpperHalf(Mask))
     return false;
 
   for (int Idx = 0; Idx != HalfSize; ++Idx) {
@@ -11140,17 +11626,16 @@ static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
 }
 
 /// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
-static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
-                                           SDValue V2, ArrayRef<int> Mask,
-                                           const APInt &Zeroable,
-                                           SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
+                                     SDValue V2, ArrayRef<int> Mask,
+                                     const APInt &Zeroable, SelectionDAG &DAG) {
   uint64_t BitLen, BitIdx;
-  if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
+  if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
     return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
                        DAG.getConstant(BitLen, DL, MVT::i8),
                        DAG.getConstant(BitIdx, DL, MVT::i8));
 
-  if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
+  if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
     return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
                        V2 ? V2 : DAG.getUNDEF(VT),
                        DAG.getConstant(BitLen, DL, MVT::i8),
@@ -11168,7 +11653,7 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
 /// avoid excess shuffling the offset must either being in the bottom lane
 /// or at the start of a higher lane. All extended elements must be from
 /// the same lane.
-static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
     const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
     ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(Scale > 1 && "Need a scale to extend.");
@@ -11203,6 +11688,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
 
   // Found a valid zext mask! Try various lowering strategies based on the
   // input type and available ISA extensions.
+  // TODO: Add AnyExt support.
   if (Subtarget.hasSSE41()) {
     // Not worth offsetting 128-bit vectors if scale == 2, a pattern using
     // PUNPCK will catch this in a later shuffle match.
@@ -11211,7 +11697,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
     MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                  NumElements / Scale);
     InputV = ShuffleOffset(InputV);
-    InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG);
+    InputV = getExtendInVec(ISD::ZERO_EXTEND, DL, ExtVT, InputV, DAG);
     return DAG.getBitcast(VT, InputV);
   }
 
@@ -11234,7 +11720,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
                          DAG.getBitcast(MVT::v4i32, InputV),
                          getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
     int PSHUFWMask[4] = {1, -1, -1, -1};
-    unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
+    unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
     return DAG.getBitcast(
         VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
                         DAG.getBitcast(MVT::v8i16, InputV),
@@ -11253,8 +11739,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
                                 DAG.getConstant(EltBits, DL, MVT::i8),
                                 DAG.getConstant(LoIdx, DL, MVT::i8)));
 
-    if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
-        !SafeOffset(Offset + 1))
+    if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
       return DAG.getBitcast(VT, Lo);
 
     int HiIdx = (Offset + 1) * EltBits;
@@ -11326,7 +11811,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
 ///
 /// The reason we have dedicated lowering for zext-style shuffles is that they
 /// are both incredibly common and often quite performance sensitive.
-static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
+static SDValue lowerShuffleAsZeroOrAnyExtend(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const APInt &Zeroable, const X86Subtarget &Subtarget,
     SelectionDAG &DAG) {
@@ -11397,8 +11882,8 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
     if (Offset != 0 && Matches < 2)
       return SDValue();
 
-    return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
-        DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
+    return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
+                                                 InputV, Mask, Subtarget, DAG);
   };
 
   // The widest scale possible for extending is to a 64-bit integer.
@@ -11482,7 +11967,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
 ///
 /// This is a common pattern that we have especially efficient patterns to lower
 /// across all subtarget feature sets.
-static SDValue lowerVectorShuffleAsElementInsertion(
+static SDValue lowerShuffleAsElementInsertion(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const APInt &Zeroable, const X86Subtarget &Subtarget,
     SelectionDAG &DAG) {
@@ -11580,10 +12065,10 @@ static SDValue lowerVectorShuffleAsElementInsertion(
 /// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
 ///
 /// This assumes we have AVX2.
-static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
-                                                  SDValue V0, int BroadcastIdx,
-                                                  const X86Subtarget &Subtarget,
-                                                  SelectionDAG &DAG) {
+static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
+                                            int BroadcastIdx,
+                                            const X86Subtarget &Subtarget,
+                                            SelectionDAG &DAG) {
   assert(Subtarget.hasAVX2() &&
          "We can only lower integer broadcasts with AVX2!");
 
@@ -11629,16 +12114,90 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
                      DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
 }
 
+/// Test whether this can be lowered with a single SHUFPS instruction.
+///
+/// This is used to disable more specialized lowerings when the shufps lowering
+/// will happen to be efficient.
+static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
+  // This routine only handles 128-bit shufps.
+  assert(Mask.size() == 4 && "Unsupported mask size!");
+  assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
+  assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
+  assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
+  assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
+
+  // To lower with a single SHUFPS we need to have the low half and high half
+  // each requiring a single input.
+  if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
+    return false;
+  if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
+    return false;
+
+  return true;
+}
+
+/// If we are extracting two 128-bit halves of a vector and shuffling the
+/// result, match that to a 256-bit AVX2 vperm* instruction to avoid a
+/// multi-shuffle lowering.
+static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
+                                             SDValue N1, ArrayRef<int> Mask,
+                                             SelectionDAG &DAG) {
+  EVT VT = N0.getValueType();
+  assert((VT.is128BitVector() &&
+          (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
+         "VPERM* family of shuffles requires 32-bit or 64-bit elements");
+
+  // Check that both sources are extracts of the same source vector.
+  if (!N0.hasOneUse() || !N1.hasOneUse() ||
+      N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      N0.getOperand(0) != N1.getOperand(0))
+    return SDValue();
+
+  SDValue WideVec = N0.getOperand(0);
+  EVT WideVT = WideVec.getValueType();
+  if (!WideVT.is256BitVector() || !isa<ConstantSDNode>(N0.getOperand(1)) ||
+      !isa<ConstantSDNode>(N1.getOperand(1)))
+    return SDValue();
+
+  // Match extracts of each half of the wide source vector. Commute the shuffle
+  // if the extract of the low half is N1.
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
+  const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1);
+  const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1);
+  if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
+    ShuffleVectorSDNode::commuteMask(NewMask);
+  else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
+    return SDValue();
+
+  // Final bailout: if the mask is simple, we are better off using an extract
+  // and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps
+  // because that avoids a constant load from memory.
+  if (NumElts == 4 &&
+      (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
+    return SDValue();
+
+  // Extend the shuffle mask with undef elements.
+  NewMask.append(NumElts, -1);
+
+  // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0
+  SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
+                                      NewMask);
+  // This is free: ymm -> xmm.
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
+                     DAG.getIntPtrConstant(0, DL));
+}
+
 /// Try to lower broadcast of a single element.
 ///
 /// For convenience, this code also bundles all of the subtarget feature set
 /// filtering. While a little annoying to re-dispatch on type here, there isn't
 /// a convenient way to factor it out.
-static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
-                                             SDValue V1, SDValue V2,
-                                             ArrayRef<int> Mask,
-                                             const X86Subtarget &Subtarget,
-                                             SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
+                                       SDValue V2, ArrayRef<int> Mask,
+                                       const X86Subtarget &Subtarget,
+                                       SelectionDAG &DAG) {
   if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
         (Subtarget.hasAVX() && VT.isFloatingPoint()) ||
         (Subtarget.hasAVX2() && VT.isInteger())))
@@ -11647,6 +12206,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
   // With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
   // we can only broadcast from a register with AVX2.
   unsigned NumElts = Mask.size();
+  unsigned NumEltBits = VT.getScalarSizeInBits();
   unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
                         ? X86ISD::MOVDDUP
                         : X86ISD::VBROADCAST;
@@ -11670,29 +12230,19 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
 
   // Go up the chain of (vector) values to find a scalar load that we can
   // combine with the broadcast.
+  int BitOffset = BroadcastIdx * NumEltBits;
   SDValue V = V1;
   for (;;) {
     switch (V.getOpcode()) {
     case ISD::BITCAST: {
-      // Peek through bitcasts as long as BroadcastIdx can be adjusted.
-      SDValue VSrc = V.getOperand(0);
-      unsigned NumEltBits = V.getScalarValueSizeInBits();
-      unsigned NumSrcBits = VSrc.getScalarValueSizeInBits();
-      if ((NumEltBits % NumSrcBits) == 0)
-        BroadcastIdx *= (NumEltBits / NumSrcBits);
-      else if ((NumSrcBits % NumEltBits) == 0 &&
-               (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
-        BroadcastIdx /= (NumSrcBits / NumEltBits);
-      else
-        break;
-      V = VSrc;
+      V = V.getOperand(0);
       continue;
     }
     case ISD::CONCAT_VECTORS: {
-      int OperandSize =
-          V.getOperand(0).getSimpleValueType().getVectorNumElements();
-      V = V.getOperand(BroadcastIdx / OperandSize);
-      BroadcastIdx %= OperandSize;
+      int OpBitWidth = V.getOperand(0).getValueSizeInBits();
+      int OpIdx = BitOffset / OpBitWidth;
+      V = V.getOperand(OpIdx);
+      BitOffset %= OpBitWidth;
       continue;
     }
     case ISD::INSERT_SUBVECTOR: {
@@ -11701,11 +12251,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
       if (!ConstantIdx)
         break;
 
-      int BeginIdx = (int)ConstantIdx->getZExtValue();
-      int EndIdx =
-          BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
-      if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
-        BroadcastIdx -= BeginIdx;
+      int EltBitWidth = VOuter.getScalarValueSizeInBits();
+      int Idx = (int)ConstantIdx->getZExtValue();
+      int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
+      int BeginOffset = Idx * EltBitWidth;
+      int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
+      if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
+        BitOffset -= BeginOffset;
         V = VInner;
       } else {
         V = VOuter;
@@ -11715,48 +12267,34 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
     }
     break;
   }
+  assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");
+  BroadcastIdx = BitOffset / NumEltBits;
 
-  // Ensure the source vector and BroadcastIdx are for a suitable type.
-  if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) {
-    unsigned NumEltBits = VT.getScalarSizeInBits();
-    unsigned NumSrcBits = V.getScalarValueSizeInBits();
-    if ((NumSrcBits % NumEltBits) == 0)
-      BroadcastIdx *= (NumSrcBits / NumEltBits);
-    else if ((NumEltBits % NumSrcBits) == 0 &&
-             (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0)
-      BroadcastIdx /= (NumEltBits / NumSrcBits);
-    else
-      return SDValue();
-
-    unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
-    MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts);
-    V = DAG.getBitcast(SrcVT, V);
-  }
+  // Do we need to bitcast the source to retrieve the original broadcast index?
+  bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;
 
   // Check if this is a broadcast of a scalar. We special case lowering
   // for scalars so that we can more effectively fold with loads.
-  // First, look through bitcast: if the original value has a larger element
-  // type than the shuffle, the broadcast element is in essence truncated.
-  // Make that explicit to ease folding.
-  if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
-    if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
-            DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+  // If the original value has a larger element type than the shuffle, the
+  // broadcast element is in essence truncated. Make that explicit to ease
+  // folding.
+  if (BitCastSrc && VT.isInteger())
+    if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
+            DL, VT, V, BroadcastIdx, Subtarget, DAG))
       return TruncBroadcast;
 
   MVT BroadcastVT = VT;
 
-  // Peek through any bitcast (only useful for loads).
-  SDValue BC = peekThroughBitcasts(V);
-
   // Also check the simpler case, where we can directly reuse the scalar.
-  if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
-      (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
+  if (!BitCastSrc &&
+      ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
+       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) {
     V = V.getOperand(BroadcastIdx);
 
     // If we can't broadcast from a register, check that the input is a load.
     if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
       return SDValue();
-  } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
+  } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
     // 32-bit targets need to load i64 as a f64 and then bitcast the result.
     if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
       BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
@@ -11767,10 +12305,11 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
 
     // If we are broadcasting a load that is only used by the shuffle
     // then we can reduce the vector load to the broadcasted scalar load.
-    LoadSDNode *Ld = cast<LoadSDNode>(BC);
+    LoadSDNode *Ld = cast<LoadSDNode>(V);
     SDValue BaseAddr = Ld->getOperand(1);
     EVT SVT = BroadcastVT.getScalarType();
     unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+    assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
     SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
     V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                     DAG.getMachineFunction().getMachineMemOperand(
@@ -11779,7 +12318,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
   } else if (!BroadcastFromReg) {
     // We can't broadcast from a vector register.
     return SDValue();
-  } else if (BroadcastIdx != 0) {
+  } else if (BitOffset != 0) {
     // We can only broadcast from the zero-element of a vector register,
     // but it can be advantageous to broadcast from the zero-element of a
     // subvector.
@@ -11791,18 +12330,15 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
       return SDValue();
 
     // Only broadcast the zero-element of a 128-bit subvector.
-    unsigned EltSize = VT.getScalarSizeInBits();
-    if (((BroadcastIdx * EltSize) % 128) != 0)
+    if ((BitOffset % 128) != 0)
       return SDValue();
 
-    // The shuffle input might have been a bitcast we looked through; look at
-    // the original input vector.  Emit an EXTRACT_SUBVECTOR of that type; we'll
-    // later bitcast it to BroadcastVT.
-    assert(V.getScalarValueSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
-           "Unexpected vector element size");
+    assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
+           "Unexpected bit-offset");
     assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
            "Unexpected vector size");
-    V = extract128BitVector(V, BroadcastIdx, DAG, DL);
+    unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
+    V = extract128BitVector(V, ExtractIdx, DAG, DL);
   }
 
   if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
@@ -11810,21 +12346,21 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
                     DAG.getBitcast(MVT::f64, V));
 
   // Bitcast back to the same scalar type as BroadcastVT.
-  MVT SrcVT = V.getSimpleValueType();
-  if (SrcVT.getScalarType() != BroadcastVT.getScalarType()) {
-    assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
+  if (V.getValueType().getScalarType() != BroadcastVT.getScalarType()) {
+    assert(NumEltBits == BroadcastVT.getScalarSizeInBits() &&
            "Unexpected vector element size");
-    if (SrcVT.isVector()) {
-      unsigned NumSrcElts = SrcVT.getVectorNumElements();
-      SrcVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
+    MVT ExtVT;
+    if (V.getValueType().isVector()) {
+      unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+      ExtVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
     } else {
-      SrcVT = BroadcastVT.getScalarType();
+      ExtVT = BroadcastVT.getScalarType();
     }
-    V = DAG.getBitcast(SrcVT, V);
+    V = DAG.getBitcast(ExtVT, V);
   }
 
   // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-  if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
+  if (!Subtarget.is64Bit() && V.getValueType() == MVT::i64) {
     V = DAG.getBitcast(MVT::f64, V);
     unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
     BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
@@ -11833,9 +12369,9 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
   // We only support broadcasting from 128-bit vectors to minimize the
   // number of patterns we need to deal with in isel. So extract down to
   // 128-bits, removing as many bitcasts as possible.
-  if (SrcVT.getSizeInBits() > 128) {
-    MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
-                                 128 / SrcVT.getScalarSizeInBits());
+  if (V.getValueSizeInBits() > 128) {
+    MVT ExtVT = V.getSimpleValueType().getScalarType();
+    ExtVT = MVT::getVectorVT(ExtVT, 128 / ExtVT.getScalarSizeInBits());
     V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
     V = DAG.getBitcast(ExtVT, V);
   }
@@ -11849,11 +12385,10 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
 // are much smaller to encode than a SHUFPS and an INSERTPS. We can also
 // perform INSERTPS if a single V1 element is out of place and all V2
 // elements are zeroable.
-static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
-                                         unsigned &InsertPSMask,
-                                         const APInt &Zeroable,
-                                         ArrayRef<int> Mask,
-                                         SelectionDAG &DAG) {
+static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
+                                   unsigned &InsertPSMask,
+                                   const APInt &Zeroable,
+                                   ArrayRef<int> Mask, SelectionDAG &DAG) {
   assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
   assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -11938,16 +12473,15 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
   return false;
 }
 
-static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
-                                            SDValue V2, ArrayRef<int> Mask,
-                                            const APInt &Zeroable,
-                                            SelectionDAG &DAG) {
+static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
+                                      ArrayRef<int> Mask, const APInt &Zeroable,
+                                      SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
 
   // Attempt to match the insertps pattern.
   unsigned InsertPSMask;
-  if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
+  if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
     return SDValue();
 
   // Insert the V2 element into the desired position.
@@ -11964,7 +12498,7 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
 /// because for floating point vectors we have a generalized SHUFPS lowering
 /// strategy that handles everything that doesn't *exactly* match an unpack,
 /// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+static SDValue lowerShuffleAsPermuteAndUnpack(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(!VT.isFloatingPoint() &&
@@ -12079,19 +12613,18 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(
 /// instructions will incur a domain crossing penalty on some chips though so
 /// it is better to avoid lowering through this for integer vectors where
 /// possible.
-static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
   assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
 
   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Straight shuffle of a single input vector. Simulate this by using the
@@ -12116,16 +12649,20 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask[0] < 2 && "We sort V1 to be the first input.");
   assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
 
+  if (Subtarget.hasAVX2())
+    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+      return Extract;
+
   // When loading a scalar and then shuffling it into a vector we can often do
   // the insertion cheaply.
-  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+  if (SDValue Insertion = lowerShuffleAsElementInsertion(
           DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return Insertion;
   // Try inverting the insertion since for v2 masks it is easy to do and we
   // can't reliably sort the mask one way or the other.
   int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
                         Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
-  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+  if (SDValue Insertion = lowerShuffleAsElementInsertion(
           DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
     return Insertion;
 
@@ -12141,13 +12678,12 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
           DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
 
   if (Subtarget.hasSSE41())
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
     return V;
 
   unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
@@ -12161,19 +12697,18 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// the integer unit to minimize domain crossing penalties. However, for blends
 /// it falls back to the floating point shuffle operation with appropriate bit
 /// casting.
-static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
   assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
 
   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Straight shuffle of a single input vector. For everything from SSE2
@@ -12193,20 +12728,24 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask[0] < 2 && "We sort V1 to be the first input.");
   assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
 
+  if (Subtarget.hasAVX2())
+    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+      return Extract;
+
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // When loading a scalar and then shuffling it into a vector we can often do
   // the insertion cheaply.
-  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+  if (SDValue Insertion = lowerShuffleAsElementInsertion(
           DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return Insertion;
   // Try inverting the insertion since for v2 masks it is easy to do and we
   // can't reliably sort the mask one way or the other.
   int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
-  if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+  if (SDValue Insertion = lowerShuffleAsElementInsertion(
           DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
     return Insertion;
 
@@ -12214,33 +12753,32 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // *exact* same predicate.
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
     return V;
 
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
   if (Subtarget.hasSSSE3()) {
     if (Subtarget.hasVLX())
-      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
-                                                      Mask, Subtarget, DAG))
+      if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v2i64, V1, V2, Mask,
+                                                Subtarget, DAG))
         return Rotate;
 
-    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-            DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
+                                                  Subtarget, DAG))
       return Rotate;
   }
 
   // If we have direct support for blends, we should lower by decomposing into
   // a permute. That will be faster than the domain cross.
   if (IsBlendSupported)
-    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
-                                                      Mask, Subtarget, DAG);
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask,
+                                                Subtarget, DAG);
 
   // We implement this with SHUFPD which is pretty lame because it will likely
   // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
@@ -12252,36 +12790,14 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                         DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
 }
 
-/// Test whether this can be lowered with a single SHUFPS instruction.
-///
-/// This is used to disable more specialized lowerings when the shufps lowering
-/// will happen to be efficient.
-static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
-  // This routine only handles 128-bit shufps.
-  assert(Mask.size() == 4 && "Unsupported mask size!");
-  assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
-  assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
-  assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
-  assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
-
-  // To lower with a single SHUFPS we need to have the low half and high half
-  // each requiring a single input.
-  if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
-    return false;
-  if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
-    return false;
-
-  return true;
-}
-
 /// Lower a vector shuffle using the SHUFPS instruction.
 ///
 /// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
 /// It makes no assumptions about whether this is the *best* lowering, it simply
 /// uses it.
-static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
+                                      ArrayRef<int> Mask, SDValue V1,
+                                      SDValue V2, SelectionDAG &DAG) {
   SDValue LowV = V1, HighV = V2;
   int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
 
@@ -12366,11 +12882,10 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
 /// Uses instructions exclusively from the floating point unit to minimize
 /// domain crossing penalties, as these are sufficient to implement all v4f32
 /// shuffles.
-static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -12379,8 +12894,8 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   if (NumV2Elements == 0) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Use even/odd duplicate instructions for masks that match their pattern.
@@ -12413,29 +12928,32 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                        getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
   }
 
+  if (Subtarget.hasAVX2())
+    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+      return Extract;
+
   // There are special ways we can lower some single-element blends. However, we
   // have custom ways we can lower more complex single-element blends below that
   // we defer to if both this and BLENDPS fail to match, so restrict this to
   // when the V2 input is targeting element 0 of the mask -- that is the fast
   // case here.
   if (NumV2Elements == 1 && Mask[0] >= 4)
-    if (SDValue V = lowerVectorShuffleAsElementInsertion(
+    if (SDValue V = lowerShuffleAsElementInsertion(
             DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return V;
 
   if (Subtarget.hasSSE41()) {
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
     // Use INSERTPS if we can complete the shuffle efficiently.
-    if (SDValue V =
-            lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
+    if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
       return V;
 
     if (!isSingleSHUFPSMask(Mask))
-      if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
-              DL, MVT::v4f32, V1, V2, Mask, DAG))
+      if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
+                                                            V2, Mask, DAG))
         return BlendPerm;
   }
 
@@ -12449,23 +12967,21 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   }
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
     return V;
 
   // Otherwise fall back to a SHUFPS lowering strategy.
-  return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
+  return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
 }
 
 /// Lower 4-lane i32 vector shuffles.
 ///
 /// We try to handle these with integer-domain shuffles where we can, but for
 /// blends we use the floating point domain blend instructions.
-static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -12473,16 +12989,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
-          DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
+                                                   Zeroable, Subtarget, DAG))
     return ZExt;
 
   int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
 
   if (NumV2Elements == 0) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Straight shuffle of a single input vector. For everything from SSE2
@@ -12501,14 +13017,18 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                        getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
   }
 
+  if (Subtarget.hasAVX2())
+    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+      return Extract;
+
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // There are special ways we can lower some single-element blends.
   if (NumV2Elements == 1)
-    if (SDValue V = lowerVectorShuffleAsElementInsertion(
+    if (SDValue V = lowerShuffleAsElementInsertion(
             DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return V;
 
@@ -12516,29 +13036,28 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // *exact* same predicate.
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
-  if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
-                                                   Zeroable, DAG))
+  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
     return V;
 
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
   if (Subtarget.hasSSSE3()) {
     if (Subtarget.hasVLX())
-      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
-                                                      Mask, Subtarget, DAG))
+      if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i32, V1, V2, Mask,
+                                                Subtarget, DAG))
         return Rotate;
 
-    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
+                                                  Subtarget, DAG))
       return Rotate;
   }
 
@@ -12549,12 +13068,12 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     // If we have direct support for blends, we should lower by decomposing into
     // a permute. That will be faster than the domain cross.
     if (IsBlendSupported)
-      return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
-                                                        Mask, Subtarget, DAG);
+      return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask,
+                                                  Subtarget, DAG);
 
     // Try to lower by permuting the inputs into an unpack instruction.
-    if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
-            DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
+                                                        Mask, Subtarget, DAG))
       return Unpack;
   }
 
@@ -12585,7 +13104,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
 /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
 /// vector, form the analogous 128-bit 8-element Mask.
-static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
+static SDValue lowerV8I16GeneralSingleInputShuffle(
     const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
     const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
@@ -12617,11 +13136,9 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
   copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
   array_pod_sort(HiInputs.begin(), HiInputs.end());
   HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
-  int NumLToL =
-      std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
+  int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin();
   int NumHToL = LoInputs.size() - NumLToL;
-  int NumLToH =
-      std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
+  int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin();
   int NumHToH = HiInputs.size() - NumLToH;
   MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
   MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
@@ -12730,7 +13247,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
     // a half by taking the sum of the half with three inputs and subtracting
     // the sum of the actual three inputs. The difference is the remaining
     // slot.
-    int ADWord, BDWord;
+    int ADWord = 0, BDWord = 0;
     int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
     int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
     int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
@@ -12825,8 +13342,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
 
     // Recurse back into this routine to re-compute state now that this isn't
     // a 3 and 1 problem.
-    return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
-                                                     DAG);
+    return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG);
   };
   if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
     return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
@@ -13084,7 +13600,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
 
 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
 /// blend if only one input is used.
-static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
+static SDValue lowerShuffleAsBlendOfPSHUFBs(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
   assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
@@ -13147,54 +13663,51 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
 /// the two inputs, try to interleave them. Otherwise, blend the low and high
 /// halves of the inputs separately (making them have relatively few inputs)
 /// and then concatenate them.
-static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
-          DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
+                                                   Zeroable, Subtarget, DAG))
     return ZExt;
 
   int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
 
   if (NumV2Inputs == 0) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Try to use shift instructions.
-    if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Shift;
 
     // Use dedicated unpack instructions for masks that match their pattern.
-    if (SDValue V =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
       return V;
 
     // Use dedicated pack instructions for masks that match their pattern.
-    if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2,
-                                               DAG, Subtarget))
+    if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
+                                         Subtarget))
       return V;
 
     // Try to use byte rotation instructions.
-    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
-                                                        Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
+                                                  Subtarget, DAG))
       return Rotate;
 
     // Make a copy of the mask so it can be modified.
     SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
-    return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1,
-                                                     MutableMask, Subtarget,
-                                                     DAG);
+    return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask,
+                                               Subtarget, DAG);
   }
 
   assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
@@ -13202,19 +13715,19 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
          "shuffles.");
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // See if we can use SSE4A Extraction / Insertion.
   if (Subtarget.hasSSE4A())
-    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
-                                                Zeroable, DAG))
+    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
+                                          Zeroable, DAG))
       return V;
 
   // There are special ways we can lower some single-element blends.
   if (NumV2Inputs == 1)
-    if (SDValue V = lowerVectorShuffleAsElementInsertion(
+    if (SDValue V = lowerShuffleAsElementInsertion(
             DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return V;
 
@@ -13222,50 +13735,54 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // *exact* same predicate.
   bool IsBlendSupported = Subtarget.hasSSE41();
   if (IsBlendSupported)
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
-  if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
-                                                   Zeroable, DAG))
+  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
     return V;
 
   // Use dedicated pack instructions for masks that match their pattern.
-  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
-                                             Subtarget))
+  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
+                                       Subtarget))
     return V;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   if (SDValue BitBlend =
-          lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
+          lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
     return BitBlend;
 
+  // Try to use byte shift instructions to mask.
+  if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+          DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+    return V;
+
   // Try to lower by permuting the inputs into an unpack instruction.
-  if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
-          DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
+                                                      Mask, Subtarget, DAG))
     return Unpack;
 
   // If we can't directly blend but can use PSHUFB, that will be better as it
   // can both shuffle and set up the inefficient blend.
   if (!IsBlendSupported && Subtarget.hasSSSE3()) {
     bool V1InUse, V2InUse;
-    return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
-                                              Zeroable, DAG, V1InUse, V2InUse);
+    return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
+                                        Zeroable, DAG, V1InUse, V2InUse);
   }
 
   // We can always bit-blend if we have to so the fallback strategy is to
   // decompose into single-input permutes and blends.
-  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
-                                                    Mask, Subtarget, DAG);
+  return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
+                                              Mask, Subtarget, DAG);
 }
 
 /// Check whether a compaction lowering can be done by dropping even
@@ -13334,9 +13851,9 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
   return 0;
 }
 
-static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
-                                           ArrayRef<int> Mask, SDValue V1,
-                                           SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
+                                     ArrayRef<int> Mask, SDValue V1,
+                                     SDValue V2, SelectionDAG &DAG) {
   MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
   MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
 
@@ -13354,39 +13871,38 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
 /// UNPCK to spread the i8 elements across two i16-element vectors, and uses
 /// the existing lowering for v8i16 blends on each half, finally PACK-ing them
 /// back together.
-static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   // Use dedicated pack instructions for masks that match their pattern.
-  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
-                                             Subtarget))
+  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
+                                       Subtarget))
     return V;
 
   // Try to use a zext lowering.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
-          DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
+                                                   Zeroable, Subtarget, DAG))
     return ZExt;
 
   // See if we can use SSE4A Extraction / Insertion.
   if (Subtarget.hasSSE4A())
-    if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
-                                                Zeroable, DAG))
+    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
+                                          Zeroable, DAG))
       return V;
 
   int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
@@ -13394,12 +13910,11 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // For single-input shuffles, there are some nicer lowering tricks we can use.
   if (NumV2Elements == 0) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
-    if (SDValue V =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
       return V;
 
     // Check whether we can widen this to an i16 shuffle by duplicating bytes.
@@ -13492,13 +14007,17 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       return V;
   }
 
-  if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
-                                                   Zeroable, DAG))
+  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+    return V;
+
+  // Try to use byte shift instructions to mask.
+  if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+          DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return V;
 
   // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
@@ -13518,7 +14037,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     bool V1InUse = false;
     bool V2InUse = false;
 
-    SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
+    SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
         DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
 
     // If both V1 and V2 are in use and we can use a direct blend or an unpack,
@@ -13526,8 +14045,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     // important as a single pshufb is significantly faster for that.
     if (V1InUse && V2InUse) {
       if (Subtarget.hasSSE41())
-        if (SDValue Blend = lowerVectorShuffleAsBlend(
-                DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+        if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
+                                                Zeroable, Subtarget, DAG))
           return Blend;
 
       // We can use an unpack to do the blending rather than an or in some
@@ -13538,17 +14057,17 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       // FIXME: It might be worth trying to detect if the unpack-feeding
       // shuffles will both be pshufb, in which case we shouldn't bother with
       // this.
-      if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+      if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
               DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
         return Unpack;
 
       // If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
       if (Subtarget.hasVBMI() && Subtarget.hasVLX())
-        return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+        return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
 
       // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
       // PALIGNR will be cheaper than the second PSHUFB+OR.
-      if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute(
+      if (SDValue V = lowerShuffleAsByteRotateAndPermute(
               DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
         return V;
     }
@@ -13558,13 +14077,12 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   // There are special ways we can lower some single-element blends.
   if (NumV2Elements == 1)
-    if (SDValue V = lowerVectorShuffleAsElementInsertion(
+    if (SDValue V = lowerShuffleAsElementInsertion(
             DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return V;
 
-  if (SDValue BitBlend =
-          lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
-    return BitBlend;
+  if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
+    return Blend;
 
   // Check whether a compaction lowering can be done. This handles shuffles
   // which take every Nth element for some even N. See the helper function for
@@ -13605,8 +14123,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   // Handle multi-input cases by blending single-input shuffles.
   if (NumV2Elements > 0)
-    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
-                                                      Mask, Subtarget, DAG);
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask,
+                                                Subtarget, DAG);
 
   // The fallback path for single-input shuffles widens this into two v8i16
   // vectors with unpacks, shuffles those, and then pulls them back together
@@ -13661,24 +14179,24 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 ///
 /// This routine breaks down the specific type of 128-bit shuffle and
 /// dispatches to the lowering routines accordingly.
-static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        MVT VT, SDValue V1, SDValue V2,
-                                        const APInt &Zeroable,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  MVT VT, SDValue V1, SDValue V2,
+                                  const APInt &Zeroable,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   switch (VT.SimpleTy) {
   case MVT::v2i64:
-    return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v2f64:
-    return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i32:
-    return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4f32:
-    return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8i16:
-    return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16i8:
-    return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
 
   default:
     llvm_unreachable("Unimplemented!");
@@ -13690,9 +14208,9 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// This routine just extracts two subvectors, shuffles them independently, and
 /// then concatenates them back together. This should work effectively with all
 /// AVX vector shuffle types.
-static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
-                                          SDValue V2, ArrayRef<int> Mask,
-                                          SelectionDAG &DAG) {
+static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
+                                    SDValue V2, ArrayRef<int> Mask,
+                                    SelectionDAG &DAG) {
   assert(VT.getSizeInBits() >= 256 &&
          "Only for 256-bit or wider vector shuffles!");
   assert(V1.getSimpleValueType() == VT && "Bad operand type!");
@@ -13816,11 +14334,10 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
 /// between splitting the shuffle into 128-bit components and stitching those
 /// back together vs. extracting the single-input shuffles and blending those
 /// results.
-static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
-                                                SDValue V1, SDValue V2,
-                                                ArrayRef<int> Mask,
-                                                const X86Subtarget &Subtarget,
-                                                SelectionDAG &DAG) {
+static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
+                                          SDValue V2, ArrayRef<int> Mask,
+                                          const X86Subtarget &Subtarget,
+                                          SelectionDAG &DAG) {
   assert(!V2.isUndef() && "This routine must not be used to lower single-input "
          "shuffles as it could then recurse on itself.");
   int Size = Mask.size();
@@ -13845,8 +14362,8 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
     return true;
   };
   if (DoBothBroadcast())
-    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
-                                                      Subtarget, DAG);
+    return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+                                                Subtarget, DAG);
 
   // If the inputs all stem from a single 128-bit lane of each input, then we
   // split them rather than blending because the split will decompose to
@@ -13860,12 +14377,12 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
     if (Mask[i] >= 0)
       LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
   if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
-    return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+    return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
 
   // Otherwise, just fall back to decomposed shuffles and a blend. This requires
   // that the decomposed single-input shuffles don't end up here.
-  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
-                                                    Subtarget, DAG);
+  return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget,
+                                              DAG);
 }
 
 /// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -13874,9 +14391,9 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
 /// This is mainly for cases where we can have non-repeating permutes
 /// in each lane.
 ///
-/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
+/// TODO: This is very similar to lowerShuffleAsLanePermuteAndRepeatedMask,
 /// we should investigate merging them.
-static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
+static SDValue lowerShuffleAsLanePermuteAndPermute(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     SelectionDAG &DAG, const X86Subtarget &Subtarget) {
   int NumElts = VT.getVectorNumElements();
@@ -13884,7 +14401,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
   int NumEltsPerLane = NumElts / NumLanes;
 
   SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
-  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
   SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
 
   for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +14415,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
       return SDValue();
     SrcLaneMask[DstLane] = SrcLane;
 
-    LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
     PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
   }
 
+  // Make sure we set all elements of the lane mask, to avoid undef propagation.
+  SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+  for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+    int SrcLane = SrcLaneMask[DstLane];
+    if (0 <= SrcLane)
+      for (int j = 0; j != NumEltsPerLane; ++j) {
+        LaneMask[(DstLane * NumEltsPerLane) + j] =
+            (SrcLane * NumEltsPerLane) + j;
+      }
+  }
+
   // If we're only shuffling a single lowest lane and the rest are identity
   // then don't bother.
   // TODO - isShuffleMaskInputInPlace could be extended to something like this.
@@ -13931,11 +14457,9 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
 /// is lower than any other fully general cross-lane shuffle strategy I'm aware
 /// of. Special cases for each particular shuffle pattern should be handled
 /// prior to trying this lowering.
-static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
-                                                       SDValue V1, SDValue V2,
-                                                       ArrayRef<int> Mask,
-                                                       SelectionDAG &DAG,
-                                                       const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleAsLanePermuteAndBlend(
+    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
   // FIXME: This should probably be generalized for 512-bit vectors as well.
   assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
   int Size = Mask.size();
@@ -13950,14 +14474,14 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
       if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
         LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
     if (!LaneCrossing[0] || !LaneCrossing[1])
-      return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
   } else {
     bool LaneUsed[2] = {false, false};
     for (int i = 0; i < Size; ++i)
       if (Mask[i] >= 0)
         LaneUsed[(Mask[i] / LaneSize)] = true;
     if (!LaneUsed[0] || !LaneUsed[1])
-      return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
   }
 
   assert(V2.isUndef() &&
@@ -13981,11 +14505,11 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
 }
 
 /// Handle lowering 2-lane 128-bit shuffles.
-static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
-                                        SDValue V2, ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
+                                  SDValue V2, ArrayRef<int> Mask,
+                                  const APInt &Zeroable,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
   if (Subtarget.hasAVX2() && V2.isUndef())
     return SDValue();
@@ -14012,8 +14536,8 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // instruction bytes needed to explicitly generate the zero vector.
 
   // Blends are faster and handle all the non-lane-crossing cases.
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
+                                          Subtarget, DAG))
     return Blend;
 
   // If either input operand is a zero vector, use VPERM2X128 because its mask
@@ -14084,9 +14608,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
 /// or two of the lanes of the inputs. The lanes of the input vectors are
 /// shuffled in one or two independent shuffles to get the lanes into the
 /// position needed by the final shuffle.
-///
-/// FIXME: This should be generalized to 512-bit shuffles.
-static SDValue lowerVectorShuffleByMerging128BitLanes(
+static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const X86Subtarget &Subtarget, SelectionDAG &DAG) {
   assert(!V2.isUndef() && "This is only useful with multiple inputs.");
@@ -14095,12 +14617,10 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
     return SDValue();
 
   int Size = Mask.size();
+  int NumLanes = VT.getSizeInBits() / 128;
   int LaneSize = 128 / VT.getScalarSizeInBits();
-  int NumLanes = Size / LaneSize;
-  assert(NumLanes == 2 && "Only handles 256-bit shuffles.");
-
   SmallVector<int, 16> RepeatMask(LaneSize, -1);
-  int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } };
+  SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});
 
   // First pass will try to fill in the RepeatMask from lanes that need two
   // sources.
@@ -14111,7 +14631,7 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
       int M = Mask[(Lane * LaneSize) + i];
       if (M < 0)
         continue;
-      // Determine which of the 4 possible input lanes (2 from each source)
+      // Determine which of the possible input lanes (NumLanes from each source)
       // this element comes from. Assign that as one of the sources for this
       // lane. We can assign up to 2 sources for this lane. If we run out
       // sources we can't do anything.
@@ -14250,54 +14770,30 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
   return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
 }
 
-/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
-/// This allows for fast cases such as subvector extraction/insertion
-/// or shuffling smaller vector types which can lower more efficiently.
-static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
-                                               SDValue V1, SDValue V2,
-                                               ArrayRef<int> Mask,
-                                               const X86Subtarget &Subtarget,
-                                               SelectionDAG &DAG) {
-  assert((VT.is256BitVector() || VT.is512BitVector()) &&
-         "Expected 256-bit or 512-bit vector");
-
-  unsigned NumElts = VT.getVectorNumElements();
-  unsigned HalfNumElts = NumElts / 2;
-  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
-
-  bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
-  bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
-  if (!UndefLower && !UndefUpper)
-    return SDValue();
-
-  // Upper half is undef and lower half is whole upper subvector.
-  // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
-  if (UndefUpper &&
-      isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
-    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
-                             DAG.getIntPtrConstant(HalfNumElts, DL));
-    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
-                       DAG.getIntPtrConstant(0, DL));
-  }
-
-  // Lower half is undef and upper half is whole lower subvector.
-  // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
-  if (UndefLower &&
-      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
-    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
-                             DAG.getIntPtrConstant(0, DL));
-    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
-                       DAG.getIntPtrConstant(HalfNumElts, DL));
-  }
+/// If the input shuffle mask results in a vector that is undefined in all upper
+/// or lower half elements and that mask accesses only 2 halves of the
+/// shuffle's operands, return true. A mask of half the width with mask indexes
+/// adjusted to access the extracted halves of the original shuffle operands is
+/// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or
+/// lower half of each input operand is accessed.
+static bool
+getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
+                   int &HalfIdx1, int &HalfIdx2) {
+  assert((Mask.size() == HalfMask.size() * 2) &&
+         "Expected input mask to be twice as long as output");
+
+  // Exactly one half of the result must be undef to allow narrowing.
+  bool UndefLower = isUndefLowerHalf(Mask);
+  bool UndefUpper = isUndefUpperHalf(Mask);
+  if (UndefLower == UndefUpper)
+    return false;
 
-  // If the shuffle only uses two of the four halves of the input operands,
-  // then extract them and perform the 'half' shuffle at half width.
-  // e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
-  int HalfIdx1 = -1, HalfIdx2 = -1;
-  SmallVector<int, 8> HalfMask(HalfNumElts);
-  unsigned Offset = UndefLower ? HalfNumElts : 0;
+  unsigned HalfNumElts = HalfMask.size();
+  unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
+  HalfIdx1 = -1;
+  HalfIdx2 = -1;
   for (unsigned i = 0; i != HalfNumElts; ++i) {
-    int M = Mask[i + Offset];
+    int M = Mask[i + MaskIndexOffset];
     if (M < 0) {
       HalfMask[i] = M;
       continue;
@@ -14324,42 +14820,27 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
     }
 
     // Too many half vectors referenced.
-    return SDValue();
+    return false;
   }
-  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
 
-  // Only shuffle the halves of the inputs when useful.
-  int NumLowerHalves =
-      (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
-  int NumUpperHalves =
-      (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
-
-  // uuuuXXXX - don't extract uppers just to insert again.
-  if (UndefLower && NumUpperHalves != 0)
-    return SDValue();
-
-  // XXXXuuuu - don't extract both uppers, instead shuffle and then extract.
-  if (UndefUpper && NumUpperHalves == 2)
-    return SDValue();
+  return true;
+}
 
-  // AVX2 - XXXXuuuu - always extract lowers.
-  if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
-    // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
-    if (VT == MVT::v4f64 || VT == MVT::v4i64)
-      return SDValue();
-    // AVX2 supports variable 32-bit element cross-lane shuffles.
-    if (VT == MVT::v8f32 || VT == MVT::v8i32) {
-      // XXXXuuuu - don't extract lowers and uppers.
-      if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0)
-        return SDValue();
-    }
-  }
+/// Given the output values from getHalfShuffleMask(), create a half width
+/// shuffle of extracted vectors followed by an insert back to full width.
+static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
+                                     ArrayRef<int> HalfMask, int HalfIdx1,
+                                     int HalfIdx2, bool UndefLower,
+                                     SelectionDAG &DAG) {
+  assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
+  assert(V1.getValueType().isSimple() && "Expecting only simple types");
 
-  // AVX512 - XXXXuuuu - always extract lowers.
-  if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
-    return SDValue();
+  MVT VT = V1.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfNumElts = NumElts / 2;
+  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
 
-  auto GetHalfVector = [&](int HalfIdx) {
+  auto getHalfVector = [&](int HalfIdx) {
     if (HalfIdx < 0)
       return DAG.getUNDEF(HalfVT);
     SDValue V = (HalfIdx < 2 ? V1 : V2);
@@ -14368,13 +14849,126 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
                        DAG.getIntPtrConstant(HalfIdx, DL));
   };
 
-  SDValue Half1 = GetHalfVector(HalfIdx1);
-  SDValue Half2 = GetHalfVector(HalfIdx2);
+  // ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset
+  SDValue Half1 = getHalfVector(HalfIdx1);
+  SDValue Half2 = getHalfVector(HalfIdx2);
   SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  unsigned Offset = UndefLower ? HalfNumElts : 0;
   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
                      DAG.getIntPtrConstant(Offset, DL));
 }
 
+/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
+/// This allows for fast cases such as subvector extraction/insertion
+/// or shuffling smaller vector types which can lower more efficiently.
+static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
+                                         SDValue V2, ArrayRef<int> Mask,
+                                         const X86Subtarget &Subtarget,
+                                         SelectionDAG &DAG) {
+  assert((VT.is256BitVector() || VT.is512BitVector()) &&
+         "Expected 256-bit or 512-bit vector");
+
+  bool UndefLower = isUndefLowerHalf(Mask);
+  if (!UndefLower && !isUndefUpperHalf(Mask))
+    return SDValue();
+
+  assert((!UndefLower || !isUndefUpperHalf(Mask)) &&
+         "Completely undef shuffle mask should have been simplified already");
+
+  // Upper half is undef and lower half is whole upper subvector.
+  // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfNumElts = NumElts / 2;
+  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+  if (!UndefLower &&
+      isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                             DAG.getIntPtrConstant(HalfNumElts, DL));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+                       DAG.getIntPtrConstant(0, DL));
+  }
+
+  // Lower half is undef and upper half is whole lower subvector.
+  // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+  if (UndefLower &&
+      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                             DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+                       DAG.getIntPtrConstant(HalfNumElts, DL));
+  }
+
+  int HalfIdx1, HalfIdx2;
+  SmallVector<int, 8> HalfMask(HalfNumElts);
+  if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2))
+    return SDValue();
+
+  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+  // Only shuffle the halves of the inputs when useful.
+  unsigned NumLowerHalves =
+      (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
+  unsigned NumUpperHalves =
+      (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
+  assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");
+
+  // Determine the larger pattern of undef/halves, then decide if it's worth
+  // splitting the shuffle based on subtarget capabilities and types.
+  unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+  if (!UndefLower) {
+    // XXXXuuuu: no insert is needed.
+    // Always extract lowers when setting lower - these are all free subreg ops.
+    if (NumUpperHalves == 0)
+      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+                                   UndefLower, DAG);
+
+    if (NumUpperHalves == 1) {
+      // AVX2 has efficient 32/64-bit element cross-lane shuffles.
+      if (Subtarget.hasAVX2()) {
+        // extract128 + vunpckhps/vshufps, is better than vblend + vpermps.
+        if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
+            !is128BitUnpackShuffleMask(HalfMask) &&
+            (!isSingleSHUFPSMask(HalfMask) ||
+             Subtarget.hasFastVariableShuffle()))
+          return SDValue();
+        // If this is a unary shuffle (assume that the 2nd operand is
+        // canonicalized to undef), then we can use vpermpd. Otherwise, we
+        // are better off extracting the upper half of 1 operand and using a
+        // narrow shuffle.
+        if (EltWidth == 64 && V2.isUndef())
+          return SDValue();
+      }
+      // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
+      if (Subtarget.hasAVX512() && VT.is512BitVector())
+        return SDValue();
+      // Extract + narrow shuffle is better than the wide alternative.
+      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+                                   UndefLower, DAG);
+    }
+
+    // Don't extract both uppers, instead shuffle and then extract.
+    assert(NumUpperHalves == 2 && "Half vector count went wrong");
+    return SDValue();
+  }
+
+  // UndefLower - uuuuXXXX: an insert to high half is required if we split this.
+  if (NumUpperHalves == 0) {
+    // AVX2 has efficient 64-bit element cross-lane shuffles.
+    // TODO: Refine to account for unary shuffle, splat, and other masks?
+    if (Subtarget.hasAVX2() && EltWidth == 64)
+      return SDValue();
+    // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
+    if (Subtarget.hasAVX512() && VT.is512BitVector())
+      return SDValue();
+    // Narrow shuffle + insert is better than the wide alternative.
+    return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+                                 UndefLower, DAG);
+  }
+
+  // NumUpperHalves != 0: don't bother with extract, shuffle, and then insert.
+  return SDValue();
+}
+
 /// Test whether the specified input (0 or 1) is in-place blended by the
 /// given mask.
 ///
@@ -14560,9 +15154,8 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
                               SubLaneMask);
 }
 
-static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
-                                         unsigned &ShuffleImm,
-                                         ArrayRef<int> Mask) {
+static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+                                   unsigned &ShuffleImm, ArrayRef<int> Mask) {
   int NumElts = VT.getVectorNumElements();
   assert(VT.getScalarSizeInBits() == 64 &&
          (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
@@ -14597,14 +15190,14 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
   return false;
 }
 
-static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
+                                      ArrayRef<int> Mask, SDValue V1,
+                                      SDValue V2, SelectionDAG &DAG) {
   assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&&
          "Unexpected data type for VSHUFPD");
 
   unsigned Immediate = 0;
-  if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+  if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
     return SDValue();
 
   return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
@@ -14615,23 +15208,22 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
 ///
 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
 /// isn't available.
-static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
 
-  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                           Zeroable, Subtarget, DAG))
+  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
+                                     Subtarget, DAG))
     return V;
 
   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
-            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
+                                                    Mask, Subtarget, DAG))
       return Broadcast;
 
     // Use low duplicate instructions for masks that match their pattern.
@@ -14659,29 +15251,33 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       return V;
 
     // Try to permute the lanes and then use a per-lane permute.
-    if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
-            DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
+    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
+                                                        Mask, DAG, Subtarget))
       return V;
 
     // Otherwise, fall back.
-    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
-                                                   DAG, Subtarget);
+    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG,
+                                             Subtarget);
   }
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
-  if (SDValue Op =
-      lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
     return Op;
 
+  // If we have one input in place, then we can permute the other input and
+  // blend the result.
+  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+                                                Subtarget, DAG);
+
   // Try to create an in-lane repeating shuffle mask and then shuffle the
   // results into the target lanes.
   if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -14694,52 +15290,51 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // instruction so skip this pattern.
   if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
                                 isShuffleMaskInputInPlace(1, Mask))))
-    if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+    if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
             DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
-      return Result;
+      return V;
 
   // If we have VLX support, we can use VEXPAND.
   if (Subtarget.hasVLX())
-    if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
-                                               V1, V2, DAG, Subtarget))
+    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
+                                         DAG, Subtarget))
       return V;
 
   // If we have AVX2 then we always want to lower with a blend because an v4 we
   // can fully permute the elements.
   if (Subtarget.hasAVX2())
-    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
-                                                      Mask, Subtarget, DAG);
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+                                                Subtarget, DAG);
 
   // Otherwise fall back on generic lowering.
-  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
-                                          Subtarget, DAG);
+  return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
+                                    Subtarget, DAG);
 }
 
 /// Handle lowering of 4-lane 64-bit integer shuffles.
 ///
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v4i64 shuffling..
-static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
   assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
 
-  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                           Zeroable, Subtarget, DAG))
+  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
+                                     Subtarget, DAG))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2,
-                                                        Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
   if (V2.isUndef()) {
@@ -14763,31 +15358,36 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   }
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // If we have VLX support, we can use VALIGN or VEXPAND.
   if (Subtarget.hasVLX()) {
-    if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
-                                                    Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask,
+                                              Subtarget, DAG))
       return Rotate;
 
-    if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask,
-                                               V1, V2, DAG, Subtarget))
+    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
+                                         DAG, Subtarget))
       return V;
   }
 
   // Try to use PALIGNR.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
-                                                      Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
     return V;
 
+  // If we have one input in place, then we can permute the other input and
+  // blend the result.
+  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+                                                Subtarget, DAG);
+
   // Try to create an in-lane repeating shuffle mask and then shuffle the
   // results into the target lanes.
   if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -14800,35 +15400,34 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // instruction so skip this pattern.
   if (!isShuffleMaskInputInPlace(0, Mask) &&
       !isShuffleMaskInputInPlace(1, Mask))
-    if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+    if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
             DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
       return Result;
 
   // Otherwise fall back on generic blend lowering.
-  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
-                                                    Mask, Subtarget, DAG);
+  return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+                                              Subtarget, DAG);
 }
 
 /// Handle lowering of 8-lane 32-bit floating point shuffles.
 ///
 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
 /// isn't available.
-static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2,
-                                                        Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
   // If the shuffle mask is repeated in each 128-bit lane, we have many more
@@ -14849,13 +15448,12 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
 
     // Use dedicated unpack instructions for masks that match their pattern.
-    if (SDValue V =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
       return V;
 
     // Otherwise, fall back to a SHUFPS sequence. Here it is important that we
     // have already handled any direct blends.
-    return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
+    return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
   }
 
   // Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -14875,49 +15473,49 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
 
     // Otherwise, fall back.
-    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                                   DAG, Subtarget);
+    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
+                                             DAG, Subtarget);
   }
 
   // Try to simplify this by merging 128-bit lanes to enable a lane-based
   // shuffle.
-  if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
           DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
     return Result;
+
   // If we have VLX support, we can use VEXPAND.
   if (Subtarget.hasVLX())
-    if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask,
-                                               V1, V2, DAG, Subtarget))
+    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
+                                         DAG, Subtarget))
       return V;
 
   // For non-AVX512 if the Mask is of 16bit elements in lane then try to split
   // since after split we get a more efficient code using vpunpcklwd and
   // vpunpckhwd instrs than vblend.
   if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
-    if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
-                                                     Mask, Subtarget, DAG))
+    if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+                                               Subtarget, DAG))
       return V;
 
   // If we have AVX2 then we always want to lower with a blend because at v8 we
   // can fully permute the elements.
   if (Subtarget.hasAVX2())
-    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
-                                                      Mask, Subtarget, DAG);
+    return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask,
+                                                Subtarget, DAG);
 
   // Otherwise fall back on generic lowering.
-  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
-                                          Subtarget, DAG);
+  return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+                                    Subtarget, DAG);
 }
 
 /// Handle lowering of 8-lane 32-bit integer shuffles.
 ///
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v8i32 shuffling..
-static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -14926,8 +15524,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
-          DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
+                                                   Zeroable, Subtarget, DAG))
     return ZExt;
 
   // For non-AVX512 if the Mask is of 16bit elements in lane then try to split
@@ -14935,17 +15533,17 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // vpunpcklwd and vpunpckhwd instrs.
   if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
       !Subtarget.hasAVX512())
-    if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
-                                                     Mask, Subtarget, DAG))
+    if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask,
+                                               Subtarget, DAG))
       return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2,
-                                                        Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
   // If the shuffle mask is repeated in each 128-bit lane we can use more
@@ -14961,30 +15559,29 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
 
     // Use dedicated unpack instructions for masks that match their pattern.
-    if (SDValue V =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
       return V;
   }
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // If we have VLX support, we can use VALIGN or EXPAND.
   if (Subtarget.hasVLX()) {
-    if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
-                                                    Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask,
+                                              Subtarget, DAG))
       return Rotate;
 
-    if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask,
-                                               V1, V2, DAG, Subtarget))
+    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
+                                         DAG, Subtarget))
       return V;
   }
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   // Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15006,31 +15603,30 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
     SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
     SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
-    SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
-                                                  CastV1, CastV2, DAG);
+    SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
+                                            CastV1, CastV2, DAG);
     return DAG.getBitcast(MVT::v8i32, ShufPS);
   }
 
   // Try to simplify this by merging 128-bit lanes to enable a lane-based
   // shuffle.
-  if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
           DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
     return Result;
 
   // Otherwise fall back on generic blend lowering.
-  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
-                                                    Mask, Subtarget, DAG);
+  return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask,
+                                              Subtarget, DAG);
 }
 
 /// Handle lowering of 16-lane 16-bit integer shuffles.
 ///
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v16i16 shuffling..
-static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        SDValue V1, SDValue V2,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  const APInt &Zeroable, SDValue V1, SDValue V2,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15039,37 +15635,36 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
           DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return ZExt;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2,
-                                                        Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
     return V;
 
   // Use dedicated pack instructions for masks that match their pattern.
-  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
-                                             Subtarget))
+  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
+                                       Subtarget))
     return V;
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   // Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15082,12 +15677,12 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     // There are no generalized cross-lane shuffle operations available on i16
     // element types.
     if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
-      if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+      if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
               DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
         return V;
 
-      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
-                                                     Mask, DAG, Subtarget);
+      return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask,
+                                               DAG, Subtarget);
     }
 
     SmallVector<int, 8> RepeatedMask;
@@ -15095,44 +15690,43 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       // As this is a single-input shuffle, the repeated mask should be
       // a strictly valid v8i16 mask that we can pass through to the v8i16
       // lowering to handle even the v16 case.
-      return lowerV8I16GeneralSingleInputVectorShuffle(
+      return lowerV8I16GeneralSingleInputShuffle(
           DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
     }
   }
 
-  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
-          DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
+  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
+                                              Zeroable, Subtarget, DAG))
     return PSHUFB;
 
   // AVX512BWVL can lower to VPERMW.
   if (Subtarget.hasBWI() && Subtarget.hasVLX())
-    return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
+    return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
 
   // Try to simplify this by merging 128-bit lanes to enable a lane-based
   // shuffle.
-  if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
           DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
     return Result;
 
   // Try to permute the lanes and then use a per-lane permute.
-  if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
           DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
     return V;
 
   // Otherwise fall back on generic lowering.
-  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
-                                          Subtarget, DAG);
+  return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
+                                    Subtarget, DAG);
 }
 
 /// Handle lowering of 32-lane 8-bit integer shuffles.
 ///
 /// This routine is only called when we have AVX2 and thus a reasonable
 /// instruction set for v32i8 shuffling..
-static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
@@ -15141,37 +15735,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
-          DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
+                                                   Zeroable, Subtarget, DAG))
     return ZExt;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2,
-                                                        Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
     return V;
 
   // Use dedicated pack instructions for masks that match their pattern.
-  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
-                                             Subtarget))
+  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
+                                       Subtarget))
     return V;
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   // Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15183,36 +15776,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // There are no generalized cross-lane shuffle operations available on i8
   // element types.
   if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
-    if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
             DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
       return V;
 
-    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
-                                                   DAG, Subtarget);
+    return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG,
+                                             Subtarget);
   }
 
-  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
-          DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
+  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
+                                              Zeroable, Subtarget, DAG))
     return PSHUFB;
 
   // AVX512VBMIVL can lower to VPERMB.
   if (Subtarget.hasVBMI() && Subtarget.hasVLX())
-    return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
+    return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
 
   // Try to simplify this by merging 128-bit lanes to enable a lane-based
   // shuffle.
-  if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
           DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
     return Result;
 
   // Try to permute the lanes and then use a per-lane permute.
-  if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
           DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
     return V;
 
   // Otherwise fall back on generic lowering.
-  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
-                                          Subtarget, DAG);
+  return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
+                                    Subtarget, DAG);
 }
 
 /// High-level routine to lower various 256-bit x86 vector shuffles.
@@ -15220,24 +15813,23 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// This routine either breaks down the specific type of a 256-bit x86 vector
 /// shuffle or splits it into two 128-bit shuffles and fuses the results back
 /// together based on the available instructions.
-static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        MVT VT, SDValue V1, SDValue V2,
-                                        const APInt &Zeroable,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, const APInt &Zeroable,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   // If we have a single input to the zero element, insert that into V1 if we
   // can do so cheaply.
   int NumElts = VT.getVectorNumElements();
   int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
 
   if (NumV2Elements == 1 && Mask[0] >= NumElts)
-    if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+    if (SDValue Insertion = lowerShuffleAsElementInsertion(
             DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return Insertion;
 
   // Handle special cases where the lower or upper half is UNDEF.
   if (SDValue V =
-          lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+          lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
     return V;
 
   // There is a really nice hard cut-over between AVX1 and AVX2 that means we
@@ -15251,12 +15843,12 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     if (ElementBits < 32) {
       // No floating point type available, if we can't use the bit operations
       // for masking/blending then decompose into 128-bit vectors.
-      if (SDValue V =
-              lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+      if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+                                            Subtarget, DAG))
         return V;
-      if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+      if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
         return V;
-      return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
     }
 
     MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
@@ -15268,17 +15860,17 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   switch (VT.SimpleTy) {
   case MVT::v4f64:
-    return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v4i64:
-    return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8f32:
-    return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8i32:
-    return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16i16:
-    return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v32i8:
-    return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
 
   default:
     llvm_unreachable("Not a valid 256-bit x86 vector type!");
@@ -15286,12 +15878,10 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 }
 
 /// Try to lower a vector shuffle as a 128-bit shuffles.
-static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
-                                        ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        SDValue V1, SDValue V2,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
+                                  const APInt &Zeroable, SDValue V1, SDValue V2,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(VT.getScalarSizeInBits() == 64 &&
          "Unexpected element type size for 128bit shuffle.");
 
@@ -15388,11 +15978,10 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
 }
 
 /// Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -15419,37 +16008,33 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
   }
 
-  if (SDValue Shuf128 =
-          lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, Zeroable, V1, V2,
-                                   Subtarget, DAG))
+  if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
+                                           V2, Subtarget, DAG))
     return Shuf128;
 
-  if (SDValue Unpck =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+  if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
     return Unpck;
 
   // Check if the blend happens to exactly fit that of SHUFPD.
-  if (SDValue Op =
-      lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
+  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
     return Op;
 
-  if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
-                                             V2, DAG, Subtarget))
+  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
+                                       DAG, Subtarget))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
-  return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
+  return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
 }
 
 /// Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        SDValue V1, SDValue V2,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  const APInt &Zeroable, SDValue V1, SDValue V2,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15471,16 +16056,15 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
 
     // Use dedicated unpack instructions for masks that match their pattern.
-    if (SDValue Unpck =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
-      return Unpck;
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
+      return V;
 
-    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
-                                                  Zeroable, Subtarget, DAG))
+    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
+                                            Zeroable, Subtarget, DAG))
       return Blend;
 
     // Otherwise, fall back to a SHUFPS sequence.
-    return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
+    return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
   }
 
   // If we have a single input shuffle with different shuffle patterns in the
@@ -15492,19 +16076,18 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   }
 
   // If we have AVX512F support, we can use VEXPAND.
-  if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
+  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
                                              V1, V2, DAG, Subtarget))
     return V;
 
-  return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
+  return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
 }
 
 /// Handle lowering of 8-lane 64-bit integer shuffles.
-static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -15530,47 +16113,44 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
   }
 
-  if (SDValue Shuf128 =
-          lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, Zeroable,
-                                   V1, V2, Subtarget, DAG))
+  if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1,
+                                           V2, Subtarget, DAG))
     return Shuf128;
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use VALIGN.
-  if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
-                                                  Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask,
+                                            Subtarget, DAG))
     return Rotate;
 
   // Try to use PALIGNR.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
-                                                      Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
-  if (SDValue Unpck =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
+  if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
     return Unpck;
   // If we have AVX512F support, we can use VEXPAND.
-  if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
-                                             V2, DAG, Subtarget))
+  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
+                                       DAG, Subtarget))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
-  return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
+  return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
 }
 
 /// Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        SDValue V1, SDValue V2,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  const APInt &Zeroable, SDValue V1, SDValue V2,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15578,7 +16158,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
           DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return ZExt;
 
@@ -15595,25 +16175,24 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                          getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
 
     // Use dedicated unpack instructions for masks that match their pattern.
-    if (SDValue V =
-            lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
       return V;
   }
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use VALIGN.
-  if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
-                                                  Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask,
+                                            Subtarget, DAG))
     return Rotate;
 
   // Try to use byte rotation instructions.
   if (Subtarget.hasBWI())
-    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-            DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask,
+                                                  Subtarget, DAG))
       return Rotate;
 
   // Assume that a single SHUFPS is faster than using a permv shuffle.
@@ -15621,27 +16200,26 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
     SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
     SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
-    SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
-                                                  CastV1, CastV2, DAG);
+    SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
+                                            CastV1, CastV2, DAG);
     return DAG.getBitcast(MVT::v16i32, ShufPS);
   }
   // If we have AVX512F support, we can use VEXPAND.
-  if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
-                                             V1, V2, DAG, Subtarget))
+  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
+                                       DAG, Subtarget))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
-  return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
+  return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
 }
 
 /// Handle lowering of 32-lane 16-bit integer shuffles.
-static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const APInt &Zeroable,
-                                        SDValue V1, SDValue V2,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  const APInt &Zeroable, SDValue V1, SDValue V2,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
@@ -15650,23 +16228,22 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
           DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return ZExt;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
     return V;
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
   if (V2.isUndef()) {
@@ -15675,28 +16252,27 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       // As this is a single-input shuffle, the repeated mask should be
       // a strictly valid v8i16 mask that we can pass through to the v8i16
       // lowering to handle even the v32 case.
-      return lowerV8I16GeneralSingleInputVectorShuffle(
+      return lowerV8I16GeneralSingleInputShuffle(
           DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
     }
   }
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
     return Blend;
 
-  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
-          DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
+  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
+                                              Zeroable, Subtarget, DAG))
     return PSHUFB;
 
-  return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
+  return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }
 
 /// Handle lowering of 64-lane 8-bit integer shuffles.
-static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const APInt &Zeroable,
-                                       SDValue V1, SDValue V2,
-                                       const X86Subtarget &Subtarget,
-                                       SelectionDAG &DAG) {
+static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                 const APInt &Zeroable, SDValue V1, SDValue V2,
+                                 const X86Subtarget &Subtarget,
+                                 SelectionDAG &DAG) {
   assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
   assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
   assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
@@ -15705,37 +16281,36 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // Whenever we can lower this as a zext, that instruction is strictly faster
   // than any alternative. It also allows us to fold memory operands into the
   // shuffle in many cases.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
           DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
     return ZExt;
 
   // Use dedicated unpack instructions for masks that match their pattern.
-  if (SDValue V =
-          lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
+  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
     return V;
 
   // Use dedicated pack instructions for masks that match their pattern.
-  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
-                                             Subtarget))
+  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+                                       Subtarget))
     return V;
 
   // Try to use shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Shift;
 
   // Try to use byte rotation instructions.
-  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
-          DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask,
+                                                Subtarget, DAG))
     return Rotate;
 
-  if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
-          DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
+  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
+                                              Zeroable, Subtarget, DAG))
     return PSHUFB;
 
   // VBMI can use VPERMV/VPERMV3 byte shuffles.
   if (Subtarget.hasVBMI())
-    return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
+    return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
 
   // Try to create an in-lane repeating shuffle mask and then shuffle the
   // results into the target lanes.
@@ -15743,12 +16318,19 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
           DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
     return V;
 
-  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
-                                                Zeroable, Subtarget, DAG))
+  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
+                                          Zeroable, Subtarget, DAG))
     return Blend;
 
+  // Try to simplify this by merging 128-bit lanes to enable a lane-based
+  // shuffle.
+  if (!V2.isUndef())
+    if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
+            DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
+      return Result;
+
   // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
+  return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
 }
 
 /// High-level routine to lower various 512-bit x86 vector shuffles.
@@ -15756,11 +16338,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 /// This routine either breaks down the specific type of a 512-bit x86 vector
 /// shuffle or splits it into two 256-bit shuffles and fuses the results back
 /// together based on the available instructions.
-static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        MVT VT, SDValue V1, SDValue V2,
-                                        const APInt &Zeroable,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
+static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                  MVT VT, SDValue V1, SDValue V2,
+                                  const APInt &Zeroable,
+                                  const X86Subtarget &Subtarget,
+                                  SelectionDAG &DAG) {
   assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/ basic ISA!");
 
@@ -15770,18 +16352,18 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
 
   if (NumV2Elements == 1 && Mask[0] >= NumElts)
-    if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+    if (SDValue Insertion = lowerShuffleAsElementInsertion(
             DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
       return Insertion;
 
   // Handle special cases where the lower or upper half is UNDEF.
   if (SDValue V =
-        lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+          lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
     return V;
 
   // Check for being able to broadcast a single element.
-  if (SDValue Broadcast =
-          lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+                                                  Subtarget, DAG))
     return Broadcast;
 
   // Dispatch to each element type for lowering. If we don't have support for
@@ -15790,17 +16372,17 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   // the requisite ISA extensions for that element type are available.
   switch (VT.SimpleTy) {
   case MVT::v8f64:
-    return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16f32:
-    return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v8i64:
-    return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v16i32:
-    return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v32i16:
-    return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
   case MVT::v64i8:
-    return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+    return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
 
   default:
     llvm_unreachable("Not a valid 512-bit x86 vector type!");
@@ -15809,7 +16391,7 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
 // Determine if this shuffle can be implemented with a KSHIFT instruction.
 // Returns the shift amount if possible or -1 if not. This is a simplified
-// version of matchVectorShuffleAsShift.
+// version of matchShuffleAsShift.
 static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
                                     int MaskOffset, const APInt &Zeroable) {
   int Size = Mask.size();
@@ -15844,11 +16426,11 @@ static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
 // There is no a dedicated instruction on AVX-512 that shuffles the masks.
 // The only way to shuffle bits is to sign-extend the mask vector to SIMD
 // vector, shuffle and then truncate it back.
-static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                      MVT VT, SDValue V1, SDValue V2,
-                                      const APInt &Zeroable,
-                                      const X86Subtarget &Subtarget,
-                                      SelectionDAG &DAG) {
+static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+                                MVT VT, SDValue V1, SDValue V2,
+                                const APInt &Zeroable,
+                                const X86Subtarget &Subtarget,
+                                SelectionDAG &DAG) {
   assert(Subtarget.hasAVX512() &&
          "Cannot lower 512-bit vectors w/o basic ISA!");
 
@@ -16037,15 +16619,14 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
   // Check for non-undef masks pointing at an undef vector and make the masks
   // undef as well. This makes it easier to match the shuffle based solely on
   // the mask.
-  if (V2IsUndef)
-    for (int M : Mask)
-      if (M >= NumElements) {
-        SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
-        for (int &M : NewMask)
-          if (M >= NumElements)
-            M = -1;
-        return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
-      }
+  if (V2IsUndef &&
+      any_of(Mask, [NumElements](int M) { return M >= NumElements; })) {
+    SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+    for (int &M : NewMask)
+      if (M >= NumElements)
+        M = -1;
+    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  }
 
   // Check for illegal shuffle mask element index values.
   int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit;
@@ -16083,8 +16664,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
     // by obfuscating the operands with bitcasts.
     // TODO: Avoid lowering directly from this top-level function: make this
     // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
-    if (SDValue Broadcast =
-            lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+                                                    Subtarget, DAG))
       return Broadcast;
 
     MVT NewEltVT = VT.isFloatingPoint()
@@ -16122,26 +16703,21 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
   if (canonicalizeShuffleMaskWithCommute(Mask))
     return DAG.getCommutedVectorShuffle(*SVOp);
 
-  if (SDValue V =
-          lowerVectorShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
+  if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
     return V;
 
   // For each vector width, delegate to a specialized lowering routine.
   if (VT.is128BitVector())
-    return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
-                                    DAG);
+    return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
 
   if (VT.is256BitVector())
-    return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
-                                    DAG);
+    return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
 
   if (VT.is512BitVector())
-    return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
-                                    DAG);
+    return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
 
   if (Is1BitVector)
-    return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
-                                  DAG);
+    return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
 
   llvm_unreachable("Unimplemented!");
 }
@@ -16401,7 +16977,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     // this can be done with a mask.
     IdxVal &= ElemsPerChunk - 1;
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
-                       DAG.getConstant(IdxVal, dl, MVT::i32));
+                       DAG.getIntPtrConstant(IdxVal, dl));
   }
 
   assert(VecVT.is128BitVector() && "Unexpected vector length");
@@ -16527,10 +17103,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   SDValue N0 = Op.getOperand(0);
   SDValue N1 = Op.getOperand(1);
   SDValue N2 = Op.getOperand(2);
-  if (!isa<ConstantSDNode>(N2))
+
+  auto *N2C = dyn_cast<ConstantSDNode>(N2);
+  if (!N2C || N2C->getAPIntValue().uge(NumElts))
     return SDValue();
-  auto *N2C = cast<ConstantSDNode>(N2);
-  unsigned IdxVal = N2C->getZExtValue();
+  uint64_t IdxVal = N2C->getZExtValue();
 
   bool IsZeroElt = X86::isZeroNode(N1);
   bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
@@ -16575,13 +17152,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
     unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
 
     V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
-                    DAG.getConstant(IdxIn128, dl, MVT::i32));
+                    DAG.getIntPtrConstant(IdxIn128, dl));
 
     // Insert the changed part back into the bigger vector
     return insert128BitVector(N0, V, IdxVal, DAG, dl);
   }
   assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
 
+  // This will be just movd/movq/movss/movsd.
+  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode()) &&
+      (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
+       EltVT == MVT::i64)) {
+    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+    return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+  }
+
   // Transform it so it match pinsr{b,w} which expects a GR32 as its second
   // argument. SSE41 required for pinsrb.
   if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
@@ -16613,7 +17198,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
       // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
       //   combine either bitwise AND or insert of float 0.0 to set these bits.
 
-      bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize();
+      bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
       if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
         // If this is an insertion of 32-bits into the low 32-bits of
         // a vector, we prefer to generate a blend with immediate rather
@@ -16663,7 +17248,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
     // Insert the 128-bit vector.
     return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
   }
-  assert(OpVT.is128BitVector() && "Expected an SSE type!");
+  assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
+         "Expected an SSE type!");
 
   // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
   if (OpVT == MVT::v4i32)
@@ -16789,35 +17375,9 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   return Result;
 }
 
-SDValue
-X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
-  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-
-  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
-  // global base reg.
-  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
-  unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod);
-
-  auto PtrVT = getPointerTy(DAG.getDataLayout());
-  SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
-
-  SDLoc DL(Op);
-  Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
-
-  // With PIC, the address is actually $g + Offset.
-  if (OpFlag) {
-    Result =
-        DAG.getNode(ISD::ADD, DL, PtrVT,
-                    DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
-  }
-
-  // For symbols that require a load from a stub to get the address, emit the
-  // load.
-  if (isGlobalStubReference(OpFlag))
-    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
-                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
-
-  return Result;
+SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
 }
 
 SDValue
@@ -16841,35 +17401,67 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   return Result;
 }
 
-SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
-                                              const SDLoc &dl, int64_t Offset,
-                                              SelectionDAG &DAG) const {
-  // Create the TargetGlobalAddress node, folding in the constant
-  // offset if it is legal.
-  unsigned char OpFlags = Subtarget.classifyGlobalReference(GV);
+/// Creates target global address or external symbol nodes for calls or
+/// other uses.
+SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
+                                                 bool ForCall) const {
+  // Unpack the global address or external symbol.
+  const SDLoc &dl = SDLoc(Op);
+  const GlobalValue *GV = nullptr;
+  int64_t Offset = 0;
+  const char *ExternalSym = nullptr;
+  if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
+    GV = G->getGlobal();
+    Offset = G->getOffset();
+  } else {
+    const auto *ES = cast<ExternalSymbolSDNode>(Op);
+    ExternalSym = ES->getSymbol();
+  }
+
+  // Calculate some flags for address lowering.
+  const Module &Mod = *DAG.getMachineFunction().getFunction().getParent();
+  unsigned char OpFlags;
+  if (ForCall)
+    OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod);
+  else
+    OpFlags = Subtarget.classifyGlobalReference(GV, Mod);
+  bool HasPICReg = isGlobalRelativeToPICBase(OpFlags);
+  bool NeedsLoad = isGlobalStubReference(OpFlags);
+
   CodeModel::Model M = DAG.getTarget().getCodeModel();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue Result;
-  if (OpFlags == X86II::MO_NO_FLAG &&
-      X86::isOffsetSuitableForCodeModel(Offset, M)) {
-    // A direct static reference to a global.
-    Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
-    Offset = 0;
+
+  if (GV) {
+    // Create a target global address if this is a global. If possible, fold the
+    // offset into the global address reference. Otherwise, ADD it on later.
+    int64_t GlobalOffset = 0;
+    if (OpFlags == X86II::MO_NO_FLAG &&
+        X86::isOffsetSuitableForCodeModel(Offset, M)) {
+      std::swap(GlobalOffset, Offset);
+    }
+    Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
   } else {
-    Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
+    // If this is not a global address, this must be an external symbol.
+    Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags);
   }
 
+  // If this is a direct call, avoid the wrapper if we don't need to do any
+  // loads or adds. This allows SDAG ISel to match direct calls.
+  if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
+    return Result;
+
   Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
 
   // With PIC, the address is actually $g + Offset.
-  if (isGlobalRelativeToPICBase(OpFlags)) {
+  if (HasPICReg) {
     Result = DAG.getNode(ISD::ADD, dl, PtrVT,
                          DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
   }
 
   // For globals that require a load from a stub to get the address, emit the
   // load.
-  if (isGlobalStubReference(OpFlags))
+  if (NeedsLoad)
     Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
 
@@ -16884,9 +17476,7 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
 
 SDValue
 X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
-  return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
+  return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
 }
 
 static SDValue
@@ -17112,9 +17702,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
   }
 
-  if (Subtarget.isTargetKnownWindowsMSVC() ||
-      Subtarget.isTargetWindowsItanium() ||
-      Subtarget.isTargetWindowsGNU()) {
+  if (Subtarget.isOSWindows()) {
     // Just use the implicit TLS architecture
     // Need to generate something similar to:
     //   mov     rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -17254,7 +17842,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
 
     APInt APIntShiftAmt;
     if (isConstantSplat(Amt, APIntShiftAmt)) {
-      uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
+      uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
       return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
                          Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
     }
@@ -17267,7 +17855,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
          "Unexpected funnel shift type!");
 
   // Expand slow SHLD/SHRD cases if we are not optimizing for size.
-  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
   if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
@@ -17311,6 +17899,70 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
                      DAG.getIntPtrConstant(0, dl));
 }
 
+static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
+                          const X86Subtarget &Subtarget) {
+  switch (Opcode) {
+    case ISD::SINT_TO_FP:
+      // TODO: Handle wider types with AVX/AVX512.
+      if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
+        return false;
+      // CVTDQ2PS or (V)CVTDQ2PD
+      return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
+
+    case ISD::UINT_TO_FP:
+      // TODO: Handle wider types and i64 elements.
+      if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
+        return false;
+      // VCVTUDQ2PS or VCVTUDQ2PD
+      return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+
+    default:
+      return false;
+  }
+}
+
+/// Given a scalar cast operation that is extracted from a vector, try to
+/// vectorize the cast op followed by extraction. This will avoid an expensive
+/// round-trip between XMM and GPR.
+static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
+  // TODO: This could be enhanced to handle smaller integer types by peeking
+  // through an extend.
+  SDValue Extract = Cast.getOperand(0);
+  MVT DestVT = Cast.getSimpleValueType();
+  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      !isa<ConstantSDNode>(Extract.getOperand(1)))
+    return SDValue();
+
+  // See if we have a 128-bit vector cast op for this type of cast.
+  SDValue VecOp = Extract.getOperand(0);
+  MVT FromVT = VecOp.getSimpleValueType();
+  unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits();
+  MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM);
+  MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM);
+  if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget))
+    return SDValue();
+
+  // If we are extracting from a non-zero element, first shuffle the source
+  // vector to allow extracting from element zero.
+  SDLoc DL(Cast);
+  if (!isNullConstant(Extract.getOperand(1))) {
+    SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);
+    Mask[0] = Extract.getConstantOperandVal(1);
+    VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask);
+  }
+  // If the source vector is wider than 128-bits, extract the low part. Do not
+  // create an unnecessarily wide vector cast op.
+  if (FromVT != Vec128VT)
+    VecOp = extract128BitVector(VecOp, 0, DAG, DL);
+
+  // cast (extelt V, 0) --> extelt (cast (extract_subv V)), 0
+  // cast (extelt V, C) --> extelt (cast (extract_subv (shuffle V, [C...]))), 0
+  SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast,
+                     DAG.getIntPtrConstant(0, DL));
+}
+
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDValue Src = Op.getOperand(0);
@@ -17318,6 +17970,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
 
+  if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
+    return Extract;
+
   if (SrcVT.isVector()) {
     if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
@@ -17371,23 +18026,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
   else
     Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
 
-  unsigned ByteSize = SrcVT.getSizeInBits()/8;
+  unsigned ByteSize = SrcVT.getSizeInBits() / 8;
 
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
-  MachineMemOperand *MMO;
+  MachineMemOperand *LoadMMO;
   if (FI) {
     int SSFI = FI->getIndex();
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
+    LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
         MachineMemOperand::MOLoad, ByteSize, ByteSize);
   } else {
-    MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+    LoadMMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
     StackSlot = StackSlot.getOperand(1);
   }
-  SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
-  SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
-                                           X86ISD::FILD, DL,
-                                           Tys, Ops, SrcVT, MMO);
+  SDValue FILDOps[] = {Chain, StackSlot};
+  SDValue Result =
+      DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL,
+                              Tys, FILDOps, SrcVT, LoadMMO);
 
   if (useSSE) {
     Chain = Result.getValue(1);
@@ -17397,20 +18052,18 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
     // shouldn't be necessary except that RFP cannot be live across
     // multiple blocks. When stackifier is fixed, they can be uncoupled.
     MachineFunction &MF = DAG.getMachineFunction();
-    unsigned SSFISize = Op.getValueSizeInBits()/8;
+    unsigned SSFISize = Op.getValueSizeInBits() / 8;
     int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
     auto PtrVT = getPointerTy(MF.getDataLayout());
     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
     Tys = DAG.getVTList(MVT::Other);
-    SDValue Ops[] = {
-      Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
-    };
-    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+    SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag};
+    MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
         MachineMemOperand::MOStore, SSFISize, SSFISize);
 
-    Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
-                                    Ops, Op.getValueType(), MMO);
+    Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps,
+                                    Op.getValueType(), StoreMMO);
     Result = DAG.getLoad(
         Op.getValueType(), DL, Chain, StackSlot,
         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
@@ -17545,7 +18198,7 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
   SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
 
   // Two to the power of half-word-size.
-  SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64);
+  SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64);
 
   // Clear upper part of LO, lower HI.
   SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
@@ -17680,6 +18333,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   if (Op.getSimpleValueType().isVector())
     return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
 
+  if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
+    return Extract;
+
   MVT SrcVT = N0.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
 
@@ -17732,7 +18388,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
       MachineMemOperand::MOLoad, 8, 8);
 
   SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
-  SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+  SDValue Ops[] = { Store, StackSlot };
   SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
                                          MVT::i64, MMO);
 
@@ -17768,16 +18424,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
 // If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
 // is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
-// just return an <SDValue(), SDValue()> pair.
+// just return an SDValue().
 // Otherwise it is assumed to be a conversion from one of f32, f64 or f80
-// to i16, i32 or i64, and we lower it to a legal sequence.
-// If lowered to the final integer result we return a <result, SDValue()> pair.
-// Otherwise we lower it to a sequence ending with a FIST, return a
-// <FIST, StackSlot> pair, and the caller is responsible for loading
-// the final integer result from StackSlot.
-std::pair<SDValue,SDValue>
+// to i16, i32 or i64, and we lower it to a legal sequence and return the
+// result.
+SDValue
 X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
-                                   bool IsSigned, bool IsReplace) const {
+                                   bool IsSigned) const {
   SDLoc DL(Op);
 
   EVT DstTy = Op.getValueType();
@@ -17787,18 +18440,15 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
   if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
     // f16 must be promoted before using the lowering in this routine.
     // fp128 does not use this lowering.
-    return std::make_pair(SDValue(), SDValue());
+    return SDValue();
   }
 
   // If using FIST to compute an unsigned i64, we'll need some fixup
   // to handle values above the maximum signed i64.  A FIST is always
   // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
-  bool UnsignedFixup = !IsSigned &&
-                       DstTy == MVT::i64 &&
-                       (!Subtarget.is64Bit() ||
-                        !isScalarFPTypeInSSEReg(TheVT));
+  bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
 
-  if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
+  if (!IsSigned && DstTy != MVT::i64) {
     // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
     // The low 32 bits of the fist result will have the correct uint32 result.
     assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -17809,30 +18459,13 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
          DstTy.getSimpleVT() >= MVT::i16 &&
          "Unknown FP_TO_INT to lower!");
 
-  // These are really Legal.
-  if (DstTy == MVT::i32 &&
-      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
-    return std::make_pair(SDValue(), SDValue());
-  if (Subtarget.is64Bit() &&
-      DstTy == MVT::i64 &&
-      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
-    return std::make_pair(SDValue(), SDValue());
-
   // We lower FP->int64 into FISTP64 followed by a load from a temporary
   // stack slot.
   MachineFunction &MF = DAG.getMachineFunction();
-  unsigned MemSize = DstTy.getSizeInBits()/8;
+  unsigned MemSize = DstTy.getStoreSize();
   int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
 
-  unsigned Opc;
-  switch (DstTy.getSimpleVT().SimpleTy) {
-  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
-  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
-  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
-  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
-  }
-
   SDValue Chain = DAG.getEntryNode();
   SDValue Value = Op.getOperand(0);
   SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
@@ -17874,9 +18507,10 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                getSetCCResultType(DAG.getDataLayout(),
                                                   *DAG.getContext(), TheVT),
                                Value, ThreshVal, ISD::SETLT);
-    Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
-                           DAG.getConstant(0, DL, MVT::i32),
-                           DAG.getConstant(0x80000000, DL, MVT::i32));
+    Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
+                           DAG.getConstant(0, DL, MVT::i64),
+                           DAG.getConstant(APInt::getSignMask(64),
+                                           DL, MVT::i64));
     SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
     Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
                                               *DAG.getContext(), TheVT),
@@ -17884,81 +18518,52 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
   }
 
+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
+
   // FIXME This causes a redundant load/store if the SSE-class value is already
   // in memory, such as if it is on the callstack.
   if (isScalarFPTypeInSSEReg(TheVT)) {
     assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
-    Chain = DAG.getStore(Chain, DL, Value, StackSlot,
-                         MachinePointerInfo::getFixedStack(MF, SSFI));
-    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
-    SDValue Ops[] = {
-      Chain, StackSlot, DAG.getValueType(TheVT)
-    };
-
-    MachineMemOperand *MMO =
-        MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
-                                MachineMemOperand::MOLoad, MemSize, MemSize);
-    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
+    Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
+    SDVTList Tys = DAG.getVTList(TheVT, MVT::Other);
+    SDValue Ops[] = { Chain, StackSlot };
+
+    unsigned FLDSize = TheVT.getStoreSize();
+    assert(FLDSize <= MemSize && "Stack slot not big enough");
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MPI, MachineMemOperand::MOLoad, FLDSize, FLDSize);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
     Chain = Value.getValue(1);
-    SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
-    StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
   }
 
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
-                              MachineMemOperand::MOStore, MemSize, MemSize);
-
-  if (UnsignedFixup) {
-
-    // Insert the FIST, load its result as two i32's,
-    // and XOR the high i32 with Adjust.
+  // Build the FP_TO_INT_IN_MEM node.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOStore, MemSize, MemSize);
+  SDValue Ops[] = { Chain, Value, StackSlot };
+  SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
+                                         DAG.getVTList(MVT::Other),
+                                         Ops, DstTy, MMO);
 
-    SDValue FistOps[] = { Chain, Value, StackSlot };
-    SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
-                                           FistOps, DstTy, MMO);
+  SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);
 
-    SDValue Low32 =
-        DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo());
-    SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL);
+  // If we need an unsigned fixup, XOR the result with adjust.
+  if (UnsignedFixup)
+    Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust);
 
-    SDValue High32 =
-        DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo());
-    High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
-
-    if (Subtarget.is64Bit()) {
-      // Join High32 and Low32 into a 64-bit result.
-      // (High32 << 32) | Low32
-      Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
-      High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
-      High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
-                           DAG.getConstant(32, DL, MVT::i8));
-      SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
-      return std::make_pair(Result, SDValue());
-    }
-
-    SDValue ResultOps[] = { Low32, High32 };
-
-    SDValue pair = IsReplace
-      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
-      : DAG.getMergeValues(ResultOps, DL);
-    return std::make_pair(pair, SDValue());
-  } else {
-    // Build the FP_TO_INT*_IN_MEM
-    SDValue Ops[] = { Chain, Value, StackSlot };
-    SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
-                                           Ops, DstTy, MMO);
-    return std::make_pair(FIST, StackSlot);
-  }
+  return Res;
 }
 
 static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
-  MVT VT = Op->getSimpleValueType(0);
-  SDValue In = Op->getOperand(0);
+  MVT VT = Op.getSimpleValueType();
+  SDValue In = Op.getOperand(0);
   MVT InVT = In.getSimpleValueType();
   SDLoc dl(Op);
+  unsigned Opc = Op.getOpcode();
 
   assert(VT.isVector() && InVT.isVector() && "Expected vector type");
+  assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) &&
+         "Unexpected extension opcode");
   assert(VT.getVectorNumElements() == VT.getVectorNumElements() &&
          "Expected same number of elements");
   assert((VT.getVectorElementType() == MVT::i16 ||
@@ -17970,6 +18575,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
           InVT.getVectorElementType() == MVT::i32) &&
          "Unexpected element type");
 
+  unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc);
+
   // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
   if (InVT == MVT::v8i8) {
     if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
@@ -17977,8 +18584,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
 
     In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
                      MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
-    // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input.
-    return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In);
+    return DAG.getNode(ExtendInVecOpc, dl, VT, In);
   }
 
   if (Subtarget.hasInt256())
@@ -18000,11 +18606,17 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
   MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
                                 VT.getVectorNumElements() / 2);
 
-  SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+  SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In);
+
+  // Short-circuit if we can determine that each 128-bit half is the same value.
+  // Otherwise, this is difficult to match and optimize.
+  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
+    if (hasIdenticalHalvesShuffleMask(Shuf->getMask()))
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo);
 
   SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
   SDValue Undef = DAG.getUNDEF(InVT);
-  bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+  bool NeedZero = Opc == ISD::ZERO_EXTEND;
   SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
   OpHi = DAG.getBitcast(HalfVT, OpHi);
 
@@ -18179,8 +18791,11 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
 
     // 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
     // so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
-    Res = DAG.getBitcast(MVT::v4i64, Res);
-    Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
+    // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
+    SmallVector<int, 64> Mask;
+    int Scale = 64 / OutVT.getScalarSizeInBits();
+    scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+    Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
 
     if (DstVT.is256BitVector())
       return DAG.getBitcast(DstVT, Res);
@@ -18422,12 +19037,12 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
 SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
   MVT VT = Op.getSimpleValueType();
+  SDValue Src = Op.getOperand(0);
+  MVT SrcVT = Src.getSimpleValueType();
+  SDLoc dl(Op);
 
   if (VT.isVector()) {
-    SDValue Src = Op.getOperand(0);
-    SDLoc dl(Op);
-
-    if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
+    if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
       MVT ResVT = MVT::v4i32;
       MVT TruncVT = MVT::v4i1;
       unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
@@ -18447,7 +19062,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
     }
 
     assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
-    if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
+    if (VT == MVT::v2i64 && SrcVT  == MVT::v2f32) {
       return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
                          DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                      DAG.getUNDEF(MVT::v2f32)));
@@ -18458,19 +19073,34 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
 
   assert(!VT.isVector());
 
-  std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
-    IsSigned, /*IsReplace=*/ false);
-  SDValue FIST = Vals.first, StackSlot = Vals.second;
-  // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
-  if (!FIST.getNode())
+  bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
+
+  if (!IsSigned && Subtarget.hasAVX512()) {
+    // Conversions from f32/f64 should be legal.
+    if (UseSSEReg)
+      return Op;
+
+    // Use default expansion.
+    if (VT == MVT::i64)
+      return SDValue();
+  }
+
+  // Promote i16 to i32 if we can use an SSE operation.
+  if (VT == MVT::i16 && UseSSEReg) {
+    assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
+    SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+  }
+
+  // If this is a FP_TO_SINT using SSEReg we're done.
+  if (UseSSEReg && IsSigned)
     return Op;
 
-  if (StackSlot.getNode())
-    // Load the result.
-    return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
+  // Fall back to X87.
+  if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
+    return V;
 
-  // The node is the result.
-  return FIST;
+  llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
 }
 
 static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
@@ -18491,7 +19121,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
 /// implementation, and likely shuffle complexity of the alternate sequence.
 static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
-  bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+  bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize();
   bool HasFastHOps = Subtarget.hasFastHorizontalOps();
   return !IsSingleSource || IsOptimizingSize || HasFastHOps;
 }
@@ -18513,16 +19143,11 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
   if (!IsFP && !Subtarget.hasSSSE3())
     return Op;
 
-  // Defer forming the minimal horizontal op if the vector source has more than
-  // the 2 extract element uses that we're matching here. In that case, we might
-  // form a horizontal op that includes more than 1 add/sub op.
+  // Extract from a common vector.
   if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
       RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
       LHS.getOperand(0) != RHS.getOperand(0) ||
-      !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
-    return Op;
-
-  if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
+      !isa<ConstantSDNode>(LHS.getOperand(1)) ||
       !isa<ConstantSDNode>(RHS.getOperand(1)) ||
       !shouldUseHorizontalOp(true, DAG, Subtarget))
     return Op;
@@ -18540,33 +19165,37 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
   }
   unsigned LExtIndex = LHS.getConstantOperandVal(1);
   unsigned RExtIndex = RHS.getConstantOperandVal(1);
-  if (LExtIndex == 1 && RExtIndex == 0 &&
+  if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
       (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
     std::swap(LExtIndex, RExtIndex);
 
-  // TODO: This can be extended to handle other adjacent extract pairs.
-  if (LExtIndex != 0 || RExtIndex != 1)
+  if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
     return Op;
 
   SDValue X = LHS.getOperand(0);
   EVT VecVT = X.getValueType();
   unsigned BitWidth = VecVT.getSizeInBits();
+  unsigned NumLanes = BitWidth / 128;
+  unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
   assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
          "Not expecting illegal vector widths here");
 
   // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
-  // equivalent, so extract the 256/512-bit source op to 128-bit.
-  // This is free: ymm/zmm -> xmm.
+  // equivalent, so extract the 256/512-bit source op to 128-bit if we can.
   SDLoc DL(Op);
-  if (BitWidth == 256 || BitWidth == 512)
-    X = extract128BitVector(X, 0, DAG, DL);
+  if (BitWidth == 256 || BitWidth == 512) {
+    unsigned LaneIdx = LExtIndex / NumEltsPerLane;
+    X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
+    LExtIndex %= NumEltsPerLane;
+  }
 
   // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
   // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+  // add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1
   // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
   SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
-                     DAG.getIntPtrConstant(0, DL));
+                     DAG.getIntPtrConstant(LExtIndex / 2, DL));
 }
 
 /// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -18732,36 +19361,25 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
                      DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
 }
 
-// Check whether an OR'd tree is PTEST-able.
-static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
-                                      const X86Subtarget &Subtarget,
-                                      SelectionDAG &DAG,
-                                      SDValue &X86CC) {
-  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
-
-  if (!Subtarget.hasSSE41())
-    return SDValue();
-
-  if (!Op->hasOneUse())
-    return SDValue();
-
-  SDNode *N = Op.getNode();
-  SDLoc DL(N);
-
+/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
+/// style scalarized (associative) reduction patterns.
+static bool matchBitOpReduction(SDValue Op, ISD::NodeType BinOp,
+                                SmallVectorImpl<SDValue> &SrcOps) {
   SmallVector<SDValue, 8> Opnds;
-  DenseMap<SDValue, unsigned> VecInMap;
-  SmallVector<SDValue, 8> VecIns;
+  DenseMap<SDValue, APInt> SrcOpMap;
   EVT VT = MVT::Other;
 
   // Recognize a special case where a vector is casted into wide integer to
   // test all 0s.
-  Opnds.push_back(N->getOperand(0));
-  Opnds.push_back(N->getOperand(1));
+  assert(Op.getOpcode() == unsigned(BinOp) &&
+         "Unexpected bit reduction opcode");
+  Opnds.push_back(Op.getOperand(0));
+  Opnds.push_back(Op.getOperand(1));
 
   for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
     SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
-    // BFS traverse all OR'd operands.
-    if (I->getOpcode() == ISD::OR) {
+    // BFS traverse all BinOp operands.
+    if (I->getOpcode() == unsigned(BinOp)) {
       Opnds.push_back(I->getOperand(0));
       Opnds.push_back(I->getOperand(1));
       // Re-evaluate the number of nodes to be traversed.
@@ -18771,42 +19389,63 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
 
     // Quit if a non-EXTRACT_VECTOR_ELT
     if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-      return SDValue();
+      return false;
 
     // Quit if without a constant index.
     SDValue Idx = I->getOperand(1);
     if (!isa<ConstantSDNode>(Idx))
-      return SDValue();
+      return false;
 
-    SDValue ExtractedFromVec = I->getOperand(0);
-    DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
-    if (M == VecInMap.end()) {
-      VT = ExtractedFromVec.getValueType();
-      // Quit if not 128/256-bit vector.
-      if (!VT.is128BitVector() && !VT.is256BitVector())
-        return SDValue();
+    SDValue Src = I->getOperand(0);
+    DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src);
+    if (M == SrcOpMap.end()) {
+      VT = Src.getValueType();
       // Quit if not the same type.
-      if (VecInMap.begin() != VecInMap.end() &&
-          VT != VecInMap.begin()->first.getValueType())
-        return SDValue();
-      M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
-      VecIns.push_back(ExtractedFromVec);
+      if (SrcOpMap.begin() != SrcOpMap.end() &&
+          VT != SrcOpMap.begin()->first.getValueType())
+        return false;
+      unsigned NumElts = VT.getVectorNumElements();
+      APInt EltCount = APInt::getNullValue(NumElts);
+      M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
+      SrcOps.push_back(Src);
     }
-    M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
+    // Quit if element already used.
+    unsigned CIdx = cast<ConstantSDNode>(Idx)->getZExtValue();
+    if (M->second[CIdx])
+      return false;
+    M->second.setBit(CIdx);
   }
 
-  assert((VT.is128BitVector() || VT.is256BitVector()) &&
-         "Not extracted from 128-/256-bit vector.");
+  // Quit if not all elements are used.
+  for (DenseMap<SDValue, APInt>::const_iterator I = SrcOpMap.begin(),
+                                                E = SrcOpMap.end();
+       I != E; ++I) {
+    if (!I->second.isAllOnesValue())
+      return false;
+  }
 
-  unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
+  return true;
+}
 
-  for (DenseMap<SDValue, unsigned>::const_iterator
-        I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
-    // Quit if not all elements are used.
-    if (I->second != FullMask)
-      return SDValue();
-  }
+// Check whether an OR'd tree is PTEST-able.
+static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
+                                      const X86Subtarget &Subtarget,
+                                      SelectionDAG &DAG, SDValue &X86CC) {
+  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+
+  if (!Subtarget.hasSSE41() || !Op->hasOneUse())
+    return SDValue();
+
+  SmallVector<SDValue, 8> VecIns;
+  if (!matchBitOpReduction(Op, ISD::OR, VecIns))
+    return SDValue();
+
+  // Quit if not 128/256-bit vector.
+  EVT VT = VecIns[0].getValueType();
+  if (!VT.is128BitVector() && !VT.is256BitVector())
+    return SDValue();
 
+  SDLoc DL(Op);
   MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
 
   // Cast all vectors into TestVT for PTEST.
@@ -18822,10 +19461,9 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
     VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
   }
 
-  X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
-                          DL, MVT::i8);
-  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
-                     VecIns.back(), VecIns.back());
+  X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, DL,
+                          MVT::i8);
+  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
 }
 
 /// return true if \c Op has a use that doesn't just read flags.
@@ -18963,29 +19601,52 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
   if (isNullConstant(Op1))
     return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
 
-  if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
-       Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
-    // Only promote the compare up to I32 if it is a 16 bit operation
-    // with an immediate.  16 bit immediates are to be avoided.
-    if (Op0.getValueType() == MVT::i16 &&
-        ((isa<ConstantSDNode>(Op0) &&
-          !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
-         (isa<ConstantSDNode>(Op1) &&
-          !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
-        !DAG.getMachineFunction().getFunction().optForMinSize() &&
-        !Subtarget.isAtom()) {
+  EVT CmpVT = Op0.getValueType();
+
+  if (CmpVT.isFloatingPoint())
+    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+
+  assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
+          CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
+
+  // Only promote the compare up to I32 if it is a 16 bit operation
+  // with an immediate.  16 bit immediates are to be avoided.
+  if (CmpVT == MVT::i16 && !Subtarget.isAtom() &&
+      !DAG.getMachineFunction().getFunction().hasMinSize()) {
+    ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+    ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+    // Don't do this if the immediate can fit in 8-bits.
+    if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
+        (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
       unsigned ExtendOp =
           isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
-      Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
-      Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
+      if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
+        // For equality comparisons try to use SIGN_EXTEND if the input was
+        // truncated from something with enough sign bits.
+        if (Op0.getOpcode() == ISD::TRUNCATE) {
+          SDValue In = Op0.getOperand(0);
+          unsigned EffBits =
+              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
+          if (EffBits <= 16)
+            ExtendOp = ISD::SIGN_EXTEND;
+        } else if (Op1.getOpcode() == ISD::TRUNCATE) {
+          SDValue In = Op1.getOperand(0);
+          unsigned EffBits =
+              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
+          if (EffBits <= 16)
+            ExtendOp = ISD::SIGN_EXTEND;
+        }
+      }
+
+      CmpVT = MVT::i32;
+      Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0);
+      Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
     }
-    // Use SUB instead of CMP to enable CSE between SUB and CMP.
-    SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
-    SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
-    return SDValue(Sub.getNode(), 1);
   }
-  assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
-  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+  // Use SUB instead of CMP to enable CSE between SUB and CMP.
+  SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
+  SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
+  return Sub.getValue(1);
 }
 
 /// Convert a comparison if required by the subtarget.
@@ -19146,7 +19807,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
     } else {
       // Use BT if the immediate can't be encoded in a TEST instruction or we
       // are optimizing for size and the immedaite won't fit in a byte.
-      bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+      bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
       if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
           isPowerOf2_64(AndRHSVal)) {
         Src = AndLHS;
@@ -19290,10 +19951,11 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
   return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
 }
 
-/// Given a simple buildvector constant, return a new vector constant with each
-/// element decremented. If decrementing would result in underflow or this
-/// is not a simple vector constant, return an empty value.
-static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+/// Given a buildvector constant, return a new vector constant with each element
+/// incremented or decremented. If incrementing or decrementing would result in
+/// unsigned overflow or underflow or this is not a simple vector constant,
+/// return an empty value.
+static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
   auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
   if (!BV)
     return SDValue();
@@ -19308,11 +19970,12 @@ static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
     if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
       return SDValue();
 
-    // Avoid underflow.
-    if (Elt->getAPIntValue().isNullValue())
+    // Avoid overflow/underflow.
+    const APInt &EltC = Elt->getAPIntValue();
+    if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue()))
       return SDValue();
 
-    NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
+    NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
   }
 
   return DAG.getBuildVector(VT, DL, NewVecC);
@@ -19344,12 +20007,24 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
     // Only do this pre-AVX since vpcmp* is no longer destructive.
     if (Subtarget.hasAVX())
       return SDValue();
-    SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
+    SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
     if (!ULEOp1)
       return SDValue();
     Op1 = ULEOp1;
     break;
   }
+  case ISD::SETUGT: {
+    // If the comparison is against a constant, we can turn this into a setuge.
+    // This is beneficial because materializing a constant 0 for the PCMPEQ is
+    // probably cheaper than XOR+PCMPGT using 2 different vector constants:
+    // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
+    SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
+    if (!UGEOp1)
+      return SDValue();
+    Op1 = Op0;
+    Op0 = UGEOp1;
+    break;
+  }
   // Psubus is better than flip-sign because it requires no inversion.
   case ISD::SETUGE:
     std::swap(Op0, Op1);
@@ -19446,10 +20121,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
   assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
          "Value types for source and destination must be the same!");
 
-  // Break 256-bit integer vector compare into smaller ones.
-  if (VT.is256BitVector() && !Subtarget.hasInt256())
-    return Lower256IntVSETCC(Op, DAG);
-
   // The result is boolean, but operands are int/float
   if (VT.getVectorElementType() == MVT::i1) {
     // In AVX-512 architecture setcc returns mask with i1 elements,
@@ -19503,6 +20174,27 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
+  // ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
+  if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
+      Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
+    ConstantSDNode *C1 = isConstOrConstSplat(Op1);
+    if (C1 && C1->getAPIntValue().isPowerOf2()) {
+      unsigned BitWidth = VT.getScalarSizeInBits();
+      unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;
+
+      SDValue Result = Op0.getOperand(0);
+      Result = DAG.getNode(ISD::SHL, dl, VT, Result,
+                           DAG.getConstant(ShiftAmt, dl, VT));
+      Result = DAG.getNode(ISD::SRA, dl, VT, Result,
+                           DAG.getConstant(BitWidth - 1, dl, VT));
+      return Result;
+    }
+  }
+
+  // Break 256-bit integer vector compare into smaller ones.
+  if (VT.is256BitVector() && !Subtarget.hasInt256())
+    return Lower256IntVSETCC(Op, DAG);
+
   // If this is a SETNE against the signed minimum value, change it to SETGT.
   // If this is a SETNE against the signed maximum value, change it to SETLT.
   // which will be swapped to SETGT.
@@ -19530,17 +20222,20 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
       TLI.isOperationLegal(ISD::UMIN, VT)) {
     // If we have a constant operand, increment/decrement it and change the
     // condition to avoid an invert.
-    // TODO: This could be extended to handle a non-splat constant by checking
-    // that each element of the constant is not the max/null value.
-    APInt C;
-    if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
+    if (Cond == ISD::SETUGT &&
+        ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
+          return !C->getAPIntValue().isMaxValue();
+        })) {
       // X > C --> X >= (C+1) --> X == umax(X, C+1)
-      Op1 = DAG.getConstant(C + 1, dl, VT);
+      Op1 = DAG.getNode(ISD::ADD, dl, VT, Op1, DAG.getConstant(1, dl, VT));
       Cond = ISD::SETUGE;
     }
-    if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
+    if (Cond == ISD::SETULT &&
+        ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
+          return !C->getAPIntValue().isNullValue();
+        })) {
       // X < C --> X <= (C-1) --> X == umin(X, C-1)
-      Op1 = DAG.getConstant(C - 1, dl, VT);
+      Op1 = DAG.getNode(ISD::SUB, dl, VT, Op1, DAG.getConstant(1, dl, VT));
       Cond = ISD::SETULE;
     }
     bool Invert = false;
@@ -19826,7 +20521,7 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
     break;
   case ISD::UADDO:
     BaseOp = X86ISD::ADD;
-    Cond = X86::COND_B;
+    Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B;
     break;
   case ISD::SSUBO:
     BaseOp = X86ISD::SUB;
@@ -19867,6 +20562,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
 
   SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
+  assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!");
   return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
 }
 
@@ -20036,10 +20732,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       if (isNullConstant(Y) &&
           (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
         SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
-        SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+        SDValue CmpZero = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
         SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
         Zero = DAG.getConstant(0, DL, Op.getValueType());
-        return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
+        return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, CmpZero);
       }
 
       Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
@@ -20111,7 +20807,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     CC = Cond.getOperand(0);
 
     SDValue Cmp = Cond.getOperand(1);
-    unsigned Opc = Cmp.getOpcode();
     MVT VT = Op.getSimpleValueType();
 
     bool IllegalFPCMov = false;
@@ -20120,7 +20815,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
 
     if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
-        Opc == X86ISD::BT) { // FIXME
+        Cmp.getOpcode() == X86ISD::BT) { // FIXME
       Cond = Cmp;
       AddTest = false;
     }
@@ -20193,8 +20888,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
-  // Promote i16 cmovs if it won't prevent folding a load.
-  if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
+  // Or finally, promote i8 cmovs if we have CMOV,
+  //                 or i16 cmovs if it won't prevent folding a load.
+  // FIXME: we should not limit promotion of i8 case to only when the CMOV is
+  //        legal, but EmitLoweredSelect() can not deal with these extensions
+  //        being inserted between two CMOVs (the i16 case is affected too).
+  //        https://bugs.llvm.org/show_bug.cgi?id=40974
+  if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
+      (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) &&
+       !MayFoldLoad(Op2))) {
     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
     Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
     SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -20453,6 +21155,76 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
+/// Split a 256/512-bit vector store into two independent half-size stores.
+static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
+  SDValue StoredVal = Store->getValue();
+  assert((StoredVal.getValueType().is256BitVector() ||
+          StoredVal.getValueType().is512BitVector()) &&
+         "Expecting 256/512-bit op");
+
+  // Splitting volatile memory ops is not allowed unless the operation was not
+  // legal to begin with. We are assuming the input op is legal (this transform
+  // is only used for targets with AVX), so give up with an empty SDValue.
+  if (Store->isVolatile())
+    return SDValue();
+
+  MVT StoreVT = StoredVal.getSimpleValueType();
+  unsigned NumElems = StoreVT.getVectorNumElements();
+  unsigned HalfSize = StoredVal.getValueSizeInBits() / 2; // In bits.
+  unsigned HalfAlign = (128 == HalfSize ? 16 : 32); // Bytes per half.
+
+  SDLoc DL(Store);
+  SDValue Value0 = extractSubVector(StoredVal, 0, DAG, DL, HalfSize);
+  SDValue Value1 = extractSubVector(StoredVal, NumElems / 2, DAG, DL, HalfSize);
+  SDValue Ptr0 = Store->getBasePtr();
+  SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfAlign, DL); // Upper half.
+  unsigned Alignment = Store->getAlignment();
+  SDValue Ch0 =
+      DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
+                   Alignment, Store->getMemOperand()->getFlags());
+  SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
+                             Store->getPointerInfo().getWithOffset(HalfAlign),
+                             MinAlign(Alignment, HalfAlign),
+                             Store->getMemOperand()->getFlags());
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
+}
+
+/// Scalarize a vector store, bitcasting to StoreVT to determine the scalar
+/// type.
+static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
+                                    SelectionDAG &DAG) {
+  SDValue StoredVal = Store->getValue();
+  assert(StoreVT.is128BitVector() &&
+         StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
+  StoredVal = DAG.getBitcast(StoreVT, StoredVal);
+
+  // Splitting volatile memory ops is not allowed unless the operation was not
+  // legal to begin with. We are assuming the input op is legal (this transform
+  // is only used for targets with AVX).
+  if (Store->isVolatile())
+    return SDValue();
+
+  MVT StoreSVT = StoreVT.getScalarType();
+  unsigned NumElems = StoreVT.getVectorNumElements();
+  unsigned ScalarSize = StoreSVT.getStoreSize(); // Per-element byte stride.
+  unsigned Alignment = Store->getAlignment();
+
+  SDLoc DL(Store);
+  SmallVector<SDValue, 4> Stores;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    unsigned Offset = i * ScalarSize; // Offset of element i from the base.
+    SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), Offset, DL);
+    SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
+                              DAG.getIntPtrConstant(i, DL));
+    SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
+                              Store->getPointerInfo().getWithOffset(Offset),
+                              MinAlign(Alignment, Offset),
+                              Store->getMemOperand()->getFlags());
+    Stores.push_back(Ch);
+  }
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
+}
+
 static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
   StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
@@ -20482,28 +21254,47 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   if (St->isTruncatingStore())
     return SDValue();
 
+  // If this is a 256-bit store of concatenated ops, we are better off splitting
+  // that store into two 128-bit stores. This avoids spurious use of 256-bit ops
+  // and each half can execute independently. Some cores would split the op into
+  // halves anyway, so the concat (vinsertf128) is purely an extra op.
   MVT StoreVT = StoredVal.getSimpleValueType();
+  if (StoreVT.is256BitVector()) {
+    SmallVector<SDValue, 4> CatOps;
+    if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
+      return splitVectorStore(St, DAG);
+    return SDValue();
+  }
+
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
          "Unexpected VT");
   if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
         TargetLowering::TypeWidenVector)
     return SDValue();
 
-  // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
-  // and store it.
   MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
                                 StoreVT.getVectorNumElements() * 2);
   StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                           DAG.getUNDEF(StoreVT));
-  MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
-  MVT CastVT = MVT::getVectorVT(StVT, 2);
-  StoredVal = DAG.getBitcast(CastVT, StoredVal);
-  StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
-                          DAG.getIntPtrConstant(0, dl));
 
-  return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
-                      St->getPointerInfo(), St->getAlignment(),
-                      St->getMemOperand()->getFlags());
+  if (Subtarget.hasSSE2()) {
+    // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+    // and store it.
+    MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+    MVT CastVT = MVT::getVectorVT(StVT, 2);
+    StoredVal = DAG.getBitcast(CastVT, StoredVal);
+    StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
+                            DAG.getIntPtrConstant(0, dl));
+
+    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags());
+  }
+  assert(Subtarget.hasSSE1() && "Expected SSE");
+  SDVTList Tys = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
+  return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
+                                 St->getMemOperand());
 }
 
 // Lower vector extended loads using a shuffle. If SSSE3 is not available we
@@ -20694,13 +21485,13 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
   unsigned SizeRatio = RegSz / MemSz;
 
   if (Ext == ISD::SEXTLOAD) {
-    SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG);
+    SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG);
     return DAG.getMergeValues({Sext, TF}, dl);
   }
 
   if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
       MemVT == MVT::v8i8) {
-    SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG);
+    SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG);
     return DAG.getMergeValues({Sext, TF}, dl);
   }
 
@@ -21240,42 +22031,41 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
   if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
     SmallVector<SDValue, 8> Elts;
     unsigned NumElts = SrcOp->getNumOperands();
-    ConstantSDNode *ND;
 
-    switch(Opc) {
+    switch (Opc) {
     default: llvm_unreachable("Unknown opcode!");
     case X86ISD::VSHLI:
-      for (unsigned i=0; i!=NumElts; ++i) {
+      for (unsigned i = 0; i != NumElts; ++i) {
         SDValue CurrentOp = SrcOp->getOperand(i);
         if (CurrentOp->isUndef()) {
           Elts.push_back(CurrentOp);
           continue;
         }
-        ND = cast<ConstantSDNode>(CurrentOp);
+        auto *ND = cast<ConstantSDNode>(CurrentOp);
         const APInt &C = ND->getAPIntValue();
         Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
       }
       break;
     case X86ISD::VSRLI:
-      for (unsigned i=0; i!=NumElts; ++i) {
+      for (unsigned i = 0; i != NumElts; ++i) {
         SDValue CurrentOp = SrcOp->getOperand(i);
         if (CurrentOp->isUndef()) {
           Elts.push_back(CurrentOp);
           continue;
         }
-        ND = cast<ConstantSDNode>(CurrentOp);
+        auto *ND = cast<ConstantSDNode>(CurrentOp);
         const APInt &C = ND->getAPIntValue();
         Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
       }
       break;
     case X86ISD::VSRAI:
-      for (unsigned i=0; i!=NumElts; ++i) {
+      for (unsigned i = 0; i != NumElts; ++i) {
         SDValue CurrentOp = SrcOp->getOperand(i);
         if (CurrentOp->isUndef()) {
           Elts.push_back(CurrentOp);
           continue;
         }
-        ND = cast<ConstantSDNode>(CurrentOp);
+        auto *ND = cast<ConstantSDNode>(CurrentOp);
         const APInt &C = ND->getAPIntValue();
         Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
       }
@@ -21443,7 +22233,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                               DAG.getBitcast(MVT::v8i1, Mask),
                               DAG.getIntPtrConstant(0, dl));
   if (Op.getOpcode() == X86ISD::FSETCCM ||
-      Op.getOpcode() == X86ISD::FSETCCM_RND ||
+      Op.getOpcode() == X86ISD::FSETCCM_SAE ||
       Op.getOpcode() == X86ISD::VFPCLASSS)
     return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
 
@@ -21517,11 +22307,31 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                    SelectionDAG &DAG) const {
   // Helper to detect if the operand is CUR_DIRECTION rounding mode.
   auto isRoundModeCurDirection = [](SDValue Rnd) {
-    if (!isa<ConstantSDNode>(Rnd))
-      return false;
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+      return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
 
-    unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
-    return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
+    return false;
+  };
+  auto isRoundModeSAE = [](SDValue Rnd) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+      return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC;
+
+    return false;
+  };
+  auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
+      RC = C->getZExtValue();
+      if (RC & X86::STATIC_ROUNDING::NO_EXC) {
+        // Clear the NO_EXC bit and check remaining bits.
+        RC ^= X86::STATIC_ROUNDING::NO_EXC;
+        return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
+               RC == X86::STATIC_ROUNDING::TO_NEG_INF ||
+               RC == X86::STATIC_ROUNDING::TO_POS_INF ||
+               RC == X86::STATIC_ROUNDING::TO_ZERO;
+      }
+    }
+
+    return false;
   };
 
   SDLoc dl(Op);
@@ -21537,13 +22347,29 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(2);
-        if (!isRoundModeCurDirection(Rnd)) {
+        unsigned RC = 0;
+        if (isRoundModeSAEToX(Rnd, RC))
           return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
-                             Op.getOperand(1), Rnd);
-        }
+                             Op.getOperand(1),
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
       }
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
     }
+    case INTR_TYPE_1OP_SAE: {
+      SDValue Sae = Op.getOperand(2);
+
+      unsigned Opc;
+      if (isRoundModeCurDirection(Sae))
+        Opc = IntrData->Opc0;
+      else if (isRoundModeSAE(Sae))
+        Opc = IntrData->Opc1;
+      else
+        return SDValue();
+
+      return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1));
+    }
     case INTR_TYPE_2OP: {
       SDValue Src2 = Op.getOperand(2);
 
@@ -21553,15 +22379,32 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(3);
-        if (!isRoundModeCurDirection(Rnd)) {
+        unsigned RC = 0;
+        if (isRoundModeSAEToX(Rnd, RC))
           return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
-                             Op.getOperand(1), Src2, Rnd);
-        }
+                             Op.getOperand(1), Src2,
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
       }
 
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
                          Op.getOperand(1), Src2);
     }
+    case INTR_TYPE_2OP_SAE: {
+      SDValue Sae = Op.getOperand(3);
+
+      unsigned Opc;
+      if (isRoundModeCurDirection(Sae))
+        Opc = IntrData->Opc0;
+      else if (isRoundModeSAE(Sae))
+        Opc = IntrData->Opc1;
+      else
+        return SDValue();
+
+      return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+                         Op.getOperand(2));
+    }
     case INTR_TYPE_3OP:
     case INTR_TYPE_3OP_IMM8: {
       SDValue Src1 = Op.getOperand(1);
@@ -21577,11 +22420,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd)) {
-          return DAG.getNode(IntrWithRoundingModeOpcode,
-                             dl, Op.getValueType(),
-                             Src1, Src2, Src3, Rnd);
-        }
+        unsigned RC = 0;
+        if (isRoundModeSAEToX(Rnd, RC))
+          return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+                             Src1, Src2, Src3,
+                             DAG.getTargetConstant(RC, dl, MVT::i32));
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
       }
 
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
@@ -21590,44 +22435,45 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     case INTR_TYPE_4OP:
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
         Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
-    case INTR_TYPE_1OP_MASK_RM: {
-      SDValue Src = Op.getOperand(1);
-      SDValue PassThru = Op.getOperand(2);
-      SDValue Mask = Op.getOperand(3);
-      SDValue RoundingMode;
-      // We always add rounding mode to the Node.
-      // If the rounding mode is not specified, we add the
-      // "current direction" mode.
-      if (Op.getNumOperands() == 4)
-        RoundingMode =
-          DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
-      else
-        RoundingMode = Op.getOperand(4);
-      assert(IntrData->Opc1 == 0 && "Unexpected second opcode!");
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
-                                              RoundingMode),
-                                  Mask, PassThru, Subtarget, DAG);
-    }
     case INTR_TYPE_1OP_MASK: {
       SDValue Src = Op.getOperand(1);
       SDValue PassThru = Op.getOperand(2);
       SDValue Mask = Op.getOperand(3);
       // We add rounding mode to the Node when
-      //   - RM Opcode is specified and
-      //   - RM is not "current direction".
+      //   - RC Opcode is specified and
+      //   - RC is not "current direction".
       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
       if (IntrWithRoundingModeOpcode != 0) {
         SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd)) {
-          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                      dl, Op.getValueType(),
-                                      Src, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-        }
+        unsigned RC = 0;
+        if (isRoundModeSAEToX(Rnd, RC))
+          return getVectorMaskingNode(
+              DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+                          Src, DAG.getTargetConstant(RC, dl, MVT::i32)),
+              Mask, PassThru, Subtarget, DAG);
+        if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
       }
       return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case INTR_TYPE_1OP_MASK_SAE: {
+      SDValue Src = Op.getOperand(1);
+      SDValue PassThru = Op.getOperand(2);
+      SDValue Mask = Op.getOperand(3);
+      SDValue Rnd = Op.getOperand(4);
+
+      unsigned Opc;
+      if (isRoundModeCurDirection(Rnd))
+        Opc = IntrData->Opc0;
+      else if (isRoundModeSAE(Rnd))
+        Opc = IntrData->Opc1;
+      else
+        return SDValue();
+
+      return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src),
+                                  Mask, PassThru, Subtarget, DAG);
+    }
     case INTR_TYPE_SCALAR_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
@@ -21641,10 +22487,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       if (Op.getNumOperands() == (5U + HasRounding)) {
         if (HasRounding) {
           SDValue Rnd = Op.getOperand(5);
+          unsigned RC = 0;
+          if (isRoundModeSAEToX(Rnd, RC))
+            return getScalarMaskingNode(
+                DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
+                            DAG.getTargetConstant(RC, dl, MVT::i32)),
+                Mask, passThru, Subtarget, DAG);
           if (!isRoundModeCurDirection(Rnd))
-            return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                                    dl, VT, Src1, Src2, Rnd),
-                                        Mask, passThru, Subtarget, DAG);
+            return SDValue();
         }
         return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
                                                 Src2),
@@ -21654,123 +22504,138 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       assert(Op.getNumOperands() == (6U + HasRounding) &&
              "Unexpected intrinsic form");
       SDValue RoundingMode = Op.getOperand(5);
+      unsigned Opc = IntrData->Opc0;
       if (HasRounding) {
         SDValue Sae = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Sae))
-          return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                                  dl, VT, Src1, Src2,
-                                                  RoundingMode, Sae),
-                                      Mask, passThru, Subtarget, DAG);
+        if (isRoundModeSAE(Sae))
+          Opc = IntrWithRoundingModeOpcode;
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
       }
-      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
+      return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,
                                               Src2, RoundingMode),
                                   Mask, passThru, Subtarget, DAG);
     }
-    case INTR_TYPE_SCALAR_MASK_RM: {
+    case INTR_TYPE_SCALAR_MASK_RND: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
-      SDValue Src0 = Op.getOperand(3);
+      SDValue passThru = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
-      // There are 2 kinds of intrinsics in this group:
-      // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands
-      // (2) With rounding mode and sae - 7 operands.
-      if (Op.getNumOperands() == 6) {
-        SDValue Sae  = Op.getOperand(5);
-        return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
-                                                Sae),
-                                    Mask, Src0, Subtarget, DAG);
-      }
-      assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
-      SDValue RoundingMode  = Op.getOperand(5);
-      SDValue Sae  = Op.getOperand(6);
-      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
-                                              RoundingMode, Sae),
-                                  Mask, Src0, Subtarget, DAG);
+      SDValue Rnd = Op.getOperand(5);
+
+      SDValue NewOp;
+      unsigned RC = 0;
+      if (isRoundModeCurDirection(Rnd))
+        NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
+      else if (isRoundModeSAEToX(Rnd, RC))
+        NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+                            DAG.getTargetConstant(RC, dl, MVT::i32));
+      else
+        return SDValue();
+
+      return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG);
+    }
+    case INTR_TYPE_SCALAR_MASK_SAE: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue passThru = Op.getOperand(3);
+      SDValue Mask = Op.getOperand(4);
+      SDValue Sae = Op.getOperand(5);
+      unsigned Opc;
+      if (isRoundModeCurDirection(Sae))
+        Opc = IntrData->Opc0;
+      else if (isRoundModeSAE(Sae))
+        Opc = IntrData->Opc1;
+      else
+        return SDValue();
+
+      return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
+                                  Mask, passThru, Subtarget, DAG);
     }
     case INTR_TYPE_2OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue PassThru = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
-
-      // We specify 2 possible opcodes for intrinsics with rounding modes.
-      // First, we check if the intrinsic may have non-default rounding mode,
-      // (IntrData->Opc1 != 0), then we check the rounding mode operand.
-      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
-      if (IntrWithRoundingModeOpcode != 0) {
+      SDValue NewOp;
+      if (IntrData->Opc1 != 0) {
         SDValue Rnd = Op.getOperand(5);
-        if (!isRoundModeCurDirection(Rnd)) {
-          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                      dl, Op.getValueType(),
-                                      Src1, Src2, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-        }
+        unsigned RC = 0;
+        if (isRoundModeSAEToX(Rnd, RC))
+          NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+                              DAG.getTargetConstant(RC, dl, MVT::i32));
+        else if (!isRoundModeCurDirection(Rnd))
+          return SDValue();
       }
-      // TODO: Intrinsics should have fast-math-flags to propagate.
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
-                                  Mask, PassThru, Subtarget, DAG);
+      if (!NewOp)
+        NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
+      return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
     }
-    case INTR_TYPE_2OP_MASK_RM: {
+    case INTR_TYPE_2OP_MASK_SAE: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue PassThru = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
-      // We specify 2 possible modes for intrinsics, with/without rounding
-      // modes.
-      // First, we check if the intrinsic have rounding mode (6 operands),
-      // if not, we set rounding mode to "current".
-      SDValue Rnd;
-      if (Op.getNumOperands() == 6)
-        Rnd = Op.getOperand(5);
-      else
-        Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                              Src1, Src2, Rnd),
+
+      unsigned Opc = IntrData->Opc0;
+      if (IntrData->Opc1 != 0) {
+        SDValue Sae = Op.getOperand(5);
+        if (isRoundModeSAE(Sae))
+          Opc = IntrData->Opc1;
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
+      }
+
+      return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
                                   Mask, PassThru, Subtarget, DAG);
     }
-    case INTR_TYPE_3OP_SCALAR_MASK: {
+    case INTR_TYPE_3OP_SCALAR_MASK_SAE: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue PassThru = Op.getOperand(4);
       SDValue Mask = Op.getOperand(5);
+      SDValue Sae = Op.getOperand(6);
+      unsigned Opc;
+      if (isRoundModeCurDirection(Sae))
+        Opc = IntrData->Opc0;
+      else if (isRoundModeSAE(Sae))
+        Opc = IntrData->Opc1;
+      else
+        return SDValue();
 
-      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
-      if (IntrWithRoundingModeOpcode != 0) {
-        SDValue Rnd = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Rnd))
-          return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                                  dl, VT, Src1, Src2, Src3, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-      }
-      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
-                                              Src2, Src3),
+      return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
                                   Mask, PassThru, Subtarget, DAG);
     }
-    case INTR_TYPE_3OP_MASK: {
+    case INTR_TYPE_3OP_MASK_SAE: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue PassThru = Op.getOperand(4);
       SDValue Mask = Op.getOperand(5);
 
-      // We specify 2 possible opcodes for intrinsics with rounding modes.
-      // First, we check if the intrinsic may have non-default rounding mode,
-      // (IntrData->Opc1 != 0), then we check the rounding mode operand.
-      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
-      if (IntrWithRoundingModeOpcode != 0) {
-        SDValue Rnd = Op.getOperand(6);
-        if (!isRoundModeCurDirection(Rnd)) {
-          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                      dl, Op.getValueType(),
-                                      Src1, Src2, Src3, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-        }
+      unsigned Opc = IntrData->Opc0;
+      if (IntrData->Opc1 != 0) {
+        SDValue Sae = Op.getOperand(6);
+        if (isRoundModeSAE(Sae))
+          Opc = IntrData->Opc1;
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
       }
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                              Src1, Src2, Src3),
+      return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case BLENDV: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue Src3 = Op.getOperand(3);
+
+      EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger();
+      Src3 = DAG.getBitcast(MaskVT, Src3);
+
+      // Reverse the operands to match VSELECT order.
+      return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);
+    }
     case VPERM_2OP : {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
@@ -21783,35 +22648,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       // first.
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
                          Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
-    case CVTPD2PS:
-      // ISD::FP_ROUND has a second argument that indicates if the truncation
-      // does not change the value. Set it to 0 since it can change.
-      return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
-                         DAG.getIntPtrConstant(0, dl));
-    case CVTPD2PS_RND_MASK: {
-      SDValue Src = Op.getOperand(1);
-      SDValue PassThru = Op.getOperand(2);
-      SDValue Mask = Op.getOperand(3);
-      // We add rounding mode to the Node when
-      //   - RM Opcode is specified and
-      //   - RM is not "current direction".
-      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
-      if (IntrWithRoundingModeOpcode != 0) {
-        SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd)) {
-          return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
-                                      dl, Op.getValueType(),
-                                      Src, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-        }
-      }
-      assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
-      // ISD::FP_ROUND has a second argument that indicates if the truncation
-      // does not change the value. Set it to 0 since it can change.
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
-                                              DAG.getIntPtrConstant(0, dl)),
-                                  Mask, PassThru, Subtarget, DAG);
-    }
     case FPCLASSS: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Imm = Op.getOperand(2);
@@ -21829,24 +22665,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
     case CMP_MASK_CC: {
       MVT MaskVT = Op.getSimpleValueType();
-      SDValue Cmp;
       SDValue CC = Op.getOperand(3);
       CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
       // (IntrData->Opc1 != 0), then we check the rounding mode operand.
       if (IntrData->Opc1 != 0) {
-        SDValue Rnd = Op.getOperand(4);
-        if (!isRoundModeCurDirection(Rnd))
-          Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
-                            Op.getOperand(2), CC, Rnd);
+        SDValue Sae = Op.getOperand(4);
+        if (isRoundModeSAE(Sae))
+          return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+                             Op.getOperand(2), CC, Sae);
+        if (!isRoundModeCurDirection(Sae))
+          return SDValue();
       }
       //default rounding mode
-      if (!Cmp.getNode())
-        Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+      return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
                           Op.getOperand(2), CC);
-
-      return Cmp;
     }
     case CMP_MASK_SCALAR_CC: {
       SDValue Src1 = Op.getOperand(1);
@@ -21856,12 +22690,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
       SDValue Cmp;
       if (IntrData->Opc1 != 0) {
-        SDValue Rnd = Op.getOperand(5);
-        if (!isRoundModeCurDirection(Rnd))
-          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
+        SDValue Sae = Op.getOperand(5);
+        if (isRoundModeSAE(Sae))
+          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
       }
       //default rounding mode
-      if(!Cmp.getNode())
+      if (!Cmp.getNode())
         Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
 
       SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
@@ -21921,9 +22757,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       if (isRoundModeCurDirection(Sae))
         FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
                            DAG.getConstant(CondVal, dl, MVT::i8));
-      else
-        FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
+      else if (isRoundModeSAE(Sae))
+        FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
                            DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+      else
+        return SDValue();
       // Need to fill with zeros to ensure the bitcast will produce zeroes
       // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
       SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
@@ -21940,41 +22778,42 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue Mask = Op.getOperand(3);
       SDValue DataToCompress = Op.getOperand(1);
       SDValue PassThru = Op.getOperand(2);
-      if (isAllOnesConstant(Mask)) // return data as is
+      if (ISD::isBuildVectorAllOnes(Mask.getNode())) // return data as is
         return Op.getOperand(1);
 
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                              DataToCompress),
-                                  Mask, PassThru, Subtarget, DAG);
+      // Avoid false dependency.
+      if (PassThru.isUndef())
+        PassThru = DAG.getConstant(0, dl, VT);
+
+      return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru,
+                         Mask);
     }
-    case FIXUPIMMS:
-    case FIXUPIMMS_MASKZ:
     case FIXUPIMM:
-    case FIXUPIMM_MASKZ:{
+    case FIXUPIMM_MASKZ: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue Imm = Op.getOperand(4);
       SDValue Mask = Op.getOperand(5);
-      SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS ) ?
-                                         Src1 : getZeroVector(VT, Subtarget, DAG, dl);
-      // We specify 2 possible modes for intrinsics, with/without rounding
-      // modes.
-      // First, we check if the intrinsic have rounding mode (7 operands),
-      // if not, we set rounding mode to "current".
-      SDValue Rnd;
-      if (Op.getNumOperands() == 7)
-        Rnd = Op.getOperand(6);
-      else
-        Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
-      if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
-        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                                Src1, Src2, Src3, Imm, Rnd),
-                                    Mask, Passthru, Subtarget, DAG);
-      else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
-        return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                       Src1, Src2, Src3, Imm, Rnd),
-                                    Mask, Passthru, Subtarget, DAG);
+      SDValue Passthru = (IntrData->Type == FIXUPIMM)
+                             ? Src1
+                             : getZeroVector(VT, Subtarget, DAG, dl);
+
+      unsigned Opc = IntrData->Opc0;
+      if (IntrData->Opc1 != 0) {
+        SDValue Sae = Op.getOperand(6);
+        if (isRoundModeSAE(Sae))
+          Opc = IntrData->Opc1;
+        else if (!isRoundModeCurDirection(Sae))
+          return SDValue();
+      }
+
+      SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
+
+      if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE)
+        return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
+
+      return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
     }
     case ROUNDP: {
       assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
@@ -22018,7 +22857,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getMergeValues(Results, dl);
     }
     case CVTPD2PS_MASK:
-    case CVTPD2I_MASK:
+    case CVTPD2DQ_MASK:
+    case CVTQQ2PS_MASK:
     case TRUNCATE_TO_REG: {
       SDValue Src = Op.getOperand(1);
       SDValue PassThru = Op.getOperand(2);
@@ -22049,6 +22889,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                          PassThru, Mask);
 
     }
+    case CVTNEPS2BF16_MASK: {
+      SDValue Src = Op.getOperand(1);
+      SDValue PassThru = Op.getOperand(2);
+      SDValue Mask = Op.getOperand(3);
+
+      if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+        return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+      // Break false dependency.
+      if (PassThru.isUndef())
+        PassThru = DAG.getConstant(0, dl, PassThru.getValueType());
+
+      return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+                         Mask);
+    }
     default:
       break;
     }
@@ -22279,10 +23134,37 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     unsigned Reg;
     if (RegInfo->hasBasePointer(MF))
       Reg = RegInfo->getBaseRegister();
-    else // This function handles the SP or FP case.
-      Reg = RegInfo->getPtrSizedFrameRegister(MF);
+    else { // Handles the SP or FP case.
+      bool CantUseFP = RegInfo->needsStackRealignment(MF);
+      if (CantUseFP)
+        Reg = RegInfo->getPtrSizedStackRegister(MF);
+      else
+        Reg = RegInfo->getPtrSizedFrameRegister(MF);
+    }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
+
+  case Intrinsic::x86_avx512_vp2intersect_q_512:
+  case Intrinsic::x86_avx512_vp2intersect_q_256:
+  case Intrinsic::x86_avx512_vp2intersect_q_128:
+  case Intrinsic::x86_avx512_vp2intersect_d_512:
+  case Intrinsic::x86_avx512_vp2intersect_d_256:
+  case Intrinsic::x86_avx512_vp2intersect_d_128: {
+    MVT MaskVT = Op.getSimpleValueType();
+
+    SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
+    SDLoc DL(Op);
+
+    SDValue Operation =
+        DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
+                    Op->getOperand(1), Op->getOperand(2));
+
+    SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
+                                                 MaskVT, Operation);
+    SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
+                                                 MaskVT, Operation);
+    return DAG.getMergeValues({Result0, Result1}, DL);
+  }
   }
 }
 
@@ -22296,25 +23178,26 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   if (!C)
     return SDValue();
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
-  EVT MaskVT = Mask.getValueType();
+  EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
-  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
-  SDValue Segment = DAG.getRegister(0, MVT::i32);
   // If source is undef or we know it won't be used, use a zero vector
   // to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
   if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
-  SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, dl);
+
+  MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+  SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+    VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+  return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
 }
 
-static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
-                              SDValue Src, SDValue Mask, SDValue Base,
-                              SDValue Index, SDValue ScaleOp, SDValue Chain,
-                              const X86Subtarget &Subtarget) {
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
+                             SDValue Src, SDValue Mask, SDValue Base,
+                             SDValue Index, SDValue ScaleOp, SDValue Chain,
+                             const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
   auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
@@ -22332,17 +23215,18 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
     Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
-  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
-  SDValue Segment = DAG.getRegister(0, MVT::i32);
   // If source is undef or we know it won't be used, use a zero vector
   // to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
   if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
-  SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, dl);
+
+  MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+  SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+    VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+  return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
 }
 
 static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -22355,8 +23239,6 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   if (!C)
     return SDValue();
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
-  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
-  SDValue Segment = DAG.getRegister(0, MVT::i32);
   unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
                               Src.getSimpleValueType().getVectorNumElements());
   MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
@@ -22366,10 +23248,13 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   if (Mask.getValueType() != MaskVT)
     Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
+  MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
   SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
-  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  return SDValue(Res, 1);
+  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
+  SDValue Res = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
+      VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+  return Res.getValue(1);
 }
 
 static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -22392,24 +23277,37 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   return SDValue(Res, 0);
 }
 
-/// Handles the lowering of builtin intrinsic that return the value
-/// of the extended control register.
-static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
-                                       SelectionDAG &DAG,
-                                       const X86Subtarget &Subtarget,
-                                       SmallVectorImpl<SDValue> &Results) {
-  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue LO, HI;
+/// Handles the lowering of builtin intrinsics with chain that return their
+/// value into registers EDX:EAX.
+/// If operand SrcReg is a valid register identifier, then operand 2 of N is
+/// copied to SrcReg. The assumption is that SrcReg is an implicit input to
+/// TargetOpcode.
+/// Returns a Glue value which can be used to add extra copy-from-reg if the
+/// expanded intrinsic implicitly defines extra registers (i.e. not just
+/// EDX:EAX).
+static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
+                                        SelectionDAG &DAG,
+                                        unsigned TargetOpcode,
+                                        unsigned SrcReg,
+                                        const X86Subtarget &Subtarget,
+                                        SmallVectorImpl<SDValue> &Results) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Glue;
 
-  // The ECX register is used to select the index of the XCR register to
-  // return.
-  SDValue Chain =
-      DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
-  SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
+  if (SrcReg) {
+    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+    Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);
+    Glue = Chain.getValue(1);
+  }
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue N1Ops[] = {Chain, Glue};
+  SDNode *N1 = DAG.getMachineNode(
+      TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
   Chain = SDValue(N1, 0);
 
   // Reads the content of XCR and returns it in registers EDX:EAX.
+  SDValue LO, HI;
   if (Subtarget.is64Bit()) {
     LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
     HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
@@ -22420,60 +23318,15 @@ static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
                             LO.getValue(2));
   }
   Chain = HI.getValue(1);
+  Glue = HI.getValue(2);
 
   if (Subtarget.is64Bit()) {
-    // Merge the two 32-bit values into a 64-bit one..
-    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
-                              DAG.getConstant(32, DL, MVT::i8));
-    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
-    Results.push_back(Chain);
-    return;
-  }
-
-  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
-  SDValue Ops[] = { LO, HI };
-  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
-  Results.push_back(Pair);
-  Results.push_back(Chain);
-}
-
-/// Handles the lowering of builtin intrinsics that read performance monitor
-/// counters (x86_rdpmc).
-static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
-                                      SelectionDAG &DAG,
-                                      const X86Subtarget &Subtarget,
-                                      SmallVectorImpl<SDValue> &Results) {
-  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue LO, HI;
-
-  // The ECX register is used to select the index of the performance counter
-  // to read.
-  SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
-                                   N->getOperand(2));
-  SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
-
-  // Reads the content of a 64-bit performance counter and returns it in the
-  // registers EDX:EAX.
-  if (Subtarget.is64Bit()) {
-    LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
-    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
-                            LO.getValue(2));
-  } else {
-    LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
-    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
-                            LO.getValue(2));
-  }
-  Chain = HI.getValue(1);
-
-  if (Subtarget.is64Bit()) {
-    // The EAX register is loaded with the low-order 32 bits. The EDX register
-    // is loaded with the supported high-order bits of the counter.
+    // Merge the two 32-bit values into a 64-bit one.
     SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
                               DAG.getConstant(32, DL, MVT::i8));
     Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
     Results.push_back(Chain);
-    return;
+    return Glue;
   }
 
   // Use a buildpair to merge the two 32-bit values into a 64-bit one.
@@ -22481,6 +23334,7 @@ static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
   SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
   Results.push_back(Pair);
   Results.push_back(Chain);
+  return Glue;
 }
 
 /// Handles the lowering of builtin intrinsics that read the time stamp counter
@@ -22490,59 +23344,28 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
                                     SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget,
                                     SmallVectorImpl<SDValue> &Results) {
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
-  SDValue LO, HI;
-
   // The processor's time-stamp counter (a 64-bit MSR) is stored into the
   // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
   // and the EAX register is loaded with the low-order 32 bits.
-  if (Subtarget.is64Bit()) {
-    LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
-    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
-                            LO.getValue(2));
-  } else {
-    LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
-    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
-                            LO.getValue(2));
-  }
-  SDValue Chain = HI.getValue(1);
-
-  SDValue TSC;
-  if (Subtarget.is64Bit()) {
-    // The EDX register is loaded with the high-order 32 bits of the MSR, and
-    // the EAX register is loaded with the low-order 32 bits.
-    TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
-                      DAG.getConstant(32, DL, MVT::i8));
-    TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
-  } else {
-    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
-    TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
-  }
-
-  if (Opcode == X86ISD::RDTSCP_DAG) {
-    assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
-
-    // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
-    // the ECX register. Add 'ecx' explicitly to the chain.
-    SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
-                                     HI.getValue(2));
-
-    Results.push_back(TSC);
-    Results.push_back(ecx);
-    Results.push_back(ecx.getValue(1));
+  SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
+                                             /* NoRegister */0, Subtarget,
+                                             Results);
+  if (Opcode != X86::RDTSCP)
     return;
-  }
 
-  Results.push_back(TSC);
-  Results.push_back(Chain);
+  SDValue Chain = Results[1];
+  // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
+  // the ECX register. Add 'ecx' explicitly to the chain.
+  SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
+  Results[1] = ecx;
+  Results.push_back(ecx.getValue(1));
 }
 
 static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
   SmallVector<SDValue, 3> Results;
   SDLoc DL(Op);
-  getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
+  getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget,
                           Results);
   return DAG.getMergeValues(Results, DL);
 }
@@ -22621,6 +23444,22 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
       return MarkEHRegistrationNode(Op, DAG);
     case llvm::Intrinsic::x86_seh_ehguard:
       return MarkEHGuard(Op, DAG);
+    case llvm::Intrinsic::x86_rdpkru: {
+      SDLoc dl(Op);
+      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+      // Create a RDPKRU node and pass 0 to the ECX parameter.
+      return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0),
+                         DAG.getConstant(0, dl, MVT::i32));
+    }
+    case llvm::Intrinsic::x86_wrpkru: {
+      SDLoc dl(Op);
+      // Create a WRPKRU node, pass the input to the EAX parameter,  and pass 0
+      // to the EDX and ECX parameters.
+      return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
+                         Op.getOperand(0), Op.getOperand(2),
+                         DAG.getConstant(0, dl, MVT::i32),
+                         DAG.getConstant(0, dl, MVT::i32));
+    }
     case llvm::Intrinsic::x86_flags_read_u32:
     case llvm::Intrinsic::x86_flags_read_u64:
     case llvm::Intrinsic::x86_flags_write_u32:
@@ -22630,7 +23469,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
       MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
       MFI.setHasCopyImplyingStackAdjustment(true);
       // Don't do anything here, we will expand these intrinsics out later
-      // during ExpandISelPseudos in EmitInstrWithCustomInserter.
+      // during FinalizeISel in EmitInstrWithCustomInserter.
       return SDValue();
     }
     case Intrinsic::x86_lwpins32:
@@ -22660,8 +23499,28 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
           DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
                       Op->getOperand(3), Op->getOperand(4));
       SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
-      SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
-      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
+                         Operation.getValue(1));
+    }
+    case Intrinsic::x86_enqcmd:
+    case Intrinsic::x86_enqcmds: {
+      SDLoc dl(Op);
+      SDValue Chain = Op.getOperand(0);
+      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+      unsigned Opcode;
+      switch (IntNo) {
+      default: llvm_unreachable("Impossible intrinsic!");
+      case Intrinsic::x86_enqcmd:
+        Opcode = X86ISD::ENQCMD;
+        break;
+      case Intrinsic::x86_enqcmds:
+        Opcode = X86ISD::ENQCMDS;
+        break;
+      }
+      SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
+                                      Op.getOperand(3));
+      SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
+      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                          Operation.getValue(1));
     }
     }
@@ -22707,7 +23566,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     SDValue Index = Op.getOperand(4);
     SDValue Mask  = Op.getOperand(5);
     SDValue Scale = Op.getOperand(6);
-    return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+    return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
                          Chain, Subtarget);
   }
   case SCATTER: {
@@ -22743,15 +23602,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     return DAG.getMergeValues(Results, dl);
   }
   // Read Performance Monitoring Counters.
-  case RDPMC: {
-    SmallVector<SDValue, 2> Results;
-    getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
-    return DAG.getMergeValues(Results, dl);
-  }
-  // Get Extended Control Register.
+  case RDPMC:
+  // Get Extended Control Register.
   case XGETBV: {
     SmallVector<SDValue, 2> Results;
-    getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
+
+    // RDPMC uses ECX to select the index of the performance counter to read.
+    // XGETBV uses ECX to select the index of the XCR register to return.
+    // The result is stored into registers EDX:EAX.
+    expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
+                                Subtarget, Results);
     return DAG.getMergeValues(Results, dl);
   }
   // XTEST intrinsics.
@@ -22861,7 +23721,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
       // Set up a frame object for the return address.
       unsigned SlotSize = RegInfo->getSlotSize();
       FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
-          SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
+          SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
       FuncInfo->setFAIndex(FrameAddrIndex);
     }
     return DAG.getFrameIndex(FrameAddrIndex, VT);
@@ -23444,10 +24304,6 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
   SDValue N0 = Op.getOperand(0);
   SDLoc dl(Op);
 
-  // Decompose 256-bit ops into smaller 128-bit ops.
-  if (VT.is256BitVector() && !Subtarget.hasInt256())
-    return Lower256IntUnary(Op, DAG);
-
   assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
          "Only scalar CTTZ requires custom lowering");
 
@@ -23539,22 +24395,48 @@ static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
   return split256IntArith(Op, DAG);
 }
 
-static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
+                                  const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
+  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+  unsigned Opcode = Op.getOpcode();
   if (VT.getScalarType() == MVT::i1) {
     SDLoc dl(Op);
-    switch (Op.getOpcode()) {
+    switch (Opcode) {
     default: llvm_unreachable("Expected saturated arithmetic opcode");
     case ISD::UADDSAT:
     case ISD::SADDSAT:
-      return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+      // *addsat i1 X, Y --> X | Y
+      return DAG.getNode(ISD::OR, dl, VT, X, Y);
     case ISD::USUBSAT:
     case ISD::SSUBSAT:
-      return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
-                         DAG.getNOT(dl, Op.getOperand(1), VT));
+      // *subsat i1 X, Y --> X & ~Y
+      return DAG.getNode(ISD::AND, dl, VT, X, DAG.getNOT(dl, Y, VT));
     }
   }
 
+  if (VT.is128BitVector()) {
+    // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
+                                                 *DAG.getContext(), VT);
+    SDLoc DL(Op);
+    if (Opcode == ISD::UADDSAT && !TLI.isOperationLegal(ISD::UMIN, VT)) {
+      // uaddsat X, Y --> (X >u (X + Y)) ? -1 : X + Y
+      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, X, Y);
+      SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Add, ISD::SETUGT);
+      return DAG.getSelect(DL, VT, Cmp, DAG.getAllOnesConstant(DL, VT), Add);
+    }
+    if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
+      // usubsat X, Y --> (X >u Y) ? X - Y : 0
+      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+      SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+      return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+    }
+    // Use default expansion.
+    return SDValue();
+  }
+
   assert(Op.getSimpleValueType().is256BitVector() &&
          Op.getSimpleValueType().isInteger() &&
          "Only handle AVX 256-bit vector integer operation");
@@ -23886,9 +24768,6 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
 
   // Signed AVX2 implementation - extend xmm subvectors to ymm.
   if (VT == MVT::v32i8 && IsSigned) {
-    SDValue Lo = DAG.getIntPtrConstant(0, dl);
-    SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
-
     MVT ExVT = MVT::v16i16;
     SDValue ALo = extract128BitVector(A, 0, DAG, dl);
     SDValue BLo = extract128BitVector(B, 0, DAG, dl);
@@ -23898,8 +24777,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
     BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
     AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
     BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
-    Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
-    Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+    SDValue Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+    SDValue Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
     Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
     Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
 
@@ -24156,6 +25035,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   APInt APIntShiftAmt;
   if (!isConstantSplat(Amt, APIntShiftAmt))
     return SDValue();
+
+  // If the shift amount is out of range, return undef.
+  if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
+    return DAG.getUNDEF(VT);
+
   uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
 
   if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
@@ -24197,8 +25081,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                                ShiftAmt, DAG);
       SHL = DAG.getBitcast(VT, SHL);
       // Zero out the rightmost bits.
-      return DAG.getNode(ISD::AND, dl, VT, SHL,
-                         DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+      APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);
+      return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));
     }
     if (Op.getOpcode() == ISD::SRL) {
       // Make a large shift.
@@ -24224,54 +25108,6 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-// If V is a splat value, return the source vector and splat index;
-static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
-  V = peekThroughEXTRACT_SUBVECTORs(V);
-
-  EVT VT = V.getValueType();
-  unsigned Opcode = V.getOpcode();
-  switch (Opcode) {
-  default: {
-    APInt UndefElts;
-    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
-    if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
-      // Handle case where all demanded elements are UNDEF.
-      if (DemandedElts.isSubsetOf(UndefElts)) {
-        SplatIdx = 0;
-        return DAG.getUNDEF(VT);
-      }
-      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
-      return V;
-    }
-    break;
-  }
-  case ISD::VECTOR_SHUFFLE: {
-    // Check if this is a shuffle node doing a splat.
-    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
-    // getTargetVShiftNode currently struggles without the splat source.
-    auto *SVN = cast<ShuffleVectorSDNode>(V);
-    if (!SVN->isSplat())
-      break;
-    int Idx = SVN->getSplatIndex();
-    int NumElts = V.getValueType().getVectorNumElements();
-    SplatIdx = Idx % NumElts;
-    return V.getOperand(Idx / NumElts);
-  }
-  }
-
-  return SDValue();
-}
-
-static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
-                             SelectionDAG &DAG) {
-  int SplatIdx;
-  if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                       SrcVector.getValueType().getScalarType(), SrcVector,
-                       DAG.getIntPtrConstant(SplatIdx, dl));
-  return SDValue();
-}
-
 static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -24282,7 +25118,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
   unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
   unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
 
-  if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
     if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
       assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
@@ -25102,24 +25938,45 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   unsigned OpWidth = MemType->getPrimitiveSizeInBits();
 
   if (OpWidth == 64)
-    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
-  else if (OpWidth == 128)
+    return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit();
+  if (OpWidth == 128)
     return Subtarget.hasCmpxchg16b();
-  else
-    return false;
+
+  return false;
 }
 
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FISTP when X87 is available?
 bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
-  return needsCmpXchgNb(SI->getValueOperand()->getType());
+  Type *MemType = SI->getValueOperand()->getType();
+
+  bool NoImplicitFloatOps =
+      SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+    return false;
+
+  return needsCmpXchgNb(MemType);
 }
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
-// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
-  auto PTy = cast<PointerType>(LI->getPointerOperandType());
-  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
-                                               : AtomicExpansionKind::None;
+  Type *MemType = LI->getType();
+
+  // If this is a 64-bit atomic load on a 32-bit target and SSE2 is enabled,
+  // we can use movq to do the load. If we have X87 we can load into an 80-bit
+  // X87 register and store it to a stack temporary.
+  bool NoImplicitFloatOps =
+      LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+      (Subtarget.hasSSE2() || Subtarget.hasX87()))
+    return AtomicExpansionKind::None;
+
+  return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+                                 : AtomicExpansionKind::None;
 }
 
 TargetLowering::AtomicExpansionKind
@@ -25155,6 +26012,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
   case AtomicRMWInst::UMin:
+  case AtomicRMWInst::FAdd:
+  case AtomicRMWInst::FSub:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
     return AtomicExpansionKind::CmpXChg;
@@ -25171,13 +26030,20 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   if (MemType->getPrimitiveSizeInBits() > NativeWidth)
     return nullptr;
 
+  // If this is a canonical idempotent atomicrmw w/no uses, we have a better
+  // lowering available in lowerAtomicArith.
+  // TODO: push more cases through this path. 
+  if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
+    if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
+        AI->use_empty())
+      return nullptr;
+
   auto Builder = IRBuilder<>(AI);
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   auto SSID = AI->getSyncScopeID();
   // We must restrict the ordering to avoid generating loads with Release or
   // ReleaseAcquire orderings.
   auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
-  auto Ptr = AI->getPointerOperand();
 
   // Before the load we need a fence. Here is an example lifted from
   // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
@@ -25212,14 +26078,80 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   Builder.CreateCall(MFence, {});
 
   // Finally we can emit the atomic load.
-  LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
-          AI->getType()->getPrimitiveSizeInBits());
+  LoadInst *Loaded =
+      Builder.CreateAlignedLoad(AI->getType(), AI->getPointerOperand(),
+                                AI->getType()->getPrimitiveSizeInBits());
   Loaded->setAtomic(Order, SSID);
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
   return Loaded;
 }
 
+/// Emit a locked operation on a stack location which does not change any
+/// memory location, but does involve a lock prefix.  Location is chosen to be
+/// a) very likely accessed only by a single thread to minimize cache traffic,
+/// and b) definitely dereferenceable.  Returns the new Chain result.  
+static SDValue emitLockedStackOp(SelectionDAG &DAG,
+                                 const X86Subtarget &Subtarget,
+                                 SDValue Chain, SDLoc DL) {
+  // Implementation notes:
+  // 1) LOCK prefix creates a full read/write reordering barrier for memory
+  // operations issued by the current processor.  As such, the location
+  // referenced is not relevant for the ordering properties of the instruction.
+  // See: Intel® 64 and IA-32 Architectures Software Developer’s Manual,
+  // 8.2.3.9  Loads and Stores Are Not Reordered with Locked Instructions 
+  // 2) Using an immediate operand appears to be the best encoding choice
+  // here since it doesn't require an extra register.
+  // 3) OR appears to be very slightly faster than ADD. (Though, the difference
+  // is small enough it might just be measurement noise.)
+  // 4) When choosing offsets, there are several contributing factors:
+  //   a) If there's no redzone, we default to TOS.  (We could allocate a cache
+  //      line aligned stack object to improve this case.) 
+  //   b) To minimize our chances of introducing a false dependence, we prefer
+  //      to offset the stack usage from TOS slightly.  
+  //   c) To minimize concerns about cross thread stack usage - in particular,
+  //      the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
+  //      captures state in the TOS frame and accesses it from many threads -
+  //      we want to use an offset such that the offset is in a distinct cache
+  //      line from the TOS frame.
+  // 
+  // For a general discussion of the tradeoffs and benchmark results, see:
+  // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
+
+  auto &MF = DAG.getMachineFunction();
+  auto &TFL = *Subtarget.getFrameLowering();
+  const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;
+
+  if (Subtarget.is64Bit()) {
+    SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+    SDValue Ops[] = {
+      DAG.getRegister(X86::RSP, MVT::i64),                  // Base
+      DAG.getTargetConstant(1, DL, MVT::i8),                // Scale
+      DAG.getRegister(0, MVT::i64),                         // Index
+      DAG.getTargetConstant(SPOffset, DL, MVT::i32),        // Disp
+      DAG.getRegister(0, MVT::i16),                         // Segment.
+      Zero,
+      Chain};
+    SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
+                                     MVT::Other, Ops);
+    return SDValue(Res, 1);
+  }
+
+  SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+  SDValue Ops[] = {
+    DAG.getRegister(X86::ESP, MVT::i32),            // Base
+    DAG.getTargetConstant(1, DL, MVT::i8),          // Scale
+    DAG.getRegister(0, MVT::i32),                   // Index
+    DAG.getTargetConstant(SPOffset, DL, MVT::i32),  // Disp
+    DAG.getRegister(0, MVT::i16),                   // Segment.
+    Zero,
+    Chain
+  };
+  SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
+                                   MVT::Other, Ops);
+  return SDValue(Res, 1);
+}
+
 static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
   SDLoc dl(Op);
@@ -25235,19 +26167,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
     if (Subtarget.hasMFence())
       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
 
-    SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
-    SDValue Ops[] = {
-      DAG.getRegister(X86::ESP, MVT::i32),     // Base
-      DAG.getTargetConstant(1, dl, MVT::i8),   // Scale
-      DAG.getRegister(0, MVT::i32),            // Index
-      DAG.getTargetConstant(0, dl, MVT::i32),  // Disp
-      DAG.getRegister(0, MVT::i32),            // Segment.
-      Zero,
-      Chain
-    };
-    SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
-    return SDValue(Res, 0);
+    SDValue Chain = Op.getOperand(0); 
+    return emitLockedStackOp(DAG, Subtarget, Chain, dl);
   }
 
   // MEMBARRIER is a compiler barrier; it codegens to a no-op.
@@ -25288,10 +26209,8 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
                                       MVT::i32, cpOut.getValue(2));
   SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
 
-  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
-  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
-  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
-  return SDValue();
+  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
+                     cpOut, Success, EFLAGS.getValue(1));
 }
 
 // Create MOVMSKB, taking into account whether we need to split for AVX1.
@@ -25703,6 +26622,7 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
 /// Lower atomic_load_ops into LOCK-prefixed operations.
 static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
+  AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
   SDValue Chain = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
@@ -25717,7 +26637,6 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
     // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
     // select LXADD if LOCK_SUB can't be selected.
     if (Opc == ISD::ATOMIC_LOAD_SUB) {
-      AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
       RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
       return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
                            RHS, AN->getMemOperand());
@@ -25727,35 +26646,93 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
     return N;
   }
 
+  // Specialized lowering for the canonical form of an idempotent atomicrmw.
+  // The core idea here is that since the memory location isn't actually
+  // changing, all we need is a lowering for the *ordering* impacts of the
+  // atomicrmw.  As such, we can choose a different operation and memory
+  // location to minimize impact on other code.
+  if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
+    // On X86, the only ordering which actually requires an instruction is
+    // seq_cst which isn't SingleThread; everything else just needs to be
+    // preserved during codegen and then dropped. Note that we expect (but
+    // don't assume) that orderings other than seq_cst and acq_rel have been
+    // canonicalized to a store or load.
+    if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
+        AN->getSyncScopeID() == SyncScope::System) {
+      // Prefer a locked operation against a stack location to minimize cache
+      // traffic.  This assumes that stack locations are very likely to be
+      // accessed only by the owning thread. 
+      SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
+      assert(!N->hasAnyUseOfValue(0));
+      // NOTE: The getUNDEF is needed to give something for the unused result 0.
+      return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+                         DAG.getUNDEF(VT), NewChain);
+    }
+    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+    SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain);
+    assert(!N->hasAnyUseOfValue(0));
+    // NOTE: The getUNDEF is needed to give something for the unused result 0.
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+                       DAG.getUNDEF(VT), NewChain);
+  }
+
   SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
   // RAUW the chain, but don't worry about the result, as it's unused.
   assert(!N->hasAnyUseOfValue(0));
-  DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
-  return SDValue();
+  // NOTE: The getUNDEF is needed to give something for the unused result 0.
+  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+                     DAG.getUNDEF(VT), LockOp.getValue(1));
 }
 
-static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
-  SDNode *Node = Op.getNode();
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
+                                 const X86Subtarget &Subtarget) {
+  auto *Node = cast<AtomicSDNode>(Op.getNode());
   SDLoc dl(Node);
-  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+  EVT VT = Node->getMemoryVT();
+
+  bool IsSeqCst = Node->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+  bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);
+
+  // If this store is not sequentially consistent and the type is legal
+  // we can just keep it.
+  if (!IsSeqCst && IsTypeLegal)
+    return Op;
+
+  if (VT == MVT::i64 && !IsTypeLegal) {
+    // For illegal i64 atomic_stores, we can try to use MOVQ if SSE2 is enabled.
+    // FIXME: Use movlps with SSE1.
+    // FIXME: Use fist with X87.
+    bool NoImplicitFloatOps =
+        DAG.getMachineFunction().getFunction().hasFnAttribute(
+            Attribute::NoImplicitFloat);
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+        Subtarget.hasSSE2()) {
+      SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+                                     Node->getOperand(2));
+      SDVTList Tys = DAG.getVTList(MVT::Other);
+      SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
+      SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
+                                              Ops, MVT::i64,
+                                              Node->getMemOperand());
+
+      // If this is a sequentially consistent store, also emit an appropriate
+      // barrier.
+      if (IsSeqCst)
+        Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+
+      return Chain;
+    }
+  }
 
   // Convert seq_cst store -> xchg
   // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
-  // FIXME: On 32-bit, store -> fist or movq would be more efficient
-  //        (The only way to get a 16-byte store is cmpxchg16b)
   // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
-  if (cast<AtomicSDNode>(Node)->getOrdering() ==
-          AtomicOrdering::SequentiallyConsistent ||
-      !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
-    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
-                                 cast<AtomicSDNode>(Node)->getMemoryVT(),
-                                 Node->getOperand(0),
-                                 Node->getOperand(1), Node->getOperand(2),
-                                 cast<AtomicSDNode>(Node)->getMemOperand());
-    return Swap.getValue(1);
-  }
-  // Other atomic stores have a simple pattern.
-  return Op;
+  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+                               Node->getMemoryVT(),
+                               Node->getOperand(0),
+                               Node->getOperand(1), Node->getOperand(2),
+                               Node->getMemOperand());
+  return Swap.getValue(1);
 }
 
 static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
@@ -25919,7 +26896,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
       SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
       SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
           VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
-      DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
       return SDValue(NewScatter.getNode(), 1);
     }
     return SDValue();
@@ -25935,7 +26911,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
       SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
       SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
           VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
-      DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
       return SDValue(NewScatter.getNode(), 1);
     }
     // Custom widen all the operands to avoid promotion.
@@ -25980,7 +26955,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
   SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
       VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
-  DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
   return SDValue(NewScatter.getNode(), 1);
 }
 
@@ -25991,8 +26965,28 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
   MVT VT = Op.getSimpleValueType();
   MVT ScalarVT = VT.getScalarType();
   SDValue Mask = N->getMask();
+  MVT MaskVT = Mask.getSimpleValueType();
+  SDValue PassThru = N->getPassThru();
   SDLoc dl(Op);
 
+  // Handle AVX masked loads which don't support passthru other than 0.
+  if (MaskVT.getVectorElementType() != MVT::i1) {
+    // We also allow undef in the isel pattern.
+    if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
+      return Op;
+
+    SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(),
+                                        N->getBasePtr(), Mask,
+                                        getZeroVector(VT, Subtarget, DAG, dl),
+                                        N->getMemoryVT(), N->getMemOperand(),
+                                        N->getExtensionType(),
+                                        N->isExpandingLoad());
+    // Blend in the passthru. The select yields data, so its type is VT.
+    SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad,
+                                 PassThru);
+    return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
+  }
+
   assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
          "Expanding masked load is supported on AVX-512 target only!");
 
@@ -26011,7 +27005,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
   // VLX the vector should be widened to 512 bit
   unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
   MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
-  SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
+  PassThru = ExtendToType(PassThru, WideDataVT, DAG);
 
   // Mask element has to be i1.
   assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
@@ -26179,7 +27173,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ATOMIC_LOAD_OR:
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_AND:    return lowerAtomicArith(Op, DAG, Subtarget);
-  case ISD::ATOMIC_STORE:       return LowerATOMIC_STORE(Op, DAG);
+  case ISD::ATOMIC_STORE:       return LowerATOMIC_STORE(Op, DAG, Subtarget);
   case ISD::BITREVERSE:         return LowerBITREVERSE(Op, Subtarget, DAG);
   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
   case ISD::CONCAT_VECTORS:     return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
@@ -26272,7 +27266,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UADDSAT:
   case ISD::SADDSAT:
   case ISD::USUBSAT:
-  case ISD::SSUBSAT:            return LowerADDSAT_SUBSAT(Op, DAG);
+  case ISD::SSUBSAT:            return LowerADDSAT_SUBSAT(Op, DAG, Subtarget);
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
@@ -26301,12 +27295,19 @@ void X86TargetLowering::LowerOperationWrapper(SDNode *N,
   if (!Res.getNode())
     return;
 
-  assert((N->getNumValues() <= Res->getNumValues()) &&
+  // If the original node has one result, take the return value from
+  // LowerOperation as is. It might not be result number 0.
+  if (N->getNumValues() == 1) {
+    Results.push_back(Res);
+    return;
+  }
+
+  // If the original node has multiple results, then the return node should
+  // have the same number of results.
+  assert((N->getNumValues() == Res->getNumValues()) &&
       "Lowering returned the wrong number of results!");
 
   // Places new result values base on N result number.
-  // In some cases (LowerSINT_TO_FP for example) Res has more result values
-  // than original node, chain should be dropped(last value).
   for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
     Results.push_back(Res.getValue(I));
 }
@@ -26319,7 +27320,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   SDLoc dl(N);
   switch (N->getOpcode()) {
   default:
+#ifndef NDEBUG
+    dbgs() << "ReplaceNodeResults: ";
+    N->dump(&DAG);
+#endif
     llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case ISD::CTPOP: {
+    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    // Use a v2i64 if possible.
+    bool NoImplicitFloatOps =
+        DAG.getMachineFunction().getFunction().hasFnAttribute(
+            Attribute::NoImplicitFloat);
+    if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
+      SDValue Wide =
+          DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));
+      Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide);
+      // Bit count should fit in 32-bits, extract it as that and then zero
+      // extend to i64. Otherwise we end up extracting bits 63:32 separately.
+      Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide);
+      Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide,
+                         DAG.getIntPtrConstant(0, dl));
+      Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide);
+      Results.push_back(Wide);
+    }
+    return;
+  }
   case ISD::MUL: {
     EVT VT = N->getValueType(0);
     assert(VT.isVector() && "Unexpected VT");
@@ -26385,6 +27410,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Res);
     return;
   }
+  case ISD::ABS: {
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    assert(N->getValueType(0) == MVT::i64 &&
+           "Unexpected type (!= i64) on ABS.");
+    MVT HalfT = MVT::i32;
+    SDValue Lo, Hi, Tmp;
+    SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+
+    Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                     DAG.getConstant(0, dl, HalfT));
+    Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                     DAG.getConstant(1, dl, HalfT));
+    Tmp = DAG.getNode(
+        ISD::SRA, dl, HalfT, Hi,
+        DAG.getConstant(HalfT.getSizeInBits() - 1, dl,
+                        TLI.getShiftAmountTy(HalfT, DAG.getDataLayout())));
+    Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+    Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+                     SDValue(Lo.getNode(), 1));
+    Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+    Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+    Results.push_back(Lo);
+    Results.push_back(Hi);
+    return;
+  }
   case ISD::SETCC: {
     // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
     // setCC result type is v2i1 because type legalzation will end up with
@@ -26557,14 +27607,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   }
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND: {
-    if (!ExperimentalVectorWideningLegalization)
-      return;
-
     EVT VT = N->getValueType(0);
     SDValue In = N->getOperand(0);
     EVT InVT = In.getValueType();
     if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
-        (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+        (InVT == MVT::v4i16 || InVT == MVT::v4i8) &&
+        getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) {
+      assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
       // Custom split this so we can extend i8/i16->i32 invec. This is better
       // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
       // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
@@ -26589,16 +27638,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
 
-    if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
+    if (VT == MVT::v16i32 || VT == MVT::v8i64) {
+      if (!InVT.is128BitVector()) {
+        // Not a 128 bit vector, but maybe type legalization will promote
+        // it to 128 bits.
+        if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger)
+          return;
+        InVT = getTypeToTransformTo(*DAG.getContext(), InVT);
+        if (!InVT.is128BitVector())
+          return;
+
+        // Promote the input to 128 bits. Type legalization will turn this into
+        // zext_inreg/sext_inreg.
+        In = DAG.getNode(N->getOpcode(), dl, InVT, In);
+      }
+
       // Perform custom splitting instead of the two stage extend we would get
       // by default.
       EVT LoVT, HiVT;
       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
       assert(isTypeLegal(LoVT) && "Split VT not legal?");
 
-      bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
-
-      SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG);
+      SDValue Lo = getExtendInVec(N->getOpcode(), dl, LoVT, In, DAG);
 
       // We need to shift the input over by half the number of elements.
       unsigned NumElts = InVT.getVectorNumElements();
@@ -26608,7 +27669,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
         ShufMask[i] = i + HalfNumElts;
 
       SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
-      Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);
+      Hi = getExtendInVec(N->getOpcode(), dl, HiVT, Hi, DAG);
 
       SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
       Results.push_back(Res);
@@ -26735,17 +27796,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
 
-    std::pair<SDValue,SDValue> Vals =
-        FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
-    SDValue FIST = Vals.first, StackSlot = Vals.second;
-    if (FIST.getNode()) {
-      // Return a load from the stack slot.
-      if (StackSlot.getNode())
-        Results.push_back(
-            DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo()));
-      else
-        Results.push_back(FIST);
-    }
+    if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned))
+      Results.push_back(V);
     return;
   }
   case ISD::SINT_TO_FP: {
@@ -26800,31 +27852,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     default : llvm_unreachable("Do not know how to custom type "
                                "legalize this intrinsic operation!");
     case Intrinsic::x86_rdtsc:
-      return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+      return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
                                      Results);
     case Intrinsic::x86_rdtscp:
-      return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
+      return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget,
                                      Results);
     case Intrinsic::x86_rdpmc:
-      return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
-
+      expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
+                                  Results);
+      return;
     case Intrinsic::x86_xgetbv:
-      return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
+      expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
+                                  Results);
+      return;
     }
   }
-  case ISD::INTRINSIC_WO_CHAIN: {
-    if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG))
-      Results.push_back(V);
-    return;
-  }
   case ISD::READCYCLECOUNTER: {
-    return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
-                                   Results);
+    return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results);
   }
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
     EVT T = N->getValueType(0);
     assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
     bool Regs64bit = T == MVT::i128;
+    assert((!Regs64bit || Subtarget.hasCmpxchg16b()) &&
+           "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
     MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
     SDValue cpInL, cpInH;
     cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
@@ -26903,6 +27954,66 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(EFLAGS.getValue(1));
     return;
   }
+  case ISD::ATOMIC_LOAD: {
+    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    bool NoImplicitFloatOps =
+        DAG.getMachineFunction().getFunction().hasFnAttribute(
+            Attribute::NoImplicitFloat);
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+      auto *Node = cast<AtomicSDNode>(N);
+      if (Subtarget.hasSSE2()) {
+        // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
+        // lower 64-bits.
+        SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                             MVT::i64, Node->getMemOperand());
+        SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                  DAG.getIntPtrConstant(0, dl));
+        Results.push_back(Res);
+        Results.push_back(Ld.getValue(1));
+        return;
+      }
+      if (Subtarget.hasX87()) {
+        // First load this into an 80-bit X87 register. This will put the whole
+        // integer into the significand.
+        // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
+        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+                                                 dl, Tys, Ops, MVT::i64,
+                                                 Node->getMemOperand());
+        SDValue Chain = Result.getValue(1);
+        SDValue InFlag = Result.getValue(2);
+
+        // Now store the X87 register to a stack temporary and convert to i64.
+        // This store is not atomic and doesn't need to be.
+        // FIXME: We don't need a stack temporary if the result of the load
+        // is already being stored. We could just directly store there.
+        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+        MachinePointerInfo MPI =
+            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+        SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
+        Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
+                                        DAG.getVTList(MVT::Other), StoreOps,
+                                        MVT::i64, MPI, 0 /*Align*/,
+                                        MachineMemOperand::MOStore);
+
+        // Finally load the value back from the stack temporary and return it.
+        // This load is not atomic and doesn't need to be.
+        // This load will be further type legalized.
+        Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
+        Results.push_back(Result);
+        Results.push_back(Result.getValue(1));
+        return;
+      }
+    }
+    // TODO: Use MOVLPS when SSE1 is available?
+    // Delegate to generic TypeLegalization. Situations we can really handle
+    // should have already been dealt with by AtomicExpandPass.cpp.
+    break;
+  }
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
@@ -26914,11 +28025,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::ATOMIC_LOAD_MAX:
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
-  case ISD::ATOMIC_LOAD: {
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
-  }
+
   case ISD::BITCAST: {
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
     EVT DstVT = N->getValueType(0);
@@ -27061,19 +28171,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     if (!ISD::isNON_EXTLoad(N))
       return;
     auto *Ld = cast<LoadSDNode>(N);
-    MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
-    SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
-                              Ld->getPointerInfo(),
-                              Ld->getAlignment(),
-                              Ld->getMemOperand()->getFlags());
-    SDValue Chain = Res.getValue(1);
-    MVT WideVT = MVT::getVectorVT(LdVT, 2);
-    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
-    MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
-                                  VT.getVectorNumElements() * 2);
-    Res = DAG.getBitcast(CastVT, Res);
+    if (Subtarget.hasSSE2()) {
+      MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+      SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+                                Ld->getPointerInfo(), Ld->getAlignment(),
+                                Ld->getMemOperand()->getFlags());
+      SDValue Chain = Res.getValue(1);
+      MVT WideVT = MVT::getVectorVT(LdVT, 2);
+      Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+      MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+                                    VT.getVectorNumElements() * 2);
+      Res = DAG.getBitcast(CastVT, Res);
+      Results.push_back(Res);
+      Results.push_back(Chain);
+      return;
+    }
+    assert(Subtarget.hasSSE1() && "Expected SSE");
+    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+    SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
+    SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                          MVT::i64, Ld->getMemOperand());
     Results.push_back(Res);
-    Results.push_back(Chain);
+    Results.push_back(Res.getValue(1));
     return;
   }
   }
@@ -27092,26 +28211,22 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FXOR:               return "X86ISD::FXOR";
   case X86ISD::FILD:               return "X86ISD::FILD";
   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
-  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
-  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
-  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+  case X86ISD::FIST:               return "X86ISD::FIST";
+  case X86ISD::FP_TO_INT_IN_MEM:   return "X86ISD::FP_TO_INT_IN_MEM";
   case X86ISD::FLD:                return "X86ISD::FLD";
   case X86ISD::FST:                return "X86ISD::FST";
   case X86ISD::CALL:               return "X86ISD::CALL";
-  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
-  case X86ISD::RDTSCP_DAG:         return "X86ISD::RDTSCP_DAG";
-  case X86ISD::RDPMC_DAG:          return "X86ISD::RDPMC_DAG";
   case X86ISD::BT:                 return "X86ISD::BT";
   case X86ISD::CMP:                return "X86ISD::CMP";
   case X86ISD::COMI:               return "X86ISD::COMI";
   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
   case X86ISD::CMPM:               return "X86ISD::CMPM";
-  case X86ISD::CMPM_RND:           return "X86ISD::CMPM_RND";
+  case X86ISD::CMPM_SAE:           return "X86ISD::CMPM_SAE";
   case X86ISD::SETCC:              return "X86ISD::SETCC";
   case X86ISD::SETCC_CARRY:        return "X86ISD::SETCC_CARRY";
   case X86ISD::FSETCC:             return "X86ISD::FSETCC";
   case X86ISD::FSETCCM:            return "X86ISD::FSETCCM";
-  case X86ISD::FSETCCM_RND:        return "X86ISD::FSETCCM_RND";
+  case X86ISD::FSETCCM_SAE:        return "X86ISD::FSETCCM_SAE";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
@@ -27140,12 +28255,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CONFLICT:           return "X86ISD::CONFLICT";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAXS:              return "X86ISD::FMAXS";
-  case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
-  case X86ISD::FMAXS_RND:          return "X86ISD::FMAX_RND";
+  case X86ISD::FMAX_SAE:           return "X86ISD::FMAX_SAE";
+  case X86ISD::FMAXS_SAE:          return "X86ISD::FMAXS_SAE";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
   case X86ISD::FMINS:              return "X86ISD::FMINS";
-  case X86ISD::FMIN_RND:           return "X86ISD::FMIN_RND";
-  case X86ISD::FMINS_RND:          return "X86ISD::FMINS_RND";
+  case X86ISD::FMIN_SAE:           return "X86ISD::FMIN_SAE";
+  case X86ISD::FMINS_SAE:          return "X86ISD::FMINS_SAE";
   case X86ISD::FMAXC:              return "X86ISD::FMAXC";
   case X86ISD::FMINC:              return "X86ISD::FMINC";
   case X86ISD::FRSQRT:             return "X86ISD::FRSQRT";
@@ -27177,6 +28292,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::LAND:               return "X86ISD::LAND";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VEXTRACT_STORE:     return "X86ISD::VEXTRACT_STORE";
   case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
   case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
   case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
@@ -27188,11 +28304,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VMTRUNCSTORES:      return "X86ISD::VMTRUNCSTORES";
   case X86ISD::VMTRUNCSTOREUS:     return "X86ISD::VMTRUNCSTOREUS";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
-  case X86ISD::VFPEXT_RND:         return "X86ISD::VFPEXT_RND";
-  case X86ISD::VFPEXTS_RND:        return "X86ISD::VFPEXTS_RND";
+  case X86ISD::VFPEXT_SAE:         return "X86ISD::VFPEXT_SAE";
+  case X86ISD::VFPEXTS:            return "X86ISD::VFPEXTS";
+  case X86ISD::VFPEXTS_SAE:        return "X86ISD::VFPEXTS_SAE";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::VMFPROUND:          return "X86ISD::VMFPROUND";
   case X86ISD::VFPROUND_RND:       return "X86ISD::VFPROUND_RND";
+  case X86ISD::VFPROUNDS:          return "X86ISD::VFPROUNDS";
   case X86ISD::VFPROUNDS_RND:      return "X86ISD::VFPROUNDS_RND";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
@@ -27202,6 +28320,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VSHLI:              return "X86ISD::VSHLI";
   case X86ISD::VSRLI:              return "X86ISD::VSRLI";
   case X86ISD::VSRAI:              return "X86ISD::VSRAI";
+  case X86ISD::VSHLV:              return "X86ISD::VSHLV";
+  case X86ISD::VSRLV:              return "X86ISD::VSRLV";
   case X86ISD::VSRAV:              return "X86ISD::VSRAV";
   case X86ISD::VROTLI:             return "X86ISD::VROTLI";
   case X86ISD::VROTRI:             return "X86ISD::VROTRI";
@@ -27263,11 +28383,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPERMI:             return "X86ISD::VPERMI";
   case X86ISD::VPTERNLOG:          return "X86ISD::VPTERNLOG";
   case X86ISD::VFIXUPIMM:          return "X86ISD::VFIXUPIMM";
+  case X86ISD::VFIXUPIMM_SAE:      return "X86ISD::VFIXUPIMM_SAE";
   case X86ISD::VFIXUPIMMS:         return "X86ISD::VFIXUPIMMS";
+  case X86ISD::VFIXUPIMMS_SAE:     return "X86ISD::VFIXUPIMMS_SAE";
   case X86ISD::VRANGE:             return "X86ISD::VRANGE";
-  case X86ISD::VRANGE_RND:         return "X86ISD::VRANGE_RND";
+  case X86ISD::VRANGE_SAE:         return "X86ISD::VRANGE_SAE";
   case X86ISD::VRANGES:            return "X86ISD::VRANGES";
-  case X86ISD::VRANGES_RND:        return "X86ISD::VRANGES_RND";
+  case X86ISD::VRANGES_SAE:        return "X86ISD::VRANGES_SAE";
   case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
   case X86ISD::PMULDQ:             return "X86ISD::PMULDQ";
   case X86ISD::PSADBW:             return "X86ISD::PSADBW";
@@ -27281,6 +28403,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::SAHF:               return "X86ISD::SAHF";
   case X86ISD::RDRAND:             return "X86ISD::RDRAND";
   case X86ISD::RDSEED:             return "X86ISD::RDSEED";
+  case X86ISD::RDPKRU:             return "X86ISD::RDPKRU";
+  case X86ISD::WRPKRU:             return "X86ISD::WRPKRU";
   case X86ISD::VPMADDUBSW:         return "X86ISD::VPMADDUBSW";
   case X86ISD::VPMADDWD:           return "X86ISD::VPMADDWD";
   case X86ISD::VPSHA:              return "X86ISD::VPSHA";
@@ -27302,17 +28426,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPMADD52H:          return "X86ISD::VPMADD52H";
   case X86ISD::VPMADD52L:          return "X86ISD::VPMADD52L";
   case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE";
-  case X86ISD::VRNDSCALE_RND:      return "X86ISD::VRNDSCALE_RND";
+  case X86ISD::VRNDSCALE_SAE:      return "X86ISD::VRNDSCALE_SAE";
   case X86ISD::VRNDSCALES:         return "X86ISD::VRNDSCALES";
-  case X86ISD::VRNDSCALES_RND:     return "X86ISD::VRNDSCALES_RND";
+  case X86ISD::VRNDSCALES_SAE:     return "X86ISD::VRNDSCALES_SAE";
   case X86ISD::VREDUCE:            return "X86ISD::VREDUCE";
-  case X86ISD::VREDUCE_RND:        return "X86ISD::VREDUCE_RND";
+  case X86ISD::VREDUCE_SAE:        return "X86ISD::VREDUCE_SAE";
   case X86ISD::VREDUCES:           return "X86ISD::VREDUCES";
-  case X86ISD::VREDUCES_RND:       return "X86ISD::VREDUCES_RND";
+  case X86ISD::VREDUCES_SAE:       return "X86ISD::VREDUCES_SAE";
   case X86ISD::VGETMANT:           return "X86ISD::VGETMANT";
-  case X86ISD::VGETMANT_RND:       return "X86ISD::VGETMANT_RND";
+  case X86ISD::VGETMANT_SAE:       return "X86ISD::VGETMANT_SAE";
   case X86ISD::VGETMANTS:          return "X86ISD::VGETMANTS";
-  case X86ISD::VGETMANTS_RND:      return "X86ISD::VGETMANTS_RND";
+  case X86ISD::VGETMANTS_SAE:      return "X86ISD::VGETMANTS_SAE";
   case X86ISD::PCMPESTR:           return "X86ISD::PCMPESTR";
   case X86ISD::PCMPISTR:           return "X86ISD::PCMPISTR";
   case X86ISD::XTEST:              return "X86ISD::XTEST";
@@ -27323,26 +28447,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::RCP14:              return "X86ISD::RCP14";
   case X86ISD::RCP14S:             return "X86ISD::RCP14S";
   case X86ISD::RCP28:              return "X86ISD::RCP28";
+  case X86ISD::RCP28_SAE:          return "X86ISD::RCP28_SAE";
   case X86ISD::RCP28S:             return "X86ISD::RCP28S";
+  case X86ISD::RCP28S_SAE:         return "X86ISD::RCP28S_SAE";
   case X86ISD::EXP2:               return "X86ISD::EXP2";
+  case X86ISD::EXP2_SAE:           return "X86ISD::EXP2_SAE";
   case X86ISD::RSQRT14:            return "X86ISD::RSQRT14";
   case X86ISD::RSQRT14S:           return "X86ISD::RSQRT14S";
   case X86ISD::RSQRT28:            return "X86ISD::RSQRT28";
+  case X86ISD::RSQRT28_SAE:        return "X86ISD::RSQRT28_SAE";
   case X86ISD::RSQRT28S:           return "X86ISD::RSQRT28S";
+  case X86ISD::RSQRT28S_SAE:       return "X86ISD::RSQRT28S_SAE";
   case X86ISD::FADD_RND:           return "X86ISD::FADD_RND";
+  case X86ISD::FADDS:              return "X86ISD::FADDS";
   case X86ISD::FADDS_RND:          return "X86ISD::FADDS_RND";
   case X86ISD::FSUB_RND:           return "X86ISD::FSUB_RND";
+  case X86ISD::FSUBS:              return "X86ISD::FSUBS";
   case X86ISD::FSUBS_RND:          return "X86ISD::FSUBS_RND";
   case X86ISD::FMUL_RND:           return "X86ISD::FMUL_RND";
+  case X86ISD::FMULS:              return "X86ISD::FMULS";
   case X86ISD::FMULS_RND:          return "X86ISD::FMULS_RND";
   case X86ISD::FDIV_RND:           return "X86ISD::FDIV_RND";
+  case X86ISD::FDIVS:              return "X86ISD::FDIVS";
   case X86ISD::FDIVS_RND:          return "X86ISD::FDIVS_RND";
   case X86ISD::FSQRT_RND:          return "X86ISD::FSQRT_RND";
+  case X86ISD::FSQRTS:             return "X86ISD::FSQRTS";
   case X86ISD::FSQRTS_RND:         return "X86ISD::FSQRTS_RND";
-  case X86ISD::FGETEXP_RND:        return "X86ISD::FGETEXP_RND";
-  case X86ISD::FGETEXPS_RND:       return "X86ISD::FGETEXPS_RND";
+  case X86ISD::FGETEXP:            return "X86ISD::FGETEXP";
+  case X86ISD::FGETEXP_SAE:        return "X86ISD::FGETEXP_SAE";
+  case X86ISD::FGETEXPS:           return "X86ISD::FGETEXPS";
+  case X86ISD::FGETEXPS_SAE:       return "X86ISD::FGETEXPS_SAE";
   case X86ISD::SCALEF:             return "X86ISD::SCALEF";
+  case X86ISD::SCALEF_RND:         return "X86ISD::SCALEF_RND";
   case X86ISD::SCALEFS:            return "X86ISD::SCALEFS";
+  case X86ISD::SCALEFS_RND:        return "X86ISD::SCALEFS_RND";
   case X86ISD::AVG:                return "X86ISD::AVG";
   case X86ISD::MULHRS:             return "X86ISD::MULHRS";
   case X86ISD::SINT_TO_FP_RND:     return "X86ISD::SINT_TO_FP_RND";
@@ -27351,23 +28489,27 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CVTTP2UI:           return "X86ISD::CVTTP2UI";
   case X86ISD::MCVTTP2SI:          return "X86ISD::MCVTTP2SI";
   case X86ISD::MCVTTP2UI:          return "X86ISD::MCVTTP2UI";
-  case X86ISD::CVTTP2SI_RND:       return "X86ISD::CVTTP2SI_RND";
-  case X86ISD::CVTTP2UI_RND:       return "X86ISD::CVTTP2UI_RND";
+  case X86ISD::CVTTP2SI_SAE:       return "X86ISD::CVTTP2SI_SAE";
+  case X86ISD::CVTTP2UI_SAE:       return "X86ISD::CVTTP2UI_SAE";
   case X86ISD::CVTTS2SI:           return "X86ISD::CVTTS2SI";
   case X86ISD::CVTTS2UI:           return "X86ISD::CVTTS2UI";
-  case X86ISD::CVTTS2SI_RND:       return "X86ISD::CVTTS2SI_RND";
-  case X86ISD::CVTTS2UI_RND:       return "X86ISD::CVTTS2UI_RND";
+  case X86ISD::CVTTS2SI_SAE:       return "X86ISD::CVTTS2SI_SAE";
+  case X86ISD::CVTTS2UI_SAE:       return "X86ISD::CVTTS2UI_SAE";
   case X86ISD::CVTSI2P:            return "X86ISD::CVTSI2P";
   case X86ISD::CVTUI2P:            return "X86ISD::CVTUI2P";
+  case X86ISD::MCVTSI2P:           return "X86ISD::MCVTSI2P";
+  case X86ISD::MCVTUI2P:           return "X86ISD::MCVTUI2P";
   case X86ISD::VFPCLASS:           return "X86ISD::VFPCLASS";
   case X86ISD::VFPCLASSS:          return "X86ISD::VFPCLASSS";
   case X86ISD::MULTISHIFT:         return "X86ISD::MULTISHIFT";
+  case X86ISD::SCALAR_SINT_TO_FP:     return "X86ISD::SCALAR_SINT_TO_FP";
   case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
+  case X86ISD::SCALAR_UINT_TO_FP:     return "X86ISD::SCALAR_UINT_TO_FP";
   case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
   case X86ISD::CVTPS2PH:           return "X86ISD::CVTPS2PH";
   case X86ISD::MCVTPS2PH:          return "X86ISD::MCVTPS2PH";
   case X86ISD::CVTPH2PS:           return "X86ISD::CVTPH2PS";
-  case X86ISD::CVTPH2PS_RND:       return "X86ISD::CVTPH2PS_RND";
+  case X86ISD::CVTPH2PS_SAE:       return "X86ISD::CVTPH2PS_SAE";
   case X86ISD::CVTP2SI:            return "X86ISD::CVTP2SI";
   case X86ISD::CVTP2UI:            return "X86ISD::CVTP2UI";
   case X86ISD::MCVTP2SI:           return "X86ISD::MCVTP2SI";
@@ -27378,6 +28520,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CVTS2UI:            return "X86ISD::CVTS2UI";
   case X86ISD::CVTS2SI_RND:        return "X86ISD::CVTS2SI_RND";
   case X86ISD::CVTS2UI_RND:        return "X86ISD::CVTS2UI_RND";
+  case X86ISD::CVTNE2PS2BF16:      return "X86ISD::CVTNE2PS2BF16";
+  case X86ISD::CVTNEPS2BF16:       return "X86ISD::CVTNEPS2BF16";
+  case X86ISD::MCVTNEPS2BF16:      return "X86ISD::MCVTNEPS2BF16";
+  case X86ISD::DPBF16PS:           return "X86ISD::DPBF16PS";
   case X86ISD::LWPINS:             return "X86ISD::LWPINS";
   case X86ISD::MGATHER:            return "X86ISD::MGATHER";
   case X86ISD::MSCATTER:           return "X86ISD::MSCATTER";
@@ -27393,6 +28539,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::NT_BRIND:           return "X86ISD::NT_BRIND";
   case X86ISD::UMWAIT:             return "X86ISD::UMWAIT";
   case X86ISD::TPAUSE:             return "X86ISD::TPAUSE";
+  case X86ISD::ENQCMD:             return "X86ISD::ENQCMD";
+  case X86ISD::ENQCMDS:            return "X86ISD::ENQCMDS";
+  case X86ISD::VP2INTERSECT:       return "X86ISD::VP2INTERSECT";
   }
   return nullptr;
 }
@@ -27478,6 +28627,38 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
   return true;
 }
 
+bool X86TargetLowering::isBinOp(unsigned Opcode) const {
+  switch (Opcode) {
+  // Non-commutative X86ISD binops; all other opcodes defer to the base class.
+  // TODO: Add more X86ISD opcodes once we have test coverage.
+  case X86ISD::ANDNP:
+  case X86ISD::PCMPGT:
+  case X86ISD::FMAX:
+  case X86ISD::FMIN:
+  case X86ISD::FANDN:
+    return true;
+  }
+
+  return TargetLoweringBase::isBinOp(Opcode);
+}
+
+bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
+  switch (Opcode) {
+  // Commutative X86ISD binops. TODO: Add more once we have test coverage.
+  case X86ISD::PCMPEQ:
+  case X86ISD::PMULDQ:
+  case X86ISD::PMULUDQ:
+  case X86ISD::FMAXC:
+  case X86ISD::FMINC:
+  case X86ISD::FAND:
+  case X86ISD::FOR:
+  case X86ISD::FXOR:
+    return true;
+  }
+
+  return TargetLoweringBase::isCommutativeBinOp(Opcode);
+}
+
 bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
     return false;
@@ -27713,87 +28894,6 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
   return sinkMBB;
 }
 
-static MachineBasicBlock *emitWRPKRU(MachineInstr &MI, MachineBasicBlock *BB,
-                                     const X86Subtarget &Subtarget) {
-  DebugLoc dl = MI.getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
-  // insert input VAL into EAX
-  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
-      .addReg(MI.getOperand(0).getReg());
-  // insert zero to ECX
-  BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
-
-  // insert zero to EDX
-  BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::EDX);
-
-  // insert WRPKRU instruction
-  BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
-
-  MI.eraseFromParent(); // The pseudo is gone now.
-  return BB;
-}
-
-static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB,
-                                     const X86Subtarget &Subtarget) {
-  DebugLoc dl = MI.getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
-  // insert zero to ECX
-  BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
-
-  // insert RDPKRU instruction
-  BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
-  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
-      .addReg(X86::EAX);
-
-  MI.eraseFromParent(); // The pseudo is gone now.
-  return BB;
-}
-
-static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
-                                      const X86Subtarget &Subtarget,
-                                      unsigned Opc) {
-  DebugLoc dl = MI.getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  // Address into RAX/EAX, other two args into ECX, EDX.
-  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
-  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
-  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
-  for (int i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.add(MI.getOperand(i));
-
-  unsigned ValOps = X86::AddrNumOperands;
-  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
-      .addReg(MI.getOperand(ValOps).getReg());
-  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
-      .addReg(MI.getOperand(ValOps + 1).getReg());
-
-  // The instruction doesn't actually take any operands though.
-  BuildMI(*BB, MI, dl, TII->get(Opc));
-
-  MI.eraseFromParent(); // The pseudo is gone now.
-  return BB;
-}
-
-static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
-                                      const X86Subtarget &Subtarget) {
-  DebugLoc dl = MI->getDebugLoc();
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  // Address into RAX/EAX
-  unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
-  unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
-  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
-  for (int i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.add(MI->getOperand(i));
-
-  // The instruction doesn't actually take any operands though.
-  BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
-
-  MI->eraseFromParent(); // The pseudo is gone now.
-  return BB;
-}
-
 
 
 MachineBasicBlock *
@@ -27823,10 +28923,18 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
   unsigned ArgMode = MI.getOperand(7).getImm();
   unsigned Align = MI.getOperand(8).getImm();
 
+  MachineFunction *MF = MBB->getParent();
+
   // Memory Reference
   assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
-  SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
-                                           MI.memoperands_end());
+
+  MachineMemOperand *OldMMO = MI.memoperands().front();
+
+  // Clone the MMO into two separate MMOs for loading and storing
+  MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(
+      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);
+  MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(
+      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);
 
   // Machine Information
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -27891,7 +28999,6 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
     OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
 
     const BasicBlock *LLVM_BB = MBB->getBasicBlock();
-    MachineFunction *MF = MBB->getParent();
     overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
     offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
     endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -27924,7 +29031,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
         .add(Index)
         .addDisp(Disp, UseFPOffset ? 4 : 0)
         .add(Segment)
-        .setMemRefs(MMOs);
+        .setMemRefs(LoadOnlyMMO);
 
     // Check if there is enough room left to pull this argument.
     BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -27933,8 +29040,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
 
     // Branch to "overflowMBB" if offset >= max
     // Fall through to "offsetMBB" otherwise
-    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
-      .addMBB(overflowMBB);
+    BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
+      .addMBB(overflowMBB).addImm(X86::COND_AE);
   }
 
   // In offsetMBB, emit code to use the reg_save_area.
@@ -27949,7 +29056,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
         .add(Index)
         .addDisp(Disp, 16)
         .add(Segment)
-        .setMemRefs(MMOs);
+        .setMemRefs(LoadOnlyMMO);
 
     // Zero-extend the offset
     unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -27977,7 +29084,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
         .addDisp(Disp, UseFPOffset ? 4 : 0)
         .add(Segment)
         .addReg(NextOffsetReg)
-        .setMemRefs(MMOs);
+        .setMemRefs(StoreOnlyMMO);
 
     // Jump to endMBB
     BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -27996,7 +29103,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
       .add(Index)
       .addDisp(Disp, 8)
       .add(Segment)
-      .setMemRefs(MMOs);
+      .setMemRefs(LoadOnlyMMO);
 
   // If we need to align it, do so. Otherwise, just copy the address
   // to OverflowDestReg.
@@ -28033,7 +29140,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
       .addDisp(Disp, 8)
       .add(Segment)
       .addReg(NextAddrReg)
-      .setMemRefs(MMOs);
+      .setMemRefs(StoreOnlyMMO);
 
   // If we branched, emit the PHI to the front of endMBB.
   if (offsetMBB) {
@@ -28091,7 +29198,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) {
     // If %al is 0, branch around the XMM save block.
     BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
-    BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
+    BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(EndMBB).addImm(X86::COND_E);
     MBB->addSuccessor(EndMBB);
   }
 
@@ -28371,13 +29478,11 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
 
   // Create the conditional branch instructions.
   X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
-  unsigned Opc = X86::GetCondBranchFromCond(FirstCC);
-  BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB);
+  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
 
   X86::CondCode SecondCC =
       X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
-  unsigned Opc2 = X86::GetCondBranchFromCond(SecondCC);
-  BuildMI(FirstInsertedMBB, DL, TII->get(Opc2)).addMBB(SinkMBB);
+  BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);
 
   //  SinkMBB:
   //   %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
@@ -28463,20 +29568,21 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
   X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
   X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
   MachineInstr *LastCMOV = &MI;
-  MachineBasicBlock::iterator NextMIIt =
-      std::next(MachineBasicBlock::iterator(MI));
+  MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);
 
   // Check for case 1, where there are multiple CMOVs with the same condition
   // first.  Of the two cases of multiple CMOV lowerings, case 1 reduces the
   // number of jumps the most.
 
   if (isCMOVPseudo(MI)) {
-    // See if we have a string of CMOVS with the same condition.
+    // See if we have a string of CMOVS with the same condition. Skip over
+    // intervening debug insts.
     while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
            (NextMIIt->getOperand(3).getImm() == CC ||
             NextMIIt->getOperand(3).getImm() == OppCC)) {
       LastCMOV = &*NextMIIt;
       ++NextMIIt;
+      NextMIIt = skipDebugInstructionsForward(NextMIIt, ThisMBB->end());
     }
   }
 
@@ -28508,8 +29614,18 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
     SinkMBB->addLiveIn(X86::EFLAGS);
   }
 
+  // Transfer any debug instructions inside the CMOV sequence to the sunk block.
+  auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
+  auto DbgIt = MachineBasicBlock::iterator(MI);
+  while (DbgIt != DbgEnd) {
+    auto Next = std::next(DbgIt);
+    if (DbgIt->isDebugInstr())
+      SinkMBB->push_back(DbgIt->removeFromParent());
+    DbgIt = Next;
+  }
+
   // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
-  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+  SinkMBB->splice(SinkMBB->end(), ThisMBB,
                   std::next(MachineBasicBlock::iterator(LastCMOV)),
                   ThisMBB->end());
   SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
@@ -28522,8 +29638,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
   FalseMBB->addSuccessor(SinkMBB);
 
   // Create the conditional branch instruction.
-  unsigned Opc = X86::GetCondBranchFromCond(CC);
-  BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB);
+  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
 
   //  SinkMBB:
   //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
@@ -28539,53 +29654,6 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
   return SinkMBB;
 }
 
-MachineBasicBlock *
-X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
-                                       MachineBasicBlock *BB) const {
-  // Combine the following atomic floating-point modification pattern:
-  //   a.store(reg OP a.load(acquire), release)
-  // Transform them into:
-  //   OPss (%gpr), %xmm
-  //   movss %xmm, (%gpr)
-  // Or sd equivalent for 64-bit operations.
-  unsigned MOp, FOp;
-  switch (MI.getOpcode()) {
-  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
-  case X86::RELEASE_FADD32mr:
-    FOp = X86::ADDSSrm;
-    MOp = X86::MOVSSmr;
-    break;
-  case X86::RELEASE_FADD64mr:
-    FOp = X86::ADDSDrm;
-    MOp = X86::MOVSDmr;
-    break;
-  }
-  const X86InstrInfo *TII = Subtarget.getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-  unsigned ValOpIdx = X86::AddrNumOperands;
-  unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
-  MachineInstrBuilder MIB =
-      BuildMI(*BB, MI, DL, TII->get(FOp),
-              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
-          .addReg(VSrc);
-  for (int i = 0; i < X86::AddrNumOperands; ++i) {
-    MachineOperand &Operand = MI.getOperand(i);
-    // Clear any kill flags on register operands as we'll create a second
-    // instruction using the same address operands.
-    if (Operand.isReg())
-      Operand.setIsKill(false);
-    MIB.add(Operand);
-  }
-  MachineInstr *FOpMI = MIB;
-  MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
-  for (int i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.add(MI.getOperand(i));
-  MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
-  return BB;
-}
-
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
@@ -28652,7 +29720,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
   BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
     .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
     .addReg(SPLimitVReg);
-  BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
+  BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
 
   // bumpMBB simply decreases the stack pointer, since we know the current
   // stacklet has enough space.
@@ -29279,7 +30347,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
   BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
       .addReg(SSPCopyReg)
       .addReg(SSPCopyReg);
-  BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
   checkSspMBB->addSuccessor(sinkMBB);
   checkSspMBB->addSuccessor(fallMBB);
 
@@ -29309,7 +30377,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
       .addReg(SSPCopyReg);
 
   // Jump to sink in case PrevSSPReg <= SSPCopyReg.
-  BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB);
+  BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
   fallMBB->addSuccessor(sinkMBB);
   fallMBB->addSuccessor(fixShadowMBB);
 
@@ -29332,7 +30400,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
       .addImm(8);
 
   // Jump if the result of the shift is zero.
-  BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
   fixShadowMBB->addSuccessor(sinkMBB);
   fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
 
@@ -29367,7 +30435,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
   BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
 
   // Jump if the counter is not zero yet.
-  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB);
+  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
   fixShadowLoopMBB->addSuccessor(sinkMBB);
   fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
 
@@ -29512,10 +30580,9 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
                                          MachineBasicBlock *BB) const {
   DebugLoc DL = MI.getDebugLoc();
   MachineFunction *MF = BB->getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
-  int FI = MFI.getFunctionContextIndex();
+  int FI = MF->getFrameInfo().getFunctionContextIndex();
 
   // Get a mapping of the call site numbers to all of the landing pads they're
   // associated with.
@@ -29613,7 +30680,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
   BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
       .addReg(IReg)
       .addImm(LPadList.size());
-  BuildMI(DispatchBB, DL, TII->get(X86::JAE_1)).addMBB(TrapBB);
+  BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
 
   if (Subtarget.is64Bit()) {
     unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
@@ -29766,7 +30833,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
   case X86::CMOV_FR32:
+  case X86::CMOV_FR32X:
   case X86::CMOV_FR64:
+  case X86::CMOV_FR64X:
   case X86::CMOV_GR8:
   case X86::CMOV_GR16:
   case X86::CMOV_GR32:
@@ -29821,10 +30890,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return BB;
   }
 
-  case X86::RELEASE_FADD32mr:
-  case X86::RELEASE_FADD64mr:
-    return EmitLoweredAtomicFP(MI, BB);
-
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:
@@ -29836,27 +30901,37 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::FP80_TO_INT64_IN_MEM: {
     // Change the floating point control register to use "round towards zero"
     // mode when truncating to an integer value.
-    int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+    int OrigCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
     addFrameReference(BuildMI(*BB, MI, DL,
-                              TII->get(X86::FNSTCW16m)), CWFrameIdx);
+                              TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
 
-    // Load the old value of the high byte of the control word...
+    // Load the old value of the control word...
     unsigned OldCW =
+      MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
+                      OrigCWFrameIdx);
+
+    // OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
+    unsigned NewCW =
+      MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+    BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
+      .addReg(OldCW, RegState::Kill).addImm(0xC00);
+
+    // Extract to 16 bits.
+    unsigned NewCW16 =
       MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
-    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
-                      CWFrameIdx);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
+      .addReg(NewCW, RegState::Kill, X86::sub_16bit);
 
-    // Set the high part to be round to zero...
-    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
-      .addImm(0xC7F);
+    // Prepare memory for FLDCW.
+    int NewCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
+                      NewCWFrameIdx)
+      .addReg(NewCW16, RegState::Kill);
 
     // Reload the modified control word now...
     addFrameReference(BuildMI(*BB, MI, DL,
-                              TII->get(X86::FLDCW16m)), CWFrameIdx);
-
-    // Restore the memory image of control word to original value
-    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
-      .addReg(OldCW);
+                              TII->get(X86::FLDCW16m)), NewCWFrameIdx);
 
     // Get the X86 opcode to use.
     unsigned Opc;
@@ -29879,26 +30954,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
     // Reload the original control word now.
     addFrameReference(BuildMI(*BB, MI, DL,
-                              TII->get(X86::FLDCW16m)), CWFrameIdx);
+                              TII->get(X86::FLDCW16m)), OrigCWFrameIdx);
 
     MI.eraseFromParent(); // The pseudo instruction is gone now.
     return BB;
   }
-  // Thread synchronization.
-  case X86::MONITOR:
-    return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
-  case X86::MONITORX:
-    return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
-
-  // Cache line zero
-  case X86::CLZERO:
-    return emitClzero(&MI, BB, Subtarget);
-
-  // PKU feature
-  case X86::WRPKRU:
-    return emitWRPKRU(MI, BB, Subtarget);
-  case X86::RDPKRU:
-    return emitRDPKRU(MI, BB, Subtarget);
+
   // xbegin
   case X86::XBEGIN:
     return emitXBegin(MI, BB, Subtarget.getInstrInfo());
@@ -30093,7 +31154,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
                                             Op.getConstantOperandVal(1));
     Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
-    Known = Known.zextOrTrunc(BitWidth);
+    Known = Known.zextOrTrunc(BitWidth, false);
     Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
     break;
   }
@@ -30150,6 +31211,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     Known = Known.trunc(BitWidth);
     break;
   }
+  case X86ISD::ANDNP: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+    // ANDNP = (~X & Y);
+    Known.One &= Known2.Zero;
+    Known.Zero |= Known2.One;
+    break;
+  }
+  case X86ISD::FOR: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    Known.Zero &= Known2.Zero;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    Known.One |= Known2.One;
+    break;
+  }
   case X86ISD::CMOV: {
     Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
     // If we don't know any bits, early out.
@@ -30219,7 +31301,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {
-  unsigned VTBits = Op.getScalarValueSizeInBits();
+  EVT VT = Op.getValueType();
+  unsigned VTBits = VT.getScalarSizeInBits();
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
   case X86ISD::SETCC_CARRY:
@@ -30257,7 +31340,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
 
   case X86ISD::VSHLI: {
     SDValue Src = Op.getOperand(0);
-    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    const APInt &ShiftVal = Op.getConstantOperandAPInt(1);
     if (ShiftVal.uge(VTBits))
       return VTBits; // Shifted all bits out --> zero.
     unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
@@ -30268,7 +31351,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
 
   case X86ISD::VSRAI: {
     SDValue Src = Op.getOperand(0);
-    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    APInt ShiftVal = Op.getConstantOperandAPInt(1);
     if (ShiftVal.uge(VTBits - 1))
       return VTBits; // Sign splat.
     unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
@@ -30284,6 +31367,15 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     // Vector compares return zero/all-bits result values.
     return VTBits;
 
+  case X86ISD::ANDNP: {
+    unsigned Tmp0 =
+        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp0 == 1) return 1; // Early out.
+    unsigned Tmp1 =
+        DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    return std::min(Tmp0, Tmp1);
+  }
+
   case X86ISD::CMOV: {
     unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
     if (Tmp0 == 1) return 1;  // Early out.
@@ -30292,6 +31384,54 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
   }
   }
 
+  // Handle target shuffles.
+  // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
+  if (isTargetShuffle(Opcode)) {
+    bool IsUnary;
+    SmallVector<int, 64> Mask;
+    SmallVector<SDValue, 2> Ops;
+    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
+                             IsUnary)) {
+      unsigned NumOps = Ops.size();
+      unsigned NumElts = VT.getVectorNumElements();
+      if (Mask.size() == NumElts) {
+        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (!DemandedElts[i])
+            continue;
+          int M = Mask[i];
+          if (M == SM_SentinelUndef) {
+            // For UNDEF elements, we don't know anything about the common state
+            // of the shuffle result.
+            return 1;
+          } else if (M == SM_SentinelZero) {
+            // Zero = all sign bits.
+            continue;
+          }
+          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
+                 "Shuffle index out of range");
+
+          unsigned OpIdx = (unsigned)M / NumElts;
+          unsigned EltIdx = (unsigned)M % NumElts;
+          if (Ops[OpIdx].getValueType() != VT) {
+            // TODO - handle target shuffle ops with different value types.
+            return 1;
+          }
+          DemandedOps[OpIdx].setBit(EltIdx);
+        }
+        unsigned Tmp0 = VTBits;
+        for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
+          if (!DemandedOps[i])
+            continue;
+          unsigned Tmp1 =
+              DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
+          Tmp0 = std::min(Tmp0, Tmp1);
+        }
+        return Tmp0;
+      }
+    }
+  }
+
   // Fallback case.
   return 1;
 }
@@ -30305,12 +31445,11 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
 // Attempt to match a combined shuffle mask against supported unary shuffle
 // instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
-                                    bool AllowFloatDomain, bool AllowIntDomain,
-                                    SDValue &V1, const SDLoc &DL,
-                                    SelectionDAG &DAG,
-                                    const X86Subtarget &Subtarget,
-                                    unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
+static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
+                              bool AllowFloatDomain, bool AllowIntDomain,
+                              SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
+                              const X86Subtarget &Subtarget, unsigned &Shuffle,
+                              MVT &SrcVT, MVT &DstVT) {
   unsigned NumMaskElts = Mask.size();
   unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
 
@@ -30322,19 +31461,25 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     return true;
   }
 
-  // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
+  // Match against an ANY/ZERO_EXTEND_VECTOR_INREG instruction.
   // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
                          (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
     unsigned MaxScale = 64 / MaskEltSize;
     for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
-      bool Match = true;
+      bool MatchAny = true;
+      bool MatchZero = true;
       unsigned NumDstElts = NumMaskElts / Scale;
-      for (unsigned i = 0; i != NumDstElts && Match; ++i) {
-        Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
-        Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+      for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
+        if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
+          MatchAny = MatchZero = false;
+          break;
+        }
+        MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
+        MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
       }
-      if (Match) {
+      if (MatchAny || MatchZero) {
+        assert(MatchZero && "Failed to match zext but matched aext?");
         unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
         MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
                                             MVT::getIntegerVT(MaskEltSize);
@@ -30343,10 +31488,9 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
         if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
           V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
 
-        if (SrcVT.getVectorNumElements() == NumDstElts)
-          Shuffle = unsigned(ISD::ZERO_EXTEND);
-        else
-          Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
+        Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
+        if (SrcVT.getVectorNumElements() != NumDstElts)
+          Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);
 
         DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
         DstVT = MVT::getVectorVT(DstVT, NumDstElts);
@@ -30368,7 +31512,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   // instructions are no slower than UNPCKLPD but has the option to
   // fold the input operand into even an unaligned memory load.
   if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
-    if (!Subtarget.hasAVX2() && isTargetShuffleEquivalent(Mask, {0, 0})) {
+    if (isTargetShuffleEquivalent(Mask, {0, 0})) {
       Shuffle = X86ISD::MOVDDUP;
       SrcVT = DstVT = MVT::v2f64;
       return true;
@@ -30426,29 +31570,18 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
-  // Attempt to match against broadcast-from-vector.
-  if (Subtarget.hasAVX2()) {
-    SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
-    if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
-      SrcVT = DstVT = MaskVT;
-      Shuffle = X86ISD::VBROADCAST;
-      return true;
-    }
-  }
-
   return false;
 }
 
 // Attempt to match a combined shuffle mask against supported unary immediate
 // permute instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
-                                           const APInt &Zeroable,
-                                           bool AllowFloatDomain,
-                                           bool AllowIntDomain,
-                                           const X86Subtarget &Subtarget,
-                                           unsigned &Shuffle, MVT &ShuffleVT,
-                                           unsigned &PermuteImm) {
+static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
+                                     const APInt &Zeroable,
+                                     bool AllowFloatDomain, bool AllowIntDomain,
+                                     const X86Subtarget &Subtarget,
+                                     unsigned &Shuffle, MVT &ShuffleVT,
+                                     unsigned &PermuteImm) {
   unsigned NumMaskElts = Mask.size();
   unsigned InputSizeInBits = MaskVT.getSizeInBits();
   unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
@@ -30549,9 +31682,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   // FIXME: Add 512-bit support.
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
                          (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
-    int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
-                                             MaskScalarSizeInBits, Mask,
-                                             0, Zeroable, Subtarget);
+    int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
+                                       Mask, 0, Zeroable, Subtarget);
     if (0 < ShiftAmt) {
       PermuteImm = (unsigned)ShiftAmt;
       return true;
@@ -30564,13 +31696,12 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
 // Attempt to match a combined unary shuffle mask against supported binary
 // shuffle instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
-                                     bool AllowFloatDomain, bool AllowIntDomain,
-                                     SDValue &V1, SDValue &V2, const SDLoc &DL,
-                                     SelectionDAG &DAG,
-                                     const X86Subtarget &Subtarget,
-                                     unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
-                                     bool IsUnary) {
+static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
+                               bool AllowFloatDomain, bool AllowIntDomain,
+                               SDValue &V1, SDValue &V2, const SDLoc &DL,
+                               SelectionDAG &DAG, const X86Subtarget &Subtarget,
+                               unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
+                               bool IsUnary) {
   unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
 
   if (MaskVT.is128BitVector()) {
@@ -30631,7 +31762,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   return false;
 }
 
-static bool matchBinaryPermuteVectorShuffle(
+static bool matchBinaryPermuteShuffle(
     MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable,
     bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
     const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget,
@@ -30642,7 +31773,7 @@ static bool matchBinaryPermuteVectorShuffle(
   // Attempt to match against PALIGNR byte rotate.
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
                          (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
-    int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
+    int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
     if (0 < ByteRotation) {
       Shuffle = X86ISD::PALIGNR;
       ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
@@ -30678,34 +31809,11 @@ static bool matchBinaryPermuteVectorShuffle(
           return true;
         }
       } else {
-        // Determine a type compatible with X86ISD::BLENDI.
-        ShuffleVT = MaskVT;
-        if (Subtarget.hasAVX2()) {
-          if (ShuffleVT == MVT::v4i64)
-            ShuffleVT = MVT::v8i32;
-          else if (ShuffleVT == MVT::v2i64)
-            ShuffleVT = MVT::v4i32;
-        } else {
-          if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
-            ShuffleVT = MVT::v8i16;
-          else if (ShuffleVT == MVT::v4i64)
-            ShuffleVT = MVT::v4f64;
-          else if (ShuffleVT == MVT::v8i32)
-            ShuffleVT = MVT::v8f32;
-        }
-
-        if (!ShuffleVT.isFloatingPoint()) {
-          int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
-          BlendMask =
-              scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
-          ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
-          ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
-        }
-
         V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
         V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
         PermuteImm = (unsigned)BlendMask;
         Shuffle = X86ISD::BLENDI;
+        ShuffleVT = MaskVT;
         return true;
       }
     }
@@ -30715,7 +31823,7 @@ static bool matchBinaryPermuteVectorShuffle(
   if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
       MaskVT.is128BitVector()) {
     if (Zeroable.getBoolValue() &&
-        matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+        matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
       Shuffle = X86ISD::INSERTPS;
       ShuffleVT = MVT::v4f32;
       return true;
@@ -30727,7 +31835,7 @@ static bool matchBinaryPermuteVectorShuffle(
       ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
        (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
        (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
-    if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+    if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
       Shuffle = X86ISD::SHUFP;
       ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
       return true;
@@ -30784,6 +31892,11 @@ static bool matchBinaryPermuteVectorShuffle(
   return false;
 }
 
+static SDValue combineX86ShuffleChainWithExtract(
+    ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+    bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+    const X86Subtarget &Subtarget);
+
 /// Combine an arbitrary chain of shuffles into a single instruction if
 /// possible.
 ///
@@ -30841,6 +31954,24 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   bool IsEVEXShuffle =
       RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
 
+  // Attempt to match a subvector broadcast.
+  // shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0)
+  if (UnaryShuffle &&
+      (BaseMaskEltSizeInBits == 128 || BaseMaskEltSizeInBits == 256)) {
+    SmallVector<int, 64> BroadcastMask(NumBaseMaskElts, 0);
+    if (isTargetShuffleEquivalent(BaseMask, BroadcastMask)) {
+      SDValue Src = Inputs[0];
+      if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+          Src.getOperand(0).isUndef() &&
+          Src.getOperand(1).getValueSizeInBits() == BaseMaskEltSizeInBits &&
+          MayFoldLoad(Src.getOperand(1)) && isNullConstant(Src.getOperand(2))) {
+        return DAG.getBitcast(RootVT, DAG.getNode(X86ISD::SUBV_BROADCAST, DL,
+                                                  Src.getValueType(),
+                                                  Src.getOperand(1)));
+      }
+    }
+  }
+
   // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
 
   // Handle 128-bit lane shuffles of 256-bit vectors.
@@ -30894,6 +32025,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   // Which shuffle domains are permitted?
   // Permit domain crossing at higher combine depths.
+  // TODO: Should we indicate which domain is preferred if both are allowed?
   bool AllowFloatDomain = FloatDomain || (Depth > 3);
   bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
                         (!MaskVT.is256BitVector() || Subtarget.hasAVX2());
@@ -30909,8 +32041,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // directly if we don't shuffle the lower element and we shuffle the upper
     // (zero) elements within themselves.
     if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
-        (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
-      unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+        (cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() %
+         MaskEltSizeInBits) == 0) {
+      unsigned Scale =
+          cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() /
+          MaskEltSizeInBits;
       ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
       if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
           isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
@@ -30918,10 +32053,35 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       }
     }
 
+    // Attempt to match against broadcast-from-vector.
+    // Limit AVX1 to cases where we're loading+broadcasting a scalar element.
+    if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits))
+        && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
+      SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
+      if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
+        if (V1.getValueType() == MaskVT &&
+            V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+            MayFoldLoad(V1.getOperand(0))) {
+          if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+            return SDValue(); // Nothing to do!
+          Res = V1.getOperand(0);
+          Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+          return DAG.getBitcast(RootVT, Res);
+        }
+        if (Subtarget.hasAVX2()) {
+          if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+            return SDValue(); // Nothing to do!
+          Res = DAG.getBitcast(MaskVT, V1);
+          Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+          return DAG.getBitcast(RootVT, Res);
+        }
+      }
+    }
+
     SDValue NewV1 = V1; // Save operand in case early exit happens.
-    if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                                NewV1, DL, DAG, Subtarget, Shuffle,
-                                ShuffleSrcVT, ShuffleVT) &&
+    if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+                          DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+                          ShuffleVT) &&
         (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return SDValue(); // Nothing to do!
@@ -30930,9 +32090,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       return DAG.getBitcast(RootVT, Res);
     }
 
-    if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
-                                       AllowIntDomain, Subtarget, Shuffle,
-                                       ShuffleVT, PermuteImm) &&
+    if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
+                                 AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
+                                 PermuteImm) &&
         (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return SDValue(); // Nothing to do!
@@ -30945,9 +32105,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   SDValue NewV1 = V1; // Save operands in case early exit happens.
   SDValue NewV2 = V2;
-  if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                               NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
-                               ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
+  if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+                         NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+                         ShuffleVT, UnaryShuffle) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
@@ -30959,7 +32119,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   NewV1 = V1; // Save operands in case early exit happens.
   NewV2 = V2;
-  if (matchBinaryPermuteVectorShuffle(
+  if (matchBinaryPermuteShuffle(
           MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
           NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
@@ -30979,8 +32139,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   // Annoyingly, SSE4A instructions don't map into the above match helpers.
   if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
     uint64_t BitLen, BitIdx;
-    if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
-                                  Zeroable)) {
+    if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
+                            Zeroable)) {
       if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
         return SDValue(); // Nothing to do!
       V1 = DAG.getBitcast(IntMaskVT, V1);
@@ -30990,7 +32150,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       return DAG.getBitcast(RootVT, Res);
     }
 
-    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+    if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
       if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
         return SDValue(); // Nothing to do!
       V1 = DAG.getBitcast(IntMaskVT, V1);
@@ -31057,6 +32217,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       return DAG.getBitcast(RootVT, Res);
     }
 
+    // If that failed and either input is extracted then try to combine as a
+    // shuffle with the larger type.
+    if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+            Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+            DAG, Subtarget))
+      return WideShuffle;
+
     // If we have a dual input lane-crossing shuffle then lower to VPERMV3.
     if (AllowVariableMask && !MaskContainsZeros &&
         ((Subtarget.hasAVX512() &&
@@ -31222,10 +32389,145 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     return DAG.getBitcast(RootVT, Res);
   }
 
+  // If that failed and either input is extracted then try to combine as a
+  // shuffle with the larger type.
+  if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+          Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+          DAG, Subtarget))
+    return WideShuffle;
+
+  // If we have a dual input shuffle then lower to VPERMV3.
+  if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
+      ((Subtarget.hasAVX512() &&
+        (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+         MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+       (Subtarget.hasVLX() &&
+        (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 ||
+         MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 ||
+         MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+       (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+       (Subtarget.hasBWI() && Subtarget.hasVLX() &&
+        (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) ||
+       (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+       (Subtarget.hasVBMI() && Subtarget.hasVLX() &&
+        (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) {
+    SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
+    V1 = DAG.getBitcast(MaskVT, V1);
+    V2 = DAG.getBitcast(MaskVT, V2);
+    Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+    return DAG.getBitcast(RootVT, Res);
+  }
+
   // Failed to find any combines.
   return SDValue();
 }
 
+// Combine an arbitrary chain of shuffles + extract_subvectors into a single
+// instruction if possible. Returns an empty SDValue() if nothing was combined.
+// Inputs/BaseMask describe the narrow shuffle; Root supplies the result type.
+// Wrapper for combineX86ShuffleChain that extends the shuffle mask to a larger
+// type size (the widest source found behind the inputs) to attempt to combine:
+// shuffle(extract_subvector(x,c1),extract_subvector(y,c2),m1)
+// -->
+// extract_subvector(shuffle(x,y,m2),0)
+static SDValue combineX86ShuffleChainWithExtract(
+    ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+    bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+    const X86Subtarget &Subtarget) {
+  unsigned NumMaskElts = BaseMask.size();
+  unsigned NumInputs = Inputs.size();
+  if (NumInputs == 0) // No inputs, nothing to widen.
+    return SDValue();
+
+  SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
+  SmallVector<unsigned, 4> Offsets(NumInputs, 0);
+
+  // Peek through constant-index EXTRACT_SUBVECTOR chains, summing the offsets.
+  // TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
+  unsigned WideSizeInBits = WideInputs[0].getValueSizeInBits();
+  for (unsigned i = 0; i != NumInputs; ++i) {
+    SDValue &Src = WideInputs[i];
+    unsigned &Offset = Offsets[i];
+    Src = peekThroughBitcasts(Src);
+    EVT BaseVT = Src.getValueType();
+    while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+           isa<ConstantSDNode>(Src.getOperand(1))) {
+      Offset += Src.getConstantOperandVal(1);
+      Src = Src.getOperand(0);
+    }
+    WideSizeInBits = std::max(WideSizeInBits, Src.getValueSizeInBits());
+    assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
+           "Unexpected subvector extraction");
+    Offset /= BaseVT.getVectorNumElements(); // In BaseVT-sized subvectors.
+    Offset *= NumMaskElts; // Rescaled to mask element units.
+  }
+
+  // Bail if we're always extracting from the lowest subvectors,
+  // combineX86ShuffleChain should match this for the current width.
+  if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
+    return SDValue();
+
+  EVT RootVT = Root.getValueType();
+  unsigned RootSizeInBits = RootVT.getSizeInBits();
+  unsigned Scale = WideSizeInBits / RootSizeInBits; // Wide-to-root size ratio.
+  assert((WideSizeInBits % RootSizeInBits) == 0 &&
+         "Unexpected subvector extraction");
+
+  // If the src vector types aren't the same, see if we can extend them to
+  // match each other - bail on illegal types or mismatched scalar types.
+  // TODO: Support different scalar types?
+  EVT WideSVT = WideInputs[0].getValueType().getScalarType();
+  if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
+        return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
+               Op.getValueType().getScalarType() != WideSVT;
+      }))
+    return SDValue();
+
+  for (SDValue &NewInput : WideInputs) {
+    assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
+           "Shuffle vector size mismatch");
+    if (WideSizeInBits > NewInput.getValueSizeInBits())
+      NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
+                                SDLoc(NewInput), WideSizeInBits);
+    assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
+           "Unexpected subvector extraction");
+  }
+
+  // Create new mask for larger type: input i starts at i * Scale * NumMaskElts.
+  for (unsigned i = 1; i != NumInputs; ++i)
+    Offsets[i] += i * Scale * NumMaskElts;
+
+  SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end());
+  for (int &M : WideMask) {
+    if (M < 0) // Negative (sentinel) entries stay as-is.
+      continue;
+    M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
+  }
+  WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
+
+  // Remove unused/repeated shuffle source ops.
+  resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
+  assert(!WideInputs.empty() && "Shuffle with no inputs detected");
+
+  if (WideInputs.size() > 2) // More than two sources left: give up.
+    return SDValue();
+
+  // Increase depth for every upper subvector we've peeked through.
+  Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });
+
+  // Attempt to combine wider chain, then extract the low RootSizeInBits bits.
+  // TODO: Can we use a better Root?
+  SDValue WideRoot = WideInputs[0];
+  if (SDValue WideShuffle = combineX86ShuffleChain(
+          WideInputs, WideRoot, WideMask, Depth, HasVariableMask,
+          AllowVariableMask, DAG, Subtarget)) {
+    WideShuffle =
+        extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
+    return DAG.getBitcast(RootVT, WideShuffle);
+  }
+  return SDValue();
+}
+
 // Attempt to constant fold all of the constant source ops.
 // Returns true if the entire shuffle is folded to a constant.
 // TODO: Extend this to merge multiple constant Ops and update the mask.
@@ -31370,19 +32672,10 @@ static SDValue combineX86ShufflesRecursively(
   if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
     return SDValue();
 
-  // TODO - Add support for more than 2 inputs.
-  if (2 < OpInputs.size())
-    return SDValue();
-
-  SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
-  SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
-
   // Add the inputs to the Ops list, avoiding duplicates.
   SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
 
   auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
-    if (!Input)
-      return -1;
     // Attempt to find an existing match.
     SDValue InputBC = peekThroughBitcasts(Input);
     for (int i = 0, e = Ops.size(); i < e; ++i)
@@ -31398,8 +32691,9 @@ static SDValue combineX86ShufflesRecursively(
     return Ops.size() - 1;
   };
 
-  int InputIdx0 = AddOp(Input0, SrcOpIndex);
-  int InputIdx1 = AddOp(Input1, -1);
+  SmallVector<int, 2> OpInputIdx;
+  for (SDValue OpInput : OpInputs)
+    OpInputIdx.push_back(AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
 
   assert(((RootMask.size() > OpMask.size() &&
            RootMask.size() % OpMask.size() == 0) ||
@@ -31471,13 +32765,9 @@ static SDValue combineX86ShufflesRecursively(
             : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
 
     OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
-    if (OpMask[OpIdx] < (int)OpMask.size()) {
-      assert(0 <= InputIdx0 && "Unknown target shuffle input");
-      OpMaskedIdx += InputIdx0 * MaskWidth;
-    } else {
-      assert(0 <= InputIdx1 && "Unknown target shuffle input");
-      OpMaskedIdx += InputIdx1 * MaskWidth;
-    }
+    int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
+    assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
+    OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
 
     Mask[i] = OpMaskedIdx;
   }
@@ -31493,7 +32783,7 @@ static SDValue combineX86ShufflesRecursively(
     return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
                          SDLoc(Root));
 
-  // Remove unused shuffle source ops.
+  // Remove unused/repeated shuffle source ops.
   resolveTargetShuffleInputsAndMask(Ops, Mask);
   assert(!Ops.empty() && "Shuffle with no inputs detected");
 
@@ -31530,29 +32820,42 @@ static SDValue combineX86ShufflesRecursively(
     return Cst;
 
   // We can only combine unary and binary shuffle mask cases.
-  if (Ops.size() > 2)
-    return SDValue();
+  if (Ops.size() <= 2) {
+    // Minor canonicalization of the accumulated shuffle mask to make it easier
+    // to match below. All this does is detect masks with sequential pairs of
+    // elements, and shrink them to the half-width mask. It does this in a loop
+    // so it will reduce the size of the mask to the minimal width mask which
+    // performs an equivalent shuffle.
+    SmallVector<int, 64> WidenedMask;
+    while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+      Mask = std::move(WidenedMask);
+    }
+
+    // Canonicalization of binary shuffle masks to improve pattern matching by
+    // commuting the inputs.
+    if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
+      ShuffleVectorSDNode::commuteMask(Mask);
+      std::swap(Ops[0], Ops[1]);
+    }
 
-  // Minor canonicalization of the accumulated shuffle mask to make it easier
-  // to match below. All this does is detect masks with sequential pairs of
-  // elements, and shrink them to the half-width mask. It does this in a loop
-  // so it will reduce the size of the mask to the minimal width mask which
-  // performs an equivalent shuffle.
-  SmallVector<int, 64> WidenedMask;
-  while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
-    Mask = std::move(WidenedMask);
+    // Finally, try to combine into a single shuffle instruction.
+    return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+                                  AllowVariableMask, DAG, Subtarget);
   }
 
-  // Canonicalization of binary shuffle masks to improve pattern matching by
-  // commuting the inputs.
-  if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
-    ShuffleVectorSDNode::commuteMask(Mask);
-    std::swap(Ops[0], Ops[1]);
-  }
+  // If that failed and any input is extracted then try to combine as a
+  // shuffle with the larger type.
+  return combineX86ShuffleChainWithExtract(Ops, Root, Mask, Depth,
+                                           HasVariableMask, AllowVariableMask,
+                                           DAG, Subtarget);
+}
 
-  // Finally, try to combine into a single shuffle instruction.
-  return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
-                                AllowVariableMask, DAG, Subtarget);
+/// Entry wrapper: starts combineX86ShufflesRecursively from Op at depth 1.
+static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
+                                             const X86Subtarget &Subtarget) {
+  return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+                                       /*HasVarMask*/ false,
+                                       /*AllowVarMask*/ true, DAG, Subtarget);
 }
 
 /// Get the PSHUF-style mask from PSHUF node.
@@ -31770,12 +33073,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
 
   switch (Opcode) {
   case X86ISD::VBROADCAST: {
-    // If broadcasting from another shuffle, attempt to simplify it.
-    // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
     SDValue Src = N.getOperand(0);
     SDValue BC = peekThroughBitcasts(Src);
     EVT SrcVT = Src.getValueType();
     EVT BCVT = BC.getValueType();
+
+    // If broadcasting from another shuffle, attempt to simplify it.
+    // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
     if (isTargetShuffle(BC.getOpcode()) &&
         VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
       unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
@@ -31789,6 +33093,71 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
         return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                            DAG.getBitcast(SrcVT, Res));
     }
+
+    // broadcast(bitcast(src)) -> bitcast(broadcast(src))
+    // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
+    if (Src.getOpcode() == ISD::BITCAST &&
+        SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
+                                   VT.getVectorNumElements());
+      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+    }
+
+    // Reduce broadcast source vector to lowest 128-bits.
+    if (SrcVT.getSizeInBits() > 128)
+      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+                         extract128BitVector(Src, 0, DAG, DL));
+
+    // broadcast(scalar_to_vector(x)) -> broadcast(x).
+    if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
+    // Share broadcast with the longest vector and extract low subvector (free).
+    for (SDNode *User : Src->uses())
+      if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+          User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+        return extractSubVector(SDValue(User, 0), 0, DAG, DL,
+                                VT.getSizeInBits());
+      }
+
+    return SDValue();
+  }
+  case X86ISD::BLENDI: {
+    SDValue N0 = N.getOperand(0);
+    SDValue N1 = N.getOperand(1);
+
+    // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
+    // TODO: Handle MVT::v16i16 repeated blend mask.
+    if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
+        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+      MVT SrcVT = N0.getOperand(0).getSimpleValueType();
+      if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+          SrcVT.getScalarSizeInBits() >= 32) {
+        unsigned Mask = N.getConstantOperandVal(2);
+        unsigned Size = VT.getVectorNumElements();
+        unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+        unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale);
+        return DAG.getBitcast(
+            VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
+                            N1.getOperand(0),
+                            DAG.getConstant(ScaleMask, DL, MVT::i8)));
+      }
+    }
+    return SDValue();
+  }
+  case X86ISD::VPERMI: {
+    // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
+    // TODO: Remove when we have preferred domains in combineX86ShuffleChain.
+    SDValue N0 = N.getOperand(0);
+    SDValue N1 = N.getOperand(1);
+    unsigned EltSizeInBits = VT.getScalarSizeInBits();
+    if (N0.getOpcode() == ISD::BITCAST &&
+        N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) {
+      SDValue Src = N0.getOperand(0);
+      EVT SrcVT = Src.getValueType();
+      SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
+      return DAG.getBitcast(VT, Res);
+    }
     return SDValue();
   }
   case X86ISD::PSHUFD:
@@ -32212,8 +33581,22 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 
 /// Eliminate a redundant shuffle of a horizontal math op.
 static SDValue foldShuffleOfHorizOp(SDNode *N) {
-  if (N->getOpcode() != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
-    return SDValue();
+  unsigned Opcode = N->getOpcode();
+  if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
+    if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
+      return SDValue();
+
+  // For a broadcast, peek through an extract element of index 0 to find the
+  // horizontal op: broadcast (ext_vec_elt HOp, 0)
+  EVT VT = N->getValueType(0);
+  if (Opcode == X86ISD::VBROADCAST) {
+    SDValue SrcOp = N->getOperand(0);
+    if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        SrcOp.getValueType() == MVT::f64 &&
+        SrcOp.getOperand(0).getValueType() == VT &&
+        isNullConstant(SrcOp.getOperand(1)))
+      N = SrcOp.getNode();
+  }
 
   SDValue HOp = N->getOperand(0);
   if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
@@ -32224,13 +33607,25 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
   // lanes of each operand as:
   // v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
   // ...similarly for v2f64 and v8i16.
-  // TODO: Handle UNDEF operands.
-  if (HOp.getOperand(0) != HOp.getOperand(1))
+  if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
+      HOp.getOperand(0) != HOp.getOperand(1))
     return SDValue();
 
   // When the operands of a horizontal math op are identical, the low half of
-  // the result is the same as the high half. If the shuffle is also replicating
-  // low and high halves, we don't need the shuffle.
+  // the result is the same as the high half. If a target shuffle is also
+  // replicating low and high halves, we don't need the shuffle.
+  if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
+    if (HOp.getScalarValueSizeInBits() == 64) {
+      // movddup (hadd X, X) --> hadd X, X
+      // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
+      assert((HOp.getValueType() == MVT::v2f64 ||
+        HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
+        "Unexpected type for h-op");
+      return HOp;
+    }
+    return SDValue();
+  }
+
   // shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X
   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
   // TODO: Other mask possibilities like {1,1} and {1,0} could be added here,
@@ -32252,14 +33647,51 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
   return SDValue();
 }
 
+/// If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the
+/// low half of each source vector and does not set any high half elements in
+/// the destination vector, narrow the shuffle to half its original size.
+static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
+  if (!Shuf->getValueType(0).isSimple())
+    return SDValue();
+  MVT VT = Shuf->getSimpleValueType(0);
+  if (!VT.is256BitVector() && !VT.is512BitVector())
+    return SDValue();
+
+  // See if we can ignore all of the high elements of the shuffle.
+  ArrayRef<int> Mask = Shuf->getMask();
+  if (!isUndefUpperHalf(Mask))
+    return SDValue();
+
+  // Check if the shuffle mask accesses only the low half of each input vector
+  // (half-index output is 0 or 2).
+  int HalfIdx1, HalfIdx2;
+  SmallVector<int, 8> HalfMask(Mask.size() / 2);
+  if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) ||
+      (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
+    return SDValue();
+
+  // Create a half-width shuffle to replace the unnecessarily wide shuffle.
+  // The trick is knowing that all of the insert/extract are actually free
+  // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
+  // of narrow inputs into a narrow output, and that is always cheaper than
+  // the wide shuffle that we started with.
+  return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
+                               Shuf->getOperand(1), HalfMask, HalfIdx1,
+                               HalfIdx2, false, DAG);
+}
+
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
+  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N))
+    if (SDValue V = narrowShuffle(Shuf, DAG))
+      return V;
+
+  // If we have legalized the vector types, look for blends of FADD and FSUB
+  // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
   SDLoc dl(N);
   EVT VT = N->getValueType(0);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // If we have legalized the vector types, look for blends of FADD and FSUB
-  // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
   if (TLI.isTypeLegal(VT)) {
     if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
       return AddSub;
@@ -32328,23 +33760,9 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
-  // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
-  // consecutive, non-overlapping, and in the right order.
-  SmallVector<SDValue, 16> Elts;
-  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
-    if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
-      Elts.push_back(Elt);
-      continue;
-    }
-    Elts.clear();
-    break;
-  }
-
-  if (Elts.size() == VT.getVectorNumElements())
-    if (SDValue LD =
-            EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
-      return LD;
+  // Attempt to combine into a vector load/broadcast.
+  if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
+    return LD;
 
   // For AVX2, we sometimes want to combine
   // (vector_shuffle <mask> (concat_vectors t1, undef)
@@ -32365,9 +33783,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     // specific PSHUF instruction sequences into their minimal form so that we
     // can evaluate how many specialized shuffle instructions are involved in
     // a particular chain.
-    if (SDValue Res = combineX86ShufflesRecursively(
-            {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-            /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
 
     // Simplify source operands based on shuffle mask.
@@ -32378,6 +33794,68 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
       return SDValue(N, 0);
   }
 
+  // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
+  // in the upper 64 bits.
+  // TODO: Can we generalize this using computeKnownBits?
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64) &&
+      N->getOperand(0).getOpcode() == ISD::BITCAST &&
+      (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
+       N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
+    SDValue In = N->getOperand(0).getOperand(0);
+    switch (In.getOpcode()) {
+    default:
+      break;
+    case X86ISD::CVTP2SI:   case X86ISD::CVTP2UI:
+    case X86ISD::MCVTP2SI:  case X86ISD::MCVTP2UI:
+    case X86ISD::CVTTP2SI:  case X86ISD::CVTTP2UI:
+    case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
+    case X86ISD::CVTSI2P:   case X86ISD::CVTUI2P:
+    case X86ISD::MCVTSI2P:  case X86ISD::MCVTUI2P:
+    case X86ISD::VFPROUND:  case X86ISD::VMFPROUND:
+      if (In.getOperand(0).getValueType() == MVT::v2f64 ||
+          In.getOperand(0).getValueType() == MVT::v2i64)
+        return N->getOperand(0); // return the bitcast
+      break;
+    }
+  }
+
+  // Pull subvector inserts into undef through VZEXT_MOVL by making it an
+  // insert into a zero vector. This helps get VZEXT_MOVL closer to
+  // scalar_to_vectors where 256/512 are canonicalized to an insert and a
+  // 128-bit scalar_to_vector. This reduces the number of isel patterns.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
+      N->getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR &&
+      N->getOperand(0).hasOneUse() &&
+      N->getOperand(0).getOperand(0).isUndef() &&
+      isNullConstant(N->getOperand(0).getOperand(2))) {
+    SDValue In = N->getOperand(0).getOperand(1);
+    SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, In.getValueType(), In);
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
+                       getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
+                       Movl, N->getOperand(0).getOperand(2));
+  }
+
+  // If this is a vzmovl of a full vector load, replace it with a vzload, unless
+  // the load is volatile.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+      ISD::isNormalLoad(N->getOperand(0).getNode())) {
+    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+    if (!LN->isVolatile()) {
+      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+      SDValue VZLoad =
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                  VT.getVectorElementType(),
+                                  LN->getPointerInfo(),
+                                  LN->getAlignment(),
+                                  MachineMemOperand::MOLoad);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+      return VZLoad;
+    }
+  }
+
+
   // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
   // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
   // FIXME: This can probably go away once we default to widening legalization.
@@ -32436,6 +33914,22 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
 
   // Handle special case opcodes.
   switch (Opc) {
+  case X86ISD::PMULDQ:
+  case X86ISD::PMULUDQ: {
+    APInt LHSUndef, LHSZero;
+    APInt RHSUndef, RHSZero;
+    SDValue LHS = Op.getOperand(0);
+    SDValue RHS = Op.getOperand(1);
+    if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+                                   Depth + 1))
+      return true;
+    if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+                                   Depth + 1))
+      return true;
+    // Multiply by zero.
+    KnownZero = LHSZero | RHSZero;
+    break;
+  }
   case X86ISD::VSHL:
   case X86ISD::VSRL:
   case X86ISD::VSRA: {
@@ -32443,11 +33937,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     SDValue Amt = Op.getOperand(1);
     MVT AmtVT = Amt.getSimpleValueType();
     assert(AmtVT.is128BitVector() && "Unexpected value type");
+
+    // If the shift amount is only used by SSE shift nodes (as the amount) then
+    // we know that only the bottom 64-bits are ever used.
+    bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) {
+      unsigned UseOpc = Use->getOpcode();
+      return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
+              UseOpc == X86ISD::VSRA) &&
+             Use->getOperand(0) != Amt;
+    });
+
     APInt AmtUndef, AmtZero;
     unsigned NumAmtElts = AmtVT.getVectorNumElements();
     APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
     if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
-                                   Depth + 1))
+                                   Depth + 1, AssumeSingleUse))
       return true;
     LLVM_FALLTHROUGH;
   }
@@ -32487,6 +33991,58 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       return true;
     break;
   }
+  case X86ISD::HADD:
+  case X86ISD::HSUB:
+  case X86ISD::FHADD:
+  case X86ISD::FHSUB: {
+    APInt DemandedLHS, DemandedRHS;
+    getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+    APInt LHSUndef, LHSZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, LHSUndef,
+                                   LHSZero, TLO, Depth + 1))
+      return true;
+    APInt RHSUndef, RHSZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, RHSUndef,
+                                   RHSZero, TLO, Depth + 1))
+      return true;
+    break;
+  }
+  case X86ISD::VTRUNC:
+  case X86ISD::VTRUNCS:
+  case X86ISD::VTRUNCUS: {
+    SDValue Src = Op.getOperand(0);
+    MVT SrcVT = Src.getSimpleValueType();
+    APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+    APInt SrcUndef, SrcZero;
+    if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO,
+                                   Depth + 1))
+      return true;
+    KnownZero = SrcZero.zextOrTrunc(NumElts);
+    KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+    break;
+  }
+  case X86ISD::BLENDV: {
+    APInt SelUndef, SelZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef,
+                                   SelZero, TLO, Depth + 1))
+      return true;
+
+    // TODO: Use SelZero to adjust LHS/RHS DemandedElts.
+    APInt LHSUndef, LHSZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef,
+                                   LHSZero, TLO, Depth + 1))
+      return true;
+
+    APInt RHSUndef, RHSZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef,
+                                   RHSZero, TLO, Depth + 1))
+      return true;
+
+    KnownZero = LHSZero & RHSZero;
+    KnownUndef = LHSUndef & RHSUndef;
+    break;
+  }
   case X86ISD::VBROADCAST: {
     SDValue Src = Op.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
@@ -32494,7 +34050,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       return false;
     // Don't bother broadcasting if we just need the 0'th element.
     if (DemandedElts == 1) {
-      if(Src.getValueType() != VT)
+      if (Src.getValueType() != VT)
         Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
                              SDLoc(Op));
       return TLO.CombineTo(Op, Src);
@@ -32506,8 +34062,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       return true;
     break;
   }
-  case X86ISD::PSHUFB: {
-    // TODO - simplify other variable shuffle masks.
+  case X86ISD::SUBV_BROADCAST: {
+    // Reduce size of broadcast if we don't need the upper half.
+    unsigned HalfElts = NumElts / 2;
+    if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
+      SDValue Src = Op.getOperand(0);
+      MVT SrcVT = Src.getSimpleValueType();
+
+      SDValue Half = Src;
+      if (SrcVT.getVectorNumElements() != HalfElts) {
+        MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
+        Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
+      }
+
+      return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
+                                               TLO.DAG, SDLoc(Op),
+                                               Half.getValueSizeInBits()));
+    }
+    break;
+  }
+  case X86ISD::VPERMV: {
+    SDValue Mask = Op.getOperand(0);
+    APInt MaskUndef, MaskZero;
+    if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+                                   Depth + 1))
+      return true;
+    break;
+  }
+  case X86ISD::PSHUFB:
+  case X86ISD::VPERMV3:
+  case X86ISD::VPERMILPV: {
     SDValue Mask = Op.getOperand(1);
     APInt MaskUndef, MaskZero;
     if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
@@ -32515,6 +34099,106 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
       return true;
     break;
   }
+  case X86ISD::VPPERM:
+  case X86ISD::VPERMIL2: {
+    SDValue Mask = Op.getOperand(2);
+    APInt MaskUndef, MaskZero;
+    if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+                                   Depth + 1))
+      return true;
+    break;
+  }
+  }
+
+  // For 256/512-bit ops that are 128/256-bit ops glued together, if we do not
+  // demand any of the high elements, then narrow the op to 128/256-bits: e.g.
+  // (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0
+  if ((VT.is256BitVector() || VT.is512BitVector()) &&
+      DemandedElts.lshr(NumElts / 2) == 0) {
+    unsigned SizeInBits = VT.getSizeInBits();
+    unsigned ExtSizeInBits = SizeInBits / 2;
+
+    // See if 512-bit ops only use the bottom 128-bits.
+    if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0)
+      ExtSizeInBits = SizeInBits / 4;
+
+    switch (Opc) {
+      // Zero upper elements.
+    case X86ISD::VZEXT_MOVL: {
+      SDLoc DL(Op);
+      SDValue Ext0 =
+          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue ExtOp =
+          TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
+      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+      SDValue Insert =
+          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+      return TLO.CombineTo(Op, Insert);
+    }
+      // Byte shifts by immediate.
+    case X86ISD::VSHLDQ:
+    case X86ISD::VSRLDQ:
+      // Shift by uniform.
+    case X86ISD::VSHL:
+    case X86ISD::VSRL:
+    case X86ISD::VSRA:
+      // Shift by immediate.
+    case X86ISD::VSHLI:
+    case X86ISD::VSRLI:
+    case X86ISD::VSRAI: {
+      SDLoc DL(Op);
+      SDValue Ext0 =
+          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue ExtOp =
+          TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1));
+      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+      SDValue Insert =
+          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+      return TLO.CombineTo(Op, Insert);
+    }
+    case X86ISD::VPERMI: {
+      // Simplify PERMPD/PERMQ to extract_subvector.
+      // TODO: This should be done in shuffle combining.
+      if (VT == MVT::v4f64 || VT == MVT::v4i64) {
+        SmallVector<int, 4> Mask;
+        DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask);
+        if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) {
+          SDLoc DL(Op);
+          SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128);
+          SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+          SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128);
+          return TLO.CombineTo(Op, Insert);
+        }
+      }
+      break;
+    }
+      // Target Shuffles.
+    case X86ISD::PSHUFB:
+    case X86ISD::UNPCKL:
+    case X86ISD::UNPCKH:
+      // Saturated Packs.
+    case X86ISD::PACKSS:
+    case X86ISD::PACKUS:
+      // Horizontal Ops.
+    case X86ISD::HADD:
+    case X86ISD::HSUB:
+    case X86ISD::FHADD:
+    case X86ISD::FHSUB: {
+      SDLoc DL(Op);
+      MVT ExtVT = VT.getSimpleVT();
+      ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
+                               ExtSizeInBits / ExtVT.getScalarSizeInBits());
+      SDValue Ext0 =
+          extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue Ext1 =
+          extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
+      SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
+      SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+      SDValue Insert =
+          insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+      return TLO.CombineTo(Op, Insert);
+    }
+    }
   }
 
   // Simplify target shuffles.
@@ -32606,9 +34290,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     SDValue RHS = Op.getOperand(1);
     // FIXME: Can we bound this better?
     APInt DemandedMask = APInt::getLowBitsSet(64, 32);
-    if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+    if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp,
+                             TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+    if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
+                             TLO, Depth + 1))
       return true;
     break;
   }
@@ -32727,6 +34413,97 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     }
     break;
   }
+  case X86ISD::PEXTRB:
+  case X86ISD::PEXTRW: {
+    SDValue Vec = Op.getOperand(0);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    MVT VecVT = Vec.getSimpleValueType();
+    unsigned NumVecElts = VecVT.getVectorNumElements();
+
+    if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
+      unsigned Idx = CIdx->getZExtValue();
+      unsigned VecBitWidth = VecVT.getScalarSizeInBits();
+
+      // If we demand no bits from the vector then we must have demanded
+      // bits from the implicit zext - simplify to zero.
+      APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth);
+      if (DemandedVecBits == 0)
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+      APInt KnownUndef, KnownZero;
+      APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
+      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
+                                     KnownZero, TLO, Depth + 1))
+        return true;
+
+      KnownBits KnownVec;
+      if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
+                               KnownVec, TLO, Depth + 1))
+        return true;
+
+      Known = KnownVec.zext(BitWidth, true);
+      return false;
+    }
+    break;
+  }
+  case X86ISD::PINSRB:
+  case X86ISD::PINSRW: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Scl = Op.getOperand(1);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    MVT VecVT = Vec.getSimpleValueType();
+
+    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+      unsigned Idx = CIdx->getZExtValue();
+      if (!OriginalDemandedElts[Idx])
+        return TLO.CombineTo(Op, Vec);
+
+      KnownBits KnownVec;
+      APInt DemandedVecElts(OriginalDemandedElts);
+      DemandedVecElts.clearBit(Idx);
+      if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
+                               KnownVec, TLO, Depth + 1))
+        return true;
+
+      KnownBits KnownScl;
+      unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+      APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits);
+      if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+        return true;
+
+      KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits());
+      Known.One = KnownVec.One & KnownScl.One;
+      Known.Zero = KnownVec.Zero & KnownScl.Zero;
+      return false;
+    }
+    break;
+  }
+  case X86ISD::PACKSS:
+    // PACKSS saturates to MIN/MAX integer values. So if we just want the
+    // sign bit then we can just ask for the source operands sign bit.
+    // TODO - add known bits handling.
+    if (OriginalDemandedBits.isSignMask()) {
+      APInt DemandedLHS, DemandedRHS;
+      getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS);
+
+      KnownBits KnownLHS, KnownRHS;
+      APInt SignMask = APInt::getSignMask(BitWidth * 2);
+      if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS,
+                               KnownLHS, TLO, Depth + 1))
+        return true;
+      if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
+                               KnownRHS, TLO, Depth + 1))
+        return true;
+    }
+    // TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
+    break;
+  case X86ISD::PCMPGT:
+    // icmp sgt(0, R) == ashr(R, BitWidth-1).
+    // iff we only need the sign bit then we can use R directly.
+    if (OriginalDemandedBits.isSignMask() &&
+        ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
+      return TLO.CombineTo(Op, Op.getOperand(1));
+    break;
   case X86ISD::MOVMSK: {
     SDValue Src = Op.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
@@ -32868,29 +34645,42 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
                      EltNo);
 }
 
+// Helper to peek through bitops/setcc to determine size of source vector.
+// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
+static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
+  switch (Src.getOpcode()) {
+  case ISD::SETCC:
+    return Src.getOperand(0).getValueSizeInBits() == Size;
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR:
+    return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
+           checkBitcastSrcVectorSize(Src.getOperand(1), Size);
+  }
+  return false;
+}
+
 // Try to match patterns such as
 // (i16 bitcast (v16i1 x))
 // ->
 // (i16 movmsk (16i8 sext (v16i1 x)))
 // before the illegal vector is scalarized on subtargets that don't have legal
 // vxi1 types.
-static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
+static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
+                                  const SDLoc &DL,
                                   const X86Subtarget &Subtarget) {
-  EVT VT = BitCast.getValueType();
-  SDValue N0 = BitCast.getOperand(0);
-  EVT VecVT = N0->getValueType(0);
-
-  if (!VT.isScalarInteger() || !VecVT.isSimple())
+  EVT SrcVT = Src.getValueType();
+  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
     return SDValue();
 
   // If the input is a truncate from v16i8 or v32i8 go ahead and use a
   // movmskb even with avx512. This will be better than truncating to vXi1 and
   // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
   // vpcmpeqb/vpcmpgtb.
-  bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
-                     (N0.getOperand(0).getValueType() == MVT::v16i8 ||
-                      N0.getOperand(0).getValueType() == MVT::v32i8 ||
-                      N0.getOperand(0).getValueType() == MVT::v64i8);
+  bool IsTruncated = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
+                     (Src.getOperand(0).getValueType() == MVT::v16i8 ||
+                      Src.getOperand(0).getValueType() == MVT::v32i8 ||
+                      Src.getOperand(0).getValueType() == MVT::v64i8);
 
   // With AVX512 vxi1 types are legal and we prefer using k-regs.
   // MOVMSK is supported in SSE2 or later.
@@ -32908,7 +34698,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
   // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
   // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
   MVT SExtVT;
-  switch (VecVT.getSimpleVT().SimpleTy) {
+  switch (SrcVT.getSimpleVT().SimpleTy) {
   default:
     return SDValue();
   case MVT::v2i1:
@@ -32918,10 +34708,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
     SExtVT = MVT::v4i32;
     // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
     // sign-extend to a 256-bit operation to avoid truncation.
-    if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
-        N0->getOperand(0).getValueType().is256BitVector()) {
+    if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
       SExtVT = MVT::v4i64;
-    }
     break;
   case MVT::v8i1:
     SExtVT = MVT::v8i16;
@@ -32930,9 +34718,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
     // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
     // 256-bit because the shuffle is cheaper than sign extending the result of
     // the compare.
-    if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
-        (N0->getOperand(0).getValueType().is256BitVector() ||
-         N0->getOperand(0).getValueType().is512BitVector())) {
+    // TODO: use checkBitcastSrcVectorSize
+    if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
+        (Src.getOperand(0).getValueType().is256BitVector() ||
+         Src.getOperand(0).getValueType().is512BitVector())) {
       SExtVT = MVT::v8i32;
     }
     break;
@@ -32956,8 +34745,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
     return SDValue();
   };
 
-  SDLoc DL(BitCast);
-  SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
+  SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
 
   if (SExtVT == MVT::v64i8) {
     SDValue Lo, Hi;
@@ -32977,7 +34765,11 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
                       DAG.getUNDEF(MVT::v8i16));
     V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
   }
-  return DAG.getZExtOrTrunc(V, DL, VT);
+
+  EVT IntVT =
+      EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
+  V = DAG.getZExtOrTrunc(V, DL, IntVT);
+  return DAG.getBitcast(VT, V);
 }
 
 // Convert a vXi1 constant build vector to the same width scalar integer.
@@ -33054,12 +34846,10 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue createMMXBuildVector(SDValue N, SelectionDAG &DAG,
+static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
-  SDLoc DL(N);
-  unsigned NumElts = N.getNumOperands();
-
-  auto *BV = cast<BuildVectorSDNode>(N);
+  SDLoc DL(BV);
+  unsigned NumElts = BV->getNumOperands();
   SDValue Splat = BV->getSplatValue();
 
   // Build MMX element from integer GPR or SSE float values.
@@ -33107,7 +34897,7 @@ static SDValue createMMXBuildVector(SDValue N, SelectionDAG &DAG,
     Ops.append(NumElts, Splat);
   } else {
     for (unsigned i = 0; i != NumElts; ++i)
-      Ops.push_back(CreateMMXElement(N.getOperand(i)));
+      Ops.push_back(CreateMMXElement(BV->getOperand(i)));
   }
 
   // Use tree of PUNPCKLs to build up general MMX vector.
@@ -33141,14 +34931,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
   // before the setcc result is scalarized on subtargets that don't have legal
   // vxi1 types.
   if (DCI.isBeforeLegalize()) {
-    if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
+    SDLoc dl(N);
+    if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
       return V;
 
     // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
     // type, widen both sides to avoid a trip through memory.
     if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
         Subtarget.hasAVX512()) {
-      SDLoc dl(N);
       N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
       N0 = DAG.getBitcast(MVT::v8i1, N0);
       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
@@ -33159,7 +34949,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
     // type, widen both sides to avoid a trip through memory.
     if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
         Subtarget.hasAVX512()) {
-      SDLoc dl(N);
       unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
       SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
       Ops[0] = N0;
@@ -33213,7 +35002,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
     if (N0.getOpcode() == ISD::BUILD_VECTOR &&
         (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
          SrcVT == MVT::v8i8))
-      return createMMXBuildVector(N0, DAG, Subtarget);
+      return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget);
 
     // Detect bitcasts between element or subvector extraction to x86mmx.
     if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
@@ -33297,66 +35086,16 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-// Given a select, detect the following pattern:
-// 1:    %2 = zext <N x i8> %0 to <N x i32>
-// 2:    %3 = zext <N x i8> %1 to <N x i32>
-// 3:    %4 = sub nsw <N x i32> %2, %3
-// 4:    %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
-// 5:    %6 = sub nsw <N x i32> zeroinitializer, %4
-// 6:    %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
+// Given an ABS node, detect the following pattern:
+// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
 // This is useful as it is the input into a SAD pattern.
-static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
-                              SDValue &Op1) {
-  // Check the condition of the select instruction is greater-than.
-  SDValue SetCC = Select->getOperand(0);
-  if (SetCC.getOpcode() != ISD::SETCC)
-    return false;
-  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
-  if (CC != ISD::SETGT && CC != ISD::SETLT)
+static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
+  SDValue AbsOp1 = Abs->getOperand(0);
+  if (AbsOp1.getOpcode() != ISD::SUB)
     return false;
 
-  SDValue SelectOp1 = Select->getOperand(1);
-  SDValue SelectOp2 = Select->getOperand(2);
-
-  // The following instructions assume SelectOp1 is the subtraction operand
-  // and SelectOp2 is the negation operand.
-  // In the case of SETLT this is the other way around.
-  if (CC == ISD::SETLT)
-    std::swap(SelectOp1, SelectOp2);
-
-  // The second operand of the select should be the negation of the first
-  // operand, which is implemented as 0 - SelectOp1.
-  if (!(SelectOp2.getOpcode() == ISD::SUB &&
-        ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
-        SelectOp2.getOperand(1) == SelectOp1))
-    return false;
-
-  // The first operand of SetCC is the first operand of the select, which is the
-  // difference between the two input vectors.
-  if (SetCC.getOperand(0) != SelectOp1)
-    return false;
-
-  // In SetLT case, The second operand of the comparison can be either 1 or 0.
-  APInt SplatVal;
-  if ((CC == ISD::SETLT) &&
-      !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
-         SplatVal.isOneValue()) ||
-        (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
-    return false;
-
-  // In SetGT case, The second operand of the comparison can be either -1 or 0.
-  if ((CC == ISD::SETGT) &&
-      !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
-        ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
-    return false;
-
-  // The first operand of the select is the difference between the two input
-  // vectors.
-  if (SelectOp1.getOpcode() != ISD::SUB)
-    return false;
-
-  Op0 = SelectOp1.getOperand(0);
-  Op1 = SelectOp1.getOperand(1);
+  Op0 = AbsOp1.getOperand(0);
+  Op1 = AbsOp1.getOperand(1);
 
   // Check if the operands of the sub are zero-extended from vectors of i8.
   if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
@@ -33476,23 +35215,25 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
                      DAG.getIntPtrConstant(0, DL));
 }
 
-// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
+// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
 static SDValue combineHorizontalPredicateResult(SDNode *Extract,
                                                 SelectionDAG &DAG,
                                                 const X86Subtarget &Subtarget) {
-  // Bail without SSE2 or with AVX512VL (which uses predicate registers).
-  if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
+  // Bail without SSE2.
+  if (!Subtarget.hasSSE2())
     return SDValue();
 
   EVT ExtractVT = Extract->getValueType(0);
   unsigned BitWidth = ExtractVT.getSizeInBits();
   if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
-      ExtractVT != MVT::i8)
+      ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
     return SDValue();
 
-  // Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
+  // Check for OR(any_of)/AND(all_of)/XOR(parity) horizontal reduction patterns.
   ISD::NodeType BinOp;
   SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
+  if (!Match && ExtractVT == MVT::i1)
+    Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR});
   if (!Match)
     return SDValue();
 
@@ -33501,53 +35242,104 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
   if (Match.getScalarValueSizeInBits() != BitWidth)
     return SDValue();
 
-  // We require AVX2 for PMOVMSKB for v16i16/v32i8;
-  unsigned MatchSizeInBits = Match.getValueSizeInBits();
-  if (!(MatchSizeInBits == 128 ||
-        (MatchSizeInBits == 256 &&
-         ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
-    return SDValue();
+  SDValue Movmsk;
+  SDLoc DL(Extract);
+  EVT MatchVT = Match.getValueType();
+  unsigned NumElts = MatchVT.getVectorNumElements();
 
-  // Don't bother performing this for 2-element vectors.
-  if (Match.getValueType().getVectorNumElements() <= 2)
-    return SDValue();
+  if (ExtractVT == MVT::i1) {
+    // Special case for (pre-legalization) vXi1 reductions.
+    if (NumElts > 32)
+      return SDValue();
+    if (DAG.getTargetLoweringInfo().isTypeLegal(MatchVT)) {
+      // If this is a legal AVX512 predicate type then we can just bitcast.
+      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+      Movmsk = DAG.getBitcast(MovmskVT, Match);
+    } else {
+      // Use combineBitcastvxi1 to create the MOVMSK.
+      if (NumElts == 32 && !Subtarget.hasInt256()) {
+        SDValue Lo, Hi;
+        std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
+        Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
+        NumElts = 16;
+      }
+      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+      Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
+    }
+    if (!Movmsk)
+      return SDValue();
+    Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, MVT::i32);
+  } else {
+    // Bail with AVX512VL (which uses predicate registers).
+    if (Subtarget.hasVLX())
+      return SDValue();
 
-  // Check that we are extracting a reduction of all sign bits.
-  if (DAG.ComputeNumSignBits(Match) != BitWidth)
-    return SDValue();
+    unsigned MatchSizeInBits = Match.getValueSizeInBits();
+    if (!(MatchSizeInBits == 128 ||
+          (MatchSizeInBits == 256 && Subtarget.hasAVX())))
+      return SDValue();
 
-  // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
-  MVT MaskVT;
-  if (64 == BitWidth || 32 == BitWidth)
-    MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
-                              MatchSizeInBits / BitWidth);
-  else
-    MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+    // Make sure this isn't a vector of 1 element. The perf win from using
+    // MOVMSK diminishes with fewer elements in the reduction, but it is
+    // generally better to get the comparison over to the GPRs as soon as
+    // possible to reduce the number of vector ops.
+    if (Match.getValueType().getVectorNumElements() < 2)
+      return SDValue();
 
-  APInt CompareBits;
+    // Check that we are extracting a reduction of all sign bits.
+    if (DAG.ComputeNumSignBits(Match) != BitWidth)
+      return SDValue();
+
+    if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
+      SDValue Lo, Hi;
+      std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
+      Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
+      MatchSizeInBits = Match.getValueSizeInBits();
+    }
+
+    // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
+    MVT MaskSrcVT;
+    if (64 == BitWidth || 32 == BitWidth)
+      MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+                                   MatchSizeInBits / BitWidth);
+    else
+      MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+
+    SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
+    Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
+    NumElts = MaskSrcVT.getVectorNumElements();
+  }
+  assert(NumElts <= 32 && "Not expecting more than 32 elements");
+
+  if (BinOp == ISD::XOR) {
+    // parity -> (AND (CTPOP(MOVMSK X)), 1)
+    SDValue Mask = DAG.getConstant(1, DL, MVT::i32);
+    SDValue Result = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Movmsk);
+    Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, Mask);
+    return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
+  }
+
+  SDValue CmpC;
   ISD::CondCode CondCode;
   if (BinOp == ISD::OR) {
     // any_of -> MOVMSK != 0
-    CompareBits = APInt::getNullValue(32);
+    CmpC = DAG.getConstant(0, DL, MVT::i32);
     CondCode = ISD::CondCode::SETNE;
   } else {
     // all_of -> MOVMSK == ((1 << NumElts) - 1)
-    CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+    CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32);
     CondCode = ISD::CondCode::SETEQ;
   }
 
-  // Perform the select as i32/i64 and then truncate to avoid partial register
-  // stalls.
-  unsigned ResWidth = std::max(BitWidth, 32u);
-  EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
-  SDLoc DL(Extract);
-  SDValue Zero = DAG.getConstant(0, DL, ResVT);
-  SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
-  SDValue Res = DAG.getBitcast(MaskVT, Match);
-  Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
-  Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
-                        Ones, Zero, CondCode);
-  return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+  // The setcc produces an i8 of 0/1, so extend that to the result width and
+  // negate to get the final 0/-1 mask value.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT SetccVT =
+      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
+  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
+  SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
+  SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
+  return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
 }
 
 static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
@@ -33592,7 +35384,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
 
   // If there was a match, we want Root to be a select that is the root of an
   // abs-diff pattern.
-  if (!Root || (Root.getOpcode() != ISD::VSELECT))
+  if (!Root || Root.getOpcode() != ISD::ABS)
     return SDValue();
 
   // Check whether we have an abs-diff pattern feeding into the select.
@@ -33651,15 +35443,19 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
   if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
     return SDValue();
 
+  SDValue SrcBC = peekThroughBitcasts(Src);
+
   // Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
-  if (X86ISD::VBROADCAST == Src.getOpcode() &&
-      Src.getOperand(0).getValueType() == VT)
-    return Src.getOperand(0);
+  if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
+    SDValue SrcOp = SrcBC.getOperand(0);
+    if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
+      return DAG.getBitcast(VT, SrcOp);
+  }
 
   // Resolve the target shuffle inputs and mask.
   SmallVector<int, 16> Mask;
   SmallVector<SDValue, 2> Ops;
-  if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
+  if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
     return SDValue();
 
   // Attempt to narrow/widen the shuffle mask to the correct size.
@@ -33704,7 +35500,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
                                 : DAG.getConstant(0, dl, VT);
 
   SDValue SrcOp = Ops[SrcIdx / Mask.size()];
-  SrcOp = DAG.getBitcast(SrcVT, SrcOp);
   SrcIdx = SrcIdx % Mask.size();
 
   // We can only extract other elements from 128-bit vectors and in certain
@@ -33714,6 +35509,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
   if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
       ((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
     assert(SrcSVT == VT && "Unexpected extraction type");
+    SrcOp = DAG.getBitcast(SrcVT, SrcOp);
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
                        DAG.getIntPtrConstant(SrcIdx, dl));
   }
@@ -33723,6 +35519,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
     assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
            "Unexpected extraction type");
     unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
+    SrcOp = DAG.getBitcast(SrcVT, SrcOp);
     SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
                                 DAG.getIntPtrConstant(SrcIdx, dl));
     return DAG.getZExtOrTrunc(ExtOp, dl, VT);
@@ -33731,6 +35528,155 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Extracting a scalar FP value from vector element 0 is free, so extract each
+/// operand first and do the math as a scalar op. Returns SDValue() if no match.
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
+  SDValue Vec = ExtElt->getOperand(0);
+  SDValue Index = ExtElt->getOperand(1);
+  EVT VT = ExtElt->getValueType(0);
+  EVT VecVT = Vec.getValueType();
+
+  // TODO: If this is a unary/expensive/expand op, allow extraction from a
+  // non-zero element because the shuffle+scalar op will be cheaper?
+  if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
+    return SDValue();
+
+  // Vector FP compares don't fit the pattern of FP math ops (propagate, not
+  // extract, the condition code), so deal with those as a special-case.
+  if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {
+    EVT OpVT = Vec.getOperand(0).getValueType().getScalarType();
+    if (OpVT != MVT::f32 && OpVT != MVT::f64)
+      return SDValue();
+
+    // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC
+    SDLoc DL(ExtElt);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
+                               Vec.getOperand(0), Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
+                               Vec.getOperand(1), Index);
+    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
+  }
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return SDValue();
+
+  // Vector FP selects don't fit the pattern of FP math ops (because the
+  // condition has a different type and we have to change the opcode), so deal
+  // with those here.
+  // FIXME: This is restricted to pre type legalization by ensuring the setcc
+  // has i1 elements. If we loosen this we need to convert vector bool to a
+  // scalar bool.
+  if (Vec.getOpcode() == ISD::VSELECT &&
+      Vec.getOperand(0).getOpcode() == ISD::SETCC &&
+      Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
+      Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
+    // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
+    SDLoc DL(ExtElt);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                               Vec.getOperand(0).getValueType().getScalarType(),
+                               Vec.getOperand(0), Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(1), Index);
+    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                               Vec.getOperand(2), Index);
+    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+  }
+
+  // TODO: This switch could include FNEG and the x86-specific FP logic ops
+  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
+  // missed load folding and fma+fneg combining.
+  switch (Vec.getOpcode()) {
+  case ISD::FMA: // Begin 3 operands
+  case ISD::FMAD:
+  case ISD::FADD: // Begin 2 operands
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::FMINNUM_IEEE:
+  case ISD::FMAXNUM_IEEE:
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUM:
+  case X86ISD::FMAX:
+  case X86ISD::FMIN:
+  case ISD::FABS: // Begin 1 operand
+  case ISD::FSQRT:
+  case ISD::FRINT:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FROUND:
+  case ISD::FFLOOR:
+  case X86ISD::FRCP:
+  case X86ISD::FRSQRT: {
+    // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
+    SDLoc DL(ExtElt);
+    SmallVector<SDValue, 4> ExtOps;
+    for (SDValue Op : Vec->ops())
+      ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
+    return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
+  }
+  default:
+    return SDValue();
+  }
+  llvm_unreachable("All opcodes should return within switch");
+}
+
+/// Try to convert a vector reduction sequence composed of binops and shuffles
+/// into horizontal ops (HADD). Only ISD::ADD reductions are matched so far.
+static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
+                                            const X86Subtarget &Subtarget) {
+  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
+  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
+    return SDValue();
+  SDValue Index = ExtElt->getOperand(1);
+  if (!isNullConstant(Index))
+    return SDValue();
+
+  // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros.
+  ISD::NodeType Opc;
+  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD});
+  if (!Rdx)
+    return SDValue();
+
+  EVT VT = ExtElt->getValueType(0);
+  EVT VecVT = ExtElt->getOperand(0).getValueType();
+  if (VecVT.getScalarType() != VT)
+    return SDValue();
+
+  unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
+  SDLoc DL(ExtElt);
+
+  // 256-bit horizontal instructions operate on 128-bit chunks rather than
+  // across the whole vector, so we need an extract + hop preliminary stage.
+  // This is the only step where the operands of the hop are not the same value.
+  // TODO: We could extend this to handle 512-bit or even longer vectors.
+  if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
+      ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
+    unsigned NumElts = VecVT.getVectorNumElements();
+    SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
+    SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
+    VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2);
+    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo);
+  }
+  if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
+      !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
+    return SDValue();
+
+  // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
+  assert(Rdx.getValueType() == VecVT && "Unexpected reduction match");
+  unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
+  for (unsigned i = 0; i != ReductionSteps; ++i)
+    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);
+
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+}
+
 /// Detect vector gather/scatter index generation and convert it from being a
 /// bunch of shuffles and extracts into a somewhat faster sequence.
 /// For i686, the best sequence is apparently storing the value and loading
@@ -33741,23 +35687,48 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
     return NewOp;
 
+  SDValue InputVector = N->getOperand(0);
+  SDValue EltIdx = N->getOperand(1);
+  auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);
+
+  EVT SrcVT = InputVector.getValueType();
+  EVT VT = N->getValueType(0);
+  SDLoc dl(InputVector);
+  bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
+
+  if (CIdx && CIdx->getAPIntValue().uge(SrcVT.getVectorNumElements()))
+    return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
+
+  // Integer Constant Folding.
+  if (CIdx && VT.isInteger()) {
+    APInt UndefVecElts;
+    SmallVector<APInt, 16> EltBits;
+    unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
+    if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
+                                      EltBits, true, false)) {
+      uint64_t Idx = CIdx->getZExtValue();
+      if (UndefVecElts[Idx])
+        return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
+      return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
+                             dl, VT);
+    }
+  }
+
   // TODO - Remove this once we can handle the implicit zero-extension of
   // X86ISD::PEXTRW/X86ISD::PEXTRB in:
   // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
   // combineBasicSADPattern.
-  if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+  if (IsPextr) {
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    if (TLI.SimplifyDemandedBits(
+            SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
+      return SDValue(N, 0);
     return SDValue();
+  }
 
   if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
     return NewOp;
 
-  SDValue InputVector = N->getOperand(0);
-  SDValue EltIdx = N->getOperand(1);
-
-  EVT SrcVT = InputVector.getValueType();
-  EVT VT = N->getValueType(0);
-  SDLoc dl(InputVector);
-
   // Detect mmx extraction of all bits as a i64. It works better as a bitcast.
   if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
       VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
@@ -33778,16 +35749,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
   }
 
-  if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
-      isa<ConstantSDNode>(EltIdx) &&
-      isa<ConstantSDNode>(InputVector.getOperand(0))) {
-    uint64_t ExtractedElt = N->getConstantOperandVal(1);
-    auto *InputC = cast<ConstantSDNode>(InputVector.getOperand(0));
-    const APInt &InputValue = InputC->getAPIntValue();
-    uint64_t Res = InputValue[ExtractedElt];
-    return DAG.getConstant(Res, dl, MVT::i1);
-  }
-
   // Check whether this extract is the root of a sum of absolute differences
   // pattern. This has to be done here because we really want it to happen
   // pre-legalization,
@@ -33802,6 +35763,45 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
     return MinMax;
 
+  if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget))
+    return V;
+
+  if (SDValue V = scalarizeExtEltFP(N, DAG))
+    return V;
+
+  // Attempt to extract a i1 element by using MOVMSK to extract the signbits
+  // and then testing the relevant element.
+  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
+    SmallVector<SDNode *, 16> BoolExtracts;
+    auto IsBoolExtract = [&BoolExtracts](SDNode *Use) {
+      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          isa<ConstantSDNode>(Use->getOperand(1)) &&
+          Use->getValueType(0) == MVT::i1) {
+        BoolExtracts.push_back(Use);
+        return true;
+      }
+      return false;
+    };
+    if (all_of(InputVector->uses(), IsBoolExtract) &&
+        BoolExtracts.size() > 1) {
+      unsigned NumSrcElts = SrcVT.getVectorNumElements();
+      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
+      if (SDValue BC =
+              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
+        for (SDNode *Use : BoolExtracts) {
+          // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
+          unsigned MaskIdx = Use->getConstantOperandVal(1);
+          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
+          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
+          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
+          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
+          DCI.CombineTo(Use, Res);
+        }
+        return SDValue(N, 0);
+      }
+    }
+  }
+
   return SDValue();
 }
 
@@ -33825,11 +35825,15 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
 
   assert(CondVT.isVector() && "Vector select expects a vector selector!");
 
-  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
   // Check if the first operand is all zeros and Cond type is vXi1.
   // This situation only applies to avx512.
-  if (TValIsAllZeros  && Subtarget.hasAVX512() && Cond.hasOneUse() &&
-      CondVT.getVectorElementType() == MVT::i1) {
+  // TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
+  // TODO: Can we assert that both operands are not zeros (because that should
+  //       get simplified at node creation time)?
+  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+  if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
+      Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
     // Invert the cond to not(cond) : xor(op,allones)=not(op)
     SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
     // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
@@ -33844,12 +35848,10 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
   if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
-  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
-
   // Try to invert the condition if true value is not all 1s and false value is
-  // not all 0s.
-  if (!TValIsAllOnes && !FValIsAllZeros &&
+  // not all 0s. Only do this if the condition has one use.
+  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+  if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
       // Check if the selector will be produced by CMPP*/PCMP*.
       Cond.getOpcode() == ISD::SETCC &&
       // Check if SETCC has already been promoted.
@@ -33907,6 +35909,39 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// If both arms of a vector select are concatenated vectors, split the select,
+/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
+///   vselect Cond, (concat T0, T1), (concat F0, F1) -->
+///   concat (vselect (split Cond), T0, F0), (vselect (split Cond), T1, F1)
+static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget &Subtarget) {
+  unsigned Opcode = N->getOpcode();
+  if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT)
+    return SDValue();
+
+  // TODO: Split 512-bit vectors too?
+  EVT VT = N->getValueType(0);
+  if (!VT.is256BitVector())
+    return SDValue();
+
+  // TODO: Split as long as any 2 of the 3 operands are concatenated?
+  SDValue Cond = N->getOperand(0);
+  SDValue TVal = N->getOperand(1);
+  SDValue FVal = N->getOperand(2);
+  SmallVector<SDValue, 4> CatOpsT, CatOpsF;
+  if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
+      !collectConcatOps(TVal.getNode(), CatOpsT) ||
+      !collectConcatOps(FVal.getNode(), CatOpsF))
+    return SDValue();
+
+  auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
+                            ArrayRef<SDValue> Ops) {
+    return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops);
+  };
+  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal },
+                          makeBlend, /*CheckBWI=*/ false);
+}
+
 static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
   SDValue Cond = N->getOperand(0);
   SDValue LHS = N->getOperand(1);
@@ -33973,7 +36008,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
 /// If this is a *dynamic* select (non-constant condition) and we can match
 /// this node with one of the variable blend instructions, restructure the
 /// condition so that blends can use the high (sign) bit of each element.
-/// This function will also call SimplfiyDemandedBits on already created
+/// This function will also call SimplifyDemandedBits on already created
 /// BLENDV to perform additional simplifications.
 static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
                                            TargetLowering::DAGCombinerInfo &DCI,
@@ -34268,6 +36303,42 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
   }
 
+  // AVX512 - Extend select with zero to merge with target shuffle.
+  // select(mask, extract_subvector(shuffle(x)), zero) -->
+  // extract_subvector(select(insert_subvector(mask), shuffle(x), zero))
+  // TODO - support non target shuffles as well.
+  if (Subtarget.hasAVX512() && CondVT.isVector() &&
+      CondVT.getVectorElementType() == MVT::i1) {
+    auto SelectableOp = [&TLI](SDValue Op) {
+      return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+             isTargetShuffle(Op.getOperand(0).getOpcode()) &&
+             isNullConstant(Op.getOperand(1)) &&
+             TLI.isTypeLegal(Op.getOperand(0).getValueType()) &&
+             Op.hasOneUse() && Op.getOperand(0).hasOneUse();
+    };
+
+    bool SelectableLHS = SelectableOp(LHS);
+    bool SelectableRHS = SelectableOp(RHS);
+    bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode());
+    bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+    if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) {
+      EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType()
+                                : RHS.getOperand(0).getValueType();
+      unsigned NumSrcElts = SrcVT.getVectorNumElements();
+      EVT SrcCondVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumSrcElts);
+      LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL,
+                            VT.getSizeInBits());
+      RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL,
+                            VT.getSizeInBits());
+      Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT,
+                         DAG.getUNDEF(SrcCondVT), Cond,
+                         DAG.getIntPtrConstant(0, DL));
+      SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS);
+      return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
+    }
+  }
+
   if (SDValue V = combineSelectOfTwoConstants(N, DAG))
     return V;
 
@@ -34338,14 +36409,16 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
           // If the RHS is a constant we have to reverse the const
           // canonicalization.
           // x > C-1 ? x+-C : 0 --> subus x, C
-          // TODO: Handle build_vectors with undef elements.
           auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
-            return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
+            return (!Op && !Cond) ||
+                   (Op && Cond &&
+                    Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
           };
           if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
-              ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
-            OpRHS = DAG.getNode(ISD::SUB, DL, VT,
-                                DAG.getConstant(0, DL, VT), OpRHS);
+              ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+                                        /*AllowUndefs*/ true)) {
+            OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                                OpRHS);
             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
           }
 
@@ -34432,6 +36505,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
     return V;
 
+  if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
+    return V;
+
   // Custom action for SELECT MMX
   if (VT == MVT::x86mmx) {
     LHS = DAG.getBitcast(MVT::i64, LHS);
@@ -34715,7 +36791,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
 // When legalizing carry, we create carries via add X, -1
 // If that comes from an actual carry, via setcc, we use the
 // carry directly.
-static SDValue combineCarryThroughADD(SDValue EFLAGS) {
+static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
   if (EFLAGS.getOpcode() == X86ISD::ADD) {
     if (isAllOnesConstant(EFLAGS.getOperand(1))) {
       SDValue Carry = EFLAGS.getOperand(0);
@@ -34728,8 +36804,34 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS) {
         Carry = Carry.getOperand(0);
       if (Carry.getOpcode() == X86ISD::SETCC ||
           Carry.getOpcode() == X86ISD::SETCC_CARRY) {
-        if (Carry.getConstantOperandVal(0) == X86::COND_B)
-          return Carry.getOperand(1);
+        // TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB?
+        uint64_t CarryCC = Carry.getConstantOperandVal(0);
+        SDValue CarryOp1 = Carry.getOperand(1);
+        if (CarryCC == X86::COND_B)
+          return CarryOp1;
+        if (CarryCC == X86::COND_A) {
+          // Try to convert COND_A into COND_B in an attempt to facilitate
+          // materializing "setb reg".
+          //
+          // Do not flip "e > c", where "c" is a constant, because Cmp
+          // instruction cannot take an immediate as its first operand.
+          //
+          if (CarryOp1.getOpcode() == X86ISD::SUB &&
+              CarryOp1.getNode()->hasOneUse() &&
+              CarryOp1.getValueType().isInteger() &&
+              !isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
+            SDValue SubCommute =
+                DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
+                            CarryOp1.getOperand(1), CarryOp1.getOperand(0));
+            return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
+          }
+        }
+        // If this is a check of the z flag of an add with 1, switch to the
+        // C flag.
+        if (CarryCC == X86::COND_E &&
+            CarryOp1.getOpcode() == X86ISD::ADD &&
+            isOneConstant(CarryOp1.getOperand(1)))
+          return CarryOp1;
       }
     }
   }
@@ -34744,7 +36846,7 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
                                   SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
   if (CC == X86::COND_B)
-    if (SDValue Flags = combineCarryThroughADD(EFLAGS))
+    if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
       return Flags;
 
   if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
@@ -34763,6 +36865,10 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
   X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
   SDValue Cond = N->getOperand(3);
 
+  // cmov X, X, ?, ? --> X
+  if (TrueOp == FalseOp)
+    return TrueOp;
+
   // Try to simplify the EFLAGS and condition code operands.
   // We can't always do this as FCMOV only supports a subset of X86 cond.
   if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
@@ -35044,7 +37150,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   // pmulld is supported since SSE41. It is better to use pmulld
   // instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
   // the expansion.
-  bool OptForMinSize = DAG.getMachineFunction().getFunction().optForMinSize();
+  bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize();
   if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
     return SDValue();
 
@@ -35283,8 +37389,8 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
   // Use SplitOpsAndApply to handle AVX splitting.
   auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                            ArrayRef<SDValue> Ops) {
-    MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
-    return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
+    MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+    return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
   };
   return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
                           { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) },
@@ -35352,7 +37458,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (!MulConstantOptimization)
     return SDValue();
   // An imul is usually smaller than the alternative sequence.
-  if (DAG.getMachineFunction().getFunction().optForMinSize())
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
@@ -35489,7 +37595,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
       N1C && N0.getOpcode() == ISD::AND &&
       N0.getOperand(1).getOpcode() == ISD::Constant) {
     SDValue N00 = N0.getOperand(0);
-    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    APInt Mask = N0.getConstantOperandAPInt(1);
     Mask <<= N1C->getAPIntValue();
     bool MaskOK = false;
     // We can handle cases concerning bit-widening nodes containing setcc_c if
@@ -35638,24 +37744,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
-                            TargetLowering::DAGCombinerInfo &DCI,
-                            const X86Subtarget &Subtarget) {
-  if (N->getOpcode() == ISD::SHL)
-    if (SDValue V = combineShiftLeft(N, DAG))
-      return V;
-
-  if (N->getOpcode() == ISD::SRA)
-    if (SDValue V = combineShiftRightArithmetic(N, DAG))
-      return V;
-
-  if (N->getOpcode() == ISD::SRL)
-    if (SDValue V = combineShiftRightLogical(N, DAG, DCI))
-      return V;
-
-  return SDValue();
-}
-
 static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget &Subtarget) {
@@ -35677,8 +37765,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
   // Constant Folding.
   APInt UndefElts0, UndefElts1;
   SmallVector<APInt, 32> EltBits0, EltBits1;
-  if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) &&
-      (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) &&
+  if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&
+      (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&
       getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
       getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
     unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -35750,10 +37838,7 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
 
   // Attempt to combine as shuffle.
   SDValue Op(N, 0);
-  if (SDValue Res =
-          combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-                                        /*HasVarMask*/ false,
-                                        /*AllowVarMask*/ true, DAG, Subtarget))
+  if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
     return Res;
 
   return SDValue();
@@ -35766,11 +37851,22 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
           X86ISD::VSRL == N->getOpcode()) &&
          "Unexpected shift opcode");
   EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
 
   // Shift zero -> zero.
-  if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+  if (ISD::isBuildVectorAllZeros(N0.getNode()))
     return DAG.getConstant(0, SDLoc(N), VT);
 
+  // Detect constant shift amounts.
+  APInt UndefElts;
+  SmallVector<APInt, 32> EltBits;
+  if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
+    unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
+    return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
+                                      EltBits[0].getZExtValue(), DAG);
+  }
+
   APInt KnownUndef, KnownZero;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
@@ -35829,9 +37925,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
   // We can decode 'whole byte' logical bit shifts as shuffles.
   if (LogicalShift && (ShiftVal % 8) == 0) {
     SDValue Op(N, 0);
-    if (SDValue Res = combineX86ShufflesRecursively(
-            {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-            /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
   }
 
@@ -35864,18 +37958,20 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
 static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
-  assert(
-      ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
-       (N->getOpcode() == X86ISD::PINSRW &&
-        N->getValueType(0) == MVT::v8i16)) &&
-      "Unexpected vector insertion");
+  EVT VT = N->getValueType(0);
+  assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
+          (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16)) &&
+         "Unexpected vector insertion");
+
+  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+                               APInt::getAllOnesValue(NumBitsPerElt), DCI))
+    return SDValue(N, 0);
 
   // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
   SDValue Op(N, 0);
-  if (SDValue Res =
-          combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-                                        /*HasVarMask*/ false,
-                                        /*AllowVarMask*/ true, DAG, Subtarget))
+  if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
     return Res;
 
   return SDValue();
@@ -35894,8 +37990,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
   if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
-    SDValue CMP0 = N0->getOperand(1);
-    SDValue CMP1 = N1->getOperand(1);
+    SDValue CMP0 = N0.getOperand(1);
+    SDValue CMP1 = N1.getOperand(1);
     SDLoc DL(N);
 
     // The SETCCs should both refer to the same CMP.
@@ -35987,6 +38083,34 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Match (xor X, -1) -> X.
+// Match extract_subvector(xor X, -1) -> extract_subvector(X).
+// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
+  V = peekThroughBitcasts(V);
+  if (V.getOpcode() == ISD::XOR &&
+      ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
+    return V.getOperand(0);
+  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+    if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
+      Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
+                         Not, V.getOperand(1));
+    }
+  }
+  SmallVector<SDValue, 2> CatOps;
+  if (collectConcatOps(V.getNode(), CatOps)) {
+    for (SDValue &CatOp : CatOps) {
+      SDValue NotCat = IsNOT(CatOp, DAG);
+      if (!NotCat) return SDValue();
+      CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
+    }
+    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
+  }
+  return SDValue();
+}
+
 /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
 static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::AND);
@@ -35996,15 +38120,14 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
 
   SDValue X, Y;
-  SDValue N0 = peekThroughBitcasts(N->getOperand(0));
-  SDValue N1 = peekThroughBitcasts(N->getOperand(1));
-  if (N0.getOpcode() == ISD::XOR &&
-      ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) {
-    X = N0.getOperand(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (SDValue Not = IsNOT(N0, DAG)) {
+    X = Not;
     Y = N1;
-  } else if (N1.getOpcode() == ISD::XOR &&
-             ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
-    X = N1.getOperand(0);
+  } else if (SDValue Not = IsNOT(N1, DAG)) {
+    X = Not;
     Y = N0;
   } else
     return SDValue();
@@ -36046,7 +38169,7 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // The type of the truncated inputs.
-  if (N0->getOperand(0).getValueType() != VT)
+  if (N0.getOperand(0).getValueType() != VT)
     return SDValue();
 
   // The right side has to be a 'trunc' or a constant vector.
@@ -36062,9 +38185,9 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // Set N0 and N1 to hold the inputs to the new wide operation.
-  N0 = N0->getOperand(0);
+  N0 = N0.getOperand(0);
   if (RHSTrunc)
-    N1 = N1->getOperand(0);
+    N1 = N1.getOperand(0);
   else
     N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
 
@@ -36088,34 +38211,35 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
 /// unnecessary moves from SSE to integer registers.
 static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
-  unsigned FPOpcode = ISD::DELETED_NODE;
-  if (N->getOpcode() == ISD::AND)
-    FPOpcode = X86ISD::FAND;
-  else if (N->getOpcode() == ISD::OR)
-    FPOpcode = X86ISD::FOR;
-  else if (N->getOpcode() == ISD::XOR)
-    FPOpcode = X86ISD::FXOR;
-
-  assert(FPOpcode != ISD::DELETED_NODE &&
-         "Unexpected input node for FP logic conversion");
-
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDLoc DL(N);
-  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
-      ((Subtarget.hasSSE1() && VT == MVT::i32) ||
-       (Subtarget.hasSSE2() && VT == MVT::i64))) {
-    SDValue N00 = N0.getOperand(0);
-    SDValue N10 = N1.getOperand(0);
-    EVT N00Type = N00.getValueType();
-    EVT N10Type = N10.getValueType();
-    if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
-      SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
-      return DAG.getBitcast(VT, FPLogic);
-    }
+
+  if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  SDValue N10 = N1.getOperand(0);
+  EVT N00Type = N00.getValueType();
+  EVT N10Type = N10.getValueType();
+
+  // Ensure that both types are the same and are legal scalar fp types.
+  if (N00Type != N10Type ||
+      !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
+        (Subtarget.hasSSE2() && N00Type == MVT::f64)))
+    return SDValue();
+
+  unsigned FPOpcode;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unexpected input node for FP logic conversion");
+  case ISD::AND: FPOpcode = X86ISD::FAND; break;
+  case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
+  case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
   }
-  return SDValue();
+
+  SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+  return DAG.getBitcast(VT, FPLogic);
 }
 
 /// If this is a zero/all-bits result that is bitwise-anded with a low bits
@@ -36371,6 +38495,24 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineParity(N, DAG, Subtarget))
     return V;
 
+  // Match all-of bool scalar reductions into a bitcast/movmsk + cmp.
+  // TODO: Support multiple SrcOps.
+  if (VT == MVT::i1) {
+    SmallVector<SDValue, 2> SrcOps;
+    if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
+        SrcOps.size() == 1) {
+      SDLoc dl(N);
+      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
+      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+      if (Mask) {
+        APInt AllBits = APInt::getAllOnesValue(NumElts);
+        return DAG.getSetCC(dl, MVT::i1, Mask,
+                            DAG.getConstant(AllBits, dl, MaskVT), ISD::SETEQ);
+      }
+    }
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -36392,9 +38534,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   // Attempt to recursively combine a bitmask AND with shuffles.
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
     SDValue Op(N, 0);
-    if (SDValue Res = combineX86ShufflesRecursively(
-            {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-            /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
   }
 
@@ -36440,6 +38580,52 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
+static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
+  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
+
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
+    return SDValue();
+
+  SDValue N0 = peekThroughBitcasts(N->getOperand(0));
+  SDValue N1 = peekThroughBitcasts(N->getOperand(1));
+  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+    return SDValue();
+
+  // On XOP we'll lower to PCMOV so accept one use, otherwise only
+  // do this if either mask has multiple uses already.
+  if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
+        !N1.getOperand(1).hasOneUse()))
+    return SDValue();
+
+  // Attempt to extract constant byte masks.
+  APInt UndefElts0, UndefElts1;
+  SmallVector<APInt, 32> EltBits0, EltBits1;
+  if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
+                                     false, false))
+    return SDValue();
+  if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
+                                     false, false))
+    return SDValue();
+
+  for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
+    // TODO - add UNDEF elts support.
+    if (UndefElts0[i] || UndefElts1[i])
+      return SDValue();
+    if (EltBits0[i] != ~EltBits1[i])
+      return SDValue();
+  }
+
+  SDLoc DL(N);
+  SDValue X = N->getOperand(0);
+  SDValue Y =
+      DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
+                  DAG.getBitcast(VT, N1.getOperand(0)));
+  return DAG.getNode(ISD::OR, DL, VT, X, Y);
+}
+
 // Try to match OR(AND(~MASK,X),AND(MASK,Y)) logic pattern.
 static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
   if (N->getOpcode() != ISD::OR)
@@ -36472,6 +38658,68 @@ static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
   return true;
 }
 
+// Try to match:
+//   (or (and (M, (sub 0, X)), (pandn M, X)))
+// which is a special case of vselect:
+//   (vselect M, (sub 0, X), X)
+// Per:
+// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+// We know that, if fNegate is 0 or 1:
+//   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+//
+// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+//   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+//   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
+// This lets us transform our vselect to:
+//   (add (xor X, M), (and M, 1))
+// And further to:
+//   (sub (xor X, M), M)
+static SDValue combineLogicBlendIntoConditionalNegate(
+    EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
+    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+  EVT MaskVT = Mask.getValueType();
+  assert(MaskVT.isInteger() &&
+         DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
+         "Mask must be zero/all-bits");
+
+  if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
+    return SDValue();
+  if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
+    return SDValue();
+
+  auto IsNegV = [](SDNode *N, SDValue V) {
+    return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+           ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+  };
+
+  SDValue V;
+  if (IsNegV(Y.getNode(), X))
+    V = X;
+  else if (IsNegV(X.getNode(), Y))
+    V = Y;
+  else
+    return SDValue();
+
+  SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
+  SDValue SubOp2 = Mask;
+
+  // If the negate was on the false side of the select, then
+  // the operands of the SUB need to be swapped. PR 27251.
+  // This is because the pattern being matched above is
+  // (vselect M, (sub (0, X), X)  -> (sub (xor X, M), M)
+  // but if the pattern matched was
+  // (vselect M, X, (sub (0, X))), that is really negation of the pattern
+  // above, -(vselect M, (sub 0, X), X), and therefore the replacement
+  // pattern also needs to be a negation of the replacement pattern above.
+  // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
+  // sub accomplishes the negation of the replacement pattern.
+  if (V == Y)
+    std::swap(SubOp1, SubOp2);
+
+  SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
+  return DAG.getBitcast(VT, Res);
+}
+
 // Try to fold:
 //   (or (and (m, y), (pandn m, x)))
 // into:
@@ -36507,55 +38755,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 
   SDLoc DL(N);
 
-  // Try to match:
-  //   (or (and (M, (sub 0, X)), (pandn M, X)))
-  // which is a special case of vselect:
-  //   (vselect M, (sub 0, X), X)
-  // Per:
-  // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
-  // We know that, if fNegate is 0 or 1:
-  //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
-  //
-  // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
-  //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
-  //   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
-  // This lets us transform our vselect to:
-  //   (add (xor X, M), (and M, 1))
-  // And further to:
-  //   (sub (xor X, M), M)
-  if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
-      DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
-    auto IsNegV = [](SDNode *N, SDValue V) {
-      return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
-        ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
-    };
-    SDValue V;
-    if (IsNegV(Y.getNode(), X))
-      V = X;
-    else if (IsNegV(X.getNode(), Y))
-      V = Y;
-
-    if (V) {
-      SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
-      SDValue SubOp2 = Mask;
-
-      // If the negate was on the false side of the select, then
-      // the operands of the SUB need to be swapped. PR 27251.
-      // This is because the pattern being matched above is
-      // (vselect M, (sub (0, X), X)  -> (sub (xor X, M), M)
-      // but if the pattern matched was
-      // (vselect M, X, (sub (0, X))), that is really negation of the pattern
-      // above, -(vselect M, (sub 0, X), X), and therefore the replacement
-      // pattern also needs to be a negation of the replacement pattern above.
-      // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
-      // sub accomplishes the negation of the replacement pattern.
-      if (V == Y)
-         std::swap(SubOp1, SubOp2);
-
-      SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
-      return DAG.getBitcast(VT, Res);
-    }
-  }
+  // Attempt to combine to conditional negate: (sub (xor X, M), M)
+  if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
+                                                           DAG, Subtarget))
+    return Res;
 
   // PBLENDVB is only available on SSE 4.1.
   if (!Subtarget.hasSSE41())
@@ -36665,8 +38868,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
     // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
     if (RHS->getOpcode() == ISD::OR)
       std::swap(LHS, RHS);
-    EVT VT = OR->getValueType(0);
-    SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
     if (!NewRHS)
       return SDValue();
     Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
@@ -36702,15 +38904,16 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
     return FPLogic;
 
+  if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
+    return R;
+
   if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
     return R;
 
   // Attempt to recursively combine an OR of shuffles.
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
     SDValue Op(N, 0);
-    if (SDValue Res = combineX86ShufflesRecursively(
-            {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-            /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
   }
 
@@ -36718,7 +38921,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
-  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
   unsigned Bits = VT.getScalarSizeInBits();
 
   // SHLD/SHRD instructions have lower register pressure, but on some
@@ -36747,14 +38950,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   SDValue ShMsk0;
   if (ShAmt0.getOpcode() == ISD::AND &&
       isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
-      ShAmt0.getConstantOperandVal(1) == (Bits - 1)) {
+      ShAmt0.getConstantOperandAPInt(1) == (Bits - 1)) {
     ShMsk0 = ShAmt0;
     ShAmt0 = ShAmt0.getOperand(0);
   }
   SDValue ShMsk1;
   if (ShAmt1.getOpcode() == ISD::AND &&
       isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
-      ShAmt1.getConstantOperandVal(1) == (Bits - 1)) {
+      ShAmt1.getConstantOperandAPInt(1) == (Bits - 1)) {
     ShMsk1 = ShAmt1;
     ShAmt1 = ShAmt1.getOperand(0);
   }
@@ -36765,46 +38968,55 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     ShAmt1 = ShAmt1.getOperand(0);
 
   SDLoc DL(N);
-  unsigned Opc = X86ISD::SHLD;
+  unsigned Opc = ISD::FSHL;
   SDValue Op0 = N0.getOperand(0);
   SDValue Op1 = N1.getOperand(0);
-  if (ShAmt0.getOpcode() == ISD::SUB ||
-      ShAmt0.getOpcode() == ISD::XOR) {
-    Opc = X86ISD::SHRD;
+  if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) {
+    Opc = ISD::FSHR;
     std::swap(Op0, Op1);
     std::swap(ShAmt0, ShAmt1);
     std::swap(ShMsk0, ShMsk1);
   }
 
-  // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
-  // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
-  // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
-  // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
-  // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
-  // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
+  auto GetFunnelShift = [&DAG, &DL, VT, Opc](SDValue Op0, SDValue Op1,
+                                             SDValue Amt) {
+    if (Opc == ISD::FSHR)
+      std::swap(Op0, Op1);
+    return DAG.getNode(Opc, DL, VT, Op0, Op1,
+                       DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Amt));
+  };
+
+  // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C )
+  // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> FSHR( Y, X, C )
+  // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C )
+  // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C )
+  // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> FSHL( X, Y, C )
+  // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> FSHR( Y, X, C )
   if (ShAmt1.getOpcode() == ISD::SUB) {
     SDValue Sum = ShAmt1.getOperand(0);
     if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
       SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+      if (ShAmt1Op1.getOpcode() == ISD::AND &&
+          isa<ConstantSDNode>(ShAmt1Op1.getOperand(1)) &&
+          ShAmt1Op1.getConstantOperandAPInt(1) == (Bits - 1)) {
+        ShMsk1 = ShAmt1Op1;
+        ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+      }
       if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
         ShAmt1Op1 = ShAmt1Op1.getOperand(0);
       if ((SumC->getAPIntValue() == Bits ||
            (SumC->getAPIntValue() == 0 && ShMsk1)) &&
           ShAmt1Op1 == ShAmt0)
-        return DAG.getNode(Opc, DL, VT, Op0, Op1,
-                           DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+        return GetFunnelShift(Op0, Op1, ShAmt0);
     }
   } else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
     auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
     if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
-      return DAG.getNode(Opc, DL, VT,
-                         N0.getOperand(0), N1.getOperand(0),
-                         DAG.getNode(ISD::TRUNCATE, DL,
-                                       MVT::i8, ShAmt0));
+      return GetFunnelShift(Op0, Op1, ShAmt0);
   } else if (ShAmt1.getOpcode() == ISD::XOR) {
     SDValue Mask = ShAmt1.getOperand(1);
     if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
-      unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
+      unsigned InnerShift = (ISD::FSHL == Opc ? ISD::SRL : ISD::SHL);
       SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
       if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
         ShAmt1Op0 = ShAmt1Op0.getOperand(0);
@@ -36812,15 +39024,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
           (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
         if (Op1.getOpcode() == InnerShift &&
             isa<ConstantSDNode>(Op1.getOperand(1)) &&
-            Op1.getConstantOperandVal(1) == 1) {
-          return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
-                             DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+            Op1.getConstantOperandAPInt(1) == 1) {
+          return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
         }
         // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
         if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
             Op1.getOperand(0) == Op1.getOperand(1)) {
-          return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
-                             DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+          return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
         }
       }
     }
@@ -36862,7 +39072,7 @@ static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
 
   // Make sure the shift amount extracts the sign bit.
   if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
-      Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
+      Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1))
     return SDValue();
 
   // Create a greater-than comparison against -1.
@@ -36915,13 +39125,10 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // The shift should be smearing the sign bit across each vector element.
-  auto *ShiftBV = dyn_cast<BuildVectorSDNode>(Shift.getOperand(1));
-  if (!ShiftBV)
-    return SDValue();
-
-  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
-  auto *ShiftAmt = ShiftBV->getConstantSplatNode();
-  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
+  auto *ShiftAmt =
+      isConstOrConstSplat(Shift.getOperand(1), /*AllowUndefs*/ true);
+  if (!ShiftAmt ||
+      ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1))
     return SDValue();
 
   // Create a greater-than comparison against -1. We don't use the more obvious
@@ -37203,15 +39410,35 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                             AVGBuilder);
   }
 
-  if (Operands[0].getOpcode() == ISD::ADD)
+  // Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
+  // Match the or case only if it's 'add-like' - can be replaced by an add.
+  auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
+    if (ISD::ADD == V.getOpcode()) {
+      Op0 = V.getOperand(0);
+      Op1 = V.getOperand(1);
+      return true;
+    }
+    if (ISD::ZERO_EXTEND != V.getOpcode())
+      return false;
+    V = V.getOperand(0);
+    if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
+        !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
+      return false;
+    Op0 = V.getOperand(0);
+    Op1 = V.getOperand(1);
+    return true;
+  };
+
+  SDValue Op0, Op1;
+  if (FindAddLike(Operands[0], Op0, Op1))
     std::swap(Operands[0], Operands[1]);
-  else if (Operands[1].getOpcode() != ISD::ADD)
+  else if (!FindAddLike(Operands[1], Op0, Op1))
     return SDValue();
-  Operands[2] = Operands[1].getOperand(0);
-  Operands[1] = Operands[1].getOperand(1);
+  Operands[2] = Op0;
+  Operands[1] = Op1;
 
   // Now we have three operands of two additions. Check that one of them is a
-  // constant vector with ones, and the other two are promoted from i8/i16.
+  // constant vector with ones, and the other two can be promoted from i8/i16.
   for (int i = 0; i < 3; ++i) {
     if (!IsConstVectorInRange(Operands[i], 1, 1))
       continue;
@@ -37219,14 +39446,16 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
 
     // Check if Operands[0] and Operands[1] are results of type promotion.
     for (int j = 0; j < 2; ++j)
-      if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
-          Operands[j].getOperand(0).getValueType() != VT)
-        return SDValue();
+      if (Operands[j].getValueType() != VT) {
+        if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+            Operands[j].getOperand(0).getValueType() != VT)
+          return SDValue();
+        Operands[j] = Operands[j].getOperand(0);
+      }
 
     // The pattern is detected, emit X86ISD::AVG instruction(s).
-    return SplitOpsAndApply(DAG, Subtarget, DL, VT,
-                            { Operands[0].getOperand(0),
-                              Operands[1].getOperand(0) }, AVGBuilder);
+    return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
+                            AVGBuilder);
   }
 
   return SDValue();
@@ -37246,38 +39475,51 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
   // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
   ISD::LoadExtType Ext = Ld->getExtensionType();
   bool Fast;
-  unsigned AddressSpace = Ld->getAddressSpace();
   unsigned Alignment = Ld->getAlignment();
   if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
       Ext == ISD::NON_EXTLOAD &&
       ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
        (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
-                               AddressSpace, Alignment, &Fast) && !Fast))) {
+                               *Ld->getMemOperand(), &Fast) &&
+        !Fast))) {
     unsigned NumElems = RegVT.getVectorNumElements();
     if (NumElems < 2)
       return SDValue();
 
-    SDValue Ptr = Ld->getBasePtr();
-
+    unsigned HalfAlign = 16;
+    SDValue Ptr1 = Ld->getBasePtr();
+    SDValue Ptr2 = DAG.getMemBasePlusOffset(Ptr1, HalfAlign, dl);
     EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
-                                  NumElems/2);
+                                  NumElems / 2);
     SDValue Load1 =
-        DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+        DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
                     Alignment, Ld->getMemOperand()->getFlags());
-
-    Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
-    SDValue Load2 =
-        DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
-                    Ld->getPointerInfo().getWithOffset(16),
-                    MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());
+    SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
+                                Ld->getPointerInfo().getWithOffset(HalfAlign),
+                                MinAlign(Alignment, HalfAlign),
+                                Ld->getMemOperand()->getFlags());
     SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                             Load1.getValue(1),
-                             Load2.getValue(1));
+                             Load1.getValue(1), Load2.getValue(1));
 
     SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
     return DCI.CombineTo(N, NewVec, TF, true);
   }
 
+  // Bool vector load - attempt to cast to an integer, as we have good
+  // (vXiY *ext(vXi1 bitcast(iX))) handling.
+  if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
+      RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
+    unsigned NumElts = RegVT.getVectorNumElements();
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+    if (TLI.isTypeLegal(IntVT)) {
+      SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
+                                    Ld->getPointerInfo(), Alignment,
+                                    Ld->getMemOperand()->getFlags());
+      SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
+      return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
+    }
+  }
+
   return SDValue();
 }
 
@@ -37404,6 +39646,9 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
   if (ML->getPassThru().isUndef())
     return SDValue();
 
+  if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
+    return SDValue();
+
   // The new masked load has an undef pass-through operand. The select uses the
   // original pass-through operand.
   SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
@@ -37434,7 +39679,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
         return Blend;
   }
 
-  if (Mld->getExtensionType() != ISD::SEXTLOAD)
+  if (Mld->getExtensionType() != ISD::EXTLOAD)
     return SDValue();
 
   // Resolve extending loads.
@@ -37504,8 +39749,20 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                                      Mld->getBasePtr(), NewMask, WidePassThru,
                                      Mld->getMemoryVT(), Mld->getMemOperand(),
                                      ISD::NON_EXTLOAD);
-  SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
-  return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
+
+  SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
+  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+  for (unsigned i = 0; i != NumElems; ++i)
+    ShuffleVec[i * SizeRatio] = i;
+
+  // Can't shuffle using an illegal type.
+  assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+         "WideVecVT should be legal");
+  SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+                                   DAG.getUNDEF(WideVecVT), ShuffleVec);
+  SlicedVec = DAG.getBitcast(VT, SlicedVec);
+
+  return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
 }
 
 /// If exactly one element of the mask is set for a non-truncating masked store,
@@ -37543,6 +39800,10 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT VT = Mst->getValue().getValueType();
+  EVT StVT = Mst->getMemoryVT();
+  SDLoc dl(Mst);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   if (!Mst->isTruncatingStore()) {
     if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
       return ScalarStore;
@@ -37551,7 +39812,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
     // simplify ops leading up to it. We only demand the MSB of each lane.
     SDValue Mask = Mst->getMask();
     if (Mask.getScalarValueSizeInBits() != 1) {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
       APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
       if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
         return SDValue(N, 0);
@@ -37561,20 +39821,25 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
     // pattern above, but that pattern will be different. It will either need to
     // match setcc more generally or match PCMPGTM later (in tablegen?).
 
+    SDValue Value = Mst->getValue();
+    if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+        TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+                              Mst->getMemoryVT())) {
+      return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+                                Mst->getBasePtr(), Mask,
+                                Mst->getMemoryVT(), Mst->getMemOperand(), true);
+    }
+
     return SDValue();
   }
 
   // Resolve truncating stores.
   unsigned NumElems = VT.getVectorNumElements();
-  EVT StVT = Mst->getMemoryVT();
-  SDLoc dl(Mst);
 
   assert(StVT != VT && "Cannot truncate to the same type");
   unsigned FromSz = VT.getScalarSizeInBits();
   unsigned ToSz = StVT.getScalarSizeInBits();
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
   // The truncating store is legal in some cases. For example
   // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
   // are designated for truncate store.
@@ -37644,11 +39909,13 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
 }
 
 static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
+                            TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
   StoreSDNode *St = cast<StoreSDNode>(N);
   EVT VT = St->getValue().getValueType();
   EVT StVT = St->getMemoryVT();
   SDLoc dl(St);
+  unsigned Alignment = St->getAlignment();
   SDValue StoredVal = St->getOperand(1);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
@@ -37699,8 +39966,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                                       StoredVal->ops().slice(32, 32));
       Hi = combinevXi1ConstantToInteger(Hi, DAG);
 
-      unsigned Alignment = St->getAlignment();
-
       SDValue Ptr0 = St->getBasePtr();
       SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
 
@@ -37724,30 +39989,48 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   // If we are saving a concatenation of two XMM registers and 32-byte stores
   // are slow, such as on Sandy Bridge, perform two 16-byte stores.
   bool Fast;
-  unsigned AddressSpace = St->getAddressSpace();
-  unsigned Alignment = St->getAlignment();
   if (VT.is256BitVector() && StVT == VT &&
       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
-                             AddressSpace, Alignment, &Fast) &&
+                             *St->getMemOperand(), &Fast) &&
       !Fast) {
     unsigned NumElems = VT.getVectorNumElements();
     if (NumElems < 2)
       return SDValue();
 
-    SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
-    SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
+    return splitVectorStore(St, DAG);
+  }
+
+  // Split under-aligned vector non-temporal stores.
+  if (St->isNonTemporal() && StVT == VT && Alignment < VT.getStoreSize()) {
+    // ZMM/YMM nt-stores - either it can be stored as a series of shorter
+    // vectors or the legalizer can scalarize it to use MOVNTI.
+    if (VT.is256BitVector() || VT.is512BitVector()) {
+      unsigned NumElems = VT.getVectorNumElements();
+      if (NumElems < 2)
+        return SDValue();
+      return splitVectorStore(St, DAG);
+    }
 
-    SDValue Ptr0 = St->getBasePtr();
-    SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
+    // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64
+    // to use MOVNTI.
+    if (VT.is128BitVector() && Subtarget.hasSSE2()) {
+      MVT NTVT = Subtarget.hasSSE4A()
+                     ? MVT::v2f64
+                     : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
+      return scalarizeVectorStore(St, NTVT, DAG);
+    }
+  }
 
-    SDValue Ch0 =
-        DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
-                     Alignment, St->getMemOperand()->getFlags());
-    SDValue Ch1 =
-        DAG.getStore(St->getChain(), dl, Value1, Ptr1,
-                     St->getPointerInfo().getWithOffset(16),
-                     MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+  // Try to optimize v16i16->v16i8 truncating stores when BWI is not
+  // supported, but avx512f is by extending to v16i32 and truncating.
+  if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
+      St->getValue().getOpcode() == ISD::TRUNCATE &&
+      St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
+      TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) &&
+      !DCI.isBeforeLegalizeOps()) {
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+    return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
+                             MVT::v16i8, St->getMemOperand());
   }
 
   // Optimize trunc store (of multiple scalars) to shuffle and store.
@@ -37763,7 +40046,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                           St->getPointerInfo(), St->getAlignment(),
                           St->getMemOperand()->getFlags());
 
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     if (SDValue Val =
         detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
                                 TLI))
@@ -37867,7 +40149,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
   bool F64IsLegal =
       !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
-  if ((VT.isVector() ||
+  if (((VT.isVector() && !VT.isFloatingPoint()) ||
        (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
@@ -37890,8 +40172,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
     // pair instead.
     if (Subtarget.is64Bit() || F64IsLegal) {
-      MVT LdVT = (Subtarget.is64Bit() &&
-                  (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
+      MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
       SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                                   Ld->getMemOperand());
 
@@ -37965,7 +40246,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
 /// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
 /// A horizontal-op B, for some already available A and B, and if so then LHS is
 /// set to A, RHS to B, and the routine returns 'true'.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
+                              const X86Subtarget &Subtarget,
+                              bool IsCommutative) {
   // If either operand is undef, bail out. The binop should be simplified.
   if (LHS.isUndef() || RHS.isUndef())
     return false;
@@ -37979,51 +40262,83 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
   // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
   // which is A horizontal-op B.
 
-  // At least one of the operands should be a vector shuffle.
-  if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
-      RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
-    return false;
-
   MVT VT = LHS.getSimpleValueType();
   assert((VT.is128BitVector() || VT.is256BitVector()) &&
          "Unsupported vector type for horizontal add/sub");
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // TODO - can we make a general helper method that does all of this for us?
+  auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
+                        SmallVectorImpl<int> &ShuffleMask) {
+    if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
+      if (!Op.getOperand(0).isUndef())
+        N0 = Op.getOperand(0);
+      if (!Op.getOperand(1).isUndef())
+        N1 = Op.getOperand(1);
+      ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+      ShuffleMask.append(Mask.begin(), Mask.end());
+      return;
+    }
+    bool UseSubVector = false;
+    if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        Op.getOperand(0).getValueType().is256BitVector() &&
+        llvm::isNullConstant(Op.getOperand(1))) {
+      Op = Op.getOperand(0);
+      UseSubVector = true;
+    }
+    bool IsUnary;
+    SmallVector<SDValue, 2> SrcOps;
+    SmallVector<int, 16> SrcShuffleMask;
+    SDValue BC = peekThroughBitcasts(Op);
+    if (isTargetShuffle(BC.getOpcode()) &&
+        getTargetShuffleMask(BC.getNode(), BC.getSimpleValueType(), false,
+                             SrcOps, SrcShuffleMask, IsUnary)) {
+      if (!UseSubVector && SrcShuffleMask.size() == NumElts &&
+          SrcOps.size() <= 2) {
+        N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
+        N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
+        ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end());
+      }
+      if (UseSubVector && (SrcShuffleMask.size() == (NumElts * 2)) &&
+          SrcOps.size() == 1) {
+        N0 = extract128BitVector(SrcOps[0], 0, DAG, SDLoc(Op));
+        N1 = extract128BitVector(SrcOps[0], NumElts, DAG, SDLoc(Op));
+        ArrayRef<int> Mask = ArrayRef<int>(SrcShuffleMask).slice(0, NumElts);
+        ShuffleMask.append(Mask.begin(), Mask.end());
+      }
+    }
+  };
 
   // View LHS in the form
   //   LHS = VECTOR_SHUFFLE A, B, LMask
   // If LHS is not a shuffle, then pretend it is the identity shuffle:
   //   LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
   // NOTE: A default initialized SDValue represents an UNDEF of type VT.
-  unsigned NumElts = VT.getVectorNumElements();
   SDValue A, B;
-  SmallVector<int, 16> LMask(NumElts);
-  if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
-    if (!LHS.getOperand(0).isUndef())
-      A = LHS.getOperand(0);
-    if (!LHS.getOperand(1).isUndef())
-      B = LHS.getOperand(1);
-    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
-    llvm::copy(Mask, LMask.begin());
-  } else {
-    A = LHS;
-    for (unsigned i = 0; i != NumElts; ++i)
-      LMask[i] = i;
-  }
+  SmallVector<int, 16> LMask;
+  GetShuffle(LHS, A, B, LMask);
 
   // Likewise, view RHS in the form
   //   RHS = VECTOR_SHUFFLE C, D, RMask
   SDValue C, D;
-  SmallVector<int, 16> RMask(NumElts);
-  if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
-    if (!RHS.getOperand(0).isUndef())
-      C = RHS.getOperand(0);
-    if (!RHS.getOperand(1).isUndef())
-      D = RHS.getOperand(1);
-    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
-    llvm::copy(Mask, RMask.begin());
-  } else {
+  SmallVector<int, 16> RMask;
+  GetShuffle(RHS, C, D, RMask);
+
+  // At least one of the operands should be a vector shuffle.
+  unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1);
+  if (NumShuffles == 0)
+    return false;
+
+  if (LMask.empty()) {
+    A = LHS;
+    for (unsigned i = 0; i != NumElts; ++i)
+      LMask.push_back(i);
+  }
+
+  if (RMask.empty()) {
     C = RHS;
     for (unsigned i = 0; i != NumElts; ++i)
-      RMask[i] = i;
+      RMask.push_back(i);
   }
 
   // If A and B occur in reverse order in RHS, then canonicalize by commuting
@@ -38072,6 +40387,12 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
 
   LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
   RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+
+  if (!shouldUseHorizontalOp(LHS == RHS && NumShuffles < 2, DAG, Subtarget))
+    return false;
+
+  LHS = DAG.getBitcast(VT, LHS);
+  RHS = DAG.getBitcast(VT, RHS);
   return true;
 }
 
@@ -38088,8 +40409,7 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
   // Try to synthesize horizontal add/sub from adds/subs of shuffles.
   if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
        (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
-      isHorizontalBinOp(LHS, RHS, IsFadd) &&
-      shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+      isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd))
     return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
 
   return SDValue();
@@ -38105,7 +40425,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
                                           const SDLoc &DL) {
   assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
   SDValue Src = N->getOperand(0);
-  unsigned Opcode = Src.getOpcode();
+  unsigned SrcOpcode = Src.getOpcode();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   EVT VT = N->getValueType(0);
@@ -38123,14 +40443,17 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
       return true;
 
     // See if this is a single use constant which can be constant folded.
-    SDValue BC = peekThroughOneUseBitcasts(Op);
-    return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
+    // NOTE: We don't peek through bitcasts here because there is currently
+    // no support for constant folding truncate+bitcast+vector_of_constants. So
+    // we'll just end up with a truncate on both operands which will
+    // get turned back into (truncate (binop)) causing an infinite loop.
+    return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
   };
 
   auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
     SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
     SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
-    return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+    return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
   };
 
   // Don't combine if the operation has other uses.
@@ -38145,13 +40468,13 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   // In most cases its only worth pre-truncating if we're only facing the cost
   // of one truncation.
   // i.e. if one of the inputs will constant fold or the input is repeated.
-  switch (Opcode) {
+  switch (SrcOpcode) {
   case ISD::AND:
   case ISD::XOR:
   case ISD::OR: {
     SDValue Op0 = Src.getOperand(0);
     SDValue Op1 = Src.getOperand(1);
-    if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+    if (TLI.isOperationLegalOrPromote(SrcOpcode, VT) &&
         (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
       return TruncateArithmetic(Op0, Op1);
     break;
@@ -38160,14 +40483,15 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   case ISD::MUL:
     // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
     // better to truncate if we have the chance.
-    if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
-        !TLI.isOperationLegal(Opcode, SrcVT))
+    if (SrcVT.getScalarType() == MVT::i64 &&
+        TLI.isOperationLegal(SrcOpcode, VT) &&
+        !TLI.isOperationLegal(SrcOpcode, SrcVT))
       return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
     LLVM_FALLTHROUGH;
   case ISD::ADD: {
     SDValue Op0 = Src.getOperand(0);
     SDValue Op1 = Src.getOperand(1);
-    if (TLI.isOperationLegal(Opcode, VT) &&
+    if (TLI.isOperationLegal(SrcOpcode, VT) &&
         (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
       return TruncateArithmetic(Op0, Op1);
     break;
@@ -38177,7 +40501,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
     // truncatable to avoid interfering with combineSubToSubus.
     SDValue Op0 = Src.getOperand(0);
     SDValue Op1 = Src.getOperand(1);
-    if (TLI.isOperationLegal(Opcode, VT) &&
+    if (TLI.isOperationLegal(SrcOpcode, VT) &&
         (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
       return TruncateArithmetic(Op0, Op1);
     break;
@@ -38188,36 +40512,19 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
 }
 
 /// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// e.g. trunc <8 x i32> X to <8 x i16> -->
+/// MaskX = X & 0xffff (clear high bits to prevent saturation)
+/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
 static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
                                                  const X86Subtarget &Subtarget,
                                                  SelectionDAG &DAG) {
   SDValue In = N->getOperand(0);
   EVT InVT = In.getValueType();
-  EVT InSVT = InVT.getVectorElementType();
   EVT OutVT = N->getValueType(0);
-  EVT OutSVT = OutVT.getVectorElementType();
-
-  // Split a long vector into vectors of legal type and mask to unset all bits
-  // that won't appear in the result to prevent saturation.
-  // TODO - we should be doing this at the maximum legal size but this is
-  // causing regressions where we're concatenating back to max width just to
-  // perform the AND and then extracting back again.....
-  unsigned NumSubRegs = InVT.getSizeInBits() / 128;
-  unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
-  EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
-  SmallVector<SDValue, 8> SubVecs(NumSubRegs);
-
-  APInt Mask =
-      APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
-  SDValue MaskVal = DAG.getConstant(Mask, DL, SubRegVT);
-
-  for (unsigned i = 0; i < NumSubRegs; i++) {
-    SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
-                              DAG.getIntPtrConstant(i * NumSubRegElts, DL));
-    SubVecs[i] = DAG.getNode(ISD::AND, DL, SubRegVT, Sub, MaskVal);
-  }
-  In = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, SubVecs);
 
+  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
+                                    OutVT.getScalarSizeInBits());
+  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
   return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
 }
 
@@ -38580,16 +40887,23 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
   if (N->getOpcode() == ISD::FNEG)
     return N->getOperand(0);
 
+  unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+
   SDValue Op = peekThroughBitcasts(SDValue(N, 0));
-  auto VT = Op->getValueType(0);
+  EVT VT = Op->getValueType(0);
+  // Make sure the element size doesn't change.
+  if (VT.getScalarSizeInBits() != ScalarSize)
+    return SDValue();
+
   if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
     // For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
     // of this is VECTOR_SHUFFLE(-VEC1, UNDEF).  The mask can be anything here.
     if (!SVOp->getOperand(1).isUndef())
       return SDValue();
     if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
-      return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
-                                  SVOp->getMask());
+      if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
+        return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+                                    SVOp->getMask());
     return SDValue();
   }
   unsigned Opc = Op.getOpcode();
@@ -38601,19 +40915,17 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
     if (!InsVector.isUndef())
       return SDValue();
     if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
-      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
-                         NegInsVal, Op.getOperand(2));
+      if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
+        return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+                           NegInsVal, Op.getOperand(2));
     return SDValue();
   }
 
   if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
     return SDValue();
 
-  SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
-  if (!Op1.getValueType().isFloatingPoint())
-    return SDValue();
-
-  SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op0 = Op.getOperand(0);
 
   // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
   // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
@@ -38625,7 +40937,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
   SmallVector<APInt, 16> EltBits;
   // Extract constant bits and see if they are all sign bit masks. Ignore the
   // undef elements.
-  if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+  if (getTargetConstantBitsFromNode(Op1, ScalarSize,
                                     UndefElts, EltBits,
                                     /* AllowWholeUndefs */ true,
                                     /* AllowPartialUndefs */ false)) {
@@ -38922,13 +41234,12 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
   if (Subtarget.useSoftFloat())
     return SDValue();
 
-  // TODO: If an operand is already known to be a NaN or not a NaN, this
-  //       should be an optional swap and FMAX/FMIN.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   EVT VT = N->getValueType(0);
-  if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
-        (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
-        (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+  if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
+        (Subtarget.hasSSE2() && VT == MVT::f64) ||
+        (VT.isVector() && TLI.isTypeLegal(VT))))
     return SDValue();
 
   SDValue Op0 = N->getOperand(0);
@@ -38941,13 +41252,20 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
   if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
     return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
 
+  // If one of the operands is known non-NaN use the native min/max instructions
+  // with the non-NaN input as second operand.
+  if (DAG.isKnownNeverNaN(Op1))
+    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+  if (DAG.isKnownNeverNaN(Op0))
+    return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
+
   // If we have to respect NaN inputs, this takes at least 3 instructions.
   // Favor a library call when operating on a scalar and minimizing code size.
-  if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize())
+  if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
-  EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
-      DAG.getDataLayout(), *DAG.getContext(), VT);
+  EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                                         VT);
 
   // There are 4 possibilities involving NaN inputs, and these are the required
   // outputs:
@@ -38987,6 +41305,69 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
                                      KnownZero, DCI))
     return SDValue(N, 0);
 
+  // Convert a full vector load into vzload when not all bits are needed.
+  SDValue In = N->getOperand(0);
+  MVT InVT = In.getSimpleValueType();
+  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+    // Unless the load is volatile.
+    if (!LN->isVolatile()) {
+      SDLoc dl(N);
+      unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+      MVT MemVT = MVT::getIntegerVT(NumBits);
+      MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+      SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+      SDValue VZLoad =
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+                                  LN->getPointerInfo(),
+                                  LN->getAlignment(),
+                                  LN->getMemOperand()->getFlags());
+      SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+                                    DAG.getBitcast(InVT, VZLoad));
+      DCI.CombineTo(N, Convert);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
+
+  return SDValue();
+}
+
+static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+
+  // Convert a full vector load into vzload when not all bits are needed.
+  SDValue In = N->getOperand(0);
+  MVT InVT = In.getSimpleValueType();
+  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+    // Unless the load is volatile.
+    if (!LN->isVolatile()) {
+      SDLoc dl(N);
+      unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+      MVT MemVT = MVT::getFloatingPointVT(NumBits);
+      MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+      SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+      SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+      SDValue VZLoad =
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+                                  LN->getPointerInfo(),
+                                  LN->getAlignment(),
+                                  LN->getMemOperand()->getFlags());
+      SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+                                    DAG.getBitcast(InVT, VZLoad));
+      DCI.CombineTo(N, Convert);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
+
   return SDValue();
 }
 
@@ -39005,18 +41386,14 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
     return DAG.getConstant(0, SDLoc(N), VT);
 
   // Turn ANDNP back to AND if input is inverted.
-  if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
-      ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) {
-    return DAG.getNode(ISD::AND, SDLoc(N), VT,
-                       N->getOperand(0).getOperand(0), N->getOperand(1));
-  }
+  if (SDValue Not = IsNOT(N->getOperand(0), DAG))
+    return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not),
+                       N->getOperand(1));
 
   // Attempt to recursively combine a bitmask ANDNP with shuffles.
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
     SDValue Op(N, 0);
-    if (SDValue Res = combineX86ShufflesRecursively(
-            {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
-            /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
   }
 
@@ -39039,18 +41416,24 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
 
 // Try to combine sext_in_reg of a cmov of constants by extending the constants.
 static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
-  EVT VT = N->getValueType(0);
+  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
+
+  EVT DstVT = N->getValueType(0);
 
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
 
-  if (ExtraVT != MVT::i16)
+  if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
     return SDValue();
 
-  // Look through single use any_extends.
-  if (N0.getOpcode() == ISD::ANY_EXTEND && N0.hasOneUse())
+  // Look through single use any_extends / truncs.
+  SDValue IntermediateBitwidthOp;
+  if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+      N0.hasOneUse()) {
+    IntermediateBitwidthOp = N0;
     N0 = N0.getOperand(0);
+  }
 
   // See if we have a single use cmov.
   if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
@@ -39066,21 +41449,37 @@ static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
 
   SDLoc DL(N);
 
-  // If we looked through an any_extend above, add one to the constants.
-  if (N0.getValueType() != VT) {
-    CMovOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp0);
-    CMovOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp1);
+  // If we looked through an any_extend/trunc above, apply it to the constants.
+  if (IntermediateBitwidthOp) {
+    unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode();
+    CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0);
+    CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1);
   }
 
-  CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp0, N1);
-  CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp1, N1);
+  CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1);
+  CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1);
 
-  return DAG.getNode(X86ISD::CMOV, DL, VT, CMovOp0, CMovOp1,
-                     N0.getOperand(2), N0.getOperand(3));
+  EVT CMovVT = DstVT;
+  // We do not want i16 CMOV's. Promote to i32 and truncate afterwards.
+  if (DstVT == MVT::i16) {
+    CMovVT = MVT::i32;
+    CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0);
+    CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1);
+  }
+
+  SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
+                             N0.getOperand(2), N0.getOperand(3));
+
+  if (CMovVT != DstVT)
+    CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov);
+
+  return CMov;
 }
 
 static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
+  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
+
   if (SDValue V = combineSextInRegCmov(N, DAG))
     return V;
 
@@ -39336,6 +41735,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   unsigned Opcode = N->getOpcode();
+  // TODO - add ANY_EXTEND support.
   if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
     return SDValue();
   if (!DCI.isBeforeLegalizeOps())
@@ -39382,13 +41782,13 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
   SDLoc DL(N);
 
   auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
-    EVT InVT = N.getValueType();
-    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
-                                 Size / InVT.getScalarSizeInBits());
-    SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
-                                  DAG.getUNDEF(InVT));
+    EVT SrcVT = N.getValueType();
+    EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+                                 Size / SrcVT.getScalarSizeInBits());
+    SmallVector<SDValue, 8> Opnds(Size / SrcVT.getSizeInBits(),
+                                  DAG.getUNDEF(SrcVT));
     Opnds[0] = N;
-    return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds);
   };
 
   // If target-size is less than 128-bits, extend to a type that would extend
@@ -39410,8 +41810,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
       (VT.is256BitVector() && Subtarget.hasAVX()) ||
       (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
     SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
-    Opcode = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
-                                        : ISD::ZERO_EXTEND_VECTOR_INREG;
+    Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
     return DAG.getNode(Opcode, DL, VT, ExOp);
   }
 
@@ -39421,9 +41820,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
     EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
     EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
 
-    unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
-                                                : ISD::ZERO_EXTEND_VECTOR_INREG;
-
+    unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode);
     SmallVector<SDValue, 8> Opnds;
     for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
       SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
@@ -39457,7 +41854,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
   SDLoc dl(N);
 
   // Only do this combine with AVX512 for vector extends.
-  if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
+  if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)
     return SDValue();
 
   // Only combine legal element types.
@@ -39473,7 +41870,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
 
   // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
   // that's the only integer compares with we have.
-  ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get();
+  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   if (ISD::isUnsignedIntSetCC(CC))
     return SDValue();
 
@@ -39629,6 +42026,10 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
   if (!NegVal)
     return SDValue();
 
+  // FIXME: Should we bitcast instead?
+  if (NegVal.getValueType() != VT)
+    return SDValue();
+
   unsigned NewOpcode;
   switch (N->getOpcode()) {
   default: llvm_unreachable("Unexpected opcode!");
@@ -39705,6 +42106,20 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
     return R;
 
+  // TODO: Combine with any target/faux shuffle.
+  if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
+      VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    unsigned NumSrcElts = N00.getValueType().getVectorNumElements();
+    unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
+    APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
+    if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
+        (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
+      return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128);
+    }
+  }
+
   return SDValue();
 }
 
@@ -39734,9 +42149,14 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
   if (isNullConstant(Y) && !IsOrXorXorCCZero)
     return SDValue();
 
-  // Bail out if we know that this is not really just an oversized integer.
-  if (peekThroughBitcasts(X).getValueType() == MVT::f128 ||
-      peekThroughBitcasts(Y).getValueType() == MVT::f128)
+  // Don't perform this combine if constructing the vector will be expensive.
+  auto IsVectorBitCastCheap = [](SDValue X) {
+    X = peekThroughBitcasts(X);
+    return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+           X.getOpcode() == ISD::LOAD;
+  };
+  if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+      !IsOrXorXorCCZero)
     return SDValue();
 
   // TODO: Use PXOR + PTEST for SSE4.1 or later?
@@ -39873,66 +42293,44 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
   SDValue Src = N->getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = N->getSimpleValueType(0);
+  unsigned NumBits = VT.getScalarSizeInBits();
+  unsigned NumElts = SrcVT.getVectorNumElements();
 
   // Perform constant folding.
   if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
-    assert(VT== MVT::i32 && "Unexpected result type");
+    assert(VT == MVT::i32 && "Unexpected result type");
     APInt Imm(32, 0);
     for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
-      SDValue In = Src.getOperand(Idx);
-      if (!In.isUndef() &&
-          cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
+      if (!Src.getOperand(Idx).isUndef() &&
+          Src.getConstantOperandAPInt(Idx).isNegative())
         Imm.setBit(Idx);
     }
     return DAG.getConstant(Imm, SDLoc(N), VT);
   }
 
   // Look through int->fp bitcasts that don't change the element width.
-  if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() &&
-      SrcVT.isFloatingPoint() &&
-      Src.getOperand(0).getValueType() ==
-        EVT(SrcVT).changeVectorElementTypeToInteger())
-    Src = Src.getOperand(0);
+  unsigned EltWidth = SrcVT.getScalarSizeInBits();
+  if (Src.getOpcode() == ISD::BITCAST &&
+      Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
+    return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
+
+  // Fold movmsk(not(x)) -> not(movmsk) to improve folding of movmsk results
+  // with scalar comparisons.
+  if (SDValue NotSrc = IsNOT(Src, DAG)) {
+    SDLoc DL(N);
+    APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
+    NotSrc = DAG.getBitcast(SrcVT, NotSrc);
+    return DAG.getNode(ISD::XOR, DL, VT,
+                       DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc),
+                       DAG.getConstant(NotMask, DL, VT));
+  }
 
   // Simplify the inputs.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+  APInt DemandedMask(APInt::getAllOnesValue(NumBits));
   if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
     return SDValue(N, 0);
 
-  // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
-  // Only do this when the setcc input and output types are the same and the
-  // setcc and the 'and' node have a single use.
-  // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
-  APInt SplatVal;
-  if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
-      Src.getOperand(0).getValueType() == Src.getValueType() &&
-      cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
-      ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
-      Src.getOperand(0).getOpcode() == ISD::AND) {
-    SDValue And = Src.getOperand(0);
-    if (And.hasOneUse() &&
-        ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
-        SplatVal.isPowerOf2()) {
-      MVT VT = Src.getSimpleValueType();
-      unsigned BitWidth = VT.getScalarSizeInBits();
-      unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
-      SDLoc DL(And);
-      SDValue X = And.getOperand(0);
-      // If the element type is i8, we need to bitcast to i16 to use a legal
-      // shift. If we wait until lowering we end up with an extra and to bits
-      // from crossing the 8-bit elements, but we don't care about that here.
-      if (VT.getVectorElementType() == MVT::i8) {
-        VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
-        X = DAG.getBitcast(VT, X);
-      }
-      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
-                                DAG.getConstant(ShAmt, DL, VT));
-      SDValue Cast = DAG.getBitcast(SrcVT, Shl);
-      return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
-    }
-  }
-
   return SDValue();
 }
 
@@ -40065,8 +42463,7 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
   // make the transformation for non-constant splats as well, but it's unclear
   // that would be a benefit as it would not eliminate any operations, just
   // perform one more step in scalar code before moving to the vector unit.
-  if (BuildVectorSDNode *BV =
-          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+  if (auto *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(0).getOperand(1))) {
     // Bail out if the vector isn't a constant.
     if (!BV->isConstant())
       return SDValue();
@@ -40088,6 +42485,41 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
   return SDValue();
 }
 
+/// If we are converting a value to floating-point, try to replace scalar
+/// truncate of an extracted vector element with a bitcast. This tries to keep
+/// the sequence on XMM registers rather than moving between vector and GPRs.
+static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
+  // TODO: This is currently only used by combineSIntToFP, but it is generalized
+  //       to allow being called by any similar cast opcode.
+  // TODO: Consider merging this into lowering: vectorizeExtractedCast().
+  SDValue Trunc = N->getOperand(0);
+  if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue ExtElt = Trunc.getOperand(0);
+  if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      !isNullConstant(ExtElt.getOperand(1)))
+    return SDValue();
+
+  EVT TruncVT = Trunc.getValueType();
+  EVT SrcVT = ExtElt.getValueType();
+  unsigned DestWidth = TruncVT.getSizeInBits();
+  unsigned SrcWidth = SrcVT.getSizeInBits();
+  if (SrcWidth % DestWidth != 0)
+    return SDValue();
+
+  // inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)
+  EVT SrcVecVT = ExtElt.getOperand(0).getValueType();
+  unsigned VecWidth = SrcVecVT.getSizeInBits();
+  unsigned NumElts = VecWidth / DestWidth;
+  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts);
+  SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
+  SDLoc DL(N);
+  SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
+                                  BitcastVec, ExtElt.getOperand(1));
+  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);
+}
+
 static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
@@ -40181,6 +42613,10 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
       return FILDChain;
     }
   }
+
+  if (SDValue V = combineToFPTruncExtElt(N, DAG))
+    return V;
+
   return SDValue();
 }
 
@@ -40267,13 +42703,13 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
   if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
       Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
       onlyZeroFlagUsed(SDValue(N, 0))) {
-    EVT VT = Op.getValueType();
     unsigned BitWidth = VT.getSizeInBits();
-    unsigned ShAmt = Op.getConstantOperandVal(1);
-    if (ShAmt < BitWidth) { // Avoid undefined shifts.
+    const APInt &ShAmt = Op.getConstantOperandAPInt(1);
+    if (ShAmt.ult(BitWidth)) { // Avoid undefined shifts.
+      unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
       APInt Mask = Op.getOpcode() == ISD::SRL
-                       ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
-                       : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+                       ? APInt::getHighBitsSet(BitWidth, MaskBits)
+                       : APInt::getLowBitsSet(BitWidth, MaskBits);
       if (Mask.isSignedIntN(32)) {
         Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
                          DAG.getConstant(Mask, dl, VT));
@@ -40283,7 +42719,6 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
     }
   }
 
-
   // Look for a truncate with a single use.
   if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
     return SDValue();
@@ -40337,8 +42772,42 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
   return Op.getValue(1);
 }
 
+static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI) {
+  assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
+         "Expected X86ISD::ADD or X86ISD::SUB");
+
+  SDLoc DL(N);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  MVT VT = LHS.getSimpleValueType();
+  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+
+  // If we don't use the flag result, simplify back to a generic ADD/SUB.
+  if (!N->hasAnyUseOfValue(1)) {
+    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
+    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+  }
+
+  // Fold any similar generic ADD/SUB opcodes to reuse this node.
+  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+    SDValue Ops[] = {N0, N1};
+    SDVTList VTs = DAG.getVTList(N->getValueType(0));
+    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+      SDValue Op(N, 0);
+      if (Negate)
+        Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+      DCI.CombineTo(GenericAddSub, Op);
+    }
+  };
+  MatchGeneric(LHS, RHS, false);
+  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+  return SDValue();
+}
+
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
-  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
+  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
     MVT VT = N->getSimpleValueType(0);
     SDVTList VTs = DAG.getVTList(VT, MVT::i32);
     return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs,
@@ -40346,6 +42815,15 @@ static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
                        Flags);
   }
 
+  // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
+  // iff the flag result is dead.
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) &&
+      !N->hasAnyUseOfValue(1))
+    return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0),
+                       Op0.getOperand(1), N->getOperand(2));
+
   return SDValue();
 }
 
@@ -40372,7 +42850,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, Res1, CarryOut);
   }
 
-  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
+  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
     MVT VT = N->getSimpleValueType(0);
     SDVTList VTs = DAG.getVTList(VT, MVT::i32);
     return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs,
@@ -40468,7 +42946,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
     // Do not flip "e > c", where "c" is a constant, because Cmp instruction
     // cannot take an immediate as its first operand.
     //
-    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
         EFLAGS.getValueType().isInteger() &&
         !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
       SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
@@ -40575,8 +43053,8 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
   // Madd vector size is half of the original vector size
   auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                            ArrayRef<SDValue> Ops) {
-    MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
-    return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
+    MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+    return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
   };
 
   auto BuildPMADDWD = [&](SDValue Mul) {
@@ -40631,10 +43109,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // We know N is a reduction add, which means one of its operands is a phi.
-  // To match SAD, we need the other operand to be a vector select.
-  if (Op0.getOpcode() != ISD::VSELECT)
+  // To match SAD, we need the other operand to be an ABS.
+  if (Op0.getOpcode() != ISD::ABS)
     std::swap(Op0, Op1);
-  if (Op0.getOpcode() != ISD::VSELECT)
+  if (Op0.getOpcode() != ISD::ABS)
     return SDValue();
 
   auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
@@ -40673,7 +43151,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
   Op0 = BuildPSADBW(SadOp0, SadOp1);
 
   // It's possible we have a sad on the other side too.
-  if (Op1.getOpcode() == ISD::VSELECT &&
+  if (Op1.getOpcode() == ISD::ABS &&
       detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
     Op1 = BuildPSADBW(SadOp0, SadOp1);
   }
@@ -40815,39 +43293,6 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                           PMADDBuilder);
 }
 
-// Try to turn (add (umax X, C), -C) into (psubus X, C)
-static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
-                                 const X86Subtarget &Subtarget) {
-  if (!Subtarget.hasSSE2())
-    return SDValue();
-
-  EVT VT = N->getValueType(0);
-
-  // psubus is available in SSE2 for i8 and i16 vectors.
-  if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
-      !isPowerOf2_32(VT.getVectorNumElements()) ||
-      !(VT.getVectorElementType() == MVT::i8 ||
-        VT.getVectorElementType() == MVT::i16))
-    return SDValue();
-
-  SDValue Op0 = N->getOperand(0);
-  SDValue Op1 = N->getOperand(1);
-  if (Op0.getOpcode() != ISD::UMAX)
-    return SDValue();
-
-  // The add should have a constant that is the negative of the max.
-  // TODO: Handle build_vectors with undef elements.
-  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
-    return Max->getAPIntValue() == (-Op->getAPIntValue());
-  };
-  if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
-    return SDValue();
-
-  SDLoc DL(N);
-  return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0),
-                     Op0.getOperand(1));
-}
-
 // Attempt to turn this pattern into PMADDWD.
 // (mul (add (zext (build_vector)), (zext (build_vector))),
 //      (add (zext (build_vector)), (zext (build_vector)))
@@ -40957,12 +43402,12 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
                          ArrayRef<SDValue> Ops) {
     // Shrink by adding truncate nodes and let DAGCombine fold with the
     // sources.
-    EVT InVT = Ops[0].getValueType();
-    assert(InVT.getScalarType() == MVT::i16 &&
+    EVT OpVT = Ops[0].getValueType();
+    assert(OpVT.getScalarType() == MVT::i16 &&
            "Unexpected scalar element type");
-    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
+    assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
     EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
-                                 InVT.getVectorNumElements() / 2);
+                                 OpVT.getVectorNumElements() / 2);
     return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
   };
   return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
@@ -40990,8 +43435,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   // Try to synthesize horizontal adds from adds of shuffles.
   if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
        VT == MVT::v8i32) &&
-      Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
-      shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+      Subtarget.hasSSSE3() &&
+      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) {
     auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                           ArrayRef<SDValue> Ops) {
       return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
@@ -41003,9 +43448,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineIncDecVector(N, DAG))
     return V;
 
-  if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
-    return V;
-
   return combineAddOrSubToADCOrSBB(N, DAG);
 }
 
@@ -41110,7 +43552,7 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
     // X-Y -> X+~Y+1, saving one register.
     if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
         isa<ConstantSDNode>(Op1.getOperand(1))) {
-      APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+      const APInt &XorC = Op1.getConstantOperandAPInt(1);
       EVT VT = Op0.getValueType();
       SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
                                    Op1.getOperand(0),
@@ -41124,8 +43566,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
        VT == MVT::v8i32) &&
-      Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
-      shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+      Subtarget.hasSSSE3() &&
+      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) {
     auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                           ArrayRef<SDValue> Ops) {
       return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
@@ -41159,6 +43601,149 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Helper that combines an array of subvector ops as if they were the operands
+/// of an ISD::CONCAT_VECTORS node, but may have come from another source
+/// (e.g. ISD::INSERT_SUBVECTOR). The ops are assumed to be of the same type.
+static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
+                                      ArrayRef<SDValue> Ops, SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const X86Subtarget &Subtarget) {
+  assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
+
+  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+    return DAG.getUNDEF(VT);
+
+  if (llvm::all_of(Ops, [](SDValue Op) {
+        return ISD::isBuildVectorAllZeros(Op.getNode());
+      }))
+    return getZeroVector(VT, Subtarget, DAG, DL);
+
+  SDValue Op0 = Ops[0];
+
+  // Fold subvector loads into one.
+  // If needed, look through bitcasts to get to the load.
+  if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
+    bool Fast;
+    const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+    if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+                                *FirstLd->getMemOperand(), &Fast) &&
+        Fast) {
+      if (SDValue Ld =
+              EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
+        return Ld;
+    }
+  }
+
+  // Repeated subvectors.
+  if (llvm::all_of(Ops, [Op0](SDValue Op) { return Op == Op0; })) {
+    // If this broadcast/subv_broadcast is inserted into both halves, use a
+    // larger broadcast/subv_broadcast.
+    if (Op0.getOpcode() == X86ISD::VBROADCAST ||
+        Op0.getOpcode() == X86ISD::SUBV_BROADCAST)
+      return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
+
+    // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
+    if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
+        (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
+      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
+                                     Op0.getOperand(0),
+                                     DAG.getIntPtrConstant(0, DL)));
+
+    // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
+    if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+        (Subtarget.hasAVX2() ||
+         (VT.getScalarSizeInBits() >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
+        Op0.getOperand(0).getValueType() == VT.getScalarType())
+      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
+  }
+
+  bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });
+
+  // Repeated opcode.
+  // TODO - combineX86ShufflesRecursively should handle shuffle concatenation
+  // but it currently struggles with different vector widths.
+  if (llvm::all_of(Ops, [Op0](SDValue Op) {
+        return Op.getOpcode() == Op0.getOpcode();
+      })) {
+    unsigned NumOps = Ops.size();
+    switch (Op0.getOpcode()) {
+    case X86ISD::PSHUFHW:
+    case X86ISD::PSHUFLW:
+    case X86ISD::PSHUFD:
+      if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
+          Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
+        SmallVector<SDValue, 2> Src;
+        for (unsigned i = 0; i != NumOps; ++i)
+          Src.push_back(Ops[i].getOperand(0));
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Src),
+                           Op0.getOperand(1));
+      }
+      LLVM_FALLTHROUGH;
+    case X86ISD::VPERMILPI:
+      // TODO - add support for vXf64/vXi64 shuffles.
+      if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
+          Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
+        SmallVector<SDValue, 2> Src;
+        for (unsigned i = 0; i != NumOps; ++i)
+          Src.push_back(DAG.getBitcast(MVT::v4f32, Ops[i].getOperand(0)));
+        SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f32, Src);
+        Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
+                          Op0.getOperand(1));
+        return DAG.getBitcast(VT, Res);
+      }
+      break;
+    case X86ISD::PACKUS:
+      if (NumOps == 2 && VT.is256BitVector() && Subtarget.hasInt256()) {
+        SmallVector<SDValue, 2> LHS, RHS;
+        for (unsigned i = 0; i != NumOps; ++i) {
+          LHS.push_back(Ops[i].getOperand(0));
+          RHS.push_back(Ops[i].getOperand(1));
+        }
+        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                                 NumOps * SrcVT.getVectorNumElements());
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
+      }
+      break;
+    }
+  }
+
+  // If we're inserting all zeros into the upper half, change this to
+  // an insert into an all zeros vector. We will match this to a move
+  // with implicit upper bit zeroing during isel.
+  if (Ops.size() == 2 && ISD::isBuildVectorAllZeros(Ops[1].getNode()))
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+                       getZeroVector(VT, Subtarget, DAG, DL), Ops[0],
+                       DAG.getIntPtrConstant(0, DL));
+
+  return SDValue();
+}
+
+static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  EVT SrcVT = N->getOperand(0).getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Don't do anything for i1 vectors.
+  if (VT.getVectorElementType() == MVT::i1)
+    return SDValue();
+
+  if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
+    SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
+    if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
+                                           DCI, Subtarget))
+      return R;
+  }
+
+  return SDValue();
+}
+
 static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const X86Subtarget &Subtarget) {
@@ -41173,19 +43758,23 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
   SDValue Vec = N->getOperand(0);
   SDValue SubVec = N->getOperand(1);
 
-  unsigned IdxVal = N->getConstantOperandVal(2);
+  uint64_t IdxVal = N->getConstantOperandVal(2);
   MVT SubVecVT = SubVec.getSimpleValueType();
 
-  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
-    // Inserting zeros into zeros is a nop.
-    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
-      return getZeroVector(OpVT, Subtarget, DAG, dl);
+  if (Vec.isUndef() && SubVec.isUndef())
+    return DAG.getUNDEF(OpVT);
+
+  // Inserting undefs/zeros into zeros/undefs is a zero vector.
+  if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
+      (SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
+    return getZeroVector(OpVT, Subtarget, DAG, dl);
 
+  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
     // If we're inserting into a zero vector and then into a larger zero vector,
     // just insert into the larger zero vector directly.
     if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
         ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
-      unsigned Idx2Val = SubVec.getConstantOperandVal(2);
+      uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
       return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                          getZeroVector(OpVT, Subtarget, DAG, dl),
                          SubVec.getOperand(1),
@@ -41197,30 +43786,16 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
     // least as large as the original insertion. Just insert the original
     // subvector into a zero vector.
     if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
-        SubVec.getConstantOperandVal(1) == 0 &&
+        SubVec.getConstantOperandAPInt(1) == 0 &&
         SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
       SDValue Ins = SubVec.getOperand(0);
-      if (Ins.getConstantOperandVal(2) == 0 &&
+      if (Ins.getConstantOperandAPInt(2) == 0 &&
           ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
           Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
         return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                            getZeroVector(OpVT, Subtarget, DAG, dl),
                            Ins.getOperand(1), N->getOperand(2));
     }
-
-    // If we're inserting a bitcast into zeros, rewrite the insert and move the
-    // bitcast to the other side. This helps with detecting zero extending
-    // during isel.
-    // TODO: Is this useful for other indices than 0?
-    if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
-      MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
-      unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
-      MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
-      SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
-                                   DAG.getBitcast(NewVT, Vec),
-                                   SubVec.getOperand(0), N->getOperand(2));
-      return DAG.getBitcast(OpVT, Insert);
-    }
   }
 
   // Stop here if this is an i1 vector.
@@ -41248,77 +43823,92 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
-  // load:
-  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
-  //                   (load16 addr + 16), Elts/2)
-  // --> load32 addr
-  // or:
-  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
-  //                   (load32 addr + 32), Elts/2)
-  // --> load64 addr
-  // or a 16-byte or 32-byte broadcast:
-  // (insert_subvector (insert_subvector undef, (load16 addr), 0),
-  //                   (load16 addr), Elts/2)
-  // --> X86SubVBroadcast(load16 addr)
-  // or:
-  // (insert_subvector (insert_subvector undef, (load32 addr), 0),
-  //                   (load32 addr), Elts/2)
-  // --> X86SubVBroadcast(load32 addr)
+  // Match concat_vector style patterns.
+  SmallVector<SDValue, 2> SubVectorOps;
+  if (collectConcatOps(N, SubVectorOps))
+    if (SDValue Fold =
+            combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
+      return Fold;
+
+  // If we are inserting into both halves of the vector, the starting vector
+  // should be undef. If it isn't, make it so. Only do this if the early insert
+  // has no other uses.
+  // TODO: Should this be a generic DAG combine?
+  // TODO: Why doesn't SimplifyDemandedVectorElts catch this?
   if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
       Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
-    if (isNullConstant(Vec.getOperand(2))) {
-      SDValue SubVec2 = Vec.getOperand(1);
-      // If needed, look through bitcasts to get to the load.
-      if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
-        bool Fast;
-        unsigned Alignment = FirstLd->getAlignment();
-        unsigned AS = FirstLd->getAddressSpace();
-        const X86TargetLowering *TLI = Subtarget.getTargetLowering();
-        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                    OpVT, AS, Alignment, &Fast) && Fast) {
-          SDValue Ops[] = {SubVec2, SubVec};
-          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
-                                                    Subtarget, false))
-            return Ld;
-        }
-      }
-      // If lower/upper loads are the same and there's no other use of the lower
-      // load, then splat the loaded value with a broadcast.
-      if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
-        if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
-          return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
-
-      // If this is subv_broadcast insert into both halves, use a larger
-      // subv_broadcast.
-      if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2)
-        return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
-                           SubVec.getOperand(0));
-
-      // If we're inserting all zeros into the upper half, change this to
-      // an insert into an all zeros vector. We will match this to a move
-      // with implicit upper bit zeroing during isel.
-      if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
-        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
-                           getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2,
-                           Vec.getOperand(2));
+      OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
+      isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
+      Vec.hasOneUse()) {
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
+                      Vec.getOperand(1), Vec.getOperand(2));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
+                       N->getOperand(2));
+  }
 
-      // If we are inserting into both halves of the vector, the starting
-      // vector should be undef. If it isn't, make it so. Only do this if the
-      // the early insert has no other uses.
-      // TODO: Should this be a generic DAG combine?
-      if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) {
-        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
-                          SubVec2, Vec.getOperand(2));
-        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
-                           N->getOperand(2));
+  // If this is a broadcast insert into an upper undef, use a larger broadcast.
+  if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
+    return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
 
-      }
-    }
+  return SDValue();
+}
+
+/// If we are extracting a subvector of a vector select and the select condition
+/// is composed of concatenated vectors, try to narrow the select width. This
+/// is a common pattern for AVX1 integer code because 256-bit selects may be
+/// legal, but there is almost no integer math/logic available for 256-bit.
+/// This function should only be called with legal types (otherwise, the calls
+/// to get simple value types will assert).
+static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
+  SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
+  SmallVector<SDValue, 4> CatOps;
+  if (Sel.getOpcode() != ISD::VSELECT ||
+      !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
+    return SDValue();
+
+  // Note: We assume simple value types because this should only be called with
+  //       legal operations/types.
+  // TODO: This can be extended to handle extraction to 256-bits.
+  MVT VT = Ext->getSimpleValueType(0);
+  if (!VT.is128BitVector())
+    return SDValue();
+
+  MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
+  if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
+    return SDValue();
+
+  MVT WideVT = Ext->getOperand(0).getSimpleValueType();
+  MVT SelVT = Sel.getSimpleValueType();
+  assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
+         "Unexpected vector type with legal operations");
+
+  unsigned SelElts = SelVT.getVectorNumElements();
+  unsigned CastedElts = WideVT.getVectorNumElements();
+  unsigned ExtIdx = cast<ConstantSDNode>(Ext->getOperand(1))->getZExtValue();
+  if (SelElts % CastedElts == 0) {
+    // The select has the same or more (narrower) elements than the extract
+    // operand. The extraction index gets scaled by that factor.
+    ExtIdx *= (SelElts / CastedElts);
+  } else if (CastedElts % SelElts == 0) {
+    // The select has fewer (wider) elements than the extract operand. Make sure
+    // that the extraction index can be divided evenly.
+    unsigned IndexDivisor = CastedElts / SelElts;
+    if (ExtIdx % IndexDivisor != 0)
+      return SDValue();
+    ExtIdx /= IndexDivisor;
+  } else {
+    llvm_unreachable("Element count of simple vector types are not divisible?");
   }
 
-  return SDValue();
+  unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
+  unsigned NarrowElts = SelElts / NarrowingFactor;
+  MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
+  SDLoc DL(Ext);
+  SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
+  SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
+  SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
+  SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
+  return DAG.getBitcast(VT, NarrowSel);
 }
 
 static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
@@ -41334,7 +43924,10 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
 
   // Capture the original wide type in the likely case that we need to bitcast
   // back to this type.
-  EVT VT = N->getValueType(0);
+  if (!N->getValueType(0).isSimple())
+    return SDValue();
+
+  MVT VT = N->getSimpleValueType(0);
   EVT WideVecVT = N->getOperand(0).getValueType();
   SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -41360,65 +43953,102 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  MVT OpVT = N->getSimpleValueType(0);
+  if (SDValue V = narrowExtractedVectorSelect(N, DAG))
+    return V;
+
   SDValue InVec = N->getOperand(0);
   unsigned IdxVal = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
 
   if (ISD::isBuildVectorAllZeros(InVec.getNode()))
-    return getZeroVector(OpVT, Subtarget, DAG, SDLoc(N));
+    return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
 
   if (ISD::isBuildVectorAllOnes(InVec.getNode())) {
-    if (OpVT.getScalarType() == MVT::i1)
-      return DAG.getConstant(1, SDLoc(N), OpVT);
-    return getOnesVector(OpVT, DAG, SDLoc(N));
+    if (VT.getScalarType() == MVT::i1)
+      return DAG.getConstant(1, SDLoc(N), VT);
+    return getOnesVector(VT, DAG, SDLoc(N));
   }
 
   if (InVec.getOpcode() == ISD::BUILD_VECTOR)
     return DAG.getBuildVector(
-        OpVT, SDLoc(N),
-        InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements()));
+        VT, SDLoc(N),
+        InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));
+
+  // Try to move vector bitcast after extract_subv by scaling extraction index:
+  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+  // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR
+  if (InVec.getOpcode() == ISD::BITCAST &&
+      InVec.getOperand(0).getValueType().isVector()) {
+    SDValue SrcOp = InVec.getOperand(0);
+    EVT SrcVT = SrcOp.getValueType();
+    unsigned SrcNumElts = SrcVT.getVectorNumElements();
+    unsigned DestNumElts = InVec.getValueType().getVectorNumElements();
+    if ((DestNumElts % SrcNumElts) == 0) {
+      unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+      if ((VT.getVectorNumElements() % DestSrcRatio) == 0) {
+        unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio;
+        EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+                                        SrcVT.getScalarType(), NewExtNumElts);
+        if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+            TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+          unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+          SDLoc DL(N);
+          SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+          SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+                                           SrcOp, NewIndex);
+          return DAG.getBitcast(VT, NewExtract);
+        }
+      }
+    }
+  }
+
+  // If we're extracting from a broadcast then we're better off just
+  // broadcasting to the smaller type directly, assuming this is the only use.
+  // As it's a broadcast we don't care about the extraction index.
+  if (InVec.getOpcode() == X86ISD::VBROADCAST && InVec.hasOneUse() &&
+      InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
+    return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
 
   // If we're extracting the lowest subvector and we're the only user,
   // we may be able to perform this with a smaller vector width.
   if (IdxVal == 0 && InVec.hasOneUse()) {
     unsigned InOpcode = InVec.getOpcode();
-    if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
+    if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
       // v2f64 CVTDQ2PD(v4i32).
       if (InOpcode == ISD::SINT_TO_FP &&
           InVec.getOperand(0).getValueType() == MVT::v4i32) {
-        return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0));
+        return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
+      }
+      // v2f64 CVTUDQ2PD(v4i32).
+      if (InOpcode == ISD::UINT_TO_FP &&
+          InVec.getOperand(0).getValueType() == MVT::v4i32) {
+        return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
       }
       // v2f64 CVTPS2PD(v4f32).
       if (InOpcode == ISD::FP_EXTEND &&
           InVec.getOperand(0).getValueType() == MVT::v4f32) {
-        return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
+        return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0));
       }
     }
-    if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) &&
-        OpVT.is128BitVector() &&
-        InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
-      unsigned ExtOp =
-        InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG
-                                     : ISD::SIGN_EXTEND_VECTOR_INREG;
-      return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
-    }
-    if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+    if ((InOpcode == ISD::ANY_EXTEND ||
+         InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
+         InOpcode == ISD::ZERO_EXTEND ||
+         InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+         InOpcode == ISD::SIGN_EXTEND ||
          InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
-        OpVT.is128BitVector() &&
+        VT.is128BitVector() &&
         InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
-      return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0));
+      unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
+      return DAG.getNode(ExtOp, SDLoc(N), VT, InVec.getOperand(0));
     }
-    if (InOpcode == ISD::BITCAST) {
-      // TODO - do this for target shuffles in general.
-      SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
-      if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
-        SDLoc DL(N);
-        SDValue SubPSHUFB =
-            DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
-                        extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
-                        extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
-        return DAG.getBitcast(OpVT, SubPSHUFB);
-      }
+    if (InOpcode == ISD::VSELECT &&
+        InVec.getOperand(0).getValueType().is256BitVector() &&
+        InVec.getOperand(1).getValueType().is256BitVector() &&
+        InVec.getOperand(2).getValueType().is256BitVector()) {
+      SDLoc DL(N);
+      SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
+      SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
+      SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
+      return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
     }
   }
 
@@ -41428,6 +44058,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
 static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
   SDValue Src = N->getOperand(0);
+  SDLoc DL(N);
 
   // If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
   // This occurs frequently in our masked scalar intrinsic code and our
@@ -41436,7 +44067,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
   if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
     if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
       if (C->getAPIntValue().isOneValue())
-        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
                            Src.getOperand(0));
 
   // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
@@ -41445,8 +44076,17 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
       Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
     if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
       if (C->isNullValue())
-        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
-                           Src.getOperand(0), Src.getOperand(1));
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
+                           Src.getOperand(1));
+
+  // Reduce v2i64 to v4i32 if we don't need the upper bits.
+  // TODO: Move to DAGCombine?
+  if (VT == MVT::v2i64 && Src.getOpcode() == ISD::ANY_EXTEND &&
+      Src.getValueType() == MVT::i64 && Src.hasOneUse() &&
+      Src.getOperand(0).getScalarValueSizeInBits() <= 32)
+    return DAG.getBitcast(
+        VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+                        DAG.getAnyExtOrTrunc(Src.getOperand(0), DL, MVT::i32)));
 
   return SDValue();
 }
@@ -41483,6 +44123,56 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
+                               TargetLowering::DAGCombinerInfo &DCI,
+                               const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue In = N->getOperand(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Try to merge vector loads and extend_inreg to an extload.
+  if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
+      In.hasOneUse()) {
+    auto *Ld = cast<LoadSDNode>(In);
+    if (!Ld->isVolatile()) {
+      MVT SVT = In.getSimpleValueType().getVectorElementType();
+      ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+      EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+                                   VT.getVectorNumElements());
+      if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
+        SDValue Load =
+            DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
+                           Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
+                           Ld->getMemOperand()->getFlags());
+        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+        return Load;
+      }
+    }
+  }
+
+  // The combines below are disabled for widening legalization for now. We can
+  // enable them if we find a case that needs them. Otherwise they can be
+  // deleted when we switch to widening legalization.
+  if (ExperimentalVectorWideningLegalization)
+    return SDValue();
+
+  // Combine (ext_invec (ext_invec X)) -> (ext_invec X)
+  if (In.getOpcode() == N->getOpcode() &&
+      TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
+
+  // Attempt to combine as a shuffle.
+  // TODO: SSE41 support
+  if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
+    SDValue Op(N, 0);
+    if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
+      if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+        return Res;
+  }
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -41494,6 +44184,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PEXTRW:
   case X86ISD::PEXTRB:
     return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+  case ISD::CONCAT_VECTORS:
+    return combineConcatVectors(N, DAG, DCI, Subtarget);
   case ISD::INSERT_SUBVECTOR:
     return combineInsertSubvector(N, DAG, DCI, Subtarget);
   case ISD::EXTRACT_SUBVECTOR:
@@ -41506,19 +44198,21 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::CMP:         return combineCMP(N, DAG);
   case ISD::ADD:            return combineAdd(N, DAG, Subtarget);
   case ISD::SUB:            return combineSub(N, DAG, Subtarget);
+  case X86ISD::ADD:
+  case X86ISD::SUB:         return combineX86AddSub(N, DAG, DCI);
   case X86ISD::SBB:         return combineSBB(N, DAG);
   case X86ISD::ADC:         return combineADC(N, DAG, DCI);
   case ISD::MUL:            return combineMul(N, DAG, DCI, Subtarget);
-  case ISD::SHL:
-  case ISD::SRA:
-  case ISD::SRL:            return combineShift(N, DAG, DCI, Subtarget);
+  case ISD::SHL:            return combineShiftLeft(N, DAG);
+  case ISD::SRA:            return combineShiftRightArithmetic(N, DAG);
+  case ISD::SRL:            return combineShiftRightLogical(N, DAG, DCI);
   case ISD::AND:            return combineAnd(N, DAG, DCI, Subtarget);
   case ISD::OR:             return combineOr(N, DAG, DCI, Subtarget);
   case ISD::XOR:            return combineXor(N, DAG, DCI, Subtarget);
   case X86ISD::BEXTR:       return combineBEXTR(N, DAG, DCI, Subtarget);
   case ISD::LOAD:           return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
-  case ISD::STORE:          return combineStore(N, DAG, Subtarget);
+  case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
   case ISD::MSTORE:         return combineMaskedStore(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP:     return combineSIntToFP(N, DAG, Subtarget);
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
@@ -41535,13 +44229,21 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FMAX:        return combineFMinFMax(N, DAG);
   case ISD::FMINNUM:
   case ISD::FMAXNUM:        return combineFMinNumFMaxNum(N, DAG, Subtarget);
-  case X86ISD::CVTSI2P:  
+  case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P:     return combineX86INT_TO_FP(N, DAG, DCI);
+  case X86ISD::CVTP2SI:
+  case X86ISD::CVTP2UI:
+  case X86ISD::CVTTP2SI:
+  case X86ISD::CVTTP2UI:    return combineCVTP2I_CVTTP2I(N, DAG, DCI);
   case X86ISD::BT:          return combineBT(N, DAG, DCI);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:    return combineZext(N, DAG, DCI, Subtarget);
   case ISD::SIGN_EXTEND:    return combineSext(N, DAG, DCI, Subtarget);
   case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI,
+                                                             Subtarget);
   case ISD::SETCC:          return combineSetCC(N, DAG, Subtarget);
   case X86ISD::SETCC:       return combineX86SetCC(N, DAG, Subtarget);
   case X86ISD::BRCOND:      return combineBrCond(N, DAG, Subtarget);
@@ -41624,11 +44326,15 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
   if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
     return false;
 
-  // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
-  // we have specializations to turn 32-bit multiply into LEA or other ops.
+  // TODO: Almost no 8-bit ops are desirable because they have no actual
+  //       size/speed advantages vs. 32-bit ops, but they do have a major
+  //       potential disadvantage by causing partial register stalls.
+  //
+  // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
+  // we have specializations to turn 32-bit multiply/shl into LEA or other ops.
   // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
   // check for a constant operand to the multiply.
-  if (Opc == ISD::MUL && VT == MVT::i8)
+  if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
     return false;
 
   // i16 instruction encodings are longer and some i16 instructions are slow,
@@ -41642,6 +44348,7 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
     case ISD::ZERO_EXTEND:
     case ISD::ANY_EXTEND:
     case ISD::SHL:
+    case ISD::SRA:
     case ISD::SRL:
     case ISD::SUB:
     case ISD::ADD:
@@ -41717,6 +44424,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
   case ISD::ANY_EXTEND:
     break;
   case ISD::SHL:
+  case ISD::SRA:
   case ISD::SRL: {
     SDValue N0 = Op.getOperand(0);
     // Look out for (store (shl (load), x)).
@@ -41889,6 +44597,40 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
   return false;
 }
 
+/// Map a "{@cc<cond>}" flag-output constraint to its X86 condition code.
+static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+  return StringSwitch<X86::CondCode>(Constraint)
+      .Case("{@cca}", X86::COND_A)
+      .Case("{@ccae}", X86::COND_AE)
+      .Case("{@ccb}", X86::COND_B)
+      .Case("{@ccbe}", X86::COND_BE)
+      .Case("{@ccc}", X86::COND_B)
+      .Case("{@cce}", X86::COND_E)
+      .Case("{@ccz}", X86::COND_E)
+      .Case("{@ccg}", X86::COND_G)
+      .Case("{@ccge}", X86::COND_GE)
+      .Case("{@ccl}", X86::COND_L)
+      .Case("{@ccle}", X86::COND_LE)
+      .Case("{@ccna}", X86::COND_BE)
+      .Case("{@ccnae}", X86::COND_B)
+      .Case("{@ccnb}", X86::COND_AE)
+      .Case("{@ccnbe}", X86::COND_A)
+      .Case("{@ccnc}", X86::COND_AE)
+      .Case("{@ccne}", X86::COND_NE)
+      .Case("{@ccnz}", X86::COND_NE)
+      .Case("{@ccng}", X86::COND_LE)
+      .Case("{@ccnge}", X86::COND_L)
+      .Case("{@ccnl}", X86::COND_GE)
+      .Case("{@ccnle}", X86::COND_G)
+      .Case("{@ccno}", X86::COND_NO)
+      .Case("{@ccnp}", X86::COND_NP) // "not parity": must differ from {@ccp}
+      .Case("{@ccns}", X86::COND_NS)
+      .Case("{@cco}", X86::COND_O)
+      .Case("{@ccp}", X86::COND_P)
+      .Case("{@ccs}", X86::COND_S)
+      .Default(X86::COND_INVALID); // anything else is not a flag output
+}
+
 /// Given a constraint letter, return the type of constraint for this target.
 X86TargetLowering::ConstraintType
 X86TargetLowering::getConstraintType(StringRef Constraint) const {
@@ -41949,7 +44691,8 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
         return C_RegisterClass;
       }
     }
-  }
+  } else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
+    return C_Other;
   return TargetLowering::getConstraintType(Constraint);
 }
 
@@ -42120,6 +44863,32 @@ LowerXConstraint(EVT ConstraintVT) const {
   return TargetLowering::LowerXConstraint(ConstraintVT);
 }
 
+// Lower @cc flag-output constraints via setcc; others yield an empty SDValue.
+SDValue X86TargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+    SelectionDAG &DAG) const {
+  X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+  if (Cond == X86::COND_INVALID)
+    return SDValue();
+  // Only scalar integer outputs of at least 8 bits are accepted.
+  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+      OpInfo.ConstraintVT.getSizeInBits() < 8)
+    report_fatal_error("Flag output operand is of invalid type");
+
+  // Get EFLAGS register. Only update chain when copyfrom is glued.
+  if (Flag.getNode()) {
+    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
+    Chain = Flag.getValue(1);
+  } else
+    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);
+  // Materialize the requested condition from the copied EFLAGS value.
+  SDValue CC = getSETCC(Cond, Flag, DL, DAG);
+  // Zero-extend the setcc result to the constraint's value type.
+  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
+
+  return Result;
+}
+
 /// Lower the specified operand into the Ops vector.
 /// If it is invalid, don't add anything to Ops.
 void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -42229,8 +44998,13 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'i': {
     // Literal immediates are always ok.
     if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
-      // Widen to 64 bits here to get it sign extended.
-      Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64);
+      bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
+      BooleanContent BCont = getBooleanContents(MVT::i64);
+      ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+                                    : ISD::SIGN_EXTEND;
+      int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
+                                                  : CST->getSExtValue();
+      Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
       break;
     }
 
@@ -42242,40 +45016,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 
     // If we are in non-pic codegen mode, we allow the address of a global (with
     // an optional displacement) to be used with 'i'.
-    GlobalAddressSDNode *GA = nullptr;
-    int64_t Offset = 0;
-
-    // Match either (GA), (GA+C), (GA+C1+C2), etc.
-    while (1) {
-      if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
-        Offset += GA->getOffset();
-        break;
-      } else if (Op.getOpcode() == ISD::ADD) {
-        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-          Offset += C->getZExtValue();
-          Op = Op.getOperand(0);
-          continue;
-        }
-      } else if (Op.getOpcode() == ISD::SUB) {
-        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-          Offset += -C->getZExtValue();
-          Op = Op.getOperand(0);
-          continue;
-        }
-      }
-
-      // Otherwise, this isn't something we can handle, reject it.
-      return;
-    }
-
-    const GlobalValue *GV = GA->getGlobal();
-    // If we require an extra load to get this address, as in PIC mode, we
-    // can't accept it.
-    if (isGlobalStubReference(Subtarget.classifyGlobalReference(GV)))
-      return;
-
-    Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
-                                        GA->getValueType(0), Offset);
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
+      // If we require an extra load to get this address, as in PIC mode, we
+      // can't accept it.
+      if (isGlobalStubReference(
+              Subtarget.classifyGlobalReference(GA->getGlobal())))
+        return;
     break;
   }
   }
@@ -42307,6 +45053,18 @@ static bool isFRClass(const TargetRegisterClass &RC) {
          RC.hasSuperClassEq(&X86::VR512RegClass);
 }
 
+/// Check if \p RC is a mask register class.
+/// I.e., one of the VK* classes (VK1 through VK64) or one of their variants.
+static bool isVKClass(const TargetRegisterClass &RC) {
+  return RC.hasSuperClassEq(&X86::VK1RegClass) ||
+         RC.hasSuperClassEq(&X86::VK2RegClass) ||
+         RC.hasSuperClassEq(&X86::VK4RegClass) ||
+         RC.hasSuperClassEq(&X86::VK8RegClass) ||
+         RC.hasSuperClassEq(&X86::VK16RegClass) ||
+         RC.hasSuperClassEq(&X86::VK32RegClass) ||
+         RC.hasSuperClassEq(&X86::VK64RegClass);
+}
+
 std::pair<unsigned, const TargetRegisterClass *>
 X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                 StringRef Constraint,
@@ -42317,25 +45075,31 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     // GCC Constraint Letters
     switch (Constraint[0]) {
     default: break;
+    // 'A' means [ER]AX + [ER]DX.
+    case 'A':
+      if (Subtarget.is64Bit())
+        return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
+      assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+             "Expecting 64, 32 or 16 bit subtarget");
+      return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
+
       // TODO: Slight differences here in allocation order and leaving
       // RIP in the class. Do they matter any more here than they do
       // in the normal allocation?
     case 'k':
       if (Subtarget.hasAVX512()) {
-        //  Only supported in AVX512 or later.
-        switch (VT.SimpleTy) {
-        default: break;
-        case MVT::i32:
-          return std::make_pair(0U, &X86::VK32RegClass);
-        case MVT::i16:
-          return std::make_pair(0U, &X86::VK16RegClass);
-        case MVT::i8:
-          return std::make_pair(0U, &X86::VK8RegClass);
-        case MVT::i1:
+        if (VT == MVT::i1)
           return std::make_pair(0U, &X86::VK1RegClass);
-        case MVT::i64:
+        if (VT == MVT::i8)
+          return std::make_pair(0U, &X86::VK8RegClass);
+        if (VT == MVT::i16)
+          return std::make_pair(0U, &X86::VK16RegClass);
+      }
+      if (Subtarget.hasBWI()) {
+        if (VT == MVT::i32)
+          return std::make_pair(0U, &X86::VK32RegClass);
+        if (VT == MVT::i64)
           return std::make_pair(0U, &X86::VK64RegClass);
-        }
       }
       break;
     case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
@@ -42403,7 +45167,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       // Scalar SSE types.
       case MVT::f32:
       case MVT::i32:
-        if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
+        if (VConstraint && Subtarget.hasVLX())
           return std::make_pair(0U, &X86::FR32XRegClass);
         return std::make_pair(0U, &X86::FR32RegClass);
       case MVT::f64:
@@ -42431,12 +45195,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       case MVT::v4f64:
         if (VConstraint && Subtarget.hasVLX())
           return std::make_pair(0U, &X86::VR256XRegClass);
-        return std::make_pair(0U, &X86::VR256RegClass);
+        if (Subtarget.hasAVX())
+          return std::make_pair(0U, &X86::VR256RegClass);
+        break;
       case MVT::v8f64:
       case MVT::v16f32:
       case MVT::v16i32:
       case MVT::v8i64:
-        return std::make_pair(0U, &X86::VR512RegClass);
+        if (!Subtarget.hasAVX512()) break;
+        if (VConstraint)
+          return std::make_pair(0U, &X86::VR512RegClass);
+        return std::make_pair(0U, &X86::VR512_0_15RegClass);
       }
       break;
     }
@@ -42457,25 +45226,27 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       return std::make_pair(X86::XMM0, &X86::VR128RegClass);
     case 'k':
       // This register class doesn't allocate k0 for masked vector operation.
-      if (Subtarget.hasAVX512()) { // Only supported in AVX512.
-        switch (VT.SimpleTy) {
-        default: break;
-        case MVT::i32:
-          return std::make_pair(0U, &X86::VK32WMRegClass);
-        case MVT::i16:
-          return std::make_pair(0U, &X86::VK16WMRegClass);
-        case MVT::i8:
-          return std::make_pair(0U, &X86::VK8WMRegClass);
-        case MVT::i1:
+      if (Subtarget.hasAVX512()) {
+        if (VT == MVT::i1)
           return std::make_pair(0U, &X86::VK1WMRegClass);
-        case MVT::i64:
+        if (VT == MVT::i8)
+          return std::make_pair(0U, &X86::VK8WMRegClass);
+        if (VT == MVT::i16)
+          return std::make_pair(0U, &X86::VK16WMRegClass);
+      }
+      if (Subtarget.hasBWI()) {
+        if (VT == MVT::i32)
+          return std::make_pair(0U, &X86::VK32WMRegClass);
+        if (VT == MVT::i64)
           return std::make_pair(0U, &X86::VK64WMRegClass);
-        }
       }
       break;
     }
   }
 
+  if (parseConstraintCode(Constraint) != X86::COND_INVALID)
+    return std::make_pair(0U, &X86::GR32RegClass);
+
   // Use the default implementation in TargetLowering to convert the register
   // constraint into a member of a register class.
   std::pair<unsigned, const TargetRegisterClass*> Res;
@@ -42505,14 +45276,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     if (StringRef("{flags}").equals_lower(Constraint))
       return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
 
-    // 'A' means [ER]AX + [ER]DX.
-    if (Constraint == "A") {
-      if (Subtarget.is64Bit())
-        return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
-      assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
-             "Expecting 64, 32 or 16 bit subtarget");
-      return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
-    }
+    // dirflag -> DF
+    if (StringRef("{dirflag}").equals_lower(Constraint))
+      return std::make_pair(X86::DF, &X86::DFCCRRegClass);
+
+    // fpsr -> FPSW
+    if (StringRef("{fpsr}").equals_lower(Constraint))
+      return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
+
     return Res;
   }
 
@@ -42561,20 +45332,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       if (Size == 64 && !is64Bit) {
         // Model GCC's behavior here and select a fixed pair of 32-bit
         // registers.
-        switch (Res.first) {
-        case X86::EAX:
+        switch (DestReg) {
+        case X86::RAX:
           return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
-        case X86::EDX:
+        case X86::RDX:
           return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
-        case X86::ECX:
+        case X86::RCX:
           return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
-        case X86::EBX:
+        case X86::RBX:
           return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
-        case X86::ESI:
+        case X86::RSI:
           return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
-        case X86::EDI:
+        case X86::RDI:
           return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
-        case X86::EBP:
+        case X86::RBP:
           return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
         default:
           return std::make_pair(0, nullptr);
@@ -42594,13 +45365,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 
     // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
     if (VT == MVT::f32 || VT == MVT::i32)
-      Res.second = &X86::FR32RegClass;
+      Res.second = &X86::FR32XRegClass;
     else if (VT == MVT::f64 || VT == MVT::i64)
-      Res.second = &X86::FR64RegClass;
-    else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT))
-      Res.second = &X86::VR128RegClass;
-    else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT))
-      Res.second = &X86::VR256RegClass;
+      Res.second = &X86::FR64XRegClass;
+    else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
+      Res.second = &X86::VR128XRegClass;
+    else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
+      Res.second = &X86::VR256XRegClass;
     else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
       Res.second = &X86::VR512RegClass;
     else {
@@ -42608,6 +45379,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       Res.first = 0;
       Res.second = nullptr;
     }
+  } else if (isVKClass(*Class)) {
+    if (VT == MVT::i1)
+      Res.second = &X86::VK1RegClass;
+    else if (VT == MVT::i8)
+      Res.second = &X86::VK8RegClass;
+    else if (VT == MVT::i16)
+      Res.second = &X86::VK16RegClass;
+    else if (VT == MVT::i32)
+      Res.second = &X86::VK32RegClass;
+    else if (VT == MVT::i64)
+      Res.second = &X86::VK64RegClass;
+    else {
+      // Type mismatch and not a clobber: Return an error;
+      Res.first = 0;
+      Res.second = nullptr;
+    }
   }
 
   return Res;
@@ -42660,7 +45447,7 @@ void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
 
   // Update IsSplitCSR in X86MachineFunctionInfo.
   X86MachineFunctionInfo *AFI =
-    Entry->getParent()->getInfo<X86MachineFunctionInfo>();
+      Entry->getParent()->getInfo<X86MachineFunctionInfo>();
   AFI->setIsSplitCSR(true);
 }
 
@@ -42688,9 +45475,9 @@ void X86TargetLowering::insertCopiesSplitCSR(
     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
     // nounwind. If we want to generalize this later, we may need to emit
     // CFI pseudo-instructions.
-    assert(Entry->getParent()->getFunction().hasFnAttribute(
-               Attribute::NoUnwind) &&
-           "Function should be nounwind in insertCopiesSplitCSR!");
+    assert(
+        Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+        "Function should be nounwind in insertCopiesSplitCSR!");
     Entry->addLiveIn(*I);
     BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
         .addReg(*I);
@@ -42709,7 +45496,8 @@ bool X86TargetLowering::supportSwiftError() const {
 
 /// Returns the name of the symbol used to emit stack probes or the empty
 /// string if not applicable.
-StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+StringRef
+X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
   // If the function specifically requests stack probes, emit them.
   if (MF.getFunction().hasFnAttribute("probe-stack"))
     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 910acd80e8b8..e0be03bc3f9d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1,9 +1,8 @@
 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -78,15 +77,6 @@ namespace llvm {
       /// Same as call except it adds the NoTrack prefix.
       NT_CALL,
 
-      /// This operation implements the lowering for readcyclecounter.
-      RDTSC_DAG,
-
-      /// X86 Read Time-Stamp Counter and Processor ID.
-      RDTSCP_DAG,
-
-      /// X86 Read Performance Monitoring Counters.
-      RDPMC_DAG,
-
       /// X86 compare and logical compare instructions.
       CMP, COMI, UCOMI,
 
@@ -110,13 +100,12 @@ namespace llvm {
       FSETCC,
 
       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
-      /// with optional rounding mode.
-      FSETCCM, FSETCCM_RND,
+      /// a version with SAE.
+      FSETCCM, FSETCCM_SAE,
 
       /// X86 conditional moves. Operand 0 and operand 1 are the two values
       /// to select from. Operand 2 is the condition code, and operand 3 is the
-      /// flag operand produced by a CMP or TEST instruction. It also writes a
-      /// flag result.
+      /// flag operand produced by a CMP or TEST instruction.
       CMOV,
 
       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
@@ -204,28 +193,29 @@ namespace llvm {
       /// Dynamic (non-constant condition) vector blend where only the sign bits
       /// of the condition elements are used. This is used to enforce that the
       /// condition mask is not valid for generic VSELECT optimizations. This
-      /// can also be used to implement the intrinsics.
+      /// is also used to implement the intrinsics.
+      /// Operands are in VSELECT order: MASK, TRUE, FALSE
       BLENDV,
 
       /// Combined add and sub on an FP vector.
       ADDSUB,
 
       //  FP vector ops with rounding mode.
-      FADD_RND, FADDS_RND,
-      FSUB_RND, FSUBS_RND,
-      FMUL_RND, FMULS_RND,
-      FDIV_RND, FDIVS_RND,
-      FMAX_RND, FMAXS_RND,
-      FMIN_RND, FMINS_RND,
-      FSQRT_RND, FSQRTS_RND,
+      FADD_RND, FADDS, FADDS_RND,
+      FSUB_RND, FSUBS, FSUBS_RND,
+      FMUL_RND, FMULS, FMULS_RND,
+      FDIV_RND, FDIVS, FDIVS_RND,
+      FMAX_SAE, FMAXS_SAE,
+      FMIN_SAE, FMINS_SAE,
+      FSQRT_RND, FSQRTS, FSQRTS_RND,
 
       // FP vector get exponent.
-      FGETEXP_RND, FGETEXPS_RND,
+      FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
       // Extract Normalized Mantissas.
-      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
+      VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
       // FP Scale.
-      SCALEF,
-      SCALEFS,
+      SCALEF, SCALEF_RND,
+      SCALEFS, SCALEFS_RND,
 
       // Unsigned Integer average.
       AVG,
@@ -300,10 +290,10 @@ namespace llvm {
       VMTRUNC, VMTRUNCUS, VMTRUNCS,
 
       // Vector FP extend.
-      VFPEXT, VFPEXT_RND, VFPEXTS_RND,
+      VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
 
       // Vector FP round.
-      VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+      VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
 
       // Masked version of above. Used for v2f64->v4f32.
       // SRC, PASSTHRU, MASK
@@ -315,10 +305,8 @@ namespace llvm {
       // Vector shift elements
       VSHL, VSRL, VSRA,
 
-      // Vector variable shift right arithmetic.
-      // Unlike ISD::SRA, in case shift count greater then element size
-      // use sign bit to fill destination data element.
-      VSRAV,
+      // Vector variable shift
+      VSHLV, VSRLV, VSRAV,
 
       // Vector shift elements by immediate
       VSHLI, VSRLI, VSRAI,
@@ -343,8 +331,8 @@ namespace llvm {
       /// Vector comparison generating mask bits for fp and
       /// integer signed and unsigned data types.
       CMPM,
-      // Vector comparison with rounding mode for FP values
-      CMPM_RND,
+      // Vector comparison with SAE for FP values
+      CMPM_SAE,
 
       // Arithmetic operations with FLAGS results.
       ADD, SUB, ADC, SBB, SMUL, UMUL,
@@ -419,16 +407,16 @@ namespace llvm {
       // Bitwise ternary logic.
       VPTERNLOG,
       // Fix Up Special Packed Float32/64 values.
-      VFIXUPIMM,
-      VFIXUPIMMS,
+      VFIXUPIMM, VFIXUPIMM_SAE,
+      VFIXUPIMMS, VFIXUPIMMS_SAE,
       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
-      VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
+      VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
       // Reduce - Perform Reduction Transformation on scalar\packed FP.
-      VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
+      VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
       // Also used by the legacy (V)ROUND intrinsics where we mask out the
       // scaling part of the immediate.
-      VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
+      VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
       // Tests Types Of a FP Values for packed types.
       VFPCLASS,
       // Tests Types Of a FP Values for scalar types.
@@ -499,6 +487,7 @@ namespace llvm {
 
       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
       SINT_TO_FP_RND, UINT_TO_FP_RND,
+      SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 
       // Vector float/double to signed/unsigned integer.
@@ -507,9 +496,9 @@ namespace llvm {
       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
 
       // Vector float/double to signed/unsigned integer with truncation.
-      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
+      CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
       // Scalar float/double to signed/unsigned integer with truncation.
-      CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
+      CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
 
       // Vector signed/unsigned integer to float/double.
       CVTSI2P, CVTUI2P,
@@ -517,6 +506,20 @@ namespace llvm {
       // Masked versions of above. Used for v2f64->v4f32.
       // SRC, PASSTHRU, MASK
       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+      MCVTSI2P, MCVTUI2P,
+
+      // Vector float to bfloat16.
+      // Convert TWO packed single data to one packed BF16 data
+      CVTNE2PS2BF16, 
+      // Convert packed single data to packed BF16 data
+      CVTNEPS2BF16,
+      // Masked version of above.
+      // SRC, PASSTHRU, MASK
+      MCVTNEPS2BF16,
+
+      // Dot product of BF16 pairs, accumulated into
+      // packed single precision.
+      DPBF16PS,
 
       // Save xmm argument registers to the stack, according to %al. An operator
       // is needed so that this can be expanded with control flow.
@@ -547,6 +550,12 @@ namespace llvm {
       // indicate whether it is valid in CF.
       RDSEED,
 
+      // Protection keys
+      // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
+      // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
+      // value for ECX.
+      RDPKRU, WRPKRU,
+
       // SSE42 string comparisons.
       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
       // will emit one or two instructions based on which results are used. If
@@ -560,10 +569,11 @@ namespace llvm {
       XTEST,
 
       // ERI instructions.
-      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
+      RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
+      RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
 
       // Conversions between float and half-float.
-      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+      CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
 
       // Masked version of above.
       // SRC, RND, PASSTHRU, MASK
@@ -578,6 +588,12 @@ namespace llvm {
       // User level wait
       UMWAIT, TPAUSE,
 
+      // Enqueue Stores Instructions
+      ENQCMD, ENQCMDS,
+
+      // For avx512-vp2intersect
+      VP2INTERSECT,
+
       // Compare and swap.
       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LCMPXCHG8_DAG,
@@ -592,6 +608,9 @@ namespace llvm {
       // Load, scalar_to_vector, and zero extend.
       VZEXT_LOAD,
 
+      // extract_vector_elt, store.
+      VEXTRACT_STORE,
+
       // Store FP control world into i16 memory.
       FNSTCW16m,
 
@@ -599,29 +618,33 @@ namespace llvm {
       /// integer destination in memory and a FP reg source.  This corresponds
       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
       /// has two inputs (token chain and address) and two outputs (int value
-      /// and token chain).
-      FP_TO_INT16_IN_MEM,
-      FP_TO_INT32_IN_MEM,
-      FP_TO_INT64_IN_MEM,
+      /// and token chain). Memory VT specifies the type to store to.
+      FP_TO_INT_IN_MEM,
 
       /// This instruction implements SINT_TO_FP with the
       /// integer source in memory and FP reg result.  This corresponds to the
-      /// X86::FILD*m instructions. It has three inputs (token chain, address,
-      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
-      /// also produces a flag).
+      /// X86::FILD*m instructions. It has two inputs (token chain and address)
+      /// and two outputs (FP value and token chain). FILD_FLAG also produces a
+      /// flag. The integer source type is specified by the memory VT.
       FILD,
       FILD_FLAG,
 
+      /// This instruction implements a fp->int store from FP stack
+      /// slots. This corresponds to the fist instruction. It takes a
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
+      FIST,
+
       /// This instruction implements an extending load to FP stack slots.
       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
-      /// operand, ptr to load from, and a ValueType node indicating the type
-      /// to load to.
+      /// operand, and ptr to load from. The memory VT specifies the type to
+      /// load from.
       FLD,
 
-      /// This instruction implements a truncating store to FP stack
+      /// This instruction implements a truncating store from FP stack
       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, address, and a ValueType to store it
-      /// as.
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
       FST,
 
       /// This instruction grabs the address of the next argument
@@ -708,7 +731,7 @@ namespace llvm {
     /// target-independent logic.
     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                            MachineFunction &MF) const override;
+                            const AttributeList &FuncAttributes) const override;
 
     /// Returns true if it's safe to use load / store of the
     /// specified type to expand memcpy / memset inline. This is mostly true
@@ -721,7 +744,8 @@ namespace llvm {
     /// Returns true if the target allows unaligned memory accesses of the
     /// specified type. Returns whether it is "fast" in the last argument.
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
-                                       bool *Fast) const override;
+                                        MachineMemOperand::Flags Flags,
+                                        bool *Fast) const override;
 
     /// Provide custom lowering hooks for some operations.
     ///
@@ -775,7 +799,11 @@ namespace llvm {
     /// This method returns the name of a target specific DAG node.
     const char *getTargetNodeName(unsigned Opcode) const override;
 
-    bool mergeStoresAfterLegalization() const override { return true; }
+    /// Do not merge vector stores after legalization because that may conflict
+    /// with x86-specific store splitting optimizations.
+    bool mergeStoresAfterLegalization(EVT MemVT) const override {
+      return !MemVT.isVector();
+    }
 
     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                           const SelectionDAG &DAG) const override;
@@ -812,7 +840,10 @@ namespace llvm {
 
     bool hasAndNot(SDValue Y) const override;
 
-    bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+                                           CombineLevel Level) const override;
+
+    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 
     bool
     shouldTransformSignedTruncationCheck(EVT XVT,
@@ -832,6 +863,12 @@ namespace llvm {
       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
     }
 
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+      if (DAG.getMachineFunction().getFunction().hasMinSize())
+        return false;
+      return true;
+    }
+
     bool shouldSplatInsEltVarIndex(EVT VT) const override;
 
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
@@ -841,11 +878,6 @@ namespace llvm {
     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
     MVT hasFastEqualityCompare(unsigned NumBits) const override;
 
-    /// Allow multiple load pairs per block for smaller and faster code.
-    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
-      return 2;
-    }
-
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
@@ -881,6 +913,8 @@ namespace llvm {
                                            TargetLoweringOpt &TLO,
                                            unsigned Depth) const override;
 
+    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
+
     SDValue unwrapAddress(SDValue N) const override;
 
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -918,6 +952,11 @@ namespace llvm {
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
+    /// Handle lowering of flag assembly outputs.
+    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
+                                        const AsmOperandInfo &Constraint,
+                                        SelectionDAG &DAG) const override;
+
     /// Given a physical register constraint
     /// (e.g. {edx}), return the register number and the register class for the
     /// register.  This should only be used for C_Register constraints.  On
@@ -956,6 +995,12 @@ namespace llvm {
 
     bool isVectorShiftByScalarCheap(Type *Ty) const override;
 
+    /// Add x86-specific opcodes to the default list.
+    bool isBinOp(unsigned Opcode) const override;
+
+    /// Returns true if the opcode is a commutative binary operation.
+    bool isCommutativeBinOp(unsigned Opcode) const override;
+
     /// Return true if it's free to truncate a value of
     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
     /// register EAX to i16 by referencing its sub-register AX.
@@ -1001,7 +1046,8 @@ namespace llvm {
     /// Returns true if the target can instruction select the
     /// specified FP immediate natively. If false, the legalizer will
     /// materialize the FP immediate as a load from a constant pool.
-    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT,
+                      bool ForCodeSize) const override;
 
     /// Targets can use this to indicate that they only support *some*
     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
@@ -1063,6 +1109,17 @@ namespace llvm {
     /// supported.
     bool shouldScalarizeBinop(SDValue) const override;
 
+    /// Extracting a scalar FP value from index 0 of a vector is free.
+    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
+      EVT EltVT = VT.getScalarType();
+      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
+    }
+
+    /// Overflow nodes should get combined/lowered to optimal instructions
+    /// (they should allow eliminating explicit compares by getting flags from
+    /// math ops).
+    bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction
@@ -1070,7 +1127,9 @@ namespace llvm {
       return NumElem > 2;
     }
 
-    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
+    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+                                 const SelectionDAG &DAG,
+                                 const MachineMemOperand &MMO) const override;
 
     /// Intel processors have a unified instruction and data cache
     const char * getClearCacheBuiltinName() const override {
@@ -1105,7 +1164,7 @@ namespace llvm {
     bool useStackGuardXorFP() const override;
     void insertSSPDeclarations(Module &M) const override;
     Value *getSDagStackGuard(const Module &M) const override;
-    Value *getSSPStackGuardCheck(const Module &M) const override;
+    Function *getSSPStackGuardCheck(const Module &M) const override;
     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                 const SDLoc &DL) const override;
 
@@ -1221,9 +1280,7 @@ namespace llvm {
 
     unsigned getAddressSpace(void) const;
 
-    std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
-                                               bool isSigned,
-                                               bool isReplace) const;
+    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
 
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1234,12 +1291,15 @@ namespace llvm {
                                   const unsigned char OpFlags = 0) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
-                               int64_t Offset, SelectionDAG &DAG) const;
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
 
+    /// Creates target global address or external symbol nodes for calls or
+    /// other uses.
+    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
+                                  bool ForCall) const;
+
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
@@ -1568,10 +1628,10 @@ namespace llvm {
   void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                         SmallVectorImpl<T> &ScaledMask) {
     assert(0 < Scale && "Unexpected scaling factor");
-    int NumElts = Mask.size();
-    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
+    size_t NumElts = Mask.size();
+    ScaledMask.assign(NumElts * Scale, -1);
 
-    for (int i = 0; i != NumElts; ++i) {
+    for (int i = 0; i != (int)NumElts; ++i) {
       int M = Mask[i];
 
       // Repeat sentinel values in every mask element.
diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp
index 7c00c9260d15..04e8b2231fec 100644
--- a/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -1,9 +1,8 @@
 //===---- X86IndirectBranchTracking.cpp - Enables CET IBT mechanism -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -58,7 +57,7 @@ private:
   /// The function will not add it if already exists.
   /// It will add ENDBR32 or ENDBR64 opcode, depending on the target.
   /// \returns true if the ENDBR was added and false otherwise.
-  bool addENDBR(MachineBasicBlock &MBB) const;
+  bool addENDBR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
 };
 
 } // end anonymous namespace
@@ -69,20 +68,31 @@ FunctionPass *llvm::createX86IndirectBranchTrackingPass() {
   return new X86IndirectBranchTrackingPass();
 }
 
-bool X86IndirectBranchTrackingPass::addENDBR(MachineBasicBlock &MBB) const {
+bool X86IndirectBranchTrackingPass::addENDBR(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
   assert(TII && "Target instruction info was not initialized");
   assert((X86::ENDBR64 == EndbrOpcode || X86::ENDBR32 == EndbrOpcode) &&
          "Unexpected Endbr opcode");
 
-  auto MI = MBB.begin();
-  // If the MBB is empty or the first instruction is not ENDBR,
-  // add the ENDBR instruction to the beginning of the MBB.
-  if (MI == MBB.end() || EndbrOpcode != MI->getOpcode()) {
-    BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(EndbrOpcode));
-    NumEndBranchAdded++;
+  // If I is the end of MBB or the instruction at I is not an ENDBR,
+  // insert an ENDBR instruction at the location of I.
+  if (I == MBB.end() || I->getOpcode() != EndbrOpcode) {
+    BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(EndbrOpcode));
+    ++NumEndBranchAdded;
     return true;
   }
+  return false;
+}
 
+static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
+  if (!MOp.isGlobal()) // Only global-address operands can be inspected.
+    return false;
+  auto *CalleeFn = dyn_cast<Function>(MOp.getGlobal());
+  if (!CalleeFn) // The referenced global is not a Function.
+    return false;
+  AttributeList Attrs = CalleeFn->getAttributes();
+  if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice))
+    return true;
   return false;
 }
 
@@ -108,14 +118,21 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
        !MF.getFunction().hasLocalLinkage()) &&
       !MF.getFunction().doesNoCfCheck()) {
     auto MBB = MF.begin();
-    Changed |= addENDBR(*MBB);
+    Changed |= addENDBR(*MBB, MBB->begin());
   }
 
-  for (auto &MBB : MF)
+  for (auto &MBB : MF) {
     // Find all basic blocks that their address was taken (for example
     // in the case of indirect jump) and add ENDBR instruction.
     if (MBB.hasAddressTaken())
-      Changed |= addENDBR(MBB);
-
+      Changed |= addENDBR(MBB, MBB.begin());
+
+    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+      if (!I->isCall())
+        continue;
+      if (IsCallReturnTwice(I->getOperand(0)))
+        Changed |= addENDBR(MBB, std::next(I));
+    }
+  }
   return Changed;
 }
diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp
index 30b46a09ef0f..02ae73706a34 100644
--- a/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/lib/Target/X86/X86InsertPrefetch.cpp
@@ -1,9 +1,8 @@
 //===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,7 +33,8 @@ using namespace sampleprof;
 
 static cl::opt<std::string>
     PrefetchHintsFile("prefetch-hints-file",
-                      cl::desc("Path to the prefetch hints profile."),
+                      cl::desc("Path to the prefetch hints profile. See also "
+                               "-x86-discriminate-memops"),
                       cl::Hidden);
 namespace {
 
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 49e9e924887a..cd1b06365971 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -1,9 +1,8 @@
 //===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,7 +73,9 @@ defm PFSUBR   : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
 defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
 defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
 
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+    Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
                    [(int_x86_mmx_femms)]>, TB;
 
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 7423cb85acd2..54eddeacaa17 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1,9 +1,8 @@
 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
   // Corresponding mask register class.
   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
 
+  // Corresponding mask register pair class.
+  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
+                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
+
   // Corresponding write-mask register class.
   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
 
@@ -95,10 +98,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
 
   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
 
-  // A vector type of the same width with element type i32.  This is used to
-  // create the canonical constant zero node ImmAllZerosV.
-  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
-  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+  dag ImmAllZerosV = (VT immAllZerosV);
 
   string ZSuffix = !if (!eq (Size, 128), "Z128",
                    !if (!eq (Size, 256), "Z256", "Z"));
@@ -277,10 +277,9 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                            dag Outs, dag Ins, string OpcodeStr,
                            string AttSrcAsm, string IntelSrcAsm,
-                           dag RHS,
-                           bit IsCommutable = 0> :
+                           dag RHS> :
    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
-                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
+                   RHS, 0, 0, 0, X86selects>;
 
 // Similar to AVX512_maskable but in this case one of the source operands
 // ($src1) is already tied to $dst so we just use that for the preserved
@@ -365,7 +364,7 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                   list<dag> Pattern,
                                   list<dag> MaskingPattern,
                                   bit IsCommutable = 0> {
-    let isCommutable = IsCommutable in
+    let isCommutable = IsCommutable in {
     def NAME: AVX512<O, F, Outs, Ins,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                      "$dst, "#IntelSrcAsm#"}",
@@ -375,6 +374,7 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
                        MaskingPattern>, EVEX_K;
+    }
 }
 
 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
@@ -392,38 +392,11 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                            dag Outs, dag Ins, string OpcodeStr,
                            string AttSrcAsm, string IntelSrcAsm,
-                           dag RHS, bit IsCommutable = 0> :
+                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                           !con((ins _.KRCWM:$mask), Ins),
                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
-                          (and _.KRCWM:$mask, RHS), IsCommutable>;
-
-multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
-                           dag Outs, dag Ins, string OpcodeStr,
-                           string AttSrcAsm, string IntelSrcAsm> :
-   AVX512_maskable_custom_cmp<O, F, Outs,
-                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
-                             AttSrcAsm, IntelSrcAsm, [], []>;
-
-// This multiclass generates the unconditional/non-masking, the masking and
-// the zero-masking variant of the vector instruction.  In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
-multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
-                           dag Outs, dag Ins, string OpcodeStr,
-                           string AttSrcAsm, string IntelSrcAsm,
-                           dag RHS, dag MaskedRHS,
-                           bit IsCommutable = 0, SDNode Select = vselect> :
-   AVX512_maskable_custom<O, F, Outs, Ins,
-                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
-                          !con((ins _.KRCWM:$mask), Ins),
-                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
-                          [(set _.RC:$dst, RHS)],
-                          [(set _.RC:$dst,
-                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
-                          [(set _.RC:$dst,
-                                (Select _.KRCWM:$mask, MaskedRHS,
-                                        _.ImmAllZerosV))],
-                          "$src0 = $dst", IsCommutable>;
+                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
 
 
 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
@@ -451,8 +424,8 @@ def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                 (ins VK8WM:$mask), "",
                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
-                                           (bc_v8i64 (v16i32 immAllOnesV)),
-                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
+                                           (v8i64 immAllOnesV),
+                                           (v8i64 immAllZerosV)))]>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -753,6 +726,7 @@ defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
 
 // vinsertps - insert f32 to XMM
 let ExeDomain = SSEPackedSingle in {
+let isCommutable = 1 in
 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -1378,15 +1352,15 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
 
 let Predicates = [HasAVX512] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZm addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZ128m addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZ256m addr:$src)>;
 }
 let Predicates = [HasVLX, HasBWI] in {
@@ -1396,13 +1370,31 @@ let Predicates = [HasVLX, HasBWI] in {
             (VPBROADCASTWZ128m addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
             (VPBROADCASTWZ256m addr:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZ128m addr:$src)>;
   def : Pat<(v8i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWZ128m addr:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZ256m addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWZ256m addr:$src)>;
 }
+let Predicates = [HasBWI] in {
+  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+  // This means we'll encounter truncated i32 loads; match that here.
+  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+  def : Pat<(v32i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+  def : Pat<(v32i16 (X86VBroadcast
+              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+}
 
 //===----------------------------------------------------------------------===//
 // AVX-512 BROADCAST SUBVECTORS
@@ -1464,7 +1456,7 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
-                   (bc_v16f32 (v16i32 immAllZerosV))),
+                   (v16f32 immAllZerosV)),
           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1481,7 +1473,7 @@ def : Pat<(vselect VK16WM:$mask,
 
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
-                   (bc_v8f64 (v16i32 immAllZerosV))),
+                   (v8f64 immAllZerosV)),
           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
@@ -1489,7 +1481,7 @@ def : Pat<(vselect VK8WM:$mask,
           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
-                   (bc_v8i64 (v16i32 immAllZerosV))),
+                   (v8i64 immAllZerosV)),
           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
@@ -1517,7 +1509,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
-                   (bc_v8f32 (v8i32 immAllZerosV))),
+                   (v8f32 immAllZerosV)),
           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1566,7 +1558,7 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
-                   (bc_v4f64 (v8i32 immAllZerosV))),
+                   (v4f64 immAllZerosV)),
           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1574,7 +1566,7 @@ def : Pat<(vselect VK4WM:$mask,
           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
-                   (bc_v4i64 (v8i32 immAllZerosV))),
+                   (v4i64 immAllZerosV)),
           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -1599,7 +1591,7 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
-                   (bc_v16f32 (v16i32 immAllZerosV))),
+                   (v16f32 immAllZerosV)),
           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
@@ -1616,7 +1608,7 @@ def : Pat<(vselect VK16WM:$mask,
 
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
-                   (bc_v8f64 (v16i32 immAllZerosV))),
+                   (v8f64 immAllZerosV)),
           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1624,7 +1616,7 @@ def : Pat<(vselect VK8WM:$mask,
           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
-                   (bc_v8i64 (v16i32 immAllZerosV))),
+                   (v8i64 immAllZerosV)),
           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -2031,96 +2023,86 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
 
 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
 
-multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
+                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                              X86FoldableSchedWrite sched> {
   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
-                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
-                      "vcmp${cc}"#_.Suffix,
-                      "$src2, $src1", "$src1, $src2",
-                      (OpNode (_.VT _.RC:$src1),
-                              (_.VT _.RC:$src2),
-                              imm:$cc)>, EVEX_4V, Sched<[sched]>;
+                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                      "vcmp"#_.Suffix,
+                      "$cc, $src2, $src1", "$src1, $src2, $cc",
+                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                                 imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
   let mayLoad = 1 in
   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                     (outs _.KRC:$dst),
-                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
-                    "vcmp${cc}"#_.Suffix,
-                    "$src2, $src1", "$src1, $src2",
+                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
+                    "vcmp"#_.Suffix,
+                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
-                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
+                        imm:$cc),
+                    (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+                        imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
 
   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
-                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
-                     "vcmp${cc}"#_.Suffix,
-                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                     (OpNodeRnd (_.VT _.RC:$src1),
-                                (_.VT _.RC:$src2),
-                                imm:$cc,
-                                (i32 FROUND_NO_EXC))>,
-                     EVEX_4V, EVEX_B, Sched<[sched]>;
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
-                        (outs VK1:$dst),
-                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
-                        "vcmp"#_.Suffix,
-                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
-                        Sched<[sched]>, NotMemoryFoldable;
-  let mayLoad = 1 in
-    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
-                        (outs _.KRC:$dst),
-                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
-                        "vcmp"#_.Suffix,
-                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
-                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
-                        Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
-
-    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
-                       (outs _.KRC:$dst),
-                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
-                       "vcmp"#_.Suffix,
-                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
-                       EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
-  }// let isAsmParserOnly = 1, hasSideEffects = 0
+                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                     "vcmp"#_.Suffix,
+                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
+                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                                imm:$cc),
+                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                                   imm:$cc)>,
+                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
 
   let isCodeGenOnly = 1 in {
     let isCommutable = 1 in
     def rr : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", _.Suffix,
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
+                !strconcat("vcmp", _.Suffix,
+                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                           _.FRC:$src2,
                                           imm:$cc))]>,
-                EVEX_4V, Sched<[sched]>;
+                EVEX_4V, VEX_LIG, Sched<[sched]>;
     def rm : AVX512Ii8<0xC2, MRMSrcMem,
               (outs _.KRC:$dst),
-              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
-              !strconcat("vcmp${cc}", _.Suffix,
-                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+              !strconcat("vcmp", _.Suffix,
+                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2),
                                         imm:$cc))]>,
-              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
+              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
 
+def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
+  return N->hasOneUse();
+}]>;
+def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
+  return N->hasOneUse();
+}]>;
+
 let Predicates = [HasAVX512] in {
   let ExeDomain = SSEPackedSingle in
-  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
+  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
+                                   X86cmpms_su, X86cmpmsSAE_su,
                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
   let ExeDomain = SSEPackedDouble in
-  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
+  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
+                                   X86cmpms_su, X86cmpmsSAE_su,
                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
 }
 
 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
-                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
-                              bit IsCommutable> {
+                              PatFrag OpNode_su, X86FoldableSchedWrite sched,
+                              X86VectorVTInfo _, bit IsCommutable> {
   let isCommutable = IsCommutable in
   def rr : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
@@ -2139,22 +2121,23 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, $src2}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
-                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
+                                   (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
               EVEX_4V, EVEX_K, Sched<[sched]>;
   def rmk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, $src2}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
-                                   (OpNode (_.VT _.RC:$src1),
+                                   (OpNode_su (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+                                  PatFrag OpNode_su,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                   bit IsCommutable> :
-           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
+           avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
   def rmb : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
@@ -2169,7 +2152,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                [(set _.KRC:$dst, (and _.KRCWM:$mask,
-                                      (OpNode (_.VT _.RC:$src1),
+                                      (OpNode_su (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src2)))))]>,
                EVEX_4V, EVEX_K, EVEX_B,
@@ -2177,33 +2160,34 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
 }
 
 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
-                                 X86SchedWriteWidths sched,
+                                 PatFrag OpNode_su, X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                  bit IsCommutable = 0> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
+  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                               VTInfo.info512, IsCommutable>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
+    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                    VTInfo.info256, IsCommutable>, EVEX_V256;
-    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
+    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                    VTInfo.info128, IsCommutable>, EVEX_V128;
   }
 }
 
 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
-                                     PatFrag OpNode, X86SchedWriteWidths sched,
+                                     PatFrag OpNode, PatFrag OpNode_su,
+                                     X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo VTInfo,
                                      Predicate prd, bit IsCommutable = 0> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
+  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
                                   VTInfo.info512, IsCommutable>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
+    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
                                        VTInfo.info256, IsCommutable>, EVEX_V256;
-    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
+    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
                                        VTInfo.info128, IsCommutable>, EVEX_V128;
   }
 }
@@ -2216,59 +2200,69 @@ def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                          (setcc node:$src1, node:$src2, SETGT)>;
 
+def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
+                              (X86pcmpeqm_c node:$src1, node:$src2), [{
+  return N->hasOneUse();
+}]>;
+def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
+                            (X86pcmpgtm node:$src1, node:$src2), [{
+  return N->hasOneUse();
+}]>;
+
 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
 // increase the pattern complexity the way an immediate would.
 let AddedComplexity = 2 in {
 // FIXME: Is there a better scheduler class for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                 EVEX_CD8<8, CD8VF>, VEX_WIG;
 
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                 EVEX_CD8<16, CD8VF>, VEX_WIG;
 
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                 EVEX_CD8<32, CD8VF>;
 
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
-defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                 EVEX_CD8<8, CD8VF>, VEX_WIG;
 
-defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                 EVEX_CD8<16, CD8VF>, VEX_WIG;
 
-defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                 EVEX_CD8<32, CD8VF>;
 
-defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 }
 
 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
-                          PatFrag CommFrag, X86FoldableSchedWrite sched,
+                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
+                          X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, string Name> {
   let isCommutable = 1 in
   def rri : AVX512AIi8<opc, MRMSrcReg,
-             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
-             !strconcat("vpcmp${cc}", Suffix,
-                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+             !strconcat("vpcmp", Suffix,
+                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond)))]>,
              EVEX_4V, Sched<[sched]>;
   def rmi : AVX512AIi8<opc, MRMSrcMem,
-             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
-             !strconcat("vpcmp${cc}", Suffix,
-                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+             !strconcat("vpcmp", Suffix,
+                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT
                                 (Frag:$cc
                                  (_.VT _.RC:$src1),
@@ -2278,67 +2272,36 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
   let isCommutable = 1 in
   def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
-                                      AVX512ICC:$cc),
-              !strconcat("vpcmp${cc}", Suffix,
-                         "\t{$src2, $src1, $dst {${mask}}|",
-                         "$dst {${mask}}, $src1, $src2}"),
+                                      u8imm:$cc),
+              !strconcat("vpcmp", Suffix,
+                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
+                         "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
-                                     (_.KVT (Frag:$cc (_.VT _.RC:$src1),
-                                                      (_.VT _.RC:$src2),
-                                                      cond))))]>,
+                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
+                                                         (_.VT _.RC:$src2),
+                                                         cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched]>;
   def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
-                                    AVX512ICC:$cc),
-              !strconcat("vpcmp${cc}", Suffix,
-                         "\t{$src2, $src1, $dst {${mask}}|",
-                         "$dst {${mask}}, $src1, $src2}"),
+                                    u8imm:$cc),
+              !strconcat("vpcmp", Suffix,
+                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
+                         "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT
-                                      (Frag:$cc
+                                      (Frag_su:$cc
                                        (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)),
                                        cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
 
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
-               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
-               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
-                          "$dst, $src1, $src2, $cc}"), []>,
-               EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
-    let mayLoad = 1 in
-    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
-               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
-               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
-                          "$dst, $src1, $src2, $cc}"), []>,
-               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
-    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
-               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
-                                       u8imm:$cc),
-               !strconcat("vpcmp", Suffix,
-                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
-                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
-               EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
-    let mayLoad = 1 in
-    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
-               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
-                                       u8imm:$cc),
-               !strconcat("vpcmp", Suffix,
-                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
-                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
-               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
-               NotMemoryFoldable;
-  }
-
   def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                  (_.VT _.RC:$src1), cond)),
             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
 
   def : Pat<(and _.KRCWM:$mask,
-                 (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
+                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                       (_.VT _.RC:$src1), cond))),
             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2346,15 +2309,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
 }
 
 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
-                              PatFrag CommFrag, X86FoldableSchedWrite sched,
+                              PatFrag Frag_su, PatFrag CommFrag,
+                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Name> :
-           avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
+           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                          sched, _, Name> {
   def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
-                                     AVX512ICC:$cc),
-             !strconcat("vpcmp${cc}", Suffix,
-                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
-                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+                                     u8imm:$cc),
+             !strconcat("vpcmp", Suffix,
+                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
+                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (X86VBroadcast
@@ -2363,45 +2328,25 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
   def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
-                                       _.ScalarMemOp:$src2, AVX512ICC:$cc),
-              !strconcat("vpcmp${cc}", Suffix,
-                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
-                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+                                       _.ScalarMemOp:$src2, u8imm:$cc),
+              !strconcat("vpcmp", Suffix,
+                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
-                                     (_.KVT (Frag:$cc
+                                     (_.KVT (Frag_su:$cc
                                              (_.VT _.RC:$src1),
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)),
                                              cond))))]>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
 
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
-    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
-               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
-                                       u8imm:$cc),
-               !strconcat("vpcmp", Suffix,
-                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
-                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
-               EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
-               NotMemoryFoldable;
-    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
-               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
-                                       _.ScalarMemOp:$src2, u8imm:$cc),
-               !strconcat("vpcmp", Suffix,
-                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
-                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
-               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
-               NotMemoryFoldable;
-  }
-
   def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), cond)),
             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
 
   def : Pat<(and _.KRCWM:$mask,
-                 (_.KVT (CommFrag:$cc (X86VBroadcast
+                 (_.KVT (CommFrag_su:$cc (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                       (_.VT _.RC:$src1), cond))),
             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
@@ -2410,32 +2355,34 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
 }
 
 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
-                             PatFrag CommFrag, X86SchedWriteWidths sched,
+                             PatFrag Frag_su, PatFrag CommFrag,
+                             PatFrag CommFrag_su, X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
-                          VTInfo.info512, NAME>, EVEX_V512;
+  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
-                               VTInfo.info256, NAME>, EVEX_V256;
-    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
-                               VTInfo.info128, NAME>, EVEX_V128;
+    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
   }
 }
 
 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
-                                 PatFrag CommFrag, X86SchedWriteWidths sched,
+                                 PatFrag Frag_su, PatFrag CommFrag,
+                                 PatFrag CommFrag_su, X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
-                              VTInfo.info512, NAME>, EVEX_V512;
+  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
-                                    VTInfo.info256, NAME>, EVEX_V256;
-    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
-                                   VTInfo.info128, NAME>, EVEX_V128;
+    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
   }
 }
 
@@ -2459,6 +2406,12 @@ def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
   return !ISD::isUnsignedIntSetCC(CC);
 }], X86pcmpm_imm>;
 
+def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+                          (setcc node:$src1, node:$src2, node:$cc), [{
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
 // Same as above, but commutes immediate. Use for load folding.
 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
@@ -2466,12 +2419,24 @@ def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
   return !ISD::isUnsignedIntSetCC(CC);
 }], X86pcmpm_imm_commute>;
 
+def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+                                  (setcc node:$src1, node:$src2, node:$cc), [{
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (setcc node:$src1, node:$src2, node:$cc), [{
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   return ISD::isUnsignedIntSetCC(CC);
 }], X86pcmpm_imm>;
 
+def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+                           (setcc node:$src1, node:$src2, node:$cc), [{
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
 // Same as above, but commutes immediate. Use for load folding.
 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                 (setcc node:$src1, node:$src2, node:$cc), [{
@@ -2479,93 +2444,91 @@ def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
   return ISD::isUnsignedIntSetCC(CC);
 }], X86pcmpm_imm_commute>;
 
+def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), // One-use form of X86pcmpum_commute.
+                                   (setcc node:$src1, node:$src2, node:$cc), [{
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); // Operand 2 is the condition code.
+  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC); // One-use setcc with an unsigned condition.
+}], X86pcmpm_imm_commute>; // Uses the commuting immediate transform (for load folding).
+
 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
+                                X86pcmpm_commute, X86pcmpm_commute_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
+                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                  EVEX_CD8<8, CD8VF>;
 
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
+                                X86pcmpm_commute, X86pcmpm_commute_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
+                                 X86pcmpum_commute, X86pcmpum_commute_su,
                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                  VEX_W, EVEX_CD8<16, CD8VF>;
 
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
+                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
+                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                      SchedWriteVecALU, avx512vl_i32_info,
                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
 
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
+                                    X86pcmpm_commute, X86pcmpm_commute_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
+                                     X86pcmpum_commute, X86pcmpum_commute_su,
                                      SchedWriteVecALU, avx512vl_i64_info,
                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
 
+def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), // X86cmpm restricted to a single use.
+                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
+  return N->hasOneUse(); // Matched under (and $mask, ...) in the masked patterns of this file.
+}]>;
+def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), // X86cmpmSAE restricted to a single use.
+                            (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
+  return N->hasOneUse(); // Reject the node when its result has more than one user.
+}]>;
+
 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                               string Name> {
   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
-                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
-                   "vcmp${cc}"#_.Suffix,
-                   "$src2, $src1", "$src1, $src2",
-                   (X86cmpm (_.VT _.RC:$src1),
-                         (_.VT _.RC:$src2),
-                           imm:$cc), 1>,
-                   Sched<[sched]>;
+                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
+                   "vcmp"#_.Suffix,
+                   "$cc, $src2, $src1", "$src1, $src2, $cc",
+                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+                   1>, Sched<[sched]>;
 
   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
-                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
-                "vcmp${cc}"#_.Suffix,
-                "$src2, $src1", "$src1, $src2",
-                (X86cmpm (_.VT _.RC:$src1),
-                        (_.VT (_.LdFrag addr:$src2)),
-                        imm:$cc)>,
+                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+                "vcmp"#_.Suffix,
+                "$cc, $src2, $src1", "$src1, $src2, $cc",
+                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+                         imm:$cc),
+                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+                            imm:$cc)>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
 
   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst),
-                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
-                "vcmp${cc}"#_.Suffix,
-                "${src2}"##_.BroadcastStr##", $src1",
-                "$src1, ${src2}"##_.BroadcastStr,
+                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+                "vcmp"#_.Suffix,
+                "$cc, ${src2}"#_.BroadcastStr#", $src1",
+                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                 (X86cmpm (_.VT _.RC:$src1),
                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
-                        imm:$cc)>,
+                        imm:$cc),
+                (X86cmpm_su (_.VT _.RC:$src1),
+                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+                            imm:$cc)>,
                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
-                         (outs _.KRC:$dst),
-                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
-                         "vcmp"#_.Suffix,
-                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
-                         Sched<[sched]>, NotMemoryFoldable;
-
-    let mayLoad = 1 in {
-      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
-                             (outs _.KRC:$dst),
-                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
-                             "vcmp"#_.Suffix,
-                             "$cc, $src2, $src1", "$src1, $src2, $cc">,
-                             Sched<[sched.Folded, sched.ReadAfterFold]>,
-                             NotMemoryFoldable;
-
-      defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
-                         (outs _.KRC:$dst),
-                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
-                         "vcmp"#_.Suffix,
-                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
-                         "$src1, ${src2}"##_.BroadcastStr##", $cc">,
-                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
-                         NotMemoryFoldable;
-    }
-  }
 
   // Patterns for selecting with loads in other operand.
   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
@@ -2573,9 +2536,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;
 
-  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
-                                         (_.VT _.RC:$src1),
-                                         CommutableCMPCC:$cc)),
+  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
+                                            (_.VT _.RC:$src1),
+                                            CommutableCMPCC:$cc)),
             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
@@ -2585,10 +2548,10 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
 
-  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
-                                          (_.ScalarLdFrag addr:$src2)),
-                                         (_.VT _.RC:$src1),
-                                         CommutableCMPCC:$cc)),
+  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
+                                             (_.ScalarLdFrag addr:$src2)),
+                                            (_.VT _.RC:$src1),
+                                            CommutableCMPCC:$cc)),
             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                         _.RC:$src1, addr:$src2,
                                                         imm:$cc)>;
@@ -2597,24 +2560,14 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   // comparison code form (VCMP[EQ/LT/LE/...]
   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
-                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
-                     "vcmp${cc}"#_.Suffix,
-                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                     (X86cmpmRnd (_.VT _.RC:$src1),
-                                    (_.VT _.RC:$src2),
-                                    imm:$cc,
-                                (i32 FROUND_NO_EXC))>,
+                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                     "vcmp"#_.Suffix,
+                     "$cc, {sae}, $src2, $src1",
+                     "$src1, $src2, {sae}, $cc",
+                     (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+                     (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                                    imm:$cc)>,
                      EVEX_B, Sched<[sched]>;
-
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
-                         (outs _.KRC:$dst),
-                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
-                         "vcmp"#_.Suffix,
-                         "$cc, {sae}, $src2, $src1",
-                         "$src1, $src2, {sae}, $cc">,
-                         EVEX_B, Sched<[sched]>, NotMemoryFoldable;
-   }
 }
 
 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
@@ -2647,16 +2600,27 @@ let Predicates = [HasAVX512] in {
 
 // ----------------------------------------------------------------
 // FPClass
+
+def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2), // Scalar X86Vfpclasss restricted to a single use.
+                              (X86Vfpclasss node:$src1, node:$src2), [{
+  return N->hasOneUse(); // Used by the masked rrk/rmk patterns under (and $mask, ...).
+}]>;
+
+def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2), // Vector X86Vfpclass restricted to a single use.
+                             (X86Vfpclass node:$src1, node:$src2), [{
+  return N->hasOneUse(); // Used by the masked rrk/rmk/rmbk patterns under (and $mask, ...).
+}]>;
+
 //handle fpclass instruction  mask =  op(reg_scalar,imm)
 //                                    op(mem_scalar,imm)
-multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  Predicate prd> {
   let Predicates = [prd], ExeDomain = _.ExeDomain in {
       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                       (ins _.RC:$src1, i32u8imm:$src2),
                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                               (i32 imm:$src2)))]>,
                       Sched<[sched]>;
       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
@@ -2664,7 +2628,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       OpcodeStr##_.Suffix#
                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
-                                      (OpNode (_.VT _.RC:$src1),
+                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                       EVEX_K, Sched<[sched]>;
     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
@@ -2672,15 +2636,15 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     OpcodeStr##_.Suffix##
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set _.KRC:$dst,
-                          (OpNode _.ScalarIntMemCPat:$src1,
-                                  (i32 imm:$src2)))]>,
+                          (X86Vfpclasss _.ScalarIntMemCPat:$src1,
+                                       (i32 imm:$src2)))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
-                        (OpNode _.ScalarIntMemCPat:$src1,
+                        (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
                             (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
@@ -2689,14 +2653,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
 //                                  fpclass(reg_vec, mem_vec, imm)
 //                                  fpclass(reg_vec, broadcast(eltVt), imm)
-multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
-                                 string mem, string broadcast>{
+                                 string mem>{
   let ExeDomain = _.ExeDomain in {
   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                       (ins _.RC:$src1, i32u8imm:$src2),
                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                        (i32 imm:$src2)))]>,
                       Sched<[sched]>;
   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
@@ -2704,85 +2668,103 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       OpcodeStr##_.Suffix#
                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
-                                       (OpNode (_.VT _.RC:$src1),
+                                       (X86Vfpclass_su (_.VT _.RC:$src1),
                                        (i32 imm:$src2))))]>,
                       EVEX_K, Sched<[sched]>;
   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.MemOp:$src1, i32u8imm:$src2),
-                    OpcodeStr##_.Suffix##mem#
+                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set _.KRC:$dst,(OpNode
+                    [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.LdFrag addr:$src1)),
                                      (i32 imm:$src2)))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
-                    OpcodeStr##_.Suffix##mem#
+                    OpcodeStr##_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
-                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
+                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                   (_.VT (_.LdFrag addr:$src1)),
                                   (i32 imm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
-                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                                       _.BroadcastStr##", $dst|$dst, ${src1}"
                                                   ##_.BroadcastStr##", $src2}",
-                    [(set _.KRC:$dst,(OpNode
+                    [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src1))),
                                      (i32 imm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
-                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+                    OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                    _.BroadcastStr##", $src2}",
-                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
+                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                      (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src1))),
                                      (i32 imm:$src2))))]>,
                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
+
+  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
+  // the memory form.
+  def : InstAlias<OpcodeStr#_.Suffix#mem#
+                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                  (!cast<Instruction>(NAME#"rr")
+                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
+  def : InstAlias<OpcodeStr#_.Suffix#mem#
+                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+                  (!cast<Instruction>(NAME#"rrk")
+                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
+  def : InstAlias<OpcodeStr#_.Suffix#mem#
+                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
+                  _.BroadcastStr#", $src2}",
+                  (!cast<Instruction>(NAME#"rmb")
+                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
+  def : InstAlias<OpcodeStr#_.Suffix#mem#
+                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
+                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
+                  (!cast<Instruction>(NAME#"rmbk")
+                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
 }
 
 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
-                                     bits<8> opc, SDNode OpNode,
-                                     X86SchedWriteWidths sched, Predicate prd,
-                                     string broadcast>{
+                                     bits<8> opc, X86SchedWriteWidths sched,
+                                     Predicate prd>{
   let Predicates = [prd] in {
-    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
-                                      _.info512, "{z}", broadcast>, EVEX_V512;
+    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
+                                      _.info512, "z">, EVEX_V512;
   }
   let Predicates = [prd, HasVLX] in {
-    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
-                                      _.info128, "{x}", broadcast>, EVEX_V128;
-    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
-                                      _.info256, "{y}", broadcast>, EVEX_V256;
+    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
+                                      _.info128, "x">, EVEX_V128;
+    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
+                                      _.info256, "y">, EVEX_V256;
   }
 }
 
 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
-                                 bits<8> opcScalar, SDNode VecOpNode,
-                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
+                                 bits<8> opcScalar, X86SchedWriteWidths sched,
                                  Predicate prd> {
   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
-                                      VecOpNode, sched, prd, "{l}">,
+                                      sched, prd>,
                                       EVEX_CD8<32, CD8VF>;
   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
-                                      VecOpNode, sched, prd, "{q}">,
+                                      sched, prd>,
                                       EVEX_CD8<64, CD8VF> , VEX_W;
-  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
-                                   sched.Scl, f32x_info, prd>,
+  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+                                   sched.Scl, f32x_info, prd>, VEX_LIG,
                                    EVEX_CD8<32, CD8VT1>;
-  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
-                                   sched.Scl, f64x_info, prd>,
+  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+                                   sched.Scl, f64x_info, prd>, VEX_LIG,
                                    EVEX_CD8<64, CD8VT1>, VEX_W;
 }
 
-defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
-                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
-                                      AVX512AIi8Base, EVEX;
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
+                                      HasDQI>, AVX512AIi8Base, EVEX;
 
 //-----------------------------------------------------------------
 // Mask register copy, including
@@ -3039,26 +3021,24 @@ defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
 defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
 
 // Mask unpacking
-multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
-                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
+multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
+                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                              Predicate prd> {
   let Predicates = [prd] in {
     let hasSideEffects = 0 in
-    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
-               (ins KRC:$src1, KRC:$src2),
+    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
+               (ins Src.KRC:$src1, Src.KRC:$src2),
                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                VEX_4V, VEX_L, Sched<[sched]>;
 
-    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
-              (!cast<Instruction>(NAME##rr)
-                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
-                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
+    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
+              (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
   }
 }
 
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
 
 // Mask bit testing
 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -3118,7 +3098,8 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShu
 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
 
 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
-multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
+multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
+                                              string InstStr,
                                               X86VectorVTInfo Narrow,
                                               X86VectorVTInfo Wide> {
   def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
@@ -3130,8 +3111,8 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
            Narrow.KRC)>;
 
   def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                             (Frag (Narrow.VT Narrow.RC:$src1),
-                                   (Narrow.VT Narrow.RC:$src2)))),
+                             (Frag_su (Narrow.VT Narrow.RC:$src1),
+                                      (Narrow.VT Narrow.RC:$src2)))),
           (COPY_TO_REGCLASS
            (!cast<Instruction>(InstStr#"Zrrk")
             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
@@ -3141,7 +3122,7 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
 }
 
 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
-multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                  string InstStr,
                                                  X86VectorVTInfo Narrow,
                                                  X86VectorVTInfo Wide> {
@@ -3154,9 +3135,9 @@ def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
             (Frag.OperandTransform $cc)), Narrow.KRC)>;
 
 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                           (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
-                                                 (Narrow.VT Narrow.RC:$src2),
-                                                 cond)))),
+                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
+                                                    (Narrow.VT Narrow.RC:$src2),
+                                                    cond)))),
           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3165,7 +3146,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
 }
 
 // Same as above, but for fp types which don't use PatFrags.
-multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
+                                                string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
@@ -3177,8 +3159,8 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
             imm:$cc), Narrow.KRC)>;
 
 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                           (OpNode (Narrow.VT Narrow.RC:$src1),
-                                   (Narrow.VT Narrow.RC:$src2), imm:$cc))),
+                           (OpNode_su (Narrow.VT Narrow.RC:$src1),
+                                      (Narrow.VT Narrow.RC:$src2), imm:$cc))),
           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3190,65 +3172,65 @@ let Predicates = [HasAVX512, NoVLX] in {
   // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
   // increase the pattern complexity the way an immediate would.
   let AddedComplexity = 2 in {
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
   }
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
 
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
-  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
+  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
 }
 
 let Predicates = [HasBWI, NoVLX] in {
   // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
   // increase the pattern complexity the way an immediate would.
   let AddedComplexity = 2 in {
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
 
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
-  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
   }
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
 
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
-  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
+  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
 }
 
 // Mask setting all 0s or 1s
@@ -3394,15 +3376,15 @@ multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
   let Predicates = [prd] in
   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
-                       _.info512.AlignedLdFrag, masked_load_aligned512,
+                       _.info512.AlignedLdFrag, masked_load_aligned,
                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
-                          _.info256.AlignedLdFrag, masked_load_aligned256,
+                          _.info256.AlignedLdFrag, masked_load_aligned,
                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
-                          _.info128.AlignedLdFrag, masked_load_aligned128,
+                          _.info128.AlignedLdFrag, masked_load_aligned,
                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
   }
 }
@@ -3414,15 +3396,15 @@ multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                           SDPatternOperator SelectOprr = vselect> {
   let Predicates = [prd] in
   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
-                       masked_load_unaligned, Sched.ZMM, "",
+                       masked_load, Sched.ZMM, "",
                        NoRMPattern, SelectOprr>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
-                         masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
+                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                          NoRMPattern, SelectOprr>, EVEX_V256;
   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
-                         masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
+                         masked_load, Sched.XMM, EVEX2VEXOvrd,
                          NoRMPattern, SelectOprr>, EVEX_V128;
   }
 }
@@ -3488,14 +3470,14 @@ multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
   let Predicates = [prd] in
   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
-                        masked_store_unaligned, Sched.ZMM, "",
+                        masked_store, Sched.ZMM, "",
                         NoMRPattern>, EVEX_V512;
   let Predicates = [prd, HasVLX] in {
     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
-                             masked_store_unaligned, Sched.YMM,
+                             masked_store, Sched.YMM,
                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
-                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
+                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                              NoMRPattern>, EVEX_V128;
   }
 }
@@ -3506,15 +3488,15 @@ multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
   let Predicates = [prd] in
   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
-                        masked_store_aligned512, Sched.ZMM, "",
+                        masked_store_aligned, Sched.ZMM, "",
                         NoMRPattern>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
-                             masked_store_aligned256, Sched.YMM,
+                             masked_store_aligned, Sched.YMM,
                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
-                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
+                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                              NoMRPattern>, EVEX_V128;
   }
 }
@@ -3609,7 +3591,7 @@ def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                             "", []>, Sched<[WriteFStoreY]>;
 }
 
-def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                           (v8i64 VR512:$src))),
    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                               VK8), VR512:$src)>;
@@ -3621,7 +3603,7 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
 // These patterns exist to prevent the above patterns from introducing a second
 // mask inversion when one already exists.
 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
-                          (bc_v8i64 (v16i32 immAllZerosV)),
+                          (v8i64 immAllZerosV),
                           (v8i64 VR512:$src))),
                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
@@ -3761,75 +3743,6 @@ let Predicates = [HasVLX] in {
             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
 }
 
-multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
-                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
-  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
-                              (bitconvert
-                               (To.VT (extract_subvector
-                                       (From.VT From.RC:$src), (iPTR 0)))),
-                              To.RC:$src0)),
-            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
-                      Cast.RC:$src0, Cast.KRCWM:$mask,
-                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-
-  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
-                              (bitconvert
-                               (To.VT (extract_subvector
-                                       (From.VT From.RC:$src), (iPTR 0)))),
-                              Cast.ImmAllZerosV)),
-            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
-                      Cast.KRCWM:$mask,
-                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-}
-
-
-let Predicates = [HasVLX] in {
-// A masked extract from the first 128-bits of a 256-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 128-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 256-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
-}
-
 // Move Int Doubleword to Packed Double Int
 //
 let ExeDomain = SSEPackedInt in {
@@ -3858,19 +3771,10 @@ def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src)
                        "vmovq\t{$src, $dst|$dst, $src}",
                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
-def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
-                      "vmovq\t{$src, $dst|$dst, $src}",
-                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
-                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                          "vmovq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
-def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
-                         "vmovq\t{$src, $dst|$dst, $src}",
-                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
-                         EVEX, VEX_W, Sched<[WriteVecStore]>,
-                         EVEX_CD8<64, CD8VT1>;
 }
 } // ExeDomain = SSEPackedInt
 
@@ -3881,11 +3785,6 @@ def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src)
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                       EVEX, Sched<[WriteVecMoveFromGpr]>;
-
-def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
-                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 // Move doubleword from xmm register to r/m32
@@ -3938,6 +3837,11 @@ def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
 
+let Predicates = [HasAVX512] in {
+  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
+            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
+}
+
 // Move Scalar Single to Double Int
 //
 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
@@ -3946,11 +3850,6 @@ def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
-def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
-                      (ins i32mem:$dst, FR32X:$src),
-                      "vmovd\t{$src, $dst|$dst, $src}",
-                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
-                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 // Move Quadword Int to Packed Quadword Int
@@ -3974,7 +3873,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
 // AVX-512  MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_move_scalar<string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                               X86VectorVTInfo _> {
   let Predicates = [HasAVX512, OptForSize] in
   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
@@ -3999,11 +3898,18 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      (_.VT _.RC:$src0))))],
              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
-  let canFoldAsLoad = 1, isReMaterializable = 1 in
-  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+  let canFoldAsLoad = 1, isReMaterializable = 1 in {
+  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
+  // _alt version uses FR32/FR64 register class.
+  let isCodeGenOnly = 1 in
+  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
+  }
   let mayLoad = 1, hasSideEffects = 0 in {
     let Constraints = "$src0 = $dst" in
     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
@@ -4023,16 +3929,16 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
              EVEX, Sched<[WriteFStore]>;
   let mayStore = 1, hasSideEffects = 0 in
   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
-              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
+              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
               NotMemoryFoldable;
 }
 
-defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
 
-defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 
@@ -4070,7 +3976,7 @@ def : Pat<(masked_store
                                (iPTR 0))), addr:$dst, Mask),
           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
-                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+                      _.info128.RC:$src)>;
 
 }
 
@@ -4085,7 +3991,7 @@ def : Pat<(masked_store
                                (iPTR 0))), addr:$dst, Mask),
           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
-                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+                      _.info128.RC:$src)>;
 
 }
 
@@ -4105,13 +4011,13 @@ def : Pat<(masked_store
                                (iPTR 0))), addr:$dst, Mask512),
           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
-                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+                      _.info128.RC:$src)>;
 
 // AVX512VL pattern.
 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
-                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+                      _.info128.RC:$src)>;
 }
 
 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
@@ -4119,8 +4025,7 @@ multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
 
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                            (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
@@ -4145,8 +4050,7 @@ multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
 
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                            (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4175,8 +4079,7 @@ multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
 // AVX512F patterns.
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                            (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4194,7 +4097,7 @@ def : Pat<(_.info128.VT (extract_subvector
 
 // AVX512Vl patterns.
 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
-                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
+                         _.info128.ImmAllZerosV)),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                       addr:$srcAddr)>;
@@ -4383,15 +4286,6 @@ let Predicates = [HasAVX512, OptForSize] in {
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
 
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-
   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
             (SUBREG_TO_REG (i32 0),
              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
@@ -4400,17 +4294,6 @@ let Predicates = [HasAVX512, OptForSize] in {
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
 }
 
 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
@@ -4426,79 +4309,27 @@ let Predicates = [HasAVX512, OptForSpeed] in {
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 let Predicates = [HasAVX512] in {
-
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-  def : Pat<(v4f32 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (VMOVSSZrm addr:$src)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (VMOVSDZrm addr:$src)>;
 
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+  def : Pat<(v8f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f32 (X86vzload addr:$src)),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzload addr:$src)),
+  def : Pat<(v4f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
 
   // Represent the same patterns above but in the form they appear for
   // 512-bit types
-  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
-                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
-  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
-                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v16f32 (X86vzload addr:$src)),
+  def : Pat<(v16f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
-                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+  def : Pat<(v8f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f64 (X86vzload addr:$src)),
-            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
-
-  // Extract and store.
-  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
-                   addr:$dst),
-            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
 }
 
 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
@@ -4517,47 +4348,47 @@ let Predicates = [HasAVX512] in {
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
             (VMOV64toPQIZrr GR64:$src)>;
 
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
-
-  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
-                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
-
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
-            (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v8i32 (X86vzload addr:$src)),
+  def : Pat<(v8i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
-  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
-            (VMOVQI2PQIZrm addr:$src)>;
   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
             (VMOVZPQILo2PQIZrr VR128X:$src)>;
-  def : Pat<(v2i64 (X86vzload addr:$src)),
+  def : Pat<(v2i64 (X86vzload64 addr:$src)),
             (VMOVQI2PQIZrm addr:$src)>;
-  def : Pat<(v4i64 (X86vzload addr:$src)),
+  def : Pat<(v4i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
 
-  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
-  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
-                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
-
   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
-  def : Pat<(v16i32 (X86vzload addr:$src)),
+  def : Pat<(v16i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzload addr:$src)),
+  def : Pat<(v8i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
+
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+
+  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4686,7 +4517,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                     "$src2, $src1", "$src1, $src2",
                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
-                    IsCommutable>, AVX512BIBase, EVEX_4V,
+                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                     Sched<[sched]>;
 
   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4922,7 +4753,7 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                             (_Dst.VT (OpNode
                                          (_Src.VT _Src.RC:$src1),
                                          (_Src.VT _Src.RC:$src2))),
-                            IsCommutable>,
+                            IsCommutable, IsCommutable>,
                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
@@ -5458,16 +5289,14 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
-                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
-                                          (i32 FROUND_CURRENT)))>,
+                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                            Sched<[sched]>;
 
   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (VecNode _.RC:$src1,
-                                        _.ScalarIntMemCPat:$src2,
-                                        (i32 FROUND_CURRENT)))>,
+                                        _.ScalarIntMemCPat:$src2))>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5495,7 +5324,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                          (i32 imm:$rc)), IsCommutable>,
+                          (i32 timm:$rc))>,
                           EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -5534,23 +5363,22 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                            (i32 FROUND_NO_EXC))>, EVEX_B,
-                            Sched<[sched]>;
+                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+                            EVEX_B, Sched<[sched]>;
   }
 }
 
 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                SDNode VecNode, X86SchedWriteSizes sched,
-                                bit IsCommutable> {
+                                SDNode VecNode, SDNode RndNode,
+                                X86SchedWriteSizes sched, bit IsCommutable> {
   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                               sched.PS.Scl, IsCommutable>,
-             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
+             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                               sched.PS.Scl, IsCommutable>,
                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                               sched.PD.Scl, IsCommutable>,
-             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
+             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                               sched.PD.Scl, IsCommutable>,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
@@ -5565,17 +5393,17 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
 }
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
                                  SchedWriteFAddSizes, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
                                  SchedWriteFMulSizes, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
                                  SchedWriteFAddSizes, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
                                  SchedWriteFDivSizes, 0>;
-defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
+defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                                SchedWriteFCmpSizes, 0>;
-defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
+defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                                SchedWriteFCmpSizes, 0>;
 
 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
@@ -5618,13 +5446,13 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
                             bit IsCommutable,
-                            bit IsKZCommutable = IsCommutable> {
+                            bit IsKCommutable = IsCommutable> {
   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
-                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
-                  IsKZCommutable>,
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
+                  IsKCommutable, IsKCommutable>,
                   EVEX_4V, Sched<[sched]>;
   let mayLoad = 1 in {
     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -5651,18 +5479,18 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                   "$rc, $src2, $src1", "$src1, $src2, $rc",
-                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
+                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
-                                SDPatternOperator OpNodeRnd,
+                                SDPatternOperator OpNodeSAE,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in
   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
+                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                   EVEX_4V, EVEX_B, Sched<[sched]>;
 }
 
@@ -5731,10 +5559,10 @@ defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                               SchedWriteFCmpSizes, 0>,
-            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
+            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                               SchedWriteFCmpSizes, 0>,
-            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
+            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
 let isCodeGenOnly = 1 in {
   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                  SchedWriteFCmpSizes, 1>;
@@ -5750,71 +5578,25 @@ defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                                SchedWriteFLogicSizes, 1>;
 
-let Predicates = [HasVLX,HasDQI] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-
-  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-}
-
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
-                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                   EVEX_4V, Sched<[sched]>;
   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
-                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
+                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
-                                              (_.ScalarLdFrag addr:$src2))),
-                                              (i32 FROUND_CURRENT))>,
+                                              (_.ScalarLdFrag addr:$src2))))>,
                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
@@ -5825,332 +5607,139 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
-                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                   Sched<[sched]>;
   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "$src2, $src1", "$src1, $src2",
-                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
-                          (i32 FROUND_CURRENT))>,
+                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
 
 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
-                                SDNode OpNode, SDNode OpNodeScal,
                                 X86SchedWriteWidths sched> {
-  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
-             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
+  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
+             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
                               EVEX_V512, EVEX_CD8<32, CD8VF>;
-  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
-             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
+  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
+             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
-             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
-                           EVEX_4V,EVEX_CD8<32, CD8VT1>;
-  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
-             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
-                           EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
+             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
+                                    X86scalefsRnd, sched.Scl>,
+                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
+             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
+                                    X86scalefsRnd, sched.Scl>,
+                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
 
   // Define only if AVX512VL feature is present.
   let Predicates = [HasVLX] in {
-    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
+    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
-    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
+    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
-    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
+    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
-    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
+    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
   }
 }
-defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
+defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
 
 //===----------------------------------------------------------------------===//
 // AVX-512  VPTESTM instructions
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name> {
-  let ExeDomain = _.ExeDomain in {
-  let isCommutable = 1 in
+  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
+  // There are just too many permutations due to commutability and bitcasts.
+  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
-                   (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
+                   (null_frag), (null_frag), 1>,
                    EVEX_4V, Sched<[sched]>;
+  let mayLoad = 1 in
   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
-                   (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
-                           _.ImmAllZerosV)>,
+                   (null_frag), (null_frag)>,
                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
-
-  // Patterns for compare with 0 that just use the same source twice.
-  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
-            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
-                                      _.RC:$src, _.RC:$src))>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
-            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
-                                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
 }
 
-multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
-  let ExeDomain = _.ExeDomain in
+  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
-                    (OpNode (and _.RC:$src1,
-                                       (X86VBroadcast
-                                        (_.ScalarLdFrag addr:$src2))),
-                            _.ImmAllZerosV)>,
+                    (null_frag), (null_frag)>,
                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
-                                  X86VectorVTInfo _, string Name> {
-  def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
-                           _.ImmAllZerosV)),
-            (_.KVT (COPY_TO_REGCLASS
-                     (!cast<Instruction>(Name # "Zrr")
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src1, _.SubRegIdx),
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src2, _.SubRegIdx)),
-                   _.KRC))>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask,
-                        (OpNode (and _.RC:$src1, _.RC:$src2),
-                                _.ImmAllZerosV))),
-            (COPY_TO_REGCLASS
-             (!cast<Instruction>(Name # "Zrrk")
-              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src1, _.SubRegIdx),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src2, _.SubRegIdx)),
-             _.KRC)>;
-
-  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
-            (_.KVT (COPY_TO_REGCLASS
-                     (!cast<Instruction>(Name # "Zrr")
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src, _.SubRegIdx),
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src, _.SubRegIdx)),
-                   _.KRC))>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
-            (COPY_TO_REGCLASS
-             (!cast<Instruction>(Name # "Zrrk")
-              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src, _.SubRegIdx),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src, _.SubRegIdx)),
-             _.KRC)>;
-}
-
-multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
-                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
+                                  X86SchedWriteWidths sched,
+                                  AVX512VLVectorVTInfo _> {
   let Predicates  = [HasAVX512] in
-  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
-           avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
+  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
+           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
 
   let Predicates = [HasAVX512, HasVLX] in {
-  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
-              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
-  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
-              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
-  }
-  let Predicates = [HasAVX512, NoVLX] in {
-  defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
-  defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
+  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
+              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
+  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
+              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
   }
 }
 
-multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
-  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
+  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
-  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
+  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, VEX_W;
 }
 
 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
-                            PatFrag OpNode, X86SchedWriteWidths sched> {
+                            X86SchedWriteWidths sched> {
   let Predicates = [HasBWI] in {
-  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
+  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
-  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
+  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                             v64i8_info, NAME#"B">, EVEX_V512;
   }
   let Predicates = [HasVLX, HasBWI] in {
 
-  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
+  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
-  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
+  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
-  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
+  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                             v32i8x_info, NAME#"B">, EVEX_V256;
-  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
+  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                             v16i8x_info, NAME#"B">, EVEX_V128;
   }
-
-  let Predicates = [HasBWI, NoVLX] in {
-  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
-  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
-  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
-  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
-  }
 }
 
-// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
-// as commutable here because we already canonicalized all zeros vectors to the
-// RHS during lowering.
-def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
-                         (setcc node:$src1, node:$src2, SETEQ)>;
-def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
-                         (setcc node:$src1, node:$src2, SETNE)>;
-
 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
-                                   PatFrag OpNode, X86SchedWriteWidths sched> :
-  avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
-  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
+                                   X86SchedWriteWidths sched> :
+  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
+  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
 
-defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
+defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                          SchedWriteVecLogic>, T8PD;
-defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
+defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                          SchedWriteVecLogic>, T8XS;
 
-
-multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
-                                       X86VectorVTInfo _,
-                                       X86VectorVTInfo AndInfo> {
-  def : Pat<(_.KVT (OpNode (bitconvert
-                            (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask,
-                    (OpNode (bitconvert
-                             (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
-                            _.ImmAllZerosV))),
-            (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
-                                                  _.RC:$src2)>;
-
-  def : Pat<(_.KVT (OpNode (bitconvert
-                            (AndInfo.VT (and _.RC:$src1,
-                                             (AndInfo.LdFrag addr:$src2)))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask,
-                    (OpNode (bitconvert
-                             (AndInfo.VT (and _.RC:$src1,
-                                              (AndInfo.LdFrag addr:$src2)))),
-                            _.ImmAllZerosV))),
-            (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
-                                                  addr:$src2)>;
-}
-
-// Patterns to use 512-bit instructions when 128/256 are not available.
-multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
-                                            X86VectorVTInfo _,
-                                            X86VectorVTInfo AndInfo,
-                                            X86VectorVTInfo ExtendInfo> {
-  def : Pat<(_.KVT (OpNode (bitconvert
-                            (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
-                           _.ImmAllZerosV)),
-            (_.KVT (COPY_TO_REGCLASS
-                     (!cast<Instruction>(InstrStr#"rr")
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src1, _.SubRegIdx),
-                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                      _.RC:$src2, _.SubRegIdx)),
-                   _.KRC))>;
-
-  def : Pat<(_.KVT (and _.KRC:$mask,
-                    (OpNode (bitconvert
-                             (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
-                            _.ImmAllZerosV))),
-            (COPY_TO_REGCLASS
-             (!cast<Instruction>(InstrStr#"rrk")
-              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src1, _.SubRegIdx),
-              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                             _.RC:$src2, _.SubRegIdx)),
-             _.KRC)>;
-}
-
-multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
-                                        Predicate prd,
-                                        AVX512VLVectorVTInfo CmpInfo,
-                                        AVX512VLVectorVTInfo AndInfo> {
-let Predicates = [prd, HasVLX] in {
-  defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
-                                     CmpInfo.info128, AndInfo.info128>;
-  defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
-                                     CmpInfo.info256, AndInfo.info256>;
-}
-let Predicates = [prd] in {
-  defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
-                                     CmpInfo.info512, AndInfo.info512>;
-}
-
-let Predicates = [prd, NoVLX] in {
-  defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
-                                          CmpInfo.info128, AndInfo.info128,
-                                          CmpInfo.info512>;
-  defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
-                                          CmpInfo.info256, AndInfo.info256,
-                                          CmpInfo.info512>;
-}
-}
-
-multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
-  defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
-                                      avx512vl_i8_info, avx512vl_i16_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
-                                      avx512vl_i8_info, avx512vl_i32_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
-                                      avx512vl_i8_info, avx512vl_i64_info>;
-
-  defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
-                                      avx512vl_i16_info, avx512vl_i8_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
-                                      avx512vl_i16_info, avx512vl_i32_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
-                                      avx512vl_i16_info, avx512vl_i64_info>;
-
-  defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
-                                      avx512vl_i32_info, avx512vl_i8_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
-                                      avx512vl_i32_info, avx512vl_i16_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
-                                      avx512vl_i32_info, avx512vl_i64_info>;
-
-  defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
-                                      avx512vl_i64_info, avx512vl_i8_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
-                                      avx512vl_i64_info, avx512vl_i16_info>;
-  defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
-                                      avx512vl_i64_info, avx512vl_i32_info>;
-}
-
-defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
-defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
-
 //===----------------------------------------------------------------------===//
 // AVX-512  Shift instructions
 //===----------------------------------------------------------------------===//
@@ -6427,86 +6016,23 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
   }
 }
 
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
-              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
+              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
 
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
-              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
+              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
 
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
-              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
+              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
 
 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
 
-defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
 
-// Special handing for handling VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
-                                         list<Predicate> p> {
-  let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
-               _.RC:$src2)>;
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
-               _.RC:$src1, addr:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
-               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
-                     _.RC:$src0)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
-               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
-               _.RC:$src1, _.RC:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
-                     _.ImmAllZerosV)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
-               _.RC:$src1, addr:$src2)>;
-  }
-}
-
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
-                                         list<Predicate> p> :
-           avx512_var_shift_int_lowering<InstrStr, _, p> {
-  let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1,
-                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
-               _.RC:$src1, addr:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
-                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
-                     _.RC:$src0)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
-               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
-    def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
-                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
-                     _.ImmAllZerosV)),
-              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
-               _.RC:$src1, addr:$src2)>;
-  }
-}
-
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
 
 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
 let Predicates = [HasAVX512, NoVLX] in {
@@ -6827,17 +6353,20 @@ let Predicates = [HasAVX512] in {
   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
+            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+
+  // VMOVLPD patterns
+  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
+            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
 }
 
 let SchedRW = [WriteFStore] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                        (ins f64mem:$dst, VR128X:$src),
                        "vmovhps\t{$src, $dst|$dst, $src}",
-                       [(store (f64 (extractelt
-                                     (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
-                                                (bc_v2f64 (v4f32 VR128X:$src))),
-                                     (iPTR 0))), addr:$dst)]>,
-                       EVEX, EVEX_CD8<32, CD8VT2>;
+                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                        (ins f64mem:$dst, VR128X:$src),
                        "vmovhpd\t{$src, $dst|$dst, $src}",
@@ -6845,12 +6374,11 @@ def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                      (iPTR 0))), addr:$dst)]>,
                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                        (ins f64mem:$dst, VR128X:$src),
                        "vmovlps\t{$src, $dst|$dst, $src}",
-                       [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
-                                     (iPTR 0))), addr:$dst)]>,
-                       EVEX, EVEX_CD8<32, CD8VT2>;
+                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                        (ins f64mem:$dst, VR128X:$src),
                        "vmovlpd\t{$src, $dst|$dst, $src}",
@@ -6903,7 +6431,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
-          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
+          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
@@ -6978,7 +6506,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
-          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
+          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -7056,7 +6584,7 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
-          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
+          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1, vselect, 1>,
           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
@@ -7132,7 +6660,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                      !strconcat(OpcodeStr,
-                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                      Sched<[SchedWriteFMA.Scl]>;
   }// isCodeGenOnly = 1
@@ -7151,7 +6679,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                          (_.ScalarLdFrag addr:$src3)))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
-                         _.FRC:$src3, (i32 imm:$rc)))), 0>;
+                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
 
   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
@@ -7159,7 +6687,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
-                         _.FRC:$src1, (i32 imm:$rc)))), 1>;
+                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
 
   // One pattern is 312 order so that the load is in a different place from the
   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
@@ -7169,7 +6697,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                  _.FRC:$src1, _.FRC:$src2))),
                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
-                         _.FRC:$src2, (i32 imm:$rc)))), 1>;
+                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
   }
 }
 
@@ -7333,62 +6861,62 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       _.FRC:$src3, (i32 imm:$rc)))))),
+                       _.FRC:$src3, (i32 timm:$rc)))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       (i32 imm:$rc)))))),
+                       (i32 timm:$rc)))))),
               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       _.FRC:$src3, (i32 imm:$rc)),
+                       _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
                VR128X:$src1, VK1WM:$mask,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       (i32 imm:$rc)),
+                       (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
                VR128X:$src1, VK1WM:$mask,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       _.FRC:$src3, (i32 imm:$rc)),
+                       _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
                VR128X:$src1, VK1WM:$mask,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                       (i32 imm:$rc)),
+                       (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
                VR128X:$src1, VK1WM:$mask,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
-               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
   }
 }
 
@@ -7468,44 +6996,44 @@ defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
 // AVX-512  Scalar convert from sign integer to float/double
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
+multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
-                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
-  let hasSideEffects = 0 in {
+                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
+                    string mem> {
+  let hasSideEffects = 0, isCodeGenOnly = 1 in {
     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
               (ins DstVT.FRC:$src1, SrcRC:$src),
               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
-              EVEX_4V, Sched<[sched]>;
+              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
     let mayLoad = 1 in
       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
               (ins DstVT.FRC:$src1, x86memop:$src),
-              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
   } // hasSideEffects = 0
-  let isCodeGenOnly = 1 in {
-    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
-                  (ins DstVT.RC:$src1, SrcRC:$src2),
-                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set DstVT.RC:$dst,
-                        (OpNode (DstVT.VT DstVT.RC:$src1),
-                                 SrcRC:$src2,
-                                 (i32 FROUND_CURRENT)))]>,
-                 EVEX_4V, Sched<[sched]>;
-
-    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
-                  (ins DstVT.RC:$src1, x86memop:$src2),
-                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                  [(set DstVT.RC:$dst,
-                        (OpNode (DstVT.VT DstVT.RC:$src1),
-                                 (ld_frag addr:$src2),
-                                 (i32 FROUND_CURRENT)))]>,
-                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
-  }//isCodeGenOnly = 1
+  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+                (ins DstVT.RC:$src1, SrcRC:$src2),
+                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                [(set DstVT.RC:$dst,
+                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
+               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+
+  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
+                (ins DstVT.RC:$src1, x86memop:$src2),
+                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                [(set DstVT.RC:$dst,
+                      (OpNode (DstVT.VT DstVT.RC:$src1),
+                               (ld_frag addr:$src2)))]>,
+                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
+                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
 }
 
 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
-                               X86VectorVTInfo DstVT, string asm> {
+                               X86VectorVTInfo DstVT, string asm,
+                               string mem> {
   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
               !strconcat(asm,
@@ -7513,37 +7041,44 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
               [(set DstVT.RC:$dst,
                     (OpNode (DstVT.VT DstVT.RC:$src1),
                              SrcRC:$src2,
-                             (i32 imm:$rc)))]>,
-              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+                             (i32 timm:$rc)))]>,
+              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
+                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
+                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
 }
 
-multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
+multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                 X86FoldableSchedWrite sched,
                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
-                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
-  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
+                                X86MemOperand x86memop, PatFrag ld_frag,
+                                string asm, string mem> {
+  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
-                            ld_frag, asm>, VEX_LIG;
+                            ld_frag, asm, mem>, VEX_LIG;
 }
 
 let Predicates = [HasAVX512] in {
-defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
-                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
+defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+                                 WriteCvtI2SS, GR32,
+                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                  XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
-                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
+defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+                                 WriteCvtI2SS, GR64,
+                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
-                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
-                                 XD, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
-                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
+defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
+                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
+                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+                                 WriteCvtI2SD, GR64,
+                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 
 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -7563,23 +7098,26 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
 def : Pat<(f64 (sint_to_fp GR64:$src)),
           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
 
-defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
+defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+                                  WriteCvtI2SS, GR32,
                                   v4f32x_info, i32mem, loadi32,
-                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
-                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
+                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+                                  WriteCvtI2SS, GR64,
+                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
-                                  i32mem, loadi32, "cvtusi2sd{l}">,
+defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
+                                  i32mem, loadi32, "cvtusi2sd", "l">,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
-                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+                                  WriteCvtI2SD, GR64,
+                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 
 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -7608,8 +7146,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                   X86VectorVTInfo DstVT, SDNode OpNode,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched, string asm,
-                                  string aliasStr,
-                                  bit CodeGenOnly = 1> {
+                                  string aliasStr> {
   let Predicates = [HasAVX512] in {
     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7617,34 +7154,23 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                 EVEX, VEX_LIG, Sched<[sched]>;
     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
-                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                  Sched<[sched]>;
-    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                 [(set DstVT.RC:$dst, (OpNode
                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
-
-    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
-            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
-    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
-            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
   } // Predicates = [HasAVX512]
-}
 
-multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
-                                          X86VectorVTInfo DstVT, SDNode OpNode,
-                                          SDNode OpNodeRnd,
-                                          X86FoldableSchedWrite sched, string asm,
-                                          string aliasStr> :
-  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
-  let Predicates = [HasAVX512] in {
-    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
-            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
-                                            SrcVT.IntScalarMemOp:$src), 0, "att">;
-  } // Predicates = [HasAVX512]
+  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
+  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
+          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
+  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
+                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
 }
 
 // Convert float/double to signed/unsigned int 32/64
@@ -7654,10 +7180,10 @@ defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                    XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
@@ -7666,10 +7192,10 @@ defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
+defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                    XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
@@ -7760,19 +7286,18 @@ def : Pat<(v2f64 (X86Movsd
 // Convert float/double to signed/unsigned int 32/64 with truncation
 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                             X86VectorVTInfo _DstRC, SDNode OpNode,
-                            SDNode OpNodeInt, SDNode OpNodeRnd,
-                            X86FoldableSchedWrite sched, string aliasStr,
-                            bit CodeGenOnly = 1>{
+                            SDNode OpNodeInt, SDNode OpNodeSAE,
+                            X86FoldableSchedWrite sched, string aliasStr>{
 let Predicates = [HasAVX512] in {
   let isCodeGenOnly = 1 in {
   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
-              EVEX, Sched<[sched]>;
+              EVEX, VEX_LIG, Sched<[sched]>;
   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
-              EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
+              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 
   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
@@ -7781,63 +7306,49 @@ let Predicates = [HasAVX512] in {
            EVEX, VEX_LIG, Sched<[sched]>;
   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
-            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
-                                  (i32 FROUND_NO_EXC)))]>,
-                                  EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
-  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
+            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
+                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
               (ins _SrcRC.IntScalarMemOp:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set _DstRC.RC:$dst,
                 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+} //HasAVX512
 
   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
-} //HasAVX512
-}
-
-multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
-                                     X86VectorVTInfo _SrcRC,
-                                     X86VectorVTInfo _DstRC, SDNode OpNode,
-                                     SDNode OpNodeInt, SDNode OpNodeRnd,
-                                     X86FoldableSchedWrite sched,
-                                     string aliasStr> :
-  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
-                   aliasStr, 0> {
-let Predicates = [HasAVX512] in {
   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
 }
-}
 
 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
-                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
-                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
-                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
-                        fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+                        fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
 
-defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
-                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
+                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
-                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
+                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                         "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
-                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
+                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
-                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
+                        fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 //===----------------------------------------------------------------------===//
@@ -7851,15 +7362,13 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (OpNode (_.VT _.RC:$src1),
-                                       (_Src.VT _Src.RC:$src2),
-                                       (i32 FROUND_CURRENT)))>,
+                                       (_Src.VT _Src.RC:$src2)))>,
                          EVEX_4V, VEX_LIG, Sched<[sched]>;
   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (OpNode (_.VT _.RC:$src1),
-                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
-                                  (i32 FROUND_CURRENT)))>,
+                                  (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
                          EVEX_4V, VEX_LIG,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
 
@@ -7878,14 +7387,13 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
 
 // Scalar Coversion with SAE - suppress all exceptions
 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
+                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                     X86FoldableSchedWrite sched> {
   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
-                                         (_Src.VT _Src.RC:$src2),
-                                         (i32 FROUND_NO_EXC)))>,
+                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
+                                         (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
 }
 
@@ -7897,34 +7405,36 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
-                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>,
                         EVEX_B, EVEX_RC;
 }
 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
-                                  SDNode OpNodeRnd, X86FoldableSchedWrite sched,
-                                  X86VectorVTInfo _src, X86VectorVTInfo _dst> {
+                                      SDNode OpNode, SDNode OpNodeRnd,
+                                      X86FoldableSchedWrite sched,
+                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
   }
 }
 
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
+                                      SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
-             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
+    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
+             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
              EVEX_CD8<32, CD8VT1>, XS;
   }
 }
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
-                                         X86froundRnd, WriteCvtSD2SS, f64x_info,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                          f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
-                                          X86fpextRnd, WriteCvtSS2SD, f32x_info,
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                           f64x_info>;
 
 def : Pat<(f64 (fpextend FR32X:$src)),
@@ -7934,14 +7444,6 @@ def : Pat<(f64 (fpextend (loadf32 addr:$src))),
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX512, OptForSize]>;
 
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-      Requires<[HasAVX512, OptForSize]>;
-
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
-          Requires<[HasAVX512, OptForSpeed]>;
-
 def : Pat<(f32 (fpround FR64X:$src)),
           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
            Requires<[HasAVX512]>;
@@ -7970,7 +7472,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                           X86FoldableSchedWrite sched,
                           string Broadcast = _.BroadcastStr,
                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                          RegisterClass MaskRC = _.KRCWM> {
+                          RegisterClass MaskRC = _.KRCWM,
+                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
 
   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _Src.RC:$src),
@@ -7989,12 +7492,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT
-                             (_Src.LdFrag addr:$src)))),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (_Src.LdFrag addr:$src)))),
-                                  _.RC:$src0),
+                         LdDAG,
+                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                          vselect, "$src0 = $dst">,
                          EVEX, Sched<[sched.Folded]>;
 
@@ -8019,13 +7518,12 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
 }
 // Coversion with SAE - suppress all exceptions
 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
+                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                               X86FoldableSchedWrite sched> {
   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
-                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
-                               (i32 FROUND_NO_EXC)))>,
+                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                         EVEX, EVEX_B, Sched<[sched]>;
 }
 
@@ -8036,23 +7534,34 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src", "$src, $rc",
-                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
+                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
+// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
+multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode,
+                                X86FoldableSchedWrite sched,
+                                string Broadcast = _.BroadcastStr,
+                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+                                RegisterClass MaskRC = _.KRCWM>
+  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+                   MemOp, MaskRC,
+                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
+
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
+    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                             fpextend, sched.ZMM>,
              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
-                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
+                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                sched.YMM>, EVEX_V256;
   }
 }
@@ -8060,7 +7569,7 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
 // Truncate Double to Float
 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
   }
@@ -8068,18 +7577,49 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
                                EVEX_V128;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
   }
+
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
 }
 
 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
@@ -8087,20 +7627,66 @@ defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                   PS, EVEX_CD8<32, CD8VH>;
 
-def : Pat<(v8f64 (extloadv8f32 addr:$src)),
-            (VCVTPS2PDZrm addr:$src)>;
+let Predicates = [HasAVX512] in {
+  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+            (VCVTPD2PSZrr VR512:$src)>;
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+                     VR256X:$src0),
+            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+                     v8f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
 
-let Predicates = [HasVLX] in {
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
-            (VCVTPD2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSZ128rm addr:$src)>;
-  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
-              (VCVTPS2PDZ128rm addr:$src)>;
-  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
-              (VCVTPS2PDZ256rm addr:$src)>;
+  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+            (VCVTPD2PSZrm addr:$src)>;
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+                     VR256X:$src0),
+            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+                     v8f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
+
+  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
+            (VCVTPD2PSZrmb addr:$src)>;
+  def : Pat<(vselect VK8WM:$mask,
+                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+                     (v8f32 VR256X:$src0)),
+            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+  def : Pat<(vselect VK8WM:$mask,
+                     (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+                     v8f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
+}
+
+let Predicates = [HasVLX] in {
+  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+            (VCVTPD2PSZ256rr VR256X:$src)>;
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+                     VR128X:$src0),
+            (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+                     v4f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+            (VCVTPD2PSZ256rm addr:$src)>;
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+                     VR128X:$src0),
+            (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+                     v4f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+            (VCVTPD2PSZ256rmb addr:$src)>;
+  def : Pat<(vselect VK4WM:$mask,
+                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+                     VR128X:$src0),
+            (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(vselect VK4WM:$mask,
+                     (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+                     v4f32x_info.ImmAllZerosV),
+            (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
   // Special patterns to allow use of X86vmfpround for masking. Instruction
   // patterns have been disabled with null_frag.
@@ -8142,7 +7728,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
-                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
+                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+                               (v2f64 (OpNode128 (bc_v4i32
+                                (v2i64
+                                 (scalar_to_vector (loadi64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
@@ -8167,12 +7757,12 @@ multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 // Convert Float to Signed/Unsigned Doubleword with truncation
 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                             sched.ZMM>,
              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
-                                OpNodeRnd, sched.ZMM>, EVEX_V512;
+                                OpNodeSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
@@ -8201,12 +7791,12 @@ multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 // Convert Double to Signed/Unsigned Doubleword with truncation
 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                             sched.ZMM>,
              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
-                                OpNodeRnd, sched.ZMM>, EVEX_V512;
+                                OpNodeSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
@@ -8218,16 +7808,49 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                VK2WM>, EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
   }
+
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                  VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+                  f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                  VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+                  f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
 }
 
 // Convert Double to Signed/Unsigned Doubleword
@@ -8249,16 +7872,47 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                VK2WM>, EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
   }
+
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+                  f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, f64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+                  f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, f64mem:$src), 0, "att">;
 }
 
 // Convert Double to Signed/Unsigned Quardword
@@ -8325,7 +7979,11 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
     // Explicitly specified broadcast string, since we take only 2 elements
     // from v4f32x_info source
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
-                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
+                               (v2i64 (OpNode (bc_v4f32
+                                (v2f64
+                                 (scalar_to_vector (loadf64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
@@ -8343,7 +8001,11 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
     // Explicitly specified broadcast string, since we take only 2 elements
     // from v4f32x_info source
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
-                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+                               sched.XMM, "{1to2}", "", f64mem, VK2WM,
+                               (v2i64 (OpNode (bc_v4f32
+                                (v2f64
+                                 (scalar_to_vector (loadf64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
@@ -8351,8 +8013,7 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 // Convert Signed/Unsigned Quardword to Float
 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           SDNode OpNode128, SDNode OpNodeRnd,
-                           X86SchedWriteWidths sched> {
+                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
   let Predicates = [HasDQI] in {
     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                             sched.ZMM>,
@@ -8364,22 +8025,57 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
     // memory forms of these instructions in Asm Parcer. They have the same
     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
     // due to the same reason.
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
-                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
-                               NotEVEX2VEXConvertible;
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
+                               sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+                               EVEX_V128, NotEVEX2VEXConvertible;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                                sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                                NotEVEX2VEXConvertible;
-
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
-    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
-    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
-                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
   }
+
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                  VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+                  VK2WM:$mask, VR128X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to2}}",
+                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+                  VK2WM:$mask, i64mem:$src), 0, "att">;
+
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                  VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
+                  "$dst {${mask}}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, $src}",
+                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+                  VK4WM:$mask, VR256X:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+                  i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+                  "$dst {${mask}}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
+  def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+                  "$dst {${mask}} {z}, ${src}{1to4}}",
+                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+                  VK4WM:$mask, i64mem:$src), 0, "att">;
 }
 
 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
@@ -8390,19 +8086,19 @@ defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 PS, EVEX_CD8<32, CD8VF>;
 
 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
-                                X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
+                                X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
                                 XS, EVEX_CD8<32, CD8VF>;
 
 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
-                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
+                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
-                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
+                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
                                  EVEX_CD8<32, CD8VF>;
 
 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
-                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
+                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
@@ -8446,19 +8142,19 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  EVEX_CD8<32, CD8VH>;
 
 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
-                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
+                                 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;
 
 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
-                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
+                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;
 
 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
-                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
+                                 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;
 
 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
-                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
+                                 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;
 
 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
@@ -8469,67 +8165,15 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                             X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                             EVEX_CD8<64, CD8VF>;
 
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                             EVEX_CD8<64, CD8VF>;
 
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                             EVEX_CD8<64, CD8VF>;
 
-let Predicates = [HasAVX512] in  {
-  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
-            (VCVTTPS2DQZrr VR512:$src)>;
-  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
-            (VCVTTPS2DQZrm addr:$src)>;
-
-  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
-            (VCVTTPS2UDQZrr VR512:$src)>;
-  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
-            (VCVTTPS2UDQZrm addr:$src)>;
-
-  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
-            (VCVTTPD2DQZrr VR512:$src)>;
-  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
-            (VCVTTPD2DQZrm addr:$src)>;
-
-  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
-            (VCVTTPD2UDQZrr VR512:$src)>;
-  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
-            (VCVTTPD2UDQZrm addr:$src)>;
-}
-
 let Predicates = [HasVLX] in {
-  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
-            (VCVTTPS2DQZ128rr VR128X:$src)>;
-  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
-            (VCVTTPS2DQZ128rm addr:$src)>;
-
-  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
-            (VCVTTPS2UDQZ128rr VR128X:$src)>;
-  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
-            (VCVTTPS2UDQZ128rm addr:$src)>;
-
-  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
-            (VCVTTPS2DQZ256rr VR256X:$src)>;
-  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
-            (VCVTTPS2DQZ256rm addr:$src)>;
-
-  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
-            (VCVTTPS2UDQZ256rr VR256X:$src)>;
-  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
-            (VCVTTPS2UDQZ256rm addr:$src)>;
-
-  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
-            (VCVTTPD2DQZ256rr VR256X:$src)>;
-  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
-            (VCVTTPD2DQZ256rm addr:$src)>;
-
-  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
-            (VCVTTPD2UDQZ256rr VR256X:$src)>;
-  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
-            (VCVTTPD2UDQZ256rm addr:$src)>;
-
   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
@@ -8647,72 +8291,64 @@ let Predicates = [HasVLX] in {
             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
 }
 
-let Predicates = [HasDQI] in {
-  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
-            (VCVTTPS2QQZrr VR256X:$src)>;
-  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
-            (VCVTTPS2QQZrm addr:$src)>;
-
-  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
-            (VCVTTPS2UQQZrr VR256X:$src)>;
-  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
-            (VCVTTPS2UQQZrm addr:$src)>;
-
-  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
-            (VCVTTPD2QQZrr VR512:$src)>;
-  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
-            (VCVTTPD2QQZrm addr:$src)>;
-
-  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
-            (VCVTTPD2UQQZrr VR512:$src)>;
-  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
-            (VCVTTPD2UQQZrm addr:$src)>;
-}
-
 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
-            (VCVTTPS2QQZ256rr VR128X:$src)>;
-  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
-            (VCVTTPS2QQZ256rm addr:$src)>;
-
-  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
-            (VCVTTPS2UQQZ256rr VR128X:$src)>;
-  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
-            (VCVTTPS2UQQZ256rm addr:$src)>;
-
-  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
-            (VCVTTPD2QQZ128rr VR128X:$src)>;
-  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
-            (VCVTTPD2QQZ128rm addr:$src)>;
-
-  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
-            (VCVTTPD2UQQZ128rr VR128X:$src)>;
-  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
-            (VCVTTPD2UQQZ128rm addr:$src)>;
-
-  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
-            (VCVTTPD2QQZ256rr VR256X:$src)>;
-  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
-            (VCVTTPD2QQZ256rm addr:$src)>;
-
-  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
-            (VCVTTPD2UQQZ256rr VR256X:$src)>;
-  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
-            (VCVTTPD2UQQZ256rm addr:$src)>;
+  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+            (VCVTPS2QQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+            (VCVTPS2UQQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+            (VCVTTPS2QQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+            (VCVTTPS2UQQZ128rm addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2i64 (vselect VK2WM:$mask,
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+                            v2i64x_info.ImmAllZerosV)),
+            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
+def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
 
-def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
+def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
 
-def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
+def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
           (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
@@ -8738,80 +8374,117 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
 }
 
-let Predicates = [HasAVX512, HasVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                              (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-            (VCVTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                              (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
-            (VCVTPD2UDQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                              (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-            (VCVTTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                              (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
-            (VCVTTPD2UDQZ128rr VR128X:$src)>;
-
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTDQ2PDZ128rm addr:$src)>;
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+let Predicates = [HasVLX] in {
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
-
-  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTUDQ2PDZ128rm addr:$src)>;
-  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+                            v2f64x_info.ImmAllZerosV)),
+            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTUDQ2PDZ128rm addr:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
-  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
-            (VCVTPD2PSZrm addr:$src)>;
-  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
-            (VCVTPS2PDZrm addr:$src)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+                            VR128X:$src0)),
+            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(v2f64 (vselect VK2WM:$mask,
+                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+                            v2f64x_info.ImmAllZerosV)),
+            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                              (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
             (VCVTQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                              (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
+            (VCVTQQ2PSZ128rm addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+            (VCVTQQ2PSZ128rmb addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           (v4f32 VR128X:$src0), VK2WM:$mask),
+            (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+  // patterns have been disabled with null_frag.
+  def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
             (VCVTUQQ2PSZ128rr VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
+            (VCVTUQQ2PSZ128rm addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+                           VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+  def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+            (VCVTUQQ2PSZ128rmb addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           (v4f32 VR128X:$src0), VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+            (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasDQI, NoVLX] in {
-def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
+def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
 
-def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
+def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
 
-def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
+def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
 
-def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
+def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
 
-def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
+def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
 
-def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
+def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -8870,8 +8543,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                              (ins _src.RC:$src), "vcvtph2ps",
                              "{sae}, $src", "$src, {sae}",
-                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
-                                             (i32 FROUND_NO_EXC))>,
+                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                              T8PD, EVEX_B, Sched<[sched]>;
 }
 
@@ -8890,9 +8562,7 @@ let Predicates = [HasVLX] in {
                        EVEX_CD8<32, CD8VH>;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (VCVTPH2PSZ128rm addr:$src)>;
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTPH2PSZ128rm addr:$src)>;
   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
@@ -9055,12 +8725,12 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
-                           EVEX_4V, Sched<[sched]>;
+                           EVEX_4V, VEX_LIG, Sched<[sched]>;
   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (OpNode (_.VT _.RC:$src1),
-                          _.ScalarIntMemCPat:$src2)>, EVEX_4V,
+                          _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 }
@@ -9129,47 +8799,45 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
 
 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
-                         SDNode OpNode, X86FoldableSchedWrite sched> {
+                         SDNode OpNode, SDNode OpNodeSAE,
+                         X86FoldableSchedWrite sched> {
   let ExeDomain = _.ExeDomain in {
   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
-                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                           (i32 FROUND_CURRENT))>,
+                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            Sched<[sched]>;
 
   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
-                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                            (i32 FROUND_NO_EXC))>, EVEX_B,
-                            Sched<[sched]>;
+                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+                            EVEX_B, Sched<[sched]>;
 
   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
-                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
-                         (i32 FROUND_CURRENT))>,
+                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
 
 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                        X86FoldableSchedWrite sched> {
-  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
-               EVEX_CD8<32, CD8VT1>;
-  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
-               EVEX_CD8<64, CD8VT1>, VEX_W;
+                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
+  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
+                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
+  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
+                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
 }
 
 let Predicates = [HasERI] in {
-  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
-                              T8PD, EVEX_4V;
-  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
+  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
+                               SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
+  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
 }
 
-defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
+defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
 
@@ -9178,42 +8846,40 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
   let ExeDomain = _.ExeDomain in {
   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
-                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
+                         (OpNode (_.VT _.RC:$src))>,
                          Sched<[sched]>;
 
   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
-                             (bitconvert (_.LdFrag addr:$src))),
-                          (i32 FROUND_CURRENT))>,
+                             (bitconvert (_.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
 
   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
-                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                                 (i32 FROUND_CURRENT))>, EVEX_B,
-                         Sched<[sched.Folded, sched.ReadAfterFold]>;
+                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
-multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          SDNode OpNode, X86FoldableSchedWrite sched> {
   let ExeDomain = _.ExeDomain in
   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
-                        (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
+                        (OpNode (_.VT _.RC:$src))>,
                         EVEX_B, Sched<[sched]>;
 }
 
 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                       X86SchedWriteWidths sched> {
+                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
-              avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
+              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
-              avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
+              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
 }
 
@@ -9221,24 +8887,32 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, X86SchedWriteWidths sched> {
   // Define only if AVX512VL feature is present.
   let Predicates = [HasVLX] in {
-    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
-                                     EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
-    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
-                                     EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
-    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
-                                     EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
-    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
-                                     EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
+                                sched.XMM>,
+                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
+                                sched.YMM>,
+                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
+                                sched.XMM>,
+                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
+                                sched.YMM>,
+                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
   }
 }
 
 let Predicates = [HasERI] in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
- defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
- defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
-}
-defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
-                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
+                            SchedWriteFRsqrt>, EVEX;
+ defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
+                            SchedWriteFRcp>, EVEX;
+ defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
+                            SchedWriteFAdd>, EVEX;
+}
+defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
+                            SchedWriteFRnd>,
+                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                           SchedWriteFRnd>, EVEX;
 
 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
@@ -9246,7 +8920,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
   let ExeDomain = _.ExeDomain in
   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
-                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
+                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
@@ -9312,23 +8986,21 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
-                         (X86fsqrtRnds (_.VT _.RC:$src1),
-                                    (_.VT _.RC:$src2),
-                                    (i32 FROUND_CURRENT))>,
+                         (X86fsqrts (_.VT _.RC:$src1),
+                                    (_.VT _.RC:$src2))>,
                          Sched<[sched]>;
     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
-                         (X86fsqrtRnds (_.VT _.RC:$src1),
-                                    _.ScalarIntMemCPat:$src2,
-                                    (i32 FROUND_CURRENT))>,
+                         (X86fsqrts (_.VT _.RC:$src1),
+                                    _.ScalarIntMemCPat:$src2)>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (X86fsqrtRnds (_.VT _.RC:$src1),
                                      (_.VT _.RC:$src2),
-                                     (i32 imm:$rc))>,
+                                     (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
 
     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
@@ -9383,8 +9055,8 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
-                         (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                         (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
+                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                         (i32 imm:$src3)))>, EVEX_B,
                          Sched<[sched]>;
 
   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -9410,50 +9082,26 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
   }
 
   let Predicates = [HasAVX512] in {
-    def : Pat<(ffloor _.FRC:$src),
-              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
-               _.FRC:$src, (i32 0x9)))>;
-    def : Pat<(fceil _.FRC:$src),
-              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
-               _.FRC:$src, (i32 0xa)))>;
-    def : Pat<(ftrunc _.FRC:$src),
-              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
-               _.FRC:$src, (i32 0xb)))>;
-    def : Pat<(frint _.FRC:$src),
+    def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
-               _.FRC:$src, (i32 0x4)))>;
-    def : Pat<(fnearbyint _.FRC:$src),
-              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
-               _.FRC:$src, (i32 0xc)))>;
+               _.FRC:$src1, imm:$src2))>;
   }
 
   let Predicates = [HasAVX512, OptForSize] in {
-    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
-              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
-               addr:$src, (i32 0x9)))>;
-    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
-              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
-               addr:$src, (i32 0xa)))>;
-    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
-              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
-               addr:$src, (i32 0xb)))>;
-    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
+    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
-               addr:$src, (i32 0x4)))>;
-    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
-              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
-               addr:$src, (i32 0xc)))>;
+               addr:$src1, imm:$src2))>;
   }
 }
 
 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                            SchedWriteFRnd.Scl, f32x_info>,
-                                           AVX512AIi8Base, EVEX_4V,
+                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                            EVEX_CD8<32, CD8VT1>;
 
 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                            SchedWriteFRnd.Scl, f64x_info>,
-                                           VEX_W, AVX512AIi8Base, EVEX_4V,
+                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                            EVEX_CD8<64, CD8VT1>;
 
 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
@@ -9481,32 +9129,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
 
-multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
-                                    X86VectorVTInfo _, PatLeaf ZeroFP,
-                                    bits<8> ImmV, Predicate BasePredicate> {
-  let Predicates = [BasePredicate] in {
-    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
-               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
-               (extractelt _.VT:$dst, (iPTR 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
-               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-
-    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
-               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
-               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-  }
-}
-
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x02,  HasAVX512>;
-
 
 //-------------------------------------------------
 // Integer truncate and extend operations
@@ -9966,26 +9588,14 @@ multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
   let Predicates = [HasVLX, HasBWI] in {
     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
-    def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
-    def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
   }
 
   let Predicates = [HasVLX] in {
     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-    def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-    def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
 
     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
-    def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
-    def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
   }
 
   // 512-bit patterns
@@ -10007,41 +9617,6 @@ multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
   }
 }
 
-multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
-    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
-  let Predicates = [HasVLX, HasBWI] in {
-    def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
-              (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
-  }
-
-  let Predicates = [HasVLX] in {
-    def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
-              (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
-
-    def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
-              (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
-  }
-
-  // 512-bit patterns
-  let Predicates = [HasBWI] in {
-    def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
-              (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
-  }
-  let Predicates = [HasAVX512] in {
-    def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
-              (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
-    def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
-              (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
-
-    def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
-              (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
-
-    def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
-              (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
-  }
-}
-
-
 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                  SDNode InVecOp> :
     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
@@ -10051,103 +9626,62 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
-  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
-  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
-  def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   }
   let Predicates = [HasVLX] in {
   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
-            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
 
   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
+  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
+  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
+  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   }
   let Predicates = [HasVLX] in {
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
+  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
   }
   // 512-bit patterns
   let Predicates = [HasAVX512] in {
   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
-  def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
-            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
   }
 }
 
 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
-defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
 
 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
 // ext+trunc aggresively making it impossible to legalize the DAG to this
@@ -10155,22 +9689,8 @@ defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
 let Predicates = [HasAVX512, NoBWI] in {
 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
+def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
-def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
-         (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-}
-
-// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
-// ext+trunc aggresively making it impossible to legalize the DAG to this
-// pattern directly.
-let Predicates = [HasAVX512, NoBWI] in {
-def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
-         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
-         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
-def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
-         (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -10457,7 +9977,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr, X86FoldableSchedWrite sched> {
   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
-              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
+              (null_frag)>, AVX5128IBase,
               Sched<[sched]>;
 
   let mayStore = 1, hasSideEffects = 0 in
@@ -10479,6 +9999,13 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
             (!cast<Instruction>(Name#_.ZSuffix##mrk)
                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
+
+  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+            (!cast<Instruction>(Name#_.ZSuffix##rrk)
+                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+                            _.KRCWM:$mask, _.RC:$src)>;
 }
 
 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -10512,13 +10039,12 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr, X86FoldableSchedWrite sched> {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
-              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
+              (null_frag)>, AVX5128IBase,
               Sched<[sched]>;
 
   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
-              (_.VT (X86expand (_.VT (bitconvert
-                                      (_.LdFrag addr:$src1)))))>,
+              (null_frag)>,
             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
@@ -10537,6 +10063,13 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
                                                (_.VT _.RC:$src0))),
             (!cast<Instruction>(Name#_.ZSuffix##rmk)
                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
+
+  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+            (!cast<Instruction>(Name#_.ZSuffix##rrk)
+                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+            (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+                            _.KRCWM:$mask, _.RC:$src)>;
 }
 
 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -10603,18 +10136,17 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                       "$src1, {sae}, $src2",
                       (OpNode (_.VT _.RC:$src1),
-                              (i32 imm:$src2),
-                              (i32 FROUND_NO_EXC))>,
+                              (i32 imm:$src2))>,
                       EVEX_B, Sched<[sched]>;
 }
 
 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
-            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
   let Predicates = [prd] in {
     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                            _.info512>,
-                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
+                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                                sched.ZMM, _.info512>, EVEX_V512;
   }
   let Predicates = [prd, HasVLX] in {
@@ -10733,8 +10265,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                       "$src1, $src2, {sae}, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
-                              (i32 imm:$src3),
-                              (i32 FROUND_NO_EXC))>,
+                              (i32 imm:$src3))>,
                       EVEX_B, Sched<[sched]>;
 }
 
@@ -10748,17 +10279,16 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
                       "$src1, $src2, {sae}, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
-                              (i32 imm:$src3),
-                              (i32 FROUND_NO_EXC))>,
+                              (i32 imm:$src3))>,
                       EVEX_B, Sched<[sched]>;
 }
 
 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
-            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
   let Predicates = [prd] in {
     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
-                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
+                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                   EVEX_V512;
 
   }
@@ -10802,267 +10332,64 @@ multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
 
 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
-                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
+                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
   let Predicates = [prd] in {
      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
-              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
+              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
   }
 }
 
 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
-                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+                    SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
-                            opcPs, OpNode, OpNodeRnd, sched, prd>,
+                            opcPs, OpNode, OpNodeSAE, sched, prd>,
                             EVEX_CD8<32, CD8VF>;
   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
-                            opcPd, OpNode, OpNodeRnd, sched, prd>,
+                            opcPd, OpNode, OpNodeSAE, sched, prd>,
                             EVEX_CD8<64, CD8VF>, VEX_W;
 }
 
 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
-                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
+                              X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
                               AVX512AIi8Base, EVEX;
 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
-                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
+                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
                               AVX512AIi8Base, EVEX;
 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
-                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
+                              X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
                               AVX512AIi8Base, EVEX;
 
 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
-                                                0x50, X86VRange, X86VRangeRnd,
+                                                0x50, X86VRange, X86VRangeSAE,
                                                 SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
-                                                0x50, X86VRange, X86VRangeRnd,
+                                                0x50, X86VRange, X86VRangeSAE,
                                                 SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
 
 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
-      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
+      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
-      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
+      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
-      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
+      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
-      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
+      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
-      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
+      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
-      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
+      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
-
-multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
-  // Register
-  def : Pat<(_.VT (ffloor _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xB))>;
-
-  // Merge-masking
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
-  // Zero-masking
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
-  // Load
-  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xB))>;
-
-  // Merge-masking + load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Zero-masking + load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Broadcast load
-  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xB))>;
-
-  // Merge-masking + broadcast load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Zero-masking + broadcast load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX512] in {
-  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
-  defm : AVX512_rndscale_lowering<v8f64_info,  "PD">;
-}
-
-let Predicates = [HasVLX] in {
-  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
-  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
-  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
-  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
-}
-
 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                           X86FoldableSchedWrite sched,
                                           X86VectorVTInfo _,
@@ -11544,9 +10871,9 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
           (VMOVDDUPZ128rm addr:$src)>;
 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
@@ -11554,21 +10881,21 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
-                   (bitconvert (v4i32 immAllZerosV))),
+                   immAllZerosV),
           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                    (v2f64 VR128X:$src0)),
           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
-                   (bitconvert (v4i32 immAllZerosV))),
+                   immAllZerosV),
           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
                    (v2f64 VR128X:$src0)),
           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
-                   (bitconvert (v4i32 immAllZerosV))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+                   immAllZerosV),
           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 }
 
@@ -12067,39 +11394,39 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
 // TODO: We should maybe have a more generalized algorithm for folding to
 // vpternlog.
 let Predicates = [HasAVX512] in {
-  def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
 }
 
 let Predicates = [HasAVX512, NoVLX] in {
-  def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
@@ -12107,28 +11434,28 @@ let Predicates = [HasAVX512, NoVLX] in {
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (i8 15)), sub_xmm)>;
 
-  def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
             (EXTRACT_SUBREG
              (VPTERNLOGQZrri
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -12138,22 +11465,22 @@ let Predicates = [HasAVX512, NoVLX] in {
 }
 
 let Predicates = [HasVLX] in {
-  def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
 
-  def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
 }
 
@@ -12161,58 +11488,55 @@ let Predicates = [HasVLX] in {
 // AVX-512 - FixupImm
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                   X86VectorVTInfo TblVT>{
   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
-                        (OpNode (_.VT _.RC:$src1),
-                                (_.VT _.RC:$src2),
-                                (TblVT.VT _.RC:$src3),
-                                (i32 imm:$src4),
-                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
+                        (X86VFixupimm (_.VT _.RC:$src1),
+                                      (_.VT _.RC:$src2),
+                                      (TblVT.VT _.RC:$src3),
+                                      (i32 imm:$src4))>, Sched<[sched]>;
     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
-                      (OpNode (_.VT _.RC:$src1),
-                              (_.VT _.RC:$src2),
-                              (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
-                              (i32 imm:$src4),
-                              (i32 FROUND_CURRENT))>,
+                      (X86VFixupimm (_.VT _.RC:$src1),
+                                    (_.VT _.RC:$src2),
+                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
+                                    (i32 imm:$src4))>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
-                      (OpNode (_.VT _.RC:$src1),
-                              (_.VT _.RC:$src2),
-                              (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
-                              (i32 imm:$src4),
-                              (i32 FROUND_CURRENT))>,
+                      (X86VFixupimm (_.VT _.RC:$src1),
+                                    (_.VT _.RC:$src2),
+                                    (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
+                                    (i32 imm:$src4))>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
   } // Constraints = "$src1 = $dst"
 }
 
 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
-                                      SDNode OpNode, X86FoldableSchedWrite sched,
-                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
+                                      X86FoldableSchedWrite sched,
+                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
+  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                       "$src2, $src3, {sae}, $src4",
-                      (OpNode (_.VT _.RC:$src1),
-                                (_.VT _.RC:$src2),
-                                (TblVT.VT _.RC:$src3),
-                                (i32 imm:$src4),
-                                (i32 FROUND_NO_EXC))>,
+                      (X86VFixupimmSAE (_.VT _.RC:$src1),
+                                       (_.VT _.RC:$src2),
+                                       (TblVT.VT _.RC:$src3),
+                                       (i32 imm:$src4))>,
                       EVEX_B, Sched<[sched]>;
   }
 }
 
-multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                   X86VectorVTInfo _src3VT> {
   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
@@ -12220,30 +11544,27 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
-                      (OpNode (_.VT _.RC:$src1),
-                              (_.VT _.RC:$src2),
-                              (_src3VT.VT _src3VT.RC:$src3),
-                              (i32 imm:$src4),
-                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
+                      (X86VFixupimms (_.VT _.RC:$src1),
+                                     (_.VT _.RC:$src2),
+                                     (_src3VT.VT _src3VT.RC:$src3),
+                                     (i32 imm:$src4))>, Sched<[sched]>;
     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                       "$src2, $src3, {sae}, $src4",
-                      (OpNode (_.VT _.RC:$src1),
-                              (_.VT _.RC:$src2),
-                              (_src3VT.VT _src3VT.RC:$src3),
-                              (i32 imm:$src4),
-                              (i32 FROUND_NO_EXC))>,
+                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
+                                        (_.VT _.RC:$src2),
+                                        (_src3VT.VT _src3VT.RC:$src3),
+                                        (i32 imm:$src4))>,
                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
-                     (OpNode (_.VT _.RC:$src1),
-                             (_.VT _.RC:$src2),
-                             (_src3VT.VT (scalar_to_vector
-                                       (_src3VT.ScalarLdFrag addr:$src3))),
-                             (i32 imm:$src4),
-                             (i32 FROUND_CURRENT))>,
+                     (X86VFixupimms (_.VT _.RC:$src1),
+                                    (_.VT _.RC:$src2),
+                                    (_src3VT.VT (scalar_to_vector
+                                              (_src3VT.ScalarLdFrag addr:$src3))),
+                                    (i32 imm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
@@ -12252,25 +11573,23 @@ multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                       AVX512VLVectorVTInfo _Vec, 
                                       AVX512VLVectorVTInfo _Tbl> {
   let Predicates = [HasAVX512] in
-    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
-                                       _Vec.info512, _Tbl.info512>,
-                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
+    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                 EVEX_4V, EVEX_V512;
   let Predicates = [HasAVX512, HasVLX] in {
-    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
+    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                             EVEX_4V, EVEX_V128;
-    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
+    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                             EVEX_4V, EVEX_V256;
   }
 }
 
-defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
-defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
@@ -12331,6 +11650,12 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
                           _.FRC:$src)))),
               (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
+    def : Pat<(MoveNode
+               (_.VT VR128X:$dst),
+               (_.VT (scalar_to_vector
+                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
+                          (_.ScalarLdFrag addr:$src))))),
+              (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
 
     // extracted masked scalar math op with insert via movss
     def : Pat<(MoveNode (_.VT VR128X:$src1),
@@ -12344,6 +11669,16 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
                VK1WM:$mask, _.VT:$src1,
                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
+    def : Pat<(MoveNode (_.VT VR128X:$src1),
+               (scalar_to_vector
+                (X86selects VK1WM:$mask,
+                            (Op (_.EltVT
+                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                                (_.ScalarLdFrag addr:$src2)),
+                            _.FRC:$src0))),
+              (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
+               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
+               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
 
     // extracted masked scalar math op with insert via movss
     def : Pat<(MoveNode (_.VT VR128X:$src1),
@@ -12355,6 +11690,13 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
       (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
           VK1WM:$mask, _.VT:$src1,
           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
+    def : Pat<(MoveNode (_.VT VR128X:$src1),
+               (scalar_to_vector
+                (X86selects VK1WM:$mask,
+                            (Op (_.EltVT
+                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                                (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
+      (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
   }
 }
 
@@ -12380,26 +11722,6 @@ multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
 
-multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
-                                                 SDNode Move, X86VectorVTInfo _,
-                                                 bits<8> ImmV> {
-  let Predicates = [HasAVX512] in {
-    def : Pat<(_.VT (Move _.VT:$dst,
-                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
-                                                        (i32 ImmV))>;
-  }
-}
-
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
-                                             v4f32x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
-                                             v4f32x_info, 0x02>;
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
-                                             v2f64x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
-                                             v2f64x_info, 0x02>;
-
 //===----------------------------------------------------------------------===//
 // AES instructions
 //===----------------------------------------------------------------------===//
@@ -12612,12 +11934,19 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
 
+def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),  // X86Vpshufbitqmb that matches only when the node has a single use.
+                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
+  return N->hasOneUse();
+}]>;
+
 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                 (ins VTI.RC:$src1, VTI.RC:$src2),
                                 "vpshufbitqmb",
                                 "$src2, $src1", "$src1, $src2",
                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                (VTI.VT VTI.RC:$src2)),
+                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                 Sched<[sched]>;
   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
@@ -12625,6 +11954,8 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
                                 "vpshufbitqmb",
                                 "$src2, $src1", "$src1, $src2",
                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                (VTI.VT (VTI.LdFrag addr:$src2))),
+                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -12720,13 +12051,13 @@ defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
-                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
 
 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
-                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                      Sched<[SchedWriteFMA.Scl.Folded]>;
 }
 
@@ -12749,3 +12080,196 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                      Sched<[SchedWriteFMA.ZMM.Folded]>;
 }
 
+let hasSideEffects = 0 in {  // Pattern-less pseudos moving a VK16PAIR operand to/from memory.
+  let mayStore = 1 in
+  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;  // VK16PAIR:$src -> anymem:$dst
+  let mayLoad = 1 in
+  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;  // anymem:$src -> VK16PAIR:$dst
+}
+
+//===----------------------------------------------------------------------===//
+// VP2INTERSECT
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {  // vp2intersect (opcode 0x68) for one vector type; every form writes _.KRPC:$dst.
+  def rr : I<0x68, MRMSrcReg,  // rr: $src2 is a register.
+                  (outs _.KRPC:$dst),
+                  (ins _.RC:$src1, _.RC:$src2),
+                  !strconcat("vp2intersect", _.Suffix,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
+                  EVEX_4V, T8XD;
+
+  def rm : I<0x68, MRMSrcMem,  // rm: $src2 is loaded from memory with _.LdFrag.
+                  (outs _.KRPC:$dst),
+                  (ins  _.RC:$src1, _.MemOp:$src2),
+                  !strconcat("vp2intersect", _.Suffix,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
+
+  def rmb : I<0x68, MRMSrcMem,  // rmb: $src2 is an X86VBroadcast of a _.ScalarLdFrag load; sets EVEX_B.
+                  (outs _.KRPC:$dst),
+                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
+                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
+                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                             _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
+                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}  // NOTE(review): no Sched<...> is attached to rr/rm/rmb in this chunk -- confirm that is intended.
+
+multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {  // Instantiates avx512_vp2intersect_modes at 512, 256 and 128 bits.
+  let Predicates  = [HasAVX512, HasVP2INTERSECT] in  // 512-bit forms: no HasVLX requirement.
+    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
+
+  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {  // 256/128-bit forms additionally require HasVLX.
+    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
+    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
+  }
+}
+
+defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;  // i32 type infos.
+defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;  // i64 type infos; additionally carries VEX_W.
+
+multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,  // Instantiates avx512_binop_rm2 at 512, 256 and 128 bits.
+                             X86SchedWriteWidths sched,  // Per-width scheduling info; .ZMM/.YMM/.XMM are picked below.
+                             AVX512VLVectorVTInfo _SrcVTInfo,  // Passed to avx512_binop_rm2 as both its first and third type-info argument.
+                             AVX512VLVectorVTInfo _DstVTInfo,  // Passed as the second type-info argument.
+                             SDNode OpNode, Predicate prd,
+                             bit IsCommutable = 0> {
+  let Predicates = [prd] in  // 512-bit form: prd alone.
+    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
+                                   _SrcVTInfo.info512, _DstVTInfo.info512,
+                                   _SrcVTInfo.info512, IsCommutable>,
+                                   EVEX_V512, EVEX_CD8<32, CD8VF>;  // EVEX_CD8 element size is fixed at 32, not taken from the type infos.
+  let Predicates = [HasVLX, prd] in {  // 256/128-bit forms additionally require HasVLX.
+    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
+                                      _SrcVTInfo.info256, _DstVTInfo.info256,
+                                      _SrcVTInfo.info256, IsCommutable>,
+                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
+    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
+                                      _SrcVTInfo.info128, _DstVTInfo.info128,
+                                      _SrcVTInfo.info128, IsCommutable>,
+                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
+  }
+}
+
+defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
+                                        SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF
+                                        avx512vl_f32_info, avx512vl_i16_info,
+                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
+
+// Truncate Float to BFloat16: Z (512-bit), Z256 and Z128 forms built on avx512_vcvt_fp.
+multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
+                             X86SchedWriteWidths sched> {
+  let Predicates = [HasBF16] in {
+    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,  // v16f32 source, v16i16 result.
+                            X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+  }
+  let Predicates = [HasBF16, HasVLX] in {
+    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,  // v4f32 source, v8i16 result type.
+                               null_frag, sched.XMM, "{1to4}", "{x}", f128mem,  // null_frag instead of X86cvtneps2bf16 for this width.
+                               VK4WM>, EVEX_V128;
+    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,  // v8f32 source, v8i16 result.
+                               X86cvtneps2bf16,
+                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+
+    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",  // "x"-suffixed mnemonic -> Z128rr.
+                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+                    VR128X:$src), 0>;
+    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",  // "x"-suffixed mnemonic -> Z128rm, "intel" variant only.
+                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+                    f128mem:$src), 0, "intel">;
+    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",  // "y"-suffixed mnemonic -> Z256rr.
+                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+                    VR256X:$src), 0>;
+    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",  // "y"-suffixed mnemonic -> Z256rm, "intel" variant only.
+                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+                    f256mem:$src), 0, "intel">;
+  }
+}
+
+defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",  // Same opcode 0x72 as VCVTNE2PS2BF16, but T8XS instead of T8XD.
+                                       SchedWriteCvtPD2PS>, T8XS,
+                                       EVEX_CD8<32, CD8VF>;
+
+let Predicates = [HasBF16, HasVLX] in {
+  // Special patterns to allow use of X86mcvtneps2bf16 for masking. The Z128 instruction
+  // patterns have been disabled with null_frag, so all nine forms are selected by hand here.
+  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),  // Register source: rr, rrk, rrkz.
+            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,  // All-zeros second operand selects the kz form.
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
+
+  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),  // loadv4f32 source: rm, rmk, rmkz.
+            (VCVTNEPS2BF16Z128rm addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
+                              VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
+
+  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32  // Broadcast loadf32 source: rmb, rmbk, rmbkz.
+                                     (X86VBroadcast (loadf32 addr:$src))))),
+            (VCVTNEPS2BF16Z128rmb addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+                              (v8i16 VR128X:$src0), VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
+            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
+}
+
+let Constraints = "$src1 = $dst" in {  // $src1 is tied to $dst for all three forms below.
+multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,  // r/m/mb forms of a three-source op for one vector width.
+                              X86VectorVTInfo _, X86VectorVTInfo src_v> {  // `_`: register/result type info; `src_v`: type info for the memory $src3.
+  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),  // r: $src3 is a register.
+                           (ins _.RC:$src2, _.RC:$src3),
+                           OpcodeStr, "$src3, $src2", "$src2, $src3",
+                           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+                           EVEX_4V;
+
+  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),  // m: $src3 is loaded with src_v.LdFrag.
+                               (ins _.RC:$src2, _.MemOp:$src3),
+                               OpcodeStr, "$src3, $src2", "$src2, $src3",
+                               (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+                               (src_v.VT (bitconvert
+                               (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
+
+  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),  // mb: $src3 is an X86VBroadcast of a src_v.ScalarLdFrag load; sets EVEX_B.
+                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
+                  OpcodeStr,
+                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
+                  !strconcat("$src2, ${src3}", _.BroadcastStr),
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+                  (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
+                  EVEX_B, EVEX_4V;
+
+}  // NOTE(review): no Sched<...> is attached to r/m/mb in this chunk -- confirm that is intended.
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,  // Instantiates avx512_dpbf16ps_rm at 512, 256 and 128 bits.
+                                 AVX512VLVectorVTInfo _,
+                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
+  let Predicates = [prd] in {  // 512-bit form: prd alone.
+    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
+                                   src_v.info512>, EVEX_V512;
+  }
+  let Predicates = [HasVLX, prd] in {  // 256/128-bit forms additionally require HasVLX.
+    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
+                                   src_v.info256>, EVEX_V256;
+    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
+                                   src_v.info128>, EVEX_V128;
+  }
+}
+
+defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
+                                       avx512vl_f32_info, avx512vl_i32_info,  // f32 infos for registers/result; i32 infos for the memory $src3.
+                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index cb5a4e5b5d41..e52635f8d48b 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1,9 +1,8 @@
 //===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -195,19 +194,22 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
 
 // Surprisingly enough, these are not two address instructions!
 let Defs = [EFLAGS] in {
+// NOTE: These are order-specific: we want the ri8 forms to be listed first so
+// that they are slightly preferred to the ri forms.
+
 // Register-Integer Signed Integer Multiply
-def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
-                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS,
-                            (X86smul_flag GR16:$src1, imm:$src2))]>,
-                      Sched<[WriteIMul16Imm]>, OpSize16;
 def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
                      (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR16:$dst, EFLAGS,
                            (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
                      Sched<[WriteIMul16Imm]>, OpSize16;
+def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
+                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS,
+                            (X86smul_flag GR16:$src1, imm:$src2))]>,
+                      Sched<[WriteIMul16Imm]>, OpSize16;
 def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
                       (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                       "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -220,26 +222,20 @@ def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
                      [(set GR32:$dst, EFLAGS,
                            (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
                      Sched<[WriteIMul32Imm]>, OpSize32;
-def IMUL64rri32 : RIi32S<0x69, MRMSrcReg,                    // GR64 = GR64*I32
-                         (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
-                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         [(set GR64:$dst, EFLAGS,
-                             (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
-                         Sched<[WriteIMul64Imm]>;
 def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
                       (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
                       "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR64:$dst, EFLAGS,
                             (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
                       Sched<[WriteIMul64Imm]>;
+def IMUL64rri32 : RIi32S<0x69, MRMSrcReg,                    // GR64 = GR64*I32
+                         (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         [(set GR64:$dst, EFLAGS,
+                             (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
+                         Sched<[WriteIMul64Imm]>;
 
 // Memory-Integer Signed Integer Multiply
-def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
-                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
-                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, EFLAGS,
-                            (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
-                      Sched<[WriteIMul16Imm.Folded]>, OpSize16;
 def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
                      (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -247,12 +243,12 @@ def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
                            (X86smul_flag (loadi16 addr:$src1),
                                          i16immSExt8:$src2))]>,
                      Sched<[WriteIMul16Imm.Folded]>, OpSize16;
-def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                     // GR32 = [mem32]*I32
-                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
-                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, EFLAGS,
-                            (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
-                      Sched<[WriteIMul32Imm.Folded]>, OpSize32;
+def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                     // GR16 = [mem16]*I16
+                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS,
+                            (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
+                      Sched<[WriteIMul16Imm.Folded]>, OpSize16;
 def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
                      (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,13 +256,12 @@ def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
                            (X86smul_flag (loadi32 addr:$src1),
                                          i32immSExt8:$src2))]>,
                      Sched<[WriteIMul32Imm.Folded]>, OpSize32;
-def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
-                         (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
-                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                         [(set GR64:$dst, EFLAGS,
-                              (X86smul_flag (loadi64 addr:$src1),
-                                            i64immSExt32:$src2))]>,
-                         Sched<[WriteIMul64Imm.Folded]>;
+def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                     // GR32 = [mem32]*I32
+                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
+                      Sched<[WriteIMul32Imm.Folded]>, OpSize32;
 def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                       (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
                       "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -274,6 +269,13 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                             (X86smul_flag (loadi64 addr:$src1),
                                           i64immSExt8:$src2))]>,
                       Sched<[WriteIMul64Imm.Folded]>;
+def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
+                         (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         [(set GR64:$dst, EFLAGS,
+                              (X86smul_flag (loadi64 addr:$src1),
+                                            i64immSExt32:$src2))]>,
+                         Sched<[WriteIMul64Imm.Folded]>;
 } // Defs = [EFLAGS]
 
 // unsigned division/remainder
@@ -436,11 +438,10 @@ def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
 // TODO: inc/dec is slow for P4, but fast for Pentium-M.
 let Defs = [EFLAGS] in {
 let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let CodeSize = 2 in
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
 def INC8r  : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
                "inc{b}\t$dst",
                [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
 def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
                "inc{w}\t$dst",
                [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
@@ -484,11 +485,10 @@ let Predicates = [UseIncDec, In64BitMode] in {
 } // CodeSize = 2, SchedRW
 
 let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let CodeSize = 2 in
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
 def DEC8r  : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                "dec{b}\t$dst",
                [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
 def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                "dec{w}\t$dst",
                [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
@@ -605,16 +605,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
 
 
 def Xi8  : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
-                       Imm8, i8imm, imm8_su, i8imm, invalid_node,
+                       Imm8, i8imm, relocImm8_su, i8imm, invalid_node,
                        0, OpSizeFixed, 0>;
 def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
-                       Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su,
+                       Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8_su,
                        1, OpSize16, 0>;
 def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
-                       Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
+                       Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8_su,
                        1, OpSize32, 0>;
 def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
-                       Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
+                       Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8_su,
                        1, OpSizeFixed, 1>;
 
 /// ITy - This instruction base class takes the type info for the instruction.
@@ -924,11 +924,12 @@ class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
 multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
                          string mnemonic, Format RegMRM, Format MemMRM,
                          SDNode opnodeflag, SDNode opnode,
-                         bit CommutableRR, bit ConvertibleToThreeAddress> {
+                         bit CommutableRR, bit ConvertibleToThreeAddress,
+                         bit ConvertibleToThreeAddressRR> {
   let Defs = [EFLAGS] in {
     let Constraints = "$src1 = $dst" in {
       let isCommutable = CommutableRR in {
-        let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
           def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
           def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
           def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
@@ -1169,16 +1170,16 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
 
 
 defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
-                         X86and_flag, and, 1, 0>;
+                         X86and_flag, and, 1, 0, 0>;
 defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
-                         X86or_flag, or, 1, 0>;
+                         X86or_flag, or, 1, 0, 0>;
 defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
-                         X86xor_flag, xor, 1, 0>;
+                         X86xor_flag, xor, 1, 0, 0>;
 defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
-                         X86add_flag, add, 1, 1>;
+                         X86add_flag, add, 1, 1, 1>;
 let isCompare = 1 in {
 defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
-                         X86sub_flag, sub, 0, 0>;
+                         X86sub_flag, sub, 0, 1, 0>;
 }
 
 // Arithmetic.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index dcce7b9951f2..50aed98112c3 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -1,9 +1,8 @@
 //===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index f5494fc0b13f..099f6aa8d8bb 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -1,9 +1,8 @@
 //===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,99 +13,94 @@
 
 
 // CMOV instructions.
-multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
-                PatLeaf CondNode> {
-  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
-      isCommutable = 1, SchedRW = [Sched] in {
-    def NAME#16rr
-      : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR16:$dst,
-                (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
-                TB, OpSize16;
-    def NAME#32rr
-      : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR32:$dst,
-                (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
-                TB, OpSize32;
-    def NAME#64rr
-      :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
-          !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR64:$dst,
-                (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
-  }
-
-  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
-      SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
-    def NAME#16rm
-      : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
-          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
-                                    CondNode, EFLAGS))]>, TB, OpSize16;
-    def NAME#32rm
-      : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
-          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
-                                    CondNode, EFLAGS))]>, TB, OpSize32;
-    def NAME#64rm
-      :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-          !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
-          [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                    CondNode, EFLAGS))]>, TB;
-  } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
-} // end multiclass
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+    isCommutable = 1, SchedRW = [WriteCMOV] in {
+  def CMOV16rr
+    : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond),
+        "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
+        [(set GR16:$dst,
+              (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>,
+              TB, OpSize16;
+  def CMOV32rr
+    : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond),
+        "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
+        [(set GR32:$dst,
+              (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>,
+              TB, OpSize32;
+  def CMOV64rr
+    :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond),
+        "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
+        [(set GR64:$dst,
+              (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB;
+}
 
+let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+    SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
+  def CMOV16rm
+    : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond),
+        "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
+        [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+                                  imm:$cond, EFLAGS))]>, TB, OpSize16;
+  def CMOV32rm
+    : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond),
+        "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
+        [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+                                  imm:$cond, EFLAGS))]>, TB, OpSize32;
+  def CMOV64rm
+    :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond),
+        "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
+        [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+                                  imm:$cond, EFLAGS))]>, TB;
+} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // isCodeGenOnly = 1, ForceDisassemble = 1
 
-// Conditional Moves.
-defm CMOVO  : CMOV<0x40, "cmovo" , WriteCMOV,  X86_COND_O>;
-defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV,  X86_COND_NO>;
-defm CMOVB  : CMOV<0x42, "cmovb" , WriteCMOV,  X86_COND_B>;
-defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV,  X86_COND_AE>;
-defm CMOVE  : CMOV<0x44, "cmove" , WriteCMOV,  X86_COND_E>;
-defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV,  X86_COND_NE>;
-defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
-defm CMOVA  : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
-defm CMOVS  : CMOV<0x48, "cmovs" , WriteCMOV,  X86_COND_S>;
-defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV,  X86_COND_NS>;
-defm CMOVP  : CMOV<0x4A, "cmovp" , WriteCMOV,  X86_COND_P>;
-defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV,  X86_COND_NP>;
-defm CMOVL  : CMOV<0x4C, "cmovl" , WriteCMOV,  X86_COND_L>;
-defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV,  X86_COND_GE>;
-defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV,  X86_COND_LE>;
-defm CMOVG  : CMOV<0x4F, "cmovg" , WriteCMOV,  X86_COND_G>;
+// SetCC instructions.
+let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
+  def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
+                "set${cond}\t$dst",
+                [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>,
+                TB, Sched<[WriteSETCC]>;
+  def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond),
+                "set${cond}\t$dst",
+                [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>,
+                TB, Sched<[WriteSETCCStore]>;
+} // Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1
 
+multiclass CMOV_SETCC_Aliases<string Cond, int CC> {
+  def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+                  (CMOV16rr GR16:$dst, GR16:$src, CC), 0>;
+  def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+                  (CMOV16rm GR16:$dst, i16mem:$src, CC), 0>;
+  def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+                  (CMOV32rr GR32:$dst, GR32:$src, CC), 0>;
+  def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+                  (CMOV32rm GR32:$dst, i32mem:$src, CC), 0>;
+  def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+                  (CMOV64rr GR64:$dst, GR64:$src, CC), 0>;
+  def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+                  (CMOV64rm GR64:$dst, i64mem:$src, CC), 0>;
 
-// SetCC instructions.
-multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
-  let Uses = [EFLAGS] in {
-    def r    : I<opc, MRMXr,  (outs GR8:$dst), (ins),
-                     !strconcat(Mnemonic, "\t$dst"),
-                     [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
-                     TB, Sched<[WriteSETCC]>;
-    def m    : I<opc, MRMXm,  (outs), (ins i8mem:$dst),
-                     !strconcat(Mnemonic, "\t$dst"),
-                     [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
-                     TB, Sched<[WriteSETCCStore]>;
-  } // Uses = [EFLAGS]
+  def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>;
+  def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>;
 }
 
-defm SETO  : SETCC<0x90, "seto",  X86_COND_O>;   // is overflow bit set
-defm SETNO : SETCC<0x91, "setno", X86_COND_NO>;  // is overflow bit not set
-defm SETB  : SETCC<0x92, "setb",  X86_COND_B>;   // unsigned less than
-defm SETAE : SETCC<0x93, "setae", X86_COND_AE>;  // unsigned greater or equal
-defm SETE  : SETCC<0x94, "sete",  X86_COND_E>;   // equal to
-defm SETNE : SETCC<0x95, "setne", X86_COND_NE>;  // not equal to
-defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>;  // unsigned less than or equal
-defm SETA  : SETCC<0x97, "seta",  X86_COND_A>;   // unsigned greater than
-defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;   // is signed bit set
-defm SETNS : SETCC<0x99, "setns", X86_COND_NS>;  // is not signed
-defm SETP  : SETCC<0x9A, "setp",  X86_COND_P>;   // is parity bit set
-defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>;  // is parity bit not set
-defm SETL  : SETCC<0x9C, "setl",  X86_COND_L>;   // signed less than
-defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>;  // signed greater or equal
-defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>;  // signed less than or equal
-defm SETG  : SETCC<0x9F, "setg",  X86_COND_G>;   // signed greater than
+defm : CMOV_SETCC_Aliases<"o" ,  0>;
+defm : CMOV_SETCC_Aliases<"no",  1>;
+defm : CMOV_SETCC_Aliases<"b" ,  2>;
+defm : CMOV_SETCC_Aliases<"ae",  3>;
+defm : CMOV_SETCC_Aliases<"e" ,  4>;
+defm : CMOV_SETCC_Aliases<"ne",  5>;
+defm : CMOV_SETCC_Aliases<"be",  6>;
+defm : CMOV_SETCC_Aliases<"a" ,  7>;
+defm : CMOV_SETCC_Aliases<"s" ,  8>;
+defm : CMOV_SETCC_Aliases<"ns",  9>;
+defm : CMOV_SETCC_Aliases<"p" , 10>;
+defm : CMOV_SETCC_Aliases<"np", 11>;
+defm : CMOV_SETCC_Aliases<"l" , 12>;
+defm : CMOV_SETCC_Aliases<"ge", 13>;
+defm : CMOV_SETCC_Aliases<"le", 14>;
+defm : CMOV_SETCC_Aliases<"g" , 15>;
 
 // SALC is an undocumented instruction. Information for this instruction can be found
 // here http://www.rcollins.org/secrets/opcodes/SALC.html
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 394dca8e7817..efaccdc9ee96 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1,9 +1,8 @@
 //===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,11 +19,6 @@ def GetLo32XForm : SDNodeXForm<imm, [{
   return getI32Imm((uint32_t)N->getZExtValue(), SDLoc(N));
 }]>;
 
-def GetLo8XForm : SDNodeXForm<imm, [{
-  // Transformation function: get the low 8 bits.
-  return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
-}]>;
-
 
 //===----------------------------------------------------------------------===//
 // Random Pseudo Instructions.
@@ -360,7 +354,7 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 // this happens, it is great.  However, if we are left with an 8-bit sbb and an
 // and, we might as well just match it as a setb.
 def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
-          (SETBr)>;
+          (SETCCr (i8 2))>;
 
 // Patterns to give priority when both inputs are zero so that we don't use
 // an immediate for the RHS.
@@ -574,8 +568,14 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
 
   defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
 
-  defm _FR32   : CMOVrr_PSEUDO<FR32, f32>;
-  defm _FR64   : CMOVrr_PSEUDO<FR64, f64>;
+  let Predicates = [NoAVX512] in {
+    defm _FR32   : CMOVrr_PSEUDO<FR32, f32>;
+    defm _FR64   : CMOVrr_PSEUDO<FR64, f64>;
+  }
+  let Predicates = [HasAVX512] in {
+    defm _FR32X  : CMOVrr_PSEUDO<FR32X, f32>;
+    defm _FR64X  : CMOVrr_PSEUDO<FR64X, f64>;
+  }
   let Predicates = [NoVLX] in {
     defm _VR128  : CMOVrr_PSEUDO<VR128, v2i64>;
     defm _VR256  : CMOVrr_PSEUDO<VR256, v4i64>;
@@ -712,6 +712,32 @@ def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                                "{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, GR64:$src2))]>, LOCK;
 
+// NOTE: These are order specific, we want the mi8 forms to be listed
+// first so that they are slightly preferred to the mi forms.
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+                      ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+                      !strconcat(mnemonic, "{w}\t",
+                                 "{$src2, $dst|$dst, $src2}"),
+                      [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
+                      OpSize16, LOCK;
+
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+                      ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+                      !strconcat(mnemonic, "{l}\t",
+                                 "{$src2, $dst|$dst, $src2}"),
+                      [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
+                      OpSize32, LOCK;
+
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+                       ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+                       !strconcat(mnemonic, "{q}\t",
+                                  "{$src2, $dst|$dst, $src2}"),
+                       [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
+                       LOCK;
+
 def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                     ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                     ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
@@ -742,30 +768,6 @@ def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                                      "{$src2, $dst|$dst, $src2}"),
                           [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))]>,
                           LOCK;
-
-def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
-                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
-                      ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
-                      !strconcat(mnemonic, "{w}\t",
-                                 "{$src2, $dst|$dst, $src2}"),
-                      [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
-                      OpSize16, LOCK;
-
-def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
-                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
-                      ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
-                      !strconcat(mnemonic, "{l}\t",
-                                 "{$src2, $dst|$dst, $src2}"),
-                      [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
-                      OpSize32, LOCK;
-
-def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
-                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
-                       ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
-                       !strconcat(mnemonic, "{q}\t",
-                                  "{$src2, $dst|$dst, $src2}"),
-                       [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
-                       LOCK;
 }
 
 }
@@ -868,7 +870,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    SchedRW = [WriteCMPXCHGRMW] in {
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
 defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
 }
 
@@ -892,8 +894,9 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
 // the instruction and we are sure we will have a valid register to restore
 // the value of RBX.
 let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
-    SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
-    Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst",
+    usesCustomInserter = 1 in {
 def LCMPXCHG8B_SAVE_EBX :
     I<0, Pseudo, (outs GR32:$dst),
       (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),
@@ -904,14 +907,14 @@ def LCMPXCHG8B_SAVE_EBX :
 
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
-    Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW] in {
+    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in {
 defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
                                  X86cas16, i128mem>, REX_W;
 }
 
 // Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
 let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
-    Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW],
+    Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
     isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
     usesCustomInserter = 1 in {
 def LCMPXCHG16B_SAVE_RBX :
@@ -1001,28 +1004,31 @@ defm : RELEASE_BINOP_MI<"OR",  or>;
 defm : RELEASE_BINOP_MI<"XOR", xor>;
 defm : RELEASE_BINOP_MI<"SUB", sub>;
 
-// Same as above, but for floating-point.
-// FIXME: imm version.
-// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
+// Atomic load + floating point patterns.
 // FIXME: This could also handle SIMD operations with *ps and *pd instructions.
-let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in {
-multiclass RELEASE_FP_BINOP_MI<SDNode op> {
-    def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
-        "#BINOP "#NAME#"32mr PSEUDO!",
-        [(atomic_store_32 addr:$dst,
-           (i32 (bitconvert (op
-             (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
-          FR32:$src))))]>, Requires<[HasSSE1]>;
-    def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
-        "#BINOP "#NAME#"64mr PSEUDO!",
-        [(atomic_store_64 addr:$dst,
-           (i64 (bitconvert (op
-             (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
-          FR64:$src))))]>, Requires<[HasSSE2]>;
+multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
+  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
+            Requires<[UseSSE1]>;
+  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
+            Requires<[UseAVX]>;
+  def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
+            Requires<[HasAVX512]>;
+
+  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
+            Requires<[UseSSE2]>; // scalar-double (SD) forms need SSE2
+  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
+            Requires<[UseAVX]>;
+  def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+            (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
+            Requires<[HasAVX512]>;
 }
-defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
 // FIXME: Add fsub, fmul, fdiv, ...
-}
 
 multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
                         dag dag64> {
@@ -1083,6 +1089,35 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
 def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
 def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
 
+// Floating point loads/stores.
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+          (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+          (VMOVSDZmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (MOVSSrm_alt addr:$src)>, Requires<[UseSSE1]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (VMOVSSrm_alt addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+          (VMOVSSZrm_alt addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (MOVSDrm_alt addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (VMOVSDrm_alt addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+          (VMOVSDZrm_alt addr:$src)>, Requires<[HasAVX512]>;
+
 //===----------------------------------------------------------------------===//
 // DAG Pattern Matching Rules
 //===----------------------------------------------------------------------===//
@@ -1241,37 +1276,23 @@ def : Pat<(X86cmp GR32:$src1, 0),
 def : Pat<(X86cmp GR64:$src1, 0),
           (TEST64rr GR64:$src1, GR64:$src1)>;
 
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+  X86::CondCode CC = static_cast<X86::CondCode>(N->getZExtValue());
+  return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC),
+                                   SDLoc(N), MVT::i8);
+}]>;
+
 // Conditional moves with folded loads with operands swapped and conditions
 // inverted.
-multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
-                  Instruction Inst64> {
-  let Predicates = [HasCMov] in {
-    def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
-              (Inst16 GR16:$src2, addr:$src1)>;
-    def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
-              (Inst32 GR32:$src2, addr:$src1)>;
-    def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
-              (Inst64 GR64:$src2, addr:$src1)>;
-  }
+let Predicates = [HasCMov] in {
+  def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS),
+            (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
+  def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS),
+            (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
+  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS),
+            (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
 }
 
-defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
-defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
-defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
-defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
-defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
-defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
-defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
-defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
-defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
-defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
-defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
-defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
-defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
-defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
-defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
-defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
-
 // zextload bool -> zextload byte
 // i1 stored in one byte in zero-extended form.
 // Upper bits cleanup should be executed before Store.
@@ -1298,14 +1319,16 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
 
 // For other extloads, use subregs, since the high contents of the register are
 // defined after an extload.
+// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
+// 32-bit loads for 4 byte aligned i8/i16 loads.
+def : Pat<(extloadi64i32 addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i1 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i8 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
 def : Pat<(extloadi64i16 addr:$src),
           (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
-def : Pat<(extloadi64i32 addr:$src),
-          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
 
 // anyext. Define these to do an explicit zero-extend to
 // avoid partial-register updates.
@@ -1351,6 +1374,8 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
 // we can use a SUBREG_TO_REG.
 def : Pat<(i64 (zext def32:$src)),
           (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
 
 //===----------------------------------------------------------------------===//
 // Pattern match OR as ADD
@@ -1377,9 +1402,12 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
 // Try this before the selecting to OR.
 let SchedRW = [WriteALU] in {
 
-let isConvertibleToThreeAddress = 1,
+let isConvertibleToThreeAddress = 1, isPseudo = 1,
     Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
 let isCommutable = 1 in {
+def ADD8rr_DB   : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+                    "", // orb/addb REG, REG
+                    [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>;
 def ADD16rr_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                     "", // orw/addw REG, REG
                     [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
@@ -1394,6 +1422,10 @@ def ADD64rr_DB  : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
 // NOTE: These are order specific, we want the ri8 forms to be listed
 // first so that they are slightly preferred to the ri forms.
 
+def ADD8ri_DB :   I<0, Pseudo,
+                    (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+                    "", // orb/addb REG, imm8
+                    [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
 def ADD16ri8_DB : I<0, Pseudo,
                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                     "", // orw/addw REG, imm8
@@ -1483,6 +1515,13 @@ def : Pat<(add GR64:$src1, 128),
 def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
           (SUB64mi8 addr:$dst, -128)>;
 
+def : Pat<(X86add_flag_nocf GR16:$src1, 128),
+          (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(X86add_flag_nocf GR32:$src1, 128),
+          (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(X86add_flag_nocf GR64:$src1, 128),
+          (SUB64ri8 GR64:$src1, -128)>;
+
 // The same trick applies for 32-bit immediate fields in 64-bit
 // instructions.
 def : Pat<(add GR64:$src1, 0x0000000080000000),
@@ -1490,6 +1529,9 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
 def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
           (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
 
+def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
+          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+
 // To avoid needing to materialize an immediate in a register, use a 32-bit and
 // with implicit zero-extension instead of a 64-bit and if the immediate has at
 // least 32 bits of leading zeros. If in addition the last 32 bits can be
@@ -1504,7 +1546,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
             (i64 0),
             (AND32ri8
               (EXTRACT_SUBREG GR64:$src, sub_32bit),
-              (i32 (GetLo8XForm imm:$imm))),
+              (i32 (GetLo32XForm imm:$imm))),
             sub_32bit)>;
 
 def : Pat<(and GR64:$src, i64immZExt32:$imm),
@@ -1714,40 +1756,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
-// Helper imms to check if a mask doesn't change significant shift/rotate bits.
-def immShift8 : ImmLeaf<i8, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 3;
+def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, 3);
 }]>;
-def immShift16 : ImmLeaf<i8, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 4;
+
+def shiftMask16 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, 4);
 }]>;
-def immShift32 : ImmLeaf<i8, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 5;
+
+def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, 5);
 }]>;
-def immShift64 : ImmLeaf<i8, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 6;
+
+def shiftMask64 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+  return isUnneededShiftMask(N, 6);
 }]>;
 
+
 // Shift amount is implicitly masked.
 multiclass MaskedShiftAmountPats<SDNode frag, string name> {
   // (shift x (and y, 31)) ==> (shift x, y)
-  def : Pat<(frag GR8:$src1, (and CL, immShift32)),
+  def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
             (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
-  def : Pat<(frag GR16:$src1, (and CL, immShift32)),
+  def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
             (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
-  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+  def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
             (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
-  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
+  def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(name # "8mCL") addr:$dst)>;
-  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
+  def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(name # "16mCL") addr:$dst)>;
-  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+  def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
             (!cast<Instruction>(name # "32mCL") addr:$dst)>;
 
   // (shift x (and y, 63)) ==> (shift x, y)
-  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+  def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
             (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
-  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+  def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
             (!cast<Instruction>(name # "64mCL") addr:$dst)>;
 }
 
@@ -1763,23 +1808,23 @@ defm : MaskedShiftAmountPats<sra, "SAR">;
 // not tracking flags for these nodes.
 multiclass MaskedRotateAmountPats<SDNode frag, string name> {
   // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
-  def : Pat<(frag GR8:$src1, (and CL, immShift8)),
+  def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
   (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
-  def : Pat<(frag GR16:$src1, (and CL, immShift16)),
+  def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
   (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
-  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+  def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
   (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
-  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
+  def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst),
   (!cast<Instruction>(name # "8mCL") addr:$dst)>;
-  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
+  def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask16 CL)), addr:$dst),
   (!cast<Instruction>(name # "16mCL") addr:$dst)>;
-  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+  def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
   (!cast<Instruction>(name # "32mCL") addr:$dst)>;
 
   // (rot x (and y, 63)) ==> (rot x, y)
-  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+  def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
   (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
-  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+  def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
   (!cast<Instruction>(name # "64mCL") addr:$dst)>;
 }
 
@@ -1790,13 +1835,13 @@ defm : MaskedRotateAmountPats<rotr, "ROR">;
 // Double shift amount is implicitly masked.
 multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
   // (shift x (and y, 31)) ==> (shift x, y)
-  def : Pat<(frag GR16:$src1, GR16:$src2, (and CL, immShift32)),
+  def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
             (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
-  def : Pat<(frag GR32:$src1, GR32:$src2, (and CL, immShift32)),
+  def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
             (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
 
   // (shift x (and y, 63)) ==> (shift x, y)
-  def : Pat<(frag GR64:$src1, GR64:$src2, (and CL, immShift64)),
+  def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask64 CL)),
             (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
 }
 
@@ -1805,57 +1850,57 @@ defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
 
 let Predicates = [HasBMI2] in {
   let AddedComplexity = 1 in {
-    def : Pat<(sra GR32:$src1, (and GR8:$src2, immShift32)),
+    def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
               (SARX32rr GR32:$src1,
                         (INSERT_SUBREG
                           (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-    def : Pat<(sra GR64:$src1, (and GR8:$src2, immShift64)),
+    def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
               (SARX64rr GR64:$src1,
                         (INSERT_SUBREG
                           (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 
-    def : Pat<(srl GR32:$src1, (and GR8:$src2, immShift32)),
+    def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
               (SHRX32rr GR32:$src1,
                         (INSERT_SUBREG
                           (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-    def : Pat<(srl GR64:$src1, (and GR8:$src2, immShift64)),
+    def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
               (SHRX64rr GR64:$src1,
                         (INSERT_SUBREG
                           (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 
-    def : Pat<(shl GR32:$src1, (and GR8:$src2, immShift32)),
+    def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
               (SHLX32rr GR32:$src1,
                         (INSERT_SUBREG
                           (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-    def : Pat<(shl GR64:$src1, (and GR8:$src2, immShift64)),
+    def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
               (SHLX64rr GR64:$src1,
                         (INSERT_SUBREG
                           (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
   }
 
-  def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+  def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
             (SARX32rm addr:$src1,
                       (INSERT_SUBREG
                         (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-  def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+  def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
             (SARX64rm addr:$src1,
                       (INSERT_SUBREG
                         (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 
-  def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+  def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
             (SHRX32rm addr:$src1,
                       (INSERT_SUBREG
                         (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-  def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+  def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
             (SHRX64rm addr:$src1,
                       (INSERT_SUBREG
                         (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 
-  def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+  def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
             (SHLX32rm addr:$src1,
                       (INSERT_SUBREG
                         (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-  def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+  def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
             (SHLX64rm addr:$src1,
                       (INSERT_SUBREG
                         (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
@@ -1864,7 +1909,7 @@ let Predicates = [HasBMI2] in {
 // Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
 multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
                             Instruction BTS, Instruction BTC,
-                            ImmLeaf ImmShift> {
+                            PatFrag ShiftMask> {
   def : Pat<(and RC:$src1, (rotl -2, GR8:$src2)),
             (BTR RC:$src1,
                  (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
@@ -1876,20 +1921,20 @@ multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
                  (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 
   // Similar to above, but removing unneeded masking of the shift amount.
-  def : Pat<(and RC:$src1, (rotl -2, (and GR8:$src2, ImmShift))),
+  def : Pat<(and RC:$src1, (rotl -2, (ShiftMask GR8:$src2))),
             (BTR RC:$src1,
                  (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-  def : Pat<(or RC:$src1, (shl 1, (and GR8:$src2, ImmShift))),
+  def : Pat<(or RC:$src1, (shl 1, (ShiftMask GR8:$src2))),
             (BTS RC:$src1,
                 (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-  def : Pat<(xor RC:$src1, (shl 1, (and GR8:$src2, ImmShift))),
+  def : Pat<(xor RC:$src1, (shl 1, (ShiftMask GR8:$src2))),
             (BTC RC:$src1,
                 (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 }
 
-defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, immShift16>;
-defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, immShift32>;
-defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, immShift64>;
+defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>;
+defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
+defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
 
 
 // (anyext (setcc_carry)) -> (setcc_carry)
@@ -1974,8 +2019,6 @@ def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
 // sub reg, relocImm
 def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
           (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
-def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2),
-          (SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
 
 // mul reg, reg
 def : Pat<(mul GR16:$src1, GR16:$src2),
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index a7c7aaab2285..f82e80965b7c 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -1,9 +1,8 @@
 //===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -71,35 +70,40 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
 }
 
 // Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
-  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
-    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
-                       [(X86brcond bb:$dst, Cond, EFLAGS)]>;
-    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
-      def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
-                         []>, OpSize16, TB;
-      def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
-                         []>, TB, OpSize32;
-    }
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump],
+    isCodeGenOnly = 1, ForceDisassemble = 1 in {
+  def JCC_1 : Ii8PCRel <0x70, AddCCFrm, (outs),
+                        (ins brtarget8:$dst, ccode:$cond),
+                        "j${cond}\t$dst",
+                        [(X86brcond bb:$dst, imm:$cond, EFLAGS)]>;
+  let hasSideEffects = 0 in {
+    def JCC_2 : Ii16PCRel<0x80, AddCCFrm, (outs),
+                          (ins brtarget16:$dst, ccode:$cond),
+                          "j${cond}\t$dst",
+                          []>, OpSize16, TB;
+    def JCC_4 : Ii32PCRel<0x80, AddCCFrm, (outs),
+                          (ins brtarget32:$dst, ccode:$cond),
+                          "j${cond}\t$dst",
+                          []>, TB, OpSize32;
   }
 }
 
-defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
-defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+def : InstAlias<"jo\t$dst",  (JCC_1 brtarget8:$dst,  0), 0>;
+def : InstAlias<"jno\t$dst", (JCC_1 brtarget8:$dst,  1), 0>;
+def : InstAlias<"jb\t$dst",  (JCC_1 brtarget8:$dst,  2), 0>;
+def : InstAlias<"jae\t$dst", (JCC_1 brtarget8:$dst,  3), 0>;
+def : InstAlias<"je\t$dst",  (JCC_1 brtarget8:$dst,  4), 0>;
+def : InstAlias<"jne\t$dst", (JCC_1 brtarget8:$dst,  5), 0>;
+def : InstAlias<"jbe\t$dst", (JCC_1 brtarget8:$dst,  6), 0>;
+def : InstAlias<"ja\t$dst",  (JCC_1 brtarget8:$dst,  7), 0>;
+def : InstAlias<"js\t$dst",  (JCC_1 brtarget8:$dst,  8), 0>;
+def : InstAlias<"jns\t$dst", (JCC_1 brtarget8:$dst,  9), 0>;
+def : InstAlias<"jp\t$dst",  (JCC_1 brtarget8:$dst, 10), 0>;
+def : InstAlias<"jnp\t$dst", (JCC_1 brtarget8:$dst, 11), 0>;
+def : InstAlias<"jl\t$dst",  (JCC_1 brtarget8:$dst, 12), 0>;
+def : InstAlias<"jge\t$dst", (JCC_1 brtarget8:$dst, 13), 0>;
+def : InstAlias<"jle\t$dst", (JCC_1 brtarget8:$dst, 14), 0>;
+def : InstAlias<"jg\t$dst",  (JCC_1 brtarget8:$dst, 15), 0>;
 
 // jcx/jecx/jrcx instructions.
 let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index c24d6d5b8df1..06e605fe5db2 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -1,9 +1,8 @@
 //===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,11 +28,11 @@ let hasSideEffects = 0 in {
 
   let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
   def CDQE : RI<0x98, RawFrm, (outs), (ins),
-               "{cltq|cdqe}", []>, Sched<[WriteALU]>;
+               "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
 
   let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
   def CQO  : RI<0x99, RawFrm, (outs), (ins),
-                "{cqto|cqo}", []>, Sched<[WriteALU]>;
+                "{cqto|cqo}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
 }
 
 // Sign/Zero extenders
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 1a8e529431af..0cca71bdc431 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -1,9 +1,8 @@
 //===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -237,7 +236,8 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
                 Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
 }
 
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
+    hasSideEffects = 0 in
 multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                        string OpStr, string PackTy, string Suff,
                        SDNode OpNode, RegisterClass RC,
@@ -263,8 +263,7 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
 // the lowest element of the FMA*_Int instruction. Even though such analysis
 // may be not implemented yet we allow the routines doing the actual commute
 // transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
-    hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC,
                         X86FoldableSchedWrite sched> {
diff --git a/lib/Target/X86/X86InstrFMA3Info.cpp b/lib/Target/X86/X86InstrFMA3Info.cpp
index def732a2dd00..25bbdddb7a21 100644
--- a/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -1,9 +1,8 @@
 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,7 +56,7 @@ using namespace llvm;
 
 #define FMA3GROUP_SCALAR(Name, Attrs) \
   FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
-  FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs) \
+  FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs)
 
 #define FMA3GROUP_FULL(Name, Attrs) \
   FMA3GROUP_PACKED(Name, Attrs) \
@@ -159,11 +158,9 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
   // FMA 231 instructions have an opcode of 0xB6-0xBF
   unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
 
-  auto I = std::lower_bound(Table.begin(), Table.end(), Opcode,
-                            [FormIndex](const X86InstrFMA3Group &Group,
-                                        unsigned Opcode) {
-                              return Group.Opcodes[FormIndex] < Opcode;
-                            });
+  auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
+    return Group.Opcodes[FormIndex] < Opcode;
+  });
   assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
          "Couldn't find FMA3 opcode!");
   return I;
diff --git a/lib/Target/X86/X86InstrFMA3Info.h b/lib/Target/X86/X86InstrFMA3Info.h
index 6eec1db98bf8..7fa6f5917862 100644
--- a/lib/Target/X86/X86InstrFMA3Info.h
+++ b/lib/Target/X86/X86InstrFMA3Info.h
@@ -1,9 +1,8 @@
 //===- X86InstrFMA3Info.h - X86 FMA3 Instruction Information ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 5912a3199613..2ec6d50f9702 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -1,9 +1,8 @@
 //===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,18 +16,13 @@
 // FPStack specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
-                                           SDTCisVT<1, f80>]>;
-def SDTX86Fld       : SDTypeProfile<1, 2, [SDTCisFP<0>,
-                                           SDTCisPtrTy<1>,
-                                           SDTCisVT<2, OtherVT>]>;
-def SDTX86Fst       : SDTypeProfile<0, 3, [SDTCisFP<0>,
-                                           SDTCisPtrTy<1>,
-                                           SDTCisVT<2, OtherVT>]>;
-def SDTX86Fild      : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
-                                           SDTCisVT<2, OtherVT>]>;
+def SDTX86Fld       : SDTypeProfile<1, 1, [SDTCisFP<0>,
+                                           SDTCisPtrTy<1>]>;
+def SDTX86Fst       : SDTypeProfile<0, 2, [SDTCisFP<0>,
+                                           SDTCisPtrTy<1>]>;
+def SDTX86Fild      : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist      : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86Fnstsw    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -42,17 +36,71 @@ def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                              [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
                               SDNPMemOperand]>;
+def X86fist         : SDNode<"X86ISD::FIST", SDTX86Fist,
+                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fp_stsw      : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
-def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
+                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m",          SDTX86CwdStore,
                              [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
                               SDNPMemOperand]>;
 
+def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fst node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fstf64 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fst node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fstf80 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fst node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fldf32 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fldf64 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fist node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
+                              (X86fp_to_mem node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
+                              (X86fp_to_mem node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
+                              (X86fp_to_mem node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
 //===----------------------------------------------------------------------===//
@@ -74,7 +122,9 @@ def fpimmneg1 : FPImmLeaf<fAny, [{
 }]>;
 
 // Some 'special' instructions - expanded after instruction selection.
-let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+// Clobbers EFLAGS due to OR instruction used internally.
+// FIXME: Can we model this in SelectionDAG?
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [EFLAGS] in {
   def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
                               [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
   def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
@@ -139,7 +189,6 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
 // These instructions cannot address 80-bit memory.
 multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
                     bit Forward = 1> {
-let mayLoad = 1, hasSideEffects = 1 in {
 // ST(0) = ST(0) + [mem]
 def _Fp32m  : FpIf32<(outs RFP32:$dst),
                      (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
@@ -176,8 +225,10 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
                         (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
                        (set RFP80:$dst,
                         (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
+let mayLoad = 1 in
 def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src),
                  !strconcat("f", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
 def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src),
                  !strconcat("f", asmstring, "{l}\t$src")>;
 // ST(0) = ST(0) + [memint]
@@ -185,52 +236,53 @@ def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
                        OneArgFPRW,
                        [!if(Forward,
                             (set RFP32:$dst,
-                             (OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
+                             (OpNode RFP32:$src1, (X86fild16 addr:$src2))),
                             (set RFP32:$dst,
-                             (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
+                             (OpNode (X86fild16 addr:$src2), RFP32:$src1)))]>;
 def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
                        OneArgFPRW,
                        [!if(Forward,
                             (set RFP32:$dst,
-                             (OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
+                             (OpNode RFP32:$src1, (X86fild32 addr:$src2))),
                             (set RFP32:$dst,
-                             (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
+                             (OpNode (X86fild32 addr:$src2), RFP32:$src1)))]>;
 def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
                        OneArgFPRW,
                        [!if(Forward,
                             (set RFP64:$dst,
-                             (OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
+                             (OpNode RFP64:$src1, (X86fild16 addr:$src2))),
                             (set RFP64:$dst,
-                             (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
+                             (OpNode (X86fild16 addr:$src2), RFP64:$src1)))]>;
 def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
                        OneArgFPRW,
                        [!if(Forward,
                             (set RFP64:$dst,
-                             (OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
+                             (OpNode RFP64:$src1, (X86fild32 addr:$src2))),
                             (set RFP64:$dst,
-                             (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
+                             (OpNode (X86fild32 addr:$src2), RFP64:$src1)))]>;
 def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
                      OneArgFPRW,
                      [!if(Forward,
                           (set RFP80:$dst,
-                           (OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
+                           (OpNode RFP80:$src1, (X86fild16 addr:$src2))),
                           (set RFP80:$dst,
-                           (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
+                           (OpNode (X86fild16 addr:$src2), RFP80:$src1)))]>;
 def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
                      OneArgFPRW,
                      [!if(Forward,
                           (set RFP80:$dst,
-                           (OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
+                           (OpNode RFP80:$src1, (X86fild32 addr:$src2))),
                           (set RFP80:$dst,
-                           (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
+                           (OpNode (X86fild32 addr:$src2), RFP80:$src1)))]>;
+let mayLoad = 1 in
 def _FI16m  : FPI<0xDE, fp, (outs), (ins i16mem:$src),
                   !strconcat("fi", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
 def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src),
                   !strconcat("fi", asmstring, "{l}\t$src")>;
-} // mayLoad = 1, hasSideEffects = 1
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
 // resources.
 let hasNoSchedulingInfo = 1 in {
@@ -258,42 +310,42 @@ defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
 } // Defs = [FPSW]
 
 class FPST0rInst<Format fp, string asm>
-  : FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>;
 class FPrST0Inst<Format fp, string asm>
-  : FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xDC, fp, (outs), (ins RSTi:$op), asm>;
 class FPrST0PInst<Format fp, string asm>
-  : FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
+  : FPI<0xDE, fp, (outs), (ins RSTi:$op), asm>;
 
 // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
 // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
 // we have to put some 'r's in and take them out of weird places.
-let SchedRW = [WriteFAdd] in {
-def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t$op">;
-def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
-def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t$op">;
-def SUBR_FST0r  : FPST0rInst <MRM5r, "fsubr\t$op">;
-def SUB_FrST0   : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
-def SUB_FPrST0  : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
-def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t$op">;
-def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
-def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
+let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
+def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t{$op, %st|st, $op}">;
+def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st, $op|$op, st}">;
+def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t{%st, $op|$op, st}">;
+def SUBR_FST0r  : FPST0rInst <MRM5r, "fsubr\t{$op, %st|st, $op}">;
+def SUB_FrST0   : FPrST0Inst <MRM5r, "fsub{r}\t{%st, $op|$op, st}">;
+def SUB_FPrST0  : FPrST0PInst<MRM5r, "fsub{r}p\t{%st, $op|$op, st}">;
+def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t{$op, %st|st, $op}">;
+def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st, $op|$op, st}">;
+def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
 def COM_FST0r   : FPST0rInst <MRM2r, "fcom\t$op">;
 def COMP_FST0r  : FPST0rInst <MRM3r, "fcomp\t$op">;
 } // SchedRW
-let SchedRW = [WriteFMul] in {
-def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t$op">;
-def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
-def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t$op">;
+let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
+def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t{$op, %st|st, $op}">;
+def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st, $op|$op, st}">;
+def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t{%st, $op|$op, st}">;
 } // SchedRW
-let SchedRW = [WriteFDiv] in {
-def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t$op">;
-def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
-def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
-def DIV_FST0r   : FPST0rInst <MRM6r, "fdiv\t$op">;
-def DIVR_FrST0  : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
-def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
+let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
+def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t{$op, %st|st, $op}">;
+def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st, $op|$op, st}">;
+def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t{%st, $op|$op, st}">;
+def DIV_FST0r   : FPST0rInst <MRM6r, "fdiv\t{$op, %st|st, $op}">;
+def DIVR_FrST0  : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st, $op|$op, st}">;
+def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t{%st, $op|$op, st}">;
 } // SchedRW
 
 // Unary operations.
@@ -307,7 +359,7 @@ def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
 def _F     : FPI<0xD9, fp, (outs), (ins), asmstring>;
 }
 
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
 
 let SchedRW = [WriteFSign] in {
 defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
@@ -335,7 +387,7 @@ def TST_F  : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
 
 // Versions of FP instructions that take a single memory operand.  Added for the
 //   disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd] in {
+let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
 def FCOM32m  : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
 def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
 
@@ -398,32 +450,31 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
 
 let Predicates = [HasCMov] in {
 // These are not factored because there's no clean way to pass DA/DB.
-def CMOVB_F  : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
-                  "fcmovb\t{$op, %st(0)|st(0), $op}">;
-def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
-                  "fcmovbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVE_F  : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
-                  "fcmove\t{$op, %st(0)|st(0), $op}">;
-def CMOVP_F  : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
-                  "fcmovu\t{$op, %st(0)|st(0), $op}">;
-def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
-                  "fcmovnb\t{$op, %st(0)|st(0), $op}">;
-def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
-                  "fcmovnbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
-                  "fcmovne\t{$op, %st(0)|st(0), $op}">;
-def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
-                  "fcmovnu\t{$op, %st(0)|st(0), $op}">;
+def CMOVB_F  : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op),
+                  "fcmovb\t{$op, %st|st, $op}">;
+def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RSTi:$op),
+                  "fcmovbe\t{$op, %st|st, $op}">;
+def CMOVE_F  : FPI<0xDA, MRM1r, (outs), (ins RSTi:$op),
+                  "fcmove\t{$op, %st|st, $op}">;
+def CMOVP_F  : FPI<0xDA, MRM3r, (outs), (ins RSTi:$op),
+                  "fcmovu\t{$op, %st|st, $op}">;
+def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RSTi:$op),
+                  "fcmovnb\t{$op, %st|st, $op}">;
+def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RSTi:$op),
+                  "fcmovnbe\t{$op, %st|st, $op}">;
+def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op),
+                  "fcmovne\t{$op, %st|st, $op}">;
+def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op),
+                  "fcmovnu\t{$op, %st|st, $op}">;
 } // Predicates = [HasCMov]
 } // SchedRW
 
 // Floating point loads & stores.
-let SchedRW = [WriteLoad] in {
+let SchedRW = [WriteLoad], Uses = [FPCW] in {
 let canFoldAsLoad = 1 in {
 def LD_Fp32m   : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
                   [(set RFP32:$dst, (loadf32 addr:$src))]>;
-let isReMaterializable = 1 in
-  def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (loadf64 addr:$src))]>;
 def LD_Fp80m   : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (loadf80 addr:$src))]>;
@@ -435,26 +486,26 @@ def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
 def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
 def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
-                  [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+                  [(set RFP32:$dst, (X86fild16 addr:$src))]>;
 def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
-                  [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+                  [(set RFP32:$dst, (X86fild32 addr:$src))]>;
 def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
-                  [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+                  [(set RFP32:$dst, (X86fild64 addr:$src))]>;
 def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
-                  [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+                  [(set RFP64:$dst, (X86fild16 addr:$src))]>;
 def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
-                  [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+                  [(set RFP64:$dst, (X86fild32 addr:$src))]>;
 def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
-                  [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+                  [(set RFP64:$dst, (X86fild64 addr:$src))]>;
 def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
-                  [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+                  [(set RFP80:$dst, (X86fild16 addr:$src))]>;
 def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
-                  [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+                  [(set RFP80:$dst, (X86fild32 addr:$src))]>;
 def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
-                  [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+                  [(set RFP80:$dst, (X86fild64 addr:$src))]>;
 } // SchedRW
 
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
                   [(store RFP32:$src, addr:$op)]>;
 def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
@@ -489,9 +540,9 @@ def IST_Fp16m80  : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp32m80  : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp64m80  : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
 } // mayStore
-} // SchedRW
+} // SchedRW, Uses = [FPCW]
 
-let mayLoad = 1, SchedRW = [WriteLoad] in {
+let mayLoad = 1, SchedRW = [WriteLoad], Uses = [FPCW] in {
 def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
 def LD_F64m   : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
 def LD_F80m   : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
@@ -499,7 +550,7 @@ def ILD_F16m  : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
 def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
 def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
 }
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
 def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
 def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
@@ -513,7 +564,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
 }
 
 // FISTTP requires SSE3 even though it's a FPStack op.
-let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
+let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
                     [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
 def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
@@ -534,22 +585,22 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
                     [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
 } // Predicates = [HasSSE3]
 
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
 def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
 def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
 def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
 }
 
 // FP Stack manipulation instructions.
-let SchedRW = [WriteMove] in {
-def LD_Frr   : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
-def ST_Frr   : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
-def ST_FPrr  : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
-def XCH_F    : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
+let SchedRW = [WriteMove], Uses = [FPCW] in {
+def LD_Frr   : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
+def ST_Frr   : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
+def ST_FPrr  : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+def XCH_F    : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
 }
 
 // Floating point constant loads.
-let isReMaterializable = 1, SchedRW = [WriteZero] in {
+let SchedRW = [WriteZero], Uses = [FPCW] in {
 def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
                 [(set RFP32:$dst, fpimm0)]>;
 def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
@@ -564,13 +615,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                 [(set RFP80:$dst, fpimm1)]>;
 }
 
-let SchedRW = [WriteFLD0] in
+let SchedRW = [WriteFLD0], Uses = [FPCW] in
 def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
 
-let SchedRW = [WriteFLD1] in
+let SchedRW = [WriteFLD1], Uses = [FPCW] in
 def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
 
-let SchedRW = [WriteFLDC], Defs = [FPSW] in {
+let SchedRW = [WriteFLDC], Uses = [FPCW] in {
 def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
 def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
 def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -579,7 +630,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
 } // SchedRW
 
 // Floating point compares.
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Uses = [FPCW] in {
 def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                         [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
 def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -591,37 +642,37 @@ def UCOM_Fpr80 : FpI_  <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
 
 let SchedRW = [WriteFCom] in {
 // CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPSW] in {
-let Predicates = [FPStackf32, HasCMov] in
-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
-let Predicates = [FPStackf64, HasCMov] in
-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
-let Predicates = [HasCMov] in
+let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
+def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
+                  Requires<[FPStackf32, HasCMov]>;
+def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
+                  Requires<[FPStackf64, HasCMov]>;
 def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
-                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
+                  Requires<[HasCMov]>;
 }
 
-let Defs = [FPSW], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0, FPCW] in {
 def UCOM_Fr    : FPI<0xDD, MRM4r,    // FPSW = cmp ST(0) with ST(i)
-                    (outs), (ins RST:$reg), "fucom\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucom\t$reg">;
 def UCOM_FPr   : FPI<0xDD, MRM5r,    // FPSW = cmp ST(0) with ST(i), pop
-                    (outs), (ins RST:$reg), "fucomp\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucomp\t$reg">;
 def UCOM_FPPr  : FPI<0xDA, MRM_E9,       // cmp ST(0) with ST(1), pop, pop
                     (outs), (ins), "fucompp">;
 }
 
-let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
+let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in {
 def UCOM_FIr   : FPI<0xDB, MRM5r,     // CC = cmp ST(0) with ST(i)
-                    (outs), (ins RST:$reg), "fucomi\t$reg">;
+                    (outs), (ins RSTi:$reg), "fucomi\t{$reg, %st|st, $reg}">;
 def UCOM_FIPr  : FPI<0xDF, MRM5r,     // CC = cmp ST(0) with ST(i), pop
-                    (outs), (ins RST:$reg), "fucompi\t$reg">;
-}
+                    (outs), (ins RSTi:$reg), "fucompi\t{$reg, %st|st, $reg}">;
 
-let Defs = [EFLAGS, FPSW] in {
-def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
-def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
+def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RSTi:$reg),
+                  "fcomi\t{$reg, %st|st, $reg}">;
+def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
+                   "fcompi\t{$reg, %st|st, $reg}">;
 }
 } // SchedRW
 
@@ -631,12 +682,12 @@ let Defs = [AX], Uses = [FPSW] in
 def FNSTSW16r : I<0xDF, MRM_E0,                  // AX = fp flags
                   (outs), (ins), "fnstsw\t{%ax|ax}",
                   [(set AX, (X86fp_stsw FPSW))]>;
-let Defs = [FPSW] in
+let Defs = [FPSW], Uses = [FPCW] in
 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
                   (outs), (ins i16mem:$dst), "fnstcw\t$dst",
                   [(X86fp_cwd_get16 addr:$dst)]>;
 } // SchedRW
-let Defs = [FPSW], mayLoad = 1 in
+let Defs = [FPSW,FPCW], mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
                 Sched<[WriteLoad]>;
@@ -645,8 +696,8 @@ def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
 let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW] in {
 def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
-def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
-def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
+def FFREE : FPI<0xDD, MRM0r, (outs), (ins RSTi:$reg), "ffree\t$reg">;
+def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RSTi:$reg), "ffreep\t$reg">;
 
 // Clear exceptions
 def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
@@ -695,21 +746,17 @@ def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
 //===----------------------------------------------------------------------===//
 
 // Required for RET of f32 / f64 / f80 values.
-def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
-def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
-def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+def : Pat<(X86fldf32 addr:$src), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fldf64 addr:$src), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fldf80 addr:$src), (LD_Fp80m addr:$src)>;
 
 // Required for CALL which return f32 / f64 / f80 values.
-def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
-                                                          RFP64:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
-                                                          RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
-                                                          RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
-                                                         RFP80:$src)>;
+def : Pat<(X86fstf32 RFP32:$src, addr:$op), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fstf32 RFP64:$src, addr:$op), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fstf64 RFP64:$src, addr:$op), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fstf32 RFP80:$src, addr:$op), (ST_Fp80m32 addr:$op, RFP80:$src)>;
+def : Pat<(X86fstf64 RFP80:$src, addr:$op), (ST_Fp80m64 addr:$op, RFP80:$src)>;
+def : Pat<(X86fstf80 RFP80:$src, addr:$op), (ST_FpP80m addr:$op, RFP80:$src)>;
 
 // Floating point constant -0.0 and -1.0
 def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
@@ -720,7 +767,11 @@ def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
 def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 
 // Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
+
+// Used to conv. between f80 and i64 for i64 atomic loads.
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
+def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
 
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp
index 7d31cfab4137..d42fec3770c7 100644
--- a/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/lib/Target/X86/X86InstrFoldTables.cpp
@@ -1,9 +1,8 @@
 //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,17 @@ using namespace llvm;
 // tables that would be incorrect. The manual review process allows us a chance
 // to catch these before they become observable bugs.
 static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
+  { X86::ADD16ri8_DB, X86::ADD16mi8,   TB_NO_REVERSE },
+  { X86::ADD16ri_DB,  X86::ADD16mi,    TB_NO_REVERSE },
+  { X86::ADD16rr_DB,  X86::ADD16mr,    TB_NO_REVERSE },
+  { X86::ADD32ri8_DB, X86::ADD32mi8,   TB_NO_REVERSE },
+  { X86::ADD32ri_DB,  X86::ADD32mi,    TB_NO_REVERSE },
+  { X86::ADD32rr_DB,  X86::ADD32mr,    TB_NO_REVERSE },
+  { X86::ADD64ri32_DB,X86::ADD64mi32,  TB_NO_REVERSE },
+  { X86::ADD64ri8_DB, X86::ADD64mi8,   TB_NO_REVERSE },
+  { X86::ADD64rr_DB,  X86::ADD64mr,    TB_NO_REVERSE },
+  { X86::ADD8ri_DB,   X86::ADD8mi,     TB_NO_REVERSE },
+  { X86::ADD8rr_DB,   X86::ADD8mr,     TB_NO_REVERSE },
   { X86::ADC16ri,     X86::ADC16mi,    0 },
   { X86::ADC16ri8,    X86::ADC16mi8,   0 },
   { X86::ADC16rr,     X86::ADC16mr,    0 },
@@ -48,22 +58,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
   { X86::ADC8rr,      X86::ADC8mr,     0 },
   { X86::ADD16ri,     X86::ADD16mi,    0 },
   { X86::ADD16ri8,    X86::ADD16mi8,   0 },
-  { X86::ADD16ri8_DB, X86::ADD16mi8,   TB_NO_REVERSE },
-  { X86::ADD16ri_DB,  X86::ADD16mi,    TB_NO_REVERSE },
   { X86::ADD16rr,     X86::ADD16mr,    0 },
-  { X86::ADD16rr_DB,  X86::ADD16mr,    TB_NO_REVERSE },
   { X86::ADD32ri,     X86::ADD32mi,    0 },
   { X86::ADD32ri8,    X86::ADD32mi8,   0 },
-  { X86::ADD32ri8_DB, X86::ADD32mi8,   TB_NO_REVERSE },
-  { X86::ADD32ri_DB,  X86::ADD32mi,    TB_NO_REVERSE },
   { X86::ADD32rr,     X86::ADD32mr,    0 },
-  { X86::ADD32rr_DB,  X86::ADD32mr,    TB_NO_REVERSE },
   { X86::ADD64ri32,   X86::ADD64mi32,  0 },
-  { X86::ADD64ri32_DB,X86::ADD64mi32,  TB_NO_REVERSE },
   { X86::ADD64ri8,    X86::ADD64mi8,   0 },
-  { X86::ADD64ri8_DB, X86::ADD64mi8,   TB_NO_REVERSE },
   { X86::ADD64rr,     X86::ADD64mr,    0 },
-  { X86::ADD64rr_DB,  X86::ADD64mr,    TB_NO_REVERSE },
   { X86::ADD8ri,      X86::ADD8mi,     0 },
   { X86::ADD8ri8,     X86::ADD8mi8,    0 },
   { X86::ADD8rr,      X86::ADD8mr,     0 },
@@ -247,7 +248,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
   { X86::XOR64rr,     X86::XOR64mr,    0 },
   { X86::XOR8ri,      X86::XOR8mi,     0 },
   { X86::XOR8ri8,     X86::XOR8mi8,    0 },
-  { X86::XOR8rr,      X86::XOR8mr,     0 }
+  { X86::XOR8rr,      X86::XOR8mr,     0 },
 };
 
 static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
@@ -305,9 +306,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
   { X86::MOVDQArr,            X86::MOVDQAmr,            TB_FOLDED_STORE | TB_ALIGN_16 },
   { X86::MOVDQUrr,            X86::MOVDQUmr,            TB_FOLDED_STORE },
   { X86::MOVPDI2DIrr,         X86::MOVPDI2DImr,         TB_FOLDED_STORE },
-  { X86::MOVPQIto64rr,        X86::MOVPQI2QImr,         TB_FOLDED_STORE },
-  { X86::MOVSDto64rr,         X86::MOVSDto64mr,         TB_FOLDED_STORE },
-  { X86::MOVSS2DIrr,          X86::MOVSS2DImr,          TB_FOLDED_STORE },
+  { X86::MOVPQIto64rr,        X86::MOVPQI2QImr,         TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::MOVSDto64rr,         X86::MOVSDmr,             TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::MOVSS2DIrr,          X86::MOVSSmr,             TB_FOLDED_STORE },
   { X86::MOVUPDrr,            X86::MOVUPDmr,            TB_FOLDED_STORE },
   { X86::MOVUPSrr,            X86::MOVUPSmr,            TB_FOLDED_STORE },
   { X86::MUL16r,              X86::MUL16m,              TB_FOLDED_LOAD },
@@ -321,22 +322,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
   { X86::PUSH16r,             X86::PUSH16rmm,           TB_FOLDED_LOAD },
   { X86::PUSH32r,             X86::PUSH32rmm,           TB_FOLDED_LOAD },
   { X86::PUSH64r,             X86::PUSH64rmm,           TB_FOLDED_LOAD },
-  { X86::SETAEr,              X86::SETAEm,              TB_FOLDED_STORE },
-  { X86::SETAr,               X86::SETAm,               TB_FOLDED_STORE },
-  { X86::SETBEr,              X86::SETBEm,              TB_FOLDED_STORE },
-  { X86::SETBr,               X86::SETBm,               TB_FOLDED_STORE },
-  { X86::SETEr,               X86::SETEm,               TB_FOLDED_STORE },
-  { X86::SETGEr,              X86::SETGEm,              TB_FOLDED_STORE },
-  { X86::SETGr,               X86::SETGm,               TB_FOLDED_STORE },
-  { X86::SETLEr,              X86::SETLEm,              TB_FOLDED_STORE },
-  { X86::SETLr,               X86::SETLm,               TB_FOLDED_STORE },
-  { X86::SETNEr,              X86::SETNEm,              TB_FOLDED_STORE },
-  { X86::SETNOr,              X86::SETNOm,              TB_FOLDED_STORE },
-  { X86::SETNPr,              X86::SETNPm,              TB_FOLDED_STORE },
-  { X86::SETNSr,              X86::SETNSm,              TB_FOLDED_STORE },
-  { X86::SETOr,               X86::SETOm,               TB_FOLDED_STORE },
-  { X86::SETPr,               X86::SETPm,               TB_FOLDED_STORE },
-  { X86::SETSr,               X86::SETSm,               TB_FOLDED_STORE },
+  { X86::SETCCr,              X86::SETCCm,              TB_FOLDED_STORE },
   { X86::TAILJMPr,            X86::TAILJMPm,            TB_FOLDED_LOAD },
   { X86::TAILJMPr64,          X86::TAILJMPm64,          TB_FOLDED_LOAD },
   { X86::TAILJMPr64_REX,      X86::TAILJMPm64_REX,      TB_FOLDED_LOAD },
@@ -403,12 +389,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
   { X86::VMOVDQUrr,           X86::VMOVDQUmr,           TB_FOLDED_STORE },
   { X86::VMOVPDI2DIZrr,       X86::VMOVPDI2DIZmr,       TB_FOLDED_STORE },
   { X86::VMOVPDI2DIrr,        X86::VMOVPDI2DImr,        TB_FOLDED_STORE },
-  { X86::VMOVPQIto64Zrr,      X86::VMOVPQI2QIZmr,       TB_FOLDED_STORE },
-  { X86::VMOVPQIto64rr,       X86::VMOVPQI2QImr,        TB_FOLDED_STORE },
-  { X86::VMOVSDto64Zrr,       X86::VMOVSDto64Zmr,       TB_FOLDED_STORE },
-  { X86::VMOVSDto64rr,        X86::VMOVSDto64mr,        TB_FOLDED_STORE },
-  { X86::VMOVSS2DIZrr,        X86::VMOVSS2DIZmr,        TB_FOLDED_STORE },
-  { X86::VMOVSS2DIrr,         X86::VMOVSS2DImr,         TB_FOLDED_STORE },
+  { X86::VMOVPQIto64Zrr,      X86::VMOVPQI2QIZmr,       TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVPQIto64rr,       X86::VMOVPQI2QImr,        TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVSDto64Zrr,       X86::VMOVSDZmr,           TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVSDto64rr,        X86::VMOVSDmr,            TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVSS2DIZrr,        X86::VMOVSSZmr,           TB_FOLDED_STORE },
+  { X86::VMOVSS2DIrr,         X86::VMOVSSmr,            TB_FOLDED_STORE },
   { X86::VMOVUPDYrr,          X86::VMOVUPDYmr,          TB_FOLDED_STORE },
   { X86::VMOVUPDZ128rr,       X86::VMOVUPDZ128mr,       TB_FOLDED_STORE },
   { X86::VMOVUPDZ256rr,       X86::VMOVUPDZ256mr,       TB_FOLDED_STORE },
@@ -544,14 +530,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::MOV16rr,              X86::MOV16rm,              0 },
   { X86::MOV32rr,              X86::MOV32rm,              0 },
   { X86::MOV64rr,              X86::MOV64rm,              0 },
-  { X86::MOV64toPQIrr,         X86::MOVQI2PQIrm,          0 },
-  { X86::MOV64toSDrr,          X86::MOV64toSDrm,          0 },
+  { X86::MOV64toPQIrr,         X86::MOVQI2PQIrm,          TB_NO_REVERSE },
+  { X86::MOV64toSDrr,          X86::MOVSDrm_alt,          TB_NO_REVERSE },
   { X86::MOV8rr,               X86::MOV8rm,               0 },
   { X86::MOVAPDrr,             X86::MOVAPDrm,             TB_ALIGN_16 },
   { X86::MOVAPSrr,             X86::MOVAPSrm,             TB_ALIGN_16 },
   { X86::MOVDDUPrr,            X86::MOVDDUPrm,            TB_NO_REVERSE },
   { X86::MOVDI2PDIrr,          X86::MOVDI2PDIrm,          0 },
-  { X86::MOVDI2SSrr,           X86::MOVDI2SSrm,           0 },
+  { X86::MOVDI2SSrr,           X86::MOVSSrm_alt,          0 },
   { X86::MOVDQArr,             X86::MOVDQArm,             TB_ALIGN_16 },
   { X86::MOVDQUrr,             X86::MOVDQUrm,             0 },
   { X86::MOVSHDUPrr,           X86::MOVSHDUPrm,           TB_ALIGN_16 },
@@ -628,7 +614,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::SQRTSSr,              X86::SQRTSSm,              0 },
   { X86::T1MSKC32rr,           X86::T1MSKC32rm,           0 },
   { X86::T1MSKC64rr,           X86::T1MSKC64rm,           0 },
-  // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
   { X86::TZCNT16rr,            X86::TZCNT16rm,            0 },
   { X86::TZCNT32rr,            X86::TZCNT32rm,            0 },
   { X86::TZCNT64rr,            X86::TZCNT64rm,            0 },
@@ -663,7 +648,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VCOMISSrr_Int,        X86::VCOMISSrm_Int,        TB_NO_REVERSE },
   { X86::VCVTDQ2PDYrr,         X86::VCVTDQ2PDYrm,         0 },
   { X86::VCVTDQ2PDZ128rr,      X86::VCVTDQ2PDZ128rm,      TB_NO_REVERSE },
-  { X86::VCVTDQ2PDZ256rr,      X86::VCVTDQ2PDZ256rm,       0 },
+  { X86::VCVTDQ2PDZ256rr,      X86::VCVTDQ2PDZ256rm,      0 },
   { X86::VCVTDQ2PDZrr,         X86::VCVTDQ2PDZrm,         0 },
   { X86::VCVTDQ2PDrr,          X86::VCVTDQ2PDrm,          TB_NO_REVERSE },
   { X86::VCVTDQ2PSYrr,         X86::VCVTDQ2PSYrm,         0 },
@@ -671,6 +656,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VCVTDQ2PSZ256rr,      X86::VCVTDQ2PSZ256rm,      0 },
   { X86::VCVTDQ2PSZrr,         X86::VCVTDQ2PSZrm,         0 },
   { X86::VCVTDQ2PSrr,          X86::VCVTDQ2PSrm,          0 },
+  { X86::VCVTNEPS2BF16Z128rr,  X86::VCVTNEPS2BF16Z128rm,  0 },
+  { X86::VCVTNEPS2BF16Z256rr,  X86::VCVTNEPS2BF16Z256rm,  0 },
+  { X86::VCVTNEPS2BF16Zrr,     X86::VCVTNEPS2BF16Zrm,     0 },
   { X86::VCVTPD2DQYrr,         X86::VCVTPD2DQYrm,         0 },
   { X86::VCVTPD2DQZ128rr,      X86::VCVTPD2DQZ128rm,      0 },
   { X86::VCVTPD2DQZ256rr,      X86::VCVTPD2DQZ256rm,      0 },
@@ -830,10 +818,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VGETMANTPSZ128rri,    X86::VGETMANTPSZ128rmi,    0 },
   { X86::VGETMANTPSZ256rri,    X86::VGETMANTPSZ256rmi,    0 },
   { X86::VGETMANTPSZrri,       X86::VGETMANTPSZrmi,       0 },
-  { X86::VMOV64toPQIZrr,       X86::VMOVQI2PQIZrm,        0 },
-  { X86::VMOV64toPQIrr,        X86::VMOVQI2PQIrm,         0 },
-  { X86::VMOV64toSDZrr,        X86::VMOV64toSDZrm,        0 },
-  { X86::VMOV64toSDrr,         X86::VMOV64toSDrm,         0 },
+  { X86::VMOV64toPQIZrr,       X86::VMOVQI2PQIZrm,        TB_NO_REVERSE },
+  { X86::VMOV64toPQIrr,        X86::VMOVQI2PQIrm,         TB_NO_REVERSE },
+  { X86::VMOV64toSDZrr,        X86::VMOVSDZrm_alt,        TB_NO_REVERSE },
+  { X86::VMOV64toSDrr,         X86::VMOVSDrm_alt,         TB_NO_REVERSE },
   { X86::VMOVAPDYrr,           X86::VMOVAPDYrm,           TB_ALIGN_32 },
   { X86::VMOVAPDZ128rr,        X86::VMOVAPDZ128rm,        TB_ALIGN_16 },
   { X86::VMOVAPDZ256rr,        X86::VMOVAPDZ256rm,        TB_ALIGN_32 },
@@ -851,8 +839,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VMOVDDUPrr,           X86::VMOVDDUPrm,           TB_NO_REVERSE },
   { X86::VMOVDI2PDIZrr,        X86::VMOVDI2PDIZrm,        0 },
   { X86::VMOVDI2PDIrr,         X86::VMOVDI2PDIrm,         0 },
-  { X86::VMOVDI2SSZrr,         X86::VMOVDI2SSZrm,         0 },
-  { X86::VMOVDI2SSrr,          X86::VMOVDI2SSrm,          0 },
+  { X86::VMOVDI2SSZrr,         X86::VMOVSSZrm_alt,        0 },
+  { X86::VMOVDI2SSrr,          X86::VMOVSSrm_alt,         0 },
   { X86::VMOVDQA32Z128rr,      X86::VMOVDQA32Z128rm,      TB_ALIGN_16 },
   { X86::VMOVDQA32Z256rr,      X86::VMOVDQA32Z256rm,      TB_ALIGN_32 },
   { X86::VMOVDQA32Zrr,         X86::VMOVDQA32Zrm,         TB_ALIGN_64 },
@@ -1206,6 +1194,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
 };
 
 static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
+  { X86::ADD16rr_DB,               X86::ADD16rm,                  TB_NO_REVERSE },
+  { X86::ADD32rr_DB,               X86::ADD32rm,                  TB_NO_REVERSE },
+  { X86::ADD64rr_DB,               X86::ADD64rm,                  TB_NO_REVERSE },
+  { X86::ADD8rr_DB,                X86::ADD8rm,                   TB_NO_REVERSE },
   { X86::ADC16rr,                  X86::ADC16rm,                  0 },
   { X86::ADC32rr,                  X86::ADC32rm,                  0 },
   { X86::ADC64rr,                  X86::ADC64rm,                  0 },
@@ -1213,11 +1205,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::ADCX32rr,                 X86::ADCX32rm,                 0 },
   { X86::ADCX64rr,                 X86::ADCX64rm,                 0 },
   { X86::ADD16rr,                  X86::ADD16rm,                  0 },
-  { X86::ADD16rr_DB,               X86::ADD16rm,                  TB_NO_REVERSE },
   { X86::ADD32rr,                  X86::ADD32rm,                  0 },
-  { X86::ADD32rr_DB,               X86::ADD32rm,                  TB_NO_REVERSE },
   { X86::ADD64rr,                  X86::ADD64rm,                  0 },
-  { X86::ADD64rr_DB,               X86::ADD64rm,                  TB_NO_REVERSE },
   { X86::ADD8rr,                   X86::ADD8rm,                   0 },
   { X86::ADDPDrr,                  X86::ADDPDrm,                  TB_ALIGN_16 },
   { X86::ADDPSrr,                  X86::ADDPSrm,                  TB_ALIGN_16 },
@@ -1247,54 +1236,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::BLENDPSrri,               X86::BLENDPSrmi,               TB_ALIGN_16 },
   { X86::BLENDVPDrr0,              X86::BLENDVPDrm0,              TB_ALIGN_16 },
   { X86::BLENDVPSrr0,              X86::BLENDVPSrm0,              TB_ALIGN_16 },
-  { X86::CMOVA16rr,                X86::CMOVA16rm,                0 },
-  { X86::CMOVA32rr,                X86::CMOVA32rm,                0 },
-  { X86::CMOVA64rr,                X86::CMOVA64rm,                0 },
-  { X86::CMOVAE16rr,               X86::CMOVAE16rm,               0 },
-  { X86::CMOVAE32rr,               X86::CMOVAE32rm,               0 },
-  { X86::CMOVAE64rr,               X86::CMOVAE64rm,               0 },
-  { X86::CMOVB16rr,                X86::CMOVB16rm,                0 },
-  { X86::CMOVB32rr,                X86::CMOVB32rm,                0 },
-  { X86::CMOVB64rr,                X86::CMOVB64rm,                0 },
-  { X86::CMOVBE16rr,               X86::CMOVBE16rm,               0 },
-  { X86::CMOVBE32rr,               X86::CMOVBE32rm,               0 },
-  { X86::CMOVBE64rr,               X86::CMOVBE64rm,               0 },
-  { X86::CMOVE16rr,                X86::CMOVE16rm,                0 },
-  { X86::CMOVE32rr,                X86::CMOVE32rm,                0 },
-  { X86::CMOVE64rr,                X86::CMOVE64rm,                0 },
-  { X86::CMOVG16rr,                X86::CMOVG16rm,                0 },
-  { X86::CMOVG32rr,                X86::CMOVG32rm,                0 },
-  { X86::CMOVG64rr,                X86::CMOVG64rm,                0 },
-  { X86::CMOVGE16rr,               X86::CMOVGE16rm,               0 },
-  { X86::CMOVGE32rr,               X86::CMOVGE32rm,               0 },
-  { X86::CMOVGE64rr,               X86::CMOVGE64rm,               0 },
-  { X86::CMOVL16rr,                X86::CMOVL16rm,                0 },
-  { X86::CMOVL32rr,                X86::CMOVL32rm,                0 },
-  { X86::CMOVL64rr,                X86::CMOVL64rm,                0 },
-  { X86::CMOVLE16rr,               X86::CMOVLE16rm,               0 },
-  { X86::CMOVLE32rr,               X86::CMOVLE32rm,               0 },
-  { X86::CMOVLE64rr,               X86::CMOVLE64rm,               0 },
-  { X86::CMOVNE16rr,               X86::CMOVNE16rm,               0 },
-  { X86::CMOVNE32rr,               X86::CMOVNE32rm,               0 },
-  { X86::CMOVNE64rr,               X86::CMOVNE64rm,               0 },
-  { X86::CMOVNO16rr,               X86::CMOVNO16rm,               0 },
-  { X86::CMOVNO32rr,               X86::CMOVNO32rm,               0 },
-  { X86::CMOVNO64rr,               X86::CMOVNO64rm,               0 },
-  { X86::CMOVNP16rr,               X86::CMOVNP16rm,               0 },
-  { X86::CMOVNP32rr,               X86::CMOVNP32rm,               0 },
-  { X86::CMOVNP64rr,               X86::CMOVNP64rm,               0 },
-  { X86::CMOVNS16rr,               X86::CMOVNS16rm,               0 },
-  { X86::CMOVNS32rr,               X86::CMOVNS32rm,               0 },
-  { X86::CMOVNS64rr,               X86::CMOVNS64rm,               0 },
-  { X86::CMOVO16rr,                X86::CMOVO16rm,                0 },
-  { X86::CMOVO32rr,                X86::CMOVO32rm,                0 },
-  { X86::CMOVO64rr,                X86::CMOVO64rm,                0 },
-  { X86::CMOVP16rr,                X86::CMOVP16rm,                0 },
-  { X86::CMOVP32rr,                X86::CMOVP32rm,                0 },
-  { X86::CMOVP64rr,                X86::CMOVP64rm,                0 },
-  { X86::CMOVS16rr,                X86::CMOVS16rm,                0 },
-  { X86::CMOVS32rr,                X86::CMOVS32rm,                0 },
-  { X86::CMOVS64rr,                X86::CMOVS64rm,                0 },
+  { X86::CMOV16rr,                 X86::CMOV16rm,                 0 },
+  { X86::CMOV32rr,                 X86::CMOV32rm,                 0 },
+  { X86::CMOV64rr,                 X86::CMOV64rm,                 0 },
   { X86::CMPPDrri,                 X86::CMPPDrmi,                 TB_ALIGN_16 },
   { X86::CMPPSrri,                 X86::CMPPSrmi,                 TB_ALIGN_16 },
   { X86::CMPSDrr,                  X86::CMPSDrm,                  0 },
@@ -1421,6 +1365,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::MMX_PUNPCKLWDirr,         X86::MMX_PUNPCKLWDirm,         TB_NO_REVERSE },
   { X86::MMX_PXORirr,              X86::MMX_PXORirm,              0 },
   { X86::MOVLHPSrr,                X86::MOVHPSrm,                 TB_NO_REVERSE },
+  { X86::MOVSDrr,                  X86::MOVLPDrm,                 TB_NO_REVERSE },
   { X86::MPSADBWrri,               X86::MPSADBWrmi,               TB_ALIGN_16 },
   { X86::MULPDrr,                  X86::MULPDrm,                  TB_ALIGN_16 },
   { X86::MULPSrr,                  X86::MULPSrm,                  TB_ALIGN_16 },
@@ -1576,7 +1521,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::SUBSDrr_Int,              X86::SUBSDrm_Int,              TB_NO_REVERSE },
   { X86::SUBSSrr,                  X86::SUBSSrm,                  0 },
   { X86::SUBSSrr_Int,              X86::SUBSSrm_Int,              TB_NO_REVERSE },
-  // FIXME: TEST*rr -> swapped      operand of TEST      *mr.
   { X86::UNPCKHPDrr,               X86::UNPCKHPDrm,               TB_ALIGN_16 },
   { X86::UNPCKHPSrr,               X86::UNPCKHPSrm,               TB_ALIGN_16 },
   { X86::UNPCKLPDrr,               X86::UNPCKLPDrm,               TB_ALIGN_16 },
@@ -1697,6 +1641,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::VCVTDQ2PSZ128rrkz,        X86::VCVTDQ2PSZ128rmkz,        0 },
   { X86::VCVTDQ2PSZ256rrkz,        X86::VCVTDQ2PSZ256rmkz,        0 },
   { X86::VCVTDQ2PSZrrkz,           X86::VCVTDQ2PSZrmkz,           0 },
+  { X86::VCVTNE2PS2BF16Z128rr,     X86::VCVTNE2PS2BF16Z128rm,     0 },
+  { X86::VCVTNE2PS2BF16Z256rr,     X86::VCVTNE2PS2BF16Z256rm,     0 },
+  { X86::VCVTNE2PS2BF16Zrr,        X86::VCVTNE2PS2BF16Zrm,        0 },
+  { X86::VCVTNEPS2BF16Z128rrkz,    X86::VCVTNEPS2BF16Z128rmkz,    0 },
+  { X86::VCVTNEPS2BF16Z256rrkz,    X86::VCVTNEPS2BF16Z256rmkz,    0 },
+  { X86::VCVTNEPS2BF16Zrrkz,       X86::VCVTNEPS2BF16Zrmkz,       0 },
   { X86::VCVTPD2DQZ128rrkz,        X86::VCVTPD2DQZ128rmkz,        0 },
   { X86::VCVTPD2DQZ256rrkz,        X86::VCVTPD2DQZ256rmkz,        0 },
   { X86::VCVTPD2DQZrrkz,           X86::VCVTPD2DQZrmkz,           0 },
@@ -2030,6 +1980,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::VMOVDQU8Zrrkz,            X86::VMOVDQU8Zrmkz,            TB_NO_REVERSE },
   { X86::VMOVLHPSZrr,              X86::VMOVHPSZ128rm,            TB_NO_REVERSE },
   { X86::VMOVLHPSrr,               X86::VMOVHPSrm,                TB_NO_REVERSE },
+  { X86::VMOVSDZrr,                X86::VMOVLPDZ128rm,            TB_NO_REVERSE },
+  { X86::VMOVSDrr,                 X86::VMOVLPDrm,                TB_NO_REVERSE },
   { X86::VMOVSHDUPZ128rrkz,        X86::VMOVSHDUPZ128rmkz,        0 },
   { X86::VMOVSHDUPZ256rrkz,        X86::VMOVSHDUPZ256rmkz,        0 },
   { X86::VMOVSHDUPZrrkz,           X86::VMOVSHDUPZrmkz,           0 },
@@ -2072,6 +2024,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::VORPSZ256rr,              X86::VORPSZ256rm,              0 },
   { X86::VORPSZrr,                 X86::VORPSZrm,                 0 },
   { X86::VORPSrr,                  X86::VORPSrm,                  0 },
+  { X86::VP2INTERSECTDZ128rr,      X86::VP2INTERSECTDZ128rm,      0 },
+  { X86::VP2INTERSECTDZ256rr,      X86::VP2INTERSECTDZ256rm,      0 },
+  { X86::VP2INTERSECTDZrr,         X86::VP2INTERSECTDZrm,         0 },
+  { X86::VP2INTERSECTQZ128rr,      X86::VP2INTERSECTQZ128rm,      0 },
+  { X86::VP2INTERSECTQZ256rr,      X86::VP2INTERSECTQZ256rm,      0 },
+  { X86::VP2INTERSECTQZrr,         X86::VP2INTERSECTQZrm,         0 },
   { X86::VPABSBZ128rrkz,           X86::VPABSBZ128rmkz,           0 },
   { X86::VPABSBZ256rrkz,           X86::VPABSBZ256rmkz,           0 },
   { X86::VPABSBZrrkz,              X86::VPABSBZrmkz,              0 },
@@ -3074,6 +3032,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
   { X86::VCVTDQ2PSZ128rrk,           X86::VCVTDQ2PSZ128rmk,           0 },
   { X86::VCVTDQ2PSZ256rrk,           X86::VCVTDQ2PSZ256rmk,           0 },
   { X86::VCVTDQ2PSZrrk,              X86::VCVTDQ2PSZrmk,              0 },
+  { X86::VCVTNE2PS2BF16Z128rrkz,     X86::VCVTNE2PS2BF16Z128rmkz,     0 },
+  { X86::VCVTNE2PS2BF16Z256rrkz,     X86::VCVTNE2PS2BF16Z256rmkz,     0 },
+  { X86::VCVTNE2PS2BF16Zrrkz,        X86::VCVTNE2PS2BF16Zrmkz,        0 },
+  { X86::VCVTNEPS2BF16Z128rrk,       X86::VCVTNEPS2BF16Z128rmk,       0 },
+  { X86::VCVTNEPS2BF16Z256rrk,       X86::VCVTNEPS2BF16Z256rmk,       0 },
+  { X86::VCVTNEPS2BF16Zrrk,          X86::VCVTNEPS2BF16Zrmk,          0 },
   { X86::VCVTPD2DQZ128rrk,           X86::VCVTPD2DQZ128rmk,           0 },
   { X86::VCVTPD2DQZ256rrk,           X86::VCVTPD2DQZ256rmk,           0 },
   { X86::VCVTPD2DQZrrk,              X86::VCVTPD2DQZrmk,              0 },
@@ -3162,6 +3126,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
   { X86::VDIVPSZrrkz,                X86::VDIVPSZrmkz,                0 },
   { X86::VDIVSDZrr_Intkz,            X86::VDIVSDZrm_Intkz,            TB_NO_REVERSE },
   { X86::VDIVSSZrr_Intkz,            X86::VDIVSSZrm_Intkz,            TB_NO_REVERSE },
+  { X86::VDPBF16PSZ128r,             X86::VDPBF16PSZ128m,             0 },
+  { X86::VDPBF16PSZ256r,             X86::VDPBF16PSZ256m,             0 },
+  { X86::VDPBF16PSZr,                X86::VDPBF16PSZm,                0 },
   { X86::VEXP2PDZrk,                 X86::VEXP2PDZmk,                 0 },
   { X86::VEXP2PSZrk,                 X86::VEXP2PSZmk,                 0 },
   { X86::VEXPANDPDZ128rrk,           X86::VEXPANDPDZ128rmk,           TB_NO_REVERSE },
@@ -4376,6 +4343,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
   { X86::VANDPSZ128rrk,             X86::VANDPSZ128rmk,             0 },
   { X86::VANDPSZ256rrk,             X86::VANDPSZ256rmk,             0 },
   { X86::VANDPSZrrk,                X86::VANDPSZrmk,                0 },
+  { X86::VCVTNE2PS2BF16Z128rrk,     X86::VCVTNE2PS2BF16Z128rmk,     0 },
+  { X86::VCVTNE2PS2BF16Z256rrk,     X86::VCVTNE2PS2BF16Z256rmk,     0 },
+  { X86::VCVTNE2PS2BF16Zrrk,        X86::VCVTNE2PS2BF16Zrmk,        0 },
   { X86::VCVTSD2SSZrr_Intk,         X86::VCVTSD2SSZrm_Intk,         TB_NO_REVERSE },
   { X86::VCVTSS2SDZrr_Intk,         X86::VCVTSS2SDZrm_Intk,         TB_NO_REVERSE },
   { X86::VDBPSADBWZ128rrik,         X86::VDBPSADBWZ128rmik,         0 },
@@ -4389,6 +4359,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
   { X86::VDIVPSZrrk,                X86::VDIVPSZrmk,                0 },
   { X86::VDIVSDZrr_Intk,            X86::VDIVSDZrm_Intk,            TB_NO_REVERSE },
   { X86::VDIVSSZrr_Intk,            X86::VDIVSSZrm_Intk,            TB_NO_REVERSE },
+  { X86::VDPBF16PSZ128rk,           X86::VDPBF16PSZ128mk,           0 },
+  { X86::VDPBF16PSZ128rkz,          X86::VDPBF16PSZ128mkz,          0 },
+  { X86::VDPBF16PSZ256rk,           X86::VDPBF16PSZ256mk,           0 },
+  { X86::VDPBF16PSZ256rkz,          X86::VDPBF16PSZ256mkz,          0 },
+  { X86::VDPBF16PSZrk,              X86::VDPBF16PSZmk,              0 },
+  { X86::VDPBF16PSZrkz,             X86::VDPBF16PSZmkz,             0 },
   { X86::VFIXUPIMMPDZ128rrik,       X86::VFIXUPIMMPDZ128rmik,       0 },
   { X86::VFIXUPIMMPDZ128rrikz,      X86::VFIXUPIMMPDZ128rmikz,      0 },
   { X86::VFIXUPIMMPDZ256rrik,       X86::VFIXUPIMMPDZ256rmik,       0 },
@@ -5315,9 +5291,7 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
   }
 #endif
 
-  const X86MemoryFoldTableEntry *Data = std::lower_bound(Table.begin(),
-                                                         Table.end(),
-                                                         RegOp);
+  const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
   if (Data != Table.end() && Data->KeyOp == RegOp &&
       !(Data->Flags & TB_NO_FORWARD))
     return Data;
@@ -5404,7 +5378,7 @@ static ManagedStatic<X86MemUnfoldTable> MemUnfoldTable;
 const X86MemoryFoldTableEntry *
 llvm::lookupUnfoldTable(unsigned MemOp) {
   auto &Table = MemUnfoldTable->Table;
-  auto I = std::lower_bound(Table.begin(), Table.end(), MemOp);
+  auto I = llvm::lower_bound(Table, MemOp);
   if (I != Table.end() && I->KeyOp == MemOp)
     return &*I;
   return nullptr;
diff --git a/lib/Target/X86/X86InstrFoldTables.h b/lib/Target/X86/X86InstrFoldTables.h
index 90016baead96..419baf98f61d 100644
--- a/lib/Target/X86/X86InstrFoldTables.h
+++ b/lib/Target/X86/X86InstrFoldTables.h
@@ -1,9 +1,8 @@
 //===-- X86InstrFoldTables.h - X86 Instruction Folding Tables ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 47d4719d3060..e8f0d937dff4 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -1,9 +1,8 @@
 //===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -27,10 +26,13 @@ def RawFrmDst     : Format<5>;
 def RawFrmDstSrc  : Format<6>;
 def RawFrmImm8    : Format<7>;
 def RawFrmImm16   : Format<8>;
+def AddCCFrm      : Format<9>;
 def MRMDestMem     : Format<32>;
 def MRMSrcMem      : Format<33>;
 def MRMSrcMem4VOp3 : Format<34>;
 def MRMSrcMemOp4   : Format<35>;
+def MRMSrcMemCC    : Format<36>;
+def MRMXmCC: Format<38>;
 def MRMXm  : Format<39>;
 def MRM0m  : Format<40>;  def MRM1m  : Format<41>;  def MRM2m  : Format<42>;
 def MRM3m  : Format<43>;  def MRM4m  : Format<44>;  def MRM5m  : Format<45>;
@@ -39,6 +41,8 @@ def MRMDestReg     : Format<48>;
 def MRMSrcReg      : Format<49>;
 def MRMSrcReg4VOp3 : Format<50>;
 def MRMSrcRegOp4   : Format<51>;
+def MRMSrcRegCC    : Format<52>;
+def MRMXrCC: Format<54>;
 def MRMXr  : Format<55>;
 def MRM0r  : Format<56>;  def MRM1r  : Format<57>;  def MRM2r  : Format<58>;
 def MRM3r  : Format<59>;  def MRM4r  : Format<60>;  def MRM5r  : Format<61>;
@@ -206,13 +210,10 @@ class TAPS : TA { Prefix OpPrefix = PS; }
 class TAPD : TA { Prefix OpPrefix = PD; }
 class TAXD : TA { Prefix OpPrefix = XD; }
 class VEX    { Encoding OpEnc = EncVEX; }
-class VEX_W    { bits<2> VEX_WPrefix = 1; }
-class VEX_WIG  { bits<2> VEX_WPrefix = 2; }
+class VEX_W    { bit HasVEX_W = 1; }
+class VEX_WIG  { bit IgnoresVEX_W = 1; }
 // Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
-// FIXME: We should consider adding separate bits for VEX_WIG and the extra
-// part of W1X. This would probably simplify the tablegen emitters and
-// the TSFlags creation below.
-class VEX_W1X  { bits<2> VEX_WPrefix = 3; }
+class VEX_W1X  { bit HasVEX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
 class VEX_4V : VEX { bit hasVEX_4V = 1; }
 class VEX_L  { bit hasVEX_L = 1; }
 class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -296,7 +297,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasREPPrefix = 0;     // Does this inst have a REP prefix?
   Encoding OpEnc = EncNormal; // Encoding used by this instruction
   bits<2> OpEncBits = OpEnc.Value;
-  bits<2> VEX_WPrefix = 0;  // Does this inst set the VEX_W field?
+  bit HasVEX_W = 0;         // Does this inst set the VEX_W field?
+  bit IgnoresVEX_W = 0;     // Does this inst ignore the VEX_W field?
+  bit EVEX_W1_VEX_W0 = 0;   // This EVEX inst with VEX.W==1 can become a VEX
+                            // instruction with VEX.W==0.
   bit hasVEX_4V = 0;        // Does this inst require the VEX.VVVV field?
   bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
   bit ignoresVEX_L = 0;     // Does this instruction ignore the L-bit
@@ -311,11 +315,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasEVEX_RC = 0;       // Explicitly specified rounding control in FP instruction.
   bit hasNoTrackPrefix = 0; // Does this inst has 0x3E (NoTrack) prefix?
 
-  bits<2> EVEX_LL;
-  let EVEX_LL{0} = hasVEX_L;
-  let EVEX_LL{1} = hasEVEX_L2;
   // Vector size in bytes.
-  bits<7> VectSize = !shl(16, EVEX_LL);
+  bits<7> VectSize = !if(hasEVEX_L2, 64, !if(hasVEX_L, 32, 16));
 
   // The scaling factor for AVX512's compressed displacement is either
   //   - the size of a  power-of-two number of elements or
@@ -355,7 +356,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{29-28} = OpEncBits;
   let TSFlags{37-30} = Opcode;
   // Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
-  let TSFlags{38}    = VEX_WPrefix{0};
+  let TSFlags{38}    = HasVEX_W;
   let TSFlags{39}    = hasVEX_4V;
   let TSFlags{40}    = hasVEX_L;
   let TSFlags{41}    = hasEVEX_K;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 11a27ba90586..096cc27861ca 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1,9 +1,8 @@
 //===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -100,8 +99,10 @@ def X86insertps : SDNode<"X86ISD::INSERTPS",
 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 
-def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vzld  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vextractst  : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
+                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def SDTVtrunc    : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                         SDTCisInt<0>, SDTCisInt<1>,
@@ -127,21 +128,31 @@ def X86vfpext  : SDNode<"X86ISD::VFPEXT",
 def X86vfpround: SDNode<"X86ISD::VFPROUND",
                         SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
                                              SDTCVecEltisVT<1, f64>,
-                                             SDTCisSameSizeAs<0, 1>]>>;
+                                             SDTCisOpSmallerThanOp<0, 1>]>>;
 
-def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
+def X86frounds   : SDNode<"X86ISD::VFPROUNDS",
+                           SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+                                                SDTCisSameAs<0, 1>,
+                                                SDTCVecEltisVT<2, f64>,
+                                                SDTCisSameSizeAs<0, 2>]>>;
+
+def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
                         SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
                                              SDTCisSameAs<0, 1>,
                                              SDTCVecEltisVT<2, f64>,
                                              SDTCisSameSizeAs<0, 2>,
                                              SDTCisVT<3, i32>]>>;
 
-def X86fpextRnd  : SDNode<"X86ISD::VFPEXTS_RND",
-                        SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
+def X86fpexts     : SDNode<"X86ISD::VFPEXTS",
+                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
                                              SDTCisSameAs<0, 1>,
                                              SDTCVecEltisVT<2, f32>,
-                                             SDTCisSameSizeAs<0, 2>,
-                                             SDTCisVT<3, i32>]>>;
+                                             SDTCisSameSizeAs<0, 2>]>>;
+def X86fpextsSAE  : SDNode<"X86ISD::VFPEXTS_SAE",
+                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+                                             SDTCisSameAs<0, 1>,
+                                             SDTCVecEltisVT<2, f32>,
+                                             SDTCisSameSizeAs<0, 2>]>>;
 
 def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
                          SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
@@ -164,25 +175,14 @@ def X86CmpMaskCC :
       SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
                        SDTCisVec<1>, SDTCisSameAs<2, 1>,
                        SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
-def X86CmpMaskCCRound :
-      SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
-                       SDTCisVec<1>, SDTCisFP<1>, SDTCisSameAs<2, 1>,
-                       SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
-                       SDTCisVT<4, i32>]>;
 def X86CmpMaskCCScalar :
       SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
                            SDTCisVT<3, i8>]>;
 
-def X86CmpMaskCCScalarRound :
-      SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
-                           SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
-
 def X86cmpm     : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
-// Hack to make CMPM commutable in tablegen patterns for load folding.
-def X86cmpm_c   : SDNode<"X86ISD::CMPM",     X86CmpMaskCC, [SDNPCommutative]>;
-def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmSAE  : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>;
 def X86cmpms    : SDNode<"X86ISD::FSETCCM",   X86CmpMaskCCScalar>;
-def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND",   X86CmpMaskCCScalarRound>;
+def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE",   X86CmpMaskCCScalar>;
 
 def X86phminpos: SDNode<"X86ISD::PHMINPOS", 
                  SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>;
@@ -198,6 +198,8 @@ def X86vsra    : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
 def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                              SDTCisSameAs<0,2>, SDTCisInt<0>]>;
 
+def X86vshlv   : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
+def X86vsrlv   : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
 def X86vsrav   : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
 
 def X86vshli   : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
@@ -299,25 +301,15 @@ def SDTFPBinOpImm: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
                                         SDTCisSameAs<0,1>,
                                         SDTCisSameAs<0,2>,
                                         SDTCisVT<3, i32>]>;
-def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisVec<0>,
-                                             SDTCisSameAs<0,1>,
-                                             SDTCisSameAs<0,2>,
-                                             SDTCisVT<3, i32>,
-                                             SDTCisVT<4, i32>]>;
-def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
-                                                 SDTCisSameAs<0,2>,
-                                                 SDTCisInt<3>,
-                                                 SDTCisSameSizeAs<0, 3>,
-                                                 SDTCisSameNumEltsAs<0, 3>,
-                                                 SDTCisVT<4, i32>,
-                                                 SDTCisVT<5, i32>]>;
-def SDTFPUnaryOpImm: SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+def SDTFPTernaryOpImm: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisSameAs<0,1>,
+                                            SDTCisSameAs<0,2>,
+                                            SDTCisInt<3>,
+                                            SDTCisSameSizeAs<0, 3>,
+                                            SDTCisSameNumEltsAs<0, 3>,
+                                            SDTCisVT<4, i32>]>;
+def SDTFPUnaryOpImm: SDTypeProfile<1, 2, [SDTCisFP<0>,
                                           SDTCisSameAs<0,1>,
                                           SDTCisVT<2, i32>]>;
-def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
-                                               SDTCisSameAs<0,1>,
-                                               SDTCisVT<2, i32>,
-                                               SDTCisVT<3, i32>]>;
 
 def SDTVBroadcast  : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
 def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
@@ -373,11 +365,23 @@ def X86Movddup  : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
 def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
 def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
 
-def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2OpFP>;
-def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2OpFP>;
-
-def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2OpFP>;
-def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2OpFP>;
+def X86Movsd : SDNode<"X86ISD::MOVSD",
+                      SDTypeProfile<1, 2, [SDTCisVT<0, v2f64>,
+                                           SDTCisVT<1, v2f64>,
+                                           SDTCisVT<2, v2f64>]>>;
+def X86Movss : SDNode<"X86ISD::MOVSS",
+                      SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+                                           SDTCisVT<1, v4f32>,
+                                           SDTCisVT<2, v4f32>]>>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS",
+                        SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+                                             SDTCisVT<1, v4f32>,
+                                             SDTCisVT<2, v4f32>]>>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS",
+                        SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+                                             SDTCisVT<1, v4f32>,
+                                             SDTCisVT<2, v4f32>]>>;
 
 def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
                                    SDTCisVec<1>, SDTCisInt<1>,
@@ -421,16 +425,18 @@ def X86vpternlog  : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
 
 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 
-def X86VFixupimm   : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
-def X86VFixupimmScalar   : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRound>;
+def X86VFixupimm     : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImm>;
+def X86VFixupimmSAE  : SDNode<"X86ISD::VFIXUPIMM_SAE", SDTFPTernaryOpImm>;
+def X86VFixupimms    : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImm>;
+def X86VFixupimmSAEs : SDNode<"X86ISD::VFIXUPIMMS_SAE", SDTFPTernaryOpImm>;
 def X86VRange      : SDNode<"X86ISD::VRANGE",        SDTFPBinOpImm>;
-def X86VRangeRnd   : SDNode<"X86ISD::VRANGE_RND",    SDTFPBinOpImmRound>;
+def X86VRangeSAE   : SDNode<"X86ISD::VRANGE_SAE",    SDTFPBinOpImm>;
 def X86VReduce     : SDNode<"X86ISD::VREDUCE",       SDTFPUnaryOpImm>;
-def X86VReduceRnd  : SDNode<"X86ISD::VREDUCE_RND",   SDTFPUnaryOpImmRound>;
+def X86VReduceSAE  : SDNode<"X86ISD::VREDUCE_SAE",   SDTFPUnaryOpImm>;
 def X86VRndScale   : SDNode<"X86ISD::VRNDSCALE",     SDTFPUnaryOpImm>;
-def X86VRndScaleRnd: SDNode<"X86ISD::VRNDSCALE_RND", SDTFPUnaryOpImmRound>;
+def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
 def X86VGetMant    : SDNode<"X86ISD::VGETMANT",      SDTFPUnaryOpImm>;
-def X86VGetMantRnd : SDNode<"X86ISD::VGETMANT_RND",  SDTFPUnaryOpImmRound>;
+def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE",  SDTFPUnaryOpImm>;
 def X86Vfpclass    : SDNode<"X86ISD::VFPCLASS",
                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
                                             SDTCisFP<1>,
@@ -448,27 +454,42 @@ def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
 
 def X86Blendi    : SDNode<"X86ISD::BLENDI",   SDTBlend>;
+def X86Blendv    : SDNode<"X86ISD::BLENDV",
+                     SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
+                                          SDTCisSameAs<0, 2>,
+                                          SDTCisSameAs<2, 3>,
+                                          SDTCisSameNumEltsAs<0, 1>,
+                                          SDTCisSameSizeAs<0, 1>]>>;
 
 def X86Addsub    : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
 
 def X86faddRnd   : SDNode<"X86ISD::FADD_RND",  SDTFPBinOpRound>;
+def X86fadds     : SDNode<"X86ISD::FADDS",     SDTFPBinOp>;
 def X86faddRnds  : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
 def X86fsubRnd   : SDNode<"X86ISD::FSUB_RND",  SDTFPBinOpRound>;
+def X86fsubs     : SDNode<"X86ISD::FSUBS",     SDTFPBinOp>;
 def X86fsubRnds  : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>;
 def X86fmulRnd   : SDNode<"X86ISD::FMUL_RND",  SDTFPBinOpRound>;
+def X86fmuls     : SDNode<"X86ISD::FMULS",     SDTFPBinOp>;
 def X86fmulRnds  : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>;
 def X86fdivRnd   : SDNode<"X86ISD::FDIV_RND",  SDTFPBinOpRound>;
+def X86fdivs     : SDNode<"X86ISD::FDIVS",     SDTFPBinOp>;
 def X86fdivRnds  : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>;
-def X86fmaxRnd   : SDNode<"X86ISD::FMAX_RND",  SDTFPBinOpRound>;
-def X86fmaxRnds  : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>;
-def X86fminRnd   : SDNode<"X86ISD::FMIN_RND",  SDTFPBinOpRound>;
-def X86fminRnds  : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>;
-def X86scalef    : SDNode<"X86ISD::SCALEF",         SDTFPBinOpRound>;
-def X86scalefs   : SDNode<"X86ISD::SCALEFS",        SDTFPBinOpRound>;
+def X86fmaxSAE   : SDNode<"X86ISD::FMAX_SAE",  SDTFPBinOp>;
+def X86fmaxSAEs  : SDNode<"X86ISD::FMAXS_SAE", SDTFPBinOp>;
+def X86fminSAE   : SDNode<"X86ISD::FMIN_SAE",  SDTFPBinOp>;
+def X86fminSAEs  : SDNode<"X86ISD::FMINS_SAE", SDTFPBinOp>;
+def X86scalef    : SDNode<"X86ISD::SCALEF",         SDTFPBinOp>;
+def X86scalefRnd : SDNode<"X86ISD::SCALEF_RND",     SDTFPBinOpRound>;
+def X86scalefs   : SDNode<"X86ISD::SCALEFS",        SDTFPBinOp>;
+def X86scalefsRnd: SDNode<"X86ISD::SCALEFS_RND",    SDTFPBinOpRound>;
 def X86fsqrtRnd     : SDNode<"X86ISD::FSQRT_RND",   SDTFPUnaryOpRound>;
+def X86fsqrts       : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>;
 def X86fsqrtRnds    : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
-def X86fgetexpRnd   : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnds  : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
+def X86fgetexp      : SDNode<"X86ISD::FGETEXP", SDTFPUnaryOp>;
+def X86fgetexpSAE   : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
+def X86fgetexps     : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
+def X86fgetexpSAEs  : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
 
 def X86Fmadd     : SDNode<"ISD::FMA",          SDTFPTernaryOp, [SDNPCommutative]>;
 def X86Fnmadd    : SDNode<"X86ISD::FNMADD",    SDTFPTernaryOp, [SDNPCommutative]>;
@@ -484,6 +505,10 @@ def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound, [SDNPCommutat
 def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound, [SDNPCommutative]>;
 def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound, [SDNPCommutative]>;
 
+def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
+                              SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+                                                   SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
+
 def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
                            SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
 def x86vpmadd52l     : SDNode<"X86ISD::VPMADD52L",     SDTIFma, [SDNPCommutative]>;
@@ -500,27 +525,36 @@ def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
 def X86Vpdpwssd  : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
 def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
 
-def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  SDTFPUnaryOpRound>;
-def X86rcp28     : SDNode<"X86ISD::RCP28",    SDTFPUnaryOpRound>;
-def X86exp2      : SDNode<"X86ISD::EXP2",     SDTFPUnaryOpRound>;
+def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",     SDTFPUnaryOp>;
+def X86rsqrt28SAE: SDNode<"X86ISD::RSQRT28_SAE", SDTFPUnaryOp>;
+def X86rcp28     : SDNode<"X86ISD::RCP28",       SDTFPUnaryOp>;
+def X86rcp28SAE  : SDNode<"X86ISD::RCP28_SAE",   SDTFPUnaryOp>;
+def X86exp2      : SDNode<"X86ISD::EXP2",        SDTFPUnaryOp>;
+def X86exp2SAE   : SDNode<"X86ISD::EXP2_SAE",    SDTFPUnaryOp>;
 
 def X86rsqrt14s  : SDNode<"X86ISD::RSQRT14S",   SDTFPBinOp>;
 def X86rcp14s    : SDNode<"X86ISD::RCP14S",     SDTFPBinOp>;
-def X86rsqrt28s  : SDNode<"X86ISD::RSQRT28S",   SDTFPBinOpRound>;
-def X86rcp28s    : SDNode<"X86ISD::RCP28S",     SDTFPBinOpRound>;
+def X86rsqrt28s  : SDNode<"X86ISD::RSQRT28S",   SDTFPBinOp>;
+def X86rsqrt28SAEs : SDNode<"X86ISD::RSQRT28S_SAE", SDTFPBinOp>;
+def X86rcp28s    : SDNode<"X86ISD::RCP28S",     SDTFPBinOp>;
+def X86rcp28SAEs : SDNode<"X86ISD::RCP28S_SAE", SDTFPBinOp>;
 def X86Ranges    : SDNode<"X86ISD::VRANGES",    SDTFPBinOpImm>;
 def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
 def X86Reduces   : SDNode<"X86ISD::VREDUCES",   SDTFPBinOpImm>;
 def X86GetMants  : SDNode<"X86ISD::VGETMANTS",  SDTFPBinOpImm>;
-def X86RangesRnd    : SDNode<"X86ISD::VRANGES_RND",    SDTFPBinOpImmRound>;
-def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>;
-def X86ReducesRnd   : SDNode<"X86ISD::VREDUCES_RND",   SDTFPBinOpImmRound>;
-def X86GetMantsRnd  : SDNode<"X86ISD::VGETMANTS_RND",  SDTFPBinOpImmRound>;
-
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
-                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
-def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
-                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86RangesSAE    : SDNode<"X86ISD::VRANGES_SAE",    SDTFPBinOpImm>;
+def X86RndScalesSAE : SDNode<"X86ISD::VRNDSCALES_SAE", SDTFPBinOpImm>;
+def X86ReducesSAE   : SDNode<"X86ISD::VREDUCES_SAE",   SDTFPBinOpImm>;
+def X86GetMantsSAE  : SDNode<"X86ISD::VGETMANTS_SAE",  SDTFPBinOpImm>;
+
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+                               SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+                               SDTCisSameNumEltsAs<0, 3>]>, []>;
+def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+                               SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+                               SDTCisSameNumEltsAs<0, 3>]>, []>;
 
 // vpshufbitqmb
 def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
@@ -529,6 +563,8 @@ def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
                                                   SDTCVecEltisVT<0,i1>,
                                                   SDTCisSameNumEltsAs<0,1>]>>;
 
+def SDTintToFP: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
+                                     SDTCisSameAs<0,1>, SDTCisInt<2>]>;
 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
                                           SDTCisSameAs<0,1>, SDTCisInt<2>,
                                           SDTCisVT<3, i32>]>;
@@ -550,13 +586,15 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                            SDTCisVT<2, i32>]>;
 
 // Scalar
+def X86SintToFp     : SDNode<"X86ISD::SCALAR_SINT_TO_FP",      SDTintToFP>;
 def X86SintToFpRnd  : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND",  SDTintToFPRound>;
+def X86UintToFp     : SDNode<"X86ISD::SCALAR_UINT_TO_FP",      SDTintToFP>;
 def X86UintToFpRnd  : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND",  SDTintToFPRound>;
 
 def X86cvtts2Int  : SDNode<"X86ISD::CVTTS2SI",  SDTSFloatToInt>;
 def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI",  SDTSFloatToInt>;
-def X86cvtts2IntRnd  : SDNode<"X86ISD::CVTTS2SI_RND",  SDTSFloatToIntRnd>;
-def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND",  SDTSFloatToIntRnd>;
+def X86cvtts2IntSAE  : SDNode<"X86ISD::CVTTS2SI_SAE",  SDTSFloatToInt>;
+def X86cvtts2UIntSAE : SDNode<"X86ISD::CVTTS2UI_SAE",  SDTSFloatToInt>;
 
 def X86cvts2si  : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
 def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
@@ -566,8 +604,8 @@ def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
 // Vector with rounding mode
 
 // cvtt fp-to-int staff
-def X86cvttp2siRnd    : SDNode<"X86ISD::CVTTP2SI_RND", SDTFloatToIntRnd>;
-def X86cvttp2uiRnd    : SDNode<"X86ISD::CVTTP2UI_RND", SDTFloatToIntRnd>;
+def X86cvttp2siSAE    : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>;
+def X86cvttp2uiSAE    : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>;
 
 def X86VSintToFpRnd   : SDNode<"X86ISD::SINT_TO_FP_RND",  SDTVintToFPRound>;
 def X86VUintToFpRnd   : SDNode<"X86ISD::UINT_TO_FP_RND",  SDTVintToFPRound>;
@@ -590,6 +628,13 @@ def X86cvtp2Int      : SDNode<"X86ISD::CVTP2SI",  SDTFloatToInt>;
 def X86cvtp2UInt     : SDNode<"X86ISD::CVTP2UI",  SDTFloatToInt>;
 
 
+// Masked versions of above
+def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+                                       SDTCisFP<0>, SDTCisInt<1>,
+                                       SDTCisSameSizeAs<0, 1>,
+                                       SDTCisSameAs<0, 2>,
+                                       SDTCVecEltisVT<3, i1>,
+                                       SDTCisSameNumEltsAs<1, 3>]>;
 def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCisInt<0>, SDTCisFP<1>,
                                          SDTCisSameSizeAs<0, 1>,
@@ -597,6 +642,9 @@ def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCVecEltisVT<3, i1>,
                                          SDTCisSameNumEltsAs<1, 3>]>;
 
+def X86VMSintToFP    : SDNode<"X86ISD::MCVTSI2P",  SDTMVintToFP>;
+def X86VMUintToFP    : SDNode<"X86ISD::MCVTUI2P",  SDTMVintToFP>;
+
 def X86mcvtp2Int     : SDNode<"X86ISD::MCVTP2SI",  SDTMFloatToInt>;
 def X86mcvtp2UInt    : SDNode<"X86ISD::MCVTP2UI",  SDTMFloatToInt>;
 def X86mcvttp2si     : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
@@ -607,10 +655,9 @@ def X86cvtph2ps     : SDNode<"X86ISD::CVTPH2PS",
                               SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
                                                    SDTCVecEltisVT<1, i16>]> >;
 
-def X86cvtph2psRnd  : SDNode<"X86ISD::CVTPH2PS_RND",
-                              SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
-                                                   SDTCVecEltisVT<1, i16>,
-                                                   SDTCisVT<2, i32>]> >;
+def X86cvtph2psSAE  : SDNode<"X86ISD::CVTPH2PS_SAE",
+                              SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+                                                   SDTCVecEltisVT<1, i16>]> >;
 
 def X86cvtps2ph   : SDNode<"X86ISD::CVTPS2PH",
                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
@@ -623,17 +670,35 @@ def X86mcvtps2ph   : SDNode<"X86ISD::MCVTPS2PH",
                                              SDTCisSameAs<0, 3>,
                                              SDTCVecEltisVT<4, i1>,
                                              SDTCisSameNumEltsAs<1, 4>]> >;
-def X86vfpextRnd  : SDNode<"X86ISD::VFPEXT_RND",
-                        SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+def X86vfpextSAE  : SDNode<"X86ISD::VFPEXT_SAE",
+                        SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
                                              SDTCVecEltisVT<1, f32>,
-                                             SDTCisOpSmallerThanOp<1, 0>,
-                                             SDTCisVT<2, i32>]>>;
+                                             SDTCisOpSmallerThanOp<1, 0>]>>;
 def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
                         SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
                                              SDTCVecEltisVT<1, f64>,
                                              SDTCisOpSmallerThanOp<0, 1>,
                                              SDTCisVT<2, i32>]>>;
 
+// cvt fp to bfloat16
+def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
+                       SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                            SDTCisSameAs<1,2>]>>;
+def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
+                       SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+                                            SDTCVecEltisVT<1, f32>,
+                                            SDTCisSameAs<0, 2>,
+                                            SDTCVecEltisVT<3, i1>,
+                                            SDTCisSameNumEltsAs<1, 3>]>>;
+def X86cvtneps2bf16 :  SDNode<"X86ISD::CVTNEPS2BF16",
+                       SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i16>,
+                                            SDTCVecEltisVT<1, f32>]>>;
+def X86dpbf16ps :      SDNode<"X86ISD::DPBF16PS",
+                       SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+                                            SDTCisSameAs<0,1>,
+                                            SDTCVecEltisVT<2, i32>,
+                                            SDTCisSameAs<2,3>]>>;
+
 // galois field arithmetic
 def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
 def X86GF2P8affineqb    : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
@@ -653,18 +718,8 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
                                   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
                                    SDNPWantRoot, SDNPWantParent]>;
 
-def ssmem : Operand<v4f32> {
-  let PrintMethod = "printf32mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
-  let ParserMatchClass = X86Mem32AsmOperand;
-  let OperandType = "OPERAND_MEMORY";
-}
-def sdmem : Operand<v2f64> {
-  let PrintMethod = "printf64mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
-  let ParserMatchClass = X86Mem64AsmOperand;
-  let OperandType = "OPERAND_MEMORY";
-}
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
 
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
@@ -695,9 +750,9 @@ def loadv32i16   : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
 def loadv64i8    : PatFrag<(ops node:$ptr), (v64i8  (load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
-def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
-def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
-def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv8f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
 
 // Like 'store', but always requires vector size alignment.
 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -884,15 +939,20 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
 def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
 def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
 
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzmovl
-                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
-                           (bitconvert (v4i32 (X86vzmovl
-                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+def X86vzload32 : PatFrag<(ops node:$src),
+                          (X86vzld node:$src), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
+}]>;
 
-def vzload_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzload node:$src)))>;
+def X86vzload64 : PatFrag<(ops node:$src),
+                          (X86vzld node:$src), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
+
+def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr),
+                                 (X86vextractst node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
 
 
 def fp32imm0 : PatLeaf<(f32 fpimm), [{
@@ -903,20 +963,6 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{
   return N->isExactlyValue(+0.0);
 }]>;
 
-def I8Imm : SDNodeXForm<imm, [{
-  // Transformation function: get the low 8 bits.
-  return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
-}]>;
-
-def FROUND_NO_EXC : PatLeaf<(i32 8)>;
-def FROUND_CURRENT : PatLeaf<(i32 4)>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm  : SDNodeXForm<imm, [{
-  // Transformation function: imm >> 3
-  return getI32Imm(N->getZExtValue() >> 3, SDLoc(N));
-}]>;
-
 // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
 // to VEXTRACTF128/VEXTRACTI128 imm.
 def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
@@ -943,8 +989,10 @@ def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
 
 def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
                                    (extract_subvector node:$bigvec,
-                                                      node:$index), [{}],
-                                  EXTRACT_get_vextract128_imm>;
+                                                      node:$index), [{
+  // Index 0 can be handled via extract_subreg.
+  return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract128_imm>;
 
 def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
                                       node:$index),
@@ -954,8 +1002,10 @@ def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
 
 def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
                                    (extract_subvector node:$bigvec,
-                                                      node:$index), [{}],
-                                  EXTRACT_get_vextract256_imm>;
+                                                      node:$index), [{
+  // Index 0 can be handled via extract_subreg.
+  return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract256_imm>;
 
 def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
                                       node:$index),
@@ -963,70 +1013,46 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
                                                    node:$index), [{}],
                                 INSERT_get_vinsert256_imm>;
 
-def X86mload : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (masked_load node:$src1, node:$src2, node:$src3), [{
+def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                          (masked_ld node:$src1, node:$src2, node:$src3), [{
   return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
     cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
 }]>;
 
-def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mload node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mload node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
-}]>;
-
-def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mload node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
-}]>;
-
-def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                          (masked_load node:$src1, node:$src2, node:$src3), [{
-  return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
-    cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+  // Use the node type to determine the size the alignment needs to match.
+  // We can't use memory VT because type widening changes the node VT, but
+  // not the memory VT.
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  return Ld->getAlignment() >= Ld->getValueType(0).getStoreSize();
 }]>;
 
 def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (masked_load node:$src1, node:$src2, node:$src3), [{
+                         (masked_ld node:$src1, node:$src2, node:$src3), [{
   return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
 }]>;
 
 // Masked store fragments.
 // X86mstore can't be implemented in core DAG files because some targets
 // do not support vector types (llvm-tblgen will fail).
-def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                        (masked_store node:$src1, node:$src2, node:$src3), [{
+def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                        (masked_st node:$src1, node:$src2, node:$src3), [{
   return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
          (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
 }]>;
 
-def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mstore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mstore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
-}]>;
-
-def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                         (X86mstore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
-}]>;
-
-def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                          (masked_store node:$src1, node:$src2, node:$src3), [{
-  return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
-         (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+  // Use the node type to determine the size the alignment needs to match.
+  // We can't use memory VT because type widening changes the node VT, but
+  // not the memory VT.
+  auto *St = cast<MaskedStoreSDNode>(N);
+  return St->getAlignment() >= St->getOperand(1).getValueType().getStoreSize();
 }]>;
 
 def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                             (masked_store node:$src1, node:$src2, node:$src3), [{
+                             (masked_st node:$src1, node:$src2, node:$src3), [{
     return cast<MaskedStoreSDNode>(N)->isCompressingStore();
 }]>;
 
@@ -1034,7 +1060,7 @@ def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
 // X86mtruncstore can't be implemented in core DAG files because some targets
 // doesn't support vector type ( llvm-tblgen will fail)
 def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                             (masked_store node:$src1, node:$src2, node:$src3), [{
+                             (masked_st node:$src1, node:$src2, node:$src3), [{
     return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
 }]>;
 def masked_truncstorevi8 :
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ab14ee7fadf2..dbe45356c42b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -220,16 +219,22 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
     return true;
   case X86::MOV32rm:
   case X86::MOVSSrm:
-  case X86::VMOVSSZrm:
+  case X86::MOVSSrm_alt:
   case X86::VMOVSSrm:
+  case X86::VMOVSSrm_alt:
+  case X86::VMOVSSZrm:
+  case X86::VMOVSSZrm_alt:
   case X86::KMOVDkm:
     MemBytes = 4;
     return true;
   case X86::MOV64rm:
   case X86::LD_Fp64m:
   case X86::MOVSDrm:
+  case X86::MOVSDrm_alt:
   case X86::VMOVSDrm:
+  case X86::VMOVSDrm_alt:
   case X86::VMOVSDZrm:
+  case X86::VMOVSDZrm_alt:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::KMOVQkm:
@@ -483,9 +488,10 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case X86::MOV16rm:
   case X86::MOV32rm:
   case X86::MOV64rm:
-  case X86::LD_Fp64m:
   case X86::MOVSSrm:
+  case X86::MOVSSrm_alt:
   case X86::MOVSDrm:
+  case X86::MOVSDrm_alt:
   case X86::MOVAPSrm:
   case X86::MOVUPSrm:
   case X86::MOVAPDrm:
@@ -493,7 +499,9 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
   case X86::VMOVSSrm:
+  case X86::VMOVSSrm_alt:
   case X86::VMOVSDrm:
+  case X86::VMOVSDrm_alt:
   case X86::VMOVAPSrm:
   case X86::VMOVUPSrm:
   case X86::VMOVAPDrm:
@@ -510,7 +518,9 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case X86::MMX_MOVQ64rm:
   // AVX-512
   case X86::VMOVSSZrm:
+  case X86::VMOVSSZrm_alt:
   case X86::VMOVSDZrm:
+  case X86::VMOVSDZrm_alt:
   case X86::VMOVAPDZ128rm:
   case X86::VMOVAPDZ256rm:
   case X86::VMOVAPDZrm:
@@ -590,96 +600,12 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   return true;
 }
 
-bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator I) const {
-  MachineBasicBlock::iterator E = MBB.end();
-
-  // For compile time consideration, if we are not able to determine the
-  // safety after visiting 4 instructions in each direction, we will assume
-  // it's not safe.
-  MachineBasicBlock::iterator Iter = I;
-  for (unsigned i = 0; Iter != E && i < 4; ++i) {
-    bool SeenDef = false;
-    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
-      MachineOperand &MO = Iter->getOperand(j);
-      if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
-        SeenDef = true;
-      if (!MO.isReg())
-        continue;
-      if (MO.getReg() == X86::EFLAGS) {
-        if (MO.isUse())
-          return false;
-        SeenDef = true;
-      }
-    }
-
-    if (SeenDef)
-      // This instruction defines EFLAGS, no need to look any further.
-      return true;
-    ++Iter;
-    // Skip over debug instructions.
-    while (Iter != E && Iter->isDebugInstr())
-      ++Iter;
-  }
-
-  // It is safe to clobber EFLAGS at the end of a block of no successor has it
-  // live in.
-  if (Iter == E) {
-    for (MachineBasicBlock *S : MBB.successors())
-      if (S->isLiveIn(X86::EFLAGS))
-        return false;
-    return true;
-  }
-
-  MachineBasicBlock::iterator B = MBB.begin();
-  Iter = I;
-  for (unsigned i = 0; i < 4; ++i) {
-    // If we make it to the beginning of the block, it's safe to clobber
-    // EFLAGS iff EFLAGS is not live-in.
-    if (Iter == B)
-      return !MBB.isLiveIn(X86::EFLAGS);
-
-    --Iter;
-    // Skip over debug instructions.
-    while (Iter != B && Iter->isDebugInstr())
-      --Iter;
-
-    bool SawKill = false;
-    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
-      MachineOperand &MO = Iter->getOperand(j);
-      // A register mask may clobber EFLAGS, but we should still look for a
-      // live EFLAGS def.
-      if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
-        SawKill = true;
-      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
-        if (MO.isDef()) return MO.isDead();
-        if (MO.isKill()) SawKill = true;
-      }
-    }
-
-    if (SawKill)
-      // This instruction kills EFLAGS and doesn't redefine it, so
-      // there's no need to look further.
-      return true;
-  }
-
-  // Conservative answer.
-  return false;
-}
-
 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr &Orig,
                                  const TargetRegisterInfo &TRI) const {
-  bool ClobbersEFLAGS = false;
-  for (const MachineOperand &MO : Orig.operands()) {
-    if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
-      ClobbersEFLAGS = true;
-      break;
-    }
-  }
-
+  bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
   if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
     // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
     // effects.
@@ -796,11 +722,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
 
 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
-    LiveVariables *LV) const {
+    LiveVariables *LV, bool Is8BitOp) const {
   // We handle 8-bit adds and various 16-bit opcodes in the switch below.
-  bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+  assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
               *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
          "Unexpected type for LEA transform");
 
@@ -830,7 +755,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
   unsigned Src = MI.getOperand(1).getReg();
   bool IsDead = MI.getOperand(0).isDead();
   bool IsKill = MI.getOperand(1).isKill();
-  unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
+  unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
   assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
   BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
   MachineInstr *InsMI =
@@ -842,19 +767,23 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
       BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
   switch (MIOpc) {
   default: llvm_unreachable("Unreachable!");
+  case X86::SHL8ri:
   case X86::SHL16ri: {
     unsigned ShAmt = MI.getOperand(2).getImm();
     MIB.addReg(0).addImm(1ULL << ShAmt)
        .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
     break;
   }
+  case X86::INC8r:
   case X86::INC16r:
     addRegOffset(MIB, InRegLEA, true, 1);
     break;
+  case X86::DEC8r:
   case X86::DEC16r:
     addRegOffset(MIB, InRegLEA, true, -1);
     break;
   case X86::ADD8ri:
+  case X86::ADD8ri_DB:
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
@@ -862,6 +791,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
     addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
     break;
   case X86::ADD8rr:
+  case X86::ADD8rr_DB:
   case X86::ADD16rr:
   case X86::ADD16rr_DB: {
     unsigned Src2 = MI.getOperand(2).getReg();
@@ -948,9 +878,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   MachineInstr *NewMI = nullptr;
   bool Is64Bit = Subtarget.is64Bit();
 
+  bool Is8BitOp = false;
   unsigned MIOpc = MI.getOpcode();
   switch (MIOpc) {
-  default: return nullptr;
+  default: llvm_unreachable("Unreachable!");
   case X86::SHL64ri: {
     assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
@@ -1000,12 +931,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 
     break;
   }
+  case X86::SHL8ri:
+    Is8BitOp = true;
+    LLVM_FALLTHROUGH;
   case X86::SHL16ri: {
     assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
     unsigned ShAmt = getTruncatedShiftCount(MI, 2);
     if (!isTruncatedShiftCountForLEA(ShAmt))
       return nullptr;
-    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
   }
   case X86::INC64r:
   case X86::INC32r: {
@@ -1029,8 +963,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     NewMI = addOffset(MIB, 1);
     break;
   }
-  case X86::INC16r:
-    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
   case X86::DEC64r:
   case X86::DEC32r: {
     assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
@@ -1054,8 +986,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 
     break;
   }
+  case X86::DEC8r:
+  case X86::INC8r:
+    Is8BitOp = true;
+    LLVM_FALLTHROUGH;
   case X86::DEC16r:
-    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+  case X86::INC16r:
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
   case X86::ADD64rr:
   case X86::ADD64rr_DB:
   case X86::ADD32rr:
@@ -1094,9 +1031,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::ADD8rr:
+  case X86::ADD8rr_DB:
+    Is8BitOp = true;
+    LLVM_FALLTHROUGH;
   case X86::ADD16rr:
   case X86::ADD16rr_DB:
-    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
@@ -1130,11 +1070,59 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     break;
   }
   case X86::ADD8ri:
+  case X86::ADD8ri_DB:
+    Is8BitOp = true;
+    LLVM_FALLTHROUGH;
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
-    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+    return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+  case X86::SUB8ri:
+  case X86::SUB16ri8:
+  case X86::SUB16ri:
+    /// FIXME: Support these similar to ADD8ri/ADD16ri*.
+    return nullptr;
+  case X86::SUB32ri8:
+  case X86::SUB32ri: {
+    int64_t Imm = MI.getOperand(2).getImm();
+    if (!isInt<32>(-Imm))
+      return nullptr;
+
+    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+
+    bool isKill;
+    unsigned SrcReg;
+    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+                        SrcReg, isKill, ImplicitOp, LV))
+      return nullptr;
+
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+                                  .add(Dest)
+                                  .addReg(SrcReg, getKillRegState(isKill));
+    if (ImplicitOp.getReg() != 0)
+      MIB.add(ImplicitOp);
+
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
+  case X86::SUB64ri8:
+  case X86::SUB64ri32: {
+    int64_t Imm = MI.getOperand(2).getImm();
+    if (!isInt<32>(-Imm))
+      return nullptr;
+
+    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
+                                      get(X86::LEA64r)).add(Dest).add(Src);
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
   case X86::VMOVDQU8Z128rmk:
   case X86::VMOVDQU8Z256rmk:
   case X86::VMOVDQU8Zrmk:
@@ -1522,7 +1510,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VBLENDPDrri:
   case X86::VBLENDPSrri:
     // If we're optimizing for size, try to use MOVSD/MOVSS.
-    if (MI.getParent()->getParent()->getFunction().optForSize()) {
+    if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
       unsigned Mask, Opc;
       switch (MI.getOpcode()) {
       default: llvm_unreachable("Unreachable!");
@@ -1548,47 +1536,90 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VPBLENDWrri:
   case X86::VPBLENDDYrri:
   case X86::VPBLENDWYrri:{
-    unsigned Mask;
+    int8_t Mask;
     switch (MI.getOpcode()) {
     default: llvm_unreachable("Unreachable!");
-    case X86::BLENDPDrri:    Mask = 0x03; break;
-    case X86::BLENDPSrri:    Mask = 0x0F; break;
-    case X86::PBLENDWrri:    Mask = 0xFF; break;
-    case X86::VBLENDPDrri:   Mask = 0x03; break;
-    case X86::VBLENDPSrri:   Mask = 0x0F; break;
-    case X86::VBLENDPDYrri:  Mask = 0x0F; break;
-    case X86::VBLENDPSYrri:  Mask = 0xFF; break;
-    case X86::VPBLENDDrri:   Mask = 0x0F; break;
-    case X86::VPBLENDWrri:   Mask = 0xFF; break;
-    case X86::VPBLENDDYrri:  Mask = 0xFF; break;
-    case X86::VPBLENDWYrri:  Mask = 0xFF; break;
+    case X86::BLENDPDrri:    Mask = (int8_t)0x03; break;
+    case X86::BLENDPSrri:    Mask = (int8_t)0x0F; break;
+    case X86::PBLENDWrri:    Mask = (int8_t)0xFF; break;
+    case X86::VBLENDPDrri:   Mask = (int8_t)0x03; break;
+    case X86::VBLENDPSrri:   Mask = (int8_t)0x0F; break;
+    case X86::VBLENDPDYrri:  Mask = (int8_t)0x0F; break;
+    case X86::VBLENDPSYrri:  Mask = (int8_t)0xFF; break;
+    case X86::VPBLENDDrri:   Mask = (int8_t)0x0F; break;
+    case X86::VPBLENDWrri:   Mask = (int8_t)0xFF; break;
+    case X86::VPBLENDDYrri:  Mask = (int8_t)0xFF; break;
+    case X86::VPBLENDWYrri:  Mask = (int8_t)0xFF; break;
     }
     // Only the least significant bits of Imm are used.
-    unsigned Imm = MI.getOperand(3).getImm() & Mask;
+    // Using int8_t to ensure it will be sign extended to the int64_t that
+    // setImm takes in order to match isel behavior.
+    int8_t Imm = MI.getOperand(3).getImm() & Mask;
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.getOperand(3).setImm(Mask ^ Imm);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
+  case X86::INSERTPSrr:
+  case X86::VINSERTPSrr:
+  case X86::VINSERTPSZrr: {
+    unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
+    unsigned ZMask = Imm & 15;
+    unsigned DstIdx = (Imm >> 4) & 3;
+    unsigned SrcIdx = (Imm >> 6) & 3;
+
+    // We can commute insertps if we zero 2 of the elements, the insertion is
+    // "inline" and we don't override the insertion with a zero.
+    if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
+        countPopulation(ZMask) == 2) {
+      unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
+      assert(AltIdx < 4 && "Illegal insertion index");
+      unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
+      auto &WorkingMI = cloneIfNew(MI);
+      WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
+      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                     OpIdx1, OpIdx2);
+    }
+    return nullptr;
+  }
   case X86::MOVSDrr:
   case X86::MOVSSrr:
   case X86::VMOVSDrr:
   case X86::VMOVSSrr:{
     // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
-    assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+    if (Subtarget.hasSSE41()) {
+      unsigned Mask, Opc;
+      switch (MI.getOpcode()) {
+      default: llvm_unreachable("Unreachable!");
+      case X86::MOVSDrr:  Opc = X86::BLENDPDrri;  Mask = 0x02; break;
+      case X86::MOVSSrr:  Opc = X86::BLENDPSrri;  Mask = 0x0E; break;
+      case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+      case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      }
 
-    unsigned Mask, Opc;
-    switch (MI.getOpcode()) {
-    default: llvm_unreachable("Unreachable!");
-    case X86::MOVSDrr:  Opc = X86::BLENDPDrri;  Mask = 0x02; break;
-    case X86::MOVSSrr:  Opc = X86::BLENDPSrri;  Mask = 0x0E; break;
-    case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
-    case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      auto &WorkingMI = cloneIfNew(MI);
+      WorkingMI.setDesc(get(Opc));
+      WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                     OpIdx1, OpIdx2);
     }
 
+    // Convert to SHUFPD.
+    assert(MI.getOpcode() == X86::MOVSDrr &&
+           "Can only commute MOVSDrr without SSE4.1");
+
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.setDesc(get(Opc));
-    WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+    WorkingMI.setDesc(get(X86::SHUFPDrri));
+    WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                   OpIdx1, OpIdx2);
+  }
+  case X86::SHUFPDrri: {
+    // Commute to MOVSD.
+    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.setDesc(get(X86::MOVSDrr));
+    WorkingMI.RemoveOperand(3);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
@@ -1657,7 +1688,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     // Flip permute source immediate.
     // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
     // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
-    unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
+    int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
@@ -1686,76 +1717,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
-  case X86::CMOVB16rr:  case X86::CMOVB32rr:  case X86::CMOVB64rr:
-  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
-  case X86::CMOVE16rr:  case X86::CMOVE32rr:  case X86::CMOVE64rr:
-  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
-  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
-  case X86::CMOVA16rr:  case X86::CMOVA32rr:  case X86::CMOVA64rr:
-  case X86::CMOVL16rr:  case X86::CMOVL32rr:  case X86::CMOVL64rr:
-  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
-  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
-  case X86::CMOVG16rr:  case X86::CMOVG32rr:  case X86::CMOVG64rr:
-  case X86::CMOVS16rr:  case X86::CMOVS32rr:  case X86::CMOVS64rr:
-  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
-  case X86::CMOVP16rr:  case X86::CMOVP32rr:  case X86::CMOVP64rr:
-  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
-  case X86::CMOVO16rr:  case X86::CMOVO32rr:  case X86::CMOVO64rr:
-  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
-    unsigned Opc;
-    switch (MI.getOpcode()) {
-    default: llvm_unreachable("Unreachable!");
-    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
-    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
-    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
-    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
-    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
-    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
-    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
-    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
-    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
-    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
-    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
-    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
-    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
-    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
-    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
-    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
-    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
-    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
-    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
-    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
-    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
-    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
-    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
-    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
-    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
-    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
-    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
-    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
-    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
-    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
-    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
-    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
-    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
-    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
-    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
-    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
-    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
-    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
-    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
-    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
-    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
-    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
-    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
-    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
-    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
-    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
-    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
-    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
-    }
+  case X86::CMOV16rr:  case X86::CMOV32rr:  case X86::CMOV64rr: {
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.setDesc(get(Opc));
+    unsigned OpNo = MI.getDesc().getNumOperands() - 1;
+    X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
+    WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
@@ -1879,7 +1845,6 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
   // regardless of the FMA opcode. The FMA opcode is adjusted later.
   if (SrcOpIdx1 == CommuteAnyOperandIndex ||
       SrcOpIdx2 == CommuteAnyOperandIndex) {
-    unsigned CommutableOpIdx1 = SrcOpIdx1;
     unsigned CommutableOpIdx2 = SrcOpIdx2;
 
     // At least one of operands to be commuted is not specified and
@@ -1895,6 +1860,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
     // CommutableOpIdx2 is well defined now. Let's choose another commutable
     // operand and assign its index to CommutableOpIdx1.
     unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
+
+    unsigned CommutableOpIdx1;
     for (CommutableOpIdx1 = LastCommutableVecOp;
          CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
       // Just ignore and skip the k-mask operand.
@@ -1946,28 +1913,43 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
   case X86::VCMPPDZ128rri:
   case X86::VCMPPSZ128rri:
   case X86::VCMPPDZ256rri:
-  case X86::VCMPPSZ256rri: {
+  case X86::VCMPPSZ256rri:
+  case X86::VCMPPDZrrik:
+  case X86::VCMPPSZrrik:
+  case X86::VCMPPDZ128rrik:
+  case X86::VCMPPSZ128rrik:
+  case X86::VCMPPDZ256rrik:
+  case X86::VCMPPSZ256rrik: {
+    unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
+
     // Float comparison can be safely commuted for
     // Ordered/Unordered/Equal/NotEqual tests
-    unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+    unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
     switch (Imm) {
     case 0x00: // EQUAL
     case 0x03: // UNORDERED
     case 0x04: // NOT EQUAL
     case 0x07: // ORDERED
-      // The indices of the commutable operands are 1 and 2.
+      // The indices of the commutable operands are 1 and 2 (or 2 and 3
+      // when masked).
       // Assign them to the returned operand indices here.
-      return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+      return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
+                                  2 + OpOffset);
     }
     return false;
   }
-  case X86::MOVSDrr:
   case X86::MOVSSrr:
-  case X86::VMOVSDrr:
-  case X86::VMOVSSrr:
+    // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+    // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+    // AVX implies sse4.1.
     if (Subtarget.hasSSE41())
       return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
     return false;
+  case X86::SHUFPDrri:
+    // We can commute this to MOVSD.
+    if (MI.getOperand(3).getImm() == 0x02)
+      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+    return false;
   case X86::MOVHLPSrr:
   case X86::UNPCKHPDrr:
   case X86::VMOVHLPSrr:
@@ -2089,125 +2071,33 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
   return false;
 }
 
-X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
-  switch (BrOpc) {
+X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default: return X86::COND_INVALID;
-  case X86::JE_1:  return X86::COND_E;
-  case X86::JNE_1: return X86::COND_NE;
-  case X86::JL_1:  return X86::COND_L;
-  case X86::JLE_1: return X86::COND_LE;
-  case X86::JG_1:  return X86::COND_G;
-  case X86::JGE_1: return X86::COND_GE;
-  case X86::JB_1:  return X86::COND_B;
-  case X86::JBE_1: return X86::COND_BE;
-  case X86::JA_1:  return X86::COND_A;
-  case X86::JAE_1: return X86::COND_AE;
-  case X86::JS_1:  return X86::COND_S;
-  case X86::JNS_1: return X86::COND_NS;
-  case X86::JP_1:  return X86::COND_P;
-  case X86::JNP_1: return X86::COND_NP;
-  case X86::JO_1:  return X86::COND_O;
-  case X86::JNO_1: return X86::COND_NO;
-  }
-}
-
-/// Return condition code of a SET opcode.
-X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
-  switch (Opc) {
+  case X86::JCC_1:
+    return static_cast<X86::CondCode>(
+        MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
+  }
+}
+
+/// Return the condition code operand of a SETCCr/SETCCm, else COND_INVALID.
+X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default: return X86::COND_INVALID;
-  case X86::SETAr:  case X86::SETAm:  return X86::COND_A;
-  case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
-  case X86::SETBr:  case X86::SETBm:  return X86::COND_B;
-  case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
-  case X86::SETEr:  case X86::SETEm:  return X86::COND_E;
-  case X86::SETGr:  case X86::SETGm:  return X86::COND_G;
-  case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
-  case X86::SETLr:  case X86::SETLm:  return X86::COND_L;
-  case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
-  case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
-  case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
-  case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
-  case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
-  case X86::SETOr:  case X86::SETOm:  return X86::COND_O;
-  case X86::SETPr:  case X86::SETPm:  return X86::COND_P;
-  case X86::SETSr:  case X86::SETSm:  return X86::COND_S;
+  case X86::SETCCr: case X86::SETCCm:
+    return static_cast<X86::CondCode>(
+        MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
   }
 }
 
 /// Return condition code of a CMov opcode.
-X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
-  switch (Opc) {
+X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default: return X86::COND_INVALID;
-  case X86::CMOVA16rm:  case X86::CMOVA16rr:  case X86::CMOVA32rm:
-  case X86::CMOVA32rr:  case X86::CMOVA64rm:  case X86::CMOVA64rr:
-    return X86::COND_A;
-  case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
-  case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
-    return X86::COND_AE;
-  case X86::CMOVB16rm:  case X86::CMOVB16rr:  case X86::CMOVB32rm:
-  case X86::CMOVB32rr:  case X86::CMOVB64rm:  case X86::CMOVB64rr:
-    return X86::COND_B;
-  case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
-  case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
-    return X86::COND_BE;
-  case X86::CMOVE16rm:  case X86::CMOVE16rr:  case X86::CMOVE32rm:
-  case X86::CMOVE32rr:  case X86::CMOVE64rm:  case X86::CMOVE64rr:
-    return X86::COND_E;
-  case X86::CMOVG16rm:  case X86::CMOVG16rr:  case X86::CMOVG32rm:
-  case X86::CMOVG32rr:  case X86::CMOVG64rm:  case X86::CMOVG64rr:
-    return X86::COND_G;
-  case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
-  case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
-    return X86::COND_GE;
-  case X86::CMOVL16rm:  case X86::CMOVL16rr:  case X86::CMOVL32rm:
-  case X86::CMOVL32rr:  case X86::CMOVL64rm:  case X86::CMOVL64rr:
-    return X86::COND_L;
-  case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
-  case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
-    return X86::COND_LE;
-  case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
-  case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
-    return X86::COND_NE;
-  case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
-  case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
-    return X86::COND_NO;
-  case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
-  case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
-    return X86::COND_NP;
-  case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
-  case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
-    return X86::COND_NS;
-  case X86::CMOVO16rm:  case X86::CMOVO16rr:  case X86::CMOVO32rm:
-  case X86::CMOVO32rr:  case X86::CMOVO64rm:  case X86::CMOVO64rr:
-    return X86::COND_O;
-  case X86::CMOVP16rm:  case X86::CMOVP16rr:  case X86::CMOVP32rm:
-  case X86::CMOVP32rr:  case X86::CMOVP64rm:  case X86::CMOVP64rr:
-    return X86::COND_P;
-  case X86::CMOVS16rm:  case X86::CMOVS16rr:  case X86::CMOVS32rm:
-  case X86::CMOVS32rr:  case X86::CMOVS64rm:  case X86::CMOVS64rr:
-    return X86::COND_S;
-  }
-}
-
-unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
-  switch (CC) {
-  default: llvm_unreachable("Illegal condition code!");
-  case X86::COND_E:  return X86::JE_1;
-  case X86::COND_NE: return X86::JNE_1;
-  case X86::COND_L:  return X86::JL_1;
-  case X86::COND_LE: return X86::JLE_1;
-  case X86::COND_G:  return X86::JG_1;
-  case X86::COND_GE: return X86::JGE_1;
-  case X86::COND_B:  return X86::JB_1;
-  case X86::COND_BE: return X86::JBE_1;
-  case X86::COND_A:  return X86::JA_1;
-  case X86::COND_AE: return X86::JAE_1;
-  case X86::COND_S:  return X86::JS_1;
-  case X86::COND_NS: return X86::JNS_1;
-  case X86::COND_P:  return X86::JP_1;
-  case X86::COND_NP: return X86::JNP_1;
-  case X86::COND_O:  return X86::JO_1;
-  case X86::COND_NO: return X86::JNO_1;
+  case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
+  case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
+    return static_cast<X86::CondCode>(
+        MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
   }
 }
 
@@ -2293,78 +2183,18 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
   return std::make_pair(CC, NeedSwap);
 }
 
-/// Return a set opcode for the given condition and
-/// whether it has memory operand.
-unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
-  static const uint16_t Opc[16][2] = {
-    { X86::SETAr,  X86::SETAm  },
-    { X86::SETAEr, X86::SETAEm },
-    { X86::SETBr,  X86::SETBm  },
-    { X86::SETBEr, X86::SETBEm },
-    { X86::SETEr,  X86::SETEm  },
-    { X86::SETGr,  X86::SETGm  },
-    { X86::SETGEr, X86::SETGEm },
-    { X86::SETLr,  X86::SETLm  },
-    { X86::SETLEr, X86::SETLEm },
-    { X86::SETNEr, X86::SETNEm },
-    { X86::SETNOr, X86::SETNOm },
-    { X86::SETNPr, X86::SETNPm },
-    { X86::SETNSr, X86::SETNSm },
-    { X86::SETOr,  X86::SETOm  },
-    { X86::SETPr,  X86::SETPm  },
-    { X86::SETSr,  X86::SETSm  }
-  };
-
-  assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
-  return Opc[CC][HasMemoryOperand ? 1 : 0];
-}
-
-/// Return a cmov opcode for the given condition,
-/// register size in bytes, and operand type.
-unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
-                              bool HasMemoryOperand) {
-  static const uint16_t Opc[32][3] = {
-    { X86::CMOVA16rr,  X86::CMOVA32rr,  X86::CMOVA64rr  },
-    { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
-    { X86::CMOVB16rr,  X86::CMOVB32rr,  X86::CMOVB64rr  },
-    { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
-    { X86::CMOVE16rr,  X86::CMOVE32rr,  X86::CMOVE64rr  },
-    { X86::CMOVG16rr,  X86::CMOVG32rr,  X86::CMOVG64rr  },
-    { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
-    { X86::CMOVL16rr,  X86::CMOVL32rr,  X86::CMOVL64rr  },
-    { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
-    { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
-    { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
-    { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
-    { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
-    { X86::CMOVO16rr,  X86::CMOVO32rr,  X86::CMOVO64rr  },
-    { X86::CMOVP16rr,  X86::CMOVP32rr,  X86::CMOVP64rr  },
-    { X86::CMOVS16rr,  X86::CMOVS32rr,  X86::CMOVS64rr  },
-    { X86::CMOVA16rm,  X86::CMOVA32rm,  X86::CMOVA64rm  },
-    { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
-    { X86::CMOVB16rm,  X86::CMOVB32rm,  X86::CMOVB64rm  },
-    { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
-    { X86::CMOVE16rm,  X86::CMOVE32rm,  X86::CMOVE64rm  },
-    { X86::CMOVG16rm,  X86::CMOVG32rm,  X86::CMOVG64rm  },
-    { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
-    { X86::CMOVL16rm,  X86::CMOVL32rm,  X86::CMOVL64rm  },
-    { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
-    { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
-    { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
-    { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
-    { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
-    { X86::CMOVO16rm,  X86::CMOVO32rm,  X86::CMOVO64rm  },
-    { X86::CMOVP16rm,  X86::CMOVP32rm,  X86::CMOVP64rm  },
-    { X86::CMOVS16rm,  X86::CMOVS32rm,  X86::CMOVS64rm  }
-  };
+/// Return the setcc opcode: SETCCm for a memory operand, SETCCr otherwise.
+unsigned X86::getSETOpc(bool HasMemoryOperand) {
+  return HasMemoryOperand ? X86::SETCCm : X86::SETCCr;
+}
 
-  assert(CC < 16 && "Can only handle standard cond codes");
-  unsigned Idx = HasMemoryOperand ? 16+CC : CC;
+/// Return a cmov opcode for the given register size in bytes, and operand type.
+unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
   switch(RegBytes) {
   default: llvm_unreachable("Illegal register size!");
-  case 2: return Opc[Idx][0];
-  case 4: return Opc[Idx][1];
-  case 8: return Opc[Idx][2];
+  case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
+  case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
+  case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
   }
 }
 
@@ -2490,7 +2320,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
     if (!I->isBranch())
       assert(0 && "Can't find the branch to replace!");
 
-    X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode());
+    X86::CondCode CC = X86::getCondFromBranch(*I);
     assert(BranchCond.size() == 1);
     if (CC != BranchCond[0].getImm())
       continue;
@@ -2597,13 +2427,13 @@ bool X86InstrInfo::AnalyzeBranchImpl(
     }
 
     // Handle conditional branches.
-    X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode());
+    X86::CondCode BranchCode = X86::getCondFromBranch(*I);
     if (BranchCode == X86::COND_INVALID)
       return true;  // Can't handle indirect branch.
 
     // In practice we should never have an undef eflags operand, if we do
     // abort here as we are not prepared to preserve the flag.
-    if (I->getOperand(1).isUndef())
+    if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
       return true;
 
     // Working from the bottom, handle the first conditional branch.
@@ -2629,11 +2459,11 @@ bool X86InstrInfo::AnalyzeBranchImpl(
         // Which is a bit more efficient.
         // We conditionally jump to the fall-through block.
         BranchCode = GetOppositeBranchCondition(BranchCode);
-        unsigned JNCC = GetCondBranchFromCond(BranchCode);
         MachineBasicBlock::iterator OldInst = I;
 
-        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
-          .addMBB(UnCondBrIter->getOperand(0).getMBB());
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1))
+          .addMBB(UnCondBrIter->getOperand(0).getMBB())
+          .addImm(BranchCode);
         BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
           .addMBB(TargetBB);
 
@@ -2798,7 +2628,7 @@ unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
     if (I->isDebugInstr())
       continue;
     if (I->getOpcode() != X86::JMP_1 &&
-        X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+        X86::getCondFromBranch(*I) == X86::COND_INVALID)
       break;
     // Remove the branch.
     I->eraseFromParent();
@@ -2837,9 +2667,9 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
   switch (CC) {
   case X86::COND_NE_OR_P:
     // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
     ++Count;
-    BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
     ++Count;
     break;
   case X86::COND_E_AND_NP:
@@ -2850,14 +2680,13 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
                     "body is a fall-through.");
     }
     // Synthesize COND_E_AND_NP with two branches.
-    BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
+    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
     ++Count;
-    BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
     ++Count;
     break;
   default: {
-    unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
     ++Count;
   }
   }
@@ -2880,7 +2709,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
   if (Cond.size() != 1)
     return false;
   // We cannot do the composite conditions, at least not in SSA form.
-  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+  if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
     return false;
 
   // Check register classes.
@@ -2915,10 +2744,12 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
   const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
   assert(Cond.size() == 1 && "Invalid Cond array");
-  unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
-                                 TRI.getRegSizeInBits(RC) / 8,
-                                 false /*HasMemoryOperand*/);
-  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
+                                    false /*HasMemoryOperand*/);
+  BuildMI(MBB, I, DL, get(Opc), DstReg)
+      .addReg(FalseReg)
+      .addReg(TrueReg)
+      .addImm(Cond[0].getImm());
 }
 
 /// Test if the given register is a physical h register.
@@ -2984,22 +2815,22 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
       return X86::MMX_MOVD64to64rr;
   }
 
-  // SrcReg(FR32) -> DestReg(GR32)
-  // SrcReg(GR32) -> DestReg(FR32)
+  // SrcReg(VR128) -> DestReg(GR32)
+  // SrcReg(GR32)  -> DestReg(VR128)
 
   if (X86::GR32RegClass.contains(DestReg) &&
-      X86::FR32XRegClass.contains(SrcReg))
-    // Copy from a FR32 register to a GR32 register.
-    return HasAVX512 ? X86::VMOVSS2DIZrr :
-           HasAVX    ? X86::VMOVSS2DIrr  :
-                       X86::MOVSS2DIrr;
+      X86::VR128XRegClass.contains(SrcReg))
+    // Copy from a VR128 register to a GR32 register.
+    return HasAVX512 ? X86::VMOVPDI2DIZrr :
+           HasAVX    ? X86::VMOVPDI2DIrr  :
+                       X86::MOVPDI2DIrr;
 
-  if (X86::FR32XRegClass.contains(DestReg) &&
+  if (X86::VR128XRegClass.contains(DestReg) &&
       X86::GR32RegClass.contains(SrcReg))
-    // Copy from a GR32 register to a FR32 register.
-    return HasAVX512 ? X86::VMOVDI2SSZrr :
-           HasAVX    ? X86::VMOVDI2SSrr  :
-                       X86::MOVDI2SSrr;
+    // Copy from a GR32 register to a VR128 register.
+    return HasAVX512 ? X86::VMOVDI2PDIZrr :
+           HasAVX    ? X86::VMOVDI2PDIrr  :
+                       X86::MOVDI2PDIrr;
   return 0;
 }
 
@@ -3129,22 +2960,38 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
       return load ? X86::MOV32rm : X86::MOV32mr;
     if (X86::FR32XRegClass.hasSubClassEq(RC))
       return load ?
-        (HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
-        (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+        (HasAVX512 ? X86::VMOVSSZrm_alt :
+         HasAVX    ? X86::VMOVSSrm_alt :
+                     X86::MOVSSrm_alt) :
+        (HasAVX512 ? X86::VMOVSSZmr :
+         HasAVX    ? X86::VMOVSSmr :
+                     X86::MOVSSmr);
     if (X86::RFP32RegClass.hasSubClassEq(RC))
       return load ? X86::LD_Fp32m : X86::ST_Fp32m;
     if (X86::VK32RegClass.hasSubClassEq(RC)) {
       assert(STI.hasBWI() && "KMOVD requires BWI");
       return load ? X86::KMOVDkm : X86::KMOVDmk;
     }
+    // All of these mask pair classes have the same spill size, so the same kind
+    // of kmov instructions can be used with all of them.
+    if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK16PAIRRegClass.hasSubClassEq(RC))
+      return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
     llvm_unreachable("Unknown 4-byte regclass");
   case 8:
     if (X86::GR64RegClass.hasSubClassEq(RC))
       return load ? X86::MOV64rm : X86::MOV64mr;
     if (X86::FR64XRegClass.hasSubClassEq(RC))
       return load ?
-        (HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
-        (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+        (HasAVX512 ? X86::VMOVSDZrm_alt :
+         HasAVX    ? X86::VMOVSDrm_alt :
+                     X86::MOVSDrm_alt) :
+        (HasAVX512 ? X86::VMOVSDZmr :
+         HasAVX    ? X86::VMOVSDmr :
+                     X86::MOVSDmr);
     if (X86::VR64RegClass.hasSubClassEq(RC))
       return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
     if (X86::RFP64RegClass.hasSubClassEq(RC))
@@ -3219,7 +3066,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
 }
 
 bool X86InstrInfo::getMemOperandWithOffset(
-    MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
+    const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset,
     const TargetRegisterInfo *TRI) const {
   const MCInstrDesc &Desc = MemOp.getDesc();
   int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
@@ -3572,25 +3419,39 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
 static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default: return X86::COND_INVALID;
-  case X86::LZCNT16rr: case X86::LZCNT16rm:
-  case X86::LZCNT32rr: case X86::LZCNT32rm:
-  case X86::LZCNT64rr: case X86::LZCNT64rm:
+  case X86::NEG8r:
+  case X86::NEG16r:
+  case X86::NEG32r:
+  case X86::NEG64r:
+    return X86::COND_AE;
+  case X86::LZCNT16rr:
+  case X86::LZCNT32rr:
+  case X86::LZCNT64rr:
     return X86::COND_B;
-  case X86::POPCNT16rr:case X86::POPCNT16rm:
-  case X86::POPCNT32rr:case X86::POPCNT32rm:
-  case X86::POPCNT64rr:case X86::POPCNT64rm:
+  case X86::POPCNT16rr:
+  case X86::POPCNT32rr:
+  case X86::POPCNT64rr:
     return X86::COND_E;
-  case X86::TZCNT16rr: case X86::TZCNT16rm:
-  case X86::TZCNT32rr: case X86::TZCNT32rm:
-  case X86::TZCNT64rr: case X86::TZCNT64rm:
+  case X86::TZCNT16rr:
+  case X86::TZCNT32rr:
+  case X86::TZCNT64rr:
     return X86::COND_B;
-  case X86::BSF16rr: case X86::BSF16rm:
-  case X86::BSF32rr: case X86::BSF32rm:
-  case X86::BSF64rr: case X86::BSF64rm:
-  case X86::BSR16rr: case X86::BSR16rm:
-  case X86::BSR32rr: case X86::BSR32rm:
-  case X86::BSR64rr: case X86::BSR64rm:
+  case X86::BSF16rr:
+  case X86::BSF32rr:
+  case X86::BSF64rr:
+  case X86::BSR16rr:
+  case X86::BSR32rr:
+  case X86::BSR64rr:
     return X86::COND_E;
+  case X86::BLSI32rr:
+  case X86::BLSI64rr:
+    return X86::COND_AE;
+  case X86::BLSR32rr:
+  case X86::BLSR64rr:
+  case X86::BLSMSK32rr:
+  case X86::BLSMSK64rr:
+    return X86::COND_B;
+  // TODO: TBM instructions.
   }
 }
 
@@ -3602,7 +3463,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
                                         int CmpValue,
                                         const MachineRegisterInfo *MRI) const {
   // Check whether we can replace SUB with CMP.
-  unsigned NewOpcode = 0;
   switch (CmpInstr.getOpcode()) {
   default: break;
   case X86::SUB64ri32:
@@ -3623,6 +3483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
     if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
       return false;
     // There is no use of the destination register, we can replace SUB with CMP.
+    unsigned NewOpcode = 0;
     switch (CmpInstr.getOpcode()) {
     default: llvm_unreachable("Unreachable!");
     case X86::SUB64rm:   NewOpcode = X86::CMP64rm;   break;
@@ -3746,7 +3607,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   // If we are done with the basic block, we need to check whether EFLAGS is
   // live-out.
   bool IsSafe = false;
-  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+  SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
   MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
   for (++I; I != E; ++I) {
     const MachineInstr &Instr = *I;
@@ -3763,17 +3624,14 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
 
     // EFLAGS is used by this instruction.
     X86::CondCode OldCC = X86::COND_INVALID;
-    bool OpcIsSET = false;
     if (IsCmpZero || IsSwapped) {
       // We decode the condition code from opcode.
       if (Instr.isBranch())
-        OldCC = X86::getCondFromBranchOpc(Instr.getOpcode());
+        OldCC = X86::getCondFromBranch(Instr);
       else {
-        OldCC = X86::getCondFromSETOpc(Instr.getOpcode());
-        if (OldCC != X86::COND_INVALID)
-          OpcIsSET = true;
-        else
-          OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
+        OldCC = X86::getCondFromSETCC(Instr);
+        if (OldCC == X86::COND_INVALID)
+          OldCC = X86::getCondFromCMov(Instr);
       }
       if (OldCC == X86::COND_INVALID) return false;
     }
@@ -3818,24 +3676,10 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
     }
 
     if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
-      // Synthesize the new opcode.
-      bool HasMemoryOperand = Instr.hasOneMemOperand();
-      unsigned NewOpc;
-      if (Instr.isBranch())
-        NewOpc = GetCondBranchFromCond(ReplacementCC);
-      else if(OpcIsSET)
-        NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand);
-      else {
-        unsigned DstReg = Instr.getOperand(0).getReg();
-        const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
-        NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8,
-                                 HasMemoryOperand);
-      }
-
       // Push the MachineInstr to OpsToUpdate.
       // If it is safe to remove CmpInstr, the condition code of these
       // instructions will be modified.
-      OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+      OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
     }
     if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
       // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
@@ -3876,21 +3720,17 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   }
 
   // Make sure Sub instruction defines EFLAGS and mark the def live.
-  unsigned i = 0, e = Sub->getNumOperands();
-  for (; i != e; ++i) {
-    MachineOperand &MO = Sub->getOperand(i);
-    if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
-      MO.setIsDead(false);
-      break;
-    }
-  }
-  assert(i != e && "Unable to locate a def EFLAGS operand");
+  MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS);
+  assert(FlagDef && "Unable to locate a def EFLAGS operand");
+  FlagDef->setIsDead(false);
 
   CmpInstr.eraseFromParent();
 
   // Modify the condition code of instructions in OpsToUpdate.
-  for (auto &Op : OpsToUpdate)
-    Op.first->setDesc(get(Op.second));
+  for (auto &Op : OpsToUpdate) {
+    Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
+        .setImm(Op.second);
+  }
   return true;
 }
 
@@ -4128,6 +3968,20 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
 
   return true;
 }
+
+static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
+  MIB->setDesc(Desc);
+  int64_t ShiftAmt = MIB->getOperand(2).getImm();
+  // Temporarily remove the immediate so we can add another source register.
+  MIB->RemoveOperand(2);
+  // Add the register. Don't copy the kill flag if there is one.
+  MIB.addReg(MIB->getOperand(1).getReg(),
+             getUndefRegState(MIB->getOperand(1).isUndef()));
+  // Add back the immediate.
+  MIB.addImm(ShiftAmt);
+  return true;
+}
+
 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   bool HasAVX = Subtarget.hasAVX();
   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -4193,6 +4047,12 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       MIB.addReg(SrcReg, RegState::ImplicitDefine);
       return true;
     }
+    if (MI.getOpcode() == X86::AVX512_256_SET0) {
+      // No VLX so we must reference a zmm.
+      unsigned ZReg =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+      MIB->getOperand(0).setReg(ZReg);
+    }
     return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
   }
   case X86::V_SETALLONES:
@@ -4282,6 +4142,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case X86::XOR64_FP:
   case X86::XOR32_FP:
     return expandXorFP(MIB, *this);
+  case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+  case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+  case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+  case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
+  case X86::ADD8rr_DB:    MIB->setDesc(get(X86::OR8rr));    break;
+  case X86::ADD16rr_DB:   MIB->setDesc(get(X86::OR16rr));   break;
+  case X86::ADD32rr_DB:   MIB->setDesc(get(X86::OR32rr));   break;
+  case X86::ADD64rr_DB:   MIB->setDesc(get(X86::OR64rr));   break;
+  case X86::ADD8ri_DB:    MIB->setDesc(get(X86::OR8ri));    break;
+  case X86::ADD16ri_DB:   MIB->setDesc(get(X86::OR16ri));   break;
+  case X86::ADD32ri_DB:   MIB->setDesc(get(X86::OR32ri));   break;
+  case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
+  case X86::ADD16ri8_DB:  MIB->setDesc(get(X86::OR16ri8));  break;
+  case X86::ADD32ri8_DB:  MIB->setDesc(get(X86::OR32ri8));  break;
+  case X86::ADD64ri8_DB:  MIB->setDesc(get(X86::OR64ri8));  break;
   }
   return false;
 }
@@ -4303,7 +4178,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 /// FIXME: This should be turned into a TSFlags.
 ///
 static bool hasPartialRegUpdate(unsigned Opcode,
-                                const X86Subtarget &Subtarget) {
+                                const X86Subtarget &Subtarget,
+                                bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::CVTSI2SSrr:
   case X86::CVTSI2SSrm:
@@ -4313,6 +4189,9 @@ static bool hasPartialRegUpdate(unsigned Opcode,
   case X86::CVTSI2SDrm:
   case X86::CVTSI642SDrr:
   case X86::CVTSI642SDrm:
+    // Load folding won't affect the undef register update since the input is
+    // a GPR.
+    return !ForLoadFold;
   case X86::CVTSD2SSrr:
   case X86::CVTSD2SSrm:
   case X86::CVTSS2SDrr:
@@ -4389,7 +4268,7 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
 
 // Return true for any instruction the copies the high bits of the first source
 // operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode) {
+static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
   switch (Opcode) {
   case X86::VCVTSI2SSrr:
   case X86::VCVTSI2SSrm:
@@ -4407,38 +4286,6 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTSI642SDrm:
   case X86::VCVTSI642SDrr_Int:
   case X86::VCVTSI642SDrm_Int:
-  case X86::VCVTSD2SSrr:
-  case X86::VCVTSD2SSrm:
-  case X86::VCVTSD2SSrr_Int:
-  case X86::VCVTSD2SSrm_Int:
-  case X86::VCVTSS2SDrr:
-  case X86::VCVTSS2SDrm:
-  case X86::VCVTSS2SDrr_Int:
-  case X86::VCVTSS2SDrm_Int:
-  case X86::VRCPSSr:
-  case X86::VRCPSSr_Int:
-  case X86::VRCPSSm:
-  case X86::VRCPSSm_Int:
-  case X86::VROUNDSDr:
-  case X86::VROUNDSDm:
-  case X86::VROUNDSDr_Int:
-  case X86::VROUNDSDm_Int:
-  case X86::VROUNDSSr:
-  case X86::VROUNDSSm:
-  case X86::VROUNDSSr_Int:
-  case X86::VROUNDSSm_Int:
-  case X86::VRSQRTSSr:
-  case X86::VRSQRTSSr_Int:
-  case X86::VRSQRTSSm:
-  case X86::VRSQRTSSm_Int:
-  case X86::VSQRTSSr:
-  case X86::VSQRTSSr_Int:
-  case X86::VSQRTSSm:
-  case X86::VSQRTSSm_Int:
-  case X86::VSQRTSDr:
-  case X86::VSQRTSDr_Int:
-  case X86::VSQRTSDm:
-  case X86::VSQRTSDm_Int:
   // AVX-512
   case X86::VCVTSI2SSZrr:
   case X86::VCVTSI2SSZrm:
@@ -4453,7 +4300,6 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTSI2SDZrr:
   case X86::VCVTSI2SDZrm:
   case X86::VCVTSI2SDZrr_Int:
-  case X86::VCVTSI2SDZrrb_Int:
   case X86::VCVTSI2SDZrm_Int:
   case X86::VCVTSI642SDZrr:
   case X86::VCVTSI642SDZrm:
@@ -4479,6 +4325,42 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTUSI642SDZrr_Int:
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
+    // Load folding won't affect the undef register update since the input is
+    // a GPR.
+    return !ForLoadFold;
+  case X86::VCVTSD2SSrr:
+  case X86::VCVTSD2SSrm:
+  case X86::VCVTSD2SSrr_Int:
+  case X86::VCVTSD2SSrm_Int:
+  case X86::VCVTSS2SDrr:
+  case X86::VCVTSS2SDrm:
+  case X86::VCVTSS2SDrr_Int:
+  case X86::VCVTSS2SDrm_Int:
+  case X86::VRCPSSr:
+  case X86::VRCPSSr_Int:
+  case X86::VRCPSSm:
+  case X86::VRCPSSm_Int:
+  case X86::VROUNDSDr:
+  case X86::VROUNDSDm:
+  case X86::VROUNDSDr_Int:
+  case X86::VROUNDSDm_Int:
+  case X86::VROUNDSSr:
+  case X86::VROUNDSSm:
+  case X86::VROUNDSSr_Int:
+  case X86::VROUNDSSm_Int:
+  case X86::VRSQRTSSr:
+  case X86::VRSQRTSSr_Int:
+  case X86::VRSQRTSSm:
+  case X86::VRSQRTSSm_Int:
+  case X86::VSQRTSSr:
+  case X86::VSQRTSSr_Int:
+  case X86::VSQRTSSm:
+  case X86::VSQRTSSm_Int:
+  case X86::VSQRTSDr:
+  case X86::VSQRTSDr_Int:
+  case X86::VSQRTSDm:
+  case X86::VSQRTSDm_Int:
+  // AVX-512
   case X86::VCVTSD2SSZrr:
   case X86::VCVTSD2SSZrr_Int:
   case X86::VCVTSD2SSZrrb_Int:
@@ -4759,7 +4641,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
       const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
-      if (Size <= RCSize && 4 <= Align) {
+      if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) {
         int PtrOffset = SrcIdx * 4;
         unsigned NewImm = (DstIdx << 4) | ZMask;
         unsigned NewOpCode =
@@ -4783,7 +4665,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
       const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
-      if (Size <= RCSize && 8 <= Align) {
+      if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) {
         unsigned NewOpCode =
             (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
             (MI.getOpcode() == X86::VMOVHLPSrr)  ? X86::VMOVLPSrm     :
@@ -4794,13 +4676,29 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       }
     }
     break;
-  };
+  case X86::UNPCKLPDrr:
+    // If we won't be able to fold this to the memory form of UNPCKL, use
+    // MOVHPD instead. Done as custom because we can't have this in the load
+    // table twice.
+    if (OpNum == 2) {
+      const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+      const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+      unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
+      if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
+        MachineInstr *NewMI =
+            FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
+        return NewMI;
+      }
+    }
+    break;
+  }
 
   return nullptr;
 }
 
-static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
-  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+                                               MachineInstr &MI) {
+  if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
       !MI.getOperand(1).isReg())
     return false;
 
@@ -4828,15 +4726,15 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // For CPUs that favor the register form of a call or push,
   // do not fold loads into calls or pushes, unless optimizing for size
   // aggressively.
-  if (isSlowTwoMemOps && !MF.getFunction().optForMinSize() &&
+  if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
       (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
        MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
        MI.getOpcode() == X86::PUSH64r))
     return nullptr;
 
   // Avoid partial and undef register update stalls unless optimizing for size.
-  if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+  if (!MF.getFunction().hasOptSize() &&
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
@@ -4899,6 +4797,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
                                                   &RI, MF);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
       if (Size < RCSize) {
+        // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
         // Check if it's safe to fold the load. If the size of the object is
         // narrower than the load width, then it's not.
         if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
@@ -4937,9 +4836,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
     if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
       bool HasDef = MI.getDesc().getNumDefs();
-      unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
-      unsigned Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
-      unsigned Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
+      Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+      Register Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
+      Register Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
       bool Tied1 =
           0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
       bool Tied2 =
@@ -4997,14 +4896,15 @@ MachineInstr *
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                     ArrayRef<unsigned> Ops,
                                     MachineBasicBlock::iterator InsertPt,
-                                    int FrameIndex, LiveIntervals *LIS) const {
+                                    int FrameIndex, LiveIntervals *LIS,
+                                    VirtRegMap *VRM) const {
   // Check switch flag
   if (NoFusing)
     return nullptr;
 
   // Avoid partial and undef register update stalls unless optimizing for size.
-  if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+  if (!MF.getFunction().hasOptSize() &&
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
@@ -5073,7 +4973,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
       MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
   unsigned RegSize = TRI.getRegSizeInBits(*RC);
 
-  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) &&
+  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
+       Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
+       Opc == X86::VMOVSSZrm_alt) &&
       RegSize > 32) {
     // These instructions only load 32 bits, we can't fold them if the
     // destination register is wider than 32 bits (4 bytes), and its user
@@ -5087,6 +4989,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
     case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
     case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+    case X86::VCMPSSZrr_Intk:
     case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
     case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
     case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
@@ -5124,7 +5027,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     }
   }
 
-  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) &&
+  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
+       Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
+       Opc == X86::VMOVSDZrm_alt) &&
       RegSize > 64) {
     // These instructions only load 64 bits, we can't fold them if the
     // destination register is wider than 64 bits (8 bytes), and its user
@@ -5138,6 +5043,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
     case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
     case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+    case X86::VCMPSDZrr_Intk:
     case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
     case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
     case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
@@ -5203,8 +5109,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   if (NoFusing) return nullptr;
 
   // Avoid partial and undef register update stalls unless optimizing for size.
-  if (!MF.getFunction().optForSize() &&
-      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+  if (!MF.getFunction().hasOptSize() &&
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
        shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
@@ -5359,10 +5265,7 @@ extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
     } else {
       // Clone the MMO and unset the store flag.
       LoadMMOs.push_back(MF.getMachineMemOperand(
-          MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
-          MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
-          MMO->getSyncScopeID(), MMO->getOrdering(),
-          MMO->getFailureOrdering()));
+          MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
     }
   }
 
@@ -5383,10 +5286,7 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
     } else {
       // Clone the MMO and unset the load flag.
       StoreMMOs.push_back(MF.getMachineMemOperand(
-          MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
-          MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
-          MMO->getSyncScopeID(), MMO->getOrdering(),
-          MMO->getFailureOrdering()));
+          MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
     }
   }
 
@@ -5668,7 +5568,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::LD_Fp64m:
   case X86::LD_Fp80m:
   case X86::MOVSSrm:
+  case X86::MOVSSrm_alt:
   case X86::MOVSDrm:
+  case X86::MOVSDrm_alt:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::MOVAPSrm:
@@ -5679,7 +5581,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVDQUrm:
   // AVX load instructions
   case X86::VMOVSSrm:
+  case X86::VMOVSSrm_alt:
   case X86::VMOVSDrm:
+  case X86::VMOVSDrm_alt:
   case X86::VMOVAPSrm:
   case X86::VMOVUPSrm:
   case X86::VMOVAPDrm:
@@ -5694,7 +5598,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::VMOVDQUYrm:
   // AVX512 load instructions
   case X86::VMOVSSZrm:
+  case X86::VMOVSSZrm_alt:
   case X86::VMOVSDZrm:
+  case X86::VMOVSDZrm_alt:
   case X86::VMOVAPSZ128rm:
   case X86::VMOVUPSZ128rm:
   case X86::VMOVAPSZ128rm_NOVLX:
@@ -5745,7 +5651,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::LD_Fp64m:
   case X86::LD_Fp80m:
   case X86::MOVSSrm:
+  case X86::MOVSSrm_alt:
   case X86::MOVSDrm:
+  case X86::MOVSDrm_alt:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::MOVAPSrm:
@@ -5756,7 +5664,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVDQUrm:
   // AVX load instructions
   case X86::VMOVSSrm:
+  case X86::VMOVSSrm_alt:
   case X86::VMOVSDrm:
+  case X86::VMOVSDrm_alt:
   case X86::VMOVAPSrm:
   case X86::VMOVUPSrm:
   case X86::VMOVAPDrm:
@@ -5771,7 +5681,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::VMOVDQUYrm:
   // AVX512 load instructions
   case X86::VMOVSSZrm:
+  case X86::VMOVSSZrm_alt:
   case X86::VMOVSDZrm:
+  case X86::VMOVSDZrm_alt:
   case X86::VMOVAPSZ128rm:
   case X86::VMOVUPSZ128rm:
   case X86::VMOVAPSZ128rm_NOVLX:
@@ -5943,7 +5855,9 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::MOVSDmr,    X86::MOVSDmr,   X86::MOVPQI2QImr },
   { X86::MOVSSmr,    X86::MOVSSmr,   X86::MOVPDI2DImr },
   { X86::MOVSDrm,    X86::MOVSDrm,   X86::MOVQI2PQIrm },
+  { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm },
   { X86::MOVSSrm,    X86::MOVSSrm,   X86::MOVDI2PDIrm },
+  { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm },
   { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
   { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
   { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
@@ -5973,7 +5887,9 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VMOVSDmr,   X86::VMOVSDmr,   X86::VMOVPQI2QImr },
   { X86::VMOVSSmr,   X86::VMOVSSmr,   X86::VMOVPDI2DImr },
   { X86::VMOVSDrm,   X86::VMOVSDrm,   X86::VMOVQI2PQIrm },
+  { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm },
   { X86::VMOVSSrm,   X86::VMOVSSrm,   X86::VMOVDI2PDIrm },
+  { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm },
   { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
   { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
   { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
@@ -6012,13 +5928,17 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VMOVSDZmr,      X86::VMOVSDZmr,      X86::VMOVPQI2QIZmr  },
   { X86::VMOVSSZmr,      X86::VMOVSSZmr,      X86::VMOVPDI2DIZmr  },
   { X86::VMOVSDZrm,      X86::VMOVSDZrm,      X86::VMOVQI2PQIZrm  },
+  { X86::VMOVSDZrm_alt,  X86::VMOVSDZrm_alt,  X86::VMOVQI2PQIZrm  },
   { X86::VMOVSSZrm,      X86::VMOVSSZrm,      X86::VMOVDI2PDIZrm  },
+  { X86::VMOVSSZrm_alt,  X86::VMOVSSZrm_alt,  X86::VMOVDI2PDIZrm  },
   { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
   { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
   { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
   { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
   { X86::VBROADCASTSSZr,    X86::VBROADCASTSSZr,    X86::VPBROADCASTDZr },
   { X86::VBROADCASTSSZm,    X86::VBROADCASTSSZm,    X86::VPBROADCASTDZm },
+  { X86::VMOVDDUPZ128rr,    X86::VMOVDDUPZ128rr,    X86::VPBROADCASTQZ128r },
+  { X86::VMOVDDUPZ128rm,    X86::VMOVDDUPZ128rm,    X86::VPBROADCASTQZ128m },
   { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
   { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
   { X86::VBROADCASTSDZr,    X86::VBROADCASTSDZr,    X86::VPBROADCASTQZr },
@@ -6109,6 +6029,8 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
   { X86::VPERM2F128rr,   X86::VPERM2F128rr,   X86::VPERM2I128rr },
   { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
   { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
+  { X86::VMOVDDUPrm,     X86::VMOVDDUPrm,     X86::VPBROADCASTQrm},
+  { X86::VMOVDDUPrr,     X86::VMOVDDUPrr,     X86::VPBROADCASTQrr},
   { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
   { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
   { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
@@ -6128,6 +6050,19 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
   { X86::VUNPCKHPSYrr,    X86::VUNPCKHPSYrr,    X86::VPUNPCKHDQYrr },
 };
 
+static const uint16_t ReplaceableInstrsFP[][3] = {
+  //PackedSingle         PackedDouble
+  { X86::MOVLPSrm,       X86::MOVLPDrm,      X86::INSTRUCTION_LIST_END },
+  { X86::MOVHPSrm,       X86::MOVHPDrm,      X86::INSTRUCTION_LIST_END },
+  { X86::MOVHPSmr,       X86::MOVHPDmr,      X86::INSTRUCTION_LIST_END },
+  { X86::VMOVLPSrm,      X86::VMOVLPDrm,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSrm,      X86::VMOVHPDrm,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSmr,      X86::VMOVHPDmr,     X86::INSTRUCTION_LIST_END },
+  { X86::VMOVLPSZ128rm,  X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSZ128rm,  X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
+  { X86::VMOVHPSZ128mr,  X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
+};
+
 static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
   //PackedSingle       PackedDouble       PackedInt
   { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
@@ -6368,7 +6303,7 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
 };
 
 // NOTE: These should only be used by the custom domain methods.
-static const uint16_t ReplaceableCustomInstrs[][3] = {
+static const uint16_t ReplaceableBlendInstrs[][3] = {
   //PackedSingle             PackedDouble             PackedInt
   { X86::BLENDPSrmi,         X86::BLENDPDrmi,         X86::PBLENDWrmi   },
   { X86::BLENDPSrri,         X86::BLENDPDrri,         X86::PBLENDWrri   },
@@ -6377,7 +6312,7 @@ static const uint16_t ReplaceableCustomInstrs[][3] = {
   { X86::VBLENDPSYrmi,       X86::VBLENDPDYrmi,       X86::VPBLENDWYrmi },
   { X86::VBLENDPSYrri,       X86::VBLENDPDYrri,       X86::VPBLENDWYrri },
 };
-static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
   //PackedSingle             PackedDouble             PackedInt
   { X86::VBLENDPSrmi,        X86::VBLENDPDrmi,        X86::VPBLENDDrmi  },
   { X86::VBLENDPSrri,        X86::VBLENDPDrri,        X86::VPBLENDDrri  },
@@ -6552,6 +6487,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
         MI.getOperand(2).getSubReg() == 0)
       return 0x6;
     return 0;
+  case X86::SHUFPDrri:
+    return 0x6;
   }
   return 0;
 }
@@ -6571,9 +6508,9 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
       Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
       unsigned NewImm = Imm;
 
-      const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+      const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
       if (!table)
-        table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+        table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
 
       if (Domain == 1) { // PackedSingle
         AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
@@ -6583,7 +6520,7 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
         if (Subtarget.hasAVX2()) {
           // If we are already VPBLENDW use that, else use VPBLENDD.
           if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
-            table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+            table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
             AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
           }
         } else {
@@ -6672,6 +6609,18 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
     // We must always return true for MOVHLPSrr.
     if (Opcode == X86::MOVHLPSrr)
       return true;
+    break;
+  case X86::SHUFPDrri: {
+    if (Domain == 1) { // PackedSingle: re-encode as the equivalent SHUFPS.
+      unsigned Imm = MI.getOperand(3).getImm();
+      unsigned NewImm = 0x44; // f32 lanes {0,1} of each source (low f64).
+      if (Imm & 1) NewImm |= 0x0a; // High f64 of source 1: f32 lanes {2,3}.
+      if (Imm & 2) NewImm |= 0xa0; // High f64 of source 2: f32 lanes {2,3}.
+      MI.getOperand(3).setImm(NewImm);
+      MI.setDesc(get(X86::SHUFPSrri));
+    }
+    return true; // Any other domain leaves the SHUFPD untouched.
+  }
   }
   return false;
 }
@@ -6691,6 +6640,8 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
       validDomains = 0xe;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
       validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+    } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
+      validDomains = 0x6;
     } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
       // Insert/extract instructions should only effect domain if AVX2
       // is enabled.
@@ -6730,6 +6681,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
            "256-bit vector operations only available in AVX2");
     table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
   }
+  if (!table) { // try the FP table
+    table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
+    assert((!table || Domain < 3) &&
+           "Can only select PackedSingle or PackedDouble");
+  }
   if (!table) { // try the other table
     assert(Subtarget.hasAVX2() &&
            "256-bit insert/extract only available in AVX2");
@@ -7140,6 +7096,20 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::PADDWrr:
   case X86::PADDDrr:
   case X86::PADDQrr:
+  case X86::PMULLWrr:
+  case X86::PMULLDrr:
+  case X86::PMAXSBrr:
+  case X86::PMAXSDrr:
+  case X86::PMAXSWrr:
+  case X86::PMAXUBrr:
+  case X86::PMAXUDrr:
+  case X86::PMAXUWrr:
+  case X86::PMINSBrr:
+  case X86::PMINSDrr:
+  case X86::PMINSWrr:
+  case X86::PMINUBrr:
+  case X86::PMINUDrr:
+  case X86::PMINUWrr:
   case X86::VPANDrr:
   case X86::VPANDYrr:
   case X86::VPANDDZ128rr:
@@ -7243,6 +7213,78 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::VPMULLQZ128rr:
   case X86::VPMULLQZ256rr:
   case X86::VPMULLQZrr:
+  case X86::VPMAXSBrr:
+  case X86::VPMAXSBYrr:
+  case X86::VPMAXSBZ128rr:
+  case X86::VPMAXSBZ256rr:
+  case X86::VPMAXSBZrr:
+  case X86::VPMAXSDrr:
+  case X86::VPMAXSDYrr:
+  case X86::VPMAXSDZ128rr:
+  case X86::VPMAXSDZ256rr:
+  case X86::VPMAXSDZrr:
+  case X86::VPMAXSQZ128rr:
+  case X86::VPMAXSQZ256rr:
+  case X86::VPMAXSQZrr:
+  case X86::VPMAXSWrr:
+  case X86::VPMAXSWYrr:
+  case X86::VPMAXSWZ128rr:
+  case X86::VPMAXSWZ256rr:
+  case X86::VPMAXSWZrr:
+  case X86::VPMAXUBrr:
+  case X86::VPMAXUBYrr:
+  case X86::VPMAXUBZ128rr:
+  case X86::VPMAXUBZ256rr:
+  case X86::VPMAXUBZrr:
+  case X86::VPMAXUDrr:
+  case X86::VPMAXUDYrr:
+  case X86::VPMAXUDZ128rr:
+  case X86::VPMAXUDZ256rr:
+  case X86::VPMAXUDZrr:
+  case X86::VPMAXUQZ128rr:
+  case X86::VPMAXUQZ256rr:
+  case X86::VPMAXUQZrr:
+  case X86::VPMAXUWrr:
+  case X86::VPMAXUWYrr:
+  case X86::VPMAXUWZ128rr:
+  case X86::VPMAXUWZ256rr:
+  case X86::VPMAXUWZrr:
+  case X86::VPMINSBrr:
+  case X86::VPMINSBYrr:
+  case X86::VPMINSBZ128rr:
+  case X86::VPMINSBZ256rr:
+  case X86::VPMINSBZrr:
+  case X86::VPMINSDrr:
+  case X86::VPMINSDYrr:
+  case X86::VPMINSDZ128rr:
+  case X86::VPMINSDZ256rr:
+  case X86::VPMINSDZrr:
+  case X86::VPMINSQZ128rr:
+  case X86::VPMINSQZ256rr:
+  case X86::VPMINSQZrr:
+  case X86::VPMINSWrr:
+  case X86::VPMINSWYrr:
+  case X86::VPMINSWZ128rr:
+  case X86::VPMINSWZ256rr:
+  case X86::VPMINSWZrr:
+  case X86::VPMINUBrr:
+  case X86::VPMINUBYrr:
+  case X86::VPMINUBZ128rr:
+  case X86::VPMINUBZ256rr:
+  case X86::VPMINUBZrr:
+  case X86::VPMINUDrr:
+  case X86::VPMINUDYrr:
+  case X86::VPMINUDZ128rr:
+  case X86::VPMINUDZ256rr:
+  case X86::VPMINUDZrr:
+  case X86::VPMINUQZ128rr:
+  case X86::VPMINUQZ256rr:
+  case X86::VPMINUQZrr:
+  case X86::VPMINUWrr:
+  case X86::VPMINUWYrr:
+  case X86::VPMINUWZ128rr:
+  case X86::VPMINUWZ256rr:
+  case X86::VPMINUWZrr:
   // Normal min/max instructions are not commutative because of NaN and signed
   // zero semantics, but these are. Thus, there's no need to check for global
   // relaxed math; the instructions themselves have the properties we need.
@@ -7698,7 +7740,7 @@ bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
 
   // Does the function use a red zone? If it does, then we can't risk messing
   // with the stack.
-  if (!F.hasFnAttribute(Attribute::NoRedZone)) {
+  if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) {
     // It could have a red zone. If it does, then we don't want to touch it.
     const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
     if (!X86FI || X86FI->getUsesRedZone())
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 159cb50afc5c..13ca17139494 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -1,9 +1,8 @@
 //===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,62 +35,24 @@ enum AsmComments {
   AC_EVEX_2_VEX = MachineInstr::TAsmComments
 };
 
-// X86 specific condition code. These correspond to X86_*_COND in
-// X86InstrInfo.td. They must be kept in synch.
-enum CondCode {
-  COND_A = 0,
-  COND_AE = 1,
-  COND_B = 2,
-  COND_BE = 3,
-  COND_E = 4,
-  COND_G = 5,
-  COND_GE = 6,
-  COND_L = 7,
-  COND_LE = 8,
-  COND_NE = 9,
-  COND_NO = 10,
-  COND_NP = 11,
-  COND_NS = 12,
-  COND_O = 13,
-  COND_P = 14,
-  COND_S = 15,
-  LAST_VALID_COND = COND_S,
-
-  // Artificial condition codes. These are used by AnalyzeBranch
-  // to indicate a block terminated with two conditional branches that together
-  // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
-  // which can't be represented on x86 with a single condition. These
-  // are never used in MachineInstrs and are inverses of one another.
-  COND_NE_OR_P,
-  COND_E_AND_NP,
-
-  COND_INVALID
-};
-
-// Turn condition code into conditional branch opcode.
-unsigned GetCondBranchFromCond(CondCode CC);
-
 /// Return a pair of condition code for the given predicate and whether
 /// the instruction operands should be swaped to match the condition code.
 std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate);
 
-/// Return a set opcode for the given condition and whether it has
-/// a memory operand.
-unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+/// Return a setcc opcode based on whether it has a memory operand.
+unsigned getSETOpc(bool HasMemoryOperand = false);
 
-/// Return a cmov opcode for the given condition, register size in
-/// bytes, and operand type.
-unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
-                         bool HasMemoryOperand = false);
+/// Return a cmov opcode for the given register size in bytes, and operand type.
+unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false);
 
-// Turn jCC opcode into condition code.
-CondCode getCondFromBranchOpc(unsigned Opc);
+// Turn jCC instruction into condition code.
+CondCode getCondFromBranch(const MachineInstr &MI);
 
-// Turn setCC opcode into condition code.
-CondCode getCondFromSETOpc(unsigned Opc);
+// Turn setCC instruction into condition code.
+CondCode getCondFromSETCC(const MachineInstr &MI);
 
-// Turn CMov opcode into condition code.
-CondCode getCondFromCMovOpc(unsigned Opc);
+// Turn CMov instruction into condition code.
+CondCode getCondFromCMov(const MachineInstr &MI);
 
 /// GetOppositeBranchCondition - Return the inverse of the specified cond,
 /// e.g. turning COND_E to COND_NE.
@@ -327,7 +288,8 @@ public:
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
-  bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+  bool getMemOperandWithOffset(const MachineInstr &LdSt,
+                               const MachineOperand *&BaseOp,
                                int64_t &Offset,
                                const TargetRegisterInfo *TRI) const override;
   bool analyzeBranchPredicate(MachineBasicBlock &MBB,
@@ -388,7 +350,8 @@ public:
   foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                         ArrayRef<unsigned> Ops,
                         MachineBasicBlock::iterator InsertPt, int FrameIndex,
-                        LiveIntervals *LIS = nullptr) const override;
+                        LiveIntervals *LIS = nullptr,
+                        VirtRegMap *VRM = nullptr) const override;
 
   /// foldMemoryOperand - Same as the previous version except it allows folding
   /// of any load and store from / to any address, not just from a specific
@@ -453,7 +416,10 @@ public:
   /// conservative. If it cannot definitely determine the safety after visiting
   /// a few instructions in each direction it assumes it's not safe.
   bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator I) const;
+                             MachineBasicBlock::iterator I) const {
+    return MBB.computeRegisterLiveness(&RI, X86::EFLAGS, I, 4) ==
+           MachineBasicBlock::LQR_Dead;
+  }
 
   /// True if MI has a condition code def, e.g. EFLAGS, that is
   /// not marked dead.
@@ -590,7 +556,8 @@ private:
   MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
                                              MachineFunction::iterator &MFI,
                                              MachineInstr &MI,
-                                             LiveVariables *LV) const;
+                                             LiveVariables *LV,
+                                             bool Is8BitOp) const;
 
   /// Handles memory folding for special case instructions, for instance those
   /// requiring custom manipulation of the address.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e53f83baa3c6..8e05dd8ec5c1 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,9 +1,8 @@
 //===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -64,6 +63,10 @@ def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
 
 def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
 
+def SDTX86rdpkru : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                        SDTCisVT<2, i32>]>;
+
 def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i8>]>;
 def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -124,6 +127,9 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
 
 def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
 
+def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+                                         SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
+
 def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
                             [SDNPHasChain,SDNPSideEffect]>;
 def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
@@ -152,6 +158,11 @@ def X86rdrand  : SDNode<"X86ISD::RDRAND",   SDTX86rdrand,
 def X86rdseed  : SDNode<"X86ISD::RDSEED",   SDTX86rdrand,
                         [SDNPHasChain, SDNPSideEffect]>;
 
+def X86rdpkru : SDNode<"X86ISD::RDPKRU",    SDTX86rdpkru,
+                       [SDNPHasChain, SDNPSideEffect]>;
+def X86wrpkru : SDNode<"X86ISD::WRPKRU",    SDTX86wrpkru,
+                       [SDNPHasChain, SDNPSideEffect]>;
+
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
@@ -206,13 +217,6 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
                          SDNPMayLoad]>;
 
-def X86rdtsc   : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
-                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def X86rdtscp  : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
-                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def X86rdpmc   : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
-                        [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-
 def X86Wrapper    : SDNode<"X86ISD::Wrapper",     SDTX86Wrapper>;
 def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP",  SDTX86Wrapper>;
 
@@ -306,6 +310,11 @@ def X86tpause : SDNode<"X86ISD::TPAUSE",
                                             SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
                        [SDNPHasChain, SDNPSideEffect]>;
 
+def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
+                       [SDNPHasChain, SDNPSideEffect]>;
+def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
+                       [SDNPHasChain, SDNPSideEffect]>;
+
 //===----------------------------------------------------------------------===//
 // X86 Operand Definitions.
 //
@@ -371,37 +380,35 @@ def anymem : X86MemOperand<"printanymem">;
 // restrict to only unsized memory.
 def opaquemem : X86MemOperand<"printopaquemem">;
 
-def i8mem   : X86MemOperand<"printi8mem",   X86Mem8AsmOperand>;
-def i16mem  : X86MemOperand<"printi16mem",  X86Mem16AsmOperand>;
-def i32mem  : X86MemOperand<"printi32mem",  X86Mem32AsmOperand>;
-def i64mem  : X86MemOperand<"printi64mem",  X86Mem64AsmOperand>;
-def i128mem : X86MemOperand<"printi128mem", X86Mem128AsmOperand>;
-def i256mem : X86MemOperand<"printi256mem", X86Mem256AsmOperand>;
-def i512mem : X86MemOperand<"printi512mem", X86Mem512AsmOperand>;
-def f32mem  : X86MemOperand<"printf32mem",  X86Mem32AsmOperand>;
-def f64mem  : X86MemOperand<"printf64mem",  X86Mem64AsmOperand>;
-def f80mem  : X86MemOperand<"printf80mem",  X86Mem80AsmOperand>;
-def f128mem : X86MemOperand<"printf128mem", X86Mem128AsmOperand>;
-def f256mem : X86MemOperand<"printf256mem", X86Mem256AsmOperand>;
-def f512mem : X86MemOperand<"printf512mem", X86Mem512AsmOperand>;
-
-def v512mem : X86VMemOperand<VR512, "printf512mem", X86Mem512AsmOperand>;
+def i8mem   : X86MemOperand<"printbytemem",   X86Mem8AsmOperand>;
+def i16mem  : X86MemOperand<"printwordmem",  X86Mem16AsmOperand>;
+def i32mem  : X86MemOperand<"printdwordmem",  X86Mem32AsmOperand>;
+def i64mem  : X86MemOperand<"printqwordmem",  X86Mem64AsmOperand>;
+def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
+def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
+def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
+def f32mem  : X86MemOperand<"printdwordmem",  X86Mem32AsmOperand>;
+def f64mem  : X86MemOperand<"printqwordmem",  X86Mem64AsmOperand>;
+def f80mem  : X86MemOperand<"printtbytemem",  X86Mem80AsmOperand>;
+def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
+def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
+def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
 
 // Gather mem operands
-def vx64mem  : X86VMemOperand<VR128,  "printi64mem",  X86Mem64_RC128Operand>;
-def vx128mem : X86VMemOperand<VR128,  "printi128mem", X86Mem128_RC128Operand>;
-def vx256mem : X86VMemOperand<VR128,  "printi256mem", X86Mem256_RC128Operand>;
-def vy128mem : X86VMemOperand<VR256,  "printi128mem", X86Mem128_RC256Operand>;
-def vy256mem : X86VMemOperand<VR256,  "printi256mem", X86Mem256_RC256Operand>;
-
-def vx64xmem  : X86VMemOperand<VR128X, "printi64mem",  X86Mem64_RC128XOperand>;
-def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>;
-def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>;
-def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>;
-def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>;
-def vy512xmem : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>;
-def vz256mem  : X86VMemOperand<VR512,  "printi256mem", X86Mem256_RC512Operand>;
-def vz512mem  : X86VMemOperand<VR512,  "printi512mem", X86Mem512_RC512Operand>;
+def vx64mem  : X86VMemOperand<VR128,  "printqwordmem",  X86Mem64_RC128Operand>;
+def vx128mem : X86VMemOperand<VR128,  "printxmmwordmem", X86Mem128_RC128Operand>;
+def vx256mem : X86VMemOperand<VR128,  "printymmwordmem", X86Mem256_RC128Operand>;
+def vy128mem : X86VMemOperand<VR256,  "printxmmwordmem", X86Mem128_RC256Operand>;
+def vy256mem : X86VMemOperand<VR256,  "printymmwordmem", X86Mem256_RC256Operand>;
+
+def vx64xmem  : X86VMemOperand<VR128X, "printqwordmem",  X86Mem64_RC128XOperand>;
+def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand>;
+def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand>;
+def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand>;
+def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand>;
+def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand>;
+def vz256mem  : X86VMemOperand<VR512,  "printymmwordmem", X86Mem256_RC512Operand>;
+def vz512mem  : X86VMemOperand<VR512,  "printzmmwordmem", X86Mem512_RC512Operand>;
 
 // A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
 // of a plain GPR, so that it doesn't potentially require a REX prefix.
@@ -409,7 +416,7 @@ def ptr_rc_norex : PointerLikeRegClass<2>;
 def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
 
 def i8mem_NOREX : Operand<iPTR> {
-  let PrintMethod = "printi8mem";
+  let PrintMethod = "printbytemem";
   let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
                        SEGMENT_REG);
   let ParserMatchClass = X86Mem8AsmOperand;
@@ -424,7 +431,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<4>;
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved register are popped.
 def i32mem_TC : Operand<i32> {
-  let PrintMethod = "printi32mem";
+  let PrintMethod = "printdwordmem";
   let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
                        i32imm, SEGMENT_REG);
   let ParserMatchClass = X86Mem32AsmOperand;
@@ -435,7 +442,7 @@ def i32mem_TC : Operand<i32> {
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved register are popped.
 def i64mem_TC : Operand<i64> {
-  let PrintMethod = "printi64mem";
+  let PrintMethod = "printqwordmem";
   let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
                        ptr_rc_tailcall, i32imm, SEGMENT_REG);
   let ParserMatchClass = X86Mem64AsmOperand;
@@ -603,24 +610,10 @@ def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32",
 def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
                                     X86MemOffs64_64AsmOperand>;
 
-def SSECC : Operand<i8> {
-  let PrintMethod = "printSSEAVXCC";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AVXCC : Operand<i8> {
-  let PrintMethod = "printSSEAVXCC";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AVX512ICC : Operand<i8> {
-  let PrintMethod = "printSSEAVXCC";
-  let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def XOPCC : Operand<i8> {
-  let PrintMethod = "printXOPCC";
-  let OperandType = "OPERAND_IMMEDIATE";
+def ccode : Operand<i8> {
+  let PrintMethod = "printCondCode";
+  let OperandNamespace = "X86";
+  let OperandType = "OPERAND_COND_CODE";
 }
 
 class ImmSExtAsmOperandClass : AsmOperandClass {
@@ -640,7 +633,8 @@ def AVX512RCOperand : AsmOperandClass {
 }
 def AVX512RC : Operand<i32> {
   let PrintMethod = "printRoundingControl";
-  let OperandType = "OPERAND_IMMEDIATE";
+  let OperandNamespace = "X86";
+  let OperandType = "OPERAND_ROUNDING_CONTROL";
   let ParserMatchClass = AVX512RCOperand;
 }
 
@@ -718,6 +712,14 @@ def u8imm : Operand<i8> {
   let OperandType = "OPERAND_IMMEDIATE";
 }
 
+// 16-bit immediate but only 8-bits are significant and they are unsigned.
+// Used by BT instructions.
+def i16u8imm : Operand<i16> {
+  let PrintMethod = "printU8Imm";
+  let ParserMatchClass = ImmUnsignedi8AsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
 // 32-bit immediate but only 8-bits are significant and they are unsigned.
 // Used by some SSE/AVX instructions that use intrinsics.
 def i32u8imm : Operand<i32> {
@@ -726,6 +728,14 @@ def i32u8imm : Operand<i32> {
   let OperandType = "OPERAND_IMMEDIATE";
 }
 
+// 64-bit immediate but only 8-bits are significant and they are unsigned.
+// Used by BT instructions.
+def i64u8imm : Operand<i64> {
+  let PrintMethod = "printU8Imm";
+  let ParserMatchClass = ImmUnsignedi8AsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
 // 64-bits but only 32 bits are significant, and those bits are treated as being
 // pc relative.
 def i64i32imm_pcrel : Operand<i64> {
@@ -747,6 +757,33 @@ def lea64mem : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+let RenderMethod = "addMaskPairOperands" in {
+  def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
+  def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
+  def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
+  def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
+  def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
+}
+
+def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
+  let ParserMatchClass = VK1PairAsmOperand;
+}
+
+def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
+  let ParserMatchClass = VK2PairAsmOperand;
+}
+
+def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
+  let ParserMatchClass = VK4PairAsmOperand;
+}
+
+def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
+  let ParserMatchClass = VK8PairAsmOperand;
+}
+
+def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
+  let ParserMatchClass = VK16PairAsmOperand;
+}
 
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
@@ -833,6 +870,8 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
 def PKU        : Predicate<"Subtarget->hasPKU()">;
 def HasVNNI    : Predicate<"Subtarget->hasVNNI()">;
+def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
+def HasBF16      : Predicate<"Subtarget->hasBF16()">;
 
 def HasBITALG    : Predicate<"Subtarget->hasBITALG()">;
 def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
@@ -894,8 +933,10 @@ def HasWBNOINVD  : Predicate<"Subtarget->hasWBNOINVD()">;
 def HasRDPID     : Predicate<"Subtarget->hasRDPID()">;
 def HasWAITPKG   : Predicate<"Subtarget->hasWAITPKG()">;
 def HasINVPCID   : Predicate<"Subtarget->hasINVPCID()">;
+def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
+def HasENQCMD    : Predicate<"Subtarget->hasENQCMD()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
@@ -928,12 +969,12 @@ def IsNotPIC     : Predicate<"!TM.isPositionIndependent()">;
 // the Function object through the <Target>Subtarget and objections were raised
 // to that (see post-commit review comments for r301750).
 let RecomputePerFunction = 1 in {
-  def OptForSize   : Predicate<"MF->getFunction().optForSize()">;
-  def OptForMinSize : Predicate<"MF->getFunction().optForMinSize()">;
-  def OptForSpeed  : Predicate<"!MF->getFunction().optForSize()">;
+  def OptForSize   : Predicate<"MF->getFunction().hasOptSize()">;
+  def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">;
+  def OptForSpeed  : Predicate<"!MF->getFunction().hasOptSize()">;
   def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
-                            "MF->getFunction().optForSize()">;
-  def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().optForSize() || "
+                            "MF->getFunction().hasOptSize()">;
+  def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || "
                                         "!Subtarget->hasSSE41()">;
 }
 
@@ -959,22 +1000,22 @@ include "X86InstrFormats.td"
 
 // X86 specific condition code. These correspond to CondCode in
 // X86InstrInfo.h. They must be kept in synch.
-def X86_COND_A   : PatLeaf<(i8 0)>;  // alt. COND_NBE
-def X86_COND_AE  : PatLeaf<(i8 1)>;  // alt. COND_NC
+def X86_COND_O   : PatLeaf<(i8 0)>;
+def X86_COND_NO  : PatLeaf<(i8 1)>;
 def X86_COND_B   : PatLeaf<(i8 2)>;  // alt. COND_C
-def X86_COND_BE  : PatLeaf<(i8 3)>;  // alt. COND_NA
+def X86_COND_AE  : PatLeaf<(i8 3)>;  // alt. COND_NC
 def X86_COND_E   : PatLeaf<(i8 4)>;  // alt. COND_Z
-def X86_COND_G   : PatLeaf<(i8 5)>;  // alt. COND_NLE
-def X86_COND_GE  : PatLeaf<(i8 6)>;  // alt. COND_NL
-def X86_COND_L   : PatLeaf<(i8 7)>;  // alt. COND_NGE
-def X86_COND_LE  : PatLeaf<(i8 8)>;  // alt. COND_NG
-def X86_COND_NE  : PatLeaf<(i8 9)>;  // alt. COND_NZ
-def X86_COND_NO  : PatLeaf<(i8 10)>;
+def X86_COND_NE  : PatLeaf<(i8 5)>;  // alt. COND_NZ
+def X86_COND_BE  : PatLeaf<(i8 6)>;  // alt. COND_NA
+def X86_COND_A   : PatLeaf<(i8 7)>;  // alt. COND_NBE
+def X86_COND_S   : PatLeaf<(i8 8)>;
+def X86_COND_NS  : PatLeaf<(i8 9)>;
+def X86_COND_P   : PatLeaf<(i8 10)>; // alt. COND_PE
 def X86_COND_NP  : PatLeaf<(i8 11)>; // alt. COND_PO
-def X86_COND_NS  : PatLeaf<(i8 12)>;
-def X86_COND_O   : PatLeaf<(i8 13)>;
-def X86_COND_P   : PatLeaf<(i8 14)>; // alt. COND_PE
-def X86_COND_S   : PatLeaf<(i8 15)>;
+def X86_COND_L   : PatLeaf<(i8 12)>; // alt. COND_NGE
+def X86_COND_GE  : PatLeaf<(i8 13)>; // alt. COND_NL
+def X86_COND_LE  : PatLeaf<(i8 14)>; // alt. COND_NG
+def X86_COND_G   : PatLeaf<(i8 15)>; // alt. COND_NLE
 
 def i16immSExt8  : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
 def i32immSExt8  : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
@@ -1007,16 +1048,13 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
 // Eventually, it would be nice to allow ConstantHoisting to merge constants
 // globally for potentially added savings.
 //
-def imm8_su : PatLeaf<(i8 relocImm), [{
+def relocImm8_su : PatLeaf<(i8 relocImm), [{
     return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;
-def imm16_su : PatLeaf<(i16 relocImm), [{
+def relocImm16_su : PatLeaf<(i16 relocImm), [{
     return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;
-def imm32_su : PatLeaf<(i32 relocImm), [{
-    return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
+def relocImm32_su : PatLeaf<(i32 relocImm), [{
     return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;
 
@@ -1121,7 +1159,19 @@ def extloadi32i16  : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
 def extloadi64i1   : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
 def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
 def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+// We can treat an i8/i16 extending load to i64 as a 32 bit load if it's known
+// to be 4 byte aligned or better.
+def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  if (ExtType != ISD::EXTLOAD)
+    return false;
+  if (LD->getMemoryVT() == MVT::i32)
+    return true;
+
+  return LD->getAlignment() >= 4 && !LD->isVolatile();
+}]>;
 
 
 // An 'and' node with a single use.
@@ -1517,16 +1567,16 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
 let SchedRW = [WriteStore] in {
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
                    "mov{b}\t{$src, $dst|$dst, $src}",
-                   [(store (i8 imm8_su:$src), addr:$dst)]>;
+                   [(store (i8 relocImm8_su:$src), addr:$dst)]>;
 def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
                    "mov{w}\t{$src, $dst|$dst, $src}",
-                   [(store (i16 imm16_su:$src), addr:$dst)]>, OpSize16;
+                   [(store (i16 relocImm16_su:$src), addr:$dst)]>, OpSize16;
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
-                   [(store (i32 imm32_su:$src), addr:$dst)]>, OpSize32;
+                   [(store (i32 relocImm32_su:$src), addr:$dst)]>, OpSize32;
 def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                        "mov{q}\t{$src, $dst|$dst, $src}",
-                       [(store i64immSExt32_su:$src, addr:$dst)]>,
+                       [(store i64relocImmSExt32_su:$src, addr:$dst)]>,
                        Requires<[In64BitMode]>;
 } // SchedRW
 
@@ -1773,36 +1823,36 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
 }
 
 let SchedRW = [WriteBitTest] in {
-def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16u8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+                [(set EFLAGS, (X86bt GR16:$src1, imm:$src2))]>,
                 OpSize16, TB;
-def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32u8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>,
+                [(set EFLAGS, (X86bt GR32:$src1, imm:$src2))]>,
                 OpSize32, TB;
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64u8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+                [(set EFLAGS, (X86bt GR64:$src1, imm:$src2))]>, TB;
 } // SchedRW
 
 // Note that these instructions aren't slow because that only applies when the
 // other operand is in a register. When it's an immediate, bt is still fast.
 let SchedRW = [WriteBitTestImmLd] in {
-def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
                   "bt{w}\t{$src2, $src1|$src1, $src2}",
                   [(set EFLAGS, (X86bt (loadi16 addr:$src1),
-                                       i16immSExt8:$src2))]>,
+                                       imm:$src2))]>,
                   OpSize16, TB;
-def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
                   "bt{l}\t{$src2, $src1|$src1, $src2}",
                   [(set EFLAGS, (X86bt (loadi32 addr:$src1),
-                                       i32immSExt8:$src2))]>,
+                                       imm:$src2))]>,
                   OpSize32, TB;
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
                 "bt{q}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi64 addr:$src1),
-                                     i64immSExt8:$src2))]>, TB,
+                                     imm:$src2))]>, TB,
                 Requires<[In64BitMode]>;
 } // SchedRW
 
@@ -1832,20 +1882,20 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 }
 
 let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
                     "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // SchedRW
 
 let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
                     "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
                     "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
                     "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
                     Requires<[In64BitMode]>;
 }
@@ -1875,24 +1925,24 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 }
 
 let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
                     OpSize16, TB;
-def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
                     OpSize32, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
                     "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // SchedRW
 
 let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
                     "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
                     OpSize16, TB;
-def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
                     "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
                     OpSize32, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
                     "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
                     Requires<[In64BitMode]>;
 }
@@ -1922,20 +1972,20 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
 }
 
 let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
                     "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // SchedRW
 
 let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
                     "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
                     "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
                     "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
                     Requires<[In64BitMode]>;
 }
@@ -2090,12 +2140,13 @@ def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
-                  "cmpxchg8b\t$dst", []>, TB;
+                  "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>;
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+// NOTE: In64BitMode check needed for the AssemblerPredicate.
 def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
                     "cmpxchg16b\t$dst", []>,
-                    TB, Requires<[HasCmpxchg16b, In64BitMode]>;
+                    TB, Requires<[HasCmpxchg16b,In64BitMode]>;
 } // SchedRW, mayLoad, mayStore, hasSideEffects
 
 
@@ -2388,6 +2439,11 @@ def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
   return hasNoCarryFlagUses(SDValue(N, 1));
 }]>;
 
+def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+                            (X86and_flag node:$lhs, node:$rhs), [{
+  return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
 let Predicates = [HasBMI] in {
   // FIXME: patterns for the load versions are not implemented
   def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2406,12 +2462,20 @@ let Predicates = [HasBMI] in {
             (BLSI64rr GR64:$src)>;
 
   // Versions to match flag producing ops.
-  // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
-  // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+  def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)),
+            (BLSR32rr GR32:$src)>;
+  def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)),
+            (BLSR64rr GR64:$src)>;
+
   def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
             (BLSMSK32rr GR32:$src)>;
   def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
             (BLSMSK64rr GR64:$src)>;
+
+  def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)),
+            (BLSI32rr GR32:$src)>;
+  def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)),
+            (BLSI64rr GR64:$src)>;
 }
 
 multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2653,16 +2717,12 @@ defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;
 // MONITORX/MWAITX Instructions
 //
 let SchedRW = [ WriteSystem ] in {
-  let usesCustomInserter = 1 in {
-    def MONITORX : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
-                           [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>,
-                   Requires<[ HasMWAITX ]>;
-  }
-
-  let Uses = [ EAX, ECX, EDX ] in {
-    def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
-                      TB, Requires<[ HasMWAITX ]>;
-  }
+  let Uses = [ EAX, ECX, EDX ] in
+  def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+                      TB, Requires<[ HasMWAITX, Not64BitMode ]>;
+  let Uses = [ RAX, ECX, EDX ] in
+  def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+                      TB, Requires<[ HasMWAITX, In64BitMode ]>;
 
   let Uses = [ ECX, EAX, EBX ] in {
     def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
@@ -2676,9 +2736,9 @@ def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>,
 def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>,
       Requires<[ In64BitMode ]>;
 
-def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>,
       Requires<[ Not64BitMode ]>;
-def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>,
       Requires<[ In64BitMode ]>;
 
 //===----------------------------------------------------------------------===//
@@ -2737,22 +2797,51 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
                    T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>;
 } // SchedRW
 
+//===----------------------------------------------------------------------===//
+// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
+//
+let SchedRW = [WriteStore], Defs = [EFLAGS] in {
+  def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
+                 T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+  def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
+                 T8XD, AdSize32, Requires<[HasENQCMD]>;
+  def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
+                 T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+
+  def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
+                 T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+  def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
+                 T8XS, AdSize32, Requires<[HasENQCMD]>;
+  def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
+                 T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+}
+
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
 let SchedRW = [WriteSystem] in {
   let Uses = [EAX] in
-  def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
-                TB, Requires<[HasCLZERO]>;
-
-  let usesCustomInserter = 1 in {
-  def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
-                       [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
-  }
+  def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+                  TB, Requires<[HasCLZERO, Not64BitMode]>;
+  let Uses = [RAX] in
+  def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+                  TB, Requires<[HasCLZERO, In64BitMode]>;
 } // SchedRW
 
-def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
-def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
+def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
 
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate TBM instructions.
@@ -2812,8 +2901,6 @@ let Predicates = [HasTBM] in {
             (TZMSK64rr GR64:$src)>;
 
   // Patterns to match flag producing ops.
-  // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
-  // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
   def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
             (BLCI32rr GR32:$src)>;
   def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
@@ -2825,6 +2912,11 @@ let Predicates = [HasTBM] in {
   def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
             (BLCI64rr GR64:$src)>;
 
+  def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+            (BLCIC32rr GR32:$src)>;
+  def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+            (BLCIC64rr GR64:$src)>;
+
   def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
             (BLCMSK32rr GR32:$src)>;
   def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
@@ -2849,6 +2941,11 @@ let Predicates = [HasTBM] in {
             (T1MSKC32rr GR32:$src)>;
   def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
             (T1MSKC64rr GR64:$src)>;
+
+  def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+            (TZMSK32rr GR32:$src)>;
+  def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+            (TZMSK64rr GR64:$src)>;
 } // HasTBM
 
 //===----------------------------------------------------------------------===//
@@ -3231,39 +3328,39 @@ def : InstAlias<"fucompi",      (UCOM_FIPr   ST1), 0>;
 // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
 // gas.
 multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
- def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
-                 (Inst RST:$op), EmitAlias>;
- def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
+ def : InstAlias<!strconcat(Mnemonic, "\t$op"),
+                 (Inst RSTi:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, "\t{%st, %st|st, st}"),
                  (Inst ST0), EmitAlias>;
 }
 
-defm : FpUnaryAlias<"fadd",   ADD_FST0r>;
+defm : FpUnaryAlias<"fadd",   ADD_FST0r, 0>;
 defm : FpUnaryAlias<"faddp",  ADD_FPrST0, 0>;
-defm : FpUnaryAlias<"fsub",   SUB_FST0r>;
-defm : FpUnaryAlias<"fsub{|r}p",  SUBR_FPrST0>;
-defm : FpUnaryAlias<"fsubr",  SUBR_FST0r>;
-defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
-defm : FpUnaryAlias<"fmul",   MUL_FST0r>;
-defm : FpUnaryAlias<"fmulp",  MUL_FPrST0>;
-defm : FpUnaryAlias<"fdiv",   DIV_FST0r>;
-defm : FpUnaryAlias<"fdiv{|r}p",  DIVR_FPrST0>;
-defm : FpUnaryAlias<"fdivr",  DIVR_FST0r>;
-defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
+defm : FpUnaryAlias<"fsub",   SUB_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{|r}p",  SUBR_FPrST0, 0>;
+defm : FpUnaryAlias<"fsubr",  SUBR_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>;
+defm : FpUnaryAlias<"fmul",   MUL_FST0r, 0>;
+defm : FpUnaryAlias<"fmulp",  MUL_FPrST0, 0>;
+defm : FpUnaryAlias<"fdiv",   DIV_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{|r}p",  DIVR_FPrST0, 0>;
+defm : FpUnaryAlias<"fdivr",  DIVR_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>;
 defm : FpUnaryAlias<"fcomi",   COM_FIr, 0>;
 defm : FpUnaryAlias<"fucomi",  UCOM_FIr, 0>;
-defm : FpUnaryAlias<"fcompi",   COM_FIPr>;
-defm : FpUnaryAlias<"fucompi",  UCOM_FIPr>;
+defm : FpUnaryAlias<"fcompi",   COM_FIPr, 0>;
+defm : FpUnaryAlias<"fucompi",  UCOM_FIPr, 0>;
 
 
-// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they
 // commute.  We also allow fdiv[r]p/fsubrp even though they don't commute,
 // solely because gas supports it.
-def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
-def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
+def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>;
 
 def : InstAlias<"fnstsw"     , (FNSTSW16r), 0>;
 
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f3357170576..57835b1a256a 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,9 +1,8 @@
 //===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -153,7 +152,9 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
 // MMX EMMS Instruction
 //===----------------------------------------------------------------------===//
 
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+    Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
 def MMX_EMMS  : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
 
 //===----------------------------------------------------------------------===//
@@ -544,7 +545,7 @@ let Predicates = [HasMMX, HasSSE1] in {
                     "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
                                       GR32orGR64:$src2, imm:$src3))]>,
-                    Sched<[WriteVecInsert]>;
+                    Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
 
   def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
                    (outs VR64:$dst),
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index c1a8cc7c5fbf..f7d931510fe2 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -1,9 +1,8 @@
 //===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrSGX.td b/lib/Target/X86/X86InstrSGX.td
index 488cc4438076..747f5aa86653 100644
--- a/lib/Target/X86/X86InstrSGX.td
+++ b/lib/Target/X86/X86InstrSGX.td
@@ -1,9 +1,8 @@
 //===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e2bcd18ce660..7d0a5b87baf4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,9 +1,8 @@
 //===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +21,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, X86MemOperand x86memop,
                            Domain d, X86FoldableSchedWrite sched,
                            bit Is2Addr = 1> {
+let isCodeGenOnly = 1 in {
   let isCommutable = 1 in {
     def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
@@ -37,6 +37,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
        [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
        Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
+}
 
 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
 multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
@@ -44,7 +45,7 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
                                ValueType VT, string asm, Operand memopr,
                                ComplexPattern mem_cpat, Domain d,
                                X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
   def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
            !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
@@ -224,16 +225,29 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
 }
 
 // Loading from memory automatically zeroing upper bits.
-multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
-                         PatFrag mem_pat, string OpcodeStr, Domain d> {
-  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
+                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
+                         Domain d> {
+  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (mem_pat addr:$src))], d>,
+                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                      VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
-  def NAME#rm   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+  def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (mem_pat addr:$src))], d>,
+                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                      Sched<[WriteFLoad]>;
+
+  // _alt version uses FR32/FR64 register class.
+  let isCodeGenOnly = 1 in {
+  def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                         [(set RC:$dst, (mem_pat addr:$src))], d>,
+                         VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
+  def NAME#rm_alt   : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                         [(set RC:$dst, (mem_pat addr:$src))], d>,
+                         Sched<[WriteFLoad]>;
+  }
 }
 
 defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
@@ -242,49 +256,25 @@ defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                         SSEPackedDouble, "MOVSD", UseSSE2>, XD;
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-  defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
+  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                              SSEPackedSingle>, XS;
-  defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
+  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                              SSEPackedDouble>, XD;
 }
 
 // Patterns
 let Predicates = [UseAVX] in {
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
-  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
-  def : Pat<(v4f32 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
-  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (VMOVSSrm addr:$src)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (VMOVSDrm addr:$src)>;
 
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
-  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
-                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f32 (X86vzload addr:$src)),
+  def : Pat<(v8f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
-                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+  def : Pat<(v4f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzload addr:$src)),
-            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
-
-  // Extract and store.
-  def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
-                   addr:$dst),
-            (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
 }
 
 let Predicates = [UseAVX, OptForSize] in {
@@ -304,59 +294,24 @@ let Predicates = [UseAVX, OptForSize] in {
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDrr (v2f64 (V_SET0)),
-                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDrr (v2i64 (V_SET0)),
-                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
 }
 
-let Predicates = [UseSSE1] in {
-  let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
-  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
-  // MOVSS to the lower bits.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
-  }
-
-  // MOVSSrm already zeros the high parts of the register.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
-  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
-  def : Pat<(v4f32 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
-
-  // Extract and store.
-  def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
-                   addr:$dst),
-            (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
+let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
+// Move scalar to XMM zero-extended, zeroing a VR128 then do a
+// MOVSS to the lower bits.
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
 }
 
-let Predicates = [UseSSE2] in {
-  // MOVSDrm already zeros the high parts of the register.
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
-  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
-}
-
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                (VMOVSSrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
-def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                (VMOVSDrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
+let Predicates = [UseSSE2] in
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+          (MOVSDrm addr:$src)>;
+
+let Predicates = [UseSSE1] in
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+          (MOVSSrm addr:$src)>;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
@@ -504,25 +459,6 @@ let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
 } // SchedRW
 } // Predicate
 
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
-                (VMOVAPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
-                (VMOVAPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
-                (VMOVUPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
-                (VMOVUPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
-                (VMOVAPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
-                (VMOVAPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
-                (VMOVUPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
-                (VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
 // Reversed version with ".s" suffix for GAS compatibility.
 def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
                 (VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
@@ -700,10 +636,10 @@ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
 
 let SchedRW = [WriteFStore] in {
 let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movlps\t{$src, $dst|$dst, $src}",
-                     [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
-                                   (iPTR 0))), addr:$dst)]>,
+                     []>,
                      VEX, VEX_WIG;
 def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                      "movlpd\t{$src, $dst|$dst, $src}",
@@ -711,10 +647,10 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                                    (iPTR 0))), addr:$dst)]>,
                      VEX, VEX_WIG;
 }// UseAVX
+let mayStore = 1, hasSideEffects = 0 in
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
-                                 (iPTR 0))), addr:$dst)]>;
+                   []>;
 def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt (v2f64 VR128:$src),
@@ -722,16 +658,19 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
 } // SchedRW
 
 let Predicates = [UseSSE1] in {
-  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-  def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
-                                 (iPTR 0))), addr:$src1),
-            (MOVLPSmr addr:$src1, VR128:$src2)>;
-
   // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
   // end up with a movsd or blend instead of shufp.
   // No need for aligned load, we're only loading 64-bits.
-  def : Pat<(X86Shufp (loadv4f32 addr:$src2), VR128:$src1, (i8 -28)),
+  def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
+                      (i8 -28)),
             (MOVLPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
+            (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(v4f32 (X86vzload64 addr:$src)),
+            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
+  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
+            (MOVLPSmr addr:$dst, VR128:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -744,24 +683,20 @@ let SchedRW = [WriteFStore] in {
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
 let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
 def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (extractelt
-                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
-                                            (bc_v2f64 (v4f32 VR128:$src))),
-                                 (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
+                   []>, VEX, VEX_WIG;
 def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt
                                  (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                  (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
 } // UseAVX
+let mayStore = 1, hasSideEffects = 0 in
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (extractelt
-                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
-                                            (bc_v2f64 (v4f32 VR128:$src))),
-                                 (iPTR 0))), addr:$dst)]>;
+                   []>;
 def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt
@@ -775,19 +710,31 @@ let Predicates = [UseAVX] in {
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
             (VMOVHPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
+            (VMOVHPDrm VR128:$src1, addr:$src2)>;
 
   def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
             (VMOVHPDmr addr:$dst, VR128:$src)>;
+
+  // MOVLPD patterns
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
+            (VMOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
 let Predicates = [UseSSE1] in {
   // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
   // end up with a movsd or blend instead of shufp.
   // No need for aligned load, we're only loading 64-bits.
-  def : Pat<(X86Movlhps VR128:$src1, (loadv4f32 addr:$src2)),
+  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
+            (MOVHPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
             (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
+                                addr:$dst),
+            (MOVHPSmr addr:$dst, VR128:$src)>;
 }
 
 let Predicates = [UseSSE2] in {
@@ -798,11 +745,24 @@ let Predicates = [UseSSE2] in {
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
             (MOVHPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
+            (MOVHPDrm VR128:$src1, addr:$src2)>;
 
   def : Pat<(store (f64 (extractelt
                           (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
             (MOVHPDmr addr:$dst, VR128:$src)>;
+
+  // MOVLPD patterns
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
+            (MOVLPDrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
+  // Use MOVLPD to load into the low bits from a full vector unless we can use
+  // BLENDPD.
+  def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
+            (MOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -847,13 +807,16 @@ let Constraints = "$src1 = $dst" in {
 
 multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                      SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                     string asm, X86FoldableSchedWrite sched> {
-  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
-                        [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
-                        Sched<[sched]>;
-  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
-                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
-                        Sched<[sched.Folded]>;
+                     string asm, string mem, X86FoldableSchedWrite sched,
+                     SchedRead Int2Fpu = ReadDefault> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
+              Sched<[sched, Int2Fpu]>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+              mem#"\t{$src, $dst|$dst, $src}",
+              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
+              Sched<[sched.Folded]>;
 }
 
 multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
@@ -872,74 +835,55 @@ let hasSideEffects = 0 in {
 }
 
 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                          X86MemOperand x86memop, string asm,
+                          X86MemOperand x86memop, string asm, string mem,
                           X86FoldableSchedWrite sched> {
 let hasSideEffects = 0, Predicates = [UseAVX] in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
-              Sched<[sched]>;
+              Sched<[sched, ReadDefault, ReadInt2Fpu]>;
   let mayLoad = 1 in
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
               (ins DstRC:$src1, x86memop:$src),
-              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
 } // hasSideEffects = 0
 }
 
-let Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, Predicates = [UseAVX] in {
 defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                                "cvttss2si\t{$src, $dst|$dst, $src}",
+                                "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I>,
                                 XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
-                                "cvttss2si\t{$src, $dst|$dst, $src}",
+                                "cvttss2si", "cvttss2si",
                                 WriteCvtSS2I>,
                                 XS, VEX, VEX_W, VEX_LIG;
 defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                                "cvttsd2si\t{$src, $dst|$dst, $src}",
+                                "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I>,
                                 XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
-                                "cvttsd2si\t{$src, $dst|$dst, $src}",
+                                "cvttsd2si", "cvttsd2si",
                                 WriteCvtSD2I>,
                                 XD, VEX, VEX_W, VEX_LIG;
-
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
 }
+
 // The assembler can recognize rr 64-bit instructions by seeing a rxx
 // register, but the same isn't true when only using memory operands,
 // provide other assembly "l" and "q" forms to address this explicitly
 // where appropriate to do so.
-defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}",
+let isCodeGenOnly = 1 in {
+defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
                                   WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
-defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}",
+defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
                                   WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}",
+defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
                                   WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
-defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}",
+defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
                                   WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
+} // isCodeGenOnly = 1
 
 let Predicates = [UseAVX] in {
-  def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
-  def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
-
   def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
             (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
   def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -959,52 +903,32 @@ let Predicates = [UseAVX] in {
             (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
 }
 
+let isCodeGenOnly = 1 in {
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}",
+                      "cvttss2si", "cvttss2si",
                       WriteCvtSS2I>, XS;
 defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}",
+                      "cvttss2si", "cvttss2si",
                       WriteCvtSS2I>, XS, REX_W;
 defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
+                      "cvttsd2si", "cvttsd2si",
                       WriteCvtSD2I>, XD;
 defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
+                      "cvttsd2si", "cvttsd2si",
                       WriteCvtSD2I>, XD, REX_W;
 defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
-                      WriteCvtI2SS>, XS;
+                      "cvtsi2ss", "cvtsi2ss{l}",
+                      WriteCvtI2SS, ReadInt2Fpu>, XS;
 defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
-                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
-                      WriteCvtI2SS>, XS, REX_W;
+                      "cvtsi2ss", "cvtsi2ss{q}",
+                      WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W;
 defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
-                      WriteCvtI2SD>, XD;
+                      "cvtsi2sd", "cvtsi2sd{l}",
+                      WriteCvtI2SD, ReadInt2Fpu>, XD;
 defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
-                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
-                      WriteCvtI2SD>, XD, REX_W;
-
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
-
-def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
-                (CVTSI2SSrm FR64:$dst, i32mem:$src), 0, "att">;
-def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
-                (CVTSI2SDrm FR64:$dst, i32mem:$src), 0, "att">;
+                      "cvtsi2sd", "cvtsi2sd{q}",
+                      WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
+} // isCodeGenOnly = 1
 
 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).
@@ -1025,20 +949,20 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
 
 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                     RegisterClass DstRC, X86MemOperand x86memop,
-                    string asm, X86FoldableSchedWrite sched,
+                    string asm, string mem, X86FoldableSchedWrite sched,
                     bit Is2Addr = 1> {
 let hasSideEffects = 0 in {
   def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
                   !if(Is2Addr,
                       !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                       !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-                  []>, Sched<[sched]>;
+                  []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
   let mayLoad = 1 in
   def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
                   (ins DstRC:$src1, x86memop:$src2),
                   !if(Is2Addr,
-                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
-                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+                      asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
+                      asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 }
@@ -1057,48 +981,73 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
                    sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
 
 
-let isCodeGenOnly = 1 in {
-  let Predicates = [UseAVX] in {
-  defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            i32mem, "cvtsi2ss{l}", WriteCvtI2SS, 0>, XS, VEX_4V;
-  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            i64mem, "cvtsi2ss{q}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_W;
-  defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            i32mem, "cvtsi2sd{l}", WriteCvtI2SD, 0>, XD, VEX_4V;
-  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            i64mem, "cvtsi2sd{q}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_W;
-  }
-  let Constraints = "$src1 = $dst" in {
-    defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                          i32mem, "cvtsi2ss{l}", WriteCvtI2SS>, XS;
-    defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                          i64mem, "cvtsi2ss{q}", WriteCvtI2SS>, XS, REX_W;
-    defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                          i32mem, "cvtsi2sd{l}", WriteCvtI2SD>, XD;
-    defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                          i64mem, "cvtsi2sd{q}", WriteCvtI2SD>, XD, REX_W;
-  }
-} // isCodeGenOnly = 1
+let Predicates = [UseAVX] in {
+defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
+defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
+defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
+}
+let Constraints = "$src1 = $dst" in {
+  defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS;
+  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W;
+  defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
+  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W;
+}
+
+def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+               (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
+
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+              (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+              (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+
+def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
+                (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
+def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+                (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
+                (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+                (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;
 
 /// SSE 1 Only
 
 // Aliases for intrinsics
-let isCodeGenOnly = 1 in {
 let Predicates = [UseAVX] in {
 defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                 ssmem, sse_load_f32, "cvttss2si",
-                                WriteCvtSS2I>, XS, VEX;
+                                WriteCvtSS2I>, XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                X86cvtts2Int, ssmem, sse_load_f32,
                                "cvttss2si", WriteCvtSS2I>,
-                               XS, VEX, VEX_W;
+                               XS, VEX, VEX_LIG, VEX_W;
 defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                 sdmem, sse_load_f64, "cvttsd2si",
-                                WriteCvtSS2I>, XD, VEX;
+                                WriteCvtSS2I>, XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                               X86cvtts2Int, sdmem, sse_load_f64,
                               "cvttsd2si", WriteCvtSS2I>,
-                              XD, VEX, VEX_W;
+                              XD, VEX, VEX_LIG, VEX_W;
 }
 defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                     ssmem, sse_load_f32, "cvttss2si",
@@ -1112,7 +1061,40 @@ defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
 defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                   X86cvtts2Int, sdmem, sse_load_f64,
                                   "cvttsd2si", WriteCvtSD2I>, XD, REX_W;
-} // isCodeGenOnly = 1
+
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
+
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
 
 let Predicates = [UseAVX] in {
 defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
@@ -1143,7 +1125,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
                             SSEPackedSingle, WriteCvtI2PS>,
                             PS, Requires<[UseSSE2]>;
 
-let Predicates = [UseAVX] in {
+// AVX aliases
 def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                 (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1160,8 +1142,8 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                 (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                 (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
-}
 
+// SSE aliases
 def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                 (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1182,7 +1164,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
 /// SSE 2 Only
 
 // Convert scalar double to scalar single
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                         (ins FR32:$src1, FR64:$src2),
                         "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1200,6 +1182,7 @@ def : Pat<(f32 (fpround FR64:$src)),
             (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
           Requires<[UseAVX]>;
 
+let isCodeGenOnly = 1 in {
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fpround FR64:$src))]>,
@@ -1209,42 +1192,41 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                     [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
                     XD, Requires<[UseSSE2, OptForSize]>,
                     Sched<[WriteCvtSD2SS.Folded]>;
+}
 
-let isCodeGenOnly = 1 in {
 def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
-                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
-                       XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
+                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                        Sched<[WriteCvtSD2SS]>;
 def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
-                                          VR128:$src1, sse_load_f64:$src2))]>,
-                       XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+                       [(set VR128:$dst,
+                         (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
+                       XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
                        Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
 let Constraints = "$src1 = $dst" in {
 def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
-                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
+                         (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
                        XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
 def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
-                                          VR128:$src1, sse_load_f64:$src2))]>,
+                       [(set VR128:$dst,
+                         (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
                        XD, Requires<[UseSSE2]>,
                        Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
 }
-} // isCodeGenOnly = 1
 
 // Convert scalar single to scalar double
 // SSE2 instructions with XS prefix
-let hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                     (ins FR64:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1257,51 +1239,36 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     XS, VEX_4V, VEX_LIG, VEX_WIG,
                     Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
                     Requires<[UseAVX, OptForSize]>;
-}
+} // isCodeGenOnly = 1, hasSideEffects = 0
 
 def : Pat<(f64 (fpextend FR32:$src)),
     (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
 def : Pat<(fpextend (loadf32 addr:$src)),
     (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
 
-def : Pat<(extloadf32 addr:$src),
-    (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-    Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-    (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
-    Requires<[UseAVX, OptForSpeed]>;
-
+let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fpextend FR32:$src))]>,
                    XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
+                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSS2SD.Folded]>;
+} // isCodeGenOnly = 1
 
-// extload f32 -> f64.  This matches load+fpextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fpextend, we have to match it
-// explicitly here.
-def : Pat<(fpextend (loadf32 addr:$src)),
-          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, XS, VEX_4V, VEX_WIG,
+                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
                     Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
 let mayLoad = 1 in
 def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    []>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+                    []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
                     Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
 let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
 def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
@@ -1316,7 +1283,7 @@ def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
                     []>, XS, Requires<[UseSSE2]>,
                     Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
 }
-} // isCodeGenOnly = 1
+} // hasSideEffects = 0
 
 // Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
 // (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
@@ -1476,15 +1443,11 @@ def VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
 
 // XMM only
-def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
-                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
 def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
                       Sched<[WriteCvtPD2ILd]>, VEX_WIG;
-def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
-                (VCVTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
 
 // YMM only
 def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
@@ -1497,12 +1460,13 @@ def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        [(set VR128:$dst,
                          (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
                        VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
-def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
-                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
-def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
-                (VCVTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
 }
 
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
+                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
+
 def CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -1540,17 +1504,6 @@ def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src)
                           Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
 }
 
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
-            (VCVTTPS2DQrr VR128:$src)>;
-  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
-            (VCVTTPS2DQrm addr:$src)>;
-  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
-            (VCVTTPS2DQYrr VR256:$src)>;
-  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
-            (VCVTTPS2DQYrm addr:$src)>;
-}
-
 def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -1562,39 +1515,23 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                          (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
                        Sched<[WriteCvtPS2ILd]>;
 
-let Predicates = [UseSSE2] in {
-  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
-            (CVTTPS2DQrr VR128:$src)>;
-  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
-            (CVTTPS2DQrm addr:$src)>;
-}
-
-let Predicates = [HasAVX, NoVLX] in
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+let Predicates = [HasAVX, NoVLX] in {
+// XMM only
 def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttpd2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
                         VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
-
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-
-// XMM only
-def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
-                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
-
-let Predicates = [HasAVX, NoVLX] in
 def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>,
                         VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
-def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
-                (VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
 
 // YMM only
-let Predicates = [HasAVX, NoVLX] in {
 def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
@@ -1605,11 +1542,12 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                          [(set VR128:$dst,
                            (v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
                          VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
-}
-def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
-                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
+} // Predicates = [HasAVX, NoVLX]
+
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
-                (VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
+                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
 
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
@@ -1618,21 +1556,6 @@ let Predicates = [HasAVX, NoVLX] in {
             (VCVTTPD2DQYrm addr:$src)>;
 }
 
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (VCVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (VCVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQrm addr:$src)>;
-} // Predicates = [HasAVX, NoVLX]
-
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -1644,21 +1567,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                         (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
                       Sched<[WriteCvtPD2ILd]>;
 
-let Predicates = [UseSSE2] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (CVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
-            (CVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (CVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
-            (CVTTPD2DQrm addr:$src)>;
-} // Predicates = [UseSSE2]
-
 // Convert packed single to packed double
 let Predicates = [HasAVX, NoVLX] in {
                   // SSE2 instructions without OpSize prefix
@@ -1697,7 +1605,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
-                          (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                          (v2f64 (X86VSintToFP
+                                  (bc_v4i32
+                                   (v2i64 (scalar_to_vector
+                                           (loadi64 addr:$src)))))))]>,
                         VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
 def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1721,7 +1632,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                         (v2f64 (X86VSintToFP
+                                 (bc_v4i32
+                                  (v2i64 (scalar_to_vector
+                                          (loadi64 addr:$src)))))))]>,
                        Sched<[WriteCvtI2PDLd]>;
 def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1731,17 +1645,13 @@ def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 // AVX register conversion intrinsics
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTDQ2PDrm addr:$src)>;
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTDQ2PDrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]
 
 // SSE2 register conversion intrinsics
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (CVTDQ2PDrm addr:$src)>;
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (CVTDQ2PDrm addr:$src)>;
 } // Predicates = [UseSSE2]
 
@@ -1749,38 +1659,31 @@ let Predicates = [UseSSE2] in {
 // The assembler can recognize rr 256-bit instructions by seeing a ymm
 // register, but the same isn't true when using memory operands instead.
 // Provide other assembly rr and rm forms to address this explicitly.
-let Predicates = [HasAVX, NoVLX] in
+let Predicates = [HasAVX, NoVLX] in {
+// XMM only
 def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2ps\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
                        VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
-
-// XMM only
-def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
-                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
-let Predicates = [HasAVX, NoVLX] in
 def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
                        VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
-def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
-                (VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
 
-// YMM only
-let Predicates = [HasAVX, NoVLX] in {
 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (fpround VR256:$src))]>,
+                        [(set VR128:$dst, (X86vfpround VR256:$src))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
+                        [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
                         VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
-}
-def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
-                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
+} // Predicates = [HasAVX, NoVLX]
+
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
-                (VCVTPD2PSYrm VR128:$dst, f256mem:$src), 0, "intel">;
+                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
 
 def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2ps\t{$src, $dst|$dst, $src}",
@@ -1791,28 +1694,11 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
                      Sched<[WriteCvtPD2PS.Folded]>;
 
-// AVX 256-bit register conversion intrinsics
-// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
-// whenever possible to avoid declaring two versions of each one.
-
 let Predicates = [HasAVX, NoVLX] in {
-  // Match fpround and fpextend for 128/256-bit conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (VCVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Match fpround and fpextend for 128 conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (CVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
-            (CVTPD2PSrm addr:$src)>;
+  def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
+            (VCVTPD2PSYrr VR256:$src)>;
+  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+            (VCVTPD2PSYrm addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1821,94 +1707,80 @@ let Predicates = [UseSSE2] in {
 
 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                            Operand CC, SDNode OpNode, ValueType VT,
-                            PatFrag ld_frag, string asm, string asm_alt,
+                            SDNode OpNode, ValueType VT,
+                            PatFrag ld_frag, string asm,
                             X86FoldableSchedWrite sched> {
   let isCommutable = 1 in
   def rr : SIi8<0xC2, MRMSrcReg,
-                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
                 Sched<[sched]>;
   def rm : SIi8<0xC2, MRMSrcMem,
-                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1),
                                          (ld_frag addr:$src2), imm:$cc))]>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
-
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
-                      (ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, []>,
-                      Sched<[sched]>, NotMemoryFoldable;
-    let mayLoad = 1 in
-    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
-                      (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
-                      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
-  }
 }
 
-let ExeDomain = SSEPackedSingle in
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
-                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
-                 SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
-let ExeDomain = SSEPackedDouble in
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
-                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
-                 SchedWriteFCmpSizes.PD.Scl>,
-                 XD, VEX_4V, VEX_LIG, VEX_WIG;
-
-let Constraints = "$src1 = $dst" in {
+let isCodeGenOnly = 1 in {
   let ExeDomain = SSEPackedSingle in
-  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
-                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
-                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
-                  SchedWriteFCmpSizes.PS.Scl>, XS;
+  defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
+                   "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+                   SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
   let ExeDomain = SSEPackedDouble in
-  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
-                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
-                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
-                  SchedWriteFCmpSizes.PD.Scl>, XD;
+  defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
+                   "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+                   SchedWriteFCmpSizes.PD.Scl>,
+                   XD, VEX_4V, VEX_LIG, VEX_WIG;
+
+  let Constraints = "$src1 = $dst" in {
+    let ExeDomain = SSEPackedSingle in
+    defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
+                    "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+                    SchedWriteFCmpSizes.PS.Scl>, XS;
+    let ExeDomain = SSEPackedDouble in
+    defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
+                    "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+                    SchedWriteFCmpSizes.PD.Scl>, XD;
+  }
 }
 
-multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
+multiclass sse12_cmp_scalar_int<Operand memop,
                          Intrinsic Int, string asm, X86FoldableSchedWrite sched,
                          ComplexPattern mem_cpat> {
   def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src, CC:$cc), asm,
+                      (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
                                                VR128:$src, imm:$cc))]>,
            Sched<[sched]>;
 let mayLoad = 1 in
   def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, memop:$src, CC:$cc), asm,
+                      (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
                         [(set VR128:$dst, (Int VR128:$src1,
                                                mem_cpat:$src, imm:$cc))]>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-let isCodeGenOnly = 1 in {
-  // Aliases to match intrinsics which expect XMM operand(s).
+// Aliases to match intrinsics which expect XMM operand(s).
+let ExeDomain = SSEPackedSingle in
+defm VCMPSS  : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
+                     "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
+                     SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
+                     XS, VEX_4V, VEX_LIG, VEX_WIG;
+let ExeDomain = SSEPackedDouble in
+defm VCMPSD  : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
+                     "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
+                     SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
+                     XD, VEX_4V, VEX_LIG, VEX_WIG;
+let Constraints = "$src1 = $dst" in {
   let ExeDomain = SSEPackedSingle in
-  defm VCMPSS  : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
-                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                       SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS, VEX_4V;
+  defm CMPSS  : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
+                       "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}",
+                       SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
   let ExeDomain = SSEPackedDouble in
-  defm VCMPSD  : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
-                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                       SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
-                       XD, VEX_4V;
-  let Constraints = "$src1 = $dst" in {
-    let ExeDomain = SSEPackedSingle in
-    defm CMPSS  : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
-                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
-                         SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
-    let ExeDomain = SSEPackedDouble in
-    defm CMPSD  : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
-                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
-                         SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
-}
+  defm CMPSD  : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
+                       "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}",
+                       SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
 }
 
 
@@ -1962,14 +1834,14 @@ let Defs = [EFLAGS] in {
 
   let isCodeGenOnly = 1 in {
     defm VUCOMISS  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
-                      sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
+                      sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
     defm VUCOMISD  : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
-                      sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
+                      sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
 
     defm VCOMISS  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
-                       sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
+                       sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
     defm VCOMISD  : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
-                       sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
+                       sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
   }
   defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                   "ucomiss", WriteFCom>, PS;
@@ -1998,56 +1870,38 @@ let Defs = [EFLAGS] in {
 
 // sse12_cmp_packed - sse 1 & 2 compare packed instructions
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
-                            Operand CC,  ValueType VT, string asm,
-                            string asm_alt, X86FoldableSchedWrite sched,
+                            ValueType VT, string asm,
+                            X86FoldableSchedWrite sched,
                             Domain d, PatFrag ld_frag> {
   let isCommutable = 1 in
   def rri : PIi8<0xC2, MRMSrcReg,
-             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+             (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
              [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
             Sched<[sched]>;
   def rmi : PIi8<0xC2, MRMSrcMem,
-             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
              [(set RC:$dst,
                (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
-
-  // Accept explicit immediate argument form instead of comparison code.
-  let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rri_alt : PIi8<0xC2, MRMSrcReg,
-               (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
-               asm_alt, [], d>, Sched<[sched]>, NotMemoryFoldable;
-    let mayLoad = 1 in
-    def rmi_alt : PIi8<0xC2, MRMSrcMem,
-               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
-               asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
-               NotMemoryFoldable;
-  }
 }
 
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
-               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
-               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
-               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
-               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
 let Constraints = "$src1 = $dst" in {
-  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
-                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+  defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
                  "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
-  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
-                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+  defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
                  "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
 }
@@ -2111,12 +1965,14 @@ let Predicates = [UseSSE1] in {
 /// sse12_shuffle - sse 1 & 2 fp shuffle instructions
 multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                          ValueType vt, string asm, PatFrag mem_frag,
-                         X86FoldableSchedWrite sched, Domain d> {
+                         X86FoldableSchedWrite sched, Domain d,
+                         bit IsCommutable = 0> {
   def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                    [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                        (i8 imm:$src3))))], d>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
+  let isCommutable = IsCommutable in
   def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                  [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -2148,7 +2004,7 @@ let Constraints = "$src1 = $dst" in {
                     memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
   defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                     "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+                    memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2238,6 +2094,13 @@ let Predicates = [HasAVX1Only] in {
             (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
 }
 
+let Predicates = [UseSSE2] in {
+  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+                              (v2f64 (nonvolatile_load addr:$src2)))),
+            (MOVHPDrm VR128:$src1, addr:$src2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Extract Floating-Point Sign mask
 //===----------------------------------------------------------------------===//
@@ -2523,99 +2386,6 @@ let Predicates = [HasAVX1Only] in {
             (VANDNPSYrm VR256:$src1, addr:$src2)>;
 }
 
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VXORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                               (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-
-  def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VXORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                               (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-}
-
-let Predicates = [UseSSE1] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                            (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (XORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                            (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (XORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-}
-
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
             (VPANDrr VR128:$src1, VR128:$src2)>;
@@ -2908,7 +2678,8 @@ let isCodeGenOnly = 1 in {
 // patterns we have to try to match.
 multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
                                     ValueType VT, ValueType EltTy,
-                                    RegisterClass RC, Predicate BasePredicate> {
+                                    RegisterClass RC, PatFrag ld_frag,
+                                    Predicate BasePredicate> {
   let Predicates = [BasePredicate] in {
     // extracted scalar math op with insert via movss/movsd
     def : Pat<(VT (Move (VT VR128:$dst),
@@ -2917,6 +2688,11 @@ multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
                                  RC:$src))))),
               (!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
                (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
+    def : Pat<(VT (Move (VT VR128:$dst),
+                        (VT (scalar_to_vector
+                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+                                 (ld_frag addr:$src)))))),
+              (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
   }
 
   // Repeat for AVX versions of the instructions.
@@ -2928,18 +2704,23 @@ multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
                                  RC:$src))))),
               (!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
                (VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
+    def : Pat<(VT (Move (VT VR128:$dst),
+                        (VT (scalar_to_vector
+                             (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+                                 (ld_frag addr:$src)))))),
+              (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
   }
 }
 
-defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
+defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
 
-defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
+defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
  
 /// Unop Arithmetic
 /// In addition, we also have a special variant of the scalar form here to
@@ -2956,7 +2737,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           ValueType ScalarVT, X86MemOperand x86memop,
                           Operand intmemop, SDNode OpNode, Domain d,
                           X86FoldableSchedWrite sched, Predicate target> {
-  let hasSideEffects = 0 in {
+  let isCodeGenOnly = 1, hasSideEffects = 0 in {
   def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
               !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
             [(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
@@ -2967,8 +2748,9 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
             [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
             Sched<[sched.Folded]>,
             Requires<[target, OptForSize]>;
+  }
 
-  let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
+  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
   def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                 Sched<[sched]>;
@@ -2977,7 +2759,6 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
-  }
 
 }
 
@@ -3022,7 +2803,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           ValueType ScalarVT, X86MemOperand x86memop,
                           Operand intmemop, SDNode OpNode, Domain d,
                           X86FoldableSchedWrite sched, Predicate target> {
-  let hasSideEffects = 0 in {
+  let isCodeGenOnly = 1, hasSideEffects = 0 in {
   def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [], d>, Sched<[sched]>;
@@ -3030,7 +2811,8 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
   def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
-  let isCodeGenOnly = 1, ExeDomain = d in {
+  }
+  let hasSideEffects = 0, ExeDomain = d in {
   def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3041,7 +2823,6 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
-  }
 
   // We don't want to fold scalar loads into these instructions unless
   // optimizing for size. This is because the folded instruction will have a
@@ -3197,23 +2978,6 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
   }
 }
 
-multiclass scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
-                                          ValueType VT, bits<8> ImmV,
-                                          Predicate BasePredicate> {
-  let Predicates = [BasePredicate] in {
-    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
-                                  (OpNode (extractelt VT:$src, 0))))),
-              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
-  }
-
-  // Repeat for AVX versions of the instructions.
-  let Predicates = [UseAVX] in {
-    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
-                                  (OpNode (extractelt VT:$src, 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
-  }
-}
-
 defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
 defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
 
@@ -3388,16 +3152,20 @@ def : Pat<(X86MFence), (MFENCE)>;
 // SSE 1 & 2 - Load/Store XCSR register
 //===----------------------------------------------------------------------===//
 
+let mayLoad=1, hasSideEffects=1 in
 def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
                VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
+let mayStore=1, hasSideEffects=1 in
 def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
                VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
 
+let mayLoad=1, hasSideEffects=1 in
 def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
               "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
               TB, Sched<[WriteLDMXCSR]>;
+let mayStore=1, hasSideEffects=1 in
 def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
               "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
               TB, Sched<[WriteSTMXCSR]>;
@@ -3529,17 +3297,6 @@ def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
 
 } // ExeDomain = SSEPackedInt
 
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
-                (VMOVDQArr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
-                (VMOVDQAYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
-                (VMOVDQUrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
-                (VMOVDQUYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
 // Reversed version with ".s" suffix for GAS compatibility.
 def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
                 (VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
@@ -4118,7 +3875,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
            "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set VR128:$dst,
          (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
-       Sched<[WriteVecInsert]>;
+       Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
   def rm : Ii8<0xC4, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1,
                        i16mem:$src2, u8imm:$src3),
@@ -4138,7 +3895,7 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
                     "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
                                             imm:$src2))]>,
-                PD, VEX, Sched<[WriteVecExtract]>;
+                PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
 def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4148,7 +3905,7 @@ def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
 
 // Insert
 let Predicates = [HasAVX, NoBWI] in
-defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V;
+defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
 
 let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
 defm PINSRW : sse2_pinsrw, PD;
@@ -4279,19 +4036,11 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                         VEX, Sched<[WriteVecMoveFromGpr]>;
 
-  def VMOVDI2SSrm  : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
-                        "movd\t{$src, $dst|$dst, $src}",
-                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
-                        VEX, Sched<[WriteVecLoad]>;
   def MOVDI2SSrr  : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (bitconvert GR32:$src))]>,
                         Sched<[WriteVecMoveFromGpr]>;
 
-  def MOVDI2SSrm  : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
-                        "movd\t{$src, $dst|$dst, $src}",
-                        [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
-                        Sched<[WriteVecLoad]>;
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
@@ -4353,32 +4102,15 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
 // Bitcast FR64 <-> GR64
 //
 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
-  let Predicates = [UseAVX] in
-  def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                          "movq\t{$src, $dst|$dst, $src}",
-                          [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
-                          VEX, Sched<[WriteVecLoad]>;
   def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                            "movq\t{$src, $dst|$dst, $src}",
                            [(set GR64:$dst, (bitconvert FR64:$src))]>,
                            VEX, Sched<[WriteVecMoveToGpr]>;
-  def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                           "movq\t{$src, $dst|$dst, $src}",
-                           [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
-                           VEX, Sched<[WriteVecStore]>;
 
-  def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
-                         "movq\t{$src, $dst|$dst, $src}",
-                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
-                         Sched<[WriteVecLoad]>;
   def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (bitconvert FR64:$src))]>,
                          Sched<[WriteVecMoveToGpr]>;
-  def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
-                         "movq\t{$src, $dst|$dst, $src}",
-                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
-                         Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 //===---------------------------------------------------------------------===//
@@ -4389,18 +4121,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                         VEX, Sched<[WriteVecMoveToGpr]>;
-  def VMOVSS2DImr  : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
-                        "movd\t{$src, $dst|$dst, $src}",
-                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,
-                        VEX, Sched<[WriteVecStore]>;
   def MOVSS2DIrr  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))]>,
                         Sched<[WriteVecMoveToGpr]>;
-  def MOVSS2DImr  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
-                        "movd\t{$src, $dst|$dst, $src}",
-                        [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,
-                        Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 
 let Predicates = [UseAVX] in {
@@ -4410,28 +4134,14 @@ let Predicates = [UseAVX] in {
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
             (VMOV64toPQIrr GR64:$src)>;
 
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-              (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIrr GR64:$src)), sub_xmm)>;
   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
   // These instructions also write zeros in the high part of a 256-bit register.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
-            (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
-            (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-              (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
-  def : Pat<(v8i32 (X86vzload addr:$src)),
+  def : Pat<(v8i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
-  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
-  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
-            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIrr GR32:$src)), sub_xmm)>;
 }
 
 let Predicates = [UseSSE2] in {
@@ -4442,11 +4152,7 @@ let Predicates = [UseSSE2] in {
             (MOV64toPQIrr GR64:$src)>;
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (MOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (MOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
-            (MOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (MOVDI2PDIrm addr:$src)>;
 }
 
@@ -4508,32 +4214,26 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>;
 }
 
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}",
-                (VMOVPQI2QIrr VR128L:$dst, VR128H:$src), 0>;
-
 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                 (VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
 def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                 (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
 
 let Predicates = [UseAVX] in {
-  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+  def : Pat<(v2i64 (X86vzload64 addr:$src)),
             (VMOVQI2PQIrm addr:$src)>;
-  def : Pat<(v2i64 (X86vzload addr:$src)),
-            (VMOVQI2PQIrm addr:$src)>;
-  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-              (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
-            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzload addr:$src)),
+  def : Pat<(v4i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
+
+  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
+            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
 }
 
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
-            (MOVQI2PQIrm addr:$src)>;
-  def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
+
+  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
+            (MOVPQI2QImr addr:$dst, VR128:$src)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -4560,6 +4260,19 @@ let Predicates = [UseSSE2] in {
             (MOVZPQILo2PQIrr VR128:$src)>;
 }
 
+let Predicates = [UseAVX] in {
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+}
+
 //===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
@@ -4667,17 +4380,17 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
 
 
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
 let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
-  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+  def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
             (MOVDDUPrm addr:$src)>;
-  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
             (MOVDDUPrm addr:$src)>;
 }
 
@@ -5130,15 +4843,12 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
 //===---------------------------------------------------------------------===//
 
 let SchedRW = [WriteSystem] in {
-let usesCustomInserter = 1 in {
-def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
-                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
-                Requires<[HasSSE3]>;
-}
-
 let Uses = [EAX, ECX, EDX] in
-def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
-                   TB, Requires<[HasSSE3]>;
+def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+                     TB, Requires<[HasSSE3, Not64BitMode]>;
+let Uses = [RAX, ECX, EDX] in
+def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+                     TB, Requires<[HasSSE3, In64BitMode]>;
 
 let Uses = [ECX, EAX] in
 def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
@@ -5148,13 +4858,14 @@ def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
 def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
 def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
 
-def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
       Requires<[Not64BitMode]>;
-def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
       Requires<[In64BitMode]>;
 
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Move with Sign/Zero Extend
+// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
 //===----------------------------------------------------------------------===//
 
 multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
@@ -5202,71 +4913,38 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
 
 defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
 
-// Patterns that we also need for any_extend.
-// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg.
-multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> {
-  // Register-Register patterns
-  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
-    def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
-              (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
-  }
-
-  let Predicates = [HasAVX2, NoVLX] in {
-    def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
-              (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
-
-    def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
-              (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
-  }
-
-  // AVX2 Register-Memory patterns
-  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
-    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
-              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
-    def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
-    def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
-  }
-
-  let Predicates = [HasAVX2, NoVLX] in {
-    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
-              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-    def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-    def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-
-    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
-              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
-    def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
-    def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
-              (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
-  }
-}
-
 // AVX2 Patterns
 multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
-                                     SDNode ExtOp, SDNode InVecOp> :
-    SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> {
-
+                                     SDNode ExtOp, SDNode InVecOp> {
   // Register-Register patterns
+  let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+  }
   let Predicates = [HasAVX2, NoVLX] in {
   def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
             (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
   def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
             (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
 
+  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
   def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
             (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
+
+  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
   }
 
   // Simple Register-Memory patterns
   let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+
+  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
   }
+
   let Predicates = [HasAVX2, NoVLX] in {
   def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
@@ -5284,38 +4962,31 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
 
   // AVX2 Register-Memory patterns
   let Predicates = [HasAVX2, NoVLX] in {
+  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
 
+  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
+  def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
+  def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
   }
 }
 
 defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
 defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
-defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>;
 
 // SSE4.1/AVX patterns.
 multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
@@ -5361,9 +5032,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
-  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
-  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
@@ -5371,19 +5040,13 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
   let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
 
   def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
 
@@ -5391,18 +5054,14 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
 
   def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
@@ -5411,9 +5070,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
@@ -5451,7 +5108,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
 }
 
 let Predicates = [HasAVX, NoBWI] in
-  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
 
 defm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
 
@@ -5475,7 +5132,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
 }
 
 let Predicates = [HasAVX, NoBWI] in
-  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
 
 defm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
 
@@ -5548,18 +5205,6 @@ let ExeDomain = SSEPackedSingle in {
   defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
 }
 
-// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
-def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
-                                              imm:$src2))),
-                 addr:$dst),
-          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
-          Requires<[HasAVX]>;
-def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
-                                              imm:$src2))),
-                 addr:$dst),
-          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
-          Requires<[UseSSE41]>;
-
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Insert Instructions
 //===----------------------------------------------------------------------===//
@@ -5573,7 +5218,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
       [(set VR128:$dst,
         (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
-      Sched<[WriteVecInsert]>;
+      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
   def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i8mem:$src2, u8imm:$src3),
       !if(Is2Addr,
@@ -5586,7 +5231,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
 }
 
 let Predicates = [HasAVX, NoBWI] in
-  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
 let Constraints = "$src1 = $dst" in
   defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;
 
@@ -5599,7 +5244,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
       [(set VR128:$dst,
         (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
-      Sched<[WriteVecInsert]>;
+      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
   def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i32mem:$src2, u8imm:$src3),
       !if(Is2Addr,
@@ -5625,7 +5270,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
       [(set VR128:$dst,
         (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
-      Sched<[WriteVecInsert]>;
+      Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
   def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
       !if(Is2Addr,
@@ -5647,6 +5292,7 @@ let Constraints = "$src1 = $dst" in
 // vector. The next one matches the intrinsic and could zero arbitrary elements
 // in the target vector.
 multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+  let isCommutable = 1 in
   def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2, u8imm:$src3),
       !if(Is2Addr,
@@ -5853,7 +5499,7 @@ let Predicates = [HasAVX, NoVLX] in {
                                    VEX, VEX_L, VEX_WIG;
   }
 }
-let Predicates = [HasAVX, NoAVX512] in {
+let Predicates = [UseAVX] in {
   defm VROUND  : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
                                   v4f32, v2f64, X86RndScales, 0>,
                                   VEX_4V, VEX_LIG, VEX_WIG;
@@ -5862,141 +5508,17 @@ let Predicates = [HasAVX, NoAVX512] in {
 }
 
 let Predicates = [UseAVX] in {
-  def : Pat<(ffloor FR32:$src),
-            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
-  def : Pat<(f32 (fnearbyint FR32:$src)),
-            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
-  def : Pat<(f32 (fceil FR32:$src)),
-            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
-  def : Pat<(f32 (frint FR32:$src)),
-            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
-  def : Pat<(f32 (ftrunc FR32:$src)),
-            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
-
-  def : Pat<(f64 (ffloor FR64:$src)),
-            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
-  def : Pat<(f64 (fnearbyint FR64:$src)),
-            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
-  def : Pat<(f64 (fceil FR64:$src)),
-            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
-  def : Pat<(f64 (frint FR64:$src)),
-            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
-  def : Pat<(f64 (ftrunc FR64:$src)),
-            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
+  def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
+            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
+  def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
+            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
 }
 
 let Predicates = [UseAVX, OptForSize] in {
-  def : Pat<(ffloor (loadf32 addr:$src)),
-            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
-  def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
-            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
-  def : Pat<(f32 (fceil (loadf32 addr:$src))),
-            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
-  def : Pat<(f32 (frint (loadf32 addr:$src))),
-            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
-  def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
-            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
-
-  def : Pat<(f64 (ffloor (loadf64 addr:$src))),
-            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
-  def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
-            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
-  def : Pat<(f64 (fceil (loadf64 addr:$src))),
-            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
-  def : Pat<(f64 (frint (loadf64 addr:$src))),
-            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
-  def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
-            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v4f32 (ffloor VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v8f32 (ffloor VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0x9))>;
-  def : Pat<(v8f32 (fnearbyint VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xC))>;
-  def : Pat<(v8f32 (fceil VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xA))>;
-  def : Pat<(v8f32 (frint VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0x4))>;
-  def : Pat<(v8f32 (ftrunc VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xB))>;
-
-  def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0x9))>;
-  def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xC))>;
-  def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xA))>;
-  def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0x4))>;
-  def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v4f64 (ffloor VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0x9))>;
-  def : Pat<(v4f64 (fnearbyint VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xC))>;
-  def : Pat<(v4f64 (fceil VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xA))>;
-  def : Pat<(v4f64 (frint VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0x4))>;
-  def : Pat<(v4f64 (ftrunc VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xB))>;
-
-  def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xB))>;
+  def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
+            (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
+  def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
+            (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
 }
 
 let ExeDomain = SSEPackedSingle in
@@ -6013,108 +5535,19 @@ defm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
                                v4f32, v2f64, X86RndScales>;
 
 let Predicates = [UseSSE41] in {
-  def : Pat<(ffloor FR32:$src),
-            (ROUNDSSr FR32:$src, (i32 0x9))>;
-  def : Pat<(f32 (fnearbyint FR32:$src)),
-            (ROUNDSSr FR32:$src, (i32 0xC))>;
-  def : Pat<(f32 (fceil FR32:$src)),
-            (ROUNDSSr FR32:$src, (i32 0xA))>;
-  def : Pat<(f32 (frint FR32:$src)),
-            (ROUNDSSr FR32:$src, (i32 0x4))>;
-  def : Pat<(f32 (ftrunc FR32:$src)),
-            (ROUNDSSr FR32:$src, (i32 0xB))>;
-
-  def : Pat<(f64 (ffloor FR64:$src)),
-            (ROUNDSDr FR64:$src, (i32 0x9))>;
-  def : Pat<(f64 (fnearbyint FR64:$src)),
-            (ROUNDSDr FR64:$src, (i32 0xC))>;
-  def : Pat<(f64 (fceil FR64:$src)),
-            (ROUNDSDr FR64:$src, (i32 0xA))>;
-  def : Pat<(f64 (frint FR64:$src)),
-            (ROUNDSDr FR64:$src, (i32 0x4))>;
-  def : Pat<(f64 (ftrunc FR64:$src)),
-            (ROUNDSDr FR64:$src, (i32 0xB))>;
+  def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
+            (ROUNDSSr FR32:$src1, imm:$src2)>;
+  def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
+            (ROUNDSDr FR64:$src1, imm:$src2)>;
 }
 
 let Predicates = [UseSSE41, OptForSize] in {
-  def : Pat<(ffloor (loadf32 addr:$src)),
-            (ROUNDSSm addr:$src, (i32 0x9))>;
-  def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
-            (ROUNDSSm addr:$src, (i32 0xC))>;
-  def : Pat<(f32 (fceil (loadf32 addr:$src))),
-            (ROUNDSSm addr:$src, (i32 0xA))>;
-  def : Pat<(f32 (frint (loadf32 addr:$src))),
-            (ROUNDSSm addr:$src, (i32 0x4))>;
-  def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
-            (ROUNDSSm addr:$src, (i32 0xB))>;
-
-  def : Pat<(f64 (ffloor (loadf64 addr:$src))),
-            (ROUNDSDm addr:$src, (i32 0x9))>;
-  def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
-            (ROUNDSDm addr:$src, (i32 0xC))>;
-  def : Pat<(f64 (fceil (loadf64 addr:$src))),
-            (ROUNDSDm addr:$src, (i32 0xA))>;
-  def : Pat<(f64 (frint (loadf64 addr:$src))),
-            (ROUNDSDm addr:$src, (i32 0x4))>;
-  def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
-            (ROUNDSDm addr:$src, (i32 0xB))>;
+  def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
+            (ROUNDSSm addr:$src1, imm:$src2)>;
+  def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
+            (ROUNDSDm addr:$src1, imm:$src2)>;
 }
 
-let Predicates = [UseSSE41] in {
-  def : Pat<(v4f32 (ffloor VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v4f32 (ffloor (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xB))>;
-}
-
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
-                                      v4f32, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
-                                      v4f32, 0x02, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
-                                      v2f64, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
-                                      v2f64, 0x02, UseSSE41>;
-
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
@@ -6449,6 +5882,72 @@ def BlendCommuteImm8 : SDNodeXForm<imm, [{
   return getI8Imm(Imm ^ 0xff, SDLoc(N));
 }]>;
 
+// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
+def BlendScaleImm4 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 4; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0x3 << (i * 2);
+  }
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
+def BlendScaleImm2 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 2; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0xf << (i * 4);
+  }
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
+def BlendScaleImm2to4 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 2; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0x3 << (i * 2);
+  }
+  return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
+def BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 4; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0x3 << (i * 2);
+  }
+  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
+def BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 2; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0xf << (i * 4);
+  }
+  return getI8Imm(NewImm ^ 0xff, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
+def BlendScaleCommuteImm2to4 : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  uint8_t NewImm = 0;
+  for (unsigned i = 0; i != 2; ++i) {
+    if (Imm & (1 << i))
+      NewImm |= 0x3 << (i * 2);
+  }
+  return getI8Imm(NewImm ^ 0xf, SDLoc(N));
+}]>;
+
 let Predicates = [HasAVX] in {
   let isCommutable = 0 in {
     defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -6559,6 +6058,42 @@ let Predicates = [HasAVX2] in {
                                    VEX_4V, VEX_L, VEX_WIG;
 }
 
+// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
+// ExecutionDomainFixPass will clean up domains later on.
+let Predicates = [HasAVX1Only] in {
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
+          (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
+          (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
+          (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
+
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movsd via commuting under optsize.
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
+          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
+          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
+
+def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
+          (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
+          (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
+          (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
+
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movss via commuting under optsize.
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
+          (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
+          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
+          (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+}
+
 defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
                                VR128, memop, f128mem, 1, SSEPackedSingle,
                                SchedWriteFBlend.XMM, BlendCommuteImm4>;
@@ -6569,6 +6104,24 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                                VR128, memop, i128mem, 1, SSEPackedInt,
                                SchedWriteBlend.XMM, BlendCommuteImm8>;
 
+let Predicates = [UseSSE41] in {
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movss/movsd via commuting under optsize.
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
+          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
+          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
+
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
+          (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
+          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
+          (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+}
+
 // For insertion into the zero index (low half) of a 256-bit vector, it is
 // more efficient to generate a blend with immediate instead of an insert*128.
 let Predicates = [HasAVX] in {
@@ -6580,18 +6133,25 @@ def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
           (VBLENDPSYrri VR256:$src1,
                         (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
                                        VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
+          (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
+def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
 }
 
-/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
-multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
-                                    RegisterClass RC, X86MemOperand x86memop,
-                                    PatFrag mem_frag, Intrinsic IntId,
-                                    X86FoldableSchedWrite sched> {
+/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operands
+multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                                X86MemOperand x86memop, ValueType VT,
+                                PatFrag mem_frag, SDNode OpNode,
+                                X86FoldableSchedWrite sched> {
   def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                  [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+                  [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
                   SSEPackedInt>, TAPD, VEX_4V,
                 Sched<[sched]>;
 
@@ -6600,8 +6160,8 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                   !strconcat(OpcodeStr,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                   [(set RC:$dst,
-                        (IntId RC:$src1, (mem_frag addr:$src2),
-                               RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
+                        (OpNode RC:$src3, (mem_frag addr:$src2),
+                                RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
                 Sched<[sched.Folded, sched.ReadAfterFold,
                        // x86memop:$src2
                        ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -6612,68 +6172,47 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
 
 let Predicates = [HasAVX] in {
 let ExeDomain = SSEPackedDouble in {
-defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
-                                           load, int_x86_sse41_blendvpd,
-                                           SchedWriteFVarBlend.XMM>;
-defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
-                                  loadv4f64, int_x86_avx_blendv_pd_256,
-                                  SchedWriteFVarBlend.YMM>, VEX_L;
+defm VBLENDVPD  : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
+                                       v2f64, loadv2f64, X86Blendv,
+                                       SchedWriteFVarBlend.XMM>;
+defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
+                                       v4f64, loadv4f64, X86Blendv,
+                                       SchedWriteFVarBlend.YMM>, VEX_L;
 } // ExeDomain = SSEPackedDouble
 let ExeDomain = SSEPackedSingle in {
-defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
-                                           load, int_x86_sse41_blendvps,
-                                           SchedWriteFVarBlend.XMM>;
-defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
-                                  loadv8f32, int_x86_avx_blendv_ps_256,
-                                  SchedWriteFVarBlend.YMM>, VEX_L;
+defm VBLENDVPS  : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
+                                       v4f32, loadv4f32, X86Blendv,
+                                       SchedWriteFVarBlend.XMM>;
+defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
+                                       v8f32, loadv8f32, X86Blendv,
+                                       SchedWriteFVarBlend.YMM>, VEX_L;
 } // ExeDomain = SSEPackedSingle
-defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
-                                           load, int_x86_sse41_pblendvb,
-                                           SchedWriteVarBlend.XMM>;
+defm VPBLENDVB  : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
+                                       v16i8, loadv16i8, X86Blendv,
+                                       SchedWriteVarBlend.XMM>;
 }
 
 let Predicates = [HasAVX2] in {
-defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
-                                      load, int_x86_avx2_pblendvb,
-                                      SchedWriteVarBlend.YMM>, VEX_L;
+defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
+                                       v32i8, loadv32i8, X86Blendv,
+                                       SchedWriteVarBlend.YMM>, VEX_L;
 }
 
 let Predicates = [HasAVX] in {
-  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
-                            (v16i8 VR128:$src2))),
-            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
-  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
-                            (v4i32 VR128:$src2))),
+  def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+                              (v4i32 VR128:$src2))),
             (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
-  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
-                            (v4f32 VR128:$src2))),
-            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
-  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
-                            (v2i64 VR128:$src2))),
-            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
-  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
-                            (v2f64 VR128:$src2))),
+  def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+                              (v2i64 VR128:$src2))),
             (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
-  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
-                            (v8i32 VR256:$src2))),
+  def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+                              (v8i32 VR256:$src2))),
             (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
-                            (v8f32 VR256:$src2))),
-            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
-                            (v4i64 VR256:$src2))),
-            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
-                            (v4f64 VR256:$src2))),
+  def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+                              (v4i64 VR256:$src2))),
             (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
 }
 
-let Predicates = [HasAVX2] in {
-  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
-                            (v32i8 VR256:$src2))),
-            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-}
-
 // Prefer a movss or movsd over a blendps when optimizing for size. these were
 // changed to use blends because blends have better throughput on sandybridge
 // and haswell, but movs[s/d] are 1-2 byte shorter instructions.
@@ -6708,17 +6247,6 @@ let Predicates = [HasAVX, OptForSpeed] in {
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 // Prefer a movss or movsd over a blendps when optimizing for size. these were
@@ -6747,16 +6275,17 @@ let Predicates = [UseSSE41, OptForSpeed] in {
 }
 
 
-/// SS41I_ternary_int - SSE 4.1 ternary operator
+/// SS41I_ternary - SSE 4.1 ternary operator
 let Uses = [XMM0], Constraints = "$src1 = $dst" in {
-  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                               X86MemOperand x86memop, Intrinsic IntId,
-                               X86FoldableSchedWrite sched> {
+  multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
+                           PatFrag mem_frag, X86MemOperand x86memop,
+                           SDNode OpNode, X86FoldableSchedWrite sched> {
     def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr,
                      "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
-                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+                    [(set VR128:$dst,
+                      (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
                     Sched<[sched]>;
 
     def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
@@ -6764,20 +6293,19 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
                     !strconcat(OpcodeStr,
                      "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
                     [(set VR128:$dst,
-                      (IntId VR128:$src1,
-                       (mem_frag addr:$src2), XMM0))]>,
+                      (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
 
 let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memop, f128mem,
-                                  int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
+defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
+                              X86Blendv, SchedWriteFVarBlend.XMM>;
 let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memop, f128mem,
-                                  int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memop, i128mem,
-                                  int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
+defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
+                              X86Blendv, SchedWriteFVarBlend.XMM>;
+defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
+                              X86Blendv, SchedWriteVarBlend.XMM>;
 
 // Aliases with the implicit xmm0 argument
 def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
@@ -6794,20 +6322,11 @@ def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
                 (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
 
 let Predicates = [UseSSE41] in {
-  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
-                            (v16i8 VR128:$src2))),
-            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
-  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
-                            (v4i32 VR128:$src2))),
+  def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
+                              (v4i32 VR128:$src2))),
             (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
-  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
-                            (v4f32 VR128:$src2))),
-            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
-  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
-                            (v2i64 VR128:$src2))),
-            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
-                            (v2f64 VR128:$src2))),
+  def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
+                              (v2i64 VR128:$src2))),
             (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
 }
 
@@ -7451,17 +6970,6 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
                            "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
                            Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
 
-let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
-          (VBROADCASTI128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
-          (VBROADCASTI128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
-          (VBROADCASTI128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
-          (VBROADCASTI128 addr:$src)>;
-}
-
 let Predicates = [HasAVX, NoVLX] in {
 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
           (VBROADCASTF128 addr:$src)>;
@@ -7469,7 +6977,9 @@ def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
           (VBROADCASTF128 addr:$src)>;
 }
 
-let Predicates = [HasAVX1Only] in {
+// NOTE: We're using FP instructions here, but execution domain fixing can
+// convert to integer when profitable.
+let Predicates = [HasAVX, NoVLX] in {
 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
           (VBROADCASTF128 addr:$src)>;
 def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
@@ -7765,12 +7275,10 @@ let Predicates = [HasF16C, NoVLX] in {
                                WriteCvtPS2PHYSt>, VEX_L;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (VCVTPH2PSrm addr:$src)>;
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTPH2PSrm addr:$src)>;
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
-              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
+              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTPH2PSrm addr:$src)>;
 
   def : Pat<(store (f64 (extractelt
@@ -7835,6 +7343,7 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                             (commuteXForm imm:$src3))>;
 }
 
+let Predicates = [HasAVX2] in {
 defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                                SchedWriteBlend.XMM, VR128, i128mem,
                                BlendCommuteImm4>;
@@ -7842,28 +7351,26 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                 SchedWriteBlend.YMM, VR256, i256mem,
                                 BlendCommuteImm8>, VEX_L;
 
-// For insertion into the zero index (low half) of a 256-bit vector, it is
-// more efficient to generate a blend with immediate instead of an insert*128.
-let Predicates = [HasAVX2] in {
-def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
-          (VPBLENDDYrri VR256:$src1,
-                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
-                                       VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
-          (VPBLENDDYrri VR256:$src1,
-                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
-                                       VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
-          (VPBLENDDYrri VR256:$src1,
-                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
-                                       VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
-          (VPBLENDDYrri VR256:$src1,
-                        (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
-                                       VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
+          (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
+          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
+          (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+          (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
+          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
+          (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
 }
 
-let Predicates = [HasAVX1Only] in {
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with immediate instead of an insert*128.
+// NOTE: We're using FP instructions here, but execution domain fixing should
+// take care of using integer instructions when profitable.
+let Predicates = [HasAVX] in {
 def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
           (VBLENDPSYrri VR256:$src1,
                         (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
@@ -7880,6 +7387,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
           (VBLENDPSYrri VR256:$src1,
                         (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
                                        VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
+          (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+                                       VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7930,9 +7450,9 @@ defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
 
 let Predicates = [HasAVX2, NoVLX] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQrm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQYrm addr:$src)>;
 
   def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
@@ -7951,9 +7471,15 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
             (VPBROADCASTWrm addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
             (VPBROADCASTWYrm addr:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWrm addr:$src)>;
   def : Pat<(v8i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWrm addr:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWYrm addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWYrm addr:$src)>;
@@ -8038,7 +7564,7 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
-  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
             (VMOVDDUPrm addr:$src)>;
 }
 
@@ -8236,19 +7762,14 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
 multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
                           ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
     // masked store
-    def: Pat<(X86mstore (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
+    def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
              (!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
     // masked load
-    def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
+    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
              (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
-    def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask),
-                              (VT (bitconvert (ZeroVT immAllZerosV))))),
+    def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
+                              (VT immAllZerosV))),
              (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
-    def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
-             (!cast<Instruction>(BlendStr#"rr")
-                 RC:$src0,
-                 (VT (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)),
-                 RC:$mask)>;
 }
 let Predicates = [HasAVX] in {
   defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
@@ -8275,21 +7796,6 @@ let Predicates = [HasAVX2] in {
 // Provide fallback in case the load node that is used in the patterns above
 // is used by additional users, which prevents the pattern selection.
 
-let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
-          (VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
-                         (v2i64 VR128:$src), 1)>;
-def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
-          (VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
-                         (v4i32 VR128:$src), 1)>;
-def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
-          (VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
-                         (v8i16 VR128:$src), 1)>;
-def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
-          (VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
-                         (v16i8 VR128:$src), 1)>;
-}
-
 let Predicates = [HasAVX, NoVLX] in {
 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
           (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
@@ -8299,7 +7805,9 @@ def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
                          (v4f32 VR128:$src), 1)>;
 }
 
-let Predicates = [HasAVX1Only] in {
+// NOTE: We're using FP instructions here, but execution domain fixing can
+// convert to integer when profitable.
+let Predicates = [HasAVX, NoVLX] in {
 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
           (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
                          (v2i64 VR128:$src), 1)>;
@@ -8350,20 +7858,11 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 let Predicates = [HasAVX2, NoVLX] in {
-  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
-  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
-  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
-  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
-  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
-  def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
-            (VPSRAVDrr VR128:$src1, VR128:$src2)>;
-  def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
-            (VPSRAVDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
-            (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
-  def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
-            (VPSRAVDYrm VR256:$src1, addr:$src2)>;
+  defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
+  defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
+  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
+  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
+  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -8393,7 +7892,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
             VEX, VEX_L, Sched<[WriteLoad]>;
 }
 
-let Predicates = [UseAVX2] in {
+let Predicates = [HasAVX2] in {
   let mayLoad = 1, hasSideEffects = 0, Constraints
     = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
     in {
diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td
index 2dc6e8b43667..82c8e74156b2 100644
--- a/lib/Target/X86/X86InstrSVM.td
+++ b/lib/Target/X86/X86InstrSVM.td
@@ -1,9 +1,8 @@
 //===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 7cd63a6dd820..9d974b716dda 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -1,9 +1,8 @@
 //===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,11 +30,11 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
                   [(set GR64:$dst, (shl GR64:$src1, CL))]>;
 } // Uses = [CL], SchedRW
 
+let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
                    "shl{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
 
-let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
                    "shl{w}\t{$src2, $dst|$dst, $src2}",
                    [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>,
@@ -473,17 +472,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
 
 def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
                    "rol{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+                   [(set GR8:$dst, (rotl GR8:$src1, (i8 relocImm:$src2)))]>;
 def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
                    "rol{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize16;
+                   [(set GR16:$dst, (rotl GR16:$src1, (i8 relocImm:$src2)))]>,
+                   OpSize16;
 def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
                    "rol{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>, OpSize32;
+                   [(set GR32:$dst, (rotl GR32:$src1, (i8 relocImm:$src2)))]>,
+                   OpSize32;
 def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst),
                     (ins GR64:$src1, u8imm:$src2),
                     "rol{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+                    [(set GR64:$dst, (rotl GR64:$src1, (i8 relocImm:$src2)))]>;
 
 // Rotate by 1
 def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -586,16 +587,16 @@ def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst),
 // Rotate by 1
 def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "ror{b}\t$dst",
-                 [(set GR8:$dst, (rotl GR8:$src1, (i8 7)))]>;
+                 [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
 def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                  "ror{w}\t$dst",
-                 [(set GR16:$dst, (rotl GR16:$src1, (i8 15)))]>, OpSize16;
+                 [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize16;
 def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                  "ror{l}\t$dst",
-                 [(set GR32:$dst, (rotl GR32:$src1, (i8 31)))]>, OpSize32;
+                 [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>, OpSize32;
 def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
                   "ror{q}\t$dst",
-                  [(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
+                  [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
 } // Constraints = "$src = $dst", SchedRW
 
 let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
@@ -634,18 +635,18 @@ def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
 // Rotate by 1
 def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
                  "ror{b}\t$dst",
-                 [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)]>;
+                 [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
 def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
                  "ror{w}\t$dst",
-                 [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)]>,
+                 [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                  OpSize16;
 def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
                  "ror{l}\t$dst",
-                 [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)]>,
+                 [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
                  OpSize32;
 def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
                  "ror{q}\t$dst",
-                 [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)]>,
+                 [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
                  Requires<[In64BitMode]>;
 } // SchedRW
 
@@ -807,13 +808,54 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
 
 } // Defs = [EFLAGS]
 
+// Use the opposite rotate if it allows us to use the rotate-by-1 instruction.
+def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1  GR8:$src1)>;
+def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1  GR8:$src1)>;
+def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+
+def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROR8m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROR16m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROR32m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROR64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(store (rotr (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROL8m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROL16m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROL32m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
-let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
-  def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
-            (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
-  def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
-            (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+// Use a pseudo so that TwoAddressInstructionPass and register allocation will
+// see this as a unary instruction.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
+    Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri],
+    Constraints = "$src1 = $dst" in {
+  def SHLDROT32ri  : I<0, Pseudo, (outs GR32:$dst),
+                       (ins GR32:$src1, u8imm:$shamt), "",
+                     [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHLDROT64ri  : I<0, Pseudo, (outs GR64:$dst),
+                       (ins GR64:$src1, u8imm:$shamt), "",
+                     [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>;
+
+  def SHRDROT32ri  : I<0, Pseudo, (outs GR32:$dst),
+                       (ins GR32:$src1, u8imm:$shamt), "",
+                     [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHRDROT64ri  : I<0, Pseudo, (outs GR64:$dst),
+                       (ins GR64:$src1, u8imm:$shamt), "",
+                     [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
 }
 
 def ROT32L2R_imm8  : SDNodeXForm<imm, [{
@@ -871,19 +913,29 @@ let Predicates = [HasBMI2] in {
 
   // Prefer RORX which is non-destructive and doesn't update EFLAGS.
   let AddedComplexity = 10 in {
+    def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+              (RORX32ri GR32:$src, imm:$shamt)>;
+    def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+              (RORX64ri GR64:$src, imm:$shamt)>;
+
     def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
               (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
     def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
               (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
   }
 
+  def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+            (RORX32mi addr:$src, imm:$shamt)>;
+  def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+            (RORX64mi addr:$src, imm:$shamt)>;
+
   def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
             (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
   def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
             (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
 
   // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
-  // immedidate shift, i.e. the following code is considered better
+  // immediate shift, i.e. the following code is considered better
   //
   //  mov %edi, %esi
   //  shl $imm, %esi
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 35ee00b9e016..7050e1917494 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -1,9 +1,8 @@
 //===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,10 +14,10 @@
 
 let SchedRW = [WriteSystem] in {
 let Defs = [RAX, RDX] in
-  def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", []>, TB;
 
 let Defs = [RAX, RCX, RDX] in
-  def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
+def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
 
 // CPU flow control instructions
 
@@ -411,7 +410,7 @@ let Defs = [EAX, EDX], Uses = [ECX] in
 def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
 
 let Defs = [RAX, RDX], Uses = [ECX] in
-  def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
 
 def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
                 "smsw{w}\t$dst", []>, OpSize16, TB;
@@ -588,18 +587,13 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
 
 //==-----------------------------------------------------------------------===//
 // PKU  - enable protection key
-let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
-  def WRPKRU : PseudoI<(outs), (ins GR32:$src),
-                [(int_x86_wrpkru GR32:$src)]>;
-  def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
-                [(set GR32:$dst, (int_x86_rdpkru))]>;
-}
-
 let SchedRW = [WriteSystem] in {
 let Defs = [EAX, EDX], Uses = [ECX] in
-  def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
+  def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru",
+                  [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, TB;
 let Uses = [EAX, ECX, EDX] in
-  def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
+  def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru",
+                  [(X86wrpkru EAX, EDX, ECX)]>, TB;
 } // SchedRW
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 10c6eef78639..fc0da845299f 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -1,9 +1,8 @@
 //===-- X86InstrVMX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 06a438ebfcad..37bc4ce2e053 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -1,9 +1,8 @@
 //===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
index c417dc99b84d..e98843bd3ae3 100644
--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
@@ -1,9 +1,8 @@
 //===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -99,76 +98,6 @@ defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
 defm : subvector_subreg_lowering<VR256, v32i8,  VR512, v64i8,  sub_ymm>;
 
 
-multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
-                                    RegisterClass RC, ValueType DstTy,
-                                    ValueType SrcTy, SubRegIndex SubIdx> {
-  def : Pat<(alignedstore (DstTy (extract_subvector
-                                  (SrcTy RC:$src), (iPTR 0))), addr:$dst),
-            (!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
-             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-
-  def : Pat<(store (DstTy (extract_subvector
-                           (SrcTy RC:$src), (iPTR 0))), addr:$dst),
-            (!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
-             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
-  defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64,  sub_xmm>;
-  defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32,  sub_xmm>;
-  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64,  sub_xmm>;
-  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32,  sub_xmm>;
-  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
-  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8,  sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
-  // Special patterns for storing subvector extracts of lower 128-bits
-  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
-  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
-                                  sub_xmm>;
-  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
-                                  sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
-                                  v4i64, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
-                                  v8i32, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
-                                  v16i16, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
-                                  v32i8, sub_xmm>;
-
-  // Special patterns for storing subvector extracts of lower 128-bits of 512.
-  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
-  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
-                                  sub_xmm>;
-  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
-                                  sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
-                                  v8i64, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
-                                  v16i32, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
-                                  v32i16, sub_xmm>;
-  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
-                                  v64i8, sub_xmm>;
-
-  // Special patterns for storing subvector extracts of lower 256-bits of 512.
-  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
-  defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
-                                  sub_ymm>;
-  defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
-                                  sub_ymm>;
-  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
-                                  v8i64, sub_ymm>;
-  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
-                                  v16i32, sub_ymm>;
-  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
-                                  v32i16, sub_ymm>;
-  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
-                                  v64i8, sub_ymm>;
-}
-
 // If we're inserting into an all zeros vector, just use a plain move which
 // will zero the upper bits. A post-isel hook will take care of removing
 // any moves that we can prove are unnecessary.
@@ -176,7 +105,7 @@ multiclass subvec_zero_lowering<string MoveStr,
                                 RegisterClass RC, ValueType DstTy,
                                 ValueType SrcTy, ValueType ZeroTy,
                                 SubRegIndex SubIdx> {
-  def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
+  def : Pat<(DstTy (insert_subvector immAllZerosV,
                                      (SrcTy RC:$src), (iPTR 0))),
             (SUBREG_TO_REG (i64 0),
              (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
@@ -398,7 +327,7 @@ let Predicates = [HasBWI, HasDQI] in {
             (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
 }
 
-let Predicates = [HasBWI, HasVLX] in {
+let Predicates = [HasBWI] in {
   def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                      (v1i1 VK1:$mask), (iPTR 0))),
             (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
@@ -487,7 +416,7 @@ def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
           (XORPSrr VR128:$src1, VR128:$src2)>;
 }
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
 // andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
 def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
           (VANDPSrm VR128:$src1, f128mem:$src2)>;
@@ -507,3 +436,24 @@ def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
 def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
           (VXORPSrr VR128:$src1, VR128:$src2)>;
 }
+
+let Predicates = [HasVLX] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128X:$src1, (loadf128 addr:$src2))),
+          (VANDPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128X:$src1, VR128X:$src2)),
+          (VANDPSZ128rr VR128X:$src1, VR128X:$src2)>;
+
+def : Pat<(f128 (X86for VR128X:$src1, (loadf128 addr:$src2))),
+          (VORPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128X:$src1, VR128X:$src2)),
+          (VORPSZ128rr VR128X:$src1, VR128X:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128X:$src1, (loadf128 addr:$src2))),
+          (VXORPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128X:$src1, VR128X:$src2)),
+          (VXORPSZ128rr VR128X:$src1, VR128X:$src2)>;
+}
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 9d810a675e3b..66ca78556b82 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -1,9 +1,8 @@
 //===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -247,36 +246,22 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
   let ExeDomain = SSEPackedInt in { // SSE integer instructions
     let isCommutable = 1 in
     def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
-             (ins VR128:$src1, VR128:$src2, XOPCC:$cc),
-             !strconcat("vpcom${cc}", Suffix,
-             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             (ins VR128:$src1, VR128:$src2, u8imm:$cc),
+             !strconcat("vpcom", Suffix,
+             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set VR128:$dst,
                 (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                                imm:$cc)))]>,
              XOP_4V, Sched<[sched]>;
     def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
-             (ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
-             !strconcat("vpcom${cc}", Suffix,
-             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             (ins VR128:$src1, i128mem:$src2, u8imm:$cc),
+             !strconcat("vpcom", Suffix,
+             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set VR128:$dst,
                 (vt128 (OpNode (vt128 VR128:$src1),
                                (vt128 (load addr:$src2)),
                                 imm:$cc)))]>,
              XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
-    let isAsmParserOnly = 1, hasSideEffects = 0 in {
-      def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
-                   (ins VR128:$src1, VR128:$src2, u8imm:$src3),
-                   !strconcat("vpcom", Suffix,
-                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   []>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
-      let mayLoad = 1 in
-      def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
-                   (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
-                   !strconcat("vpcom", Suffix,
-                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
-                   []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
-                   NotMemoryFoldable;
-    }
   }
 
   def : Pat<(OpNode (load addr:$src2),
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index c20336387b2d..892a083f4d1a 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -1,9 +1,8 @@
 //===- X86InstructionSelector.cpp -----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -419,18 +418,22 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV32rm : X86::MOV32mr;
     if (X86::VECRRegBankID == RB.getID())
-      return Isload ? (HasAVX512 ? X86::VMOVSSZrm
-                                 : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
-                    : (HasAVX512 ? X86::VMOVSSZmr
-                                 : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
+                       HasAVX    ? X86::VMOVSSrm_alt :
+                                   X86::MOVSSrm_alt)
+                    : (HasAVX512 ? X86::VMOVSSZmr :
+                       HasAVX    ? X86::VMOVSSmr :
+                                   X86::MOVSSmr);
   } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV64rm : X86::MOV64mr;
     if (X86::VECRRegBankID == RB.getID())
-      return Isload ? (HasAVX512 ? X86::VMOVSDZrm
-                                 : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm)
-                    : (HasAVX512 ? X86::VMOVSDZmr
-                                 : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
+                       HasAVX    ? X86::VMOVSDrm_alt :
+                                   X86::MOVSDrm_alt)
+                    : (HasAVX512 ? X86::VMOVSDZmr :
+                       HasAVX    ? X86::VMOVSDmr :
+                                   X86::MOVSDmr);
   } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
     if (Alignment >= 16)
       return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -513,10 +516,22 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
   LLT Ty = MRI.getType(DefReg);
   const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
 
+  assert(I.hasOneMemOperand());
   auto &MemOp = **I.memoperands_begin();
-  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
-    LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
-    return false;
+  if (MemOp.isAtomic()) {
+    // Note: for unordered operations, we rely on the fact that the appropriate
+    // MMO is already on the instruction we're mutating, and thus we don't need
+    // to make any changes.  So long as we select an opcode which is capable of
+    // loading or storing the appropriate size atomically, the rest of the
+    // backend is required to respect the MMO state.
+    if (!MemOp.isUnordered()) {
+      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
+      return false;
+    }
+    if (MemOp.getAlignment() < Ty.getSizeInBits()/8) {
+      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
+      return false;
+    }
   }
 
   unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
@@ -936,7 +951,6 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
   bool SwapArgs;
   std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
       (CmpInst::Predicate)I.getOperand(1).getPredicate());
-  unsigned OpSet = X86::getSETFromCond(CC);
 
   unsigned LHS = I.getOperand(2).getReg();
   unsigned RHS = I.getOperand(3).getReg();
@@ -970,7 +984,7 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
            .addReg(RHS);
 
   MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                   TII.get(OpSet), I.getOperand(0).getReg());
+                                   TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC);
 
   constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
   constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);
@@ -991,8 +1005,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
 
   // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
   static const uint16_t SETFOpcTable[2][3] = {
-      {X86::SETEr, X86::SETNPr, X86::AND8rr},
-      {X86::SETNEr, X86::SETPr, X86::OR8rr}};
+      {X86::COND_E, X86::COND_NP, X86::AND8rr},
+      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
   const uint16_t *SETFOpc = nullptr;
   switch (Predicate) {
   default:
@@ -1032,9 +1046,9 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
     unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
     unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
     MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                  TII.get(SETFOpc[0]), FlagReg1);
+                                  TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
     MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                  TII.get(SETFOpc[1]), FlagReg2);
+                                  TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]);
     MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                   TII.get(SETFOpc[2]), ResultReg)
                               .addReg(FlagReg1)
@@ -1052,7 +1066,6 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
   bool SwapArgs;
   std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
-  unsigned Opc = X86::getSETFromCond(CC);
 
   if (SwapArgs)
     std::swap(LhsReg, RhsReg);
@@ -1064,7 +1077,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
            .addReg(RhsReg);
 
   MachineInstr &Set =
-      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg);
+      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC);
   constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
   constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
   I.eraseFromParent();
@@ -1409,8 +1422,8 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
            .addReg(CondReg)
            .addImm(1);
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JNE_1))
-      .addMBB(DestMBB);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
+      .addMBB(DestMBB).addImm(X86::COND_NE);
 
   constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);
 
@@ -1530,15 +1543,14 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
 
   const static struct ShiftEntry {
     unsigned SizeInBits;
-    unsigned CReg;
     unsigned OpLSHR;
     unsigned OpASHR;
     unsigned OpSHL;
   } OpTable[] = {
-      {8, X86::CL, X86::SHR8rCL, X86::SAR8rCL, X86::SHL8rCL},      // i8
-      {16, X86::CX, X86::SHR16rCL, X86::SAR16rCL, X86::SHL16rCL},  // i16
-      {32, X86::ECX, X86::SHR32rCL, X86::SAR32rCL, X86::SHL32rCL}, // i32
-      {64, X86::RCX, X86::SHR64rCL, X86::SAR64rCL, X86::SHL64rCL}  // i64
+      {8, X86::SHR8rCL, X86::SAR8rCL, X86::SHL8rCL},     // i8
+      {16, X86::SHR16rCL, X86::SAR16rCL, X86::SHL16rCL}, // i16
+      {32, X86::SHR32rCL, X86::SAR32rCL, X86::SHL32rCL}, // i32
+      {64, X86::SHR64rCL, X86::SAR64rCL, X86::SHL64rCL}  // i64
   };
 
   if (DstRB.getID() != X86::GPRRegBankID)
@@ -1551,7 +1563,6 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
   if (ShiftEntryIt == std::end(OpTable))
     return false;
 
-  unsigned CReg = ShiftEntryIt->CReg;
   unsigned Opcode = 0;
   switch (I.getOpcode()) {
   case TargetOpcode::G_SHL:
@@ -1570,16 +1581,11 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
   unsigned Op0Reg = I.getOperand(1).getReg();
   unsigned Op1Reg = I.getOperand(2).getReg();
 
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
-          ShiftEntryIt->CReg)
-      .addReg(Op1Reg);
+  assert(MRI.getType(Op1Reg).getSizeInBits() == 8);
 
-  // The shift instruction uses X86::CL. If we defined a super-register
-  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
-  if (CReg != X86::CL)
-    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::KILL),
-            X86::CL)
-        .addReg(CReg, RegState::Kill);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+          X86::CL)
+    .addReg(Op1Reg);
 
   MachineInstr &ShiftInst =
       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
@@ -1608,8 +1614,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
          "Arguments and return value types must match");
 
-  const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
-  if (RegRB.getID() != X86::GPRRegBankID)
+  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
+  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
     return false;
 
   const static unsigned NumTypes = 4; // i8, i16, i32, i64
@@ -1707,7 +1713,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   const DivRemEntry &TypeEntry = *OpEntryIt;
   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
 
-  const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
+  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
   if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp
index 28940754a203..8f74a8fe041d 100644
--- a/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/lib/Target/X86/X86InterleavedAccess.cpp
@@ -1,9 +1,8 @@
 //===- X86InterleavedAccess.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -194,7 +193,7 @@ void X86InterleavedAccessGroup::decompose(
 
   // Decompose the load instruction.
   LoadInst *LI = cast<LoadInst>(VecInst);
-  Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
+  Type *VecBaseTy, *VecBasePtrTy;
   Value *VecBasePtr;
   unsigned int NumLoads = NumSubVectors;
   // In the case of stride 3 with a vector of 32 elements load the information
@@ -202,18 +201,22 @@ void X86InterleavedAccessGroup::decompose(
   // [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
   unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
   if (VecLength == 768 || VecLength == 1536) {
-    Type *VecTran =
-        VectorType::get(Type::getInt8Ty(LI->getContext()), 16)->getPointerTo();
-    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTran);
+    VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
+    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
+    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
     NumLoads = NumSubVectors * (VecLength / 384);
-  } else
+  } else {
+    VecBaseTy = SubVecTy;
+    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
     VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
+  }
   // Generate N loads of T type.
   for (unsigned i = 0; i < NumLoads; i++) {
     // TODO: Support inbounds GEP.
-    Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
+    Value *NewBasePtr =
+        Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
     Instruction *NewLoad =
-        Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
+        Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, LI->getAlignment());
     DecomposedVectors.push_back(NewLoad);
   }
 }
@@ -416,7 +419,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
   }
 
   reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16),
-		   NumOfElm, 4, Builder);
+                   NumOfElm, 4, Builder);
 }
 
 //  createShuffleStride returns shuffle mask of size N.
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 151e1b9136c4..40141d894629 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1,9 +1,8 @@
 //===-- X86IntrinsicsInfo.h - X86 Intrinsics ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,21 +19,22 @@
 namespace llvm {
 
 enum IntrinsicType : uint16_t {
+  CVTNEPS2BF16_MASK,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
   INTR_TYPE_3OP_IMM8,
-  CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
-  CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
-  INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
-  INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
-  INTR_TYPE_3OP_MASK,
-  IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
-  INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
+  CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV,
+  CVTPD2PS_MASK,
+  INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE,
+  INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE,
+  INTR_TYPE_1OP_MASK, INTR_TYPE_2OP_MASK,
+  IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_SAE,
+  INTR_TYPE_SCALAR_MASK_RND,
+  INTR_TYPE_3OP_SCALAR_MASK_SAE,
   COMPRESS_EXPAND_IN_REG,
-  TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
+  TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
-  FIXUPIMMS_MASKZ, GATHER_AVX2,
+  FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2,
   ROUNDP, ROUNDS
 };
 
@@ -64,47 +64,47 @@ struct IntrinsicData {
  * the alphabetical order.
  */
 static const IntrinsicData IntrinsicsWithChain[] = {
-  X86_INTRINSIC_DATA(avx2_gather_d_d,      GATHER_AVX2, X86::VPGATHERDDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_d_256,  GATHER_AVX2, X86::VPGATHERDDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_pd,     GATHER_AVX2, X86::VGATHERDPDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_ps,     GATHER_AVX2, X86::VGATHERDPSrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_q,      GATHER_AVX2, X86::VPGATHERDQrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_q_256,  GATHER_AVX2, X86::VPGATHERDQYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_d,      GATHER_AVX2, X86::VPGATHERQDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_d_256,  GATHER_AVX2, X86::VPGATHERQDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_pd,     GATHER_AVX2, X86::VGATHERQPDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_ps,     GATHER_AVX2, X86::VGATHERQPSrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_q,      GATHER_AVX2, X86::VPGATHERQQrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_q_256,  GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_d,      GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_d_256,  GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_pd,     GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_ps,     GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_q,      GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_q_256,  GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_d,      GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_d_256,  GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_pd,     GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_ps,     GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_q,      GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_q_256,  GATHER_AVX2, 0, 0),
 
-  X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),
 
   X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
                      X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
@@ -115,30 +115,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
                      X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
 
-  X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, 0, 0),
 
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
@@ -249,47 +249,47 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNCUS, 0),
 
-  X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, 0, 0),
 
-  X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0),
   X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
                      X86::VSCATTERPF1DPDm),
   X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
@@ -298,24 +298,24 @@ static const IntrinsicData IntrinsicsWithChain[] = {
                      X86::VSCATTERPF1QPDm),
   X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
                      X86::VSCATTERPF1QPSm),
-  X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
-  X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
-  X86_INTRINSIC_DATA(rdpmc,     RDPMC,  X86ISD::RDPMC_DAG, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
+  X86_INTRINSIC_DATA(rdpmc,     RDPMC,  X86::RDPMC, 0),
   X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
   X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
   X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
   X86_INTRINSIC_DATA(rdseed_16, RDSEED, X86ISD::RDSEED, 0),
   X86_INTRINSIC_DATA(rdseed_32, RDSEED, X86ISD::RDSEED, 0),
   X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
-  X86_INTRINSIC_DATA(rdtsc,     RDTSC,  X86ISD::RDTSC_DAG, 0),
-  X86_INTRINSIC_DATA(rdtscp,    RDTSC,  X86ISD::RDTSCP_DAG, 0),
-  X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
+  X86_INTRINSIC_DATA(rdtsc,     RDTSC,  X86::RDTSC, 0),
+  X86_INTRINSIC_DATA(rdtscp,    RDTSC,  X86::RDTSCP, 0),
+  X86_INTRINSIC_DATA(xgetbv,    XGETBV, X86::XGETBV, 0),
   X86_INTRINSIC_DATA(xtest,     XTEST,  X86ISD::XTEST,  0),
 };
 
@@ -340,9 +340,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(addcarry_64,       ADX, X86ISD::ADC, X86ISD::ADD),
   X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
   X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
+  X86_INTRINSIC_DATA(avx_blendv_pd_256, BLENDV, X86ISD::BLENDV, 0),
+  X86_INTRINSIC_DATA(avx_blendv_ps_256, BLENDV, X86ISD::BLENDV, 0),
   X86_INTRINSIC_DATA(avx_cmp_pd_256,    INTR_TYPE_3OP, X86ISD::CMPP, 0),
   X86_INTRINSIC_DATA(avx_cmp_ps_256,    INTR_TYPE_3OP, X86ISD::CMPP, 0),
-  X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
+  X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
   X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
@@ -369,6 +371,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
   X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+  X86_INTRINSIC_DATA(avx2_pavg_b,  INTR_TYPE_2OP, X86ISD::AVG, 0),
+  X86_INTRINSIC_DATA(avx2_pavg_w,  INTR_TYPE_2OP, X86ISD::AVG, 0),
+  X86_INTRINSIC_DATA(avx2_pblendvb, BLENDV, X86ISD::BLENDV, 0),
   X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -389,10 +394,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@@ -405,39 +410,45 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
+  X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
   X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
-  X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
-  X86_INTRINSIC_DATA(avx512_cvtsi2sd64,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvtsi2ss32,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvtsi2ss64,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
-  X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
-  X86_INTRINSIC_DATA(avx512_cvtusi2ss,   INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
+  X86_INTRINSIC_DATA(avx512_conflict_d_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_conflict_d_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_conflict_d_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_conflict_q_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_conflict_q_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_conflict_q_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_cvtsi2sd64,  INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512_cvtsi2ss32,  INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512_cvtsi2ss64,  INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_cvtusi2ss,   INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+  X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_dbpsadbw_128, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_dbpsadbw_256, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
   X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
-  X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
-  X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+  X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
+  X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
@@ -448,80 +459,32 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
   X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
   X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
-  X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FADDS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FADDS_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FADDS, X86ISD::FADDS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FADDS, X86ISD::FADDS_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_sd,     CMP_MASK_SCALAR_CC,
-                     X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+                     X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cmp_ss,     CMP_MASK_SCALAR_CC,
-                     X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+                     X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
 
-  X86_INTRINSIC_DATA(avx512_mask_compress_b_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_b_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_b_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_d_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_d_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_d_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
+  X86_INTRINSIC_DATA(avx512_mask_compress,        COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_q_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_q_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_q_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_w_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_w_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_compress_w_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
-                     ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps,     CVTPD2PS_MASK,
                      X86ISD::VFPROUND, X86ISD::VMFPROUND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
-                     ISD::FP_ROUND, X86ISD::VFPROUND_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2UI, 0),
@@ -539,8 +502,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
-                     ISD::FP_EXTEND, X86ISD::VFPEXT_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK_SAE,
+                     ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
@@ -559,164 +522,116 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::CVTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
-                     ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTSI2P, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
-                     ISD::SINT_TO_FP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
-                     ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::VFPROUNDS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::VFPEXTS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK,
+                     X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+  X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RND,
+                     X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2SI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
+  X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTTP2UI, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
-                     ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
-                     ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTUI2P, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
-                     ISD::UINT_TO_FP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
-                     ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
-  X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FDIVS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FDIVS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_b_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_b_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_b_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_d_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_d_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_d_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_q_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_q_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_q_512,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_w_128,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_w_256,  COMPRESS_EXPAND_IN_REG,
-                     X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_expand_w_512,  COMPRESS_EXPAND_IN_REG,
+  X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK,
+                     X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+  X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FDIVS, X86ISD::FDIVS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FDIVS, X86ISD::FDIVS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_expand,        COMPRESS_EXPAND_IN_REG,
                      X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+  X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMM, X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMM, X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
-                     X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FGETEXPS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FGETEXPS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::FGETEXP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::FGETEXP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::FGETEXP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::FGETEXP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_SAE,
                      X86ISD::VGETMANT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_SAE,
                      X86ISD::VGETMANT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK,
-                     X86ISD::VGETMANT, X86ISD::VGETMANT_RND),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK_SAE,
+                     X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK_SAE,
                      X86ISD::VGETMANT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK,
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK_SAE,
                      X86ISD::VGETMANT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK,
-                     X86ISD::VGETMANT, X86ISD::VGETMANT_RND),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK,
-                     X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK,
-                     X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
-                     X86ISD::FMAXS, X86ISD::FMAXS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
-                     X86ISD::FMAXS, X86ISD::FMAXS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
-                     X86ISD::FMINS, X86ISD::FMINS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
-                     X86ISD::FMINS, X86ISD::FMINS_RND),
-  X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMULS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FMULS_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_SAE,
+                     X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+                     X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+                     X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FMINS, X86ISD::FMINS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+                     X86ISD::FMINS, X86ISD::FMINS_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMULS, X86ISD::FMULS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FMULS, X86ISD::FMULS_RND),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
                      X86ISD::VTRUNC, X86ISD::VMTRUNC),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
@@ -737,10 +652,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VTRUNC, X86ISD::VMTRUNC),
   X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
                      X86ISD::VTRUNC, X86ISD::VMTRUNC),
-  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
-                     ISD::TRUNCATE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
-                     ISD::TRUNCATE, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
                      X86ISD::VTRUNC, X86ISD::VMTRUNC),
   X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
@@ -749,10 +660,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      ISD::TRUNCATE, X86ISD::VMTRUNC),
   X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
                      X86ISD::VTRUNC, X86ISD::VMTRUNC),
-  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
-                     ISD::TRUNCATE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
-                     ISD::TRUNCATE, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
                      X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
   X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
@@ -825,62 +732,62 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VTRUNCUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
                      X86ISD::VTRUNCUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
-  X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
-  X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_RND),
-  X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_RND),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, X86ISD::VREDUCE_RND),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, X86ISD::VREDUCE_RND),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_RND),
-  X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_RND),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_RND),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_RND),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
   X86_INTRINSIC_DATA(avx512_mask_rndscale_sd,   INTR_TYPE_SCALAR_MASK,
-                     X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_RND),
+                     X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
   X86_INTRINSIC_DATA(avx512_mask_rndscale_ss,   INTR_TYPE_SCALAR_MASK,
-                     X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_RND),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK,
                      X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK,
                      X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::SCALEF, X86ISD::SCALEF_RND),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK,
                      X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK,
                      X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
-                     X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
-                     X86ISD::SCALEF, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::SCALEFS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::SCALEFS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FSQRTS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FSQRTS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FSUBS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::FSUBS_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::SCALEF, X86ISD::SCALEF_RND),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FSUBS, X86ISD::FSUBS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::FSUBS, X86ISD::FSUBS_RND),
   X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTPH2PS, 0),
   X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTPH2PS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
+  X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_SAE,
+                     X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_SAE),
   X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
                      X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
   X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
@@ -893,28 +800,30 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
                      X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_512, FIXUPIMM_MASKZ,
-                     X86ISD::VFIXUPIMM, 0),
+                     X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_128, FIXUPIMM_MASKZ,
                      X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_256, FIXUPIMM_MASKZ,
                      X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_512, FIXUPIMM_MASKZ,
-                     X86ISD::VFIXUPIMM, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMMS_MASKZ,
-                     X86ISD::VFIXUPIMMS, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
-                     X86ISD::VFIXUPIMMS, 0),
+                     X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMM_MASKZ,
+                     X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
+  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMM_MASKZ,
+                     X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
 
-  X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
-  X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
-  X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
-  X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
+  X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+  X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+  X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
+  X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
   X86_INTRINSIC_DATA(avx512_mul_pd_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
   X86_INTRINSIC_DATA(avx512_mul_ps_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
   X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
   X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+  X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
+  X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
   X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -943,11 +852,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@@ -971,11 +880,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -990,10 +899,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
-  X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
-  X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
-  X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
-  X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
+  X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
+  X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
+  X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
+  X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
@@ -1002,14 +911,16 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
+  X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
   X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
   X86_INTRINSIC_DATA(avx512_sub_pd_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
   X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
+  X86_INTRINSIC_DATA(avx512_uitofp_round, INTR_TYPE_1OP, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
   X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
   X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
@@ -1071,6 +982,16 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
   X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+  // bfloat16
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_128, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_256, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_512, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_256, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_512, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_128, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+  X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
   X86_INTRINSIC_DATA(bmi_bextr_32,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bextr_64,         INTR_TYPE_2OP, X86ISD::BEXTR, 0),
   X86_INTRINSIC_DATA(bmi_bzhi_32,          INTR_TYPE_2OP, X86ISD::BZHI, 0),
@@ -1111,6 +1032,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_cvtps2dq,     INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvtsd2si,     INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvtsd2si64,   INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+  X86_INTRINSIC_DATA(sse2_cvtsd2ss,     INTR_TYPE_2OP, X86ISD::VFPROUNDS, 0),
   X86_INTRINSIC_DATA(sse2_cvttpd2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttps2dq,    INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
   X86_INTRINSIC_DATA(sse2_cvttsd2si,    INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
@@ -1123,6 +1045,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+  X86_INTRINSIC_DATA(sse2_pavg_b,       INTR_TYPE_2OP, X86ISD::AVG, 0),
+  X86_INTRINSIC_DATA(sse2_pavg_w,       INTR_TYPE_2OP, X86ISD::AVG, 0),
   X86_INTRINSIC_DATA(sse2_pmadd_wd,     INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
   X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
   X86_INTRINSIC_DATA(sse2_pmulh_w,      INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1156,8 +1080,11 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse3_hadd_ps,      INTR_TYPE_2OP, X86ISD::FHADD, 0),
   X86_INTRINSIC_DATA(sse3_hsub_pd,      INTR_TYPE_2OP, X86ISD::FHSUB, 0),
   X86_INTRINSIC_DATA(sse3_hsub_ps,      INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+  X86_INTRINSIC_DATA(sse41_blendvpd,    BLENDV, X86ISD::BLENDV, 0),
+  X86_INTRINSIC_DATA(sse41_blendvps,    BLENDV, X86ISD::BLENDV, 0),
   X86_INTRINSIC_DATA(sse41_insertps,    INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
   X86_INTRINSIC_DATA(sse41_packusdw,    INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+  X86_INTRINSIC_DATA(sse41_pblendvb,    BLENDV, X86ISD::BLENDV, 0),
   X86_INTRINSIC_DATA(sse41_phminposuw,  INTR_TYPE_1OP, X86ISD::PHMINPOS, 0),
   X86_INTRINSIC_DATA(sse41_round_pd,    ROUNDP, X86ISD::VRNDSCALE, 0),
   X86_INTRINSIC_DATA(sse41_round_ps,    ROUNDP, X86ISD::VRNDSCALE, 0),
@@ -1200,14 +1127,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
                      X86ISD::GF2P8MULB, 0),
 
-  X86_INTRINSIC_DATA(xop_vpcomb,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomd,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomq,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
-  X86_INTRINSIC_DATA(xop_vpcomub,       INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomud,       INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomuq,       INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomuw,       INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
-  X86_INTRINSIC_DATA(xop_vpcomw,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),
   X86_INTRINSIC_DATA(xop_vpermil2pd,     INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
   X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
   X86_INTRINSIC_DATA(xop_vpermil2ps,     INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 4a49fa68dd06..00fb1b573858 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -1,9 +1,8 @@
 //===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -134,9 +133,15 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
 
     // Shifts and SDIV
     getActionDefinitionsBuilder(
-        {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
-        .legalFor({s8, s16, s32})
-        .clampScalar(0, s8, s32);
+        {G_SDIV, G_SREM, G_UDIV, G_UREM})
+      .legalFor({s8, s16, s32})
+      .clampScalar(0, s8, s32);
+
+    getActionDefinitionsBuilder(
+        {G_SHL, G_LSHR, G_ASHR})
+      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
+      .clampScalar(0, s8, s32)
+      .clampScalar(1, s8, s8);
   }
 
   // Control-flow
@@ -236,12 +241,19 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
       .clampScalar(1, s32, s64)
       .widenScalarToNextPow2(1);
 
-  // Shifts and SDIV
+  // Divisions
   getActionDefinitionsBuilder(
-      {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
+      {G_SDIV, G_SREM, G_UDIV, G_UREM})
       .legalFor({s8, s16, s32, s64})
       .clampScalar(0, s8, s64);
 
+  // Shifts
+  getActionDefinitionsBuilder(
+    {G_SHL, G_LSHR, G_ASHR})
+    .legalFor({{s8, s8}, {s16, s8}, {s32, s8}, {s64, s8}})
+    .clampScalar(0, s8, s64)
+    .clampScalar(1, s8, s8);
+
   // Merge/Unmerge
   setAction({G_MERGE_VALUES, s128}, Legal);
   setAction({G_UNMERGE_VALUES, 1, s128}, Legal);
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index 135950a95f84..d21707b9ab9b 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -1,10 +1,9 @@
 //===- X86LegalizerInfo.h ------------------------------------------*- C++
 //-*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2816f8c62bfb..b1fefaa84be4 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86InstComments.h"
+#include "MCTargetDesc/X86ATTInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86InstComments.h"
 #include "MCTargetDesc/X86TargetStreamer.h"
 #include "Utils/X86ShuffleDecode.h"
 #include "X86AsmPrinter.h"
@@ -101,9 +100,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
 }
 
 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
-  OutStreamer->EmitInstruction(Inst, getSubtargetInfo(),
-                               EnablePrintSchedInfo &&
-                                   !(Inst.getFlags() & X86::NO_SCHED_INFO));
+  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
 }
 
@@ -438,7 +435,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       OutMI.addOperand(MaybeMCOp.getValue());
 
   // Handle a few special cases to eliminate operand modifiers.
-ReSimplify:
   switch (OutMI.getOpcode()) {
   case X86::LEA64_32r:
   case X86::LEA64r:
@@ -554,11 +550,6 @@ ReSimplify:
     case X86::TAILJMPd64:
       Opcode = X86::JMP_1;
       goto SetTailJmpOpcode;
-    case X86::TAILJMPd_CC:
-    case X86::TAILJMPd64_CC:
-      Opcode = X86::GetCondBranchFromCond(
-          static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
-      goto SetTailJmpOpcode;
 
     SetTailJmpOpcode:
       MCOperand Saved = OutMI.getOperand(0);
@@ -568,6 +559,17 @@ ReSimplify:
       break;
     }
 
+  case X86::TAILJMPd_CC:
+  case X86::TAILJMPd64_CC: {
+    MCOperand Saved = OutMI.getOperand(0);
+    MCOperand Saved2 = OutMI.getOperand(1);
+    OutMI = MCInst();
+    OutMI.setOpcode(X86::JCC_1);
+    OutMI.addOperand(Saved);
+    OutMI.addOperand(Saved2);
+    break;
+  }
+
   case X86::DEC16r:
   case X86::DEC32r:
   case X86::INC16r:
@@ -586,19 +588,6 @@ ReSimplify:
     }
     break;
 
-  // These are pseudo-ops for OR to help with the OR->ADD transformation.  We do
-  // this with an ugly goto in case the resultant OR uses EAX and needs the
-  // short form.
-  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr);   goto ReSimplify;
-  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr);   goto ReSimplify;
-  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr);   goto ReSimplify;
-  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri);   goto ReSimplify;
-  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri);   goto ReSimplify;
-  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
-  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8);  goto ReSimplify;
-  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8);  goto ReSimplify;
-  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8);  goto ReSimplify;
-
   // We don't currently select the correct instruction form for instructions
   // which have a short %eax, etc. form. Handle this by custom lowering, for
   // now.
@@ -694,16 +683,9 @@ ReSimplify:
 
 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                  const MachineInstr &MI) {
-
-  bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
+  bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
                   MI.getOpcode() == X86::TLS_base_addr64;
-
-  bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
-
-  MCContext &context = OutStreamer->getContext();
-
-  if (needsPadding)
-    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+  MCContext &Ctx = OutStreamer->getContext();
 
   MCSymbolRefExpr::VariantKind SRVK;
   switch (MI.getOpcode()) {
@@ -721,51 +703,86 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
     llvm_unreachable("unexpected opcode");
   }
 
-  MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
-  const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);
-
-  MCInst LEA;
-  if (is64Bits) {
-    LEA.setOpcode(X86::LEA64r);
-    LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
-    LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
-    LEA.addOperand(MCOperand::createImm(1));        // scale
-    LEA.addOperand(MCOperand::createReg(0));        // index
-    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
-    LEA.addOperand(MCOperand::createReg(0));        // seg
-  } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
-    LEA.setOpcode(X86::LEA32r);
-    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
-    LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
-    LEA.addOperand(MCOperand::createImm(1));        // scale
-    LEA.addOperand(MCOperand::createReg(0));        // index
-    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
-    LEA.addOperand(MCOperand::createReg(0));        // seg
+  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
+      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
+
+  // As of binutils 2.32, ld reports a bogus TLS relaxation error when it tries
+  // to relax a GD/LD code sequence that uses R_X86_64_GOTPCREL (instead of
+  // R_X86_64_GOTPCRELX) to IE/LE (binutils PR24784). Work around the bug by
+  // only using GOT when GOTPCRELX is enabled.
+  // TODO: Delete the workaround when GOTPCRELX becomes commonplace.
+  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
+                Ctx.getAsmInfo()->canRelaxRelocations();
+
+  if (Is64Bits) {
+    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
+    if (NeedsPadding)
+      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
+                                .addReg(X86::RDI)
+                                .addReg(X86::RIP)
+                                .addImm(1)
+                                .addReg(0)
+                                .addExpr(Sym)
+                                .addReg(0));
+    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
+    if (NeedsPadding) {
+      if (!UseGot)
+        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+    }
+    if (UseGot) {
+      const MCExpr *Expr = MCSymbolRefExpr::create(
+          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
+                                  .addReg(X86::RIP)
+                                  .addImm(1)
+                                  .addReg(0)
+                                  .addExpr(Expr)
+                                  .addReg(0));
+    } else {
+      EmitAndCountInstruction(
+          MCInstBuilder(X86::CALL64pcrel32)
+              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
+                                               MCSymbolRefExpr::VK_PLT, Ctx)));
+    }
   } else {
-    LEA.setOpcode(X86::LEA32r);
-    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
-    LEA.addOperand(MCOperand::createReg(0));        // base
-    LEA.addOperand(MCOperand::createImm(1));        // scale
-    LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
-    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
-    LEA.addOperand(MCOperand::createReg(0));        // seg
-  }
-  EmitAndCountInstruction(LEA);
+    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
+      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
+                                  .addReg(X86::EAX)
+                                  .addReg(0)
+                                  .addImm(1)
+                                  .addReg(X86::EBX)
+                                  .addExpr(Sym)
+                                  .addReg(0));
+    } else {
+      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
+                                  .addReg(X86::EAX)
+                                  .addReg(X86::EBX)
+                                  .addImm(1)
+                                  .addReg(0)
+                                  .addExpr(Sym)
+                                  .addReg(0));
+    }
 
-  if (needsPadding) {
-    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
-    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
-    EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
+    if (UseGot) {
+      const MCExpr *Expr =
+          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
+      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
+                                  .addReg(X86::EBX)
+                                  .addImm(1)
+                                  .addReg(0)
+                                  .addExpr(Expr)
+                                  .addReg(0));
+    } else {
+      EmitAndCountInstruction(
+          MCInstBuilder(X86::CALLpcrel32)
+              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
+                                               MCSymbolRefExpr::VK_PLT, Ctx)));
+    }
   }
-
-  StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
-  MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
-  const MCSymbolRefExpr *tlsRef =
-      MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context);
-
-  EmitAndCountInstruction(
-      MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
-          .addExpr(tlsRef));
 }
 
 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
@@ -778,7 +795,7 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
 
   unsigned NopSize;
   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
-  Opc = IndexReg = Displacement = SegmentReg = 0;
+  IndexReg = Displacement = SegmentReg = 0;
   BaseReg = X86::RAX;
   ScaleVal = 1;
   switch (NumBytes) {
@@ -963,6 +980,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
       MI.addOperand(MaybeOperand.getValue());
 
+  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
   OutStreamer->EmitInstruction(MI, getSubtargetInfo());
 }
 
@@ -1374,7 +1392,8 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
     MBB = MBB->getPrevNode();
     MBBI = MBB->end();
   }
-  return --MBBI;
+  --MBBI;
+  return MBBI;
 }
 
 static const Constant *getConstantFromPool(const MachineInstr &MI,
@@ -1668,6 +1687,77 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case X86::TLS_base_addr64:
     return LowerTlsAddr(MCInstLowering, *MI);
 
+  // Loading/storing mask pairs requires two kmov operations. The second of these
+  // needs a 2-byte displacement relative to the specified address (with a 32-bit
+  // spill size). Pairs of 1-bit masks up to 16-bit masks all use the same spill
+  // size; they are all stored using MASKPAIR16STORE and loaded using MASKPAIR16LOAD.
+  //
+  // The displacement value might wrap around in theory, hence the asserts in both
+  // cases.
+  case X86::MASKPAIR16LOAD: {
+    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    const X86RegisterInfo *RI =
+      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+    unsigned Reg = MI->getOperand(0).getReg();
+    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+    // Load the first mask register
+    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
+    MIB.addReg(Reg0);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+      MIB.addOperand(Op.getValue());
+    }
+    EmitAndCountInstruction(MIB);
+
+    // Load the second mask register of the pair
+    MIB = MCInstBuilder(X86::KMOVWkm);
+    MIB.addReg(Reg1);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp) {
+        MIB.addImm(Disp + 2);
+      } else {
+        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+        MIB.addOperand(Op.getValue());
+      }
+    }
+    EmitAndCountInstruction(MIB);
+    return;
+  }
+
+  case X86::MASKPAIR16STORE: {
+    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    const X86RegisterInfo *RI =
+      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+    unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+    // Store the first mask register
+    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
+    for (int i = 0; i < X86::AddrNumOperands; ++i)
+      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
+    MIB.addReg(Reg0);
+    EmitAndCountInstruction(MIB);
+
+    // Store the second mask register of the pair
+    MIB = MCInstBuilder(X86::KMOVWmk);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp) {
+        MIB.addImm(Disp + 2);
+      } else {
+        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
+        MIB.addOperand(Op.getValue());
+      }
+    }
+    MIB.addReg(Reg1);
+    EmitAndCountInstruction(MIB);
+    return;
+  }
+
   case X86::MOVPC32r: {
     // This is a pseudo op for a two instruction sequence with a label, which
     // looks like:
@@ -1861,8 +1951,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<int, 64> Mask;
       DecodePSHUFBMask(C, Width, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
-                                !EnablePrintSchedInfo);
+        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
     }
     break;
   }
@@ -1934,8 +2023,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<int, 16> Mask;
       DecodeVPERMILPMask(C, ElSize, Width, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
-                                !EnablePrintSchedInfo);
+        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
     }
     break;
   }
@@ -1966,8 +2054,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<int, 16> Mask;
       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
-                                !EnablePrintSchedInfo);
+        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
     }
     break;
   }
@@ -1984,8 +2071,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       SmallVector<int, 16> Mask;
       DecodeVPPERMMask(C, Width, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
-                                !EnablePrintSchedInfo);
+        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
     }
     break;
   }
@@ -2002,7 +2088,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
       if (auto *CF = dyn_cast<ConstantFP>(C)) {
         CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
-        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+        OutStreamer->AddComment(CS.str());
       }
     }
     break;
@@ -2099,7 +2185,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
           }
         }
         CS << "]";
-        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+        OutStreamer->AddComment(CS.str());
       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
         CS << "<";
         for (int l = 0; l != NumLanes; ++l) {
@@ -2111,7 +2197,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
           }
         }
         CS << ">";
-        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+        OutStreamer->AddComment(CS.str());
       }
     }
     break;
@@ -2198,14 +2284,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
         printConstant(C, CS);
       }
       CS << "]";
-      OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+      OutStreamer->AddComment(CS.str());
     }
   }
 
   MCInst TmpInst;
   MCInstLowering.Lower(MI, TmpInst);
-  if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment))
-    TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO);
 
   // Stackmap shadows cannot include branch targets, so we can count the bytes
   // in a call towards the shadow, but must ensure that the no thread returns
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
index 5433033671f3..05f846bfb219 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index e1183bd14796..d7e535598d81 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
index 5c09597d0442..c6da4b09dd60 100644
--- a/lib/Target/X86/X86MacroFusion.cpp
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -1,9 +1,8 @@
 //===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,59 +18,29 @@
 
 using namespace llvm;
 
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
-/// together. Given SecondMI, when FirstMI is unspecified, then check if
-/// SecondMI may be part of a fused pair at all.
-static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
-                                   const TargetSubtargetInfo &TSI,
-                                   const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
-  const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
-  // Check if this processor supports macro-fusion.
-  if (!ST.hasMacroFusion())
-    return false;
+namespace {
 
-  enum {
-    FuseTest,
-    FuseCmp,
-    FuseInc
-  } FuseKind;
+// The classification for the first instruction.
+enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
 
-  unsigned FirstOpcode = FirstMI
-                         ? FirstMI->getOpcode()
-                         : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
-  unsigned SecondOpcode = SecondMI.getOpcode();
+// The classification for the second instruction (jump).
+enum class JumpKind {
+  // JE, JL, JG and variants.
+  ELG,
+  // JA, JB and variants.
+  AB,
+  // JS, JP, JO and variants.
+  SPO,
+  // Not a fusable jump.
+  Invalid,
+};
 
-  switch (SecondOpcode) {
-  default:
-    return false;
-  case X86::JE_1:
-  case X86::JNE_1:
-  case X86::JL_1:
-  case X86::JLE_1:
-  case X86::JG_1:
-  case X86::JGE_1:
-    FuseKind = FuseInc;
-    break;
-  case X86::JB_1:
-  case X86::JBE_1:
-  case X86::JA_1:
-  case X86::JAE_1:
-    FuseKind = FuseCmp;
-    break;
-  case X86::JS_1:
-  case X86::JNS_1:
-  case X86::JP_1:
-  case X86::JNP_1:
-  case X86::JO_1:
-  case X86::JNO_1:
-    FuseKind = FuseTest;
-    break;
-  }
+} // namespace
 
-  switch (FirstOpcode) {
+static FirstInstrKind classifyFirst(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default:
-    return false;
+    return FirstInstrKind::Invalid;
   case X86::TEST8rr:
   case X86::TEST16rr:
   case X86::TEST32rr:
@@ -84,6 +53,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::TEST16mr:
   case X86::TEST32mr:
   case X86::TEST64mr:
+    return FirstInstrKind::Test;
   case X86::AND16ri:
   case X86::AND16ri8:
   case X86::AND16rm:
@@ -99,7 +69,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::AND8ri:
   case X86::AND8rm:
   case X86::AND8rr:
-    return true;
+    return FirstInstrKind::And;
   case X86::CMP16ri:
   case X86::CMP16ri8:
   case X86::CMP16rm:
@@ -119,6 +89,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::CMP8rm:
   case X86::CMP8rr:
   case X86::CMP8mr:
+    return FirstInstrKind::Cmp;
   case X86::ADD16ri:
   case X86::ADD16ri8:
   case X86::ADD16ri8_DB:
@@ -141,8 +112,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::ADD64rr:
   case X86::ADD64rr_DB:
   case X86::ADD8ri:
+  case X86::ADD8ri_DB:
   case X86::ADD8rm:
   case X86::ADD8rr:
+  case X86::ADD8rr_DB:
   case X86::SUB16ri:
   case X86::SUB16ri8:
   case X86::SUB16rm:
@@ -158,7 +131,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::SUB8ri:
   case X86::SUB8rm:
   case X86::SUB8rr:
-    return FuseKind == FuseCmp || FuseKind == FuseInc;
+    return FirstInstrKind::ALU;
   case X86::INC16r:
   case X86::INC32r:
   case X86::INC64r:
@@ -167,10 +140,87 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   case X86::DEC32r:
   case X86::DEC64r:
   case X86::DEC8r:
-    return FuseKind == FuseInc;
-  case X86::INSTRUCTION_LIST_END:
-    return true;
+    return FirstInstrKind::IncDec;
+  }
+}
+
+static JumpKind classifySecond(const MachineInstr &MI) {
+  X86::CondCode CC = X86::getCondFromBranch(MI);
+  if (CC == X86::COND_INVALID)
+    return JumpKind::Invalid;
+
+  switch (CC) {
+  default:
+    return JumpKind::Invalid;
+  case X86::COND_E:
+  case X86::COND_NE:
+  case X86::COND_L:
+  case X86::COND_LE:
+  case X86::COND_G:
+  case X86::COND_GE:
+    return JumpKind::ELG;
+  case X86::COND_B:
+  case X86::COND_BE:
+  case X86::COND_A:
+  case X86::COND_AE:
+    return JumpKind::AB;
+  case X86::COND_S:
+  case X86::COND_NS:
+  case X86::COND_P:
+  case X86::COND_NP:
+  case X86::COND_O:
+  case X86::COND_NO:
+    return JumpKind::SPO;
+  }
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);
+
+  // Check if this processor supports any kind of fusion.
+  if (!(ST.hasBranchFusion() || ST.hasMacroFusion()))
+    return false;
+
+  const JumpKind BranchKind = classifySecond(SecondMI);
+
+  if (BranchKind == JumpKind::Invalid)
+    return false; // Second cannot be fused with anything.
+
+  if (FirstMI == nullptr)
+    return true; // We're only checking whether Second can be fused at all.
+
+  const FirstInstrKind TestKind = classifyFirst(*FirstMI);
+
+  if (ST.hasBranchFusion()) {
+    // Branch fusion can merge CMP and TEST with all conditional jumps.
+    return (TestKind == FirstInstrKind::Cmp ||
+            TestKind == FirstInstrKind::Test);
+  }
+
+  if (ST.hasMacroFusion()) {
+    // Macro Fusion rules are a bit more complex. See Agner Fog's
+    // Microarchitecture table 9.2 "Instruction Fusion".
+    switch (TestKind) {
+    case FirstInstrKind::Test:
+    case FirstInstrKind::And:
+      return true;
+    case FirstInstrKind::Cmp:
+    case FirstInstrKind::ALU:
+      return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB;
+    case FirstInstrKind::IncDec:
+      return BranchKind == JumpKind::ELG;
+    case FirstInstrKind::Invalid:
+      return false;
+    }
   }
+
+  llvm_unreachable("unknown branch fusion type");
 }
 
 namespace llvm {
diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h
index 97ef1d6d3b61..d4ae54f657a5 100644
--- a/lib/Target/X86/X86MacroFusion.h
+++ b/lib/Target/X86/X86MacroFusion.h
@@ -1,9 +1,8 @@
 //===- X86MacroFusion.h - X86 Macro Fusion --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index b56d02b6bfb6..7f75598b0655 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -1,9 +1,8 @@
 //===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -569,11 +568,8 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
                                                  unsigned VReg,
                                                  int64_t AddrDispShift) {
   DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
-
   if (AddrDispShift != 0)
-    Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift,
-                                 DIExpression::NoDeref,
-                                 DIExpression::WithStackValue);
+    Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
 
   // Replace DBG_VALUE instruction with modified version.
   MachineBasicBlock *MBB = MI.getParent();
@@ -701,7 +697,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
 
     // Remove redundant address calculations. Do it only for -Os/-Oz since only
     // a code size gain is expected from this part of the pass.
-    if (MF.getFunction().optForSize())
+    if (MF.getFunction().hasOptSize())
       Changed |= removeRedundantAddrCalc(LEAs);
   }
 
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 85b9aecc2106..af974c805c36 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -1,9 +1,8 @@
 //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -98,7 +97,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
-  if (MF.getFunction().optForSize())
+  if (MF.getFunction().hasOptSize())
     return false;
 
   if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
@@ -113,14 +112,11 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
 
   bool MadeChange = false;
 
-  MachineBasicBlock *MBB;
-  unsigned int Cycles = 0;
-
   // Pad the identified basic blocks with NOOPs
   for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
        I != ReturnBBs.end(); ++I) {
-    MBB = I->first;
-    Cycles = I->second;
+    MachineBasicBlock *MBB = I->first;
+    unsigned Cycles = I->second;
 
     if (Cycles < Threshold) {
       // BB ends in a return. Skip over any DBG_VALUE instructions
diff --git a/lib/Target/X86/X86PfmCounters.td b/lib/Target/X86/X86PfmCounters.td
index a1a4210b5ebf..5610f4bc8873 100644
--- a/lib/Target/X86/X86PfmCounters.td
+++ b/lib/Target/X86/X86PfmCounters.td
@@ -1,9 +1,8 @@
 //===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index 355291916ee8..78fede3dcde2 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
 //===- X86RegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -160,7 +159,7 @@ const RegisterBankInfo::InstructionMapping &
 X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  auto Opc = MI.getOpcode();
+  unsigned Opc = MI.getOpcode();
 
   // Try the default logic for non-generic instructions that are either copies
   // or already have some operands assigned to banks.
@@ -174,17 +173,22 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_SUB:
   case TargetOpcode::G_MUL:
-  case TargetOpcode::G_SHL:
-  case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_ASHR:
     return getSameOperandsMapping(MI, false);
-    break;
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FDIV:
     return getSameOperandsMapping(MI, true);
-    break;
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR: {
+    unsigned NumOperands = MI.getNumOperands();
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+    auto Mapping = getValueMapping(getPartialMappingIdx(Ty, false), 3);
+    return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands);
+
+  }
   default:
     break;
   }
diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h
index e227880427f3..c1f3001c6180 100644
--- a/lib/Target/X86/X86RegisterBankInfo.h
+++ b/lib/Target/X86/X86RegisterBankInfo.h
@@ -1,9 +1,8 @@
 //===- X86RegisterBankInfo ---------------------------------------*- C++ -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/X86/X86RegisterBanks.td b/lib/Target/X86/X86RegisterBanks.td
index 6d17cd53a0c1..74c515850ab1 100644
--- a/lib/Target/X86/X86RegisterBanks.td
+++ b/lib/Target/X86/X86RegisterBanks.td
@@ -1,9 +1,8 @@
 //=- X86RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 55842a4a2091..2e2f1f9e438a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -164,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
     case X86::RFP32RegClassID:
     case X86::RFP64RegClassID:
     case X86::RFP80RegClassID:
+    case X86::VR512_0_15RegClassID:
     case X86::VR512RegClassID:
       // Don't return a super-class that would shrink the spill size.
       // That can happen with the vector and float classes.
@@ -216,6 +216,21 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
   }
 }
 
+bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                                           unsigned DefSubReg,
+                                           const TargetRegisterClass *SrcRC,
+                                           unsigned SrcSubReg) const {
+  // Prevent rewriting a copy where the destination size is larger than the
+  // input size. See PR41619.
+  // FIXME: Should this be factored into the base implementation somehow?
+  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
+      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
+    return false;
+
+  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
+                                                  SrcRC, SrcSubReg);
+}
+
 const TargetRegisterClass *
 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
@@ -497,6 +512,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   const X86FrameLowering *TFI = getFrameLowering(MF);
 
+  // Set the floating point control register as reserved.
+  Reserved.set(X86::FPCW);
+
   // Set the stack-pointer register and its aliases as reserved.
   for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
        ++I)
@@ -747,7 +765,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   }
 }
 
-unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const X86FrameLowering *TFI = getFrameLowering(MF);
   return TFI->hasFP(MF) ? FramePtr : StackPtr;
 }
@@ -760,3 +778,12 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
     FrameReg = getX86SubSuperRegister(FrameReg, 32);
   return FrameReg;
 }
+
+unsigned
+X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
+  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+  unsigned StackReg = getStackRegister();
+  if (Subtarget.isTarget64BitILP32())
+    StackReg = getX86SubSuperRegister(StackReg, 32);
+  return StackReg;
+}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 29401dadead0..b82920898069 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -50,7 +49,7 @@ private:
   unsigned BasePtr;
 
 public:
-  X86RegisterInfo(const Triple &TT);
+  explicit X86RegisterInfo(const Triple &TT);
 
   // FIXME: This should be tablegen'd like getDwarfRegNum is
   int getSEHRegNum(unsigned i) const;
@@ -75,6 +74,11 @@ public:
   getLargestLegalSuperClass(const TargetRegisterClass *RC,
                             const MachineFunction &MF) const override;
 
+  bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                            unsigned DefSubReg,
+                            const TargetRegisterClass *SrcRC,
+                            unsigned SrcSubReg) const override;
+
   /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
   /// values.
   const TargetRegisterClass *
@@ -129,15 +133,16 @@ public:
                            RegScavenger *RS = nullptr) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
   unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
-  unsigned getStackRegister() const { return StackPtr; }
-  unsigned getBaseRegister() const { return BasePtr; }
+  unsigned getPtrSizedStackRegister(const MachineFunction &MF) const;
+  Register getStackRegister() const { return StackPtr; }
+  Register getBaseRegister() const { return BasePtr; }
   /// Returns physical register used as frame pointer.
   /// This will always returns the frame pointer register, contrary to
   /// getFrameRegister() which returns the "base pointer" in situations
   /// involving a stack, frame and base pointer.
-  unsigned getFramePtr() const { return FramePtr; }
+  Register getFramePtr() const { return FramePtr; }
   // FIXME: Move to FrameInfok
   unsigned getSlotSize() const { return SlotSize; }
 };
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index aa20273f89ab..0528b90c1fd5 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,9 +1,8 @@
 //===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,6 +28,8 @@ let Namespace = "X86" in {
   def sub_32bit    : SubRegIndex<32>;
   def sub_xmm      : SubRegIndex<128>;
   def sub_ymm      : SubRegIndex<256>;
+  def sub_mask_0   : SubRegIndex<-1>;
+  def sub_mask_1   : SubRegIndex<-1, -1>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -278,7 +279,7 @@ def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
 // pseudo registers, but we still mark them as aliasing FP registers. That
 // way both kinds can be live without exceeding the stack depth. ST registers
 // are only live around inline assembly.
-def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
+def ST0 : X86Reg<"st", 0>, DwarfRegNum<[33, 12, 11]>;
 def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
 def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
 def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
@@ -288,7 +289,10 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
 def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
 
 // Floating-point status word
-def FPSW : X86Reg<"fpsw", 0>;
+def FPSW : X86Reg<"fpsr", 0>;
+
+// Floating-point control word
+def FPCW : X86Reg<"fpcr", 0>;
 
 // Status flags register.
 //
@@ -539,6 +543,9 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
   let isAllocatable = 0;
 }
 
+// Helper to allow %st to print as %st(0) when it's encoded in the instruction.
+def RSTi : RegisterOperand<RST, "printSTiRegOperand">;
+
 // Generic vector registers: VR64 and VR128.
 // Ensure that float types are declared first - only float is legal on SSE1.
 def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
@@ -547,17 +554,6 @@ def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128
 def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                           256, (sequence "YMM%u", 0, 15)>;
 
-// Special classes that help the assembly parser choose some alternate
-// instructions to favor 2-byte VEX encodings.
-def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
-                           128, (sequence "XMM%u", 0, 7)>;
-def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
-                           128, (sequence "XMM%u", 8, 15)>;
-def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
-                           256, (sequence "YMM%u", 0, 7)>;
-def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
-                           256, (sequence "YMM%u", 8, 15)>;
-
 // Status flags registers.
 def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
   let CopyCost = -1;  // Don't allow copying of status registers.
@@ -576,6 +572,10 @@ def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
 def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
                           512, (sequence "ZMM%u", 0, 31)>;
 
+// Represents the lower 16 registers that have VEX/legacy encodable subregs.
+def VR512_0_15 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+                               512, (sequence "ZMM%u", 0, 15)>;
+
 // Scalar AVX-512 floating point registers.
 def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
 
@@ -596,6 +596,16 @@ def VK16    : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
 def VK32    : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
 def VK64    : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
 
+// Mask register pairs
+def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
+                             [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
+
+def VK1PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK2PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK4PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK8PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK16PAIR  : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+
 def VK1WM   : RegisterClass<"X86", [v1i1],  16,  (sub VK1, K0)> {let Size = 16;}
 def VK2WM   : RegisterClass<"X86", [v2i1],  16,  (sub VK2, K0)> {let Size = 16;}
 def VK4WM   : RegisterClass<"X86", [v4i1],  16,  (sub VK4, K0)> {let Size = 16;}
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 08994cccb21e..b435b22e8ac7 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -1,9 +1,8 @@
 //======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86  --=====//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index 971a50196e45..7574e4b8f896 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -1,9 +1,8 @@
 //=- X86SchedBroadwell.td - X86 Broadwell Scheduling ---------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -82,6 +81,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 5>;
 def : ReadAdvance<ReadAfterVecYLd, 6>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -159,7 +160,6 @@ defm : BWWriteResPair<WriteCRC32, [BWPort1],   3>;
 def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
 
 defm : BWWriteResPair<WriteCMOV,  [BWPort06], 1>; // Conditional move.
-defm : BWWriteResPair<WriteCMOV2, [BWPort06,BWPort0156], 2, [1,1], 2>; // // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [BWPort1], 3, [1], 1>; // x87 conditional move.
 
 def  : WriteRes<WriteSETCC, [BWPort06]>; // Setcc.
@@ -186,7 +186,7 @@ defm : BWWriteResPair<WritePOPCNT,         [BWPort1], 3>;
 // Integer shifts and rotates.
 defm : BWWriteResPair<WriteShift,    [BWPort06],  1>;
 defm : BWWriteResPair<WriteShiftCL,  [BWPort06,BWPort0156],  3, [2,1], 3>;
-defm : BWWriteResPair<WriteRotate,   [BWPort06],  2, [2], 2>;
+defm : BWWriteResPair<WriteRotate,   [BWPort06],  1, [1], 1>;
 defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156],  3, [2,1], 3>;
 
 // SHLD/SHRD.
@@ -732,10 +732,10 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
 }
 def: InstRW<[BWWriteResGroup20], (instrs CWD,
                                          JCXZ, JECXZ, JRCXZ,
-                                         ADC8i8, SBB8i8)>;
-def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri",
-                                            "SBB8ri",
-                                            "SET(A|BE)r")>;
+                                         ADC8i8, SBB8i8,
+                                         ADC16i16, SBB16i16,
+                                         ADC32i32, SBB32i32,
+                                         ADC64i32, SBB64i32)>;
 
 def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
   let Latency = 2;
@@ -814,7 +814,6 @@ def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
   let ResourceCycles = [1,1,1,1];
 }
 def: InstRW<[BWWriteResGroup38], (instrs CALL64pcrel32)>;
-def: InstRW<[BWWriteResGroup38], (instregex "SET(A|BE)m")>;
 
 def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> {
   let Latency = 4;
@@ -890,8 +889,7 @@ def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup47], (instregex "(V?)PCMPGTQ(Y?)rr",
-                                            "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
 
 def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
   let Latency = 5;
@@ -965,6 +963,7 @@ def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
 }
 def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
                                          CVTSS2SDrm, VCVTSS2SDrm,
+                                         CVTSS2SDrm_Int, VCVTSS2SDrm_Int,
                                          VPSLLVQrm,
                                          VPSRLVQrm)>;
 
@@ -1103,6 +1102,14 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
 def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
                                             "ROR(8|16|32|64)m(1|i)")>;
 
+def BWWriteResGroup87_1 : SchedWriteRes<[BWPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[BWWriteResGroup87_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+                                           ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
 def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
   let Latency = 7;
   let NumMicroOps = 5;
@@ -1592,4 +1599,140 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def BWWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def BWWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[BWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                         XOR32rr, XOR64rr)>;
+
+def BWWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[BWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                          VXORPDrr)>;
+
+def BWWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[BWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def BWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def BWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def BWWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+                                              PSUBDrr, VPSUBDrr,
+                                              PSUBQrr, VPSUBQrr,
+                                              PSUBWrr, VPSUBWrr,
+                                              PCMPGTBrr, VPCMPGTBrr,
+                                              PCMPGTDrr, VPCMPGTDrr,
+                                              PCMPGTWrr, VPCMPGTWrr)>;
+
+def BWWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+                                              VPSUBDYrr,
+                                              VPSUBQYrr,
+                                              VPSUBWYrr,
+                                              VPCMPGTBYrr,
+                                              VPCMPGTDYrr,
+                                              VPCMPGTWYrr)>;
+
+def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [BWWritePCMPGTQ]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                 VPCMPGTQYrr)>;
+
+
+// CMOVs that use both Z and C flag require an extra uop.
+def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 2;
+  let ResourceCycles = [1,1];
+  let NumMicroOps = 2;
+}
+
+def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 7;
+  let ResourceCycles = [1,1,1];
+  let NumMicroOps = 3;
+}
+
+def BWCMOVA_CMOVBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [BWWriteCMOVA_CMOVBErr]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV]>
+]>;
+
+def BWCMOVA_CMOVBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [BWWriteCMOVA_CMOVBErm]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def BWWriteSETA_SETBEr : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 2;
+  let ResourceCycles = [1,1];
+  let NumMicroOps = 2;
+}
+
+def BWWriteSETA_SETBEm : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 3;
+  let ResourceCycles = [1,1,1,1];
+  let NumMicroOps = 4;
+}
+
+def BWSETA_SETBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [BWWriteSETA_SETBEr]>,
+  SchedVar<NoSchedPred,                         [WriteSETCC]>
+]>;
+
+def BWSETA_SETBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [BWWriteSETA_SETBEm]>,
+  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
+]>;
+
+def : InstRW<[BWSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[BWSETA_SETBErm], (instrs SETCCm)>;
+
 } // SchedModel
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 06a32fb0b1cd..284d1567c5c6 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -1,9 +1,8 @@
 //=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -87,6 +86,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 6>;
 def : ReadAdvance<ReadAfterVecYLd, 7>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -151,7 +152,7 @@ defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
 // Integer shifts and rotates.
 defm : HWWriteResPair<WriteShift,    [HWPort06],  1>;
 defm : HWWriteResPair<WriteShiftCL,  [HWPort06, HWPort0156],  3, [2,1], 3>;
-defm : HWWriteResPair<WriteRotate,   [HWPort06],  2, [2], 2>;
+defm : HWWriteResPair<WriteRotate,   [HWPort06],  1, [1], 1>;
 defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156],  3, [2,1], 3>;
 
 // SHLD/SHRD.
@@ -164,7 +165,6 @@ defm : HWWriteResPair<WriteJump,   [HWPort06],  1>;
 defm : HWWriteResPair<WriteCRC32,  [HWPort1],   3>;
 
 defm : HWWriteResPair<WriteCMOV,  [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move.
-defm : HWWriteResPair<WriteCMOV2, [HWPort06,HWPort0156], 3, [1,2], 3>; // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [HWPort1], 3, [1], 1>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [HWPort06]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
@@ -1126,7 +1126,6 @@ def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
   let ResourceCycles = [1,1];
 }
 def: InstRW<[HWWriteResGroup35], (instrs CWD, JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[HWWriteResGroup35], (instregex "SET(A|BE)r")>;
 
 def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
   let Latency = 7;
@@ -1172,7 +1171,6 @@ def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
   let ResourceCycles = [1,1,1,1];
 }
 def: InstRW<[HWWriteResGroup45], (instrs CALL64pcrel32)>;
-def: InstRW<[HWWriteResGroup45], (instregex "SET(A|BE)m")>;
 
 def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
   let Latency = 8;
@@ -1182,6 +1180,14 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
 def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
                                             "ROR(8|16|32|64)m(1|i)")>;
 
+def HWWriteResGroup46_1 : SchedWriteRes<[HWPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[HWWriteResGroup46_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+                                           ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
 def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
   let Latency = 8;
   let NumMicroOps = 5;
@@ -1391,8 +1397,8 @@ def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
   let ResourceCycles = [1,1,1];
 }
 def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
-                                           CVTSD2SSrm,
-                                           VCVTSD2SSrm)>;
+                                           CVTSD2SSrm, CVTSD2SSrm_Int,
+                                           VCVTSD2SSrm, VCVTSD2SSrm_Int)>;
 
 def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
   let Latency = 9;
@@ -1442,8 +1448,7 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
-                                            "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
 
 def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 11;
@@ -1847,4 +1852,170 @@ def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def HWWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def HWWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[HWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                         XOR32rr, XOR64rr)>;
+
+def HWWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[HWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                          VXORPDrr)>;
+
+def HWWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[HWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def HWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def HWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def HWWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+                                              PSUBDrr, VPSUBDrr,
+                                              PSUBQrr, VPSUBQrr,
+                                              PSUBWrr, VPSUBWrr,
+                                              PCMPGTBrr, VPCMPGTBrr,
+                                              PCMPGTDrr, VPCMPGTDrr,
+                                              PCMPGTWrr, VPCMPGTWrr)>;
+
+def HWWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+                                              VPSUBDYrr,
+                                              VPSUBQYrr,
+                                              VPSUBWYrr,
+                                              VPCMPGTBYrr,
+                                              VPCMPGTDYrr,
+                                              VPCMPGTWYrr)>;
+
+def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [HWWritePCMPGTQ]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                 VPCMPGTQYrr)>;
+
+
+// The 0x83 ADC/SBB opcodes have special support for immediate 0 to only require
+// a single uop. It does not apply to the GR8 encoding. And only applies to the
+// 8-bit immediate since using larger immediate for 0 would be silly.
+// Unfortunately, this optimization does not apply to the AX/EAX/RAX short
+// encodings we convert to in MCInstLowering so we exclude AX/EAX/RAX here since
+// we schedule before that point.
+// TODO: Should we disable using the short encodings on these CPUs?
+def HWFastADC0 : MCSchedPredicate<
+  CheckAll<[
+    CheckImmOperand<2, 0>,              // Second MCOperand is Imm and has value 0.
+    CheckNot<CheckRegOperand<1, AX>>,   // First MCOperand is not register AX
+    CheckNot<CheckRegOperand<1, EAX>>,  // First MCOperand is not register EAX
+    CheckNot<CheckRegOperand<1, RAX>>   // First MCOperand is not register RAX
+  ]>
+>;
+
+def HWWriteADC0 : SchedWriteRes<[HWPort06]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def HWWriteADC : SchedWriteVariant<[
+  SchedVar<HWFastADC0, [HWWriteADC0]>,
+  SchedVar<NoSchedPred, [WriteADC]>
+]>;
+
+def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8,
+                                      SBB16ri8, SBB32ri8, SBB64ri8)>;
+
+// CMOVs that use both Z and C flag require an extra uop.
+def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> {
+  let Latency = 3;
+  let ResourceCycles = [1,2];
+  let NumMicroOps = 3;
+}
+
+def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
+  let Latency = 8;
+  let ResourceCycles = [1,1,2];
+  let NumMicroOps = 4;
+}
+
+def HWCMOVA_CMOVBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [HWWriteCMOVA_CMOVBErr]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV]>
+]>;
+
+def HWCMOVA_CMOVBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [HWWriteCMOVA_CMOVBErm]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def HWWriteSETA_SETBEr : SchedWriteRes<[HWPort06,HWPort0156]> {
+  let Latency = 2;
+  let ResourceCycles = [1,1];
+  let NumMicroOps = 2;
+}
+
+def HWWriteSETA_SETBEm : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
+  let Latency = 3;
+  let ResourceCycles = [1,1,1,1];
+  let NumMicroOps = 4;
+}
+
+def HWSETA_SETBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [HWWriteSETA_SETBEr]>,
+  SchedVar<NoSchedPred,                         [WriteSETCC]>
+]>;
+
+def HWSETA_SETBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [HWWriteSETA_SETBEm]>,
+  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
+]>;
+
+def : InstRW<[HWSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[HWSETA_SETBErm], (instrs SETCCm)>;
+
 } // SchedModel
diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td
index 1c7f24375f61..41bd776648f7 100644
--- a/lib/Target/X86/X86SchedPredicates.td
+++ b/lib/Target/X86/X86SchedPredicates.td
@@ -1,9 +1,8 @@
 //===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -61,3 +60,27 @@ def IsThreeOperandsLEABody :
 // X86GenInstrInfo.
 def IsThreeOperandsLEAFn :
     TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>;
+
+// A predicate to check for COND_A and COND_BE CMOVs which have an extra uop
+// on recent Intel CPUs.
+def IsCMOVArr_Or_CMOVBErr : CheckAny<[
+  CheckImmOperand_s<3, "X86::COND_A">,
+  CheckImmOperand_s<3, "X86::COND_BE">
+]>;
+
+def IsCMOVArm_Or_CMOVBErm : CheckAny<[
+  CheckImmOperand_s<7, "X86::COND_A">,
+  CheckImmOperand_s<7, "X86::COND_BE">
+]>;
+
+// A predicate to check for COND_A and COND_BE SETCCs which have an extra uop
+// on recent Intel CPUs.
+def IsSETAr_Or_SETBEr : CheckAny<[
+  CheckImmOperand_s<1, "X86::COND_A">,
+  CheckImmOperand_s<1, "X86::COND_BE">
+]>;
+
+def IsSETAm_Or_SETBEm : CheckAny<[
+  CheckImmOperand_s<5, "X86::COND_A">,
+  CheckImmOperand_s<5, "X86::COND_BE">
+]>;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 9dbf0976989f..d40bdf728a48 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -1,9 +1,8 @@
 //=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -77,6 +76,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 6>;
 def : ReadAdvance<ReadAfterVecYLd, 7>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -159,7 +160,6 @@ defm : SBWriteResPair<WriteJump,  [SBPort5],   1>;
 defm : SBWriteResPair<WriteCRC32, [SBPort1],   3, [1], 1, 5>;
 
 defm : SBWriteResPair<WriteCMOV,  [SBPort05,SBPort015], 2, [1,1], 2>; // Conditional move.
-defm : SBWriteResPair<WriteCMOV2, [SBPort05,SBPort015], 3, [2,1], 3>; // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [SBPort05]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
@@ -615,13 +615,6 @@ def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
                                         MMX_PSIGNDrr,
                                         MMX_PSIGNWrr)>;
 
-def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>;
-
 def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
   let Latency = 2;
   let NumMicroOps = 2;
@@ -705,12 +698,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
 }
 def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
 
-def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-
 def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
   let Latency = 5;
   let NumMicroOps = 1;
@@ -772,13 +759,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
 }
 def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
 
-def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
-  let Latency = 3;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup43], (instregex "SET(A|BE)m")>;
-
 def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
   let Latency = 5;
   let NumMicroOps = 4;
@@ -1148,6 +1128,12 @@ def SBWriteFZeroIdiom : SchedWriteVariant<[
 def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
                                           VXORPDrr)>;
 
+def SBWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
 def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
     SchedVar<NoSchedPred,                          [WriteVecLogicX]>
@@ -1166,10 +1152,68 @@ def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                               PCMPGTDrr, VPCMPGTDrr,
                                               PCMPGTWrr, VPCMPGTWrr)>;
 
+def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
 def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
-    SchedVar<NoSchedPred,                          [SBWriteResGroup30]>
+    SchedVar<NoSchedPred,                          [SBWritePCMPGTQ]>
 ]>;
 def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
 
+// CMOVs that use both Z and C flag require an extra uop.
+def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
+  let Latency = 3;
+  let ResourceCycles = [2,1];
+  let NumMicroOps = 3;
+}
+
+def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
+  let Latency = 8;
+  let ResourceCycles = [1,2,1];
+  let NumMicroOps = 4;
+}
+
+def SBCMOVA_CMOVBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV]>
+]>;
+
+def SBCMOVA_CMOVBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+  let Latency = 3;
+  let ResourceCycles = [1,1,2];
+  let NumMicroOps = 4;
+}
+
+def SBSETA_SETBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
+  SchedVar<NoSchedPred,                         [WriteSETCC]>
+]>;
+
+def SBSETA_SETBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
+  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;
+
 } // SchedModel
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 2c9eb7516085..8f3e4ae62d53 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -1,9 +1,8 @@
 //=- X86SchedSkylake.td - X86 Skylake Client Scheduling ------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -81,6 +80,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 6>;
 def : ReadAdvance<ReadAfterVecYLd, 7>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -157,7 +158,6 @@ defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
 def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
 
 defm : SKLWriteResPair<WriteCMOV,  [SKLPort06], 1, [1], 1>; // Conditional move.
-defm : SKLWriteResPair<WriteCMOV2, [SKLPort06], 2, [2], 2>; // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [SKLPort1], 3, [1], 1>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [SKLPort06]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
@@ -183,7 +183,7 @@ defm : SKLWriteResPair<WritePOPCNT,         [SKLPort1], 3>;
 // Integer shifts and rotates.
 defm : SKLWriteResPair<WriteShift,    [SKLPort06],  1>;
 defm : SKLWriteResPair<WriteShiftCL,  [SKLPort06],  3, [3], 3>;
-defm : SKLWriteResPair<WriteRotate,   [SKLPort06],  2, [2], 2>;
+defm : SKLWriteResPair<WriteRotate,   [SKLPort06],  1, [1], 1>;
 defm : SKLWriteResPair<WriteRotateCL, [SKLPort06],  3, [3], 3>;
 
 // SHLD/SHRD.
@@ -659,8 +659,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr",
-                                            "VPBLENDD(Y?)rri",
-                                            "(V?)PSUB(B|D|Q|W)(Y?)rr")>;
+                                            "VPBLENDD(Y?)rri")>;
 
 def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
   let Latency = 1;
@@ -698,13 +697,6 @@ def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
 def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
                                           MMX_MOVDQ2Qrr)>;
 
-def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>;
-
 def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
@@ -735,9 +727,10 @@ def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
 }
 def: InstRW<[SKLWriteResGroup23], (instrs CWD,
                                           JCXZ, JECXZ, JRCXZ,
-                                          ADC8i8, SBB8i8)>;
-def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri",
-                                             "SBB8ri")>;
+                                          ADC8i8, SBB8i8,
+                                          ADC16i16, SBB16i16,
+                                          ADC32i32, SBB32i32,
+                                          ADC64i32, SBB64i32)>;
 
 def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
   let Latency = 2;
@@ -776,8 +769,7 @@ def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
-                                             "VPBROADCAST(B|W)rr",
-                                             "(V?)PCMPGTQ(Y?)rr")>;
+                                             "VPBROADCAST(B|W)rr")>;
 
 def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
   let Latency = 3;
@@ -839,13 +831,6 @@ def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
 }
 def: InstRW<[SKLWriteResGroup43], (instrs FNSTSWm)>;
 
-def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
-  let Latency = 3;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup44], (instregex "SET(A|BE)m")>;
-
 def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> {
   let Latency = 3;
   let NumMicroOps = 4;
@@ -1183,6 +1168,14 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
 def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
                                               "ROR(8|16|32|64)m(1|i)")>;
 
+def SKLWriteResGroup100_1 : SchedWriteRes<[SKLPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKLWriteResGroup100_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+                                             ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
 def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
   let Latency = 7;
   let NumMicroOps = 5;
@@ -1747,4 +1740,150 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKLWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def SKLWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[SKLWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                          XOR32rr, XOR64rr)>;
+
+def SKLWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                           VXORPDrr)>;
+
+def SKLWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def SKLWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SKLWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def SKLWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+                                               PCMPGTDrr, VPCMPGTDrr,
+                                               PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKLWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+                                               VPCMPGTDYrr,
+                                               VPCMPGTWYrr)>;
+
+def SKLWritePSUB : SchedWriteRes<[SKLPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKLWritePSUB]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr,
+                                               PSUBDrr, VPSUBDrr,
+                                               PSUBQrr, VPSUBQrr,
+                                               PSUBWrr, VPSUBWrr,
+                                               VPSUBBYrr,
+                                               VPSUBDYrr,
+                                               VPSUBQYrr,
+                                               VPSUBWYrr)>;
+
+def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKLWritePCMPGTQ]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                  VPCMPGTQYrr)>;
+
+
+// CMOVs that use both Z and C flag require an extra uop.
+def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> {
+  let Latency = 7;
+  let ResourceCycles = [1,2];
+  let NumMicroOps = 3;
+}
+
+def SKLCMOVA_CMOVBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKLWriteCMOVA_CMOVBErr]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV]>
+]>;
+
+def SKLCMOVA_CMOVBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKLWriteCMOVA_CMOVBErm]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def SKLWriteSETA_SETBEr : SchedWriteRes<[SKLPort06]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def SKLWriteSETA_SETBEm : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
+  let Latency = 3;
+  let ResourceCycles = [1,1,2];
+  let NumMicroOps = 4;
+}
+
+def SKLSETA_SETBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKLWriteSETA_SETBEr]>,
+  SchedVar<NoSchedPred,                         [WriteSETCC]>
+]>;
+
+def SKLSETA_SETBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKLWriteSETA_SETBEm]>,
+  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SKLSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SKLSETA_SETBErm], (instrs SETCCm)>;
+
 } // SchedModel
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index ec8e4db02d8a..58caf1dacfcb 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1,9 +1,8 @@
 //=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -81,6 +80,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 6>;
 def : ReadAdvance<ReadAfterVecYLd, 7>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -158,7 +159,6 @@ defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
 def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
 
 defm : SKXWriteResPair<WriteCMOV,  [SKXPort06], 1, [1], 1>; // Conditional move.
-defm : SKXWriteResPair<WriteCMOV2, [SKXPort06], 2, [2], 2>; // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
@@ -176,7 +176,7 @@ defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
 // Integer shifts and rotates.
 defm : SKXWriteResPair<WriteShift,    [SKXPort06],  1>;
 defm : SKXWriteResPair<WriteShiftCL,  [SKXPort06],  3, [3], 3>;
-defm : SKXWriteResPair<WriteRotate,   [SKXPort06],  2, [2], 2>;
+defm : SKXWriteResPair<WriteRotate,   [SKXPort06],  1, [1], 1>;
 defm : SKXWriteResPair<WriteRotateCL, [SKXPort06],  3, [3], 3>;
 
 // SHLD/SHRD.
@@ -680,8 +680,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
                                             "VPBLENDMD(Z128|Z256)rr",
                                             "VPBLENDMQ(Z128|Z256)rr",
                                             "VPBLENDMW(Z128|Z256)rr",
-                                            "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr",
-                                            "(V?)PSUB(B|D|Q|W)rr",
+                                            "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
                                             "VPTERNLOGD(Z|Z128|Z256)rri",
                                             "VPTERNLOGQ(Z|Z128|Z256)rri")>;
 
@@ -722,13 +721,6 @@ def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
 def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
                                           MMX_MOVDQ2Qrr)>;
 
-def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-  let ResourceCycles = [2];
-}
-def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>;
-
 def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
@@ -759,9 +751,10 @@ def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
 }
 def: InstRW<[SKXWriteResGroup23], (instrs CWD,
                                           JCXZ, JECXZ, JRCXZ,
-                                          ADC8i8, SBB8i8)>;
-def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri",
-                                             "SBB8ri")>;
+                                          ADC8i8, SBB8i8,
+                                          ADC16i16, SBB16i16,
+                                          ADC32i32, SBB32i32,
+                                          ADC64i32, SBB64i32)>;
 
 def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
   let Latency = 2;
@@ -834,7 +827,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
                                              "VPCMPD(Z|Z128|Z256)rri",
                                              "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
                                              "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
-                                             "(V?)PCMPGTQ(Y?)rr",
                                              "VPCMPQ(Z|Z128|Z256)rri",
                                              "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
                                              "VPCMPW(Z|Z128|Z256)rri",
@@ -900,13 +892,6 @@ def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
 }
 def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;
 
-def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
-  let Latency = 3;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup46], (instregex "SET(A|BE)m")>;
-
 def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
   let Latency = 3;
   let NumMicroOps = 4;
@@ -1446,6 +1431,14 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
 def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
                                               "ROR(8|16|32|64)m(1|i)")>;
 
+def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+                                             ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
 def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
   let Latency = 7;
   let NumMicroOps = 5;
@@ -2463,4 +2456,171 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKXWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def SKXWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                          XOR32rr, XOR64rr)>;
+
+def SKXWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+                                           XORPDrr, VXORPDrr,
+                                           VXORPSZ128rr,
+                                           VXORPDZ128rr)>;
+
+def SKXWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+                                            VXORPSZ256rr, VXORPDZ256rr)>;
+
+def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicZ]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
+
+def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+                                                 VPXORDZ128rr, VPXORQZ128rr)>;
+
+def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
+                                                 VPXORDZ256rr, VPXORQZ256rr)>;
+
+def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
+
+def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+                                               PCMPGTDrr, VPCMPGTDrr,
+                                               PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+                                               VPCMPGTDYrr,
+                                               VPCMPGTWYrr)>;
+
+def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKXWritePSUB]>
+]>;
+
+def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
+                                               PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
+                                               PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
+                                               PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
+                                               VPSUBBYrr, VPSUBBZ256rr,
+                                               VPSUBDYrr, VPSUBDZ256rr,
+                                               VPSUBQYrr, VPSUBQZ256rr,
+                                               VPSUBWYrr, VPSUBWZ256rr,
+                                               VPSUBBZrr,
+                                               VPSUBDZrr,
+                                               VPSUBQZrr,
+                                               VPSUBWZrr)>;
+def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKXWritePCMPGTQ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                  VPCMPGTQYrr)>;
+
+
+// CMOVs that use both Z and C flag require an extra uop.
+def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
+  let Latency = 7;
+  let ResourceCycles = [1,2];
+  let NumMicroOps = 3;
+}
+
+def SKXCMOVA_CMOVBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV]>
+]>;
+
+def SKXCMOVA_CMOVBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
+  SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
+  let Latency = 3;
+  let ResourceCycles = [1,1,2];
+  let NumMicroOps = 4;
+}
+
+def SKXSETA_SETBErr :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
+  SchedVar<NoSchedPred,                         [WriteSETCC]>
+]>;
+
+def SKXSETA_SETBErm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
+  SchedVar<NoSchedPred,                         [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;
+
 } // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 25aa83f96d3a..55ca85ec1e3d 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -1,9 +1,8 @@
 //===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,6 +17,12 @@ def ReadAfterVecLd : SchedRead;
 def ReadAfterVecXLd : SchedRead;
 def ReadAfterVecYLd : SchedRead;
 
+// Instructions that move data between general purpose registers and vector
+// registers may be subject to extra latency due to data bypass delays.
+// This SchedRead describes a bypass delay caused by data being moved from the
+// integer unit to the floating point unit.
+def ReadInt2Fpu : SchedRead;
+
 // Instructions with both a load and a store folded are modeled as a folded
 // load + WriteRMW.
 def WriteRMW : SchedWrite;
@@ -158,7 +163,6 @@ defm WritePOPCNT : X86SchedWritePair; // Bit population count.
 defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
 defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
 defm WriteCMOV  : X86SchedWritePair; // Conditional move.
-defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
 def  WriteFCMOV : SchedWrite; // X87 conditional move.
 def  WriteSETCC : SchedWrite; // Set register based on condition code.
 def  WriteSETCCStore : SchedWrite;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 1589ff2ef402..b0334655de7e 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -1,9 +1,8 @@
 //===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,6 +46,8 @@ def : ReadAdvance<ReadAfterVecLd, 3>;
 def : ReadAdvance<ReadAfterVecXLd, 3>;
 def : ReadAdvance<ReadAfterVecYLd, 3>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when dispatched by the schedulers.
@@ -112,7 +113,6 @@ defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[1
 defm : X86WriteResPairUnsupported<WriteCRC32>;
 
 defm : AtomWriteResPair<WriteCMOV,  [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
 defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
 
 def  : WriteRes<WriteSETCC, [AtomPort01]>;
@@ -740,7 +740,7 @@ def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
   let Latency = 45;
   let ResourceCycles = [45];
 }
-def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+def : InstRW<[AtomWrite01_45], (instrs MONITOR32rrr, MONITOR64rrr)>;
 
 def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
   let Latency = 46;
diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td
index 5798e1b2671b..8cc01c3acece 100644
--- a/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/lib/Target/X86/X86ScheduleBdVer2.td
@@ -1,9 +1,8 @@
 //=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -209,7 +208,10 @@ multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
                     !add(Lat, LoadLat),
                     !if(!and(!empty(Res), !eq(LoadRes, 1)),
                       [],
-                      !listconcat([LoadRes], Res)),
+                      !listconcat([LoadRes],
+                        !if(!empty(Res),
+                          !listsplat(1, !size(ExePorts)),
+                          Res))),
                     !add(UOps, LoadUOps)>;
 }
 
@@ -218,7 +220,7 @@ multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
                             list<int> Res = [], int UOps = 1,
                             int LoadUOps = 0> {
   defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
-                          /*LoadLat*/4, /*LoadRes*/1, LoadUOps>;
+                          /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
 }
 
 multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
@@ -226,15 +228,15 @@ multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
                              list<int> Res = [], int UOps = 1,
                              int LoadUOps = 0> {
   defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
-                           /*LoadLat*/5, /*LoadRes*/1, LoadUOps>;
+                           /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
 }
 
 multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                              list<ProcResourceKind> ExePorts, int Lat,
-                             list<int> Res, int UOps = 2,
+                             list<int> Res = [], int UOps = 2,
                              int LoadUOps = 0> {
   defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
-                           /*LoadLat*/5, /*LoadRes*/2, LoadUOps>;
+                           /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -251,6 +253,11 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 5>;
 def : ReadAdvance<ReadAfterVecYLd, 5>;
 
+// Transfer from int domain to ivec domain incurs additional latency of 8..10cy
+// Reference: Agner, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
+// and Excavator pipeline", "Data delay between different execution domains"
+def : ReadAdvance<ReadInt2Fpu, -10>;
+
 // A folded store needs a cycle on the PdStore for the store data.
 def : WriteRes<WriteRMW, [PdStore]>;
 
@@ -258,15 +265,15 @@ def : WriteRes<WriteRMW, [PdStore]>;
 // Loads, stores, and moves, not folded with other operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-def : WriteRes<WriteLoad,    [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteLoad,    [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; }
 def : WriteRes<WriteStore,   [PdStore]>;
 def : WriteRes<WriteStoreNT, [PdStore]>;
-def : WriteRes<WriteMove,    [PdEX01]>;
+def : WriteRes<WriteMove,    [PdEX01]> { let ResourceCycles = [2]; }
 
 // Load/store MXCSR.
 // FIXME: These are copy and pasted from WriteLoad/Store.
 def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
-def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; }
 
 // Treat misc copies as a move.
 def : InstRW<[WriteMove], (instrs COPY)>;
@@ -300,6 +307,7 @@ def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
 
 def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
   let Latency = 184;
+  let ResourceCycles = [375];
   let NumMicroOps = 45;
 }
 def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
@@ -307,22 +315,31 @@ def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
 
 // Nops don't have dependencies, so there's no actual latency, but we set this
 // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
-def : WriteRes<WriteNop, [PdEX01]>;
+def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }
 
 ////////////////////////////////////////////////////////////////////////////////
 // Arithmetic.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResExPair<WriteALU,     [PdEX01]>;
+defm : PdWriteResExPair<WriteALU,     [PdEX01], 1, [2]>;
+
+def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
+  let Latency = 6;
+  let ResourceCycles = [3, 2, 1];
+  let NumMicroOps = 1;
+}
+def : SchedAlias<WriteALURMW, PdWriteALURMW>;
 
 def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
   let Latency = 6;
+  let ResourceCycles = [88];
   let NumMicroOps = 4;
 }
 def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
 
 def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
   let Latency = 2;
+  let ResourceCycles = [2];
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteBMI1],
@@ -332,8 +349,9 @@ def : InstRW<[PdWriteBMI1],
                      BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
                      TZMSK32rr, TZMSK64rr)>;
 
-def PdWriteBMI1m : SchedWriteRes<[PdEX01]> {
+def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
   let Latency = 6;
+  let ResourceCycles = [3, 3];
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteBMI1m],
@@ -345,26 +363,34 @@ def : InstRW<[PdWriteBMI1m],
 
 defm : PdWriteResExPair<WriteADC,    [PdEX01],                  1,  [2]>;
 
-defm : PdWriteRes<WriteBSWAP32,      [PdEX1]>;
-defm : PdWriteRes<WriteBSWAP64,      [PdEX1]>;
-defm : PdWriteRes<WriteCMPXCHG,      [PdEX1],                   3,  [],       5>;
-defm : PdWriteRes<WriteCMPXCHGRMW,   [PdEX1, PdStore, PdLoad],  3,  [], 2>;
-defm : PdWriteRes<WriteXCHG,         [PdEX1],                   1,  [],       2>;
+def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
+  let ResourceCycles = [3];
+}
+def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
+
+defm : PdWriteRes<WriteBSWAP32,      [PdEX01]>;
+defm : PdWriteRes<WriteBSWAP64,      [PdEX01]>;
+defm : PdWriteRes<WriteCMPXCHG,      [PdEX1],                   3,  [3],        5>;
+defm : PdWriteRes<WriteCMPXCHGRMW,   [PdEX1, PdStore, PdLoad],  3,  [44, 1, 1], 2>;
+defm : PdWriteRes<WriteXCHG,         [PdEX1],                   1,  [],         2>;
 
 def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
   let Latency = 3;
+  let ResourceCycles = [3];
   let NumMicroOps = 3;
 }
 def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
 
 def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
   let Latency = 3;
+  let ResourceCycles = [23];
   let NumMicroOps = 5;
 }
 def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
 
 def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
   let Latency = 3;
+  let ResourceCycles = [21];
   let NumMicroOps = 6;
 }
 def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
@@ -372,42 +398,40 @@ def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
 
 def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
   let Latency = 3;
+  let ResourceCycles = [26];
   let NumMicroOps = 18;
 }
 def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
 
 def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
   let Latency = 3;
+  let ResourceCycles = [69];
   let NumMicroOps = 22;
 }
 def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
 
-def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
-  let Latency = 2;
-  let NumMicroOps = 2;
-}
-def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
-
 def PdWriteXADD : SchedWriteRes<[PdEX1]> {
-  let Latency = 2;
-  let NumMicroOps = 4;
+  let Latency = 1;
+  let ResourceCycles = [1];
+  let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
 
 def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
-let Latency = 6;
-let NumMicroOps = 4;
+  let Latency = 6;
+  let ResourceCycles = [20];
+  let NumMicroOps = 4;
 }
 def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
 
-defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul],          4>;
-defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul],          4,  [],    2>;
-defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul],          5,  [],    2>;
-defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul],          4>;
-defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul],          4>;
-defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul],          4,  [],    1, 1>;
-defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul],          4>;
-defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul],          6,  [1, 4]>;
+defm : PdWriteResExPair<WriteIMul8,     [PdEX1, PdMul],          4,  [1, 4]>;
+defm : PdWriteResExPair<WriteIMul16,    [PdEX1, PdMul],          4,  [1, 5],    2>;
+defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul],          5,  [1, 5],    2>;
+defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul],          4,  [1, 2]>;
+defm : PdWriteResExPair<WriteIMul32,    [PdEX1, PdMul],          4,  [1, 4]>;
+defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul],          4,  [1, 2],    1, 1>;
+defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul],          4,  [1, 2]>;
+defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul],          6,  [1, 6]>;
 defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul],          6,  [1, 4],1, 1>;
 defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul],          6,  [1, 4]>;
 defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
@@ -422,36 +446,48 @@ defm : PdWriteResExPair<WriteIDiv16,  [PdEX1, PdDiv],           15,  [1, 17],
 defm : PdWriteResExPair<WriteIDiv32,  [PdEX1, PdDiv],           14,  [1, 25],   2>;
 defm : PdWriteResExPair<WriteIDiv64,  [PdEX1, PdDiv],           14,  [1, 14],   2>;
 
-defm : PdWriteResExPair<WriteCRC32,   [PdEX01],                  3,  [4],       3>;
+defm : PdWriteResExPair<WriteCRC32,   [PdEX01],                  2,  [4],       3>;
 
 def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
   let Latency = 5;
-  let ResourceCycles = [4];
+  let ResourceCycles = [10];
   let NumMicroOps = 5;
 }
 def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
 
 def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
   let Latency = 6;
-  let ResourceCycles = [4];
+  let ResourceCycles = [12];
   let NumMicroOps = 7;
 }
 def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
 
 def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
   let Latency = 10;
-  let ResourceCycles = [4];
+  let ResourceCycles = [17];
   let NumMicroOps = 11;
 }
 def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
 
 defm : PdWriteResExPair<WriteCMOV,    [PdEX01]>; // Conditional move.
-defm : PdWriteResExPair<WriteCMOV2,   [PdEX01], 1, [], 1, 1>; // Conditional (CF + ZF flag) move.
 
-def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm,
-                                          CMOVGE16rm, CMOVGE32rm, CMOVGE64rm,
-                                          CMOVL16rm, CMOVL32rm, CMOVL64rm,
-                                          CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>;
+def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
+  let Latency = 5;
+  let ResourceCycles = [3, 3];
+  let NumMicroOps = 2;
+}
+
+def PdWriteCMOVmVar : SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>,  [PdWriteCMOVm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>,  [PdWriteCMOVm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>,  [PdWriteCMOVm]>,
+  SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
 
 defm : PdWriteRes<WriteFCMOV,        [PdFPU0, PdFPFMA]>; // x87 conditional move.
 
@@ -462,107 +498,143 @@ def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
   let ResourceCycles = [2];
   let NumMicroOps = 2;
 }
-def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm,
-                                                      SETLEm, SETLm)>;
 
-defm : PdWriteRes<WriteLAHFSAHF,      [PdEX01],          2,  [],     2>;
+def PdSETGEmSETGmSETLEmSETLm :  SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+  SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>,  [PdWriteSETGEmSETGmSETLEmSETLm]>,
+  SchedVar<NoSchedPred,                                            [WriteSETCCStore]>
+]>;
+def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;
+
+defm : PdWriteRes<WriteLAHFSAHF,      [PdEX01],          2,  [4],       2>;
 
-def WriteLAHF : SchedWriteRes<[PdEX01]> {
+def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
   let Latency = 2;
+  let ResourceCycles = [4];
   let NumMicroOps = 4;
 }
-def : InstRW<[WriteLAHF], (instrs LAHF)>;
+def : InstRW<[PdWriteLAHF], (instrs LAHF)>;
 
-def WriteSAHF : SchedWriteRes<[PdEX01]> {
+def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
   let Latency = 2;
+  let ResourceCycles = [2];
   let NumMicroOps = 2;
 }
-def : InstRW<[WriteSAHF], (instrs SAHF)>;
+def : InstRW<[PdWriteSAHF], (instrs SAHF)>;
+
+defm : PdWriteRes<WriteBitTest,          [PdEX01],         1, [2],      1>;
+defm : PdWriteRes<WriteBitTestImmLd,     [PdEX01, PdLoad], 5, [2,  3],  1>;
+defm : PdWriteRes<WriteBitTestRegLd,     [PdEX01, PdLoad], 5, [7,  2],  7>;
+defm : PdWriteRes<WriteBitTestSet,       [PdEX01],         2, [2],      2>;
+defm : PdWriteRes<WriteBitTestSetImmLd,  [PdEX01, PdLoad], 6, [1,  1],  4>;
+defm : PdWriteRes<WriteBitTestSetRegLd,  [PdEX01, PdLoad], 6, [1,  1], 10>;
 
-defm : PdWriteRes<WriteBitTest,          [PdEX01],         1, [1],     1>;
-defm : PdWriteRes<WriteBitTestImmLd,     [PdEX01, PdLoad], 5, [1, 1],  1>;
-defm : PdWriteRes<WriteBitTestRegLd,     [PdEX01, PdLoad], 5, [1, 1],  7>;
-defm : PdWriteRes<WriteBitTestSet,       [PdEX01],         2, [1],     2>;
-defm : PdWriteRes<WriteBitTestSetImmLd,  [PdEX01, PdLoad], 6, [1, 1],  4>;
-defm : PdWriteRes<WriteBitTestSetImmRMW, [PdEX01, PdLoad], 6, [1, 1],  4>;
-defm : PdWriteRes<WriteBitTestSetRegLd,  [PdEX01, PdLoad], 6, [1, 1], 10>;
-defm : PdWriteRes<WriteBitTestSetRegRMW, [PdEX01, PdLoad], 6, [1, 1], 10>;
+def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
+  let Latency = 7;
+  let ResourceCycles = [42, 1];
+  let NumMicroOps = 4;
+}
+def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
+def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
+  let Latency = 7;
+  let ResourceCycles = [44, 1];
+  let NumMicroOps = 10;
+}
+def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
 
 // This is for simple LEAs with one or two input operands.
 // FIXME: SAGU 3-operand LEA
 def : WriteRes<WriteLEA,              [PdEX01]> { let NumMicroOps = 2; }
 
 // Bit counts.
-defm : PdWriteResExPair<WriteBSF,     [PdEX01],          3,  [4],     6, 2>;
-defm : PdWriteResExPair<WriteBSR,     [PdEX01],          4,  [4],     7, 2>;
-defm : PdWriteResExPair<WritePOPCNT,  [PdEX01],          4>;
-defm : PdWriteResExPair<WriteLZCNT,   [PdEX01],          2,  [],      2>;
-defm : PdWriteResExPair<WriteTZCNT,   [PdEX01],          2,  [2],     2>;
+defm : PdWriteResExPair<WriteBSF,     [PdEX01],          3,  [6],     6, 2>;
+defm : PdWriteResExPair<WriteBSR,     [PdEX01],          4,  [8],     7, 2>;
+defm : PdWriteResExPair<WritePOPCNT,  [PdEX01],          4,  [4]>;
+defm : PdWriteResExPair<WriteLZCNT,   [PdEX0],           2,  [2],     2>;
+defm : PdWriteResExPair<WriteTZCNT,   [PdEX0],           2,  [2],     2>;
 
 // BMI1 BEXTR, BMI2 BZHI
-defm : PdWriteResExPair<WriteBEXTR,   [PdEX01],          2,  [],     2>;
-defm : PdWriteResExPair<WriteBLS,     [PdEX01],          2,  [],     2>;
+defm : PdWriteResExPair<WriteBEXTR,   [PdEX01],          2,  [2],    2>;
+defm : PdWriteResExPair<WriteBLS,     [PdEX01],          2,  [2],    2>;
 defm : PdWriteResExPair<WriteBZHI,    [PdEX01]>;
 
+def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
+  let Latency = 2;
+  let ResourceCycles = [4];
+  let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;
+
+def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
+  let Latency = 2;
+  let ResourceCycles = [5];
+  let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Integer shifts and rotates.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResExPair<WriteShift,    [PdEX01]>;
+defm : PdWriteResExPair<WriteShift,    [PdEX01], 1, [2]>;
 defm : PdWriteResExPair<WriteShiftCL,  [PdEX01]>;
-defm : PdWriteResExPair<WriteRotate,   [PdEX01]>;
+defm : PdWriteResExPair<WriteRotate,   [PdEX01], 1, [2]>;
 defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
 
 def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 12;
+  let ResourceCycles = [24];
   let NumMicroOps = 26;
 }
 def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
 
 def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
   let Latency = 12;
+  let ResourceCycles = [23];
   let NumMicroOps = 23;
 }
 def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
 
 def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 11;
+  let ResourceCycles = [22];
   let NumMicroOps = 24;
 }
 def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
 
 def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 10;
+  let ResourceCycles = [20];
   let NumMicroOps = 22;
 }
 def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
 
 def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
   let Latency = 10;
+  let ResourceCycles = [19];
   let NumMicroOps = 19;
 }
 def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
 
-def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 7;
+  let ResourceCycles = [14];
   let NumMicroOps = 17;
 }
-def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;
 
-def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
+def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 7;
+  let ResourceCycles = [13];
   let NumMicroOps = 16;
 }
-def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>;
-
-def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> {
-  let Latency = 7;
-  let NumMicroOps = 16;
-}
-def : InstRW<[PdWriteRCR32rCL ], (instrs RCR32rCL)>;
+def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;
 
 def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
   let Latency = 7;
+  let ResourceCycles = [14];
   let NumMicroOps = 15;
 }
 def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
@@ -570,31 +642,35 @@ def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
 
 def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
   let Latency = 9;
+  let ResourceCycles = [18];
   let NumMicroOps = 20;
 }
 def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
 
 def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
   let Latency = 11;
+  let ResourceCycles = [21];
   let NumMicroOps = 21;
 }
 def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
 
 def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
   let Latency = 8;
+  let ResourceCycles = [15];
   let NumMicroOps = 16;
 }
 def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
 
 def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
   let Latency = 13;
+  let ResourceCycles = [25];
   let NumMicroOps = 25;
 }
 def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
 
 // SHLD/SHRD.
-defm : PdWriteRes<WriteSHDrri,       [PdEX01],         4, [6], 6>;
-defm : PdWriteRes<WriteSHDrrcl,      [PdEX01],         4, [8], 7>;
+defm : PdWriteRes<WriteSHDrri,       [PdEX01],         3, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl,      [PdEX01],         3, [8], 7>;
 
 def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
   let Latency = 3;
@@ -604,8 +680,8 @@ def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
 def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;
 
 def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
-  let Latency = 4;
-  let ResourceCycles = [8];
+  let Latency = 3;
+  let ResourceCycles = [6];
   let NumMicroOps = 7;
 }
 def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
@@ -623,19 +699,20 @@ defm : PdWriteRes<WriteFLD0,               [PdFPU1, PdFPSTO], 3>;
 defm : PdWriteRes<WriteFLD1,               [PdFPU1, PdFPSTO], 3>;
 defm : PdWriteRes<WriteFLDC,               [PdFPU1, PdFPSTO], 3>;
 
-defm : PdWriteRes<WriteFLoad,              [PdLoad, PdFPU01, PdFPFMA], 5>;
-defm : PdWriteRes<WriteFLoadX,             [PdLoad, PdFPU01, PdFPFMA], 5>;
-defm : PdWriteRes<WriteFLoadY,             [PdLoad, PdFPU01, PdFPFMA], 5, [], 2>;
+defm : PdWriteRes<WriteFLoad,              [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteFLoadX,             [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteFLoadY,             [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;
 
-defm : PdWriteRes<WriteFMaskedLoad,        [PdLoad, PdFPU01, PdFPFMA], 6, [1, 1, 2]>;
-defm : PdWriteRes<WriteFMaskedLoadY,       [PdLoad, PdFPU01, PdFPFMA], 6, [2, 2, 4], 2>;
+defm : PdWriteRes<WriteFMaskedLoad,        [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
+defm : PdWriteRes<WriteFMaskedLoadY,       [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;
 
-defm : PdWriteRes<WriteFStore,             [PdStore, PdFPU1,  PdFPSTO], 2>;
-defm : PdWriteRes<WriteFStoreX,            [PdStore, PdFPU1,  PdFPSTO]>;
-defm : PdWriteRes<WriteFStoreY,            [PdStore, PdFPU1,  PdFPSTO], 1, [], 4>;
+defm : PdWriteRes<WriteFStore,             [PdStore, PdFPU23, PdFPSTO], 2, [1,  3, 1]>;
+defm : PdWriteRes<WriteFStoreX,            [PdStore, PdFPU23, PdFPSTO], 1, [1,  3, 1]>;
+defm : PdWriteRes<WriteFStoreY,            [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;
 
-def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1,  PdFPSTO]> {
+def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23,  PdFPSTO]> {
   let Latency = 2;
+  let ResourceCycles = [1, 3, 1];
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
@@ -649,33 +726,41 @@ defm : PdWriteRes<WriteFStoreNT,           [PdStore, PdFPU1,  PdFPSTO], 3>;
 defm : PdWriteRes<WriteFStoreNTX,          [PdStore, PdFPU1,  PdFPSTO], 3>;
 defm : PdWriteRes<WriteFStoreNTY,          [PdStore, PdFPU1,  PdFPSTO], 3, [2, 2, 2], 4>;
 
-defm : PdWriteRes<WriteFMaskedStore,       [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 4], 18>;
-defm : PdWriteRes<WriteFMaskedStoreY,      [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 4], 34>;
+defm : PdWriteRes<WriteFMaskedStore,       [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStoreY,      [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
 
 defm : PdWriteRes<WriteFMove,              [PdFPU01, PdFPFMA]>;
-defm : PdWriteRes<WriteFMoveX,             [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveX,             [PdFPU01, PdFPFMA], 1, [1, 2]>;
 defm : PdWriteRes<WriteFMoveY,             [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
 
 defm : PdWriteRes<WriteEMMS,               [PdFPU01, PdFPFMA], 2>;
 
 defm : PdWriteResXMMPair<WriteFAdd,         [PdFPU0, PdFPFMA],  5>;
 defm : PdWriteResXMMPair<WriteFAddX,        [PdFPU0, PdFPFMA],  5>;
-defm : PdWriteResYMMPair<WriteFAddY,        [PdFPU0, PdFPFMA],  5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFAddY,        [PdFPU0, PdFPFMA],  5, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFAddZ>;
 
+def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
+  let Latency = 5;
+  let ResourceCycles = [3, 1, 10];
+}
+def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m,  ADD_FI32m,  ADD_F32m,  ADD_F64m,
+                                      SUB_FI16m,  SUB_FI32m,  SUB_F32m,  SUB_F64m,
+                                      SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;
+
 defm : PdWriteResXMMPair<WriteFAdd64,       [PdFPU0, PdFPFMA],  5>;
 defm : PdWriteResXMMPair<WriteFAdd64X,      [PdFPU0, PdFPFMA],  5>;
-defm : PdWriteResYMMPair<WriteFAdd64Y,      [PdFPU0, PdFPFMA],  5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFAdd64Y,      [PdFPU0, PdFPFMA],  5, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
 
 defm : PdWriteResXMMPair<WriteFCmp,         [PdFPU0, PdFPFMA],  2>;
 defm : PdWriteResXMMPair<WriteFCmpX,        [PdFPU0, PdFPFMA],  2>;
-defm : PdWriteResYMMPair<WriteFCmpY,        [PdFPU0, PdFPFMA],  2, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFCmpY,        [PdFPU0, PdFPFMA],  2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFCmpZ>;
 
 defm : PdWriteResXMMPair<WriteFCmp64,       [PdFPU0, PdFPFMA],  2>;
 defm : PdWriteResXMMPair<WriteFCmp64X,      [PdFPU0, PdFPFMA],  2>;
-defm : PdWriteResYMMPair<WriteFCmp64Y,      [PdFPU0, PdFPFMA],  2, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFCmp64Y,      [PdFPU0, PdFPFMA],  2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
 
 defm : PdWriteResXMMPair<WriteFCom,         [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
@@ -690,29 +775,35 @@ def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
 
 defm : PdWriteResXMMPair<WriteFMul,         [PdFPU1, PdFPFMA],  5>;
 defm : PdWriteResXMMPair<WriteFMulX,        [PdFPU1, PdFPFMA],  5>;
-defm : PdWriteResYMMPair<WriteFMulY,        [PdFPU1, PdFPFMA],  5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFMulY,        [PdFPU1, PdFPFMA],  5, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFMulZ>;
 
+def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
+  let Latency = 5;
+  let ResourceCycles = [3, 1, 10];
+}
+def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;
+
 defm : PdWriteResXMMPair<WriteFMul64,       [PdFPU1, PdFPFMA],  5>;
 defm : PdWriteResXMMPair<WriteFMul64X,      [PdFPU1, PdFPFMA],  5>;
-defm : PdWriteResYMMPair<WriteFMul64Y,      [PdFPU1, PdFPFMA],  5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFMul64Y,      [PdFPU1, PdFPFMA],  5, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteFMul64Z>;
 
-defm : PdWriteResXMMPair<WriteFMA,          [PdFPU, PdFPFMA], 5>;
-defm : PdWriteResXMMPair<WriteFMAX,         [PdFPU, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFMAY,         [PdFPU, PdFPFMA], 5,   [1, 1]>;
+defm : PdWriteResXMMPair<WriteFMA,          [PdFPU, PdFPFMA], 5, [1, 3]>;
+defm : PdWriteResXMMPair<WriteFMAX,         [PdFPU, PdFPFMA], 5, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFMAY,         [PdFPU, PdFPFMA], 5, [1, 3]>;
 defm : X86WriteResPairUnsupported<WriteFMAZ>;
 
 
-defm : PdWriteResXMMPair<WriteDPPD,         [PdFPU1, PdFPFMA], 15, [1, 3],  15, 2>;
+defm : PdWriteResXMMPair<WriteDPPD,         [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;
 
-defm : PdWriteResXMMPair<WriteDPPS,         [PdFPU1, PdFPFMA], 25, [1, 3],  16, 2>;
-defm : PdWriteResYMMPair<WriteDPPSY,        [PdFPU1, PdFPFMA], 27, [2, 6], /*or 29*/ 25, 4>;
+defm : PdWriteResXMMPair<WriteDPPS,         [PdFPU1, PdFPFMA], 25, [1, 14],  16, 2>;
+defm : PdWriteResYMMPair<WriteDPPSY,        [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
 defm : X86WriteResPairUnsupported<WriteDPPSZ>;
 
 def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
-  let Latency = 25;
-  let ResourceCycles = [1, 3];
+  let Latency = 27;
+  let ResourceCycles = [1, 14];
   let NumMicroOps = 17;
 }
 def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
@@ -722,118 +813,140 @@ defm : PdWriteResXMMPair<WriteFRcpX,        [PdFPU1, PdFPFMA],  5>;
 defm : PdWriteResYMMPair<WriteFRcpY,        [PdFPU1, PdFPFMA],  5, [2, 1]>;
 defm : X86WriteResPairUnsupported<WriteFRcpZ>;
 
-defm : PdWriteResXMMPair<WriteFRsqrt,       [PdFPU1, PdFPFMA],  5>;
+defm : PdWriteResXMMPair<WriteFRsqrt,       [PdFPU1, PdFPFMA],  5, [1, 2]>;
 defm : PdWriteResXMMPair<WriteFRsqrtX,      [PdFPU1, PdFPFMA],  5>;
-defm : PdWriteResYMMPair<WriteFRsqrtY,      [PdFPU1, PdFPFMA],  5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFRsqrtY,      [PdFPU1, PdFPFMA],  5, [2, 2]>;
 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
 
-defm : PdWriteResXMMPair<WriteFDiv,         [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResXMMPair<WriteFDivX,        [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResYMMPair<WriteFDivY,        [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : PdWriteResXMMPair<WriteFDiv,         [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFDivX,        [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFDivY,        [PdFPU1, PdFPFMA], 9, [2, 18]>;
 defm : X86WriteResPairUnsupported<WriteFDivZ>;
 
-defm : PdWriteResXMMPair<WriteFDiv64,       [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResXMMPair<WriteFDiv64X,      [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResYMMPair<WriteFDiv64Y,      [PdFPU1, PdFPFMA], 9, [2, 38]>;
+def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
+  let Latency = 9;
+  let ResourceCycles = [3, 1, 18];
+}
+def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m,  DIV_FI32m,
+                                      DIVR_FI16m, DIVR_FI32m,
+                                      DIV_F32m,   DIV_F64m,
+                                      DIVR_F32m,  DIVR_F64m)>;
+
+defm : PdWriteResXMMPair<WriteFDiv64,       [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFDiv64X,      [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFDiv64Y,      [PdFPU1, PdFPFMA], 9, [2, 18]>;
 defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
 
-defm : PdWriteResXMMPair<WriteFSqrt,        [PdFPU1, PdFPFMA], 9, [1, 21]>;
-defm : PdWriteResXMMPair<WriteFSqrtX,       [PdFPU1, PdFPFMA], 9, [1, 21]>;
-defm : PdWriteResYMMPair<WriteFSqrtY,       [PdFPU1, PdFPFMA], 9, [2, 42]>;
+defm : PdWriteResXMMPair<WriteFSqrt,        [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFSqrtX,       [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFSqrtY,       [PdFPU1, PdFPFMA], 9, [2, 18]>;
 defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
 
-defm : PdWriteResXMMPair<WriteFSqrt64,      [PdFPU1, PdFPFMA], 9, [1, 27]>;
-defm : PdWriteResXMMPair<WriteFSqrt64X,     [PdFPU1, PdFPFMA], 9, [1, 27]>;
-defm : PdWriteResYMMPair<WriteFSqrt64Y,     [PdFPU1, PdFPFMA], 9, [2, 54]>;
+defm : PdWriteResXMMPair<WriteFSqrt64,      [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFSqrt64X,     [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFSqrt64Y,     [PdFPU1, PdFPFMA], 9, [2, 18]>;
 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
 
-defm : PdWriteResXMMPair<WriteFSqrt80,      [PdFPU1, PdFPFMA],  1, [1, 35]>;
-defm : PdWriteResXMMPair<WriteFSign,        [PdFPU1, PdFPFMA]>;
+defm : PdWriteResXMMPair<WriteFSqrt80,      [PdFPU1, PdFPFMA],  1, [1, 18]>;
+defm : PdWriteResXMMPair<WriteFSign,        [PdFPU1, PdFPFMA],  1, [1, 4]>;
 
-defm : PdWriteResXMMPair<WriteFRnd,         [PdFPU1, PdFPSTO],  4>;
+defm : PdWriteResXMMPair<WriteFRnd,         [PdFPU1, PdFPSTO],  4, []>;
 defm : PdWriteResYMMPair<WriteFRndY,        [PdFPU1, PdFPSTO],  4, [2, 1], 2>;
 defm : X86WriteResPairUnsupported<WriteFRndZ>;
 
-def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+  let Latency = 10;
+  let ResourceCycles = [2, 1];
+  let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;
+
+def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
   let Latency = 10;
+  let ResourceCycles = [10, 1];
   let NumMicroOps = 2;
 }
-def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr,
-                                     VFRCZSDrr, VFRCZSSrr)>;
+def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;
 
 def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
   let Latency = 15;
-  let NumMicroOps = 2;
+  let ResourceCycles = [2, 1];
+  let NumMicroOps = 3;
 }
 def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
                                       VFRCZSDrm, VFRCZSSrm)>;
 
 def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
   let Latency = 10;
-  let ResourceCycles = [2, 1];
+  let ResourceCycles = [3, 1];
   let NumMicroOps = 4;
 }
 def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
 
 def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
   let Latency = 15;
-  let ResourceCycles = [2, 1];
+  let ResourceCycles = [4, 1];
   let NumMicroOps = 8;
 }
 def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
 
-defm : PdWriteResXMMPair<WriteFLogic,       [PdFPU01, PdFPFMA],  2>;
+defm : PdWriteResXMMPair<WriteFLogic,       [PdFPU01, PdFPFMA],  2, [1, 2]>;
 defm : PdWriteResYMMPair<WriteFLogicY,      [PdFPU01, PdFPFMA],  2, [2, 2]>;
 defm : X86WriteResPairUnsupported<WriteFLogicZ>;
 
 defm : PdWriteResXMMPair<WriteFTest,        [PdFPU0, PdFPFMA, PdEX0],  1, [], 2>;
-defm : PdWriteResYMMPair<WriteFTestY,       [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : PdWriteResYMMPair<WriteFTestY,       [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
 defm : X86WriteResPairUnsupported<WriteFTestZ>;
 
-defm : PdWriteResXMMPair<WriteFShuffle,     [PdFPU01, PdFPFMA],  2>;
-defm : PdWriteResYMMPair<WriteFShuffleY,    [PdFPU01, PdFPFMA],  2, [2, 2], 2>;
+defm : PdWriteResXMMPair<WriteFShuffle,     [PdFPU01, PdFPFMA],  2, [1, 2]>;
+defm : PdWriteResYMMPair<WriteFShuffleY,    [PdFPU01, PdFPFMA],  2, [2, 4], 2>;
 defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
 
 def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
   let Latency = 7;
+  let ResourceCycles = [1, 3];
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
 
-defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA],  3, [1, 4]>;
-defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA],  3, [2, 6], 2>;
+defm : PdWriteResXMMPair<WriteFVarShuffle,  [PdFPU01, PdFPFMA],  3, [1, 2]>;
+defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA],  3, [2, 4], 2>;
 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
 
-defm : PdWriteResXMMPair<WriteFBlend,       [PdFPU01, PdFPFMA],  2>;
-defm : PdWriteResYMMPair<WriteFBlendY,      [PdFPU01, PdFPFMA],  2, [2, 2], 2>;
+defm : PdWriteResXMMPair<WriteFBlend,       [PdFPU01, PdFPFMA],  2, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFBlendY,      [PdFPU01, PdFPFMA],  2, [2, 3], 2>;
 defm : X86WriteResPairUnsupported<WriteFBlendZ>;
 
-defm : PdWriteResXMMPair<WriteFVarBlend,    [PdFPU01, PdFPFMA],  2, [1, 4]>;
-defm : PdWriteResYMMPair<WriteFVarBlendY,   [PdFPU01, PdFPFMA],  2, [2, 6], 2>;
+defm : PdWriteResXMMPair<WriteFVarBlend,    [PdFPU01, PdFPFMA],  2, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFVarBlendY,   [PdFPU01, PdFPFMA],  2, [2, 4], 2>;
 defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
 
-defm : PdWriteResXMMPair<WriteFShuffle256,  [PdFPU01, PdFPFMA],  2, [], 2>;
+defm : PdWriteResXMMPair<WriteFShuffle256,  [PdFPU01, PdFPFMA],  2, [1, 3], 2>;
 defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
 
 def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
   let Latency = 2;
+  let ResourceCycles = [1, 2];
 }
 def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
 
 def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
   let Latency = 7;
+  let ResourceCycles = [1, 4];
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
 
 def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
   let Latency = 4;
+  let ResourceCycles = [1, 6];
   let NumMicroOps = 8;
 }
 def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
 
 def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
   let Latency = 8; // 4 + 4
+  let ResourceCycles = [1, 8];
   let NumMicroOps = 10;
 }
 def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
@@ -842,99 +955,100 @@ def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
 // Conversions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResXMMPair<WriteCvtSS2I,   [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtSS2I,   [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
 
-defm : PdWriteResXMMPair<WriteCvtPS2I,   [PdFPU1, PdFPSTO], 4>;
-defm : PdWriteResYMMPair<WriteCvtPS2IY,  [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : PdWriteResXMMPair<WriteCvtPS2I,   [PdFPU0, PdFPCVT, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtPS2IY,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
 
-defm : PdWriteResXMMPair<WriteCvtSD2I,   [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtSD2I,   [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
 
-defm : PdWriteResXMMPair<WriteCvtPD2I,   [PdFPU1, PdFPSTO],          8, [],        2>;
-defm : PdWriteResYMMPair<WriteCvtPD2IY,  [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteResXMMPair<WriteCvtPD2I,   [PdFPU0, PdFPCVT, PdFPSTO],          8, [],        2>;
+defm : PdWriteResYMMPair<WriteCvtPD2IY,  [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
-def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
   let Latency = 6;
   let NumMicroOps = 2;
 }
 def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
 
 // FIXME: f+3 ST, LD+STC latency
-defm : PdWriteResXMMPair<WriteCvtI2SS,   [PdFPU1, PdFPSTO], 4, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtI2SS,   [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
 // FIXME: .Folded version is one NumMicroOp *less*..
 
-defm : PdWriteResXMMPair<WriteCvtI2PS,   [PdFPU1, PdFPSTO], 4>;
-defm : PdWriteResYMMPair<WriteCvtI2PSY,  [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : PdWriteResXMMPair<WriteCvtI2PS,   [PdFPU0, PdFPCVT, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtI2PSY,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
 
-defm : PdWriteResXMMPair<WriteCvtI2SD,   [PdFPU1, PdFPSTO], 4, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtI2SD,   [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
 // FIXME: .Folded version is one NumMicroOp *less*..
 
-def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
   let Latency = 13;
+  let ResourceCycles = [1, 3, 1];
   let NumMicroOps = 2;
 }
-def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
 
-defm : PdWriteResXMMPair<WriteCvtI2PD,   [PdFPU1, PdFPSTO], 8, [],     2>;
-defm : PdWriteResYMMPair<WriteCvtI2PDY,  [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : PdWriteResXMMPair<WriteCvtI2PD,   [PdFPU0, PdFPCVT, PdFPSTO], 8, [],     2>;
+defm : PdWriteResYMMPair<WriteCvtI2PDY,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
-defm : PdWriteResXMMPair<WriteCvtSS2SD,  [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResXMMPair<WriteCvtSS2SD,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
 
-defm : PdWriteResXMMPair<WriteCvtPS2PD,  [PdFPU1, PdFPSTO], 8, [],     2>;
-defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : PdWriteResXMMPair<WriteCvtPS2PD,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [],     2>;
+defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
 
-defm : PdWriteResXMMPair<WriteCvtSD2SS,  [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResXMMPair<WriteCvtSD2SS,  [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
 
-defm : PdWriteResXMMPair<WriteCvtPD2PS,  [PdFPU1, PdFPSTO],          8, [],        2>;
-defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteResXMMPair<WriteCvtPD2PS,  [PdFPU0, PdFPCVT, PdFPSTO],          8, [],        2>;
+defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
 
-def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
   let Latency = 6;
   let NumMicroOps = 2;
 }
-def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
+def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
                                                             MMX_CVTPI2PDirr)>;
 
-def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
   let Latency = 4;
   let NumMicroOps = 2;
 }
-def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
 
-defm : PdWriteResXMMPair<WriteCvtPH2PS,  [PdFPU1, PdFPSTO], 8, [],     2, 1>;
-defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 3>;
+defm : PdWriteResXMMPair<WriteCvtPH2PS,  [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
+defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
 defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
 
-defm : PdWriteRes<WriteCvtPS2PH,        [PdFPU1, PdFPSTO],          8, [],        2>;
-defm : PdWriteRes<WriteCvtPS2PHY,       [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteRes<WriteCvtPS2PH,        [PdFPU0, PdFPCVT, PdFPSTO],          8, [1, 2, 1],    2>;
+defm : PdWriteRes<WriteCvtPS2PHY,       [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
 defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
 
-defm : PdWriteRes<WriteCvtPS2PHSt,      [PdFPU1, PdFPSTO, PdStore],          4, [],           3>;
-defm : PdWriteRes<WriteCvtPS2PHYSt,     [PdFPU1, PdFPSTO, PdFPFMA, PdStore], 4, [2, 1, 1, 1], 4>;
+defm : PdWriteRes<WriteCvtPS2PHSt,      [PdFPU0, PdFPCVT, PdFPSTO, PdStore],          4, [1, 2, 1, 1],    3>;
+defm : PdWriteRes<WriteCvtPS2PHYSt,     [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
 defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Vector integer operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteRes<WriteVecLoad,             [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadX,            [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadY,            [PdLoad, PdFPU01, PdFPMAL], 5, [], 2>;
+defm : PdWriteRes<WriteVecLoad,             [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteVecLoadX,            [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteVecLoadY,            [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;
 
-defm : PdWriteRes<WriteVecLoadNT,           [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadNTY,          [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadNT,           [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
+defm : PdWriteRes<WriteVecLoadNTY,          [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;
 
-defm : PdWriteRes<WriteVecMaskedLoad,       [PdLoad, PdFPU01, PdFPMAL], 6, [1, 1, 2]>;
-defm : PdWriteRes<WriteVecMaskedLoadY,      [PdLoad, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+defm : PdWriteRes<WriteVecMaskedLoad,       [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
+defm : PdWriteRes<WriteVecMaskedLoadY,      [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;
 
-defm : PdWriteRes<WriteVecStore,            [PdStore, PdFPU1,   PdFPSTO], 2>;
-defm : PdWriteRes<WriteVecStoreX,           [PdStore, PdFPU1,   PdFPSTO]>;
-defm : PdWriteRes<WriteVecStoreY,           [PdStore, PdFPU1,   PdFPSTO], 1, [], 4>;
+defm : PdWriteRes<WriteVecStore,            [PdStore, PdFPU23, PdFPSTO], 2, [1, 3,  1]>;
+defm : PdWriteRes<WriteVecStoreX,           [PdStore, PdFPU23, PdFPSTO], 1, [1, 3,  1]>;
+defm : PdWriteRes<WriteVecStoreY,           [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;
 
 def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1,   PdFPSTO]> {
   let NumMicroOps = 8;
@@ -948,24 +1062,33 @@ defm : PdWriteRes<WriteVecMaskedStore,      [PdStore, PdFPU01, PdFPMAL], 6, [1,
 defm : PdWriteRes<WriteVecMaskedStoreY,     [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
 
 defm : PdWriteRes<WriteVecMove,             [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteRes<WriteVecMoveX,            [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX,            [PdFPU01, PdFPMAL], 1, [1, 2]>;
 defm : PdWriteRes<WriteVecMoveY,            [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
 
-defm : PdWriteRes<WriteVecMoveToGpr,        [PdFPU0, PdFPFMA, PdEX0], 10>;
-defm : PdWriteRes<WriteVecMoveFromGpr,      [PdFPU01, PdFPFMA], 10, [], 2>;
+def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+}
+def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;
+
+def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+  let Latency = 4;
+}
+def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;
+
+defm : PdWriteRes<WriteVecMoveToGpr,        [PdFPU0, PdFPFMA, PdEX0], 11>;
+defm : PdWriteRes<WriteVecMoveFromGpr,      [PdFPU01, PdFPFMA], 11, [1, 2], 2>;
 
 defm : PdWriteResXMMPair<WriteVecALU,        [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecALUX,       [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecALUX,       [PdFPU01, PdFPMAL], 2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVecALUY>;
 defm : X86WriteResPairUnsupported<WriteVecALUZ>;
 
-defm : PdWriteResXMMPair<WriteVecShift,      [PdFPU01, PdFPMAL], 3>;
-defm : PdWriteResXMMPair<WriteVecShiftX,     [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVecShift,      [PdFPU01, PdFPMAL], 3, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVecShiftX,     [PdFPU01, PdFPMAL], 3, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVecShiftY>;
 defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
 
-defm : PdWriteResXMMPair<WriteVecShiftImm,   [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecShiftImmX,  [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecShiftImm,   [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVecShiftImmX,  [PdFPU01, PdFPMAL], 2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
 defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
 
@@ -978,55 +1101,67 @@ defm : PdWriteResXMMPair<WritePMULLD,        [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]
 defm : X86WriteResPairUnsupported<WritePMULLDY>;
 defm : X86WriteResPairUnsupported<WritePMULLDZ>;
 
-def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
   let Latency = 4;
-  let ResourceCycles = [2, 1, 2, 1];
 }
-def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
-                                     VPMACSSDQLrr)>;
+def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+                                      VPMACSSDQLrr)>;
 
-defm : PdWriteResXMMPair<WriteMPSAD,         [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
+defm : PdWriteResXMMPair<WriteMPSAD,         [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
 defm : X86WriteResPairUnsupported<WriteMPSADY>;
 defm : X86WriteResPairUnsupported<WriteMPSADZ>;
 
-defm : PdWriteResXMMPair<WritePSADBW,        [PdFPU01, PdFPMAL], 4, [], 2>;
-defm : PdWriteResXMMPair<WritePSADBWX,       [PdFPU01, PdFPMAL], 4, [], 2>;
+def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 4];
+  let NumMicroOps = 10;
+}
+def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;
+
+defm : PdWriteResXMMPair<WritePSADBW,        [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
+defm : PdWriteResXMMPair<WritePSADBWX,       [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
 defm : X86WriteResPairUnsupported<WritePSADBWY>;
 defm : X86WriteResPairUnsupported<WritePSADBWZ>;
 
 defm : PdWriteResXMMPair<WritePHMINPOS,      [PdFPU0,  PdFPMAL], 4, [], 2>;
 
-defm : PdWriteResXMMPair<WriteShuffle,       [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteShuffleX,      [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResYMMPair<WriteShuffleY,      [PdFPU01, PdFPMAL], 2,   [1, 1]>;
+defm : PdWriteResXMMPair<WriteShuffle,       [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResXMMPair<WriteShuffleX,      [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResYMMPair<WriteShuffleY,      [PdFPU01, PdFPMAL], 2, [1, 4]>;
 defm : X86WriteResPairUnsupported<WriteShuffleZ>;
 
-defm : PdWriteResXMMPair<WriteVarShuffle,    [PdFPU01, PdFPMAL], 3, [1, 4]>;
-defm : PdWriteResXMMPair<WriteVarShuffleX,   [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarShuffle,    [PdFPU01, PdFPMAL], 3, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVarShuffleX,   [PdFPU01, PdFPMAL], 3, [1, 3]>;
 defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
 defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
 
+def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+  let Latency = 2;
+  let ResourceCycles = [1, 3];
+}
+def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
+
 defm : PdWriteResXMMPair<WriteBlend,         [PdFPU01, PdFPMAL], 2>;
 defm : X86WriteResPairUnsupported<WriteBlendY>;
 defm : X86WriteResPairUnsupported<WriteBlendZ>;
 
-defm : PdWriteResXMMPair<WriteVarBlend,      [PdFPU01, PdFPMAL], 2, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarBlend,      [PdFPU01, PdFPMAL], 2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVarBlendY>;
 defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
 
 defm : PdWriteResXMMPair<WriteVecLogic,      [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecLogicX,     [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecLogicX,     [PdFPU01, PdFPMAL], 2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVecLogicY>;
 defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
 
 defm : PdWriteResXMMPair<WriteVecTest,       [PdFPU0, PdFPFMA, PdEX0],  1, [], 2>;
-defm : PdWriteResYMMPair<WriteVecTestY,      [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : PdWriteResYMMPair<WriteVecTestY,      [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
 defm : X86WriteResPairUnsupported<WriteVecTestZ>;
 
 defm : PdWriteResXMMPair<WriteShuffle256,    [PdFPU01, PdFPMAL]>;
 defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
 
-defm : PdWriteResXMMPair<WriteVarVecShift,   [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVarVecShift,   [PdFPU01, PdFPMAL], 3, [1, 2]>;
 defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
 defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
 
@@ -1034,14 +1169,15 @@ defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
 // Vector insert/extract operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteRes<WriteVecInsert,    [PdFPU01, PdFPMAL], 2, [], 2>;
-defm : PdWriteRes<WriteVecInsertLd,  [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
+defm : PdWriteRes<WriteVecInsert,    [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
+defm : PdWriteRes<WriteVecInsertLd,  [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;
 
-defm : PdWriteRes<WriteVecExtract,   [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
-defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtract,   [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
+defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;
 
 def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
   let Latency = 3;
+  let ResourceCycles = [1, 3];
 }
 def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
 
@@ -1049,19 +1185,19 @@ def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
 // SSE42 String instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
-defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0],  6, [1, 2, 1], 7, 2>;
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0],  7, [1, 8, 1], 7, 2>;
 
-defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
-defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // MOVMSK Instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0],   10, [], 2>;
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0],   12, [], 2>;
 
-defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
 defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
 // defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
 
@@ -1079,12 +1215,12 @@ defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResXMMPair<WriteFHAdd,  [PdFPU0, PdFPFMA], 11, [],     3, 1>;
-defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [2, 1], 8, 2>;
+defm : PdWriteResXMMPair<WriteFHAdd,  [PdFPU0, PdFPFMA], 11, [1, 5],     3, 1>;
+defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
 defm : X86WriteResPairUnsupported<WriteFHAddZ>;
 
-defm : PdWriteResXMMPair<WritePHAdd,  [PdFPU01, PdFPMAL], 5, [], 3, 1>;
-defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WritePHAdd,  [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
+defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
 defm : X86WriteResPairUnsupported<WritePHAddY>;
 defm : X86WriteResPairUnsupported<WritePHAddZ>;
 
@@ -1106,10 +1242,11 @@ def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
 // Carry-less multiplication instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
+defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;
 
 def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
-  let Latency = 13;
+  let Latency = 12;
+  let ResourceCycles = [1, 7];
   let NumMicroOps = 6;
 }
 def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
@@ -1120,9 +1257,15 @@ def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
 
 def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
   let Latency = 3;
-  let ResourceCycles = [1, 4];
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;
+
+def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+  let Latency = 3;
+  let ResourceCycles = [1, 3];
 }
-def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
+def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // AVX instructions.
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index 33a6b01546d7..2d26232b4132 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -1,9 +1,8 @@
 //=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -109,6 +108,11 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
 def : ReadAdvance<ReadAfterVecXLd, 5>;
 def : ReadAdvance<ReadAfterVecYLd, 5>;
 
+/// "Additional 6 cycle transfer operation which moves a floating point
+/// operation input value from the integer unit to the floating point unit."
+/// Reference: AMDfam16h SOG (Appendix A "Instruction Latencies", Section A.2).
+def : ReadAdvance<ReadInt2Fpu, -6>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when dispatched by the schedulers.
@@ -174,6 +178,8 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
   }
 }
 
+// Instructions that have local forwarding disabled have an extra +1cy latency.
+
 // A folded store needs a cycle on the SAGU for the store data,
 // most RMW instructions don't need an extra uop.
 defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
@@ -215,7 +221,6 @@ defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
 defm : JWriteResIntPair<WriteCRC32,  [JALU01], 3, [4], 3>;
 
 defm : JWriteResIntPair<WriteCMOV,  [JALU01], 1>; // Conditional move.
-defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
 defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
 def  : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
@@ -262,14 +267,13 @@ defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
 // Loads, stores, and moves, not folded with other operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-def : WriteRes<WriteLoad,    [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteLoad,    [JLAGU]> { let Latency = 3; }
 def : WriteRes<WriteStore,   [JSAGU]>;
 def : WriteRes<WriteStoreNT, [JSAGU]>;
 def : WriteRes<WriteMove,    [JALU01]>;
 
 // Load/store MXCSR.
-// FIXME: These are copy and pasted from WriteLoad/Store.
-def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 3; }
 def : WriteRes<WriteSTMXCSR, [JSAGU]>;
 
 // Treat misc copies as a move.
@@ -400,8 +404,8 @@ defm : X86WriteResPairUnsupported<WriteFTestZ>;
 defm : JWriteResFpuPair<WriteFShuffle,    [JFPU01, JFPX],  1>;
 defm : JWriteResYMMPair<WriteFShuffleY,   [JFPU01, JFPX],  1, [2, 2], 2>;
 defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
-defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX],  2, [1, 4], 3>;
-defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX],  3, [2, 6], 6>;
+defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX],  3, [1, 4], 3>; // +1cy latency.
+defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX],  4, [2, 6], 6>; // +1cy latency.
 defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
 defm : JWriteResFpuPair<WriteFBlend,      [JFPU01, JFPX],  1>;
 defm : JWriteResYMMPair<WriteFBlendY,     [JFPU01, JFPX],  1, [2, 2], 2>;
@@ -425,12 +429,13 @@ defm : JWriteResFpuPair<WriteCvtPD2I,      [JFPU1, JSTC], 3, [1,1], 1>;
 defm : JWriteResYMMPair<WriteCvtPD2IY,     [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
-// FIXME: f+3 ST, LD+STC latency
-defm : JWriteResFpuPair<WriteCvtI2SS,      [JFPU1, JSTC], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SS,           [JFPU1, JSTC], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd,         [JLAGU, JFPU1, JSTC], 9, [1,1,1], 1>;
 defm : JWriteResFpuPair<WriteCvtI2PS,      [JFPU1, JSTC], 3, [1,1], 1>;
 defm : JWriteResYMMPair<WriteCvtI2PSY,     [JFPU1, JSTC], 3, [2,2], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : JWriteResFpuPair<WriteCvtI2SD,      [JFPU1, JSTC], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SD,           [JFPU1, JSTC], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd,         [JLAGU, JFPU1, JSTC], 9, [1,1,1], 1>;
 defm : JWriteResFpuPair<WriteCvtI2PD,      [JFPU1, JSTC], 3, [1,1], 1>;
 defm : JWriteResYMMPair<WriteCvtI2PDY,     [JFPU1, JSTC], 3, [2,2], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
@@ -487,11 +492,11 @@ defm : JWriteResFpuPair<WriteVecALUX,     [JFPU01, JVALU], 1>;
 defm : X86WriteResPairUnsupported<WriteVecALUY>;
 defm : X86WriteResPairUnsupported<WriteVecALUZ>;
 defm : JWriteResFpuPair<WriteVecShift,    [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WriteVecShiftX,   [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftX,   [JFPU01, JVALU], 2>; // +1cy latency.
 defm : X86WriteResPairUnsupported<WriteVecShiftY>;
 defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
 defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 2>; // +1cy latency.
 defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
 defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
 defm : X86WriteResPairUnsupported<WriteVarVecShift>;
@@ -540,7 +545,7 @@ defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
 // Vector insert/extract operations.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : X86WriteRes<WriteVecInsert,      [JFPU01, JVALU], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecInsert,      [JFPU01, JVALU], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecInsertLd,    [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
 defm : X86WriteRes<WriteVecExtract,     [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
 defm : X86WriteRes<WriteVecExtractSt,   [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
@@ -575,10 +580,10 @@ defm : JWriteResFpuPair<WriteAESDecEnc,   [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : JWriteResFpuPair<WriteFHAdd,         [JFPU0, JFPA], 3>;
-defm : JWriteResYMMPair<WriteFHAddY,        [JFPU0, JFPA], 3, [2,2], 2>;
-defm : JWriteResFpuPair<WritePHAdd,       [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WritePHAddX,      [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteFHAdd,         [JFPU0, JFPA], 4>;            // +1cy latency.
+defm : JWriteResYMMPair<WriteFHAddY,        [JFPU0, JFPA], 4, [2,2], 2>;  // +1cy latency.
+defm : JWriteResFpuPair<WritePHAdd,         [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddX,        [JFPU01, JVALU], 2>;          // +1cy latency.
 defm : X86WriteResPairUnsupported<WritePHAddY>;
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index fcaff7cf810f..34c251a5c5bb 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -1,9 +1,8 @@
 //=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,6 +52,8 @@ def : ReadAdvance<ReadAfterVecLd, 3>;
 def : ReadAdvance<ReadAfterVecXLd, 3>;
 def : ReadAdvance<ReadAfterVecYLd, 3>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // Many SchedWrites are defined in pairs with and without a folded load.
 // Instructions with folded loads are usually micro-fused, so they only appear
 // as two micro-ops when queued in the reservation station.
@@ -130,7 +131,6 @@ defm : SLMWriteResPair<WriteJump,   [SLM_IEC_RSV1],  1>;
 defm : SLMWriteResPair<WriteCRC32,  [SLM_IEC_RSV1],  3>;
 
 defm : SLMWriteResPair<WriteCMOV,  [SLM_IEC_RSV01], 2, [2]>;
-defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
 defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
 def  : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
 def  : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index a866f843106b..65f6d89df610 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -1,9 +1,8 @@
 //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -95,6 +94,8 @@ def : ReadAdvance<ReadAfterVecLd, 8>;
 def : ReadAdvance<ReadAfterVecXLd, 8>;
 def : ReadAdvance<ReadAfterVecYLd, 8>;
 
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
 // The Integer PRF for Zen is 168 entries, and it holds the architectural and
 // speculative version of the 64-bit integer registers.
 // Reference: "Software Optimization Guide for AMD Family 17h Processors"
@@ -214,7 +215,6 @@ defm : ZnWriteResPair<WriteJump,  [ZnALU], 1>;
 defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
 
 defm : ZnWriteResPair<WriteCMOV,   [ZnALU], 1>;
-defm : ZnWriteResPair<WriteCMOV2,  [ZnALU], 1>;
 def  : WriteRes<WriteSETCC,  [ZnALU]>;
 def  : WriteRes<WriteSETCCStore,  [ZnALU, ZnAGU]>;
 defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 008a9ec2ba3c..50690953eef5 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,24 +43,6 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
   return false;
 }
 
-namespace {
-
-// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
-// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
-// always smaller than the block size).
-struct RepMovsRepeats {
-  RepMovsRepeats(uint64_t Size) : Size(Size) {}
-
-  uint64_t Count() const { return Size / UBytes(); }
-  uint64_t BytesLeft() const { return Size % UBytes(); }
-  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }
-
-  const uint64_t Size;
-  MVT AVT = MVT::i8;
-};
-
-}  // namespace
-
 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
     SDValue Size, unsigned Align, bool isVolatile,
@@ -201,98 +182,137 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   return Chain;
 }
 
-SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
-    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
-    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
-    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
-  // This requires the copy size to be a constant, preferably
-  // within a subtarget-specific limit.
-  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-  const X86Subtarget &Subtarget =
-      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
-  if (!ConstantSize)
-    return SDValue();
-  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
-  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
+/// Emit a single REP MOVS{B,W,D,Q} instruction.
+static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                           const SDLoc &dl, SDValue Chain, SDValue Dst,
+                           SDValue Src, SDValue Size, MVT AVT) {
+  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
+  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
+  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
+  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
+
+  SDValue InFlag;
+  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
+  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
+}
+
+/// Emit a single REP MOVSB instruction for a particular constant size.
+static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                            const SDLoc &dl, SDValue Chain, SDValue Dst,
+                            SDValue Src, uint64_t Size) {
+  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
+                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
+}
+
+/// Returns the best type to use with repmovs depending on alignment.
+static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
+                                 uint64_t Align) {
+  assert((Align != 0) && "Align is normalized");
+  assert(isPowerOf2_64(Align) && "Align is a power of 2");
+  switch (Align) {
+  case 1:
+    return MVT::i8;
+  case 2:
+    return MVT::i16;
+  case 4:
+    return MVT::i32;
+  default:
+    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
+  }
+}
+
+/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
+/// a constant size memory copy. In some cases where we know REP MOVS is
+/// inefficient we return an empty SDValue so the calling code can either
+/// generate a load/store sequence or call the runtime memcpy function.
+static SDValue emitConstantSizeRepmov(
+    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
+    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
+    unsigned Align, bool isVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
+
+  /// TODO: Revisit next line: big copies with ERMSB on march >= haswell are
+  /// very efficient.
+  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
     return SDValue();
 
-  /// If not DWORD aligned, it is more efficient to call the library.  However
-  /// if calling the library is not allowed (AlwaysInline), then soldier on as
-  /// the code generated here is better than the long load-store sequence we
-  /// would otherwise get.
+  /// If we have enhanced repmovs we use it.
+  if (Subtarget.hasERMSB())
+    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
+
+  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
+  /// We assume runtime memcpy will do a better job for unaligned copies when
+  /// ERMS is not present.
   if (!AlwaysInline && (Align & 3) != 0)
     return SDValue();
 
+  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
+  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
+  const uint64_t BlockCount = Size / BlockBytes;
+  const uint64_t BytesLeft = Size % BlockBytes;
+  SDValue RepMovs =
+      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
+                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
+
+  /// RepMov can process the whole length.
+  if (BytesLeft == 0)
+    return RepMovs;
+
+  assert(BytesLeft && "We have leftover at this point");
+
+  /// In case we optimize for size we use repmovsb even if it's less efficient
+  /// so we can save the loads/stores of the leftover.
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
+    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
+
+  // Handle the last 1 - 7 bytes.
+  SmallVector<SDValue, 4> Results;
+  Results.push_back(RepMovs);
+  unsigned Offset = Size - BytesLeft;
+  EVT DstVT = Dst.getValueType();
+  EVT SrcVT = Src.getValueType();
+  Results.push_back(DAG.getMemcpy(
+      Chain, dl,
+      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
+      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
+      DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
+      /*AlwaysInline*/ true, /*isTailCall*/ false,
+      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
+}
+
+SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
   // If to a segment-relative address space, use the default lowering.
-  if (DstPtrInfo.getAddrSpace() >= 256 ||
-      SrcPtrInfo.getAddrSpace() >= 256)
+  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
     return SDValue();
 
-  // If the base register might conflict with our physical registers, bail out.
+  // If the base registers conflict with our physical registers, use the default
+  // lowering.
   const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                   X86::ECX, X86::ESI, X86::EDI};
   if (isBaseRegConflictPossible(DAG, ClobberSet))
     return SDValue();
 
-  // If the target has enhanced REPMOVSB, then it's at least as fast to use
-  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
-  // BytesLeft.
-  if (!Subtarget.hasERMSB() && !(Align & 1)) {
-    if (Align & 2)
-      // WORD aligned
-      Repeats.AVT = MVT::i16;
-    else if (Align & 4)
-      // DWORD aligned
-      Repeats.AVT = MVT::i32;
-    else
-      // QWORD aligned
-      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
-
-    if (Repeats.BytesLeft() > 0 &&
-        DAG.getMachineFunction().getFunction().optForMinSize()) {
-      // When aggressively optimizing for size, avoid generating the code to
-      // handle BytesLeft.
-      Repeats.AVT = MVT::i8;
-    }
-  }
-
-  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
-  SDValue InFlag;
-  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
-                           DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
-  InFlag = Chain.getValue(1);
-  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
-                           Dst, InFlag);
-  InFlag = Chain.getValue(1);
-  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
-                           Src, InFlag);
-  InFlag = Chain.getValue(1);
-
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
-  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
+  const X86Subtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
 
-  SmallVector<SDValue, 4> Results;
-  Results.push_back(RepMovs);
-  if (Repeats.BytesLeft()) {
-    // Handle the last 1 - 7 bytes.
-    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
-    EVT DstVT = Dst.getValueType();
-    EVT SrcVT = Src.getValueType();
-    EVT SizeVT = Size.getValueType();
-    Results.push_back(DAG.getMemcpy(Chain, dl,
-                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
-                                                DAG.getConstant(Offset, dl,
-                                                                DstVT)),
-                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
-                                                DAG.getConstant(Offset, dl,
-                                                                SrcVT)),
-                                    DAG.getConstant(Repeats.BytesLeft(), dl,
-                                                    SizeVT),
-                                    Align, isVolatile, AlwaysInline, false,
-                                    DstPtrInfo.getWithOffset(Offset),
-                                    SrcPtrInfo.getWithOffset(Offset)));
-  }
+  /// Handle constant sizes.
+  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
+    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
+                                  ConstantSize->getZExtValue(),
+                                  Size.getValueType(), Align, isVolatile,
+                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);
 
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
+  return SDValue();
 }
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index f4a285a5f916..0f2d979f91e3 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 720be8afa62c..a202fc63637b 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -1,9 +1,8 @@
 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index b08c31935d28..296341517579 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -1,9 +1,8 @@
 //===-- X86ShuffleDecodeConstantPool.h - X86 shuffle decode -----*-C++-*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index a729161a1beb..40f5dbe57e4b 100644
--- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -1,9 +1,8 @@
 //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -123,10 +122,7 @@ namespace {
 
 class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
 public:
-  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
-    initializeX86SpeculativeLoadHardeningPassPass(
-        *PassRegistry::getPassRegistry());
-  }
+  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
 
   StringRef getPassName() const override {
     return "X86 speculative load hardening";
@@ -661,7 +657,7 @@ X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
       //   jmpq *%rax
       // ```
       // We still want to harden the edge to `L1`.
-      if (X86::getCondFromBranchOpc(MI.getOpcode()) == X86::COND_INVALID) {
+      if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
         Info.CondBrs.clear();
         Info.UncondBr = &MI;
         continue;
@@ -752,7 +748,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
 
           for (X86::CondCode Cond : Conds) {
             int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
-            auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes);
+            auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
 
             unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
             // Note that we intentionally use an empty debug location so that
@@ -760,7 +756,8 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
             auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
                                  TII->get(CMovOp), UpdatedStateReg)
                              .addReg(CurStateReg)
-                             .addReg(PS->PoisonReg);
+                             .addReg(PS->PoisonReg)
+                             .addImm(Cond);
             // If this is the last cmov and the EFLAGS weren't originally
             // live-in, mark them as killed.
             if (!LiveEFLAGS && Cond == Conds.back())
@@ -789,7 +786,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
       MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
       int &SuccCount = SuccCounts[&Succ];
 
-      X86::CondCode Cond = X86::getCondFromBranchOpc(CondBr->getOpcode());
+      X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
       X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
       UncondCodeSeq.push_back(Cond);
 
@@ -1177,12 +1174,13 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
 
     // Now cmov over the predicate if the comparison wasn't equal.
     int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
-    auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+    auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
     unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
     auto CMovI =
         BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
             .addReg(PS->InitialReg)
-            .addReg(PS->PoisonReg);
+            .addReg(PS->PoisonReg)
+            .addImm(X86::COND_NE);
     CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
     ++NumInstsInserted;
     LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
@@ -1963,6 +1961,14 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
     LLVM_DEBUG(
         dbgs() << "  Skipping hardening base of explicit stack frame load: ";
         MI.dump(); dbgs() << "\n");
+  } else if (BaseMO.getReg() == X86::RSP) {
+    // Some idempotent atomic operations are lowered directly to a locked
+    // OR with 0 to the top of stack (or slightly offset from top), which
+    // uses an explicit RSP register as the base.
+    assert(IndexMO.getReg() == X86::NoRegister &&
+           "Explicit RSP access with dynamic index!");
+    LLVM_DEBUG(
+        dbgs() << "  Cannot harden base of explicit RSP offset in a load!");
   } else if (BaseMO.getReg() == X86::RIP ||
              BaseMO.getReg() == X86::NoRegister) {
     // For both RIP-relative addressed loads or absolute loads, we cannot
@@ -2464,7 +2470,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
   // If we have no red zones or if the function returns twice (possibly without
   // using the `ret` instruction) like setjmp, we need to save the expected
   // return address prior to the call.
-  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
       MF.exposesReturnsTwice()) {
     // If we don't have red zones, we need to compute the expected return
     // address prior to the call and store it in a register that lives across
@@ -2546,12 +2552,13 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
   // Now conditionally update the predicate state we just extracted if we ended
   // up at a different return address than expected.
   int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
-  auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
 
   unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
   auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
                    .addReg(NewStateReg, RegState::Kill)
-                   .addReg(PS->PoisonReg);
+                   .addReg(PS->PoisonReg)
+                   .addImm(X86::COND_NE);
   CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
   ++NumInstsInserted;
   LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0c9ce8802e1b..d5bb56603df9 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -1,9 +1,8 @@
 //===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 
 #include "X86CallLowering.h"
 #include "X86LegalizerInfo.h"
+#include "X86MacroFusion.h"
 #include "X86RegisterBankInfo.h"
 #include "X86Subtarget.h"
 #include "MCTargetDesc/X86BaseInfo.h"
@@ -176,10 +176,13 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
   if (TM.shouldAssumeDSOLocal(M, GV))
     return X86II::MO_NO_FLAG;
 
+  // Functions on COFF can be non-DSO local for two reasons:
+  // - They are marked dllimport
+  // - They are extern_weak, and a stub is needed
   if (isTargetCOFF()) {
-    assert(GV->hasDLLImportStorageClass() &&
-           "shouldAssumeDSOLocal gave inconsistent answer");
-    return X86II::MO_DLLIMPORT;
+    if (GV->hasDLLImportStorageClass())
+      return X86II::MO_DLLIMPORT;
+    return X86II::MO_COFFSTUB;
   }
 
   const Function *F = dyn_cast_or_null<Function>(GV);
@@ -367,3 +370,8 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
 bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
+
+void X86Subtarget::getPostRAMutations(
+    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+  Mutations.push_back(createX86MacroFusionDAGMutation());
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index b1103f823e7f..24ccc9cb7843 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -1,9 +1,8 @@
 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,6 +88,9 @@ protected:
   /// True if the processor supports X87 instructions.
   bool HasX87 = false;
 
+  /// True if the processor supports CMPXCHG8B.
+  bool HasCmpxchg8b = false;
+
   /// True if this processor has NOPL instruction
   /// (generally pentium pro+).
   bool HasNOPL = false;
@@ -295,6 +297,9 @@ protected:
   /// True if the processor supports macrofusion.
   bool HasMacroFusion = false;
 
+  /// True if the processor supports branch fusion.
+  bool HasBranchFusion = false;
+
   /// True if the processor has enhanced REP MOVSB/STOSB.
   bool HasERMSB = false;
 
@@ -348,9 +353,18 @@ protected:
   /// Processor has AVX-512 Vector Neural Network Instructions
   bool HasVNNI = false;
 
+  /// Processor has AVX-512 bfloat16 floating-point extensions
+  bool HasBF16 = false;
+
+  /// Processor supports ENQCMD instructions
+  bool HasENQCMD = false;
+
   /// Processor has AVX-512 Bit Algorithms instructions
   bool HasBITALG = false;
 
+  /// Processor has AVX-512 vp2intersect instructions
+  bool HasVP2INTERSECT = false;
+
   /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX = false;
 
@@ -388,6 +402,12 @@ protected:
   /// Try harder to combine to horizontal vector ops if they are fast.
   bool HasFastHorizontalOps = false;
 
+  /// Prefer a left/right scalar logical shift pair over a shift+and pair.
+  bool HasFastScalarShiftMasks = false;
+
+  /// Prefer a left/right vector logical shift pair over a shift+and pair.
+  bool HasFastVectorShiftMasks = false;
+
   /// Use a retpoline thunk rather than indirect calls to block speculative
   /// execution.
   bool UseRetpolineIndirectCalls = false;
@@ -547,6 +567,7 @@ public:
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
   bool hasX87() const { return HasX87; }
+  bool hasCmpxchg8b() const { return HasCmpxchg8b; }
   bool hasNOPL() const { return HasNOPL; }
   // SSE codegen depends on cmovs, and all SSE1+ processors support them.
   // All 64-bit processors support cmov.
@@ -621,7 +642,7 @@ public:
   int getGatherOverhead() const { return GatherOverhead; }
   int getScatterOverhead() const { return ScatterOverhead; }
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
-  bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+  bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
   bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
@@ -638,7 +659,10 @@ public:
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
   bool hasFastBEXTR() const { return HasFastBEXTR; }
   bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+  bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
+  bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
   bool hasMacroFusion() const { return HasMacroFusion; }
+  bool hasBranchFusion() const { return HasBranchFusion; }
   bool hasERMSB() const { return HasERMSB; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
@@ -657,6 +681,8 @@ public:
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
   bool hasVNNI() const { return HasVNNI; }
+  bool hasBF16() const { return HasBF16; }
+  bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
   bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
   bool hasSHSTK() const { return HasSHSTK; }
@@ -669,6 +695,7 @@ public:
   bool hasSGX() const { return HasSGX; }
   bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
   bool hasINVPCID() const { return HasINVPCID; }
+  bool hasENQCMD() const { return HasENQCMD; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
   bool useRetpolineIndirectBranches() const {
     return UseRetpolineIndirectBranches;
@@ -744,10 +771,6 @@ public:
     return TargetTriple.isWindowsMSVCEnvironment();
   }
 
-  bool isTargetKnownWindowsMSVC() const {
-    return TargetTriple.isKnownWindowsMSVCEnvironment();
-  }
-
   bool isTargetWindowsCoreCLR() const {
     return TargetTriple.isWindowsCoreCLREnvironment();
   }
@@ -834,11 +857,11 @@ public:
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
-  // TODO: Update the regression tests and return true.
-  bool supportPrintSchedInfo() const override { return false; }
-
   bool enableEarlyIfConversion() const override;
 
+  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
+                              &Mutations) const override;
+
   AntiDepBreakMode getAntiDepBreakMode() const override {
     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
   }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index afcb49dc2263..0cbf13899a29 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 
 #include "X86TargetMachine.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
 #include "X86.h"
 #include "X86CallLowering.h"
 #include "X86LegalizerInfo.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
@@ -70,9 +71,10 @@ extern "C" void LLVMInitializeX86Target() {
   initializeFixupBWInstPassPass(PR);
   initializeEvexToVexInstPassPass(PR);
   initializeFixupLEAPassPass(PR);
-  initializeShadowCallStackPass(PR);
+  initializeFPSPass(PR);
   initializeX86CallFrameOptimizationPass(PR);
   initializeX86CmovConverterPassPass(PR);
+  initializeX86ExpandPseudoPass(PR);
   initializeX86ExecutionDomainFixPass(PR);
   initializeX86DomainReassignmentPass(PR);
   initializeX86AvoidSFBPassPass(PR);
@@ -194,7 +196,7 @@ static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
                                                  bool JIT, bool Is64Bit) {
   if (CM) {
     if (*CM == CodeModel::Tiny)
-      report_fatal_error("Target does not support the tiny CodeModel");
+      report_fatal_error("Target does not support the tiny CodeModel", false);
     return *CM;
   }
   if (JIT)
@@ -357,6 +359,13 @@ public:
     return DAG;
   }
 
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+    DAG->addMutation(createX86MacroFusionDAGMutation());
+    return DAG;
+  }
+
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
@@ -371,6 +380,8 @@ public:
   void addPreEmitPass() override;
   void addPreEmitPass2() override;
   void addPreSched2() override;
+
+  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
 };
 
 class X86ExecutionDomainFix : public ExecutionDomainFix {
@@ -490,7 +501,6 @@ void X86PassConfig::addPreEmitPass() {
     addPass(createBreakFalseDeps());
   }
 
-  addPass(createShadowCallStackPass());
   addPass(createX86IndirectBranchTrackingPass());
 
   if (UseVZeroUpper)
@@ -512,6 +522,13 @@ void X86PassConfig::addPreEmitPass2() {
   // correct CFA calculation rule where needed by inserting appropriate CFI
   // instructions.
   const Triple &TT = TM->getTargetTriple();
-  if (!TT.isOSDarwin() && !TT.isOSWindows())
+  const MCAsmInfo *MAI = TM->getMCAsmInfo();
+  if (!TT.isOSDarwin() &&
+      (!TT.isOSWindows() ||
+       MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
     addPass(createCFIInstrInserter());
 }
+
+std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
+  return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index f5b45da0c3dc..b999e2e86af6 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -1,9 +1,8 @@
 //===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 505c4fa07b77..92e0779c2e74 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index d045094edb1e..13d7b4ad70d6 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 36929a4f5439..3dc59aeb263e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1,9 +1,8 @@
 //===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -1651,17 +1650,77 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
-  static const CostTblEntry SSE2CostTbl[] = {
-    { ISD::SETCC,   MVT::v2i64,   8 },
-    { ISD::SETCC,   MVT::v4i32,   1 },
-    { ISD::SETCC,   MVT::v8i16,   1 },
-    { ISD::SETCC,   MVT::v16i8,   1 },
+  unsigned ExtraCost = 0;
+  if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
+    // Some vector comparison predicates cost extra instructions.
+    if (MTy.isVector() &&
+        !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
+          (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
+          ST->hasBWI())) {
+      switch (cast<CmpInst>(I)->getPredicate()) {
+      case CmpInst::Predicate::ICMP_NE:
+        // xor(cmpeq(x,y),-1)
+        ExtraCost = 1;
+        break;
+      case CmpInst::Predicate::ICMP_SGE:
+      case CmpInst::Predicate::ICMP_SLE:
+        // xor(cmpgt(x,y),-1)
+        ExtraCost = 1;
+        break;
+      case CmpInst::Predicate::ICMP_ULT:
+      case CmpInst::Predicate::ICMP_UGT:
+        // cmpgt(xor(x,signbit),xor(y,signbit))
+        // xor(cmpeq(pmaxu(x,y),x),-1)
+        ExtraCost = 2;
+        break;
+      case CmpInst::Predicate::ICMP_ULE:
+      case CmpInst::Predicate::ICMP_UGE:
+        if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) ||
+            (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) {
+          // cmpeq(psubus(x,y),0)
+          // cmpeq(pminu(x,y),x)
+          ExtraCost = 1;
+        } else {
+          // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
+          ExtraCost = 3;
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  static const CostTblEntry AVX512BWCostTbl[] = {
+    { ISD::SETCC,   MVT::v32i16,  1 },
+    { ISD::SETCC,   MVT::v64i8,   1 },
+
+    { ISD::SELECT,  MVT::v32i16,  1 },
+    { ISD::SELECT,  MVT::v64i8,   1 },
   };
 
-  static const CostTblEntry SSE42CostTbl[] = {
-    { ISD::SETCC,   MVT::v2f64,   1 },
-    { ISD::SETCC,   MVT::v4f32,   1 },
-    { ISD::SETCC,   MVT::v2i64,   1 },
+  static const CostTblEntry AVX512CostTbl[] = {
+    { ISD::SETCC,   MVT::v8i64,   1 },
+    { ISD::SETCC,   MVT::v16i32,  1 },
+    { ISD::SETCC,   MVT::v8f64,   1 },
+    { ISD::SETCC,   MVT::v16f32,  1 },
+
+    { ISD::SELECT,  MVT::v8i64,   1 },
+    { ISD::SELECT,  MVT::v16i32,  1 },
+    { ISD::SELECT,  MVT::v8f64,   1 },
+    { ISD::SELECT,  MVT::v16f32,  1 },
+  };
+
+  static const CostTblEntry AVX2CostTbl[] = {
+    { ISD::SETCC,   MVT::v4i64,   1 },
+    { ISD::SETCC,   MVT::v8i32,   1 },
+    { ISD::SETCC,   MVT::v16i16,  1 },
+    { ISD::SETCC,   MVT::v32i8,   1 },
+
+    { ISD::SELECT,  MVT::v4i64,   1 }, // pblendvb
+    { ISD::SELECT,  MVT::v8i32,   1 }, // pblendvb
+    { ISD::SELECT,  MVT::v16i16,  1 }, // pblendvb
+    { ISD::SELECT,  MVT::v32i8,   1 }, // pblendvb
   };
 
   static const CostTblEntry AVX1CostTbl[] = {
@@ -1672,50 +1731,83 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
     { ISD::SETCC,   MVT::v8i32,   4 },
     { ISD::SETCC,   MVT::v16i16,  4 },
     { ISD::SETCC,   MVT::v32i8,   4 },
+
+    { ISD::SELECT,  MVT::v4f64,   1 }, // vblendvpd
+    { ISD::SELECT,  MVT::v8f32,   1 }, // vblendvps
+    { ISD::SELECT,  MVT::v4i64,   1 }, // vblendvpd
+    { ISD::SELECT,  MVT::v8i32,   1 }, // vblendvps
+    { ISD::SELECT,  MVT::v16i16,  3 }, // vandps + vandnps + vorps
+    { ISD::SELECT,  MVT::v32i8,   3 }, // vandps + vandnps + vorps
   };
 
-  static const CostTblEntry AVX2CostTbl[] = {
-    { ISD::SETCC,   MVT::v4i64,   1 },
-    { ISD::SETCC,   MVT::v8i32,   1 },
-    { ISD::SETCC,   MVT::v16i16,  1 },
-    { ISD::SETCC,   MVT::v32i8,   1 },
+  static const CostTblEntry SSE42CostTbl[] = {
+    { ISD::SETCC,   MVT::v2f64,   1 },
+    { ISD::SETCC,   MVT::v4f32,   1 },
+    { ISD::SETCC,   MVT::v2i64,   1 },
   };
 
-  static const CostTblEntry AVX512CostTbl[] = {
-    { ISD::SETCC,   MVT::v8i64,   1 },
-    { ISD::SETCC,   MVT::v16i32,  1 },
-    { ISD::SETCC,   MVT::v8f64,   1 },
-    { ISD::SETCC,   MVT::v16f32,  1 },
+  static const CostTblEntry SSE41CostTbl[] = {
+    { ISD::SELECT,  MVT::v2f64,   1 }, // blendvpd
+    { ISD::SELECT,  MVT::v4f32,   1 }, // blendvps
+    { ISD::SELECT,  MVT::v2i64,   1 }, // pblendvb
+    { ISD::SELECT,  MVT::v4i32,   1 }, // pblendvb
+    { ISD::SELECT,  MVT::v8i16,   1 }, // pblendvb
+    { ISD::SELECT,  MVT::v16i8,   1 }, // pblendvb
   };
 
-  static const CostTblEntry AVX512BWCostTbl[] = {
-    { ISD::SETCC,   MVT::v32i16,  1 },
-    { ISD::SETCC,   MVT::v64i8,   1 },
+  static const CostTblEntry SSE2CostTbl[] = {
+    { ISD::SETCC,   MVT::v2f64,   2 },
+    { ISD::SETCC,   MVT::f64,     1 },
+    { ISD::SETCC,   MVT::v2i64,   8 },
+    { ISD::SETCC,   MVT::v4i32,   1 },
+    { ISD::SETCC,   MVT::v8i16,   1 },
+    { ISD::SETCC,   MVT::v16i8,   1 },
+
+    { ISD::SELECT,  MVT::v2f64,   3 }, // andpd + andnpd + orpd
+    { ISD::SELECT,  MVT::v2i64,   3 }, // pand + pandn + por
+    { ISD::SELECT,  MVT::v4i32,   3 }, // pand + pandn + por
+    { ISD::SELECT,  MVT::v8i16,   3 }, // pand + pandn + por
+    { ISD::SELECT,  MVT::v16i8,   3 }, // pand + pandn + por
+  };
+
+  static const CostTblEntry SSE1CostTbl[] = {
+    { ISD::SETCC,   MVT::v4f32,   2 },
+    { ISD::SETCC,   MVT::f32,     1 },
+
+    { ISD::SELECT,  MVT::v4f32,   3 }, // andps + andnps + orps
   };
 
   if (ST->hasBWI())
     if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX512())
     if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX2())
     if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasAVX())
     if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE42())
     if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
+
+  if (ST->hasSSE41())
+    if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+      return LT.first * (ExtraCost + Entry->Cost);
 
   if (ST->hasSSE2())
     if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
-      return LT.first * Entry->Cost;
+      return LT.first * (ExtraCost + Entry->Cost);
+
+  if (ST->hasSSE1())
+    if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+      return LT.first * (ExtraCost + Entry->Cost);
 
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
@@ -1784,6 +1876,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::USUBSAT,    MVT::v2i64,   2 }, // pmaxuq + psubq
     { ISD::USUBSAT,    MVT::v4i64,   2 }, // pmaxuq + psubq
     { ISD::USUBSAT,    MVT::v8i64,   2 }, // pmaxuq + psubq
+    { ISD::UADDSAT,    MVT::v16i32,  3 }, // not + pminud + paddd
+    { ISD::UADDSAT,    MVT::v2i64,   3 }, // not + pminuq + paddq
+    { ISD::UADDSAT,    MVT::v4i64,   3 }, // not + pminuq + paddq
+    { ISD::UADDSAT,    MVT::v8i64,   3 }, // not + pminuq + paddq
   };
   static const CostTblEntry XOPCostTbl[] = {
     { ISD::BITREVERSE, MVT::v4i64,   4 },
@@ -1825,6 +1921,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::SSUBSAT,    MVT::v32i8,   1 },
     { ISD::UADDSAT,    MVT::v16i16,  1 },
     { ISD::UADDSAT,    MVT::v32i8,   1 },
+    { ISD::UADDSAT,    MVT::v8i32,   3 }, // not + pminud + paddd
     { ISD::USUBSAT,    MVT::v16i16,  1 },
     { ISD::USUBSAT,    MVT::v32i8,   1 },
     { ISD::USUBSAT,    MVT::v8i32,   2 }, // pmaxud + psubd
@@ -1861,6 +1958,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::SSUBSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
     { ISD::UADDSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
     { ISD::UADDSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
+    { ISD::UADDSAT,    MVT::v8i32,   8 }, // 2 x 128-bit Op + extract/insert
     { ISD::USUBSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
     { ISD::USUBSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
     { ISD::USUBSAT,    MVT::v8i32,   6 }, // 2 x 128-bit Op + extract/insert
@@ -1885,6 +1983,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   };
   static const CostTblEntry SSE42CostTbl[] = {
     { ISD::USUBSAT,    MVT::v4i32,   2 }, // pmaxud + psubd
+    { ISD::UADDSAT,    MVT::v4i32,   3 }, // not + pminud + paddd
     { ISD::FSQRT,      MVT::f32,    18 }, // Nehalem from http://www.agner.org/
     { ISD::FSQRT,      MVT::v4f32,  18 }, // Nehalem from http://www.agner.org/
   };
@@ -1945,14 +2044,23 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::FSQRT,      MVT::v4f32,  56 }, // Pentium III from http://www.agner.org/
   };
   static const CostTblEntry X64CostTbl[] = { // 64-bit targets
-    { ISD::BITREVERSE, MVT::i64,    14 }
+    { ISD::BITREVERSE, MVT::i64,    14 },
+    { ISD::SADDO,      MVT::i64,     1 },
+    { ISD::UADDO,      MVT::i64,     1 },
   };
   static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
     { ISD::BITREVERSE, MVT::i32,    14 },
     { ISD::BITREVERSE, MVT::i16,    14 },
-    { ISD::BITREVERSE, MVT::i8,     11 }
+    { ISD::BITREVERSE, MVT::i8,     11 },
+    { ISD::SADDO,      MVT::i32,     1 },
+    { ISD::SADDO,      MVT::i16,     1 },
+    { ISD::SADDO,      MVT::i8,      1 },
+    { ISD::UADDO,      MVT::i32,     1 },
+    { ISD::UADDO,      MVT::i16,     1 },
+    { ISD::UADDO,      MVT::i8,      1 },
   };
 
+  Type *OpTy = RetTy;
   unsigned ISD = ISD::DELETED_NODE;
   switch (IID) {
   default:
@@ -1987,11 +2095,23 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   case Intrinsic::sqrt:
     ISD = ISD::FSQRT;
     break;
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+    // SSUBO has same costs so don't duplicate.
+    ISD = ISD::SADDO;
+    OpTy = RetTy->getContainedType(0);
+    break;
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::usub_with_overflow:
+    // USUBO has same costs so don't duplicate.
+    ISD = ISD::UADDO;
+    OpTy = RetTy->getContainedType(0);
+    break;
   }
 
   if (ISD != ISD::DELETED_NODE) {
     // Legalize the type.
-    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
     MVT MTy = LT.second;
 
     // Attempt to lookup cost.
@@ -2226,6 +2346,9 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
                                       unsigned Alignment,
                                       unsigned AddressSpace) {
+  bool IsLoad = (Instruction::Load == Opcode);
+  bool IsStore = (Instruction::Store == Opcode);
+
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
@@ -2233,10 +2356,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
 
   unsigned NumElem = SrcVTy->getVectorNumElements();
   VectorType *MaskTy =
-    VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
-  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
-      (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
-      !isPowerOf2_32(NumElem)) {
+      VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+  if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) ||
+      (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) {
     // Scalarization
     int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
@@ -2244,8 +2366,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int BranchCost = getCFInstrCost(Instruction::Br);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
 
-    int ValueSplitCost = getScalarizationOverhead(
-        SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+    int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
                                          Alignment, AddressSpace);
@@ -2259,8 +2380,8 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
   if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
       LT.second.getVectorNumElements() == NumElem)
     // Promotion requires expand/truncate for data and a shuffle for mask.
-    Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) +
-            getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr);
+    Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, 0, nullptr) +
+            getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr);
 
   else if (LT.second.getVectorNumElements() > NumElem) {
     VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
@@ -2268,11 +2389,13 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     // Expanding requires fill mask with zeroes
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
   }
+
+  // Pre-AVX512 - each maskmov load costs 2 + store costs ~8.
   if (!ST->hasAVX512())
-    return Cost + LT.first*4; // Each maskmov costs 4
+    return Cost + LT.first * (IsLoad ? 2 : 8);
 
   // AVX-512 masked load/store is cheapper
-  return Cost+LT.first;
+  return Cost + LT.first;
 }
 
 int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -2281,7 +2404,7 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
   // likely result in more instructions compared to scalar code where the
   // computation can more often be merged into the index mode. The resulting
   // extra micro-ops can significantly decrease throughput.
-  unsigned NumVectorInstToHideOverhead = 10;
+  const unsigned NumVectorInstToHideOverhead = 10;
 
   // Cost modeling of Strided Access Computation is hidden by the indexing
   // modes of X86 regardless of the stride value. We dont believe that there
@@ -2369,6 +2492,48 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
         return LT.first * Entry->Cost;
   }
 
+  static const CostTblEntry AVX2BoolReduction[] = {
+    { ISD::AND,  MVT::v16i16,  2 }, // vpmovmskb + cmp
+    { ISD::AND,  MVT::v32i8,   2 }, // vpmovmskb + cmp
+    { ISD::OR,   MVT::v16i16,  2 }, // vpmovmskb + cmp
+    { ISD::OR,   MVT::v32i8,   2 }, // vpmovmskb + cmp
+  };
+
+  static const CostTblEntry AVX1BoolReduction[] = {
+    { ISD::AND,  MVT::v4i64,   2 }, // vmovmskpd + cmp
+    { ISD::AND,  MVT::v8i32,   2 }, // vmovmskps + cmp
+    { ISD::AND,  MVT::v16i16,  4 }, // vextractf128 + vpand + vpmovmskb + cmp
+    { ISD::AND,  MVT::v32i8,   4 }, // vextractf128 + vpand + vpmovmskb + cmp
+    { ISD::OR,   MVT::v4i64,   2 }, // vmovmskpd + cmp
+    { ISD::OR,   MVT::v8i32,   2 }, // vmovmskps + cmp
+    { ISD::OR,   MVT::v16i16,  4 }, // vextractf128 + vpor + vpmovmskb + cmp
+    { ISD::OR,   MVT::v32i8,   4 }, // vextractf128 + vpor + vpmovmskb + cmp
+  };
+
+  static const CostTblEntry SSE2BoolReduction[] = {
+    { ISD::AND,  MVT::v2i64,   2 }, // movmskpd + cmp
+    { ISD::AND,  MVT::v4i32,   2 }, // movmskps + cmp
+    { ISD::AND,  MVT::v8i16,   2 }, // pmovmskb + cmp
+    { ISD::AND,  MVT::v16i8,   2 }, // pmovmskb + cmp
+    { ISD::OR,   MVT::v2i64,   2 }, // movmskpd + cmp
+    { ISD::OR,   MVT::v4i32,   2 }, // movmskps + cmp
+    { ISD::OR,   MVT::v8i16,   2 }, // pmovmskb + cmp
+    { ISD::OR,   MVT::v16i8,   2 }, // pmovmskb + cmp
+  };
+
+  // Handle bool allof/anyof patterns.
+  if (ValTy->getVectorElementType()->isIntegerTy(1)) {
+    if (ST->hasAVX2())
+      if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
+        return LT.first * Entry->Cost;
+    if (ST->hasAVX())
+      if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
+        return LT.first * Entry->Cost;
+    if (ST->hasSSE2())
+      if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
+        return LT.first * Entry->Cost;
+  }
+
   return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
 }
 
@@ -2390,15 +2555,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
   // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
   // and make it as the cost.
 
-  static const CostTblEntry SSE42CostTblPairWise[] = {
+  static const CostTblEntry SSE1CostTblPairWise[] = {
+      {ISD::FMINNUM, MVT::v4f32, 4},
+  };
+
+  static const CostTblEntry SSE2CostTblPairWise[] = {
       {ISD::FMINNUM, MVT::v2f64, 3},
+      {ISD::SMIN, MVT::v2i64, 6},
+      {ISD::UMIN, MVT::v2i64, 8},
+      {ISD::SMIN, MVT::v4i32, 6},
+      {ISD::UMIN, MVT::v4i32, 8},
+      {ISD::SMIN, MVT::v8i16, 4},
+      {ISD::UMIN, MVT::v8i16, 6},
+      {ISD::SMIN, MVT::v16i8, 8},
+      {ISD::UMIN, MVT::v16i8, 6},
+  };
+
+  static const CostTblEntry SSE41CostTblPairWise[] = {
       {ISD::FMINNUM, MVT::v4f32, 2},
-      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
-      {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
+      {ISD::SMIN, MVT::v2i64, 9},
+      {ISD::UMIN, MVT::v2i64,10},
       {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
       {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
       {ISD::SMIN, MVT::v8i16, 2},
       {ISD::UMIN, MVT::v8i16, 2},
+      {ISD::SMIN, MVT::v16i8, 3},
+      {ISD::UMIN, MVT::v16i8, 3},
+  };
+
+  static const CostTblEntry SSE42CostTblPairWise[] = {
+      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+      {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
   };
 
   static const CostTblEntry AVX1CostTblPairWise[] = {
@@ -2411,8 +2598,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
       {ISD::UMIN, MVT::v4i32, 1},
       {ISD::SMIN, MVT::v8i16, 1},
       {ISD::UMIN, MVT::v8i16, 1},
+      {ISD::SMIN, MVT::v16i8, 2},
+      {ISD::UMIN, MVT::v16i8, 2},
+      {ISD::SMIN, MVT::v4i64, 7},
+      {ISD::UMIN, MVT::v4i64, 7},
       {ISD::SMIN, MVT::v8i32, 3},
       {ISD::UMIN, MVT::v8i32, 3},
+      {ISD::SMIN, MVT::v16i16, 3},
+      {ISD::UMIN, MVT::v16i16, 3},
+      {ISD::SMIN, MVT::v32i8, 3},
+      {ISD::UMIN, MVT::v32i8, 3},
   };
 
   static const CostTblEntry AVX2CostTblPairWise[] = {
@@ -2435,15 +2630,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
       {ISD::UMIN, MVT::v16i32, 1},
   };
 
-  static const CostTblEntry SSE42CostTblNoPairWise[] = {
+  static const CostTblEntry SSE1CostTblNoPairWise[] = {
+      {ISD::FMINNUM, MVT::v4f32, 4},
+  };
+
+  static const CostTblEntry SSE2CostTblNoPairWise[] = {
       {ISD::FMINNUM, MVT::v2f64, 3},
+      {ISD::SMIN, MVT::v2i64, 6},
+      {ISD::UMIN, MVT::v2i64, 8},
+      {ISD::SMIN, MVT::v4i32, 6},
+      {ISD::UMIN, MVT::v4i32, 8},
+      {ISD::SMIN, MVT::v8i16, 4},
+      {ISD::UMIN, MVT::v8i16, 6},
+      {ISD::SMIN, MVT::v16i8, 8},
+      {ISD::UMIN, MVT::v16i8, 6},
+  };
+
+  static const CostTblEntry SSE41CostTblNoPairWise[] = {
       {ISD::FMINNUM, MVT::v4f32, 3},
-      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
-      {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
+      {ISD::SMIN, MVT::v2i64, 9},
+      {ISD::UMIN, MVT::v2i64,11},
       {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
       {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
       {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
       {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
+      {ISD::SMIN, MVT::v16i8, 3},
+      {ISD::UMIN, MVT::v16i8, 3},
+  };
+
+  static const CostTblEntry SSE42CostTblNoPairWise[] = {
+      {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+      {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
   };
 
   static const CostTblEntry AVX1CostTblNoPairWise[] = {
@@ -2456,8 +2673,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
       {ISD::UMIN, MVT::v4i32, 1},
       {ISD::SMIN, MVT::v8i16, 1},
       {ISD::UMIN, MVT::v8i16, 1},
+      {ISD::SMIN, MVT::v16i8, 2},
+      {ISD::UMIN, MVT::v16i8, 2},
+      {ISD::SMIN, MVT::v4i64, 7},
+      {ISD::UMIN, MVT::v4i64, 7},
       {ISD::SMIN, MVT::v8i32, 2},
       {ISD::UMIN, MVT::v8i32, 2},
+      {ISD::SMIN, MVT::v16i16, 2},
+      {ISD::UMIN, MVT::v16i16, 2},
+      {ISD::SMIN, MVT::v32i8, 2},
+      {ISD::UMIN, MVT::v32i8, 2},
   };
 
   static const CostTblEntry AVX2CostTblNoPairWise[] = {
@@ -2496,6 +2721,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
     if (ST->hasSSE42())
       if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
         return LT.first * Entry->Cost;
+
+    if (ST->hasSSE41())
+      if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE2())
+      if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE1())
+      if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
   } else {
     if (ST->hasAVX512())
       if (const auto *Entry =
@@ -2513,6 +2750,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
     if (ST->hasSSE42())
       if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
         return LT.first * Entry->Cost;
+
+    if (ST->hasSSE41())
+      if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE2())
+      if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
+
+    if (ST->hasSSE1())
+      if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy))
+        return LT.first * Entry->Cost;
   }
 
   return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
@@ -2864,26 +3113,106 @@ bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
 }
 
 bool X86TTIImpl::canMacroFuseCmp() {
-  return ST->hasMacroFusion();
+  return ST->hasMacroFusion() || ST->hasBranchFusion();
 }
 
 bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
+  if (!ST->hasAVX())
+    return false;
+
   // The backend can't handle a single element vector.
   if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
     return false;
   Type *ScalarTy = DataTy->getScalarType();
-  int DataWidth = isa<PointerType>(ScalarTy) ?
-    DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
 
-  return ((DataWidth == 32 || DataWidth == 64) && ST->hasAVX()) ||
-         ((DataWidth == 8 || DataWidth == 16) && ST->hasBWI());
+  if (ScalarTy->isPointerTy())
+    return true;
+
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true;
+
+  if (!ScalarTy->isIntegerTy())
+    return false;
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 32 || IntWidth == 64 ||
+         ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
 }
 
 bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
   return isLegalMaskedLoad(DataType);
 }
 
+bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) {
+  unsigned DataSize = DL.getTypeStoreSize(DataType); // Store size in bytes.
+  // Nontemporal loads are only reported legal for 16- or 32-byte types whose
+  // alignment is at least their size: 16-byte loads are gated on SSE1 and
+  // 32-byte loads on AVX2 (the equivalent 32-byte stores only require AVX).
+  if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
+    return DataSize == 16 ?  ST->hasSSE1() : ST->hasAVX2();
+
+  return false; // Any other size, or insufficient alignment.
+}
+
+bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) {
+  unsigned DataSize = DL.getTypeStoreSize(DataType); // Store size in bytes.
+
+  // SSE4A supports nontemporal stores of float and double at arbitrary
+  // alignment.
+  if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy()))
+    return true;
+
+  // Besides the SSE4A subtarget exception above, only aligned stores are
+  // available nontemporally on any other subtarget, and only stores with a
+  // size of 4..32 bytes (powers of 2, only) are permitted.
+  if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
+      !isPowerOf2_32(DataSize))
+    return false;
+
+  // 32-byte vector nontemporal stores are supported by AVX (the equivalent
+  // loads require AVX2).
+  if (DataSize == 32)
+    return ST->hasAVX();
+  else if (DataSize == 16)
+    return ST->hasSSE1();
+  return true; // Remaining sizes (4 and 8 bytes) are accepted as-is.
+}
+
+bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
+  if (!isa<VectorType>(DataTy))
+    return false; // Only vector types qualify.
+
+  if (!ST->hasAVX512())
+    return false; // Requires AVX512.
+
+  // The backend can't handle a single-element vector.
+  if (DataTy->getVectorNumElements() == 1)
+    return false;
+
+  Type *ScalarTy = DataTy->getVectorElementType();
+
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true; // float/double elements are always accepted.
+
+  if (!ScalarTy->isIntegerTy())
+    return false; // Other non-integer element types are rejected.
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth(); // 8/16-bit needs VBMI2.
+  return IntWidth == 32 || IntWidth == 64 ||
+         ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
+}
+
+bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
+  return isLegalMaskedExpandLoad(DataTy); // Same rules as expand-load.
+}
+
 bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
+  // Some CPUs have better gather performance than others.
+  // TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
+  // enable gather with a -march.
+  if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())))
+    return false;
+
   // This function is called now in two cases: from the Loop Vectorizer
   // and from the Scalarizer.
   // When the Loop Vectorizer asks about legality of the feature,
@@ -2902,14 +3231,17 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
       return false;
   }
   Type *ScalarTy = DataTy->getScalarType();
-  int DataWidth = isa<PointerType>(ScalarTy) ?
-    DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+  if (ScalarTy->isPointerTy())
+    return true;
 
-  // Some CPUs have better gather performance than others.
-  // TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
-  // enable gather with a -march.
-  return (DataWidth == 32 || DataWidth == 64) &&
-         (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true;
+
+  if (!ScalarTy->isIntegerTy())
+    return false;
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 32 || IntWidth == 64;
 }
 
 bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
@@ -2938,44 +3270,51 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
   const FeatureBitset &CalleeBits =
       TM.getSubtargetImpl(*Callee)->getFeatureBits();
 
-  // FIXME: This is likely too limiting as it will include subtarget features
-  // that we might not care about for inlining, but it is conservatively
-  // correct.
-  return (CallerBits & CalleeBits) == CalleeBits;
+  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+  return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
 }
 
-const X86TTIImpl::TTI::MemCmpExpansionOptions *
-X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
-  // Only enable vector loads for equality comparison.
-  // Right now the vector version is not as fast, see #33329.
-  static const auto ThreeWayOptions = [this]() {
-    TTI::MemCmpExpansionOptions Options;
-    if (ST->is64Bit()) {
-      Options.LoadSizes.push_back(8);
-    }
-    Options.LoadSizes.push_back(4);
-    Options.LoadSizes.push_back(2);
-    Options.LoadSizes.push_back(1);
-    return Options;
-  }();
-  static const auto EqZeroOptions = [this]() {
-    TTI::MemCmpExpansionOptions Options;
+bool X86TTIImpl::areFunctionArgsABICompatible(
+    const Function *Caller, const Function *Callee,
+    SmallPtrSetImpl<Argument *> &Args) const {
+  if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+    return false; // The generic check already rejected this pair.
+
+  // If we get here, we know the target features match. If one function
+  // considers 512-bit vectors legal and the other does not, consider them
+  // incompatible.
+  // FIXME: Look at the arguments and only consider 512-bit or larger vectors?
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  return TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+         TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs();
+}
+
+X86TTIImpl::TTI::MemCmpExpansionOptions
+X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = 2;
+  if (IsZeroCmp) {
+    // Only enable vector loads for equality comparison. Right now the vector
+    // version is not as fast for three way compare (see #33329).
     // TODO: enable AVX512 when the DAG is ready.
     // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
-    if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
-    if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
-    if (ST->is64Bit()) {
-      Options.LoadSizes.push_back(8);
-    }
-    Options.LoadSizes.push_back(4);
-    Options.LoadSizes.push_back(2);
-    Options.LoadSizes.push_back(1);
+    const unsigned PreferredWidth = ST->getPreferVectorWidth();
+    if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
+    if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
     // All GPR and vector loads can be unaligned. SIMD compare requires integer
     // vectors (SSE2/AVX2).
     Options.AllowOverlappingLoads = true;
-    return Options;
-  }();
-  return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
+  }
+  if (ST->is64Bit()) {
+    Options.LoadSizes.push_back(8);
+  }
+  Options.LoadSizes.push_back(4);
+  Options.LoadSizes.push_back(2);
+  Options.LoadSizes.push_back(1);
+  return Options;
 }
 
 bool X86TTIImpl::enableInterleavedAccessVectorization() {
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 1637592c81f8..25d9c33eb16d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -36,6 +35,64 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   const X86Subtarget *getST() const { return ST; }
   const X86TargetLowering *getTLI() const { return TLI; }
 
+  const FeatureBitset InlineFeatureIgnoreList = {
+      // This indicates the CPU is 64-bit capable, not that we are in 64-bit
+      // mode.
+      X86::Feature64Bit,
+
+      // These features don't have any intrinsics or ABI effect.
+      X86::FeatureNOPL,
+      X86::FeatureCMPXCHG16B,
+      X86::FeatureLAHFSAHF,
+
+      // Codegen control options.
+      X86::FeatureFast11ByteNOP,
+      X86::FeatureFast15ByteNOP,
+      X86::FeatureFastBEXTR,
+      X86::FeatureFastHorizontalOps,
+      X86::FeatureFastLZCNT,
+      X86::FeatureFastPartialYMMorZMMWrite,
+      X86::FeatureFastScalarFSQRT,
+      X86::FeatureFastSHLDRotate,
+      X86::FeatureFastScalarShiftMasks,
+      X86::FeatureFastVectorShiftMasks,
+      X86::FeatureFastVariableShuffle,
+      X86::FeatureFastVectorFSQRT,
+      X86::FeatureLEAForSP,
+      X86::FeatureLEAUsesAG,
+      X86::FeatureLZCNTFalseDeps,
+      X86::FeatureBranchFusion,
+      X86::FeatureMacroFusion,
+      X86::FeatureMergeToThreeWayBranch,
+      X86::FeaturePadShortFunctions,
+      X86::FeaturePOPCNTFalseDeps,
+      X86::FeatureSSEUnalignedMem,
+      X86::FeatureSlow3OpsLEA,
+      X86::FeatureSlowDivide32,
+      X86::FeatureSlowDivide64,
+      X86::FeatureSlowIncDec,
+      X86::FeatureSlowLEA,
+      X86::FeatureSlowPMADDWD,
+      X86::FeatureSlowPMULLD,
+      X86::FeatureSlowSHLD,
+      X86::FeatureSlowTwoMemOps,
+      X86::FeatureSlowUAMem16,
+
+      // Perf-tuning flags.
+      X86::FeatureHasFastGather,
+      X86::FeatureSlowUAMem32,
+
+      // Depends on whether the user set the -mprefer-vector-width option.
+      X86::FeaturePrefer256Bit,
+
+      // CPU name enums. These just follow CPU string.
+      X86::ProcIntelAtom,
+      X86::ProcIntelGLM,
+      X86::ProcIntelGLP,
+      X86::ProcIntelSLM,
+      X86::ProcIntelTRM,
+  };
+
 public:
   explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -129,14 +186,21 @@ public:
   bool canMacroFuseCmp();
   bool isLegalMaskedLoad(Type *DataType);
   bool isLegalMaskedStore(Type *DataType);
+  bool isLegalNTLoad(Type *DataType, unsigned Alignment);
+  bool isLegalNTStore(Type *DataType, unsigned Alignment);
   bool isLegalMaskedGather(Type *DataType);
   bool isLegalMaskedScatter(Type *DataType);
+  bool isLegalMaskedExpandLoad(Type *DataType);
+  bool isLegalMaskedCompressStore(Type *DataType);
   bool hasDivRemOp(Type *DataType, bool IsSigned);
   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
-  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
-      bool IsZeroCmp) const;
+  bool areFunctionArgsABICompatible(const Function *Caller,
+                                    const Function *Callee,
+                                    SmallPtrSetImpl<Argument *> &Args) const;
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+                                                    bool IsZeroCmp) const;
   bool enableInterleavedAccessVectorization();
 private:
   int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index f882b760927c..a07d2f20acab 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -1,9 +1,8 @@
 //===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp
index d298aaa97ecd..9e499db1d7ee 100644
--- a/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -1,9 +1,8 @@
 //===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,10 +84,6 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
   unsigned AmountReg = MI->getOperand(0).getReg();
   MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
 
-  // Look through copies.
-  while (Def && Def->isCopy() && Def->getOperand(1).isReg())
-    Def = MRI->getUniqueVRegDef(Def->getOperand(1).getReg());
-
   if (!Def ||
       (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) ||
       !Def->getOperand(1).isImm())
@@ -210,15 +205,18 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
     return;
   }
 
+  // These two variables differ on x32, which is a 64-bit target with a
+  // 32-bit alloca.
   bool Is64Bit = STI->is64Bit();
+  bool Is64BitAlloca = MI->getOpcode() == X86::WIN_ALLOCA_64;
   assert(SlotSize == 4 || SlotSize == 8);
-  unsigned RegA = (SlotSize == 8) ? X86::RAX : X86::EAX;
 
   switch (L) {
-  case TouchAndSub:
+  case TouchAndSub: {
     assert(Amount >= SlotSize);
 
     // Use a push to touch the top of the stack.
+    unsigned RegA = Is64Bit ? X86::RAX : X86::EAX;
     BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
         .addReg(RegA, RegState::Undef);
     Amount -= SlotSize;
@@ -227,15 +225,18 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
 
     // Fall through to make any remaining adjustment.
     LLVM_FALLTHROUGH;
+  }
   case Sub:
     assert(Amount > 0);
     if (Amount == SlotSize) {
       // Use push to save size.
+      unsigned RegA = Is64Bit ? X86::RAX : X86::EAX;
       BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
           .addReg(RegA, RegState::Undef);
     } else {
       // Sub.
-      BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64Bit, Amount)), StackPtr)
+      BuildMI(*MBB, I, DL,
+              TII->get(getSubOpcode(Is64BitAlloca, Amount)), StackPtr)
           .addReg(StackPtr)
           .addImm(Amount);
     }
@@ -243,16 +244,17 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
   case Probe:
     if (!NoStackArgProbe) {
       // The probe lowering expects the amount in RAX/EAX.
+      unsigned RegA = Is64BitAlloca ? X86::RAX : X86::EAX;
       BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegA)
           .addReg(MI->getOperand(0).getReg());
 
       // Do the probe.
       STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
-                                              /*InPrologue=*/false);
+                                              /*InProlog=*/false);
     } else {
       // Sub
-      BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::SUB64rr : X86::SUB32rr),
-              StackPtr)
+      BuildMI(*MBB, I, DL,
+              TII->get(Is64BitAlloca ? X86::SUB64rr : X86::SUB32rr), StackPtr)
           .addReg(StackPtr)
           .addReg(MI->getOperand(0).getReg());
     }
@@ -262,18 +264,10 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
   unsigned AmountReg = MI->getOperand(0).getReg();
   MI->eraseFromParent();
 
-  // Delete the definition of AmountReg, possibly walking a chain of copies.
-  for (;;) {
-    if (!MRI->use_empty(AmountReg))
-      break;
-    MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg);
-    if (!AmountDef)
-      break;
-    if (AmountDef->isCopy() && AmountDef->getOperand(1).isReg())
-      AmountReg = AmountDef->getOperand(1).isReg();
-    AmountDef->eraseFromParent();
-    break;
-  }
+  // Delete the definition of AmountReg.
+  if (MRI->use_empty(AmountReg))
+    if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg))
+      AmountDef->eraseFromParent();
 }
 
 bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 185deda97c1f..f68d17d7256d 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -1,9 +1,8 @@
 //===-- X86WinEHState - Insert EH state updates for win32 exceptions ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,9 +40,7 @@ class WinEHStatePass : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid.
 
-  WinEHStatePass() : FunctionPass(ID) {
-    initializeWinEHStatePassPass(*PassRegistry::getPassRegistry());
-  }
+  WinEHStatePass() : FunctionPass(ID) { }
 
   bool runOnFunction(Function &Fn) override;
 
@@ -87,15 +84,15 @@ private:
   StructType *EHLinkRegistrationTy = nullptr;
   StructType *CXXEHRegistrationTy = nullptr;
   StructType *SEHRegistrationTy = nullptr;
-  Constant *SetJmp3 = nullptr;
-  Constant *CxxLongjmpUnwind = nullptr;
+  FunctionCallee SetJmp3 = nullptr;
+  FunctionCallee CxxLongjmpUnwind = nullptr;
 
   // Per-function state
   EHPersonality Personality = EHPersonality::Unknown;
   Function *PersonalityFn = nullptr;
   bool UseStackGuard = false;
   int ParentBaseState;
-  Constant *SehLongjmpUnwind = nullptr;
+  FunctionCallee SehLongjmpUnwind = nullptr;
   Constant *Cookie = nullptr;
 
   /// The stack allocation containing all EH data, including the link in the
@@ -304,7 +301,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
     CxxLongjmpUnwind = TheModule->getOrInsertFunction(
         "__CxxLongjmpUnwind",
         FunctionType::get(VoidTy, Int8PtrType, /*isVarArg=*/false));
-    cast<Function>(CxxLongjmpUnwind->stripPointerCasts())
+    cast<Function>(CxxLongjmpUnwind.getCallee()->stripPointerCasts())
         ->setCallingConv(CallingConv::X86_StdCall);
   } else if (Personality == EHPersonality::MSVC_X86SEH) {
     // If _except_handler4 is in use, some additional guard checks and prologue
@@ -357,7 +354,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
         UseStackGuard ? "_seh_longjmp_unwind4" : "_seh_longjmp_unwind",
         FunctionType::get(Type::getVoidTy(TheModule->getContext()), Int8PtrType,
                           /*isVarArg=*/false));
-    cast<Function>(SehLongjmpUnwind->stripPointerCasts())
+    cast<Function>(SehLongjmpUnwind.getCallee()->stripPointerCasts())
         ->setCallingConv(CallingConv::X86_StdCall);
   } else {
     llvm_unreachable("unexpected personality function");
@@ -412,7 +409,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
       Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
   auto AI = Trampoline->arg_begin();
   Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++};
-  CallInst *Call = Builder.CreateCall(CastPersonality, Args);
+  CallInst *Call = Builder.CreateCall(TargetFuncTy, CastPersonality, Args);
   // Can't use musttail due to prototype mismatch, but we can use tail.
   Call->setTailCall(true);
   // Set inreg so we pass it in EAX.
@@ -433,7 +430,7 @@ void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
   // Next = [fs:00]
   Constant *FSZero =
       Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
-  Value *Next = Builder.CreateLoad(FSZero);
+  Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(), FSZero);
   Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0));
   // [fs:00] = Link
   Builder.CreateStore(Link, FSZero);
@@ -448,8 +445,8 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
   }
   Type *LinkTy = getEHLinkRegistrationType();
   // [fs:00] = Link->Next
-  Value *Next =
-      Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0));
+  Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(),
+                                   Builder.CreateStructGEP(LinkTy, Link, 0));
   Constant *FSZero =
       Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
   Builder.CreateStore(Next, FSZero);
@@ -472,11 +469,11 @@ void WinEHStatePass::rewriteSetJmpCallSite(IRBuilder<> &Builder, Function &F,
 
   SmallVector<Value *, 3> OptionalArgs;
   if (Personality == EHPersonality::MSVC_CXX) {
-    OptionalArgs.push_back(CxxLongjmpUnwind);
+    OptionalArgs.push_back(CxxLongjmpUnwind.getCallee());
     OptionalArgs.push_back(State);
     OptionalArgs.push_back(emitEHLSDA(Builder, &F));
   } else if (Personality == EHPersonality::MSVC_X86SEH) {
-    OptionalArgs.push_back(SehLongjmpUnwind);
+    OptionalArgs.push_back(SehLongjmpUnwind.getCallee());
     OptionalArgs.push_back(State);
     if (UseStackGuard)
       OptionalArgs.push_back(Cookie);
@@ -767,7 +764,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
       if (!CS)
         continue;
       if (CS.getCalledValue()->stripPointerCasts() !=
-          SetJmp3->stripPointerCasts())
+          SetJmp3.getCallee()->stripPointerCasts())
         continue;
 
       SetJmp3CallSites.push_back(CS);
@@ -782,9 +779,9 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
     IRBuilder<> Builder(CS.getInstruction());
     Value *State;
     if (InCleanup) {
-      Value *StateField =
-          Builder.CreateStructGEP(nullptr, RegNode, StateFieldIndex);
-      State = Builder.CreateLoad(StateField);
+      Value *StateField = Builder.CreateStructGEP(RegNode->getAllocatedType(),
+                                                  RegNode, StateFieldIndex);
+      State = Builder.CreateLoad(Builder.getInt32Ty(), StateField);
     } else {
       State = Builder.getInt32(getStateForCallSite(BlockColors, FuncInfo, CS));
     }
@@ -794,7 +791,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
 
 void WinEHStatePass::insertStateNumberStore(Instruction *IP, int State) {
   IRBuilder<> Builder(IP);
-  Value *StateField =
-      Builder.CreateStructGEP(nullptr, RegNode, StateFieldIndex);
+  Value *StateField = Builder.CreateStructGEP(RegNode->getAllocatedType(),
+                                              RegNode, StateFieldIndex);
   Builder.CreateStore(Builder.getInt32(State), StateField);
 }
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index faf66e5944ab..ff3d41fd5274 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -1,9 +1,8 @@
 //===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -12,6 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "TargetInfo/XCoreTargetInfo.h"
 #include "XCore.h"
 #include "XCoreRegisterInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -768,10 +768,6 @@ MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
   return Fail;
 }
 
-namespace llvm {
-  Target &getTheXCoreTarget();
-}
-
 static MCDisassembler *createXCoreDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
deleted file mode 100644
index b03c1852281d..000000000000
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an XCore MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "XCoreInstPrinter.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "XCoreGenAsmWriter.inc"
-
-void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << StringRef(getRegisterName(RegNo)).lower();
-}
-
-void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                 StringRef Annot, const MCSubtargetInfo &STI) {
-  printInstruction(MI, O);
-  printAnnotation(O, Annot);
-}
-
-void XCoreInstPrinter::
-printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
-  report_fatal_error("can't handle InlineJT");
-}
-
-void XCoreInstPrinter::
-printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
-  report_fatal_error("can't handle InlineJT32");
-}
-
-static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
-                      raw_ostream &OS) {
-  int Offset = 0;
-  const MCSymbolRefExpr *SRE;
-
-  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
-    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
-    assert(SRE && CE && "Binary expression must be sym+const.");
-    Offset = CE->getValue();
-  } else {
-    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
-    assert(SRE && "Unexpected MCExpr type.");
-  }
-  assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
-
-  SRE->getSymbol().print(OS, MAI);
-
-  if (Offset) {
-    if (Offset > 0)
-      OS << '+';
-    OS << Offset;
-  }
-}
-
-void XCoreInstPrinter::
-printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    printRegName(O, Op.getReg());
-    return;
-  }
-
-  if (Op.isImm()) {
-    O << Op.getImm();
-    return;
-  }
-
-  assert(Op.isExpr() && "unknown operand kind in printOperand");
-  printExpr(Op.getExpr(), &MAI, O);
-}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
deleted file mode 100644
index a0b480026469..000000000000
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the declaration of the XCoreInstPrinter class,
-/// which is used to print XCore MCInst to a .s file.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
-#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-
-class XCoreInstPrinter : public MCInstPrinter {
-public:
-  XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                  const MCRegisterInfo &MRI)
-    : MCInstPrinter(MAI, MII, MRI) {}
-
-  // Autogenerated by tblgen.
-  void printInstruction(const MCInst *MI, raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
-
-  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
-                 const MCSubtargetInfo &STI) override;
-
-private:
-  void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
-  void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
-  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
new file mode 100644
index 000000000000..d231e0981324
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
@@ -0,0 +1,89 @@
+//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an XCore MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                 StringRef Annot, const MCSubtargetInfo &STI) {
+  printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+void XCoreInstPrinter::
+printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
+  report_fatal_error("can't handle InlineJT");
+}
+
+void XCoreInstPrinter::
+printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
+  report_fatal_error("can't handle InlineJT32");
+}
+
+static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI,
+                      raw_ostream &OS) {
+  int Offset = 0;
+  const MCSymbolRefExpr *SRE;
+
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+    assert(SRE && CE && "Binary expression must be sym+const.");
+    Offset = CE->getValue();
+  } else {
+    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+    assert(SRE && "Unexpected MCExpr type.");
+  }
+  assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+  SRE->getSymbol().print(OS, MAI);
+
+  if (Offset) {
+    if (Offset > 0)
+      OS << '+';
+    OS << Offset;
+  }
+}
+
+void XCoreInstPrinter::
+printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  printExpr(Op.getExpr(), &MAI, O);
+}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
new file mode 100644
index 000000000000..4f0940323505
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -0,0 +1,46 @@
+//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the XCoreInstPrinter class,
+/// which is used to print XCore MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class XCoreInstPrinter : public MCInstPrinter {
+public:
+  XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                  const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+private:
+  void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
+  void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 3178a4edbb3b..ae19e2a78eec 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 39581e424e8c..b1dd247f8468 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -1,9 +1,8 @@
 //===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 805f1c18b609..877f38e22f9b 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,8 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/XCoreMCTargetDesc.h"
-#include "InstPrinter/XCoreInstPrinter.h"
+#include "MCTargetDesc/XCoreInstPrinter.h"
 #include "MCTargetDesc/XCoreMCAsmInfo.h"
+#include "TargetInfo/XCoreTargetInfo.h"
 #include "XCoreTargetStreamer.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDwarf.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 1dc384fadf69..3e56302f4add 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -1,9 +1,8 @@
 //===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,8 +17,6 @@ namespace llvm {
 
 class Target;
 
-Target &getTheXCoreTarget();
-
 } // end namespace llvm
 
 // Defines symbolic names for XCore registers.  This defines a mapping from
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 41f4078cc328..5604f29db3e9 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -1,14 +1,12 @@
 //===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "XCore.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/XCoreTargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h
new file mode 100644
index 000000000000..35f05f22e4ce
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- XCoreTargetInfo.h - XCore Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
+#define LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheXCoreTarget();
+
+}
+
+#endif // LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index ba6ca843671e..b7b86be9ab51 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -1,9 +1,8 @@
 //===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 04a1dd5e95be..a97b3dd1d0a2 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -1,9 +1,8 @@
 //===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 916bca6392de..9f615b9e7741 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,7 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/XCoreInstPrinter.h"
+#include "MCTargetDesc/XCoreInstPrinter.h"
+#include "TargetInfo/XCoreTargetInfo.h"
 #include "XCore.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreMCInstLower.h"
@@ -67,11 +67,9 @@ namespace {
     }
     void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O) override;
+                         const char *ExtraCode, raw_ostream &O) override;
     bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                               unsigned AsmVariant, const char *ExtraCode,
-                               raw_ostream &O) override;
+                               const char *ExtraCode, raw_ostream &O) override;
 
     void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
     void EmitGlobalVariable(const GlobalVariable *GV) override;
@@ -216,7 +214,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     MO.getMBB()->getSymbol()->print(O, MAI);
     break;
   case MachineOperand::MO_GlobalAddress:
-    getSymbol(MO.getGlobal())->print(O, MAI);
+    PrintSymbolOperand(MO, O);
     break;
   case MachineOperand::MO_ConstantPoolIndex:
     O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
@@ -233,8 +231,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                      unsigned AsmVariant,const char *ExtraCode,
-                                      raw_ostream &O) {
+                                      const char *ExtraCode, raw_ostream &O) {
   // Print the operand if there is no operand modifier.
   if (!ExtraCode || !ExtraCode[0]) {
     printOperand(MI, OpNo, O);
@@ -242,13 +239,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   }
 
   // Otherwise fallback on the default implementation.
-  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
 }
 
-bool XCoreAsmPrinter::
-PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                      unsigned AsmVariant, const char *ExtraCode,
-                      raw_ostream &O) {
+bool XCoreAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                            unsigned OpNum,
+                                            const char *ExtraCode,
+                                            raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
     return true; // Unknown modifier.
   }
diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td
index e149e6d9ec20..aec109b83fa2 100644
--- a/lib/Target/XCore/XCoreCallingConv.td
+++ b/lib/Target/XCore/XCoreCallingConv.td
@@ -1,9 +1,8 @@
 //===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This describes the calling conventions for XCore architecture.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index fff8a66d0e75..5066407c74aa 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index e98e9cda11db..95c3a2973033 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -1,9 +1,8 @@
 //===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 4b10e71be03d..e433d21c59b7 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreFrameToArgsOffsetElim.cpp ----------------------------*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1688c38efc1d..5fd9e23258b0 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 75d7ae7048a1..072278d9fc46 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -407,23 +406,16 @@ static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
   return Known.countMinTrailingZeros() >= 2;
 }
 
-SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  LLVMContext &Context = *DAG.getContext();
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
          "Unexpected extension type");
   assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
-  if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(),
-                                     LD->getAddressSpace(),
-                                     LD->getAlignment()))
-    return SDValue();
 
-  auto &TD = DAG.getDataLayout();
-  unsigned ABIAlignment = TD.getABITypeAlignment(
-      LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
-  // Leave aligned load alone.
-  if (LD->getAlignment() >= ABIAlignment)
+  if (allowsMemoryAccess(Context, DAG.getDataLayout(), LD->getMemoryVT(),
+                         *LD->getMemOperand()))
     return SDValue();
 
   SDValue Chain = LD->getChain();
@@ -470,7 +462,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Lower to a call to __misaligned_load(BasePtr).
-  Type *IntPtrTy = TD.getIntPtrType(*DAG.getContext());
+  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(Context);
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
 
@@ -490,23 +482,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues(Ops, DL);
 }
 
-SDValue XCoreTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const
-{
+SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  LLVMContext &Context = *DAG.getContext();
   StoreSDNode *ST = cast<StoreSDNode>(Op);
   assert(!ST->isTruncatingStore() && "Unexpected store type");
   assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
-  if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
-                                     ST->getAddressSpace(),
-                                     ST->getAlignment())) {
-    return SDValue();
-  }
-  unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
-      ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
-  // Leave aligned store alone.
-  if (ST->getAlignment() >= ABIAlignment) {
+
+  if (allowsMemoryAccess(Context, DAG.getDataLayout(), ST->getMemoryVT(),
+                         *ST->getMemOperand()))
     return SDValue();
-  }
+
   SDValue Chain = ST->getChain();
   SDValue BasePtr = ST->getBasePtr();
   SDValue Value = ST->getValue();
@@ -515,7 +500,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   if (ST->getAlignment() == 2) {
     SDValue Low = Value;
     SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
-                                      DAG.getConstant(16, dl, MVT::i32));
+                               DAG.getConstant(16, dl, MVT::i32));
     SDValue StoreLow = DAG.getTruncStore(
         Chain, dl, Low, BasePtr, ST->getPointerInfo(), MVT::i16,
         /* Alignment = */ 2, ST->getMemOperand()->getFlags());
@@ -528,7 +513,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
   }
 
   // Lower to a call to __misaligned_store(BasePtr, Value).
-  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(Context);
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
 
@@ -541,7 +526,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
 
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(Chain).setCallee(
-      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+      CallingConv::C, Type::getVoidTy(Context),
       DAG.getExternalSymbol("__misaligned_store",
                             getPointerTy(DAG.getDataLayout())),
       std::move(Args));
@@ -1009,6 +994,27 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+MachineMemOperand::Flags
+XCoreTargetLowering::getMMOFlags(const Instruction &I) const {
+  // atomic_load and atomic_store are converted to normal loads and stores in
+  // the DAG, and DAGCombine hasn't been updated to account for atomic but
+  // non-volatile accesses. So mark the MMO of every atomic load, store, RMW
+  // and cmpxchg instruction as volatile.  (See D57601)
+  if (auto *SI = dyn_cast<StoreInst>(&I))
+    if (SI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *LI = dyn_cast<LoadInst>(&I))
+    if (LI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
+    if (AI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+    if (AI->isAtomic())
+      return MachineMemOperand::MOVolatile;
+  return MachineMemOperand::MONone; // No atomic access: no extra flags.
+}
+
 //===----------------------------------------------------------------------===//
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
@@ -1772,11 +1778,10 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
   break;
   case ISD::STORE: {
     // Replace unaligned store of unaligned load with memmove.
-    StoreSDNode *ST  = cast<StoreSDNode>(N);
+    StoreSDNode *ST = cast<StoreSDNode>(N);
     if (!DCI.isBeforeLegalize() ||
-        allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
-                                       ST->getAddressSpace(),
-                                       ST->getAlignment()) ||
+        allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                           ST->getMemoryVT(), *ST->getMemOperand()) ||
         ST->isVolatile() || ST->isIndexed()) {
       break;
     }
@@ -1785,12 +1790,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
     assert((StoreBits % 8) == 0 &&
            "Store size in bits must be a multiple of 8");
-    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
-        ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
     unsigned Alignment = ST->getAlignment();
-    if (Alignment >= ABIAlignment) {
-      break;
-    }
 
     if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
       if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7a99389e54a7..b4f25feda7fe 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -1,9 +1,8 @@
 //===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -189,6 +188,8 @@ namespace llvm {
     SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
 
+    MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+
     // Inline asm support
     std::pair<unsigned, const TargetRegisterClass *>
     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 379cc39aa617..deb899ddb1af 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -1,9 +1,8 @@
 //===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index b0de048672df..bbad8e354586 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 9d9ee33ce222..b9621f136589 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -1,9 +1,8 @@
 //===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index b87ba6548962..18f02e1d80f0 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -1,9 +1,8 @@
 //===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 7455cd997ad6..a18fb28f2fe9 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreLowerThreadLocal - Lower thread local variables --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
index 21270192b234..cd28fa5cd144 100644
--- a/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index abcb80fcf766..0eaa84ef736b 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -1,9 +1,8 @@
 //===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index b7b0daab9806..0b4fcffbc655 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 6c05ab3f10df..aebe11b15b54 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,9 +1,8 @@
 //===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index e119d9555f9d..3752274e2cdf 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -284,7 +283,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   Offset += StackSize;
 
-  unsigned FrameReg = getFrameRegister(MF);
+  Register FrameReg = getFrameRegister(MF);
 
   // Special handling of DBG_VALUE instructions.
   if (MI.isDebugValue()) {
@@ -322,7 +321,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 }
 
 
-unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const XCoreFrameLowering *TFI = getFrameLowering(MF);
 
   return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 2e9fd98ed34f..35a42e1a1457 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -1,9 +1,8 @@
 //===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -44,7 +43,7 @@ public:
                            RegScavenger *RS = nullptr) const override;
 
   // Debug information queries.
-  unsigned getFrameRegister(const MachineFunction &MF) const override;
+  Register getFrameRegister(const MachineFunction &MF) const override;
 
   //! Return whether to emit frame moves
   static bool needsFrameMoves(const MachineFunction &MF);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 6694b2882aca..d9502939bae3 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -1,9 +1,8 @@
 //===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 646309e02de8..c86756e345a9 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 7cd0d8216e91..5dcef08391c9 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -1,9 +1,8 @@
 //===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 99ad2c88504f..ffeb0862c945 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index ed9936ebf2b8..68139da9d1d0 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -1,9 +1,8 @@
 //===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 2aa9932e2465..2a8cd6b657b7 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 
 #include "XCoreTargetMachine.h"
 #include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "TargetInfo/XCoreTargetInfo.h"
 #include "XCore.h"
 #include "XCoreTargetObjectFile.h"
 #include "XCoreTargetTransformInfo.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 965b9b2c4d65..9c3bdcf78f9c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -1,9 +1,8 @@
 //===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index c60a262e719c..fe743b28b4b4 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -1,9 +1,8 @@
 //===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 5eb423a7435e..fd172c55919f 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -1,9 +1,8 @@
 //===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 3563dbc5cb7b..3543fc52ea7f 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -1,9 +1,8 @@
 //===-- XCoreTargetStreamer.h - XCore Target Streamer ----------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h
index aa068b333425..3fecaaa59722 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -1,9 +1,8 @@
 //===-- XCoreTargetTransformInfo.h - XCore specific TTI ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Testing/Support/Annotations.cpp b/lib/Testing/Support/Annotations.cpp
new file mode 100644
index 000000000000..09c572011d36
--- /dev/null
+++ b/lib/Testing/Support/Annotations.cpp
@@ -0,0 +1,95 @@
+//===--- Annotations.cpp - Annotated source code for unit tests --*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Testing/Support/Annotations.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Crash if Assertion is false, after printing Msg and the testcase Code to
+// llvm::errs(). More elegant error handling isn't needed for unit tests.
+static void require(bool Assertion, const char *Msg, llvm::StringRef Code) {
+  if (!Assertion) {
+    llvm::errs() << "Annotated testcase: " << Msg << "\n" << Code << "\n";
+    llvm_unreachable("Annotated testcase assertion failed!");
+  }
+}
+
+Annotations::Annotations(llvm::StringRef Text) {
+  auto Require = [Text](bool Assertion, const char *Msg) {
+    require(Assertion, Msg, Text); // Captured copy, i.e. the whole input.
+  };
+  llvm::Optional<llvm::StringRef> Name; // Pending $name, if any.
+  llvm::SmallVector<std::pair<llvm::StringRef, size_t>, 8> OpenRanges;
+
+  Code.reserve(Text.size()); // Code is Text with the markers stripped.
+  while (!Text.empty()) { // Consume Text from the front, token by token.
+    if (Text.consume_front("^")) { // Point at the current Code offset.
+      Points[Name.getValueOr("")].push_back(Code.size());
+      Name = llvm::None;
+      continue;
+    }
+    if (Text.consume_front("[[")) { // Range start: save name and offset.
+      OpenRanges.emplace_back(Name.getValueOr(""), Code.size());
+      Name = llvm::None;
+      continue;
+    }
+    Require(!Name, "$name should be followed by ^ or [[");
+    if (Text.consume_front("]]")) { // Range end: closes the latest open [[.
+      Require(!OpenRanges.empty(), "unmatched ]]");
+      Range R;
+      R.Begin = OpenRanges.back().second; // Offset saved at the matching [[.
+      R.End = Code.size();
+      Ranges[OpenRanges.back().first].push_back(R); // Filed under its name.
+      OpenRanges.pop_back();
+      continue;
+    }
+    if (Text.consume_front("$")) { // Name for the next ^ or [[ marker.
+      Name = Text.take_while(llvm::isAlnum);
+      Text = Text.drop_front(Name->size());
+      continue;
+    }
+    Code.push_back(Text.front()); // Ordinary character: copy it to Code.
+    Text = Text.drop_front();
+  }
+  Require(!Name, "unterminated $name");
+  Require(OpenRanges.empty(), "unmatched [[");
+}
+
+size_t Annotations::point(llvm::StringRef Name) const {
+  auto I = Points.find(Name); // Exactly one point must be called Name.
+  require(I != Points.end() && I->getValue().size() == 1,
+          "expected exactly one point", Code);
+  return I->getValue()[0]; // The single offset recorded for Name.
+}
+
+std::vector<size_t> Annotations::points(llvm::StringRef Name) const {
+  auto P = Points.lookup(Name); // All offsets recorded under Name.
+  return {P.begin(), P.end()}; // Handed back as a std::vector copy.
+}
+
+Annotations::Range Annotations::range(llvm::StringRef Name) const {
+  auto I = Ranges.find(Name); // Exactly one range must be called Name.
+  require(I != Ranges.end() && I->getValue().size() == 1,
+          "expected exactly one range", Code);
+  return I->getValue()[0]; // The single range recorded for Name.
+}
+
+std::vector<Annotations::Range>
+Annotations::ranges(llvm::StringRef Name) const {
+  auto R = Ranges.lookup(Name); // All ranges recorded under Name.
+  return {R.begin(), R.end()}; // Handed back as a std::vector copy.
+}
+
+llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &O,
+                                    const llvm::Annotations::Range &R) {
+  return O << llvm::formatv("[{0}, {1})", R.Begin, R.End); // [Begin, End)
+}
diff --git a/lib/Testing/Support/Error.cpp b/lib/Testing/Support/Error.cpp
index 5692cdfcdf7b..a5f8f9b47b3f 100644
--- a/lib/Testing/Support/Error.cpp
+++ b/lib/Testing/Support/Error.cpp
@@ -1,9 +1,8 @@
 //===- llvm/Testing/Support/Error.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/TextAPI/ELF/ELFStub.cpp b/lib/TextAPI/ELF/ELFStub.cpp
index 248a078a2404..f8463497093b 100644
--- a/lib/TextAPI/ELF/ELFStub.cpp
+++ b/lib/TextAPI/ELF/ELFStub.cpp
@@ -1,9 +1,8 @@
 //===- ELFStub.cpp --------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===-----------------------------------------------------------------------===/
 
diff --git a/lib/TextAPI/ELF/TBEHandler.cpp b/lib/TextAPI/ELF/TBEHandler.cpp
index b621829d9358..cb597d8896e8 100644
--- a/lib/TextAPI/ELF/TBEHandler.cpp
+++ b/lib/TextAPI/ELF/TBEHandler.cpp
@@ -1,9 +1,8 @@
 //===- TBEHandler.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===-----------------------------------------------------------------------===/
 
diff --git a/lib/TextAPI/MachO/Architecture.cpp b/lib/TextAPI/MachO/Architecture.cpp
new file mode 100644
index 000000000000..a66a982fa153
--- /dev/null
+++ b/lib/TextAPI/MachO/Architecture.cpp
@@ -0,0 +1,77 @@
+//===- Architecture.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the architecture helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+namespace llvm {
+namespace MachO {
+
+Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType) {
+#define ARCHINFO(Arch, Type, Subtype)                                          \
+  if (CPUType == (Type) &&                                                     \
+      (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) == (Subtype))                    \
+    return AK_##Arch;
+#include "llvm/TextAPI/MachO/Architecture.def" // one test per ARCHINFO entry
+#undef ARCHINFO
+
+  return AK_unknown; // no ARCHINFO entry matched the type/subtype pair
+}
+
+Architecture getArchitectureFromName(StringRef Name) {
+  return StringSwitch<Architecture>(Name)
+#define ARCHINFO(Arch, Type, Subtype) .Case(#Arch, AK_##Arch)
+#include "llvm/TextAPI/MachO/Architecture.def" // one .Case per ARCHINFO entry
+#undef ARCHINFO
+      .Default(AK_unknown); // any name without an ARCHINFO entry
+}
+
+StringRef getArchitectureName(Architecture Arch) {
+  switch (Arch) {
+#define ARCHINFO(Arch, Type, Subtype)                                          \
+  case AK_##Arch:                                                              \
+    return #Arch;
+#include "llvm/TextAPI/MachO/Architecture.def" // one case per ARCHINFO entry
+#undef ARCHINFO
+  case AK_unknown:
+    return "unknown";
+  }
+
+  // Fallback for values outside the enumerators above; it also silences
+  // compilers that do not recognize the switch as fully covered.
+  return "unknown";
+}
+
+std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch) {
+  switch (Arch) {
+#define ARCHINFO(Arch, Type, Subtype)                                          \
+  case AK_##Arch:                                                              \
+    return std::make_pair(Type, Subtype);
+#include "llvm/TextAPI/MachO/Architecture.def" // one case per ARCHINFO entry
+#undef ARCHINFO
+  case AK_unknown:
+    return std::make_pair(0, 0);
+  }
+
+  // Fallback for values outside the enumerators above; it also silences
+  // compilers that do not recognize the switch as fully covered.
+  return std::make_pair(0, 0);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, Architecture Arch) {
+  // Stream the architecture's textual name and hand the stream back.
+  return OS << getArchitectureName(Arch);
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/ArchitectureSet.cpp b/lib/TextAPI/MachO/ArchitectureSet.cpp
new file mode 100644
index 000000000000..c589671199b7
--- /dev/null
+++ b/lib/TextAPI/MachO/ArchitectureSet.cpp
@@ -0,0 +1,69 @@
+//===- ArchitectureSet.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the architecture set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+
+namespace llvm {
+namespace MachO {
+
+ArchitectureSet::ArchitectureSet(const std::vector<Architecture> &Archs)
+    : ArchitectureSet() {
+  // Begin with the default-constructed set and record each listed
+  // architecture; AK_unknown entries are ignored.
+  for (const Architecture Kind : Archs)
+    if (Kind != AK_unknown)
+      set(Kind);
+}
+
+size_t ArchitectureSet::count() const {
+  // Population count: every pass of the loop clears the lowest set bit of
+  // Bits, so the number of passes equals the number of set bits.
+  size_t NumSet = 0;
+  for (ArchSetType Bits = ArchSet; Bits != 0; Bits &= Bits - 1)
+    ++NumSet;
+  return NumSet;
+}
+
+ArchitectureSet::operator std::string() const {
+  if (empty())
+    return "[(empty)]";
+
+  // Space-separated names; Remaining prevents a trailing separator.
+  std::string Joined;
+  auto Remaining = count();
+  for (auto Kind : *this) {
+    Joined.append(getArchitectureName(Kind));
+    if (--Remaining)
+      Joined.append(" ");
+  }
+  return Joined;
+}
+
+ArchitectureSet::operator std::vector<Architecture>() const {
+  // Collect the members in iteration order, leaving out AK_unknown should
+  // the iterator ever yield it.
+  std::vector<Architecture> Members;
+  for (const auto Kind : *this)
+    if (Kind != AK_unknown)
+      Members.emplace_back(Kind);
+  return Members;
+}
+
+void ArchitectureSet::print(raw_ostream &os) const { os << std::string(*this); }
+
+raw_ostream &operator<<(raw_ostream &os, ArchitectureSet set) {
+  set.print(os); // print() streams the set's std::string conversion
+  return os;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/InterfaceFile.cpp b/lib/TextAPI/MachO/InterfaceFile.cpp
new file mode 100644
index 000000000000..54ba8cc31267
--- /dev/null
+++ b/lib/TextAPI/MachO/InterfaceFile.cpp
@@ -0,0 +1,81 @@
+//===- InterfaceFile.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Interface File.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include <iomanip>
+#include <sstream>
+
+namespace llvm {
+namespace MachO {
+namespace detail {
+template <typename C>
+typename C::iterator addEntry(C &Container, StringRef InstallName) {
+  // Finds InstallName's entry in the (assumed sorted) list, or inserts one.
+  auto Pos = partition_point(Container, [=](const InterfaceFileRef &Ref) {
+    return Ref.getInstallName() < InstallName;
+  });
+  if (Pos == Container.end() || Pos->getInstallName() != InstallName)
+    Pos = Container.emplace(Pos, InstallName);
+  return Pos;
+}
+} // end namespace detail.
+
+void InterfaceFile::addAllowableClient(StringRef Name,
+                                       ArchitectureSet Architectures) {
+  // Look up (or create) the entry for Name and add the architectures to it.
+  detail::addEntry(AllowableClients, Name)->addArchitectures(Architectures);
+}
+
+void InterfaceFile::addReexportedLibrary(StringRef InstallName,
+                                         ArchitectureSet Architectures) {
+  detail::addEntry(ReexportedLibraries, InstallName) // existing or new entry
+      ->addArchitectures(Architectures);
+}
+
+void InterfaceFile::addUUID(Architecture Arch, StringRef UUID) {
+  // Find the first entry whose architecture does not order before Arch.
+  auto Pos = partition_point(
+      UUIDs, [=](const std::pair<Architecture, std::string> &Entry) {
+        return Entry.first < Arch;
+      });
+
+  // Overwrite the UUID of an entry that already exists for Arch; otherwise
+  // insert a new pair at Pos.
+  if (Pos != UUIDs.end() && Arch == Pos->first)
+    Pos->second = UUID;
+  else
+    UUIDs.emplace(Pos, Arch, UUID);
+}
+
+void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) {
+  std::stringstream Hex; // text form: uppercase hex, two digits per byte
+  Hex << std::setfill('0') << std::uppercase << std::hex; // sticky settings
+  for (unsigned Idx = 0; Idx < 16; ++Idx) {
+    if (Idx == 4 || Idx == 6 || Idx == 8 || Idx == 10)
+      Hex << '-'; // dash before bytes 4, 6, 8 and 10
+    Hex << std::setw(2) << static_cast<int>(UUID[Idx]); // width resets per item
+  }
+  addUUID(Arch, Hex.str());
+}
+
+void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name,
+                              ArchitectureSet Archs, SymbolFlags Flags) {
+  Name = copyString(Name); // the map key and the Symbol use this copy
+  auto Inserted = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr);
+  if (!Inserted.second) // already known: only Archs are added, not Flags
+    Inserted.first->second->addArchitectures(Archs);
+  else
+    Inserted.first->second = new (Allocator) Symbol{Kind, Name, Archs, Flags};
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/PackedVersion.cpp b/lib/TextAPI/MachO/PackedVersion.cpp
new file mode 100644
index 000000000000..8405aba90ed6
--- /dev/null
+++ b/lib/TextAPI/MachO/PackedVersion.cpp
@@ -0,0 +1,113 @@
+//===- PackedVersion.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Mach-O packed version.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+bool PackedVersion::parse32(StringRef Str) {
+  // Parses "X[.Y[.Z]]" into Version as (X << 16) | (Y << 8) | Z, requiring
+  // X <= UINT16_MAX and Y, Z <= UINT8_MAX. Returns false on malformed input.
+  // Note: on failure Version may hold a partially parsed value.
+  Version = 0;
+
+  if (Str.empty())
+    return false;
+
+  SmallVector<StringRef, 3> Parts;
+  SplitString(Str, Parts, ".");
+
+  // SplitString drops empty tokens, so input such as "." yields no parts;
+  // reject it instead of indexing Parts[0] out of bounds.
+  if (Parts.empty() || Parts.size() > 3)
+    return false;
+
+  unsigned long long Num;
+  if (getAsUnsignedInteger(Parts[0], 10, Num) || Num > UINT16_MAX)
+    return false;
+
+  Version = Num << 16;
+
+  // ShiftNum is 8 for the second component and 0 for the third.
+  for (unsigned i = 1, ShiftNum = 8; i < Parts.size(); ++i, ShiftNum -= 8) {
+    if (getAsUnsignedInteger(Parts[i], 10, Num) || Num > UINT8_MAX)
+      return false;
+
+    Version |= (Num << ShiftNum);
+  }
+
+  return true;
+}
+
+std::pair<bool, bool> PackedVersion::parse64(StringRef Str) {
+  // Parses a version "A[.B[.C[.D[.E]]]]" into the 32-bit packed Version.
+  // Returns {Success, Truncated}: Truncated is set when a component was
+  // clamped to fit or when a fourth/fifth component was dropped.
+  bool Truncated = false;
+  Version = 0;
+
+  if (Str.empty())
+    return std::make_pair(false, Truncated);
+
+  SmallVector<StringRef, 5> Parts;
+  SplitString(Str, Parts, ".");
+
+  // SplitString drops empty tokens, so input such as "." yields no parts;
+  // reject it instead of indexing Parts[0] out of bounds.
+  if (Parts.empty() || Parts.size() > 5)
+    return std::make_pair(false, Truncated);
+
+  // First component: above 0xFFFFFF is an error, above 0xFFFF is clamped.
+  unsigned long long Num;
+  if (getAsUnsignedInteger(Parts[0], 10, Num) || Num > 0xFFFFFFULL)
+    return std::make_pair(false, Truncated);
+
+  if (Num > 0xFFFFULL) {
+    Num = 0xFFFFULL;
+    Truncated = true;
+  }
+  Version = Num << 16;
+
+  for (unsigned i = 1, ShiftNum = 8; i < Parts.size() && i < 3;
+       ++i, ShiftNum -= 8) {
+    if (getAsUnsignedInteger(Parts[i], 10, Num) || Num > 0x3FFULL)
+      return std::make_pair(false, Truncated);
+
+    if (Num > 0xFFULL) {
+      Num = 0xFFULL;
+      Truncated = true;
+    }
+    Version |= (Num << ShiftNum);
+  }
+
+  if (Parts.size() > 3)
+    Truncated = true;
+
+  return std::make_pair(true, Truncated);
+}
+
+void PackedVersion::print(raw_ostream &OS) const {
+  OS << format("%d", getMajor()); // the major component is always printed
+  if (getMinor() || getSubminor()) // minor is printed if subminor will follow
+    OS << format(".%d", getMinor());
+  if (getSubminor()) // subminor is omitted when it is zero
+    OS << format(".%d", getSubminor());
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/Symbol.cpp b/lib/TextAPI/MachO/Symbol.cpp
new file mode 100644
index 000000000000..731b264f6082
--- /dev/null
+++ b/lib/TextAPI/MachO/Symbol.cpp
@@ -0,0 +1,49 @@
+//===- Symbol.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Symbol.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/Symbol.h"
+#include <string>
+
+namespace llvm {
+namespace MachO {
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Symbol::dump(raw_ostream &OS) const {
+  // Writes a tag for every flag that is set, followed by the name, which is
+  // prefixed with a kind tag for the Objective-C symbol kinds.
+  if (isUndefined())
+    OS << "(undef) ";
+  if (isWeakDefined())
+    OS << "(weak-def) ";
+  if (isWeakReferenced())
+    OS << "(weak-ref) ";
+  if (isThreadLocalValue())
+    OS << "(tlv) ";
+  switch (Kind) {
+  case SymbolKind::GlobalSymbol:
+    OS << Name;
+    break;
+  case SymbolKind::ObjectiveCClass:
+    OS << "(ObjC Class) " << Name;
+    break;
+  case SymbolKind::ObjectiveCClassEHType:
+    OS << "(ObjC Class EH) " << Name;
+    break;
+  case SymbolKind::ObjectiveCInstanceVariable:
+    OS << "(ObjC IVar) " << Name;
+    break;
+  }
+}
+#endif
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextAPIContext.h b/lib/TextAPI/MachO/TextAPIContext.h
new file mode 100644
index 000000000000..3df40f09f7f7
--- /dev/null
+++ b/lib/TextAPI/MachO/TextAPIContext.h
@@ -0,0 +1,33 @@
+//===- TextAPIContext.h ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the YAML Context for the TextAPI Reader/Writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_CONTEXT_H
+#define LLVM_TEXTAPI_MACHO_CONTEXT_H
+
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+
+namespace llvm {
+namespace MachO {
+
+enum FileType : unsigned;
+
+struct TextAPIContext {
+  std::string ErrorMessage; // NOTE(review): presumably filled on failure
+  std::string Path;         // file path associated with the YAML document
+  FileType FileKind;        // no default initializer; must be set by the user
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_CONTEXT_H
diff --git a/lib/TextAPI/MachO/TextStub.cpp b/lib/TextAPI/MachO/TextStub.cpp
new file mode 100644
index 000000000000..799ebdc883ab
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStub.cpp
@@ -0,0 +1,660 @@
+//===- TextStub.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the text stub file reader/writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TextAPIContext.h"
+#include "TextStubCommon.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/TextAPI/MachO/TextAPIReader.h"
+#include "llvm/TextAPI/MachO/TextAPIWriter.h"
+#include <algorithm>
+#include <set>
+
+// clang-format off
+/*
+
+ YAML Format specification.
+
+ The TBD v1 format only supports two-level namespace libraries and is, by
+ definition, application extension safe.
+
+---                              # the tag !tapi-tbd-v1 is optional and
+                                 # shouldn't be emitted to support older linkers.
+archs: [ armv7, armv7s, arm64 ]  # the list of architecture slices that are
+                                 # supported by this file.
+platform: ios                    # Specifies the platform (macosx, ios, etc)
+install-name: /u/l/libfoo.dylib  #
+current-version: 1.2.3           # Optional: defaults to 1.0
+compatibility-version: 1.0       # Optional: defaults to 1.0
+swift-version: 0                 # Optional: defaults to 0
+objc-constraint: none            # Optional: defaults to none
+exports:                         # List of export sections
+...
+
+Each export section is defined as following:
+
+ - archs: [ arm64 ]                   # the list of architecture slices
+   allowed-clients: [ client ]        # Optional: List of clients
+   re-exports: [ ]                    # Optional: List of re-exports
+   symbols: [ _sym ]                  # Optional: List of symbols
+   objc-classes: []                   # Optional: List of Objective-C classes
+   objc-ivars: []                     # Optional: List of Objective C Instance
+                                      #           Variables
+   weak-def-symbols: []               # Optional: List of weak defined symbols
+   thread-local-symbols: []           # Optional: List of thread local symbols
+*/
+
+/*
+
+ YAML Format specification.
+
+--- !tapi-tbd-v2
+archs: [ armv7, armv7s, arm64 ]  # the list of architecture slices that are
+                                 # supported by this file.
+uuids: [ armv7:... ]             # Optional: List of architecture and UUID pairs.
+platform: ios                    # Specifies the platform (macosx, ios, etc)
+flags: []                        # Optional:
+install-name: /u/l/libfoo.dylib  #
+current-version: 1.2.3           # Optional: defaults to 1.0
+compatibility-version: 1.0       # Optional: defaults to 1.0
+swift-version: 0                 # Optional: defaults to 0
+objc-constraint: retain_release  # Optional: defaults to retain_release
+parent-umbrella:                 # Optional:
+exports:                         # List of export sections
+...
+undefineds:                      # List of undefineds sections
+...
+
+Each export section is defined as following:
+
+- archs: [ arm64 ]                   # the list of architecture slices
+  allowable-clients: [ client ]      # Optional: List of clients
+  re-exports: [ ]                    # Optional: List of re-exports
+  symbols: [ _sym ]                  # Optional: List of symbols
+  objc-classes: []                   # Optional: List of Objective-C classes
+  objc-ivars: []                     # Optional: List of Objective C Instance
+                                     #           Variables
+  weak-def-symbols: []               # Optional: List of weak defined symbols
+  thread-local-symbols: []           # Optional: List of thread local symbols
+
+Each undefineds section is defined as following:
+- archs: [ arm64 ]     # the list of architecture slices
+  symbols: [ _sym ]    # Optional: List of symbols
+  objc-classes: []     # Optional: List of Objective-C classes
+  objc-ivars: []       # Optional: List of Objective C Instance Variables
+  weak-ref-symbols: [] # Optional: List of weak referenced symbols
+*/
+
+/*
+
+ YAML Format specification.
+
+--- !tapi-tbd-v3
+archs: [ armv7, armv7s, arm64 ]  # the list of architecture slices that are
+                                 # supported by this file.
+uuids: [ armv7:... ]             # Optional: List of architecture and UUID pairs.
+platform: ios                    # Specifies the platform (macosx, ios, etc)
+flags: []                        # Optional:
+install-name: /u/l/libfoo.dylib  #
+current-version: 1.2.3           # Optional: defaults to 1.0
+compatibility-version: 1.0       # Optional: defaults to 1.0
+swift-abi-version: 0             # Optional: defaults to 0
+objc-constraint: retain_release  # Optional: defaults to retain_release
+parent-umbrella:                 # Optional:
+exports:                         # List of export sections
+...
+undefineds:                      # List of undefineds sections
+...
+
+Each export section is defined as following:
+
+- archs: [ arm64 ]                   # the list of architecture slices
+  allowable-clients: [ client ]      # Optional: List of clients
+  re-exports: [ ]                    # Optional: List of re-exports
+  symbols: [ _sym ]                  # Optional: List of symbols
+  objc-classes: []                   # Optional: List of Objective-C classes
+  objc-eh-types: []                  # Optional: List of Objective-C classes
+                                     #           with EH
+  objc-ivars: []                     # Optional: List of Objective C Instance
+                                     #           Variables
+  weak-def-symbols: []               # Optional: List of weak defined symbols
+  thread-local-symbols: []           # Optional: List of thread local symbols
+
+Each undefineds section is defined as following:
+- archs: [ arm64 ]     # the list of architecture slices
+  symbols: [ _sym ]    # Optional: List of symbols
+  objc-classes: []     # Optional: List of Objective-C classes
+  objc-eh-types: []    # Optional: List of Objective-C classes
+                       #           with EH
+  objc-ivars: []       # Optional: List of Objective C Instance Variables
+  weak-ref-symbols: [] # Optional: List of weak referenced symbols
+*/
+// clang-format on
+
+using namespace llvm;
+using namespace llvm::yaml;
+using namespace llvm::MachO;
+
+namespace {
+struct ExportSection {
+  std::vector<Architecture> Architectures; // "archs"
+  std::vector<FlowStringRef> AllowableClients; // "allowed-"/"allowable-clients"
+  std::vector<FlowStringRef> ReexportedLibraries; // "re-exports"
+  std::vector<FlowStringRef> Symbols; // "symbols"
+  std::vector<FlowStringRef> Classes; // "objc-classes"
+  std::vector<FlowStringRef> ClassEHs; // "objc-eh-types" (mapped for v3 only)
+  std::vector<FlowStringRef> IVars; // "objc-ivars"
+  std::vector<FlowStringRef> WeakDefSymbols; // "weak-def-symbols"
+  std::vector<FlowStringRef> TLVSymbols; // "thread-local-symbols"
+};
+
+struct UndefinedSection {
+  std::vector<Architecture> Architectures; // "archs"
+  std::vector<FlowStringRef> Symbols; // "symbols"
+  std::vector<FlowStringRef> Classes; // "objc-classes"
+  std::vector<FlowStringRef> ClassEHs; // "objc-eh-types" (mapped for v3 only)
+  std::vector<FlowStringRef> IVars; // "objc-ivars"
+  std::vector<FlowStringRef> WeakRefSymbols; // "weak-ref-symbols"
+};
+
+// clang-format off
+enum TBDFlags : unsigned {
+  None                         = 0U,
+  FlatNamespace                = 1U << 0, // YAML: "flat_namespace"
+  NotApplicationExtensionSafe  = 1U << 1, // YAML: "not_app_extension_safe"
+  InstallAPI                   = 1U << 2, // YAML: "installapi"
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/InstallAPI),
+};
+// clang-format on
+} // end anonymous namespace.
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture)
+LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection)
+LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<ExportSection> {
+  static void mapping(IO &IO, ExportSection &Section) {
+    const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+    // A context is mandatory: the key names below depend on Ctx->FileKind.
+    assert(Ctx && Ctx->FileKind != FileType::Invalid &&
+           "File type is not set in YAML context");
+    IO.mapRequired("archs", Section.Architectures);
+    if (Ctx->FileKind == FileType::TBD_V1) // v1 spells this key differently
+      IO.mapOptional("allowed-clients", Section.AllowableClients);
+    else
+      IO.mapOptional("allowable-clients", Section.AllowableClients);
+    IO.mapOptional("re-exports", Section.ReexportedLibraries);
+    IO.mapOptional("symbols", Section.Symbols);
+    IO.mapOptional("objc-classes", Section.Classes);
+    if (Ctx->FileKind == FileType::TBD_V3) // key is only mapped for v3 files
+      IO.mapOptional("objc-eh-types", Section.ClassEHs);
+    IO.mapOptional("objc-ivars", Section.IVars);
+    IO.mapOptional("weak-def-symbols", Section.WeakDefSymbols);
+    IO.mapOptional("thread-local-symbols", Section.TLVSymbols);
+  }
+};
+
+template <> struct MappingTraits<UndefinedSection> {
+  static void mapping(IO &IO, UndefinedSection &Section) {
+    const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+    // A context is mandatory: Ctx->FileKind is read below.
+    assert(Ctx && Ctx->FileKind != FileType::Invalid &&
+           "File type is not set in YAML context");
+    IO.mapRequired("archs", Section.Architectures);
+    IO.mapOptional("symbols", Section.Symbols);
+    IO.mapOptional("objc-classes", Section.Classes);
+    if (Ctx->FileKind == FileType::TBD_V3) // key is only mapped for v3 files
+      IO.mapOptional("objc-eh-types", Section.ClassEHs);
+    IO.mapOptional("objc-ivars", Section.IVars);
+    IO.mapOptional("weak-ref-symbols", Section.WeakRefSymbols);
+  }
+};
+
+template <> struct ScalarBitSetTraits<TBDFlags> {
+  static void bitset(IO &IO, TBDFlags &Flags) { // YAML name for each flag bit
+    IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace);
+    IO.bitSetCase(Flags, "not_app_extension_safe",
+                  TBDFlags::NotApplicationExtensionSafe);
+    IO.bitSetCase(Flags, "installapi", TBDFlags::InstallAPI);
+  }
+};
+
+template <> struct MappingTraits<const InterfaceFile *> {
+  struct NormalizedTBD {
+    explicit NormalizedTBD(IO &IO) {} // nothing is populated from IO here
+    NormalizedTBD(IO &IO, const InterfaceFile *&File) {
+      Architectures = File->getArchitectures();
+      UUIDs = File->uuids();
+      Platform = File->getPlatform();
+      InstallName = File->getInstallName();
+      CurrentVersion = PackedVersion(File->getCurrentVersion());
+      CompatibilityVersion = PackedVersion(File->getCompatibilityVersion());
+      SwiftABIVersion = File->getSwiftABIVersion();
+      ObjCConstraint = File->getObjCConstraint();
+
+      Flags = TBDFlags::None; // rebuilt from the File's boolean properties
+      if (!File->isApplicationExtensionSafe())
+        Flags |= TBDFlags::NotApplicationExtensionSafe;
+
+      if (!File->isTwoLevelNamespace())
+        Flags |= TBDFlags::FlatNamespace;
+
+      if (File->isInstallAPI())
+        Flags |= TBDFlags::InstallAPI;
+
+      ParentUmbrella = File->getParentUmbrella();
+
+      std::set<ArchitectureSet> ArchSet; // distinct architecture sets in use
+      for (const auto &Library : File->allowableClients())
+        ArchSet.insert(Library.getArchitectures());
+
+      for (const auto &Library : File->reexportedLibraries())
+        ArchSet.insert(Library.getArchitectures());
+
+      std::map<const Symbol *, ArchitectureSet> SymbolToArchSet;
+      for (const auto *Symbol : File->exports()) {
+        auto Architectures = Symbol->getArchitectures();
+        SymbolToArchSet[Symbol] = Architectures;
+        ArchSet.insert(Architectures);
+      }
+
+      for (auto Architectures : ArchSet) { // one export section per set
+        ExportSection Section;
+        Section.Architectures = Architectures;
+
+        for (const auto &Library : File->allowableClients())
+          if (Library.getArchitectures() == Architectures)
+            Section.AllowableClients.emplace_back(Library.getInstallName());
+
+        for (const auto &Library : File->reexportedLibraries())
+          if (Library.getArchitectures() == Architectures)
+            Section.ReexportedLibraries.emplace_back(Library.getInstallName());
+
+        for (const auto &SymArch : SymbolToArchSet) {
+          if (SymArch.second != Architectures)
+            continue;
+
+          const auto *Symbol = SymArch.first;
+          switch (Symbol->getKind()) {
+          case SymbolKind::GlobalSymbol:
+            if (Symbol->isWeakDefined())
+              Section.WeakDefSymbols.emplace_back(Symbol->getName());
+            else if (Symbol->isThreadLocalValue())
+              Section.TLVSymbols.emplace_back(Symbol->getName());
+            else
+              Section.Symbols.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCClass:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: "_" prefix
+              Section.Classes.emplace_back(
+                  copyString("_" + Symbol->getName().str()));
+            else
+              Section.Classes.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCClassEHType:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: plain symbol
+              Section.Symbols.emplace_back(
+                  copyString("_OBJC_EHTYPE_$_" + Symbol->getName().str()));
+            else
+              Section.ClassEHs.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCInstanceVariable:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: "_" prefix
+              Section.IVars.emplace_back(
+                  copyString("_" + Symbol->getName().str()));
+            else
+              Section.IVars.emplace_back(Symbol->getName());
+            break;
+          }
+        }
+        llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
+        llvm::sort(Section.Classes.begin(), Section.Classes.end());
+        llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
+        llvm::sort(Section.IVars.begin(), Section.IVars.end());
+        llvm::sort(Section.WeakDefSymbols.begin(),
+                   Section.WeakDefSymbols.end());
+        llvm::sort(Section.TLVSymbols.begin(), Section.TLVSymbols.end());
+        Exports.emplace_back(std::move(Section));
+      }
+
+      ArchSet.clear(); // both containers are reused for the undefineds
+      SymbolToArchSet.clear();
+
+      for (const auto *Symbol : File->undefineds()) {
+        auto Architectures = Symbol->getArchitectures();
+        SymbolToArchSet[Symbol] = Architectures;
+        ArchSet.insert(Architectures);
+      }
+
+      for (auto Architectures : ArchSet) { // one undefined section per set
+        UndefinedSection Section;
+        Section.Architectures = Architectures;
+
+        for (const auto &SymArch : SymbolToArchSet) {
+          if (SymArch.second != Architectures)
+            continue;
+
+          const auto *Symbol = SymArch.first;
+          switch (Symbol->getKind()) {
+          case SymbolKind::GlobalSymbol:
+            if (Symbol->isWeakReferenced())
+              Section.WeakRefSymbols.emplace_back(Symbol->getName());
+            else
+              Section.Symbols.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCClass:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: "_" prefix
+              Section.Classes.emplace_back(
+                  copyString("_" + Symbol->getName().str()));
+            else
+              Section.Classes.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCClassEHType:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: plain symbol
+              Section.Symbols.emplace_back(
+                  copyString("_OBJC_EHTYPE_$_" + Symbol->getName().str()));
+            else
+              Section.ClassEHs.emplace_back(Symbol->getName());
+            break;
+          case SymbolKind::ObjectiveCInstanceVariable:
+            if (File->getFileType() != FileType::TBD_V3) // pre-v3: "_" prefix
+              Section.IVars.emplace_back(
+                  copyString("_" + Symbol->getName().str()));
+            else
+              Section.IVars.emplace_back(Symbol->getName());
+            break;
+          }
+        }
+        llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
+        llvm::sort(Section.Classes.begin(), Section.Classes.end());
+        llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
+        llvm::sort(Section.IVars.begin(), Section.IVars.end());
+        llvm::sort(Section.WeakRefSymbols.begin(),
+                   Section.WeakRefSymbols.end());
+        Undefineds.emplace_back(std::move(Section));
+      }
+    }
+
+    const InterfaceFile *denormalize(IO &IO) {
+      auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+      assert(Ctx);
+
+      auto *File = new InterfaceFile;
+      File->setPath(Ctx->Path);
+      File->setFileType(Ctx->FileKind);
+      for (auto &ID : UUIDs)
+        File->addUUID(ID.first, ID.second);
+      File->setPlatform(Platform);
+      File->setArchitectures(Architectures);
+      File->setInstallName(InstallName);
+      File->setCurrentVersion(CurrentVersion);
+      File->setCompatibilityVersion(CompatibilityVersion);
+      File->setSwiftABIVersion(SwiftABIVersion);
+      File->setObjCConstraint(ObjCConstraint);
+      File->setParentUmbrella(ParentUmbrella);
+
+      if (Ctx->FileKind == FileType::TBD_V1) {
+        File->setTwoLevelNamespace();
+        File->setApplicationExtensionSafe();
+      } else {
+        File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
+        File->setApplicationExtensionSafe(
+            !(Flags & TBDFlags::NotApplicationExtensionSafe));
+        File->setInstallAPI(Flags & TBDFlags::InstallAPI);
+      }
+
+      for (const auto &Section : Exports) {
+        for (const auto &Library : Section.AllowableClients)
+          File->addAllowableClient(Library, Section.Architectures);
+        for (const auto &Library : Section.ReexportedLibraries)
+          File->addReexportedLibrary(Library, Section.Architectures);
+
+        for (const auto &Symbol : Section.Symbols) {
+          if (Ctx->FileKind != FileType::TBD_V3 &&
+              Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+            File->addSymbol(SymbolKind::ObjectiveCClassEHType,
+                            Symbol.value.drop_front(15), Section.Architectures);
+          else
+            File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+                            Section.Architectures);
+        }
+        for (auto &Symbol : Section.Classes) {
+          auto Name = Symbol.value;
+          if (Ctx->FileKind != FileType::TBD_V3)
+            Name = Name.drop_front();
+          File->addSymbol(SymbolKind::ObjectiveCClass, Name,
+                          Section.Architectures);
+        }
+        for (auto &Symbol : Section.ClassEHs)
+          File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
+                          Section.Architectures);
+        for (auto &Symbol : Section.IVars) {
+          auto Name = Symbol.value;
+          if (Ctx->FileKind != FileType::TBD_V3)
+            Name = Name.drop_front();
+          File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
+                          Section.Architectures);
+        }
+        for (auto &Symbol : Section.WeakDefSymbols)
+          File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+                          Section.Architectures, SymbolFlags::WeakDefined);
+        for (auto &Symbol : Section.TLVSymbols)
+          File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+                          Section.Architectures, SymbolFlags::ThreadLocalValue);
+      }
+
+      for (const auto &Section : Undefineds) {
+        for (auto &Symbol : Section.Symbols) {
+          if (Ctx->FileKind != FileType::TBD_V3 &&
+              Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+            File->addSymbol(SymbolKind::ObjectiveCClassEHType,
+                            Symbol.value.drop_front(15), Section.Architectures,
+                            SymbolFlags::Undefined);
+          else
+            File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+                            Section.Architectures, SymbolFlags::Undefined);
+        }
+        for (auto &Symbol : Section.Classes) {
+          auto Name = Symbol.value;
+          if (Ctx->FileKind != FileType::TBD_V3)
+            Name = Name.drop_front();
+          File->addSymbol(SymbolKind::ObjectiveCClass, Name,
+                          Section.Architectures, SymbolFlags::Undefined);
+        }
+        for (auto &Symbol : Section.ClassEHs)
+          File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
+                          Section.Architectures, SymbolFlags::Undefined);
+        for (auto &Symbol : Section.IVars) {
+          auto Name = Symbol.value;
+          if (Ctx->FileKind != FileType::TBD_V3)
+            Name = Name.drop_front();
+          File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
+                          Section.Architectures, SymbolFlags::Undefined);
+        }
+        for (auto &Symbol : Section.WeakRefSymbols)
+          File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+                          Section.Architectures,
+                          SymbolFlags::Undefined | SymbolFlags::WeakReferenced);
+      }
+
+      return File;
+    }
+
+    llvm::BumpPtrAllocator Allocator;
+    StringRef copyString(StringRef String) {
+      if (String.empty())
+        return {};
+
+      void *Ptr = Allocator.Allocate(String.size(), 1);
+      memcpy(Ptr, String.data(), String.size());
+      return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+    }
+
+    std::vector<Architecture> Architectures;
+    std::vector<UUID> UUIDs;
+    PlatformKind Platform{PlatformKind::unknown};
+    StringRef InstallName;
+    PackedVersion CurrentVersion;
+    PackedVersion CompatibilityVersion;
+    SwiftVersion SwiftABIVersion{0};
+    ObjCConstraintType ObjCConstraint{ObjCConstraintType::None};
+    TBDFlags Flags{TBDFlags::None};
+    StringRef ParentUmbrella;
+    std::vector<ExportSection> Exports;
+    std::vector<UndefinedSection> Undefineds;
+  };
+
+  static void mapping(IO &IO, const InterfaceFile *&File) {
+    auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+    assert((!Ctx || !IO.outputting() ||
+            (Ctx && Ctx->FileKind != FileType::Invalid)) &&
+           "File type is not set in YAML context");
+    MappingNormalization<NormalizedTBD, const InterfaceFile *> Keys(IO, File);
+
+    // Probe the file type from the document tag when reading.
+    if (!IO.outputting()) {
+      if (IO.mapTag("!tapi-tbd-v2", false))
+        Ctx->FileKind = FileType::TBD_V2;
+      else if (IO.mapTag("!tapi-tbd-v3", false))
+        Ctx->FileKind = FileType::TBD_V3;
+      else if (IO.mapTag("!tapi-tbd-v1", false) ||
+               IO.mapTag("tag:yaml.org,2002:map", false))
+        Ctx->FileKind = FileType::TBD_V1;
+      else {
+        IO.setError("unsupported file type");
+        return;
+      }
+    }
+
+    // Emit the document tag matching the file type when writing.
+    if (IO.outputting()) {
+      switch (Ctx->FileKind) {
+      default:
+        llvm_unreachable("unexpected file type");
+      case FileType::TBD_V1:
+        // Don't write the tag into the .tbd file for TBD v1.
+        break;
+      case FileType::TBD_V2:
+        IO.mapTag("!tapi-tbd-v2", true);
+        break;
+      case FileType::TBD_V3:
+        IO.mapTag("!tapi-tbd-v3", true);
+        break;
+      }
+    }
+
+    IO.mapRequired("archs", Keys->Architectures);
+    if (Ctx->FileKind != FileType::TBD_V1)
+      IO.mapOptional("uuids", Keys->UUIDs);
+    IO.mapRequired("platform", Keys->Platform);
+    if (Ctx->FileKind != FileType::TBD_V1)
+      IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
+    IO.mapRequired("install-name", Keys->InstallName);
+    IO.mapOptional("current-version", Keys->CurrentVersion,
+                   PackedVersion(1, 0, 0));
+    IO.mapOptional("compatibility-version", Keys->CompatibilityVersion,
+                   PackedVersion(1, 0, 0));
+    if (Ctx->FileKind != FileType::TBD_V3)
+      IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0));
+    else
+      IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion,
+                     SwiftVersion(0));
+    IO.mapOptional("objc-constraint", Keys->ObjCConstraint,
+                   (Ctx->FileKind == FileType::TBD_V1)
+                       ? ObjCConstraintType::None
+                       : ObjCConstraintType::Retain_Release);
+    if (Ctx->FileKind != FileType::TBD_V1)
+      IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef());
+    IO.mapOptional("exports", Keys->Exports);
+    if (Ctx->FileKind != FileType::TBD_V1)
+      IO.mapOptional("undefineds", Keys->Undefineds);
+  }
+};
+
+template <>
+struct DocumentListTraits<std::vector<const MachO::InterfaceFile *>> {
+  static size_t size(IO &IO, std::vector<const MachO::InterfaceFile *> &Seq) {
+    return Seq.size();
+  }
+  static const InterfaceFile *&
+  element(IO &IO, std::vector<const InterfaceFile *> &Seq, size_t Index) {
+    if (Index >= Seq.size())
+      Seq.resize(Index + 1);
+    return Seq[Index];
+  }
+};
+
+} // end namespace yaml.
+
+namespace MachO {
+static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
+  auto *File = static_cast<TextAPIContext *>(Context);
+  SmallString<1024> Message;
+  raw_svector_ostream S(Message);
+
+  SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), File->Path,
+                       Diag.getLineNo(), Diag.getColumnNo(), Diag.getKind(),
+                       Diag.getMessage(), Diag.getLineContents(),
+                       Diag.getRanges(), Diag.getFixIts());
+
+  NewDiag.print(nullptr, S);
+  File->ErrorMessage = ("malformed file\n" + Message).str();
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+TextAPIReader::get(std::unique_ptr<MemoryBuffer> InputBuffer) {
+  TextAPIContext Ctx;
+  Ctx.Path = InputBuffer->getBufferIdentifier();
+  yaml::Input YAMLIn(InputBuffer->getBuffer(), &Ctx, DiagHandler, &Ctx);
+
+  // Fill vector with interface file objects created by parsing the YAML file.
+  std::vector<const InterfaceFile *> Files;
+  YAMLIn >> Files;
+
+  auto File = std::unique_ptr<InterfaceFile>(
+      const_cast<InterfaceFile *>(Files.front()));
+
+  if (YAMLIn.error())
+    return make_error<StringError>(Ctx.ErrorMessage, YAMLIn.error());
+
+  return std::move(File);
+}
+
+Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File) {
+  TextAPIContext Ctx;
+  Ctx.Path = File.getPath();
+  Ctx.FileKind = File.getFileType();
+  llvm::yaml::Output YAMLOut(OS, &Ctx, /*WrapColumn=*/80);
+
+  std::vector<const InterfaceFile *> Files;
+  Files.emplace_back(&File);
+
+  // Stream out yaml.
+  YAMLOut << Files;
+
+  return Error::success();
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextStubCommon.cpp b/lib/TextAPI/MachO/TextStubCommon.cpp
new file mode 100644
index 000000000000..00382cd24573
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStubCommon.cpp
@@ -0,0 +1,178 @@
+//===- TextStubCommon.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements common Text Stub YAML mappings.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TextStubCommon.h"
+#include "TextAPIContext.h"
+
+using namespace llvm::MachO;
+
+namespace llvm {
+namespace yaml {
+
+void ScalarTraits<FlowStringRef>::output(const FlowStringRef &Value, void *Ctx,
+                                         raw_ostream &OS) {
+  ScalarTraits<StringRef>::output(Value, Ctx, OS);
+}
+StringRef ScalarTraits<FlowStringRef>::input(StringRef Value, void *Ctx,
+                                             FlowStringRef &Out) {
+  return ScalarTraits<StringRef>::input(Value, Ctx, Out.value);
+}
+QuotingType ScalarTraits<FlowStringRef>::mustQuote(StringRef Name) {
+  return ScalarTraits<StringRef>::mustQuote(Name);
+}
+
+void ScalarEnumerationTraits<ObjCConstraintType>::enumeration(
+    IO &IO, ObjCConstraintType &Constraint) {
+  IO.enumCase(Constraint, "none", ObjCConstraintType::None);
+  IO.enumCase(Constraint, "retain_release", ObjCConstraintType::Retain_Release);
+  IO.enumCase(Constraint, "retain_release_for_simulator",
+              ObjCConstraintType::Retain_Release_For_Simulator);
+  IO.enumCase(Constraint, "retain_release_or_gc",
+              ObjCConstraintType::Retain_Release_Or_GC);
+  IO.enumCase(Constraint, "gc", ObjCConstraintType::GC);
+}
+
+void ScalarTraits<PlatformKind>::output(const PlatformKind &Value, void *,
+                                        raw_ostream &OS) {
+  switch (Value) {
+  default:
+    llvm_unreachable("unexpected platform");
+    break;
+  case PlatformKind::macOS:
+    OS << "macosx";
+    break;
+  case PlatformKind::iOS:
+    OS << "ios";
+    break;
+  case PlatformKind::watchOS:
+    OS << "watchos";
+    break;
+  case PlatformKind::tvOS:
+    OS << "tvos";
+    break;
+  case PlatformKind::bridgeOS:
+    OS << "bridgeos";
+    break;
+  }
+}
+StringRef ScalarTraits<PlatformKind>::input(StringRef Scalar, void *,
+                                            PlatformKind &Value) {
+  Value = StringSwitch<PlatformKind>(Scalar)
+              .Case("macosx", PlatformKind::macOS)
+              .Case("ios", PlatformKind::iOS)
+              .Case("watchos", PlatformKind::watchOS)
+              .Case("tvos", PlatformKind::tvOS)
+              .Case("bridgeos", PlatformKind::bridgeOS)
+              .Default(PlatformKind::unknown);
+
+  if (Value == PlatformKind::unknown)
+    return "unknown platform";
+  return {};
+}
+QuotingType ScalarTraits<PlatformKind>::mustQuote(StringRef) {
+  return QuotingType::None;
+}
+
+void ScalarBitSetTraits<ArchitectureSet>::bitset(IO &IO,
+                                                 ArchitectureSet &Archs) {
+#define ARCHINFO(arch, type, subtype)                                          \
+  IO.bitSetCase(Archs, #arch, 1U << static_cast<int>(AK_##arch));
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+}
+
+void ScalarTraits<Architecture>::output(const Architecture &Value, void *,
+                                        raw_ostream &OS) {
+  OS << Value;
+}
+StringRef ScalarTraits<Architecture>::input(StringRef Scalar, void *,
+                                            Architecture &Value) {
+  Value = getArchitectureFromName(Scalar);
+  return {};
+}
+QuotingType ScalarTraits<Architecture>::mustQuote(StringRef) {
+  return QuotingType::None;
+}
+
+void ScalarTraits<PackedVersion>::output(const PackedVersion &Value, void *,
+                                         raw_ostream &OS) {
+  OS << Value;
+}
+StringRef ScalarTraits<PackedVersion>::input(StringRef Scalar, void *,
+                                             PackedVersion &Value) {
+  if (!Value.parse32(Scalar))
+    return "invalid packed version string.";
+  return {};
+}
+QuotingType ScalarTraits<PackedVersion>::mustQuote(StringRef) {
+  return QuotingType::None;
+}
+
+void ScalarTraits<SwiftVersion>::output(const SwiftVersion &Value, void *,
+                                        raw_ostream &OS) {
+  switch (Value) {
+  case 1:
+    OS << "1.0";
+    break;
+  case 2:
+    OS << "1.1";
+    break;
+  case 3:
+    OS << "2.0";
+    break;
+  case 4:
+    OS << "3.0";
+    break;
+  default:
+    OS << (unsigned)Value;
+    break;
+  }
+}
+StringRef ScalarTraits<SwiftVersion>::input(StringRef Scalar, void *,
+                                            SwiftVersion &Value) {
+  Value = StringSwitch<SwiftVersion>(Scalar)
+              .Case("1.0", 1)
+              .Case("1.1", 2)
+              .Case("2.0", 3)
+              .Case("3.0", 4)
+              .Default(0);
+  if (Value != SwiftVersion(0))
+    return {};
+
+  if (Scalar.getAsInteger(10, Value))
+    return "invalid Swift ABI version.";
+
+  return StringRef();
+}
+QuotingType ScalarTraits<SwiftVersion>::mustQuote(StringRef) {
+  return QuotingType::None;
+}
+
+void ScalarTraits<UUID>::output(const UUID &Value, void *, raw_ostream &OS) {
+  OS << Value.first << ": " << Value.second;
+}
+StringRef ScalarTraits<UUID>::input(StringRef Scalar, void *, UUID &Value) {
+  auto Split = Scalar.split(':');
+  auto Arch = Split.first.trim();
+  auto UUID = Split.second.trim();
+  if (UUID.empty())
+    return "invalid uuid string pair";
+  Value.first = getArchitectureFromName(Arch);
+  Value.second = UUID;
+  return {};
+}
+QuotingType ScalarTraits<UUID>::mustQuote(StringRef) {
+  return QuotingType::Single;
+}
+
+} // end namespace yaml.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextStubCommon.h b/lib/TextAPI/MachO/TextStubCommon.h
new file mode 100644
index 000000000000..c4dd1075b1c8
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStubCommon.h
@@ -0,0 +1,81 @@
+//===- TextStubCommon.h ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines common Text Stub YAML mappings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_TEXT_STUB_COMMON_H
+#define LLVM_TEXTAPI_TEXT_STUB_COMMON_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+
+using UUID = std::pair<llvm::MachO::Architecture, std::string>;
+
+LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(UUID)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(FlowStringRef)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarTraits<FlowStringRef> {
+  static void output(const FlowStringRef &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, FlowStringRef &);
+  static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarEnumerationTraits<MachO::ObjCConstraintType> {
+  static void enumeration(IO &, MachO::ObjCConstraintType &);
+};
+
+template <> struct ScalarTraits<MachO::PlatformKind> {
+  static void output(const MachO::PlatformKind &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, MachO::PlatformKind &);
+  static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarBitSetTraits<MachO::ArchitectureSet> {
+  static void bitset(IO &, MachO::ArchitectureSet &);
+};
+
+template <> struct ScalarTraits<MachO::Architecture> {
+  static void output(const MachO::Architecture &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, MachO::Architecture &);
+  static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<MachO::PackedVersion> {
+  static void output(const MachO::PackedVersion &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, MachO::PackedVersion &);
+  static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<SwiftVersion> {
+  static void output(const SwiftVersion &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, SwiftVersion &);
+  static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<UUID> {
+  static void output(const UUID &, void *, raw_ostream &);
+  static StringRef input(StringRef, void *, UUID &);
+  static QuotingType mustQuote(StringRef);
+};
+
+} // end namespace yaml.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_TEXT_STUB_COMMON_H
diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index c5a28d4f1c08..0b406cc531a4 100644
--- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -1,9 +1,8 @@
 //===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -109,7 +108,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
   }
 
   for (auto *Arg : Args.filtered(OPT_UNKNOWN))
-    llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+    llvm::errs() << "ignoring unknown argument: " << Arg->getAsString(Args)
+                 << "\n";
 
   if (!Args.hasArg(OPT_d)) {
     llvm::errs() << "no definition file specified\n";
diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 64f4fe423f25..18ab6637305e 100644
--- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -1,9 +1,8 @@
 //===- LibDriver.cpp - lib.exe-compatible driver --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,8 +13,12 @@
 
 #include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/WindowsMachineFlag.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
@@ -97,6 +100,47 @@ static std::string findInputFile(StringRef File, ArrayRef<StringRef> Paths) {
   return "";
 }
 
+static void fatalOpenError(llvm::Error E, Twine File) {
+  if (!E)
+    return;
+  handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) {
+    llvm::errs() << "error opening '" << File << "': " << EIB.message() << '\n';
+    exit(1);
+  });
+}
+
+static void doList(opt::InputArgList& Args) {
+  // lib.exe prints the contents of the first archive file.
+  std::unique_ptr<MemoryBuffer> B;
+  for (auto *Arg : Args.filtered(OPT_INPUT)) {
+    // Create or open the archive object.
+    ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf =
+        MemoryBuffer::getFile(Arg->getValue(), -1, false);
+    fatalOpenError(errorCodeToError(MaybeBuf.getError()), Arg->getValue());
+
+    if (identify_magic(MaybeBuf.get()->getBuffer()) == file_magic::archive) {
+      B = std::move(MaybeBuf.get());
+      break;
+    }
+  }
+
+  // lib.exe doesn't print an error if no .lib files are passed.
+  if (!B)
+    return;
+
+  Error Err = Error::success();
+  object::Archive Archive(B.get()->getMemBufferRef(), Err);
+  fatalOpenError(std::move(Err), B->getBufferIdentifier());
+
+  for (auto &C : Archive.children(Err)) {
+    Expected<StringRef> NameOrErr = C.getName();
+    fatalOpenError(NameOrErr.takeError(), B->getBufferIdentifier());
+    StringRef Name = NameOrErr.get();
+    llvm::outs() << Name << '\n';
+  }
+  fatalOpenError(std::move(Err), B->getBufferIdentifier());
+}
+
 int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   BumpPtrAllocator Alloc;
   StringSaver Saver(Alloc);
@@ -119,7 +163,8 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
     return 1;
   }
   for (auto *Arg : Args.filtered(OPT_UNKNOWN))
-    llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+    llvm::errs() << "ignoring unknown argument: " << Arg->getAsString(Args)
+                 << "\n";
 
   // Handle /help
   if (Args.hasArg(OPT_help)) {
@@ -131,8 +176,25 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   if (!Args.hasArgNoClaim(OPT_INPUT))
     return 0;
 
+  if (Args.hasArg(OPT_lst)) {
+    doList(Args);
+    return 0;
+  }
+
   std::vector<StringRef> SearchPaths = getSearchPaths(&Args, Saver);
 
+  COFF::MachineTypes LibMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+  std::string LibMachineSource;
+  if (auto *Arg = Args.getLastArg(OPT_machine)) {
+    LibMachine = getMachineType(Arg->getValue());
+    if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+      llvm::errs() << "unknown /machine: arg " << Arg->getValue() << '\n';
+      return 1;
+    }
+    LibMachineSource =
+        std::string(" (from '/machine:") + Arg->getValue() + "' flag)";
+  }
+
   // Create a NewArchiveMember for each input file.
   std::vector<NewArchiveMember> Members;
   for (auto *Arg : Args.filtered(OPT_INPUT)) {
@@ -158,11 +220,95 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
                    << ": not a COFF object, bitcode or resource file\n";
       return 1;
     }
+
+    // Check that all input files have the same machine type.
+    // Mixing normal objects and LTO bitcode files is fine as long as they
+    // have the same machine type.
+    // Doing this here duplicates the header parsing work that writeArchive()
+    // below does, but it's not a lot of work and it's a bit awkward to do
+    // in writeArchive() which needs to support many tools, can't assume the
+    // input is COFF, and doesn't have a good way to report errors.
+    COFF::MachineTypes FileMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+    if (Magic == file_magic::coff_object) {
+      std::error_code EC;
+      object::COFFObjectFile Obj(*MOrErr->Buf, EC);
+      if (EC) {
+        llvm::errs() << Arg->getValue() << ": failed to open: " << EC.message()
+                     << '\n';
+        return 1;
+      }
+      uint16_t Machine = Obj.getMachine();
+      if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
+          Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
+          Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
+          Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
+        llvm::errs() << Arg->getValue() << ": unknown machine: " << Machine
+                     << '\n';
+        return 1;
+      }
+      FileMachine = static_cast<COFF::MachineTypes>(Machine);
+    } else if (Magic == file_magic::bitcode) {
+      Expected<std::string> TripleStr = getBitcodeTargetTriple(*MOrErr->Buf);
+      if (!TripleStr) {
+        llvm::errs() << Arg->getValue()
+                     << ": failed to get target triple from bitcode\n";
+        return 1;
+      }
+      switch (Triple(*TripleStr).getArch()) {
+      case Triple::x86:
+        FileMachine = COFF::IMAGE_FILE_MACHINE_I386;
+        break;
+      case Triple::x86_64:
+        FileMachine = COFF::IMAGE_FILE_MACHINE_AMD64;
+        break;
+      case Triple::arm:
+        FileMachine = COFF::IMAGE_FILE_MACHINE_ARMNT;
+        break;
+      case Triple::aarch64:
+        FileMachine = COFF::IMAGE_FILE_MACHINE_ARM64;
+        break;
+      default:
+        llvm::errs() << Arg->getValue() << ": unknown arch in target triple "
+                     << *TripleStr << '\n';
+        return 1;
+      }
+    }
+
+    // FIXME: Once lld-link rejects multiple resource .obj files:
+    // Call convertResToCOFF() on .res files and add the resulting
+    // COFF file to the .lib output instead of adding the .res file, and remove
+    // this check. See PR42180.
+    if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+      if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+        LibMachine = FileMachine;
+        LibMachineSource = std::string(" (inferred from earlier file '") +
+                           Arg->getValue() + "')";
+      } else if (LibMachine != FileMachine) {
+        llvm::errs() << Arg->getValue() << ": file machine type "
+                     << machineToStr(FileMachine)
+                     << " conflicts with library machine type "
+                     << machineToStr(LibMachine) << LibMachineSource << '\n';
+        return 1;
+      }
+    }
+
     Members.emplace_back(std::move(*MOrErr));
   }
 
   // Create an archive file.
   std::string OutputPath = getOutputPath(&Args, Members[0]);
+  // llvm-lib uses relative paths for both regular and thin archives, unlike
+  // standard GNU ar, which only uses relative paths for thin archives and
+  // basenames for regular archives.
+  for (NewArchiveMember &Member : Members) {
+    if (sys::path::is_relative(Member.MemberName)) {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(OutputPath, Member.MemberName);
+      if (PathOrErr)
+        Member.MemberName = Saver.save(*PathOrErr);
+    }
+  }
+
   if (Error E =
           writeArchive(OutputPath, Members,
                        /*WriteSymtab=*/true, object::Archive::K_GNU,
diff --git a/lib/ToolDrivers/llvm-lib/Options.td b/lib/ToolDrivers/llvm-lib/Options.td
index dd41952b7878..7863196126a8 100644
--- a/lib/ToolDrivers/llvm-lib/Options.td
+++ b/lib/ToolDrivers/llvm-lib/Options.td
@@ -3,27 +3,33 @@ include "llvm/Option/OptParser.td"
 // lib.exe accepts options starting with either a dash or a slash.
 
 // Flag that takes no arguments.
-class F<string name> : Flag<["/", "-", "-?"], name>;
+class F<string name> : Flag<["/", "-", "/?", "-?"], name>;
 
 // Flag that takes one argument after ":".
 class P<string name, string help> :
-      Joined<["/", "-", "-?"], name#":">, HelpText<help>;
+      Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>;
 
 def libpath: P<"libpath", "Object file search path">;
+
+// Can't be called "list" since that's a keyword.
+def lst    : F<"list">, HelpText<"List contents of .lib file on stdout">;
 def out    : P<"out", "Path to file to write output">;
 
 def llvmlibthin : F<"llvmlibthin">,
     HelpText<"Make .lib point to .obj files instead of copying their contents">;
 
+def machine: P<"machine", "Specify target platform">;
+
 def help : F<"help">;
-def help_q : Flag<["/?", "-?"], "">, Alias<help>;
+
+// /?? and -?? must be before /? and -? to not confuse lib/Options.
+def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>;
 
 //==============================================================================
 // The flags below do nothing. They are defined only for lib.exe compatibility.
 //==============================================================================
 
-class QF<string name> : Joined<["/", "-", "-?"], name#":">;
+class QF<string name> : Joined<["/", "-", "/?", "-?"], name#":">;
 
 def ignore : QF<"ignore">;
-def machine: QF<"machine">;
 def nologo : F<"nologo">;
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c795866ec0f2..06222d7e7e44 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1,9 +1,8 @@
 //===- AggressiveInstCombine.cpp ------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index f3c8bde9f8ff..44e1c45664e7 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -1,9 +1,8 @@
 //===- AggressiveInstCombineInternal.h --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 8289b2d68f8a..7c5767912fd3 100644
--- a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -1,9 +1,8 @@
 //===- TruncInstCombine.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 359876627fce..1fb0a114d0c7 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -1,9 +1,8 @@
 //===- CoroCleanup.cpp - Coroutine Cleanup Pass ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass lowers all remaining coroutine intrinsics.
@@ -50,7 +49,7 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
   Builder.SetInsertPoint(SubFn);
   auto *FramePtr = Builder.CreateBitCast(FrameRaw, FramePtrTy);
   auto *Gep = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
-  auto *Load = Builder.CreateLoad(Gep);
+  auto *Load = Builder.CreateLoad(FrameTy->getElementType(Index), Gep);
 
   SubFn->replaceAllUsesWith(Load);
 }
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index ac47a06281a5..692697d6f32e 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -1,9 +1,8 @@
 //===- CoroEarly.cpp - Coroutine Early Function Pass ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass lowers coroutine intrinsics that hide the details of the exact
@@ -98,7 +97,7 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) {
   Builder.SetInsertPoint(II);
   auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
   auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0);
-  auto *Load = Builder.CreateLoad(Gep);
+  auto *Load = Builder.CreateLoad(FrameTy, Gep);
   auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
 
   II->replaceAllUsesWith(Cond);
@@ -114,7 +113,7 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
     StructType *FrameTy = StructType::create(C, "NoopCoro.Frame");
     auto *FramePtrTy = FrameTy->getPointerTo();
     auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
-                                   /*IsVarArgs=*/false);
+                                   /*isVarArg=*/false);
     auto *FnPtrTy = FnTy->getPointerTo();
     FrameTy->setBody({FnPtrTy, FnPtrTy});
 
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 58f952b54f3a..6707aa1c827d 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -1,9 +1,8 @@
 //===- CoroElide.cpp - Coroutine Frame Allocation Elision Pass ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass replaces dynamic allocation of coroutine frame with alloca and
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 4cb0a52961cc..58bf22bee29b 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1,9 +1,8 @@
 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file contains classes used to discover if for a particular value
@@ -53,7 +52,7 @@ public:
   }
 
   size_t blockToIndex(BasicBlock *BB) const {
-    auto *I = std::lower_bound(V.begin(), V.end(), BB);
+    auto *I = llvm::lower_bound(V, BB);
     assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
     return I - V.begin();
   }
@@ -379,7 +378,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   StructType *FrameTy = StructType::create(C, Name);
   auto *FramePtrTy = FrameTy->getPointerTo();
   auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
-                                 /*IsVarArgs=*/false);
+                                 /*isVarArg=*/false);
   auto *FnPtrTy = FnTy->getPointerTo();
 
   // Figure out how wide should be an integer type storing the suspend index.
@@ -403,6 +402,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     if (CurrentDef == Shape.PromiseAlloca)
       continue;
 
+    uint64_t Count = 1;
     Type *Ty = nullptr;
     if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
       Ty = AI->getAllocatedType();
@@ -414,11 +414,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
           Padder.addType(PaddingTy);
         }
       }
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        Count = CI->getValue().getZExtValue();
+      else
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
     } else {
       Ty = CurrentDef->getType();
     }
     S.setFieldIndex(Types.size());
-    Types.push_back(Ty);
+    if (Count == 1)
+      Types.push_back(Ty);
+    else
+      Types.push_back(ArrayType::get(Ty, Count));
     Padder.addType(Ty);
   }
   FrameTy->setBody(Types);
@@ -471,11 +478,12 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
 //
 static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
   auto *CB = Shape.CoroBegin;
+  LLVMContext &C = CB->getContext();
   IRBuilder<> Builder(CB->getNextNode());
-  PointerType *FramePtrTy = Shape.FrameTy->getPointerTo();
+  StructType *FrameTy = Shape.FrameTy;
+  PointerType *FramePtrTy = FrameTy->getPointerTo();
   auto *FramePtr =
       cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
-  Type *FrameTy = FramePtrTy->getElementType();
 
   Value *CurrentValue = nullptr;
   BasicBlock *CurrentBlock = nullptr;
@@ -492,17 +500,41 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
   if (Shape.PromiseAlloca)
     Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);
 
+  // Create a GEP with the given index into the coroutine frame for the original
+  // value Orig. Appends an extra 0 index for array-allocas, preserving the
+  // original type.
+  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
+    SmallVector<Value *, 3> Indices = {
+        ConstantInt::get(Type::getInt32Ty(C), 0),
+        ConstantInt::get(Type::getInt32Ty(C), Index),
+    };
+
+    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        auto Count = CI->getValue().getZExtValue();
+        if (Count > 1) {
+          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+        }
+      } else {
+        report_fatal_error("Coroutines cannot handle non static allocas yet");
+      }
+    }
+
+    return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
+  };
+
   // Create a load instruction to reload the spilled value from the coroutine
   // frame.
   auto CreateReload = [&](Instruction *InsertBefore) {
     assert(Index && "accessing unassigned field number");
     Builder.SetInsertPoint(InsertBefore);
-    auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
-                                                 CurrentValue->getName() +
-                                                     Twine(".reload.addr"));
+
+    auto *G = GetFramePointer(Index, CurrentValue);
+    G->setName(CurrentValue->getName() + Twine(".reload.addr"));
+
     return isa<AllocaInst>(CurrentValue)
                ? G
-               : Builder.CreateLoad(G,
+               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
                                     CurrentValue->getName() + Twine(".reload"));
   };
 
@@ -589,8 +621,8 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
   Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
   // If we found any allocas, replace all of their remaining uses with Geps.
   for (auto &P : Allocas) {
-    auto *G =
-        Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
+    auto *G = GetFramePointer(P.second, P.first);
+
     // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here,
     // as we are changing location of the instruction.
     G->takeName(P.first);
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 9a8cc5a2591c..5e19d7642e38 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -1,9 +1,8 @@
 //===-- CoroInstr.h - Coroutine Intrinsics Instruction Wrappers -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This file defines classes that make it really easy to deal with intrinsic
diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h
index 8e690d649cf5..441c8a20f1f3 100644
--- a/lib/Transforms/Coroutines/CoroInternal.h
+++ b/lib/Transforms/Coroutines/CoroInternal.h
@@ -1,9 +1,8 @@
 //===- CoroInternal.h - Internal Coroutine interfaces ---------*- C++ -*---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Common definitions/declarations used internally by coroutine lowering passes.
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 9eeceb217ba8..5458e70ff16a 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1,9 +1,8 @@
 //===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // This pass builds the coroutine frame and outlines resume and destroy parts
@@ -94,7 +93,7 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
   auto *FrameTy = Shape.FrameTy;
   auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
       FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
-  auto *Index = Builder.CreateLoad(GepIndex, "index");
+  auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
   auto *Switch =
       Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
   Shape.ResumeSwitch = Switch;
@@ -230,7 +229,8 @@ static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
     Builder.SetInsertPoint(OldSwitchBB->getTerminator());
     auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr,
                                                         0, 0, "ResumeFn.addr");
-    auto *Load = Builder.CreateLoad(GepIndex);
+    auto *Load = Builder.CreateLoad(
+        Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex);
     auto *NullPtr =
         ConstantPointerNull::get(cast<PointerType>(Load->getType()));
     auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
@@ -777,6 +777,8 @@ static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) {
 }
 
 static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+  EliminateUnreachableBlocks(F);
+
   coro::Shape Shape(F);
   if (!Shape.CoroBegin)
     return;
@@ -828,6 +830,7 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
 // split.
 static void prepareForSplit(Function &F, CallGraph &CG) {
   Module &M = *F.getParent();
+  LLVMContext &Context = F.getContext();
 #ifndef NDEBUG
   Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
   assert(DevirtFn && "coro.devirt.trigger function not found");
@@ -842,10 +845,12 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
   //    call void %1(i8* null)
   coro::LowererBase Lowerer(M);
   Instruction *InsertPt = F.getEntryBlock().getTerminator();
-  auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext()));
+  auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
   auto *DevirtFnAddr =
       Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
-  auto *IndirectCall = CallInst::Create(DevirtFnAddr, Null, "", InsertPt);
+  FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
+                                         {Type::getInt8PtrTy(Context)}, false);
+  auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
 
   // Update CG graph with an indirect call we just added.
   CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
@@ -861,7 +866,7 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
 
   LLVMContext &C = M.getContext();
   auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
-                                 /*IsVarArgs=*/false);
+                                 /*isVarArg=*/false);
   Function *DevirtFn =
       Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
                        CORO_DEVIRT_TRIGGER_FN, &M);
@@ -941,7 +946,12 @@ struct CoroSplit : public CallGraphSCCPass {
 
 char CoroSplit::ID = 0;
 
-INITIALIZE_PASS(
+INITIALIZE_PASS_BEGIN(
+    CoroSplit, "coro-split",
+    "Split coroutine into a set of functions driving its state machine", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(
     CoroSplit, "coro-split",
     "Split coroutine into a set of functions driving its state machine", false,
     false)
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index cf84f916e24b..a581d1d21169 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -1,9 +1,8 @@
 //===- Coroutines.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -177,15 +176,15 @@ static void buildCGN(CallGraph &CG, CallGraphNode *Node) {
 
   // Look for calls by this function.
   for (Instruction &I : instructions(F))
-    if (CallSite CS = CallSite(cast<Value>(&I))) {
-      const Function *Callee = CS.getCalledFunction();
+    if (auto *Call = dyn_cast<CallBase>(&I)) {
+      const Function *Callee = Call->getCalledFunction();
       if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
         // Indirect calls of intrinsics are not allowed so no need to check.
         // We can be more precise here by using TargetArg returned by
         // Intrinsic::isLeaf.
-        Node->addCalledFunction(CS, CG.getCallsExternalNode());
+        Node->addCalledFunction(Call, CG.getCallsExternalNode());
       else if (!Callee->isIntrinsic())
-        Node->addCalledFunction(CS, CG.getOrInsertFunction(Callee));
+        Node->addCalledFunction(Call, CG.getOrInsertFunction(Callee));
     }
 }
 
diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp
index 07138718ce2c..c50805692b98 100644
--- a/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -1,9 +1,8 @@
 //===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,8 +31,17 @@ using namespace llvm;
 
 #define DEBUG_TYPE "inline"
 
-PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) {
-  InlineFunctionInfo IFI;
+PreservedAnalyses AlwaysInlinerPass::run(Module &M,
+                                         ModuleAnalysisManager &MAM) {
+  // Add inline assumptions during code generation.
+  FunctionAnalysisManager &FAM =
+      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+      [&](Function &F) -> AssumptionCache & {
+    return FAM.getResult<AssumptionAnalysis>(F);
+  };
+  InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+
   SmallSetVector<CallSite, 16> Calls;
   bool Changed = false;
   SmallVector<Function *, 16> InlinedFunctions;
@@ -146,11 +154,20 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
   Function *Callee = CS.getCalledFunction();
 
   // Only inline direct calls to functions with always-inline attributes
-  // that are viable for inlining. FIXME: We shouldn't even get here for
-  // declarations.
-  if (Callee && !Callee->isDeclaration() &&
-      CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
-    return InlineCost::getAlways("always inliner");
+  // that are viable for inlining.
+  if (!Callee)
+    return InlineCost::getNever("indirect call");
+
+  // FIXME: We shouldn't even get here for declarations.
+  if (Callee->isDeclaration())
+    return InlineCost::getNever("no definition");
+
+  if (!CS.hasFnAttr(Attribute::AlwaysInline))
+    return InlineCost::getNever("no alwaysinline attribute");
+
+  auto IsViable = isInlineViable(*Callee);
+  if (!IsViable)
+    return InlineCost::getNever(IsViable.message);
 
-  return InlineCost::getNever("always inliner");
+  return InlineCost::getAlways("always inliner");
 }
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4663de0b049e..95a9f31cced3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -1,9 +1,8 @@
 //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -59,11 +58,13 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -243,6 +244,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
     assert(CS.getCalledFunction() == F);
     Instruction *Call = CS.getInstruction();
     const AttributeList &CallPAL = CS.getAttributes();
+    IRBuilder<NoFolder> IRB(Call);
 
     // Loop over the operands, inserting GEP and loads in the caller as
     // appropriate.
@@ -261,10 +263,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
             ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
           Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
-          Value *Idx = GetElementPtrInst::Create(
-              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+          auto *Idx =
+              IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
           // TODO: Tell AA about the new values?
-          Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+          Args.push_back(IRB.CreateLoad(STy->getElementType(i), Idx,
+                                        Idx->getName() + ".val"));
           ArgAttrVec.push_back(AttributeSet());
         }
       } else if (!I->use_empty()) {
@@ -294,13 +297,13 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
                 ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
             }
             // And create a GEP to extract those indices.
-            V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
-                                          V->getName() + ".idx", Call);
+            V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
             Ops.clear();
           }
           // Since we're replacing a load make sure we take the alignment
           // of the previous load.
-          LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+          LoadInst *newLoad =
+              IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
           newLoad->setAlignment(OrigLoad->getAlignment());
           // Transfer the AA info too.
           AAMDNodes AAInfo;
@@ -476,9 +479,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
   return NF;
 }
 
-/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
-static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
+/// Return true if we can prove that all callers pass in a valid pointer for the
+/// specified function argument.
+static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
   Function *Callee = Arg->getParent();
   const DataLayout &DL = Callee->getParent()->getDataLayout();
 
@@ -490,7 +493,7 @@ static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
     CallSite CS(U);
     assert(CS && "Should only have direct calls!");
 
-    if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+    if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL))
       return false;
   }
   return true;
@@ -563,8 +566,8 @@ static void markIndicesSafe(const IndicesVector &ToMark,
 /// This method limits promotion of aggregates to only promote up to three
 /// elements of the aggregate in order to avoid exploding the number of
 /// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
-                                    AAResults &AAR, unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
+                                    unsigned MaxElements) {
   using GEPIndicesSet = std::set<IndicesVector>;
 
   // Quick exit for unused arguments
@@ -586,9 +589,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
   //
   // This set will contain all sets of indices that are loaded in the entry
   // block, and thus are safe to unconditionally load in the caller.
-  //
-  // This optimization is also safe for InAlloca parameters, because it verifies
-  // that the address isn't captured.
   GEPIndicesSet SafeToUnconditionallyLoad;
 
   // This set contains all the sets of indices that we are planning to promote.
@@ -596,9 +596,28 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
   GEPIndicesSet ToPromote;
 
   // If the pointer is always valid, any load with first index 0 is valid.
-  if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
+
+  if (ByValTy)
     SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
 
+  // Whenever a new underlying type for the operand is found, make sure it's
+  // consistent with the GEPs and loads we've already seen and, if necessary,
+  // use it to see if all incoming pointers are valid (which implies the 0-index
+  // is safe).
+  Type *BaseTy = ByValTy;
+  auto UpdateBaseTy = [&](Type *NewBaseTy) {
+    if (BaseTy)
+      return BaseTy == NewBaseTy;
+
+    BaseTy = NewBaseTy;
+    if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
+      assert(SafeToUnconditionallyLoad.empty());
+      SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+    }
+
+    return true;
+  };
+
   // First, iterate the entry block and mark loads of (geps of) arguments as
   // safe.
   BasicBlock &EntryBlock = Arg->getParent()->front();
@@ -621,6 +640,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
               // right away, can't promote this argument at all.
               return false;
 
+          if (!UpdateBaseTy(GEP->getSourceElementType()))
+            return false;
+
           // Indices checked out, mark them as safe
           markIndicesSafe(Indices, SafeToUnconditionallyLoad);
           Indices.clear();
@@ -628,6 +650,11 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
       } else if (V == Arg) {
         // Direct loads are equivalent to a GEP with a single 0 index.
         markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+
+        if (BaseTy && LI->getType() != BaseTy)
+          return false;
+
+        BaseTy = LI->getType();
       }
     }
 
@@ -645,6 +672,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
       Loads.push_back(LI);
       // Direct loads are equivalent to a GEP with a zero index and then a load.
       Operands.push_back(0);
+
+      if (!UpdateBaseTy(LI->getType()))
+        return false;
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
       if (GEP->use_empty()) {
         // Dead GEP's cause trouble later.  Just remove them if we run into
@@ -653,10 +683,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
         // TODO: This runs the above loop over and over again for dead GEPs
         // Couldn't we just do increment the UI iterator earlier and erase the
         // use?
-        return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
-                                       MaxElements);
+        return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
       }
 
+      if (!UpdateBaseTy(GEP->getSourceElementType()))
+        return false;
+
       // Ensure that all of the indices are constants.
       for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
            ++i)
@@ -853,6 +885,11 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
   if (F->isVarArg())
     return nullptr;
 
+  // Don't transform functions that receive inallocas, as the transformation may
+  // not be safe depending on calling convention.
+  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+    return nullptr;
+
   // First check: see if there are any pointer arguments!  If not, quick exit.
   SmallVector<Argument *, 16> PointerArgs;
   for (Argument &I : F->args())
@@ -911,8 +948,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
 
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe, if the passed value is densely
-    // packed or if we can prove the padding bytes are never accessed. This does
-    // not apply to inalloca.
+    // packed or if we can prove the padding bytes are never accessed.
     bool isSafeToPromote =
         PtrArg->hasByValAttr() &&
         (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
@@ -963,8 +999,9 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
     }
 
     // Otherwise, see if we can promote the pointer to its value.
-    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
-                                MaxElements))
+    Type *ByValTy =
+        PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
+    if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
       ArgsToPromote.insert(PtrArg);
   }
 
@@ -1101,7 +1138,9 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
         CallGraphNode *NewCalleeNode =
             CG.getOrInsertFunction(NewCS.getCalledFunction());
         CallGraphNode *CallerNode = CG[Caller];
-        CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+        CallerNode->replaceCallEdge(*cast<CallBase>(OldCS.getInstruction()),
+                                    *cast<CallBase>(NewCS.getInstruction()),
+                                    NewCalleeNode);
       };
 
       const TargetTransformInfo &TTI =
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
new file mode 100644
index 000000000000..2a52c6b9b4ad
--- /dev/null
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -0,0 +1,1690 @@
+//===- Attributor.cpp - Module-wide attribute deduction -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interprocedural pass that deduces and/or propagates
+// attributes. This is done in an abstract interpretation style fixpoint
+// iteration. See the Attributor.h file comment and the class descriptions in
+// that file for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "attributor"
+
+STATISTIC(NumFnWithExactDefinition,
+          "Number of function with exact definitions");
+STATISTIC(NumFnWithoutExactDefinition,
+          "Number of function without exact definitions");
+STATISTIC(NumAttributesTimedOut,
+          "Number of abstract attributes timed out before fixpoint");
+STATISTIC(NumAttributesValidFixpoint,
+          "Number of abstract attributes in a valid fixpoint state");
+STATISTIC(NumAttributesManifested,
+          "Number of abstract attributes manifested in IR");
+STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind");
+
+STATISTIC(NumFnUniqueReturned, "Number of function with unique return");
+STATISTIC(NumFnKnownReturns, "Number of function with known return values");
+STATISTIC(NumFnArgumentReturned,
+          "Number of function arguments marked returned");
+STATISTIC(NumFnNoSync, "Number of functions marked nosync");
+STATISTIC(NumFnNoFree, "Number of functions marked nofree");
+STATISTIC(NumFnReturnedNonNull,
+          "Number of function return values marked nonnull");
+STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
+STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
+STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
+
+// TODO: Determine a good default value.
+//
+// In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads
+// (when run with the first 5 abstract attributes). The results also indicate
+// that we never reach 32 iterations but always find a fixpoint sooner.
+//
+// This will become more evolved once we perform two interleaved fixpoint
+// iterations: bottom-up and top-down.
+static cl::opt<unsigned>
+    MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
+                          cl::desc("Maximal number of fixpoint iterations."),
+                          cl::init(32));
+
+static cl::opt<bool> DisableAttributor(
+    "attributor-disable", cl::Hidden,
+    cl::desc("Disable the attributor inter-procedural deduction pass."),
+    cl::init(true));
+
+static cl::opt<bool> VerifyAttributor(
+    "attributor-verify", cl::Hidden,
+    cl::desc("Verify the Attributor deduction and "
+             "manifestation of attributes -- may issue false-positive errors"),
+    cl::init(false));
+
+/// Logic operators for the change status enum class.
+///
+///{
+ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) {
+  return l == ChangeStatus::CHANGED ? l : r;
+}
+ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
+  return l == ChangeStatus::UNCHANGED ? l : r;
+}
+///}
+
+/// Helper to adjust the statistics.
+static void bookkeeping(AbstractAttribute::ManifestPosition MP,
+                        const Attribute &Attr) {
+  if (!AreStatisticsEnabled())
+    return;
+
+  if (!Attr.isEnumAttribute())
+    return;
+  switch (Attr.getKindAsEnum()) {
+  case Attribute::NoUnwind:
+    NumFnNoUnwind++;
+    return;
+  case Attribute::Returned:
+    NumFnArgumentReturned++;
+    return;
+  case Attribute::NoSync:
+    NumFnNoSync++;
+    break;
+  case Attribute::NoFree:
+    NumFnNoFree++;
+    break;
+  case Attribute::NonNull:
+    switch (MP) {
+    case AbstractAttribute::MP_RETURNED:
+      NumFnReturnedNonNull++;
+      break;
+    case AbstractAttribute::MP_ARGUMENT:
+      NumFnArgumentNonNull++;
+      break;
+    case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+      NumCSArgumentNonNull++;
+      break;
+    default:
+      break;
+    }
+    break;
+  case Attribute::WillReturn:
+    NumFnWillReturn++;
+    break;
+  default:
+    return;
+  }
+}
+
+template <typename StateTy>
+using followValueCB_t = std::function<bool(Value *, StateTy &State)>;
+template <typename StateTy>
+using visitValueCB_t = std::function<void(Value *, StateTy &State)>;
+
+/// Recursively visit all values that might become \p InitV at some point. This
+/// will be done by looking through cast instructions, selects, phis, and calls
+/// with the "returned" attribute. The callback \p FollowValueCB is asked before
+/// a potential origin value is looked at. If no \p FollowValueCB is passed, a
+/// default one is used that will make sure we visit every value only once. Once
+/// we cannot look through the value any further, the callback \p VisitValueCB
+/// is invoked and passed the current value and the \p State. Returns false if
+/// more than \p MaxValues values would have to be followed, true otherwise.
+template <typename StateTy>
+static bool genericValueTraversal(
+    Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB,
+    followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) {
+
+  SmallPtrSet<Value *, 16> Visited;
+  followValueCB_t<StateTy> DefaultFollowValueCB = [&](Value *Val, StateTy &) {
+    return Visited.insert(Val).second;
+  };
+
+  if (!FollowValueCB)
+    FollowValueCB = &DefaultFollowValueCB;
+
+  SmallVector<Value *, 16> Worklist;
+  Worklist.push_back(InitV);
+
+  int Iteration = 0;
+  do {
+    Value *V = Worklist.pop_back_val();
+
+    // Check if we should process the current value. To prevent endless
+    // recursion keep a record of the values we followed!
+    if (!(*FollowValueCB)(V, State))
+      continue;
+
+    // Make sure we limit the compile time for complex expressions.
+    if (Iteration++ >= MaxValues)
+      return false;
+
+    // Explicitly look through calls with a "returned" attribute if we do
+    // not have a pointer as stripPointerCasts only works on them.
+    if (V->getType()->isPointerTy()) {
+      V = V->stripPointerCasts();
+    } else {
+      CallSite CS(V);
+      if (CS && CS.getCalledFunction()) {
+        Value *NewV = nullptr;
+        for (Argument &Arg : CS.getCalledFunction()->args())
+          if (Arg.hasReturnedAttr()) {
+            NewV = CS.getArgOperand(Arg.getArgNo());
+            break;
+          }
+        if (NewV) {
+          Worklist.push_back(NewV);
+          continue;
+        }
+      }
+    }
+
+    // Look through select instructions, visit both potential values.
+    if (auto *SI = dyn_cast<SelectInst>(V)) {
+      Worklist.push_back(SI->getTrueValue());
+      Worklist.push_back(SI->getFalseValue());
+      continue;
+    }
+
+    // Look through phi nodes, visit all operands.
+    if (auto *PHI = dyn_cast<PHINode>(V)) {
+      Worklist.append(PHI->op_begin(), PHI->op_end());
+      continue;
+    }
+
+    // Once a leaf is reached we inform the user through the callback.
+    VisitValueCB(V, State);
+  } while (!Worklist.empty());
+
+  // All values have been visited.
+  return true;
+}
+
+/// Helper to identify the correct offset into an attribute list.
+static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
+                             unsigned ArgNo = 0) {
+  switch (MP) {
+  case AbstractAttribute::MP_ARGUMENT:
+  case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+    return ArgNo + AttributeList::FirstArgIndex;
+  case AbstractAttribute::MP_FUNCTION:
+    return AttributeList::FunctionIndex;
+  case AbstractAttribute::MP_RETURNED:
+    return AttributeList::ReturnIndex;
+  }
+  llvm_unreachable("Unknown manifest position!");
+}
+
+/// Return true if \p New is equal or worse than \p Old.
+static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
+  if (!Old.isIntAttribute())
+    return true;
+
+  return Old.getValueAsInt() >= New.getValueAsInt();
+}
+
+/// Return true if the information provided by \p Attr was added to the
+/// attribute list \p Attrs. This is only the case if it was not already present
+/// in \p Attrs at the position described by \p MP and \p ArgNo.
+static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
+                             AttributeList &Attrs,
+                             AbstractAttribute::ManifestPosition MP,
+                             unsigned ArgNo = 0) {
+  unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+
+  if (Attr.isEnumAttribute()) {
+    Attribute::AttrKind Kind = Attr.getKindAsEnum();
+    if (Attrs.hasAttribute(AttrIdx, Kind))
+      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+        return false;
+    Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+    return true;
+  }
+  if (Attr.isStringAttribute()) {
+    StringRef Kind = Attr.getKindAsString();
+    if (Attrs.hasAttribute(AttrIdx, Kind))
+      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+        return false;
+    Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+    return true;
+  }
+
+  llvm_unreachable("Expected enum or string attribute!");
+}
+
+ChangeStatus AbstractAttribute::update(Attributor &A) {
+  ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+  if (getState().isAtFixpoint())
+    return HasChanged;
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Update: " << *this << "\n");
+
+  HasChanged = updateImpl(A);
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Update " << HasChanged << " " << *this
+                    << "\n");
+
+  return HasChanged;
+}
+
+ChangeStatus AbstractAttribute::manifest(Attributor &A) {
+  assert(getState().isValidState() &&
+         "Attempted to manifest an invalid state!");
+  assert(getAssociatedValue() &&
+         "Attempted to manifest an attribute without associated value!");
+
+  ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+  SmallVector<Attribute, 4> DeducedAttrs;
+  getDeducedAttributes(DeducedAttrs);
+
+  Function &ScopeFn = getAnchorScope();
+  LLVMContext &Ctx = ScopeFn.getContext();
+  ManifestPosition MP = getManifestPosition();
+
+  AttributeList Attrs;
+  SmallVector<unsigned, 4> ArgNos;
+
+  // In the following some generic code that will manifest attributes in
+  // DeducedAttrs if they improve the current IR. Due to the different
+  // annotation positions we use the underlying AttributeList interface.
+  // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
+
+  switch (MP) {
+  case MP_ARGUMENT:
+    ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
+    Attrs = ScopeFn.getAttributes();
+    break;
+  case MP_FUNCTION:
+  case MP_RETURNED:
+    ArgNos.push_back(0);
+    Attrs = ScopeFn.getAttributes();
+    break;
+  case MP_CALL_SITE_ARGUMENT: {
+    CallSite CS(&getAnchoredValue());
+    for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
+      if (CS.getArgOperand(u) == getAssociatedValue())
+        ArgNos.push_back(u);
+    Attrs = CS.getAttributes();
+  }
+  }
+
+  for (const Attribute &Attr : DeducedAttrs) {
+    for (unsigned ArgNo : ArgNos) {
+      if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
+        continue;
+
+      HasChanged = ChangeStatus::CHANGED;
+      bookkeeping(MP, Attr);
+    }
+  }
+
+  if (HasChanged == ChangeStatus::UNCHANGED)
+    return HasChanged;
+
+  switch (MP) {
+  case MP_ARGUMENT:
+  case MP_FUNCTION:
+  case MP_RETURNED:
+    ScopeFn.setAttributes(Attrs);
+    break;
+  case MP_CALL_SITE_ARGUMENT:
+    CallSite(&getAnchoredValue()).setAttributes(Attrs);
+  }
+
+  return HasChanged;
+}
+
+Function &AbstractAttribute::getAnchorScope() {
+  Value &V = getAnchoredValue();
+  if (isa<Function>(V))
+    return cast<Function>(V);
+  if (isa<Argument>(V))
+    return *cast<Argument>(V).getParent();
+  if (isa<Instruction>(V))
+    return *cast<Instruction>(V).getFunction();
+  llvm_unreachable("No scope for anchored value found!");
+}
+
+const Function &AbstractAttribute::getAnchorScope() const {
+  return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+}
+
+/// -----------------------NoUnwind Function Attribute--------------------------
+
+struct AANoUnwindFunction : AANoUnwind, BooleanState {
+
+  AANoUnwindFunction(Function &F, InformationCache &InfoCache)
+      : AANoUnwind(F, InfoCache) {}
+
+  /// See AbstractAttribute::getState()
+  /// {
+  AbstractState &getState() override { return *this; }
+  const AbstractState &getState() const override { return *this; }
+  /// }
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nounwind" : "may-unwind";
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AANoUnwind::isAssumedNoUnwind().
+  bool isAssumedNoUnwind() const override { return getAssumed(); }
+
+  /// See AANoUnwind::isKnownNoUnwind().
+  bool isKnownNoUnwind() const override { return getKnown(); }
+};
+
+ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  // The map from instruction opcodes to those instructions in the function.
+  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+  auto Opcodes = {
+      (unsigned)Instruction::Invoke,      (unsigned)Instruction::CallBr,
+      (unsigned)Instruction::Call,        (unsigned)Instruction::CleanupRet,
+      (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+  for (unsigned Opcode : Opcodes) {
+    for (Instruction *I : OpcodeInstMap[Opcode]) {
+      if (!I->mayThrow())
+        continue;
+
+      auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I);
+
+      if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) {
+        indicatePessimisticFixpoint();
+        return ChangeStatus::CHANGED;
+      }
+    }
+  }
+  return ChangeStatus::UNCHANGED;
+}
+
+/// --------------------- Function Return Values -------------------------------
+
+/// "Attribute" that collects all potential returned values and the return
+/// instructions that they arise from.
+///
+/// If there is a unique returned value R, the manifest method will:
+///   - mark R with the "returned" attribute, if R is an argument.
+class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
+
+  /// Mapping of values potentially returned by the associated function to the
+  /// return instructions that might return them.
+  DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues;
+
+  /// State flags
+  ///
+  ///{
+  bool IsFixed;
+  bool IsValidState;
+  bool HasOverdefinedReturnedCalls;
+  ///}
+
+  /// Collect values that could become \p V in the set \p Values, each mapped to
+  /// \p ReturnInsts.
+  void collectValuesRecursively(
+      Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts,
+      DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) {
+
+    visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) {
+      assert(!isa<Instruction>(Val) ||
+             &getAnchorScope() == cast<Instruction>(Val)->getFunction());
+      Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end());
+    };
+
+    bool UnusedBool;
+    bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB);
+
+    // If we did abort the above traversal we haven't seen all the values.
+    // Consequently, we cannot know if the information we would derive is
+    // accurate so we give up early.
+    if (!Success)
+      indicatePessimisticFixpoint();
+  }
+
+public:
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAReturnedValuesImpl(Function &F, InformationCache &InfoCache)
+      : AAReturnedValues(F, InfoCache) {
+    // We do not have an associated argument yet.
+    AssociatedVal = nullptr;
+  }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // Reset the state.
+    AssociatedVal = nullptr;
+    IsFixed = false;
+    IsValidState = true;
+    HasOverdefinedReturnedCalls = false;
+    ReturnedValues.clear();
+
+    Function &F = cast<Function>(getAnchoredValue());
+
+    // The map from instruction opcodes to those instructions in the function.
+    auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+    // Look through all arguments, if one is marked as returned we are done.
+    for (Argument &Arg : F.args()) {
+      if (Arg.hasReturnedAttr()) {
+
+        auto &ReturnInstSet = ReturnedValues[&Arg];
+        for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
+          ReturnInstSet.insert(cast<ReturnInst>(RI));
+
+        indicateOptimisticFixpoint();
+        return;
+      }
+    }
+
+    // If no argument was marked as returned we look at all return instructions
+    // and collect potentially returned values.
+    for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) {
+      SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)});
+      collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet,
+                               ReturnedValues);
+    }
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override;
+
+  /// See AbstractAttribute::getState(...).
+  AbstractState &getState() override { return *this; }
+
+  /// See AbstractAttribute::getState(...).
+  const AbstractState &getState() const override { return *this; }
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+  /// See AbstractAttribute::updateImpl(Attributor &A).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// Return the number of potential return values, -1 if unknown.
+  size_t getNumReturnValues() const {
+    return isValidState() ? ReturnedValues.size() : -1;
+  }
+
+  /// Return an assumed unique return value if a single candidate is found. If
+  /// there cannot be one, return a nullptr. If it is not clear yet, return the
+  /// Optional::NoneType.
+  Optional<Value *> getAssumedUniqueReturnValue() const;
+
+  /// See AbstractState::checkForallReturnedValues(...).
+  bool
+  checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override;
+
+  /// Pretty print the attribute similar to the IR representation.
+  const std::string getAsStr() const override;
+
+  /// See AbstractState::isAtFixpoint().
+  bool isAtFixpoint() const override { return IsFixed; }
+
+  /// See AbstractState::isValidState().
+  bool isValidState() const override { return IsValidState; }
+
+  /// See AbstractState::indicateOptimisticFixpoint(...).
+  void indicateOptimisticFixpoint() override {
+    IsFixed = true;
+    IsValidState &= true;
+  }
+  void indicatePessimisticFixpoint() override {
+    IsFixed = true;
+    IsValidState = false;
+  }
+};
+
+ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
+  ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+  // Bookkeeping.
+  assert(isValidState());
+  NumFnKnownReturns++;
+
+  // Check if we have an assumed unique return value that we could manifest.
+  Optional<Value *> UniqueRV = getAssumedUniqueReturnValue();
+
+  if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+    return Changed;
+
+  // Bookkeeping.
+  NumFnUniqueReturned++;
+
+  // If the assumed unique return value is an argument, annotate it.
+  if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+    AssociatedVal = UniqueRVArg;
+    Changed = AbstractAttribute::manifest(A) | Changed;
+  }
+
+  return Changed;
+}
+
+const std::string AAReturnedValuesImpl::getAsStr() const {
+  return (isAtFixpoint() ? "returns(#" : "may-return(#") +
+         (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+}
+
+Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
+  // If checkForallReturnedValues provides a unique value, ignoring potential
+  // undef values that can also be present, it is assumed to be the actual
+  // return value and forwarded to the caller of this method. If there are
+  // multiple, a nullptr is returned indicating there cannot be a unique
+  // returned value.
+  Optional<Value *> UniqueRV;
+
+  std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+    // If we found a second returned value and neither the current nor the saved
+    // one is an undef, there is no unique returned value. Undefs are special
+    // since we can pretend they have any value.
+    if (UniqueRV.hasValue() && UniqueRV != &RV &&
+        !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) {
+      UniqueRV = nullptr;
+      return false;
+    }
+
+    // Do not overwrite a value with an undef.
+    if (!UniqueRV.hasValue() || !isa<UndefValue>(RV))
+      UniqueRV = &RV;
+
+    return true;
+  };
+
+  if (!checkForallReturnedValues(Pred))
+    UniqueRV = nullptr;
+
+  return UniqueRV;
+}
+
+bool AAReturnedValuesImpl::checkForallReturnedValues(
+    std::function<bool(Value &)> &Pred) const {
+  if (!isValidState())
+    return false;
+
+  // Check all returned values but ignore call sites as long as we have not
+  // encountered an overdefined one during an update.
+  for (auto &It : ReturnedValues) {
+    Value *RV = It.first;
+
+    ImmutableCallSite ICS(RV);
+    if (ICS && !HasOverdefinedReturnedCalls)
+      continue;
+
+    if (!Pred(*RV))
+      return false;
+  }
+
+  return true;
+}
+
+ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+
+  // Check if we know of any values returned by the associated function,
+  // if not, we are done.
+  if (getNumReturnValues() == 0) {
+    indicateOptimisticFixpoint();
+    return ChangeStatus::UNCHANGED;
+  }
+
+  // Check if any of the returned values is a call site we can refine.
+  decltype(ReturnedValues) AddRVs;
+  bool HasCallSite = false;
+
+  // Look at all returned call sites.
+  for (auto &It : ReturnedValues) {
+    SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second;
+    Value *RV = It.first;
+    LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV
+                      << "\n");
+
+    // Only call sites can change during an update, ignore the rest.
+    CallSite RetCS(RV);
+    if (!RetCS)
+      continue;
+
+    // For now, any call site we see will prevent us from directly fixing the
+    // state. However, if the information on the callees is fixed, the call
+    // sites will be removed and we will fix the information for this state.
+    HasCallSite = true;
+
+    // Look up the returned values attribute associated with the call site.
+    auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
+    if (!RetCSAA) {
+      HasOverdefinedReturnedCalls = true;
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
+                        << ") with " << (RetCSAA ? "invalid" : "no")
+                        << " associated state\n");
+      continue;
+    }
+
+    // Try to find an assumed unique return value for the called function.
+    Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue();
+
+    // If no assumed unique return value was found due to the lack of
+    // candidates, we may need to resolve more calls (through more update
+    // iterations) or the called function will not return. Either way, we simply
+    // stick with the call sites as return values. Because there were not
+    // multiple possibilities, we do not treat it as overdefined.
+    if (!AssumedUniqueRV.hasValue())
+      continue;
+
+    // If multiple, non-refinable values were found, there cannot be a unique
+    // return value for the called function. The returned call is overdefined!
+    if (!AssumedUniqueRV.getValue()) {
+      HasOverdefinedReturnedCalls = true;
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple "
+                           "potentially returned values\n");
+      continue;
+    }
+
+    LLVM_DEBUG({
+      bool UniqueRVIsKnown = RetCSAA->isAtFixpoint();
+      dbgs() << "[AAReturnedValues] Returned call site "
+             << (UniqueRVIsKnown ? "known" : "assumed")
+             << " unique return value: " << *AssumedUniqueRV << "\n";
+    });
+
+    // The assumed unique return value.
+    Value *AssumedRetVal = AssumedUniqueRV.getValue();
+
+    // If the assumed unique return value is an argument, lookup the matching
+    // call site operand and recursively collect new returned values.
+    // If it is not an argument, it is just put into the set of returned values
+    // as we would have already looked through casts, phis, and similar values.
+    if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal))
+      collectValuesRecursively(A,
+                               RetCS.getArgOperand(AssumedRetArg->getArgNo()),
+                               ReturnInsts, AddRVs);
+    else
+      AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end());
+  }
+
+  // Keep track of any change to trigger updates on dependent attributes.
+  ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+  for (auto &It : AddRVs) {
+    assert(!It.second.empty() && "Entry does not add anything.");
+    auto &ReturnInsts = ReturnedValues[It.first];
+    for (ReturnInst *RI : It.second)
+      if (ReturnInsts.insert(RI).second) {
+        LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
+                          << *It.first << " => " << *RI << "\n");
+        Changed = ChangeStatus::CHANGED;
+      }
+  }
+
+  // If there is no call site in the returned values we are done.
+  if (!HasCallSite) {
+    indicateOptimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+
+  return Changed;
+}
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncFunction : AANoSync, BooleanState {
+
+  AANoSyncFunction(Function &F, InformationCache &InfoCache)
+      : AANoSync(F, InfoCache) {}
+
+  /// See AbstractAttribute::getState()
+  /// {
+  AbstractState &getState() override { return *this; }
+  const AbstractState &getState() const override { return *this; }
+  /// }
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nosync" : "may-sync";
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AANoSync::isAssumedNoSync()
+  bool isAssumedNoSync() const override { return getAssumed(); }
+
+  /// See AANoSync::isKnownNoSync()
+  bool isKnownNoSync() const override { return getKnown(); }
+
+  /// Helper function used to determine whether an instruction is non-relaxed
+  /// atomic. In other words, if an atomic instruction does not have unordered
+  /// or monotonic ordering.
+  static bool isNonRelaxedAtomic(Instruction *I);
+
+  /// Helper function used to determine whether an instruction is volatile.
+  static bool isVolatile(Instruction *I);
+
+  /// Helper function used to check if an intrinsic is nosync. Currently only
+  /// mem* intrinsics (memcpy, memmove, memset) are handled.
+  static bool isNoSyncIntrinsic(Instruction *I);
+};
+
+bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+  if (!I->isAtomic())
+    return false;
+
+  AtomicOrdering Ordering;
+  switch (I->getOpcode()) {
+  case Instruction::AtomicRMW:
+    Ordering = cast<AtomicRMWInst>(I)->getOrdering();
+    break;
+  case Instruction::Store:
+    Ordering = cast<StoreInst>(I)->getOrdering();
+    break;
+  case Instruction::Load:
+    Ordering = cast<LoadInst>(I)->getOrdering();
+    break;
+  case Instruction::Fence: {
+    auto *FI = cast<FenceInst>(I);
+    if (FI->getSyncScopeID() == SyncScope::SingleThread)
+      return false;
+    Ordering = FI->getOrdering();
+    break;
+  }
+  case Instruction::AtomicCmpXchg: {
+    AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
+    AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
+    // Only if both orderings are relaxed, then it can be treated as relaxed.
+    // Otherwise it is non-relaxed.
+    if (Success != AtomicOrdering::Unordered &&
+        Success != AtomicOrdering::Monotonic)
+      return true;
+    if (Failure != AtomicOrdering::Unordered &&
+        Failure != AtomicOrdering::Monotonic)
+      return true;
+    return false;
+  }
+  default:
+    llvm_unreachable(
+        "New atomic operations need to be known in the attributor.");
+  }
+
+  // Unordered and monotonic orderings are treated as relaxed.
+  if (Ordering == AtomicOrdering::Unordered ||
+      Ordering == AtomicOrdering::Monotonic)
+    return false;
+  return true;
+}
+
+/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
+/// FIXME: We should improve the handling of intrinsics.
+bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    /// Element wise atomic memory intrinsics can only be unordered,
+    /// therefore nosync.
+    case Intrinsic::memset_element_unordered_atomic:
+    case Intrinsic::memmove_element_unordered_atomic:
+    case Intrinsic::memcpy_element_unordered_atomic:
+      return true;
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      if (!cast<MemIntrinsic>(II)->isVolatile())
+        return true;
+      return false;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
+bool AANoSyncFunction::isVolatile(Instruction *I) {
+  assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
+         "Calls should not be checked here");
+
+  switch (I->getOpcode()) {
+  case Instruction::AtomicRMW:
+    return cast<AtomicRMWInst>(I)->isVolatile();
+  case Instruction::Store:
+    return cast<StoreInst>(I)->isVolatile();
+  case Instruction::Load:
+    return cast<LoadInst>(I)->isVolatile();
+  case Instruction::AtomicCmpXchg:
+    return cast<AtomicCmpXchgInst>(I)->isVolatile();
+  default:
+    return false;
+  }
+}
+
+ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  /// We are looking for volatile instructions or Non-Relaxed atomics.
+  /// FIXME: We should improve the handling of intrinsics.
+  for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
+    ImmutableCallSite ICS(I);
+    auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+
+    if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
+      continue;
+
+    if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
+        !ICS.hasFnAttr(Attribute::NoSync)) {
+      indicatePessimisticFixpoint();
+      return ChangeStatus::CHANGED;
+    }
+
+    if (ICS)
+      continue;
+
+    if (!isVolatile(I) && !isNonRelaxedAtomic(I))
+      continue;
+
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+
+  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+  auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+                  (unsigned)Instruction::Call};
+
+  for (unsigned Opcode : Opcodes) {
+    for (Instruction *I : OpcodeInstMap[Opcode]) {
+      // At this point we handled all read/write effects and they are all
+      // nosync, so they can be skipped.
+      if (I->mayReadOrWriteMemory())
+        continue;
+
+      ImmutableCallSite ICS(I);
+
+      // non-convergent and readnone imply nosync.
+      if (!ICS.isConvergent())
+        continue;
+
+      indicatePessimisticFixpoint();
+      return ChangeStatus::CHANGED;
+    }
+  }
+
+  return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeFunction : AbstractAttribute, BooleanState {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANoFreeFunction(Function &F, InformationCache &InfoCache)
+      : AbstractAttribute(F, InfoCache) {}
+
+  /// See AbstractAttribute::getState()
+  ///{
+  AbstractState &getState() override { return *this; }
+  const AbstractState &getState() const override { return *this; }
+  ///}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nofree" : "may-free";
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  /// Return true if "nofree" is assumed.
+  bool isAssumedNoFree() const { return getAssumed(); }
+
+  /// Return true if "nofree" is known.
+  bool isKnownNoFree() const { return getKnown(); }
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+};
+
+ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
+  // Cached lookup from opcode to the instructions of the anchor function.
+  auto &InstsByOpcode =
+      InfoCache.getOpcodeInstMapForFunction(getAnchorScope());
+
+  // Inspect every call-like instruction: invoke, callbr, and call.
+  auto CallLikeOpcodes = {(unsigned)Instruction::Invoke,
+                          (unsigned)Instruction::CallBr,
+                          (unsigned)Instruction::Call};
+  for (unsigned Opc : CallLikeOpcodes) {
+    for (Instruction *CallI : InstsByOpcode[Opc]) {
+      ImmutableCallSite ICS(CallI);
+      auto *NoFreeInfo = A.getAAFor<AANoFreeFunction>(*this, *CallI);
+      // A call is fine if it is assumed or already marked "nofree".
+      bool AssumedNoFree = NoFreeInfo && NoFreeInfo->isAssumedNoFree();
+      if (AssumedNoFree || ICS.hasFnAttr(Attribute::NoFree))
+        continue;
+      indicatePessimisticFixpoint();
+      return ChangeStatus::CHANGED;
+    }
+  }
+  return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+struct AANonNullImpl : AANonNull, BooleanState {
+
+  AANonNullImpl(Value &V, InformationCache &InfoCache)
+      : AANonNull(V, InfoCache) {}
+
+  AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
+                InformationCache &InfoCache)
+      : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+
+  /// See AbstractAttribute::getState()
+  /// {
+  AbstractState &getState() override { return *this; }
+  const AbstractState &getState() const override { return *this; }
+  /// }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nonnull" : "may-null";
+  }
+
+  /// See AANonNull::isAssumedNonNull().
+  bool isAssumedNonNull() const override { return getAssumed(); }
+
+  /// See AANonNull::isKnownNonNull().
+  bool isKnownNonNull() const override { return getKnown(); }
+
+  /// Generate a predicate that checks if a given value is assumed nonnull.
+  /// The generated function returns true if a value satisfies any of
+  /// following conditions.
+  /// (i) A value is known nonZero(=nonnull).
+  /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
+  /// true.
+  std::function<bool(Value &)> generatePredicate(Attributor &);
+};
+
+std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
+  // FIXME: The `AAReturnedValues` should provide the predicate with the
+  // `ReturnInst` vector as well such that we can use the control flow sensitive
+  // version of `isKnownNonZero`. This should fix `test11` in
+  // `test/Transforms/FunctionAttrs/nonnull.ll`
+
+  std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+    if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
+      return true;
+
+    auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+
+    ImmutableCallSite ICS(&RV);
+
+    if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
+        (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
+      return false;
+
+    return true;
+  };
+
+  return Pred;
+}
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned : AANonNullImpl {
+
+  AANonNullReturned(Function &F, InformationCache &InfoCache)
+      : AANonNullImpl(F, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Function &F = getAnchorScope();
+
+    // The return position already carries nonnull; nothing left to deduce.
+    if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                       Attribute::NonNull))
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
+  // Without information about the returned values nothing can be deduced.
+  auto *RetValsAA = A.getAAFor<AAReturnedValues>(*this, getAnchorScope());
+
+  // Every potentially returned value has to be (assumed) nonnull.
+  bool AllNonNull = false;
+  if (RetValsAA) {
+    std::function<bool(Value &)> IsNonNull = generatePredicate(A);
+    AllNonNull = RetValsAA->checkForallReturnedValues(IsNonNull);
+  }
+  if (AllNonNull)
+    return ChangeStatus::UNCHANGED;
+
+  indicatePessimisticFixpoint();
+  return ChangeStatus::CHANGED;
+}
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument : AANonNullImpl {
+
+  AANonNullArgument(Argument &A, InformationCache &InfoCache)
+      : AANonNullImpl(A, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Argument *Arg = cast<Argument>(getAssociatedValue());
+    if (Arg->hasNonNullAttr())
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+/// NonNull attribute for a call site argument.
+struct AANonNullCallSiteArgument : AANonNullImpl {
+
+  /// See AANonNullImpl::AANonNullImpl(...).
+  AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
+                            InformationCache &InfoCache)
+      : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
+        ArgNo(ArgNo) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    CallSite CS(&getAnchoredValue());
+    if (isKnownNonZero(getAssociatedValue(),
+                       getAnchorScope().getParent()->getDataLayout()) ||
+        CS.paramHasAttr(ArgNo, getAttrKind()))
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(Attributor &A).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override {
+    return MP_CALL_SITE_ARGUMENT;
+  };
+
+  // Return argument index of associated value.
+  int getArgNo() const { return ArgNo; }
+
+private:
+  unsigned ArgNo;
+};
+ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+  Argument &Arg = cast<Argument>(getAnchoredValue());
+
+  unsigned ArgNo = Arg.getArgNo();
+
+  // Callback function
+  std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
+    assert(CS && "Sanity check: Call site was not initialized properly!");
+
+    auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+
+    // Check that NonNullAA is AANonNullCallSiteArgument.
+    if (NonNullAA) {
+      ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
+      if (ICS && CS.getInstruction() == ICS.getInstruction())
+        return NonNullAA->isAssumedNonNull();
+      return false;
+    }
+
+    if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+      return true;
+
+    Value *V = CS.getArgOperand(ArgNo);
+    if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+      return true;
+
+    return false;
+  };
+  if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+  return ChangeStatus::UNCHANGED;
+}
+
+ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
+  // NOTE: The callee's argument must not be consulted here. Doing so would
+  //       always yield "nonnull" because of the optimistic assumption.
+
+  // Rely solely on what is assumed about the value passed at this call site.
+  auto *OperandAA = A.getAAFor<AANonNull>(*this, *getAssociatedValue());
+
+  bool AssumedNonNull = OperandAA && OperandAA->isAssumedNonNull();
+  if (AssumedNonNull)
+    return ChangeStatus::UNCHANGED;
+
+  // No usable information about the operand: give up on this position.
+  indicatePessimisticFixpoint();
+  return ChangeStatus::CHANGED;
+}
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturnImpl(Function &F, InformationCache &InfoCache)
+      : AAWillReturn(F, InfoCache) {}
+
+  /// See AAWillReturn::isKnownWillReturn().
+  bool isKnownWillReturn() const override { return getKnown(); }
+
+  /// See AAWillReturn::isAssumedWillReturn().
+  bool isAssumedWillReturn() const override { return getAssumed(); }
+
+  /// See AbstractAttribute::getState(...).
+  AbstractState &getState() override { return *this; }
+
+  /// See AbstractAttribute::getState(...).
+  const AbstractState &getState() const override { return *this; }
+
+  /// See AbstractAttribute::getAsStr()
+  const std::string getAsStr() const override {
+    return getAssumed() ? "willreturn" : "may-noreturn";
+  }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturnFunction(Function &F, InformationCache &InfoCache)
+      : AAWillReturnImpl(F, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override {
+    return MP_FUNCTION;
+  }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override;
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+// Helper function that conservatively checks whether a function has a cycle.
+// TODO: Replace with more efficient code
+bool containsCycle(Function &F) {
+  SmallPtrSet<BasicBlock *, 32> Visited;
+
+  // Traverse BBs depth-first and report any edge to an already visited BB.
+  for (BasicBlock *BB : depth_first(&F)) {
+    Visited.insert(BB);
+    for (auto *SuccBB : successors(BB)) {
+      if (Visited.count(SuccBB))
+        return true;
+    }
+  }
+  return false;
+}
+
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as endless loop for now.
+//        We have to allow some patterns.
+bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+
+void AAWillReturnFunction::initialize(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  if (containsPossiblyEndlessLoop(F))
+    indicatePessimisticFixpoint();
+}
+
+ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  // The map from instruction opcodes to those instructions in the function.
+  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+  for (unsigned Opcode :
+       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+        (unsigned)Instruction::Call}) {
+    for (Instruction *I : OpcodeInstMap[Opcode]) {
+      auto ICS = ImmutableCallSite(I);
+
+      if (ICS.hasFnAttr(Attribute::WillReturn))
+        continue;
+
+      auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
+      if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
+        indicatePessimisticFixpoint();
+        return ChangeStatus::CHANGED;
+      }
+
+      auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+
+      // FIXME: (i) Prohibit any recursion for now.
+      //        (ii) AANoRecurse isn't implemented yet so currently any call is
+      //        regarded as having recursion.
+      //       Code below should be
+      //       if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
+      if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
+        indicatePessimisticFixpoint();
+        return ChangeStatus::CHANGED;
+      }
+    }
+  }
+
+  return ChangeStatus::UNCHANGED;
+}
+
+/// ----------------------------------------------------------------------------
+///                               Attributor
+/// ----------------------------------------------------------------------------
+
+bool Attributor::checkForAllCallSites(Function &F,
+                                      std::function<bool(CallSite)> &Pred,
+                                      bool RequireAllCallSites) {
+  // We can try to determine information from the call sites. However, this
+  // is only possible if all call sites are known, hence the function has to
+  // have internal linkage.
+  if (RequireAllCallSites && !F.hasInternalLinkage()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "Attributor: Function " << F.getName()
+        << " has no internal linkage, hence not all call sites are known\n");
+    return false;
+  }
+
+  for (const Use &U : F.uses()) {
+
+    CallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+      if (!RequireAllCallSites)
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
+                        << " is an invalid use of " << F.getName() << "\n");
+      return false;
+    }
+
+    if (Pred(CS))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
+                      << *CS.getInstruction() << "\n");
+    return false;
+  }
+
+  return true;
+}
+
+ChangeStatus Attributor::run() {
+  // Initialize all abstract attributes.
+  for (AbstractAttribute *AA : AllAbstractAttributes)
+    AA->initialize(*this);
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
+                    << AllAbstractAttributes.size()
+                    << " abstract attributes.\n");
+
+  // Now that all abstract attributes are collected and initialized we start
+  // the abstract analysis.
+
+  unsigned IterationCounter = 1;
+
+  SmallVector<AbstractAttribute *, 64> ChangedAAs;
+  SetVector<AbstractAttribute *> Worklist;
+  Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+
+  do {
+    LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
+                      << ", Worklist size: " << Worklist.size() << "\n");
+
+    // Add all abstract attributes that are potentially dependent on one that
+    // changed to the work list.
+    for (AbstractAttribute *ChangedAA : ChangedAAs) {
+      auto &QuerriedAAs = QueryMap[ChangedAA];
+      Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
+    }
+
+    // Reset the changed set.
+    ChangedAAs.clear();
+
+    // Update all abstract attributes in the work list and record the ones
+    // that changed.
+    for (AbstractAttribute *AA : Worklist)
+      if (AA->update(*this) == ChangeStatus::CHANGED)
+        ChangedAAs.push_back(AA);
+
+    // Reset the work list and repopulate with the changed abstract attributes.
+    // Note that dependent ones are added above.
+    Worklist.clear();
+    Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+
+  } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+
+  LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
+                    << IterationCounter << "/" << MaxFixpointIterations
+                    << " iterations\n");
+
+  bool FinishedAtFixpoint = Worklist.empty();
+
+  // Reset abstract arguments not settled in a sound fixpoint by now. This
+  // happens when we stopped the fixpoint iteration early. Note that only the
+  // ones marked as "changed" *and* the ones transitively depending on them
+  // need to be reverted to a pessimistic state. Others might not be in a
+  // fixpoint state but we can use the optimistic results for them anyway.
+  SmallPtrSet<AbstractAttribute *, 32> Visited;
+  for (unsigned u = 0; u < ChangedAAs.size(); u++) {
+    AbstractAttribute *ChangedAA = ChangedAAs[u];
+    if (!Visited.insert(ChangedAA).second)
+      continue;
+
+    AbstractState &State = ChangedAA->getState();
+    if (!State.isAtFixpoint()) {
+      State.indicatePessimisticFixpoint();
+
+      NumAttributesTimedOut++;
+    }
+
+    auto &QuerriedAAs = QueryMap[ChangedAA];
+    ChangedAAs.append(QuerriedAAs.begin(), QuerriedAAs.end());
+  }
+
+  LLVM_DEBUG({
+    if (!Visited.empty())
+      dbgs() << "\n[Attributor] Finalized " << Visited.size()
+             << " abstract attributes.\n";
+  });
+
+  unsigned NumManifested = 0;
+  unsigned NumAtFixpoint = 0;
+  ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
+  for (AbstractAttribute *AA : AllAbstractAttributes) {
+    AbstractState &State = AA->getState();
+
+    // If there is not already a fixpoint reached, we can now take the
+    // optimistic state. This is correct because we enforced a pessimistic one
+    // on abstract attributes that were transitively dependent on a changed one
+    // already above.
+    if (!State.isAtFixpoint())
+      State.indicateOptimisticFixpoint();
+
+    // If the state is invalid, we do not try to manifest it.
+    if (!State.isValidState())
+      continue;
+
+    // Manifest the state and record if we changed the IR.
+    ChangeStatus LocalChange = AA->manifest(*this);
+    ManifestChange = ManifestChange | LocalChange;
+
+    NumAtFixpoint++;
+    NumManifested += (LocalChange == ChangeStatus::CHANGED);
+  }
+
+  (void)NumManifested;
+  (void)NumAtFixpoint;
+  LLVM_DEBUG(dbgs() << "\n[Attributor] Manifested " << NumManifested
+                    << " arguments while " << NumAtFixpoint
+                    << " were in a valid fixpoint state\n");
+
+  // If verification is requested, we finished this run at a fixpoint, and the
+  // IR was changed, we re-run the whole fixpoint analysis, starting at
+  // re-initialization of the arguments. This re-run should not result in an IR
+  // change. Though, the (virtual) state of attributes at the end of the re-run
+  // might be more optimistic than the known state or the IR state if the better
+  // state cannot be manifested.
+  if (VerifyAttributor && FinishedAtFixpoint &&
+      ManifestChange == ChangeStatus::CHANGED) {
+    VerifyAttributor = false;
+    ChangeStatus VerifyStatus = run();
+    if (VerifyStatus != ChangeStatus::UNCHANGED)
+      llvm_unreachable(
+          "Attributor verification failed, re-run did result in an IR change "
+          "even after a fixpoint was reached in the original run. (False "
+          "positives possible!)");
+    VerifyAttributor = true;
+  }
+
+  NumAttributesManifested += NumManifested;
+  NumAttributesValidFixpoint += NumAtFixpoint;
+
+  return ManifestChange;
+}
+
+void Attributor::identifyDefaultAbstractAttributes(
+    Function &F, InformationCache &InfoCache,
+    DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+
+  // Every function can be nounwind.
+  registerAA(*new AANoUnwindFunction(F, InfoCache));
+
+  // Every function might be marked "nosync"
+  registerAA(*new AANoSyncFunction(F, InfoCache));
+
+  // Every function might be "no-free".
+  registerAA(*new AANoFreeFunction(F, InfoCache));
+
+  // Return attributes are only appropriate if the return type is non void.
+  Type *ReturnType = F.getReturnType();
+  if (!ReturnType->isVoidTy()) {
+    // Argument attribute "returned" --- Create only one per function even
+    // though it is an argument attribute.
+    if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
+      registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+
+    // Every function with pointer return type might be marked nonnull.
+    if (ReturnType->isPointerTy() &&
+        (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
+      registerAA(*new AANonNullReturned(F, InfoCache));
+  }
+
+  // Every argument with pointer type might be marked nonnull.
+  for (Argument &Arg : F.args()) {
+    if (Arg.getType()->isPointerTy())
+      registerAA(*new AANonNullArgument(Arg, InfoCache));
+  }
+
+  // Every function might be "will-return".
+  registerAA(*new AAWillReturnFunction(F, InfoCache));
+
+  // Walk all instructions to find more attribute opportunities and also
+  // interesting instructions that might be queried by abstract attributes
+  // during their initialization or update.
+  auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
+  auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
+
+  for (Instruction &I : instructions(&F)) {
+    bool IsInterestingOpcode = false;
+
+    // To allow easy access to all instructions in a function with a given
+    // opcode we store them in the InfoCache. As not all opcodes are interesting
+    // to concrete attributes we only cache the ones that are as identified in
+    // the following switch.
+    // Note: For now only call-like, return, and EH-related opcodes are cached.
+    switch (I.getOpcode()) {
+    default:
+      assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
+             "New call site/base instruction type needs to be known int the "
+             "attributor.");
+      break;
+    case Instruction::Call:
+    case Instruction::CallBr:
+    case Instruction::Invoke:
+    case Instruction::CleanupRet:
+    case Instruction::CatchSwitch:
+    case Instruction::Resume:
+    case Instruction::Ret:
+      IsInterestingOpcode = true;
+    }
+    if (IsInterestingOpcode)
+      InstOpcodeMap[I.getOpcode()].push_back(&I);
+    if (I.mayReadOrWriteMemory())
+      ReadOrWriteInsts.push_back(&I);
+
+    CallSite CS(&I);
+    if (CS && CS.getCalledFunction()) {
+      for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+        if (!CS.getArgument(i)->getType()->isPointerTy())
+          continue;
+
+        // Call site argument attribute "non-null".
+        registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+      }
+    }
+  }
+}
+
+/// Helpers to ease debugging through output streams and print calls.
+///
+///{
+raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
+  return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+                              AbstractAttribute::ManifestPosition AP) {
+  switch (AP) {
+  case AbstractAttribute::MP_ARGUMENT:
+    return OS << "arg";
+  case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+    return OS << "cs_arg";
+  case AbstractAttribute::MP_FUNCTION:
+    return OS << "fn";
+  case AbstractAttribute::MP_RETURNED:
+    return OS << "fn_ret";
+  }
+  llvm_unreachable("Unknown attribute position!");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
+  return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
+  AA.print(OS);
+  return OS;
+}
+
+void AbstractAttribute::print(raw_ostream &OS) const {
+  OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
+     << AnchoredVal.getName() << "]";
+}
+///}
+
+/// ----------------------------------------------------------------------------
+///                       Pass (Manager) Boilerplate
+/// ----------------------------------------------------------------------------
+
+static bool runAttributorOnModule(Module &M) {
+  if (DisableAttributor)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size()
+                    << " functions.\n");
+
+  // Create an Attributor and initially empty information cache that is filled
+  // while we identify default attribute opportunities.
+  Attributor A;
+  InformationCache InfoCache;
+
+  for (Function &F : M) {
+    // TODO: Not all attributes require an exact definition. Find a way to
+    //       enable deduction for some but not all attributes in case the
+    //       definition might be changed at runtime, see also
+    //       http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+    // TODO: We could always determine abstract attributes and if sufficient
+    //       information was found we could duplicate the functions that do not
+    //       have an exact definition.
+    if (!F.hasExactDefinition()) {
+      NumFnWithoutExactDefinition++;
+      continue;
+    }
+
+    // For now we ignore naked and optnone functions.
+    if (F.hasFnAttribute(Attribute::Naked) ||
+        F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+
+    NumFnWithExactDefinition++;
+
+    // Populate the Attributor with abstract attribute opportunities in the
+    // function and the information cache with IR information.
+    A.identifyDefaultAbstractAttributes(F, InfoCache);
+  }
+
+  return A.run() == ChangeStatus::CHANGED;
+}
+
+PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
+  // An unchanged module keeps every analysis result valid.
+  if (!runAttributorOnModule(M))
+    return PreservedAnalyses::all();
+  // FIXME: Think about passes we will preserve and add them here.
+  return PreservedAnalyses::none();
+}
+
+namespace {
+
+struct AttributorLegacyPass : public ModulePass {
+  static char ID;
+
+  AttributorLegacyPass() : ModulePass(ID) {
+    initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    if (skipModule(M))
+      return false;
+    return runAttributorOnModule(M);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // FIXME: Think about passes we will preserve and add them here.
+    AU.setPreservesCFG();
+  }
+};
+
+} // end anonymous namespace
+
+Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+
+char AttributorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
+                      "Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
+                    "Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
index 05fc3dd6950c..6b68aa90c567 100644
--- a/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -1,9 +1,8 @@
 //===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index ff5ee817da49..6c365f3f3cbe 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -1,9 +1,8 @@
 //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "block-extractor"
@@ -36,22 +36,48 @@ static cl::opt<std::string> BlockExtractorFile(
 cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
                                        cl::desc("Erase the existing functions"),
                                        cl::Hidden);
-
 namespace {
 class BlockExtractor : public ModulePass {
-  SmallVector<BasicBlock *, 16> Blocks;
+  SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
   bool EraseFunctions;
-  SmallVector<std::pair<std::string, std::string>, 32> BlocksByName;
+  /// Map a function name to groups of blocks.
+  SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
+      BlocksByName;
+
+  void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+                &GroupsOfBlocksToExtract) {
+    for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
+         GroupsOfBlocksToExtract) {
+      SmallVector<BasicBlock *, 16> NewGroup;
+      NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
+      GroupsOfBlocks.emplace_back(NewGroup);
+    }
+    if (!BlockExtractorFile.empty())
+      loadFile();
+  }
 
 public:
   static char ID;
   BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
                  bool EraseFunctions)
-      : ModulePass(ID), Blocks(BlocksToExtract.begin(), BlocksToExtract.end()),
-        EraseFunctions(EraseFunctions) {
-    if (!BlockExtractorFile.empty())
-      loadFile();
+      : ModulePass(ID), EraseFunctions(EraseFunctions) {
+    // We want one group per element of the input list.
+    SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
+    for (BasicBlock *BB : BlocksToExtract) {
+      SmallVector<BasicBlock *, 16> NewGroup;
+      NewGroup.push_back(BB);
+      MassagedGroupsOfBlocks.push_back(NewGroup);
+    }
+    init(MassagedGroupsOfBlocks);
   }
+
+  BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+                     &GroupsOfBlocksToExtract,
+                 bool EraseFunctions)
+      : ModulePass(ID), EraseFunctions(EraseFunctions) {
+    init(GroupsOfBlocksToExtract);
+  }
+
   BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {}
   bool runOnModule(Module &M) override;
 
@@ -70,6 +96,12 @@ ModulePass *llvm::createBlockExtractorPass(
     const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
   return new BlockExtractor(BlocksToExtract, EraseFunctions);
 }
+ModulePass *llvm::createBlockExtractorPass(
+    const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+        &GroupsOfBlocksToExtract,
+    bool EraseFunctions) {
+  return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions);
+}
 
 /// Gets all of the blocks specified in the input file.
 void BlockExtractor::loadFile() {
@@ -82,8 +114,17 @@ void BlockExtractor::loadFile() {
   Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
                          /*KeepEmpty=*/false);
   for (const auto &Line : Lines) {
-    auto FBPair = Line.split(' ');
-    BlocksByName.push_back({FBPair.first, FBPair.second});
+    SmallVector<StringRef, 4> LineSplit;
+    Line.split(LineSplit, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    if (LineSplit.empty())
+      continue;
+    // Guard LineSplit[1]: a line with only a function name has no block list.
+    SmallVector<StringRef, 4> BBNames;
+    if (LineSplit.size() > 1)
+      LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    if (BBNames.empty())
+      report_fatal_error("Missing bbs name");
+    BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}});
   }
 }
 
@@ -130,33 +171,46 @@ bool BlockExtractor::runOnModule(Module &M) {
   }
 
   // Get all the blocks specified in the input file.
+  unsigned NextGroupIdx = GroupsOfBlocks.size();
+  GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
   for (const auto &BInfo : BlocksByName) {
     Function *F = M.getFunction(BInfo.first);
     if (!F)
       report_fatal_error("Invalid function name specified in the input file");
-    auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
-      return BB.getName().equals(BInfo.second);
-    });
-    if (Res == F->end())
-      report_fatal_error("Invalid block name specified in the input file");
-    Blocks.push_back(&*Res);
+    for (const auto &BBInfo : BInfo.second) {
+      auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
+        return BB.getName().equals(BBInfo);
+      });
+      if (Res == F->end())
+        report_fatal_error("Invalid block name specified in the input file");
+      GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
+    }
+    ++NextGroupIdx;
   }
 
-  // Extract basic blocks.
-  for (BasicBlock *BB : Blocks) {
-    // Check if the module contains BB.
-    if (BB->getParent()->getParent() != &M)
-      report_fatal_error("Invalid basic block");
-    LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
-                      << BB->getParent()->getName() << ":" << BB->getName()
-                      << "\n");
-    SmallVector<BasicBlock *, 2> BlocksToExtractVec;
-    BlocksToExtractVec.push_back(BB);
-    if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
-      BlocksToExtractVec.push_back(II->getUnwindDest());
-    CodeExtractor(BlocksToExtractVec).extractCodeRegion();
-    ++NumExtracted;
-    Changed = true;
+  // Extract each group of basic blocks.
+  for (auto &BBs : GroupsOfBlocks) {
+    SmallVector<BasicBlock *, 32> BlocksToExtractVec;
+    for (BasicBlock *BB : BBs) {
+      // Check if the module contains BB.
+      if (BB->getParent()->getParent() != &M)
+        report_fatal_error("Invalid basic block");
+      LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
+                        << BB->getParent()->getName() << ":" << BB->getName()
+                        << "\n");
+      BlocksToExtractVec.push_back(BB);
+      if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+        BlocksToExtractVec.push_back(II->getUnwindDest());
+      ++NumExtracted;
+      Changed = true;
+    }
+    Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+    if (F)
+      LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
+                        << "' in: " << F->getName() << '\n');
+    else
+      LLVM_DEBUG(dbgs() << "Failed to extract for group '"
+                        << (*BBs.begin())->getName() << "'\n");
   }
 
   // Erase the functions.
diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp
index de62cfc0c1db..20cb3213628e 100644
--- a/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -1,9 +1,8 @@
 //===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 81f3634eaf28..ad877ae1786c 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -1,9 +1,8 @@
 //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -91,6 +90,16 @@ static unsigned getAlignment(GlobalVariable *GV) {
   return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
 }
 
+static bool
+isUnmergeableGlobal(GlobalVariable *GV,
+                    const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
+  // Mergeable globals are constants with a definitive initializer in address
+  // space 0 that have no section and are not marked with attribute(used).
+  return !(GV->isConstant() && GV->hasDefinitiveInitializer() &&
+           GV->getType()->getAddressSpace() == 0 && !GV->hasSection() &&
+           !UsedGlobals.count(GV));
+}
+
 enum class CanMerge { No, Yes };
 static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
   if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
@@ -155,11 +164,7 @@ static bool mergeConstants(Module &M) {
         continue;
       }
 
-      // Only process constants with initializers in the default address space.
-      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
-          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
-          // Don't touch values marked with attribute(used).
-          UsedGlobals.count(GV))
+      if (isUnmergeableGlobal(GV, UsedGlobals))
         continue;
 
       // This transformation is legal for weak ODR globals in the sense it
@@ -197,11 +202,7 @@ static bool mergeConstants(Module &M) {
          GVI != E; ) {
       GlobalVariable *GV = &*GVI++;
 
-      // Only process constants with initializers in the default address space.
-      if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
-          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
-          // Don't touch values marked with attribute(used).
-          UsedGlobals.count(GV))
+      if (isUnmergeableGlobal(GV, UsedGlobals))
         continue;
 
       // We can only replace constant with local linkage.
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 666f6cc37bfd..e30b33aa4872 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -1,9 +1,8 @@
 //===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -106,10 +105,10 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
   }
 
   LLVMContext &Ctx = M.getContext();
-  Constant *C = M.getOrInsertFunction(
+  FunctionCallee C = M.getOrInsertFunction(
       "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
       Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
-  Function *F = dyn_cast<Function>(C);
+  Function *F = dyn_cast<Function>(C.getCallee());
   // Take over the existing function. The frontend emits a weak stub so that the
   // linker knows about the symbol; this pass replaces the function body.
   F->deleteBody();
@@ -133,9 +132,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
 
   BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
   IRBuilder<> IRBFail(TrapBB);
-  Constant *CFICheckFailFn = M.getOrInsertFunction(
-      "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
-      Type::getInt8PtrTy(Ctx));
+  FunctionCallee CFICheckFailFn =
+      M.getOrInsertFunction("__cfi_check_fail", Type::getVoidTy(Ctx),
+                            Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
   IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
   IRBFail.CreateBr(ExitBB);
 
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index cb30e8f46a54..968a13110b16 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -1,9 +1,8 @@
 //===- DeadArgumentElimination.cpp - Eliminate dead arguments -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -939,7 +938,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
       NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                  Args, OpBundles, "", Call->getParent());
     } else {
-      NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+      NewCS = CallInst::Create(NFTy, NF, Args, OpBundles, "", Call);
       cast<CallInst>(NewCS.getInstruction())
           ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
     }
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index d5fef59286dd..fc52db562c62 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,9 +1,8 @@
 //===- ElimAvailExtern.cpp - DCE unreachable internal functions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index a744d7f2d2d9..f77b528fc42d 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -1,9 +1,8 @@
 //===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 4dc1529ddbf5..b38cb6d0ed3f 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -1,9 +1,8 @@
 //===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -58,6 +57,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
       .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
       .Case("sanitize_memory", Attribute::SanitizeMemory)
       .Case("sanitize_thread", Attribute::SanitizeThread)
+      .Case("sanitize_memtag", Attribute::SanitizeMemTag)
       .Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
       .Case("ssp", Attribute::StackProtect)
       .Case("sspreq", Attribute::StackProtectReq)
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 4e2a82b56eec..5ccd8bc4b0fb 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,9 +1,8 @@
 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,7 @@
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -76,6 +76,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
+STATISTIC(NumNoFree, "Number of functions marked as nofree");
 
 // FIXME: This is disabled by default to avoid exposing security vulnerabilities
 // in C/C++ code compiled by clang:
@@ -89,6 +90,10 @@ static cl::opt<bool> DisableNoUnwindInference(
     "disable-nounwind-inference", cl::Hidden,
     cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
 
+static cl::opt<bool> DisableNoFreeInference(
+    "disable-nofree-inference", cl::Hidden,
+    cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+
 namespace {
 
 using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -256,12 +261,15 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
     }
   }
 
+  // If the SCC contains both functions that read and functions that write, then
+  // we cannot add readonly attributes.
+  if (ReadsMemory && WritesMemory)
+    return false;
+
   // Success!  Functions in this SCC do not access memory, or only read memory.
   // Give them the appropriate attribute.
   bool MadeChange = false;
 
-  assert(!(ReadsMemory && WritesMemory) &&
-          "Function marked read-only and write-only");
   for (Function *F : SCCNodes) {
     if (F->doesNotAccessMemory())
       // Already perfect!
@@ -1228,6 +1236,25 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
   return true;
 }
 
+/// Helper for NoFree inference predicate InstrBreaksAttribute. Returns true
+/// if \p I is a call site whose callee is unknown, or is neither known not to
+/// free memory nor a member of \p SCCNodes.
+static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
+  CallSite Site(&I);
+  // Anything that is not a call site never breaks the assumption.
+  if (!Site)
+    return false;
+
+  // Without a known callee we have to assume the call may free memory.
+  Function *Target = Site.getCalledFunction();
+  if (!Target)
+    return true;
+
+  // Calls to functions that do not free memory, and calls that stay inside
+  // the SCC, keep the assumption intact; every other call breaks it.
+  return !Target->doesNotFreeMemory() && SCCNodes.count(Target) == 0;
+}
+
 /// Infer attributes from all functions in the SCC by scanning every
 /// instruction for compliance to the attribute assumptions. Currently it
 /// does:
@@ -1281,6 +1308,29 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
         },
         /* RequiresExactDefinition= */ true});
 
+  if (!DisableNoFreeInference)
+    // Request to infer nofree attribute for all the functions in the SCC if
+    // every callsite within the SCC does not directly or indirectly free
+    // memory (except for calls to functions within the SCC). Note that nofree
+    // attribute suffers from derefinement - results may change depending on
+    // how functions are optimized. Thus it can be inferred only from exact
+    // definitions.
+    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+        Attribute::NoFree,
+        // Skip functions known not to free memory.
+        [](const Function &F) { return F.doesNotFreeMemory(); },
+        // Instructions that break non-deallocating assumption.
+        [SCCNodes](Instruction &I) {
+          return InstrBreaksNoFree(I, SCCNodes);
+        },
+        [](Function &F) {
+          LLVM_DEBUG(dbgs()
+                     << "Adding nofree attr to fn " << F.getName() << "\n");
+          F.setDoesNotFreeMemory();
+          ++NumNoFree;
+        },
+        /* RequiresExactDefinition= */ true});
+
   // Perform all the requested attribute inference actions.
   return AI.run(SCCNodes);
 }
@@ -1301,7 +1351,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
     return false;
 
   Function *F = *SCCNodes.begin();
-  if (!F || F->isDeclaration() || F->doesNotRecurse())
+  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
     return false;
 
   // If all of the calls in F are identifiable and are to norecurse functions, F
@@ -1323,7 +1373,8 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
 }
 
 template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes,
+                                   AARGetterT &&AARGetter,
                                    bool HasUnknownCall) {
   bool Changed = false;
 
@@ -1367,8 +1418,7 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
   bool HasUnknownCall = false;
   for (LazyCallGraph::Node &N : C) {
     Function &F = N.getFunction();
-    if (F.hasFnAttribute(Attribute::OptimizeNone) ||
-        F.hasFnAttribute(Attribute::Naked)) {
+    if (F.hasOptNone() || F.hasFnAttribute(Attribute::Naked)) {
       // Treat any function we're trying not to optimize as if it were an
       // indirect call and omit it from the node set used below.
       HasUnknownCall = true;
@@ -1441,8 +1491,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
   bool ExternalNode = false;
   for (CallGraphNode *I : SCC) {
     Function *F = I->getFunction();
-    if (!F || F->hasFnAttribute(Attribute::OptimizeNone) ||
-        F->hasFnAttribute(Attribute::Naked)) {
+    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
       // External node or function we're trying not to optimize - we both avoid
       // transform them and avoid leveraging information they provide.
       ExternalNode = true;
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 1223a23512ed..62c7fbd07223 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -1,9 +1,8 @@
 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -778,9 +777,7 @@ void llvm::computeDeadSymbols(
     if (!VI)
       return;
 
-    // We need to make sure all variants of the symbol are scanned, alias can
-    // make one (but not all) alive.
-    if (llvm::all_of(VI.getSummaryList(),
+    if (llvm::any_of(VI.getSummaryList(),
                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
                        return S->isLive();
                      }))
@@ -820,12 +817,18 @@ void llvm::computeDeadSymbols(
   while (!Worklist.empty()) {
     auto VI = Worklist.pop_back_val();
     for (auto &Summary : VI.getSummaryList()) {
-      GlobalValueSummary *Base = Summary->getBaseObject();
-      // Set base value live in case it is an alias.
-      Base->setLive(true);
-      for (auto Ref : Base->refs())
+      if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+        // If this is an alias, visit the aliasee VI to ensure that all copies
+        // are marked live and it is added to the worklist for further
+        // processing of its references.
+        visit(AS->getAliaseeVI());
+        continue;
+      }
+
+      Summary->setLive(true);
+      for (auto Ref : Summary->refs())
         visit(Ref);
-      if (auto *FS = dyn_cast<FunctionSummary>(Base))
+      if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
         for (auto Call : FS->calls())
           visit(Call.first);
     }
@@ -847,14 +850,16 @@ void llvm::computeDeadSymbolsWithConstProp(
     bool ImportEnabled) {
   computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
   if (ImportEnabled) {
-    Index.propagateConstants(GUIDPreservedSymbols);
+    Index.propagateAttributes(GUIDPreservedSymbols);
   } else {
-    // If import is disabled we should drop read-only attribute
+    // If import is disabled we should drop read/write-only attribute
     // from all summaries to prevent internalization.
     for (auto &P : Index)
       for (auto &S : P.second.SummaryList)
-        if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+        if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get())) {
           GVS->setReadOnly(false);
+          GVS->setWriteOnly(false);
+        }
   }
 }
 
@@ -973,12 +978,15 @@ void llvm::thinLTOResolvePrevailingInModule(
         // changed to enable this for aliases.
         llvm_unreachable("Expected GV to be converted");
     } else {
-      // If the original symbols has global unnamed addr and linkonce_odr linkage,
-      // it should be an auto hide symbol. Add hidden visibility to the symbol to
-      // preserve the property.
-      if (GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr() &&
-          NewLinkage == GlobalValue::WeakODRLinkage)
+      // If all copies of the original symbol had global unnamed addr and
+      // linkonce_odr linkage, it should be an auto hide symbol. In that case
+      // the thin link would have marked it as CanAutoHide. Add hidden visibility
+      // to the symbol to preserve the property.
+      if (NewLinkage == GlobalValue::WeakODRLinkage &&
+          GS->second->canAutoHide()) {
+        assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr());
         GV.setVisibility(GlobalValue::HiddenVisibility);
+      }
 
       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
                         << "` from " << GV.getLinkage() << " to " << NewLinkage
@@ -1047,9 +1055,10 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
 
   ValueToValueMapTy VMap;
   Function *NewFn = CloneFunction(Fn, VMap);
-  // Clone should use the original alias's linkage and name, and we ensure
-  // all uses of alias instead use the new clone (casted if necessary).
+  // Clone should use the original alias's linkage, visibility and name, and we
+  // ensure all uses of alias instead use the new clone (casted if necessary).
   NewFn->setLinkage(GA->getLinkage());
+  NewFn->setVisibility(GA->getVisibility());
   GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
   NewFn->takeName(GA);
   return NewFn;
@@ -1057,7 +1066,7 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
 
 // Internalize values that we marked with specific attribute
 // in processGlobalForThinLTO.
-static void internalizeImmutableGVs(Module &M) {
+static void internalizeGVsAfterImport(Module &M) {
   for (auto &GV : M.globals())
     // Skip GVs which have been converted to declarations
     // by dropDeadSymbols.
@@ -1190,7 +1199,7 @@ Expected<bool> FunctionImporter::importFunctions(
     NumImportedModules++;
   }
 
-  internalizeImmutableGVs(DestModule);
+  internalizeGVsAfterImport(DestModule);
 
   NumImportedFunctions += (ImportedCount - ImportedGVCount);
   NumImportedGlobalVars += ImportedGVCount;
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 34de87433367..86b7f3e49ee6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -1,9 +1,8 @@
 //===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3005aafd06b1..c4fb3ce77f6e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1,9 +1,8 @@
 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -730,7 +729,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
           break;
       if (Idxs.size() == GEPI->getNumOperands()-1)
         Changed |= OptimizeAwayTrappingUsesOfValue(
-            GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
+                                                 NewV, Idxs));
       if (GEPI->use_empty()) {
         Changed = true;
         GEPI->eraseFromParent();
@@ -906,9 +906,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
 
       // Replace the cmp X, 0 with a use of the bool value.
       // Sink the load to where the compare was, if atomic rules allow us to.
-      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
+                               InitBool->getName() + ".val", false, 0,
                                LI->getOrdering(), LI->getSyncScopeID(),
-                               LI->isUnordered() ? (Instruction*)ICI : LI);
+                               LI->isUnordered() ? (Instruction *)ICI : LI);
       InitBoolUsed = true;
       switch (ICI->getPredicate()) {
       default: llvm_unreachable("Unknown ICmp Predicate!");
@@ -1041,7 +1042,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
     }
 
     // Insert a load from the global, and use it instead of the malloc.
-    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+    Value *NL =
+        new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
     U->replaceUsesOfWith(Alloc, NL);
   }
 }
@@ -1164,10 +1166,10 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
     // This is a scalarized version of the load from the global.  Just create
     // a new Load of the scalarized global.
-    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
-                                           InsertedScalarizedValues,
-                                           PHIsToRewrite),
-                          LI->getName()+".f"+Twine(FieldNo), LI);
+    Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
+                                InsertedScalarizedValues, PHIsToRewrite);
+    Result = new LoadInst(V->getType()->getPointerElementType(), V,
+                          LI->getName() + ".f" + Twine(FieldNo), LI);
   } else {
     PHINode *PN = cast<PHINode>(V);
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
@@ -1357,7 +1359,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   // pointer, because some may be null while others are not.
   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *GVVal =
+        new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
+                     FieldGlobals[i], "tmp", NullPtrBlock);
     Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                               Constant::getNullValue(GVVal->getType()));
     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1650,6 +1654,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
       for(auto *GVe : GVs){
         DIGlobalVariable *DGV = GVe->getVariable();
         DIExpression *E = GVe->getExpression();
+        const DataLayout &DL = GV->getParent()->getDataLayout();
+        unsigned SizeInOctets =
+          DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
 
         // It is expected that the address of global optimized variable is on
         // top of the stack. After optimization, value of that variable will
@@ -1660,10 +1667,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         // DW_OP_deref DW_OP_constu <ValMinus>
         // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
         SmallVector<uint64_t, 12> Ops = {
-            dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus,
-            dwarf::DW_OP_mul,   dwarf::DW_OP_constu, ValInit,
+            dwarf::DW_OP_deref_size, SizeInOctets,
+            dwarf::DW_OP_constu, ValMinus,
+            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
             dwarf::DW_OP_plus};
-        E = DIExpression::prependOpcodes(E, Ops, DIExpression::WithStackValue);
+        bool WithStackValue = true;
+        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
         DIGlobalVariableExpression *DGVE =
           DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
         NewGV->addDebugInfo(DGVE);
@@ -1701,7 +1710,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
           assert(LI->getOperand(0) == GV && "Not a copy!");
           // Insert a new load, to preserve the saved value.
-          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
+                                  LI->getName() + ".b", false, 0,
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
         } else {
           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1717,8 +1727,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
     } else {
       // Change the load into a load of bool then a select.
       LoadInst *LI = cast<LoadInst>(UI);
-      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
-                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
+      LoadInst *NLI =
+          new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
+                       false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
       Instruction *NSI;
       if (IsOneZero)
         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1970,7 +1981,12 @@ static bool processInternalGlobal(
   }
   if (GS.StoredType <= GlobalStatus::InitializerStored) {
     LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
-    GV->setConstant(true);
+
+    // Don't actually mark a global constant if it's atomic because atomic loads
+    // are implemented by a trivial cmpxchg in some edge-cases and that usually
+    // requires write access to the variable even if it's not actually changed.
+    if (GS.Ordering == AtomicOrdering::NotAtomic)
+      GV->setConstant(true);
 
     // Clean up any obviously simplifiable users now.
     CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
@@ -2084,21 +2100,21 @@ static void ChangeCalleesToFastCall(Function *F) {
   }
 }
 
-static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
-  // There can be at most one attribute set with a nest attribute.
-  unsigned NestIndex;
-  if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
-    return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
+static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
+                               Attribute::AttrKind A) {
+  unsigned AttrIndex;
+  if (Attrs.hasAttrSomewhere(A, &AttrIndex))
+    return Attrs.removeAttribute(C, AttrIndex, A);
   return Attrs;
 }
 
-static void RemoveNestAttribute(Function *F) {
-  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
+  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
     CallSite CS(cast<Instruction>(U));
-    CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+    CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
   }
 }
 
@@ -2113,13 +2129,6 @@ static bool hasChangeableCC(Function *F) {
   if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
     return false;
 
-  // Don't break the invariant that the inalloca parameter is the only parameter
-  // passed in memory.
-  // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
-  // alloca it uses to the entry block if possible.
-  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
-    return false;
-
   // FIXME: Change CC for the whole chain of musttail calls when possible.
   //
   // Can't change CC of the function that either has musttail calls, or is a
@@ -2281,6 +2290,17 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
     if (!F->hasLocalLinkage())
       continue;
 
+    // If we have an inalloca parameter that we can safely remove the
+    // inalloca attribute from, do so. This unlocks optimizations that
+    // wouldn't be safe in the presence of inalloca.
+    // FIXME: We should also hoist alloca affected by this to the entry
+    // block if possible.
+    if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+        !F->hasAddressTaken()) {
+      RemoveAttribute(F, Attribute::InAlloca);
+      Changed = true;
+    }
+
     if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
       NumInternalFunc++;
       TargetTransformInfo &TTI = GetTTI(*F);
@@ -2289,8 +2309,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
       // cold at all call sites and the callers contain no other non coldcc
       // calls.
       if (EnableColdCCStressTest ||
-          (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
-           TTI.useColdCCForColdCall(*F))) {
+          (TTI.useColdCCForColdCall(*F) &&
+           isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
         F->setCallingConv(CallingConv::Cold);
         changeCallSitesToColdCC(F);
         Changed = true;
@@ -2313,7 +2333,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
         !F->hasAddressTaken()) {
       // The function is not used by a trampoline intrinsic, so it is safe
       // to remove the 'nest' attribute.
-      RemoveNestAttribute(F);
+      RemoveAttribute(F, Attribute::Nest);
       ++NumNestRemoved;
       Changed = true;
     }
@@ -2808,46 +2828,20 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
 /// Returns whether the given function is an empty C++ destructor and can
 /// therefore be eliminated.
 /// Note that we assume that other optimization passes have already simplified
-/// the code so we only look for a function with a single basic block, where
-/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
-/// other side-effect free instructions.
-static bool cxxDtorIsEmpty(const Function &Fn,
-                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
+/// the code so we simply check for 'ret'.
+static bool cxxDtorIsEmpty(const Function &Fn) {
   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
   // nounwind, but that doesn't seem worth doing.
   if (Fn.isDeclaration())
     return false;
 
-  if (++Fn.begin() != Fn.end())
-    return false;
-
-  const BasicBlock &EntryBlock = Fn.getEntryBlock();
-  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
-       I != E; ++I) {
-    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      // Ignore debug intrinsics.
-      if (isa<DbgInfoIntrinsic>(CI))
-        continue;
-
-      const Function *CalledFn = CI->getCalledFunction();
-
-      if (!CalledFn)
-        return false;
-
-      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
-
-      // Don't treat recursive functions as empty.
-      if (!NewCalledFunctions.insert(CalledFn).second)
-        return false;
-
-      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
-        return false;
-    } else if (isa<ReturnInst>(*I))
-      return true; // We're done.
-    else if (I->mayHaveSideEffects())
-      return false; // Destructor with side effects, bail.
+  for (auto &I : Fn.getEntryBlock()) {
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+    if (isa<ReturnInst>(I))
+      return true;
+    break;
   }
-
   return false;
 }
 
@@ -2879,11 +2873,7 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
 
     Function *DtorFn =
       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
-    if (!DtorFn)
-      continue;
-
-    SmallPtrSet<const Function *, 8> CalledFunctions;
-    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
       continue;
 
     // Just remove the call.
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index 792f4b3052a3..060043a40b89 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -1,9 +1,8 @@
 //===- GlobalSplit.cpp - global variable splitter -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index 924a7d5fbd9c..ab1a9a79cad6 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -1,16 +1,28 @@
 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// Outline cold regions to a separate function.
-// TODO: Update BFI and BPI
-// TODO: Add all the outlined functions to a separate section.
-//
+///
+/// \file
+/// The goal of hot/cold splitting is to improve the memory locality of code.
+/// The splitting pass does this by identifying cold blocks and moving them into
+/// separate functions.
+///
+/// When the splitting pass finds a cold block (referred to as "the sink"), it
+/// grows a maximal cold region around that block. The maximal region contains
+/// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as
+/// cold as the sink. Once a region is found, it's split out of the original
+/// function provided it's profitable to do so.
+///
+/// [*] In practice, there is some added complexity because some blocks are not
+/// safe to extract.
+///
+/// TODO: Use the PM to get domtrees, and preserve BFI/BPI.
+/// TODO: Reorder outlined functions.
+///
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/PostOrderIterator.h"
@@ -53,7 +65,6 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -69,16 +80,12 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
                               cl::init(true), cl::Hidden);
 
 static cl::opt<int>
-    MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
-                          cl::desc("Code size threshold for outlining within a "
-                                   "single BB (as a multiple of TCC_Basic)"));
+    SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
+                       cl::desc("Base penalty for splitting cold code (as a "
+                                "multiple of TCC_Basic)"));
 
 namespace {
 
-struct PostDomTree : PostDomTreeBase<BasicBlock> {
-  PostDomTree(Function &F) { recalculate(F); }
-};
-
 /// A sequence of basic blocks.
 ///
 /// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
@@ -101,13 +108,14 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
 
 bool unlikelyExecuted(BasicBlock &BB) {
   // Exception handling blocks are unlikely executed.
-  if (BB.isEHPad())
+  if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
     return true;
 
-  // The block is cold if it calls/invokes a cold function.
+  // The block is cold if it calls/invokes a cold function. However, do not
+  // mark sanitizer traps as cold.
   for (Instruction &I : BB)
     if (auto CS = CallSite(&I))
-      if (CS.hasFnAttr(Attribute::Cold))
+      if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
         return true;
 
   // The block is cold if it has an unreachable terminator, unless it's
@@ -125,38 +133,39 @@ bool unlikelyExecuted(BasicBlock &BB) {
 
 /// Check whether it's safe to outline \p BB.
 static bool mayExtractBlock(const BasicBlock &BB) {
-  return !BB.hasAddressTaken() && !BB.isEHPad();
-}
-
-/// Check whether \p Region is profitable to outline.
-static bool isProfitableToOutline(const BlockSequence &Region,
-                                  TargetTransformInfo &TTI) {
-  if (Region.size() > 1)
-    return true;
-
-  int Cost = 0;
-  const BasicBlock &BB = *Region[0];
-  for (const Instruction &I : BB) {
-    if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
-      continue;
-
-    Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
-
-    if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
-      return true;
-  }
-  return false;
+  // EH pads are unsafe to outline because doing so breaks EH type tables. It
+  // follows that invoke instructions cannot be extracted, because CodeExtractor
+  // requires unwind destinations to be within the extraction region.
+  //
+  // Resumes that are not reachable from a cleanup landing pad are considered to
+  // be unreachable. It's not safe to split them out either.
+  auto Term = BB.getTerminator();
+  return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) &&
+         !isa<ResumeInst>(Term);
 }
 
-/// Mark \p F cold. Return true if it's changed.
-static bool markEntireFunctionCold(Function &F) {
-  assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
+/// If \p UpdateEntryCount is true (set for a newly split function when the
+/// module has profile data), set its entry count to 0 so it is treated as cold.
+/// Return true if the function is changed.
+static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
+  assert(!F.hasOptNone() && "Can't mark this cold");
   bool Changed = false;
+  if (!F.hasFnAttribute(Attribute::Cold)) {
+    F.addFnAttr(Attribute::Cold);
+    Changed = true;
+  }
   if (!F.hasFnAttribute(Attribute::MinSize)) {
     F.addFnAttr(Attribute::MinSize);
     Changed = true;
   }
-  // TODO: Move this function into a cold section.
+  if (UpdateEntryCount) {
+    // Set the entry count to 0 to ensure it is placed in the unlikely text
+    // section when function sections are enabled.
+    F.setEntryCount(0);
+    Changed = true;
+  }
+
   return Changed;
 }
 
@@ -165,24 +174,24 @@ public:
   HotColdSplitting(ProfileSummaryInfo *ProfSI,
                    function_ref<BlockFrequencyInfo *(Function &)> GBFI,
                    function_ref<TargetTransformInfo &(Function &)> GTTI,
-                   std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
-      : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+                   std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+                   function_ref<AssumptionCache *(Function &)> LAC)
+      : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
   bool run(Module &M);
 
 private:
+  bool isFunctionCold(const Function &F) const;
   bool shouldOutlineFrom(const Function &F) const;
-  bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
-                          BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
-                          DominatorTree &DT, PostDomTree &PDT,
-                          OptimizationRemarkEmitter &ORE);
+  bool outlineColdRegions(Function &F, bool HasProfileSummary);
   Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
                               BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
-                              OptimizationRemarkEmitter &ORE, unsigned Count);
-  SmallPtrSet<const Function *, 2> OutlinedFunctions;
+                              OptimizationRemarkEmitter &ORE,
+                              AssumptionCache *AC, unsigned Count);
   ProfileSummaryInfo *PSI;
   function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
   function_ref<TargetTransformInfo &(Function &)> GetTTI;
   std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+  function_ref<AssumptionCache *(Function &)> LookupAC;
 };
 
 class HotColdSplittingLegacyPass : public ModulePass {
@@ -193,10 +202,10 @@ public:
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addUsedIfAvailable<AssumptionCacheTracker>();
   }
 
   bool runOnModule(Module &M) override;
@@ -204,59 +213,141 @@ public:
 
 } // end anonymous namespace
 
-// Returns false if the function should not be considered for hot-cold split
-// optimization.
-bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
-  // Do not try to outline again from an already outlined cold function.
-  if (OutlinedFunctions.count(&F))
-    return false;
+/// Check whether \p F is inherently cold.
+bool HotColdSplitting::isFunctionCold(const Function &F) const {
+  if (F.hasFnAttribute(Attribute::Cold))
+    return true;
 
-  if (F.size() <= 2)
-    return false;
+  if (F.getCallingConv() == CallingConv::Cold)
+    return true;
 
-  // TODO: Consider only skipping functions marked `optnone` or `cold`.
+  if (PSI->isFunctionEntryCold(&F))
+    return true;
 
-  if (F.hasAddressTaken())
-    return false;
+  return false;
+}
 
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
   if (F.hasFnAttribute(Attribute::AlwaysInline))
     return false;
 
   if (F.hasFnAttribute(Attribute::NoInline))
     return false;
 
-  if (F.getCallingConv() == CallingConv::Cold)
+  if (F.hasFnAttribute(Attribute::SanitizeAddress) ||
+      F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+      F.hasFnAttribute(Attribute::SanitizeThread) ||
+      F.hasFnAttribute(Attribute::SanitizeMemory))
     return false;
 
-  if (PSI->isFunctionEntryCold(&F))
-    return false;
   return true;
 }
 
+/// Get the benefit score of outlining \p Region.
+static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+                               TargetTransformInfo &TTI) {
+  // Sum up the code size costs of non-terminator instructions. Tight coupling
+  // with \ref getOutliningPenalty is needed to model the costs of terminators.
+  int Benefit = 0;
+  for (BasicBlock *BB : Region)
+    for (Instruction &I : BB->instructionsWithoutDebug())
+      if (&I != BB->getTerminator())
+        Benefit +=
+            TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+  return Benefit;
+}
+
+/// Get the penalty score for outlining \p Region.
+static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
+                               unsigned NumInputs, unsigned NumOutputs) {
+  int Penalty = SplittingThreshold;
+  LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
+
+  // If the splitting threshold is set at or below zero, skip the usual
+  // profitability check.
+  if (SplittingThreshold <= 0)
+    return Penalty;
+
+  // The typical code size cost for materializing an argument for the outlined
+  // call.
+  LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
+  const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
+  Penalty += CostForArgMaterialization * NumInputs;
+
+  // The typical code size cost for an output alloca, its associated store, and
+  // its associated reload.
+  LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
+  const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+  Penalty += CostForRegionOutput * NumOutputs;
+
+  // Find the number of distinct exit blocks for the region. Use a conservative
+  // check to determine whether control returns from the region.
+  bool NoBlocksReturn = true;
+  SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
+  for (BasicBlock *BB : Region) {
+    // If a block has no successors, only assume it does not return if its
+    // terminator is an unreachable instruction.
+    if (succ_empty(BB)) {
+      NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
+      continue;
+    }
+
+    for (BasicBlock *SuccBB : successors(BB)) {
+      if (find(Region, SuccBB) == Region.end()) {
+        NoBlocksReturn = false;
+        SuccsOutsideRegion.insert(SuccBB);
+      }
+    }
+  }
+
+  // Apply a `noreturn` bonus.
+  if (NoBlocksReturn) {
+    LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
+                      << " non-returning terminators\n");
+    Penalty -= Region.size();
+  }
+
+  // Apply a penalty for having more than one successor outside of the region.
+  // This penalty accounts for the switch needed in the caller.
+  if (!SuccsOutsideRegion.empty()) {
+    LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
+                      << " non-region successors\n");
+    Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
+  }
+
+  return Penalty;
+}
+
 Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
                                               DominatorTree &DT,
                                               BlockFrequencyInfo *BFI,
                                               TargetTransformInfo &TTI,
                                               OptimizationRemarkEmitter &ORE,
+                                              AssumptionCache *AC,
                                               unsigned Count) {
   assert(!Region.empty());
 
   // TODO: Pass BFI and BPI to update profile information.
   CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
-                   /* BPI */ nullptr, /* AllowVarArgs */ false,
+                   /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
                    /* AllowAlloca */ false,
                    /* Suffix */ "cold." + std::to_string(Count));
 
+  // Perform a simple cost/benefit analysis to decide whether or not to permit
+  // splitting.
   SetVector<Value *> Inputs, Outputs, Sinks;
   CE.findInputsOutputs(Inputs, Outputs, Sinks);
-
-  // Do not extract regions that have live exit variables.
-  if (Outputs.size() > 0) {
-    LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+  int OutliningBenefit = getOutliningBenefit(Region, TTI);
+  int OutliningPenalty =
+      getOutliningPenalty(Region, Inputs.size(), Outputs.size());
+  LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
+                    << ", penalty = " << OutliningPenalty << "\n");
+  if (OutliningBenefit <= OutliningPenalty)
     return nullptr;
-  }
 
-  // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
   Function *OrigF = Region[0]->getParent();
   if (Function *OutF = CE.extractCodeRegion()) {
     User *U = *OutF->user_begin();
@@ -269,9 +360,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
     }
     CI->setIsNoInline();
 
-    // Try to make the outlined code as small as possible on the assumption
-    // that it's cold.
-    markEntireFunctionCold(*OutF);
+    markFunctionCold(*OutF, BFI != nullptr);
 
     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
     ORE.emit([&]() {
@@ -298,6 +387,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
 namespace {
 /// A maximal outlining region. This contains all blocks post-dominated by a
 /// sink block, the sink block itself, and all blocks dominated by the sink.
+/// If sink-predecessors and sink-successors cannot be extracted in one region,
+/// the static constructor returns a list of suitable extraction regions.
 class OutliningRegion {
   /// A list of (block, score) pairs. A block's score is non-zero iff it's a
   /// viable sub-region entry point. Blocks with higher scores are better entry
@@ -312,12 +403,9 @@ class OutliningRegion {
   /// Whether the entire function is cold.
   bool EntireFunctionCold = false;
 
-  /// Whether or not \p BB could be the entry point of an extracted region.
-  static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
-
   /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
   static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
-    return isViableEntryPoint(BB) ? Score : 0;
+    return mayExtractBlock(BB) ? Score : 0;
   }
 
   /// These scores should be lower than the score for predecessor blocks,
@@ -333,21 +421,23 @@ public:
   OutliningRegion(OutliningRegion &&) = default;
   OutliningRegion &operator=(OutliningRegion &&) = default;
 
-  static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
-                                const PostDomTree &PDT) {
-    OutliningRegion ColdRegion;
-
+  static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
+                                             const DominatorTree &DT,
+                                             const PostDominatorTree &PDT) {
+    std::vector<OutliningRegion> Regions;
     SmallPtrSet<BasicBlock *, 4> RegionBlocks;
 
+    Regions.emplace_back();
+    OutliningRegion *ColdRegion = &Regions.back();
+
     auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
       RegionBlocks.insert(BB);
-      ColdRegion.Blocks.emplace_back(BB, Score);
-      assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+      ColdRegion->Blocks.emplace_back(BB, Score);
     };
 
     // The ancestor farthest-away from SinkBB, and also post-dominated by it.
     unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
-    ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+    ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
     unsigned BestScore = SinkScore;
 
     // Visit SinkBB's ancestors using inverse DFS.
@@ -360,8 +450,8 @@ public:
       // If the predecessor is cold and has no predecessors, the entire
       // function must be cold.
       if (SinkPostDom && pred_empty(&PredBB)) {
-        ColdRegion.EntireFunctionCold = true;
-        return ColdRegion;
+        ColdRegion->EntireFunctionCold = true;
+        return Regions;
       }
 
       // If SinkBB does not post-dominate a predecessor, do not mark the
@@ -376,7 +466,7 @@ public:
       // considered as entry points before the sink block.
       unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
       if (PredScore > BestScore) {
-        ColdRegion.SuggestedEntryPoint = &PredBB;
+        ColdRegion->SuggestedEntryPoint = &PredBB;
         BestScore = PredScore;
       }
 
@@ -384,9 +474,19 @@ public:
       ++PredIt;
     }
 
-    // Add SinkBB to the cold region. It's considered as an entry point before
-    // any sink-successor blocks.
-    addBlockToRegion(&SinkBB, SinkScore);
+    // If the sink can be added to the cold region, do so. It's considered as
+    // an entry point before any sink-successor blocks.
+    //
+    // Otherwise, split cold sink-successor blocks using a separate region.
+    // This satisfies the requirement that all extraction blocks other than the
+    // first have predecessors within the extraction region.
+    if (mayExtractBlock(SinkBB)) {
+      addBlockToRegion(&SinkBB, SinkScore);
+    } else {
+      Regions.emplace_back();
+      ColdRegion = &Regions.back();
+      BestScore = 0;
+    }
 
     // Find all successors of SinkBB dominated by SinkBB using DFS.
     auto SuccIt = ++df_begin(&SinkBB);
@@ -407,7 +507,7 @@ public:
 
       unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
       if (SuccScore > BestScore) {
-        ColdRegion.SuggestedEntryPoint = &SuccBB;
+        ColdRegion->SuggestedEntryPoint = &SuccBB;
         BestScore = SuccScore;
       }
 
@@ -415,7 +515,7 @@ public:
       ++SuccIt;
     }
 
-    return ColdRegion;
+    return Regions;
   }
 
   /// Whether this region has nothing to extract.
@@ -461,11 +561,7 @@ public:
 };
 } // namespace
 
-bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
-                                          BlockFrequencyInfo *BFI,
-                                          TargetTransformInfo &TTI,
-                                          DominatorTree &DT, PostDomTree &PDT,
-                                          OptimizationRemarkEmitter &ORE) {
+bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
   bool Changed = false;
 
   // The set of cold blocks.
@@ -479,17 +575,28 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
   // the first region to contain a block.
   ReversePostOrderTraversal<Function *> RPOT(&F);
 
+  // Calculate domtrees lazily. This reduces compile-time significantly.
+  std::unique_ptr<DominatorTree> DT;
+  std::unique_ptr<PostDominatorTree> PDT;
+
+  // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This
+  // reduces compile-time significantly. TODO: When we *do* use BFI, we should
+  // be able to salvage its domtrees instead of recomputing them.
+  BlockFrequencyInfo *BFI = nullptr;
+  if (HasProfileSummary)
+    BFI = GetBFI(F);
+
+  TargetTransformInfo &TTI = GetTTI(F);
+  OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+  AssumptionCache *AC = LookupAC(F);
+
   // Find all cold regions.
   for (BasicBlock *BB : RPOT) {
-    // Skip blocks which can't be outlined.
-    if (!mayExtractBlock(*BB))
-      continue;
-
     // This block is already part of some outlining region.
     if (ColdBlocks.count(BB))
       continue;
 
-    bool Cold = PSI.isColdBlock(BB, BFI) ||
+    bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
                 (EnableStaticAnalyis && unlikelyExecuted(*BB));
     if (!Cold)
       continue;
@@ -499,28 +606,35 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
       BB->dump();
     });
 
-    auto Region = OutliningRegion::create(*BB, DT, PDT);
-    if (Region.empty())
-      continue;
+    if (!DT)
+      DT = make_unique<DominatorTree>(F);
+    if (!PDT)
+      PDT = make_unique<PostDominatorTree>(F);
 
-    if (Region.isEntireFunctionCold()) {
-      LLVM_DEBUG(dbgs() << "Entire function is cold\n");
-      return markEntireFunctionCold(F);
-    }
+    auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
+    for (OutliningRegion &Region : Regions) {
+      if (Region.empty())
+        continue;
 
-    // If this outlining region intersects with another, drop the new region.
-    //
-    // TODO: It's theoretically possible to outline more by only keeping the
-    // largest region which contains a block, but the extra bookkeeping to do
-    // this is tricky/expensive.
-    bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
-      return !ColdBlocks.insert(Block.first).second;
-    });
-    if (RegionsOverlap)
-      continue;
+      if (Region.isEntireFunctionCold()) {
+        LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+        return markFunctionCold(F);
+      }
+
+      // If this outlining region intersects with another, drop the new region.
+      //
+      // TODO: It's theoretically possible to outline more by only keeping the
+      // largest region which contains a block, but the extra bookkeeping to do
+      // this is tricky/expensive.
+      bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+        return !ColdBlocks.insert(Block.first).second;
+      });
+      if (RegionsOverlap)
+        continue;
 
-    OutliningWorklist.emplace_back(std::move(Region));
-    ++NumColdRegionsFound;
+      OutliningWorklist.emplace_back(std::move(Region));
+      ++NumColdRegionsFound;
+    }
   }
 
   // Outline single-entry cold regions, splitting up larger regions as needed.
@@ -529,26 +643,17 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
     OutliningRegion Region = OutliningWorklist.pop_back_val();
     assert(!Region.empty() && "Empty outlining region in worklist");
     do {
-      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
-      if (!isProfitableToOutline(SubRegion, TTI)) {
-        LLVM_DEBUG({
-          dbgs() << "Skipping outlining; not profitable to outline\n";
-          SubRegion[0]->dump();
-        });
-        continue;
-      }
-
+      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
       LLVM_DEBUG({
         dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
         for (BasicBlock *BB : SubRegion)
           BB->dump();
       });
 
-      Function *Outlined =
-          extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+      Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
+                                             OutlinedFunctionID);
       if (Outlined) {
         ++OutlinedFunctionID;
-        OutlinedFunctions.insert(Outlined);
         Changed = true;
       }
     } while (!Region.empty());
@@ -559,20 +664,31 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
 
 bool HotColdSplitting::run(Module &M) {
   bool Changed = false;
-  OutlinedFunctions.clear();
-  for (auto &F : M) {
+  bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr);
+  for (auto It = M.begin(), End = M.end(); It != End; ++It) {
+    Function &F = *It;
+
+    // Do not touch declarations.
+    if (F.isDeclaration())
+      continue;
+
+    // Do not modify `optnone` functions.
+    if (F.hasOptNone())
+      continue;
+
+    // Detect inherently cold functions and mark them as such.
+    if (isFunctionCold(F)) {
+      Changed |= markFunctionCold(F);
+      continue;
+    }
+
     if (!shouldOutlineFrom(F)) {
       LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
       continue;
     }
+
     LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
-    DominatorTree DT(F);
-    PostDomTree PDT(F);
-    PDT.recalculate(F);
-    BlockFrequencyInfo *BFI = GetBFI(F);
-    TargetTransformInfo &TTI = GetTTI(F);
-    OptimizationRemarkEmitter &ORE = (*GetORE)(F);
-    Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+    Changed |= outlineColdRegions(F, HasProfileSummary);
   }
   return Changed;
 }
@@ -594,17 +710,21 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
     ORE.reset(new OptimizationRemarkEmitter(&F));
     return *ORE.get();
   };
+  auto LookupAC = [this](Function &F) -> AssumptionCache * {
+    if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+      return ACT->lookupAssumptionCache(F);
+    return nullptr;
+  };
 
-  return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+  return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
 }
 
 PreservedAnalyses
 HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
 
-  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
-      [&FAM](Function &F) -> AssumptionCache & {
-    return FAM.getResult<AssumptionAnalysis>(F);
+  auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
+    return FAM.getCachedResult<AssumptionAnalysis>(F);
   };
 
   auto GBFI = [&FAM](Function &F) {
@@ -625,7 +745,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
 
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
 
-  if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+  if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
 }
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 7d55ebecbf92..7dc4d9ee9e34 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -1,9 +1,8 @@
 //===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,32 +61,55 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     // Ignore blockaddress uses.
     if (isa<BlockAddress>(UR)) continue;
 
-    // Used by a non-instruction, or not the callee of a function, do not
-    // transform.
-    if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
+    // If no abstract call site was created we did not understand the use, bail.
+    AbstractCallSite ACS(&U);
+    if (!ACS)
       return false;
 
-    CallSite CS(cast<Instruction>(UR));
-    if (!CS.isCallee(&U))
+    // Mismatched argument count is undefined behavior. Simply bail out to avoid
+    // handling of such situations below (avoiding asserts/crashes).
+    unsigned NumActualArgs = ACS.getNumArgOperands();
+    if (F.isVarArg() ? ArgumentConstants.size() > NumActualArgs
+                     : ArgumentConstants.size() != NumActualArgs)
       return false;
 
     // Check out all of the potentially constant arguments.  Note that we don't
     // inspect varargs here.
-    CallSite::arg_iterator AI = CS.arg_begin();
     Function::arg_iterator Arg = F.arg_begin();
-    for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
-         ++i, ++AI, ++Arg) {
+    for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
 
       // If this argument is known non-constant, ignore it.
       if (ArgumentConstants[i].second)
         continue;
 
-      Constant *C = dyn_cast<Constant>(*AI);
+      Value *V = ACS.getCallArgOperand(i);
+      Constant *C = dyn_cast_or_null<Constant>(V);
+
+      // Mismatched argument type is undefined behavior. Simply bail out to avoid
+      // handling of such situations below (avoiding asserts/crashes).
+      if (C && Arg->getType() != C->getType())
+        return false;
+
+      // We can only propagate thread independent values through callbacks.
+      // This is different to direct/indirect call sites because for them we
+      // know the thread executing the caller and callee is the same. For
+      // callbacks this is not guaranteed, thus a thread dependent value could
+      // be different for the caller and callee, making it invalid to propagate.
+      if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
+        // Argument became non-constant. If all arguments are non-constant now,
+        // give up on this function.
+        if (++NumNonconstant == ArgumentConstants.size())
+          return false;
+
+        ArgumentConstants[i].second = true;
+        continue;
+      }
+
       if (C && ArgumentConstants[i].first == nullptr) {
         ArgumentConstants[i].first = C;   // First constant seen.
       } else if (C && ArgumentConstants[i].first == C) {
         // Still the constant value we think it is.
-      } else if (*AI == &*Arg) {
+      } else if (V == &*Arg) {
         // Ignore recursive calls passing argument down.
       } else {
         // Argument became non-constant.  If all arguments are non-constant now,
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 973382e2b097..34db75dd8b03 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -1,9 +1,8 @@
 //===-- IPO.cpp -----------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -46,6 +45,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeLowerTypeTestsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerLegacyPassPass(Registry);
+  initializeAttributorLegacyPassPass(Registry);
   initializePostOrderFunctionAttrsLegacyPassPass(Registry);
   initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
   initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 470f97b8ba61..7f5511e008e1 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -1,9 +1,8 @@
 //===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -26,7 +25,7 @@ static bool inferAllPrototypeAttributes(Module &M,
   for (Function &F : M.functions())
     // We only infer things using the prototype and the name; we don't need
     // definitions.
-    if (F.isDeclaration() && !F.hasFnAttribute((Attribute::OptimizeNone)))
+    if (F.isDeclaration() && !F.hasOptNone())
       Changed |= inferLibFuncAttributes(F, TLI);
 
   return Changed;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 82bba1e5c93b..efb71b73cbb7 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -1,9 +1,8 @@
 //===- InlineSimple.cpp - Code to perform simple function inlining --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -69,9 +68,9 @@ public:
         [&](Function &F) -> AssumptionCache & {
       return ACT->getAssumptionCache(F);
     };
-    return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
-                               /*GetBFI=*/None, PSI,
-                               RemarksEnabled ? &ORE : nullptr);
+    return llvm::getInlineCost(
+        cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache,
+        /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr);
   }
 
   bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 66a6f80f31e4..945f8affae6e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -1,9 +1,8 @@
 //===- Inliner.cpp - Code common to all inliners --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -672,7 +671,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
         LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << *Instr << "\n");
         // Update the call graph by deleting the edge from Callee to Caller.
         setInlineRemark(CS, "trivially dead");
-        CG[Caller]->removeCallEdgeFor(CS);
+        CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
         Instr->eraseFromParent();
         ++NumCallsDeleted;
       } else {
@@ -974,7 +973,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     LazyCallGraph::Node &N = *CG.lookup(F);
     if (CG.lookupSCC(N) != C)
       continue;
-    if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+    if (F.hasOptNone()) {
       setInlineRemark(Calls[i].first, "optnone attribute");
       continue;
     }
@@ -1006,8 +1005,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     auto GetInlineCost = [&](CallSite CS) {
       Function &Callee = *CS.getCalledFunction();
       auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
-      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
-                           PSI, &ORE);
+      bool RemarksEnabled =
+          Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+              DEBUG_TYPE);
+      return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
+                           CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
+                           RemarksEnabled ? &ORE : nullptr);
     };
 
     // Now process as many calls as we have within this caller in the sequnece.
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index a6542d28dfd8..2e269604e379 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -1,9 +1,8 @@
 //===-- Internalize.cpp - Mark functions internal -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,11 +27,11 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <fstream>
-#include <set>
 using namespace llvm;
 
 #define DEBUG_TYPE "internalize"
@@ -73,18 +72,15 @@ private:
 
   void LoadFile(StringRef Filename) {
     // Load the APIFile...
-    std::ifstream In(Filename.data());
-    if (!In.good()) {
+    ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+        MemoryBuffer::getFile(Filename);
+    if (!Buf) {
       errs() << "WARNING: Internalize couldn't load file '" << Filename
              << "'! Continuing as if it's empty.\n";
       return; // Just continue as if the file were empty
     }
-    while (In) {
-      std::string Symbol;
-      In >> Symbol;
-      if (!Symbol.empty())
-        ExternalNames.insert(Symbol);
-    }
+    for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
+      ExternalNames.insert(*I);
   }
 };
 } // end anonymous namespace
@@ -114,7 +110,7 @@ bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
 }
 
 bool InternalizePass::maybeInternalize(
-    GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+    GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
   if (Comdat *C = GV.getComdat()) {
     if (ExternalComdats.count(C))
       return false;
@@ -141,7 +137,7 @@ bool InternalizePass::maybeInternalize(
 // If GV is part of a comdat and is externally visible, keep track of its
 // comdat so that we don't internalize any of its members.
 void InternalizePass::checkComdatVisibility(
-    GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+    GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
   Comdat *C = GV.getComdat();
   if (!C)
     return;
@@ -158,7 +154,7 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
   collectUsedGlobalVariables(M, Used, false);
 
   // Collect comdat visiblity information for the module.
-  std::set<const Comdat *> ExternalComdats;
+  DenseSet<const Comdat *> ExternalComdats;
   if (!M.getComdatSymbolTable().empty()) {
     for (Function &F : M)
       checkComdatVisibility(F, ExternalComdats);
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 733235d45a09..91c7b5f5f135 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -1,9 +1,8 @@
 //===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -51,6 +51,7 @@ namespace {
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addRequired<LoopInfoWrapperPass>();
+      AU.addUsedIfAvailable<AssumptionCacheTracker>();
     }
   };
 }
@@ -139,7 +140,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (ShouldExtractLoop) {
     if (NumLoops == 0) return Changed;
     --NumLoops;
-    CodeExtractor Extractor(DT, *L);
+    AssumptionCache *AC = nullptr;
+    if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+      AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+    CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
     if (Extractor.extractCodeRegion() != nullptr) {
       Changed = true;
       // After extraction, the loop is replaced by a function call, so
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 87c65db09517..f7371284f47e 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1,9 +1,8 @@
 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -549,10 +548,10 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
 }
 
 void LowerTypeTestsModule::allocateByteArrays() {
-  std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
-                   [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
-                     return BAI1.BitSize > BAI2.BitSize;
-                   });
+  llvm::stable_sort(ByteArrayInfos,
+                    [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+                      return BAI1.BitSize > BAI2.BitSize;
+                    });
 
   std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
 
@@ -619,7 +618,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
     }
 
     Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
-    Value *Byte = B.CreateLoad(ByteAddr);
+    Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
 
     Value *ByteAndMask =
         B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
@@ -1553,11 +1552,10 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
 
   // Order the sets of indices by size. The GlobalLayoutBuilder works best
   // when given small index sets first.
-  std::stable_sort(
-      TypeMembers.begin(), TypeMembers.end(),
-      [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
-        return O1.size() < O2.size();
-      });
+  llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
+                                    const std::set<uint64_t> &O2) {
+    return O1.size() < O2.size();
+  });
 
   // Create a GlobalLayoutBuilder and provide it with index sets as layout
   // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
@@ -1693,6 +1691,14 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
 }
 
 bool LowerTypeTestsModule::lower() {
+  // If only some of the modules were split, we cannot correctly perform
+  // this transformation. We already checked for the presence of type tests
+  // with partially split modules during the thin link, and would have emitted
+  // an error if any were found, so here we can simply return.
+  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+    return false;
+
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
   Function *ICallBranchFunnelFunc =
@@ -1702,13 +1708,6 @@ bool LowerTypeTestsModule::lower() {
       !ExportSummary && !ImportSummary)
     return false;
 
-  // If only some of the modules were split, we cannot correctly handle
-  // code that contains type tests.
-  if (TypeTestFunc && !TypeTestFunc->use_empty() &&
-      ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
-       (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
-    report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
-
   if (ImportSummary) {
     if (TypeTestFunc) {
       for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 11efe95b10d4..3a08069dcd4a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -1,9 +1,8 @@
 //===- MergeFunctions.cpp - Merge identical functions ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -190,8 +189,6 @@ public:
   void replaceBy(Function *G) const {
     F = G;
   }
-
-  void release() { F = nullptr; }
 };
 
 /// MergeFunctions finds functions which will generate identical machine code,
@@ -281,8 +278,8 @@ private:
   // Replace G with an alias to F (deleting function G)
   void writeAlias(Function *F, Function *G);
 
-  // Replace G with an alias to F if possible, or a thunk to F if
-  // profitable. Returns false if neither is the case.
+  // Replace G with an alias to F if possible, or a thunk to F if possible.
+  // Returns false if neither is the case.
   bool writeThunkOrAlias(Function *F, Function *G);
 
   /// Replace function F with function G in the function tree.
@@ -383,6 +380,11 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
 }
 #endif
 
+/// Check whether \p F is eligible for function merging.
+static bool isEligibleForMerging(Function &F) {
+  return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
+}
+
 bool MergeFunctions::runOnModule(Module &M) {
   if (skipModule(M))
     return false;
@@ -394,17 +396,12 @@ bool MergeFunctions::runOnModule(Module &M) {
   std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
     HashedFuncs;
   for (Function &Func : M) {
-    if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+    if (isEligibleForMerging(Func)) {
       HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
     }
   }
 
-  std::stable_sort(
-      HashedFuncs.begin(), HashedFuncs.end(),
-      [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
-         const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
-        return a.first < b.first;
-      });
+  llvm::stable_sort(HashedFuncs, less_first());
 
   auto S = HashedFuncs.begin();
   for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
@@ -654,12 +651,16 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
   LLVM_DEBUG(dbgs() << " }\n");
 }
 
-// Don't merge tiny functions using a thunk, since it can just end up
-// making the function larger.
-static bool isThunkProfitable(Function * F) {
+/// Whether this function may be replaced by a forwarding thunk.
+static bool canCreateThunkFor(Function *F) {
+  if (F->isVarArg())
+    return false;
+
+  // Don't merge tiny functions using a thunk, since it can just end up
+  // making the function larger.
   if (F->size() == 1) {
     if (F->front().size() <= 2) {
-      LLVM_DEBUG(dbgs() << "isThunkProfitable: " << F->getName()
+      LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
                         << " is too small to bother creating a thunk for\n");
       return false;
     }
@@ -695,6 +696,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
   } else {
     NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
                             G->getAddressSpace(), "", G->getParent());
+    NewG->setComdat(G->getComdat());
     BB = BasicBlock::Create(F->getContext(), "", NewG);
   }
 
@@ -787,7 +789,7 @@ bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
     writeAlias(F, G);
     return true;
   }
-  if (isThunkProfitable(F)) {
+  if (canCreateThunkFor(F)) {
     writeThunk(F, G);
     return true;
   }
@@ -802,9 +804,9 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
     // Both writeThunkOrAlias() calls below must succeed, either because we can
     // create aliases for G and NewF, or because a thunk for F is profitable.
     // F here has the same signature as NewF below, so that's what we check.
-    if (!isThunkProfitable(F) && (!canCreateAliasFor(F) || !canCreateAliasFor(G))) {
+    if (!canCreateThunkFor(F) &&
+        (!canCreateAliasFor(F) || !canCreateAliasFor(G)))
       return;
-    }
 
     // Make them both thunks to the same internal function.
     Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
@@ -944,25 +946,7 @@ void MergeFunctions::remove(Function *F) {
 // For each instruction used by the value, remove() the function that contains
 // the instruction. This should happen right before a call to RAUW.
 void MergeFunctions::removeUsers(Value *V) {
-  std::vector<Value *> Worklist;
-  Worklist.push_back(V);
-  SmallPtrSet<Value*, 8> Visited;
-  Visited.insert(V);
-  while (!Worklist.empty()) {
-    Value *V = Worklist.back();
-    Worklist.pop_back();
-
-    for (User *U : V->users()) {
-      if (Instruction *I = dyn_cast<Instruction>(U)) {
-        remove(I->getFunction());
-      } else if (isa<GlobalValue>(U)) {
-        // do nothing
-      } else if (Constant *C = dyn_cast<Constant>(U)) {
-        for (User *UU : C->users()) {
-          if (!Visited.insert(UU).second)
-            Worklist.push_back(UU);
-        }
-      }
-    }
-  }
+  for (User *U : V->users())
+    if (auto *I = dyn_cast<Instruction>(U))
+      remove(I->getFunction());
 }
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index da214a1d3b44..733782e8764d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -1,9 +1,8 @@
 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -182,11 +181,11 @@ struct FunctionOutliningMultiRegionInfo {
 
   // Container for outline regions
   struct OutlineRegionInfo {
-    OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
+    OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
                       BasicBlock *ReturnBlock)
-        : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
-          ReturnBlock(ReturnBlock) {}
+        : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
+          ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
     SmallVector<BasicBlock *, 8> Region;
     BasicBlock *EntryBlock;
     BasicBlock *ExitBlock;
@@ -200,10 +199,12 @@ struct PartialInlinerImpl {
 
   PartialInlinerImpl(
       std::function<AssumptionCache &(Function &)> *GetAC,
+      function_ref<AssumptionCache *(Function &)> LookupAC,
       std::function<TargetTransformInfo &(Function &)> *GTTI,
       Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
       ProfileSummaryInfo *ProfSI)
-      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+      : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
+        GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
 
   bool run(Module &M);
   // Main part of the transformation that calls helper functions to find
@@ -223,9 +224,11 @@ struct PartialInlinerImpl {
     // Two constructors, one for single region outlining, the other for
     // multi-region outlining.
     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
-                   OptimizationRemarkEmitter &ORE);
+                   OptimizationRemarkEmitter &ORE,
+                   function_ref<AssumptionCache *(Function &)> LookupAC);
     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
-                   OptimizationRemarkEmitter &ORE);
+                   OptimizationRemarkEmitter &ORE,
+                   function_ref<AssumptionCache *(Function &)> LookupAC);
     ~FunctionCloner();
 
     // Prepare for function outlining: making sure there is only
@@ -261,11 +264,13 @@ struct PartialInlinerImpl {
     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
     OptimizationRemarkEmitter &ORE;
+    function_ref<AssumptionCache *(Function &)> LookupAC;
   };
 
 private:
   int NumPartialInlining = 0;
   std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
   std::function<TargetTransformInfo &(Function &)> *GetTTI;
   Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
   ProfileSummaryInfo *PSI;
@@ -366,12 +371,17 @@ struct PartialInlinerLegacyPass : public ModulePass {
       return ACT->getAssumptionCache(F);
     };
 
+    auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
+      return ACT->lookupAssumptionCache(F);
+    };
+
     std::function<TargetTransformInfo &(Function &)> GetTTI =
         [&TTIWP](Function &F) -> TargetTransformInfo & {
       return TTIWP->getTTI(F);
     };
 
-    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
+    return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
+                              &GetTTI, NoneType::None, PSI)
         .run(M);
   }
 };
@@ -525,7 +535,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
       // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
-      RegInfo.Region = DominateVector;
       OutliningInfo->ORI.push_back(RegInfo);
 #ifndef NDEBUG
       if (TracePartialInlining) {
@@ -763,8 +772,13 @@ bool PartialInlinerImpl::shouldPartialInline(
 
   Function *Caller = CS.getCaller();
   auto &CalleeTTI = (*GetTTI)(*Callee);
-  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
-                                *GetAssumptionCache, GetBFI, PSI, &ORE);
+  bool RemarksEnabled =
+      Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+          DEBUG_TYPE);
+  assert(Call && "invalid callsite for partial inline");
+  InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
+                                CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
+                                RemarksEnabled ? &ORE : nullptr);
 
   if (IC.isAlways()) {
     ORE.emit([&]() {
@@ -798,7 +812,7 @@ bool PartialInlinerImpl::shouldPartialInline(
   const DataLayout &DL = Caller->getParent()->getDataLayout();
 
   // The savings of eliminating the call:
-  int NonWeightedSavings = getCallsiteCost(CS, DL);
+  int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
   BlockFrequency NormWeightedSavings(NonWeightedSavings);
 
   // Weighted saving is smaller than weighted cost, return false
@@ -855,12 +869,12 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
       continue;
 
     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
-      InlineCost += getCallsiteCost(CallSite(CI), DL);
+      InlineCost += getCallsiteCost(*CI, DL);
       continue;
     }
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
-      InlineCost += getCallsiteCost(CallSite(II), DL);
+      InlineCost += getCallsiteCost(*II, DL);
       continue;
     }
 
@@ -949,8 +963,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
 }
 
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
-    Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
-    : OrigFunc(F), ORE(ORE) {
+    Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
+    function_ref<AssumptionCache *(Function &)> LookupAC)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
   ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -973,8 +988,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
 
 PartialInlinerImpl::FunctionCloner::FunctionCloner(
     Function *F, FunctionOutliningMultiRegionInfo *OI,
-    OptimizationRemarkEmitter &ORE)
-    : OrigFunc(F), ORE(ORE) {
+    OptimizationRemarkEmitter &ORE,
+    function_ref<AssumptionCache *(Function &)> LookupAC)
+    : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
   ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
 
   // Clone the function, so that we can hack away on it.
@@ -1112,7 +1128,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
     int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
 
     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
-                     ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
+                     ClonedFuncBFI.get(), &BPI,
+                     LookupAC(*RegionInfo.EntryBlock->getParent()),
+                     /* AllowVarargs */ false);
 
     CE.findInputsOutputs(Inputs, Outputs, Sinks);
 
@@ -1194,7 +1212,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
   // Extract the body of the if.
   Function *OutlinedFunc =
       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
-                    ClonedFuncBFI.get(), &BPI,
+                    ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
                     /* AllowVarargs */ true)
           .extractCodeRegion();
 
@@ -1258,7 +1276,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
         computeOutliningColdRegionsInfo(F, ORE);
     if (OMRI) {
-      FunctionCloner Cloner(F, OMRI.get(), ORE);
+      FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
 
 #ifndef NDEBUG
       if (TracePartialInlining) {
@@ -1291,7 +1309,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
   if (!OI)
     return {false, nullptr};
 
-  FunctionCloner Cloner(F, OI.get(), ORE);
+  FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
   Cloner.NormalizeReturnBlock();
 
   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
@@ -1485,6 +1503,10 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
     return FAM.getResult<AssumptionAnalysis>(F);
   };
 
+  auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
+    return FAM.getCachedResult<AssumptionAnalysis>(F);
+  };
+
   std::function<BlockFrequencyInfo &(Function &)> GetBFI =
       [&FAM](Function &F) -> BlockFrequencyInfo & {
     return FAM.getResult<BlockFrequencyAnalysis>(F);
@@ -1497,7 +1519,8 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
 
   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
 
-  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
+  if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
+                         {GetBFI}, PSI)
           .run(M))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9764944dc332..3ea77f08fd3c 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -1,9 +1,8 @@
 //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,6 +30,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
@@ -39,9 +39,13 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
 
 using namespace llvm;
 
@@ -49,14 +53,6 @@ static cl::opt<bool>
     RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                        cl::ZeroOrMore, cl::desc("Run Partial inlinining pass"));
 
-static cl::opt<bool>
-    RunLoopVectorization("vectorize-loops", cl::Hidden,
-                         cl::desc("Run the Loop vectorization passes"));
-
-static cl::opt<bool>
-RunSLPVectorization("vectorize-slp", cl::Hidden,
-                    cl::desc("Run the SLP vectorization passes"));
-
 static cl::opt<bool>
 UseGVNAfterVectorization("use-gvn-after-vectorization",
   cl::init(false), cl::Hidden,
@@ -73,12 +69,6 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                                cl::desc("Run the NewGVN pass"));
 
-static cl::opt<bool>
-RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
-  cl::init(true), cl::Hidden,
-  cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
-           "vectorizer instead of before"));
-
 // Experimental option to use CFL-AA
 enum class CFLAAType { None, Steensgaard, Andersen, Both };
 static cl::opt<CFLAAType>
@@ -104,23 +94,13 @@ static cl::opt<bool>
     EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
                             cl::desc("Enable preparation for ThinLTO."));
 
+static cl::opt<bool>
+    EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
+                         cl::desc("Enable performing ThinLTO."));
+
 cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
     cl::desc("Enable hot-cold splitting pass"));
 
-
-static cl::opt<bool> RunPGOInstrGen(
-    "profile-generate", cl::init(false), cl::Hidden,
-    cl::desc("Enable PGO instrumentation."));
-
-static cl::opt<std::string>
-    PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
-                      cl::desc("Specify the path of profile data file."));
-
-static cl::opt<std::string> RunPGOInstrUse(
-    "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
-    cl::desc("Enable use phase of PGO instrumentation and specify the path "
-             "of profile data file"));
-
 static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
@@ -134,10 +114,6 @@ static cl::opt<int> PreInlineThreshold(
     cl::desc("Control the amount of inlining in pre-instrumentation inliner "
              "(default = 75)"));
 
-static cl::opt<bool> EnableEarlyCSEMemSSA(
-    "enable-earlycse-memssa", cl::init(true), cl::Hidden,
-    cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
-
 static cl::opt<bool> EnableGVNHoist(
     "enable-gvn-hoist", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN hoisting pass (default = off)"));
@@ -156,10 +132,21 @@ static cl::opt<bool> EnableGVNSink(
     "enable-gvn-sink", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN sinking pass (default = off)"));
 
+// This option is used in simplifying testing SampleFDO optimizations for
+// profile loading.
 static cl::opt<bool>
     EnableCHR("enable-chr", cl::init(true), cl::Hidden,
               cl::desc("Enable control height reduction optimization (CHR)"));
 
+cl::opt<bool> FlattenedProfileUsed(
+    "flattened-profile-used", cl::init(false), cl::Hidden,
+    cl::desc("Indicate the sample profile being used is flattened, i.e., "
+             "no inline hierachy exists in the profile. "));
+
+cl::opt<bool> EnableOrderFileInstrumentation(
+    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
+    cl::desc("Enable order file instrumentation (default = off)"));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -167,19 +154,26 @@ PassManagerBuilder::PassManagerBuilder() {
     Inliner = nullptr;
     DisableUnrollLoops = false;
     SLPVectorize = RunSLPVectorization;
-    LoopVectorize = RunLoopVectorization;
+    LoopVectorize = EnableLoopVectorization;
+    LoopsInterleaved = EnableLoopInterleaving;
     RerollLoops = RunLoopRerolling;
     NewGVN = RunNewGVN;
+    LicmMssaOptCap = SetLicmMssaOptCap;
+    LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
     DisableGVNLoadPRE = false;
+    ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
     VerifyInput = false;
     VerifyOutput = false;
     MergeFunctions = false;
     PrepareForLTO = false;
-    EnablePGOInstrGen = RunPGOInstrGen;
-    PGOInstrGen = PGOOutputFile;
-    PGOInstrUse = RunPGOInstrUse;
+    EnablePGOInstrGen = false;
+    EnablePGOCSInstrGen = false;
+    EnablePGOCSInstrUse = false;
+    PGOInstrGen = "";
+    PGOInstrUse = "";
+    PGOSampleUse = "";
     PrepareForThinLTO = EnablePrepareForThinLTO;
-    PerformThinLTO = false;
+    PerformThinLTO = EnablePerformThinLTO;
     DivergentTarget = false;
 }
 
@@ -272,13 +266,19 @@ void PassManagerBuilder::populateFunctionPassManager(
 }
 
 // Do PGO instrumentation generation or use pass as the option specified.
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
-  if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
+void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
+                                           bool IsCS = false) {
+  if (IsCS) {
+    if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
+      return;
+  } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
     return;
+
   // Perform the preinline and cleanup passes for O1 and above.
   // And avoid doing them if optimizing for size.
+  // We will not do this inline for context sensitive PGO (when IsCS is true).
   if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
-      PGOSampleUse.empty()) {
+      PGOSampleUse.empty() && !IsCS) {
     // Create preinline pass. We construct an InlineParams object and specify
     // the threshold here to avoid the command line options of the regular
     // inliner to influence pre-inlining. The only fields of InlineParams we
@@ -296,22 +296,23 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
     MPM.add(createInstructionCombiningPass()); // Combine silly seq's
     addExtensionsToPM(EP_Peephole, MPM);
   }
-  if (EnablePGOInstrGen) {
-    MPM.add(createPGOInstrumentationGenLegacyPass());
+  if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
+    MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
     // Add the profile lowering pass.
     InstrProfOptions Options;
     if (!PGOInstrGen.empty())
       Options.InstrProfileOutput = PGOInstrGen;
     Options.DoCounterPromotion = true;
+    Options.UseBFIInPromotion = IsCS;
     MPM.add(createLoopRotatePass());
-    MPM.add(createInstrProfilingLegacyPass(Options));
+    MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
   }
   if (!PGOInstrUse.empty())
-    MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+    MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
   // Indirect call promotion that promotes intra-module targets only.
   // For ThinLTO this is done earlier due to interactions with globalopt
   // for imported functions. We don't run this at -O0.
-  if (OptLevel > 0)
+  if (OptLevel > 0 && !IsCS)
     MPM.add(
         createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
 }
@@ -320,7 +321,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // Start of function pass.
   // Break up aggregate allocas, using SSAUpdater.
   MPM.add(createSROAPass());
-  MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
+  MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
   if (EnableGVNHoist)
     MPM.add(createGVNHoistPass());
   if (EnableGVNSink) {
@@ -359,7 +360,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   }
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
-  MPM.add(createLICMPass());                  // Hoist loop invariants
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
   if (EnableSimpleLoopUnswitch)
     MPM.add(createSimpleLoopUnswitchLegacyPass());
   else
@@ -378,8 +379,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   if (EnableLoopInterchange)
     MPM.add(createLoopInterchangePass()); // Interchange loops
 
-  MPM.add(createSimpleLoopUnrollPass(OptLevel,
-                                     DisableUnrollLoops)); // Unroll small loops
+  // Unroll small loops
+  MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                     ForgetAllSCEVInLoopUnroll));
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
   // This ends the loop pass pipelines.
 
@@ -403,14 +405,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
-  MPM.add(createLICMPass());
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
 
   if (RerollLoops)
     MPM.add(createLoopRerollPass());
-  if (!RunSLPAfterLoopVectorization && SLPVectorize)
-    MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
 
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -419,15 +419,23 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   addExtensionsToPM(EP_Peephole, MPM);
 
   if (EnableCHR && OptLevel >= 3 &&
-      (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+      (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
     MPM.add(createControlHeightReductionLegacyPass());
 }
 
 void PassManagerBuilder::populateModulePassManager(
     legacy::PassManagerBase &MPM) {
+  // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
+  // is handled separately, so just check this is not the ThinLTO post-link.
+  bool DefaultOrPreLinkPipeline = !PerformThinLTO;
+
   if (!PGOSampleUse.empty()) {
     MPM.add(createPruneEHPass());
-    MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
+    // In ThinLTO mode, when flattened profile is used, all the available
+    // profile information will be annotated in PreLink phase so there is
+    // no need to load the profile again in PostLink.
+    if (!(FlattenedProfileUsed && PerformThinLTO))
+      MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
   }
 
   // Allow forcing function attributes as a debugging and tuning aid.
@@ -508,6 +516,10 @@ void PassManagerBuilder::populateModulePassManager(
 
   MPM.add(createIPSCCPPass());          // IP SCCP
   MPM.add(createCalledValuePropagationPass());
+
+  // Infer attributes on declarations, call sites, arguments, etc.
+  MPM.add(createAttributorLegacyPass());
+
   MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
   // Promote any localized global vars.
   MPM.add(createPromoteMemoryToRegisterPass());
@@ -523,9 +535,14 @@ void PassManagerBuilder::populateModulePassManager(
   // profile annotation in backend more difficult.
   // PGO instrumentation is added during the compile phase for ThinLTO, do
   // not run it a second time
-  if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
+  if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
     addPGOInstrPasses(MPM);
 
+  // Create profile COMDAT variables. Lld linker wants to see all variables
+  // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
+  if (!PerformThinLTO && EnablePGOCSInstrGen)
+    MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
+
   // We add a module alias analysis pass here. In part due to bugs in the
   // analysis infrastructure this "works" in that the analysis stays alive
   // for the entire SCC pass run below.
@@ -567,6 +584,17 @@ void PassManagerBuilder::populateModulePassManager(
     // and saves running remaining passes on the eliminated functions.
     MPM.add(createEliminateAvailableExternallyPass());
 
+  // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass
+  // for LTO and ThinLTO -- The actual pass will be called after all inlines
+  // are performed.
+  // Need to do this after COMDAT variables have been eliminated,
+  // (i.e. after EliminateAvailableExternallyPass).
+  if (!(PrepareForLTO || PrepareForThinLTO))
+    addPGOInstrPasses(MPM, /* IsCS */ true);
+
+  if (EnableOrderFileInstrumentation)
+    MPM.add(createInstrOrderFilePass());
+
   MPM.add(createReversePostOrderFunctionAttrsPass());
 
   // The inliner performs some kind of dead code elimination as it goes,
@@ -605,7 +633,7 @@ void PassManagerBuilder::populateModulePassManager(
   // later might get benefit of no-alias assumption in clone loop.
   if (UseLoopVersioningLICM) {
     MPM.add(createLoopVersioningLICMPass());    // Do LoopVersioningLICM
-    MPM.add(createLICMPass());                  // Hoist loop invariants
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
   }
 
   // We add a fresh GlobalsModRef run at this point. This is particularly
@@ -640,7 +668,7 @@ void PassManagerBuilder::populateModulePassManager(
   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
   MPM.add(createLoopDistributePass());
 
-  MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize));
+  MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
 
   // Eliminate loads by forwarding stores from the previous iteration to loads
   // of the current iteration.
@@ -662,7 +690,7 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(createEarlyCSEPass());
     MPM.add(createCorrelatedValuePropagationPass());
     addInstructionCombiningPass(MPM);
-    MPM.add(createLICMPass());
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
     MPM.add(createCFGSimplificationPass());
     addInstructionCombiningPass(MPM);
@@ -675,7 +703,7 @@ void PassManagerBuilder::populateModulePassManager(
   // before SLP vectorization.
   MPM.add(createCFGSimplificationPass(1, true, true, false, true));
 
-  if (RunSLPAfterLoopVectorization && SLPVectorize) {
+  if (SLPVectorize) {
     MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
     if (OptLevel > 1 && ExtraVectorizerPasses) {
       MPM.add(createEarlyCSEPass());
@@ -692,8 +720,9 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(createLoopUnrollAndJamPass(OptLevel));
   }
 
-  MPM.add(createLoopUnrollPass(OptLevel,
-                               DisableUnrollLoops)); // Unroll small loops
+  // Unroll small loops
+  MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                               ForgetAllSCEVInLoopUnroll));
 
   if (!DisableUnrollLoops) {
     // LoopUnroll may generate some redundency to cleanup.
@@ -703,7 +732,7 @@ void PassManagerBuilder::populateModulePassManager(
     // unrolled loop is a inner loop, then the prologue will be inside the
     // outer loop. LICM pass can help to promote the runtime check out if the
     // checked value is loop invariant.
-    MPM.add(createLICMPass());
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
   }
 
   MPM.add(createWarnMissedTransformationsPass());
@@ -722,6 +751,11 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(createConstantMergePass());     // Merge dup global constants
   }
 
+  // See comment in the new PM for justification of scheduling splitting at
+  // this stage (\ref buildModuleSimplificationPipeline).
+  if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
+    MPM.add(createHotColdSplittingPass());
+
   if (MergeFunctions)
     MPM.add(createMergeFunctionsPass());
 
@@ -738,9 +772,6 @@ void PassManagerBuilder::populateModulePassManager(
   // flattening of blocks.
   MPM.add(createDivRemPairsPass());
 
-  if (EnableHotColdSplit)
-    MPM.add(createHotColdSplittingPass());
-
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
   MPM.add(createCFGSimplificationPass());
@@ -793,6 +824,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
     // Attach metadata to indirect call sites indicating the set of functions
     // they may target at run-time. This should follow IPSCCP.
     PM.add(createCalledValuePropagationPass());
+
+    // Infer attributes on declarations, call sites, arguments, etc.
+    PM.add(createAttributorLegacyPass());
   }
 
   // Infer attributes about definitions. The readnone attribute in particular is
@@ -842,6 +876,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
 
   PM.add(createPruneEHPass());   // Remove dead EH info.
 
+  // CSFDO instrumentation and use pass.
+  addPGOInstrPasses(PM, /* IsCS */ true);
+
   // Optimize globals again if we ran the inliner.
   if (RunInliner)
     PM.add(createGlobalOptimizerPass());
@@ -859,11 +896,16 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // Break up allocas
   PM.add(createSROAPass());
 
-  // Run a few AA driven optimizations here and now, to cleanup the code.
+  // LTO provides additional opportunities for tailcall elimination due to
+  // link-time inlining, and visibility of nocapture attribute.
+  PM.add(createTailCallEliminationPass());
+
+  // Infer attributes on declarations, call sites, arguments, etc.
   PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
+  // Run a few AA driven optimizations here and now, to cleanup the code.
   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
 
-  PM.add(createLICMPass());                 // Hoist loop invariants.
+  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
   PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
   PM.add(NewGVN ? createNewGVNPass()
                 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
@@ -878,11 +920,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   if (EnableLoopInterchange)
     PM.add(createLoopInterchangePass());
 
-  PM.add(createSimpleLoopUnrollPass(OptLevel,
-                                    DisableUnrollLoops)); // Unroll small loops
+  // Unroll small loops
+  PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                    ForgetAllSCEVInLoopUnroll));
   PM.add(createLoopVectorizePass(true, !LoopVectorize));
   // The vectorizer may have significantly shortened a loop body; unroll again.
-  PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
+  PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                              ForgetAllSCEVInLoopUnroll));
 
   PM.add(createWarnMissedTransformationsPass());
 
@@ -896,9 +940,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   PM.add(createBitTrackingDCEPass());
 
   // More scalar chains could be vectorized due to more alias information
-  if (RunSLPAfterLoopVectorization)
-    if (SLPVectorize)
-      PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+  if (SLPVectorize)
+    PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
 
   // After vectorization, assume intrinsics may tell us more about pointer
   // alignments.
@@ -913,6 +956,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
 
 void PassManagerBuilder::addLateLTOOptimizationPasses(
     legacy::PassManagerBase &PM) {
+  // See comment in the new PM for justification of scheduling splitting at
+  // this stage (\ref buildLTODefaultPipeline).
+  if (EnableHotColdSplit)
+    PM.add(createHotColdSplittingPass());
+
   // Delete basic blocks, which optimization passes may have killed.
   PM.add(createCFGSimplificationPass());
 
@@ -968,6 +1016,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
   if (VerifyInput)
     PM.add(createVerifierPass());
 
+  addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
+
   if (OptLevel != 0)
     addLTOOptimizationPasses(PM);
   else {
@@ -989,6 +1039,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
   if (OptLevel != 0)
     addLateLTOOptimizationPasses(PM);
 
+  addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
+
   if (VerifyOutput)
     PM.add(createVerifierPass());
 }
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index ae586c017471..cb3915dfb678 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -1,9 +1,8 @@
 //===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -204,7 +203,8 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
 
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
       if (CallInst *CI = dyn_cast<CallInst>(I++))
-        if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+        if (CI->doesNotReturn() && !CI->isMustTailCall() &&
+            !isa<UnreachableInst>(I)) {
           // This call calls a function that cannot return.  Insert an
           // unreachable instruction after it and simplify the code.  Do this
           // by splitting the BB, adding the unreachable, then deleting the
@@ -242,12 +242,12 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
       break;
     }
 
-    if (auto CS = CallSite (&*I)) {
-      const Function *Callee = CS.getCalledFunction();
+    if (auto *Call = dyn_cast<CallBase>(&*I)) {
+      const Function *Callee = Call->getCalledFunction();
       if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
-        CGN->removeCallEdgeFor(CS);
+        CGN->removeCallEdgeFor(*Call);
       else if (!Callee->isIntrinsic())
-        CGN->removeCallEdgeFor(CS);
+        CGN->removeCallEdgeFor(*Call);
     }
 
     if (!I->use_empty())
diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp
index d2c34abfc132..7be3608bd2ec 100644
--- a/lib/Transforms/IPO/SCCP.cpp
+++ b/lib/Transforms/IPO/SCCP.cpp
@@ -79,6 +79,7 @@ char IPSCCPLegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
                       "Interprocedural Sparse Conditional Constant Propagation",
                       false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 9f123c2b875e..877d20e72ffc 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1,9 +1,8 @@
 //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -319,6 +318,14 @@ protected:
 
   /// Optimization Remark Emitter used to emit diagnostic remarks.
   OptimizationRemarkEmitter *ORE = nullptr;
+
+  // Information recorded when we declined to inline a call site
+  // because we have determined it is too cold is accumulated for
+  // each callee function. Initially this is just the entry count.
+  struct NotInlinedProfileInfo {
+    uint64_t entryCount;
+  };
+  DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
 };
 
 class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -745,8 +752,9 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
   // when cost exceeds threshold without checking all IRs in the callee.
   // The acutal cost does not matter because we only checks isNever() to
   // see if it is legal to inline the callsite.
-  InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
-                                  None, nullptr, nullptr);
+  InlineCost Cost =
+      getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
+                    None, nullptr, nullptr);
   if (Cost.isNever()) {
     ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
               << "incompatible inlining");
@@ -779,6 +787,8 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
 bool SampleProfileLoader::inlineHotFunctions(
     Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
   DenseSet<Instruction *> PromotedInsns;
+
+  DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
   bool Changed = false;
   while (true) {
     bool LocalChanged = false;
@@ -791,6 +801,8 @@ bool SampleProfileLoader::inlineHotFunctions(
         if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
             !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
           Candidates.push_back(&I);
+          if (FS->getEntrySamples() > 0)
+            localNotInlinedCallSites.try_emplace(&I, FS);
           if (callsiteIsHot(FS, PSI))
             Hot = true;
         }
@@ -823,6 +835,9 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (CalleeFunctionName == F.getName())
             continue;
 
+          if (!callsiteIsHot(FS, PSI))
+            continue;
+
           const char *Reason = "Callee function not available";
           auto R = SymbolMap.find(CalleeFunctionName);
           if (R != SymbolMap.end() && R->getValue() &&
@@ -836,8 +851,10 @@ bool SampleProfileLoader::inlineHotFunctions(
             PromotedInsns.insert(I);
             // If profile mismatches, we should not attempt to inline DI.
             if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(DI))
+                inlineCallInstruction(DI)) {
+              localNotInlinedCallSites.erase(I);
               LocalChanged = true;
+            }
           } else {
             LLVM_DEBUG(dbgs()
                        << "\nFailed to promote indirect call to "
@@ -846,8 +863,10 @@ bool SampleProfileLoader::inlineHotFunctions(
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(I))
+        if (inlineCallInstruction(I)) {
+          localNotInlinedCallSites.erase(I);
           LocalChanged = true;
+        }
       } else if (IsThinLTOPreLink) {
         findCalleeFunctionSamples(*I)->findInlinedFunctions(
             InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
@@ -859,6 +878,18 @@ bool SampleProfileLoader::inlineHotFunctions(
       break;
     }
   }
+
+  // Accumulate not inlined callsite information into notInlinedCallInfo.
+  for (const auto &Pair : localNotInlinedCallSites) {
+    Instruction *I = Pair.getFirst();
+    Function *Callee = CallSite(I).getCalledFunction();
+    if (!Callee || Callee->isDeclaration())
+      continue;
+    const FunctionSamples *FS = Pair.getSecond();
+    auto pair =
+        notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+    pair.first->second.entryCount += FS->getEntrySamples();
+  }
   return Changed;
 }
 
@@ -1299,10 +1330,10 @@ void SampleProfileLoader::propagateWeights(Function &F) {
           annotateValueSite(*I.getParent()->getParent()->getParent(), I,
                             SortedCallTargets, Sum, IPVK_IndirectCallTarget,
                             SortedCallTargets.size());
-        } else if (!dyn_cast<IntrinsicInst>(&I)) {
-          SmallVector<uint32_t, 1> Weights;
-          Weights.push_back(BlockWeights[BB]);
-          I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+        } else if (!isa<IntrinsicInst>(&I)) {
+          I.setMetadata(LLVMContext::MD_prof,
+                        MDB.createBranchWeights(
+                            {static_cast<uint32_t>(BlockWeights[BB])}));
         }
       }
     }
@@ -1568,8 +1599,9 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
     return false;
 
   PSI = _PSI;
-  if (M.getProfileSummary() == nullptr)
-    M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+  if (M.getProfileSummary(/* IsCS */ false) == nullptr)
+    M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
+                        ProfileSummary::PSK_Sample);
 
   // Compute the total number of samples collected in this profile.
   for (const auto &I : Reader->getProfiles())
@@ -1601,6 +1633,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
       clearFunctionData();
       retval |= runOnFunction(F, AM);
     }
+
+  // Account for cold calls that were not inlined.
+  for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
+       notInlinedCallInfo)
+    updateProfileCallee(pair.first, pair.second.entryCount);
+
   return retval;
 }
 
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 3c3c5dd19d1f..106db3c8bd9d 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -1,9 +1,8 @@
 //===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index c9afb060a91a..67a473612fc1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -1,9 +1,8 @@
 //===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index ba4efb3ff60d..45fd432fd721 100644
--- a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -1,9 +1,8 @@
 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 510ecb516dc2..24c476376c14 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -1,9 +1,8 @@
 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -418,34 +417,53 @@ void splitAndWriteThinLTOBitcode(
   }
 }
 
-// Returns whether this module needs to be split because splitting is
-// enabled and it uses type metadata.
-bool requiresSplit(Module &M) {
-  // First check if the LTO Unit splitting has been enabled.
+// Check if the LTO Unit splitting has been enabled.
+bool enableSplitLTOUnit(Module &M) {
   bool EnableSplitLTOUnit = false;
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
           M.getModuleFlag("EnableSplitLTOUnit")))
     EnableSplitLTOUnit = MD->getZExtValue();
-  if (!EnableSplitLTOUnit)
-    return false;
+  return EnableSplitLTOUnit;
+}
 
-  // Module only needs to be split if it contains type metadata.
+// Returns whether any global object in this module has type metadata.
+bool hasTypeMetadata(Module &M) {
   for (auto &GO : M.global_objects()) {
     if (GO.hasMetadata(LLVMContext::MD_type))
       return true;
   }
-
   return false;
 }
 
 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
                          function_ref<AAResults &(Function &)> AARGetter,
                          Module &M, const ModuleSummaryIndex *Index) {
-  // Split module if splitting is enabled and it contains any type metadata.
-  if (requiresSplit(M))
-    return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+  std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
+  // See if this module has any type metadata. If so, we try to split it
+  // or at least promote type ids to enable WPD.
+  if (hasTypeMetadata(M)) {
+    if (enableSplitLTOUnit(M))
+      return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+    // Promote type ids as needed for index-based WPD.
+    std::string ModuleId = getUniqueModuleId(&M);
+    if (!ModuleId.empty()) {
+      promoteTypeIds(M, ModuleId);
+      // Need to rebuild the index so that it contains type metadata
+      // for the newly promoted type ids.
+      // FIXME: Probably should not bother building the index at all
+      // in the caller of writeThinLTOBitcode (which does so via the
+      // ModuleSummaryIndexAnalysis pass), since we have to rebuild it
+      // anyway whenever there is type metadata (here or in
+      // splitAndWriteThinLTOBitcode). Just always build it once via the
+      // buildModuleSummaryIndex when Module(s) are ready.
+      ProfileSummaryInfo PSI(M);
+      NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+          buildModuleSummaryIndex(M, nullptr, &PSI));
+      Index = NewIndex.get();
+    }
+  }
 
-  // Otherwise we can just write it out as a regular module.
+  // Write it out as an unsplit ThinLTO module.
 
   // Save the module hash produced for the full bitcode, which will
   // be used in the backends, and use that in the minimized bitcode
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 48bd0cda759d..6b6dd6194e17 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1,9 +1,8 @@
 //===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -882,7 +881,7 @@ void DevirtModule::tryICallBranchFunnel(
   }
 
   BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
-  Constant *Intr =
+  Function *Intr =
       Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
 
   auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
@@ -921,9 +920,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
       NewArgs.push_back(Int8PtrTy);
       for (Type *T : CS.getFunctionType()->params())
         NewArgs.push_back(T);
-      PointerType *NewFT = PointerType::getUnqual(
+      FunctionType *NewFT =
           FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
-                            CS.getFunctionType()->isVarArg()));
+                            CS.getFunctionType()->isVarArg());
+      PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
 
       IRBuilder<> IRB(CS.getInstruction());
       std::vector<Value *> Args;
@@ -933,10 +933,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
 
       CallSite NewCS;
       if (CS.isCall())
-        NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+        NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
       else
         NewCS = IRB.CreateInvoke(
-            IRB.CreateBitCast(JT, NewFT),
+            NewFT, IRB.CreateBitCast(JT, NewFTPtr),
             cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
             cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
       NewCS.setCallingConv(CS.getCallingConv());
@@ -1183,7 +1183,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
     Value *Addr =
         B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
     if (RetType->getBitWidth() == 1) {
-      Value *Bits = B.CreateLoad(Addr);
+      Value *Bits = B.CreateLoad(Int8Ty, Addr);
       Value *BitsAndBit = B.CreateAnd(Bits, Bit);
       auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
       Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
@@ -1495,8 +1495,10 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
   if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
     // The type of the function in the declaration is irrelevant because every
     // call site will cast it to the correct type.
-    auto *SingleImpl = M.getOrInsertFunction(
-        Res.SingleImplName, Type::getVoidTy(M.getContext()));
+    Constant *SingleImpl =
+        cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
+                                             Type::getVoidTy(M.getContext()))
+                           .getCallee());
 
     // This is the import phase so we should not be exporting anything.
     bool IsExported = false;
@@ -1538,8 +1540,12 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
   }
 
   if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
-    auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
-                                     Type::getVoidTy(M.getContext()));
+    // The type of the function is irrelevant, because it's bitcast at calls
+    // anyhow.
+    Constant *JT = cast<Constant>(
+        M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+                              Type::getVoidTy(M.getContext()))
+            .getCallee());
     bool IsExported = false;
     applyICallBranchFunnel(SlotInfo, JT, IsExported);
     assert(!IsExported);
@@ -1557,23 +1563,20 @@ void DevirtModule::removeRedundantTypeTests() {
 }
 
 bool DevirtModule::run() {
+  // If only some of the modules were split, we cannot correctly perform
+  // this transformation. We already checked for the presense of type tests
+  // with partially split modules during the thin link, and would have emitted
+  // an error if any were found, so here we can simply return.
+  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+    return false;
+
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
   Function *TypeCheckedLoadFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
   Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
 
-  // If only some of the modules were split, we cannot correctly handle
-  // code that contains type tests or type checked loads.
-  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
-      (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
-    if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
-        (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
-      report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
-                         "or llvm.type.checked.load");
-    return false;
-  }
-
   // Normally if there are no users of the devirtualization intrinsics in the
   // module, this pass has nothing to do. But if we are exporting, we also need
   // to handle any users that appear only in the function summaries.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6e196bfdbd25..ba15b023f2a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -823,6 +822,47 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
   return nullptr;
 }
 
+/// Wrapping flags may allow combining constants separated by an extend.
+static Instruction *foldNoWrapAdd(BinaryOperator &Add,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
+  Type *Ty = Add.getType();
+  Constant *Op1C;
+  if (!match(Op1, m_Constant(Op1C)))
+    return nullptr;
+
+  // Try this match first because it results in an add in the narrow type.
+  // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
+  Value *X;
+  const APInt *C1, *C2;
+  if (match(Op1, m_APInt(C1)) &&
+      match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
+      C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
+    Constant *NewC =
+        ConstantInt::get(X->getType(), *C2 + C1->trunc(C2->getBitWidth()));
+    return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
+  }
+
+  // More general combining of constants in the wide type.
+  // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
+  Constant *NarrowC;
+  if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+    Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
+    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+    Value *WideX = Builder.CreateSExt(X, Ty);
+    return BinaryOperator::CreateAdd(WideX, NewC);
+  }
+  // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
+  if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+    Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
+    Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+    Value *WideX = Builder.CreateZExt(X, Ty);
+    return BinaryOperator::CreateAdd(WideX, NewC);
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
   Constant *Op1C;
@@ -832,7 +872,14 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
     return NV;
 
-  Value *X, *Y;
+  Value *X;
+  Constant *Op00C;
+
+  // add (sub C1, X), C2 --> sub (add C1, C2), X
+  if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
+    return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);
+
+  Value *Y;
 
   // add (sub X, Y), -1 --> add (not Y), X
   if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
@@ -852,6 +899,11 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (!match(Op1, m_APInt(C)))
     return nullptr;
 
+  // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
+  const APInt *C2;
+  if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
+    return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));
+
   if (C->isSignMask()) {
     // If wrapping is not allowed, then the addition must set the sign bit:
     // X + (signmask) --> X | signmask
@@ -866,19 +918,10 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   // Is this add the last step in a convoluted sext?
   // add(zext(xor i16 X, -32768), -32768) --> sext X
   Type *Ty = Add.getType();
-  const APInt *C2;
   if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
       C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
     return CastInst::Create(Instruction::SExt, X, Ty);
 
-  // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
-  if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
-      C->isNegative() && C->sge(-C2->sext(C->getBitWidth()))) {
-    Constant *NewC =
-        ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth()));
-    return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
-  }
-
   if (C->isOneValue() && Op0->hasOneUse()) {
     // add (sext i1 X), 1 --> zext (not X)
     // TODO: The smallest IR representation is (select X, 0, 1), and that would
@@ -1032,6 +1075,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
   return BinaryOperator::CreateNot(NotMask, I.getName());
 }
 
+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+  Type *Ty = I.getType();
+  auto getUAddSat = [&]() {
+    return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+  };
+
+  // add (umin X, ~Y), Y --> uaddsat X, Y
+  Value *X, *Y;
+  if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+                        m_Deferred(Y))))
+    return CallInst::Create(getUAddSat(), { X, Y });
+
+  // add (umin X, ~C), C --> uaddsat X, C
+  const APInt *C, *NotC;
+  if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+      *C == ~*NotC)
+    return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1051,6 +1116,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Instruction *X = foldAddWithConstant(I))
     return X;
 
+  if (Instruction *X = foldNoWrapAdd(I, Builder))
+    return X;
+
   // FIXME: This should be moved into the above helper function to allow these
   // transforms for general constant or constant splat vectors.
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
@@ -1119,6 +1187,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     return BinaryOperator::CreateSub(RHS, A);
   }
 
+  // Canonicalize sext to zext for better value tracking potential.
+  // add A, sext(B) --> sub A, zext(B)
+  if (match(&I, m_c_Add(m_Value(A), m_OneUse(m_SExt(m_Value(B))))) &&
+      B->getType()->isIntOrIntVectorTy(1))
+    return BinaryOperator::CreateSub(A, Builder.CreateZExt(B, Ty));
+
   // A + -B  -->  A - B
   if (match(RHS, m_Neg(m_Value(B))))
     return BinaryOperator::CreateSub(LHS, B);
@@ -1128,7 +1202,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
   // (A + 1) + ~B --> A - B
   // ~B + (A + 1) --> A - B
-  if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))))
+  // (~B + A) + 1 --> A - B
+  // (A + ~B) + 1 --> A - B
+  if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
+      match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
     return BinaryOperator::CreateSub(A, B);
 
   // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
@@ -1225,6 +1302,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Instruction *V = canonicalizeLowbitMask(I, Builder))
     return V;
 
+  if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+    return SatAdd;
+
   return Changed ? &I : nullptr;
 }
 
@@ -1500,6 +1580,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
     return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
 
+  // Y - ~X --> (Y + 1) + X
+  if (match(Op1, m_OneUse(m_Not(m_Value(X))))) {
+    return BinaryOperator::CreateAdd(
+        Builder.CreateAdd(Op0, ConstantInt::get(I.getType(), 1)), X);
+  }
+
   if (Constant *C = dyn_cast<Constant>(Op0)) {
     bool IsNegate = match(C, m_ZeroInt());
     Value *X;
@@ -1532,8 +1618,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       if (Instruction *R = foldOpIntoPhi(I, PN))
         return R;
 
-    // C-(X+C2) --> (C-C2)-X
     Constant *C2;
+
+    // C-(C2-X) --> X+(C-C2)
+    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+      return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
+
+    // C-(X+C2) --> (C-C2)-X
     if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
       return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
   }
@@ -1626,9 +1717,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
                                   Builder.CreateNot(Y, Y->getName() + ".not"));
 
     // 0 - (X sdiv C)  -> (X sdiv -C)  provided the negation doesn't overflow.
-    if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
-        C->isNotMinSignedValue() && !C->isOneValue())
-      return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+    // TODO: This could be extended to match arbitrary vector constants.
+    const APInt *DivC;
+    if (match(Op0, m_Zero()) && match(Op1, m_SDiv(m_Value(X), m_APInt(DivC))) &&
+        !DivC->isMinSignedValue() && *DivC != 1) {
+      Constant *NegDivC = ConstantInt::get(I.getType(), -(*DivC));
+      Instruction *BO = BinaryOperator::CreateSDiv(X, NegDivC);
+      BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
+      return BO;
+    }
 
     // 0 - (X << Y)  -> (-X << Y)   when X is freely negatable.
     if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
@@ -1745,6 +1842,49 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   return Changed ? &I : nullptr;
 }
 
+/// This eliminates floating-point negation in either 'fneg(X)' or
+/// 'fsub(-0.0, X)' form by combining into a constant operand.
+static Instruction *foldFNegIntoConstant(Instruction &I) {
+  Value *X;
+  Constant *C;
+
+  // Fold negation into constant operand. This is limited with one-use because
+  // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
+  // -(X * C) --> X * (-C)
+  // FIXME: It's arguable whether these should be m_OneUse or not. The current
+  // belief is that the FNeg allows for better reassociation opportunities.
+  if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
+    return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
+  // -(X / C) --> X / (-C)
+  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
+    return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
+  // -(C / X) --> (-C) / X
+  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
+    return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
+
+  return nullptr;
+}
+
+Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
+  Value *Op = I.getOperand(0);
+
+  if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
+    return replaceInstUsesWith(I, V);
+
+  if (Instruction *X = foldFNegIntoConstant(I))
+    return X;
+
+  Value *X, *Y;
+
+  // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
+  if (I.hasNoSignedZeros() &&
+      match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
+    return BinaryOperator::CreateFSubFMF(Y, X, &I);
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
                                   I.getFastMathFlags(),
@@ -1760,21 +1900,12 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
     return BinaryOperator::CreateFNegFMF(Op1, &I);
 
+  if (Instruction *X = foldFNegIntoConstant(I))
+    return X;
+
   Value *X, *Y;
   Constant *C;
 
-  // Fold negation into constant operand. This is limited with one-use because
-  // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
-  // -(X * C) --> X * (-C)
-  if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
-    return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
-  // -(X / C) --> X / (-C)
-  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
-    return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
-  // -(C / X) --> (-C) / X
-  if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
-    return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
-
   // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
   // Canonicalize to fadd to make analysis easier.
   // This can also help codegen because fadd is commutative.
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 404c2ad7e6e7..2b9859b602f4 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineAndOrXor.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -968,7 +967,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
     // Can it be decomposed into  icmp eq (X & Mask), 0  ?
     if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
                                    Pred, X, UnsetBitsMask,
-                                   /*LookThruTrunc=*/false) &&
+                                   /*LookThroughTrunc=*/false) &&
         Pred == ICmpInst::ICMP_EQ)
       return true;
     // Is it  icmp eq (X & Mask), 0  already?
@@ -1022,6 +1021,36 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
                                CxtI.getName() + ".simplified");
 }
 
+/// Reduce a pair of compares that check if a value has exactly 1 bit set.
+static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
+                             InstCombiner::BuilderTy &Builder) {
+  // Handle 'and' / 'or' commutation: make the equality check the first operand.
+  if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
+    std::swap(Cmp0, Cmp1);
+  else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
+    std::swap(Cmp0, Cmp1);
+
+  // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
+  CmpInst::Predicate Pred0, Pred1;
+  Value *X;
+  if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+                         m_SpecificInt(2))) &&
+      Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
+    Value *CtPop = Cmp1->getOperand(0);
+    return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
+  }
+  // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
+  if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+                         m_SpecificInt(1))) &&
+      Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
+    Value *CtPop = Cmp1->getOperand(0);
+    return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
+  }
+  return nullptr;
+}
+
 /// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                     Instruction &CxtI) {
@@ -1064,6 +1093,9 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
     return V;
 
+  if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder))
+    return V;
+
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
   Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
   ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1259,6 +1291,52 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd)
   return nullptr;
 }
 
+/// This is a limited reassociation for a special case (see above) where we are
+/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
+/// This could be handled more generally in '-reassociation', but it seems like
+/// an unlikely pattern for a large number of logic ops and fcmps.
+static Instruction *reassociateFCmps(BinaryOperator &BO,
+                                     InstCombiner::BuilderTy &Builder) {
+  Instruction::BinaryOps Opcode = BO.getOpcode();
+  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+         "Expecting and/or op for fcmp transform");
+
+  // There are 4 commuted variants of the pattern. Canonicalize operands of this
+  // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
+  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
+  FCmpInst::Predicate Pred;
+  if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP())))
+    std::swap(Op0, Op1);
+
+  // Match inner binop and the predicate for combining 2 NAN checks into 1.
+  BinaryOperator *BO1;
+  FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
+                                                           : FCmpInst::FCMP_UNO;
+  if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
+      !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+    return nullptr;
+
+  // The inner logic op must have a matching fcmp operand.
+  Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+  if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+      Pred != NanPred || X->getType() != Y->getType())
+    std::swap(BO10, BO11);
+
+  if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+      Pred != NanPred || X->getType() != Y->getType())
+    return nullptr;
+
+  // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
+  // or  (fcmp uno X, 0), (or  (fcmp uno Y, 0), Z) --> or  (fcmp uno X, Y), Z
+  Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y);
+  if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) {
+    // Intersect FMF from the 2 source fcmps.
+    NewFCmpInst->copyIRFlags(Op0);
+    NewFCmpInst->andIRFlags(BO10);
+  }
+  return BinaryOperator::Create(Opcode, NewFCmp, BO11);
+}
+
 /// Match De Morgan's Laws:
 /// (~A & ~B) == (~(A | B))
 /// (~A | ~B) == (~(A & B))
@@ -1619,6 +1697,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
       // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
       // of X and OP behaves well when given trunc(C1) and X.
+      // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
       switch (Op0I->getOpcode()) {
       default:
         break;
@@ -1629,7 +1708,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       case Instruction::Sub:
         Value *X;
         ConstantInt *C1;
-        if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
+        // TODO: The one use restrictions could be relaxed a little if the AND
+        // is going to be removed.
+        if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
+                                           m_ConstantInt(C1))))) {
           if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
             auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
             Value *BinOp;
@@ -1747,6 +1829,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
         return replaceInstUsesWith(I, Res);
 
+  if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+    return FoldedFCmps;
+
   if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
     return CastedAnd;
 
@@ -1820,14 +1905,18 @@ static Instruction *matchRotate(Instruction &Or) {
 
   // First, find an or'd pair of opposite shifts with the same shifted operand:
   // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
-  Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+  BinaryOperator *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
+      !match(Or.getOperand(1), m_BinOp(Or1)))
+    return nullptr;
+
   Value *ShVal, *ShAmt0, *ShAmt1;
   if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
       !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
     return nullptr;
 
-  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
-  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
+  BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
   if (ShiftOpcode0 == ShiftOpcode1)
     return nullptr;
 
@@ -1842,6 +1931,13 @@ static Instruction *matchRotate(Instruction &Or) {
         match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
       return X;
 
+    // Similar to above, but the shift amount may be extended after masking,
+    // so return the extended value as the parameter for the intrinsic.
+    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                       m_SpecificInt(Mask))))
+      return L;
+
     return nullptr;
   };
 
@@ -2083,6 +2179,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
     return V;
 
+  if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder))
+    return V;
+
   // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
   if (!LHSC || !RHSC)
     return nullptr;
@@ -2412,6 +2511,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
         return replaceInstUsesWith(I, Res);
 
+  if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+    return FoldedFCmps;
+
   if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
     return CastedOr;
 
diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
new file mode 100644
index 000000000000..5f37a00f56cf
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -0,0 +1,159 @@
+//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for atomic rmw instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "InstCombineInternal.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace {
+/// Return true if and only if the given instruction does not modify the memory
+/// location referenced.  Note that an idempotent atomicrmw may still have
+/// ordering effects on nearby instructions, or be volatile.
+/// TODO: Common w/ the version in AtomicExpandPass, and change the term used.
+/// Idempotent is confusing in this context.
+bool isIdempotentRMW(AtomicRMWInst& RMWI) {
+  if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+    switch(RMWI.getOperation()) {
+    case AtomicRMWInst::FAdd: // -0.0
+      return CF->isZero() && CF->isNegative();
+    case AtomicRMWInst::FSub: // +0.0
+      return CF->isZero() && !CF->isNegative();
+    default:
+      return false;
+    };
+  
+  auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+  if(!C)
+    return false;
+
+  switch(RMWI.getOperation()) {
+    case AtomicRMWInst::Add:
+    case AtomicRMWInst::Sub:
+    case AtomicRMWInst::Or:
+    case AtomicRMWInst::Xor:
+      return C->isZero();
+    case AtomicRMWInst::And:
+      return C->isMinusOne();
+    case AtomicRMWInst::Min:
+      return C->isMaxValue(true);
+    case AtomicRMWInst::Max:
+      return C->isMinValue(true);
+    case AtomicRMWInst::UMin:
+      return C->isMaxValue(false);
+    case AtomicRMWInst::UMax:
+      return C->isMinValue(false);
+    default:
+      return false;
+  }
+}
+
+/// Return true if the given instruction always produces a value in memory
+/// equivalent to its value operand.
+bool isSaturating(AtomicRMWInst& RMWI) {
+  if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+    switch(RMWI.getOperation()) {
+    case AtomicRMWInst::FAdd:
+    case AtomicRMWInst::FSub:
+      return CF->isNaN();
+    default:
+      return false;
+    };
+
+  auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+  if(!C)
+    return false;
+
+  switch(RMWI.getOperation()) {
+  default:
+    return false;
+  case AtomicRMWInst::Xchg:
+    return true;
+  case AtomicRMWInst::Or:
+    return C->isAllOnesValue();
+  case AtomicRMWInst::And:
+    return C->isZero();
+  case AtomicRMWInst::Min:
+    return C->isMinValue(true);
+  case AtomicRMWInst::Max:
+    return C->isMaxValue(true);
+  case AtomicRMWInst::UMin:
+    return C->isMinValue(false);
+  case AtomicRMWInst::UMax:
+    return C->isMaxValue(false);
+  };
+}
+}
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+
+  // Volatile RMWs perform a load and a store, we cannot replace this by just a
+  // load or just a store. We chose not to canonicalize out of general paranoia
+  // about user expectations around volatile. 
+  if (RMWI.isVolatile())
+    return nullptr;
+
+  // Any atomicrmw op which produces a known result in memory can be
+  // replaced w/an atomicrmw xchg.
+  if (isSaturating(RMWI) &&
+      RMWI.getOperation() != AtomicRMWInst::Xchg) {
+    RMWI.setOperation(AtomicRMWInst::Xchg);
+    return &RMWI;
+  }
+
+  AtomicOrdering Ordering = RMWI.getOrdering();
+  assert(Ordering != AtomicOrdering::NotAtomic &&
+         Ordering != AtomicOrdering::Unordered &&
+         "AtomicRMWs don't make sense with Unordered or NotAtomic");
+
+  // Any atomicrmw xchg with no uses can be converted to an atomic store if the
+  // ordering is compatible. 
+  if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
+      RMWI.use_empty()) {
+    if (Ordering != AtomicOrdering::Release &&
+        Ordering != AtomicOrdering::Monotonic)
+      return nullptr;
+    auto *SI = new StoreInst(RMWI.getValOperand(),
+                             RMWI.getPointerOperand(), &RMWI);
+    SI->setAtomic(Ordering, RMWI.getSyncScopeID());
+    SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+    return eraseInstFromFunction(RMWI);
+  }
+  
+  if (!isIdempotentRMW(RMWI))
+    return nullptr;
+
+  // We chose to canonicalize all idempotent operations to a single
+  // operation code and constant.  This makes it easier for the rest of the
+  // optimizer to match.  The choices of or w/0 and fadd w/-0.0 are
+  // arbitrary.
+  if (RMWI.getType()->isIntegerTy() &&
+      RMWI.getOperation() != AtomicRMWInst::Or) {
+    RMWI.setOperation(AtomicRMWInst::Or);
+    RMWI.setOperand(1, ConstantInt::get(RMWI.getType(), 0));
+    return &RMWI;
+  } else if (RMWI.getType()->isFloatingPointTy() &&
+             RMWI.getOperation() != AtomicRMWInst::FAdd) {
+    RMWI.setOperation(AtomicRMWInst::FAdd);
+    RMWI.setOperand(1, ConstantFP::getNegativeZero(RMWI.getType()));
+    return &RMWI;
+  }
+
+  // Check if the required ordering is compatible with an atomic load.
+  if (Ordering != AtomicOrdering::Acquire &&
+      Ordering != AtomicOrdering::Monotonic)
+    return nullptr;
+  
+  LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+  Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+  Load->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+  return Load;
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index aeb25d530d71..4b3333affa72 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1,19 +1,19 @@
 //===- InstCombineCalls.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the visitCall and visitInvoke functions.
+// This file implements the visitCall, visitInvoke, and visitCallBr functions.
 //
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
@@ -23,12 +23,12 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -58,6 +58,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include <algorithm>
 #include <cassert>
@@ -121,6 +122,15 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
     return MI;
   }
 
+  // If we have a store to a location which is known constant, we can conclude
+  // that the store must be storing the constant value (else the memory
+  // wouldn't be constant), and this must be a noop.
+  if (AA->pointsToConstantMemory(MI->getDest())) {
+    // Set the size of the copy to 0, it will be deleted on the next iteration.
+    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+    return MI;
+  }
+
   // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
   // load/store.
   ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -173,7 +183,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
 
   Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
   Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
-  LoadInst *L = Builder.CreateLoad(Src);
+  LoadInst *L = Builder.CreateLoad(IntType, Src);
   // Alignment from the mem intrinsic will be better, so use it.
   L->setAlignment(CopySrcAlign);
   if (CopyMD)
@@ -219,6 +229,15 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
     return MI;
   }
 
+  // If we have a store to a location which is known constant, we can conclude
+  // that the store must be storing the constant value (else the memory
+  // wouldn't be constant), and this must be a noop.
+  if (AA->pointsToConstantMemory(MI->getDest())) {
+    // Set the memset length to 0; it will be deleted on the next iteration.
+    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+    return MI;
+  }
+
   // Extract the length and alignment and fill if they are constant.
   ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -523,7 +542,8 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
   return Builder.CreateAShr(Vec, ShiftVec);
 }
 
-static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
+static Value *simplifyX86pack(IntrinsicInst &II,
+                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
   Value *Arg0 = II.getArgOperand(0);
   Value *Arg1 = II.getArgOperand(1);
   Type *ResTy = II.getType();
@@ -534,167 +554,58 @@ static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
 
   Type *ArgTy = Arg0->getType();
   unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
-  unsigned NumDstElts = ResTy->getVectorNumElements();
   unsigned NumSrcElts = ArgTy->getVectorNumElements();
-  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
+  assert(ResTy->getVectorNumElements() == (2 * NumSrcElts) &&
+         "Unexpected packing types");
 
-  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
   unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
   unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
-  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
+  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
+  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
          "Unexpected packing types");
 
   // Constant folding.
-  auto *Cst0 = dyn_cast<Constant>(Arg0);
-  auto *Cst1 = dyn_cast<Constant>(Arg1);
-  if (!Cst0 || !Cst1)
+  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
     return nullptr;
 
-  SmallVector<Constant *, 32> Vals;
-  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-    for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
-      unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
-      auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
-      auto *COp = Cst->getAggregateElement(SrcIdx);
-      if (COp && isa<UndefValue>(COp)) {
-        Vals.push_back(UndefValue::get(ResTy->getScalarType()));
-        continue;
-      }
-
-      auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
-      if (!CInt)
-        return nullptr;
-
-      APInt Val = CInt->getValue();
-      assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
-             "Unexpected constant bitwidth");
-
-      if (IsSigned) {
-        // PACKSS: Truncate signed value with signed saturation.
-        // Source values less than dst minint are saturated to minint.
-        // Source values greater than dst maxint are saturated to maxint.
-        if (Val.isSignedIntN(DstScalarSizeInBits))
-          Val = Val.trunc(DstScalarSizeInBits);
-        else if (Val.isNegative())
-          Val = APInt::getSignedMinValue(DstScalarSizeInBits);
-        else
-          Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
-      } else {
-        // PACKUS: Truncate signed value with unsigned saturation.
-        // Source values less than zero are saturated to zero.
-        // Source values greater than dst maxuint are saturated to maxuint.
-        if (Val.isIntN(DstScalarSizeInBits))
-          Val = Val.trunc(DstScalarSizeInBits);
-        else if (Val.isNegative())
-          Val = APInt::getNullValue(DstScalarSizeInBits);
-        else
-          Val = APInt::getAllOnesValue(DstScalarSizeInBits);
-      }
-
-      Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
-    }
-  }
-
-  return ConstantVector::get(Vals);
-}
-
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
-                               InstCombiner::BuilderTy &Builder) {
-  ConstantInt *Arg = nullptr;
-  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
-  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
-      IntrinsicID == Intrinsic::x86_sse41_round_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
-  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-           IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
-  else
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
-  if (!Arg)
-    return nullptr;
-  unsigned RoundControl = Arg->getZExtValue();
-
-  Arg = nullptr;
-  unsigned SAE = 0;
-  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
-  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-           IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
-  else
-    SAE = 4;
-  if (!SAE) {
-    if (!Arg)
-      return nullptr;
-    SAE = Arg->getZExtValue();
+  // Clamp Values - signed/unsigned both use signed clamp values, but they
+  // differ on the min/max values.
+  APInt MinValue, MaxValue;
+  if (IsSigned) {
+    // PACKSS: Truncate signed value with signed saturation.
+    // Source values less than dst minint are saturated to minint.
+    // Source values greater than dst maxint are saturated to maxint.
+    MinValue =
+        APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+    MaxValue =
+        APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+  } else {
+    // PACKUS: Truncate signed value with unsigned saturation.
+    // Source values less than zero are saturated to zero.
+    // Source values greater than dst maxuint are saturated to maxuint.
+    MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+    MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
   }
 
-  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
-    return nullptr;
+  auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
+  auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
+  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
+  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
+  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
+  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
 
-  Value *Src, *Dst, *Mask;
-  bool IsScalar = false;
-  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
-      IntrinsicID == Intrinsic::x86_sse41_round_sd ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-    IsScalar = true;
-    if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-      Mask = II.getArgOperand(3);
-      Value *Zero = Constant::getNullValue(Mask->getType());
-      Mask = Builder.CreateAnd(Mask, 1);
-      Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
-      Dst = II.getArgOperand(2);
-    } else
-      Dst = II.getArgOperand(0);
-    Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
-  } else {
-    Src = II.getArgOperand(0);
-    if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
-      Dst = II.getArgOperand(2);
-      Mask = II.getArgOperand(3);
-    } else {
-      Dst = Src;
-      Mask = ConstantInt::getAllOnesValue(
-          Builder.getIntNTy(Src->getType()->getVectorNumElements()));
-    }
+  // Shuffle clamped args together at the lane level.
+  SmallVector<unsigned, 32> PackMask;
+  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
+    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
   }
+  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
 
-  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
-  Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
-  if (!IsScalar) {
-    if (auto *C = dyn_cast<Constant>(Mask))
-      if (C->isAllOnesValue())
-        return Res;
-    auto *MaskTy = VectorType::get(
-        Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
-    Mask = Builder.CreateBitCast(Mask, MaskTy);
-    unsigned Width = Src->getType()->getVectorNumElements();
-    if (MaskTy->getVectorNumElements() > Width) {
-      uint32_t Indices[4];
-      for (unsigned i = 0; i != Width; ++i)
-        Indices[i] = i;
-      Mask = Builder.CreateShuffleVector(Mask, Mask,
-                                         makeArrayRef(Indices, Width));
-    }
-    return Builder.CreateSelect(Mask, Res, Dst);
-  }
-  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-    Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
-    Res = Builder.CreateSelect(Mask, Res, Dst);
-    Dst = II.getArgOperand(0);
-  }
-  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
+  // Truncate to dst size.
+  return Builder.CreateTrunc(Shuffle, ResTy);
 }
 
 static Value *simplifyX86movmsk(const IntrinsicInst &II,
@@ -711,43 +622,44 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
   if (!ArgTy->isVectorTy())
     return nullptr;
 
-  if (auto *C = dyn_cast<Constant>(Arg)) {
-    // Extract signbits of the vector input and pack into integer result.
-    APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
-    for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
-      auto *COp = C->getAggregateElement(I);
-      if (!COp)
-        return nullptr;
-      if (isa<UndefValue>(COp))
-        continue;
-
-      auto *CInt = dyn_cast<ConstantInt>(COp);
-      auto *CFp = dyn_cast<ConstantFP>(COp);
-      if (!CInt && !CFp)
-        return nullptr;
-
-      if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
-        Result.setBit(I);
-    }
-    return Constant::getIntegerValue(ResTy, Result);
-  }
+  // Expand MOVMSK to compare/bitcast/zext:
+  // e.g. PMOVMSKB(v16i8 x):
+  // %cmp = icmp slt <16 x i8> %x, zeroinitializer
+  // %int = bitcast <16 x i1> %cmp to i16
+  // %res = zext i16 %int to i32
+  unsigned NumElts = ArgTy->getVectorNumElements();
+  Type *IntegerVecTy = VectorType::getInteger(cast<VectorType>(ArgTy));
+  Type *IntegerTy = Builder.getIntNTy(NumElts);
+
+  Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
+  Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
+  Res = Builder.CreateBitCast(Res, IntegerTy);
+  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
+  return Res;
+}
 
-  // Look for a sign-extended boolean source vector as the argument to this
-  // movmsk. If the argument is bitcast, look through that, but make sure the
-  // source of that bitcast is still a vector with the same number of elements.
-  // TODO: We can also convert a bitcast with wider elements, but that requires
-  // duplicating the bool source sign bits to match the number of elements
-  // expected by the movmsk call.
-  Arg = peekThroughBitcast(Arg);
-  Value *X;
-  if (Arg->getType()->isVectorTy() &&
-      Arg->getType()->getVectorNumElements() == ArgTy->getVectorNumElements() &&
-      match(Arg, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
-    // call iM movmsk(sext <N x i1> X) --> zext (bitcast <N x i1> X to iN) to iM
-    unsigned NumElts = X->getType()->getVectorNumElements();
-    Type *ScalarTy = Type::getIntNTy(Arg->getContext(), NumElts);
-    Value *BC = Builder.CreateBitCast(X, ScalarTy);
-    return Builder.CreateZExtOrTrunc(BC, ResTy);
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *CarryIn = II.getArgOperand(0);
+  Value *Op1 = II.getArgOperand(1);
+  Value *Op2 = II.getArgOperand(2);
+  Type *RetTy = II.getType();
+  Type *OpTy = Op1->getType();
+  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+         "Unexpected types for x86 addcarry");
+
+  // If carry-in is zero, this is just an unsigned add with overflow.
+  if (match(CarryIn, m_ZeroInt())) {
+    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+                                          { Op1, Op2 });
+    // The types have to be adjusted to match the x86 call types.
+    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+                                       Builder.getInt8Ty());
+    Value *Res = UndefValue::get(RetTy);
+    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+    return Builder.CreateInsertValue(Res, UAddResult, 1);
   }
 
   return nullptr;
@@ -892,7 +804,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
     if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
       Value *Args[] = {Op0, CILength, CIIndex};
       Module *M = II.getModule();
-      Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+      Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
       return Builder.CreateCall(F, Args);
     }
   }
@@ -993,7 +905,7 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
 
     Value *Args[] = {Op0, Op1, CILength, CIIndex};
     Module *M = II.getModule();
-    Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+    Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
     return Builder.CreateCall(F, Args);
   }
 
@@ -1134,82 +1046,42 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
   return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
 }
 
-/// Decode XOP integer vector comparison intrinsics.
-static Value *simplifyX86vpcom(const IntrinsicInst &II,
-                               InstCombiner::BuilderTy &Builder,
-                               bool IsSigned) {
-  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
-    uint64_t Imm = CInt->getZExtValue() & 0x7;
-    VectorType *VecTy = cast<VectorType>(II.getType());
-    CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-
-    switch (Imm) {
-    case 0x0:
-      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
-      break;
-    case 0x1:
-      Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-      break;
-    case 0x2:
-      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
-      break;
-    case 0x3:
-      Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
-      break;
-    case 0x4:
-      Pred = ICmpInst::ICMP_EQ; break;
-    case 0x5:
-      Pred = ICmpInst::ICMP_NE; break;
-    case 0x6:
-      return ConstantInt::getSigned(VecTy, 0); // FALSE
-    case 0x7:
-      return ConstantInt::getSigned(VecTy, -1); // TRUE
-    }
-
-    if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
-                                        II.getArgOperand(1)))
-      return Builder.CreateSExtOrTrunc(Cmp, VecTy);
-  }
-  return nullptr;
-}
+// TODO, Obvious Missing Transforms:
+// * Narrow width by halfs excluding zero/undef lanes
+Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
+  Value *LoadPtr = II.getArgOperand(0);
+  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
 
-static bool maskIsAllOneOrUndef(Value *Mask) {
-  auto *ConstMask = dyn_cast<Constant>(Mask);
-  if (!ConstMask)
-    return false;
-  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
-    return true;
-  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
-       ++I) {
-    if (auto *MaskElt = ConstMask->getAggregateElement(I))
-      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
-        continue;
-    return false;
-  }
-  return true;
-}
-
-static Value *simplifyMaskedLoad(const IntrinsicInst &II,
-                                 InstCombiner::BuilderTy &Builder) {
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
-  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
-    Value *LoadPtr = II.getArgOperand(0);
-    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
-    return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
+  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
+    return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                     "unmaskedload");
+
+  // If we can unconditionally load from this address, replace with a
+  // load/select idiom. TODO: use DT for context sensitive query
+  if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
+                                         II.getModule()->getDataLayout(),
+                                         &II, nullptr)) {
+    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                         "unmaskedload");
+    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
   return nullptr;
 }
 
-static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Narrow width by halfs excluding zero/undef lanes
+Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
   auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
   if (!ConstMask)
     return nullptr;
 
   // If the mask is all zeros, this instruction does nothing.
   if (ConstMask->isNullValue())
-    return IC.eraseInstFromFunction(II);
+    return eraseInstFromFunction(II);
 
   // If the mask is all ones, this is a plain vector store of the 1st argument.
   if (ConstMask->isAllOnesValue()) {
@@ -1218,14 +1090,57 @@ static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
     return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
   }
 
+  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+  APInt UndefElts(DemandedElts.getBitWidth(), 0);
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+                                            DemandedElts, UndefElts)) {
+    II.setOperand(0, V);
+    return &II;
+  }
+
   return nullptr;
 }
 
-static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
-  // If the mask is all zeros, return the "passthru" argument of the gather.
-  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
-  if (ConstMask && ConstMask->isNullValue())
-    return IC.replaceInstUsesWith(II, II.getArgOperand(3));
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane load -> load
+// * Dereferenceable address & few lanes -> scalarize speculative load/selects
+// * Adjacent vector addresses -> masked.load
+// * Narrow width by halfs excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar load
+// * Vector incrementing address -> vector masked load
+Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
+  return nullptr;
+}
+
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Adjacent vector addresses -> masked.store
+// * Narrow store width by halfs excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar store
+// * Vector incrementing address -> vector masked store
+Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+  if (!ConstMask)
+    return nullptr;
+
+  // If the mask is all zeros, a scatter does nothing.
+  if (ConstMask->isNullValue())
+    return eraseInstFromFunction(II);
+
+  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+  APInt UndefElts(DemandedElts.getBitWidth(), 0);
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+                                            DemandedElts, UndefElts)) {
+    II.setOperand(0, V);
+    return &II;
+  }
+  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
+                                            DemandedElts, UndefElts)) {
+    II.setOperand(1, V);
+    return &II;
+  }
 
   return nullptr;
 }
@@ -1264,25 +1179,41 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
   return cast<Instruction>(Result);
 }
 
-static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
-  // If the mask is all zeros, a scatter does nothing.
-  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
-  if (ConstMask && ConstMask->isNullValue())
-    return IC.eraseInstFromFunction(II);
-
-  return nullptr;
-}
-
 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
   assert((II.getIntrinsicID() == Intrinsic::cttz ||
           II.getIntrinsicID() == Intrinsic::ctlz) &&
          "Expected cttz or ctlz intrinsic");
+  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
   Value *Op0 = II.getArgOperand(0);
+  Value *X;
+  // ctlz(bitreverse(x)) -> cttz(x)
+  // cttz(bitreverse(x)) -> ctlz(x)
+  if (match(Op0, m_BitReverse(m_Value(X)))) {
+    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
+    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
+    return CallInst::Create(F, {X, II.getArgOperand(1)});
+  }
+
+  if (IsTZ) {
+    // cttz(-x) -> cttz(x)
+    if (match(Op0, m_Neg(m_Value(X)))) {
+      II.setOperand(0, X);
+      return &II;
+    }
+
+    // cttz(abs(x)) -> cttz(x)
+    // cttz(nabs(x)) -> cttz(x)
+    Value *Y;
+    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
+    if (SPF == SPF_ABS || SPF == SPF_NABS) {
+      II.setOperand(0, X);
+      return &II;
+    }
+  }
 
   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
 
   // Create a mask for bits above (ctlz) or below (cttz) the first known one.
-  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
   unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                 : Known.countMaxLeadingZeros();
   unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
@@ -1328,6 +1259,14 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
   assert(II.getIntrinsicID() == Intrinsic::ctpop &&
          "Expected ctpop intrinsic");
   Value *Op0 = II.getArgOperand(0);
+  Value *X;
+  // ctpop(bitreverse(x)) -> ctpop(x)
+  // ctpop(bswap(x)) -> ctpop(x)
+  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) {
+    II.setOperand(0, X);
+    return &II;
+  }
+
   // FIXME: Try to simplify vectors of integers.
   auto *IT = dyn_cast<IntegerType>(Op0->getType());
   if (!IT)
@@ -1513,7 +1452,7 @@ static Value *simplifyNeonVld1(const IntrinsicInst &II,
 
   auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
                                           PointerType::get(II.getType(), 0));
-  return Builder.CreateAlignedLoad(BCastInst, Alignment);
+  return Builder.CreateAlignedLoad(II.getType(), BCastInst, Alignment);
 }
 
 // Returns true iff the 2 intrinsics have the same operands, limiting the
@@ -1827,8 +1766,18 @@ static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
   return nullptr;
 }
 
+Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+  WithOverflowInst *WO = cast<WithOverflowInst>(II);
+  Value *OperationResult = nullptr;
+  Constant *OverflowResult = nullptr;
+  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
+                            WO->getRHS(), *WO, OperationResult, OverflowResult))
+    return CreateOverflowTuple(WO, OperationResult, OverflowResult);
+  return nullptr;
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
-/// instructions. For normal calls, it allows visitCallSite to do the heavy
+/// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
@@ -1845,10 +1794,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
 
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
-  if (!II) return visitCallSite(&CI);
+  if (!II) return visitCallBase(CI);
 
-  // Intrinsics cannot occur in an invoke, so handle them here instead of in
-  // visitCallSite.
+  // Intrinsics cannot occur in an invoke or a callbr, so handle them here
+  // instead of in visitCallBase.
   if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
     bool Changed = false;
 
@@ -1908,6 +1857,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Changed) return II;
   }
 
+  // For vector result intrinsics, use the generic demanded vector support.
+  if (II->getType()->isVectorTy()) {
+    auto VWidth = II->getType()->getVectorNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+      if (V != II)
+        return replaceInstUsesWith(*II, V);
+      return II;
+    }
+  }
+
   if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
     return I;
 
@@ -1918,12 +1879,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
   };
 
-  switch (II->getIntrinsicID()) {
+  Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
   default: break;
   case Intrinsic::objectsize:
-    if (ConstantInt *N =
-            lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
-      return replaceInstUsesWith(CI, N);
+    if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+      return replaceInstUsesWith(CI, V);
     return nullptr;
   case Intrinsic::bswap: {
     Value *IIOperand = II->getArgOperand(0);
@@ -1940,15 +1901,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
   case Intrinsic::masked_load:
-    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
+    if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
       return replaceInstUsesWith(CI, SimplifiedMaskedOp);
     break;
   case Intrinsic::masked_store:
-    return simplifyMaskedStore(*II, *this);
+    return simplifyMaskedStore(*II);
   case Intrinsic::masked_gather:
-    return simplifyMaskedGather(*II, *this);
+    return simplifyMaskedGather(*II);
   case Intrinsic::masked_scatter:
-    return simplifyMaskedScatter(*II, *this);
+    return simplifyMaskedScatter(*II);
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
     if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
@@ -1982,33 +1943,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
   case Intrinsic::fshl:
   case Intrinsic::fshr: {
-    const APInt *SA;
-    if (match(II->getArgOperand(2), m_APInt(SA))) {
-      Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
-      unsigned BitWidth = SA->getBitWidth();
-      uint64_t ShiftAmt = SA->urem(BitWidth);
-      assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
-      // Normalize to funnel shift left.
-      if (II->getIntrinsicID() == Intrinsic::fshr)
-        ShiftAmt = BitWidth - ShiftAmt;
-
-      // fshl(X, 0, C) -> shl X, C
-      // fshl(X, undef, C) -> shl X, C
-      if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
-        return BinaryOperator::CreateShl(
-            Op0, ConstantInt::get(II->getType(), ShiftAmt));
-
-      // fshl(0, X, C) -> lshr X, (BW-C)
-      // fshl(undef, X, C) -> lshr X, (BW-C)
-      if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
-        return BinaryOperator::CreateLShr(
-            Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+    Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+    Type *Ty = II->getType();
+    unsigned BitWidth = Ty->getScalarSizeInBits();
+    Constant *ShAmtC;
+    if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
+        !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
+      // Canonicalize a shift amount constant operand to modulo the bit-width.
+      Constant *WidthC = ConstantInt::get(Ty, BitWidth);
+      Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+      if (ModuloC != ShAmtC) {
+        II->setArgOperand(2, ModuloC);
+        return II;
+      }
+      assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
+                 ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
+             "Shift amount expected to be modulo bitwidth");
+
+      // Canonicalize funnel shift right by constant to funnel shift left. This
+      // is not entirely arbitrary. For historical reasons, the backend may
+      // recognize rotate left patterns but miss rotate right patterns.
+      if (IID == Intrinsic::fshr) {
+        // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
+        Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
+        Module *Mod = II->getModule();
+        Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
+        return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
+      }
+      assert(IID == Intrinsic::fshl &&
+             "All funnel shifts by simple constants should go left");
+
+      // fshl(X, 0, C) --> shl X, C
+      // fshl(X, undef, C) --> shl X, C
+      if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
+        return BinaryOperator::CreateShl(Op0, ShAmtC);
+
+      // fshl(0, X, C) --> lshr X, (BW-C)
+      // fshl(undef, X, C) --> lshr X, (BW-C)
+      if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
+        return BinaryOperator::CreateLShr(Op1,
+                                          ConstantExpr::getSub(WidthC, ShAmtC));
+
+      // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
+      if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
+        Module *Mod = II->getModule();
+        Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
+        return CallInst::Create(Bswap, { Op0 });
+      }
     }
 
+    // Left or right might be masked.
+    if (SimplifyDemandedInstructionBits(*II))
+      return &CI;
+
     // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
     // so only the low bits of the shift amount are demanded if the bitwidth is
     // a power-of-2.
-    unsigned BitWidth = II->getType()->getScalarSizeInBits();
     if (!isPowerOf2_32(BitWidth))
       break;
     APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
@@ -2018,7 +2008,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
   case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::sadd_with_overflow: {
+    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+      return I;
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
+
+    // Given 2 constant operands whose sum does not overflow:
+    // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
+    // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+    Value *X;
+    const APInt *C0, *C1;
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+    bool IsSigned = IID == Intrinsic::sadd_with_overflow;
+    bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
+                             : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
+    if (HasNWAdd && match(Arg1, m_APInt(C1))) {
+      bool Overflow;
+      APInt NewC =
+          IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
+      if (!Overflow)
+        return replaceInstUsesWith(
+            *II, Builder.CreateBinaryIntrinsic(
+                     IID, X, ConstantInt::get(Arg1->getType(), NewC)));
+    }
+    break;
+  }
+
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
@@ -2026,16 +2043,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     LLVM_FALLTHROUGH;
 
   case Intrinsic::usub_with_overflow:
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
+    break;
+
   case Intrinsic::ssub_with_overflow: {
-    OverflowCheckFlavor OCF =
-        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
-    assert(OCF != OCF_INVALID && "unexpected!");
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
 
-    Value *OperationResult = nullptr;
-    Constant *OverflowResult = nullptr;
-    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
-                              *II, OperationResult, OverflowResult))
-      return CreateOverflowTuple(II, OperationResult, OverflowResult);
+    Constant *C;
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+    // Given a constant C that is not the minimum signed value
+    // for an integer of a given bit width:
+    //
+    // ssubo X, C -> saddo X, -C
+    if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
+      Value *NegVal = ConstantExpr::getNeg(C);
+      // Build a saddo call that is equivalent to the discovered
+      // ssubo call.
+      return replaceInstUsesWith(
+          *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
+                                             Arg0, NegVal));
+    }
 
     break;
   }
@@ -2047,39 +2077,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     LLVM_FALLTHROUGH;
   case Intrinsic::usub_sat:
   case Intrinsic::ssub_sat: {
-    Value *Arg0 = II->getArgOperand(0);
-    Value *Arg1 = II->getArgOperand(1);
-    Intrinsic::ID IID = II->getIntrinsicID();
+    SaturatingInst *SI = cast<SaturatingInst>(II);
+    Type *Ty = SI->getType();
+    Value *Arg0 = SI->getLHS();
+    Value *Arg1 = SI->getRHS();
 
     // Make use of known overflow information.
-    OverflowResult OR;
-    switch (IID) {
-    default:
-      llvm_unreachable("Unexpected intrinsic!");
-    case Intrinsic::uadd_sat:
-      OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
-      if (OR == OverflowResult::NeverOverflows)
-        return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflows)
-        return replaceInstUsesWith(*II,
-                                   ConstantInt::getAllOnesValue(II->getType()));
-      break;
-    case Intrinsic::usub_sat:
-      OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
-      if (OR == OverflowResult::NeverOverflows)
-        return BinaryOperator::CreateNUWSub(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflows)
-        return replaceInstUsesWith(*II,
-                                   ConstantInt::getNullValue(II->getType()));
-      break;
-    case Intrinsic::sadd_sat:
-      if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
-        return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
-      break;
-    case Intrinsic::ssub_sat:
-      if (willNotOverflowSignedSub(Arg0, Arg1, *II))
-        return BinaryOperator::CreateNSWSub(Arg0, Arg1);
-      break;
+    OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
+                                        Arg0, Arg1, SI);
+    switch (OR) {
+      case OverflowResult::MayOverflow:
+        break;
+      case OverflowResult::NeverOverflows:
+        if (SI->isSigned())
+          return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
+        else
+          return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
+      case OverflowResult::AlwaysOverflowsLow: {
+        unsigned BitWidth = Ty->getScalarSizeInBits();
+        APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
+        return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
+      }
+      case OverflowResult::AlwaysOverflowsHigh: {
+        unsigned BitWidth = Ty->getScalarSizeInBits();
+        APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
+        return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
+      }
     }
 
     // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
@@ -2101,7 +2124,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       APInt NewVal;
       bool IsUnsigned =
           IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
-      if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+      if (Other->getIntrinsicID() == IID &&
           match(Arg1, m_APInt(Val)) &&
           match(Other->getArgOperand(0), m_Value(X)) &&
           match(Other->getArgOperand(1), m_APInt(Val2))) {
@@ -2136,7 +2159,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return I;
     Value *Arg0 = II->getArgOperand(0);
     Value *Arg1 = II->getArgOperand(1);
-    Intrinsic::ID IID = II->getIntrinsicID();
     Value *X, *Y;
     if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
         (Arg0->hasOneUse() || Arg1->hasOneUse())) {
@@ -2266,8 +2288,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     Value *ExtSrc;
     if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
       // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
-      Value *NarrowII =
-          Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
+      Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
       return new FPExtInst(NarrowII, II->getType());
     }
     break;
@@ -2302,7 +2323,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                                    &DT) >= 16) {
       Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
-      return new LoadInst(Ptr);
+      return new LoadInst(II->getType(), Ptr);
     }
     break;
   case Intrinsic::ppc_vsx_lxvw4x:
@@ -2310,7 +2331,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Turn PPC VSX loads into normal loads.
     Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                        PointerType::getUnqual(II->getType()));
-    return new LoadInst(Ptr, Twine(""), false, 1);
+    return new LoadInst(II->getType(), Ptr, Twine(""), false, 1);
   }
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
@@ -2338,7 +2359,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                                   II->getType()->getVectorNumElements());
       Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(VTy));
-      Value *Load = Builder.CreateLoad(Ptr);
+      Value *Load = Builder.CreateLoad(VTy, Ptr);
       return new FPExtInst(Load, II->getType());
     }
     break;
@@ -2348,7 +2369,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
                                    &DT) >= 32) {
       Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
-      return new LoadInst(Ptr);
+      return new LoadInst(II->getType(), Ptr);
     }
     break;
   case Intrinsic::ppc_qpx_qvstfs:
@@ -2499,22 +2520,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
-  case Intrinsic::x86_sse41_round_ps:
-  case Intrinsic::x86_sse41_round_pd:
-  case Intrinsic::x86_avx_round_ps_256:
-  case Intrinsic::x86_avx_round_pd_256:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
-  case Intrinsic::x86_avx512_mask_rndscale_ss:
-  case Intrinsic::x86_avx512_mask_rndscale_sd:
-    if (Value *V = simplifyX86round(*II, Builder))
-      return replaceInstUsesWith(*II, V);
-    break;
-
   case Intrinsic::x86_mmx_pmovmskb:
   case Intrinsic::x86_sse_movmsk_ps:
   case Intrinsic::x86_sse2_movmsk_pd:
@@ -2620,7 +2625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         Value *Arg1 = II->getArgOperand(1);
 
         Value *V;
-        switch (II->getIntrinsicID()) {
+        switch (IID) {
         default: llvm_unreachable("Case stmts out of sync!");
         case Intrinsic::x86_avx512_add_ps_512:
         case Intrinsic::x86_avx512_add_pd_512:
@@ -2664,7 +2669,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
 
         Value *V;
-        switch (II->getIntrinsicID()) {
+        switch (IID) {
         default: llvm_unreachable("Case stmts out of sync!");
         case Intrinsic::x86_avx512_mask_add_ss_round:
         case Intrinsic::x86_avx512_mask_add_sd_round:
@@ -2706,44 +2711,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return replaceInstUsesWith(*II, V);
       }
     }
-    LLVM_FALLTHROUGH;
-
-  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
-  case Intrinsic::x86_avx512_mask_max_ss_round:
-  case Intrinsic::x86_avx512_mask_min_ss_round:
-  case Intrinsic::x86_avx512_mask_max_sd_round:
-  case Intrinsic::x86_avx512_mask_min_sd_round:
-  case Intrinsic::x86_sse_cmp_ss:
-  case Intrinsic::x86_sse_min_ss:
-  case Intrinsic::x86_sse_max_ss:
-  case Intrinsic::x86_sse2_cmp_sd:
-  case Intrinsic::x86_sse2_min_sd:
-  case Intrinsic::x86_sse2_max_sd:
-  case Intrinsic::x86_xop_vfrcz_ss:
-  case Intrinsic::x86_xop_vfrcz_sd: {
-   unsigned VWidth = II->getType()->getVectorNumElements();
-   APInt UndefElts(VWidth, 0);
-   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-   if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
-     if (V != II)
-       return replaceInstUsesWith(*II, V);
-     return II;
-   }
-   break;
-  }
-  case Intrinsic::x86_sse41_round_ss:
-  case Intrinsic::x86_sse41_round_sd: {
-    unsigned VWidth = II->getType()->getVectorNumElements();
-    APInt UndefElts(VWidth, 0);
-    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-    if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
-      if (V != II)
-        return replaceInstUsesWith(*II, V);
-      return II;
-    } else if (Value *V = simplifyX86round(*II, Builder))
-      return replaceInstUsesWith(*II, V);
     break;
-  }
 
   // Constant fold ashr( <A x Bi>, Ci ).
   // Constant fold lshr( <A x Bi>, Ci ).
@@ -2860,7 +2828,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_avx2_packsswb:
   case Intrinsic::x86_avx512_packssdw_512:
   case Intrinsic::x86_avx512_packsswb_512:
-    if (Value *V = simplifyX86pack(*II, true))
+    if (Value *V = simplifyX86pack(*II, Builder, true))
       return replaceInstUsesWith(*II, V);
     break;
 
@@ -2870,7 +2838,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_avx2_packuswb:
   case Intrinsic::x86_avx512_packusdw_512:
   case Intrinsic::x86_avx512_packuswb_512:
-    if (Value *V = simplifyX86pack(*II, false))
+    if (Value *V = simplifyX86pack(*II, Builder, false))
       return replaceInstUsesWith(*II, V);
     break;
 
@@ -3168,19 +3136,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return nullptr;
     break;
 
-  case Intrinsic::x86_xop_vpcomb:
-  case Intrinsic::x86_xop_vpcomd:
-  case Intrinsic::x86_xop_vpcomq:
-  case Intrinsic::x86_xop_vpcomw:
-    if (Value *V = simplifyX86vpcom(*II, Builder, true))
-      return replaceInstUsesWith(*II, V);
-    break;
-
-  case Intrinsic::x86_xop_vpcomub:
-  case Intrinsic::x86_xop_vpcomud:
-  case Intrinsic::x86_xop_vpcomuq:
-  case Intrinsic::x86_xop_vpcomuw:
-    if (Value *V = simplifyX86vpcom(*II, Builder, false))
+  case Intrinsic::x86_addcarry_32:
+  case Intrinsic::x86_addcarry_64:
+    if (Value *V = simplifyX86addcarry(*II, Builder))
       return replaceInstUsesWith(*II, V);
     break;
 
@@ -3296,8 +3254,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
 
     // Check for constant LHS & RHS - in this case we just simplify.
-    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
-                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
+    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
+                 IID == Intrinsic::aarch64_neon_umull);
     VectorType *NewVT = cast<VectorType>(II->getType());
     if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
       if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
@@ -3374,7 +3332,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       APFloat Significand = frexp(C->getValueAPF(), Exp,
                                   APFloat::rmNearestTiesToEven);
 
-      if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
+      if (IID == Intrinsic::amdgcn_frexp_mant) {
         return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
                                                        Significand));
       }
@@ -3559,7 +3517,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       }
     }
 
-    bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
+    bool Signed = IID == Intrinsic::amdgcn_sbfe;
 
     if (!CWidth || !COffset)
       break;
@@ -3587,15 +3545,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::amdgcn_exp:
   case Intrinsic::amdgcn_exp_compr: {
-    ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
-    if (!En) // Illegal.
-      break;
-
+    ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
     unsigned EnBits = En->getZExtValue();
     if (EnBits == 0xf)
       break; // All inputs enabled.
 
-    bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
+    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
     bool Changed = false;
     for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
       if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
@@ -3680,13 +3635,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::amdgcn_icmp:
   case Intrinsic::amdgcn_fcmp: {
-    const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
-    if (!CC)
-      break;
-
+    const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2));
     // Guard against invalid arguments.
     int64_t CCVal = CC->getZExtValue();
-    bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
+    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
     if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
                        CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
         (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
@@ -3709,7 +3661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         // register (which contains the bitmask of live threads). So a
         // comparison that always returns true is the same as a read of the
         // EXEC register.
-        Value *NewF = Intrinsic::getDeclaration(
+        Function *NewF = Intrinsic::getDeclaration(
             II->getModule(), Intrinsic::read_register, II->getType());
         Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
         MDNode *MD = MDNode::get(II->getContext(), MDArgs);
@@ -3804,8 +3756,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
         break;
 
-      Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
-                                              SrcLHS->getType());
+      Function *NewF =
+          Intrinsic::getDeclaration(II->getModule(), NewIID,
+                                    { II->getType(),
+                                      SrcLHS->getType() });
       Value *Args[] = { SrcLHS, SrcRHS,
                         ConstantInt::get(CC->getType(), SrcPred) };
       CallInst *NewCall = Builder.CreateCall(NewF, Args);
@@ -3833,11 +3787,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::amdgcn_update_dpp: {
     Value *Old = II->getArgOperand(0);
 
-    auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
-    auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
-    auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
-    if (!BC || !RM || !BM ||
-        BC->isZeroValue() ||
+    auto BC = cast<ConstantInt>(II->getArgOperand(5));
+    auto RM = cast<ConstantInt>(II->getArgOperand(3));
+    auto BM = cast<ConstantInt>(II->getArgOperand(4));
+    if (BC->isZeroValue() ||
         RM->getZExtValue() != 0xF ||
         BM->getZExtValue() != 0xF ||
         isa<UndefValue>(Old))
@@ -3847,6 +3800,37 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     II->setOperand(0, UndefValue::get(Old->getType()));
     return II;
   }
+  case Intrinsic::amdgcn_readfirstlane:
+  case Intrinsic::amdgcn_readlane: {
+    // A constant value is trivially uniform.
+    if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
+      return replaceInstUsesWith(*II, C);
+
+    // The rest of these may not be safe if the exec may not be the same between
+    // the def and use.
+    Value *Src = II->getArgOperand(0);
+    Instruction *SrcInst = dyn_cast<Instruction>(Src);
+    if (SrcInst && SrcInst->getParent() != II->getParent())
+      break;
+
+    // readfirstlane (readfirstlane x) -> readfirstlane x
+    // readlane (readfirstlane x), y -> readfirstlane x
+    if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+      return replaceInstUsesWith(*II, Src);
+
+    if (IID == Intrinsic::amdgcn_readfirstlane) {
+      // readfirstlane (readlane x, y) -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+        return replaceInstUsesWith(*II, Src);
+    } else {
+      // readlane (readlane x, y), y -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+                  m_Value(), m_Specific(II->getArgOperand(1)))))
+        return replaceInstUsesWith(*II, Src);
+    }
+
+    break;
+  }
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
@@ -3870,14 +3854,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         break;
       }
       if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
-        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+        if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
           // If there is a stackrestore below this one, remove this one.
-          if (II->getIntrinsicID() == Intrinsic::stackrestore)
+          if (II2->getIntrinsicID() == Intrinsic::stackrestore)
             return eraseInstFromFunction(CI);
 
           // Bail if we cross over an intrinsic with side effects, such as
           // llvm.stacksave, llvm.read_register, or llvm.setjmp.
-          if (II->mayHaveSideEffects()) {
+          if (II2->mayHaveSideEffects()) {
             CannotRemove = true;
             break;
           }
@@ -3920,16 +3904,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // Canonicalize assume(a && b) -> assume(a); assume(b);
     // Note: New assumption intrinsics created here are registered by
     // the InstCombineIRInserter object.
-    Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
+    FunctionType *AssumeIntrinsicTy = II->getFunctionType();
+    Value *AssumeIntrinsic = II->getCalledValue();
+    Value *A, *B;
     if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
-      Builder.CreateCall(AssumeIntrinsic, A, II->getName());
-      Builder.CreateCall(AssumeIntrinsic, B, II->getName());
+      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName());
+      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
       return eraseInstFromFunction(*II);
     }
     // assume(!(a || b)) -> assume(!a); assume(!b);
     if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
-      Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
-      Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
+      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+                         Builder.CreateNot(A), II->getName());
+      Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+                         Builder.CreateNot(B), II->getName());
       return eraseInstFromFunction(*II);
     }
 
@@ -4036,7 +4024,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
   }
-  return visitCallSite(II);
+  return visitCallBase(*II);
 }
 
 // Fence instruction simplification
@@ -4051,12 +4039,17 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
 
 // InvokeInst simplification
 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
-  return visitCallSite(&II);
+  return visitCallBase(II);
+}
+
+// CallBrInst simplification
+Instruction *InstCombiner::visitCallBrInst(CallBrInst &CBI) {
+  return visitCallBase(CBI);
 }
 
 /// If this cast does not affect the value passed through the varargs area, we
 /// can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
+static bool isSafeToEliminateVarargsCast(const CallBase &Call,
                                          const DataLayout &DL,
                                          const CastInst *const CI,
                                          const int ix) {
@@ -4068,18 +4061,20 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   // TODO: This is probably something which should be expanded to all
   // intrinsics since the entire point of intrinsics is that
   // they are understandable by the optimizer.
-  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
+  if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call))
     return false;
 
   // The size of ByVal or InAlloca arguments is derived from the type, so we
   // can't change to a type with a different size.  If the size were
   // passed explicitly we could avoid this check.
-  if (!CS.isByValOrInAllocaArgument(ix))
+  if (!Call.isByValOrInAllocaArgument(ix))
     return true;
 
   Type* SrcTy =
             cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
-  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+  Type *DstTy = Call.isByValArgument(ix)
+                    ? Call.getParamByValType(ix)
+                    : cast<PointerType>(CI->getType())->getElementType();
   if (!SrcTy->isSized() || !DstTy->isSized())
     return false;
   if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
@@ -4096,7 +4091,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
   auto InstCombineErase = [this](Instruction *I) {
     eraseInstFromFunction(*I);
   };
-  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+  LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
                                InstCombineErase);
   if (Value *With = Simplifier.optimizeCall(CI)) {
     ++NumSimplified;
@@ -4182,10 +4177,10 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
   return nullptr;
 }
 
-/// Improvements for call and invoke instructions.
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
-  if (isAllocLikeFn(CS.getInstruction(), &TLI))
-    return visitAllocSite(*CS.getInstruction());
+/// Improvements for call, callbr and invoke instructions.
+Instruction *InstCombiner::visitCallBase(CallBase &Call) {
+  if (isAllocLikeFn(&Call, &TLI))
+    return visitAllocSite(Call);
 
   bool Changed = false;
 
@@ -4195,52 +4190,50 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   SmallVector<unsigned, 4> ArgNos;
   unsigned ArgNo = 0;
 
-  for (Value *V : CS.args()) {
+  for (Value *V : Call.args()) {
     if (V->getType()->isPointerTy() &&
-        !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
-        isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
+        !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
+        isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
       ArgNos.push_back(ArgNo);
     ArgNo++;
   }
 
-  assert(ArgNo == CS.arg_size() && "sanity check");
+  assert(ArgNo == Call.arg_size() && "sanity check");
 
   if (!ArgNos.empty()) {
-    AttributeList AS = CS.getAttributes();
-    LLVMContext &Ctx = CS.getInstruction()->getContext();
+    AttributeList AS = Call.getAttributes();
+    LLVMContext &Ctx = Call.getContext();
     AS = AS.addParamAttribute(Ctx, ArgNos,
                               Attribute::get(Ctx, Attribute::NonNull));
-    CS.setAttributes(AS);
+    Call.setAttributes(AS);
     Changed = true;
   }
 
   // If the callee is a pointer to a function, attempt to move any casts to the
-  // arguments of the call/invoke.
-  Value *Callee = CS.getCalledValue();
-  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+  // arguments of the call/callbr/invoke.
+  Value *Callee = Call.getCalledValue();
+  if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
     return nullptr;
 
   if (Function *CalleeF = dyn_cast<Function>(Callee)) {
     // Remove the convergent attr on calls when the callee is not convergent.
-    if (CS.isConvergent() && !CalleeF->isConvergent() &&
+    if (Call.isConvergent() && !CalleeF->isConvergent() &&
         !CalleeF->isIntrinsic()) {
-      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
-                        << CS.getInstruction() << "\n");
-      CS.setNotConvergent();
-      return CS.getInstruction();
+      LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
+                        << "\n");
+      Call.setNotConvergent();
+      return &Call;
     }
 
     // If the call and callee calling conventions don't match, this call must
     // be unreachable, as the call is undefined.
-    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+    if (CalleeF->getCallingConv() != Call.getCallingConv() &&
         // Only do this for calls to a function with a body.  A prototype may
         // not actually end up matching the implementation's calling conv for a
         // variety of reasons (e.g. it may be written in assembly).
         !CalleeF->isDeclaration()) {
-      Instruction *OldCall = CS.getInstruction();
-      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
-                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
-                                  OldCall);
+      Instruction *OldCall = &Call;
+      CreateNonTerminatorUnreachable(OldCall);
       // If OldCall does not return void then replaceAllUsesWith undef.
       // This allows ValueHandlers and custom metadata to adjust itself.
       if (!OldCall->getType()->isVoidTy())
@@ -4248,40 +4241,35 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       if (isa<CallInst>(OldCall))
         return eraseInstFromFunction(*OldCall);
 
-      // We cannot remove an invoke, because it would change the CFG, just
-      // change the callee to a null pointer.
-      cast<InvokeInst>(OldCall)->setCalledFunction(
-                                    Constant::getNullValue(CalleeF->getType()));
+      // We cannot remove an invoke or a callbr, because it would change the
+      // CFG, just change the callee to a null pointer.
+      cast<CallBase>(OldCall)->setCalledFunction(
+          CalleeF->getFunctionType(),
+          Constant::getNullValue(CalleeF->getType()));
       return nullptr;
     }
   }
 
   if ((isa<ConstantPointerNull>(Callee) &&
-       !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
+       !NullPointerIsDefined(Call.getFunction())) ||
       isa<UndefValue>(Callee)) {
-    // If CS does not return void then replaceAllUsesWith undef.
+    // If Call does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
-    if (!CS.getInstruction()->getType()->isVoidTy())
-      replaceInstUsesWith(*CS.getInstruction(),
-                          UndefValue::get(CS.getInstruction()->getType()));
+    if (!Call.getType()->isVoidTy())
+      replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
 
-    if (isa<InvokeInst>(CS.getInstruction())) {
-      // Can't remove an invoke because we cannot change the CFG.
+    if (Call.isTerminator()) {
+      // Can't remove an invoke or callbr because we cannot change the CFG.
       return nullptr;
     }
 
-    // This instruction is not reachable, just remove it.  We insert a store to
-    // undef so that we know that this code is not reachable, despite the fact
-    // that we can't modify the CFG here.
-    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
-                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
-                  CS.getInstruction());
-
-    return eraseInstFromFunction(*CS.getInstruction());
+    // This instruction is not reachable, just remove it.
+    CreateNonTerminatorUnreachable(&Call);
+    return eraseInstFromFunction(Call);
   }
 
   if (IntrinsicInst *II = findInitTrampoline(Callee))
-    return transformCallThroughTrampoline(CS, II);
+    return transformCallThroughTrampoline(Call, *II);
 
   PointerType *PTy = cast<PointerType>(Callee->getType());
   FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -4289,39 +4277,48 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     int ix = FTy->getNumParams();
     // See if we can optimize any arguments passed through the varargs area of
     // the call.
-    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
-           E = CS.arg_end(); I != E; ++I, ++ix) {
+    for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
+         I != E; ++I, ++ix) {
       CastInst *CI = dyn_cast<CastInst>(*I);
-      if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
+      if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
         *I = CI->getOperand(0);
+
+        // Update the byval type to match the argument type.
+        if (Call.isByValArgument(ix)) {
+          Call.removeParamAttr(ix, Attribute::ByVal);
+          Call.addParamAttr(
+              ix, Attribute::getWithByValType(
+                      Call.getContext(),
+                      CI->getOperand(0)->getType()->getPointerElementType()));
+        }
         Changed = true;
       }
     }
   }
 
-  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+  if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
     // Inline asm calls cannot throw - mark them 'nounwind'.
-    CS.setDoesNotThrow();
+    Call.setDoesNotThrow();
     Changed = true;
   }
 
   // Try to optimize the call if possible, we require DataLayout for most of
   // this.  None of these calls are seen as possibly dead so go ahead and
   // delete the instruction now.
-  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+  if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
     Instruction *I = tryOptimizeCall(CI);
     // If we changed something return the result, etc. Otherwise let
     // the fallthrough check.
     if (I) return eraseInstFromFunction(*I);
   }
 
-  return Changed ? CS.getInstruction() : nullptr;
+  return Changed ? &Call : nullptr;
 }
 
 /// If the callee is a constexpr cast of a function, attempt to move the cast to
-/// the arguments of the call/invoke.
-bool InstCombiner::transformConstExprCastCall(CallSite CS) {
-  auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+/// the arguments of the call/callbr/invoke.
+bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
+  auto *Callee = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
   if (!Callee)
     return false;
 
@@ -4335,11 +4332,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // prototype with the exception of pointee types. The code below doesn't
   // implement that, so we can't do this transform.
   // TODO: Do the transform if it only requires adding pointer casts.
-  if (CS.isMustTailCall())
+  if (Call.isMustTailCall())
     return false;
 
-  Instruction *Caller = CS.getInstruction();
-  const AttributeList &CallerPAL = CS.getAttributes();
+  Instruction *Caller = &Call;
+  const AttributeList &CallerPAL = Call.getAttributes();
 
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
@@ -4370,20 +4367,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         return false;   // Attribute not compatible with transformed value.
     }
 
-    // If the callsite is an invoke instruction, and the return value is used by
-    // a PHI node in a successor, we cannot change the return type of the call
-    // because there is no place to put the cast instruction (without breaking
-    // the critical edge).  Bail out in this case.
-    if (!Caller->use_empty())
+    // If the callbase is an invoke/callbr instruction, and the return value is
+    // used by a PHI node in a successor, we cannot change the return type of
+    // the call because there is no place to put the cast instruction (without
+    // breaking the critical edge).  Bail out in this case.
+    if (!Caller->use_empty()) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
         for (User *U : II->users())
           if (PHINode *PN = dyn_cast<PHINode>(U))
             if (PN->getParent() == II->getNormalDest() ||
                 PN->getParent() == II->getUnwindDest())
               return false;
+      // FIXME: Be conservative for callbr to avoid a quadratic search.
+      if (isa<CallBrInst>(Caller))
+        return false;
+    }
   }
 
-  unsigned NumActualArgs = CS.arg_size();
+  unsigned NumActualArgs = Call.arg_size();
   unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
 
   // Prevent us turning:
@@ -4398,7 +4399,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
     return false;
 
-  CallSite::arg_iterator AI = CS.arg_begin();
+  auto AI = Call.arg_begin();
   for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
     Type *ParamTy = FT->getParamType(i);
     Type *ActTy = (*AI)->getType();
@@ -4410,7 +4411,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
             .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
       return false;   // Attribute not compatible with transformed value.
 
-    if (CS.isInAllocaArgument(i))
+    if (Call.isInAllocaArgument(i))
       return false;   // Cannot transform to and from inalloca.
 
     // If the parameter is passed as a byval argument, then we have to have a
@@ -4420,7 +4421,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       if (!ParamPTy || !ParamPTy->getElementType()->isSized())
         return false;
 
-      Type *CurElTy = ActTy->getPointerElementType();
+      Type *CurElTy = Call.getParamByValType(i);
       if (DL.getTypeAllocSize(CurElTy) !=
           DL.getTypeAllocSize(ParamPTy->getElementType()))
         return false;
@@ -4435,7 +4436,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     // If the callee is just a declaration, don't change the varargsness of the
     // call.  We don't want to introduce a varargs call where one doesn't
     // already exist.
-    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+    PointerType *APTy = cast<PointerType>(Call.getCalledValue()->getType());
     if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
       return false;
 
@@ -4474,7 +4475,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // with the existing attributes.  Wipe out any problematic attributes.
   RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
 
-  AI = CS.arg_begin();
+  LLVMContext &Ctx = Call.getContext();
+  AI = Call.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
     Type *ParamTy = FT->getParamType(i);
 
@@ -4484,7 +4486,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     Args.push_back(NewArg);
 
     // Add any parameter attributes.
-    ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+    if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+      AttrBuilder AB(CallerPAL.getParamAttributes(i));
+      AB.addByValAttr(NewArg->getType()->getPointerElementType());
+      ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
+    } else
+      ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
   }
 
   // If the function takes more arguments than the call was taking, add them
@@ -4523,45 +4530,50 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
   assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
          "missing argument attributes");
-  LLVMContext &Ctx = Callee->getContext();
   AttributeList NewCallerPAL = AttributeList::get(
       Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
 
   SmallVector<OperandBundleDef, 1> OpBundles;
-  CS.getOperandBundlesAsDefs(OpBundles);
+  Call.getOperandBundlesAsDefs(OpBundles);
 
-  CallSite NewCS;
+  CallBase *NewCall;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-    NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
-                                 II->getUnwindDest(), Args, OpBundles);
+    NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
+                                   II->getUnwindDest(), Args, OpBundles);
+  } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+    NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
+                                   CBI->getIndirectDests(), Args, OpBundles);
   } else {
-    NewCS = Builder.CreateCall(Callee, Args, OpBundles);
-    cast<CallInst>(NewCS.getInstruction())
-        ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
+    NewCall = Builder.CreateCall(Callee, Args, OpBundles);
+    cast<CallInst>(NewCall)->setTailCallKind(
+        cast<CallInst>(Caller)->getTailCallKind());
   }
-  NewCS->takeName(Caller);
-  NewCS.setCallingConv(CS.getCallingConv());
-  NewCS.setAttributes(NewCallerPAL);
+  NewCall->takeName(Caller);
+  NewCall->setCallingConv(Call.getCallingConv());
+  NewCall->setAttributes(NewCallerPAL);
 
   // Preserve the weight metadata for the new call instruction. The metadata
   // is used by SamplePGO to check callsite's hotness.
   uint64_t W;
   if (Caller->extractProfTotalWeight(W))
-    NewCS->setProfWeight(W);
+    NewCall->setProfWeight(W);
 
   // Insert a cast of the return type as necessary.
-  Instruction *NC = NewCS.getInstruction();
+  Instruction *NC = NewCall;
   Value *NV = NC;
   if (OldRetTy != NV->getType() && !Caller->use_empty()) {
     if (!NV->getType()->isVoidTy()) {
       NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
       NC->setDebugLoc(Caller->getDebugLoc());
 
-      // If this is an invoke instruction, we should insert it after the first
-      // non-phi, instruction in the normal successor block.
+      // If this is an invoke/callbr instruction, we should insert it after the
+      // first non-phi instruction in the normal successor block.
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
         BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
         InsertNewInstBefore(NC, *I);
+      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+        BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
+        InsertNewInstBefore(NC, *I);
       } else {
         // Otherwise, it's a call, just insert cast right after the call.
         InsertNewInstBefore(NC, *Caller);
@@ -4590,23 +4602,20 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 /// Turn a call to a function created by init_trampoline / adjust_trampoline
 /// intrinsic pair into a direct call to the underlying function.
 Instruction *
-InstCombiner::transformCallThroughTrampoline(CallSite CS,
-                                             IntrinsicInst *Tramp) {
-  Value *Callee = CS.getCalledValue();
-  PointerType *PTy = cast<PointerType>(Callee->getType());
-  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-  AttributeList Attrs = CS.getAttributes();
+InstCombiner::transformCallThroughTrampoline(CallBase &Call,
+                                             IntrinsicInst &Tramp) {
+  Value *Callee = Call.getCalledValue();
+  Type *CalleeTy = Callee->getType();
+  FunctionType *FTy = Call.getFunctionType();
+  AttributeList Attrs = Call.getAttributes();
 
   // If the call already has the 'nest' attribute somewhere then give up -
   // otherwise 'nest' would occur twice after splicing in the chain.
   if (Attrs.hasAttrSomewhere(Attribute::Nest))
     return nullptr;
 
-  assert(Tramp &&
-         "transformCallThroughTrampoline called with incorrect CallSite.");
-
-  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
-  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
+  Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
+  FunctionType *NestFTy = NestF->getFunctionType();
 
   AttributeList NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
@@ -4628,22 +4637,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
     }
 
     if (NestTy) {
-      Instruction *Caller = CS.getInstruction();
       std::vector<Value*> NewArgs;
       std::vector<AttributeSet> NewArgAttrs;
-      NewArgs.reserve(CS.arg_size() + 1);
-      NewArgAttrs.reserve(CS.arg_size());
+      NewArgs.reserve(Call.arg_size() + 1);
+      NewArgAttrs.reserve(Call.arg_size());
 
       // Insert the nest argument into the call argument list, which may
       // mean appending it.  Likewise for attributes.
 
       {
         unsigned ArgNo = 0;
-        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+        auto I = Call.arg_begin(), E = Call.arg_end();
         do {
           if (ArgNo == NestArgNo) {
             // Add the chain argument and attributes.
-            Value *NestVal = Tramp->getArgOperand(2);
+            Value *NestVal = Tramp.getArgOperand(2);
             if (NestVal->getType() != NestTy)
               NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
             NewArgs.push_back(NestVal);
@@ -4705,24 +4713,30 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
                              Attrs.getRetAttributes(), NewArgAttrs);
 
       SmallVector<OperandBundleDef, 1> OpBundles;
-      CS.getOperandBundlesAsDefs(OpBundles);
+      Call.getOperandBundlesAsDefs(OpBundles);
 
       Instruction *NewCaller;
-      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-        NewCaller = InvokeInst::Create(NewCallee,
+      if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
+        NewCaller = InvokeInst::Create(NewFTy, NewCallee,
                                        II->getNormalDest(), II->getUnwindDest(),
                                        NewArgs, OpBundles);
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+      } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
+        NewCaller =
+            CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
+                               CBI->getIndirectDests(), NewArgs, OpBundles);
+        cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
+        cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
       } else {
-        NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
+        NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
         cast<CallInst>(NewCaller)->setTailCallKind(
-            cast<CallInst>(Caller)->getTailCallKind());
+            cast<CallInst>(Call).getTailCallKind());
         cast<CallInst>(NewCaller)->setCallingConv(
-            cast<CallInst>(Caller)->getCallingConv());
+            cast<CallInst>(Call).getCallingConv());
         cast<CallInst>(NewCaller)->setAttributes(NewPAL);
       }
-      NewCaller->setDebugLoc(Caller->getDebugLoc());
+      NewCaller->setDebugLoc(Call.getDebugLoc());
 
       return NewCaller;
     }
@@ -4731,9 +4745,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   // Replace the trampoline call with a direct call.  Since there is no 'nest'
   // parameter, there is no need to adjust the argument list.  Let the generic
   // code sort out any function type mismatches.
-  Constant *NewCallee =
-    NestF->getType() == PTy ? NestF :
-                              ConstantExpr::getBitCast(NestF, PTy);
-  CS.setCalledFunction(NewCallee);
-  return CS.getInstruction();
+  Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
+  Call.setCalledFunction(FTy, NewCallee);
+  return &Call;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 1201ac196ec0..2c9ba203fbf3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineCasts.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1373,10 +1372,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   // If we know that the value being extended is positive, we can use a zext
   // instead.
   KnownBits Known = computeKnownBits(Src, 0, &CI);
-  if (Known.isNonNegative()) {
-    Value *ZExt = Builder.CreateZExt(Src, DestTy);
-    return replaceInstUsesWith(CI, ZExt);
-  }
+  if (Known.isNonNegative())
+    return CastInst::Create(Instruction::ZExt, Src, DestTy);
 
   // Try to extend the entire expression tree to the wide destination type.
   if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
@@ -1618,12 +1615,20 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
         return CastInst::CreateFPCast(ExactResult, Ty);
       }
     }
+  }
 
-    // (fptrunc (fneg x)) -> (fneg (fptrunc x))
-    Value *X;
-    if (match(OpI, m_FNeg(m_Value(X)))) {
+  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+  Value *X;
+  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
+  if (Op && Op->hasOneUse()) {
+    if (match(Op, m_FNeg(m_Value(X)))) {
       Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
-      return BinaryOperator::CreateFNegFMF(InnerTrunc, OpI);
+
+      // FIXME: Once we're sure that unary FNeg optimizations are on par with
+      // binary FNeg, this should always return a unary operator.
+      if (isa<BinaryOperator>(Op))
+        return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
+      return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
     }
   }
 
@@ -1657,8 +1662,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
                                                      II->getIntrinsicID(), Ty);
       SmallVector<OperandBundleDef, 1> OpBundles;
       II->getOperandBundlesAsDefs(OpBundles);
-      CallInst *NewCI = CallInst::Create(Overload, { InnerTrunc }, OpBundles,
-                                         II->getName());
+      CallInst *NewCI =
+          CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
       NewCI->copyFastMathFlags(II);
       return NewCI;
     }
@@ -2167,7 +2172,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
   SmallSetVector<PHINode *, 4> OldPhiNodes;
 
   // Find all of the A->B casts and PHI nodes.
-  // We need to inpect all related PHI nodes, but PHIs can be cyclic, so
+  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
   // OldPhiNodes is used to track all known PHI nodes, before adding a new
   // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
   PhiWorklist.push_back(PN);
@@ -2242,20 +2247,43 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
     }
   }
 
+  // Traverse all accumulated PHI nodes and process their users,
+  // which are Stores and BitCasts. Without this processing
+  // NewPHI nodes could be replicated and could lead to extra
+  // moves generated after DeSSA.
   // If there is a store with type B, change it to type A.
-  for (User *U : PN->users()) {
-    auto *SI = dyn_cast<StoreInst>(U);
-    if (SI && SI->isSimple() && SI->getOperand(0) == PN) {
-      Builder.SetInsertPoint(SI);
-      auto *NewBC =
-          cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy));
-      SI->setOperand(0, NewBC);
-      Worklist.Add(SI);
-      assert(hasStoreUsersOnly(*NewBC));
+
+
+  // Replace users of BitCast B->A with NewPHI. These will help
+  // later to get rid of a closure formed by OldPHI nodes.
+  Instruction *RetVal = nullptr;
+  for (auto *OldPN : OldPhiNodes) {
+    PHINode *NewPN = NewPNodes[OldPN];
+    for (User *V : OldPN->users()) {
+      if (auto *SI = dyn_cast<StoreInst>(V)) {
+        if (SI->isSimple() && SI->getOperand(0) == OldPN) {
+          Builder.SetInsertPoint(SI);
+          auto *NewBC =
+            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+          SI->setOperand(0, NewBC);
+          Worklist.Add(SI);
+          assert(hasStoreUsersOnly(*NewBC));
+        }
+      }
+      else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+        // Verify it's a B->A cast.
+        Type *TyB = BCI->getOperand(0)->getType();
+        Type *TyA = BCI->getType();
+        if (TyA == DestTy && TyB == SrcTy) {
+          Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+          if (BCI == &CI)
+            RetVal = I;
+        }
+      }
     }
   }
 
-  return replaceInstUsesWith(CI, NewPNodes[PN]);
+  return RetVal;
 }
 
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -2310,7 +2338,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If we found a path from the src to dest, create the getelementptr now.
     if (SrcElTy == DstElTy) {
       SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
-      return GetElementPtrInst::CreateInBounds(Src, Idxs);
+      return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src,
+                                               Idxs);
     }
   }
 
@@ -2355,11 +2384,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       }
 
       // Otherwise, see if our source is an insert. If so, then use the scalar
-      // component directly.
-      if (InsertElementInst *IEI =
-            dyn_cast<InsertElementInst>(CI.getOperand(0)))
-        return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
-                                DestTy);
+      // component directly:
+      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
+      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
+        return new BitCastInst(InsElt->getOperand(1), DestTy);
     }
   }
 
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b5bbb09935e2..3a4283ae5406 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineCompares.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -704,7 +703,10 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
       continue;
 
     if (auto *CI = dyn_cast<CastInst>(Val)) {
-      NewInsts[CI] = NewInsts[CI->getOperand(0)];
+      // Don't get rid of the intermediate variable here; the store can grow
+      // the map which will invalidate the reference to the input value.
+      Value *V = NewInsts[CI->getOperand(0)];
+      NewInsts[CI] = V;
       continue;
     }
     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
@@ -1292,8 +1294,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   // use the sadd_with_overflow intrinsic to efficiently compute both the
   // result and the overflow bit.
   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
-  Value *F = Intrinsic::getDeclaration(I.getModule(),
-                                       Intrinsic::sadd_with_overflow, NewType);
+  Function *F = Intrinsic::getDeclaration(
+      I.getModule(), Intrinsic::sadd_with_overflow, NewType);
 
   InstCombiner::BuilderTy &Builder = IC.Builder;
 
@@ -1315,14 +1317,16 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }
 
-// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+// Handle  icmp pred X, 0
 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
   CmpInst::Predicate Pred = Cmp.getPredicate();
-  Value *X = Cmp.getOperand(0);
+  if (!match(Cmp.getOperand(1), m_Zero()))
+    return nullptr;
 
-  if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
+  // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+  if (Pred == ICmpInst::ICMP_SGT) {
     Value *A, *B;
-    SelectPatternResult SPR = matchSelectPattern(X, A, B);
+    SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
     if (SPR.Flavor == SPF_SMIN) {
       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
         return new ICmpInst(Pred, B, Cmp.getOperand(1));
@@ -1330,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
         return new ICmpInst(Pred, A, Cmp.getOperand(1));
     }
   }
+
+  // Given:
+  //   icmp eq/ne (urem %x, %y), 0
+  // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+  //   icmp eq/ne %x, 0
+  Value *X, *Y;
+  if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
+      ICmpInst::isEquality(Pred)) {
+    KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+    KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+    if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+      return new ICmpInst(Pred, X, Cmp.getOperand(1));
+  }
+
   return nullptr;
 }
 
@@ -1624,20 +1642,43 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
                                                  BinaryOperator *And,
                                                  const APInt &C1) {
+  bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
+
   // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
   // TODO: We canonicalize to the longer form for scalars because we have
   // better analysis/folds for icmp, and codegen may be better with icmp.
-  if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
-      C1.isNullValue() && match(And->getOperand(1), m_One()))
+  if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+      match(And->getOperand(1), m_One()))
     return new TruncInst(And->getOperand(0), Cmp.getType());
 
   const APInt *C2;
-  if (!match(And->getOperand(1), m_APInt(C2)))
+  Value *X;
+  if (!match(And, m_And(m_Value(X), m_APInt(C2))))
     return nullptr;
 
+  // Don't perform the following transforms if the AND has multiple uses
   if (!And->hasOneUse())
     return nullptr;
 
+  if (Cmp.isEquality() && C1.isNullValue()) {
+    // Restrict this fold to single-use 'and' (PR10267).
+    // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
+    if (C2->isSignMask()) {
+      Constant *Zero = Constant::getNullValue(X->getType());
+      auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+      return new ICmpInst(NewPred, X, Zero);
+    }
+
+    // Restrict this fold to single-use 'and' (PR10267).
+    // ((%x & C) == 0) --> %x u< (-C)  iff (-C) is power of two.
+    if ((~(*C2) + 1).isPowerOf2()) {
+      Constant *NegBOC =
+          ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+      auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+      return new ICmpInst(NewPred, X, NegBOC);
+    }
+  }
+
   // If the LHS is an 'and' of a truncate and we can widen the and/compare to
   // the input width without changing the value produced, eliminate the cast:
   //
@@ -1772,13 +1813,22 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
                           ConstantInt::get(V->getType(), 1));
   }
 
-  // X | C == C --> X <=u C
-  // X | C != C --> X  >u C
-  //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
-  if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
-      (C + 1).isPowerOf2()) {
-    Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
-    return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
+  Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
+  if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) {
+    // X | C == C --> X <=u C
+    // X | C != C --> X  >u C
+    //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
+    if ((C + 1).isPowerOf2()) {
+      Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+      return new ICmpInst(Pred, OrOp0, OrOp1);
+    }
+    // More general: are all bits outside of a mask constant set or not set?
+    // X | C == C --> (X & ~C) == 0
+    // X | C != C --> (X & ~C) != 0
+    if (Or->hasOneUse()) {
+      Value *A = Builder.CreateAnd(OrOp0, ~C);
+      return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType()));
+    }
   }
 
   if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
@@ -1799,8 +1849,8 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
   // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
   // a shorter form that has more potential to be folded even further.
   Value *X1, *X2, *X3, *X4;
-  if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
-      match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
+  if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
+      match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
     // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
     // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
     Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
@@ -1994,6 +2044,27 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
                         And, Constant::getNullValue(ShType));
   }
 
+  // Simplify 'shl' inequality test into 'and' equality test.
+  if (Cmp.isUnsigned() && Shl->hasOneUse()) {
+    // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
+    if ((C + 1).isPowerOf2() &&
+        (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
+      Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
+      return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
+                                                     : ICmpInst::ICMP_NE,
+                          And, Constant::getNullValue(ShType));
+    }
+    // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
+    if (C.isPowerOf2() &&
+        (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
+      Value *And =
+          Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
+      return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
+                                                     : ICmpInst::ICMP_NE,
+                          And, Constant::getNullValue(ShType));
+    }
+  }
+
   // Transform (icmp pred iM (shl iM %v, N), C)
   // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
   // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
@@ -2313,6 +2384,16 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
                                                const APInt &C) {
   Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
   ICmpInst::Predicate Pred = Cmp.getPredicate();
+  const APInt *C2;
+  APInt SubResult;
+
+  // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
+  if (match(X, m_APInt(C2)) &&
+      ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
+       (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
+      !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
+    return new ICmpInst(Cmp.getSwappedPredicate(), Y,
+                        ConstantInt::get(Y->getType(), SubResult));
 
   // The following transforms are only worth it if the only user of the subtract
   // is the icmp.
@@ -2337,7 +2418,6 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
   }
 
-  const APInt *C2;
   if (!match(X, m_APInt(C2)))
     return nullptr;
 
@@ -2482,20 +2562,76 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
     // the entire original Cmp can be simplified to a false.
     Value *Cond = Builder.getFalse();
     if (TrueWhenLessThan)
-      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT,
+                                                       OrigLHS, OrigRHS));
     if (TrueWhenEqual)
-      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ,
+                                                       OrigLHS, OrigRHS));
     if (TrueWhenGreaterThan)
-      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+      Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT,
+                                                       OrigLHS, OrigRHS));
 
     return replaceInstUsesWith(Cmp, Cond);
   }
   return nullptr;
 }
 
-Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
-                                                   BitCastInst *Bitcast,
-                                                   const APInt &C) {
+static Instruction *foldICmpBitCast(ICmpInst &Cmp,
+                                    InstCombiner::BuilderTy &Builder) {
+  auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
+  if (!Bitcast)
+    return nullptr;
+
+  ICmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *Op1 = Cmp.getOperand(1);
+  Value *BCSrcOp = Bitcast->getOperand(0);
+
+  // Make sure the bitcast doesn't change the number of vector elements.
+  if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
+          Bitcast->getDestTy()->getScalarSizeInBits()) {
+    // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
+    Value *X;
+    if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
+      // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
+      // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
+      // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
+      // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
+      if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
+           Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
+          match(Op1, m_Zero()))
+        return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+
+      // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
+      if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
+        return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
+
+      // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
+      if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
+        return new ICmpInst(Pred, X,
+                            ConstantInt::getAllOnesValue(X->getType()));
+    }
+
+    // Zero-equality checks are preserved through unsigned floating-point casts:
+    // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
+    // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
+    if (match(BCSrcOp, m_UIToFP(m_Value(X))))
+      if (Cmp.isEquality() && match(Op1, m_Zero()))
+        return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+  }
+
+  // Test to see if the operands of the icmp are casted versions of other
+  // values. If the ptr->ptr cast can be stripped off both arguments, do so.
+  if (Bitcast->getType()->isPointerTy() &&
+      (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+    // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+    // so eliminate it as well.
+    if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
+      Op1 = BC2->getOperand(0);
+
+    Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
+    return new ICmpInst(Pred, BCSrcOp, Op1);
+  }
+
   // Folding: icmp <pred> iN X, C
   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
   //    and C is a splat of a K-bit pattern
@@ -2503,28 +2639,28 @@ Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
   // Into:
   //   %E = extractelement <M x iK> %vec, i32 C'
   //   icmp <pred> iK %E, trunc(C)
-  if (!Bitcast->getType()->isIntegerTy() ||
+  const APInt *C;
+  if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+      !Bitcast->getType()->isIntegerTy() ||
       !Bitcast->getSrcTy()->isIntOrIntVectorTy())
     return nullptr;
 
-  Value *BCIOp = Bitcast->getOperand(0);
-  Value *Vec = nullptr;     // 1st vector arg of the shufflevector
-  Constant *Mask = nullptr; // Mask arg of the shufflevector
-  if (match(BCIOp,
+  Value *Vec;
+  Constant *Mask;
+  if (match(BCSrcOp,
             m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
     // Check whether every element of Mask is the same constant
     if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
-      auto *VecTy = cast<VectorType>(BCIOp->getType());
+      auto *VecTy = cast<VectorType>(BCSrcOp->getType());
       auto *EltTy = cast<IntegerType>(VecTy->getElementType());
-      auto Pred = Cmp.getPredicate();
-      if (C.isSplat(EltTy->getBitWidth())) {
+      if (C->isSplat(EltTy->getBitWidth())) {
         // Fold the icmp based on the value of C
         // If C is M copies of an iK sized bit pattern,
         // then:
         //   =>  %E = extractelement <N x iK> %vec, i32 Elem
         //       icmp <pred> iK %SplatVal, <pattern>
         Value *Extract = Builder.CreateExtractElement(Vec, Elem);
-        Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
+        Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth()));
         return new ICmpInst(Pred, Extract, NewC);
       }
     }
@@ -2606,13 +2742,9 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
       return I;
   }
 
-  if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
-    if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
+  if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+    if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
       return I;
-  }
-
-  if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
-    return I;
 
   return nullptr;
 }
@@ -2711,24 +2843,6 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
       if (C == *BOC && C.isPowerOf2())
         return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                             BO, Constant::getNullValue(RHS->getType()));
-
-      // Don't perform the following transforms if the AND has multiple uses
-      if (!BO->hasOneUse())
-        break;
-
-      // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
-      if (BOC->isSignMask()) {
-        Constant *Zero = Constant::getNullValue(BOp0->getType());
-        auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
-        return new ICmpInst(NewPred, BOp0, Zero);
-      }
-
-      // ((X & ~7) == 0) --> X < 8
-      if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
-        Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
-        auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
-        return new ICmpInst(NewPred, BOp0, NegBOC);
-      }
     }
     break;
   }
@@ -2756,14 +2870,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
   return nullptr;
 }
 
-/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
-                                                         const APInt &C) {
-  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
-  if (!II || !Cmp.isEquality())
-    return nullptr;
-
-  // Handle icmp {eq|ne} <intrinsic>, Constant.
+/// Fold an equality icmp with LLVM intrinsic and constant operand.
+Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
+                                                           IntrinsicInst *II,
+                                                           const APInt &C) {
   Type *Ty = II->getType();
   unsigned BitWidth = C.getBitWidth();
   switch (II->getIntrinsicID()) {
@@ -2823,6 +2933,65 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
   return nullptr;
 }
 
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+                                                         IntrinsicInst *II,
+                                                         const APInt &C) {
+  if (Cmp.isEquality())
+    return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
+
+  Type *Ty = II->getType();
+  unsigned BitWidth = C.getBitWidth();
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::ctlz: {
+    // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
+    if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+      unsigned Num = C.getLimitedValue();
+      APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
+                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+    }
+
+    // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
+    if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+        C.uge(1) && C.ule(BitWidth)) {
+      unsigned Num = C.getLimitedValue();
+      APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
+                             II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+    }
+    break;
+  }
+  case Intrinsic::cttz: {
+    // Limit to one use to ensure we don't increase instruction count.
+    if (!II->hasOneUse())
+      return nullptr;
+
+    // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
+    if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+                             Builder.CreateAnd(II->getArgOperand(0), Mask),
+                             ConstantInt::getNullValue(Ty));
+    }
+
+    // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
+    if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+        C.uge(1) && C.ule(BitWidth)) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
+      return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
+                             Builder.CreateAnd(II->getArgOperand(0), Mask),
+                             ConstantInt::getNullValue(Ty));
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  return nullptr;
+}
+
 /// Handle icmp with constant (but not simple integer constant) RHS.
 Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2983,6 +3152,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
     //  x s> x & (-1 >> y)    ->    x s> (-1 >> y)
     if (X != I.getOperand(0)) // X must be on LHS of comparison!
       return nullptr;         // Ignore the other case.
+    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+      return nullptr;
+    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+      return nullptr;
     DstPred = ICmpInst::Predicate::ICMP_SGT;
     break;
   case ICmpInst::Predicate::ICMP_SGE:
@@ -3009,6 +3182,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
     //  x s<= x & (-1 >> y)    ->    x s<= (-1 >> y)
     if (X != I.getOperand(0)) // X must be on LHS of comparison!
       return nullptr;         // Ignore the other case.
+    if (!match(M, m_Constant())) // Can not do this fold with non-constant.
+      return nullptr;
+    if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+      return nullptr;
     DstPred = ICmpInst::Predicate::ICMP_SLE;
     break;
   default:
@@ -3093,6 +3270,64 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
   return T1;
 }
 
+// Given pattern:
+//   icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// we should move shifts to the same hand of 'and', i.e. rewrite as
+//   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
+// We are only interested in opposite logical shifts here.
+// If we can, we want to end up creating 'lshr' shift.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+                                           InstCombiner::BuilderTy &Builder) {
+  if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+      !I.getOperand(0)->hasOneUse())
+    return nullptr;
+
+  auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+  auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+  // Look for an 'and' of two (opposite) logical shifts.
+  // Pick the single-use shift as XShift.
+  Value *XShift, *YShift;
+  if (!match(I.getOperand(0),
+             m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+                     m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+    return nullptr;
+
+  // If YShift is a single-use 'lshr', swap the shifts around.
+  if (match(YShift, m_OneUse(m_AnyLShr)))
+    std::swap(XShift, YShift);
+
+  // The shifts must be in opposite directions.
+  Instruction::BinaryOps XShiftOpcode =
+      cast<BinaryOperator>(XShift)->getOpcode();
+  if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+    return nullptr; // Do not care about same-direction shifts here.
+
+  Value *X, *XShAmt, *Y, *YShAmt;
+  match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+  match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+  // Can we fold (XShAmt+YShAmt) ?
+  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+                                  SQ.getWithInstruction(&I));
+  if (!NewShAmt)
+    return nullptr;
+  // Is the new shift amount smaller than the bit width?
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+                                          APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, we can do this fold. The shift is the same that was for X.
+  Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+                  ? Builder.CreateLShr(X, NewShAmt)
+                  : Builder.CreateShl(X, NewShAmt);
+  Value *T1 = Builder.CreateAnd(T0, Y);
+  return Builder.CreateICmp(I.getPredicate(), T1,
+                            Constant::getNullValue(X->getType()));
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3448,6 +3683,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
   if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
+  if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+    return replaceInstUsesWith(I, V);
+
   return nullptr;
 }
 
@@ -3688,6 +3926,30 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
        match(Op1, m_BitReverse(m_Value(B)))))
     return new ICmpInst(Pred, A, B);
 
+  // Canonicalize checking for a power-of-2-or-zero value:
+  // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
+  // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
+  if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
+                                   m_Deferred(A)))) ||
+      !match(Op1, m_ZeroInt()))
+    A = nullptr;
+
+  // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
+  // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
+  if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
+    A = Op1;
+  else if (match(Op1,
+                 m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
+    A = Op0;
+
+  if (A) {
+    Type *Ty = A->getType();
+    CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
+    return Pred == ICmpInst::ICMP_EQ
+        ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2))
+        : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
+  }
+
   return nullptr;
 }
 
@@ -3698,7 +3960,6 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
   Value *LHSCIOp        = LHSCI->getOperand(0);
   Type *SrcTy     = LHSCIOp->getType();
   Type *DestTy    = LHSCI->getType();
-  Value *RHSCIOp;
 
   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
   // integer type is the same size as the pointer type.
@@ -3740,7 +4001,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
 
   if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
     // Not an extension from the same type?
-    RHSCIOp = CI->getOperand(0);
+    Value *RHSCIOp = CI->getOperand(0);
     if (RHSCIOp->getType() != LHSCIOp->getType())
       return nullptr;
 
@@ -3813,104 +4074,83 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
   return BinaryOperator::CreateNot(Result);
 }
 
-bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
-                                         Value *RHS, Instruction &OrigI,
-                                         Value *&Result, Constant *&Overflow) {
+/// Return true when \p RHS is the neutral right-hand operand of \p BinaryOp,
+/// i.e. when the operation merely reproduces its left-hand side:
+///   X + 0, X - 0 and X * 1.
+/// Only Add, Sub and Mul are supported; any other opcode hits unreachable.
+static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
+  if (BinaryOp == Instruction::Add || BinaryOp == Instruction::Sub)
+    return match(RHS, m_Zero());
+  if (BinaryOp == Instruction::Mul)
+    return match(RHS, m_One());
+  llvm_unreachable("Unsupported binary op");
+}
+
+/// Classify whether `LHS <BinaryOp> RHS` overflows, dispatching on the opcode
+/// and on \p IsSigned to the matching computeOverflowFor{Signed,Unsigned}*
+/// helper, with \p CxtI passed through as the context instruction. Only Add,
+/// Sub and Mul are supported; any other opcode hits llvm_unreachable.
+OverflowResult InstCombiner::computeOverflow(
+    Instruction::BinaryOps BinaryOp, bool IsSigned,
+    Value *LHS, Value *RHS, Instruction *CxtI) const {
+  // Every supported opcode has one signed and one unsigned analysis; pick the
+  // one matching IsSigned.
+  switch (BinaryOp) {
+  case Instruction::Add:
+    return IsSigned ? computeOverflowForSignedAdd(LHS, RHS, CxtI)
+                    : computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
+  case Instruction::Sub:
+    return IsSigned ? computeOverflowForSignedSub(LHS, RHS, CxtI)
+                    : computeOverflowForUnsignedSub(LHS, RHS, CxtI);
+  case Instruction::Mul:
+    return IsSigned ? computeOverflowForSignedMul(LHS, RHS, CxtI)
+                    : computeOverflowForUnsignedMul(LHS, RHS, CxtI);
+  default:
+    llvm_unreachable("Unsupported binary op");
+  }
+}
+
+bool InstCombiner::OptimizeOverflowCheck(
+    Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
+    Instruction &OrigI, Value *&Result, Constant *&Overflow) {
   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
     std::swap(LHS, RHS);
 
-  auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
-    Result = OpResult;
-    Overflow = OverflowVal;
-    if (ReuseName)
-      Result->takeName(&OrigI);
-    return true;
-  };
-
   // If the overflow check was an add followed by a compare, the insertion point
   // may be pointing to the compare.  We want to insert the new instructions
   // before the add in case there are uses of the add between the add and the
   // compare.
   Builder.SetInsertPoint(&OrigI);
 
-  switch (OCF) {
-  case OCF_INVALID:
-    llvm_unreachable("bad overflow check kind!");
-
-  case OCF_UNSIGNED_ADD: {
-    OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
-    if (OR == OverflowResult::NeverOverflows)
-      return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
-                       true);
-
-    if (OR == OverflowResult::AlwaysOverflows)
-      return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
-
-    // Fall through uadd into sadd
-    LLVM_FALLTHROUGH;
-  }
-  case OCF_SIGNED_ADD: {
-    // X + 0 -> {X, false}
-    if (match(RHS, m_Zero()))
-      return SetResult(LHS, Builder.getFalse(), false);
-
-    // We can strength reduce this signed add into a regular add if we can prove
-    // that it will never overflow.
-    if (OCF == OCF_SIGNED_ADD)
-      if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
-        return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
-                         true);
-    break;
-  }
-
-  case OCF_UNSIGNED_SUB:
-  case OCF_SIGNED_SUB: {
-    // X - 0 -> {X, false}
-    if (match(RHS, m_Zero()))
-      return SetResult(LHS, Builder.getFalse(), false);
-
-    if (OCF == OCF_SIGNED_SUB) {
-      if (willNotOverflowSignedSub(LHS, RHS, OrigI))
-        return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
-                         true);
-    } else {
-      if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
-        return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
-                         true);
-    }
-    break;
-  }
-
-  case OCF_UNSIGNED_MUL: {
-    OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
-    if (OR == OverflowResult::NeverOverflows)
-      return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
-                       true);
-    if (OR == OverflowResult::AlwaysOverflows)
-      return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
-    LLVM_FALLTHROUGH;
+  if (isNeutralValue(BinaryOp, RHS)) {
+    Result = LHS;
+    Overflow = Builder.getFalse();
+    return true;
   }
-  case OCF_SIGNED_MUL:
-    // X * undef -> undef
-    if (isa<UndefValue>(RHS))
-      return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
-
-    // X * 0 -> {0, false}
-    if (match(RHS, m_Zero()))
-      return SetResult(RHS, Builder.getFalse(), false);
-
-    // X * 1 -> {X, false}
-    if (match(RHS, m_One()))
-      return SetResult(LHS, Builder.getFalse(), false);
 
-    if (OCF == OCF_SIGNED_MUL)
-      if (willNotOverflowSignedMul(LHS, RHS, OrigI))
-        return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
-                         true);
-    break;
+  switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
+    case OverflowResult::MayOverflow:
+      return false;
+    case OverflowResult::AlwaysOverflowsLow:
+    case OverflowResult::AlwaysOverflowsHigh:
+      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+      Result->takeName(&OrigI);
+      Overflow = Builder.getTrue();
+      return true;
+    case OverflowResult::NeverOverflows:
+      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+      Result->takeName(&OrigI);
+      Overflow = Builder.getFalse();
+      if (auto *Inst = dyn_cast<Instruction>(Result)) {
+        if (IsSigned)
+          Inst->setHasNoSignedWrap();
+        else
+          Inst->setHasNoUnsignedWrap();
+      }
+      return true;
   }
 
-  return false;
+  llvm_unreachable("Unexpected overflow result");
 }
 
 /// Recognize and process idiom involving test for multiplication
@@ -4084,8 +4324,8 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
     MulA = Builder.CreateZExt(A, MulType);
   if (WidthB < MulWidth)
     MulB = Builder.CreateZExt(B, MulType);
-  Value *F = Intrinsic::getDeclaration(I.getModule(),
-                                       Intrinsic::umul_with_overflow, MulType);
+  Function *F = Intrinsic::getDeclaration(
+      I.getModule(), Intrinsic::umul_with_overflow, MulType);
   CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
   IC.Worklist.Add(MulInstr);
 
@@ -4881,61 +5121,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         return New;
   }
 
-  // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
-  Value *X;
-  if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
-    // icmp  eq (bitcast (sitofp X)), 0 --> icmp  eq X, 0
-    // icmp  ne (bitcast (sitofp X)), 0 --> icmp  ne X, 0
-    // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
-    // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
-    if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
-         Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
-        match(Op1, m_Zero()))
-      return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
-    // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
-    if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
-      return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
-
-    // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
-    if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
-      return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
-  }
-
-  // Zero-equality checks are preserved through unsigned floating-point casts:
-  // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
-  // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
-  if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
-    if (I.isEquality() && match(Op1, m_Zero()))
-      return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
-  // Test to see if the operands of the icmp are casted versions of other
-  // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
-  // now.
-  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
-    if (Op0->getType()->isPointerTy() &&
-        (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
-      // We keep moving the cast from the left operand over to the right
-      // operand, where it can often be eliminated completely.
-      Op0 = CI->getOperand(0);
-
-      // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
-      // so eliminate it as well.
-      if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
-        Op1 = CI2->getOperand(0);
-
-      // If Op1 is a constant, we can fold the cast into the constant.
-      if (Op0->getType() != Op1->getType()) {
-        if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
-          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
-        } else {
-          // Otherwise, cast the RHS right before the icmp
-          Op1 = Builder.CreateBitCast(Op1, Op0->getType());
-        }
-      }
-      return new ICmpInst(I.getPredicate(), Op0, Op1);
-    }
-  }
+  if (Instruction *Res = foldICmpBitCast(I, Builder))
+    return Res;
 
   if (isa<CastInst>(Op0)) {
     // Handle the special case of: icmp (cast bool to X), <cst>
@@ -4984,8 +5171,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         isa<IntegerType>(A->getType())) {
       Value *Result;
       Constant *Overflow;
-      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
-                                Overflow)) {
+      if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
+                                *AddI, Result, Overflow)) {
         replaceInstUsesWith(*AddI, Result);
         return replaceInstUsesWith(I, Overflow);
       }
@@ -5411,6 +5598,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     return replaceInstUsesWith(I, V);
 
   // Simplify 'fcmp pred X, X'
+  Type *OpType = Op0->getType();
+  assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
   if (Op0 == Op1) {
     switch (Pred) {
       default: break;
@@ -5420,7 +5609,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_UNE:    // True if unordered or not equal
       // Canonicalize these to be 'fcmp uno %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_UNO);
-      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      I.setOperand(1, Constant::getNullValue(OpType));
       return &I;
 
     case FCmpInst::FCMP_ORD:    // True if ordered (no nans)
@@ -5429,7 +5618,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
     case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal
       // Canonicalize these to be 'fcmp ord %X, 0.0'.
       I.setPredicate(FCmpInst::FCMP_ORD);
-      I.setOperand(1, Constant::getNullValue(Op0->getType()));
+      I.setOperand(1, Constant::getNullValue(OpType));
       return &I;
     }
   }
@@ -5438,15 +5627,20 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   // then canonicalize the operand to 0.0.
   if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
     if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
-      I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
+      I.setOperand(0, ConstantFP::getNullValue(OpType));
       return &I;
     }
     if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
-      I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
+      I.setOperand(1, ConstantFP::getNullValue(OpType));
       return &I;
     }
   }
 
+  // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+  Value *X, *Y;
+  if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+    return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
   // Test if the FCmpInst instruction is used exclusively by a select as
   // part of a minimum or maximum operation. If so, refrain from doing
   // any other folding. This helps out other analyses which understand
@@ -5465,7 +5659,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   // The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
   // fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
   if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
-    I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+    I.setOperand(1, ConstantFP::getNullValue(OpType));
     return &I;
   }
 
@@ -5505,12 +5699,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   if (Instruction *R = foldFabsWithFcmpZero(I))
     return R;
 
-  Value *X, *Y;
   if (match(Op0, m_FNeg(m_Value(X)))) {
-    // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
-    if (match(Op1, m_FNeg(m_Value(Y))))
-      return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
-
     // fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
     Constant *C;
     if (match(Op1, m_Constant(C))) {
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2de41bd5bef5..434b0d591215 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,9 +1,8 @@
 //===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,13 +52,14 @@ namespace llvm {
 
 class APInt;
 class AssumptionCache;
-class CallSite;
+class BlockFrequencyInfo;
 class DataLayout;
 class DominatorTree;
 class GEPOperator;
 class GlobalVariable;
 class LoopInfo;
 class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
 class TargetLibraryInfo;
 class User;
 
@@ -185,40 +185,6 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
   return false;
 }
 
-/// Specific patterns of overflow check idioms that we match.
-enum OverflowCheckFlavor {
-  OCF_UNSIGNED_ADD,
-  OCF_SIGNED_ADD,
-  OCF_UNSIGNED_SUB,
-  OCF_SIGNED_SUB,
-  OCF_UNSIGNED_MUL,
-  OCF_SIGNED_MUL,
-
-  OCF_INVALID
-};
-
-/// Returns the OverflowCheckFlavor corresponding to a overflow_with_op
-/// intrinsic.
-static inline OverflowCheckFlavor
-IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
-  switch (ID) {
-  default:
-    return OCF_INVALID;
-  case Intrinsic::uadd_with_overflow:
-    return OCF_UNSIGNED_ADD;
-  case Intrinsic::sadd_with_overflow:
-    return OCF_SIGNED_ADD;
-  case Intrinsic::usub_with_overflow:
-    return OCF_UNSIGNED_SUB;
-  case Intrinsic::ssub_with_overflow:
-    return OCF_SIGNED_SUB;
-  case Intrinsic::umul_with_overflow:
-    return OCF_UNSIGNED_MUL;
-  case Intrinsic::smul_with_overflow:
-    return OCF_SIGNED_MUL;
-  }
-}
-
 /// Some binary operators require special handling to avoid poison and undefined
 /// behavior. If a constant vector has undef elements, replace those undefs with
 /// identity constants if possible because those are always safe to execute.
@@ -306,6 +272,8 @@ private:
   const DataLayout &DL;
   const SimplifyQuery SQ;
   OptimizationRemarkEmitter &ORE;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
 
   // Optional analyses. When non-null, these can both be used to do better
   // combining and will be updated to reflect any changes.
@@ -317,11 +285,11 @@ public:
   InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-               OptimizationRemarkEmitter &ORE, const DataLayout &DL,
-               LoopInfo *LI)
+               OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+               ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
-        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
 
   /// Run the combiner over the entire worklist until it is empty.
   ///
@@ -345,6 +313,7 @@ public:
   //     I          - Change was made, I is still valid, I may be dead though
   //   otherwise    - Change was made, replace I with returned instruction
   //
+  Instruction *visitFNeg(UnaryOperator &I);
   Instruction *visitAdd(BinaryOperator &I);
   Instruction *visitFAdd(BinaryOperator &I);
   Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
@@ -394,6 +363,7 @@ public:
   Instruction *visitSelectInst(SelectInst &SI);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
+  Instruction *visitCallBrInst(CallBrInst &CBI);
 
   Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
   Instruction *visitPHINode(PHINode &PN);
@@ -403,6 +373,7 @@ public:
   Instruction *visitFree(CallInst &FI);
   Instruction *visitLoadInst(LoadInst &LI);
   Instruction *visitStoreInst(StoreInst &SI);
+  Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
   Instruction *visitBranchInst(BranchInst &BI);
   Instruction *visitFenceInst(FenceInst &FI);
   Instruction *visitSwitchInst(SwitchInst &SI);
@@ -464,16 +435,22 @@ private:
   /// operation in OperationResult and result of the overflow check in
   /// OverflowResult, and return true.  If no simplification is possible,
   /// returns false.
-  bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+  bool OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, bool IsSigned,
+                             Value *LHS, Value *RHS,
                              Instruction &CtxI, Value *&OperationResult,
                              Constant *&OverflowResult);
 
-  Instruction *visitCallSite(CallSite CS);
+  Instruction *visitCallBase(CallBase &Call);
   Instruction *tryOptimizeCall(CallInst *CI);
-  bool transformConstExprCastCall(CallSite CS);
-  Instruction *transformCallThroughTrampoline(CallSite CS,
-                                              IntrinsicInst *Tramp);
-
+  bool transformConstExprCastCall(CallBase &Call);
+  Instruction *transformCallThroughTrampoline(CallBase &Call,
+                                              IntrinsicInst &Tramp);
+
+  Value *simplifyMaskedLoad(IntrinsicInst &II);
+  Instruction *simplifyMaskedStore(IntrinsicInst &II);
+  Instruction *simplifyMaskedGather(IntrinsicInst &II);
+  Instruction *simplifyMaskedScatter(IntrinsicInst &II);
+  
   /// Transform (zext icmp) to bitwise / integer operations in order to
   /// eliminate it.
   ///
@@ -592,6 +569,8 @@ private:
   Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
   Value *getSelectCondition(Value *A, Value *B);
 
+  Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+
 public:
   /// Inserts an instruction \p New before instruction \p Old
   ///
@@ -647,6 +626,16 @@ public:
     return InsertValueInst::Create(Struct, Result, 0);
   }
 
+  /// Flag the block containing \p InsertAt as unreachable without touching the
+  /// CFG, by inserting a store of i1 true to an undef i1 pointer at \p InsertAt.
+  void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
+    auto &Context = InsertAt->getContext();
+    Constant *TrueVal = ConstantInt::getTrue(Context);
+    Constant *UndefPtr = UndefValue::get(Type::getInt1PtrTy(Context));
+    new StoreInst(TrueVal, UndefPtr, InsertAt);
+  }
+
+
   /// Combiner aware instruction erasure.
   ///
   /// When dealing with an instruction that has side effects or produces a void
@@ -703,7 +692,7 @@ public:
   }
 
   OverflowResult computeOverflowForSignedMul(const Value *LHS,
-	                                         const Value *RHS,
+                                             const Value *RHS,
                                              const Instruction *CxtI) const {
     return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
   }
@@ -731,6 +720,10 @@ public:
     return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
   }
 
+  OverflowResult computeOverflow(
+      Instruction::BinaryOps BinaryOp, bool IsSigned,
+      Value *LHS, Value *RHS, Instruction *CxtI) const;
+
   /// Maximum size of array considered when transforming.
   uint64_t MaxArraySizeForCombine;
 
@@ -802,8 +795,7 @@ private:
 
   Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                APInt DemandedElts,
-                                               int DmaskIdx = -1,
-                                               int TFCIdx = -1);
+                                               int DmaskIdx = -1);
 
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                     APInt &UndefElts, unsigned Depth = 0);
@@ -868,8 +860,6 @@ private:
 
   Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
                                       ConstantInt *C);
-  Instruction *foldICmpBitCastConstant(ICmpInst &Cmp, BitCastInst *Bitcast,
-                                       const APInt &C);
   Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
                                      const APInt &C);
   Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
@@ -904,7 +894,10 @@ private:
   Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
                                                  BinaryOperator *BO,
                                                  const APInt &C);
-  Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C);
+  Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+                                             const APInt &C);
+  Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+                                               const APInt &C);
 
   // Helpers of visitSelectInst().
   Instruction *foldSelectExtConst(SelectInst &Sel);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 76ab614090fa..054fb7da09a2 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -89,29 +88,29 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
         continue;
       }
 
-      if (auto CS = CallSite(I)) {
+      if (auto *Call = dyn_cast<CallBase>(I)) {
         // If this is the function being called then we treat it like a load and
         // ignore it.
-        if (CS.isCallee(&U))
+        if (Call->isCallee(&U))
           continue;
 
-        unsigned DataOpNo = CS.getDataOperandNo(&U);
-        bool IsArgOperand = CS.isArgOperand(&U);
+        unsigned DataOpNo = Call->getDataOperandNo(&U);
+        bool IsArgOperand = Call->isArgOperand(&U);
 
         // Inalloca arguments are clobbered by the call.
-        if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
+        if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
           return false;
 
         // If this is a readonly/readnone call site, then we know it is just a
         // load (but one that potentially returns the value itself), so we can
         // ignore it if we know that the value isn't captured.
-        if (CS.onlyReadsMemory() &&
-            (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
+        if (Call->onlyReadsMemory() &&
+            (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
           continue;
 
         // If this is being passed as a byval argument, the caller is making a
         // copy, so it is only a read of the alloca.
-        if (IsArgOperand && CS.isByValArgument(DataOpNo))
+        if (IsArgOperand && Call->isByValArgument(DataOpNo))
           continue;
       }
 
@@ -213,8 +212,8 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
       Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
       Value *NullIdx = Constant::getNullValue(IdxTy);
       Value *Idx[2] = {NullIdx, NullIdx};
-      Instruction *GEP =
-          GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+      Instruction *GEP = GetElementPtrInst::CreateInBounds(
+          NewTy, New, Idx, New->getName() + ".sub");
       IC.InsertNewInstBefore(GEP, *It);
 
       // Now make everything use the getelementptr instead of the original
@@ -299,7 +298,7 @@ void PointerReplacer::replace(Instruction *I) {
   if (auto *LT = dyn_cast<LoadInst>(I)) {
     auto *V = getReplacement(LT->getPointerOperand());
     assert(V && "Operand not replaced");
-    auto *NewI = new LoadInst(V);
+    auto *NewI = new LoadInst(I->getType(), V);
     NewI->takeName(LT);
     IC.InsertNewInstWith(NewI, *LT);
     IC.replaceInstUsesWith(*LT, NewI);
@@ -466,7 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
     NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
 
   LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
-      NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+      NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
   NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
@@ -631,7 +630,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
   // infinite loop).
   if (!Ty->isIntegerTy() && Ty->isSized() &&
       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
-      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
+      DL.typeSizeEqualsStoreSize(Ty) &&
       !DL.isNonIntegralPointerType(Ty) &&
       !isMinMaxWithLoads(
           peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
@@ -725,7 +724,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
       auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
                                                Name + ".elt");
       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
-      auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+      auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
+                                             EltAlign, Name + ".unpack");
       // Propagate AA metadata. It'll still be valid on the narrowed load.
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
@@ -775,8 +775,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
       };
       auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
                                                Name + ".elt");
-      auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
-                                             Name + ".unpack");
+      auto *L = IC.Builder.CreateAlignedLoad(
+          AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       L->setAAMetadata(AAMD);
@@ -1064,12 +1064,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
       // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
       unsigned Align = LI.getAlignment();
-      if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
-          isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
-        LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
-                                          SI->getOperand(1)->getName()+".val");
-        LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
-                                          SI->getOperand(2)->getName()+".val");
+      if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
+                                      DL, SI) &&
+          isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
+                                      DL, SI)) {
+        LoadInst *V1 =
+            Builder.CreateLoad(LI.getType(), SI->getOperand(1),
+                               SI->getOperand(1)->getName() + ".val");
+        LoadInst *V2 =
+            Builder.CreateLoad(LI.getType(), SI->getOperand(2),
+                               SI->getOperand(2)->getName() + ".val");
         assert(LI.isUnordered() && "implied by above");
         V1->setAlignment(Align);
         V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@@ -1436,6 +1440,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     }
   }
 
+  // If we have a store to a location which is known constant, we can conclude
+  // that the store must be storing the constant value (else the memory
+  // wouldn't be constant), and this must be a noop.
+  if (AA->pointsToConstantMemory(Ptr))
+    return eraseInstFromFunction(SI);
+
   // Do really simple DSE, to catch cases where there are several consecutive
   // stores to the same location, separated by a few arithmetic operations. This
   // situation often occurs with bitfield accesses.
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7e99f3e4e500..cc753ce05313 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineMulDivRem.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -375,11 +374,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
 
   // Sink negation: -X * Y --> -(X * Y)
-  if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))))
+  // But don't transform constant expressions because there's an inverse fold.
+  if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op0))
     return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I);
 
   // Sink negation: Y * -X --> -(X * Y)
-  if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))))
+  // But don't transform constant expressions because there's an inverse fold.
+  if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op1))
     return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I);
 
   // fabs(X) * fabs(X) -> X * X
@@ -431,6 +432,14 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
       }
     }
 
+    Value *Z;
+    if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
+                           m_Value(Z)))) {
+      // Sink division: (X / Y) * Z --> (X * Z) / Y
+      Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+      return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+    }
+
     // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
     // nnan disallows the possibility of returning a number if both operands are
     // negative (in that case, we should return NaN).
@@ -442,6 +451,45 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
       return replaceInstUsesWith(I, Sqrt);
     }
 
+    // Like the similar transform in instsimplify, this requires 'nsz' because
+    // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
+    if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 &&
+        Op0->hasNUses(2)) {
+      // Peek through fdiv to find squaring of square root:
+      // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
+      if (match(Op0, m_FDiv(m_Value(X),
+                            m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+        Value *XX = Builder.CreateFMulFMF(X, X, &I);
+        return BinaryOperator::CreateFDivFMF(XX, Y, &I);
+      }
+      // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
+      if (match(Op0, m_FDiv(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y)),
+                            m_Value(X)))) {
+        Value *XX = Builder.CreateFMulFMF(X, X, &I);
+        return BinaryOperator::CreateFDivFMF(Y, XX, &I);
+      }
+    }
+
+    // exp(X) * exp(Y) -> exp(X + Y)
+    // Match as long as at least one of exp has only one use.
+    if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
+        match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y))) &&
+        (Op0->hasOneUse() || Op1->hasOneUse())) {
+      Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+      Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
+      return replaceInstUsesWith(I, Exp);
+    }
+
+    // exp2(X) * exp2(Y) -> exp2(X + Y)
+    // Match as long as at least one of exp2 has only one use.
+    if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
+        match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y))) &&
+        (Op0->hasOneUse() || Op1->hasOneUse())) {
+      Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+      Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
+      return replaceInstUsesWith(I, Exp2);
+    }
+
     // (X*Y) * X => (X*X) * Y where Y != X
     //  The purpose is two-fold:
     //   1) to form a power expression (of X).
@@ -576,7 +624,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
   if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
     return false;
 
-  APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+  APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
   if (IsSigned)
     APInt::sdivrem(C1, C2, Quotient, Remainder);
   else
@@ -613,7 +661,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     // (X / C1) / C2  -> X / (C1*C2)
     if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
         (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
-      APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
       if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
         return BinaryOperator::Create(I.getOpcode(), X,
                                       ConstantInt::get(Ty, Product));
@@ -621,7 +669,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
 
     if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
         (!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
-      APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
 
       // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
       if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
@@ -645,7 +693,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
          *C1 != C1->getBitWidth() - 1) ||
         (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
-      APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
       APInt C1Shifted = APInt::getOneBitSet(
           C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
 
@@ -977,6 +1025,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
     return BinaryOperator::CreateNeg(Op0);
 
+  // X / INT_MIN --> X == INT_MIN
+  if (match(Op1, m_SignMask()))
+    return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
+
   const APInt *Op1C;
   if (match(Op1, m_APInt(Op1C))) {
     // sdiv exact X, C  -->  ashr exact X, log2(C)
@@ -1001,22 +1053,25 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
       return new SExtInst(NarrowOp, Op0->getType());
     }
-  }
 
-  if (Constant *RHS = dyn_cast<Constant>(Op1)) {
-    // X/INT_MIN -> X == INT_MIN
-    if (RHS->isMinSignedValue())
-      return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
-
-    // -X/C  -->  X/-C  provided the negation doesn't overflow.
-    Value *X;
-    if (match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
-      auto *BO = BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(RHS));
+    // -X / C --> X / -C (if the negation doesn't overflow).
+    // TODO: This could be enhanced to handle arbitrary vector constants by
+    //       checking if all elements are not the min-signed-val.
+    if (!Op1C->isMinSignedValue() &&
+        match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
+      Constant *NegC = ConstantInt::get(I.getType(), -(*Op1C));
+      Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
       BO->setIsExact(I.isExact());
       return BO;
     }
   }
 
+  // -X / Y --> -(X / Y)
+  Value *Y;
+  if (match(&I, m_SDiv(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+    return BinaryOperator::CreateNSWNeg(
+        Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
+
   // If the sign bits of both operands are zero (i.e. we can prove they are
   // unsigned inputs), turn this into a udiv.
   APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
@@ -1161,7 +1216,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       IRBuilder<> B(&I);
       IRBuilder<>::FastMathFlagGuard FMFGuard(B);
       B.setFastMathFlags(I.getFastMathFlags());
-      AttributeList Attrs = CallSite(Op0).getCalledFunction()->getAttributes();
+      AttributeList Attrs =
+          cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
       Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
                                         LibFunc_tanl, B, Attrs);
       if (IsCot)
@@ -1305,6 +1361,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     }
   }
 
+  // -X srem Y --> -(X srem Y)
+  Value *X, *Y;
+  if (match(&I, m_SRem(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+    return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y)); 
+
   // If the sign bits of both operands are zero (i.e. we can prove they are
   // unsigned inputs), turn this into a urem.
   APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 7603cf4d7958..5820ab726637 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1,9 +1,8 @@
 //===- InstCombinePHI.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -596,7 +595,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
 
   Value *InVal = FirstLI->getOperand(0);
   NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-  LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+  LoadInst *NewLI =
+      new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
 
   unsigned KnownIDs[] = {
     LLVMContext::MD_tbaa,
@@ -1004,6 +1004,11 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
           !isa<ConstantInt>(UserI->getOperand(1)))
         return nullptr;
 
+      // Bail on out of range shifts.
+      unsigned SizeInBits = UserI->getType()->getScalarSizeInBits();
+      if (cast<ConstantInt>(UserI->getOperand(1))->getValue().uge(SizeInBits))
+        return nullptr;
+
       unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
       PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
     }
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index faf58a08976d..aefaf5af1750 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineSelect.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -293,6 +292,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
     return nullptr;
 
   // If this is a cast from the same type, merge.
+  Value *Cond = SI.getCondition();
+  Type *CondTy = Cond->getType();
   if (TI->getNumOperands() == 1 && TI->isCast()) {
     Type *FIOpndTy = FI->getOperand(0)->getType();
     if (TI->getOperand(0)->getType() != FIOpndTy)
@@ -300,7 +301,6 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
 
     // The select condition may be a vector. We may only change the operand
     // type if the vector width remains the same (and matches the condition).
-    Type *CondTy = SI.getCondition()->getType();
     if (CondTy->isVectorTy()) {
       if (!FIOpndTy->isVectorTy())
         return nullptr;
@@ -327,12 +327,24 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
 
     // Fold this by inserting a select from the input values.
     Value *NewSI =
-        Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
-                             FI->getOperand(0), SI.getName() + ".v", &SI);
+        Builder.CreateSelect(Cond, TI->getOperand(0), FI->getOperand(0),
+                             SI.getName() + ".v", &SI);
     return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                             TI->getType());
   }
 
+  // Cond ? -X : -Y --> -(Cond ? X : Y)
+  Value *X, *Y;
+  if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) &&
+      (TI->hasOneUse() || FI->hasOneUse())) {
+    Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);
+    // TODO: Remove the hack for the binop form when the unary op is optimized
+    //       properly with all IR passes.
+    if (TI->getOpcode() != Instruction::FNeg)
+      return BinaryOperator::CreateFNegFMF(NewSel, cast<BinaryOperator>(TI));
+    return UnaryOperator::CreateFNeg(NewSel);
+  }
+
   // Only handle binary operators (including two-operand getelementptr) with
   // one-use here. As with the cast case above, it may be possible to relax the
   // one-use constraint, but that needs be examined carefully since it may not
@@ -374,13 +386,12 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
   // If the select condition is a vector, the operands of the original select's
   // operands also must be vectors. This may not be the case for getelementptr
   // for example.
-  if (SI.getCondition()->getType()->isVectorTy() &&
-      (!OtherOpT->getType()->isVectorTy() ||
-       !OtherOpF->getType()->isVectorTy()))
+  if (CondTy->isVectorTy() && (!OtherOpT->getType()->isVectorTy() ||
+                               !OtherOpF->getType()->isVectorTy()))
     return nullptr;
 
   // If we reach here, they do have operations in common.
-  Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+  Value *NewSI = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,
                                       SI.getName() + ".v", &SI);
   Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
   Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
@@ -520,6 +531,46 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
   return new ZExtInst(ICmpNeZero, SelType);
 }
 
+/// We want to turn:
+///   (select (icmp sgt X, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
+///   (select (icmp slt X, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
+/// into:
+///   ashr (X, Y)
+static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
+                                     Value *FalseVal,
+                                     InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate Pred = IC->getPredicate();
+  Value *CmpLHS = IC->getOperand(0);
+  Value *CmpRHS = IC->getOperand(1);
+  if (!CmpRHS->getType()->isIntOrIntVectorTy())
+    return nullptr;
+
+  Value *X, *Y;
+  unsigned Bitwidth = CmpRHS->getType()->getScalarSizeInBits();
+  if ((Pred != ICmpInst::ICMP_SGT ||
+       !match(CmpRHS,
+              m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, -1)))) &&
+      (Pred != ICmpInst::ICMP_SLT ||
+       !match(CmpRHS,
+              m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, 0)))))
+    return nullptr;
+
+  // Canonicalize so that ashr is in FalseVal.
+  if (Pred == ICmpInst::ICMP_SLT)
+    std::swap(TrueVal, FalseVal);
+
+  if (match(TrueVal, m_LShr(m_Value(X), m_Value(Y))) &&
+      match(FalseVal, m_AShr(m_Specific(X), m_Specific(Y))) &&
+      match(CmpLHS, m_Specific(X))) {
+    const auto *Ashr = cast<Instruction>(FalseVal);
+    // The new ashr may be exact only if both the lshr and the ashr are exact.
+    bool IsExact = Ashr->isExact() && cast<Instruction>(TrueVal)->isExact();
+    return Builder.CreateAShr(X, Y, IC->getName(), IsExact);
+  }
+
+  return nullptr;
+}
+
 /// We want to turn:
 ///   (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
 /// into:
@@ -623,11 +674,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
   return Builder.CreateOr(V, Y);
 }
 
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
 /// There are 8 commuted/swapped variants of this pattern.
 /// TODO: Also support a - UMIN(a,b) patterns.
 static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -669,11 +716,73 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
   if (!TrueVal->hasOneUse())
     return nullptr;
 
-  // All checks passed, convert to canonical unsigned saturated subtraction
-  // form: sub(max()).
-  // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
-  Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
-  return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+  // (a > b) ? a - b : 0 -> usub.sat(a, b)
+  // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+  Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+  if (IsNegative)
+    Result = Builder.CreateNeg(Result);
+  return Result;
+}
+
+static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
+                                       InstCombiner::BuilderTy &Builder) {
+  if (!Cmp->hasOneUse())
+    return nullptr;
+
+  // Match unsigned saturated add with constant.
+  Value *Cmp0 = Cmp->getOperand(0);
+  Value *Cmp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  Value *X;
+  const APInt *C, *CmpC;
+  if (Pred == ICmpInst::ICMP_ULT &&
+      match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
+      match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
+    // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
+  }
+
+  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  // There are 8 commuted variants.
+  // Canonicalize -1 (saturated result) to true value of the select. Just
+  // swapping the compare operands is legal, because the selected value is the
+  // same in case of equality, so we can interchange u< and u<=.
+  if (match(FVal, m_AllOnes())) {
+    std::swap(TVal, FVal);
+    std::swap(Cmp0, Cmp1);
+  }
+  if (!match(TVal, m_AllOnes()))
+    return nullptr;
+
+  // Canonicalize predicate to 'ULT'.
+  if (Pred == ICmpInst::ICMP_UGT) {
+    Pred = ICmpInst::ICMP_ULT;
+    std::swap(Cmp0, Cmp1);
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return nullptr;
+
+  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  Value *Y;
+  if (match(Cmp0, m_Not(m_Value(X))) &&
+      match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
+    // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+    // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
+  }
+  // The 'not' op may be included in the sum but not the compare.
+  X = Cmp0;
+  Y = Cmp1;
+  if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
+    // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+    // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+    BinaryOperator *BO = cast<BinaryOperator>(FVal);
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
+  }
+
+  return nullptr;
 }
 
 /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
@@ -1043,12 +1152,18 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
   if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
   if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 
@@ -1496,6 +1611,43 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) {
   return nullptr;
 }
 
+static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
+                                       Value *Y,
+                                       InstCombiner::BuilderTy &Builder) {
+  assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern");
+  bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN ||
+                    SPF == SelectPatternFlavor::SPF_UMAX;
+  // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change
+  // the constant value check to an assert.
+  Value *A;
+  const APInt *C1, *C2;
+  if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) &&
+      match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) {
+    // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1
+    // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1
+    Value *NewMinMax = createMinMax(Builder, SPF, A,
+                                    ConstantInt::get(X->getType(), *C2 - *C1));
+    return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax,
+                                     ConstantInt::get(X->getType(), *C1));
+  }
+
+  if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) &&
+      match(Y, m_APInt(C2)) && X->hasNUses(2)) {
+    bool Overflow;
+    APInt Diff = C2->ssub_ov(*C1, Overflow);
+    if (!Overflow) {
+      // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1
+      // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1
+      Value *NewMinMax = createMinMax(Builder, SPF, A,
+                                      ConstantInt::get(X->getType(), Diff));
+      return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax,
+                                       ConstantInt::get(X->getType(), *C1));
+    }
+  }
+
+  return nullptr;
+}
+
 /// Reduce a sequence of min/max with a common operand.
 static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
                                         Value *RHS,
@@ -1757,37 +1909,55 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
 
       // NOTE: if we wanted to, this is where to detect MIN/MAX
     }
+  }
 
-    // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
-    // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
-    // also require nnan because we do not want to unintentionally change the
-    // sign of a NaN value.
-    Value *X = FCI->getOperand(0);
-    FCmpInst::Predicate Pred = FCI->getPredicate();
-    if (match(FCI->getOperand(1), m_AnyZeroFP()) && FCI->hasNoNaNs()) {
-      // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
-      // (X >  +/-0.0) ? X : (0.0 - X) --> fabs(X)
-      if ((X == FalseVal && Pred == FCmpInst::FCMP_OLE &&
-           match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) ||
-          (X == TrueVal && Pred == FCmpInst::FCMP_OGT &&
-           match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(X))))) {
-        Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
-        return replaceInstUsesWith(SI, Fabs);
-      }
-      // With nsz:
-      // (X <  +/-0.0) ? -X : X --> fabs(X)
-      // (X <= +/-0.0) ? -X : X --> fabs(X)
-      // (X >  +/-0.0) ? X : -X --> fabs(X)
-      // (X >= +/-0.0) ? X : -X --> fabs(X)
-      if (FCI->hasNoSignedZeros() &&
-          ((X == FalseVal && match(TrueVal, m_FNeg(m_Specific(X))) &&
-            (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE)) ||
-           (X == TrueVal && match(FalseVal, m_FNeg(m_Specific(X))) &&
-            (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE)))) {
-        Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
-        return replaceInstUsesWith(SI, Fabs);
-      }
-    }
+  // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
+  // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
+  // also require nnan because we do not want to unintentionally change the
+  // sign of a NaN value.
+  // FIXME: These folds should test/propagate FMF from the select, not the
+  //        fsub or fneg.
+  // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
+  Instruction *FSub;
+  if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+      match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
+      match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+      (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FSub);
+    return replaceInstUsesWith(SI, Fabs);
+  }
+  // (X >  +/-0.0) ? X : (0.0 - X) --> fabs(X)
+  if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+      match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
+      match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+      (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FSub);
+    return replaceInstUsesWith(SI, Fabs);
+  }
+  // With nnan and nsz:
+  // (X <  +/-0.0) ? -X : X --> fabs(X)
+  // (X <= +/-0.0) ? -X : X --> fabs(X)
+  Instruction *FNeg;
+  if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+      match(TrueVal, m_FNeg(m_Specific(FalseVal))) &&
+      match(TrueVal, m_Instruction(FNeg)) &&
+      FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+      (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
+       Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FNeg);
+    return replaceInstUsesWith(SI, Fabs);
+  }
+  // With nnan and nsz:
+  // (X >  +/-0.0) ? X : -X --> fabs(X)
+  // (X >= +/-0.0) ? X : -X --> fabs(X)
+  if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+      match(FalseVal, m_FNeg(m_Specific(TrueVal))) &&
+      match(FalseVal, m_Instruction(FNeg)) &&
+      FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+      (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
+       Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FNeg);
+    return replaceInstUsesWith(SI, Fabs);
   }
 
   // See if we are selecting two values based on a comparison of the two values.
@@ -1895,11 +2065,27 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
         return I;
 
+      if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder))
+        return I;
+
       if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
         return I;
     }
   }
 
+  // Canonicalize a select of FP values to the minnum/maxnum intrinsics when
+  // NaN and -0.0 need not be handled (the select is nnan and nsz).
+  if (isa<FPMathOperator>(SI) && SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
+    Value *X, *Y;
+    if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
+      return replaceInstUsesWith(
+          SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI));
+
+    if (match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))
+      return replaceInstUsesWith(
+          SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI));
+  }
+
   // See if we can fold the select into a phi node if the condition is a select.
   if (auto *PN = dyn_cast<PHINode>(SI.getCondition()))
     // The true/false values have to be live in the PHI predecessor's blocks.
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c562d45a9e2b..c821292400cd 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineShifts.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,51 @@ using namespace PatternMatch;
 
 #define DEBUG_TYPE "instcombine"
 
+// Given pattern:
+//   (x shiftopcode Q) shiftopcode K
+// we should rewrite it as
+//   x shiftopcode (Q+K)  iff (Q+K) u< bitwidth(x)
+// This is valid for any shift opcode, but both shifts must use the same one.
+static Instruction *
+reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
+                                             const SimplifyQuery &SQ) {
+  // Look for:  (x shiftopcode ShAmt1) shiftopcode ShAmt0
+  Value *X, *ShAmt1, *ShAmt0;
+  Instruction *Sh1;
+  if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
+                                       m_Instruction(Sh1)),
+                          m_Value(ShAmt0))))
+    return nullptr;
+
+  // The shift opcodes must be identical.
+  Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
+  if (ShiftOpcode != Sh1->getOpcode())
+    return nullptr;
+  // Can we fold (ShAmt0+ShAmt1) ?
+  Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
+                                  SQ.getWithInstruction(Sh0));
+  if (!NewShAmt)
+    return nullptr; // Did not simplify.
+  // Is the new shift amount smaller than the bit width?
+  // FIXME: could also rely on ConstantRange.
+  unsigned BitWidth = X->getType()->getScalarSizeInBits();
+  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+                                          APInt(BitWidth, BitWidth))))
+    return nullptr;
+  // All good, we can do this fold.
+  BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
+  // If both of the original shifts had the same flag set, preserve the flag.
+  if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+    NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+                                   Sh1->hasNoUnsignedWrap());
+    NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+                                 Sh1->hasNoSignedWrap());
+  } else {
+    NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+  }
+  return NewShift;
+}
+
 Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   assert(Op0->getType() == Op1->getType());
@@ -39,6 +83,10 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
       return Res;
 
+  if (Instruction *NewShift =
+          reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+    return NewShift;
+
   // (C1 shift (A add C2)) -> (C1 shift C2) shift A)
   // iff A and C2 are both positive.
   Value *A;
@@ -313,35 +361,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
 // If this is a bitwise operator or add with a constant RHS we might be able
 // to pull it through a shift.
 static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
-                                         BinaryOperator *BO,
-                                         const APInt &C) {
-  bool IsValid = true;     // Valid only for And, Or Xor,
-  bool HighBitSet = false; // Transform ifhigh bit of constant set?
-
+                                         BinaryOperator *BO) {
   switch (BO->getOpcode()) {
-  default: IsValid = false; break;   // Do not perform transform!
+  default:
+    return false; // Do not perform transform!
   case Instruction::Add:
-    IsValid = Shift.getOpcode() == Instruction::Shl;
-    break;
+    return Shift.getOpcode() == Instruction::Shl;
   case Instruction::Or:
   case Instruction::Xor:
-    HighBitSet = false;
-    break;
   case Instruction::And:
-    HighBitSet = true;
-    break;
+    return true;
   }
-
-  // If this is a signed shift right, and the high bit is modified
-  // by the logical operation, do not perform the transformation.
-  // The HighBitSet boolean indicates the value of the high bit of
-  // the constant which would cause it to be modified for this
-  // operation.
-  //
-  if (IsValid && Shift.getOpcode() == Instruction::AShr)
-    IsValid = C.isNegative() == HighBitSet;
-
-  return IsValid;
 }
 
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
@@ -508,7 +538,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
       // shift is the only use, we can pull it out of the shift.
       const APInt *Op0C;
       if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
-        if (canShiftBinOpWithConstantRHS(I, Op0BO, *Op0C)) {
+        if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
           Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
                                      cast<Constant>(Op0BO->getOperand(1)), Op1);
 
@@ -552,7 +582,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
       const APInt *C;
       if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
           match(TBO->getOperand(1), m_APInt(C)) &&
-          canShiftBinOpWithConstantRHS(I, TBO, *C)) {
+          canShiftBinOpWithConstantRHS(I, TBO)) {
         Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
                                        cast<Constant>(TBO->getOperand(1)), Op1);
 
@@ -571,7 +601,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
       const APInt *C;
       if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
           match(FBO->getOperand(1), m_APInt(C)) &&
-          canShiftBinOpWithConstantRHS(I, FBO, *C)) {
+          canShiftBinOpWithConstantRHS(I, FBO)) {
         Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
                                        cast<Constant>(FBO->getOperand(1)), Op1);
 
@@ -601,6 +631,8 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   Type *Ty = I.getType();
+  unsigned BitWidth = Ty->getScalarSizeInBits();
+
   const APInt *ShAmtAPInt;
   if (match(Op1, m_APInt(ShAmtAPInt))) {
     unsigned ShAmt = ShAmtAPInt->getZExtValue();
@@ -689,6 +721,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
       return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
   }
 
+  // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
+  if (match(Op0, m_One()) &&
+      match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X))))
+    return BinaryOperator::CreateLShr(
+        ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
+
   return nullptr;
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9bf87d024607..e0d85c4b49ae 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -366,10 +365,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     KnownBits InputKnown(SrcBitWidth);
     if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
       return I;
-    Known = InputKnown.zextOrTrunc(BitWidth);
-    // Any top bits are known to be zero.
-    if (BitWidth > SrcBitWidth)
-      Known.Zero.setBitsFrom(SrcBitWidth);
+    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+    Known = InputKnown.zextOrTrunc(BitWidth,
+                                   true /* ExtendedBitsAreKnownZero */);
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
     break;
   }
@@ -967,26 +965,16 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
 }
 
 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+///       struct returns.
 Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                            APInt DemandedElts,
-                                                           int DMaskIdx,
-                                                           int TFCIdx) {
+                                                           int DMaskIdx) {
   unsigned VWidth = II->getType()->getVectorNumElements();
   if (VWidth == 1)
     return nullptr;
 
-  // Need to change to new instruction format
-  ConstantInt *TFC = nullptr;
-  bool TFELWEEnabled = false;
-  if (TFCIdx > 0) {
-    TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
-    TFELWEEnabled =    TFC->getZExtValue() & 0x1  // TFE
-                    || TFC->getZExtValue() & 0x2; // LWE
-  }
-
-  if (TFELWEEnabled)
-    return nullptr; // TFE not yet supported
-
   ConstantInt *NewDMask = nullptr;
 
   if (DMaskIdx < 0) {
@@ -994,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
     // below.
     DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
   } else {
-    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
-    if (!DMask)
-      return nullptr; // non-constant dmask is not supported by codegen
-
+    ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
 
     // Mask off values that are undefined because the dmask doesn't cover them
@@ -1018,8 +1003,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
       NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
   }
 
-  // TODO: Handle 3 vectors when supported in code gen.
-  unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+  unsigned NewNumElts = DemandedElts.countPopulation();
   if (!NewNumElts)
     return UndefValue::get(II->getType());
 
@@ -1035,13 +1019,12 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
   getIntrinsicInfoTableEntries(IID, Table);
   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
 
+  // Validate function argument and return types, extracting overloaded types
+  // along the way.
   FunctionType *FTy = II->getCalledFunction()->getFunctionType();
   SmallVector<Type *, 6> OverloadTys;
-  Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
-  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+  Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
 
-  // Get the new return type overload of the intrinsic.
   Module *M = II->getParent()->getParent()->getParent();
   Type *EltTy = II->getType()->getVectorElementType();
   Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
@@ -1184,6 +1167,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   switch (I->getOpcode()) {
   default: break;
 
+  case Instruction::GetElementPtr: {
+    // The LangRef requires that struct geps have all constant indices.  As
+    // such, we can't convert any operand to partial undef.
+    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
+      for (auto GTI = gep_type_begin(GEP), E = gep_type_end(GEP);
+           GTI != E; ++GTI)
+        if (GTI.isStruct())
+          return true; // At least one index steps into a struct.
+      return false;
+    };
+    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
+      break;
+    
+    // Conservatively track the demanded elements back through any vector
+    // operands we may have.  We know there must be at least one, or we
+    // wouldn't have a vector result to get here. Note that we intentionally
+    // merge the undef bits here since gepping with either an undef base or
+    // index results in undef. 
+    for (unsigned i = 0; i < I->getNumOperands(); i++) {
+      if (isa<UndefValue>(I->getOperand(i))) {
+        // If the entire vector is undefined, just return this info.
+        UndefElts = EltMask;
+        return nullptr;
+      }
+      if (I->getOperand(i)->getType()->isVectorTy()) {
+        APInt UndefEltsOp(VWidth, 0);
+        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+        UndefElts |= UndefEltsOp;
+      }
+    }
+
+    break;
+  }
   case Instruction::InsertElement: {
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.
@@ -1430,6 +1446,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
     if (!II) break;
     switch (II->getIntrinsicID()) {
+    case Intrinsic::masked_gather: // fallthrough
+    case Intrinsic::masked_load: {
+      // Subtlety: If we load from a pointer, the pointer must be valid
+      // regardless of whether the element is demanded.  Doing otherwise risks
+      // segfaults which didn't exist in the original program.
+      APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+        DemandedPassThrough(DemandedElts);
+      if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+        for (unsigned i = 0; i < VWidth; i++) {
+          Constant *CElt = CV->getAggregateElement(i);
+          if (CElt->isNullValue())
+            DemandedPtrs.clearBit(i);
+          else if (CElt->isAllOnesValue())
+            DemandedPassThrough.clearBit(i);
+        }
+      if (II->getIntrinsicID() == Intrinsic::masked_gather)
+        simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+      simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+      
+      // An output element is undefined if it is undefined in both sources.
+      // TODO: can strengthen via mask as well.
+      UndefElts = UndefElts2 & UndefElts3;
+      break;
+    }
     case Intrinsic::x86_xop_vfrcz_ss:
     case Intrinsic::x86_xop_vfrcz_sd:
       // The instructions for these intrinsics are speced to zero upper bits not
@@ -1639,8 +1679,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
     default: {
       if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
-        return simplifyAMDGCNMemoryIntrinsicDemanded(
-            II, DemandedElts, 0, II->getNumArgOperands() - 2);
+        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
 
       break;
     }
@@ -1667,5 +1706,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     UndefElts &= UndefElts2;
   }
 
+  // If we've proven all of the lanes undef, return an undef value.
+  // TODO: Intersect w/demanded lanes
+  if (UndefElts.isAllOnesValue())
+    return UndefValue::get(I->getType());;
+
   return MadeChange ? I : nullptr;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 0ad1fc0e791f..dc9abdd7f47a 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1,9 +1,8 @@
 //===- InstCombineVectorOps.cpp -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -663,18 +662,17 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
   return true;
 }
 
-// Turn a chain of inserts that splats a value into a canonical insert + shuffle
-// splat. That is:
-// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
-// shufflevector(insertelt(X, %k, 0), undef, zero)
-static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
-  // We are interested in the last insert in a chain. So, if this insert
-  // has a single user, and that user is an insert, bail.
+/// Turn a chain of inserts that splats a value into an insert + shuffle:
+/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
+/// shufflevector(insertelt(X, %k, 0), undef, zero)
+static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
+  // We are interested in the last insert in a chain. So if this insert has a
+  // single user and that user is an insert, bail.
   if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
     return nullptr;
 
-  VectorType *VT = cast<VectorType>(InsElt.getType());
-  int NumElements = VT->getNumElements();
+  auto *VecTy = cast<VectorType>(InsElt.getType());
+  unsigned NumElements = VecTy->getNumElements();
 
   // Do not try to do this for a one-element vector, since that's a nop,
   // and will cause an inf-loop.
@@ -706,24 +704,66 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
     CurrIE = NextIE;
   }
 
-  // Make sure we've seen an insert into every element.
-  if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+  // If this is just a single insertelement (not a sequence), we are done.
+  if (FirstIE == &InsElt)
     return nullptr;
 
-  // All right, create the insert + shuffle.
-  Instruction *InsertFirst;
-  if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
-    InsertFirst = FirstIE;
-  else
-    InsertFirst = InsertElementInst::Create(
-        UndefValue::get(VT), SplatVal,
-        ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
-        "", &InsElt);
+  // If we are not inserting into an undef vector, make sure we've seen an
+  // insert into every element.
+  // TODO: If the base vector is not undef, it might be better to create a splat
+  //       and then a select-shuffle (blend) with the base vector.
+  if (!isa<UndefValue>(FirstIE->getOperand(0)))
+    if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+      return nullptr;
+
+  // Create the insert + shuffle.
+  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
+  UndefValue *UndefVec = UndefValue::get(VecTy);
+  Constant *Zero = ConstantInt::get(Int32Ty, 0);
+  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
+    FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
 
-  Constant *ZeroMask = ConstantAggregateZero::get(
-      VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
+  // Splat from element 0, but replace absent elements with undef in the mask.
+  SmallVector<Constant *, 16> Mask(NumElements, Zero);
+  for (unsigned i = 0; i != NumElements; ++i)
+    if (!ElementPresent[i])
+      Mask[i] = UndefValue::get(Int32Ty);
 
-  return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
+  return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
+}
+
+/// Try to absorb an insertelement into the splat shuffle that feeds it: when
+/// the inserted scalar is the splatted value, only the shuffle mask changes.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+  // The vector being inserted into has to be a canonical splat shuffle.
+  auto *Splat = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+  if (!Splat || !Splat->isZeroEltSplat())
+    return nullptr;
+
+  // The insertion index has to be a constant.
+  uint64_t InsertIdx;
+  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx)))
+    return nullptr;
+
+  // The splat source must insert this same scalar into element 0 of undef.
+  Value *Scalar = InsElt.getOperand(1);
+  Value *SplatSrc = Splat->getOperand(0);
+  if (!match(SplatSrc,
+             m_InsertElement(m_Undef(), m_Specific(Scalar), m_ZeroInt())))
+    return nullptr;
+
+  // Rebuild the mask with a zero at the index of this insert. For example:
+  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+  //   --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+  Constant *OldMask = Splat->getMask();
+  Type *Int32Ty = IntegerType::getInt32Ty(Splat->getContext());
+  Constant *Zero = ConstantInt::getNullValue(Int32Ty);
+  unsigned NumLanes = Splat->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> Lanes(NumLanes);
+  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
+    Lanes[Lane] = Lane == InsertIdx ? Zero : OldMask->getAggregateElement(Lane);
+  UndefValue *UndefVec = UndefValue::get(SplatSrc->getType());
+  return new ShuffleVectorInst(SplatSrc, UndefVec, ConstantVector::get(Lanes));
+}
 
 /// If we have an insertelement instruction feeding into another insertelement
@@ -864,30 +904,28 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
     return replaceInstUsesWith(IE, V);
 
-  // Inserting an undef or into an undefined place, remove this.
-  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
-    replaceInstUsesWith(IE, VecOp);
+  // If the vector and scalar are both bitcast from the same element type, do
+  // the insert in that source type followed by bitcast.
+  Value *VecSrc, *ScalarSrc;
+  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
+      match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
+      (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
+      VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
+      VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
+    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
+    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
+    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
+    return new BitCastInst(NewInsElt, IE.getType());
+  }
 
   // If the inserted element was extracted from some other vector and both
-  // indexes are constant, try to turn this into a shuffle.
+  // indexes are valid constants, try to turn this into a shuffle.
   uint64_t InsertedIdx, ExtractedIdx;
   Value *ExtVecOp;
   if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
-                                       m_ConstantInt(ExtractedIdx)))) {
-    unsigned NumInsertVectorElts = IE.getType()->getNumElements();
-    unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
-    if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
-      return replaceInstUsesWith(IE, VecOp);
-
-    if (InsertedIdx >= NumInsertVectorElts)  // Out of range insert.
-      return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
-    // If we are extracting a value from a vector, then inserting it right
-    // back into the same place, just use the input vector.
-    if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
-      return replaceInstUsesWith(IE, VecOp);
-
+                                       m_ConstantInt(ExtractedIdx))) &&
+      ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
     // TODO: Looking at the user(s) to determine if this insert is a
     // fold-to-shuffle opportunity does not match the usual instcombine
     // constraints. We should decide if the transform is worthy based only
@@ -943,11 +981,12 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
     return NewInsElt;
 
-  // Turn a sequence of inserts that broadcasts a scalar into a single
-  // insert + shufflevector.
-  if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
+  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
     return Broadcast;
 
+  if (Instruction *Splat = foldInsEltIntoSplat(IE))
+    return Splat;
+
   return nullptr;
 }
 
@@ -1172,7 +1211,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
       SmallVector<Value*, 8> NewOps;
       bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
       for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
-        Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+        Value *V;
+        // Recursively call evaluateInDifferentElementOrder on vector arguments
+        // as well. E.g. GetElementPtr may have scalar operands even if the
+        // return value is a vector, so we need to examine the operand type.
+        if (I->getOperand(i)->getType()->isVectorTy())
+          V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+        else
+          V = I->getOperand(i);
         NewOps.push_back(V);
         NeedsRebuild |= (V != I->getOperand(i));
       }
@@ -1337,6 +1383,41 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
   return NewBO;
 }
 
+/// Canonicalize a shuffle whose only defined input is a scalar inserted into a
+/// non-zero element of an undef vector: insert the scalar into element 0
+/// instead and splat from there. Splatting from the zero element is recognized
+/// as the canonical form of splat.
+static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
+                                            InstCombiner::BuilderTy &Builder) {
+  // Operand 0 must be a one-use insert into undef at a constant index.
+  Value *Scalar;
+  uint64_t InsertIdx;
+  auto InsertIntoUndef =
+      m_InsertElement(m_Undef(), m_Value(Scalar), m_ConstantInt(InsertIdx));
+  if (!match(Shuf.getOperand(0), m_OneUse(InsertIntoUndef)))
+    return nullptr;
+  // Operand 1 must be undef. Skip zero masks and inserts at element 0.
+  Constant *OldMask = Shuf.getMask();
+  if (!match(Shuf.getOperand(1), m_Undef()) || match(OldMask, m_ZeroInt()) ||
+      InsertIdx == 0)
+    return nullptr;
+
+  // Insert into element 0 of an undef vector.
+  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
+  Constant *Zero = Builder.getInt32(0);
+  Value *ZeroIns = Builder.CreateInsertElement(UndefVec, Scalar, Zero);
+  // Splat from element 0, keeping undefined mask elements undefined, e.g.:
+  // shuf (inselt undef, X, 2), undef, <2,2,undef>
+  //   --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+  unsigned NumLanes = Shuf.getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> NewMask(NumLanes);
+  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+    Constant *Elt = OldMask->getAggregateElement(Lane);
+    NewMask[Lane] = isa<UndefValue>(Elt) ? Elt : Zero;
+  }
+  return new ShuffleVectorInst(ZeroIns, UndefVec, ConstantVector::get(NewMask));
+}
+
 /// Try to fold shuffles that are the equivalent of a vector select.
 static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
                                       InstCombiner::BuilderTy &Builder,
@@ -1344,6 +1425,15 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
   if (!Shuf.isSelect())
     return nullptr;
 
+  // Canonicalize to choose from operand 0 first.
+  unsigned NumElts = Shuf.getType()->getVectorNumElements();
+  if (Shuf.getMaskValue(0) >= (int)NumElts) {
+    // TODO: Can we assert that both operands of a shuffle-select are not undef
+    // (otherwise, it would have been folded by instsimplify)?
+    Shuf.commute();
+    return &Shuf;
+  }
+
   if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
     return I;
 
@@ -1499,6 +1589,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
   if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
     return nullptr;
 
+  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
+  // then combining may result in worse codegen.
+  if (!Op0->hasOneUse())
+    return nullptr;
+
   // We are extracting a subvector from a shuffle. Remove excess elements from
   // the 1st shuffle mask to eliminate the extract.
   //
@@ -1588,6 +1683,72 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
   return nullptr;
 }
 
+/// Fold a shuffle of two identity-with-padding (widening) shuffles of the same
+/// source type into a single shuffle of the narrow source vectors:
+///   shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+  // Both operands must be identity-with-padding shuffles (also known as
+  // concatenation with undef). The backend is expected to recreate these
+  // concatenations from a shuffle of narrow operands.
+  auto *Widen0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+  if (!Widen0 || !Widen0->isIdentityWithPadding())
+    return nullptr;
+  auto *Widen1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+  if (!Widen1 || !Widen1->isIdentityWithPadding())
+    return nullptr;
+
+  // The narrow sources must have the same type and must not be undef.
+  Value *X = Widen0->getOperand(0);
+  Value *Y = Widen1->getOperand(0);
+  if (X->getType() != Y->getType() || isa<UndefValue>(X) || isa<UndefValue>(Y))
+    return nullptr;
+
+  // We limit this transform to power-of-2 types because we expect that the
+  // backend can convert the simplified IR patterns to identical nodes as the
+  // original IR.
+  // TODO: If we can verify the same behavior for arbitrary types, the
+  //       power-of-2 checks can be removed.
+  int NarrowElts = X->getType()->getVectorNumElements();
+  int WideElts = Widen0->getType()->getVectorNumElements();
+  if (!isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
+      !isPowerOf2_32(WideElts) || !isPowerOf2_32(NarrowElts))
+    return nullptr;
+
+  assert(isa<UndefValue>(Widen0->getOperand(1)) &&
+         isa<UndefValue>(Widen1->getOperand(1)) &&
+         "Unexpected operand for identity shuffle");
+  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+  // Shuffle the narrow source operands directly; every defined mask element
+  // is adjusted below to account for the narrower types.
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+  for (int i = 0, e = Mask.size(); i != e; ++i) {
+    const int M = Mask[i];
+    if (M == -1)
+      continue;
+
+    // If this shuffle is choosing an undef element from 1 of the sources, that
+    // element is undef.
+    const bool FromOp0 = M < WideElts;
+    ShuffleVectorInst *Widen = FromOp0 ? Widen0 : Widen1;
+    if (Widen->getMaskValue(FromOp0 ? M : M - WideElts) == -1)
+      continue;
+
+    // An element of the 1st narrow op keeps its index. An element of the 2nd
+    // narrow op is offset down to adjust for the narrow vector widths.
+    if (FromOp0) {
+      assert(M < NarrowElts && "Unexpected shuffle mask");
+      NewMask[i] = ConstantInt::get(I32Ty, M);
+    } else {
+      assert(M < (WideElts + NarrowElts) && "Unexpected shuffle mask");
+      NewMask[i] = ConstantInt::get(I32Ty, M - (WideElts - NarrowElts));
+    }
+  }
+  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1595,36 +1756,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
           LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
     return replaceInstUsesWith(SVI, V);
 
-  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
-    return I;
-
-  if (Instruction *I = narrowVectorSelect(SVI, Builder))
-    return I;
-
+  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
+  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
   unsigned VWidth = SVI.getType()->getVectorNumElements();
-  APInt UndefElts(VWidth, 0);
-  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
-    if (V != &SVI)
-      return replaceInstUsesWith(SVI, V);
-    return &SVI;
-  }
-
-  if (Instruction *I = foldIdentityExtractShuffle(SVI))
-    return I;
-
-  // This transform has the potential to lose undef knowledge, so it is
-  // intentionally placed after SimplifyDemandedVectorElts().
-  if (Instruction *I = foldShuffleWithInsert(SVI))
-    return I;
-
+  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
   SmallVector<int, 16> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
-  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
-  bool MadeChange = false;
-
-  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
-  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
   if (LHS == RHS || isa<UndefValue>(LHS)) {
     // Remap any references to RHS to use LHS.
     SmallVector<Constant*, 16> Elts;
@@ -1646,11 +1783,36 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     SVI.setOperand(0, SVI.getOperand(1));
     SVI.setOperand(1, UndefValue::get(RHS->getType()));
     SVI.setOperand(2, ConstantVector::get(Elts));
-    LHS = SVI.getOperand(0);
-    RHS = SVI.getOperand(1);
-    MadeChange = true;
+    return &SVI;
   }
 
+  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
+    return I;
+
+  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
+    return I;
+
+  if (Instruction *I = narrowVectorSelect(SVI, Builder))
+    return I;
+
+  APInt UndefElts(VWidth, 0);
+  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+    if (V != &SVI)
+      return replaceInstUsesWith(SVI, V);
+    return &SVI;
+  }
+
+  if (Instruction *I = foldIdentityExtractShuffle(SVI))
+    return I;
+
+  // These transforms have the potential to lose undef knowledge, so they are
+  // intentionally placed after SimplifyDemandedVectorElts().
+  if (Instruction *I = foldShuffleWithInsert(SVI))
+    return I;
+  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+    return I;
+
   if (VWidth == LHSWidth) {
     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
     bool isLHSID, isRHSID;
@@ -1695,6 +1857,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   //                +-----------+-----------+-----------+-----------+
   // Index range [6,10):              ^-----------^ Needs an extra shuffle.
   // Target type i40:           ^--------------^ Won't work, bail.
+  bool MadeChange = false;
   if (isShuffleExtractingFromLHS(SVI, Mask)) {
     Value *V = LHS;
     unsigned MaskElems = Mask.size();
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index be7d43bbcf2c..385f4926b845 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1,9 +1,8 @@
 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,14 +46,17 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -221,6 +223,11 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
   return !Overflow;
 }
 
+static bool hasNoUnsignedWrap(BinaryOperator &I) {
+  OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+  return OBO && OBO->hasNoUnsignedWrap();
+}
+
 /// Conservatively clears subclassOptionalData after a reassociation or
 /// commutation. We preserve fast-math flags when applicable as they can be
 /// preserved.
@@ -327,14 +334,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
           I.setOperand(1, V);
           // Conservatively clear the optional flags, since they may not be
           // preserved by the reassociation.
-          if (MaintainNoSignedWrap(I, B, C) &&
+          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
+          bool IsNSW = MaintainNoSignedWrap(I, B, C);
+
+          ClearSubclassDataAfterReassociation(I);
+
+          if (IsNUW)
+            I.setHasNoUnsignedWrap(true);
+
+          if (IsNSW &&
               (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
             // Note: this is only valid because SimplifyBinOp doesn't look at
             // the operands to Op0.
-            I.clearSubclassOptionalData();
             I.setHasNoSignedWrap(true);
-          } else {
-            ClearSubclassDataAfterReassociation(I);
           }
 
           Changed = true;
@@ -419,8 +431,14 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
           Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
           match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
           match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
-        BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
-        if (isa<FPMathOperator>(NewBO)) {
+        bool IsNUW = hasNoUnsignedWrap(I) &&
+           hasNoUnsignedWrap(*Op0) &&
+           hasNoUnsignedWrap(*Op1);
+         BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
+           BinaryOperator::CreateNUW(Opcode, A, B) :
+           BinaryOperator::Create(Opcode, A, B);
+
+         if (isa<FPMathOperator>(NewBO)) {
           FastMathFlags Flags = I.getFastMathFlags();
           Flags &= Op0->getFastMathFlags();
           Flags &= Op1->getFastMathFlags();
@@ -433,6 +451,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         // Conservatively clear the optional flags, since they may not be
         // preserved by the reassociation.
         ClearSubclassDataAfterReassociation(I);
+        if (IsNUW)
+          I.setHasNoUnsignedWrap(true);
 
         Changed = true;
         continue;
@@ -570,32 +590,44 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
     ++NumFactor;
     SimplifiedInst->takeName(&I);
 
-    // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
-    // TODO: Check for NUW.
+    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
       if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
         bool HasNSW = false;
-        if (isa<OverflowingBinaryOperator>(&I))
+        bool HasNUW = false;
+        if (isa<OverflowingBinaryOperator>(&I)) {
           HasNSW = I.hasNoSignedWrap();
+          HasNUW = I.hasNoUnsignedWrap();
+        }
 
-        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
+        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
           HasNSW &= LOBO->hasNoSignedWrap();
+          HasNUW &= LOBO->hasNoUnsignedWrap();
+        }
 
-        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
+        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
           HasNSW &= ROBO->hasNoSignedWrap();
+          HasNUW &= ROBO->hasNoUnsignedWrap();
+        }
 
-        // We can propagate 'nsw' if we know that
-        //  %Y = mul nsw i16 %X, C
-        //  %Z = add nsw i16 %Y, %X
-        // =>
-        //  %Z = mul nsw i16 %X, C+1
-        //
-        // iff C+1 isn't INT_MIN
         const APInt *CInt;
         if (TopLevelOpcode == Instruction::Add &&
-            InnerOpcode == Instruction::Mul)
-          if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
-            BO->setHasNoSignedWrap(HasNSW);
+            InnerOpcode == Instruction::Mul) {
+          // We can propagate 'nsw' if we know that
+          //  %Y = mul nsw i16 %X, C
+          //  %Z = add nsw i16 %Y, %X
+          // =>
+          //  %Z = mul nsw i16 %X, C+1
+          //
+          // iff C+1 isn't INT_MIN
+          if (match(V, m_APInt(CInt))) {
+            if (!CInt->isMinSignedValue())
+              BO->setHasNoSignedWrap(HasNSW);
+          }
+
+          // nuw can be propagated with any constant or nuw value.
+          BO->setHasNoUnsignedWrap(HasNUW);
+        }
       }
     }
   }
@@ -922,8 +954,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
 
     // If the InVal is an invoke at the end of the pred block, then we can't
     // insert a computation after it without breaking the edge.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
-      if (II->getParent() == NonConstBB)
+    if (isa<InvokeInst>(InVal))
+      if (cast<Instruction>(InVal)->getParent() == NonConstBB)
         return nullptr;
 
     // If the incoming non-constant value is in I's block, we will remove one
@@ -1376,7 +1408,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
   if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
       match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
       LHS->hasOneUse() && RHS->hasOneUse() &&
-      cast<ShuffleVectorInst>(LHS)->isConcat()) {
+      cast<ShuffleVectorInst>(LHS)->isConcat() &&
+      cast<ShuffleVectorInst>(RHS)->isConcat()) {
     // This transform does not have the speculative execution constraint as
     // below because the shuffle is a concatenation. The new binops are
     // operating on exactly the same elements as the existing binop.
@@ -1415,6 +1448,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
     return createBinOpShuffle(V1, V2, Mask);
   }
 
+  // If both arguments of a commutative binop are select-shuffles that use the
+  // same mask with commuted operands, the shuffles are unnecessary.
+  if (Inst.isCommutative() &&
+      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+                                 m_Specific(Mask)))) {
+    auto *LShuf = cast<ShuffleVectorInst>(LHS);
+    auto *RShuf = cast<ShuffleVectorInst>(RHS);
+    // TODO: Allow shuffles that contain undefs in the mask?
+    //       That is legal, but it reduces undef knowledge.
+    // TODO: Allow arbitrary shuffles by shuffling after binop?
+    //       That might be legal, but we have to deal with poison.
+    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+      // Example:
+      // LHS = shuffle V1, V2, <0, 5, 6, 3>
+      // RHS = shuffle V2, V1, <0, 5, 6, 3>
+      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+      NewBO->copyIRFlags(&Inst);
+      return NewBO;
+    }
+  }
+
   // If one argument is a shuffle within one vector and the other is a constant,
   // try moving the shuffle after the binary operation. This canonicalization
   // intends to move shuffles closer to other shuffles and binops closer to
@@ -1557,6 +1614,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
+  // For vector geps, use the generic demanded vector support.
+  if (GEP.getType()->isVectorTy()) {
+    auto VWidth = GEP.getType()->getVectorNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+    if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
+                                              UndefElts)) {
+      if (V != &GEP)
+        return replaceInstUsesWith(GEP, V);
+      return &GEP;
+    }
+
+    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
+    // possible (decide on canonical form for pointer broadcast), 3) exploit
+    // undef elements to decrease demanded bits  
+  }
+
   Value *PtrOp = GEP.getOperand(0);
 
   // Eliminate unneeded casts for indices, and replace indices which displace
@@ -1755,9 +1829,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // put NewSrc at same location as %src
             Builder.SetInsertPoint(cast<Instruction>(PtrOp));
             auto *NewSrc = cast<GetElementPtrInst>(
-                Builder.CreateGEP(SO0, GO1, Src->getName()));
+                Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
             NewSrc->setIsInBounds(Src->isInBounds());
-            auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
+            auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
             NewGEP->setIsInBounds(GEP.isInBounds());
             return NewGEP;
           }
@@ -1881,6 +1955,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
   if (StrippedPtr != PtrOp) {
     bool HasZeroPointerIndex = false;
+    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+
     if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
 
@@ -1894,11 +1970,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (HasZeroPointerIndex) {
       if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
         // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
-        if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+        if (CATy->getElementType() == StrippedPtrEltTy) {
           // -> GEP i8* X, ...
           SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
           GetElementPtrInst *Res = GetElementPtrInst::Create(
-              StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
+              StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
           Res->setIsInBounds(GEP.isInBounds());
           if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
             return Res;
@@ -1911,7 +1987,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
         }
 
-        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
+        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
           // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
           if (CATy->getElementType() == XATy->getElementType()) {
             // -> GEP [10 x i8]* X, i32 0, ...
@@ -1934,11 +2010,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // %0 = GEP [10 x i8] addrspace(1)* X, ...
             // addrspacecast i8 addrspace(1)* %0 to i8*
             SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
-            Value *NewGEP = GEP.isInBounds()
-                                ? Builder.CreateInBoundsGEP(
-                                      nullptr, StrippedPtr, Idx, GEP.getName())
-                                : Builder.CreateGEP(nullptr, StrippedPtr, Idx,
-                                                    GEP.getName());
+            Value *NewGEP =
+                GEP.isInBounds()
+                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                                Idx, GEP.getName())
+                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+                                        GEP.getName());
             return new AddrSpaceCastInst(NewGEP, GEPType);
           }
         }
@@ -1947,17 +2024,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // Transform things like:
       // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
       // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
-      Type *SrcEltTy = StrippedPtrTy->getElementType();
-      if (SrcEltTy->isArrayTy() &&
-          DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) ==
+      if (StrippedPtrEltTy->isArrayTy() &&
+          DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
               DL.getTypeAllocSize(GEPEltType)) {
         Type *IdxType = DL.getIndexType(GEPType);
         Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
         Value *NewGEP =
             GEP.isInBounds()
-                ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+                ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
                                             GEP.getName())
-                : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+                : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+                                    GEP.getName());
 
         // V and GEP are both pointer types --> BitCast
         return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -1967,11 +2044,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // %V = mul i64 %N, 4
       // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
       // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
-      if (GEPEltType->isSized() && SrcEltTy->isSized()) {
+      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
         // Check that changing the type amounts to dividing the index by a scale
         // factor.
         uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
-        uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy);
+        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
         if (ResSize && SrcSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1990,9 +2067,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // GEP may not be "inbounds".
             Value *NewGEP =
                 GEP.isInBounds() && NSW
-                    ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
-                                                GEP.getName())
-                    : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx,
+                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                                NewIdx, GEP.getName())
+                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
                                         GEP.getName());
 
             // The NewGEP must be pointer typed, so must the old one -> BitCast
@@ -2006,13 +2083,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
       //   (where tmp = 8*tmp2) into:
       // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-      if (GEPEltType->isSized() && SrcEltTy->isSized() &&
-          SrcEltTy->isArrayTy()) {
+      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
+          StrippedPtrEltTy->isArrayTy()) {
         // Check that changing to the array element type amounts to dividing the
         // index by a scale factor.
         uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
         uint64_t ArrayEltSize =
-            DL.getTypeAllocSize(SrcEltTy->getArrayElementType());
+            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
         if (ResSize && ArrayEltSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2032,11 +2109,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             Type *IndTy = DL.getIndexType(GEPType);
             Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
 
-            Value *NewGEP = GEP.isInBounds() && NSW
-                                ? Builder.CreateInBoundsGEP(
-                                      SrcEltTy, StrippedPtr, Off, GEP.getName())
-                                : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off,
-                                                    GEP.getName());
+            Value *NewGEP =
+                GEP.isInBounds() && NSW
+                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                                Off, GEP.getName())
+                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
+                                        GEP.getName());
             // The NewGEP must be pointer typed, so must the old one -> BitCast
             return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
                                                                  GEPType);
@@ -2084,8 +2162,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // constructing an AddrSpaceCastInst
       Value *NGEP =
           GEP.isInBounds()
-              ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
-              : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
+              : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
       NGEP->takeName(&GEP);
 
       // Preserve GEP address space to satisfy users
@@ -2132,8 +2210,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
         Value *NGEP =
             GEP.isInBounds()
-                ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices)
-                : Builder.CreateGEP(nullptr, SrcOp, NewIndices);
+                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
 
         if (NGEP->getType() == GEPType)
           return replaceInstUsesWith(GEP, NGEP);
@@ -2159,7 +2237,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
         if (BasePtrOffset.ule(AllocSize)) {
           return GetElementPtrInst::CreateInBounds(
-              PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
+              GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
+              GEP.getName());
         }
       }
     }
@@ -2296,8 +2375,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
 
       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
         if (II->getIntrinsicID() == Intrinsic::objectsize) {
-          ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI,
-                                                    /*MustSucceed=*/true);
+          Value *Result =
+              lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
           replaceInstUsesWith(*I, Result);
           eraseInstFromFunction(*I);
           Users[i] = nullptr; // Skip examining in the next loop.
@@ -2426,9 +2505,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
 
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
-    // Insert a new store to null because we cannot modify the CFG here.
-    Builder.CreateStore(ConstantInt::getTrue(FI.getContext()),
-                        UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+    // Leave a marker since we can't modify the CFG here.
+    CreateNonTerminatorUnreachable(&FI);
     return eraseInstFromFunction(FI);
   }
 
@@ -2618,53 +2696,28 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                       makeArrayRef(exti, exte));
   }
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
-    // We're extracting from an intrinsic, see if we're the only user, which
-    // allows us to simplify multiple result intrinsics to simpler things that
-    // just get one value.
-    if (II->hasOneUse()) {
-      // Check if we're grabbing the overflow bit or the result of a 'with
-      // overflow' intrinsic.  If it's the latter we can remove the intrinsic
+  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+    // We're extracting from an overflow intrinsic, see if we're the only user,
+    // which allows us to simplify multiple result intrinsics to simpler
+    // things that just get one value.
+    if (WO->hasOneUse()) {
+      // Check if we're grabbing only the result of a 'with overflow' intrinsic
       // and replace it with a traditional binary instruction.
-      switch (II->getIntrinsicID()) {
-      case Intrinsic::uadd_with_overflow:
-      case Intrinsic::sadd_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateAdd(LHS, RHS);
-        }
-
-        // If the normal result of the add is dead, and the RHS is a constant,
-        // we can transform this into a range comparison.
-        // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
-        if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
-            return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
-                                ConstantExpr::getNot(CI));
-        break;
-      case Intrinsic::usub_with_overflow:
-      case Intrinsic::ssub_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateSub(LHS, RHS);
-        }
-        break;
-      case Intrinsic::umul_with_overflow:
-      case Intrinsic::smul_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateMul(LHS, RHS);
-        }
-        break;
-      default:
-        break;
+      if (*EV.idx_begin() == 0) {
+        Instruction::BinaryOps BinOp = WO->getBinaryOp();
+        Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+        replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
+        eraseInstFromFunction(*WO);
+        return BinaryOperator::Create(BinOp, LHS, RHS);
       }
+
+      // If the normal result of the add is dead, and the RHS is a constant,
+      // we can transform this into a range comparison.
+      // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
+      if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
+          return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
+                              ConstantExpr::getNot(CI));
     }
   }
   if (LoadInst *L = dyn_cast<LoadInst>(Agg))
@@ -2687,7 +2740,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       Builder.SetInsertPoint(L);
       Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
                                              L->getPointerOperand(), Indices);
-      Instruction *NL = Builder.CreateLoad(GEP);
+      Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
       // Whatever aliasing information we had for the orignal load must also
       // hold for the smaller load, so propagate the annotations.
       AAMDNodes Nodes;
@@ -3065,9 +3118,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
       I->isTerminator())
     return false;
 
-  // Do not sink alloca instructions out of the entry block.
-  if (isa<AllocaInst>(I) && I->getParent() ==
-        &DestBlock->getParent()->getEntryBlock())
+  // Do not sink static or dynamic alloca instructions. Static allocas must
+  // remain in the entry block, and dynamic allocas must not be sunk in between
+  // a stacksave / stackrestore pair, which would incorrectly shorten its
+  // lifetime.
+  if (isa<AllocaInst>(I))
     return false;
 
   // Do not sink into catchswitch blocks.
@@ -3093,13 +3148,35 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
   ++NumSunkInst;
 
   // Also sink all related debug uses from the source basic block. Otherwise we
-  // get debug use before the def.
-  SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+  // get debug use before the def. Attempt to salvage debug uses first, to
+  // maximise the range variables have location for. If we cannot salvage, then
+  // mark the location undef: we know it was supposed to receive a new location
+  // here, but that computation has been sunk.
+  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
   findDbgUsers(DbgUsers, I);
-  for (auto *DII : DbgUsers) {
+  for (auto *DII : reverse(DbgUsers)) {
     if (DII->getParent() == SrcBlock) {
-      DII->moveBefore(&*InsertPos);
-      LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+      // dbg.value is in the same basic block as the sunk inst, see if we can
+      // salvage it. Clone a new copy of the instruction: on success we need
+      // both salvaged and unsalvaged copies.
+      SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
+          cast<DbgVariableIntrinsic>(DII->clone())};
+
+      if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
+        // We are unable to salvage: sink the cloned dbg.value, and mark the
+        // original as undef, terminating any earlier variable location.
+        LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+        TmpUser[0]->insertBefore(&*InsertPos);
+        Value *Undef = UndefValue::get(I->getType());
+        DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+                                                ValueAsMetadata::get(Undef)));
+      } else {
+        // We successfully salvaged: place the salvaged dbg.value in the
+        // original location, and move the unmodified dbg.value to sink with
+        // the sunk inst.
+        TmpUser[0]->insertBefore(DII);
+        DII->moveBefore(&*InsertPos);
+      }
     }
   }
   return true;
@@ -3294,7 +3371,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
       if (isInstructionTriviallyDead(Inst, TLI)) {
         ++NumDeadInst;
         LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfo(*Inst);
+        if (!salvageDebugInfo(*Inst))
+          replaceDbgUsesWithUndef(Inst);
         Inst->eraseFromParent();
         MadeIRChange = true;
         continue;
@@ -3407,7 +3485,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
 static bool combineInstructionsOverFunction(
     Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-    OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
     LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
   ExpensiveCombines |= EnableExpensiveCombines;
@@ -3437,8 +3516,8 @@ static bool combineInstructionsOverFunction(
 
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
-    InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA,
-                    AC, TLI, DT, ORE, DL, LI);
+    InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
+                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
 
     if (!IC.run())
@@ -3458,8 +3537,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
 
   auto *AA = &AM.getResult<AAManager>(F);
+  const ModuleAnalysisManager &MAM =
+      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  ProfileSummaryInfo *PSI =
+      MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                       ExpensiveCombines, LI))
+                                       BFI, PSI, ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3483,6 +3569,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<AAResultsWrapperPass>();
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
 }
 
 bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3499,9 +3587,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
   // Optional analyses.
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  ProfileSummaryInfo *PSI =
+      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  BlockFrequencyInfo *BFI =
+      (PSI && PSI->hasProfileSummary()) ?
+      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+      nullptr;
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                         ExpensiveCombines, LI);
+                                         BFI, PSI, ExpensiveCombines, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3514,6 +3608,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                     "Combine redundant instructions", false, false)
 
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f1558c75cb90..6821e214e921 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1,9 +1,8 @@
 //===- AddressSanitizer.cpp - memory error detector -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
@@ -25,7 +25,6 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/IR/Argument.h"
@@ -72,6 +71,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
@@ -94,9 +94,6 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
 static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
 static const uint64_t kDynamicShadowSentinel =
     std::numeric_limits<uint64_t>::max();
-static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64;
 static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;  // < 2G.
 static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
 static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
@@ -112,6 +109,7 @@ static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
 static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
 static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
 static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+static const uint64_t kEmscriptenShadowOffset = 0;
 
 static const uint64_t kMyriadShadowScale = 5;
 static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL;
@@ -275,6 +273,16 @@ static cl::opt<bool> ClInvalidPointerPairs(
     cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
     cl::init(false));
 
+static cl::opt<bool> ClInvalidPointerCmp(
+    "asan-detect-invalid-pointer-cmp",
+    cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerSub(
+    "asan-detect-invalid-pointer-sub",
+    cl::desc("Instrument - operations with pointer operands"), cl::Hidden,
+    cl::init(false));
+
 static cl::opt<unsigned> ClRealignStack(
     "asan-realign-stack",
     cl::desc("Realign stack to the value of this flag (power of two)"),
@@ -311,10 +319,10 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
                                    cl::desc("scale of asan shadow mapping"),
                                    cl::Hidden, cl::init(0));
 
-static cl::opt<unsigned long long> ClMappingOffset(
-    "asan-mapping-offset",
-    cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden,
-    cl::init(0));
+static cl::opt<uint64_t>
+    ClMappingOffset("asan-mapping-offset",
+                    cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"),
+                    cl::Hidden, cl::init(0));
 
 // Optimization flags. Not user visible, used mostly for testing
 // and benchmarking the tool.
@@ -393,87 +401,6 @@ STATISTIC(NumOptimizedAccessesToStackVar,
 
 namespace {
 
-/// Frontend-provided metadata for source location.
-struct LocationMetadata {
-  StringRef Filename;
-  int LineNo = 0;
-  int ColumnNo = 0;
-
-  LocationMetadata() = default;
-
-  bool empty() const { return Filename.empty(); }
-
-  void parse(MDNode *MDN) {
-    assert(MDN->getNumOperands() == 3);
-    MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
-    Filename = DIFilename->getString();
-    LineNo =
-        mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
-    ColumnNo =
-        mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
-  }
-};
-
-/// Frontend-provided metadata for global variables.
-class GlobalsMetadata {
-public:
-  struct Entry {
-    LocationMetadata SourceLoc;
-    StringRef Name;
-    bool IsDynInit = false;
-    bool IsBlacklisted = false;
-
-    Entry() = default;
-  };
-
-  GlobalsMetadata() = default;
-
-  void reset() {
-    inited_ = false;
-    Entries.clear();
-  }
-
-  void init(Module &M) {
-    assert(!inited_);
-    inited_ = true;
-    NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
-    if (!Globals) return;
-    for (auto MDN : Globals->operands()) {
-      // Metadata node contains the global and the fields of "Entry".
-      assert(MDN->getNumOperands() == 5);
-      auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
-      // The optimizer may optimize away a global entirely.
-      if (!V) continue;
-      auto *StrippedV = V->stripPointerCasts();
-      auto *GV = dyn_cast<GlobalVariable>(StrippedV);
-      if (!GV) continue;
-      // We can already have an entry for GV if it was merged with another
-      // global.
-      Entry &E = Entries[GV];
-      if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
-        E.SourceLoc.parse(Loc);
-      if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
-        E.Name = Name->getString();
-      ConstantInt *IsDynInit =
-          mdconst::extract<ConstantInt>(MDN->getOperand(3));
-      E.IsDynInit |= IsDynInit->isOne();
-      ConstantInt *IsBlacklisted =
-          mdconst::extract<ConstantInt>(MDN->getOperand(4));
-      E.IsBlacklisted |= IsBlacklisted->isOne();
-    }
-  }
-
-  /// Returns metadata entry for a given global.
-  Entry get(GlobalVariable *G) const {
-    auto Pos = Entries.find(G);
-    return (Pos != Entries.end()) ? Pos->second : Entry();
-  }
-
-private:
-  bool inited_ = false;
-  DenseMap<GlobalVariable *, Entry> Entries;
-};
-
 /// This struct defines the shadow mapping using the rule:
 ///   shadow = (mem >> Scale) ADD-or-OR Offset.
 /// If InGlobal is true, then
@@ -499,7 +426,6 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
   bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
                  TargetTriple.getArch() == Triple::ppc64le;
   bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
-  bool IsX86 = TargetTriple.getArch() == Triple::x86;
   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
   bool IsMIPS32 = TargetTriple.isMIPS32();
   bool IsMIPS64 = TargetTriple.isMIPS64();
@@ -508,6 +434,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
   bool IsWindows = TargetTriple.isOSWindows();
   bool IsFuchsia = TargetTriple.isOSFuchsia();
   bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+  bool IsEmscripten = TargetTriple.isOSEmscripten();
 
   ShadowMapping Mapping;
 
@@ -526,10 +453,11 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
     else if (IsNetBSD)
       Mapping.Offset = kNetBSD_ShadowOffset32;
     else if (IsIOS)
-      // If we're targeting iOS and x86, the binary is built for iOS simulator.
-      Mapping.Offset = IsX86 ? kIOSSimShadowOffset32 : kIOSShadowOffset32;
+      Mapping.Offset = kDynamicShadowSentinel;
     else if (IsWindows)
       Mapping.Offset = kWindowsShadowOffset32;
+    else if (IsEmscripten)
+      Mapping.Offset = kEmscriptenShadowOffset;
     else if (IsMyriad) {
       uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 -
                                (kMyriadMemorySize32 >> Mapping.Scale));
@@ -566,10 +494,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
     } else if (IsMIPS64)
       Mapping.Offset = kMIPS64_ShadowOffset64;
     else if (IsIOS)
-      // If we're targeting iOS and x86, the binary is built for iOS simulator.
-      // We are using dynamic shadow offset on the 64-bit devices.
-      Mapping.Offset =
-        IsX86_64 ? kIOSSimShadowOffset64 : kDynamicShadowSentinel;
+      Mapping.Offset = kDynamicShadowSentinel;
     else if (IsAArch64)
       Mapping.Offset = kAArch64_ShadowOffset64;
     else
@@ -607,27 +532,53 @@ static size_t RedzoneSizeForScale(int MappingScale) {
 
 namespace {
 
-/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public FunctionPass {
-  // Pass identification, replacement for typeid
+/// Module analysis for getting various metadata about the module.
+class ASanGlobalsMetadataWrapperPass : public ModulePass {
+public:
   static char ID;
 
-  explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false,
-                            bool UseAfterScope = false)
-      : FunctionPass(ID), UseAfterScope(UseAfterScope || ClUseAfterScope) {
-    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
-    this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ?
-        ClEnableKasan : CompileKernel;
-    initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
+  ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
+    initializeASanGlobalsMetadataWrapperPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    GlobalsMD = GlobalsMetadata(M);
+    return false;
   }
 
   StringRef getPassName() const override {
-    return "AddressSanitizerFunctionPass";
+    return "ASanGlobalsMetadataWrapperPass";
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.setPreservesAll();
+  }
+
+  GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
+
+private:
+  GlobalsMetadata GlobalsMD;
+};
+
+char ASanGlobalsMetadataWrapperPass::ID = 0;
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer {
+  AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+                   bool CompileKernel = false, bool Recover = false,
+                   bool UseAfterScope = false)
+      : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) {
+    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+    this->CompileKernel =
+        ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+    C = &(M.getContext());
+    LongSize = M.getDataLayout().getPointerSizeInBits();
+    IntptrTy = Type::getIntNTy(*C, LongSize);
+    TargetTriple = Triple(M.getTargetTriple());
+
+    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
   }
 
   uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
@@ -672,14 +623,10 @@ struct AddressSanitizer : public FunctionPass {
                                  Value *SizeArgument, uint32_t Exp);
   void instrumentMemIntrinsic(MemIntrinsic *MI);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
-  bool runOnFunction(Function &F) override;
+  bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
   void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
   void markEscapedLocalAllocas(Function &F);
-  bool doInitialization(Module &M) override;
-  bool doFinalization(Module &M) override;
-
-  DominatorTree &getDominatorTree() const { return *DT; }
 
 private:
   friend struct FunctionStackPoisoner;
@@ -715,36 +662,68 @@ private:
   bool UseAfterScope;
   Type *IntptrTy;
   ShadowMapping Mapping;
-  DominatorTree *DT;
-  Function *AsanHandleNoReturnFunc;
-  Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
+  FunctionCallee AsanHandleNoReturnFunc;
+  FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
   Constant *AsanShadowGlobal;
 
   // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize).
-  Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
-  Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+  FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
+  FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
 
   // These arrays is indexed by AccessIsWrite and Experiment.
-  Function *AsanErrorCallbackSized[2][2];
-  Function *AsanMemoryAccessCallbackSized[2][2];
+  FunctionCallee AsanErrorCallbackSized[2][2];
+  FunctionCallee AsanMemoryAccessCallbackSized[2][2];
 
-  Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
+  FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
   InlineAsm *EmptyAsm;
   Value *LocalDynamicShadow = nullptr;
   GlobalsMetadata GlobalsMD;
   DenseMap<const AllocaInst *, bool> ProcessedAllocas;
 };
 
-class AddressSanitizerModule : public ModulePass {
+class AddressSanitizerLegacyPass : public FunctionPass {
 public:
-  // Pass identification, replacement for typeid
   static char ID;
 
-  explicit AddressSanitizerModule(bool CompileKernel = false,
-                                  bool Recover = false,
-                                  bool UseGlobalsGC = true,
-                                  bool UseOdrIndicator = false)
-      : ModulePass(ID), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+  explicit AddressSanitizerLegacyPass(bool CompileKernel = false,
+                                      bool Recover = false,
+                                      bool UseAfterScope = false)
+      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
+        UseAfterScope(UseAfterScope) {
+    initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "AddressSanitizerFunctionPass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    GlobalsMetadata &GlobalsMD =
+        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover,
+                          UseAfterScope);
+    return ASan.instrumentFunction(F, TLI);
+  }
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseAfterScope;
+};
+
+class ModuleAddressSanitizer {
+public:
+  ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+                         bool CompileKernel = false, bool Recover = false,
+                         bool UseGlobalsGC = true, bool UseOdrIndicator = false)
+      : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
         // Enable aliases as they should have no downside with ODR indicators.
         UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
         UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
@@ -759,10 +738,15 @@ public:
     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
     this->CompileKernel =
         ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+    C = &(M.getContext());
+    int LongSize = M.getDataLayout().getPointerSizeInBits();
+    IntptrTy = Type::getIntNTy(*C, LongSize);
+    TargetTriple = Triple(M.getTargetTriple());
+    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
   }
 
-  bool runOnModule(Module &M) override;
-  StringRef getPassName() const override { return "AddressSanitizerModule"; }
+  bool instrumentModule(Module &);
 
 private:
   void initializeCallbacks(Module &M);
@@ -810,19 +794,54 @@ private:
   LLVMContext *C;
   Triple TargetTriple;
   ShadowMapping Mapping;
-  Function *AsanPoisonGlobals;
-  Function *AsanUnpoisonGlobals;
-  Function *AsanRegisterGlobals;
-  Function *AsanUnregisterGlobals;
-  Function *AsanRegisterImageGlobals;
-  Function *AsanUnregisterImageGlobals;
-  Function *AsanRegisterElfGlobals;
-  Function *AsanUnregisterElfGlobals;
+  FunctionCallee AsanPoisonGlobals;
+  FunctionCallee AsanUnpoisonGlobals;
+  FunctionCallee AsanRegisterGlobals;
+  FunctionCallee AsanUnregisterGlobals;
+  FunctionCallee AsanRegisterImageGlobals;
+  FunctionCallee AsanUnregisterImageGlobals;
+  FunctionCallee AsanRegisterElfGlobals;
+  FunctionCallee AsanUnregisterElfGlobals;
 
   Function *AsanCtorFunction = nullptr;
   Function *AsanDtorFunction = nullptr;
 };
 
+class ModuleAddressSanitizerLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false,
+                                            bool Recover = false,
+                                            bool UseGlobalGC = true,
+                                            bool UseOdrIndicator = false)
+      : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
+        UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) {
+    initializeModuleAddressSanitizerLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+  }
+
+  bool runOnModule(Module &M) override {
+    GlobalsMetadata &GlobalsMD =
+        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+    ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover,
+                                      UseGlobalGC, UseOdrIndicator);
+    return ASanModule.instrumentModule(M);
+  }
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseGlobalGC;
+  bool UseOdrIndicator;
+};
+
 // Stack poisoning does not play well with exception handling.
 // When an exception is thrown, we essentially bypass the code
 // that unpoisones the stack. This is why the run-time library has
@@ -846,11 +865,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
   SmallVector<Instruction *, 8> RetVec;
   unsigned StackAlignment;
 
-  Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
-      *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
-  Function *AsanSetShadowFunc[0x100] = {};
-  Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
-  Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc;
+  FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+      AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+  FunctionCallee AsanSetShadowFunc[0x100] = {};
+  FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
+  FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;
 
   // Stores a place and arguments of poisoning/unpoisoning call for alloca.
   struct AllocaPoisonCall {
@@ -861,6 +880,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
   };
   SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
   SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
+  bool HasUntracedLifetimeIntrinsic = false;
 
   SmallVector<AllocaInst *, 1> DynamicAllocaVec;
   SmallVector<IntrinsicInst *, 1> StackRestoreVec;
@@ -876,13 +896,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
   std::unique_ptr<CallInst> EmptyInlineAsm;
 
   FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
-      : F(F),
-        ASan(ASan),
-        DIB(*F.getParent(), /*AllowUnresolved*/ false),
-        C(ASan.C),
-        IntptrTy(ASan.IntptrTy),
-        IntptrPtrTy(PointerType::get(IntptrTy, 0)),
-        Mapping(ASan.Mapping),
+      : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
+        C(ASan.C), IntptrTy(ASan.IntptrTy),
+        IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
         StackAlignment(1 << Mapping.Scale),
         EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
 
@@ -899,6 +915,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
 
     initializeCallbacks(*F.getParent());
 
+    if (HasUntracedLifetimeIntrinsic) {
+      // If there are lifetime intrinsics which couldn't be traced back to an
+      // alloca, we may not know exactly when a variable enters scope, and
+      // therefore should "fail safe" by not poisoning them.
+      StaticAllocaPoisonCallVec.clear();
+      DynamicAllocaPoisonCallVec.clear();
+    }
+
     processDynamicAllocas();
     processStaticAllocas();
 
@@ -950,8 +974,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
                                      DynamicAreaOffset);
     }
 
-    IRB.CreateCall(AsanAllocasUnpoisonFunc,
-                   {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr});
+    IRB.CreateCall(
+        AsanAllocasUnpoisonFunc,
+        {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr});
   }
 
   // Unpoison dynamic allocas redzones.
@@ -1018,8 +1043,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
         !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
       return;
     // Find alloca instruction that corresponds to llvm.lifetime argument.
-    AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
-    if (!AI || !ASan.isInterestingAlloca(*AI))
+    AllocaInst *AI =
+        llvm::findAllocaForValue(II.getArgOperand(1), AllocaForValue);
+    if (!AI) {
+      HasUntracedLifetimeIntrinsic = true;
+      return;
+    }
+    // We're interested only in allocas we can handle.
+    if (!ASan.isInterestingAlloca(*AI))
       return;
     bool DoPoison = (ID == Intrinsic::lifetime_end);
     AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
@@ -1042,16 +1073,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
   // ---------------------- Helpers.
   void initializeCallbacks(Module &M);
 
-  bool doesDominateAllExits(const Instruction *I) const {
-    for (auto Ret : RetVec) {
-      if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
-    }
-    return true;
-  }
-
-  /// Finds alloca where the value comes from.
-  AllocaInst *findAllocaForValue(Value *V);
-
   // Copies bytes from ShadowBytes into shadow memory for indexes where
   // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
   // ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
@@ -1074,16 +1095,111 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
 
 } // end anonymous namespace
 
-char AddressSanitizer::ID = 0;
+void LocationMetadata::parse(MDNode *MDN) {
+  assert(MDN->getNumOperands() == 3);
+  MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+  Filename = DIFilename->getString();
+  LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+  ColumnNo =
+      mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+}
+
+// FIXME: It would be cleaner to attach the relevant metadata directly to the
+// globals we want to sanitize and to read it on each pass over a function,
+// instead of reading module-level metadata up front.
+GlobalsMetadata::GlobalsMetadata(Module &M) {
+  NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+  if (!Globals)
+    return;
+  for (auto MDN : Globals->operands()) {
+    // Metadata node contains the global and the fields of "Entry".
+    assert(MDN->getNumOperands() == 5);
+    auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
+    // The optimizer may optimize away a global entirely.
+    if (!V)
+      continue;
+    auto *StrippedV = V->stripPointerCasts();
+    auto *GV = dyn_cast<GlobalVariable>(StrippedV);
+    if (!GV)
+      continue;
+    // We can already have an entry for GV if it was merged with another
+    // global.
+    Entry &E = Entries[GV];
+    if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
+      E.SourceLoc.parse(Loc);
+    if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
+      E.Name = Name->getString();
+    ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
+    E.IsDynInit |= IsDynInit->isOne();
+    ConstantInt *IsBlacklisted =
+        mdconst::extract<ConstantInt>(MDN->getOperand(4));
+    E.IsBlacklisted |= IsBlacklisted->isOne();
+  }
+}
+
+AnalysisKey ASanGlobalsMetadataAnalysis::Key;
+
+GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
+                                                 ModuleAnalysisManager &AM) {
+  return GlobalsMetadata(M);
+}
+
+AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover,
+                                           bool UseAfterScope)
+    : CompileKernel(CompileKernel), Recover(Recover),
+      UseAfterScope(UseAfterScope) {}
+
+PreservedAnalyses AddressSanitizerPass::run(Function &F,
+                                            AnalysisManager<Function> &AM) {
+  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+  auto &MAM = MAMProxy.getManager();
+  Module &M = *F.getParent();
+  if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
+    const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+    AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope);
+    if (Sanitizer.instrumentFunction(F, TLI))
+      return PreservedAnalyses::none();
+    return PreservedAnalyses::all();
+  }
+
+  report_fatal_error(
+      "The ASanGlobalsMetadataAnalysis is required to run before "
+      "AddressSanitizer can run");
+  return PreservedAnalyses::all();
+}
+
+ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
+                                                       bool Recover,
+                                                       bool UseGlobalGC,
+                                                       bool UseOdrIndicator)
+    : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+      UseOdrIndicator(UseOdrIndicator) {}
+
+PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
+                                                  AnalysisManager<Module> &AM) {
+  GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
+  ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover,
+                                   UseGlobalGC, UseOdrIndicator);
+  if (Sanitizer.instrumentModule(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
+                "Read metadata to mark which globals should be instrumented "
+                "when running ASan.",
+                false, true)
+
+char AddressSanitizerLegacyPass::ID = 0;
 
 INITIALIZE_PASS_BEGIN(
-    AddressSanitizer, "asan",
+    AddressSanitizerLegacyPass, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
     false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(
-    AddressSanitizer, "asan",
+    AddressSanitizerLegacyPass, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
     false)
 
@@ -1091,24 +1207,22 @@ FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
                                                        bool Recover,
                                                        bool UseAfterScope) {
   assert(!CompileKernel || Recover);
-  return new AddressSanitizer(CompileKernel, Recover, UseAfterScope);
+  return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope);
 }
 
-char AddressSanitizerModule::ID = 0;
+char ModuleAddressSanitizerLegacyPass::ID = 0;
 
 INITIALIZE_PASS(
-    AddressSanitizerModule, "asan-module",
+    ModuleAddressSanitizerLegacyPass, "asan-module",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
     "ModulePass",
     false, false)
 
-ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
-                                                   bool Recover,
-                                                   bool UseGlobalsGC,
-                                                   bool UseOdrIndicator) {
+ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
+    bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) {
   assert(!CompileKernel || Recover);
-  return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC,
-                                    UseOdrIndicator);
+  return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover,
+                                              UseGlobalsGC, UseOdrIndicator);
 }
 
 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1312,11 +1426,24 @@ static bool isPointerOperand(Value *V) {
 // This is a rough heuristic; it may cause both false positives and
 // false negatives. The proper implementation requires cooperation with
 // the frontend.
-static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
+static bool isInterestingPointerComparison(Instruction *I) {
   if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
-    if (!Cmp->isRelational()) return false;
-  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-    if (BO->getOpcode() != Instruction::Sub) return false;
+    if (!Cmp->isRelational())
+      return false;
+  } else {
+    return false;
+  }
+  return isPointerOperand(I->getOperand(0)) &&
+         isPointerOperand(I->getOperand(1));
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerSubtraction(Instruction *I) {
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+    if (BO->getOpcode() != Instruction::Sub)
+      return false;
   } else {
     return false;
   }
@@ -1328,13 +1455,16 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
   // If a global variable does not have dynamic initialization we don't
   // have to instrument it.  However, if a global does not have initializer
   // at all, we assume it has dynamic initializer (in other TU).
+  //
+  // FIXME: Metadata should be attached directly to the global instead
+  // of being added to llvm.asan.globals.
   return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
 }
 
 void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
     Instruction *I) {
   IRBuilder<> IRB(I);
-  Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+  FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
   Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
   for (Value *&i : Param) {
     if (i->getType()->isPointerTy())
@@ -1392,7 +1522,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
 
     IRBuilder<> IRB(InsertBefore);
     InstrumentedAddress =
-        IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
     doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
                         Granularity, ElemTypeSize, IsWrite, SizeArgument,
                         UseCalls, Exp);
@@ -1553,7 +1683,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   Value *ShadowPtr = memToShadow(AddrLong, IRB);
   Value *CmpVal = Constant::getNullValue(ShadowTy);
   Value *ShadowValue =
-      IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+      IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
 
   Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
   size_t Granularity = 1ULL << Mapping.Scale;
@@ -1612,7 +1742,7 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
   }
 }
 
-void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
+void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit,
                                                   GlobalValue *ModuleName) {
   // Set up the arguments to our poison/unpoison functions.
   IRBuilder<> IRB(&GlobalInit.front(),
@@ -1628,7 +1758,7 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
       CallInst::Create(AsanUnpoisonGlobals, "", RI);
 }
 
-void AddressSanitizerModule::createInitializerPoisonCalls(
+void ModuleAddressSanitizer::createInitializerPoisonCalls(
     Module &M, GlobalValue *ModuleName) {
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (!GV)
@@ -1653,10 +1783,12 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
   }
 }
 
-bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
+bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
   Type *Ty = G->getValueType();
   LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
 
+  // FIXME: Metadata should be attached directly to the global instead
+  // of being added to llvm.asan.globals.
   if (GlobalsMD.get(G).IsBlacklisted) return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
@@ -1768,7 +1900,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
 // On Mach-O platforms, we emit global metadata in a separate section of the
 // binary in order to allow the linker to properly dead strip. This is only
 // supported on recent versions of ld64.
-bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
+bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
   if (!TargetTriple.isOSBinFormatMachO())
     return false;
 
@@ -1782,7 +1914,7 @@ bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
   return false;
 }
 
-StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
+StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
   switch (TargetTriple.getObjectFormat()) {
   case Triple::COFF:  return ".ASAN$GL";
   case Triple::ELF:   return "asan_globals";
@@ -1792,52 +1924,39 @@ StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
   llvm_unreachable("unsupported object format");
 }
 
-void AddressSanitizerModule::initializeCallbacks(Module &M) {
+void ModuleAddressSanitizer::initializeCallbacks(Module &M) {
   IRBuilder<> IRB(*C);
 
   // Declare our poisoning and unpoisoning functions.
-  AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy));
-  AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
-  AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanUnpoisonGlobalsName, IRB.getVoidTy()));
-  AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+  AsanPoisonGlobals =
+      M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy);
+  AsanUnpoisonGlobals =
+      M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy());
 
   // Declare functions that register/unregister globals.
-  AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy));
-  AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
-  AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
-                            IntptrTy, IntptrTy));
-  AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+  AsanRegisterGlobals = M.getOrInsertFunction(
+      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanUnregisterGlobals = M.getOrInsertFunction(
+      kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
 
   // Declare the functions that find globals in a shared object and then invoke
   // the (un)register function on them.
-  AsanRegisterImageGlobals =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
-  AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
-
-  AsanUnregisterImageGlobals =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
-  AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+  AsanRegisterImageGlobals = M.getOrInsertFunction(
+      kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+  AsanUnregisterImageGlobals = M.getOrInsertFunction(
+      kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
 
-  AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+  AsanRegisterElfGlobals =
       M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
-                            IntptrTy, IntptrTy, IntptrTy));
-  AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
-
-  AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+                            IntptrTy, IntptrTy, IntptrTy);
+  AsanUnregisterElfGlobals =
       M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
-                            IntptrTy, IntptrTy, IntptrTy));
-  AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
+                            IntptrTy, IntptrTy, IntptrTy);
 }
 
 // Put the metadata and the instrumented global in the same group. This ensures
 // that the metadata is discarded if the instrumented global is discarded.
-void AddressSanitizerModule::SetComdatForGlobalMetadata(
+void ModuleAddressSanitizer::SetComdatForGlobalMetadata(
     GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
   Module &M = *G->getParent();
   Comdat *C = G->getComdat();
@@ -1875,7 +1994,7 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
 // Create a separate metadata global and put it in the appropriate ASan
 // global registration section.
 GlobalVariable *
-AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
+ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
                                              StringRef OriginalName) {
   auto Linkage = TargetTriple.isOSBinFormatMachO()
                      ? GlobalVariable::InternalLinkage
@@ -1887,7 +2006,7 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
   return Metadata;
 }
 
-IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
+IRBuilder<> ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
   AsanDtorFunction =
       Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
                        GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
@@ -1896,7 +2015,7 @@ IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
   return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
 }
 
-void AddressSanitizerModule::InstrumentGlobalsCOFF(
+void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
     IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
     ArrayRef<Constant *> MetadataInitializers) {
   assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -1920,7 +2039,7 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(
   }
 }
 
-void AddressSanitizerModule::InstrumentGlobalsELF(
+void ModuleAddressSanitizer::InstrumentGlobalsELF(
     IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
     ArrayRef<Constant *> MetadataInitializers,
     const std::string &UniqueModuleId) {
@@ -1979,7 +2098,7 @@ void AddressSanitizerModule::InstrumentGlobalsELF(
                        IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
 }
 
-void AddressSanitizerModule::InstrumentGlobalsMachO(
+void ModuleAddressSanitizer::InstrumentGlobalsMachO(
     IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
     ArrayRef<Constant *> MetadataInitializers) {
   assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2036,7 +2155,7 @@ void AddressSanitizerModule::InstrumentGlobalsMachO(
                       {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
 }
 
-void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
+void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
     IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
     ArrayRef<Constant *> MetadataInitializers) {
   assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2070,9 +2189,9 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
 // redzones and inserts this function into llvm.global_ctors.
 // Sets *CtorComdat to true if the global registration code emitted into the
 // asan constructor is comdat-compatible.
-bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) {
+bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+                                               bool *CtorComdat) {
   *CtorComdat = false;
-  GlobalsMD.init(M);
 
   SmallVector<GlobalVariable *, 16> GlobalsToChange;
 
@@ -2115,6 +2234,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
     static const uint64_t kMaxGlobalRedzone = 1 << 18;
     GlobalVariable *G = GlobalsToChange[i];
 
+    // FIXME: Metadata should be attached directly to the global instead
+    // of being added to llvm.asan.globals.
     auto MD = GlobalsMD.get(G);
     StringRef NameForGlobal = G->getName();
     // Create string holding the global name (use global name from metadata
@@ -2271,7 +2392,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
   return true;
 }
 
-int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
+int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
   int LongSize = M.getDataLayout().getPointerSizeInBits();
   bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
   int Version = 8;
@@ -2281,12 +2402,7 @@ int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
   return Version;
 }
 
-bool AddressSanitizerModule::runOnModule(Module &M) {
-  C = &(M.getContext());
-  int LongSize = M.getDataLayout().getPointerSizeInBits();
-  IntptrTy = Type::getIntNTy(*C, LongSize);
-  TargetTriple = Triple(M.getTargetTriple());
-  Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
+bool ModuleAddressSanitizer::instrumentModule(Module &M) {
   initializeCallbacks(M);
 
   if (CompileKernel)
@@ -2346,51 +2462,49 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
         Args2.push_back(ExpType);
         Args1.push_back(ExpType);
       }
-      AsanErrorCallbackSized[AccessIsWrite][Exp] =
-          checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-              kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
-              FunctionType::get(IRB.getVoidTy(), Args2, false)));
+      AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+          kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
+          FunctionType::get(IRB.getVoidTy(), Args2, false));
 
-      AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
-          checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-              ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
-              FunctionType::get(IRB.getVoidTy(), Args2, false)));
+      AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+          ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+          FunctionType::get(IRB.getVoidTy(), Args2, false));
 
       for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
            AccessSizeIndex++) {
         const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
         AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
-            checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+            M.getOrInsertFunction(
                 kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
-                FunctionType::get(IRB.getVoidTy(), Args1, false)));
+                FunctionType::get(IRB.getVoidTy(), Args1, false));
 
         AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
-            checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+            M.getOrInsertFunction(
                 ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
-                FunctionType::get(IRB.getVoidTy(), Args1, false)));
+                FunctionType::get(IRB.getVoidTy(), Args1, false));
       }
     }
   }
 
   const std::string MemIntrinCallbackPrefix =
       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
-  AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
-  AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
-  AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
-  AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy()));
-
-  AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy));
-  AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy));
+  AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+                                      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                      IRB.getInt8PtrTy(), IntptrTy);
+  AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                     IRB.getInt8PtrTy(), IntptrTy);
+  AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                     IRB.getInt32Ty(), IntptrTy);
+
+  AsanHandleNoReturnFunc =
+      M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
+
+  AsanPtrCmpFunction =
+      M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanPtrSubFunction =
+      M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy);
   // We insert an empty inline asm after __asan_report* to avoid callback merge.
   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                             StringRef(""), StringRef(""),
@@ -2400,25 +2514,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
                                            ArrayType::get(IRB.getInt8Ty(), 0));
 }
 
-// virtual
-bool AddressSanitizer::doInitialization(Module &M) {
-  // Initialize the private fields. No one has accessed them before.
-  GlobalsMD.init(M);
-
-  C = &(M.getContext());
-  LongSize = M.getDataLayout().getPointerSizeInBits();
-  IntptrTy = Type::getIntNTy(*C, LongSize);
-  TargetTriple = Triple(M.getTargetTriple());
-
-  Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
-  return true;
-}
-
-bool AddressSanitizer::doFinalization(Module &M) {
-  GlobalsMD.reset();
-  return false;
-}
-
 bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
   // For each NSObject descendant having a +load method, this method is invoked
   // by the ObjC runtime before any of the static constructors is called.
@@ -2428,7 +2523,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
   // We cannot just ignore these methods, because they may call other
   // instrumented functions.
   if (F.getName().find(" load]") != std::string::npos) {
-    Function *AsanInitFunction =
+    FunctionCallee AsanInitFunction =
         declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
     IRBuilder<> IRB(&F.front(), F.front().begin());
     IRB.CreateCall(AsanInitFunction, {});
@@ -2460,7 +2555,7 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
   } else {
     Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
         kAsanShadowMemoryDynamicAddress, IntptrTy);
-    LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+    LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
   }
 }
 
@@ -2492,7 +2587,8 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
   }
 }
 
-bool AddressSanitizer::runOnFunction(Function &F) {
+bool AddressSanitizer::instrumentFunction(Function &F,
+                                          const TargetLibraryInfo *TLI) {
   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
   if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
   if (F.getName().startswith("__asan_")) return false;
@@ -2511,7 +2607,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
 
   initializeCallbacks(*F.getParent());
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 
   FunctionStateRAII CleanupObj(this);
 
@@ -2532,8 +2627,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   bool IsWrite;
   unsigned Alignment;
   uint64_t TypeSize;
-  const TargetLibraryInfo *TLI =
-      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
 
   // Fill the set of memory operations to instrument.
   for (auto &BB : F) {
@@ -2557,8 +2650,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
               continue; // We've seen this temp in the current BB.
           }
         }
-      } else if (ClInvalidPointerPairs &&
-                 isInterestingPointerComparisonOrSubtraction(&Inst)) {
+      } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) &&
+                  isInterestingPointerComparison(&Inst)) ||
+                 ((ClInvalidPointerPairs || ClInvalidPointerSub) &&
+                  isInterestingPointerSubtraction(&Inst))) {
         PointerComparisonsOrSubtracts.push_back(&Inst);
         continue;
       } else if (isa<MemIntrinsic>(Inst)) {
@@ -2569,7 +2664,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
         if (CS) {
           // A call inside BB.
           TempsToInstrument.clear();
-          if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
+          if (CS.doesNotReturn() && !CS->getMetadata("nosanitize"))
+            NoReturnCalls.push_back(CS.getInstruction());
         }
         if (CallInst *CI = dyn_cast<CallInst>(&Inst))
           maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
@@ -2606,7 +2702,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   FunctionStackPoisoner FSP(F, *this);
   bool ChangedStack = FSP.runOnFunction();
 
-  // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+  // We must unpoison the stack before NoReturn calls (throw, _exit, etc).
   // See e.g. https://github.com/google/sanitizers/issues/37
   for (auto CI : NoReturnCalls) {
     IRBuilder<> IRB(CI);
@@ -2643,20 +2739,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
   IRBuilder<> IRB(*C);
   for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
     std::string Suffix = itostr(i);
-    AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
-        M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
-                              IntptrTy));
-    AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
+    AsanStackMallocFunc[i] = M.getOrInsertFunction(
+        kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy);
+    AsanStackFreeFunc[i] =
         M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
-                              IRB.getVoidTy(), IntptrTy, IntptrTy));
+                              IRB.getVoidTy(), IntptrTy, IntptrTy);
   }
   if (ASan.UseAfterScope) {
-    AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
-        M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
-                              IntptrTy, IntptrTy));
-    AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
-        M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
-                              IntptrTy, IntptrTy));
+    AsanPoisonStackMemoryFunc = M.getOrInsertFunction(
+        kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+    AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction(
+        kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
   }
 
   for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
@@ -2664,15 +2757,13 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
     Name << kAsanSetShadowPrefix;
     Name << std::setw(2) << std::setfill('0') << std::hex << Val;
     AsanSetShadowFunc[Val] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy));
+        M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
   }
 
-  AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
-  AsanAllocasUnpoisonFunc =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
+  AsanAllocaPoisonFunc = M.getOrInsertFunction(
+      kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanAllocasUnpoisonFunc = M.getOrInsertFunction(
+      kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
 }
 
 void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
@@ -2958,7 +3049,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
   Value *FakeStack;
   Value *LocalStackBase;
   Value *LocalStackBaseAlloca;
-  bool Deref;
+  uint8_t DIExprFlags = DIExpression::ApplyOffset;
 
   if (DoStackMalloc) {
     LocalStackBaseAlloca =
@@ -2969,9 +3060,9 @@ void FunctionStackPoisoner::processStaticAllocas() {
     // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize);
     Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal(
         kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty());
-    Value *UseAfterReturnIsEnabled =
-        IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUseAfterReturn),
-                         Constant::getNullValue(IRB.getInt32Ty()));
+    Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(
+        IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn),
+        Constant::getNullValue(IRB.getInt32Ty()));
     Instruction *Term =
         SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
     IRBuilder<> IRBIf(Term);
@@ -2999,7 +3090,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
     LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
     IRB.SetCurrentDebugLocation(EntryDebugLocation);
     IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
-    Deref = true;
+    DIExprFlags |= DIExpression::DerefBefore;
   } else {
     // void *FakeStack = nullptr;
     // void *LocalStackBase = alloca(LocalStackSize);
@@ -3007,14 +3098,13 @@ void FunctionStackPoisoner::processStaticAllocas() {
     LocalStackBase =
         DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
     LocalStackBaseAlloca = LocalStackBase;
-    Deref = false;
   }
 
   // Replace Alloca instructions with base+offset.
   for (const auto &Desc : SVD) {
     AllocaInst *AI = Desc.AI;
-    replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, Deref,
-                               Desc.Offset, DIExpression::NoDeref);
+    replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, DIExprFlags,
+                               Desc.Offset);
     Value *NewAllocaPtr = IRB.CreateIntToPtr(
         IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
         AI->getType());
@@ -3105,7 +3195,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
             FakeStack,
             ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
         Value *SavedFlagPtr = IRBPoison.CreateLoad(
-            IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+            IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
         IRBPoison.CreateStore(
             Constant::getNullValue(IRBPoison.getInt8Ty()),
             IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
@@ -3145,41 +3235,6 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
 //     variable may go in and out of scope several times, e.g. in loops).
 // (3) if we poisoned at least one %alloca in a function,
 //     unpoison the whole stack frame at function exit.
-
-AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
-    // We're interested only in allocas we can handle.
-    return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
-  // See if we've already calculated (or started to calculate) alloca for a
-  // given value.
-  AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
-  if (I != AllocaForValue.end()) return I->second;
-  // Store 0 while we're calculating alloca for value V to avoid
-  // infinite recursion if the value references itself.
-  AllocaForValue[V] = nullptr;
-  AllocaInst *Res = nullptr;
-  if (CastInst *CI = dyn_cast<CastInst>(V))
-    Res = findAllocaForValue(CI->getOperand(0));
-  else if (PHINode *PN = dyn_cast<PHINode>(V)) {
-    for (Value *IncValue : PN->incoming_values()) {
-      // Allow self-referencing phi-nodes.
-      if (IncValue == PN) continue;
-      AllocaInst *IncValueAI = findAllocaForValue(IncValue);
-      // AI for incoming values should exist and should all be equal.
-      if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
-        return nullptr;
-      Res = IncValueAI;
-    }
-  } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
-    Res = findAllocaForValue(EP->getPointerOperand());
-  } else {
-    LLVM_DEBUG(dbgs() << "Alloca search canceled on unknown instruction: " << *V
-                      << "\n");
-  }
-  if (Res) AllocaForValue[V] = Res;
-  return Res;
-}
-
 void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
   IRBuilder<> IRB(AI);
 
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index a0c78e0468c6..4dc9b611c156 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -1,9 +1,8 @@
 //===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -143,8 +142,9 @@ static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
 static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
                               ScalarEvolution &SE) {
   const DataLayout &DL = F.getParent()->getDataLayout();
-  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(),
-                                           /*RoundToAlign=*/true);
+  ObjectSizeOpts EvalOpts;
+  EvalOpts.RoundToAlign = true;
+  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
 
   // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
   // touching instructions
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
index e178ef386e68..971e00041762 100644
--- a/lib/Transforms/Instrumentation/CFGMST.h
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -1,9 +1,8 @@
 //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -196,11 +195,10 @@ public:
 
   // Sort CFG edges based on its weight.
   void sortEdgesByWeight() {
-    std::stable_sort(AllEdges.begin(), AllEdges.end(),
-                     [](const std::unique_ptr<Edge> &Edge1,
-                        const std::unique_ptr<Edge> &Edge2) {
-                       return Edge1->Weight > Edge2->Weight;
-                     });
+    llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
+                                   const std::unique_ptr<Edge> &Edge2) {
+      return Edge1->Weight > Edge2->Weight;
+    });
   }
 
   // Traverse all the edges and compute the Minimum Weight Spanning Tree
diff --git a/lib/Transforms/Instrumentation/CGProfile.cpp b/lib/Transforms/Instrumentation/CGProfile.cpp
index cdcd01726906..358abab3cceb 100644
--- a/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -1,9 +1,8 @@
 //===-- CGProfile.cpp -----------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 1ada0b713092..3f4f9bc7145d 100644
--- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1,9 +1,8 @@
 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -547,19 +546,25 @@ static std::set<Value *> getBaseValues(Value *V,
 static bool
 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
                 DenseSet<Instruction *> &Unhoistables,
-                DenseSet<Instruction *> *HoistStops) {
+                DenseSet<Instruction *> *HoistStops,
+                DenseMap<Instruction *, bool> &Visited) {
   assert(InsertPoint && "Null InsertPoint");
   if (auto *I = dyn_cast<Instruction>(V)) {
+    if (Visited.count(I)) {
+      return Visited[I];
+    }
     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
     if (Unhoistables.count(I)) {
       // Don't hoist if they are not to be hoisted.
+      Visited[I] = false;
       return false;
     }
     if (DT.dominates(I, InsertPoint)) {
       // We are already above the insert point. Stop here.
       if (HoistStops)
         HoistStops->insert(I);
+      Visited[I] = true;
       return true;
     }
     // We aren't not above the insert point, check if we can hoist it above the
@@ -569,7 +574,8 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
       DenseSet<Instruction *> OpsHoistStops;
       bool AllOpsHoisted = true;
       for (Value *Op : I->operands()) {
-        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
+                             Visited)) {
           AllOpsHoisted = false;
           break;
         }
@@ -578,9 +584,11 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
         if (HoistStops)
           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+        Visited[I] = true;
         return true;
       }
     }
+    Visited[I] = false;
     return false;
   }
   // Non-instructions are considered hoistable.
@@ -893,8 +901,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
         ++it;
         continue;
       }
+      DenseMap<Instruction *, bool> Visited;
       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
-                                         DT, Unhoistables, nullptr);
+                                         DT, Unhoistables, nullptr, Visited);
       if (!IsHoistable) {
         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
         ORE.emit([&]() {
@@ -913,8 +922,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
     InsertPoint = getBranchInsertPoint(RI);
     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
     if (RI.HasBranch && InsertPoint != Branch) {
+      DenseMap<Instruction *, bool> Visited;
       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
-                                         DT, Unhoistables, nullptr);
+                                         DT, Unhoistables, nullptr, Visited);
       if (!IsHoistable) {
         // If the branch isn't hoistable, drop the selects in the entry
         // block, preferring the branch, which makes the branch the hoist
@@ -945,15 +955,17 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
     if (RI.HasBranch) {
       assert(!DT.dominates(Branch, InsertPoint) &&
              "Branch can't be already above the hoist point");
+      DenseMap<Instruction *, bool> Visited;
       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
-                             DT, Unhoistables, nullptr) &&
+                             DT, Unhoistables, nullptr, Visited) &&
              "checkHoistValue for branch");
     }
     for (auto *SI : Selects) {
       assert(!DT.dominates(SI, InsertPoint) &&
              "SI can't be already above the hoist point");
+      DenseMap<Instruction *, bool> Visited;
       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
-                             Unhoistables, nullptr) &&
+                             Unhoistables, nullptr, Visited) &&
              "checkHoistValue for selects");
     }
     CHR_DEBUG(dbgs() << "Result\n");
@@ -1054,7 +1066,8 @@ static bool shouldSplit(Instruction *InsertPoint,
   assert(InsertPoint && "Null InsertPoint");
   // If any of Bases isn't hoistable to the hoist point, split.
   for (Value *V : ConditionValues) {
-    if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+    DenseMap<Instruction *, bool> Visited;
+    if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
       return true; // Not hoistable, split.
     }
@@ -1383,8 +1396,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
              "Must be truthy or falsy");
       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
       // Note checkHoistValue fills in HoistStops.
+      DenseMap<Instruction *, bool> Visited;
       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
-                                         Unhoistables, &HoistStops);
+                                         Unhoistables, &HoistStops, Visited);
       assert(IsHoistable && "Must be hoistable");
       (void)(IsHoistable);  // Unused in release build
       IsHoisted = true;
@@ -1394,8 +1408,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
               OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
              "Must be true or false biased");
       // Note checkHoistValue fills in HoistStops.
+      DenseMap<Instruction *, bool> Visited;
       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
-                                         Unhoistables, &HoistStops);
+                                         Unhoistables, &HoistStops, Visited);
       assert(IsHoistable && "Must be hoistable");
       (void)(IsHoistable);  // Unused in release build
       IsHoisted = true;
@@ -1417,7 +1432,7 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
                      SmallVectorImpl<CHRScope *> &Output) {
   Output.resize(Input.size());
   llvm::copy(Input, Output.begin());
-  std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+  llvm::stable_sort(Output, CHRScopeSorter);
 }
 
 // Return true if V is already hoisted or was hoisted (along with its operands)
@@ -1425,7 +1440,8 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
                        HoistStopMapTy &HoistStopMap,
                        DenseSet<Instruction *> &HoistedSet,
-                       DenseSet<PHINode *> &TrivialPHIs) {
+                       DenseSet<PHINode *> &TrivialPHIs,
+                       DominatorTree &DT) {
   auto IT = HoistStopMap.find(R);
   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
   DenseSet<Instruction *> &HoistStops = IT->second;
@@ -1445,8 +1461,21 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
       // Already hoisted, return.
       return;
     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+    assert(DT.getNode(I->getParent()) && "DT must contain I's block");
+    assert(DT.getNode(HoistPoint->getParent()) &&
+           "DT must contain HoistPoint block");
+    if (DT.dominates(I, HoistPoint))
+      // We are already above the hoist point. Stop here. This may be necessary
+      // when multiple scopes would independently hoist the same
+      // instruction. Since an outer (dominating) scope would hoist it to its
+      // entry before an inner (dominated) scope would to its entry, the inner
+      // scope may see the instruction already hoisted, in which case it is
+      // potentially wrong for the inner scope to hoist it and could cause bad
+      // IR (non-dominating def), but safe to skip hoisting it instead because
+      // it's already in a block that dominates the inner scope.
+      return;
     for (Value *Op : I->operands()) {
-      hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+      hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
     }
     I->moveBefore(HoistPoint);
     HoistedSet.insert(I);
@@ -1457,7 +1486,8 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
 // Hoist the dependent condition values of the branches and the selects in the
 // scope to the insert point.
 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
-                                 DenseSet<PHINode *> &TrivialPHIs) {
+                                 DenseSet<PHINode *> &TrivialPHIs,
+                                 DominatorTree &DT) {
   DenseSet<Instruction *> HoistedSet;
   for (const RegInfo &RI : Scope->CHRRegions) {
     Region *R = RI.R;
@@ -1466,7 +1496,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
-                 HoistedSet, TrivialPHIs);
+                 HoistedSet, TrivialPHIs, DT);
     }
     for (SelectInst *SI : RI.Selects) {
       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
@@ -1474,7 +1504,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
       if (!(IsTrueBiased || IsFalseBiased))
         continue;
       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
-                 HoistedSet, TrivialPHIs);
+                 HoistedSet, TrivialPHIs, DT);
     }
   }
 }
@@ -1708,7 +1738,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
 #endif
 
   // Hoist the conditional values of the branches/selects.
-  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
 
 #ifndef NDEBUG
   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 4c3c6c9added..2279c1bcb6a8 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1,9 +1,8 @@
 //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -333,6 +332,8 @@ class DataFlowSanitizer : public ModulePass {
   Constant *RetvalTLS;
   void *(*GetArgTLSPtr)();
   void *(*GetRetvalTLSPtr)();
+  FunctionType *GetArgTLSTy;
+  FunctionType *GetRetvalTLSTy;
   Constant *GetArgTLS;
   Constant *GetRetvalTLS;
   Constant *ExternalShadowMask;
@@ -342,13 +343,13 @@ class DataFlowSanitizer : public ModulePass {
   FunctionType *DFSanSetLabelFnTy;
   FunctionType *DFSanNonzeroLabelFnTy;
   FunctionType *DFSanVarargWrapperFnTy;
-  Constant *DFSanUnionFn;
-  Constant *DFSanCheckedUnionFn;
-  Constant *DFSanUnionLoadFn;
-  Constant *DFSanUnimplementedFn;
-  Constant *DFSanSetLabelFn;
-  Constant *DFSanNonzeroLabelFn;
-  Constant *DFSanVarargWrapperFn;
+  FunctionCallee DFSanUnionFn;
+  FunctionCallee DFSanCheckedUnionFn;
+  FunctionCallee DFSanUnionLoadFn;
+  FunctionCallee DFSanUnimplementedFn;
+  FunctionCallee DFSanSetLabelFn;
+  FunctionCallee DFSanNonzeroLabelFn;
+  FunctionCallee DFSanVarargWrapperFn;
   MDNode *ColdCallWeights;
   DFSanABIList ABIList;
   DenseMap<Value *, Function *> UnwrappedFnMap;
@@ -436,6 +437,7 @@ public:
   }
 
   void visitOperandShadowInst(Instruction &I);
+  void visitUnaryOperator(UnaryOperator &UO);
   void visitBinaryOperator(BinaryOperator &BO);
   void visitCastInst(CastInst &CI);
   void visitCmpInst(CmpInst &CI);
@@ -581,17 +583,17 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
   if (GetArgTLSPtr) {
     Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
     ArgTLS = nullptr;
+    GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
     GetArgTLS = ConstantExpr::getIntToPtr(
         ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
-        PointerType::getUnqual(
-            FunctionType::get(PointerType::getUnqual(ArgTLSTy), false)));
+        PointerType::getUnqual(GetArgTLSTy));
   }
   if (GetRetvalTLSPtr) {
     RetvalTLS = nullptr;
+    GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
     GetRetvalTLS = ConstantExpr::getIntToPtr(
         ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
-        PointerType::getUnqual(
-            FunctionType::get(PointerType::getUnqual(ShadowTy), false)));
+        PointerType::getUnqual(GetRetvalTLSTy));
   }
 
   ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
@@ -678,8 +680,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
 Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
                                                           StringRef FName) {
   FunctionType *FTT = getTrampolineFunctionType(FT);
-  Constant *C = Mod->getOrInsertFunction(FName, FTT);
-  Function *F = dyn_cast<Function>(C);
+  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
+  Function *F = dyn_cast<Function>(C.getCallee());
   if (F && F->isDeclaration()) {
     F->setLinkage(GlobalValue::LinkOnceODRLinkage);
     BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
@@ -687,7 +689,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
     Function::arg_iterator AI = F->arg_begin(); ++AI;
     for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
       Args.push_back(&*AI);
-    CallInst *CI = CallInst::Create(&*F->arg_begin(), Args, "", BB);
+    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
     ReturnInst *RI;
     if (FT->getReturnType()->isVoidTy())
       RI = ReturnInst::Create(*Ctx, BB);
@@ -704,7 +706,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
                     &*std::prev(F->arg_end()), RI);
   }
 
-  return C;
+  return cast<Constant>(C.getCallee());
 }
 
 bool DataFlowSanitizer::runOnModule(Module &M) {
@@ -726,35 +728,51 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
   ExternalShadowMask =
       Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
 
-  DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
-  if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
-    F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    F->addParamAttr(0, Attribute::ZExt);
-    F->addParamAttr(1, Attribute::ZExt);
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadNone);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+    DFSanUnionFn =
+        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
   }
-  DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
-  if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
-    F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    F->addParamAttr(0, Attribute::ZExt);
-    F->addParamAttr(1, Attribute::ZExt);
+
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadNone);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+    DFSanCheckedUnionFn =
+        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
   }
-  DFSanUnionLoadFn =
-      Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
-  if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
-    F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
-    F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadOnly);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    DFSanUnionLoadFn =
+        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
   }
   DFSanUnimplementedFn =
       Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
-  DFSanSetLabelFn =
-      Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
-  if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
-    F->addParamAttr(0, Attribute::ZExt);
+  {
+    AttributeList AL;
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    DFSanSetLabelFn =
+        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
   }
   DFSanNonzeroLabelFn =
       Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
@@ -765,13 +783,13 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
   SmallPtrSet<Function *, 2> FnsWithNativeABI;
   for (Function &i : M) {
     if (!i.isIntrinsic() &&
-        &i != DFSanUnionFn &&
-        &i != DFSanCheckedUnionFn &&
-        &i != DFSanUnionLoadFn &&
-        &i != DFSanUnimplementedFn &&
-        &i != DFSanSetLabelFn &&
-        &i != DFSanNonzeroLabelFn &&
-        &i != DFSanVarargWrapperFn)
+        &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
       FnsToInstrument.push_back(&i);
   }
 
@@ -982,7 +1000,7 @@ Value *DFSanFunction::getArgTLSPtr() {
     return ArgTLSPtr = DFS.ArgTLS;
 
   IRBuilder<> IRB(&F->getEntryBlock().front());
-  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
+  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
 }
 
 Value *DFSanFunction::getRetvalTLS() {
@@ -992,12 +1010,14 @@ Value *DFSanFunction::getRetvalTLS() {
     return RetvalTLSPtr = DFS.RetvalTLS;
 
   IRBuilder<> IRB(&F->getEntryBlock().front());
-  return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
+  return RetvalTLSPtr =
+             IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
 }
 
 Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
   IRBuilder<> IRB(Pos);
-  return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+  return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
+                                getArgTLSPtr(), 0, Idx);
 }
 
 Value *DFSanFunction::getShadow(Value *V) {
@@ -1015,7 +1035,8 @@ Value *DFSanFunction::getShadow(Value *V) {
             DFS.ArgTLS ? &*F->getEntryBlock().begin()
                        : cast<Instruction>(ArgTLSPtr)->getNextNode();
         IRBuilder<> IRB(ArgTLSPos);
-        Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+        Shadow =
+            IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
         break;
       }
       case DataFlowSanitizer::IA_Args: {
@@ -1165,15 +1186,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
     const auto i = AllocaShadowMap.find(AI);
     if (i != AllocaShadowMap.end()) {
       IRBuilder<> IRB(Pos);
-      return IRB.CreateLoad(i->second);
+      return IRB.CreateLoad(DFS.ShadowTy, i->second);
     }
   }
 
   uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
-  SmallVector<Value *, 2> Objs;
+  SmallVector<const Value *, 2> Objs;
   GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
   bool AllConstants = true;
-  for (Value *Obj : Objs) {
+  for (const Value *Obj : Objs) {
     if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
       continue;
     if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
@@ -1190,7 +1211,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
   case 0:
     return DFS.ZeroShadow;
   case 1: {
-    LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+    LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
     LI->setAlignment(ShadowAlign);
     return LI;
   }
@@ -1198,8 +1219,9 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
     IRBuilder<> IRB(Pos);
     Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
                                        ConstantInt::get(DFS.IntptrTy, 1));
-    return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
-                          IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
+    return combineShadows(
+        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
+        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
   }
   }
   if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
@@ -1218,7 +1240,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
     IRBuilder<> IRB(Pos);
     Value *WideAddr =
         IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
-    Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+    Value *WideShadow =
+        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
     Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
     Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
     Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
@@ -1251,7 +1274,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
       IRBuilder<> NextIRB(NextBB);
       WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
                                    ConstantInt::get(DFS.IntptrTy, 1));
-      Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
+                                                        WideAddr, ShadowAlign);
       ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
       LastBr->setSuccessor(0, NextBB);
       LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
@@ -1375,6 +1399,10 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
   DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
 }
 
+void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
+  visitOperandShadowInst(UO);
+}
+
 void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
   visitOperandShadowInst(BO);
 }
@@ -1470,7 +1498,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
   DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
   SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
   auto *MTI = cast<MemTransferInst>(
-      IRB.CreateCall(I.getCalledValue(),
+      IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
                      {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
   if (ClPreserveAlignment) {
     MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
@@ -1513,7 +1541,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
 
   // Calls to this function are synthesized in wrappers, and we shouldn't
   // instrument them.
-  if (F == DFSF.DFS.DFSanVarargWrapperFn)
+  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
     return;
 
   IRBuilder<> IRB(CS.getInstruction());
@@ -1546,9 +1574,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
         TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
         std::string CustomFName = "__dfsw_";
         CustomFName += F->getName();
-        Constant *CustomF = DFSF.DFS.Mod->getOrInsertFunction(
+        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
             CustomFName, CustomFn.TransformedType);
-        if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
           CustomFn->copyAttributesFrom(F);
 
           // Custom functions returning non-void will write to the return label.
@@ -1628,7 +1656,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
         }
 
         if (!FT->getReturnType()->isVoidTy()) {
-          LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+          LoadInst *LabelLoad =
+              IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
           DFSF.setShadow(CustomCI, LabelLoad);
         }
 
@@ -1666,7 +1695,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
 
     if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
       IRBuilder<> NextIRB(Next);
-      LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
       DFSF.SkipInsts.insert(LI);
       DFSF.setShadow(CS.getInstruction(), LI);
       DFSF.NonZeroChecks.push_back(LI);
@@ -1706,10 +1735,10 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
 
     CallSite NewCS;
     if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
-      NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
-                               Args);
+      NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
+                               II->getUnwindDest(), Args);
     } else {
-      NewCS = IRB.CreateCall(Func, Args);
+      NewCS = IRB.CreateCall(NewFT, Func, Args);
     }
     NewCS.setCallingConv(CS.getCallingConv());
     NewCS.setAttributes(CS.getAttributes().removeAttributes(
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
deleted file mode 100644
index db438e78ded9..000000000000
--- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ /dev/null
@@ -1,900 +0,0 @@
-//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of EfficiencySanitizer, a family of performance tuners
-// that detects multiple performance issues via separate sub-tools.
-//
-// The instrumentation phase is straightforward:
-//   - Take action on every memory access: either inlined instrumentation,
-//     or Inserted calls to our run-time library.
-//   - Optimizations may apply to avoid instrumenting some of the accesses.
-//   - Turn mem{set,cpy,move} instrinsics into library calls.
-// The rest is handled by the run-time library.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "esan"
-
-// The tool type must be just one of these ClTool* options, as the tools
-// cannot be combined due to shadow memory constraints.
-static cl::opt<bool>
-    ClToolCacheFrag("esan-cache-frag", cl::init(false),
-                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
-static cl::opt<bool>
-    ClToolWorkingSet("esan-working-set", cl::init(false),
-                    cl::desc("Measure the working set size"), cl::Hidden);
-// Each new tool will get its own opt flag here.
-// These are converted to EfficiencySanitizerOptions for use
-// in the code.
-
-static cl::opt<bool> ClInstrumentLoadsAndStores(
-    "esan-instrument-loads-and-stores", cl::init(true),
-    cl::desc("Instrument loads and stores"), cl::Hidden);
-static cl::opt<bool> ClInstrumentMemIntrinsics(
-    "esan-instrument-memintrinsics", cl::init(true),
-    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
-static cl::opt<bool> ClInstrumentFastpath(
-    "esan-instrument-fastpath", cl::init(true),
-    cl::desc("Instrument fastpath"), cl::Hidden);
-static cl::opt<bool> ClAuxFieldInfo(
-    "esan-aux-field-info", cl::init(true),
-    cl::desc("Generate binary with auxiliary struct field information"),
-    cl::Hidden);
-
-// Experiments show that the performance difference can be 2x or more,
-// and accuracy loss is typically negligible, so we turn this on by default.
-static cl::opt<bool> ClAssumeIntraCacheLine(
-    "esan-assume-intra-cache-line", cl::init(true),
-    cl::desc("Assume each memory access touches just one cache line, for "
-             "better performance but with a potential loss of accuracy."),
-    cl::Hidden);
-
-STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
-STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
-STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
-STATISTIC(NumAccessesWithIrregularSize,
-          "Number of accesses with a size outside our targeted callout sizes");
-STATISTIC(NumIgnoredStructs, "Number of ignored structs");
-STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
-STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
-STATISTIC(NumAssumedIntraCacheLine,
-          "Number of accesses assumed to be intra-cache-line");
-
-static const uint64_t EsanCtorAndDtorPriority = 0;
-static const char *const EsanModuleCtorName = "esan.module_ctor";
-static const char *const EsanModuleDtorName = "esan.module_dtor";
-static const char *const EsanInitName = "__esan_init";
-static const char *const EsanExitName = "__esan_exit";
-
-// We need to specify the tool to the runtime earlier than
-// the ctor is called in some cases, so we set a global variable.
-static const char *const EsanWhichToolName = "__esan_which_tool";
-
-// We must keep these Shadow* constants consistent with the esan runtime.
-// FIXME: Try to place these shadow constants, the names of the __esan_*
-// interface functions, and the ToolType enum into a header shared between
-// llvm and compiler-rt.
-struct ShadowMemoryParams {
-  uint64_t ShadowMask;
-  uint64_t ShadowOffs[3];
-};
-
-static const ShadowMemoryParams ShadowParams47 = {
-    0x00000fffffffffffull,
-    {
-        0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
-    }};
-
-static const ShadowMemoryParams ShadowParams40 = {
-    0x0fffffffffull,
-    {
-        0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
-    }};
-
-// This array is indexed by the ToolType enum.
-static const int ShadowScale[] = {
-  0, // ESAN_None.
-  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
-  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
-};
-
-// MaxStructCounterNameSize is a soft size limit to avoid insanely long
-// names for those extremely large structs.
-static const unsigned MaxStructCounterNameSize = 512;
-
-namespace {
-
-static EfficiencySanitizerOptions
-OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
-  if (ClToolCacheFrag)
-    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
-  else if (ClToolWorkingSet)
-    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
-
-  // Direct opt invocation with no params will have the default ESAN_None.
-  // We run the default tool in that case.
-  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
-    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
-
-  return Options;
-}
-
-/// EfficiencySanitizer: instrument each module to find performance issues.
-class EfficiencySanitizer : public ModulePass {
-public:
-  EfficiencySanitizer(
-      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
-      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
-  StringRef getPassName() const override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  bool runOnModule(Module &M) override;
-  static char ID;
-
-private:
-  bool initOnModule(Module &M);
-  void initializeCallbacks(Module &M);
-  bool shouldIgnoreStructType(StructType *StructTy);
-  void createStructCounterName(
-      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
-  void createCacheFragAuxGV(
-    Module &M, const DataLayout &DL, StructType *StructTy,
-    GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
-  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
-                                        Constant *UnitName);
-  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
-  void createDestructor(Module &M, Constant *ToolInfoArg);
-  bool runOnFunction(Function &F, Module &M);
-  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
-  bool instrumentMemIntrinsic(MemIntrinsic *MI);
-  bool instrumentGetElementPtr(Instruction *I, Module &M);
-  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
-                           unsigned CounterIdx);
-  unsigned getFieldCounterIdx(StructType *StructTy) {
-    return 0;
-  }
-  unsigned getArrayCounterIdx(StructType *StructTy) {
-    return StructTy->getNumElements();
-  }
-  unsigned getStructCounterSize(StructType *StructTy) {
-    // The struct counter array includes:
-    // - one counter for each struct field,
-    // - one counter for the struct access within an array.
-    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
-  }
-  bool shouldIgnoreMemoryAccess(Instruction *I);
-  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
-  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
-  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
-                          Value *Addr, unsigned Alignment);
-  // Each tool has its own fastpath routine:
-  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
-                                   Value *Addr, unsigned Alignment);
-  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
-                                    Value *Addr, unsigned Alignment);
-
-  EfficiencySanitizerOptions Options;
-  LLVMContext *Ctx;
-  Type *IntptrTy;
-  // Our slowpath involves callouts to the runtime library.
-  // Access sizes are powers of two: 1, 2, 4, 8, 16.
-  static const size_t NumberOfAccessSizes = 5;
-  Function *EsanAlignedLoad[NumberOfAccessSizes];
-  Function *EsanAlignedStore[NumberOfAccessSizes];
-  Function *EsanUnalignedLoad[NumberOfAccessSizes];
-  Function *EsanUnalignedStore[NumberOfAccessSizes];
-  // For irregular sizes of any alignment:
-  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
-  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
-  Function *EsanCtorFunction;
-  Function *EsanDtorFunction;
-  // Remember the counter variable for each struct type to avoid
-  // recomputing the variable name later during instrumentation.
-  std::map<Type *, GlobalVariable *> StructTyMap;
-  ShadowMemoryParams ShadowParams;
-};
-} // namespace
-
-char EfficiencySanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(
-    EfficiencySanitizer, "esan",
-    "EfficiencySanitizer: finds performance issues.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
-    EfficiencySanitizer, "esan",
-    "EfficiencySanitizer: finds performance issues.", false, false)
-
-StringRef EfficiencySanitizer::getPassName() const {
-  return "EfficiencySanitizer";
-}
-
-void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-ModulePass *
-llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
-  return new EfficiencySanitizer(Options);
-}
-
-void EfficiencySanitizer::initializeCallbacks(Module &M) {
-  IRBuilder<> IRB(M.getContext());
-  // Initialize the callbacks.
-  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
-    const unsigned ByteSize = 1U << Idx;
-    std::string ByteSizeStr = utostr(ByteSize);
-    // We'll inline the most common (i.e., aligned and frequent sizes)
-    // load + store instrumentation: these callouts are for the slowpath.
-    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
-    EsanAlignedLoad[Idx] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
-    EsanAlignedStore[Idx] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
-    EsanUnalignedLoad[Idx] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
-    EsanUnalignedStore[Idx] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-  }
-  EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  MemmoveFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  MemcpyFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  MemsetFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt32Ty(), IntptrTy));
-}
-
-bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
-  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
-    return true;
-  return false;
-}
-
-void EfficiencySanitizer::createStructCounterName(
-    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
-  // Append NumFields and field type ids to avoid struct conflicts
-  // with the same name but different fields.
-  if (StructTy->hasName())
-    NameStr += StructTy->getName();
-  else
-    NameStr += "struct.anon";
-  // We allow the actual size of the StructCounterName to be larger than
-  // MaxStructCounterNameSize and append $NumFields and at least one
-  // field type id.
-  // Append $NumFields.
-  NameStr += "$";
-  Twine(StructTy->getNumElements()).toVector(NameStr);
-  // Append struct field type ids in the reverse order.
-  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
-    NameStr += "$";
-    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
-    if (NameStr.size() >= MaxStructCounterNameSize)
-      break;
-  }
-  if (StructTy->isLiteral()) {
-    // End with $ for literal struct.
-    NameStr += "$";
-  }
-}
-
-// Create global variables with auxiliary information (e.g., struct field size,
-// offset, and type name) for better user report.
-void EfficiencySanitizer::createCacheFragAuxGV(
-    Module &M, const DataLayout &DL, StructType *StructTy,
-    GlobalVariable *&TypeName, GlobalVariable *&Offset,
-    GlobalVariable *&Size) {
-  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
-  auto *Int32Ty = Type::getInt32Ty(*Ctx);
-  // FieldTypeName.
-  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
-  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
-                                 GlobalVariable::InternalLinkage, nullptr);
-  SmallVector<Constant *, 16> TypeNameVec;
-  // FieldOffset.
-  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
-  Offset = new GlobalVariable(M, OffsetArrayTy, true,
-                              GlobalVariable::InternalLinkage, nullptr);
-  SmallVector<Constant *, 16> OffsetVec;
-  // FieldSize
-  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
-  Size = new GlobalVariable(M, SizeArrayTy, true,
-                            GlobalVariable::InternalLinkage, nullptr);
-  SmallVector<Constant *, 16> SizeVec;
-  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
-    Type *Ty = StructTy->getElementType(i);
-    std::string Str;
-    raw_string_ostream StrOS(Str);
-    Ty->print(StrOS);
-    TypeNameVec.push_back(
-        ConstantExpr::getPointerCast(
-            createPrivateGlobalForString(M, StrOS.str(), true),
-            Int8PtrTy));
-    OffsetVec.push_back(
-        ConstantInt::get(Int32Ty,
-                         DL.getStructLayout(StructTy)->getElementOffset(i)));
-    SizeVec.push_back(ConstantInt::get(Int32Ty,
-                                       DL.getTypeAllocSize(Ty)));
-    }
-  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
-  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
-  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
-}
-
-// Create the global variable for the cache-fragmentation tool.
-GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
-    Module &M, const DataLayout &DL, Constant *UnitName) {
-  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);
-
-  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
-  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
-  auto *Int32Ty = Type::getInt32Ty(*Ctx);
-  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
-  auto *Int64Ty = Type::getInt64Ty(*Ctx);
-  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
-  // This structure should be kept consistent with the StructInfo struct
-  // in the runtime library.
-  // struct StructInfo {
-  //   const char *StructName;
-  //   u32 Size;
-  //   u32 NumFields;
-  //   u32 *FieldOffset;           // auxiliary struct field info.
-  //   u32 *FieldSize;             // auxiliary struct field info.
-  //   const char **FieldTypeName; // auxiliary struct field info.
-  //   u64 *FieldCounters;
-  //   u64 *ArrayCounter;
-  // };
-  auto *StructInfoTy =
-      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
-                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
-  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
-  // This structure should be kept consistent with the CacheFragInfo struct
-  // in the runtime library.
-  // struct CacheFragInfo {
-  //   const char *UnitName;
-  //   u32 NumStructs;
-  //   StructInfo *Structs;
-  // };
-  auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);
-
-  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
-  unsigned NumStructs = 0;
-  SmallVector<Constant *, 16> Initializers;
-
-  for (auto &StructTy : Vec) {
-    if (shouldIgnoreStructType(StructTy)) {
-      ++NumIgnoredStructs;
-      continue;
-    }
-    ++NumStructs;
-
-    // StructName.
-    SmallString<MaxStructCounterNameSize> CounterNameStr;
-    createStructCounterName(StructTy, CounterNameStr);
-    GlobalVariable *StructCounterName = createPrivateGlobalForString(
-        M, CounterNameStr, /*AllowMerging*/true);
-
-    // Counters.
-    // We create the counter array with StructCounterName and weak linkage
-    // so that the structs with the same name and layout from different
-    // compilation units will be merged into one.
-    auto *CounterArrayTy = ArrayType::get(Int64Ty,
-                                          getStructCounterSize(StructTy));
-    GlobalVariable *Counters =
-      new GlobalVariable(M, CounterArrayTy, false,
-                         GlobalVariable::WeakAnyLinkage,
-                         ConstantAggregateZero::get(CounterArrayTy),
-                         CounterNameStr);
-
-    // Remember the counter variable for each struct type.
-    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));
-
-    // We pass the field type name array, offset array, and size array to
-    // the runtime for better reporting.
-    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
-    if (ClAuxFieldInfo)
-      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);
-
-    Constant *FieldCounterIdx[2];
-    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
-    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
-                                          getFieldCounterIdx(StructTy));
-    Constant *ArrayCounterIdx[2];
-    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
-    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
-                                          getArrayCounterIdx(StructTy));
-    Initializers.push_back(ConstantStruct::get(
-        StructInfoTy,
-        ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
-        ConstantInt::get(Int32Ty,
-                         DL.getStructLayout(StructTy)->getSizeInBytes()),
-        ConstantInt::get(Int32Ty, StructTy->getNumElements()),
-        Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
-                          : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
-        Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
-                        : ConstantExpr::getPointerCast(Size, Int32PtrTy),
-        TypeName == nullptr
-            ? ConstantPointerNull::get(Int8PtrPtrTy)
-            : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
-        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
-                                       FieldCounterIdx),
-        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
-                                       ArrayCounterIdx)));
-  }
-  // Structs.
-  Constant *StructInfo;
-  if (NumStructs == 0) {
-    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
-  } else {
-    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
-    StructInfo = ConstantExpr::getPointerCast(
-        new GlobalVariable(M, StructInfoArrayTy, false,
-                           GlobalVariable::InternalLinkage,
-                           ConstantArray::get(StructInfoArrayTy, Initializers)),
-        StructInfoPtrTy);
-  }
-
-  auto *CacheFragInfoGV = new GlobalVariable(
-      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
-      ConstantStruct::get(CacheFragInfoTy, UnitName,
-                          ConstantInt::get(Int32Ty, NumStructs), StructInfo));
-  return CacheFragInfoGV;
-}
-
-// Create the tool-specific argument passed to EsanInit and EsanExit.
-Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
-                                                         const DataLayout &DL) {
-  // This structure contains tool-specific information about each compilation
-  // unit (module) and is passed to the runtime library.
-  GlobalVariable *ToolInfoGV = nullptr;
-
-  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
-  // Compilation unit name.
-  auto *UnitName = ConstantExpr::getPointerCast(
-      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
-      Int8PtrTy);
-
-  // Create the tool-specific variable.
-  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
-    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);
-
-  if (ToolInfoGV != nullptr)
-    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);
-
-  // Create the null pointer if no tool-specific variable created.
-  return ConstantPointerNull::get(Int8PtrTy);
-}
-
-void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
-  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
-  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
-                                                        false),
-                                      GlobalValue::InternalLinkage,
-                                      EsanModuleDtorName, &M);
-  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
-  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
-  Function *EsanExit = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
-                            Int8PtrTy));
-  EsanExit->setLinkage(Function::ExternalLinkage);
-  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
-  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
-}
-
-bool EfficiencySanitizer::initOnModule(Module &M) {
-
-  Triple TargetTriple(M.getTargetTriple());
-  if (TargetTriple.isMIPS64())
-    ShadowParams = ShadowParams40;
-  else
-    ShadowParams = ShadowParams47;
-
-  Ctx = &M.getContext();
-  const DataLayout &DL = M.getDataLayout();
-  IRBuilder<> IRB(M.getContext());
-  IntegerType *OrdTy = IRB.getInt32Ty();
-  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
-  IntptrTy = DL.getIntPtrType(M.getContext());
-  // Create the variable passed to EsanInit and EsanExit.
-  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
-  // Constructor
-  // We specify the tool type both in the EsanWhichToolName global
-  // and as an arg to the init routine as a sanity check.
-  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
-      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
-      /*InitArgs=*/{
-        ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
-        ToolInfoArg});
-  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);
-
-  createDestructor(M, ToolInfoArg);
-
-  new GlobalVariable(M, OrdTy, true,
-                     GlobalValue::WeakAnyLinkage,
-                     ConstantInt::get(OrdTy,
-                                      static_cast<int>(Options.ToolType)),
-                     EsanWhichToolName);
-
-  return true;
-}
-
-Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
-  // Shadow = ((App & Mask) + Offs) >> Scale
-  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
-  uint64_t Offs;
-  int Scale = ShadowScale[Options.ToolType];
-  if (Scale <= 2)
-    Offs = ShadowParams.ShadowOffs[Scale];
-  else
-    Offs = ShadowParams.ShadowOffs[0] << Scale;
-  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
-  if (Scale > 0)
-    Shadow = IRB.CreateLShr(Shadow, Scale);
-  return Shadow;
-}
-
-bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
-  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
-    // We'd like to know about cache fragmentation in vtable accesses and
-    // constant data references, so we do not currently ignore anything.
-    return false;
-  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
-    // TODO: the instrumentation disturbs the data layout on the stack, so we
-    // may want to add an option to ignore stack references (if we can
-    // distinguish them) to reduce overhead.
-  }
-  // TODO(bruening): future tools will be returning true for some cases.
-  return false;
-}
-
-bool EfficiencySanitizer::runOnModule(Module &M) {
-  bool Res = initOnModule(M);
-  initializeCallbacks(M);
-  for (auto &F : M) {
-    Res |= runOnFunction(F, M);
-  }
-  return Res;
-}
-
-bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
-  // This is required to prevent instrumenting the call to __esan_init from
-  // within the module constructor.
-  if (&F == EsanCtorFunction)
-    return false;
-  SmallVector<Instruction *, 8> LoadsAndStores;
-  SmallVector<Instruction *, 8> MemIntrinCalls;
-  SmallVector<Instruction *, 8> GetElementPtrs;
-  bool Res = false;
-  const DataLayout &DL = M.getDataLayout();
-  const TargetLibraryInfo *TLI =
-      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
-  for (auto &BB : F) {
-    for (auto &Inst : BB) {
-      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
-           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
-          !shouldIgnoreMemoryAccess(&Inst))
-        LoadsAndStores.push_back(&Inst);
-      else if (isa<MemIntrinsic>(Inst))
-        MemIntrinCalls.push_back(&Inst);
-      else if (isa<GetElementPtrInst>(Inst))
-        GetElementPtrs.push_back(&Inst);
-      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
-        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
-    }
-  }
-
-  if (ClInstrumentLoadsAndStores) {
-    for (auto Inst : LoadsAndStores) {
-      Res |= instrumentLoadOrStore(Inst, DL);
-    }
-  }
-
-  if (ClInstrumentMemIntrinsics) {
-    for (auto Inst : MemIntrinCalls) {
-      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
-    }
-  }
-
-  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
-    for (auto Inst : GetElementPtrs) {
-      Res |= instrumentGetElementPtr(Inst, M);
-    }
-  }
-
-  return Res;
-}
-
-bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
-                                                const DataLayout &DL) {
-  IRBuilder<> IRB(I);
-  bool IsStore;
-  Value *Addr;
-  unsigned Alignment;
-  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
-    IsStore = false;
-    Alignment = Load->getAlignment();
-    Addr = Load->getPointerOperand();
-  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
-    IsStore = true;
-    Alignment = Store->getAlignment();
-    Addr = Store->getPointerOperand();
-  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
-    IsStore = true;
-    Alignment = 0;
-    Addr = RMW->getPointerOperand();
-  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
-    IsStore = true;
-    Alignment = 0;
-    Addr = Xchg->getPointerOperand();
-  } else
-    llvm_unreachable("Unsupported mem access type");
-
-  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
-  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
-  Value *OnAccessFunc = nullptr;
-
-  // Convert 0 to the default alignment.
-  if (Alignment == 0)
-    Alignment = DL.getPrefTypeAlignment(OrigTy);
-
-  if (IsStore)
-    NumInstrumentedStores++;
-  else
-    NumInstrumentedLoads++;
-  int Idx = getMemoryAccessFuncIndex(Addr, DL);
-  if (Idx < 0) {
-    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
-    IRB.CreateCall(OnAccessFunc,
-                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
-                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
-  } else {
-    if (ClInstrumentFastpath &&
-        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
-      NumFastpaths++;
-      return true;
-    }
-    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
-      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
-    else
-      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
-    IRB.CreateCall(OnAccessFunc,
-                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
-  }
-  return true;
-}
-
-// It's simplest to replace the memset/memmove/memcpy intrinsics with
-// calls that the runtime library intercepts.
-// Our pass is late enough that calls should not turn back into intrinsics.
-bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
-  IRBuilder<> IRB(MI);
-  bool Res = false;
-  if (isa<MemSetInst>(MI)) {
-    IRB.CreateCall(
-        MemsetFn,
-        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
-         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
-         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
-    MI->eraseFromParent();
-    Res = true;
-  } else if (isa<MemTransferInst>(MI)) {
-    IRB.CreateCall(
-        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
-        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
-         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
-         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
-    MI->eraseFromParent();
-    Res = true;
-  } else
-    llvm_unreachable("Unsupported mem intrinsic type");
-  return Res;
-}
-
-bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
-  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
-  bool Res = false;
-  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
-    ++NumIgnoredGEPs;
-    return false;
-  }
-  Type *SourceTy = GepInst->getSourceElementType();
-  StructType *StructTy = nullptr;
-  ConstantInt *Idx;
-  // Check if GEP calculates address from a struct array.
-  if (isa<StructType>(SourceTy)) {
-    StructTy = cast<StructType>(SourceTy);
-    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
-    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
-        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
-      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
-  }
-  // Iterate all (except the first and the last) idx within each GEP instruction
-  // for possible nested struct field address calculation.
-  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
-    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
-                                   GepInst->idx_begin() + i);
-    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
-    unsigned CounterIdx = 0;
-    if (isa<ArrayType>(Ty)) {
-      ArrayType *ArrayTy = cast<ArrayType>(Ty);
-      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
-      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
-        continue;
-      // The last counter for struct array access.
-      CounterIdx = getArrayCounterIdx(StructTy);
-    } else if (isa<StructType>(Ty)) {
-      StructTy = cast<StructType>(Ty);
-      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
-        continue;
-      // Get the StructTy's subfield index.
-      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
-      assert(Idx->getSExtValue() >= 0 &&
-             Idx->getSExtValue() < StructTy->getNumElements());
-      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
-    }
-    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
-  }
-  if (Res)
-    ++NumInstrumentedGEPs;
-  else
-    ++NumIgnoredGEPs;
-  return Res;
-}
-
-bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
-                                              StructType *StructTy,
-                                              unsigned CounterIdx) {
-  GlobalVariable *CounterArray = StructTyMap[StructTy];
-  if (CounterArray == nullptr)
-    return false;
-  IRBuilder<> IRB(I);
-  Constant *Indices[2];
-  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
-  // http://llvm.org/docs/GetElementPtr.html.
-  // The first index of the GEP instruction steps through the first operand,
-  // i.e., the array itself.
-  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
-  // The second index is the index within the array.
-  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
-  Constant *Counter =
-    ConstantExpr::getGetElementPtr(
-        ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
-        CounterArray, Indices);
-  Value *Load = IRB.CreateLoad(Counter);
-  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
-                  Counter);
-  return true;
-}
-
-int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
-                                                  const DataLayout &DL) {
-  Type *OrigPtrTy = Addr->getType();
-  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
-  assert(OrigTy->isSized());
-  // The size is always a multiple of 8.
-  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
-  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
-      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
-    // Irregular sizes do not have per-size call targets.
-    NumAccessesWithIrregularSize++;
-    return -1;
-  }
-  size_t Idx = countTrailingZeros(TypeSizeBytes);
-  assert(Idx < NumberOfAccessSizes);
-  return Idx;
-}
-
-bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
-                                             const DataLayout &DL, bool IsStore,
-                                             Value *Addr, unsigned Alignment) {
-  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
-    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
-  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
-    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
-  }
-  return false;
-}
-
-bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
-                                                      const DataLayout &DL,
-                                                      Value *Addr,
-                                                      unsigned Alignment) {
-  // Do nothing.
-  return true; // Return true to avoid slowpath instrumentation.
-}
-
-bool EfficiencySanitizer::instrumentFastpathWorkingSet(
-    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
-  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
-  IRBuilder<> IRB(I);
-  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
-  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
-  // Bail to the slowpath if the access might touch multiple cache lines.
-  // An access aligned to its size is guaranteed to be intra-cache-line.
-  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
-  // and thus larger than a cache line for platforms this tool targets
-  // (and our shadow memory setup assumes 64-byte cache lines).
-  assert(TypeSize <= 128);
-  if (!(TypeSize == 8 ||
-        (Alignment % (TypeSize / 8)) == 0)) {
-    if (ClAssumeIntraCacheLine)
-      ++NumAssumedIntraCacheLine;
-    else
-      return false;
-  }
-
-  // We inline instrumentation to set the corresponding shadow bits for
-  // each cache line touched by the application.  Here we handle a single
-  // load or store where we've already ruled out the possibility that it
-  // might touch more than one cache line and thus we simply update the
-  // shadow memory for a single cache line.
-  // Our shadow memory model is fine with races when manipulating shadow values.
-  // We generate the following code:
-  //
-  //   const char BitMask = 0x81;
-  //   char *ShadowAddr = appToShadow(AppAddr);
-  //   if ((*ShadowAddr & BitMask) != BitMask)
-  //     *ShadowAddr |= Bitmask;
-  //
-  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
-  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
-  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
-  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
-  // The bottom bit is used for the current sampling period's working set.
-  // The top bit is used for the total working set.  We set both on each
-  // memory access, if they are not already set.
-  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B
-
-  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
-  // The AND and CMP will be turned into a TEST instruction by the compiler.
-  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
-  Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
-  // FIXME: do I need to call SetCurrentDebugLocation?
-  IRB.SetInsertPoint(CmpTerm);
-  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
-  // which are used by the runtime library.
-  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
-  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
-  IRB.SetInsertPoint(I);
-
-  return true;
-}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9af64ed332cd..59950ffc4e9a 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -1,9 +1,8 @@
 //===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -103,11 +102,11 @@ private:
                                       std::vector<Regex> &Regexes);
 
   // Get pointers to the functions in the runtime library.
-  Constant *getStartFileFunc();
-  Constant *getEmitFunctionFunc();
-  Constant *getEmitArcsFunc();
-  Constant *getSummaryInfoFunc();
-  Constant *getEndFileFunc();
+  FunctionCallee getStartFileFunc();
+  FunctionCallee getEmitFunctionFunc();
+  FunctionCallee getEmitArcsFunc();
+  FunctionCallee getSummaryInfoFunc();
+  FunctionCallee getEndFileFunc();
 
   // Add the function to write out all our counters to the global destructor
   // list.
@@ -648,7 +647,7 @@ void GCOVProfiler::AddFlushBeforeForkAndExec() {
   for (auto I : ForkAndExecs) {
     IRBuilder<> Builder(I);
     FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
-    Constant *GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+    FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
     Builder.CreateCall(GCOVFlush);
     I->getParent()->splitBasicBlock(I);
   }
@@ -811,14 +810,14 @@ bool GCOVProfiler::emitProfileArcs() {
             auto It = EdgeToCounter.find({Pred, &BB});
             assert(It != EdgeToCounter.end());
             const unsigned Edge = It->second;
-            Value *EdgeCounter =
-                BuilderForPhi.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
+            Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64(
+                Counters->getValueType(), Counters, 0, Edge);
             Phi->addIncoming(EdgeCounter, Pred);
           }
 
           // Skip phis, landingpads.
           IRBuilder<> Builder(&*BB.getFirstInsertionPt());
-          Value *Count = Builder.CreateLoad(Phi);
+          Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi);
           Count = Builder.CreateAdd(Count, Builder.getInt64(1));
           Builder.CreateStore(Count, Phi);
 
@@ -827,9 +826,9 @@ bool GCOVProfiler::emitProfileArcs() {
             auto It = EdgeToCounter.find({&BB, nullptr});
             assert(It != EdgeToCounter.end());
             const unsigned Edge = It->second;
-            Value *Counter =
-                Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
-            Value *Count = Builder.CreateLoad(Counter);
+            Value *Counter = Builder.CreateConstInBoundsGEP2_64(
+                Counters->getValueType(), Counters, 0, Edge);
+            Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter);
             Count = Builder.CreateAdd(Count, Builder.getInt64(1));
             Builder.CreateStore(Count, Counter);
           }
@@ -864,7 +863,7 @@ bool GCOVProfiler::emitProfileArcs() {
 
     // Initialize the environment and register the local writeout and flush
     // functions.
-    Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+    FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
     Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
     Builder.CreateRetVoid();
 
@@ -874,22 +873,21 @@ bool GCOVProfiler::emitProfileArcs() {
   return Result;
 }
 
-Constant *GCOVProfiler::getStartFileFunc() {
+FunctionCallee GCOVProfiler::getStartFileFunc() {
   Type *Args[] = {
     Type::getInt8PtrTy(*Ctx),  // const char *orig_filename
     Type::getInt8PtrTy(*Ctx),  // const char version[4]
     Type::getInt32Ty(*Ctx),    // uint32_t checksum
   };
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
-  auto *Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy);
-  if (Function *FunRes = dyn_cast<Function>(Res))
-    if (auto AK = TLI->getExtAttrForI32Param(false))
-      FunRes->addParamAttr(2, AK);
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(*Ctx, 2, AK);
+  FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
   return Res;
-
 }
 
-Constant *GCOVProfiler::getEmitFunctionFunc() {
+FunctionCallee GCOVProfiler::getEmitFunctionFunc() {
   Type *Args[] = {
     Type::getInt32Ty(*Ctx),    // uint32_t ident
     Type::getInt8PtrTy(*Ctx),  // const char *function_name
@@ -898,36 +896,34 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
     Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
   };
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
-  auto *Res = M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
-  if (Function *FunRes = dyn_cast<Function>(Res))
-    if (auto AK = TLI->getExtAttrForI32Param(false)) {
-      FunRes->addParamAttr(0, AK);
-      FunRes->addParamAttr(2, AK);
-      FunRes->addParamAttr(3, AK);
-      FunRes->addParamAttr(4, AK);
-    }
-  return Res;
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false)) {
+    AL = AL.addParamAttribute(*Ctx, 0, AK);
+    AL = AL.addParamAttribute(*Ctx, 2, AK);
+    AL = AL.addParamAttribute(*Ctx, 3, AK);
+    AL = AL.addParamAttribute(*Ctx, 4, AK);
+  }
+  return M->getOrInsertFunction("llvm_gcda_emit_function", FTy, AL);
 }
 
-Constant *GCOVProfiler::getEmitArcsFunc() {
+FunctionCallee GCOVProfiler::getEmitArcsFunc() {
   Type *Args[] = {
     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
   };
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
-  auto *Res = M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
-  if (Function *FunRes = dyn_cast<Function>(Res))
-    if (auto AK = TLI->getExtAttrForI32Param(false))
-      FunRes->addParamAttr(0, AK);
-  return Res;
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(*Ctx, 0, AK);
+  return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
 }
 
-Constant *GCOVProfiler::getSummaryInfoFunc() {
+FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
 }
 
-Constant *GCOVProfiler::getEndFileFunc() {
+FunctionCallee GCOVProfiler::getEndFileFunc() {
   FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
 }
@@ -947,11 +943,11 @@ Function *GCOVProfiler::insertCounterWriteout(
   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
   IRBuilder<> Builder(BB);
 
-  Constant *StartFile = getStartFileFunc();
-  Constant *EmitFunction = getEmitFunctionFunc();
-  Constant *EmitArcs = getEmitArcsFunc();
-  Constant *SummaryInfo = getSummaryInfoFunc();
-  Constant *EndFile = getEndFileFunc();
+  FunctionCallee StartFile = getStartFileFunc();
+  FunctionCallee EmitFunction = getEmitFunctionFunc();
+  FunctionCallee EmitArcs = getEmitArcsFunc();
+  FunctionCallee SummaryInfo = getSummaryInfoFunc();
+  FunctionCallee EndFile = getEndFileFunc();
 
   NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
   if (!CUNodes) {
@@ -1088,22 +1084,32 @@ Function *GCOVProfiler::insertCounterWriteout(
   PHINode *IV =
       Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
   IV->addIncoming(Builder.getInt32(0), BB);
-  auto *FileInfoPtr =
-      Builder.CreateInBoundsGEP(FileInfoArrayGV, {Builder.getInt32(0), IV});
-  auto *StartFileCallArgsPtr = Builder.CreateStructGEP(FileInfoPtr, 0);
+  auto *FileInfoPtr = Builder.CreateInBoundsGEP(
+      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
+  auto *StartFileCallArgsPtr =
+      Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0);
   auto *StartFileCall = Builder.CreateCall(
       StartFile,
-      {Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 0)),
-       Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 1)),
-       Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 2))});
+      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
+                                                  StartFileCallArgsPtr, 0)),
+       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
+                                                  StartFileCallArgsPtr, 1)),
+       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
+                                                  StartFileCallArgsPtr, 2))});
   if (auto AK = TLI->getExtAttrForI32Param(false))
     StartFileCall->addParamAttr(2, AK);
   auto *NumCounters =
-      Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 1));
+      Builder.CreateLoad(FileInfoTy->getElementType(1),
+                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1));
   auto *EmitFunctionCallArgsArray =
-      Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 2));
+      Builder.CreateLoad(FileInfoTy->getElementType(2),
+                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2));
   auto *EmitArcsCallArgsArray =
-      Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 3));
+      Builder.CreateLoad(FileInfoTy->getElementType(3),
+                         Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3));
   auto *EnterCounterLoopCond =
       Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
   Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
@@ -1111,16 +1117,26 @@ Function *GCOVProfiler::insertCounterWriteout(
   Builder.SetInsertPoint(CounterLoopHeader);
   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
   JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
-  auto *EmitFunctionCallArgsPtr =
-      Builder.CreateInBoundsGEP(EmitFunctionCallArgsArray, {JV});
+  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
+      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
   auto *EmitFunctionCall = Builder.CreateCall(
       EmitFunction,
-      {Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 0)),
-       Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 1)),
-       Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 2)),
-       Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 3)),
-       Builder.CreateLoad(
-           Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 4))});
+      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+                                                  EmitFunctionCallArgsPtr, 0)),
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+                                                  EmitFunctionCallArgsPtr, 1)),
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+                                                  EmitFunctionCallArgsPtr, 2)),
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(3),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+                                                  EmitFunctionCallArgsPtr, 3)),
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(4),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+                                                  EmitFunctionCallArgsPtr,
+                                                  4))});
   if (auto AK = TLI->getExtAttrForI32Param(false)) {
     EmitFunctionCall->addParamAttr(0, AK);
     EmitFunctionCall->addParamAttr(2, AK);
@@ -1128,11 +1144,15 @@ Function *GCOVProfiler::insertCounterWriteout(
     EmitFunctionCall->addParamAttr(4, AK);
   }
   auto *EmitArcsCallArgsPtr =
-      Builder.CreateInBoundsGEP(EmitArcsCallArgsArray, {JV});
+      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
   auto *EmitArcsCall = Builder.CreateCall(
       EmitArcs,
-      {Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 0)),
-       Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 1))});
+      {Builder.CreateLoad(
+           EmitArcsCallArgsTy->getElementType(0),
+           Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)),
+       Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1),
+                          Builder.CreateStructGEP(EmitArcsCallArgsTy,
+                                                  EmitArcsCallArgsPtr, 1))});
   if (auto AK = TLI->getExtAttrForI32Param(false))
     EmitArcsCall->addParamAttr(0, AK);
   auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
@@ -1172,7 +1192,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
 
   // Write out the current counters.
-  Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
   assert(WriteoutF && "Need to create the writeout function first!");
 
   IRBuilder<> Builder(Entry);
diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index d04c2b76288f..90a9f4955a4b 100644
--- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1,9 +1,8 @@
 //===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +11,7 @@
 /// based on tagged addressing.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -21,6 +21,7 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -125,10 +126,10 @@ static cl::opt<bool> ClEnableKhwasan(
 // is accessed. The shadow mapping looks like:
 //    Shadow = (Mem >> scale) + offset
 
-static cl::opt<unsigned long long> ClMappingOffset(
-    "hwasan-mapping-offset",
-    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
-    cl::init(0));
+static cl::opt<uint64_t>
+    ClMappingOffset("hwasan-mapping-offset",
+                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
+                    cl::Hidden, cl::init(0));
 
 static cl::opt<bool>
     ClWithIfunc("hwasan-with-ifunc",
@@ -147,43 +148,47 @@ static cl::opt<bool>
                          cl::desc("Record stack frames with tagged allocations "
                                   "in a thread-local ring buffer"),
                          cl::Hidden, cl::init(true));
-static cl::opt<bool>
-    ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
-                              cl::desc("create static frame descriptions"),
-                              cl::Hidden, cl::init(true));
-
 static cl::opt<bool>
     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
                               cl::desc("instrument memory intrinsics"),
                               cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
+                              cl::desc("instrument landing pads"), cl::Hidden,
+                              cl::init(true));
+
+static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
+                                       cl::desc("inline all checks"),
+                                       cl::Hidden, cl::init(false));
+
 namespace {
 
 /// An instrumentation pass implementing detection of addressability bugs
 /// using tagged pointers.
-class HWAddressSanitizer : public FunctionPass {
+class HWAddressSanitizer {
 public:
-  // Pass identification, replacement for typeid.
-  static char ID;
-
-  explicit HWAddressSanitizer(bool CompileKernel = false, bool Recover = false)
-      : FunctionPass(ID) {
+  explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
+                              bool Recover = false) {
     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
         ClEnableKhwasan : CompileKernel;
-  }
 
-  StringRef getPassName() const override { return "HWAddressSanitizer"; }
+    initializeModule(M);
+  }
 
-  bool runOnFunction(Function &F) override;
-  bool doInitialization(Module &M) override;
+  bool sanitizeFunction(Function &F);
+  void initializeModule(Module &M);
 
   void initializeCallbacks(Module &M);
 
+  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
   Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
 
   void untagPointerOperand(Instruction *I, Value *Addr);
-  Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
-  void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+  Value *shadowBase();
+  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                  unsigned AccessSizeIndex,
                                  Instruction *InsertBefore);
   void instrumentMemIntrinsic(MemIntrinsic *MI);
@@ -193,11 +198,15 @@ public:
                                    Value **MaybeMask);
 
   bool isInterestingAlloca(const AllocaInst &AI);
-  bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag);
+  bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
-  bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
-                       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+  bool instrumentStack(
+      SmallVectorImpl<AllocaInst *> &Allocas,
+      DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+      SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
+  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
   Value *getNextTagWithCall(IRBuilder<> &IRB);
   Value *getStackBaseTag(IRBuilder<> &IRB);
   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
@@ -205,31 +214,14 @@ public:
   Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
 
   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
-  Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
 
 private:
   LLVMContext *C;
   std::string CurModuleUniqueId;
   Triple TargetTriple;
-  Function *HWAsanMemmove, *HWAsanMemcpy, *HWAsanMemset;
-
-  // Frame description is a way to pass names/sizes of local variables
-  // to the run-time w/o adding extra executable code in every function.
-  // We do this by creating a separate section with {PC,Descr} pairs and passing
-  // the section beg/end to __hwasan_init_frames() at module init time.
-  std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
-  void createFrameGlobal(Function &F, const std::string &FrameString);
-  // Get the section name for frame descriptions. Currently ELF-only.
-  const char *getFrameSection() { return "__hwasan_frames"; }
-  const char *getFrameSectionBeg() { return  "__start___hwasan_frames"; }
-  const char *getFrameSectionEnd() { return  "__stop___hwasan_frames"; }
-  GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
-                                          const char *Name) {
-    auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
-                                 nullptr, Name);
-    GV->setVisibility(GlobalValue::HiddenVisibility);
-    return GV;
-  }
+  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
+  FunctionCallee HWAsanHandleVfork;
 
   /// This struct defines the shadow mapping using the rule:
   ///   shadow = (mem >> Scale) + Offset.
@@ -253,48 +245,95 @@ private:
   Type *IntptrTy;
   Type *Int8PtrTy;
   Type *Int8Ty;
+  Type *Int32Ty;
 
   bool CompileKernel;
   bool Recover;
 
   Function *HwasanCtorFunction;
 
-  Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
-  Function *HwasanMemoryAccessCallbackSized[2];
+  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+  FunctionCallee HwasanMemoryAccessCallbackSized[2];
 
-  Function *HwasanTagMemoryFunc;
-  Function *HwasanGenerateTagFunc;
-  Function *HwasanThreadEnterFunc;
+  FunctionCallee HwasanTagMemoryFunc;
+  FunctionCallee HwasanGenerateTagFunc;
+  FunctionCallee HwasanThreadEnterFunc;
 
   Constant *ShadowGlobal;
 
   Value *LocalDynamicShadow = nullptr;
+  Value *StackBaseTag = nullptr;
   GlobalValue *ThreadPtrGlobal = nullptr;
 };
 
+class HWAddressSanitizerLegacyPass : public FunctionPass {
+public:
+  // Pass identification, replacement for typeid.
+  static char ID;
+
+  explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
+                                        bool Recover = false)
+      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {}
+
+  StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+  bool doInitialization(Module &M) override {
+    HWASan = llvm::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+    return true;
+  }
+
+  bool runOnFunction(Function &F) override {
+    return HWASan->sanitizeFunction(F);
+  }
+
+  bool doFinalization(Module &M) override {
+    HWASan.reset();
+    return false;
+  }
+
+private:
+  std::unique_ptr<HWAddressSanitizer> HWASan;
+  bool CompileKernel;
+  bool Recover;
+};
+
 } // end anonymous namespace
 
-char HWAddressSanitizer::ID = 0;
+char HWAddressSanitizerLegacyPass::ID = 0;
 
 INITIALIZE_PASS_BEGIN(
-    HWAddressSanitizer, "hwasan",
+    HWAddressSanitizerLegacyPass, "hwasan",
     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
     false)
 INITIALIZE_PASS_END(
-    HWAddressSanitizer, "hwasan",
+    HWAddressSanitizerLegacyPass, "hwasan",
     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
     false)
 
-FunctionPass *llvm::createHWAddressSanitizerPass(bool CompileKernel,
-                                                 bool Recover) {
+FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
+                                                           bool Recover) {
   assert(!CompileKernel || Recover);
-  return new HWAddressSanitizer(CompileKernel, Recover);
+  return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
+}
+
+HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
+    : CompileKernel(CompileKernel), Recover(Recover) {}
+
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+                                              ModuleAnalysisManager &MAM) {
+  HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+  bool Modified = false;
+  for (Function &F : M)
+    Modified |= HWASan.sanitizeFunction(F);
+  if (Modified)
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
 }
 
 /// Module-level initialization.
 ///
 /// inserts a call to __hwasan_init to the module's constructor list.
-bool HWAddressSanitizer::doInitialization(Module &M) {
+void HWAddressSanitizer::initializeModule(Module &M) {
   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
   auto &DL = M.getDataLayout();
 
@@ -308,47 +347,35 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
   IntptrTy = IRB.getIntPtrTy(DL);
   Int8PtrTy = IRB.getInt8PtrTy();
   Int8Ty = IRB.getInt8Ty();
+  Int32Ty = IRB.getInt32Ty();
 
   HwasanCtorFunction = nullptr;
   if (!CompileKernel) {
     std::tie(HwasanCtorFunction, std::ignore) =
-        createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName,
-                                            kHwasanInitName,
-                                            /*InitArgTypes=*/{},
-                                            /*InitArgs=*/{});
-    Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
-    HwasanCtorFunction->setComdat(CtorComdat);
-    appendToGlobalCtors(M, HwasanCtorFunction, 0, HwasanCtorFunction);
-
-    // Create a zero-length global in __hwasan_frame so that the linker will
-    // always create start and stop symbols.
-    //
-    // N.B. If we ever start creating associated metadata in this pass this
-    // global will need to be associated with the ctor.
-    Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
-    auto GV =
-        new GlobalVariable(M, Int8Arr0Ty, /*isConstantGlobal*/ true,
-                           GlobalVariable::PrivateLinkage,
-                           Constant::getNullValue(Int8Arr0Ty), "__hwasan");
-    GV->setSection(getFrameSection());
-    GV->setComdat(CtorComdat);
-    appendToCompilerUsed(M, GV);
-
-    IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
-    IRBCtor.CreateCall(
-        declareSanitizerInitFunction(M, "__hwasan_init_frames",
-                                     {Int8PtrTy, Int8PtrTy}),
-        {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
-         createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
+        getOrCreateSanitizerCtorAndInitFunctions(
+            M, kHwasanModuleCtorName, kHwasanInitName,
+            /*InitArgTypes=*/{},
+            /*InitArgs=*/{},
+            // This callback is invoked when the functions are created the first
+            // time. Hook them into the global ctors list in that case:
+            [&](Function *Ctor, FunctionCallee) {
+              Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+              Ctor->setComdat(CtorComdat);
+              appendToGlobalCtors(M, Ctor, 0, Ctor);
+            });
   }
 
-  if (!TargetTriple.isAndroid())
-    appendToCompilerUsed(
-        M, ThreadPtrGlobal = new GlobalVariable(
-               M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
-               "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));
-
-  return true;
+  if (!TargetTriple.isAndroid()) {
+    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
+      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
+                                    GlobalValue::ExternalLinkage, nullptr,
+                                    "__hwasan_tls", nullptr,
+                                    GlobalVariable::InitialExecTLSModel);
+      appendToCompilerUsed(M, GV);
+      return GV;
+    });
+    ThreadPtrGlobal = cast<GlobalVariable>(C);
+  }
 }
 
 void HWAddressSanitizer::initializeCallbacks(Module &M) {
@@ -357,44 +384,55 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
     const std::string TypeStr = AccessIsWrite ? "store" : "load";
     const std::string EndingStr = Recover ? "_noabort" : "";
 
-    HwasanMemoryAccessCallbackSized[AccessIsWrite] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
-            FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));
+    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
+        ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
+        FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
 
     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
          AccessSizeIndex++) {
       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
-          checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+          M.getOrInsertFunction(
               ClMemoryAccessCallbackPrefix + TypeStr +
                   itostr(1ULL << AccessSizeIndex) + EndingStr,
-              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
+              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
     }
   }
 
-  HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
-  HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));
+  HwasanTagMemoryFunc = M.getOrInsertFunction(
+      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+  HwasanGenerateTagFunc =
+      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
 
-  if (Mapping.InGlobal)
-    ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
-                                       ArrayType::get(IRB.getInt8Ty(), 0));
+  ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
+                                     ArrayType::get(IRB.getInt8Ty(), 0));
 
   const std::string MemIntrinCallbackPrefix =
       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
-  HWAsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
-  HWAsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
-  HWAsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
-      IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
-  HwasanThreadEnterFunc = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy()));
+  HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                        IRB.getInt8PtrTy(), IntptrTy);
+  HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                       IRB.getInt8PtrTy(), IntptrTy);
+  HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                       IRB.getInt32Ty(), IntptrTy);
+
+  HWAsanHandleVfork =
+      M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+
+  HwasanThreadEnterFunc =
+      M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
+}
+
+Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+  // An empty inline asm with input reg == output reg.
+  // An opaque no-op cast, basically.
+  InlineAsm *Asm = InlineAsm::get(
+      FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
+      StringRef(""), StringRef("=r,0"),
+      /*hasSideEffects=*/false);
+  return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
 }
 
 Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
@@ -403,18 +441,12 @@ Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
     return nullptr;
 
   if (Mapping.InGlobal) {
-    // An empty inline asm with input reg == output reg.
-    // An opaque pointer-to-int cast, basically.
-    InlineAsm *Asm = InlineAsm::get(
-        FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
-        StringRef(""), StringRef("=r,0"),
-        /*hasSideEffects=*/false);
-    return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+    return getDynamicShadowIfunc(IRB);
   } else {
     Value *GlobalDynamicAddress =
         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
-            kHwasanShadowMemoryDynamicAddress, IntptrTy);
-    return IRB.CreateLoad(GlobalDynamicAddress);
+            kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
+    return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
   }
 }
 
@@ -506,29 +538,44 @@ void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
 }
 
-Value *HWAddressSanitizer::memToShadow(Value *Mem, Type *Ty, IRBuilder<> &IRB) {
+Value *HWAddressSanitizer::shadowBase() {
+  if (LocalDynamicShadow)
+    return LocalDynamicShadow;
+  return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
+                                   Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
   // Mem >> Scale
   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
   if (Mapping.Offset == 0)
-    return Shadow;
+    return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
   // (Mem >> Scale) + Offset
-  Value *ShadowBase;
-  if (LocalDynamicShadow)
-    ShadowBase = LocalDynamicShadow;
-  else
-    ShadowBase = ConstantInt::get(Ty, Mapping.Offset);
-  return IRB.CreateAdd(Shadow, ShadowBase);
+  return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
 }
 
-void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
                                                    unsigned AccessSizeIndex,
                                                    Instruction *InsertBefore) {
+  const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
   IRBuilder<> IRB(InsertBefore);
+
+  if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
+      TargetTriple.isOSBinFormatELF() && !Recover) {
+    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+    Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+    IRB.CreateCall(
+        Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess),
+        {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+    return;
+  }
+
+  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
   Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
                                   IRB.getInt8Ty());
   Value *AddrLong = untagPointer(IRB, PtrLong);
-  Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
-  Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
+  Value *Shadow = memToShadow(AddrLong, IRB);
+  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
   Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
 
   int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
@@ -540,11 +587,35 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
   }
 
   Instruction *CheckTerm =
-      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
+      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
                                 MDBuilder(*C).createBranchWeights(1, 100000));
 
   IRB.SetInsertPoint(CheckTerm);
-  const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
+  Value *OutOfShortGranuleTagRange =
+      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
+  Instruction *CheckFailTerm =
+      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+
+  IRB.SetInsertPoint(CheckTerm);
+  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+  PtrLowBits = IRB.CreateAdd(
+      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
+  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
+  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
+                            MDBuilder(*C).createBranchWeights(1, 100000),
+                            nullptr, nullptr, CheckFailTerm->getParent());
+
+  IRB.SetInsertPoint(CheckTerm);
+  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
+  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
+  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
+  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
+                            MDBuilder(*C).createBranchWeights(1, 100000),
+                            nullptr, nullptr, CheckFailTerm->getParent());
+
+  IRB.SetInsertPoint(CheckFailTerm);
   InlineAsm *Asm;
   switch (TargetTriple.getArch()) {
     case Triple::x86_64:
@@ -568,6 +639,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
       report_fatal_error("unsupported architecture");
   }
   IRB.CreateCall(Asm, PtrLong);
+  if (Recover)
+    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
 }
 
 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
@@ -610,7 +683,6 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
     return false; //FIXME
 
   IRBuilder<> IRB(I);
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
   if (isPowerOf2_64(TypeSize) &&
       (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
       (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
@@ -618,13 +690,14 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
     size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
     if (ClInstrumentWithCalls) {
       IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
-                     AddrLong);
+                     IRB.CreatePointerCast(Addr, IntptrTy));
     } else {
-      instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I);
+      instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I);
     }
   } else {
     IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
-                   {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
+                   {IRB.CreatePointerCast(Addr, IntptrTy),
+                    ConstantInt::get(IntptrTy, TypeSize / 8)});
   }
   untagPointerOperand(I, Addr);
 
@@ -644,27 +717,33 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
 }
 
 bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
-                                   Value *Tag) {
-  size_t Size = (getAllocaSizeInBytes(*AI) + Mapping.getAllocaAlignment() - 1) &
-                ~(Mapping.getAllocaAlignment() - 1);
+                                   Value *Tag, size_t Size) {
+  size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
 
   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
   if (ClInstrumentWithCalls) {
     IRB.CreateCall(HwasanTagMemoryFunc,
                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
-                    ConstantInt::get(IntptrTy, Size)});
+                    ConstantInt::get(IntptrTy, AlignedSize)});
   } else {
     size_t ShadowSize = Size >> Mapping.Scale;
-    Value *ShadowPtr = IRB.CreateIntToPtr(
-        memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
-        Int8PtrTy);
+    Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
     // If this memset is not inlined, it will be intercepted in the hwasan
     // runtime library. That's OK, because the interceptor skips the checks if
     // the address is in the shadow region.
     // FIXME: the interceptor is not as fast as real memset. Consider lowering
     // llvm.memset right here into either a sequence of stores, or a call to
     // hwasan_tag_memory.
-    IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+    if (ShadowSize)
+      IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+    if (Size != AlignedSize) {
+      IRB.CreateStore(
+          ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()),
+          IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
+      IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
+                                   Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
+                                   AlignedSize - 1));
+    }
   }
   return true;
 }
@@ -674,10 +753,16 @@ static unsigned RetagMask(unsigned AllocaNo) {
   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
   // masks.
   // The list does not include the value 255, which is used for UAR.
-  static unsigned FastMasks[] = {
-      0,   1,   2,   3,   4,   6,   7,   8,   12,  14,  15, 16,  24,
-      28,  30,  31,  32,  48,  56,  60,  62,  63,  64,  96, 112, 120,
-      124, 126, 127, 128, 192, 224, 240, 248, 252, 254};
+  //
+  // Because we are more likely to use earlier elements of this list than later
+  // ones, it is sorted in increasing order of probability of collision with a
+  // mask allocated (temporally) nearby. The program that generated this list
+  // can be found at:
+  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
+  static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
+                                 48, 16,  120, 248, 56,  24,  8,   124, 252,
+                                 60, 28,  12,  4,   126, 254, 62,  30,  14,
+                                 6,  2,   127, 63,  31,  15,  7,   3,   1};
   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
 }
 
@@ -688,6 +773,8 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
   if (ClGenerateTagsWithCalls)
     return getNextTagWithCall(IRB);
+  if (StackBaseTag)
+    return StackBaseTag;
   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
   // first).
   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -763,7 +850,8 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
     Function *ThreadPointerFunc =
         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
     Value *SlotPtr = IRB.CreatePointerCast(
-        IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x30),
+        IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+                               IRB.CreateCall(ThreadPointerFunc), 0x30),
         Ty->getPointerTo(0));
     return SlotPtr;
   }
@@ -774,45 +862,21 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
   return nullptr;
 }
 
-// Creates a string with a description of the stack frame (set of Allocas).
-// The string is intended to be human readable.
-// The current form is: Size1 Name1; Size2 Name2; ...
-std::string
-HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
-  std::ostringstream Descr;
-  for (auto AI : Allocas)
-    Descr << getAllocaSizeInBytes(*AI) << " " <<  AI->getName().str() << "; ";
-  return Descr.str();
-}
-
-// Creates a global in the frame section which consists of two pointers:
-// the function PC and the frame string constant.
-void HWAddressSanitizer::createFrameGlobal(Function &F,
-                                           const std::string &FrameString) {
-  Module &M = *F.getParent();
-  auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
-  auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
-  auto GV = new GlobalVariable(
-      M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
-      ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
-      "__hwasan");
-  GV->setSection(getFrameSection());
-  appendToCompilerUsed(M, GV);
-  // Put GV into the F's Comadat so that if F is deleted GV can be deleted too.
-  if (auto Comdat =
-          GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
-    GV->setComdat(Comdat);
-}
+void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
+  if (!Mapping.InTls) {
+    LocalDynamicShadow = getDynamicShadowNonTls(IRB);
+    return;
+  }
 
-Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
-                                        bool WithFrameRecord) {
-  if (!Mapping.InTls)
-    return getDynamicShadowNonTls(IRB);
+  if (!WithFrameRecord && TargetTriple.isAndroid()) {
+    LocalDynamicShadow = getDynamicShadowIfunc(IRB);
+    return;
+  }
 
   Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
   assert(SlotPtr);
 
-  Instruction *ThreadLong = IRB.CreateLoad(SlotPtr);
+  Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
 
   Function *F = IRB.GetInsertBlock()->getParent();
   if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
@@ -826,7 +890,7 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
     // FIXME: This should call a new runtime function with a custom calling
     // convention to avoid needing to spill all arguments here.
     IRB.CreateCall(HwasanThreadEnterFunc);
-    LoadInst *ReloadThreadLong = IRB.CreateLoad(SlotPtr);
+    LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
 
     IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
     PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
@@ -840,15 +904,21 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
       TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
 
   if (WithFrameRecord) {
+    StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
     // Prepare ring buffer data.
-    auto PC = IRB.CreatePtrToInt(F, IntptrTy);
+    Value *PC;
+    if (TargetTriple.getArch() == Triple::aarch64)
+      PC = readRegister(IRB, "pc");
+    else
+      PC = IRB.CreatePtrToInt(F, IntptrTy);
     auto GetStackPointerFn =
         Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
     Value *SP = IRB.CreatePtrToInt(
         IRB.CreateCall(GetStackPointerFn,
                        {Constant::getNullValue(IRB.getInt32Ty())}),
         IntptrTy);
-    // Mix SP and PC. TODO: also add the tag to the mix.
+    // Mix SP and PC.
     // Assumptions:
     // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
     // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
@@ -879,16 +949,38 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
   // Get shadow base address by aligning RecordPtr up.
   // Note: this is not correct if the pointer is already aligned.
   // Runtime library will make sure this never happens.
-  Value *ShadowBase = IRB.CreateAdd(
+  LocalDynamicShadow = IRB.CreateAdd(
       IRB.CreateOr(
           ThreadLongMaybeUntagged,
           ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
       ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
-  return ShadowBase;
+  LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  Function *ReadRegister =
+      Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
+  MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
+  Value *Args[] = {MetadataAsValue::get(*C, MD)};
+  return IRB.CreateCall(ReadRegister, Args);
+}
+
+bool HWAddressSanitizer::instrumentLandingPads(
+    SmallVectorImpl<Instruction *> &LandingPadVec) {
+  for (auto *LP : LandingPadVec) {
+    IRBuilder<> IRB(LP->getNextNode());
+    IRB.CreateCall(
+        HWAsanHandleVfork,
+        {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
+                                                                      : "sp")});
+  }
+  return true;
 }
 
 bool HWAddressSanitizer::instrumentStack(
     SmallVectorImpl<AllocaInst *> &Allocas,
+    DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
   // alloca addresses using that. Unfortunately, offsets are not known yet
@@ -913,14 +1005,22 @@ bool HWAddressSanitizer::instrumentStack(
         U.set(Replacement);
     }
 
-    tagAlloca(IRB, AI, Tag);
+    for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
+      DIExpression *OldExpr = DDI->getExpression();
+      DIExpression *NewExpr = DIExpression::append(
+          OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
+      DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+    }
+
+    size_t Size = getAllocaSizeInBytes(*AI);
+    tagAlloca(IRB, AI, Tag, Size);
 
     for (auto RI : RetVec) {
       IRB.SetInsertPoint(RI);
 
       // Re-tag alloca memory with the special UAR tag.
       Value *Tag = getUARTag(IRB, StackTag);
-      tagAlloca(IRB, AI, Tag);
+      tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment()));
     }
   }
 
@@ -943,7 +1043,7 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
           !AI.isSwiftError());
 }
 
-bool HWAddressSanitizer::runOnFunction(Function &F) {
+bool HWAddressSanitizer::sanitizeFunction(Function &F) {
   if (&F == HwasanCtorFunction)
     return false;
 
@@ -955,15 +1055,12 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
   SmallVector<Instruction*, 16> ToInstrument;
   SmallVector<AllocaInst*, 8> AllocasToInstrument;
   SmallVector<Instruction*, 8> RetVec;
+  SmallVector<Instruction*, 8> LandingPadVec;
+  DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
   for (auto &BB : F) {
     for (auto &Inst : BB) {
       if (ClInstrumentStack)
         if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
-          // Realign all allocas. We don't want small uninteresting allocas to
-          // hide in instrumented alloca's padding.
-          if (AI->getAlignment() < Mapping.getAllocaAlignment())
-            AI->setAlignment(Mapping.getAllocaAlignment());
-          // Instrument some of them.
           if (isInterestingAlloca(*AI))
             AllocasToInstrument.push_back(AI);
           continue;
@@ -973,6 +1070,13 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
           isa<CleanupReturnInst>(Inst))
         RetVec.push_back(&Inst);
 
+      if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
+        if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
+          AllocaDeclareMap[Alloca].push_back(DDI);
+
+      if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
+        LandingPadVec.push_back(&Inst);
+
       Value *MaybeMask = nullptr;
       bool IsWrite;
       unsigned Alignment;
@@ -984,33 +1088,93 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
     }
   }
 
-  if (AllocasToInstrument.empty() && ToInstrument.empty())
-    return false;
+  initializeCallbacks(*F.getParent());
 
-  if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
-    createFrameGlobal(F, createFrameString(AllocasToInstrument));
+  if (!LandingPadVec.empty())
+    instrumentLandingPads(LandingPadVec);
 
-  initializeCallbacks(*F.getParent());
+  if (AllocasToInstrument.empty() && ToInstrument.empty())
+    return false;
 
   assert(!LocalDynamicShadow);
 
   Instruction *InsertPt = &*F.getEntryBlock().begin();
   IRBuilder<> EntryIRB(InsertPt);
-  LocalDynamicShadow = emitPrologue(EntryIRB,
-                                    /*WithFrameRecord*/ ClRecordStackHistory &&
-                                        !AllocasToInstrument.empty());
+  emitPrologue(EntryIRB,
+               /*WithFrameRecord*/ ClRecordStackHistory &&
+                   !AllocasToInstrument.empty());
 
   bool Changed = false;
   if (!AllocasToInstrument.empty()) {
     Value *StackTag =
         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
-    Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
+    Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+                               StackTag);
+  }
+
+  // Pad and align each of the allocas that we instrumented to stop small
+  // uninteresting allocas from hiding in instrumented alloca's padding and so
+  // that we have enough space to store real tags for short granules.
+  DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+  for (AllocaInst *AI : AllocasToInstrument) {
+    uint64_t Size = getAllocaSizeInBytes(*AI);
+    uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
+    AI->setAlignment(std::max(AI->getAlignment(), 16u));
+    if (Size != AlignedSize) {
+      Type *AllocatedType = AI->getAllocatedType();
+      if (AI->isArrayAllocation()) {
+        uint64_t ArraySize =
+            cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+        AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+      }
+      Type *TypeWithPadding = StructType::get(
+          AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+      auto *NewAI = new AllocaInst(
+          TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+      NewAI->takeName(AI);
+      NewAI->setAlignment(AI->getAlignment());
+      NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+      NewAI->setSwiftError(AI->isSwiftError());
+      NewAI->copyMetadata(*AI);
+      auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+      AI->replaceAllUsesWith(Bitcast);
+      AllocaToPaddedAllocaMap[AI] = NewAI;
+    }
+  }
+
+  if (!AllocaToPaddedAllocaMap.empty()) {
+    for (auto &BB : F)
+      for (auto &Inst : BB)
+        if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+          if (auto *AI =
+                  dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
+            if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
+              DVI->setArgOperand(
+                  0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
+    for (auto &P : AllocaToPaddedAllocaMap)
+      P.first->eraseFromParent();
+  }
+
+  // If we split the entry block, move any allocas that were originally in the
+  // entry block back into the entry block so that they aren't treated as
+  // dynamic allocas.
+  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
+    InsertPt = &*F.getEntryBlock().begin();
+    for (auto II = EntryIRB.GetInsertBlock()->begin(),
+              IE = EntryIRB.GetInsertBlock()->end();
+         II != IE;) {
+      Instruction *I = &*II++;
+      if (auto *AI = dyn_cast<AllocaInst>(I))
+        if (isa<ConstantInt>(AI->getArraySize()))
+          I->moveBefore(InsertPt);
+    }
   }
 
   for (auto Inst : ToInstrument)
     Changed |= instrumentMemAccess(Inst);
 
   LocalDynamicShadow = nullptr;
+  StackBaseTag = nullptr;
 
   return Changed;
 }
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 58436c8560ad..c7371f567ff3 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -1,9 +1,8 @@
 //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -239,7 +238,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
     LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
                       << "  Target_func: " << Target << "\n");
 
-    if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
+    if (ICPInvokeOnly && isa<CallInst>(Inst)) {
       LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
       ORE.emit([&]() {
         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -247,7 +246,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
       });
       break;
     }
-    if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
+    if (ICPCallOnly && isa<InvokeInst>(Inst)) {
       LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
       ORE.emit([&]() {
         return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -311,10 +310,10 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst,
       promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);
 
   if (AttachProfToDirectCall) {
-    SmallVector<uint32_t, 1> Weights;
-    Weights.push_back(Count);
     MDBuilder MDB(NewInst->getContext());
-    NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+    NewInst->setMetadata(
+        LLVMContext::MD_prof,
+        MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
   }
 
   using namespace ore;
@@ -394,9 +393,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
   }
   bool Changed = false;
   for (auto &F : M) {
-    if (F.isDeclaration())
-      continue;
-    if (F.hasFnAttribute(Attribute::OptimizeNone))
+    if (F.isDeclaration() || F.hasOptNone())
       continue;
 
     std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
new file mode 100644
index 000000000000..a2c1ddfd279e
--- /dev/null
+++ b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -0,0 +1,211 @@
+//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+#define DEBUG_TYPE "instrorderfile"
+
+static cl::opt<std::string> ClOrderFileWriteMapping(
+    "orderfile-write-mapping", cl::init(""),
+    cl::desc(
+        "Dump functions and their MD5 hash to deobfuscate profile data"),
+    cl::Hidden);
+
+namespace {
+
+// We need a global bitmap to tell if a function is executed. We also
+// need a global variable to save the order of functions. We can use a
+// fixed-size buffer that saves the MD5 hash of the function. We need
+// a global variable to save the index into the buffer.
+
+std::mutex MappingMutex;
+
+struct InstrOrderFile {
+private:
+  GlobalVariable *OrderFileBuffer;
+  GlobalVariable *BufferIdx;
+  GlobalVariable *BitMap;
+  ArrayType *BufferTy;
+  ArrayType *MapTy;
+
+public:
+  InstrOrderFile() {}
+  // Create the zero-initialized order-file buffer, index and bitmap globals.
+  void createOrderFileData(Module &M) {
+    LLVMContext &Ctx = M.getContext();
+    int NumFunctions = 0;
+    for (Function &F : M) {
+      if (!F.isDeclaration())
+        NumFunctions++;
+    }
+    // An i64 buffer, an i32 index, and one i8 flag per defined function.
+    BufferTy =
+        ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
+    Type *IdxTy = Type::getInt32Ty(Ctx);
+    MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
+
+    // Create the global variables.
+    std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
+    OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
+                           Constant::getNullValue(BufferTy), SymbolName);
+    Triple TT = Triple(M.getTargetTriple());
+    OrderFileBuffer->setSection(
+        getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
+    // Unlike the buffer, the index global is not given an explicit section.
+    std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
+    BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
+                           Constant::getNullValue(IdxTy), IndexName);
+    // The bitmap is module-private, unlike the linkonce_odr buffer and index.
+    std::string BitMapName = "bitmap_0";
+    BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
+                                Constant::getNullValue(MapTy), BitMapName);
+  }
+
+  // Add F's order-file instrumentation ahead of its entry block; FuncId is F's
+  // bitmap index. With -orderfile-write-mapping set, also log F's hash/name.
+  void generateCodeSequence(Module &M, Function &F, int FuncId) {
+    if (!ClOrderFileWriteMapping.empty()) {
+      std::lock_guard<std::mutex> LogLock(MappingMutex);
+      std::error_code EC;
+      llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append);
+      if (EC) {
+        report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
+                           " to save mapping file for order file instrumentation\n");
+      } else {
+        std::stringstream stream;
+        stream << std::hex << MD5Hash(F.getName());
+        std::string singleLine = "MD5 " + stream.str() + " " +
+                                 std::string(F.getName()) + '\n';
+        OS << singleLine;
+      }
+    }
+    // Both blocks created below are inserted before F's original entry block.
+    BasicBlock *OrigEntry = &F.getEntryBlock();
+
+    LLVMContext &Ctx = M.getContext();
+    IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+    IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+
+    // Create a new entry block for instrumentation. We will check the bitmap
+    // in this basic block.
+    BasicBlock *NewEntry =
+        BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
+    IRBuilder<> entryB(NewEntry);
+    // Create a basic block for updating the circular buffer.
+    BasicBlock *UpdateOrderFileBB =
+        BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
+    IRBuilder<> updateB(UpdateOrderFileBB);
+
+    // Check the bitmap, if it is already 1, do nothing.
+    // Otherwise, set the bit, grab the index, update the buffer.
+    Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
+                         ConstantInt::get(Int32Ty, FuncId)};
+    Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
+    LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
+    entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
+    Value *IsNotExecuted =
+        entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
+    entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
+
+    // Fill up UpdateOrderFileBB: grab the index, update the buffer!
+    Value *IdxVal = updateB.CreateAtomicRMW(
+        AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
+        AtomicOrdering::SequentiallyConsistent);
+    // We need to wrap around the index to fit it inside the buffer.
+    Value *WrappedIdx = updateB.CreateAnd(
+        IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
+    Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
+    Value *BufferAddr =
+        updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
+    updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
+                        BufferAddr);
+    updateB.CreateBr(OrigEntry);
+  }
+
+  // Create the globals, then instrument every function defined in M.
+  bool run(Module &M) {
+    createOrderFileData(M);
+    int NextId = 0;
+    for (Function &Fn : M) {
+      // Declarations are not instrumented and do not consume a function id.
+      if (Fn.isDeclaration())
+        continue;
+      generateCodeSequence(M, Fn, NextId++);
+    }
+    // The module is always reported as modified.
+    return true;
+  }
+
+}; // End of InstrOrderFile struct
+// Legacy pass-manager wrapper that runs InstrOrderFile over a module.
+class InstrOrderFileLegacyPass : public ModulePass {
+public:
+  static char ID;
+  // Constructing the pass also initializes it in the global PassRegistry.
+  InstrOrderFileLegacyPass() : ModulePass(ID) {
+    initializeInstrOrderFileLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+} // End anonymous namespace
+
+// Legacy pass-manager entry point. Reports no change when skipModule(M) is
+// true; otherwise instruments M and forwards InstrOrderFile::run's result.
+bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
+  const bool ShouldRun = !skipModule(M);
+  return ShouldRun && InstrOrderFile().run(M);
+}
+
+// New pass-manager entry point: no analyses are preserved after a change.
+PreservedAnalyses
+InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
+  const bool Modified = InstrOrderFile().run(M);
+  return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
+                      "Instrumentation for Order File", false, false)
+INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
+                    "Instrumentation for Order File", false, false)
+
+char InstrOrderFileLegacyPass::ID = 0;
+// Factory for the legacy pass manager: returns a newly allocated pass object.
+ModulePass *llvm::createInstrOrderFilePass() {
+  return new InstrOrderFileLegacyPass();
+}
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 15b94388cbe5..63c2b8078967 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1,9 +1,8 @@
 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Attributes.h"
@@ -148,8 +149,8 @@ public:
   static char ID;
 
   InstrProfilingLegacyPass() : ModulePass(ID) {}
-  InstrProfilingLegacyPass(const InstrProfOptions &Options)
-      : ModulePass(ID), InstrProf(Options) {}
+  InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
+      : ModulePass(ID), InstrProf(Options, IsCS) {}
 
   StringRef getPassName() const override {
     return "Frontend instrumentation-based coverage lowering";
@@ -187,7 +188,7 @@ public:
     SSA.AddAvailableValue(PH, Init);
   }
 
-  void doExtraRewritesBeforeFinalDeletion() const override {
+  void doExtraRewritesBeforeFinalDeletion() override {
     for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
       BasicBlock *ExitBlock = ExitBlocks[i];
       Instruction *InsertPos = InsertPts[i];
@@ -196,6 +197,7 @@ public:
       // block.
       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
       Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+      Type *Ty = LiveInValue->getType();
       IRBuilder<> Builder(InsertPos);
       if (AtomicCounterUpdatePromoted)
         // automic update currently can only be promoted across the current
@@ -203,7 +205,7 @@ public:
         Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                 AtomicOrdering::SequentiallyConsistent);
       else {
-        LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
+        LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
         auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
         auto *NewStore = Builder.CreateStore(NewVal, Addr);
 
@@ -232,9 +234,9 @@ class PGOCounterPromoter {
 public:
   PGOCounterPromoter(
       DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
-      Loop &CurLoop, LoopInfo &LI)
+      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
       : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
-        LI(LI) {
+        LI(LI), BFI(BFI) {
 
     SmallVector<BasicBlock *, 8> LoopExitBlocks;
     SmallPtrSet<BasicBlock *, 8> BlockSet;
@@ -263,6 +265,20 @@ public:
       SSAUpdater SSA(&NewPHIs);
       Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
 
+      // If BFI is set, we will use it to guide the promotions.
+      if (BFI) {
+        auto *BB = Cand.first->getParent();
+        auto InstrCount = BFI->getBlockProfileCount(BB);
+        if (!InstrCount)
+          continue;
+        auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
+        // If the average loop trip count is not greater than 1.5, we skip
+        // promotion.
+        if (PreheaderCount &&
+            (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+          continue;
+      }
+
       PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
                                         L.getLoopPreheader(), ExitBlocks,
                                         InsertPts, LoopToCandidates, LI);
@@ -312,6 +328,11 @@ private:
 
     SmallVector<BasicBlock *, 8> ExitingBlocks;
     LP->getExitingBlocks(ExitingBlocks);
+
+    // If BFI is set, we do more aggressive promotions based on BFI.
+    if (BFI)
+      return (unsigned)-1;
+
     // Not considierered speculative.
     if (ExitingBlocks.size() == 1)
       return MaxNumOfPromotionsPerLoop;
@@ -343,6 +364,7 @@ private:
   SmallVector<Instruction *, 8> InsertPts;
   Loop &L;
   LoopInfo &LI;
+  BlockFrequencyInfo *BFI;
 };
 
 } // end anonymous namespace
@@ -365,8 +387,9 @@ INITIALIZE_PASS_END(
     "Frontend instrumentation-based coverage lowering.", false, false)
 
 ModulePass *
-llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
-  return new InstrProfilingLegacyPass(Options);
+llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
+                                     bool IsCS) {
+  return new InstrProfilingLegacyPass(Options, IsCS);
 }
 
 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
@@ -415,6 +438,13 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
   LoopInfo LI(DT);
   DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
 
+  std::unique_ptr<BlockFrequencyInfo> BFI;
+  if (Options.UseBFIInPromotion) {
+    std::unique_ptr<BranchProbabilityInfo> BPI;
+    BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
+    BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
+  }
+
   for (const auto &LoadStore : PromotionCandidates) {
     auto *CounterLoad = LoadStore.first;
     auto *CounterStore = LoadStore.second;
@@ -430,7 +460,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
   // Do a post-order traversal of the loops so that counter updates can be
   // iteratively hoisted outside the loop nest.
   for (auto *Loop : llvm::reverse(Loops)) {
-    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
+    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
     Promoter.run(&TotalCountersPromoted);
   }
 }
@@ -509,13 +539,16 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
   return true;
 }
 
-static Constant *getOrInsertValueProfilingCall(Module &M,
-                                               const TargetLibraryInfo &TLI,
-                                               bool IsRange = false) {
+static FunctionCallee
+getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
+                              bool IsRange = false) {
   LLVMContext &Ctx = M.getContext();
   auto *ReturnTy = Type::getVoidTy(M.getContext());
 
-  Constant *Res;
+  AttributeList AL;
+  if (auto AK = TLI.getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(M.getContext(), 2, AK);
+
   if (!IsRange) {
     Type *ParamTypes[] = {
 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
@@ -523,8 +556,8 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
     };
     auto *ValueProfilingCallTy =
         FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
-    Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
-                                ValueProfilingCallTy);
+    return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+                                 ValueProfilingCallTy, AL);
   } else {
     Type *RangeParamTypes[] = {
 #define VALUE_RANGE_PROF 1
@@ -534,15 +567,9 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
     };
     auto *ValueRangeProfilingCallTy =
         FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
-    Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
-                                ValueRangeProfilingCallTy);
+    return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
+                                 ValueRangeProfilingCallTy, AL);
   }
-
-  if (Function *FunRes = dyn_cast<Function>(Res)) {
-    if (auto AK = TLI.getExtAttrForI32Param(false))
-      FunRes->addParamAttr(2, AK);
-  }
-  return Res;
 }
 
 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
@@ -601,13 +628,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
 
   IRBuilder<> Builder(Inc);
   uint64_t Index = Inc->getIndex()->getZExtValue();
-  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
+  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
+                                                   Counters, 0, Index);
 
   if (Options.Atomic || AtomicCounterUpdateAll) {
     Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                             AtomicOrdering::Monotonic);
   } else {
-    Value *Load = Builder.CreateLoad(Addr, "pgocount");
+    Value *IncStep = Inc->getStep();
+    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
     auto *Count = Builder.CreateAdd(Load, Inc->getStep());
     auto *Store = Builder.CreateStore(Count, Addr);
     if (isCounterPromotionEnabled())
@@ -678,32 +707,14 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
   return F->hasAddressTaken() || F->hasLinkOnceLinkage();
 }
 
-static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
-                                               InstrProfIncrementInst *Inc) {
-  if (!needsComdatForCounter(F, M))
-    return nullptr;
-
-  // COFF format requires a COMDAT section to have a key symbol with the same
-  // name. The linker targeting COFF also requires that the COMDAT
-  // a section is associated to must precede the associating section. For this
-  // reason, we must choose the counter var's name as the name of the comdat.
-  StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
-                                ? getInstrProfCountersVarPrefix()
-                                : getInstrProfComdatPrefix());
-  return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
-}
-
-static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
+static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
   // Don't do this for Darwin.  compiler-rt uses linker magic.
-  if (Triple(M.getTargetTriple()).isOSDarwin())
+  if (TT.isOSDarwin())
     return false;
-
   // Use linker script magic to get data/cnts/name start/end.
-  if (Triple(M.getTargetTriple()).isOSLinux() ||
-      Triple(M.getTargetTriple()).isOSFreeBSD() ||
-      Triple(M.getTargetTriple()).isOSNetBSD() ||
-      Triple(M.getTargetTriple()).isOSFuchsia() ||
-      Triple(M.getTargetTriple()).isPS4CPU())
+  if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+      TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
+      TT.isOSWindows())
     return false;
 
   return true;
@@ -720,13 +731,37 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
     PD = It->second;
   }
 
-  // Move the name variable to the right section. Place them in a COMDAT group
-  // if the associated function is a COMDAT. This will make sure that
-  // only one copy of counters of the COMDAT function will be emitted after
-  // linking.
+  // Match the linkage and visibility of the name global, except on COFF, where
+  // the linkage must be local and consequently the visibility must be
+  // default.
   Function *Fn = Inc->getParent()->getParent();
-  Comdat *ProfileVarsComdat = nullptr;
-  ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
+  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+  if (TT.isOSBinFormatCOFF()) {
+    Linkage = GlobalValue::InternalLinkage;
+    Visibility = GlobalValue::DefaultVisibility;
+  }
+
+  // Move the name variable to the right section. Place them in a COMDAT group
+  // if the associated function is a COMDAT. This will make sure that only one
+  // copy of counters of the COMDAT function will be emitted after linking. Keep
+  // in mind that this pass may run before the inliner, so we need to create a
+  // new comdat group for the counters and profiling data. If we use the comdat
+  // of the parent function, that will result in relocations against discarded
+  // sections.
+  Comdat *Cmdt = nullptr;
+  GlobalValue::LinkageTypes CounterLinkage = Linkage;
+  if (needsComdatForCounter(*Fn, *M)) {
+    StringRef CmdtPrefix = getInstrProfComdatPrefix();
+    if (TT.isOSBinFormatCOFF()) {
+      // For COFF, the comdat group name must be the name of a symbol in the
+      // group. Use the counter variable name, and upgrade its linkage to
+      // something externally visible, like linkonce_odr.
+      CmdtPrefix = getInstrProfCountersVarPrefix();
+      CounterLinkage = GlobalValue::LinkOnceODRLinkage;
+    }
+    Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix));
+  }
 
   uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
   LLVMContext &Ctx = M->getContext();
@@ -734,20 +769,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
 
   // Create the counters variable.
   auto *CounterPtr =
-      new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+      new GlobalVariable(*M, CounterTy, false, Linkage,
                          Constant::getNullValue(CounterTy),
                          getVarName(Inc, getInstrProfCountersVarPrefix()));
-  CounterPtr->setVisibility(NamePtr->getVisibility());
+  CounterPtr->setVisibility(Visibility);
   CounterPtr->setSection(
       getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
   CounterPtr->setAlignment(8);
-  CounterPtr->setComdat(ProfileVarsComdat);
+  CounterPtr->setComdat(Cmdt);
+  CounterPtr->setLinkage(CounterLinkage);
 
   auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
   // Allocate statically the array of pointers to value profile nodes for
   // the current function.
   Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
-  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
+  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
     uint64_t NS = 0;
     for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
       NS += PD.NumValueSites[Kind];
@@ -755,14 +791,14 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
       ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
 
       auto *ValuesVar =
-          new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
+          new GlobalVariable(*M, ValuesTy, false, Linkage,
                              Constant::getNullValue(ValuesTy),
                              getVarName(Inc, getInstrProfValuesVarPrefix()));
-      ValuesVar->setVisibility(NamePtr->getVisibility());
+      ValuesVar->setVisibility(Visibility);
       ValuesVar->setSection(
           getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
       ValuesVar->setAlignment(8);
-      ValuesVar->setComdat(ProfileVarsComdat);
+      ValuesVar->setComdat(Cmdt);
       ValuesPtrExpr =
           ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
     }
@@ -789,13 +825,13 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
 #include "llvm/ProfileData/InstrProfData.inc"
   };
-  auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
+  auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
                                   ConstantStruct::get(DataTy, DataVals),
                                   getVarName(Inc, getInstrProfDataVarPrefix()));
-  Data->setVisibility(NamePtr->getVisibility());
+  Data->setVisibility(Visibility);
   Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
   Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
-  Data->setComdat(ProfileVarsComdat);
+  Data->setComdat(Cmdt);
 
   PD.RegionCounters = CounterPtr;
   PD.DataVar = Data;
@@ -820,7 +856,7 @@ void InstrProfiling::emitVNodes() {
   // For now only support this on platforms that do
   // not require runtime registration to discover
   // named section start/end.
-  if (needsRuntimeRegistrationOfSectionRange(*M))
+  if (needsRuntimeRegistrationOfSectionRange(TT))
     return;
 
   size_t TotalNS = 0;
@@ -881,6 +917,10 @@ void InstrProfiling::emitNameData() {
   NamesSize = CompressedNameStr.size();
   NamesVar->setSection(
       getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+  // On COFF, it's important to reduce the alignment down to 1 to prevent the
+  // linker from inserting padding before the start of the names section or
+  // between names entries.
+  NamesVar->setAlignment(1);
   UsedVars.push_back(NamesVar);
 
   for (auto *NamePtr : ReferencedNames)
@@ -888,7 +928,7 @@ void InstrProfiling::emitNameData() {
 }
 
 void InstrProfiling::emitRegistration() {
-  if (!needsRuntimeRegistrationOfSectionRange(*M))
+  if (!needsRuntimeRegistrationOfSectionRange(TT))
     return;
 
   // Construct the function.
@@ -929,7 +969,7 @@ void InstrProfiling::emitRegistration() {
 bool InstrProfiling::emitRuntimeHook() {
   // We expect the linker to be invoked with -u<hook_var> flag for linux,
   // for which case there is no need to emit the user function.
-  if (Triple(M->getTargetTriple()).isOSLinux())
+  if (TT.isOSLinux())
     return false;
 
   // If the module's provided its own runtime, we don't need to do anything.
@@ -950,11 +990,11 @@ bool InstrProfiling::emitRuntimeHook() {
   if (Options.NoRedZone)
     User->addFnAttr(Attribute::NoRedZone);
   User->setVisibility(GlobalValue::HiddenVisibility);
-  if (Triple(M->getTargetTriple()).supportsCOMDAT())
+  if (TT.supportsCOMDAT())
     User->setComdat(M->getOrInsertComdat(User->getName()));
 
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
-  auto *Load = IRB.CreateLoad(Var);
+  auto *Load = IRB.CreateLoad(Int32Ty, Var);
   IRB.CreateRet(Load);
 
   // Mark the user variable as used so that it isn't stripped out.
@@ -968,23 +1008,13 @@ void InstrProfiling::emitUses() {
 }
 
 void InstrProfiling::emitInitialization() {
-  StringRef InstrProfileOutput = Options.InstrProfileOutput;
-
-  if (!InstrProfileOutput.empty()) {
-    // Create variable for profile name.
-    Constant *ProfileNameConst =
-        ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
-    GlobalVariable *ProfileNameVar = new GlobalVariable(
-        *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
-        ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
-    if (TT.supportsCOMDAT()) {
-      ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
-      ProfileNameVar->setComdat(M->getOrInsertComdat(
-          StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
-    }
-  }
-
-  Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+  // Create ProfileFileName variable. Don't do this for the
+  // context-sensitive instrumentation lowering: This lowering is after
+  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
+  // have already created the variable before LTO/ThinLTO linking.
+  if (!IsCS)
+    createProfileFileNameVar(*M, Options.InstrProfileOutput);
+  Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
   if (!RegisterF)
     return;
 
@@ -1000,8 +1030,7 @@ void InstrProfiling::emitInitialization() {
 
   // Add the basic block and the necessary calls.
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
-  if (RegisterF)
-    IRB.CreateCall(RegisterF, {});
+  IRB.CreateCall(RegisterF, {});
   IRB.CreateRetVoid();
 
   appendToGlobalCtors(*M, F, 0);
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index c3e323613c70..f56a1bd91b89 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -1,9 +1,8 @@
 //===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -25,10 +24,12 @@ using namespace llvm;
 /// Moves I before IP. Returns new insert point.
 static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
   // If I is IP, move the insert point down.
-  if (I == IP)
-    return ++IP;
-  // Otherwise, move I before IP and return IP.
-  I->moveBefore(&*IP);
+  if (I == IP) {
+    ++IP;
+  } else {
+    // Otherwise, move I before IP and return IP.
+    I->moveBefore(&*IP);
+  }
   return IP;
 }
 
@@ -101,8 +102,8 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
 /// initializeInstrumentation - Initialize all passes in the TransformUtils
 /// library.
 void llvm::initializeInstrumentation(PassRegistry &Registry) {
-  initializeAddressSanitizerPass(Registry);
-  initializeAddressSanitizerModulePass(Registry);
+  initializeAddressSanitizerLegacyPassPass(Registry);
+  initializeModuleAddressSanitizerLegacyPassPass(Registry);
   initializeBoundsCheckingLegacyPassPass(Registry);
   initializeControlHeightReductionLegacyPassPass(Registry);
   initializeGCOVProfilerLegacyPassPass(Registry);
@@ -110,13 +111,13 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializePGOInstrumentationUseLegacyPassPass(Registry);
   initializePGOIndirectCallPromotionLegacyPassPass(Registry);
   initializePGOMemOPSizeOptLegacyPassPass(Registry);
+  initializeInstrOrderFileLegacyPassPass(Registry);
   initializeInstrProfilingLegacyPassPass(Registry);
   initializeMemorySanitizerLegacyPassPass(Registry);
-  initializeHWAddressSanitizerPass(Registry);
+  initializeHWAddressSanitizerLegacyPassPass(Registry);
   initializeThreadSanitizerLegacyPassPass(Registry);
   initializeSanitizerCoverageModulePass(Registry);
   initializeDataFlowSanitizerPass(Registry);
-  initializeEfficiencySanitizerPass(Registry);
 }
 
 /// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 4eb758c69c58..892a6a26da91 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -1,9 +1,8 @@
 //===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,8 +67,7 @@ namespace llvm {
     /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
     /// spanning tree.
     MaximumSpanningTree(EdgeWeights &EdgeVector) {
-
-      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
+      llvm::stable_sort(EdgeVector, EdgeWeightCompare());
 
       // Create spanning tree, Forest contains a special data structure
       // that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index e6573af2077d..b25cbed1bb02 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1,9 +1,8 @@
 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -144,6 +143,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -248,6 +248,13 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
        cl::desc("exact handling of relational integer ICmp"),
        cl::Hidden, cl::init(false));
 
+static cl::opt<bool> ClHandleLifetimeIntrinsics(
+    "msan-handle-lifetime-intrinsics",
+    cl::desc(
+        "when possible, poison scoped variables at the beginning of the scope "
+        "(slower, but more precise)"),
+    cl::Hidden, cl::init(true));
+
 // When compiling the Linux kernel, we sometimes see false positives related to
 // MSan being unable to understand that inline assembly calls may initialize
 // local variables.
@@ -305,22 +312,23 @@ static cl::opt<bool> ClWithComdat("msan-with-comdat",
 
 // These options allow to specify custom memory map parameters
 // See MemoryMapParams for details.
-static cl::opt<unsigned long long> ClAndMask("msan-and-mask",
-       cl::desc("Define custom MSan AndMask"),
-       cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClAndMask("msan-and-mask",
+                                   cl::desc("Define custom MSan AndMask"),
+                                   cl::Hidden, cl::init(0));
 
-static cl::opt<unsigned long long> ClXorMask("msan-xor-mask",
-       cl::desc("Define custom MSan XorMask"),
-       cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
+                                   cl::desc("Define custom MSan XorMask"),
+                                   cl::Hidden, cl::init(0));
 
-static cl::opt<unsigned long long> ClShadowBase("msan-shadow-base",
-       cl::desc("Define custom MSan ShadowBase"),
-       cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
+                                      cl::desc("Define custom MSan ShadowBase"),
+                                      cl::Hidden, cl::init(0));
 
-static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
-       cl::desc("Define custom MSan OriginBase"),
-       cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
+                                      cl::desc("Define custom MSan OriginBase"),
+                                      cl::Hidden, cl::init(0));
 
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
 static const char *const kMsanInitName = "__msan_init";
 
 namespace {
@@ -454,17 +462,16 @@ namespace {
 /// the module.
 class MemorySanitizer {
 public:
-  MemorySanitizer(Module &M, int TrackOrigins = 0, bool Recover = false,
-                  bool EnableKmsan = false) {
+  MemorySanitizer(Module &M, MemorySanitizerOptions Options) {
     this->CompileKernel =
-        ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : EnableKmsan;
+        ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel;
     if (ClTrackOrigins.getNumOccurrences() > 0)
       this->TrackOrigins = ClTrackOrigins;
     else
-      this->TrackOrigins = this->CompileKernel ? 2 : TrackOrigins;
+      this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins;
     this->Recover = ClKeepGoing.getNumOccurrences() > 0
                         ? ClKeepGoing
-                        : (this->CompileKernel | Recover);
+                        : (this->CompileKernel | Options.Recover);
     initializeModule(M);
   }
 
@@ -536,41 +543,42 @@ private:
   bool CallbacksInitialized = false;
 
   /// The run-time callback to print a warning.
-  Value *WarningFn;
+  FunctionCallee WarningFn;
 
   // These arrays are indexed by log2(AccessSize).
-  Value *MaybeWarningFn[kNumberOfAccessSizes];
-  Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
+  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
 
   /// Run-time helper that generates a new origin value for a stack
   /// allocation.
-  Value *MsanSetAllocaOrigin4Fn;
+  FunctionCallee MsanSetAllocaOrigin4Fn;
 
   /// Run-time helper that poisons stack on function entry.
-  Value *MsanPoisonStackFn;
+  FunctionCallee MsanPoisonStackFn;
 
   /// Run-time helper that records a store (or any event) of an
   /// uninitialized value and returns an updated origin id encoding this info.
-  Value *MsanChainOriginFn;
+  FunctionCallee MsanChainOriginFn;
 
   /// MSan runtime replacements for memmove, memcpy and memset.
-  Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
 
   /// KMSAN callback for task-local function argument shadow.
-  Value *MsanGetContextStateFn;
+  StructType *MsanContextStateTy;
+  FunctionCallee MsanGetContextStateFn;
 
   /// Functions for poisoning/unpoisoning local variables
-  Value *MsanPoisonAllocaFn, *MsanUnpoisonAllocaFn;
+  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
 
   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
   /// pointers.
-  Value *MsanMetadataPtrForLoadN, *MsanMetadataPtrForStoreN;
-  Value *MsanMetadataPtrForLoad_1_8[4];
-  Value *MsanMetadataPtrForStore_1_8[4];
-  Value *MsanInstrumentAsmStoreFn;
+  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
+  FunctionCallee MsanMetadataPtrForLoad_1_8[4];
+  FunctionCallee MsanMetadataPtrForStore_1_8[4];
+  FunctionCallee MsanInstrumentAsmStoreFn;
 
   /// Helper to choose between different MsanMetadataPtrXxx().
-  Value *getKmsanShadowOriginAccessFn(bool isStore, int size);
+  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
 
   /// Memory map parameters used in application-to-shadow calculation.
   const MemoryMapParams *MapParams;
@@ -586,6 +594,8 @@ private:
 
   /// An empty volatile inline asm that prevents callback merge.
   InlineAsm *EmptyAsm;
+
+  Function *MsanCtorFunction;
 };
 
 /// A legacy function pass for msan instrumentation.
@@ -595,10 +605,8 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
   // Pass identification, replacement for typeid.
   static char ID;
 
-  MemorySanitizerLegacyPass(int TrackOrigins = 0, bool Recover = false,
-                            bool EnableKmsan = false)
-      : FunctionPass(ID), TrackOrigins(TrackOrigins), Recover(Recover),
-        EnableKmsan(EnableKmsan) {}
+  MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
+      : FunctionPass(ID), Options(Options) {}
   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -612,16 +620,14 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
   bool doInitialization(Module &M) override;
 
   Optional<MemorySanitizer> MSan;
-  int TrackOrigins;
-  bool Recover;
-  bool EnableKmsan;
+  MemorySanitizerOptions Options;
 };
 
 } // end anonymous namespace
 
 PreservedAnalyses MemorySanitizerPass::run(Function &F,
                                            FunctionAnalysisManager &FAM) {
-  MemorySanitizer Msan(*F.getParent(), TrackOrigins, Recover, EnableKmsan);
+  MemorySanitizer Msan(*F.getParent(), Options);
   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
@@ -637,10 +643,9 @@ INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
                     "MemorySanitizer: detects uninitialized reads.", false,
                     false)
 
-FunctionPass *llvm::createMemorySanitizerLegacyPassPass(int TrackOrigins,
-                                                        bool Recover,
-                                                        bool CompileKernel) {
-  return new MemorySanitizerLegacyPass(TrackOrigins, Recover, CompileKernel);
+FunctionPass *
+llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
+  return new MemorySanitizerLegacyPass(Options);
 }
 
 /// Create a non-const global initialized with the given string.
@@ -675,18 +680,15 @@ void MemorySanitizer::createKernelApi(Module &M) {
                                     IRB.getInt32Ty());
   // Requests the per-task context state (kmsan_context_state*) from the
   // runtime library.
+  MsanContextStateTy = StructType::get(
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
+      IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+      OriginTy);
   MsanGetContextStateFn = M.getOrInsertFunction(
-      "__msan_get_context_state",
-      PointerType::get(
-          StructType::get(ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
-                          ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
-                          ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
-                          ArrayType::get(IRB.getInt64Ty(),
-                                         kParamTLSSize / 8), /* va_arg_origin */
-                          IRB.getInt64Ty(),
-                          ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
-                          OriginTy),
-          0));
+      "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
 
   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
                                 PointerType::get(IRB.getInt32Ty(), 0));
@@ -821,8 +823,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
   CallbacksInitialized = true;
 }
 
-Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
-  Value **Fns =
+FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
+                                                             int size) {
+  FunctionCallee *Fns =
       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
   switch (size) {
   case 1:
@@ -839,6 +842,8 @@ Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
 }
 
 /// Module-level initialization.
+///
+/// Inserts a call to __msan_init into the module's constructor list.
 void MemorySanitizer::initializeModule(Module &M) {
   auto &DL = M.getDataLayout();
 
@@ -913,7 +918,22 @@ void MemorySanitizer::initializeModule(Module &M) {
   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
 
   if (!CompileKernel) {
-    getOrCreateInitFunction(M, kMsanInitName);
+    std::tie(MsanCtorFunction, std::ignore) =
+        getOrCreateSanitizerCtorAndInitFunctions(
+            M, kMsanModuleCtorName, kMsanInitName,
+            /*InitArgTypes=*/{},
+            /*InitArgs=*/{},
+            // This callback is invoked when the functions are created the first
+            // time. Hook them into the global ctors list in that case:
+            [&](Function *Ctor, FunctionCallee) {
+              if (!ClWithComdat) {
+                appendToGlobalCtors(M, Ctor, 0);
+                return;
+              }
+              Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+              Ctor->setComdat(MsanCtorComdat);
+              appendToGlobalCtors(M, Ctor, 0, Ctor);
+            });
 
     if (TrackOrigins)
       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
@@ -932,7 +952,7 @@ void MemorySanitizer::initializeModule(Module &M) {
 }
 
 bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
-  MSan.emplace(M, TrackOrigins, Recover, EnableKmsan);
+  MSan.emplace(M, Options);
   return true;
 }
 
@@ -1011,6 +1031,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       : Shadow(S), Origin(O), OrigIns(I) {}
   };
   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+  bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
+  SmallSet<AllocaInst *, 16> AllocaSet;
+  SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
   SmallVector<StoreInst *, 16> StoreList;
 
   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
@@ -1076,7 +1099,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
       Value *GEP =
-          i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
+          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
       CurrentAlignment = kMinOriginAlignment;
     }
@@ -1104,7 +1127,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
           DL.getTypeSizeInBits(ConvertedShadow->getType());
       unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
       if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
-        Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
+        FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
         Value *ConvertedShadow2 = IRB.CreateZExt(
             ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
         IRB.CreateCall(Fn, {ConvertedShadow2,
@@ -1186,7 +1209,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
-      Value *Fn = MS.MaybeWarningFn[SizeIndex];
+      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
       Value *ConvertedShadow2 =
           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
@@ -1221,20 +1244,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
     Constant *Zero = IRB.getInt32(0);
-    MS.ParamTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(0)}, "param_shadow");
-    MS.RetvalTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(1)}, "retval_shadow");
-    MS.VAArgTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(2)}, "va_arg_shadow");
-    MS.VAArgOriginTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(3)}, "va_arg_origin");
-    MS.VAArgOverflowSizeTLS = IRB.CreateGEP(
-        ContextState, {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
-    MS.ParamOriginTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(5)}, "param_origin");
+    MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                {Zero, IRB.getInt32(0)}, "param_shadow");
+    MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                 {Zero, IRB.getInt32(1)}, "retval_shadow");
+    MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+    MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                      {Zero, IRB.getInt32(3)}, "va_arg_origin");
+    MS.VAArgOverflowSizeTLS =
+        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                      {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+    MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                      {Zero, IRB.getInt32(5)}, "param_origin");
     MS.RetvalOriginTLS =
-        IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(6)}, "retval_origin");
+        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                      {Zero, IRB.getInt32(6)}, "retval_origin");
     return ret;
   }
 
@@ -1265,6 +1290,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     VAHelper->finalizeInstrumentation();
 
+    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
+    // instrumenting only allocas.
+    if (InstrumentLifetimeStart) {
+      for (auto Item : LifetimeStartList) {
+        instrumentAlloca(*Item.second, Item.first);
+        AllocaSet.erase(Item.second);
+      }
+    }
+    // Poison the allocas for which we didn't instrument the corresponding
+    // lifetime intrinsics.
+    for (AllocaInst *AI : AllocaSet)
+      instrumentAlloca(*AI);
+
     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
                                InstrumentationList.size() + StoreList.size() >
                                    (unsigned)ClInstrumentationWithCallThreshold;
@@ -1381,7 +1419,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
       }
       OriginPtr =
-          IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0));
+          IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
     }
     return std::make_pair(ShadowPtr, OriginPtr);
   }
@@ -1393,7 +1431,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     const DataLayout &DL = F.getParent()->getDataLayout();
     int Size = DL.getTypeStoreSize(ShadowTy);
 
-    Value *Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
     Value *AddrCast =
         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
     if (Getter) {
@@ -1598,8 +1636,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
               // ParamTLS overflow.
               *ShadowPtr = getCleanShadow(V);
             } else {
-              *ShadowPtr =
-                  EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
+              *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+                                                      kShadowTLSAlignment);
             }
           }
           LLVM_DEBUG(dbgs()
@@ -1607,7 +1645,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
           if (MS.TrackOrigins && !Overflow) {
             Value *OriginPtr =
                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
-            setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+            setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
           } else {
             setOrigin(A, getCleanOrigin());
           }
@@ -1738,7 +1776,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (PropagateShadow) {
       std::tie(ShadowPtr, OriginPtr) =
           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
-      setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+      setShadow(&I,
+                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
     } else {
       setShadow(&I, getCleanShadow(&I));
     }
@@ -1752,7 +1791,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (MS.TrackOrigins) {
       if (PropagateShadow) {
         unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
-        setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));
+        setOrigin(
+            &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
       } else {
         setOrigin(&I, getCleanOrigin());
       }
@@ -1903,7 +1943,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     Value *S1S2 = IRB.CreateAnd(S1, S2);
     Value *V1S2 = IRB.CreateAnd(V1, S2);
     Value *S1V2 = IRB.CreateAnd(S1, V2);
-    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
     setOriginForNaryOp(I);
   }
 
@@ -1925,7 +1965,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     Value *S1S2 = IRB.CreateAnd(S1, S2);
     Value *V1S2 = IRB.CreateAnd(V1, S2);
     Value *S1V2 = IRB.CreateAnd(S1, V2);
-    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
     setOriginForNaryOp(I);
   }
 
@@ -2070,6 +2110,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     SC.Done(&I);
   }
 
+  void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
+
   // Handle multiplication by constant.
   //
   // Handle a special case of multiplication by constant that may have one or
@@ -2432,7 +2474,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       unsigned Alignment = 1;
       std::tie(ShadowPtr, OriginPtr) =
           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
-      setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+      setShadow(&I,
+                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
     } else {
       setShadow(&I, getCleanShadow(&I));
     }
@@ -2442,7 +2485,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     if (MS.TrackOrigins) {
       if (PropagateShadow)
-        setOrigin(&I, IRB.CreateLoad(OriginPtr));
+        setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
       else
         setOrigin(&I, getCleanOrigin());
     }
@@ -2519,6 +2562,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return false;
   }
 
+  void handleLifetimeStart(IntrinsicInst &I) {
+    if (PoisonStack) {
+      DenseMap<Value *, AllocaInst *> Cache;
+      AllocaInst *Target = llvm::findAllocaForValue(I.getArgOperand(1), Cache);
+      // One unresolved marker switches off lifetime-based poisoning entirely.
+      if (Target == nullptr)
+        InstrumentLifetimeStart = false;
+      LifetimeStartList.emplace_back(&I, Target);
+    }
+  }
+
   void handleBswap(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Op = I.getArgOperand(0);
@@ -2650,7 +2704,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
     Value *V1 = I.getOperand(0);
     Value *V2 = I.getOperand(1);
-    Value *Shift = IRB.CreateCall(I.getCalledValue(),
+    Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
@@ -2660,6 +2714,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   // Get an X86_MMX-sized vector type.
   Type *getMMXVectorTy(unsigned EltSizeInBits) {
     const unsigned X86_MMXSizeInBits = 64;
+    assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
+           "Illegal MMX vector element size");
     return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
                            X86_MMXSizeInBits / EltSizeInBits);
   }
@@ -2825,9 +2881,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ClCheckAccessAddress)
       insertShadowCheck(Addr, &I);
 
-    Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr");
-    Value *Origin =
-        MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin();
+    Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+    Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
+                                    : getCleanOrigin();
     insertShadowCheck(Shadow, Origin, &I);
   }
 
@@ -2901,7 +2957,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
         Value *Origin = IRB.CreateSelect(
             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
-            getOrigin(PassThru), IRB.CreateLoad(OriginPtr));
+            getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
 
         setOrigin(&I, Origin);
       } else {
@@ -2911,9 +2967,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return true;
   }
 
+  // Instrument BMI / BMI2 intrinsics.
+  // Each of them has the shape Z = I(X, Y), where both operands and the result
+  // share one type, either i32 or i64.
+  // The same shadow computation happens to work for every one of them:
+  //   Sz = I(Sx, Y) | (sext (Sy != 0))
+  void handleBmiIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Type *ShadowTy = getShadowTy(&I);
+
+    // A single poisoned bit in the mask operand poisons the whole result.
+    Value *MaskShadow = getShadow(&I, 1);
+    Value *MaskIsPoisoned =
+        IRB.CreateICmpNE(MaskShadow, getCleanShadow(ShadowTy));
+    Value *SMask = IRB.CreateSExt(MaskIsPoisoned, ShadowTy);
+    // Run the very same intrinsic on the shadow of the first operand.
+    Value *S = IRB.CreateCall(I.getCalledFunction(),
+                              {getShadow(&I, 0), I.getOperand(1)});
+    setShadow(&I, IRB.CreateOr(SMask, S));
+    setOriginForNaryOp(I);
+  }
 
   void visitIntrinsicInst(IntrinsicInst &I) {
     switch (I.getIntrinsicID()) {
+    case Intrinsic::lifetime_start:
+      handleLifetimeStart(I);
+      break;
     case Intrinsic::bswap:
       handleBswap(I);
       break;
@@ -3127,6 +3206,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorComparePackedIntrinsic(I);
       break;
 
+    case Intrinsic::x86_bmi_bextr_32:
+    case Intrinsic::x86_bmi_bextr_64:
+    case Intrinsic::x86_bmi_bzhi_32:
+    case Intrinsic::x86_bmi_bzhi_64:
+    case Intrinsic::x86_bmi_pdep_32:
+    case Intrinsic::x86_bmi_pdep_64:
+    case Intrinsic::x86_bmi_pext_32:
+    case Intrinsic::x86_bmi_pext_64:
+      handleBmiIntrinsic(I);
+      break;
+
     case Intrinsic::is_constant:
       // The result of llvm.is.constant() is always defined.
       setShadow(&I, getCleanShadow(&I));
@@ -3143,21 +3233,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   void visitCallSite(CallSite CS) {
     Instruction &I = *CS.getInstruction();
     assert(!I.getMetadata("nosanitize"));
-    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+    assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) &&
+           "Unknown type of CallSite");
+    if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) {
+      // For inline asm (either a call to asm function, or callbr instruction),
+      // do the usual thing: check argument shadow and mark all outputs as
+      // clean. Note that any side effects of the inline asm that are not
+      // immediately visible in its constraints are not handled.
+      if (ClHandleAsmConservative && MS.CompileKernel)
+        visitAsmInstruction(I);
+      else
+        visitInstruction(I);
+      return;
+    }
     if (CS.isCall()) {
       CallInst *Call = cast<CallInst>(&I);
-
-      // For inline asm, do the usual thing: check argument shadow and mark all
-      // outputs as clean. Note that any side effects of the inline asm that are
-      // not immediately visible in its constraints are not handled.
-      if (Call->isInlineAsm()) {
-        if (ClHandleAsmConservative && MS.CompileKernel)
-          visitAsmInstruction(I);
-        else
-          visitInstruction(I);
-        return;
-      }
-
       assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
 
       // We are going to insert code that relies on the fact that the callee
@@ -3264,12 +3354,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
              "Could not find insertion point for retval shadow load");
     }
     IRBuilder<> IRBAfter(&*NextInsn);
-    Value *RetvalShadow =
-      IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
-                                 kShadowTLSAlignment, "_msret");
+    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
+        getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
+        kShadowTLSAlignment, "_msret");
     setShadow(&I, RetvalShadow);
     if (MS.TrackOrigins)
-      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+      setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
+                                        getOriginPtrForRetval(IRBAfter)));
   }
 
   bool isAMustTailRetVal(Value *RetVal) {
@@ -3330,7 +3421,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                                 StackDescription.str());
   }
 
-  void instrumentAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+  void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
     if (PoisonStack && ClPoisonStackWithCall) {
       IRB.CreateCall(MS.MsanPoisonStackFn,
                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
@@ -3352,7 +3443,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
-  void instrumentAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+  void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
     Value *Descr = getLocalVarDescription(I);
     if (PoisonStack) {
       IRB.CreateCall(MS.MsanPoisonAllocaFn,
@@ -3364,10 +3455,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
-  void visitAllocaInst(AllocaInst &I) {
-    setShadow(&I, getCleanShadow(&I));
-    setOrigin(&I, getCleanOrigin());
-    IRBuilder<> IRB(I.getNextNode());
+  void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
+    if (!InsPoint)
+      InsPoint = &I;
+    IRBuilder<> IRB(InsPoint->getNextNode());
     const DataLayout &DL = F.getParent()->getDataLayout();
     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
@@ -3375,9 +3466,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       Len = IRB.CreateMul(Len, I.getArraySize());
 
     if (MS.CompileKernel)
-      instrumentAllocaKmsan(I, IRB, Len);
+      poisonAllocaKmsan(I, IRB, Len);
     else
-      instrumentAllocaUserspace(I, IRB, Len);
+      poisonAllocaUserspace(I, IRB, Len);
+  }
+
+  void visitAllocaInst(AllocaInst &I) {
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+    // We'll get to this alloca later unless it's poisoned at the corresponding
+    // llvm.lifetime.start.
+    AllocaSet.insert(&I);
   }
 
   void visitSelectInst(SelectInst& I) {
@@ -3409,7 +3508,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       D = CreateAppToShadowCast(IRB, D);
 
       // Result shadow if condition shadow is 1.
-      Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
+      Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
     }
     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
     setShadow(&I, Sa);
@@ -3525,10 +3624,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   }
 
   /// Get the number of output arguments returned by pointers.
-  int getNumOutputArgs(InlineAsm *IA, CallInst *CI) {
+  int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
     int NumRetOutputs = 0;
     int NumOutputs = 0;
-    Type *RetTy = dyn_cast<Value>(CI)->getType();
+    Type *RetTy = dyn_cast<Value>(CB)->getType();
     if (!RetTy->isVoidTy()) {
       // Register outputs are returned via the CallInst return value.
       StructType *ST = dyn_cast_or_null<StructType>(RetTy);
@@ -3568,24 +3667,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // corresponding CallInst has nO+nI+1 operands (the last operand is the
     // function to be called).
     const DataLayout &DL = F.getParent()->getDataLayout();
-    CallInst *CI = dyn_cast<CallInst>(&I);
+    CallBase *CB = dyn_cast<CallBase>(&I);
     IRBuilder<> IRB(&I);
-    InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
-    int OutputArgs = getNumOutputArgs(IA, CI);
+    InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
+    int OutputArgs = getNumOutputArgs(IA, CB);
     // The last operand of a CallInst is the function itself.
-    int NumOperands = CI->getNumOperands() - 1;
+    int NumOperands = CB->getNumOperands() - 1;
 
     // Check input arguments. Doing so before unpoisoning output arguments, so
     // that we won't overwrite uninit values before checking them.
     for (int i = OutputArgs; i < NumOperands; i++) {
-      Value *Operand = CI->getOperand(i);
+      Value *Operand = CB->getOperand(i);
       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
     }
     // Unpoison output arguments. This must happen before the actual InlineAsm
     // call, so that the shadow for memory published in the asm() statement
     // remains valid.
     for (int i = 0; i < OutputArgs; i++) {
-      Value *Operand = CI->getOperand(i);
+      Value *Operand = CB->getOperand(i);
       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
     }
 
@@ -3817,7 +3916,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
       // If there is a va_start in this function, make a backup copy of
       // va_arg_tls somewhere in the function entry block.
       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
-      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
       Value *CopySize =
         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
                       VAArgOverflowSize);
@@ -3836,11 +3936,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
       IRBuilder<> IRB(OrigInst->getNextNode());
       Value *VAListTag = OrigInst->getArgOperand(0);
 
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                         ConstantInt::get(MS.IntptrTy, 16)),
-          PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
-      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+          PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
       unsigned Alignment = 16;
       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -3851,11 +3953,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
       if (MS.TrackOrigins)
         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
                          Alignment, AMD64FpEndOffset);
+      Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                         ConstantInt::get(MS.IntptrTy, 8)),
-          PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
-      Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+          PointerType::get(OverflowArgAreaPtrTy, 0));
+      Value *OverflowArgAreaPtr =
+          IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
@@ -3957,7 +4061,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
     assert(!VAArgSize && !VAArgTLSCopy &&
            "finalizeInstrumentation called twice");
     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
-    VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
                                     VAArgSize);
 
@@ -3974,10 +4078,12 @@ struct VarArgMIPS64Helper : public VarArgHelper {
       CallInst *OrigInst = VAStartInstrumentationList[i];
       IRBuilder<> IRB(OrigInst->getNextNode());
       Value *VAListTag = OrigInst->getArgOperand(0);
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
       Value *RegSaveAreaPtrPtr =
           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                             PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
-      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+                             PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
       unsigned Alignment = 8;
       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4127,7 +4233,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                       ConstantInt::get(MS.IntptrTy, offset)),
         Type::getInt64PtrTy(*MS.C));
-    return IRB.CreateLoad(SaveAreaPtrPtr);
+    return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
   }
 
   // Retrieve a va_list field of 'int' size.
@@ -4137,7 +4243,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
                       ConstantInt::get(MS.IntptrTy, offset)),
         Type::getInt32PtrTy(*MS.C));
-    Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+    Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
   }
 
@@ -4148,7 +4254,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
       // If there is a va_start in this function, make a backup copy of
       // va_arg_tls somewhere in the function entry block.
       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
-      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
       Value *CopySize =
         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
                       VAArgOverflowSize);
@@ -4391,7 +4498,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
     assert(!VAArgSize && !VAArgTLSCopy &&
            "finalizeInstrumentation called twice");
     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
-    VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
                                     VAArgSize);
 
@@ -4408,10 +4515,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
       CallInst *OrigInst = VAStartInstrumentationList[i];
       IRBuilder<> IRB(OrigInst->getNextNode());
       Value *VAListTag = OrigInst->getArgOperand(0);
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
       Value *RegSaveAreaPtrPtr =
           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                             PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
-      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+                             PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
       unsigned Alignment = 8;
       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4458,6 +4567,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
 }
 
 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+  if (!CompileKernel && (&F == MsanCtorFunction))
+    return false;
   MemorySanitizerVisitor Visitor(F, *this, TLI);
 
   // Clear out readonly/readnone attributes.
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index f043325f5bba..6fec3c9c79ee 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1,9 +1,8 @@
 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -48,7 +47,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "CFGMST.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -66,6 +64,7 @@
 #include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -107,6 +106,7 @@
 #include "llvm/Support/JamCRC.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <cassert>
@@ -133,6 +133,19 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
+STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOSelectInsts,
+          "Number of select instruction instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOMemIntrinsics,
+          "Number of mem intrinsics instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
+STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
+STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
+STATISTIC(NumOfCSPGOFunc,
+          "Number of functions having valid profile counts in CSPGO.");
+STATISTIC(NumOfCSPGOMismatch,
+          "Number of functions having mismatch profile in CSPGO.");
+STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
 
 // Command line option to specify the file to read profile from. This is
 // mainly used for testing.
@@ -384,7 +397,8 @@ class PGOInstrumentationGenLegacyPass : public ModulePass {
 public:
   static char ID;
 
-  PGOInstrumentationGenLegacyPass() : ModulePass(ID) {
+  PGOInstrumentationGenLegacyPass(bool IsCS = false)
+      : ModulePass(ID), IsCS(IsCS) {
     initializePGOInstrumentationGenLegacyPassPass(
         *PassRegistry::getPassRegistry());
   }
@@ -392,6 +406,8 @@ public:
   StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
 
 private:
+  // Whether this is context-sensitive instrumentation.
+  bool IsCS;
   bool runOnModule(Module &M) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -404,8 +420,8 @@ public:
   static char ID;
 
   // Provide the profile filename as the parameter.
-  PGOInstrumentationUseLegacyPass(std::string Filename = "")
-      : ModulePass(ID), ProfileFileName(std::move(Filename)) {
+  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
+      : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
     if (!PGOTestProfileFile.empty())
       ProfileFileName = PGOTestProfileFile;
     initializePGOInstrumentationUseLegacyPassPass(
@@ -416,14 +432,38 @@ public:
 
 private:
   std::string ProfileFileName;
+  // Whether this is a context-sensitive instrumentation use.
+  bool IsCS;
 
   bool runOnModule(Module &M) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
   }
 };
 
+class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
+public:
+  static char ID;
+  StringRef getPassName() const override {
+    return "PGOInstrumentationGenCreateVarPass";
+  }
+  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
+      : ModulePass(ID), InstrProfileOutput(CSInstrName) {
+    initializePGOInstrumentationGenCreateVarLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+private:
+  bool runOnModule(Module &M) override {
+    createProfileFileNameVar(M, InstrProfileOutput);
+    createIRLevelProfileFlagVar(M, true);
+    return false;
+  }
+  std::string InstrProfileOutput;
+};
+
 } // end anonymous namespace
 
 char PGOInstrumentationGenLegacyPass::ID = 0;
@@ -435,8 +475,8 @@ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
                     "PGO instrumentation.", false, false)
 
-ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {
-  return new PGOInstrumentationGenLegacyPass();
+ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
+  return new PGOInstrumentationGenLegacyPass(IsCS);
 }
 
 char PGOInstrumentationUseLegacyPass::ID = 0;
@@ -445,11 +485,25 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
                       "Read PGO instrumentation profile.", false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
                     "Read PGO instrumentation profile.", false, false)
 
-ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {
-  return new PGOInstrumentationUseLegacyPass(Filename.str());
+ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
+                                                        bool IsCS) {
+  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
+}
+
+char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
+
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
+                "pgo-instr-gen-create-var",
+                "Create PGO instrumentation version variable for CSPGO.", false,
+                false)
+
+ModulePass *
+llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
+  return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
 }
 
 namespace {
@@ -490,6 +544,12 @@ struct BBInfo {
   const std::string infoString() const {
     return (Twine("Index=") + Twine(Index)).str();
   }
+
+  // Empty function -- only applicable to UseBBInfo.
+  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+  // Empty function -- only applicable to UseBBInfo.
+  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
 };
 
 // This class implements the CFG edges. Note the CFG can be a multi-graph.
@@ -497,6 +557,9 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
 private:
   Function &F;
 
+  // Whether this is context-sensitive instrumentation.
+  bool IsCS;
+
   // A map that stores the Comdat group in function F.
   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
 
@@ -516,6 +579,10 @@ public:
   // The Minimum Spanning Tree of function CFG.
   CFGMST<Edge, BBInfo> MST;
 
+  // Collect all the BBs that will be instrumented, and store them in
+  // InstrumentBBs.
+  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
   // Give an edge, find the BB that will be instrumented.
   // Return nullptr if there is no BB to be instrumented.
   BasicBlock *getInstrBB(Edge *E);
@@ -536,15 +603,23 @@ public:
       Function &Func,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
-      BlockFrequencyInfo *BFI = nullptr)
-      : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
-        SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) {
+      BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
+        ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
+        MST(F, BPI, BFI) {
     // This should be done before CFG hash computation.
     SIVisitor.countSelects(Func);
     MIVisitor.countMemIntrinsics(Func);
-    NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
-    NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
-    ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+    if (!IsCS) {
+      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+      NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+      NumOfPGOBB += MST.BBInfos.size();
+      ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+    } else {
+      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+      NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+      NumOfCSPGOBB += MST.BBInfos.size();
+    }
     ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
 
     FuncName = getPGOFuncName(F);
@@ -553,28 +628,17 @@ public:
       renameComdatFunction();
     LLVM_DEBUG(dumpInfo("after CFGMST"));
 
-    NumOfPGOBB += MST.BBInfos.size();
     for (auto &E : MST.AllEdges) {
       if (E->Removed)
         continue;
-      NumOfPGOEdge++;
+      IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
       if (!E->InMST)
-        NumOfPGOInstrument++;
+        IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
     }
 
     if (CreateGlobalVar)
       FuncNameVar = createPGOFuncNameVar(F, FuncName);
   }
-
-  // Return the number of profile counters needed for the function.
-  unsigned getNumCounters() {
-    unsigned NumCounters = 0;
-    for (auto &E : this->MST.AllEdges) {
-      if (!E->InMST && !E->Removed)
-        NumCounters++;
-    }
-    return NumCounters + SIVisitor.getNumOfSelectInsts();
-  }
 };
 
 } // end anonymous namespace
@@ -598,9 +662,17 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
     }
   }
   JC.update(Indexes);
+
+  // Hash format for context sensitive profile. Reserve 4 bits for other
+  // information.
   FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
                  (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+                 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
                  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+  // Reserve bits 60-63 for other information.
+  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+  if (IsCS)
+    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
   LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
                     << " CRC = " << JC.getCRC()
                     << ", Selects = " << SIVisitor.getNumOfSelectInsts()
@@ -681,6 +753,36 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
   }
 }
 
+// Collect all the BBs that will be instrumented and return them in
+// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+    std::vector<BasicBlock *> &InstrumentBBs) {
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<Edge *> EdgeList;
+  EdgeList.reserve(MST.AllEdges.size());
+  for (auto &E : MST.AllEdges)
+    EdgeList.push_back(E.get());
+
+  for (auto &E : EdgeList) {
+    BasicBlock *InstrBB = getInstrBB(E);
+    if (InstrBB)
+      InstrumentBBs.push_back(InstrBB);
+  }
+
+  // Set up InEdges/OutEdges for all BBs.
+  for (auto &E : MST.AllEdges) {
+    if (E->Removed)
+      continue;
+    const BasicBlock *SrcBB = E->SrcBB;
+    const BasicBlock *DestBB = E->DestBB;
+    BBInfo &SrcInfo = getBBInfo(SrcBB);
+    BBInfo &DestInfo = getBBInfo(DestBB);
+    SrcInfo.addOutEdge(E.get());
+    DestInfo.addInEdge(E.get());
+  }
+}
+
 // Given a CFG E to be instrumented, find which BB to place the instrumented
 // code. The function will split the critical edge if necessary.
 template <class Edge, class BBInfo>
@@ -696,46 +798,64 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
   if (DestBB == nullptr)
     return SrcBB;
 
+  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
+    // Some basic blocks (such as catchswitch) cannot be instrumented.
+    // If the returned first insertion point is the end of BB, skip this BB.
+    if (BB->getFirstInsertionPt() == BB->end())
+      return nullptr;
+    return BB;
+  };
+
   // Instrument the SrcBB if it has a single successor,
   // otherwise, the DestBB if this is not a critical edge.
   Instruction *TI = SrcBB->getTerminator();
   if (TI->getNumSuccessors() <= 1)
-    return SrcBB;
+    return canInstrument(SrcBB);
   if (!E->IsCritical)
-    return DestBB;
+    return canInstrument(DestBB);
 
+  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+  if (!InstrBB) {
+    LLVM_DEBUG(
+        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
+    return nullptr;
+  }
   // For a critical edge, we have to split. Instrument the newly
   // created BB.
-  NumOfPGOSplit++;
+  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
   LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
                     << " --> " << getBBInfo(DestBB).Index << "\n");
-  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
-  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
-  assert(InstrBB && "Critical edge is not split");
-
+  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+  MST.addEdge(SrcBB, InstrBB, 0);
+  // Second one: Add new edge of InstrBB->DestBB.
+  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+  NewEdge1.InMST = true;
   E->Removed = true;
-  return InstrBB;
+
+  return canInstrument(InstrBB);
 }
 
 // Visit all edge and instrument the edges not in MST, and do value profiling.
 // Critical edges will be split.
 static void instrumentOneFunc(
     Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
-    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+    bool IsCS) {
   // Split indirectbr critical edges here before computing the MST rather than
   // later in getInstrBB() to avoid invalidating it.
   SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
-                                                   BFI);
-  unsigned NumCounters = FuncInfo.getNumCounters();
+                                                   BFI, IsCS);
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
 
   uint32_t I = 0;
   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
-  for (auto &E : FuncInfo.MST.AllEdges) {
-    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
-    if (!InstrBB)
-      continue;
-
+  for (auto *InstrBB : InstrumentBBs) {
     IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
     assert(Builder.GetInsertPoint() != InstrBB->end() &&
            "Cannot get the Instrumentation point");
@@ -831,6 +951,18 @@ struct UseBBInfo : public BBInfo {
       return BBInfo::infoString();
     return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
   }
+
+  // Add an OutEdge and update the edge count.
+  void addOutEdge(PGOUseEdge *E) {
+    OutEdges.push_back(E);
+    UnknownCountOutEdge++;
+  }
+
+  // Add an InEdge and update the edge count.
+  void addInEdge(PGOUseEdge *E) {
+    InEdges.push_back(E);
+    UnknownCountInEdge++;
+  }
 };
 
 } // end anonymous namespace
@@ -853,10 +985,10 @@ public:
   PGOUseFunc(Function &Func, Module *Modu,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI = nullptr,
-             BlockFrequencyInfo *BFIin = nullptr)
+             BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
       : F(Func), M(Modu), BFI(BFIin),
-        FuncInfo(Func, ComdatMembers, false, BPI, BFIin),
-        FreqAttr(FFA_Normal) {}
+        FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
+        FreqAttr(FFA_Normal), IsCS(IsCS) {}
 
   // Read counts for the instrumented BB from profile.
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
@@ -929,8 +1061,11 @@ private:
   // Function hotness info derived from profile.
   FuncFreqAttr FreqAttr;
 
-  // Find the Instrumented BB and set the value.
-  void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+  // Whether to use the context-sensitive profile.
+  bool IsCS;
+
+  // Find the Instrumented BB and set the value. Return false on error.
+  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
 
   // Set the edge counter value for the unknown edge -- there should be only
   // one unknown edge.
@@ -959,41 +1094,64 @@ private:
 } // end anonymous namespace
 
 // Visit all the edges and assign the count value for the instrumented
-// edges and the BB.
-void PGOUseFunc::setInstrumentedCounts(
+// edges and the BB. Return false on error.
+bool PGOUseFunc::setInstrumentedCounts(
     const std::vector<uint64_t> &CountFromProfile) {
-  assert(FuncInfo.getNumCounters() == CountFromProfile.size());
-  // Use a worklist as we will update the vector during the iteration.
-  std::vector<PGOUseEdge *> WorkList;
-  for (auto &E : FuncInfo.MST.AllEdges)
-    WorkList.push_back(E.get());
 
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+  // The number of counters here should match the number of counters
+  // in profile. Return if they mismatch.
+  if (NumCounters != CountFromProfile.size()) {
+    return false;
+  }
+  // Set the profile count to the Instrumented BBs.
   uint32_t I = 0;
-  for (auto &E : WorkList) {
-    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
-    if (!InstrBB)
-      continue;
+  for (BasicBlock *InstrBB : InstrumentBBs) {
     uint64_t CountValue = CountFromProfile[I++];
-    if (!E->Removed) {
-      getBBInfo(InstrBB).setBBInfoCount(CountValue);
-      E->setEdgeCount(CountValue);
-      continue;
-    }
-
-    // Need to add two new edges.
-    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
-    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
-    // Add new edge of SrcBB->InstrBB.
-    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
-    NewEdge.setEdgeCount(CountValue);
-    // Add new edge of InstrBB->DestBB.
-    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
-    NewEdge1.setEdgeCount(CountValue);
-    NewEdge1.InMST = true;
-    getBBInfo(InstrBB).setBBInfoCount(CountValue);
+    UseBBInfo &Info = getBBInfo(InstrBB);
+    Info.setBBInfoCount(CountValue);
   }
   ProfileCountSize = CountFromProfile.size();
   CountPosition = I;
+
+  // Set the edge count and update the count of unknown edges for BBs.
+  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+    E->setEdgeCount(Value);
+    this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+  };
+
+  // Set the profile count for the instrumented edges. There are BBs that are
+  // not in the MST but are not instrumented. Need to set the edge count value
+  // so that we can populate the profile counts later.
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (E->Removed || E->InMST)
+      continue;
+    const BasicBlock *SrcBB = E->SrcBB;
+    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
+    // If only one out-edge, the edge profile count should be the same as BB
+    // profile count.
+    if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+      setEdgeCount(E.get(), SrcInfo.CountValue);
+    else {
+      const BasicBlock *DestBB = E->DestBB;
+      UseBBInfo &DestInfo = getBBInfo(DestBB);
+      // If only one in-edge, the edge profile count should be the same as BB
+      // profile count.
+      if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+        setEdgeCount(E.get(), DestInfo.CountValue);
+    }
+    if (E->CountValid)
+      continue;
+    // E's count should have been set from profile. If not, this means E skips
+    // the instrumentation. We set the count to 0.
+    setEdgeCount(E.get(), 0);
+  }
+  return true;
 }
 
 // Set the count value for the unknown edge. There should be one and only one
@@ -1022,23 +1180,31 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
       auto Err = IPE.get();
       bool SkipWarning = false;
+      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+                        << FuncInfo.FuncName << ": ");
       if (Err == instrprof_error::unknown_function) {
-        NumOfPGOMissing++;
+        IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
         SkipWarning = !PGOWarnMissing;
+        LLVM_DEBUG(dbgs() << "unknown function");
       } else if (Err == instrprof_error::hash_mismatch ||
                  Err == instrprof_error::malformed) {
-        NumOfPGOMismatch++;
+        IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
         SkipWarning =
             NoPGOWarnMismatch ||
             (NoPGOWarnMismatchComdat &&
              (F.hasComdat() ||
               F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
       }
 
+      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
       if (SkipWarning)
         return;
 
-      std::string Msg = IPE.message() + std::string(" ") + F.getName().str();
+      std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
+                        std::string(" Hash = ") +
+                        std::to_string(FuncInfo.FunctionHash);
+
       Ctx.diagnose(
           DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
     });
@@ -1047,7 +1213,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
   ProfileRecord = std::move(Result.get());
   std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
 
-  NumOfPGOFunc++;
+  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
   LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
   uint64_t ValueSum = 0;
   for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
@@ -1061,34 +1227,23 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
   getBBInfo(nullptr).UnknownCountOutEdge = 2;
   getBBInfo(nullptr).UnknownCountInEdge = 2;
 
-  setInstrumentedCounts(CountFromProfile);
-  ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+  if (!setInstrumentedCounts(CountFromProfile)) {
+    LLVM_DEBUG(
+        dbgs() << "Inconsistent number of counts, skipping this function");
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        M->getName().data(),
+        Twine("Inconsistent number of counts in ") + F.getName().str()
+        + Twine(": the profile may be stale or there is a function name collision."),
+        DS_Warning));
+    return false;
+  }
+  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
   return true;
 }
 
 // Populate the counters from instrumented BBs to all BBs.
 // In the end of this operation, all BBs should have a valid count value.
 void PGOUseFunc::populateCounters() {
-  // First set up Count variable for all BBs.
-  for (auto &E : FuncInfo.MST.AllEdges) {
-    if (E->Removed)
-      continue;
-
-    const BasicBlock *SrcBB = E->SrcBB;
-    const BasicBlock *DestBB = E->DestBB;
-    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
-    UseBBInfo &DestInfo = getBBInfo(DestBB);
-    SrcInfo.OutEdges.push_back(E.get());
-    DestInfo.InEdges.push_back(E.get());
-    SrcInfo.UnknownCountOutEdge++;
-    DestInfo.UnknownCountInEdge++;
-
-    if (!E->CountValid)
-      continue;
-    DestInfo.UnknownCountInEdge--;
-    SrcInfo.UnknownCountOutEdge--;
-  }
-
   bool Changes = true;
   unsigned NumPasses = 0;
   while (Changes) {
@@ -1167,7 +1322,8 @@ void PGOUseFunc::populateCounters() {
 // Assign the scaled count values to the BB with multiple out edges.
 void PGOUseFunc::setBranchWeights() {
   // Generate MD_prof metadata for every branch instruction.
-  LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
+  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
+                    << " IsCS=" << IsCS << "\n");
   for (auto &BB : F) {
     Instruction *TI = BB.getTerminator();
     if (TI->getNumSuccessors() < 2)
@@ -1175,6 +1331,7 @@ void PGOUseFunc::setBranchWeights() {
     if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
           isa<IndirectBrInst>(TI)))
       continue;
+
     if (getBBInfo(&BB).CountValue == 0)
       continue;
 
@@ -1282,7 +1439,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
   Type *Int64Ty = Builder.getInt64Ty();
   Type *I8PtrTy = Builder.getInt8PtrTy();
   Value *Length = MI.getLength();
-  assert(!dyn_cast<ConstantInt>(Length));
+  assert(!isa<ConstantInt>(Length));
   Builder.CreateCall(
       Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
       {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
@@ -1325,8 +1482,14 @@ void PGOUseFunc::annotateValueSites() {
     annotateValueSites(Kind);
 }
 
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
 // Annotate the instructions for a specific value kind.
 void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+  assert(Kind <= IPVK_Last);
   unsigned ValueSiteIndex = 0;
   auto &ValueSites = FuncInfo.ValueSites[Kind];
   unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
@@ -1334,8 +1497,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
     auto &Ctx = M->getContext();
     Ctx.diagnose(DiagnosticInfoPGOProfile(
         M->getName().data(),
-        Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) +
-            " in " + F.getName().str(),
+        Twine("Inconsistent number of value sites for ") +
+            Twine(ValueProfKindDescr[Kind]) +
+            Twine(" profiling in \"") + F.getName().str() +
+            Twine("\", possibly due to the use of a stale profile."),
         DS_Warning));
     return;
   }
@@ -1352,24 +1517,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
   }
 }
 
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-static void createIRLevelProfileFlagVariable(Module &M) {
-  Type *IntTy64 = Type::getInt64Ty(M.getContext());
-  uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
-  auto IRLevelVersionVariable = new GlobalVariable(
-      M, IntTy64, true, GlobalVariable::ExternalLinkage,
-      Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
-      INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
-  IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
-  Triple TT(M.getTargetTriple());
-  if (!TT.supportsCOMDAT())
-    IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
-  else
-    IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
-        StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
-}
-
 // Collect the set of members for each Comdat in module M and store
 // in ComdatMembers.
 static void collectComdatMembers(
@@ -1390,8 +1537,11 @@ static void collectComdatMembers(
 
 static bool InstrumentAllFunctions(
     Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
-    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
-  createIRLevelProfileFlagVariable(M);
+    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+  // For the context-sensitive instrumentation, we should have a separate pass
+  // (before LTO/ThinLTO linking) to create these variables.
+  if (!IsCS)
+    createIRLevelProfileFlagVar(M, /* IsCS */ false);
   std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
   collectComdatMembers(M, ComdatMembers);
 
@@ -1400,11 +1550,18 @@ static bool InstrumentAllFunctions(
       continue;
     auto *BPI = LookupBPI(F);
     auto *BFI = LookupBFI(F);
-    instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
+    instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
   }
   return true;
 }
 
+PreservedAnalyses
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+  createProfileFileNameVar(M, CSInstrName);
+  createIRLevelProfileFlagVar(M, /* IsCS */ true);
+  return PreservedAnalyses::all();
+}
+
 bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
   if (skipModule(M))
     return false;
@@ -1415,7 +1572,7 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
   auto LookupBFI = [this](Function &F) {
     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
   };
-  return InstrumentAllFunctions(M, LookupBPI, LookupBFI);
+  return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
 }
 
 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
@@ -1429,7 +1586,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
 
-  if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI))
+  if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
@@ -1438,7 +1595,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
 static bool annotateAllFunctions(
     Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
-    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
   auto &Ctx = M.getContext();
   // Read the counter array from file.
@@ -1459,6 +1616,9 @@ static bool annotateAllFunctions(
                                           StringRef("Cannot get PGOReader")));
     return false;
   }
+  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
+    return false;
+
   // TODO: might need to change the warning once the clang option is finalized.
   if (!PGOReader->isIRLevelProfile()) {
     Ctx.diagnose(DiagnosticInfoPGOProfile(
@@ -1478,7 +1638,7 @@ static bool annotateAllFunctions(
     // Split indirectbr critical edges here before computing the MST rather than
     // later in getInstrBB() to avoid invalidating it.
     SplitIndirectBrCriticalEdges(F, BPI, BFI);
-    PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
+    PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
     bool AllZeros = false;
     if (!Func.readCounters(PGOReader.get(), AllZeros))
       continue;
@@ -1526,7 +1686,10 @@ static bool annotateAllFunctions(
       }
     }
   }
-  M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
+  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+                      IsCS ? ProfileSummary::PSK_CSInstr
+                           : ProfileSummary::PSK_Instr);
+
   // Set function hotness attribute from the profile.
   // We have to apply these attributes at the end because their presence
   // can affect the BranchProbabilityInfo of any callers, resulting in an
@@ -1545,9 +1708,10 @@ static bool annotateAllFunctions(
 }
 
 PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
-                                             std::string RemappingFilename)
+                                             std::string RemappingFilename,
+                                             bool IsCS)
     : ProfileFileName(std::move(Filename)),
-      ProfileRemappingFileName(std::move(RemappingFilename)) {
+      ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
   if (!PGOTestProfileFile.empty())
     ProfileFileName = PGOTestProfileFile;
   if (!PGOTestProfileRemappingFile.empty())
@@ -1567,7 +1731,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
   };
 
   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
-                            LookupBPI, LookupBFI))
+                            LookupBPI, LookupBFI, IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
@@ -1584,7 +1748,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
   };
 
-  return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
+  return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
+                              IsCS);
 }
 
 static std::string getSimpleNodeName(const BasicBlock *Node) {
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 2c71e75dadcc..188f95b4676b 100644
--- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -1,9 +1,8 @@
 //===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,12 +19,12 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Instrumentation/PoisonChecking.cpp b/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 000000000000..81d92e724c7d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,357 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit.  That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language.   If you're looking for something to see if your C/C++ contains
+// UB, this is not it.  
+// 
+// The rewritten semantics of each instruction will include the following
+// components: 
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+//    state of each input to whether the dynamic output of the instruction
+//    produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+//    instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+//    execute undefined behavior given the poison state of its inputs.
+//
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist).  The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+//   from the LangRef.
+// - Validating the output of an IR fuzzer to ensure that all programs produced
+//   are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+//   status of an input/IR pair is the same before and after an optimization
+//   transform. 
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+//   exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules not yet implemented for instructions with poison
+//   relevant flags.  At the moment, only nsw/nuw on add/sub are supported.
+// - UB which is control dependent on a branch on poison is not yet
+//   reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled.  As such,
+//   storing poison to memory and then reloading it will cause a false negative
+//   as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled.  At the
+//   moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled.  In particular, the optimizer's freedom to pick
+//   concrete values for undef bits so as to maximize potential for producing
+//   poison is not modeled.  
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+           cl::init(false),
+           cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+  assert(V->getType()->isIntegerTy(1));
+  if (auto *CI = dyn_cast<ConstantInt>(V))
+    return CI->isZero();
+  return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+  if (Ops.size() == 0)
+    return B.getFalse();
+  unsigned i = 0;
+  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+  if (i == Ops.size())
+    return B.getFalse();
+  Value *Accum = Ops[i++];
+  for (; i < Ops.size(); i++)
+    if (!isConstantFalse(Ops[i]))
+      Accum = B.CreateOr(Accum, Ops[i]);
+  return Accum;
+}
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+                                         SmallVector<Value*, 2> &Checks) {
+  assert(isa<BinaryOperator>(I));
+  
+  IRBuilder<> B(&I);
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  switch (I.getOpcode()) {
+  default:
+    return;
+  case Instruction::Add: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Sub: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Mul: {
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::UDiv: {
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::SDiv: {
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl: {
+    Value *ShiftCheck =
+      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+                   ConstantInt::get(RHS->getType(),
+                                    LHS->getType()->getScalarSizeInBits()));
+    Checks.push_back(ShiftCheck);
+    break;
+  }
+  };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+  IRBuilder<> B(&I);
+  SmallVector<Value*, 2> Checks;
+  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
+    generatePoisonChecksForBinOp(I, Checks);
+
+  // Handle non-binops separately
+  switch (I.getOpcode()) {
+  default:
+    break;
+  case Instruction::ExtractElement: {
+    Value *Vec = I.getOperand(0);
+    if (Vec->getType()->getVectorIsScalable())
+      break;
+    Value *Idx = I.getOperand(1);
+    unsigned NumElts = Vec->getType()->getVectorNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts));
+    Checks.push_back(Check);
+    break;
+  }
+  case Instruction::InsertElement: {
+    Value *Vec = I.getOperand(0);
+    if (Vec->getType()->getVectorIsScalable())
+      break;
+    Value *Idx = I.getOperand(2);
+    unsigned NumElts = Vec->getType()->getVectorNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts));
+    Checks.push_back(Check);
+    break;
+  }
+  };
+  return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+  auto Itr = ValToPoison.find(V);
+  if (Itr != ValToPoison.end())
+    return Itr->second;
+  if (isa<Constant>(V)) {
+    return ConstantInt::getFalse(V->getContext());
+  }
+  // Return false for unknown values - this implements a non-strict mode where
+  // unhandled IR constructs are simply considered to never produce poison.  At
+  // some point in the future, we probably want a "strict mode" for testing if
+  // nothing else.
+  return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+  assert(Cond->getType()->isIntegerTy(1));
+  if (auto *CI = dyn_cast<ConstantInt>(Cond))
+    if (CI->isAllOnesValue())
+      return;
+
+  Module *M = B.GetInsertBlock()->getModule();
+  M->getOrInsertFunction("__poison_checker_assert",
+                         Type::getVoidTy(M->getContext()),
+                         Type::getInt1Ty(M->getContext()));
+  Function *TrapFunc = M->getFunction("__poison_checker_assert");
+  B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+  assert(Cond->getType()->isIntegerTy(1));
+  CreateAssert(B, B.CreateNot(Cond));
+}
+
+static bool rewrite(Function &F) {
+  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+  DenseMap<Value *, Value *> ValToPoison;
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      auto *NewPHI = PHINode::Create(Int1Ty, 
+                                     OldPHI->getNumIncomingValues());
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+        NewPHI->addIncoming(UndefValue::get(Int1Ty),
+                            OldPHI->getIncomingBlock(i));
+      NewPHI->insertBefore(OldPHI);
+      ValToPoison[OldPHI] = NewPHI;
+    }
+  
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB) {
+      if (isa<PHINode>(I)) continue;
+
+      IRBuilder<> B(cast<Instruction>(&I));
+      
+      // Note: There are many more sources of documented UB, but this pass only
+      // attempts to find UB triggered by propagation of poison.
+      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+        CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+      if (LocalCheck)
+        if (auto *RI = dyn_cast<ReturnInst>(&I))
+          if (RI->getNumOperands() != 0) {
+            Value *Op = RI->getOperand(0);
+            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+          }
+
+      SmallVector<Value*, 4> Checks;
+      if (propagatesFullPoison(&I))
+        for (Value *V : I.operands())
+          Checks.push_back(getPoisonFor(ValToPoison, V));
+
+      if (auto *Check = generatePoisonChecks(I))
+        Checks.push_back(Check);
+      ValToPoison[&I] = buildOrChain(B, Checks);
+    }
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      if (!ValToPoison.count(OldPHI))
+        continue; // skip the newly inserted phis
+      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+        auto *OldVal = OldPHI->getIncomingValue(i);
+        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+      }
+    }
+  return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+                                          ModuleAnalysisManager &AM) {
+  bool Changed = false;
+  for (auto &F : M)
+    Changed |= rewrite(F);
+
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+                                          FunctionAnalysisManager &AM) {
+  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+   - Control dependent poison UB
+   - Strict mode - (i.e. must analyze every operand)
+     - Poison through memory
+     - Function ABIs
+     - Full coverage of intrinsics, etc.. (ouch)
+
+   Instructions w/Unclear Semantics:
+   - shufflevector - It would seem reasonable for an out of bounds mask element
+     to produce poison, but the LangRef does not state.  
+   - and/or - It would seem reasonable for poison to propagate from both
+     arguments, but LangRef doesn't state and propagatesFullPoison doesn't
+     include these two.
+   - all binary ops w/vector operands - The likely interpretation would be that
+     any element overflowing should produce poison for the entire result, but
+     the LangRef does not state.
+   - Floating point binary ops w/fmf flags other than (nnan, ninf).  It seems
+     strange that only certain flags should be documented as producing poison.
+
+   Cases of clear poison semantics not yet implemented:
+   - Exact flags on ashr/lshr produce poison
+   - NSW/NUW flags on shl produce poison
+   - Inbounds flag on getelementptr produce poison
+   - fptosi/fptoui (out of bounds input) produce poison
+   - Scalable vector types for insertelement/extractelement
+   - Floating point binary ops w/fmf nnan/ninf flags produce poison
+ */
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 0ba8d5765e8c..ca0cb4bdbe84 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -1,9 +1,8 @@
 //===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,7 +61,10 @@ static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
 static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
 static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
 static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
-static const char *const SanCovModuleCtorName = "sancov.module_ctor";
+static const char *const SanCovModuleCtorTracePcGuardName =
+    "sancov.module_ctor_trace_pc_guard";
+static const char *const SanCovModuleCtor8bitCountersName =
+    "sancov.module_ctor_8bit_counters";
 static const uint64_t SanCtorAndDtorPriority = 2;
 
 static const char *const SanCovTracePCGuardName =
@@ -210,8 +212,9 @@ private:
   void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
   void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
                              bool IsLeafFunc = true);
-  Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName,
-                                       Type *Ty, const char *Section);
+  Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+                                       const char *InitFunctionName, Type *Ty,
+                                       const char *Section);
   std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
                                                 Type *Ty);
 
@@ -223,13 +226,13 @@ private:
   std::string getSectionName(const std::string &Section) const;
   std::string getSectionStart(const std::string &Section) const;
   std::string getSectionEnd(const std::string &Section) const;
-  Function *SanCovTracePCIndir;
-  Function *SanCovTracePC, *SanCovTracePCGuard;
-  Function *SanCovTraceCmpFunction[4];
-  Function *SanCovTraceConstCmpFunction[4];
-  Function *SanCovTraceDivFunction[2];
-  Function *SanCovTraceGepFunction;
-  Function *SanCovTraceSwitchFunction;
+  FunctionCallee SanCovTracePCIndir;
+  FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+  FunctionCallee SanCovTraceCmpFunction[4];
+  FunctionCallee SanCovTraceConstCmpFunction[4];
+  FunctionCallee SanCovTraceDivFunction[2];
+  FunctionCallee SanCovTraceGepFunction;
+  FunctionCallee SanCovTraceSwitchFunction;
   GlobalVariable *SanCovLowestStack;
   InlineAsm *EmptyAsm;
   Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
@@ -270,24 +273,25 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
   // Account for the fact that on windows-msvc __start_* symbols actually
   // point to a uint64_t before the start of the array.
   auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
-  auto GEP = IRB.CreateGEP(SecStartI8Ptr,
+  auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
                            ConstantInt::get(IntptrTy, sizeof(uint64_t)));
   return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
 }
 
 Function *SanitizerCoverageModule::CreateInitCallsForSections(
-    Module &M, const char *InitFunctionName, Type *Ty,
+    Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
     const char *Section) {
   auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
   auto SecStart = SecStartEnd.first;
   auto SecEnd = SecStartEnd.second;
   Function *CtorFunc;
   std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
-      M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+      M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+  assert(CtorFunc->getName() == CtorName);
 
   if (TargetTriple.supportsCOMDAT()) {
     // Use comdat to dedup CtorFunc.
-    CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+    CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
     appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
   } else {
     appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
@@ -329,77 +333,74 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
   Int16Ty = IRB.getInt16Ty();
   Int8Ty = IRB.getInt8Ty();
 
-  SanCovTracePCIndir = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
+  SanCovTracePCIndir =
+      M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+  // Make sure smaller parameters are zero-extended to i64 as required by the
+  // x86_64 ABI.
+  AttributeList SanCovTraceCmpZeroExtAL;
+  if (TargetTriple.getArch() == Triple::x86_64) {
+    SanCovTraceCmpZeroExtAL =
+        SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+    SanCovTraceCmpZeroExtAL =
+        SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+  }
+
   SanCovTraceCmpFunction[0] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
-  SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(),
-                            IRB.getInt16Ty()));
-  SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(),
-                            IRB.getInt32Ty()));
+      M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt8Ty(), IRB.getInt8Ty());
+  SanCovTraceCmpFunction[1] =
+      M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt16Ty(), IRB.getInt16Ty());
+  SanCovTraceCmpFunction[2] =
+      M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt32Ty(), IRB.getInt32Ty());
   SanCovTraceCmpFunction[3] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty));
-
-  SanCovTraceConstCmpFunction[0] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceConstCmp1, VoidTy, Int8Ty, Int8Ty));
-  SanCovTraceConstCmpFunction[1] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceConstCmp2, VoidTy, Int16Ty, Int16Ty));
-  SanCovTraceConstCmpFunction[2] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceConstCmp4, VoidTy, Int32Ty, Int32Ty));
+      M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+  SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+      SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+  SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+      SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+  SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+      SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
   SanCovTraceConstCmpFunction[3] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty));
-
-  SanCovTraceDivFunction[0] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceDiv4, VoidTy, IRB.getInt32Ty()));
+      M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+  {
+    AttributeList AL;
+    if (TargetTriple.getArch() == Triple::x86_64)
+      AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+    SanCovTraceDivFunction[0] =
+        M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+  }
   SanCovTraceDivFunction[1] =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceDiv8, VoidTy, Int64Ty));
+      M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
   SanCovTraceGepFunction =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceGep, VoidTy, IntptrTy));
+      M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
   SanCovTraceSwitchFunction =
-      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-          SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy));
+      M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
 
   Constant *SanCovLowestStackConstant =
       M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
-  SanCovLowestStack = cast<GlobalVariable>(SanCovLowestStackConstant);
+  SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+  if (!SanCovLowestStack) {
+    C->emitError(StringRef("'") + SanCovLowestStackName +
+                 "' should not be declared by the user");
+    return true;
+  }
   SanCovLowestStack->setThreadLocalMode(
       GlobalValue::ThreadLocalMode::InitialExecTLSModel);
   if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
     SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
 
-  // Make sure smaller parameters are zero-extended to i64 as required by the
-  // x86_64 ABI.
-  if (TargetTriple.getArch() == Triple::x86_64) {
-    for (int i = 0; i < 3; i++) {
-      SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
-      SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
-      SanCovTraceConstCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
-      SanCovTraceConstCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
-    }
-    SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt);
-  }
-
-
   // We insert an empty inline asm after cov callbacks to avoid callback merge.
   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                             StringRef(""), StringRef(""),
                             /*hasSideEffects=*/true);
 
-  SanCovTracePC = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovTracePCName, VoidTy));
-  SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      SanCovTracePCGuardName, VoidTy, Int32PtrTy));
+  SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+  SanCovTracePCGuard =
+      M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
 
   for (auto &F : M)
     runOnFunction(F);
@@ -407,14 +408,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
   Function *Ctor = nullptr;
 
   if (FunctionGuardArray)
-    Ctor = CreateInitCallsForSections(M, SanCovTracePCGuardInitName, Int32PtrTy,
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+                                      SanCovTracePCGuardInitName, Int32PtrTy,
                                       SanCovGuardsSectionName);
   if (Function8bitCounterArray)
-    Ctor = CreateInitCallsForSections(M, SanCov8bitCountersInitName, Int8PtrTy,
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+                                      SanCov8bitCountersInitName, Int8PtrTy,
                                       SanCovCountersSectionName);
   if (Ctor && Options.PCTable) {
     auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
-    Function *InitFunction = declareSanitizerInitFunction(
+    FunctionCallee InitFunction = declareSanitizerInitFunction(
         M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
     IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
     IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
@@ -458,12 +461,12 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
                                   const DominatorTree *DT,
                                   const PostDominatorTree *PDT,
                                   const SanitizerCoverageOptions &Options) {
-  // Don't insert coverage for unreachable blocks: we will never call
-  // __sanitizer_cov() for them, so counting them in
+  // Don't insert coverage for blocks containing nothing but unreachable: we
+  // will never call __sanitizer_cov() for them, so counting them in
   // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
   // percentage. Also, unreachable instructions frequently have no debug
   // locations.
-  if (isa<UnreachableInst>(BB->getTerminator()))
+  if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime()))
     return false;
 
   // Don't insert coverage into blocks without a valid insertion point
@@ -484,6 +487,37 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
     && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
 }
 
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+//   * To dominates From or
+//   * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+                       const DominatorTree *DT) {
+  if (DT->dominates(To, From))
+    return true;
+  if (auto Next = To->getUniqueSuccessor())
+    if (DT->dominates(Next, From))
+      return true;
+  return false;
+}
+
+// Prunes uninteresting Cmp instrumentation:
+//   * CMP instructions that feed into loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+                             const SanitizerCoverageOptions &Options) {
+  if (!Options.NoPrune)
+    if (CMP->hasOneUse())
+      if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+        for (BasicBlock *B : BR->successors())
+          if (IsBackEdge(BR->getParent(), B, DT))
+            return false;
+  return true;
+}
+
 bool SanitizerCoverageModule::runOnFunction(Function &F) {
   if (F.empty())
     return false;
@@ -508,7 +542,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
       isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
     return false;
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
-    SplitAllCriticalEdges(F);
+    SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
   SmallVector<Instruction *, 8> IndirCalls;
   SmallVector<BasicBlock *, 16> BlocksToInstrument;
   SmallVector<Instruction *, 8> CmpTraceTargets;
@@ -532,8 +566,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
           IndirCalls.push_back(&Inst);
       }
       if (Options.TraceCmp) {
-        if (isa<ICmpInst>(&Inst))
-          CmpTraceTargets.push_back(&Inst);
+        if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+          if (IsInterestingCmp(CMP, DT, Options))
+            CmpTraceTargets.push_back(&Inst);
         if (isa<SwitchInst>(&Inst))
           SwitchTraceTargets.push_back(&Inst);
       }
@@ -797,9 +832,9 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
   }
   if (Options.Inline8bitCounters) {
     auto CounterPtr = IRB.CreateGEP(
-        Function8bitCounterArray,
+        Function8bitCounterArray->getValueType(), Function8bitCounterArray,
         {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
-    auto Load = IRB.CreateLoad(CounterPtr);
+    auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
     auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
     auto Store = IRB.CreateStore(Inc, CounterPtr);
     SetNoSanitizeMetadata(Load);
@@ -812,7 +847,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
     auto FrameAddrPtr =
         IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
     auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
-    auto LowestStack = IRB.CreateLoad(SanCovLowestStack);
+    auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
     auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
     auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
     IRBuilder<> ThenIRB(ThenTerm);
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 077364e15c4f..5be13fa745cb 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -1,9 +1,8 @@
 //===-- ThreadSanitizer.cpp - race detector -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -111,25 +110,26 @@ private:
   Type *IntptrTy;
   IntegerType *OrdTy;
   // Callbacks to run-time library are computed in doInitialization.
-  Function *TsanFuncEntry;
-  Function *TsanFuncExit;
-  Function *TsanIgnoreBegin;
-  Function *TsanIgnoreEnd;
+  FunctionCallee TsanFuncEntry;
+  FunctionCallee TsanFuncExit;
+  FunctionCallee TsanIgnoreBegin;
+  FunctionCallee TsanIgnoreEnd;
   // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
   static const size_t kNumberOfAccessSizes = 5;
-  Function *TsanRead[kNumberOfAccessSizes];
-  Function *TsanWrite[kNumberOfAccessSizes];
-  Function *TsanUnalignedRead[kNumberOfAccessSizes];
-  Function *TsanUnalignedWrite[kNumberOfAccessSizes];
-  Function *TsanAtomicLoad[kNumberOfAccessSizes];
-  Function *TsanAtomicStore[kNumberOfAccessSizes];
-  Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
-  Function *TsanAtomicCAS[kNumberOfAccessSizes];
-  Function *TsanAtomicThreadFence;
-  Function *TsanAtomicSignalFence;
-  Function *TsanVptrUpdate;
-  Function *TsanVptrLoad;
-  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+  FunctionCallee TsanRead[kNumberOfAccessSizes];
+  FunctionCallee TsanWrite[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
+                              [kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicThreadFence;
+  FunctionCallee TsanAtomicSignalFence;
+  FunctionCallee TsanVptrUpdate;
+  FunctionCallee TsanVptrLoad;
+  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
   Function *TsanCtorFunction;
 };
 
@@ -189,14 +189,14 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
   Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
                            Attribute::NoUnwind);
   // Initialize the callbacks.
-  TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-  TsanFuncExit = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy()));
-  TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy()));
-  TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_ignore_thread_end", Attr, IRB.getVoidTy()));
+  TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
+                                        IRB.getVoidTy(), IRB.getInt8PtrTy());
+  TsanFuncExit =
+      M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
+  TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
+                                          IRB.getVoidTy());
+  TsanIgnoreEnd =
+      M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
   OrdTy = IRB.getInt32Ty();
   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
     const unsigned ByteSize = 1U << i;
@@ -204,32 +204,30 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
     std::string ByteSizeStr = utostr(ByteSize);
     std::string BitSizeStr = utostr(BitSize);
     SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
-    TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-        ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+    TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
+                                        IRB.getInt8PtrTy());
 
     SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
-    TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-        WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+    TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
+                                         IRB.getInt8PtrTy());
 
     SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
-    TsanUnalignedRead[i] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+    TsanUnalignedRead[i] = M.getOrInsertFunction(
+        UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
 
     SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
-    TsanUnalignedWrite[i] =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+    TsanUnalignedWrite[i] = M.getOrInsertFunction(
+        UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
 
     Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
     Type *PtrTy = Ty->getPointerTo();
     SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
-    TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
-        M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy));
+    TsanAtomicLoad[i] =
+        M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy);
 
     SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
-    TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-        AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy));
+    TsanAtomicStore[i] = M.getOrInsertFunction(
+        AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy);
 
     for (int op = AtomicRMWInst::FIRST_BINOP;
         op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -252,34 +250,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
       else
         continue;
       SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
-      TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
-          M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy));
+      TsanAtomicRMW[op][i] =
+          M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy);
     }
 
     SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
                                   "_compare_exchange_val");
-    TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-        AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy));
+    TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty,
+                                             Ty, OrdTy, OrdTy);
   }
-  TsanVptrUpdate = checkSanitizerInterfaceFunction(
+  TsanVptrUpdate =
       M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
-                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy()));
-  TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
-  TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy));
-  TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy));
-
-  MemmoveFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  MemcpyFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt8PtrTy(), IntptrTy));
-  MemsetFn = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                            IRB.getInt32Ty(), IntptrTy));
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+  TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
+                                       IRB.getVoidTy(), IRB.getInt8PtrTy());
+  TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence",
+                                                Attr, IRB.getVoidTy(), OrdTy);
+  TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence",
+                                                Attr, IRB.getVoidTy(), OrdTy);
+
+  MemmoveFn =
+      M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  MemcpyFn =
+      M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  MemsetFn =
+      M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
 }
 
 ThreadSanitizer::ThreadSanitizer(Module &M) {
@@ -291,7 +289,9 @@ ThreadSanitizer::ThreadSanitizer(Module &M) {
           /*InitArgs=*/{},
           // This callback is invoked when the functions are created the first
           // time. Hook them into the global ctors list in that case:
-          [&](Function *Ctor, Function *) { appendToGlobalCtors(M, Ctor, 0); });
+          [&](Function *Ctor, FunctionCallee) {
+            appendToGlobalCtors(M, Ctor, 0);
+          });
 }
 
 static bool isVtableAccess(Instruction *I) {
@@ -559,7 +559,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
       : cast<LoadInst>(I)->getAlignment();
   Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
   const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
-  Value *OnAccessFunc = nullptr;
+  FunctionCallee OnAccessFunc = nullptr;
   if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
     OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
   else
@@ -659,7 +659,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
     int Idx = getMemoryAccessFuncIndex(Addr, DL);
     if (Idx < 0)
       return false;
-    Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+    FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx];
     if (!F)
       return false;
     const unsigned ByteSize = 1U << Idx;
@@ -706,8 +706,9 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
     I->eraseFromParent();
   } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
     Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
-    Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ?
-        TsanAtomicSignalFence : TsanAtomicThreadFence;
+    FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
+                           ? TsanAtomicSignalFence
+                           : TsanAtomicThreadFence;
     CallInst *C = CallInst::Create(F, Args);
     ReplaceInstWithInst(I, C);
   }
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 7f6b157304a3..e1e95cd6a407 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -1,9 +1,8 @@
 //===- ARCRuntimeEntryPoints.h - ObjC ARC Optimization ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,7 +33,7 @@
 
 namespace llvm {
 
-class Constant;
+class Function;
 class LLVMContext;
 
 namespace objcarc {
@@ -70,7 +69,7 @@ public:
     RetainAutoreleaseRV = nullptr;
   }
 
-  Constant *get(ARCRuntimeEntryPointKind kind) {
+  Function *get(ARCRuntimeEntryPointKind kind) {
     assert(TheModule != nullptr && "Not initialized.");
 
     switch (kind) {
@@ -106,33 +105,33 @@ private:
   Module *TheModule = nullptr;
 
   /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
-  Constant *AutoreleaseRV = nullptr;
+  Function *AutoreleaseRV = nullptr;
 
   /// Declaration for ObjC runtime function objc_release.
-  Constant *Release = nullptr;
+  Function *Release = nullptr;
 
   /// Declaration for ObjC runtime function objc_retain.
-  Constant *Retain = nullptr;
+  Function *Retain = nullptr;
 
   /// Declaration for ObjC runtime function objc_retainBlock.
-  Constant *RetainBlock = nullptr;
+  Function *RetainBlock = nullptr;
 
   /// Declaration for ObjC runtime function objc_autorelease.
-  Constant *Autorelease = nullptr;
+  Function *Autorelease = nullptr;
 
   /// Declaration for objc_storeStrong().
-  Constant *StoreStrong = nullptr;
+  Function *StoreStrong = nullptr;
 
   /// Declaration for objc_retainAutoreleasedReturnValue().
-  Constant *RetainRV = nullptr;
+  Function *RetainRV = nullptr;
 
   /// Declaration for objc_retainAutorelease().
-  Constant *RetainAutorelease = nullptr;
+  Function *RetainAutorelease = nullptr;
 
   /// Declaration for objc_retainAutoreleaseReturnValue().
-  Constant *RetainAutoreleaseRV = nullptr;
+  Function *RetainAutoreleaseRV = nullptr;
 
-  Constant *getIntrinsicEntryPoint(Constant *&Decl, Intrinsic::ID IntID) {
+  Function *getIntrinsicEntryPoint(Function *&Decl, Intrinsic::ID IntID) {
     if (Decl)
       return Decl;
 
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
index 9ade14c1177a..2fa07cfb32c0 100644
--- a/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -1,9 +1,8 @@
 //===- BlotMapVector.h - A MapVector with the blot operation ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4bd5fd1acd4c..e8f8fb6f3a7c 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 0f13b02c806f..ed89c8c8fc89 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -1,9 +1,8 @@
 //===- DependencyAnalysis.h - ObjC ARC Optimization ---*- C++ -*-----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index c30aaebd0f4d..f4da51650a7d 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -1,9 +1,8 @@
 //===-- ObjCARC.cpp -------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 751c8f30e814..d465630800b9 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -1,9 +1,8 @@
 //===- ObjCARC.h - ObjC ARC Optimization --------------*- C++ -*-----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 8d3ef8fde534..b341dd807508 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index abe2871c0b8f..36aa513ec554 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -46,6 +45,10 @@ using namespace llvm::objcarc;
 STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
 STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
 
+static cl::opt<unsigned> MaxBBSize("arc-contract-max-bb-size", cl::Hidden,
+    cl::desc("Maximum basic block size to discover the dominance relation of "
+             "two instructions in the same basic block"), cl::init(65535));
+
 //===----------------------------------------------------------------------===//
 //                                Declarations
 //===----------------------------------------------------------------------===//
@@ -140,7 +143,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
 
   // We do not have to worry about tail calls/does not throw since
   // retain/retainRV have the same properties.
-  Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
+  Function *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
   cast<CallInst>(Retain)->setCalledFunction(Decl);
 
   LLVM_DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -189,7 +192,7 @@ bool ObjCARCContract::contractAutorelease(
                        "        Retain: "
                     << *Retain << "\n");
 
-  Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
+  Function *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
                               ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
                               : ARCRuntimeEntryPointKind::RetainAutorelease);
   Retain->setCalledFunction(Decl);
@@ -314,8 +317,8 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
 /// Create a call instruction with the correct funclet token. Should be used
 /// instead of calling CallInst::Create directly.
 static CallInst *
-createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
-               Instruction *InsertBefore,
+createCallInst(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+               const Twine &NameStr, Instruction *InsertBefore,
                const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
   SmallVector<OperandBundleDef, 1> OpBundles;
   if (!BlockColors.empty()) {
@@ -326,7 +329,15 @@ createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
       OpBundles.emplace_back("funclet", EHPad);
   }
 
-  return CallInst::Create(Func, Args, OpBundles, NameStr, InsertBefore);
+  return CallInst::Create(FTy, Func, Args, OpBundles, NameStr, InsertBefore);
+}
+
+static CallInst *
+createCallInst(FunctionCallee Func, ArrayRef<Value *> Args, const Twine &NameStr,
+               Instruction *InsertBefore,
+               const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+  return createCallInst(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+                        InsertBefore, BlockColors);
 }
 
 /// Attempt to merge an objc_release with a store, load, and objc_retain to form
@@ -409,7 +420,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
     Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
   if (Args[1]->getType() != I8X)
     Args[1] = new BitCastInst(Args[1], I8X, "", Store);
-  Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
+  Function *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
   CallInst *StoreStrong = createCallInst(Decl, Args, "", Store, BlockColors);
   StoreStrong->setDoesNotThrow();
   StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -432,102 +443,100 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
 }
 
 bool ObjCARCContract::tryToPeepholeInstruction(
-  Function &F, Instruction *Inst, inst_iterator &Iter,
-  SmallPtrSetImpl<Instruction *> &DependingInsts,
-  SmallPtrSetImpl<const BasicBlock *> &Visited,
-  bool &TailOkForStoreStrongs,
-  const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
-    // Only these library routines return their argument. In particular,
-    // objc_retainBlock does not necessarily return its argument.
+    Function &F, Instruction *Inst, inst_iterator &Iter,
+    SmallPtrSetImpl<Instruction *> &DependingInsts,
+    SmallPtrSetImpl<const BasicBlock *> &Visited, bool &TailOkForStoreStrongs,
+    const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+  // Only these library routines return their argument. In particular,
+  // objc_retainBlock does not necessarily return its argument.
   ARCInstKind Class = GetBasicARCInstKind(Inst);
-    switch (Class) {
-    case ARCInstKind::FusedRetainAutorelease:
-    case ARCInstKind::FusedRetainAutoreleaseRV:
+  switch (Class) {
+  case ARCInstKind::FusedRetainAutorelease:
+  case ARCInstKind::FusedRetainAutoreleaseRV:
+    return false;
+  case ARCInstKind::Autorelease:
+  case ARCInstKind::AutoreleaseRV:
+    return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+  case ARCInstKind::Retain:
+    // Attempt to convert retains to retainrvs if they are next to function
+    // calls.
+    if (!optimizeRetainCall(F, Inst))
       return false;
-    case ARCInstKind::Autorelease:
-    case ARCInstKind::AutoreleaseRV:
-      return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
-    case ARCInstKind::Retain:
-      // Attempt to convert retains to retainrvs if they are next to function
-      // calls.
-      if (!optimizeRetainCall(F, Inst))
-        return false;
-      // If we succeed in our optimization, fall through.
-      LLVM_FALLTHROUGH;
-    case ARCInstKind::RetainRV:
-    case ARCInstKind::ClaimRV: {
-      // If we're compiling for a target which needs a special inline-asm
-      // marker to do the return value optimization, insert it now.
-      if (!RVInstMarker)
-        return false;
-      BasicBlock::iterator BBI = Inst->getIterator();
-      BasicBlock *InstParent = Inst->getParent();
-
-      // Step up to see if the call immediately precedes the RV call.
-      // If it's an invoke, we have to cross a block boundary. And we have
-      // to carefully dodge no-op instructions.
-      do {
-        if (BBI == InstParent->begin()) {
-          BasicBlock *Pred = InstParent->getSinglePredecessor();
-          if (!Pred)
-            goto decline_rv_optimization;
-          BBI = Pred->getTerminator()->getIterator();
-          break;
-        }
-        --BBI;
-      } while (IsNoopInstruction(&*BBI));
-
-      if (&*BBI == GetArgRCIdentityRoot(Inst)) {
-        LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
-                             "optimization.\n");
-        Changed = true;
-        InlineAsm *IA = InlineAsm::get(
-            FunctionType::get(Type::getVoidTy(Inst->getContext()),
-                              /*isVarArg=*/false),
-            RVInstMarker->getString(),
-            /*Constraints=*/"", /*hasSideEffects=*/true);
-
-        createCallInst(IA, None, "", Inst, BlockColors);
-      }
-    decline_rv_optimization:
+    // If we succeed in our optimization, fall through.
+    LLVM_FALLTHROUGH;
+  case ARCInstKind::RetainRV:
+  case ARCInstKind::ClaimRV: {
+    // If we're compiling for a target which needs a special inline-asm
+    // marker to do the return value optimization, insert it now.
+    if (!RVInstMarker)
       return false;
-    }
-    case ARCInstKind::InitWeak: {
-      // objc_initWeak(p, null) => *p = null
-      CallInst *CI = cast<CallInst>(Inst);
-      if (IsNullOrUndef(CI->getArgOperand(1))) {
-        Value *Null =
-          ConstantPointerNull::get(cast<PointerType>(CI->getType()));
-        Changed = true;
-        new StoreInst(Null, CI->getArgOperand(0), CI);
-
-        LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
-                          << "                 New = " << *Null << "\n");
-
-        CI->replaceAllUsesWith(Null);
-        CI->eraseFromParent();
+    BasicBlock::iterator BBI = Inst->getIterator();
+    BasicBlock *InstParent = Inst->getParent();
+
+    // Step up to see if the call immediately precedes the RV call.
+    // If it's an invoke, we have to cross a block boundary. And we have
+    // to carefully dodge no-op instructions.
+    do {
+      if (BBI == InstParent->begin()) {
+        BasicBlock *Pred = InstParent->getSinglePredecessor();
+        if (!Pred)
+          goto decline_rv_optimization;
+        BBI = Pred->getTerminator()->getIterator();
+        break;
       }
-      return true;
+      --BBI;
+    } while (IsNoopInstruction(&*BBI));
+
+    if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+      LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
+                           "optimization.\n");
+      Changed = true;
+      InlineAsm *IA =
+          InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+                                           /*isVarArg=*/false),
+                         RVInstMarker->getString(),
+                         /*Constraints=*/"", /*hasSideEffects=*/true);
+
+      createCallInst(IA, None, "", Inst, BlockColors);
     }
-    case ARCInstKind::Release:
-      // Try to form an objc store strong from our release. If we fail, there is
-      // nothing further to do below, so continue.
-      tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
-      return true;
-    case ARCInstKind::User:
-      // Be conservative if the function has any alloca instructions.
-      // Technically we only care about escaping alloca instructions,
-      // but this is sufficient to handle some interesting cases.
-      if (isa<AllocaInst>(Inst))
-        TailOkForStoreStrongs = false;
-      return true;
-    case ARCInstKind::IntrinsicUser:
-      // Remove calls to @llvm.objc.clang.arc.use(...).
-      Inst->eraseFromParent();
-      return true;
-    default:
-      return true;
+  decline_rv_optimization:
+    return false;
+  }
+  case ARCInstKind::InitWeak: {
+    // objc_initWeak(p, null) => *p = null
+    CallInst *CI = cast<CallInst>(Inst);
+    if (IsNullOrUndef(CI->getArgOperand(1))) {
+      Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+      Changed = true;
+      new StoreInst(Null, CI->getArgOperand(0), CI);
+
+      LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+                        << "                 New = " << *Null << "\n");
+
+      CI->replaceAllUsesWith(Null);
+      CI->eraseFromParent();
     }
+    return true;
+  }
+  case ARCInstKind::Release:
+    // Try to form an objc store strong from our release. If we fail, there is
+    // nothing further to do below, so continue.
+    tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
+    return true;
+  case ARCInstKind::User:
+    // Be conservative if the function has any alloca instructions.
+    // Technically we only care about escaping alloca instructions,
+    // but this is sufficient to handle some interesting cases.
+    if (isa<AllocaInst>(Inst))
+      TailOkForStoreStrongs = false;
+    return true;
+  case ARCInstKind::IntrinsicUser:
+    // Remove calls to @llvm.objc.clang.arc.use(...).
+    Inst->eraseFromParent();
+    return true;
+  default:
+    return true;
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -568,6 +577,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
   // reduces register pressure.
   SmallPtrSet<Instruction *, 4> DependingInstructions;
   SmallPtrSet<const BasicBlock *, 4> Visited;
+
+  // Cache the basic block size.
+  DenseMap<const BasicBlock *, unsigned> BBSizeMap;
+
+  // A lambda that lazily computes the size of a basic block and determines
+  // whether the size exceeds MaxBBSize.
+  auto IsLargeBB = [&](const BasicBlock *BB) {
+    unsigned BBSize;
+    auto I = BBSizeMap.find(BB);
+
+    if (I != BBSizeMap.end())
+      BBSize = I->second;
+    else
+      BBSize = BBSizeMap[BB] = BB->size();
+
+    return BBSize > MaxBBSize;
+  };
+
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
     Instruction *Inst = &*I++;
 
@@ -585,7 +612,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
     // and such; to do the replacement, the argument must have type i8*.
 
     // Function for replacing uses of Arg dominated by Inst.
-    auto ReplaceArgUses = [Inst, this](Value *Arg) {
+    auto ReplaceArgUses = [Inst, IsLargeBB, this](Value *Arg) {
       // If we're compiling bugpointed code, don't get in trouble.
       if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
         return;
@@ -597,6 +624,17 @@ bool ObjCARCContract::runOnFunction(Function &F) {
         Use &U = *UI++;
         unsigned OperandNo = U.getOperandNo();
 
+        // Don't replace the uses if Inst and the user belong to the same basic
+        // block and the size of the basic block is large. We don't want to call
+        // DominatorTree::dominate in that case. We can remove this check if we
+        // can use OrderedBasicBlock to compute the dominance relation between
+        // two instructions, but that's not currently possible since it doesn't
+        // recompute the instruction ordering when new instructions are inserted
+        // to the basic block.
+        if (Inst->getParent() == cast<Instruction>(U.getUser())->getParent() &&
+            IsLargeBB(Inst->getParent()))
+          continue;
+
         // If the call's return value dominates a use of the call's argument
         // value, rewrite the use to use the return value. We check for
         // reachability here because an unreachable call is considered to
@@ -737,15 +775,8 @@ bool ObjCARCContract::doInitialization(Module &M) {
   EP.init(&M);
 
   // Initialize RVInstMarker.
-  RVInstMarker = nullptr;
-  if (NamedMDNode *NMD =
-          M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
-    if (NMD->getNumOperands() == 1) {
-      const MDNode *N = NMD->getOperand(0);
-      if (N->getNumOperands() == 1)
-        if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
-          RVInstMarker = S;
-    }
+  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+  RVInstMarker = dyn_cast_or_null<MDString>(M.getModuleFlag(MarkerKey));
 
   return false;
 }
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 6a345ef56e1b..04e98d8f5577 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9a02174556fc..6653ff0bb91a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -1,9 +1,8 @@
 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,6 +73,11 @@ using namespace llvm::objcarc;
 
 #define DEBUG_TYPE "objc-arc-opts"
 
+static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
+    cl::Hidden,
+    cl::desc("Maximum number of ptr states the optimizer keeps track of"),
+    cl::init(4095));
+
 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
 /// @{
 
@@ -220,6 +224,10 @@ namespace {
       return !PerPtrTopDown.empty();
     }
 
+    unsigned top_down_ptr_list_size() const {
+      return std::distance(top_down_ptr_begin(), top_down_ptr_end());
+    }
+
     using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
     using const_bottom_up_ptr_iterator =
         decltype(PerPtrBottomUp)::const_iterator;
@@ -238,6 +246,10 @@ namespace {
       return !PerPtrBottomUp.empty();
     }
 
+    unsigned bottom_up_ptr_list_size() const {
+      return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
+    }
+
     /// Mark this block as being an entry block, which has one path from the
     /// entry by definition.
     void SetAsEntry() { TopDownPathCount = 1; }
@@ -481,6 +493,10 @@ namespace {
     /// A flag indicating whether this optimization pass should run.
     bool Run;
 
+    /// A flag indicating whether the optimization that removes or moves
+    /// retain/release pairs should be performed.
+    bool DisableRetainReleasePairing = false;
+
     /// Flags which determine whether each of the interesting runtime functions
     /// is in fact used in the current function.
     unsigned UsedInThisFunction;
@@ -642,7 +658,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
                        "Old = "
                     << *RetainRV << "\n");
 
-  Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
+  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
   cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
 
   LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -691,7 +707,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
              << *AutoreleaseRV << "\n");
 
   CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
-  Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
+  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
   AutoreleaseRVCI->setCalledFunction(NewDecl);
   AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
   Class = ARCInstKind::Autorelease;
@@ -744,6 +760,19 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
 
     LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
 
+    // Some of the ARC calls can be deleted if their arguments are global
+    // variables that are inert in ARC.
+    if (IsNoopOnGlobal(Class)) {
+      Value *Opnd = Inst->getOperand(0);
+      if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
+        if (GV->hasAttribute("objc_arc_inert")) {
+          if (!Inst->getType()->isVoidTy())
+            Inst->replaceAllUsesWith(Opnd);
+          Inst->eraseFromParent();
+          continue;
+        }
+    }
+
     switch (Class) {
     default: break;
 
@@ -830,7 +859,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
         // Create the declaration lazily.
         LLVMContext &C = Inst->getContext();
 
-        Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+        Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
         CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
                                              Call);
         NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
@@ -849,7 +878,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
 
     // For functions which can never be passed stack arguments, add
     // a tail keyword.
-    if (IsAlwaysTail(Class)) {
+    if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
       Changed = true;
       LLVM_DEBUG(
           dbgs() << "Adding tail keyword to function since it can never be "
@@ -1273,6 +1302,13 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
     LLVM_DEBUG(dbgs() << "    Visiting " << *Inst << "\n");
 
     NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+
+    // Bail out if the number of pointers being tracked becomes too large so
+    // that this pass can complete in a reasonable amount of time.
+    if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
+      DisableRetainReleasePairing = true;
+      return false;
+    }
   }
 
   // If there's a predecessor with an invoke, visit the invoke as if it were
@@ -1395,6 +1431,13 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
     LLVM_DEBUG(dbgs() << "    Visiting " << Inst << "\n");
 
     NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
+
+    // Bail out if the number of pointers being tracked becomes too large so
+    // that this pass can complete in a reasonable amount of time.
+    if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
+      DisableRetainReleasePairing = true;
+      return false;
+    }
   }
 
   LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
@@ -1501,13 +1544,19 @@ bool ObjCARCOpt::Visit(Function &F,
 
   // Use reverse-postorder on the reverse CFG for bottom-up.
   bool BottomUpNestingDetected = false;
-  for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder))
+  for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
     BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+    if (DisableRetainReleasePairing)
+      return false;
+  }
 
   // Use reverse-postorder for top-down.
   bool TopDownNestingDetected = false;
-  for (BasicBlock *BB : llvm::reverse(PostOrder))
+  for (BasicBlock *BB : llvm::reverse(PostOrder)) {
     TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+    if (DisableRetainReleasePairing)
+      return false;
+  }
 
   return TopDownNestingDetected && BottomUpNestingDetected;
 }
@@ -1528,7 +1577,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
   for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
     Value *MyArg = ArgTy == ParamTy ? Arg :
                    new BitCastInst(Arg, ParamTy, "", InsertPt);
-    Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
     CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
     Call->setDoesNotThrow();
     Call->setTailCall();
@@ -1541,7 +1590,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
   for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
     Value *MyArg = ArgTy == ParamTy ? Arg :
                    new BitCastInst(Arg, ParamTy, "", InsertPt);
-    Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
     CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
     // Attach a clang.imprecise_release metadata tag, if appropriate.
     if (MDNode *M = ReleasesToMove.ReleaseMetadata)
@@ -1877,7 +1926,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
           Changed = true;
           // If the load has a builtin retain, insert a plain retain for it.
           if (Class == ARCInstKind::LoadWeakRetained) {
-            Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
             CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
             CI->setTailCall();
           }
@@ -1906,7 +1955,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
           Changed = true;
           // If the load has a builtin retain, insert a plain retain for it.
           if (Class == ARCInstKind::LoadWeakRetained) {
-            Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
             CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
             CI->setTailCall();
           }
@@ -2003,6 +2052,9 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
   // Analyze the CFG of the function, and all instructions.
   bool NestingDetected = Visit(F, BBStates, Retains, Releases);
 
+  if (DisableRetainReleasePairing)
+    return false;
+
   // Transform.
   bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
                                                            Releases,
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 3004fffb9745..c6138edba95a 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -1,9 +1,8 @@
 //===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 1276f564a022..8fd842fd42d6 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -1,9 +1,8 @@
 //===- ProvenanceAnalysis.h - ObjC ARC Optimization -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 870a5f600fd8..b768f7973b87 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
 //===- ProvenanceAnalysisEvaluator.cpp - ObjC ARC Optimization ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index 8a7b6a74fae2..3243481dee0d 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -1,9 +1,8 @@
 //===- PtrState.cpp -------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h
index f5b9b853d8e3..66614c06cb79 100644
--- a/lib/Transforms/ObjCARC/PtrState.h
+++ b/lib/Transforms/ObjCARC/PtrState.h
@@ -1,9 +1,8 @@
 //===- PtrState.h - ARC State for a Ptr -------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index b0602d96798c..7f7460c5746a 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -1,9 +1,8 @@
 //===- ADCE.cpp - Code to perform dead code elimination -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,9 +19,11 @@
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -30,7 +31,6 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -136,7 +136,7 @@ class AggressiveDeadCodeElimination {
   SmallPtrSet<const Metadata *, 32> AliveScopes;
 
   /// Set of blocks with not known to have live terminators.
-  SmallPtrSet<BasicBlock *, 16> BlocksWithDeadTerminators;
+  SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
 
   /// The set of blocks which we have determined whose control
   /// dependence sources must be live and which have not had
@@ -390,7 +390,7 @@ void AggressiveDeadCodeElimination::markLive(Instruction *I) {
   // Mark the containing block live
   auto &BBInfo = *Info.Block;
   if (BBInfo.Terminator == I) {
-    BlocksWithDeadTerminators.erase(BBInfo.BB);
+    BlocksWithDeadTerminators.remove(BBInfo.BB);
     // For live terminators, mark destination blocks
     // live to preserve this control flow edges.
     if (!BBInfo.UnconditionalBranch)
@@ -479,10 +479,14 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
   // which currently have dead terminators that are control
   // dependence sources of a block which is in NewLiveBlocks.
 
+  const SmallPtrSet<BasicBlock *, 16> BWDT{
+      BlocksWithDeadTerminators.begin(),
+      BlocksWithDeadTerminators.end()
+  };
   SmallVector<BasicBlock *, 32> IDFBlocks;
   ReverseIDFCalculator IDFs(PDT);
   IDFs.setDefiningBlocks(NewLiveBlocks);
-  IDFs.setLiveInBlocks(BlocksWithDeadTerminators);
+  IDFs.setLiveInBlocks(BWDT);
   IDFs.calculate(IDFBlocks);
   NewLiveBlocks.clear();
 
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 0830ff5dd042..de9a62e88c27 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -1,10 +1,9 @@
 //===----------------------- AlignmentFromAssumptions.cpp -----------------===//
 //                  Set Load/Store Alignments From Assumptions
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index d3c9b9a270aa..9bd387c33e80 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -1,9 +1,8 @@
 //===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,14 +41,17 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
          "Trivializing a non-integer value?");
 
   // Initialize the worklist with eligible direct users.
+  SmallPtrSet<Instruction *, 16> Visited;
   SmallVector<Instruction *, 16> WorkList;
   for (User *JU : I->users()) {
     // If all bits of a user are demanded, then we know that nothing below that
     // in the def-use chain needs to be changed.
     auto *J = dyn_cast<Instruction>(JU);
     if (J && J->getType()->isIntOrIntVectorTy() &&
-        !DB.getDemandedBits(J).isAllOnesValue())
+        !DB.getDemandedBits(J).isAllOnesValue()) {
+      Visited.insert(J);
       WorkList.push_back(J);
+    }
 
     // Note that we need to check for non-int types above before asking for
     // demanded bits. Normally, the only way to reach an instruction with an
@@ -62,7 +64,6 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
   }
 
   // DFS through subsequent users while tracking visits to avoid cycles.
-  SmallPtrSet<Instruction *, 16> Visited;
   while (!WorkList.empty()) {
     Instruction *J = WorkList.pop_back_val();
 
@@ -73,13 +74,11 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
     // 1. llvm.assume demands its operand, so trivializing can't change it.
     // 2. range metadata only applies to memory accesses which demand all bits.
 
-    Visited.insert(J);
-
     for (User *KU : J->users()) {
       // If all bits of a user are demanded, then we know that nothing below
       // that in the def-use chain needs to be changed.
       auto *K = dyn_cast<Instruction>(KU);
-      if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
+      if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
           !DB.getDemandedBits(K).isAllOnesValue())
         WorkList.push_back(K);
     }
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index a806d6faed60..3519b000a33f 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -1,9 +1,8 @@
 //===- CallSiteSplitting.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -184,6 +183,9 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
 }
 
 static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
+  if (CS.isConvergent() || CS.cannotDuplicate())
+    return false;
+
   // FIXME: As of now we handle only CallInst. InvokeInst could be handled
   // without too much effort.
   Instruction *Instr = CS.getInstruction();
@@ -367,7 +369,7 @@ static void splitCallSite(
     assert(Splits.size() == 2 && "Expected exactly 2 splits!");
     for (unsigned i = 0; i < Splits.size(); i++) {
       Splits[i]->getTerminator()->eraseFromParent();
-      DTU.deleteEdge(Splits[i], TailBB);
+      DTU.applyUpdatesPermissive({{DominatorTree::Delete, Splits[i], TailBB}});
     }
 
     // Erase the tail block once done with musttail patching
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index beac0d967a98..98243a23f1ef 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -1,9 +1,8 @@
 //===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -42,6 +41,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
@@ -61,6 +61,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -112,11 +113,10 @@ public:
     if (ConstHoistWithBlockFrequency)
       AU.addRequired<BlockFrequencyInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 
-  void releaseMemory() override { Impl.releaseMemory(); }
-
 private:
   ConstantHoistingPass Impl;
 };
@@ -129,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
                       "Constant Hoisting", false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
                     "Constant Hoisting", false, false)
@@ -151,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
                    ConstHoistWithBlockFrequency
                        ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
                        : nullptr,
-                   Fn.getEntryBlock());
+                   Fn.getEntryBlock(),
+                   &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
 
   if (MadeChange) {
     LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -211,6 +213,9 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
   // in the dominator tree from Entry to 'BB'.
   SmallPtrSet<BasicBlock *, 16> Candidates;
   for (auto BB : BBs) {
+    // Ignore unreachable basic blocks.
+    if (!DT.isReachableFromEntry(BB))
+      continue;
     Path.clear();
     // Walk up the dominator tree until Entry or another BB in BBs
     // is reached. Insert the nodes on the way to the Path.
@@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
                                            ConstCandVecType::iterator &MaxCostItr) {
   unsigned NumUses = 0;
 
-  if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+  bool OptForSize = Entry->getParent()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+  if (!OptForSize || std::distance(S,E) > 100) {
     for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
       NumUses += ConstCand->Uses.size();
       if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
@@ -640,8 +647,8 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
       ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
 
   // Sort the constants by value and type. This invalidates the mapping!
-  std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
-             [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
+  llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,
+                                     const ConstantCandidate &RHS) {
     if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
       return LHS.ConstInt->getType()->getBitWidth() <
              RHS.ConstInt->getType()->getBitWidth();
@@ -824,7 +831,9 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
       BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
   for (auto const &ConstInfo : ConstInfoVec) {
     SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
-    assert(!IPSet.empty() && "IPSet is empty");
+    // We can have an empty set if the function contains unreachable blocks.
+    if (IPSet.empty())
+      continue;
 
     unsigned UsesNum = 0;
     unsigned ReBasesNum = 0;
@@ -917,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
 /// Optimize expensive integer constants in the given function.
 bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
                                    DominatorTree &DT, BlockFrequencyInfo *BFI,
-                                   BasicBlock &Entry) {
+                                   BasicBlock &Entry, ProfileSummaryInfo *PSI) {
   this->TTI = &TTI;
   this->DT = &DT;
   this->BFI = BFI;
   this->DL = &Fn.getParent()->getDataLayout();
   this->Ctx = &Fn.getContext();
   this->Entry = &Entry;
+  this->PSI = PSI;
   // Collect all constant candidates.
   collectConstantCandidates(Fn);
 
@@ -948,6 +958,8 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
   // Cleanup dead instructions.
   deleteDeadCastInst();
 
+  cleanup();
+
   return MadeChange;
 }
 
@@ -958,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
   auto BFI = ConstHoistWithBlockFrequency
                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
                  : nullptr;
-  if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
+  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 51032b0625f8..770321c740a0 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -1,9 +1,8 @@
 //===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index d0105701c73f..89497177524f 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -1,9 +1,8 @@
 //===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
@@ -27,7 +27,6 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
@@ -64,8 +63,10 @@ STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
 STATISTIC(NumSRems,     "Number of srem converted to urem");
 STATISTIC(NumOverflows, "Number of overflow checks removed");
+STATISTIC(NumSaturating,
+    "Number of saturating arithmetics converted to normal arithmetics");
 
-static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
+static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(false));
 
 namespace {
 
@@ -307,11 +308,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
 /// that cannot fire no matter what the incoming edge can safely be removed. If
 /// a case fires on every incoming edge then the entire switch can be removed
 /// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
                           DominatorTree *DT) {
   DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
-  Value *Cond = SI->getCondition();
-  BasicBlock *BB = SI->getParent();
+  Value *Cond = I->getCondition();
+  BasicBlock *BB = I->getParent();
 
   // If the condition was defined in same block as the switch then LazyValueInfo
   // currently won't say anything useful about it, though in theory it could.
@@ -328,67 +329,72 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
   for (auto *Succ : successors(BB))
     SuccessorsCount[Succ]++;
 
-  for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
-    ConstantInt *Case = CI->getCaseValue();
-
-    // Check to see if the switch condition is equal to/not equal to the case
-    // value on every incoming edge, equal/not equal being the same each time.
-    LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
-    for (pred_iterator PI = PB; PI != PE; ++PI) {
-      // Is the switch condition equal to the case value?
-      LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
-                                                              Cond, Case, *PI,
-                                                              BB, SI);
-      // Give up on this case if nothing is known.
-      if (Value == LazyValueInfo::Unknown) {
-        State = LazyValueInfo::Unknown;
-        break;
+  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
+    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
+    SwitchInstProfUpdateWrapper SI(*I);
+
+    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+      ConstantInt *Case = CI->getCaseValue();
+
+      // Check to see if the switch condition is equal to/not equal to the case
+      // value on every incoming edge, equal/not equal being the same each time.
+      LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
+        // Is the switch condition equal to the case value?
+        LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+                                                                Cond, Case, *PI,
+                                                                BB, SI);
+        // Give up on this case if nothing is known.
+        if (Value == LazyValueInfo::Unknown) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
+
+        // If this was the first edge to be visited, record that all other edges
+        // need to give the same result.
+        if (PI == PB) {
+          State = Value;
+          continue;
+        }
+
+        // If this case is known to fire for some edges and known not to fire for
+        // others then there is nothing we can do - give up.
+        if (Value != State) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
       }
 
-      // If this was the first edge to be visited, record that all other edges
-      // need to give the same result.
-      if (PI == PB) {
-        State = Value;
+      if (State == LazyValueInfo::False) {
+        // This case never fires - remove it.
+        BasicBlock *Succ = CI->getCaseSuccessor();
+        Succ->removePredecessor(BB);
+        CI = SI.removeCase(CI);
+        CE = SI->case_end();
+
+        // The condition can be modified by removePredecessor's PHI simplification
+        // logic.
+        Cond = SI->getCondition();
+
+        ++NumDeadCases;
+        Changed = true;
+        if (--SuccessorsCount[Succ] == 0)
+          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
         continue;
       }
-
-      // If this case is known to fire for some edges and known not to fire for
-      // others then there is nothing we can do - give up.
-      if (Value != State) {
-        State = LazyValueInfo::Unknown;
+      if (State == LazyValueInfo::True) {
+        // This case always fires.  Arrange for the switch to be turned into an
+        // unconditional branch by replacing the switch condition with the case
+        // value.
+        SI->setCondition(Case);
+        NumDeadCases += SI->getNumCases();
+        Changed = true;
         break;
       }
-    }
 
-    if (State == LazyValueInfo::False) {
-      // This case never fires - remove it.
-      BasicBlock *Succ = CI->getCaseSuccessor();
-      Succ->removePredecessor(BB);
-      CI = SI->removeCase(CI);
-      CE = SI->case_end();
-
-      // The condition can be modified by removePredecessor's PHI simplification
-      // logic.
-      Cond = SI->getCondition();
-
-      ++NumDeadCases;
-      Changed = true;
-      if (--SuccessorsCount[Succ] == 0)
-        DTU.deleteEdge(BB, Succ);
-      continue;
-    }
-    if (State == LazyValueInfo::True) {
-      // This case always fires.  Arrange for the switch to be turned into an
-      // unconditional branch by replacing the switch condition with the case
-      // value.
-      SI->setCondition(Case);
-      NumDeadCases += SI->getNumCases();
-      Changed = true;
-      break;
+      // Increment the case iterator since we didn't delete it.
+      ++CI;
     }
-
-    // Increment the case iterator since we didn't delete it.
-    ++CI;
   }
 
   if (Changed)
@@ -399,56 +405,48 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
   return Changed;
 }
 
-// See if we can prove that the given overflow intrinsic will not overflow.
-static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
-  using OBO = OverflowingBinaryOperator;
-  auto NoWrap = [&] (Instruction::BinaryOps BinOp, unsigned NoWrapKind) {
-    Value *RHS = II->getOperand(1);
-    ConstantRange RRange = LVI->getConstantRange(RHS, II->getParent(), II);
-    ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
-        BinOp, RRange, NoWrapKind);
-    // As an optimization, do not compute LRange if we do not need it.
-    if (NWRegion.isEmptySet())
-      return false;
-    Value *LHS = II->getOperand(0);
-    ConstantRange LRange = LVI->getConstantRange(LHS, II->getParent(), II);
-    return NWRegion.contains(LRange);
-  };
-  switch (II->getIntrinsicID()) {
-  default:
-    break;
-  case Intrinsic::uadd_with_overflow:
-    return NoWrap(Instruction::Add, OBO::NoUnsignedWrap);
-  case Intrinsic::sadd_with_overflow:
-    return NoWrap(Instruction::Add, OBO::NoSignedWrap);
-  case Intrinsic::usub_with_overflow:
-    return NoWrap(Instruction::Sub, OBO::NoUnsignedWrap);
-  case Intrinsic::ssub_with_overflow:
-    return NoWrap(Instruction::Sub, OBO::NoSignedWrap);
-  }
-  return false;
+// See if we can prove that the given binary op intrinsic will not overflow.
+static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
+  ConstantRange LRange = LVI->getConstantRange(
+      BO->getLHS(), BO->getParent(), BO);
+  ConstantRange RRange = LVI->getConstantRange(
+      BO->getRHS(), BO->getParent(), BO);
+  ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+      BO->getBinaryOp(), RRange, BO->getNoWrapKind());
+  return NWRegion.contains(LRange);
 }
 
-static void processOverflowIntrinsic(IntrinsicInst *II) {
-  IRBuilder<> B(II);
-  Value *NewOp = nullptr;
-  switch (II->getIntrinsicID()) {
-  default:
-    llvm_unreachable("Unexpected instruction.");
-  case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow:
-    NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
-    break;
-  case Intrinsic::usub_with_overflow:
-  case Intrinsic::ssub_with_overflow:
-    NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
-    break;
+static void processOverflowIntrinsic(WithOverflowInst *WO) {
+  IRBuilder<> B(WO);
+  Value *NewOp = B.CreateBinOp(
+      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
+  // Constant-folding could have happened.
+  if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
+    if (WO->isSigned())
+      Inst->setHasNoSignedWrap();
+    else
+      Inst->setHasNoUnsignedWrap();
   }
+
+  Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0);
+  NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1);
+  WO->replaceAllUsesWith(NewI);
+  WO->eraseFromParent();
   ++NumOverflows;
-  Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
-  NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
-  II->replaceAllUsesWith(NewI);
-  II->eraseFromParent();
+}
+
+static void processSaturatingInst(SaturatingInst *SI) {
+  BinaryOperator *BinOp = BinaryOperator::Create(
+      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+  BinOp->setDebugLoc(SI->getDebugLoc());
+  if (SI->isSigned())
+    BinOp->setHasNoSignedWrap();
+  else
+    BinOp->setHasNoUnsignedWrap();
+
+  SI->replaceAllUsesWith(BinOp);
+  SI->eraseFromParent();
+  ++NumSaturating;
 }
 
 /// Infer nonnull attributes for the arguments at the specified callsite.
@@ -456,13 +454,44 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
   SmallVector<unsigned, 4> ArgNos;
   unsigned ArgNo = 0;
 
-  if (auto *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
-    if (willNotOverflow(II, LVI)) {
-      processOverflowIntrinsic(II);
+  if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
+    if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
+      processOverflowIntrinsic(WO);
+      return true;
+    }
+  }
+
+  if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
+    if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+      processSaturatingInst(SI);
       return true;
     }
   }
 
+  // Deopt bundle operands are intended to capture state with minimal
+  // perturbance of the code otherwise.  If we can find a constant value for
+  // any such operand and remove a use of the original value, that's
+  // desirable since it may allow further optimization of that value (e.g. via
+  // single use rules in instcombine).  Since deopt uses tend to,
+  // idiomatically, appear along rare conditional paths, it's reasonably likely
+  // we may have a conditional fact with which LVI can fold.
+  if (auto DeoptBundle = CS.getOperandBundle(LLVMContext::OB_deopt)) {
+    bool Progress = false;
+    for (const Use &ConstU : DeoptBundle->Inputs) {
+      Use &U = const_cast<Use&>(ConstU);
+      Value *V = U.get();
+      if (V->getType()->isVectorTy()) continue;
+      if (isa<Constant>(V)) continue;
+
+      Constant *C = LVI->getConstant(V, CS.getParent(), CS.getInstruction());
+      if (!C) continue;
+      U.set(C);
+      Progress = true;
+    }
+    if (Progress)
+      return true;
+  }
+
   for (Value *V : CS.args()) {
     PointerType *Type = dyn_cast<PointerType>(V->getType());
     // Try to mark pointer typed parameters as non-null.  We skip the
@@ -512,7 +541,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
   // Find the smallest power of two bitwidth that's sufficient to hold Instr's
   // operands.
   auto OrigWidth = Instr->getType()->getIntegerBitWidth();
-  ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
+  ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
   for (Value *Operand : Instr->operands()) {
     OperandRange = OperandRange.unionWith(
         LVI->getConstantRange(Operand, Instr->getParent()));
@@ -603,55 +632,42 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
   return true;
 }
 
-static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
+static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
   using OBO = OverflowingBinaryOperator;
 
-  if (DontProcessAdds)
+  if (DontAddNoWrapFlags)
     return false;
 
-  if (AddOp->getType()->isVectorTy())
+  if (BinOp->getType()->isVectorTy())
     return false;
 
-  bool NSW = AddOp->hasNoSignedWrap();
-  bool NUW = AddOp->hasNoUnsignedWrap();
+  bool NSW = BinOp->hasNoSignedWrap();
+  bool NUW = BinOp->hasNoUnsignedWrap();
   if (NSW && NUW)
     return false;
 
-  BasicBlock *BB = AddOp->getParent();
+  BasicBlock *BB = BinOp->getParent();
 
-  Value *LHS = AddOp->getOperand(0);
-  Value *RHS = AddOp->getOperand(1);
+  Value *LHS = BinOp->getOperand(0);
+  Value *RHS = BinOp->getOperand(1);
 
-  ConstantRange LRange = LVI->getConstantRange(LHS, BB, AddOp);
-
-  // Initialize RRange only if we need it. If we know that guaranteed no wrap
-  // range for the given LHS range is empty don't spend time calculating the
-  // range for the RHS.
-  Optional<ConstantRange> RRange;
-  auto LazyRRange = [&] () {
-      if (!RRange)
-        RRange = LVI->getConstantRange(RHS, BB, AddOp);
-      return RRange.getValue();
-  };
+  ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
+  ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
 
   bool Changed = false;
   if (!NUW) {
     ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
-        BinaryOperator::Add, LRange, OBO::NoUnsignedWrap);
-    if (!NUWRange.isEmptySet()) {
-      bool NewNUW = NUWRange.contains(LazyRRange());
-      AddOp->setHasNoUnsignedWrap(NewNUW);
-      Changed |= NewNUW;
-    }
+        BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
+    bool NewNUW = NUWRange.contains(LRange);
+    BinOp->setHasNoUnsignedWrap(NewNUW);
+    Changed |= NewNUW;
   }
   if (!NSW) {
     ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
-        BinaryOperator::Add, LRange, OBO::NoSignedWrap);
-    if (!NSWRange.isEmptySet()) {
-      bool NewNSW = NSWRange.contains(LazyRRange());
-      AddOp->setHasNoSignedWrap(NewNSW);
-      Changed |= NewNSW;
-    }
+        BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
+    bool NewNSW = NSWRange.contains(LRange);
+    BinOp->setHasNoSignedWrap(NewNSW);
+    Changed |= NewNSW;
   }
 
   return Changed;
@@ -725,7 +741,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
         BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
         break;
       case Instruction::Add:
-        BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
+      case Instruction::Sub:
+        BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
         break;
       }
     }
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 4c964e6e888c..479e0ed74074 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -1,9 +1,8 @@
 //===- DCE.cpp - Code to perform dead code elimination --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 469930ca6a19..a81645745b48 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1,9 +1,8 @@
 //===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,8 +28,8 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -57,6 +56,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -98,9 +98,8 @@ using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
 static void
 deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
                       MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
-                      InstOverlapIntervalsTy &IOL,
-                      DenseMap<Instruction*, size_t> *InstrOrdering,
-                      SmallSetVector<Value *, 16> *ValueSet = nullptr) {
+                      InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+                      SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
   SmallVector<Instruction*, 32> NowDeadInsts;
 
   NowDeadInsts.push_back(I);
@@ -136,8 +135,8 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
     }
 
     if (ValueSet) ValueSet->remove(DeadInst);
-    InstrOrdering->erase(DeadInst);
     IOL.erase(DeadInst);
+    OBB.eraseInstruction(DeadInst);
 
     if (NewIter == DeadInst->getIterator())
       NewIter = DeadInst->eraseFromParent();
@@ -657,8 +656,7 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
 static bool handleFree(CallInst *F, AliasAnalysis *AA,
                        MemoryDependenceResults *MD, DominatorTree *DT,
                        const TargetLibraryInfo *TLI,
-                       InstOverlapIntervalsTy &IOL,
-                       DenseMap<Instruction*, size_t> *InstrOrdering) {
+                       InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB) {
   bool MadeChange = false;
 
   MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -692,7 +690,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
 
       // DCE instructions only used to calculate that store.
       BasicBlock::iterator BBI(Dependency);
-      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering);
+      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB);
       ++NumFastStores;
       MadeChange = true;
 
@@ -715,7 +713,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
 /// the DeadStackObjects set. If so, they become live because the location is
 /// being loaded.
 static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
-                                  SmallSetVector<Value *, 16> &DeadStackObjects,
+                                  SmallSetVector<const Value *, 16> &DeadStackObjects,
                                   const DataLayout &DL, AliasAnalysis *AA,
                                   const TargetLibraryInfo *TLI,
                                   const Function *F) {
@@ -728,12 +726,12 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
   // If the kill pointer can be easily reduced to an alloca, don't bother doing
   // extraneous AA queries.
   if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
-    DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
+    DeadStackObjects.remove(UnderlyingPointer);
     return;
   }
 
   // Remove objects that could alias LoadedLoc.
-  DeadStackObjects.remove_if([&](Value *I) {
+  DeadStackObjects.remove_if([&](const Value *I) {
     // See if the loaded location could alias the stack location.
     MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F));
     return !AA->isNoAlias(StackLoc, LoadedLoc);
@@ -747,15 +745,15 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
 /// store i32 1, i32* %A
 /// ret void
 static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
-                             MemoryDependenceResults *MD,
-                             const TargetLibraryInfo *TLI,
-                             InstOverlapIntervalsTy &IOL,
-                             DenseMap<Instruction*, size_t> *InstrOrdering) {
+                           MemoryDependenceResults *MD,
+                           const TargetLibraryInfo *TLI,
+                           InstOverlapIntervalsTy &IOL,
+                           OrderedBasicBlock &OBB) {
   bool MadeChange = false;
 
   // Keep track of all of the stack objects that are dead at the end of the
   // function.
-  SmallSetVector<Value*, 16> DeadStackObjects;
+  SmallSetVector<const Value*, 16> DeadStackObjects;
 
   // Find all of the alloca'd pointers in the entry block.
   BasicBlock &Entry = BB.getParent()->front();
@@ -784,12 +782,12 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
     // If we find a store, check to see if it points into a dead stack value.
     if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
       // See through pointer-to-pointer bitcasts
-      SmallVector<Value *, 4> Pointers;
+      SmallVector<const Value *, 4> Pointers;
       GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
 
       // Stores to stack values are valid candidates for removal.
       bool AllDead = true;
-      for (Value *Pointer : Pointers)
+      for (const Value *Pointer : Pointers)
         if (!DeadStackObjects.count(Pointer)) {
           AllDead = false;
           break;
@@ -800,7 +798,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
 
         LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                           << *Dead << "\n  Objects: ";
-                   for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+                   for (SmallVectorImpl<const Value *>::iterator I =
+                            Pointers.begin(),
                         E = Pointers.end();
                         I != E; ++I) {
                      dbgs() << **I;
@@ -810,7 +809,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                    << '\n');
 
         // DCE instructions only used to calculate that store.
-        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB,
+                              &DeadStackObjects);
         ++NumFastStores;
         MadeChange = true;
         continue;
@@ -821,7 +821,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
     if (isInstructionTriviallyDead(&*BBI, TLI)) {
       LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n  DEAD: "
                         << *&*BBI << '\n');
-      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB,
+                            &DeadStackObjects);
       ++NumFastOther;
       MadeChange = true;
       continue;
@@ -847,7 +848,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
 
       // If the call might load from any of our allocas, then any store above
       // the call is live.
-      DeadStackObjects.remove_if([&](Value *I) {
+      DeadStackObjects.remove_if([&](const Value *I) {
         // See if the call site touches the value.
         return isRefSet(AA->getModRefInfo(
             Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
@@ -946,7 +947,9 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
     Value *Indices[1] = {
         ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
     GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+        EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
         EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+    NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
     EarlierIntrinsic->setDest(NewDestGEP);
     EarlierOffset = EarlierOffset + OffsetMoved;
   }
@@ -1025,7 +1028,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI,
                                InstOverlapIntervalsTy &IOL,
-                               DenseMap<Instruction*, size_t> *InstrOrdering) {
+                               OrderedBasicBlock &OBB) {
   // Must be a store instruction.
   StoreInst *SI = dyn_cast<StoreInst>(Inst);
   if (!SI)
@@ -1041,7 +1044,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
           dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
                  << *DepLoad << "\n  STORE: " << *SI << '\n');
 
-      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
       ++NumRedundantStores;
       return true;
     }
@@ -1059,7 +1062,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
           dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                  << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
 
-      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+      deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
       ++NumRedundantStores;
       return true;
     }
@@ -1073,11 +1076,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
   const DataLayout &DL = BB.getModule()->getDataLayout();
   bool MadeChange = false;
 
-  // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock?
-  // The current OrderedBasicBlock can't deal with mutation at the moment.
-  size_t LastThrowingInstIndex = 0;
-  DenseMap<Instruction*, size_t> InstrOrdering;
-  size_t InstrIndex = 1;
+  OrderedBasicBlock OBB(&BB);
+  Instruction *LastThrowing = nullptr;
 
   // A map of interval maps representing partially-overwritten value parts.
   InstOverlapIntervalsTy IOL;
@@ -1086,7 +1086,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
     // Handle 'free' calls specially.
     if (CallInst *F = isFreeCall(&*BBI, TLI)) {
-      MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering);
+      MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB);
       // Increment BBI after handleFree has potentially deleted instructions.
       // This ensures we maintain a valid iterator.
       ++BBI;
@@ -1095,10 +1095,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
 
     Instruction *Inst = &*BBI++;
 
-    size_t CurInstNumber = InstrIndex++;
-    InstrOrdering.insert(std::make_pair(Inst, CurInstNumber));
     if (Inst->mayThrow()) {
-      LastThrowingInstIndex = CurInstNumber;
+      LastThrowing = Inst;
       continue;
     }
 
@@ -1107,13 +1105,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
       continue;
 
     // eliminateNoopStore will update in iterator, if necessary.
-    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) {
+    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
       MadeChange = true;
       continue;
     }
 
     // If we find something that writes memory, get its memory dependence.
-    MemDepResult InstDep = MD->getDependency(Inst);
+    MemDepResult InstDep = MD->getDependency(Inst, &OBB);
 
     // Ignore any store where we can't find a local dependence.
     // FIXME: cross-block DSE would be fun. :)
@@ -1158,9 +1156,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
       // If the underlying object is a non-escaping memory allocation, any store
       // to it is dead along the unwind edge. Otherwise, we need to preserve
       // the store.
-      size_t DepIndex = InstrOrdering.lookup(DepWrite);
-      assert(DepIndex && "Unexpected instruction");
-      if (DepIndex <= LastThrowingInstIndex) {
+      if (LastThrowing && OBB.dominates(DepWrite, LastThrowing)) {
         const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
         bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
         if (!IsStoreDeadOnUnwind) {
@@ -1191,12 +1187,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                             << "\n  KILLER: " << *Inst << '\n');
 
           // Delete the store and now-dead instructions that feed it.
-          deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+          deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
           ++NumFastStores;
           MadeChange = true;
 
           // We erased DepWrite; start over.
-          InstDep = MD->getDependency(Inst);
+          InstDep = MD->getDependency(Inst, &OBB);
           continue;
         } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
                    ((OR == OW_Begin &&
@@ -1215,12 +1211,17 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
           auto *Earlier = dyn_cast<StoreInst>(DepWrite);
           auto *Later = dyn_cast<StoreInst>(Inst);
           if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Earlier->getValueOperand()->getType()) &&
               Later && isa<ConstantInt>(Later->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Later->getValueOperand()->getType()) &&
               memoryIsNotModifiedBetween(Earlier, Later, AA)) {
             // If the store we find is:
             //   a) partially overwritten by the store to 'Loc'
             //   b) the later store is fully contained in the earlier one and
             //   c) they both have a constant value
+            //   d) neither of the two stores needs padding
             // Merge the two stores, replacing the earlier store's value with a
             // merge of both values.
             // TODO: Deal with other constant types (vectors, etc), and probably
@@ -1264,14 +1265,11 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
             ++NumModifiedStores;
 
             // Remove earlier, wider, store
-            size_t Idx = InstrOrdering.lookup(DepWrite);
-            InstrOrdering.erase(DepWrite);
-            InstrOrdering.insert(std::make_pair(SI, Idx));
+            OBB.replaceInstruction(DepWrite, SI);
 
             // Delete the old stores and now-dead instructions that feed them.
-            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
-            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
-                                  &InstrOrdering);
+            deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB);
+            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
             MadeChange = true;
 
             // We erased DepWrite and Inst (Loc); start over.
@@ -1306,7 +1304,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
   // If this block ends in a return, unwind, or unreachable, all allocas are
   // dead at its end, which means stores to them are also dead.
   if (BB.getTerminator()->getNumSuccessors() == 0)
-    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering);
+    MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB);
 
   return MadeChange;
 }
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index ffcf34f1cf7a..876681b4f9de 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -1,9 +1,8 @@
 //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 1f09979b3382..f1f075257020 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1,9 +1,8 @@
 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,6 +75,16 @@ STATISTIC(NumDSE,      "Number of trivial dead stores removed");
 DEBUG_COUNTER(CSECounter, "early-cse",
               "Controls which instructions are removed");
 
+static cl::opt<unsigned> EarlyCSEMssaOptCap(
+    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
+    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
+             "for faster compile. Caps the MemorySSA clobbering calls."));
+
+static cl::opt<bool> EarlyCSEDebugHash(
+    "earlycse-debug-hash", cl::init(false), cl::Hidden,
+    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
+             "function is well-behaved w.r.t. its isEqual predicate"));
+
 //===----------------------------------------------------------------------===//
 // SimpleValue
 //===----------------------------------------------------------------------===//
@@ -126,7 +135,33 @@ template <> struct DenseMapInfo<SimpleValue> {
 
 } // end namespace llvm
 
-unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+/// Match a 'select', looking through an optional 'not' of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+                                           Value *&B,
+                                           SelectPatternFlavor &Flavor) {
+  // Return false if V is not even a select.
+  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+    return false;
+
+  // Look through a 'not' of the condition operand by swapping A/B.
+  Value *CondNot;
+  if (match(Cond, m_Not(m_Value(CondNot)))) {
+    Cond = CondNot;
+    std::swap(A, B);
+  }
+
+  // Set flavor if we find a match, or set it to unknown otherwise; in
+  // either case, return true to indicate that this is a select we can
+  // process.
+  if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+    Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+  else
+    Flavor = SPF_UNKNOWN;
+
+  return true;
+}
+
+static unsigned getHashValueImpl(SimpleValue Val) {
   Instruction *Inst = Val.Inst;
   // Hash in all of the operands as pointers.
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
@@ -139,32 +174,56 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
   }
 
   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+    // Compares can be commuted by swapping the comparands and
+    // updating the predicate.  Choose the form that has the
+    // comparands in sorted order, or in the case of a tie, the
+    // one with the lower predicate.
     Value *LHS = CI->getOperand(0);
     Value *RHS = CI->getOperand(1);
     CmpInst::Predicate Pred = CI->getPredicate();
-    if (Inst->getOperand(0) > Inst->getOperand(1)) {
+    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
       std::swap(LHS, RHS);
-      Pred = CI->getSwappedPredicate();
+      Pred = SwappedPred;
     }
     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
   }
 
-  // Hash min/max/abs (cmp + select) to allow for commuted operands.
-  // Min/max may also have non-canonical compare predicate (eg, the compare for
-  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
-  // compare.
-  Value *A, *B;
-  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
-  // TODO: We should also detect FP min/max.
-  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
-      SPF == SPF_UMIN || SPF == SPF_UMAX) {
-    if (A > B)
+  // Hash general selects to allow matching commuted true/false operands.
+  SelectPatternFlavor SPF;
+  Value *Cond, *A, *B;
+  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+    // Hash min/max/abs (cmp + select) to allow for commuted operands.
+    // Min/max may also have non-canonical compare predicate (eg, the compare for
+    // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
+    // compare.
+    // TODO: We should also detect FP min/max.
+    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
+        SPF == SPF_UMIN || SPF == SPF_UMAX) {
+      if (A > B)
+        std::swap(A, B);
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+    if (SPF == SPF_ABS || SPF == SPF_NABS) {
+      // ABS/NABS always puts the input in A and its negation in B.
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+
+    // Hash general selects to allow matching commuted true/false operands.
+
+    // If we do not have a compare as the condition, just hash in the condition.
+    CmpInst::Predicate Pred;
+    Value *X, *Y;
+    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
+      return hash_combine(Inst->getOpcode(), Cond, A, B);
+
+    // Similar to cmp normalization (above) - canonicalize the predicate value:
+    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
+    if (CmpInst::getInversePredicate(Pred) < Pred) {
+      Pred = CmpInst::getInversePredicate(Pred);
       std::swap(A, B);
-    return hash_combine(Inst->getOpcode(), SPF, A, B);
-  }
-  if (SPF == SPF_ABS || SPF == SPF_NABS) {
-    // ABS/NABS always puts the input in A and its negation in B.
-    return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
   }
 
   if (CastInst *CI = dyn_cast<CastInst>(Inst))
@@ -179,8 +238,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
                         IVI->getOperand(1),
                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
 
-  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
-          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
           isa<ShuffleVectorInst>(Inst)) &&
          "Invalid/unknown instruction");
@@ -191,7 +249,19 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
 }
 
-bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+#ifndef NDEBUG
+  // If -earlycse-debug-hash was specified, return a constant -- this
+  // will force all hashing to collide, so we'll exhaustively search
+  // the table for a match, and the assertion in isEqual will fire if
+  // there's a bug causing equal keys to hash differently.
+  if (EarlyCSEDebugHash)
+    return 0;
+#endif
+  return getHashValueImpl(Val);
+}
+
+static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
 
   if (LHS.isSentinel() || RHS.isSentinel())
@@ -227,26 +297,68 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
 
   // Min/max/abs can occur with commuted operands, non-canonical predicates,
   // and/or non-canonical operands.
-  Value *LHSA, *LHSB;
-  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
-  // TODO: We should also detect FP min/max.
-  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
-      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
-      LSPF == SPF_ABS || LSPF == SPF_NABS) {
-    Value *RHSA, *RHSB;
-    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
+  // Selects can be non-trivially equivalent via inverted conditions and swaps.
+  SelectPatternFlavor LSPF, RSPF;
+  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
+  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
+      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
     if (LSPF == RSPF) {
-      // Abs results are placed in a defined order by matchSelectPattern.
-      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
+      // TODO: We should also detect FP min/max.
+      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
+          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
+        return ((LHSA == RHSA && LHSB == RHSB) ||
+                (LHSA == RHSB && LHSB == RHSA));
+
+      if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
+        // Abs results are placed in a defined order by matchSelectPattern.
         return LHSA == RHSA && LHSB == RHSB;
-      return ((LHSA == RHSA && LHSB == RHSB) ||
-              (LHSA == RHSB && LHSB == RHSA));
+      }
+
+      // select Cond, A, B <--> select not(Cond), B, A
+      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
+        return true;
+    }
+
+    // If the true/false operands are swapped and the conditions are compares
+    // with inverted predicates, the selects are equal:
+    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
+    //
+    // This also handles patterns with a double-negation in the sense of not +
+    // inverse, because we looked through a 'not' in the matching function and
+    // swapped A/B:
+    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
+    //
+    // This intentionally does NOT handle patterns with a double-negation in
+    // the sense of not + not, because doing so could result in values that
+    // compare as equal but hash differently in the min/max/abs cases, for
+    // example:
+    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
+    //   ^ hashes as min                  ^ would not hash as min
+    // In the context of the EarlyCSE pass, however, such cases never reach
+    // this code, as we simplify the double-negation before hashing the second
+    // select (and so still succeed at CSEing them).
+    if (LHSA == RHSB && LHSB == RHSA) {
+      CmpInst::Predicate PredL, PredR;
+      Value *X, *Y;
+      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
+          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
+          CmpInst::getInversePredicate(PredL) == PredR)
+        return true;
     }
   }
 
   return false;
 }
 
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+  // These comparisons are nontrivial, so assert that equality implies
+  // hash equality (DenseMap demands this as an invariant).
+  bool Result = isEqualImpl(LHS, RHS);
+  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
+         getHashValueImpl(LHS) == getHashValueImpl(RHS));
+  return Result;
+}
+
 //===----------------------------------------------------------------------===//
 // CallValue
 //===----------------------------------------------------------------------===//
@@ -419,6 +531,7 @@ public:
   bool run();
 
 private:
+  unsigned ClobberCounter = 0;
   // Almost a POD, but needs to call the constructors for the scoped hash
   // tables so that a new scope gets pushed on. These are RAII so that the
   // scope gets popped when the NodeScope is destroyed.
@@ -608,36 +721,11 @@ private:
       MSSA->verifyMemorySSA();
     // Removing a store here can leave MemorySSA in an unoptimized state by
     // creating MemoryPhis that have identical arguments and by creating
-    // MemoryUses whose defining access is not an actual clobber.  We handle the
-    // phi case eagerly here.  The non-optimized MemoryUse case is lazily
-    // updated by MemorySSA getClobberingMemoryAccess.
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
-      // Optimize MemoryPhi nodes that may become redundant by having all the
-      // same input values once MA is removed.
-      SmallSetVector<MemoryPhi *, 4> PhisToCheck;
-      SmallVector<MemoryAccess *, 8> WorkQueue;
-      WorkQueue.push_back(MA);
-      // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
-      // we shouldn't be processing that many phis and this will avoid an
-      // allocation in almost all cases.
-      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
-        MemoryAccess *WI = WorkQueue[I];
-
-        for (auto *U : WI->users())
-          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
-            PhisToCheck.insert(MP);
-
-        MSSAUpdater->removeMemoryAccess(WI);
-
-        for (MemoryPhi *MP : PhisToCheck) {
-          MemoryAccess *FirstIn = MP->getIncomingValue(0);
-          if (llvm::all_of(MP->incoming_values(),
-                           [=](Use &In) { return In == FirstIn; }))
-            WorkQueue.push_back(MP);
-        }
-        PhisToCheck.clear();
-      }
-    }
+    // MemoryUses whose defining access is not an actual clobber. The phi case
+    // is handled by MemorySSA when passing OptimizePhis = true to
+    // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
+    // by MemorySSA's getClobberingMemoryAccess.
+    MSSAUpdater->removeMemoryAccess(Inst, true);
   }
 };
 
@@ -688,8 +776,13 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
-  MemoryAccess *LaterDef =
-      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+  MemoryAccess *LaterDef;
+  if (ClobberCounter < EarlyCSEMssaOptCap) {
+    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+    ClobberCounter++;
+  } else
+    LaterDef = LaterMA->getDefiningAccess();
+
   return MSSA->dominates(LaterDef, EarlierMA);
 }
 
@@ -1117,7 +1210,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
         // At the moment, we don't remove ordered stores, but do remove
         // unordered atomic stores.  There's no special requirement (for
         // unordered atomics) about removing atomic stores only in favor of
-        // other atomic stores since we we're going to execute the non-atomic
+        // other atomic stores since we were going to execute the non-atomic
         // one anyway and the atomic one might never have become visible.
         if (LastStore) {
           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
@@ -1184,8 +1277,7 @@ bool EarlyCSE::run() {
       CurrentGeneration, DT.getRootNode(),
       DT.getRootNode()->begin(), DT.getRootNode()->end()));
 
-  // Save the current generation.
-  unsigned LiveOutGeneration = CurrentGeneration;
+  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
 
   // Process the stack.
   while (!nodesToProcess.empty()) {
@@ -1217,9 +1309,6 @@ bool EarlyCSE::run() {
     }
   } // while (!nodes...)
 
-  // Reset the current generation.
-  CurrentGeneration = LiveOutGeneration;
-
   return Changed;
 }
 
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 117b19fb8a42..31670b1464e4 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -1,9 +1,8 @@
 //===- FlattenCFGPass.cpp - CFG Flatten Pass ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index f2828e80bc58..4f83e869b303 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -1,9 +1,8 @@
 //===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -148,10 +147,10 @@ void Float2IntPass::seen(Instruction *I, ConstantRange R) {
 
 // Helper - get a range representing a poison value.
 ConstantRange Float2IntPass::badRange() {
-  return ConstantRange(MaxIntegerBW + 1, true);
+  return ConstantRange::getFull(MaxIntegerBW + 1);
 }
 ConstantRange Float2IntPass::unknownRange() {
-  return ConstantRange(MaxIntegerBW + 1, false);
+  return ConstantRange::getEmpty(MaxIntegerBW + 1);
 }
 ConstantRange Float2IntPass::validateRange(ConstantRange R) {
   if (R.getBitWidth() > MaxIntegerBW + 1)
@@ -195,12 +194,13 @@ void Float2IntPass::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
       // Path terminated cleanly - use the type of the integer input to seed
       // the analysis.
       unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
-      auto Input = ConstantRange(BW, true);
+      auto Input = ConstantRange::getFull(BW);
       auto CastOp = (Instruction::CastOps)I->getOpcode();
       seen(I, validateRange(Input.castOp(CastOp, MaxIntegerBW+1)));
       continue;
     }
 
+    case Instruction::FNeg:
     case Instruction::FAdd:
     case Instruction::FSub:
     case Instruction::FMul:
@@ -241,6 +241,15 @@ void Float2IntPass::walkForwards() {
     case Instruction::SIToFP:
       llvm_unreachable("Should have been handled in walkForwards!");
 
+    case Instruction::FNeg:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 1 && "FNeg is a unary operator!");
+        unsigned Size = Ops[0].getBitWidth();
+        auto Zero = ConstantRange(APInt::getNullValue(Size));
+        return Zero.sub(Ops[0]);
+      };
+      break;
+
     case Instruction::FAdd:
     case Instruction::FSub:
     case Instruction::FMul:
@@ -427,7 +436,7 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
     } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
       NewOperands.push_back(convert(VI, ToTy));
     } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-      APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+      APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false);
       bool Exact;
       CF->getValueAPF().convertToInteger(Val,
                                          APFloat::rmNearestTiesToEven,
@@ -467,6 +476,10 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
     NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
     break;
 
+  case Instruction::FNeg:
+    NewV = IRB.CreateNeg(NewOperands[0], I->getName());
+    break;
+
   case Instruction::FAdd:
   case Instruction::FSub:
   case Instruction::FMul:
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9861948c8297..1a02e9d33f49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1,9 +1,8 @@
 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,6 +29,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -46,8 +46,8 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -330,36 +330,15 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
   e.type = EI->getType();
   e.opcode = 0;
 
-  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
-  if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
-    // EI might be an extract from one of our recognised intrinsics. If it
-    // is we'll synthesize a semantically equivalent expression instead on
-    // an extract value expression.
-    switch (I->getIntrinsicID()) {
-      case Intrinsic::sadd_with_overflow:
-      case Intrinsic::uadd_with_overflow:
-        e.opcode = Instruction::Add;
-        break;
-      case Intrinsic::ssub_with_overflow:
-      case Intrinsic::usub_with_overflow:
-        e.opcode = Instruction::Sub;
-        break;
-      case Intrinsic::smul_with_overflow:
-      case Intrinsic::umul_with_overflow:
-        e.opcode = Instruction::Mul;
-        break;
-      default:
-        break;
-    }
-
-    if (e.opcode != 0) {
-      // Intrinsic recognized. Grab its args to finish building the expression.
-      assert(I->getNumArgOperands() == 2 &&
-             "Expect two args for recognised intrinsics.");
-      e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
-      e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
-      return e;
-    }
+  WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+  if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+    // EI is an extract from one of our with.overflow intrinsics. Synthesize
+    // a semantically equivalent expression instead of an extract value
+    // expression.
+    e.opcode = WO->getBinaryOp();
+    e.varargs.push_back(lookupOrAdd(WO->getLHS()));
+    e.varargs.push_back(lookupOrAdd(WO->getRHS()));
+    return e;
   }
 
   // Not a recognised intrinsic. Fall back to producing an extract value
@@ -513,6 +492,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
   switch (I->getOpcode()) {
     case Instruction::Call:
       return lookupOrAddCall(cast<CallInst>(I));
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -544,6 +524,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
     case Instruction::FPExt:
     case Instruction::PtrToInt:
     case Instruction::IntToPtr:
+    case Instruction::AddrSpaceCast:
     case Instruction::BitCast:
     case Instruction::Select:
     case Instruction::ExtractElement:
@@ -879,11 +860,12 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
 
+  Instruction *DepInst = DepInfo.getInst();
   if (DepInfo.isClobber()) {
     // If the dependence is to a store that writes to a superset of the bits
     // read by the load, we can extract the bits we need for the load from the
     // stored value.
-    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
       // Can't forward from non-atomic to atomic without violating memory model.
       if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
         int Offset =
@@ -899,7 +881,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
     //    load i32* P
     //    load i8* (P+1)
     // if we have this, replace the later with an extraction from the former.
-    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
       // If this is a clobber and L is the first instruction in its block, then
       // we have the first instruction in the entry block.
       // Can't forward from non-atomic to atomic without violating memory model.
@@ -916,7 +898,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
 
     // If the clobbering value is a memset/memcpy/memmove, see if we can
     // forward a value on from it.
-    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
       if (Address && !LI->isAtomic()) {
         int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                       DepMI, DL);
@@ -930,8 +912,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
     LLVM_DEBUG(
         // fast print dep, using operator<< on instruction is too slow.
         dbgs() << "GVN: load "; LI->printAsOperand(dbgs());
-        Instruction *I = DepInfo.getInst();
-        dbgs() << " is clobbered by " << *I << '\n';);
+        dbgs() << " is clobbered by " << *DepInst << '\n';);
     if (ORE->allowExtraAnalysis(DEBUG_TYPE))
       reportMayClobberedLoad(LI, DepInfo, DT, ORE);
 
@@ -939,8 +920,6 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
   }
   assert(DepInfo.isDef() && "follows from above");
 
-  Instruction *DepInst = DepInfo.getInst();
-
   // Loading the allocation -> undef.
   if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
       // Loading immediately after lifetime begin -> undef.
@@ -959,9 +938,8 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
     // Reject loads and stores that are to the same address but are of
     // different types if we have to. If the stored value is larger or equal to
     // the loaded value, we can reuse it.
-    if (S->getValueOperand()->getType() != LI->getType() &&
-        !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
-                                         LI->getType(), DL))
+    if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
+                                         DL))
       return false;
 
     // Can't forward from non-atomic to atomic without violating memory model.
@@ -976,8 +954,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
     // If the types mismatch and we can't handle it, reject reuse of the load.
     // If the stored value is larger or equal to the loaded value, we can reuse
     // it.
-    if (LD->getType() != LI->getType() &&
-        !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+    if (!canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
       return false;
 
     // Can't forward from non-atomic to atomic without violating memory model.
@@ -1132,6 +1109,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
         return false;
       }
 
+      // FIXME: Can we support the fallthrough edge?
+      if (isa<CallBrInst>(Pred->getTerminator())) {
+        LLVM_DEBUG(
+            dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
+                   << Pred->getName() << "': " << *LI << '\n');
+        return false;
+      }
+
       if (LoadBB->isEHPad()) {
         LLVM_DEBUG(
             dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
@@ -1220,9 +1205,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     // Instructions that have been inserted in predecessor(s) to materialize
     // the load address do not retain their original debug locations. Doing
     // so could lead to confusing (but correct) source attributions.
-    // FIXME: How do we retain source locations without causing poor debugging
-    // behavior?
-    I->setDebugLoc(DebugLoc());
+    if (const DebugLoc &DL = I->getDebugLoc())
+      I->setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
 
     // FIXME: We really _ought_ to insert these value numbers into their
     // parent's availability map.  However, in doing so, we risk getting into
@@ -1235,10 +1219,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
     BasicBlock *UnavailablePred = PredLoad.first;
     Value *LoadPtr = PredLoad.second;
 
-    auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
-                                 LI->isVolatile(), LI->getAlignment(),
-                                 LI->getOrdering(), LI->getSyncScopeID(),
-                                 UnavailablePred->getTerminator());
+    auto *NewLoad =
+        new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre",
+                     LI->isVolatile(), LI->getAlignment(), LI->getOrdering(),
+                     LI->getSyncScopeID(), UnavailablePred->getTerminator());
     NewLoad->setDebugLoc(LI->getDebugLoc());
 
     // Transfer the old load's AA tags to the new load.
@@ -2168,8 +2152,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
     return false;
 
   // We don't currently value number ANY inline asm calls.
-  if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
-    if (CallI->isInlineAsm())
+  if (auto *CallB = dyn_cast<CallBase>(CurInst))
+    if (CallB->isInlineAsm())
       return false;
 
   uint32_t ValNo = VN.lookup(CurInst);
@@ -2252,6 +2236,11 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
     if (isa<IndirectBrInst>(PREPred->getTerminator()))
       return false;
 
+    // Don't do PRE across callbr.
+    // FIXME: Can we do this across the fallthrough edge?
+    if (isa<CallBrInst>(PREPred->getTerminator()))
+      return false;
+
     // We can't do PRE safely on a critical edge, so instead we schedule
     // the edge to be split and perform the PRE the next time we iterate
     // on the function.
@@ -2479,8 +2468,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
 
       for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
         PHINode &Phi = cast<PHINode>(*II);
-        Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
-                             UndefValue::get(Phi.getType()));
+        Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType()));
         if (MD)
           MD->invalidateCachedPointerInfo(&Phi);
       }
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 76a42d7fe750..7614599653c4 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -1,9 +1,8 @@
 //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -703,7 +702,7 @@ private:
       // Vector of PHIs contains PHIs for different instructions.
       // Sort the args according to their VNs, such that identical
       // instructions are together.
-      std::stable_sort(CHIs.begin(), CHIs.end(), cmpVN);
+      llvm::stable_sort(CHIs, cmpVN);
       auto TI = BB->getTerminator();
       auto B = CHIs.begin();
       // [PreIt, PHIIt) form a range of CHIs which have identical VNs.
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 1df5f5400c14..054025755c69 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -1,9 +1,8 @@
 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -442,6 +441,7 @@ public:
       break;
     case Instruction::Call:
     case Instruction::Invoke:
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -714,6 +714,15 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
   // FIXME: If any of these fail, we should partition up the candidates to
   // try and continue making progress.
   Instruction *I0 = NewInsts[0];
+
+  // If the instructions that are going to participate do not all have the same
+  // number of operands, we can't do any useful PHI analysis for all operands.
+  auto hasDifferentNumOperands = [&I0](Instruction *I) {
+    return I->getNumOperands() != I0->getNumOperands();
+  };
+  if (any_of(NewInsts, hasDifferentNumOperands))
+    return None;
+
   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
     if (PHI.areAllIncomingValuesSame())
@@ -791,10 +800,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
     --LRI;
   }
 
-  std::stable_sort(
-      Candidates.begin(), Candidates.end(),
-      [](const SinkingInstructionCandidate &A,
-         const SinkingInstructionCandidate &B) { return A > B; });
+  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
   LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                          : Candidates) dbgs()
                                                     << "  " << C << "\n";);
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index efc204d4f74b..e14f44bb7069 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -1,9 +1,8 @@
 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,6 +82,11 @@ static cl::opt<unsigned> FrequentBranchThreshold(
              "it is considered frequently taken"),
     cl::init(1000));
 
+static cl::opt<bool>
+    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
+                      cl::desc("Whether or not we should widen guards  "
+                               "expressed as branches by widenable conditions"),
+                      cl::init(true));
 
 namespace {
 
@@ -93,6 +97,10 @@ static Value *getCondition(Instruction *I) {
            "Bad guard intrinsic?");
     return GI->getArgOperand(0);
   }
+  if (isGuardAsWidenableBranch(I)) {
+    auto *Cond = cast<BranchInst>(I)->getCondition();
+    return cast<BinaryOperator>(Cond)->getOperand(0);
+  }
   return cast<BranchInst>(I)->getCondition();
 }
 
@@ -133,12 +141,12 @@ class GuardWideningImpl {
   /// guards.
   DenseSet<Instruction *> WidenedGuards;
 
-  /// Try to eliminate guard \p Guard by widening it into an earlier dominating
-  /// guard.  \p DFSI is the DFS iterator on the dominator tree that is
-  /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
+  /// Try to eliminate instruction \p Instr by widening it into an earlier
+  /// dominating guard.  \p DFSI is the DFS iterator on the dominator tree that
+  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
   /// maps BasicBlocks to the set of guards seen in that block.
-  bool eliminateGuardViaWidening(
-      Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+  bool eliminateInstrViaWidening(
+      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
       const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
           GuardsPerBlock, bool InvertCondition = false);
 
@@ -162,28 +170,25 @@ class GuardWideningImpl {
 
   static StringRef scoreTypeToString(WideningScore WS);
 
-  /// Compute the score for widening the condition in \p DominatedGuard
-  /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
-  /// \p DominatingGuardLoop). If \p InvertCond is set, then we widen the
+  /// Compute the score for widening the condition in \p DominatedInstr
+  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
   /// inverted condition of the dominating guard.
-  WideningScore computeWideningScore(Instruction *DominatedGuard,
-                                     Loop *DominatedGuardLoop,
+  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                      Instruction *DominatingGuard,
-                                     Loop *DominatingGuardLoop,
                                      bool InvertCond);
 
   /// Helper to check if \p V can be hoisted to \p InsertPos.
-  bool isAvailableAt(Value *V, Instruction *InsertPos) {
-    SmallPtrSet<Instruction *, 8> Visited;
+  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
+    SmallPtrSet<const Instruction *, 8> Visited;
     return isAvailableAt(V, InsertPos, Visited);
   }
 
-  bool isAvailableAt(Value *V, Instruction *InsertPos,
-                     SmallPtrSetImpl<Instruction *> &Visited);
+  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
+                     SmallPtrSetImpl<const Instruction *> &Visited) const;
 
   /// Helper to hoist \p V to \p InsertPos.  Guaranteed to succeed if \c
   /// isAvailableAt returned true.
-  void makeAvailableAt(Value *V, Instruction *InsertPos);
+  void makeAvailableAt(Value *V, Instruction *InsertPos) const;
 
   /// Common helper used by \c widenGuard and \c isWideningCondProfitable.  Try
   /// to generate an expression computing the logical AND of \p Cond0 and (\p
@@ -200,23 +205,23 @@ class GuardWideningImpl {
   /// pre-existing instruction in the IR that computes the result of this range
   /// check.
   class RangeCheck {
-    Value *Base;
-    ConstantInt *Offset;
-    Value *Length;
+    const Value *Base;
+    const ConstantInt *Offset;
+    const Value *Length;
     ICmpInst *CheckInst;
 
   public:
-    explicit RangeCheck(Value *Base, ConstantInt *Offset, Value *Length,
-                        ICmpInst *CheckInst)
+    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
+                        const Value *Length, ICmpInst *CheckInst)
         : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}
 
-    void setBase(Value *NewBase) { Base = NewBase; }
-    void setOffset(ConstantInt *NewOffset) { Offset = NewOffset; }
+    void setBase(const Value *NewBase) { Base = NewBase; }
+    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }
 
-    Value *getBase() const { return Base; }
-    ConstantInt *getOffset() const { return Offset; }
+    const Value *getBase() const { return Base; }
+    const ConstantInt *getOffset() const { return Offset; }
     const APInt &getOffsetValue() const { return getOffset()->getValue(); }
-    Value *getLength() const { return Length; };
+    const Value *getLength() const { return Length; };
     ICmpInst *getCheckInst() const { return CheckInst; }
 
     void print(raw_ostream &OS, bool PrintTypes = false) {
@@ -238,19 +243,19 @@ class GuardWideningImpl {
   /// append them to \p Checks.  Returns true on success, may clobber \c Checks
   /// on failure.
   bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
-    SmallPtrSet<Value *, 8> Visited;
+    SmallPtrSet<const Value *, 8> Visited;
     return parseRangeChecks(CheckCond, Checks, Visited);
   }
 
   bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
-                        SmallPtrSetImpl<Value *> &Visited);
+                        SmallPtrSetImpl<const Value *> &Visited);
 
   /// Combine the checks in \p Checks into a smaller set of checks and append
   /// them into \p CombinedChecks.  Return true on success (i.e. all of checks
   /// in \p Checks were combined into \p CombinedChecks).  Clobbers \p Checks
   /// and \p CombinedChecks on success and on failure.
   bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
-                          SmallVectorImpl<RangeCheck> &CombinedChecks);
+                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;
 
   /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
   /// computing only one of the two expressions?
@@ -266,8 +271,16 @@ class GuardWideningImpl {
   void widenGuard(Instruction *ToWiden, Value *NewCondition,
                   bool InvertCondition) {
     Value *Result;
-    widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result,
+    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                     InvertCondition);
+    Value *WidenableCondition = nullptr;
+    if (isGuardAsWidenableBranch(ToWiden)) {
+      auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
+      WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+    }
+    if (WidenableCondition)
+      Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
+                                         "guard.chk", ToWiden);
     setCondition(ToWiden, Result);
   }
 
@@ -285,6 +298,14 @@ public:
 };
 }
 
+static bool isSupportedGuardInstruction(const Instruction *Insn) {
+  if (isGuard(Insn))
+    return true;
+  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
+    return true;
+  return false;
+}
+
 bool GuardWideningImpl::run() {
   DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
   bool Changed = false;
@@ -304,20 +325,20 @@ bool GuardWideningImpl::run() {
     auto &CurrentList = GuardsInBlock[BB];
 
     for (auto &I : *BB)
-      if (isGuard(&I))
+      if (isSupportedGuardInstruction(&I))
         CurrentList.push_back(cast<Instruction>(&I));
 
     for (auto *II : CurrentList)
-      Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
     if (WidenFrequentBranches && BPI)
       if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
         if (BI->isConditional()) {
           // If one of branches of a conditional is likely taken, try to
           // eliminate it.
           if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
-            Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock);
+            Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
           else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
-            Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock,
+            Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
                                                  /*InvertCondition*/true);
         }
   }
@@ -326,7 +347,7 @@ bool GuardWideningImpl::run() {
   for (auto *I : EliminatedGuardsAndBranches)
     if (!WidenedGuards.count(I)) {
       assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
-      if (isGuard(I))
+      if (isSupportedGuardInstruction(I))
         eliminateGuard(I);
       else {
         assert(isa<BranchInst>(I) &&
@@ -338,19 +359,18 @@ bool GuardWideningImpl::run() {
   return Changed;
 }
 
-bool GuardWideningImpl::eliminateGuardViaWidening(
-    Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+bool GuardWideningImpl::eliminateInstrViaWidening(
+    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
     const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
         GuardsInBlock, bool InvertCondition) {
   // Ignore trivial true or false conditions. These instructions will be
   // trivially eliminated by any cleanup pass. Do not erase them because other
   // guards can possibly be widened into them.
-  if (isa<ConstantInt>(getCondition(GuardInst)))
+  if (isa<ConstantInt>(getCondition(Instr)))
     return false;
 
   Instruction *BestSoFar = nullptr;
   auto BestScoreSoFar = WS_IllegalOrNegative;
-  auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
 
   // In the set of dominating guards, find the one we can merge GuardInst with
   // for the most profit.
@@ -358,12 +378,13 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
     auto *CurBB = DFSI.getPath(i)->getBlock();
     if (!BlockFilter(CurBB))
       break;
-    auto *CurLoop = LI.getLoopFor(CurBB);
     assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
     const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
 
     auto I = GuardsInCurBB.begin();
-    auto E = GuardsInCurBB.end();
+    auto E = Instr->getParent() == CurBB
+                 ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr)
+                 : GuardsInCurBB.end();
 
 #ifndef NDEBUG
     {
@@ -379,21 +400,11 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
     }
 #endif
 
-    assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
-
-    if (i == (e - 1) && CurBB->getTerminator() != GuardInst) {
-      // Corner case: make sure we're only looking at guards strictly dominating
-      // GuardInst when visiting GuardInst->getParent().
-      auto NewEnd = std::find(I, E, GuardInst);
-      assert(NewEnd != E && "GuardInst not in its own block?");
-      E = NewEnd;
-    }
+    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");
 
     for (auto *Candidate : make_range(I, E)) {
-      auto Score =
-          computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop,
-                               InvertCondition);
-      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(GuardInst)
+      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
+      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                         << " and " << *getCondition(Candidate) << " is "
                         << scoreTypeToString(Score) << "\n");
       if (Score > BestScoreSoFar) {
@@ -404,42 +415,45 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
   }
 
   if (BestScoreSoFar == WS_IllegalOrNegative) {
-    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *GuardInst << "\n");
+    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
     return false;
   }
 
-  assert(BestSoFar != GuardInst && "Should have never visited same guard!");
-  assert(DT.dominates(BestSoFar, GuardInst) && "Should be!");
+  assert(BestSoFar != Instr && "Should have never visited same guard!");
+  assert(DT.dominates(BestSoFar, Instr) && "Should be!");
 
-  LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
+  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                     << " with score " << scoreTypeToString(BestScoreSoFar)
                     << "\n");
-  widenGuard(BestSoFar, getCondition(GuardInst), InvertCondition);
+  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
   auto NewGuardCondition = InvertCondition
-                               ? ConstantInt::getFalse(GuardInst->getContext())
-                               : ConstantInt::getTrue(GuardInst->getContext());
-  setCondition(GuardInst, NewGuardCondition);
-  EliminatedGuardsAndBranches.push_back(GuardInst);
+                               ? ConstantInt::getFalse(Instr->getContext())
+                               : ConstantInt::getTrue(Instr->getContext());
+  setCondition(Instr, NewGuardCondition);
+  EliminatedGuardsAndBranches.push_back(Instr);
   WidenedGuards.insert(BestSoFar);
   return true;
 }
 
-GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
-    Instruction *DominatedGuard, Loop *DominatedGuardLoop,
-    Instruction *DominatingGuard, Loop *DominatingGuardLoop, bool InvertCond) {
+GuardWideningImpl::WideningScore
+GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
+                                        Instruction *DominatingGuard,
+                                        bool InvertCond) {
+  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
+  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
   bool HoistingOutOfLoop = false;
 
-  if (DominatingGuardLoop != DominatedGuardLoop) {
+  if (DominatingGuardLoop != DominatedInstrLoop) {
     // Be conservative and don't widen into a sibling loop.  TODO: If the
     // sibling is colder, we should consider allowing this.
     if (DominatingGuardLoop &&
-        !DominatingGuardLoop->contains(DominatedGuardLoop))
+        !DominatingGuardLoop->contains(DominatedInstrLoop))
       return WS_IllegalOrNegative;
 
     HoistingOutOfLoop = true;
   }
 
-  if (!isAvailableAt(getCondition(DominatedGuard), DominatingGuard))
+  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
     return WS_IllegalOrNegative;
 
   // If the guard was conditional executed, it may never be reached
@@ -450,7 +464,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   // here.  TODO: evaluate cost model for spurious deopt
   // NOTE: As written, this also lets us hoist right over another guard which
   // is essentially just another spelling for control flow.
-  if (isWideningCondProfitable(getCondition(DominatedGuard),
+  if (isWideningCondProfitable(getCondition(DominatedInstr),
                                getCondition(DominatingGuard), InvertCond))
     return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
 
@@ -462,7 +476,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   // throw, etc...).  That choice appears arbitrary.
   auto MaybeHoistingOutOfIf = [&]() {
     auto *DominatingBlock = DominatingGuard->getParent();
-    auto *DominatedBlock = DominatedGuard->getParent();
+    auto *DominatedBlock = DominatedInstr->getParent();
+    if (isGuardAsWidenableBranch(DominatingGuard))
+      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
 
     // Same Block?
     if (DominatedBlock == DominatingBlock)
@@ -478,8 +494,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
 }
 
-bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
-                                      SmallPtrSetImpl<Instruction *> &Visited) {
+bool GuardWideningImpl::isAvailableAt(
+    const Value *V, const Instruction *Loc,
+    SmallPtrSetImpl<const Instruction *> &Visited) const {
   auto *Inst = dyn_cast<Instruction>(V);
   if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
     return true;
@@ -499,7 +516,7 @@ bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
                 [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
 }
 
-void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
+void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
   auto *Inst = dyn_cast<Instruction>(V);
   if (!Inst || DT.dominates(Inst, Loc))
     return;
@@ -597,7 +614,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
 
 bool GuardWideningImpl::parseRangeChecks(
     Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
-    SmallPtrSetImpl<Value *> &Visited) {
+    SmallPtrSetImpl<const Value *> &Visited) {
   if (!Visited.insert(CheckCond).second)
     return true;
 
@@ -616,7 +633,7 @@ bool GuardWideningImpl::parseRangeChecks(
        IC->getPredicate() != ICmpInst::ICMP_UGT))
     return false;
 
-  Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
+  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
   if (IC->getPredicate() == ICmpInst::ICMP_UGT)
     std::swap(CmpLHS, CmpRHS);
 
@@ -669,13 +686,13 @@ bool GuardWideningImpl::parseRangeChecks(
 
 bool GuardWideningImpl::combineRangeChecks(
     SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
-    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) {
+    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
   unsigned OldCount = Checks.size();
   while (!Checks.empty()) {
     // Pick all of the range checks with a specific base and length, and try to
     // merge them.
-    Value *CurrentBase = Checks.front().getBase();
-    Value *CurrentLength = Checks.front().getLength();
+    const Value *CurrentBase = Checks.front().getBase();
+    const Value *CurrentLength = Checks.front().getLength();
 
     SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;
 
@@ -704,8 +721,8 @@ bool GuardWideningImpl::combineRangeChecks(
 
     // Note: std::sort should not invalidate the ChecksStart iterator.
 
-    ConstantInt *MinOffset = CurrentChecks.front().getOffset(),
-                *MaxOffset = CurrentChecks.back().getOffset();
+    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
+    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();
 
     unsigned BitWidth = MaxOffset->getValue().getBitWidth();
     if ((MaxOffset->getValue() - MinOffset->getValue())
@@ -800,6 +817,31 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
   return PA;
 }
 
+PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
+                                         LoopStandardAnalysisResults &AR,
+                                         LPMUpdater &U) {
+
+  const auto &FAM =
+    AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+  Function &F = *L.getHeader()->getParent();
+  BranchProbabilityInfo *BPI = nullptr;
+  if (WidenFrequentBranches)
+    BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);
+
+  BasicBlock *RootBB = L.getLoopPredecessor();
+  if (!RootBB)
+    RootBB = L.getHeader();
+  auto BlockFilter = [&](BasicBlock *BB) {
+    return BB == RootBB || L.contains(BB);
+  };
+  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI,
+                         AR.DT.getNode(RootBB),
+                         BlockFilter).run())
+    return PreservedAnalyses::all();
+
+  return getLoopPassPreservedAnalyses();
+}
+
 namespace {
 struct GuardWideningLegacyPass : public FunctionPass {
   static char ID;
diff --git a/lib/Transforms/Scalar/IVUsersPrinter.cpp b/lib/Transforms/Scalar/IVUsersPrinter.cpp
index 807593379283..e2022aba97c4 100644
--- a/lib/Transforms/Scalar/IVUsersPrinter.cpp
+++ b/lib/Transforms/Scalar/IVUsersPrinter.cpp
@@ -1,9 +1,8 @@
 //===- IVUsersPrinter.cpp - Induction Variable Users Printer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 48d8e457ba7c..f9fc698a4a9b 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1,9 +1,8 @@
 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -43,6 +43,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -101,7 +102,7 @@ static cl::opt<bool> VerifyIndvars(
   "verify-indvars", cl::Hidden,
   cl::desc("Verify the ScalarEvolution result after running indvars"));
 
-enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
 
 static cl::opt<ReplaceExitVal> ReplaceExitValue(
     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
@@ -109,6 +110,8 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
                clEnumValN(OnlyCheapRepl, "cheap",
                           "only replace exit value when the cost is cheap"),
+               clEnumValN(NoHardUse, "noharduse",
+                          "only replace exit values when loop def likely dead"),
                clEnumValN(AlwaysRepl, "always",
                           "always replace exit value whenever possible")));
 
@@ -141,13 +144,15 @@ class IndVarSimplify {
   bool rewriteNonIntegerIVs(Loop *L);
 
   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+  bool optimizeLoopExits(Loop *L);
 
   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
   bool rewriteFirstIterationLoopExitValues(Loop *L);
   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 
-  bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+                                 const SCEV *ExitCount,
                                  PHINode *IndVar, SCEVExpander &Rewriter);
 
   bool sinkUnusedInvariants(Loop *L);
@@ -218,7 +223,9 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
 /// Determine the insertion point for this user. By default, insert immediately
 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
-/// common dominator for the incoming blocks.
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                           DominatorTree *DT, LoopInfo *LI) {
   PHINode *PHI = dyn_cast<PHINode>(User);
@@ -231,6 +238,10 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
       continue;
 
     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+    if (!DT->isReachableFromEntry(InsertBB))
+      continue;
+
     if (!InsertPt) {
       InsertPt = InsertBB->getTerminator();
       continue;
@@ -238,7 +249,11 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
     InsertPt = InsertBB->getTerminator();
   }
-  assert(InsertPt && "Missing phi operand");
+
+  // If we have skipped all inputs, it means that Def only comes to Phi from
+  // unreachable blocks.
+  if (!InsertPt)
+    return nullptr;
 
   auto *DefI = dyn_cast<Instruction>(Def);
   if (!DefI)
@@ -621,8 +636,12 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
 
         // Computing the value outside of the loop brings no benefit if it is
         // definitely used inside the loop in a way which can not be optimized
-        // away.
-        if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+        // away.  Avoid doing so unless we know we have a value which computes
+        // the ExitValue already.  TODO: This should be merged into SCEV
+        // expander to leverage its knowledge of existing expressions.
+        if (ReplaceExitValue != AlwaysRepl &&
+            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
+            hasHardUserWithinLoop(L, Inst))
           continue;
 
         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
@@ -707,8 +726,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
 
   SmallVector<BasicBlock *, 8> ExitBlocks;
   L->getUniqueExitBlocks(ExitBlocks);
-  auto *LoopHeader = L->getHeader();
-  assert(LoopHeader && "Invalid loop");
 
   bool MadeAnyChanges = false;
   for (auto *ExitBB : ExitBlocks) {
@@ -719,11 +736,13 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
            IncomingValIdx != E; ++IncomingValIdx) {
         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
 
-        // We currently only support loop exits from loop header. If the
-        // incoming block is not loop header, we need to recursively check
-        // all conditions starting from loop header are loop invariants.
-        // Additional support might be added in the future.
-        if (IncomingBB != LoopHeader)
+        // Can we prove that the exit must run on the first iteration if it
+        // runs at all?  (i.e. early exits are fine for our purposes, but
+        // traces which lead to this exit being taken on the 2nd iteration
+        // aren't.)  Note that this is about whether the exit branch is
+        // executed, not about whether it is taken.
+        if (!L->getLoopLatch() ||
+            !DT->dominates(IncomingBB, L->getLoopLatch()))
           continue;
 
         // Get condition that leads to the exit path.
@@ -744,8 +763,8 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
 
         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
 
-        // Only deal with PHIs.
-        if (!ExitVal)
+        // Only deal with PHIs in the loop header.
+        if (!ExitVal || ExitVal->getParent() != L->getHeader())
           continue;
 
         // If ExitVal is a PHI on the loop header, then we know its
@@ -755,7 +774,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
         assert(LoopPreheader && "Invalid loop");
         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
         if (PreheaderIdx != -1) {
-          assert(ExitVal->getParent() == LoopHeader &&
+          assert(ExitVal->getParent() == L->getHeader() &&
                  "ExitVal must be in loop header");
           MadeAnyChanges = true;
           PN.setIncomingValue(IncomingValIdx,
@@ -1022,24 +1041,13 @@ protected:
 
 } // end anonymous namespace
 
-/// Perform a quick domtree based check for loop invariance assuming that V is
-/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
-/// purpose.
-static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
-  Instruction *Inst = dyn_cast<Instruction>(V);
-  if (!Inst)
-    return true;
-
-  return DT->properlyDominates(Inst->getParent(), L->getHeader());
-}
-
 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                  bool IsSigned, Instruction *Use) {
   // Set the debug location and conservative insertion point.
   IRBuilder<> Builder(Use);
   // Hoist the insertion point into loop preheaders as far as possible.
   for (const Loop *L = LI->getLoopFor(Use->getParent());
-       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
        L = L->getParentLoop())
     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
 
@@ -1305,13 +1313,15 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
   return {AddRec, ExtKind};
 }
 
-/// This IV user cannot be widen. Replace this use of the original narrow IV
+/// This IV user cannot be widened. Replace this use of the original narrow IV
 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
+  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+  if (!InsertPt)
+    return;
   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                     << *DU.NarrowUse << "\n");
-  IRBuilder<> Builder(
-      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+  IRBuilder<> Builder(InsertPt);
   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
 }
@@ -1348,8 +1358,10 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
 
   // Widen the compare instruction.
-  IRBuilder<> Builder(
-      getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+  if (!InsertPt)
+    return false;
+  IRBuilder<> Builder(InsertPt);
   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
 
   // Widen the other operand of the compare, if necessary.
@@ -1977,41 +1989,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
 //===----------------------------------------------------------------------===//
 
-/// Return true if this loop's backedge taken count expression can be safely and
-/// cheaply expanded into an instruction sequence that can be used by
-/// linearFunctionTestReplace.
-///
-/// TODO: This fails for pointer-type loop counters with greater than one byte
-/// strides, consequently preventing LFTR from running. For the purpose of LFTR
-/// we could skip this check in the case that the LFTR loop counter (chosen by
-/// FindLoopCounter) is also pointer type. Instead, we could directly convert
-/// the loop test to an inequality test by checking the target data's alignment
-/// of element types (given that the initial pointer value originates from or is
-/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
-/// However, we don't yet have a strong motivation for converting loop tests
-/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
-                                        SCEVExpander &Rewriter) {
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      BackedgeTakenCount->isZero())
-    return false;
-
-  if (!L->getExitingBlock())
-    return false;
-
-  // Can't rewrite non-branch yet.
-  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
-    return false;
-
-  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
-    return false;
-
-  return true;
-}
-
-/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
-static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+/// Given a Value which is hoped to be part of an add recurrence in the given
+/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
+/// that this is less general than SCEV's AddRec checking.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
   Instruction *IncI = dyn_cast<Instruction>(IncV);
   if (!IncI)
     return nullptr;
@@ -2031,7 +2012,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
 
   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
   if (Phi && Phi->getParent() == L->getHeader()) {
-    if (isLoopInvariant(IncI->getOperand(1), L, DT))
+    if (L->isLoopInvariant(IncI->getOperand(1)))
       return Phi;
     return nullptr;
   }
@@ -2041,32 +2022,40 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
   // Allow add/sub to be commuted.
   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
   if (Phi && Phi->getParent() == L->getHeader()) {
-    if (isLoopInvariant(IncI->getOperand(0), L, DT))
+    if (L->isLoopInvariant(IncI->getOperand(0)))
       return Phi;
   }
   return nullptr;
 }
 
-/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
-static ICmpInst *getLoopTest(Loop *L) {
-  assert(L->getExitingBlock() && "expected loop exit");
-
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  // Don't bother with LFTR if the loop is not properly simplified.
-  if (!LatchBlock)
-    return nullptr;
-
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  assert(BI && "expected exit branch");
+/// Whether the current loop exit test is based on this value.  Currently this
+/// is limited to a direct use in the loop condition.
+static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
+  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+  // TODO: Allow non-icmp loop test.
+  if (!ICmp)
+    return false;
 
-  return dyn_cast<ICmpInst>(BI->getCondition());
+  // TODO: Allow indirect use.
+  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
 }
 
 /// linearFunctionTestReplace policy. Return true unless we can show that the
 /// current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L, DominatorTree *DT) {
+static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
+  assert(L->getLoopLatch() && "Must be in simplified form");
+
+  // Avoid converting a constant or loop invariant test back to a runtime
+  // test.  This is critical for when SCEV's cached ExitCount is less precise
+  // than the current IR (such as after we've proven a particular exit is
+  // actually dead and thus the BE count never reaches our ExitCount.)
+  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+  if (L->isLoopInvariant(BI->getCondition()))
+    return false;
+  
   // Do LFTR to simplify the exit condition to an ICMP.
-  ICmpInst *Cond = getLoopTest(L);
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
   if (!Cond)
     return true;
 
@@ -2078,15 +2067,15 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
   // Look for a loop invariant RHS
   Value *LHS = Cond->getOperand(0);
   Value *RHS = Cond->getOperand(1);
-  if (!isLoopInvariant(RHS, L, DT)) {
-    if (!isLoopInvariant(LHS, L, DT))
+  if (!L->isLoopInvariant(RHS)) {
+    if (!L->isLoopInvariant(LHS))
       return true;
     std::swap(LHS, RHS);
   }
   // Look for a simple IV counter LHS
   PHINode *Phi = dyn_cast<PHINode>(LHS);
   if (!Phi)
-    Phi = getLoopPhiForCounter(LHS, L, DT);
+    Phi = getLoopPhiForCounter(LHS, L);
 
   if (!Phi)
     return true;
@@ -2098,7 +2087,49 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
 
   // Do LFTR if the exit condition's IV is *not* a simple counter.
   Value *IncV = Phi->getIncomingValue(Idx);
-  return Phi != getLoopPhiForCounter(IncV, L, DT);
+  return Phi != getLoopPhiForCounter(IncV, L);
+}
+
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result.  Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison.  This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist.  Note that a false result conveys no information.  
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+                                          Instruction *OnPathTo, 
+                                          DominatorTree *DT) {
+  // Basic approach is to assume Root is poison, propagate poison forward
+  // through all users we can easily track, and then check whether any of those
+  // users are provably UB and must execute before our exiting block might
+  // exit.
+
+  // The set of all recursive users we've visited (which are assumed to all be
+  // poison because of said visit)
+  SmallSet<const Value *, 16> KnownPoison;
+  SmallVector<const Instruction*, 16> Worklist;
+  Worklist.push_back(Root);
+  while (!Worklist.empty()) {
+    const Instruction *I = Worklist.pop_back_val();
+
+    // If we know this must trigger UB on a path leading to our target.
+    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+      return true;
+    
+    // If we can't analyze propagation through this instruction, just skip it
+    // and transitive users.  Safe as false is a conservative result.
+    if (!propagatesFullPoison(I) && I != Root)
+      continue;
+
+    if (KnownPoison.insert(I).second)
+      for (const User *User : I->users())
+        Worklist.push_back(cast<Instruction>(User));
+  }
+
+  // Might be non-UB, or might have a path we couldn't prove must execute on
+  // way to exiting bb. 
+  return false;
 }
 
 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
@@ -2157,46 +2188,62 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
   return true;
 }
 
-/// Find an affine IV in canonical form.
+/// Return true if the given phi is a "counter" in L.  A counter is an
+/// add recurrence (of integer or pointer type) with an arbitrary start, and a
+/// step of 1.  Note that L must have exactly one latch.
+static bool isLoopCounter(PHINode* Phi, Loop *L,
+                          ScalarEvolution *SE) {
+  assert(Phi->getParent() == L->getHeader());
+  assert(L->getLoopLatch());
+  
+  if (!SE->isSCEVable(Phi->getType()))
+    return false;
+
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+  if (!AR || AR->getLoop() != L || !AR->isAffine())
+    return false;
+
+  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+  if (!Step || !Step->isOne())
+    return false;
+
+  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
+  Value *IncV = Phi->getIncomingValue(LatchIdx);
+  return (getLoopPhiForCounter(IncV, L) == Phi);
+}
+
+/// Search the loop header for a loop counter (an add rec w/step of one)
+/// suitable for use by LFTR.  If multiple counters are available, select the
+/// "best" one based on profitability heuristics.
 ///
 /// BECount may be an i8* pointer type. The pointer difference is already
 /// valid count without scaling the address stride, so it remains a pointer
 /// expression as far as SCEV is concerned.
-///
-/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
-///
-/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
-///
-/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
-/// This is difficult in general for SCEV because of potential overflow. But we
-/// could at least handle constant BECounts.
-static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
+                                const SCEV *BECount,
                                 ScalarEvolution *SE, DominatorTree *DT) {
   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
 
-  Value *Cond =
-    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
 
   // Loop over all of the PHI nodes, looking for a simple counter.
   PHINode *BestPhi = nullptr;
   const SCEV *BestInit = nullptr;
   BasicBlock *LatchBlock = L->getLoopLatch();
-  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+  assert(LatchBlock && "Must be in simplified form");
   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
 
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
     PHINode *Phi = cast<PHINode>(I);
-    if (!SE->isSCEVable(Phi->getType()))
+    if (!isLoopCounter(Phi, L, SE))
       continue;
 
     // Avoid comparing an integer IV against a pointer Limit.
     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
       continue;
 
-    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
-    if (!AR || AR->getLoop() != L || !AR->isAffine())
-      continue;
-
+    const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+    
     // AR may be a pointer type, while BECount is an integer type.
     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
     // AR may not be a narrower type, or we may never exit.
@@ -2204,28 +2251,30 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
       continue;
 
-    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
-    if (!Step || !Step->isOne())
-      continue;
-
-    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
-    Value *IncV = Phi->getIncomingValue(LatchIdx);
-    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
-      continue;
-
     // Avoid reusing a potentially undef value to compute other values that may
     // have originally had a concrete definition.
     if (!hasConcreteDef(Phi)) {
       // We explicitly allow unknown phis as long as they are already used by
-      // the loop test. In this case we assume that performing LFTR could not
-      // increase the number of undef users.
-      if (ICmpInst *Cond = getLoopTest(L)) {
-        if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
-            Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
-          continue;
-        }
-      }
+      // the loop exit test.  This is legal since performing LFTR could not
+      // increase the number of undef users. 
+      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
+      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
+          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
+        continue;
     }
+
+    // Avoid introducing undefined behavior due to poison which didn't exist in
+    // the original program.  (Annoyingly, the rules for poison and undef
+    // propagation are distinct, so this does NOT cover the undef case above.)
+    // We have to ensure that we don't introduce UB by introducing a use on an
+    // iteration where said IV produces poison.  Our strategy here differs for
+    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
+    // see code in linearFunctionTestReplace.  For pointers, we restrict
+    // transforms as there is no good way to reinfer inbounds once lost.
+    if (!Phi->getType()->isIntegerTy() &&
+        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
+      continue;
+    
     const SCEV *Init = AR->getStart();
 
     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2251,47 +2300,49 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
   return BestPhi;
 }
 
-/// Help linearFunctionTestReplace by generating a value that holds the RHS of
-/// the new loop test.
-static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+/// Insert an IR expression which computes the value held by the IV IndVar
+/// (which must be a loop counter w/unit stride) after the backedge of loop L
+/// is taken ExitCount times.
+static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
+                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
-  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
-  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+  assert(isLoopCounter(IndVar, L, SE));
+  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   const SCEV *IVInit = AR->getStart();
 
-  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
-  // finds a valid pointer IV. Sign extend BECount in order to materialize a
+  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
+  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
   // the existing GEPs whenever possible.
-  if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
+  if (IndVar->getType()->isPointerTy() &&
+      !ExitCount->getType()->isPointerTy()) {
     // IVOffset will be the new GEP offset that is interpreted by GEP as a
-    // signed value. IVCount on the other hand represents the loop trip count,
+    // signed value. ExitCount on the other hand represents the loop trip count,
     // which is an unsigned value. FindLoopCounter only allows induction
     // variables that have a positive unit stride of one. This means we don't
     // have to handle the case of negative offsets (yet) and just need to zero
-    // extend IVCount.
+    // extend ExitCount.
     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
-    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
+    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
+    if (UsePostInc)
+      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
 
     // Expand the code for the iteration count.
     assert(SE->isLoopInvariant(IVOffset, L) &&
            "Computed iteration count is not loop invariant!");
-    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
-    Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
 
-    Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
-    assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
     // We could handle pointer IVs other than i8*, but we need to compensate for
-    // gep index scaling. See canExpandBackedgeTakenCount comments.
+    // gep index scaling.
     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
-                             cast<PointerType>(GEPBase->getType())
+                             cast<PointerType>(IndVar->getType())
                                  ->getElementType())->isOne() &&
            "unit stride pointer IV must be i8*");
 
-    IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
-    return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
+    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
+    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
   } else {
-    // In any other case, convert both IVInit and IVCount to integers before
+    // In any other case, convert both IVInit and ExitCount to integers before
     // comparing. This may result in SCEV expansion of pointers, but in practice
     // SCEV will fold the pointer arithmetic away as such:
     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
@@ -2299,35 +2350,40 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
     // Valid Cases: (1) both integers is most common; (2) both may be pointers
     // for simple memset-style loops.
     //
-    // IVInit integer and IVCount pointer would only occur if a canonical IV
+    // IVInit integer and ExitCount pointer would only occur if a canonical IV
     // were generated on top of case #2, which is not expected.
 
-    const SCEV *IVLimit = nullptr;
-    // For unit stride, IVCount = Start + BECount with 2's complement overflow.
-    // For non-zero Start, compute IVCount here.
-    if (AR->getStart()->isZero())
-      IVLimit = IVCount;
-    else {
-      assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
-      const SCEV *IVInit = AR->getStart();
+    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+    // For unit stride, IVLimit = Start + ExitCount with 2's complement
+    // overflow.
+
+    // For integer IVs, truncate the IV before computing IVInit + ExitCount,
+    // unless we know apriori that the limit must be a constant when evaluated
+    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
+    // of the IV in the loop over a (potentially) expensive expansion of the
+    // widened exit count add(zext(add)) expression.
+    if (SE->getTypeSizeInBits(IVInit->getType())
+        > SE->getTypeSizeInBits(ExitCount->getType())) {
+      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
+        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
+      else
+        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
+    }
 
-      // For integer IVs, truncate the IV before computing IVInit + BECount.
-      if (SE->getTypeSizeInBits(IVInit->getType())
-          > SE->getTypeSizeInBits(IVCount->getType()))
-        IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
+
+    if (UsePostInc)
+      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
 
-      IVLimit = SE->getAddExpr(IVInit, IVCount);
-    }
     // Expand the code for the iteration count.
-    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
-    IRBuilder<> Builder(BI);
     assert(SE->isLoopInvariant(IVLimit, L) &&
            "Computed iteration count is not loop invariant!");
     // Ensure that we generate the same type as IndVar, or a smaller integer
     // type. In the presence of null pointer values, we have an integer type
     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
-    Type *LimitTy = IVCount->getType()->isPointerTy() ?
-      IndVar->getType() : IVCount->getType();
+    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
+      IndVar->getType() : ExitCount->getType();
+    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
   }
 }
@@ -2338,51 +2394,70 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
 /// determine a loop-invariant trip count of the loop, which is actually a much
 /// broader range than just linear tests.
 bool IndVarSimplify::
-linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+                          const SCEV *ExitCount,
                           PHINode *IndVar, SCEVExpander &Rewriter) {
-  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
+  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+  assert(isLoopCounter(IndVar, L, SE));
+  Instruction * const IncVar =
+    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
 
-  // Initialize CmpIndVar and IVCount to their preincremented values.
+  // Initialize CmpIndVar to the preincremented IV.
   Value *CmpIndVar = IndVar;
-  const SCEV *IVCount = BackedgeTakenCount;
-
-  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+  bool UsePostInc = false;
 
   // If the exiting block is the same as the backedge block, we prefer to
   // compare against the post-incremented value, otherwise we must compare
   // against the preincremented value.
-  if (L->getExitingBlock() == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // This addition may overflow, which is valid as long as the comparison is
-    // truncated to BackedgeTakenCount->getType().
-    IVCount = SE->getAddExpr(BackedgeTakenCount,
-                             SE->getOne(BackedgeTakenCount->getType()));
-    // The BackedgeTaken expression contains the number of times that the
-    // backedge branches to the loop header.  This is one less than the
-    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+  if (ExitingBB == L->getLoopLatch()) {
+    // For pointer IVs, we chose to not strip inbounds which requires us not
+    // to add a potentially UB introducing use.  We need to either a) show
+    // the loop test we're modifying is already in post-inc form, or b) show
+    // that adding a use must not introduce UB.
+    bool SafeToPostInc =
+        IndVar->getType()->isIntegerTy() ||
+        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
+        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
+    if (SafeToPostInc) {
+      UsePostInc = true;
+      CmpIndVar = IncVar;
+    }
   }
 
-  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+  // It may be necessary to drop nowrap flags on the incrementing instruction
+  // if either LFTR moves from a pre-inc check to a post-inc check (in which
+  // case the increment might have previously been poison on the last iteration
+  // only) or if LFTR switches to a different IV that was previously dynamically
+  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
+  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
+  // check), because the pre-inc addrec flags may be adopted from the original
+  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
+  // TODO: This handling is inaccurate for one case: If we switch to a
+  // dynamically dead IV that wraps on the first loop iteration only, which is
+  // not covered by the post-inc addrec. (If the new IV was not dynamically
+  // dead, it could not be poison on the first iteration in the first place.)
+  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
+    if (BO->hasNoUnsignedWrap())
+      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
+    if (BO->hasNoSignedWrap())
+      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
+  }
+
+  Value *ExitCnt = genLoopLimit(
+      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
          "genLoopLimit missed a cast");
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   ICmpInst::Predicate P;
   if (L->contains(BI->getSuccessor(0)))
     P = ICmpInst::ICMP_NE;
   else
     P = ICmpInst::ICMP_EQ;
 
-  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
-                    << "      LHS:" << *CmpIndVar << '\n'
-                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
-                    << "\n"
-                    << "      RHS:\t" << *ExitCnt << "\n"
-                    << "  IVCount:\t" << *IVCount << "\n");
-
   IRBuilder<> Builder(BI);
 
   // The new loop exit condition should reuse the debug location of the
@@ -2390,67 +2465,58 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
 
-  // LFTR can ignore IV overflow and truncate to the width of
-  // BECount. This avoids materializing the add(zext(add)) expression.
+  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
+  // avoid the expensive expansion of the limit expression in the wider type,
+  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
+  // since we know (from the exit count bitwidth), that we can't self-wrap in
+  // the narrower type.
   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
   if (CmpIndVarSize > ExitCntSize) {
-    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
-    const SCEV *ARStart = AR->getStart();
-    const SCEV *ARStep = AR->getStepRecurrence(*SE);
-    // For constant IVCount, avoid truncation.
-    if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
-      const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
-      APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
-      // Note that the post-inc value of BackedgeTakenCount may have overflowed
-      // above such that IVCount is now zero.
-      if (IVCount != BackedgeTakenCount && Count == 0) {
-        Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
-        ++Count;
-      }
-      else
-        Count = Count.zext(CmpIndVarSize);
-      APInt NewLimit;
-      if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
-        NewLimit = Start - Count;
-      else
-        NewLimit = Start + Count;
-      ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
-
-      LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
+    assert(!CmpIndVar->getType()->isPointerTy() &&
+           !ExitCnt->getType()->isPointerTy());
+
+    // Before resorting to actually inserting the truncate, use the same
+    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
+    // the other side of the comparison instead.  We still evaluate the limit
+    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
+    // a truncate within it.
+    bool Extended = false;
+    const SCEV *IV = SE->getSCEV(CmpIndVar);
+    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+                                                  ExitCnt->getType());
+    const SCEV *ZExtTrunc =
+      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
+    
+    if (ZExtTrunc == IV) {
+      Extended = true;
+      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+                                   "wide.trip.count");
     } else {
-      // We try to extend trip count first. If that doesn't work we truncate IV.
-      // Zext(trunc(IV)) == IV implies equivalence of the following two:
-      // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
-      // one of the two holds, extend the trip count, otherwise we truncate IV.
-      bool Extended = false;
-      const SCEV *IV = SE->getSCEV(CmpIndVar);
-      const SCEV *ZExtTrunc =
-           SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
-                                                     ExitCnt->getType()),
-                                 CmpIndVar->getType());
-
-      if (ZExtTrunc == IV) {
+      const SCEV *SExtTrunc =
+        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
+      if (SExtTrunc == IV) {
         Extended = true;
-        ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                      "wide.trip.count");
-      } else {
-        const SCEV *SExtTrunc =
-          SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
-                                                    ExitCnt->getType()),
-                                CmpIndVar->getType());
-        if (SExtTrunc == IV) {
-          Extended = true;
-          ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
-                                       "wide.trip.count");
-        }
       }
-
-      if (!Extended)
-        CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
-                                        "lftr.wideiv");
     }
+
+    if (Extended) {
+      bool Discard;
+      L->makeLoopInvariant(ExitCnt, Discard);
+    } else 
+      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+                                      "lftr.wideiv");
   }
+  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+                    << "      LHS:" << *CmpIndVar << '\n'
+                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
+                    << "\n"
+                    << "      RHS:\t" << *ExitCnt << "\n"
+                    << "ExitCount:\t" << *ExitCount << "\n"
+                    << "  was: " << *BI->getCondition() << "\n");
+
   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   Value *OrigCond = BI->getCondition();
   // It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -2558,6 +2624,111 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   return MadeAnyChanges;
 }
 
+/// Replace exit conditions of \p L with constants when SCEV exit counts show
+/// an exit is taken on the first iteration or never. Returns true on change.
+bool IndVarSimplify::optimizeLoopExits(Loop *L) {
+  SmallVector<BasicBlock*, 16> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Form an expression for the maximum exit count possible for this loop. We
+  // merge the max and exact information to approximate a version of
+  // getMaxBackedgeTakenInfo which isn't restricted to just constants.
+  // TODO: factor this out as a version of getMaxBackedgeTakenCount which
+  // isn't guaranteed to return a constant.
+  SmallVector<const SCEV*, 4> ExitCounts;
+  const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
+  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
+    ExitCounts.push_back(MaxConstEC);
+  for (BasicBlock *ExitingBB : ExitingBlocks) {
+    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
+      assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
+             "We should only have known counts for exiting blocks that "
+             "dominate latch!");
+      ExitCounts.push_back(ExitCount);
+    }
+  }
+  if (ExitCounts.empty())
+    return false;
+  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
+
+  bool Changed = false;
+  for (BasicBlock *ExitingBB : ExitingBlocks) {
+    // If our exiting block exits multiple loops, we can only rewrite the
+    // innermost one.  Otherwise, we're changing how many times the innermost
+    // loop runs before it exits.
+    if (LI->getLoopFor(ExitingBB) != L)
+      continue;
+
+    // Can't rewrite non-branch yet.
+    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+    if (!BI)
+      continue;
+
+    // If already constant, nothing to do.
+    if (isa<Constant>(BI->getCondition()))
+      continue;
+
+    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+    if (isa<SCEVCouldNotCompute>(ExitCount))
+      continue;
+
+    // If we know we'd exit on the first iteration, rewrite the exit to
+    // reflect this.  This does not imply the loop must exit through this
+    // exit; there may be an earlier one taken on the first iteration.
+    // TODO: Given we know the backedge can't be taken, we should go ahead
+    // and break it.  Or at least, kill all the header phis and simplify.
+    if (ExitCount->isZero()) {
+      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+      auto *OldCond = BI->getCondition();
+      auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
+        ConstantInt::getFalse(OldCond->getType());
+      BI->setCondition(NewCond);
+      if (OldCond->use_empty())
+        DeadInsts.push_back(OldCond);
+      Changed = true;
+      continue;
+    }
+
+    // If we end up with a pointer exit count, bail, but report prior rewrites.
+    if (!ExitCount->getType()->isIntegerTy() ||
+        !MaxExitCount->getType()->isIntegerTy())
+      return Changed;
+
+    Type *WiderType =
+      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
+    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
+    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
+    assert(MaxExitCount->getType() == ExitCount->getType());
+    // Can we prove that some other exit must be taken strictly before this
+    // one?  TODO: handle cases where ule is known, and equality is covered
+    // by a dominating exit
+    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
+                                     MaxExitCount, ExitCount)) {
+      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+      auto *OldCond = BI->getCondition();
+      auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
+        ConstantInt::getTrue(OldCond->getType());
+      BI->setCondition(NewCond);
+      if (OldCond->use_empty())
+        DeadInsts.push_back(OldCond);
+      Changed = true;
+      continue;
+    }
+
+    // TODO: If we can prove that the exiting iteration is equal to the exit
+    // count for this exit and that no previous exit opportunities exist within
+    // the loop, then we can discharge all other exits.  (May fall out of
+    // previous TODO.)
+    // TODO: If we can't prove any relation between our exit count and the
+    // loop's exit count, but taking this exit doesn't require actually running
+    // the loop (i.e. no side effects, no computed values used in exit), then
+    // we can replace the exit test with a loop invariant test which exits on
+    // the first iteration.
+  }
+  return Changed;
+}
+
 //===----------------------------------------------------------------------===//
 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
 //===----------------------------------------------------------------------===//
@@ -2614,23 +2785,60 @@ bool IndVarSimplify::run(Loop *L) {
   // Eliminate redundant IV cycles.
   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
 
+  Changed |= optimizeLoopExits(L);
+
   // If we have a trip count expression, rewrite the loop's exit condition
-  // using it.  We can currently only handle loops with a single exit.
-  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
-      needsLFTR(L, DT)) {
-    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
-    if (IndVar) {
+  // using it.  
+  if (!DisableLFTR) {
+    SmallVector<BasicBlock*, 16> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (BasicBlock *ExitingBB : ExitingBlocks) {
+      // Can't rewrite non-branch yet.
+      if (!isa<BranchInst>(ExitingBB->getTerminator()))
+        continue;
+
+      // If our exiting block exits multiple loops, we can only rewrite the
+      // innermost one.  Otherwise, we're changing how many times the innermost
+      // loop runs before it exits. 
+      if (LI->getLoopFor(ExitingBB) != L)
+        continue;
+      
+      if (!needsLFTR(L, ExitingBB))
+        continue;
+
+      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+      if (isa<SCEVCouldNotCompute>(ExitCount))
+        continue;
+
+      // This was handled above, but as we form SCEVs, we can sometimes refine
+      // existing ones; this allows exit counts to be folded to zero which
+      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
+      // until stable to handle cases like this better.
+      if (ExitCount->isZero())
+        continue;
+      
+      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
+      if (!IndVar)
+        continue;
+      
+      // Avoid high cost expansions.  Note: This heuristic is questionable in
+      // that our definition of "high cost" is not exactly principled.  
+      if (Rewriter.isHighCostExpansion(ExitCount, L))
+        continue;
+      
       // Check preconditions for proper SCEVExpander operation. SCEV does not
-      // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
-      // pass that uses the SCEVExpander must do it. This does not work well for
-      // loop passes because SCEVExpander makes assumptions about all loops,
-      // while LoopPassManager only forces the current loop to be simplified.
+      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
+      // any pass that uses the SCEVExpander must do it. This does not work
+      // well for loop passes because SCEVExpander makes assumptions about
+      // all loops, while LoopPassManager only forces the current loop to be
+      // simplified. 
       //
       // FIXME: SCEV expansion has no way to bail out, so the caller must
       // explicitly check any assumptions made by SCEV. Brittle.
-      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
       if (!AR || AR->getLoop()->getLoopPreheader())
-        Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+        Changed |= linearFunctionTestReplace(L, ExitingBB,
+                                             ExitCount, IndVar,
                                              Rewriter);
     }
   }
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 1c701bbee185..997d68838152 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1,9 +1,8 @@
 //===- InductiveRangeCheckElimination.cpp - -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -116,6 +115,11 @@ static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
 static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                  cl::Hidden, cl::init(true));
 
+static cl::opt<bool> AllowNarrowLatchCondition(
+    "irce-allow-narrow-latch", cl::Hidden, cl::init(true),
+    cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
+             "with narrow latch condition."));
+
 static const char *ClonedLoopTag = "irce.loop.clone";
 
 #define DEBUG_TYPE "irce"
@@ -532,12 +536,6 @@ class LoopConstrainer {
     Optional<const SCEV *> HighLimit;
   };
 
-  // A utility function that does a `replaceUsesOfWith' on the incoming block
-  // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
-  // incoming block list with `ReplaceBy'.
-  static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
-                              BasicBlock *ReplaceBy);
-
   // Compute a safe set of limits for the main loop to run in -- effectively the
   // intersection of `Range' and the iteration space of the original loop.
   // Return None if unable to compute the set of subranges.
@@ -639,13 +637,6 @@ public:
 
 } // end anonymous namespace
 
-void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
-                                      BasicBlock *ReplaceBy) {
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-    if (PN->getIncomingBlock(i) == Block)
-      PN->setIncomingBlock(i, ReplaceBy);
-}
-
 /// Given a loop with an deccreasing induction variable, is it possible to
 /// safely calculate the bounds of a new loop using the given Predicate.
 static bool isSafeDecreasingBound(const SCEV *Start,
@@ -868,7 +859,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
 
   assert(!StepCI->isZero() && "Zero step?");
   bool IsIncreasing = !StepCI->isNegative();
-  bool IsSignedPredicate = ICmpInst::isSigned(Pred);
+  bool IsSignedPredicate;
   const SCEV *StartNext = IndVarBase->getStart();
   const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
   const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
@@ -1045,11 +1036,23 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
   return Result;
 }
 
+/// If the type of \p S is already \p Ty, return \p S. Otherwise, return the
+/// sign extension (if \p Signed) or zero extension of \p S to type \p Ty.
+static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
+                                bool Signed) {
+  return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
+}
+
 Optional<LoopConstrainer::SubRanges>
 LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
   IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
 
-  if (Range.getType() != Ty)
+  auto *RTy = cast<IntegerType>(Range.getType());
+
+  // We only support wide range checks and narrow latches.
+  if (!AllowNarrowLatchCondition && RTy != Ty)
+    return None;
+  if (RTy->getBitWidth() < Ty->getBitWidth())
     return None;
 
   LoopConstrainer::SubRanges Result;
@@ -1057,8 +1060,10 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
   // I think we can be more aggressive here and make this nuw / nsw if the
   // addition that feeds into the icmp for the latch's terminating branch is nuw
   // / nsw.  In any case, a wrapping 2's complement addition is safe.
-  const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
-  const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+  const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
+                                   RTy, SE, IsSignedPredicate);
+  const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
+                                 SE, IsSignedPredicate);
 
   bool Increasing = MainLoopStructure.IndVarIncreasing;
 
@@ -1068,7 +1073,7 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
 
   const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
 
-  const SCEV *One = SE.getOne(Ty);
+  const SCEV *One = SE.getOne(RTy);
   if (Increasing) {
     Smallest = Start;
     Greatest = End;
@@ -1257,6 +1262,13 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
   bool IsSignedPredicate = LS.IsSignedPredicate;
 
   IRBuilder<> B(PreheaderJump);
+  auto *RangeTy = Range.getBegin()->getType();
+  auto NoopOrExt = [&](Value *V) {
+    if (V->getType() == RangeTy)
+      return V;
+    return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+                             : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+  };
 
   // EnterLoopCond - is it okay to start executing this `LS'?
   Value *EnterLoopCond = nullptr;
@@ -1264,15 +1276,16 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
       Increasing
           ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
           : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
-  EnterLoopCond = B.CreateICmp(Pred, LS.IndVarStart, ExitSubloopAt);
+  Value *IndVarStart = NoopOrExt(LS.IndVarStart);
+  EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
 
   B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
   PreheaderJump->eraseFromParent();
 
   LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
   B.SetInsertPoint(LS.LatchBr);
-  Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, LS.IndVarBase,
-                                             ExitSubloopAt);
+  Value *IndVarBase = NoopOrExt(LS.IndVarBase);
+  Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
 
   Value *CondForBranch = LS.LatchBrExitIdx == 1
                              ? TakeBackedgeLoopCond
@@ -1285,7 +1298,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
   // IterationsLeft - are there any more iterations left, given the original
   // upper bound on the induction variable?  If not, we branch to the "real"
   // exit.
-  Value *IterationsLeft = B.CreateICmp(Pred, LS.IndVarBase, LS.LoopExitAt);
+  Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
+  Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
   B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
 
   BranchInst *BranchToContinuation =
@@ -1304,15 +1318,14 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
     RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
   }
 
-  RRI.IndVarEnd = PHINode::Create(LS.IndVarBase->getType(), 2, "indvar.end",
+  RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
                                   BranchToContinuation);
-  RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
-  RRI.IndVarEnd->addIncoming(LS.IndVarBase, RRI.ExitSelector);
+  RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
+  RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
 
   // The latch exit now has a branch from `RRI.ExitSelector' instead of
   // `LS.Latch'.  The PHI nodes need to be updated to reflect that.
-  for (PHINode &PN : LS.LatchExit->phis())
-    replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector);
+  LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
 
   return RRI;
 }
@@ -1322,9 +1335,8 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
     const LoopConstrainer::RewrittenRangeInfo &RRI) const {
   unsigned PHIIndex = 0;
   for (PHINode &PN : LS.Header->phis())
-    for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
-      if (PN.getIncomingBlock(i) == ContinuationBlock)
-        PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+    PN.setIncomingValueForBlock(ContinuationBlock,
+                                RRI.PHIValuesAtPseudoExit[PHIIndex++]);
 
   LS.IndVarStart = RRI.IndVarEnd;
 }
@@ -1335,9 +1347,7 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
   BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
   BranchInst::Create(LS.Header, Preheader);
 
-  for (PHINode &PN : LS.Header->phis())
-    for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
-      replacePHIBlock(&PN, OldPreheader, Preheader);
+  LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
 
   return Preheader;
 }
@@ -1393,7 +1403,7 @@ bool LoopConstrainer::run() {
   SubRanges SR = MaybeSR.getValue();
   bool Increasing = MainLoopStructure.IndVarIncreasing;
   IntegerType *IVTy =
-      cast<IntegerType>(MainLoopStructure.IndVarBase->getType());
+      cast<IntegerType>(Range.getBegin()->getType());
 
   SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
   Instruction *InsertPt = OriginalPreheader->getTerminator();
@@ -1534,7 +1544,7 @@ bool LoopConstrainer::run() {
   // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
   auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
     formLCSSARecursively(*L, DT, &LI, &SE);
-    simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+    simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
     // Pre/post loops are slow paths, we do not need to perform any loop
     // optimizations on them.
     if (!IsOriginalLoop)
@@ -1556,6 +1566,12 @@ Optional<InductiveRangeCheck::Range>
 InductiveRangeCheck::computeSafeIterationSpace(
     ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
     bool IsLatchSigned) const {
+  // We can handle mismatched latch-check and range-check types only when the
+  // latch check is the narrower of the two; bail out if the IV type is wider.
+  auto *IVType = cast<IntegerType>(IndVar->getType());
+  auto *RCType = cast<IntegerType>(getBegin()->getType());
+  if (IVType->getBitWidth() > RCType->getBitWidth())
+    return None;
   // IndVar is of the form "A + B * I" (where "I" is the canonical induction
   // variable, that may or may not exist as a real llvm::Value in the loop) and
   // this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1579,8 +1595,9 @@ InductiveRangeCheck::computeSafeIterationSpace(
   if (!IndVar->isAffine())
     return None;
 
-  const SCEV *A = IndVar->getStart();
-  const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+  const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
+  const SCEVConstant *B = dyn_cast<SCEVConstant>(
+      NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
   if (!B)
     return None;
   assert(!B->isZero() && "Recurrence with zero step?");
@@ -1591,7 +1608,7 @@ InductiveRangeCheck::computeSafeIterationSpace(
     return None;
 
   assert(!D->getValue()->isZero() && "Recurrence with zero step?");
-  unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+  unsigned BitWidth = RCType->getBitWidth();
   const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
 
   // Subtract Y from X so that it does not go through border of the IV
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index fbbc09eb487f..5f0e2001c73d 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1,9 +1,8 @@
 //===- InferAddressSpace.cpp - --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -149,7 +148,9 @@ class InferAddressSpaces : public FunctionPass {
 public:
   static char ID;
 
-  InferAddressSpaces() : FunctionPass(ID) {}
+  InferAddressSpaces() :
+    FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
+  InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
@@ -218,13 +219,17 @@ static bool isAddressExpression(const Value &V) {
   if (!isa<Operator>(V))
     return false;
 
-  switch (cast<Operator>(V).getOpcode()) {
+  const Operator &Op = cast<Operator>(V);
+  switch (Op.getOpcode()) {
   case Instruction::PHI:
+    assert(Op.getType()->isPointerTy());
+    return true;
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
   case Instruction::GetElementPtr:
-  case Instruction::Select:
     return true;
+  case Instruction::Select:
+    return Op.getType()->isPointerTy();
   default:
     return false;
   }
@@ -548,10 +553,17 @@ static Value *cloneConstantExprWithNewAddressSpace(
     if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
       IsNew = true;
       NewOperands.push_back(cast<Constant>(NewOperand));
-    } else {
-      // Otherwise, reuses the old operand.
-      NewOperands.push_back(Operand);
+      continue;
     }
+    if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
+      if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
+              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+        IsNew = true;
+        NewOperands.push_back(cast<Constant>(NewOperand));
+        continue;
+      }
+    // Otherwise, reuses the old operand.
+    NewOperands.push_back(Operand);
   }
 
   // If !IsNew, we will replace the Value with itself. However, replaced values
@@ -621,9 +633,12 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
 
   const TargetTransformInfo &TTI =
       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  FlatAddrSpace = TTI.getFlatAddressSpace();
-  if (FlatAddrSpace == UninitializedAddressSpace)
-    return false;
+
+  if (FlatAddrSpace == UninitializedAddressSpace) {
+    FlatAddrSpace = TTI.getFlatAddressSpace();
+    if (FlatAddrSpace == UninitializedAddressSpace)
+      return false;
+  }
 
   // Collects all flat address expressions in postorder.
   std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
@@ -991,8 +1006,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
         }
 
         // Otherwise, replaces the use with flat(NewV).
-        if (Instruction *I = dyn_cast<Instruction>(V)) {
-          BasicBlock::iterator InsertPos = std::next(I->getIterator());
+        if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+          // Don't create a copy of the original addrspacecast.
+          if (U == V && isa<AddrSpaceCastInst>(V))
+            continue;
+
+          BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
           while (isa<PHINode>(InsertPos))
             ++InsertPos;
           U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
@@ -1015,6 +1034,6 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
   return true;
 }
 
-FunctionPass *llvm::createInferAddressSpacesPass() {
-  return new InferAddressSpaces();
+FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
+  return new InferAddressSpaces(AddressSpace);
 }
diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp
index 05cd48d83267..6616364ab203 100644
--- a/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -1,9 +1,8 @@
 //===- InstSimplifyPass.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 48de56a02834..b86bf2fefbe5 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1,9 +1,8 @@
 //===- JumpThreading.cpp - Thread control through conditional blocks ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -38,7 +38,6 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -103,6 +102,12 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
     cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
     cl::Hidden);
 
+static cl::opt<bool> ThreadAcrossLoopHeaders(
+    "jump-threading-across-loop-headers",
+    cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
+    cl::init(false), cl::Hidden);
+
+
 namespace {
 
   /// This pass performs 'jump threading', which looks at blocks that have
@@ -369,7 +374,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
     if (!DT.isReachableFromEntry(&BB))
       Unreachable.insert(&BB);
 
-  FindLoopHeaders(F);
+  if (!ThreadAcrossLoopHeaders)
+    FindLoopHeaders(F);
 
   bool EverChanged = false;
   bool Changed;
@@ -1056,7 +1062,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
     Condition = IB->getAddress()->stripPointerCasts();
     Preference = WantBlockAddress;
   } else {
-    return false; // Must be an invoke.
+    return false; // Must be an invoke or callbr.
   }
 
   // Run constant folding to see if we can reduce the condition to a simple
@@ -1092,7 +1098,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
                       << "' folding undef terminator: " << *BBTerm << '\n');
     BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
     BBTerm->eraseFromParent();
-    DTU->applyUpdates(Updates);
+    DTU->applyUpdatesPermissive(Updates);
     return true;
   }
 
@@ -1143,7 +1149,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
         unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
         BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
         ToRemoveSucc->removePredecessor(BB, true);
-        BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+        BranchInst *UncondBr =
+          BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+        UncondBr->setDebugLoc(CondBr->getDebugLoc());
         CondBr->eraseFromParent();
         if (CondCmp->use_empty())
           CondCmp->eraseFromParent();
@@ -1160,7 +1168,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
             ConstantInt::getFalse(CondCmp->getType());
           ReplaceFoldableUses(CondCmp, CI);
         }
-        DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
+        DTU->applyUpdatesPermissive(
+            {{DominatorTree::Delete, BB, ToRemoveSucc}});
         return true;
       }
 
@@ -1172,7 +1181,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
   }
 
   if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
-    TryToUnfoldSelect(SI, BB);
+    if (TryToUnfoldSelect(SI, BB))
+      return true;
 
   // Check for some cases that are worth simplifying.  Right now we want to look
   // for loads that are used by a switch or by the condition for the branch.  If
@@ -1245,9 +1255,10 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
       BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
       BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
       RemoveSucc->removePredecessor(BB);
-      BranchInst::Create(KeepSucc, BI);
+      BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
+      UncondBI->setDebugLoc(BI->getDebugLoc());
       BI->eraseFromParent();
-      DTU->deleteEdgeRelaxed(BB, RemoveSucc);
+      DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
       return true;
     }
     CurrentBB = CurrentPred;
@@ -1429,7 +1440,9 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
     // Add all the unavailable predecessors to the PredsToSplit list.
     for (BasicBlock *P : predecessors(LoadBB)) {
       // If the predecessor is an indirect goto, we can't split the edge.
-      if (isa<IndirectBrInst>(P->getTerminator()))
+      // Same for CallBr.
+      if (isa<IndirectBrInst>(P->getTerminator()) ||
+          isa<CallBrInst>(P->getTerminator()))
         return false;
 
       if (!AvailablePredSet.count(P))
@@ -1446,11 +1459,11 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   if (UnavailablePred) {
     assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
            "Can't handle critical edge here!");
-    LoadInst *NewVal =
-        new LoadInst(LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
-                     LoadI->getName() + ".pr", false, LoadI->getAlignment(),
-                     LoadI->getOrdering(), LoadI->getSyncScopeID(),
-                     UnavailablePred->getTerminator());
+    LoadInst *NewVal = new LoadInst(
+        LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+        LoadI->getName() + ".pr", false, LoadI->getAlignment(),
+        LoadI->getOrdering(), LoadI->getSyncScopeID(),
+        UnavailablePred->getTerminator());
     NewVal->setDebugLoc(LoadI->getDebugLoc());
     if (AATags)
       NewVal->setAAMetadata(AATags);
@@ -1474,8 +1487,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
   for (pred_iterator PI = PB; PI != PE; ++PI) {
     BasicBlock *P = *PI;
     AvailablePredsTy::iterator I =
-      std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
-                       std::make_pair(P, (Value*)nullptr));
+        llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
 
     assert(I != AvailablePreds.end() && I->first == P &&
            "Didn't find entry for predecessor!");
@@ -1601,7 +1613,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
   Constant *OnlyVal = nullptr;
   Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
 
-  unsigned PredWithKnownDest = 0;
   for (const auto &PredValue : PredValues) {
     BasicBlock *Pred = PredValue.second;
     if (!SeenPreds.insert(Pred).second)
@@ -1638,12 +1649,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
         OnlyVal = MultipleVal;
     }
 
-    // We know where this predecessor is going.
-    ++PredWithKnownDest;
-
     // If the predecessor ends with an indirect goto, we can't change its
-    // destination.
-    if (isa<IndirectBrInst>(Pred->getTerminator()))
+    // destination. Same for CallBr.
+    if (isa<IndirectBrInst>(Pred->getTerminator()) ||
+        isa<CallBrInst>(Pred->getTerminator()))
       continue;
 
     PredToDestList.push_back(std::make_pair(Pred, DestBB));
@@ -1657,7 +1666,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
   // not thread. By doing so, we do not need to duplicate the current block and
   // also miss potential opportunities in case we dont/cant duplicate.
   if (OnlyDest && OnlyDest != MultipleDestSentinel) {
-    if (PredWithKnownDest == (size_t)pred_size(BB)) {
+    if (BB->hasNPredecessors(PredToDestList.size())) {
       bool SeenFirstBranchToOnlyDest = false;
       std::vector <DominatorTree::UpdateType> Updates;
       Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
@@ -1674,7 +1683,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
       Instruction *Term = BB->getTerminator();
       BranchInst::Create(OnlyDest, Term);
       Term->eraseFromParent();
-      DTU->applyUpdates(Updates);
+      DTU->applyUpdatesPermissive(Updates);
 
       // If the condition is now dead due to the removal of the old terminator,
       // erase it.
@@ -1976,8 +1985,14 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
   }
 
   BasicBlock::iterator BI = BB->begin();
-  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
-    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+  // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
+  // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
+  // the operand of the cloned phi.
+  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+    PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+    NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+    ValueMapping[PN] = NewPN;
+  }
 
   // Clone the non-phi instructions of BB into NewBB, keeping track of the
   // mapping and using it to remap operands in the cloned instructions.
@@ -2016,9 +2031,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
     }
 
   // Enqueue required DT updates.
-  DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
-                     {DominatorTree::Insert, PredBB, NewBB},
-                     {DominatorTree::Delete, PredBB, BB}});
+  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
+                               {DominatorTree::Insert, PredBB, NewBB},
+                               {DominatorTree::Delete, PredBB, BB}});
 
   // If there were values defined in BB that are used outside the block, then we
   // now have to update all uses of the value to use either the original value,
@@ -2112,7 +2127,7 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
       BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
   }
 
-  DTU->applyUpdates(Updates);
+  DTU->applyUpdatesPermissive(Updates);
   return NewBBs[0];
 }
 
@@ -2385,7 +2400,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
 
   // Remove the unconditional branch at the end of the PredBB block.
   OldPredBranch->eraseFromParent();
-  DTU->applyUpdates(Updates);
+  DTU->applyUpdatesPermissive(Updates);
 
   ++NumDupes;
   return true;
@@ -2421,8 +2436,8 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
 
   // The select is now dead.
   SI->eraseFromParent();
-  DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
-                    {DominatorTree::Insert, Pred, NewBB}});
+  DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
+                               {DominatorTree::Insert, Pred, NewBB}});
 
   // Update any other PHI nodes in BB.
   for (BasicBlock::iterator BI = BB->begin();
@@ -2599,7 +2614,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
       Updates.push_back({DominatorTree::Delete, BB, Succ});
       Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
     }
-    DTU->applyUpdates(Updates);
+    DTU->applyUpdatesPermissive(Updates);
     return true;
   }
   return false;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d204654c3915..d9dda4cef2d2 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -1,9 +1,8 @@
 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,7 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -107,17 +107,29 @@ static cl::opt<int>
 LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
                cl::desc("How many instruction to cross product using AA"));
 
-// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
-// pathological cases, in exchange for faster compile. This is to be removed
-// if MemorySSA starts to address the same issue. This flag applies only when
-// LICM uses MemorySSA instead on AliasSetTracker. When the flag is disabled
-// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
-// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's
-// getDefiningAccess, which may not be precise, since optimizeUses is capped.
-static cl::opt<bool> EnableLicmCap(
-    "enable-licm-cap", cl::init(false), cl::Hidden,
-    cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
-             "pathological cases, in exchange for faster compile"));
+// Experimental option to allow imprecision in LICM in pathological cases, in
+// exchange for faster compile. This is to be removed if MemorySSA starts to
+// address the same issue. This flag applies only when LICM uses MemorySSA
+// instead of AliasSetTracker. LICM calls MemorySSAWalker's
+// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
+// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
+// which may not be precise, since optimizeUses is capped. The result is
+// correct, but we may not get as "far up" as possible to get which access is
+// clobbering the one queried.
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
+    "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
+    cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
+             "for faster compile. Caps the MemorySSA clobbering calls."));
+
+// Experimentally, memory promotion carries less importance than sinking and
+// hoisting. Limit when we do promotion when using MemorySSA, in order to save
+// compile time.
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
+    "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
+    cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
+             "effect. When MSSA in LICM is enabled, then this is the maximum "
+             "number of accesses allowed to be present in a loop in order to "
+             "enable memory promotion."));
 
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
@@ -128,8 +140,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
-                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
-                 bool FreeInLoop);
+                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                            const DominatorTree *DT,
                                            const Loop *CurLoop,
@@ -140,7 +151,8 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                      AliasSetTracker *CurAST, Loop *CurLoop,
                                      AliasAnalysis *AA);
 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
-                                             Loop *CurLoop);
+                                             Loop *CurLoop,
+                                             SinkAndHoistLICMFlags &Flags);
 static Instruction *CloneInstructionInExitBlock(
     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -149,7 +161,8 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                              AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
 
 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
-                                  ICFLoopSafetyInfo &SafetyInfo);
+                                  ICFLoopSafetyInfo &SafetyInfo,
+                                  MemorySSAUpdater *MSSAU);
 
 namespace {
 struct LoopInvariantCodeMotion {
@@ -160,17 +173,29 @@ struct LoopInvariantCodeMotion {
                  OptimizationRemarkEmitter *ORE, bool DeleteAST);
 
   ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
+  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
+                          unsigned LicmMssaNoAccForPromotionCap)
+      : LicmMssaOptCap(LicmMssaOptCap),
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
 
 private:
   ASTrackerMapTy LoopToAliasSetMap;
+  unsigned LicmMssaOptCap;
+  unsigned LicmMssaNoAccForPromotionCap;
 
   std::unique_ptr<AliasSetTracker>
   collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
+  std::unique_ptr<AliasSetTracker>
+  collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
+                                  MemorySSAUpdater *MSSAU);
 };
 
 struct LegacyLICMPass : public LoopPass {
   static char ID; // Pass identification, replacement for typeid
-  LegacyLICMPass() : LoopPass(ID) {
+  LegacyLICMPass(
+      unsigned LicmMssaOptCap = SetLicmMssaOptCap,
+      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
+      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
   }
 
@@ -219,8 +244,16 @@ struct LegacyLICMPass : public LoopPass {
   using llvm::Pass::doFinalization;
 
   bool doFinalization() override {
-    assert(LICM.getLoopToAliasSetMap().empty() &&
+    auto &AliasSetMap = LICM.getLoopToAliasSetMap();
+    // All loops in the AliasSetMap should be cleaned up already. The only case
+    // where we fail to do so is if an outer loop gets deleted before LICM
+    // visits it.
+    assert(all_of(AliasSetMap,
+                  [](LoopInvariantCodeMotion::ASTrackerMapTy::value_type &KV) {
+                    return !KV.first->getParentLoop();
+                  }) &&
            "Didn't free loop alias sets");
+    AliasSetMap.clear();
     return false;
   }
 
@@ -252,7 +285,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
     report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
                        "cached at a higher level");
 
-  LoopInvariantCodeMotion LICM;
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
                       AR.MSSA, ORE, true))
     return PreservedAnalyses::all();
@@ -261,6 +294,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
 
   PA.preserve<DominatorTreeAnalysis>();
   PA.preserve<LoopAnalysis>();
+  if (EnableMSSALoopDependency)
+    PA.preserve<MemorySSAAnalysis>();
 
   return PA;
 }
@@ -276,6 +311,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                     false)
 
 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
+Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
+                           unsigned LicmMssaNoAccForPromotionCap) {
+  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+}
 
 /// Hoist expressions out of the specified loop. Note, alias info for inner
 /// loop is not preserved so it is not a good idea to run LICM multiple
@@ -293,12 +332,31 @@ bool LoopInvariantCodeMotion::runOnLoop(
 
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
+  bool NoOfMemAccTooLarge = false;
+  unsigned LicmMssaOptCounter = 0;
+
   if (!MSSA) {
     LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
     CurAST = collectAliasInfoForLoop(L, LI, AA);
   } else {
-    LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+    LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
     MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+
+    unsigned AccessCapCount = 0;
+    for (auto *BB : L->getBlocks()) {
+      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+        for (const auto &MA : *Accesses) {
+          (void)MA;
+          AccessCapCount++;
+          if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
+            NoOfMemAccTooLarge = true;
+            break;
+          }
+        }
+      }
+      if (NoOfMemAccTooLarge)
+        break;
+    }
   }
 
   // Get the preheader block to move instructions into...
@@ -317,13 +375,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
   // that we are guaranteed to see definitions before we see uses.  This allows
   // us to sink instructions in one pass, without iteration.  After sinking
   // instructions, we perform another pass to hoist them out of the loop.
-  //
+  SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
+                                 LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                                 /*IsSink=*/true};
   if (L->hasDedicatedExits())
     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
-                          CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                          CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+  Flags.IsSink = false;
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
-                           CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+                           CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
 
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -332,7 +393,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
   // make sure we catch that. An additional load may be generated in the
   // preheader for SSA updater, so also avoid sinking when no preheader
   // is available.
-  if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+  if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
+      !NoOfMemAccTooLarge) {
     // Figure out the loop exits and their insertion points
     SmallVector<BasicBlock *, 8> ExitBlocks;
     L->getUniqueExitBlocks(ExitBlocks);
@@ -344,38 +406,45 @@ bool LoopInvariantCodeMotion::runOnLoop(
 
     if (!HasCatchSwitch) {
       SmallVector<Instruction *, 8> InsertPts;
+      SmallVector<MemoryAccess *, 8> MSSAInsertPts;
       InsertPts.reserve(ExitBlocks.size());
-      for (BasicBlock *ExitBlock : ExitBlocks)
+      if (MSSAU)
+        MSSAInsertPts.reserve(ExitBlocks.size());
+      for (BasicBlock *ExitBlock : ExitBlocks) {
         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+        if (MSSAU)
+          MSSAInsertPts.push_back(nullptr);
+      }
 
       PredIteratorCache PIC;
 
       bool Promoted = false;
 
-      if (CurAST.get()) {
-        // Loop over all of the alias sets in the tracker object.
-        for (AliasSet &AS : *CurAST) {
-          // We can promote this alias set if it has a store, if it is a "Must"
-          // alias set, if the pointer is loop invariant, and if we are not
-          // eliminating any volatile loads or stores.
-          if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
-              !L->isLoopInvariant(AS.begin()->getValue()))
-            continue;
-
-          assert(
-              !AS.empty() &&
-              "Must alias set should have at least one pointer element in it!");
-
-          SmallSetVector<Value *, 8> PointerMustAliases;
-          for (const auto &ASI : AS)
-            PointerMustAliases.insert(ASI.getValue());
-
-          Promoted |= promoteLoopAccessesToScalars(
-              PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
-              CurAST.get(), &SafetyInfo, ORE);
-        }
+      // Build an AST using MSSA.
+      if (!CurAST.get())
+        CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
+
+      // Loop over all of the alias sets in the tracker object.
+      for (AliasSet &AS : *CurAST) {
+        // We can promote this alias set if it has a store, if it is a "Must"
+        // alias set, if the pointer is loop invariant, and if we are not
+        // eliminating any volatile loads or stores.
+        if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+            !L->isLoopInvariant(AS.begin()->getValue()))
+          continue;
+
+        assert(
+            !AS.empty() &&
+            "Must alias set should have at least one pointer element in it!");
+
+        SmallSetVector<Value *, 8> PointerMustAliases;
+        for (const auto &ASI : AS)
+          PointerMustAliases.insert(ASI.getValue());
+
+        Promoted |= promoteLoopAccessesToScalars(
+            PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+            DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
       }
-      // FIXME: Promotion initially disabled when using MemorySSA.
 
       // Once we have promoted values across the loop body we have to
       // recursively reform LCSSA as any nested loop may now have values defined
@@ -399,7 +468,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
 
   // If this loop is nested inside of another one, save the alias information
   // for when we process the outer loop.
-  if (CurAST.get() && L->getParentLoop() && !DeleteAST)
+  if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
     LoopToAliasSetMap[L] = std::move(CurAST);
 
   if (MSSAU.get() && VerifyMemorySSA)
@@ -420,6 +489,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                       TargetTransformInfo *TTI, Loop *CurLoop,
                       AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
                       ICFLoopSafetyInfo *SafetyInfo,
+                      SinkAndHoistLICMFlags &Flags,
                       OptimizationRemarkEmitter *ORE) {
 
   // Verify inputs.
@@ -463,9 +533,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
       //
       bool FreeInLoop = false;
       if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+                             ORE) &&
           !I.mayHaveSideEffects()) {
-        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
+        if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
           if (!FreeInLoop) {
             ++II;
             eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
@@ -718,6 +789,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
                        DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                        AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
                        ICFLoopSafetyInfo *SafetyInfo,
+                       SinkAndHoistLICMFlags &Flags,
                        OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -770,7 +842,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
       // and we have accurately duplicated the control flow from the loop header
       // to that block.
       if (CurLoop->hasLoopInvariantOperands(&I) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+                             ORE) &&
           isSafeToExecuteUnconditionally(
               I, DT, CurLoop, SafetyInfo, ORE,
               CurLoop->getLoopPreheader()->getTerminator())) {
@@ -808,13 +881,18 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         continue;
       }
 
-      using namespace PatternMatch;
-      if (((I.use_empty() &&
-            match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
-           isGuard(&I)) &&
+      auto IsInvariantStart = [&](Instruction &I) {
+        using namespace PatternMatch;
+        return I.use_empty() &&
+               match(&I, m_Intrinsic<Intrinsic::invariant_start>());
+      };
+      auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
+        return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+               SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
+      };
+      if ((IsInvariantStart(I) || isGuard(&I)) &&
           CurLoop->hasLoopInvariantOperands(&I) &&
-          SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
-          SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
+          MustExecuteWithoutWritesBefore(I)) {
         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
               MSSAU, ORE);
         HoistedInstructions.push_back(&I);
@@ -867,7 +945,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
                           << HoistPoint->getParent()->getName()
                           << ": " << *I << "\n");
-        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
+        moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
         HoistPoint = I;
         Changed = true;
       }
@@ -897,8 +975,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
                                   Loop *CurLoop) {
   Value *Addr = LI->getOperand(0);
   const DataLayout &DL = LI->getModule()->getDataLayout();
-  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
-      cast<PointerType>(Addr->getType())->getElementType());
+  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
 
   // if the type is i8 addrspace(x)*, we know this is the type of
   // llvm.invariant.start operand
@@ -945,16 +1022,15 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
 namespace {
 /// Return true if-and-only-if we know how to (mechanically) both hoist and
 /// sink a given instruction out of a loop.  Does not address legality
-/// concerns such as aliasing or speculation safety.  
+/// concerns such as aliasing or speculation safety.
 bool isHoistableAndSinkableInst(Instruction &I) {
   // Only these instructions are hoistable/sinkable.
-  return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
-          isa<CallInst>(I) || isa<FenceInst>(I) || 
-          isa<BinaryOperator>(I) || isa<CastInst>(I) ||
-          isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
-          isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
-          isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
-          isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
+  return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+          isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+          isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+          isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+          isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+          isa<InsertValueInst>(I));
 }
 /// Return true if all of the alias sets within this AST are known not to
 /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop.
@@ -997,12 +1073,15 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                               Loop *CurLoop, AliasSetTracker *CurAST,
                               MemorySSAUpdater *MSSAU,
                               bool TargetExecutesOncePerLoop,
+                              SinkAndHoistLICMFlags *Flags,
                               OptimizationRemarkEmitter *ORE) {
   // If we don't understand the instruction, bail early.
   if (!isHoistableAndSinkableInst(I))
     return false;
 
   MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+  if (MSSA)
+    assert(Flags != nullptr && "Flags cannot be null.");
 
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
@@ -1029,7 +1108,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                                              CurLoop, AA);
     else
       Invalidated = pointerInvalidatedByLoopWithMSSA(
-          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
+          MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
     // Check loop-invariant address because this may also be a sinkable load
     // whose address is not necessarily loop-invariant.
     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1074,7 +1153,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                   CurAST, CurLoop, AA);
             else
               Invalidated = pointerInvalidatedByLoopWithMSSA(
-                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+                  MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
+                  *Flags);
             if (Invalidated)
               return false;
           }
@@ -1133,13 +1213,46 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     } else { // MSSAU
       if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
         return true;
-      if (!EnableLicmCap) {
-        auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
-        if (MSSA->isLiveOnEntryDef(Source) ||
-            !CurLoop->contains(Source->getBlock()))
-          return true;
-      }
-      return false;
+      // If there are more accesses than the promotion cap, give up: we are not
+      // walking a list that long.
+      if (Flags->NoOfMemAccTooLarge)
+        return false;
+      // Check store only if there's still "quota" to check clobber.
+      if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
+        return false;
+      // If there are interfering Uses (i.e. their defining access is in the
+      // loop), or ordered loads (stored as Defs!), don't move this store.
+      // Could do better here, but this is conservatively correct.
+      // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+      // moving accesses. Can also extend to dominating uses.
+      auto *SIMD = MSSA->getMemoryAccess(SI);
+      for (auto *BB : CurLoop->getBlocks())
+        if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+          for (const auto &MA : *Accesses)
+            if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+              auto *MD = MU->getDefiningAccess();
+              if (!MSSA->isLiveOnEntryDef(MD) &&
+                  CurLoop->contains(MD->getBlock()))
+                return false;
+              // Disable hoisting past potentially interfering loads. Optimized
+              // Uses may point to an access outside the loop, as getClobbering
+              // checks the previous iteration when walking the backedge.
+              // FIXME: More precise: no Uses that alias SI.
+              if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+                return false;
+            } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+              if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+                (void)LI; // Silence unused-variable warning in release builds.
+                assert(!LI->isUnordered() && "Expected ordered load");
+                return false;
+              }
+        }
+
+      auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+      Flags->LicmMssaOptCounter++;
+      // If there are no clobbering Defs in the loop, store is safe to hoist.
+      return MSSA->isLiveOnEntryDef(Source) ||
+             !CurLoop->contains(Source->getBlock());
     }
   }
 
@@ -1233,7 +1346,7 @@ static Instruction *CloneInstructionInExitBlock(
 
     // Sinking call-sites need to be handled differently from other
     // instructions.  The cloned call-site needs a funclet bundle operand
-    // appropriate for it's location in the CFG.
+    // appropriate for its location in the CFG.
     SmallVector<OperandBundleDef, 1> OpBundles;
     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
          BundleIdx != BundleEnd; ++BundleIdx) {
@@ -1310,10 +1423,15 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
 }
 
 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
-                                  ICFLoopSafetyInfo &SafetyInfo) {
+                                  ICFLoopSafetyInfo &SafetyInfo,
+                                  MemorySSAUpdater *MSSAU) {
   SafetyInfo.removeInstruction(&I);
   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
   I.moveBefore(&Dest);
+  if (MSSAU) // MSSAU may be null; when set, move I's access (if any) too.
+    if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+            MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+      MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
 }
 
 static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1426,8 +1544,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
 ///
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
-                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
-                 bool FreeInLoop) {
+                 MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1441,7 +1558,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
   ++NumSunk;
 
   // Iterate over users to be ready for actual sinking. Replace users via
-  // unrechable blocks with undef and make all user PHIs trivially replcable.
+  // unreachable blocks with undef and make all user PHIs trivially replaceable.
   SmallPtrSet<Instruction *, 8> VisitedUsers;
   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
     auto *User = cast<Instruction>(*UI);
@@ -1549,25 +1666,15 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
 
   if (isa<PHINode>(I))
     // Move the new node to the end of the phi list in the destination block.
-    moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
+    moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
   else
     // Move the new node to the destination block, before its terminator.
-    moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
-  if (MSSAU) {
-    // If moving, I just moved a load or store, so update MemorySSA.
-    MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
-        MSSAU->getMemorySSA()->getMemoryAccess(&I));
-    if (OldMemAcc)
-      MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
-  }
+    moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
 
-  // Do not retain debug locations when we are moving instructions to different
-  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
-  // need to retain their debug locs because they may be inlined.
-  // FIXME: How do we retain source locations without causing poor debugging
-  // behavior?
-  if (!isa<CallInst>(I))
-    I.setDebugLoc(DebugLoc());
+  // Apply line 0 debug locations when we are moving instructions to different
+  // basic blocks because we want to avoid jumpy line tables.
+  if (const DebugLoc &DL = I.getDebugLoc())
+    I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
 
   if (isa<LoadInst>(I))
     ++NumMovedLoads;
@@ -1611,8 +1718,10 @@ class LoopPromoter : public LoadAndStorePromoter {
   const SmallSetVector<Value *, 8> &PointerMustAliases;
   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
   SmallVectorImpl<Instruction *> &LoopInsertPts;
+  SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
   PredIteratorCache &PredCache;
   AliasSetTracker &AST;
+  MemorySSAUpdater *MSSAU;
   LoopInfo &LI;
   DebugLoc DL;
   int Alignment;
@@ -1639,15 +1748,16 @@ public:
   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
                const SmallSetVector<Value *, 8> &PMA,
                SmallVectorImpl<BasicBlock *> &LEB,
-               SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
-               AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
-               bool UnorderedAtomic, const AAMDNodes &AATags,
-               ICFLoopSafetyInfo &SafetyInfo)
+               SmallVectorImpl<Instruction *> &LIP,
+               SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
+               AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
+               DebugLoc dl, int alignment, bool UnorderedAtomic,
+               const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
       : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
-        LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
-        LI(li), DL(std::move(dl)), Alignment(alignment),
-        UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
-      {}
+        LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
+        PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+        Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+        SafetyInfo(SafetyInfo) {}
 
   bool isInstInList(Instruction *I,
                     const SmallVectorImpl<Instruction *> &) const override {
@@ -1659,7 +1769,7 @@ public:
     return PointerMustAliases.count(Ptr);
   }
 
-  void doExtraRewritesBeforeFinalDeletion() const override {
+  void doExtraRewritesBeforeFinalDeletion() override {
     // Insert stores after in the loop exit blocks.  Each exit block gets a
     // store of the live-out values that feed them.  Since we've already told
     // the SSA updater about the defs in the loop and the preheader
@@ -1677,6 +1787,21 @@ public:
       NewSI->setDebugLoc(DL);
       if (AATags)
         NewSI->setAAMetadata(AATags);
+
+      if (MSSAU) {
+        MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+        MemoryAccess *NewMemAcc;
+        if (!MSSAInsertPoint) {
+          NewMemAcc = MSSAU->createMemoryAccessInBB(
+              NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+        } else {
+          NewMemAcc =
+              MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+        }
+        MSSAInsertPts[i] = NewMemAcc;
+        MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+        // FIXME: true for safety, false may still be correct.
+      }
     }
   }
 
@@ -1687,6 +1812,8 @@ public:
   void instructionDeleted(Instruction *I) const override {
     SafetyInfo.removeInstruction(I);
     AST.deleteValue(I);
+    if (MSSAU)
+      MSSAU->removeMemoryAccess(I);
   }
 };
 
@@ -1723,10 +1850,11 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
 bool llvm::promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &PointerMustAliases,
     SmallVectorImpl<BasicBlock *> &ExitBlocks,
-    SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+    SmallVectorImpl<Instruction *> &InsertPts,
+    SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
-    Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
-    OptimizationRemarkEmitter *ORE) {
+    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+    ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          CurAST != nullptr && SafetyInfo != nullptr &&
@@ -1827,9 +1955,21 @@ bool llvm::promoteLoopAccessesToScalars(
         SawUnorderedAtomic |= Load->isAtomic();
         SawNotAtomic |= !Load->isAtomic();
 
-        if (!DereferenceableInPH)
-          DereferenceableInPH = isSafeToExecuteUnconditionally(
-              *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
+        unsigned InstAlignment = Load->getAlignment();
+        if (!InstAlignment)
+          InstAlignment =
+              MDL.getABITypeAlignment(Load->getType());
+
+        // Note that proving a load safe to speculate requires proving
+        // sufficient alignment at the target location.  Proving it guaranteed
+        // to execute does as well.  Thus, once either is proven, we can raise
+        // our guaranteed alignment to that of this load.
+        if (!DereferenceableInPH || (InstAlignment > Alignment))
+          if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
+                                             ORE, Preheader->getTerminator())) {
+            DereferenceableInPH = true;
+            Alignment = std::max(Alignment, InstAlignment);
+          }
       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
@@ -1875,8 +2015,8 @@ bool llvm::promoteLoopAccessesToScalars(
         // deref info through it.
         if (!DereferenceableInPH) {
           DereferenceableInPH = isDereferenceableAndAlignedPointer(
-              Store->getPointerOperand(), Store->getAlignment(), MDL,
-              Preheader->getTerminator(), DT);
+              Store->getPointerOperand(), Store->getValueOperand()->getType(),
+              Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
         }
       } else
         return false; // Not a load or store.
@@ -1900,6 +2040,14 @@ bool llvm::promoteLoopAccessesToScalars(
   if (SawUnorderedAtomic && SawNotAtomic)
     return false;
 
+  // If we're inserting an atomic load in the preheader, we must be able to
+  // lower it.  We're only guaranteed to be able to lower naturally aligned
+  // atomics.
+  auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
+  if (SawUnorderedAtomic &&
+      Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+    return false;
+
   // If we couldn't prove we can hoist the load, bail.
   if (!DereferenceableInPH)
     return false;
@@ -1943,13 +2091,14 @@ bool llvm::promoteLoopAccessesToScalars(
   SmallVector<PHINode *, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        InsertPts, PIC, *CurAST, *LI, DL, Alignment,
-                        SawUnorderedAtomic, AATags, *SafetyInfo);
+                        InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
+                        Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad = new LoadInst(
-      SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+      SomePtr->getType()->getPointerElementType(), SomePtr,
+      SomePtr->getName() + ".promoted", Preheader->getTerminator());
   if (SawUnorderedAtomic)
     PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
   PreheaderLoad->setAlignment(Alignment);
@@ -1958,13 +2107,23 @@ bool llvm::promoteLoopAccessesToScalars(
     PreheaderLoad->setAAMetadata(AATags);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
+  MemoryAccess *PreheaderLoadMemoryAccess;
+  if (MSSAU) {
+    PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+        PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+    MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+    MSSAU->insertUse(NewMemUse);
+  }
+
   // Rewrite all the loads in the loop and remember all the definitions from
   // stores in the loop.
   Promoter.run(LoopUses);
 
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
   if (PreheaderLoad->use_empty())
-    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
+    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
 
   return true;
 }
@@ -2017,6 +2176,15 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
   return CurAST;
 }
 
+std::unique_ptr<AliasSetTracker>
+LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
+    Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
+  // Create a tracker tied to MSSAU's MemorySSA and L, then fill it via MSSA.
+  auto Tracker = make_unique<AliasSetTracker>(*AA, MSSAU->getMemorySSA(), L);
+  Tracker->addAllInstructionsInLoopUsingMSSA();
+  return Tracker;
+}
+
 /// Simple analysis hook. Clone alias set info.
 ///
 void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
@@ -2095,15 +2263,49 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
 }
 
 static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
-                                             Loop *CurLoop) {
-  MemoryAccess *Source;
-  // See declaration of EnableLicmCap for usage details.
-  if (EnableLicmCap)
-    Source = MU->getDefiningAccess();
-  else
-    Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
-  return !MSSA->isLiveOnEntryDef(Source) &&
-         CurLoop->contains(Source->getBlock());
+                                             Loop *CurLoop,
+                                             SinkAndHoistLICMFlags &Flags) {
+  // For hoisting, ask the walker for the clobber while the cap allows it.
+  if (!Flags.IsSink) {
+    MemoryAccess *Source;
+    // See declaration of SetLicmMssaOptCap for usage details.
+    if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+      Source = MU->getDefiningAccess();
+    else {
+      Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+      Flags.LicmMssaOptCounter++;
+    }
+    return !MSSA->isLiveOnEntryDef(Source) &&
+           CurLoop->contains(Source->getBlock());
+  }
+
+  // For sinking, we'd need to check all Defs below this use. The getClobbering
+  // call will look on the backedge of the loop, but will check aliasing with
+  // the instructions on the previous iteration.
+  // For example:
+  // for (i ... )
+  //   load a[i]   (Use(LoE))
+  //   store a[i]  (1 = Def(2), with 2 = Phi for the loop)
+  //   i++;
+  // The load sees no clobbering inside the loop, as the backedge alias check
+  // does phi translation, and will check aliasing against store a[i-1].
+  // However, sinking the load outside the loop, below the store, is incorrect.
+
+  // For now, only sink if every Def in the loop is in the same block as the
+  // use and precedes it there.
+  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+  // needs PostDominatorTreeAnalysis.
+  // FIXME: More precise: no Defs that alias this Use.
+  if (Flags.NoOfMemAccTooLarge)
+    return true;
+  for (auto *BB : CurLoop->getBlocks())
+    if (auto *Accesses = MSSA->getBlockDefs(BB))
+      for (const auto &MA : *Accesses)
+        if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+          if (MU->getBlock() != MD->getBlock() ||
+              !MSSA->locallyDominates(MD, MU))
+            return true;
+  return false;
 }
 
 /// Little predicate that returns true if the specified basic block is in
diff --git a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index a64c99117d64..1c3ff1a61b7e 100644
--- a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -1,9 +1,8 @@
 //===- LoopAccessAnalysisPrinter.cpp - Loop Access Analysis Printer --------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 3b41b5d96c86..1fcf1315a177 100644
--- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -1,9 +1,8 @@
 //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -313,7 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
       IRBuilder<> Builder(MemI);
       Module *M = BB->getParent()->getParent();
       Type *I32 = Type::getInt32Ty(BB->getContext());
-      Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+      Function *PrefetchFunc =
+          Intrinsic::getDeclaration(M, Intrinsic::prefetch);
       Builder.CreateCall(
           PrefetchFunc,
           {PrefPtrValue,
@@ -333,4 +333,3 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
 
   return MadeChange;
 }
-
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index d412025d7e94..8371367e24e7 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -1,9 +1,8 @@
 //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index d797c9dc9e72..f45e5fd0f50b 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1,9 +1,8 @@
 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -767,8 +766,14 @@ public:
                     "cannot isolate unsafe dependencies");
     }
 
-    // Don't distribute the loop if we need too many SCEV run-time checks.
+    // Don't distribute the loop if we need too many SCEV run-time checks, or
+    // any if it's illegal.
     const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+    if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
+      return fail("RuntimeCheckWithConvergent",
+                  "may not insert runtime check with convergent operation");
+    }
+
     if (Pred.getComplexity() > (IsForced.getValueOr(false)
                                     ? PragmaDistributeSCEVCheckThreshold
                                     : DistributeSCEVCheckThreshold))
@@ -796,7 +801,14 @@ public:
     auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
                                                   RtPtrChecking);
 
+    if (LAI->hasConvergentOp() && !Checks.empty()) {
+      return fail("RuntimeCheckWithConvergent",
+                  "may not insert runtime check with convergent operation");
+    }
+
     if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+      assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
+
       MDNode *OrigLoopID = L->getLoopID();
 
       LLVM_DEBUG(dbgs() << "\nPointers:\n");
diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp
new file mode 100644
index 000000000000..0bc2bcff2ae1
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopFuse.cpp
@@ -0,0 +1,1215 @@
+//===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the loop fusion pass.
+/// The implementation is largely based on the following document:
+///
+///       Code Transformations to Augment the Scope of Loop Fusion in a
+///         Production Compiler
+///       Christopher Mark Barton
+///       MSc Thesis
+///       https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
+///
+/// The general approach taken is to collect sets of control flow equivalent
+/// loops and test whether they can be fused. The necessary conditions for
+/// fusion are:
+///    1. The loops must be adjacent (there cannot be any statements between
+///       the two loops).
+///    2. The loops must be conforming (they must execute the same number of
+///       iterations).
+///    3. The loops must be control flow equivalent (if one loop executes, the
+///       other is guaranteed to execute).
+///    4. There cannot be any negative distance dependencies between the loops.
+/// If all of these conditions are satisfied, it is safe to fuse the loops.
+///
+/// This implementation creates FusionCandidates that represent the loop and the
+/// necessary information needed by fusion. It then operates on the fusion
+/// candidates, first confirming that the candidate is eligible for fusion. The
+/// candidates are then collected into control flow equivalent sets, sorted in
+/// dominance order. Each set of control flow equivalent candidates is then
+/// traversed, attempting to fuse pairs of candidates in the set. If all
+/// requirements for fusion are met, the two candidates are fused, creating a
+/// new (fused) candidate which is then added back into the set to consider for
+/// additional fusion.
+///
+/// This implementation currently does not make any modifications to remove
+/// conditions for fusion. Code transformations to make loops conform to each of
+/// the conditions for fusion are discussed in more detail in the document
+/// above. These can be added to the current implementation in the future.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopFuse.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-fusion"
+
+STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
+STATISTIC(InvalidPreheader, "Loop has invalid preheader");
+STATISTIC(InvalidHeader, "Loop has invalid header");
+STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
+STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
+STATISTIC(InvalidLatch, "Loop has invalid latch");
+STATISTIC(InvalidLoop, "Loop is invalid");
+STATISTIC(AddressTakenBB, "Basic block has address taken");
+STATISTIC(MayThrowException, "Loop may throw an exception");
+STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
+STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
+STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
+STATISTIC(InvalidTripCount,
+          "Loop does not have invariant backedge taken count");
+STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
+STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
+STATISTIC(NonAdjacent, "Candidates are not adjacent");
+STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+
+enum FusionDependenceAnalysisChoice {
+  FUSION_DEPENDENCE_ANALYSIS_SCEV,
+  FUSION_DEPENDENCE_ANALYSIS_DA,
+  FUSION_DEPENDENCE_ANALYSIS_ALL,
+};
+
+static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
+    "loop-fusion-dependence-analysis",
+    cl::desc("Which dependence analysis should loop fusion use?"),
+    cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
+                          "Use the scalar evolution interface"),
+               clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
+                          "Use the dependence analysis interface"),
+               clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
+                          "Use all available analyses")),
+    cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+    VerboseFusionDebugging("loop-fusion-verbose-debug",
+                           cl::desc("Enable verbose debugging for Loop Fusion"),
+                           cl::Hidden, cl::init(false), cl::ZeroOrMore);
+#endif
+
+/// This class is used to represent a candidate for loop fusion. When it is
+/// constructed, it checks the conditions for loop fusion to ensure that it
+/// represents a valid candidate. It caches several parts of a loop that are
+/// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
+/// of continually querying the underlying Loop to retrieve these values. It is
+/// assumed these will not change throughout loop fusion.
+///
+/// The invalidate method should be used to indicate that the FusionCandidate is
+/// no longer a valid candidate for fusion. Similarly, the isValid() method can
+/// be used to ensure that the FusionCandidate is still valid for fusion.
+struct FusionCandidate {
+  /// Cache of parts of the loop used throughout loop fusion. These should not
+  /// need to change throughout the analysis and transformation.
+  /// These parts are cached to avoid repeatedly looking up in the Loop class.
+
+  /// Preheader of the loop this candidate represents
+  BasicBlock *Preheader;
+  /// Header of the loop this candidate represents
+  BasicBlock *Header;
+  /// The exiting block of the loop, as returned by Loop::getExitingBlock()
+  BasicBlock *ExitingBlock;
+  /// The successor block of this loop (where the exiting blocks go to)
+  BasicBlock *ExitBlock;
+  /// Latch of the loop
+  BasicBlock *Latch;
+  /// The loop that this fusion candidate represents
+  Loop *L;
+  /// Vector of instructions in this loop that read from memory
+  SmallVector<Instruction *, 16> MemReads;
+  /// Vector of instructions in this loop that write to memory
+  SmallVector<Instruction *, 16> MemWrites;
+  /// Are all of the members of this fusion candidate still valid
+  bool Valid;
+
+  /// Dominator and PostDominator trees are needed for the
+  /// FusionCandidateCompare function, required by FusionCandidateSet to
+  /// determine where the FusionCandidate should be inserted into the set. These
+  /// are used to establish ordering of the FusionCandidates based on dominance.
+  const DominatorTree *DT;
+  const PostDominatorTree *PDT;
+
+  FusionCandidate(Loop *L, const DominatorTree *DT,
+                  const PostDominatorTree *PDT)
+      : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
+        ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
+        Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+
+    // Walk over all blocks in the loop and check for conditions that may
+    // prevent fusion. For each block, walk over all instructions and collect
+    // the memory reads and writes. If any instructions that prevent fusion are
+    // found, invalidate this object and return.
+    for (BasicBlock *BB : L->blocks()) {
+      if (BB->hasAddressTaken()) {
+        AddressTakenBB++;
+        invalidate();
+        return;
+      }
+
+      for (Instruction &I : *BB) {
+        if (I.mayThrow()) {
+          MayThrowException++;
+          invalidate();
+          return;
+        }
+        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+          if (SI->isVolatile()) {
+            ContainsVolatileAccess++;
+            invalidate();
+            return;
+          }
+        }
+        if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+          if (LI->isVolatile()) {
+            ContainsVolatileAccess++;
+            invalidate();
+            return;
+          }
+        }
+        if (I.mayWriteToMemory())
+          MemWrites.push_back(&I);
+        if (I.mayReadFromMemory())
+          MemReads.push_back(&I);
+      }
+    }
+  }
+
+  /// Check if all members of the class are valid.
+  bool isValid() const {
+    return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
+           !L->isInvalid() && Valid;
+  }
+
+  /// Verify that all members are in sync with the Loop object.
+  void verify() const {
+    assert(isValid() && "Candidate is not valid!!");
+    assert(!L->isInvalid() && "Loop is invalid!");
+    assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
+    assert(Header == L->getHeader() && "Header is out of sync");
+    assert(ExitingBlock == L->getExitingBlock() &&
+           "Exiting Blocks is out of sync");
+    assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
+    assert(Latch == L->getLoopLatch() && "Latch is out of sync");
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const {
+    dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
+           << "\n"
+           << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
+           << "\tExitingBB: "
+           << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
+           << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
+           << "\n"
+           << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
+  }
+#endif
+
+private:
+  // This is only used internally for now, to clear the MemWrites and MemReads
+  // list and setting Valid to false. I can't envision other uses of this right
+  // now, since once FusionCandidates are put into the FusionCandidateSet they
+  // are immutable. Thus, any time we need to change/update a FusionCandidate,
+  // we must create a new one and insert it into the FusionCandidateSet to
+  // ensure the FusionCandidateSet remains ordered correctly.
+  void invalidate() {
+    MemWrites.clear();
+    MemReads.clear();
+    Valid = false;
+  }
+};
+
+/// Print the preheader name of a valid candidate, "<Invalid>" otherwise.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     const FusionCandidate &FC) {
+  if (!FC.isValid())
+    return OS << "<Invalid>";
+
+  // A valid candidate always has a non-null preheader (see isValid()).
+  return OS << FC.Preheader->getName();
+}
+
+struct FusionCandidateCompare {
+  /// Comparison functor to sort two Control Flow Equivalent fusion candidates
+  /// into dominance order.
+  /// If LHS dominates RHS and RHS post-dominates LHS, return true.
+  /// If RHS dominates LHS and LHS post-dominates RHS, return false.
+  bool operator()(const FusionCandidate &LHS,
+                  const FusionCandidate &RHS) const {
+    const DominatorTree *DT = LHS.DT;
+
+    // Do not save PDT to local variable as it is only used in asserts and thus
+    // will trigger an unused variable warning if building without asserts.
+    assert(DT && LHS.PDT && "Expecting valid dominator tree");
+
+    // Do this compare first so if LHS == RHS, function returns false.
+    if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
+      // RHS dominates LHS
+      // Verify LHS post-dominates RHS
+      assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader));
+      return false;
+    }
+
+    if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
+      // Verify RHS Postdominates LHS
+      assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader));
+      return true;
+    }
+
+    // If LHS does not dominate RHS and RHS does not dominate LHS then there is
+    // no dominance relationship between the two FusionCandidates. Thus, they
+    // should not be in the same set together.
+    llvm_unreachable(
+        "No dominance relationship between these fusion candidates!");
+  }
+};
+
+namespace {
+using LoopVector = SmallVector<Loop *, 4>;
+
+// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
+// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
+// dominates FC1 and FC1 post-dominates FC0.
+// std::set was chosen because we want a sorted data structure with stable
+// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
+// loops by moving intervening code around. When this intervening code contains
+// loops, those loops will be moved also. The corresponding FusionCandidates
+// will also need to be moved accordingly. As this is done, having stable
+// iterators will simplify the logic. Similarly, having an efficient insert that
+// keeps the FusionCandidateSet sorted will also simplify the implementation.
+using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
+using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+} // namespace
+
+/// Print every candidate of \p CandSet on its own line, in set order.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                                     const FusionCandidateSet &CandSet) {
+  for (const FusionCandidate &FC : CandSet) // By reference: no per-item copy.
+    OS << FC << "\n";
+  return OS;
+}
+
+#if !defined(NDEBUG)
+static void
+printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
+  dbgs() << "Fusion Candidates: \n";
+  for (const auto &CandidateSet : FusionCandidates) {
+    dbgs() << "*** Fusion Candidate Set ***\n";
+    dbgs() << CandidateSet;
+    dbgs() << "****************************\n";
+  }
+}
+#endif
+
+/// Level-by-level view of the loop nests of a function, outermost level first.
+///
+/// A level is a list of LoopVectors: at first one vector holding the top-level
+/// loops of the LoopInfo, after descend() one vector of sub-loops per parent.
+/// NOTE(review): size() subtracts a count of loops (RemovedLoops) from a
+/// count of LoopVectors, which can wrap around; confirm callers tolerate it.
+struct LoopDepthTree {
+  using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
+  using iterator = LoopsOnLevelTy::iterator;
+  using const_iterator = LoopsOnLevelTy::const_iterator;
+
+  LoopDepthTree(LoopInfo &LI) : Depth(1) {
+    if (!LI.empty())
+      LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
+  }
+
+  /// Test whether a given loop has been removed from the function, and thus is
+  /// no longer valid.
+  bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
+
+  /// Record that a given loop has been removed from the function and is no
+  /// longer valid.
+  void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
+
+  /// Move one level inward: sub-loops of each non-removed loop, per parent.
+  void descend() {
+    LoopsOnLevelTy LoopsOnNextLevel;
+
+    for (const LoopVector &LV : *this)
+      for (Loop *L : LV)
+        if (!isRemovedLoop(L) && L->begin() != L->end())
+          LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
+
+    LoopsOnLevel.swap(LoopsOnNextLevel); // Cheaper than a deep copy.
+    RemovedLoops.clear();
+    Depth++;
+  }
+
+  bool empty() const { return size() == 0; }
+  size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
+  unsigned getDepth() const { return Depth; }
+
+  iterator begin() { return LoopsOnLevel.begin(); }
+  iterator end() { return LoopsOnLevel.end(); }
+  const_iterator begin() const { return LoopsOnLevel.begin(); }
+  const_iterator end() const { return LoopsOnLevel.end(); }
+
+private:
+  /// Set of loops that have been removed from the function and are no longer
+  /// valid.
+  SmallPtrSet<const Loop *, 8> RemovedLoops;
+
+  /// Depth of the current level, starting at 1 (outermost loops).
+  unsigned Depth;
+
+  /// Vector of loops at the current depth level that have the same parent loop
+  LoopsOnLevelTy LoopsOnLevel;
+};
+
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+  dbgs() << "****************************\n";
+  for (auto L : LV)
+    printLoop(*L, dbgs());
+  dbgs() << "****************************\n";
+}
+#endif
+
+static void reportLoopFusion(const FusionCandidate &FC0,
+                             const FusionCandidate &FC1,
+                             OptimizationRemarkEmitter &ORE) {
+  using namespace ore;
+  ORE.emit(
+      OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
+      << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+      << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
+}
+
+struct LoopFuser {
+private:
+  // Sets of control flow equivalent fusion candidates for a given nest level.
+  FusionCandidateCollection FusionCandidates;
+
+  LoopDepthTree LDT;
+  DomTreeUpdater DTU;
+
+  LoopInfo &LI;
+  DominatorTree &DT;
+  DependenceInfo &DI;
+  ScalarEvolution &SE;
+  PostDominatorTree &PDT;
+  OptimizationRemarkEmitter &ORE;
+
+public:
+  LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
+            ScalarEvolution &SE, PostDominatorTree &PDT,
+            OptimizationRemarkEmitter &ORE, const DataLayout &DL)
+      : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
+        DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
+
+  /// This is the main entry point for loop fusion. It will traverse the
+  /// specified function and collect candidate loops to fuse, starting at the
+  /// outermost nesting level and working inwards.
+  bool fuseLoops(Function &F) {
+#ifndef NDEBUG
+    if (VerboseFusionDebugging) {
+      LI.print(dbgs());
+    }
+#endif
+
+    LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
+                      << "\n");
+    bool Changed = false;
+
+    while (!LDT.empty()) {
+      LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
+                        << LDT.getDepth() << "\n";);
+
+      for (const LoopVector &LV : LDT) {
+        assert(LV.size() > 0 && "Empty loop set was build!");
+
+        // Skip singleton loop sets as they do not offer fusion opportunities on
+        // this level.
+        if (LV.size() == 1)
+          continue;
+#ifndef NDEBUG
+        if (VerboseFusionDebugging) {
+          LLVM_DEBUG({
+            dbgs() << "  Visit loop set (#" << LV.size() << "):\n";
+            printLoopVector(LV);
+          });
+        }
+#endif
+
+        collectFusionCandidates(LV);
+        Changed |= fuseCandidates();
+      }
+
+      // Finished analyzing candidates at this level.
+      // Descend to the next level and clear all of the candidates currently
+      // collected. Note that it will not be possible to fuse any of the
+      // existing candidates with new candidates because the new candidates will
+      // be at a different nest level and thus not be control flow equivalent
+      // with all of the candidates collected so far.
+      LLVM_DEBUG(dbgs() << "Descend one level!\n");
+      LDT.descend();
+      FusionCandidates.clear();
+    }
+
+    if (Changed)
+      LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
+
+#ifndef NDEBUG
+    assert(DT.verify());
+    assert(PDT.verify());
+    LI.verify(DT);
+    SE.verify();
+#endif
+
+    LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
+    return Changed;
+  }
+
+private:
+  /// Check whether two fusion candidates are control flow equivalent.
+  ///
+  /// The test is made on the preheaders: one of them must dominate the other,
+  /// and the dominated one must in turn post-dominate its dominator. When that
+  /// holds, executing either candidate implies that the other executes too.
+  /// Returns false if neither preheader dominates the other.
+  bool isControlFlowEquivalent(const FusionCandidate &FC0,
+                               const FusionCandidate &FC1) const {
+    assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
+    const BasicBlock *PH0 = FC0.Preheader, *PH1 = FC1.Preheader;
+
+    // FC0 comes first: FC1 has to post-dominate it.
+    if (DT.dominates(PH0, PH1))
+      return PDT.dominates(PH1, PH0);
+
+    // Otherwise FC1 must come first, and FC0 has to post-dominate it.
+    return DT.dominates(PH1, PH0) && PDT.dominates(PH0, PH1);
+  }
+
+  /// Determine if a fusion candidate (representing a loop) is eligible for
+  /// fusion. Note that this only checks whether a single loop can be fused - it
+  /// does not check whether it is *legal* to fuse two loops together.
+  bool eligibleForFusion(const FusionCandidate &FC) const {
+    if (!FC.isValid()) {
+      LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
+      if (!FC.Preheader)
+        InvalidPreheader++;
+      if (!FC.Header)
+        InvalidHeader++;
+      if (!FC.ExitingBlock)
+        InvalidExitingBlock++;
+      if (!FC.ExitBlock)
+        InvalidExitBlock++;
+      if (!FC.Latch)
+        InvalidLatch++;
+      if (FC.L->isInvalid())
+        InvalidLoop++;
+
+      return false;
+    }
+
+    // Require ScalarEvolution to be able to determine a trip count.
+    if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
+      LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+                        << " trip count not computable!\n");
+      InvalidTripCount++;
+      return false;
+    }
+
+    if (!FC.L->isLoopSimplifyForm()) {
+      LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+                        << " is not in simplified form!\n");
+      NotSimplifiedForm++;
+      return false;
+    }
+
+    return true;
+  }
+
+  /// Build a FusionCandidate for every loop in \p LV and put each eligible one
+  /// into FusionCandidates: it joins the first existing set whose first member
+  /// is control flow equivalent with it, otherwise it starts a new set.
+  void collectFusionCandidates(const LoopVector &LV) {
+    for (Loop *L : LV) {
+      FusionCandidate CurrCand(L, &DT, &PDT);
+      if (!eligibleForFusion(CurrCand))
+        continue;
+
+      // Go through each set in FusionCandidates and determine if L is control
+      // flow equivalent with the first loop in that set. If it is, insert L
+      // there. If not, go to the next set.
+      // If no suitable set is found, start another set and add it to
+      // FusionCandidates.
+      bool FoundSet = false;
+
+      for (auto &CurrCandSet : FusionCandidates) {
+        if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
+          CurrCandSet.insert(CurrCand);
+          FoundSet = true;
+#ifndef NDEBUG
+          if (VerboseFusionDebugging)
+            LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+                              << " to existing candidate set\n");
+#endif
+          break;
+        }
+      }
+      if (!FoundSet) {
+        // No set was found. Start a new set in place (avoids copying the set).
+#ifndef NDEBUG
+        if (VerboseFusionDebugging)
+          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+#endif
+        FusionCandidates.emplace_back();
+        FusionCandidateSet &NewCandSet = FusionCandidates.back();
+        NewCandSet.insert(CurrCand);
+      }
+      NumFusionCandidates++;
+    }
+  }
+
+  /// Determine if it is beneficial to fuse two loops.
+  ///
+  /// For now, this method simply returns true because we want to fuse as much
+  /// as possible (primarily to test the pass). This method will evolve, over
+  /// time, to add heuristics for profitability of fusion.
+  bool isBeneficialFusion(const FusionCandidate &FC0,
+                          const FusionCandidate &FC1) {
+    return true;
+  }
+
+  /// Determine if two fusion candidates have the same trip count (i.e., they
+  /// execute the same number of iterations).
+  ///
+  /// Note that for now this method simply returns a boolean value because there
+  /// are no mechanisms in loop fusion to handle different trip counts. In the
+  /// future, this behaviour can be extended to adjust one of the loops to make
+  /// the trip counts equal (e.g., loop peeling). When this is added, this
+  /// interface may need to change to return more information than just a
+  /// boolean value.
+  bool identicalTripCounts(const FusionCandidate &FC0,
+                           const FusionCandidate &FC1) const {
+    const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
+    if (isa<SCEVCouldNotCompute>(TripCount0)) {
+      UncomputableTripCount++;
+      LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
+      return false;
+    }
+
+    const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
+    if (isa<SCEVCouldNotCompute>(TripCount1)) {
+      UncomputableTripCount++;
+      LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
+                      << *TripCount1 << " are "
+                      << (TripCount0 == TripCount1 ? "identical" : "different")
+                      << "\n");
+
+    return (TripCount0 == TripCount1);
+  }
+
+  /// Walk each set of control flow equivalent fusion candidates and attempt to
+  /// fuse them. This does a single linear traversal of all candidates in the
+  /// set. The conditions for legal fusion are checked at this point. If a pair
+  /// of fusion candidates passes all legality checks, they are fused together
+  /// and a new fusion candidate is created and added to the FusionCandidateSet.
+  /// The original fusion candidates are then removed, as they are no longer
+  /// valid.
+  bool fuseCandidates() {
+    bool Fused = false;
+    LLVM_DEBUG(printFusionCandidates(FusionCandidates));
+    for (auto &CandidateSet : FusionCandidates) {
+      if (CandidateSet.size() < 2)
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
+                        << CandidateSet << "\n");
+
+      for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
+        assert(!LDT.isRemovedLoop(FC0->L) &&
+               "Should not have removed loops in CandidateSet!");
+        auto FC1 = FC0;
+        for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
+          assert(!LDT.isRemovedLoop(FC1->L) &&
+                 "Should not have removed loops in CandidateSet!");
+
+          LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
+                     dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
+
+          FC0->verify();
+          FC1->verify();
+
+          if (!identicalTripCounts(*FC0, *FC1)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+                                 "counts. Not fusing.\n");
+            NonEqualTripCount++;
+            continue;
+          }
+
+          if (!isAdjacent(*FC0, *FC1)) {
+            LLVM_DEBUG(dbgs()
+                       << "Fusion candidates are not adjacent. Not fusing.\n");
+            NonAdjacent++;
+            continue;
+          }
+
+          // For now we skip fusing if the second candidate has any instructions
+          // in the preheader. This is done because we currently do not have the
+          // safety checks to determine if it is safe to move the preheader of
+          // the second candidate past the body of the first candidate. Once
+          // these checks are added, this condition can be removed.
+          if (!isEmptyPreheader(*FC1)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+                                 "preheader. Not fusing.\n");
+            NonEmptyPreheader++;
+            continue;
+          }
+
+          if (!dependencesAllowFusion(*FC0, *FC1)) {
+            LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+            continue;
+          }
+
+          bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
+          LLVM_DEBUG(dbgs()
+                     << "\tFusion appears to be "
+                     << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+          if (!BeneficialToFuse)
+            continue;
+
+          // All analysis has completed and has determined that fusion is legal
+          // and profitable. At this point, start transforming the code and
+          // perform fusion.
+
+          LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
+                            << *FC1 << "\n");
+
+          // Report fusion to the Optimization Remarks.
+          // Note this needs to be done *before* performFusion because
+          // performFusion will change the original loops, making it not
+          // possible to identify them after fusion is complete.
+          reportLoopFusion(*FC0, *FC1, ORE);
+
+          FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+          FusedCand.verify();
+          assert(eligibleForFusion(FusedCand) &&
+                 "Fused candidate should be eligible for fusion!");
+
+          // Notify the loop-depth-tree that these loops are not valid objects
+          // anymore.
+          LDT.removeLoop(FC1->L);
+
+          CandidateSet.erase(FC0);
+          CandidateSet.erase(FC1);
+
+          auto InsertPos = CandidateSet.insert(FusedCand);
+
+          assert(InsertPos.second &&
+                 "Unable to insert TargetCandidate in CandidateSet!");
+
+          // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
+          // of the FC1 loop will attempt to fuse the new (fused) loop with the
+          // remaining candidates in the current candidate set.
+          FC0 = FC1 = InsertPos.first;
+
+          LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
+                            << "\n");
+
+          Fused = true;
+        }
+      }
+    }
+    return Fused;
+  }
+
+  /// Rewrite all additive recurrences in a SCEV to use a new loop.
+  class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
+  public:
+    AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
+                       bool UseMax = true)
+        : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
+          NewL(NewL) {}
+
+    const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+      const Loop *ExprL = Expr->getLoop();
+      SmallVector<const SCEV *, 2> Operands;
+      if (ExprL == &OldL) { // Recurrence on OldL itself: retarget it to NewL.
+        Operands.append(Expr->op_begin(), Expr->op_end());
+        return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
+      }
+
+      if (OldL.contains(ExprL)) { // Recurrence on a loop within OldL.
+        bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
+        if (!UseMax || !Pos || !Expr->isAffine()) {
+          Valid = false; // Give up; callers must check wasValidSCEV().
+          return Expr;
+        }
+        return visit(Expr->getStart()); // Keep only the rewritten start.
+      }
+
+      for (const SCEV *Op : Expr->operands()) // Other loop: rewrite operands.
+        Operands.push_back(visit(Op));
+      return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
+    }
+
+    bool wasValidSCEV() const { return Valid; }
+
+  private:
+    bool Valid, UseMax;
+    const Loop &OldL, &NewL;
+  };
+
+  /// Return false if the access functions of \p I0 and \p I1 could cause
+  /// a negative dependence.
+  bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
+                            Instruction &I1, bool EqualIsInvalid) {
+    Value *Ptr0 = getLoadStorePointerOperand(&I0);
+    Value *Ptr1 = getLoadStorePointerOperand(&I1);
+    if (!Ptr0 || !Ptr1)
+      return false; // A pointer operand is missing: give up.
+
+    const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
+    const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
+#ifndef NDEBUG
+    if (VerboseFusionDebugging)
+      LLVM_DEBUG(dbgs() << "    Access function check: " << *SCEVPtr0 << " vs "
+                        << *SCEVPtr1 << "\n");
+#endif
+    AddRecLoopReplacer Rewriter(SE, L0, L1); // Retarget L0 recurrences to L1.
+    SCEVPtr0 = Rewriter.visit(SCEVPtr0);
+#ifndef NDEBUG
+    if (VerboseFusionDebugging)
+      LLVM_DEBUG(dbgs() << "    Access function after rewrite: " << *SCEVPtr0
+                        << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
+#endif
+    if (!Rewriter.wasValidSCEV())
+      return false; // The rewrite was flagged as invalid.
+
+    // TODO: isKnownPredicate doesn't work well when one SCEV is loop carried
+    //       (by L0) and the other is not. We could check if it is monotone and
+    //       test the beginning and end value instead.
+
+    BasicBlock *L0Header = L0.getHeader();
+    auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
+      const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+      if (!AddRec)
+        return false;
+      return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
+             !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
+    };
+    if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
+      return false; // An AddRec loop header is DT-unrelated to L0's header.
+
+    ICmpInst::Predicate Pred =
+        EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
+    bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
+#ifndef NDEBUG
+    if (VerboseFusionDebugging)
+      LLVM_DEBUG(dbgs() << "    Relation: " << *SCEVPtr0
+                        << (IsAlwaysGE ? "  >=  " : "  may <  ") << *SCEVPtr1
+                        << "\n");
+#endif
+    return IsAlwaysGE;
+  }
+
+  /// Return true if the dependences between @p I0 (in @p FC0) and @p I1 (in
+  /// @p FC1) allow loop fusion of @p FC0 and @p FC1. The dependence analyses
+  /// specified by @p DepChoice are used to determine this.
+  bool dependencesAllowFusion(const FusionCandidate &FC0,
+                              const FusionCandidate &FC1, Instruction &I0,
+                              Instruction &I1, bool AnyDep,
+                              FusionDependenceAnalysisChoice DepChoice) {
+#ifndef NDEBUG
+    if (VerboseFusionDebugging) {
+      LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
+                        << DepChoice << "\n");
+    }
+#endif
+    switch (DepChoice) {
+    case FUSION_DEPENDENCE_ANALYSIS_SCEV:
+      return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
+    case FUSION_DEPENDENCE_ANALYSIS_DA: {
+      auto DepResult = DI.depends(&I0, &I1, true);
+      if (!DepResult)
+        return true; // No dependence was found.
+#ifndef NDEBUG
+      if (VerboseFusionDebugging) {
+        LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
+                   dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
+                          << (DepResult->isOrdered() ? "true" : "false")
+                          << "]\n");
+        LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
+                          << "\n");
+      }
+#endif
+
+      if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
+        LLVM_DEBUG(
+            dbgs() << "TODO: Implement pred/succ dependence handling!\n");
+
+      // TODO: Can we actually use the dependence info analysis here?
+      return false;
+    }
+
+    case FUSION_DEPENDENCE_ANALYSIS_ALL: // Either analysis suffices.
+      return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+                                    FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
+             dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+                                    FUSION_DEPENDENCE_ANALYSIS_DA);
+    }
+
+    llvm_unreachable("Unknown fusion dependence analysis choice!");
+  }
+
+  /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
+  bool dependencesAllowFusion(const FusionCandidate &FC0,
+                              const FusionCandidate &FC1) {
+    LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
+                      << "\n");
+    assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
+    assert(DT.dominates(FC0.Preheader, FC1.Preheader));
+
+    for (Instruction *WriteL0 : FC0.MemWrites) { // FC0 writes vs. FC1 accesses.
+      for (Instruction *WriteL1 : FC1.MemWrites)
+        if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+                                    /* AnyDep */ false,
+                                    FusionDependenceAnalysis)) {
+          InvalidDependencies++;
+          return false;
+        }
+      for (Instruction *ReadL1 : FC1.MemReads)
+        if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
+                                    /* AnyDep */ false,
+                                    FusionDependenceAnalysis)) {
+          InvalidDependencies++;
+          return false;
+        }
+    }
+
+    for (Instruction *WriteL1 : FC1.MemWrites) { // FC1 writes vs. FC0 accesses.
+      for (Instruction *WriteL0 : FC0.MemWrites)
+        if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+                                    /* AnyDep */ false,
+                                    FusionDependenceAnalysis)) {
+          InvalidDependencies++;
+          return false;
+        }
+      for (Instruction *ReadL0 : FC0.MemReads)
+        if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
+                                    /* AnyDep */ false,
+                                    FusionDependenceAnalysis)) {
+          InvalidDependencies++;
+          return false;
+        }
+    }
+
+    // Walk through all uses in FC1. For each use, find the reaching def. If the
+    // def is located in FC0 then it is not safe to fuse.
+    for (BasicBlock *BB : FC1.L->blocks())
+      for (Instruction &I : *BB)
+        for (auto &Op : I.operands())
+          if (Instruction *Def = dyn_cast<Instruction>(Op))
+            if (FC0.L->contains(Def->getParent())) {
+              InvalidDependencies++;
+              return false;
+            }
+
+    return true;
+  }
+
+  /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
+  /// case, there is no code in between the two fusion candidates, thus making
+  /// them adjacent. Only the identity of the two blocks is compared.
+  bool isAdjacent(const FusionCandidate &FC0,
+                  const FusionCandidate &FC1) const {
+    return FC0.ExitBlock == FC1.Preheader;
+  }
+
+  bool isEmptyPreheader(const FusionCandidate &FC) const {
+    return FC.Preheader->size() == 1; // A lone instruction: the terminator.
+  }
+
+  /// Fuse two fusion candidates, creating a new fused loop.
+  ///
+  /// This method contains the mechanics of fusing two loops, represented by \p
+  /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
+  /// postdominates \p FC0 (making them control flow equivalent). It also
+  /// assumes that the other conditions for fusion have been met: adjacent,
+  /// identical trip counts, and no negative distance dependencies exist that
+  /// would prevent fusion. Thus, there is no checking for these conditions in
+  /// this method.
+  ///
+  /// Fusion is performed by rewiring the CFG to update successor blocks of the
+  /// components of the loop. Specifically, the following changes are done:
+  ///
+  ///   1. The preheader of \p FC1 is removed as it is no longer necessary
+  ///   (because it is currently only a single statement block).
+  ///   2. The latch of \p FC0 is modified to jump to the header of \p FC1.
+  ///   3. The latch of \p FC1 is modified to jump to the header of \p FC0.
+  ///   4. All blocks from \p FC1 are removed from FC1 and added to FC0.
+  ///
+  /// All of these modifications are done with dominator tree updates, thus
+  /// keeping the dominator (and post dominator) information up-to-date.
+  ///
+  /// This can be improved in the future by actually merging blocks during
+  /// fusion. For example, the preheader of \p FC1 can be merged with the
+  /// preheader of \p FC0. This would allow loops with more than a single
+  /// statement in the preheader to be fused. Similarly, the latch blocks of the
+  /// two loops could also be fused into a single block. This will require
+  /// analysis to prove it is safe to move the contents of the block past
+  /// existing code, which currently has not been implemented.
+  Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+    assert(FC0.isValid() && FC1.isValid() &&
+           "Expecting valid fusion candidates");
+
+    LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
+               dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
+
+    assert(FC1.Preheader == FC0.ExitBlock);
+    assert(FC1.Preheader->size() == 1 &&
+           FC1.Preheader->getSingleSuccessor() == FC1.Header);
+
+    // Remember the phi nodes originally in the header of FC0 in order to rewire
+    // them later. However, this is only necessary if the new loop carried
+    // values might not dominate the exiting branch. While we do not generally
+    // test if this is the case but simply insert intermediate phi nodes, we
+    // need to make sure these intermediate phi nodes have different
+    // predecessors. To this end, we filter the special case where the exiting
+    // block is the latch block of the first loop. Nothing needs to be done
+    // anyway as all loop carried values dominate the latch and thereby also the
+    // exiting branch.
+    SmallVector<PHINode *, 8> OriginalFC0PHIs;
+    if (FC0.ExitingBlock != FC0.Latch)
+      for (PHINode &PHI : FC0.Header->phis())
+        OriginalFC0PHIs.push_back(&PHI);
+
+    // Replace incoming blocks for header PHIs first.
+    FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
+    FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
+
+    // Then modify the control flow and update DT and PDT.
+    SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+
+    // The old exiting block of the first loop (FC0) has to jump to the header
+    // of the second as we need to execute the code in the second header block
+    // regardless of the trip count. That is, if the trip count is 0, so the
+    // back edge is never taken, we still have to execute both loop headers,
+    // especially (but not only!) if the second is a do-while style loop.
+    // However, doing so might invalidate the phi nodes of the first loop as
+    // the new values do only need to dominate their latch and not the exiting
+    // predicate. To remedy this potential problem we always introduce phi
+    // nodes in the header of the second loop later that select the loop carried
+    // value, if the second header was reached through an old latch of the
+    // first, or undef otherwise. This is sound as exiting the first implies the
+    // second will exit too, __without__ taking the back-edge. [Their
+    // trip-counts are equal after all.]
+    // KB: Would this sequence be simpler to just make FC0.ExitingBlock go
+    // to FC1.Header? I think this is basically what the three sequences are
+    // trying to accomplish; however, doing this directly in the CFG may mean
+    // the DT/PDT becomes invalid
+    FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+                                                         FC1.Header);
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(
+        DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(
+        DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+
+    // The pre-header of L1 is not necessary anymore.
+    assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+    FC1.Preheader->getTerminator()->eraseFromParent();
+    new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(
+        DominatorTree::Delete, FC1.Preheader, FC1.Header));
+
+    // Moves the phi nodes from the second to the first loop's header block.
+    while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
+      if (SE.isSCEVable(PHI->getType()))
+        SE.forgetValue(PHI);
+      if (PHI->hasNUsesOrMore(1))
+        PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
+      else
+        PHI->eraseFromParent();
+    }
+
+    // Introduce new phi nodes in the second loop header to ensure
+    // exiting the first and jumping to the header of the second does not break
+    // the SSA property of the phis originally in the first loop. See also the
+    // comment above.
+    Instruction *L1HeaderIP = &FC1.Header->front();
+    for (PHINode *LCPHI : OriginalFC0PHIs) {
+      int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
+      assert(L1LatchBBIdx >= 0 &&
+             "Expected loop carried value to be rewired at this point!");
+
+      Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
+
+      PHINode *L1HeaderPHI = PHINode::Create(
+          LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+      L1HeaderPHI->addIncoming(LCV, FC0.Latch);
+      L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+                               FC0.ExitingBlock);
+
+      LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
+    }
+
+    // Replace latch terminator destinations.
+    FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
+    FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+
+    // If FC0.Latch and FC0.ExitingBlock are the same then we have already
+    // performed the updates above.
+    if (FC0.Latch != FC0.ExitingBlock)
+      TreeUpdates.emplace_back(DominatorTree::UpdateType(
+          DominatorTree::Insert, FC0.Latch, FC1.Header));
+
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+                                                       FC0.Latch, FC0.Header));
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
+                                                       FC1.Latch, FC0.Header));
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+                                                       FC1.Latch, FC1.Header));
+
+    // Update DT/PDT
+    DTU.applyUpdates(TreeUpdates);
+
+    LI.removeBlock(FC1.Preheader);
+    DTU.deleteBB(FC1.Preheader);
+    DTU.flush();
+
+    // Is there a way to keep SE up-to-date so we don't need to forget the loops
+    // and rebuild the information in subsequent passes of fusion?
+    SE.forgetLoop(FC1.L);
+    SE.forgetLoop(FC0.L);
+
+    // Merge the loops.
+    SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
+                                        FC1.L->block_end());
+    for (BasicBlock *BB : Blocks) {
+      FC0.L->addBlockEntry(BB);
+      FC1.L->removeBlockFromLoop(BB);
+      if (LI.getLoopFor(BB) != FC1.L)
+        continue;
+      LI.changeLoopFor(BB, FC0.L);
+    }
+    while (!FC1.L->empty()) {
+      const auto &ChildLoopIt = FC1.L->begin();
+      Loop *ChildLoop = *ChildLoopIt;
+      FC1.L->removeChildLoop(ChildLoopIt);
+      FC0.L->addChildLoop(ChildLoop);
+    }
+
+    // Delete the now empty loop L1.
+    LI.erase(FC1.L);
+
+#ifndef NDEBUG
+    assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
+    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+    assert(PDT.verify());
+    LI.verify(DT);
+    SE.verify();
+#endif
+
+    FuseCounter++;
+
+    LLVM_DEBUG(dbgs() << "Fusion done:\n");
+
+    return FC0.L;
+  }
+};
+
+struct LoopFuseLegacy : public FunctionPass { // Legacy PM wrapper of LoopFuser.
+
+  static char ID;
+
+  LoopFuseLegacy() : FunctionPass(ID) {
+    initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequiredID(LoopSimplifyID);
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    AU.addRequired<DependenceAnalysisWrapperPass>();
+
+    AU.addPreserved<ScalarEvolutionWrapperPass>(); // Updated during fusion.
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<PostDominatorTreeWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
+    auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+    return LF.fuseLoops(F); // True if the function was changed.
+  }
+};
+
+PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &DI = AM.getResult<DependenceAnalysis>(F);
+  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+  bool Changed = LF.fuseLoops(F);
+  if (!Changed)
+    return PreservedAnalyses::all(); // Nothing changed: keep all analyses.
+
+  PreservedAnalyses PA; // Otherwise keep only the analyses listed below.
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<PostDominatorTreeAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<LoopAnalysis>();
+  return PA;
+}
+
+char LoopFuseLegacy::ID = 0; // Storage for the ID passed to FunctionPass(ID).
+
+INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
+
+FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fbffa1920a84..e561494f19cf 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,9 +1,8 @@
 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -37,6 +36,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -51,12 +51,12 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -87,8 +87,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <cassert>
@@ -120,6 +120,7 @@ class LoopIdiomRecognize {
   TargetLibraryInfo *TLI;
   const TargetTransformInfo *TTI;
   const DataLayout *DL;
+  OptimizationRemarkEmitter &ORE;
   bool ApplyCodeSizeHeuristics;
 
 public:
@@ -127,8 +128,9 @@ public:
                               LoopInfo *LI, ScalarEvolution *SE,
                               TargetLibraryInfo *TLI,
                               const TargetTransformInfo *TTI,
-                              const DataLayout *DL)
-      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
+                              const DataLayout *DL,
+                              OptimizationRemarkEmitter &ORE)
+      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
 
   bool runOnLoop(Loop *L);
 
@@ -221,7 +223,12 @@ public:
             *L->getHeader()->getParent());
     const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
 
-    LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+    // pass.  Function analyses need to be preserved across loop transformations
+    // but ORE cannot be preserved (see comment before the pass definition).
+    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
+
+    LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
     return LIR.runOnLoop(L);
   }
 
@@ -243,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                               LPMUpdater &) {
   const auto *DL = &L.getHeader()->getModule()->getDataLayout();
 
-  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
+  const auto &FAM =
+      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+  Function *F = L.getHeader()->getParent();
+
+  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+  // FIXME: This should probably be optional rather than required.
+  if (!ORE)
+    report_fatal_error(
+        "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
+        "at a higher level");
+
+  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
+                         *ORE);
   if (!LIR.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -285,7 +304,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
 
   // Determine if code size heuristics need to be applied.
   ApplyCodeSizeHeuristics =
-      L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
+      L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
 
   HasMemset = TLI->has(LibFunc_memset);
   HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
@@ -313,9 +332,10 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
   SmallVector<BasicBlock *, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
 
-  LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                     << CurLoop->getHeader()->getParent()->getName()
-                    << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+                    << "] Countable Loop %" << CurLoop->getHeader()->getName()
+                    << "\n");
 
   bool MadeChange = false;
 
@@ -430,7 +450,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
   // turned into a memset of i8 -1, assuming that all the consecutive bytes
   // are stored.  A store of i32 0x01020304 can never be turned into a memset,
   // but it can be turned into memset_pattern if the target supports it.
-  Value *SplatValue = isBytewiseValue(StoredVal);
+  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
   Constant *PatternValue = nullptr;
 
   // Note: memset and memset_pattern on unordered-atomic is yet not supported
@@ -607,7 +627,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
     Constant *FirstPatternValue = nullptr;
 
     if (For == ForMemset::Yes)
-      FirstSplatValue = isBytewiseValue(FirstStoredVal);
+      FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
     else
       FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
 
@@ -640,7 +660,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
       Constant *SecondPatternValue = nullptr;
 
       if (For == ForMemset::Yes)
-        SecondSplatValue = isBytewiseValue(SecondStoredVal);
+        SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
       else
         SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
 
@@ -860,7 +880,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     Value *StoredVal, Instruction *TheStore,
     SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
     const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
-  Value *SplatValue = isBytewiseValue(StoredVal);
+  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
   Constant *PatternValue = nullptr;
 
   if (!SplatValue)
@@ -931,9 +951,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 
     Module *M = TheStore->getModule();
     StringRef FuncName = "memset_pattern16";
-    Value *MSP =
-        M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
-                               Int8PtrTy, Int8PtrTy, IntPtr);
+    FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
+                                                Int8PtrTy, Int8PtrTy, IntPtr);
     inferLibFuncAttributes(M, FuncName, *TLI);
 
     // Otherwise we should form a memset_pattern16.  PatternValue is known to be
@@ -952,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
                     << "\n");
   NewCall->setDebugLoc(TheStore->getDebugLoc());
 
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
+                              NewCall->getDebugLoc(), Preheader)
+           << "Transformed loop-strided store into a call to "
+           << ore::NV("NewFunction", NewCall->getCalledFunction())
+           << "() function";
+  });
+
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
   for (auto *I : Stores)
@@ -1084,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                     << "    from store ptr=" << *StoreEv << " at: " << *SI
                     << "\n");
 
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
+                              NewCall->getDebugLoc(), Preheader)
+           << "Formed a call to "
+           << ore::NV("NewFunction", NewCall->getCalledFunction())
+           << "() function";
+  });
+
   // Okay, the memcpy has been formed.  Zap the original store and anything that
   // feeds into it.
   deleteDeadInstruction(SI);
@@ -1109,6 +1144,11 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
 }
 
 bool LoopIdiomRecognize::runOnNoncountableLoop() {
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
+                    << CurLoop->getHeader()->getParent()->getName()
+                    << "] Noncountable Loop %"
+                    << CurLoop->getHeader()->getName() << "\n");
+
   return recognizePopcount() || recognizeAndInsertFFS();
 }
 
@@ -1462,9 +1502,15 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   const Value *Args[] =
       {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
                         : ConstantInt::getFalse(InitX->getContext())};
-  if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+
+  // @llvm.dbg intrinsics don't count as they have no semantic effect.
+  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+  uint32_t HeaderSize =
+      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+
+  if (HeaderSize != IdiomCanonicalSize &&
       TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
-        TargetTransformInfo::TCC_Basic)
+          TargetTransformInfo::TCC_Basic)
     return false;
 
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
@@ -1529,7 +1575,7 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
   Type *Tys[] = {Val->getType()};
 
   Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
-  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+  Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
   CallInst *CI = IRBuilder.CreateCall(Func, Ops);
   CI->setDebugLoc(DL);
 
@@ -1543,7 +1589,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
   Type *Tys[] = {Val->getType()};
 
   Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
-  Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
+  Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
   CallInst *CI = IRBuilder.CreateCall(Func, Ops);
   CI->setDebugLoc(DL);
 
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 6f7dc2429c09..31191b52895c 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -1,9 +1,8 @@
 //===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -234,6 +233,8 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
 
   auto PA = getLoopPassPreservedAnalyses();
   PA.preserveSet<CFGAnalyses>();
+  if (EnableMSSALoopDependency)
+    PA.preserve<MemorySSAAnalysis>();
   return PA;
 }
 
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 766e39b439a0..9a42365adc1b 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1,9 +1,8 @@
 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1265,9 +1264,7 @@ bool LoopInterchangeTransform::transform() {
 }
 
 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
-  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
-  BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
-  InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+  SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
 }
 
 /// \brief Move all instructions except the terminator from FromBB right before
@@ -1280,17 +1277,6 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
                 FromBB->getTerminator()->getIterator());
 }
 
-static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
-                                BasicBlock *NewPred) {
-  for (PHINode &PHI : CurrBlock->phis()) {
-    unsigned Num = PHI.getNumIncomingValues();
-    for (unsigned i = 0; i < Num; ++i) {
-      if (PHI.getIncomingBlock(i) == OldPred)
-        PHI.setIncomingBlock(i, NewPred);
-    }
-  }
-}
-
 /// Update BI to jump to NewBB instead of OldBB. Records updates to
 /// the dominator tree in DTUpdates, if DT should be preserved.
 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
@@ -1313,8 +1299,41 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
 }
 
 // Move Lcssa PHIs to the right place.
-static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
-                          BasicBlock *OuterLatch) {
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
+                          BasicBlock *InnerLatch, BasicBlock *OuterHeader,
+                          BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+
+  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
+  // defined either in the header or latch. Those blocks will become header and
+  // latch of the new outer loop, and the only possible users can be PHI nodes
+  // in the exit block of the loop nest or the outer loop header (reduction
+  // PHIs, in that case, the incoming value must be defined in the inner loop
+  // header). We can just substitute the user with the incoming value and remove
+  // the PHI.
+  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
+    assert(P.getNumIncomingValues() == 1 &&
+           "Only loops with a single exit are supported!");
+
+    // Incoming values are guaranteed to be instructions currently.
+    auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+    // Skip phis with incoming values from the inner loop body, excluding the
+    // header and latch.
+    if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+      continue;
+
+    assert(all_of(P.users(),
+                  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
+                    return (cast<PHINode>(U)->getParent() == OuterHeader &&
+                            IncI->getParent() == InnerHeader) ||
+                           cast<PHINode>(U)->getParent() == OuterExit;
+                  }) &&
+           "Can only replace phis iff the uses are in the loop nest exit or "
+           "the incoming value is defined in the inner header (it will "
+           "dominate all loop blocks after interchanging)");
+    P.replaceAllUsesWith(IncI);
+    P.eraseFromParent();
+  }
+
   SmallVector<PHINode *, 8> LcssaInnerExit;
   for (PHINode &P : InnerExit->phis())
     LcssaInnerExit.push_back(&P);
@@ -1327,35 +1346,43 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
   // If a PHI node has users outside of InnerExit, it has a use outside the
   // interchanged loop and we have to preserve it. We move these to
   // InnerLatch, which will become the new exit block for the innermost
-  // loop after interchanging. For PHIs only used in InnerExit, we can just
-  // replace them with the incoming value.
-  for (PHINode *P : LcssaInnerExit) {
-    bool hasUsersOutside = false;
-    for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
-      Use &U = *UI;
-      ++UI;
-      auto *Usr = cast<Instruction>(U.getUser());
-      if (Usr->getParent() != InnerExit) {
-        hasUsersOutside = true;
-        continue;
-      }
-      U.set(P->getIncomingValueForBlock(InnerLatch));
-    }
-    if (hasUsersOutside)
-      P->moveBefore(InnerLatch->getFirstNonPHI());
-    else
-      P->eraseFromParent();
-  }
+  // loop after interchanging.
+  for (PHINode *P : LcssaInnerExit)
+    P->moveBefore(InnerLatch->getFirstNonPHI());
 
   // If the inner loop latch contains LCSSA PHIs, those come from a child loop
   // and we have to move them to the new inner latch.
   for (PHINode *P : LcssaInnerLatch)
     P->moveBefore(InnerExit->getFirstNonPHI());
 
+  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
+  // incoming values from the outer latch or header, we have to add a new PHI
+  // in the inner loop latch, which became the exit block of the outer loop,
+  // after interchanging.
+  if (OuterExit) {
+    for (PHINode &P : OuterExit->phis()) {
+      if (P.getNumIncomingValues() != 1)
+        continue;
+      // Skip Phis with incoming values not defined in the outer loop's header
+      // and latch. Also skip incoming phis defined in the latch. Those should
+      // already have been updated.
+      auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
+      if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
+                 I->getParent() != OuterHeader))
+        continue;
+
+      PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
+      NewPhi->setIncomingValue(0, P.getIncomingValue(0));
+      NewPhi->setIncomingBlock(0, OuterLatch);
+      NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
+      P.setIncomingValue(0, NewPhi);
+    }
+  }
+
   // Now adjust the incoming blocks for the LCSSA PHIs.
   // For PHIs moved from Inner's exit block, we need to replace Inner's latch
   // with the new latch.
-  updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
+  InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
 }
 
 bool LoopInterchangeTransform::adjustLoopBranches() {
@@ -1374,9 +1401,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   // preheaders do not satisfy those conditions.
   if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
       !OuterLoopPreHeader->getUniquePredecessor())
-    OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
+    OuterLoopPreHeader =
+        InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
   if (InnerLoopPreHeader == OuterLoop->getHeader())
-    InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
+    InnerLoopPreHeader =
+        InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
 
   // Adjust the loop preheader
   BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1422,8 +1451,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
                   InnerLoopHeaderSuccessor, DTUpdates);
 
   // Adjust reduction PHI's now that the incoming block has changed.
-  updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
-                      OuterLoopHeader);
+  InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
+                                               OuterLoopHeader);
 
   updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
                   OuterLoopPreHeader, DTUpdates);
@@ -1452,10 +1481,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
                    OuterLoopPreHeader);
 
-  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
+                OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
   // For PHIs in the exit block of the outer loop, outer's latch has been
   // replaced by Inners'.
-  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+  OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
 
   // Now update the reduction PHIs in the inner and outer loop headers.
   SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
@@ -1482,10 +1512,10 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   }
 
   // Update the incoming blocks for moved PHI nodes.
-  updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
-  updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
-  updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
-  updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
+  OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
+  OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
+  InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
+  InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
 
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 19bd9ebcc15b..2b3d5e0ce9b7 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -1,9 +1,8 @@
 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,10 +29,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -54,6 +57,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
 #include <cassert>
 #include <forward_list>
@@ -159,8 +163,9 @@ namespace {
 class LoadEliminationForLoop {
 public:
   LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
-                         DominatorTree *DT)
-      : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
+                         DominatorTree *DT, BlockFrequencyInfo *BFI,
+                         ProfileSummaryInfo* PSI)
+      : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
 
   /// Look through the loop-carried and loop-independent dependences in
   /// this loop and find store->load dependences.
@@ -428,9 +433,9 @@ public:
     auto *PH = L->getLoopPreheader();
     Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
                                           PH->getTerminator());
-    Value *Initial =
-        new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
-                     Cand.Load->getAlignment(), PH->getTerminator());
+    Value *Initial = new LoadInst(
+        Cand.Load->getType(), InitialPtr, "load_initial",
+        /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator());
 
     PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
                                    &L->getHeader()->front());
@@ -529,7 +534,17 @@ public:
     }
 
     if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
-      if (L->getHeader()->getParent()->optForSize()) {
+      if (LAI.hasConvergentOp()) {
+        LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
+                             "convergent calls\n");
+        return false;
+      }
+
+      auto *HeaderBB = L->getHeader();
+      auto *F = HeaderBB->getParent();
+      bool OptForSize = F->hasOptSize() ||
+                        llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+      if (OptForSize) {
         LLVM_DEBUG(
             dbgs() << "Versioning is needed but not allowed when optimizing "
                       "for size.\n");
@@ -572,6 +587,8 @@ private:
   LoopInfo *LI;
   const LoopAccessInfo &LAI;
   DominatorTree *DT;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
   PredicatedScalarEvolution PSE;
 };
 
@@ -579,6 +596,7 @@ private:
 
 static bool
 eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+                          BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
                           function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
   // Build up a worklist of inner-loops to transform to avoid iterator
   // invalidation.
@@ -597,7 +615,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
   bool Changed = false;
   for (Loop *L : Worklist) {
     // The actual work is performed by LoadEliminationForLoop.
-    LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+    LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
     Changed |= LEL.processLoop();
   }
   return Changed;
@@ -622,10 +640,14 @@ public:
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+                &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+                nullptr;
 
     // Process each loop nest in the function.
     return eliminateLoadsAcrossLoops(
-        F, LI, DT,
+        F, LI, DT, BFI, PSI,
         [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
   }
 
@@ -638,6 +660,8 @@ public:
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
   }
 };
 
@@ -653,6 +677,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
 
 FunctionPass *llvm::createLoopLoadEliminationPass() {
@@ -668,12 +694,18 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+  MemorySSA *MSSA = EnableMSSALoopDependency
+                        ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+                        : nullptr;
 
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
   bool Changed = eliminateLoadsAcrossLoops(
-      F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
-        LoopStandardAnalysisResults AR = {AA, AC,  DT,  LI,
-                                          SE, TLI, TTI, nullptr};
+      F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
+        LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
         return LAM.getResult<LoopAccessAnalysis>(L, AR);
       });
 
diff --git a/lib/Transforms/Scalar/LoopPassManager.cpp b/lib/Transforms/Scalar/LoopPassManager.cpp
index 774ad7b945a0..f3bfbd3564ab 100644
--- a/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -1,9 +1,8 @@
 //===- LoopPassManager.cpp - Loop pass management -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 5983c804c0c1..507a1e251ca6 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -1,9 +1,8 @@
 //===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -179,6 +178,7 @@
 
 #include "llvm/Transforms/Scalar/LoopPredication.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -194,6 +194,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
 #define DEBUG_TYPE "loop-predication"
@@ -222,24 +223,31 @@ static cl::opt<float> LatchExitProbabilityScale(
     cl::desc("scale factor for the latch probability. Value should be greater "
              "than 1. Lower values are ignored"));
 
+static cl::opt<bool> PredicateWidenableBranchGuards(
+    "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
+    cl::desc("Whether or not we should predicate guards "
+             "expressed as widenable branches to deoptimize blocks"),
+    cl::init(true));
+
 namespace {
-class LoopPredication {
-  /// Represents an induction variable check:
-  ///   icmp Pred, <induction variable>, <loop invariant limit>
-  struct LoopICmp {
-    ICmpInst::Predicate Pred;
-    const SCEVAddRecExpr *IV;
-    const SCEV *Limit;
-    LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
-             const SCEV *Limit)
-        : Pred(Pred), IV(IV), Limit(Limit) {}
-    LoopICmp() {}
-    void dump() {
-      dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
-             << ", Limit = " << *Limit << "\n";
-    }
-  };
+/// Represents an induction variable check:
+///   icmp Pred, <induction variable>, <loop invariant limit>
+struct LoopICmp {
+  ICmpInst::Predicate Pred;
+  const SCEVAddRecExpr *IV;
+  const SCEV *Limit;
+  LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+           const SCEV *Limit)
+    : Pred(Pred), IV(IV), Limit(Limit) {}
+  LoopICmp() {}
+  void dump() {
+    dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
+           << ", Limit = " << *Limit << "\n";
+  }
+};
 
+class LoopPredication {
+  AliasAnalysis *AA;
   ScalarEvolution *SE;
   BranchProbabilityInfo *BPI;
 
@@ -249,58 +257,53 @@ class LoopPredication {
   LoopICmp LatchCheck;
 
   bool isSupportedStep(const SCEV* Step);
-  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
-    return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
-                         ICI->getOperand(1));
-  }
-  Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
-                                   Value *RHS);
-
+  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
   Optional<LoopICmp> parseLoopLatchICmp();
 
-  bool CanExpand(const SCEV* S);
-  Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
-                     ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
-                     Instruction *InsertAt);
+  /// Return an insertion point suitable for inserting a safe to speculate
+  /// instruction whose only user will be 'User' which has operands 'Ops'.  A
+  /// trivial result would be at the User itself, but we try to return a
+  /// loop invariant location if possible.
+  Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
+  /// Same as above, *except* that this uses the SCEV definition of invariant
+  /// which is that an expression *can be made* invariant via SCEVExpander.
+  /// Thus, this version is only suitable for finding an insert point to be
+  /// passed to SCEVExpander!
+  Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
+
+  /// Return true if the value is known to produce a single fixed value across
+  /// all iterations on which it executes.  Note that this does not imply
+  /// speculation safety.  That must be established separately.
+  bool isLoopInvariantValue(const SCEV* S);
+
+  Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
+                     ICmpInst::Predicate Pred, const SCEV *LHS,
+                     const SCEV *RHS);
 
   Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
-                                        IRBuilder<> &Builder);
+                                        Instruction *Guard);
   Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
                                                         LoopICmp RangeCheck,
                                                         SCEVExpander &Expander,
-                                                        IRBuilder<> &Builder);
+                                                        Instruction *Guard);
   Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
                                                         LoopICmp RangeCheck,
                                                         SCEVExpander &Expander,
-                                                        IRBuilder<> &Builder);
+                                                        Instruction *Guard);
+  unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
+                         SCEVExpander &Expander, Instruction *Guard);
   bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
-
+  bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
   // If the loop always exits through another block in the loop, we should not
   // predicate based on the latch check. For example, the latch check can be a
   // very coarse grained check and there can be more fine grained exit checks
   // within the loop. We identify such unprofitable loops through BPI.
   bool isLoopProfitableToPredicate();
 
-  // When the IV type is wider than the range operand type, we can still do loop
-  // predication, by generating SCEVs for the range and latch that are of the
-  // same type. We achieve this by generating a SCEV truncate expression for the
-  // latch IV. This is done iff truncation of the IV is a safe operation,
-  // without loss of information.
-  // Another way to achieve this is by generating a wider type SCEV for the
-  // range check operand, however, this needs a more involved check that
-  // operands do not overflow. This can lead to loss of information when the
-  // range operand is of the form: add i32 %offset, %iv. We need to prove that
-  // sext(x + y) is same as sext(x) + sext(y).
-  // This function returns true if we can safely represent the IV type in
-  // the RangeCheckType without loss of information.
-  bool isSafeToTruncateWideIVType(Type *RangeCheckType);
-  // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
-  // so.
-  Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
-
 public:
-  LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
-      : SE(SE), BPI(BPI){};
+  LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+                  BranchProbabilityInfo *BPI)
+    : AA(AA), SE(SE), BPI(BPI){};
   bool runOnLoop(Loop *L);
 };
 
@@ -322,7 +325,8 @@ public:
     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     BranchProbabilityInfo &BPI =
         getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
-    LoopPredication LP(SE, &BPI);
+    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    LoopPredication LP(AA, SE, &BPI);
     return LP.runOnLoop(L);
   }
 };
@@ -348,16 +352,19 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
   Function *F = L.getHeader()->getParent();
   auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
-  LoopPredication LP(&AR.SE, BPI);
+  LoopPredication LP(&AR.AA, &AR.SE, BPI);
   if (!LP.runOnLoop(&L))
     return PreservedAnalyses::all();
 
   return getLoopPassPreservedAnalyses();
 }
 
-Optional<LoopPredication::LoopICmp>
-LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
-                               Value *RHS) {
+Optional<LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
+  auto Pred = ICI->getPredicate();
+  auto *LHS = ICI->getOperand(0);
+  auto *RHS = ICI->getOperand(1);
+
   const SCEV *LHSS = SE->getSCEV(LHS);
   if (isa<SCEVCouldNotCompute>(LHSS))
     return None;
@@ -380,42 +387,98 @@ LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
 }
 
 Value *LoopPredication::expandCheck(SCEVExpander &Expander,
-                                    IRBuilder<> &Builder,
+                                    Instruction *Guard, 
                                     ICmpInst::Predicate Pred, const SCEV *LHS,
-                                    const SCEV *RHS, Instruction *InsertAt) {
-  // TODO: we can check isLoopEntryGuardedByCond before emitting the check
-
+                                    const SCEV *RHS) {
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
 
-  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
-    return Builder.getTrue();
+  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
+    IRBuilder<> Builder(Guard);
+    if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+      return Builder.getTrue();
+    if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
+                                     LHS, RHS))
+      return Builder.getFalse();
+  }
 
-  Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
-  Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+  Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
+  Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
+  IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
   return Builder.CreateICmp(Pred, LHSV, RHSV);
 }
 
-Optional<LoopPredication::LoopICmp>
-LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
+
+// Returns true if it's safe to truncate the IV to RangeCheckType.
+// When the IV type is wider than the range operand type, we can still do loop
+// predication, by generating SCEVs for the range and latch that are of the
+// same type. We achieve this by generating a SCEV truncate expression for the
+// latch IV. This is done iff truncation of the IV is a safe operation,
+// without loss of information.
+// Another way to achieve this is by generating a wider type SCEV for the
+// range check operand, however, this needs a more involved check that
+// operands do not overflow. This can lead to loss of information when the
+// range operand is of the form: add i32 %offset, %iv. We need to prove that
+// sext(x + y) is same as sext(x) + sext(y).
+// This function returns true if we can safely represent the IV type in
+// the RangeCheckType without loss of information.
+static bool isSafeToTruncateWideIVType(const DataLayout &DL,
+                                       ScalarEvolution &SE,
+                                       const LoopICmp LatchCheck,
+                                       Type *RangeCheckType) {
+  if (!EnableIVTruncation)
+    return false;
+  assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
+             DL.getTypeSizeInBits(RangeCheckType) &&
+         "Expected latch check IV type to be larger than range check operand "
+         "type!");
+  // The start and end values of the IV should be known. This is to guarantee
+  // that truncating the wide type will not lose information.
+  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
+  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
+  if (!Limit || !Start)
+    return false;
+  // This check makes sure that the IV does not change sign during loop
+  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
+  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
+  // IV wraps around, and the truncation of the IV would lose the range of
+  // iterations between 2^32 and 2^64.
+  bool Increasing;
+  if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+    return false;
+  // The active bits should be less than the bits in the RangeCheckType. This
+  // guarantees that truncating the latch check to RangeCheckType is a safe
+  // operation.
+  auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
+  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
+         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
+}
+
+
+// Return a LoopICmp describing a latch check equivalent to LatchCheck but with
+// the requested type if safe to do so.  May involve the use of a new IV.
+static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
+                                                 ScalarEvolution &SE,
+                                                 const LoopICmp LatchCheck,
+                                                 Type *RangeCheckType) {
 
   auto *LatchType = LatchCheck.IV->getType();
   if (RangeCheckType == LatchType)
     return LatchCheck;
   // For now, bail out if latch type is narrower than range type.
-  if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
+  if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
     return None;
-  if (!isSafeToTruncateWideIVType(RangeCheckType))
+  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
     return None;
   // We can now safely identify the truncated version of the IV and limit for
   // RangeCheckType.
   LoopICmp NewLatchCheck;
   NewLatchCheck.Pred = LatchCheck.Pred;
   NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
-      SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
+      SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
   if (!NewLatchCheck.IV)
     return None;
-  NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
+  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
   LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
                     << "can be represented as range check type:"
                     << *RangeCheckType << "\n");
@@ -428,13 +491,66 @@ bool LoopPredication::isSupportedStep(const SCEV* Step) {
   return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
 }
 
-bool LoopPredication::CanExpand(const SCEV* S) {
-  return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+                                           ArrayRef<Value*> Ops) {
+  for (Value *Op : Ops)
+    if (!L->isLoopInvariant(Op))
+      return Use;
+  return Preheader->getTerminator();
+}
+
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+                                           ArrayRef<const SCEV*> Ops) {
+  // Subtlety: SCEV considers things to be invariant if the value produced is
+  // the same across iterations.  This is not the same as being able to
+  // evaluate outside the loop, which is what we actually need here.
+  for (const SCEV *Op : Ops)
+    if (!SE->isLoopInvariant(Op, L) ||
+        !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
+      return Use;
+  return Preheader->getTerminator();
+}
+
+bool LoopPredication::isLoopInvariantValue(const SCEV* S) { 
+  // Handling expressions which produce invariant results, but *haven't* yet
+  // been removed from the loop serves two important purposes.
+  // 1) Most importantly, it resolves a pass ordering cycle which would
+  // otherwise need us to iterate licm, loop-predication, and either
+  // loop-unswitch or loop-peeling to make progress on examples with lots of
+  // predicable range checks in a row.  (Since, in the general case,  we can't
+  // hoist the length checks until the dominating checks have been discharged
+  // as we can't prove doing so is safe.)
+  // 2) As a nice side effect, this exposes the value of peeling or unswitching
+  // much more obviously in the IR.  Otherwise, the cost modeling for other
+  // transforms would end up needing to duplicate all of this logic to model a
+  // check which becomes predictable based on a modeled peel or unswitch.
+  // 
+  // The cost of doing so in the worst case is an extra fill from the stack  in
+  // the loop to materialize the loop invariant test value instead of checking
+  // against the original IV which is presumably in a register inside the loop.
+  // Such cases are presumably rare, and hint at missing opportunities for
+  // other passes. 
+
+  if (SE->isLoopInvariant(S, L))
+    // Note: This is the SCEV variant, so the original Value* may be within the
+    // loop even though SCEV has proven it is loop invariant.
+    return true;
+
+  // Handle a particular important case which SCEV doesn't yet know about which
+  // shows up in range checks on arrays with immutable lengths.  
+  // TODO: This should be sunk inside SCEV.
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+    if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
+      if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
+        if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
+            LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+          return true;
+  return false;
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
-    LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
-    SCEVExpander &Expander, IRBuilder<> &Builder) {
+    LoopICmp LatchCheck, LoopICmp RangeCheck,
+    SCEVExpander &Expander, Instruction *Guard) {
   auto *Ty = RangeCheck.IV->getType();
   // Generate the widened condition for the forward loop:
   //   guardStart u< guardLimit &&
@@ -446,40 +562,61 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
   const SCEV *GuardLimit = RangeCheck.Limit;
   const SCEV *LatchStart = LatchCheck.IV->getStart();
   const SCEV *LatchLimit = LatchCheck.Limit;
+  // Subtlety: We need all the values to be *invariant* across all iterations,
+  // but we only need to check expansion safety for those which *aren't*
+  // already guaranteed to dominate the guard.  
+  if (!isLoopInvariantValue(GuardStart) ||
+      !isLoopInvariantValue(GuardLimit) ||
+      !isLoopInvariantValue(LatchStart) ||
+      !isLoopInvariantValue(LatchLimit)) {
+    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+    return None;
+  }
+  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
+    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+    return None;
+  }
 
   // guardLimit - guardStart + latchStart - 1
   const SCEV *RHS =
       SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
                      SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit) || !CanExpand(RHS)) {
-    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
-    return None;
-  }
   auto LimitCheckPred =
       ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
 
   LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
   LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
   LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");
-
-  Instruction *InsertAt = Preheader->getTerminator();
+ 
   auto *LimitCheck =
-      expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS, InsertAt);
-  auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred,
-                                          GuardStart, GuardLimit, InsertAt);
+      expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
+  auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
+                                          GuardStart, GuardLimit);
+  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
-    LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
-    SCEVExpander &Expander, IRBuilder<> &Builder) {
+    LoopICmp LatchCheck, LoopICmp RangeCheck,
+    SCEVExpander &Expander, Instruction *Guard) {
   auto *Ty = RangeCheck.IV->getType();
   const SCEV *GuardStart = RangeCheck.IV->getStart();
   const SCEV *GuardLimit = RangeCheck.Limit;
+  const SCEV *LatchStart = LatchCheck.IV->getStart();
   const SCEV *LatchLimit = LatchCheck.Limit;
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit)) {
+  // Subtlety: We need all the values to be *invariant* across all iterations,
+  // but we only need to check expansion safety for those which *aren't*
+  // already guaranteed to dominate the guard.  
+  if (!isLoopInvariantValue(GuardStart) ||
+      !isLoopInvariantValue(GuardLimit) ||
+      !isLoopInvariantValue(LatchStart) ||
+      !isLoopInvariantValue(LatchLimit)) {
+    LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+    return None;
+  }
+  if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+      !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
     LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
     return None;
   }
@@ -497,22 +634,35 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
   // guardStart u< guardLimit &&
   // latchLimit <pred> 1.
   // See the header comment for reasoning of the checks.
-  Instruction *InsertAt = Preheader->getTerminator();
   auto LimitCheckPred =
       ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
-  auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
-                                          GuardStart, GuardLimit, InsertAt);
-  auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
-                                 SE->getOne(Ty), InsertAt);
+  auto *FirstIterationCheck = expandCheck(Expander, Guard,
+                                          ICmpInst::ICMP_ULT,
+                                          GuardStart, GuardLimit);
+  auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
+                                 SE->getOne(Ty));
+  IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
+static void normalizePredicate(ScalarEvolution *SE, Loop *L,
+                               LoopICmp& RC) {
+  // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
+  // ULT/UGE form for ease of handling by our caller. 
+  if (ICmpInst::isEquality(RC.Pred) &&
+      RC.IV->getStepRecurrence(*SE)->isOne() &&
+      SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
+    RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
+      ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+}
+
+
 /// If ICI can be widened to a loop invariant condition emits the loop
 /// invariant condition in the loop preheader and return it, otherwise
 /// returns None.
 Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
                                                        SCEVExpander &Expander,
-                                                       IRBuilder<> &Builder) {
+                                                       Instruction *Guard) {
   LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
   LLVM_DEBUG(ICI->dump());
 
@@ -545,7 +695,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
     return None;
   }
   auto *Ty = RangeCheckIV->getType();
-  auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
+  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
   if (!CurrLatchCheckOpt) {
     LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
                          "corresponding to range type: "
@@ -566,34 +716,27 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
 
   if (Step->isOne())
     return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
-                                               Expander, Builder);
+                                               Expander, Guard);
   else {
     assert(Step->isAllOnesValue() && "Step should be -1!");
     return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
-                                               Expander, Builder);
+                                               Expander, Guard);
   }
 }
 
-bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
-                                           SCEVExpander &Expander) {
-  LLVM_DEBUG(dbgs() << "Processing guard:\n");
-  LLVM_DEBUG(Guard->dump());
-
-  TotalConsidered++;
-
-  IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
-
+unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
+                                        Value *Condition,
+                                        SCEVExpander &Expander,
+                                        Instruction *Guard) {
+  unsigned NumWidened = 0;
   // The guard condition is expected to be in form of:
   //   cond1 && cond2 && cond3 ...
   // Iterate over subconditions looking for icmp conditions which can be
   // widened across loop iterations. Widening these conditions remember the
   // resulting list of subconditions in Checks vector.
-  SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+  SmallVector<Value *, 4> Worklist(1, Condition);
   SmallPtrSet<Value *, 4> Visited;
-
-  SmallVector<Value *, 4> Checks;
-
-  unsigned NumWidened = 0;
+  Value *WideableCond = nullptr;
   do {
     Value *Condition = Worklist.pop_back_val();
     if (!Visited.insert(Condition).second)
@@ -607,8 +750,16 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
       continue;
     }
 
+    if (match(Condition,
+              m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+      // Pick any, we don't care which
+      WideableCond = Condition;
+      continue;
+    }
+
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
-      if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+      if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
+                                                   Guard)) {
         Checks.push_back(NewRangeCheck.getValue());
         NumWidened++;
         continue;
@@ -617,28 +768,70 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
 
     // Save the condition as is if we can't widen it
     Checks.push_back(Condition);
-  } while (Worklist.size() != 0);
+  } while (!Worklist.empty());
+  // At the moment, our matching logic for wideable conditions implicitly
+  // assumes we preserve the form: (br (and Cond, WC())).  FIXME
+  // Note that if there were multiple calls to wideable condition in the
+  // traversal, we only need to keep one, and which one is arbitrary.
+  if (WideableCond)
+    Checks.push_back(WideableCond);
+  return NumWidened;
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+                                           SCEVExpander &Expander) {
+  LLVM_DEBUG(dbgs() << "Processing guard:\n");
+  LLVM_DEBUG(Guard->dump());
 
+  TotalConsidered++;
+  SmallVector<Value *, 4> Checks;
+  unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
+                                      Guard);
+  if (NumWidened == 0)
+    return false;
+
+  TotalWidened += NumWidened;
+
+  // Emit the new guard condition
+  IRBuilder<> Builder(findInsertPt(Guard, Checks));
+  Value *AllChecks = Builder.CreateAnd(Checks);
+  auto *OldCond = Guard->getOperand(0);
+  Guard->setOperand(0, AllChecks);
+  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  return true;
+}
+
+bool LoopPredication::widenWidenableBranchGuardConditions(
+    BranchInst *BI, SCEVExpander &Expander) {
+  assert(isGuardAsWidenableBranch(BI) && "Must be!");
+  LLVM_DEBUG(dbgs() << "Processing guard:\n");
+  LLVM_DEBUG(BI->dump());
+
+  TotalConsidered++;
+  SmallVector<Value *, 4> Checks;
+  unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
+                                      Expander, BI);
   if (NumWidened == 0)
     return false;
 
   TotalWidened += NumWidened;
 
   // Emit the new guard condition
-  Builder.SetInsertPoint(Guard);
-  Value *LastCheck = nullptr;
-  for (auto *Check : Checks)
-    if (!LastCheck)
-      LastCheck = Check;
-    else
-      LastCheck = Builder.CreateAnd(LastCheck, Check);
-  Guard->setOperand(0, LastCheck);
+  IRBuilder<> Builder(findInsertPt(BI, Checks));
+  Value *AllChecks = Builder.CreateAnd(Checks);
+  auto *OldCond = BI->getCondition();
+  BI->setCondition(AllChecks);
+  assert(isGuardAsWidenableBranch(BI) &&
+         "Stopped being a guard after transform?");
+  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
 
   LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
   return true;
 }
 
-Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
+Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
   using namespace PatternMatch;
 
   BasicBlock *LoopLatch = L->getLoopLatch();
@@ -647,27 +840,30 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
     return None;
   }
 
-  ICmpInst::Predicate Pred;
-  Value *LHS, *RHS;
-  BasicBlock *TrueDest, *FalseDest;
-
-  if (!match(LoopLatch->getTerminator(),
-             m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
-                  FalseDest))) {
+  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+  if (!BI || !BI->isConditional()) {
     LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
     return None;
   }
-  assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
-         "One of the latch's destinations must be the header");
-  if (TrueDest != L->getHeader())
-    Pred = ICmpInst::getInversePredicate(Pred);
-
-  auto Result = parseLoopICmp(Pred, LHS, RHS);
+  BasicBlock *TrueDest = BI->getSuccessor(0);
+  assert(
+      (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
+      "One of the latch's destinations must be the header");
+
+  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!ICI) {
+    LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
+    return None;
+  }
+  auto Result = parseLoopICmp(ICI);
   if (!Result) {
     LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
     return None;
   }
 
+  if (TrueDest != L->getHeader())
+    Result->Pred = ICmpInst::getInversePredicate(Result->Pred);
+
   // Check affine first, so if it's not we don't try to compute the step
   // recurrence.
   if (!Result->IV->isAffine()) {
@@ -692,49 +888,22 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
     }
   };
 
+  normalizePredicate(SE, L, *Result);
   if (IsUnsupportedPredicate(Step, Result->Pred)) {
     LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                       << ")!\n");
     return None;
   }
+
   return Result;
 }
 
-// Returns true if its safe to truncate the IV to RangeCheckType.
-bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
-  if (!EnableIVTruncation)
-    return false;
-  assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
-             DL->getTypeSizeInBits(RangeCheckType) &&
-         "Expected latch check IV type to be larger than range check operand "
-         "type!");
-  // The start and end values of the IV should be known. This is to guarantee
-  // that truncating the wide type will not lose information.
-  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
-  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
-  if (!Limit || !Start)
-    return false;
-  // This check makes sure that the IV does not change sign during loop
-  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
-  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
-  // IV wraps around, and the truncation of the IV would lose the range of
-  // iterations between 2^32 and 2^64.
-  bool Increasing;
-  if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
-    return false;
-  // The active bits should be less than the bits in the RangeCheckType. This
-  // guarantees that truncating the latch check to RangeCheckType is a safe
-  // operation.
-  auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
-  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
-         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
-}
 
 bool LoopPredication::isLoopProfitableToPredicate() {
   if (SkipProfitabilityChecks || !BPI)
     return true;
 
-  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8> ExitEdges;
+  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
   L->getExitEdges(ExitEdges);
   // If there is only one exiting edge in the loop, it is always profitable to
   // predicate the loop.
@@ -795,7 +964,12 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
   // There is nothing to do if the module doesn't use guards
   auto *GuardDecl =
       M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
-  if (!GuardDecl || GuardDecl->use_empty())
+  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
+  auto *WCDecl = M->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+  bool HasWidenableConditions =
+      PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
+  if (!HasIntrinsicGuards && !HasWidenableConditions)
     return false;
 
   DL = &M->getDataLayout();
@@ -819,12 +993,18 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
   // Collect all the guards into a vector and process later, so as not
   // to invalidate the instruction iterator.
   SmallVector<IntrinsicInst *, 4> Guards;
-  for (const auto BB : L->blocks())
+  SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
+  for (const auto BB : L->blocks()) {
     for (auto &I : *BB)
       if (isGuard(&I))
         Guards.push_back(cast<IntrinsicInst>(&I));
+    if (PredicateWidenableBranchGuards &&
+        isGuardAsWidenableBranch(BB->getTerminator()))
+      GuardsAsWidenableBranches.push_back(
+          cast<BranchInst>(BB->getTerminator()));
+  }
 
-  if (Guards.empty())
+  if (Guards.empty() && GuardsAsWidenableBranches.empty())
     return false;
 
   SCEVExpander Expander(*SE, *DL, "loop-predication");
@@ -832,6 +1012,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
   bool Changed = false;
   for (auto *Guard : Guards)
     Changed |= widenGuardConditions(Guard, Expander);
+  for (auto *Guard : GuardsAsWidenableBranches)
+    Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
 
   return Changed;
 }
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 9a99e5925572..166b57f20b43 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1,9 +1,8 @@
 //===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -892,12 +891,22 @@ bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
   const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
   if (!ADR)
     return false;
+
+  // Check that the first root is evenly spaced.
   unsigned N = DRS.Roots.size() + 1;
   const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
   const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
   if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
     return false;
 
+  // Check that the remaining roots are evenly spaced.
+  for (unsigned i = 1; i < N - 1; ++i) {
+    const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
+                                               SE->getSCEV(DRS.Roots[i-1]));
+    if (NewStepSCEV != StepSCEV)
+      return false;
+  }
+
   return true;
 }
 
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fd22128f7fe6..e009947690af 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -1,9 +1,8 @@
 //===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,7 +54,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
   if (AR.MSSA && VerifyMemorySSA)
     AR.MSSA->verifyMemorySSA();
 
-  return getLoopPassPreservedAnalyses();
+  auto PA = getLoopPassPreservedAnalyses();
+  if (EnableMSSALoopDependency)
+    PA.preserve<MemorySSAAnalysis>();
+  return PA;
 }
 
 namespace {
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 2e5927f9a068..046f4c8af492 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -1,9 +1,8 @@
 //===--------- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -29,7 +29,6 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -42,7 +41,7 @@ using namespace llvm;
 #define DEBUG_TYPE "loop-simplifycfg"
 
 static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding",
-                                       cl::init(false));
+                                       cl::init(true));
 
 STATISTIC(NumTerminatorsFolded,
           "Number of terminators folded to unconditional branches");
@@ -80,6 +79,36 @@ static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) {
   return nullptr;
 }
 
+/// Removes \p BB from all loops from [FirstLoop, LastLoop) in parent chain.
+static void removeBlockFromLoops(BasicBlock *BB, Loop *FirstLoop,
+                                 Loop *LastLoop = nullptr) {
+  assert((!LastLoop || LastLoop->contains(FirstLoop->getHeader())) &&
+         "First loop is supposed to be inside of last loop!");
+  assert(FirstLoop->contains(BB) && "Must be a loop block!");
+  for (Loop *Current = FirstLoop; Current != LastLoop;
+       Current = Current->getParentLoop())
+    Current->removeBlockFromLoop(BB);
+}
+
+/// Find innermost loop that contains at least one block from \p BBs and
+/// contains the header of loop \p L.
+static Loop *getInnermostLoopFor(SmallPtrSetImpl<BasicBlock *> &BBs,
+                                 Loop &L, LoopInfo &LI) {
+  Loop *Innermost = nullptr;
+  for (BasicBlock *BB : BBs) {
+    Loop *BBL = LI.getLoopFor(BB);
+    while (BBL && !BBL->contains(L.getHeader()))
+      BBL = BBL->getParentLoop();
+    if (BBL == &L)
+      BBL = BBL->getParentLoop();
+    if (!BBL)
+      continue;
+    if (!Innermost || BBL->getLoopDepth() > Innermost->getLoopDepth())
+      Innermost = BBL;
+  }
+  return Innermost;
+}
+
 namespace {
 /// Helper class that can turn branches and switches with constant conditions
 /// into unconditional branches.
@@ -90,6 +119,9 @@ private:
   DominatorTree &DT;
   ScalarEvolution &SE;
   MemorySSAUpdater *MSSAU;
+  LoopBlocksDFS DFS;
+  DomTreeUpdater DTU;
+  SmallVector<DominatorTree::UpdateType, 16> DTUpdates;
 
   // Whether or not the current loop has irreducible CFG.
   bool HasIrreducibleCFG = false;
@@ -175,7 +207,6 @@ private:
   /// Fill all information about status of blocks and exits of the current loop
   /// if constant folding of all branches will be done.
   void analyze() {
-    LoopBlocksDFS DFS(&L);
     DFS.perform(&LI);
     assert(DFS.isComplete() && "DFS is expected to be finished");
 
@@ -208,12 +239,13 @@ private:
       // folding. Only handle blocks from current loop: branches in child loops
       // are skipped because if they can be folded, they should be folded during
       // the processing of child loops.
-      if (TheOnlySucc && LI.getLoopFor(BB) == &L)
+      bool TakeFoldCandidate = TheOnlySucc && LI.getLoopFor(BB) == &L;
+      if (TakeFoldCandidate)
         FoldCandidates.push_back(BB);
 
       // Handle successors.
       for (BasicBlock *Succ : successors(BB))
-        if (!TheOnlySucc || TheOnlySucc == Succ) {
+        if (!TakeFoldCandidate || TheOnlySucc == Succ) {
           if (L.contains(Succ))
             LiveLoopBlocks.insert(Succ);
           else
@@ -229,8 +261,10 @@ private:
     // Now, all exit blocks that are not marked as live are dead.
     SmallVector<BasicBlock *, 8> ExitBlocks;
     L.getExitBlocks(ExitBlocks);
+    SmallPtrSet<BasicBlock *, 8> UniqueDeadExits;
     for (auto *ExitBlock : ExitBlocks)
-      if (!LiveExitBlocks.count(ExitBlock))
+      if (!LiveExitBlocks.count(ExitBlock) &&
+          UniqueDeadExits.insert(ExitBlock).second)
         DeadExitBlocks.push_back(ExitBlock);
 
     // Whether or not the edge From->To will still be present in graph after the
@@ -239,7 +273,7 @@ private:
       if (!LiveLoopBlocks.count(From))
         return false;
       BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From);
-      return !TheOnlySucc || TheOnlySucc == To;
+      return !TheOnlySucc || TheOnlySucc == To || LI.getLoopFor(From) != &L;
     };
 
     // The loop will not be destroyed if its latch is live.
@@ -317,14 +351,10 @@ private:
 
     // Construct split preheader and the dummy switch to thread edges from it to
     // dead exits.
-    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
     BasicBlock *Preheader = L.getLoopPreheader();
-    BasicBlock *NewPreheader = Preheader->splitBasicBlock(
-        Preheader->getTerminator(),
-        Twine(Preheader->getName()).concat("-split"));
-    DTU.deleteEdge(Preheader, L.getHeader());
-    DTU.insertEdge(NewPreheader, L.getHeader());
-    DTU.insertEdge(Preheader, NewPreheader);
+    BasicBlock *NewPreheader = llvm::SplitBlock(
+        Preheader, Preheader->getTerminator(), &DT, &LI, MSSAU);
+
     IRBuilder<> Builder(Preheader->getTerminator());
     SwitchInst *DummySwitch =
         Builder.CreateSwitch(Builder.getInt32(0), NewPreheader);
@@ -343,75 +373,106 @@ private:
       }
       assert(DummyIdx != 0 && "Too many dead exits!");
       DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
-      DTU.insertEdge(Preheader, BB);
+      DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
       ++NumLoopExitsDeleted;
     }
 
     assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
     if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
-      OuterLoop->addBasicBlockToLoop(NewPreheader, LI);
-
       // When we break dead edges, the outer loop may become unreachable from
       // the current loop. We need to fix loop info accordingly. For this, we
       // find the most nested loop that still contains L and remove L from all
       // loops that are inside of it.
-      Loop *StillReachable = nullptr;
-      for (BasicBlock *BB : LiveExitBlocks) {
-        Loop *BBL = LI.getLoopFor(BB);
-        if (BBL && BBL->contains(L.getHeader()))
-          if (!StillReachable ||
-              BBL->getLoopDepth() > StillReachable->getLoopDepth())
-            StillReachable = BBL;
-      }
+      Loop *StillReachable = getInnermostLoopFor(LiveExitBlocks, L, LI);
 
       // Okay, our loop is no longer in the outer loop (and maybe not in some of
       // its parents as well). Make the fixup.
       if (StillReachable != OuterLoop) {
         LI.changeLoopFor(NewPreheader, StillReachable);
-        for (Loop *NotContaining = OuterLoop; NotContaining != StillReachable;
-             NotContaining = NotContaining->getParentLoop()) {
-          NotContaining->removeBlockFromLoop(NewPreheader);
-          for (auto *BB : L.blocks())
-            NotContaining->removeBlockFromLoop(BB);
-        }
+        removeBlockFromLoops(NewPreheader, OuterLoop, StillReachable);
+        for (auto *BB : L.blocks())
+          removeBlockFromLoops(BB, OuterLoop, StillReachable);
         OuterLoop->removeChildLoop(&L);
         if (StillReachable)
           StillReachable->addChildLoop(&L);
         else
           LI.addTopLevelLoop(&L);
+
+        // Some values from loops in [OuterLoop, StillReachable) could be used
+        // in the current loop. Now it is not their child anymore, so such uses
+        // require LCSSA Phis.
+        Loop *FixLCSSALoop = OuterLoop;
+        while (FixLCSSALoop->getParentLoop() != StillReachable)
+          FixLCSSALoop = FixLCSSALoop->getParentLoop();
+        assert(FixLCSSALoop && "Should be a loop!");
+        // We need all DT updates to be done before forming LCSSA.
+        DTU.applyUpdates(DTUpdates);
+        if (MSSAU)
+          MSSAU->applyUpdates(DTUpdates, DT);
+        DTUpdates.clear();
+        formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE);
       }
     }
+
+    if (MSSAU) {
+      // Clear all updates now. Facilitates deletes that follow.
+      DTU.applyUpdates(DTUpdates);
+      MSSAU->applyUpdates(DTUpdates, DT);
+      DTUpdates.clear();
+      if (VerifyMemorySSA)
+        MSSAU->getMemorySSA()->verifyMemorySSA();
+    }
   }
 
   /// Delete loop blocks that have become unreachable after folding. Make all
   /// relevant updates to DT and LI.
   void deleteDeadLoopBlocks() {
-    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
     if (MSSAU) {
-      SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
-                                                     DeadLoopBlocks.end());
+      SmallSetVector<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+                                                        DeadLoopBlocks.end());
       MSSAU->removeBlocks(DeadLoopBlocksSet);
     }
+
+    // The function LI.erase has some invariants that need to be preserved when
+    // it tries to remove a loop which is not the top-level loop. In particular,
+    // it requires loop's preheader to be strictly in loop's parent. We cannot
+    // just remove blocks one by one, because after removal of preheader we may
+    // break this invariant for the dead loop. So we detach and erase all dead
+    // loops beforehand.
+    for (auto *BB : DeadLoopBlocks)
+      if (LI.isLoopHeader(BB)) {
+        assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
+        Loop *DL = LI.getLoopFor(BB);
+        if (DL->getParentLoop()) {
+          for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop())
+            for (auto *BB : DL->getBlocks())
+              PL->removeBlockFromLoop(BB);
+          DL->getParentLoop()->removeChildLoop(DL);
+          LI.addTopLevelLoop(DL);
+        }
+        LI.erase(DL);
+      }
+
     for (auto *BB : DeadLoopBlocks) {
       assert(BB != L.getHeader() &&
              "Header of the current loop cannot be dead!");
       LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName()
                         << "\n");
-      if (LI.isLoopHeader(BB)) {
-        assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
-        LI.erase(LI.getLoopFor(BB));
-      }
       LI.removeBlock(BB);
-      DeleteDeadBlock(BB, &DTU);
-      ++NumLoopBlocksDeleted;
     }
+
+    DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+    DTU.applyUpdates(DTUpdates);
+    DTUpdates.clear();
+    for (auto *BB : DeadLoopBlocks)
+      DTU.deleteBB(BB);
+
+    NumLoopBlocksDeleted += DeadLoopBlocks.size();
   }
 
   /// Constant-fold terminators of blocks acculumated in FoldCandidates into the
   /// unconditional branches.
   void foldTerminators() {
-    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-
     for (BasicBlock *BB : FoldCandidates) {
       assert(LI.getLoopFor(BB) == &L && "Should be a loop block!");
       BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
@@ -453,7 +514,7 @@ private:
       Term->eraseFromParent();
 
       for (auto *DeadSucc : DeadSuccessors)
-        DTU.deleteEdge(BB, DeadSucc);
+        DTUpdates.push_back({DominatorTree::Delete, BB, DeadSucc});
 
       ++NumTerminatorsFolded;
     }
@@ -463,15 +524,18 @@ public:
   ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT,
                                 ScalarEvolution &SE,
                                 MemorySSAUpdater *MSSAU)
-      : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU) {}
+      : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU), DFS(&L),
+        DTU(DT, DomTreeUpdater::UpdateStrategy::Eager) {}
   bool run() {
     assert(L.getLoopLatch() && "Should be single latch!");
 
     // Collect all available information about status of blocks after constant
     // folding.
     analyze();
+    BasicBlock *Header = L.getHeader();
+    (void)Header;
 
-    LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
+    LLVM_DEBUG(dbgs() << "In function " << Header->getParent()->getName()
                       << ": ");
 
     if (HasIrreducibleCFG) {
@@ -483,7 +547,7 @@ public:
     if (FoldCandidates.empty()) {
       LLVM_DEBUG(
           dbgs() << "No constant terminator folding candidates found in loop "
-                 << L.getHeader()->getName() << "\n");
+                 << Header->getName() << "\n");
       return false;
     }
 
@@ -491,8 +555,7 @@ public:
     if (DeleteCurrentLoop) {
       LLVM_DEBUG(
           dbgs()
-          << "Give up constant terminator folding in loop "
-          << L.getHeader()->getName()
+          << "Give up constant terminator folding in loop " << Header->getName()
           << ": we don't currently support deletion of the current loop.\n");
       return false;
     }
@@ -503,8 +566,7 @@ public:
         L.getNumBlocks()) {
       LLVM_DEBUG(
           dbgs() << "Give up constant terminator folding in loop "
-                 << L.getHeader()->getName()
-                 << ": we don't currently"
+                 << Header->getName() << ": we don't currently"
                     " support blocks that are not dead, but will stop "
                     "being a part of the loop after constant-folding.\n");
       return false;
@@ -515,8 +577,7 @@ public:
     LLVM_DEBUG(dump());
 
     LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size()
-                      << " terminators in loop " << L.getHeader()->getName()
-                      << "\n");
+                      << " terminators in loop " << Header->getName() << "\n");
 
     // Make the actual transforms.
     handleDeadExits();
@@ -524,20 +585,36 @@ public:
 
     if (!DeadLoopBlocks.empty()) {
       LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size()
-                    << " dead blocks in loop " << L.getHeader()->getName()
-                    << "\n");
+                    << " dead blocks in loop " << Header->getName() << "\n");
       deleteDeadLoopBlocks();
+    } else {
+      // If we didn't do updates inside deleteDeadLoopBlocks, do them here.
+      DTU.applyUpdates(DTUpdates);
+      DTUpdates.clear();
     }
 
+    if (MSSAU && VerifyMemorySSA)
+      MSSAU->getMemorySSA()->verifyMemorySSA();
+
 #ifndef NDEBUG
     // Make sure that we have preserved all data structures after the transform.
-    DT.verify();
-    assert(DT.isReachableFromEntry(L.getHeader()));
+#if defined(EXPENSIVE_CHECKS)
+    assert(DT.verify(DominatorTree::VerificationLevel::Full) &&
+           "DT broken after transform!");
+#else
+    assert(DT.verify(DominatorTree::VerificationLevel::Fast) &&
+           "DT broken after transform!");
+#endif
+    assert(DT.isReachableFromEntry(Header));
     LI.verify(DT);
 #endif
 
     return true;
   }
+
+  bool foldingBreaksCurrentLoop() const {
+    return DeleteCurrentLoop;
+  }
 };
 } // namespace
 
@@ -545,7 +622,8 @@ public:
 /// branches.
 static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
                                     ScalarEvolution &SE,
-                                    MemorySSAUpdater *MSSAU) {
+                                    MemorySSAUpdater *MSSAU,
+                                    bool &IsLoopDeleted) {
   if (!EnableTermFolding)
     return false;
 
@@ -555,7 +633,9 @@ static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
     return false;
 
   ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU);
-  return BranchFolder.run();
+  bool Changed = BranchFolder.run();
+  IsLoopDeleted = Changed && BranchFolder.foldingBreaksCurrentLoop();
+  return Changed;
 }
 
 static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
@@ -587,11 +667,15 @@ static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
 }
 
 static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
-                            ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
+                            ScalarEvolution &SE, MemorySSAUpdater *MSSAU,
+                            bool &isLoopDeleted) {
   bool Changed = false;
 
   // Constant-fold terminators with known constant conditions.
-  Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU);
+  Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU, isLoopDeleted);
+
+  if (isLoopDeleted)
+    return true;
 
   // Eliminate unconditional branches by merging blocks into their predecessors.
   Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);
@@ -604,15 +688,23 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
 
 PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
                                            LoopStandardAnalysisResults &AR,
-                                           LPMUpdater &) {
+                                           LPMUpdater &LPMU) {
   Optional<MemorySSAUpdater> MSSAU;
   if (EnableMSSALoopDependency && AR.MSSA)
     MSSAU = MemorySSAUpdater(AR.MSSA);
+  bool DeleteCurrentLoop = false;
   if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
-                       MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+                       MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+                       DeleteCurrentLoop))
     return PreservedAnalyses::all();
 
-  return getLoopPassPreservedAnalyses();
+  if (DeleteCurrentLoop)
+    LPMU.markLoopAsDeleted(L, "loop-simplifycfg");
+
+  auto PA = getLoopPassPreservedAnalyses();
+  if (EnableMSSALoopDependency)
+    PA.preserve<MemorySSAAnalysis>();
+  return PA;
 }
 
 namespace {
@@ -623,7 +715,7 @@ public:
     initializeLoopSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnLoop(Loop *L, LPPassManager &) override {
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
     if (skipLoop(L))
       return false;
 
@@ -637,8 +729,13 @@ public:
       if (VerifyMemorySSA)
         MSSA->verifyMemorySSA();
     }
-    return simplifyLoopCFG(*L, DT, LI, SE,
-                           MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+    bool DeleteCurrentLoop = false;
+    bool Changed = simplifyLoopCFG(
+        *L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+        DeleteCurrentLoop);
+    if (DeleteCurrentLoop)
+      LPM.markLoopAsDeleted(*L);
+    return Changed;
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 2f7ad2126ed3..975452e13f09 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -1,9 +1,8 @@
 //===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -291,10 +290,9 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
       ColdLoopBBs.push_back(B);
       LoopBlockNumber[B] = ++i;
     }
-  std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(),
-                   [&](BasicBlock *A, BasicBlock *B) {
-                     return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
-                   });
+  llvm::stable_sort(ColdLoopBBs, [&](BasicBlock *A, BasicBlock *B) {
+    return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
+  });
 
   // Traverse preheader's instructions in reverse order becaue if A depends
   // on B (A appears after B), A needs to be sinked first before B can be
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 773ffb9df0a2..59a387a186b8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1,9 +1,8 @@
 //===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -116,6 +115,7 @@
 #include <cstdlib>
 #include <iterator>
 #include <limits>
+#include <numeric>
 #include <map>
 #include <utility>
 
@@ -155,11 +155,19 @@ static cl::opt<bool> FilterSameScaledReg(
     cl::desc("Narrow LSR search space by filtering non-optimal formulae"
              " with the same ScaledReg and Scale"));
 
+static cl::opt<bool> EnableBackedgeIndexing(
+  "lsr-backedge-indexing", cl::Hidden, cl::init(true),
+  cl::desc("Enable the generation of cross iteration indexed memops"));
+
 static cl::opt<unsigned> ComplexityLimit(
   "lsr-complexity-limit", cl::Hidden,
   cl::init(std::numeric_limits<uint16_t>::max()),
   cl::desc("LSR search space complexity limit"));
 
+static cl::opt<unsigned> SetupCostDepthLimit(
+    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
+    cl::desc("The limit on recursion depth for LSRs setup cost"));
+
 #ifndef NDEBUG
 // Stress test IV chain generation.
 static cl::opt<bool> StressIVChain(
@@ -1007,10 +1015,15 @@ namespace {
 
 /// This class is used to measure and compare candidate formulae.
 class Cost {
+  const Loop *L = nullptr;
+  ScalarEvolution *SE = nullptr;
+  const TargetTransformInfo *TTI = nullptr;
   TargetTransformInfo::LSRCost C;
 
 public:
-  Cost() {
+  Cost() = delete;
+  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI) :
+    L(L), SE(&SE), TTI(&TTI) {
     C.Insns = 0;
     C.NumRegs = 0;
     C.AddRecCost = 0;
@@ -1021,7 +1034,7 @@ public:
     C.ScaleCost = 0;
   }
 
-  bool isLess(Cost &Other, const TargetTransformInfo &TTI);
+  bool isLess(Cost &Other);
 
   void Lose();
 
@@ -1040,12 +1053,9 @@ public:
     return C.NumRegs == ~0u;
   }
 
-  void RateFormula(const TargetTransformInfo &TTI,
-                   const Formula &F,
+  void RateFormula(const Formula &F,
                    SmallPtrSetImpl<const SCEV *> &Regs,
                    const DenseSet<const SCEV *> &VisitedRegs,
-                   const Loop *L,
-                   ScalarEvolution &SE, DominatorTree &DT,
                    const LSRUse &LU,
                    SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
 
@@ -1053,17 +1063,11 @@ public:
   void dump() const;
 
 private:
-  void RateRegister(const SCEV *Reg,
-                    SmallPtrSetImpl<const SCEV *> &Regs,
-                    const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT,
-                    const TargetTransformInfo &TTI);
-  void RatePrimaryRegister(const SCEV *Reg,
+  void RateRegister(const Formula &F, const SCEV *Reg,
+                    SmallPtrSetImpl<const SCEV *> &Regs);
+  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
-                           const Loop *L,
-                           ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
-                           const TargetTransformInfo &TTI);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
 };
 
 /// An operand value in an instruction which is to be replaced with some
@@ -1208,19 +1212,36 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  bool HasBaseReg, int64_t Scale,
                                  Instruction *Fixup = nullptr);
 
+static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
+  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
+    return 1;
+  if (Depth == 0)
+    return 0;
+  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
+    return getSetupCost(S->getStart(), Depth - 1);
+  if (auto S = dyn_cast<SCEVCastExpr>(Reg))
+    return getSetupCost(S->getOperand(), Depth - 1);
+  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
+    return std::accumulate(S->op_begin(), S->op_end(), 0,
+                           [&](unsigned i, const SCEV *Reg) {
+                             return i + getSetupCost(Reg, Depth - 1);
+                           });
+  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
+    return getSetupCost(S->getLHS(), Depth - 1) +
+           getSetupCost(S->getRHS(), Depth - 1);
+  return 0;
+}
+
 /// Tally up interesting quantities from the given register.
-void Cost::RateRegister(const SCEV *Reg,
-                        SmallPtrSetImpl<const SCEV *> &Regs,
-                        const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT,
-                        const TargetTransformInfo &TTI) {
+void Cost::RateRegister(const Formula &F, const SCEV *Reg,
+                        SmallPtrSetImpl<const SCEV *> &Regs) {
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, it should be an invariant
     // with respect to L since L is the innermost loop (at least
     // for now LSR only handles innermost loops).
     if (AR->getLoop() != L) {
       // If the AddRec exists, consider it's register free and leave it alone.
-      if (isExistingPhi(AR, SE))
+      if (isExistingPhi(AR, *SE))
         return;
 
       // It is bad to allow LSR for current loop to add induction variables
@@ -1236,16 +1257,24 @@ void Cost::RateRegister(const SCEV *Reg,
     }
 
     unsigned LoopCost = 1;
-    if (TTI.shouldFavorPostInc()) {
-      const SCEV *LoopStep = AR->getStepRecurrence(SE);
-      if (isa<SCEVConstant>(LoopStep)) {
-        // Check if a post-indexed load/store can be used.
-        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
-            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+    if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
+        TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
+
+      // If the step size matches the base offset, we could use pre-indexed
+      // addressing.
+      if (TTI->shouldFavorBackedgeIndex(L)) {
+        if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
+          if (Step->getAPInt() == F.BaseOffset)
+            LoopCost = 0;
+      }
+
+      if (TTI->shouldFavorPostInc()) {
+        const SCEV *LoopStep = AR->getStepRecurrence(*SE);
+        if (isa<SCEVConstant>(LoopStep)) {
           const SCEV *LoopStart = AR->getStart();
           if (!isa<SCEVConstant>(LoopStart) &&
-            SE.isLoopInvariant(LoopStart, L))
-              LoopCost = 0;
+              SE->isLoopInvariant(LoopStart, L))
+            LoopCost = 0;
         }
       }
     }
@@ -1255,7 +1284,7 @@ void Cost::RateRegister(const SCEV *Reg,
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
+        RateRegister(F, AR->getOperand(1), Regs);
         if (isLoser())
           return;
       }
@@ -1265,43 +1294,34 @@ void Cost::RateRegister(const SCEV *Reg,
 
   // Rough heuristic; favor registers which don't require extra setup
   // instructions in the preheader.
-  if (!isa<SCEVUnknown>(Reg) &&
-      !isa<SCEVConstant>(Reg) &&
-      !(isa<SCEVAddRecExpr>(Reg) &&
-        (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
-         isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
-    ++C.SetupCost;
+  C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
+  // Ensure we don't, even with the recursion limit, produce invalid costs.
+  C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
 
   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
-               SE.hasComputableLoopEvolution(Reg, L);
+               SE->hasComputableLoopEvolution(Reg, L);
 }
 
 /// Record this register in the set. If we haven't seen it before, rate
 /// it. Optional LoserRegs provides a way to declare any formula that refers to
 /// one of those regs an instant loser.
-void Cost::RatePrimaryRegister(const SCEV *Reg,
+void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                                SmallPtrSetImpl<const SCEV *> &Regs,
-                               const Loop *L,
-                               ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
-                               const TargetTransformInfo &TTI) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT, TTI);
+    RateRegister(F, Reg, Regs);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
 }
 
-void Cost::RateFormula(const TargetTransformInfo &TTI,
-                       const Formula &F,
+void Cost::RateFormula(const Formula &F,
                        SmallPtrSetImpl<const SCEV *> &Regs,
                        const DenseSet<const SCEV *> &VisitedRegs,
-                       const Loop *L,
-                       ScalarEvolution &SE, DominatorTree &DT,
                        const LSRUse &LU,
                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
@@ -1314,7 +1334,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
+    RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
     if (isLoser())
       return;
   }
@@ -1323,7 +1343,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
+    RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
     if (isLoser())
       return;
   }
@@ -1334,11 +1354,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
     // Do not count the base and a possible second register if the target
     // allows to fold 2 registers.
     C.NumBaseAdds +=
-        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
   C.NumBaseAdds += (F.UnfoldedOffset != 0);
 
   // Accumulate non-free scaling amounts.
-  C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+  C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L);
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
@@ -1353,7 +1373,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
     // Check with target if this offset with this instruction is
     // specifically not supported.
     if (LU.Kind == LSRUse::Address && Offset != 0 &&
-        !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+        !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
                               Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
       C.NumBaseAdds++;
   }
@@ -1366,7 +1386,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
   // additional instruction (at least fill).
-  unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+  unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
   if (C.NumRegs > TTIRegNum) {
     // Cost already exceeded TTIRegNum, then only newly added register can add
     // new instructions.
@@ -1386,7 +1406,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   //
   // For {-10, +, 1}:
   // i = i + 1;
-  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
+  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
+      !TTI->canMacroFuseCmp())
     C.Insns++;
   // Each new AddRec adds 1 instruction to calculation.
   C.Insns += (C.AddRecCost - PrevAddRecCost);
@@ -1410,11 +1431,11 @@ void Cost::Lose() {
 }
 
 /// Choose the lower cost.
-bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+bool Cost::isLess(Cost &Other) {
   if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
       C.Insns != Other.C.Insns)
     return C.Insns < Other.C.Insns;
-  return TTI.isLSRCostLess(C, Other.C);
+  return TTI->isLSRCostLess(C, Other.C);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1888,8 +1909,11 @@ class LSRInstance {
   ScalarEvolution &SE;
   DominatorTree &DT;
   LoopInfo &LI;
+  AssumptionCache &AC;
+  TargetLibraryInfo &LibInfo;
   const TargetTransformInfo &TTI;
   Loop *const L;
+  bool FavorBackedgeIndex = false;
   bool Changed = false;
 
   /// This is the insert position that the current loop's induction variable
@@ -1910,7 +1934,7 @@ class LSRInstance {
   SmallSetVector<Type *, 4> Types;
 
   /// The list of interesting uses.
-  SmallVector<LSRUse, 16> Uses;
+  mutable SmallVector<LSRUse, 16> Uses;
 
   /// Track which uses use which register candidates.
   RegUseTracker RegUses;
@@ -2025,7 +2049,8 @@ class LSRInstance {
 
 public:
   LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
-              LoopInfo &LI, const TargetTransformInfo &TTI);
+              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
+              TargetLibraryInfo &LibInfo);
 
   bool getChanged() const { return Changed; }
 
@@ -2804,7 +2829,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
 /// TODO: Consider IVInc free if it's already used in another chains.
 static bool
 isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
-                  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
+                  ScalarEvolution &SE) {
   if (StressIVChain)
     return true;
 
@@ -3064,7 +3089,7 @@ void LSRInstance::CollectChains() {
   for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
        UsersIdx < NChains; ++UsersIdx) {
     if (!isProfitableChain(IVChainVec[UsersIdx],
-                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
+                           ChainUsersVec[UsersIdx].FarUsers, SE))
       continue;
     // Preserve the chain at UsesIdx.
     if (ChainIdx != UsersIdx)
@@ -3078,7 +3103,7 @@ void LSRInstance::CollectChains() {
 void LSRInstance::FinalizeChain(IVChain &Chain) {
   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
   LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+  
   for (const IVInc &Inc : Chain) {
     LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");
     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3100,7 +3125,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
   MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
   int64_t IncOffset = IncConst->getValue()->getSExtValue();
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
-                        IncOffset, /*HaseBaseReg=*/false))
+                        IncOffset, /*HasBaseReg=*/false))
     return false;
 
   return true;
@@ -3210,6 +3235,9 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
 }
 
 void LSRInstance::CollectFixupsAndInitialFormulae() {
+  BranchInst *ExitBranch = nullptr;
+  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &LibInfo);
+
   for (const IVStrideUse &U : IU) {
     Instruction *UserInst = U.getUser();
     // Skip IV users that are part of profitable IV Chains.
@@ -3239,6 +3267,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     // equality icmps, thanks to IndVarSimplify.
     if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
       if (CI->isEquality()) {
+        // If CI can be saved in some target, like replaced inside hardware loop
+        // in PowerPC, no need to generate initial formulae for it.
+        if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
+          continue;
         // Swap the operands if needed to put the OperandValToReplace on the
         // left, for consistency.
         Value *NV = CI->getOperand(1);
@@ -3738,10 +3770,11 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
 void LSRInstance::GenerateConstantOffsetsImpl(
     LSRUse &LU, unsigned LUIdx, const Formula &Base,
     const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
-  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
-  for (int64_t Offset : Worklist) {
+
+  auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
     Formula F = Base;
     F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+
     if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
                    LU.AccessTy, F)) {
       // Add the offset to the base register.
@@ -3761,7 +3794,35 @@ void LSRInstance::GenerateConstantOffsetsImpl(
 
       (void)InsertFormula(LU, LUIdx, F);
     }
+  };
+
+  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+
+  // With constant offsets and constant steps, we can generate pre-inc
+  // accesses by having the offset equal the step. So, for access #0 with a
+  // step of 8, we generate a G - 8 base which would require the first access
+  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
+  // for itself and hopefully becomes the base for other accesses. This means
+  // that a single pre-indexed access can be generated to become the new
+  // base pointer for each iteration of the loop, resulting in no extra add/sub
+  // instructions for pointer updating.
+  if (FavorBackedgeIndex && LU.Kind == LSRUse::Address) {
+    if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
+      if (auto *StepRec =
+          dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
+        const APInt &StepInt = StepRec->getAPInt();
+        int64_t Step = StepInt.isNegative() ?
+          StepInt.getSExtValue() : StepInt.getZExtValue();
+
+        for (int64_t Offset : Worklist) {
+          Offset -= Step;
+          GenerateOffset(G, Offset);
+        }
+      }
+    }
   }
+  for (int64_t Offset : Worklist)
+    GenerateOffset(G, Offset);
 
   int64_t Imm = ExtractImmediate(G, SE);
   if (G->isZero() || Imm == 0)
@@ -3968,9 +4029,27 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
     if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
       Formula F = Base;
 
-      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
-      for (const SCEV *&BaseReg : F.BaseRegs)
-        BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+      // Sometimes SCEV is able to prove zero during ext transform. It may
+      // happen if SCEV did not do all possible transforms while creating the
+      // initial node (maybe due to depth limitations), but it can do them while
+      // taking ext.
+      if (F.ScaledReg) {
+        const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+        if (NewScaledReg->isZero())
+         continue;
+        F.ScaledReg = NewScaledReg;
+      }
+      bool HasZeroBaseReg = false;
+      for (const SCEV *&BaseReg : F.BaseRegs) {
+        const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+        if (NewBaseReg->isZero()) {
+          HasZeroBaseReg = true;
+          break;
+        }
+        BaseReg = NewBaseReg;
+      }
+      if (HasZeroBaseReg)
+        continue;
 
       // TODO: This assumes we've done basic processing on all uses and
       // have an idea what the register usage is.
@@ -4067,11 +4146,17 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
 
       // Conservatively examine offsets between this orig reg a few selected
       // other orig regs.
+      int64_t First = Imms.begin()->first;
+      int64_t Last = std::prev(Imms.end())->first;
+      // Compute (First + Last) / 2 without overflow using the fact that
+      // First + Last = 2 * (First & Last) + (First ^ Last).
+      int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
+      // If the result is negative and First is odd and Last even (or vice versa),
+      // we rounded towards -inf. Add 1 in that case, to round towards 0.
+      Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
       ImmMapTy::const_iterator OtherImms[] = {
-        Imms.begin(), std::prev(Imms.end()),
-        Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
-                         2)
-      };
+          Imms.begin(), std::prev(Imms.end()),
+         Imms.lower_bound(Avg)};
       for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
         ImmMapTy::const_iterator M = OtherImms[i];
         if (M == J || M == JE) continue;
@@ -4249,9 +4334,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
       // avoids the need to recompute this information across formulae using the
       // same bad AddRec. Passing LoserRegs is also essential unless we remove
       // the corresponding bad register from the Regs set.
-      Cost CostF;
+      Cost CostF(L, SE, TTI);
       Regs.clear();
-      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
+      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
       if (CostF.isLoser()) {
         // During initial formula generation, undesirable formulae are generated
         // by uses within other loops that have some non-trivial address mode or
@@ -4282,10 +4367,10 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 
         Formula &Best = LU.Formulae[P.first->second];
 
-        Cost CostBest;
+        Cost CostBest(L, SE, TTI);
         Regs.clear();
-        CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
-        if (CostF.isLess(CostBest, TTI))
+        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
+        if (CostF.isLess(CostBest))
           std::swap(F, Best);
         LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                    dbgs() << "\n"
@@ -4357,7 +4442,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
              I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
           if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
             Formula NewF = F;
-            NewF.BaseOffset += C->getValue()->getSExtValue();
+            //FIXME: Formulas should store bitwidth to do wrapping properly.
+            //       See PR41034.
+            NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                 (I - F.BaseRegs.begin()));
             if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -4400,7 +4487,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
 /// When there are many registers for expressions like A, A+1, A+2, etc.,
 /// allocate a single register for them.
 void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
-  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+  if (EstimateSearchSpaceComplexity() < ComplexityLimit) 
     return;
 
   LLVM_DEBUG(
@@ -4533,12 +4620,13 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
 
       // If the new register numbers are the same, choose the Formula with
       // less Cost.
-      Cost CostFA, CostFB;
+      Cost CostFA(L, SE, TTI);
+      Cost CostFB(L, SE, TTI);
       Regs.clear();
-      CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
+      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
       Regs.clear();
-      CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
-      return CostFA.isLess(CostFB, TTI);
+      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
+      return CostFA.isLess(CostFB);
     };
 
     bool Any = false;
@@ -4824,7 +4912,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
       ReqRegs.insert(S);
 
   SmallPtrSet<const SCEV *, 16> NewRegs;
-  Cost NewCost;
+  Cost NewCost(L, SE, TTI);
   for (const Formula &F : LU.Formulae) {
     // Ignore formulae which may not be ideal in terms of register reuse of
     // ReqRegs.  The formula should use all required registers before
@@ -4848,8 +4936,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
     // the current best, prune the search at that point.
     NewCost = CurCost;
     NewRegs = CurRegs;
-    NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
-    if (NewCost.isLess(SolutionCost, TTI)) {
+    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
+    if (NewCost.isLess(SolutionCost)) {
       Workspace.push_back(&F);
       if (Workspace.size() != Uses.size()) {
         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
@@ -4858,9 +4946,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
       } else {
         LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
-                   dbgs() << ".\n Regs:"; for (const SCEV *S
-                                               : NewRegs) dbgs()
-                                          << ' ' << *S;
+                   dbgs() << ".\nRegs:\n";
+                   for (const SCEV *S : NewRegs) dbgs()
+                      << "- " << *S << "\n";
                    dbgs() << '\n');
 
         SolutionCost = NewCost;
@@ -4875,9 +4963,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
 /// vector.
 void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   SmallVector<const Formula *, 8> Workspace;
-  Cost SolutionCost;
+  Cost SolutionCost(L, SE, TTI);
   SolutionCost.Lose();
-  Cost CurCost;
+  Cost CurCost(L, SE, TTI);
   SmallPtrSet<const SCEV *, 16> CurRegs;
   DenseSet<const SCEV *> VisitedRegs;
   Workspace.reserve(Uses.size());
@@ -5215,6 +5303,7 @@ void LSRInstance::RewriteForPHI(
   DenseMap<BasicBlock *, Value *> Inserted;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+      bool needUpdateFixups = false;
       BasicBlock *BB = PN->getIncomingBlock(i);
 
       // If this is a critical edge, split the edge so that we do not insert
@@ -5233,7 +5322,7 @@ void LSRInstance::RewriteForPHI(
             NewBB = SplitCriticalEdge(BB, Parent,
                                       CriticalEdgeSplittingOptions(&DT, &LI)
                                           .setMergeIdenticalEdges()
-                                          .setDontDeleteUselessPHIs());
+                                          .setKeepOneInputPHIs());
           } else {
             SmallVector<BasicBlock*, 2> NewBBs;
             SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
@@ -5253,6 +5342,8 @@ void LSRInstance::RewriteForPHI(
             e = PN->getNumIncomingValues();
             BB = NewBB;
             i = PN->getBasicBlockIndex(BB);
+
+            needUpdateFixups = true;
           }
         }
       }
@@ -5277,6 +5368,44 @@ void LSRInstance::RewriteForPHI(
         PN->setIncomingValue(i, FullV);
         Pair.first->second = FullV;
       }
+
+      // If LSR splits critical edge and phi node has other pending
+      // fixup operands, we need to update those pending fixups. Otherwise
+      // formulae will not be implemented completely and some instructions
+      // will not be eliminated.
+      if (needUpdateFixups) {
+        for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
+          for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
+            // If fixup is supposed to rewrite some operand in the phi
+            // that was just updated, it may be already moved to
+            // another phi node. Such fixup requires update.
+            if (Fixup.UserInst == PN) {
+              // Check if the operand we try to replace still exists in the
+              // original phi.
+              bool foundInOriginalPHI = false;
+              for (const auto &val : PN->incoming_values())
+                if (val == Fixup.OperandValToReplace) {
+                  foundInOriginalPHI = true;
+                  break;
+                }
+
+              // If fixup operand found in original PHI - nothing to do.
+              if (foundInOriginalPHI)
+                continue;
+
+              // Otherwise it might be moved to another PHI and requires update.
+              // If fixup operand not found in any of the incoming blocks that
+              // means we have already rewritten it - nothing to do.
+              for (const auto &Block : PN->blocks())
+                for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
+                     ++I) {
+                  PHINode *NewPN = cast<PHINode>(I);
+                  for (const auto &val : NewPN->incoming_values())
+                    if (val == Fixup.OperandValToReplace)
+                      Fixup.UserInst = NewPN;
+                }
+            }
+      }
     }
 }
 
@@ -5360,8 +5489,11 @@ void LSRInstance::ImplementSolution(
 
 LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                          DominatorTree &DT, LoopInfo &LI,
-                         const TargetTransformInfo &TTI)
-    : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
+                         const TargetTransformInfo &TTI, AssumptionCache &AC,
+                         TargetLibraryInfo &LibInfo)
+    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), LibInfo(LibInfo), TTI(TTI), L(L),
+      FavorBackedgeIndex(EnableBackedgeIndexing &&
+                         TTI.shouldFavorBackedgeIndex(L)) {
   // If LoopSimplify form is not available, stay out of trouble.
   if (!L->isLoopSimplifyForm())
     return;
@@ -5556,6 +5688,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<DominatorTreeWrapperPass>();
   AU.addRequired<ScalarEvolutionWrapperPass>();
   AU.addPreserved<ScalarEvolutionWrapperPass>();
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
   // Requiring LoopSimplify a second time here prevents IVUsers from running
   // twice, since LoopSimplify was invalidated by running ScalarEvolution.
   AU.addRequiredID(LoopSimplifyID);
@@ -5566,11 +5700,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
 
 static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                                DominatorTree &DT, LoopInfo &LI,
-                               const TargetTransformInfo &TTI) {
+                               const TargetTransformInfo &TTI,
+                               AssumptionCache &AC,
+                               TargetLibraryInfo &LibInfo) {
+
   bool Changed = false;
 
   // Run the main LSR transformation.
-  Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
+  Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, LibInfo).getChanged();
 
   // Remove any extra phis created by processing inner loops.
   Changed |= DeleteDeadPHIs(L->getHeader());
@@ -5601,14 +5738,17 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
       *L->getHeader()->getParent());
-  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+      *L->getHeader()->getParent());
+  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo);
 }
 
 PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
                                               LoopStandardAnalysisResults &AR,
                                               LPMUpdater &) {
   if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
-                          AR.DT, AR.LI, AR.TTI))
+                          AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI))
     return PreservedAnalyses::all();
 
   return getLoopPassPreservedAnalyses();
diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index da46210b6fdd..86891eb451bb 100644
--- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -1,9 +1,8 @@
 //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -295,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
     return LoopUnrollResult::Unmodified;
 
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, SE, TTI, OptLevel, None, None, None, None, None, None);
+      L, SE, TTI, nullptr, nullptr, OptLevel,
+      None, None, None, None, None, None);
   if (AllowUnrollAndJam.getNumOccurrences() > 0)
     UP.UnrollAndJam = AllowUnrollAndJam;
   if (UnrollAndJamThreshold.getNumOccurrences() > 0)
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 38b80f48ed0e..2fa7436213dd 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1,9 +1,8 @@
 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,7 +23,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -56,6 +57,7 @@
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>
 #include <cassert>
@@ -69,6 +71,12 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loop-unroll"
 
+cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
+    "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
+    cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
+             " the current top-most loop. This is somtimes preferred to reduce"
+             " compile time."));
+
 static cl::opt<unsigned>
     UnrollThreshold("unroll-threshold", cl::Hidden,
                     cl::desc("The cost threshold for loop unrolling"));
@@ -166,7 +174,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
 /// Gather the various unrolling parameters based on the defaults, compiler
 /// flags, TTI overrides and user specified parameters.
 TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
-    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
     Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
@@ -199,9 +208,12 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
   TTI.getUnrollingPreferences(L, SE, UP);
 
   // Apply size attributes
-  if (L->getHeader()->getParent()->optForSize()) {
+  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+  if (OptForSize) {
     UP.Threshold = UP.OptSizeThreshold;
     UP.PartialThreshold = UP.PartialOptSizeThreshold;
+    UP.MaxPercentThresholdBoost = 100;
   }
 
   // Apply any user values specified by cl::opt
@@ -964,8 +976,10 @@ bool llvm::computeUnrollCount(
 static LoopUnrollResult tryToUnrollLoop(
     Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     const TargetTransformInfo &TTI, AssumptionCache &AC,
-    OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
-    bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
+    OptimizationRemarkEmitter &ORE,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+    bool PreserveLCSSA, int OptLevel,
+    bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
     Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
     Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
     Optional<bool> ProvidedAllowPeeling) {
@@ -986,15 +1000,19 @@ static LoopUnrollResult tryToUnrollLoop(
   if (OnlyWhenForced && !(TM & TM_Enable))
     return LoopUnrollResult::Unmodified;
 
+  bool OptForSize = L->getHeader()->getParent()->hasOptSize();
   unsigned NumInlineCandidates;
   bool NotDuplicatable;
   bool Convergent;
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+      L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
       ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
       ProvidedAllowPeeling);
-  // Exit early if unrolling is disabled.
-  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
+
+  // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
+  // as threshold later on.
+  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
+      !OptForSize)
     return LoopUnrollResult::Unmodified;
 
   SmallPtrSet<const Value *, 32> EphValues;
@@ -1009,6 +1027,12 @@ static LoopUnrollResult tryToUnrollLoop(
                       << " instructions.\n");
     return LoopUnrollResult::Unmodified;
   }
+
+  // When optimizing for size, use LoopSize as threshold, to (fully) unroll
+  // loops, if it does not increase code size.
+  if (OptForSize)
+    UP.Threshold = std::max(UP.Threshold, LoopSize);
+
   if (NumInlineCandidates != 0) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
     return LoopUnrollResult::Unmodified;
@@ -1081,8 +1105,10 @@ static LoopUnrollResult tryToUnrollLoop(
   // Unroll the loop.
   Loop *RemainderLoop = nullptr;
   LoopUnrollResult UnrollResult = UnrollLoop(
-      L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
-      UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+      L,
+      {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
+       UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+       ForgetAllSCEV},
       LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
   if (UnrollResult == LoopUnrollResult::Unmodified)
     return LoopUnrollResult::Unmodified;
@@ -1132,6 +1158,11 @@ public:
   /// metadata are considered. All other loops are skipped.
   bool OnlyWhenForced;
 
+  /// If false, when SCEV is invalidated, only forget everything in the
+  /// top-most loop (call forgetTopMostLoop), of the loop being processed.
+  /// Otherwise, forgetAllLoops and rebuild when needed next.
+  bool ForgetAllSCEV;
+
   Optional<unsigned> ProvidedCount;
   Optional<unsigned> ProvidedThreshold;
   Optional<bool> ProvidedAllowPartial;
@@ -1140,15 +1171,16 @@ public:
   Optional<bool> ProvidedAllowPeeling;
 
   LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
-             Optional<unsigned> Threshold = None,
+             bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
              Optional<unsigned> Count = None,
              Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
              Optional<bool> UpperBound = None,
              Optional<bool> AllowPeeling = None)
       : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
-        ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
-        ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
-        ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
+        ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
+        ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
+        ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
+        ProvidedAllowPeeling(AllowPeeling) {
     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
 
@@ -1171,9 +1203,10 @@ public:
     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
     LoopUnrollResult Result = tryToUnrollLoop(
-        L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
-        ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
-        ProvidedUpperBound, ProvidedAllowPeeling);
+        L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
+        PreserveLCSSA, OptLevel, OnlyWhenForced,
+        ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
+        ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
 
     if (Result == LoopUnrollResult::FullyUnrolled)
       LPM.markLoopAsDeleted(*L);
@@ -1203,14 +1236,14 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
-                                 int Threshold, int Count, int AllowPartial,
-                                 int Runtime, int UpperBound,
+                                 bool ForgetAllSCEV, int Threshold, int Count,
+                                 int AllowPartial, int Runtime, int UpperBound,
                                  int AllowPeeling) {
   // TODO: It would make more sense for this function to take the optionals
   // directly, but that's dangerous since it would silently break out of tree
   // callers.
   return new LoopUnroll(
-      OptLevel, OnlyWhenForced,
+      OptLevel, OnlyWhenForced, ForgetAllSCEV,
       Threshold == -1 ? None : Optional<unsigned>(Threshold),
       Count == -1 ? None : Optional<unsigned>(Count),
       AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
@@ -1219,8 +1252,10 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
       AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
 }
 
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
-  return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
+                                       bool ForgetAllSCEV) {
+  return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
+                              0, 0, 0, 0);
 }
 
 PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1250,8 +1285,9 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
 
   bool Changed =
       tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+                      /*BFI*/ nullptr, /*PSI*/ nullptr,
                       /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
-                      /*Count*/ None,
+                      ForgetSCEV, /*Count*/ None,
                       /*Threshold*/ None, /*AllowPartial*/ false,
                       /*Runtime*/ false, /*UpperBound*/ false,
                       /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1352,6 +1388,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
       AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
   ProfileSummaryInfo *PSI =
       MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
 
   bool Changed = false;
 
@@ -1361,7 +1399,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
   // will simplify all loops, regardless of whether anything end up being
   // unrolled.
   for (auto &L : LI) {
-    Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
+    Changed |=
+        simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
     Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
   }
 
@@ -1387,9 +1426,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
     // The API here is quite complex to call and we allow to select some
     // flavors of unrolling during construction time (by setting UnrollOpts).
     LoopUnrollResult Result = tryToUnrollLoop(
-        &L, DT, &LI, SE, TTI, AC, ORE,
+        &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
         /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
-        /*Count*/ None,
+        UnrollOpts.ForgetSCEV, /*Count*/ None,
         /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
         UnrollOpts.AllowUpperBound, LocalAllowPeeling);
     Changed |= Result != LoopUnrollResult::Unmodified;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 4a089dfa7dbf..b5b8e720069c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1,9 +1,8 @@
 //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -658,7 +657,7 @@ bool LoopUnswitch::processCurrentLoop() {
   }
 
   // Do not do non-trivial unswitch while optimizing for size.
-  // FIXME: Use Function::optForSize().
+  // FIXME: Use Function::hasOptSize().
   if (OptimizeForSize ||
       loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
     return false;
@@ -1405,8 +1404,8 @@ static void RemoveFromWorklist(Instruction *I,
 /// When we find that I really equals V, remove I from the
 /// program, replacing all uses with V and update the worklist.
 static void ReplaceUsesOfWith(Instruction *I, Value *V,
-                              std::vector<Instruction*> &Worklist,
-                              Loop *L, LPPassManager *LPM) {
+                              std::vector<Instruction *> &Worklist, Loop *L,
+                              LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
   LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
 
   // Add uses to the worklist, which may be dead now.
@@ -1420,8 +1419,11 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
   LPM->deleteSimpleAnalysisValue(I, L);
   RemoveFromWorklist(I, Worklist);
   I->replaceAllUsesWith(V);
-  if (!I->mayHaveSideEffects())
+  if (!I->mayHaveSideEffects()) {
+    if (MSSAU)
+      MSSAU->removeMemoryAccess(I);
     I->eraseFromParent();
+  }
   ++NumSimplify;
 }
 
@@ -1548,8 +1550,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
                        ConstantInt::getTrue(Context), NewSISucc);
     // Release the PHI operands for this edge.
     for (PHINode &PN : NewSISucc->phis())
-      PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
-                          UndefValue::get(PN.getType()));
+      PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
     // Tell the domtree about the new block. We don't fully update the
     // domtree here -- instead we force it to do a full recomputation
     // after the pass is complete -- but we do need to inform it of
@@ -1596,7 +1597,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
     // 'false'.  TODO: update the domtree properly so we can pass it here.
     if (Value *V = SimplifyInstruction(I, DL))
       if (LI->replacementPreservesLCSSAForm(I, V)) {
-        ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+        ReplaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
         continue;
       }
 
@@ -1616,7 +1617,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
 
         // Resolve any single entry PHI nodes in Succ.
         while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
-          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+          ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM,
+                            MSSAU.get());
 
         // If Succ has any successors with PHI nodes, update them to have
         // entries coming from Pred instead of Succ.
diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 83861b98fbd8..896dd8bcb922 100644
--- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -1,9 +1,8 @@
 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -357,14 +356,22 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
 /// 1) Check all load store in loop body are non atomic & non volatile.
 /// 2) Check function call safety, by ensuring its not accessing memory.
 /// 3) Loop body shouldn't have any may throw instruction.
+/// 4) Loop body shouldn't have any convergent or noduplicate instructions.
 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
   assert(I != nullptr && "Null instruction found!");
   // Check function call safety
-  if (auto *Call = dyn_cast<CallBase>(I))
+  if (auto *Call = dyn_cast<CallBase>(I)) {
+    if (Call->isConvergent() || Call->cannotDuplicate()) {
+      LLVM_DEBUG(dbgs() << "    Convergent call site found.\n");
+      return false;
+    }
+
     if (!AA->doesNotAccessMemory(Call)) {
       LLVM_DEBUG(dbgs() << "    Unsafe call site found.\n");
       return false;
     }
+  }
+
   // Avoid loops with possiblity of throw
   if (I->mayThrow()) {
     LLVM_DEBUG(dbgs() << "    May throw instruction found in loop body\n");
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index c165c5ece95c..e076424d9042 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -1,9 +1,8 @@
 //===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
   Value *Cmp = CXI->getCompareOperand();
   Value *Val = CXI->getNewValOperand();
 
-  LoadInst *Orig = Builder.CreateLoad(Ptr);
+  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
   Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
   Value *Res = Builder.CreateSelect(Equal, Val, Orig);
   Builder.CreateStore(Res, Ptr);
@@ -45,7 +44,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
   Value *Ptr = RMWI->getPointerOperand();
   Value *Val = RMWI->getValOperand();
 
-  LoadInst *Orig = Builder.CreateLoad(Ptr);
+  LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
   Value *Res = nullptr;
 
   switch (RMWI->getOperation()) {
@@ -87,6 +86,12 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
     Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
                                Orig, Val);
     break;
+  case AtomicRMWInst::FAdd:
+    Res = Builder.CreateFAdd(Orig, Val);
+    break;
+  case AtomicRMWInst::FSub:
+    Res = Builder.CreateFSub(Orig, Val);
+    break;
   }
   Builder.CreateStore(Res, Ptr);
   RMWI->replaceAllUsesWith(Orig);
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 68bfa0030395..0d67c0d740ec 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -1,9 +1,8 @@
 //===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4867b33d671f..9489e01774d6 100644
--- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -1,9 +1,8 @@
 //===- LowerGuardIntrinsic.cpp - Lower the guard intrinsic ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
new file mode 100644
index 000000000000..5342f2ddcb6b
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -0,0 +1,85 @@
+//===- LowerWidenableCondition.cpp - Lower widenable condition ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.experimental.widenable.condition intrinsic to its
+// default value, which is i1 true.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+
+using namespace llvm;
+
+namespace {
+struct LowerWidenableConditionLegacyPass : public FunctionPass {
+  static char ID;
+  LowerWidenableConditionLegacyPass() : FunctionPass(ID) {
+    initializeLowerWidenableConditionLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+};
+}
+
+static bool lowerWidenableCondition(Function &F) {
+  // Cheaply bail out when there is nothing to do: the intrinsic is either not
+  // declared in this module or has no uses.
+  auto *WCDecl = F.getParent()->getFunction(
+      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+  if (!WCDecl || WCDecl->use_empty())
+    return false;
+
+  using namespace llvm::PatternMatch;
+  SmallVector<CallInst *, 8> ToLower;
+  for (auto &I : instructions(F))
+    if (match(&I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+      ToLower.push_back(cast<CallInst>(&I));
+
+  if (ToLower.empty())
+    return false;
+
+  for (auto *CI : ToLower) {
+    CI->replaceAllUsesWith(ConstantInt::getTrue(CI->getContext()));
+    CI->eraseFromParent();
+  }
+  return true;
+}
+
+bool LowerWidenableConditionLegacyPass::runOnFunction(Function &F) {
+  return lowerWidenableCondition(F);
+}
+
+char LowerWidenableConditionLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerWidenableConditionLegacyPass, "lower-widenable-condition",
+                "Lower the widenable condition to default true value", false,
+                false)
+
+Pass *llvm::createLowerWidenableConditionPass() {
+  return new LowerWidenableConditionLegacyPass();
+}
+
+PreservedAnalyses LowerWidenableConditionPass::run(Function &F,
+                                               FunctionAnalysisManager &AM) {
+  if (lowerWidenableCondition(F))
+    return PreservedAnalyses::none();
+
+  return PreservedAnalyses::all();
+}
diff --git a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 1ba3994eba0e..789232e0f5ce 100644
--- a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -1,9 +1,8 @@
 //===- MakeGuardsExplicit.cpp - Turn guard intrinsics into guard branches -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ced923d6973d..5a055139be4f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1,9 +1,8 @@
 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -279,8 +278,8 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                             unsigned Alignment, Instruction *Inst) {
   int64_t End = Start+Size;
 
-  range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
-    [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
+  range_iterator I = partition_point(
+      Ranges, [=](const MemsetRange &O) { return O.End < Start; });
 
   // We now know that I == E, in which case we didn't find anything to merge
   // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
@@ -413,7 +412,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
       if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
-      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
       if (isa<UndefValue>(ByteVal) && StoredByte)
         ByteVal = StoredByte;
       if (ByteVal != StoredByte)
@@ -750,7 +749,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
   auto *V = SI->getOperand(0);
-  if (Value *ByteVal = isBytewiseValue(V)) {
+  if (Value *ByteVal = isBytewiseValue(V, DL)) {
     if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                               ByteVal)) {
       BBI = I->getIterator(); // Don't invalidate iterator.
@@ -1135,8 +1134,10 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
   Value *MemsetLen = Builder.CreateSelect(
       Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
-  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
-                       MemsetLen, Align);
+  Builder.CreateMemSet(
+      Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
+                        SrcSize),
+      MemSet->getOperand(1), MemsetLen, Align);
 
   MD->removeInstruction(MemSet);
   MemSet->eraseFromParent();
@@ -1228,7 +1229,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
   // If copying from a constant, try to turn the memcpy into a memset.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
-      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
+                                           M->getModule()->getDataLayout())) {
         IRBuilder<> Builder(M);
         Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                              M->getDestAlignment(), false);
diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp
index 69fd8b163a07..3d047a193267 100644
--- a/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/lib/Transforms/Scalar/MergeICmps.cpp
@@ -1,9 +1,8 @@
 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -11,29 +10,54 @@
 // later typically inlined as a chain of efficient hardware comparisons). This
 // typically benefits c++ member or nonmember operator==().
 //
-// The basic idea is to replace a larger chain of integer comparisons loaded
-// from contiguous memory locations into a smaller chain of such integer
+// The basic idea is to replace a longer chain of integer comparisons loaded
+// from contiguous memory locations into a shorter chain of larger integer
 // comparisons. Benefits are double:
 //  - There are less jumps, and therefore less opportunities for mispredictions
 //    and I-cache misses.
 //  - Code size is smaller, both because jumps are removed and because the
 //    encoding of a 2*n byte compare is smaller than that of two n-byte
 //    compares.
-
+//
+// Example:
+//
+//  struct S {
+//    int a;
+//    char b;
+//    char c;
+//    uint16_t d;
+//    bool operator==(const S& o) const {
+//      return a == o.a && b == o.b && c == o.c && d == o.d;
+//    }
+//  };
+//
+//  Is optimized as:
+//
+//    bool S::operator==(const S& o) const {
+//      return memcmp(this, &o, 8) == 0;
+//    }
+//
+//  Which will later be expanded (ExpandMemCmp) as a single 8-byte icmp.
+//
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
-#include <numeric>
-#include <utility>
-#include <vector>
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -50,76 +74,109 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
   return false;
 }
 
-// A BCE atom.
+// A BCE atom ("Binary Compare Expression Atom") represents an integer load
+// from an address at a constant offset from a base value, e.g. `a` or `o.c` in
+// the example at the top.
 struct BCEAtom {
-  BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
-
-  const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
-
+  BCEAtom() = default;
+  BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
+      : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
+
+  BCEAtom(const BCEAtom &) = delete;
+  BCEAtom &operator=(const BCEAtom &) = delete;
+
+  BCEAtom(BCEAtom &&that) = default;
+  BCEAtom &operator=(BCEAtom &&that) {
+    if (this == &that)
+      return *this;
+    GEP = that.GEP;
+    LoadI = that.LoadI;
+    BaseId = that.BaseId;
+    Offset = std::move(that.Offset);
+    return *this;
+  }
+
+  // We want to order BCEAtoms by (Base, Offset). However we cannot use
+  // the pointer values for Base because these are non-deterministic.
+  // To make sure that the sort order is stable, we first assign to each atom
+  // base value an index based on its order of appearance in the chain of
+  // comparisons. This index is stored in `BaseId`. For example, for:
+  //    b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
+  //    |  block 1 |    |  block 2 |    |  block 3 |
+  // b gets a smaller id than a, because b appears as LHS in block 1, which is
+  // before block 2. Id 0 is never assigned; it marks an invalid atom.
+  // We then sort by (BaseId, Offset), which is stable.
   bool operator<(const BCEAtom &O) const {
-    assert(Base() && "invalid atom");
-    assert(O.Base() && "invalid atom");
-    // Just ordering by (Base(), Offset) is sufficient. However because this
-    // means that the ordering will depend on the addresses of the base
-    // values, which are not reproducible from run to run. To guarantee
-    // stability, we use the names of the values if they exist; we sort by:
-    // (Base.getName(), Base(), Offset).
-    const int NameCmp = Base()->getName().compare(O.Base()->getName());
-    if (NameCmp == 0) {
-      if (Base() == O.Base()) {
-        return Offset.slt(O.Offset);
-      }
-      return Base() < O.Base();
-    }
-    return NameCmp < 0;
+    return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
   }
 
-  GetElementPtrInst *GEP;
-  LoadInst *LoadI;
+  GetElementPtrInst *GEP = nullptr;
+  LoadInst *LoadI = nullptr;
+  unsigned BaseId = 0;
   APInt Offset;
 };
 
+// A class that assigns increasing ids to values in the order in which they are
+// seen. See comment in `BCEAtom::operator<()`.
+class BaseIdentifier {
+public:
+  // Returns the id for value `Base`, after assigning one if `Base` has not been
+  // seen before.
+  int getBaseId(const Value *Base) {
+    assert(Base && "invalid base");
+    const auto Insertion = BaseToIndex.try_emplace(Base, Order);
+    if (Insertion.second)
+      ++Order;
+    return Insertion.first->second;
+  }
+
+private:
+  unsigned Order = 1;
+  DenseMap<const Value*, int> BaseToIndex;
+};
+
 // If this value is a load from a constant offset w.r.t. a base address, and
 // there are no other users of the load or address, returns the base address and
 // the offset.
-BCEAtom visitICmpLoadOperand(Value *const Val) {
-  BCEAtom Result;
-  if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
-    LLVM_DEBUG(dbgs() << "load\n");
-    if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
-      LLVM_DEBUG(dbgs() << "used outside of block\n");
-      return {};
-    }
-    // Do not optimize atomic loads to non-atomic memcmp
-    if (!LoadI->isSimple()) {
-      LLVM_DEBUG(dbgs() << "volatile or atomic\n");
-      return {};
-    }
-    Value *const Addr = LoadI->getOperand(0);
-    if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
-      LLVM_DEBUG(dbgs() << "GEP\n");
-      if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
-        LLVM_DEBUG(dbgs() << "used outside of block\n");
-        return {};
-      }
-      const auto &DL = GEP->getModule()->getDataLayout();
-      if (!isDereferenceablePointer(GEP, DL)) {
-        LLVM_DEBUG(dbgs() << "not dereferenceable\n");
-        // We need to make sure that we can do comparison in any order, so we
-        // require memory to be unconditionnally dereferencable.
-        return {};
-      }
-      Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
-      if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
-        Result.GEP = GEP;
-        Result.LoadI = LoadI;
-      }
-    }
+BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
+  auto *const LoadI = dyn_cast<LoadInst>(Val);
+  if (!LoadI)
+    return {};
+  LLVM_DEBUG(dbgs() << "load\n");
+  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
+    LLVM_DEBUG(dbgs() << "used outside of block\n");
+    return {};
+  }
+  // Do not optimize atomic loads to non-atomic memcmp
+  if (!LoadI->isSimple()) {
+    LLVM_DEBUG(dbgs() << "volatile or atomic\n");
+    return {};
   }
-  return Result;
+  Value *const Addr = LoadI->getOperand(0);
+  auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
+  if (!GEP)
+    return {};
+  LLVM_DEBUG(dbgs() << "GEP\n");
+  if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
+    LLVM_DEBUG(dbgs() << "used outside of block\n");
+    return {};
+  }
+  const auto &DL = GEP->getModule()->getDataLayout();
+  if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
+    LLVM_DEBUG(dbgs() << "not dereferenceable\n");
+    // We need to make sure that we can do comparison in any order, so we
+    // require memory to be unconditionally dereferenceable.
+    return {};
+  }
+  APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
+  if (!GEP->accumulateConstantOffset(DL, Offset))
+    return {};
+  return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
+                 Offset);
 }
 
-// A basic block with a comparison between two BCE atoms.
+// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
+// example at the top.
 // The block might do extra work besides the atom comparison, in which case
 // doesOtherWork() returns true. Under some conditions, the block can be
 // split into the atom comparison part and the "other work" part
@@ -133,13 +190,11 @@ class BCECmpBlock {
   BCECmpBlock() {}
 
   BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
-      : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
+      : Lhs_(std::move(L)), Rhs_(std::move(R)), SizeBits_(SizeBits) {
     if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
   }
 
-  bool IsValid() const {
-    return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
-  }
+  bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
 
   // Assert the block is consistent: If valid, it should also have
   // non-null members besides Lhs_ and Rhs_.
@@ -160,19 +215,19 @@ class BCECmpBlock {
 
   // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
   // instructions in the block.
-  bool canSplit(AliasAnalysis *AA) const;
+  bool canSplit(AliasAnalysis &AA) const;
 
   // Return true if this all the relevant instructions in the BCE-cmp-block can
   // be sunk below this instruction. By doing this, we know we can separate the
   // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
   // block.
   bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
-                         AliasAnalysis *AA) const;
+                         AliasAnalysis &AA) const;
 
   // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
   // instructions. Split the old block and move all non-BCE-cmp-insts into the
   // new parent block.
-  void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
+  void split(BasicBlock *NewParent, AliasAnalysis &AA) const;
 
   // The basic block where this comparison happens.
   BasicBlock *BB = nullptr;
@@ -191,7 +246,7 @@ private:
 
 bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
                                     DenseSet<Instruction *> &BlockInsts,
-                                    AliasAnalysis *AA) const {
+                                    AliasAnalysis &AA) const {
   // If this instruction has side effects and its in middle of the BCE cmp block
   // instructions, then bail for now.
   if (Inst->mayHaveSideEffects()) {
@@ -201,9 +256,9 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
     // Disallow stores that might alias the BCE operands
     MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
     MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
-    if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
-        isModSet(AA->getModRefInfo(Inst, RLoc)))
-        return false;
+    if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
+        isModSet(AA.getModRefInfo(Inst, RLoc)))
+      return false;
   }
   // Make sure this instruction does not use any of the BCE cmp block
   // instructions as operand.
@@ -214,7 +269,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
   return true;
 }
 
-void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
   DenseSet<Instruction *> BlockInsts(
       {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
   llvm::SmallVector<Instruction *, 4> OtherInsts;
@@ -234,7 +289,7 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
   }
 }
 
-bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
+bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
   DenseSet<Instruction *> BlockInsts(
       {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
   for (Instruction &Inst : *BB) {
@@ -265,7 +320,8 @@ bool BCECmpBlock::doesOtherWork() const {
 // Visit the given comparison. If this is a comparison between two valid
 // BCE atoms, returns the comparison.
 BCECmpBlock visitICmp(const ICmpInst *const CmpI,
-                      const ICmpInst::Predicate ExpectedPredicate) {
+                      const ICmpInst::Predicate ExpectedPredicate,
+                      BaseIdentifier &BaseId) {
   // The comparison can only be used once:
   //  - For intermediate blocks, as a branch condition.
   //  - For the final block, as an incoming value for the Phi.
@@ -275,25 +331,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
     LLVM_DEBUG(dbgs() << "cmp has several uses\n");
     return {};
   }
-  if (CmpI->getPredicate() == ExpectedPredicate) {
-    LLVM_DEBUG(dbgs() << "cmp "
-                      << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
-                      << "\n");
-    auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
-    if (!Lhs.Base()) return {};
-    auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
-    if (!Rhs.Base()) return {};
-    const auto &DL = CmpI->getModule()->getDataLayout();
-    return BCECmpBlock(std::move(Lhs), std::move(Rhs),
-                       DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
-  }
-  return {};
+  if (CmpI->getPredicate() != ExpectedPredicate)
+    return {};
+  LLVM_DEBUG(dbgs() << "cmp "
+                    << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
+                    << "\n");
+  auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
+  if (!Lhs.BaseId)
+    return {};
+  auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
+  if (!Rhs.BaseId)
+    return {};
+  const auto &DL = CmpI->getModule()->getDataLayout();
+  return BCECmpBlock(std::move(Lhs), std::move(Rhs),
+                     DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
 }
 
 // Visit the given comparison block. If this is a comparison between two valid
 // BCE atoms, returns the comparison.
 BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
-                          const BasicBlock *const PhiBlock) {
+                          const BasicBlock *const PhiBlock,
+                          BaseIdentifier &BaseId) {
   if (Block->empty()) return {};
   auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
   if (!BranchI) return {};
@@ -306,7 +364,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
     auto *const CmpI = dyn_cast<ICmpInst>(Val);
     if (!CmpI) return {};
     LLVM_DEBUG(dbgs() << "icmp\n");
-    auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
+    auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
     Result.CmpI = CmpI;
     Result.BranchI = BranchI;
     return Result;
@@ -323,7 +381,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
     assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
     BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
     auto Result = visitICmp(
-        CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
+        CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+        BaseId);
     Result.CmpI = CmpI;
     Result.BranchI = BranchI;
     return Result;
@@ -332,47 +391,41 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
 }
 
 static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
-                                BCECmpBlock &Comparison) {
+                                BCECmpBlock &&Comparison) {
   LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
                     << "': Found cmp of " << Comparison.SizeBits()
-                    << " bits between " << Comparison.Lhs().Base() << " + "
+                    << " bits between " << Comparison.Lhs().BaseId << " + "
                     << Comparison.Lhs().Offset << " and "
-                    << Comparison.Rhs().Base() << " + "
+                    << Comparison.Rhs().BaseId << " + "
                     << Comparison.Rhs().Offset << "\n");
   LLVM_DEBUG(dbgs() << "\n");
-  Comparisons.push_back(Comparison);
+  Comparisons.push_back(std::move(Comparison));
 }
 
 // A chain of comparisons.
 class BCECmpChain {
  public:
-  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
-              AliasAnalysis *AA);
+   BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+               AliasAnalysis &AA);
 
-  int size() const { return Comparisons_.size(); }
+   int size() const { return Comparisons_.size(); }
 
 #ifdef MERGEICMPS_DOT_ON
   void dump() const;
 #endif  // MERGEICMPS_DOT_ON
 
-  bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
+  bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+                DomTreeUpdater &DTU);
 
- private:
+private:
   static bool IsContiguous(const BCECmpBlock &First,
                            const BCECmpBlock &Second) {
-    return First.Lhs().Base() == Second.Lhs().Base() &&
-           First.Rhs().Base() == Second.Rhs().Base() &&
+    return First.Lhs().BaseId == Second.Lhs().BaseId &&
+           First.Rhs().BaseId == Second.Rhs().BaseId &&
            First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
            First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
   }
 
-  // Merges the given comparison blocks into one memcmp block and update
-  // branches. Comparisons are assumed to be continguous. If NextBBInChain is
-  // null, the merged block will link to the phi block.
-  void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
-                        BasicBlock *const NextBBInChain, PHINode &Phi,
-                        const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
-
   PHINode &Phi_;
   std::vector<BCECmpBlock> Comparisons_;
   // The original entry block (before sorting);
@@ -380,16 +433,17 @@ class BCECmpChain {
 };
 
 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
-                         AliasAnalysis *AA)
+                         AliasAnalysis &AA)
     : Phi_(Phi) {
   assert(!Blocks.empty() && "a chain should have at least one block");
   // Now look inside blocks to check for BCE comparisons.
   std::vector<BCECmpBlock> Comparisons;
+  BaseIdentifier BaseId;
   for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
     BasicBlock *const Block = Blocks[BlockIdx];
     assert(Block && "invalid block");
     BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
-                                           Block, Phi.getParent());
+                                           Block, Phi.getParent(), BaseId);
     Comparison.BB = Block;
     if (!Comparison.IsValid()) {
       LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
@@ -411,13 +465,13 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
         // chain before sorting. Unless we can abort the chain at this point
         // and start anew.
         //
-        // NOTE: we only handle block with single predecessor for now.
+        // NOTE: we only handle blocks with a single predecessor for now.
         if (Comparison.canSplit(AA)) {
           LLVM_DEBUG(dbgs()
                      << "Split initial block '" << Comparison.BB->getName()
                      << "' that does extra work besides compare\n");
           Comparison.RequireSplit = true;
-          enqueueBlock(Comparisons, Comparison);
+          enqueueBlock(Comparisons, std::move(Comparison));
         } else {
           LLVM_DEBUG(dbgs()
                      << "ignoring initial block '" << Comparison.BB->getName()
@@ -450,7 +504,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
       // We could still merge bb1 and bb2 though.
       return;
     }
-    enqueueBlock(Comparisons, Comparison);
+    enqueueBlock(Comparisons, std::move(Comparison));
   }
 
   // It is possible we have no suitable comparison to merge.
@@ -466,9 +520,11 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
 #endif  // MERGEICMPS_DOT_ON
   // Reorder blocks by LHS. We can do that without changing the
   // semantics because we are only accessing dereferencable memory.
-  llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
-    return a.Lhs() < b.Lhs();
-  });
+  llvm::sort(Comparisons_,
+             [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+               return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+                      std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+             });
 #ifdef MERGEICMPS_DOT_ON
   errs() << "AFTER REORDERING:\n\n";
   dump();
@@ -498,162 +554,205 @@ void BCECmpChain::dump() const {
 }
 #endif  // MERGEICMPS_DOT_ON
 
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
-                           AliasAnalysis *AA) {
-  // First pass to check if there is at least one merge. If not, we don't do
-  // anything and we keep analysis passes intact.
-  {
-    bool AtLeastOneMerged = false;
-    for (size_t I = 1; I < Comparisons_.size(); ++I) {
-      if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
-        AtLeastOneMerged = true;
-        break;
+namespace {
+
+// A class to compute the name of a set of merged basic blocks.
+// This is optimized for the common case of no block names.
+class MergedBlockName {
+  // Storage for the uncommon case of several named blocks.
+  SmallString<16> Scratch;
+
+public:
+  explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
+      : Name(makeName(Comparisons)) {}
+  const StringRef Name;
+
+private:
+  StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
+    assert(!Comparisons.empty() && "no basic block");
+    // Fast path: only one block, or no names at all.
+    if (Comparisons.size() == 1)
+      return Comparisons[0].BB->getName();
+    const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
+                                     [](int i, const BCECmpBlock &Cmp) {
+                                       return i + Cmp.BB->getName().size();
+                                     });
+    if (size == 0)
+      return StringRef("", 0);
+
+    // Slow path: at least two blocks, at least one block with a name.
+    Scratch.clear();
+    // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
+    // separators.
+    Scratch.reserve(size + Comparisons.size() - 1);
+    const auto append = [this](StringRef str) {
+      Scratch.append(str.begin(), str.end());
+    };
+    append(Comparisons[0].BB->getName());
+    for (int I = 1, E = Comparisons.size(); I < E; ++I) {
+      const BasicBlock *const BB = Comparisons[I].BB;
+      if (!BB->getName().empty()) {
+        append("+");
+        append(BB->getName());
       }
     }
-    if (!AtLeastOneMerged) return false;
+    return StringRef(Scratch);
   }
+};
+} // namespace
+
+// Merges the given contiguous comparison blocks into one memcmp block.
+static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
+                                    BasicBlock *const InsertBefore,
+                                    BasicBlock *const NextCmpBlock,
+                                    PHINode &Phi, const TargetLibraryInfo &TLI,
+                                    AliasAnalysis &AA, DomTreeUpdater &DTU) {
+  assert(!Comparisons.empty() && "merging zero comparisons");
+  LLVMContext &Context = NextCmpBlock->getContext();
+  const BCECmpBlock &FirstCmp = Comparisons[0];
+
+  // Create a new cmp block before next cmp block.
+  BasicBlock *const BB =
+      BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
+                         NextCmpBlock->getParent(), InsertBefore);
+  IRBuilder<> Builder(BB);
+  // Add the GEPs from the first BCECmpBlock.
+  Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+  Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+
+  Value *IsEqual = nullptr;
+  LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
+                    << BB->getName() << "\n");
+  if (Comparisons.size() == 1) {
+    LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
+    Value *const LhsLoad =
+        Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
+    Value *const RhsLoad =
+        Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
+    // There are no blocks to merge, just do the comparison.
+    IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
+  } else {
+    // If there is one block that requires splitting, we do it now, i.e.
+    // just before we know we will collapse the chain. The instructions
+    // can be executed before any of the instructions in the chain.
+    const auto ToSplit =
+        std::find_if(Comparisons.begin(), Comparisons.end(),
+                     [](const BCECmpBlock &B) { return B.RequireSplit; });
+    if (ToSplit != Comparisons.end()) {
+      LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
+      ToSplit->split(BB, AA);
+    }
 
-  // Remove phi references to comparison blocks, they will be rebuilt as we
-  // merge the blocks.
-  for (const auto &Comparison : Comparisons_) {
-    Phi_.removeIncomingValue(Comparison.BB, false);
-  }
+    const unsigned TotalSizeBits = std::accumulate(
+        Comparisons.begin(), Comparisons.end(), 0u,
+        [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
 
-  // If entry block is part of the chain, we need to make the first block
-  // of the chain the new entry block of the function.
-  BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
-  for (size_t I = 1; I < Comparisons_.size(); ++I) {
-    if (Entry == Comparisons_[I].BB) {
-      BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
-                                                Entry->getParent(), Entry);
-      BranchInst::Create(Entry, NEntryBB);
-      break;
-    }
+    // Create memcmp() == 0.
+    const auto &DL = Phi.getModule()->getDataLayout();
+    Value *const MemCmpCall = emitMemCmp(
+        Lhs, Rhs,
+        ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
+        DL, &TLI);
+    IsEqual = Builder.CreateICmpEQ(
+        MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
   }
 
-  // Point the predecessors of the chain to the first comparison block (which is
-  // the new entry point) and update the entry block of the chain.
-  if (EntryBlock_ != Comparisons_[0].BB) {
-    EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
-    EntryBlock_ = Comparisons_[0].BB;
+  BasicBlock *const PhiBB = Phi.getParent();
+  // Add a branch to the next basic block in the chain.
+  if (NextCmpBlock == PhiBB) {
+    // Continue to phi, passing it the comparison result.
+    Builder.CreateBr(PhiBB);
+    Phi.addIncoming(IsEqual, BB);
+    DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
+  } else {
+    // Continue to next block if equal, exit to phi else.
+    Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+    Phi.addIncoming(ConstantInt::getFalse(Context), BB);
+    DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
+                      {DominatorTree::Insert, BB, PhiBB}});
   }
+  return BB;
+}
+
+bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+                           DomTreeUpdater &DTU) {
+  assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
+  // First pass to check if there is at least one merge. If not, we don't do
+  // anything and we keep analysis passes intact.
+  const auto AtLeastOneMerged = [this]() {
+    for (size_t I = 1; I < Comparisons_.size(); ++I) {
+      if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
+        return true;
+    }
+    return false;
+  };
+  if (!AtLeastOneMerged())
+    return false;
 
-  // Effectively merge blocks.
+  LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
+                    << EntryBlock_->getName() << "\n");
+
+  // Effectively merge blocks. We go in the reverse direction from the phi block
+  // so that the next block is always available to branch to.
+  const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
+                                                  BasicBlock *InsertBefore,
+                                                  BasicBlock *Next) {
+    return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
+                            InsertBefore, Next, Phi_, TLI, AA, DTU);
+  };
   int NumMerged = 1;
-  for (size_t I = 1; I < Comparisons_.size(); ++I) {
-    if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
+  BasicBlock *NextCmpBlock = Phi_.getParent();
+  for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
+    if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
+      LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
+                        << " into " << Comparisons_[I + 1].BB->getName()
+                        << "\n");
       ++NumMerged;
     } else {
-      // Merge all previous comparisons and start a new merge block.
-      mergeComparisons(
-          makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
-          Comparisons_[I].BB, Phi_, TLI, AA);
+      NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
       NumMerged = 1;
     }
   }
-  mergeComparisons(makeArrayRef(Comparisons_)
-                       .slice(Comparisons_.size() - NumMerged, NumMerged),
-                   nullptr, Phi_, TLI, AA);
-
-  return true;
-}
-
-void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
-                                   BasicBlock *const NextBBInChain,
-                                   PHINode &Phi,
-                                   const TargetLibraryInfo *const TLI,
-                                   AliasAnalysis *AA) {
-  assert(!Comparisons.empty());
-  const auto &FirstComparison = *Comparisons.begin();
-  BasicBlock *const BB = FirstComparison.BB;
-  LLVMContext &Context = BB->getContext();
-
-  if (Comparisons.size() >= 2) {
-    // If there is one block that requires splitting, we do it now, i.e.
-    // just before we know we will collapse the chain. The instructions
-    // can be executed before any of the instructions in the chain.
-    auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
-                          [](const BCECmpBlock &B) { return B.RequireSplit; });
-    if (C != Comparisons.end())
-      C->split(EntryBlock_, AA);
-
-    LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
-    const auto TotalSize =
-        std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
-                        [](int Size, const BCECmpBlock &C) {
-                          return Size + C.SizeBits();
-                        }) /
-        8;
-
-    // Incoming edges do not need to be updated, and both GEPs are already
-    // computing the right address, we just need to:
-    //   - replace the two loads and the icmp with the memcmp
-    //   - update the branch
-    //   - update the incoming values in the phi.
-    FirstComparison.BranchI->eraseFromParent();
-    FirstComparison.CmpI->eraseFromParent();
-    FirstComparison.Lhs().LoadI->eraseFromParent();
-    FirstComparison.Rhs().LoadI->eraseFromParent();
-
-    IRBuilder<> Builder(BB);
-    const auto &DL = Phi.getModule()->getDataLayout();
-    Value *const MemCmpCall = emitMemCmp(
-        FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
-        ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
-        Builder, DL, TLI);
-    Value *const MemCmpIsZero = Builder.CreateICmpEQ(
-        MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
+  // Insert the entry block for the new chain before the old entry block.
+  // If the old entry block was the function entry, this ensures that the new
+  // entry can become the function entry.
+  NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
+
+  // Replace the original cmp chain with the new cmp chain by pointing all
+  // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
+  // blocks in the old chain unreachable.
+  while (!pred_empty(EntryBlock_)) {
+    BasicBlock* const Pred = *pred_begin(EntryBlock_);
+    LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
+                      << "\n");
+    Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
+    DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
+                      {DominatorTree::Insert, Pred, NextCmpBlock}});
+  }
 
-    // Add a branch to the next basic block in the chain.
-    if (NextBBInChain) {
-      Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
-      Phi.addIncoming(ConstantInt::getFalse(Context), BB);
-    } else {
-      Builder.CreateBr(Phi.getParent());
-      Phi.addIncoming(MemCmpIsZero, BB);
-    }
+  // If the old cmp chain was the function entry, we need to update the function
+  // entry.
+  const bool ChainEntryIsFnEntry =
+      (EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock());
+  if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
+    LLVM_DEBUG(dbgs() << "Changing function entry from "
+                      << EntryBlock_->getName() << " to "
+                      << NextCmpBlock->getName() << "\n");
+    DTU.getDomTree().setNewRoot(NextCmpBlock);
+    DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
+  }
+  EntryBlock_ = nullptr;
 
-    // Delete merged blocks.
-    for (size_t I = 1; I < Comparisons.size(); ++I) {
-      BasicBlock *CBB = Comparisons[I].BB;
-      CBB->replaceAllUsesWith(BB);
-      CBB->eraseFromParent();
-    }
-  } else {
-    assert(Comparisons.size() == 1);
-    // There are no blocks to merge, but we still need to update the branches.
-    LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
-    if (NextBBInChain) {
-      if (FirstComparison.BranchI->isConditional()) {
-        LLVM_DEBUG(dbgs() << "conditional -> conditional\n");
-        // Just update the "true" target, the "false" target should already be
-        // the phi block.
-        assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
-        FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
-        Phi.addIncoming(ConstantInt::getFalse(Context), BB);
-      } else {
-        LLVM_DEBUG(dbgs() << "unconditional -> conditional\n");
-        // Replace the unconditional branch by a conditional one.
-        FirstComparison.BranchI->eraseFromParent();
-        IRBuilder<> Builder(BB);
-        Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
-                             Phi.getParent());
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      }
-    } else {
-      if (FirstComparison.BranchI->isConditional()) {
-        LLVM_DEBUG(dbgs() << "conditional -> unconditional\n");
-        // Replace the conditional branch by an unconditional one.
-        FirstComparison.BranchI->eraseFromParent();
-        IRBuilder<> Builder(BB);
-        Builder.CreateBr(Phi.getParent());
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      } else {
-        LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n");
-        Phi.addIncoming(FirstComparison.CmpI, BB);
-      }
-    }
+  // Delete merged blocks. This also removes incoming values in phi.
+  SmallVector<BasicBlock *, 16> DeadBlocks;
+  for (auto &Cmp : Comparisons_) {
+    LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
+    DeadBlocks.push_back(Cmp.BB);
   }
+  DeleteDeadBlocks(DeadBlocks, &DTU);
+
+  Comparisons_.clear();
+  return true;
 }
 
 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
@@ -691,8 +790,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
   return Blocks;
 }
 
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
-                AliasAnalysis *AA) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+                DomTreeUpdater &DTU) {
   LLVM_DEBUG(dbgs() << "processPhi()\n");
   if (Phi.getNumIncomingValues() <= 1) {
     LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -757,24 +856,54 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
     return false;
   }
 
-  return CmpChain.simplify(TLI, AA);
+  return CmpChain.simplify(TLI, AA, DTU);
 }
 
-class MergeICmps : public FunctionPass {
- public:
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+                    const TargetTransformInfo &TTI, AliasAnalysis &AA,
+                    DominatorTree *DT) {
+  LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n");
+
+  // We only try merging comparisons if the target wants to expand memcmp later.
+  // The rationale is to avoid turning small chains into memcmp calls.
+  if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
+    return false;
+
+  // If we don't have memcmp available we can't emit calls to it.
+  if (!TLI.has(LibFunc_memcmp))
+    return false;
+
+  DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr,
+                     DomTreeUpdater::UpdateStrategy::Eager);
+
+  bool MadeChange = false;
+
+  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+    // A Phi operation is always first in a basic block.
+    if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+      MadeChange |= processPhi(*Phi, TLI, AA, DTU);
+  }
+
+  return MadeChange;
+}
+
+class MergeICmpsLegacyPass : public FunctionPass {
+public:
   static char ID;
 
-  MergeICmps() : FunctionPass(ID) {
-    initializeMergeICmpsPass(*PassRegistry::getPassRegistry());
+  MergeICmpsLegacyPass() : FunctionPass(ID) {
+    initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
     if (skipFunction(F)) return false;
     const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
     const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    auto PA = runImpl(F, &TLI, &TTI, AA);
-    return !PA.areAllPreserved();
+    // MergeICmps does not need the DominatorTree, but we update it if it's
+    // already available.
+    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
   }
 
  private:
@@ -782,46 +911,35 @@ class MergeICmps : public FunctionPass {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
   }
-
-  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
-                            const TargetTransformInfo *TTI, AliasAnalysis *AA);
 };
 
-PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
-                                      const TargetTransformInfo *TTI,
-                                      AliasAnalysis *AA) {
-  LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
-
-  // We only try merging comparisons if the target wants to expand memcmp later.
-  // The rationale is to avoid turning small chains into memcmp calls.
-  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
-
-  // If we don't have memcmp avaiable we can't emit calls to it.
-  if (!TLI->has(LibFunc_memcmp))
-    return PreservedAnalyses::all();
-
-  bool MadeChange = false;
-
-  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
-    // A Phi operation is always first in a basic block.
-    if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
-      MadeChange |= processPhi(*Phi, TLI, AA);
-  }
-
-  if (MadeChange) return PreservedAnalyses::none();
-  return PreservedAnalyses::all();
-}
+} // namespace
 
-}  // namespace
-
-char MergeICmps::ID = 0;
-INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
+char MergeICmpsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps",
                       "Merge contiguous icmps into a memcmp", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
+INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps",
                     "Merge contiguous icmps into a memcmp", false, false)
 
-Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
+Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); }
+
+PreservedAnalyses MergeICmpsPass::run(Function &F,
+                                      FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &AA = AM.getResult<AAManager>(F);
+  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+  const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT);
+  if (!MadeChanges)
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<GlobalsAA>();
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
+}
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index ee21feca8d2c..30645f4400e3 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -1,9 +1,8 @@
 //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 7106ea216ad6..94436b55752a 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -1,9 +1,8 @@
 //===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -427,8 +426,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
     RHS = Builder.CreateMul(
         RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
   }
-  GetElementPtrInst *NewGEP =
-      cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
+  GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
+      Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
   NewGEP->setIsInBounds(GEP->isInBounds());
   NewGEP->takeName(GEP);
   return NewGEP;
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 7cbb0fe70f82..08ac2b666fce 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1,9 +1,8 @@
 //===- NewGVN.cpp - Global Value Numbering Pass ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1167,9 +1166,9 @@ const Expression *NewGVN::createExpression(Instruction *I) const {
         SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
-  } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
+  } else if (auto *CI = dyn_cast<CastInst>(I)) {
     Value *V =
-        SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
+        SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (isa<GetElementPtrInst>(I)) {
@@ -1815,39 +1814,13 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
 const Expression *
 NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const {
   if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
-    auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
-    if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
-      unsigned Opcode = 0;
-      // EI might be an extract from one of our recognised intrinsics. If it
-      // is we'll synthesize a semantically equivalent expression instead on
-      // an extract value expression.
-      switch (II->getIntrinsicID()) {
-      case Intrinsic::sadd_with_overflow:
-      case Intrinsic::uadd_with_overflow:
-        Opcode = Instruction::Add;
-        break;
-      case Intrinsic::ssub_with_overflow:
-      case Intrinsic::usub_with_overflow:
-        Opcode = Instruction::Sub;
-        break;
-      case Intrinsic::smul_with_overflow:
-      case Intrinsic::umul_with_overflow:
-        Opcode = Instruction::Mul;
-        break;
-      default:
-        break;
-      }
-
-      if (Opcode != 0) {
-        // Intrinsic recognized. Grab its args to finish building the
-        // expression.
-        assert(II->getNumArgOperands() == 2 &&
-               "Expect two args for recognised intrinsics.");
-        return createBinaryExpression(Opcode, EI->getType(),
-                                      II->getArgOperand(0),
-                                      II->getArgOperand(1), I);
-      }
-    }
+    auto *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+    if (WO && EI->getNumIndices() == 1 && *EI->idx_begin() == 0)
+      // EI is an extract from one of our with.overflow intrinsics. Synthesize
+      // a semantically equivalent expression instead of an extract value
+      // expression.
+      return createBinaryExpression(WO->getBinaryOp(), EI->getType(),
+                                    WO->getLHS(), WO->getRHS(), I);
   }
 
   return createAggregateValueExpression(I);
@@ -2011,12 +1984,14 @@ NewGVN::performSymbolicEvaluation(Value *V,
       E = performSymbolicLoadEvaluation(I);
       break;
     case Instruction::BitCast:
+    case Instruction::AddrSpaceCast:
       E = createExpression(I);
       break;
     case Instruction::ICmp:
     case Instruction::FCmp:
       E = performSymbolicCmpEvaluation(I);
       break;
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -2122,7 +2097,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
 
   if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
     PredicateToUsers[PBranch->Condition].insert(I);
-  else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
+  else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
     PredicateToUsers[PAssume->Condition].insert(I);
 }
 
@@ -2524,9 +2499,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
     // For switches, propagate the case values into the case
     // destinations.
 
-    // Remember how many outgoing edges there are to every successor.
-    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
-
     Value *SwitchCond = SI->getCondition();
     Value *CondEvaluated = findConditionEquivalence(SwitchCond);
     // See if we were able to turn this switch statement into a constant.
@@ -2547,7 +2519,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
     } else {
       for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
         BasicBlock *TargetBlock = SI->getSuccessor(i);
-        ++SwitchEdges[TargetBlock];
         updateReachableEdge(B, TargetBlock);
       }
     }
@@ -3503,7 +3474,7 @@ bool NewGVN::runGVN() {
            "BB containing ToErase deleted unexpectedly!");
     ToErase->eraseFromParent();
   }
-	Changed |= !InstructionsToErase.empty();
+  Changed |= !InstructionsToErase.empty();
 
   // Delete all unreachable blocks.
   auto UnreachableBlockPred = [&](const BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 05ea9144f66c..039123218544 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -1,9 +1,8 @@
 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index fd2eb85fd7bf..b544f0a39ea8 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -1,9 +1,8 @@
 //===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,7 +55,6 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -179,19 +177,18 @@ struct PlaceSafepoints : public FunctionPass {
 // callers job.
 static void
 InsertSafepointPoll(Instruction *InsertBefore,
-                    std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+                    std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
                     const TargetLibraryInfo &TLI);
 
-static bool needsStatepoint(const CallSite &CS, const TargetLibraryInfo &TLI) {
-  if (callsGCLeafFunction(CS, TLI))
+static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) {
+  if (callsGCLeafFunction(Call, TLI))
     return false;
-  if (CS.isCall()) {
-    CallInst *call = cast<CallInst>(CS.getInstruction());
-    if (call->isInlineAsm())
+  if (auto *CI = dyn_cast<CallInst>(Call)) {
+    if (CI->isInlineAsm())
       return false;
   }
 
-  return !(isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS));
+  return !(isStatepoint(Call) || isGCRelocate(Call) || isGCResult(Call));
 }
 
 /// Returns true if this loop is known to contain a call safepoint which
@@ -217,14 +214,14 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
   BasicBlock *Current = Pred;
   while (true) {
     for (Instruction &I : *Current) {
-      if (auto CS = CallSite(&I))
+      if (auto *Call = dyn_cast<CallBase>(&I))
         // Note: Technically, needing a safepoint isn't quite the right
         // condition here.  We should instead be checking if the target method
         // has an
         // unconditional poll. In practice, this is only a theoretical concern
         // since we don't have any methods with conditional-only safepoint
         // polls.
-        if (needsStatepoint(CS, TLI))
+        if (needsStatepoint(Call, TLI))
           return true;
     }
 
@@ -360,9 +357,8 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
 
 /// Returns true if an entry safepoint is not required before this callsite in
 /// the caller function.
-static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
-  Instruction *Inst = CS.getInstruction();
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::experimental_gc_statepoint:
     case Intrinsic::experimental_patchpoint_void:
@@ -424,8 +420,8 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
     // which can grow the stack by an unbounded amount.  This isn't required
     // for GC semantics per se, but is a common requirement for languages
     // which detect stack overflow via guard pages and then throw exceptions.
-    if (auto CS = CallSite(Cursor)) {
-      if (doesNotRequireEntrySafepointBefore(CS))
+    if (auto *Call = dyn_cast<CallBase>(Cursor)) {
+      if (doesNotRequireEntrySafepointBefore(Call))
         continue;
       break;
     }
@@ -500,7 +496,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
   DT.recalculate(F);
 
   SmallVector<Instruction *, 16> PollsNeeded;
-  std::vector<CallSite> ParsePointNeeded;
+  std::vector<CallBase *> ParsePointNeeded;
 
   if (enableBackedgeSafepoints(F)) {
     // Construct a pass manager to run the LoopPass backedge logic.  We
@@ -589,7 +585,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
   // Now that we've identified all the needed safepoint poll locations, insert
   // safepoint polls themselves.
   for (Instruction *PollLocation : PollsNeeded) {
-    std::vector<CallSite> RuntimeCalls;
+    std::vector<CallBase *> RuntimeCalls;
     InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
     ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                             RuntimeCalls.end());
@@ -622,7 +618,7 @@ INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
 
 static void
 InsertSafepointPoll(Instruction *InsertBefore,
-                    std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+                    std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
                     const TargetLibraryInfo &TLI) {
   BasicBlock *OrigBB = InsertBefore->getParent();
   Module *M = InsertBefore->getModule();
@@ -687,7 +683,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
 
     // These are likely runtime calls.  Should we assert that via calling
     // convention or something?
-    ParsePointsNeeded.push_back(CallSite(CI));
+    ParsePointsNeeded.push_back(CI);
   }
   assert(ParsePointsNeeded.size() <= Calls.size());
 }
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index cb893eab1654..fa8c9e2a5fe4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1,9 +1,8 @@
 //===- Reassociate.cpp - Reassociate binary expressions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -267,12 +266,16 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
 
 /// Replace 0-X with X*-1.
 static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
+  assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
+         "Expected a Negate!");
+  // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
+  unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
   Type *Ty = Neg->getType();
   Constant *NegOne = Ty->isIntOrIntVectorTy() ?
     ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
 
-  BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
-  Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
+  BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
+  Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
   Res->takeName(Neg);
   Neg->replaceAllUsesWith(Res);
   Res->setDebugLoc(Neg->getDebugLoc());
@@ -445,8 +448,10 @@ using RepeatedValue = std::pair<Value*, APInt>;
 /// that have all uses inside the expression (i.e. only used by non-leaf nodes
 /// of the expression) if it can turn them into binary operators of the right
 /// type and thus make the expression bigger.
-static bool LinearizeExprTree(BinaryOperator *I,
+static bool LinearizeExprTree(Instruction *I,
                               SmallVectorImpl<RepeatedValue> &Ops) {
+  assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
+         "Expected a UnaryOperator or BinaryOperator!");
   LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
   unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
   unsigned Opcode = I->getOpcode();
@@ -463,7 +468,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
   // with their weights, representing a certain number of paths to the operator.
   // If an operator occurs in the worklist multiple times then we found multiple
   // ways to get to it.
-  SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
+  SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
   Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
   bool Changed = false;
 
@@ -490,10 +495,10 @@ static bool LinearizeExprTree(BinaryOperator *I,
   SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
 #endif
   while (!Worklist.empty()) {
-    std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
+    std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
     I = P.first; // We examine the operands of this binary operator.
 
-    for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
+    for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
       Value *Op = I->getOperand(OpIdx);
       APInt Weight = P.second; // Number of paths to this operand.
       LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
@@ -573,14 +578,14 @@ static bool LinearizeExprTree(BinaryOperator *I,
 
       // If this is a multiply expression, turn any internal negations into
       // multiplies by -1 so they can be reassociated.
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
-        if ((Opcode == Instruction::Mul && match(BO, m_Neg(m_Value()))) ||
-            (Opcode == Instruction::FMul && match(BO, m_FNeg(m_Value())))) {
+      if (Instruction *Tmp = dyn_cast<Instruction>(Op))
+        if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
+            (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
           LLVM_DEBUG(dbgs()
                      << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
-          BO = LowerNegateToMultiply(BO);
-          LLVM_DEBUG(dbgs() << *BO << '\n');
-          Worklist.push_back(std::make_pair(BO, Weight));
+          Tmp = LowerNegateToMultiply(Tmp);
+          LLVM_DEBUG(dbgs() << *Tmp << '\n');
+          Worklist.push_back(std::make_pair(Tmp, Weight));
           Changed = true;
           continue;
         }
@@ -862,6 +867,8 @@ static Value *NegateValue(Value *V, Instruction *BI,
     if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
       continue;
 
+    bool FoundCatchSwitch = false;
+
     BasicBlock::iterator InsertPt;
     if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
@@ -869,10 +876,30 @@ static Value *NegateValue(Value *V, Instruction *BI,
       } else {
         InsertPt = ++InstInput->getIterator();
       }
-      while (isa<PHINode>(InsertPt)) ++InsertPt;
+
+      const BasicBlock *BB = InsertPt->getParent();
+
+      // Make sure we don't move anything before PHIs or exception
+      // handling pads.
+      while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) ||
+                                       InsertPt->isEHPad())) {
+        if (isa<CatchSwitchInst>(InsertPt))
+          // A catchswitch cannot have anything in the block except
+          // itself and PHIs.  We'll bail out below.
+          FoundCatchSwitch = true;
+        ++InsertPt;
+      }
     } else {
       InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
     }
+
+    // We found a catchswitch in the block where we want to move the
+    // neg.  We cannot move anything into that block.  Bail and just
+    // create the neg before BI, as if we hadn't found an existing
+    // neg.
+    if (FoundCatchSwitch)
+      break;
+
     TheNeg->moveBefore(&*InsertPt);
     if (TheNeg->getOpcode() == Instruction::Sub) {
       TheNeg->setHasNoUnsignedWrap(false);
@@ -1329,8 +1356,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
   //     So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
   //     than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
   //     "z" in the order of X-Y-Z is better than any other orders.
-  std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
-                   [](XorOpnd *LHS, XorOpnd *RHS) {
+  llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
     return LHS->getSymbolicRank() < RHS->getSymbolicRank();
   });
 
@@ -1687,8 +1713,7 @@ static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
   // below our mininum of '4'.
   assert(FactorPowerSum >= 4);
 
-  std::stable_sort(Factors.begin(), Factors.end(),
-                   [](const Factor &LHS, const Factor &RHS) {
+  llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
     return LHS.Power > RHS.Power;
   });
   return true;
@@ -1801,7 +1826,7 @@ Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
     return V;
 
   ValueEntry NewEntry = ValueEntry(getRank(V), V);
-  Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
+  Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
   return nullptr;
 }
 
@@ -2001,7 +2026,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
 /// instructions is not allowed.
 void ReassociatePass::OptimizeInst(Instruction *I) {
   // Only consider operations that we understand.
-  if (!isa<BinaryOperator>(I))
+  if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
     return;
 
   if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
@@ -2066,7 +2091,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
         I = NI;
       }
     }
-  } else if (I->getOpcode() == Instruction::FSub) {
+  } else if (I->getOpcode() == Instruction::FNeg ||
+             I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
       Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
@@ -2075,7 +2101,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
     } else if (match(I, m_FNeg(m_Value()))) {
       // Otherwise, this is a negation.  See if the operand is a multiply tree
       // and if this is not an inner node of a multiply tree.
-      if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
+      Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
+                                           I->getOperand(0);
+      if (isReassociableOp(Op, Instruction::FMul) &&
           (!I->hasOneUse() ||
            !isReassociableOp(I->user_back(), Instruction::FMul))) {
         // If the negate was simplified, revisit the users to see if we can
@@ -2142,7 +2170,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
   // positions maintained (and so the compiler is deterministic).  Note that
   // this sorts so that the highest ranking values end up at the beginning of
   // the vector.
-  std::stable_sort(Ops.begin(), Ops.end());
+  llvm::stable_sort(Ops);
 
   // Now that we have the expression tree in a convenient
   // sorted form, optimize it globally if possible.
@@ -2218,8 +2246,15 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
         if (std::less<Value *>()(Op1, Op0))
           std::swap(Op0, Op1);
         auto it = PairMap[Idx].find({Op0, Op1});
-        if (it != PairMap[Idx].end())
-          Score += it->second;
+        if (it != PairMap[Idx].end()) {
+          // Functions like BreakUpSubtract() can erase the Values we're using
+          // as keys and create new Values after we built the PairMap. There's a
+          // small chance that the new nodes can have the same address as
+          // something already in the table. We shouldn't accumulate the stored
+          // score in that case as it refers to the wrong Value.
+          if (it->second.isValid())
+            Score += it->second.Score;
+        }
 
         unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
         if (Score > Max || (Score == Max && MaxRank < BestRank)) {
@@ -2288,9 +2323,15 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
             std::swap(Op0, Op1);
           if (!Visited.insert({Op0, Op1}).second)
             continue;
-          auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1});
-          if (!res.second)
-            ++res.first->second;
+          auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
+          if (!res.second) {
+            // If either key value has been erased then we've got the same
+            // address by coincidence. That can't happen here because nothing is
+            // erasing values but it can happen by the time we're querying the
+            // map.
+            assert(res.first->second.isValid() && "WeakVH invalidated");
+            ++res.first->second.Score;
+          }
         }
       }
     }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 018feb035a4f..3296322e00d5 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -1,9 +1,8 @@
 //===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 42d7ed5bc534..c358258d24cf 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1,9 +1,8 @@
 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,18 +25,17 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -286,9 +284,9 @@ struct PartiallyConstructedSafepointRecord {
 
 } // end anonymous namespace
 
-static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
   Optional<OperandBundleUse> DeoptBundle =
-      CS.getOperandBundle(LLVMContext::OB_deopt);
+      Call->getOperandBundle(LLVMContext::OB_deopt);
 
   if (!DeoptBundle.hasValue()) {
     assert(AllowStatepointWithNoDeoptInfo &&
@@ -370,14 +368,11 @@ static std::string suffixed_name_or(Value *V, StringRef Suffix,
 // given instruction. The  analysis is performed immediately before the
 // given instruction. Values defined by that instruction are not considered
 // live.  Values used by that instruction are considered live.
-static void
-analyzeParsePointLiveness(DominatorTree &DT,
-                          GCPtrLivenessData &OriginalLivenessData, CallSite CS,
-                          PartiallyConstructedSafepointRecord &Result) {
-  Instruction *Inst = CS.getInstruction();
-
+static void analyzeParsePointLiveness(
+    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
+    PartiallyConstructedSafepointRecord &Result) {
   StatepointLiveSetTy LiveSet;
-  findLiveSetAtInst(Inst, OriginalLivenessData, LiveSet);
+  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);
 
   if (PrintLiveSet) {
     dbgs() << "Live Variables:\n";
@@ -385,7 +380,7 @@ analyzeParsePointLiveness(DominatorTree &DT,
       dbgs() << " " << V->getName() << " " << *V << "\n";
   }
   if (PrintLiveSetSize) {
-    dbgs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+    dbgs() << "Safepoint For: " << Call->getCalledValue()->getName() << "\n";
     dbgs() << "Number live values: " << LiveSet.size() << "\n";
   }
   Result.LiveSet = LiveSet;
@@ -1178,7 +1173,7 @@ findBasePointers(const StatepointLiveSetTy &live,
 /// Find the required based pointers (and adjust the live set) for the given
 /// parse point.
 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
-                             CallSite CS,
+                             CallBase *Call,
                              PartiallyConstructedSafepointRecord &result) {
   MapVector<Value *, Value *> PointerToBase;
   findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
@@ -1200,11 +1195,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
 /// Given an updated version of the dataflow liveness results, update the
 /// liveset and base pointer maps for the call site CS.
 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
-                                  CallSite CS,
+                                  CallBase *Call,
                                   PartiallyConstructedSafepointRecord &result);
 
 static void recomputeLiveInValues(
-    Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
   // TODO-PERF: reuse the original liveness, then simply run the dataflow
   // again.  The old values are still live and will help it stabilize quickly.
@@ -1307,7 +1302,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
   // Lazily populated map from input types to the canonicalized form mentioned
   // in the comment above.  This should probably be cached somewhere more
   // broadly.
-  DenseMap<Type*, Value*> TypeToDeclMap;
+  DenseMap<Type *, Function *> TypeToDeclMap;
 
   for (unsigned i = 0; i < LiveVariables.size(); i++) {
     // Generate the gc.relocate call and save the result
@@ -1318,7 +1313,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
     Type *Ty = LiveVariables[i]->getType();
     if (!TypeToDeclMap.count(Ty))
       TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
-    Value *GCRelocateDecl = TypeToDeclMap[Ty];
+    Function *GCRelocateDecl = TypeToDeclMap[Ty];
 
     // only specify a debug name if we can give a useful one
     CallInst *Reloc = Builder.CreateCall(
@@ -1399,16 +1394,16 @@ public:
 
 } // end anonymous namespace
 
-static StringRef getDeoptLowering(CallSite CS) {
+static StringRef getDeoptLowering(CallBase *Call) {
   const char *DeoptLowering = "deopt-lowering";
-  if (CS.hasFnAttr(DeoptLowering)) {
-    // FIXME: CallSite has a *really* confusing interface around attributes
+  if (Call->hasFnAttr(DeoptLowering)) {
+    // FIXME: Calls have a *really* confusing interface around attributes
     // with values.
-    const AttributeList &CSAS = CS.getAttributes();
+    const AttributeList &CSAS = Call->getAttributes();
     if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
       return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
           .getValueAsString();
-    Function *F = CS.getCalledFunction();
+    Function *F = Call->getCalledFunction();
     assert(F && F->hasFnAttribute(DeoptLowering));
     return F->getFnAttribute(DeoptLowering).getValueAsString();
   }
@@ -1416,7 +1411,7 @@ static StringRef getDeoptLowering(CallSite CS) {
 }
 
 static void
-makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                            const SmallVectorImpl<Value *> &BasePtrs,
                            const SmallVectorImpl<Value *> &LiveVariables,
                            PartiallyConstructedSafepointRecord &Result,
@@ -1427,19 +1422,18 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
   // immediately before the previous instruction under the assumption that all
   // arguments will be available here.  We can't insert afterwards since we may
   // be replacing a terminator.
-  Instruction *InsertBefore = CS.getInstruction();
-  IRBuilder<> Builder(InsertBefore);
+  IRBuilder<> Builder(Call);
 
   ArrayRef<Value *> GCArgs(LiveVariables);
   uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
   uint32_t NumPatchBytes = 0;
   uint32_t Flags = uint32_t(StatepointFlags::None);
 
-  ArrayRef<Use> CallArgs(CS.arg_begin(), CS.arg_end());
-  ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(CS);
+  ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end());
+  ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(Call);
   ArrayRef<Use> TransitionArgs;
   if (auto TransitionBundle =
-      CS.getOperandBundle(LLVMContext::OB_gc_transition)) {
+          Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
     Flags |= uint32_t(StatepointFlags::GCTransition);
     TransitionArgs = TransitionBundle->Inputs;
   }
@@ -1450,21 +1444,21 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
   bool IsDeoptimize = false;
 
   StatepointDirectives SD =
-      parseStatepointDirectivesFromAttrs(CS.getAttributes());
+      parseStatepointDirectivesFromAttrs(Call->getAttributes());
   if (SD.NumPatchBytes)
     NumPatchBytes = *SD.NumPatchBytes;
   if (SD.StatepointID)
     StatepointID = *SD.StatepointID;
 
   // Pass through the requested lowering if any.  The default is live-through.
-  StringRef DeoptLowering = getDeoptLowering(CS);
+  StringRef DeoptLowering = getDeoptLowering(Call);
   if (DeoptLowering.equals("live-in"))
     Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
   else {
     assert(DeoptLowering.equals("live-through") && "Unsupported value!");
   }
 
-  Value *CallTarget = CS.getCalledValue();
+  Value *CallTarget = Call->getCalledValue();
   if (Function *F = dyn_cast<Function>(CallTarget)) {
     if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
       // Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1481,8 +1475,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
       // calls to @llvm.experimental.deoptimize with different argument types in
       // the same module.  This is fine -- we assume the frontend knew what it
       // was doing when generating this kind of IR.
-      CallTarget =
-          F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
+      CallTarget = F->getParent()
+                       ->getOrInsertFunction("__llvm_deoptimize", FTy)
+                       .getCallee();
 
       IsDeoptimize = true;
     }
@@ -1490,57 +1485,56 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
 
   // Create the statepoint given all the arguments
   Instruction *Token = nullptr;
-  if (CS.isCall()) {
-    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
-    CallInst *Call = Builder.CreateGCStatepointCall(
+  if (auto *CI = dyn_cast<CallInst>(Call)) {
+    CallInst *SPCall = Builder.CreateGCStatepointCall(
         StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
         TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
 
-    Call->setTailCallKind(ToReplace->getTailCallKind());
-    Call->setCallingConv(ToReplace->getCallingConv());
+    SPCall->setTailCallKind(CI->getTailCallKind());
+    SPCall->setCallingConv(CI->getCallingConv());
 
     // Currently we will fail on parameter attributes and on certain
     // function attributes.  In case if we can handle this set of attributes -
     // set up function attrs directly on statepoint and return attrs later for
     // gc_result intrinsic.
-    Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+    SPCall->setAttributes(legalizeCallAttributes(CI->getAttributes()));
 
-    Token = Call;
+    Token = SPCall;
 
     // Put the following gc_result and gc_relocate calls immediately after the
     // the old call (which we're about to delete)
-    assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
-    Builder.SetInsertPoint(ToReplace->getNextNode());
-    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
+    assert(CI->getNextNode() && "Not a terminator, must have next!");
+    Builder.SetInsertPoint(CI->getNextNode());
+    Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
   } else {
-    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
+    auto *II = cast<InvokeInst>(Call);
 
     // Insert the new invoke into the old block.  We'll remove the old one in a
     // moment at which point this will become the new terminator for the
     // original block.
-    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
-        StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
-        ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
-        GCArgs, "statepoint_token");
+    InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
+        StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
+        II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
+        "statepoint_token");
 
-    Invoke->setCallingConv(ToReplace->getCallingConv());
+    SPInvoke->setCallingConv(II->getCallingConv());
 
     // Currently we will fail on parameter attributes and on certain
     // function attributes.  In case if we can handle this set of attributes -
     // set up function attrs directly on statepoint and return attrs later for
     // gc_result intrinsic.
-    Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+    SPInvoke->setAttributes(legalizeCallAttributes(II->getAttributes()));
 
-    Token = Invoke;
+    Token = SPInvoke;
 
     // Generate gc relocates in exceptional path
-    BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+    BasicBlock *UnwindBlock = II->getUnwindDest();
     assert(!isa<PHINode>(UnwindBlock->begin()) &&
            UnwindBlock->getUniquePredecessor() &&
            "can't safely insert in this block!");
 
     Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
-    Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+    Builder.SetCurrentDebugLocation(II->getDebugLoc());
 
     // Attach exceptional gc relocates to the landingpad.
     Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
@@ -1551,7 +1545,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
                       Builder);
 
     // Generate gc relocates and returns for normal block
-    BasicBlock *NormalDest = ToReplace->getNormalDest();
+    BasicBlock *NormalDest = II->getNormalDest();
     assert(!isa<PHINode>(NormalDest->begin()) &&
            NormalDest->getUniquePredecessor() &&
            "can't safely insert in this block!");
@@ -1568,16 +1562,15 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
     // transform the tail-call like structure to a call to a void function
     // followed by unreachable to get better codegen.
     Replacements.push_back(
-        DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
+        DeferredReplacement::createDeoptimizeReplacement(Call));
   } else {
     Token->setName("statepoint_token");
-    if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
-      StringRef Name =
-          CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
-      CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+    if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
+      StringRef Name = Call->hasName() ? Call->getName() : "";
+      CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
       GCResult->setAttributes(
           AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
-                             CS.getAttributes().getRetAttributes()));
+                             Call->getAttributes().getRetAttributes()));
 
       // We cannot RAUW or delete CS.getInstruction() because it could be in the
       // live set of some other safepoint, in which case that safepoint's
@@ -1586,10 +1579,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
       // after the live sets have been made explicit in the IR, and we no longer
       // have raw pointers to worry about.
       Replacements.emplace_back(
-          DeferredReplacement::createRAUW(CS.getInstruction(), GCResult));
+          DeferredReplacement::createRAUW(Call, GCResult));
     } else {
-      Replacements.emplace_back(
-          DeferredReplacement::createDelete(CS.getInstruction()));
+      Replacements.emplace_back(DeferredReplacement::createDelete(Call));
     }
   }
 
@@ -1606,7 +1598,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
 // WARNING: Does not do any fixup to adjust users of the original live
 // values.  That's the callers responsibility.
 static void
-makeStatepointExplicit(DominatorTree &DT, CallSite CS,
+makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
                        PartiallyConstructedSafepointRecord &Result,
                        std::vector<DeferredReplacement> &Replacements) {
   const auto &LiveSet = Result.LiveSet;
@@ -1625,7 +1617,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
   assert(LiveVec.size() == BaseVec.size());
 
   // Do the actual rewriting and delete the old statepoint
-  makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
+  makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
 }
 
 // Helper function for the relocationViaAlloca.
@@ -1636,7 +1628,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
 // for sanity checking.
 static void
 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
-                       DenseMap<Value *, Value *> &AllocaMap,
+                       DenseMap<Value *, AllocaInst *> &AllocaMap,
                        DenseSet<Value *> &VisitedLiveValues) {
   for (User *U : GCRelocs) {
     GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
@@ -1671,7 +1663,7 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
 // "insertRelocationStores" but works for rematerialized values.
 static void insertRematerializationStores(
     const RematerializedValueMapTy &RematerializedValues,
-    DenseMap<Value *, Value *> &AllocaMap,
+    DenseMap<Value *, AllocaInst *> &AllocaMap,
     DenseSet<Value *> &VisitedLiveValues) {
   for (auto RematerializedValuePair: RematerializedValues) {
     Instruction *RematerializedValue = RematerializedValuePair.first;
@@ -1704,7 +1696,7 @@ static void relocationViaAlloca(
 #endif
 
   // TODO-PERF: change data structures, reserve
-  DenseMap<Value *, Value *> AllocaMap;
+  DenseMap<Value *, AllocaInst *> AllocaMap;
   SmallVector<AllocaInst *, 200> PromotableAllocas;
   // Used later to chack that we have enough allocas to store all values
   std::size_t NumRematerializedValues = 0;
@@ -1774,7 +1766,7 @@ static void relocationViaAlloca(
       SmallVector<AllocaInst *, 64> ToClobber;
       for (auto Pair : AllocaMap) {
         Value *Def = Pair.first;
-        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+        AllocaInst *Alloca = Pair.second;
 
         // This value was relocated
         if (VisitedLiveValues.count(Def)) {
@@ -1806,7 +1798,7 @@ static void relocationViaAlloca(
   // Update use with load allocas and add store for gc_relocated.
   for (auto Pair : AllocaMap) {
     Value *Def = Pair.first;
-    Value *Alloca = Pair.second;
+    AllocaInst *Alloca = Pair.second;
 
     // We pre-record the uses of allocas so that we dont have to worry about
     // later update that changes the user information..
@@ -1834,13 +1826,15 @@ static void relocationViaAlloca(
         PHINode *Phi = cast<PHINode>(Use);
         for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
           if (Def == Phi->getIncomingValue(i)) {
-            LoadInst *Load = new LoadInst(
-                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+            LoadInst *Load =
+                new LoadInst(Alloca->getAllocatedType(), Alloca, "",
+                             Phi->getIncomingBlock(i)->getTerminator());
             Phi->setIncomingValue(i, Load);
           }
         }
       } else {
-        LoadInst *Load = new LoadInst(Alloca, "", Use);
+        LoadInst *Load =
+            new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
         Use->replaceUsesOfWith(Def, Load);
       }
     }
@@ -1893,25 +1887,25 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
 
 /// Insert holders so that each Value is obviously live through the entire
 /// lifetime of the call.
-static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
+static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
                                  SmallVectorImpl<CallInst *> &Holders) {
   if (Values.empty())
     // No values to hold live, might as well not insert the empty holder
     return;
 
-  Module *M = CS.getInstruction()->getModule();
+  Module *M = Call->getModule();
   // Use a dummy vararg function to actually hold the values live
-  Function *Func = cast<Function>(M->getOrInsertFunction(
-      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
-  if (CS.isCall()) {
+  FunctionCallee Func = M->getOrInsertFunction(
+      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
+  if (isa<CallInst>(Call)) {
     // For call safepoints insert dummy calls right after safepoint
-    Holders.push_back(CallInst::Create(Func, Values, "",
-                                       &*++CS.getInstruction()->getIterator()));
+    Holders.push_back(
+        CallInst::Create(Func, Values, "", &*++Call->getIterator()));
     return;
   }
   // For invoke safepooints insert dummy calls both in normal and
   // exceptional destination blocks
-  auto *II = cast<InvokeInst>(CS.getInstruction());
+  auto *II = cast<InvokeInst>(Call);
   Holders.push_back(CallInst::Create(
       Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
   Holders.push_back(CallInst::Create(
@@ -1919,7 +1913,7 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
 }
 
 static void findLiveReferences(
-    Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+    Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
   GCPtrLivenessData OriginalLivenessData;
   computeLiveInValues(DT, F, OriginalLivenessData);
@@ -2022,7 +2016,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
 // to relocate. Remove this values from the live set, rematerialize them after
 // statepoint and record them in "Info" structure. Note that similar to
 // relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallSite CS,
+static void rematerializeLiveValues(CallBase *Call,
                                     PartiallyConstructedSafepointRecord &Info,
                                     TargetTransformInfo &TTI) {
   const unsigned int ChainLengthThreshold = 10;
@@ -2076,7 +2070,7 @@ static void rematerializeLiveValues(CallSite CS,
 
     // For invokes we need to rematerialize each chain twice - for normal and
     // for unwind basic blocks. Model this by multiplying cost by two.
-    if (CS.isInvoke()) {
+    if (isa<InvokeInst>(Call)) {
       Cost *= 2;
     }
     // If it's too expensive - skip it
@@ -2144,14 +2138,14 @@ static void rematerializeLiveValues(CallSite CS,
 
     // Different cases for calls and invokes. For invokes we need to clone
     // instructions both on normal and unwind path.
-    if (CS.isCall()) {
-      Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+    if (isa<CallInst>(Call)) {
+      Instruction *InsertBefore = Call->getNextNode();
       assert(InsertBefore);
       Instruction *RematerializedValue = rematerializeChain(
           InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
       Info.RematerializedValues[RematerializedValue] = LiveValue;
     } else {
-      InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+      auto *Invoke = cast<InvokeInst>(Call);
 
       Instruction *NormalInsertBefore =
           &*Invoke->getNormalDest()->getFirstInsertionPt();
@@ -2176,25 +2170,25 @@ static void rematerializeLiveValues(CallSite CS,
 
 static bool insertParsePoints(Function &F, DominatorTree &DT,
                               TargetTransformInfo &TTI,
-                              SmallVectorImpl<CallSite> &ToUpdate) {
+                              SmallVectorImpl<CallBase *> &ToUpdate) {
 #ifndef NDEBUG
   // sanity check the input
-  std::set<CallSite> Uniqued;
+  std::set<CallBase *> Uniqued;
   Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
   assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
 
-  for (CallSite CS : ToUpdate)
-    assert(CS.getInstruction()->getFunction() == &F);
+  for (CallBase *Call : ToUpdate)
+    assert(Call->getFunction() == &F);
 #endif
 
   // When inserting gc.relocates for invokes, we need to be able to insert at
   // the top of the successor blocks.  See the comment on
   // normalForInvokeSafepoint on exactly what is needed.  Note that this step
   // may restructure the CFG.
-  for (CallSite CS : ToUpdate) {
-    if (!CS.isInvoke())
+  for (CallBase *Call : ToUpdate) {
+    auto *II = dyn_cast<InvokeInst>(Call);
+    if (!II)
       continue;
-    auto *II = cast<InvokeInst>(CS.getInstruction());
     normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
     normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
   }
@@ -2207,17 +2201,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
   // actual safepoint insertion as arguments.  This ensures reference operands
   // in the deopt argument list are considered live through the safepoint (and
   // thus makes sure they get relocated.)
-  for (CallSite CS : ToUpdate) {
+  for (CallBase *Call : ToUpdate) {
     SmallVector<Value *, 64> DeoptValues;
 
-    for (Value *Arg : GetDeoptBundleOperands(CS)) {
+    for (Value *Arg : GetDeoptBundleOperands(Call)) {
       assert(!isUnhandledGCPointerType(Arg->getType()) &&
              "support for FCA unimplemented");
       if (isHandledGCPointerType(Arg->getType()))
         DeoptValues.push_back(Arg);
     }
 
-    insertUseHolderAfter(CS, DeoptValues, Holders);
+    insertUseHolderAfter(Call, DeoptValues, Holders);
   }
 
   SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
@@ -2319,7 +2313,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
   for (size_t i = 0; i < Records.size(); i++)
     makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
 
-  ToUpdate.clear(); // prevent accident use of invalid CallSites
+  ToUpdate.clear(); // prevent accidental use of invalid calls.
 
   for (auto &PR : Replacements)
     PR.doReplacement();
@@ -2384,7 +2378,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
   return !Records.empty();
 }
 
-// Handles both return values and arguments for Functions and CallSites.
+// Handles both return values and arguments for Functions and calls.
 template <typename AttrHolder>
 static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
                                       unsigned Index) {
@@ -2476,12 +2470,13 @@ static void stripNonValidDataFromBody(Function &F) {
 
     stripInvalidMetadataFromInstruction(I);
 
-    if (CallSite CS = CallSite(&I)) {
-      for (int i = 0, e = CS.arg_size(); i != e; i++)
-        if (isa<PointerType>(CS.getArgument(i)->getType()))
-          RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex);
-      if (isa<PointerType>(CS.getType()))
-        RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
+    if (auto *Call = dyn_cast<CallBase>(&I)) {
+      for (int i = 0, e = Call->arg_size(); i != e; i++)
+        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
+          RemoveNonValidAttrAtIndex(Ctx, *Call,
+                                    i + AttributeList::FirstArgIndex);
+      if (isa<PointerType>(Call->getType()))
+        RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex);
     }
   }
 
@@ -2526,12 +2521,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
   assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
 
   auto NeedsRewrite = [&TLI](Instruction &I) {
-    if (ImmutableCallSite CS = ImmutableCallSite(&I))
-      return !callsGCLeafFunction(CS, TLI) && !isStatepoint(CS);
+    if (const auto *Call = dyn_cast<CallBase>(&I))
+      return !callsGCLeafFunction(Call, TLI) && !isStatepoint(Call);
     return false;
   };
 
-
   // Delete any unreachable statepoints so that we don't have unrewritten
   // statepoints surviving this pass.  This makes testing easier and the
   // resulting IR less confusing to human readers.
@@ -2543,7 +2537,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
   // Gather all the statepoints which need rewritten.  Be careful to only
   // consider those in reachable code since we need to ask dominance queries
   // when rewriting.  We'll delete the unreachable ones in a moment.
-  SmallVector<CallSite, 64> ParsePointNeeded;
+  SmallVector<CallBase *, 64> ParsePointNeeded;
   for (Instruction &I : instructions(F)) {
     // TODO: only the ones with the flag set!
     if (NeedsRewrite(I)) {
@@ -2553,7 +2547,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
       // isReachableFromEntry() returns true.
       assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
-      ParsePointNeeded.push_back(CallSite(&I));
+      ParsePointNeeded.push_back(cast<CallBase>(&I));
     }
   }
 
@@ -2602,6 +2596,33 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
       }
   }
 
+  // Nasty workaround - The base computation code in the main algorithm doesn't
+  // consider the fact that a GEP can be used to convert a scalar to a vector.
+  // The right fix for this is to integrate GEPs into the base rewriting
+  // algorithm properly; this is just a short-term workaround to prevent
+  // crashes by canonicalizing such GEPs into fully vector GEPs.
+  for (Instruction &I : instructions(F)) {
+    if (!isa<GetElementPtrInst>(I))
+      continue;
+
+    unsigned VF = 0;
+    for (unsigned i = 0; i < I.getNumOperands(); i++)
+      if (I.getOperand(i)->getType()->isVectorTy()) {
+        assert(VF == 0 ||
+               VF == I.getOperand(i)->getType()->getVectorNumElements());
+        VF = I.getOperand(i)->getType()->getVectorNumElements();
+      }
+
+    // It's the vector to scalar traversal through the pointer operand which
+    // confuses base pointer rewriting, so limit ourselves to that case.
+    if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
+      IRBuilder<> B(&I);
+      auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
+      I.setOperand(0, Splat);
+      MadeChange = true;
+    }
+  }
+
   MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
   return MadeChange;
 }
@@ -2786,11 +2807,10 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
 }
 
 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
-                                  CallSite CS,
+                                  CallBase *Call,
                                   PartiallyConstructedSafepointRecord &Info) {
-  Instruction *Inst = CS.getInstruction();
   StatepointLiveSetTy Updated;
-  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+  findLiveSetAtInst(Call, RevisedLivenessData, Updated);
 
   // We may have base pointers which are now live that weren't before.  We need
   // to update the PointerToBase structure to reflect this.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 2f6ed05c023b..4093e50ce899 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1,9 +1,8 @@
 //===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -210,11 +210,11 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   /// TrackedRetVals - If we are tracking arguments into and the return
   /// value out of a function, it will have an entry in this map, indicating
   /// what the known return value for the function is.
-  DenseMap<Function *, LatticeVal> TrackedRetVals;
+  MapVector<Function *, LatticeVal> TrackedRetVals;
 
   /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
   /// that return multiple values.
-  DenseMap<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
+  MapVector<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
 
   /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
   /// represented here for efficient lookup.
@@ -372,7 +372,7 @@ public:
   }
 
   /// getTrackedRetVals - Get the inferred return value map.
-  const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+  const MapVector<Function*, LatticeVal> &getTrackedRetVals() {
     return TrackedRetVals;
   }
 
@@ -614,6 +614,7 @@ private:
 
   void visitCastInst(CastInst &I);
   void visitSelectInst(SelectInst &I);
+  void visitUnaryOperator(Instruction &I);
   void visitBinaryOperator(Instruction &I);
   void visitCmpInst(CmpInst &I);
   void visitExtractValueInst(ExtractValueInst &EVI);
@@ -639,6 +640,11 @@ private:
     visitTerminator(II);
   }
 
+  void visitCallBrInst    (CallBrInst &CBI) {
+    visitCallSite(&CBI);
+    visitTerminator(CBI);
+  }
+
   void visitCallSite      (CallSite CS);
   void visitResumeInst    (ResumeInst &I) { /*returns void*/ }
   void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
@@ -734,6 +740,13 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
     return;
   }
 
+  // In case of callbr, we pessimistically assume that all successors are
+  // feasible.
+  if (isa<CallBrInst>(&TI)) {
+    Succs.assign(TI.getNumSuccessors(), true);
+    return;
+  }
+
   LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
   llvm_unreachable("SCCP: Don't know how to handle this terminator!");
 }
@@ -825,7 +838,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
 
   // If we are tracking the return value of this function, merge it in.
   if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
-    DenseMap<Function*, LatticeVal>::iterator TFRVI =
+    MapVector<Function*, LatticeVal>::iterator TFRVI =
       TrackedRetVals.find(F);
     if (TFRVI != TrackedRetVals.end()) {
       mergeInValue(TFRVI->second, F, getValueState(ResultOp));
@@ -958,6 +971,29 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
   markOverdefined(&I);
 }
 
+// Handle Unary Operators: fold a constant operand, else follow its state.
+void SCCPSolver::visitUnaryOperator(Instruction &I) {
+  LatticeVal V0State = getValueState(I.getOperand(0));
+
+  LatticeVal &IV = ValueState[&I];
+  if (IV.isOverdefined()) return; // Already overdefined; nothing to do.
+
+  if (V0State.isConstant()) {
+    Constant *C = ConstantExpr::get(I.getOpcode(), V0State.getConstant());
+
+    // op Y -> undef: leave IV untouched rather than marking it constant.
+    if (isa<UndefValue>(C))
+      return;
+    return (void)markConstant(IV, &I, C);
+  }
+
+  // Operand is neither constant nor overdefined: wait for it to resolve.
+  if (!V0State.isOverdefined())
+    return;
+
+  markOverdefined(&I); // Operand is overdefined, so the result is too.
+}
+
 // Handle Binary Operators.
 void SCCPSolver::visitBinaryOperator(Instruction &I) {
   LatticeVal V1State = getValueState(I.getOperand(0));
@@ -1232,7 +1268,7 @@ CallOverdefined:
     // Otherwise, if we have a single return value case, and if the function is
     // a declaration, maybe we can constant fold it.
     if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
-        canConstantFoldCallTo(CS, F)) {
+        canConstantFoldCallTo(cast<CallBase>(CS.getInstruction()), F)) {
       SmallVector<Constant*, 8> Operands;
       for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
            AI != E; ++AI) {
@@ -1253,7 +1289,8 @@ CallOverdefined:
 
       // If we can constant fold this, mark the result of the call as a
       // constant.
-      if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) {
+      if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), F,
+                                         Operands, TLI)) {
         // call -> undef.
         if (isa<UndefValue>(C))
           return;
@@ -1315,7 +1352,7 @@ CallOverdefined:
       mergeInValue(getStructValueState(I, i), I,
                    TrackedMultipleRetVals[std::make_pair(F, i)]);
   } else {
-    DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+    MapVector<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
     if (TFRVI == TrackedRetVals.end())
       goto CallOverdefined;  // Not tracking this callee.
 
@@ -1472,6 +1509,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         else
           markOverdefined(&I);
         return true;
+      case Instruction::FNeg:
+        break; // fneg undef -> undef
       case Instruction::ZExt:
       case Instruction::SExt:
       case Instruction::FPToUI:
@@ -1598,6 +1637,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         return true;
       case Instruction::Call:
       case Instruction::Invoke:
+      case Instruction::CallBr:
         // There are two reasons a call can have an undef result
         // 1. It could be tracked.
         // 2. It could be constant-foldable.
@@ -2070,12 +2110,22 @@ bool llvm::runIPSCCP(
         // If we have forced an edge for an indeterminate value, then force the
         // terminator to fold to that edge.
         forceIndeterminateEdge(I, Solver);
-        bool Folded = ConstantFoldTerminator(I->getParent(),
+        BasicBlock *InstBB = I->getParent();
+        bool Folded = ConstantFoldTerminator(InstBB,
                                              /*DeleteDeadConditions=*/false,
                                              /*TLI=*/nullptr, &DTU);
         assert(Folded &&
               "Expect TermInst on constantint or blockaddress to be folded");
         (void) Folded;
+        // If we folded the terminator to an unconditional branch to another
+        // dead block, replace it with Unreachable, to avoid trying to fold that
+        // branch again.
+        BranchInst *BI = cast<BranchInst>(InstBB->getTerminator());
+        if (BI && BI->isUnconditional() &&
+            !Solver.isBlockExecutable(BI->getSuccessor(0))) {
+          InstBB->getTerminator()->eraseFromParent();
+          new UnreachableInst(InstBB->getContext(), InstBB);
+        }
       }
       // Mark dead BB for deletion.
       DTU.deleteBB(DeadBB);
@@ -2109,7 +2159,7 @@ bool llvm::runIPSCCP(
   // whether other functions are optimizable.
   SmallVector<ReturnInst*, 8> ReturnsToZap;
 
-  const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+  const MapVector<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
   for (const auto &I : RV) {
     Function *F = I.first;
     if (I.second.isOverdefined() || F->getReturnType()->isVoidTy())
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index eab77cf4cda9..33f90d0b01e4 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1,9 +1,8 @@
 //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -222,13 +221,6 @@ public:
 
 } // end anonymous namespace
 
-namespace llvm {
-
-template <typename T> struct isPodLike;
-template <> struct isPodLike<Slice> { static const bool value = true; };
-
-} // end namespace llvm
-
 /// Representation of the alloca slices.
 ///
 /// This class represents the slices of an alloca which are formed by its
@@ -721,6 +713,13 @@ private:
     return Base::visitBitCastInst(BC);
   }
 
+  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    if (ASC.use_empty())
+      return markAsDead(ASC);
+
+    return Base::visitAddrSpaceCastInst(ASC);
+  }
+
   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     if (GEPI.use_empty())
       return markAsDead(GEPI);
@@ -784,7 +783,10 @@ private:
     if (!IsOffsetKnown)
       return PI.setAborted(&LI);
 
-    const DataLayout &DL = LI.getModule()->getDataLayout();
+    if (LI.isVolatile() &&
+        LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType());
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -796,7 +798,10 @@ private:
     if (!IsOffsetKnown)
       return PI.setAborted(&SI);
 
-    const DataLayout &DL = SI.getModule()->getDataLayout();
+    if (SI.isVolatile() &&
+        SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
 
     // If this memory access can be shown to *statically* extend outside the
@@ -831,6 +836,11 @@ private:
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
+    // Don't replace this with a store with a different address space.  TODO:
+    // Use a store with the casted new alloca?
+    if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
+      return PI.setAborted(&II);
+
     insertUse(II, Offset, Length ? Length->getLimitedValue()
                                  : AllocSize - Offset.getLimitedValue(),
               (bool)Length);
@@ -850,6 +860,13 @@ private:
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
+    // Don't replace this with a load/store with a different address space.
+    // TODO: Use a store with the casted new alloca?
+    if (II.isVolatile() &&
+        (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
+         II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
+      return PI.setAborted(&II);
+
     // This side of the transfer is completely out-of-bounds, and so we can
     // nuke the entire transfer. However, we also need to nuke the other side
     // if already added to our partitions.
@@ -957,7 +974,7 @@ private:
         if (!GEP->hasAllZeroIndices())
           return GEP;
       } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
-                 !isa<SelectInst>(I)) {
+                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
         return I;
       }
 
@@ -1173,12 +1190,16 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
 /// FIXME: This should be hoisted into a generic utility, likely in
 /// Transforms/Util/Local.h
 static bool isSafePHIToSpeculate(PHINode &PN) {
+  const DataLayout &DL = PN.getModule()->getDataLayout();
+
   // For now, we can only do this promotion if the load is in the same block
   // as the PHI, and if there are no stores between the phi and load.
   // TODO: Allow recursive phi users.
   // TODO: Allow stores.
   BasicBlock *BB = PN.getParent();
   unsigned MaxAlign = 0;
+  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
+  APInt MaxSize(APWidth, 0);
   bool HaveLoad = false;
   for (User *U : PN.users()) {
     LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1197,15 +1218,15 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
       if (BBI->mayWriteToMemory())
         return false;
 
+    uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType());
     MaxAlign = std::max(MaxAlign, LI->getAlignment());
+    MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
     HaveLoad = true;
   }
 
   if (!HaveLoad)
     return false;
 
-  const DataLayout &DL = PN.getModule()->getDataLayout();
-
   // We can only transform this if it is safe to push the loads into the
   // predecessor blocks. The only thing to watch out for is that we can't put
   // a possibly trapping load in the predecessor if it is a critical edge.
@@ -1227,7 +1248,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
     // If this pointer is always safe to load, or if we can prove that there
     // is already a load in the block, then we can move the load to the pred
     // block.
-    if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
+    if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
       continue;
 
     return false;
@@ -1239,15 +1260,14 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
 static void speculatePHINodeLoads(PHINode &PN) {
   LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");
 
-  Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
+  Type *LoadTy = SomeLoad->getType();
   IRBuilderTy PHIBuilder(&PN);
   PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                         PN.getName() + ".sroa.speculated");
 
   // Get the AA tags and alignment to use from one of the loads.  It doesn't
   // matter which one we get and if any differ.
-  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
-
   AAMDNodes AATags;
   SomeLoad->getAAMetadata(AATags);
   unsigned Align = SomeLoad->getAlignment();
@@ -1278,7 +1298,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
     IRBuilderTy PredBuilder(TI);
 
     LoadInst *Load = PredBuilder.CreateLoad(
-        InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
+        LoadTy, InVal,
+        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
     ++NumLoadsSpeculated;
     Load->setAlignment(Align);
     if (AATags)
@@ -1317,9 +1338,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
     // Both operands to the select need to be dereferenceable, either
     // absolutely (e.g. allocas) or at this point because we can see other
     // accesses to it.
-    if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
+    if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(),
+                                     DL, LI))
       return false;
-    if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
+    if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(),
+                                     DL, LI))
       return false;
   }
 
@@ -1338,10 +1361,10 @@ static void speculateSelectInstLoads(SelectInst &SI) {
     assert(LI->isSimple() && "We only speculate simple loads");
 
     IRB.SetInsertPoint(LI);
-    LoadInst *TL =
-        IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
-    LoadInst *FL =
-        IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+    LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+                                  LI->getName() + ".sroa.speculate.load.true");
+    LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+                                  LI->getName() + ".sroa.speculate.load.false");
     NumLoadsSpeculated += 2;
 
     // Transfer alignment and AA info if present.
@@ -1379,8 +1402,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
   if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
     return BasePtr;
 
-  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
-                               NamePrefix + "sroa_idx");
+  return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
+                               BasePtr, Indices, NamePrefix + "sroa_idx");
 }
 
 /// Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1569,7 +1592,14 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
   Value *Int8Ptr = nullptr;
   APInt Int8PtrOffset(Offset.getBitWidth(), 0);
 
-  Type *TargetTy = PointerTy->getPointerElementType();
+  PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
+  Type *TargetTy = TargetPtrTy->getElementType();
+
+  // As `addrspacecast` is now looked through, `Ptr` (the storage pointer) may
+  // have a different address space from the expected `PointerTy` (the pointer
+  // to be used). Adjust the pointer type based on the original storage pointer.
+  auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
+  PointerTy = TargetTy->getPointerTo(AS);
 
   do {
     // First fold any existing GEPs into the offset.
@@ -1599,7 +1629,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
       OffsetBasePtr = Ptr;
       // If we also found a pointer of the right type, we're done.
       if (P->getType() == PointerTy)
-        return P;
+        break;
     }
 
     // Stash this pointer if we've found an i8*.
@@ -1638,8 +1668,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
   Ptr = OffsetPtr;
 
   // On the off chance we were targeting i8*, guard the bitcast here.
-  if (Ptr->getType() != PointerTy)
-    Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
+  if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
+    Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
+                                                  TargetPtrTy,
+                                                  NamePrefix + "sroa_cast");
+  }
 
   return Ptr;
 }
@@ -2418,14 +2451,16 @@ private:
     unsigned EndIndex = getIndex(NewEndOffset);
     assert(EndIndex > BeginIndex && "Empty vector!");
 
-    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+    Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                     NewAI.getAlignment(), "load");
     return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
   }
 
   Value *rewriteIntegerLoad(LoadInst &LI) {
     assert(IntTy && "We cannot insert an integer to the alloca");
     assert(!LI.isVolatile());
-    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+    Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                     NewAI.getAlignment(), "load");
     V = convertValue(DL, IRB, V, IntTy);
     assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
@@ -2469,7 +2504,8 @@ private:
                (canConvertValue(DL, NewAllocaTy, TargetTy) ||
                 (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
                  TargetTy->isIntegerTy()))) {
-      LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                              NewAI.getAlignment(),
                                               LI.isVolatile(), LI.getName());
       if (AATags)
         NewLI->setAAMetadata(AATags);
@@ -2505,9 +2541,9 @@ private:
           }
     } else {
       Type *LTy = TargetTy->getPointerTo(AS);
-      LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
-                                              getSliceAlign(TargetTy),
-                                              LI.isVolatile(), LI.getName());
+      LoadInst *NewLI = IRB.CreateAlignedLoad(
+          TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy),
+          LI.isVolatile(), LI.getName());
       if (AATags)
         NewLI->setAAMetadata(AATags);
       if (LI.isVolatile())
@@ -2524,8 +2560,7 @@ private:
              "Only integer type loads and stores are split");
       assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
              "Split load isn't smaller than original load");
-      assert(LI.getType()->getIntegerBitWidth() ==
-                 DL.getTypeStoreSizeInBits(LI.getType()) &&
+      assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
              "Non-byte-multiple bit width");
       // Move the insertion point just past the load so that we can refer to it.
       IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
@@ -2533,8 +2568,8 @@ private:
       // basis for the new value. This allows us to replace the uses of LI with
       // the computed value, and then replace the placeholder with LI, leaving
       // LI only used for this computation.
-      Value *Placeholder =
-          new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
+      Value *Placeholder = new LoadInst(
+          LI.getType(), UndefValue::get(LI.getType()->getPointerTo(AS)));
       V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
                         "insert");
       LI.replaceAllUsesWith(V);
@@ -2565,7 +2600,8 @@ private:
         V = convertValue(DL, IRB, V, SliceTy);
 
       // Mix in the existing elements.
-      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+      Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                         NewAI.getAlignment(), "load");
       V = insertVector(IRB, Old, V, BeginIndex, "vec");
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2581,8 +2617,8 @@ private:
     assert(IntTy && "We cannot extract an integer from the alloca");
     assert(!SI.isVolatile());
     if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
-      Value *Old =
-          IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+      Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                         NewAI.getAlignment(), "oldload");
       Old = convertValue(DL, IRB, Old, IntTy);
       assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
       uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
@@ -2619,8 +2655,7 @@ private:
       assert(!SI.isVolatile());
       assert(V->getType()->isIntegerTy() &&
              "Only integer type loads and stores are split");
-      assert(V->getType()->getIntegerBitWidth() ==
-                 DL.getTypeStoreSizeInBits(V->getType()) &&
+      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
              "Non-byte-multiple bit width");
       IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
       V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
@@ -2731,15 +2766,26 @@ private:
 
     Type *AllocaTy = NewAI.getAllocatedType();
     Type *ScalarTy = AllocaTy->getScalarType();
+    
+    const bool CanContinue = [&]() {
+      if (VecTy || IntTy)
+        return true;
+      if (BeginOffset > NewAllocaBeginOffset ||
+          EndOffset < NewAllocaEndOffset)
+        return false;
+      auto *C = cast<ConstantInt>(II.getLength());
+      if (C->getBitWidth() > 64)
+        return false;
+      const auto Len = C->getZExtValue();
+      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
+      auto *SrcTy = VectorType::get(Int8Ty, Len);
+      return canConvertValue(DL, SrcTy, AllocaTy) &&
+        DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy));
+    }();
 
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memset.
-    if (!VecTy && !IntTy &&
-        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
-         SliceSize != DL.getTypeStoreSize(AllocaTy) ||
-         !AllocaTy->isSingleValueType() ||
-         !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
-         DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) {
+    if (!CanContinue) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
       CallInst *New = IRB.CreateMemSet(
@@ -2774,8 +2820,8 @@ private:
       if (NumElements > 1)
         Splat = getVectorSplat(Splat, NumElements);
 
-      Value *Old =
-          IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+      Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                         NewAI.getAlignment(), "oldload");
       V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
     } else if (IntTy) {
       // If this is a memset on an alloca where we can widen stores, insert the
@@ -2787,8 +2833,8 @@ private:
 
       if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                     EndOffset != NewAllocaBeginOffset)) {
-        Value *Old =
-            IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+        Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                           NewAI.getAlignment(), "oldload");
         Old = convertValue(DL, IRB, Old, IntTy);
         uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
         V = insertInteger(DL, IRB, Old, V, Offset, "insert");
@@ -2948,18 +2994,18 @@ private:
 
     // Reset the other pointer type to match the register type we're going to
     // use, but using the address space of the original other pointer.
+    Type *OtherTy;
     if (VecTy && !IsWholeAlloca) {
       if (NumElements == 1)
-        OtherPtrTy = VecTy->getElementType();
+        OtherTy = VecTy->getElementType();
       else
-        OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
-
-      OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
+        OtherTy = VectorType::get(VecTy->getElementType(), NumElements);
     } else if (IntTy && !IsWholeAlloca) {
-      OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+      OtherTy = SubIntTy;
     } else {
-      OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
+      OtherTy = NewAllocaTy;
     }
+    OtherPtrTy = OtherTy->getPointerTo(OtherAS);
 
     Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
                                    OtherPtr->getName() + ".");
@@ -2973,28 +3019,30 @@ private:
 
     Value *Src;
     if (VecTy && !IsWholeAlloca && !IsDest) {
-      Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+      Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                  NewAI.getAlignment(), "load");
       Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
     } else if (IntTy && !IsWholeAlloca && !IsDest) {
-      Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+      Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                  NewAI.getAlignment(), "load");
       Src = convertValue(DL, IRB, Src, IntTy);
       uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
       Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
     } else {
-      LoadInst *Load = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
-                                             "copyload");
+      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
+                                             II.isVolatile(), "copyload");
       if (AATags)
         Load->setAAMetadata(AATags);
       Src = Load;
     }
 
     if (VecTy && !IsWholeAlloca && IsDest) {
-      Value *Old =
-          IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+      Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                         NewAI.getAlignment(), "oldload");
       Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
     } else if (IntTy && !IsWholeAlloca && IsDest) {
-      Value *Old =
-          IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+      Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                         NewAI.getAlignment(), "oldload");
       Old = convertValue(DL, IRB, Old, IntTy);
       uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
       Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
@@ -3031,7 +3079,10 @@ private:
     ConstantInt *Size =
         ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
                          NewEndOffset - NewBeginOffset);
-    Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+    // Lifetime intrinsics always expect an i8* so directly get such a pointer
+    // for the new alloca slice.
+    Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+    Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
     Value *New;
     if (II.getIntrinsicID() == Intrinsic::lifetime_start)
       New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -3072,8 +3123,9 @@ private:
         continue;
       }
 
-      assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
-             isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+      assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
+             isa<PHINode>(I) || isa<SelectInst>(I) ||
+             isa<GetElementPtrInst>(I));
       for (User *U : I->users())
         if (Visited.insert(cast<Instruction>(U)).second)
           Uses.push_back(cast<Instruction>(U));
@@ -3297,8 +3349,8 @@ private:
       assert(Ty->isSingleValueType());
       // Load the single value and insert it using the indices.
       Value *GEP =
-          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
-      LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
+          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
+      LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
       if (AATags)
         Load->setAAMetadata(AATags);
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3342,7 +3394,7 @@ private:
       Value *ExtractValue =
           IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
       Value *InBoundsGEP =
-          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
+          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
       StoreInst *Store =
           IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
       if (AATags)
@@ -3374,6 +3426,11 @@ private:
     return false;
   }
 
+  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+    enqueueUsers(ASC);
+    return false;
+  }
+
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     enqueueUsers(GEPI);
     return false;
@@ -3792,6 +3849,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
       auto AS = LI->getPointerAddressSpace();
       auto *PartPtrTy = PartTy->getPointerTo(AS);
       LoadInst *PLoad = IRB.CreateAlignedLoad(
+          PartTy,
           getAdjustedPtr(IRB, DL, BasePtr,
                          APInt(DL.getIndexSizeInBits(AS), PartOffset),
                          PartPtrTy, BasePtr->getName() + "."),
@@ -3933,6 +3991,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         IRB.SetInsertPoint(LI);
         auto AS = LI->getPointerAddressSpace();
         PLoad = IRB.CreateAlignedLoad(
+            PartTy,
             getAdjustedPtr(IRB, DL, LoadBasePtr,
                            APInt(DL.getIndexSizeInBits(AS), PartOffset),
                            LoadPartPtrTy, LoadBasePtr->getName() + "."),
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 976daf4c78c2..869cf00e0a89 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -1,9 +1,8 @@
 //===-- Scalar.cpp --------------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,6 +62,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeJumpThreadingPass(Registry);
   initializeLegacyLICMPassPass(Registry);
   initializeLegacyLoopSinkPassPass(Registry);
+  initializeLoopFuseLegacyPass(Registry);
   initializeLoopDataPrefetchLegacyPassPass(Registry);
   initializeLoopDeletionLegacyPassPass(Registry);
   initializeLoopAccessLegacyAnalysisPass(Registry);
@@ -81,8 +81,9 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLowerAtomicLegacyPassPass(Registry);
   initializeLowerExpectIntrinsicPass(Registry);
   initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+  initializeLowerWidenableConditionLegacyPassPass(Registry);
   initializeMemCpyOptLegacyPassPass(Registry);
-  initializeMergeICmpsPass(Registry);
+  initializeMergeICmpsLegacyPassPass(Registry);
   initializeMergedLoadStoreMotionLegacyPassPass(Registry);
   initializeNaryReassociateLegacyPassPass(Registry);
   initializePartiallyInlineLibCallsLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 5eb3fdab6d5c..2ee1a3a95f2a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -1,9 +1,8 @@
 //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,6 +124,18 @@ struct ICmpSplitter {
   ICmpInst &ICI;
 };
 
+// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
+// a unary operator like UO called Name with operand X.
+struct UnarySplitter {
+  UnarySplitter(UnaryOperator &uo) : UO(uo) {}
+
+  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
+    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
+  }
+
+  UnaryOperator &UO;
+};
+
 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
 // a binary operator like BO called Name with operands X and Y.
 struct BinarySplitter {
@@ -174,6 +185,7 @@ public:
   bool visitSelectInst(SelectInst &SI);
   bool visitICmpInst(ICmpInst &ICI);
   bool visitFCmpInst(FCmpInst &FCI);
+  bool visitUnaryOperator(UnaryOperator &UO);
   bool visitBinaryOperator(BinaryOperator &BO);
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI);
@@ -188,11 +200,12 @@ private:
   Scatterer scatter(Instruction *Point, Value *V);
   void gather(Instruction *Op, const ValueVector &CV);
   bool canTransferMetadata(unsigned Kind);
-  void transferMetadata(Instruction *Op, const ValueVector &CV);
+  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
   bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
                        const DataLayout &DL);
   bool finish();
 
+  template<typename T> bool splitUnary(Instruction &, const T &);
   template<typename T> bool splitBinary(Instruction &, const T &);
 
   bool splitCall(CallInst &CI);
@@ -246,14 +259,13 @@ Value *Scatterer::operator[](unsigned I) {
     return CV[I];
   IRBuilder<> Builder(BB, BBI);
   if (PtrTy) {
+    Type *ElTy = PtrTy->getElementType()->getVectorElementType();
     if (!CV[0]) {
-      Type *Ty =
-        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
-                         PtrTy->getAddressSpace());
-      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+      Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
+      CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
     }
     if (I != 0)
-      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
+      CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
                                          V->getName() + ".i" + Twine(I));
   } else {
     // Search through a chain of InsertElementInsts looking for element I.
@@ -349,7 +361,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
   for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
     Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
 
-  transferMetadata(Op, CV);
+  transferMetadataAndIRFlags(Op, CV);
 
   // If we already have a scattered form of Op (created from ExtractElements
   // of Op itself), replace them with the new form.
@@ -385,7 +397,8 @@ bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
 
 // Transfer metadata from Op to the instructions in CV if it is known
 // to be safe to do so.
-void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
+                                                   const ValueVector &CV) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
   Op->getAllMetadataOtherThanDebugLoc(MDs);
   for (unsigned I = 0, E = CV.size(); I != E; ++I) {
@@ -393,6 +406,7 @@ void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV)
       for (const auto &MD : MDs)
         if (canTransferMetadata(MD.first))
           New->setMetadata(MD.first, MD.second);
+      New->copyIRFlags(Op);
       if (Op->getDebugLoc() && !New->getDebugLoc())
         New->setDebugLoc(Op->getDebugLoc());
     }
@@ -410,8 +424,7 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
 
   // Check that we're dealing with full-byte elements.
   Layout.ElemTy = Layout.VecTy->getElementType();
-  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
-      DL.getTypeStoreSizeInBits(Layout.ElemTy))
+  if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
     return false;
 
   if (Alignment)
@@ -422,6 +435,26 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
   return true;
 }
 
+// Scalarize one-operand instruction I, using Split(Builder, X, Name)
+// to create an instruction like I with operand X and name Name.
+template<typename Splitter>
+bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
+  VectorType *VT = dyn_cast<VectorType>(I.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(&I);
+  Scatterer Op = scatter(&I, I.getOperand(0));
+  assert(Op.size() == NumElems && "Mismatched unary operation");
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+    Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
+  gather(&I, Res);
+  return true;
+}
+
 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
 // to create an instruction like I with operands X and Y and name Name.
 template<typename Splitter>
@@ -554,6 +587,10 @@ bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
   return splitBinary(FCI, FCmpSplitter(FCI));
 }
 
+bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
+  return splitUnary(UO, UnarySplitter(UO));
+}
+
 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
   return splitBinary(BO, BinarySplitter(BO));
 }
@@ -744,7 +781,8 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
   Res.resize(NumElems);
 
   for (unsigned I = 0; I < NumElems; ++I)
-    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+    Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
+                                       Layout.getElemAlign(I),
                                        LI.getName() + ".i" + Twine(I));
   gather(&LI, Res);
   return true;
@@ -773,7 +811,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
     unsigned Align = Layout.getElemAlign(I);
     Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
   }
-  transferMetadata(&SI, Stores);
+  transferMetadataAndIRFlags(&SI, Stores);
   return true;
 }
 
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 967f4a42a8fb..f6a12fb13142 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1,9 +1,8 @@
 //===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5a67178cef37..aeac6f548b32 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1,9 +1,8 @@
 ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -181,14 +180,9 @@ static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
                                                   BasicBlock &UnswitchedSucc,
                                                   BasicBlock &NormalSucc) {
   IRBuilder<> IRB(&BB);
-  Value *Cond = Invariants.front();
-  for (Value *Invariant :
-       make_range(std::next(Invariants.begin()), Invariants.end()))
-    if (Direction)
-      Cond = IRB.CreateOr(Cond, Invariant);
-    else
-      Cond = IRB.CreateAnd(Cond, Invariant);
-
+  
+  Value *Cond = Direction ? IRB.CreateOr(Invariants) :
+    IRB.CreateAnd(Invariants);
   IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                    Direction ? &NormalSucc : &UnswitchedSucc);
 }
@@ -268,7 +262,8 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
 /// loops reachable and need to move the current loop up the loop nest or even
 /// to an entirely separate nest.
 static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
-                                 DominatorTree &DT, LoopInfo &LI) {
+                                 DominatorTree &DT, LoopInfo &LI,
+                                 MemorySSAUpdater *MSSAU) {
   // If the loop is already at the top level, we can't hoist it anywhere.
   Loop *OldParentL = L.getParentLoop();
   if (!OldParentL)
@@ -329,7 +324,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
     // unswitching it is possible to get new non-dedicated exits out of parent
     // loop so let's conservatively form dedicated exit blocks and figure out
     // if we can optimize later.
-    formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
+    formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
+                            /*PreserveLCSSA*/ true);
   }
 }
 
@@ -536,7 +532,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
   // If this was full unswitching, we may have changed the nesting relationship
   // for this loop so hoist it to its correct parent if needed.
   if (FullUnswitch)
-    hoistLoopToNewParent(L, *NewPH, DT, LI);
+    hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
   LLVM_DEBUG(dbgs() << "    done: unswitching trivial branch...\n");
   ++NumTrivial;
@@ -590,11 +589,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
       ExitCaseIndices.push_back(Case.getCaseIndex());
   }
   BasicBlock *DefaultExitBB = nullptr;
+  SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
+      SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
   if (!L.contains(SI.getDefaultDest()) &&
       areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
-      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) {
     DefaultExitBB = SI.getDefaultDest();
-  else if (ExitCaseIndices.empty())
+  } else if (ExitCaseIndices.empty())
     return false;
 
   LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n");
@@ -618,8 +619,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
 
   // Store the exit cases into a separate data structure and remove them from
   // the switch.
-  SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+  SmallVector<std::tuple<ConstantInt *, BasicBlock *,
+                         SwitchInstProfUpdateWrapper::CaseWeightOpt>,
+              4> ExitCases;
   ExitCases.reserve(ExitCaseIndices.size());
+  SwitchInstProfUpdateWrapper SIW(SI);
   // We walk the case indices backwards so that we remove the last case first
   // and don't disrupt the earlier indices.
   for (unsigned Index : reverse(ExitCaseIndices)) {
@@ -629,9 +633,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
     if (!ExitL || ExitL->contains(OuterL))
       OuterL = ExitL;
     // Save the value of this case.
-    ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+    auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
+    ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
     // Delete the unswitched cases.
-    SI.removeCase(CaseI);
+    SIW.removeCase(CaseI);
   }
 
   if (SE) {
@@ -669,6 +674,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
 
   // Now add the unswitched switch.
   auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+  SwitchInstProfUpdateWrapper NewSIW(*NewSI);
 
   // Rewrite the IR for the unswitched basic blocks. This requires two steps.
   // First, we split any exit blocks with remaining in-loop predecessors. Then
@@ -696,9 +702,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
   }
   // Note that we must use a reference in the for loop so that we update the
   // container.
-  for (auto &CasePair : reverse(ExitCases)) {
+  for (auto &ExitCase : reverse(ExitCases)) {
     // Grab a reference to the exit block in the pair so that we can update it.
-    BasicBlock *ExitBB = CasePair.second;
+    BasicBlock *ExitBB = std::get<1>(ExitCase);
 
     // If this case is the last edge into the exit block, we can simply reuse it
     // as it will no longer be a loop exit. No mapping necessary.
@@ -720,27 +726,39 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
                                                 /*FullUnswitch*/ true);
     }
     // Update the case pair to point to the split block.
-    CasePair.second = SplitExitBB;
+    std::get<1>(ExitCase) = SplitExitBB;
   }
 
   // Now add the unswitched cases. We do this in reverse order as we built them
   // in reverse order.
-  for (auto CasePair : reverse(ExitCases)) {
-    ConstantInt *CaseVal = CasePair.first;
-    BasicBlock *UnswitchedBB = CasePair.second;
+  for (auto &ExitCase : reverse(ExitCases)) {
+    ConstantInt *CaseVal = std::get<0>(ExitCase);
+    BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
 
-    NewSI->addCase(CaseVal, UnswitchedBB);
+    NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
   }
 
   // If the default was unswitched, re-point it and add explicit cases for
   // entering the loop.
   if (DefaultExitBB) {
-    NewSI->setDefaultDest(DefaultExitBB);
+    NewSIW->setDefaultDest(DefaultExitBB);
+    NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
 
     // We removed all the exit cases, so we just copy the cases to the
     // unswitched switch.
-    for (auto Case : SI.cases())
-      NewSI->addCase(Case.getCaseValue(), NewPH);
+    for (const auto &Case : SI.cases())
+      NewSIW.addCase(Case.getCaseValue(), NewPH,
+                     SIW.getSuccessorWeight(Case.getSuccessorIndex()));
+  } else if (DefaultCaseWeight) {
+    // We have to set branch weight of the default case.
+    uint64_t SW = *DefaultCaseWeight;
+    for (const auto &Case : SI.cases()) {
+      auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
+      assert(W &&
+             "case weight must be defined as default case weight is defined");
+      SW += *W;
+    }
+    NewSIW.setSuccessorWeight(0, SW);
   }
 
   // If we ended up with a common successor for every path through the switch
@@ -762,10 +780,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
         continue;
       }
       CommonSuccBB->removePredecessor(BB,
-                                      /*DontDeleteUselessPHIs*/ true);
+                                      /*KeepOneInputPHIs*/ true);
     }
     // Now nuke the switch and replace it with a direct branch.
-    SI.eraseFromParent();
+    SIW.eraseFromParent();
     BranchInst::Create(CommonSuccBB, BB);
   } else if (DefaultExitBB) {
     assert(SI.getNumCases() > 0 &&
@@ -775,8 +793,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
     // being simple and keeping the number of edges from this switch to
     // successors the same, and avoiding any PHI update complexity.
     auto LastCaseI = std::prev(SI.case_end());
+
     SI.setDefaultDest(LastCaseI->getCaseSuccessor());
-    SI.removeCase(LastCaseI);
+    SIW.setSuccessorWeight(
+        0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
+    SIW.removeCase(LastCaseI);
   }
 
   // Walk the unswitched exit blocks and the unswitched split blocks and update
@@ -789,9 +810,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
     DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
   }
   for (auto SplitUnswitchedPair : SplitExitBBMap) {
-    auto *UnswitchedBB = SplitUnswitchedPair.second;
-    DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
-    DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
+    DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
+    DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
   }
   DT.applyUpdates(DTUpdates);
 
@@ -805,7 +825,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
 
   // We may have changed the nesting relationship for this loop so hoist it to
   // its correct parent if needed.
-  hoistLoopToNewParent(L, *NewPH, DT, LI);
+  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
 
   ++NumTrivial;
   ++NumSwitches;
@@ -848,6 +871,10 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
     // Check if there are any side-effecting instructions (e.g. stores, calls,
     // volatile loads) in the part of the loop that the code *would* execute
     // without unswitching.
+    if (MSSAU) // Possible early exit with MSSA
+      if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
+        if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
+          return Changed;
     if (llvm::any_of(*CurrentBB,
                      [](Instruction &I) { return I.mayHaveSideEffects(); }))
       return Changed;
@@ -1066,7 +1093,7 @@ static BasicBlock *buildClonedLoopBlocks(
       continue;
 
     ClonedSuccBB->removePredecessor(ClonedParentBB,
-                                    /*DontDeleteUselessPHIs*/ true);
+                                    /*KeepOneInputPHIs*/ true);
   }
 
   // Replace the cloned branch with an unconditional branch to the cloned
@@ -1436,8 +1463,8 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
 
   // Remove all MemorySSA in the dead blocks
   if (MSSAU) {
-    SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
-                                               DeadBlocks.end());
+    SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
+                                                 DeadBlocks.end());
     MSSAU->removeBlocks(DeadBlockSet);
   }
 
@@ -1455,7 +1482,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
                                      MemorySSAUpdater *MSSAU) {
   // Find all the dead blocks tied to this loop, and remove them from their
   // successors.
-  SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
 
   // Start with loop/exit blocks and get a transitive closure of reachable dead
   // blocks.
@@ -1712,10 +1739,9 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
 
   // Sort the exits in ascending loop depth, we'll work backwards across these
   // to process them inside out.
-  std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
-                   [&](BasicBlock *LHS, BasicBlock *RHS) {
-                     return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
-                   });
+  llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
+    return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
+  });
 
   // We'll build up a set for each exit loop.
   SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
@@ -2075,7 +2101,7 @@ static void unswitchNontrivialInvariants(
              "Only one possible unswitched block for a branch!");
       BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
       UnswitchedSuccBB->removePredecessor(ParentBB,
-                                          /*DontDeleteUselessPHIs*/ true);
+                                          /*KeepOneInputPHIs*/ true);
       DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
     } else {
       // Note that we actually want to remove the parent block as a predecessor
@@ -2090,7 +2116,7 @@ static void unswitchNontrivialInvariants(
       for (auto &Case : NewSI->cases())
         Case.getCaseSuccessor()->removePredecessor(
             ParentBB,
-            /*DontDeleteUselessPHIs*/ true);
+            /*KeepOneInputPHIs*/ true);
 
       // We need to use the set to populate domtree updates as even when there
       // are multiple cases pointing at the same successor we only want to
@@ -2236,7 +2262,7 @@ static void unswitchNontrivialInvariants(
     // introduced new, non-dedicated exits. At least try to re-form dedicated
     // exits for these loops. This may fail if they couldn't have dedicated
     // exits to start with.
-    formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
+    formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
   };
 
   // For non-child cloned loops and hoisted loops, we just need to update LCSSA
@@ -2526,7 +2552,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
       // We can only consider fully loop-invariant switch conditions as we need
       // to completely eliminate the switch after unswitching.
       if (!isa<Constant>(SI->getCondition()) &&
-          L.isLoopInvariant(SI->getCondition()))
+          L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
         UnswitchCandidates.push_back({SI, {SI->getCondition()}});
       continue;
     }
@@ -2852,7 +2878,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
   // Historically this pass has had issues with the dominator tree so verify it
   // in asserts builds.
   assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
-  return getLoopPassPreservedAnalyses();
+
+  auto PA = getLoopPassPreservedAnalyses();
+  if (EnableMSSALoopDependency)
+    PA.preserve<MemorySSAAnalysis>();
+  return PA;
 }
 
 namespace {
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b7b1db76b492..4544975a4887 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -1,9 +1,8 @@
 //===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c99da8f0737a..90f3a2aa46e1 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -1,9 +1,8 @@
 //===-- Sink.cpp - Code Sinking -------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index c0f75ddddbe0..c13fb3e04516 100644
--- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -1,9 +1,8 @@
 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -68,6 +67,14 @@ isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
       return false;
     }
 
+    if (auto CS = ImmutableCallSite(UI)) {
+      if (CS.isConvergent() || CS.cannotDuplicate()) {
+        LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
+                   "callsite cannot de duplicated: " << *UI << '\n');
+        return false;
+      }
+    }
+
     // FIXME: This check is much too conservative. We're not going to move these
     // instructions onto new dynamic paths through the program unless there is
     // a call instruction between the use and the PHI node. And memory isn't
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f5e1dd6ed850..f9d027eb4a3b 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -1,9 +1,8 @@
 //===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -241,6 +240,7 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
     case Instruction::FMul:
     case Instruction::FDiv:
     case Instruction::FRem:
+    case Instruction::FNeg:
     case Instruction::ICmp:
     case Instruction::FCmp:
       return TTI.getUserCost(I);
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b5089b006bdd..a58c32cc5894 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -1,9 +1,8 @@
 //===- StraightLineStrengthReduce.cpp - -----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -683,9 +682,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
         // Canonicalize bump to pointer size.
         Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
         if (InBounds)
-          Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+          Reduced = Builder.CreateInBoundsGEP(
+              cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+              Basis.Ins, Bump);
         else
-          Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
+          Reduced = Builder.CreateGEP(
+              cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+              Basis.Ins, Bump);
       }
       break;
     }
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0db762d846f2..e5400676c7e8 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1,9 +1,8 @@
 //===- StructurizeCFG.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -63,6 +62,11 @@ static cl::opt<bool> ForceSkipUniformRegions(
   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
   cl::init(false));
 
+static cl::opt<bool>
+    RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
+                          cl::desc("Allow relaxed uniform region checks"),
+                          cl::init(false));
+
 // Definition of the complex types used in this pass.
 
 using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -624,11 +628,8 @@ void StructurizeCFG::setPhiValues() {
       if (!Dominator.resultIsRememberedBlock())
         Updater.AddAvailableValue(Dominator.result(), Undef);
 
-      for (BasicBlock *FI : From) {
-        int Idx = Phi->getBasicBlockIndex(FI);
-        assert(Idx != -1);
-        Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
-      }
+      for (BasicBlock *FI : From)
+        Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
     }
 
     DeletedPhis.erase(To);
@@ -937,6 +938,11 @@ void StructurizeCFG::rebuildSSA() {
 
 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
                                    const LegacyDivergenceAnalysis &DA) {
+  // Whether all sub-regions are uniform.
+  bool SubRegionsAreUniform = true;
+  // Count of how many direct children are conditional.
+  unsigned ConditionalDirectChildren = 0;
+
   for (auto E : R->elements()) {
     if (!E->isSubRegion()) {
       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -945,6 +951,10 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
 
       if (!DA.isUniform(Br))
         return false;
+
+      // One of our direct children is conditional.
+      ConditionalDirectChildren++;
+
       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
                         << " has uniform terminator\n");
     } else {
@@ -962,12 +972,25 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
         if (!Br || !Br->isConditional())
           continue;
 
-        if (!Br->getMetadata(UniformMDKindID))
-          return false;
+        if (!Br->getMetadata(UniformMDKindID)) {
+          // Early exit if we cannot have relaxed uniform regions.
+          if (!RelaxedUniformRegions)
+            return false;
+
+          SubRegionsAreUniform = false;
+          break;
+        }
       }
     }
   }
-  return true;
+
+  // Our region is uniform if:
+  // 1. All conditional branches that are direct children are uniform (checked
+  // above).
+  // 2. And either:
+  //   a. All sub-regions are uniform.
+  //   b. There is at most one conditional branch among the direct children.
+  return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
 }
 
 /// Run the transformation for each region found
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0f6db21f73b6..f0b79079d817 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -1,9 +1,8 @@
 //===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,6 +55,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -69,7 +69,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
@@ -341,7 +340,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
       // being loaded from.
       const DataLayout &DL = L->getModule()->getDataLayout();
       if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
-          !isSafeToLoadUnconditionally(L->getPointerOperand(),
+          !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
                                        L->getAlignment(), DL, L))
         return false;
     }
@@ -679,7 +678,7 @@ static bool eliminateRecursiveTailCall(
 
   BB->getInstList().erase(Ret);  // Remove return.
   BB->getInstList().erase(CI);   // Remove call.
-  DTU.insertEdge(BB, OldEntry);
+  DTU.applyUpdates({{DominatorTree::Insert, BB, OldEntry}});
   ++NumEliminated;
   return true;
 }
diff --git a/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80f761e53774..707adf46d1f4 100644
--- a/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -1,9 +1,8 @@
 //===- LoopTransformWarning.cpp -  ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,7 +92,7 @@ PreservedAnalyses
 WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
   // Do not warn about not applied transformations if optimizations are
   // disabled.
-  if (F.hasFnAttribute(Attribute::OptimizeNone))
+  if (F.hasOptNone())
     return PreservedAnalyses::all();
 
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 364878dc588d..01912297324a 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -1,9 +1,8 @@
 //===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -63,7 +62,7 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
   for (size_t i = 0; i < NumVars; i++)
     Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
 
-  std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+  llvm::stable_sort(Vars, CompareVars);
 
   ASanStackFrameLayout Layout;
   Layout.Granularity = Granularity;
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 564537af0c2a..ee0973002c47 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -1,9 +1,8 @@
 //===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -209,7 +208,7 @@ static bool addDiscriminators(Function &F) {
       // Only the lowest 7 bits are used to represent a discriminator to fit
       // it in 1 byte ULEB128 representation.
       unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
-      auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+      auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
       if (!NewDIL) {
         LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
                           << DIL->getFilename() << ":" << DIL->getLine() << ":"
@@ -246,7 +245,7 @@ static bool addDiscriminators(Function &F) {
           std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
       if (!CallLocations.insert(L).second) {
         unsigned Discriminator = ++LDM[L];
-        auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+        auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
         if (!NewDIL) {
           LLVM_DEBUG(dbgs()
                      << "Could not encode discriminator: "
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7da768252fc1..5fa371377c85 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,9 +1,8 @@
 //===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
@@ -26,7 +26,6 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -39,6 +38,8 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
@@ -48,30 +49,20 @@
 
 using namespace llvm;
 
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
-  SmallVector<BasicBlock *, 1> BBs = {BB};
-  DeleteDeadBlocks(BBs, DTU);
-}
-
-void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
-                            DomTreeUpdater *DTU) {
-#ifndef NDEBUG
-  // Make sure that all predecessors of each dead block is also dead.
-  SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
-  assert(Dead.size() == BBs.size() && "Duplicating blocks?");
-  for (auto *BB : Dead)
-    for (BasicBlock *Pred : predecessors(BB))
-      assert(Dead.count(Pred) && "All predecessors must be dead!");
-#endif
+#define DEBUG_TYPE "basicblock-utils"
 
-  SmallVector<DominatorTree::UpdateType, 4> Updates;
+void llvm::DetatchDeadBlocks(
+    ArrayRef<BasicBlock *> BBs,
+    SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+    bool KeepOneInputPHIs) {
   for (auto *BB : BBs) {
     // Loop through all of our successors and make sure they know that one
     // of their predecessors is going away.
+    SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
     for (BasicBlock *Succ : successors(BB)) {
-      Succ->removePredecessor(BB);
-      if (DTU)
-        Updates.push_back({DominatorTree::Delete, BB, Succ});
+      Succ->removePredecessor(BB, KeepOneInputPHIs);
+      if (Updates && UniqueSuccessors.insert(Succ).second)
+        Updates->push_back({DominatorTree::Delete, BB, Succ});
     }
 
     // Zap all the instructions in the block.
@@ -92,8 +83,29 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
            "The successor list of BB isn't empty before "
            "applying corresponding DTU updates.");
   }
+}
+
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
+                           bool KeepOneInputPHIs) {
+  DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
+}
+
+void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
+                            bool KeepOneInputPHIs) {
+#ifndef NDEBUG
+  // Make sure that all predecessors of each dead block are also dead.
+  SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+  assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+  for (auto *BB : Dead)
+    for (BasicBlock *Pred : predecessors(BB))
+      assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+  SmallVector<DominatorTree::UpdateType, 4> Updates;
+  DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+
   if (DTU)
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
 
   for (BasicBlock *BB : BBs)
     if (DTU)
@@ -102,6 +114,28 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
       BB->eraseFromParent();
 }
 
+bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+                                      bool KeepOneInputPHIs) {
+  df_iterator_default_set<BasicBlock*> Reachable;
+
+  // Mark all reachable blocks.
+  for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+    (void)BB/* Mark all reachable blocks */;
+
+  // Collect all dead blocks.
+  std::vector<BasicBlock*> DeadBlocks;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (!Reachable.count(&*I)) {
+      BasicBlock *BB = &*I;
+      DeadBlocks.push_back(BB);
+    }
+
+  // Delete the dead blocks.
+  DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
+
+  return !DeadBlocks.empty();
+}
+
 void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
                                    MemoryDependenceResults *MemDep) {
   if (!isa<PHINode>(BB->begin())) return;
@@ -160,6 +194,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
       if (IncValue == &PN)
         return false;
 
+  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+                    << PredBB->getName() << "\n");
+
   // Begin by getting rid of unneeded PHIs.
   SmallVector<AssertingVH<Value>, 4> IncomingValues;
   if (isa<PHINode>(BB->front())) {
@@ -175,11 +212,19 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   std::vector<DominatorTree::UpdateType> Updates;
   if (DTU) {
     Updates.reserve(1 + (2 * succ_size(BB)));
-    Updates.push_back({DominatorTree::Delete, PredBB, BB});
-    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+    // Add insert edges first. Experimentally, for the particular case of two
+    // blocks that can be merged, with a single successor and single predecessor
+    // respectively, it is beneficial to have all insert updates first. Deleting
+    // edges first may lead to unreachable blocks, followed by inserting edges
+    // making the blocks reachable again. Such DT updates lead to high compile
+    // times. We add inserts before deletes here to reduce compile time.
+    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+      // This successor of BB may already have PredBB as a predecessor.
+      if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
+        Updates.push_back({DominatorTree::Insert, PredBB, *I});
+    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
       Updates.push_back({DominatorTree::Delete, BB, *I});
-      Updates.push_back({DominatorTree::Insert, PredBB, *I});
-    }
+    Updates.push_back({DominatorTree::Delete, PredBB, BB});
   }
 
   if (MSSAU)
@@ -227,7 +272,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
            isa<UnreachableInst>(BB->getTerminator()) &&
            "The successor list of BB isn't empty before "
            "applying corresponding DTU updates.");
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
     DTU->deleteBB(BB);
   }
 
@@ -534,7 +579,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 
   // The new block unconditionally branches to the old block.
   BranchInst *BI = BranchInst::Create(BB, NewBB);
-  BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+  // Splitting the predecessors of a loop header creates a preheader block.
+  if (LI && LI->isLoopHeader(BB))
+    // Using the loop start line number prevents debuggers stepping into the
+    // loop body for this instruction.
+    BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+  else
+    BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
 
   // Move the edges from Preds to point to NewBB instead of BB.
   for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -543,6 +594,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
     // all BlockAddress uses would need to be updated.
     assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
            "Cannot split an edge from an IndirectBrInst");
+    assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
+           "Cannot split an edge from a CallBrInst");
     Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
   }
 
@@ -711,7 +764,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
   UncondBranch->eraseFromParent();
 
   if (DTU)
-    DTU->deleteEdge(Pred, BB);
+    DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
 
   return cast<ReturnInst>(NewRet);
 }
@@ -720,18 +773,23 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
                                              Instruction *SplitBefore,
                                              bool Unreachable,
                                              MDNode *BranchWeights,
-                                             DominatorTree *DT, LoopInfo *LI) {
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             BasicBlock *ThenBlock) {
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
   Instruction *HeadOldTerm = Head->getTerminator();
   LLVMContext &C = Head->getContext();
-  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
   Instruction *CheckTerm;
-  if (Unreachable)
-    CheckTerm = new UnreachableInst(C, ThenBlock);
-  else
-    CheckTerm = BranchInst::Create(Tail, ThenBlock);
-  CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+  bool CreateThenBlock = (ThenBlock == nullptr);
+  if (CreateThenBlock) {
+    ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+    if (Unreachable)
+      CheckTerm = new UnreachableInst(C, ThenBlock);
+    else
+      CheckTerm = BranchInst::Create(Tail, ThenBlock);
+    CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+  } else
+    CheckTerm = ThenBlock->getTerminator();
   BranchInst *HeadNewTerm =
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
   HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
@@ -746,7 +804,10 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
         DT->changeImmediateDominator(Child, NewNode);
 
       // Head dominates ThenBlock.
-      DT->addNewBlock(ThenBlock, Head);
+      if (CreateThenBlock)
+        DT->addNewBlock(ThenBlock, Head);
+      else
+        DT->changeImmediateDominator(ThenBlock, Head);
     }
   }
 
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index fafc9aaba5c9..f5e4b53f6d97 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -1,9 +1,8 @@
 //===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -49,10 +49,14 @@ namespace {
     bool runOnFunction(Function &F) override {
       auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
       auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+      auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+      auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+
       auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
       auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
       unsigned N =
-          SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+          SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
       NumBroken += N;
       return N > 0;
     }
@@ -145,6 +149,14 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
   // it in this generic function.
   if (DestBB->isEHPad()) return nullptr;
 
+  // Don't split the non-fallthrough edge from a callbr.
+  if (isa<CallBrInst>(TI) && SuccNum > 0)
+    return nullptr;
+
+  if (Options.IgnoreUnreachableDests &&
+      isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
+    return nullptr;
+
   // Create a new basic block, linking it into the CFG.
   BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
                       TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -189,7 +201,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
       if (TI->getSuccessor(i) != DestBB) continue;
 
       // Remove an entry for TIBB from DestBB phi nodes.
-      DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
+      DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
 
       // We found another edge to DestBB, go to NewBB instead.
       TI->setSuccessor(i, NewBB);
@@ -198,16 +210,17 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
 
   // If we have nothing to update, just return.
   auto *DT = Options.DT;
+  auto *PDT = Options.PDT;
   auto *LI = Options.LI;
   auto *MSSAU = Options.MSSAU;
   if (MSSAU)
     MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
         DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
 
-  if (!DT && !LI)
+  if (!DT && !PDT && !LI)
     return NewBB;
 
-  if (DT) {
+  if (DT || PDT) {
     // Update the DominatorTree.
     //       ---> NewBB -----\
     //      /                 V
@@ -223,7 +236,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
     if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
       Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
 
-    DT->applyUpdates(Updates);
+    if (DT)
+      DT->applyUpdates(Updates);
+    if (PDT)
+      PDT->applyUpdates(Updates);
   }
 
   // Update LoopInfo if it is around.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 3466dedd3236..27f110e24f9c 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1,9 +1,8 @@
 //===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 
 using namespace llvm;
 
@@ -121,6 +121,13 @@ static bool setNonLazyBind(Function &F) {
   return true;
 }
 
+static bool setDoesNotFreeMemory(Function &F) {
+  if (F.hasFnAttribute(Attribute::NoFree))
+    return false;
+  F.addFnAttr(Attribute::NoFree);
+  return true;
+}
+
 bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
                                   const TargetLibraryInfo &TLI) {
   Function *F = M->getFunction(Name);
@@ -136,6 +143,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
 
   bool Changed = false;
 
+  if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F,  &TLI))
+    Changed |= setDoesNotFreeMemory(F);
+
   if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
     Changed |= setNonLazyBind(F);
 
@@ -790,95 +800,76 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
   return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
 }
 
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
-                        const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strlen))
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+                          ArrayRef<Type *> ParamTypes,
+                          ArrayRef<Value *> Operands, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI,
+                          bool IsVaArgs = false) {
+  if (!TLI->has(TheLibFunc))
     return nullptr;
 
   Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrlenName = TLI->getName(LibFunc_strlen);
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Constant *StrLen = M->getOrInsertFunction(StrlenName, DL.getIntPtrType(Context),
-                                            B.getInt8PtrTy());
-  inferLibFuncAttributes(M, StrlenName, *TLI);
-  CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), StrlenName);
-  if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+  StringRef FuncName = TLI->getName(TheLibFunc);
+  FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+  FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
+  inferLibFuncAttributes(M, FuncName, *TLI);
+  CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
-
   return CI;
 }
 
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strchr))
-    return nullptr;
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
+                     B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrChrName = TLI->getName(LibFunc_strchr);
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
-  Constant *StrChr =
-      M->getOrInsertFunction(StrChrName, I8Ptr, I8Ptr, I32Ty);
-  inferLibFuncAttributes(M, StrChrName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, StrChrName);
-  if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
+                     {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
 }
 
 Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strncmp))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrNCmpName = TLI->getName(LibFunc_strncmp);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *StrNCmp = M->getOrInsertFunction(StrNCmpName, B.getInt32Ty(),
-                                          B.getInt8PtrTy(), B.getInt8PtrTy(),
-                                          DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, StrNCmpName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, StrNCmpName);
-
-  if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
+      LibFunc_strncmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
 Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                        const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strcpy))
-    return nullptr;
+                        const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI =
-      B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
-  if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
 }
 
 Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                         const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strncpy))
-    return nullptr;
+                         const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
-                                          Len->getType());
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, Name);
-  if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
 }
 
 Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -892,57 +883,115 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
   AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
                           Attribute::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemCpy = M->getOrInsertFunction(
+  FunctionCallee MemCpy = M->getOrInsertFunction(
       "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
       B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
       DL.getIntPtrType(Context));
   Dst = castToCStr(Dst, B);
   Src = castToCStr(Src, B);
   CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
-  if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
 
 Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memchr))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef MemChrName = TLI->getName(LibFunc_memchr);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemChr = M->getOrInsertFunction(MemChrName, B.getInt8PtrTy(),
-                                         B.getInt8PtrTy(), B.getInt32Ty(),
-                                         DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MemChrName, *TLI);
-  CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, MemChrName);
-
-  if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
+      LibFunc_memchr, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr, B), Val, Len}, B, TLI);
 }
 
 Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memcmp))
-    return nullptr;
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_memcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef MemCmpName = TLI->getName(LibFunc_memcmp);
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                      const DataLayout &DL, const TargetLibraryInfo *TLI) {
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *MemCmp = M->getOrInsertFunction(MemCmpName, B.getInt32Ty(),
-                                         B.getInt8PtrTy(), B.getInt8PtrTy(),
-                                         DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MemCmpName, *TLI);
-  CallInst *CI = B.CreateCall(
-      MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, MemCmpName);
-
-  if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
+  return emitLibCall(
+      LibFunc_bcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
 
-  return CI;
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+                         IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_memccpy, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
+      {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+                          ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
+                     Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+                         ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
+                     /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()},
+                     {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcpy, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcat, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+                           IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_vsnprintf, B.getInt32Ty(),
+      {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
+      {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+                          IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
+                     {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
 }
 
 /// Append a suffix to the function name according to the type of 'Op'.
@@ -966,8 +1015,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
   assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
 
   Module *M = B.GetInsertBlock()->getModule();
-  Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
-                                         Op->getType());
+  FunctionCallee Callee =
+      M->getOrInsertFunction(Name, Op->getType(), Op->getType());
   CallInst *CI = B.CreateCall(Callee, Op, Name);
 
   // The incoming attribute set may have come from a speculatable intrinsic, but
@@ -976,7 +1025,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
   CI->setAttributes(Attrs.removeAttribute(B.getContext(),
                                           AttributeList::FunctionIndex,
                                           Attribute::Speculatable));
-  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1009,11 +1059,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
   appendTypeSuffix(Op1, Name, NameBuffer);
 
   Module *M = B.GetInsertBlock()->getModule();
-  Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
-                                         Op2->getType());
+  FunctionCallee Callee = M->getOrInsertFunction(
+      Name, Op1->getType(), Op1->getType(), Op2->getType());
   CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
   CI->setAttributes(Attrs);
-  if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1026,7 +1077,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef PutCharName = TLI->getName(LibFunc_putchar);
-  Value *PutChar = M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
+  FunctionCallee PutChar =
+      M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
   inferLibFuncAttributes(M, PutCharName, *TLI);
   CallInst *CI = B.CreateCall(PutChar,
                               B.CreateIntCast(Char,
@@ -1035,7 +1087,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
                               "chari"),
                               PutCharName);
 
-  if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
@@ -1047,11 +1100,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef PutsName = TLI->getName(LibFunc_puts);
-  Value *PutS =
+  FunctionCallee PutS =
       M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
   inferLibFuncAttributes(M, PutsName, *TLI);
   CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
-  if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
@@ -1063,15 +1117,16 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutcName = TLI->getName(LibFunc_fputc);
-  Constant *F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), B.getInt32Ty(),
-                                       File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
+                                            B.getInt32Ty(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutcName, *TLI);
   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
                          "chari");
   CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1083,14 +1138,15 @@ Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
-  Constant *F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
-                                       B.getInt32Ty(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
+                                            B.getInt32Ty(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
   Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
   CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1102,13 +1158,14 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutsName = TLI->getName(LibFunc_fputs);
-  Constant *F = M->getOrInsertFunction(
-      FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
+                                            B.getInt8PtrTy(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutsName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1120,13 +1177,14 @@ Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
-  Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
-                                       B.getInt8PtrTy(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
+                                            B.getInt8PtrTy(), File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1139,7 +1197,7 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc_fwrite);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
       FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
       DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1149,7 +1207,8 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
       B.CreateCall(F, {castToCStr(Ptr, B), Size,
                        ConstantInt::get(DL.getIntPtrType(Context), 1), File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1162,12 +1221,13 @@ Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
   Module *M = B.GetInsertBlock()->getModule();
   StringRef MallocName = TLI->getName(LibFunc_malloc);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  Value *Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
-                                         DL.getIntPtrType(Context));
+  FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
+                                                 DL.getIntPtrType(Context));
   inferLibFuncAttributes(M, MallocName, *TLI);
   CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
 
-  if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts()))
+  if (const Function *F =
+          dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1182,12 +1242,13 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
   StringRef CallocName = TLI.getName(LibFunc_calloc);
   const DataLayout &DL = M->getDataLayout();
   IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
-  Value *Calloc = M->getOrInsertFunction(CallocName, Attrs, B.getInt8PtrTy(),
-                                         PtrType, PtrType);
+  FunctionCallee Calloc = M->getOrInsertFunction(
+      CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
   inferLibFuncAttributes(M, CallocName, TLI);
   CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
 
-  if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+  if (const auto *F =
+          dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
   return CI;
@@ -1202,7 +1263,7 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
       FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
       DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1210,7 +1271,8 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
     inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1222,13 +1284,14 @@ Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
-  Constant *F =
-      M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(), File->getType());
+  FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
+                                            File->getType());
   if (File->getType()->isPointerTy())
     inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1240,14 +1303,15 @@ Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
 
   Module *M = B.GetInsertBlock()->getModule();
   StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
-  Constant *F =
+  FunctionCallee F =
       M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
                              B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
   inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
   CallInst *CI =
       B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
@@ -1261,7 +1325,7 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
   Module *M = B.GetInsertBlock()->getModule();
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
-  Constant *F = M->getOrInsertFunction(
+  FunctionCallee F = M->getOrInsertFunction(
       FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
       DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
 
@@ -1269,7 +1333,8 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
     inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
   CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
 
-  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+  if (const Function *Fn =
+          dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
   return CI;
 }
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index e7828af648a9..df299f673f65 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,9 +1,8 @@
 //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index e58ddcf34667..f04d76e70c0d 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -1,9 +1,8 @@
 //===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -367,8 +366,9 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
                                CastInst **RetBitCast) {
   assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
 
-  // Set the called function of the call site to be the given callee.
-  CS.setCalledFunction(Callee);
+  // Set the called function of the call site to be the given callee (but don't
+  // change the type).
+  cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
 
   // Since the call site will no longer be direct, we must clear metadata that
   // is only appropriate for indirect calls. This includes !prof and !callees
@@ -412,6 +412,15 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
       // Remove any incompatible attributes for the argument.
       AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
       ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+
+      // If byval is used, this must be a pointer type, and the byval type must
+      // match the element type. Update it if present.
+      if (ArgAttrs.getByValType()) {
+        Type *NewTy = Callee->getParamByValType(ArgNo);
+        ArgAttrs.addByValAttr(
+            NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
+      }
+
       NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
       AttributeChanged = true;
     } else
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index cf41fd2e14c0..455fcbb1cf98 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -1,9 +1,8 @@
 //===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8f8c601f5f13..1026c9d37038 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -1,9 +1,8 @@
 //===- CloneFunction.cpp - Clone a function into another function ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,13 +15,13 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -740,12 +739,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
                                    const Twine &NameSuffix, LoopInfo *LI,
                                    DominatorTree *DT,
                                    SmallVectorImpl<BasicBlock *> &Blocks) {
-  assert(OrigLoop->getSubLoops().empty() &&
-         "Loop to be cloned cannot have inner loop");
   Function *F = OrigLoop->getHeader()->getParent();
   Loop *ParentLoop = OrigLoop->getParentLoop();
+  DenseMap<Loop *, Loop *> LMap;
 
   Loop *NewLoop = LI->AllocateLoop();
+  LMap[OrigLoop] = NewLoop;
   if (ParentLoop)
     ParentLoop->addChildLoop(NewLoop);
   else
@@ -765,14 +764,36 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
   // Update DominatorTree.
   DT->addNewBlock(NewPH, LoopDomBB);
 
+  for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
+    Loop *&NewLoop = LMap[CurLoop];
+    if (!NewLoop) {
+      NewLoop = LI->AllocateLoop();
+
+      // Establish the parent/child relationship.
+      Loop *OrigParent = CurLoop->getParentLoop();
+      assert(OrigParent && "Could not find the original parent loop");
+      Loop *NewParentLoop = LMap[OrigParent];
+      assert(NewParentLoop && "Could not find the new parent loop");
+
+      NewParentLoop->addChildLoop(NewLoop);
+    }
+  }
+
   for (BasicBlock *BB : OrigLoop->getBlocks()) {
+    Loop *CurLoop = LI->getLoopFor(BB);
+    Loop *&NewLoop = LMap[CurLoop];
+    assert(NewLoop && "Expecting new loop to be allocated");
+
     BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
     VMap[BB] = NewBB;
 
     // Update LoopInfo.
     NewLoop->addBasicBlockToLoop(NewBB, *LI);
+    if (BB == CurLoop->getHeader())
+      NewLoop->moveToHeader(NewBB);
 
-    // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+    // Add DominatorTree node. After seeing all blocks, update to correct
+    // IDom.
     DT->addNewBlock(NewBB, NewPH);
 
     Blocks.push_back(NewBB);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 659993aa5478..7ddf59becba9 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -1,9 +1,8 @@
 //===- CloneModule.cpp - Clone an entire module ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 25d4ae583ecc..fa6d3f8ae873 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1,9 +1,8 @@
 //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,6 +20,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -44,6 +44,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -67,6 +68,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 using ProfileCount = Function::ProfileCount;
 
 #define DEBUG_TYPE "code-extractor"
@@ -207,6 +209,9 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
       llvm_unreachable("Repeated basic blocks in extraction input");
   }
 
+  LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
+                    << '\n');
+
   for (auto *BB : Result) {
     if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
       return {};
@@ -224,9 +229,11 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
     // the subgraph which is being extracted.
     for (auto *PBB : predecessors(BB))
       if (!Result.count(PBB)) {
-        LLVM_DEBUG(
-            dbgs() << "No blocks in this region may have entries from "
-                      "outside the region except for the first block!\n");
+        LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
+                             "outside the region except for the first block!\n"
+                          << "Problematic source BB: " << BB->getName() << "\n"
+                          << "Problematic destination BB: " << PBB->getName()
+                          << "\n");
         return {};
       }
   }
@@ -236,18 +243,20 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
 
 CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
                              bool AggregateArgs, BlockFrequencyInfo *BFI,
-                             BranchProbabilityInfo *BPI, bool AllowVarArgs,
-                             bool AllowAlloca, std::string Suffix)
+                             BranchProbabilityInfo *BPI, AssumptionCache *AC,
+                             bool AllowVarArgs, bool AllowAlloca,
+                             std::string Suffix)
     : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), AllowVarArgs(AllowVarArgs),
+      BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
       Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
       Suffix(Suffix) {}
 
 CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
                              BlockFrequencyInfo *BFI,
-                             BranchProbabilityInfo *BPI, std::string Suffix)
+                             BranchProbabilityInfo *BPI, AssumptionCache *AC,
+                             std::string Suffix)
     : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
-      BPI(BPI), AllowVarArgs(false),
+      BPI(BPI), AC(AC), AllowVarArgs(false),
       Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
                                      /* AllowVarArgs */ false,
                                      /* AllowAlloca */ false)),
@@ -325,7 +334,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
         if (dyn_cast<Constant>(MemAddr))
           break;
         Value *Base = MemAddr->stripInBoundsConstantOffsets();
-        if (!dyn_cast<AllocaInst>(Base) || Base == AI)
+        if (!isa<AllocaInst>(Base) || Base == AI)
           return false;
         break;
       }
@@ -401,11 +410,74 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
   return CommonExitBlock;
 }
 
+// Find the pair of life time markers for address 'Addr' that are either
+// defined inside the outline region or can legally be shrinkwrapped into the
+// outline region. If there are not other untracked uses of the address, return
+// the pair of markers if found; otherwise return a pair of nullptr.
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+                                  BasicBlock *ExitBlock) const {
+  LifetimeMarkerInfo Info;
+
+  for (User *U : Addr->users()) {
+    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+    if (IntrInst) {
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // Do not handle the case where Addr has multiple start markers.
+        if (Info.LifeStart)
+          return {};
+        Info.LifeStart = IntrInst;
+      }
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+        if (Info.LifeEnd)
+          return {};
+        Info.LifeEnd = IntrInst;
+      }
+      continue;
+    }
+    // Find untracked uses of the address, bail.
+    if (!definedInRegion(Blocks, U))
+      return {};
+  }
+
+  if (!Info.LifeStart || !Info.LifeEnd)
+    return {};
+
+  Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+  Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+  // Do legality check.
+  if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+      !isLegalToShrinkwrapLifetimeMarkers(Addr))
+    return {};
+
+  // Check to see if we have a place to do hoisting, if not, bail.
+  if (Info.HoistLifeEnd && !ExitBlock)
+    return {};
+
+  return Info;
+}
+
 void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
                                 BasicBlock *&ExitBlock) const {
   Function *Func = (*Blocks.begin())->getParent();
   ExitBlock = getCommonExitBlock(Blocks);
 
+  auto moveOrIgnoreLifetimeMarkers =
+      [&](const LifetimeMarkerInfo &LMI) -> bool {
+    if (!LMI.LifeStart)
+      return false;
+    if (LMI.SinkLifeStart) {
+      LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+                        << "\n");
+      SinkCands.insert(LMI.LifeStart);
+    }
+    if (LMI.HoistLifeEnd) {
+      LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+      HoistCands.insert(LMI.LifeEnd);
+    }
+    return true;
+  };
+
   for (BasicBlock &BB : *Func) {
     if (Blocks.count(&BB))
       continue;
@@ -414,95 +486,52 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
       if (!AI)
         continue;
 
-      // Find the pair of life time markers for address 'Addr' that are either
-      // defined inside the outline region or can legally be shrinkwrapped into
-      // the outline region. If there are not other untracked uses of the
-      // address, return the pair of markers if found; otherwise return a pair
-      // of nullptr.
-      auto GetLifeTimeMarkers =
-          [&](Instruction *Addr, bool &SinkLifeStart,
-              bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
-        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
-
-        for (User *U : Addr->users()) {
-          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
-          if (IntrInst) {
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
-              // Do not handle the case where AI has multiple start markers.
-              if (LifeStart)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeStart = IntrInst;
-            }
-            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
-              if (LifeEnd)
-                return std::make_pair<Instruction *>(nullptr, nullptr);
-              LifeEnd = IntrInst;
-            }
-            continue;
-          }
-          // Find untracked uses of the address, bail.
-          if (!definedInRegion(Blocks, U))
-            return std::make_pair<Instruction *>(nullptr, nullptr);
-        }
-
-        if (!LifeStart || !LifeEnd)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        SinkLifeStart = !definedInRegion(Blocks, LifeStart);
-        HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
-        // Do legality Check.
-        if ((SinkLifeStart || HoistLifeEnd) &&
-            !isLegalToShrinkwrapLifetimeMarkers(Addr))
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        // Check to see if we have a place to do hoisting, if not, bail.
-        if (HoistLifeEnd && !ExitBlock)
-          return std::make_pair<Instruction *>(nullptr, nullptr);
-
-        return std::make_pair(LifeStart, LifeEnd);
-      };
-
-      bool SinkLifeStart = false, HoistLifeEnd = false;
-      auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
-
-      if (Markers.first) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
+      LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
+      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+      if (Moved) {
+        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
         SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
         continue;
       }
 
-      // Follow the bitcast.
-      Instruction *MarkerAddr = nullptr;
+      // Follow any bitcasts.
+      SmallVector<Instruction *, 2> Bitcasts;
+      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
       for (User *U : AI->users()) {
         if (U->stripInBoundsConstantOffsets() == AI) {
-          SinkLifeStart = false;
-          HoistLifeEnd = false;
           Instruction *Bitcast = cast<Instruction>(U);
-          Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
-          if (Markers.first) {
-            MarkerAddr = Bitcast;
+          LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
+          if (LMI.LifeStart) {
+            Bitcasts.push_back(Bitcast);
+            BitcastLifetimeInfo.push_back(LMI);
             continue;
           }
         }
 
         // Found unknown use of AI.
         if (!definedInRegion(Blocks, U)) {
-          MarkerAddr = nullptr;
+          Bitcasts.clear();
           break;
         }
       }
 
-      if (MarkerAddr) {
-        if (SinkLifeStart)
-          SinkCands.insert(Markers.first);
-        if (!definedInRegion(Blocks, MarkerAddr))
-          SinkCands.insert(MarkerAddr);
-        SinkCands.insert(AI);
-        if (HoistLifeEnd)
-          HoistCands.insert(Markers.second);
+      // Either no bitcasts reference the alloca or there are unknown uses.
+      if (Bitcasts.empty())
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+      SinkCands.insert(AI);
+      for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+        Instruction *BitcastAddr = Bitcasts[I];
+        const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+        assert(LMI.LifeStart &&
+               "Unsafe to sink bitcast without lifetime markers");
+        moveOrIgnoreLifetimeMarkers(LMI);
+        if (!definedInRegion(Blocks, BitcastAddr)) {
+          LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+                            << "\n");
+          SinkCands.insert(BitcastAddr);
+        }
       }
     }
   }
@@ -780,6 +809,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::NoBuiltin:
       case Attribute::NoCapture:
       case Attribute::NoReturn:
+      case Attribute::NoSync:
       case Attribute::None:
       case Attribute::NonNull:
       case Attribute::ReadNone:
@@ -792,8 +822,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::StructRet:
       case Attribute::SwiftError:
       case Attribute::SwiftSelf:
+      case Attribute::WillReturn:
       case Attribute::WriteOnly:
       case Attribute::ZExt:
+      case Attribute::ImmArg:
       case Attribute::EndAttrKinds:
         continue;
       // Those attributes should be safe to propagate to the extracted function.
@@ -803,6 +835,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::InlineHint:
       case Attribute::MinSize:
       case Attribute::NoDuplicate:
+      case Attribute::NoFree:
       case Attribute::NoImplicitFloat:
       case Attribute::NoInline:
       case Attribute::NonLazyBind:
@@ -817,6 +850,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::SanitizeMemory:
       case Attribute::SanitizeThread:
       case Attribute::SanitizeHWAddress:
+      case Attribute::SanitizeMemTag:
       case Attribute::SpeculativeLoadHardening:
       case Attribute::StackProtect:
       case Attribute::StackProtectReq:
@@ -845,7 +879,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       Instruction *TI = newFunction->begin()->getTerminator();
       GetElementPtrInst *GEP = GetElementPtrInst::Create(
           StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
-      RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+      RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
+                                "loadgep_" + inputs[i]->getName(), TI);
     } else
       RewriteVal = &*AI++;
 
@@ -880,6 +915,88 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   return newFunction;
 }
 
+/// Erase lifetime.start markers which reference inputs to the extraction
+/// region, and insert the referenced memory into \p LifetimesStart.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+                                         const SetVector<Value *> &SunkAllocas,
+                                         SetVector<Value *> &LifetimesStart) {
+  for (BasicBlock *BB : Blocks) {
+    for (auto It = BB->begin(), End = BB->end(); It != End;) {
+      auto *II = dyn_cast<IntrinsicInst>(&*It);
+      ++It;
+      if (!II || !II->isLifetimeStartOrEnd())
+        continue;
+
+      // Get the memory operand of the lifetime marker. If the underlying
+      // object is a sunk alloca, or is otherwise defined in the extraction
+      // region, the lifetime marker must not be erased.
+      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+      if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+        continue;
+
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+        LifetimesStart.insert(Mem);
+      II->eraseFromParent();
+    }
+  }
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+    Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
+    CallInst *TheCall) {
+  LLVMContext &Ctx = M->getContext();
+  auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+  Instruction *Term = TheCall->getParent()->getTerminator();
+
+  // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
+  // needed to satisfy this requirement so they may be reused.
+  DenseMap<Value *, Value *> Bitcasts;
+
+  // Emit lifetime markers for the pointers given in \p Objects: before the call
+  // if \p InsertBefore, otherwise before the terminator of the call's block.
+  auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+                           bool InsertBefore) {
+    for (Value *Mem : Objects) {
+      assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
+                                            TheCall->getFunction()) &&
+             "Input memory not defined in original function");
+      Value *&MemAsI8Ptr = Bitcasts[Mem];
+      if (!MemAsI8Ptr) {
+        if (Mem->getType() == Int8PtrTy)
+          MemAsI8Ptr = Mem;
+        else
+          MemAsI8Ptr =
+              CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+      }
+
+      auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+      if (InsertBefore)
+        Marker->insertBefore(TheCall);
+      else
+        Marker->insertBefore(Term);
+    }
+  };
+
+  if (!LifetimesStart.empty()) {
+    auto StartFn = llvm::Intrinsic::getDeclaration(
+        M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+    insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+  }
+
+  if (!LifetimesEnd.empty()) {
+    auto EndFn = llvm::Intrinsic::getDeclaration(
+        M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+    insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+  }
+}
+
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
@@ -897,11 +1014,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
   CallInst *call = nullptr;
 
   // Add inputs as params, or to be filled into the struct
-  for (Value *input : inputs)
+  unsigned ArgNo = 0;
+  SmallVector<unsigned, 1> SwiftErrorArgs;
+  for (Value *input : inputs) {
     if (AggregateArgs)
       StructValues.push_back(input);
-    else
+    else {
       params.push_back(input);
+      if (input->isSwiftError())
+        SwiftErrorArgs.push_back(ArgNo);
+    }
+    ++ArgNo;
+  }
 
   // Create allocas for the outputs
   for (Value *output : outputs) {
@@ -957,13 +1081,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
   }
   codeReplacer->getInstList().push_back(call);
 
+  // Set swifterror parameter attributes.
+  for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
+    call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+    newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+  }
+
   Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
   unsigned FirstOut = inputs.size();
   if (!AggregateArgs)
     std::advance(OutputArgBegin, inputs.size());
 
   // Reload the outputs passed in by reference.
-  Function::arg_iterator OAI = OutputArgBegin;
   for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
     Value *Output = nullptr;
     if (AggregateArgs) {
@@ -977,7 +1106,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     } else {
       Output = ReloadOutputs[i];
     }
-    LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
+                                  outputs[i]->getName() + ".reload");
     Reloads.push_back(load);
     codeReplacer->getInstList().push_back(load);
     std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
@@ -986,40 +1116,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
       if (!Blocks.count(inst->getParent()))
         inst->replaceUsesOfWith(outputs[i], load);
     }
-
-    // Store to argument right after the definition of output value.
-    auto *OutI = dyn_cast<Instruction>(outputs[i]);
-    if (!OutI)
-      continue;
-
-    // Find proper insertion point.
-    BasicBlock::iterator InsertPt;
-    // In case OutI is an invoke, we insert the store at the beginning in the
-    // 'normal destination' BB. Otherwise we insert the store right after OutI.
-    if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
-      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
-    else if (auto *Phi = dyn_cast<PHINode>(OutI))
-      InsertPt = Phi->getParent()->getFirstInsertionPt();
-    else
-      InsertPt = std::next(OutI->getIterator());
-
-    assert(OAI != newFunction->arg_end() &&
-           "Number of output arguments should match "
-           "the amount of defined values");
-    if (AggregateArgs) {
-      Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
-      new StoreInst(outputs[i], GEP, &*InsertPt);
-      // Since there should be only one struct argument aggregating
-      // all the output values, we shouldn't increment OAI, which always
-      // points to the struct argument, in this case.
-    } else {
-      new StoreInst(outputs[i], &*OAI, &*InsertPt);
-      ++OAI;
-    }
   }
 
   // Now we can emit a switch statement using the call as a value.
@@ -1075,6 +1171,50 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
       }
   }
 
+  // Store the arguments right after the definition of output value.
+  // This must be done after creating the exit stubs to ensure that the invoke
+  // result restore is placed in the outlined function.
+  Function::arg_iterator OAI = OutputArgBegin;
+  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+    auto *OutI = dyn_cast<Instruction>(outputs[i]);
+    if (!OutI)
+      continue;
+
+    // Find proper insertion point.
+    BasicBlock::iterator InsertPt;
+    // In case OutI is an invoke, we insert the store at the beginning in the
+    // 'normal destination' BB. Otherwise we insert the store right after OutI.
+    if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+      InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+    else if (auto *Phi = dyn_cast<PHINode>(OutI))
+      InsertPt = Phi->getParent()->getFirstInsertionPt();
+    else
+      InsertPt = std::next(OutI->getIterator());
+
+    Instruction *InsertBefore = &*InsertPt;
+    assert((InsertBefore->getFunction() == newFunction ||
+            Blocks.count(InsertBefore->getParent())) &&
+           "InsertPt should be in new function");
+    assert(OAI != newFunction->arg_end() &&
+           "Number of output arguments should match "
+           "the amount of defined values");
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+          InsertBefore);
+      new StoreInst(outputs[i], GEP, InsertBefore);
+      // Since there should be only one struct argument aggregating
+      // all the output values, we shouldn't increment OAI, which always
+      // points to the struct argument, in this case.
+    } else {
+      new StoreInst(outputs[i], &*OAI, InsertBefore);
+      ++OAI;
+    }
+  }
+
   // Now that we've done the deed, simplify the switch instruction.
   Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
   switch (NumExitBlocks) {
@@ -1119,6 +1259,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     break;
   }
 
+  // Insert lifetime markers around the reloads of any output values. The
+  // allocas that hold the output values are only in use in the codeRepl block.
+  insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
+
   return call;
 }
 
@@ -1133,6 +1277,13 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
 
     // Insert this basic block into the new function
     newBlocks.push_back(Block);
+
+    // Remove @llvm.assume calls that were moved to the new function from the
+    // old function's assumption cache.
+    if (AC)
+      for (auto &I : *Block)
+        if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+          AC->unregisterAssumption(cast<CallInst>(&I));
   }
 }
 
@@ -1181,71 +1332,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
       MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
 }
 
-/// Scan the extraction region for lifetime markers which reference inputs.
-/// Erase these markers. Return the inputs which were referenced.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static SetVector<Value *>
-eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
-                             const SetVector<Value *> &SunkAllocas) {
-  SetVector<Value *> InputObjectsWithLifetime;
-  for (BasicBlock *BB : Blocks) {
-    for (auto It = BB->begin(), End = BB->end(); It != End;) {
-      auto *II = dyn_cast<IntrinsicInst>(&*It);
-      ++It;
-      if (!II || !II->isLifetimeStartOrEnd())
-        continue;
-
-      // Get the memory operand of the lifetime marker. If the underlying
-      // object is a sunk alloca, or is otherwise defined in the extraction
-      // region, the lifetime marker must not be erased.
-      Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
-      if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
-        continue;
-
-      InputObjectsWithLifetime.insert(Mem);
-      II->eraseFromParent();
-    }
-  }
-  return InputObjectsWithLifetime;
-}
-
-/// Insert lifetime start/end markers surrounding the call to the new function
-/// for objects defined in the caller.
-static void insertLifetimeMarkersSurroundingCall(
-    Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
-    CallInst *TheCall) {
-  if (InputObjectsWithLifetime.empty())
-    return;
-
-  LLVMContext &Ctx = M->getContext();
-  auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
-  auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
-  auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
-      M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
-  auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
-      M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
-  for (Value *Mem : InputObjectsWithLifetime) {
-    assert((!isa<Instruction>(Mem) ||
-            cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
-           "Input memory not defined in original function");
-    Value *MemAsI8Ptr = nullptr;
-    if (Mem->getType() == Int8PtrTy)
-      MemAsI8Ptr = Mem;
-    else
-      MemAsI8Ptr =
-          CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
-
-    auto StartMarker =
-        CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
-    StartMarker->insertBefore(TheCall);
-    auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
-    EndMarker->insertAfter(TheCall);
-  }
-}
-
 Function *CodeExtractor::extractCodeRegion() {
   if (!isEligible())
     return nullptr;
@@ -1348,10 +1434,24 @@ Function *CodeExtractor::extractCodeRegion() {
   // Find inputs to, outputs from the code region.
   findInputsOutputs(inputs, outputs, SinkingCands);
 
-  // Now sink all instructions which only have non-phi uses inside the region
-  for (auto *II : SinkingCands)
-    cast<Instruction>(II)->moveBefore(*newFuncRoot,
-                                      newFuncRoot->getFirstInsertionPt());
+  // Now sink all instructions which only have non-phi uses inside the region.
+  // Group the allocas at the start of the block, so that any bitcast uses of
+  // the allocas are well-defined.
+  AllocaInst *FirstSunkAlloca = nullptr;
+  for (auto *II : SinkingCands) {
+    if (auto *AI = dyn_cast<AllocaInst>(II)) {
+      AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+      if (!FirstSunkAlloca)
+        FirstSunkAlloca = AI;
+    }
+  }
+  assert((SinkingCands.empty() || FirstSunkAlloca) &&
+         "Did not expect a sink candidate without any allocas");
+  for (auto *II : SinkingCands) {
+    if (!isa<AllocaInst>(II)) {
+      cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+    }
+  }
 
   if (!HoistingCands.empty()) {
     auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
@@ -1361,11 +1461,11 @@ Function *CodeExtractor::extractCodeRegion() {
   }
 
   // Collect objects which are inputs to the extraction region and also
-  // referenced by lifetime start/end markers within it. The effects of these
+  // referenced by lifetime start markers within it. The effects of these
   // markers must be replicated in the calling function to prevent the stack
   // coloring pass from merging slots which store input objects.
-  ValueSet InputObjectsWithLifetime =
-      eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
+  ValueSet LifetimesStart;
+  eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
 
   // Construct new function based on inputs/outputs & add allocas for all defs.
   Function *newFunction =
@@ -1388,8 +1488,8 @@ Function *CodeExtractor::extractCodeRegion() {
 
   // Replicate the effects of any lifetime start/end markers which referenced
   // input objects in the extraction region by placing markers around the call.
-  insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
-                                       InputObjectsWithLifetime, TheCall);
+  insertLifetimeMarkersSurroundingCall(
+      oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
 
   // Propagate personality info to the new function if there is one.
   if (oldFunction->hasPersonalityFn())
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 4e7da7d0449f..069a86f6ab33 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -1,9 +1,8 @@
 //===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 975b363859a9..5f53d794fe8a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -1,9 +1,8 @@
 //===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -73,7 +72,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
           Value *&V = Loads[PN->getIncomingBlock(i)];
           if (!V) {
             // Insert the load into the predecessor block
-            V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+            V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+                             VolatileLoads,
                              PN->getIncomingBlock(i)->getTerminator());
           }
           PN->setIncomingValue(i, V);
@@ -81,7 +81,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
 
     } else {
       // If this is a normal instruction, just insert a load.
-      Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+      Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+                              VolatileLoads, U);
       U->replaceUsesOfWith(&I, V);
     }
   }
@@ -142,7 +143,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
     /* empty */;   // Don't insert before PHI nodes or landingpad instrs.
 
-  Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
+  Value *V =
+      new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt);
   P->replaceAllUsesWith(V);
 
   // Delete PHI.
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 569ea58a3047..4aa40eeadda4 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -1,9 +1,8 @@
 //===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,7 +30,7 @@ static void insertCall(Function &CurFn, StringRef Func,
       Func == "__mcount" ||
       Func == "_mcount" ||
       Func == "__cyg_profile_func_enter_bare") {
-    Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
+    FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
     CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
     Call->setDebugLoc(DL);
     return;
@@ -40,7 +39,7 @@ static void insertCall(Function &CurFn, StringRef Func,
   if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
     Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
 
-    Constant *Fn = M.getOrInsertFunction(
+    FunctionCallee Fn = M.getOrInsertFunction(
         Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
 
     Instruction *RetAddr = CallInst::Create(
diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp
index 762a374c135c..914babeb6829 100644
--- a/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -1,9 +1,8 @@
 //===- EscapeEnumerator.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,7 +18,7 @@
 #include "llvm/IR/Module.h"
 using namespace llvm;
 
-static Constant *getDefaultPersonalityFn(Module *M) {
+static FunctionCallee getDefaultPersonalityFn(Module *M) {
   LLVMContext &C = M->getContext();
   Triple T(M->getTargetTriple());
   EHPersonality Pers = getDefaultEHPersonality(T);
@@ -69,8 +68,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
   BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
   Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
   if (!F.hasPersonalityFn()) {
-    Constant *PersFn = getDefaultPersonalityFn(F.getParent());
-    F.setPersonalityFn(PersFn);
+    FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
+    F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
   }
 
   if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index e875cd686b00..0e203f4e075d 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -1,9 +1,8 @@
 //===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -175,6 +174,34 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
   return false;
 }
 
+/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's
+/// type and walk down through the initial elements to obtain additional
+/// pointers to try. Returns the first non-null return value from Func, or
+/// nullptr if the type can't be introspected further.
+static Constant *
+evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
+                       const TargetLibraryInfo *TLI,
+                       std::function<Constant *(Constant *)> Func) {
+  Constant *Val;
+  while (!(Val = Func(Ptr))) {
+    // If Ty is a struct, we can convert the pointer to the struct
+    // into a pointer to its first member.
+    // FIXME: This could be extended to support arrays as well.
+    Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+    if (!isa<StructType>(Ty))
+      break;
+
+    IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
+    Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+    Constant *const IdxList[] = {IdxZero, IdxZero};
+
+    Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
+    if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
+      Ptr = FoldedPtr;
+  }
+  return Val;
+}
+
 static Constant *getInitializer(Constant *C) {
   auto *GV = dyn_cast<GlobalVariable>(C);
   return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
@@ -185,8 +212,14 @@ static Constant *getInitializer(Constant *C) {
 Constant *Evaluator::ComputeLoadResult(Constant *P) {
   // If this memory location has been recently stored, use the stored value: it
   // is the most up-to-date.
-  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
-  if (I != MutatedMemory.end()) return I->second;
+  auto findMemLoc = [this](Constant *Ptr) {
+    DenseMap<Constant *, Constant *>::const_iterator I =
+        MutatedMemory.find(Ptr);
+    return I != MutatedMemory.end() ? I->second : nullptr;
+  };
+
+  if (Constant *Val = findMemLoc(P))
+    return Val;
 
   // Access it.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
@@ -204,13 +237,17 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
       break;
     // Handle a constantexpr bitcast.
     case Instruction::BitCast:
-      Constant *Val = getVal(CE->getOperand(0));
-      auto MM = MutatedMemory.find(Val);
-      auto *I = (MM != MutatedMemory.end()) ? MM->second
-                                            : getInitializer(CE->getOperand(0));
-      if (I)
+      // We're evaluating a load through a pointer that was bitcast to a
+      // different type. See if the "from" pointer has recently been stored.
+      // If it hasn't, we may still be able to find a stored pointer by
+      // introspecting the type.
+      Constant *Val =
+          evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc);
+      if (!Val)
+        Val = getInitializer(CE->getOperand(0));
+      if (Val)
         return ConstantFoldLoadThroughBitcast(
-            I, P->getType()->getPointerElementType(), DL);
+            Val, P->getType()->getPointerElementType(), DL);
       break;
     }
   }
@@ -330,37 +367,26 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                      << "Attempting to resolve bitcast on constant ptr.\n");
           // If we're evaluating a store through a bitcast, then we need
           // to pull the bitcast off the pointer type and push it onto the
-          // stored value.
-          Ptr = CE->getOperand(0);
-
-          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-
-          // In order to push the bitcast onto the stored value, a bitcast
-          // from NewTy to Val's type must be legal.  If it's not, we can try
-          // introspecting NewTy to find a legal conversion.
-          Constant *NewVal;
-          while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) {
-            // If NewTy is a struct, we can convert the pointer to the struct
-            // into a pointer to its first member.
-            // FIXME: This could be extended to support arrays as well.
-            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
-              NewTy = STy->getTypeAtIndex(0U);
-
-              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
-              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
-              Constant * const IdxList[] = {IdxZero, IdxZero};
-
-              Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
-              if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
-                Ptr = FoldedPtr;
-
-            // If we can't improve the situation by introspecting NewTy,
-            // we have to give up.
-            } else {
-              LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
-                                   "evaluate.\n");
-              return false;
+          // stored value. In order to push the bitcast onto the stored value,
+          // a bitcast from the pointer's element type to Val's type must be
+          // legal. If it's not, we can try introspecting the type to find a
+          // legal conversion.
+
+          auto castValTy = [&](Constant *P) -> Constant * {
+            Type *Ty = cast<PointerType>(P->getType())->getElementType();
+            if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) {
+              Ptr = P;
+              return FV;
             }
+            return nullptr;
+          };
+
+          Constant *NewVal =
+              evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy);
+          if (!NewVal) {
+            LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+                                 "evaluate.\n");
+            return false;
           }
 
           Val = NewVal;
@@ -541,7 +567,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
 
       if (Callee->isDeclaration()) {
         // If this is a function we can constant fold, do it.
-        if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
+        if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()),
+                                           Callee, Formals, TLI)) {
           InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
           if (!InstResult)
             return false;
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index d9778f4a1fb7..0c52e6f3703b 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -1,9 +1,8 @@
 //===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index a717d9b72819..a9b28754c8e9 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -1,9 +1,8 @@
 //===- FunctionComparator.h - Function Comparator -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,6 +113,19 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
     for (; LI != LE && RI != RE; ++LI, ++RI) {
       Attribute LA = *LI;
       Attribute RA = *RI;
+      if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
+        if (LA.getKindAsEnum() != RA.getKindAsEnum())
+          return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());
+
+        Type *TyL = LA.getValueAsType();
+        Type *TyR = RA.getValueAsType();
+        if (TyL && TyR)
+          return cmpTypes(TyL, TyR);
+
+        // Two pointers, at least one null, so the comparison result is
+        // independent of the value of a real pointer.
+        return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
+      }
       if (LA < RA)
         return -1;
       if (RA < LA)
@@ -557,31 +569,20 @@ int FunctionComparator::cmpOperations(const Instruction *L,
   }
   if (const CmpInst *CI = dyn_cast<CmpInst>(L))
     return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
-  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
-    if (int Res = cmpNumbers(CI->getCallingConv(),
-                             cast<CallInst>(R)->getCallingConv()))
+  if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
+    auto CSR = CallSite(const_cast<Instruction *>(R));
+    if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
       return Res;
-    if (int Res =
-            cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+    if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
       return Res;
-    if (int Res = cmpOperandBundlesSchema(CI, R))
-      return Res;
-    return cmpRangeMetadata(
-        CI->getMetadata(LLVMContext::MD_range),
-        cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
-  }
-  if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
-    if (int Res = cmpNumbers(II->getCallingConv(),
-                             cast<InvokeInst>(R)->getCallingConv()))
+    if (int Res = cmpOperandBundlesSchema(L, R))
       return Res;
-    if (int Res =
-            cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
-      return Res;
-    if (int Res = cmpOperandBundlesSchema(II, R))
-      return Res;
-    return cmpRangeMetadata(
-        II->getMetadata(LLVMContext::MD_range),
-        cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+    if (const CallInst *CI = dyn_cast<CallInst>(L))
+      if (int Res = cmpNumbers(CI->getTailCallKind(),
+                               cast<CallInst>(R)->getTailCallKind()))
+        return Res;
+    return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
+                            R->getMetadata(LLVMContext::MD_range));
   }
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
     ArrayRef<unsigned> LIndices = IVI->getIndices();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index a9772e31da50..c9cc0990f237 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -1,9 +1,8 @@
 //===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -130,7 +129,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
     // definitions upon import, so that they are available for inlining
     // and/or optimization, but are turned into declarations later
     // during the EliminateAvailableExternally pass.
-    if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+    if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
       return GlobalValue::AvailableExternallyLinkage;
     // An imported external declaration stays external.
     return SGV->getLinkage();
@@ -159,7 +158,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
     // equivalent, so the issue described above for weak_any does not exist,
     // and the definition can be imported. It can be treated similarly
     // to an imported externally visible global value.
-    if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+    if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
       return GlobalValue::AvailableExternallyLinkage;
     else
       return GlobalValue::ExternalLinkage;
@@ -177,7 +176,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
     // If we are promoting the local to global scope, it is handled
     // similarly to a normal externally visible global.
     if (DoPromote) {
-      if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+      if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
         return GlobalValue::AvailableExternallyLinkage;
       else
         return GlobalValue::ExternalLinkage;
@@ -230,11 +229,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
     }
   }
 
-  // Mark read-only variables which can be imported with specific attribute.
-  // We can't internalize them now because IRMover will fail to link variable
-  // definitions to their external declarations during ThinLTO import. We'll
-  // internalize read-only variables later, after import is finished.
-  // See internalizeImmutableGVs.
+  // Mark read/write-only variables which can be imported with specific
+  // attribute. We can't internalize them now because IRMover will fail
+  // to link variable definitions to their external declarations during
+  // ThinLTO import. We'll internalize these variables later, after
+  // import is finished. See internalizeGVsAfterImport.
   //
   // If global value dead stripping is not enabled in summary then
   // propagateConstants hasn't been run. We can't internalize GV
@@ -242,13 +241,16 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
   if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
     const auto &SL = VI.getSummaryList();
     auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
-    if (GVS && GVS->isReadOnly())
+    // At this stage "maybe" is "definitely"
+    if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
       cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
   }
 
   bool DoPromote = false;
   if (GV.hasLocalLinkage() &&
       ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+    // Save the original name string before we rename GV below.
+    auto Name = GV.getName().str();
     // Once we change the name or linkage it is difficult to determine
     // again whether we should promote since shouldPromoteLocalToGlobal needs
     // to locate the summary (based on GUID from name and linkage). Therefore,
@@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
     GV.setLinkage(getLinkage(&GV, DoPromote));
     if (!GV.hasLocalLinkage())
       GV.setVisibility(GlobalValue::HiddenVisibility);
+
+    // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+    // for later renaming as well. This is required for COFF.
+    if (const auto *C = GV.getComdat())
+      if (C->getName() == Name)
+        RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
   } else
     GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
 
@@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
     processGlobalForThinLTO(SF);
   for (GlobalAlias &GA : M.aliases())
     processGlobalForThinLTO(GA);
+
+  // Replace any COMDATS that required renaming (because the COMDAT leader was
+  // promoted and renamed).
+  if (!RenamedComdats.empty())
+    for (auto &GO : M.global_objects())
+      if (auto *C = GO.getComdat()) {
+        auto Replacement = RenamedComdats.find(C);
+        if (Replacement != RenamedComdats.end())
+          GO.setComdat(Replacement->second);
+      }
 }
 
 bool FunctionImportGlobalProcessing::run() {
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index ff6970db47da..a2942869130d 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -1,9 +1,8 @@
 //===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Utils/GuardUtils.cpp b/lib/Transforms/Utils/GuardUtils.cpp
index 08de0a4c53e9..34c32d9c0c98 100644
--- a/lib/Transforms/Utils/GuardUtils.cpp
+++ b/lib/Transforms/Utils/GuardUtils.cpp
@@ -1,9 +1,8 @@
 //===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Utils that are used to perform transformations related to guards and their
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 02482c550321..8041e66e6c4c 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -1,9 +1,8 @@
 //===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Generating inliner statistics for imported functions, mostly useful for
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 623fe91a5a60..a7f0f7ac5d61 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1,9 +1,8 @@
 //===- InlineFunction.cpp - Code to perform function inlining -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -85,16 +84,10 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
   cl::init(true), cl::Hidden,
   cl::desc("Convert align attributes to assumptions during inlining."));
 
-llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
-                                        AAResults *CalleeAAR,
-                                        bool InsertLifetime) {
-  return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
-}
-
-llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                                         AAResults *CalleeAAR,
                                         bool InsertLifetime) {
-  return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
+  return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
 }
 
 namespace {
@@ -1042,11 +1035,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
 
       SmallSetVector<const Argument *, 4> NAPtrArgs;
       for (const Value *V : PtrArgs) {
-        SmallVector<Value *, 4> Objects;
-        GetUnderlyingObjects(const_cast<Value*>(V),
-                             Objects, DL, /* LI = */ nullptr);
+        SmallVector<const Value *, 4> Objects;
+        GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
 
-        for (Value *O : Objects)
+        for (const Value *O : Objects)
           ObjSet.insert(O);
       }
 
@@ -1216,14 +1208,14 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
 
     // If the call was inlined, but then constant folded, there is no edge to
     // add.  Check for this case.
-    Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+    auto *NewCall = dyn_cast<CallBase>(VMI->second);
     if (!NewCall)
       continue;
 
     // We do not treat intrinsic calls like real function calls because we
     // expect them to become inline code; do not add an edge for an intrinsic.
-    CallSite CS = CallSite(NewCall);
-    if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+    if (NewCall->getCalledFunction() &&
+        NewCall->getCalledFunction()->isIntrinsic())
       continue;
 
     // Remember that this call site got inlined for the client of
@@ -1236,19 +1228,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
     // destination.  This can also happen if the call graph node of the caller
     // was just unnecessarily imprecise.
     if (!I->second->getFunction())
-      if (Function *F = CallSite(NewCall).getCalledFunction()) {
+      if (Function *F = NewCall->getCalledFunction()) {
         // Indirect call site resolved to direct call.
-        CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+        CallerNode->addCalledFunction(NewCall, CG[F]);
 
         continue;
       }
 
-    CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+    CallerNode->addCalledFunction(NewCall, I->second);
   }
 
   // Update the call graph by deleting the edge from Callee to Caller.  We must
   // do this after the loop above in case Caller and Callee are the same.
-  CallerNode->removeCallEdgeFor(CS);
+  CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
 }
 
 static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
@@ -1353,6 +1345,44 @@ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
   return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
 }
 
+/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
+/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
+static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
+                               LLVMContext &Ctx,
+                               DenseMap<const MDNode *, MDNode *> &IANodes) {
+  auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
+  return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
+                       IA);
+}
+
+/// Returns the LoopID for a loop which has been cloned from another
+/// function for inlining with the new inlined-at start and end locs.
+static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
+                            LLVMContext &Ctx,
+                            DenseMap<const MDNode *, MDNode *> &IANodes) {
+  assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
+         "Loop ID needs at least one operand");
+  assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
+         "Loop ID should refer to itself");
+
+  // Save space for the self-referential LoopID.
+  SmallVector<Metadata *, 4> MDs = {nullptr};
+
+  for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
+    Metadata *MD = OrigLoopId->getOperand(i);
+    // Update the DILocations to encode the inlined-at metadata.
+    if (DILocation *DL = dyn_cast<DILocation>(MD))
+      MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
+    else
+      MDs.push_back(MD);
+  }
+
+  MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
+  // Insert the self-referential LoopID.
+  NewLoopID->replaceOperandWith(0, NewLoopID);
+  return NewLoopID;
+}
+
 /// Update inlined instructions' line numbers to
 /// to encode location where these instructions are inlined.
 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1378,10 +1408,17 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   for (; FI != Fn->end(); ++FI) {
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
          BI != BE; ++BI) {
+      // Loop metadata needs to be updated so that the start and end locs
+      // reference inlined-at locations.
+      if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
+        MDNode *NewLoopID =
+            inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
+        BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
+      }
+
       if (DebugLoc DL = BI->getDebugLoc()) {
-        auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
-                                            IANodes);
-        auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+        DebugLoc IDL =
+            inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
         BI->setDebugLoc(IDL);
         continue;
       }
@@ -1448,47 +1485,45 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
       CalleeEntryCount.getCount() < 1)
     return;
   auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
-  uint64_t CallCount =
+  int64_t CallCount =
       std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
                CalleeEntryCount.getCount());
-
-  for (auto const &Entry : VMap)
-    if (isa<CallInst>(Entry.first))
-      if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
-        CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());
-  for (BasicBlock &BB : *Callee)
-    // No need to update the callsite if it is pruned during inlining.
-    if (VMap.count(&BB))
-      for (Instruction &I : BB)
-        if (CallInst *CI = dyn_cast<CallInst>(&I))
-          CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount,
-                               CalleeEntryCount.getCount());
+  updateProfileCallee(Callee, -CallCount, &VMap);
 }
 
-/// Update the entry count of callee after inlining.
-///
-/// The callsite's block count is subtracted from the callee's function entry
-/// count.
-static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
-                              Instruction *CallInst, Function *Callee,
-                              ProfileSummaryInfo *PSI) {
-  // If the callee has a original count of N, and the estimated count of
-  // callsite is M, the new callee count is set to N - M. M is estimated from
-  // the caller's entry count, its entry block frequency and the block frequency
-  // of the callsite.
+void llvm::updateProfileCallee(
+    Function *Callee, int64_t entryDelta,
+    const ValueMap<const Value *, WeakTrackingVH> *VMap) {
   auto CalleeCount = Callee->getEntryCount();
-  if (!CalleeCount.hasValue() || !PSI)
-    return;
-  auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);
-  if (!CallCount.hasValue())
+  if (!CalleeCount.hasValue())
     return;
+
+  uint64_t priorEntryCount = CalleeCount.getCount();
+  uint64_t newEntryCount;
+
   // Since CallSiteCount is an estimate, it could exceed the original callee
-  // count and has to be set to 0.
-  if (CallCount.getValue() > CalleeCount.getCount())
-    CalleeCount.setCount(0);
+  // count, so clamp the new entry count to 0 to guard against underflow.
+  if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
+    newEntryCount = 0;
   else
-    CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue());
-  Callee->setEntryCount(CalleeCount);
+    newEntryCount = priorEntryCount + entryDelta;
+
+  Callee->setEntryCount(newEntryCount);
+
+  // When a VMap is supplied (the inlining case), rescale the cloned calls.
+  if (VMap) {
+    uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
+    for (auto const &Entry : *VMap)
+      if (isa<CallInst>(Entry.first))
+        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+          CI->updateProfWeight(cloneEntryCount, priorEntryCount);
+  }
+  for (BasicBlock &BB : *Callee)
+    // No need to update the callsite if it is pruned during inlining.
+    if (!VMap || VMap->count(&BB))
+      for (Instruction &I : BB)
+        if (CallInst *CI = dyn_cast<CallInst>(&I))
+          CI->updateProfWeight(newEntryCount, priorEntryCount);
 }
 
 /// This function inlines the called function into the basic block of the
@@ -1507,6 +1542,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   assert(TheCall->getParent() && TheCall->getFunction()
          && "Instruction not in function!");
 
+  // FIXME: we don't inline callbr yet.
+  if (isa<CallBrInst>(TheCall))
+    return false;
+
   // If IFI has any state in it, zap it before we fill it in.
   IFI.reset();
 
@@ -1684,8 +1723,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
     updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
                       IFI.PSI, IFI.CallerBFI);
-    // Update the profile count of callee.
-    updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
 
     // Inject byval arguments initialization.
     for (std::pair<Value*, Value*> &Init : ByValInit)
@@ -1734,6 +1771,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         Instruction *NewI = nullptr;
         if (isa<CallInst>(I))
           NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+        else if (isa<CallBrInst>(I))
+          NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
         else
           NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
 
@@ -1817,8 +1856,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Move any dbg.declares describing the allocas into the entry basic block.
     DIBuilder DIB(*Caller->getParent());
     for (auto &AI : IFI.StaticAllocas)
-      replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0,
-                                 DIExpression::NoDeref);
+      replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
   }
 
   SmallVector<Value*,4> VarArgsToForward;
@@ -1869,10 +1907,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
           // Add VarArgs to existing parameters.
           SmallVector<Value *, 6> Params(CI->arg_operands());
           Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
-          CallInst *NewCI =
-              CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction()
-                                                       : CI->getCalledValue(),
-                               Params, "", CI);
+          CallInst *NewCI = CallInst::Create(
+              CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
           NewCI->setDebugLoc(CI->getDebugLoc());
           NewCI->setAttributes(Attrs);
           NewCI->setCallingConv(CI->getCallingConv());
@@ -2038,6 +2074,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         Instruction *NewInst;
         if (CS.isCall())
           NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+        else if (CS.isCallBr())
+          NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
         else
           NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
         NewInst->takeName(I);
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 003721f2b939..6c4fc1ceb991 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -1,9 +1,8 @@
 //===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 4a359b99bebd..9082049c82da 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -1,9 +1,8 @@
 //===-- IntegerDivision.cpp - Expand integer division ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 53d444b309d5..29e7c5260f46 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -1,9 +1,8 @@
 //===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,11 +31,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
 #include "llvm/IR/PredIteratorCache.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
@@ -198,6 +199,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
         continue;
       }
 
+      // If we added a single PHI, it must dominate all uses and we can directly
+      // rename it.
+      if (AddedPHIs.size() == 1) {
+        // Tell the VHs that the uses changed. This updates SCEV's caches.
+        // We might call ValueIsRAUWd multiple times for the same value.
+        if (UseToRewrite->get()->hasValueHandle())
+          ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
+        UseToRewrite->set(AddedPHIs[0]);
+        continue;
+      }
+
       // Otherwise, do full PHI insertion.
       SSAUpdate.RewriteUse(*UseToRewrite);
     }
@@ -211,9 +223,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
       BasicBlock *UserBB = DVI->getParent();
       if (InstBB == UserBB || L->contains(UserBB))
         continue;
-      // We currently only handle debug values residing in blocks where we have
-      // inserted a PHI instruction.
-      if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
+      // We currently only handle debug values residing in blocks that were
+      // traversed while rewriting the uses. If we inserted just a single PHI,
+      // we will handle all relevant debug values.
+      Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+                                       : SSAUpdate.FindValueForBlock(UserBB);
+      if (V)
         DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
     }
 
@@ -306,6 +321,12 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
                      ScalarEvolution *SE) {
   bool Changed = false;
 
+#ifdef EXPENSIVE_CHECKS
+  // Verify all sub-loops are in LCSSA form already.
+  for (Loop *SubLoop: L)
+    assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+#endif
+
   SmallVector<BasicBlock *, 8> ExitBlocks;
   L.getExitBlocks(ExitBlocks);
   if (ExitBlocks.empty())
@@ -325,6 +346,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
   // Look at all the instructions in the loop, checking to see if they have uses
   // outside the loop.  If so, put them into the worklist to rewrite those uses.
   for (BasicBlock *BB : BlocksDominatingExits) {
+    // Skip blocks that are part of any sub-loops, they must be in LCSSA
+    // already.
+    if (LI->getLoopFor(BB) != &L)
+      continue;
     for (Instruction &I : *BB) {
       // Reject two common cases fast: instructions with no uses (like stores)
       // and instructions with one use that is in the same block as this.
@@ -419,6 +444,8 @@ struct LCSSAWrapperPass : public FunctionPass {
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<ScalarEvolutionWrapperPass>();
     AU.addPreserved<SCEVAAWrapperPass>();
+    AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
 
     // This is needed to perform LCSSA verification inside LPPassManager
     AU.addRequired<LCSSAVerificationPass>();
@@ -462,5 +489,9 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
   PA.preserve<GlobalsAA>();
   PA.preserve<SCEVAA>();
   PA.preserve<ScalarEvolutionAnalysis>();
+  // BPI maps terminators to probabilities, since we don't modify the CFG, no
+  // updates are needed to preserve it.
+  PA.preserve<BranchProbabilityAnalysis>();
+  PA.preserve<MemorySSAAnalysis>();
   return PA;
 }
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index e1592c867636..8c67d1dc6eb3 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -1,9 +1,8 @@
 //===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 499e611acb57..39b6b889f91c 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1,9 +1,8 @@
 //===- Local.cpp - Functions to perform local transformations -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,6 +26,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
@@ -49,7 +49,6 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -92,6 +91,10 @@ using namespace llvm::PatternMatch;
 
 STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
 
+// Maximum recursion depth for collectBitParts, which is used when detecting
+// bswap and bitreverse idioms.
+static const unsigned BitPartRecursionMaxDepth = 64;
+
 //===----------------------------------------------------------------------===//
 //  Local constant propagation.
 //
@@ -129,7 +132,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       Builder.CreateBr(Destination);
       BI->eraseFromParent();
       if (DTU)
-        DTU->deleteEdgeRelaxed(BB, OldDest);
+        DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
       return true;
     }
 
@@ -205,7 +208,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
         i = SI->removeCase(i);
         e = SI->case_end();
         if (DTU)
-          DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
+          DTU->applyUpdatesPermissive(
+              {{DominatorTree::Delete, ParentBB, DefaultDest}});
         continue;
       }
 
@@ -253,7 +257,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       if (DeleteDeadConditions)
         RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
       if (DTU)
-        DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+        DTU->applyUpdatesPermissive(Updates);
       return true;
     }
 
@@ -331,7 +335,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       }
 
       if (DTU)
-        DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+        DTU->applyUpdatesPermissive(Updates);
       return true;
     }
   }
@@ -416,8 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
     if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
       return C->isNullValue() || isa<UndefValue>(C);
 
-  if (CallSite CS = CallSite(I))
-    if (isMathLibCallNoop(CS, TLI))
+  if (auto *Call = dyn_cast<CallBase>(I))
+    if (isMathLibCallNoop(Call, TLI))
       return true;
 
   return false;
@@ -430,7 +434,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
 bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
     Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
+  if (!I || !isInstructionTriviallyDead(I, TLI))
     return false;
 
   SmallVector<Instruction*, 16> DeadInsts;
@@ -665,7 +669,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
     if (PhiIt != OldPhiIt) PhiIt = &BB->front();
   }
   if (DTU)
-    DTU->deleteEdgeRelaxed(Pred, BB);
+    DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
 }
 
 /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
@@ -734,7 +738,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
            isa<UnreachableInst>(PredBB->getTerminator()) &&
            "The successor list of PredBB isn't empty before "
            "applying corresponding DTU updates.");
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
     DTU->deleteBB(PredBB);
     // Recalculation of DomTree is needed when updating a forward DomTree and
     // the Entry BB is replaced.
@@ -997,6 +1001,18 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
     }
   }
 
+  // We cannot fold the block if it's a branch to an already present callbr
+  // successor because that creates duplicate successors.
+  for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+    if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) {
+      if (Succ == CBI->getDefaultDest())
+        return false;
+      for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
+        if (Succ == CBI->getIndirectDest(i))
+          return false;
+    }
+  }
+
   LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
 
   SmallVector<DominatorTree::UpdateType, 32> Updates;
@@ -1064,7 +1080,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
                            "applying corresponding DTU updates.");
 
   if (DTU) {
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
     DTU->deleteBB(BB);
   } else {
     BB->eraseFromParent(); // Delete the old basic block.
@@ -1272,6 +1288,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
   return false;
 }
 
+/// Produce the DebugLoc to use for a dbg.declare/inst pair that is promoted
+/// to a dbg.value. Because no machine insts can come from debug intrinsics,
+/// only the scope and inlinedAt are significant; line and column are zero in
+/// case this DebugLoc leaks into adjacent instructions. \p Src is unused here.
+static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
+  // The original dbg.declare must have a location; reuse its scope fields.
+  DebugLoc DeclareLoc = DII->getDebugLoc();
+  MDNode *Scope = DeclareLoc.getScope();
+  DILocation *InlinedAt = DeclareLoc.getInlinedAt();
+  // Build a line-0, column-0 location that keeps the scope / inlinedAt fields.
+  return DebugLoc::get(0, 0, Scope, InlinedAt);
+}
+
 /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
 /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
@@ -1280,9 +1309,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
   auto *DIVar = DII->getVariable();
   assert(DIVar && "Missing variable");
   auto *DIExpr = DII->getExpression();
-  Value *DV = SI->getOperand(0);
+  Value *DV = SI->getValueOperand();
+
+  DebugLoc NewLoc = getDebugValueLoc(DII, SI);
 
-  if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) {
+  if (!valueCoversEntireFragment(DV->getType(), DII)) {
     // FIXME: If storing to a part of the variable described by the dbg.declare,
     // then we want to insert a dbg.value for the corresponding fragment.
     LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
@@ -1292,14 +1323,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
     // know nothing about the variable's content.
     DV = UndefValue::get(DV->getType());
     if (!LdStHasDebugValue(DIVar, DIExpr, SI))
-      Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
-                                      SI);
+      Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
     return;
   }
 
   if (!LdStHasDebugValue(DIVar, DIExpr, SI))
-    Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
-                                    SI);
+    Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
 }
 
 /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1322,12 +1351,14 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
     return;
   }
 
+  DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
   // We are now tracking the loaded value instead of the address. In the
   // future if multi-location support is added to the IR, it might be
   // preferable to keep tracking both the loaded value and the original
   // address in case the alloca can not be elided.
   Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
-      LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr);
+      LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
   DbgValue->insertAfter(LI);
 }
 
@@ -1354,12 +1385,13 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
   BasicBlock *BB = APN->getParent();
   auto InsertionPt = BB->getFirstInsertionPt();
 
+  DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
   // The block may be a catchswitch block, which does not have a valid
   // insertion point.
   // FIXME: Insert dbg.value markers in the successors when appropriate.
   if (InsertionPt != BB->end())
-    Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(),
-                                    &*InsertionPt);
+    Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
 }
 
 /// Determine whether this alloca is either a VLA or an array.
@@ -1414,10 +1446,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
         // This is a call by-value or some other instruction that takes a
         // pointer to the variable. Insert a *value* intrinsic that describes
         // the variable by dereferencing the alloca.
+        DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
         auto *DerefExpr =
             DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
-        DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
-                                    DDI->getDebugLoc(), CI);
+        DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
+                                    CI);
       }
     }
     DDI->eraseFromParent();
@@ -1519,14 +1552,14 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
 
 bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
                              Instruction *InsertBefore, DIBuilder &Builder,
-                             bool DerefBefore, int Offset, bool DerefAfter) {
+                             uint8_t DIExprFlags, int Offset) {
   auto DbgAddrs = FindDbgAddrUses(Address);
   for (DbgVariableIntrinsic *DII : DbgAddrs) {
     DebugLoc Loc = DII->getDebugLoc();
     auto *DIVar = DII->getVariable();
     auto *DIExpr = DII->getExpression();
     assert(DIVar && "Missing variable");
-    DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter);
+    DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
     // Insert llvm.dbg.declare immediately before InsertBefore, and remove old
     // llvm.dbg.declare.
     Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
@@ -1538,10 +1571,10 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
 }
 
 bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
-                                      DIBuilder &Builder, bool DerefBefore,
-                                      int Offset, bool DerefAfter) {
+                                      DIBuilder &Builder, uint8_t DIExprFlags,
+                                      int Offset) {
   return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
-                           DerefBefore, Offset, DerefAfter);
+                           DIExprFlags, Offset);
 }
 
 static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
@@ -1594,120 +1627,119 @@ bool llvm::salvageDebugInfo(Instruction &I) {
   if (DbgUsers.empty())
     return false;
 
-  auto &M = *I.getModule();
-  auto &DL = M.getDataLayout();
+  return salvageDebugInfoForDbgValues(I, DbgUsers);
+}
+
+bool llvm::salvageDebugInfoForDbgValues(
+    Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
   auto &Ctx = I.getContext();
   auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
 
-  auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
-    auto *DIExpr = DII->getExpression();
-    if (!Ops.empty()) {
-      // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
-      // are implicitly pointing out the value as a DWARF memory location
-      // description.
-      bool WithStackValue = isa<DbgValueInst>(DII);
-      DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
-    }
+  for (auto *DII : DbgUsers) {
+    // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+    // are implicitly pointing out the value as a DWARF memory location
+    // description.
+    bool StackValue = isa<DbgValueInst>(DII);
+
+    DIExpression *DIExpr =
+        salvageDebugInfoImpl(I, DII->getExpression(), StackValue);
+
+    // salvageDebugInfoImpl should fail on examining the first element of
+    // DbgUsers, or none of them.
+    if (!DIExpr)
+      return false;
+
     DII->setOperand(0, wrapMD(I.getOperand(0)));
     DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
     LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+  }
+
+  return true;
+}
+
+DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
+                                         DIExpression *SrcDIExpr,
+                                         bool WithStackValue) {
+  auto &M = *I.getModule();
+  auto &DL = M.getDataLayout();
+
+  // Apply a vector of opcodes to the source DIExpression.
+  auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
+    DIExpression *DIExpr = SrcDIExpr;
+    if (!Ops.empty()) {
+      DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+    }
+    return DIExpr;
   };
 
-  auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
+  // Apply the given offset to the source DIExpression.
+  auto applyOffset = [&](uint64_t Offset) -> DIExpression * {
     SmallVector<uint64_t, 8> Ops;
     DIExpression::appendOffset(Ops, Offset);
-    doSalvage(DII, Ops);
+    return doSalvage(Ops);
   };
 
-  auto applyOps = [&](DbgVariableIntrinsic *DII,
-                      std::initializer_list<uint64_t> Opcodes) {
+  // initializer-list helper for applying operators to the source DIExpression.
+  auto applyOps =
+      [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
     SmallVector<uint64_t, 8> Ops(Opcodes);
-    doSalvage(DII, Ops);
+    return doSalvage(Ops);
   };
 
   if (auto *CI = dyn_cast<CastInst>(&I)) {
-    if (!CI->isNoopCast(DL))
-      return false;
-
-    // No-op casts are irrelevant for debug info.
-    MetadataAsValue *CastSrc = wrapMD(I.getOperand(0));
-    for (auto *DII : DbgUsers) {
-      DII->setOperand(0, CastSrc);
-      LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
-    }
-    return true;
+    // No-op casts and zexts are irrelevant for debug info.
+    if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
+      return SrcDIExpr;
+    return nullptr;
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
     unsigned BitWidth =
         M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
-    // Rewrite a constant GEP into a DIExpression.  Since we are performing
-    // arithmetic to compute the variable's *value* in the DIExpression, we
-    // need to mark the expression with a DW_OP_stack_value.
+    // Rewrite a constant GEP into a DIExpression.
     APInt Offset(BitWidth, 0);
-    if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
-      for (auto *DII : DbgUsers)
-        applyOffset(DII, Offset.getSExtValue());
-    return true;
+    if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+      return applyOffset(Offset.getSExtValue());
+    } else {
+      return nullptr;
+    }
   } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
     // Rewrite binary operations with constant integer operands.
     auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
     if (!ConstInt || ConstInt->getBitWidth() > 64)
-      return false;
+      return nullptr;
 
     uint64_t Val = ConstInt->getSExtValue();
-    for (auto *DII : DbgUsers) {
-      switch (BI->getOpcode()) {
-      case Instruction::Add:
-        applyOffset(DII, Val);
-        break;
-      case Instruction::Sub:
-        applyOffset(DII, -int64_t(Val));
-        break;
-      case Instruction::Mul:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
-        break;
-      case Instruction::SDiv:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
-        break;
-      case Instruction::SRem:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
-        break;
-      case Instruction::Or:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
-        break;
-      case Instruction::And:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
-        break;
-      case Instruction::Xor:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
-        break;
-      case Instruction::Shl:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
-        break;
-      case Instruction::LShr:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
-        break;
-      case Instruction::AShr:
-        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
-        break;
-      default:
-        // TODO: Salvage constants from each kind of binop we know about.
-        return false;
-      }
+    switch (BI->getOpcode()) {
+    case Instruction::Add:
+      return applyOffset(Val);
+    case Instruction::Sub:
+      return applyOffset(-int64_t(Val));
+    case Instruction::Mul:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
+    case Instruction::SDiv:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
+    case Instruction::SRem:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
+    case Instruction::Or:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
+    case Instruction::And:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
+    case Instruction::Xor:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
+    case Instruction::Shl:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
+    case Instruction::LShr:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
+    case Instruction::AShr:
+      return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
+    default:
+      // TODO: Salvage constants from each kind of binop we know about.
+      return nullptr;
     }
-    return true;
-  } else if (isa<LoadInst>(&I)) {
-    MetadataAsValue *AddrMD = wrapMD(I.getOperand(0));
-    for (auto *DII : DbgUsers) {
-      // Rewrite the load into DW_OP_deref.
-      auto *DIExpr = DII->getExpression();
-      DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
-      DII->setOperand(0, AddrMD);
-      DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
-      LLVM_DEBUG(dbgs() << "SALVAGE:  " << *DII << '\n');
-    }
-    return true;
+    // *Not* to do: we should not attempt to salvage load instructions,
+    // because the validity and lifetime of a dbg.value containing
+    // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
   }
-  return false;
+  return nullptr;
 }
 
 /// A replacement for a dbg.value expression.
@@ -1849,21 +1881,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
         return None;
 
       bool Signed = *Signedness == DIBasicType::Signedness::Signed;
-
-      if (!Signed) {
-        // In the unsigned case, assume that a debugger will initialize the
-        // high bits to 0 and do a no-op conversion.
-        return Identity(DII);
-      } else {
-        // In the signed case, the high bits are given by sign extension, i.e:
-        //   (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1)
-        // Calculate the high bits and OR them together with the low bits.
-        SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu,
-                                      (ToBits - 1), dwarf::DW_OP_shr,
-                                      dwarf::DW_OP_lit0, dwarf::DW_OP_not,
-                                      dwarf::DW_OP_mul, dwarf::DW_OP_or});
-        return DIExpression::appendToStack(DII.getExpression(), Ops);
-      }
+      dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
+      SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
+                                   dwarf::DW_OP_LLVM_convert, FromBits, TK});
+      return DIExpression::appendToStack(DII.getExpression(), Ops);
     };
     return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
   }
@@ -1894,10 +1915,14 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
 }
 
 unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
-                                   bool PreserveLCSSA, DomTreeUpdater *DTU) {
+                                   bool PreserveLCSSA, DomTreeUpdater *DTU,
+                                   MemorySSAUpdater *MSSAU) {
   BasicBlock *BB = I->getParent();
   std::vector <DominatorTree::UpdateType> Updates;
 
+  if (MSSAU)
+    MSSAU->changeToUnreachable(I);
+
   // Loop over all of the successors, removing BB's entry from any PHI
   // nodes.
   if (DTU)
@@ -1928,7 +1953,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
     ++NumInstrsRemoved;
   }
   if (DTU)
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
   return NumInstrsRemoved;
 }
 
@@ -1937,8 +1962,8 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
   SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
   SmallVector<OperandBundleDef, 1> OpBundles;
   II->getOperandBundlesAsDefs(OpBundles);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
-                                       "", II);
+  CallInst *NewCall = CallInst::Create(
+      II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
@@ -1956,7 +1981,7 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
   UnwindDestBB->removePredecessor(BB);
   II->eraseFromParent();
   if (DTU)
-    DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+    DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
 }
 
 BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -1981,8 +2006,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
   // can potentially be avoided with a cleverer API design that we do not have
   // as of this time.
 
-  InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
-                                      InvokeArgs, OpBundles, CI->getName(), BB);
+  InvokeInst *II =
+      InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split,
+                         UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
   II->setDebugLoc(CI->getDebugLoc());
   II->setCallingConv(CI->getCallingConv());
   II->setAttributes(CI->getAttributes());
@@ -2052,7 +2078,7 @@ static bool markAliveBlocks(Function &F,
           Changed = true;
           break;
         }
-        if (CI->doesNotReturn()) {
+        if (CI->doesNotReturn() && !CI->isMustTailCall()) {
           // If we found a call to a no-return function, insert an unreachable
           // instruction after it.  Make sure there isn't *already* one there
           // though.
@@ -2102,7 +2128,8 @@ static bool markAliveBlocks(Function &F,
           UnwindDestBB->removePredecessor(II->getParent());
           II->eraseFromParent();
           if (DTU)
-            DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+            DTU->applyUpdatesPermissive(
+                {{DominatorTree::Delete, BB, UnwindDestBB}});
         } else
           changeToCall(II, DTU);
         Changed = true;
@@ -2191,7 +2218,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
   TI->replaceAllUsesWith(NewTI);
   TI->eraseFromParent();
   if (DTU)
-    DTU->deleteEdgeRelaxed(BB, UnwindDest);
+    DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
 }
 
 /// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2211,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
   assert(Reachable.size() < F.size());
   NumRemoved += F.size()-Reachable.size();
 
-  SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
   for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
     auto *BB = &*I;
     if (Reachable.count(BB))
@@ -2256,7 +2283,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
   }
 
   if (DTU) {
-    DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+    DTU->applyUpdatesPermissive(Updates);
     bool Deleted = false;
     for (auto *BB : DeadBlockSet) {
       if (DTU->isBBPendingDeletion(BB))
@@ -2450,12 +2477,12 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
   return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
 }
 
-bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
+bool llvm::callsGCLeafFunction(const CallBase *Call,
                                const TargetLibraryInfo &TLI) {
   // Check if the function is specifically marked as a gc leaf function.
-  if (CS.hasFnAttr("gc-leaf-function"))
+  if (Call->hasFnAttr("gc-leaf-function"))
     return true;
-  if (const Function *F = CS.getCalledFunction()) {
+  if (const Function *F = Call->getCalledFunction()) {
     if (F->hasFnAttribute("gc-leaf-function"))
       return true;
 
@@ -2469,7 +2496,7 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
   // marked as 'gc-leaf-function.' All available Libcalls are
   // GC-leaf.
   LibFunc LF;
-  if (TLI.getLibFunc(CS, LF)) {
+  if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) {
     return TLI.has(LF);
   }
 
@@ -2530,13 +2557,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
                                     BasicBlock *BB) {
   // Since we are moving the instructions out of its basic block, we do not
   // retain their original debug locations (DILocations) and debug intrinsic
-  // instructions (dbg.values).
+  // instructions.
   //
   // Doing so would degrade the debugging experience and adversely affect the
   // accuracy of profiling information.
   //
   // Currently, when hoisting the instructions, we take the following actions:
-  // - Remove their dbg.values.
+  // - Remove their debug intrinsic instructions.
   // - Set their debug locations to the values from the insertion point.
   //
   // As per PR39141 (comment #8), the more fundamental reason why the dbg.values
@@ -2554,7 +2581,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
     I->dropUnknownNonDebugMetadata();
     if (I->isUsedByMetadata())
       dropDebugUsers(*I);
-    if (isa<DbgVariableIntrinsic>(I)) {
+    if (isa<DbgInfoIntrinsic>(I)) {
       // Remove DbgInfo Intrinsics.
       II = I->eraseFromParent();
       continue;
@@ -2613,7 +2640,7 @@ struct BitPart {
 /// does not invalidate internal references (std::map instead of DenseMap).
 static const Optional<BitPart> &
 collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
-                std::map<Value *, Optional<BitPart>> &BPS) {
+                std::map<Value *, Optional<BitPart>> &BPS, int Depth) {
   auto I = BPS.find(V);
   if (I != BPS.end())
     return I->second;
@@ -2621,13 +2648,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
   auto &Result = BPS[V] = None;
   auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
 
+  // Prevent stack overflow by limiting the recursion depth.
+  if (Depth == BitPartRecursionMaxDepth) {
+    LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
+    return Result;
+  }
+
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // If this is an or instruction, it may be an inner node of the bswap.
     if (I->getOpcode() == Instruction::Or) {
       auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                MatchBitReversals, BPS);
+                                MatchBitReversals, BPS, Depth + 1);
       auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
-                                MatchBitReversals, BPS);
+                                MatchBitReversals, BPS, Depth + 1);
       if (!A || !B)
         return Result;
 
@@ -2660,7 +2693,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
         return Result;
 
       auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS);
+                                  MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;
       Result = Res;
@@ -2692,7 +2725,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
         return Result;
 
       auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS);
+                                  MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;
       Result = Res;
@@ -2707,7 +2740,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
     // If this is a zext instruction zero extend the result.
     if (I->getOpcode() == Instruction::ZExt) {
       auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
-                                  MatchBitReversals, BPS);
+                                  MatchBitReversals, BPS, Depth + 1);
       if (!Res)
         return Result;
 
@@ -2769,7 +2802,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
 
   // Try to find all the pieces corresponding to the bswap.
   std::map<Value *, Optional<BitPart>> BPS;
-  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
+  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
   if (!Res)
     return false;
   auto &BitProvenance = Res->Provenance;
@@ -2883,3 +2916,41 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
     return true;
   }
 }
+
+using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
+/// Trace V back through casts, GEPs and PHIs to the alloca it is based on.
+/// Returns nullptr if no single alloca is found; AllocaForValue memoizes.
+AllocaInst *llvm::findAllocaForValue(Value *V,
+                                     AllocaForValueMapTy &AllocaForValue) {
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
+    return AI;
+  // Reuse a cached result; nullptr means "not found" or "still in progress".
+  AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
+  if (I != AllocaForValue.end())
+    return I->second;
+  // Seed the cache with nullptr first so self-referencing values terminate.
+  AllocaForValue[V] = nullptr;
+  AllocaInst *Res = nullptr;
+  if (CastInst *CI = dyn_cast<CastInst>(V))
+    Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
+  else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    for (Value *IncValue : PN->incoming_values()) {
+      // A phi that feeds itself adds no new source; ignore that operand.
+      if (IncValue == PN)
+        continue;
+      AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
+      // All incoming values must map to the same alloca; otherwise give up.
+      if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+        return nullptr;
+      Res = IncValueAI;
+    }
+  } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
+    Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
+  } else {
+    LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
+                      << *V << "\n");
+  }
+  if (Res)
+    AllocaForValue[V] = Res;
+  return Res;
+}
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 41f14a834617..37389a695b45 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -1,9 +1,8 @@
 //===----------------- LoopRotationUtils.cpp -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,6 +16,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -28,7 +28,6 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -296,7 +295,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // Begin by walking OrigHeader and populating ValueMap with an entry for
   // each Instruction.
   BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
-  ValueToValueMapTy ValueMap;
+  ValueToValueMapTy ValueMap, ValueMapMSSA;
 
   // For PHI nodes, the value available in OldPreHeader is just the
   // incoming value from OldPreHeader.
@@ -375,6 +374,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
       if (auto *II = dyn_cast<IntrinsicInst>(C))
         if (II->getIntrinsicID() == Intrinsic::assume)
           AC->registerAssumption(II);
+      // MemorySSA cares whether the cloned instruction was inserted or not, and
+      // not whether it can be remapped to a simplified value.
+      ValueMapMSSA[Inst] = C;
     }
   }
 
@@ -392,10 +394,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   LoopEntryBranch->eraseFromParent();
 
   // Update MemorySSA before the rewrite call below changes the 1:1
-  // instruction:cloned_instruction_or_value mapping in ValueMap.
+  // instruction:cloned_instruction_or_value mapping.
   if (MSSAU) {
-    ValueMap[OrigHeader] = OrigPreheader;
-    MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+    ValueMapMSSA[OrigHeader] = OrigPreheader;
+    MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+                                        ValueMapMSSA);
   }
 
   SmallVector<PHINode*, 2> InsertedPHIs;
@@ -463,9 +466,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     for (BasicBlock *ExitPred : ExitPreds) {
       // We only need to split loop exit edges.
       Loop *PredLoop = LI->getLoopFor(ExitPred);
-      if (!PredLoop || PredLoop->contains(Exit))
-        continue;
-      if (isa<IndirectBrInst>(ExitPred->getTerminator()))
+      if (!PredLoop || PredLoop->contains(Exit) ||
+          ExitPred->getTerminator()->isIndirectTerminator())
         continue;
       SplitLatchEdge |= L->getLoopLatch() == ExitPred;
       BasicBlock *ExitSplit = SplitCriticalEdge(
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 380f4fca54d9..7e6da02d5707 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -1,9 +1,8 @@
 //===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,6 +27,9 @@
 // to transform the loop and make these guarantees. Client code should check
 // that these conditions are true before relying on them.
 //
+// Similar complications arise from callbr instructions, particularly in
+// asm-goto where blockaddress expressions are used.
+//
 // Note that the simplifycfg pass will clean up blocks which are split out but
 // end up being unnecessary, so usage of this pass should not pessimize
 // generated code.
@@ -46,13 +48,15 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -67,6 +71,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 using namespace llvm;
 
@@ -115,7 +120,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
 /// preheader insertion and analysis updating.
 ///
 BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
-                                         LoopInfo *LI, bool PreserveLCSSA) {
+                                         LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                                         bool PreserveLCSSA) {
   BasicBlock *Header = L->getHeader();
 
   // Compute the set of predecessors of the loop that are not in the loop.
@@ -124,10 +130,11 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
        PI != PE; ++PI) {
     BasicBlock *P = *PI;
     if (!L->contains(P)) {         // Coming in from outside the loop?
-      // If the loop is branched to from an indirect branch, we won't
+      // If the loop is branched to from an indirect terminator, we won't
       // be able to fully transform the loop, because it prohibits
       // edge splitting.
-      if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
+      if (P->getTerminator()->isIndirectTerminator())
+        return nullptr;
 
       // Keep track of it.
       OutsideBlocks.push_back(P);
@@ -137,7 +144,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
   // Split out the loop pre-header.
   BasicBlock *PreheaderBB;
   PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
-                                       LI, nullptr, PreserveLCSSA);
+                                       LI, MSSAU, PreserveLCSSA);
   if (!PreheaderBB)
     return nullptr;
 
@@ -217,7 +224,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
 static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
                                 DominatorTree *DT, LoopInfo *LI,
                                 ScalarEvolution *SE, bool PreserveLCSSA,
-                                AssumptionCache *AC) {
+                                AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
   // Don't try to separate loops without a preheader.
   if (!Preheader)
     return nullptr;
@@ -236,8 +243,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     if (PN->getIncomingValue(i) != PN ||
         !L->contains(PN->getIncomingBlock(i))) {
-      // We can't split indirectbr edges.
-      if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+      // We can't split indirect control flow edges.
+      if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
         return nullptr;
       OuterLoopPreds.push_back(PN->getIncomingBlock(i));
     }
@@ -251,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
     SE->forgetLoop(L);
 
   BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
-                                             DT, LI, nullptr, PreserveLCSSA);
+                                             DT, LI, MSSAU, PreserveLCSSA);
 
   // Make sure that NewBB is put someplace intelligent, which doesn't mess up
   // code layout too horribly.
@@ -314,7 +321,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
 
   // Split edges to exit blocks from the inner loop, if they emerged in the
   // process of separating the outer one.
-  formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+  formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
 
   if (PreserveLCSSA) {
     // Fix LCSSA form for L. Some values, which previously were only used inside
@@ -339,7 +346,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
 /// and have that block branch to the loop header.  This ensures that loops
 /// have exactly one backedge.
 static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
-                                             DominatorTree *DT, LoopInfo *LI) {
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             MemorySSAUpdater *MSSAU) {
   assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
 
   // Get information about the loop
@@ -358,8 +366,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
   for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
     BasicBlock *P = *I;
 
-    // Indirectbr edges cannot be split, so we must fail if we find one.
-    if (isa<IndirectBrInst>(P->getTerminator()))
+    // Indirect edges cannot be split, so we must fail if we find one.
+    if (P->getTerminator()->isIndirectTerminator())
       return nullptr;
 
     if (P != Preheader) BackedgeBlocks.push_back(P);
@@ -439,9 +447,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
     if (!LoopMD)
       LoopMD = TI->getMetadata(LoopMDKind);
     TI->setMetadata(LoopMDKind, nullptr);
-    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
-      if (TI->getSuccessor(Op) == Header)
-        TI->setSuccessor(Op, BEBlock);
+    TI->replaceSuccessorWith(Header, BEBlock);
   }
   BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
 
@@ -454,6 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
   // Update dominator information
   DT->splitBlock(BEBlock);
 
+  if (MSSAU)
+    MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+                                                      BEBlock);
+
   return BEBlock;
 }
 
@@ -461,8 +471,11 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
 static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
                             DominatorTree *DT, LoopInfo *LI,
                             ScalarEvolution *SE, AssumptionCache *AC,
-                            bool PreserveLCSSA) {
+                            MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
   bool Changed = false;
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
 ReprocessLoop:
 
   // Check to see that no blocks (other than the header) in this loop have
@@ -489,11 +502,15 @@ ReprocessLoop:
 
       // Zap the dead pred's terminator and replace it with unreachable.
       Instruction *TI = P->getTerminator();
-      changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
+      changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+                          /*DTU=*/nullptr, MSSAU);
       Changed = true;
     }
   }
 
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
   // If there are exiting blocks with branches on undef, resolve the undef in
   // the direction which will exit the loop. This will help simplify loop
   // trip count computations.
@@ -518,7 +535,7 @@ ReprocessLoop:
   // Does the loop already have a preheader?  If so, don't insert one.
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
-    Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+    Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
     if (Preheader)
       Changed = true;
   }
@@ -527,9 +544,12 @@ ReprocessLoop:
   // predecessors that are inside of the loop.  This check guarantees that the
   // loop preheader/header will dominate the exit blocks.  If the exit block has
   // predecessors from outside of the loop, split the edge now.
-  if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+  if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
     Changed = true;
 
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
   // If the header has more than two predecessors at this point (from the
   // preheader and from multiple backedges), we must adjust the loop.
   BasicBlock *LoopLatch = L->getLoopLatch();
@@ -538,8 +558,8 @@ ReprocessLoop:
     // this for loops with a giant number of backedges, just factor them into a
     // common backedge instead.
     if (L->getNumBackEdges() < 8) {
-      if (Loop *OuterL =
-              separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
+      if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+                                            PreserveLCSSA, AC, MSSAU)) {
         ++NumNested;
         // Enqueue the outer loop as it should be processed next in our
         // depth-first nest walk.
@@ -556,11 +576,14 @@ ReprocessLoop:
     // If we either couldn't, or didn't want to, identify nesting of the loops,
     // insert a new block that all backedges target, then make it jump to the
     // loop header.
-    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
+    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
     if (LoopLatch)
       Changed = true;
   }
 
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
 
   // Scan over the PHI nodes in the loop header.  Since they now have only two
@@ -618,9 +641,9 @@ ReprocessLoop:
         Instruction *Inst = &*I++;
         if (Inst == CI)
           continue;
-        if (!L->makeLoopInvariant(Inst, AnyInvariant,
-                                  Preheader ? Preheader->getTerminator()
-                                            : nullptr)) {
+        if (!L->makeLoopInvariant(
+                Inst, AnyInvariant,
+                Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
           AllInvariant = false;
           break;
         }
@@ -637,7 +660,7 @@ ReprocessLoop:
       // The block has now been cleared of all instructions except for
       // a comparison and a conditional branch. SimplifyCFG may be able
       // to fold it now.
-      if (!FoldBranchToCommonDest(BI))
+      if (!FoldBranchToCommonDest(BI, MSSAU))
         continue;
 
       // Success. The block is now dead, so remove it from the loop,
@@ -657,11 +680,16 @@ ReprocessLoop:
         DT->changeImmediateDominator(Child, Node->getIDom());
       }
       DT->eraseNode(ExitingBlock);
+      if (MSSAU) {
+        SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+        ExitBlockSet.insert(ExitingBlock);
+        MSSAU->removeBlocks(ExitBlockSet);
+      }
 
       BI->getSuccessor(0)->removePredecessor(
-          ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
       BI->getSuccessor(1)->removePredecessor(
-          ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
       ExitingBlock->eraseFromParent();
     }
   }
@@ -672,12 +700,15 @@ ReprocessLoop:
   if (Changed && SE)
     SE->forgetTopmostLoop(L);
 
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
   return Changed;
 }
 
 bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
                         ScalarEvolution *SE, AssumptionCache *AC,
-                        bool PreserveLCSSA) {
+                        MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
   bool Changed = false;
 
 #ifndef NDEBUG
@@ -705,7 +736,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
 
   while (!Worklist.empty())
     Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
-                               AC, PreserveLCSSA);
+                               AC, MSSAU, PreserveLCSSA);
 
   return Changed;
 }
@@ -737,6 +768,9 @@ namespace {
       AU.addPreservedID(LCSSAID);
       AU.addPreserved<DependenceAnalysisWrapperPass>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
+      AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+      if (EnableMSSALoopDependency)
+        AU.addPreserved<MemorySSAWrapperPass>();
     }
 
     /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -768,12 +802,21 @@ bool LoopSimplify::runOnFunction(Function &F) {
   ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
   AssumptionCache *AC =
       &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  MemorySSA *MSSA = nullptr;
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (EnableMSSALoopDependency) {
+    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+    if (MSSAAnalysis) {
+      MSSA = &MSSAAnalysis->getMSSA();
+      MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+    }
+  }
 
   bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
   // Simplify each loop nest in the function.
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
+    Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
 
 #ifndef NDEBUG
   if (PreserveLCSSA) {
@@ -794,9 +837,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
   AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
 
   // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
-  // after simplifying the loops.
+  // after simplifying the loops. MemorySSA is not preserved either.
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
+    Changed |=
+        simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
 
   if (!Changed)
     return PreservedAnalyses::all();
@@ -809,6 +853,12 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
   PA.preserve<SCEVAA>();
   PA.preserve<ScalarEvolutionAnalysis>();
   PA.preserve<DependenceAnalysis>();
+  // BPI maps conditional terminators to probabilities, LoopSimplify can insert
+  // blocks, but it does so only by splitting existing blocks and edges. This
+  // results in the interesting property that all new terminators inserted are
+  // unconditional branches which do not appear in BPI. All deletions are
+  // handled via ValueHandle callbacks w/in BPI.
+  PA.preserve<BranchProbabilityAnalysis>();
   return PA;
 }
 
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index da7ed2bd1652..e39ade523714 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1,9 +1,8 @@
 //===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -45,6 +44,8 @@ using namespace llvm;
 // TODO: Should these be here or in LoopUnroll?
 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
+                                 "conditional latch (completely or otherwise)");
 
 static cl::opt<bool>
 UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
@@ -94,66 +95,6 @@ void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
   }
 }
 
-/// Folds a basic block into its predecessor if it only has one predecessor, and
-/// that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent.
-BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
-                                           ScalarEvolution *SE,
-                                           DominatorTree *DT) {
-  // Merge basic blocks into their predecessor if there is only one distinct
-  // pred, and if there is only one distinct successor of the predecessor, and
-  // if there are no PHI nodes.
-  BasicBlock *OnlyPred = BB->getSinglePredecessor();
-  if (!OnlyPred) return nullptr;
-
-  if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
-    return nullptr;
-
-  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
-                    << OnlyPred->getName() << "\n");
-
-  // Resolve any PHI nodes at the start of the block.  They are all
-  // guaranteed to have exactly one entry if they exist, unless there are
-  // multiple duplicate (but guaranteed to be equal) entries for the
-  // incoming edges.  This occurs when there are multiple edges from
-  // OnlyPred to OnlySucc.
-  FoldSingleEntryPHINodes(BB);
-
-  // Delete the unconditional branch from the predecessor...
-  OnlyPred->getInstList().pop_back();
-
-  // Make all PHI nodes that referred to BB now refer to Pred as their
-  // source...
-  BB->replaceAllUsesWith(OnlyPred);
-
-  // Move all definitions in the successor to the predecessor...
-  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
-  // OldName will be valid until erased.
-  StringRef OldName = BB->getName();
-
-  // Erase the old block and update dominator info.
-  if (DT)
-    if (DomTreeNode *DTN = DT->getNode(BB)) {
-      DomTreeNode *PredDTN = DT->getNode(OnlyPred);
-      SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
-      for (auto *DI : Children)
-        DT->changeImmediateDominator(DI, PredDTN);
-
-      DT->eraseNode(BB);
-    }
-
-  LI->removeBlock(BB);
-
-  // Inherit predecessor's name if it exists...
-  if (!OldName.empty() && !OnlyPred->hasName())
-    OnlyPred->setName(OldName);
-
-  BB->eraseFromParent();
-
-  return OnlyPred;
-}
-
 /// Check if unrolling created a situation where we need to insert phi nodes to
 /// preserve LCSSA form.
 /// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -332,12 +273,11 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
 ///
 /// If RemainderLoop is non-null, it will receive the remainder loop (if
 /// required and not fully unrolled).
-LoopUnrollResult llvm::UnrollLoop(
-    Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
-    bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
-    unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
-    LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
+LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+                                  ScalarEvolution *SE, DominatorTree *DT,
+                                  AssumptionCache *AC,
+                                  OptimizationRemarkEmitter *ORE,
+                                  bool PreserveLCSSA, Loop **RemainderLoop) {
 
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
@@ -357,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(
     return LoopUnrollResult::Unmodified;
   }
 
-  // The current loop unroll pass can only unroll loops with a single latch
+  // The current loop unroll pass can unroll loops with a single latch or header
   // that's a conditional branch exiting the loop.
   // FIXME: The implementation can be extended to work with more complicated
   // cases, e.g. loops with multiple latches.
   BasicBlock *Header = L->getHeader();
+  BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
 
-  if (!BI || BI->isUnconditional()) {
-    // The loop-rotate pass can be helpful to avoid this in many cases.
+  // FIXME: Support loops without conditional latch and multiple exiting blocks.
+  if (!BI ||
+      (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
+                                 L->getExitingBlock() != Header))) {
+    LLVM_DEBUG(dbgs() << "  Can't unroll; loop not terminated by a conditional "
+                         "branch in the latch or header.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+
+  auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
+    return BI->isConditional() && BI->getSuccessor(S1) == Header &&
+           !L->contains(BI->getSuccessor(S2));
+  };
+
+  // If we have a conditional latch, it must exit the loop.
+  if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
+      !CheckLatchSuccessors(1, 0)) {
     LLVM_DEBUG(
-        dbgs()
-        << "  Can't unroll; loop not terminated by a conditional branch.\n");
+        dbgs() << "Can't unroll; a conditional latch must exit the loop");
     return LoopUnrollResult::Unmodified;
   }
 
-  auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
-    return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+  auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
+    return HeaderBI && HeaderBI->isConditional() &&
+           L->contains(HeaderBI->getSuccessor(S1)) &&
+           !L->contains(HeaderBI->getSuccessor(S2));
   };
 
-  if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
-    LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
-                         " exiting the loop can be unrolled\n");
+  // If we do not have a conditional latch, the header must exit the loop.
+  if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
+      !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
+    LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
     return LoopUnrollResult::Unmodified;
   }
 
@@ -389,28 +347,28 @@ LoopUnrollResult llvm::UnrollLoop(
     return LoopUnrollResult::Unmodified;
   }
 
-  if (TripCount != 0)
-    LLVM_DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
-  if (TripMultiple != 1)
-    LLVM_DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");
+  if (ULO.TripCount != 0)
+    LLVM_DEBUG(dbgs() << "  Trip Count = " << ULO.TripCount << "\n");
+  if (ULO.TripMultiple != 1)
+    LLVM_DEBUG(dbgs() << "  Trip Multiple = " << ULO.TripMultiple << "\n");
 
   // Effectively "DCE" unrolled iterations that are beyond the tripcount
   // and will never be executed.
-  if (TripCount != 0 && Count > TripCount)
-    Count = TripCount;
+  if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
+    ULO.Count = ULO.TripCount;
 
   // Don't enter the unroll code if there is nothing to do.
-  if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+  if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
     LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
     return LoopUnrollResult::Unmodified;
   }
 
-  assert(Count > 0);
-  assert(TripMultiple > 0);
-  assert(TripCount == 0 || TripCount % TripMultiple == 0);
+  assert(ULO.Count > 0);
+  assert(ULO.TripMultiple > 0);
+  assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
 
   // Are we eliminating the loop control altogether?
-  bool CompletelyUnroll = Count == TripCount;
+  bool CompletelyUnroll = ULO.Count == ULO.TripCount;
   SmallVector<BasicBlock *, 4> ExitBlocks;
   L->getExitBlocks(ExitBlocks);
   std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
@@ -429,24 +387,29 @@ LoopUnrollResult llvm::UnrollLoop(
   // We assume a run-time trip count if the compiler cannot
   // figure out the loop trip count and the unroll-runtime
   // flag is specified.
-  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+  bool RuntimeTripCount =
+      (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
 
-  assert((!RuntimeTripCount || !PeelCount) &&
+  assert((!RuntimeTripCount || !ULO.PeelCount) &&
          "Did not expect runtime trip-count unrolling "
          "and peeling for the same loop");
 
   bool Peeled = false;
-  if (PeelCount) {
-    Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+  if (ULO.PeelCount) {
+    Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
 
     // Successful peeling may result in a change in the loop preheader/trip
     // counts. If we later unroll the loop, we want these to be updated.
     if (Peeled) {
-      BasicBlock *ExitingBlock = L->getExitingBlock();
+      // According to our guards and profitability checks the only
+      // meaningful exit should be latch block. Other exits go to deopt,
+      // so we do not worry about them.
+      BasicBlock *ExitingBlock = L->getLoopLatch();
       assert(ExitingBlock && "Loop without exiting block?");
+      assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
       Preheader = L->getLoopPreheader();
-      TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
-      TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+      ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+      ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
     }
   }
 
@@ -459,7 +422,7 @@ LoopUnrollResult llvm::UnrollLoop(
           for (auto &I : *BB)
             if (auto CS = CallSite(&I))
               HasConvergent |= CS.isConvergent();
-        assert((!HasConvergent || TripMultiple % Count == 0) &&
+        assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
                "Unroll count must divide trip multiple if loop contains a "
                "convergent operation.");
       });
@@ -468,11 +431,12 @@ LoopUnrollResult llvm::UnrollLoop(
       UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
                                               : isEpilogProfitable(L);
 
-  if (RuntimeTripCount && TripMultiple % Count != 0 &&
-      !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  EpilogProfitability, UnrollRemainder, LI, SE,
-                                  DT, AC, PreserveLCSSA, RemainderLoop)) {
-    if (Force)
+  if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+      !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
+                                  EpilogProfitability, ULO.UnrollRemainder,
+                                  ULO.ForgetAllSCEV, LI, SE, DT, AC,
+                                  PreserveLCSSA, RemainderLoop)) {
+    if (ULO.Force)
       RuntimeTripCount = false;
     else {
       LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
@@ -483,35 +447,35 @@ LoopUnrollResult llvm::UnrollLoop(
 
   // If we know the trip count, we know the multiple...
   unsigned BreakoutTrip = 0;
-  if (TripCount != 0) {
-    BreakoutTrip = TripCount % Count;
-    TripMultiple = 0;
+  if (ULO.TripCount != 0) {
+    BreakoutTrip = ULO.TripCount % ULO.Count;
+    ULO.TripMultiple = 0;
   } else {
     // Figure out what multiple to use.
-    BreakoutTrip = TripMultiple =
-      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+    BreakoutTrip = ULO.TripMultiple =
+        (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
   }
 
   using namespace ore;
   // Report the unrolling decision.
   if (CompletelyUnroll) {
     LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
-                      << " with trip count " << TripCount << "!\n");
+                      << " with trip count " << ULO.TripCount << "!\n");
     if (ORE)
       ORE->emit([&]() {
         return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
                                   L->getHeader())
                << "completely unrolled loop with "
-               << NV("UnrollCount", TripCount) << " iterations";
+               << NV("UnrollCount", ULO.TripCount) << " iterations";
       });
-  } else if (PeelCount) {
+  } else if (ULO.PeelCount) {
     LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
-                      << " with iteration count " << PeelCount << "!\n");
+                      << " with iteration count " << ULO.PeelCount << "!\n");
     if (ORE)
       ORE->emit([&]() {
         return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
                                   L->getHeader())
-               << " peeled loop by " << NV("PeelCount", PeelCount)
+               << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
                << " iterations";
       });
   } else {
@@ -519,24 +483,25 @@ LoopUnrollResult llvm::UnrollLoop(
       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                               L->getHeader());
       return Diag << "unrolled loop by a factor of "
-                  << NV("UnrollCount", Count);
+                  << NV("UnrollCount", ULO.Count);
     };
 
     LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
-                      << Count);
-    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+                      << ULO.Count);
+    if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
       LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
       if (ORE)
         ORE->emit([&]() {
           return DiagBuilder() << " with a breakout at trip "
                                << NV("BreakoutTrip", BreakoutTrip);
         });
-    } else if (TripMultiple != 1) {
-      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+    } else if (ULO.TripMultiple != 1) {
+      LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
       if (ORE)
         ORE->emit([&]() {
-          return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
-                               << " trips per branch";
+          return DiagBuilder()
+                 << " with " << NV("TripMultiple", ULO.TripMultiple)
+                 << " trips per branch";
         });
     } else if (RuntimeTripCount) {
       LLVM_DEBUG(dbgs() << " with run-time trip count");
@@ -555,11 +520,24 @@ LoopUnrollResult llvm::UnrollLoop(
   // and if something changes inside them then any of outer loops may also
   // change. When we forget outermost loop, we also forget all contained loops
   // and this is what we need here.
-  if (SE)
-    SE->forgetTopmostLoop(L);
+  if (SE) {
+    if (ULO.ForgetAllSCEV)
+      SE->forgetAllLoops();
+    else
+      SE->forgetTopmostLoop(L);
+  }
 
-  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
-  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+  bool ContinueOnTrue;
+  bool LatchIsExiting = BI->isConditional();
+  BasicBlock *LoopExit = nullptr;
+  if (LatchIsExiting) {
+    ContinueOnTrue = L->contains(BI->getSuccessor(0));
+    LoopExit = BI->getSuccessor(ContinueOnTrue);
+  } else {
+    NumUnrolledWithHeader++;
+    ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
+    LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
+  }
 
   // For the first iteration of the loop, we should use the precloned values for
   // PHI nodes.  Insert associations now.
@@ -569,11 +547,23 @@ LoopUnrollResult llvm::UnrollLoop(
     OrigPHINode.push_back(cast<PHINode>(I));
   }
 
-  std::vector<BasicBlock*> Headers;
-  std::vector<BasicBlock*> Latches;
+  std::vector<BasicBlock *> Headers;
+  std::vector<BasicBlock *> HeaderSucc;
+  std::vector<BasicBlock *> Latches;
   Headers.push_back(Header);
   Latches.push_back(LatchBlock);
 
+  if (!LatchIsExiting) {
+    auto *Term = cast<BranchInst>(Header->getTerminator());
+    if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
+      assert(L->contains(Term->getSuccessor(0)));
+      HeaderSucc.push_back(Term->getSuccessor(0));
+    } else {
+      assert(L->contains(Term->getSuccessor(1)));
+      HeaderSucc.push_back(Term->getSuccessor(1));
+    }
+  }
+
   // The current on-the-fly SSA update requires blocks to be processed in
   // reverse postorder so that LastValueMap contains the correct value at each
   // exit.
@@ -599,7 +589,7 @@ LoopUnrollResult llvm::UnrollLoop(
       for (Instruction &I : *BB)
         if (!isa<DbgInfoIntrinsic>(&I))
           if (const DILocation *DIL = I.getDebugLoc()) {
-            auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+            auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
             if (NewDIL)
               I.setDebugLoc(NewDIL.getValue());
             else
@@ -608,7 +598,7 @@ LoopUnrollResult llvm::UnrollLoop(
                          << DIL->getFilename() << " Line: " << DIL->getLine());
           }
 
-  for (unsigned It = 1; It != Count; ++It) {
+  for (unsigned It = 1; It != ULO.Count; ++It) {
     std::vector<BasicBlock*> NewBlocks;
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
     NewLoops[L] = L;
@@ -663,6 +653,13 @@ LoopUnrollResult llvm::UnrollLoop(
       if (*BB == LatchBlock)
         Latches.push_back(New);
 
+      // Keep track of the successor of the new header in the current iteration.
+      for (auto *Pred : predecessors(*BB))
+        if (Pred == Header) {
+          HeaderSucc.push_back(New);
+          break;
+        }
+
       NewBlocks.push_back(New);
       UnrolledLoopBlocks.push_back(New);
 
@@ -699,8 +696,7 @@ LoopUnrollResult llvm::UnrollLoop(
     if (CompletelyUnroll) {
       PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
       Header->getInstList().erase(PN);
-    }
-    else if (Count > 1) {
+    } else if (ULO.Count > 1) {
       Value *InVal = PN->removeIncomingValue(LatchBlock, false);
       // If this value was defined in the loop, take the value defined by the
       // last iteration of the loop.
@@ -713,39 +709,11 @@ LoopUnrollResult llvm::UnrollLoop(
     }
   }
 
-  // Now that all the basic blocks for the unrolled iterations are in place,
-  // set up the branches to connect them.
-  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
-    // The original branch was replicated in each unrolled iteration.
-    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
-    // The branch destination.
-    unsigned j = (i + 1) % e;
-    BasicBlock *Dest = Headers[j];
-    bool NeedConditional = true;
-
-    if (RuntimeTripCount && j != 0) {
-      NeedConditional = false;
-    }
-
-    // For a complete unroll, make the last iteration end with a branch
-    // to the exit block.
-    if (CompletelyUnroll) {
-      if (j == 0)
-        Dest = LoopExit;
-      // If using trip count upper bound to completely unroll, we need to keep
-      // the conditional branch except the last one because the loop may exit
-      // after any iteration.
-      assert(NeedConditional &&
-             "NeedCondition cannot be modified by both complete "
-             "unrolling and runtime unrolling");
-      NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
-    } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
-      // If we know the trip count or a multiple of it, we can safely use an
-      // unconditional branch for some iterations.
-      NeedConditional = false;
-    }
-
+  auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
+                                            ArrayRef<BasicBlock *> NextBlocks,
+                                            BasicBlock *CurrentHeader,
+                                            bool NeedConditional) {
+    auto *Term = cast<BranchInst>(Src->getTerminator());
     if (NeedConditional) {
       // Update the conditional branch's successor for the following
       // iteration.
@@ -753,9 +721,9 @@ LoopUnrollResult llvm::UnrollLoop(
     } else {
       // Remove phi operands at this loop exit
       if (Dest != LoopExit) {
-        BasicBlock *BB = Latches[i];
-        for (BasicBlock *Succ: successors(BB)) {
-          if (Succ == Headers[i])
+        BasicBlock *BB = Src;
+        for (BasicBlock *Succ : successors(BB)) {
+          if (Succ == CurrentHeader)
             continue;
           for (PHINode &Phi : Succ->phis())
             Phi.removeIncomingValue(BB, false);
@@ -765,13 +733,97 @@ LoopUnrollResult llvm::UnrollLoop(
       BranchInst::Create(Dest, Term);
       Term->eraseFromParent();
     }
+  };
+
+  // Now that all the basic blocks for the unrolled iterations are in place,
+  // set up the branches to connect them.
+  if (LatchIsExiting) {
+    // Set up latches to branch to the new header in the unrolled iterations or
+    // the loop exit for the last latch in a fully unrolled loop.
+    for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+      // The branch destination.
+      unsigned j = (i + 1) % e;
+      BasicBlock *Dest = Headers[j];
+      bool NeedConditional = true;
+
+      if (RuntimeTripCount && j != 0) {
+        NeedConditional = false;
+      }
+
+      // For a complete unroll, make the last iteration end with a branch
+      // to the exit block.
+      if (CompletelyUnroll) {
+        if (j == 0)
+          Dest = LoopExit;
+        // If using trip count upper bound to completely unroll, we need to keep
+        // the conditional branch except the last one because the loop may exit
+        // after any iteration.
+        assert(NeedConditional &&
+               "NeedCondition cannot be modified by both complete "
+               "unrolling and runtime unrolling");
+        NeedConditional =
+            (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
+      } else if (j != BreakoutTrip &&
+                 (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
+        // If we know the trip count or a multiple of it, we can safely use an
+        // unconditional branch for some iterations.
+        NeedConditional = false;
+      }
+
+      setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
+    }
+  } else {
+    // Set up headers to branch to their new successors in the unrolled
+    // iterations.
+    for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+      // The branch destination.
+      unsigned j = (i + 1) % e;
+      BasicBlock *Dest = HeaderSucc[i];
+      bool NeedConditional = true;
+
+      if (RuntimeTripCount && j != 0)
+        NeedConditional = false;
+
+      if (CompletelyUnroll)
+        // We cannot drop the conditional branch for the last condition, as we
+        // may have to execute the loop body depending on the condition.
+        NeedConditional = j == 0 || ULO.PreserveCondBr;
+      else if (j != BreakoutTrip &&
+               (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
+        // If we know the trip count or a multiple of it, we can safely use an
+        // unconditional branch for some iterations.
+        NeedConditional = false;
+
+      setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+    }
+
+    // Set up latches to branch to the new header in the unrolled iterations or
+    // the loop exit for the last latch in a fully unrolled loop.
+
+    for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+      // The original branch was replicated in each unrolled iteration.
+      BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+      // The branch destination.
+      unsigned j = (i + 1) % e;
+      BasicBlock *Dest = Headers[j];
+
+      // When completely unrolling, the last latch becomes unreachable.
+      if (CompletelyUnroll && j == 0)
+        new UnreachableInst(Term->getContext(), Term);
+      else
+        // Replace the conditional branch with an unconditional one.
+        BranchInst::Create(Dest, Term);
+
+      Term->eraseFromParent();
+    }
   }
 
   // Update dominators of blocks we might reach through exits.
   // Immediate dominator of such block might change, because we add more
   // routes which can lead to the exit: we can now reach it from the copied
   // iterations too.
-  if (DT && Count > 1) {
+  if (DT && ULO.Count > 1) {
     for (auto *BB : OriginalLoopBlocks) {
       auto *BBDomNode = DT->getNode(BB);
       SmallVector<BasicBlock *, 16> ChildrenToUpdate;
@@ -781,7 +833,9 @@ LoopUnrollResult llvm::UnrollLoop(
           ChildrenToUpdate.push_back(ChildBB);
       }
       BasicBlock *NewIDom;
-      if (BB == LatchBlock) {
+      BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
+      auto &TermBlocks = LatchIsExiting ? Latches : Headers;
+      if (BB == TermBlock) {
         // The latch is special because we emit unconditional branches in
         // some cases where the original loop contained a conditional branch.
         // Since the latch is always at the bottom of the loop, if the latch
@@ -789,11 +843,13 @@ LoopUnrollResult llvm::UnrollLoop(
         // must also be a latch.  Specifically, the dominator is the first
         // latch which ends in a conditional branch, or the last latch if
         // there is no such latch.
-        NewIDom = Latches.back();
-        for (BasicBlock *IterLatch : Latches) {
-          Instruction *Term = IterLatch->getTerminator();
+        // For loops exiting from the header, we limit the supported loops
+        // to have a single exiting block.
+        NewIDom = TermBlocks.back();
+        for (BasicBlock *Iter : TermBlocks) {
+          Instruction *Term = Iter->getTerminator();
           if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
-            NewIDom = IterLatch;
+            NewIDom = Iter;
             break;
           }
         }
@@ -810,14 +866,20 @@ LoopUnrollResult llvm::UnrollLoop(
   }
 
   assert(!DT || !UnrollVerifyDomtree ||
-      DT->verify(DominatorTree::VerificationLevel::Fast));
+         DT->verify(DominatorTree::VerificationLevel::Fast));
 
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   // Merge adjacent basic blocks, if possible.
   for (BasicBlock *Latch : Latches) {
-    BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
-    if (Term->isUnconditional()) {
+    BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
+    assert((Term ||
+            (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
+           "Need a branch as terminator, except when fully unrolling with "
+           "unconditional latch");
+    if (Term && Term->isUnconditional()) {
       BasicBlock *Dest = Term->getSuccessor(0);
-      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+      BasicBlock *Fold = Dest->getUniquePredecessor();
+      if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
         // Dest has been folded into Fold. Update our worklists accordingly.
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
         UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
@@ -829,8 +891,8 @@ LoopUnrollResult llvm::UnrollLoop(
 
   // At this point, the code is well formed.  We now simplify the unrolled loop,
   // doing constant propagation and dead code elimination as we go.
-  simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE,
-                          DT, AC);
+  simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
+                          SE, DT, AC);
 
   NumCompletelyUnrolled += CompletelyUnroll;
   ++NumUnrolled;
@@ -878,11 +940,11 @@ LoopUnrollResult llvm::UnrollLoop(
 
       // TODO: That potentially might be compile-time expensive. We should try
       // to fix the loop-simplified form incrementally.
-      simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
+      simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
     } else {
       // Simplify loops for which we might've broken loop-simplify form.
       for (Loop *SubLoop : LoopsToSimplify)
-        simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
+        simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
     }
   }
 
diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index e26762639c13..ff49d83f25c5 100644
--- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -1,9 +1,8 @@
 //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -198,8 +197,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
   if (TripMultiple == 1 || TripMultiple % Count != 0) {
     if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                     /*UseEpilogRemainder*/ true,
-                                    UnrollRemainder, LI, SE, DT, AC, true,
-                                    EpilogueLoop)) {
+                                    UnrollRemainder, /*ForgetAllSCEV*/ false,
+                                    LI, SE, DT, AC, true, EpilogueLoop)) {
       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                            "generated when assuming runtime trip count\n");
       return LoopUnrollResult::Unmodified;
@@ -301,7 +300,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
       for (Instruction &I : *BB)
         if (!isa<DbgInfoIntrinsic>(&I))
           if (const DILocation *DIL = I.getDebugLoc()) {
-            auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+            auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
             if (NewDIL)
               I.setDebugLoc(NewDIL.getValue());
             else
@@ -539,12 +538,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
   MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
   MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
   MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   while (!MergeBlocks.empty()) {
     BasicBlock *BB = *MergeBlocks.begin();
     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
     if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
       BasicBlock *Dest = Term->getSuccessor(0);
-      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+      BasicBlock *Fold = Dest->getUniquePredecessor();
+      if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
         // Don't remove BB and add Fold as they are the same BB
         assert(Fold == BB);
         (void)Fold;
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 151a285af4e9..005306cf1898 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,9 +1,8 @@
 //===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
 
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+    "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+    cl::desc("Allow peeling of loops with multiple deopt exits."));
+
 // Designates that a Phi is estimated to become invariant after an "infinite"
 // number of loop iterations (i.e. only may become an invariant if the loop is
 // fully unrolled).
@@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
   if (!L->isLoopSimplifyForm())
     return false;
 
+  if (UnrollPeelMultiDeoptExit) {
+    SmallVector<BasicBlock *, 4> Exits;
+    L->getUniqueNonLatchExitBlocks(Exits);
+
+    if (!Exits.empty()) {
+      // Require that the latch ends in a conditional branch, is exiting, and
+      // that all non-latch exits end in a deoptimize call.
+      const BasicBlock *Latch = L->getLoopLatch();
+      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+      return T && T->isConditional() && L->isLoopExiting(Latch) &&
+             all_of(Exits, [](const BasicBlock *BB) {
+               return BB->getTerminatingDeoptimizeCall();
+             });
+    }
+  }
+
   // Only peel loops that contain a single exit
   if (!L->getExitingBlock() || !L->getUniqueExitBlock())
     return false;
@@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
                                 unsigned IterNumber, unsigned AvgIters,
                                 uint64_t &PeeledHeaderWeight) {
+  if (!PeeledHeaderWeight)
+    return;
   // FIXME: Pick a more realistic distribution.
   // Currently the proportion of weight we assign to the fall-through
   // side of the branch drops linearly with the iteration number, and we use
   // a 0.9 fudge factor to make the drop-off less sharp...
-  if (PeeledHeaderWeight) {
-    uint64_t FallThruWeight =
-        PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
-    uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
-    PeeledHeaderWeight -= ExitWeight;
-
-    unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-    MDBuilder MDB(LatchBR->getContext());
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
-                  : MDB.createBranchWeights(FallThruWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  uint64_t FallThruWeight =
+      PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+  uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+  PeeledHeaderWeight -= ExitWeight;
+
+  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+  MDBuilder MDB(LatchBR->getContext());
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+                : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Initialize ExitWeight and CurHeaderWeight from LatchBR's profile metadata.
+/// Both outputs are left untouched if LatchBR->extractProfMetadata fails.
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param AvgIters Expected average iteration count (currently unused).
+/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
+/// \param[out] CurHeaderWeight The # of times the header is executed.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                              unsigned AvgIters, uint64_t &ExitWeight,
+                              uint64_t &CurHeaderWeight) {
+  uint64_t TrueWeight, FalseWeight;
+  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+    return;
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+  // The # of times the loop body executes is the sum of the exit block
+  // weight and the # of times the backedges are taken.
+  CurHeaderWeight = TrueWeight + FalseWeight;
+}
+
+/// Update the weights of the original latch after peeling off all iterations.
+/// Does nothing when ExitWeight is zero.
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit block.
+/// \param CurHeaderWeight The # of times the header is executed.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+                               uint64_t ExitWeight, uint64_t CurHeaderWeight) {
+  // Adjust the branch weights on the loop exit.
+  if (!ExitWeight)
+    return;
+
+  // The backedge count is the difference of current header weight and
+  // current loop exit weight. If the current header weight is not larger
+  // than the current loop exit weight, we mark the backedge weight as 1.
+  uint64_t BackEdgeWeight = 0;
+  if (ExitWeight < CurHeaderWeight)
+    BackEdgeWeight = CurHeaderWeight - ExitWeight;
+  else
+    BackEdgeWeight = 1;
+  MDBuilder MDB(LatchBR->getContext());
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+                : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
 }
 
 /// Clones the body of the loop L, putting it between \p InsertTop and \p
 /// InsertBot.
 /// \param IterNumber The serial number of the iteration currently being
 /// peeled off.
-/// \param Exit The exit block of the original loop.
+/// \param ExitEdges The exit edges of the original loop.
 /// \param[out] NewBlocks A list of the blocks in the newly created clone
 /// \param[out] VMap The value map between the loop and the new clone.
 /// \param LoopBlocks A helper for DFS-traversal of the loop.
 /// \param LVMap A value-map that maps instructions from the original loop to
 /// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
-                            BasicBlock *InsertBot, BasicBlock *Exit,
-                            SmallVectorImpl<BasicBlock *> &NewBlocks,
-                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
-                            ValueToValueMapTy &LVMap, DominatorTree *DT,
-                            LoopInfo *LI) {
+static void cloneLoopBlocks(
+    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+    LoopInfo *LI) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   BasicBlock *PreHeader = L->getLoopPreheader();
@@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
   // iteration (for every other iteration)
   BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
   BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
-  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-  LatchBR->setSuccessor(HeaderIdx, InsertBot);
-  LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
+    if (LatchBR->getSuccessor(idx) == Header) {
+      LatchBR->setSuccessor(idx, InsertBot);
+      break;
+    }
   if (DT)
     DT->changeImmediateDominator(InsertBot, NewLatch);
 
@@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
   // we've just created. Note that this must happen *after* the incoming
   // values are adjusted, since the value going out of the latch may also be
   // a value coming into the header.
-  for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
-    PHINode *PHI = cast<PHINode>(I);
-    Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
-    Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
-    if (LatchInst && L->contains(LatchInst))
-      LatchVal = VMap[LatchVal];
-    PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
-  }
+  for (auto Edge : ExitEdges)
+    for (PHINode &PHI : Edge.second->phis()) {
+      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+      if (LatchInst && L->contains(LatchInst))
+        LatchVal = VMap[LatchVal];
+      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+    }
 
   // LastValueMap is updated with the values for the current loop
   // which are used the next time this function is called.
@@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   BasicBlock *Header = L->getHeader();
   BasicBlock *PreHeader = L->getLoopPreheader();
   BasicBlock *Latch = L->getLoopLatch();
-  BasicBlock *Exit = L->getUniqueExitBlock();
+  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+  L->getExitEdges(ExitEdges);
+
+  DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+  if (DT) {
+    assert(L->hasDedicatedExits() && "No dedicated exits?");
+    for (auto Edge : ExitEdges) {
+      if (ExitIDom.count(Edge.second))
+        continue;
+      BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+      assert(L->contains(BB) && "IDom is not in a loop");
+      ExitIDom[Edge.second] = BB;
+    }
+  }
 
   Function *F = Header->getParent();
 
@@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   // newly created branches.
   BranchInst *LatchBR =
       cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
-  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-
-  uint64_t TrueWeight, FalseWeight;
   uint64_t ExitWeight = 0, CurHeaderWeight = 0;
-  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
-    ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
-    // The # of times the loop body executes is the sum of the exit block
-    // weight and the # of times the backedges are taken.
-    CurHeaderWeight = TrueWeight + FalseWeight;
-  }
+  initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
 
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
@@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     else
       CurHeaderWeight = 1;
 
-    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
-                    NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+                    LoopBlocks, VMap, LVMap, DT, LI);
 
     // Remap to use values from the current iteration instead of the
     // previous one.
@@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
       // latter is the first cloned loop body, as original PreHeader dominates
       // the original loop body.
       if (Iter == 0)
-        DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+        for (auto Exit : ExitIDom)
+          DT->changeImmediateDominator(Exit.first,
+                                       cast<BasicBlock>(LVMap[Exit.second]));
 #ifdef EXPENSIVE_CHECKS
       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 #endif
@@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     if (LatchInst && L->contains(LatchInst))
       NewVal = LVMap[LatchInst];
 
-    PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
   }
 
-  // Adjust the branch weights on the loop exit.
-  if (ExitWeight) {
-    // The backedge count is the difference of current header weight and
-    // current loop exit weight. If the current header weight is smaller than
-    // the current loop exit weight, we mark the loop backedge weight as 1.
-    uint64_t BackEdgeWeight = 0;
-    if (ExitWeight < CurHeaderWeight)
-      BackEdgeWeight = CurHeaderWeight - ExitWeight;
-    else
-      BackEdgeWeight = 1;
-    MDBuilder MDB(LatchBR->getContext());
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
-                  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
 
-  // If the loop is nested, we changed the parent loop, update SE.
-  if (Loop *ParentLoop = L->getParentLoop()) {
-    SE->forgetLoop(ParentLoop);
+  if (Loop *ParentLoop = L->getParentLoop())
+    L = ParentLoop;
 
-    // FIXME: Incrementally update loop-simplify
-    simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
-  } else {
-    // FIXME: Incrementally update loop-simplify
-    simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
-  }
+  // We modified the loop, update SE.
+  SE->forgetTopmostLoop(L);
+
+  // Finally, the DomTree must be correct.
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+  // FIXME: Incrementally update loop-simplify
+  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
 
   NumPeeled++;
 
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 00d2fd2fdbac..d22fdb4d52dc 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1,9 +1,8 @@
 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,11 +124,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
       // Update the existing PHI node operand with the value from the
       // new PHI node.  How this is done depends on if the existing
       // PHI node is in the original loop block, or the exit block.
-      if (L->contains(&PN)) {
-        PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
-      } else {
+      if (L->contains(&PN))
+        PN.setIncomingValueForBlock(NewPreHeader, NewPN);
+      else
         PN.addIncoming(NewPN, PrologExit);
-      }
     }
   }
 
@@ -265,7 +263,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
       // Update the existing PHI node operand with the value from the new PHI
       // node.  Corresponding instruction in epilog loop should be PHI.
       PHINode *VPN = cast<PHINode>(VMap[&PN]);
-      VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+      VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
     }
   }
 
@@ -426,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
 
 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
 /// is populated with all the loop exit blocks other than the LatchExit block.
-static bool
-canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
-                             BasicBlock *LatchExit, bool PreserveLCSSA,
-                             bool UseEpilogRemainder) {
+static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
+                                         bool PreserveLCSSA,
+                                         bool UseEpilogRemainder) {
 
   // We currently have some correctness constrains in unrolling a multi-exit
   // loop. Check for these below.
@@ -437,11 +434,6 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
   // We rely on LCSSA form being preserved when the exit blocks are transformed.
   if (!PreserveLCSSA)
     return false;
-  SmallVector<BasicBlock *, 4> Exits;
-  L->getUniqueExitBlocks(Exits);
-  for (auto *BB : Exits)
-    if (BB != LatchExit)
-      OtherExits.push_back(BB);
 
   // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
   // UnrollRuntimeMultiExit is true. This will need updating the logic in
@@ -471,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop(
     bool PreserveLCSSA, bool UseEpilogRemainder) {
 
 #if !defined(NDEBUG)
-  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
-  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
-                                      PreserveLCSSA, UseEpilogRemainder) &&
+  assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
+                                      UseEpilogRemainder) &&
          "Should be safe to unroll before checking profitability!");
 #endif
 
@@ -554,10 +545,10 @@ static bool canProfitablyUnrollMultiExitLoop(
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
-                                      bool UnrollRemainder, LoopInfo *LI,
-                                      ScalarEvolution *SE, DominatorTree *DT,
-                                      AssumptionCache *AC, bool PreserveLCSSA,
-                                      Loop **ResultLoop) {
+                                      bool UnrollRemainder, bool ForgetAllSCEV,
+                                      LoopInfo *LI, ScalarEvolution *SE,
+                                      DominatorTree *DT, AssumptionCache *AC,
+                                      bool PreserveLCSSA, Loop **ResultLoop) {
   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   LLVM_DEBUG(L->dump());
   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -597,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
 
   // These are exit blocks other than the target of the latch exiting block.
   SmallVector<BasicBlock *, 4> OtherExits;
+  L->getUniqueNonLatchExitBlocks(OtherExits);
   bool isMultiExitUnrollingEnabled =
-      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+      canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
                                    UseEpilogRemainder) &&
       canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                        UseEpilogRemainder);
@@ -939,23 +931,24 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
   if (OtherExits.size() > 0) {
     // Generate dedicated exit blocks for the original loop, to preserve
     // LoopSimplifyForm.
-    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+    formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
     // Generate dedicated exit blocks for the remainder loop if one exists, to
     // preserve LoopSimplifyForm.
     if (remainderLoop)
-      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+      formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
   }
 
   auto UnrollResult = LoopUnrollResult::Unmodified;
   if (remainderLoop && UnrollRemainder) {
     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
     UnrollResult =
-        UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
-                   /*Force*/ false, /*AllowRuntime*/ false,
-                   /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
-                   /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
-                   /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
-                   /*ORE*/ nullptr, PreserveLCSSA);
+        UnrollLoop(remainderLoop,
+                   {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
+                    /*Force*/ false, /*AllowRuntime*/ false,
+                    /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+                    /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+                    /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
+                   LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
   }
 
   if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index a93d1aeb62ef..ec226e65f650 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1,9 +1,8 @@
 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -15,10 +14,12 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -27,7 +28,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -47,6 +47,7 @@ using namespace llvm::PatternMatch;
 static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
 
 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+                                   MemorySSAUpdater *MSSAU,
                                    bool PreserveLCSSA) {
   bool Changed = false;
 
@@ -66,6 +67,9 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
         if (isa<IndirectBrInst>(PredBB->getTerminator()))
           // We cannot rewrite exiting edges from an indirectbr.
           return false;
+        if (isa<CallBrInst>(PredBB->getTerminator()))
+          // We cannot rewrite exiting edges from a callbr.
+          return false;
 
         InLoopPredecessors.push_back(PredBB);
       } else {
@@ -79,7 +83,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
       return false;
 
     auto *NewExitBB = SplitBlockPredecessors(
-        BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
+        BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
 
     if (!NewExitBB)
       LLVM_DEBUG(
@@ -217,7 +221,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
     // When the value is absent it is interpreted as 'attribute set'.
     return true;
   case 2:
-    return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+    if (ConstantInt *IntMD =
+            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+      return IntMD->getZExtValue();
+    return true;
   }
   llvm_unreachable("unexpected number of options");
 }
@@ -376,17 +383,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   Optional<int> InterleaveCount =
       getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
 
-  if (Enable == true) {
-    // 'Forcing' vector width and interleave count to one effectively disables
-    // this tranformation.
-    if (VectorizeWidth == 1 && InterleaveCount == 1)
-      return TM_SuppressedByUser;
-    return TM_ForcedByUser;
-  }
+  // 'Forcing' vector width and interleave count to one effectively disables
+  // this transformation.
+  if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+    return TM_SuppressedByUser;
 
   if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
     return TM_Disable;
 
+  if (Enable == true)
+    return TM_ForcedByUser;
+
   if (VectorizeWidth == 1 && InterleaveCount == 1)
     return TM_Disable;
 
@@ -528,10 +535,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   if (DT) {
     // Update the dominator tree by informing it about the new edge from the
-    // preheader to the exit.
-    DTU.insertEdge(Preheader, ExitBlock);
-    // Inform the dominator tree about the removed edge.
-    DTU.deleteEdge(Preheader, L->getHeader());
+    // preheader to the exit and the removed edge.
+    DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
+                      {DominatorTree::Delete, Preheader, L->getHeader()}});
   }
 
   // Use a map to unique and a vector to guarantee deterministic ordering.
@@ -578,10 +584,14 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
   // dbg.value truncates the range of any dbg.value before the loop where the
   // loop used to be. This is particularly important for constant values.
   DIBuilder DIB(*ExitBlock->getModule());
+  Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+  assert(InsertDbgValueBefore &&
+         "There should be a non-PHI instruction in exit block, else these "
+         "instructions will have no parent.");
   for (auto *DVI : DeadDebugInst)
-    DIB.insertDbgValueIntrinsic(
-        UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
-        DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+    DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+                                DVI->getVariable(), DVI->getExpression(),
+                                DVI->getDebugLoc(), InsertDbgValueBefore);
 
   // Remove the block from the reference counting scheme, so that we can
   // delete it freely later.
@@ -611,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
 }
 
 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
-  // Only support loops with a unique exiting block, and a latch.
-  if (!L->getExitingBlock())
-    return None;
+  // Support loops with an exiting latch where every other exit block only
+  // deoptimizes.
 
   // Get the branch weights for the loop's backedge.
-  BranchInst *LatchBR =
-      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
-  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return None;
+  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
     return None;
 
   assert((LatchBR->getSuccessor(0) == L->getHeader() ||
           LatchBR->getSuccessor(1) == L->getHeader()) &&
          "At least one edge out of the latch must go to the header");
 
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getUniqueNonLatchExitBlocks(ExitBlocks);
+  if (any_of(ExitBlocks, [](const BasicBlock *EB) {
+        return !EB->getTerminatingDeoptimizeCall();
+      }))
+    return None;
+
   // To estimate the number of times the loop body was executed, we want to
   // know the number of times the backedge was taken, vs. the number of times
   // we exited the loop.
@@ -665,16 +683,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
   return true;
 }
 
-/// Adds a 'fast' flag to floating point operations.
-static Value *addFastMathFlag(Value *V) {
-  if (isa<FPMathOperator>(V)) {
-    FastMathFlags Flags;
-    Flags.setFast();
-    cast<Instruction>(V)->setFastMathFlags(Flags);
-  }
-  return V;
-}
-
 Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
                             RecurrenceDescriptor::MinMaxRecurrenceKind RK,
                             Value *Left, Value *Right) {
@@ -778,9 +786,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
         ConstantVector::get(ShuffleMask), "rdx.shuf");
 
     if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
-      // Floating point operations had to be 'fast' to enable the reduction.
-      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
-                                                   TmpVec, Shuf, "bin.rdx"));
+      // The builder propagates its fast-math-flags setting.
+      TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+                                   "bin.rdx");
     } else {
       assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
              "Invalid min/max");
@@ -801,13 +809,9 @@ Value *llvm::createSimpleTargetReduction(
     ArrayRef<Value *> RedOps) {
   assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
 
-  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
   std::function<Value *()> BuildFunc;
   using RD = RecurrenceDescriptor;
   RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-  // TODO: Support creating ordered reductions.
-  FastMathFlags FMFFast;
-  FMFFast.setFast();
 
   switch (Opcode) {
   case Instruction::Add:
@@ -827,15 +831,15 @@ Value *llvm::createSimpleTargetReduction(
     break;
   case Instruction::FAdd:
     BuildFunc = [&]() {
-      auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
-      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+      auto Rdx = Builder.CreateFAddReduce(
+          Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
       return Rdx;
     };
     break;
   case Instruction::FMul:
     BuildFunc = [&]() {
-      auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
-      cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+      Type *Ty = Src->getType()->getVectorElementType();
+      auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
       return Rdx;
     };
     break;
@@ -880,6 +884,12 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
   RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
   TargetTransformInfo::ReductionFlags Flags;
   Flags.NoNaN = NoNaN;
+
+  // All ops in the reduction inherit fast-math-flags from the recurrence
+  // descriptor.
+  IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+  B.setFastMathFlags(Desc.getFastMathFlags());
+
   switch (RecKind) {
   case RD::RK_FloatAdd:
     return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index abbcd5f9e3b8..a9a480a4b7f9 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -1,9 +1,8 @@
 //===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -281,8 +280,9 @@ public:
     bool Changed = false;
     for (Loop *L : Worklist) {
       const LoopAccessInfo &LAI = LAA->getInfo(L);
-      if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() ||
-          !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+      if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
+          (LAI.getNumRuntimePointerChecks() ||
+           !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
         LoopVersioning LVer(LAI, L, LI, DT, SE);
         LVer.versionLoop();
         LVer.annotateLoopWithNoAlias();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index c852d538b0d1..fe67e191dc62 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -1,9 +1,8 @@
 //===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -53,7 +52,8 @@ static bool runImpl(Function &F) {
       II->getOperandBundlesAsDefs(OpBundles);
       // Insert a normal call instruction...
       CallInst *NewCall =
-          CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II);
+          CallInst::Create(II->getFunctionType(), II->getCalledValue(),
+                           CallArgs, OpBundles, "", II);
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 661b4fa5bcb7..0cc085dc366c 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -1,9 +1,8 @@
 //===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -73,7 +72,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
     // Loop Body
     Value *SrcGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-    Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+    Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
     Value *DstGEP =
         LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
     LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -115,7 +114,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                              : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
       Value *SrcGEP = RBuilder.CreateInBoundsGEP(
           OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
-      Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+      Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
 
       // Cast destination to operand type and store.
       PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
@@ -182,7 +181,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
 
   Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
-  Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+  Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
   Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
   LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
 
@@ -235,7 +234,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
     Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
     Value *SrcGEP =
         ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
-    Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+    Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
     Value *DstGEP =
         ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
     ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -293,6 +292,8 @@ static void createMemMoveLoop(Instruction *InsertBefore,
   BasicBlock *OrigBB = InsertBefore->getParent();
   Function *F = OrigBB->getParent();
 
+  Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+
   // Create the a comparison of src and dst, based on which we jump to either
   // the forward-copy part of the function (if src >= dst) or the backwards-copy
   // part (if src < dst).
@@ -331,9 +332,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
   Value *IndexPtr = LoopBuilder.CreateSub(
       LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
   Value *Element = LoopBuilder.CreateLoad(
-      LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
-  LoopBuilder.CreateStore(Element,
-                          LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
+      "element");
+  LoopBuilder.CreateStore(
+      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
   LoopBuilder.CreateCondBr(
       LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
       ExitBB, LoopBB);
@@ -348,9 +350,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
   IRBuilder<> FwdLoopBuilder(FwdLoopBB);
   PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
   Value *FwdElement = FwdLoopBuilder.CreateLoad(
-      FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+      EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
+      "element");
   FwdLoopBuilder.CreateStore(
-      FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+      FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
   Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
       FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
   FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index d019a44fc705..8256e3b5f5af 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -1,9 +1,8 @@
 //===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,8 +16,12 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -28,6 +31,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -58,9 +62,8 @@ static bool IsInRanges(const IntRange &R,
   // Find the first range whose High field is >= R.High,
   // then check if the Low field is <= R.Low. If so, we
   // have a Range that covers R.
-  auto I = std::lower_bound(
-      Ranges.begin(), Ranges.end(), R,
-      [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+  auto I = llvm::lower_bound(
+      Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; });
   return I != Ranges.end() && I->Low <= R.Low;
 }
 
@@ -78,6 +81,10 @@ namespace {
 
     bool runOnFunction(Function &F) override;
 
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<LazyValueInfoWrapperPass>();
+    }
+
     struct CaseRange {
       ConstantInt* Low;
       ConstantInt* High;
@@ -91,15 +98,18 @@ namespace {
     using CaseItr = std::vector<CaseRange>::iterator;
 
   private:
-    void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
+    void processSwitchInst(SwitchInst *SI,
+                           SmallPtrSetImpl<BasicBlock *> &DeleteList,
+                           AssumptionCache *AC, LazyValueInfo *LVI);
 
     BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
                               ConstantInt *LowerBound, ConstantInt *UpperBound,
                               Value *Val, BasicBlock *Predecessor,
                               BasicBlock *OrigBlock, BasicBlock *Default,
                               const std::vector<IntRange> &UnreachableRanges);
-    BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
-                             BasicBlock *Default);
+    BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
+                             ConstantInt *LowerBound, ConstantInt *UpperBound,
+                             BasicBlock *OrigBlock, BasicBlock *Default);
     unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
   };
 
@@ -121,8 +131,12 @@ char LowerSwitch::ID = 0;
 // Publicly exposed interface to pass...
 char &llvm::LowerSwitchID = LowerSwitch::ID;
 
-INITIALIZE_PASS(LowerSwitch, "lowerswitch",
-                "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
+                      "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
+                    "Lower SwitchInst's to branches", false, false)
 
 // createLowerSwitchPass - Interface to this file...
 FunctionPass *llvm::createLowerSwitchPass() {
@@ -130,6 +144,17 @@ FunctionPass *llvm::createLowerSwitchPass() {
 }
 
 bool LowerSwitch::runOnFunction(Function &F) {
+  LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+  auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+  AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+  // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not
+  // preserve it and it becomes stale (when available) pretty much immediately.
+  // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI
+  // and computeKnownBits to refine isValidAssumeForContext's results. Given
+  // that the latter can handle some of the simple cases w/o a DominatorTree,
+  // it's easier to refrain from using the tree than to keep it up to date.
+  LVI->disableDT();
+
   bool Changed = false;
   SmallPtrSet<BasicBlock*, 8> DeleteList;
 
@@ -143,11 +168,12 @@ bool LowerSwitch::runOnFunction(Function &F) {
 
     if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
       Changed = true;
-      processSwitchInst(SI, DeleteList);
+      processSwitchInst(SI, DeleteList, AC, LVI);
     }
   }
 
   for (BasicBlock* BB: DeleteList) {
+    LVI->eraseBlock(BB);
     DeleteDeadBlock(BB);
   }
 
@@ -160,10 +186,11 @@ static raw_ostream &operator<<(raw_ostream &O,
                                const LowerSwitch::CaseVector &C) {
   O << "[";
 
-  for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
-         E = C.end(); B != E; ) {
-    O << *B->Low << " -" << *B->High;
-    if (++B != E) O << ", ";
+  for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
+       B != E;) {
+    O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
+    if (++B != E)
+      O << ", ";
   }
 
   return O << "]";
@@ -179,8 +206,9 @@ static raw_ostream &operator<<(raw_ostream &O,
 /// 2) Removed if subsequent incoming values now share the same case, i.e.,
 /// multiple outcome edges are condensed into one. This is necessary to keep the
 /// number of phi values equal to the number of branches to SuccBB.
-static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
-                    unsigned NumMergedCases) {
+static void
+fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+        const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
   for (BasicBlock::iterator I = SuccBB->begin(),
                             IE = SuccBB->getFirstNonPHI()->getIterator();
        I != IE; ++I) {
@@ -222,6 +250,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                            BasicBlock *Predecessor, BasicBlock *OrigBlock,
                            BasicBlock *Default,
                            const std::vector<IntRange> &UnreachableRanges) {
+  assert(LowerBound && UpperBound && "Bounds must be initialized");
   unsigned Size = End - Begin;
 
   if (Size == 1) {
@@ -231,13 +260,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
     // because the bounds already tell us so.
     if (Begin->Low == LowerBound && Begin->High == UpperBound) {
       unsigned NumMergedCases = 0;
-      if (LowerBound && UpperBound)
-        NumMergedCases =
-            UpperBound->getSExtValue() - LowerBound->getSExtValue();
+      NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
       fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
       return Begin->BB;
     }
-    return newLeafBlock(*Begin, Val, OrigBlock, Default);
+    return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+                        Default);
   }
 
   unsigned Mid = Size / 2;
@@ -247,8 +275,8 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
   LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
 
   CaseRange &Pivot = *(Begin + Mid);
-  LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -"
-                    << Pivot.High->getValue() << "\n");
+  LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", "
+                    << Pivot.High->getValue() << "]\n");
 
   // NewLowerBound here should never be the integer minimal value.
   // This is because it is computed from a case range that is never
@@ -270,14 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
       NewUpperBound = LHS.back().High;
   }
 
-  LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) {
-    dbgs() << LowerBound->getSExtValue();
-  } else { dbgs() << "NONE"; } dbgs() << " - "
-                                      << NewUpperBound->getSExtValue() << "\n";
-             dbgs() << "RHS Bounds ==> ";
-             dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) {
-               dbgs() << UpperBound->getSExtValue() << "\n";
-             } else { dbgs() << "NONE\n"; });
+  LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", "
+                    << NewUpperBound->getSExtValue() << "]\n"
+                    << "RHS Bounds ==> [" << NewLowerBound->getSExtValue()
+                    << ", " << UpperBound->getSExtValue() << "]\n");
 
   // Create a new node that checks if the value is < pivot. Go to the
   // left branch if it is and right branch if not.
@@ -305,9 +329,11 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
 /// switch's value == the case's value. If not, then it jumps to the default
 /// branch. At this point in the tree, the value can't be another valid case
 /// value, so the jump to the "default" branch is warranted.
-BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
-                                      BasicBlock* OrigBlock,
-                                      BasicBlock* Default) {
+BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
+                                      ConstantInt *LowerBound,
+                                      ConstantInt *UpperBound,
+                                      BasicBlock *OrigBlock,
+                                      BasicBlock *Default) {
   Function* F = OrigBlock->getParent();
   BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
   F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
@@ -320,10 +346,14 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                         Leaf.Low, "SwitchLeaf");
   } else {
     // Make range comparison
-    if (Leaf.Low->isMinValue(true /*isSigned*/)) {
+    if (Leaf.Low == LowerBound) {
       // Val >= Min && Val <= Hi --> Val <= Hi
       Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
                           "SwitchLeaf");
+    } else if (Leaf.High == UpperBound) {
+      // Val <= Max && Val >= Lo --> Val >= Lo
+      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+                          "SwitchLeaf");
     } else if (Leaf.Low->isZero()) {
       // Val >= 0 && Val <= Hi --> Val <=u Hi
       Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
@@ -363,14 +393,20 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   return NewLeaf;
 }
 
-/// Transform simple list of Cases into list of CaseRange's.
+/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
+/// \post \p Cases wouldn't contain references to \p SI's default BB.
+/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
 unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
-  unsigned numCmps = 0;
+  unsigned NumSimpleCases = 0;
 
   // Start with "simple" cases
-  for (auto Case : SI->cases())
+  for (auto Case : SI->cases()) {
+    if (Case.getCaseSuccessor() == SI->getDefaultDest())
+      continue;
     Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
                               Case.getCaseSuccessor()));
+    ++NumSimpleCases;
+  }
 
   llvm::sort(Cases, CaseCmp());
 
@@ -396,60 +432,88 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
     Cases.erase(std::next(I), Cases.end());
   }
 
-  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
-    if (I->Low != I->High)
-      // A range counts double, since it requires two compares.
-      ++numCmps;
-  }
-
-  return numCmps;
+  return NumSimpleCases;
 }
 
 /// Replace the specified switch instruction with a sequence of chained if-then
 /// insts in a balanced binary search.
 void LowerSwitch::processSwitchInst(SwitchInst *SI,
-                                    SmallPtrSetImpl<BasicBlock*> &DeleteList) {
-  BasicBlock *CurBlock = SI->getParent();
-  BasicBlock *OrigBlock = CurBlock;
-  Function *F = CurBlock->getParent();
+                                    SmallPtrSetImpl<BasicBlock *> &DeleteList,
+                                    AssumptionCache *AC, LazyValueInfo *LVI) {
+  BasicBlock *OrigBlock = SI->getParent();
+  Function *F = OrigBlock->getParent();
   Value *Val = SI->getCondition();  // The value we are switching on...
   BasicBlock* Default = SI->getDefaultDest();
 
   // Don't handle unreachable blocks. If there are successors with phis, this
   // would leave them behind with missing predecessors.
-  if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
-      CurBlock->getSinglePredecessor() == CurBlock) {
-    DeleteList.insert(CurBlock);
+  if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
+      OrigBlock->getSinglePredecessor() == OrigBlock) {
+    DeleteList.insert(OrigBlock);
     return;
   }
 
+  // Prepare cases vector.
+  CaseVector Cases;
+  const unsigned NumSimpleCases = Clusterify(Cases, SI);
+  LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+                    << ". Total non-default cases: " << NumSimpleCases
+                    << "\nCase clusters: " << Cases << "\n");
+
   // If there is only the default destination, just branch.
-  if (!SI->getNumCases()) {
-    BranchInst::Create(Default, CurBlock);
+  if (Cases.empty()) {
+    BranchInst::Create(Default, OrigBlock);
+    // Remove all the references from Default's PHIs to OrigBlock, but one.
+    fixPhis(Default, OrigBlock, OrigBlock);
     SI->eraseFromParent();
     return;
   }
 
-  // Prepare cases vector.
-  CaseVector Cases;
-  unsigned numCmps = Clusterify(Cases, SI);
-  LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
-                    << ". Total compares: " << numCmps << "\n");
-  LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n");
-  (void)numCmps;
-
   ConstantInt *LowerBound = nullptr;
   ConstantInt *UpperBound = nullptr;
-  std::vector<IntRange> UnreachableRanges;
+  bool DefaultIsUnreachableFromSwitch = false;
 
   if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
     // Make the bounds tightly fitted around the case value range, because we
     // know that the value passed to the switch must be exactly one of the case
     // values.
-    assert(!Cases.empty());
     LowerBound = Cases.front().Low;
     UpperBound = Cases.back().High;
+    DefaultIsUnreachableFromSwitch = true;
+  } else {
+    // Constraining the range of the value being switched over helps eliminating
+    // unreachable BBs and minimizing the number of `add` instructions
+    // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after
+    // LowerSwitch isn't as good, and also much more expensive in terms of
+    // compile time for the following reasons:
+    // 1. it processes many kinds of instructions, not just switches;
+    // 2. even if limited to icmp instructions only, it will have to process
+    //    roughly C icmp's per switch, where C is the number of cases in the
+    //    switch, while LowerSwitch only needs to call LVI once per switch.
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
+    // TODO Shouldn't this create a signed range?
+    ConstantRange KnownBitsRange =
+        ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
+    const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
+    ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
+    // We delegate removal of unreachable non-default cases to other passes. In
+    // the unlikely event that some of them survived, we just conservatively
+    // maintain the invariant that all the cases lie between the bounds. This
+    // may, however, still render the default case effectively unreachable.
+    APInt Low = Cases.front().Low->getValue();
+    APInt High = Cases.back().High->getValue();
+    APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low);
+    APInt Max = APIntOps::smax(ValRange.getSignedMax(), High);
+
+    LowerBound = ConstantInt::get(SI->getContext(), Min);
+    UpperBound = ConstantInt::get(SI->getContext(), Max);
+    DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max);
+  }
+
+  std::vector<IntRange> UnreachableRanges;
 
+  if (DefaultIsUnreachableFromSwitch) {
     DenseMap<BasicBlock *, unsigned> Popularity;
     unsigned MaxPop = 0;
     BasicBlock *PopSucc = nullptr;
@@ -496,8 +560,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
 #endif
 
     // As the default block in the switch is unreachable, update the PHI nodes
-    // (remove the entry to the default block) to reflect this.
-    Default->removePredecessor(OrigBlock);
+    // (remove all of the references to the default block) to reflect this.
+    const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases;
+    for (unsigned I = 0; I < NumDefaultEdges; ++I)
+      Default->removePredecessor(OrigBlock);
 
     // Use the most popular block as the new default, reducing the number of
     // cases.
@@ -510,7 +576,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
 
     // If there are no cases left, just branch.
     if (Cases.empty()) {
-      BranchInst::Create(Default, CurBlock);
+      BranchInst::Create(Default, OrigBlock);
       SI->eraseFromParent();
       // As all the cases have been replaced with a single branch, only keep
       // one entry in the PHI nodes.
@@ -518,12 +584,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
         PopSucc->removePredecessor(OrigBlock);
       return;
     }
-  }
 
-  unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0;
-  for (const auto &Case : SI->cases())
-    if (Case.getCaseSuccessor() == Default)
-      NrOfDefaults++;
+    // If the condition was a PHI node with the switch block as a predecessor
+    // removing predecessors may have caused the condition to be erased.
+    // Getting the condition value again here protects against that.
+    Val = SI->getCondition();
+  }
 
   // Create a new, empty default block so that the new hierarchy of
   // if-then statements go to this and the PHI nodes are happy.
@@ -537,14 +603,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
 
   // If there are entries in any PHI nodes for the default edge, make sure
   // to update them as well.
-  fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults);
+  fixPhis(Default, OrigBlock, NewDefault);
 
   // Branch to our shiny new if-then stuff...
   BranchInst::Create(SwitchBlock, OrigBlock);
 
   // We are now done with the switch instruction, delete it.
   BasicBlock *OldDefault = SI->getDefaultDest();
-  CurBlock->getInstList().erase(SI);
+  OrigBlock->getInstList().erase(SI);
 
   // If the Default block has no more predecessors just add it to DeleteList.
   if (pred_begin(OldDefault) == pred_end(OldDefault))
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 23145e584751..cd2c81b6abc8 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -1,9 +1,8 @@
 //===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 88d595ee02ab..c0b7edc547fd 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -1,9 +1,8 @@
 //===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ae5e72ea4d30..c84beceee191 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -1,9 +1,8 @@
 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -28,44 +27,24 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
   // Get the current set of static global constructors and add the new ctor
   // to the list.
   SmallVector<Constant *, 16> CurrentCtors;
-  StructType *EltTy;
+  StructType *EltTy = StructType::get(
+      IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy());
   if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
-    ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
-    StructType *OldEltTy = cast<StructType>(ATy->getElementType());
-    // Upgrade a 2-field global array type to the new 3-field format if needed.
-    if (Data && OldEltTy->getNumElements() < 3)
-      EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
-                              IRB.getInt8PtrTy());
-    else
-      EltTy = OldEltTy;
     if (Constant *Init = GVCtor->getInitializer()) {
       unsigned n = Init->getNumOperands();
       CurrentCtors.reserve(n + 1);
-      for (unsigned i = 0; i != n; ++i) {
-        auto Ctor = cast<Constant>(Init->getOperand(i));
-        if (EltTy != OldEltTy)
-          Ctor =
-              ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
-                                  Ctor->getAggregateElement(1),
-                                  Constant::getNullValue(IRB.getInt8PtrTy()));
-        CurrentCtors.push_back(Ctor);
-      }
+      for (unsigned i = 0; i != n; ++i)
+        CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
     }
     GVCtor->eraseFromParent();
-  } else {
-    // Use the new three-field struct if there isn't one already.
-    EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
-                            IRB.getInt8PtrTy());
   }
 
-  // Build a 2 or 3 field global_ctor entry.  We don't take a comdat key.
+  // Build a 3 field global_ctor entry.  We don't take a comdat key.
   Constant *CSVals[3];
   CSVals[0] = IRB.getInt32(Priority);
   CSVals[1] = F;
-  // FIXME: Drop support for the two element form in LLVM 4.0.
-  if (EltTy->getNumElements() >= 3)
-    CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
-                     : Constant::getNullValue(IRB.getInt8PtrTy());
+  CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+                   : Constant::getNullValue(IRB.getInt8PtrTy());
   Constant *RuntimeCtorInit =
       ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
 
@@ -127,36 +106,24 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
   appendToUsedList(M, "llvm.compiler.used", Values);
 }
 
-Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
-  if (isa<Function>(FuncOrBitcast))
-    return cast<Function>(FuncOrBitcast);
-  FuncOrBitcast->print(errs());
-  errs() << '\n';
-  std::string Err;
-  raw_string_ostream Stream(Err);
-  Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
-  report_fatal_error(Err);
-}
-
-Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
-                                             ArrayRef<Type *> InitArgTypes) {
+FunctionCallee
+llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+                                   ArrayRef<Type *> InitArgTypes) {
   assert(!InitName.empty() && "Expected init function name");
-  Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+  return M.getOrInsertFunction(
       InitName,
       FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
-      AttributeList()));
-  F->setLinkage(Function::ExternalLinkage);
-  return F;
+      AttributeList());
 }
 
-std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
     StringRef VersionCheckName) {
   assert(!InitName.empty() && "Expected init function name");
   assert(InitArgs.size() == InitArgTypes.size() &&
          "Sanitizer's init function expects different number of arguments");
-  Function *InitFunction =
+  FunctionCallee InitFunction =
       declareSanitizerInitFunction(M, InitName, InitArgTypes);
   Function *Ctor = Function::Create(
       FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -165,20 +132,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
   IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
   IRB.CreateCall(InitFunction, InitArgs);
   if (!VersionCheckName.empty()) {
-    Function *VersionCheckFunction =
-        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-            VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
-            AttributeList()));
+    FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
+        VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+        AttributeList());
     IRB.CreateCall(VersionCheckFunction, {});
   }
   return std::make_pair(Ctor, InitFunction);
 }
 
-std::pair<Function *, Function *>
+std::pair<Function *, FunctionCallee>
 llvm::getOrCreateSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
-    function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+    function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
     StringRef VersionCheckName) {
   assert(!CtorName.empty() && "Expected ctor function name");
 
@@ -189,7 +155,8 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
       return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
 
-  Function *Ctor, *InitFunction;
+  Function *Ctor;
+  FunctionCallee InitFunction;
   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
   FunctionsCreatedCallback(Ctor, InitFunction);
@@ -208,9 +175,10 @@ Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
     }
     return F;
   }
-  Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
-      Name, AttributeList(), Type::getVoidTy(M.getContext())));
-  F->setLinkage(Function::ExternalLinkage);
+  Function *F =
+      cast<Function>(M.getOrInsertFunction(Name, AttributeList(),
+                                           Type::getVoidTy(M.getContext()))
+                         .getCallee());
 
   appendToGlobalCtors(M, F, 0);
 
diff --git a/lib/Transforms/Utils/NameAnonGlobals.cpp b/lib/Transforms/Utils/NameAnonGlobals.cpp
index 34dc1cccdd5b..ac8991e9d475 100644
--- a/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -1,9 +1,8 @@
 //===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 585ce6b4c118..bdf24d80bd17 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -1,9 +1,8 @@
 //===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------===//
 //
@@ -474,7 +473,8 @@ void PredicateInfo::buildPredicateInfo() {
   }
   for (auto &Assume : AC.assumptions()) {
     if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
-      processAssume(II, II->getParent(), OpsToRename);
+      if (DT.isReachableFromEntry(II->getParent()))
+        processAssume(II, II->getParent(), OpsToRename);
   }
   // Now rename all our operations.
   renameUses(OpsToRename);
@@ -489,8 +489,10 @@ void PredicateInfo::buildPredicateInfo() {
 // tricky (FIXME).
 static Function *getCopyDeclaration(Module *M, Type *Ty) {
   std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
-  return cast<Function>(M->getOrInsertFunction(
-      Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty)));
+  return cast<Function>(
+      M->getOrInsertFunction(Name,
+                             getType(M->getContext(), Intrinsic::ssa_copy, Ty))
+          .getCallee());
 }
 
 // Given the renaming stack, make all the operands currently on the stack real
@@ -633,7 +635,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
     // uses in the same instruction do not have a strict sort order
     // currently and will be considered equal. We could get rid of the
     // stable sort by creating one if we wanted.
-    std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+    llvm::stable_sort(OrderedUses, Compare);
     SmallVector<ValueDFS, 8> RenameStack;
     // For each use, sorted into dfs order, push values and replaces uses with
     // top of stack, which will represent the reaching def.
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 91e4f4254b3e..d58e1ea574ef 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -1,9 +1,8 @@
 //===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -114,7 +113,6 @@ struct AllocaInfo {
   BasicBlock *OnlyBlock;
   bool OnlyUsedInOneBlock;
 
-  Value *AllocaPointerVal;
   TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
 
   void clear() {
@@ -123,7 +121,6 @@ struct AllocaInfo {
     OnlyStore = nullptr;
     OnlyBlock = nullptr;
     OnlyUsedInOneBlock = true;
-    AllocaPointerVal = nullptr;
     DbgDeclares.clear();
   }
 
@@ -141,14 +138,12 @@ struct AllocaInfo {
       if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
         // Remember the basic blocks which define new values for the alloca
         DefiningBlocks.push_back(SI->getParent());
-        AllocaPointerVal = SI->getOperand(0);
         OnlyStore = SI;
       } else {
         LoadInst *LI = cast<LoadInst>(User);
         // Otherwise it must be a load instruction, keep track of variable
         // reads.
         UsingBlocks.push_back(LI->getParent());
-        AllocaPointerVal = LI;
       }
 
       if (OnlyUsedInOneBlock) {
@@ -254,11 +249,6 @@ struct PromoteMem2Reg {
   /// to.
   DenseMap<PHINode *, unsigned> PhiToAllocaMap;
 
-  /// If we are updating an AliasSetTracker, then for each alloca that is of
-  /// pointer type, we keep track of what to copyValue to the inserted PHI
-  /// nodes here.
-  std::vector<Value *> PointerAllocaValues;
-
   /// For each alloca, we keep track of the dbg.declare intrinsic that
   /// describes it, if any, so that we can convert it to a dbg.value
   /// intrinsic if the alloca gets promoted.
@@ -367,10 +357,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
 
   for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
     Instruction *UserInst = cast<Instruction>(*UI++);
-    if (!isa<LoadInst>(UserInst)) {
-      assert(UserInst == OnlyStore && "Should only have load/stores");
+    if (UserInst == OnlyStore)
       continue;
-    }
     LoadInst *LI = cast<LoadInst>(UserInst);
 
     // Okay, if we have a load from the alloca, we want to replace it with the
@@ -390,8 +378,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
           Info.UsingBlocks.push_back(StoreBB);
           continue;
         }
-      } else if (LI->getParent() != StoreBB &&
-                 !DT.dominates(StoreBB, LI->getParent())) {
+      } else if (!DT.dominates(StoreBB, LI->getParent())) {
         // If the load and store are in different blocks, use BB dominance to
         // check their relationships.  If the store doesn't dom the use, bail
         // out.
@@ -429,14 +416,12 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
     DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
     ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
     DII->eraseFromParent();
-    LBI.deleteValue(DII);
   }
   // Remove the (now dead) store and alloca.
   Info.OnlyStore->eraseFromParent();
   LBI.deleteValue(Info.OnlyStore);
 
   AI->eraseFromParent();
-  LBI.deleteValue(AI);
   return true;
 }
 
@@ -488,11 +473,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
     unsigned LoadIdx = LBI.getInstructionIndex(LI);
 
     // Find the nearest store that has a lower index than this load.
-    StoresByIndexTy::iterator I =
-        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
-                         std::make_pair(LoadIdx,
-                                        static_cast<StoreInst *>(nullptr)),
-                         less_first());
+    StoresByIndexTy::iterator I = llvm::lower_bound(
+        StoresByIndex,
+        std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
+        less_first());
     if (I == StoresByIndex.begin()) {
       if (StoresByIndex.empty())
         // If there are no stores, the load takes the undef value.
@@ -535,13 +519,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
   }
 
   AI->eraseFromParent();
-  LBI.deleteValue(AI);
 
   // The alloca's debuginfo can be removed as well.
-  for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
+  for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
     DII->eraseFromParent();
-    LBI.deleteValue(DII);
-  }
 
   ++NumLocalPromoted;
   return true;
@@ -620,8 +601,8 @@ void PromoteMem2Reg::run() {
     // dead phi nodes.
 
     // Unique the set of defining blocks for efficient lookup.
-    SmallPtrSet<BasicBlock *, 32> DefBlocks;
-    DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+    SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
+                                            Info.DefiningBlocks.end());
 
     // Determine which blocks the value is live in.  These are blocks which lead
     // to uses.
@@ -636,10 +617,9 @@ void PromoteMem2Reg::run() {
     IDF.setDefiningBlocks(DefBlocks);
     SmallVector<BasicBlock *, 32> PHIBlocks;
     IDF.calculate(PHIBlocks);
-    if (PHIBlocks.size() > 1)
-      llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
-        return BBNumbers.lookup(A) < BBNumbers.lookup(B);
-      });
+    llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
+    });
 
     unsigned CurrentVersion = 0;
     for (BasicBlock *BB : PHIBlocks)
@@ -751,7 +731,7 @@ void PromoteMem2Reg::run() {
     // basic blocks.  Start by sorting the incoming predecessors for efficient
     // access.
     auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
-      return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
     };
     llvm::sort(Preds, CompareBBNumbers);
 
@@ -759,9 +739,8 @@ void PromoteMem2Reg::run() {
     // them from the Preds list.
     for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
       // Do a log(n) search of the Preds list for the entry we want.
-      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
-          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
-          CompareBBNumbers);
+      SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
+          Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
       assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
              "PHI node has entry for a block which is not a predecessor!");
 
@@ -825,14 +804,11 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
         break;
       }
 
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        if (LI->getOperand(0) != AI)
-          continue;
-
+      if (LoadInst *LI = dyn_cast<LoadInst>(I))
         // Okay, we found a load before a store to the alloca.  It is actually
         // live into this block.
-        break;
-      }
+        if (LI->getOperand(0) == AI)
+          break;
     }
   }
 
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9e5fb0e7172d..bffdd115d940 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -1,9 +1,8 @@
 //===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -350,8 +349,7 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
   SSA.Initialize(SomeVal->getType(), BaseName);
 }
 
-void LoadAndStorePromoter::
-run(const SmallVectorImpl<Instruction *> &Insts) const {
+void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
   // First step: bucket up uses of the alloca by the block they occur in.
   // This is important because we have to handle multiple defs/uses in a block
   // ourselves: SSAUpdater is purely for cross-block references.
diff --git a/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index 397bac2940a4..917d5e0a1ef0 100644
--- a/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -1,9 +1,8 @@
 //===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp
index 8c23957ac43e..a1313c77ed77 100644
--- a/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/lib/Transforms/Utils/SanitizerStats.cpp
@@ -1,9 +1,8 @@
 //===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,8 +56,8 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
 
   FunctionType *StatReportTy =
       FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
-  Constant *StatReport = M->getOrInsertFunction(
-      "__sanitizer_stat_report", StatReportTy);
+  FunctionCallee StatReport =
+      M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
 
   auto InitAddr = ConstantExpr::getGetElementPtr(
       EmptyModuleStatsTy, ModuleStatsGV,
@@ -98,8 +97,8 @@ void SanitizerStatReport::finish() {
   IRBuilder<> B(BB);
 
   FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
-  Constant *StatInit = M->getOrInsertFunction(
-      "__sanitizer_stat_init", StatInitTy);
+  FunctionCallee StatInit =
+      M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
 
   B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
   B.CreateRetVoid();
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 03b73954321d..11651d040dc0 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1,9 +1,8 @@
 //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -26,8 +25,9 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -66,6 +66,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -292,9 +293,13 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
 /// will be the same as those coming in from ExistPred, an existing predecessor
 /// of Succ.
 static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
-                                  BasicBlock *ExistPred) {
+                                  BasicBlock *ExistPred,
+                                  MemorySSAUpdater *MSSAU = nullptr) {
   for (PHINode &PN : Succ->phis())
     PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
+  if (MSSAU)
+    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
+      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
 }
 
 /// Compute an abstract "cost" of speculating the given instruction,
@@ -670,7 +675,8 @@ private:
 
 } // end anonymous namespace
 
-static void EraseTerminatorAndDCECond(Instruction *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI,
+                                      MemorySSAUpdater *MSSAU = nullptr) {
   Instruction *Cond = nullptr;
   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
     Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -683,7 +689,7 @@ static void EraseTerminatorAndDCECond(Instruction *TI) {
 
   TI->eraseFromParent();
   if (Cond)
-    RecursivelyDeleteTriviallyDeadInstructions(Cond);
+    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
 }
 
 /// Return true if the specified terminator checks
@@ -858,7 +864,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
       return true;
     }
 
-    SwitchInst *SI = cast<SwitchInst>(TI);
+    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
     // Okay, TI has cases that are statically dead, prune them away.
     SmallPtrSet<Constant *, 16> DeadCases;
     for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
@@ -867,30 +873,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
     LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                       << "Through successor TI: " << *TI);
 
-    // Collect branch weights into a vector.
-    SmallVector<uint32_t, 8> Weights;
-    MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
-    bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
-    if (HasWeight)
-      for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
-           ++MD_i) {
-        ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
-        Weights.push_back(CI->getValue().getZExtValue());
-      }
     for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
       --i;
       if (DeadCases.count(i->getCaseValue())) {
-        if (HasWeight) {
-          std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
-          Weights.pop_back();
-        }
         i->getCaseSuccessor()->removePredecessor(TI->getParent());
-        SI->removeCase(i);
+        SI.removeCase(i);
       }
     }
-    if (HasWeight && Weights.size() >= 2)
-      setBranchWeights(SI, Weights);
-
     LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
     return true;
   }
@@ -1266,8 +1255,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
     while (isa<DbgInfoIntrinsic>(I2))
       I2 = &*BB2_Itr++;
   }
+  // FIXME: Can we define a safety predicate for CallBr?
   if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
-      (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+      (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
+      isa<CallBrInst>(I1))
     return false;
 
   BasicBlock *BIParent = BI->getParent();
@@ -1350,9 +1341,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
 
 HoistTerminator:
   // It may not be possible to hoist an invoke.
+  // FIXME: Can we define a safety predicate for CallBr?
   if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
     return Changed;
 
+  // TODO: callbr hoisting currently disabled pending further study.
+  if (isa<CallBrInst>(I1))
+    return Changed;
+
   for (BasicBlock *Succ : successors(BB1)) {
     for (PHINode &PN : Succ->phis()) {
       Value *BB1V = PN.getIncomingValueForBlock(BB1);
@@ -1432,9 +1428,10 @@ HoistTerminator:
 static bool canSinkInstructions(
     ArrayRef<Instruction *> Insts,
     DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
-  // Prune out obviously bad instructions to move. Any non-store instruction
-  // must have exactly one use, and we check later that use is by a single,
-  // common PHI instruction in the successor.
+  // Prune out obviously bad instructions to move. Each instruction must have
+  // exactly zero or one use, and we check later that use is by a single, common
+  // PHI instruction in the successor.
+  bool HasUse = !Insts.front()->user_empty();
   for (auto *I : Insts) {
     // These instructions may change or break semantics if moved.
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1444,13 +1441,14 @@ static bool canSinkInstructions(
     // Conservatively return false if I is an inline-asm instruction. Sinking
     // and merging inline-asm instructions can potentially create arguments
     // that cannot satisfy the inline-asm constraints.
-    if (const auto *C = dyn_cast<CallInst>(I))
+    if (const auto *C = dyn_cast<CallBase>(I))
       if (C->isInlineAsm())
         return false;
 
-    // Everything must have only one use too, apart from stores which
-    // have no uses.
-    if (!isa<StoreInst>(I) && !I->hasOneUse())
+    // Each instruction must have zero or one use.
+    if (HasUse && !I->hasOneUse())
+      return false;
+    if (!HasUse && !I->user_empty())
       return false;
   }
 
@@ -1459,11 +1457,11 @@ static bool canSinkInstructions(
     if (!I->isSameOperationAs(I0))
       return false;
 
-  // All instructions in Insts are known to be the same opcode. If they aren't
-  // stores, check the only user of each is a PHI or in the same block as the
-  // instruction, because if a user is in the same block as an instruction
-  // we're contemplating sinking, it must already be determined to be sinkable.
-  if (!isa<StoreInst>(I0)) {
+  // All instructions in Insts are known to be the same opcode. If they have a
+  // use, check that the only user is a PHI or in the same block as the
+  // instruction, because if a user is in the same block as an instruction we're
+  // contemplating sinking, it must already be determined to be sinkable.
+  if (HasUse) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
     if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1507,7 +1505,7 @@ static bool canSinkInstructions(
         // We can't create a PHI from this GEP.
         return false;
       // Don't create indirect calls! The called value is the final operand.
-      if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) {
+      if (isa<CallBase>(I0) && OI == OE - 1) {
         // FIXME: if the call was *already* indirect, we should do this.
         return false;
       }
@@ -1541,7 +1539,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   // it is slightly over-aggressive - it gets confused by commutative instructions
   // so double-check it here.
   Instruction *I0 = Insts.front();
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
           auto *U = cast<Instruction>(*I->user_begin());
@@ -1599,11 +1597,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       I0->andIRFlags(I);
     }
 
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     // canSinkLastInstruction checked that all instructions were used by
     // one and only one PHI node. Find that now, RAUW it to our common
     // instruction and nuke it.
-    assert(I0->hasOneUse());
     auto *PN = cast<PHINode>(*I0->user_begin());
     PN->replaceAllUsesWith(I0);
     PN->eraseFromParent();
@@ -2203,7 +2200,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
     BasicBlock *EdgeBB =
         BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
                            RealDest->getParent(), RealDest);
-    BranchInst::Create(RealDest, EdgeBB);
+    BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
+    CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
 
     // Update PHI nodes.
     AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -2539,7 +2537,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
 /// If this basic block is simple enough, and if a predecessor branches to us
 /// and one of our successors, fold the block into the predecessor and use
 /// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+                                  unsigned BonusInstThreshold) {
   BasicBlock *BB = BI->getParent();
 
   const unsigned PredCount = pred_size(BB);
@@ -2594,7 +2593,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
   // unconditionally. We denote all involved instructions except the condition
   // as "bonus instructions", and only allow this transformation when the
   // number of the bonus instructions we'll need to create when cloning into
-  // each predecessor does not exceed a certain threshold. 
+  // each predecessor does not exceed a certain threshold.
   unsigned NumBonusInsts = 0;
   for (auto I = BB->begin(); Cond != &*I; ++I) {
     // Ignore dbg intrinsics.
@@ -2611,7 +2610,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
     // and Cond.
 
     // Account for the cost of duplicating this instruction into each
-    // predecessor. 
+    // predecessor.
     NumBonusInsts += PredCount;
     // Early exits once we reach the limit.
     if (NumBonusInsts > BonusInstThreshold)
@@ -2750,7 +2749,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
                                    (SuccFalseWeight + SuccTrueWeight) +
                                PredTrueWeight * SuccFalseWeight);
         }
-        AddPredecessorToBlock(TrueDest, PredBlock, BB);
+        AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
         PBI->setSuccessor(0, TrueDest);
       }
       if (PBI->getSuccessor(1) == BB) {
@@ -2765,7 +2764,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
           // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
           NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
         }
-        AddPredecessorToBlock(FalseDest, PredBlock, BB);
+        AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
         PBI->setSuccessor(1, FalseDest);
       }
       if (NewWeights.size() == 2) {
@@ -2810,12 +2809,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
           }
         }
         // Update PHI Node.
-        PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
-                                  MergedCond);
+	PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
       }
+
+      // PBI is changed to branch to TrueDest below. Remove PBI's block from
+      // potential MemoryPhis in all other successors.
+      if (MSSAU)
+        MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
+
       // Change PBI from Conditional to Unconditional.
       BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
-      EraseTerminatorAndDCECond(PBI);
+      EraseTerminatorAndDCECond(PBI, MSSAU);
       PBI = New_PBI;
     }
 
@@ -3430,7 +3434,7 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
       KeepEdge2 = nullptr;
     else
       Succ->removePredecessor(OldTerm->getParent(),
-                              /*DontDeleteUselessPHIs=*/true);
+                              /*KeepOneInputPHIs=*/true);
   }
 
   IRBuilder<> Builder(OldTerm);
@@ -3622,20 +3626,16 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
   // the switch to the merge point on the compared value.
   BasicBlock *NewBB =
       BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
-  SmallVector<uint64_t, 8> Weights;
-  bool HasWeights = HasBranchWeights(SI);
-  if (HasWeights) {
-    GetBranchWeights(SI, Weights);
-    if (Weights.size() == 1 + SI->getNumCases()) {
-      // Split weight for default case to case for "Cst".
-      Weights[0] = (Weights[0] + 1) >> 1;
-      Weights.push_back(Weights[0]);
-
-      SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
-      setBranchWeights(SI, MDWeights);
+  {
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    auto W0 = SIW.getSuccessorWeight(0);
+    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
+    if (W0) {
+      NewW = ((uint64_t(*W0) + 1) >> 1);
+      SIW.setSuccessorWeight(0, *NewW);
     }
+    SIW.addCase(Cst, NewBB, NewW);
   }
-  SI->addCase(Cst, NewBB);
 
   // NewBB branches to the phi block, add the uncond branch and the phi entry.
   Builder.SetInsertPoint(NewBB);
@@ -4184,24 +4184,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
           Changed = true;
         }
       } else {
+        Value* Cond = BI->getCondition();
         if (BI->getSuccessor(0) == BB) {
+          Builder.CreateAssumption(Builder.CreateNot(Cond));
           Builder.CreateBr(BI->getSuccessor(1));
           EraseTerminatorAndDCECond(BI);
         } else if (BI->getSuccessor(1) == BB) {
+          Builder.CreateAssumption(Cond);
           Builder.CreateBr(BI->getSuccessor(0));
           EraseTerminatorAndDCECond(BI);
           Changed = true;
         }
       }
     } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
-      for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+      SwitchInstProfUpdateWrapper SU(*SI);
+      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
         if (i->getCaseSuccessor() != BB) {
           ++i;
           continue;
         }
-        BB->removePredecessor(SI->getParent());
-        i = SI->removeCase(i);
-        e = SI->case_end();
+        BB->removePredecessor(SU->getParent());
+        i = SU.removeCase(i);
+        e = SU->case_end();
         Changed = true;
       }
     } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
@@ -4435,33 +4439,20 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
     return true;
   }
 
-  SmallVector<uint64_t, 8> Weights;
-  bool HasWeight = HasBranchWeights(SI);
-  if (HasWeight) {
-    GetBranchWeights(SI, Weights);
-    HasWeight = (Weights.size() == 1 + SI->getNumCases());
-  }
+  if (DeadCases.empty())
+    return false;
 
-  // Remove dead cases from the switch.
+  SwitchInstProfUpdateWrapper SIW(*SI);
   for (ConstantInt *DeadCase : DeadCases) {
     SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
     assert(CaseI != SI->case_default() &&
            "Case was not found. Probably mistake in DeadCases forming.");
-    if (HasWeight) {
-      std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
-      Weights.pop_back();
-    }
-
     // Prune unused values from PHI nodes.
     CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
-    SI->removeCase(CaseI);
-  }
-  if (HasWeight && Weights.size() >= 2) {
-    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
-    setBranchWeights(SI, MDWeights);
+    SIW.removeCase(CaseI);
   }
 
-  return !DeadCases.empty();
+  return true;
 }
 
 /// If BB would be eligible for simplification by
@@ -5034,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable(
   ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
   Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
 
-  Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                              GlobalVariable::PrivateLinkage, Initializer,
                              "switch.table." + FuncName);
   Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
@@ -5091,7 +5082,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
     Value *GEPIndices[] = {Builder.getInt32(0), Index};
     Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                            GEPIndices, "switch.gep");
-    return Builder.CreateLoad(GEP, "switch.load");
+    return Builder.CreateLoad(
+        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
+        "switch.load");
   }
   }
   llvm_unreachable("Unknown lookup table kind!");
@@ -5425,7 +5418,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
     // do not delete PHINodes here.
     SI->getDefaultDest()->removePredecessor(SI->getParent(),
-                                            /*DontDeleteUselessPHIs=*/true);
+                                            /*KeepOneInputPHIs=*/true);
   }
 
   bool ReturnedEarly = false;
@@ -5533,25 +5526,23 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   // Now we have signed numbers that have been shifted so that, given enough
   // precision, there are no negative values. Since the rest of the transform
   // is bitwise only, we switch now to an unsigned representation.
-  uint64_t GCD = 0;
-  for (auto &V : Values)
-    GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
 
-  // This transform can be done speculatively because it is so cheap - it results
-  // in a single rotate operation being inserted. This can only happen if the
-  // factor extracted is a power of 2.
-  // FIXME: If the GCD is an odd number we can multiply by the multiplicative
-  // inverse of GCD and then perform this transform.
+  // This transform can be done speculatively because it is so cheap - it
+  // results in a single rotate operation being inserted.
   // FIXME: It's possible that optimizing a switch on powers of two might also
   // be beneficial - flag values are often powers of two and we could use a CLZ
   // as the key function.
-  if (GCD <= 1 || !isPowerOf2_64(GCD))
-    // No common divisor found or too expensive to compute key function.
-    return false;
 
-  unsigned Shift = Log2_64(GCD);
+  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
+  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
+  // less than 64.
+  unsigned Shift = 64;
   for (auto &V : Values)
-    V = (int64_t)((uint64_t)V >> Shift);
+    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+  assert(Shift < 64);
+  if (Shift > 0)
+    for (auto &V : Values)
+      V = (int64_t)((uint64_t)V >> Shift);
 
   if (!isSwitchDense(Values))
     // Transform didn't create a dense switch.
@@ -5796,7 +5787,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
   // branches to us and our successor, fold the comparison into the
   // predecessor and use logical operations to update the incoming value
   // for PHI nodes in common successor.
-  if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
     return requestResimplify();
   return false;
 }
@@ -5860,7 +5851,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // If this basic block is ONLY a compare and a branch, and if a predecessor
   // branches to us and one of our successors, fold the comparison into the
   // predecessor and use logical operations to pick the right destination.
-  if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
     return requestResimplify();
 
   // We have a conditional branch to two blocks that are only reachable
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 7faf291e73d9..cbb114f9a47a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1,9 +1,8 @@
 //===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -80,7 +80,8 @@ namespace {
     bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
     bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
 
-    bool eliminateOverflowIntrinsic(CallInst *CI);
+    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
+    bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
     bool eliminateTrunc(TruncInst *TI);
     bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
@@ -401,61 +402,29 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
   replaceSRemWithURem(Rem);
 }
 
-bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
-  auto *F = CI->getCalledFunction();
-  if (!F)
-    return false;
-
-  typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
-      const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
-  typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
-      const SCEV *, Type *, unsigned);
-
-  OperationFunctionTy Operation;
-  ExtensionFunctionTy Extension;
-
-  Instruction::BinaryOps RawOp;
-
-  // We always have exactly one of nsw or nuw.  If NoSignedOverflow is false, we
-  // have nuw.
-  bool NoSignedOverflow;
-
-  switch (F->getIntrinsicID()) {
+static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
+                            bool Signed, const SCEV *LHS, const SCEV *RHS) {
+  const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+                                            SCEV::NoWrapFlags, unsigned);
+  switch (BinOp) {
   default:
-    return false;
-
-  case Intrinsic::sadd_with_overflow:
-    Operation = &ScalarEvolution::getAddExpr;
-    Extension = &ScalarEvolution::getSignExtendExpr;
-    RawOp = Instruction::Add;
-    NoSignedOverflow = true;
-    break;
-
-  case Intrinsic::uadd_with_overflow:
+    llvm_unreachable("Unsupported binary op");
+  case Instruction::Add:
     Operation = &ScalarEvolution::getAddExpr;
-    Extension = &ScalarEvolution::getZeroExtendExpr;
-    RawOp = Instruction::Add;
-    NoSignedOverflow = false;
     break;
-
-  case Intrinsic::ssub_with_overflow:
+  case Instruction::Sub:
     Operation = &ScalarEvolution::getMinusSCEV;
-    Extension = &ScalarEvolution::getSignExtendExpr;
-    RawOp = Instruction::Sub;
-    NoSignedOverflow = true;
     break;
-
-  case Intrinsic::usub_with_overflow:
-    Operation = &ScalarEvolution::getMinusSCEV;
-    Extension = &ScalarEvolution::getZeroExtendExpr;
-    RawOp = Instruction::Sub;
-    NoSignedOverflow = false;
+  case Instruction::Mul:
+    Operation = &ScalarEvolution::getMulExpr;
     break;
   }
 
-  const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
-  const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+  const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
+      Signed ? &ScalarEvolution::getSignExtendExpr
+             : &ScalarEvolution::getZeroExtendExpr;
 
+  // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
   auto *NarrowTy = cast<IntegerType>(LHS->getType());
   auto *WideTy =
     IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
@@ -466,27 +435,32 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
   const SCEV *B =
       (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
                        (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
+  return A == B;
+}
 
-  if (A != B)
+bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
+  const SCEV *LHS = SE->getSCEV(WO->getLHS());
+  const SCEV *RHS = SE->getSCEV(WO->getRHS());
+  if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
     return false;
 
   // Proved no overflow, nuke the overflow check and, if possible, the overflow
   // intrinsic as well.
 
   BinaryOperator *NewResult = BinaryOperator::Create(
-      RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
 
-  if (NoSignedOverflow)
+  if (WO->isSigned())
     NewResult->setHasNoSignedWrap(true);
   else
     NewResult->setHasNoUnsignedWrap(true);
 
   SmallVector<ExtractValueInst *, 4> ToDelete;
 
-  for (auto *U : CI->users()) {
+  for (auto *U : WO->users()) {
     if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
       if (EVI->getIndices()[0] == 1)
-        EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+        EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
       else {
         assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
         EVI->replaceAllUsesWith(NewResult);
@@ -498,9 +472,28 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
   for (auto *EVI : ToDelete)
     EVI->eraseFromParent();
 
-  if (CI->use_empty())
-    CI->eraseFromParent();
+  if (WO->use_empty())
+    WO->eraseFromParent();
+
+  return true;
+}
+
+bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
+  const SCEV *LHS = SE->getSCEV(SI->getLHS());
+  const SCEV *RHS = SE->getSCEV(SI->getRHS());
+  if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
+    return false;
+
+  BinaryOperator *BO = BinaryOperator::Create(
+      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+  if (SI->isSigned())
+    BO->setHasNoSignedWrap();
+  else
+    BO->setHasNoUnsignedWrap();
 
+  SI->replaceAllUsesWith(BO);
+  DeadInsts.emplace_back(SI);
+  Changed = true;
   return true;
 }
 
@@ -548,20 +541,19 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
     if (isa<Instruction>(U) &&
         !DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
       continue;
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
-      if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) {
-        assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
-        // If we cannot get rid of trunc, bail.
-        if (ICI->isSigned() && !DoesSExtCollapse)
-          return false;
-        if (ICI->isUnsigned() && !DoesZExtCollapse)
-          return false;
-        // For equality, either signed or unsigned works.
-        ICmpUsers.push_back(ICI);
-      } else
-        return false;
-    } else
+    ICmpInst *ICI = dyn_cast<ICmpInst>(U);
+    if (!ICI) return false;
+    assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+    if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
+        !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
       return false;
+    // If we cannot get rid of trunc, bail.
+    if (ICI->isSigned() && !DoesSExtCollapse)
+      return false;
+    if (ICI->isUnsigned() && !DoesZExtCollapse)
+      return false;
+    // For equality, either signed or unsigned works.
+    ICmpUsers.push_back(ICI);
   }
 
   auto CanUseZExt = [&](ICmpInst *ICI) {
@@ -584,7 +576,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
   };
   // Replace all comparisons against trunc with comparisons against IV.
   for (auto *ICI : ICmpUsers) {
-    auto *Op1 = ICI->getOperand(1);
+    bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
+    auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
     Instruction *Ext = nullptr;
     // For signed/unsigned predicate, replace the old comparison with comparison
     // of immediate IV against sext/zext of the invariant argument. If we can
@@ -593,6 +586,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
     // TODO: If we see a signed comparison which can be turned into unsigned,
     // we can do it here for canonicalization purposes.
     ICmpInst::Predicate Pred = ICI->getPredicate();
+    if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
     if (CanUseZExt(ICI)) {
       assert(DoesZExtCollapse && "Unprofitable zext?");
       Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
@@ -636,8 +630,12 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
       return eliminateSDiv(Bin);
   }
 
-  if (auto *CI = dyn_cast<CallInst>(UseInst))
-    if (eliminateOverflowIntrinsic(CI))
+  if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
+    if (eliminateOverflowIntrinsic(WO))
+      return true;
+
+  if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
+    if (eliminateSaturatingIntrinsic(SI))
       return true;
 
   if (auto *TI = dyn_cast<TruncInst>(UseInst))
@@ -730,59 +728,31 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
 /// unsigned-overflow.  Returns true if anything changed, false otherwise.
 bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
                                                     Value *IVOperand) {
-
   // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
   if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
     return false;
 
-  const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
-                                               SCEV::NoWrapFlags, unsigned);
-  switch (BO->getOpcode()) {
-  default:
+  if (BO->getOpcode() != Instruction::Add &&
+      BO->getOpcode() != Instruction::Sub &&
+      BO->getOpcode() != Instruction::Mul)
     return false;
 
-  case Instruction::Add:
-    GetExprForBO = &ScalarEvolution::getAddExpr;
-    break;
-
-  case Instruction::Sub:
-    GetExprForBO = &ScalarEvolution::getMinusSCEV;
-    break;
-
-  case Instruction::Mul:
-    GetExprForBO = &ScalarEvolution::getMulExpr;
-    break;
-  }
-
-  unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
-  Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
   const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
   const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
-
   bool Changed = false;
 
-  if (!BO->hasNoUnsignedWrap()) {
-    const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
-    const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
-      SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap, 0u);
-    if (ExtendAfterOp == OpAfterExtend) {
-      BO->setHasNoUnsignedWrap();
-      SE->forgetValue(BO);
-      Changed = true;
-    }
+  if (!BO->hasNoUnsignedWrap() &&
+      willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
+    BO->setHasNoUnsignedWrap();
+    SE->forgetValue(BO);
+    Changed = true;
   }
 
-  if (!BO->hasNoSignedWrap()) {
-    const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
-    const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
-      SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap, 0u);
-    if (ExtendAfterOp == OpAfterExtend) {
-      BO->setHasNoSignedWrap();
-      SE->forgetValue(BO);
-      Changed = true;
-    }
+  if (!BO->hasNoSignedWrap() &&
+      willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
+    BO->setHasNoSignedWrap();
+    SE->forgetValue(BO);
+    Changed = true;
   }
 
   return Changed;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1bb26caa2af2..e0def81d5eee 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1,9 +1,8 @@
 //===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,8 +16,10 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +36,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -105,6 +107,12 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
   });
 }
 
+static bool callHasFP128Argument(const CallInst *CI) {
+  return any_of(CI->operands(), [](const Use &OI) {
+    return OI->getType()->isFP128Ty();
+  });
+}
+
 static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
   if (Base < 2 || Base > 36)
     // handle special zero base
@@ -334,11 +342,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
     return ConstantInt::get(CI->getType(), Str1.compare(Str2));
 
   if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
-    return B.CreateNeg(
-        B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+    return B.CreateNeg(B.CreateZExt(
+        B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
 
   if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
-    return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+    return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+                        CI->getType());
 
   // strcmp(P, "x") -> memcmp(P, "x", 2)
   uint64_t Len1 = GetStringLength(Str1P);
@@ -398,11 +407,12 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
   }
 
   if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
-    return B.CreateNeg(
-        B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+    return B.CreateNeg(B.CreateZExt(
+        B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
 
   if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
-    return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+    return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+                        CI->getType());
 
   uint64_t Len1 = GetStringLength(Str1P);
   uint64_t Len2 = GetStringLength(Str2P);
@@ -591,7 +601,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
   // strlen(x) != 0 --> *x != 0
   // strlen(x) == 0 --> *x == 0
   if (isOnlyUsedInZeroEqualityComparison(CI))
-    return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+    return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
+                        CI->getType());
 
   return nullptr;
 }
@@ -735,7 +746,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
 
     // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
     Value *Result = castToCStr(CI->getArgOperand(0), B);
-    Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+    Result =
+        B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
     return B.CreateBitCast(Result, CI->getType());
   }
 
@@ -773,7 +785,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
   // It would be really nice to reuse switch lowering here but we can't change
   // the CFG at this point.
   //
-  // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+  // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
+  // != 0
   //   after bounds check.
   if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
     unsigned char Max =
@@ -828,27 +841,20 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
   return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
 }
 
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
-  Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
-
-  if (LHS == RHS) // memcmp(s,s,x) -> 0
-    return Constant::getNullValue(CI->getType());
-
-  // Make sure we have a constant length.
-  ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  if (!LenC)
-    return nullptr;
-
-  uint64_t Len = LenC->getZExtValue();
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
+                                         uint64_t Len, IRBuilder<> &B,
+                                         const DataLayout &DL) {
   if (Len == 0) // memcmp(s1,s2,0) -> 0
     return Constant::getNullValue(CI->getType());
 
   // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
   if (Len == 1) {
-    Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"),
-                               CI->getType(), "lhsv");
-    Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"),
-                               CI->getType(), "rhsv");
+    Value *LHSV =
+        B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
+                     CI->getType(), "lhsv");
+    Value *RHSV =
+        B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
+                     CI->getType(), "rhsv");
     return B.CreateSub(LHSV, RHSV, "chardiff");
   }
 
@@ -878,12 +884,12 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
       if (!LHSV) {
         Type *LHSPtrTy =
             IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
-        LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+        LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
       }
       if (!RHSV) {
         Type *RHSPtrTy =
             IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
-        RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+        RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
       }
       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
     }
@@ -907,10 +913,48 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
       Ret = 1;
     return ConstantInt::get(CI->getType(), Ret);
   }
+  return nullptr;
+}
+
+// Most simplifications for memcmp also apply to bcmp.
+Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
+                                                   IRBuilder<> &B) {
+  Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
+
+  if (LHS == RHS) // memcmp(s,s,x) -> 0
+    return Constant::getNullValue(CI->getType());
+
+  // Handle constant lengths.
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+    if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
+                                                LenC->getZExtValue(), B, DL))
+      return Res;
+
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+  if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+    return V;
+
+  // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
+  // `bcmp` can be more efficient than memcmp because it only has to know that
+  // there is a difference, not where it is.
+  if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+    Value *Size = CI->getArgOperand(2);
+    return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+  }
 
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
+  return optimizeMemCmpBCmpCommon(CI, B);
+}
+
 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
   B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
@@ -1031,7 +1075,8 @@ static Value *valueHasFloatPrecision(Value *Val) {
 /// Shrink double -> float functions.
 static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
                                bool isBinary, bool isPrecise = false) {
-  if (!CI->getType()->isDoubleTy())
+  Function *CalleeFn = CI->getCalledFunction();
+  if (!CI->getType()->isDoubleTy() || !CalleeFn)
     return nullptr;
 
   // If not all the uses of the function are converted to float, then bail out.
@@ -1051,15 +1096,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
   if (!V[0] || (isBinary && !V[1]))
     return nullptr;
 
+  StringRef CalleeNm = CalleeFn->getName();
+  AttributeList CalleeAt = CalleeFn->getAttributes();
+  bool CalleeIn = CalleeFn->isIntrinsic();
+
   // If call isn't an intrinsic, check that it isn't within a function with the
   // same name as the float version of this call, otherwise the result is an
   // infinite loop.  For example, from MinGW-w64:
   //
   // float expf(float val) { return (float) exp((double) val); }
-  Function *CalleeFn = CI->getCalledFunction();
-  StringRef CalleeNm = CalleeFn->getName();
-  AttributeList CalleeAt = CalleeFn->getAttributes();
-  if (CalleeFn && !CalleeFn->isIntrinsic()) {
+  if (!CalleeIn) {
     const Function *Fn = CI->getFunction();
     StringRef FnName = Fn->getName();
     if (FnName.back() == 'f' &&
@@ -1074,7 +1120,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
 
   // g((double) float) -> (double) gf(float)
   Value *R;
-  if (CalleeFn->isIntrinsic()) {
+  if (CalleeIn) {
     Module *M = CI->getModule();
     Intrinsic::ID IID = CalleeFn->getIntrinsicID();
     Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
@@ -1132,10 +1178,10 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
                                       IRBuilder<> &B) {
   if (!isa<FPMathOperator>(Call))
     return nullptr;
-  
+
   IRBuilder<>::FastMathFlagGuard Guard(B);
   B.setFastMathFlags(Call->getFastMathFlags());
-  
+
   // TODO: Can this be shared to also handle LLVM intrinsics?
   Value *X;
   switch (Func) {
@@ -1189,7 +1235,8 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
 }
 
 /// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
+/// exp2(log2(n) * x) for pow(n, x).
 Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
   Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
   AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1276,12 +1323,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
     APFloat BaseR = APFloat(1.0);
     BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
     BaseR = BaseR / *BaseF;
-    bool IsInteger    = BaseF->isInteger(),
-         IsReciprocal = BaseR.isInteger();
+    bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
     const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
     APSInt NI(64, false);
     if ((IsInteger || IsReciprocal) &&
-        !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+        NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK &&
         NI > 1 && NI.isPowerOf2()) {
       double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
       Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1301,6 +1348,28 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
     return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
                                 LibFunc_exp10l, B, Attrs);
 
+  // pow(n, x) -> exp2(log2(n) * x)
+  if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() &&
+      Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) {
+    Value *Log = nullptr;
+    if (Ty->isFloatTy())
+      Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
+    else if (Ty->isDoubleTy())
+      Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
+
+    if (Log) {
+      Value *FMul = B.CreateFMul(Log, Expo, "mul");
+      if (Pow->doesNotAccessMemory()) {
+        return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+                            FMul, "exp2");
+      } else {
+        if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+                            LibFunc_exp2l))
+          return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+                                      LibFunc_exp2l, B, Attrs);
+      }
+    }
+  }
   return nullptr;
 }
 
@@ -1364,12 +1433,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
   return Sqrt;
 }
 
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+                                           IRBuilder<> &B) {
+  Value *Args[] = {Base, Expo};
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+  return B.CreateCall(F, Args);
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
-  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+  Value *Base = Pow->getArgOperand(0);
+  Value *Expo = Pow->getArgOperand(1);
   Function *Callee = Pow->getCalledFunction();
   StringRef Name = Callee->getName();
   Type *Ty = Pow->getType();
+  Module *M = Pow->getModule();
   Value *Shrunk = nullptr;
+  bool AllowApprox = Pow->hasApproxFunc();
   bool Ignored;
 
   // Bail out if simplifying libcalls to pow() is disabled.
@@ -1382,8 +1461,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
   // Shrink pow() to powf() if the arguments are single precision,
   // unless the result is expected to be double precision.
-  if (UnsafeFPShrink &&
-      Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+  if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+      hasFloatVersion(Name))
     Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
 
   // Evaluate special cases related to the base.
@@ -1403,7 +1482,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
   // pow(x, 0.0) -> 1.0
   if (match(Expo, m_SpecificFP(0.0)))
-      return ConstantFP::get(Ty, 1.0);
+    return ConstantFP::get(Ty, 1.0);
 
   // pow(x, 1.0) -> x
   if (match(Expo, m_FPOne()))
@@ -1418,7 +1497,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
   // pow(x, n) -> x * x * x * ...
   const APFloat *ExpoF;
-  if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+  if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
     // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
     // If the exponent is an integer+0.5 we generate a call to sqrt and an
     // additional fmul.
@@ -1442,9 +1521,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
         if (!Expo2.isInteger())
           return nullptr;
 
-        Sqrt =
-            getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
-                        Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+        Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+                           Pow->doesNotAccessMemory(), M, B, TLI);
       }
 
       // We will memoize intermediate products of the Addition Chain.
@@ -1467,6 +1545,29 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
 
       return FMul;
     }
+
+    APSInt IntExpo(32, /*isUnsigned=*/false);
+    // powf(x, n) -> powi(x, n) if n is a constant signed integer value
+    if (ExpoF->isInteger() &&
+        ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+            APFloat::opOK) {
+      return createPowWithIntegerExponent(
+          Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+    }
+  }
+
+  // powf(x, itofp(y)) -> powi(x, y)
+  if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+    Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
+    Value *NewExpo = nullptr;
+    unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
+    if (isa<SIToFPInst>(Expo) && BitWidth == 32)
+      NewExpo = IntExpo;
+    else if (BitWidth < 32)
+      NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
+                                      : B.CreateZExt(IntExpo, B.getInt32Ty());
+    if (NewExpo)
+      return createPowWithIntegerExponent(Base, NewExpo, M, B);
   }
 
   return Shrunk;
@@ -1504,9 +1605,8 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
         One = ConstantExpr::getFPExtend(One, Op->getType());
 
       Module *M = CI->getModule();
-      Value *NewCallee =
-          M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
-                                 Op->getType(), B.getInt32Ty());
+      FunctionCallee NewCallee = M->getOrInsertFunction(
+          TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
       CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
       if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
         CI->setCallingConv(F->getCallingConv());
@@ -1518,40 +1618,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
 }
 
 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
-  Function *Callee = CI->getCalledFunction();
   // If we can shrink the call to a float function rather than a double
   // function, do that first.
+  Function *Callee = CI->getCalledFunction();
   StringRef Name = Callee->getName();
   if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
     if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
       return Ret;
 
+  // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+  // the intrinsics for improved optimization (for example, vectorization).
+  // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+  // From the C standard draft WG14/N1256:
+  // "Ideally, fmax would be sensitive to the sign of zero, for example
+  // fmax(-0.0, +0.0) would return +0; however, implementation in software
+  // might be impractical."
   IRBuilder<>::FastMathFlagGuard Guard(B);
-  FastMathFlags FMF;
-  if (CI->isFast()) {
-    // If the call is 'fast', then anything we create here will also be 'fast'.
-    FMF.setFast();
-  } else {
-    // At a minimum, no-nans-fp-math must be true.
-    if (!CI->hasNoNaNs())
-      return nullptr;
-    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
-    // "Ideally, fmax would be sensitive to the sign of zero, for example
-    // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
-    // might be impractical."
-    FMF.setNoSignedZeros();
-    FMF.setNoNaNs();
-  }
+  FastMathFlags FMF = CI->getFastMathFlags();
+  FMF.setNoSignedZeros();
   B.setFastMathFlags(FMF);
 
-  // We have a relaxed floating-point environment. We can ignore NaN-handling
-  // and transform to a compare and select. We do not have to consider errno or
-  // exceptions, because fmin/fmax do not have those.
-  Value *Op0 = CI->getArgOperand(0);
-  Value *Op1 = CI->getArgOperand(1);
-  Value *Cmp = Callee->getName().startswith("fmin") ?
-    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
-  return B.CreateSelect(Cmp, Op0, Op1);
+  Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+                                                           : Intrinsic::maxnum;
+  Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+  return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
 }
 
 Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
@@ -1654,13 +1744,13 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
   // replace it with the fabs of that factor.
   Module *M = Callee->getParent();
   Type *ArgType = I->getType();
-  Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+  Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
   Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
   if (OtherOp) {
     // If we found a non-repeated factor, we still need to get its square
     // root. We then multiply that by the value that was simplified out
     // of the square root calculation.
-    Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
     Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
     return B.CreateFMul(FabsCall, SqrtCall);
   }
@@ -1728,8 +1818,8 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
   }
 
   Module *M = OrigCallee->getParent();
-  Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
-                                         ResTy, ArgTy);
+  FunctionCallee Callee =
+      M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
 
   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
     // If the argument is an instruction, it must dominate all uses so put our
@@ -1840,8 +1930,8 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
   // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
   Value *Op = CI->getArgOperand(0);
   Type *ArgType = Op->getType();
-  Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
-                                       Intrinsic::cttz, ArgType);
+  Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+                                          Intrinsic::cttz, ArgType);
   Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
   V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
   V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1854,8 +1944,8 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
   // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
   Value *Op = CI->getArgOperand(0);
   Type *ArgType = Op->getType();
-  Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
-                                       Intrinsic::ctlz, ArgType);
+  Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+                                          Intrinsic::ctlz, ArgType);
   Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
   V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
                   V);
@@ -2026,13 +2116,27 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
   // arguments.
   if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
     Module *M = B.GetInsertBlock()->getParent()->getParent();
-    Constant *IPrintFFn =
+    FunctionCallee IPrintFFn =
         M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(IPrintFFn);
     B.Insert(New);
     return New;
   }
+
+  // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
+  // arguments.
+  if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
+    Module *M = B.GetInsertBlock()->getParent()->getParent();
+    auto SmallPrintFFn =
+        M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
+                               FT, Callee->getAttributes());
+    CallInst *New = cast<CallInst>(CI->clone());
+    New->setCalledFunction(SmallPrintFFn);
+    B.Insert(New);
+    return New;
+  }
+
   return nullptr;
 }
 
@@ -2077,7 +2181,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
   }
 
   if (FormatStr[1] == 's') {
-    // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+    // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
+    // strlen(str)+1)
     if (!CI->getArgOperand(2)->getType()->isPointerTy())
       return nullptr;
 
@@ -2105,13 +2210,27 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
   // point arguments.
   if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
     Module *M = B.GetInsertBlock()->getParent()->getParent();
-    Constant *SIPrintFFn =
+    FunctionCallee SIPrintFFn =
         M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(SIPrintFFn);
     B.Insert(New);
     return New;
   }
+
+  // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
+  // floating point arguments.
+  if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
+    Module *M = B.GetInsertBlock()->getParent()->getParent();
+    auto SmallSPrintFFn =
+        M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
+                               FT, Callee->getAttributes());
+    CallInst *New = cast<CallInst>(CI->clone());
+    New->setCalledFunction(SmallSPrintFFn);
+    B.Insert(New);
+    return New;
+  }
+
   return nullptr;
 }
 
@@ -2140,7 +2259,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
     else if (N < FormatStr.size() + 1)
       return nullptr;
 
-    // sprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt,
+    // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
     // strlen(fmt)+1)
     B.CreateMemCpy(
         CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
@@ -2262,13 +2381,27 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
   // floating point arguments.
   if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
     Module *M = B.GetInsertBlock()->getParent()->getParent();
-    Constant *FIPrintFFn =
+    FunctionCallee FIPrintFFn =
         M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
     CallInst *New = cast<CallInst>(CI->clone());
     New->setCalledFunction(FIPrintFFn);
     B.Insert(New);
     return New;
   }
+
+  // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
+  // 128-bit floating point arguments.
+  if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
+    Module *M = B.GetInsertBlock()->getParent()->getParent();
+    auto SmallFPrintFFn =
+        M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
+                               FT, Callee->getAttributes());
+    CallInst *New = cast<CallInst>(CI->clone());
+    New->setCalledFunction(SmallFPrintFFn);
+    B.Insert(New);
+    return New;
+  }
+
   return nullptr;
 }
 
@@ -2288,7 +2421,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
     // If this is writing one byte, turn it into fputc.
     // This optimisation is only valid, if the return value is unused.
     if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
-      Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+      Value *Char = B.CreateLoad(B.getInt8Ty(),
+                                 castToCStr(CI->getArgOperand(0), B), "char");
       Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
       return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
     }
@@ -2307,7 +2441,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
 
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
-  if (CI->getFunction()->optForSize())
+  bool OptForSize = CI->getFunction()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+  if (OptForSize)
     return nullptr;
 
   // Check if has any use
@@ -2320,7 +2456,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
       return nullptr;
   }
 
-  // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+  // fputs(s,F) --> fwrite(s,strlen(s),1,F)
   uint64_t Len = GetStringLength(CI->getArgOperand(0));
   if (!Len)
     return nullptr;
@@ -2367,18 +2503,14 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
 }
 
 Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
-  // Check for a constant string.
-  StringRef Str;
-  if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+  if (!CI->use_empty())
     return nullptr;
 
-  if (Str.empty() && CI->use_empty()) {
-    // puts("") -> putchar('\n')
-    Value *Res = emitPutChar(B.getInt32('\n'), B, TLI);
-    if (CI->use_empty() || !Res)
-      return Res;
-    return B.CreateIntCast(Res, CI->getType(), true);
-  }
+  // Check for a constant string.
+  // puts("") -> putchar('\n')
+  StringRef Str;
+  if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
+    return emitPutChar(B.getInt32('\n'), B, TLI);
 
   return nullptr;
 }
@@ -2441,6 +2573,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
       return optimizeStrStr(CI, Builder);
     case LibFunc_memchr:
       return optimizeMemChr(CI, Builder);
+    case LibFunc_bcmp:
+      return optimizeBCmp(CI, Builder);
     case LibFunc_memcmp:
       return optimizeMemCmp(CI, Builder);
     case LibFunc_memcpy:
@@ -2686,9 +2820,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
 LibCallSimplifier::LibCallSimplifier(
     const DataLayout &DL, const TargetLibraryInfo *TLI,
     OptimizationRemarkEmitter &ORE,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
     function_ref<void(Instruction *, Value *)> Replacer,
     function_ref<void(Instruction *)> Eraser)
-    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
       UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
 
 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
@@ -2735,12 +2870,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) {
 // Fortified Library Call Optimizations
 //===----------------------------------------------------------------------===//
 
-bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
-                                                         unsigned ObjSizeOp,
-                                                         unsigned SizeOp,
-                                                         bool isString) {
-  if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+bool
+FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+                                                    unsigned ObjSizeOp,
+                                                    Optional<unsigned> SizeOp,
+                                                    Optional<unsigned> StrOp,
+                                                    Optional<unsigned> FlagOp) {
+  // If this function takes a flag argument, the implementation may use it to
+  // perform extra checks. Don't fold into the non-checking variant.
+  if (FlagOp) {
+    ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
+    if (!Flag || !Flag->isZero())
+      return false;
+  }
+
+  if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
     return true;
+
   if (ConstantInt *ObjSizeCI =
           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
     if (ObjSizeCI->isMinusOne())
@@ -2748,23 +2894,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
     // If the object size wasn't -1 (unknown), bail out if we were asked to.
     if (OnlyLowerUnknownSize)
       return false;
-    if (isString) {
-      uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+    if (StrOp) {
+      uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
       // If the length is 0 we don't know how long it is and so we can't
       // remove the check.
       if (Len == 0)
         return false;
       return ObjSizeCI->getZExtValue() >= Len;
     }
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
-      return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+
+    if (SizeOp) {
+      if (ConstantInt *SizeCI =
+              dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
+        return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+    }
   }
   return false;
 }
 
 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                      IRBuilder<> &B) {
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                    CI->getArgOperand(2));
     return CI->getArgOperand(0);
@@ -2774,7 +2924,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
 
 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                       IRBuilder<> &B) {
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                     CI->getArgOperand(2));
     return CI->getArgOperand(0);
@@ -2786,7 +2936,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
                                                      IRBuilder<> &B) {
   // TODO: Try foldMallocMemset() here.
 
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
     B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
     return CI->getArgOperand(0);
@@ -2797,8 +2947,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
                                                       IRBuilder<> &B,
                                                       LibFunc Func) {
-  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
   const DataLayout &DL = CI->getModule()->getDataLayout();
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
         *ObjSize = CI->getArgOperand(2);
@@ -2814,8 +2962,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true))
-    return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+  if (isFortifiedCallFoldable(CI, 2, None, 1)) {
+    if (Func == LibFunc_strcpy_chk)
+      return emitStrCpy(Dst, Src, B, TLI);
+    else
+      return emitStpCpy(Dst, Src, B, TLI);
+  }
 
   if (OnlyLowerUnknownSize)
     return nullptr;
@@ -2838,13 +2990,99 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
                                                        IRBuilder<> &B,
                                                        LibFunc Func) {
-  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
-    Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                             CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
-    return Ret;
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
+    if (Func == LibFunc_strncpy_chk)
+      return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                               CI->getArgOperand(2), B, TLI);
+    else
+      return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), B, TLI);
   }
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 4, 3))
+    return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
+                                                       IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
+    SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+    return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                        CI->getArgOperand(4), VariadicArgs, B, TLI);
+  }
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
+    SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+    return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
+                       B, TLI);
+  }
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2))
+    return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
+                                                   IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
+                                                        IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
+    return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
+                                                       IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2, None, None, 1))
+    return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+                        CI->getArgOperand(4), B, TLI);
+
   return nullptr;
 }
 
@@ -2892,6 +3130,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   case LibFunc_stpncpy_chk:
   case LibFunc_strncpy_chk:
     return optimizeStrpNCpyChk(CI, Builder, Func);
+  case LibFunc_memccpy_chk:
+    return optimizeMemCCpyChk(CI, Builder);
+  case LibFunc_snprintf_chk:
+    return optimizeSNPrintfChk(CI, Builder);
+  case LibFunc_sprintf_chk:
+    return optimizeSPrintfChk(CI, Builder);
+  case LibFunc_strcat_chk:
+    return optimizeStrCatChk(CI, Builder);
+  case LibFunc_strlcat_chk:
+    return optimizeStrLCat(CI, Builder);
+  case LibFunc_strncat_chk:
+    return optimizeStrNCatChk(CI, Builder);
+  case LibFunc_strlcpy_chk:
+    return optimizeStrLCpyChk(CI, Builder);
+  case LibFunc_vsnprintf_chk:
+    return optimizeVSNPrintfChk(CI, Builder);
+  case LibFunc_vsprintf_chk:
+    return optimizeVSPrintfChk(CI, Builder);
   default:
     break;
   }
diff --git a/lib/Transforms/Utils/SizeOpts.cpp b/lib/Transforms/Utils/SizeOpts.cpp
new file mode 100644
index 000000000000..1519751197d2
--- /dev/null
+++ b/lib/Transforms/Utils/SizeOpts.cpp
@@ -0,0 +1,37 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+using namespace llvm;
+
+static cl::opt<bool> ProfileGuidedSizeOpt(
+    "pgso", cl::Hidden, cl::init(true),
+    cl::desc("Enable the profile guided size optimization. "));
+
+bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI) {
+  assert(F);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+}
+
+bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI) {
+  assert(BB);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+}
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp
index 5db4d2e4df9d..e2c387cb8983 100644
--- a/lib/Transforms/Utils/SplitModule.cpp
+++ b/lib/Transforms/Utils/SplitModule.cpp
@@ -1,9 +1,8 @@
 //===- SplitModule.cpp - Split a module into partitions -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp
index ac0b519f4a77..50844cf9d1c5 100644
--- a/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -1,9 +1,8 @@
 //===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 8956a089a99c..97a4533fabe5 100644
--- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -1,9 +1,8 @@
 //===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index fd0da79487f1..456724779b43 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,9 +1,8 @@
 //===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index d49b26472548..7f7bdf8a3d6d 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -1,9 +1,8 @@
 //===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 95416de07439..5272ab6e95d5 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -1,9 +1,8 @@
 //===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,3 +54,6 @@ void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createPromoteMemoryToRegisterPass());
 }
 
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createAddDiscriminatorsPass());
+}
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 948d9bd5baad..a77bf50fe10b 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -14,13 +14,17 @@ namespace VNCoercion {
 /// Return true if coerceAvailableValueToLoadType will succeed.
 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                      const DataLayout &DL) {
+  Type *StoredTy = StoredVal->getType();
+  if (StoredTy == LoadTy)
+    return true;
+
   // If the loaded or stored value is an first class array or struct, don't try
   // to transform them.  We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
-      StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+  if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
+      StoredTy->isArrayTy())
     return false;
 
-  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
 
   // The store size must be byte-aligned to support future type casts.
   if (llvm::alignTo(StoreSize, 8) != StoreSize)
@@ -31,10 +35,16 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
     return false;
 
   // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
-      DL.isNonIntegralPointerType(LoadTy))
+  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+    // As a special case, allow coercion of a memset used to initialize an
+    // array with null.  Although non-integral pointers do not generally have
+    // a specific bit pattern, we do assume null is zero.
+    if (auto *CI = dyn_cast<Constant>(StoredVal))
+      return CI->isNullValue();
     return false;
-
+  }
+  
   return true;
 }
 
@@ -207,11 +217,22 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
 /// memdep query of a load that ends up being a clobbering store.
 int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                    StoreInst *DepSI, const DataLayout &DL) {
+  auto *StoredVal = DepSI->getValueOperand();
+  
   // Cannot handle reading from store of first-class aggregate yet.
-  if (DepSI->getValueOperand()->getType()->isStructTy() ||
-      DepSI->getValueOperand()->getType()->isArrayTy())
+  if (StoredVal->getType()->isStructTy() ||
+      StoredVal->getType()->isArrayTy())
     return -1;
 
+  // Don't coerce non-integral pointers to integers or vice versa.
+  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+    // Allow casts of zero values to null as a special case
+    auto *CI = dyn_cast<Constant>(StoredVal);
+    if (!CI || !CI->isNullValue())
+      return -1;
+  }
+
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
       DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
@@ -228,6 +249,11 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
   if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
     return -1;
 
+  // Don't coerce non-integral pointers to integers or vice versa.
+  if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
+      DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+    return -1;
+
   Value *DepPtr = DepLI->getPointerOperand();
   uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
   int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
@@ -264,9 +290,15 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
 
   // If this is memset, we just need to see if the offset is valid in the size
   // of the memset..
-  if (MI->getIntrinsicID() == Intrinsic::memset)
+  if (MI->getIntrinsicID() == Intrinsic::memset) {
+    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+      auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+      if (!CI || !CI->isZero())
+        return -1;
+    }
     return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                           MemSizeInBits, DL);
+  }
 
   // If we have a memcpy/memmove, the only case we can handle is if this is a
   // copy from constant memory.  In that case, we can read directly from the
@@ -278,7 +310,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
     return -1;
 
   GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
-  if (!GV || !GV->isConstant())
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
     return -1;
 
   // See if the access is within the bounds of the transfer.
@@ -287,6 +319,12 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
   if (Offset == -1)
     return Offset;
 
+  // Don't coerce non-integral pointers to integers or vice versa, and the
+  // memtransfer is implicitly a raw byte copy.
+  if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+    // TODO: Can allow nullptrs from constant zeros
+    return -1;
+
   unsigned AS = Src->getType()->getPointerAddressSpace();
   // Otherwise, see if we can constant fold a load from the constant with the
   // offset applied as appropriate.
@@ -386,12 +424,12 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
     // memdep queries will find the new load.  We can't easily remove the old
     // load completely because it is already in the value numbering table.
     IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
-    Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
-    DestPTy =
-        PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+    Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+    Type *DestPTy =
+        PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
     Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
     PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
-    LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+    LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
     NewLoad->takeName(SrcVal);
     NewLoad->setAlignment(SrcVal->getAlignment());
 
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 55fff3f3872a..fbc3407c301f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -1,9 +1,8 @@
 //===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -914,6 +913,21 @@ void Mapper::remapInstruction(Instruction *I) {
       Tys.push_back(TypeMapper->remapType(Ty));
     CS.mutateFunctionType(FunctionType::get(
         TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+
+    LLVMContext &C = CS->getContext();
+    AttributeList Attrs = CS.getAttributes();
+    for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+      if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+        Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+        if (!Ty)
+          continue;
+
+        Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+        Attrs = Attrs.addAttribute(
+            C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+      }
+    }
+    CS.setAttributes(Attrs);
     return;
   }
   if (auto *AI = dyn_cast<AllocaInst>(I))
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 9ff18328c219..4273080ddd91 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1,9 +1,8 @@
 //===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -927,7 +926,7 @@ bool Vectorizer::vectorizeStoreChain(
   StoreInst *S0 = cast<StoreInst>(Chain[0]);
 
   // If the vector has an int element, default to int for the whole store.
-  Type *StoreTy;
+  Type *StoreTy = nullptr;
   for (Instruction *I : Chain) {
     StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
     if (StoreTy->isIntOrIntVectorTy())
@@ -939,6 +938,7 @@ bool Vectorizer::vectorizeStoreChain(
       break;
     }
   }
+  assert(StoreTy && "Failed to find store type");
 
   unsigned Sz = DL.getTypeSizeInBits(StoreTy);
   unsigned AS = S0->getPointerAddressSpace();
@@ -1152,13 +1152,8 @@ bool Vectorizer::vectorizeLoadChain(
              vectorizeLoadChain(Chains.second, InstructionsProcessed);
     }
 
-    unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
-                                                   StackAdjustedAlignment,
-                                                   DL, L0, nullptr, &DT);
-    if (NewAlign != 0)
-      Alignment = NewAlign;
-
-    Alignment = NewAlign;
+    Alignment = getOrEnforceKnownAlignment(
+        L0->getPointerOperand(), StackAdjustedAlignment, DL, L0, nullptr, &DT);
   }
 
   if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
@@ -1182,7 +1177,7 @@ bool Vectorizer::vectorizeLoadChain(
 
   Value *Bitcast =
       Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
-  LoadInst *LI = Builder.CreateAlignedLoad(Bitcast, Alignment);
+  LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
   propagateMetadata(LI, Chain);
 
   if (VecLoadTy) {
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index b44fe5a52a2f..6ef8dc2d3cd7 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1,9 +1,8 @@
 //===- LoopVectorizationLegality.cpp --------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,8 @@ using namespace llvm;
 #define LV_NAME "loop-vectorize"
 #define DEBUG_TYPE LV_NAME
 
+extern cl::opt<bool> EnableVPlanPredication;
+
 static cl::opt<bool>
     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                        cl::desc("Enable if-conversion during vectorization."));
@@ -46,6 +47,18 @@ static const unsigned MaxInterleaveFactor = 16;
 
 namespace llvm {
 
+#ifndef NDEBUG
+/// Emit the "LV: Not vectorizing: <msg>" debug line; append *I when non-null.
+static void debugVectorizationFailure(const StringRef DebugMsg,
+                                      Instruction *I) {
+  dbgs() << "LV: Not vectorizing: " << DebugMsg;
+  if (I)
+    dbgs() << " " << *I << '\n';
+  else
+    dbgs() << ".\n";
+}
+#endif
+
 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
                                                   StringRef RemarkName,
                                                   Loop *TheLoop,
@@ -103,6 +116,25 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
              << "LV: Interleaving disabled by the pass manager\n");
 }
 
+/// Build an "llvm.loop.isvectorized" = 1 node, hand it to
+/// makePostTransformationMetadata, and install the loop ID it returns.
+void LoopVectorizeHints::setAlreadyVectorized() {
+  LLVMContext &Ctx = TheLoop->getHeader()->getContext();
+  Metadata *VectorizedOps[] = {
+      MDString::get(Ctx, "llvm.loop.isvectorized"),
+      ConstantAsMetadata::get(ConstantInt::get(Ctx, APInt(32, 1)))};
+  MDNode *VectorizedMD = MDNode::get(Ctx, VectorizedOps);
+  // NOTE(review): the two prefixes presumably name the hints the helper drops.
+  const std::string VectorizePrefix = Twine(Prefix(), "vectorize.").str();
+  const std::string InterleavePrefix = Twine(Prefix(), "interleave.").str();
+  MDNode *UpdatedID = makePostTransformationMetadata(
+      Ctx, TheLoop->getLoopID(), {VectorizePrefix, InterleavePrefix},
+      {VectorizedMD});
+  TheLoop->setLoopID(UpdatedID);
+  // Update the cached hint as well.
+  IsVectorized.Value = 1;
+}
+
 bool LoopVectorizeHints::allowVectorization(
     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
   if (getForce() == LoopVectorizeHints::FK_Disabled) {
@@ -230,57 +262,6 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
   }
 }
 
-MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
-                                               unsigned V) const {
-  LLVMContext &Context = TheLoop->getHeader()->getContext();
-  Metadata *MDs[] = {
-      MDString::get(Context, Name),
-      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
-  return MDNode::get(Context, MDs);
-}
-
-bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
-                                                 ArrayRef<Hint> HintTypes) {
-  MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
-  if (!Name)
-    return false;
-
-  for (auto H : HintTypes)
-    if (Name->getString().endswith(H.Name))
-      return true;
-  return false;
-}
-
-void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
-  if (HintTypes.empty())
-    return;
-
-  // Reserve the first element to LoopID (see below).
-  SmallVector<Metadata *, 4> MDs(1);
-  // If the loop already has metadata, then ignore the existing operands.
-  MDNode *LoopID = TheLoop->getLoopID();
-  if (LoopID) {
-    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
-      MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
-      // If node in update list, ignore old value.
-      if (!matchesHintMetadataName(Node, HintTypes))
-        MDs.push_back(Node);
-    }
-  }
-
-  // Now, add the missing hints.
-  for (auto H : HintTypes)
-    MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
-  // Replace current metadata node with new one.
-  LLVMContext &Context = TheLoop->getHeader()->getContext();
-  MDNode *NewLoopID = MDNode::get(Context, MDs);
-  // Set operand 0 to refer to the loop id itself.
-  NewLoopID->replaceOperandWith(0, NewLoopID);
-
-  TheLoop->setLoopID(NewLoopID);
-}
-
 bool LoopVectorizationRequirements::doesNotMeet(
     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
   const char *PassName = Hints.vectorizeAnalysisPassName();
@@ -464,6 +445,14 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
   return LAI->isUniform(V);
 }
 
+void LoopVectorizationLegality::reportVectorizationFailure(
+    const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag,
+    Instruction *I) const {
+  LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); // debug builds only
+  const char *PassName = Hints->vectorizeAnalysisPassName();
+  ORE->emit(createLVMissedAnalysis(PassName, ORETag, TheLoop, I) << OREMsg);
+}
+
 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
   // Store the result and return it at the end instead of exiting early, in case
@@ -476,9 +465,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
     // not supported yet.
     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
     if (!Br) {
-      LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
-      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-                << "loop control flow is not understood by vectorizer");
+      reportVectorizationFailure("Unsupported basic block terminator",
+          "loop control flow is not understood by vectorizer",
+          "CFGNotUnderstood");
       if (DoExtraAnalysis)
         Result = false;
       else
@@ -488,13 +477,16 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
     // Check whether the BranchInst is a supported one. Only unconditional
     // branches, conditional branches with an outer loop invariant condition or
     // backedges are supported.
-    if (Br && Br->isConditional() &&
+    // FIXME: We skip these checks when VPlan predication is enabled as we
+    // want to allow divergent branches. This whole check will be removed
+    // once VPlan predication is on by default.
+    if (!EnableVPlanPredication && Br && Br->isConditional() &&
         !TheLoop->isLoopInvariant(Br->getCondition()) &&
         !LI->isLoopHeader(Br->getSuccessor(0)) &&
         !LI->isLoopHeader(Br->getSuccessor(1))) {
-      LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
-      ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-                << "loop control flow is not understood by vectorizer");
+      reportVectorizationFailure("Unsupported conditional branch",
+          "loop control flow is not understood by vectorizer",
+          "CFGNotUnderstood");
       if (DoExtraAnalysis)
         Result = false;
       else
@@ -506,11 +498,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
   // simple outer loops scenarios with uniform nested loops.
   if (!isUniformLoopNest(TheLoop /*loop nest*/,
                          TheLoop /*context outer loop*/)) {
-    LLVM_DEBUG(
-        dbgs()
-        << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
-    ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-              << "loop control flow is not understood by vectorizer");
+    reportVectorizationFailure("Outer loop contains divergent loops",
+        "loop control flow is not understood by vectorizer",
+        "CFGNotUnderstood");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -519,10 +509,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
 
   // Check whether we are able to set up outer loop induction.
   if (!setupOuterLoopInductions()) {
-    LLVM_DEBUG(
-        dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
-    ORE->emit(createMissedAnalysis("UnsupportedPhi")
-              << "Unsupported outer loop Phi(s)");
+    reportVectorizationFailure("Unsupported outer loop Phi(s)",
+                               "Unsupported outer loop Phi(s)",
+                               "UnsupportedPhi");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -627,9 +616,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // Check that this PHI type is allowed.
         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
             !PhiTy->isPointerTy()) {
-          ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
-                    << "loop control flow is not understood by vectorizer");
-          LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+          reportVectorizationFailure("Found a non-int non-pointer PHI",
+              "loop control flow is not understood by vectorizer",
+              "CFGNotUnderstood", Phi);
           return false;
         }
 
@@ -647,9 +636,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
         // We only allow if-converted PHIs with exactly two incoming values.
         if (Phi->getNumIncomingValues() != 2) {
-          ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
-                    << "control flow not understood by vectorizer");
-          LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+          reportVectorizationFailure("Found an invalid PHI",
+              "loop control flow is not understood by vectorizer",
+              "CFGNotUnderstood", Phi);
           return false;
         }
 
@@ -698,10 +687,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           continue;
         }
 
-        ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
-                  << "value that could not be identified as "
-                     "reduction is used outside the loop");
-        LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
+        reportVectorizationFailure("Found an unidentified PHI",
+            "value that could not be identified as "
+            "reduction is used outside the loop",
+            "NonReductionValueUsedOutsideLoop", Phi);
         return false;
       } // end of PHI handling
 
@@ -728,31 +717,33 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           // but it's hard to provide meaningful yet generic advice.
           // Also, should this be guarded by allowExtraAnalysis() and/or be part
           // of the returned info from isFunctionVectorizable()?
-          ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
-              << "library call cannot be vectorized. "
-                 "Try compiling with -fno-math-errno, -ffast-math, "
-                 "or similar flags");
+          reportVectorizationFailure("Found a non-intrinsic callsite",
+              "library call cannot be vectorized. "
+              "Try compiling with -fno-math-errno, -ffast-math, "
+              "or similar flags",
+              "CantVectorizeLibcall", CI);
         } else {
-          ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
-                    << "call instruction cannot be vectorized");
+          // Generic call: its remark tag must differ from the libcall case.
+          reportVectorizationFailure("Found a non-intrinsic callsite",
+              "call instruction cannot be vectorized", "CantVectorizeCall", CI);
         }
-        LLVM_DEBUG(
-            dbgs() << "LV: Found a non-intrinsic callsite.\n");
         return false;
       }
 
-      // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
-      // second argument is the same (i.e. loop invariant)
-      if (CI && hasVectorInstrinsicScalarOpd(
-                    getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+      // Some intrinsics have scalar arguments that must be the same across
+      // iterations (i.e. loop invariant) for the call to be vectorized.
+      if (CI) {
         auto *SE = PSE.getSE();
-        if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
-          ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
-                    << "intrinsic instruction cannot be vectorized");
-          LLVM_DEBUG(dbgs()
-                     << "LV: Found unvectorizable intrinsic " << *CI << "\n");
-          return false;
-        }
+        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
+              reportVectorizationFailure("Found unvectorizable intrinsic",
+                  "intrinsic instruction cannot be vectorized",
+                  "CantVectorizeIntrinsic", CI);
+              return false;
+            }
+          }
       }
 
       // Check that the instruction return type is vectorizable.
@@ -760,9 +751,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       if ((!VectorType::isValidElementType(I.getType()) &&
            !I.getType()->isVoidTy()) ||
           isa<ExtractElementInst>(I)) {
-        ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
-                  << "instruction return type cannot be vectorized");
-        LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
+        reportVectorizationFailure("Found unvectorizable type",
+            "instruction return type cannot be vectorized",
+            "CantVectorizeInstructionReturnType", &I);
         return false;
       }
 
@@ -770,11 +761,44 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       if (auto *ST = dyn_cast<StoreInst>(&I)) {
         Type *T = ST->getValueOperand()->getType();
         if (!VectorType::isValidElementType(T)) {
-          ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
-                    << "store instruction cannot be vectorized");
+          reportVectorizationFailure("Store instruction cannot be vectorized",
+                                     "store instruction cannot be vectorized",
+                                     "CantVectorizeStore", ST);
           return false;
         }
 
+        // For nontemporal stores, check that a nontemporal vector version is
+        // supported on the target.
+        if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
+          // Arbitrarily try a vector of 2 elements.
+          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+          assert(VecTy && "did not find vectorized version of stored type");
+          unsigned Alignment = getLoadStoreAlignment(ST);
+          if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+            reportVectorizationFailure(
+                "nontemporal store instruction cannot be vectorized",
+                "nontemporal store instruction cannot be vectorized",
+                "CantVectorizeNontemporalStore", ST);
+            return false;
+          }
+        }
+
+      } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
+        if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
+          // For nontemporal loads, check that a nontemporal vector version is
+          // supported on the target (arbitrarily try a vector of 2 elements).
+          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+          assert(VecTy && "did not find vectorized version of load type");
+          unsigned Alignment = getLoadStoreAlignment(LD);
+          if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+            reportVectorizationFailure(
+                "nontemporal load instruction cannot be vectorized",
+                "nontemporal load instruction cannot be vectorized",
+                "CantVectorizeNontemporalLoad", LD);
+            return false;
+          }
+        }
+
         // FP instructions can allow unsafe algebra, thus vectorizable by
         // non-IEEE-754 compliant SIMD units.
         // This applies to floating-point math operations and calls, not memory
@@ -797,23 +821,27 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           AllowedExit.insert(&I);
           continue;
         }
-        ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
-                  << "value cannot be used outside the loop");
+        reportVectorizationFailure("Value cannot be used outside the loop",
+                                   "value cannot be used outside the loop",
+                                   "ValueUsedOutsideLoop", &I);
         return false;
       }
     } // next instr.
   }
 
   if (!PrimaryInduction) {
-    LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
     if (Inductions.empty()) {
-      ORE->emit(createMissedAnalysis("NoInductionVariable")
-                << "loop induction variable could not be identified");
+      reportVectorizationFailure("Did not find one integer induction var",
+          "loop induction variable could not be identified",
+          "NoInductionVariable");
       return false;
     } else if (!WidestIndTy) {
-      ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
-                << "integer loop induction variable could not be identified");
+      reportVectorizationFailure("Did not find one integer induction var",
+          "integer loop induction variable could not be identified",
+          "NoIntegerInductionVariable");
       return false;
+    } else {
+      LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
     }
   }
 
@@ -839,11 +867,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     return false;
 
   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
-    ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
-              << "write to a loop invariant address could not "
-                 "be vectorized");
-    LLVM_DEBUG(
-        dbgs() << "LV: Non vectorizable stores to a uniform address\n");
+    reportVectorizationFailure("Stores to a uniform address",
+        "write to a loop invariant address could not be vectorized",
+        "CantVectorizeStoreToLoopInvariantAddress");
     return false;
   }
   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -925,8 +951,9 @@ bool LoopVectorizationLegality::blockCanBePredicated(
 
 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   if (!EnableIfConversion) {
-    ORE->emit(createMissedAnalysis("IfConversionDisabled")
-              << "if-conversion is disabled");
+    reportVectorizationFailure("If-conversion is disabled",
+                               "if-conversion is disabled",
+                               "IfConversionDisabled");
     return false;
   }
 
@@ -950,21 +977,26 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   for (BasicBlock *BB : TheLoop->blocks()) {
     // We don't support switch statements inside loops.
     if (!isa<BranchInst>(BB->getTerminator())) {
-      ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
-                << "loop contains a switch statement");
+      reportVectorizationFailure("Loop contains a switch statement",
+                                 "loop contains a switch statement",
+                                 "LoopContainsSwitch", BB->getTerminator());
       return false;
     }
 
     // We must be able to predicate all blocks that need to be predicated.
     if (blockNeedsPredication(BB)) {
       if (!blockCanBePredicated(BB, SafePointes)) {
-        ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
-                  << "control flow cannot be substituted for a select");
+        reportVectorizationFailure(
+            "Control flow cannot be substituted for a select",
+            "control flow cannot be substituted for a select",
+            "NoCFGForSelect", BB->getTerminator());
         return false;
       }
     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
-      ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
-                << "control flow cannot be substituted for a select");
+      reportVectorizationFailure(
+          "Control flow cannot be substituted for a select",
+          "control flow cannot be substituted for a select",
+          "NoCFGForSelect", BB->getTerminator());
       return false;
     }
   }
@@ -992,9 +1024,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
   // We must have a loop in canonical form. Loops with indirectbr in them cannot
   // be canonicalized.
   if (!Lp->getLoopPreheader()) {
-    LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
-    ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-              << "loop control flow is not understood by vectorizer");
+    reportVectorizationFailure("Loop doesn't have a legal pre-header",
+        "loop control flow is not understood by vectorizer",
+        "CFGNotUnderstood");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -1003,8 +1035,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
 
   // We must have a single backedge.
   if (Lp->getNumBackEdges() != 1) {
-    ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-              << "loop control flow is not understood by vectorizer");
+    reportVectorizationFailure("The loop must have a single backedge",
+        "loop control flow is not understood by vectorizer",
+        "CFGNotUnderstood");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -1013,8 +1046,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
 
   // We must have a single exiting block.
   if (!Lp->getExitingBlock()) {
-    ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-              << "loop control flow is not understood by vectorizer");
+    reportVectorizationFailure("The loop must have an exiting block",
+        "loop control flow is not understood by vectorizer",
+        "CFGNotUnderstood");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -1025,8 +1059,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
   // checked at the end of each iteration. With that we can assume that all
   // instructions in the loop are executed the same number of times.
   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
-    ORE->emit(createMissedAnalysis("CFGNotUnderstood")
-              << "loop control flow is not understood by vectorizer");
+    reportVectorizationFailure("The exiting block is not the loop latch",
+        "loop control flow is not understood by vectorizer",
+        "CFGNotUnderstood");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -1087,7 +1122,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
 
     if (!canVectorizeOuterLoop()) {
-      LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
+      reportVectorizationFailure("Unsupported outer loop",
+                                 "unsupported outer loop",
+                                 "UnsupportedOuterLoop");
       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
       // outer loops.
       return false;
@@ -1137,10 +1174,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
 
   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
-    ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
-              << "Too many SCEV assumptions need to be made and checked "
-              << "at runtime");
-    LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+    reportVectorizationFailure("Too many SCEV checks needed",
+        "Too many SCEV assumptions need to be made and checked at runtime",
+        "TooManySCEVRunTimeChecks");
     if (DoExtraAnalysis)
       Result = false;
     else
@@ -1159,20 +1195,20 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
 
   if (!PrimaryInduction) {
-    ORE->emit(createMissedAnalysis("NoPrimaryInduction")
-              << "Missing a primary induction variable in the loop, which is "
-              << "needed in order to fold tail by masking as required.");
-    LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
-                      << "masking.\n");
+    reportVectorizationFailure(
+        "No primary induction, cannot fold tail by masking",
+        "Missing a primary induction variable in the loop, which is "
+        "needed in order to fold tail by masking as required.",
+        "NoPrimaryInduction");
     return false;
   }
 
   // TODO: handle reductions when tail is folded by masking.
   if (!Reductions.empty()) {
-    ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
-              << "Cannot fold tail by masking in the presence of reductions.");
-    LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
-                      << "masking.\n");
+    reportVectorizationFailure(
+        "Loop has reductions, cannot fold tail by masking",
+        "Cannot fold tail by masking in the presence of reductions.",
+        "ReductionFoldingTailByMasking");
     return false;
   }
 
@@ -1183,10 +1219,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
       Instruction *UI = cast<Instruction>(U);
       if (TheLoop->contains(UI))
         continue;
-      ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
-                << "Cannot fold tail by masking in the presence of live outs.");
-      LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
-                        << "outside user for : " << *UI << '\n');
+      reportVectorizationFailure(
+          "Cannot fold tail by masking, loop has an outside user for",
+          "Cannot fold tail by masking in the presence of live outs.",
+          "LiveOutFoldingTailByMasking", UI);
       return false;
     }
   }
@@ -1198,9 +1234,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
   // do not need predication such as the header block.
   for (BasicBlock *BB : TheLoop->blocks()) {
     if (!blockCanBePredicated(BB, SafePointers)) {
-      ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
-                << "control flow cannot be substituted for a select");
-      LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+      reportVectorizationFailure(
+          "Cannot fold tail by masking as required",
+          "control flow cannot be substituted for a select",
+          "NoCFGForSelect", BB->getTerminator());
       return false;
     }
   }
diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 2aa219064299..97077cce83e3 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -1,9 +1,8 @@
 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -172,6 +171,13 @@ struct VectorizationFactor {
   unsigned Width;
   // Cost of the loop with that width
   unsigned Cost;
+
+  // Width 1 means no vectorization, cost 0 means uncomputed cost.
+  static VectorizationFactor Disabled() { return {1, 0}; }
+
+  bool operator==(const VectorizationFactor &rhs) const {
+    return Width == rhs.Width && Cost == rhs.Cost;
+  }
 };
 
 /// Planner drives the vectorization process after having passed
@@ -192,11 +198,9 @@ class LoopVectorizationPlanner {
   /// The legality analysis.
   LoopVectorizationLegality *Legal;
 
-  /// The profitablity analysis.
+  /// The profitability analysis.
   LoopVectorizationCostModel &CM;
 
-  using VPlanPtr = std::unique_ptr<VPlan>;
-
   SmallVector<VPlanPtr, 4> VPlans;
 
   /// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -222,8 +226,9 @@ public:
                            LoopVectorizationCostModel &CM)
       : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
 
-  /// Plan how to best vectorize, return the best VF and its cost.
-  VectorizationFactor plan(bool OptForSize, unsigned UserVF);
+  /// Plan how to best vectorize, return the best VF and its cost, or None if
+  /// vectorization and interleaving should be avoided up front.
+  Optional<VectorizationFactor> plan(bool OptForSize, unsigned UserVF);
 
   /// Use the VPlan-native path to plan how to best vectorize, return the best
   /// VF and its cost.
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index c45dee590b84..46265e3f3e13 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1,9 +1,8 @@
 //===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -57,8 +56,10 @@
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
 #include "LoopVectorizationPlanner.h"
 #include "VPRecipeBuilder.h"
+#include "VPlan.h"
 #include "VPlanHCFGBuilder.h"
 #include "VPlanHCFGTransforms.h"
+#include "VPlanPredicator.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -86,7 +87,9 @@
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -133,6 +136,7 @@
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include <algorithm>
 #include <cassert>
@@ -256,6 +260,13 @@ cl::opt<bool> EnableVPlanNativePath(
     cl::desc("Enable VPlan-native vectorization path with "
              "support for outer loop vectorization."));
 
+// FIXME: Remove this switch once we have divergence analysis. Currently we
+// assume divergent non-backedge branches when this switch is true.
+cl::opt<bool> EnableVPlanPredication(
+    "enable-vplan-predication", cl::init(false), cl::Hidden,
+    cl::desc("Enable VPlan-native vectorization path predicator with "
+             "support for outer loop vectorization."));
+
 // This flag enables the stress testing of the VPlan H-CFG construction in the
 // VPlan-native vectorization path. It must be used in conjuction with
 // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -267,6 +278,13 @@ static cl::opt<bool> VPlanBuildStressTest(
         "out right after the build (stress test the VPlan H-CFG construction "
         "in the VPlan-native vectorization path)."));
 
+cl::opt<bool> llvm::EnableLoopInterleaving(
+    "interleave-loops", cl::init(true), cl::Hidden,
+    cl::desc("Enable loop interleaving in Loop vectorization passes"));
+cl::opt<bool> llvm::EnableLoopVectorization(
+    "vectorize-loops", cl::init(true), cl::Hidden,
+    cl::desc("Run the Loop vectorization passes"));
+
 /// A helper function for converting Scalar types to vector types.
 /// If the incoming type is void, we return void. If the VF is 1, we return
 /// the scalar type.
@@ -311,11 +329,14 @@ static unsigned getReciprocalPredBlockProb() { return 2; }
 
 /// A helper function that adds a 'fast' flag to floating-point operations.
 static Value *addFastMathFlag(Value *V) {
-  if (isa<FPMathOperator>(V)) {
-    FastMathFlags Flags;
-    Flags.setFast();
-    cast<Instruction>(V)->setFastMathFlags(Flags);
-  }
+  if (isa<FPMathOperator>(V))
+    cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
+  return V;
+}
+
+static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
+  if (isa<FPMathOperator>(V))
+    cast<Instruction>(V)->setFastMathFlags(FMF);
   return V;
 }
 
@@ -760,7 +781,7 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
     const DILocation *DIL = Inst->getDebugLoc();
     if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
         !isa<DbgInfoIntrinsic>(Inst)) {
-      auto NewDIL = DIL->cloneWithDuplicationFactor(UF * VF);
+      auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF);
       if (NewDIL)
         B.SetCurrentDebugLocation(NewDIL.getValue());
       else
@@ -836,7 +857,7 @@ public:
     AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {}
 
   /// \return An upper bound for the vectorization factor, or None if
-  /// vectorization should be avoided up front.
+  /// vectorization and interleaving should be avoided up front.
   Optional<unsigned> computeMaxVF(bool OptForSize);
 
   /// \return The most profitable vectorization factor and the cost of that VF.
@@ -1149,6 +1170,18 @@ public:
     return foldTailByMasking() || Legal->blockNeedsPredication(BB);
   }
 
+  /// Estimate cost of an intrinsic call instruction CI if it were vectorized
+  /// with factor VF.  Return the cost of the instruction, including
+  /// scalarization overhead if it's needed.
+  unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+
+  /// Estimate cost of a call instruction CI if it were vectorized with factor
+  /// VF. Return the cost of the instruction, including scalarization overhead
+  /// if it's needed. The flag NeedToScalarize shows if the call needs to be
+  /// scalarized, i.e. either a vector version isn't available or it is too
+  /// expensive.
+  unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+
 private:
   unsigned NumPredStores = 0;
 
@@ -1201,6 +1234,10 @@ private:
   /// element)
   unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
 
+  /// Estimate the overhead of scalarizing an instruction. This is a
+  /// convenience wrapper for the type-based getScalarizationOverhead API.
+  unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+
   /// Returns whether the instruction is a load or store and will be a emitted
   /// as a vector operation.
   bool isConsecutiveLoadOrStore(Instruction *I);
@@ -1295,6 +1332,30 @@ private:
 
   DecisionList WideningDecisions;
 
+  /// Returns true if \p V is an instruction inside TheLoop, not invariant in
+  /// it, that is expected to be vectorized at \p VF and so needs extraction.
+  bool needsExtract(Value *V, unsigned VF) const {
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+      return false;
+
+    // Assume V will be vectorized (and hence needs extraction) if the scalars
+    // for this VF are not computed yet. This can happen because this function
+    // is called via getScalarizationOverhead from
+    // setCostBasedWideningDecision, before the scalars are collected. That
+    // should be a safe assumption in most cases, because we check if the
+    // operands have vectorizable types beforehand in LoopVectorizationLegality.
+    return Scalars.find(VF) == Scalars.end() ||
+           !isScalarAfterVectorization(I, VF);
+  };
+
+  /// Returns a vector of the operands in \p Ops that need extraction at \p VF.
+  SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
+                                                   unsigned VF) {
+    return SmallVector<Value *, 4>(make_filter_range(
+        Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+  }
+
 public:
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -1372,12 +1433,6 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
     return false;
   }
 
-  if (!Hints.getWidth()) {
-    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
-    Hints.emitRemarkWithHints();
-    return false;
-  }
-
   if (Hints.getInterleave() > 1) {
     // TODO: Interleave support is future work.
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
@@ -1447,12 +1502,13 @@ struct LoopVectorize : public FunctionPass {
     auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
     auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
         [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
 
     return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
-                        GetLAA, *ORE);
+                        GetLAA, *ORE, PSI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1478,6 +1534,7 @@ struct LoopVectorize : public FunctionPass {
 
     AU.addPreserved<BasicAAWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
   }
 };
 
@@ -2051,7 +2108,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
     //       A[i]   = b;     // Member of index 0
     //       A[i+2] = c;     // Member of index 2 (Current instruction)
     // Current pointer is pointed to A[i+2], adjust it to A[i].
-    NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
+    NewPtr = Builder.CreateGEP(ScalarTy, NewPtr, Builder.getInt32(-Index));
     if (InBounds)
       cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
 
@@ -2093,8 +2150,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
                                      GroupMask, UndefVec, "wide.masked.vec");
       }
       else
-        NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part], 
-          Group->getAlignment(), "wide.vec");
+        NewLoad = Builder.CreateAlignedLoad(VecTy, NewPtrs[Part],
+                                            Group->getAlignment(), "wide.vec");
       Group->addMetadata(NewLoad);
       NewLoads.push_back(NewLoad);
     }
@@ -2239,16 +2296,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
       // If the address is consecutive but reversed, then the
       // wide store needs to start at the last vector element.
       PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)));
+          Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(-Part * VF)));
       PartPtr->setIsInBounds(InBounds);
       PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)));
+          Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
       PartPtr->setIsInBounds(InBounds);
       if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
         Mask[Part] = reverseVector(Mask[Part]);
     } else {
       PartPtr = cast<GetElementPtrInst>(
-          Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)));
+          Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
       PartPtr->setIsInBounds(InBounds);
     }
 
@@ -2305,7 +2362,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
                                          UndefValue::get(DataTy),
                                          "wide.masked.load");
       else
-        NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+        NewLI =
+            Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
 
       // Add metadata to the load, but setVectorValue to the reverse shuffle.
       addMetadata(NewLI, LI);
@@ -2665,7 +2723,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
     assert(isa<SCEVConstant>(Step) &&
            "Expected constant step for pointer induction");
     return B.CreateGEP(
-        nullptr, StartValue,
+        StartValue->getType()->getPointerElementType(), StartValue,
         CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
                                            &*B.GetInsertPoint())));
   }
@@ -2849,26 +2907,42 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
     BCResumeVal->addIncoming(EndValue, MiddleBlock);
 
     // Fix the scalar body counter (PHI node).
-    unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
-
     // The old induction's phi node in the scalar body needs the truncated
     // value.
     for (BasicBlock *BB : LoopBypassBlocks)
       BCResumeVal->addIncoming(II.getStartValue(), BB);
-    OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
+    OrigPhi->setIncomingValueForBlock(ScalarPH, BCResumeVal);
   }
 
+  // We need the OrigLoop (scalar loop part) latch terminator to help
+  // produce correct debug info for the middle block BB instructions.
+  // The legality check stage guarantees that the loop will have a single
+  // latch.
+  assert(isa<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()) &&
+         "Scalar loop latch terminator isn't a branch");
+  BranchInst *ScalarLatchBr =
+      cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+
   // Add a check in the middle block to see if we have completed
   // all of the iterations in the first vector loop.
   // If (N - N%VF) == N, then we *don't* need to run the remainder.
   // If tail is to be folded, we know we don't need to run the remainder.
   Value *CmpN = Builder.getTrue();
-  if (!Cost->foldTailByMasking())
+  if (!Cost->foldTailByMasking()) {
     CmpN =
         CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
                         CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
-  ReplaceInstWithInst(MiddleBlock->getTerminator(),
-                      BranchInst::Create(ExitBlock, ScalarPH, CmpN));
+
+    // Here we use the same DebugLoc as the scalar loop latch branch instead
+    // of the corresponding compare because they may have ended up with
+    // different line numbers and we want to avoid awkward line stepping while
+    // debugging. E.g., if the compare has got a line number inside the loop.
+    cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
+  }
+
+  BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
+  BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
+  ReplaceInstWithInst(MiddleBlock->getTerminator(), BrInst);
 
   // Get ready to start creating new instructions into the vectorized body.
   Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
@@ -3022,45 +3096,9 @@ static void cse(BasicBlock *BB) {
   }
 }
 
-/// Estimate the overhead of scalarizing an instruction. This is a
-/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
-                                         const TargetTransformInfo &TTI) {
-  if (VF == 1)
-    return 0;
-
-  unsigned Cost = 0;
-  Type *RetTy = ToVectorTy(I->getType(), VF);
-  if (!RetTy->isVoidTy() &&
-      (!isa<LoadInst>(I) ||
-       !TTI.supportsEfficientVectorElementLoadStore()))
-    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-
-  // Some targets keep addresses scalar.
-  if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
-    return Cost;
-
-  if (CallInst *CI = dyn_cast<CallInst>(I)) {
-    SmallVector<const Value *, 4> Operands(CI->arg_operands());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
-  else if (!isa<StoreInst>(I) ||
-           !TTI.supportsEfficientVectorElementLoadStore()) {
-    SmallVector<const Value *, 4> Operands(I->operand_values());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
-
-  return Cost;
-}
-
-// Estimate cost of a call instruction CI if it were vectorized with factor VF.
-// Return the cost of the instruction, including scalarization overhead if it's
-// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
-// i.e. either vector version isn't available, or is too expensive.
-static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
-                                  const TargetTransformInfo &TTI,
-                                  const TargetLibraryInfo *TLI,
-                                  bool &NeedToScalarize) {
+unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+                                                       unsigned VF,
+                                                       bool &NeedToScalarize) {
   Function *F = CI->getCalledFunction();
   StringRef FnName = CI->getCalledFunction()->getName();
   Type *ScalarRetTy = CI->getType();
@@ -3083,7 +3121,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
 
   // Compute costs of unpacking argument values for the scalar calls and
   // packing the return values to a vector.
-  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
+  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
 
   unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
 
@@ -3102,12 +3140,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
   return Cost;
 }
 
-// Estimate cost of an intrinsic call instruction CI if it were vectorized with
-// factor VF.  Return the cost of the instruction, including scalarization
-// overhead if it's needed.
-static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
-                                       const TargetTransformInfo &TTI,
-                                       const TargetLibraryInfo *TLI) {
+unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+                                                            unsigned VF) {
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
   assert(ID && "Expected intrinsic call!");
 
@@ -3468,7 +3502,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
     Start->addIncoming(Incoming, BB);
   }
 
-  Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
+  Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
   Phi->setName("scalar.recur");
 
   // Finally, fix users of the recurrence outside the loop. The users will need
@@ -3596,14 +3630,23 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
   // Reduce all of the unrolled parts into a single vector.
   Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
   unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
-  setDebugLocFromInst(Builder, ReducedPartRdx);
+
+  // The middle block terminator has already been assigned a DebugLoc here (the
+  // OrigLoop's single latch terminator). We want the whole middle block to
+  // appear to execute on this line because: (a) it is all compiler generated,
+  // (b) these instructions are always executed after evaluating the latch
+  // conditional branch, and (c) other passes may add new predecessors which
+  // terminate on this line. This is the easiest way to ensure we don't
+  // accidentally cause an extra step back into the loop while debugging.
+  setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
   for (unsigned Part = 1; Part < UF; ++Part) {
     Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
     if (Op != Instruction::ICmp && Op != Instruction::FCmp)
       // Floating point operations had to be 'fast' to enable the reduction.
       ReducedPartRdx = addFastMathFlag(
           Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
-                              ReducedPartRdx, "bin.rdx"));
+                              ReducedPartRdx, "bin.rdx"),
+          RdxDesc.getFastMathFlags());
     else
       ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
                                       RdxPart);
@@ -3935,9 +3978,11 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
 
         // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
         // but it should be a vector, otherwise.
-        auto *NewGEP = GEP->isInBounds()
-                           ? Builder.CreateInBoundsGEP(Ptr, Indices)
-                           : Builder.CreateGEP(Ptr, Indices);
+        auto *NewGEP =
+            GEP->isInBounds()
+                ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
+                                            Indices)
+                : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
         assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
                "NewGEP is not a pointer vector");
         VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
@@ -3955,6 +4000,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
   case Instruction::FAdd:
   case Instruction::Sub:
   case Instruction::FSub:
+  case Instruction::FNeg:
   case Instruction::Mul:
   case Instruction::FMul:
   case Instruction::FDiv:
@@ -3965,21 +4011,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    // Just widen binops.
-    auto *BinOp = cast<BinaryOperator>(&I);
-    setDebugLocFromInst(Builder, BinOp);
+    // Just widen unops and binops.
+    setDebugLocFromInst(Builder, &I);
 
     for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
-      Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
-      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+      SmallVector<Value *, 2> Ops;
+      for (Value *Op : I.operands())
+        Ops.push_back(getOrCreateVectorValue(Op, Part));
 
-      if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
-        VecOp->copyIRFlags(BinOp);
+      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+      if (auto *VecOp = dyn_cast<Instruction>(V))
+        VecOp->copyIRFlags(&I);
 
       // Use this vector value for all users of the original instruction.
       VectorLoopValueMap.setVectorValue(&I, Part, V);
-      addMetadata(V, BinOp);
+      addMetadata(V, &I);
     }
 
     break;
@@ -4088,9 +4135,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
     // version of the instruction.
     // Is it beneficial to perform intrinsic call compared to lib call?
     bool NeedToScalarize;
-    unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+    unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
     bool UseVectorIntrinsic =
-        ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+        ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
     assert((UseVectorIntrinsic || !NeedToScalarize) &&
            "Instruction should be scalarized elsewhere.");
 
@@ -4395,6 +4442,13 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
   auto *Group = getInterleavedAccessGroup(I);
   assert(Group && "Must have a group.");
 
+  // If the instruction's allocated size doesn't equal its type size, it
+  // requires padding and will be scalarized.
+  auto &DL = I->getModule()->getDataLayout();
+  auto *ScalarTy = getMemInstValueType(I);
+  if (hasIrregularType(ScalarTy, DL, VF))
+    return false;
+
   // Check if masking is required.
   // A Group may need masking for one of two reasons: it resides in a block that
   // needs predication, or it was decided to use masking to deal with gaps.
@@ -4987,6 +5041,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
   if (LoopCost == 0)
     LoopCost = expectedCost(VF).first;
 
+  assert(LoopCost && "Non-zero loop cost expected");
+
   // Clamp the calculated IC to be between the 1 and the max interleave count
   // that the target allows.
   if (IC > MaxInterleaveCount)
@@ -5314,15 +5370,6 @@ int LoopVectorizationCostModel::computePredInstDiscount(
     return true;
   };
 
-  // Returns true if an operand that cannot be scalarized must be extracted
-  // from a vector. We will account for this scalarization overhead below. Note
-  // that the non-void predicated instructions are placed in their own blocks,
-  // and their return values are inserted into vectors. Thus, an extract would
-  // still be required.
-  auto needsExtract = [&](Instruction *I) -> bool {
-    return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
-  };
-
   // Compute the expected cost discount from scalarizing the entire expression
   // feeding the predicated instruction. We currently only consider expressions
   // that are single-use instruction chains.
@@ -5362,7 +5409,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
                "Instruction has non-scalar type");
         if (canBeScalarized(J))
           Worklist.push_back(J);
-        else if (needsExtract(J))
+        else if (needsExtract(J, VF))
           ScalarCost += TTI.getScalarizationOverhead(
                               ToVectorTy(J->getType(),VF), false, true);
       }
@@ -5484,7 +5531,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
-  Cost += getScalarizationOverhead(I, VF, TTI);
+  Cost += getScalarizationOverhead(I, VF);
 
   // If we have a predicated store, it may not be executed for each vector
   // lane. Scale the cost by the probability of executing the predicated
@@ -5636,6 +5683,36 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 
+unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+                                                              unsigned VF) {
+
+  if (VF == 1)
+    return 0;
+
+  unsigned Cost = 0;
+  Type *RetTy = ToVectorTy(I->getType(), VF);
+  if (!RetTy->isVoidTy() &&
+      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
+    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+
+  // Some targets keep load addresses scalar; add no operand overhead then.
+  if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+    return Cost;
+
+  // Some targets support efficient element stores; add no operand overhead.
+  if (isa<StoreInst>(I) && TTI.supportsEfficientVectorElementLoadStore())
+    return Cost;
+
+  // Collect operands to consider: for calls, only the argument operands.
+  CallInst *CI = dyn_cast<CallInst>(I);
+  Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+
+  // Skip operands that do not require extraction/scalarization and do not incur
+  // any overhead.
+  return Cost + TTI.getOperandsScalarizationOverhead(
+                    filterExtractingOperands(Ops, VF), VF);
+}
+
 void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
   if (VF == 1)
     return;
@@ -5876,7 +5953,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
 
       // The cost of insertelement and extractelement instructions needed for
       // scalarization.
-      Cost += getScalarizationOverhead(I, VF, TTI);
+      Cost += getScalarizationOverhead(I, VF);
 
       // Scale the cost by the probability of executing the predicated blocks.
       // This assumes the predicated block for each vector lane is equally
@@ -5916,6 +5993,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                    I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
                    Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
   }
+  case Instruction::FNeg: {
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+    return N * TTI.getArithmeticInstrCost(
+                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+                   I->getOperand(0));
+  }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
     const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -5997,16 +6082,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::Call: {
     bool NeedToScalarize;
     CallInst *CI = cast<CallInst>(I);
-    unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+    unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
     if (getVectorIntrinsicIDForCall(CI, TLI))
-      return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+      return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
     return CallCost;
   }
   default:
     // The cost of executing VF copies of the scalar instruction. This opcode
     // is unknown. Assume that it is the same as 'mul'.
     return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
-           getScalarizationOverhead(I, VF, TTI);
+           getScalarizationOverhead(I, VF);
   } // end of switch.
 }
 
@@ -6027,10 +6112,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
 
 namespace llvm {
 
+Pass *createLoopVectorizePass() { return new LoopVectorize(); }
+
 Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
                               bool VectorizeOnlyWhenForced) {
   return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
@@ -6066,50 +6154,65 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
   }
 }
 
+// TODO: we could return a pair of values that specify the max VF and
+// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
+// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
+// doesn't have a cost model that can choose which plan to execute if
+// more than one is generated.
+static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
+                                 LoopVectorizationCostModel &CM) {
+  unsigned WidestType;
+  std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
+  return WidestVectorRegBits / WidestType;
+}
+
 VectorizationFactor
 LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
                                                 unsigned UserVF) {
-  // Width 1 means no vectorization, cost 0 means uncomputed cost.
-  const VectorizationFactor NoVectorization = {1U, 0U};
-
+  unsigned VF = UserVF;
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in
   // the vectorization pipeline.
   if (!OrigLoop->empty()) {
-    // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
-    // This won't be necessary when UserVF is not required in the VPlan-native
-    // path.
-    if (VPlanBuildStressTest && !UserVF)
-      UserVF = 4;
-
+    // If the user doesn't provide a vectorization factor, determine a
+    // reasonable one.
+    if (!UserVF) {
+      VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+      LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
+
+      // Make sure we have a VF > 1 for stress testing.
+      if (VPlanBuildStressTest && VF < 2) {
+        LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
+                          << "overriding computed VF.\n");
+        VF = 4;
+      }
+    }
     assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
-    assert(UserVF && "Expected UserVF for outer loop vectorization.");
-    assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
-    LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
-    buildVPlans(UserVF, UserVF);
+    assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
+    LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user " : "") << "VF " << VF
+                      << " to build VPlans.\n");
+    buildVPlans(VF, VF);
 
     // For VPlan build stress testing, we bail out after VPlan construction.
     if (VPlanBuildStressTest)
-      return NoVectorization;
+      return VectorizationFactor::Disabled();
 
-    return {UserVF, 0};
+    return {VF, 0};
   }
 
   LLVM_DEBUG(
       dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the "
                 "VPlan-native path.\n");
-  return NoVectorization;
+  return VectorizationFactor::Disabled();
 }
 
-VectorizationFactor
-LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
+Optional<VectorizationFactor> LoopVectorizationPlanner::plan(bool OptForSize,
+                                                             unsigned UserVF) {
   assert(OrigLoop->empty() && "Inner loop expected.");
-  // Width 1 means no vectorization, cost 0 means uncomputed cost.
-  const VectorizationFactor NoVectorization = {1U, 0U};
   Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
-  if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
-    return NoVectorization;
+  if (!MaybeMaxVF) // Cases that should not be vectorized nor interleaved.
+    return None;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
   if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
@@ -6129,7 +6232,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
     CM.selectUserVectorizationFactor(UserVF);
     buildVPlansWithVPRecipes(UserVF, UserVF);
     LLVM_DEBUG(printPlans(dbgs()));
-    return {UserVF, 0};
+    return {{UserVF, 0}};
   }
 
   unsigned MaxVF = MaybeMaxVF.getValue();
@@ -6148,7 +6251,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
   buildVPlansWithVPRecipes(1, MaxVF);
   LLVM_DEBUG(printPlans(dbgs()));
   if (MaxVF == 1)
-    return NoVectorization;
+    return VectorizationFactor::Disabled();
 
   // Select the optimal vectorization factor.
   return CM.selectVectorizationFactor(MaxVF);
@@ -6527,6 +6630,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
     case Instruction::FCmp:
     case Instruction::FDiv:
     case Instruction::FMul:
+    case Instruction::FNeg:
     case Instruction::FPExt:
     case Instruction::FPToSI:
     case Instruction::FPToUI:
@@ -6582,9 +6686,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
       // version of the instruction.
       // Is it beneficial to perform intrinsic call compared to lib call?
       bool NeedToScalarize;
-      unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+      unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
       bool UseVectorIntrinsic =
-          ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+          ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
       return UseVectorIntrinsic || !NeedToScalarize;
     }
     if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6756,8 +6860,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
   }
 }
 
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlanWithVPRecipes(
+VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
     SmallPtrSetImpl<Instruction *> &DeadInstructions) {
   // Hold a mapping from predicated instructions to their recipes, in order to
@@ -6772,7 +6875,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
   auto Plan = llvm::make_unique<VPlan>(VPBB);
 
-  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
+  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
   // Represent values that will have defs inside VPlan.
   for (Value *V : NeedDef)
     Plan->addVPValue(V);
@@ -6881,8 +6984,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   return Plan;
 }
 
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in
@@ -6897,13 +6999,22 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
   HCFGBuilder.buildHierarchicalCFG();
 
+  for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+    Plan->addVF(VF);
+
+  if (EnableVPlanPredication) {
+    VPlanPredicator VPP(*Plan);
+    VPP.predicate();
+
+    // Avoid running transformation to recipes until masked code generation in
+    // VPlan-native path is in place.
+    return Plan;
+  }
+
   SmallPtrSet<Instruction *, 1> DeadInstructions;
   VPlanHCFGTransforms::VPInstructionsToVPRecipes(
       Plan, Legal->getInductionVars(), DeadInstructions);
 
-  for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
-    Plan->addVF(VF);
-
   return Plan;
 }
 
@@ -7096,7 +7207,8 @@ static bool processLoopInVPlanNativePath(
     Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
     LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
     TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
-    OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+    OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
 
   assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
   Function *F = L->getHeader()->getParent();
@@ -7109,24 +7221,28 @@ static bool processLoopInVPlanNativePath(
   LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
 
   // Get user vectorization factor.
-  unsigned UserVF = Hints.getWidth();
+  const unsigned UserVF = Hints.getWidth();
 
-  // Check the function attributes to find out if this function should be
-  // optimized for size.
+  // Check the function attributes and profiles to find out if this function
+  // should be optimized for size.
   bool OptForSize =
-      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+      Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+      (F->hasOptSize() ||
+       llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
 
   // Plan how to best vectorize, return the best VF and its cost.
-  VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+  const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
 
   // If we are stress testing VPlan builds, do not attempt to generate vector
-  // code.
-  if (VPlanBuildStressTest)
+  // code. Masked vector code generation support will follow soon.
+  // Also, do not attempt to vectorize if no vector code will be produced.
+  if (VPlanBuildStressTest || EnableVPlanPredication ||
+      VectorizationFactor::Disabled() == VF)
     return false;
 
   LVP.setBestPlan(VF.Width, 1);
 
-  InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+  InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
                          &CM);
   LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
                     << L->getHeader()->getParent()->getName() << "\"\n");
@@ -7184,7 +7300,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Check if it is legal to vectorize the loop.
   LoopVectorizationRequirements Requirements(*ORE);
-  LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE,
+  LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
                                 &Requirements, &Hints, DB, AC);
   if (!LVL.canVectorize(EnableVPlanNativePath)) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
@@ -7192,10 +7308,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
-  // Check the function attributes to find out if this function should be
-  // optimized for size.
+  // Check the function attributes and profiles to find out if this function
+  // should be optimized for size.
   bool OptForSize =
-      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+      Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+      (F->hasOptSize() ||
+       llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
 
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
@@ -7204,7 +7322,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // pipeline.
   if (!L->empty())
     return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
-                                        ORE, Hints);
+                                        ORE, BFI, PSI, Hints);
 
   assert(L->empty() && "Inner loop expected.");
   // Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7304,14 +7422,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   unsigned UserVF = Hints.getWidth();
 
   // Plan how to best vectorize, return the best VF and its cost.
-  VectorizationFactor VF = LVP.plan(OptForSize, UserVF);
+  Optional<VectorizationFactor> MaybeVF = LVP.plan(OptForSize, UserVF);
 
-  // Select the interleave count.
-  unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
-
-  // Get user interleave count.
+  VectorizationFactor VF = VectorizationFactor::Disabled();
+  unsigned IC = 1;
   unsigned UserIC = Hints.getInterleave();
 
+  if (MaybeVF) {
+    VF = *MaybeVF;
+    // Select the interleave count.
+    IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
+  }
+
   // Identify the diagnostic messages that should be produced.
   std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
   bool VectorizeLoop = true, InterleaveLoop = true;
@@ -7330,7 +7452,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     VectorizeLoop = false;
   }
 
-  if (IC == 1 && UserIC <= 1) {
+  if (!MaybeVF && UserIC > 1) {
+    // Tell the user interleaving was avoided up-front, despite being explicitly
+    // requested.
+    LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
+                         "interleaving should be avoided up front\n");
+    IntDiagMsg = std::make_pair(
+        "InterleavingAvoided",
+        "Ignoring UserIC, because interleaving was avoided up front");
+    InterleaveLoop = false;
+  } else if (IC == 1 && UserIC <= 1) {
     // Tell the user interleaving is not beneficial.
     LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
     IntDiagMsg = std::make_pair(
@@ -7457,7 +7588,7 @@ bool LoopVectorizePass::runImpl(
     DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
     DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
     std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
-    OptimizationRemarkEmitter &ORE_) {
+    OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
   SE = &SE_;
   LI = &LI_;
   TTI = &TTI_;
@@ -7469,6 +7600,7 @@ bool LoopVectorizePass::runImpl(
   GetLAA = &GetLAA_;
   DB = &DB_;
   ORE = &ORE_;
+  PSI = PSI_;
 
   // Don't attempt if
   // 1. the target claims to have no vector registers, and
@@ -7488,7 +7620,8 @@ bool LoopVectorizePass::runImpl(
   // will simplify all loops, regardless of whether anything end up being
   // vectorized.
   for (auto &L : *LI)
-    Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
+    Changed |=
+        simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
 
   // Build up a worklist of inner-loops to vectorize. This is necessary as
   // the act of vectorizing or partially unrolling a loop creates new loops
@@ -7527,15 +7660,22 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
     auto &AC = AM.getResult<AssumptionAnalysis>(F);
     auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
     auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+    MemorySSA *MSSA = EnableMSSALoopDependency
+                          ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+                          : nullptr;
 
     auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
         [&](Loop &L) -> const LoopAccessInfo & {
-      LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
+      LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
       return LAM.getResult<LoopAccessAnalysis>(L, AR);
     };
+    const ModuleAnalysisManager &MAM =
+        AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+    ProfileSummaryInfo *PSI =
+        MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
     bool Changed =
-        runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+        runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
     if (!Changed)
       return PreservedAnalyses::all();
     PreservedAnalyses PA;
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e856a7e6802..27a86c0bca91 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1,9 +1,8 @@
 //===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -106,6 +105,10 @@ using namespace slpvectorizer;
 
 STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
 
+cl::opt<bool>
+    llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
+                              cl::desc("Run the SLP vectorization passes"));
+
 static cl::opt<int>
     SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
                      cl::desc("Only vectorize if you gain more than this "
@@ -207,6 +210,13 @@ static bool isSplat(ArrayRef<Value *> VL) {
   return true;
 }
 
+/// \returns True if \p I is commutative, handles CmpInst as well as Instruction.
+static bool isCommutative(Instruction *I) {
+  if (auto *IC = dyn_cast<CmpInst>(I))
+    return IC->isCommutative();
+  return I->isCommutative();
+}
+
 /// Checks if the vector of instructions can be represented as a shuffle, like:
 /// %x0 = extractelement <4 x i8> %x, i32 0
 /// %x3 = extractelement <4 x i8> %x, i32 3
@@ -438,8 +448,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
   case Instruction::Call: {
     CallInst *CI = cast<CallInst>(UserInst);
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-    if (hasVectorInstrinsicScalarOpd(ID, 1)) {
-      return (CI->getArgOperand(1) == Scalar);
+    for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+      if (hasVectorInstrinsicScalarOpd(ID, i))
+        return (CI->getArgOperand(i) == Scalar);
     }
     LLVM_FALLTHROUGH;
   }
@@ -474,6 +485,8 @@ namespace slpvectorizer {
 
 /// Bottom Up SLP Vectorizer.
 class BoUpSLP {
+  struct TreeEntry;
+
 public:
   using ValueList = SmallVector<Value *, 8>;
   using InstrList = SmallVector<Instruction *, 16>;
@@ -517,7 +530,7 @@ public:
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
-  int getSpillCost();
+  int getSpillCost() const;
 
   /// \returns the vectorization cost of the subtree that starts at \p VL.
   /// A negative number means that this is profitable.
@@ -576,7 +589,7 @@ public:
   /// the stored value. Otherwise, the size is the width of the largest loaded
   /// value reaching V. This method is used by the vectorizer to calculate
   /// vectorization factors.
-  unsigned getVectorElementSize(Value *V);
+  unsigned getVectorElementSize(Value *V) const;
 
   /// Compute the minimum type sizes required to represent the entries in a
   /// vectorizable tree.
@@ -599,13 +612,512 @@ public:
 
   /// \returns True if the VectorizableTree is both tiny and not fully
   /// vectorizable. We do not vectorize such trees.
-  bool isTreeTinyAndNotFullyVectorizable();
+  bool isTreeTinyAndNotFullyVectorizable() const;
 
   OptimizationRemarkEmitter *getORE() { return ORE; }
 
-private:
-  struct TreeEntry;
+  /// This structure holds any data we need about the edges being traversed
+  /// during buildTree_rec(). We keep track of:
+  /// (i) the user TreeEntry index, and
+  /// (ii) the index of the edge.
+  struct EdgeInfo {
+    EdgeInfo() = default;
+    EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
+        : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
+    /// The user TreeEntry.
+    TreeEntry *UserTE = nullptr;
+    /// The operand index of the use.
+    unsigned EdgeIdx = UINT_MAX;
+#ifndef NDEBUG
+    friend inline raw_ostream &operator<<(raw_ostream &OS,
+                                          const BoUpSLP::EdgeInfo &EI) {
+      EI.dump(OS);
+      return OS;
+    }
+    /// Debug print.
+    void dump(raw_ostream &OS) const {
+      OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
+         << " EdgeIdx:" << EdgeIdx << "}";
+    }
+    LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
+#endif
+  };
+
+  /// A helper data structure to hold the operands of a vector of instructions.
+  /// This supports a fixed vector length for all operand vectors.
+  class VLOperands {
+    /// For each operand we need (i) the value, and (ii) the opcode that it
+    /// would be attached to if the expression was in a left-linearized form.
+    /// This is required to avoid illegal operand reordering.
+    /// For example:
+    /// \verbatim
+    ///                         0 Op1
+    ///                         |/
+    /// Op1 Op2   Linearized    + Op2
+    ///   \ /     ---------->   |/
+    ///    -                    -
+    ///
+    /// Op1 - Op2            (0 + Op1) - Op2
+    /// \endverbatim
+    ///
+    /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+    ///
+    /// Another way to think of this is to track all the operations across the
+    /// path from the operand all the way to the root of the tree and to
+    /// calculate the operation that corresponds to this path. For example, the
+    /// path from Op2 to the root crosses the RHS of the '-', therefore the
+    /// corresponding operation is a '-' (which matches the one in the
+    /// linearized tree, as shown above).
+    ///
+    /// For lack of a better term, we refer to this operation as Accumulated
+    /// Path Operation (APO).
+    struct OperandData {
+      OperandData() = default;
+      OperandData(Value *V, bool APO, bool IsUsed)
+          : V(V), APO(APO), IsUsed(IsUsed) {}
+      /// The operand value.
+      Value *V = nullptr;
+      /// TreeEntries only allow a single opcode, or an alternate sequence of
+      /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
+      /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+      /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+      /// (e.g., Add/Mul)
+      bool APO = false;
+      /// Helper data for the reordering function.
+      bool IsUsed = false;
+    };
+
+    /// During operand reordering, we are trying to select the operand at lane
+    /// that matches best with the operand at the neighboring lane. Our
+    /// selection is based on the type of value we are looking for. For example,
+    /// if the neighboring lane has a load, we need to look for a load that is
+    /// accessing a consecutive address. These strategies are summarized in the
+    /// 'ReorderingMode' enumerator.
+    enum class ReorderingMode {
+      Load,     ///< Matching loads to consecutive memory addresses
+      Opcode,   ///< Matching instructions based on opcode (same or alternate)
+      Constant, ///< Matching constants
+      Splat,    ///< Matching the same instruction multiple times (broadcast)
+      Failed,   ///< We failed to create a vectorizable group
+    };
+
+    using OperandDataVec = SmallVector<OperandData, 2>;
+
+    /// A vector of operand vectors.
+    SmallVector<OperandDataVec, 4> OpsVec;
+
+    const DataLayout &DL;
+    ScalarEvolution &SE;
+
+    /// \returns the operand data at \p OpIdx and \p Lane.
+    OperandData &getData(unsigned OpIdx, unsigned Lane) {
+      return OpsVec[OpIdx][Lane];
+    }
+
+    /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+    const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+      return OpsVec[OpIdx][Lane];
+    }
+
+    /// Clears the used flag for all entries.
+    void clearUsed() {
+      for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+           OpIdx != NumOperands; ++OpIdx)
+        for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+             ++Lane)
+          OpsVec[OpIdx][Lane].IsUsed = false;
+    }
+
+    /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+    void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+      std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+    }
+
+    // Search all operands in Ops[*][Lane] for the one that matches best
+    // Ops[OpIdx][LastLane] and return its operand index.
+    // If no good match can be found, return None.
+    Optional<unsigned>
+    getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+                   ArrayRef<ReorderingMode> ReorderingModes) {
+      unsigned NumOperands = getNumOperands();
+
+      // The operand of the previous lane at OpIdx.
+      Value *OpLastLane = getData(OpIdx, LastLane).V;
+
+      // Our strategy mode for OpIdx.
+      ReorderingMode RMode = ReorderingModes[OpIdx];
+
+      // The linearized opcode of the operand at OpIdx, Lane.
+      bool OpIdxAPO = getData(OpIdx, Lane).APO;
+
+      const unsigned BestScore = 2;
+      const unsigned GoodScore = 1;
+
+      // The best operand index and its score.
+      // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
+      // are using the score to differentiate between the two.
+      struct BestOpData {
+        Optional<unsigned> Idx = None;
+        unsigned Score = 0;
+      } BestOp;
+
+      // Iterate through all unused operands and look for the best.
+      for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
+        // Get the operand at Idx and Lane.
+        OperandData &OpData = getData(Idx, Lane);
+        Value *Op = OpData.V;
+        bool OpAPO = OpData.APO;
+
+        // Skip already selected operands.
+        if (OpData.IsUsed)
+          continue;
+
+        // Skip if we are trying to move the operand to a position with a
+        // different opcode in the linearized tree form. This would break the
+        // semantics.
+        if (OpAPO != OpIdxAPO)
+          continue;
+
+        // Look for an operand that matches the current mode.
+        switch (RMode) {
+        case ReorderingMode::Load:
+          if (isa<LoadInst>(Op)) {
+            // Figure out which is left and right, so that we can check for
+            // consecutive loads
+            bool LeftToRight = Lane > LastLane;
+            Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+            Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+            if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
+                                    cast<LoadInst>(OpRight), DL, SE))
+              BestOp.Idx = Idx;
+          }
+          break;
+        case ReorderingMode::Opcode:
+          // We accept both Instructions and Undefs, but with different scores.
+          if ((isa<Instruction>(Op) && isa<Instruction>(OpLastLane) &&
+               cast<Instruction>(Op)->getOpcode() ==
+                   cast<Instruction>(OpLastLane)->getOpcode()) ||
+              (isa<UndefValue>(OpLastLane) && isa<Instruction>(Op)) ||
+              isa<UndefValue>(Op)) {
+            // An instruction has a higher score than an undef.
+            unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+            if (Score > BestOp.Score) {
+              BestOp.Idx = Idx;
+              BestOp.Score = Score;
+            }
+          }
+          break;
+        case ReorderingMode::Constant:
+          if (isa<Constant>(Op)) {
+            unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+            if (Score > BestOp.Score) {
+              BestOp.Idx = Idx;
+              BestOp.Score = Score;
+            }
+          }
+          break;
+        case ReorderingMode::Splat:
+          if (Op == OpLastLane)
+            BestOp.Idx = Idx;
+          break;
+        case ReorderingMode::Failed:
+          return None;
+        }
+      }
+
+      if (BestOp.Idx) {
+        getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
+        return BestOp.Idx;
+      }
+      // If we could not find a good match return None.
+      return None;
+    }
+
+    /// Helper for reorderOperandVecs. \Returns the lane that we should start
+    /// reordering from. This is the one which has the least number of operands
+    /// that can freely move about.
+    unsigned getBestLaneToStartReordering() const {
+      unsigned BestLane = 0;
+      unsigned Min = UINT_MAX;
+      for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+           ++Lane) {
+        unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
+        if (NumFreeOps < Min) {
+          Min = NumFreeOps;
+          BestLane = Lane;
+        }
+      }
+      return BestLane;
+    }
+
+    /// \Returns the maximum number of operands that are allowed to be reordered
+    /// for \p Lane. This is used as a heuristic for selecting the first lane to
+    /// start operand reordering.
+    unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
+      unsigned CntTrue = 0;
+      unsigned NumOperands = getNumOperands();
+      // Operands with the same APO can be reordered. We therefore need to count
+      // how many of them we have for each APO, like this: Cnt[APO] = x.
+      // Since we only have two APOs, namely true and false, we can avoid using
+      // a map. Instead we can simply count the number of operands that
+      // correspond to one of them (in this case the 'true' APO), and calculate
+      // the other by subtracting it from the total number of operands.
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
+        if (getData(OpIdx, Lane).APO)
+          ++CntTrue;
+      unsigned CntFalse = NumOperands - CntTrue;
+      return std::max(CntTrue, CntFalse);
+    }
+
+    /// Go through the instructions in VL and append their operands.
+    void appendOperandsOfVL(ArrayRef<Value *> VL) {
+      assert(!VL.empty() && "Bad VL");
+      assert((empty() || VL.size() == getNumLanes()) &&
+             "Expected same number of lanes");
+      assert(isa<Instruction>(VL[0]) && "Expected instruction");
+      unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+      OpsVec.resize(NumOperands);
+      unsigned NumLanes = VL.size();
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+        OpsVec[OpIdx].resize(NumLanes);
+        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+          assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+          // Our tree has just 3 nodes: the root and two operands.
+          // It is therefore trivial to get the APO. We only need to check the
+          // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
+          // RHS operand. The LHS operand of both add and sub is never attached
+          // to an inverse operation in the linearized form, therefore its APO
+          // is false. The RHS is true only if VL[Lane] is an inverse operation.
+
+          // Since operand reordering is performed on groups of commutative
+          // operations or alternating sequences (e.g., +, -), we can safely
+          // tell the inverse operations by checking commutativity.
+          bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+          bool APO = (OpIdx == 0) ? false : IsInverseOperation;
+          OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
+                                 APO, false};
+        }
+      }
+    }
+
+    /// \returns the number of operands.
+    unsigned getNumOperands() const { return OpsVec.size(); }
+
+    /// \returns the number of lanes.
+    unsigned getNumLanes() const { return OpsVec[0].size(); }
+
+    /// \returns the operand value at \p OpIdx and \p Lane.
+    Value *getValue(unsigned OpIdx, unsigned Lane) const {
+      return getData(OpIdx, Lane).V;
+    }
 
+    /// \returns true if the data structure is empty.
+    bool empty() const { return OpsVec.empty(); }
+
+    /// Clears the data.
+    void clear() { OpsVec.clear(); }
+
+    /// \returns true if there are enough operands identical to \p Op to fill
+    /// the whole vector.
+    /// Note: This modifies the 'IsUsed' flag, so a clearUsed() must follow.
+    bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
+      bool OpAPO = getData(OpIdx, Lane).APO;
+      for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+        if (Ln == Lane)
+          continue;
+        // This is set to true if we found a candidate for broadcast at Lane.
+        bool FoundCandidate = false;
+        for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
+          OperandData &Data = getData(OpI, Ln);
+          if (Data.APO != OpAPO || Data.IsUsed)
+            continue;
+          if (Data.V == Op) {
+            FoundCandidate = true;
+            Data.IsUsed = true;
+            break;
+          }
+        }
+        if (!FoundCandidate)
+          return false;
+      }
+      return true;
+    }
+
+  public:
+    /// Initialize with all the operands of the instruction vector \p RootVL.
+    VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
+               ScalarEvolution &SE)
+        : DL(DL), SE(SE) {
+      // Append all the operands of RootVL.
+      appendOperandsOfVL(RootVL);
+    }
+
+    /// \returns a value vector with the operands across all lanes for the
+    /// operand at \p OpIdx.
+    ValueList getVL(unsigned OpIdx) const {
+      ValueList OpVL(OpsVec[OpIdx].size());
+      assert(OpsVec[OpIdx].size() == getNumLanes() &&
+             "Expected same num of lanes across all operands");
+      for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
+        OpVL[Lane] = OpsVec[OpIdx][Lane].V;
+      return OpVL;
+    }
+
+    // Performs operand reordering for 2 or more operands.
+    // The original operands are in OrigOps[OpIdx][Lane].
+    // The reordered operands are returned in 'SortedOps[OpIdx][Lane]'.
+    void reorder() {
+      unsigned NumOperands = getNumOperands();
+      unsigned NumLanes = getNumLanes();
+      // Each operand has its own mode. We are using this mode to help us select
+      // the instructions for each lane, so that they match best with the ones
+      // we have selected so far.
+      SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
+
+      // This is a greedy single-pass algorithm. We are going over each lane
+      // once and deciding on the best order right away with no back-tracking.
+      // However, in order to increase its effectiveness, we start with the lane
+      // that has operands that can move the least. For example, given the
+      // following lanes:
+      //  Lane 0 : A[0] = B[0] + C[0]   // Visited 3rd
+      //  Lane 1 : A[1] = C[1] - B[1]   // Visited 1st
+      //  Lane 2 : A[2] = B[2] + C[2]   // Visited 2nd
+      //  Lane 3 : A[3] = C[3] - B[3]   // Visited 4th
+      // we will start at Lane 1, since the operands of the subtraction cannot
+      // be reordered. Then we will visit the rest of the lanes in a circular
+      // fashion. That is, Lane 2, then Lane 0, and finally Lane 3.
+
+      // Find the first lane that we will start our search from.
+      unsigned FirstLane = getBestLaneToStartReordering();
+
+      // Initialize the modes.
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+        Value *OpLane0 = getValue(OpIdx, FirstLane);
+        // Keep track if we have instructions with all the same opcode on one
+        // side.
+        if (isa<LoadInst>(OpLane0))
+          ReorderingModes[OpIdx] = ReorderingMode::Load;
+        else if (isa<Instruction>(OpLane0)) {
+          // Check if OpLane0 should be broadcast.
+          if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+            ReorderingModes[OpIdx] = ReorderingMode::Splat;
+          else
+            ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+        }
+        else if (isa<Constant>(OpLane0))
+          ReorderingModes[OpIdx] = ReorderingMode::Constant;
+        else if (isa<Argument>(OpLane0))
+          // Our best hope is a Splat. It may save some cost in some cases.
+          ReorderingModes[OpIdx] = ReorderingMode::Splat;
+        else
+          // NOTE: This should be unreachable.
+          ReorderingModes[OpIdx] = ReorderingMode::Failed;
+      }
+
+      // If the initial strategy fails for any of the operand indexes, then we
+      // perform reordering again in a second pass. This helps avoid assigning
+      // high priority to the failed strategy, and should improve reordering for
+      // the non-failed operand indexes.
+      for (int Pass = 0; Pass != 2; ++Pass) {
+        // Skip the second pass if the first pass did not fail.
+        bool StrategyFailed = false;
+        // Mark all operand data as free to use.
+        clearUsed();
+        // We keep the original operand order for the FirstLane, so reorder the
+        // rest of the lanes. We are visiting the nodes in a circular fashion,
+        // using FirstLane as the center point and increasing the radius
+        // distance.
+        for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
+          // Visit the lane on the right and then the lane on the left.
+          for (int Direction : {+1, -1}) {
+            int Lane = FirstLane + Direction * Distance;
+            if (Lane < 0 || Lane >= (int)NumLanes)
+              continue;
+            int LastLane = Lane - Direction;
+            assert(LastLane >= 0 && LastLane < (int)NumLanes &&
+                   "Out of bounds");
+            // Look for a good match for each operand.
+            for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+              // Search for the operand that matches SortedOps[OpIdx][Lane-1].
+              Optional<unsigned> BestIdx =
+                  getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
+              // By not selecting a value, we allow the operands that follow to
+              // select a better matching value. We will get a non-null value in
+              // the next run of getBestOperand().
+              if (BestIdx) {
+                // Swap the current operand with the one returned by
+                // getBestOperand().
+                swap(OpIdx, BestIdx.getValue(), Lane);
+              } else {
+                // We failed to find a best operand, set mode to 'Failed'.
+                ReorderingModes[OpIdx] = ReorderingMode::Failed;
+                // Enable the second pass.
+                StrategyFailed = true;
+              }
+            }
+          }
+        }
+        // Skip second pass if the strategy did not fail.
+        if (!StrategyFailed)
+          break;
+      }
+    }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+    LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
+      switch (RMode) {
+      case ReorderingMode::Load:
+        return "Load";
+      case ReorderingMode::Opcode:
+        return "Opcode";
+      case ReorderingMode::Constant:
+        return "Constant";
+      case ReorderingMode::Splat:
+        return "Splat";
+      case ReorderingMode::Failed:
+        return "Failed";
+      }
+      llvm_unreachable("Unimplemented Reordering Type");
+    }
+
+    LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
+                                                   raw_ostream &OS) {
+      return OS << getModeStr(RMode);
+    }
+
+    /// Debug print.
+    LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
+      printMode(RMode, dbgs());
+    }
+
+    friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
+      return printMode(RMode, OS);
+    }
+
+    LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
+      const unsigned Indent = 2;
+      unsigned Cnt = 0;
+      for (const OperandDataVec &OpDataVec : OpsVec) {
+        OS << "Operand " << Cnt++ << "\n";
+        for (const OperandData &OpData : OpDataVec) {
+          OS.indent(Indent) << "{";
+          if (Value *V = OpData.V)
+            OS << *V;
+          else
+            OS << "null";
+          OS << ", APO:" << OpData.APO << "}\n";
+        }
+        OS << "\n";
+      }
+      return OS;
+    }
+
+    /// Debug print.
+    LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+  };
+
+private:
   /// Checks if all users of \p I are the part of the vectorization tree.
   bool areAllUsersVectorized(Instruction *I) const;
 
@@ -613,7 +1125,8 @@ private:
   int getEntryCost(TreeEntry *E);
 
   /// This is the recursive part of buildTree.
-  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
+  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
+                     const EdgeInfo &EI);
 
   /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
   /// be vectorized to use the original vector (or aggregate "bitcast" to a
@@ -631,12 +1144,12 @@ private:
 
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices);
+  int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices) const;
 
   /// \returns the scalarization cost for this list of values. Assuming that
   /// this subtree gets vectorized, we may need to extract the values from the
   /// roots. This method calculates the cost of extracting the values.
-  int getGatherCost(ArrayRef<Value *> VL);
+  int getGatherCost(ArrayRef<Value *> VL) const;
 
   /// Set the Builder insert point to one after the last instruction in
   /// the bundle
@@ -648,22 +1161,18 @@ private:
 
   /// \returns whether the VectorizableTree is fully vectorizable and will
   /// be beneficial even the tree height is tiny.
-  bool isFullyVectorizableTinyTree();
+  bool isFullyVectorizableTinyTree() const;
 
-  /// \reorder commutative operands in alt shuffle if they result in
-  ///  vectorized code.
-  void reorderAltShuffleOperands(const InstructionsState &S,
-                                 ArrayRef<Value *> VL,
-                                 SmallVectorImpl<Value *> &Left,
-                                 SmallVectorImpl<Value *> &Right);
-
-  /// \reorder commutative operands to get better probability of
+  /// Reorder commutative or alt operands to get better probability of
   /// generating vectorized code.
-  void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
-                                      SmallVectorImpl<Value *> &Left,
-                                      SmallVectorImpl<Value *> &Right);
+  static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+                                             SmallVectorImpl<Value *> &Left,
+                                             SmallVectorImpl<Value *> &Right,
+                                             const DataLayout &DL,
+                                             ScalarEvolution &SE);
   struct TreeEntry {
-    TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
+    using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
+    TreeEntry(VecTreeTy &Container) : Container(Container) {}
 
     /// \returns true if the scalars in VL are equal to this entry.
     bool isSame(ArrayRef<Value *> VL) const {
@@ -696,20 +1205,103 @@ private:
     /// to be a pointer and needs to be able to initialize the child iterator.
     /// Thus we need a reference back to the container to translate the indices
     /// to entries.
-    std::vector<TreeEntry> &Container;
+    VecTreeTy &Container;
 
     /// The TreeEntry index containing the user of this entry.  We can actually
     /// have multiple users so the data structure is not truly a tree.
-    SmallVector<int, 1> UserTreeIndices;
+    SmallVector<EdgeInfo, 1> UserTreeIndices;
+
+    /// The index of this treeEntry in VectorizableTree.
+    int Idx = -1;
+
+  private:
+    /// The operands of each instruction in each lane Operands[op_index][lane].
+    /// Note: This helps avoid the replication of the code that performs the
+    /// reordering of operands during buildTree_rec() and vectorizeTree().
+    SmallVector<ValueList, 2> Operands;
+
+  public:
+    /// Set this bundle's \p OpIdx'th operand to \p OpVL.
+    void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
+                    ArrayRef<unsigned> ReuseShuffleIndices) {
+      if (Operands.size() < OpIdx + 1)
+        Operands.resize(OpIdx + 1);
+      assert(Operands[OpIdx].size() == 0 && "Already resized?");
+      Operands[OpIdx].resize(Scalars.size());
+      for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
+        Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
+                                    ? OpVL[ReuseShuffleIndices[Lane]]
+                                    : OpVL[Lane];
+    }
+
+    /// If there is a user TreeEntry, then set its operand.
+    void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
+                             ArrayRef<Value *> OpVL,
+                             ArrayRef<unsigned> ReuseShuffleIndices) {
+      if (UserTreeIdx.UserTE)
+        UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
+                                       ReuseShuffleIndices);
+    }
+
+    /// \returns the \p OpIdx operand of this TreeEntry.
+    ValueList &getOperand(unsigned OpIdx) {
+      assert(OpIdx < Operands.size() && "Off bounds");
+      return Operands[OpIdx];
+    }
+
+    /// \return the single \p OpIdx operand.
+    Value *getSingleOperand(unsigned OpIdx) const {
+      assert(OpIdx < Operands.size() && "Off bounds");
+      assert(!Operands[OpIdx].empty() && "No operand available");
+      return Operands[OpIdx][0];
+    }
+
+#ifndef NDEBUG
+    /// Debug printer.
+    LLVM_DUMP_METHOD void dump() const {
+      dbgs() << Idx << ".\n";
+      for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
+        dbgs() << "Operand " << OpI << ":\n";
+        for (const Value *V : Operands[OpI])
+          dbgs().indent(2) << *V << "\n";
+      }
+      dbgs() << "Scalars: \n";
+      for (Value *V : Scalars)
+        dbgs().indent(2) << *V << "\n";
+      dbgs() << "NeedToGather: " << NeedToGather << "\n";
+      dbgs() << "VectorizedValue: ";
+      if (VectorizedValue)
+        dbgs() << *VectorizedValue;
+      else
+        dbgs() << "NULL";
+      dbgs() << "\n";
+      dbgs() << "ReuseShuffleIndices: ";
+      if (ReuseShuffleIndices.empty())
+        dbgs() << "Emtpy";
+      else
+        for (unsigned Idx : ReuseShuffleIndices)
+          dbgs() << Idx << ", ";
+      dbgs() << "\n";
+      dbgs() << "ReorderIndices: ";
+      for (unsigned Idx : ReorderIndices)
+        dbgs() << Idx << ", ";
+      dbgs() << "\n";
+      dbgs() << "UserTreeIndices: ";
+      for (const auto &EInfo : UserTreeIndices)
+        dbgs() << EInfo << ", ";
+      dbgs() << "\n";
+    }
+#endif
   };
 
   /// Create a new VectorizableTree entry.
-  void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
-                    ArrayRef<unsigned> ReuseShuffleIndices = None,
-                    ArrayRef<unsigned> ReorderIndices = None) {
-    VectorizableTree.emplace_back(VectorizableTree);
-    int idx = VectorizableTree.size() - 1;
-    TreeEntry *Last = &VectorizableTree[idx];
+  TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+                          const EdgeInfo &UserTreeIdx,
+                          ArrayRef<unsigned> ReuseShuffleIndices = None,
+                          ArrayRef<unsigned> ReorderIndices = None) {
+    VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
+    TreeEntry *Last = VectorizableTree.back().get();
+    Last->Idx = VectorizableTree.size() - 1;
     Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
     Last->NeedToGather = !Vectorized;
     Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
@@ -718,25 +1310,44 @@ private:
     if (Vectorized) {
       for (int i = 0, e = VL.size(); i != e; ++i) {
         assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
-        ScalarToTreeEntry[VL[i]] = idx;
+        ScalarToTreeEntry[VL[i]] = Last->Idx;
       }
     } else {
       MustGather.insert(VL.begin(), VL.end());
     }
 
-    if (UserTreeIdx >= 0)
+    if (UserTreeIdx.UserTE)
       Last->UserTreeIndices.push_back(UserTreeIdx);
-    UserTreeIdx = idx;
+
+    Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
+    return Last;
   }
 
   /// -- Vectorization State --
   /// Holds all of the tree entries.
-  std::vector<TreeEntry> VectorizableTree;
+  TreeEntry::VecTreeTy VectorizableTree;
+
+#ifndef NDEBUG
+  /// Debug printer.
+  LLVM_DUMP_METHOD void dumpVectorizableTree() const {
+    for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
+      VectorizableTree[Id]->dump();
+      dbgs() << "\n";
+    }
+  }
+#endif
 
   TreeEntry *getTreeEntry(Value *V) {
     auto I = ScalarToTreeEntry.find(V);
     if (I != ScalarToTreeEntry.end())
-      return &VectorizableTree[I->second];
+      return VectorizableTree[I->second].get();
+    return nullptr;
+  }
+
+  const TreeEntry *getTreeEntry(Value *V) const {
+    auto I = ScalarToTreeEntry.find(V);
+    if (I != ScalarToTreeEntry.end())
+      return VectorizableTree[I->second].get();
     return nullptr;
   }
 
@@ -1246,21 +1857,25 @@ template <> struct GraphTraits<BoUpSLP *> {
   /// NodeRef has to be a pointer per the GraphWriter.
   using NodeRef = TreeEntry *;
 
+  using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
+
   /// Add the VectorizableTree to the index iterator to be able to return
   /// TreeEntry pointers.
   struct ChildIteratorType
-      : public iterator_adaptor_base<ChildIteratorType,
-                                     SmallVector<int, 1>::iterator> {
-    std::vector<TreeEntry> &VectorizableTree;
+      : public iterator_adaptor_base<
+            ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
+    ContainerTy &VectorizableTree;
 
-    ChildIteratorType(SmallVector<int, 1>::iterator W,
-                      std::vector<TreeEntry> &VT)
+    ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
+                      ContainerTy &VT)
         : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
 
-    NodeRef operator*() { return &VectorizableTree[*I]; }
+    NodeRef operator*() { return I->UserTE; }
   };
 
-  static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
+  static NodeRef getEntryNode(BoUpSLP &R) {
+    return R.VectorizableTree[0].get();
+  }
 
   static ChildIteratorType child_begin(NodeRef N) {
     return {N->UserTreeIndices.begin(), N->Container};
@@ -1272,7 +1887,19 @@ template <> struct GraphTraits<BoUpSLP *> {
 
   /// For the node iterator we just need to turn the TreeEntry iterator into a
   /// TreeEntry* iterator so that it dereferences to NodeRef.
-  using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>;
+  class nodes_iterator {
+    using ItTy = ContainerTy::iterator;
+    ItTy It;
+
+  public:
+    nodes_iterator(const ItTy &It2) : It(It2) {}
+    NodeRef operator*() { return It->get(); }
+    nodes_iterator operator++() {
+      ++It;
+      return *this;
+    }
+    bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
+  };
 
   static nodes_iterator nodes_begin(BoUpSLP *R) {
     return nodes_iterator(R->VectorizableTree.begin());
@@ -1331,11 +1958,11 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
   UserIgnoreList = UserIgnoreLst;
   if (!allSameType(Roots))
     return;
-  buildTree_rec(Roots, 0, -1);
+  buildTree_rec(Roots, 0, EdgeInfo());
 
   // Collect the values that we need to extract from the tree.
-  for (TreeEntry &EIdx : VectorizableTree) {
-    TreeEntry *Entry = &EIdx;
+  for (auto &TEPtr : VectorizableTree) {
+    TreeEntry *Entry = TEPtr.get();
 
     // No need to handle users of gathered values.
     if (Entry->NeedToGather)
@@ -1393,7 +2020,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
 }
 
 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
-                            int UserTreeIdx) {
+                            const EdgeInfo &UserTreeIdx) {
   assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
 
   InstructionsState S = getSameOpcode(VL);
@@ -1450,6 +2077,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     E->UserTreeIndices.push_back(UserTreeIdx);
     LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
                       << ".\n");
+    E->trySetUserTEOperand(UserTreeIdx, VL, None);
     return;
   }
 
@@ -1468,8 +2096,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
   // If any of the scalars is marked as a value that needs to stay scalar, then
   // we need to gather the scalars.
+  // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
   for (unsigned i = 0, e = VL.size(); i != e; ++i) {
-    if (MustGather.count(VL[i])) {
+    if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
       LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
       newTreeEntry(VL, false, UserTreeIdx);
       return;
@@ -1548,7 +2177,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           }
         }
 
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
 
       for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -1558,7 +2187,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
               PH->getIncomingBlock(i)));
 
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
     }
@@ -1571,6 +2200,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         ++NumOpsWantToKeepOriginalOrder;
         newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
                      ReuseShuffleIndicies);
+        // This is a special case, as it does not gather, but at the same time
+        // we are not extending buildTree_rec() towards the operands.
+        ValueList Op0;
+        Op0.assign(VL.size(), VL0->getOperand(0));
+        VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
         return;
       }
       if (!CurrentOrder.empty()) {
@@ -1588,6 +2222,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         ++StoredCurrentOrderAndNum->getSecond();
         newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
                      StoredCurrentOrderAndNum->getFirst());
+        // This is a special case, as it does not gather, but at the same time
+        // we are not extending buildTree_rec() towards the operands.
+        ValueList Op0;
+        Op0.assign(VL.size(), VL0->getOperand(0));
+        VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
         return;
       }
       LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
@@ -1693,7 +2332,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           return;
         }
       }
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
 
       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1702,7 +2341,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         for (Value *j : VL)
           Operands.push_back(cast<Instruction>(j)->getOperand(i));
 
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
     }
@@ -1710,10 +2349,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     case Instruction::FCmp: {
       // Check that all of the compares have the same predicate.
       CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
       Type *ComparedTy = VL0->getOperand(0)->getType();
       for (unsigned i = 1, e = VL.size(); i < e; ++i) {
         CmpInst *Cmp = cast<CmpInst>(VL[i]);
-        if (Cmp->getPredicate() != P0 ||
+        if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
             Cmp->getOperand(0)->getType() != ComparedTy) {
           BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -1723,20 +2363,34 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         }
       }
 
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
 
-      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
-        ValueList Operands;
-        // Prepare the operand vector.
-        for (Value *j : VL)
-          Operands.push_back(cast<Instruction>(j)->getOperand(i));
-
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+      ValueList Left, Right;
+      if (cast<CmpInst>(VL0)->isCommutative()) {
+        // Commutative predicate - collect + sort operands of the instructions
+        // so that each side is more likely to have the same opcode.
+        assert(P0 == SwapP0 && "Commutative Predicate mismatch");
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+      } else {
+        // Collect operands - commute if it uses the swapped predicate.
+        for (Value *V : VL) {
+          auto *Cmp = cast<CmpInst>(V);
+          Value *LHS = Cmp->getOperand(0);
+          Value *RHS = Cmp->getOperand(1);
+          if (Cmp->getPredicate() != P0)
+            std::swap(LHS, RHS);
+          Left.push_back(LHS);
+          Right.push_back(RHS);
+        }
       }
+
+      buildTree_rec(Left, Depth + 1, {TE, 0});
+      buildTree_rec(Right, Depth + 1, {TE, 1});
       return;
     }
     case Instruction::Select:
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -1754,17 +2408,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     case Instruction::AShr:
     case Instruction::And:
     case Instruction::Or:
-    case Instruction::Xor:
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
-      LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+    case Instruction::Xor: {
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
 
       // Sort operands of the instructions so that each side is more likely to
       // have the same opcode.
       if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
         ValueList Left, Right;
-        reorderInputsAccordingToOpcode(S.getOpcode(), VL, Left, Right);
-        buildTree_rec(Left, Depth + 1, UserTreeIdx);
-        buildTree_rec(Right, Depth + 1, UserTreeIdx);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+        buildTree_rec(Left, Depth + 1, {TE, 0});
+        buildTree_rec(Right, Depth + 1, {TE, 1});
         return;
       }
 
@@ -1774,10 +2428,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         for (Value *j : VL)
           Operands.push_back(cast<Instruction>(j)->getOperand(i));
 
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
-
+    }
     case Instruction::GetElementPtr: {
       // We don't combine GEPs with complicated (nested) indexing.
       for (unsigned j = 0; j < VL.size(); ++j) {
@@ -1815,7 +2469,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         }
       }
 
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
       for (unsigned i = 0, e = 2; i < e; ++i) {
         ValueList Operands;
@@ -1823,7 +2477,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         for (Value *j : VL)
           Operands.push_back(cast<Instruction>(j)->getOperand(i));
 
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
     }
@@ -1837,14 +2491,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           return;
         }
 
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
 
       ValueList Operands;
       for (Value *j : VL)
         Operands.push_back(cast<Instruction>(j)->getOperand(0));
 
-      buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+      buildTree_rec(Operands, Depth + 1, {TE, 0});
       return;
     }
     case Instruction::Call: {
@@ -1860,9 +2514,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         return;
       }
       Function *Int = CI->getCalledFunction();
-      Value *A1I = nullptr;
-      if (hasVectorInstrinsicScalarOpd(ID, 1))
-        A1I = CI->getArgOperand(1);
+      unsigned NumArgs = CI->getNumArgOperands();
+      SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
+      for (unsigned j = 0; j != NumArgs; ++j)
+        if (hasVectorInstrinsicScalarOpd(ID, j))
+          ScalarArgs[j] = CI->getArgOperand(j);
       for (unsigned i = 1, e = VL.size(); i != e; ++i) {
         CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
         if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -1874,16 +2530,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                             << "\n");
           return;
         }
-        // ctlz,cttz and powi are special intrinsics whose second argument
-        // should be same in order for them to be vectorized.
-        if (hasVectorInstrinsicScalarOpd(ID, 1)) {
-          Value *A1J = CI2->getArgOperand(1);
-          if (A1I != A1J) {
-            BS.cancelScheduling(VL, VL0);
-            newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
-            LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
-                              << " argument " << A1I << "!=" << A1J << "\n");
-            return;
+        // Some intrinsics have scalar arguments, which must be the same in
+        // order for them to be vectorized.
+        for (unsigned j = 0; j != NumArgs; ++j) {
+          if (hasVectorInstrinsicScalarOpd(ID, j)) {
+            Value *A1J = CI2->getArgOperand(j);
+            if (ScalarArgs[j] != A1J) {
+              BS.cancelScheduling(VL, VL0);
+              newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+              LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+                                << " argument " << ScalarArgs[j] << "!=" << A1J
+                                << "\n");
+              return;
+            }
           }
         }
         // Verify that the bundle operands are identical between the two calls.
@@ -1899,7 +2558,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         }
       }
 
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
         ValueList Operands;
         // Prepare the operand vector.
@@ -1907,11 +2566,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           CallInst *CI2 = dyn_cast<CallInst>(j);
           Operands.push_back(CI2->getArgOperand(i));
         }
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
     }
-    case Instruction::ShuffleVector:
+    case Instruction::ShuffleVector: {
       // If this is not an alternate sequence of opcode like add-sub
       // then do not vectorize this instruction.
       if (!S.isAltShuffle()) {
@@ -1920,15 +2579,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
         return;
       }
-      newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
 
       // Reorder operands if reordering would enable vectorization.
       if (isa<BinaryOperator>(VL0)) {
         ValueList Left, Right;
-        reorderAltShuffleOperands(S, VL, Left, Right);
-        buildTree_rec(Left, Depth + 1, UserTreeIdx);
-        buildTree_rec(Right, Depth + 1, UserTreeIdx);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+        buildTree_rec(Left, Depth + 1, {TE, 0});
+        buildTree_rec(Right, Depth + 1, {TE, 1});
         return;
       }
 
@@ -1938,10 +2597,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         for (Value *j : VL)
           Operands.push_back(cast<Instruction>(j)->getOperand(i));
 
-        buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+        buildTree_rec(Operands, Depth + 1, {TE, i});
       }
       return;
-
+    }
     default:
       BS.cancelScheduling(VL, VL0);
       newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -2223,6 +2882,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -2260,7 +2920,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       ConstantInt *CInt0 = nullptr;
       for (unsigned i = 0, e = VL.size(); i < e; ++i) {
         const Instruction *I = cast<Instruction>(VL[i]);
-        ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(1));
+        unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
+        ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
         if (!CInt) {
           Op2VK = TargetTransformInfo::OK_AnyValue;
           Op2VP = TargetTransformInfo::OP_None;
@@ -2413,31 +3074,31 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   }
 }
 
-bool BoUpSLP::isFullyVectorizableTinyTree() {
+bool BoUpSLP::isFullyVectorizableTinyTree() const {
   LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
                     << VectorizableTree.size() << " is fully vectorizable .\n");
 
   // We only handle trees of heights 1 and 2.
-  if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
+  if (VectorizableTree.size() == 1 && !VectorizableTree[0]->NeedToGather)
     return true;
 
   if (VectorizableTree.size() != 2)
     return false;
 
   // Handle splat and all-constants stores.
-  if (!VectorizableTree[0].NeedToGather &&
-      (allConstant(VectorizableTree[1].Scalars) ||
-       isSplat(VectorizableTree[1].Scalars)))
+  if (!VectorizableTree[0]->NeedToGather &&
+      (allConstant(VectorizableTree[1]->Scalars) ||
+       isSplat(VectorizableTree[1]->Scalars)))
     return true;
 
   // Gathering cost would be too much for tiny trees.
-  if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+  if (VectorizableTree[0]->NeedToGather || VectorizableTree[1]->NeedToGather)
     return false;
 
   return true;
 }
 
-bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
   // We can vectorize the tree if its size is greater than or equal to the
   // minimum size specified by the MinTreeSize command line option.
   if (VectorizableTree.size() >= MinTreeSize)
@@ -2457,19 +3118,19 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
   return true;
 }
 
-int BoUpSLP::getSpillCost() {
+int BoUpSLP::getSpillCost() const {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live. When we see a call instruction that is not part of our tree,
   // query TTI to see if there is a cost to keeping values live over it
   // (for example, if spills and fills are required).
-  unsigned BundleWidth = VectorizableTree.front().Scalars.size();
+  unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
   int Cost = 0;
 
   SmallPtrSet<Instruction*, 4> LiveValues;
   Instruction *PrevInst = nullptr;
 
-  for (const auto &N : VectorizableTree) {
-    Instruction *Inst = dyn_cast<Instruction>(N.Scalars[0]);
+  for (const auto &TEPtr : VectorizableTree) {
+    Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
     if (!Inst)
       continue;
 
@@ -2494,6 +3155,7 @@ int BoUpSLP::getSpillCost() {
     });
 
     // Now find the sequence of instructions between PrevInst and Inst.
+    unsigned NumCalls = 0;
     BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
                                  PrevInstIt =
                                      PrevInst->getIterator().getReverse();
@@ -2506,16 +3168,19 @@ int BoUpSLP::getSpillCost() {
       // Debug informations don't impact spill cost.
       if ((isa<CallInst>(&*PrevInstIt) &&
            !isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
-          &*PrevInstIt != PrevInst) {
-        SmallVector<Type*, 4> V;
-        for (auto *II : LiveValues)
-          V.push_back(VectorType::get(II->getType(), BundleWidth));
-        Cost += TTI->getCostOfKeepingLiveOverCall(V);
-      }
+          &*PrevInstIt != PrevInst)
+        NumCalls++;
 
       ++PrevInstIt;
     }
 
+    if (NumCalls) {
+      SmallVector<Type*, 4> V;
+      for (auto *II : LiveValues)
+        V.push_back(VectorType::get(II->getType(), BundleWidth));
+      Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
+    }
+
     PrevInst = Inst;
   }
 
@@ -2527,10 +3192,10 @@ int BoUpSLP::getTreeCost() {
   LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
                     << VectorizableTree.size() << ".\n");
 
-  unsigned BundleWidth = VectorizableTree[0].Scalars.size();
+  unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
 
   for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
-    TreeEntry &TE = VectorizableTree[I];
+    TreeEntry &TE = *VectorizableTree[I].get();
 
     // We create duplicate tree entries for gather sequences that have multiple
     // uses. However, we should not compute the cost of duplicate sequences.
@@ -2545,10 +3210,11 @@ int BoUpSLP::getTreeCost() {
     // existing heuristics based on tree size may yield different results.
     //
     if (TE.NeedToGather &&
-        std::any_of(std::next(VectorizableTree.begin(), I + 1),
-                    VectorizableTree.end(), [TE](TreeEntry &Entry) {
-                      return Entry.NeedToGather && Entry.isSame(TE.Scalars);
-                    }))
+        std::any_of(
+            std::next(VectorizableTree.begin(), I + 1), VectorizableTree.end(),
+            [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
+              return EntryPtr->NeedToGather && EntryPtr->isSame(TE.Scalars);
+            }))
       continue;
 
     int C = getEntryCost(&TE);
@@ -2575,7 +3241,7 @@ int BoUpSLP::getTreeCost() {
     // extend the extracted value back to the original type. Here, we account
     // for the extract and the added cost of the sign extend if needed.
     auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
-    auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+    auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
     if (MinBWs.count(ScalarRoot)) {
       auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
       auto Extend =
@@ -2608,17 +3274,17 @@ int BoUpSLP::getTreeCost() {
 }
 
 int BoUpSLP::getGatherCost(Type *Ty,
-                           const DenseSet<unsigned> &ShuffledIndices) {
+                           const DenseSet<unsigned> &ShuffledIndices) const {
   int Cost = 0;
   for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
     if (!ShuffledIndices.count(i))
       Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
   if (!ShuffledIndices.empty())
-      Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
+    Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
   return Cost;
 }
 
-int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
+int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
   // Find the type of the operands in VL.
   Type *ScalarTy = VL[0]->getType();
   if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -2638,221 +3304,19 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
   return getGatherCost(VecTy, ShuffledElements);
 }
 
-// Reorder commutative operations in alternate shuffle if the resulting vectors
-// are consecutive loads. This would allow us to vectorize the tree.
-// If we have something like-
-// load a[0] - load b[0]
-// load b[1] + load a[1]
-// load a[2] - load b[2]
-// load a[3] + load b[3]
-// Reordering the second load b[1]  load a[1] would allow us to vectorize this
-// code.
-void BoUpSLP::reorderAltShuffleOperands(const InstructionsState &S,
-                                        ArrayRef<Value *> VL,
-                                        SmallVectorImpl<Value *> &Left,
-                                        SmallVectorImpl<Value *> &Right) {
-  // Push left and right operands of binary operation into Left and Right
-  for (Value *V : VL) {
-    auto *I = cast<Instruction>(V);
-    assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
-    Left.push_back(I->getOperand(0));
-    Right.push_back(I->getOperand(1));
-  }
-
-  // Reorder if we have a commutative operation and consecutive access
-  // are on either side of the alternate instructions.
-  for (unsigned j = 0; j < VL.size() - 1; ++j) {
-    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-  }
-}
-
-// Return true if I should be commuted before adding it's left and right
-// operands to the arrays Left and Right.
-//
-// The vectorizer is trying to either have all elements one side being
-// instruction with the same opcode to enable further vectorization, or having
-// a splat to lower the vectorizing cost.
-static bool shouldReorderOperands(
-    int i, unsigned Opcode, Instruction &I, ArrayRef<Value *> Left,
-    ArrayRef<Value *> Right, bool AllSameOpcodeLeft, bool AllSameOpcodeRight,
-    bool SplatLeft, bool SplatRight, Value *&VLeft, Value *&VRight) {
-  VLeft = I.getOperand(0);
-  VRight = I.getOperand(1);
-  // If we have "SplatRight", try to see if commuting is needed to preserve it.
-  if (SplatRight) {
-    if (VRight == Right[i - 1])
-      // Preserve SplatRight
-      return false;
-    if (VLeft == Right[i - 1]) {
-      // Commuting would preserve SplatRight, but we don't want to break
-      // SplatLeft either, i.e. preserve the original order if possible.
-      // (FIXME: why do we care?)
-      if (SplatLeft && VLeft == Left[i - 1])
-        return false;
-      return true;
-    }
-  }
-  // Symmetrically handle Right side.
-  if (SplatLeft) {
-    if (VLeft == Left[i - 1])
-      // Preserve SplatLeft
-      return false;
-    if (VRight == Left[i - 1])
-      return true;
-  }
-
-  Instruction *ILeft = dyn_cast<Instruction>(VLeft);
-  Instruction *IRight = dyn_cast<Instruction>(VRight);
-
-  // If we have "AllSameOpcodeRight", try to see if the left operands preserves
-  // it and not the right, in this case we want to commute.
-  if (AllSameOpcodeRight) {
-    unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
-    if (IRight && RightPrevOpcode == IRight->getOpcode())
-      // Do not commute, a match on the right preserves AllSameOpcodeRight
-      return false;
-    if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
-      // We have a match and may want to commute, but first check if there is
-      // not also a match on the existing operands on the Left to preserve
-      // AllSameOpcodeLeft, i.e. preserve the original order if possible.
-      // (FIXME: why do we care?)
-      if (AllSameOpcodeLeft && ILeft &&
-          cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
-        return false;
-      return true;
-    }
-  }
-  // Symmetrically handle Left side.
-  if (AllSameOpcodeLeft) {
-    unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
-    if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
-      return false;
-    if (IRight && LeftPrevOpcode == IRight->getOpcode())
-      return true;
-  }
-  return false;
-}
-
-void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
-                                             ArrayRef<Value *> VL,
-                                             SmallVectorImpl<Value *> &Left,
-                                             SmallVectorImpl<Value *> &Right) {
-  if (!VL.empty()) {
-    // Peel the first iteration out of the loop since there's nothing
-    // interesting to do anyway and it simplifies the checks in the loop.
-    auto *I = cast<Instruction>(VL[0]);
-    Value *VLeft = I->getOperand(0);
-    Value *VRight = I->getOperand(1);
-    if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
-      // Favor having instruction to the right. FIXME: why?
-      std::swap(VLeft, VRight);
-    Left.push_back(VLeft);
-    Right.push_back(VRight);
-  }
-
-  // Keep track if we have instructions with all the same opcode on one side.
-  bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
-  bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
-  // Keep track if we have one side with all the same value (broadcast).
-  bool SplatLeft = true;
-  bool SplatRight = true;
-
-  for (unsigned i = 1, e = VL.size(); i != e; ++i) {
-    Instruction *I = cast<Instruction>(VL[i]);
-    assert(((I->getOpcode() == Opcode && I->isCommutative()) ||
-            (I->getOpcode() != Opcode && Instruction::isCommutative(Opcode))) &&
-           "Can only process commutative instruction");
-    // Commute to favor either a splat or maximizing having the same opcodes on
-    // one side.
-    Value *VLeft;
-    Value *VRight;
-    if (shouldReorderOperands(i, Opcode, *I, Left, Right, AllSameOpcodeLeft,
-                              AllSameOpcodeRight, SplatLeft, SplatRight, VLeft,
-                              VRight)) {
-      Left.push_back(VRight);
-      Right.push_back(VLeft);
-    } else {
-      Left.push_back(VLeft);
-      Right.push_back(VRight);
-    }
-    // Update Splat* and AllSameOpcode* after the insertion.
-    SplatRight = SplatRight && (Right[i - 1] == Right[i]);
-    SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
-    AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
-                        (cast<Instruction>(Left[i - 1])->getOpcode() ==
-                         cast<Instruction>(Left[i])->getOpcode());
-    AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
-                         (cast<Instruction>(Right[i - 1])->getOpcode() ==
-                          cast<Instruction>(Right[i])->getOpcode());
-  }
-
-  // If one operand end up being broadcast, return this operand order.
-  if (SplatRight || SplatLeft)
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
+void BoUpSLP::reorderInputsAccordingToOpcode(
+    ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+    SmallVectorImpl<Value *> &Right, const DataLayout &DL,
+    ScalarEvolution &SE) {
+  if (VL.empty())
     return;
-
-  // Finally check if we can get longer vectorizable chain by reordering
-  // without breaking the good operand order detected above.
-  // E.g. If we have something like-
-  // load a[0]  load b[0]
-  // load b[1]  load a[1]
-  // load a[2]  load b[2]
-  // load a[3]  load b[3]
-  // Reordering the second load b[1]  load a[1] would allow us to vectorize
-  // this code and we still retain AllSameOpcode property.
-  // FIXME: This load reordering might break AllSameOpcode in some rare cases
-  // such as-
-  // add a[0],c[0]  load b[0]
-  // add a[1],c[2]  load b[1]
-  // b[2]           load b[2]
-  // add a[3],c[3]  load b[3]
-  for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
-    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-      }
-    }
-    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-      }
-    }
-    // else unchanged
-  }
+  VLOperands Ops(VL, DL, SE);
+  // Reorder the operands in place.
+  Ops.reorder();
+  Left = Ops.getVL(0);
+  Right = Ops.getVL(1);
 }
 
 void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
@@ -3082,13 +3546,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           continue;
         }
 
-        // Prepare the operand vector.
-        for (Value *V : E->Scalars)
-          Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
-
         Builder.SetInsertPoint(IBB->getTerminator());
         Builder.SetCurrentDebugLocation(PH->getDebugLoc());
-        Value *Vec = vectorizeTree(Operands);
+        Value *Vec = vectorizeTree(E->getOperand(i));
         NewPhi->addIncoming(Vec, IBB);
       }
 
@@ -3099,7 +3559,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
     case Instruction::ExtractElement: {
       if (!E->NeedToGather) {
-        Value *V = VL0->getOperand(0);
+        Value *V = E->getSingleOperand(0);
         if (!E->ReorderIndices.empty()) {
           OrdersType Mask;
           inversePermutation(E->ReorderIndices, Mask);
@@ -3132,11 +3592,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     }
     case Instruction::ExtractValue: {
       if (!E->NeedToGather) {
-        LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
+        LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
         Builder.SetInsertPoint(LI);
         PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
         Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
-        LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
+        LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlignment());
         Value *NewV = propagateMetadata(V, E->Scalars);
         if (!E->ReorderIndices.empty()) {
           OrdersType Mask;
@@ -3177,13 +3637,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     case Instruction::Trunc:
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
-      ValueList INVL;
-      for (Value *V : E->Scalars)
-        INVL.push_back(cast<Instruction>(V)->getOperand(0));
-
       setInsertPointAfterBundle(E->Scalars, S);
 
-      Value *InVec = vectorizeTree(INVL);
+      Value *InVec = vectorizeTree(E->getOperand(0));
 
       if (E->VectorizedValue) {
         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3202,16 +3658,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     }
     case Instruction::FCmp:
     case Instruction::ICmp: {
-      ValueList LHSV, RHSV;
-      for (Value *V : E->Scalars) {
-        LHSV.push_back(cast<Instruction>(V)->getOperand(0));
-        RHSV.push_back(cast<Instruction>(V)->getOperand(1));
-      }
-
       setInsertPointAfterBundle(E->Scalars, S);
 
-      Value *L = vectorizeTree(LHSV);
-      Value *R = vectorizeTree(RHSV);
+      Value *L = vectorizeTree(E->getOperand(0));
+      Value *R = vectorizeTree(E->getOperand(1));
 
       if (E->VectorizedValue) {
         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3235,31 +3685,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       return V;
     }
     case Instruction::Select: {
-      ValueList TrueVec, FalseVec, CondVec;
-      for (Value *V : E->Scalars) {
-        CondVec.push_back(cast<Instruction>(V)->getOperand(0));
-        TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
-        FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
+      setInsertPointAfterBundle(E->Scalars, S);
+
+      Value *Cond = vectorizeTree(E->getOperand(0));
+      Value *True = vectorizeTree(E->getOperand(1));
+      Value *False = vectorizeTree(E->getOperand(2));
+
+      if (E->VectorizedValue) {
+        LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+        return E->VectorizedValue;
       }
 
+      Value *V = Builder.CreateSelect(Cond, True, False);
+      if (NeedToShuffleReuses) {
+        V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
+                                        E->ReuseShuffleIndices, "shuffle");
+      }
+      E->VectorizedValue = V;
+      ++NumVectorInstructions;
+      return V;
+    }
+    case Instruction::FNeg: {
       setInsertPointAfterBundle(E->Scalars, S);
 
-      Value *Cond = vectorizeTree(CondVec);
-      Value *True = vectorizeTree(TrueVec);
-      Value *False = vectorizeTree(FalseVec);
+      Value *Op = vectorizeTree(E->getOperand(0));
 
       if (E->VectorizedValue) {
         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
         return E->VectorizedValue;
       }
 
-      Value *V = Builder.CreateSelect(Cond, True, False);
+      Value *V = Builder.CreateUnOp(
+          static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+      propagateIRFlags(V, E->Scalars, VL0);
+      if (auto *I = dyn_cast<Instruction>(V))
+        V = propagateMetadata(I, E->Scalars);
+
       if (NeedToShuffleReuses) {
         V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
                                         E->ReuseShuffleIndices, "shuffle");
       }
       E->VectorizedValue = V;
       ++NumVectorInstructions;
+
       return V;
     }
     case Instruction::Add:
@@ -3280,21 +3748,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      ValueList LHSVL, RHSVL;
-      if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
-        reorderInputsAccordingToOpcode(S.getOpcode(), E->Scalars, LHSVL,
-                                       RHSVL);
-      else
-        for (Value *V : E->Scalars) {
-          auto *I = cast<Instruction>(V);
-          LHSVL.push_back(I->getOperand(0));
-          RHSVL.push_back(I->getOperand(1));
-        }
-
       setInsertPointAfterBundle(E->Scalars, S);
 
-      Value *LHS = vectorizeTree(LHSVL);
-      Value *RHS = vectorizeTree(RHSVL);
+      Value *LHS = vectorizeTree(E->getOperand(0));
+      Value *RHS = vectorizeTree(E->getOperand(1));
 
       if (E->VectorizedValue) {
         LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3341,7 +3798,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
 
       unsigned Alignment = LI->getAlignment();
-      LI = Builder.CreateLoad(VecPtr);
+      LI = Builder.CreateLoad(VecTy, VecPtr);
       if (!Alignment) {
         Alignment = DL->getABITypeAlignment(ScalarLoadTy);
       }
@@ -3367,13 +3824,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       unsigned Alignment = SI->getAlignment();
       unsigned AS = SI->getPointerAddressSpace();
 
-      ValueList ScalarStoreValues;
-      for (Value *V : E->Scalars)
-        ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
-
       setInsertPointAfterBundle(E->Scalars, S);
 
-      Value *VecValue = vectorizeTree(ScalarStoreValues);
+      Value *VecValue = vectorizeTree(E->getOperand(0));
       Value *ScalarPtr = SI->getPointerOperand();
       Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
       StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
@@ -3400,20 +3853,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     case Instruction::GetElementPtr: {
       setInsertPointAfterBundle(E->Scalars, S);
 
-      ValueList Op0VL;
-      for (Value *V : E->Scalars)
-        Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
-
-      Value *Op0 = vectorizeTree(Op0VL);
+      Value *Op0 = vectorizeTree(E->getOperand(0));
 
       std::vector<Value *> OpVecs;
       for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
            ++j) {
-        ValueList OpVL;
-        for (Value *V : E->Scalars)
-          OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
-
-        Value *OpVec = vectorizeTree(OpVL);
+        Value *OpVec = vectorizeTree(E->getOperand(j));
         OpVecs.push_back(OpVec);
       }
 
@@ -3443,20 +3888,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       std::vector<Value *> OpVecs;
       for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
         ValueList OpVL;
-        // ctlz,cttz and powi are special intrinsics whose second argument is
-        // a scalar. This argument should not be vectorized.
-        if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+        // Some intrinsics have scalar arguments. Such arguments must not be
+        // vectorized; the scalar operand of VL0 is passed through as-is.
+        if (hasVectorInstrinsicScalarOpd(IID, j)) {
           CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(j);
           OpVecs.push_back(CEI->getArgOperand(j));
           continue;
         }
-        for (Value *V : E->Scalars) {
-          CallInst *CEI = cast<CallInst>(V);
-          OpVL.push_back(CEI->getArgOperand(j));
-        }
 
-        Value *OpVec = vectorizeTree(OpVL);
+        Value *OpVec = vectorizeTree(E->getOperand(j));
         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
         OpVecs.push_back(OpVec);
       }
@@ -3485,7 +3926,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       return V;
     }
     case Instruction::ShuffleVector: {
-      ValueList LHSVL, RHSVL;
       assert(S.isAltShuffle() &&
              ((Instruction::isBinaryOp(S.getOpcode()) &&
                Instruction::isBinaryOp(S.getAltOpcode())) ||
@@ -3495,16 +3935,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
       Value *LHS, *RHS;
       if (Instruction::isBinaryOp(S.getOpcode())) {
-        reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
         setInsertPointAfterBundle(E->Scalars, S);
-        LHS = vectorizeTree(LHSVL);
-        RHS = vectorizeTree(RHSVL);
+        LHS = vectorizeTree(E->getOperand(0));
+        RHS = vectorizeTree(E->getOperand(1));
       } else {
-        ValueList INVL;
-        for (Value *V : E->Scalars)
-          INVL.push_back(cast<Instruction>(V)->getOperand(0));
         setInsertPointAfterBundle(E->Scalars, S);
-        LHS = vectorizeTree(INVL);
+        LHS = vectorizeTree(E->getOperand(0));
       }
 
       if (E->VectorizedValue) {
@@ -3578,20 +4014,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
   }
 
   Builder.SetInsertPoint(&F->getEntryBlock().front());
-  auto *VectorRoot = vectorizeTree(&VectorizableTree[0]);
+  auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
 
   // If the vectorized tree can be rewritten in a smaller type, we truncate the
   // vectorized root. InstCombine will then rewrite the entire expression. We
   // sign extend the extracted values below.
-  auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+  auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
   if (MinBWs.count(ScalarRoot)) {
     if (auto *I = dyn_cast<Instruction>(VectorRoot))
       Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
-    auto BundleWidth = VectorizableTree[0].Scalars.size();
+    auto BundleWidth = VectorizableTree[0]->Scalars.size();
     auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
     auto *VecTy = VectorType::get(MinTy, BundleWidth);
     auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
-    VectorizableTree[0].VectorizedValue = Trunc;
+    VectorizableTree[0]->VectorizedValue = Trunc;
   }
 
   LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
@@ -3687,8 +4123,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
   }
 
   // For each vectorized value:
-  for (TreeEntry &EIdx : VectorizableTree) {
-    TreeEntry *Entry = &EIdx;
+  for (auto &TEPtr : VectorizableTree) {
+    TreeEntry *Entry = TEPtr.get();
 
     // No need to handle users of gathered values.
     if (Entry->NeedToGather)
@@ -3721,7 +4157,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
 
   Builder.ClearInsertionPoint();
 
-  return VectorizableTree[0].VectorizedValue;
+  return VectorizableTree[0]->VectorizedValue;
 }
 
 void BoUpSLP::optimizeGatherSequence() {
@@ -3767,10 +4203,10 @@ void BoUpSLP::optimizeGatherSequence() {
 
   // Sort blocks by domination. This ensures we visit a block after all blocks
   // dominating it are visited.
-  std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
-                   [this](const DomTreeNode *A, const DomTreeNode *B) {
-    return DT->properlyDominates(A, B);
-  });
+  llvm::stable_sort(CSEWorkList,
+                    [this](const DomTreeNode *A, const DomTreeNode *B) {
+                      return DT->properlyDominates(A, B);
+                    });
 
   // Perform O(N^2) search over the gather sequences and merge identical
   // instructions. TODO: We can further optimize this scan if we split the
@@ -3989,7 +4425,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
                           << "\n");
         return true;
       }
-      UpIter++;
+      ++UpIter;
     }
     if (DownIter != LowerEnd) {
       if (&*DownIter == I) {
@@ -4003,7 +4439,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
                           << "\n");
         return true;
       }
-      DownIter++;
+      ++DownIter;
     }
     assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
            "instruction not found in block");
@@ -4253,7 +4689,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   BS->ScheduleStart = nullptr;
 }
 
-unsigned BoUpSLP::getVectorElementSize(Value *V) {
+unsigned BoUpSLP::getVectorElementSize(Value *V) const {
   // If V is a store, just return the width of the stored value without
   // traversing the expression tree. This is the common case.
   if (auto *Store = dyn_cast<StoreInst>(V))
@@ -4390,7 +4826,7 @@ void BoUpSLP::computeMinimumValueSizes() {
     return;
 
   // We only attempt to truncate integer expressions.
-  auto &TreeRoot = VectorizableTree[0].Scalars;
+  auto &TreeRoot = VectorizableTree[0]->Scalars;
   auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
   if (!TreeRootIT)
     return;
@@ -4411,8 +4847,8 @@ void BoUpSLP::computeMinimumValueSizes() {
   // Collect the scalar values of the vectorizable expression. We will use this
   // context to determine which values can be demoted. If we see a truncation,
   // we mark it as seeding another demotion.
-  for (auto &Entry : VectorizableTree)
-    Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end());
+  for (auto &EntryPtr : VectorizableTree)
+    Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
 
   // Ensure the roots of the vectorizable tree don't form a cycle. They must
   // have a single external user that is not in the vectorizable tree.
@@ -4746,38 +5182,29 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
   BoUpSLP::ValueSet VectorizedStores;
   bool Changed = false;
 
-  // Do a quadratic search on all of the given stores in reverse order and find
-  // all of the pairs of stores that follow each other.
-  SmallVector<unsigned, 16> IndexQueue;
-  unsigned E = Stores.size();
-  IndexQueue.resize(E - 1);
-  for (unsigned I = E; I > 0; --I) {
-    unsigned Idx = I - 1;
-    // If a store has multiple consecutive store candidates, search Stores
-    // array according to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
-    // This is because usually pairing with immediate succeeding or preceding
-    // candidate create the best chance to find slp vectorization opportunity.
-    unsigned Offset = 1;
-    unsigned Cnt = 0;
-    for (unsigned J = 0; J < E - 1; ++J, ++Offset) {
-      if (Idx >= Offset) {
-        IndexQueue[Cnt] = Idx - Offset;
-        ++Cnt;
-      }
-      if (Idx + Offset < E) {
-        IndexQueue[Cnt] = Idx + Offset;
-        ++Cnt;
-      }
-    }
+  auto &&FindConsecutiveAccess =
+      [this, &Stores, &Heads, &Tails, &ConsecutiveChain] (int K, int Idx) {
+        if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
+          return false;
 
-    for (auto K : IndexQueue) {
-      if (isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) {
         Tails.insert(Stores[Idx]);
         Heads.insert(Stores[K]);
         ConsecutiveChain[Stores[K]] = Stores[Idx];
+        return true;
+      };
+
+  // Do a quadratic search on all of the given stores in reverse order and find
+  // all of the pairs of stores that follow each other.
+  int E = Stores.size();
+  for (int Idx = E - 1; Idx >= 0; --Idx) {
+    // If a store has multiple consecutive store candidates, search according
+    // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
+    // This is because usually pairing with immediate succeeding or preceding
+    // candidate create the best chance to find slp vectorization opportunity.
+    for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset)
+      if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
+          (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
         break;
-      }
-    }
   }
 
   // For stores that start but don't end a link in the chain:
@@ -5740,6 +6167,9 @@ public:
     unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
 
     Value *VectorizedTree = nullptr;
+
+    // FIXME: Fast-math-flags should be set based on the instructions in the
+    //        reduction (not all of 'fast' are required).
     IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
     FastMathFlags Unsafe;
     Unsafe.setFast();
@@ -5929,10 +6359,14 @@ private:
     assert(isPowerOf2_32(ReduxWidth) &&
            "We only handle power-of-two reductions for now");
 
-    if (!IsPairwiseReduction)
+    if (!IsPairwiseReduction) {
+      // FIXME: The builder should use an FMF guard. It should not be hard-coded
+      //        to 'fast'.
+      assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
       return createSimpleTargetReduction(
           Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
           ReductionData.getFlags(), ReductionOps.back());
+    }
 
     Value *TmpVec = VectorizedValue;
     for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
@@ -6256,7 +6690,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     }
 
     // Sort by type.
-    std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+    llvm::stable_sort(Incoming, PhiTypeSorterFunc);
 
     // Try to vectorize elements base on their type.
     for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
@@ -6297,7 +6731,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
   SmallVector<WeakVH, 8> PostProcessInstructions;
   SmallDenseSet<Instruction *, 4> KeyNodes;
-  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
     // We may go through BB multiple times so skip the one we have checked.
     if (!VisitedInstrs.insert(&*it).second) {
       if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
diff --git a/lib/Transforms/Vectorize/VPRecipeBuilder.h b/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 15d38ac9c84c..0ca6a6b93cfd 100644
--- a/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -1,9 +1,8 @@
 //===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,9 +29,6 @@ class VPRecipeBuilder {
   /// Target Library Info.
   const TargetLibraryInfo *TLI;
 
-  /// Target Transform Info.
-  const TargetTransformInfo *TTI;
-
   /// The legality analysis.
   LoopVectorizationLegality *Legal;
 
@@ -105,11 +101,9 @@ public:
 
 public:
   VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
-                  const TargetTransformInfo *TTI,
                   LoopVectorizationLegality *Legal,
                   LoopVectorizationCostModel &CM, VPBuilder &Builder)
-      : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
-        Builder(Builder) {}
+      : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
 
   /// Check if a recipe can be create for \p I withing the given VF \p Range.
   /// If a recipe can be created, it adds it to \p VPBB.
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index 05a5400beb4e..517d759d7bfc 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -1,9 +1,8 @@
 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -374,10 +373,9 @@ void VPlan::execute(VPTransformState *State) {
   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
-  BasicBlock *VectorLatchBB = VectorHeaderBB;
 
   // 1. Make room to generate basic-blocks inside loop body if needed.
-  VectorLatchBB = VectorHeaderBB->splitBasicBlock(
+  BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
@@ -561,6 +559,19 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
   bumpIndent(1);
   OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
   bumpIndent(1);
+
+  // Dump the block predicate; the quoted label is terminated on every path.
+  const VPValue *Pred = BasicBlock->getPredicate();
+  if (Pred) {
+    OS << " +\n" << Indent << " \"BlockPredicate: ";
+    if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
+      PredI->printAsOperand(OS);
+      OS << " (" << DOT::EscapeString(PredI->getParent()->getName()) << ")";
+    } else
+      Pred->printAsOperand(OS);
+    OS << "\\l\"";
+  }
+
   for (const VPRecipeBase &Recipe : *BasicBlock)
     Recipe.print(OS, Indent);
 
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 5c1b4a83c30e..8a06412ad590 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -1,9 +1,8 @@
 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -353,6 +352,9 @@ private:
   /// Successor selector, null for zero or single successor blocks.
   VPValue *CondBit = nullptr;
 
+  /// Current block predicate - null if the block does not need a predicate.
+  VPValue *Predicate = nullptr;
+
   /// Add \p Successor as the last successor to this block.
   void appendSuccessor(VPBlockBase *Successor) {
     assert(Successor && "Cannot add nullptr successor!");
@@ -491,6 +493,12 @@ public:
 
   void setCondBit(VPValue *CV) { CondBit = CV; }
 
+  VPValue *getPredicate() { return Predicate; }
+
+  const VPValue *getPredicate() const { return Predicate; }
+
+  void setPredicate(VPValue *Pred) { Predicate = Pred; }
+
   /// Set a given VPBlockBase \p Successor as the single successor of this
   /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
   /// This VPBlockBase must have no successors.
@@ -521,6 +529,15 @@ public:
       appendPredecessor(Pred);
   }
 
+  /// Remove all the predecessors of this block.
+  void clearPredecessors() { Predecessors.clear(); }
+
+  /// Remove all successors of this block and set its condition bit to null.
+  void clearSuccessors() {
+    Successors.clear();
+    CondBit = nullptr;
+  }
+
   /// The method which generates the output IR that correspond to this
   /// VPBlockBase, thereby "executing" the VPlan.
   virtual void execute(struct VPTransformState *State) = 0;
@@ -1491,6 +1508,41 @@ public:
     From->removeSuccessor(To);
     To->removePredecessor(From);
   }
+
+  /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge, i.e.
+  /// \p FromBlock is a latch of the loop that \p ToBlock is the header of.
+  static bool isBackEdge(const VPBlockBase *FromBlock,
+                         const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
+    assert(FromBlock->getParent() == ToBlock->getParent() &&
+           FromBlock->getParent() && "Must be in same region");
+    // Both blocks must belong to one and the same loop.
+    const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
+    const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
+    const bool SameLoop = FromLoop && ToLoop && FromLoop == ToLoop;
+    return SameLoop && ToLoop->isLoopLatch(FromBlock) &&
+           ToBlock == ToLoop->getHeader();
+  }
+
+  /// Returns true if \p Block is a latch of the loop that \p VPLInfo reports
+  /// for it. Returns false when no loop is reported for \p Block.
+  static bool blockIsLoopLatch(const VPBlockBase *Block,
+                               const VPLoopInfo *VPLInfo) {
+    const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block);
+    // Without an enclosing loop, Block cannot be a latch.
+    return ParentVPL && ParentVPL->isLoopLatch(Block);
+  }
+
+  /// Count and return the number of successors of \p PredBlock, excluding
+  /// those reached through a back-edge (see isBackEdge).
+  static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
+                                      VPLoopInfo *VPLI) {
+    unsigned NumForwardSuccs = 0;
+    for (VPBlockBase *Succ : PredBlock->getSuccessors()) {
+      const bool IsBackEdge = VPBlockUtils::isBackEdge(PredBlock, Succ, VPLI);
+      NumForwardSuccs += IsBackEdge ? 0 : 1;
+    }
+    return NumForwardSuccs;
+  }
 };
 
 class VPInterleavedAccessInfo {
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 1b81097b6d31..19f5d2c00c60 100644
--- a/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -1,9 +1,8 @@
 //===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 0f42694e193b..df96f67288f1 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -1,9 +1,8 @@
 //===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -64,7 +63,9 @@ private:
   void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
   void fixPhiNodes();
   VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
+#ifndef NDEBUG
   bool isExternalDef(Value *Val);
+#endif
   VPValue *getOrCreateVPOperand(Value *IRVal);
   void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
 
@@ -119,6 +120,7 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
   return VPBB;
 }
 
+#ifndef NDEBUG
 // Return true if \p Val is considered an external definition. An external
 // definition is either:
 // 1. A Value that is not an Instruction. This will be refined in the future.
@@ -154,6 +156,7 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
   // Check whether Instruction definition is in loop body.
   return !TheLoop->contains(Inst);
 }
+#endif
 
 // Create a new VPValue or retrieve an existing one for the Instruction's
 // operand \p IRVal. This function must only be used to create/retrieve VPValues
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index 3f11dcb5164d..238ee7e6347c 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -1,9 +1,8 @@
 //===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index 3ad7fc7e7b96..7ed7d21b6caa 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -1,9 +1,8 @@
 //===-- VPlanHCFGTransforms.cpp - Utility VPlan to VPlan transforms -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
index ae549c6871b3..79a23c33184f 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
@@ -1,9 +1,8 @@
 //===- VPlanHCFGTransforms.h - Utility VPlan to VPlan transforms ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
index 5c2485fc2145..5208f2d58e2b 100644
--- a/lib/Transforms/Vectorize/VPlanLoopInfo.h
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -1,9 +1,8 @@
 //===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.cpp b/lib/Transforms/Vectorize/VPlanPredicator.cpp
new file mode 100644
index 000000000000..7a80f3ff80a5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -0,0 +1,248 @@
+//===-- VPlanPredicator.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlanPredicator.h"
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "VPlanPredicator"
+
+using namespace llvm;
+
+// Compute and return the predicate being propagated from PredBB to CurrBB,
+// which depends on the edge type between them. The VPInstructions needed are
+// created through Builder; createOrPropagatePredicates points it at the
+// beginning of CurrBB before calling. For example if:
+//  i.  PredBB is controlled by predicate %BP, and
+//  ii. The edge PredBB->CurrBB is the false edge, controlled by the condition
+//  bit value %CBV, then the following two VPInstructions are generated:
+//   %IntermediateVal = not %CBV
+//   %FinalVal        = and %BP %IntermediateVal
+// and %FinalVal is returned.
+VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB,
+                                                  VPBasicBlock *CurrBB) {
+  VPValue *CondBit = PredBB->getCondBit();
+
+  // Set the edge condition - this is either the condition bit or its
+  // negation, depending on the edge type.
+  VPValue *EdgeCond = nullptr;
+  switch (getEdgeTypeBetween(PredBB, CurrBB)) {
+  case EdgeType::FALSE_EDGE:
+    // CurrBB is the false successor of PredBB: negate the condition bit.
+    EdgeCond = Builder.createNot(CondBit);
+    break;
+
+  case EdgeType::TRUE_EDGE:
+    // CurrBB is the true successor of PredBB: use the condition bit as is.
+    EdgeCond = CondBit;
+    break;
+  }
+
+  // Without a block predicate on PredBB, the edge condition is the result.
+  VPValue *PredBP = PredBB->getPredicate();
+  if (!PredBP)
+    return EdgeCond;
+
+  // Otherwise AND the edge condition with PredBB's block predicate.
+  return Builder.createAnd(PredBP, EdgeCond);
+}
+
+// Generate a tree of ORs for all IncomingPredicates in Worklist.
+//
+// Note: This function consumes Worklist. On return only the root predicate
+// is left in it.
+//
+// The algorithm uses a worklist of predicates as its main data structure.
+// We pop a pair of values from the front, generate an OR of the two, and push
+// that OR to the back. For five incoming predicates P1..P5 the steps are:
+//
+//   Worklist                 Popped      Pushed
+//   {P1, P2, P3, P4, P5}     P1,  P2     OR1
+//   {P3, P4, P5, OR1}        P3,  P4     OR2
+//   {P5, OR1, OR2}           P5,  OR1    OR3
+//   {OR2, OR3}               OR2, OR3    OR4
+//   {OR4}                    <- Returns this
+//
+// The process iterates until we have only one element in the Worklist (OR4).
+// The last element is the root predicate which is returned.
+//
+// Two special cases: an empty Worklist yields nullptr, and a Worklist with a
+// single predicate returns that predicate without creating any OR.
+VPValue *VPlanPredicator::genPredicateTree(std::list<VPValue *> &Worklist) {
+  // No incoming predicates, hence no predicate to return.
+  if (Worklist.empty())
+    return nullptr;
+
+  // The worklist initially contains all the leaf nodes. Keep folding its two
+  // front entries into a single OR until only the root is left.
+  while (Worklist.size() > 1) {
+    // Take the first operand off the front ...
+    VPValue *FirstOp = Worklist.front();
+    Worklist.pop_front();
+
+    // ... and then the second one.
+    VPValue *SecondOp = Worklist.front();
+    Worklist.pop_front();
+
+    // OR the two and queue the result behind the remaining entries.
+    VPValue *Combined = Builder.createOr(FirstOp, SecondOp);
+    Worklist.push_back(Combined);
+  }
+
+  assert(Worklist.size() == 1 && "Expected 1 item in worklist");
+
+  // What remains is the root of the tree. Installing it as the new block
+  // predicate is left to the caller.
+  VPValue *Root = Worklist.front();
+  return Root;
+}
+
+// Classify the edge FromBlock -> ToBlock by the position of ToBlock among the
+// successors of FromBlock: the first one is a TRUE_EDGE, else a FALSE_EDGE.
+VPlanPredicator::EdgeType
+VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock,
+                                    VPBlockBase *ToBlock) {
+  unsigned SuccIdx = 0;
+  for (VPBlockBase *Succ : FromBlock->getSuccessors()) {
+    if (Succ == ToBlock) {
+      assert(SuccIdx < 2 && "Switch not supported currently");
+      return SuccIdx != 0 ? EdgeType::FALSE_EDGE : EdgeType::TRUE_EDGE;
+    }
+    ++SuccIdx;
+  }
+  llvm_unreachable("Broken getEdgeTypeBetween");
+}
+
+// Set the predicate of CurrBlock: either the one inherited from Region, or the
+// OR of the predicates propagated from its immediate predecessor blocks.
+void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock,
+                                                  VPRegionBlock *Region) {
+  // Blocks that dominate the region exit inherit the predicate from the region.
+  // Return after setting the predicate.
+  if (VPDomTree.dominates(CurrBlock, Region->getExit())) {
+    VPValue *RegionBP = Region->getPredicate();
+    CurrBlock->setPredicate(RegionBP);
+    return;
+  }
+
+  // Collect all incoming predicates in a worklist.
+  std::list<VPValue *> IncomingPredicates;
+
+  // Set the builder's insertion point to the top of the current BB.
+  VPBasicBlock *CurrBB = cast<VPBasicBlock>(CurrBlock->getEntryBasicBlock());
+  Builder.setInsertPoint(CurrBB, CurrBB->begin());
+
+  // For each predecessor, generate the VPInstructions required for
+  // computing 'BP AND (not) CBV' at the top of CurrBB.
+  // Collect the outcome of this calculation for all predecessors
+  // into IncomingPredicates.
+  for (VPBlockBase *PredBlock : CurrBlock->getPredecessors()) {
+    // Skip back-edges.
+    if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI))
+      continue;
+
+    VPValue *IncomingPredicate = nullptr;
+    unsigned NumPredSuccsNoBE =
+        VPBlockUtils::countSuccessorsNoBE(PredBlock, VPLI);
+
+    // If there is an unconditional branch to CurrBB, then we don't create
+    // edge predicates. We use the predecessor's block predicate instead.
+    if (NumPredSuccsNoBE == 1)
+      IncomingPredicate = PredBlock->getPredicate();
+    else if (NumPredSuccsNoBE == 2) {
+      // Emit VPInstructions into CurrBB if required.
+      assert(isa<VPBasicBlock>(PredBlock) && "Only BBs have multiple exits");
+      IncomingPredicate =
+          getOrCreateNotPredicate(cast<VPBasicBlock>(PredBlock), CurrBB);
+    } else
+      llvm_unreachable("FIXME: switch statement ?");
+
+    if (IncomingPredicate)
+      IncomingPredicates.push_back(IncomingPredicate);
+  }
+
+  // Logically OR all incoming predicates by building the predicate tree.
+  VPValue *Predicate = genPredicateTree(IncomingPredicates);
+
+  // Now update the block's predicate with the new one.
+  CurrBlock->setPredicate(Predicate);
+}
+
+// Generate all predicates needed for Region by visiting each of its blocks.
+void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) {
+  // Generate edge predicates and append them to the block predicate. RPO is
+  // necessary since the predecessor blocks' block predicate needs to be set
+  // before the current block's block predicate can be computed.
+  VPBlockBase *RegionEntry = cast<VPBasicBlock>(Region->getEntry());
+  ReversePostOrderTraversal<VPBlockBase *> RPOT(RegionEntry);
+
+  for (VPBlockBase *VPB : RPOT) {
+    // TODO: Handle nested regions once we start generating the same.
+    assert(!isa<VPRegionBlock>(VPB) && "Nested region not expected");
+    createOrPropagatePredicates(VPB, Region);
+  }
+}
+
+// Linearize the CFG within Region.
+// TODO: Predication and linearization each need an RPOT for every region, and
+// this traversal is expensive. Since predication is not adding new
+// blocks, we should be able to compute RPOT once in predication and
+// reuse it here. This becomes even more important once we have nested
+// regions.
+void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) {
+  ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+  VPBlockBase *PrevBlock = nullptr;
+
+  for (VPBlockBase *CurrBlock : RPOT) {
+    // TODO: Handle nested regions once we start generating the same.
+    assert(!isa<VPRegionBlock>(CurrBlock) && "Nested region not expected");
+
+    // Loop headers must keep their predecessors and loop latches their
+    // successors; the first block visited has nothing to be chained to.
+    const bool KeepEdges = !PrevBlock || VPLI->isLoopHeader(CurrBlock) ||
+                           VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI);
+    if (!KeepEdges) {
+      LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->"
+                        << CurrBlock->getName() << "\n");
+
+      // Drop the old edges and chain PrevBlock directly to CurrBlock.
+      PrevBlock->clearSuccessors();
+      CurrBlock->clearPredecessors();
+      VPBlockUtils::connectBlocks(PrevBlock, CurrBlock);
+    }
+
+    PrevBlock = CurrBlock;
+  }
+}
+
+// Entry point. The driver function for the predicator.
+void VPlanPredicator::predicate(void) {
+  VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan.getEntry());
+
+  // Predicate the blocks within the region, then linearize them.
+  predicateRegionRec(TopRegion);
+  linearizeRegionRec(TopRegion);
+}
+
+VPlanPredicator::VPlanPredicator(VPlan &Plan)
+    : Plan(Plan), VPLI(&Plan.getVPLoopInfo()) {
+  // FIXME: The dominator information for the top region is recomputed here on
+  // every construction. Once a VPRegionBlock stores it, reuse that instead.
+  VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan.getEntry());
+  VPDomTree.recalculate(*TopRegion);
+}
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.h b/lib/Transforms/Vectorize/VPlanPredicator.h
new file mode 100644
index 000000000000..692afd2978d5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.h
@@ -0,0 +1,74 @@
+//===-- VPlanPredicator.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+
+#include "LoopVectorizationPlanner.h"
+#include "VPlan.h"
+#include "VPlanDominatorTree.h"
+
+namespace llvm {
+
+class VPlanPredicator {
+private:
+  enum class EdgeType {
+    TRUE_EDGE,
+    FALSE_EDGE,
+  };
+
+  /// VPlan being predicated.
+  VPlan &Plan;
+
+  /// VPLoopInfo for Plan's HCFG.
+  VPLoopInfo *VPLI;
+
+  /// Dominator tree for Plan's HCFG.
+  VPDominatorTree VPDomTree;
+
+  /// VPlan builder used to generate VPInstructions for block predicates.
+  VPBuilder Builder;
+
+  /// Get the type of edge from \p FromBlock to \p ToBlock. Returns TRUE_EDGE if
+  /// \p ToBlock is either the unconditional successor or the conditional true
+  /// successor of \p FromBlock and FALSE_EDGE otherwise.
+  EdgeType getEdgeTypeBetween(VPBlockBase *FromBlock, VPBlockBase *ToBlock);
+
+  /// Create and return VPValue corresponding to the predicate for the edge from
+  /// \p PredBB to \p CurrBB.
+  VPValue *getOrCreateNotPredicate(VPBasicBlock *PredBB, VPBasicBlock *CurrBB);
+
+  /// Generate and return the result of ORing all the predicate VPValues in
+  /// \p Worklist. Returns null if \p Worklist is empty.
+  VPValue *genPredicateTree(std::list<VPValue *> &Worklist);
+
+  /// Create or propagate predicate for \p CurrBlock in region \p Region using
+  /// predicate(s) of its predecessor(s).
+  void createOrPropagatePredicates(VPBlockBase *CurrBlock,
+                                   VPRegionBlock *Region);
+
+  /// Predicate the CFG within \p Region.
+  void predicateRegionRec(VPRegionBlock *Region);
+
+  /// Linearize the CFG within \p Region.
+  void linearizeRegionRec(VPRegionBlock *Region);
+
+public:
+  VPlanPredicator(VPlan &Plan);
+
+  /// Predicate and then linearize Plan's HCFG.
+  void predicate(void);
+};
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp
index ad3a85a6f760..e5ab24e52df6 100644
--- a/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -1,9 +1,8 @@
 //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// This file implements SLP analysis based on VPlan. The analysis is based on
diff --git a/lib/Transforms/Vectorize/VPlanValue.h b/lib/Transforms/Vectorize/VPlanValue.h
index b473579b699f..7b6c228c229e 100644
--- a/lib/Transforms/Vectorize/VPlanValue.h
+++ b/lib/Transforms/Vectorize/VPlanValue.h
@@ -1,9 +1,8 @@
 //===- VPlanValue.h - Represent Values in Vectorizer Plan -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.cpp b/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 054bed4e177f..394b1b93113b 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -1,9 +1,8 @@
 //===-- VPlanVerifier.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.h b/lib/Transforms/Vectorize/VPlanVerifier.h
index d2f99d006a66..7d2b26252172 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -1,9 +1,8 @@
 //===-- VPlanVerifier.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 559ab1968844..6a4f9169c2af 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,9 +1,8 @@
 //===-- Vectorize.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/WindowsManifest/WindowsManifestMerger.cpp b/lib/WindowsManifest/WindowsManifestMerger.cpp
index 0a8abed230d4..d092ab493c9b 100644
--- a/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -1,9 +1,8 @@
 //===-- WindowsManifestMerger.cpp ------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===---------------------------------------------------------------------===//
 //
diff --git a/lib/XRay/BlockIndexer.cpp b/lib/XRay/BlockIndexer.cpp
index 4dbe2d2717ad..a99a6815f0d1 100644
--- a/lib/XRay/BlockIndexer.cpp
+++ b/lib/XRay/BlockIndexer.cpp
@@ -1,9 +1,8 @@
 //===- BlockIndexer.cpp - FDR Block Indexing VIsitor ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/XRay/BlockPrinter.cpp b/lib/XRay/BlockPrinter.cpp
index 0acebee0cbdd..63a60c3c56a3 100644
--- a/lib/XRay/BlockPrinter.cpp
+++ b/lib/XRay/BlockPrinter.cpp
@@ -1,9 +1,8 @@
 //===- BlockPrinter.cpp - FDR Block Pretty Printer Implementation --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/BlockPrinter.h"
diff --git a/lib/XRay/BlockVerifier.cpp b/lib/XRay/BlockVerifier.cpp
index 5e949ec4e46a..9fb49fa9a860 100644
--- a/lib/XRay/BlockVerifier.cpp
+++ b/lib/XRay/BlockVerifier.cpp
@@ -1,9 +1,8 @@
 //===- BlockVerifier.cpp - FDR Block Verifier -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/BlockVerifier.h"
diff --git a/lib/XRay/FDRRecordProducer.cpp b/lib/XRay/FDRRecordProducer.cpp
index 25b3ee8af219..452bc6c55fb8 100644
--- a/lib/XRay/FDRRecordProducer.cpp
+++ b/lib/XRay/FDRRecordProducer.cpp
@@ -1,9 +1,8 @@
 //===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecordProducer.h"
diff --git a/lib/XRay/FDRRecords.cpp b/lib/XRay/FDRRecords.cpp
index 2a40d5e06229..ff315d35417d 100644
--- a/lib/XRay/FDRRecords.cpp
+++ b/lib/XRay/FDRRecords.cpp
@@ -1,9 +1,8 @@
 //===- FDRRecords.cpp -  XRay Flight Data Recorder Mode Records -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/XRay/FDRTraceExpander.cpp b/lib/XRay/FDRTraceExpander.cpp
index a6e1521da87f..cb7f66bccd7e 100644
--- a/lib/XRay/FDRTraceExpander.cpp
+++ b/lib/XRay/FDRTraceExpander.cpp
@@ -1,9 +1,8 @@
 //===- FDRTraceExpander.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRTraceExpander.h"
diff --git a/lib/XRay/FDRTraceWriter.cpp b/lib/XRay/FDRTraceWriter.cpp
index c5224f4be094..f50dc19b4be8 100644
--- a/lib/XRay/FDRTraceWriter.cpp
+++ b/lib/XRay/FDRTraceWriter.cpp
@@ -1,9 +1,8 @@
 //===- FDRTraceWriter.cpp - XRay FDR Trace Writer ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/XRay/FileHeaderReader.cpp b/lib/XRay/FileHeaderReader.cpp
index 0b3fb8b6f692..3fb021906a6f 100644
--- a/lib/XRay/FileHeaderReader.cpp
+++ b/lib/XRay/FileHeaderReader.cpp
@@ -1,9 +1,8 @@
 //===- FileHeaderReader.cpp - XRay File Header Reader  --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FileHeaderReader.h"
diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp
index 9f2b179486f0..fe5e941f7ea6 100644
--- a/lib/XRay/InstrumentationMap.cpp
+++ b/lib/XRay/InstrumentationMap.cpp
@@ -1,9 +1,8 @@
 //===- InstrumentationMap.cpp - XRay Instrumentation Map ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -79,9 +78,10 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
         "Failed to find XRay instrumentation map.",
         std::make_error_code(std::errc::executable_format_error));
 
-  if (I->getContents(Contents))
-    return errorCodeToError(
-        std::make_error_code(std::errc::executable_format_error));
+  if (Expected<StringRef> E = I->getContents())
+    Contents = *E;
+  else
+    return E.takeError();
 
   RelocMap Relocs;
   if (ObjFile.getBinary()->isELF()) {
@@ -172,13 +172,14 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
 }
 
 static Error
-loadYAML(int Fd, size_t FileSize, StringRef Filename,
+loadYAML(sys::fs::file_t Fd, size_t FileSize, StringRef Filename,
          InstrumentationMap::SledContainer &Sleds,
          InstrumentationMap::FunctionAddressMap &FunctionAddresses,
          InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
   std::error_code EC;
   sys::fs::mapped_file_region MappedFile(
       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+  sys::fs::closeFile(Fd);
   if (EC)
     return make_error<StringError>(
         Twine("Failed memory-mapping file '") + Filename + "'.", EC);
@@ -214,9 +215,12 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
   if (!ObjectFileOrError) {
     auto E = ObjectFileOrError.takeError();
     // We try to load it as YAML if the ELF load didn't work.
-    int Fd;
-    if (sys::fs::openFileForRead(Filename, Fd))
+    Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+    if (!FdOrErr) {
+      // Opening the file for the YAML fallback failed; report the ELF error.
+      consumeError(FdOrErr.takeError());
       return std::move(E);
+    }
 
     uint64_t FileSize;
     if (sys::fs::file_size(Filename, FileSize))
@@ -229,7 +233,7 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
     // From this point on the errors will be only for the YAML parts, so we
     // consume the errors at this point.
     consumeError(std::move(E));
-    if (auto E = loadYAML(Fd, FileSize, Filename, Map.Sleds,
+    if (auto E = loadYAML(*FdOrErr, FileSize, Filename, Map.Sleds,
                           Map.FunctionAddresses, Map.FunctionIds))
       return std::move(E);
   } else if (auto E = loadObj(Filename, *ObjectFileOrError, Map.Sleds,
diff --git a/lib/XRay/LogBuilderConsumer.cpp b/lib/XRay/LogBuilderConsumer.cpp
index 88b7d2d728b1..ffb49f9eb4e9 100644
--- a/lib/XRay/LogBuilderConsumer.cpp
+++ b/lib/XRay/LogBuilderConsumer.cpp
@@ -1,9 +1,8 @@
 //===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecordConsumer.h"
diff --git a/lib/XRay/Profile.cpp b/lib/XRay/Profile.cpp
index e8a082884d69..e34b182f2e02 100644
--- a/lib/XRay/Profile.cpp
+++ b/lib/XRay/Profile.cpp
@@ -1,9 +1,8 @@
 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -261,10 +260,9 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
 }
 
 Expected<Profile> loadProfile(StringRef Filename) {
-  int Fd;
-  if (auto EC = sys::fs::openFileForRead(Filename, Fd))
-    return make_error<StringError>(
-        Twine("Cannot read profile from '") + Filename + "'", EC);
+  Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+  if (!FdOrErr)
+    return FdOrErr.takeError();
 
   uint64_t FileSize;
   if (auto EC = sys::fs::file_size(Filename, FileSize))
@@ -273,7 +271,9 @@ Expected<Profile> loadProfile(StringRef Filename) {
 
   std::error_code EC;
   sys::fs::mapped_file_region MappedFile(
-      Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+      *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+      EC);
+  sys::fs::closeFile(*FdOrErr);
   if (EC)
     return make_error<StringError>(
         Twine("Cannot mmap profile '") + Filename + "'", EC);
diff --git a/lib/XRay/RecordInitializer.cpp b/lib/XRay/RecordInitializer.cpp
index f136a1e456b7..78163031a8cc 100644
--- a/lib/XRay/RecordInitializer.cpp
+++ b/lib/XRay/RecordInitializer.cpp
@@ -1,9 +1,8 @@
 //===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/FDRRecords.h"
diff --git a/lib/XRay/RecordPrinter.cpp b/lib/XRay/RecordPrinter.cpp
index 71ea7d0e969f..32d42104db95 100644
--- a/lib/XRay/RecordPrinter.cpp
+++ b/lib/XRay/RecordPrinter.cpp
@@ -1,9 +1,8 @@
 //===- RecordPrinter.cpp - FDR Record Printer -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/XRay/RecordPrinter.h"
diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp
index 4f28f3f754c1..b9b67c561c66 100644
--- a/lib/XRay/Trace.cpp
+++ b/lib/XRay/Trace.cpp
@@ -1,9 +1,8 @@
 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -372,11 +371,9 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
 } // namespace
 
 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
-  int Fd;
-  if (auto EC = sys::fs::openFileForRead(Filename, Fd)) {
-    return make_error<StringError>(
-        Twine("Cannot read log from '") + Filename + "'", EC);
-  }
+  Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+  if (!FdOrErr)
+    return FdOrErr.takeError();
 
   uint64_t FileSize;
   if (auto EC = sys::fs::file_size(Filename, FileSize)) {
@@ -392,7 +389,9 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
   // Map the opened file into memory and use a StringRef to access it later.
   std::error_code EC;
   sys::fs::mapped_file_region MappedFile(
-      Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+      *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+      EC);
+  sys::fs::closeFile(*FdOrErr);
   if (EC) {
     return make_error<StringError>(
         Twine("Cannot read log from '") + Filename + "'", EC);
@@ -462,10 +461,9 @@ Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
   }
 
   if (Sort)
-    std::stable_sort(T.Records.begin(), T.Records.end(),
-                     [&](const XRayRecord &L, const XRayRecord &R) {
-                       return L.TSC < R.TSC;
-                     });
+    llvm::stable_sort(T.Records, [&](const XRayRecord &L, const XRayRecord &R) {
+      return L.TSC < R.TSC;
+    });
 
   return std::move(T);
 }
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 3832e075a693..942028cad80b 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -1,9 +1,8 @@
 //===- BugDriver.cpp - Top-Level BugPoint class implementation ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index bc60ae753548..75f166b21b2c 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -1,9 +1,8 @@
 //===- BugDriver.h - Top-Level BugPoint class -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index ef6a214fde20..aab9debf9b59 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -1,9 +1,8 @@
 //===- CrashDebugger.cpp - Debug compilation crashes ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 1b86b103d835..40f198b88d1a 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -1,9 +1,8 @@
 //===- ExecutionDriver.cpp - Allow execution of LLVM program --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 48f1575c25eb..105702de3f1d 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -1,9 +1,8 @@
 //===- ExtractFunction.cpp - Extract a function from Program --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/FindBugs.cpp b/tools/bugpoint/FindBugs.cpp
index a695e875b787..2b1146da9680 100644
--- a/tools/bugpoint/FindBugs.cpp
+++ b/tools/bugpoint/FindBugs.cpp
@@ -1,9 +1,8 @@
 //===-- FindBugs.cpp - Run Many Different Optimizations -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index 0f9db022d555..04f2207a31ed 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -1,9 +1,8 @@
 //===- ListReducer.h - Trim down list while retaining property --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 375bee7a0d50..1621a51c91d6 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -1,9 +1,8 @@
 //===- Miscompilation.cpp - Debug program miscompilations -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -592,9 +591,6 @@ ExtractBlocks(BugDriver &BD,
   if (Linker::linkModules(*ProgClone, std::move(Extracted)))
     exit(1);
 
-  // Set the new program and delete the old one.
-  BD.setNewProgram(std::move(ProgClone));
-
   // Update the list of miscompiled functions.
   MiscompiledFunctions.clear();
 
@@ -604,6 +600,9 @@ ExtractBlocks(BugDriver &BD,
     MiscompiledFunctions.push_back(NewF);
   }
 
+  // Set the new program and delete the old one.
+  BD.setNewProgram(std::move(ProgClone));
+
   return true;
 }
 
@@ -706,8 +705,8 @@ static Expected<bool> TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
   if (!Optimized) {
     errs() << " Error running this sequence of passes"
            << " on the input program!\n";
-    BD.setNewProgram(std::move(Test));
     BD.EmitProgressBitcode(*Test, "pass-error", false);
+    BD.setNewProgram(std::move(Test));
     if (Error E = BD.debugOptimizerCrash())
       return std::move(E);
     return false;
@@ -827,13 +826,14 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
 
   // Add the resolver to the Safe module.
   // Prototype: void *getPointerToNamedFunction(const char* Name)
-  Constant *resolverFunc = Safe->getOrInsertFunction(
+  FunctionCallee resolverFunc = Safe->getOrInsertFunction(
       "getPointerToNamedFunction", Type::getInt8PtrTy(Safe->getContext()),
       Type::getInt8PtrTy(Safe->getContext()));
 
   // Use the function we just added to get addresses of functions we need.
   for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) {
-    if (F->isDeclaration() && !F->use_empty() && &*F != resolverFunc &&
+    if (F->isDeclaration() && !F->use_empty() &&
+        &*F != resolverFunc.getCallee() &&
         !F->isIntrinsic() /* ignore intrinsics */) {
       Function *TestFn = Test->getFunction(F->getName());
 
@@ -879,7 +879,8 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
               BasicBlock::Create(F->getContext(), "lookupfp", FuncWrapper);
 
           // Check to see if we already looked up the value.
-          Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB);
+          Value *CachedVal =
+              new LoadInst(F->getType(), Cache, "fpcache", EntryBB);
           Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal,
                                        NullPtr, "isNull");
           BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB);
@@ -911,11 +912,11 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
 
           // Pass on the arguments to the real function, return its result
           if (F->getReturnType()->isVoidTy()) {
-            CallInst::Create(FuncPtr, Args, "", DoCallBB);
+            CallInst::Create(FuncTy, FuncPtr, Args, "", DoCallBB);
             ReturnInst::Create(F->getContext(), DoCallBB);
           } else {
             CallInst *Call =
-                CallInst::Create(FuncPtr, Args, "retval", DoCallBB);
+                CallInst::Create(FuncTy, FuncPtr, Args, "retval", DoCallBB);
             ReturnInst::Create(F->getContext(), Call, DoCallBB);
           }
 
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 64fe675de20c..562de7952388 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -1,9 +1,8 @@
 //===- OptimizerDriver.cpp - Allow BugPoint to run passes safely ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 7ba8ea1f16c5..da4244345e3b 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -1,9 +1,8 @@
 //===-- ToolRunner.cpp ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index ef8551cc669b..dde4ec539cfb 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -1,9 +1,8 @@
 //===-- tools/bugpoint/ToolRunner.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index f6b7d08455d4..2d5322a351ad 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -1,9 +1,8 @@
 //===- bugpoint.cpp - The LLVM Bugpoint utility ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 2329fb3e87c9..76da843f065e 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -1,9 +1,8 @@
 //===-- llc.cpp - Implement the LLVM Native Code Generator ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -32,6 +31,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/MC/SubtargetFeature.h"
@@ -133,21 +133,33 @@ static cl::opt<bool> DiscardValueNames(
 
 static cl::list<std::string> IncludeDirs("I", cl::desc("include search path"));
 
-static cl::opt<bool> PassRemarksWithHotness(
+static cl::opt<bool> RemarksWithHotness(
     "pass-remarks-with-hotness",
     cl::desc("With PGO, include profile count in optimization remarks"),
     cl::Hidden);
 
-static cl::opt<unsigned> PassRemarksHotnessThreshold(
-    "pass-remarks-hotness-threshold",
-    cl::desc("Minimum profile count required for an optimization remark to be output"),
-    cl::Hidden);
+static cl::opt<unsigned>
+    RemarksHotnessThreshold("pass-remarks-hotness-threshold",
+                            cl::desc("Minimum profile count required for "
+                                     "an optimization remark to be output"),
+                            cl::Hidden);
 
 static cl::opt<std::string>
     RemarksFilename("pass-remarks-output",
-                    cl::desc("YAML output filename for pass remarks"),
+                    cl::desc("Output filename for pass remarks"),
                     cl::value_desc("filename"));
 
+static cl::opt<std::string>
+    RemarksPasses("pass-remarks-filter",
+                  cl::desc("Only record optimization remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+    "pass-remarks-format",
+    cl::desc("The format used for serializing remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
+
 namespace {
 static ManagedStatic<std::vector<std::string>> RunPassNames;
 
@@ -302,6 +314,7 @@ int main(int argc, char **argv) {
   initializeVectorization(*Registry);
   initializeScalarizeMaskedMemIntrinPass(*Registry);
   initializeExpandReductionsPass(*Registry);
+  initializeHardwareLoopsPass(*Registry);
 
   // Initialize debugging passes.
   initializeScavengerTestPass(*Registry);
@@ -319,24 +332,15 @@ int main(int argc, char **argv) {
       llvm::make_unique<LLCDiagnosticHandler>(&HasError));
   Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);
 
-  if (PassRemarksWithHotness)
-    Context.setDiagnosticsHotnessRequested(true);
-
-  if (PassRemarksHotnessThreshold)
-    Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
-
-  std::unique_ptr<ToolOutputFile> YamlFile;
-  if (RemarksFilename != "") {
-    std::error_code EC;
-    YamlFile =
-        llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
-    if (EC) {
-      WithColor::error(errs(), argv[0]) << EC.message() << '\n';
-      return 1;
-    }
-    Context.setDiagnosticsOutputFile(
-        llvm::make_unique<yaml::Output>(YamlFile->os()));
+  Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
+      setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+                               RemarksFormat, RemarksWithHotness,
+                               RemarksHotnessThreshold);
+  if (Error E = RemarksFileOrErr.takeError()) {
+    WithColor::error(errs(), argv[0]) << toString(std::move(E)) << '\n';
+    return 1;
   }
+  std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
 
   if (InputLanguage != "" && InputLanguage != "ir" &&
       InputLanguage != "mir") {
@@ -351,8 +355,8 @@ int main(int argc, char **argv) {
     if (int RetVal = compileModule(argv, Context))
       return RetVal;
 
-  if (YamlFile)
-    YamlFile->keep();
+  if (RemarksFile)
+    RemarksFile->keep();
   return 0;
 }
 
diff --git a/tools/lli/RemoteJITUtils.h b/tools/lli/RemoteJITUtils.h
index 944881070c70..8e80e73c8082 100644
--- a/tools/lli/RemoteJITUtils.h
+++ b/tools/lli/RemoteJITUtils.h
@@ -1,9 +1,8 @@
 //===-- RemoteJITUtils.h - Utilities for remote-JITing with LLI -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -76,7 +75,7 @@ std::unique_ptr<FDRawChannel> launchRemote();
 
 namespace llvm {
 
-// ForwardingMM - Adapter to connect MCJIT to Orc's Remote8
+// ForwardingMM - Adapter to connect MCJIT to Orc's Remote
 // memory manager.
 class ForwardingMemoryManager : public llvm::RTDyldMemoryManager {
 public:
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 7e93d31361aa..8c8cd88c9711 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -1,9 +1,8 @@
 //===- lli.cpp - LLVM Interpreter / Dynamic compiler ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -84,18 +83,15 @@ namespace {
                                  cl::desc("Force interpretation: disable JIT"),
                                  cl::init(false));
 
-  cl::opt<JITKind> UseJITKind("jit-kind",
-                              cl::desc("Choose underlying JIT kind."),
-                              cl::init(JITKind::MCJIT),
-                              cl::values(
-                                clEnumValN(JITKind::MCJIT, "mcjit",
-                                           "MCJIT"),
-                                clEnumValN(JITKind::OrcMCJITReplacement,
-                                           "orc-mcjit",
-                                           "Orc-based MCJIT replacement"),
-                                clEnumValN(JITKind::OrcLazy,
-                                           "orc-lazy",
-                                           "Orc-based lazy JIT.")));
+  cl::opt<JITKind> UseJITKind(
+      "jit-kind", cl::desc("Choose underlying JIT kind."),
+      cl::init(JITKind::MCJIT),
+      cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
+                 clEnumValN(JITKind::OrcMCJITReplacement, "orc-mcjit",
+                            "Orc-based MCJIT replacement "
+                            "(deprecated)"),
+                 clEnumValN(JITKind::OrcLazy, "orc-lazy",
+                            "Orc-based lazy JIT.")));
 
   cl::opt<unsigned>
   LazyJITCompileThreads("compile-threads",
@@ -173,7 +169,7 @@ namespace {
 
   cl::opt<bool>
   EnableCacheManager("enable-cache-manager",
-        cl::desc("Use cache manager to save/load mdoules"),
+        cl::desc("Use cache manager to save/load modules"),
         cl::init(false));
 
   cl::opt<std::string>
@@ -420,7 +416,8 @@ int main(int argc, char **argv, char * const *envp) {
   builder.setEngineKind(ForceInterpreter
                         ? EngineKind::Interpreter
                         : EngineKind::JIT);
-  builder.setUseOrcMCJITReplacement(UseJITKind == JITKind::OrcMCJITReplacement);
+  builder.setUseOrcMCJITReplacement(AcknowledgeORCv1Deprecation,
+                                    UseJITKind == JITKind::OrcMCJITReplacement);
 
   // If we are supposed to override the target triple, do so now.
   if (!TargetTriple.empty())
@@ -596,8 +593,8 @@ int main(int argc, char **argv, char * const *envp) {
   if (!RemoteMCJIT) {
     // If the program doesn't explicitly call exit, we will need the Exit
     // function later on to make an explicit call, so get the function now.
-    Constant *Exit = Mod->getOrInsertFunction("exit", Type::getVoidTy(Context),
-                                                      Type::getInt32Ty(Context));
+    FunctionCallee Exit = Mod->getOrInsertFunction(
+        "exit", Type::getVoidTy(Context), Type::getInt32Ty(Context));
 
     // Run static constructors.
     if (!ForceInterpreter) {
@@ -621,19 +618,21 @@ int main(int argc, char **argv, char * const *envp) {
 
     // If the program didn't call exit explicitly, we should call it now.
     // This ensures that any atexit handlers get called correctly.
-    if (Function *ExitF = dyn_cast<Function>(Exit)) {
-      std::vector<GenericValue> Args;
-      GenericValue ResultGV;
-      ResultGV.IntVal = APInt(32, Result);
-      Args.push_back(ResultGV);
-      EE->runFunction(ExitF, Args);
-      WithColor::error(errs(), argv[0]) << "exit(" << Result << ") returned!\n";
-      abort();
-    } else {
-      WithColor::error(errs(), argv[0])
-          << "exit defined with wrong prototype!\n";
-      abort();
+    if (Function *ExitF =
+            dyn_cast<Function>(Exit.getCallee()->stripPointerCasts())) {
+      if (ExitF->getFunctionType() == Exit.getFunctionType()) {
+        std::vector<GenericValue> Args;
+        GenericValue ResultGV;
+        ResultGV.IntVal = APInt(32, Result);
+        Args.push_back(ResultGV);
+        EE->runFunction(ExitF, Args);
+        WithColor::error(errs(), argv[0])
+            << "exit(" << Result << ") returned!\n";
+        abort();
+      }
     }
+    WithColor::error(errs(), argv[0]) << "exit defined with wrong prototype!\n";
+    abort();
   } else {
     // else == "if (RemoteMCJIT)"
 
@@ -664,6 +663,7 @@ int main(int argc, char **argv, char * const *envp) {
     // Forward MCJIT's symbol resolution calls to the remote.
     static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
         orc::createLambdaResolver(
+            AcknowledgeORCv1Deprecation,
             [](const std::string &Name) { return nullptr; },
             [&](const std::string &Name) {
               if (auto Addr = ExitOnErr(R->getSymbolAddress(Name)))
@@ -762,14 +762,17 @@ int runOrcLazyJIT(const char *ProgName) {
     reportError(Err, ProgName);
 
   const auto &TT = MainModule.getModule()->getTargetTriple();
-  orc::JITTargetMachineBuilder JTMB =
+  orc::LLLazyJITBuilder Builder;
+
+  Builder.setJITTargetMachineBuilder(
       TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
-                 : orc::JITTargetMachineBuilder(Triple(TT));
+                 : orc::JITTargetMachineBuilder(Triple(TT)));
 
   if (!MArch.empty())
-    JTMB.getTargetTriple().setArchName(MArch);
+    Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(MArch);
 
-  JTMB.setCPU(getCPUStr())
+  Builder.getJITTargetMachineBuilder()
+      ->setCPU(getCPUStr())
       .addFeatures(getFeatureList())
       .setRelocationModel(RelocModel.getNumOccurrences()
                               ? Optional<Reloc::Model>(RelocModel)
@@ -778,12 +781,11 @@ int runOrcLazyJIT(const char *ProgName) {
                         ? Optional<CodeModel::Model>(CMModel)
                         : None);
 
-  DataLayout DL = ExitOnErr(JTMB.getDefaultDataLayoutForTarget());
+  Builder.setLazyCompileFailureAddr(
+      pointerToJITTargetAddress(exitOnLazyCallThroughFailure));
+  Builder.setNumCompileThreads(LazyJITCompileThreads);
 
-  auto J = ExitOnErr(orc::LLLazyJIT::Create(
-      std::move(JTMB), DL,
-      pointerToJITTargetAddress(exitOnLazyCallThroughFailure),
-      LazyJITCompileThreads));
+  auto J = ExitOnErr(Builder.create());
 
   if (PerModuleLazy)
     J->setPartitionFunction(orc::CompileOnDemandLayer::compileWholeModule);
@@ -799,9 +801,10 @@ int runOrcLazyJIT(const char *ProgName) {
     return Dump(std::move(TSM), R);
   });
   J->getMainJITDylib().setGenerator(
-      ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(DL)));
+      ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+          J->getDataLayout().getGlobalPrefix())));
 
-  orc::MangleAndInterner Mangle(J->getExecutionSession(), DL);
+  orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
   orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
   ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
 
@@ -817,8 +820,10 @@ int runOrcLazyJIT(const char *ProgName) {
     IdxToDylib[0] = &J->getMainJITDylib();
     for (auto JDItr = JITDylibs.begin(), JDEnd = JITDylibs.end();
          JDItr != JDEnd; ++JDItr) {
-      IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] =
-          &J->createJITDylib(*JDItr);
+      orc::JITDylib *JD = J->getJITDylibByName(*JDItr);
+      if (!JD)
+        JD = &J->createJITDylib(*JDItr);
+      IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] = JD;
     }
 
     for (auto EMItr = ExtraModules.begin(), EMEnd = ExtraModules.end();
@@ -861,8 +866,6 @@ int runOrcLazyJIT(const char *ProgName) {
     AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
   }
 
-  J->getExecutionSession().dump(llvm::dbgs());
-
   // Run main.
   auto MainSym = ExitOnErr(J->lookup("main"));
   typedef int (*MainFnPtr)(int, const char *[]);
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index 1c453ee0b569..91746d0fab37 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-ar.cpp - LLVM archive librarian utility ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -67,7 +66,7 @@ OPTIONS:
 const char ArHelp[] = R"(
 OVERVIEW: LLVM Archiver
 
-USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] <archive> [files]
+USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
        llvm-ar -M [<mri-script]
 
 OPTIONS:
@@ -79,6 +78,7 @@ OPTIONS:
   --plugin=<string>     - Ignored for compatibility
   --help                - Display available options
   --version             - Display the version of this program
+  @<file>               - read options from <file>
 
 OPERATIONS:
   d - delete [files] from the archive
@@ -98,7 +98,9 @@ MODIFIERS:
   [i] - put [files] before [relpos] (same as [b])
   [l] - ignored for compatibility
   [L] - add archive's contents
+  [N] - use instance [count] of name
   [o] - preserve original dates
+  [P] - use full names when matching (implied for thin archives)
   [s] - create an archive index (cf. ranlib)
   [S] - do not build a symbol table
   [T] - create a thin archive
@@ -169,16 +171,17 @@ enum ArchiveOperation {
 };
 
 // Modifiers to follow operation to vary behavior
-static bool AddAfter = false;      ///< 'a' modifier
-static bool AddBefore = false;     ///< 'b' modifier
-static bool Create = false;        ///< 'c' modifier
-static bool OriginalDates = false; ///< 'o' modifier
-static bool OnlyUpdate = false;    ///< 'u' modifier
-static bool Verbose = false;       ///< 'v' modifier
-static bool Symtab = true;         ///< 's' modifier
-static bool Deterministic = true;  ///< 'D' and 'U' modifiers
-static bool Thin = false;          ///< 'T' modifier
-static bool AddLibrary = false;    ///< 'L' modifier
+static bool AddAfter = false;        ///< 'a' modifier
+static bool AddBefore = false;       ///< 'b' modifier
+static bool Create = false;          ///< 'c' modifier
+static bool OriginalDates = false;   ///< 'o' modifier
+static bool CompareFullPath = false; ///< 'P' modifier
+static bool OnlyUpdate = false;      ///< 'u' modifier
+static bool Verbose = false;         ///< 'v' modifier
+static bool Symtab = true;           ///< 's' modifier
+static bool Deterministic = true;    ///< 'D' and 'U' modifiers
+static bool Thin = false;            ///< 'T' modifier
+static bool AddLibrary = false;      ///< 'L' modifier
 
 // Relative Positional Argument (for insert/move). This variable holds
 // the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -186,6 +189,11 @@ static bool AddLibrary = false;    ///< 'L' modifier
 // one variable.
 static std::string RelPos;
 
+// Count parameter for 'N' modifier. This variable specifies which file should
+// match for extract/delete operations when there are multiple matches. This is
+// 1-indexed. A value of 0 is invalid, and implies 'N' is not used.
+static int CountParam = 0;
+
 // This variable holds the name of the archive file as given on the
 // command line.
 static std::string ArchiveName;
@@ -194,6 +202,9 @@ static std::string ArchiveName;
 // on the command line.
 static std::vector<StringRef> Members;
 
+// Static buffer to hold StringRefs.
+static BumpPtrAllocator Alloc;
+
 // Extract the member filename from the command line for the [relpos] argument
 // associated with a, b, and i modifiers
 static void getRelPos() {
@@ -203,6 +214,19 @@ static void getRelPos() {
   PositionalArgs.erase(PositionalArgs.begin());
 }
 
+// Extract the parameter from the command line for the [count] argument
+// associated with the N modifier
+static void getCountParam() {
+  if (PositionalArgs.empty())
+    fail("Expected [count] for N modifier");
+  auto CountParamArg = StringRef(PositionalArgs[0]);
+  if (CountParamArg.getAsInteger(10, CountParam))
+    fail("Value for [count] must be numeric, got: " + CountParamArg);
+  if (CountParam < 1)
+    fail("Value for [count] must be positive, got: " + CountParamArg);
+  PositionalArgs.erase(PositionalArgs.begin());
+}
+
 // Get the archive file name from the command line
 static void getArchive() {
   if (PositionalArgs.empty())
@@ -295,6 +319,9 @@ static ArchiveOperation parseCommandLine() {
     case 'o':
       OriginalDates = true;
       break;
+    case 'P':
+      CompareFullPath = true;
+      break;
     case 's':
       Symtab = true;
       MaybeJustCreateSymTab = true;
@@ -329,8 +356,13 @@ static ArchiveOperation parseCommandLine() {
     case 'U':
       Deterministic = false;
       break;
+    case 'N':
+      getCountParam();
+      break;
     case 'T':
       Thin = true;
+      // Thin archives store path names, so P should be forced.
+      CompareFullPath = true;
       break;
     case 'L':
       AddLibrary = true;
@@ -362,11 +394,14 @@ static ArchiveOperation parseCommandLine() {
     fail("Only one operation may be specified");
   if (NumPositional > 1)
     fail("You may only specify one of a, b, and i modifiers");
-  if (AddAfter || AddBefore) {
+  if (AddAfter || AddBefore)
     if (Operation != Move && Operation != ReplaceOrInsert)
       fail("The 'a', 'b' and 'i' modifiers can only be specified with "
            "the 'm' or 'r' operations");
-  }
+  if (CountParam)
+    if (Operation != Extract && Operation != Delete)
+      fail("The 'N' modifier can only be specified with the 'x' or 'd' "
+           "operations");
   if (OriginalDates && Operation != Extract)
     fail("The 'o' modifier is only applicable to the 'x' operation");
   if (OnlyUpdate && Operation != ReplaceOrInsert)
@@ -430,12 +465,19 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
   }
 
   if (C.getParent()->isThin()) {
-    outs() << sys::path::parent_path(ArchiveName);
-    outs() << '/';
+    if (!sys::path::is_absolute(Name)) {
+      StringRef ParentDir = sys::path::parent_path(ArchiveName);
+      if (!ParentDir.empty())
+        outs() << sys::path::convert_to_slash(ParentDir) << '/';
+    }
   }
   outs() << Name << "\n";
 }
 
+static StringRef normalizePath(StringRef Path) {
+  return CompareFullPath ? Path : sys::path::filename(Path);
+}
+
 // Implement the 'x' operation. This function extracts files back to the file
 // system.
 static void doExtract(StringRef Name, const object::Archive::Child &C) {
@@ -499,6 +541,7 @@ static void performReadOperation(ArchiveOperation Operation,
     fail("extracting from a thin archive is not supported");
 
   bool Filter = !Members.empty();
+  StringMap<int> MemberCount;
   {
     Error Err = Error::success();
     for (auto &C : OldArchive->children(Err)) {
@@ -507,9 +550,13 @@ static void performReadOperation(ArchiveOperation Operation,
       StringRef Name = NameOrErr.get();
 
       if (Filter) {
-        auto I = find(Members, Name);
+        auto I = find_if(Members, [Name](StringRef Path) {
+          return Name == normalizePath(Path);
+        });
         if (I == Members.end())
           continue;
+        if (CountParam && ++MemberCount[Name] != CountParam)
+          continue;
         Members.erase(I);
       }
 
@@ -545,6 +592,23 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
   Expected<NewArchiveMember> NMOrErr =
       NewArchiveMember::getOldMember(M, Deterministic);
   failIfError(NMOrErr.takeError());
+  // If the child member we're trying to add is thin, use the path relative to
+  // the archive it's in, so the file resolves correctly.
+  if (Thin && FlattenArchive) {
+    StringSaver Saver(Alloc);
+    Expected<std::string> FileNameOrErr = M.getName();
+    failIfError(FileNameOrErr.takeError());
+    if (sys::path::is_absolute(*FileNameOrErr)) {
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(*FileNameOrErr));
+    } else {
+      FileNameOrErr = M.getFullName();
+      failIfError(FileNameOrErr.takeError());
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, *FileNameOrErr);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(*FileNameOrErr));
+    }
+  }
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
     Expected<std::string> FileNameOrErr = M.getFullName();
@@ -568,6 +632,23 @@ static void addMember(std::vector<NewArchiveMember> &Members,
   Expected<NewArchiveMember> NMOrErr =
       NewArchiveMember::getFile(FileName, Deterministic);
   failIfError(NMOrErr.takeError(), FileName);
+  StringSaver Saver(Alloc);
+  // For regular archives, use the basename of the object path for the member
+  // name. For thin archives, use the full relative paths so the file resolves
+  // correctly.
+  if (!Thin) {
+    NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
+  } else {
+    if (sys::path::is_absolute(FileName))
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(FileName));
+    else {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, FileName);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName));
+    }
+  }
+
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
     object::Archive &Lib = readLibrary(FileName);
@@ -581,8 +662,6 @@ static void addMember(std::vector<NewArchiveMember> &Members,
       return;
     }
   }
-  // Use the basename of the object path for the member name.
-  NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
   Members.push_back(std::move(*NMOrErr));
 }
 
@@ -597,27 +676,29 @@ enum InsertAction {
 static InsertAction computeInsertAction(ArchiveOperation Operation,
                                         const object::Archive::Child &Member,
                                         StringRef Name,
-                                        std::vector<StringRef>::iterator &Pos) {
+                                        std::vector<StringRef>::iterator &Pos,
+                                        StringMap<int> &MemberCount) {
   if (Operation == QuickAppend || Members.empty())
     return IA_AddOldMember;
-
-  auto MI = find_if(Members, [Name](StringRef Path) {
-    return Name == sys::path::filename(Path);
-  });
+  auto MI = find_if(
+      Members, [Name](StringRef Path) { return Name == normalizePath(Path); });
 
   if (MI == Members.end())
     return IA_AddOldMember;
 
   Pos = MI;
 
-  if (Operation == Delete)
+  if (Operation == Delete) {
+    if (CountParam && ++MemberCount[Name] != CountParam)
+      return IA_AddOldMember;
     return IA_Delete;
+  }
 
   if (Operation == Move)
     return IA_MoveOldMember;
 
   if (Operation == ReplaceOrInsert) {
-    StringRef PosName = sys::path::filename(RelPos);
+    StringRef PosName = normalizePath(RelPos);
     if (!OnlyUpdate) {
       if (PosName.empty())
         return IA_AddNewMember;
@@ -651,9 +732,10 @@ computeNewArchiveMembers(ArchiveOperation Operation,
   std::vector<NewArchiveMember> Ret;
   std::vector<NewArchiveMember> Moved;
   int InsertPos = -1;
-  StringRef PosName = sys::path::filename(RelPos);
+  StringRef PosName = normalizePath(RelPos);
   if (OldArchive) {
     Error Err = Error::success();
+    StringMap<int> MemberCount;
     for (auto &Child : OldArchive->children(Err)) {
       int Pos = Ret.size();
       Expected<StringRef> NameOrErr = Child.getName();
@@ -669,10 +751,10 @@ computeNewArchiveMembers(ArchiveOperation Operation,
 
       std::vector<StringRef>::iterator MemberI = Members.end();
       InsertAction Action =
-          computeInsertAction(Operation, Child, Name, MemberI);
+          computeInsertAction(Operation, Child, Name, MemberI, MemberCount);
       switch (Action) {
       case IA_AddOldMember:
-        addChildMember(Ret, Child);
+        addChildMember(Ret, Child, /*FlattenArchive=*/Thin);
         break;
       case IA_AddNewMember:
         addMember(Ret, *MemberI);
@@ -680,13 +762,18 @@ computeNewArchiveMembers(ArchiveOperation Operation,
       case IA_Delete:
         break;
       case IA_MoveOldMember:
-        addChildMember(Moved, Child);
+        addChildMember(Moved, Child, /*FlattenArchive=*/Thin);
         break;
       case IA_MoveNewMember:
         addMember(Moved, *MemberI);
         break;
       }
-      if (MemberI != Members.end())
+      // When processing elements with the count param, we need to preserve the
+      // full members list when iterating over all archive members. For
+      // instance, "llvm-ar dN 2 archive.a member.o" should delete the second
+      // file named member.o it sees; we are not done with member.o the first
+      // time we see it in the archive.
+      if (MemberI != Members.end() && !CountParam)
         Members.erase(MemberI);
     }
     failIfError(std::move(Err));
@@ -843,6 +930,8 @@ static int performOperation(ArchiveOperation Operation,
     EC = errorToErrorCode(std::move(Err));
     failIfError(EC,
                 "error loading '" + ArchiveName + "': " + EC.message() + "!");
+    if (Archive.isThin())
+      CompareFullPath = true;
     performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers);
     return 0;
   }
@@ -864,7 +953,7 @@ static int performOperation(ArchiveOperation Operation,
 }
 
 static void runMRIScript() {
-  enum class MRICommand { AddLib, AddMod, Create, Delete, Save, End, Invalid };
+  enum class MRICommand { AddLib, AddMod, Create, CreateThin, Delete, Save, End, Invalid };
 
   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getSTDIN();
   failIfError(Buf.getError());
@@ -888,6 +977,7 @@ static void runMRIScript() {
                        .Case("addlib", MRICommand::AddLib)
                        .Case("addmod", MRICommand::AddMod)
                        .Case("create", MRICommand::Create)
+                       .Case("createthin", MRICommand::CreateThin)
                        .Case("delete", MRICommand::Delete)
                        .Case("save", MRICommand::Save)
                        .Case("end", MRICommand::End)
@@ -899,7 +989,7 @@ static void runMRIScript() {
       {
         Error Err = Error::success();
         for (auto &Member : Lib.children(Err))
-          addChildMember(NewMembers, Member);
+          addChildMember(NewMembers, Member, /*FlattenArchive=*/Thin);
         failIfError(std::move(Err));
       }
       break;
@@ -907,6 +997,9 @@ static void runMRIScript() {
     case MRICommand::AddMod:
       addMember(NewMembers, Rest);
       break;
+    case MRICommand::CreateThin:
+      Thin = true;
+      LLVM_FALLTHROUGH;
     case MRICommand::Create:
       Create = true;
       if (!ArchiveName.empty())
@@ -916,7 +1009,7 @@ static void runMRIScript() {
       ArchiveName = Rest;
       break;
     case MRICommand::Delete: {
-      StringRef Name = sys::path::filename(Rest);
+      StringRef Name = normalizePath(Rest);
       llvm::erase_if(NewMembers,
                      [=](NewArchiveMember &M) { return M.MemberName == Name; });
       break;
@@ -951,7 +1044,6 @@ static bool handleGenericOption(StringRef arg) {
 
 static int ar_main(int argc, char **argv) {
   SmallVector<const char *, 0> Argv(argv, argv + argc);
-  BumpPtrAllocator Alloc;
   StringSaver Saver(Alloc);
   cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
   for (size_t i = 1; i < Argv.size(); ++i) {
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index bb4233aa9ba0..234fef907a38 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -1,9 +1,8 @@
 //===--- llvm-as.cpp - The low-level LLVM assembler -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -31,38 +30,43 @@
 #include <memory>
 using namespace llvm;
 
+static cl::OptionCategory AsCat("llvm-as Options"); // llvm-as's own flags.
+
 static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::desc("<input .llvm file>"),
                                           cl::init("-"));
 
 static cl::opt<std::string> OutputFilename("o",
                                            cl::desc("Override output filename"),
-                                           cl::value_desc("filename"));
+                                           cl::value_desc("filename"),
+                                           cl::cat(AsCat));
 
-static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"));
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
+                           cl::cat(AsCat));
 
 static cl::opt<bool> DisableOutput("disable-output", cl::desc("Disable output"),
-                                   cl::init(false));
+                                   cl::init(false), cl::cat(AsCat));
 
 static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
-                                    cl::init(false));
+                                    cl::init(false), cl::cat(AsCat));
 
 static cl::opt<bool> DumpAsm("d", cl::desc("Print assembly as parsed"),
-                             cl::Hidden);
+                             cl::Hidden, cl::cat(AsCat));
 
 static cl::opt<bool>
     DisableVerify("disable-verify", cl::Hidden,
-                  cl::desc("Do not run verifier on input LLVM (dangerous!)"));
+                  cl::desc("Do not run verifier on input LLVM (dangerous!)"),
+                  cl::cat(AsCat));
 
 static cl::opt<bool> PreserveBitcodeUseListOrder(
     "preserve-bc-uselistorder",
     cl::desc("Preserve use-list order when writing LLVM bitcode."),
-    cl::init(true), cl::Hidden);
+    cl::init(true), cl::Hidden, cl::cat(AsCat));
 
 static cl::opt<std::string> ClDataLayout("data-layout",
                                          cl::desc("data layout string to use"),
                                          cl::value_desc("layout-string"),
-                                         cl::init(""));
+                                         cl::init(""), cl::cat(AsCat));
 
 static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
   // Infer the output filename if needed.
@@ -110,6 +114,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   LLVMContext Context;
+  cl::HideUnrelatedOptions(AsCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
 
   // Parse the file now...
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 789a666cb41a..01cba1f6e3c9 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-bcanalyzer.cpp - Bitcode Analyzer --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,22 +26,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitstreamReader.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Bitcode/BitcodeAnalyzer.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SHA1.h"
-#include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
+#include <memory>
 using namespace llvm;
 
 static cl::opt<std::string>
-  InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+    InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
 
 static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
 
@@ -53,951 +48,66 @@ static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
 static cl::opt<bool> NoHistogram("disable-histogram",
                                  cl::desc("Do not print per-code histogram"));
 
-static cl::opt<bool>
-NonSymbolic("non-symbolic",
-            cl::desc("Emit numeric info in dump even if"
-                     " symbolic info is available"));
+static cl::opt<bool> NonSymbolic("non-symbolic",
+                                 cl::desc("Emit numeric info in dump even if"
+                                          " symbolic info is available"));
 
 static cl::opt<std::string>
-  BlockInfoFilename("block-info",
-                    cl::desc("Use the BLOCK_INFO from the given file"));
+    BlockInfoFilename("block-info",
+                      cl::desc("Use the BLOCK_INFO from the given file"));
 
 static cl::opt<bool>
-  ShowBinaryBlobs("show-binary-blobs",
-                  cl::desc("Print binary blobs using hex escapes"));
+    ShowBinaryBlobs("show-binary-blobs",
+                    cl::desc("Print binary blobs using hex escapes"));
 
 static cl::opt<std::string> CheckHash(
     "check-hash",
     cl::desc("Check module hash using the argument as a string table"));
 
-namespace {
-
-/// CurStreamTypeType - A type for CurStreamType
-enum CurStreamTypeType {
-  UnknownBitstream,
-  LLVMIRBitstream,
-  ClangSerializedASTBitstream,
-  ClangSerializedDiagnosticsBitstream,
-};
-
-}
-
-/// GetBlockName - Return a symbolic block name if known, otherwise return
-/// null.
-static const char *GetBlockName(unsigned BlockID,
-                                const BitstreamBlockInfo &BlockInfo,
-                                CurStreamTypeType CurStreamType) {
-  // Standard blocks for all bitcode files.
-  if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
-    if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
-      return "BLOCKINFO_BLOCK";
-    return nullptr;
-  }
-
-  // Check to see if we have a blockinfo record for this block, with a name.
-  if (const BitstreamBlockInfo::BlockInfo *Info =
-          BlockInfo.getBlockInfo(BlockID)) {
-    if (!Info->Name.empty())
-      return Info->Name.c_str();
-  }
-
-
-  if (CurStreamType != LLVMIRBitstream) return nullptr;
-
-  switch (BlockID) {
-  default:                                 return nullptr;
-  case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: return "OPERAND_BUNDLE_TAGS_BLOCK";
-  case bitc::MODULE_BLOCK_ID:              return "MODULE_BLOCK";
-  case bitc::PARAMATTR_BLOCK_ID:           return "PARAMATTR_BLOCK";
-  case bitc::PARAMATTR_GROUP_BLOCK_ID:     return "PARAMATTR_GROUP_BLOCK_ID";
-  case bitc::TYPE_BLOCK_ID_NEW:            return "TYPE_BLOCK_ID";
-  case bitc::CONSTANTS_BLOCK_ID:           return "CONSTANTS_BLOCK";
-  case bitc::FUNCTION_BLOCK_ID:            return "FUNCTION_BLOCK";
-  case bitc::IDENTIFICATION_BLOCK_ID:
-                                           return "IDENTIFICATION_BLOCK_ID";
-  case bitc::VALUE_SYMTAB_BLOCK_ID:        return "VALUE_SYMTAB";
-  case bitc::METADATA_BLOCK_ID:            return "METADATA_BLOCK";
-  case bitc::METADATA_KIND_BLOCK_ID:       return "METADATA_KIND_BLOCK";
-  case bitc::METADATA_ATTACHMENT_ID:       return "METADATA_ATTACHMENT_BLOCK";
-  case bitc::USELIST_BLOCK_ID:             return "USELIST_BLOCK_ID";
-  case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
-                                           return "GLOBALVAL_SUMMARY_BLOCK";
-  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
-                                      return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
-  case bitc::MODULE_STRTAB_BLOCK_ID:       return "MODULE_STRTAB_BLOCK";
-  case bitc::STRTAB_BLOCK_ID:              return "STRTAB_BLOCK";
-  case bitc::SYMTAB_BLOCK_ID:              return "SYMTAB_BLOCK";
-  }
-}
-
-/// GetCodeName - Return a symbolic code name if known, otherwise return
-/// null.
-static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
-                               const BitstreamBlockInfo &BlockInfo,
-                               CurStreamTypeType CurStreamType) {
-  // Standard blocks for all bitcode files.
-  if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
-    if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
-      switch (CodeID) {
-      default: return nullptr;
-      case bitc::BLOCKINFO_CODE_SETBID:        return "SETBID";
-      case bitc::BLOCKINFO_CODE_BLOCKNAME:     return "BLOCKNAME";
-      case bitc::BLOCKINFO_CODE_SETRECORDNAME: return "SETRECORDNAME";
-      }
-    }
-    return nullptr;
-  }
-
-  // Check to see if we have a blockinfo record for this record, with a name.
-  if (const BitstreamBlockInfo::BlockInfo *Info =
-        BlockInfo.getBlockInfo(BlockID)) {
-    for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
-      if (Info->RecordNames[i].first == CodeID)
-        return Info->RecordNames[i].second.c_str();
-  }
-
-
-  if (CurStreamType != LLVMIRBitstream) return nullptr;
-
-#define STRINGIFY_CODE(PREFIX, CODE)                                           \
-  case bitc::PREFIX##_##CODE:                                                  \
-    return #CODE;
-  switch (BlockID) {
-  default: return nullptr;
-  case bitc::MODULE_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-      STRINGIFY_CODE(MODULE_CODE, VERSION)
-      STRINGIFY_CODE(MODULE_CODE, TRIPLE)
-      STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
-      STRINGIFY_CODE(MODULE_CODE, ASM)
-      STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
-      STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
-      STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
-      STRINGIFY_CODE(MODULE_CODE, FUNCTION)
-      STRINGIFY_CODE(MODULE_CODE, ALIAS)
-      STRINGIFY_CODE(MODULE_CODE, GCNAME)
-      STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
-      STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
-      STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
-      STRINGIFY_CODE(MODULE_CODE, HASH)
-    }
-  case bitc::IDENTIFICATION_BLOCK_ID:
-    switch (CodeID) {
-    default:
-      return nullptr;
-      STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
-      STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
-    }
-  case bitc::PARAMATTR_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-    // FIXME: Should these be different?
-    case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY";
-    case bitc::PARAMATTR_CODE_ENTRY:     return "ENTRY";
-    }
-  case bitc::PARAMATTR_GROUP_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-    case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY";
-    }
-  case bitc::TYPE_BLOCK_ID_NEW:
-    switch (CodeID) {
-    default: return nullptr;
-      STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
-      STRINGIFY_CODE(TYPE_CODE, VOID)
-      STRINGIFY_CODE(TYPE_CODE, FLOAT)
-      STRINGIFY_CODE(TYPE_CODE, DOUBLE)
-      STRINGIFY_CODE(TYPE_CODE, LABEL)
-      STRINGIFY_CODE(TYPE_CODE, OPAQUE)
-      STRINGIFY_CODE(TYPE_CODE, INTEGER)
-      STRINGIFY_CODE(TYPE_CODE, POINTER)
-      STRINGIFY_CODE(TYPE_CODE, ARRAY)
-      STRINGIFY_CODE(TYPE_CODE, VECTOR)
-      STRINGIFY_CODE(TYPE_CODE, X86_FP80)
-      STRINGIFY_CODE(TYPE_CODE, FP128)
-      STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
-      STRINGIFY_CODE(TYPE_CODE, METADATA)
-      STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
-      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
-      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
-      STRINGIFY_CODE(TYPE_CODE, FUNCTION)
-    }
-
-  case bitc::CONSTANTS_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-      STRINGIFY_CODE(CST_CODE, SETTYPE)
-      STRINGIFY_CODE(CST_CODE, NULL)
-      STRINGIFY_CODE(CST_CODE, UNDEF)
-      STRINGIFY_CODE(CST_CODE, INTEGER)
-      STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
-      STRINGIFY_CODE(CST_CODE, FLOAT)
-      STRINGIFY_CODE(CST_CODE, AGGREGATE)
-      STRINGIFY_CODE(CST_CODE, STRING)
-      STRINGIFY_CODE(CST_CODE, CSTRING)
-      STRINGIFY_CODE(CST_CODE, CE_BINOP)
-      STRINGIFY_CODE(CST_CODE, CE_CAST)
-      STRINGIFY_CODE(CST_CODE, CE_GEP)
-      STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
-      STRINGIFY_CODE(CST_CODE, CE_SELECT)
-      STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
-      STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
-      STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
-      STRINGIFY_CODE(CST_CODE, CE_CMP)
-      STRINGIFY_CODE(CST_CODE, INLINEASM)
-      STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
-      STRINGIFY_CODE(CST_CODE, CE_UNOP)
-    case bitc::CST_CODE_BLOCKADDRESS:    return "CST_CODE_BLOCKADDRESS";
-      STRINGIFY_CODE(CST_CODE, DATA)
-    }
-  case bitc::FUNCTION_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-      STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
-      STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
-      STRINGIFY_CODE(FUNC_CODE, INST_CAST)
-      STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
-      STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
-      STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
-      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
-      STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
-      STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
-      STRINGIFY_CODE(FUNC_CODE, INST_CMP)
-      STRINGIFY_CODE(FUNC_CODE, INST_RET)
-      STRINGIFY_CODE(FUNC_CODE, INST_BR)
-      STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
-      STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
-      STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
-      STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
-      STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
-      STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
-      STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
-      STRINGIFY_CODE(FUNC_CODE, INST_PHI)
-      STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
-      STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
-      STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
-      STRINGIFY_CODE(FUNC_CODE, INST_STORE)
-      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
-      STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
-      STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
-      STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
-      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
-      STRINGIFY_CODE(FUNC_CODE, INST_CALL)
-      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
-      STRINGIFY_CODE(FUNC_CODE, INST_GEP)
-      STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
-      STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
-      STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
-      STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
-      STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
-      STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
-    }
-  case bitc::VALUE_SYMTAB_BLOCK_ID:
-    switch (CodeID) {
-    default: return nullptr;
-    STRINGIFY_CODE(VST_CODE, ENTRY)
-    STRINGIFY_CODE(VST_CODE, BBENTRY)
-    STRINGIFY_CODE(VST_CODE, FNENTRY)
-    STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
-    }
-  case bitc::MODULE_STRTAB_BLOCK_ID:
-    switch (CodeID) {
-    default:
-      return nullptr;
-      STRINGIFY_CODE(MST_CODE, ENTRY)
-      STRINGIFY_CODE(MST_CODE, HASH)
-    }
-  case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
-  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
-    switch (CodeID) {
-    default:
-      return nullptr;
-      STRINGIFY_CODE(FS, PERMODULE)
-      STRINGIFY_CODE(FS, PERMODULE_PROFILE)
-      STRINGIFY_CODE(FS, PERMODULE_RELBF)
-      STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
-      STRINGIFY_CODE(FS, COMBINED)
-      STRINGIFY_CODE(FS, COMBINED_PROFILE)
-      STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
-      STRINGIFY_CODE(FS, ALIAS)
-      STRINGIFY_CODE(FS, COMBINED_ALIAS)
-      STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
-      STRINGIFY_CODE(FS, VERSION)
-      STRINGIFY_CODE(FS, FLAGS)
-      STRINGIFY_CODE(FS, TYPE_TESTS)
-      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
-      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
-      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
-      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
-      STRINGIFY_CODE(FS, VALUE_GUID)
-      STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
-      STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
-      STRINGIFY_CODE(FS, TYPE_ID)
-    }
-  case bitc::METADATA_ATTACHMENT_ID:
-    switch(CodeID) {
-    default:return nullptr;
-      STRINGIFY_CODE(METADATA, ATTACHMENT)
-    }
-  case bitc::METADATA_BLOCK_ID:
-    switch(CodeID) {
-    default:return nullptr;
-      STRINGIFY_CODE(METADATA, STRING_OLD)
-      STRINGIFY_CODE(METADATA, VALUE)
-      STRINGIFY_CODE(METADATA, NODE)
-      STRINGIFY_CODE(METADATA, NAME)
-      STRINGIFY_CODE(METADATA, DISTINCT_NODE)
-      STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
-      STRINGIFY_CODE(METADATA, LOCATION)
-      STRINGIFY_CODE(METADATA, OLD_NODE)
-      STRINGIFY_CODE(METADATA, OLD_FN_NODE)
-      STRINGIFY_CODE(METADATA, NAMED_NODE)
-      STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
-      STRINGIFY_CODE(METADATA, SUBRANGE)
-      STRINGIFY_CODE(METADATA, ENUMERATOR)
-      STRINGIFY_CODE(METADATA, BASIC_TYPE)
-      STRINGIFY_CODE(METADATA, FILE)
-      STRINGIFY_CODE(METADATA, DERIVED_TYPE)
-      STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
-      STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
-      STRINGIFY_CODE(METADATA, COMPILE_UNIT)
-      STRINGIFY_CODE(METADATA, SUBPROGRAM)
-      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
-      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
-      STRINGIFY_CODE(METADATA, NAMESPACE)
-      STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
-      STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
-      STRINGIFY_CODE(METADATA, GLOBAL_VAR)
-      STRINGIFY_CODE(METADATA, LOCAL_VAR)
-      STRINGIFY_CODE(METADATA, EXPRESSION)
-      STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
-      STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
-      STRINGIFY_CODE(METADATA, MODULE)
-      STRINGIFY_CODE(METADATA, MACRO)
-      STRINGIFY_CODE(METADATA, MACRO_FILE)
-      STRINGIFY_CODE(METADATA, STRINGS)
-      STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
-      STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
-      STRINGIFY_CODE(METADATA, INDEX_OFFSET)
-      STRINGIFY_CODE(METADATA, INDEX)
-    }
-  case bitc::METADATA_KIND_BLOCK_ID:
-    switch (CodeID) {
-    default:
-      return nullptr;
-      STRINGIFY_CODE(METADATA, KIND)
-    }
-  case bitc::USELIST_BLOCK_ID:
-    switch(CodeID) {
-    default:return nullptr;
-    case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT";
-    case bitc::USELIST_CODE_BB:      return "USELIST_CODE_BB";
-    }
-
-  case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
-    switch(CodeID) {
-    default: return nullptr;
-    case bitc::OPERAND_BUNDLE_TAG: return "OPERAND_BUNDLE_TAG";
-    }
-  case bitc::STRTAB_BLOCK_ID:
-    switch(CodeID) {
-    default: return nullptr;
-    case bitc::STRTAB_BLOB: return "BLOB";
-    }
-  case bitc::SYMTAB_BLOCK_ID:
-    switch(CodeID) {
-    default: return nullptr;
-    case bitc::SYMTAB_BLOB: return "BLOB";
-    }
-  }
-#undef STRINGIFY_CODE
-}
-
-struct PerRecordStats {
-  unsigned NumInstances;
-  unsigned NumAbbrev;
-  uint64_t TotalBits;
-
-  PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
-};
-
-struct PerBlockIDStats {
-  /// NumInstances - This the number of times this block ID has been seen.
-  unsigned NumInstances;
-
-  /// NumBits - The total size in bits of all of these blocks.
-  uint64_t NumBits;
-
-  /// NumSubBlocks - The total number of blocks these blocks contain.
-  unsigned NumSubBlocks;
-
-  /// NumAbbrevs - The total number of abbreviations.
-  unsigned NumAbbrevs;
-
-  /// NumRecords - The total number of records these blocks contain, and the
-  /// number that are abbreviated.
-  unsigned NumRecords, NumAbbreviatedRecords;
-
-  /// CodeFreq - Keep track of the number of times we see each code.
-  std::vector<PerRecordStats> CodeFreq;
-
-  PerBlockIDStats()
-    : NumInstances(0), NumBits(0),
-      NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
-};
-
-static std::map<unsigned, PerBlockIDStats> BlockIDStats;
-
-
-
-/// ReportError - All bitcode analysis errors go through this function, making this a
-/// good place to breakpoint if debugging.
-static bool ReportError(const Twine &Err) {
-  WithColor::error() << Err << "\n";
-  return true;
-}
-
-static bool decodeMetadataStringsBlob(StringRef Indent,
-                                      ArrayRef<uint64_t> Record,
-                                      StringRef Blob) {
-  if (Blob.empty())
-    return true;
-
-  if (Record.size() != 2)
-    return true;
-
-  unsigned NumStrings = Record[0];
-  unsigned StringsOffset = Record[1];
-  outs() << " num-strings = " << NumStrings << " {\n";
-
-  StringRef Lengths = Blob.slice(0, StringsOffset);
-  SimpleBitstreamCursor R(Lengths);
-  StringRef Strings = Blob.drop_front(StringsOffset);
-  do {
-    if (R.AtEndOfStream())
-      return ReportError("bad length");
-
-    unsigned Size = R.ReadVBR(6);
-    if (Strings.size() < Size)
-      return ReportError("truncated chars");
-
-    outs() << Indent << "    '";
-    outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
-    outs() << "'\n";
-    Strings = Strings.drop_front(Size);
-  } while (--NumStrings);
-
-  outs() << Indent << "  }";
-  return false;
-}
-
-static bool decodeBlob(unsigned Code, unsigned BlockID, StringRef Indent,
-                       ArrayRef<uint64_t> Record, StringRef Blob) {
-  if (BlockID != bitc::METADATA_BLOCK_ID)
-    return true;
-  if (Code != bitc::METADATA_STRINGS)
-    return true;
-
-  return decodeMetadataStringsBlob(Indent, Record, Blob);
-}
-
-/// ParseBlock - Read a block, updating statistics, etc.
-static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
-                       unsigned BlockID, unsigned IndentLevel,
-                       CurStreamTypeType CurStreamType) {
-  std::string Indent(IndentLevel*2, ' ');
-  uint64_t BlockBitStart = Stream.GetCurrentBitNo();
-
-  // Get the statistics for this BlockID.
-  PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
-
-  BlockStats.NumInstances++;
-
-  // BLOCKINFO is a special part of the stream.
-  bool DumpRecords = Dump;
-  if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
-    if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
-    Optional<BitstreamBlockInfo> NewBlockInfo =
-        Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
-    if (!NewBlockInfo)
-      return ReportError("Malformed BlockInfoBlock");
-    BlockInfo = std::move(*NewBlockInfo);
-    Stream.JumpToBit(BlockBitStart);
-    // It's not really interesting to dump the contents of the blockinfo block.
-    DumpRecords = false;
-  }
-
-  unsigned NumWords = 0;
-  if (Stream.EnterSubBlock(BlockID, &NumWords))
-    return ReportError("Malformed block record");
-
-  // Keep it for later, when we see a MODULE_HASH record
-  uint64_t BlockEntryPos = Stream.getCurrentByteNo();
-
-  const char *BlockName = nullptr;
-  if (DumpRecords) {
-    outs() << Indent << "<";
-    if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
-      outs() << BlockName;
-    else
-      outs() << "UnknownBlock" << BlockID;
-
-    if (NonSymbolic && BlockName)
-      outs() << " BlockID=" << BlockID;
-
-    outs() << " NumWords=" << NumWords
-           << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
-  }
-
-  SmallVector<uint64_t, 64> Record;
-
-  // Keep the offset to the metadata index if seen.
-  uint64_t MetadataIndexOffset = 0;
-
-  // Read all the records for this block.
-  while (1) {
-    if (Stream.AtEndOfStream())
-      return ReportError("Premature end of bitstream");
-
-    uint64_t RecordStartBit = Stream.GetCurrentBitNo();
-
-    BitstreamEntry Entry =
-      Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-    
-    switch (Entry.Kind) {
-    case BitstreamEntry::Error:
-      return ReportError("malformed bitcode file");
-    case BitstreamEntry::EndBlock: {
-      uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
-      BlockStats.NumBits += BlockBitEnd-BlockBitStart;
-      if (DumpRecords) {
-        outs() << Indent << "</";
-        if (BlockName)
-          outs() << BlockName << ">\n";
-        else
-          outs() << "UnknownBlock" << BlockID << ">\n";
-      }
-      return false;
-    }
-        
-    case BitstreamEntry::SubBlock: {
-      uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
-      if (ParseBlock(Stream, BlockInfo, Entry.ID, IndentLevel + 1,
-                     CurStreamType))
-        return true;
-      ++BlockStats.NumSubBlocks;
-      uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
-      
-      // Don't include subblock sizes in the size of this block.
-      BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
-      continue;
-    }
-    case BitstreamEntry::Record:
-      // The interesting case.
-      break;
-    }
-
-    if (Entry.ID == bitc::DEFINE_ABBREV) {
-      Stream.ReadAbbrevRecord();
-      ++BlockStats.NumAbbrevs;
-      continue;
-    }
-    
-    Record.clear();
-
-    ++BlockStats.NumRecords;
-
-    StringRef Blob;
-    uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
-    unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
-
-    // Increment the # occurrences of this code.
-    if (BlockStats.CodeFreq.size() <= Code)
-      BlockStats.CodeFreq.resize(Code+1);
-    BlockStats.CodeFreq[Code].NumInstances++;
-    BlockStats.CodeFreq[Code].TotalBits +=
-      Stream.GetCurrentBitNo()-RecordStartBit;
-    if (Entry.ID != bitc::UNABBREV_RECORD) {
-      BlockStats.CodeFreq[Code].NumAbbrev++;
-      ++BlockStats.NumAbbreviatedRecords;
-    }
-
-    if (DumpRecords) {
-      outs() << Indent << "  <";
-      if (const char *CodeName =
-              GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
-        outs() << CodeName;
-      else
-        outs() << "UnknownCode" << Code;
-      if (NonSymbolic && GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
-        outs() << " codeid=" << Code;
-      const BitCodeAbbrev *Abbv = nullptr;
-      if (Entry.ID != bitc::UNABBREV_RECORD) {
-        Abbv = Stream.getAbbrev(Entry.ID);
-        outs() << " abbrevid=" << Entry.ID;
-      }
-
-      for (unsigned i = 0, e = Record.size(); i != e; ++i)
-        outs() << " op" << i << "=" << (int64_t)Record[i];
-
-      // If we found a metadata index, let's verify that we had an offset before
-      // and validate its forward reference offset was correct!
-      if (BlockID == bitc::METADATA_BLOCK_ID) {
-        if (Code == bitc::METADATA_INDEX_OFFSET) {
-          if (Record.size() != 2)
-            outs() << "(Invalid record)";
-          else {
-            auto Offset = Record[0] + (Record[1] << 32);
-            MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
-          }
-        }
-        if (Code == bitc::METADATA_INDEX) {
-          outs() << " (offset ";
-          if (MetadataIndexOffset == RecordStartBit)
-            outs() << "match)";
-          else
-            outs() << "mismatch: " << MetadataIndexOffset << " vs "
-                   << RecordStartBit << ")";
-        }
-      }
-
-      // If we found a module hash, let's verify that it matches!
-      if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
-          !CheckHash.empty()) {
-        if (Record.size() != 5)
-          outs() << " (invalid)";
-        else {
-          // Recompute the hash and compare it to the one in the bitcode
-          SHA1 Hasher;
-          StringRef Hash;
-          Hasher.update(CheckHash);
-          {
-            int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
-            auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
-            Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
-            Hash = Hasher.result();
-          }
-          SmallString<20> RecordedHash;
-          RecordedHash.resize(20);
-          int Pos = 0;
-          for (auto &Val : Record) {
-            assert(!(Val >> 32) && "Unexpected high bits set");
-            RecordedHash[Pos++] = (Val >> 24) & 0xFF;
-            RecordedHash[Pos++] = (Val >> 16) & 0xFF;
-            RecordedHash[Pos++] = (Val >> 8) & 0xFF;
-            RecordedHash[Pos++] = (Val >> 0) & 0xFF;
-          }
-          if (Hash == RecordedHash)
-            outs() << " (match)";
-          else
-            outs() << " (!mismatch!)";
-        }
-      }
-
-      outs() << "/>";
-
-      if (Abbv) {
-        for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
-          const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
-          if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
-            continue;
-          assert(i + 2 == e && "Array op not second to last");
-          std::string Str;
-          bool ArrayIsPrintable = true;
-          for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
-            if (!isPrint(static_cast<unsigned char>(Record[j]))) {
-              ArrayIsPrintable = false;
-              break;
-            }
-            Str += (char)Record[j];
-          }
-          if (ArrayIsPrintable)
-            outs() << " record string = '" << Str << "'";
-          break;
-        }
-      }
-
-      if (Blob.data() && decodeBlob(Code, BlockID, Indent, Record, Blob)) {
-        outs() << " blob data = ";
-        if (ShowBinaryBlobs) {
-          outs() << "'";
-          outs().write_escaped(Blob, /*hex=*/true) << "'";
-        } else {
-          bool BlobIsPrintable = true;
-          for (unsigned i = 0, e = Blob.size(); i != e; ++i)
-            if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
-              BlobIsPrintable = false;
-              break;
-            }
-
-          if (BlobIsPrintable)
-            outs() << "'" << Blob << "'";
-          else
-            outs() << "unprintable, " << Blob.size() << " bytes.";          
-        }
-      }
-
-      outs() << "\n";
-    }
-
-    // Make sure that we can skip the current record.
-    Stream.JumpToBit(CurrentRecordPos);
-    Stream.skipRecord(Entry.ID);
-  }
-}
-
-static void PrintSize(double Bits) {
-  outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32));
-}
-static void PrintSize(uint64_t Bits) {
-  outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits,
-                   (double)Bits/8, (unsigned long)(Bits/32));
+static Error reportError(StringRef Message) { // Message need not end in NUL.
+  return make_error<StringError>(Message, std::make_error_code(std::errc::illegal_byte_sequence));
 }
 
-static CurStreamTypeType ReadSignature(BitstreamCursor &Stream) {
-  char Signature[6];
-  Signature[0] = Stream.Read(8);
-  Signature[1] = Stream.Read(8);
-
-  // Autodetect the file contents, if it is one we know.
-  if (Signature[0] == 'C' && Signature[1] == 'P') {
-    Signature[2] = Stream.Read(8);
-    Signature[3] = Stream.Read(8);
-    if (Signature[2] == 'C' && Signature[3] == 'H')
-      return ClangSerializedASTBitstream;
-  } else if (Signature[0] == 'D' && Signature[1] == 'I') {
-    Signature[2] = Stream.Read(8);
-    Signature[3] = Stream.Read(8);
-    if (Signature[2] == 'A' && Signature[3] == 'G')
-      return ClangSerializedDiagnosticsBitstream;
-  } else {
-    Signature[2] = Stream.Read(4);
-    Signature[3] = Stream.Read(4);
-    Signature[4] = Stream.Read(4);
-    Signature[5] = Stream.Read(4);
-    if (Signature[0] == 'B' && Signature[1] == 'C' &&
-        Signature[2] == 0x0 && Signature[3] == 0xC &&
-        Signature[4] == 0xE && Signature[5] == 0xD)
-      return LLVMIRBitstream;
-  }
-  return UnknownBitstream;
-}
-
-static bool openBitcodeFile(StringRef Path,
-                            std::unique_ptr<MemoryBuffer> &MemBuf,
-                            BitstreamCursor &Stream,
-                            CurStreamTypeType &CurStreamType) {
+static Expected<std::unique_ptr<MemoryBuffer>> openBitcodeFile(StringRef Path) {
   // Read the input file.
-  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
-      MemoryBuffer::getFileOrSTDIN(Path);
-  if (std::error_code EC = MemBufOrErr.getError())
-    return ReportError(Twine("ReportError reading '") + Path + "': " + EC.message());
-  MemBuf = std::move(MemBufOrErr.get());
-
-  if (MemBuf->getBufferSize() & 3)
-    return ReportError("Bitcode stream should be a multiple of 4 bytes in length");
-
-  const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
-  const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
+  Expected<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
+      errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Path));
+  if (Error E = MemBufOrErr.takeError())
+    return std::move(E);
 
-  // If we have a wrapper header, parse it and ignore the non-bc file contents.
-  // The magic number is 0x0B17C0DE stored in little endian.
-  if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
-    if (MemBuf->getBufferSize() < BWH_HeaderSize)
-      return ReportError("Invalid bitcode wrapper header");
+  std::unique_ptr<MemoryBuffer> MemBuf = std::move(*MemBufOrErr);
 
-    if (Dump) {
-      unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
-      unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
-      unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
-      unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
-      unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
-
-      outs() << "<BITCODE_WRAPPER_HEADER"
-             << " Magic=" << format_hex(Magic, 10)
-             << " Version=" << format_hex(Version, 10)
-             << " Offset=" << format_hex(Offset, 10)
-             << " Size=" << format_hex(Size, 10)
-             << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
-    }
-
-    if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
-      return ReportError("Invalid bitcode wrapper header");
-  }
-
-  Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
-  CurStreamType = ReadSignature(Stream);
-
-  return false;
+  if (MemBuf->getBufferSize() & 3)
+    return reportError(
+        "Bitcode stream should be a multiple of 4 bytes in length");
+  return std::move(MemBuf);
 }
 
-/// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
-static int AnalyzeBitcode() {
-  std::unique_ptr<MemoryBuffer> StreamBuffer;
-  BitstreamCursor Stream;
-  BitstreamBlockInfo BlockInfo;
-  CurStreamTypeType CurStreamType;
-  if (openBitcodeFile(InputFilename, StreamBuffer, Stream, CurStreamType))
-    return true;
-  Stream.setBlockInfo(&BlockInfo);
-
-  // Read block info from BlockInfoFilename, if specified.
-  // The block info must be a top-level block.
-  if (!BlockInfoFilename.empty()) {
-    std::unique_ptr<MemoryBuffer> BlockInfoBuffer;
-    BitstreamCursor BlockInfoCursor;
-    CurStreamTypeType BlockInfoStreamType;
-    if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoCursor,
-                        BlockInfoStreamType))
-      return true;
-
-    while (!BlockInfoCursor.AtEndOfStream()) {
-      unsigned Code = BlockInfoCursor.ReadCode();
-      if (Code != bitc::ENTER_SUBBLOCK)
-        return ReportError("Invalid record at top-level in block info file");
-
-      unsigned BlockID = BlockInfoCursor.ReadSubBlockID();
-      if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
-        Optional<BitstreamBlockInfo> NewBlockInfo =
-            BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
-        if (!NewBlockInfo)
-          return ReportError("Malformed BlockInfoBlock in block info file");
-        BlockInfo = std::move(*NewBlockInfo);
-        break;
-      }
-
-      BlockInfoCursor.SkipBlock();
-    }
-  }
-
-  unsigned NumTopBlocks = 0;
-
-  // Parse the top-level structure.  We only allow blocks at the top-level.
-  while (!Stream.AtEndOfStream()) {
-    unsigned Code = Stream.ReadCode();
-    if (Code != bitc::ENTER_SUBBLOCK)
-      return ReportError("Invalid record at top-level");
-
-    unsigned BlockID = Stream.ReadSubBlockID();
-
-    if (ParseBlock(Stream, BlockInfo, BlockID, 0, CurStreamType))
-      return true;
-    ++NumTopBlocks;
-  }
-
-  if (Dump) outs() << "\n\n";
-
-  uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
-  // Print a summary of the read file.
-  outs() << "Summary of " << InputFilename << ":\n";
-  outs() << "         Total size: ";
-  PrintSize(BufferSizeBits);
-  outs() << "\n";
-  outs() << "        Stream type: ";
-  switch (CurStreamType) {
-  case UnknownBitstream:
-    outs() << "unknown\n";
-    break;
-  case LLVMIRBitstream:
-    outs() << "LLVM IR\n";
-    break;
-  case ClangSerializedASTBitstream:
-    outs() << "Clang Serialized AST\n";
-    break;
-  case ClangSerializedDiagnosticsBitstream:
-    outs() << "Clang Serialized Diagnostics\n";
-    break;
-  }
-  outs() << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
-  outs() << "\n";
-
-  // Emit per-block stats.
-  outs() << "Per-block Summary:\n";
-  for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
-       E = BlockIDStats.end(); I != E; ++I) {
-    outs() << "  Block ID #" << I->first;
-    if (const char *BlockName =
-            GetBlockName(I->first, BlockInfo, CurStreamType))
-      outs() << " (" << BlockName << ")";
-    outs() << ":\n";
-
-    const PerBlockIDStats &Stats = I->second;
-    outs() << "      Num Instances: " << Stats.NumInstances << "\n";
-    outs() << "         Total Size: ";
-    PrintSize(Stats.NumBits);
-    outs() << "\n";
-    double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
-    outs() << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
-    if (Stats.NumInstances > 1) {
-      outs() << "       Average Size: ";
-      PrintSize(Stats.NumBits/(double)Stats.NumInstances);
-      outs() << "\n";
-      outs() << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
-             << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
-      outs() << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
-             << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
-      outs() << "    Tot/Avg Records: " << Stats.NumRecords << "/"
-             << Stats.NumRecords/(double)Stats.NumInstances << "\n";
-    } else {
-      outs() << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
-      outs() << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
-      outs() << "        Num Records: " << Stats.NumRecords << "\n";
-    }
-    if (Stats.NumRecords) {
-      double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
-      outs() << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
-    }
-    outs() << "\n";
-
-    // Print a histogram of the codes we see.
-    if (!NoHistogram && !Stats.CodeFreq.empty()) {
-      std::vector<std::pair<unsigned, unsigned> > FreqPairs;  // <freq,code>
-      for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
-        if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
-          FreqPairs.push_back(std::make_pair(Freq, i));
-      std::stable_sort(FreqPairs.begin(), FreqPairs.end());
-      std::reverse(FreqPairs.begin(), FreqPairs.end());
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+  cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
+  ExitOnError ExitOnErr("llvm-bcanalyzer: ");
 
-      outs() << "\tRecord Histogram:\n";
-      outs() << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
-      for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
-        const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
+  std::unique_ptr<MemoryBuffer> MB = ExitOnErr(openBitcodeFile(InputFilename));
+  std::unique_ptr<MemoryBuffer> BlockInfoMB = nullptr;
+  if (!BlockInfoFilename.empty())
+    BlockInfoMB = ExitOnErr(openBitcodeFile(BlockInfoFilename));
 
-        outs() << format("\t\t%7d %9lu",
-                         RecStats.NumInstances,
-                         (unsigned long)RecStats.TotalBits);
+  BitcodeAnalyzer BA(MB->getBuffer(),
+                     BlockInfoMB ? Optional<StringRef>(BlockInfoMB->getBuffer())
+                                 : None);
 
-        if (RecStats.NumInstances > 1)
-          outs() << format(" %9.1f",
-                           (double)RecStats.TotalBits/RecStats.NumInstances);
-        else
-          outs() << "          ";
+  BCDumpOptions O(outs());
+  O.Histogram = !NoHistogram;
+  O.Symbolic = !NonSymbolic;
+  O.ShowBinaryBlobs = ShowBinaryBlobs;
 
-        if (RecStats.NumAbbrev)
-          outs() <<
-              format(" %7.2f",
-                     (double)RecStats.NumAbbrev/RecStats.NumInstances*100);
-        else
-          outs() << "        ";
+  ExitOnErr(
+      BA.analyze(O, CheckHash.empty() ? None : Optional<StringRef>(CheckHash)));
 
-        outs() << "  ";
-        if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first,
-                                               BlockInfo, CurStreamType))
-          outs() << CodeName << "\n";
-        else
-          outs() << "UnknownCode" << FreqPairs[i].second << "\n";
-      }
-      outs() << "\n";
+  if (Dump)
+    outs() << "\n\n";
 
-    }
-  }
+  BA.printStats(O, StringRef(InputFilename.getValue()));
   return 0;
 }
-
-
-int main(int argc, char **argv) {
-  InitLLVM X(argc, argv);
-  cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
-  return AnalyzeBitcode();
-}
diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp
index 728e00e7c3c2..f707e3c7ab53 100644
--- a/tools/llvm-cov/CodeCoverage.cpp
+++ b/tools/llvm-cov/CodeCoverage.cpp
@@ -1,9 +1,8 @@
 //===- CodeCoverage.cpp - Coverage tool based on profiling instrumentation-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1007,10 +1006,23 @@ int CodeCoverageTool::doReport(int argc, const char **argv,
 int CodeCoverageTool::doExport(int argc, const char **argv,
                                CommandLineParserType commandLineParser) {
 
+  cl::OptionCategory ExportCategory("Exporting options");
+
+  cl::opt<bool> SkipExpansions("skip-expansions", cl::Optional,
+                               cl::desc("Don't export expanded source regions"),
+                               cl::cat(ExportCategory));
+
+  cl::opt<bool> SkipFunctions("skip-functions", cl::Optional,
+                              cl::desc("Don't export per-function data"),
+                              cl::cat(ExportCategory));
+
   auto Err = commandLineParser(argc, argv);
   if (Err)
     return Err;
 
+  ViewOpts.SkipExpansions = SkipExpansions;
+  ViewOpts.SkipFunctions = SkipFunctions;
+
   if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text &&
       ViewOpts.Format != CoverageViewOptions::OutputFormat::Lcov) {
     error("Coverage data can only be exported as textual JSON or an "
diff --git a/tools/llvm-cov/CoverageExporter.h b/tools/llvm-cov/CoverageExporter.h
index b226d68813d9..751e55dc0916 100644
--- a/tools/llvm-cov/CoverageExporter.h
+++ b/tools/llvm-cov/CoverageExporter.h
@@ -1,9 +1,8 @@
 //===- CoverageExporter.h - Code coverage exporter ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageExporterJson.cpp b/tools/llvm-cov/CoverageExporterJson.cpp
index 22243f8e2c3e..181d428ed9d8 100644
--- a/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/tools/llvm-cov/CoverageExporterJson.cpp
@@ -1,9 +1,8 @@
 //===- CoverageExporterJson.cpp - Code coverage export --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -43,7 +42,14 @@
 
 #include "CoverageExporterJson.h"
 #include "CoverageReport.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/JSON.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include <algorithm>
+#include <mutex>
+#include <utility>
 
 /// The semantic version combined as a string.
 #define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.0"
@@ -128,13 +134,15 @@ json::Array renderFileSegments(const coverage::CoverageData &FileCoverage,
 json::Object renderFile(const coverage::CoverageMapping &Coverage,
                         const std::string &Filename,
                         const FileCoverageSummary &FileReport,
-                        bool ExportSummaryOnly) {
+                        const CoverageViewOptions &Options) {
   json::Object File({{"filename", Filename}});
-  if (!ExportSummaryOnly) {
+  if (!Options.ExportSummaryOnly) {
     // Calculate and render detailed coverage information for given file.
     auto FileCoverage = Coverage.getCoverageForFile(Filename);
     File["segments"] = renderFileSegments(FileCoverage, FileReport);
-    File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+    if (!Options.SkipExpansions) {
+      File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+    }
   }
   File["summary"] = renderSummary(FileReport);
   return File;
@@ -143,11 +151,28 @@ json::Object renderFile(const coverage::CoverageMapping &Coverage,
 json::Array renderFiles(const coverage::CoverageMapping &Coverage,
                         ArrayRef<std::string> SourceFiles,
                         ArrayRef<FileCoverageSummary> FileReports,
-                        bool ExportSummaryOnly) {
+                        const CoverageViewOptions &Options) {
+  auto NumThreads = Options.NumThreads;
+  if (NumThreads == 0) {
+    NumThreads = std::max(1U, std::min(llvm::heavyweight_hardware_concurrency(),
+                                       unsigned(SourceFiles.size())));
+  }
+  ThreadPool Pool(NumThreads);
   json::Array FileArray;
-  for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
-    FileArray.push_back(renderFile(Coverage, SourceFiles[I], FileReports[I],
-                                   ExportSummaryOnly));
+  std::mutex FileArrayMutex;
+
+  for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I) {
+    auto &SourceFile = SourceFiles[I];
+    auto &FileReport = FileReports[I];
+    Pool.async([&] {
+      auto File = renderFile(Coverage, SourceFile, FileReport, Options);
+      {
+        std::lock_guard<std::mutex> Lock(FileArrayMutex);
+        FileArray.push_back(std::move(File));
+      }
+    });
+  }
+  Pool.wait();
   return FileArray;
 }
 
@@ -178,12 +203,22 @@ void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) {
   FileCoverageSummary Totals = FileCoverageSummary("Totals");
   auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
                                                         SourceFiles, Options);
-  auto Export =
-      json::Object({{"files", renderFiles(Coverage, SourceFiles, FileReports,
-                                          Options.ExportSummaryOnly)},
-                    {"totals", renderSummary(Totals)}});
-  // Skip functions-level information for summary-only export mode.
-  if (!Options.ExportSummaryOnly)
+  auto Files = renderFiles(Coverage, SourceFiles, FileReports, Options);
+  // Sort files in order of their names.
+  std::sort(Files.begin(), Files.end(),
+    [](const json::Value &A, const json::Value &B) {
+      const json::Object *ObjA = A.getAsObject();
+      const json::Object *ObjB = B.getAsObject();
+      assert(ObjA != nullptr && "Value A was not an Object");
+      assert(ObjB != nullptr && "Value B was not an Object");
+      const StringRef FilenameA = ObjA->getString("filename").getValue();
+      const StringRef FilenameB = ObjB->getString("filename").getValue();
+      return FilenameA.compare(FilenameB) < 0;
+    });
+  auto Export = json::Object(
+      {{"files", std::move(Files)}, {"totals", renderSummary(Totals)}});
+  // Skip functions-level information  if necessary.
+  if (!Options.ExportSummaryOnly && !Options.SkipFunctions)
     Export["functions"] = renderFunctions(Coverage.getCoveredFunctions());
 
   auto ExportArray = json::Array({std::move(Export)});
diff --git a/tools/llvm-cov/CoverageExporterJson.h b/tools/llvm-cov/CoverageExporterJson.h
index c37c86b42be9..c19475005552 100644
--- a/tools/llvm-cov/CoverageExporterJson.h
+++ b/tools/llvm-cov/CoverageExporterJson.h
@@ -1,9 +1,8 @@
 //===- CoverageExporterJson.h - Code coverage JSON exporter ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageExporterLcov.cpp b/tools/llvm-cov/CoverageExporterLcov.cpp
index d149ba1a4c87..d9b0c3b0d7a8 100644
--- a/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -1,9 +1,8 @@
 //===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -83,7 +82,7 @@ void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
   OS << "SF:" << Filename << '\n';
 
   if (!ExportSummaryOnly) {
-    renderFunctions(OS, Coverage.getCoveredFunctions());
+    renderFunctions(OS, Coverage.getCoveredFunctions(Filename));
   }
   renderFunctionSummary(OS, FileReport);
 
diff --git a/tools/llvm-cov/CoverageExporterLcov.h b/tools/llvm-cov/CoverageExporterLcov.h
index 539b2dacd384..e8a260bf4937 100644
--- a/tools/llvm-cov/CoverageExporterLcov.h
+++ b/tools/llvm-cov/CoverageExporterLcov.h
@@ -1,9 +1,8 @@
 //===- CoverageExporterLcov.h - Code coverage lcov exporter ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageFilters.cpp b/tools/llvm-cov/CoverageFilters.cpp
index 4dd0f552c7e0..ca241e386e87 100644
--- a/tools/llvm-cov/CoverageFilters.cpp
+++ b/tools/llvm-cov/CoverageFilters.cpp
@@ -1,9 +1,8 @@
 //===- CoverageFilters.cpp - Function coverage mapping filters ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageFilters.h b/tools/llvm-cov/CoverageFilters.h
index 6424ca5a8081..ce56e1607111 100644
--- a/tools/llvm-cov/CoverageFilters.h
+++ b/tools/llvm-cov/CoverageFilters.h
@@ -1,9 +1,8 @@
 //===- CoverageFilters.h - Function coverage mapping filters --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageReport.cpp b/tools/llvm-cov/CoverageReport.cpp
index 607a3ceb30cb..82259542c597 100644
--- a/tools/llvm-cov/CoverageReport.cpp
+++ b/tools/llvm-cov/CoverageReport.cpp
@@ -1,9 +1,8 @@
 //===- CoverageReport.cpp - Code coverage report -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageReport.h b/tools/llvm-cov/CoverageReport.h
index 4a6527e9fe5d..f9a092f510b5 100644
--- a/tools/llvm-cov/CoverageReport.h
+++ b/tools/llvm-cov/CoverageReport.h
@@ -1,9 +1,8 @@
 //===- CoverageReport.h - Code coverage report ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageSummaryInfo.cpp b/tools/llvm-cov/CoverageSummaryInfo.cpp
index 7847a2abf48c..1029f7784040 100644
--- a/tools/llvm-cov/CoverageSummaryInfo.cpp
+++ b/tools/llvm-cov/CoverageSummaryInfo.cpp
@@ -1,9 +1,8 @@
 //===- CoverageSummaryInfo.cpp - Coverage summary for function/file -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageSummaryInfo.h b/tools/llvm-cov/CoverageSummaryInfo.h
index 0845e2ce2e77..97beacb26d07 100644
--- a/tools/llvm-cov/CoverageSummaryInfo.h
+++ b/tools/llvm-cov/CoverageSummaryInfo.h
@@ -1,9 +1,8 @@
 //===- CoverageSummaryInfo.h - Coverage summary for function/file ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cov/CoverageViewOptions.h b/tools/llvm-cov/CoverageViewOptions.h
index c8a472860027..dde0c692ab05 100644
--- a/tools/llvm-cov/CoverageViewOptions.h
+++ b/tools/llvm-cov/CoverageViewOptions.h
@@ -1,9 +1,8 @@
 //===- CoverageViewOptions.h - Code coverage display options -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,6 +34,8 @@ struct CoverageViewOptions {
   bool ShowRegionSummary;
   bool ShowInstantiationSummary;
   bool ExportSummaryOnly;
+  bool SkipExpansions;
+  bool SkipFunctions;
   OutputFormat Format;
   std::string ShowOutputDirectory;
   std::vector<std::string> DemanglerOpts;
diff --git a/tools/llvm-cov/RenderingSupport.h b/tools/llvm-cov/RenderingSupport.h
index 2cfe24919142..0674fbac9a3c 100644
--- a/tools/llvm-cov/RenderingSupport.h
+++ b/tools/llvm-cov/RenderingSupport.h
@@ -1,9 +1,8 @@
 //===- RenderingSupport.h - output stream rendering support functions  ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-cov/SourceCoverageView.cpp b/tools/llvm-cov/SourceCoverageView.cpp
index cebaf63adb12..616f667e2c84 100644
--- a/tools/llvm-cov/SourceCoverageView.cpp
+++ b/tools/llvm-cov/SourceCoverageView.cpp
@@ -1,9 +1,8 @@
 //===- SourceCoverageView.cpp - Code coverage view for source code --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -190,8 +189,8 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
 
   // We need the expansions and instantiations sorted so we can go through them
   // while we iterate lines.
-  std::stable_sort(ExpansionSubViews.begin(), ExpansionSubViews.end());
-  std::stable_sort(InstantiationSubViews.begin(), InstantiationSubViews.end());
+  llvm::stable_sort(ExpansionSubViews);
+  llvm::stable_sort(InstantiationSubViews);
   auto NextESV = ExpansionSubViews.begin();
   auto EndESV = ExpansionSubViews.end();
   auto NextISV = InstantiationSubViews.begin();
diff --git a/tools/llvm-cov/SourceCoverageView.h b/tools/llvm-cov/SourceCoverageView.h
index e3a2f9e5c0b4..9ae928443651 100644
--- a/tools/llvm-cov/SourceCoverageView.h
+++ b/tools/llvm-cov/SourceCoverageView.h
@@ -1,9 +1,8 @@
 //===- SourceCoverageView.h - Code coverage view for source code ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/llvm-cov/SourceCoverageViewHTML.cpp b/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 3f730bb7bc82..e3332245f9c8 100644
--- a/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -1,9 +1,8 @@
 //===- SourceCoverageViewHTML.cpp - A html code coverage view -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/llvm-cov/SourceCoverageViewHTML.h b/tools/llvm-cov/SourceCoverageViewHTML.h
index cb41fcaf37b9..9834040008a6 100644
--- a/tools/llvm-cov/SourceCoverageViewHTML.h
+++ b/tools/llvm-cov/SourceCoverageViewHTML.h
@@ -1,9 +1,8 @@
 //===- SourceCoverageViewHTML.h - A html code coverage view ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/llvm-cov/SourceCoverageViewText.cpp b/tools/llvm-cov/SourceCoverageViewText.cpp
index aac70baed613..fcabee2ee69d 100644
--- a/tools/llvm-cov/SourceCoverageViewText.cpp
+++ b/tools/llvm-cov/SourceCoverageViewText.cpp
@@ -1,9 +1,8 @@
 //===- SourceCoverageViewText.cpp - A text-based code coverage view -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/llvm-cov/SourceCoverageViewText.h b/tools/llvm-cov/SourceCoverageViewText.h
index a46f35cc6495..c8c4632c3b9d 100644
--- a/tools/llvm-cov/SourceCoverageViewText.h
+++ b/tools/llvm-cov/SourceCoverageViewText.h
@@ -1,9 +1,8 @@
 //===- SourceCoverageViewText.h - A text-based code coverage view ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/llvm-cov/TestingSupport.cpp b/tools/llvm-cov/TestingSupport.cpp
index 16a1c2665299..3ee318c9c640 100644
--- a/tools/llvm-cov/TestingSupport.cpp
+++ b/tools/llvm-cov/TestingSupport.cpp
@@ -1,9 +1,8 @@
 //===- TestingSupport.cpp - Convert objects files into test files --------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -70,9 +69,18 @@ int convertForTestingMain(int argc, const char *argv[]) {
   uint64_t ProfileNamesAddress = ProfileNames.getAddress();
   StringRef CoverageMappingData;
   StringRef ProfileNamesData;
-  if (CoverageMapping.getContents(CoverageMappingData) ||
-      ProfileNames.getContents(ProfileNamesData))
+  if (Expected<StringRef> E = CoverageMapping.getContents())
+    CoverageMappingData = *E;
+  else {
+    consumeError(E.takeError());
+    return 1;
+  }
+  if (Expected<StringRef> E = ProfileNames.getContents())
+    ProfileNamesData = *E;
+  else {
+    consumeError(E.takeError());
     return 1;
+  }
 
   int FD;
   if (auto Err = sys::fs::openFileForWrite(OutputFilename, FD)) {
diff --git a/tools/llvm-cov/gcov.cpp b/tools/llvm-cov/gcov.cpp
index 7776f2aa9a68..8a00ff64711f 100644
--- a/tools/llvm-cov/gcov.cpp
+++ b/tools/llvm-cov/gcov.cpp
@@ -1,9 +1,8 @@
 //===- gcov.cpp - GCOV compatible LLVM coverage tool ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -125,6 +124,11 @@ int gcovMain(int argc, const char *argv[]) {
                                       "(requires -b)"));
   cl::alias UncondBranchA("unconditional-branches", cl::aliasopt(UncondBranch));
 
+  cl::opt<bool> HashFilenames("x", cl::Grouping, cl::init(false),
+                              cl::desc("Hash long pathnames"));
+  cl::alias HashFilenamesA("hash-filenames", cl::aliasopt(HashFilenames));
+
+
   cl::OptionCategory DebugCat("Internal and debugging options");
   cl::opt<bool> DumpGCOV("dump", cl::init(false), cl::cat(DebugCat),
                          cl::desc("Dump the gcov file to stderr"));
@@ -136,7 +140,8 @@ int gcovMain(int argc, const char *argv[]) {
   cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n");
 
   GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
-                        PreservePaths, UncondBranch, LongNames, NoOutput);
+                        PreservePaths, UncondBranch, LongNames, NoOutput,
+                        HashFilenames);
 
   for (const auto &SourceFile : SourceFiles)
     reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV,
diff --git a/tools/llvm-cov/llvm-cov.cpp b/tools/llvm-cov/llvm-cov.cpp
index 4c3b574451c3..172ec9f3cedf 100644
--- a/tools/llvm-cov/llvm-cov.cpp
+++ b/tools/llvm-cov/llvm-cov.cpp
@@ -1,9 +1,8 @@
 //===- llvm-cov.cpp - LLVM coverage tool ----------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cxxdump/Error.cpp b/tools/llvm-cxxdump/Error.cpp
index 54207fad32af..25317820409c 100644
--- a/tools/llvm-cxxdump/Error.cpp
+++ b/tools/llvm-cxxdump/Error.cpp
@@ -1,9 +1,8 @@
 //===- Error.cpp - system_error extensions for llvm-cxxdump -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cxxdump/Error.h b/tools/llvm-cxxdump/Error.h
index 7caf6d6447c9..439902fa3803 100644
--- a/tools/llvm-cxxdump/Error.h
+++ b/tools/llvm-cxxdump/Error.h
@@ -1,9 +1,8 @@
 //===- Error.h - system_error extensions for llvm-cxxdump -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 7594066a395d..833312655788 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -1,9 +1,8 @@
 //===- llvm-cxxdump.cpp - Dump C++ data in an Object File -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -49,15 +48,20 @@ static void error(std::error_code EC) {
   exit(1);
 }
 
-static void error(Error Err) {
-  if (!Err)
-    return;
+LLVM_ATTRIBUTE_NORETURN static void error(Error Err) {
   logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
                         "reading file: ");
   outs().flush();
   exit(1);
 }
 
+template <typename T>
+T unwrapOrError(Expected<T> EO) {
+  if (!EO)
+    error(EO.takeError());
+  return std::move(*EO);
+}
+
 } // namespace llvm
 
 static void reportError(StringRef Input, StringRef Message) {
@@ -196,8 +200,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
     // Skip virtual or BSS sections.
     if (Sec.isBSS() || Sec.isVirtual())
       continue;
-    StringRef SecContents;
-    error(Sec.getContents(SecContents));
+    StringRef SecContents = unwrapOrError(Sec.getContents());
     Expected<uint64_t> SymAddressOrErr = Sym.getAddress();
     error(errorToErrorCode(SymAddressOrErr.takeError()));
     uint64_t SymAddress = *SymAddressOrErr;
@@ -511,7 +514,8 @@ static void dumpArchive(const Archive *Arc) {
     else
       reportError(Arc->getFileName(), cxxdump_error::unrecognized_file_format);
   }
-  error(std::move(Err));
+  if (Err)
+    error(std::move(Err));
 }
 
 static void dumpInput(StringRef File) {
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.h b/tools/llvm-cxxdump/llvm-cxxdump.h
index daa05cb2ca0a..739cfe481a4b 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.h
+++ b/tools/llvm-cxxdump/llvm-cxxdump.h
@@ -1,9 +1,8 @@
 //===-- llvm-cxxdump.h ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index afc1e4a8d128..9ac8bcf0ff01 100644
--- a/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -1,12 +1,12 @@
 //===-- llvm-c++filt.cpp --------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InitLLVM.h"
@@ -25,7 +25,7 @@ enum Style {
   EDG,   ///< EDG compiler
   GNUv3, ///< GNU C++ v3 ABI
   Java,  ///< Java (gcj)
-  GNAT   ///< ADA copiler (gnat)
+  GNAT   ///< ADA compiler (gnat)
 };
 static cl::opt<Style>
     Format("format", cl::desc("decoration style"),
@@ -52,31 +52,84 @@ static cl::alias TypesShort("t", cl::desc("alias for --types"),
 static cl::list<std::string>
 Decorated(cl::Positional, cl::desc("<mangled>"), cl::ZeroOrMore);
 
-static void demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
+static cl::extrahelp
+    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
+static std::string demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
   int Status;
 
-  const char *Decorated = Mangled.c_str();
+  const char *DecoratedStr = Mangled.c_str();
   if (StripUnderscore)
-    if (Decorated[0] == '_')
-      ++Decorated;
-  size_t DecoratedLength = strlen(Decorated);
+    if (DecoratedStr[0] == '_')
+      ++DecoratedStr;
+  size_t DecoratedLength = strlen(DecoratedStr);
 
   char *Undecorated = nullptr;
 
-  if (Types || ((DecoratedLength >= 2 && strncmp(Decorated, "_Z", 2) == 0) ||
-                (DecoratedLength >= 4 && strncmp(Decorated, "___Z", 4) == 0)))
-    Undecorated = itaniumDemangle(Decorated, nullptr, nullptr, &Status);
+  if (Types ||
+      ((DecoratedLength >= 2 && strncmp(DecoratedStr, "_Z", 2) == 0) ||
+       (DecoratedLength >= 4 && strncmp(DecoratedStr, "___Z", 4) == 0)))
+    Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, &Status);
 
   if (!Undecorated &&
-      (DecoratedLength > 6 && strncmp(Decorated, "__imp_", 6) == 0)) {
+      (DecoratedLength > 6 && strncmp(DecoratedStr, "__imp_", 6) == 0)) {
     OS << "import thunk for ";
-    Undecorated = itaniumDemangle(Decorated + 6, nullptr, nullptr, &Status);
+    Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
   }
 
-  OS << (Undecorated ? Undecorated : Mangled) << '\n';
-  OS.flush();
-
+  std::string Result(Undecorated ? Undecorated : Mangled);
   free(Undecorated);
+  return Result;
+}
+
+// Split 'Source' on any character that fails to pass 'IsLegalChar'.  Appends
+// pairs to 'OutFragments' where 'first' is the delimited word and 'second'
+// is the run of delimiters following that word.
+static void SplitStringDelims(
+    StringRef Source,
+    SmallVectorImpl<std::pair<StringRef, StringRef>> &OutFragments,
+    function_ref<bool(char)> IsLegalChar) {
+  // The beginning of the input string.
+  const auto Head = Source.begin();
+
+  // Obtain any leading delimiters.
+  auto Start = std::find_if(Head, Source.end(), IsLegalChar);
+  if (Start != Head)
+    OutFragments.push_back({"", Source.slice(0, Start - Head)});
+
+  // Capture each word and the delimiters following that word.
+  while (Start != Source.end()) {
+    Start = std::find_if(Start, Source.end(), IsLegalChar);
+    auto End = std::find_if_not(Start, Source.end(), IsLegalChar);
+    auto DEnd = std::find_if(End, Source.end(), IsLegalChar);
+    OutFragments.push_back({Source.slice(Start - Head, End - Head),
+                            Source.slice(End - Head, DEnd - Head)});
+    Start = DEnd;
+  }
+}
+
+// Returns true if 'C' may appear in an Itanium-mangled name. Uses the
+// locale-independent isAlnum(); isalnum() is undefined for negative 'char'.
+static bool IsLegalItaniumChar(char C) {
+  // Itanium CXX ABI [External Names]p5.1.1:
+  // '$' and '.' in mangled names are reserved for private implementations.
+  return isAlnum(C) || C == '.' || C == '$' || C == '_';
+}
+
+// If 'Split' is true, then 'Mangled' is broken into individual words and each
+// word is demangled.  Otherwise, the entire string is treated as a single
+// mangled item.  The result is output to 'OS'.
+static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
+  std::string Result;
+  if (Split) {
+    SmallVector<std::pair<StringRef, StringRef>, 16> Words;
+    SplitStringDelims(Mangled, Words, IsLegalItaniumChar);
+    for (const auto &Word : Words)
+      Result += demangle(OS, Word.first) + Word.second.str();
+  } else
+    Result = demangle(OS, Mangled);
+  OS << Result << '\n';
+  OS.flush();
 }
 
 int main(int argc, char **argv) {
@@ -86,10 +139,10 @@ int main(int argc, char **argv) {
 
   if (Decorated.empty())
     for (std::string Mangled; std::getline(std::cin, Mangled);)
-      demangle(llvm::outs(), Mangled);
+      demangleLine(llvm::outs(), Mangled, true);
   else
     for (const auto &Symbol : Decorated)
-      demangle(llvm::outs(), Symbol);
+      demangleLine(llvm::outs(), Symbol, false);
 
   return EXIT_SUCCESS;
 }
diff --git a/tools/llvm-cxxmap/llvm-cxxmap.cpp b/tools/llvm-cxxmap/llvm-cxxmap.cpp
index 39028cc86723..87d4d06bbc96 100644
--- a/tools/llvm-cxxmap/llvm-cxxmap.cpp
+++ b/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -1,9 +1,8 @@
 //===- llvm-cxxmap.cpp ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
index ec189df27521..b797143bde1b 100644
--- a/tools/llvm-diff/DiffConsumer.cpp
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -1,9 +1,8 @@
 //===-- DiffConsumer.cpp - Difference Consumer ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -13,8 +12,8 @@
 
 #include "DiffConsumer.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
index 82f5ce598b44..6cb8f2eb7eeb 100644
--- a/tools/llvm-diff/DiffConsumer.h
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -1,9 +1,8 @@
 //===-- DiffConsumer.h - Difference Consumer --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-diff/DiffLog.cpp b/tools/llvm-diff/DiffLog.cpp
index 50c0c4cff2fc..6484197521f2 100644
--- a/tools/llvm-diff/DiffLog.cpp
+++ b/tools/llvm-diff/DiffLog.cpp
@@ -1,9 +1,8 @@
 //===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-diff/DiffLog.h b/tools/llvm-diff/DiffLog.h
index 8f28461afdde..0c8952496155 100644
--- a/tools/llvm-diff/DiffLog.h
+++ b/tools/llvm-diff/DiffLog.h
@@ -1,9 +1,8 @@
 //===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index acff8bb3e89b..bc93ece86490 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -1,9 +1,8 @@
 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,7 +67,7 @@ public:
     unsigned NewSize = Storage.size() - 1;
     if (NewSize) {
       // Move the slot at the end to the beginning.
-      if (isPodLike<T>::value)
+      if (is_trivially_copyable<T>::value)
         Storage[0] = Storage[NewSize];
       else
         std::swap(Storage[0], Storage[NewSize]);
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 7f084a377f0c..da1b6526a6e2 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -1,9 +1,8 @@
 //===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index e449d6994784..aaf7989e2e3d 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-diff.cpp - Module comparator command-line driver ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 8143a2a5a934..3f337b874b16 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-dis.cpp - The low-level LLVM disassembler --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp
index 5fe7e8b4615b..f26369b935cb 100644
--- a/tools/llvm-dwarfdump/Statistics.cpp
+++ b/tools/llvm-dwarfdump/Statistics.cpp
@@ -15,14 +15,38 @@ using namespace object;
 struct PerFunctionStats {
   /// Number of inlined instances of this function.
   unsigned NumFnInlined = 0;
-  /// Number of variables with location across all inlined instances.
+  /// Number of inlined instances that have abstract origins.
+  unsigned NumAbstractOrigins = 0;
+  /// Number of variables and parameters with location across all inlined
+  /// instances.
   unsigned TotalVarWithLoc = 0;
   /// Number of constants with location across all inlined instances.
   unsigned ConstantMembers = 0;
-  /// List of all Variables in this function.
+  /// List of all Variables and parameters in this function.
   StringSet<> VarsInFunction;
   /// Compile units also cover a PC range, but have this flag set to false.
   bool IsFunction = false;
+  /// Verify function definition has PC addresses (for detecting when
+  /// a function has been inlined everywhere).
+  bool HasPCAddresses = false;
+  /// Function has source location information.
+  bool HasSourceLocation = false;
+  /// Number of function parameters.
+  unsigned NumParams = 0;
+  /// Number of function parameters with source location.
+  unsigned NumParamSourceLocations = 0;
+  /// Number of function parameters with type.
+  unsigned NumParamTypes = 0;
+  /// Number of function parameters with a DW_AT_location.
+  unsigned NumParamLocations = 0;
+  /// Number of variables.
+  unsigned NumVars = 0;
+  /// Number of variables with source location.
+  unsigned NumVarSourceLocations = 0;
+  /// Number of variables with type.
+  unsigned NumVarTypes = 0;
+  /// Number of variables with DW_AT_location.
+  unsigned NumVarLocations = 0;
 };
 
 /// Holds accumulated global statistics about DIEs.
@@ -32,7 +56,8 @@ struct GlobalStats {
   /// Total number of PC range bytes in each variable's enclosing scope,
   /// starting from the first definition of the variable.
   unsigned ScopeBytesFromFirstDefinition = 0;
-  /// Total number of call site entries (DW_TAG_call_site).
+  /// Total number of call site entries (DW_TAG_call_site) or
+  /// (DW_AT_call_file & DW_AT_call_line).
   unsigned CallSiteEntries = 0;
   /// Total byte size of concrete functions. This byte size includes
   /// inline functions contained in the concrete functions.
@@ -59,11 +84,13 @@ static uint64_t getLowPC(DWARFDie Die) {
 /// Collect debug info quality metrics for one DIE.
 static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
                                std::string VarPrefix, uint64_t ScopeLowPC,
-                               uint64_t BytesInScope,
-                               uint32_t InlineDepth,
+                               uint64_t BytesInScope, uint32_t InlineDepth,
                                StringMap<PerFunctionStats> &FnStatMap,
                                GlobalStats &GlobalStats) {
   bool HasLoc = false;
+  bool HasSrcLoc = false;
+  bool HasType = false;
+  bool IsArtificial = false;
   uint64_t BytesCovered = 0;
   uint64_t OffsetToFirstDefinition = 0;
 
@@ -79,6 +106,16 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
     return;
   }
 
+  if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
+      Die.findRecursively(dwarf::DW_AT_decl_line))
+    HasSrcLoc = true;
+
+  if (Die.findRecursively(dwarf::DW_AT_type))
+    HasType = true;
+
+  if (Die.find(dwarf::DW_AT_artificial))
+    IsArtificial = true;
+
   if (Die.find(dwarf::DW_AT_const_value)) {
     // This catches constant members *and* variables.
     HasLoc = true;
@@ -125,7 +162,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
   // By using the variable name + the path through the lexical block tree, the
   // keys are consistent across duplicate abstract origins in different CUs.
   std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
-  FnStats.VarsInFunction.insert(VarPrefix+VarName);
+  FnStats.VarsInFunction.insert(VarPrefix + VarName);
   if (BytesInScope) {
     FnStats.TotalVarWithLoc += (unsigned)HasLoc;
     // Adjust for the fact the variables often start their lifetime in the
@@ -136,16 +173,36 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
     GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
     assert(GlobalStats.ScopeBytesCovered <=
            GlobalStats.ScopeBytesFromFirstDefinition);
-  } else {
+  } else if (Die.getTag() == dwarf::DW_TAG_member) {
     FnStats.ConstantMembers++;
+  } else {
+    FnStats.TotalVarWithLoc += (unsigned)HasLoc;
+  }
+  if (!IsArtificial) {
+    if (Die.getTag() == dwarf::DW_TAG_formal_parameter) {
+      FnStats.NumParams++;
+      if (HasType)
+        FnStats.NumParamTypes++;
+      if (HasSrcLoc)
+        FnStats.NumParamSourceLocations++;
+      if (HasLoc)
+        FnStats.NumParamLocations++;
+    } else if (Die.getTag() == dwarf::DW_TAG_variable) {
+      FnStats.NumVars++;
+      if (HasType)
+        FnStats.NumVarTypes++;
+      if (HasSrcLoc)
+        FnStats.NumVarSourceLocations++;
+      if (HasLoc)
+        FnStats.NumVarLocations++;
+    }
   }
 }
 
 /// Recursively collect debug info quality metrics.
 static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
                                   std::string VarPrefix, uint64_t ScopeLowPC,
-                                  uint64_t BytesInScope,
-                                  uint32_t InlineDepth,
+                                  uint64_t BytesInScope, uint32_t InlineDepth,
                                   StringMap<PerFunctionStats> &FnStatMap,
                                   GlobalStats &GlobalStats) {
   // Handle any kind of lexical scope.
@@ -164,20 +221,9 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
     if (Die.find(dwarf::DW_AT_declaration))
       return;
 
-    // Count the function.
-    if (!IsBlock) {
-      StringRef Name = Die.getName(DINameKind::LinkageName);
-      if (Name.empty())
-        Name = Die.getName(DINameKind::ShortName);
-      FnPrefix = Name;
-      // Skip over abstract origins.
-      if (Die.find(dwarf::DW_AT_inline))
-        return;
-      // We've seen an (inlined) instance of this function.
-      auto &FnStats = FnStatMap[Name];
-      FnStats.NumFnInlined++;
-      FnStats.IsFunction = true;
-    }
+    // Check for call sites.
+    if (Die.find(dwarf::DW_AT_call_file) && Die.find(dwarf::DW_AT_call_line))
+      GlobalStats.CallSiteEntries++;
 
     // PC Ranges.
     auto RangesOrError = Die.getAddressRanges();
@@ -192,6 +238,31 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
       BytesInThisScope += Range.HighPC - Range.LowPC;
     ScopeLowPC = getLowPC(Die);
 
+    // Count the function.
+    if (!IsBlock) {
+      StringRef Name = Die.getName(DINameKind::LinkageName);
+      if (Name.empty())
+        Name = Die.getName(DINameKind::ShortName);
+      FnPrefix = Name;
+      // Skip over abstract origins.
+      if (Die.find(dwarf::DW_AT_inline))
+        return;
+      // We've seen an (inlined) instance of this function.
+      auto &FnStats = FnStatMap[Name];
+      if (IsInlinedFunction) {
+        FnStats.NumFnInlined++;
+        if (Die.findRecursively(dwarf::DW_AT_abstract_origin))
+          FnStats.NumAbstractOrigins++;
+      }
+      FnStats.IsFunction = true;
+      if (BytesInThisScope && !IsInlinedFunction)
+        FnStats.HasPCAddresses = true;
+      std::string FnName = StringRef(Die.getName(DINameKind::ShortName));
+      if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
+          Die.findRecursively(dwarf::DW_AT_decl_line))
+        FnStats.HasSourceLocation = true;
+    }
+
     if (BytesInThisScope) {
       BytesInScope = BytesInThisScope;
       if (IsFunction)
@@ -252,29 +323,53 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   GlobalStats GlobalStats;
   StringMap<PerFunctionStats> Statistics;
   for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
-    if (DWARFDie CUDie = CU->getUnitDIE(false))
+    if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false))
       collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
 
   /// The version number should be increased every time the algorithm is changed
   /// (including bug fixes). New metrics may be added without increasing the
   /// version.
-  unsigned Version = 1;
-  unsigned VarTotal = 0;
-  unsigned VarUnique = 0;
-  unsigned VarWithLoc = 0;
+  unsigned Version = 3;
+  unsigned VarParamTotal = 0;
+  unsigned VarParamUnique = 0;
+  unsigned VarParamWithLoc = 0;
   unsigned NumFunctions = 0;
   unsigned NumInlinedFunctions = 0;
+  unsigned NumFuncsWithSrcLoc = 0;
+  unsigned NumAbstractOrigins = 0;
+  unsigned ParamTotal = 0;
+  unsigned ParamWithType = 0;
+  unsigned ParamWithLoc = 0;
+  unsigned ParamWithSrcLoc = 0;
+  unsigned VarTotal = 0;
+  unsigned VarWithType = 0;
+  unsigned VarWithSrcLoc = 0;
+  unsigned VarWithLoc = 0;
   for (auto &Entry : Statistics) {
     PerFunctionStats &Stats = Entry.getValue();
     unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined;
+    // Count variables in concrete out-of-line functions and in global scope.
+    if (Stats.HasPCAddresses || !Stats.IsFunction)
+      TotalVars += Stats.VarsInFunction.size();
     unsigned Constants = Stats.ConstantMembers;
-    VarWithLoc += Stats.TotalVarWithLoc + Constants;
-    VarTotal += TotalVars + Constants;
-    VarUnique += Stats.VarsInFunction.size();
-    LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs()
+    VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
+    VarParamTotal += TotalVars;
+    VarParamUnique += Stats.VarsInFunction.size();
+    LLVM_DEBUG(for (auto &V
+                    : Stats.VarsInFunction) llvm::dbgs()
                << Entry.getKey() << ": " << V.getKey() << "\n");
     NumFunctions += Stats.IsFunction;
+    NumFuncsWithSrcLoc += Stats.HasSourceLocation;
     NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
+    NumAbstractOrigins += Stats.IsFunction * Stats.NumAbstractOrigins;
+    ParamTotal += Stats.NumParams;
+    ParamWithType += Stats.NumParamTypes;
+    ParamWithLoc += Stats.NumParamLocations;
+    ParamWithSrcLoc += Stats.NumParamSourceLocations;
+    VarTotal += Stats.NumVars;
+    VarWithType += Stats.NumVarTypes;
+    VarWithLoc += Stats.NumVarLocations;
+    VarWithSrcLoc += Stats.NumVarSourceLocations;
   }
 
   // Print summary.
@@ -285,20 +380,31 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
   printDatum(OS, "file", Filename.str());
   printDatum(OS, "format", FormatName);
   printDatum(OS, "source functions", NumFunctions);
+  printDatum(OS, "source functions with location", NumFuncsWithSrcLoc);
   printDatum(OS, "inlined functions", NumInlinedFunctions);
-  printDatum(OS, "unique source variables", VarUnique);
-  printDatum(OS, "source variables", VarTotal);
-  printDatum(OS, "variables with location", VarWithLoc);
+  printDatum(OS, "inlined funcs with abstract origins", NumAbstractOrigins);
+  printDatum(OS, "unique source variables", VarParamUnique);
+  printDatum(OS, "source variables", VarParamTotal);
+  printDatum(OS, "variables with location", VarParamWithLoc);
   printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
   printDatum(OS, "scope bytes total",
              GlobalStats.ScopeBytesFromFirstDefinition);
   printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
   printDatum(OS, "total function size", GlobalStats.FunctionSize);
   printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
+  printDatum(OS, "total formal params", ParamTotal);
+  printDatum(OS, "formal params with source location", ParamWithSrcLoc);
+  printDatum(OS, "formal params with type", ParamWithType);
+  printDatum(OS, "formal params with binary location", ParamWithLoc);
+  printDatum(OS, "total vars", VarTotal);
+  printDatum(OS, "vars with source location", VarWithSrcLoc);
+  printDatum(OS, "vars with type", VarWithType);
+  printDatum(OS, "vars with binary location", VarWithLoc);
   OS << "}\n";
   LLVM_DEBUG(
       llvm::dbgs() << "Total Availability: "
-                   << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n";
+                   << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
+                   << "%\n";
       llvm::dbgs() << "PC Ranges covered: "
                    << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
                                       GlobalStats.ScopeBytesFromFirstDefinition)
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d9e8e36efe5c..05a7aef67ece 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,8 +92,6 @@ namespace {
 using namespace cl;
 
 OptionCategory DwarfDumpCategory("Specific Options");
-static opt<bool> Help("h", desc("Alias for -help"), Hidden,
-                      cat(DwarfDumpCategory));
 static list<std::string>
     InputFilenames(Positional, desc("<input object files or .dSYM bundles>"),
                    ZeroOrMore, cat(DwarfDumpCategory));
@@ -142,10 +139,9 @@ static list<std::string>
               "-name option can be used instead."),
          value_desc("name"), cat(DwarfDumpCategory));
 static alias FindAlias("f", desc("Alias for -find."), aliasopt(Find));
-static opt<bool>
-    IgnoreCase("ignore-case",
-               desc("Ignore case distinctions in when searching by name."),
-               value_desc("i"), cat(DwarfDumpCategory));
+static opt<bool> IgnoreCase("ignore-case",
+                            desc("Ignore case distinctions when searching."),
+                            value_desc("i"), cat(DwarfDumpCategory));
 static alias IgnoreCaseAlias("i", desc("Alias for -ignore-case."),
                              aliasopt(IgnoreCase));
 static list<std::string> Name(
@@ -155,17 +151,17 @@ static list<std::string> Name(
          "the -regex option <pattern> is interpreted as a regular expression."),
     value_desc("pattern"), cat(DwarfDumpCategory));
 static alias NameAlias("n", desc("Alias for -name"), aliasopt(Name));
-static opt<unsigned long long> Lookup("lookup",
+static opt<uint64_t>
+    Lookup("lookup",
            desc("Lookup <address> in the debug information and print out any "
                 "available file, function, block and line table details."),
            value_desc("address"), cat(DwarfDumpCategory));
 static opt<std::string>
-    OutputFilename("out-file", cl::init(""),
+    OutputFilename("o", cl::init("-"),
                    cl::desc("Redirect output to the specified file."),
-                   cl::value_desc("filename"));
-static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
-                                 aliasopt(OutputFilename),
-                                 cat(DwarfDumpCategory));
+                   cl::value_desc("filename"), cat(DwarfDumpCategory));
+static alias OutputFilenameAlias("out-file", desc("Alias for -o."),
+                                 aliasopt(OutputFilename));
 static opt<bool>
     UseRegex("regex",
              desc("Treat any <pattern> strings as regular expressions when "
@@ -175,14 +171,14 @@ static alias RegexAlias("x", desc("Alias for -regex"), aliasopt(UseRegex));
 static opt<bool>
     ShowChildren("show-children",
                  desc("Show a debug info entry's children when selectively "
-                      "printing with the =<offset> option."),
+                      "printing entries."),
                  cat(DwarfDumpCategory));
 static alias ShowChildrenAlias("c", desc("Alias for -show-children."),
                                aliasopt(ShowChildren));
 static opt<bool>
     ShowParents("show-parents",
                 desc("Show a debug info entry's parents when selectively "
-                     "printing with the =<offset> option."),
+                     "printing entries."),
                 cat(DwarfDumpCategory));
 static alias ShowParentsAlias("p", desc("Alias for -show-parents."),
                               aliasopt(ShowParents));
@@ -192,13 +188,18 @@ static opt<bool>
              cat(DwarfDumpCategory));
 static alias ShowFormAlias("F", desc("Alias for -show-form."),
                            aliasopt(ShowForm), cat(DwarfDumpCategory));
-static opt<unsigned> RecurseDepth(
-    "recurse-depth",
-    desc("Only recurse to a depth of N when displaying debug info entries."),
-    cat(DwarfDumpCategory), init(-1U), value_desc("N"));
-static alias RecurseDepthAlias("r", desc("Alias for -recurse-depth."),
-                               aliasopt(RecurseDepth));
-
+static opt<unsigned>
+    ChildRecurseDepth("recurse-depth",
+                      desc("Only recurse to a depth of N when displaying "
+                           "children of debug info entries."),
+                      cat(DwarfDumpCategory), init(-1U), value_desc("N"));
+static alias ChildRecurseDepthAlias("r", desc("Alias for -recurse-depth."),
+                                    aliasopt(ChildRecurseDepth));
+static opt<unsigned>
+    ParentRecurseDepth("parent-recurse-depth",
+                       desc("Only recurse to a depth of N when displaying "
+                            "parents of debug info entries."),
+                       cat(DwarfDumpCategory), init(-1U), value_desc("N"));
 static opt<bool>
     SummarizeTypes("summarize-types",
                    desc("Abbreviate the description of type unit entries."),
@@ -219,6 +220,8 @@ static opt<bool> Verbose("verbose",
                          cat(DwarfDumpCategory));
 static alias VerboseAlias("v", desc("Alias for -verbose."), aliasopt(Verbose),
                           cat(DwarfDumpCategory));
+static cl::extrahelp
+    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
 } // namespace
 /// @}
 //===----------------------------------------------------------------------===//
@@ -233,7 +236,8 @@ static void error(StringRef Prefix, std::error_code EC) {
 static DIDumpOptions getDumpOpts() {
   DIDumpOptions DumpOpts;
   DumpOpts.DumpType = DumpType;
-  DumpOpts.RecurseDepth = RecurseDepth;
+  DumpOpts.ChildRecurseDepth = ChildRecurseDepth;
+  DumpOpts.ParentRecurseDepth = ParentRecurseDepth;
   DumpOpts.ShowAddresses = !Diff;
   DumpOpts.ShowChildren = ShowChildren;
   DumpOpts.ShowParents = ShowParents;
@@ -259,19 +263,16 @@ static bool filterArch(ObjectFile &Obj) {
     return true;
 
   if (auto *MachO = dyn_cast<MachOObjectFile>(&Obj)) {
-    std::string ObjArch =
-        Triple::getArchTypeName(MachO->getArchTriple().getArch());
-
     for (auto Arch : ArchFilters) {
-      // Match name.
-      if (Arch == ObjArch)
-        return true;
-
       // Match architecture number.
       unsigned Value;
       if (!StringRef(Arch).getAsInteger(0, Value))
         if (Value == getCPUType(*MachO))
           return true;
+
+      // Match as name.
+      if (MachO->getArchTriple().getArch() == Triple(Arch).getArch())
+        return true;
     }
   }
   return false;
@@ -380,14 +381,19 @@ static void filterByAccelName(ArrayRef<std::string> Names, DWARFContext &DICtx,
 
 /// Handle the --lookup option and dump the DIEs and line info for the given
 /// address.
-static bool lookup(DWARFContext &DICtx, uint64_t Address, raw_ostream &OS) {
+/// TODO: the Address specified for the --lookup option could relate to several
+/// different sections (in the case of a not-yet-linked object file).
+/// llvm-dwarfdump needs to handle this: extend the lookup option with section
+/// information, or possibly display all matching entries, or something else.
+static bool lookup(ObjectFile &Obj, DWARFContext &DICtx, uint64_t Address,
+                   raw_ostream &OS) {
   auto DIEsForAddr = DICtx.getDIEsForAddress(Lookup);
 
   if (!DIEsForAddr)
     return false;
 
   DIDumpOptions DumpOpts = getDumpOpts();
-  DumpOpts.RecurseDepth = 0;
+  DumpOpts.ChildRecurseDepth = 0;
   DIEsForAddr.CompileUnit->dump(OS, DumpOpts);
   if (DIEsForAddr.FunctionDIE) {
     DIEsForAddr.FunctionDIE.dump(OS, 2, DumpOpts);
@@ -395,7 +401,10 @@ static bool lookup(DWARFContext &DICtx, uint64_t Address, raw_ostream &OS) {
       DIEsForAddr.BlockDIE.dump(OS, 4, DumpOpts);
   }
 
-  if (DILineInfo LineInfo = DICtx.getLineInfoForAddress(Lookup))
+  // TODO: it is necessary to set the proper SectionIndex here.
+  // object::SectionedAddress::UndefSection works only for absolute addresses.
+  if (DILineInfo LineInfo = DICtx.getLineInfoForAddress(
+          {Lookup, object::SectionedAddress::UndefSection}))
     LineInfo.dump(OS);
 
   return true;
@@ -414,7 +423,7 @@ static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename,
 
   // Handle the --lookup option.
   if (Lookup)
-    return lookup(DICtx, Lookup, OS);
+    return lookup(Obj, DICtx, Lookup, OS);
 
   // Handle the --name option.
   if (!Name.empty()) {
@@ -566,11 +575,6 @@ int main(int argc, char **argv) {
       "pretty-print DWARF debug information in object files"
       " and debug info archives.\n");
 
-  if (Help) {
-    PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
-    return 0;
-  }
-
   // FIXME: Audit interactions between these two options and make them
   //        compatible.
   if (Diff && Verbose) {
@@ -579,17 +583,12 @@ int main(int argc, char **argv) {
     return 0;
   }
 
-  std::unique_ptr<ToolOutputFile> OutputFile;
-  if (!OutputFilename.empty()) {
-    std::error_code EC;
-    OutputFile = llvm::make_unique<ToolOutputFile>(OutputFilename, EC,
-                                                     sys::fs::F_None);
-    error("Unable to open output file" + OutputFilename, EC);
-    // Don't remove output file if we exit with an error.
-    OutputFile->keep();
-  }
+  std::error_code EC;
+  ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None);
+  error("Unable to open output file " + OutputFilename, EC);
+  // Keep the output file even if we later exit with an error.
+  OutputFile.keep();
 
-  raw_ostream &OS = OutputFile ? OutputFile->os() : outs();
   bool OffsetRequested = false;
 
   // Defaults to dumping all sections, unless brief mode is specified in which
@@ -633,15 +632,15 @@ int main(int argc, char **argv) {
   if (Verify) {
     // If we encountered errors during verify, exit with a non-zero exit status.
     if (!all_of(Objects, [&](std::string Object) {
-          return handleFile(Object, verifyObjectFile, OS);
+          return handleFile(Object, verifyObjectFile, OutputFile.os());
         }))
-      exit(1);
+      return 1;
   } else if (Statistics)
     for (auto Object : Objects)
-      handleFile(Object, collectStatsForObjectFile, OS);
+      handleFile(Object, collectStatsForObjectFile, OutputFile.os());
   else
     for (auto Object : Objects)
-      handleFile(Object, dumpObjectFile, OS);
+      handleFile(Object, dumpObjectFile, OutputFile.os());
 
   return EXIT_SUCCESS;
 }
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 94aaa2f52eb5..300bc0b4bd52 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -1,9 +1,8 @@
 //===- llvm-extract.cpp - LLVM function extraction utility ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,86 +33,99 @@
 #include <memory>
 using namespace llvm;
 
+cl::OptionCategory ExtractCat("llvm-extract Options");
+
 // InputFilename - The filename to read from.
-static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
-              cl::init("-"), cl::value_desc("filename"));
+static cl::opt<std::string> InputFilename(cl::Positional,
+                                          cl::desc("<input bitcode file>"),
+                                          cl::init("-"),
+                                          cl::value_desc("filename"));
 
-static cl::opt<std::string>
-OutputFilename("o", cl::desc("Specify output filename"),
-               cl::value_desc("filename"), cl::init("-"));
+static cl::opt<std::string> OutputFilename("o",
+                                           cl::desc("Specify output filename"),
+                                           cl::value_desc("filename"),
+                                           cl::init("-"), cl::cat(ExtractCat));
 
-static cl::opt<bool>
-Force("f", cl::desc("Enable binary output on terminals"));
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
+                           cl::cat(ExtractCat));
 
-static cl::opt<bool>
-DeleteFn("delete", cl::desc("Delete specified Globals from Module"));
+static cl::opt<bool> DeleteFn("delete",
+                              cl::desc("Delete specified Globals from Module"),
+                              cl::cat(ExtractCat));
 
 static cl::opt<bool>
-    Recursive("recursive",
-              cl::desc("Recursively extract all called functions"));
+    Recursive("recursive", cl::desc("Recursively extract all called functions"),
+              cl::cat(ExtractCat));
 
 // ExtractFuncs - The functions to extract from the module.
 static cl::list<std::string>
-ExtractFuncs("func", cl::desc("Specify function to extract"),
-             cl::ZeroOrMore, cl::value_desc("function"));
+    ExtractFuncs("func", cl::desc("Specify function to extract"),
+                 cl::ZeroOrMore, cl::value_desc("function"),
+                 cl::cat(ExtractCat));
 
 // ExtractRegExpFuncs - The functions, matched via regular expression, to
 // extract from the module.
 static cl::list<std::string>
-ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a "
-                                     "regular expression"),
-                   cl::ZeroOrMore, cl::value_desc("rfunction"));
+    ExtractRegExpFuncs("rfunc",
+                       cl::desc("Specify function(s) to extract using a "
+                                "regular expression"),
+                       cl::ZeroOrMore, cl::value_desc("rfunction"),
+                       cl::cat(ExtractCat));
 
 // ExtractBlocks - The blocks to extract from the module.
-static cl::list<std::string>
-    ExtractBlocks("bb",
-                  cl::desc("Specify <function, basic block> pairs to extract"),
-                  cl::ZeroOrMore, cl::value_desc("function:bb"));
+static cl::list<std::string> ExtractBlocks(
+    "bb", cl::desc("Specify <function, basic block> pairs to extract"),
+    cl::ZeroOrMore, cl::value_desc("function:bb"), cl::cat(ExtractCat));
 
 // ExtractAlias - The alias to extract from the module.
 static cl::list<std::string>
-ExtractAliases("alias", cl::desc("Specify alias to extract"),
-               cl::ZeroOrMore, cl::value_desc("alias"));
-
+    ExtractAliases("alias", cl::desc("Specify alias to extract"),
+                   cl::ZeroOrMore, cl::value_desc("alias"),
+                   cl::cat(ExtractCat));
 
 // ExtractRegExpAliases - The aliases, matched via regular expression, to
 // extract from the module.
 static cl::list<std::string>
-ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a "
-                                        "regular expression"),
-                     cl::ZeroOrMore, cl::value_desc("ralias"));
+    ExtractRegExpAliases("ralias",
+                         cl::desc("Specify alias(es) to extract using a "
+                                  "regular expression"),
+                         cl::ZeroOrMore, cl::value_desc("ralias"),
+                         cl::cat(ExtractCat));
 
 // ExtractGlobals - The globals to extract from the module.
 static cl::list<std::string>
-ExtractGlobals("glob", cl::desc("Specify global to extract"),
-               cl::ZeroOrMore, cl::value_desc("global"));
+    ExtractGlobals("glob", cl::desc("Specify global to extract"),
+                   cl::ZeroOrMore, cl::value_desc("global"),
+                   cl::cat(ExtractCat));
 
 // ExtractRegExpGlobals - The globals, matched via regular expression, to
 // extract from the module...
 static cl::list<std::string>
-ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a "
-                                       "regular expression"),
-                     cl::ZeroOrMore, cl::value_desc("rglobal"));
+    ExtractRegExpGlobals("rglob",
+                         cl::desc("Specify global(s) to extract using a "
+                                  "regular expression"),
+                         cl::ZeroOrMore, cl::value_desc("rglobal"),
+                         cl::cat(ExtractCat));
 
-static cl::opt<bool>
-OutputAssembly("S",
-               cl::desc("Write output as LLVM assembly"), cl::Hidden);
+static cl::opt<bool> OutputAssembly("S",
+                                    cl::desc("Write output as LLVM assembly"),
+                                    cl::Hidden, cl::cat(ExtractCat));
 
 static cl::opt<bool> PreserveBitcodeUseListOrder(
     "preserve-bc-uselistorder",
     cl::desc("Preserve use-list order when writing LLVM bitcode."),
-    cl::init(true), cl::Hidden);
+    cl::init(true), cl::Hidden, cl::cat(ExtractCat));
 
 static cl::opt<bool> PreserveAssemblyUseListOrder(
     "preserve-ll-uselistorder",
     cl::desc("Preserve use-list order when writing LLVM assembly."),
-    cl::init(false), cl::Hidden);
+    cl::init(false), cl::Hidden, cl::cat(ExtractCat));
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
 
   LLVMContext Context;
+  cl::HideUnrelatedOptions(ExtractCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");
 
   // Use lazy loading, since we only care about selected global values.
@@ -230,7 +242,7 @@ int main(int argc, char **argv) {
   }
 
   // Figure out which BasicBlocks we should extract.
-  SmallVector<BasicBlock *, 4> BBs;
+  SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupOfBBs;
   for (StringRef StrPair : ExtractBlocks) {
     auto BBInfo = StrPair.split(':');
     // Get the function.
@@ -242,17 +254,24 @@ int main(int argc, char **argv) {
     }
     // Do not materialize this function.
     GVs.insert(F);
-    // Get the basic block.
-    auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
-      return BB.getName().equals(BBInfo.second);
-    });
-    if (Res == F->end()) {
-      errs() << argv[0] << ": function " << F->getName()
-             << " doesn't contain a basic block named '" << BBInfo.second
-             << "'!\n";
-      return 1;
+    // Get the basic blocks.
+    SmallVector<BasicBlock *, 16> BBs;
+    SmallVector<StringRef, 16> BBNames;
+    BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1,
+                        /*KeepEmpty=*/false);
+    for (StringRef BBName : BBNames) {
+      auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
+        return BB.getName().equals(BBName);
+      });
+      if (Res == F->end()) {
+        // Name the block that was not found, not the whole ';'-separated list.
+        errs() << argv[0] << ": function " << F->getName()
+               << " doesn't contain a basic block named '" << BBName << "'!\n";
+        return 1;
+      }
+      BBs.push_back(&*Res);
     }
-    BBs.push_back(&*Res);
+    GroupOfBBs.push_back(BBs);
   }
 
   // Use *argv instead of argv[0] to work around a wrong GCC warning.
@@ -271,10 +290,10 @@ int main(int argc, char **argv) {
       ExitOnErr(F->materialize());
       for (auto &BB : *F) {
         for (auto &I : BB) {
-          auto *CI = dyn_cast<CallInst>(&I);
-          if (!CI)
+          CallBase *CB = dyn_cast<CallBase>(&I);
+          if (!CB)
             continue;
-          Function *CF = CI->getCalledFunction();
+          Function *CF = CB->getCalledFunction();
           if (!CF)
             continue;
           if (CF->isDeclaration() || GVs.count(CF))
@@ -317,7 +336,7 @@ int main(int argc, char **argv) {
   // functions.
   if (!ExtractBlocks.empty()) {
     legacy::PassManager PM;
-    PM.add(createBlockExtractorPass(BBs, true));
+    PM.add(createBlockExtractorPass(GroupOfBBs, true));
     PM.run(*M);
   }
 
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index b7a888375b3d..50ba57178d02 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -1,9 +1,8 @@
 //===- llvm-link.cpp - Low-level LLVM linker ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index b6facc919b51..585207b25185 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -1,9 +1,8 @@
 //===- llvm-lto: a simple command-line program to link modules with LTO ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -158,8 +157,8 @@ static cl::opt<int>
     ThinLTOCachePruningInterval("thinlto-cache-pruning-interval",
     cl::init(1200), cl::desc("Set ThinLTO cache pruning interval."));
 
-static cl::opt<unsigned long long>
-    ThinLTOCacheMaxSizeBytes("thinlto-cache-max-size-bytes",
+static cl::opt<uint64_t> ThinLTOCacheMaxSizeBytes(
+    "thinlto-cache-max-size-bytes",
     cl::desc("Set ThinLTO cache pruning directory maximum size in bytes."));
 
 static cl::opt<int>
@@ -205,6 +204,10 @@ static cl::opt<bool> ListSymbolsOnly(
     "list-symbols-only", cl::init(false),
     cl::desc("Instead of running LTO, list the symbols in each IR file"));
 
+static cl::opt<bool> ListDependentLibrariesOnly(
+    "list-dependent-libraries-only", cl::init(false),
+    cl::desc("Instead of running LTO, list the dependent libraries in each IR file"));
+
 static cl::opt<bool> SetMergedModule(
     "set-merged-module", cl::init(false),
     cl::desc("Use the first input module as the merged module"));
@@ -373,6 +376,34 @@ static void listSymbols(const TargetOptions &Options) {
   }
 }
 
+static std::unique_ptr<MemoryBuffer> loadFile(StringRef Filename) {
+  auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Filename));
+  ExitOnError Check("llvm-lto: error loading file '" + Filename.str() + "': ");
+  return Check(std::move(BufferOrErr));
+}
+
+static void listDependentLibraries() {
+  for (auto &Filename : InputFilenames) {
+    auto Buffer = loadFile(Filename);
+    std::string E; // Passed to createInputFile; reported if it fails.
+    std::unique_ptr<lto::InputFile> Input(LTOModule::createInputFile(
+        Buffer->getBufferStart(), Buffer->getBufferSize(), Filename.c_str(),
+        E));
+    if (!Input)
+      error(E);
+
+    // Print the file name, then each dependent library on its own line.
+    outs() << Filename << ":\n";
+    for (size_t I = 0, C = LTOModule::getDependentLibraryCount(Input.get());
+         I != C; ++I) {
+      size_t L = 0; // Passed by pointer below; used as the length of S.
+      const char *S = LTOModule::getDependentLibrary(Input.get(), I, &L);
+      assert(S);
+      outs() << StringRef(S, L) << "\n";
+    }
+  }
+}
+
 /// Create a combined index file from the input IR files and write it.
 ///
 /// This is meant to enable testing of ThinLTO combined index generation,
@@ -450,22 +481,31 @@ std::unique_ptr<ModuleSummaryIndex> loadCombinedIndex() {
   return ExitOnErr(getModuleSummaryIndexForFile(ThinLTOIndex));
 }
 
-static std::unique_ptr<Module> loadModule(StringRef Filename,
-                                          LLVMContext &Ctx) {
-  SMDiagnostic Err;
-  std::unique_ptr<Module> M(parseIRFile(Filename, Err, Ctx));
-  if (!M) {
-    Err.print("llvm-lto", errs());
-    report_fatal_error("Can't load module for file " + Filename);
-  }
-  maybeVerifyModule(*M);
+static std::unique_ptr<lto::InputFile> loadInputFile(MemoryBufferRef Buffer) {
+  const std::string Name = Buffer.getBufferIdentifier().str();
+  ExitOnError Check("llvm-lto: error loading input '" + Name + "': ");
+  return Check(lto::InputFile::create(Buffer));
+}
 
+static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile &File,
+                                                   LLVMContext &CTX) {
+  auto &Mod = File.getSingleBitcodeModule();
+  auto ModuleOrErr = Mod.parseModule(CTX);
+  if (!ModuleOrErr) {
+    handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+      SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
+                                      SourceMgr::DK_Error, EIB.message());
+      Err.print("llvm-lto", errs());
+    });
+    report_fatal_error("Can't load module, abort.");
+  }
+  maybeVerifyModule(**ModuleOrErr);
   if (ThinLTOModuleId.getNumOccurrences()) {
     if (InputFilenames.size() != 1)
       report_fatal_error("Can't override the module id for multiple files");
-    M->setModuleIdentifier(ThinLTOModuleId);
+    (*ModuleOrErr)->setModuleIdentifier(ThinLTOModuleId);
   }
-  return M;
+  return std::move(*ModuleOrErr);
 }
 
 static void writeModuleToFile(Module &TheModule, StringRef Filename) {
@@ -563,13 +603,15 @@ private:
     auto Index = loadCombinedIndex();
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
 
       // Build a map of module to the GUIDs and summary objects that should
       // be written to its index.
       std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
-      ThinGenerator.gatherImportedSummariesForModule(*TheModule, *Index,
-                                                     ModuleToSummariesForIndex);
+      ThinGenerator.gatherImportedSummariesForModule(
+          *TheModule, *Index, ModuleToSummariesForIndex, *Input);
 
       std::string OutputName = OutputFilename;
       if (OutputName.empty()) {
@@ -598,13 +640,16 @@ private:
     auto Index = loadCombinedIndex();
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
       std::string OutputName = OutputFilename;
       if (OutputName.empty()) {
         OutputName = Filename + ".imports";
       }
-      OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
-      ThinGenerator.emitImports(*TheModule, OutputName, *Index);
+      OutputName =
+          getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
+      ThinGenerator.emitImports(*TheModule, OutputName, *Index, *Input);
     }
   }
 
@@ -622,9 +667,11 @@ private:
     auto Index = loadCombinedIndex();
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
 
-      ThinGenerator.promote(*TheModule, *Index);
+      ThinGenerator.promote(*TheModule, *Index, *Input);
 
       std::string OutputName = OutputFilename;
       if (OutputName.empty()) {
@@ -653,9 +700,11 @@ private:
 
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
 
-      ThinGenerator.crossModuleImport(*TheModule, *Index);
+      ThinGenerator.crossModuleImport(*TheModule, *Index, *Input);
 
       std::string OutputName = OutputFilename;
       if (OutputName.empty()) {
@@ -684,9 +733,11 @@ private:
 
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
 
-      ThinGenerator.internalize(*TheModule, *Index);
+      ThinGenerator.internalize(*TheModule, *Index, *Input);
 
       std::string OutputName = OutputFilename;
       if (OutputName.empty()) {
@@ -707,7 +758,9 @@ private:
 
     for (auto &Filename : InputFilenames) {
       LLVMContext Ctx;
-      auto TheModule = loadModule(Filename, Ctx);
+      auto Buffer = loadFile(Filename);
+      auto Input = loadInputFile(Buffer->getMemBufferRef());
+      auto TheModule = loadModuleFromInput(*Input, Ctx);
 
       ThinGenerator.optimize(*TheModule);
 
@@ -827,6 +880,11 @@ int main(int argc, char **argv) {
     return 0;
   }
 
+  if (ListDependentLibrariesOnly) {
+    listDependentLibraries();
+    return 0;
+  }
+
   if (IndexStats) {
     printIndexStats();
     return 0;
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index 26426367e252..0bd9289dc938 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-lto2: test harness for the resolution-based LTO interface ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -92,19 +91,40 @@ static cl::opt<std::string> DefaultTriple(
     cl::desc(
         "Replace unspecified target triples in input files with this triple"));
 
+static cl::opt<bool> RemarksWithHotness(
+    "pass-remarks-with-hotness",
+    cl::desc("With PGO, include profile count in optimization remarks"),
+    cl::Hidden);
+
 static cl::opt<std::string>
-    OptRemarksOutput("pass-remarks-output",
-                     cl::desc("YAML output file for optimization remarks"));
+    RemarksFilename("pass-remarks-output",
+                    cl::desc("Output filename for pass remarks"),
+                    cl::value_desc("filename"));
 
-static cl::opt<bool> OptRemarksWithHotness(
-    "pass-remarks-with-hotness",
-    cl::desc("Whether to include hotness informations in the remarks.\n"
-             "Has effect only if -pass-remarks-output is specified."));
+static cl::opt<std::string>
+    RemarksPasses("pass-remarks-filter",
+                  cl::desc("Only record optimization remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+    "pass-remarks-format",
+    cl::desc("The format used for serializing remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
 
 static cl::opt<std::string>
     SamplePGOFile("lto-sample-profile-file",
                   cl::desc("Specify a SamplePGO profile file"));
 
+static cl::opt<std::string>
+    CSPGOFile("lto-cspgo-profile-file",
+              cl::desc("Specify a context sensitive PGO profile file"));
+
+static cl::opt<bool>
+    RunCSIRInstr("lto-cspgo-gen",
+                 cl::desc("Run PGO context sensitive IR instrumentation"),
+                 cl::init(false), cl::Hidden);
+
 static cl::opt<bool>
     UseNewPM("use-new-pm",
              cl::desc("Run LTO passes using the new pass manager"),
@@ -211,10 +231,14 @@ static int run(int argc, char **argv) {
           "Config::addSaveTemps failed");
 
   // Optimization remarks.
-  Conf.RemarksFilename = OptRemarksOutput;
-  Conf.RemarksWithHotness = OptRemarksWithHotness;
+  Conf.RemarksFilename = RemarksFilename;
+  Conf.RemarksPasses = RemarksPasses;
+  Conf.RemarksWithHotness = RemarksWithHotness;
+  Conf.RemarksFormat = RemarksFormat;
 
   Conf.SampleProfile = SamplePGOFile;
+  Conf.CSIRProfile = CSPGOFile;
+  Conf.RunCSIRInstr = RunCSIRInstr;
 
   // Run a custom pipeline, if asked for.
   Conf.OptPipeline = OptPipeline;
@@ -343,6 +367,13 @@ static int dumpSymtab(int argc, char **argv) {
     if (TT.isOSBinFormatCOFF())
       outs() << "linker opts: " << Input->getCOFFLinkerOpts() << '\n';
 
+    if (TT.isOSBinFormatELF()) {
+      outs() << "dependent libraries:";
+      for (auto L : Input->getDependentLibraries())
+        outs() << " \"" << L << "\"";
+      outs() << '\n';
+    }
+
     std::vector<StringRef> ComdatTable = Input->getComdatTable();
     for (const InputFile::Symbol &Sym : Input->symbols()) {
       switch (Sym.getVisibility()) {
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index acc5a5f4cab2..e2af2e7f2e32 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -1,9 +1,8 @@
 //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 1f18ac075f85..11b685233abc 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -1,9 +1,8 @@
 //===- Disassembler.h - Text File Disassembler ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index c0976502f545..ec189c297860 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-mc.cpp - Machine Code Hacking Driver ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -402,18 +401,8 @@ int main(int argc, char **argv) {
   }
   if (!MainFileName.empty())
     Ctx.setMainFileName(MainFileName);
-  if (GenDwarfForAssembly && DwarfVersion >= 5) {
-    // DWARF v5 needs the root file as well as the compilation directory.
-    // If we find a '.file 0' directive that will supersede these values.
-    MD5 Hash;
-    MD5::MD5Result *Cksum =
-        (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
-    Hash.update(Buffer->getBuffer());
-    Hash.final(*Cksum);
-    Ctx.setMCLineTableRootFile(
-        /*CUID=*/0, Ctx.getCompilationDir(),
-        !MainFileName.empty() ? MainFileName : InputFilename, Cksum, None);
-  }
+  if (GenDwarfForAssembly)
+    Ctx.setGenDwarfRootFile(InputFilename, Buffer->getBuffer());
 
   // Package up features to be passed to target/subtarget
   std::string FeaturesStr;
diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp
index 29a27c50c171..bf592f67245e 100644
--- a/tools/llvm-mca/CodeRegion.cpp
+++ b/tools/llvm-mca/CodeRegion.cpp
@@ -1,9 +1,8 @@
 //===-------------------------- CodeRegion.cpp -----------------*- C++ -* -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -17,7 +16,12 @@
 namespace llvm {
 namespace mca {
 
-bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
+CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {
+  // Create a default region for the input code sequence.
+  Regions.emplace_back(make_unique<CodeRegion>("", SMLoc()));
+}
+
+bool CodeRegion::isLocInRange(SMLoc Loc) const {
   if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer())
     return false;
   if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer())
@@ -25,42 +29,88 @@ bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
   return true;
 }
 
-void CodeRegions::beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
-  assert(!Regions.empty() && "Missing Default region");
-  const CodeRegion &CurrentRegion = *Regions.back();
-  if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) {
-    SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
-                    "Ignoring invalid region start");
-    return;
+void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
+  if (ActiveRegions.empty()) {
+    // Remove the default region if there is at least one user defined region.
+    // By construction, only the default region has an invalid start location.
+    if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() &&
+        !Regions[0]->endLoc().isValid()) {
+      ActiveRegions[Description] = 0;
+      Regions[0] = make_unique<CodeRegion>(Description, Loc);
+      return;
+    }
+  } else {
+    auto It = ActiveRegions.find(Description);
+    if (It != ActiveRegions.end()) {
+      const CodeRegion &R = *Regions[It->second];
+      if (Description.empty()) {
+        SM.PrintMessage(Loc, SourceMgr::DK_Error,
+                        "found multiple overlapping anonymous regions");
+        SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+                        "Previous anonymous region was defined here");
+        FoundErrors = true;
+        return;
+      }
+
+      SM.PrintMessage(Loc, SourceMgr::DK_Error,
+                      "overlapping regions cannot have the same name");
+      SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+                      "region " + Description + " was previously defined here");
+      FoundErrors = true;
+      return;
+    }
   }
 
-  // Remove the default region if there are user defined regions.
-  if (!CurrentRegion.startLoc().isValid())
-    Regions.erase(Regions.begin());
-  addRegion(Description, Loc);
+  ActiveRegions[Description] = Regions.size();
+  Regions.emplace_back(make_unique<CodeRegion>(Description, Loc));
+  return;
 }
 
-void CodeRegions::endRegion(llvm::SMLoc Loc) {
-  assert(!Regions.empty() && "Missing Default region");
-  CodeRegion &CurrentRegion = *Regions.back();
-  if (CurrentRegion.endLoc().isValid()) {
-    SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
-                    "Ignoring invalid region end");
+void CodeRegions::endRegion(StringRef Description, SMLoc Loc) {
+  if (Description.empty()) {
+    // Special case where there is only one user defined region,
+    // and this LLVM-MCA-END directive doesn't provide a region name.
+    // In this case, we assume that the user simply wanted to just terminate
+    // the only active region.
+    if (ActiveRegions.size() == 1) {
+      auto It = ActiveRegions.begin();
+      Regions[It->second]->setEndLocation(Loc);
+      ActiveRegions.erase(It);
+      return;
+    }
+
+    // Special case where the region end marker applies to the default region.
+    if (ActiveRegions.empty() && Regions.size() == 1 &&
+        !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) {
+      Regions[0]->setEndLocation(Loc);
+      return;
+    }
+  }
+
+  auto It = ActiveRegions.find(Description);
+  if (It != ActiveRegions.end()) {
+    Regions[It->second]->setEndLocation(Loc);
+    ActiveRegions.erase(It);
     return;
   }
 
-  CurrentRegion.setEndLocation(Loc);
+  FoundErrors = true;
+  SM.PrintMessage(Loc, SourceMgr::DK_Error,
+                  "found an invalid region end directive");
+  if (!Description.empty()) {
+    SM.PrintMessage(Loc, SourceMgr::DK_Note,
+                    "unable to find an active region named " + Description);
+  } else {
+    SM.PrintMessage(Loc, SourceMgr::DK_Note,
+                    "unable to find an active anonymous region");
+  }
 }
 
-void CodeRegions::addInstruction(const llvm::MCInst &Instruction) {
-  const llvm::SMLoc &Loc = Instruction.getLoc();
-  const auto It =
-      std::find_if(Regions.rbegin(), Regions.rend(),
-                   [Loc](const std::unique_ptr<CodeRegion> &Region) {
-                     return Region->isLocInRange(Loc);
-                   });
-  if (It != Regions.rend())
-    (*It)->addInstruction(Instruction);
+void CodeRegions::addInstruction(const MCInst &Instruction) {
+  SMLoc Loc = Instruction.getLoc();
+  for (UniqueCodeRegion &Region : Regions)
+    if (Region->isLocInRange(Loc))
+      Region->addInstruction(Instruction);
 }
 
 } // namespace mca
diff --git a/tools/llvm-mca/CodeRegion.h b/tools/llvm-mca/CodeRegion.h
index 867aa18bb4fe..cabb4a5d4484 100644
--- a/tools/llvm-mca/CodeRegion.h
+++ b/tools/llvm-mca/CodeRegion.h
@@ -1,9 +1,8 @@
 //===-------------------------- CodeRegion.h -------------------*- C++ -* -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -35,6 +34,7 @@
 #define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/SMLoc.h"
@@ -51,7 +51,7 @@ class CodeRegion {
   // An optional descriptor for this region.
   llvm::StringRef Description;
   // Instructions that form this region.
-  std::vector<llvm::MCInst> Instructions;
+  llvm::SmallVector<llvm::MCInst, 8> Instructions;
   // Source location range.
   llvm::SMLoc RangeStart;
   llvm::SMLoc RangeEnd;
@@ -79,24 +79,25 @@ public:
   llvm::StringRef getDescription() const { return Description; }
 };
 
+class CodeRegionParseError final : public Error {};
+
 class CodeRegions {
   // A source manager. Used by the tool to generate meaningful warnings.
   llvm::SourceMgr &SM;
 
-  std::vector<std::unique_ptr<CodeRegion>> Regions;
-
-  // Construct a new region of code guarded by LLVM-MCA comments.
-  void addRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
-    Regions.emplace_back(llvm::make_unique<CodeRegion>(Description, Loc));
-  }
+  using UniqueCodeRegion = std::unique_ptr<CodeRegion>;
+  std::vector<UniqueCodeRegion> Regions;
+  llvm::StringMap<unsigned> ActiveRegions;
+  bool FoundErrors;
 
   CodeRegions(const CodeRegions &) = delete;
   CodeRegions &operator=(const CodeRegions &) = delete;
 
 public:
-  typedef std::vector<std::unique_ptr<CodeRegion>>::iterator iterator;
-  typedef std::vector<std::unique_ptr<CodeRegion>>::const_iterator
-      const_iterator;
+  CodeRegions(llvm::SourceMgr &S);
+
+  typedef std::vector<UniqueCodeRegion>::iterator iterator;
+  typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator;
 
   iterator begin() { return Regions.begin(); }
   iterator end() { return Regions.end(); }
@@ -104,24 +105,21 @@ public:
   const_iterator end() const { return Regions.cend(); }
 
   void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
-  void endRegion(llvm::SMLoc Loc);
+  void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
   void addInstruction(const llvm::MCInst &Instruction);
   llvm::SourceMgr &getSourceMgr() const { return SM; }
 
-  CodeRegions(llvm::SourceMgr &S) : SM(S) {
-    // Create a default region for the input code sequence.
-    addRegion("Default", llvm::SMLoc());
-  }
-
   llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const {
     return Regions[Idx]->getInstructions();
   }
 
   bool empty() const {
-    return llvm::all_of(Regions, [](const std::unique_ptr<CodeRegion> &Region) {
+    return llvm::all_of(Regions, [](const UniqueCodeRegion &Region) {
       return Region->empty();
     });
   }
+
+  bool isValid() const { return !FoundErrors; }
 };
 
 } // namespace mca
diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp
index 5bd37adeeae9..c793169e64e0 100644
--- a/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -1,9 +1,8 @@
 //===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -49,8 +48,7 @@ public:
 
   // We only want to intercept the emission of new instructions.
   virtual void EmitInstruction(const MCInst &Inst,
-                               const MCSubtargetInfo & /* unused */,
-                               bool /* unused */) override {
+                               const MCSubtargetInfo &/* unused */) override {
     Regions.addInstruction(Inst);
   }
 
@@ -88,7 +86,11 @@ void MCACommentConsumer::HandleComment(SMLoc Loc, StringRef CommentText) {
 
   Comment = Comment.drop_front(Position);
   if (Comment.consume_front("LLVM-MCA-END")) {
-    Regions.endRegion(Loc);
+    // Skip spaces and tabs.
+    Position = Comment.find_first_not_of(" \t");
+    if (Position < Comment.size())
+      Comment = Comment.drop_front(Position);
+    Regions.endRegion(Comment, Loc);
     return;
   }
 
@@ -117,7 +119,6 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
   MCACommentConsumer CC(Regions);
   Lexer.setCommentConsumer(&CC);
 
-  // Create a target-specific parser and perform the parse.
   std::unique_ptr<MCTargetAsmParser> TAP(
       TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
   if (!TAP)
@@ -127,7 +128,7 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
   Parser->setTargetParser(*TAP);
   Parser->Run(false);
 
-  // Get the assembler dialect from the input.  llvm-mca will use this as the
+  // Set the assembler dialect from the input. llvm-mca will use this as the
   // default dialect when printing reports.
   AssemblerDialect = Parser->getAssemblerDialect();
   return Regions;
diff --git a/tools/llvm-mca/CodeRegionGenerator.h b/tools/llvm-mca/CodeRegionGenerator.h
index 892cafb92686..9a10aa2c148b 100644
--- a/tools/llvm-mca/CodeRegionGenerator.h
+++ b/tools/llvm-mca/CodeRegionGenerator.h
@@ -1,9 +1,8 @@
 //===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/PipelinePrinter.cpp b/tools/llvm-mca/PipelinePrinter.cpp
index 18ef45fc2a65..90d468075996 100644
--- a/tools/llvm-mca/PipelinePrinter.cpp
+++ b/tools/llvm-mca/PipelinePrinter.cpp
@@ -1,9 +1,8 @@
 //===--------------------- PipelinePrinter.cpp ------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/PipelinePrinter.h b/tools/llvm-mca/PipelinePrinter.h
index 456026e12df3..004309cd7b8e 100644
--- a/tools/llvm-mca/PipelinePrinter.h
+++ b/tools/llvm-mca/PipelinePrinter.h
@@ -1,9 +1,8 @@
 //===--------------------- PipelinePrinter.h --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
new file mode 100644
index 000000000000..560c6c6e8a33
--- /dev/null
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -0,0 +1,624 @@
+//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the BottleneckAnalysis
+/// to report bottleneck info.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/BottleneckAnalysis.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+    : SM(Model),
+      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
+      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
+      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+  computeProcResourceMasks(SM, ProcResID2Mask);
+
+  // Ignore the invalid resource at index zero.
+  unsigned NextResourceUsersIdx = 0;
+  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+    NextResourceUsersIdx += ProcResource.NumUnits;
+    uint64_t ResourceMask = ProcResID2Mask[I];
+    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+  }
+
+  ResourceUsers.resize(NextResourceUsersIdx);
+  std::fill(ResourceUsers.begin(), ResourceUsers.end(),
+            std::make_pair<unsigned, unsigned>(~0U, 0U));
+}
+
+void PressureTracker::getResourceUsers(uint64_t ResourceMask,
+                                       SmallVectorImpl<User> &Users) const {
+  unsigned Index = getResourceStateIndex(ResourceMask);
+  unsigned ProcResID = ResIdx2ProcResID[Index];
+  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+    const User U = getResourceUser(ProcResID, I);
+    if (U.second && IPI.find(U.first) != IPI.end())
+      Users.emplace_back(U);
+  }
+}
+
+void PressureTracker::onInstructionDispatched(unsigned IID) {
+  IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
+}
+
+void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
+
+void PressureTracker::handleInstructionIssuedEvent(
+    const HWInstructionIssuedEvent &Event) {
+  unsigned IID = Event.IR.getSourceIndex();
+  using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
+  using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
+  for (const ResourceUse &Use : Event.UsedResources) {
+    const ResourceRef &RR = Use.first;
+    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
+    Index += countTrailingZeros(RR.second);
+    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
+  }
+}
+
+void PressureTracker::updateResourcePressureDistribution(
+    uint64_t CumulativeMask) {
+  while (CumulativeMask) {
+    uint64_t Current = CumulativeMask & (-CumulativeMask);
+    unsigned ResIdx = getResourceStateIndex(Current);
+    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
+    uint64_t Mask = ProcResID2Mask[ProcResID];
+
+    if (Mask == Current) {
+      ResourcePressureDistribution[ProcResID]++;
+      CumulativeMask ^= Current;
+      continue;
+    }
+
+    Mask ^= Current;
+    while (Mask) {
+      uint64_t SubUnit = Mask & (-Mask);
+      ResIdx = getResourceStateIndex(SubUnit);
+      ProcResID = ResIdx2ProcResID[ResIdx];
+      ResourcePressureDistribution[ProcResID]++;
+      Mask ^= SubUnit;
+    }
+
+    CumulativeMask ^= Current;
+  }
+}
+
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    const uint64_t ResourceMask = Event.ResourceMask;
+    updateResourcePressureDistribution(Event.ResourceMask);
+
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      const Instruction &IS = *IR.getInstruction();
+      uint64_t BusyResources = IS.getCriticalResourceMask() & ResourceMask;
+      if (!BusyResources)
+        continue;
+
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].ResourcePressureCycles++;
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].RegisterPressureCycles++;
+    }
+    break;
+
+  case HWPressureEvent::MEMORY_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].MemoryPressureCycles++;
+    }
+  }
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
+                                         const DependencyEdge &DepEdge,
+                                         MCInstPrinter &MCIP) const {
+  unsigned FromIID = DepEdge.FromIID;
+  unsigned ToIID = DepEdge.ToIID;
+  assert(FromIID < ToIID && "Graph should be acyclic!");
+
+  const DependencyEdge::Dependency &DE = DepEdge.Dep;
+  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
+
+  OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
+  if (DE.Type == DependencyEdge::DT_REGISTER) {
+    OS << " - REGISTER: ";
+    MCIP.printRegName(OS, DE.ResourceOrRegID);
+  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
+    OS << " - MEMORY";
+  } else {
+    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
+           "Unsupported dependency type!");
+    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+  }
+  OS << " - CYCLES: " << DE.Cost << '\n';
+}
+#endif // NDEBUG
+
+void DependencyGraph::initializeRootSet(
+    SmallVectorImpl<unsigned> &RootSet) const {
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &N = Nodes[I];
+    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
+      RootSet.emplace_back(I);
+  }
+}
+
+void DependencyGraph::propagateThroughEdges(
+    SmallVectorImpl<unsigned> &RootSet) {
+  SmallVector<unsigned, 8> ToVisit;
+
+  // A critical sequence is computed as the longest path from a node of the
+  // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
+  // composed of nodes with at least one successor, and no predecessors.
+  //
+  // Each node of the graph starts with an initial default cost of zero.  The
+  // cost of a node is a measure of criticality: the higher the cost, the bigger
+  // is the performance impact.
+  //
+  // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
+  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
+  // nodes`, and then propagates the cost of N to all its neighbors.
+  //
+  // The `unvisited nodes` set initially contains all the nodes from the
+  // RootSet.  A node N is added to the `unvisited nodes` if all its
+  // predecessors have been visited already.
+  // 
+  // For simplicity, every node tracks the number of visited incoming edges in
+  // field `NumVisitedPredecessors`.  When the value of that field reaches
+  // `NumPredecessors`, the corresponding node is added to a `ToVisit` set.
+  //
+  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
+  // new `unvisited nodes` set.
+  // 
+  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
+  // is empty. This algorithm works under the assumption that the graph is
+  // acyclic.
+  do {
+    for (unsigned IID : RootSet) {
+      const DGNode &N = Nodes[IID];
+      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
+        unsigned ToIID = DepEdge.ToIID;
+        DGNode &To = Nodes[ToIID];
+        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
+        // Check if this is the most expensive incoming edge seen so far.  If
+        // so, update the total cost of the destination node (ToIID), as well
+        // as its field `CriticalPredecessor`.
+        if (Cost > To.Cost) {
+          To.CriticalPredecessor = DepEdge;
+          To.Cost = Cost;
+          To.Depth = N.Depth + 1;
+        }
+        To.NumVisitedPredecessors++;
+        if (To.NumVisitedPredecessors == To.NumPredecessors)
+          ToVisit.emplace_back(ToIID);
+      }
+    }
+
+    std::swap(RootSet, ToVisit);
+    ToVisit.clear();
+  } while (!RootSet.empty());
+}
+
+void DependencyGraph::getCriticalSequence(
+    SmallVectorImpl<const DependencyEdge *> &Seq) const {
+  // At this stage, nodes of the graph have been already visited, and costs have
+  // been propagated through the edges (see method `propagateThroughEdges()`).
+
+  // Identify the node N with the highest cost in the graph. By construction,
+  // that node is the last instruction of our critical sequence.
+  // Field N.Depth would tell us the total length of the sequence.
+  //
+  // To obtain the sequence of critical edges, we simply follow the chain of critical
+  // predecessors starting from node N (field DGNode::CriticalPredecessor).
+  const auto It = std::max_element(
+      Nodes.begin(), Nodes.end(),
+      [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
+  unsigned IID = std::distance(Nodes.begin(), It);
+  Seq.resize(Nodes[IID].Depth);
+  for (unsigned I = Seq.size(), E = 0; I > E; --I) {
+    const DGNode &N = Nodes[IID];
+    Seq[I - 1] = &N.CriticalPredecessor;
+    IID = N.CriticalPredecessor.FromIID;
+  }
+}
+
+static void printInstruction(formatted_raw_ostream &FOS,
+                             const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+                             const MCInst &MCI,
+                             bool UseDifferentColor = false) {
+  std::string Instruction;
+  raw_string_ostream InstrStream(Instruction);
+
+  FOS.PadToColumn(14);
+
+  MCIP.printInst(&MCI, InstrStream, "", STI);
+  InstrStream.flush();
+
+  if (UseDifferentColor)
+    FOS.changeColor(raw_ostream::CYAN, true, false);
+  FOS << StringRef(Instruction).ltrim();
+  if (UseDifferentColor)
+    FOS.resetColor();
+}
+
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+  SmallVector<const DependencyEdge *, 16> Seq;
+  DG.getCriticalSequence(Seq);
+  if (Seq.empty())
+    return;
+
+  OS << "\nCritical sequence based on the simulation:\n\n";
+  // The first edge is loop carried if its source does not precede its target.
+  const DependencyEdge &FirstEdge = *Seq[0];
+  unsigned FromIID = FirstEdge.FromIID % Source.size();
+  unsigned ToIID = FirstEdge.ToIID % Source.size();
+  bool IsLoopCarried = FromIID >= ToIID;
+
+  formatted_raw_ostream FOS(OS);
+  FOS.PadToColumn(14);
+  FOS << "Instruction";
+  FOS.PadToColumn(58);
+  FOS << "Dependency Information";
+
+  bool HasColors = FOS.has_colors();
+  // CurrentIID is the index in Source of the next instruction to print.
+  unsigned CurrentIID = 0;
+  if (IsLoopCarried) {
+    FOS << "\n +----< " << FromIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
+    FOS << "\n |\n |    < loop carried > \n |";
+  } else {
+    while (CurrentIID < FromIID) {
+      FOS << "\n        " << CurrentIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+      CurrentIID++;
+    }
+
+    FOS << "\n +----< " << CurrentIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+    CurrentIID++;
+  }
+  // Print the target of every edge, and the instructions skipped in between.
+  for (const DependencyEdge *&DE : Seq) {
+    ToIID = DE->ToIID % Source.size();
+    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+    while (CurrentIID < LastIID) {
+      FOS << "\n |      " << CurrentIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+      CurrentIID++;
+    }
+
+    if (CurrentIID == ToIID) {
+      FOS << "\n +----> " << ToIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+    } else {
+      FOS << "\n |\n |    < loop carried > \n |"
+          << "\n +----> " << ToIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
+    }
+    FOS.PadToColumn(58);
+    // Describe the dependency next to the target instruction.
+    const DependencyEdge::Dependency &Dep = DE->Dep;
+    if (HasColors)
+      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+    if (Dep.Type == DependencyEdge::DT_REGISTER) {
+      FOS << "## REGISTER dependency:  ";
+      if (HasColors)
+        FOS.changeColor(raw_ostream::MAGENTA, true, false);
+      MCIP.printRegName(FOS, Dep.ResourceOrRegID);
+    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+      FOS << "## MEMORY dependency.";
+    } else {
+      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+             "Unsupported dependency type!");
+      FOS << "## RESOURCE interference:  ";
+      if (HasColors)
+        FOS.changeColor(raw_ostream::MAGENTA, true, false);
+      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+      if (HasColors) {
+        FOS.resetColor();
+        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+      }
+      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+          << "% ]";
+    }
+    if (HasColors)
+      FOS.resetColor();
+    ++CurrentIID;
+  }
+  // Print the instructions that follow the end of the critical sequence.
+  while (CurrentIID < Source.size()) {
+    FOS << "\n        " << CurrentIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+    CurrentIID++;
+  }
+
+  FOS << '\n';
+  FOS.flush();
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
+  // Prints a section header, followed by every edge of the given type.
+  auto DumpEdgesOfType = [&](const char *Header,
+                             DependencyEdge::DependencyType Type) {
+    OS << Header;
+    for (const DGNode &Node : Nodes)
+      for (const DependencyEdge &DE : Node.OutgoingEdges)
+        if (DE.Dep.Type == Type)
+          dumpDependencyEdge(OS, DE, MCIP);
+  };
+
+  // Edges are grouped by dependency type: register dependencies first, then
+  // memory dependencies, and finally processor resource interferences.
+  DumpEdgesOfType("\nREG DEPS\n", DependencyEdge::DT_REGISTER);
+
+  DumpEdgesOfType("\nMEM DEPS\n", DependencyEdge::DT_MEMORY);
+
+  DumpEdgesOfType("\nRESOURCE DEPS\n", DependencyEdge::DT_RESOURCE);
+}
+#endif // NDEBUG
+
+// Adds edge From -> To to the graph, or updates it if it already exists.
+void DependencyGraph::addDependency(unsigned From, unsigned To,
+                                    DependencyEdge::Dependency &&Dep) {
+  DGNode &NodeTo = Nodes[To];
+  SmallVectorImpl<DependencyEdge> &Vec = Nodes[From].OutgoingEdges;
+  // Find an existing edge to `To` for the same dependency. Types are compared
+  // too, because a register ID may be numerically equal to a resource mask.
+  auto It = find_if(Vec, [To, &Dep](const DependencyEdge &DE) {
+    return DE.ToIID == To && DE.Dep.Type == Dep.Type &&
+           DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
+  });
+  if (It != Vec.end()) {
+    // Already known: accumulate the cost, and count one more occurrence.
+    It->Dep.Cost += Dep.Cost;
+    It->Frequency++;
+    return;
+  }
+  Vec.emplace_back(DependencyEdge{Dep, From, To, 1});
+  NodeTo.NumPredecessors++; // A new edge adds one predecessor to node `To`.
+}
+
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+                                       MCInstPrinter &Printer,
+                                       ArrayRef<MCInst> S, unsigned NumIter)
+    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
+      Source(S), Iterations(NumIter), TotalCycles(0),
+      PressureIncreasedBecauseOfResources(false),
+      PressureIncreasedBecauseOfRegisterDependencies(false),
+      PressureIncreasedBecauseOfMemoryDependencies(false),
+      SeenStallCycles(false), BPI() {} // DG holds three nodes per instruction.
+
+void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
+                                        unsigned RegID, unsigned Cost) {
+  const unsigned SourceSize = Source.size();
+  if (From < To) {
+    DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
+    return;
+  }
+  // Loop carried: add one edge for each pair of consecutive iterations.
+  Cost *= Iterations / 2;
+  DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
+  DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
+}
+
+void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
+                                      unsigned Cost) {
+  const unsigned SourceSize = Source.size();
+  if (From < To) {
+    DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
+    return;
+  }
+  // Loop carried: add one edge for each pair of consecutive iterations.
+  Cost *= Iterations / 2;
+  DG.addMemoryDep(From, To + SourceSize, Cost);
+  DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
+}
+
+void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
+                                        uint64_t Mask, unsigned Cost) {
+  const unsigned SourceSize = Source.size();
+  if (From < To) {
+    DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
+    return;
+  }
+  // Loop carried: add one edge for each pair of consecutive iterations.
+  Cost *= Iterations / 2;
+  DG.addResourceDep(From, To + SourceSize, Mask, Cost);
+  DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
+}
+
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+  const unsigned IID = Event.IR.getSourceIndex();
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    Tracker.onInstructionDispatched(IID);
+    return;
+  }
+  if (Event.Type == HWInstructionEvent::Executed) {
+    Tracker.onInstructionExecuted(IID);
+    return;
+  }
+  // Dependencies are only computed for instructions that are being issued.
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+
+  const Instruction &IS = *Event.IR.getInstruction();
+  unsigned To = IID % Source.size();
+  // Add an edge from each user of every critical resource of this instruction.
+  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
+  uint64_t ResourceMask = IS.getCriticalResourceMask();
+  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+  while (ResourceMask) {
+    uint64_t Current = ResourceMask & (-ResourceMask); // Lowest set bit.
+    Tracker.getResourceUsers(Current, Users);
+    for (const std::pair<unsigned, unsigned> &U : Users)
+      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
+    Users.clear();
+    ResourceMask ^= Current;
+  }
+  // Add an edge for the critical register dependency, if it costs any cycles.
+  const CriticalDependency &RegDep = IS.getCriticalRegDep();
+  if (RegDep.Cycles) {
+    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
+    unsigned From = RegDep.IID % Source.size();
+    addRegisterDep(From, To, RegDep.RegID, Cycles);
+  }
+  // Add an edge for the critical memory dependency, if it costs any cycles.
+  const CriticalDependency &MemDep = IS.getCriticalMemDep();
+  if (MemDep.Cycles) {
+    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
+    unsigned From = MemDep.IID % Source.size();
+    addMemoryDep(From, To, Cycles);
+  }
+
+  Tracker.handleInstructionIssuedEvent(
+      static_cast<const HWInstructionIssuedEvent &>(Event));
+
+  // Check if this is the last simulated instruction.
+  if (IID == ((Iterations * Source.size()) - 1))
+    DG.finalizeGraph();
+}
+
+// Forwards Event to the pressure tracker, and records which kind of bottleneck
+// caused backend pressure to increase. The recorded flags are read, and then
+// cleared, by onCycleEnd().
+void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  // The tracker sees every pressure event, whatever the reason.
+  Tracker.handlePressureEvent(Event);
+
+  // Only resource pressure, register dependencies and memory dependencies are
+  // recorded by this view. An event with any other reason does not change the
+  // flags.
+  const auto Reason = Event.Reason;
+  if (Reason == HWPressureEvent::RESOURCES)
+    PressureIncreasedBecauseOfResources = true;
+  else if (Reason == HWPressureEvent::REGISTER_DEPS)
+    PressureIncreasedBecauseOfRegisterDependencies = true;
+  else if (Reason == HWPressureEvent::MEMORY_DEPS)
+    PressureIncreasedBecauseOfMemoryDependencies = true;
+}
+
+// Updates the cycle count and the backend pressure statistics.
+void BottleneckAnalysis::onCycleEnd() {
+  ++TotalCycles;
+
+  const bool RegDeps = PressureIncreasedBecauseOfRegisterDependencies;
+  const bool MemDeps = PressureIncreasedBecauseOfMemoryDependencies;
+  const bool Resources = PressureIncreasedBecauseOfResources;
+  // Nothing else to do if pressure did not increase during this cycle.
+  if (!(Resources || RegDeps || MemDeps))
+    return;
+
+  // Each counter is incremented at most once per cycle.
+  ++BPI.PressureIncreaseCycles;
+  BPI.RegisterDependencyCycles += RegDeps;
+  BPI.MemoryDependencyCycles += MemDeps;
+  BPI.DataDependencyCycles += (RegDeps || MemDeps);
+  BPI.ResourcePressureCycles += Resources;
+
+  // Clear the flags so that the next cycle starts from a clean state.
+  PressureIncreasedBecauseOfResources = false;
+  PressureIncreasedBecauseOfRegisterDependencies = false;
+  PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
+// Prints the percentage of cycles with a backend pressure increase, followed by
+// a breakdown of those cycles by cause.
+void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
+    return;
+  }
+
+  // Formats a number of cycles as a percentage of the total simulated cycles,
+  // rounded to two decimal places. TotalCycles cannot be zero at this point:
+  // onCycleEnd() increments it before counting a pressure increase cycle.
+  auto Percentage = [this](unsigned Cycles) {
+    double PerCycle = (double)Cycles * 100 / TotalCycles;
+    return format("%.2f", floor((PerCycle * 100) + 0.5) / 100);
+  };
+
+  OS << "\n\nCycles with backend pressure increase [ "
+     << Percentage(BPI.PressureIncreaseCycles) << "% ]";
+
+  OS << "\nThroughput Bottlenecks: "
+     << "\n  Resource Pressure       [ "
+     << Percentage(BPI.ResourcePressureCycles) << "% ]";
+
+  // Print the pressure distribution of the processor resources, skipping the
+  // resources with no pressure cycles. There is no need to test
+  // BPI.PressureIncreaseCycles again: it is known to be nonzero because of
+  // the early exit at the top of this function.
+  ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+  const MCSchedModel &SM = STI.getSchedModel();
+  for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+    unsigned ResourceCycles = Distribution[I];
+    if (!ResourceCycles)
+      continue;
+
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
+    OS << "\n  - " << PRDesc.Name << "  [ " << Percentage(ResourceCycles)
+       << "% ]";
+  }
+
+  // Data dependencies are the union of register and memory dependencies.
+  OS << "\n  Data Dependencies:      [ "
+     << Percentage(BPI.DataDependencyCycles) << "% ]";
+  OS << "\n  - Register Dependencies [ "
+     << Percentage(BPI.RegisterDependencyCycles) << "% ]";
+  OS << "\n  - Memory Dependencies   [ "
+     << Percentage(BPI.MemoryDependencyCycles) << "% ]\n";
+}
+
+void BottleneckAnalysis::printView(raw_ostream &OS) const {
+  // Print the bottleneck hints first, followed by the critical sequence.
+  std::string Hints;
+  raw_string_ostream HintsStream(Hints);
+  printBottleneckHints(HintsStream);
+  OS << HintsStream.str();
+  printCriticalSequence(OS);
+}
+
+} // namespace mca.
+} // namespace llvm
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h
new file mode 100644
index 000000000000..7564b1a48206
--- /dev/null
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -0,0 +1,341 @@
+//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the bottleneck analysis view.
+///
+/// This view internally observes backend pressure increase events in order to
+/// identify problematic data dependencies and processor resource interferences.
+///
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
+///
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks: 
+///   Resource Pressure       [ 39.34% ]
+///   - JFPA  [ 39.34% ]
+///   - JFPU0  [ 39.34% ]
+///   Data Dependencies:      [ 1.42% ]
+///   - Register Dependencies [ 1.42% ]
+///   - Memory Dependencies   [ 0.00% ]
+///
+/// According to the example, backend pressure increased during 40.76% of the
+/// simulated cycles.  In particular, the major cause of backend pressure
+/// increases was the contention on floating point adder JFPA accessible from
+/// pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and printing
+/// the most "critical" sequence of dependent instructions according to the
+/// simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+///              Instruction                     Dependency Information
+/// +----< 2.    vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// |    < loop carried > 
+/// |
+/// |      0.    vmulps	 %xmm0, %xmm0, %xmm2
+/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
+/// +----> 2.    vhaddps %xmm3, %xmm3, %xmm4     ## REGISTER dependency:  %xmm3
+/// |
+/// |    < loop carried > 
+/// |
+/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+/// 
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state transitions
+/// and backend pressure increase events. Edges are internally ranked based on
+/// their "criticality". A dependency is considered to be critical if it takes a
+/// long time to execute, and if it contributes to backend pressure increases.
+/// Criticality is internally measured in terms of cycles; it is computed for
+/// every edge in the graph as a function of the edge latency and the number of
+/// backend pressure increase cycles contributed by that edge.
+///
+/// At the end of simulation, costs are propagated to nodes through the edges of
+/// the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as critical
+/// sequence.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+
+#include "Views/View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+class PressureTracker {
+  const MCSchedModel &SM;
+
+  // Resource pressure distribution. There is an element for every processor
+  // resource declared by the scheduling model. Quantities are number of cycles.
+  SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector maps processor resource IDs to processor resource
+  // masks. There is exactly one element for each processor resource
+  // declared by the scheduling model.
+  SmallVector<uint64_t, 4> ProcResID2Mask;
+
+  // Maps processor resource state indices (returned by calls to
+  // `getResourceStateIndex(Mask)`) to processor resource identifiers.
+  SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+  // Maps processor resource identifiers to ResourceUsers indices.
+  SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+  // Identifies the last user of a processor resource unit.
+  // This vector is updated on every instruction issued event.
+  // There is one entry for every processor resource unit declared by the
+  // processor model. An all_ones value is treated like an invalid instruction
+  // identifier.
+  using User = std::pair<unsigned, unsigned>;
+  SmallVector<User, 4> ResourceUsers;
+
+  struct InstructionPressureInfo {
+    unsigned RegisterPressureCycles;
+    unsigned MemoryPressureCycles;
+    unsigned ResourcePressureCycles;
+  };
+  DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+  void updateResourcePressureDistribution(uint64_t CumulativeMask);
+  // Returns the ResourceUsers entry for unit UnitID of resource ProcResID.
+  User getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+    unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
+    return ResourceUsers[Index + UnitID];
+  }
+
+public:
+  PressureTracker(const MCSchedModel &Model);
+
+  ArrayRef<unsigned> getResourcePressureDistribution() const {
+    return ResourcePressureDistribution;
+  }
+
+  void getResourceUsers(uint64_t ResourceMask,
+                        SmallVectorImpl<User> &Users) const;
+  // Returns the register pressure cycles recorded for instruction IID.
+  unsigned getRegisterPressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.RegisterPressureCycles;
+  }
+  // Returns the memory pressure cycles recorded for instruction IID.
+  unsigned getMemoryPressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.MemoryPressureCycles;
+  }
+  // Returns the resource pressure cycles recorded for instruction IID.
+  unsigned getResourcePressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.ResourcePressureCycles;
+  }
+  // Returns the name of the processor resource identified by ResourceMask.
+  const char *resolveResourceName(uint64_t ResourceMask) const {
+    unsigned Index = getResourceStateIndex(ResourceMask);
+    unsigned ProcResID = ResIdx2ProcResID[Index];
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+    return PRDesc.Name;
+  }
+
+  void onInstructionDispatched(unsigned IID);
+  void onInstructionExecuted(unsigned IID);
+
+  void handlePressureEvent(const HWPressureEvent &Event);
+  void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
+};
+
+// A dependency edge between two nodes of the dependency graph.
+struct DependencyEdge {
+  enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
+
+  // Dependency edge descriptor: the dependency type, the edge cost in cycles,
+  // and the register (DT_REGISTER) or resource mask (DT_RESOURCE) involved.
+  // ResourceOrRegID is set to zero for memory dependencies.
+  struct Dependency {
+    DependencyType Type;
+    uint64_t ResourceOrRegID;
+    uint64_t Cost;
+  };
+  Dependency Dep;
+  // Identifiers of the source and destination nodes of this edge.
+  unsigned FromIID;
+  unsigned ToIID;
+
+  // Number of times this dependency has been seen. Used by the bottleneck
+  // analysis to compute the interference probability for processor resources.
+  unsigned Frequency;
+};
+
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependencies based on their impact on the runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. The graph is kept acyclic mainly because it reduces the
+// complexity of the algorithm that computes the critical sequence.
+class DependencyGraph {
+  struct DGNode {
+    unsigned NumPredecessors; // Number of incoming edges.
+    unsigned NumVisitedPredecessors;
+    uint64_t Cost;
+    unsigned Depth;
+    // Edge to follow backwards when reconstructing the critical sequence.
+    DependencyEdge CriticalPredecessor;
+    SmallVector<DependencyEdge, 8> OutgoingEdges;
+  };
+  SmallVector<DGNode, 16> Nodes;
+
+  DependencyGraph(const DependencyGraph &) = delete;
+  DependencyGraph &operator=(const DependencyGraph &) = delete;
+  // Adds an edge From -> To, or updates the equivalent edge if there is one.
+  void addDependency(unsigned From, unsigned To,
+                     DependencyEdge::Dependency &&DE);
+
+  void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+  void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+
+#ifndef NDEBUG
+  void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
+                          MCInstPrinter &MCIP) const;
+#endif
+
+public:
+  DependencyGraph(unsigned Size) : Nodes(Size) {}
+  // Each of the methods below adds one dependency of the corresponding type.
+  void addRegisterDep(unsigned From, unsigned To, unsigned RegID,
+                      unsigned Cost) {
+    addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost});
+  }
+
+  void addMemoryDep(unsigned From, unsigned To, unsigned Cost) {
+    addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost});
+  }
+
+  void addResourceDep(unsigned From, unsigned To, uint64_t Mask,
+                      unsigned Cost) {
+    addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
+  }
+
+  // Called by the bottleneck analysis at the end of simulation to propagate
+  // costs through the edges of the graph, and compute a critical path.
+  void finalizeGraph() {
+    SmallVector<unsigned, 16> RootSet;
+    initializeRootSet(RootSet);
+    propagateThroughEdges(RootSet);
+  }
+
+  // Returns a sequence of edges representing the critical sequence based on the
+  // simulated run. It assumes that the graph has already been finalized (i.e.
+  // method `finalizeGraph()` has already been called on this graph).
+  void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
+#ifndef NDEBUG
+  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
+#endif
+};
+
+/// A view that reports throughput bottlenecks and the critical sequence.
+class BottleneckAnalysis : public View {
+  const MCSubtargetInfo &STI;
+  MCInstPrinter &MCIP;
+  PressureTracker Tracker;
+  DependencyGraph DG;
+  // The input instructions, and the number of simulated iterations.
+  ArrayRef<MCInst> Source;
+  unsigned Iterations;
+  unsigned TotalCycles;
+  // Reasons why backend pressure increased during the current cycle.
+  bool PressureIncreasedBecauseOfResources;
+  bool PressureIncreasedBecauseOfRegisterDependencies;
+  bool PressureIncreasedBecauseOfMemoryDependencies;
+  // True if throughput was affected by dispatch stalls.
+  bool SeenStallCycles;
+
+  struct BackPressureInfo {
+    // Cycles where backpressure increased.
+    unsigned PressureIncreaseCycles;
+    // Cycles where backpressure increased because of pipeline pressure.
+    unsigned ResourcePressureCycles;
+    // Cycles where backpressure increased because of data dependencies.
+    unsigned DataDependencyCycles;
+    // Cycles where backpressure increased because of register dependencies.
+    unsigned RegisterDependencyCycles;
+    // Cycles where backpressure increased because of memory dependencies.
+    unsigned MemoryDependencyCycles;
+  };
+  BackPressureInfo BPI;
+
+  // Used to populate the dependency graph DG.
+  void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy);
+  void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
+  void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
+
+  // Prints a bottleneck message to OS.
+  void printBottleneckHints(raw_ostream &OS) const;
+  void printCriticalSequence(raw_ostream &OS) const;
+
+public:
+  BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+                     ArrayRef<MCInst> Sequence, unsigned Iterations);
+
+  void onCycleEnd() override;
+  void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
+  void onEvent(const HWPressureEvent &Event) override;
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void printView(raw_ostream &OS) const override;
+
+#ifndef NDEBUG
+  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-mca/Views/DispatchStatistics.cpp b/tools/llvm-mca/Views/DispatchStatistics.cpp
index 2562c82407bf..557b8ba17b17 100644
--- a/tools/llvm-mca/Views/DispatchStatistics.cpp
+++ b/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -1,10 +1,9 @@
 //===--------------------- DispatchStatistics.cpp ---------------------*- C++
 //-*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/DispatchStatistics.h b/tools/llvm-mca/Views/DispatchStatistics.h
index 6679c81efe95..07c0f5a4c68f 100644
--- a/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/tools/llvm-mca/Views/DispatchStatistics.h
@@ -1,9 +1,8 @@
 //===--------------------- DispatchStatistics.h -----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp
index 5016afb49e44..1fbffa3e5b69 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -1,9 +1,8 @@
 //===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -44,6 +43,9 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
     const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
     unsigned NumMicroOpcodes = SCDesc.NumMicroOps;
     unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+    // Add extra latency due to delays in the forwarding data paths.
+    Latency += MCSchedModel::getForwardingDelayCycles(
+        STI.getReadAdvanceEntries(SCDesc));
     Optional<double> RThroughput =
         MCSchedModel::getReciprocalThroughput(STI, SCDesc);
 
diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h
index 3ef95d474490..640d87383436 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/tools/llvm-mca/Views/InstructionInfoView.h
@@ -1,9 +1,8 @@
 //===--------------------- InstructionInfoView.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/tools/llvm-mca/Views/RegisterFileStatistics.cpp
index 06202bc41421..58736ee0d18c 100644
--- a/tools/llvm-mca/Views/RegisterFileStatistics.cpp
+++ b/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -1,9 +1,8 @@
 //===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.h b/tools/llvm-mca/Views/RegisterFileStatistics.h
index a2c52a668dae..a2273dd48b22 100644
--- a/tools/llvm-mca/Views/RegisterFileStatistics.h
+++ b/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -1,9 +1,8 @@
 //===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/ResourcePressureView.cpp b/tools/llvm-mca/Views/ResourcePressureView.cpp
index 6df61840437d..38a2478cf4fe 100644
--- a/tools/llvm-mca/Views/ResourcePressureView.cpp
+++ b/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -1,9 +1,8 @@
 //===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/ResourcePressureView.h b/tools/llvm-mca/Views/ResourcePressureView.h
index 572ce6fe6b70..0fa0b9a36aa3 100644
--- a/tools/llvm-mca/Views/ResourcePressureView.h
+++ b/tools/llvm-mca/Views/ResourcePressureView.h
@@ -1,9 +1,8 @@
 //===--------------------- ResourcePressureView.h ---------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
index 54eb28f1add9..cb4fbae78039 100644
--- a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
+++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -1,9 +1,8 @@
 //===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 02aa13bc444a..1a4d3dec5c56 100644
--- a/tools/llvm-mca/Views/RetireControlUnitStatistics.h
+++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -1,9 +1,8 @@
 //===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/SchedulerStatistics.cpp b/tools/llvm-mca/Views/SchedulerStatistics.cpp
index 670f90127f18..bd0ba350ab68 100644
--- a/tools/llvm-mca/Views/SchedulerStatistics.cpp
+++ b/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -1,9 +1,8 @@
 //===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -23,7 +22,6 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
     : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
       NumCycles(0), MostRecentLoadDispatched(~0U),
       MostRecentStoreDispatched(~0U),
-      IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
       Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
@@ -44,9 +42,10 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
 // In future we should add a new "memory queue" event type, so that we stop
 // making assumptions on how LSUnit internally works (See PR39828).
 void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
-  if (Event.Type == HWInstructionEvent::Issued)
-    ++NumIssued;
-  else if (Event.Type == HWInstructionEvent::Dispatched) {
+  if (Event.Type == HWInstructionEvent::Issued) {
+    const Instruction &Inst = *Event.IR.getInstruction();
+    NumIssued += Inst.getDesc().NumMicroOps;
+  } else if (Event.Type == HWInstructionEvent::Dispatched) {
     const Instruction &Inst = *Event.IR.getInstruction();
     const unsigned Index = Event.IR.getSourceIndex();
     if (LQResourceID && Inst.getDesc().MayLoad &&
@@ -96,29 +95,25 @@ void SchedulerStatistics::updateHistograms() {
     BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
   }
 
-  IssuedPerCycle[NumIssued]++;
+  IssueWidthPerCycle[NumIssued]++;
   NumIssued = 0;
 }
 
 void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
   OS << "\n\nSchedulers - "
-     << "number of cycles where we saw N instructions issued:\n";
+     << "number of cycles where we saw N micro opcodes issued:\n";
   OS << "[# issued], [# cycles]\n";
 
-  const auto It =
-      std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
-  unsigned Index = std::distance(IssuedPerCycle.begin(), It);
-
   bool HasColors = OS.has_colors();
-  for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
-    unsigned IPC = IssuedPerCycle[I];
-    if (!IPC)
-      continue;
-
-    if (I == Index && HasColors)
+  const auto It =
+      std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end());
+  for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) {
+    unsigned NumIssued = Entry.first;
+    if (NumIssued == It->first && HasColors)
       OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
 
-    OS << " " << I << ",          " << IPC << "  ("
+    unsigned IPC = Entry.second;
+    OS << " " << NumIssued << ",          " << IPC << "  ("
        << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
     if (HasColors)
       OS.resetColor();
diff --git a/tools/llvm-mca/Views/SchedulerStatistics.h b/tools/llvm-mca/Views/SchedulerStatistics.h
index d99a395a726d..32711b4483b4 100644
--- a/tools/llvm-mca/Views/SchedulerStatistics.h
+++ b/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -1,9 +1,8 @@
 //===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -63,7 +62,9 @@ class SchedulerStatistics final : public View {
     uint64_t CumulativeNumUsedSlots;
   };
 
-  std::vector<unsigned> IssuedPerCycle;
+  using Histogram = std::map<unsigned, unsigned>;
+  Histogram IssueWidthPerCycle;
+
   std::vector<BufferUsage> Usage;
 
   void updateHistograms();
diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp
index d8ac709e784d..ef5550048f4c 100644
--- a/tools/llvm-mca/Views/SummaryView.cpp
+++ b/tools/llvm-mca/Views/SummaryView.cpp
@@ -1,9 +1,8 @@
 //===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -25,11 +24,17 @@ namespace mca {
 
 SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
                          unsigned Width)
-    : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
+    : SM(Model), Source(S), DispatchWidth(Width?Width: Model.IssueWidth),
+      LastInstructionIdx(0),
       TotalCycles(0), NumMicroOps(0),
       ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
-      ProcResourceMasks(Model.getNumProcResourceKinds()) {
+      ProcResourceMasks(Model.getNumProcResourceKinds()),
+      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
   computeProcResourceMasks(SM, ProcResourceMasks);
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
+    ResIdx2ProcResID[Index] = I;
+  }
 }
 
 void SummaryView::onEvent(const HWInstructionEvent &Event) {
@@ -51,11 +56,8 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
   NumMicroOps += Desc.NumMicroOps;
   for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
     if (RU.second.size()) {
-      const auto It = find(ProcResourceMasks, RU.first);
-      assert(It != ProcResourceMasks.end() &&
-             "Invalid processor resource mask!");
-      ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
-          RU.second.size();
+      unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)];
+      ProcResourceUsage[ProcResID] += RU.second.size();
     }
   }
 }
@@ -87,5 +89,6 @@ void SummaryView::printView(raw_ostream &OS) const {
   TempStream.flush();
   OS << Buffer;
 }
+
 } // namespace mca.
 } // namespace llvm
diff --git a/tools/llvm-mca/Views/SummaryView.h b/tools/llvm-mca/Views/SummaryView.h
index f59fd4233fbe..9be31b7d51bd 100644
--- a/tools/llvm-mca/Views/SummaryView.h
+++ b/tools/llvm-mca/Views/SummaryView.h
@@ -1,9 +1,8 @@
 //===--------------------- SummaryView.h ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -46,6 +45,7 @@ class SummaryView : public View {
   unsigned TotalCycles;
   // The total number of micro opcodes contributed by a block of instructions.
   unsigned NumMicroOps;
+
   // For each processor resource, this vector stores the cumulative number of
   // resource cycles consumed by the analyzed code block.
   llvm::SmallVector<unsigned, 8> ProcResourceUsage;
@@ -56,6 +56,9 @@ class SummaryView : public View {
   // declared by the scheduling model.
   llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
 
+  // Used to map resource indices to actual processor resource IDs.
+  llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
+
   // Compute the reciprocal throughput for the analyzed code block.
   // The reciprocal block throughput is computed as the MAX between:
   //   - NumMicroOps / DispatchWidth
@@ -68,9 +71,9 @@ public:
 
   void onCycleEnd() override { ++TotalCycles; }
   void onEvent(const HWInstructionEvent &Event) override;
-
   void printView(llvm::raw_ostream &OS) const override;
 };
+
 } // namespace mca
 } // namespace llvm
 
diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp
index 7d55bbc99c73..fe3f16ba344c 100644
--- a/tools/llvm-mca/Views/TimelineView.cpp
+++ b/tools/llvm-mca/Views/TimelineView.cpp
@@ -1,9 +1,8 @@
 //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \brief
diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h
index ee981800161c..b63b234293cd 100644
--- a/tools/llvm-mca/Views/TimelineView.h
+++ b/tools/llvm-mca/Views/TimelineView.h
@@ -1,9 +1,8 @@
 //===--------------------- TimelineView.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \brief
diff --git a/tools/llvm-mca/Views/View.cpp b/tools/llvm-mca/Views/View.cpp
index 6cfb9dd9f394..8e5c34d2d5c2 100644
--- a/tools/llvm-mca/Views/View.cpp
+++ b/tools/llvm-mca/Views/View.cpp
@@ -1,9 +1,8 @@
 //===----------------------- View.cpp ---------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/Views/View.h b/tools/llvm-mca/Views/View.h
index 4b82b0da0d27..3b52511b4d29 100644
--- a/tools/llvm-mca/Views/View.h
+++ b/tools/llvm-mca/Views/View.h
@@ -1,9 +1,8 @@
 //===----------------------- View.h -----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp
index 68d63db599d7..b3590b5910ec 100644
--- a/tools/llvm-mca/llvm-mca.cpp
+++ b/tools/llvm-mca/llvm-mca.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -24,6 +23,7 @@
 #include "CodeRegion.h"
 #include "CodeRegionGenerator.h"
 #include "PipelinePrinter.h"
+#include "Views/BottleneckAnalysis.h"
 #include "Views/DispatchStatistics.h"
 #include "Views/InstructionInfoView.h"
 #include "Views/RegisterFileStatistics.h"
@@ -68,8 +68,9 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
                                            cl::value_desc("filename"));
 
 static cl::opt<std::string>
-    ArchName("march", cl::desc("Target architecture. "
-                               "See -version for available targets"),
+    ArchName("march",
+             cl::desc("Target architecture. "
+                      "See -version for available targets"),
              cl::cat(ToolOptions));
 
 static cl::opt<std::string>
@@ -101,6 +102,17 @@ static cl::opt<unsigned>
                               "be used for register mappings"),
                      cl::cat(ToolOptions), cl::init(0));
 
+static cl::opt<unsigned>
+    MicroOpQueue("micro-op-queue-size", cl::Hidden,
+                 cl::desc("Number of entries in the micro-op queue"),
+                 cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+    DecoderThroughput("decoder-throughput", cl::Hidden,
+                      cl::desc("Maximum throughput from the decoders "
+                               "(instructions per cycle)"),
+                      cl::cat(ToolOptions), cl::init(0));
+
 static cl::opt<bool>
     PrintRegisterFileStats("register-file-stats",
                            cl::desc("Print register file statistics"),
@@ -176,6 +188,11 @@ static cl::opt<bool>
                    cl::desc("Print all views including hardware statistics"),
                    cl::cat(ViewOptions), cl::init(false));
 
+static cl::opt<bool> EnableBottleneckAnalysis(
+    "bottleneck-analysis",
+    cl::desc("Enable bottleneck analysis (disabled by default)"),
+    cl::cat(ViewOptions), cl::init(false));
+
 namespace {
 
 const Target *getTarget(const char *ProgName) {
@@ -220,6 +237,7 @@ static void processViewOptions() {
 
   if (EnableAllViews.getNumOccurrences()) {
     processOptionImpl(PrintSummaryView, EnableAllViews);
+    processOptionImpl(EnableBottleneckAnalysis, EnableAllViews);
     processOptionImpl(PrintResourcePressureView, EnableAllViews);
     processOptionImpl(PrintTimelineView, EnableAllViews);
     processOptionImpl(PrintInstructionInfoView, EnableAllViews);
@@ -348,6 +366,11 @@ int main(int argc, char **argv) {
     return 1;
   }
   const mca::CodeRegions &Regions = *RegionsOrErr;
+
+  // Early exit if errors were found by the code region parsing logic.
+  if (!Regions.isValid())
+    return 1;
+
   if (Regions.empty()) {
     WithColor::error() << "no assembly instructions found.\n";
     return 1;
@@ -377,18 +400,15 @@ int main(int argc, char **argv) {
 
   const MCSchedModel &SM = STI->getSchedModel();
 
-  unsigned Width = SM.IssueWidth;
-  if (DispatchWidth)
-    Width = DispatchWidth;
-
   // Create an instruction builder.
   mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
 
   // Create a context to control ownership of the pipeline hardware.
   mca::Context MCA(*MRI, *STI);
 
-  mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize,
-                          StoreQueueSize, AssumeNoAlias);
+  mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth,
+                          RegisterFileSize, LoadQueueSize, StoreQueueSize,
+                          AssumeNoAlias, EnableBottleneckAnalysis);
 
   // Number each region in the sequence.
   unsigned RegionIdx = 0;
@@ -423,8 +443,8 @@ int main(int argc, char **argv) {
                   WithColor::error() << IE.Message << '\n';
                   IP->printInst(&IE.Inst, SS, "", *STI);
                   SS.flush();
-                  WithColor::note() << "instruction: " << InstructionStr
-                                    << '\n';
+                  WithColor::note()
+                      << "instruction: " << InstructionStr << '\n';
                 })) {
           // Default case.
           WithColor::error() << toString(std::move(NewE));
@@ -464,7 +484,13 @@ int main(int argc, char **argv) {
     mca::PipelinePrinter Printer(*P);
 
     if (PrintSummaryView)
-      Printer.addView(llvm::make_unique<mca::SummaryView>(SM, Insts, Width));
+      Printer.addView(
+          llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
+
+    if (EnableBottleneckAnalysis) {
+      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(
+          *STI, *IP, Insts, S.getNumIterations()));
+    }
 
     if (PrintInstructionInfoView)
       Printer.addView(
diff --git a/tools/llvm-modextract/llvm-modextract.cpp b/tools/llvm-modextract/llvm-modextract.cpp
index 9fd8340505aa..3adefc5f0d3e 100644
--- a/tools/llvm-modextract/llvm-modextract.cpp
+++ b/tools/llvm-modextract/llvm-modextract.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-modextract.cpp - LLVM module extractor utility ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 042e284e8369..aa62e6f0209b 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-nm.cpp - Symbol table dumping utility for llvm ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,12 +46,15 @@ using namespace object;
 
 namespace {
 enum OutputFormatTy { bsd, sysv, posix, darwin };
+
+cl::OptionCategory NMCat("llvm-nm Options");
+
 cl::opt<OutputFormatTy> OutputFormat(
     "format", cl::desc("Specify output format"),
     cl::values(clEnumVal(bsd, "BSD format"), clEnumVal(sysv, "System V format"),
                clEnumVal(posix, "POSIX.2 format"),
                clEnumVal(darwin, "Darwin -m format")),
-    cl::init(bsd));
+    cl::init(bsd), cl::cat(NMCat));
 cl::alias OutputFormat2("f", cl::desc("Alias for --format"),
                         cl::aliasopt(OutputFormat));
 
@@ -60,50 +62,53 @@ cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input files>"),
                                      cl::ZeroOrMore);
 
 cl::opt<bool> UndefinedOnly("undefined-only",
-                            cl::desc("Show only undefined symbols"));
+                            cl::desc("Show only undefined symbols"),
+                            cl::cat(NMCat));
 cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
                          cl::aliasopt(UndefinedOnly), cl::Grouping);
 
 cl::opt<bool> DynamicSyms("dynamic",
                           cl::desc("Display the dynamic symbols instead "
-                                   "of normal symbols."));
+                                   "of normal symbols."),
+                          cl::cat(NMCat));
 cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
                        cl::aliasopt(DynamicSyms), cl::Grouping);
 
-cl::opt<bool> DefinedOnly("defined-only",
-                          cl::desc("Show only defined symbols"));
+cl::opt<bool> DefinedOnly("defined-only", cl::desc("Show only defined symbols"),
+                          cl::cat(NMCat));
 cl::alias DefinedOnly2("U", cl::desc("Alias for --defined-only"),
                        cl::aliasopt(DefinedOnly), cl::Grouping);
 
 cl::opt<bool> ExternalOnly("extern-only",
                            cl::desc("Show only external symbols"),
-                           cl::ZeroOrMore);
+                           cl::ZeroOrMore, cl::cat(NMCat));
 cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"),
                         cl::aliasopt(ExternalOnly), cl::Grouping,
                         cl::ZeroOrMore);
 
-cl::opt<bool> NoWeakSymbols("no-weak",
-                            cl::desc("Show only non-weak symbols"));
+cl::opt<bool> NoWeakSymbols("no-weak", cl::desc("Show only non-weak symbols"),
+                            cl::cat(NMCat));
 cl::alias NoWeakSymbols2("W", cl::desc("Alias for --no-weak"),
                          cl::aliasopt(NoWeakSymbols), cl::Grouping);
 
-cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"),
-                        cl::Grouping);
+cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"), cl::Grouping,
+                        cl::cat(NMCat));
 cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
-                          cl::Grouping);
+                          cl::Grouping, cl::cat(NMCat));
 cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
                       cl::aliasopt(POSIXFormat), cl::NotHidden);
 cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
-                           cl::Grouping);
+                           cl::Grouping, cl::cat(NMCat));
 
 static cl::list<std::string>
     ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
-              cl::ZeroOrMore);
+              cl::ZeroOrMore, cl::cat(NMCat));
 bool ArchAll = false;
 
 cl::opt<bool> PrintFileName(
     "print-file-name",
-    cl::desc("Precede each symbol with the object file it came from"));
+    cl::desc("Precede each symbol with the object file it came from"),
+    cl::cat(NMCat));
 
 cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
                          cl::aliasopt(PrintFileName), cl::Grouping);
@@ -111,40 +116,52 @@ cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
                          cl::aliasopt(PrintFileName), cl::Grouping);
 
 cl::opt<bool> DebugSyms("debug-syms",
-                        cl::desc("Show all symbols, even debugger only"));
+                        cl::desc("Show all symbols, even debugger only"),
+                        cl::cat(NMCat));
 cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
                      cl::aliasopt(DebugSyms), cl::Grouping);
 
-cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"));
+cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"),
+                          cl::cat(NMCat));
 cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
                        cl::aliasopt(NumericSort), cl::Grouping);
 cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
                        cl::aliasopt(NumericSort), cl::Grouping);
 
-cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"));
+cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"),
+                     cl::cat(NMCat));
 cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
                   cl::Grouping);
 
-cl::opt<bool> Demangle("demangle", cl::desc("Demangle C++ symbol names"));
-cl::alias DemangleC("C", cl::desc("Alias for --demangle"), cl::aliasopt(Demangle),
-                    cl::Grouping);
+cl::opt<bool> Demangle("demangle", cl::ZeroOrMore,
+                       cl::desc("Demangle C++ symbol names"), cl::cat(NMCat));
+cl::alias DemangleC("C", cl::desc("Alias for --demangle"),
+                    cl::aliasopt(Demangle), cl::Grouping);
+cl::opt<bool> NoDemangle("no-demangle", cl::init(false), cl::ZeroOrMore,
+                         cl::desc("Don't demangle symbol names"),
+                         cl::cat(NMCat));
 
-cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"));
+cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"),
+                          cl::cat(NMCat));
 cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
                        cl::aliasopt(ReverseSort), cl::Grouping);
 
 cl::opt<bool> PrintSize("print-size",
-                        cl::desc("Show symbol size instead of address"));
+                        cl::desc("Show symbol size as well as address"),
+                        cl::cat(NMCat));
 cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
                      cl::aliasopt(PrintSize), cl::Grouping);
 bool MachOPrintSizeWarning = false;
 
-cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"));
+cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"),
+                       cl::cat(NMCat));
 
 cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
-                             cl::desc("Exclude aliases from output"));
+                             cl::desc("Exclude aliases from output"),
+                             cl::cat(NMCat));
 
-cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"));
+cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"),
+                         cl::cat(NMCat));
 cl::alias ArchiveMaps("M", cl::desc("Alias for --print-armap"),
                       cl::aliasopt(ArchiveMap), cl::Grouping);
 
@@ -153,38 +170,45 @@ cl::opt<Radix>
     AddressRadix("radix", cl::desc("Radix (o/d/x) for printing symbol Values"),
                  cl::values(clEnumVal(d, "decimal"), clEnumVal(o, "octal"),
                             clEnumVal(x, "hexadecimal")),
-                 cl::init(x));
+                 cl::init(x), cl::cat(NMCat));
 cl::alias RadixAlias("t", cl::desc("Alias for --radix"),
                      cl::aliasopt(AddressRadix));
 
 cl::opt<bool> JustSymbolName("just-symbol-name",
-                             cl::desc("Print just the symbol's name"));
+                             cl::desc("Print just the symbol's name"),
+                             cl::cat(NMCat));
 cl::alias JustSymbolNames("j", cl::desc("Alias for --just-symbol-name"),
                           cl::aliasopt(JustSymbolName), cl::Grouping);
 
-// FIXME: This option takes exactly two strings and should be allowed anywhere
-// on the command line.  Such that "llvm-nm -s __TEXT __text foo.o" would work.
-// But that does not as the CommandLine Library does not have a way to make
-// this work.  For now the "-s __TEXT __text" has to be last on the command
-// line.
-cl::list<std::string> SegSect("s", cl::Positional, cl::ZeroOrMore,
+cl::opt<bool> SpecialSyms("special-syms",
+                          cl::desc("No-op. Used for GNU compatibility only"));
+
+cl::list<std::string> SegSect("s", cl::multi_val(2), cl::ZeroOrMore,
+                              cl::value_desc("segment section"), cl::Hidden,
                               cl::desc("Dump only symbols from this segment "
-                                       "and section name, Mach-O only"));
+                                       "and section name, Mach-O only"),
+                              cl::cat(NMCat));
 
-cl::opt<bool> FormatMachOasHex("x", cl::desc("Print symbol entry in hex, "
-                                             "Mach-O only"), cl::Grouping);
+cl::opt<bool> FormatMachOasHex("x",
+                               cl::desc("Print symbol entry in hex, "
+                                        "Mach-O only"),
+                               cl::Grouping, cl::cat(NMCat));
 cl::opt<bool> AddDyldInfo("add-dyldinfo",
                           cl::desc("Add symbols from the dyldinfo not already "
-                                   "in the symbol table, Mach-O only"));
+                                   "in the symbol table, Mach-O only"),
+                          cl::cat(NMCat));
 cl::opt<bool> NoDyldInfo("no-dyldinfo",
                          cl::desc("Don't add any symbols from the dyldinfo, "
-                                  "Mach-O only"));
+                                  "Mach-O only"),
+                         cl::cat(NMCat));
 cl::opt<bool> DyldInfoOnly("dyldinfo-only",
                            cl::desc("Show only symbols from the dyldinfo, "
-                                    "Mach-O only"));
+                                    "Mach-O only"),
+                           cl::cat(NMCat));
 
 cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
-                            cl::desc("Disable LLVM bitcode reader"));
+                            cl::desc("Disable LLVM bitcode reader"),
+                            cl::cat(NMCat));
 
 cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
 
@@ -263,6 +287,8 @@ struct NMSymbol {
   uint64_t Size;
   char TypeChar;
   StringRef Name;
+  StringRef SectionName;
+  StringRef TypeName;
   BasicSymbolRef Sym;
   // The Sym field above points to the native symbol in the object file,
   // for Mach-O when we are creating symbols from the dyld info the above
@@ -316,8 +342,7 @@ static char isSymbolList64Bit(SymbolicFile &Obj) {
 }
 
 static StringRef CurrentFilename;
-typedef std::vector<NMSymbol> SymbolListT;
-static SymbolListT SymbolList;
+static std::vector<NMSymbol> SymbolList;
 
 static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
 
@@ -326,9 +351,10 @@ static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
 // the darwin format it produces the same output as darwin's nm(1) -m output
 // and when printing Mach-O symbols in hex it produces the same output as
 // darwin's nm(1) -x format.
-static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
+static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
                               char *SymbolAddrStr, const char *printBlanks,
-                              const char *printDashes, const char *printFormat) {
+                              const char *printDashes,
+                              const char *printFormat) {
   MachO::mach_header H;
   MachO::mach_header_64 H_64;
   uint32_t Filetype = MachO::MH_OBJECT;
@@ -340,7 +366,7 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
   uint64_t NValue = 0;
   MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
   if (Obj.isIR()) {
-    uint32_t SymFlags = I->Sym.getFlags();
+    uint32_t SymFlags = S.Sym.getFlags();
     if (SymFlags & SymbolRef::SF_Global)
       NType |= MachO::N_EXT;
     if (SymFlags & SymbolRef::SF_Hidden)
@@ -362,7 +388,7 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
     if (SymFlags & SymbolRef::SF_Weak)
       NDesc |= MachO::N_WEAK_DEF;
   } else {
-    DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
+    DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
     if (MachO->is64Bit()) {
       H_64 = MachO->MachOObjectFile::getHeader64();
       Filetype = H_64.filetype;
@@ -375,11 +401,11 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
         NStrx = STE_64.n_strx;
         NValue = STE_64.n_value;
       } else {
-        NType = I->NType;
-        NSect = I->NSect;
-        NDesc = I->NDesc;
+        NType = S.NType;
+        NSect = S.NSect;
+        NDesc = S.NDesc;
         NStrx = 0;
-        NValue = I->Address;
+        NValue = S.Address;
       }
     } else {
       H = MachO->MachOObjectFile::getHeader();
@@ -393,42 +419,31 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
         NStrx = STE.n_strx;
         NValue = STE.n_value;
       } else {
-        NType = I->NType;
-        NSect = I->NSect;
-        NDesc = I->NDesc;
+        NType = S.NType;
+        NSect = S.NSect;
+        NDesc = S.NDesc;
         NStrx = 0;
-        NValue = I->Address;
+        NValue = S.Address;
       }
     }
   }
 
   // If we are printing Mach-O symbols in hex do that and return.
   if (FormatMachOasHex) {
-    char Str[18] = "";
-    format(printFormat, NValue).print(Str, sizeof(Str));
-    outs() << Str << ' ';
-    format("%02x", NType).print(Str, sizeof(Str));
-    outs() << Str << ' ';
-    format("%02x", NSect).print(Str, sizeof(Str));
-    outs() << Str << ' ';
-    format("%04x", NDesc).print(Str, sizeof(Str));
-    outs() << Str << ' ';
-    format("%08x", NStrx).print(Str, sizeof(Str));
-    outs() << Str << ' ';
-    outs() << I->Name;
+    outs() << format(printFormat, NValue) << ' '
+           << format("%02x %02x %04x %08x", NType, NSect, NDesc, NStrx) << ' '
+           << S.Name;
     if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
       outs() << " (indirect for ";
-      format(printFormat, NValue).print(Str, sizeof(Str));
-      outs() << Str << ' ';
+      outs() << format(printFormat, NValue) << ' ';
       StringRef IndirectName;
-      if (I->Sym.getRawDataRefImpl().p) {
-        if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+      if (S.Sym.getRawDataRefImpl().p) {
+        if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
           outs() << "?)";
         else
           outs() << IndirectName << ")";
-      }
-      else
-        outs() << I->IndirectName << ")";
+      } else
+        outs() << S.IndirectName << ")";
     }
     outs() << "\n";
     return;
@@ -487,9 +502,9 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
       break;
     }
     section_iterator Sec = SectionRef();
-    if (I->Sym.getRawDataRefImpl().p) {
+    if (S.Sym.getRawDataRefImpl().p) {
       Expected<section_iterator> SecOrErr =
-        MachO->getSymbolSection(I->Sym.getRawDataRefImpl());
+          MachO->getSymbolSection(S.Sym.getRawDataRefImpl());
       if (!SecOrErr) {
         consumeError(SecOrErr.takeError());
         outs() << "(?,?) ";
@@ -501,11 +516,12 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
         break;
       }
     } else {
-      Sec = I->Section;
+      Sec = S.Section;
     }
     DataRefImpl Ref = Sec->getRawDataRefImpl();
     StringRef SectionName;
-    MachO->getSectionName(Ref, SectionName);
+    if (Expected<StringRef> NameOrErr = MachO->getSectionName(Ref))
+      SectionName = *NameOrErr;
     StringRef SegmentName = MachO->getSectionFinalSegmentName(Ref);
     outs() << "(" << SegmentName << "," << SectionName << ") ";
     break;
@@ -541,39 +557,36 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
       outs() << "non-external ";
   }
 
-  if (Filetype == MachO::MH_OBJECT &&
-      (NDesc & MachO::N_NO_DEAD_STRIP) == MachO::N_NO_DEAD_STRIP)
-    outs() << "[no dead strip] ";
-
-  if (Filetype == MachO::MH_OBJECT &&
-      ((NType & MachO::N_TYPE) != MachO::N_UNDF) &&
-      (NDesc & MachO::N_SYMBOL_RESOLVER) == MachO::N_SYMBOL_RESOLVER)
-    outs() << "[symbol resolver] ";
-
-  if (Filetype == MachO::MH_OBJECT &&
-      ((NType & MachO::N_TYPE) != MachO::N_UNDF) &&
-      (NDesc & MachO::N_ALT_ENTRY) == MachO::N_ALT_ENTRY)
-    outs() << "[alt entry] ";
+  if (Filetype == MachO::MH_OBJECT) {
+    if (NDesc & MachO::N_NO_DEAD_STRIP)
+      outs() << "[no dead strip] ";
+    if ((NType & MachO::N_TYPE) != MachO::N_UNDF &&
+        NDesc & MachO::N_SYMBOL_RESOLVER)
+      outs() << "[symbol resolver] ";
+    if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_ALT_ENTRY)
+      outs() << "[alt entry] ";
+    if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_COLD_FUNC)
+      outs() << "[cold func] ";
+  }
 
   if ((NDesc & MachO::N_ARM_THUMB_DEF) == MachO::N_ARM_THUMB_DEF)
     outs() << "[Thumb] ";
 
   if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
-    outs() << I->Name << " (for ";
+    outs() << S.Name << " (for ";
     StringRef IndirectName;
     if (MachO) {
-      if (I->Sym.getRawDataRefImpl().p) {
-        if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+      if (S.Sym.getRawDataRefImpl().p) {
+        if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
           outs() << "?)";
         else
           outs() << IndirectName << ")";
-      }
-      else
-        outs() << I->IndirectName << ")";
+      } else
+        outs() << S.IndirectName << ")";
     } else
       outs() << "?)";
   } else
-    outs() << I->Name;
+    outs() << S.Name;
 
   if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
       (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
@@ -635,25 +648,24 @@ static const struct DarwinStabName DarwinStabNames[] = {
     {MachO::N_ECOMM, "ECOMM"},
     {MachO::N_ECOML, "ECOML"},
     {MachO::N_LENG, "LENG"},
-    {0, nullptr}};
+};
 
 static const char *getDarwinStabString(uint8_t NType) {
-  for (unsigned i = 0; DarwinStabNames[i].Name; i++) {
-    if (DarwinStabNames[i].NType == NType)
-      return DarwinStabNames[i].Name;
-  }
+  for (auto I : makeArrayRef(DarwinStabNames))
+    if (I.NType == NType)
+      return I.Name;
   return nullptr;
 }
 
 // darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
 // a stab n_type value in a Mach-O file.
-static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
+static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
   MachO::nlist_64 STE_64;
   MachO::nlist STE;
   uint8_t NType;
   uint8_t NSect;
   uint16_t NDesc;
-  DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
+  DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
   if (MachO->is64Bit()) {
     STE_64 = MachO->getSymbol64TableEntry(SymDRI);
     NType = STE_64.n_type;
@@ -666,16 +678,11 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
     NDesc = STE.n_desc;
   }
 
-  char Str[18] = "";
-  format("%02x", NSect).print(Str, sizeof(Str));
-  outs() << ' ' << Str << ' ';
-  format("%04x", NDesc).print(Str, sizeof(Str));
-  outs() << Str << ' ';
+  outs() << format(" %02x %04x ", NSect, NDesc);
   if (const char *stabString = getDarwinStabString(NType))
-    format("%5.5s", stabString).print(Str, sizeof(Str));
+    outs() << format("%5.5s", stabString);
   else
-    format("   %02x", NType).print(Str, sizeof(Str));
-  outs() << Str;
+    outs() << format("   %02x", NType);
 }
 
 static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
@@ -780,26 +787,24 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
     errs() << "no symbols\n";
   }
 
-  for (SymbolListT::iterator I = SymbolList.begin(), E = SymbolList.end();
-       I != E; ++I) {
+  for (const NMSymbol &S : SymbolList) {
     uint32_t SymFlags;
-    std::string Name = I->Name.str();
+    std::string Name = S.Name.str();
     MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
     if (Demangle) {
-      if (Optional<std::string> Opt = demangle(I->Name, MachO))
+      if (Optional<std::string> Opt = demangle(S.Name, MachO))
         Name = *Opt;
     }
-    if (I->Sym.getRawDataRefImpl().p)
-      SymFlags = I->Sym.getFlags();
+    if (S.Sym.getRawDataRefImpl().p)
+      SymFlags = S.Sym.getFlags();
     else
-      SymFlags = I->SymFlags;
+      SymFlags = S.SymFlags;
 
     bool Undefined = SymFlags & SymbolRef::SF_Undefined;
     bool Global = SymFlags & SymbolRef::SF_Global;
     bool Weak = SymFlags & SymbolRef::SF_Weak;
     if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
-        (!Global && ExternalOnly) || (SizeSort && !PrintAddress) ||
-        (Weak && NoWeakSymbols))
+        (!Global && ExternalOnly) || (Weak && NoWeakSymbols))
       continue;
     if (PrintFileName)
       writeFileName(outs());
@@ -810,32 +815,30 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
       continue;
     }
 
-    char SymbolAddrStr[18] = "";
-    char SymbolSizeStr[18] = "";
+    char SymbolAddrStr[23], SymbolSizeStr[23];
 
     // If the format is SysV or the symbol isn't defined, then print spaces.
-    if (OutputFormat == sysv || !symbolIsDefined(*I)) {
+    if (OutputFormat == sysv || !symbolIsDefined(S)) {
       if (OutputFormat == posix) {
-        format(printFormat, I->Address)
-          .print(SymbolAddrStr, sizeof(SymbolAddrStr));
-        format(printFormat, I->Size)
-            .print(SymbolSizeStr, sizeof(SymbolSizeStr));
+        format(printFormat, S.Address)
+            .print(SymbolAddrStr, sizeof(SymbolAddrStr));
+        format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
       } else {
         strcpy(SymbolAddrStr, printBlanks);
         strcpy(SymbolSizeStr, printBlanks);
       }
     }
 
-    // Otherwise, print the symbol address and size.
-    if (symbolIsDefined(*I)) {
+    if (symbolIsDefined(S)) {
+      // Otherwise, print the symbol address and size.
       if (Obj.isIR())
         strcpy(SymbolAddrStr, printDashes);
-      else if(MachO && I->TypeChar == 'I')
+      else if (MachO && S.TypeChar == 'I')
         strcpy(SymbolAddrStr, printBlanks);
       else
-        format(printFormat, I->Address)
-          .print(SymbolAddrStr, sizeof(SymbolAddrStr));
-      format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+        format(printFormat, S.Address)
+            .print(SymbolAddrStr, sizeof(SymbolAddrStr));
+      format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
     }
 
     // If OutputFormat is darwin or we are printing Mach-O symbols in hex and
@@ -844,43 +847,36 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
     // printing Mach-O symbols in hex and not a Mach-O object fall back to
     // OutputFormat bsd (see below).
     if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
-      darwinPrintSymbol(Obj, I, SymbolAddrStr, printBlanks, printDashes,
+      darwinPrintSymbol(Obj, S, SymbolAddrStr, printBlanks, printDashes,
                         printFormat);
     } else if (OutputFormat == posix) {
-      outs() << Name << " " << I->TypeChar << " ";
-      if (MachO)
-        outs() << SymbolAddrStr << " " << "0" /* SymbolSizeStr */ << "\n";
-      else
-        outs() << SymbolAddrStr << " " << SymbolSizeStr << "\n";
+      outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
+             << (MachO ? "0" : SymbolSizeStr) << "\n";
     } else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
       if (PrintAddress)
         outs() << SymbolAddrStr << ' ';
-      if (PrintSize) {
-        outs() << SymbolSizeStr;
-        outs() << ' ';
-      }
-      outs() << I->TypeChar;
-      if (I->TypeChar == '-' && MachO)
-        darwinPrintStab(MachO, I);
+      if (PrintSize)
+        outs() << SymbolSizeStr << ' ';
+      outs() << S.TypeChar;
+      if (S.TypeChar == '-' && MachO)
+        darwinPrintStab(MachO, S);
       outs() << " " << Name;
-      if (I->TypeChar == 'I' && MachO) {
+      if (S.TypeChar == 'I' && MachO) {
         outs() << " (indirect for ";
-        if (I->Sym.getRawDataRefImpl().p) {
+        if (S.Sym.getRawDataRefImpl().p) {
           StringRef IndirectName;
-          if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+          if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
             outs() << "?)";
           else
             outs() << IndirectName << ")";
         } else
-          outs() << I->IndirectName << ")";
+          outs() << S.IndirectName << ")";
       }
       outs() << "\n";
     } else if (OutputFormat == sysv) {
-      std::string PaddedName(Name);
-      while (PaddedName.length() < 20)
-        PaddedName += " ";
-      outs() << PaddedName << "|" << SymbolAddrStr << "|   " << I->TypeChar
-             << "  |                  |" << SymbolSizeStr << "|     |\n";
+      outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "|   "
+             << S.TypeChar << "  |" << right_justify(S.TypeName, 18) << "|"
+             << SymbolSizeStr << "|     |" << S.SectionName << "\n";
     }
   }
 
@@ -898,44 +894,35 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
     return '?';
   }
 
+  uint8_t Binding = SymI->getBinding();
+  if (Binding == ELF::STB_GNU_UNIQUE)
+    return 'u';
+
+  assert(Binding != ELF::STB_WEAK && "STB_WEAK not tested in calling function");
+  if (Binding != ELF::STB_GLOBAL && Binding != ELF::STB_LOCAL)
+    return '?';
+
   elf_section_iterator SecI = *SecIOrErr;
   if (SecI != Obj.section_end()) {
-    switch (SecI->getType()) {
-    case ELF::SHT_PROGBITS:
-    case ELF::SHT_DYNAMIC:
-      switch (SecI->getFlags()) {
-      case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
-        return 't';
-      case (ELF::SHF_TLS | ELF::SHF_ALLOC | ELF::SHF_WRITE):
-      case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
-        return 'd';
-      case ELF::SHF_ALLOC:
-      case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
-      case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
-        return 'r';
-      }
-      break;
-    case ELF::SHT_NOBITS:
-      return 'b';
-    case ELF::SHT_INIT_ARRAY:
-    case ELF::SHT_FINI_ARRAY:
+    uint32_t Type = SecI->getType();
+    uint64_t Flags = SecI->getFlags();
+    if (Flags & ELF::SHF_EXECINSTR)
       return 't';
-    }
-  }
+    if (Type == ELF::SHT_NOBITS)
+      return 'b';
+    if (Flags & ELF::SHF_ALLOC)
+      return Flags & ELF::SHF_WRITE ? 'd' : 'r';
 
-  if (SymI->getELFType() == ELF::STT_SECTION) {
-    Expected<StringRef> Name = SymI->getName();
-    if (!Name) {
-      consumeError(Name.takeError());
+    StringRef SecName;
+    if (SecI->getName(SecName))
       return '?';
-    }
-    return StringSwitch<char>(*Name)
-        .StartsWith(".debug", 'N')
-        .StartsWith(".note", 'n')
-        .Default('?');
+    if (SecName.startswith(".debug"))
+      return 'N';
+    if (!(Flags & ELF::SHF_WRITE))
+      return 'n';
   }
 
-  return 'n';
+  return '?';
 }
 
 static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
@@ -967,10 +954,9 @@ static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
     section_iterator SecI = *SecIOrErr;
     const coff_section *Section = Obj.getCOFFSection(*SecI);
     Characteristics = Section->Characteristics;
-    StringRef SectionName;
-    Obj.getSectionName(Section, SectionName);
-    if (SectionName.startswith(".idata"))
-      return 'i';
+    if (Expected<StringRef> NameOrErr = Obj.getSectionName(Section))
+      if (NameOrErr->startswith(".idata"))
+        return 'i';
   }
 
   switch (Symb.getSectionNumber()) {
@@ -1030,7 +1016,8 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
       return 's';
     DataRefImpl Ref = Sec->getRawDataRefImpl();
     StringRef SectionName;
-    Obj.getSectionName(Ref, SectionName);
+    if (Expected<StringRef> NameOrErr = Obj.getSectionName(Ref))
+      SectionName = *NameOrErr;
     StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
     if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
         SegmentName == "__TEXT_EXEC" && SectionName == "__text")
@@ -1074,8 +1061,40 @@ static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
              : elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
 }
 
-static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
+// For ELF object files, return the symbol's type name, to be printed in the
+// 'Type' column of the SYSV format output; otherwise return an empty string.
+static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
+  if (isa<ELFObjectFileBase>(&Obj)) {
+    elf_symbol_iterator SymI(I);
+    return SymI->getELFTypeName();
+  }
+  return "";
+}
+
+// Return the POSIX nm class type tag (single letter), and for ELF object
+// files also set SecName to the section name, for use in format=sysv output.
+static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
+                                   StringRef &SecName) {
   uint32_t Symflags = I->getFlags();
+  if (isa<ELFObjectFileBase>(&Obj)) {
+    if (Symflags & object::SymbolRef::SF_Absolute)
+      SecName = "*ABS*";
+    else if (Symflags & object::SymbolRef::SF_Common)
+      SecName = "*COM*";
+    else if (Symflags & object::SymbolRef::SF_Undefined)
+      SecName = "*UND*";
+    else {
+      elf_symbol_iterator SymI(I);
+      Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
+      if (!SecIOrErr) {
+        consumeError(SecIOrErr.takeError());
+        return '?';
+      }
+      elf_section_iterator secT = *SecIOrErr;
+      secT->getName(SecName);
+    }
+  }
+
   if ((Symflags & object::SymbolRef::SF_Weak) && !isa<MachOObjectFile>(Obj)) {
     char Ret = isObject(Obj, I) ? 'v' : 'w';
     return (!(Symflags & object::SymbolRef::SF_Undefined)) ? toupper(Ret) : Ret;
@@ -1103,10 +1122,13 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
   else
     Ret = getSymbolNMTypeChar(cast<ELFObjectFileBase>(Obj), I);
 
-  if (Symflags & object::SymbolRef::SF_Global)
-    Ret = toupper(Ret);
+  if (!(Symflags & object::SymbolRef::SF_Global))
+    return Ret;
 
-  return Ret;
+  if (Obj.isELF() && ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
+    return Ret;
+
+  return toupper(Ret);
 }
 
 // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
@@ -1120,7 +1142,8 @@ static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
   for (auto &S : Obj->sections()) {
     DataRefImpl Ref = S.getRawDataRefImpl();
     StringRef SectionName;
-    Obj->getSectionName(Ref, SectionName);
+    if (Expected<StringRef> NameOrErr = Obj->getSectionName(Ref))
+      SectionName = *NameOrErr;
     StringRef SegmentName = Obj->getSectionFinalSegmentName(Ref);
     if (SegmentName == SegSect[0] && SectionName == SegSect[1])
       return Nsect;
@@ -1155,9 +1178,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
       error("File format has no dynamic symbol table", Obj.getFileName());
       return;
     }
-    auto DynSymbols = E->getDynamicSymbolIterators();
-    Symbols =
-        make_range<basic_symbol_iterator>(DynSymbols.begin(), DynSymbols.end());
+    Symbols = E->getDynamicSymbolIterators();
   }
   std::string NameBuffer;
   raw_string_ostream OS(NameBuffer);
@@ -1186,10 +1207,8 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
       NMSymbol S = {};
       S.Size = 0;
       S.Address = 0;
-      if (PrintSize) {
-        if (isa<ELFObjectFileBase>(&Obj))
-          S.Size = ELFSymbolRef(Sym).getSize();
-      }
+      if (isa<ELFObjectFileBase>(&Obj))
+        S.Size = ELFSymbolRef(Sym).getSize();
       if (PrintAddress && isa<ObjectFile>(Obj)) {
         SymbolRef SymRef(Sym);
         Expected<uint64_t> AddressOrErr = SymRef.getAddress();
@@ -1199,12 +1218,15 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
         }
         S.Address = *AddressOrErr;
       }
-      S.TypeChar = getNMTypeChar(Obj, Sym);
-      std::error_code EC = Sym.printName(OS);
-      if (EC && MachO)
-        OS << "bad string index";
-      else
-        error(EC);
+      S.TypeName = getNMTypeName(Obj, Sym);
+      S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);
+      if (Error E = Sym.printName(OS)) {
+        if (MachO) {
+          OS << "bad string index";
+          consumeError(std::move(E));
+        } else
+          error(std::move(E), Obj.getFileName());
+      }
       OS << '\0';
       S.Sym = Sym;
       SymbolList.push_back(S);
@@ -1270,11 +1292,12 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
         bool found = false;
         bool ReExport = false;
         if (!DyldInfoOnly) {
-          for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
-            if (SymbolList[J].Address == Entry.address() + BaseSegmentAddress &&
-                SymbolList[J].Name == Entry.name())
+          for (const NMSymbol &S : SymbolList)
+            if (S.Address == Entry.address() + BaseSegmentAddress &&
+                S.Name == Entry.name()) {
               found = true;
-          }
+              break;
+            }
         }
         if (!found) {
           NMSymbol S = {};
@@ -1445,7 +1468,6 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
           B.NType = MachO::N_EXT | MachO::N_UNDF;
           B.NSect = 0;
           B.NDesc = 0;
-          B.NDesc = 0;
           MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
           B.IndirectName = StringRef();
           B.Name = Entry.symbolName();
@@ -1735,8 +1757,9 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
     return;
 
   LLVMContext Context;
-  Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(
-      BufferOrErr.get()->getMemBufferRef(), NoLLVMBitcode ? nullptr : &Context);
+  LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
+  Expected<std::unique_ptr<Binary>> BinaryOrErr =
+      createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
   if (!BinaryOrErr) {
     error(BinaryOrErr.takeError(), Filename);
     return;
@@ -1770,7 +1793,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
     {
       Error Err = Error::success();
       for (auto &C : A->children(Err)) {
-        Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(&Context);
+        Expected<std::unique_ptr<Binary>> ChildOrErr =
+            C.getAsBinary(ContextPtr);
         if (!ChildOrErr) {
           if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
             error(std::move(E), Filename, C);
@@ -1841,7 +1865,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
               Error Err = Error::success();
               for (auto &C : A->children(Err)) {
                 Expected<std::unique_ptr<Binary>> ChildOrErr =
-                    C.getAsBinary(&Context);
+                    C.getAsBinary(ContextPtr);
                 if (!ChildOrErr) {
                   if (auto E = isNotObjectErrorInvalidFileType(
                                        ChildOrErr.takeError())) {
@@ -1912,7 +1936,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
             Error Err = Error::success();
             for (auto &C : A->children(Err)) {
               Expected<std::unique_ptr<Binary>> ChildOrErr =
-                  C.getAsBinary(&Context);
+                  C.getAsBinary(ContextPtr);
               if (!ChildOrErr) {
                 if (auto E = isNotObjectErrorInvalidFileType(
                                      ChildOrErr.takeError()))
@@ -1946,10 +1970,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
     // Either all architectures have been specified or none have been specified
     // and this does not contain the host architecture so dump all the slices.
     bool moreThanOneArch = UB->getNumberOfObjects() > 1;
-    for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
-                                               E = UB->end_objects();
-         I != E; ++I) {
-      Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+    for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
+      Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
       std::string ArchiveName;
       std::string ArchitectureName;
       ArchiveName.clear();
@@ -1958,28 +1980,28 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
         ObjectFile &Obj = *ObjOrErr.get();
         if (PrintFileName) {
           if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
-            ArchitectureName = I->getArchFlagName();
+            ArchitectureName = O.getArchFlagName();
         } else {
           if (moreThanOneArch)
             outs() << "\n";
           outs() << Obj.getFileName();
           if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
-            outs() << " (for architecture " << I->getArchFlagName() << ")";
+            outs() << " (for architecture " << O.getArchFlagName() << ")";
           outs() << ":\n";
         }
         dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
       } else if (auto E = isNotObjectErrorInvalidFileType(
                  ObjOrErr.takeError())) {
         error(std::move(E), Filename, moreThanOneArch ?
-              StringRef(I->getArchFlagName()) : StringRef());
+              StringRef(O.getArchFlagName()) : StringRef());
         continue;
       } else if (Expected<std::unique_ptr<Archive>> AOrErr =
-                  I->getAsArchive()) {
+                  O.getAsArchive()) {
         std::unique_ptr<Archive> &A = *AOrErr;
         Error Err = Error::success();
         for (auto &C : A->children(Err)) {
           Expected<std::unique_ptr<Binary>> ChildOrErr =
-            C.getAsBinary(&Context);
+            C.getAsBinary(ContextPtr);
           if (!ChildOrErr) {
             if (auto E = isNotObjectErrorInvalidFileType(
                                  ChildOrErr.takeError()))
@@ -1987,23 +2009,23 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
                     StringRef(ArchitectureName) : StringRef());
             continue;
           }
-          if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+          if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
             if (PrintFileName) {
               ArchiveName = A->getFileName();
-              if (isa<MachOObjectFile>(O) && moreThanOneArch)
-                ArchitectureName = I->getArchFlagName();
+              if (isa<MachOObjectFile>(F) && moreThanOneArch)
+                ArchitectureName = O.getArchFlagName();
             } else {
               outs() << "\n" << A->getFileName();
-              if (isa<MachOObjectFile>(O)) {
-                outs() << "(" << O->getFileName() << ")";
+              if (isa<MachOObjectFile>(F)) {
+                outs() << "(" << F->getFileName() << ")";
                 if (moreThanOneArch)
-                  outs() << " (for architecture " << I->getArchFlagName()
+                  outs() << " (for architecture " << O.getArchFlagName()
                          << ")";
               } else
-                outs() << ":" << O->getFileName();
+                outs() << ":" << F->getFileName();
               outs() << ":\n";
             }
-            dumpSymbolNamesFromObject(*O, false, ArchiveName, ArchitectureName);
+            dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName);
           }
         }
         if (Err)
@@ -2011,7 +2033,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
       } else {
         consumeError(AOrErr.takeError());
         error(Filename + " for architecture " +
-              StringRef(I->getArchFlagName()) +
+              StringRef(O.getArchFlagName()) +
               " is not a Mach-O file or an archive file",
               "Mach-O universal file");
       }
@@ -2021,7 +2043,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
   if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
     if (!MachOPrintSizeWarning && PrintSize &&  isa<MachOObjectFile>(O)) {
       WithColor::warning(errs(), ToolName)
-          << "sizes with -print-size for Mach-O files are always zero.\n";
+          << "sizes with --print-size for Mach-O files are always zero.\n";
       MachOPrintSizeWarning = true;
     }
     if (!checkMachOAndArchFlags(O, Filename))
@@ -2032,6 +2054,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
+  cl::HideUnrelatedOptions(NMCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
 
   // llvm-nm only reads binary files.
@@ -2063,13 +2086,17 @@ int main(int argc, char **argv) {
   if (InputFilenames.size() > 1)
     MultipleFiles = true;
 
+  // If both --demangle and --no-demangle are specified then pick the last one.
+  if (NoDemangle.getPosition() > Demangle.getPosition())
+    Demangle = !NoDemangle;
+
   for (unsigned i = 0; i < ArchFlags.size(); ++i) {
     if (ArchFlags[i] == "all") {
       ArchAll = true;
     } else {
       if (!MachOObjectFile::isValidArch(ArchFlags[i]))
         error("Unknown architecture named '" + ArchFlags[i] + "'",
-              "for the -arch option");
+              "for the --arch option");
     }
   }
 
@@ -2078,7 +2105,7 @@ int main(int argc, char **argv) {
           "for the -s option");
 
   if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
-    error("-no-dyldinfo can't be used with -add-dyldinfo or -dyldinfo-only");
+    error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
 
   llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
 
diff --git a/tools/llvm-objcopy/Buffer.cpp b/tools/llvm-objcopy/Buffer.cpp
index 8044b023aaad..06b2a20a762f 100644
--- a/tools/llvm-objcopy/Buffer.cpp
+++ b/tools/llvm-objcopy/Buffer.cpp
@@ -1,16 +1,16 @@
 //===- Buffer.cpp ---------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "Buffer.h"
-#include "llvm-objcopy.h"
 #include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
 #include <memory>
 
 namespace llvm {
@@ -18,23 +18,51 @@ namespace objcopy {
 
 Buffer::~Buffer() {}
 
-void FileBuffer::allocate(size_t Size) {
+static Error createEmptyFile(StringRef FileName) {
+  // Create an empty tempfile and atomically swap it in place with the desired
+  // output file.
+  Expected<sys::fs::TempFile> Temp =
+      sys::fs::TempFile::create(FileName + ".temp-empty-%%%%%%%");
+  return Temp ? Temp->keep(FileName) : Temp.takeError();
+}
+
+Error FileBuffer::allocate(size_t Size) {
+  // When a 0-sized file is requested, skip allocation but defer file
+  // creation/truncation until commit() to avoid side effects if something
+  // happens between allocate() and commit().
+  if (Size == 0) {
+    EmptyFile = true;
+    return Error::success();
+  }
+
   Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
       FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
-  handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
-    error("failed to open " + getName() + ": " + E.message());
-  });
+  // FileOutputBuffer::create() returns an Error that is just a wrapper around
+  // std::error_code. Wrap it in FileError to include the actual filename.
+  if (!BufferOrErr)
+    return createFileError(getName(), BufferOrErr.takeError());
   Buf = std::move(*BufferOrErr);
+  return Error::success();
 }
 
-Error FileBuffer::commit() { return Buf->commit(); }
+Error FileBuffer::commit() {
+  if (EmptyFile)
+    return createEmptyFile(getName());
+
+  assert(Buf && "allocate() not called before commit()!");
+  Error Err = Buf->commit();
+  // FileOutputBuffer::commit() returns an Error that is just a wrapper around
+  // std::error_code. Wrap it in FileError to include the actual filename.
+  return Err ? createFileError(getName(), std::move(Err)) : std::move(Err);
+}
 
 uint8_t *FileBuffer::getBufferStart() {
   return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
 }
 
-void MemBuffer::allocate(size_t Size) {
+Error MemBuffer::allocate(size_t Size) {
   Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
+  return Error::success();
 }
 
 Error MemBuffer::commit() { return Error::success(); }
diff --git a/tools/llvm-objcopy/Buffer.h b/tools/llvm-objcopy/Buffer.h
index e5b9c5b2d22b..487d5585c364 100644
--- a/tools/llvm-objcopy/Buffer.h
+++ b/tools/llvm-objcopy/Buffer.h
@@ -1,9 +1,8 @@
 //===- Buffer.h -------------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,7 +27,7 @@ class Buffer {
 
 public:
   virtual ~Buffer();
-  virtual void allocate(size_t Size) = 0;
+  virtual Error allocate(size_t Size) = 0;
   virtual uint8_t *getBufferStart() = 0;
   virtual Error commit() = 0;
 
@@ -38,9 +37,12 @@ public:
 
 class FileBuffer : public Buffer {
   std::unique_ptr<FileOutputBuffer> Buf;
+  // Indicates that allocate(0) was called, and commit() should create or
+  // truncate a file instead of using a FileOutputBuffer.
+  bool EmptyFile = false;
 
 public:
-  void allocate(size_t Size) override;
+  Error allocate(size_t Size) override;
   uint8_t *getBufferStart() override;
   Error commit() override;
 
@@ -51,7 +53,7 @@ class MemBuffer : public Buffer {
   std::unique_ptr<WritableMemoryBuffer> Buf;
 
 public:
-  void allocate(size_t Size) override;
+  Error allocate(size_t Size) override;
   uint8_t *getBufferStart() override;
   Error commit() override;
 
diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 6b386d29979c..4ae46851a66f 100644
--- a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -1,9 +1,8 @@
 //===- COFFObjcopy.cpp ----------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,6 +17,8 @@
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/Path.h"
 #include <cassert>
 
 namespace llvm {
@@ -27,14 +28,104 @@ namespace coff {
 using namespace object;
 using namespace COFF;
 
+static bool isDebugSection(const Section &Sec) {
+  return Sec.Name.startswith(".debug");
+}
+
+static uint64_t getNextRVA(const Object &Obj) {
+  if (Obj.getSections().empty())
+    return 0;
+  const Section &Last = Obj.getSections().back();
+  return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize,
+                 Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
+}
+
+static uint32_t getCRC32(StringRef Data) {
+  JamCRC CRC;
+  CRC.update(ArrayRef<char>(Data.data(), Data.size()));
+  // The CRC32 value needs to be complemented because the JamCRC doesn't
+  // finalize the CRC32 value. It also doesn't negate the initial CRC32 value
+  // but it starts by default at 0xFFFFFFFF which is the complement of zero.
+  return ~CRC.getCRC();
+}
+
+static std::vector<uint8_t> createGnuDebugLinkSectionContents(StringRef File) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> LinkTargetOrErr =
+      MemoryBuffer::getFile(File);
+  if (!LinkTargetOrErr)
+    error("'" + File + "': " + LinkTargetOrErr.getError().message());
+  auto LinkTarget = std::move(*LinkTargetOrErr);
+  uint32_t CRC32 = getCRC32(LinkTarget->getBuffer());
+
+  StringRef FileName = sys::path::filename(File);
+  size_t CRCPos = alignTo(FileName.size() + 1, 4);
+  std::vector<uint8_t> Data(CRCPos + 4);
+  memcpy(Data.data(), FileName.data(), FileName.size());
+  support::endian::write32le(Data.data() + CRCPos, CRC32);
+  return Data;
+}
+
+static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
+  uint32_t StartRVA = getNextRVA(Obj);
+
+  std::vector<Section> Sections;
+  Section Sec;
+  Sec.setOwnedContents(createGnuDebugLinkSectionContents(DebugLinkFile));
+  Sec.Name = ".gnu_debuglink";
+  Sec.Header.VirtualSize = Sec.getContents().size();
+  Sec.Header.VirtualAddress = StartRVA;
+  Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
+                                     Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
+  // Sec.Header.PointerToRawData is filled in by the writer.
+  Sec.Header.PointerToRelocations = 0;
+  Sec.Header.PointerToLinenumbers = 0;
+  // Sec.Header.NumberOfRelocations is filled in by the writer.
+  Sec.Header.NumberOfLinenumbers = 0;
+  Sec.Header.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
+                               IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE;
+  Sections.push_back(Sec);
+  Obj.addSections(Sections);
+}
+
 static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+  // Perform the actual section removals.
+  Obj.removeSections([&Config](const Section &Sec) {
+    // Contrary to --only-keep-debug, --only-section fully removes sections that
+    // aren't mentioned.
+    if (!Config.OnlySection.empty() &&
+        !is_contained(Config.OnlySection, Sec.Name))
+      return true;
+
+    if (Config.StripDebug || Config.StripAll || Config.StripAllGNU ||
+        Config.DiscardMode == DiscardType::All || Config.StripUnneeded) {
+      if (isDebugSection(Sec) &&
+          (Sec.Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0)
+        return true;
+    }
+
+    if (is_contained(Config.ToRemove, Sec.Name))
+      return true;
+
+    return false;
+  });
+
+  if (Config.OnlyKeepDebug) {
+    // For --only-keep-debug, we keep all other sections, but remove their
+    // content. The VirtualSize field in the section header is kept intact.
+    Obj.truncateSections([](const Section &Sec) {
+      return !isDebugSection(Sec) && Sec.Name != ".buildid" &&
+             ((Sec.Header.Characteristics &
+               (IMAGE_SCN_CNT_CODE | IMAGE_SCN_CNT_INITIALIZED_DATA)) != 0);
+    });
+  }
+
   // StripAll removes all symbols and thus also removes all relocations.
   if (Config.StripAll || Config.StripAllGNU)
-    for (Section &Sec : Obj.Sections)
+    for (Section &Sec : Obj.getMutableSections())
       Sec.Relocs.clear();
 
   // If we need to do per-symbol removals, initialize the Referenced field.
-  if (Config.StripUnneeded || Config.DiscardAll ||
+  if (Config.StripUnneeded || Config.DiscardMode == DiscardType::All ||
       !Config.SymbolsToRemove.empty())
     if (Error E = Obj.markSymbols())
       return E;
@@ -50,47 +141,74 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
       // Explicitly removing a referenced symbol is an error.
       if (Sym.Referenced)
         reportError(Config.OutputFilename,
-                    make_error<StringError>(
-                        "not stripping symbol '" + Sym.Name +
-                            "' because it is named in a relocation.",
-                        llvm::errc::invalid_argument));
+                    createStringError(llvm::errc::invalid_argument,
+                                      "not stripping symbol '%s' because it is "
+                                      "named in a relocation",
+                                      Sym.Name.str().c_str()));
       return true;
     }
 
     if (!Sym.Referenced) {
       // With --strip-unneeded, GNU objcopy removes all unreferenced local
       // symbols, and any unreferenced undefined external.
-      if (Config.StripUnneeded &&
-          (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
-           Sym.Sym.SectionNumber == 0))
-        return true;
+      // With --strip-unneeded-symbol we strip only the named unreferenced
+      // local symbols instead of removing all of them.
+      if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
+          Sym.Sym.SectionNumber == 0)
+        if (Config.StripUnneeded ||
+            is_contained(Config.UnneededSymbolsToRemove, Sym.Name))
+          return true;
 
       // GNU objcopy keeps referenced local symbols and external symbols
       // if --discard-all is set, similar to what --strip-unneeded does,
       // but undefined local symbols are kept when --discard-all is set.
-      if (Config.DiscardAll && Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+      if (Config.DiscardMode == DiscardType::All &&
+          Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
           Sym.Sym.SectionNumber != 0)
         return true;
     }
 
     return false;
   });
+
+  if (!Config.AddGnuDebugLink.empty())
+    addGnuDebugLink(Obj, Config.AddGnuDebugLink);
+
+  if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
+      Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+      !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
+      !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
+      !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+      !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() ||
+      !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
+      !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
+      !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty() ||
+      Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+      Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+      Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+      Config.DiscardMode == DiscardType::Locals ||
+      !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
+    return createStringError(llvm::errc::invalid_argument,
+                             "option not supported by llvm-objcopy for COFF");
+  }
+
   return Error::success();
 }
 
-void executeObjcopyOnBinary(const CopyConfig &Config,
-                            object::COFFObjectFile &In, Buffer &Out) {
+Error executeObjcopyOnBinary(const CopyConfig &Config, COFFObjectFile &In,
+                             Buffer &Out) {
   COFFReader Reader(In);
   Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
   if (!ObjOrErr)
-    reportError(Config.InputFilename, ObjOrErr.takeError());
+    return createFileError(Config.InputFilename, ObjOrErr.takeError());
   Object *Obj = ObjOrErr->get();
   assert(Obj && "Unable to deserialize COFF object");
   if (Error E = handleArgs(Config, *Obj))
-    reportError(Config.InputFilename, std::move(E));
+    return createFileError(Config.InputFilename, std::move(E));
   COFFWriter Writer(*Obj, Out);
   if (Error E = Writer.write())
-    reportError(Config.OutputFilename, std::move(E));
+    return createFileError(Config.OutputFilename, std::move(E));
+  return Error::success();
 }
 
 } // end namespace coff
diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.h b/tools/llvm-objcopy/COFF/COFFObjcopy.h
index bf70bd9b4d84..858759e52c4a 100644
--- a/tools/llvm-objcopy/COFF/COFFObjcopy.h
+++ b/tools/llvm-objcopy/COFF/COFFObjcopy.h
@@ -1,9 +1,8 @@
 //===- COFFObjcopy.h --------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
 
 namespace llvm {
+class Error;
 
 namespace object {
 class COFFObjectFile;
@@ -21,8 +21,8 @@ struct CopyConfig;
 class Buffer;
 
 namespace coff {
-void executeObjcopyOnBinary(const CopyConfig &Config,
-                            object::COFFObjectFile &In, Buffer &Out);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::COFFObjectFile &In, Buffer &Out);
 
 } // end namespace coff
 } // end namespace objcopy
diff --git a/tools/llvm-objcopy/COFF/Object.cpp b/tools/llvm-objcopy/COFF/Object.cpp
index 315d3a778623..b07532c1dc39 100644
--- a/tools/llvm-objcopy/COFF/Object.cpp
+++ b/tools/llvm-objcopy/COFF/Object.cpp
@@ -1,13 +1,13 @@
 //===- Object.cpp ---------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "Object.h"
+#include "llvm/ADT/DenseSet.h"
 #include <algorithm>
 
 namespace llvm {
@@ -26,12 +26,8 @@ void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
 
 void Object::updateSymbols() {
   SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
-  size_t RawSymIndex = 0;
-  for (Symbol &Sym : Symbols) {
+  for (Symbol &Sym : Symbols)
     SymbolMap[Sym.UniqueId] = &Sym;
-    Sym.RawIndex = RawSymIndex;
-    RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
-  }
 }
 
 const Symbol *Object::findSymbol(size_t UniqueId) const {
@@ -56,15 +52,86 @@ Error Object::markSymbols() {
     for (const Relocation &R : Sec.Relocs) {
       auto It = SymbolMap.find(R.Target);
       if (It == SymbolMap.end())
-        return make_error<StringError>("Relocation target " + Twine(R.Target) +
-                                           " not found",
-                                       object_error::invalid_symbol_index);
+        return createStringError(object_error::invalid_symbol_index,
+                                 "relocation target %zu not found", R.Target);
       It->second->Referenced = true;
     }
   }
   return Error::success();
 }
 
+void Object::addSections(ArrayRef<Section> NewSections) {
+  for (Section S : NewSections) {
+    S.UniqueId = NextSectionUniqueId++;
+    Sections.emplace_back(S);
+  }
+  updateSections();
+}
+
+void Object::updateSections() {
+  SectionMap = DenseMap<ssize_t, Section *>(Sections.size());
+  size_t Index = 1;
+  for (Section &S : Sections) {
+    SectionMap[S.UniqueId] = &S;
+    S.Index = Index++;
+  }
+}
+
+const Section *Object::findSection(ssize_t UniqueId) const {
+  auto It = SectionMap.find(UniqueId);
+  if (It == SectionMap.end())
+    return nullptr;
+  return It->second;
+}
+
+void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
+  DenseSet<ssize_t> AssociatedSections;
+  auto RemoveAssociated = [&AssociatedSections](const Section &Sec) {
+    return AssociatedSections.count(Sec.UniqueId) == 1;
+  };
+  do {
+    DenseSet<ssize_t> RemovedSections;
+    Sections.erase(
+        std::remove_if(std::begin(Sections), std::end(Sections),
+                       [ToRemove, &RemovedSections](const Section &Sec) {
+                         bool Remove = ToRemove(Sec);
+                         if (Remove)
+                           RemovedSections.insert(Sec.UniqueId);
+                         return Remove;
+                       }),
+        std::end(Sections));
+    // Remove all symbols referring to the removed sections.
+    AssociatedSections.clear();
+    Symbols.erase(
+        std::remove_if(
+            std::begin(Symbols), std::end(Symbols),
+            [&RemovedSections, &AssociatedSections](const Symbol &Sym) {
+              // If there are sections that are associative to a removed
+              // section, remove those as well, since nothing will include
+              // them anymore (and we can't leave them dangling). They are
+              // collected here and removed on the next loop iteration.
+              if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) ==
+                  1)
+                AssociatedSections.insert(Sym.TargetSectionId);
+              return RemovedSections.count(Sym.TargetSectionId) == 1;
+            }),
+        std::end(Symbols));
+    ToRemove = RemoveAssociated;
+  } while (!AssociatedSections.empty());
+  updateSections();
+  updateSymbols();
+}
+
+void Object::truncateSections(function_ref<bool(const Section &)> ToTruncate) {
+  for (Section &Sec : Sections) {
+    if (ToTruncate(Sec)) {
+      Sec.clearContents();
+      Sec.Relocs.clear();
+      Sec.Header.SizeOfRawData = 0;
+    }
+  }
+}
+
 } // end namespace coff
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/tools/llvm-objcopy/COFF/Object.h b/tools/llvm-objcopy/COFF/Object.h
index 7531fb4cf39e..21475b068629 100644
--- a/tools/llvm-objcopy/COFF/Object.h
+++ b/tools/llvm-objcopy/COFF/Object.h
@@ -1,9 +1,8 @@
 //===- Object.h -------------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -12,6 +11,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/COFF.h"
@@ -35,15 +35,58 @@ struct Relocation {
 
 struct Section {
   object::coff_section Header;
-  ArrayRef<uint8_t> Contents;
   std::vector<Relocation> Relocs;
   StringRef Name;
+  ssize_t UniqueId;
+  size_t Index;
+
+  ArrayRef<uint8_t> getContents() const {
+    if (!OwnedContents.empty())
+      return OwnedContents;
+    return ContentsRef;
+  }
+
+  void setContentsRef(ArrayRef<uint8_t> Data) {
+    OwnedContents.clear();
+    ContentsRef = Data;
+  }
+
+  void setOwnedContents(std::vector<uint8_t> &&Data) {
+    ContentsRef = ArrayRef<uint8_t>();
+    OwnedContents = std::move(Data);
+  }
+
+  void clearContents() {
+    ContentsRef = ArrayRef<uint8_t>();
+    OwnedContents.clear();
+  }
+
+private:
+  ArrayRef<uint8_t> ContentsRef;
+  std::vector<uint8_t> OwnedContents;
+};
+
+struct AuxSymbol {
+  AuxSymbol(ArrayRef<uint8_t> In) {
+    assert(In.size() == sizeof(Opaque));
+    std::copy(In.begin(), In.end(), Opaque);
+  }
+
+  ArrayRef<uint8_t> getRef() const {
+    return ArrayRef<uint8_t>(Opaque, sizeof(Opaque));
+  }
+
+  uint8_t Opaque[sizeof(object::coff_symbol16)];
 };
 
 struct Symbol {
   object::coff_symbol32 Sym;
   StringRef Name;
-  ArrayRef<uint8_t> AuxData;
+  std::vector<AuxSymbol> AuxData;
+  StringRef AuxFile;
+  ssize_t TargetSectionId;
+  ssize_t AssociativeComdatTargetSectionId = 0;
+  Optional<size_t> WeakTargetSymbolId;
   size_t UniqueId;
   size_t RawIndex;
   bool Referenced;
@@ -62,7 +105,6 @@ struct Object {
   uint32_t BaseOfData = 0; // pe32plus_header lacks this field.
 
   std::vector<object::data_directory> DataDirectories;
-  std::vector<Section> Sections;
 
   ArrayRef<Symbol> getSymbols() const { return Symbols; }
   // This allows mutating individual Symbols, but not mutating the list
@@ -80,14 +122,35 @@ struct Object {
   // all sections.
   Error markSymbols();
 
+  ArrayRef<Section> getSections() const { return Sections; }
+  // This allows mutating individual Sections, but not mutating the list
+  // of sections itself.
+  iterator_range<std::vector<Section>::iterator> getMutableSections() {
+    return make_range(Sections.begin(), Sections.end());
+  }
+
+  const Section *findSection(ssize_t UniqueId) const;
+
+  void addSections(ArrayRef<Section> NewSections);
+  void removeSections(function_ref<bool(const Section &)> ToRemove);
+  void truncateSections(function_ref<bool(const Section &)> ToTruncate);
+
 private:
   std::vector<Symbol> Symbols;
   DenseMap<size_t, Symbol *> SymbolMap;
 
   size_t NextSymbolUniqueId = 0;
 
-  // Update SymbolMap and RawIndex in each Symbol.
+  std::vector<Section> Sections;
+  DenseMap<ssize_t, Section *> SectionMap;
+
+  ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
+
+  // Update SymbolMap.
   void updateSymbols();
+
+  // Update SectionMap and Index in each Section.
+  void updateSections();
 };
 
 // Copy between coff_symbol16 and coff_symbol32.
diff --git a/tools/llvm-objcopy/COFF/Reader.cpp b/tools/llvm-objcopy/COFF/Reader.cpp
index a01768392d7d..1f0ec9fa9691 100644
--- a/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/tools/llvm-objcopy/COFF/Reader.cpp
@@ -1,17 +1,16 @@
 //===- Reader.cpp ---------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "Reader.h"
 #include "Object.h"
-#include "llvm-objcopy.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstddef>
@@ -22,6 +21,7 @@ namespace objcopy {
 namespace coff {
 
 using namespace object;
+using namespace COFF;
 
 Error COFFReader::readExecutableHeaders(Object &Obj) const {
   const dos_header *DH = COFFObj.getDOSHeader();
@@ -59,31 +59,38 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const {
 }
 
 Error COFFReader::readSections(Object &Obj) const {
+  std::vector<Section> Sections;
   // Section indexing starts from 1.
   for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
     const coff_section *Sec;
     if (auto EC = COFFObj.getSection(I, Sec))
       return errorCodeToError(EC);
-    Obj.Sections.push_back(Section());
-    Section &S = Obj.Sections.back();
+    Sections.push_back(Section());
+    Section &S = Sections.back();
     S.Header = *Sec;
-    if (auto EC = COFFObj.getSectionContents(Sec, S.Contents))
-      return errorCodeToError(EC);
+    ArrayRef<uint8_t> Contents;
+    if (Error E = COFFObj.getSectionContents(Sec, Contents))
+      return E;
+    S.setContentsRef(Contents);
     ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
     for (const coff_relocation &R : Relocs)
       S.Relocs.push_back(R);
-    if (auto EC = COFFObj.getSectionName(Sec, S.Name))
-      return errorCodeToError(EC);
+    if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
+      S.Name = *NameOrErr;
+    else
+      return NameOrErr.takeError();
     if (Sec->hasExtendedRelocations())
-      return make_error<StringError>("Extended relocations not supported yet",
-                                     object_error::parse_failed);
+      return createStringError(object_error::parse_failed,
+                               "extended relocations not supported yet");
   }
+  Obj.addSections(Sections);
   return Error::success();
 }
 
 Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
   std::vector<Symbol> Symbols;
   Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+  ArrayRef<Section> Sections = Obj.getSections();
   for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
     Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
     if (!SymOrErr)
@@ -101,31 +108,86 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
                  *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
     if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
       return errorCodeToError(EC);
-    Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
-    assert((Sym.AuxData.size() %
-            (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+
+    ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
+    size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
+    assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
+    // The auxiliary symbols are structs of sizeof(coff_symbol16) each.
+    // In the big object format (where symbols are coff_symbol32), each
+    // auxiliary symbol is padded with 2 bytes at the end. Copy each
+    // auxiliary symbol to the Sym.AuxData vector. For file symbols,
+    // the whole range of aux symbols are interpreted as one null padded
+    // string instead.
+    if (SymRef.isFileRecord())
+      Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
+                              AuxData.size())
+                        .rtrim('\0');
+    else
+      for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
+        Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
+
+    // Find the unique id of the section
+    if (SymRef.getSectionNumber() <=
+        0) // Special symbol (undefined/absolute/debug)
+      Sym.TargetSectionId = SymRef.getSectionNumber();
+    else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
+             Sections.size())
+      Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
+    else
+      return createStringError(object_error::parse_failed,
+                               "section number out of range");
+    // For section definitions, check if it is comdat associative, and if
+    // it is, find the target section unique id.
+    const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
+    const coff_aux_weak_external *WE = SymRef.getWeakExternal();
+    if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+      int32_t Index = SD->getNumber(IsBigObj);
+      if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
+        return createStringError(object_error::parse_failed,
+                                 "unexpected associative section index");
+      Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
+    } else if (WE) {
+      // This is a raw symbol index for now, but store it in the Symbol
+      // until we've added them to the Object, which assigns the final
+      // unique ids.
+      Sym.WeakTargetSymbolId = WE->TagIndex;
+    }
     I += 1 + SymRef.getNumberOfAuxSymbols();
   }
   Obj.addSymbols(Symbols);
   return Error::success();
 }
 
-Error COFFReader::setRelocTargets(Object &Obj) const {
+Error COFFReader::setSymbolTargets(Object &Obj) const {
   std::vector<const Symbol *> RawSymbolTable;
   for (const Symbol &Sym : Obj.getSymbols()) {
     RawSymbolTable.push_back(&Sym);
     for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
       RawSymbolTable.push_back(nullptr);
   }
-  for (Section &Sec : Obj.Sections) {
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+    // Convert WeakTargetSymbolId from the original raw symbol index to
+    // a proper unique id.
+    if (Sym.WeakTargetSymbolId) {
+      if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size())
+        return createStringError(object_error::parse_failed,
+                                 "weak external reference out of range");
+      const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId];
+      if (Target == nullptr)
+        return createStringError(object_error::parse_failed,
+                                 "invalid SymbolTableIndex");
+      Sym.WeakTargetSymbolId = Target->UniqueId;
+    }
+  }
+  for (Section &Sec : Obj.getMutableSections()) {
     for (Relocation &R : Sec.Relocs) {
       if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
-        return make_error<StringError>("SymbolTableIndex out of range",
-                                       object_error::parse_failed);
+        return createStringError(object_error::parse_failed,
+                                 "SymbolTableIndex out of range");
       const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
       if (Sym == nullptr)
-        return make_error<StringError>("Invalid SymbolTableIndex",
-                                       object_error::parse_failed);
+        return createStringError(object_error::parse_failed,
+                                 "invalid SymbolTableIndex");
       R.Target = Sym->UniqueId;
       R.TargetName = Sym->Name;
     }
@@ -145,8 +207,8 @@ Expected<std::unique_ptr<Object>> COFFReader::create() const {
     Obj->CoffFileHeader = *CFH;
   } else {
     if (!CBFH)
-      return make_error<StringError>("No COFF file header returned",
-                                     object_error::parse_failed);
+      return createStringError(object_error::parse_failed,
+                               "no COFF file header returned");
     // Only copying the few fields from the bigobj header that we need
     // and won't recreate in the end.
     Obj->CoffFileHeader.Machine = CBFH->Machine;
@@ -160,7 +222,7 @@ Expected<std::unique_ptr<Object>> COFFReader::create() const {
     return std::move(E);
   if (Error E = readSymbols(*Obj, IsBigObj))
     return std::move(E);
-  if (Error E = setRelocTargets(*Obj))
+  if (Error E = setSymbolTargets(*Obj))
     return std::move(E);
 
   return std::move(Obj);
diff --git a/tools/llvm-objcopy/COFF/Reader.h b/tools/llvm-objcopy/COFF/Reader.h
index ca7057d08c9f..ec15369db0b8 100644
--- a/tools/llvm-objcopy/COFF/Reader.h
+++ b/tools/llvm-objcopy/COFF/Reader.h
@@ -1,9 +1,8 @@
 //===- Reader.h -------------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,7 +28,7 @@ class COFFReader {
   Error readExecutableHeaders(Object &Obj) const;
   Error readSections(Object &Obj) const;
   Error readSymbols(Object &Obj, bool IsBigObj) const;
-  Error setRelocTargets(Object &Obj) const;
+  Error setSymbolTargets(Object &Obj) const;
 
 public:
   explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {}
diff --git a/tools/llvm-objcopy/COFF/Writer.cpp b/tools/llvm-objcopy/COFF/Writer.cpp
index 385d43b1bae5..f3bb1ce331f2 100644
--- a/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/tools/llvm-objcopy/COFF/Writer.cpp
@@ -1,15 +1,13 @@
 //===- Writer.cpp ---------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "Writer.h"
 #include "Object.h"
-#include "llvm-objcopy.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/COFF.h"
@@ -26,22 +24,75 @@ using namespace object;
 using namespace COFF;
 
 Error COFFWriter::finalizeRelocTargets() {
-  for (Section &Sec : Obj.Sections) {
+  for (Section &Sec : Obj.getMutableSections()) {
     for (Relocation &R : Sec.Relocs) {
       const Symbol *Sym = Obj.findSymbol(R.Target);
       if (Sym == nullptr)
-        return make_error<StringError>("Relocation target " + R.TargetName +
-                                           " (" + Twine(R.Target) +
-                                           ") not found",
-                                       object_error::invalid_symbol_index);
+        return createStringError(object_error::invalid_symbol_index,
+                                 "relocation target '%s' (%zu) not found",
+                                 R.TargetName.str().c_str(), R.Target);
       R.Reloc.SymbolTableIndex = Sym->RawIndex;
     }
   }
   return Error::success();
 }
 
+Error COFFWriter::finalizeSymbolContents() {
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+    if (Sym.TargetSectionId <= 0) {
+      // Undefined, or a special kind of symbol. These negative values
+      // are stored in the SectionNumber field which is unsigned.
+      Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId);
+    } else {
+      const Section *Sec = Obj.findSection(Sym.TargetSectionId);
+      if (Sec == nullptr)
+        return createStringError(object_error::invalid_symbol_index,
+                                 "symbol '%s' points to a removed section",
+                                 Sym.Name.str().c_str());
+      Sym.Sym.SectionNumber = Sec->Index;
+
+      if (Sym.Sym.NumberOfAuxSymbols == 1 &&
+          Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
+        coff_aux_section_definition *SD =
+            reinterpret_cast<coff_aux_section_definition *>(
+                Sym.AuxData[0].Opaque);
+        uint32_t SDSectionNumber;
+        if (Sym.AssociativeComdatTargetSectionId == 0) {
+          // Not a comdat associative section; just set the Number field to
+          // the number of the section itself.
+          SDSectionNumber = Sec->Index;
+        } else {
+          Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId);
+          if (Sec == nullptr)
+            return createStringError(
+                object_error::invalid_symbol_index,
+                "symbol '%s' is associative to a removed section",
+                Sym.Name.str().c_str());
+          SDSectionNumber = Sec->Index;
+        }
+        // Update the section definition with the new section number.
+        SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber);
+        SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16);
+      }
+    }
+    // Check that we actually have got AuxData to match the weak symbol target
+    // we want to set. Only >= 1 would be required, but only == 1 makes sense.
+    if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
+      coff_aux_weak_external *WE =
+          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
+      const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
+      if (Target == nullptr)
+        return createStringError(object_error::invalid_symbol_index,
+                                 "symbol '%s' is missing its weak target",
+                                 Sym.Name.str().c_str());
+      WE->TagIndex = Target->RawIndex;
+    }
+  }
+  return Error::success();
+}
+
 void COFFWriter::layoutSections() {
-  for (auto &S : Obj.Sections) {
+  for (auto &S : Obj.getMutableSections()) {
     if (S.Header.SizeOfRawData > 0)
       S.Header.PointerToRawData = FileSize;
     FileSize += S.Header.SizeOfRawData; // For executables, this is already
@@ -58,7 +109,7 @@ void COFFWriter::layoutSections() {
 }
 
 size_t COFFWriter::finalizeStringTable() {
-  for (auto &S : Obj.Sections)
+  for (const auto &S : Obj.getSections())
     if (S.Name.size() > COFF::NameSize)
       StrTabBuilder.add(S.Name);
 
@@ -68,8 +119,9 @@ size_t COFFWriter::finalizeStringTable() {
 
   StrTabBuilder.finalize();
 
-  for (auto &S : Obj.Sections) {
+  for (auto &S : Obj.getMutableSections()) {
     if (S.Name.size() > COFF::NameSize) {
+      memset(S.Header.Name, 0, sizeof(S.Header.Name));
       snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
                (int)StrTabBuilder.getOffset(S.Name));
     } else {
@@ -89,15 +141,30 @@ size_t COFFWriter::finalizeStringTable() {
 
 template <class SymbolTy>
 std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
-  size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
-  for (const auto &S : Obj.getSymbols())
-    SymTabSize += S.AuxData.size();
-  return std::make_pair(SymTabSize, sizeof(SymbolTy));
+  size_t RawSymIndex = 0;
+  for (auto &S : Obj.getMutableSymbols()) {
+    // Symbols normally have NumberOfAuxSymbols set correctly all the time.
+    // For file symbols, we need to know the output file's symbol size to be
+    // able to calculate the number of slots it occupies.
+    if (!S.AuxFile.empty())
+      S.Sym.NumberOfAuxSymbols =
+          alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
+    S.RawIndex = RawSymIndex;
+    RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
+  }
+  return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
 }
 
 Error COFFWriter::finalize(bool IsBigObj) {
+  size_t SymTabSize, SymbolSize;
+  std::tie(SymTabSize, SymbolSize) = IsBigObj
+                                         ? finalizeSymbolTable<coff_symbol32>()
+                                         : finalizeSymbolTable<coff_symbol16>();
+
   if (Error E = finalizeRelocTargets())
     return E;
+  if (Error E = finalizeSymbolContents())
+    return E;
 
   size_t SizeOfHeaders = 0;
   FileAlignment = 1;
@@ -114,10 +181,10 @@ Error COFFWriter::finalize(bool IsBigObj) {
     SizeOfHeaders +=
         PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
   }
-  Obj.CoffFileHeader.NumberOfSections = Obj.Sections.size();
+  Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size();
   SizeOfHeaders +=
       IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
-  SizeOfHeaders += sizeof(coff_section) * Obj.Sections.size();
+  SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size();
   SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
 
   Obj.CoffFileHeader.SizeOfOptionalHeader =
@@ -132,8 +199,8 @@ Error COFFWriter::finalize(bool IsBigObj) {
     Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
     Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
 
-    if (!Obj.Sections.empty()) {
-      const Section &S = Obj.Sections.back();
+    if (!Obj.getSections().empty()) {
+      const Section &S = Obj.getSections().back();
       Obj.PeHeader.SizeOfImage =
           alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
                   Obj.PeHeader.SectionAlignment);
@@ -145,10 +212,6 @@ Error COFFWriter::finalize(bool IsBigObj) {
   }
 
   size_t StrTabSize = finalizeStringTable();
-  size_t SymTabSize, SymbolSize;
-  std::tie(SymTabSize, SymbolSize) = IsBigObj
-                                         ? finalizeSymbolTable<coff_symbol32>()
-                                         : finalizeSymbolTable<coff_symbol16>();
 
   size_t PointerToSymbolTable = FileSize;
   // StrTabSize <= 4 is the size of an empty string table, only consisting
@@ -199,7 +262,7 @@ void COFFWriter::writeHeaders(bool IsBigObj) {
     BigObjHeader.unused4 = 0;
     // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
     // get the original one instead.
-    BigObjHeader.NumberOfSections = Obj.Sections.size();
+    BigObjHeader.NumberOfSections = Obj.getSections().size();
     BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
     BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
 
@@ -224,23 +287,24 @@ void COFFWriter::writeHeaders(bool IsBigObj) {
       Ptr += sizeof(DD);
     }
   }
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     memcpy(Ptr, &S.Header, sizeof(S.Header));
     Ptr += sizeof(S.Header);
   }
 }
 
 void COFFWriter::writeSections() {
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData;
-    std::copy(S.Contents.begin(), S.Contents.end(), Ptr);
+    ArrayRef<uint8_t> Contents = S.getContents();
+    std::copy(Contents.begin(), Contents.end(), Ptr);
 
     // For executable sections, pad the remainder of the raw data size with
     // 0xcc, which is int3 on x86.
     if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
-        S.Header.SizeOfRawData > S.Contents.size())
-      memset(Ptr + S.Contents.size(), 0xcc,
-             S.Header.SizeOfRawData - S.Contents.size());
+        S.Header.SizeOfRawData > Contents.size())
+      memset(Ptr + Contents.size(), 0xcc,
+             S.Header.SizeOfRawData - Contents.size());
 
     Ptr += S.Header.SizeOfRawData;
     for (const auto &R : S.Relocs) {
@@ -257,8 +321,23 @@ template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
     copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
                                         S.Sym);
     Ptr += sizeof(SymbolTy);
-    std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
-    Ptr += S.AuxData.size();
+    if (!S.AuxFile.empty()) {
+      // For file symbols, just write the string into the aux symbol slots,
+      // assuming that the unwritten parts are initialized to zero in the memory
+      // mapped file.
+      std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
+      Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
+    } else {
+      // For other auxiliary symbols, write their opaque payload into one symbol
+      // table slot each. For big object files, the symbols are larger than the
+      // opaque auxiliary symbol struct and we leave padding at the end of each
+      // entry.
+      for (const AuxSymbol &AuxSym : S.AuxData) {
+        ArrayRef<uint8_t> Ref = AuxSym.getRef();
+        std::copy(Ref.begin(), Ref.end(), Ptr);
+        Ptr += sizeof(SymbolTy);
+      }
+    }
   }
   if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
     // Always write a string table in object files, even an empty one.
@@ -271,7 +350,8 @@ Error COFFWriter::write(bool IsBigObj) {
   if (Error E = finalize(IsBigObj))
     return E;
 
-  Buf.allocate(FileSize);
+  if (Error E = Buf.allocate(FileSize))
+    return E;
 
   writeHeaders(IsBigObj);
   writeSections();
@@ -296,15 +376,14 @@ Error COFFWriter::patchDebugDirectory() {
   const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
   if (Dir->Size <= 0)
     return Error::success();
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
         Dir->RelativeVirtualAddress <
             S.Header.VirtualAddress + S.Header.SizeOfRawData) {
       if (Dir->RelativeVirtualAddress + Dir->Size >
           S.Header.VirtualAddress + S.Header.SizeOfRawData)
-        return make_error<StringError>(
-            "Debug directory extends past end of section",
-            object_error::parse_failed);
+        return createStringError(object_error::parse_failed,
+                                 "debug directory extends past end of section");
 
       size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
       uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData + Offset;
@@ -320,15 +399,15 @@ Error COFFWriter::patchDebugDirectory() {
       return Error::success();
     }
   }
-  return make_error<StringError>("Debug directory not found",
-                                 object_error::parse_failed);
+  return createStringError(object_error::parse_failed,
+                           "debug directory not found");
 }
 
 Error COFFWriter::write() {
-  bool IsBigObj = Obj.Sections.size() > MaxNumberOfSections16;
+  bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
   if (IsBigObj && Obj.IsPE)
-    return make_error<StringError>("Too many sections for executable",
-                                   object_error::parse_failed);
+    return createStringError(object_error::parse_failed,
+                             "too many sections for executable");
   return write(IsBigObj);
 }
 
diff --git a/tools/llvm-objcopy/COFF/Writer.h b/tools/llvm-objcopy/COFF/Writer.h
index ab66e0cc1134..681a8d5e4a66 100644
--- a/tools/llvm-objcopy/COFF/Writer.h
+++ b/tools/llvm-objcopy/COFF/Writer.h
@@ -1,9 +1,8 @@
 //===- Writer.h -------------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -31,10 +30,11 @@ class COFFWriter {
   size_t SizeOfInitializedData;
   StringTableBuilder StrTabBuilder;
 
+  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
   Error finalizeRelocTargets();
+  Error finalizeSymbolContents();
   void layoutSections();
   size_t finalizeStringTable();
-  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
 
   Error finalize(bool IsBigObj);
 
diff --git a/tools/llvm-objcopy/CopyConfig.cpp b/tools/llvm-objcopy/CopyConfig.cpp
index 3737f571ae61..8d6431b3044f 100644
--- a/tools/llvm-objcopy/CopyConfig.cpp
+++ b/tools/llvm-objcopy/CopyConfig.cpp
@@ -1,27 +1,26 @@
 //===- CopyConfig.cpp -----------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "CopyConfig.h"
-#include "llvm-objcopy.h"
 
-#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Object/ELFTypes.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/JamCRC.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/StringSaver.h"
 #include <memory>
-#include <string>
 
 namespace llvm {
 namespace objcopy {
@@ -93,45 +92,47 @@ public:
   StripOptTable() : OptTable(StripInfoTable) {}
 };
 
-enum SectionFlag {
-  SecNone = 0,
-  SecAlloc = 1 << 0,
-  SecLoad = 1 << 1,
-  SecNoload = 1 << 2,
-  SecReadonly = 1 << 3,
-  SecDebug = 1 << 4,
-  SecCode = 1 << 5,
-  SecData = 1 << 6,
-  SecRom = 1 << 7,
-  SecMerge = 1 << 8,
-  SecStrings = 1 << 9,
-  SecContents = 1 << 10,
-  SecShare = 1 << 11,
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
-};
-
 } // namespace
 
 static SectionFlag parseSectionRenameFlag(StringRef SectionName) {
   return llvm::StringSwitch<SectionFlag>(SectionName)
-      .Case("alloc", SectionFlag::SecAlloc)
-      .Case("load", SectionFlag::SecLoad)
-      .Case("noload", SectionFlag::SecNoload)
-      .Case("readonly", SectionFlag::SecReadonly)
-      .Case("debug", SectionFlag::SecDebug)
-      .Case("code", SectionFlag::SecCode)
-      .Case("data", SectionFlag::SecData)
-      .Case("rom", SectionFlag::SecRom)
-      .Case("merge", SectionFlag::SecMerge)
-      .Case("strings", SectionFlag::SecStrings)
-      .Case("contents", SectionFlag::SecContents)
-      .Case("share", SectionFlag::SecShare)
+      .CaseLower("alloc", SectionFlag::SecAlloc)
+      .CaseLower("load", SectionFlag::SecLoad)
+      .CaseLower("noload", SectionFlag::SecNoload)
+      .CaseLower("readonly", SectionFlag::SecReadonly)
+      .CaseLower("debug", SectionFlag::SecDebug)
+      .CaseLower("code", SectionFlag::SecCode)
+      .CaseLower("data", SectionFlag::SecData)
+      .CaseLower("rom", SectionFlag::SecRom)
+      .CaseLower("merge", SectionFlag::SecMerge)
+      .CaseLower("strings", SectionFlag::SecStrings)
+      .CaseLower("contents", SectionFlag::SecContents)
+      .CaseLower("share", SectionFlag::SecShare)
       .Default(SectionFlag::SecNone);
 }
 
-static SectionRename parseRenameSectionValue(StringRef FlagValue) {
+static Expected<SectionFlag>
+parseSectionFlagSet(ArrayRef<StringRef> SectionFlags) {
+  SectionFlag ParsedFlags = SectionFlag::SecNone;
+  for (StringRef Flag : SectionFlags) {
+    SectionFlag ParsedFlag = parseSectionRenameFlag(Flag);
+    if (ParsedFlag == SectionFlag::SecNone)
+      return createStringError(
+          errc::invalid_argument,
+          "unrecognized section flag '%s'. Flags supported for GNU "
+          "compatibility: alloc, load, noload, readonly, debug, code, data, "
+          "rom, share, contents, merge, strings",
+          Flag.str().c_str());
+    ParsedFlags |= ParsedFlag;
+  }
+
+  return ParsedFlags;
+}
+
+static Expected<SectionRename> parseRenameSectionValue(StringRef FlagValue) {
   if (!FlagValue.contains('='))
-    error("Bad format for --rename-section: missing '='");
+    return createStringError(errc::invalid_argument,
+                             "bad format for --rename-section: missing '='");
 
   // Initial split: ".foo" = ".bar,f1,f2,..."
   auto Old2New = FlagValue.split('=');
@@ -144,73 +145,210 @@ static SectionRename parseRenameSectionValue(StringRef FlagValue) {
   SR.NewName = NameAndFlags[0];
 
   if (NameAndFlags.size() > 1) {
-    SectionFlag Flags = SectionFlag::SecNone;
-    for (size_t I = 1, Size = NameAndFlags.size(); I < Size; ++I) {
-      SectionFlag Flag = parseSectionRenameFlag(NameAndFlags[I]);
-      if (Flag == SectionFlag::SecNone)
-        error("Unrecognized section flag '" + NameAndFlags[I] +
-              "'. Flags supported for GNU compatibility: alloc, load, noload, "
-              "readonly, debug, code, data, rom, share, contents, merge, "
-              "strings.");
-      Flags |= Flag;
-    }
-
-    SR.NewFlags = 0;
-    if (Flags & SectionFlag::SecAlloc)
-      *SR.NewFlags |= ELF::SHF_ALLOC;
-    if (!(Flags & SectionFlag::SecReadonly))
-      *SR.NewFlags |= ELF::SHF_WRITE;
-    if (Flags & SectionFlag::SecCode)
-      *SR.NewFlags |= ELF::SHF_EXECINSTR;
-    if (Flags & SectionFlag::SecMerge)
-      *SR.NewFlags |= ELF::SHF_MERGE;
-    if (Flags & SectionFlag::SecStrings)
-      *SR.NewFlags |= ELF::SHF_STRINGS;
+    Expected<SectionFlag> ParsedFlagSet =
+        parseSectionFlagSet(makeArrayRef(NameAndFlags).drop_front());
+    if (!ParsedFlagSet)
+      return ParsedFlagSet.takeError();
+    SR.NewFlags = *ParsedFlagSet;
   }
 
   return SR;
 }
 
+static Expected<SectionFlagsUpdate>
+parseSetSectionFlagValue(StringRef FlagValue) {
+  if (!StringRef(FlagValue).contains('='))
+    return createStringError(errc::invalid_argument,
+                             "bad format for --set-section-flags: missing '='");
+
+  // Initial split: ".foo" = "f1,f2,..."
+  auto Section2Flags = StringRef(FlagValue).split('=');
+  SectionFlagsUpdate SFU;
+  SFU.Name = Section2Flags.first;
+
+  // Flags split: "f1" "f2" ...
+  SmallVector<StringRef, 6> SectionFlags;
+  Section2Flags.second.split(SectionFlags, ',');
+  Expected<SectionFlag> ParsedFlagSet = parseSectionFlagSet(SectionFlags);
+  if (!ParsedFlagSet)
+    return ParsedFlagSet.takeError();
+  SFU.NewFlags = *ParsedFlagSet;
+
+  return SFU;
+}
+
+static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
+  // Parse value given with --add-symbol option and create the
+  // new symbol if possible. The value format for --add-symbol is:
+  //
+  // <name>=[<section>:]<value>[,<flags>]
+  //
+  // where:
+  // <name> - symbol name, can be empty string
+  // <section> - optional section name. If not given, an ABS symbol is created
+  // <value> - symbol value, can be decimal or hexadecimal number prefixed
+  //           with 0x.
+  // <flags> - optional flags affecting symbol type, binding or visibility:
+  //           The following are currently supported:
+  //
+  //           global, local, weak, default, hidden, file, section, object,
+  //           indirect-function.
+  //
+  //           The following flags are ignored and provided for GNU
+  //           compatibility only:
+  //
+  //           warning, debug, constructor, indirect, synthetic,
+  //           unique-object, before=<symbol>.
+  NewSymbolInfo SI;
+  StringRef Value;
+  std::tie(SI.SymbolName, Value) = FlagValue.split('=');
+  if (Value.empty())
+    return createStringError(
+        errc::invalid_argument,
+        "bad format for --add-symbol, missing '=' after '%s'",
+        SI.SymbolName.str().c_str());
+
+  if (Value.contains(':')) {
+    std::tie(SI.SectionName, Value) = Value.split(':');
+    if (SI.SectionName.empty() || Value.empty())
+      return createStringError(
+          errc::invalid_argument,
+          "bad format for --add-symbol, missing section name or symbol value");
+  }
+
+  SmallVector<StringRef, 6> Flags;
+  Value.split(Flags, ',');
+  if (Flags[0].getAsInteger(0, SI.Value))
+    return createStringError(errc::invalid_argument, "bad symbol value: '%s'",
+                             Flags[0].str().c_str());
+
+  using Functor = std::function<void(void)>;
+  SmallVector<StringRef, 6> UnsupportedFlags;
+  for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I)
+    static_cast<Functor>(
+        StringSwitch<Functor>(Flags[I])
+            .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; })
+            .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; })
+            .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; })
+            .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; })
+            .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; })
+            .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; })
+            .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; })
+            .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; })
+            .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; })
+            .CaseLower("indirect-function",
+                       [&SI] { SI.Type = ELF::STT_GNU_IFUNC; })
+            .CaseLower("debug", [] {})
+            .CaseLower("constructor", [] {})
+            .CaseLower("warning", [] {})
+            .CaseLower("indirect", [] {})
+            .CaseLower("synthetic", [] {})
+            .CaseLower("unique-object", [] {})
+            .StartsWithLower("before", [] {})
+            .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))();
+  if (!UnsupportedFlags.empty())
+    return createStringError(errc::invalid_argument,
+                             "unsupported flag%s for --add-symbol: '%s'",
+                             UnsupportedFlags.size() > 1 ? "s" : "",
+                             join(UnsupportedFlags, "', '").c_str());
+  return SI;
+}
+
 static const StringMap<MachineInfo> ArchMap{
     // Name, {EMachine, 64bit, LittleEndian}
     {"aarch64", {ELF::EM_AARCH64, true, true}},
     {"arm", {ELF::EM_ARM, false, true}},
     {"i386", {ELF::EM_386, false, true}},
     {"i386:x86-64", {ELF::EM_X86_64, true, true}},
+    {"mips", {ELF::EM_MIPS, false, false}},
     {"powerpc:common64", {ELF::EM_PPC64, true, true}},
-    {"sparc", {ELF::EM_SPARC, false, true}},
+    {"riscv:rv32", {ELF::EM_RISCV, false, true}},
+    {"riscv:rv64", {ELF::EM_RISCV, true, true}},
+    {"sparc", {ELF::EM_SPARC, false, false}},
+    {"sparcel", {ELF::EM_SPARC, false, true}},
     {"x86-64", {ELF::EM_X86_64, true, true}},
 };
 
-static const MachineInfo &getMachineInfo(StringRef Arch) {
+static Expected<const MachineInfo &> getMachineInfo(StringRef Arch) {
   auto Iter = ArchMap.find(Arch);
   if (Iter == std::end(ArchMap))
-    error("Invalid architecture: '" + Arch + "'");
+    return createStringError(errc::invalid_argument,
+                             "invalid architecture: '%s'", Arch.str().c_str());
   return Iter->getValue();
 }
 
-static const StringMap<MachineInfo> OutputFormatMap{
+struct TargetInfo {
+  FileFormat Format;
+  MachineInfo Machine;
+};
+
+// FIXME: consolidate with the bfd parsing used by lld.
+static const StringMap<MachineInfo> TargetMap{
     // Name, {EMachine, 64bit, LittleEndian}
+    // x86
     {"elf32-i386", {ELF::EM_386, false, true}},
-    {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
     {"elf32-x86-64", {ELF::EM_X86_64, false, true}},
-    {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
     {"elf64-x86-64", {ELF::EM_X86_64, true, true}},
+    // Intel MCU
+    {"elf32-iamcu", {ELF::EM_IAMCU, false, true}},
+    // ARM
+    {"elf32-littlearm", {ELF::EM_ARM, false, true}},
+    // ARM AArch64
+    {"elf64-aarch64", {ELF::EM_AARCH64, true, true}},
+    {"elf64-littleaarch64", {ELF::EM_AARCH64, true, true}},
+    // RISC-V
+    {"elf32-littleriscv", {ELF::EM_RISCV, false, true}},
+    {"elf64-littleriscv", {ELF::EM_RISCV, true, true}},
+    // PowerPC
+    {"elf32-powerpc", {ELF::EM_PPC, false, false}},
+    {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
+    {"elf64-powerpc", {ELF::EM_PPC64, true, false}},
+    {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
+    // MIPS
+    {"elf32-bigmips", {ELF::EM_MIPS, false, false}},
+    {"elf32-ntradbigmips", {ELF::EM_MIPS, false, false}},
+    {"elf32-ntradlittlemips", {ELF::EM_MIPS, false, true}},
+    {"elf32-tradbigmips", {ELF::EM_MIPS, false, false}},
+    {"elf32-tradlittlemips", {ELF::EM_MIPS, false, true}},
+    {"elf64-tradbigmips", {ELF::EM_MIPS, true, false}},
+    {"elf64-tradlittlemips", {ELF::EM_MIPS, true, true}},
+    // SPARC
+    {"elf32-sparc", {ELF::EM_SPARC, false, false}},
+    {"elf32-sparcel", {ELF::EM_SPARC, false, true}},
 };
 
-static const MachineInfo &getOutputFormatMachineInfo(StringRef Format) {
-  auto Iter = OutputFormatMap.find(Format);
-  if (Iter == std::end(OutputFormatMap))
-    error("Invalid output format: '" + Format + "'");
-  return Iter->getValue();
+static Expected<TargetInfo>
+getOutputTargetInfoByTargetName(StringRef TargetName) {
+  StringRef OriginalTargetName = TargetName;
+  bool IsFreeBSD = TargetName.consume_back("-freebsd");
+  auto Iter = TargetMap.find(TargetName);
+  if (Iter == std::end(TargetMap))
+    return createStringError(errc::invalid_argument,
+                             "invalid output format: '%s'",
+                             OriginalTargetName.str().c_str());
+  MachineInfo MI = Iter->getValue();
+  if (IsFreeBSD)
+    MI.OSABI = ELF::ELFOSABI_FREEBSD;
+
+  FileFormat Format;
+  if (TargetName.startswith("elf"))
+    Format = FileFormat::ELF;
+  else
+    // This should never happen because `TargetName` is valid (it certainly
+    // exists in the TargetMap).
+    llvm_unreachable("unknown target prefix");
+
+  return {TargetInfo{Format, MI}};
 }
 
-static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
-                                     StringRef Filename) {
+static Error addSymbolsFromFile(std::vector<NameOrRegex> &Symbols,
+                                BumpPtrAllocator &Alloc, StringRef Filename,
+                                bool UseRegex) {
+  StringSaver Saver(Alloc);
   SmallVector<StringRef, 16> Lines;
   auto BufOrErr = MemoryBuffer::getFile(Filename);
   if (!BufOrErr)
-    reportError(Filename, BufOrErr.getError());
+    return createFileError(Filename, BufOrErr.getError());
 
   BufOrErr.get()->getBuffer().split(Lines, '\n');
   for (StringRef Line : Lines) {
@@ -218,14 +356,62 @@ static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
     // it's not empty.
     auto TrimmedLine = Line.split('#').first.trim();
     if (!TrimmedLine.empty())
-      Symbols.push_back(TrimmedLine.str());
+      Symbols.emplace_back(Saver.save(TrimmedLine), UseRegex);
   }
+
+  return Error::success();
+}
+
+NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) {
+  if (!IsRegex) { // Literal mode: keep the name; R stays null.
+    Name = Pattern;
+    return;
+  }
+
+  SmallVector<char, 32> Data; // Scratch storage for the concatenated pattern.
+  R = std::make_shared<Regex>( // Re-anchored as ^...$ after stripping anchors.
+      ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data));
+}
+
+static Error addSymbolsToRenameFromFile(StringMap<StringRef> &SymbolsToRename,
+                                        BumpPtrAllocator &Alloc,
+                                        StringRef Filename) {
+  StringSaver Saver(Alloc); // Copies kept in Alloc outlive the file buffer.
+  SmallVector<StringRef, 16> Lines; // Expected format: "old new" per line.
+  auto BufOrErr = MemoryBuffer::getFile(Filename);
+  if (!BufOrErr)
+    return createFileError(Filename, BufOrErr.getError());
+
+  BufOrErr.get()->getBuffer().split(Lines, '\n');
+  size_t NumLines = Lines.size();
+  for (size_t LineNo = 0; LineNo < NumLines; ++LineNo) {
+    StringRef TrimmedLine = Lines[LineNo].split('#').first.trim();
+    if (TrimmedLine.empty()) // Skip blank and comment-only lines.
+      continue;
+
+    std::pair<StringRef, StringRef> Pair = Saver.save(TrimmedLine).split(' ');
+    StringRef NewName = Pair.second.trim(); // Text after the first space.
+    if (NewName.empty())
+      return createStringError(errc::invalid_argument,
+                               "%s:%zu: missing new symbol name",
+                               Filename.str().c_str(), LineNo + 1); // 1-based.
+    SymbolsToRename.insert({Pair.first, NewName}); // Old name -> new name.
+  }
+  return Error::success();
+}
+
+template <class T> static ErrorOr<T> getAsInteger(StringRef Val) {
+  T Result; // Only returned when the parse below succeeds.
+  if (Val.getAsInteger(0, Result)) // Radix 0; a true result is the error path.
+    return errc::invalid_argument;
+  return Result;
 }
 
 // ParseObjcopyOptions returns the config and sets the input arguments. If a
 // help flag is set then ParseObjcopyOptions will print the help messege and
 // exit.
-DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+  DriverConfig DC;
   ObjcopyOptTable T;
   unsigned MissingArgumentIndex, MissingArgumentCount;
   llvm::opt::InputArgList InputArgs =
@@ -250,16 +436,18 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
   SmallVector<const char *, 2> Positional;
 
   for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
-    error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+    return createStringError(errc::invalid_argument, "unknown argument '%s'",
+                             Arg->getAsString(InputArgs).c_str());
 
   for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
     Positional.push_back(Arg->getValue());
 
   if (Positional.empty())
-    error("No input file specified");
+    return createStringError(errc::invalid_argument, "no input file specified");
 
   if (Positional.size() > 2)
-    error("Too many positional arguments");
+    return createStringError(errc::invalid_argument,
+                             "too many positional arguments");
 
   CopyConfig Config;
   Config.InputFilename = Positional[0];
@@ -267,23 +455,50 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
   if (InputArgs.hasArg(OBJCOPY_target) &&
       (InputArgs.hasArg(OBJCOPY_input_target) ||
        InputArgs.hasArg(OBJCOPY_output_target)))
-    error("--target cannot be used with --input-target or --output-target");
+    return createStringError(
+        errc::invalid_argument,
+        "--target cannot be used with --input-target or --output-target");
 
+  bool UseRegex = InputArgs.hasArg(OBJCOPY_regex);
+  StringRef InputFormat, OutputFormat;
   if (InputArgs.hasArg(OBJCOPY_target)) {
-    Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
-    Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+    InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+    OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
   } else {
-    Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
-    Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
+    InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
+    OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
   }
-  if (Config.InputFormat == "binary") {
+
+  // FIXME:  Currently, we ignore the target for non-binary/ihex formats
+  // explicitly specified by -I option (e.g. -Ielf32-x86-64) and guess the
+  // format by llvm::object::createBinary regardless of the option value.
+  Config.InputFormat = StringSwitch<FileFormat>(InputFormat)
+                           .Case("binary", FileFormat::Binary)
+                           .Case("ihex", FileFormat::IHex)
+                           .Default(FileFormat::Unspecified);
+  if (Config.InputFormat == FileFormat::Binary) {
     auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
     if (BinaryArch.empty())
-      error("Specified binary input without specifiying an architecture");
-    Config.BinaryArch = getMachineInfo(BinaryArch);
+      return createStringError(
+          errc::invalid_argument,
+          "specified binary input without specifiying an architecture");
+    Expected<const MachineInfo &> MI = getMachineInfo(BinaryArch);
+    if (!MI)
+      return MI.takeError();
+    Config.BinaryArch = *MI;
+  }
+
+  Config.OutputFormat = StringSwitch<FileFormat>(OutputFormat)
+                            .Case("binary", FileFormat::Binary)
+                            .Case("ihex", FileFormat::IHex)
+                            .Default(FileFormat::Unspecified);
+  if (Config.OutputFormat == FileFormat::Unspecified && !OutputFormat.empty()) {
+    Expected<TargetInfo> Target = getOutputTargetInfoByTargetName(OutputFormat);
+    if (!Target)
+      return Target.takeError();
+    Config.OutputFormat = Target->Format;
+    Config.OutputArch = Target->Machine;
   }
-  if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary")
-    Config.OutputArch = getOutputFormatMachineInfo(Config.OutputFormat);
 
   if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections,
                                       OBJCOPY_compress_debug_sections_eq)) {
@@ -297,14 +512,36 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
               .Case("zlib", DebugCompressionType::Z)
               .Default(DebugCompressionType::None);
       if (Config.CompressionType == DebugCompressionType::None)
-        error("Invalid or unsupported --compress-debug-sections format: " +
-              InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq));
-      if (!zlib::isAvailable())
-        error("LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress.");
+        return createStringError(
+            errc::invalid_argument,
+            "invalid or unsupported --compress-debug-sections format: %s",
+            InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq)
+                .str()
+                .c_str());
     }
+    if (!zlib::isAvailable())
+      return createStringError(
+          errc::invalid_argument,
+          "LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress");
   }
 
   Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
+  // The gnu_debuglink's target is expected to not change or else its CRC would
+  // become invalidated and get rejected. We can avoid recalculating the
+  // checksum for every target file inside an archive by precomputing the CRC
+  // here. This prevents a significant amount of I/O.
+  if (!Config.AddGnuDebugLink.empty()) {
+    auto DebugOrErr = MemoryBuffer::getFile(Config.AddGnuDebugLink);
+    if (!DebugOrErr)
+      return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError());
+    auto Debug = std::move(*DebugOrErr);
+    JamCRC CRC;
+    CRC.update(
+        ArrayRef<char>(Debug->getBuffer().data(), Debug->getBuffer().size()));
+    // The CRC32 value needs to be complemented because the JamCRC doesn't
+    // finalize the CRC32 value.
+    Config.GnuDebugLinkCRC32 = ~CRC.getCRC();
+  }
   Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir);
   if (InputArgs.hasArg(OBJCOPY_build_id_link_input))
     Config.BuildIdLinkInput =
@@ -314,27 +551,72 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
         InputArgs.getLastArgValue(OBJCOPY_build_id_link_output);
   Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
   Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols);
+  Config.AllocSectionsPrefix =
+      InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections);
+  if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition))
+    Config.ExtractPartition = Arg->getValue();
 
   for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
     if (!StringRef(Arg->getValue()).contains('='))
-      error("Bad format for --redefine-sym");
+      return createStringError(errc::invalid_argument,
+                               "bad format for --redefine-sym");
     auto Old2New = StringRef(Arg->getValue()).split('=');
     if (!Config.SymbolsToRename.insert(Old2New).second)
-      error("Multiple redefinition of symbol " + Old2New.first);
+      return createStringError(errc::invalid_argument,
+                               "multiple redefinition of symbol '%s'",
+                               Old2New.first.str().c_str());
   }
 
+  for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbols))
+    if (Error E = addSymbolsToRenameFromFile(Config.SymbolsToRename, DC.Alloc,
+                                             Arg->getValue()))
+      return std::move(E);
+
   for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
-    SectionRename SR = parseRenameSectionValue(StringRef(Arg->getValue()));
-    if (!Config.SectionsToRename.try_emplace(SR.OriginalName, SR).second)
-      error("Multiple renames of section " + SR.OriginalName);
+    Expected<SectionRename> SR =
+        parseRenameSectionValue(StringRef(Arg->getValue()));
+    if (!SR)
+      return SR.takeError();
+    if (!Config.SectionsToRename.try_emplace(SR->OriginalName, *SR).second)
+      return createStringError(errc::invalid_argument,
+                               "multiple renames of section '%s'",
+                               SR->OriginalName.str().c_str());
+  }
+  for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) {
+    Expected<SectionFlagsUpdate> SFU =
+        parseSetSectionFlagValue(Arg->getValue());
+    if (!SFU)
+      return SFU.takeError();
+    if (!Config.SetSectionFlags.try_emplace(SFU->Name, *SFU).second)
+      return createStringError(
+          errc::invalid_argument,
+          "--set-section-flags set multiple times for section '%s'",
+          SFU->Name.str().c_str());
+  }
+  // Prohibit combinations of --set-section-flags when the section name is used
+  // by --rename-section, either as a source or a destination.
+  for (const auto &E : Config.SectionsToRename) {
+    const SectionRename &SR = E.second;
+    if (Config.SetSectionFlags.count(SR.OriginalName))
+      return createStringError(
+          errc::invalid_argument,
+          "--set-section-flags=%s conflicts with --rename-section=%s=%s",
+          SR.OriginalName.str().c_str(), SR.OriginalName.str().c_str(),
+          SR.NewName.str().c_str());
+    if (Config.SetSectionFlags.count(SR.NewName))
+      return createStringError(
+          errc::invalid_argument,
+          "--set-section-flags=%s conflicts with --rename-section=%s=%s",
+          SR.NewName.str().c_str(), SR.OriginalName.str().c_str(),
+          SR.NewName.str().c_str());
   }
 
   for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
-    Config.ToRemove.push_back(Arg->getValue());
+    Config.ToRemove.emplace_back(Arg->getValue(), UseRegex);
   for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section))
-    Config.KeepSection.push_back(Arg->getValue());
+    Config.KeepSection.emplace_back(Arg->getValue(), UseRegex);
   for (auto Arg : InputArgs.filtered(OBJCOPY_only_section))
-    Config.OnlySection.push_back(Arg->getValue());
+    Config.OnlySection.emplace_back(Arg->getValue(), UseRegex);
   for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
     Config.AddSection.push_back(Arg->getValue());
   for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section))
@@ -347,27 +629,71 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
   Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
   Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
   Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
+  Config.ExtractMainPartition =
+      InputArgs.hasArg(OBJCOPY_extract_main_partition);
   Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
   Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
-  Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
+  if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
+    Config.DiscardMode =
+        InputArgs.hasFlag(OBJCOPY_discard_all, OBJCOPY_discard_locals)
+            ? DiscardType::All
+            : DiscardType::Locals;
   Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
   Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
   Config.DecompressDebugSections =
       InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
+  if (Config.DiscardMode == DiscardType::All)
+    Config.StripDebug = true;
   for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
-    Config.SymbolsToLocalize.push_back(Arg->getValue());
+    Config.SymbolsToLocalize.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols))
+    if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
   for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol))
-    Config.SymbolsToKeepGlobal.push_back(Arg->getValue());
+    Config.SymbolsToKeepGlobal.emplace_back(Arg->getValue(), UseRegex);
   for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols))
-    addGlobalSymbolsFromFile(Config.SymbolsToKeepGlobal, Arg->getValue());
+    if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
   for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
-    Config.SymbolsToGlobalize.push_back(Arg->getValue());
+    Config.SymbolsToGlobalize.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols))
+    if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
   for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
-    Config.SymbolsToWeaken.push_back(Arg->getValue());
+    Config.SymbolsToWeaken.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols))
+    if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
   for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
-    Config.SymbolsToRemove.push_back(Arg->getValue());
+    Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols))
+    if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol))
+    Config.UnneededSymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols))
+    if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
   for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
-    Config.SymbolsToKeep.push_back(Arg->getValue());
+    Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegex);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols))
+    if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc,
+                                     Arg->getValue(), UseRegex))
+      return std::move(E);
+  for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) {
+    Expected<NewSymbolInfo> NSI = parseNewSymbolInfo(Arg->getValue());
+    if (!NSI)
+      return NSI.takeError();
+    Config.SymbolsToAdd.push_back(*NSI);
+  }
+
+  Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
 
   Config.DeterministicArchives = InputArgs.hasFlag(
       OBJCOPY_enable_deterministic_archives,
@@ -375,24 +701,60 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
 
   Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates);
 
+  if (Config.PreserveDates &&
+      (Config.OutputFilename == "-" || Config.InputFilename == "-"))
+    return createStringError(errc::invalid_argument,
+                             "--preserve-dates requires a file");
+
+  for (auto Arg : InputArgs)
+    if (Arg->getOption().matches(OBJCOPY_set_start)) {
+      auto EAddr = getAsInteger<uint64_t>(Arg->getValue());
+      if (!EAddr)
+        return createStringError(
+            EAddr.getError(), "bad entry point address: '%s'", Arg->getValue());
+
+      Config.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
+    } else if (Arg->getOption().matches(OBJCOPY_change_start)) {
+      auto EIncr = getAsInteger<int64_t>(Arg->getValue());
+      if (!EIncr)
+        return createStringError(EIncr.getError(),
+                                 "bad entry point increment: '%s'",
+                                 Arg->getValue());
+      auto Expr = Config.EntryExpr ? std::move(Config.EntryExpr)
+                                   : [](uint64_t A) { return A; };
+      Config.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
+        return Expr(EAddr) + *EIncr;
+      };
+    }
+
   if (Config.DecompressDebugSections &&
       Config.CompressionType != DebugCompressionType::None) {
-    error("Cannot specify --compress-debug-sections at the same time as "
-          "--decompress-debug-sections at the same time");
+    return createStringError(
+        errc::invalid_argument,
+        "cannot specify both --compress-debug-sections and "
+        "--decompress-debug-sections");
   }
 
   if (Config.DecompressDebugSections && !zlib::isAvailable())
-    error("LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress.");
+    return createStringError(
+        errc::invalid_argument,
+        "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress");
+
+  if (Config.ExtractPartition && Config.ExtractMainPartition)
+    return createStringError(errc::invalid_argument,
+                             "cannot specify --extract-partition together with "
+                             "--extract-main-partition");
 
-  DriverConfig DC;
   DC.CopyConfigs.push_back(std::move(Config));
-  return DC;
+  return std::move(DC);
 }
 
 // ParseStripOptions returns the config and sets the input arguments. If a
 // help flag is set then ParseStripOptions will print the help messege and
 // exit.
-DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
+Expected<DriverConfig>
+parseStripOptions(ArrayRef<const char *> ArgsArr,
+                  std::function<Error(Error)> ErrorCallback) {
   StripOptTable T;
   unsigned MissingArgumentIndex, MissingArgumentCount;
   llvm::opt::InputArgList InputArgs =
@@ -414,44 +776,65 @@ DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
     exit(0);
   }
 
-  SmallVector<const char *, 2> Positional;
+  SmallVector<StringRef, 2> Positional;
   for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
-    error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+    return createStringError(errc::invalid_argument, "unknown argument '%s'",
+                             Arg->getAsString(InputArgs).c_str());
   for (auto Arg : InputArgs.filtered(STRIP_INPUT))
     Positional.push_back(Arg->getValue());
 
   if (Positional.empty())
-    error("No input file specified");
+    return createStringError(errc::invalid_argument, "no input file specified");
 
   if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output))
-    error("Multiple input files cannot be used in combination with -o");
+    return createStringError(
+        errc::invalid_argument,
+        "multiple input files cannot be used in combination with -o");
 
   CopyConfig Config;
+  bool UseRegexp = InputArgs.hasArg(STRIP_regex);
+  Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
   Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
 
-  Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
+  if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals))
+    Config.DiscardMode =
+        InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals)
+            ? DiscardType::All
+            : DiscardType::Locals;
   Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
-  Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
+  if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
+    Config.StripAll = Arg->getOption().getID() == STRIP_strip_all;
   Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
-
-  if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll &&
-      !Config.StripAllGNU)
-    Config.StripAll = true;
+  Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug);
+  Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
 
   for (auto Arg : InputArgs.filtered(STRIP_keep_section))
-    Config.KeepSection.push_back(Arg->getValue());
+    Config.KeepSection.emplace_back(Arg->getValue(), UseRegexp);
 
   for (auto Arg : InputArgs.filtered(STRIP_remove_section))
-    Config.ToRemove.push_back(Arg->getValue());
+    Config.ToRemove.emplace_back(Arg->getValue(), UseRegexp);
+
+  for (auto Arg : InputArgs.filtered(STRIP_strip_symbol))
+    Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegexp);
 
   for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
-    Config.SymbolsToKeep.push_back(Arg->getValue());
+    Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegexp);
+
+  if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug &&
+      !Config.StripUnneeded && Config.DiscardMode == DiscardType::None &&
+      !Config.StripAllGNU && Config.SymbolsToRemove.empty())
+    Config.StripAll = true;
+
+  if (Config.DiscardMode == DiscardType::All)
+    Config.StripDebug = true;
 
   Config.DeterministicArchives =
       InputArgs.hasFlag(STRIP_enable_deterministic_archives,
                         STRIP_disable_deterministic_archives, /*default=*/true);
 
   Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates);
+  Config.InputFormat = FileFormat::Unspecified;
+  Config.OutputFormat = FileFormat::Unspecified;
 
   DriverConfig DC;
   if (Positional.size() == 1) {
@@ -460,14 +843,30 @@ DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
         InputArgs.getLastArgValue(STRIP_output, Positional[0]);
     DC.CopyConfigs.push_back(std::move(Config));
   } else {
-    for (const char *Filename : Positional) {
+    StringMap<unsigned> InputFiles;
+    for (StringRef Filename : Positional) {
+      if (InputFiles[Filename]++ == 1) {
+        if (Filename == "-")
+          return createStringError(
+              errc::invalid_argument,
+              "cannot specify '-' as an input file more than once");
+        if (Error E = ErrorCallback(createStringError(
+                errc::invalid_argument, "'%s' was already specified",
+                Filename.str().c_str())))
+          return std::move(E);
+      }
       Config.InputFilename = Filename;
       Config.OutputFilename = Filename;
       DC.CopyConfigs.push_back(Config);
     }
   }
 
-  return DC;
+  if (Config.PreserveDates && (is_contained(Positional, "-") ||
+                               InputArgs.getLastArgValue(STRIP_output) == "-"))
+    return createStringError(errc::invalid_argument,
+                             "--preserve-dates requires a file");
+
+  return std::move(DC);
 }
 
 } // namespace objcopy
diff --git a/tools/llvm-objcopy/CopyConfig.h b/tools/llvm-objcopy/CopyConfig.h
index 71a2423ae1c8..aff3631a487c 100644
--- a/tools/llvm-objcopy/CopyConfig.h
+++ b/tools/llvm-objcopy/CopyConfig.h
@@ -1,9 +1,8 @@
 //===- CopyConfig.h -------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,40 +10,110 @@
 #define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
 // Necessary for llvm::DebugCompressionType::None
 #include "llvm/Target/TargetOptions.h"
-#include <string>
 #include <vector>
 
 namespace llvm {
 namespace objcopy {
 
+enum class FileFormat {
+  Unspecified, // Parser default when the name is not "binary"/"ihex".
+  ELF,         // Chosen for output target names starting with "elf".
+  Binary,      // Selected by the format name "binary".
+  IHex,        // Selected by the format name "ihex".
+};
+
 // This type keeps track of the machine info for various architectures. This
 // lets us map architecture names to ELF types and the e_machine value of the
 // ELF file.
 struct MachineInfo {
+  MachineInfo(uint16_t EM, uint8_t ABI, bool Is64, bool IsLittle)
+      : EMachine(EM), OSABI(ABI), Is64Bit(Is64), IsLittleEndian(IsLittle) {}
+  // Alternative constructor that defaults OSABI to ELF::ELFOSABI_NONE.
+  MachineInfo(uint16_t EM, bool Is64, bool IsLittle)
+      : MachineInfo(EM, ELF::ELFOSABI_NONE, Is64, IsLittle) {}
+  // Default constructor: EMachine 0, OSABI 0, Is64Bit and IsLittleEndian false.
+  MachineInfo() : MachineInfo(0, 0, false, false) {}
   uint16_t EMachine;
+  uint8_t OSABI; // Overridden to ELFOSABI_FREEBSD for "-freebsd" targets.
   bool Is64Bit;
   bool IsLittleEndian;
 };
 
+// Flags set by --set-section-flags or --rename-section. Interpretation of these
+// is format-specific and not all flags are meaningful for all object file
+// formats. This is a bitmask; many section flags may be set.
+enum SectionFlag {
+  SecNone = 0,
+  SecAlloc = 1 << 0, // ELF: getNewShfFlags sets SHF_ALLOC.
+  SecLoad = 1 << 1,
+  SecNoload = 1 << 2,
+  SecReadonly = 1 << 3, // ELF: SHF_WRITE is set when this is absent.
+  SecDebug = 1 << 4,
+  SecCode = 1 << 5, // ELF: getNewShfFlags sets SHF_EXECINSTR.
+  SecData = 1 << 6,
+  SecRom = 1 << 7,
+  SecMerge = 1 << 8, // ELF: getNewShfFlags sets SHF_MERGE.
+  SecStrings = 1 << 9, // ELF: getNewShfFlags sets SHF_STRINGS.
+  SecContents = 1 << 10,
+  SecShare = 1 << 11,
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
+};
+
 struct SectionRename {
   StringRef OriginalName;
   StringRef NewName;
-  Optional<uint64_t> NewFlags;
+  Optional<SectionFlag> NewFlags; // Bitmask of SectionFlag values, if any.
 };
+
+struct SectionFlagsUpdate {
+  StringRef Name; // Section name; also the key in CopyConfig::SetSectionFlags.
+  SectionFlag NewFlags; // Bitmask of SectionFlag values.
+};
+
+enum class DiscardType {
+  None,   // Default
+  All,    // --discard-all (-x); the option parsers then also set StripDebug
+  Locals, // --discard-locals (-X)
+};
+
+class NameOrRegex { // Matches a string by exact name or by regular expression.
+  StringRef Name; // Compared with == when R is null.
+  // Regex is shared between multiple CopyConfig instances.
+  std::shared_ptr<Regex> R; // Non-null selects regex matching.
+
+public:
+  NameOrRegex(StringRef Pattern, bool IsRegex);
+  bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; }
+  bool operator!=(StringRef S) const { return !operator==(S); }
+};
+
+struct NewSymbolInfo { // One parsed OBJCOPY_add_symbol argument.
+  StringRef SymbolName;
+  StringRef SectionName;
+  uint64_t Value = 0;
+  uint8_t Type = ELF::STT_NOTYPE; // Default: STT_NOTYPE.
+  uint8_t Bind = ELF::STB_GLOBAL; // Default: STB_GLOBAL.
+  uint8_t Visibility = ELF::STV_DEFAULT; // Default: STV_DEFAULT.
+};
 
 // Configuration for copying/stripping a single file.
 struct CopyConfig {
   // Main input/output options
   StringRef InputFilename;
-  StringRef InputFormat;
+  FileFormat InputFormat;
   StringRef OutputFilename;
-  StringRef OutputFormat;
+  FileFormat OutputFormat;
 
   // Only applicable for --input-format=binary
   MachineInfo BinaryArch;
@@ -53,33 +122,48 @@ struct CopyConfig {
 
   // Advanced options
   StringRef AddGnuDebugLink;
+  // Cached gnu_debuglink's target CRC
+  uint32_t GnuDebugLinkCRC32;
   StringRef BuildIdLinkDir;
   Optional<StringRef> BuildIdLinkInput;
   Optional<StringRef> BuildIdLinkOutput;
+  Optional<StringRef> ExtractPartition;
   StringRef SplitDWO;
   StringRef SymbolsPrefix;
+  StringRef AllocSectionsPrefix;
+  DiscardType DiscardMode = DiscardType::None;
 
   // Repeated options
   std::vector<StringRef> AddSection;
   std::vector<StringRef> DumpSection;
-  std::vector<StringRef> KeepSection;
-  std::vector<StringRef> OnlySection;
-  std::vector<StringRef> SymbolsToGlobalize;
-  std::vector<StringRef> SymbolsToKeep;
-  std::vector<StringRef> SymbolsToLocalize;
-  std::vector<StringRef> SymbolsToRemove;
-  std::vector<StringRef> SymbolsToWeaken;
-  std::vector<StringRef> ToRemove;
-  std::vector<std::string> SymbolsToKeepGlobal;
+  std::vector<NewSymbolInfo> SymbolsToAdd;
+  std::vector<NameOrRegex> KeepSection;
+  std::vector<NameOrRegex> OnlySection;
+  std::vector<NameOrRegex> SymbolsToGlobalize;
+  std::vector<NameOrRegex> SymbolsToKeep;
+  std::vector<NameOrRegex> SymbolsToLocalize;
+  std::vector<NameOrRegex> SymbolsToRemove;
+  std::vector<NameOrRegex> UnneededSymbolsToRemove;
+  std::vector<NameOrRegex> SymbolsToWeaken;
+  std::vector<NameOrRegex> ToRemove;
+  std::vector<NameOrRegex> SymbolsToKeepGlobal;
 
   // Map options
   StringMap<SectionRename> SectionsToRename;
+  StringMap<SectionFlagsUpdate> SetSectionFlags;
   StringMap<StringRef> SymbolsToRename;
 
+  // ELF entry point address expression. The input parameter is an entry point
+  // address in the input ELF file. The entry address in the output file is
+  // calculated with EntryExpr(input_address), when either --set-start or
+  // --change-start is used.
+  std::function<uint64_t(uint64_t)> EntryExpr;
+
   // Boolean options
+  bool AllowBrokenLinks = false;
   bool DeterministicArchives = true;
-  bool DiscardAll = false;
   bool ExtractDWO = false;
+  bool ExtractMainPartition = false;
   bool KeepFileSymbols = false;
   bool LocalizeHidden = false;
   bool OnlyKeepDebug = false;
@@ -101,17 +185,21 @@ struct CopyConfig {
 // will contain one or more CopyConfigs.
 struct DriverConfig {
   SmallVector<CopyConfig, 1> CopyConfigs;
+  BumpPtrAllocator Alloc; // Backs strings saved while reading symbol files.
 };
 
 // ParseObjcopyOptions returns the config and sets the input arguments. If a
 // help flag is set then ParseObjcopyOptions will print the help messege and
 // exit.
-DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
+Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
 
 // ParseStripOptions returns the config and sets the input arguments. If a
 // help flag is set then ParseStripOptions will print the help messege and
-// exit.
-DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr);
+// exit. ErrorCallback is used to handle recoverable errors. An Error returned
+// by the callback aborts the parsing and is then returned by this function.
+Expected<DriverConfig>
+parseStripOptions(ArrayRef<const char *> ArgsArr,
+                  std::function<Error(Error)> ErrorCallback);
 
 } // namespace objcopy
 } // namespace llvm
diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index f5ab8e708267..b366c6e55987 100644
--- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -1,9 +1,8 @@
 //===- ELFObjcopy.cpp -----------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 #include "llvm-objcopy.h"
 
 #include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,44 @@ static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
   return !isDWOSection(Sec);
 }
 
+uint64_t getNewShfFlags(SectionFlag AllFlags) {
+  uint64_t NewFlags = 0;
+  if (AllFlags & SectionFlag::SecAlloc)
+    NewFlags |= ELF::SHF_ALLOC;
+  if (!(AllFlags & SectionFlag::SecReadonly))
+    NewFlags |= ELF::SHF_WRITE;
+  if (AllFlags & SectionFlag::SecCode)
+    NewFlags |= ELF::SHF_EXECINSTR;
+  if (AllFlags & SectionFlag::SecMerge)
+    NewFlags |= ELF::SHF_MERGE;
+  if (AllFlags & SectionFlag::SecStrings)
+    NewFlags |= ELF::SHF_STRINGS;
+  return NewFlags;
+}
+
+static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
+                                            uint64_t NewFlags) {
+  // Bits in PreserveMask are always taken from OldFlags so that setting flags
+  // never drops them; this includes anything OS/processor dependent.
+  const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
+                                ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
+                                ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
+                                ELF::SHF_TLS | ELF::SHF_INFO_LINK;
+  return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
+}
+
+static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
+  Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
+
+  // In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This rule
+  // may promote more non-ALLOC sections than GNU objcopy, but it is fine as
+  // non-ALLOC SHT_NOBITS sections do not make much sense.
+  if (Sec.Type == SHT_NOBITS &&
+      (!(Sec.Flags & ELF::SHF_ALLOC) ||
+       Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
+    Sec.Type = SHT_PROGBITS;
+}
+
 static ElfType getOutputElfType(const Binary &Bin) {
   // Infer output ELF type from the input ELF object
   if (isa<ELFObjectFile<ELF32LE>>(Bin))
@@ -92,12 +130,9 @@ static ElfType getOutputElfType(const MachineInfo &MI) {
     return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
 }
 
-static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
-                                            Object &Obj, Buffer &Buf,
-                                            ElfType OutputElfType) {
-  if (Config.OutputFormat == "binary") {
-    return llvm::make_unique<BinaryWriter>(Obj, Buf);
-  }
+static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
+                                               Object &Obj, Buffer &Buf,
+                                               ElfType OutputElfType) {
   // Depending on the initial ELFT and OutputFormat we need a different Writer.
   switch (OutputElfType) {
   case ELFT_ELF32LE:
@@ -116,10 +151,27 @@ static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
   llvm_unreachable("Invalid output format");
 }
 
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+                                            Object &Obj, Buffer &Buf,
+                                            ElfType OutputElfType) {
+  switch (Config.OutputFormat) {
+  case FileFormat::Binary:
+    return llvm::make_unique<BinaryWriter>(Obj, Buf);
+  case FileFormat::IHex:
+    return llvm::make_unique<IHexWriter>(Obj, Buf);
+  default:
+    return createELFWriter(Config, Obj, Buf, OutputElfType);
+  }
+}
+
 template <class ELFT>
 static Expected<ArrayRef<uint8_t>>
-findBuildID(const object::ELFFile<ELFT> &In) {
-  for (const auto &Phdr : unwrapOrError(In.program_headers())) {
+findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
+  auto PhdrsOrErr = In.program_headers();
+  if (auto Err = PhdrsOrErr.takeError())
+    return createFileError(Config.InputFilename, std::move(Err));
+
+  for (const auto &Phdr : *PhdrsOrErr) {
     if (Phdr.p_type != PT_NOTE)
       continue;
     Error Err = Error::success();
@@ -127,58 +179,106 @@ findBuildID(const object::ELFFile<ELFT> &In) {
       if (Note.getType() == NT_GNU_BUILD_ID && Note.getName() == ELF_NOTE_GNU)
         return Note.getDesc();
     if (Err)
-      return std::move(Err);
+      return createFileError(Config.InputFilename, std::move(Err));
   }
-  return createStringError(llvm::errc::invalid_argument,
-                           "Could not find build ID.");
+
+  return createFileError(
+      Config.InputFilename,
+      createStringError(llvm::errc::invalid_argument,
+                        "could not find build ID"));
 }
 
 static Expected<ArrayRef<uint8_t>>
-findBuildID(const object::ELFObjectFileBase &In) {
+findBuildID(const CopyConfig &Config, const object::ELFObjectFileBase &In) {
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(&In))
-    return findBuildID(*O->getELFFile());
+    return findBuildID(Config, *O->getELFFile());
   else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(&In))
-    return findBuildID(*O->getELFFile());
+    return findBuildID(Config, *O->getELFFile());
   else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(&In))
-    return findBuildID(*O->getELFFile());
+    return findBuildID(Config, *O->getELFFile());
   else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(&In))
-    return findBuildID(*O->getELFFile());
+    return findBuildID(Config, *O->getELFFile());
 
   llvm_unreachable("Bad file format");
 }
 
-static void linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
-                             StringRef Suffix, ArrayRef<uint8_t> BuildIdBytes) {
+template <class... Ts>
+static Error makeStringError(std::error_code EC, const Twine &Msg, Ts &&... Args) {
+  std::string FullMsg = (EC.message() + ": " + Msg).str();
+  return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
+}
+
+#define MODEL_8 "%%%%%%%%"
+#define MODEL_16 MODEL_8 MODEL_8
+#define MODEL_32 (MODEL_16 MODEL_16)
+
+static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
+                              StringRef Suffix,
+                              ArrayRef<uint8_t> BuildIdBytes) {
   SmallString<128> Path = Config.BuildIdLinkDir;
   sys::path::append(Path, llvm::toHex(BuildIdBytes[0], /*LowerCase*/ true));
   if (auto EC = sys::fs::create_directories(Path))
-    error("cannot create build ID link directory " + Path + ": " +
-          EC.message());
+    return createFileError(
+        Path.str(),
+        makeStringError(EC, "cannot create build ID link directory"));
 
   sys::path::append(Path,
                     llvm::toHex(BuildIdBytes.slice(1), /*LowerCase*/ true));
   Path += Suffix;
-  if (auto EC = sys::fs::create_hard_link(ToLink, Path)) {
-    // Hard linking failed, try to remove the file first if it exists.
-    if (sys::fs::exists(Path))
-      sys::fs::remove(Path);
-    EC = sys::fs::create_hard_link(ToLink, Path);
-    if (EC)
-      error("cannot link " + ToLink + " to " + Path + ": " + EC.message());
+  SmallString<128> TmpPath;
+  // create_hard_link races so we need to link to a temporary path but
+  // we want to make sure that we choose a filename that does not exist.
+  // By using 32 model characters we get 128-bits of entropy. It is
+  // unlikely that this string has ever existed before much less exists
+  // on this disk or in the current working directory.
+  // Additionally we prepend the original Path for debugging but also
+  // because it ensures that we're linking within a directory on the same
+  // partition on the same device which is critical. It has the added
+  // win of yet further decreasing the odds of a conflict.
+  sys::fs::createUniquePath(Twine(Path) + "-" + MODEL_32 + ".tmp", TmpPath,
+                            /*MakeAbsolute*/ false);
+  if (auto EC = sys::fs::create_hard_link(ToLink, TmpPath)) {
+    // '%s' needs NUL-terminated strings, which StringRef does not guarantee.
+    return makeStringError(EC, "cannot link '%s' to '%s'",
+                           ToLink.str().c_str(), Path.c_str());
+  }
+  // We then atomically rename the link into place which will just move the
+  // link. If rename fails something is more seriously wrong so just return
+  // an error.
+  if (auto EC = sys::fs::rename(TmpPath, Path)) {
+    // As above: copy ToLink so the '%s' argument is NUL-terminated.
+    return makeStringError(EC, "cannot link '%s' to '%s'",
+                           ToLink.str().c_str(), Path.c_str());
+  }
+  // If `Path` was already a hard-link to the same underlying file then the
+  // temp file will be left so we need to remove it. Remove will not cause
+  // an error by default if the file is already gone so just blindly remove
+  // it rather than checking.
+  if (auto EC = sys::fs::remove(TmpPath)) {
+    TmpPath.push_back('\0');
+    return makeStringError(EC, "could not remove '%s'", TmpPath.data());
   }
+  return Error::success();
 }
 
-static void splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
-                           StringRef File, ElfType OutputElfType) {
+static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
+                            StringRef File, ElfType OutputElfType) {
   auto DWOFile = Reader.create();
-  DWOFile->removeSections(
-      [&](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); });
-  if (Config.OutputArch)
+  auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) {
+    return onlyKeepDWOPred(*DWOFile, Sec);
+  };
+  if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks, 
+                                        OnlyKeepDWOPred))
+    return E;
+  if (Config.OutputArch) {
     DWOFile->Machine = Config.OutputArch.getValue().EMachine;
+    DWOFile->OSABI = Config.OutputArch.getValue().OSABI;
+  }
   FileBuffer FB(File);
   auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType);
-  Writer->finalize();
-  Writer->write();
+  if (Error E = Writer->finalize())
+    return E;
+  return Writer->write();
 }
 
 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
@@ -186,9 +286,9 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
   for (auto &Sec : Obj.sections()) {
     if (Sec.Name == SecName) {
       if (Sec.OriginalData.empty())
-        return make_error<StringError>("Can't dump section \"" + SecName +
-                                           "\": it has no contents",
-                                       object_error::parse_failed);
+        return createStringError(object_error::parse_failed,
+                                 "cannot dump section '%s': it has no contents",
+                                 SecName.str().c_str());
       Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
           FileOutputBuffer::create(Filename, Sec.OriginalData.size());
       if (!BufferOrErr)
@@ -201,149 +301,143 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
       return Error::success();
     }
   }
-  return make_error<StringError>("Section not found",
-                                 object_error::parse_failed);
-}
-
-static bool isCompressed(const SectionBase &Section) {
-  const char *Magic = "ZLIB";
-  return StringRef(Section.Name).startswith(".zdebug") ||
-         (Section.OriginalData.size() > strlen(Magic) &&
-          !strncmp(reinterpret_cast<const char *>(Section.OriginalData.data()),
-                   Magic, strlen(Magic))) ||
-         (Section.Flags & ELF::SHF_COMPRESSED);
+  return createStringError(object_error::parse_failed, "section '%s' not found",
+                           SecName.str().c_str());
 }
 
 static bool isCompressable(const SectionBase &Section) {
-  return !isCompressed(Section) && isDebugSection(Section) &&
-         Section.Name != ".gdb_index";
+  return !(Section.Flags & ELF::SHF_COMPRESSED) &&
+         StringRef(Section.Name).startswith(".debug");
 }
 
 static void replaceDebugSections(
-    const CopyConfig &Config, Object &Obj, SectionPred &RemovePred,
+    Object &Obj, SectionPred &RemovePred,
     function_ref<bool(const SectionBase &)> shouldReplace,
     function_ref<SectionBase *(const SectionBase *)> addSection) {
+  // Build a list of the debug sections we are going to replace.
+  // We can't call `addSection` while iterating over sections,
+  // because it would mutate the sections array.
   SmallVector<SectionBase *, 13> ToReplace;
-  SmallVector<RelocationSection *, 13> RelocationSections;
-  for (auto &Sec : Obj.sections()) {
-    if (RelocationSection *R = dyn_cast<RelocationSection>(&Sec)) {
-      if (shouldReplace(*R->getSection()))
-        RelocationSections.push_back(R);
-      continue;
-    }
-
+  for (auto &Sec : Obj.sections())
     if (shouldReplace(Sec))
       ToReplace.push_back(&Sec);
-  }
 
-  for (SectionBase *S : ToReplace) {
-    SectionBase *NewSection = addSection(S);
+  // Build a mapping from original section to a new one.
+  DenseMap<SectionBase *, SectionBase *> FromTo;
+  for (SectionBase *S : ToReplace)
+    FromTo[S] = addSection(S);
 
-    for (RelocationSection *RS : RelocationSections) {
-      if (RS->getSection() == S)
-        RS->setSection(NewSection);
-    }
-  }
+  // Now we want to update the target sections of relocation
+  // sections. Also we will update the relocations themselves
+  // to update the symbol references.
+  for (auto &Sec : Obj.sections())
+    Sec.replaceSectionReferences(FromTo);
 
   RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) {
     return shouldReplace(Sec) || RemovePred(Sec);
   };
 }
 
-// This function handles the high level operations of GNU objcopy including
-// handling command line options. It's important to outline certain properties
-// we expect to hold of the command line operations. Any operation that "keeps"
-// should keep regardless of a remove. Additionally any removal should respect
-// any previous removals. Lastly whether or not something is removed shouldn't
-// depend a) on the order the options occur in or b) on some opaque priority
-// system. The only priority is that keeps/copies overrule removes.
-static void handleArgs(const CopyConfig &Config, Object &Obj,
-                       const Reader &Reader, ElfType OutputElfType) {
-
-  if (!Config.SplitDWO.empty()) {
-    splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
-  }
-  if (Config.OutputArch)
-    Obj.Machine = Config.OutputArch.getValue().EMachine;
+static bool isUnneededSymbol(const Symbol &Sym) {
+  return !Sym.Referenced &&
+         (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
+         Sym.Type != STT_SECTION;
+}
 
+static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
   // TODO: update or remove symbols only if there is an option that affects
   // them.
-  if (Obj.SymbolTable) {
-    Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
-      if (!Sym.isCommon() &&
-          ((Config.LocalizeHidden &&
-            (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
-           is_contained(Config.SymbolsToLocalize, Sym.Name)))
-        Sym.Binding = STB_LOCAL;
-
-      // Note: these two globalize flags have very similar names but different
-      // meanings:
-      //
-      // --globalize-symbol: promote a symbol to global
-      // --keep-global-symbol: all symbols except for these should be made local
-      //
-      // If --globalize-symbol is specified for a given symbol, it will be
-      // global in the output file even if it is not included via
-      // --keep-global-symbol. Because of that, make sure to check
-      // --globalize-symbol second.
-      if (!Config.SymbolsToKeepGlobal.empty() &&
-          !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
-          Sym.getShndx() != SHN_UNDEF)
-        Sym.Binding = STB_LOCAL;
-
-      if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
-          Sym.getShndx() != SHN_UNDEF)
-        Sym.Binding = STB_GLOBAL;
-
-      if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
-          Sym.Binding == STB_GLOBAL)
-        Sym.Binding = STB_WEAK;
-
-      if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
-          Sym.getShndx() != SHN_UNDEF)
-        Sym.Binding = STB_WEAK;
-
-      const auto I = Config.SymbolsToRename.find(Sym.Name);
-      if (I != Config.SymbolsToRename.end())
-        Sym.Name = I->getValue();
-
-      if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
-        Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
-    });
-
-    // The purpose of this loop is to mark symbols referenced by sections
-    // (like GroupSection or RelocationSection). This way, we know which
-    // symbols are still 'needed' and which are not.
-    if (Config.StripUnneeded) {
-      for (auto &Section : Obj.sections())
-        Section.markSymbols();
-    }
+  if (!Obj.SymbolTable)
+    return Error::success();
+
+  Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
+    // Common and undefined symbols don't make sense as local symbols, and can
+    // even cause crashes if we localize those, so skip them.
+    if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
+        ((Config.LocalizeHidden &&
+          (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
+         is_contained(Config.SymbolsToLocalize, Sym.Name)))
+      Sym.Binding = STB_LOCAL;
+
+    // Note: these two globalize flags have very similar names but different
+    // meanings:
+    //
+    // --globalize-symbol: promote a symbol to global
+    // --keep-global-symbol: all symbols except for these should be made local
+    //
+    // If --globalize-symbol is specified for a given symbol, it will be
+    // global in the output file even if it is not included via
+    // --keep-global-symbol. Because of that, make sure to check
+    // --globalize-symbol second.
+    if (!Config.SymbolsToKeepGlobal.empty() &&
+        !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_LOCAL;
+
+    if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_GLOBAL;
+
+    if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
+        Sym.Binding == STB_GLOBAL)
+      Sym.Binding = STB_WEAK;
+
+    if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
+        Sym.getShndx() != SHN_UNDEF)
+      Sym.Binding = STB_WEAK;
+
+    const auto I = Config.SymbolsToRename.find(Sym.Name);
+    if (I != Config.SymbolsToRename.end())
+      Sym.Name = I->getValue();
+
+    if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
+      Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
+  });
+
+  // The purpose of this loop is to mark symbols referenced by sections
+  // (like GroupSection or RelocationSection). This way, we know which
+  // symbols are still 'needed' and which are not.
+  if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.OnlySection.empty()) {
+    for (auto &Section : Obj.sections())
+      Section.markSymbols();
+  }
 
-    Obj.removeSymbols([&](const Symbol &Sym) {
-      if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
-          (Config.KeepFileSymbols && Sym.Type == STT_FILE))
-        return false;
+  auto RemoveSymbolsPred = [&](const Symbol &Sym) {
+    if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
+        (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+      return false;
 
-      if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
-          Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
-          Sym.Type != STT_SECTION)
-        return true;
+    if ((Config.DiscardMode == DiscardType::All ||
+         (Config.DiscardMode == DiscardType::Locals &&
+          StringRef(Sym.Name).startswith(".L"))) &&
+        Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF &&
+        Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
+      return true;
 
-      if (Config.StripAll || Config.StripAllGNU)
-        return true;
+    if (Config.StripAll || Config.StripAllGNU)
+      return true;
 
-      if (is_contained(Config.SymbolsToRemove, Sym.Name))
-        return true;
+    if (is_contained(Config.SymbolsToRemove, Sym.Name))
+      return true;
 
-      if (Config.StripUnneeded && !Sym.Referenced &&
-          (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
-          Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
-        return true;
+    if ((Config.StripUnneeded ||
+         is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) &&
+        isUnneededSymbol(Sym))
+      return true;
 
-      return false;
-    });
-  }
+    // We want to remove undefined symbols if all references have been stripped.
+    if (!Config.OnlySection.empty() && !Sym.Referenced &&
+        Sym.getShndx() == SHN_UNDEF)
+      return true;
+
+    return false;
+  };
 
+  return Obj.removeSymbols(RemoveSymbolsPred);
+}
+
+static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
   SectionPred RemovePred = [](const SectionBase &) { return false; };
 
   // Removes:
@@ -383,7 +477,7 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
 
   if (Config.StripSections) {
     RemovePred = [RemovePred](const SectionBase &Sec) {
-      return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
+      return RemovePred(Sec) || Sec.ParentSegment == nullptr;
     };
   }
 
@@ -399,7 +493,7 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
         return true;
       if (&Sec == Obj.SectionNames)
         return false;
-      return (Sec.Flags & SHF_ALLOC) == 0;
+      return (Sec.Flags & SHF_ALLOC) == 0 && Sec.ParentSegment == nullptr;
     };
 
   if (Config.StripAll)
@@ -410,9 +504,21 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
         return false;
       if (StringRef(Sec.Name).startswith(".gnu.warning"))
         return false;
+      if (Sec.ParentSegment != nullptr)
+        return false;
       return (Sec.Flags & SHF_ALLOC) == 0;
     };
 
+  if (Config.ExtractPartition || Config.ExtractMainPartition) {
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      if (RemovePred(Sec))
+        return true;
+      if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR)
+        return true;
+      return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment;
+    };
+  }
+
   // Explicit copies:
   if (!Config.OnlySection.empty()) {
     RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
@@ -461,95 +567,210 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
   }
 
   if (Config.CompressionType != DebugCompressionType::None)
-    replaceDebugSections(Config, Obj, RemovePred, isCompressable,
+    replaceDebugSections(Obj, RemovePred, isCompressable, 
                          [&Config, &Obj](const SectionBase *S) {
                            return &Obj.addSection<CompressedSection>(
-                               *S, Config.CompressionType);
-                         });
+                                *S, Config.CompressionType);
+                        });
   else if (Config.DecompressDebugSections)
     replaceDebugSections(
-        Config, Obj, RemovePred,
+        Obj, RemovePred,
         [](const SectionBase &S) { return isa<CompressedSection>(&S); },
         [&Obj](const SectionBase *S) {
           auto CS = cast<CompressedSection>(S);
           return &Obj.addSection<DecompressedSection>(*CS);
         });
 
-  Obj.removeSections(RemovePred);
+  return Obj.removeSections(Config.AllowBrokenLinks, RemovePred);
+}
 
-  if (!Config.SectionsToRename.empty()) {
+// This function handles the high level operations of GNU objcopy including
+// handling command line options. It's important to outline certain properties
+// we expect to hold of the command line operations. Any operation that "keeps"
+// should keep regardless of a remove. Additionally any removal should respect
+// any previous removals. Lastly whether or not something is removed shouldn't
+// depend a) on the order the options occur in or b) on some opaque priority
+// system. The only priority is that keeps/copies overrule removes.
+static Error handleArgs(const CopyConfig &Config, Object &Obj,
+                        const Reader &Reader, ElfType OutputElfType) {
+
+  if (!Config.SplitDWO.empty())
+    if (Error E =
+            splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType))
+      return E;
+
+  if (Config.OutputArch) {
+    Obj.Machine = Config.OutputArch.getValue().EMachine;
+    Obj.OSABI = Config.OutputArch.getValue().OSABI;
+  }
+
+  // It is important to remove the sections first. For example, we want to
+  // remove the relocation sections before removing the symbols. That allows
+  // us to avoid reporting the inappropriate errors about removing symbols
+  // named in relocations.
+  if (Error E = replaceAndRemoveSections(Config, Obj))
+    return E;
+
+  if (Error E = updateAndRemoveSymbols(Config, Obj))
+    return E;
+
+  if (!Config.SectionsToRename.empty() || !Config.AllocSectionsPrefix.empty()) {
+    DenseSet<SectionBase *> PrefixedSections;
     for (auto &Sec : Obj.sections()) {
       const auto Iter = Config.SectionsToRename.find(Sec.Name);
       if (Iter != Config.SectionsToRename.end()) {
         const SectionRename &SR = Iter->second;
         Sec.Name = SR.NewName;
-        if (SR.NewFlags.hasValue()) {
-          // Preserve some flags which should not be dropped when setting flags.
-          // Also, preserve anything OS/processor dependant.
-          const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
-                                        ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
-                                        ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
-                                        ELF::SHF_TLS | ELF::SHF_INFO_LINK;
-          Sec.Flags = (Sec.Flags & PreserveMask) |
-                      (SR.NewFlags.getValue() & ~PreserveMask);
+        if (SR.NewFlags.hasValue())
+          setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
+      }
+
+      // Add a prefix to allocated sections and their relocation sections. This
+      // should be done after renaming the section by Config.SectionsToRename to
+      // imitate the GNU objcopy behavior.
+      if (!Config.AllocSectionsPrefix.empty()) {
+        if (Sec.Flags & SHF_ALLOC) {
+          Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
+          PrefixedSections.insert(&Sec);
+
+          // Rename relocation sections associated to the allocated sections.
+          // For example, if we rename .text to .prefix.text, we also rename
+          // .rel.text to .rel.prefix.text.
+          //
+          // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
+          // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
+          // .rela.prefix.plt since GNU objcopy does so.
+        } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
+          auto *TargetSec = RelocSec->getSection();
+          if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
+            StringRef prefix;
+            switch (Sec.Type) {
+            case SHT_REL:
+              prefix = ".rel";
+              break;
+            case SHT_RELA:
+              prefix = ".rela";
+              break;
+            default:
+              continue;
+            }
+
+            // If the relocation section comes *after* the target section, we
+            // don't add Config.AllocSectionsPrefix because we've already added
+            // the prefix to TargetSec->Name. Otherwise, if the relocation
+            // section comes *before* the target section, we add the prefix.
+            if (PrefixedSections.count(TargetSec)) {
+              Sec.Name = (prefix + TargetSec->Name).str();
+            } else {
+              const auto Iter = Config.SectionsToRename.find(TargetSec->Name);
+              if (Iter != Config.SectionsToRename.end()) {
+                // Both `--rename-section` and `--prefix-alloc-sections` are
+                // given but the target section is not yet renamed.
+                Sec.Name =
+                    (prefix + Config.AllocSectionsPrefix + Iter->second.NewName)
+                        .str();
+              } else {
+                Sec.Name =
+                    (prefix + Config.AllocSectionsPrefix + TargetSec->Name)
+                        .str();
+              }
+            }
+          }
         }
       }
     }
   }
 
-  if (!Config.AddSection.empty()) {
-    for (const auto &Flag : Config.AddSection) {
-      std::pair<StringRef, StringRef> SecPair = Flag.split("=");
-      StringRef SecName = SecPair.first;
-      StringRef File = SecPair.second;
-      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
-          MemoryBuffer::getFile(File);
-      if (!BufOrErr)
-        reportError(File, BufOrErr.getError());
-      std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
-      ArrayRef<uint8_t> Data(
-          reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
-          Buf->getBufferSize());
-      OwnedDataSection &NewSection =
-          Obj.addSection<OwnedDataSection>(SecName, Data);
-      if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
-        NewSection.Type = SHT_NOTE;
+  if (!Config.SetSectionFlags.empty()) {
+    for (auto &Sec : Obj.sections()) {
+      const auto Iter = Config.SetSectionFlags.find(Sec.Name);
+      if (Iter != Config.SetSectionFlags.end())
+        setSectionFlagsAndType(Sec, Iter->second.NewFlags);
     }
   }
 
-  if (!Config.DumpSection.empty()) {
-    for (const auto &Flag : Config.DumpSection) {
-      std::pair<StringRef, StringRef> SecPair = Flag.split("=");
-      StringRef SecName = SecPair.first;
-      StringRef File = SecPair.second;
-      if (Error E = dumpSectionToFile(SecName, File, Obj))
-        reportError(Config.InputFilename, std::move(E));
-    }
+  for (const auto &Flag : Config.AddSection) {
+    std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+    StringRef SecName = SecPair.first;
+    StringRef File = SecPair.second;
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+        MemoryBuffer::getFile(File);
+    if (!BufOrErr)
+      return createFileError(File, errorCodeToError(BufOrErr.getError()));
+    std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+    ArrayRef<uint8_t> Data(
+        reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+        Buf->getBufferSize());
+    OwnedDataSection &NewSection =
+        Obj.addSection<OwnedDataSection>(SecName, Data);
+    if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
+      NewSection.Type = SHT_NOTE;
+  }
+
+  for (const auto &Flag : Config.DumpSection) {
+    std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+    StringRef SecName = SecPair.first;
+    StringRef File = SecPair.second;
+    if (Error E = dumpSectionToFile(SecName, File, Obj))
+      return E;
   }
 
   if (!Config.AddGnuDebugLink.empty())
-    Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
+    Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
+                                        Config.GnuDebugLinkCRC32);
+
+  if (!Obj.SymbolTable && !Config.SymbolsToAdd.empty())
+    return createStringError(object_error::parse_failed, "no symbol table");
+  for (const NewSymbolInfo &SI : Config.SymbolsToAdd) {
+    SectionBase *Sec = Obj.findSection(SI.SectionName);
+    uint64_t Value = Sec ? Sec->Addr + SI.Value : SI.Value;
+    Obj.SymbolTable->addSymbol(
+        SI.SymbolName, SI.Bind, SI.Type, Sec, Value, SI.Visibility,
+        Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
+  }
+
+  if (Config.EntryExpr)
+    Obj.Entry = Config.EntryExpr(Obj.Entry);
+  return Error::success();
 }
 
-void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
-                               Buffer &Out) {
+static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
+                         ElfType OutputElfType) {
+  std::unique_ptr<Writer> Writer =
+      createWriter(Config, Obj, Out, OutputElfType);
+  if (Error E = Writer->finalize())
+    return E;
+  return Writer->write();
+}
+
+Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+                           Buffer &Out) {
+  IHexReader Reader(&In);
+  std::unique_ptr<Object> Obj = Reader.create();
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+  if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+    return E;
+  return writeOutput(Config, *Obj, Out, OutputElfType);
+}
+
+Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+                                Buffer &Out) {
   BinaryReader Reader(Config.BinaryArch, &In);
   std::unique_ptr<Object> Obj = Reader.create();
 
   // Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
   // (-B<arch>).
-  const ElfType OutputElfType = getOutputElfType(
-      Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
-  handleArgs(Config, *Obj, Reader, OutputElfType);
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  Writer->finalize();
-  Writer->write();
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+  if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+    return E;
+  return writeOutput(Config, *Obj, Out, OutputElfType);
 }
 
-void executeObjcopyOnBinary(const CopyConfig &Config,
-                            object::ELFObjectFileBase &In, Buffer &Out) {
-  ELFReader Reader(&In);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::ELFObjectFileBase &In, Buffer &Out) {
+  ELFReader Reader(&In, Config.ExtractPartition);
   std::unique_ptr<Object> Obj = Reader.create();
   // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
   const ElfType OutputElfType =
@@ -558,25 +779,36 @@ void executeObjcopyOnBinary(const CopyConfig &Config,
   ArrayRef<uint8_t> BuildIdBytes;
 
   if (!Config.BuildIdLinkDir.empty()) {
-    BuildIdBytes = unwrapOrError(findBuildID(In));
+    auto BuildIdBytesOrErr = findBuildID(Config, In);
+    if (auto E = BuildIdBytesOrErr.takeError())
+      return E;
+    BuildIdBytes = *BuildIdBytesOrErr;
+
     if (BuildIdBytes.size() < 2)
-      error("build ID in file '" + Config.InputFilename +
-            "' is smaller than two bytes");
+      return createFileError(
+          Config.InputFilename,
+          createStringError(object_error::parse_failed,
+                            "build ID is smaller than two bytes"));
   }
 
-  if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput) {
-    linkToBuildIdDir(Config, Config.InputFilename,
-                     Config.BuildIdLinkInput.getValue(), BuildIdBytes);
-  }
-  handleArgs(Config, *Obj, Reader, OutputElfType);
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  Writer->finalize();
-  Writer->write();
-  if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput) {
-    linkToBuildIdDir(Config, Config.OutputFilename,
-                     Config.BuildIdLinkOutput.getValue(), BuildIdBytes);
-  }
+  if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput)
+    if (Error E =
+            linkToBuildIdDir(Config, Config.InputFilename,
+                             Config.BuildIdLinkInput.getValue(), BuildIdBytes))
+      return E;
+
+  if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+    return createFileError(Config.InputFilename, std::move(E));
+
+  if (Error E = writeOutput(Config, *Obj, Out, OutputElfType))
+    return createFileError(Config.InputFilename, std::move(E));
+  if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput)
+    if (Error E =
+            linkToBuildIdDir(Config, Config.OutputFilename,
+                             Config.BuildIdLinkOutput.getValue(), BuildIdBytes))
+      return createFileError(Config.OutputFilename, std::move(E));
+
+  return Error::success();
 }
 
 } // end namespace elf
diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.h b/tools/llvm-objcopy/ELF/ELFObjcopy.h
index 43f41c00ce5b..e13e237e29c4 100644
--- a/tools/llvm-objcopy/ELF/ELFObjcopy.h
+++ b/tools/llvm-objcopy/ELF/ELFObjcopy.h
@@ -1,9 +1,8 @@
 //===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 #define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
 
 namespace llvm {
+class Error;
 class MemoryBuffer;
 
 namespace object {
@@ -22,10 +22,12 @@ struct CopyConfig;
 class Buffer;
 
 namespace elf {
-void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
-                               Buffer &Out);
-void executeObjcopyOnBinary(const CopyConfig &Config,
-                            object::ELFObjectFileBase &In, Buffer &Out);
+Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+                           Buffer &Out);
+Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+                                Buffer &Out);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::ELFObjectFileBase &In, Buffer &Out);
 
 } // end namespace elf
 } // end namespace objcopy
diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp
index 3d3e029c09eb..fa696380e17c 100644
--- a/tools/llvm-objcopy/ELF/Object.cpp
+++ b/tools/llvm-objcopy/ELF/Object.cpp
@@ -1,9 +1,8 @@
 //===- Object.cpp ---------------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,6 +17,7 @@
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/Path.h"
@@ -25,6 +25,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -36,8 +37,8 @@ using namespace object;
 using namespace ELF;
 
 template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
-  uint8_t *B = Buf.getBufferStart();
-  B += Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
+  uint8_t *B = Buf.getBufferStart() + Obj.ProgramHdrSegment.Offset +
+               Seg.Index * sizeof(Elf_Phdr);
   Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
   Phdr.p_type = Seg.Type;
   Phdr.p_flags = Seg.Flags;
@@ -49,15 +50,24 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
   Phdr.p_align = Seg.Align;
 }
 
-void SectionBase::removeSectionReferences(const SectionBase *Sec) {}
-void SectionBase::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {}
+Error SectionBase::removeSectionReferences(
+    bool AllowBrokenLinks,
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  return Error::success();
+}
+
+Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+  return Error::success();
+}
+
 void SectionBase::initialize(SectionTableRef SecTable) {}
 void SectionBase::finalize() {}
 void SectionBase::markSymbols() {}
+void SectionBase::replaceSectionReferences(
+    const DenseMap<SectionBase *, SectionBase *> &) {}
 
 template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
-  uint8_t *B = Buf.getBufferStart();
-  B += Sec.HeaderOffset;
+  uint8_t *B = Buf.getBufferStart() + Sec.HeaderOffset;
   Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
   Shdr.sh_name = Sec.NameIndex;
   Shdr.sh_type = Sec.Type;
@@ -113,30 +123,270 @@ template <class ELFT>
 void ELFSectionSizer<ELFT>::visit(DecompressedSection &Sec) {}
 
 void BinarySectionWriter::visit(const SectionIndexSection &Sec) {
-  error("Cannot write symbol section index table '" + Sec.Name + "' ");
+  error("cannot write symbol section index table '" + Sec.Name + "' ");
 }
 
 void BinarySectionWriter::visit(const SymbolTableSection &Sec) {
-  error("Cannot write symbol table '" + Sec.Name + "' out to binary");
+  error("cannot write symbol table '" + Sec.Name + "' out to binary");
 }
 
 void BinarySectionWriter::visit(const RelocationSection &Sec) {
-  error("Cannot write relocation section '" + Sec.Name + "' out to binary");
+  error("cannot write relocation section '" + Sec.Name + "' out to binary");
 }
 
 void BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
-  error("Cannot write '" + Sec.Name + "' out to binary");
+  error("cannot write '" + Sec.Name + "' out to binary");
 }
 
 void BinarySectionWriter::visit(const GroupSection &Sec) {
-  error("Cannot write '" + Sec.Name + "' out to binary");
+  error("cannot write '" + Sec.Name + "' out to binary");
 }
 
 void SectionWriter::visit(const Section &Sec) {
-  if (Sec.Type == SHT_NOBITS)
-    return;
-  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  llvm::copy(Sec.Contents, Buf);
+  if (Sec.Type != SHT_NOBITS)
+    llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
+}
+
+static bool addressOverflows32bit(uint64_t Addr) { // True if Addr needs > 32 bits.
+  // Sign-extended 32-bit addresses (e.g. 0xFFFFFFFF80000000) are OK.
+  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX; // +2^31 wraps those below 2^32.
+}
+
+template <class T> static T checkedGetHex(StringRef S) { // Parses S as base 16.
+  T Value;
+  bool Fail = S.getAsInteger(16, Value);
+  assert(!Fail); // A parse failure here is treated as a programming error.
+  (void)Fail; // Keeps Fail "used" when assert() compiles to nothing.
+  return Value;
+}
+
+// Fills exactly Len bytes of the buffer starting at It with hexadecimal
+// characters representing value 'X' (zero-padded); returns It + Len.
+template <class T, class Iterator>
+static Iterator utohexstr(T X, Iterator It, size_t Len) {
+  // Fill range with '0'.
+  std::fill(It, It + Len, '0');
+
+  for (long I = Len - 1; I >= 0; --I) { // Written from the last digit backwards.
+    unsigned char Mod = static_cast<unsigned char>(X) & 15; // Lowest nibble.
+    *(It + I) = hexdigit(Mod, false);
+    X >>= 4;
+  }
+  assert(X == 0); // X must fit in Len hex digits.
+  return It + Len;
+}
+
+uint8_t IHexRecord::getChecksum(StringRef S) { // Negated 8-bit sum of S's bytes.
+  assert((S.size() & 1) == 0); // S must hold whole bytes (two hex digits each).
+  uint8_t Checksum = 0;
+  while (!S.empty()) {
+    Checksum += checkedGetHex<uint8_t>(S.take_front(2)); // Wraps modulo 256.
+    S = S.drop_front(2);
+  }
+  return -Checksum; // Text that already ends with its checksum byte yields 0.
+}
+
+IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
+                                 ArrayRef<uint8_t> Data) { // Formats one record.
+  IHexLineData Line(getLineLength(Data.size()));
+  assert(Line.size());
+  auto Iter = Line.begin();
+  *Iter++ = ':'; // Every record line starts with ':'.
+  Iter = utohexstr(Data.size(), Iter, 2); // Payload byte count, 2 hex digits.
+  Iter = utohexstr(Addr, Iter, 4); // Address field, 4 hex digits.
+  Iter = utohexstr(Type, Iter, 2); // Record type, 2 hex digits.
+  for (uint8_t X : Data)
+    Iter = utohexstr(X, Iter, 2); // Each payload byte as 2 hex digits.
+  StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter)); // All after ':'.
+  Iter = utohexstr(getChecksum(S), Iter, 2); // Checksum over S, 2 hex digits.
+  *Iter++ = '\r';
+  *Iter++ = '\n';
+  assert(Iter == Line.end()); // getLineLength() must match what was written.
+  return Line;
+}
+
+static Error checkRecord(const IHexRecord &R) { // Validates R per record type.
+  switch (R.Type) {
+  case IHexRecord::Data:
+    if (R.HexData.size() == 0)
+      return createStringError(
+          errc::invalid_argument,
+          "zero data length is not allowed for data records");
+    break;
+  case IHexRecord::EndOfFile:
+    break; // No constraint is checked for this type.
+  case IHexRecord::SegmentAddr:
+    // 20-bit segment address. Data length must be 2 bytes
+    // (4 characters in hex).
+    if (R.HexData.size() != 4)
+      return createStringError(
+          errc::invalid_argument,
+          "segment address data should be 2 bytes in size");
+    break;
+  case IHexRecord::StartAddr80x86:
+  case IHexRecord::StartAddr:
+    if (R.HexData.size() != 8) // 4 bytes, i.e. 8 hex characters.
+      return createStringError(errc::invalid_argument,
+                               "start address data should be 4 bytes in size");
+    // According to Intel HEX specification '03' record
+    // only specifies the code address within the 20-bit
+    // segmented address space of the 8086/80186. This
+    // means 12 high order bits should be zeroes.
+    if (R.Type == IHexRecord::StartAddr80x86 &&
+        R.HexData.take_front(3) != "000") // 3 hex digits == 12 bits.
+      return createStringError(errc::invalid_argument,
+                               "start address exceeds 20 bit for 80x86");
+    break;
+  case IHexRecord::ExtendedAddr:
+    // 16-31 bits of linear base address
+    if (R.HexData.size() != 4) // 2 bytes, i.e. 4 hex characters.
+      return createStringError(
+          errc::invalid_argument,
+          "extended address data should be 2 bytes in size");
+    break;
+  default:
+    // Unknown record type
+    return createStringError(errc::invalid_argument, "unknown record type: %u",
+                             static_cast<unsigned>(R.Type));
+  }
+  return Error::success();
+}
+
+// Checks that an IHEX line starts with ':' and that every later character
+// is a hexadecimal digit. This allows converting hexadecimal data to
+// integers without extra verification.
+static Error checkChars(StringRef Line) {
+  assert(!Line.empty());
+  if (Line[0] != ':')
+    return createStringError(errc::invalid_argument,
+                             "missing ':' in the beginning of line.");
+
+  for (size_t Pos = 1; Pos < Line.size(); ++Pos)
+    if (hexDigitValue(Line[Pos]) == -1U) // -1U is treated as "not a hex digit".
+      return createStringError(errc::invalid_argument,
+                               "invalid character at position %zu.", Pos + 1); // 1-based.
+  return Error::success();
+}
+
+Expected<IHexRecord> IHexRecord::parse(StringRef Line) { // Parses one record line.
+  assert(!Line.empty());
+
+  // ':' + Length + Address + Type + Checksum with empty data ':LLAAAATTCC'
+  if (Line.size() < 11)
+    return createStringError(errc::invalid_argument,
+                             "line is too short: %zu chars.", Line.size());
+
+  if (Error E = checkChars(Line)) // After this, checkedGetHex() calls are safe.
+    return std::move(E);
+
+  IHexRecord Rec;
+  size_t DataLen = checkedGetHex<uint8_t>(Line.substr(1, 2)); // Payload bytes.
+  if (Line.size() != getLength(DataLen))
+    return createStringError(errc::invalid_argument,
+                             "invalid line length %zu (should be %zu)",
+                             Line.size(), getLength(DataLen));
+
+  Rec.Addr = checkedGetHex<uint16_t>(Line.substr(3, 4));
+  Rec.Type = checkedGetHex<uint8_t>(Line.substr(7, 2));
+  Rec.HexData = Line.substr(9, DataLen * 2); // Kept as hex text, 2 chars per byte.
+
+  if (getChecksum(Line.drop_front(1)) != 0) // Covers everything after ':'.
+    return createStringError(errc::invalid_argument, "incorrect checksum.");
+  if (Error E = checkRecord(Rec)) // Per-type payload validation.
+    return std::move(E);
+  return Rec;
+}
+
+static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
+  Segment *Seg = Sec->ParentSegment;
+  if (Seg && Seg->Type != ELF::PT_LOAD)
+    Seg = nullptr; // Only a PT_LOAD parent is used to derive the address.
+  return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
+             : Sec->Addr; // Without such a parent, fall back to Sec->Addr.
+}
+
+void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
+                                         ArrayRef<uint8_t> Data) {
+  assert(Data.size() == Sec->Size);
+  const uint32_t ChunkSize = 16; // Upper bound on payload bytes per record.
+  uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU; // Low 32 bits only.
+  while (!Data.empty()) {
+    uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
+    if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) { // Past the current 64 KiB window.
+      if (Addr > 0xFFFFFU) {
+        // Write extended address record, zeroing segment address
+        // if needed.
+        if (SegmentAddr != 0)
+          SegmentAddr = writeSegmentAddr(0U);
+        BaseAddr = writeBaseAddr(Addr);
+      } else {
+        // Addr fits in 20 bits, so a segment address record is enough.
+        SegmentAddr = writeSegmentAddr(Addr);
+      }
+    }
+    uint64_t SegOffset = Addr - BaseAddr - SegmentAddr; // Offset inside window.
+    assert(SegOffset <= 0xFFFFU);
+    DataSize = std::min(DataSize, 0x10000U - SegOffset); // Stop at window end.
+    writeData(0, SegOffset, Data.take_front(DataSize)); // Record type 0.
+    Addr += DataSize;
+    Data = Data.drop_front(DataSize);
+  }
+}
+
+uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFU); // Addr must fit in 20 bits.
+  uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0}; // (Addr & 0xF0000) >> 4, high byte first.
+  writeData(2, 0, Data); // Record type 2, address field 0.
+  return Addr & 0xF0000U; // Only bits 16-19 of Addr are kept.
+}
+
+uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFFFFU); // Addr must fit in 32 bits.
+  uint64_t Base = Addr & 0xFFFF0000U; // Bits 16-31 of Addr.
+  uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
+                    static_cast<uint8_t>((Base >> 16) & 0xFF)}; // High byte first.
+  writeData(4, 0, Data); // Record type 4, address field 0.
+  return Base;
+}
+
+void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr,
+                                      ArrayRef<uint8_t> Data) {
+  Offset += IHexRecord::getLineLength(Data.size()); // Only counts; writes no bytes.
+}
+
+void IHexSectionWriterBase::visit(const Section &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+  writeSection(&Sec, Sec.Data);
+}
+
+void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+  // Check that the sizer has already done its work.
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  // We are free to pass an invalid pointer to writeSection as long
+  // as we don't actually write any data. The real writer class has
+  // to override this method.
+  writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
+}
+
+void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
+                                  ArrayRef<uint8_t> Data) {
+  IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data); // Record text.
+  memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size()); // Copy into Out at Offset.
+  Offset += HexData.size(); // The next record starts right after this one.
+}
+
+void IHexSectionWriter::visit(const StringTableSection &Sec) {
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  std::vector<uint8_t> Data(Sec.Size); // Scratch buffer for the table bytes.
+  Sec.StrTabBuilder.write(Data.data()); // Materialize the string table.
+  writeSection(&Sec, Data); // Emit it like any other section payload.
 }
 
 void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
@@ -144,8 +394,7 @@ void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
 void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
 
 void SectionWriter::visit(const OwnedDataSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  llvm::copy(Sec.Data, Buf);
+  llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
 }
 
 static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
@@ -161,8 +410,7 @@ getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
   const bool IsGnuDebug = isDataGnuCompressed(Data);
   const uint64_t DecompressedSize =
       IsGnuDebug
-          ? support::endian::read64be(reinterpret_cast<const uint64_t *>(
-                Data.data() + ZlibGnuMagic.size()))
+          ? support::endian::read64be(Data.data() + ZlibGnuMagic.size())
           : reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
   const uint64_t DecompressedAlign =
       IsGnuDebug ? 1
@@ -174,13 +422,6 @@ getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-
-  if (!zlib::isAvailable()) {
-    std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
-    return;
-  }
-
   const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
                                 ? (ZlibGnuMagic.size() + sizeof(Sec.Size))
                                 : sizeof(Elf_Chdr_Impl<ELFT>);
@@ -194,11 +435,12 @@ void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
                                  static_cast<size_t>(Sec.Size)))
     reportError(Sec.Name, std::move(E));
 
+  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
   std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
 }
 
 void BinarySectionWriter::visit(const DecompressedSection &Sec) {
-  error("Cannot write compressed section '" + Sec.Name + "' ");
+  error("cannot write compressed section '" + Sec.Name + "' ");
 }
 
 void DecompressedSection::accept(SectionVisitor &Visitor) const {
@@ -217,15 +459,22 @@ void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
+void OwnedDataSection::appendHexData(StringRef HexData) { // Appends decoded bytes.
+  assert((HexData.size() & 1) == 0); // Two hex characters per byte.
+  while (!HexData.empty()) {
+    Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
+    HexData = HexData.drop_front(2);
+  }
+  Size = Data.size(); // Keep the section size in sync with the payload.
+}
+
 void BinarySectionWriter::visit(const CompressedSection &Sec) {
-  error("Cannot write compressed section '" + Sec.Name + "' ");
+  error("cannot write compressed section '" + Sec.Name + "' ");
 }
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart();
-  Buf += Sec.Offset;
-
+  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
   if (Sec.CompressionType == DebugCompressionType::None) {
     std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
     return;
@@ -255,12 +504,6 @@ CompressedSection::CompressedSection(const SectionBase &Sec,
                                      DebugCompressionType CompressionType)
     : SectionBase(Sec), CompressionType(CompressionType),
       DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
-
-  if (!zlib::isAvailable()) {
-    CompressionType = DebugCompressionType::None;
-    return;
-  }
-
   if (Error E = zlib::compress(
           StringRef(reinterpret_cast<const char *>(OriginalData.data()),
                     OriginalData.size()),
@@ -299,16 +542,16 @@ void CompressedSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
-void StringTableSection::addString(StringRef Name) {
-  StrTabBuilder.add(Name);
-  Size = StrTabBuilder.getSize();
-}
+void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); }
 
 uint32_t StringTableSection::findIndex(StringRef Name) const {
   return StrTabBuilder.getOffset(Name);
 }
 
-void StringTableSection::finalize() { StrTabBuilder.finalize(); }
+void StringTableSection::prepareForLayout() {
+  StrTabBuilder.finalize();
+  Size = StrTabBuilder.getSize(); // Size is taken from the finalized builder.
+}
 
 void SectionWriter::visit(const StringTableSection &Sec) {
   Sec.StrTabBuilder.write(Out.getBufferStart() + Sec.Offset);
@@ -325,8 +568,7 @@ void StringTableSection::accept(MutableSectionVisitor &Visitor) {
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
   uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  auto *IndexesBuffer = reinterpret_cast<Elf_Word *>(Buf);
-  llvm::copy(Sec.Indexes, IndexesBuffer);
+  llvm::copy(Sec.Indexes, reinterpret_cast<Elf_Word *>(Buf));
 }
 
 void SectionIndexSection::initialize(SectionTableRef SecTable) {
@@ -355,6 +597,11 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
   case SHN_COMMON:
     return true;
   }
+
+  if (Machine == EM_AMDGPU) {
+    return Index == SHN_AMDGPU_LDS;
+  }
+
   if (Machine == EM_HEXAGON) {
     switch (Index) {
     case SHN_HEXAGON_SCOMMON:
@@ -376,21 +623,17 @@ uint16_t Symbol::getShndx() const {
       return SHN_XINDEX;
     return DefinedIn->Index;
   }
-  switch (ShndxType) {
-  // This means that we don't have a defined section but we do need to
-  // output a legitimate section index.
-  case SYMBOL_SIMPLE_INDEX:
+
+  if (ShndxType == SYMBOL_SIMPLE_INDEX) {
+    // This means that we don't have a defined section but we do need to
+    // output a legitimate section index.
     return SHN_UNDEF;
-  case SYMBOL_ABS:
-  case SYMBOL_COMMON:
-  case SYMBOL_HEXAGON_SCOMMON:
-  case SYMBOL_HEXAGON_SCOMMON_2:
-  case SYMBOL_HEXAGON_SCOMMON_4:
-  case SYMBOL_HEXAGON_SCOMMON_8:
-  case SYMBOL_XINDEX:
-    return static_cast<uint16_t>(ShndxType);
   }
-  llvm_unreachable("Symbol with invalid ShndxType encountered");
+
+  assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON ||
+         (ShndxType >= SYMBOL_LOPROC && ShndxType <= SYMBOL_HIPROC) ||
+         (ShndxType >= SYMBOL_LOOS && ShndxType <= SYMBOL_HIOS));
+  return static_cast<uint16_t>(ShndxType);
 }
 
 bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; }
@@ -404,7 +647,7 @@ void SymbolTableSection::assignIndices() {
 void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
                                    SectionBase *DefinedIn, uint64_t Value,
                                    uint8_t Visibility, uint16_t Shndx,
-                                   uint64_t Size) {
+                                   uint64_t SymbolSize) {
   Symbol Sym;
   Sym.Name = Name.str();
   Sym.Binding = Bind;
@@ -420,21 +663,28 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
   }
   Sym.Value = Value;
   Sym.Visibility = Visibility;
-  Sym.Size = Size;
+  Sym.Size = SymbolSize;
   Sym.Index = Symbols.size();
   Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
   Size += this->EntrySize;
 }
 
-void SymbolTableSection::removeSectionReferences(const SectionBase *Sec) {
-  if (SectionIndexTable == Sec)
+Error SymbolTableSection::removeSectionReferences(
+    bool AllowBrokenLinks,
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(SectionIndexTable)) // Dropped without raising an error.
     SectionIndexTable = nullptr;
-  if (SymbolNames == Sec) {
-    error("String table " + SymbolNames->Name +
-          " cannot be removed because it is referenced by the symbol table " +
-          this->Name);
+  if (ToRemove(SymbolNames)) {
+    if (!AllowBrokenLinks) // Removing the name table is an error by default.
+      return createStringError(
+          llvm::errc::invalid_argument,
+          "string table '%s' cannot be removed because it is "
+          "referenced by the symbol table '%s'",
+          SymbolNames->Name.data(), this->Name.data());
+    SymbolNames = nullptr; // Broken links allowed: detach the name table.
   }
-  removeSymbols([Sec](const Symbol &Sym) { return Sym.DefinedIn == Sec; });
+  return removeSymbols( // Also drop symbols defined in removed sections.
+      [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); });
 }
 
 void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
@@ -446,7 +696,7 @@ void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
   assignIndices();
 }
 
-void SymbolTableSection::removeSymbols(
+Error SymbolTableSection::removeSymbols(
     function_ref<bool(const Symbol &)> ToRemove) {
   Symbols.erase(
       std::remove_if(std::begin(Symbols) + 1, std::end(Symbols),
@@ -454,6 +704,14 @@ void SymbolTableSection::removeSymbols(
       std::end(Symbols));
   Size = Symbols.size() * EntrySize;
   assignIndices();
+  return Error::success();
+}
+
+void SymbolTableSection::replaceSectionReferences(
+    const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+  for (std::unique_ptr<Symbol> &Sym : Symbols)
+    if (SectionBase *To = FromTo.lookup(Sym->DefinedIn)) // Null: no replacement.
+      Sym->DefinedIn = To; // Re-point the symbol at the replacement section.
 }
 
 void SymbolTableSection::initialize(SectionTableRef SecTable) {
@@ -467,40 +725,50 @@ void SymbolTableSection::initialize(SectionTableRef SecTable) {
 }
 
 void SymbolTableSection::finalize() {
-  // Make sure SymbolNames is finalized before getting name indexes.
-  SymbolNames->finalize();
-
   uint32_t MaxLocalIndex = 0;
-  for (auto &Sym : Symbols) {
-    Sym->NameIndex = SymbolNames->findIndex(Sym->Name);
+  for (std::unique_ptr<Symbol> &Sym : Symbols) {
+    Sym->NameIndex = // 0 when there is no symbol names section.
+        SymbolNames == nullptr ? 0 : SymbolNames->findIndex(Sym->Name);
     if (Sym->Binding == STB_LOCAL)
       MaxLocalIndex = std::max(MaxLocalIndex, Sym->Index);
   }
   // Now we need to set the Link and Info fields.
-  Link = SymbolNames->Index;
+  Link = SymbolNames == nullptr ? 0 : SymbolNames->Index; // 0 if no names section.
   Info = MaxLocalIndex + 1;
 }
 
 void SymbolTableSection::prepareForLayout() {
-  // Add all potential section indexes before file layout so that the section
-  // index section has the approprite size.
-  if (SectionIndexTable != nullptr) {
-    for (const auto &Sym : Symbols) {
-      if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
-        SectionIndexTable->addIndex(Sym->DefinedIn->Index);
-      else
-        SectionIndexTable->addIndex(SHN_UNDEF);
-    }
-  }
+  // Reserve the proper amount of space in the section index table, so we can
+  // lay out sections correctly. We will fill the table with correct
+  // indexes later in fillShndxTable.
+  if (SectionIndexTable) // Sized by the number of symbols.
+    SectionIndexTable->reserve(Symbols.size());
+
   // Add all of our strings to SymbolNames so that SymbolNames has the right
   // size before layout is decided.
-  for (auto &Sym : Symbols)
-    SymbolNames->addString(Sym->Name);
+  // If the symbol names section has been removed, don't try to add strings to
+  // the table.
+  if (SymbolNames != nullptr)
+    for (std::unique_ptr<Symbol> &Sym : Symbols)
+      SymbolNames->addString(Sym->Name);
+}
+
+void SymbolTableSection::fillShndxTable() {
+  if (SectionIndexTable == nullptr)
+    return; // No section index table, nothing to fill.
+  // Fill section index table with real section indexes. This function must
+  // be called after assignOffsets.
+  for (const std::unique_ptr<Symbol> &Sym : Symbols) {
+    if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
+      SectionIndexTable->addIndex(Sym->DefinedIn->Index); // Index is in the reserved range.
+    else
+      SectionIndexTable->addIndex(SHN_UNDEF); // Still one entry per symbol.
+  }
 }
 
 const Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) const {
   if (Symbols.size() <= Index)
-    error("Invalid symbol index: " + Twine(Index));
+    error("invalid symbol index: " + Twine(Index));
   return Symbols[Index].get();
 }
 
@@ -511,11 +779,9 @@ Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) {
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart();
-  Buf += Sec.Offset;
-  Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Buf);
+  Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset);
   // Loop though symbols setting each entry of the symbol table.
-  for (auto &Symbol : Sec.Symbols) {
+  for (const std::unique_ptr<Symbol> &Symbol : Sec.Symbols) {
     Sym->st_name = Symbol->NameIndex;
     Sym->st_value = Symbol->Value;
     Sym->st_size = Symbol->Size;
@@ -535,16 +801,31 @@ void SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
-template <class SymTabType>
-void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
-    const SectionBase *Sec) {
-  if (Symbols == Sec) {
-    error("Symbol table " + Symbols->Name +
-          " cannot be removed because it is "
-          "referenced by the relocation "
-          "section " +
-          this->Name);
+Error RelocationSection::removeSectionReferences(
+    bool AllowBrokenLinks,
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(Symbols)) {
+    if (!AllowBrokenLinks) // Removing the linked symbol table is an error by default.
+      return createStringError(
+          llvm::errc::invalid_argument,
+          "symbol table '%s' cannot be removed because it is "
+          "referenced by the relocation section '%s'",
+          Symbols->Name.data(), this->Name.data());
+    Symbols = nullptr; // Broken links allowed: detach the symbol table.
   }
+
+  for (const Relocation &R : Relocations) { // This check ignores AllowBrokenLinks.
+    if (!R.RelocSymbol->DefinedIn || !ToRemove(R.RelocSymbol->DefinedIn))
+      continue; // The symbol has no section, or its section is kept.
+    return createStringError(llvm::errc::invalid_argument,
+                             "section '%s' cannot be removed: (%s+0x%" PRIx64
+                             ") has relocation against symbol '%s'",
+                             R.RelocSymbol->DefinedIn->Name.data(),
+                             SecToApplyRel->Name.data(), R.Offset,
+                             R.RelocSymbol->Name.c_str());
+  }
+
+  return Error::success();
 }
 
 template <class SymTabType>
@@ -609,12 +890,15 @@ void RelocationSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
-void RelocationSection::removeSymbols(
+Error RelocationSection::removeSymbols( // Fails if a relocation names a removed symbol.
     function_ref<bool(const Symbol &)> ToRemove) {
   for (const Relocation &Reloc : Relocations)
     if (ToRemove(*Reloc.RelocSymbol))
-      error("not stripping symbol '" + Reloc.RelocSymbol->Name +
-            "' because it is named in a relocation");
+      return createStringError(
+          llvm::errc::invalid_argument,
+          "not stripping symbol '%s' because it is named in a relocation",
+          Reloc.RelocSymbol->Name.data());
+  return Error::success(); // No relocation names a symbol selected by ToRemove.
 }
 
 void RelocationSection::markSymbols() {
@@ -622,9 +906,15 @@ void RelocationSection::markSymbols() {
     Reloc.RelocSymbol->Referenced = true;
 }
 
+void RelocationSection::replaceSectionReferences(
+    const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+  // Update the target section if it was replaced.
+  if (SectionBase *To = FromTo.lookup(SecToApplyRel))
+    SecToApplyRel = To;
+}
+
 void SectionWriter::visit(const DynamicRelocationSection &Sec) {
-  llvm::copy(Sec.Contents,
-            Out.getBufferStart() + Sec.Offset);
+  llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
 }
 
 void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
@@ -635,13 +925,38 @@ void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
-void Section::removeSectionReferences(const SectionBase *Sec) {
-  if (LinkSection == Sec) {
-    error("Section " + LinkSection->Name +
-          " cannot be removed because it is "
-          "referenced by the section " +
-          this->Name);
+Error DynamicRelocationSection::removeSectionReferences(
+    bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(Symbols)) {
+    if (!AllowBrokenLinks)
+      return createStringError(
+          llvm::errc::invalid_argument,
+          "symbol table '%s' cannot be removed because it is "
+          "referenced by the relocation section '%s'",
+          Symbols->Name.data(), this->Name.data());
+    Symbols = nullptr;
+  }
+
+  // SecToApplyRel contains a section referenced by sh_info field. It keeps
+  // a section to which the relocation section applies. When we remove any
+  // sections we also remove their relocation sections. Since we do that much
+  // earlier, this assert should never be triggered.
+  assert(!SecToApplyRel || !ToRemove(SecToApplyRel));
+  return Error::success();
+}
+
+Error Section::removeSectionReferences(
+    bool AllowBrokenDependency,
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(LinkSection)) {
+    if (!AllowBrokenDependency)
+      return createStringError(llvm::errc::invalid_argument,
+                               "section '%s' cannot be removed because it is "
+                               "referenced by the section '%s'",
+                               LinkSection->Name.data(), this->Name.data());
+    LinkSection = nullptr;
   }
+  return Error::success();
 }
 
 void GroupSection::finalize() {
@@ -649,13 +964,13 @@ void GroupSection::finalize() {
   this->Link = SymTab->Index;
 }
 
-void GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
-  if (ToRemove(*Sym)) {
-    error("Symbol " + Sym->Name +
-          " cannot be removed because it is "
-          "referenced by the section " +
-          this->Name + "[" + Twine(this->Index) + "]");
-  }
+Error GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+  if (ToRemove(*Sym))
+    return createStringError(llvm::errc::invalid_argument,
+                             "symbol '%s' cannot be removed because it is "
+                             "referenced by the section '%s[%d]'",
+                             Sym->Name.data(), this->Name.data(), this->Index);
+  return Error::success();
 }
 
 void GroupSection::markSymbols() {
@@ -663,19 +978,26 @@ void GroupSection::markSymbols() {
     Sym->Referenced = true;
 }
 
+void GroupSection::replaceSectionReferences(
+    const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+  for (SectionBase *&Sec : GroupMembers)
+    if (SectionBase *To = FromTo.lookup(Sec))
+      Sec = To;
+}
+
 void Section::initialize(SectionTableRef SecTable) {
-  if (Link != ELF::SHN_UNDEF) {
-    LinkSection =
-        SecTable.getSection(Link, "Link field value " + Twine(Link) +
-                                      " in section " + Name + " is invalid");
-    if (LinkSection->Type == ELF::SHT_SYMTAB)
-      LinkSection = nullptr;
-  }
+  if (Link == ELF::SHN_UNDEF)
+    return;
+  LinkSection =
+      SecTable.getSection(Link, "Link field value " + Twine(Link) +
+                                    " in section " + Name + " is invalid");
+  if (LinkSection->Type == ELF::SHT_SYMTAB)
+    LinkSection = nullptr;
 }
 
 void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; }
 
-void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
+void GnuDebugLinkSection::init(StringRef File) {
   FileName = sys::path::filename(File);
   // The format for the .gnu_debuglink starts with the file name and is
   // followed by a null terminator and then the CRC32 of the file. The CRC32
@@ -690,31 +1012,21 @@ void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
   // establish the order that sections should go in. By using the maximum
   // possible offset we cause this section to wind up at the end.
   OriginalOffset = std::numeric_limits<uint64_t>::max();
-  JamCRC CRC;
-  CRC.update(ArrayRef<char>(Data.data(), Data.size()));
-  // The CRC32 value needs to be complemented because the JamCRC dosn't
-  // finalize the CRC32 value. It also dosn't negate the initial CRC32 value
-  // but it starts by default at 0xFFFFFFFF which is the complement of zero.
-  CRC32 = ~CRC.getCRC();
 }
 
-GnuDebugLinkSection::GnuDebugLinkSection(StringRef File) : FileName(File) {
-  // Read in the file to compute the CRC of it.
-  auto DebugOrErr = MemoryBuffer::getFile(File);
-  if (!DebugOrErr)
-    error("'" + File + "': " + DebugOrErr.getError().message());
-  auto Debug = std::move(*DebugOrErr);
-  init(File, Debug->getBuffer());
+GnuDebugLinkSection::GnuDebugLinkSection(StringRef File,
+                                         uint32_t PrecomputedCRC)
+    : FileName(File), CRC32(PrecomputedCRC) {
+  init(File);
 }
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
-  auto Buf = Out.getBufferStart() + Sec.Offset;
-  char *File = reinterpret_cast<char *>(Buf);
+  unsigned char *Buf = Out.getBufferStart() + Sec.Offset;
   Elf_Word *CRC =
       reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word));
   *CRC = Sec.CRC32;
-  llvm::copy(Sec.FileName, File);
+  llvm::copy(Sec.FileName, Buf);
 }
 
 void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
@@ -730,7 +1042,7 @@ void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
   ELF::Elf32_Word *Buf =
       reinterpret_cast<ELF::Elf32_Word *>(Out.getBufferStart() + Sec.Offset);
   *Buf++ = Sec.FlagWord;
-  for (const auto *S : Sec.GroupMembers)
+  for (SectionBase *S : Sec.GroupMembers)
     support::endian::write32<ELFT::TargetEndianness>(Buf++, S->Index);
 }
 
@@ -750,6 +1062,20 @@ static bool sectionWithinSegment(const SectionBase &Section,
   // segments and ensures that the section "belongs" to the second segment and
   // not the first.
   uint64_t SecSize = Section.Size ? Section.Size : 1;
+
+  if (Section.Type == SHT_NOBITS) {
+    if (!(Section.Flags & SHF_ALLOC))
+      return false;
+
+    bool SectionIsTLS = Section.Flags & SHF_TLS;
+    bool SegmentIsTLS = Segment.Type == PT_TLS;
+    if (SectionIsTLS != SegmentIsTLS)
+      return false;
+
+    return Segment.VAddr <= Section.Addr &&
+           Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize;
+  }
+
   return Segment.Offset <= Section.OriginalOffset &&
          Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize;
 }
@@ -781,7 +1107,7 @@ static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) {
   return A->Index < B->Index;
 }
 
-void BinaryELFBuilder::initFileHeader() {
+void BasicELFBuilder::initFileHeader() {
   Obj->Flags = 0x0;
   Obj->Type = ET_REL;
   Obj->OSABI = ELFOSABI_NONE;
@@ -791,9 +1117,9 @@ void BinaryELFBuilder::initFileHeader() {
   Obj->Version = 1;
 }
 
-void BinaryELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
+void BasicELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
 
-StringTableSection *BinaryELFBuilder::addStrTab() {
+StringTableSection *BasicELFBuilder::addStrTab() {
   auto &StrTab = Obj->addSection<StringTableSection>();
   StrTab.Name = ".strtab";
 
@@ -801,7 +1127,7 @@ StringTableSection *BinaryELFBuilder::addStrTab() {
   return &StrTab;
 }
 
-SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
+SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) {
   auto &SymTab = Obj->addSection<SymbolTableSection>();
 
   SymTab.Name = ".symtab";
@@ -814,6 +1140,11 @@ SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
   return &SymTab;
 }
 
+void BasicELFBuilder::initSections() {
+  for (auto &Section : Obj->sections())
+    Section.initialize(Obj->sections());
+}
+
 void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
   auto Data = ArrayRef<uint8_t>(
       reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
@@ -837,25 +1168,75 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
                     /*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
 }
 
-void BinaryELFBuilder::initSections() {
-  for (auto &Section : Obj->sections()) {
-    Section.initialize(Obj->sections());
+std::unique_ptr<Object> BinaryELFBuilder::build() {
+  initFileHeader();
+  initHeaderSegment();
+
+  SymbolTableSection *SymTab = addSymTab(addStrTab());
+  initSections();
+  addData(SymTab);
+
+  return std::move(Obj);
+}
+
+// Adds sections from IHEX data file. Data should have been
+// fully validated by this time.
+void IHexELFBuilder::addDataSections() {
+  OwnedDataSection *Section = nullptr;
+  uint64_t SegmentAddr = 0, BaseAddr = 0;
+  uint32_t SecNo = 1;
+
+  for (const IHexRecord &R : Records) {
+    uint64_t RecAddr;
+    switch (R.Type) {
+    case IHexRecord::Data:
+      // Ignore empty data records
+      if (R.HexData.empty())
+        continue;
+      RecAddr = R.Addr + SegmentAddr + BaseAddr;
+      if (!Section || Section->Addr + Section->Size != RecAddr)
+        // OriginalOffset field is only used to sort section properly, so
+        // instead of keeping track of real offset in IHEX file, we use
+        // section number.
+        Section = &Obj->addSection<OwnedDataSection>(
+            ".sec" + std::to_string(SecNo++), RecAddr,
+            ELF::SHF_ALLOC | ELF::SHF_WRITE, SecNo);
+      Section->appendHexData(R.HexData);
+      break;
+    case IHexRecord::EndOfFile:
+      break;
+    case IHexRecord::SegmentAddr:
+      // 20-bit segment address.
+      SegmentAddr = checkedGetHex<uint16_t>(R.HexData) << 4;
+      break;
+    case IHexRecord::StartAddr80x86:
+    case IHexRecord::StartAddr:
+      Obj->Entry = checkedGetHex<uint32_t>(R.HexData);
+      assert(Obj->Entry <= 0xFFFFFU);
+      break;
+    case IHexRecord::ExtendedAddr:
+      // 16-31 bits of linear base address
+      BaseAddr = checkedGetHex<uint16_t>(R.HexData) << 16;
+      break;
+    default:
+      llvm_unreachable("unknown record type");
+    }
   }
 }
 
-std::unique_ptr<Object> BinaryELFBuilder::build() {
+std::unique_ptr<Object> IHexELFBuilder::build() {
   initFileHeader();
   initHeaderSegment();
   StringTableSection *StrTab = addStrTab();
-  SymbolTableSection *SymTab = addSymTab(StrTab);
+  addSymTab(StrTab);
   initSections();
-  addData(SymTab);
+  addDataSections();
 
   return std::move(Obj);
 }
 
 template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
-  for (auto &Parent : Obj.segments()) {
+  for (Segment &Parent : Obj.segments()) {
     // Every segment will overlap with itself but we don't want a segment to
     // be it's own parent so we avoid that situation.
     if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) {
@@ -870,23 +1251,43 @@ template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
   }
 }
 
-template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
+template <class ELFT> void ELFBuilder<ELFT>::findEhdrOffset() {
+  if (!ExtractPartition)
+    return;
+
+  for (const SectionBase &Section : Obj.sections()) {
+    if (Section.Type == SHT_LLVM_PART_EHDR &&
+        Section.Name == *ExtractPartition) {
+      EhdrOffset = Section.Offset;
+      return;
+    }
+  }
+  error("could not find partition named '" + *ExtractPartition + "'");
+}
+
+template <class ELFT>
+void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
   uint32_t Index = 0;
-  for (const auto &Phdr : unwrapOrError(ElfFile.program_headers())) {
-    ArrayRef<uint8_t> Data{ElfFile.base() + Phdr.p_offset,
+  for (const auto &Phdr : unwrapOrError(HeadersFile.program_headers())) {
+    if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize())
+      error("program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) +
+            " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) +
+            " goes past the end of the file");
+
+    ArrayRef<uint8_t> Data{HeadersFile.base() + Phdr.p_offset,
                            (size_t)Phdr.p_filesz};
     Segment &Seg = Obj.addSegment(Data);
     Seg.Type = Phdr.p_type;
     Seg.Flags = Phdr.p_flags;
-    Seg.OriginalOffset = Phdr.p_offset;
-    Seg.Offset = Phdr.p_offset;
+    Seg.OriginalOffset = Phdr.p_offset + EhdrOffset;
+    Seg.Offset = Phdr.p_offset + EhdrOffset;
     Seg.VAddr = Phdr.p_vaddr;
     Seg.PAddr = Phdr.p_paddr;
     Seg.FileSize = Phdr.p_filesz;
     Seg.MemSize = Phdr.p_memsz;
     Seg.Align = Phdr.p_align;
     Seg.Index = Index++;
-    for (auto &Section : Obj.sections()) {
+    for (SectionBase &Section : Obj.sections()) {
       if (sectionWithinSegment(Section, Seg)) {
         Seg.addSection(&Section);
         if (!Section.ParentSegment ||
@@ -899,8 +1300,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
 
   auto &ElfHdr = Obj.ElfHdrSegment;
   ElfHdr.Index = Index++;
+  ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset;
 
-  const auto &Ehdr = *ElfFile.getHeader();
+  const auto &Ehdr = *HeadersFile.getHeader();
   auto &PrHdr = Obj.ProgramHdrSegment;
   PrHdr.Type = PT_PHDR;
   PrHdr.Flags = 0;
@@ -908,7 +1310,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
   // Whereas this works automatically for ElfHdr, here OriginalOffset is
   // always non-zero and to ensure the equation we assign the same value to
   // VAddr as well.
-  PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = Ehdr.e_phoff;
+  PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff;
   PrHdr.PAddr = 0;
   PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum;
   // The spec requires us to naturally align all the fields.
@@ -917,7 +1319,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
 
   // Now we do an O(n^2) loop through the segments in order to match up
   // segments.
-  for (auto &Child : Obj.segments())
+  for (Segment &Child : Obj.segments())
     setParentSegment(Child);
   setParentSegment(ElfHdr);
   setParentSegment(PrHdr);
@@ -925,22 +1327,25 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
 
 template <class ELFT>
 void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
-  auto SecTable = Obj.sections();
+  if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0)
+    error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" +
+          GroupSec->Name + "'");
+  SectionTableRef SecTable = Obj.sections();
   auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>(
       GroupSec->Link,
-      "Link field value " + Twine(GroupSec->Link) + " in section " +
-          GroupSec->Name + " is invalid",
-      "Link field value " + Twine(GroupSec->Link) + " in section " +
-          GroupSec->Name + " is not a symbol table");
-  auto Sym = SymTab->getSymbolByIndex(GroupSec->Info);
+      "link field value '" + Twine(GroupSec->Link) + "' in section '" +
+          GroupSec->Name + "' is invalid",
+      "link field value '" + Twine(GroupSec->Link) + "' in section '" +
+          GroupSec->Name + "' is not a symbol table");
+  Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info);
   if (!Sym)
-    error("Info field value " + Twine(GroupSec->Info) + " in section " +
-          GroupSec->Name + " is not a valid symbol index");
+    error("info field value '" + Twine(GroupSec->Info) + "' in section '" +
+          GroupSec->Name + "' is not a valid symbol index");
   GroupSec->setSymTab(SymTab);
   GroupSec->setSymbol(Sym);
   if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) ||
       GroupSec->Contents.empty())
-    error("The content of the section " + GroupSec->Name + " is malformed");
+    error("the content of the section " + GroupSec->Name + " is malformed");
   const ELF::Elf32_Word *Word =
       reinterpret_cast<const ELF::Elf32_Word *>(GroupSec->Contents.data());
   const ELF::Elf32_Word *End =
@@ -949,8 +1354,8 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
   for (; Word != End; ++Word) {
     uint32_t Index = support::endian::read32<ELFT::TargetEndianness>(Word);
     GroupSec->addMember(SecTable.getSection(
-        Index, "Group member index " + Twine(Index) + " in section " +
-                   GroupSec->Name + " is invalid"));
+        Index, "group member index " + Twine(Index) + " in section '" +
+                   GroupSec->Name + "' is invalid"));
   }
 }
 
@@ -967,31 +1372,31 @@ void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
 
     if (Sym.st_shndx == SHN_XINDEX) {
       if (SymTab->getShndxTable() == nullptr)
-        error("Symbol '" + Name +
-              "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists.");
+        error("symbol '" + Name +
+              "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists");
       if (ShndxData.data() == nullptr) {
         const Elf_Shdr &ShndxSec =
             *unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index));
         ShndxData = unwrapOrError(
             ElfFile.template getSectionContentsAsArray<Elf_Word>(&ShndxSec));
         if (ShndxData.size() != Symbols.size())
-          error("Symbol section index table does not have the same number of "
-                "entries as the symbol table.");
+          error("symbol section index table does not have the same number of "
+                "entries as the symbol table");
       }
       Elf_Word Index = ShndxData[&Sym - Symbols.begin()];
       DefSection = Obj.sections().getSection(
           Index,
-          "Symbol '" + Name + "' has invalid section index " + Twine(Index));
+          "symbol '" + Name + "' has invalid section index " + Twine(Index));
     } else if (Sym.st_shndx >= SHN_LORESERVE) {
       if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
         error(
-            "Symbol '" + Name +
+            "symbol '" + Name +
             "' has unsupported value greater than or equal to SHN_LORESERVE: " +
             Twine(Sym.st_shndx));
       }
     } else if (Sym.st_shndx != SHN_UNDEF) {
       DefSection = Obj.sections().getSection(
-          Sym.st_shndx, "Symbol '" + Name +
+          Sym.st_shndx, "symbol '" + Name +
                             "' is defined has invalid section index " +
                             Twine(Sym.st_shndx));
     }
@@ -1086,7 +1491,8 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
   default: {
     Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
 
-    if (isDataGnuCompressed(Data) || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+    StringRef Name = unwrapOrError(ElfFile.getSectionName(&Shdr));
+    if (Name.startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
       uint64_t DecompressedSize, DecompressedAlign;
       std::tie(DecompressedSize, DecompressedAlign) =
           getDecompressedSizeAndAlignment<ELFT>(Data);
@@ -1123,7 +1529,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
         ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
                           (Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
   }
+}
 
+template <class ELFT> void ELFBuilder<ELFT>::readSections() {
   // If a section index table exists we'll need to initialize it before we
   // initialize the symbol table because the symbol table might need to
   // reference it.
@@ -1157,11 +1565,34 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
       initGroupSection(GroupSec);
     }
   }
+
+  uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
+  if (ShstrIndex == SHN_XINDEX)
+    ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
+
+  if (ShstrIndex == SHN_UNDEF)
+    Obj.HadShdrs = false;
+  else
+    Obj.SectionNames =
+        Obj.sections().template getSectionOfType<StringTableSection>(
+            ShstrIndex,
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+                " is invalid",
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+                " is not a string table");
 }
 
 template <class ELFT> void ELFBuilder<ELFT>::build() {
-  const auto &Ehdr = *ElfFile.getHeader();
+  readSectionHeaders();
+  findEhdrOffset();
+
+  // The ELFFile whose ELF headers and program headers are copied into the
+  // output file. Normally the same as ElfFile, but if we're extracting a
+  // loadable partition it will point to the partition's headers.
+  ELFFile<ELFT> HeadersFile = unwrapOrError(ELFFile<ELFT>::create(toStringRef(
+      {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})));
 
+  auto &Ehdr = *HeadersFile.getHeader();
   Obj.OSABI = Ehdr.e_ident[EI_OSABI];
   Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
   Obj.Type = Ehdr.e_type;
@@ -1170,25 +1601,8 @@ template <class ELFT> void ELFBuilder<ELFT>::build() {
   Obj.Entry = Ehdr.e_entry;
   Obj.Flags = Ehdr.e_flags;
 
-  readSectionHeaders();
-  readProgramHeaders();
-
-  uint32_t ShstrIndex = Ehdr.e_shstrndx;
-  if (ShstrIndex == SHN_XINDEX)
-    ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
-
-  Obj.SectionNames =
-      Obj.sections().template getSectionOfType<StringTableSection>(
-          ShstrIndex,
-          "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
-              " in elf header " + " is invalid",
-          "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
-              " in elf header " + " is not a string table");
-}
-
-// A generic size function which computes sizes of any random access range.
-template <class R> size_t size(R &&Range) {
-  return static_cast<size_t>(std::end(Range) - std::begin(Range));
+  readSections();
+  readProgramHeaders(HeadersFile);
 }
 
 Writer::~Writer() {}
@@ -1199,31 +1613,61 @@ std::unique_ptr<Object> BinaryReader::create() const {
   return BinaryELFBuilder(MInfo.EMachine, MemBuf).build();
 }
 
+Expected<std::vector<IHexRecord>> IHexReader::parse() const {
+  SmallVector<StringRef, 16> Lines;
+  std::vector<IHexRecord> Records;
+  bool HasSections = false;
+
+  MemBuf->getBuffer().split(Lines, '\n');
+  Records.reserve(Lines.size());
+  for (size_t LineNo = 1; LineNo <= Lines.size(); ++LineNo) {
+    StringRef Line = Lines[LineNo - 1].trim();
+    if (Line.empty())
+      continue;
+
+    Expected<IHexRecord> R = IHexRecord::parse(Line);
+    if (!R)
+      return parseError(LineNo, R.takeError());
+    if (R->Type == IHexRecord::EndOfFile)
+      break;
+    HasSections |= (R->Type == IHexRecord::Data);
+    Records.push_back(*R);
+  }
+  if (!HasSections)
+    return parseError(-1U, "no sections");
+
+  return std::move(Records);
+}
+
+std::unique_ptr<Object> IHexReader::create() const {
+  std::vector<IHexRecord> Records = unwrapOrError(parse());
+  return IHexELFBuilder(Records).build();
+}
+
 std::unique_ptr<Object> ELFReader::create() const {
   auto Obj = llvm::make_unique<Object>();
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
-    ELFBuilder<ELF32LE> Builder(*O, *Obj);
+    ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
-    ELFBuilder<ELF64LE> Builder(*O, *Obj);
+    ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
-    ELFBuilder<ELF32BE> Builder(*O, *Obj);
+    ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
-    ELFBuilder<ELF64BE> Builder(*O, *Obj);
+    ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   }
-  error("Invalid file type");
+  error("invalid file type");
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
-  uint8_t *B = Buf.getBufferStart();
-  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
+  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf.getBufferStart());
   std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
   Ehdr.e_ident[EI_MAG0] = 0x7f;
   Ehdr.e_ident[EI_MAG1] = 'E';
@@ -1247,7 +1691,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
   Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
   Ehdr.e_flags = Obj.Flags;
   Ehdr.e_ehsize = sizeof(Elf_Ehdr);
-  if (WriteSectionHeaders && size(Obj.sections()) != 0) {
+  if (WriteSectionHeaders && Obj.sections().size() != 0) {
     Ehdr.e_shentsize = sizeof(Elf_Shdr);
     Ehdr.e_shoff = Obj.SHOffset;
     // """
@@ -1256,7 +1700,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
     // number of section header table entries is contained in the sh_size field
     // of the section header at index 0.
     // """
-    auto Shnum = size(Obj.sections()) + 1;
+    auto Shnum = Obj.sections().size() + 1;
     if (Shnum >= SHN_LORESERVE)
       Ehdr.e_shnum = 0;
     else
@@ -1285,17 +1729,17 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdrs() {
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
-  uint8_t *B = Buf.getBufferStart() + Obj.SHOffset;
   // This reference serves to write the dummy section header at the begining
   // of the file. It is not used for anything else
-  Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
+  Elf_Shdr &Shdr =
+      *reinterpret_cast<Elf_Shdr *>(Buf.getBufferStart() + Obj.SHOffset);
   Shdr.sh_name = 0;
   Shdr.sh_type = SHT_NULL;
   Shdr.sh_flags = 0;
   Shdr.sh_addr = 0;
   Shdr.sh_offset = 0;
   // See writeEhdr for why we do this.
-  uint64_t Shnum = size(Obj.sections()) + 1;
+  uint64_t Shnum = Obj.sections().size() + 1;
   if (Shnum >= SHN_LORESERVE)
     Shdr.sh_size = Shnum;
   else
@@ -1309,16 +1753,44 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
   Shdr.sh_addralign = 0;
   Shdr.sh_entsize = 0;
 
-  for (auto &Sec : Obj.sections())
+  for (SectionBase &Sec : Obj.sections())
     writeShdr(Sec);
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeSectionData() {
-  for (auto &Sec : Obj.sections())
-    Sec.accept(*SecWriter);
+  for (SectionBase &Sec : Obj.sections())
+    // Segments are responsible for writing their contents, so only write the
+    // section data if the section is not in a segment. Note that this renders
+    // sections in segments effectively immutable.
+    if (Sec.ParentSegment == nullptr)
+      Sec.accept(*SecWriter);
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
+  for (Segment &Seg : Obj.segments()) {
+    uint8_t *B = Buf.getBufferStart() + Seg.Offset;
+    assert(Seg.FileSize == Seg.getContents().size() &&
+           "Segment size must match contents size");
+    std::memcpy(B, Seg.getContents().data(), Seg.FileSize);
+  }
+
+  // Iterate over removed sections and overwrite their old data with zeroes.
+  for (auto &Sec : Obj.removedSections()) {
+    Segment *Parent = Sec.ParentSegment;
+    if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0)
+      continue;
+    uint64_t Offset =
+        Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+    std::memset(Buf.getBufferStart() + Offset, 0, Sec.Size);
+  }
 }
 
-void Object::removeSections(std::function<bool(const SectionBase &)> ToRemove) {
+template <class ELFT>
+ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
+    : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs) {}
+
+Error Object::removeSections(bool AllowBrokenLinks,
+    std::function<bool(const SectionBase &)> ToRemove) {
 
   auto Iter = std::stable_partition(
       std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) {
@@ -1339,32 +1811,55 @@ void Object::removeSections(std::function<bool(const SectionBase &)> ToRemove) {
   // Now make sure there are no remaining references to the sections that will
   // be removed. Sometimes it is impossible to remove a reference so we emit
   // an error here instead.
+  std::unordered_set<const SectionBase *> RemoveSections;
+  RemoveSections.reserve(std::distance(Iter, std::end(Sections)));
   for (auto &RemoveSec : make_range(Iter, std::end(Sections))) {
     for (auto &Segment : Segments)
       Segment->removeSection(RemoveSec.get());
-    for (auto &KeepSec : make_range(std::begin(Sections), Iter))
-      KeepSec->removeSectionReferences(RemoveSec.get());
+    RemoveSections.insert(RemoveSec.get());
   }
-  // Now finally get rid of them all togethor.
+
+  // For each section that remains alive, we want to remove the dead references.
+  // This either might update the content of the section (e.g. remove symbols
+  // from symbol table that belongs to removed section) or trigger an error if
+  // a live section critically depends on a section being removed somehow
+  // (e.g. the removed section is referenced by a relocation).
+  for (auto &KeepSec : make_range(std::begin(Sections), Iter)) {
+    if (Error E = KeepSec->removeSectionReferences(AllowBrokenLinks,
+            [&RemoveSections](const SectionBase *Sec) {
+              return RemoveSections.find(Sec) != RemoveSections.end();
+            }))
+      return E;
+  }
+
+  // Transfer removed sections into the Object RemovedSections container for use
+  // later.
+  std::move(Iter, Sections.end(), std::back_inserter(RemovedSections));
+  // Now finally get rid of them all together.
   Sections.erase(Iter, std::end(Sections));
+  return Error::success();
 }
 
-void Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
-  if (!SymbolTable)
-    return;
-
-  for (const SecPtr &Sec : Sections)
-    Sec->removeSymbols(ToRemove);
+Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+  if (SymbolTable)
+    for (const SecPtr &Sec : Sections)
+      if (Error E = Sec->removeSymbols(ToRemove))
+        return E;
+  return Error::success();
 }
 
 void Object::sortSections() {
-  // Put all sections in offset order. Maintain the ordering as closely as
-  // possible while meeting that demand however.
-  auto CompareSections = [](const SecPtr &A, const SecPtr &B) {
+  // Use stable_sort to maintain the original ordering as closely as possible.
+  llvm::stable_sort(Sections, [](const SecPtr &A, const SecPtr &B) {
+    // Put SHT_GROUP sections first, since group section headers must come
+    // before the sections they contain. This also matches what GNU objcopy
+    // does.
+    if (A->Type != B->Type &&
+        (A->Type == ELF::SHT_GROUP || B->Type == ELF::SHT_GROUP))
+      return A->Type == ELF::SHT_GROUP;
+    // For all other sections, sort by offset order.
     return A->OriginalOffset < B->OriginalOffset;
-  };
-  std::stable_sort(std::begin(this->Sections), std::end(this->Sections),
-                   CompareSections);
+  });
 }
 
 static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
@@ -1382,14 +1877,13 @@ static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
 
 // Orders segments such that if x = y->ParentSegment then y comes before x.
 static void orderSegments(std::vector<Segment *> &Segments) {
-  std::stable_sort(std::begin(Segments), std::end(Segments),
-                   compareSegmentsByOffset);
+  llvm::stable_sort(Segments, compareSegmentsByOffset);
 }
 
 // This function finds a consistent layout for a list of segments starting from
 // an Offset. It assumes that Segments have been sorted by OrderSegments and
 // returns an Offset one past the end of the last segment.
-static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
+static uint64_t layoutSegments(std::vector<Segment *> &Segments,
                                uint64_t Offset) {
   assert(std::is_sorted(std::begin(Segments), std::end(Segments),
                         compareSegmentsByOffset));
@@ -1398,20 +1892,20 @@ static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
   // then it's acceptable, but not ideal, to simply move it to after the
   // segments. So we can simply layout segments one after the other accounting
   // for alignment.
-  for (auto &Segment : Segments) {
+  for (Segment *Seg : Segments) {
     // We assume that segments have been ordered by OriginalOffset and Index
     // such that a parent segment will always come before a child segment in
     // OrderedSegments. This means that the Offset of the ParentSegment should
     // already be set and we can set our offset relative to it.
-    if (Segment->ParentSegment != nullptr) {
-      auto Parent = Segment->ParentSegment;
-      Segment->Offset =
-          Parent->Offset + Segment->OriginalOffset - Parent->OriginalOffset;
+    if (Seg->ParentSegment != nullptr) {
+      Segment *Parent = Seg->ParentSegment;
+      Seg->Offset =
+          Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset;
     } else {
-      Offset = alignToAddr(Offset, Segment->VAddr, Segment->Align);
-      Segment->Offset = Offset;
+      Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align);
+      Seg->Offset = Offset;
     }
-    Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
+    Offset = std::max(Offset, Seg->Offset + Seg->FileSize);
   }
   return Offset;
 }
@@ -1448,10 +1942,9 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
 }
 
 template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
-  auto &ElfHdr = Obj.ElfHdrSegment;
+  Segment &ElfHdr = Obj.ElfHdrSegment;
   ElfHdr.Type = PT_PHDR;
   ElfHdr.Flags = 0;
-  ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
   ElfHdr.VAddr = 0;
   ElfHdr.PAddr = 0;
   ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
@@ -1463,7 +1956,7 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
   // so that we know that anytime ->ParentSegment is set that segment has
   // already had its offset properly set.
   std::vector<Segment *> OrderedSegments;
-  for (auto &Segment : Obj.segments())
+  for (Segment &Segment : Obj.segments())
     OrderedSegments.push_back(&Segment);
   OrderedSegments.push_back(&Obj.ElfHdrSegment);
   OrderedSegments.push_back(&Obj.ProgramHdrSegment);
@@ -1472,7 +1965,7 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
   // Since the ELF Header (ElfHdrSegment) must be at the start of the file,
   // we start at offset 0.
   uint64_t Offset = 0;
-  Offset = LayoutSegments(OrderedSegments, Offset);
+  Offset = layoutSegments(OrderedSegments, Offset);
   Offset = layoutSections(Obj.sections(), Offset);
   // If we need to write the section header table out then we need to align the
   // Offset so that SHOffset is valid.
@@ -1484,28 +1977,32 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
 template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const {
   // We already have the section header offset so we can calculate the total
   // size by just adding up the size of each section header.
-  auto NullSectionSize = WriteSectionHeaders ? sizeof(Elf_Shdr) : 0;
-  return Obj.SHOffset + size(Obj.sections()) * sizeof(Elf_Shdr) +
-         NullSectionSize;
+  if (!WriteSectionHeaders)
+    return Obj.SHOffset;
+  size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr.
+  return Obj.SHOffset + ShdrCount * sizeof(Elf_Shdr);
 }
 
-template <class ELFT> void ELFWriter<ELFT>::write() {
+template <class ELFT> Error ELFWriter<ELFT>::write() {
+  // Segment data must be written first, so that the ELF header and program
+  // header tables can overwrite it, if covered by a segment.
+  writeSegmentData();
   writeEhdr();
   writePhdrs();
   writeSectionData();
   if (WriteSectionHeaders)
     writeShdrs();
-  if (auto E = Buf.commit())
-    reportError(Buf.getName(), errorToErrorCode(std::move(E)));
+  return Buf.commit();
 }
 
-template <class ELFT> void ELFWriter<ELFT>::finalize() {
+template <class ELFT> Error ELFWriter<ELFT>::finalize() {
   // It could happen that SectionNames has been removed and yet the user wants
   // a section header table output. We need to throw an error if a user tries
   // to do that.
   if (Obj.SectionNames == nullptr && WriteSectionHeaders)
-    error("Cannot write section header table because section header string "
-          "table was removed.");
+    return createStringError(llvm::errc::invalid_argument,
+                             "cannot write section header table because "
+                             "section header string table was removed");
 
   Obj.sortSections();
 
@@ -1513,8 +2010,8 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
   // if we need large indexes or not. We can assign indexes first and check as
   // we go to see if we will actully need large indexes.
   bool NeedsLargeIndexes = false;
-  if (size(Obj.sections()) >= SHN_LORESERVE) {
-    auto Sections = Obj.sections();
+  if (Obj.sections().size() >= SHN_LORESERVE) {
+    SectionTableRef Sections = Obj.sections();
     NeedsLargeIndexes =
         std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(),
                     [](const SectionBase &Sec) { return Sec.HasSymbol; });
@@ -1536,9 +2033,12 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
     // Since we don't need SectionIndexTable we should remove it and all
     // references to it.
     if (Obj.SectionIndexTable != nullptr) {
-      Obj.removeSections([this](const SectionBase &Sec) {
-        return &Sec == Obj.SectionIndexTable;
-      });
+      // We do not support sections referring to the section index table.
+      if (Error E = Obj.removeSections(false /*AllowBrokenLinks*/,
+                                       [this](const SectionBase &Sec) {
+                                         return &Sec == Obj.SectionIndexTable;
+                                       }))
+        return E;
     }
   }
 
@@ -1567,15 +2067,23 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
   if (Obj.SymbolTable != nullptr)
     Obj.SymbolTable->prepareForLayout();
 
+  // Now that all strings are added we want to finalize string table builders,
+  // because that affects section sizes which in turn affects section offsets.
+  for (SectionBase &Sec : Obj.sections())
+    if (auto StrTab = dyn_cast<StringTableSection>(&Sec))
+      StrTab->prepareForLayout();
+
   assignOffsets();
 
-  // Finalize SectionNames first so that we can assign name indexes.
-  if (Obj.SectionNames != nullptr)
-    Obj.SectionNames->finalize();
+  // layoutSections could have modified section indexes, so we need
+  // to fill the index table after assignOffsets.
+  if (Obj.SymbolTable != nullptr)
+    Obj.SymbolTable->fillShndxTable();
+
   // Finally now that all offsets and indexes have been set we can finalize any
   // remaining issues.
   uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr);
-  for (auto &Section : Obj.sections()) {
+  for (SectionBase &Section : Obj.sections()) {
     Section.HeaderOffset = Offset;
     Offset += sizeof(Elf_Shdr);
     if (WriteSectionHeaders)
@@ -1583,21 +2091,20 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
     Section.finalize();
   }
 
-  Buf.allocate(totalSize());
+  if (Error E = Buf.allocate(totalSize()))
+    return E;
   SecWriter = llvm::make_unique<ELFSectionWriter<ELFT>>(Buf);
+  return Error::success();
 }
 
-void BinaryWriter::write() {
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) == 0)
-      continue;
-    Section.accept(*SecWriter);
-  }
-  if (auto E = Buf.commit())
-    reportError(Buf.getName(), errorToErrorCode(std::move(E)));
+Error BinaryWriter::write() {
+  for (auto &Section : Obj.sections())
+    if (Section.Flags & SHF_ALLOC)
+      Section.accept(*SecWriter);
+  return Buf.commit();
 }
 
-void BinaryWriter::finalize() {
+Error BinaryWriter::finalize() {
   // TODO: Create a filter range to construct OrderedSegments from so that this
   // code can be deduped with assignOffsets above. This should also solve the
   // todo below for LayoutSections.
@@ -1606,11 +2113,9 @@ void BinaryWriter::finalize() {
   // already had it's offset properly set. We only want to consider the segments
   // that will affect layout of allocated sections so we only add those.
   std::vector<Segment *> OrderedSegments;
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr) {
+  for (SectionBase &Section : Obj.sections())
+    if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr)
       OrderedSegments.push_back(Section.ParentSegment);
-    }
-  }
 
   // For binary output, we're going to use physical addresses instead of
   // virtual addresses, since a binary output is used for cases like ROM
@@ -1622,8 +2127,7 @@ void BinaryWriter::finalize() {
     for (Segment *Seg : OrderedSegments)
       Seg->PAddr = Seg->VAddr;
 
-  std::stable_sort(std::begin(OrderedSegments), std::end(OrderedSegments),
-                   compareSegmentsByPAddr);
+  llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr);
 
   // Because we add a ParentSegment for each section we might have duplicate
   // segments in OrderedSegments. If there were duplicates then LayoutSegments
@@ -1638,8 +2142,8 @@ void BinaryWriter::finalize() {
   // our layout algorithm to proceed as expected while not writing out the gap
   // at the start.
   if (!OrderedSegments.empty()) {
-    auto Seg = OrderedSegments[0];
-    auto Sec = Seg->firstSection();
+    Segment *Seg = OrderedSegments[0];
+    const SectionBase *Sec = Seg->firstSection();
     auto Diff = Sec->OriginalOffset - Seg->OriginalOffset;
     Seg->OriginalOffset += Diff;
     // The size needs to be shrunk as well.
@@ -1648,7 +2152,7 @@ void BinaryWriter::finalize() {
     // section.
     Seg->PAddr += Diff;
     uint64_t LowestPAddr = Seg->PAddr;
-    for (auto &Segment : OrderedSegments) {
+    for (Segment *Segment : OrderedSegments) {
       Segment->Offset = Segment->PAddr - LowestPAddr;
       Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
     }
@@ -1659,11 +2163,9 @@ void BinaryWriter::finalize() {
   // not hold. Then pass such a range to LayoutSections instead of constructing
   // AllocatedSections here.
   std::vector<SectionBase *> AllocatedSections;
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) == 0)
-      continue;
-    AllocatedSections.push_back(&Section);
-  }
+  for (SectionBase &Section : Obj.sections())
+    if (Section.Flags & SHF_ALLOC)
+      AllocatedSections.push_back(&Section);
   layoutSections(make_pointee_range(AllocatedSections), Offset);
 
   // Now that every section has been laid out we just need to compute the total
@@ -1671,13 +2173,117 @@ void BinaryWriter::finalize() {
   // LayoutSections, because we want to truncate the last segment to the end of
   // its last section, to match GNU objcopy's behaviour.
   TotalSize = 0;
-  for (const auto &Section : AllocatedSections) {
+  for (SectionBase *Section : AllocatedSections)
     if (Section->Type != SHT_NOBITS)
       TotalSize = std::max(TotalSize, Section->Offset + Section->Size);
-  }
 
-  Buf.allocate(TotalSize);
+  if (Error E = Buf.allocate(TotalSize))
+    return E;
   SecWriter = llvm::make_unique<BinarySectionWriter>(Buf);
+  return Error::success();
+}
+
+bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
+                                            const SectionBase *Rhs) const {
+  return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) < // Only the low 32 bits
+         (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU);  // of each address count.
+}
+
+uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
+  // A zero entry point produces no record; report zero bytes written.
+  if (Obj.Entry == 0)
+    return 0;
+  uint8_t Data[4] = {};
+  uint8_t RecType = IHexRecord::StartAddr;
+  if (Obj.Entry <= 0xFFFFFU) {
+    // Data[0] gets bits 16-19 (moved to its high nibble), Data[2..3] the low 16.
+    Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
+    support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
+                           support::big);
+    RecType = IHexRecord::StartAddr80x86;
+  } else {
+    support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
+                           support::big);
+  }
+  IHexLineData Record = IHexRecord::getLine(RecType, 0, Data);
+  memcpy(Buf, Record.data(), Record.size());
+  return Record.size();
+}
+
+uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) {
+  IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {});
+  memcpy(Buf, HexData.data(), HexData.size()); // Buf must hold the whole line.
+  return HexData.size(); // Number of bytes copied into Buf.
+}
+
+Error IHexWriter::write() {
+  // Emit the records of every collected section, in the set's order.
+  IHexSectionWriter RecordWriter(Buf);
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(RecordWriter);
+
+  // The entry point record (if any) and then the EOF record are placed
+  // directly after the section records.
+  uint64_t Written = RecordWriter.getBufferOffset();
+  Written += writeEntryPointRecord(Buf.getBufferStart() + Written);
+  Written += writeEndOfFileRecord(Buf.getBufferStart() + Written);
+  assert(Written == TotalSize);
+  return Buf.commit();
+}
+
+Error IHexWriter::checkSection(const SectionBase &Sec) {
+  const uint64_t First = sectionPhysicalAddr(&Sec);
+  const uint64_t Last = First + Sec.Size - 1; // Inclusive end of the range.
+  if (!addressOverflows32bit(First) && !addressOverflows32bit(Last))
+    return Error::success();
+  return createStringError(errc::invalid_argument,
+      "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit",
+      Sec.Name.c_str(), First, Last);
+}
+
+Error IHexWriter::finalize() {
+  bool UseSegments = false;
+  // Empty sections are skipped: checkSection's 'Addr + Size - 1' would wrap.
+  auto ShouldWrite = [](const SectionBase &Sec) {
+    return (Sec.Flags & ELF::SHF_ALLOC) && Sec.Type != ELF::SHT_NOBITS &&
+           Sec.Size > 0;
+  };
+  auto IsInPtLoad = [](const SectionBase &Sec) {
+    return Sec.ParentSegment && Sec.ParentSegment->Type == ELF::PT_LOAD;
+  };
+
+  // We can't write 64-bit addresses.
+  if (addressOverflows32bit(Obj.Entry))
+    return createStringError(errc::invalid_argument,
+                             "Entry point address 0x%llx overflows 32 bits.",
+                             Obj.Entry);
+
+  // If any section we're to write has a segment then we
+  // switch to using physical addresses. Otherwise we
+  // use the section virtual address.
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && IsInPtLoad(Section)) {
+      UseSegments = true;
+      break;
+    }
+
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) {
+      if (Error E = checkSection(Section))
+        return E;
+      Sections.insert(&Section);
+    }
+
+  IHexSectionWriterBase LengthCalc(Buf);
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(LengthCalc);
+
+  // We need space to write section records + StartAddress record
+  // (if start address is not zero) + EndOfFile record.
+  TotalSize = LengthCalc.getBufferOffset() +
+              (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
+              IHexRecord::getLineLength(0);
+  return Buf.allocate(TotalSize); // Propagates any allocation error as-is.
 }
 
 template class ELFBuilder<ELF64LE>;
diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h
index e5730cd543ee..f3df93b9662f 100644
--- a/tools/llvm-objcopy/ELF/Object.h
+++ b/tools/llvm-objcopy/ELF/Object.h
@@ -1,9 +1,8 @@
 //===- Object.h -------------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,8 +17,8 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/JamCRC.h"
 #include <cstddef>
 #include <cstdint>
 #include <functional>
@@ -60,6 +59,7 @@ public:
 
   iterator begin() { return iterator(Sections.data()); }
   iterator end() { return iterator(Sections.data() + Sections.size()); }
+  size_t size() const { return Sections.size(); }
 
   SectionBase *getSection(uint32_t Index, Twine ErrMsg);
 
@@ -108,7 +108,7 @@ protected:
   Buffer &Out;
 
 public:
-  virtual ~SectionWriter(){};
+  virtual ~SectionWriter() = default;
 
   void visit(const Section &Sec) override;
   void visit(const OwnedDataSection &Sec) override;
@@ -169,6 +169,8 @@ public:
 
 #define MAKE_SEC_WRITER_FRIEND                                                 \
   friend class SectionWriter;                                                  \
+  friend class IHexSectionWriterBase;                                          \
+  friend class IHexSectionWriter;                                              \
   template <class ELFT> friend class ELFSectionWriter;                         \
   template <class ELFT> friend class ELFSectionSizer;
 
@@ -187,6 +189,118 @@ public:
   explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
 };
 
+using IHexLineData = SmallVector<char, 64>; // One formatted record line.
+
+struct IHexRecord {
+  // Memory address of the record.
+  uint16_t Addr;
+  // Record type (see below).
+  uint16_t Type;
+  // Record data in hexadecimal form.
+  StringRef HexData;
+
+  // Helper method to get the length of the record text itself,
+  // excluding the line terminator (getLineLength adds that).
+  static size_t getLength(size_t DataSize) {
+    // ':LLAAAATT[DD...DD]CC': 11 fixed characters + 2 per data byte.
+    return DataSize * 2 + 11;
+  }
+
+  // Gets length of line in a file (getLength + CRLF).
+  static size_t getLineLength(size_t DataSize) {
+    return getLength(DataSize) + 2;
+  }
+
+  // Given type, address and data returns line which can
+  // be written to output file.
+  static IHexLineData getLine(uint8_t Type, uint16_t Addr,
+                              ArrayRef<uint8_t> Data);
+
+  // Parses the line and returns record if possible.
+  // Line should be trimmed from whitespace characters.
+  static Expected<IHexRecord> parse(StringRef Line);
+
+  // Calculates checksum of stringified record representation.
+  // S must NOT contain leading ':' and trailing whitespace
+  // characters.
+  static uint8_t getChecksum(StringRef S);
+
+  enum Type {
+    // Contains data and a 16-bit starting address for the data.
+    // The byte count specifies number of data bytes in the record.
+    Data = 0,
+    // Must occur exactly once per file in the last line of the file.
+    // The data field is empty (thus byte count is 00) and the address
+    // field is typically 0000.
+    EndOfFile = 1,
+    // The data field contains a 16-bit segment base address (thus byte
+    // count is always 02) compatible with 80x86 real mode addressing.
+    // The address field (typically 0000) is ignored. The segment address
+    // from the most recent 02 record is multiplied by 16 and added to each
+    // subsequent data record address to form the physical starting address
+    // for the data. This allows addressing up to one megabyte of address
+    // space.
+    SegmentAddr = 2,
+    // For 80x86 processors, specifies the initial content of the CS:IP
+    // registers. The address field is 0000, the byte count is always 04,
+    // the first two data bytes are the CS value, the latter two are the
+    // IP value.
+    StartAddr80x86 = 3,
+    // Allows for 32 bit addressing (up to 4GiB). The record's address field
+    // is ignored (typically 0000) and its byte count is always 02. The two
+    // data bytes (big endian) specify the upper 16 bits of the 32 bit
+    // absolute address for all subsequent type 00 records.
+    ExtendedAddr = 4,
+    // The address field is 0000 (not used) and the byte count is always 04.
+    // The four data bytes represent a 32-bit address value. In the case of
+    // 80386 and higher CPUs, this address is loaded into the EIP register.
+    StartAddr = 5,
+    // We have no other valid types.
+    InvalidType = 6
+  };
+};
+
+// Base class for IHexSectionWriter. This class implements the writing
+// algorithm, but doesn't actually write records. It is used for output buffer
+// size calculation in IHexWriter::finalize.
+class IHexSectionWriterBase : public BinarySectionWriter {
+  // 20-bit segment address.
+  uint32_t SegmentAddr = 0;
+  // Extended linear address.
+  uint32_t BaseAddr = 0;
+
+  // Write segment address corresponding to 'Addr'.
+  uint64_t writeSegmentAddr(uint64_t Addr);
+  // Write extended linear (base) address corresponding to 'Addr'.
+  uint64_t writeBaseAddr(uint64_t Addr);
+
+protected:
+  // Offset in the output buffer.
+  uint64_t Offset = 0;
+
+  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
+  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
+
+public:
+  explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
+
+  uint64_t getBufferOffset() const { return Offset; } // Current Offset value.
+  void visit(const Section &Sec) final;
+  void visit(const OwnedDataSection &Sec) final;
+  void visit(const StringTableSection &Sec) override;
+  void visit(const DynamicRelocationSection &Sec) final;
+  using BinarySectionWriter::visit;
+};
+
+// Real IHEX section writer: overrides writeData and the string table visit.
+class IHexSectionWriter : public IHexSectionWriterBase {
+public:
+  IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
+
+  void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
+  void visit(const StringTableSection &Sec) override;
+};
+
 class Writer {
 protected:
   Object &Obj;
@@ -194,8 +308,8 @@ protected:
 
 public:
   virtual ~Writer();
-  virtual void finalize() = 0;
-  virtual void write() = 0;
+  virtual Error finalize() = 0;
+  virtual Error write() = 0;
 
   Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {}
 };
@@ -216,6 +330,7 @@ private:
   void writePhdrs();
   void writeShdrs();
   void writeSectionData();
+  void writeSegmentData();
 
   void assignOffsets();
 
@@ -225,12 +340,11 @@ private:
 
 public:
   virtual ~ELFWriter() {}
-  bool WriteSectionHeaders = true;
+  bool WriteSectionHeaders;
 
-  void finalize() override;
-  void write() override;
-  ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
-      : Writer(Obj, Buf), WriteSectionHeaders(WSH) {}
+  Error finalize() override;
+  Error write() override;
+  ELFWriter(Object &Obj, Buffer &Buf, bool WSH);
 };
 
 class BinaryWriter : public Writer {
@@ -241,11 +355,30 @@ private:
 
 public:
   ~BinaryWriter() {}
-  void finalize() override;
-  void write() override;
+  Error finalize() override;
+  Error write() override;
   BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
 };
 
+class IHexWriter : public Writer { // Writer producing IHEX records.
+  struct SectionCompare {
+    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
+  };
+  // Sections selected by finalize(), kept in SectionCompare order.
+  std::set<const SectionBase *, SectionCompare> Sections;
+  size_t TotalSize = 0; // Output size in bytes; computed by finalize().
+
+  Error checkSection(const SectionBase &Sec);
+  uint64_t writeEntryPointRecord(uint8_t *Buf); // Returns bytes stored at Buf.
+  uint64_t writeEndOfFileRecord(uint8_t *Buf);  // Returns bytes stored at Buf.
+
+public:
+  ~IHexWriter() {}
+  Error finalize() override;
+  Error write() override;
+  IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
+};
+
 class SectionBase {
 public:
   std::string Name;
@@ -274,11 +407,16 @@ public:
 
   virtual void initialize(SectionTableRef SecTable);
   virtual void finalize();
-  virtual void removeSectionReferences(const SectionBase *Sec);
-  virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+  // Remove references to these sections. The list of sections must be sorted.
+  virtual Error
+  removeSectionReferences(bool AllowBrokenLinks,
+                          function_ref<bool(const SectionBase *)> ToRemove);
+  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
   virtual void accept(SectionVisitor &Visitor) const = 0;
   virtual void accept(MutableSectionVisitor &Visitor) = 0;
   virtual void markSymbols();
+  virtual void
+  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
 };
 
 class Segment {
@@ -322,6 +460,8 @@ public:
 
   void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
   void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
+
+  ArrayRef<uint8_t> getContents() const { return Contents; }
 };
 
 class Section : public SectionBase {
@@ -335,7 +475,8 @@ public:
 
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
-  void removeSectionReferences(const SectionBase *Sec) override;
+  Error removeSectionReferences(bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
 };
@@ -354,6 +495,16 @@ public:
     OriginalOffset = std::numeric_limits<uint64_t>::max();
   }
 
+  OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
+                   uint64_t SecOff) {
+    Type = ELF::SHT_PROGBITS; // Fixed; every other field comes from the caller.
+    OriginalOffset = SecOff;
+    Flags = SecFlags;
+    Addr = SecAddr;
+    Name = SecName.str();
+  }
+
+  void appendHexData(StringRef HexData);
   void accept(SectionVisitor &Sec) const override;
   void accept(MutableSectionVisitor &Visitor) override;
 };
@@ -421,7 +572,7 @@ public:
 
   void addString(StringRef Name);
   uint32_t findIndex(StringRef Name) const;
-  void finalize() override;
+  void prepareForLayout();
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
 
@@ -440,10 +591,15 @@ enum SymbolShndxType {
   SYMBOL_SIMPLE_INDEX = 0,
   SYMBOL_ABS = ELF::SHN_ABS,
   SYMBOL_COMMON = ELF::SHN_COMMON,
+  SYMBOL_LOPROC = ELF::SHN_LOPROC,
+  SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
   SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
   SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
   SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
   SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
+  SYMBOL_HIPROC = ELF::SHN_HIPROC,
+  SYMBOL_LOOS = ELF::SHN_LOOS,
+  SYMBOL_HIOS = ELF::SHN_HIOS,
   SYMBOL_XINDEX = ELF::SHN_XINDEX,
 };
 
@@ -474,9 +630,14 @@ private:
 public:
   virtual ~SectionIndexSection() {}
   void addIndex(uint32_t Index) {
-    Indexes.push_back(Index);
-    Size += 4;
+    assert(Size > 0);
+    Indexes.push_back(Index);    
   }
+
+  void reserve(size_t NumSymbols) {
+    Size = NumSymbols * 4; // Each index takes 4 bytes.
+    Indexes.reserve(NumSymbols);
+  }
   void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
@@ -509,7 +670,7 @@ public:
 
   void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
                  uint64_t Value, uint8_t Visibility, uint16_t Shndx,
-                 uint64_t Size);
+                 uint64_t SymbolSize);
   void prepareForLayout();
   // An 'empty' symbol table still contains a null symbol.
   bool empty() const { return Symbols.size() == 1; }
@@ -517,17 +678,21 @@ public:
     SectionIndexTable = ShndxTable;
   }
   const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
+  void fillShndxTable();
   const SectionBase *getStrTab() const { return SymbolNames; }
   const Symbol *getSymbolByIndex(uint32_t Index) const;
   Symbol *getSymbolByIndex(uint32_t Index);
   void updateSymbols(function_ref<void(Symbol &)> Callable);
 
-  void removeSectionReferences(const SectionBase *Sec) override;
+  Error removeSectionReferences(bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
-  void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
 
   static bool classof(const SectionBase *S) {
     return S->Type == ELF::SHT_SYMTAB;
@@ -567,14 +732,14 @@ public:
 // that code between the two symbol table types.
 template <class SymTabType>
 class RelocSectionWithSymtabBase : public RelocationSectionBase {
-  SymTabType *Symbols = nullptr;
   void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
 
 protected:
   RelocSectionWithSymtabBase() = default;
 
+  SymTabType *Symbols = nullptr;
+
 public:
-  void removeSectionReferences(const SectionBase *Sec) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
 };
@@ -589,8 +754,12 @@ public:
   void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
-  void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  Error removeSectionReferences(bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
   void markSymbols() override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
 
   static bool classof(const SectionBase *S) {
     if (S->Flags & ELF::SHF_ALLOC)
@@ -624,8 +793,10 @@ public:
   void accept(SectionVisitor &) const override;
   void accept(MutableSectionVisitor &Visitor) override;
   void finalize() override;
-  void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
   void markSymbols() override;
+  void replaceSectionReferences(
+      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
 
   static bool classof(const SectionBase *S) {
     return S->Type == ELF::SHT_GROUP;
@@ -662,6 +833,9 @@ public:
 
   void accept(SectionVisitor &) const override;
   void accept(MutableSectionVisitor &Visitor) override;
+  Error removeSectionReferences(
+      bool AllowBrokenLinks,
+      function_ref<bool(const SectionBase *)> ToRemove) override;
 
   static bool classof(const SectionBase *S) {
     if (!(S->Flags & ELF::SHF_ALLOC))
@@ -677,11 +851,11 @@ private:
   StringRef FileName;
   uint32_t CRC32;
 
-  void init(StringRef File, StringRef Data);
+  void init(StringRef File);
 
 public:
   // If we add this section from an external source we can use this ctor.
-  explicit GnuDebugLinkSection(StringRef File);
+  explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
 };
@@ -697,21 +871,41 @@ using object::ELFFile;
 using object::ELFObjectFile;
 using object::OwningBinary;
 
-class BinaryELFBuilder {
+class BasicELFBuilder {
+protected:
   uint16_t EMachine;
-  MemoryBuffer *MemBuf;
   std::unique_ptr<Object> Obj;
 
   void initFileHeader();
   void initHeaderSegment();
   StringTableSection *addStrTab();
   SymbolTableSection *addSymTab(StringTableSection *StrTab);
-  void addData(SymbolTableSection *SymTab);
   void initSections();
 
+public:
+  BasicELFBuilder(uint16_t EM)
+      : EMachine(EM), Obj(llvm::make_unique<Object>()) {}
+};
+
+class BinaryELFBuilder : public BasicELFBuilder {
+  MemoryBuffer *MemBuf;
+  void addData(SymbolTableSection *SymTab);
+
 public:
   BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
-      : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique<Object>()) {}
+      : BasicELFBuilder(EM), MemBuf(MB) {}
+
+  std::unique_ptr<Object> build();
+};
+
+class IHexELFBuilder : public BasicELFBuilder {
+  const std::vector<IHexRecord> &Records;
+
+  void addDataSections();
+
+public:
+  IHexELFBuilder(const std::vector<IHexRecord> &Records)
+      : BasicELFBuilder(ELF::EM_386), Records(Records) {}
 
   std::unique_ptr<Object> build();
 };
@@ -724,17 +918,23 @@ private:
 
   const ELFFile<ELFT> &ElfFile;
   Object &Obj;
+  size_t EhdrOffset = 0;
+  Optional<StringRef> ExtractPartition;
 
   void setParentSegment(Segment &Child);
-  void readProgramHeaders();
+  void readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
   void initGroupSection(GroupSection *GroupSec);
   void initSymbolTable(SymbolTableSection *SymTab);
   void readSectionHeaders();
+  void readSections();
+  void findEhdrOffset();
   SectionBase &makeSection(const Elf_Shdr &Shdr);
 
 public:
-  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj)
-      : ElfFile(*ElfObj.getELFFile()), Obj(Obj) {}
+  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
+             Optional<StringRef> ExtractPartition)
+      : ElfFile(*ElfObj.getELFFile()), Obj(Obj),
+        ExtractPartition(ExtractPartition) {}
 
   void build();
 };
@@ -749,12 +949,36 @@ public:
   std::unique_ptr<Object> create() const override;
 };
 
+class IHexReader : public Reader {
+  MemoryBuffer *MemBuf;
+
+  Expected<std::vector<IHexRecord>> parse() const;
+  Error parseError(size_t LineNo, Error E) const {
+    return LineNo == -1U
+               ? createFileError(MemBuf->getBufferIdentifier(), std::move(E))
+               : createFileError(MemBuf->getBufferIdentifier(), LineNo,
+                                 std::move(E));
+  }
+  template <typename... Ts>
+  Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const {
+    Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
+    return parseError(LineNo, std::move(E));
+  }
+
+public:
+  IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
+
+  std::unique_ptr<Object> create() const override;
+};
+
 class ELFReader : public Reader {
   Binary *Bin;
+  Optional<StringRef> ExtractPartition;
 
 public:
   std::unique_ptr<Object> create() const override;
-  explicit ELFReader(Binary *B) : Bin(B) {}
+  explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
+      : Bin(B), ExtractPartition(ExtractPartition) {}
 };
 
 class Object {
@@ -764,6 +988,7 @@ private:
 
   std::vector<SecPtr> Sections;
   std::vector<SegPtr> Segments;
+  std::vector<SecPtr> RemovedSections;
 
 public:
   template <class T>
@@ -792,6 +1017,7 @@ public:
   uint32_t Version;
   uint32_t Flags;
 
+  bool HadShdrs = true;
   StringTableSection *SectionNames = nullptr;
   SymbolTableSection *SymbolTable = nullptr;
   SectionIndexSection *SectionIndexTable = nullptr;
@@ -801,11 +1027,19 @@ public:
   ConstRange<SectionBase> sections() const {
     return make_pointee_range(Sections);
   }
+  SectionBase *findSection(StringRef Name) {
+    auto SecIt =
+        find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
+    return SecIt == Sections.end() ? nullptr : SecIt->get();
+  }
+  SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
+
   Range<Segment> segments() { return make_pointee_range(Segments); }
   ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
 
-  void removeSections(std::function<bool(const SectionBase &)> ToRemove);
-  void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+  Error removeSections(bool AllowBrokenLinks,
+                       std::function<bool(const SectionBase &)> ToRemove);
+  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
   template <class T, class... Ts> T &addSection(Ts &&... Args) {
     auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
     auto Ptr = Sec.get();
diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
new file mode 100644
index 000000000000..19343b65dd1e
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -0,0 +1,68 @@
+//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOObjcopy.h"
+#include "../CopyConfig.h"
+#include "MachOReader.h"
+#include "MachOWriter.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+using namespace object;
+
+// Rejects every option tested below: none is supported for MachO objects.
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+  if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
+      Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+      !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
+      !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
+      !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+      !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() ||
+      !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
+      !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
+      !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() ||
+      !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() ||
+      Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+      Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+      Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+      Config.StripDebug || Config.StripUnneeded || Config.EntryExpr ||
+      Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty()) {
+    return createStringError(llvm::errc::invalid_argument,
+                             "option not supported by llvm-objcopy for MachO");
+  }
+
+  return Error::success();
+}
+
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::MachOObjectFile &In, Buffer &Out) {
+  MachOReader Reader(In);
+  std::unique_ptr<Object> O = Reader.create();
+  if (!O)
+    return createFileError(
+        Config.InputFilename,
+        createStringError(object_error::parse_failed,
+                          "unable to deserialize MachO object"));
+
+  if (Error E = handleArgs(Config, *O))
+    return createFileError(Config.InputFilename, std::move(E));
+
+  MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+  if (auto E = Writer.finalize())
+    return E;
+  return Writer.write();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.h b/tools/llvm-objcopy/MachO/MachOObjcopy.h
new file mode 100644
index 000000000000..f34e361db7ea
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -0,0 +1,31 @@
+//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+
+namespace llvm {
+class Error;
+
+namespace object {
+class MachOObjectFile;
+class MachOUniversalBinary;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace macho {
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+                             object::MachOObjectFile &In, Buffer &Out);
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp
new file mode 100644
index 000000000000..d31293034608
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -0,0 +1,241 @@
+//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOReader.h"
+#include "../llvm-objcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+void MachOReader::readHeader(Object &O) const {
+  O.Header.Magic = MachOObj.getHeader().magic;
+  O.Header.CPUType = MachOObj.getHeader().cputype;
+  O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
+  O.Header.FileType = MachOObj.getHeader().filetype;
+  O.Header.NCmds = MachOObj.getHeader().ncmds;
+  O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
+  O.Header.Flags = MachOObj.getHeader().flags;
+}
+
+template <typename SectionType>
+Section constructSectionCommon(SectionType Sec) {
+  Section S;
+  S.Sectname =
+      StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
+          .str();
+  S.Segname =
+      StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))).str();
+  S.Addr = Sec.addr;
+  S.Size = Sec.size;
+  S.Offset = Sec.offset;
+  S.Align = Sec.align;
+  S.RelOff = Sec.reloff;
+  S.NReloc = Sec.nreloc;
+  S.Flags = Sec.flags;
+  S.Reserved1 = Sec.reserved1;
+  S.Reserved2 = Sec.reserved2;
+  S.Reserved3 = 0; // section_64 only; its constructSection specialization sets it.
+  return S;
+}
+
+template <typename SectionType> Section constructSection(SectionType Sec);
+
+template <> Section constructSection(MachO::section Sec) {
+  return constructSectionCommon(Sec);
+}
+
+template <> Section constructSection(MachO::section_64 Sec) {
+  Section S = constructSectionCommon(Sec);
+  S.Reserved3 = Sec.reserved3;
+  return S;
+}
+
+// TODO: get rid of reportError and make MachOReader return Expected<> instead.
+template <typename SectionType, typename SegmentType>
+std::vector<Section>
+extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
+                const object::MachOObjectFile &MachOObj,
+                size_t &NextSectionIndex) {
+  auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
+  const SectionType *Curr =
+      reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
+  std::vector<Section> Sections;
+  for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
+    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
+      SectionType Sec;
+      memcpy((void *)&Sec, Curr, sizeof(SectionType));
+      MachO::swapStruct(Sec);
+      Sections.push_back(constructSection(Sec));
+    } else {
+      Sections.push_back(constructSection(*Curr));
+    }
+
+    Section &S = Sections.back();
+
+    Expected<object::SectionRef> SecRef =
+        MachOObj.getSection(NextSectionIndex++);
+    if (!SecRef)
+      reportError(MachOObj.getFileName(), SecRef.takeError());
+
+    if (Expected<ArrayRef<uint8_t>> E =
+            MachOObj.getSectionContents(SecRef->getRawDataRefImpl()))
+      S.Content =
+          StringRef(reinterpret_cast<const char *>(E->data()), E->size());
+    else
+      reportError(MachOObj.getFileName(), E.takeError());
+
+    S.Relocations.reserve(S.NReloc);
+    for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
+              RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
+         RI != RE; ++RI) {
+      RelocationInfo R;
+      R.Symbol = nullptr; // We'll fill this field later.
+      R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
+      R.Scattered = MachOObj.isRelocationScattered(R.Info);
+      S.Relocations.push_back(R);
+    }
+
+    assert(S.NReloc == S.Relocations.size() &&
+           "Incorrect number of relocations");
+  }
+  return Sections;
+}
+
+void MachOReader::readLoadCommands(Object &O) const {
+  // For MachO sections indices start from 1.
+  size_t NextSectionIndex = 1;
+  for (auto LoadCmd : MachOObj.load_commands()) {
+    LoadCommand LC;
+    switch (LoadCmd.C.cmd) {
+    case MachO::LC_SEGMENT:
+      LC.Sections = extractSections<MachO::section, MachO::segment_command>(
+          LoadCmd, MachOObj, NextSectionIndex);
+      break;
+    case MachO::LC_SEGMENT_64:
+      LC.Sections =
+          extractSections<MachO::section_64, MachO::segment_command_64>(
+              LoadCmd, MachOObj, NextSectionIndex);
+      break;
+    case MachO::LC_SYMTAB:
+      O.SymTabCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_DYLD_INFO:
+    case MachO::LC_DYLD_INFO_ONLY:
+      O.DyLdInfoCommandIndex = O.LoadCommands.size();
+      break;
+    }
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
+           sizeof(MachO::LCStruct));                                           \
+    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
+      MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
+    LC.Payload = ArrayRef<uint8_t>(                                            \
+        reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +         \
+            sizeof(MachO::LCStruct),                                           \
+        LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                          \
+    break;
+
+    switch (LoadCmd.C.cmd) {
+    default:
+      memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
+             sizeof(MachO::load_command));
+      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
+        MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
+      LC.Payload = ArrayRef<uint8_t>(
+          reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
+              sizeof(MachO::load_command),
+          LoadCmd.C.cmdsize - sizeof(MachO::load_command));
+      break;
+#include "llvm/BinaryFormat/MachO.def"
+    }
+    O.LoadCommands.push_back(std::move(LC));
+  }
+}
+
+template <typename nlist_t>
+SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
+  assert(nlist.n_strx < StrTable.size() &&
+         "n_strx exceeds the size of the string table");
+  SymbolEntry SE;
+  SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
+  SE.n_type = nlist.n_type;
+  SE.n_sect = nlist.n_sect;
+  SE.n_desc = nlist.n_desc;
+  SE.n_value = nlist.n_value;
+  return SE;
+}
+
+void MachOReader::readSymbolTable(Object &O) const {
+  StringRef StrTable = MachOObj.getStringTableData();
+  for (auto Symbol : MachOObj.symbols()) {
+    SymbolEntry SE =
+        (MachOObj.is64Bit()
+             ? constructSymbolEntry(
+                   StrTable,
+                   MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
+             : constructSymbolEntry(
+                   StrTable,
+                   MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
+
+    O.SymTable.Symbols.push_back(llvm::make_unique<SymbolEntry>(SE));
+  }
+}
+
+void MachOReader::setSymbolInRelocationInfo(Object &O) const {
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections)
+      for (auto &Reloc : Sec.Relocations)
+        if (!Reloc.Scattered) {
+          auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info);
+          Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum);
+        }
+}
+
+void MachOReader::readRebaseInfo(Object &O) const {
+  O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
+}
+
+void MachOReader::readBindInfo(Object &O) const {
+  O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
+}
+
+void MachOReader::readWeakBindInfo(Object &O) const {
+  O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
+}
+
+void MachOReader::readLazyBindInfo(Object &O) const {
+  O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
+}
+
+void MachOReader::readExportInfo(Object &O) const {
+  O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
+}
+
+std::unique_ptr<Object> MachOReader::create() const {
+  auto Obj = llvm::make_unique<Object>();
+  readHeader(*Obj);
+  readLoadCommands(*Obj);
+  readSymbolTable(*Obj);
+  setSymbolInRelocationInfo(*Obj);
+  readRebaseInfo(*Obj);
+  readBindInfo(*Obj);
+  readWeakBindInfo(*Obj);
+  readLazyBindInfo(*Obj);
+  readExportInfo(*Obj);
+  return Obj;
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h
new file mode 100644
index 000000000000..795e5cc2363d
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOReader.h
@@ -0,0 +1,48 @@
+//===- MachOReader.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOObjcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+// The hierarchy of readers is responsible for parsing different inputs:
+// raw binaries and regular MachO object files.
+class Reader {
+public:
+  virtual ~Reader() = default;
+  virtual std::unique_ptr<Object> create() const = 0;
+};
+
+class MachOReader : public Reader {
+  const object::MachOObjectFile &MachOObj;
+
+  void readHeader(Object &O) const;
+  void readLoadCommands(Object &O) const;
+  void readSymbolTable(Object &O) const;
+  void setSymbolInRelocationInfo(Object &O) const;
+  void readRebaseInfo(Object &O) const;
+  void readBindInfo(Object &O) const;
+  void readWeakBindInfo(Object &O) const;
+  void readLazyBindInfo(Object &O) const;
+  void readExportInfo(Object &O) const;
+
+public:
+  explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}
+
+  std::unique_ptr<Object> create() const override;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp
new file mode 100644
index 000000000000..74200c5aa62a
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -0,0 +1,590 @@
+//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "Object.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+size_t MachOWriter::headerSize() const {
+  return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+}
+
+size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
+
+size_t MachOWriter::symTableSize() const {
+  return O.SymTable.Symbols.size() *
+         (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
+}
+
+size_t MachOWriter::totalSize() const {
+  // Collect the end offset of every part of the file that is present and
+  // return the largest one. All the offsets are assumed to be either valid
+  // or 0 (0 indicates that the corresponding part is missing).
+
+  SmallVector<size_t, 7> Ends;
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTabCommand =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    if (SymTabCommand.symoff) {
+      assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
+             "Incorrect number of symbols");
+      Ends.push_back(SymTabCommand.symoff + symTableSize());
+    }
+    if (SymTabCommand.stroff) {
+      assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
+             "Incorrect string table size");
+      Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
+    }
+  }
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfoCommand =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    if (DyLdInfoCommand.rebase_off) {
+      assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+             "Incorrect rebase opcodes size");
+      Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
+    }
+    if (DyLdInfoCommand.bind_off) {
+      assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+             "Incorrect bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
+    }
+    if (DyLdInfoCommand.weak_bind_off) {
+      assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+             "Incorrect weak bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.weak_bind_off +
+                     DyLdInfoCommand.weak_bind_size);
+    }
+    if (DyLdInfoCommand.lazy_bind_off) {
+      assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+             "Incorrect lazy bind opcodes size");
+      Ends.push_back(DyLdInfoCommand.lazy_bind_off +
+                     DyLdInfoCommand.lazy_bind_size);
+    }
+    if (DyLdInfoCommand.export_off) {
+      assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+             "Incorrect trie size");
+      Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
+    }
+  }
+
+  // Section contents and relocation entries are candidates for the end too.
+  for (const auto &LC : O.LoadCommands)
+    for (const auto &S : LC.Sections) {
+      Ends.push_back(S.Offset + S.Size);
+      if (S.RelOff)
+        Ends.push_back(S.RelOff +
+                       S.NReloc * sizeof(MachO::any_relocation_info));
+    }
+
+  if (!Ends.empty())
+    return *std::max_element(Ends.begin(), Ends.end());
+
+  // Otherwise, we have only Mach header and load commands.
+  return headerSize() + loadCommandsSize();
+}
+
+void MachOWriter::writeHeader() {
+  MachO::mach_header_64 Header;
+
+  Header.magic = O.Header.Magic;
+  Header.cputype = O.Header.CPUType;
+  Header.cpusubtype = O.Header.CPUSubType;
+  Header.filetype = O.Header.FileType;
+  Header.ncmds = O.Header.NCmds;
+  Header.sizeofcmds = O.Header.SizeOfCmds;
+  Header.flags = O.Header.Flags;
+  Header.reserved = O.Header.Reserved;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Header);
+
+  // mach_header_64 only appends `reserved` to the 32-bit mach_header, so
+  // copying the first headerSize() bytes emits the right layout for both.
+  memcpy(B.getBufferStart(), &Header, headerSize());
+}
+
+// Assigns each symbol its position in O.SymTable.Symbols as its Index,
+// counting from 0.
+void MachOWriter::updateSymbolIndexes() {
+  uint32_t NextIndex = 0;
+  for (auto &Sym : O.SymTable.Symbols)
+    Sym->Index = NextIndex++;
+}
+
+void MachOWriter::writeLoadCommands() {
+  uint8_t *Begin = B.getBufferStart() + headerSize();
+  for (const auto &LC : O.LoadCommands) {
+    // Construct a load command.
+    MachO::macho_load_command MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_data);
+      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+      Begin += sizeof(MachO::segment_command);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section>(Sec, Begin);
+      continue;
+    case MachO::LC_SEGMENT_64:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_64_data);
+      memcpy(Begin, &MLC.segment_command_64_data,
+             sizeof(MachO::segment_command_64));
+      Begin += sizeof(MachO::segment_command_64);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
+      continue;
+    }
+
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
+           MLC.load_command_data.cmdsize);                                     \
+    if (IsLittleEndian != sys::IsLittleEndianHost)                             \
+      MachO::swapStruct(MLC.LCStruct##_data);                                  \
+    memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
+    Begin += sizeof(MachO::LCStruct);                                          \
+    memcpy(Begin, LC.Payload.data(), LC.Payload.size());                       \
+    Begin += LC.Payload.size();                                                \
+    break;
+
+    // Copy the load command as it is.
+    switch (MLC.load_command_data.cmd) {
+    default:
+      assert(sizeof(MachO::load_command) + LC.Payload.size() ==
+             MLC.load_command_data.cmdsize);
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.load_command_data);
+      memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
+      Begin += sizeof(MachO::load_command);
+      memcpy(Begin, LC.Payload.data(), LC.Payload.size());
+      Begin += LC.Payload.size();
+      break;
+#include "llvm/BinaryFormat/MachO.def"
+    }
+  }
+}
+
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+  StructType Temp;
+  assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
+  assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
+         "too long section name");
+  memset(&Temp, 0, sizeof(StructType));
+  memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
+  memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+  Temp.addr = Sec.Addr;
+  Temp.size = Sec.Size;
+  Temp.offset = Sec.Offset;
+  Temp.align = Sec.Align;
+  Temp.reloff = Sec.RelOff;
+  Temp.nreloc = Sec.NReloc;
+  Temp.flags = Sec.Flags;
+  Temp.reserved1 = Sec.Reserved1;
+  Temp.reserved2 = Sec.Reserved2;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Temp);
+  memcpy(Out, &Temp, sizeof(StructType));
+  Out += sizeof(StructType);
+}
+
+void MachOWriter::writeSections() {
+  for (const auto &LC : O.LoadCommands)
+    for (const auto &Sec : LC.Sections) {
+      if (Sec.isVirtualSection())
+        continue;
+
+      assert(Sec.Offset && "Section offset can not be zero");
+      assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
+      memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
+             Sec.Content.size());
+      for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
+        auto RelocInfo = Sec.Relocations[Index];
+        if (!RelocInfo.Scattered) {
+          auto *Info =
+              reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
+          Info->r_symbolnum = RelocInfo.Symbol->Index;
+        }
+
+        if (IsLittleEndian != sys::IsLittleEndianHost)
+          MachO::swapStruct(
+              reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
+        memcpy(B.getBufferStart() + Sec.RelOff +
+                   Index * sizeof(MachO::any_relocation_info),
+               &RelocInfo.Info, sizeof(RelocInfo.Info));
+      }
+    }
+}
+
+template <typename NListType>
+void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
+                     uint32_t Nstrx) {
+  NListType ListEntry;
+  ListEntry.n_strx = Nstrx;
+  ListEntry.n_type = SE.n_type;
+  ListEntry.n_sect = SE.n_sect;
+  ListEntry.n_desc = SE.n_desc;
+  ListEntry.n_value = SE.n_value;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(ListEntry);
+  memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
+  Out += sizeof(NListType);
+}
+
+// Writes the string table built in StrTableBuilder at the symtab's stroff.
+void MachOWriter::writeStringTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTabCommand =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+
+  uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
+  StrTableBuilder.write(StrTable);
+}
+
+// Writes one nlist/nlist_64 entry per symbol at the symtab's symoff.
+void MachOWriter::writeSymbolTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTabCommand =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+
+  char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
+  for (const auto &Sym : O.SymTable.Symbols) {
+    auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+
+    if (Is64Bit)
+      writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
+    else
+      writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
+  }
+}
+
+void MachOWriter::writeRebaseInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off;
+  assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+         "Incorrect rebase opcodes size");
+  memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
+}
+
+void MachOWriter::writeBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off;
+  assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+         "Incorrect bind opcodes size");
+  memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
+}
+
+void MachOWriter::writeWeakBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off;
+  assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+         "Incorrect weak bind opcodes size");
+  memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
+}
+
+void MachOWriter::writeLazyBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off;
+  assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+         "Incorrect lazy bind opcodes size");
+  memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
+}
+
+void MachOWriter::writeExportInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfoCommand =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off;
+  assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+         "Incorrect export trie size");
+  memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
+}
+
+// Writes every tail blob (symbol table, string table and the dyld info
+// streams) in ascending file-offset order. Blobs whose offset field is zero
+// are skipped.
+void MachOWriter::writeTail() {
+  using WriteHandlerType = void (MachOWriter::*)(void);
+  using WriteOperation = std::pair<uint64_t, WriteHandlerType>;
+  SmallVector<WriteOperation, 7> Queue;
+
+  // Queues Handler under Off unless the offset is zero.
+  auto Schedule = [&Queue](uint64_t Off, WriteHandlerType Handler) {
+    if (Off)
+      Queue.push_back({Off, Handler});
+  };
+
+  if (O.SymTabCommandIndex) {
+    const auto &SymTab = O.LoadCommands[*O.SymTabCommandIndex]
+                             .MachOLoadCommand.symtab_command_data;
+    Schedule(SymTab.symoff, &MachOWriter::writeSymbolTable);
+    Schedule(SymTab.stroff, &MachOWriter::writeStringTable);
+  }
+
+  if (O.DyLdInfoCommandIndex) {
+    const auto &Info = O.LoadCommands[*O.DyLdInfoCommandIndex]
+                           .MachOLoadCommand.dyld_info_command_data;
+    Schedule(Info.rebase_off, &MachOWriter::writeRebaseInfo);
+    Schedule(Info.bind_off, &MachOWriter::writeBindInfo);
+    Schedule(Info.weak_bind_off, &MachOWriter::writeWeakBindInfo);
+    Schedule(Info.lazy_bind_off, &MachOWriter::writeLazyBindInfo);
+    Schedule(Info.export_off, &MachOWriter::writeExportInfo);
+  }
+
+  // Order the pending writes by their destination offset.
+  llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
+    return LHS.first < RHS.first;
+  });
+
+  // Run the queued member-function handlers in that order.
+  for (const WriteOperation &Op : Queue) {
+    WriteHandlerType Handler = Op.second;
+    (this->*Handler)();
+  }
+}
+
+// Recomputes O.Header.SizeOfCmds from the struct size of every load command;
+// segment commands additionally account for their section headers.
+void MachOWriter::updateSizeOfCmds() {
+  // The header field is 32 bits wide, so accumulate in the same unsigned type.
+  uint32_t Size = 0;
+  for (const auto &LC : O.LoadCommands) {
+    const auto cmd = LC.MachOLoadCommand.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SEGMENT:
+      Size += sizeof(MachO::segment_command) +
+              sizeof(MachO::section) * LC.Sections.size();
+      continue;
+    case MachO::LC_SEGMENT_64:
+      Size += sizeof(MachO::segment_command_64) +
+              sizeof(MachO::section_64) * LC.Sections.size();
+      continue;
+    }
+    // Commands listed in MachO.def contribute the size of their fixed struct.
+    switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    Size += sizeof(MachO::LCStruct);                                           \
+    break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+    }
+  }
+  O.Header.SizeOfCmds = Size;
+}
+
+// Updates the index and the number of local/external/undefined symbols in the
+// dynamic symbol table command. Here we assume that MLC is a LC_DYSYMTAB and
+// that the nlist entries in the symbol table are already sorted by those
+// types (locals first, then defined externals, then undefined symbols).
+void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
+  const auto &Symbols = O.SymTable.Symbols;
+  const size_t NumSymbols = Symbols.size();
+
+  // Local symbols come first: entries with neither N_EXT nor N_PEXT set.
+  size_t FirstExtDef = 0;
+  while (FirstExtDef != NumSymbols &&
+         !(Symbols[FirstExtDef]->n_type & (MachO::N_EXT | MachO::N_PEXT)))
+    ++FirstExtDef;
+
+  // Defined external symbols follow, up to the first N_UNDF entry.
+  size_t FirstUndef = FirstExtDef;
+  while (FirstUndef != NumSymbols &&
+         (Symbols[FirstUndef]->n_type & MachO::N_TYPE) != MachO::N_UNDF)
+    ++FirstUndef;
+
+  // Publish the three consecutive index ranges; whatever remains after the
+  // defined externals is counted as undefined.
+  auto &DySymTab = MLC.dysymtab_command_data;
+  DySymTab.ilocalsym = 0;
+  DySymTab.nlocalsym = FirstExtDef;
+  DySymTab.iextdefsym = FirstExtDef;
+  DySymTab.nextdefsym = FirstUndef - FirstExtDef;
+  DySymTab.iundefsym = FirstUndef;
+  DySymTab.nundefsym = NumSymbols - FirstUndef;
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified. Fails on load commands it cannot lay out yet.
+Error MachOWriter::layout() {
+  auto SizeOfCmds = loadCommandsSize();
+  auto Offset = headerSize() + SizeOfCmds;
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = SizeOfCmds;
+
+  // Lay out sections, packing segment contents right after the load commands.
+  for (auto &LC : O.LoadCommands) {
+    uint64_t FileOff = Offset;
+    uint64_t VMSize = 0;
+    uint64_t FileOffsetInSegment = 0;
+    for (auto &Sec : LC.Sections) {
+      if (!Sec.isVirtualSection()) { // Zerofill sections take no file space.
+        auto FilePaddingSize =
+            OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
+        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+        Sec.Size = Sec.Content.size();
+        FileOffsetInSegment += FilePaddingSize + Sec.Size;
+      }
+      // Track the highest section end (Addr + Size) seen in this command.
+      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+    }
+
+    // TODO: Handle the __PAGEZERO segment.
+    auto &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC.segment_command_data.cmdsize =
+          sizeof(MachO::segment_command) +
+          sizeof(MachO::section) * LC.Sections.size();
+      MLC.segment_command_data.nsects = LC.Sections.size();
+      MLC.segment_command_data.fileoff = FileOff;
+      MLC.segment_command_data.vmsize = VMSize;
+      MLC.segment_command_data.filesize = FileOffsetInSegment;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC.segment_command_64_data.cmdsize =
+          sizeof(MachO::segment_command_64) +
+          sizeof(MachO::section_64) * LC.Sections.size();
+      MLC.segment_command_64_data.nsects = LC.Sections.size();
+      MLC.segment_command_64_data.fileoff = FileOff;
+      MLC.segment_command_64_data.vmsize = VMSize;
+      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+      break;
+    }
+
+    Offset += FileOffsetInSegment; // The next command's data follows this one.
+  }
+
+  // Lay out relocations after all section contents; RelOff stays 0 if none.
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections) {
+      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+      Sec.NReloc = Sec.Relocations.size();
+      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+    }
+
+  // Lay out tail stuff: the symbol table, immediately followed by the strings.
+  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  for (auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SYMTAB:
+      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
+      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
+      MLC.symtab_command_data.symoff = Offset;
+      Offset += NListSize * MLC.symtab_command_data.nsyms;
+      MLC.symtab_command_data.stroff = Offset;
+      Offset += MLC.symtab_command_data.strsize;
+      break;
+    case MachO::LC_DYSYMTAB: {
+      if (MLC.dysymtab_command_data.ntoc != 0 ||
+          MLC.dysymtab_command_data.nmodtab != 0 ||
+          MLC.dysymtab_command_data.nextrefsyms != 0 ||
+          MLC.dysymtab_command_data.nlocrel != 0 ||
+          MLC.dysymtab_command_data.nextrel != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "shared library is not yet supported");
+
+      if (MLC.dysymtab_command_data.nindirectsyms != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "indirect symbol table is not yet supported");
+      // Recompute the local/extdef/undef symbol index ranges.
+      updateDySymTab(MLC);
+      break;
+    }
+    case MachO::LC_SEGMENT:
+    case MachO::LC_SEGMENT_64:
+    case MachO::LC_VERSION_MIN_MACOSX:
+    case MachO::LC_BUILD_VERSION:
+    case MachO::LC_ID_DYLIB:
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_UUID:
+    case MachO::LC_SOURCE_VERSION:
+      // Nothing to update.
+      break;
+    default:
+      // Abort if it's unsupported in order to prevent corrupting the object.
+      return createStringError(llvm::errc::not_supported,
+                               "unsupported load command (cmd=0x%x)", cmd);
+    }
+  }
+
+  return Error::success();
+}
+
+void MachOWriter::constructStringTable() {
+  for (const auto &Entry : O.SymTable.Symbols) // Register each symbol name...
+    StrTableBuilder.add(Entry->Name);
+  StrTableBuilder.finalize(); // ...then finalize the builder.
+}
+
+// Prepares the object for writing: refreshes the load-command size in the
+// header, builds the string table and then lays out the file. A layout error
+// is returned to the caller unchanged.
+Error MachOWriter::finalize() {
+  updateSizeOfCmds();
+  constructStringTable();
+  // layout() reads StrTableBuilder.getSize(), so it has to run last.
+  return layout();
+}
+
+Error MachOWriter::write() {
+  if (Error E = B.allocate(totalSize())) // Allocate totalSize() output bytes.
+    return E;
+  memset(B.getBufferStart(), 0, totalSize()); // Bytes never written stay zero.
+  writeHeader();
+  updateSymbolIndexes();
+  writeLoadCommands();
+  writeSections();
+  writeTail(); // Symbol/string tables and dyld info blobs, by file offset.
+  return B.commit();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h
new file mode 100644
index 000000000000..ecf12d62de2c
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -0,0 +1,82 @@
+//===- MachOWriter.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_MACHOWRITER_H
+#define LLVM_OBJCOPY_MACHO_MACHOWRITER_H
+
+#include "../Buffer.h"
+#include "MachOObjcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+class Error;
+
+namespace objcopy {
+namespace macho {
+
+/// Serializes a macho::Object into a Buffer. finalize() recomputes the sizes
+/// and offsets stored in the object and builds the string table; write()
+/// allocates the buffer, emits all parts of the file and commits it.
+class MachOWriter {
+  /// The object to write; finalize() updates its header and load commands.
+  Object &O;
+  /// Selects between the nlist_64 and nlist entry sizes during layout.
+  bool Is64Bit;
+  bool IsLittleEndian;
+  /// Destination buffer, allocated and committed by write().
+  Buffer &B;
+  /// Collects the symbol names; filled by constructStringTable().
+  StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+
+  size_t headerSize() const;
+  size_t loadCommandsSize() const;
+  size_t symTableSize() const;
+  size_t strTableSize() const;
+
+  // Helpers that refresh derived state (sizes, indexes, offsets, string table).
+  void updateDySymTab(MachO::macho_load_command &MLC);
+  void updateSizeOfCmds();
+  void updateSymbolIndexes();
+  void constructStringTable();
+  Error layout();
+
+  // Emitters for the individual parts of the output file.
+  void writeHeader();
+  void writeLoadCommands();
+  template <typename StructType>
+  void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
+  void writeSections();
+  void writeSymbolTable();
+  void writeStringTable();
+  void writeRebaseInfo();
+  void writeBindInfo();
+  void writeWeakBindInfo();
+  void writeLazyBindInfo();
+  void writeExportInfo();
+  void writeTail();
+
+public:
+  MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
+      : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
+
+  /// Number of bytes write() allocates for the output.
+  size_t totalSize() const;
+  /// Recomputes sizes and offsets and builds the string table. Returns an
+  /// error if the object contains a load command that cannot be laid out.
+  Error finalize();
+  /// Allocates the buffer, writes the whole file into it and commits it.
+  Error write();
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_MACHOWRITER_H
diff --git a/tools/llvm-objcopy/MachO/Object.cpp b/tools/llvm-objcopy/MachO/Object.cpp
new file mode 100644
index 000000000000..264f39c28ed2
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/Object.cpp
@@ -0,0 +1,15 @@
+#include "Object.h"
+#include "../llvm-objcopy.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
+  assert(Index < Symbols.size() && "invalid symbol index"); // Debug-only check.
+  return Symbols[Index].get(); // Non-owning; the table keeps ownership.
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h
new file mode 100644
index 000000000000..ed85fcbc47f7
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/Object.h
@@ -0,0 +1,232 @@
+//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H
+#define LLVM_OBJCOPY_MACHO_OBJECT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+struct MachHeader { // Header fields of the object; all default to zero.
+  uint32_t Magic = 0;
+  uint32_t CPUType = 0;
+  uint32_t CPUSubType = 0;
+  uint32_t FileType = 0;
+  uint32_t NCmds = 0;      // Number of load commands.
+  uint32_t SizeOfCmds = 0; // Total size of the load commands, in bytes.
+  uint32_t Flags = 0;
+  uint32_t Reserved = 0;
+};
+
+struct RelocationInfo;
+struct Section { // Metadata of one section plus a view of its contents.
+  std::string Sectname;
+  std::string Segname;
+  uint64_t Addr;
+  uint64_t Size;
+  uint32_t Offset;
+  uint32_t Align;
+  uint32_t RelOff;
+  uint32_t NReloc;
+  uint32_t Flags;
+  uint32_t Reserved1;
+  uint32_t Reserved2;
+  uint32_t Reserved3;
+
+  StringRef Content;
+  std::vector<RelocationInfo> Relocations;
+  // Returns the section type stored in the SECTION_TYPE bits of Flags.
+  MachO::SectionType getType() const {
+    return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
+  }
+  // Returns true for the zerofill section types (plain, GB, thread-local).
+  bool isVirtualSection() const {
+    const MachO::SectionType Ty = getType();
+    return Ty == MachO::S_ZEROFILL || Ty == MachO::S_GB_ZEROFILL ||
+           Ty == MachO::S_THREAD_LOCAL_ZEROFILL;
+  }
+};
+
+struct LoadCommand { // One load command with its payload and sections.
+  // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h
+  // and it is a union of all the structs corresponding to various load
+  // commands.
+  MachO::macho_load_command MachOLoadCommand;
+
+  // The raw content of the payload of the load command (located right after the
+  // corresponding struct). In some cases it is either empty or can be
+  // copied over without digging into its structure.
+  ArrayRef<uint8_t> Payload;
+
+  // Some load commands can contain (inside the payload) an array of sections,
+  // though the contents of the sections are stored separately. The struct
+  // Section describes only the sections' metadata and where to find the
+  // corresponding content inside the binary.
+  std::vector<Section> Sections;
+};
+
+// Information about one symbol. Fields whose names start with "n_" are the
+// same as the corresponding fields of nlist.
+struct SymbolEntry {
+  std::string Name;
+  uint32_t Index;
+  uint8_t n_type;
+  uint8_t n_sect;
+  uint16_t n_desc;
+  uint64_t n_value;
+};
+
+/// The location of the symbol table inside the binary is described by the
+/// LC_SYMTAB load command.
+struct SymbolTable {
+  std::vector<std::unique_ptr<SymbolEntry>> Symbols;
+
+  const SymbolEntry *getSymbolByIndex(uint32_t Index) const; // Index in range.
+};
+
+/// The location of the string table inside the binary is described by the
+/// LC_SYMTAB load command.
+struct StringTable {
+  std::vector<std::string> Strings;
+};
+
+struct RelocationInfo { // One relocation entry of a section.
+  const SymbolEntry *Symbol;
+  // True if Info is a scattered_relocation_info.
+  bool Scattered;
+  MachO::any_relocation_info Info;
+};
+
+/// The location of the rebase info inside the binary is described by the
+/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at
+/// an address different from its preferred address.  The rebase information is
+/// a stream of byte sized opcodes whose symbolic names start with
+/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples:
+///   <seg-index, seg-offset, type>
+/// The opcodes are a compressed way to encode the table by only
+/// encoding when a column changes.  In addition simple patterns
+/// like "every n'th offset for m times" can be encoded in a few
+/// bytes.
+struct RebaseInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Dyld binds an image during the loading process,
+/// if the image requires any pointers to be initialized to symbols in other
+/// images. The bind information is a stream of byte sized opcodes whose
+/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is
+/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal,
+/// symbol-name, addend>. The opcodes are a compressed way to encode the table
+/// by only encoding when a column changes.  In addition simple patterns like
+/// runs of pointers initialized to the same value can be encoded in a few
+/// bytes.
+struct BindInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the weak bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols
+/// so that all images in the process use the same copy of some code/data. This
+/// step is done after binding. The content of the weak_bind info is an opcode
+/// stream like the bind_info.  But it is sorted alphabetically by symbol name.
+/// This enables dyld to walk all images with weak binding information in order
+/// and look for collisions.  If there are no collisions, dyld does no updating.
+/// That means that some fixups are also encoded in the bind_info.  For
+/// instance, all calls to "operator new" are first bound to libstdc++.dylib
+/// using the information in bind_info.  Then if some image overrides operator
+/// new that is detected when the weak_bind information is processed and the
+/// call to operator new is then rebound.
+struct WeakBindInfo {
+  // At the moment we do not parse this info (and it is simply copied over),
+  // but the proper support will be added later.
+  ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the lazy bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be
+/// bound immediately. Instead they can be lazily bound on first use.  The
+/// lazy_bind contains a stream of BIND opcodes to bind all lazy symbols. Normal
+/// use is that dyld ignores the lazy_bind section when loading an image.
+/// Instead the static linker arranged for the lazy pointer to initially point
+/// to a helper function which pushes the offset into the lazy_bind area for the
+/// symbol needing to be bound, then jumps to dyld which simply adds the offset
+/// to lazy_bind_off to get the information on what to bind.
+struct LazyBindInfo {
+  ArrayRef<uint8_t> Opcodes; // Not parsed yet; simply copied over.
+};
+
+/// The location of the export info inside the binary is described by the
+/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a
+/// trie.  This is a compact representation that factors out common prefixes. It
+/// also reduces LINKEDIT pages in RAM because it encodes all information (name,
+/// address, flags) in one small, contiguous range. The export area is a stream
+/// of nodes.  The first node sequentially is the start node for the trie. Nodes
+/// for a symbol start with a uleb128 that is the length of the exported symbol
+/// information for the string so far. If there is no exported symbol, the node
+/// starts with a zero byte. If there is exported info, it follows the length.
+/// First is a uleb128 containing flags. Normally, it is followed by
+/// a uleb128 encoded offset which is location of the content named
+/// by the symbol from the mach_header for the image.  If the flags
+/// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
+/// a uleb128 encoded library ordinal, then a zero terminated
+/// UTF8 string.  If the string is zero length, then the symbol
+/// is re-exported from the specified dylib with the same name.
+/// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
+/// the flags is two uleb128s: the stub offset and the resolver offset.
+/// The stub is used by non-lazy pointers.  The resolver is used
+/// by lazy pointers and must be called to get the actual address to use.
+/// After the optional exported symbol information is a byte of
+/// how many edges (0-255) that this node has leaving it,
+/// followed by each edge.
+/// Each edge is a zero terminated UTF8 string of the additional chars
+/// in the symbol, followed by a uleb128 offset for the node that
+/// edge points to.
+struct ExportInfo {
+  ArrayRef<uint8_t> Trie; // Not parsed yet; simply copied over.
+};
+
+struct Object { // In-memory model of one Mach-O object file.
+  MachHeader Header;
+  std::vector<LoadCommand> LoadCommands;
+
+  SymbolTable SymTable;
+  StringTable StrTable;
+
+  RebaseInfo Rebases;
+  BindInfo Binds;
+  WeakBindInfo WeakBinds;
+  LazyBindInfo LazyBinds;
+  ExportInfo Exports;
+
+  /// The index of LC_SYMTAB load command if present.
+  Optional<size_t> SymTabCommandIndex;
+  /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
+  Optional<size_t> DyLdInfoCommandIndex;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_OBJECT_H
diff --git a/tools/llvm-objcopy/ObjcopyOpts.td b/tools/llvm-objcopy/ObjcopyOpts.td
index 1f7e64e4091c..5fce4fbde539 100644
--- a/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/tools/llvm-objcopy/ObjcopyOpts.td
@@ -1,13 +1,20 @@
 include "llvm/Option/OptParser.td"
 
 multiclass Eq<string name, string help> {
-  def NAME : Separate<["--", "-"], name>;
-  def NAME #_eq : Joined<["--", "-"], name #"=">,
+  def NAME : Separate<["--"], name>;
+  def NAME #_eq : Joined<["--"], name #"=">,
                   Alias<!cast<Separate>(NAME)>,
                   HelpText<help>;
 }
 
-def help : Flag<["-", "--"], "help">;
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def allow_broken_links
+    : Flag<["--"], "allow-broken-links">,
+      HelpText<"Allow llvm-objcopy to remove sections even if it would leave "
+               "invalid section references. The appropriate sh_link fields "
+               "will be set to zero.">;
 
 defm binary_architecture
     : Eq<"binary-architecture", "Used when transforming an architecture-less "
@@ -26,13 +33,13 @@ defm output_target : Eq<"output-target", "Format of the output file">,
                      Values<"binary">;
 def O : JoinedOrSeparate<["-"], "O">, Alias<output_target>;
 
-def compress_debug_sections : Flag<["--", "-"], "compress-debug-sections">;
+def compress_debug_sections : Flag<["--"], "compress-debug-sections">;
 def compress_debug_sections_eq
-    : Joined<["--", "-"], "compress-debug-sections=">,
+    : Joined<["--"], "compress-debug-sections=">,
       MetaVarName<"[ zlib | zlib-gnu ]">,
       HelpText<"Compress DWARF debug sections using specified style. Supported "
                "styles: 'zlib-gnu' and 'zlib'">;
-def decompress_debug_sections : Flag<["-", "--"], "decompress-debug-sections">,
+def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">,
                                 HelpText<"Decompress DWARF debug sections.">;
 defm split_dwo
     : Eq<"split-dwo", "Equivalent to extract-dwo on the input file to "
@@ -40,7 +47,7 @@ defm split_dwo
       MetaVarName<"dwo-file">;
 
 def enable_deterministic_archives
-    : Flag<["-", "--"], "enable-deterministic-archives">,
+    : Flag<["--"], "enable-deterministic-archives">,
       HelpText<"Enable deterministic mode when copying archives (use zero for "
                "UIDs, GIDs, and timestamps).">;
 def D : Flag<["-"], "D">,
@@ -48,14 +55,14 @@ def D : Flag<["-"], "D">,
         HelpText<"Alias for --enable-deterministic-archives">;
 
 def disable_deterministic_archives
-    : Flag<["-", "--"], "disable-deterministic-archives">,
+    : Flag<["--"], "disable-deterministic-archives">,
       HelpText<"Disable deterministic mode when copying archives (use real "
                "values for UIDs, GIDs, and timestamps).">;
 def U : Flag<["-"], "U">,
         Alias<disable_deterministic_archives>,
         HelpText<"Alias for --disable-deterministic-archives">;
 
-def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+def preserve_dates : Flag<["--"], "preserve-dates">,
                      HelpText<"Preserve access and modification timestamps">;
 def p : Flag<["-"], "p">, Alias<preserve_dates>;
 
@@ -76,6 +83,16 @@ defm rename_section
 defm redefine_symbol
     : Eq<"redefine-sym", "Change the name of a symbol old to new">,
       MetaVarName<"old=new">;
+defm redefine_symbols
+    : Eq<"redefine-syms",
+         "Reads a list of symbol pairs from <filename> and runs as if "
+         "--redefine-sym=<old>=<new> is set for each one. <filename> "
+         "contains two symbols per line separated with whitespace and may "
+         "contain comments beginning with '#'. Leading and trailing "
+         "whitespace is stripped from each line. May be repeated to read "
+         "symbols from many files.">,         
+      MetaVarName<"filename">;
+
 defm keep_section : Eq<"keep-section", "Keep <section>">,
                     MetaVarName<"section">;
 defm only_section : Eq<"only-section", "Remove all but <section>">,
@@ -86,39 +103,76 @@ defm add_section
          "Make a section named <section> with the contents of <file>.">,
       MetaVarName<"section=file">;
 
-def strip_all
-    : Flag<["-", "--"], "strip-all">,
-      HelpText<
-          "Remove non-allocated sections other than .gnu.warning* sections">;
+defm set_section_flags
+    : Eq<"set-section-flags",
+         "Set section flags for a given section. Flags supported for GNU "
+         "compatibility: alloc, load, noload, readonly, debug, code, data, "
+         "rom, share, contents, merge, strings.">,
+      MetaVarName<"section=flag1[,flag2,...]">;
+
+def strip_all : Flag<["--"], "strip-all">,
+                HelpText<"Remove non-allocated sections outside segments. "
+                         ".gnu.warning* sections are not removed">;
 def S : Flag<["-"], "S">, Alias<strip_all>;
-def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
                     HelpText<"Compatible with GNU objcopy's --strip-all">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
+def strip_debug : Flag<["--"], "strip-debug">,
                   HelpText<"Remove all debug information">;
-def strip_dwo : Flag<["-", "--"], "strip-dwo">,
+def g : Flag<["-"], "g">, Alias<strip_debug>,
+        HelpText<"Alias for --strip-debug">;
+def strip_dwo : Flag<["--"], "strip-dwo">,
                 HelpText<"Remove all DWARF .dwo sections from file">;
-def strip_sections : Flag<["-", "--"], "strip-sections">,
-                     HelpText<"Remove all section headers">;
-def strip_non_alloc : Flag<["-", "--"], "strip-non-alloc">,
-                      HelpText<"Remove all non-allocated sections">;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+def strip_sections
+    : Flag<["--"], "strip-sections">,
+      HelpText<"Remove all section headers and all sections not in segments">;
+def strip_non_alloc
+    : Flag<["--"], "strip-non-alloc">,
+      HelpText<"Remove all non-allocated sections outside segments">;
+def strip_unneeded : Flag<["--"], "strip-unneeded">,
                      HelpText<"Remove all symbols not needed by relocations">;
+defm strip_unneeded_symbol
+    : Eq<"strip-unneeded-symbol",
+         "Remove symbol <symbol> if it is not needed by relocations">,
+      MetaVarName<"symbol">;
+defm strip_unneeded_symbols
+    : Eq<"strip-unneeded-symbols",
+         "Reads a list of symbols from <filename> and removes them "
+         "if they are not needed by relocations">,
+      MetaVarName<"filename">;
 
 def extract_dwo
-    : Flag<["-", "--"], "extract-dwo">,
+    : Flag<["--"], "extract-dwo">,
       HelpText<
           "Remove all sections that are not DWARF .dwo sections from file">;
 
+defm extract_partition
+    : Eq<"extract-partition", "Extract named partition from input file">,
+      MetaVarName<"name">;
+def extract_main_partition
+    : Flag<["--"], "extract-main-partition">,
+      HelpText<"Extract main partition from the input file">;
+
 def localize_hidden
-    : Flag<["-", "--"], "localize-hidden">,
+    : Flag<["--"], "localize-hidden">,
       HelpText<
           "Mark all symbols that have hidden or internal visibility as local">;
 defm localize_symbol : Eq<"localize-symbol", "Mark <symbol> as local">,
                        MetaVarName<"symbol">;
+defm localize_symbols
+    : Eq<"localize-symbols",
+         "Reads a list of symbols from <filename> and marks them local.">,
+      MetaVarName<"filename">;
+
 def L : JoinedOrSeparate<["-"], "L">, Alias<localize_symbol>;
 
 defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
                         MetaVarName<"symbol">;
+
+defm globalize_symbols
+    : Eq<"globalize-symbols",
+         "Reads a list of symbols from <filename> and marks them global.">,
+      MetaVarName<"filename">;
+
 defm keep_global_symbol
     : Eq<"keep-global-symbol",
          "Convert all symbols except <symbol> to local. May be repeated to "
@@ -137,23 +191,51 @@ defm keep_global_symbols
 
 defm weaken_symbol : Eq<"weaken-symbol", "Mark <symbol> as weak">,
                      MetaVarName<"symbol">;
+defm weaken_symbols
+    : Eq<"weaken-symbols",
+         "Reads a list of symbols from <filename> and marks them weak.">,
+      MetaVarName<"filename">;
+
 def W : JoinedOrSeparate<["-"], "W">, Alias<weaken_symbol>;
-def weaken : Flag<["-", "--"], "weaken">,
+def weaken : Flag<["--"], "weaken">,
              HelpText<"Mark all global symbols as weak">;
+
+def discard_locals : Flag<["--"], "discard-locals">,
+                     HelpText<"Remove compiler-generated local symbols, (e.g. "
+                              "symbols starting with .L)">;
+def X : Flag<["-"], "X">, Alias<discard_locals>;
+
 def discard_all
-    : Flag<["-", "--"], "discard-all">,
+    : Flag<["--"], "discard-all">,
       HelpText<"Remove all local symbols except file and section symbols">;
 def x : Flag<["-"], "x">, Alias<discard_all>;
 defm strip_symbol : Eq<"strip-symbol", "Remove symbol <symbol>">,
                     MetaVarName<"symbol">;
+defm strip_symbols
+    : Eq<"strip-symbols",
+         "Reads a list of symbols from <filename> and removes them.">,
+      MetaVarName<"filename">;
+
 def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
 defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
                    MetaVarName<"symbol">;
 def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
+
+defm keep_symbols
+    : Eq<"keep-symbols",
+         "Reads a list of symbols from <filename> and runs as if "
+         "--keep-symbol=<symbol> is set for each one. <filename> "
+         "contains one symbol per line and may contain comments beginning with "
+         "'#'. Leading and trailing whitespace is stripped from each line. May "
+         "be repeated to read symbols from many files.">,
+      MetaVarName<"filename">;
+
 def only_keep_debug
-    : Flag<["-", "--"], "only-keep-debug">,
-      HelpText<"Currently ignored. Only for compatibility with GNU objcopy.">;
-def keep_file_symbols : Flag<["-", "--"], "keep-file-symbols">,
+    : Flag<["--"], "only-keep-debug">,
+      HelpText<"Clear sections that would not be stripped by --strip-debug. "
+               "Currently only implemented for COFF.">;
+
+def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
                         HelpText<"Do not remove file symbols">;
 defm dump_section
     : Eq<"dump-section",
@@ -163,7 +245,11 @@ defm prefix_symbols
     : Eq<"prefix-symbols", "Add <prefix> to the start of every symbol name">,
       MetaVarName<"prefix">;
 
-def version : Flag<["-", "--"], "version">,
+defm prefix_alloc_sections
+    : Eq<"prefix-alloc-sections", "Add <prefix> to the start of every allocated section name">,
+      MetaVarName<"prefix">;
+
+def version : Flag<["--"], "version">,
               HelpText<"Print the version and exit.">;
 def V : Flag<["-"], "V">, Alias<version>;
 defm build_id_link_dir
@@ -178,3 +264,25 @@ defm build_id_link_output
     : Eq<"build-id-link-output", "Hard-link the output to <dir>/xx/xxx<suffix> "
                                  "name derived from hex build ID">,
       MetaVarName<"suffix">;
+
+def regex
+    : Flag<["--"], "regex">,
+      HelpText<"Permit regular expressions in name comparison">;
+
+defm set_start : Eq<"set-start", "Set the start address to <addr>. Overrides "
+                    "any previous --change-start or --adjust-start values.">,
+                 MetaVarName<"addr">;
+defm change_start : Eq<"change-start", "Add <incr> to the start address. Can be "
+                       "specified multiple times, all values will be applied "
+                       "cumulatively.">,
+                    MetaVarName<"incr">;
+def adjust_start : JoinedOrSeparate<["--"], "adjust-start">,
+                   Alias<change_start>;
+
+defm add_symbol
+    : Eq<"add-symbol", "Add new symbol <name> to .symtab. Accepted flags: "
+         "global, local, weak, default, hidden, file, section, object, "
+         "function, indirect-function. Accepted but ignored for "
+         "compatibility: debug, constructor, warning, indirect, synthetic, "
+         "unique-object, before.">,
+      MetaVarName<"name=[section:]value[,flags]">;
diff --git a/tools/llvm-objcopy/StripOpts.td b/tools/llvm-objcopy/StripOpts.td
index fa98e27e9321..1d06bb3dfb38 100644
--- a/tools/llvm-objcopy/StripOpts.td
+++ b/tools/llvm-objcopy/StripOpts.td
@@ -1,16 +1,23 @@
 include "llvm/Option/OptParser.td"
 
 multiclass Eq<string name, string help> {
-  def NAME : Separate<["--", "-"], name>;
-  def NAME #_eq : Joined<["--", "-"], name #"=">,
+  def NAME : Separate<["--"], name>;
+  def NAME #_eq : Joined<["--"], name #"=">,
                   Alias<!cast<Separate>(NAME)>,
                   HelpText<help>;
 }
 
-def help : Flag<["-", "--"], "help">;
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def allow_broken_links
+    : Flag<["--"], "allow-broken-links">,
+      HelpText<"Allow llvm-strip to remove sections even if it would leave "
+               "invalid section references. The appropriate sh_link fields "
+               "will be set to zero.">;
 
 def enable_deterministic_archives
-    : Flag<["-", "--"], "enable-deterministic-archives">,
+    : Flag<["--"], "enable-deterministic-archives">,
       HelpText<"Enable deterministic mode when stripping archives (use zero "
                "for UIDs, GIDs, and timestamps).">;
 def D : Flag<["-"], "D">,
@@ -18,50 +25,72 @@ def D : Flag<["-"], "D">,
         HelpText<"Alias for --enable-deterministic-archives">;
 
 def disable_deterministic_archives
-    : Flag<["-", "--"], "disable-deterministic-archives">,
+    : Flag<["--"], "disable-deterministic-archives">,
       HelpText<"Disable deterministic mode when stripping archives (use real "
                "values for UIDs, GIDs, and timestamps).">;
 def U : Flag<["-"], "U">,
         Alias<disable_deterministic_archives>,
         HelpText<"Alias for --disable-deterministic-archives">;
 
-defm output : Eq<"o", "Write output to <file>">, MetaVarName<"output">;
+def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to <file>">;
 
-def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+def preserve_dates : Flag<["--"], "preserve-dates">,
                      HelpText<"Preserve access and modification timestamps">;
 def p : Flag<["-"], "p">, Alias<preserve_dates>;
 
-def strip_all
-    : Flag<["-", "--"], "strip-all">,
-      HelpText<
-          "Remove non-allocated sections other than .gnu.warning* sections">;
+def strip_all : Flag<["--"], "strip-all">,
+                HelpText<"Remove non-allocated sections outside segments. "
+                         ".gnu.warning* sections are not removed">;
 def s : Flag<["-"], "s">, Alias<strip_all>;
+def no_strip_all : Flag<["--"], "no-strip-all">,
+                   HelpText<"Disable --strip-all">;
 
-def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
                     HelpText<"Compatible with GNU strip's --strip-all">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
+def strip_debug : Flag<["--"], "strip-debug">,
                   HelpText<"Remove debugging symbols only">;
 def d : Flag<["-"], "d">, Alias<strip_debug>;
 def g : Flag<["-"], "g">, Alias<strip_debug>;
 def S : Flag<["-"], "S">, Alias<strip_debug>;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+def strip_unneeded : Flag<["--"], "strip-unneeded">,
                      HelpText<"Remove all symbols not needed by relocations">;
 
 defm remove_section : Eq<"remove-section", "Remove <section>">,
                       MetaVarName<"section">;
 def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
 
+defm strip_symbol : Eq<"strip-symbol", "Strip <symbol>">,
+                    MetaVarName<"symbol">;
+def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
+
 defm keep_section : Eq<"keep-section", "Keep <section>">,
                     MetaVarName<"section">;
 defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
                    MetaVarName<"symbol">;
+def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
+                        HelpText<"Do not remove file symbols">;
+
 def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
 
+def only_keep_debug
+    : Flag<["--"], "only-keep-debug">,
+      HelpText<"Clear sections that would not be stripped by --strip-debug. "
+               "Currently only implemented for COFF.">;
+
+def discard_locals : Flag<["--"], "discard-locals">,
+                     HelpText<"Remove compiler-generated local symbols, (e.g. "
+                              "symbols starting with .L)">;
+def X : Flag<["-"], "X">, Alias<discard_locals>;
+
 def discard_all
-    : Flag<["-", "--"], "discard-all">,
+    : Flag<["--"], "discard-all">,
       HelpText<"Remove all local symbols except file and section symbols">;
 def x : Flag<["-"], "x">, Alias<discard_all>;
 
-def version : Flag<["-", "--"], "version">,
+def regex
+    : Flag<["--"], "regex">,
+      HelpText<"Permit regular expressions in name comparison">;
+
+def version : Flag<["--"], "version">,
               HelpText<"Print the version and exit.">;
 def V : Flag<["-"], "V">, Alias<version>;
diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp
index fb1ff18b015b..e9372176e43b 100644
--- a/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -1,17 +1,17 @@
 //===- llvm-objcopy.cpp ---------------------------------------------------===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objcopy.h"
 #include "Buffer.h"
-#include "COFF/COFFObjcopy.h"
 #include "CopyConfig.h"
 #include "ELF/ELFObjcopy.h"
+#include "COFF/COFFObjcopy.h"
+#include "MachO/MachOObjcopy.h"
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -24,6 +24,7 @@
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
@@ -52,16 +53,23 @@ namespace objcopy {
 StringRef ToolName;
 
 LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
-  WithColor::error(errs(), ToolName) << Message << ".\n";
-  errs().flush();
+  WithColor::error(errs(), ToolName) << Message << "\n";
+  exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN void error(Error E) {
+  assert(E);
+  std::string Buf;
+  raw_string_ostream OS(Buf);
+  logAllUnhandledErrors(std::move(E), OS);
+  OS.flush();
+  WithColor::error(errs(), ToolName) << Buf;
   exit(1);
 }
 
 LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) {
   assert(EC);
-  WithColor::error(errs(), ToolName)
-      << "'" << File << "': " << EC.message() << ".\n";
-  exit(1);
+  error(createFileError(File, EC));
 }
 
 LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
@@ -74,6 +82,12 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
   exit(1);
 }
 
+ErrorSuccess reportWarning(Error E) {
+  assert(E); // Only failure values are expected here.
+  WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n';
+  return Error::success(); // The error is consumed; callers continue.
+}
+
 } // end namespace objcopy
 } // end namespace llvm
 
@@ -87,10 +101,13 @@ static Error deepWriteArchive(StringRef ArcName,
                               ArrayRef<NewArchiveMember> NewMembers,
                               bool WriteSymtab, object::Archive::Kind Kind,
                               bool Deterministic, bool Thin) {
-  Error E =
-      writeArchive(ArcName, NewMembers, WriteSymtab, Kind, Deterministic, Thin);
-  if (!Thin || E)
-    return E;
+  if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind,
+                             Deterministic, Thin))
+    return createFileError(ArcName, std::move(E));
+
+  if (!Thin)
+    return Error::success();
+
   for (const NewArchiveMember &Member : NewMembers) {
     // Internally, FileBuffer will use the buffer created by
     // FileOutputBuffer::create, for regular files (that is the case for
@@ -101,132 +118,212 @@ static Error deepWriteArchive(StringRef ArcName,
     // NewArchiveMember still requires them even though writeArchive does not
     // write them on disk.
     FileBuffer FB(Member.MemberName);
-    FB.allocate(Member.Buf->getBufferSize());
+    if (Error E = FB.allocate(Member.Buf->getBufferSize()))
+      return E;
     std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(),
               FB.getBufferStart());
-    if (auto E = FB.commit())
+    if (Error E = FB.commit())
       return E;
   }
   return Error::success();
 }
 
+/// The function executeObjcopyOnIHex does the dispatch based on the format
+/// of the output specified by the command line options.
+static Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+                                  Buffer &Out) {
+  // TODO: support output formats other than ELF.
+  return elf::executeObjcopyOnIHex(Config, In, Out);
+}
+
 /// The function executeObjcopyOnRawBinary does the dispatch based on the format
 /// of the output specified by the command line options.
-static void executeObjcopyOnRawBinary(const CopyConfig &Config,
-                                      MemoryBuffer &In, Buffer &Out) {
-  // TODO: llvm-objcopy should parse CopyConfig.OutputFormat to recognize
-  // formats other than ELF / "binary" and invoke
-  // elf::executeObjcopyOnRawBinary, macho::executeObjcopyOnRawBinary or
-  // coff::executeObjcopyOnRawBinary accordingly.
-  return elf::executeObjcopyOnRawBinary(Config, In, Out);
+static Error executeObjcopyOnRawBinary(const CopyConfig &Config,
+                                       MemoryBuffer &In, Buffer &Out) {
+  switch (Config.OutputFormat) {
+  case FileFormat::ELF:
+  // FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the
+  // output format is binary/ihex or it's not given. This behavior differs from
+  // GNU objcopy. See https://bugs.llvm.org/show_bug.cgi?id=42171 for details.
+  case FileFormat::Binary:
+  case FileFormat::IHex:
+  case FileFormat::Unspecified:
+    return elf::executeObjcopyOnRawBinary(Config, In, Out);
+  }
+
+  llvm_unreachable("unsupported output format");
 }
 
 /// The function executeObjcopyOnBinary does the dispatch based on the format
 /// of the input binary (ELF, MachO or COFF).
-static void executeObjcopyOnBinary(const CopyConfig &Config, object::Binary &In,
-                                   Buffer &Out) {
+static Error executeObjcopyOnBinary(const CopyConfig &Config,
+                                    object::Binary &In, Buffer &Out) {
   if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In))
     return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out);
   else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
     return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
+  else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
+    return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
   else
-    error("Unsupported object file format");
+    return createStringError(object_error::invalid_file_type,
+                             "unsupported object file format");
 }
 
-static void executeObjcopyOnArchive(const CopyConfig &Config,
-                                    const Archive &Ar) {
+static Error executeObjcopyOnArchive(const CopyConfig &Config,
+                                     const Archive &Ar) {
   std::vector<NewArchiveMember> NewArchiveMembers;
   Error Err = Error::success();
   for (const Archive::Child &Child : Ar.children(Err)) {
-    Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
-    if (!ChildOrErr)
-      reportError(Ar.getFileName(), ChildOrErr.takeError());
-    Binary *Bin = ChildOrErr->get();
-
     Expected<StringRef> ChildNameOrErr = Child.getName();
     if (!ChildNameOrErr)
-      reportError(Ar.getFileName(), ChildNameOrErr.takeError());
+      return createFileError(Ar.getFileName(), ChildNameOrErr.takeError());
+
+    Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
+    if (!ChildOrErr)
+      return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")",
+                             ChildOrErr.takeError());
 
     MemBuffer MB(ChildNameOrErr.get());
-    executeObjcopyOnBinary(Config, *Bin, MB);
+    if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
+      return E;
 
     Expected<NewArchiveMember> Member =
         NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
     if (!Member)
-      reportError(Ar.getFileName(), Member.takeError());
+      return createFileError(Ar.getFileName(), Member.takeError());
     Member->Buf = MB.releaseMemoryBuffer();
     Member->MemberName = Member->Buf->getBufferIdentifier();
     NewArchiveMembers.push_back(std::move(*Member));
   }
-
   if (Err)
-    reportError(Config.InputFilename, std::move(Err));
-  if (Error E = deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
-                                 Ar.hasSymbolTable(), Ar.kind(),
-                                 Config.DeterministicArchives, Ar.isThin()))
-    reportError(Config.OutputFilename, std::move(E));
+    return createFileError(Config.InputFilename, std::move(Err));
+
+  return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+                          Ar.hasSymbolTable(), Ar.kind(),
+                          Config.DeterministicArchives, Ar.isThin());
 }
 
-static void restoreDateOnFile(StringRef Filename,
-                              const sys::fs::file_status &Stat) {
+static Error restoreStatOnFile(StringRef Filename,
+                               const sys::fs::file_status &Stat,
+                               bool PreserveDates) {
   int FD;
 
+  // Writing to stdout should not be treated as an error here, just
+  // do not set access/modification times or permissions.
+  if (Filename == "-")
+    return Error::success();
+
   if (auto EC =
           sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
-    reportError(Filename, EC);
+    return createFileError(Filename, EC);
+
+  if (PreserveDates)
+    if (auto EC = sys::fs::setLastAccessAndModificationTime(
+            FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
+      return createFileError(Filename, EC);
 
-  if (auto EC = sys::fs::setLastAccessAndModificationTime(
-          FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
-    reportError(Filename, EC);
+  sys::fs::file_status OStat;
+  if (std::error_code EC = sys::fs::status(FD, OStat))
+    return createFileError(Filename, EC);
+  if (OStat.type() == sys::fs::file_type::regular_file)
+#ifdef _WIN32
+    if (auto EC = sys::fs::setPermissions(
+            Filename, static_cast<sys::fs::perms>(Stat.permissions() &
+                                                  ~sys::fs::getUmask())))
+#else
+    if (auto EC = sys::fs::setPermissions(
+            FD, static_cast<sys::fs::perms>(Stat.permissions() &
+                                            ~sys::fs::getUmask())))
+#endif
+      return createFileError(Filename, EC);
 
   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
-    reportError(Filename, EC);
+    return createFileError(Filename, EC);
+
+  return Error::success();
 }
 
 /// The function executeObjcopy does the higher level dispatch based on the type
 /// of input (raw binary, archive or single object file) and takes care of the
 /// format-agnostic modifications, i.e. preserving dates.
-static void executeObjcopy(const CopyConfig &Config) {
+static Error executeObjcopy(const CopyConfig &Config) {
   sys::fs::file_status Stat;
-  if (Config.PreserveDates)
+  if (Config.InputFilename != "-") {
     if (auto EC = sys::fs::status(Config.InputFilename, Stat))
-      reportError(Config.InputFilename, EC);
+      return createFileError(Config.InputFilename, EC);
+  } else {
+    Stat.permissions(static_cast<sys::fs::perms>(0777));
+  }
 
-  if (Config.InputFormat == "binary") {
-    auto BufOrErr = MemoryBuffer::getFile(Config.InputFilename);
+  typedef Error (*ProcessRawFn)(const CopyConfig &, MemoryBuffer &, Buffer &);
+  ProcessRawFn ProcessRaw;
+  switch (Config.InputFormat) {
+  case FileFormat::Binary:
+    ProcessRaw = executeObjcopyOnRawBinary;
+    break;
+  case FileFormat::IHex:
+    ProcessRaw = executeObjcopyOnIHex;
+    break;
+  default:
+    ProcessRaw = nullptr;
+  }
+
+  if (ProcessRaw) {
+    auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Config.InputFilename);
     if (!BufOrErr)
-      reportError(Config.InputFilename, BufOrErr.getError());
+      return createFileError(Config.InputFilename, BufOrErr.getError());
     FileBuffer FB(Config.OutputFilename);
-    executeObjcopyOnRawBinary(Config, *BufOrErr->get(), FB);
+    if (Error E = ProcessRaw(Config, *BufOrErr->get(), FB))
+      return E;
   } else {
     Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
         createBinary(Config.InputFilename);
     if (!BinaryOrErr)
-      reportError(Config.InputFilename, BinaryOrErr.takeError());
+      return createFileError(Config.InputFilename, BinaryOrErr.takeError());
 
     if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
-      executeObjcopyOnArchive(Config, *Ar);
+      if (Error E = executeObjcopyOnArchive(Config, *Ar))
+        return E;
     } else {
       FileBuffer FB(Config.OutputFilename);
-      executeObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
+      if (Error E = executeObjcopyOnBinary(Config,
+                                           *BinaryOrErr.get().getBinary(), FB))
+        return E;
     }
   }
 
-  if (Config.PreserveDates) {
-    restoreDateOnFile(Config.OutputFilename, Stat);
-    if (!Config.SplitDWO.empty())
-      restoreDateOnFile(Config.SplitDWO, Stat);
+  if (Error E =
+          restoreStatOnFile(Config.OutputFilename, Stat, Config.PreserveDates))
+    return E;
+
+  if (!Config.SplitDWO.empty()) {
+    Stat.permissions(static_cast<sys::fs::perms>(0666));
+    if (Error E =
+            restoreStatOnFile(Config.SplitDWO, Stat, Config.PreserveDates))
+      return E;
   }
+
+  return Error::success();
 }
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   ToolName = argv[0];
-  DriverConfig DriverConfig;
-  if (sys::path::stem(ToolName).contains("strip"))
-    DriverConfig = parseStripOptions(makeArrayRef(argv + 1, argc));
-  else
-    DriverConfig = parseObjcopyOptions(makeArrayRef(argv + 1, argc));
-  for (const CopyConfig &CopyConfig : DriverConfig.CopyConfigs)
-    executeObjcopy(CopyConfig);
+  bool IsStrip = sys::path::stem(ToolName).contains("strip");
+  Expected<DriverConfig> DriverConfig =
+      IsStrip ? parseStripOptions(makeArrayRef(argv + 1, argc), reportWarning)
+              : parseObjcopyOptions(makeArrayRef(argv + 1, argc));
+  if (!DriverConfig) {
+    logAllUnhandledErrors(DriverConfig.takeError(),
+                          WithColor::error(errs(), ToolName));
+    return 1;
+  }
+  for (const CopyConfig &CopyConfig : DriverConfig->CopyConfigs) {
+    if (Error E = executeObjcopy(CopyConfig)) {
+      logAllUnhandledErrors(std::move(E), WithColor::error(errs(), ToolName));
+      return 1;
+    }
+  }
+
+  return 0;
 }
diff --git a/tools/llvm-objcopy/llvm-objcopy.h b/tools/llvm-objcopy/llvm-objcopy.h
index d8edf3e29ee0..18a789ca1f83 100644
--- a/tools/llvm-objcopy/llvm-objcopy.h
+++ b/tools/llvm-objcopy/llvm-objcopy.h
@@ -1,9 +1,8 @@
 //===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
 //
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -20,6 +19,7 @@ namespace llvm {
 namespace objcopy {
 
 LLVM_ATTRIBUTE_NORETURN extern void error(Twine Message);
+LLVM_ATTRIBUTE_NORETURN extern void error(Error E);
 LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File, Error E);
 LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File,
                                                 std::error_code EC);
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index 55607ec299be..1ba0a68902c9 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -1,9 +1,8 @@
 //===-- COFFDump.cpp - COFF-specific dumper ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -25,10 +24,10 @@
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
 using namespace llvm::Win64EH;
 
+namespace llvm {
 // Returns the name of the unwind code.
 static StringRef getUnwindCodeTypeName(uint8_t Code) {
   switch(Code) {
@@ -156,70 +155,68 @@ static void printAllUnwindCodes(ArrayRef<UnwindCode> UCs) {
 }
 
 // Given a symbol sym this functions returns the address and section of it.
-static std::error_code
-resolveSectionAndAddress(const COFFObjectFile *Obj, const SymbolRef &Sym,
-                         const coff_section *&ResolvedSection,
-                         uint64_t &ResolvedAddr) {
+static Error resolveSectionAndAddress(const COFFObjectFile *Obj,
+                                      const SymbolRef &Sym,
+                                      const coff_section *&ResolvedSection,
+                                      uint64_t &ResolvedAddr) {
   Expected<uint64_t> ResolvedAddrOrErr = Sym.getAddress();
   if (!ResolvedAddrOrErr)
-    return errorToErrorCode(ResolvedAddrOrErr.takeError());
+    return ResolvedAddrOrErr.takeError();
   ResolvedAddr = *ResolvedAddrOrErr;
   Expected<section_iterator> Iter = Sym.getSection();
   if (!Iter)
-    return errorToErrorCode(Iter.takeError());
+    return Iter.takeError();
   ResolvedSection = Obj->getCOFFSection(**Iter);
-  return std::error_code();
+  return Error::success();
 }
 
 // Given a vector of relocations for a section and an offset into this section
 // the function returns the symbol used for the relocation at the offset.
-static std::error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+static Error resolveSymbol(const std::vector<RelocationRef> &Rels,
                                      uint64_t Offset, SymbolRef &Sym) {
   for (auto &R : Rels) {
     uint64_t Ofs = R.getOffset();
     if (Ofs == Offset) {
       Sym = *R.getSymbol();
-      return std::error_code();
+      return Error::success();
     }
   }
-  return object_error::parse_failed;
+  return make_error<BinaryError>();
 }
 
 // Given a vector of relocations for a section and an offset into this section
 // the function resolves the symbol used for the relocation at the offset and
 // returns the section content and the address inside the content pointed to
 // by the symbol.
-static std::error_code
+static Error
 getSectionContents(const COFFObjectFile *Obj,
                    const std::vector<RelocationRef> &Rels, uint64_t Offset,
                    ArrayRef<uint8_t> &Contents, uint64_t &Addr) {
   SymbolRef Sym;
-  if (std::error_code EC = resolveSymbol(Rels, Offset, Sym))
-    return EC;
+  if (Error E = resolveSymbol(Rels, Offset, Sym))
+    return E;
   const coff_section *Section;
-  if (std::error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
-    return EC;
-  if (std::error_code EC = Obj->getSectionContents(Section, Contents))
-    return EC;
-  return std::error_code();
+  if (Error E = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+    return E;
+  return Obj->getSectionContents(Section, Contents);
 }
 
 // Given a vector of relocations for a section and an offset into this section
 // the function returns the name of the symbol used for the relocation at the
 // offset.
-static std::error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
-                                         uint64_t Offset, StringRef &Name) {
+static Error resolveSymbolName(const std::vector<RelocationRef> &Rels,
+                               uint64_t Offset, StringRef &Name) {
   SymbolRef Sym;
-  if (std::error_code EC = resolveSymbol(Rels, Offset, Sym))
+  if (Error EC = resolveSymbol(Rels, Offset, Sym))
     return EC;
   Expected<StringRef> NameOrErr = Sym.getName();
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   Name = *NameOrErr;
-  return std::error_code();
+  return Error::success();
 }
 
-static void printCOFFSymbolAddress(llvm::raw_ostream &Out,
+static void printCOFFSymbolAddress(raw_ostream &Out,
                                    const std::vector<RelocationRef> &Rels,
                                    uint64_t Offset, uint32_t Disp) {
   StringRef Sym;
@@ -469,6 +466,18 @@ static bool getPDataSection(const COFFObjectFile *Obj,
   return false;
 }
 
+Error getCOFFRelocationValueString(const COFFObjectFile *Obj,
+                                   const RelocationRef &Rel,
+                                   SmallVectorImpl<char> &Result) {
+  symbol_iterator SymI = Rel.getSymbol(); // NOTE(review): no end() check.
+  Expected<StringRef> SymNameOrErr = SymI->getName();
+  if (!SymNameOrErr)
+    return SymNameOrErr.takeError();
+  StringRef SymName = *SymNameOrErr;
+  Result.append(SymName.begin(), SymName.end());
+  return Error::success();
+}
+
 static void printWin64EHUnwindInfo(const Win64EH::UnwindInfo *UI) {
   // The casts to int are required in order to output the value as number.
   // Without the casts the value would be interpreted as char data (which
@@ -578,7 +587,7 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
   printWin64EHUnwindInfo(UI);
 }
 
-void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
+void printCOFFUnwindInfo(const COFFObjectFile *Obj) {
   if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_AMD64) {
     WithColor::error(errs(), "llvm-objdump")
         << "unsupported image machine type "
@@ -607,7 +616,7 @@ void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
   }
 }
 
-void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
+void printCOFFFileHeader(const object::ObjectFile *Obj) {
   const COFFObjectFile *file = dyn_cast<const COFFObjectFile>(Obj);
   printTLSDirectory(file);
   printLoadConfiguration(file);
@@ -615,7 +624,7 @@ void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
   printExportTable(file);
 }
 
-void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
+void printCOFFSymbolTable(const object::COFFImportFile *i) {
   unsigned Index = 0;
   bool IsCode = i->getCOFFImportHeader()->getType() == COFF::IMPORT_CODE;
 
@@ -623,7 +632,7 @@ void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
     std::string Name;
     raw_string_ostream NS(Name);
 
-    Sym.printName(NS);
+    cantFail(Sym.printName(NS));
     NS.flush();
 
     outs() << "[" << format("%2d", Index) << "]"
@@ -638,11 +647,11 @@ void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
   }
 }
 
-void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
+void printCOFFSymbolTable(const COFFObjectFile *coff) {
   for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
     Expected<COFFSymbolRef> Symbol = coff->getSymbol(SI);
     StringRef Name;
-    error(errorToErrorCode(Symbol.takeError()));
+    error(Symbol.takeError());
     error(coff->getSymbolName(*Symbol, Name));
 
     outs() << "[" << format("%2d", SI) << "]"
@@ -709,3 +718,4 @@ void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
     }
   }
 }
+} // namespace llvm
diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp
index b17a15a0d8fc..9c4d67d0f1bd 100644
--- a/tools/llvm-objdump/ELFDump.cpp
+++ b/tools/llvm-objdump/ELFDump.cpp
@@ -1,9 +1,8 @@
 //===-- ELFDump.cpp - ELF-specific dumper -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -13,23 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objdump.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
-using namespace llvm;
 using namespace llvm::object;
 
+namespace llvm {
 template <class ELFT>
-Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
-  typedef ELFFile<ELFT> ELFO;
-
+static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
   auto DynamicEntriesOrError = Elf->dynamicEntries();
   if (!DynamicEntriesOrError)
     return DynamicEntriesOrError.takeError();
 
-  for (const typename ELFO::Elf_Dyn &Dyn : *DynamicEntriesOrError) {
+  for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
     if (Dyn.d_tag == ELF::DT_STRTAB) {
       auto MappedAddrOrError = Elf->toMappedAddr(Dyn.getPtr());
       if (!MappedAddrOrError)
@@ -43,7 +41,7 @@ Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
   if (!SectionsOrError)
     return SectionsOrError.takeError();
 
-  for (const typename ELFO::Elf_Shdr &Sec : *SectionsOrError) {
+  for (const typename ELFT::Shdr &Sec : *SectionsOrError) {
     if (Sec.sh_type == ELF::SHT_DYNSYM)
       return Elf->getStringTableForSymtab(Sec);
   }
@@ -52,40 +50,135 @@ Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
 }
 
 template <class ELFT>
-void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
-  auto ProgramHeaderOrError = Elf->program_headers();
-  if (!ProgramHeaderOrError)
-    report_error(Filename, ProgramHeaderOrError.takeError());
+static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
+                                      const RelocationRef &RelRef,
+                                      SmallVectorImpl<char> &Result) {
+  const ELFFile<ELFT> &EF = *Obj->getELFFile();
+  DataRefImpl Rel = RelRef.getRawDataRefImpl();
+  auto SecOrErr = EF.getSection(Rel.d.a);
+  if (!SecOrErr)
+    return SecOrErr.takeError();
 
-  auto DynamicEntriesOrError = Elf->dynamicEntries();
-  if (!DynamicEntriesOrError)
-    report_error(Filename, DynamicEntriesOrError.takeError());
+  int64_t Addend = 0;
+  // If there is no Symbol associated with the relocation, we set the undef
+  // boolean value to 'true'. This will prevent us from calling functions that
+  // requires the relocation to be associated with a symbol.
+  //
+  // In SHT_REL case we would need to read the addend from section data.
+  // GNU objdump does not do that and we just follow for simplicity atm.
+  bool Undef = false;
+  if ((*SecOrErr)->sh_type == ELF::SHT_RELA) {
+    const typename ELFT::Rela *ERela = Obj->getRela(Rel);
+    Addend = ERela->r_addend;
+    Undef = ERela->getSymbol(false) == 0;
+  } else if ((*SecOrErr)->sh_type != ELF::SHT_REL) {
+    return make_error<BinaryError>();
+  }
+
+  // Default scheme is to print Target, as well as "+ <addend>" for nonzero
+  // addend. Should be acceptable for all normal purposes.
+  std::string FmtBuf;
+  raw_string_ostream Fmt(FmtBuf);
+
+  if (!Undef) {
+    symbol_iterator SI = RelRef.getSymbol();
+    const typename ELFT::Sym *Sym = Obj->getSymbol(SI->getRawDataRefImpl());
+    if (Sym->getType() == ELF::STT_SECTION) {
+      Expected<section_iterator> SymSI = SI->getSection();
+      if (!SymSI)
+        return SymSI.takeError();
+      const typename ELFT::Shdr *SymSec =
+          Obj->getSection((*SymSI)->getRawDataRefImpl());
+      auto SecName = EF.getSectionName(SymSec);
+      if (!SecName)
+        return SecName.takeError();
+      Fmt << *SecName;
+    } else {
+      Expected<StringRef> SymName = SI->getName();
+      if (!SymName)
+        return SymName.takeError();
+      if (Demangle)
+        Fmt << demangle(*SymName);
+      else
+        Fmt << *SymName;
+    }
+  } else {
+    Fmt << "*ABS*";
+  }
+
+  if (Addend != 0)
+    Fmt << (Addend < 0 ? "" : "+") << Addend;
+  Fmt.flush();
+  Result.append(FmtBuf.begin(), FmtBuf.end());
+  return Error::success();
+}
 
+Error getELFRelocationValueString(const ELFObjectFileBase *Obj,
+                                  const RelocationRef &Rel,
+                                  SmallVectorImpl<char> &Result) {
+  if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
+    return getRelocationValueString(ELF32LE, Rel, Result);
+  if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
+    return getRelocationValueString(ELF64LE, Rel, Result);
+  if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
+    return getRelocationValueString(ELF32BE, Rel, Result);
+  auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
+  return getRelocationValueString(ELF64BE, Rel, Result);
+}
+
+template <class ELFT>
+static uint64_t getSectionLMA(const ELFFile<ELFT> *Obj,
+                              const object::ELFSectionRef &Sec) {
+  auto PhdrRangeOrErr = Obj->program_headers();
+  if (!PhdrRangeOrErr)
+    report_fatal_error(toString(PhdrRangeOrErr.takeError()));
+
+  // Search for a PT_LOAD segment containing the requested section. Use this
+  // segment's p_addr to calculate the section's LMA.
+  for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
+    if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_vaddr <= Sec.getAddress()) &&
+        (Phdr.p_vaddr + Phdr.p_memsz > Sec.getAddress()))
+      return Sec.getAddress() - Phdr.p_vaddr + Phdr.p_paddr;
+
+  // Return section's VMA if it isn't in a PT_LOAD segment.
+  return Sec.getAddress();
+}
+
+uint64_t getELFSectionLMA(const object::ELFSectionRef &Sec) {
+  if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Sec.getObject()))
+    return getSectionLMA(ELFObj->getELFFile(), Sec);
+  else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Sec.getObject()))
+    return getSectionLMA(ELFObj->getELFFile(), Sec);
+  else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Sec.getObject()))
+    return getSectionLMA(ELFObj->getELFFile(), Sec);
+  const auto *ELFObj = cast<ELF64BEObjectFile>(Sec.getObject());
+  return getSectionLMA(ELFObj->getELFFile(), Sec);
+}
+
+template <class ELFT>
+void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
+  ArrayRef<typename ELFT::Dyn> DynamicEntries =
+      unwrapOrError(Elf->dynamicEntries(), Filename);
   outs() << "Dynamic Section:\n";
-  for (const auto &Dyn : *DynamicEntriesOrError) {
+  for (const typename ELFT::Dyn &Dyn : DynamicEntries) {
     if (Dyn.d_tag == ELF::DT_NULL)
       continue;
 
-    StringRef Str = StringRef(Elf->getDynamicTagAsString(Dyn.d_tag));
-
-    if (Str.empty()) {
-      std::string HexStr = utohexstr(static_cast<uint64_t>(Dyn.d_tag), true);
-      outs() << format("  0x%-19s", HexStr.c_str());
-    } else {
-      // We use "-21" in order to match GNU objdump's output.
-      outs() << format("  %-21s", Str.data());
-    }
+    std::string Str = Elf->getDynamicTagAsString(Dyn.d_tag);
+    outs() << format("  %-21s", Str.c_str());
 
     const char *Fmt =
         ELFT::Is64Bits ? "0x%016" PRIx64 "\n" : "0x%08" PRIx64 "\n";
-    if (Dyn.d_tag == ELF::DT_NEEDED) {
+    if (Dyn.d_tag == ELF::DT_NEEDED || Dyn.d_tag == ELF::DT_RPATH ||
+        Dyn.d_tag == ELF::DT_RUNPATH || Dyn.d_tag == ELF::DT_SONAME ||
+        Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER) {
       Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf);
       if (StrTabOrErr) {
         const char *Data = StrTabOrErr.get().data();
         outs() << (Data + Dyn.d_un.d_val) << "\n";
         continue;
       }
-      warn(errorToErrorCode(StrTabOrErr.takeError()).message());
+      warn(toString(StrTabOrErr.takeError()));
       consumeError(StrTabOrErr.takeError());
     }
     outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val);
@@ -93,13 +186,11 @@ void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
 }
 
 template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
-  typedef ELFFile<ELFT> ELFO;
   outs() << "Program Header:\n";
   auto ProgramHeaderOrError = o->program_headers();
   if (!ProgramHeaderOrError)
-    report_fatal_error(
-        errorToErrorCode(ProgramHeaderOrError.takeError()).message());
-  for (const typename ELFO::Elf_Phdr &Phdr : *ProgramHeaderOrError) {
+    report_fatal_error(toString(ProgramHeaderOrError.takeError()));
+  for (const typename ELFT::Phdr &Phdr : *ProgramHeaderOrError) {
     switch (Phdr.p_type) {
     case ELF::PT_DYNAMIC:
       outs() << " DYNAMIC ";
@@ -157,7 +248,86 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
   outs() << "\n";
 }
 
-void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
+template <class ELFT>
+void printSymbolVersionDependency(ArrayRef<uint8_t> Contents,
+                                  StringRef StrTab) {
+  outs() << "Version References:\n";
+
+  const uint8_t *Buf = Contents.data();
+  while (Buf) {
+    auto *Verneed = reinterpret_cast<const typename ELFT::Verneed *>(Buf);
+    outs() << "  required from "
+           << StringRef(StrTab.drop_front(Verneed->vn_file).data()) << ":\n";
+
+    const uint8_t *BufAux = Buf + Verneed->vn_aux;
+    while (BufAux) {
+      auto *Vernaux = reinterpret_cast<const typename ELFT::Vernaux *>(BufAux);
+      outs() << "    "
+             << format("0x%08" PRIx32 " ", (uint32_t)Vernaux->vna_hash)
+             << format("0x%02" PRIx16 " ", (uint16_t)Vernaux->vna_flags)
+             << format("%02" PRIu16 " ", (uint16_t)Vernaux->vna_other)
+             << StringRef(StrTab.drop_front(Vernaux->vna_name).data()) << '\n';
+      BufAux = Vernaux->vna_next ? BufAux + Vernaux->vna_next : nullptr;
+    }
+    Buf = Verneed->vn_next ? Buf + Verneed->vn_next : nullptr;
+  }
+}
+
+template <class ELFT>
+void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr,
+                                  ArrayRef<uint8_t> Contents,
+                                  StringRef StrTab) {
+  outs() << "Version definitions:\n";
+
+  const uint8_t *Buf = Contents.data();
+  uint32_t VerdefIndex = 1;
+  // sh_info contains the number of entries in the SHT_GNU_verdef section. To
+  // make the index column have consistent width, we should insert blank spaces
+  // according to sh_info.
+  uint16_t VerdefIndexWidth = std::to_string(Shdr.sh_info).size();
+  while (Buf) {
+    auto *Verdef = reinterpret_cast<const typename ELFT::Verdef *>(Buf);
+    outs() << format_decimal(VerdefIndex++, VerdefIndexWidth) << " "
+           << format("0x%02" PRIx16 " ", (uint16_t)Verdef->vd_flags)
+           << format("0x%08" PRIx32 " ", (uint32_t)Verdef->vd_hash);
+
+    const uint8_t *BufAux = Buf + Verdef->vd_aux;
+    uint16_t VerdauxIndex = 0;
+    while (BufAux) {
+      auto *Verdaux = reinterpret_cast<const typename ELFT::Verdaux *>(BufAux);
+      if (VerdauxIndex)
+        outs() << std::string(VerdefIndexWidth + 17, ' ');
+      outs() << StringRef(StrTab.drop_front(Verdaux->vda_name).data()) << '\n';
+      BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr;
+      ++VerdauxIndex;
+    }
+    Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr;
+  }
+}
+
+template <class ELFT>
+void printSymbolVersionInfo(const ELFFile<ELFT> *Elf, StringRef FileName) {
+  ArrayRef<typename ELFT::Shdr> Sections =
+      unwrapOrError(Elf->sections(), FileName);
+  for (const typename ELFT::Shdr &Shdr : Sections) {
+    if (Shdr.sh_type != ELF::SHT_GNU_verneed &&
+        Shdr.sh_type != ELF::SHT_GNU_verdef)
+      continue;
+
+    ArrayRef<uint8_t> Contents =
+        unwrapOrError(Elf->getSectionContents(&Shdr), FileName);
+    const typename ELFT::Shdr *StrTabSec =
+        unwrapOrError(Elf->getSection(Shdr.sh_link), FileName);
+    StringRef StrTab = unwrapOrError(Elf->getStringTable(StrTabSec), FileName);
+
+    if (Shdr.sh_type == ELF::SHT_GNU_verneed)
+      printSymbolVersionDependency<ELFT>(Contents, StrTab);
+    else
+      printSymbolVersionDefinition<ELFT>(Shdr, Contents, StrTab);
+  }
+}
+
+void printELFFileHeader(const object::ObjectFile *Obj) {
   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
     printProgramHeaders(ELFObj->getELFFile());
   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
@@ -168,7 +338,7 @@ void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
     printProgramHeaders(ELFObj->getELFFile());
 }
 
-void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
+void printELFDynamicSection(const object::ObjectFile *Obj) {
   if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
     printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
   else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
@@ -178,3 +348,15 @@ void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
   else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
     printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
 }
+
+void printELFSymbolVersionInfo(const object::ObjectFile *Obj) {
+  if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+    printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+  else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+    printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+  else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+    printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+  else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+    printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+}
+} // namespace llvm
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 5ef7058ec9da..58ff7be4543c 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -1,9 +1,8 @@
 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,83 +55,140 @@ extern "C" {
 }
 #endif
 
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
+
+namespace llvm {
+
+cl::OptionCategory MachOCat("llvm-objdump MachO Specific Options");
+
+extern cl::opt<bool> ArchiveHeaders;
+extern cl::opt<bool> Disassemble;
+extern cl::opt<bool> DisassembleAll;
+extern cl::opt<DIDumpType> DwarfDumpType;
+extern cl::list<std::string> FilterSections;
+extern cl::list<std::string> MAttrs;
+extern cl::opt<std::string> MCPU;
+extern cl::opt<bool> NoShowRawInsn;
+extern cl::opt<bool> NoLeadingAddr;
+extern cl::opt<bool> PrintImmHex;
+extern cl::opt<bool> PrivateHeaders;
+extern cl::opt<bool> Relocations;
+extern cl::opt<bool> SectionHeaders;
+extern cl::opt<bool> SectionContents;
+extern cl::opt<bool> SymbolTable;
+extern cl::opt<std::string> TripleName;
+extern cl::opt<bool> UnwindInfo;
+
+cl::opt<bool>
+    FirstPrivateHeader("private-header",
+                       cl::desc("Display only the first format specific file "
+                                "header"),
+                       cl::cat(MachOCat));
+
+cl::opt<bool> ExportsTrie("exports-trie",
+                          cl::desc("Display mach-o exported symbols"),
+                          cl::cat(MachOCat));
+
+cl::opt<bool> Rebase("rebase", cl::desc("Display mach-o rebasing info"),
+                     cl::cat(MachOCat));
+
+cl::opt<bool> Bind("bind", cl::desc("Display mach-o binding info"),
+                   cl::cat(MachOCat));
+
+cl::opt<bool> LazyBind("lazy-bind",
+                       cl::desc("Display mach-o lazy binding info"),
+                       cl::cat(MachOCat));
+
+cl::opt<bool> WeakBind("weak-bind",
+                       cl::desc("Display mach-o weak binding info"),
+                       cl::cat(MachOCat));
 
 static cl::opt<bool>
-    UseDbg("g",
-           cl::desc("Print line information from debug info if available"));
+    UseDbg("g", cl::Grouping,
+           cl::desc("Print line information from debug info if available"),
+           cl::cat(MachOCat));
 
 static cl::opt<std::string> DSYMFile("dsym",
-                                     cl::desc("Use .dSYM file for debug info"));
+                                     cl::desc("Use .dSYM file for debug info"),
+                                     cl::cat(MachOCat));
 
 static cl::opt<bool> FullLeadingAddr("full-leading-addr",
-                                     cl::desc("Print full leading address"));
+                                     cl::desc("Print full leading address"),
+                                     cl::cat(MachOCat));
 
 static cl::opt<bool> NoLeadingHeaders("no-leading-headers",
-                                      cl::desc("Print no leading headers"));
+                                      cl::desc("Print no leading headers"),
+                                      cl::cat(MachOCat));
 
-cl::opt<bool> llvm::UniversalHeaders("universal-headers",
-                                     cl::desc("Print Mach-O universal headers "
-                                              "(requires -macho)"));
+cl::opt<bool> UniversalHeaders("universal-headers",
+                               cl::desc("Print Mach-O universal headers "
+                                        "(requires -macho)"),
+                               cl::cat(MachOCat));
 
 cl::opt<bool>
     ArchiveMemberOffsets("archive-member-offsets",
                          cl::desc("Print the offset to each archive member for "
                                   "Mach-O archives (requires -macho and "
-                                  "-archive-headers)"));
-
-cl::opt<bool>
-    llvm::IndirectSymbols("indirect-symbols",
-                          cl::desc("Print indirect symbol table for Mach-O "
-                                   "objects (requires -macho)"));
+                                  "-archive-headers)"),
+                         cl::cat(MachOCat));
 
-cl::opt<bool>
-    llvm::DataInCode("data-in-code",
-                     cl::desc("Print the data in code table for Mach-O objects "
-                              "(requires -macho)"));
-
-cl::opt<bool>
-    llvm::LinkOptHints("link-opt-hints",
-                       cl::desc("Print the linker optimization hints for "
-                                "Mach-O objects (requires -macho)"));
-
-cl::opt<bool>
-    llvm::InfoPlist("info-plist",
-                    cl::desc("Print the info plist section as strings for "
-                             "Mach-O objects (requires -macho)"));
+cl::opt<bool> IndirectSymbols("indirect-symbols",
+                              cl::desc("Print indirect symbol table for Mach-O "
+                                       "objects (requires -macho)"),
+                              cl::cat(MachOCat));
 
 cl::opt<bool>
-    llvm::DylibsUsed("dylibs-used",
-                     cl::desc("Print the shared libraries used for linked "
-                              "Mach-O files (requires -macho)"));
+    DataInCode("data-in-code",
+               cl::desc("Print the data in code table for Mach-O objects "
+                        "(requires -macho)"),
+               cl::cat(MachOCat));
+
+cl::opt<bool> LinkOptHints("link-opt-hints",
+                           cl::desc("Print the linker optimization hints for "
+                                    "Mach-O objects (requires -macho)"),
+                           cl::cat(MachOCat));
+
+cl::opt<bool> InfoPlist("info-plist",
+                        cl::desc("Print the info plist section as strings for "
+                                 "Mach-O objects (requires -macho)"),
+                        cl::cat(MachOCat));
+
+cl::opt<bool> DylibsUsed("dylibs-used",
+                         cl::desc("Print the shared libraries used for linked "
+                                  "Mach-O files (requires -macho)"),
+                         cl::cat(MachOCat));
 
 cl::opt<bool>
-    llvm::DylibId("dylib-id",
-                  cl::desc("Print the shared library's id for the dylib Mach-O "
-                           "file (requires -macho)"));
+    DylibId("dylib-id",
+            cl::desc("Print the shared library's id for the dylib Mach-O "
+                     "file (requires -macho)"),
+            cl::cat(MachOCat));
 
 cl::opt<bool>
-    llvm::NonVerbose("non-verbose",
-                     cl::desc("Print the info for Mach-O objects in "
-                              "non-verbose or numeric form (requires -macho)"));
+    NonVerbose("non-verbose",
+               cl::desc("Print the info for Mach-O objects in "
+                        "non-verbose or numeric form (requires -macho)"),
+               cl::cat(MachOCat));
 
 cl::opt<bool>
-    llvm::ObjcMetaData("objc-meta-data",
-                       cl::desc("Print the Objective-C runtime meta data for "
-                                "Mach-O files (requires -macho)"));
+    ObjcMetaData("objc-meta-data",
+                 cl::desc("Print the Objective-C runtime meta data for "
+                          "Mach-O files (requires -macho)"),
+                 cl::cat(MachOCat));
 
-cl::opt<std::string> llvm::DisSymName(
+cl::opt<std::string> DisSymName(
     "dis-symname",
-    cl::desc("disassemble just this symbol's instructions (requires -macho)"));
+    cl::desc("disassemble just this symbol's instructions (requires -macho)"),
+    cl::cat(MachOCat));
 
 static cl::opt<bool> NoSymbolicOperands(
     "no-symbolic-operands",
-    cl::desc("do not symbolic operands when disassembling (requires -macho)"));
+    cl::desc("do not symbolic operands when disassembling (requires -macho)"),
+    cl::cat(MachOCat));
 
 static cl::list<std::string>
     ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
-              cl::ZeroOrMore);
+              cl::ZeroOrMore, cl::cat(MachOCat));
 
 bool ArchAll = false;
 
@@ -142,7 +198,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
                                const char **McpuDefault,
                                const Target **ThumbTarget) {
   // Figure out the target triple.
-  llvm::Triple TT(TripleName);
+  Triple TT(TripleName);
   if (TripleName.empty()) {
     TT = MachOObj->getArchTriple(McpuDefault);
     TripleName = TT.str();
@@ -151,7 +207,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
   if (TT.getArch() == Triple::arm) {
     // We've inferred a 32-bit ARM target from the object file. All MachO CPUs
     // that support ARM are also capable of Thumb mode.
-    llvm::Triple ThumbTriple = TT;
+    Triple ThumbTriple = TT;
     std::string ThumbName = (Twine("thumb") + TT.getArchName().substr(3)).str();
     ThumbTriple.setArchName(ThumbName);
     ThumbTripleName = ThumbTriple.str();
@@ -180,11 +236,11 @@ struct SymbolSorter {
   bool operator()(const SymbolRef &A, const SymbolRef &B) {
     Expected<SymbolRef::Type> ATypeOrErr = A.getType();
     if (!ATypeOrErr)
-      report_error(A.getObject()->getFileName(), ATypeOrErr.takeError());
+      report_error(ATypeOrErr.takeError(), A.getObject()->getFileName());
     SymbolRef::Type AType = *ATypeOrErr;
     Expected<SymbolRef::Type> BTypeOrErr = B.getType();
     if (!BTypeOrErr)
-      report_error(B.getObject()->getFileName(), BTypeOrErr.takeError());
+      report_error(BTypeOrErr.takeError(), B.getObject()->getFileName());
     SymbolRef::Type BType = *BTypeOrErr;
     uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue();
     uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue();
@@ -308,11 +364,10 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
                                   std::vector<SymbolRef> &Symbols,
                                   SmallVectorImpl<uint64_t> &FoundFns,
                                   uint64_t &BaseSegmentAddress) {
+  const StringRef FileName = MachOObj->getFileName();
   for (const SymbolRef &Symbol : MachOObj->symbols()) {
-    Expected<StringRef> SymName = Symbol.getName();
-    if (!SymName)
-      report_error(MachOObj->getFileName(), SymName.takeError());
-    if (!SymName->startswith("ltmp"))
+    StringRef SymName = unwrapOrError(Symbol.getName(), FileName);
+    if (!SymName.startswith("ltmp"))
       Symbols.push_back(Symbol);
   }
 
@@ -342,6 +397,254 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
   }
 }
 
+static void printRelocationTargetName(const MachOObjectFile *O,
+                                      const MachO::any_relocation_info &RE,
+                                      raw_string_ostream &Fmt) {
+  // Target of a scattered relocation is an address.  In the interest of
+  // generating pretty output, scan through the symbol table looking for a
+  // symbol that aligns with that address.  If we find one, print it.
+  // Otherwise, we just print the hex address of the target.
+  const StringRef FileName = O->getFileName();
+  if (O->isRelocationScattered(RE)) {
+    uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+
+    for (const SymbolRef &Symbol : O->symbols()) {
+      uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+      if (Addr != Val)
+        continue;
+      Fmt << unwrapOrError(Symbol.getName(), FileName);
+      return;
+    }
+
+    // If we couldn't find a symbol that this relocation refers to, try
+    // to find a section beginning instead.
+    for (const SectionRef &Section : ToolSectionFilter(*O)) {
+      StringRef Name;
+      uint64_t Addr = Section.getAddress();
+      if (Addr != Val)
+        continue;
+      if (std::error_code EC = Section.getName(Name))
+        report_error(errorCodeToError(EC), O->getFileName());
+      Fmt << Name;
+      return;
+    }
+
+    Fmt << format("0x%x", Val);
+    return;
+  }
+
+  StringRef S;
+  bool isExtern = O->getPlainRelocationExternal(RE);
+  uint64_t Val = O->getPlainRelocationSymbolNum(RE);
+
+  if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
+    Fmt << format("0x%0" PRIx64, Val);
+    return;
+  }
+
+  if (isExtern) {
+    symbol_iterator SI = O->symbol_begin();
+    advance(SI, Val);
+    S = unwrapOrError(SI->getName(), FileName);
+  } else {
+    section_iterator SI = O->section_begin();
+    // Adjust for the fact that sections are 1-indexed.
+    if (Val == 0) {
+      Fmt << "0 (?,?)";
+      return;
+    }
+    uint32_t I = Val - 1;
+    while (I != 0 && SI != O->section_end()) {
+      --I;
+      advance(SI, 1);
+    }
+    if (SI == O->section_end())
+      Fmt << Val << " (?,?)";
+    else
+      SI->getName(S);
+  }
+
+  Fmt << S;
+}
+
+Error getMachORelocationValueString(const MachOObjectFile *Obj,
+                                    const RelocationRef &RelRef,
+                                    SmallVectorImpl<char> &Result) {
+  DataRefImpl Rel = RelRef.getRawDataRefImpl();
+  MachO::any_relocation_info RE = Obj->getRelocation(Rel);
+
+  unsigned Arch = Obj->getArch();
+
+  std::string FmtBuf;
+  raw_string_ostream Fmt(FmtBuf);
+  unsigned Type = Obj->getAnyRelocationType(RE);
+  bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
+
+  // Determine any addends that should be displayed with the relocation.
+  // These require decoding the relocation type, which is triple-specific.
+
+  // X86_64 has entirely custom relocation types.
+  if (Arch == Triple::x86_64) {
+    switch (Type) {
+    case MachO::X86_64_RELOC_GOT_LOAD:
+    case MachO::X86_64_RELOC_GOT: {
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "@GOT";
+      if (IsPCRel)
+        Fmt << "PCREL";
+      break;
+    }
+    case MachO::X86_64_RELOC_SUBTRACTOR: {
+      DataRefImpl RelNext = Rel;
+      Obj->moveRelocationNext(RelNext);
+      MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+      // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
+      // X86_64_RELOC_UNSIGNED.
+      // NOTE: Scattered relocations don't exist on x86_64.
+      unsigned RType = Obj->getAnyRelocationType(RENext);
+      if (RType != MachO::X86_64_RELOC_UNSIGNED)
+        report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
+                                         "X86_64_RELOC_SUBTRACTOR.");
+
+      // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
+      // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
+      printRelocationTargetName(Obj, RENext, Fmt);
+      Fmt << "-";
+      printRelocationTargetName(Obj, RE, Fmt);
+      break;
+    }
+    case MachO::X86_64_RELOC_TLV:
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "@TLV";
+      if (IsPCRel)
+        Fmt << "P";
+      break;
+    case MachO::X86_64_RELOC_SIGNED_1:
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "-1";
+      break;
+    case MachO::X86_64_RELOC_SIGNED_2:
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "-2";
+      break;
+    case MachO::X86_64_RELOC_SIGNED_4:
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "-4";
+      break;
+    default:
+      printRelocationTargetName(Obj, RE, Fmt);
+      break;
+    }
+    // X86 and ARM share some relocation types in common.
+  } else if (Arch == Triple::x86 || Arch == Triple::arm ||
+             Arch == Triple::ppc) {
+    // Generic relocation types...
+    switch (Type) {
+    case MachO::GENERIC_RELOC_PAIR: // prints no info
+      return Error::success();
+    case MachO::GENERIC_RELOC_SECTDIFF: {
+      DataRefImpl RelNext = Rel;
+      Obj->moveRelocationNext(RelNext);
+      MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+      // X86 sect diff's must be followed by a relocation of type
+      // GENERIC_RELOC_PAIR.
+      unsigned RType = Obj->getAnyRelocationType(RENext);
+
+      if (RType != MachO::GENERIC_RELOC_PAIR)
+        report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+                                         "GENERIC_RELOC_SECTDIFF.");
+
+      printRelocationTargetName(Obj, RE, Fmt);
+      Fmt << "-";
+      printRelocationTargetName(Obj, RENext, Fmt);
+      break;
+    }
+    }
+
+    if (Arch == Triple::x86 || Arch == Triple::ppc) {
+      switch (Type) {
+      case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
+        DataRefImpl RelNext = Rel;
+        Obj->moveRelocationNext(RelNext);
+        MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+        // X86 sect diff's must be followed by a relocation of type
+        // GENERIC_RELOC_PAIR.
+        unsigned RType = Obj->getAnyRelocationType(RENext);
+        if (RType != MachO::GENERIC_RELOC_PAIR)
+          report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+                                           "GENERIC_RELOC_LOCAL_SECTDIFF.");
+
+        printRelocationTargetName(Obj, RE, Fmt);
+        Fmt << "-";
+        printRelocationTargetName(Obj, RENext, Fmt);
+        break;
+      }
+      case MachO::GENERIC_RELOC_TLV: {
+        printRelocationTargetName(Obj, RE, Fmt);
+        Fmt << "@TLV";
+        if (IsPCRel)
+          Fmt << "P";
+        break;
+      }
+      default:
+        printRelocationTargetName(Obj, RE, Fmt);
+      }
+    } else { // ARM-specific relocations
+      switch (Type) {
+      case MachO::ARM_RELOC_HALF:
+      case MachO::ARM_RELOC_HALF_SECTDIFF: {
+        // Half relocations steal a bit from the length field to encode
+        // whether this is an upper16 or a lower16 relocation.
+        bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
+
+        if (isUpper)
+          Fmt << ":upper16:(";
+        else
+          Fmt << ":lower16:(";
+        printRelocationTargetName(Obj, RE, Fmt);
+
+        DataRefImpl RelNext = Rel;
+        Obj->moveRelocationNext(RelNext);
+        MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+        // ARM half relocs must be followed by a relocation of type
+        // ARM_RELOC_PAIR.
+        unsigned RType = Obj->getAnyRelocationType(RENext);
+        if (RType != MachO::ARM_RELOC_PAIR)
+          report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
+                                           "ARM_RELOC_HALF");
+
+        // NOTE: The half of the target virtual address is stashed in the
+        // address field of the secondary relocation, but we can't reverse
+        // engineer the constant offset from it without decoding the movw/movt
+        // instruction to find the other half in its immediate field.
+
+        // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+        // symbol/section pointer of the follow-on relocation.
+        if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
+          Fmt << "-";
+          printRelocationTargetName(Obj, RENext, Fmt);
+        }
+
+        Fmt << ")";
+        break;
+      }
+      default: {
+        printRelocationTargetName(Obj, RE, Fmt);
+      }
+      }
+    }
+  } else
+    printRelocationTargetName(Obj, RE, Fmt);
+
+  Fmt.flush();
+  Result.append(FmtBuf.begin(), FmtBuf.end());
+  return Error::success();
+}
+
 static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
                                      uint32_t n, uint32_t count,
                                      uint32_t stride, uint64_t addr) {
@@ -389,10 +692,7 @@ static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
       if (indirect_symbol < Symtab.nsyms) {
         symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol);
         SymbolRef Symbol = *Sym;
-        Expected<StringRef> SymName = Symbol.getName();
-        if (!SymName)
-          report_error(O->getFileName(), SymName.takeError());
-        outs() << *SymName;
+        outs() << unwrapOrError(Symbol.getName(), O->getFileName());
       } else {
         outs() << "?";
       }
@@ -500,6 +800,7 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
       outs() << arm_r_types[r_type];
       break;
     case MachO::CPU_TYPE_ARM64:
+    case MachO::CPU_TYPE_ARM64_32:
       outs() << arm64_r_types[r_type];
       break;
     default:
@@ -510,9 +811,8 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
 static void PrintRLength(const uint64_t cputype, const unsigned r_type,
                          const unsigned r_length, const bool previous_arm_half){
   if (cputype == MachO::CPU_TYPE_ARM &&
-      (r_type == llvm::MachO::ARM_RELOC_HALF ||
-       r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF ||
-       previous_arm_half == true)) {
+      (r_type == MachO::ARM_RELOC_HALF ||
+       r_type == MachO::ARM_RELOC_HALF_SECTDIFF || previous_arm_half == true)) {
     if ((r_length & 0x1) == 0)
       outs() << "lo/";
     else
@@ -573,9 +873,8 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
       if (verbose) {
         // scattered: address
         if ((cputype == MachO::CPU_TYPE_I386 &&
-             r_type == llvm::MachO::GENERIC_RELOC_PAIR) ||
-            (cputype == MachO::CPU_TYPE_ARM &&
-             r_type == llvm::MachO::ARM_RELOC_PAIR))
+             r_type == MachO::GENERIC_RELOC_PAIR) ||
+            (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR))
           outs() << "         ";
         else
           outs() << format("%08x ", (unsigned int)r_address);
@@ -597,29 +896,27 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
         outs() << format("True      0x%08x", (unsigned int)r_value);
         if (previous_sectdiff == false) {
           if ((cputype == MachO::CPU_TYPE_ARM &&
-               r_type == llvm::MachO::ARM_RELOC_PAIR))
+               r_type == MachO::ARM_RELOC_PAIR))
             outs() << format(" half = 0x%04x ", (unsigned int)r_address);
-        }
-        else if (cputype == MachO::CPU_TYPE_ARM &&
-                 sectdiff_r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF)
+        } else if (cputype == MachO::CPU_TYPE_ARM &&
+                   sectdiff_r_type == MachO::ARM_RELOC_HALF_SECTDIFF)
           outs() << format(" other_half = 0x%04x ", (unsigned int)r_address);
         if ((cputype == MachO::CPU_TYPE_I386 &&
-             (r_type == llvm::MachO::GENERIC_RELOC_SECTDIFF ||
-              r_type == llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) ||
+             (r_type == MachO::GENERIC_RELOC_SECTDIFF ||
+              r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) ||
             (cputype == MachO::CPU_TYPE_ARM &&
-             (sectdiff_r_type == llvm::MachO::ARM_RELOC_SECTDIFF ||
-              sectdiff_r_type == llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF ||
-              sectdiff_r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))) {
-               previous_sectdiff = true;
-               sectdiff_r_type = r_type;
-             }
-        else {
+             (sectdiff_r_type == MachO::ARM_RELOC_SECTDIFF ||
+              sectdiff_r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF ||
+              sectdiff_r_type == MachO::ARM_RELOC_HALF_SECTDIFF))) {
+          previous_sectdiff = true;
+          sectdiff_r_type = r_type;
+        } else {
           previous_sectdiff = false;
           sectdiff_r_type = 0;
         }
         if (cputype == MachO::CPU_TYPE_ARM &&
-            (r_type == llvm::MachO::ARM_RELOC_HALF ||
-             r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))
+            (r_type == MachO::ARM_RELOC_HALF ||
+             r_type == MachO::ARM_RELOC_HALF_SECTDIFF))
           previous_arm_half = true;
         else
           previous_arm_half = false;
@@ -635,8 +932,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
     else {
       if (verbose) {
         // plain: address
-        if (cputype == MachO::CPU_TYPE_ARM &&
-            r_type == llvm::MachO::ARM_RELOC_PAIR)
+        if (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR)
           outs() << "         ";
         else
           outs() << format("%08x ", (unsigned int)r_address);
@@ -678,28 +974,27 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
           outs() << "False     ";
 
           // plain: symbolnum/value
-          if (cputype == MachO::CPU_TYPE_ARM &&
-                   r_type == llvm::MachO::ARM_RELOC_PAIR)
+          if (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR)
             outs() << format("other_half = 0x%04x\n", (unsigned int)r_address);
-          else if (cputype == MachO::CPU_TYPE_ARM64 &&
-                   r_type == llvm::MachO::ARM64_RELOC_ADDEND)
+          else if ((cputype == MachO::CPU_TYPE_ARM64 ||
+                    cputype == MachO::CPU_TYPE_ARM64_32) &&
+                   r_type == MachO::ARM64_RELOC_ADDEND)
             outs() << format("addend = 0x%06x\n", (unsigned int)r_symbolnum);
           else {
             outs() << format("%d ", r_symbolnum);
-            if (r_symbolnum == llvm::MachO::R_ABS)
+            if (r_symbolnum == MachO::R_ABS)
               outs() << "R_ABS\n";
             else {
               // in this case, r_symbolnum is actually a 1-based section number
               uint32_t nsects = O->section_end()->getRawDataRefImpl().d.a;
               if (r_symbolnum > 0 && r_symbolnum <= nsects) {
-                llvm::object::DataRefImpl DRI;
+                object::DataRefImpl DRI;
                 DRI.d.a = r_symbolnum-1;
                 StringRef SegName = O->getSectionFinalSegmentName(DRI);
-                StringRef SectName;
-                if (O->getSectionName(DRI, SectName))
-                  outs() << "(?,?)\n";
+                if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+                  outs() << "(" << SegName << "," << *NameOrErr << ")\n";
                 else
-                  outs() << "(" << SegName << "," << SectName << ")\n";
+                  outs() << "(?,?)\n";
               }
               else {
                 outs() << "(?,?)\n";
@@ -708,8 +1003,8 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
           }
         }
         if (cputype == MachO::CPU_TYPE_ARM &&
-            (r_type == llvm::MachO::ARM_RELOC_HALF ||
-             r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))
+            (r_type == MachO::ARM_RELOC_HALF ||
+             r_type == MachO::ARM_RELOC_HALF_SECTDIFF))
           previous_arm_half = true;
         else
           previous_arm_half = false;
@@ -752,13 +1047,12 @@ static void PrintRelocations(const MachOObjectFile *O, const bool verbose) {
           DataRefImpl DRI;
           DRI.d.a = J;
           const StringRef SegName = O->getSectionFinalSegmentName(DRI);
-          StringRef SectName;
-          if (O->getSectionName(DRI, SectName))
+          if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+            outs() << "Relocation information (" << SegName << "," << *NameOrErr
+                   << format(") %u entries", Sec.nreloc);
+          else
             outs() << "Relocation information (" << SegName << ",?) "
                    << format("%u entries", Sec.nreloc);
-          else
-            outs() << "Relocation information (" << SegName << ","
-                   << SectName << format(") %u entries", Sec.nreloc);
           outs() << "\naddress  pcrel length extern type    scattered "
                     "symbolnum/value\n";
           PrintRelocationEntries(O, O->section_rel_begin(DRI),
@@ -773,13 +1067,12 @@ static void PrintRelocations(const MachOObjectFile *O, const bool verbose) {
           DataRefImpl DRI;
           DRI.d.a = J;
           const StringRef SegName = O->getSectionFinalSegmentName(DRI);
-          StringRef SectName;
-          if (O->getSectionName(DRI, SectName))
+          if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+            outs() << "Relocation information (" << SegName << "," << *NameOrErr
+                   << format(") %u entries", Sec.nreloc);
+          else
             outs() << "Relocation information (" << SegName << ",?) "
                    << format("%u entries", Sec.nreloc);
-          else
-            outs() << "Relocation information (" << SegName << ","
-                   << SectName << format(") %u entries", Sec.nreloc);
           outs() << "\naddress  pcrel length extern type    scattered "
                     "symbolnum/value\n";
           PrintRelocationEntries(O, O->section_rel_begin(DRI),
@@ -913,7 +1206,16 @@ static void PrintDylibs(MachOObjectFile *O, bool JustId) {
           outs() << " current version "
                  << ((dl.dylib.current_version >> 16) & 0xffff) << "."
                  << ((dl.dylib.current_version >> 8) & 0xff) << "."
-                 << (dl.dylib.current_version & 0xff) << ")\n";
+                 << (dl.dylib.current_version & 0xff);
+          if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB)
+            outs() << ", weak";
+          if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB)
+            outs() << ", reexport";
+          if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB)
+            outs() << ", upward";
+          if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB)
+            outs() << ", lazy";
+          outs() << ")\n";
         }
       } else {
         outs() << "\tBad offset (" << dl.dylib.name << ") for name of ";
@@ -942,18 +1244,13 @@ typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
 static void CreateSymbolAddressMap(MachOObjectFile *O,
                                    SymbolAddressMap *AddrMap) {
   // Create a map of symbol addresses to symbol names.
+  const StringRef FileName = O->getFileName();
   for (const SymbolRef &Symbol : O->symbols()) {
-    Expected<SymbolRef::Type> STOrErr = Symbol.getType();
-    if (!STOrErr)
-      report_error(O->getFileName(), STOrErr.takeError());
-    SymbolRef::Type ST = *STOrErr;
+    SymbolRef::Type ST = unwrapOrError(Symbol.getType(), FileName);
     if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
         ST == SymbolRef::ST_Other) {
       uint64_t Address = Symbol.getValue();
-      Expected<StringRef> SymNameOrErr = Symbol.getName();
-      if (!SymNameOrErr)
-        report_error(O->getFileName(), SymNameOrErr.takeError());
-      StringRef SymName = *SymNameOrErr;
+      StringRef SymName = unwrapOrError(Symbol.getName(), FileName);
       if (!SymName.startswith(".objc"))
         (*AddrMap)[Address] = SymName;
     }
@@ -1186,10 +1483,8 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
     });
     if (Reloc != Relocs.end()) {
       symbol_iterator RelocSym = Reloc->second;
-      Expected<StringRef> SymName = RelocSym->getName();
-      if (!SymName)
-        report_error(O->getFileName(), SymName.takeError());
-      outs() << "external relocation entry for symbol:" << *SymName << "\n";
+      StringRef SymName = unwrapOrError(RelocSym->getName(), O->getFileName());
+      outs() << "external relocation entry for symbol:" << SymName << "\n";
       continue;
     }
 
@@ -1220,8 +1515,8 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
       section_type = Sec.flags & MachO::SECTION_TYPE;
     }
 
-    StringRef BytesStr;
-    Sect->getContents(BytesStr);
+    StringRef BytesStr = unwrapOrError(Sect->getContents(), O->getFileName());
+
     const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
 
     switch (section_type) {
@@ -1333,10 +1628,7 @@ static void DumpInitTermPointerSection(MachOObjectFile *O,
       });
       if (Reloc != Relocs.end()) {
         symbol_iterator RelocSym = Reloc->second;
-        Expected<StringRef> SymName = RelocSym->getName();
-        if (!SymName)
-          report_error(O->getFileName(), SymName.takeError());
-        outs() << " " << *SymName;
+        outs() << " " << unwrapOrError(RelocSym->getName(), O->getFileName());
       } else {
         SymbolName = GuessSymbolName(p, AddrMap);
         if (SymbolName)
@@ -1438,8 +1730,8 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
         }
         uint32_t section_type = section_flags & MachO::SECTION_TYPE;
 
-        StringRef BytesStr;
-        Section.getContents(BytesStr);
+        StringRef BytesStr =
+            unwrapOrError(Section.getContents(), O->getFileName());
         const char *sect = reinterpret_cast<const char *>(BytesStr.data());
         uint32_t sect_size = BytesStr.size();
         uint64_t sect_addr = Section.getAddress();
@@ -1523,8 +1815,8 @@ static void DumpInfoPlistSectionContents(StringRef Filename,
     if (SegName == "__TEXT" && SectName == "__info_plist") {
       if (!NoLeadingHeaders)
         outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
-      StringRef BytesStr;
-      Section.getContents(BytesStr);
+      StringRef BytesStr =
+          unwrapOrError(Section.getContents(), O->getFileName());
       const char *sect = reinterpret_cast<const char *>(BytesStr.data());
       outs() << format("%.*s", BytesStr.size(), sect) << "\n";
       return;
@@ -1609,8 +1901,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
   // the error message.
   if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo)
     if (Error Err = MachOOF->checkSymbolTable())
-      report_error(ArchiveName, FileName, std::move(Err), ArchitectureName);
-  
+      report_error(std::move(Err), ArchiveName, FileName, ArchitectureName);
+
   if (DisassembleAll) {
     for (const SectionRef &Section : MachOOF->sections()) {
       StringRef SectName;
@@ -1774,6 +2066,21 @@ static void printCPUType(uint32_t cputype, uint32_t cpusubtype) {
       outs() << "    cputype CPU_TYPE_ARM64\n";
       outs() << "    cpusubtype CPU_SUBTYPE_ARM64_ALL\n";
       break;
+    case MachO::CPU_SUBTYPE_ARM64E:
+      outs() << "    cputype CPU_TYPE_ARM64\n";
+      outs() << "    cpusubtype CPU_SUBTYPE_ARM64E\n";
+      break;
+    default:
+      printUnknownCPUType(cputype, cpusubtype);
+      break;
+    }
+    break;
+  case MachO::CPU_TYPE_ARM64_32:
+    switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) {
+    case MachO::CPU_SUBTYPE_ARM64_32_V8:
+      outs() << "    cputype CPU_TYPE_ARM64_32\n";
+      outs() << "    cpusubtype CPU_SUBTYPE_ARM64_32_V8\n";
+      break;
     default:
       printUnknownCPUType(cputype, cpusubtype);
       break;
@@ -1862,10 +2169,8 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
                               StringRef ArchitectureName = StringRef()) {
   if (print_offset)
     outs() << C.getChildOffset() << "\t";
-  Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
-  if (!ModeOrErr)
-    report_error(Filename, C, ModeOrErr.takeError(), ArchitectureName);
-  sys::fs::perms Mode = ModeOrErr.get();
+  sys::fs::perms Mode =
+      unwrapOrError(C.getAccessMode(), Filename, C, ArchitectureName);
   if (verbose) {
     // FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
     // But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
@@ -1883,20 +2188,11 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
     outs() << format("0%o ", Mode);
   }
 
-  Expected<unsigned> UIDOrErr = C.getUID();
-  if (!UIDOrErr)
-    report_error(Filename, C, UIDOrErr.takeError(), ArchitectureName);
-  unsigned UID = UIDOrErr.get();
-  outs() << format("%3d/", UID);
-  Expected<unsigned> GIDOrErr = C.getGID();
-  if (!GIDOrErr)
-    report_error(Filename, C, GIDOrErr.takeError(), ArchitectureName);
-  unsigned GID = GIDOrErr.get();
-  outs() << format("%-3d ", GID);
-  Expected<uint64_t> Size = C.getRawSize();
-  if (!Size)
-    report_error(Filename, C, Size.takeError(), ArchitectureName);
-  outs() << format("%5" PRId64, Size.get()) << " ";
+  outs() << format(
+      "%3d/%-3d %5" PRId64 " ",
+      unwrapOrError(C.getUID(), Filename, C, ArchitectureName),
+      unwrapOrError(C.getGID(), Filename, C, ArchitectureName),
+      unwrapOrError(C.getRawSize(), Filename, C, ArchitectureName));
 
   StringRef RawLastModified = C.getRawLastModified();
   if (verbose) {
@@ -1919,21 +2215,15 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
     Expected<StringRef> NameOrErr = C.getName();
     if (!NameOrErr) {
       consumeError(NameOrErr.takeError());
-      Expected<StringRef> NameOrErr = C.getRawName();
-      if (!NameOrErr)
-        report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
-      StringRef RawName = NameOrErr.get();
-      outs() << RawName << "\n";
+      outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+             << "\n";
     } else {
       StringRef Name = NameOrErr.get();
       outs() << Name << "\n";
     }
   } else {
-    Expected<StringRef> NameOrErr = C.getRawName();
-    if (!NameOrErr)
-      report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
-    StringRef RawName = NameOrErr.get();
-    outs() << RawName << "\n";
+    outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+           << "\n";
   }
 }
 
@@ -1941,12 +2231,11 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
                                 bool print_offset,
                                 StringRef ArchitectureName = StringRef()) {
   Error Err = Error::success();
-  ;
   for (const auto &C : A->children(Err, false))
     printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName);
 
   if (Err)
-    report_error(StringRef(), Filename, std::move(Err), ArchitectureName);
+    report_error(std::move(Err), StringRef(), Filename, ArchitectureName);
 }
 
 static bool ValidateArchFlags() {
@@ -1970,15 +2259,15 @@ static bool ValidateArchFlags() {
 // -arch flags selecting just those slices as specified by them and also parses
 // archive files.  Then for each individual Mach-O file ProcessMachO() is
 // called to process the file based on the command line options.
-void llvm::parseInputMachO(StringRef Filename) {
+void parseInputMachO(StringRef Filename) {
   if (!ValidateArchFlags())
     return;
 
   // Attempt to open the binary.
   Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
   if (!BinaryOrErr) {
-    if (auto E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError()))
-      report_error(Filename, std::move(E));
+    if (Error E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError()))
+      report_error(std::move(E), Filename);
     else
       outs() << Filename << ": is not an object file\n";
     return;
@@ -1994,8 +2283,8 @@ void llvm::parseInputMachO(StringRef Filename) {
     for (auto &C : A->children(Err)) {
       Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
       if (!ChildOrErr) {
-        if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
-          report_error(Filename, C, std::move(E));
+        if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+          report_error(std::move(E), Filename, C);
         continue;
       }
       if (MachOObjectFile *O = dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
@@ -2005,7 +2294,7 @@ void llvm::parseInputMachO(StringRef Filename) {
       }
     }
     if (Err)
-      report_error(Filename, std::move(Err));
+      report_error(std::move(Err), Filename);
     return;
   }
   if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
@@ -2026,7 +2315,7 @@ void llvm::parseInputMachO(StringRef Filename) {
   llvm_unreachable("Input object can't be invalid at this point");
 }
 
-void llvm::parseInputMachO(MachOUniversalBinary *UB) {
+void parseInputMachO(MachOUniversalBinary *UB) {
   if (!ValidateArchFlags())
     return;
 
@@ -2055,13 +2344,12 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
             ObjectFile &O = *ObjOrErr.get();
             if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
               ProcessMachO(Filename, MachOOF, "", ArchitectureName);
-          } else if (auto E = isNotObjectErrorInvalidFileType(
-                      ObjOrErr.takeError())) {
-            report_error(Filename, StringRef(), std::move(E),
-                          ArchitectureName);
+          } else if (Error E = isNotObjectErrorInvalidFileType(
+                         ObjOrErr.takeError())) {
+            report_error(std::move(E), Filename, StringRef(), ArchitectureName);
             continue;
           } else if (Expected<std::unique_ptr<Archive>> AOrErr =
-                          I->getAsArchive()) {
+                         I->getAsArchive()) {
             std::unique_ptr<Archive> &A = *AOrErr;
             outs() << "Archive : " << Filename;
             if (!ArchitectureName.empty())
@@ -2074,8 +2362,8 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
             for (auto &C : A->children(Err)) {
               Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
               if (!ChildOrErr) {
-                if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
-                  report_error(Filename, C, std::move(E), ArchitectureName);
+                if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+                  report_error(std::move(E), Filename, C, ArchitectureName);
                 continue;
               }
               if (MachOObjectFile *O =
@@ -2083,7 +2371,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
                 ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
             }
             if (Err)
-              report_error(Filename, std::move(Err));
+              report_error(std::move(Err), Filename);
           } else {
             consumeError(AOrErr.takeError());
             error("Mach-O universal file: " + Filename + " for " +
@@ -2116,11 +2404,11 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
           ObjectFile &O = *ObjOrErr.get();
           if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
             ProcessMachO(Filename, MachOOF);
-        } else if (auto E = isNotObjectErrorInvalidFileType(
-                    ObjOrErr.takeError())) {
-          report_error(Filename, std::move(E));
+        } else if (Error E =
+                       isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+          report_error(std::move(E), Filename);
         } else if (Expected<std::unique_ptr<Archive>> AOrErr =
-                        I->getAsArchive()) {
+                       I->getAsArchive()) {
           std::unique_ptr<Archive> &A = *AOrErr;
           outs() << "Archive : " << Filename << "\n";
           if (ArchiveHeaders)
@@ -2130,8 +2418,9 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
           for (auto &C : A->children(Err)) {
             Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
             if (!ChildOrErr) {
-              if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
-                report_error(Filename, C, std::move(E));
+              if (Error E =
+                      isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+                report_error(std::move(E), Filename, C);
               continue;
             }
             if (MachOObjectFile *O =
@@ -2139,7 +2428,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
               ProcessMachO(Filename, O, O->getFileName());
           }
           if (Err)
-            report_error(Filename, std::move(Err));
+            report_error(std::move(Err), Filename);
         } else {
           consumeError(AOrErr.takeError());
           error("Mach-O universal file: " + Filename + " for architecture " +
@@ -2164,11 +2453,10 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
       ObjectFile &Obj = *ObjOrErr.get();
       if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
         ProcessMachO(Filename, MachOOF, "", ArchitectureName);
-    } else if (auto E = isNotObjectErrorInvalidFileType(
-                ObjOrErr.takeError())) {
-      report_error(StringRef(), Filename, std::move(E), ArchitectureName);
-    } else if (Expected<std::unique_ptr<Archive>> AOrErr =
-                  I->getAsArchive()) {
+    } else if (Error E =
+                   isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+      report_error(std::move(E), StringRef(), Filename, ArchitectureName);
+    } else if (Expected<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
       std::unique_ptr<Archive> &A = *AOrErr;
       outs() << "Archive : " << Filename;
       if (!ArchitectureName.empty())
@@ -2181,8 +2469,8 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
       for (auto &C : A->children(Err)) {
         Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
         if (!ChildOrErr) {
-          if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
-            report_error(Filename, C, std::move(E), ArchitectureName);
+          if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+            report_error(std::move(E), Filename, C, ArchitectureName);
           continue;
         }
         if (MachOObjectFile *O =
@@ -2193,7 +2481,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
         }
       }
       if (Err)
-        report_error(Filename, std::move(Err));
+        report_error(std::move(Err), Filename);
     } else {
       consumeError(AOrErr.takeError());
       error("Mach-O universal file: " + Filename + " for architecture " +
@@ -2308,12 +2596,9 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       }
     }
     if (reloc_found && isExtern) {
-      Expected<StringRef> SymName = Symbol.getName();
-      if (!SymName)
-        report_error(info->O->getFileName(), SymName.takeError());
-      const char *name = SymName->data();
       op_info->AddSymbol.Present = 1;
-      op_info->AddSymbol.Name = name;
+      op_info->AddSymbol.Name =
+          unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
       // For i386 extern relocation entries the value in the instruction is
       // the offset from the symbol, and value is already set in op_info->Value.
       return 1;
@@ -2372,10 +2657,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
         // is the offset from the external symbol.
         if (info->O->getAnyRelocationPCRel(RE))
           op_info->Value -= Pc + Offset + Size;
-        Expected<StringRef> SymName = Symbol.getName();
-        if (!SymName)
-          report_error(info->O->getFileName(), SymName.takeError());
-        const char *name = SymName->data();
+        const char *name =
+            unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
         op_info->AddSymbol.Present = 1;
         op_info->AddSymbol.Name = name;
         return 1;
@@ -2412,10 +2695,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       // is the offset from the external symbol.
       if (info->O->getAnyRelocationPCRel(RE))
         op_info->Value -= Pc + Offset + Size;
-      Expected<StringRef> SymName = Symbol.getName();
-      if (!SymName)
-        report_error(info->O->getFileName(), SymName.takeError());
-      const char *name = SymName->data();
+      const char *name =
+          unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
       unsigned Type = info->O->getAnyRelocationType(RE);
       if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
         DataRefImpl RelNext = Rel;
@@ -2429,10 +2710,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
           op_info->SubtractSymbol.Name = name;
           symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
           Symbol = *RelocSymNext;
-          Expected<StringRef> SymNameNext = Symbol.getName();
-          if (!SymNameNext)
-            report_error(info->O->getFileName(), SymNameNext.takeError());
-          name = SymNameNext->data();
+          name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
         }
       }
       // TODO: add the VariantKinds to op_info->VariantKind for relocation types
@@ -2501,10 +2779,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     }
 
     if (isExtern) {
-      Expected<StringRef> SymName = Symbol.getName();
-      if (!SymName)
-        report_error(info->O->getFileName(), SymName.takeError());
-      const char *name = SymName->data();
+      const char *name =
+          unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
       op_info->AddSymbol.Present = 1;
       op_info->AddSymbol.Name = name;
       switch (r_type) {
@@ -2620,10 +2896,9 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     // NOTE: Scattered relocations don't exist on arm64.
     if (!info->O->getPlainRelocationExternal(RE))
       return 0;
-    Expected<StringRef> SymName = Reloc->getSymbol()->getName();
-    if (!SymName)
-      report_error(info->O->getFileName(), SymName.takeError());
-    const char *name = SymName->data();
+    const char *name =
+        unwrapOrError(Reloc->getSymbol()->getName(), info->O->getFileName())
+            .data();
     op_info->AddSymbol.Present = 1;
     op_info->AddSymbol.Name = name;
 
@@ -2749,12 +3024,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
                 info->O->getIndirectSymbolTableEntry(Dysymtab, index);
             if (indirect_symbol < Symtab.nsyms) {
               symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
-              SymbolRef Symbol = *Sym;
-              Expected<StringRef> SymName = Symbol.getName();
-              if (!SymName)
-                report_error(info->O->getFileName(), SymName.takeError());
-              const char *name = SymName->data();
-              return name;
+              return unwrapOrError(Sym->getName(), info->O->getFileName())
+                  .data();
             }
           }
         }
@@ -2784,12 +3055,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
                 info->O->getIndirectSymbolTableEntry(Dysymtab, index);
             if (indirect_symbol < Symtab.nsyms) {
               symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
-              SymbolRef Symbol = *Sym;
-              Expected<StringRef> SymName = Symbol.getName();
-              if (!SymName)
-                report_error(info->O->getFileName(), SymName.takeError());
-              const char *name = SymName->data();
-              return name;
+              return unwrapOrError(Sym->getName(), info->O->getFileName())
+                  .data();
             }
           }
         }
@@ -2960,8 +3227,8 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
       S = (*(info->Sections))[SectIdx];
       offset = Address - SectAddress;
       left = SectSize - offset;
-      StringRef SectContents;
-      ((*(info->Sections))[SectIdx]).getContents(SectContents);
+      StringRef SectContents = unwrapOrError(
+          ((*(info->Sections))[SectIdx]).getContents(), info->O->getFileName());
       return SectContents.data() + offset;
     }
   }
@@ -3015,10 +3282,7 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
   const char *SymbolName = nullptr;
   if (reloc_found && isExtern) {
     n_value = Symbol.getValue();
-    Expected<StringRef> NameOrError = Symbol.getName();
-    if (!NameOrError)
-      report_error(info->O->getFileName(), NameOrError.takeError());
-    StringRef Name = *NameOrError;
+    StringRef Name = unwrapOrError(Symbol.getName(), info->O->getFileName());
     if (!Name.empty()) {
       SymbolName = Name.data();
       return SymbolName;
@@ -3767,8 +4031,7 @@ walk_pointer_list_64(const char *listname, const SectionRef S,
   StringRef SegName = O->getSectionFinalSegmentName(Ref);
   outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
 
-  StringRef BytesStr;
-  S.getContents(BytesStr);
+  StringRef BytesStr = unwrapOrError(S.getContents(), O->getFileName());
   const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
 
   for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint64_t)) {
@@ -3818,8 +4081,7 @@ walk_pointer_list_32(const char *listname, const SectionRef S,
   StringRef SegName = O->getSectionFinalSegmentName(Ref);
   outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
 
-  StringRef BytesStr;
-  S.getContents(BytesStr);
+  StringRef BytesStr = unwrapOrError(S.getContents(), O->getFileName());
   const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
 
   for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint32_t)) {
@@ -6970,32 +7232,78 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   raw_ostream &DebugOut = nulls();
 #endif
 
+  // Try to find debug info and set up the DIContext for it.
   std::unique_ptr<DIContext> diContext;
-  ObjectFile *DbgObj = MachOOF;
+  std::unique_ptr<Binary> DSYMBinary;
   std::unique_ptr<MemoryBuffer> DSYMBuf;
-  // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
+    ObjectFile *DbgObj = MachOOF;
+
     // A separate DSym file path was specified, parse it as a macho file,
     // get the sections and supply it to the section name parsing machinery.
     if (!DSYMFile.empty()) {
       ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
           MemoryBuffer::getFileOrSTDIN(DSYMFile);
       if (std::error_code EC = BufOrErr.getError()) {
-        report_error(DSYMFile, errorCodeToError(EC));
+        report_error(errorCodeToError(EC), DSYMFile);
         return;
       }
 
-      Expected<std::unique_ptr<MachOObjectFile>> DbgObjCheck =
-          ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef());
+      // We need to keep the file alive, because we're replacing DbgObj with it.
+      DSYMBuf = std::move(BufOrErr.get());
 
-      if (Error E = DbgObjCheck.takeError()) {
-        report_error(DSYMFile, std::move(E));
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+      createBinary(DSYMBuf.get()->getMemBufferRef());
+      if (!BinaryOrErr) {
+        report_error(BinaryOrErr.takeError(), DSYMFile);
         return;
       }
 
-      DbgObj = DbgObjCheck.get().release();
-      // We need to keep the file alive, because we're replacing DbgObj with it.
-      DSYMBuf = std::move(BufOrErr.get());
+      // We need to keep the Binary alive with the buffer
+      DSYMBinary = std::move(BinaryOrErr.get());
+    
+      if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
+        // this is a Mach-O object file, use it
+        if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
+          DbgObj = MachDSYM;
+        }
+        else {
+          WithColor::error(errs(), "llvm-objdump")
+            << DSYMFile << " is not a Mach-O file type.\n";
+          return;
+        }
+      }
+      else if (auto UB = dyn_cast<MachOUniversalBinary>(DSYMBinary.get())){
+        // this is a Universal Binary, find a Mach-O for this architecture
+        uint32_t CPUType, CPUSubType;
+        const char *ArchFlag;
+        if (MachOOF->is64Bit()) {
+          const MachO::mach_header_64 H_64 = MachOOF->getHeader64();
+          CPUType = H_64.cputype;
+          CPUSubType = H_64.cpusubtype;
+        } else {
+          const MachO::mach_header H = MachOOF->getHeader();
+          CPUType = H.cputype;
+          CPUSubType = H.cpusubtype;
+        }
+        Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
+                                                  &ArchFlag);
+        Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
+            UB->getObjectForArch(ArchFlag);
+        if (!MachDSYM) {
+          report_error(MachDSYM.takeError(), DSYMFile);
+          return;
+        }
+    
+        // We need to keep the Binary alive with the buffer
+        DbgObj = &*MachDSYM.get();
+        DSYMBinary = std::move(*MachDSYM);
+      }
+      else {
+        WithColor::error(errs(), "llvm-objdump")
+          << DSYMFile << " is not a Mach-O or Universal file type.\n";
+        return;
+      }
     }
 
     // Setup the DIContext
@@ -7016,10 +7324,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
     if (SegmentName != DisSegName)
       continue;
 
-    StringRef BytesStr;
-    Sections[SectIdx].getContents(BytesStr);
-    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
-                            BytesStr.size());
+    StringRef BytesStr =
+        unwrapOrError(Sections[SectIdx].getContents(), Filename);
+    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(BytesStr);
     uint64_t SectAddress = Sections[SectIdx].getAddress();
 
     bool symbolTableWorked = false;
@@ -7029,17 +7336,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
     SymbolAddressMap AddrMap;
     bool DisSymNameFound = false;
     for (const SymbolRef &Symbol : MachOOF->symbols()) {
-      Expected<SymbolRef::Type> STOrErr = Symbol.getType();
-      if (!STOrErr)
-        report_error(MachOOF->getFileName(), STOrErr.takeError());
-      SymbolRef::Type ST = *STOrErr;
+      SymbolRef::Type ST =
+          unwrapOrError(Symbol.getType(), MachOOF->getFileName());
       if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
           ST == SymbolRef::ST_Other) {
         uint64_t Address = Symbol.getValue();
-        Expected<StringRef> SymNameOrErr = Symbol.getName();
-        if (!SymNameOrErr)
-          report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
-        StringRef SymName = *SymNameOrErr;
+        StringRef SymName =
+            unwrapOrError(Symbol.getName(), MachOOF->getFileName());
         AddrMap[Address] = SymName;
         if (!DisSymName.empty() && DisSymName == SymName)
           DisSymNameFound = true;
@@ -7076,15 +7379,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
 
     // Disassemble symbol by symbol.
     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
-      Expected<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
-      if (!SymNameOrErr)
-        report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
-      StringRef SymName = *SymNameOrErr;
-
-      Expected<SymbolRef::Type> STOrErr = Symbols[SymIdx].getType();
-      if (!STOrErr)
-        report_error(MachOOF->getFileName(), STOrErr.takeError());
-      SymbolRef::Type ST = *STOrErr;
+      StringRef SymName =
+          unwrapOrError(Symbols[SymIdx].getName(), MachOOF->getFileName());
+      SymbolRef::Type ST =
+          unwrapOrError(Symbols[SymIdx].getType(), MachOOF->getFileName());
       if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
         continue;
 
@@ -7137,10 +7435,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
       uint64_t NextSym = 0;
       uint64_t NextSymIdx = SymIdx + 1;
       while (Symbols.size() > NextSymIdx) {
-        Expected<SymbolRef::Type> STOrErr = Symbols[NextSymIdx].getType();
-        if (!STOrErr)
-          report_error(MachOOF->getFileName(), STOrErr.takeError());
-        SymbolRef::Type NextSymType = *STOrErr;
+        SymbolRef::Type NextSymType = unwrapOrError(
+            Symbols[NextSymIdx].getType(), MachOOF->getFileName());
         if (NextSymType == SymbolRef::ST_Function) {
           containsNextSym =
               Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
@@ -7243,7 +7539,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
 
           // Print debug info.
           if (diContext) {
-            DILineInfo dli = diContext->getLineInfoForAddress(PC);
+            DILineInfo dli = diContext->getLineInfoForAddress({PC, SectIdx});
             // Print valid line info if it changed.
             if (dli != lastLine && dli.Line != 0)
               outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
@@ -7415,10 +7711,7 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
                                       const RelocationRef &Reloc, uint64_t Addr,
                                       StringRef &Name, uint64_t &Addend) {
   if (Reloc.getSymbol() != Obj->symbol_end()) {
-    Expected<StringRef> NameOrErr = Reloc.getSymbol()->getName();
-    if (!NameOrErr)
-      report_error(Obj->getFileName(), NameOrErr.takeError());
-    Name = *NameOrErr;
+    Name = unwrapOrError(Reloc.getSymbol()->getName(), Obj->getFileName());
     Addend = Addr;
     return;
   }
@@ -7440,16 +7733,11 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
   // Go back one so that SymbolAddress <= Addr.
   --Sym;
 
-  auto SectOrErr = Sym->second.getSection();
-  if (!SectOrErr)
-    report_error(Obj->getFileName(), SectOrErr.takeError());
-  section_iterator SymSection = *SectOrErr;
+  section_iterator SymSection =
+      unwrapOrError(Sym->second.getSection(), Obj->getFileName());
   if (RelocSection == *SymSection) {
     // There's a valid symbol in the same section before this reference.
-    Expected<StringRef> NameOrErr = Sym->second.getName();
-    if (!NameOrErr)
-      report_error(Obj->getFileName(), NameOrErr.takeError());
-    Name = *NameOrErr;
+    Name = unwrapOrError(Sym->second.getName(), Obj->getFileName());
     Addend = Addr - Sym->first;
     return;
   }
@@ -7490,9 +7778,8 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
   uint32_t PointerSize = Is64 ? sizeof(uint64_t) : sizeof(uint32_t);
   uint32_t EntrySize = 3 * PointerSize + 2 * sizeof(uint32_t);
 
-  StringRef Contents;
-  CompactUnwind.getContents(Contents);
-
+  StringRef Contents =
+      unwrapOrError(CompactUnwind.getContents(), Obj->getFileName());
   SmallVector<CompactUnwindEntry, 4> CompactUnwinds;
 
   // First populate the initial raw offsets, encodings and so on from the entry.
@@ -7633,8 +7920,8 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
 
   outs() << "Contents of __unwind_info section:\n";
 
-  StringRef Contents;
-  UnwindInfo.getContents(Contents);
+  StringRef Contents =
+      unwrapOrError(UnwindInfo.getContents(), Obj->getFileName());
   ptrdiff_t Pos = 0;
 
   //===----------------------------------
@@ -7801,7 +8088,7 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
   }
 }
 
-void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) {
+void printMachOUnwindInfo(const MachOObjectFile *Obj) {
   std::map<uint64_t, SymbolRef> Symbols;
   for (const SymbolRef &SymRef : Obj->symbols()) {
     // Discard any undefined or absolute symbols. They're not going to take part
@@ -7917,6 +8204,20 @@ static void PrintMachHeader(uint32_t magic, uint32_t cputype,
       case MachO::CPU_SUBTYPE_ARM64_ALL:
         outs() << "        ALL";
         break;
+      case MachO::CPU_SUBTYPE_ARM64E:
+        outs() << "          E";
+        break;
+      default:
+        outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
+        break;
+      }
+      break;
+    case MachO::CPU_TYPE_ARM64_32:
+      outs() << " ARM64_32";
+      switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) {
+      case MachO::CPU_SUBTYPE_ARM64_32_V8:
+        outs() << "        V8";
+        break;
       default:
         outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
         break;
@@ -9485,7 +9786,8 @@ static void PrintThreadCommand(MachO::thread_command t, const char *Ptr,
         begin += count * sizeof(uint32_t);
       }
     }
-  } else if (cputype == MachO::CPU_TYPE_ARM64) {
+  } else if (cputype == MachO::CPU_TYPE_ARM64 ||
+             cputype == MachO::CPU_TYPE_ARM64_32) {
     while (begin < end) {
       if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
         memcpy((char *)&flavor, begin, sizeof(uint32_t));
@@ -9790,12 +10092,12 @@ static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) {
   }
 }
 
-void llvm::printMachOFileHeader(const object::ObjectFile *Obj) {
+void printMachOFileHeader(const object::ObjectFile *Obj) {
   const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
   PrintMachHeader(file, !NonVerbose);
 }
 
-void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) {
+void printMachOLoadCommands(const object::ObjectFile *Obj) {
   const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
   uint32_t filetype = 0;
   uint32_t cputype = 0;
@@ -9817,7 +10119,7 @@ void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) {
 // export trie dumping
 //===----------------------------------------------------------------------===//
 
-void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
+void printMachOExportsTrie(const object::MachOObjectFile *Obj) {
   uint64_t BaseSegmentAddress = 0;
   for (const auto &Command : Obj->load_commands()) {
     if (Command.C.cmd == MachO::LC_SEGMENT) {
@@ -9835,7 +10137,7 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
     }
   }
   Error Err = Error::success();
-  for (const llvm::object::ExportEntry &Entry : Obj->exports(Err)) {
+  for (const object::ExportEntry &Entry : Obj->exports(Err)) {
     uint64_t Flags = Entry.flags();
     bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
     bool WeakDef = (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
@@ -9889,17 +10191,17 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
     outs() << "\n";
   }
   if (Err)
-    report_error(Obj->getFileName(), std::move(Err));
+    report_error(std::move(Err), Obj->getFileName());
 }
 
 //===----------------------------------------------------------------------===//
 // rebase table dumping
 //===----------------------------------------------------------------------===//
 
-void llvm::printMachORebaseTable(object::MachOObjectFile *Obj) {
+void printMachORebaseTable(object::MachOObjectFile *Obj) {
   outs() << "segment  section            address     type\n";
   Error Err = Error::success();
-  for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(Err)) {
+  for (const object::MachORebaseEntry &Entry : Obj->rebaseTable(Err)) {
     StringRef SegmentName = Entry.segmentName();
     StringRef SectionName = Entry.sectionName();
     uint64_t Address = Entry.address();
@@ -9910,7 +10212,7 @@ void llvm::printMachORebaseTable(object::MachOObjectFile *Obj) {
                      Address, Entry.typeName().str().c_str());
   }
   if (Err)
-    report_error(Obj->getFileName(), std::move(Err));
+    report_error(std::move(Err), Obj->getFileName());
 }
 
 static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
@@ -9938,12 +10240,12 @@ static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
 // bind table dumping
 //===----------------------------------------------------------------------===//
 
-void llvm::printMachOBindTable(object::MachOObjectFile *Obj) {
+void printMachOBindTable(object::MachOObjectFile *Obj) {
   // Build table of sections so names can used in final output.
   outs() << "segment  section            address    type       "
             "addend dylib            symbol\n";
   Error Err = Error::success();
-  for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(Err)) {
+  for (const object::MachOBindEntry &Entry : Obj->bindTable(Err)) {
     StringRef SegmentName = Entry.segmentName();
     StringRef SectionName = Entry.sectionName();
     uint64_t Address = Entry.address();
@@ -9962,18 +10264,18 @@ void llvm::printMachOBindTable(object::MachOObjectFile *Obj) {
            << Entry.symbolName() << Attr << "\n";
   }
   if (Err)
-    report_error(Obj->getFileName(), std::move(Err));
+    report_error(std::move(Err), Obj->getFileName());
 }
 
 //===----------------------------------------------------------------------===//
 // lazy bind table dumping
 //===----------------------------------------------------------------------===//
 
-void llvm::printMachOLazyBindTable(object::MachOObjectFile *Obj) {
+void printMachOLazyBindTable(object::MachOObjectFile *Obj) {
   outs() << "segment  section            address     "
             "dylib            symbol\n";
   Error Err = Error::success();
-  for (const llvm::object::MachOBindEntry &Entry : Obj->lazyBindTable(Err)) {
+  for (const object::MachOBindEntry &Entry : Obj->lazyBindTable(Err)) {
     StringRef SegmentName = Entry.segmentName();
     StringRef SectionName = Entry.sectionName();
     uint64_t Address = Entry.address();
@@ -9987,18 +10289,18 @@ void llvm::printMachOLazyBindTable(object::MachOObjectFile *Obj) {
            << Entry.symbolName() << "\n";
   }
   if (Err)
-    report_error(Obj->getFileName(), std::move(Err));
+    report_error(std::move(Err), Obj->getFileName());
 }
 
 //===----------------------------------------------------------------------===//
 // weak bind table dumping
 //===----------------------------------------------------------------------===//
 
-void llvm::printMachOWeakBindTable(object::MachOObjectFile *Obj) {
+void printMachOWeakBindTable(object::MachOObjectFile *Obj) {
   outs() << "segment  section            address     "
             "type       addend   symbol\n";
   Error Err = Error::success();
-  for (const llvm::object::MachOBindEntry &Entry : Obj->weakBindTable(Err)) {
+  for (const object::MachOBindEntry &Entry : Obj->weakBindTable(Err)) {
     // Strong symbols don't have a location to update.
     if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
       outs() << "                                        strong              "
@@ -10019,7 +10321,7 @@ void llvm::printMachOWeakBindTable(object::MachOObjectFile *Obj) {
            << "\n";
   }
   if (Err)
-    report_error(Obj->getFileName(), std::move(Err));
+    report_error(std::move(Err), Obj->getFileName());
 }
 
 // get_dyld_bind_info_symbolname() is used for disassembly and passed an
@@ -10031,16 +10333,66 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
   if (info->bindtable == nullptr) {
     info->bindtable = llvm::make_unique<SymbolAddressMap>();
     Error Err = Error::success();
-    for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable(Err)) {
+    for (const object::MachOBindEntry &Entry : info->O->bindTable(Err)) {
       uint64_t Address = Entry.address();
       StringRef name = Entry.symbolName();
       if (!name.empty())
         (*info->bindtable)[Address] = name;
     }
     if (Err)
-      report_error(info->O->getFileName(), std::move(Err));
+      report_error(std::move(Err), info->O->getFileName());
   }
   auto name = info->bindtable->lookup(ReferenceValue);
   return !name.empty() ? name.data() : nullptr;
 }
 
+void printLazyBindTable(ObjectFile *o) {
+  outs() << "Lazy bind table:\n";
+  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachOLazyBindTable(MachO);
+  else
+    WithColor::error()
+        << "This operation is only currently supported "
+           "for Mach-O executable files.\n";
+}
+
+void printWeakBindTable(ObjectFile *o) {
+  outs() << "Weak bind table:\n";
+  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachOWeakBindTable(MachO);
+  else
+    WithColor::error()
+        << "This operation is only currently supported "
+           "for Mach-O executable files.\n";
+}
+
+void printExportsTrie(const ObjectFile *o) {
+  outs() << "Exports trie:\n";
+  if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachOExportsTrie(MachO);
+  else
+    WithColor::error()
+        << "This operation is only currently supported "
+           "for Mach-O executable files.\n";
+}
+
+void printRebaseTable(ObjectFile *o) {
+  outs() << "Rebase table:\n";
+  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachORebaseTable(MachO);
+  else
+    WithColor::error()
+        << "This operation is only currently supported "
+           "for Mach-O executable files.\n";
+}
+
+void printBindTable(ObjectFile *o) {
+  outs() << "Bind table:\n";
+  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachOBindTable(MachO);
+  else
+    WithColor::error()
+        << "This operation is only currently supported "
+           "for Mach-O executable files.\n";
+}
+} // namespace llvm
diff --git a/tools/llvm-objdump/WasmDump.cpp b/tools/llvm-objdump/WasmDump.cpp
index 045002cd4b34..da27a4acbb5f 100644
--- a/tools/llvm-objdump/WasmDump.cpp
+++ b/tools/llvm-objdump/WasmDump.cpp
@@ -1,9 +1,8 @@
 //===-- WasmDump.cpp - wasm-specific dumper ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -15,14 +14,39 @@
 #include "llvm-objdump.h"
 #include "llvm/Object/Wasm.h"
 
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
 
-void llvm::printWasmFileHeader(const object::ObjectFile *Obj) {
-  const WasmObjectFile *File = dyn_cast<const WasmObjectFile>(Obj);
+namespace llvm {
+void printWasmFileHeader(const object::ObjectFile *Obj) {
+  const auto *File = dyn_cast<const WasmObjectFile>(Obj);
 
   outs() << "Program Header:\n";
   outs() << "Version: 0x";
   outs().write_hex(File->getHeader().Version);
   outs() << "\n";
 }
+
+Error getWasmRelocationValueString(const WasmObjectFile *Obj,
+                                         const RelocationRef &RelRef,
+                                         SmallVectorImpl<char> &Result) {
+  const wasm::WasmRelocation &Rel = Obj->getWasmRelocation(RelRef);
+  symbol_iterator SI = RelRef.getSymbol();
+  std::string FmtBuf;
+  raw_string_ostream Fmt(FmtBuf);
+  if (SI == Obj->symbol_end()) {
+    // Not all wasm relocations have symbols associated with them.
+    // In particular R_WASM_TYPE_INDEX_LEB.
+    Fmt << Rel.Index;
+  } else {
+    Expected<StringRef> SymNameOrErr = SI->getName();
+    if (!SymNameOrErr)
+      return SymNameOrErr.takeError();
+    StringRef SymName = *SymNameOrErr;
+    Result.append(SymName.begin(), SymName.end());
+  }
+  Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
+  Fmt.flush();
+  Result.append(FmtBuf.begin(), FmtBuf.end());
+  return Error::success();
+}
+} // namespace llvm
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index ba8d3c5b8d5c..58981203c59e 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -19,6 +18,7 @@
 #include "llvm-objdump.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
@@ -68,283 +68,298 @@
 #include <unordered_map>
 #include <utility>
 
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
+
+namespace llvm {
+
+cl::OptionCategory ObjdumpCat("llvm-objdump Options");
+
+// MachO specific
+extern cl::OptionCategory MachOCat;
+extern cl::opt<bool> Bind;
+extern cl::opt<bool> DataInCode;
+extern cl::opt<bool> DylibsUsed;
+extern cl::opt<bool> DylibId;
+extern cl::opt<bool> ExportsTrie;
+extern cl::opt<bool> FirstPrivateHeader;
+extern cl::opt<bool> IndirectSymbols;
+extern cl::opt<bool> InfoPlist;
+extern cl::opt<bool> LazyBind;
+extern cl::opt<bool> LinkOptHints;
+extern cl::opt<bool> ObjcMetaData;
+extern cl::opt<bool> Rebase;
+extern cl::opt<bool> UniversalHeaders;
+extern cl::opt<bool> WeakBind;
+
+static cl::opt<uint64_t> AdjustVMA(
+    "adjust-vma",
+    cl::desc("Increase the displayed address by the specified offset"),
+    cl::value_desc("offset"), cl::init(0), cl::cat(ObjdumpCat));
 
-cl::opt<bool>
-    llvm::AllHeaders("all-headers",
-                     cl::desc("Display all available header information"));
+static cl::opt<bool>
+    AllHeaders("all-headers",
+               cl::desc("Display all available header information"),
+               cl::cat(ObjdumpCat));
 static cl::alias AllHeadersShort("x", cl::desc("Alias for --all-headers"),
+                                 cl::NotHidden, cl::Grouping,
                                  cl::aliasopt(AllHeaders));
 
-static cl::list<std::string>
-InputFilenames(cl::Positional, cl::desc("<input object files>"),cl::ZeroOrMore);
+static cl::opt<std::string>
+    ArchName("arch-name",
+             cl::desc("Target arch to disassemble for, "
+                      "see -version for available targets"),
+             cl::cat(ObjdumpCat));
+
+cl::opt<bool> ArchiveHeaders("archive-headers",
+                             cl::desc("Display archive header information"),
+                             cl::cat(ObjdumpCat));
+static cl::alias ArchiveHeadersShort("a",
+                                     cl::desc("Alias for --archive-headers"),
+                                     cl::NotHidden, cl::Grouping,
+                                     cl::aliasopt(ArchiveHeaders));
+
+cl::opt<bool> Demangle("demangle", cl::desc("Demangle symbols names"),
+                       cl::init(false), cl::cat(ObjdumpCat));
+static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
+                               cl::NotHidden, cl::Grouping,
+                               cl::aliasopt(Demangle));
+
+cl::opt<bool> Disassemble(
+    "disassemble",
+    cl::desc("Display assembler mnemonics for the machine instructions"),
+    cl::cat(ObjdumpCat));
+static cl::alias DisassembleShort("d", cl::desc("Alias for --disassemble"),
+                                  cl::NotHidden, cl::Grouping,
+                                  cl::aliasopt(Disassemble));
+
+cl::opt<bool> DisassembleAll(
+    "disassemble-all",
+    cl::desc("Display assembler mnemonics for the machine instructions"),
+    cl::cat(ObjdumpCat));
+static cl::alias DisassembleAllShort("D",
+                                     cl::desc("Alias for --disassemble-all"),
+                                     cl::NotHidden, cl::Grouping,
+                                     cl::aliasopt(DisassembleAll));
 
-cl::opt<bool>
-llvm::Disassemble("disassemble",
-  cl::desc("Display assembler mnemonics for the machine instructions"));
+static cl::list<std::string>
+    DisassembleFunctions("disassemble-functions", cl::CommaSeparated,
+                         cl::desc("List of functions to disassemble. "
+                                  "Accept demangled names when --demangle is "
+                                  "specified, otherwise accept mangled names"),
+                         cl::cat(ObjdumpCat));
+
+static cl::opt<bool> DisassembleZeroes(
+    "disassemble-zeroes",
+    cl::desc("Do not skip blocks of zeroes when disassembling"),
+    cl::cat(ObjdumpCat));
 static cl::alias
-Disassembled("d", cl::desc("Alias for --disassemble"),
-             cl::aliasopt(Disassemble));
+    DisassembleZeroesShort("z", cl::desc("Alias for --disassemble-zeroes"),
+                           cl::NotHidden, cl::Grouping,
+                           cl::aliasopt(DisassembleZeroes));
 
-cl::opt<bool>
-llvm::DisassembleAll("disassemble-all",
-  cl::desc("Display assembler mnemonics for the machine instructions"));
+static cl::list<std::string>
+    DisassemblerOptions("disassembler-options",
+                        cl::desc("Pass target specific disassembler options"),
+                        cl::value_desc("options"), cl::CommaSeparated,
+                        cl::cat(ObjdumpCat));
 static cl::alias
-DisassembleAlld("D", cl::desc("Alias for --disassemble-all"),
-             cl::aliasopt(DisassembleAll));
-
-cl::opt<bool> llvm::Demangle("demangle", cl::desc("Demangle symbols names"),
-                             cl::init(false));
-
-static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
-                               cl::aliasopt(llvm::Demangle));
+    DisassemblerOptionsShort("M", cl::desc("Alias for --disassembler-options"),
+                             cl::NotHidden, cl::Grouping, cl::Prefix,
+                             cl::CommaSeparated,
+                             cl::aliasopt(DisassemblerOptions));
 
-static cl::list<std::string>
-DisassembleFunctions("df",
-                     cl::CommaSeparated,
-                     cl::desc("List of functions to disassemble"));
-static StringSet<> DisasmFuncsSet;
+cl::opt<DIDumpType> DwarfDumpType(
+    "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
+    cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")),
+    cl::cat(ObjdumpCat));
+
+static cl::opt<bool> DynamicRelocations(
+    "dynamic-reloc",
+    cl::desc("Display the dynamic relocation entries in the file"),
+    cl::cat(ObjdumpCat));
+static cl::alias DynamicRelocationShort("R",
+                                        cl::desc("Alias for --dynamic-reloc"),
+                                        cl::NotHidden, cl::Grouping,
+                                        cl::aliasopt(DynamicRelocations));
 
-cl::opt<bool>
-llvm::Relocations("reloc",
-                  cl::desc("Display the relocation entries in the file"));
-static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
-                                  cl::NotHidden,
-                                  cl::aliasopt(llvm::Relocations));
+static cl::opt<bool>
+    FaultMapSection("fault-map-section",
+                    cl::desc("Display contents of faultmap section"),
+                    cl::cat(ObjdumpCat));
 
-cl::opt<bool>
-llvm::DynamicRelocations("dynamic-reloc",
-  cl::desc("Display the dynamic relocation entries in the file"));
-static cl::alias
-DynamicRelocationsd("R", cl::desc("Alias for --dynamic-reloc"),
-             cl::aliasopt(DynamicRelocations));
+static cl::opt<bool>
+    FileHeaders("file-headers",
+                cl::desc("Display the contents of the overall file header"),
+                cl::cat(ObjdumpCat));
+static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"),
+                                  cl::NotHidden, cl::Grouping,
+                                  cl::aliasopt(FileHeaders));
 
-cl::opt<bool>
-    llvm::SectionContents("full-contents",
-                          cl::desc("Display the content of each section"));
+cl::opt<bool> SectionContents("full-contents",
+                              cl::desc("Display the content of each section"),
+                              cl::cat(ObjdumpCat));
 static cl::alias SectionContentsShort("s",
                                       cl::desc("Alias for --full-contents"),
+                                      cl::NotHidden, cl::Grouping,
                                       cl::aliasopt(SectionContents));
 
-cl::opt<bool> llvm::SymbolTable("syms", cl::desc("Display the symbol table"));
-static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
-                                  cl::NotHidden,
-                                  cl::aliasopt(llvm::SymbolTable));
-
-cl::opt<bool>
-llvm::ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
-
-cl::opt<bool>
-llvm::Rebase("rebase", cl::desc("Display mach-o rebasing info"));
-
-cl::opt<bool>
-llvm::Bind("bind", cl::desc("Display mach-o binding info"));
-
-cl::opt<bool>
-llvm::LazyBind("lazy-bind", cl::desc("Display mach-o lazy binding info"));
-
-cl::opt<bool>
-llvm::WeakBind("weak-bind", cl::desc("Display mach-o weak binding info"));
-
-cl::opt<bool>
-llvm::RawClangAST("raw-clang-ast",
-    cl::desc("Dump the raw binary contents of the clang AST section"));
+static cl::list<std::string> InputFilenames(cl::Positional,
+                                            cl::desc("<input object files>"),
+                                            cl::ZeroOrMore,
+                                            cl::cat(ObjdumpCat));
 
 static cl::opt<bool>
-MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
-static cl::alias
-MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachOOpt));
-
-cl::opt<std::string>
-llvm::TripleName("triple", cl::desc("Target triple to disassemble for, "
-                                    "see -version for available targets"));
+    PrintLines("line-numbers",
+               cl::desc("Display source line numbers with "
+                        "disassembly. Implies disassemble object"),
+               cl::cat(ObjdumpCat));
+static cl::alias PrintLinesShort("l", cl::desc("Alias for --line-numbers"),
+                                 cl::NotHidden, cl::Grouping,
+                                 cl::aliasopt(PrintLines));
+
+static cl::opt<bool> MachOOpt("macho",
+                              cl::desc("Use MachO specific object file parser"),
+                              cl::cat(ObjdumpCat));
+static cl::alias MachOm("m", cl::desc("Alias for --macho"), cl::NotHidden,
+                        cl::Grouping, cl::aliasopt(MachOOpt));
 
 cl::opt<std::string>
-llvm::MCPU("mcpu",
-     cl::desc("Target a specific cpu type (-mcpu=help for details)"),
-     cl::value_desc("cpu-name"),
-     cl::init(""));
-
-cl::opt<std::string>
-llvm::ArchName("arch-name", cl::desc("Target arch to disassemble for, "
-                                "see -version for available targets"));
+    MCPU("mcpu",
+         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+         cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat));
+
+cl::list<std::string> MAttrs("mattr", cl::CommaSeparated,
+                             cl::desc("Target specific attributes"),
+                             cl::value_desc("a1,+a2,-a3,..."),
+                             cl::cat(ObjdumpCat));
+
+cl::opt<bool> NoShowRawInsn("no-show-raw-insn",
+                            cl::desc("When disassembling "
+                                     "instructions, do not print "
+                                     "the instruction bytes."),
+                            cl::cat(ObjdumpCat));
+cl::opt<bool> NoLeadingAddr("no-leading-addr",
+                            cl::desc("Print no leading address"),
+                            cl::cat(ObjdumpCat));
+
+static cl::opt<bool> RawClangAST(
+    "raw-clang-ast",
+    cl::desc("Dump the raw binary contents of the clang AST section"),
+    cl::cat(ObjdumpCat));
 
 cl::opt<bool>
-llvm::SectionHeaders("section-headers", cl::desc("Display summaries of the "
-                                                 "headers for each section."));
-static cl::alias
-SectionHeadersShort("headers", cl::desc("Alias for --section-headers"),
-                    cl::aliasopt(SectionHeaders));
-static cl::alias
-SectionHeadersShorter("h", cl::desc("Alias for --section-headers"),
-                      cl::aliasopt(SectionHeaders));
-
-cl::list<std::string>
-llvm::FilterSections("section", cl::desc("Operate on the specified sections only. "
-                                         "With -macho dump segment,section"));
-cl::alias
-static FilterSectionsj("j", cl::desc("Alias for --section"),
-                 cl::aliasopt(llvm::FilterSections));
-
-cl::list<std::string>
-llvm::MAttrs("mattr",
-  cl::CommaSeparated,
-  cl::desc("Target specific attributes"),
-  cl::value_desc("a1,+a2,-a3,..."));
-
-cl::opt<bool>
-llvm::NoShowRawInsn("no-show-raw-insn", cl::desc("When disassembling "
-                                                 "instructions, do not print "
-                                                 "the instruction bytes."));
-cl::opt<bool>
-llvm::NoLeadingAddr("no-leading-addr", cl::desc("Print no leading address"));
-
-cl::opt<bool>
-llvm::UnwindInfo("unwind-info", cl::desc("Display unwind information"));
-
-static cl::alias
-UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
-                cl::aliasopt(UnwindInfo));
-
-cl::opt<bool>
-llvm::PrivateHeaders("private-headers",
-                     cl::desc("Display format specific file headers"));
-
-cl::opt<bool>
-llvm::FirstPrivateHeader("private-header",
-                         cl::desc("Display only the first format specific file "
-                                  "header"));
-
-static cl::alias
-PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
-                    cl::aliasopt(PrivateHeaders));
-
-cl::opt<bool> llvm::FileHeaders(
-    "file-headers",
-    cl::desc("Display the contents of the overall file header"));
-
-static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"),
-                                  cl::aliasopt(FileHeaders));
-
-cl::opt<bool>
-    llvm::ArchiveHeaders("archive-headers",
-                         cl::desc("Display archive header information"));
+    Relocations("reloc", cl::desc("Display the relocation entries in the file"),
+                cl::cat(ObjdumpCat));
+static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
+                                  cl::NotHidden, cl::Grouping,
+                                  cl::aliasopt(Relocations));
 
-cl::alias
-ArchiveHeadersShort("a", cl::desc("Alias for --archive-headers"),
-                    cl::aliasopt(ArchiveHeaders));
+cl::opt<bool> PrintImmHex("print-imm-hex",
+                          cl::desc("Use hex format for immediate values"),
+                          cl::cat(ObjdumpCat));
 
-cl::opt<bool>
-    llvm::PrintImmHex("print-imm-hex",
-                      cl::desc("Use hex format for immediate values"));
+cl::opt<bool> PrivateHeaders("private-headers",
+                             cl::desc("Display format specific file headers"),
+                             cl::cat(ObjdumpCat));
+static cl::alias PrivateHeadersShort("p",
+                                     cl::desc("Alias for --private-headers"),
+                                     cl::NotHidden, cl::Grouping,
+                                     cl::aliasopt(PrivateHeaders));
 
-cl::opt<bool> PrintFaultMaps("fault-map-section",
-                             cl::desc("Display contents of faultmap section"));
+cl::list<std::string>
+    FilterSections("section",
+                   cl::desc("Operate on the specified sections only. "
+                            "With -macho dump segment,section"),
+                   cl::cat(ObjdumpCat));
+static cl::alias FilterSectionsj("j", cl::desc("Alias for --section"),
+                                 cl::NotHidden, cl::Grouping, cl::Prefix,
+                                 cl::aliasopt(FilterSections));
+
+cl::opt<bool> SectionHeaders("section-headers",
+                             cl::desc("Display summaries of the "
+                                      "headers for each section."),
+                             cl::cat(ObjdumpCat));
+static cl::alias SectionHeadersShort("headers",
+                                     cl::desc("Alias for --section-headers"),
+                                     cl::NotHidden,
+                                     cl::aliasopt(SectionHeaders));
+static cl::alias SectionHeadersShorter("h",
+                                       cl::desc("Alias for --section-headers"),
+                                       cl::NotHidden, cl::Grouping,
+                                       cl::aliasopt(SectionHeaders));
 
-cl::opt<DIDumpType> llvm::DwarfDumpType(
-    "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
-    cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")));
+static cl::opt<bool>
+    ShowLMA("show-lma",
+            cl::desc("Display LMA column when dumping ELF section headers"),
+            cl::cat(ObjdumpCat));
 
-cl::opt<bool> PrintSource(
+static cl::opt<bool> PrintSource(
     "source",
     cl::desc(
-        "Display source inlined with disassembly. Implies disassemble object"));
-
-cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
-                           cl::aliasopt(PrintSource));
+        "Display source inlined with disassembly. Implies disassemble object"),
+    cl::cat(ObjdumpCat));
+static cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
+                                  cl::NotHidden, cl::Grouping,
+                                  cl::aliasopt(PrintSource));
 
-cl::opt<bool> PrintLines("line-numbers",
-                         cl::desc("Display source line numbers with "
-                                  "disassembly. Implies disassemble object"));
-
-cl::alias PrintLinesShort("l", cl::desc("Alias for -line-numbers"),
-                          cl::aliasopt(PrintLines));
-
-cl::opt<unsigned long long>
+static cl::opt<uint64_t>
     StartAddress("start-address", cl::desc("Disassemble beginning at address"),
-                 cl::value_desc("address"), cl::init(0));
-cl::opt<unsigned long long>
-    StopAddress("stop-address",
-                cl::desc("Stop disassembly at address"),
-                cl::value_desc("address"), cl::init(UINT64_MAX));
-
-cl::opt<bool> DisassembleZeroes(
-                "disassemble-zeroes",
-                cl::desc("Do not skip blocks of zeroes when disassembling"));
-cl::alias DisassembleZeroesShort("z",
-                                 cl::desc("Alias for --disassemble-zeroes"),
-                                 cl::aliasopt(DisassembleZeroes));
+                 cl::value_desc("address"), cl::init(0), cl::cat(ObjdumpCat));
+static cl::opt<uint64_t> StopAddress("stop-address",
+                                     cl::desc("Stop disassembly at address"),
+                                     cl::value_desc("address"),
+                                     cl::init(UINT64_MAX), cl::cat(ObjdumpCat));
+
+cl::opt<bool> SymbolTable("syms", cl::desc("Display the symbol table"),
+                          cl::cat(ObjdumpCat));
+static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
+                                  cl::NotHidden, cl::Grouping,
+                                  cl::aliasopt(SymbolTable));
 
-static StringRef ToolName;
+cl::opt<std::string> TripleName("triple",
+                                cl::desc("Target triple to disassemble for, "
+                                         "see -version for available targets"),
+                                cl::cat(ObjdumpCat));
 
-typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
+cl::opt<bool> UnwindInfo("unwind-info", cl::desc("Display unwind information"),
+                         cl::cat(ObjdumpCat));
+static cl::alias UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
+                                 cl::NotHidden, cl::Grouping,
+                                 cl::aliasopt(UnwindInfo));
 
-namespace {
-typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+static cl::opt<bool>
+    Wide("wide", cl::desc("Ignored for compatibility with GNU objdump"),
+         cl::cat(ObjdumpCat));
+static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide));
 
-class SectionFilterIterator {
-public:
-  SectionFilterIterator(FilterPredicate P,
-                        llvm::object::section_iterator const &I,
-                        llvm::object::section_iterator const &E)
-      : Predicate(std::move(P)), Iterator(I), End(E) {
-    ScanPredicate();
-  }
-  const llvm::object::SectionRef &operator*() const { return *Iterator; }
-  SectionFilterIterator &operator++() {
-    ++Iterator;
-    ScanPredicate();
-    return *this;
-  }
-  bool operator!=(SectionFilterIterator const &Other) const {
-    return Iterator != Other.Iterator;
-  }
+static cl::extrahelp
+    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
 
-private:
-  void ScanPredicate() {
-    while (Iterator != End && !Predicate(*Iterator)) {
-      ++Iterator;
-    }
-  }
-  FilterPredicate Predicate;
-  llvm::object::section_iterator Iterator;
-  llvm::object::section_iterator End;
-};
+static StringSet<> DisasmFuncsSet;
+static StringSet<> FoundSectionSet;
+static StringRef ToolName;
 
-class SectionFilter {
-public:
-  SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
-      : Predicate(std::move(P)), Object(O) {}
-  SectionFilterIterator begin() {
-    return SectionFilterIterator(Predicate, Object.section_begin(),
-                                 Object.section_end());
-  }
-  SectionFilterIterator end() {
-    return SectionFilterIterator(Predicate, Object.section_end(),
-                                 Object.section_end());
-  }
+typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
 
-private:
-  FilterPredicate Predicate;
-  llvm::object::ObjectFile const &Object;
-};
-SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
-  return SectionFilter(
-      [](llvm::object::SectionRef const &S) {
-        if (FilterSections.empty())
-          return true;
-        llvm::StringRef String;
-        std::error_code error = S.getName(String);
-        if (error)
-          return false;
-        return is_contained(FilterSections, String);
-      },
-      O);
+static bool shouldKeep(object::SectionRef S) {
+  if (FilterSections.empty())
+    return true;
+  StringRef SecName;
+  std::error_code error = S.getName(SecName);
+  if (error)
+    return false;
+  // StringSet does not allow empty key so avoid adding sections with
+  // no name (such as the section with index 0) here.
+  if (!SecName.empty())
+    FoundSectionSet.insert(SecName);
+  return is_contained(FilterSections, SecName);
 }
+
+SectionFilter ToolSectionFilter(object::ObjectFile const &O) {
+  return SectionFilter([](object::SectionRef S) { return shouldKeep(S); }, O);
 }
 
-void llvm::error(std::error_code EC) {
+void error(std::error_code EC) {
   if (!EC)
     return;
   WithColor::error(errs(), ToolName)
@@ -353,34 +368,39 @@ void llvm::error(std::error_code EC) {
   exit(1);
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) {
+void error(Error E) {
+  if (!E)
+    return;
+  WithColor::error(errs(), ToolName) << toString(std::move(E));
+  exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
   WithColor::error(errs(), ToolName) << Message << ".\n";
   errs().flush();
   exit(1);
 }
 
-void llvm::warn(StringRef Message) {
+void warn(StringRef Message) {
   WithColor::warning(errs(), ToolName) << Message << ".\n";
   errs().flush();
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
-                                                Twine Message) {
-  WithColor::error(errs(), ToolName)
-      << "'" << File << "': " << Message << ".\n";
-  exit(1);
+static void warn(Twine Message) {
+  // Output order between errs() and outs() matters especially for archive
+  // files where the output is per member object.
+  outs().flush();
+  WithColor::warning(errs(), ToolName) << Message << "\n";
+  errs().flush();
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
-                                                std::error_code EC) {
-  assert(EC);
+LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message) {
   WithColor::error(errs(), ToolName)
-      << "'" << File << "': " << EC.message() << ".\n";
+      << "'" << File << "': " << Message << ".\n";
   exit(1);
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
-                                                llvm::Error E) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File) {
   assert(E);
   std::string Buf;
   raw_string_ostream OS(Buf);
@@ -390,10 +410,9 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
   exit(1);
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
-                                                StringRef FileName,
-                                                llvm::Error E,
-                                                StringRef ArchitectureName) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
+                                          StringRef FileName,
+                                          StringRef ArchitectureName) {
   assert(E);
   WithColor::error(errs(), ToolName);
   if (ArchiveName != "")
@@ -410,25 +429,39 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
   exit(1);
 }
 
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
-                                                const object::Archive::Child &C,
-                                                llvm::Error E,
-                                                StringRef ArchitectureName) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
+                                          const object::Archive::Child &C,
+                                          StringRef ArchitectureName) {
   Expected<StringRef> NameOrErr = C.getName();
   // TODO: if we have a error getting the name then it would be nice to print
   // the index of which archive member this is and or its offset in the
   // archive instead of "???" as the name.
   if (!NameOrErr) {
     consumeError(NameOrErr.takeError());
-    llvm::report_error(ArchiveName, "???", std::move(E), ArchitectureName);
+    report_error(std::move(E), ArchiveName, "???", ArchitectureName);
   } else
-    llvm::report_error(ArchiveName, NameOrErr.get(), std::move(E),
-                       ArchitectureName);
+    report_error(std::move(E), ArchiveName, NameOrErr.get(), ArchitectureName);
+}
+
+static void warnOnNoMatchForSections() {
+  SetVector<StringRef> MissingSections;
+  for (StringRef S : FilterSections) {
+    if (FoundSectionSet.count(S))
+      return;
+    // User may specify a unnamed section. Don't warn for it.
+    if (!S.empty())
+      MissingSections.insert(S);
+  }
+
+  // Warn only if no section in FilterSections is matched.
+  for (StringRef S : MissingSections)
+    warn("section '" + S + "' mentioned in a -j/--section option, but not "
+         "found in any input file");
 }
 
 static const Target *getTarget(const ObjectFile *Obj = nullptr) {
   // Figure out the target triple.
-  llvm::Triple TheTriple("unknown-unknown-unknown");
+  Triple TheTriple("unknown-unknown-unknown");
   if (TripleName.empty()) {
     if (Obj)
       TheTriple = Obj->makeTriple();
@@ -459,423 +492,21 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
   return TheTarget;
 }
 
-bool llvm::isRelocAddressLess(RelocationRef A, RelocationRef B) {
+bool isRelocAddressLess(RelocationRef A, RelocationRef B) {
   return A.getOffset() < B.getOffset();
 }
 
-static std::string demangle(StringRef Name) {
-  char *Demangled = nullptr;
-  if (Name.startswith("_Z"))
-    Demangled = itaniumDemangle(Name.data(), Demangled, nullptr, nullptr);
-  else if (Name.startswith("?"))
-    Demangled = microsoftDemangle(Name.data(), Demangled, nullptr, nullptr);
-
-  if (!Demangled)
-    return Name;
-
-  std::string Ret = Demangled;
-  free(Demangled);
-  return Ret;
-}
-
-template <class ELFT>
-static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
-                                                const RelocationRef &RelRef,
-                                                SmallVectorImpl<char> &Result) {
-  typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
-  typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
-  typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
-
-  const ELFFile<ELFT> &EF = *Obj->getELFFile();
-  DataRefImpl Rel = RelRef.getRawDataRefImpl();
-  auto SecOrErr = EF.getSection(Rel.d.a);
-  if (!SecOrErr)
-    return errorToErrorCode(SecOrErr.takeError());
-  const Elf_Shdr *Sec = *SecOrErr;
-  auto SymTabOrErr = EF.getSection(Sec->sh_link);
-  if (!SymTabOrErr)
-    return errorToErrorCode(SymTabOrErr.takeError());
-  const Elf_Shdr *SymTab = *SymTabOrErr;
-  assert(SymTab->sh_type == ELF::SHT_SYMTAB ||
-         SymTab->sh_type == ELF::SHT_DYNSYM);
-  auto StrTabSec = EF.getSection(SymTab->sh_link);
-  if (!StrTabSec)
-    return errorToErrorCode(StrTabSec.takeError());
-  auto StrTabOrErr = EF.getStringTable(*StrTabSec);
-  if (!StrTabOrErr)
-    return errorToErrorCode(StrTabOrErr.takeError());
-  StringRef StrTab = *StrTabOrErr;
-  int64_t Addend = 0;
-  // If there is no Symbol associated with the relocation, we set the undef
-  // boolean value to 'true'. This will prevent us from calling functions that
-  // requires the relocation to be associated with a symbol.
-  bool Undef = false;
-  switch (Sec->sh_type) {
-  default:
-    return object_error::parse_failed;
-  case ELF::SHT_REL: {
-    // TODO: Read implicit addend from section data.
-    break;
-  }
-  case ELF::SHT_RELA: {
-    const Elf_Rela *ERela = Obj->getRela(Rel);
-    Addend = ERela->r_addend;
-    Undef = ERela->getSymbol(false) == 0;
-    break;
-  }
-  }
-  std::string Target;
-  if (!Undef) {
-    symbol_iterator SI = RelRef.getSymbol();
-    const Elf_Sym *symb = Obj->getSymbol(SI->getRawDataRefImpl());
-    if (symb->getType() == ELF::STT_SECTION) {
-      Expected<section_iterator> SymSI = SI->getSection();
-      if (!SymSI)
-        return errorToErrorCode(SymSI.takeError());
-      const Elf_Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl());
-      auto SecName = EF.getSectionName(SymSec);
-      if (!SecName)
-        return errorToErrorCode(SecName.takeError());
-      Target = *SecName;
-    } else {
-      Expected<StringRef> SymName = symb->getName(StrTab);
-      if (!SymName)
-        return errorToErrorCode(SymName.takeError());
-      if (Demangle)
-        Target = demangle(*SymName);
-      else
-        Target = *SymName;
-    }
-  } else
-    Target = "*ABS*";
-
-  // Default scheme is to print Target, as well as "+ <addend>" for nonzero
-  // addend. Should be acceptable for all normal purposes.
-  std::string FmtBuf;
-  raw_string_ostream Fmt(FmtBuf);
-  Fmt << Target;
-  if (Addend != 0)
-    Fmt << (Addend < 0 ? "" : "+") << Addend;
-  Fmt.flush();
-  Result.append(FmtBuf.begin(), FmtBuf.end());
-  return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const ELFObjectFileBase *Obj,
-                                                const RelocationRef &Rel,
-                                                SmallVectorImpl<char> &Result) {
-  if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
-    return getRelocationValueString(ELF32LE, Rel, Result);
-  if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
-    return getRelocationValueString(ELF64LE, Rel, Result);
-  if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
-    return getRelocationValueString(ELF32BE, Rel, Result);
-  auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
-  return getRelocationValueString(ELF64BE, Rel, Result);
-}
-
-static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
-                                                const RelocationRef &Rel,
-                                                SmallVectorImpl<char> &Result) {
-  symbol_iterator SymI = Rel.getSymbol();
-  Expected<StringRef> SymNameOrErr = SymI->getName();
-  if (!SymNameOrErr)
-    return errorToErrorCode(SymNameOrErr.takeError());
-  StringRef SymName = *SymNameOrErr;
-  Result.append(SymName.begin(), SymName.end());
-  return std::error_code();
-}
-
-static void printRelocationTargetName(const MachOObjectFile *O,
-                                      const MachO::any_relocation_info &RE,
-                                      raw_string_ostream &Fmt) {
-  // Target of a scattered relocation is an address.  In the interest of
-  // generating pretty output, scan through the symbol table looking for a
-  // symbol that aligns with that address.  If we find one, print it.
-  // Otherwise, we just print the hex address of the target.
-  if (O->isRelocationScattered(RE)) {
-    uint32_t Val = O->getPlainRelocationSymbolNum(RE);
-
-    for (const SymbolRef &Symbol : O->symbols()) {
-      Expected<uint64_t> Addr = Symbol.getAddress();
-      if (!Addr)
-        report_error(O->getFileName(), Addr.takeError());
-      if (*Addr != Val)
-        continue;
-      Expected<StringRef> Name = Symbol.getName();
-      if (!Name)
-        report_error(O->getFileName(), Name.takeError());
-      Fmt << *Name;
-      return;
-    }
-
-    // If we couldn't find a symbol that this relocation refers to, try
-    // to find a section beginning instead.
-    for (const SectionRef &Section : ToolSectionFilter(*O)) {
-      std::error_code ec;
-
-      StringRef Name;
-      uint64_t Addr = Section.getAddress();
-      if (Addr != Val)
-        continue;
-      if ((ec = Section.getName(Name)))
-        report_error(O->getFileName(), ec);
-      Fmt << Name;
-      return;
-    }
-
-    Fmt << format("0x%x", Val);
-    return;
-  }
-
-  StringRef S;
-  bool isExtern = O->getPlainRelocationExternal(RE);
-  uint64_t Val = O->getPlainRelocationSymbolNum(RE);
-
-  if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
-    Fmt << format("0x%0" PRIx64, Val);
-    return;
-  }
-
-  if (isExtern) {
-    symbol_iterator SI = O->symbol_begin();
-    advance(SI, Val);
-    Expected<StringRef> SOrErr = SI->getName();
-    if (!SOrErr)
-      report_error(O->getFileName(), SOrErr.takeError());
-    S = *SOrErr;
-  } else {
-    section_iterator SI = O->section_begin();
-    // Adjust for the fact that sections are 1-indexed.
-    if (Val == 0) {
-      Fmt << "0 (?,?)";
-      return;
-    }
-    uint32_t I = Val - 1;
-    while (I != 0 && SI != O->section_end()) {
-      --I;
-      advance(SI, 1);
-    }
-    if (SI == O->section_end())
-      Fmt << Val << " (?,?)";
-    else
-      SI->getName(S);
-  }
-
-  Fmt << S;
-}
-
-static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
-                                                const RelocationRef &RelRef,
-                                                SmallVectorImpl<char> &Result) {
-  const wasm::WasmRelocation& Rel = Obj->getWasmRelocation(RelRef);
-  symbol_iterator SI = RelRef.getSymbol();
-  std::string FmtBuf;
-  raw_string_ostream Fmt(FmtBuf);
-  if (SI == Obj->symbol_end()) {
-    // Not all wasm relocations have symbols associated with them.
-    // In particular R_WEBASSEMBLY_TYPE_INDEX_LEB.
-    Fmt << Rel.Index;
-  } else {
-    Expected<StringRef> SymNameOrErr = SI->getName();
-    if (!SymNameOrErr)
-      return errorToErrorCode(SymNameOrErr.takeError());
-    StringRef SymName = *SymNameOrErr;
-    Result.append(SymName.begin(), SymName.end());
-  }
-  Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
-  Fmt.flush();
-  Result.append(FmtBuf.begin(), FmtBuf.end());
-  return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
-                                                const RelocationRef &RelRef,
-                                                SmallVectorImpl<char> &Result) {
-  DataRefImpl Rel = RelRef.getRawDataRefImpl();
-  MachO::any_relocation_info RE = Obj->getRelocation(Rel);
-
-  unsigned Arch = Obj->getArch();
-
-  std::string FmtBuf;
-  raw_string_ostream Fmt(FmtBuf);
-  unsigned Type = Obj->getAnyRelocationType(RE);
-  bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
-
-  // Determine any addends that should be displayed with the relocation.
-  // These require decoding the relocation type, which is triple-specific.
-
-  // X86_64 has entirely custom relocation types.
-  if (Arch == Triple::x86_64) {
-    switch (Type) {
-    case MachO::X86_64_RELOC_GOT_LOAD:
-    case MachO::X86_64_RELOC_GOT: {
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "@GOT";
-      if (IsPCRel)
-        Fmt << "PCREL";
-      break;
-    }
-    case MachO::X86_64_RELOC_SUBTRACTOR: {
-      DataRefImpl RelNext = Rel;
-      Obj->moveRelocationNext(RelNext);
-      MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
-      // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
-      // X86_64_RELOC_UNSIGNED.
-      // NOTE: Scattered relocations don't exist on x86_64.
-      unsigned RType = Obj->getAnyRelocationType(RENext);
-      if (RType != MachO::X86_64_RELOC_UNSIGNED)
-        report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
-                     "X86_64_RELOC_SUBTRACTOR.");
-
-      // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
-      // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
-      printRelocationTargetName(Obj, RENext, Fmt);
-      Fmt << "-";
-      printRelocationTargetName(Obj, RE, Fmt);
-      break;
-    }
-    case MachO::X86_64_RELOC_TLV:
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "@TLV";
-      if (IsPCRel)
-        Fmt << "P";
-      break;
-    case MachO::X86_64_RELOC_SIGNED_1:
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "-1";
-      break;
-    case MachO::X86_64_RELOC_SIGNED_2:
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "-2";
-      break;
-    case MachO::X86_64_RELOC_SIGNED_4:
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "-4";
-      break;
-    default:
-      printRelocationTargetName(Obj, RE, Fmt);
-      break;
-    }
-    // X86 and ARM share some relocation types in common.
-  } else if (Arch == Triple::x86 || Arch == Triple::arm ||
-             Arch == Triple::ppc) {
-    // Generic relocation types...
-    switch (Type) {
-    case MachO::GENERIC_RELOC_PAIR: // prints no info
-      return std::error_code();
-    case MachO::GENERIC_RELOC_SECTDIFF: {
-      DataRefImpl RelNext = Rel;
-      Obj->moveRelocationNext(RelNext);
-      MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
-      // X86 sect diff's must be followed by a relocation of type
-      // GENERIC_RELOC_PAIR.
-      unsigned RType = Obj->getAnyRelocationType(RENext);
-
-      if (RType != MachO::GENERIC_RELOC_PAIR)
-        report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
-                     "GENERIC_RELOC_SECTDIFF.");
-
-      printRelocationTargetName(Obj, RE, Fmt);
-      Fmt << "-";
-      printRelocationTargetName(Obj, RENext, Fmt);
-      break;
-    }
-    }
-
-    if (Arch == Triple::x86 || Arch == Triple::ppc) {
-      switch (Type) {
-      case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
-        DataRefImpl RelNext = Rel;
-        Obj->moveRelocationNext(RelNext);
-        MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
-        // X86 sect diff's must be followed by a relocation of type
-        // GENERIC_RELOC_PAIR.
-        unsigned RType = Obj->getAnyRelocationType(RENext);
-        if (RType != MachO::GENERIC_RELOC_PAIR)
-          report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
-                       "GENERIC_RELOC_LOCAL_SECTDIFF.");
-
-        printRelocationTargetName(Obj, RE, Fmt);
-        Fmt << "-";
-        printRelocationTargetName(Obj, RENext, Fmt);
-        break;
-      }
-      case MachO::GENERIC_RELOC_TLV: {
-        printRelocationTargetName(Obj, RE, Fmt);
-        Fmt << "@TLV";
-        if (IsPCRel)
-          Fmt << "P";
-        break;
-      }
-      default:
-        printRelocationTargetName(Obj, RE, Fmt);
-      }
-    } else { // ARM-specific relocations
-      switch (Type) {
-      case MachO::ARM_RELOC_HALF:
-      case MachO::ARM_RELOC_HALF_SECTDIFF: {
-        // Half relocations steal a bit from the length field to encode
-        // whether this is an upper16 or a lower16 relocation.
-        bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
-
-        if (isUpper)
-          Fmt << ":upper16:(";
-        else
-          Fmt << ":lower16:(";
-        printRelocationTargetName(Obj, RE, Fmt);
-
-        DataRefImpl RelNext = Rel;
-        Obj->moveRelocationNext(RelNext);
-        MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
-        // ARM half relocs must be followed by a relocation of type
-        // ARM_RELOC_PAIR.
-        unsigned RType = Obj->getAnyRelocationType(RENext);
-        if (RType != MachO::ARM_RELOC_PAIR)
-          report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
-                       "ARM_RELOC_HALF");
-
-        // NOTE: The half of the target virtual address is stashed in the
-        // address field of the secondary relocation, but we can't reverse
-        // engineer the constant offset from it without decoding the movw/movt
-        // instruction to find the other half in its immediate field.
-
-        // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
-        // symbol/section pointer of the follow-on relocation.
-        if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
-          Fmt << "-";
-          printRelocationTargetName(Obj, RENext, Fmt);
-        }
-
-        Fmt << ")";
-        break;
-      }
-      default: { printRelocationTargetName(Obj, RE, Fmt); }
-      }
-    }
-  } else
-    printRelocationTargetName(Obj, RE, Fmt);
-
-  Fmt.flush();
-  Result.append(FmtBuf.begin(), FmtBuf.end());
-  return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const RelocationRef &Rel,
-                                                SmallVectorImpl<char> &Result) {
+static Error getRelocationValueString(const RelocationRef &Rel,
+                                      SmallVectorImpl<char> &Result) {
   const ObjectFile *Obj = Rel.getObject();
   if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
-    return getRelocationValueString(ELF, Rel, Result);
+    return getELFRelocationValueString(ELF, Rel, Result);
   if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
-    return getRelocationValueString(COFF, Rel, Result);
+    return getCOFFRelocationValueString(COFF, Rel, Result);
   if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
-    return getRelocationValueString(Wasm, Rel, Result);
+    return getWasmRelocationValueString(Wasm, Rel, Result);
   if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
-    return getRelocationValueString(MachO, Rel, Result);
+    return getMachORelocationValueString(MachO, Rel, Result);
   llvm_unreachable("unknown object file format");
 }
 
@@ -928,13 +559,15 @@ private:
 public:
   SourcePrinter() = default;
   SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
-    symbolize::LLVMSymbolizer::Options SymbolizerOpts(
-        DILineInfoSpecifier::FunctionNameKind::None, true, false, false,
-        DefaultArch);
+    symbolize::LLVMSymbolizer::Options SymbolizerOpts;
+    SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None;
+    SymbolizerOpts.Demangle = false;
+    SymbolizerOpts.DefaultArch = DefaultArch;
     Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts));
   }
   virtual ~SourcePrinter() = default;
-  virtual void printSourceLine(raw_ostream &OS, uint64_t Address,
+  virtual void printSourceLine(raw_ostream &OS,
+                               object::SectionedAddress Address,
                                StringRef Delimiter = "; ");
 };
 
@@ -949,35 +582,37 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
     Buffer = std::move(*BufferOrError);
   }
   // Chomp the file to get lines
-  size_t BufferSize = Buffer->getBufferSize();
-  const char *BufferStart = Buffer->getBufferStart();
-  for (const char *Start = BufferStart, *End = BufferStart;
-       End < BufferStart + BufferSize; End++)
-    if (*End == '\n' || End == BufferStart + BufferSize - 1 ||
-        (*End == '\r' && *(End + 1) == '\n')) {
-      LineCache[LineInfo.FileName].push_back(StringRef(Start, End - Start));
-      if (*End == '\r')
-        End++;
-      Start = End + 1;
+  const char *BufferStart = Buffer->getBufferStart(),
+             *BufferEnd = Buffer->getBufferEnd();
+  std::vector<StringRef> &Lines = LineCache[LineInfo.FileName];
+  const char *Start = BufferStart;
+  for (const char *I = BufferStart; I != BufferEnd; ++I)
+    if (*I == '\n') {
+      Lines.emplace_back(Start, I - Start - (BufferStart < I && I[-1] == '\r'));
+      Start = I + 1;
     }
+  if (Start < BufferEnd)
+    Lines.emplace_back(Start, BufferEnd - Start);
   SourceCache[LineInfo.FileName] = std::move(Buffer);
   return true;
 }
 
-void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address,
+void SourcePrinter::printSourceLine(raw_ostream &OS,
+                                    object::SectionedAddress Address,
                                     StringRef Delimiter) {
   if (!Symbolizer)
     return;
+
   DILineInfo LineInfo = DILineInfo();
-  auto ExpectecLineInfo =
-      Symbolizer->symbolizeCode(Obj->getFileName(), Address);
-  if (!ExpectecLineInfo)
-    consumeError(ExpectecLineInfo.takeError());
+  auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
+  if (!ExpectedLineInfo)
+    consumeError(ExpectedLineInfo.takeError());
   else
-    LineInfo = *ExpectecLineInfo;
+    LineInfo = *ExpectedLineInfo;
 
-  if ((LineInfo.FileName == "<invalid>") || OldLineInfo.Line == LineInfo.Line ||
-      LineInfo.Line == 0)
+  if ((LineInfo.FileName == "<invalid>") || LineInfo.Line == 0 ||
+      ((OldLineInfo.Line == LineInfo.Line) &&
+       (OldLineInfo.FileName == LineInfo.FileName)))
     return;
 
   if (PrintLines)
@@ -986,53 +621,79 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address,
     if (SourceCache.find(LineInfo.FileName) == SourceCache.end())
       if (!cacheSource(LineInfo))
         return;
-    auto FileBuffer = SourceCache.find(LineInfo.FileName);
-    if (FileBuffer != SourceCache.end()) {
-      auto LineBuffer = LineCache.find(LineInfo.FileName);
-      if (LineBuffer != LineCache.end()) {
-        if (LineInfo.Line > LineBuffer->second.size())
-          return;
-        // Vector begins at 0, line numbers are non-zero
-        OS << Delimiter << LineBuffer->second[LineInfo.Line - 1].ltrim()
-           << "\n";
-      }
+    auto LineBuffer = LineCache.find(LineInfo.FileName);
+    if (LineBuffer != LineCache.end()) {
+      if (LineInfo.Line > LineBuffer->second.size())
+        return;
+      // Vector begins at 0, line numbers are non-zero
+      OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n';
     }
   }
   OldLineInfo = LineInfo;
 }
 
+static bool isAArch64Elf(const ObjectFile *Obj) {
+  const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+  return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
+}
+
 static bool isArmElf(const ObjectFile *Obj) {
-  return (Obj->isELF() &&
-          (Obj->getArch() == Triple::aarch64 ||
-           Obj->getArch() == Triple::aarch64_be ||
-           Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb ||
-           Obj->getArch() == Triple::thumb ||
-           Obj->getArch() == Triple::thumbeb));
+  const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+  return Elf && Elf->getEMachine() == ELF::EM_ARM;
+}
+
+static bool hasMappingSymbols(const ObjectFile *Obj) {
+  return isArmElf(Obj) || isAArch64Elf(Obj);
+}
+
+static void printRelocation(const RelocationRef &Rel, uint64_t Address,
+                            bool Is64Bits) {
+  StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ":  " : "\t\t\t%08" PRIx64 ":  ";
+  SmallString<16> Name;
+  SmallString<32> Val;
+  Rel.getTypeName(Name);
+  error(getRelocationValueString(Rel, Val));
+  outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n";
 }
 
 class PrettyPrinter {
 public:
   virtual ~PrettyPrinter() = default;
   virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
-                         ArrayRef<uint8_t> Bytes, uint64_t Address,
-                         raw_ostream &OS, StringRef Annot,
-                         MCSubtargetInfo const &STI, SourcePrinter *SP,
+                         ArrayRef<uint8_t> Bytes,
+                         object::SectionedAddress Address, raw_ostream &OS,
+                         StringRef Annot, MCSubtargetInfo const &STI,
+                         SourcePrinter *SP,
                          std::vector<RelocationRef> *Rels = nullptr) {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
-    if (!NoLeadingAddr)
-      OS << format("%8" PRIx64 ":", Address);
-    if (!NoShowRawInsn) {
-      OS << "\t";
-      dumpBytes(Bytes, OS);
+
+    {
+      formatted_raw_ostream FOS(OS);
+      if (!NoLeadingAddr)
+        FOS << format("%8" PRIx64 ":", Address.Address);
+      if (!NoShowRawInsn) {
+        FOS << ' ';
+        dumpBytes(Bytes, FOS);
+      }
+      FOS.flush();
+      // The output of printInst starts with a tab. Print some spaces so that
+      // the tab has 1 column and advances to the target tab stop.
+      unsigned TabStop = NoShowRawInsn ? 16 : 40;
+      unsigned Column = FOS.getColumn();
+      FOS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
+
+      // The dtor calls flush() to ensure the indent comes before printInst().
     }
+
     if (MI)
       IP.printInst(MI, OS, "", STI);
     else
-      OS << " <unknown>";
+      OS << "\t<unknown>";
   }
 };
 PrettyPrinter PrettyPrinterInst;
+
 class HexagonPrettyPrinter : public PrettyPrinter {
 public:
   void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -1044,17 +705,17 @@ public:
     if (!NoShowRawInsn) {
       OS << "\t";
       dumpBytes(Bytes.slice(0, 4), OS);
-      OS << format("%08" PRIx32, opcode);
+      OS << format("\t%08" PRIx32, opcode);
     }
   }
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
-                 uint64_t Address, raw_ostream &OS, StringRef Annot,
-                 MCSubtargetInfo const &STI, SourcePrinter *SP,
+                 object::SectionedAddress Address, raw_ostream &OS,
+                 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
                  std::vector<RelocationRef> *Rels) override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address, "");
     if (!MI) {
-      printLead(Bytes, Address, OS);
+      printLead(Bytes, Address.Address, OS);
       OS << " <unknown>";
       return;
     }
@@ -1070,21 +731,15 @@ public:
     auto HeadTail = PacketBundle.first.split('\n');
     auto Preamble = " { ";
     auto Separator = "";
-    StringRef Fmt = "\t\t\t%08" PRIx64 ":  ";
-    std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
-    std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
 
     // Hexagon's packets require relocations to be inline rather than
     // clustered at the end of the packet.
+    std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
+    std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
     auto PrintReloc = [&]() -> void {
-      while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address)) {
-        if (RelCur->getOffset() == Address) {
-          SmallString<16> Name;
-          SmallString<32> Val;
-          RelCur->getTypeName(Name);
-          error(getRelocationValueString(*RelCur, Val));
-          OS << Separator << format(Fmt.data(), Address) << Name << "\t" << Val
-                << "\n";
+      while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
+        if (RelCur->getOffset() == Address.Address) {
+          printRelocation(*RelCur, Address.Address, false);
           return;
         }
         ++RelCur;
@@ -1096,7 +751,7 @@ public:
       Separator = "\n";
       if (SP && (PrintSource || PrintLines))
         SP->printSourceLine(OS, Address, "");
-      printLead(Bytes, Address, OS);
+      printLead(Bytes, Address.Address, OS);
       OS << Preamble;
       Preamble = "   ";
       StringRef Inst;
@@ -1114,7 +769,7 @@ public:
         OS << " } " << PacketBundle.second;
       PrintReloc();
       Bytes = Bytes.slice(4);
-      Address += 4;
+      Address.Address += 4;
     }
   }
 };
@@ -1123,14 +778,12 @@ HexagonPrettyPrinter HexagonPrettyPrinterInst;
 class AMDGCNPrettyPrinter : public PrettyPrinter {
 public:
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
-                 uint64_t Address, raw_ostream &OS, StringRef Annot,
-                 MCSubtargetInfo const &STI, SourcePrinter *SP,
+                 object::SectionedAddress Address, raw_ostream &OS,
+                 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
                  std::vector<RelocationRef> *Rels) override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
 
-    typedef support::ulittle32_t U32;
-
     if (MI) {
       SmallString<40> InstStr;
       raw_svector_ostream IS(InstStr);
@@ -1144,7 +797,7 @@ public:
       // remaining
       if (Bytes.size() >= 4) {
         OS << format("\t.long 0x%08" PRIx32 " ",
-                     static_cast<uint32_t>(*reinterpret_cast<const U32*>(Bytes.data())));
+                     support::endian::read32<support::little>(Bytes.data()));
         OS.indent(42);
       } else {
           OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
@@ -1154,20 +807,21 @@ public:
       }
     }
 
-    OS << format("// %012" PRIX64 ": ", Address);
-    if (Bytes.size() >=4) {
-      for (auto D : makeArrayRef(reinterpret_cast<const U32*>(Bytes.data()),
-                                 Bytes.size() / sizeof(U32)))
-        // D should be explicitly casted to uint32_t here as it is passed
-        // by format to snprintf as vararg.
-        OS << format("%08" PRIX32 " ", static_cast<uint32_t>(D));
+    OS << format("// %012" PRIX64 ":", Address.Address);
+    if (Bytes.size() >= 4) {
+      // D should be casted to uint32_t here as it is passed by format to
+      // snprintf as vararg.
+      for (uint32_t D : makeArrayRef(
+               reinterpret_cast<const support::little32_t *>(Bytes.data()),
+               Bytes.size() / 4))
+        OS << format(" %08" PRIX32, D);
     } else {
-      for (unsigned int i = 0; i < Bytes.size(); i++)
-        OS << format("%02" PRIX8 " ", Bytes[i]);
+      for (unsigned char B : Bytes)
+        OS << format(" %02" PRIX8, B);
     }
 
     if (!Annot.empty())
-      OS << "// " << Annot;
+      OS << " // " << Annot;
   }
 };
 AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
@@ -1175,13 +829,13 @@ AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
 class BPFPrettyPrinter : public PrettyPrinter {
 public:
   void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
-                 uint64_t Address, raw_ostream &OS, StringRef Annot,
-                 MCSubtargetInfo const &STI, SourcePrinter *SP,
+                 object::SectionedAddress Address, raw_ostream &OS,
+                 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
                  std::vector<RelocationRef> *Rels) override {
     if (SP && (PrintSource || PrintLines))
       SP->printSourceLine(OS, Address);
     if (!NoLeadingAddr)
-      OS << format("%8" PRId64 ":", Address / 8);
+      OS << format("%8" PRId64 ":", Address.Address / 8);
     if (!NoShowRawInsn) {
       OS << "\t";
       dumpBytes(Bytes, OS);
@@ -1189,7 +843,7 @@ public:
     if (MI)
       IP.printInst(MI, OS, "", STI);
     else
-      OS << " <unknown>";
+      OS << "\t<unknown>";
   }
 };
 BPFPrettyPrinter BPFPrettyPrinterInst;
@@ -1227,27 +881,25 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
                      std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
   for (auto Symbol : Obj->getDynamicSymbolIterators()) {
     uint8_t SymbolType = Symbol.getELFType();
-    if (SymbolType != ELF::STT_FUNC || Symbol.getSize() == 0)
+    if (SymbolType == ELF::STT_SECTION)
       continue;
 
-    Expected<uint64_t> AddressOrErr = Symbol.getAddress();
-    if (!AddressOrErr)
-      report_error(Obj->getFileName(), AddressOrErr.takeError());
+    uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName());
+    // ELFSymbolRef::getAddress() returns size instead of value for common
+    // symbols which is not desirable for disassembly output. Overriding.
+    if (SymbolType == ELF::STT_COMMON)
+      Address = Obj->getSymbol(Symbol.getRawDataRefImpl())->st_value;
 
-    Expected<StringRef> Name = Symbol.getName();
-    if (!Name)
-      report_error(Obj->getFileName(), Name.takeError());
-    if (Name->empty())
+    StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
+    if (Name.empty())
       continue;
 
-    Expected<section_iterator> SectionOrErr = Symbol.getSection();
-    if (!SectionOrErr)
-      report_error(Obj->getFileName(), SectionOrErr.takeError());
-    section_iterator SecI = *SectionOrErr;
+    section_iterator SecI =
+        unwrapOrError(Symbol.getSection(), Obj->getFileName());
     if (SecI == Obj->section_end())
       continue;
 
-    AllSymbols[*SecI].emplace_back(*AddressOrErr, *Name, SymbolType);
+    AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
   }
 }
 
@@ -1285,14 +937,10 @@ static void addPltEntries(const ObjectFile *Obj,
       SymbolRef Symbol(PltEntry.first, ElfObj);
       uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
 
-      Expected<StringRef> NameOrErr = Symbol.getName();
-      if (!NameOrErr)
-        report_error(Obj->getFileName(), NameOrErr.takeError());
-      if (NameOrErr->empty())
-        continue;
-      StringRef Name = Saver.save((*NameOrErr + "@plt").str());
-
-      AllSymbols[*Plt].emplace_back(PltEntry.second, Name, SymbolType);
+      StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
+      if (!Name.empty())
+        AllSymbols[*Plt].emplace_back(
+            PltEntry.second, Saver.save((Name + "@plt").str()), SymbolType);
     }
   }
 }
@@ -1301,10 +949,6 @@ static void addPltEntries(const ObjectFile *Obj,
 // returns the number of zero bytes that can be skipped when dumping the
 // disassembly of the instructions in Buf.
 static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
-  // When -z or --disassemble-zeroes are given we always dissasemble them.
-  if (DisassembleZeroes)
-    return 0;
-
   // Find the number of leading zeroes.
   size_t N = 0;
   while (N < Buf.size() && !Buf[N])
@@ -1320,108 +964,160 @@ static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
   return N & ~0x3;
 }
 
-static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
-  if (StartAddress > StopAddress)
-    error("Start address should be less than stop address");
-
-  const Target *TheTarget = getTarget(Obj);
-
-  // Package up features to be passed to target/subtarget
-  SubtargetFeatures Features = Obj->getFeatures();
-  if (!MAttrs.empty())
-    for (unsigned I = 0; I != MAttrs.size(); ++I)
-      Features.AddFeature(MAttrs[I]);
-
-  std::unique_ptr<const MCRegisterInfo> MRI(
-      TheTarget->createMCRegInfo(TripleName));
-  if (!MRI)
-    report_error(Obj->getFileName(), "no register info for target " +
-                 TripleName);
-
-  // Set up disassembler.
-  std::unique_ptr<const MCAsmInfo> AsmInfo(
-      TheTarget->createMCAsmInfo(*MRI, TripleName));
-  if (!AsmInfo)
-    report_error(Obj->getFileName(), "no assembly info for target " +
-                 TripleName);
-  std::unique_ptr<const MCSubtargetInfo> STI(
-      TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
-  if (!STI)
-    report_error(Obj->getFileName(), "no subtarget info for target " +
-                 TripleName);
-  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
-  if (!MII)
-    report_error(Obj->getFileName(), "no instruction info for target " +
-                 TripleName);
-  MCObjectFileInfo MOFI;
-  MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
-  // FIXME: for now initialize MCObjectFileInfo with default values
-  MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
-
-  std::unique_ptr<MCDisassembler> DisAsm(
-    TheTarget->createMCDisassembler(*STI, Ctx));
-  if (!DisAsm)
-    report_error(Obj->getFileName(), "no disassembler for target " +
-                 TripleName);
+// Returns a map from sections to their relocations.
+static std::map<SectionRef, std::vector<RelocationRef>>
+getRelocsMap(object::ObjectFile const &Obj) {
+  std::map<SectionRef, std::vector<RelocationRef>> Ret;
+  for (SectionRef Sec : Obj.sections()) {
+    section_iterator Relocated = Sec.getRelocatedSection();
+    if (Relocated == Obj.section_end() || !shouldKeep(*Relocated))
+      continue;
+    std::vector<RelocationRef> &V = Ret[*Relocated];
+    for (const RelocationRef &R : Sec.relocations())
+      V.push_back(R);
+    // Sort relocations by address.
+    llvm::stable_sort(V, isRelocAddressLess);
+  }
+  return Ret;
+}
 
-  std::unique_ptr<const MCInstrAnalysis> MIA(
-      TheTarget->createMCInstrAnalysis(MII.get()));
+// Used for --adjust-vma to check if address should be adjusted by the
+// specified value for a given section.
+// For ELF we do not adjust non-allocatable sections like debug ones,
+// because they are not loadable.
+// TODO: implement for other file formats.
+static bool shouldAdjustVA(const SectionRef &Section) {
+  const ObjectFile *Obj = Section.getObject();
+  if (isa<object::ELFObjectFileBase>(Obj))
+    return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
+  return false;
+}
 
-  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
-  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
-      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
-  if (!IP)
-    report_error(Obj->getFileName(), "no instruction printer for target " +
-                 TripleName);
-  IP->setPrintImmHex(PrintImmHex);
-  PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
 
-  StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ":  " :
-                                                 "\t\t\t%08" PRIx64 ":  ";
+typedef std::pair<uint64_t, char> MappingSymbolPair;
+static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
+                                 uint64_t Address) {
+  auto It =
+      partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) {
+        return Val.first <= Address;
+      });
+  // Return zero for any address before the first mapping symbol; this means
+  // we should use the default disassembly mode, depending on the target.
+  if (It == MappingSymbols.begin())
+    return '\x00';
+  return (It - 1)->second;
+}
 
-  SourcePrinter SP(Obj, TheTarget->getName());
+static uint64_t
+dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+               const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
+               ArrayRef<MappingSymbolPair> MappingSymbols) {
+  support::endianness Endian =
+      Obj->isLittleEndian() ? support::little : support::big;
+  while (Index < End) {
+    outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+    outs() << "\t";
+    if (Index + 4 <= End) {
+      dumpBytes(Bytes.slice(Index, 4), outs());
+      outs() << "\t.word\t"
+             << format_hex(
+                    support::endian::read32(Bytes.data() + Index, Endian), 10);
+      Index += 4;
+    } else if (Index + 2 <= End) {
+      dumpBytes(Bytes.slice(Index, 2), outs());
+      outs() << "\t\t.short\t"
+             << format_hex(
+                    support::endian::read16(Bytes.data() + Index, Endian), 6);
+      Index += 2;
+    } else {
+      dumpBytes(Bytes.slice(Index, 1), outs());
+      outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
+      ++Index;
+    }
+    outs() << "\n";
+    if (getMappingSymbolKind(MappingSymbols, Index) != 'd')
+      break;
+  }
+  return Index;
+}
 
-  // Create a mapping, RelocSecs = SectionRelocMap[S], where sections
-  // in RelocSecs contain the relocations for section S.
-  std::error_code EC;
-  std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
-  for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
-    section_iterator Sec2 = Section.getRelocatedSection();
-    if (Sec2 != Obj->section_end())
-      SectionRelocMap[*Sec2].push_back(Section);
+static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+                        ArrayRef<uint8_t> Bytes) {
+  // print out data up to 8 bytes at a time in hex and ascii
+  uint8_t AsciiData[9] = {'\0'};
+  uint8_t Byte;
+  int NumBytes = 0;
+
+  for (; Index < End; ++Index) {
+    if (NumBytes == 0)
+      outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+    Byte = Bytes.slice(Index)[0];
+    outs() << format(" %02x", Byte);
+    AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
+
+    uint8_t IndentOffset = 0;
+    NumBytes++;
+    if (Index == End - 1 || NumBytes > 8) {
+      // Indent the space for less than 8 bytes data.
+      // 2 spaces for byte and one for space between bytes
+      IndentOffset = 3 * (8 - NumBytes);
+      for (int Excess = NumBytes; Excess < 8; Excess++)
+        AsciiData[Excess] = '\0';
+      NumBytes = 8;
+    }
+    if (NumBytes == 8) {
+      AsciiData[8] = '\0';
+      outs() << std::string(IndentOffset, ' ') << "         ";
+      outs() << reinterpret_cast<char *>(AsciiData);
+      outs() << '\n';
+      NumBytes = 0;
+    }
   }
+}
+
+static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
+                              MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
+                              MCDisassembler *SecondaryDisAsm,
+                              const MCInstrAnalysis *MIA, MCInstPrinter *IP,
+                              const MCSubtargetInfo *PrimarySTI,
+                              const MCSubtargetInfo *SecondarySTI,
+                              PrettyPrinter &PIP,
+                              SourcePrinter &SP, bool InlineRelocs) {
+  const MCSubtargetInfo *STI = PrimarySTI;
+  MCDisassembler *DisAsm = PrimaryDisAsm;
+  bool PrimaryIsThumb = false;
+  if (isArmElf(Obj))
+    PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
+
+  std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
+  if (InlineRelocs)
+    RelocMap = getRelocsMap(*Obj);
+  bool Is64Bits = Obj->getBytesInAddress() > 4;
 
   // Create a mapping from virtual address to symbol name.  This is used to
   // pretty print the symbols while disassembling.
   std::map<SectionRef, SectionSymbolsTy> AllSymbols;
   SectionSymbolsTy AbsoluteSymbols;
+  const StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
-    Expected<uint64_t> AddressOrErr = Symbol.getAddress();
-    if (!AddressOrErr)
-      report_error(Obj->getFileName(), AddressOrErr.takeError());
-    uint64_t Address = *AddressOrErr;
-
-    Expected<StringRef> Name = Symbol.getName();
-    if (!Name)
-      report_error(Obj->getFileName(), Name.takeError());
-    if (Name->empty())
-      continue;
+    uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName);
 
-    Expected<section_iterator> SectionOrErr = Symbol.getSection();
-    if (!SectionOrErr)
-      report_error(Obj->getFileName(), SectionOrErr.takeError());
+    StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    if (Name.empty())
+      continue;
 
     uint8_t SymbolType = ELF::STT_NOTYPE;
-    if (Obj->isELF())
+    if (Obj->isELF()) {
       SymbolType = getElfSymbolType(Obj, Symbol);
+      if (SymbolType == ELF::STT_SECTION)
+        continue;
+    }
 
-    section_iterator SecI = *SectionOrErr;
+    section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
     if (SecI != Obj->section_end())
-      AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
+      AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
     else
-      AbsoluteSymbols.emplace_back(Address, *Name, SymbolType);
-
-
+      AbsoluteSymbols.emplace_back(Address, Name, SymbolType);
   }
   if (AllSymbols.empty() && Obj->isELF())
     addDynamicElfSymbols(Obj, AllSymbols);
@@ -1448,31 +1144,28 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       error(ExportEntry.getExportRVA(RVA));
 
       uint64_t VA = COFFObj->getImageBase() + RVA;
-      auto Sec = std::upper_bound(
-          SectionAddresses.begin(), SectionAddresses.end(), VA,
-          [](uint64_t LHS, const std::pair<uint64_t, SectionRef> &RHS) {
-            return LHS < RHS.first;
+      auto Sec = partition_point(
+          SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
+            return O.first <= VA;
           });
-      if (Sec != SectionAddresses.begin())
+      if (Sec != SectionAddresses.begin()) {
         --Sec;
-      else
-        Sec = SectionAddresses.end();
-
-      if (Sec != SectionAddresses.end())
         AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
-      else
+      } else
         AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
     }
   }
 
   // Sort all the symbols, this allows us to use a simple binary search to find
   // a symbol near an address.
+  StringSet<> FoundDisasmFuncsSet;
   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
     array_pod_sort(SecSyms.second.begin(), SecSyms.second.end());
   array_pod_sort(AbsoluteSymbols.begin(), AbsoluteSymbols.end());
 
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
-    if (!DisassembleAll && (!Section.isText() || Section.isVirtual()))
+    if (FilterSections.empty() && !DisassembleAll &&
+        (!Section.isText() || Section.isVirtual()))
       continue;
 
     uint64_t SectionAddr = Section.getAddress();
@@ -1482,25 +1175,23 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
 
     // Get the list of all the symbols in this section.
     SectionSymbolsTy &Symbols = AllSymbols[Section];
-    std::vector<uint64_t> DataMappingSymsAddr;
-    std::vector<uint64_t> TextMappingSymsAddr;
-    if (isArmElf(Obj)) {
+    std::vector<MappingSymbolPair> MappingSymbols;
+    if (hasMappingSymbols(Obj)) {
       for (const auto &Symb : Symbols) {
         uint64_t Address = std::get<0>(Symb);
         StringRef Name = std::get<1>(Symb);
         if (Name.startswith("$d"))
-          DataMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'd');
         if (Name.startswith("$x"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'x');
         if (Name.startswith("$a"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'a');
         if (Name.startswith("$t"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 't');
       }
     }
 
-    llvm::sort(DataMappingSymsAddr);
-    llvm::sort(TextMappingSymsAddr);
+    llvm::sort(MappingSymbols);
 
     if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
       // AMDGPU disassembler uses symbolizer for printing labels
@@ -1514,19 +1205,6 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       }
     }
 
-    // Make a list of all the relocations for this section.
-    std::vector<RelocationRef> Rels;
-    if (InlineRelocs) {
-      for (const SectionRef &RelocSec : SectionRelocMap[Section]) {
-        for (const RelocationRef &Reloc : RelocSec.relocations()) {
-          Rels.push_back(Reloc);
-        }
-      }
-    }
-
-    // Sort relocations by address.
-    llvm::sort(Rels, isRelocAddressLess);
-
     StringRef SegmentName = "";
     if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
       DataRefImpl DR = Section.getRawDataRefImpl();
@@ -1546,56 +1224,54 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     SmallString<40> Comments;
     raw_svector_ostream CommentStream(Comments);
 
-    StringRef BytesStr;
-    error(Section.getContents(BytesStr));
-    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
-                            BytesStr.size());
+    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
+        unwrapOrError(Section.getContents(), Obj->getFileName()));
+
+    uint64_t VMAAdjustment = 0;
+    if (shouldAdjustVA(Section))
+      VMAAdjustment = AdjustVMA;
 
     uint64_t Size;
     uint64_t Index;
     bool PrintedSection = false;
-
+    std::vector<RelocationRef> Rels = RelocMap[Section];
     std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
     std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
     // Disassemble symbol by symbol.
     for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
-      uint64_t Start = std::get<0>(Symbols[SI]) - SectionAddr;
-      // The end is either the section end or the beginning of the next
-      // symbol.
-      uint64_t End = (SI == SE - 1)
-                         ? SectSize
-                         : std::get<0>(Symbols[SI + 1]) - SectionAddr;
-      // Don't try to disassemble beyond the end of section contents.
-      if (End > SectSize)
-        End = SectSize;
-      // If this symbol has the same address as the next symbol, then skip it.
-      if (Start >= End)
-        continue;
+      std::string SymbolName = std::get<1>(Symbols[SI]).str();
+      if (Demangle)
+        SymbolName = demangle(SymbolName);
 
-      // Check if we need to skip symbol
-      // Skip if the symbol's data is not between StartAddress and StopAddress
-      if (End + SectionAddr < StartAddress ||
-          Start + SectionAddr > StopAddress) {
+      // Skip if --disassemble-functions is not empty and the symbol is not in
+      // the list.
+      if (!DisasmFuncsSet.empty() && !DisasmFuncsSet.count(SymbolName))
         continue;
-      }
 
-      /// Skip if user requested specific symbols and this is not in the list
-      if (!DisasmFuncsSet.empty() &&
-          !DisasmFuncsSet.count(std::get<1>(Symbols[SI])))
+      uint64_t Start = std::get<0>(Symbols[SI]);
+      if (Start < SectionAddr || StopAddress <= Start)
         continue;
+      else
+        FoundDisasmFuncsSet.insert(SymbolName);
+
+      // The end is the section end, the beginning of the next symbol, or
+      // --stop-address.
+      uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
+      if (SI + 1 < SE)
+        End = std::min(End, std::get<0>(Symbols[SI + 1]));
+      if (Start >= End || End <= StartAddress)
+        continue;
+      Start -= SectionAddr;
+      End -= SectionAddr;
 
       if (!PrintedSection) {
         PrintedSection = true;
-        outs() << "Disassembly of section ";
+        outs() << "\nDisassembly of section ";
         if (!SegmentName.empty())
           outs() << SegmentName << ",";
-        outs() << SectionName << ':';
+        outs() << SectionName << ":\n";
       }
 
-      // Stop disassembly at the stop address specified
-      if (End + SectionAddr > StopAddress)
-        End = StopAddress - SectionAddr;
-
       if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
         if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
           // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
@@ -1615,13 +1291,10 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
 
       outs() << '\n';
       if (!NoLeadingAddr)
-        outs() << format("%016" PRIx64 " ", SectionAddr + Start);
+        outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
+                         SectionAddr + Start + VMAAdjustment);
 
-      StringRef SymbolName = std::get<1>(Symbols[SI]);
-      if (Demangle)
-        outs() << demangle(SymbolName) << ":\n";
-      else
-        outs() << SymbolName << ":\n";
+      outs() << SymbolName << ":\n";
 
       // Don't print raw contents of a virtual section. A virtual section
       // doesn't have any contents in the file.
@@ -1636,143 +1309,82 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
       raw_ostream &DebugOut = nulls();
 #endif
 
-      for (Index = Start; Index < End; Index += Size) {
-        MCInst Inst;
+      // Some targets (like WebAssembly) have a special prelude at the start
+      // of each symbol.
+      DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
+                            SectionAddr + Start, DebugOut, CommentStream);
+      Start += Size;
+
+      Index = Start;
+      if (SectionAddr < StartAddress)
+        Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
+
+      // If there is a data/common symbol inside an ELF text section and we are
+      // only disassembling text (applicable to all architectures), we are in a
+      // situation where we must print the data and not disassemble it.
+      if (Obj->isELF() && !DisassembleAll && Section.isText()) {
+        uint8_t SymTy = std::get<2>(Symbols[SI]);
+        if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
+          dumpELFData(SectionAddr, Index, End, Bytes);
+          Index = End;
+        }
+      }
 
-        if (Index + SectionAddr < StartAddress ||
-            Index + SectionAddr > StopAddress) {
-          // skip byte by byte till StartAddress is reached
-          Size = 1;
+      bool CheckARMELFData = hasMappingSymbols(Obj) &&
+                             std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
+                             !DisassembleAll;
+      while (Index < End) {
+        // ARM and AArch64 ELF binaries can interleave data and text in the
+        // same section. We rely on the markers introduced to understand what
+        // we need to dump. If the data marker is within a function, it is
+        // denoted as a word/short etc.
+        if (CheckARMELFData &&
+            getMappingSymbolKind(MappingSymbols, Index) == 'd') {
+          Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
+                                 MappingSymbols);
           continue;
         }
-        // AArch64 ELF binaries can interleave data and text in the
-        // same section. We rely on the markers introduced to
-        // understand what we need to dump. If the data marker is within a
-        // function, it is denoted as a word/short etc
-        if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
-            !DisassembleAll) {
-          uint64_t Stride = 0;
-
-          auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
-                                      DataMappingSymsAddr.end(), Index);
-          if (DAI != DataMappingSymsAddr.end() && *DAI == Index) {
-            // Switch to data.
-            while (Index < End) {
-              outs() << format("%8" PRIx64 ":", SectionAddr + Index);
-              outs() << "\t";
-              if (Index + 4 <= End) {
-                Stride = 4;
-                dumpBytes(Bytes.slice(Index, 4), outs());
-                outs() << "\t.word\t";
-                uint32_t Data = 0;
-                if (Obj->isLittleEndian()) {
-                  const auto Word =
-                      reinterpret_cast<const support::ulittle32_t *>(
-                          Bytes.data() + Index);
-                  Data = *Word;
-                } else {
-                  const auto Word = reinterpret_cast<const support::ubig32_t *>(
-                      Bytes.data() + Index);
-                  Data = *Word;
-                }
-                outs() << "0x" << format("%08" PRIx32, Data);
-              } else if (Index + 2 <= End) {
-                Stride = 2;
-                dumpBytes(Bytes.slice(Index, 2), outs());
-                outs() << "\t\t.short\t";
-                uint16_t Data = 0;
-                if (Obj->isLittleEndian()) {
-                  const auto Short =
-                      reinterpret_cast<const support::ulittle16_t *>(
-                          Bytes.data() + Index);
-                  Data = *Short;
-                } else {
-                  const auto Short =
-                      reinterpret_cast<const support::ubig16_t *>(Bytes.data() +
-                                                                  Index);
-                  Data = *Short;
-                }
-                outs() << "0x" << format("%04" PRIx16, Data);
-              } else {
-                Stride = 1;
-                dumpBytes(Bytes.slice(Index, 1), outs());
-                outs() << "\t\t.byte\t";
-                outs() << "0x" << format("%02" PRIx8, Bytes.slice(Index, 1)[0]);
-              }
-              Index += Stride;
-              outs() << "\n";
-              auto TAI = std::lower_bound(TextMappingSymsAddr.begin(),
-                                          TextMappingSymsAddr.end(), Index);
-              if (TAI != TextMappingSymsAddr.end() && *TAI == Index)
-                break;
-            }
+
+        // When -z or --disassemble-zeroes is given we always disassemble
+        // zero bytes. Otherwise we might want to skip zero bytes we see.
+        if (!DisassembleZeroes) {
+          uint64_t MaxOffset = End - Index;
+          // For -reloc: print zero blocks patched by relocations, so that
+          // relocations can be shown in the dump.
+          if (RelCur != RelEnd)
+            MaxOffset = RelCur->getOffset() - Index;
+
+          if (size_t N =
+                  countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
+            outs() << "\t\t..." << '\n';
+            Index += N;
+            continue;
           }
         }
 
-        // If there is a data symbol inside an ELF text section and we are only
-        // disassembling text (applicable all architectures),
-        // we are in a situation where we must print the data and not
-        // disassemble it.
-        if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
-            !DisassembleAll && Section.isText()) {
-          // print out data up to 8 bytes at a time in hex and ascii
-          uint8_t AsciiData[9] = {'\0'};
-          uint8_t Byte;
-          int NumBytes = 0;
-
-          for (Index = Start; Index < End; Index += 1) {
-            if (((SectionAddr + Index) < StartAddress) ||
-                ((SectionAddr + Index) > StopAddress))
-              continue;
-            if (NumBytes == 0) {
-              outs() << format("%8" PRIx64 ":", SectionAddr + Index);
-              outs() << "\t";
-            }
-            Byte = Bytes.slice(Index)[0];
-            outs() << format(" %02x", Byte);
-            AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
-
-            uint8_t IndentOffset = 0;
-            NumBytes++;
-            if (Index == End - 1 || NumBytes > 8) {
-              // Indent the space for less than 8 bytes data.
-              // 2 spaces for byte and one for space between bytes
-              IndentOffset = 3 * (8 - NumBytes);
-              for (int Excess = 8 - NumBytes; Excess < 8; Excess++)
-                AsciiData[Excess] = '\0';
-              NumBytes = 8;
-            }
-            if (NumBytes == 8) {
-              AsciiData[8] = '\0';
-              outs() << std::string(IndentOffset, ' ') << "         ";
-              outs() << reinterpret_cast<char *>(AsciiData);
-              outs() << '\n';
-              NumBytes = 0;
-            }
+        if (SecondarySTI) {
+          if (getMappingSymbolKind(MappingSymbols, Index) == 'a') {
+            STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
+            DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
+          } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') {
+            STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
+            DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
           }
         }
-        if (Index >= End)
-          break;
-
-        if (size_t N =
-                countSkippableZeroBytes(Bytes.slice(Index, End - Index))) {
-          outs() << "\t\t..." << '\n';
-          Index += N;
-          if (Index >= End)
-            break;
-        }
 
         // Disassemble a real instruction or a data when disassemble all is
         // provided
-        bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
-                                                   SectionAddr + Index, DebugOut,
-                                                   CommentStream);
+        MCInst Inst;
+        bool Disassembled = DisAsm->getInstruction(
+            Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
+            CommentStream);
         if (Size == 0)
           Size = 1;
 
-        PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
-                      Bytes.slice(Index, Size), SectionAddr + Index, outs(), "",
-                      *STI, &SP, &Rels);
+        PIP.printInst(
+            *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
+            {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
+            "", *STI, &SP, &Rels);
         outs() << CommentStream.str();
         Comments.clear();
 
@@ -1791,37 +1403,34 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
             // N.B. We don't walk the relocations in the relocatable case yet.
             auto *TargetSectionSymbols = &Symbols;
             if (!Obj->isRelocatableObject()) {
-              auto SectionAddress = std::upper_bound(
-                  SectionAddresses.begin(), SectionAddresses.end(), Target,
-                  [](uint64_t LHS,
-                      const std::pair<uint64_t, SectionRef> &RHS) {
-                    return LHS < RHS.first;
+              auto It = partition_point(
+                  SectionAddresses,
+                  [=](const std::pair<uint64_t, SectionRef> &O) {
+                    return O.first <= Target;
                   });
-              if (SectionAddress != SectionAddresses.begin()) {
-                --SectionAddress;
-                TargetSectionSymbols = &AllSymbols[SectionAddress->second];
+              if (It != SectionAddresses.begin()) {
+                --It;
+                TargetSectionSymbols = &AllSymbols[It->second];
               } else {
                 TargetSectionSymbols = &AbsoluteSymbols;
               }
             }
 
-            // Find the first symbol in the section whose offset is less than
+            // Find the last symbol in the section whose offset is less than
             // or equal to the target. If there isn't a section that contains
             // the target, find the nearest preceding absolute symbol.
-            auto TargetSym = std::upper_bound(
-                TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
-                Target, [](uint64_t LHS,
-                           const std::tuple<uint64_t, StringRef, uint8_t> &RHS) {
-                  return LHS < std::get<0>(RHS);
+            auto TargetSym = partition_point(
+                *TargetSectionSymbols,
+                [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) {
+                  return std::get<0>(O) <= Target;
                 });
             if (TargetSym == TargetSectionSymbols->begin()) {
               TargetSectionSymbols = &AbsoluteSymbols;
-              TargetSym = std::upper_bound(
-                  AbsoluteSymbols.begin(), AbsoluteSymbols.end(),
-                  Target, [](uint64_t LHS,
-                             const std::tuple<uint64_t, StringRef, uint8_t> &RHS) {
-                            return LHS < std::get<0>(RHS);
-                          });
+              TargetSym = partition_point(
+                  AbsoluteSymbols,
+                  [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) {
+                    return std::get<0>(O) <= Target;
+                  });
             }
             if (TargetSym != TargetSectionSymbols->begin()) {
               --TargetSym;
@@ -1838,34 +1447,125 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
         outs() << "\n";
 
         // Hexagon does this in pretty printer
-        if (Obj->getArch() != Triple::hexagon)
+        if (Obj->getArch() != Triple::hexagon) {
           // Print relocation for instruction.
           while (RelCur != RelEnd) {
-            uint64_t Addr = RelCur->getOffset();
-            SmallString<16> Name;
-            SmallString<32> Val;
-
+            uint64_t Offset = RelCur->getOffset();
             // If this relocation is hidden, skip it.
-            if (getHidden(*RelCur) || ((SectionAddr + Addr) < StartAddress)) {
+            if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
               ++RelCur;
               continue;
             }
 
-            // Stop when rel_cur's address is past the current instruction.
-            if (Addr >= Index + Size)
+            // Stop when RelCur's offset is past the current instruction.
+            if (Offset >= Index + Size)
               break;
-            RelCur->getTypeName(Name);
-            error(getRelocationValueString(*RelCur, Val));
-            outs() << format(Fmt.data(), SectionAddr + Addr) << Name << "\t"
-                   << Val << "\n";
+
+            // When --adjust-vma is used, update the address printed.
+            if (RelCur->getSymbol() != Obj->symbol_end()) {
+              Expected<section_iterator> SymSI =
+                  RelCur->getSymbol()->getSection();
+              if (SymSI && *SymSI != Obj->section_end() &&
+                  shouldAdjustVA(**SymSI))
+                Offset += AdjustVMA;
+            }
+
+            printRelocation(*RelCur, SectionAddr + Offset, Is64Bits);
             ++RelCur;
           }
+        }
+
+        Index += Size;
       }
     }
   }
+  StringSet<> MissingDisasmFuncsSet =
+      set_difference(DisasmFuncsSet, FoundDisasmFuncsSet);
+  for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys())
+    warn("failed to disassemble missing function " + MissingDisasmFunc);
+}
+
+static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+  const Target *TheTarget = getTarget(Obj);
+
+  // Package up features to be passed to target/subtarget
+  SubtargetFeatures Features = Obj->getFeatures();
+  if (!MAttrs.empty())
+    for (unsigned I = 0; I != MAttrs.size(); ++I)
+      Features.AddFeature(MAttrs[I]);
+
+  std::unique_ptr<const MCRegisterInfo> MRI(
+      TheTarget->createMCRegInfo(TripleName));
+  if (!MRI)
+    report_error(Obj->getFileName(),
+                 "no register info for target " + TripleName);
+
+  // Set up disassembler.
+  std::unique_ptr<const MCAsmInfo> AsmInfo(
+      TheTarget->createMCAsmInfo(*MRI, TripleName));
+  if (!AsmInfo)
+    report_error(Obj->getFileName(),
+                 "no assembly info for target " + TripleName);
+  std::unique_ptr<const MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
+  if (!STI)
+    report_error(Obj->getFileName(),
+                 "no subtarget info for target " + TripleName);
+  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+  if (!MII)
+    report_error(Obj->getFileName(),
+                 "no instruction info for target " + TripleName);
+  MCObjectFileInfo MOFI;
+  MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
+  // FIXME: for now initialize MCObjectFileInfo with default values
+  MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
+
+  std::unique_ptr<MCDisassembler> DisAsm(
+      TheTarget->createMCDisassembler(*STI, Ctx));
+  if (!DisAsm)
+    report_error(Obj->getFileName(),
+                 "no disassembler for target " + TripleName);
+
+  // If we have an ARM object file, we need a second disassembler, because
+  // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
+  // We use mapping symbols to switch between the two disassemblers, where
+  // appropriate.
+  std::unique_ptr<MCDisassembler> SecondaryDisAsm;
+  std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
+  if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
+    if (STI->checkFeatures("+thumb-mode"))
+      Features.AddFeature("-thumb-mode");
+    else
+      Features.AddFeature("+thumb-mode");
+    SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
+                                                        Features.getString()));
+    SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
+  }
+
+  std::unique_ptr<const MCInstrAnalysis> MIA(
+      TheTarget->createMCInstrAnalysis(MII.get()));
+
+  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+      Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
+  if (!IP)
+    report_error(Obj->getFileName(),
+                 "no instruction printer for target " + TripleName);
+  IP->setPrintImmHex(PrintImmHex);
+
+  PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
+  SourcePrinter SP(Obj, TheTarget->getName());
+
+  for (StringRef Opt : DisassemblerOptions)
+    if (!IP->applyTargetSpecificCLOption(Opt))
+      error("Unrecognized disassembler option: " + Opt);
+
+  disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
+                    MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
+                    SP, InlineRelocs);
 }
 
-void llvm::printRelocations(const ObjectFile *Obj) {
+void printRelocations(const ObjectFile *Obj) {
   StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
                                                  "%08" PRIx64;
   // Regular objdump doesn't print relocations in non-relocatable object
@@ -1873,28 +1573,40 @@ void llvm::printRelocations(const ObjectFile *Obj) {
   if (!Obj->isRelocatableObject())
     return;
 
+  // Build a mapping from relocation target to a vector of relocation
+  // sections. Usually, there is only one relocation section for
+  // each relocated section.
+  MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
     if (Section.relocation_begin() == Section.relocation_end())
       continue;
+    const SectionRef TargetSec = *Section.getRelocatedSection();
+    SecToRelSec[TargetSec].push_back(Section);
+  }
+
+  for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
     StringRef SecName;
-    error(Section.getName(SecName));
+    error(P.first.getName(SecName));
     outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
-    for (const RelocationRef &Reloc : Section.relocations()) {
-      uint64_t Address = Reloc.getOffset();
-      SmallString<32> RelocName;
-      SmallString<32> ValueStr;
-      if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
-        continue;
-      Reloc.getTypeName(RelocName);
-      error(getRelocationValueString(Reloc, ValueStr));
-      outs() << format(Fmt.data(), Address) << " " << RelocName << " "
-             << ValueStr << "\n";
+
+    for (SectionRef Section : P.second) {
+      for (const RelocationRef &Reloc : Section.relocations()) {
+        uint64_t Address = Reloc.getOffset();
+        SmallString<32> RelocName;
+        SmallString<32> ValueStr;
+        if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
+          continue;
+        Reloc.getTypeName(RelocName);
+        error(getRelocationValueString(Reloc, ValueStr));
+        outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+               << ValueStr << "\n";
+      }
     }
     outs() << "\n";
   }
 }
 
-void llvm::printDynamicRelocations(const ObjectFile *Obj) {
+void printDynamicRelocations(const ObjectFile *Obj) {
   // For the moment, this option is for ELF only
   if (!Obj->isELF())
     return;
@@ -1911,9 +1623,7 @@ void llvm::printDynamicRelocations(const ObjectFile *Obj) {
 
   outs() << "DYNAMIC RELOCATION RECORDS\n";
   StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
-  for (const SectionRef &Section : DynRelSec) {
-    if (Section.relocation_begin() == Section.relocation_end())
-      continue;
+  for (const SectionRef &Section : DynRelSec)
     for (const RelocationRef &Reloc : Section.relocations()) {
       uint64_t Address = Reloc.getOffset();
       SmallString<32> RelocName;
@@ -1923,34 +1633,60 @@ void llvm::printDynamicRelocations(const ObjectFile *Obj) {
       outs() << format(Fmt.data(), Address) << " " << RelocName << " "
              << ValueStr << "\n";
     }
-  }
 }
 
-void llvm::printSectionHeaders(const ObjectFile *Obj) {
-  outs() << "Sections:\n"
-            "Idx Name          Size      Address          Type\n";
+// Returns true if we need to show LMA column when dumping section headers. We
+// show it only when the platform is ELF and either at least one section has
+// a VMA that differs from its LMA or the --show-lma flag is used.
+static bool shouldDisplayLMA(const ObjectFile *Obj) {
+  if (!Obj->isELF())
+    return false;
+  for (const SectionRef &S : ToolSectionFilter(*Obj))
+    if (S.getAddress() != getELFSectionLMA(S))
+      return true;
+  return ShowLMA;
+}
+
+void printSectionHeaders(const ObjectFile *Obj) {
+  bool HasLMAColumn = shouldDisplayLMA(Obj);
+  if (HasLMAColumn)
+    outs() << "Sections:\n"
+              "Idx Name          Size     VMA              LMA              "
+              "Type\n";
+  else
+    outs() << "Sections:\n"
+              "Idx Name          Size     VMA          Type\n";
+
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
     StringRef Name;
     error(Section.getName(Name));
-    uint64_t Address = Section.getAddress();
+    uint64_t VMA = Section.getAddress();
+    if (shouldAdjustVA(Section))
+      VMA += AdjustVMA;
+
     uint64_t Size = Section.getSize();
     bool Text = Section.isText();
     bool Data = Section.isData();
     bool BSS = Section.isBSS();
     std::string Type = (std::string(Text ? "TEXT " : "") +
                         (Data ? "DATA " : "") + (BSS ? "BSS" : ""));
-    outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
-                     (unsigned)Section.getIndex(), Name.str().c_str(), Size,
-                     Address, Type.c_str());
+
+    if (HasLMAColumn)
+      outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %016" PRIx64
+                       " %s\n",
+                       (unsigned)Section.getIndex(), Name.str().c_str(), Size,
+                       VMA, getELFSectionLMA(Section), Type.c_str());
+    else
+      outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
+                       (unsigned)Section.getIndex(), Name.str().c_str(), Size,
+                       VMA, Type.c_str());
   }
   outs() << "\n";
 }
 
-void llvm::printSectionContents(const ObjectFile *Obj) {
-  std::error_code EC;
+void printSectionContents(const ObjectFile *Obj) {
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
     StringRef Name;
-    StringRef Contents;
     error(Section.getName(Name));
     uint64_t BaseAddr = Section.getAddress();
     uint64_t Size = Section.getSize();
@@ -1965,7 +1701,7 @@ void llvm::printSectionContents(const ObjectFile *Obj) {
       continue;
     }
 
-    error(Section.getContents(Contents));
+    StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName());
 
     // Dump out the content as hex and printable ascii characters.
     for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
@@ -1993,8 +1729,8 @@ void llvm::printSectionContents(const ObjectFile *Obj) {
   }
 }
 
-void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
-                            StringRef ArchitectureName) {
+void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
+                      StringRef ArchitectureName) {
   outs() << "SYMBOL TABLE:\n";
 
   if (const COFFObjectFile *Coff = dyn_cast<const COFFObjectFile>(O)) {
@@ -2002,41 +1738,24 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
     return;
   }
 
+  const StringRef FileName = O->getFileName();
   for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
-    // Skip printing the special zero symbol when dumping an ELF file.
-    // This makes the output consistent with the GNU objdump.
-    if (I == O->symbol_begin() && isa<ELFObjectFileBase>(O))
-      continue;
-
     const SymbolRef &Symbol = *I;
-    Expected<uint64_t> AddressOrError = Symbol.getAddress();
-    if (!AddressOrError)
-      report_error(ArchiveName, O->getFileName(), AddressOrError.takeError(),
-                   ArchitectureName);
-    uint64_t Address = *AddressOrError;
+    uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName,
+                                     ArchitectureName);
     if ((Address < StartAddress) || (Address > StopAddress))
       continue;
-    Expected<SymbolRef::Type> TypeOrError = Symbol.getType();
-    if (!TypeOrError)
-      report_error(ArchiveName, O->getFileName(), TypeOrError.takeError(),
-                   ArchitectureName);
-    SymbolRef::Type Type = *TypeOrError;
+    SymbolRef::Type Type = unwrapOrError(Symbol.getType(), ArchiveName,
+                                         FileName, ArchitectureName);
     uint32_t Flags = Symbol.getFlags();
-    Expected<section_iterator> SectionOrErr = Symbol.getSection();
-    if (!SectionOrErr)
-      report_error(ArchiveName, O->getFileName(), SectionOrErr.takeError(),
-                   ArchitectureName);
-    section_iterator Section = *SectionOrErr;
+    section_iterator Section = unwrapOrError(Symbol.getSection(), ArchiveName,
+                                             FileName, ArchitectureName);
     StringRef Name;
-    if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
+    if (Type == SymbolRef::ST_Debug && Section != O->section_end())
       Section->getName(Name);
-    } else {
-      Expected<StringRef> NameOrErr = Symbol.getName();
-      if (!NameOrErr)
-        report_error(ArchiveName, O->getFileName(), NameOrErr.takeError(),
-                     ArchitectureName);
-      Name = *NameOrErr;
-    }
+    else
+      Name = unwrapOrError(Symbol.getName(), ArchiveName, FileName,
+                           ArchitectureName);
 
     bool Global = Flags & SymbolRef::SF_Global;
     bool Weak = Flags & SymbolRef::SF_Weak;
@@ -2087,20 +1806,38 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
       outs() << SectionName;
     }
 
-    outs() << '\t';
     if (Common || isa<ELFObjectFileBase>(O)) {
       uint64_t Val =
           Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
-      outs() << format("\t %08" PRIx64 " ", Val);
+      outs() << format("\t%08" PRIx64, Val);
     }
 
-    if (Hidden)
-      outs() << ".hidden ";
+    if (isa<ELFObjectFileBase>(O)) {
+      uint8_t Other = ELFSymbolRef(Symbol).getOther();
+      switch (Other) {
+      case ELF::STV_DEFAULT:
+        break;
+      case ELF::STV_INTERNAL:
+        outs() << " .internal";
+        break;
+      case ELF::STV_HIDDEN:
+        outs() << " .hidden";
+        break;
+      case ELF::STV_PROTECTED:
+        outs() << " .protected";
+        break;
+      default:
+        outs() << format(" 0x%02x", Other);
+        break;
+      }
+    } else if (Hidden) {
+      outs() << " .hidden";
+    }
 
     if (Demangle)
-      outs() << demangle(Name) << '\n';
+      outs() << ' ' << demangle(Name) << '\n';
     else
-      outs() << Name << '\n';
+      outs() << ' ' << Name << '\n';
   }
 }
 
@@ -2118,59 +1855,9 @@ static void printUnwindInfo(const ObjectFile *O) {
            "for COFF and MachO object files.\n";
 }
 
-void llvm::printExportsTrie(const ObjectFile *o) {
-  outs() << "Exports trie:\n";
-  if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
-    printMachOExportsTrie(MachO);
-  else
-    WithColor::error(errs(), ToolName)
-        << "This operation is only currently supported "
-           "for Mach-O executable files.\n";
-}
-
-void llvm::printRebaseTable(ObjectFile *o) {
-  outs() << "Rebase table:\n";
-  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
-    printMachORebaseTable(MachO);
-  else
-    WithColor::error(errs(), ToolName)
-        << "This operation is only currently supported "
-           "for Mach-O executable files.\n";
-}
-
-void llvm::printBindTable(ObjectFile *o) {
-  outs() << "Bind table:\n";
-  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
-    printMachOBindTable(MachO);
-  else
-    WithColor::error(errs(), ToolName)
-        << "This operation is only currently supported "
-           "for Mach-O executable files.\n";
-}
-
-void llvm::printLazyBindTable(ObjectFile *o) {
-  outs() << "Lazy bind table:\n";
-  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
-    printMachOLazyBindTable(MachO);
-  else
-    WithColor::error(errs(), ToolName)
-        << "This operation is only currently supported "
-           "for Mach-O executable files.\n";
-}
-
-void llvm::printWeakBindTable(ObjectFile *o) {
-  outs() << "Weak bind table:\n";
-  if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
-    printMachOWeakBindTable(MachO);
-  else
-    WithColor::error(errs(), ToolName)
-        << "This operation is only currently supported "
-           "for Mach-O executable files.\n";
-}
-
 /// Dump the raw contents of the __clangast section so the output can be piped
 /// into llvm-bcanalyzer.
-void llvm::printRawClangAST(const ObjectFile *Obj) {
+void printRawClangAST(const ObjectFile *Obj) {
   if (outs().is_displayed()) {
     WithColor::error(errs(), ToolName)
         << "The -raw-clang-ast option will dump the raw binary contents of "
@@ -2197,8 +1884,8 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
   if (!ClangASTSection)
     return;
 
-  StringRef ClangASTContents;
-  error(ClangASTSection.getValue().getContents(ClangASTContents));
+  StringRef ClangASTContents = unwrapOrError(
+      ClangASTSection.getValue().getContents(), Obj->getFileName());
   outs().write(ClangASTContents.data(), ClangASTContents.size());
 }
 
@@ -2234,9 +1921,8 @@ static void printFaultMaps(const ObjectFile *Obj) {
     return;
   }
 
-  StringRef FaultMapContents;
-  error(FaultMapSection.getValue().getContents(FaultMapContents));
-
+  StringRef FaultMapContents =
+      unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName());
   FaultMapParser FMP(FaultMapContents.bytes_begin(),
                      FaultMapContents.bytes_end());
 
@@ -2246,7 +1932,9 @@ static void printFaultMaps(const ObjectFile *Obj) {
 static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
   if (O->isELF()) {
     printELFFileHeader(O);
-    return printELFDynamicSection(O);
+    printELFDynamicSection(O);
+    printELFSymbolVersionInfo(O);
+    return;
   }
   if (O->isCOFF())
     return printCOFFFileHeader(O);
@@ -2267,12 +1955,9 @@ static void printFileHeaders(const ObjectFile *O) {
 
   Triple::ArchType AT = O->getArch();
   outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
-  Expected<uint64_t> StartAddrOrErr = O->getStartAddress();
-  if (!StartAddrOrErr)
-    report_error(O->getFileName(), StartAddrOrErr.takeError());
+  uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName());
 
   StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
-  uint64_t Address = StartAddrOrErr.get();
   outs() << "start address: "
          << "0x" << format(Fmt.data(), Address) << "\n\n";
 }
@@ -2297,22 +1982,9 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
 
   outs() << " ";
 
-  Expected<unsigned> UIDOrErr = C.getUID();
-  if (!UIDOrErr)
-    report_error(Filename, UIDOrErr.takeError());
-  unsigned UID = UIDOrErr.get();
-  outs() << format("%d/", UID);
-
-  Expected<unsigned> GIDOrErr = C.getGID();
-  if (!GIDOrErr)
-    report_error(Filename, GIDOrErr.takeError());
-  unsigned GID = GIDOrErr.get();
-  outs() << format("%-d ", GID);
-
-  Expected<uint64_t> Size = C.getRawSize();
-  if (!Size)
-    report_error(Filename, Size.takeError());
-  outs() << format("%6" PRId64, Size.get()) << " ";
+  outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename),
+                   unwrapOrError(C.getGID(), Filename),
+                   unwrapOrError(C.getRawSize(), Filename));
 
   StringRef RawLastModified = C.getRawLastModified();
   unsigned Seconds;
@@ -2331,10 +2003,7 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
   Expected<StringRef> NameOrErr = C.getName();
   if (!NameOrErr) {
     consumeError(NameOrErr.takeError());
-    Expected<StringRef> RawNameOrErr = C.getRawName();
-    if (!RawNameOrErr)
-      report_error(Filename, NameOrErr.takeError());
-    Name = RawNameOrErr.get();
+    Name = unwrapOrError(C.getRawName(), Filename);
   } else {
     Name = NameOrErr.get();
   }
@@ -2386,7 +2055,7 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
     printWeakBindTable(O);
   if (RawClangAST)
     printRawClangAST(O);
-  if (PrintFaultMaps)
+  if (FaultMapSection)
     printFaultMaps(O);
   if (DwarfDumpType != DIDT_Null) {
     std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
@@ -2421,7 +2090,7 @@ static void dumpArchive(const Archive *A) {
     Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
     if (!ChildOrErr) {
       if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
-        report_error(A->getFileName(), C, std::move(E));
+        report_error(std::move(E), A->getFileName(), C);
       continue;
     }
     if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
@@ -2429,10 +2098,11 @@ static void dumpArchive(const Archive *A) {
     else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
       dumpObject(I, A, &C);
     else
-      report_error(A->getFileName(), object_error::invalid_file_type);
+      report_error(errorCodeToError(object_error::invalid_file_type),
+                   A->getFileName());
   }
   if (Err)
-    report_error(A->getFileName(), std::move(Err));
+    report_error(std::move(Err), A->getFileName());
 }
 
 /// Open file and figure out how to dump it.
@@ -2446,10 +2116,8 @@ static void dumpInput(StringRef file) {
   }
 
   // Attempt to open the binary.
-  Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(file);
-  if (!BinaryOrErr)
-    report_error(file, BinaryOrErr.takeError());
-  Binary &Binary = *BinaryOrErr.get().getBinary();
+  OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file);
+  Binary &Binary = *OBinary.getBinary();
 
   if (Archive *A = dyn_cast<Archive>(&Binary))
     dumpArchive(A);
@@ -2458,22 +2126,29 @@ static void dumpInput(StringRef file) {
   else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
     parseInputMachO(UB);
   else
-    report_error(file, object_error::invalid_file_type);
+    report_error(errorCodeToError(object_error::invalid_file_type), file);
 }
+} // namespace llvm
 
 int main(int argc, char **argv) {
+  using namespace llvm;
   InitLLVM X(argc, argv);
+  const cl::OptionCategory *OptionFilters[] = {&ObjdumpCat, &MachOCat};
+  cl::HideUnrelatedOptions(OptionFilters);
 
   // Initialize targets and assembly printers/parsers.
-  llvm::InitializeAllTargetInfos();
-  llvm::InitializeAllTargetMCs();
-  llvm::InitializeAllDisassemblers();
+  InitializeAllTargetInfos();
+  InitializeAllTargetMCs();
+  InitializeAllDisassemblers();
 
   // Register the target printer for --version.
   cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
 
   cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
 
+  if (StartAddress >= StopAddress)
+    error("start address should be less than stop address");
+
   ToolName = argv[0];
 
   // Defaults to a.out if no filenames specified.
@@ -2481,40 +2156,22 @@ int main(int argc, char **argv) {
     InputFilenames.push_back("a.out");
 
   if (AllHeaders)
-    FileHeaders = PrivateHeaders = Relocations = SectionHeaders = SymbolTable =
-        true;
+    ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
+        SectionHeaders = SymbolTable = true;
 
-  if (DisassembleAll || PrintSource || PrintLines)
+  if (DisassembleAll || PrintSource || PrintLines ||
+      (!DisassembleFunctions.empty()))
     Disassemble = true;
 
-  if (!Disassemble
-      && !Relocations
-      && !DynamicRelocations
-      && !SectionHeaders
-      && !SectionContents
-      && !SymbolTable
-      && !UnwindInfo
-      && !PrivateHeaders
-      && !FileHeaders
-      && !FirstPrivateHeader
-      && !ExportsTrie
-      && !Rebase
-      && !Bind
-      && !LazyBind
-      && !WeakBind
-      && !RawClangAST
-      && !(UniversalHeaders && MachOOpt)
-      && !ArchiveHeaders
-      && !(IndirectSymbols && MachOOpt)
-      && !(DataInCode && MachOOpt)
-      && !(LinkOptHints && MachOOpt)
-      && !(InfoPlist && MachOOpt)
-      && !(DylibsUsed && MachOOpt)
-      && !(DylibId && MachOOpt)
-      && !(ObjcMetaData && MachOOpt)
-      && !(!FilterSections.empty() && MachOOpt)
-      && !PrintFaultMaps
-      && DwarfDumpType == DIDT_Null) {
+  if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
+      !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
+      !Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
+      !UnwindInfo && !FaultMapSection &&
+      !(MachOOpt &&
+        (Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie ||
+         FirstPrivateHeader || IndirectSymbols || InfoPlist || LazyBind ||
+         LinkOptHints || ObjcMetaData || Rebase || UniversalHeaders ||
+         WeakBind || !FilterSections.empty()))) {
     cl::PrintHelpMessage();
     return 2;
   }
@@ -2524,5 +2181,7 @@ int main(int argc, char **argv) {
 
   llvm::for_each(InputFilenames, dumpInput);
 
+  warnOnNoMatchForSections();
+
   return EXIT_SUCCESS;
 }
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index fe2cb05fe227..e58d4a05c2e6 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -1,8 +1,7 @@
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,57 +18,86 @@ namespace llvm {
 class StringRef;
 
 namespace object {
-  class COFFObjectFile;
-  class COFFImportFile;
-  class MachOObjectFile;
-  class MachOUniversalBinary;
-  class ObjectFile;
-  class Archive;
-  class RelocationRef;
+class COFFObjectFile;
+class COFFImportFile;
+class ELFObjectFileBase;
+class ELFSectionRef;
+class MachOObjectFile;
+class MachOUniversalBinary;
+class RelocationRef;
 }
 
-extern cl::opt<std::string> TripleName;
-extern cl::opt<std::string> ArchName;
-extern cl::opt<std::string> MCPU;
-extern cl::list<std::string> MAttrs;
-extern cl::list<std::string> FilterSections;
-extern cl::opt<bool> AllHeaders;
 extern cl::opt<bool> Demangle;
-extern cl::opt<bool> Disassemble;
-extern cl::opt<bool> DisassembleAll;
-extern cl::opt<bool> NoShowRawInsn;
-extern cl::opt<bool> NoLeadingAddr;
-extern cl::opt<bool> PrivateHeaders;
-extern cl::opt<bool> FileHeaders;
-extern cl::opt<bool> FirstPrivateHeader;
-extern cl::opt<bool> ExportsTrie;
-extern cl::opt<bool> Rebase;
-extern cl::opt<bool> Bind;
-extern cl::opt<bool> LazyBind;
-extern cl::opt<bool> WeakBind;
-extern cl::opt<bool> RawClangAST;
-extern cl::opt<bool> UniversalHeaders;
-extern cl::opt<bool> ArchiveHeaders;
-extern cl::opt<bool> IndirectSymbols;
-extern cl::opt<bool> DataInCode;
-extern cl::opt<bool> LinkOptHints;
-extern cl::opt<bool> InfoPlist;
-extern cl::opt<bool> DylibsUsed;
-extern cl::opt<bool> DylibId;
-extern cl::opt<bool> ObjcMetaData;
-extern cl::opt<std::string> DisSymName;
-extern cl::opt<bool> NonVerbose;
-extern cl::opt<bool> Relocations;
-extern cl::opt<bool> DynamicRelocations;
-extern cl::opt<bool> SectionHeaders;
-extern cl::opt<bool> SectionContents;
-extern cl::opt<bool> SymbolTable;
-extern cl::opt<bool> UnwindInfo;
-extern cl::opt<bool> PrintImmHex;
-extern cl::opt<DIDumpType> DwarfDumpType;
+
+typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+
+class SectionFilterIterator {
+public:
+  SectionFilterIterator(FilterPredicate P,
+                        llvm::object::section_iterator const &I,
+                        llvm::object::section_iterator const &E)
+      : Predicate(std::move(P)), Iterator(I), End(E) {
+    ScanPredicate();
+  }
+  const llvm::object::SectionRef &operator*() const { return *Iterator; }
+  SectionFilterIterator &operator++() {
+    ++Iterator;
+    ScanPredicate();
+    return *this;
+  }
+  bool operator!=(SectionFilterIterator const &Other) const {
+    return Iterator != Other.Iterator;
+  }
+
+private:
+  void ScanPredicate() {
+    while (Iterator != End && !Predicate(*Iterator)) {
+      ++Iterator;
+    }
+  }
+  FilterPredicate Predicate;
+  llvm::object::section_iterator Iterator;
+  llvm::object::section_iterator End;
+};
+
+class SectionFilter {
+public:
+  SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
+      : Predicate(std::move(P)), Object(O) {}
+  SectionFilterIterator begin() {
+    return SectionFilterIterator(Predicate, Object.section_begin(),
+                                 Object.section_end());
+  }
+  SectionFilterIterator end() {
+    return SectionFilterIterator(Predicate, Object.section_end(),
+                                 Object.section_end());
+  }
+
+private:
+  FilterPredicate Predicate;
+  llvm::object::ObjectFile const &Object;
+};
 
 // Various helper functions.
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O);
+
+Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj,
+                                  const object::RelocationRef &Rel,
+                                  llvm::SmallVectorImpl<char> &Result);
+Error getCOFFRelocationValueString(const object::COFFObjectFile *Obj,
+                                   const object::RelocationRef &Rel,
+                                   llvm::SmallVectorImpl<char> &Result);
+Error getWasmRelocationValueString(const object::WasmObjectFile *Obj,
+                                   const object::RelocationRef &RelRef,
+                                   llvm::SmallVectorImpl<char> &Result);
+Error getMachORelocationValueString(const object::MachOObjectFile *Obj,
+                                    const object::RelocationRef &RelRef,
+                                    llvm::SmallVectorImpl<char> &Result);
+
+uint64_t getELFSectionLMA(const object::ELFSectionRef& Sec);
+
 void error(std::error_code ec);
+void error(Error E);
 bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
 void parseInputMachO(StringRef Filename);
 void parseInputMachO(object::MachOUniversalBinary *UB);
@@ -82,6 +110,7 @@ void printMachOLazyBindTable(object::MachOObjectFile *O);
 void printMachOWeakBindTable(object::MachOObjectFile *O);
 void printELFFileHeader(const object::ObjectFile *O);
 void printELFDynamicSection(const object::ObjectFile *Obj);
+void printELFSymbolVersionInfo(const object::ObjectFile *Obj);
 void printCOFFFileHeader(const object::ObjectFile *O);
 void printCOFFSymbolTable(const object::COFFImportFile *I);
 void printCOFFSymbolTable(const object::COFFObjectFile *O);
@@ -103,18 +132,20 @@ void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
 void warn(StringRef Message);
 LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
 LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, std::error_code EC);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, llvm::Error E);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef FileName,
-                                          StringRef ArchiveName,
-                                          llvm::Error E,
-                                          StringRef ArchitectureName
-                                                    = StringRef());
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef ArchiveName,
-                                          const object::Archive::Child &C,
-                                          llvm::Error E,
-                                          StringRef ArchitectureName
-                                                    = StringRef());
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File);
+LLVM_ATTRIBUTE_NORETURN void
+report_error(Error E, StringRef FileName, StringRef ArchiveName,
+             StringRef ArchitectureName = StringRef());
+LLVM_ATTRIBUTE_NORETURN void
+report_error(Error E, StringRef ArchiveName, const object::Archive::Child &C,
+             StringRef ArchitectureName = StringRef());
+
+template <typename T, typename... Ts>
+T unwrapOrError(Expected<T> EO, Ts &&... Args) {
+  if (EO)
+    return std::move(*EO);
+  report_error(EO.takeError(), std::forward<Ts>(Args)...);
+}
 
 } // end namespace llvm
 
diff --git a/tools/llvm-pdbutil/BytesOutputStyle.cpp b/tools/llvm-pdbutil/BytesOutputStyle.cpp
index 2b96c8f986aa..162d12c120b4 100644
--- a/tools/llvm-pdbutil/BytesOutputStyle.cpp
+++ b/tools/llvm-pdbutil/BytesOutputStyle.cpp
@@ -1,9 +1,8 @@
 //===- BytesOutputStyle.cpp ----------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -341,9 +340,7 @@ static void iterateOneModule(PDBFile &File, LinePrinter &P,
   if (ModiStream == kInvalidStreamIndex)
     return;
 
-  auto ModStreamData = MappedBlockStream::createIndexedStream(
-      File.getMsfLayout(), File.getMsfBuffer(), ModiStream,
-      File.getAllocator());
+  auto ModStreamData = File.createIndexedStream(ModiStream);
   ModuleDebugStreamRef ModStream(Modi, std::move(ModStreamData));
   if (auto EC = ModStream.reload()) {
     P.formatLine("Could not parse debug information.");
diff --git a/tools/llvm-pdbutil/BytesOutputStyle.h b/tools/llvm-pdbutil/BytesOutputStyle.h
index aa5342998e56..d3aceb47679e 100644
--- a/tools/llvm-pdbutil/BytesOutputStyle.h
+++ b/tools/llvm-pdbutil/BytesOutputStyle.h
@@ -1,9 +1,8 @@
 //===- BytesOutputStyle.h ------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp
index e4f6aa7f6ec5..962d4cf88a8a 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -1,9 +1,8 @@
 //===- DumpOutputStyle.cpp ------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -14,6 +13,7 @@
 #include "MinimalSymbolDumper.h"
 #include "MinimalTypeDumper.h"
 #include "StreamUtil.h"
+#include "TypeReferenceTracker.h"
 #include "llvm-pdbutil.h"
 
 #include "llvm/ADT/STLExtras.h"
@@ -61,7 +61,12 @@ using namespace llvm::msf;
 using namespace llvm::pdb;
 
 DumpOutputStyle::DumpOutputStyle(InputFile &File)
-    : File(File), P(2, false, outs()) {}
+    : File(File), P(2, false, outs()) {
+  if (opts::dump::DumpTypeRefStats)
+    RefTracker.reset(new TypeReferenceTracker(File));
+}
+
+DumpOutputStyle::~DumpOutputStyle() {}
 
 PDBFile &DumpOutputStyle::getPdb() { return File.pdb(); }
 object::COFFObjectFile &DumpOutputStyle::getObj() { return File.obj(); }
@@ -77,6 +82,10 @@ void DumpOutputStyle::printStreamNotPresent(StringRef StreamName) {
 }
 
 Error DumpOutputStyle::dump() {
+  // Walk symbols & globals if we are supposed to mark types referenced.
+  if (opts::dump::DumpTypeRefStats)
+    RefTracker->mark();
+
   if (opts::dump::DumpSummary) {
     if (auto EC = dumpFileSummary())
       return EC;
@@ -101,6 +110,12 @@ Error DumpOutputStyle::dump() {
     P.NewLine();
   }
 
+  if (opts::dump::DumpTypeStats) {
+    if (auto EC = dumpTypeStats())
+      return EC;
+    P.NewLine();
+  }
+
   if (opts::dump::DumpNamedStreams) {
     if (auto EC = dumpNamedStreams())
       return EC;
@@ -188,6 +203,11 @@ Error DumpOutputStyle::dump() {
       return EC;
   }
 
+  if (opts::dump::DumpTypeRefStats) {
+    if (auto EC = dumpTypeRefStats())
+      return EC;
+  }
+
   if (opts::dump::DumpSectionHeaders) {
     if (auto EC = dumpSectionHeaders())
       return EC;
@@ -203,6 +223,8 @@ Error DumpOutputStyle::dump() {
       return EC;
   }
 
+  P.NewLine();
+
   return Error::success();
 }
 
@@ -293,18 +315,30 @@ static inline std::string formatModuleDetailKind(SymbolKind K) {
   return formatSymbolKind(K);
 }
 
+// Get the stats sorted by size, descending.
+std::vector<StatCollection::KindAndStat>
+StatCollection::getStatsSortedBySize() const {
+  std::vector<KindAndStat> SortedStats(Individual.begin(), Individual.end());
+  llvm::stable_sort(SortedStats,
+                    [](const KindAndStat &LHS, const KindAndStat &RHS) {
+                      return LHS.second.Size > RHS.second.Size;
+                    });
+  return SortedStats;
+}
+
 template <typename Kind>
 static void printModuleDetailStats(LinePrinter &P, StringRef Label,
                                    const StatCollection &Stats) {
   P.NewLine();
   P.formatLine("  {0}", Label);
   AutoIndent Indent(P);
-  P.formatLine("{0,40}: {1,7} entries ({2,8} bytes)", "Total",
+  P.formatLine("{0,40}: {1,7} entries ({2,12:N} bytes)", "Total",
                Stats.Totals.Count, Stats.Totals.Size);
   P.formatLine("{0}", fmt_repeat('-', 74));
-  for (const auto &K : Stats.Individual) {
+
+  for (const auto &K : Stats.getStatsSortedBySize()) {
     std::string KindName = formatModuleDetailKind(Kind(K.first));
-    P.formatLine("{0,40}: {1,7} entries ({2,8} bytes)", KindName,
+    P.formatLine("{0,40}: {1,7} entries ({2,12:N} bytes)", KindName,
                  K.second.Count, K.second.Size);
   }
 }
@@ -662,6 +696,35 @@ Error DumpOutputStyle::dumpSymbolStats() {
   return Error::success();
 }
 
+Error DumpOutputStyle::dumpTypeStats() {
+  printHeader(P, "Type Record Stats");
+
+  // Iterate the types, categorize by kind, accumulate size stats.
+  StatCollection TypeStats;
+  LazyRandomTypeCollection &Types = File.types();
+  for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI)) {
+    CVType Type = Types.getType(*TI);
+    TypeStats.update(uint32_t(Type.kind()), Type.length());
+  }
+
+  P.NewLine();
+  P.formatLine("  Types");
+  AutoIndent Indent(P);
+  P.formatLine("{0,14}: {1,7} entries ({2,12:N} bytes, {3,7} avg)", "Total",
+               TypeStats.Totals.Count, TypeStats.Totals.Size,
+               (double)TypeStats.Totals.Size / TypeStats.Totals.Count);
+  P.formatLine("{0}", fmt_repeat('-', 74));
+
+  for (const auto &K : TypeStats.getStatsSortedBySize()) {
+    P.formatLine("{0,14}: {1,7} entries ({2,12:N} bytes, {3,7} avg)",
+                 formatTypeLeafKind(TypeLeafKind(K.first)), K.second.Count,
+                 K.second.Size, (double)K.second.Size / K.second.Count);
+  }
+
+
+  return Error::success();
+}
+
 static bool isValidNamespaceIdentifier(StringRef S) {
   if (S.empty())
     return false;
@@ -806,7 +869,7 @@ Error DumpOutputStyle::dumpUdtStats() {
                fmt_align(SizeHeader, AlignStyle::Right, SD));
 
   P.formatLine("{0}", fmt_repeat('-', TableWidth));
-  for (const auto &Stat : UdtTargetStats.Individual) {
+  for (const auto &Stat : UdtTargetStats.getStatsSortedBySize()) {
     StringRef Label = getUdtStatLabel(Stat.first);
     P.formatLine("{0} | {1:N}  {2:N}",
                  fmt_align(Label, AlignStyle::Right, FieldWidth),
@@ -819,12 +882,25 @@ Error DumpOutputStyle::dumpUdtStats() {
                fmt_align(UdtStats.Totals.Count, AlignStyle::Right, CD),
                fmt_align(UdtStats.Totals.Size, AlignStyle::Right, SD));
   P.formatLine("{0}", fmt_repeat('-', TableWidth));
-  for (const auto &Stat : NamespacedStats) {
-    std::string Label = formatv("namespace '{0}'", Stat.getKey());
+  struct StrAndStat {
+    StringRef Key;
+    StatCollection::Stat Stat;
+  };
+
+  // Print namespace stats in descending order of size.
+  std::vector<StrAndStat> NamespacedStatsSorted;
+  for (const auto &Stat : NamespacedStats)
+    NamespacedStatsSorted.push_back({Stat.getKey(), Stat.second});
+  llvm::stable_sort(NamespacedStatsSorted,
+                    [](const StrAndStat &L, const StrAndStat &R) {
+                      return L.Stat.Size > R.Stat.Size;
+                    });
+  for (const auto &Stat : NamespacedStatsSorted) {
+    std::string Label = formatv("namespace '{0}'", Stat.Key);
     P.formatLine("{0} | {1:N}  {2:N}",
                  fmt_align(Label, AlignStyle::Right, FieldWidth),
-                 fmt_align(Stat.second.Count, AlignStyle::Right, CD),
-                 fmt_align(Stat.second.Size, AlignStyle::Right, SD));
+                 fmt_align(Stat.Stat.Count, AlignStyle::Right, CD),
+                 fmt_align(Stat.Stat.Size, AlignStyle::Right, SD));
   }
   return Error::success();
 }
@@ -921,6 +997,10 @@ Error DumpOutputStyle::dumpInlineeLines() {
           P.formatLine("{0,+8} | {1,+5} | ", Entry.Header->Inlinee,
                        fmtle(Entry.Header->SourceLineNum));
           Strings.formatFromChecksumsOffset(P, Entry.Header->FileID, true);
+          for (const auto &ExtraFileID : Entry.ExtraFiles) {
+            P.formatLine("                   ");
+            Strings.formatFromChecksumsOffset(P, ExtraFileID, true);
+          }
         }
         P.NewLine();
       });
@@ -1011,17 +1091,12 @@ Error DumpOutputStyle::dumpOldFpo(PDBFile &File) {
   ExitOnError Err("Error dumping old fpo data:");
   auto &Dbi = Err(File.getPDBDbiStream());
 
-  uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::FPO);
-  if (Index == kInvalidStreamIndex) {
+  if (!Dbi.hasOldFpoRecords()) {
     printStreamNotPresent("FPO");
     return Error::success();
   }
 
-  std::unique_ptr<MappedBlockStream> OldFpo = File.createIndexedStream(Index);
-  BinaryStreamReader Reader(*OldFpo);
-  FixedStreamArray<object::FpoData> Records;
-  Err(Reader.readArray(Records,
-                       Reader.bytesRemaining() / sizeof(object::FpoData)));
+  const FixedStreamArray<object::FpoData>& Records = Dbi.getOldFpoRecords();
 
   P.printLine("  RVA    | Code | Locals | Params | Prolog | Saved Regs | Use "
               "BP | Has SEH | Frame Type");
@@ -1043,18 +1118,12 @@ Error DumpOutputStyle::dumpNewFpo(PDBFile &File) {
   ExitOnError Err("Error dumping new fpo data:");
   auto &Dbi = Err(File.getPDBDbiStream());
 
-  uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::NewFPO);
-  if (Index == kInvalidStreamIndex) {
+  if (!Dbi.hasNewFpoRecords()) {
     printStreamNotPresent("New FPO");
     return Error::success();
   }
 
-  std::unique_ptr<MappedBlockStream> NewFpo = File.createIndexedStream(Index);
-
-  DebugFrameDataSubsectionRef FDS;
-  if (auto EC = FDS.initialize(*NewFpo))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Invalid new fpo stream");
+  const DebugFrameDataSubsectionRef& FDS = Dbi.getNewFpoRecords();
 
   P.printLine("  RVA    | Code | Locals | Params | Stack | Prolog | Saved Regs "
               "| Has SEH | Has C++EH | Start | Program");
@@ -1239,14 +1308,15 @@ static void buildDepSet(LazyRandomTypeCollection &Types,
 
 static void
 dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
-                   uint32_t NumTypeRecords, uint32_t NumHashBuckets,
+                   TypeReferenceTracker *RefTracker, uint32_t NumTypeRecords,
+                   uint32_t NumHashBuckets,
                    FixedStreamArray<support::ulittle32_t> HashValues,
                    TpiStream *Stream, bool Bytes, bool Extras) {
 
   Printer.formatLine("Showing {0:N} records", NumTypeRecords);
   uint32_t Width = NumDigits(TypeIndex::FirstNonSimpleIndex + NumTypeRecords);
 
-  MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+  MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, RefTracker,
                            NumHashBuckets, HashValues, Stream);
 
   if (auto EC = codeview::visitTypeStream(Types, V)) {
@@ -1257,12 +1327,13 @@ dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
 
 static void dumpPartialTypeStream(LinePrinter &Printer,
                                   LazyRandomTypeCollection &Types,
+                                  TypeReferenceTracker *RefTracker,
                                   TpiStream &Stream, ArrayRef<TypeIndex> TiList,
                                   bool Bytes, bool Extras, bool Deps) {
   uint32_t Width =
       NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
 
-  MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+  MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, RefTracker,
                            Stream.getNumHashBuckets(), Stream.getHashValues(),
                            &Stream);
 
@@ -1311,12 +1382,12 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
     else
       continue;
 
-    StringRef Contents;
-    if (auto EC = S.getContents(Contents))
-      return errorCodeToError(EC);
+    Expected<StringRef> ContentsOrErr = S.getContents();
+    if (!ContentsOrErr)
+      return ContentsOrErr.takeError();
 
     uint32_t Magic;
-    BinaryStreamReader Reader(Contents, llvm::support::little);
+    BinaryStreamReader Reader(*ContentsOrErr, llvm::support::little);
     if (auto EC = Reader.readInteger(Magic))
       return EC;
     if (Magic != COFF::DEBUG_SECTION_MAGIC)
@@ -1326,8 +1397,8 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
     Types.reset(Reader, 100);
 
     if (opts::dump::DumpTypes) {
-      dumpFullTypeStream(P, Types, 0, 0, {}, nullptr, opts::dump::DumpTypeData,
-                         false);
+      dumpFullTypeStream(P, Types, RefTracker.get(), 0, 0, {}, nullptr,
+                         opts::dump::DumpTypeData, false);
     } else if (opts::dump::DumpTypeExtras) {
       auto LocalHashes = LocallyHashedType::hashTypeCollection(Types);
       auto GlobalHashes = GloballyHashedType::hashTypeCollection(Types);
@@ -1396,23 +1467,36 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
 
   auto &Types = (StreamIdx == StreamTPI) ? File.types() : File.ids();
 
+  // Only emit notes about referenced/unreferenced for types.
+  TypeReferenceTracker *MaybeTracker =
+      (StreamIdx == StreamTPI) ? RefTracker.get() : nullptr;
+
   // Enable resolving forward decls.
   Stream.buildHashMap();
 
   if (DumpTypes || !Indices.empty()) {
     if (Indices.empty())
-      dumpFullTypeStream(P, Types, Stream.getNumTypeRecords(),
+      dumpFullTypeStream(P, Types, MaybeTracker, Stream.getNumTypeRecords(),
                          Stream.getNumHashBuckets(), Stream.getHashValues(),
                          &Stream, DumpBytes, DumpExtras);
     else {
       std::vector<TypeIndex> TiList(Indices.begin(), Indices.end());
-      dumpPartialTypeStream(P, Types, Stream, TiList, DumpBytes, DumpExtras,
-                            opts::dump::DumpTypeDependents);
+      dumpPartialTypeStream(P, Types, MaybeTracker, Stream, TiList, DumpBytes,
+                            DumpExtras, opts::dump::DumpTypeDependents);
     }
   }
 
   if (DumpExtras) {
     P.NewLine();
+
+    P.formatLine("Header Version: {0}",
+                 static_cast<uint32_t>(Stream.getTpiVersion()));
+    P.formatLine("Hash Stream Index: {0}", Stream.getTypeHashStreamIndex());
+    P.formatLine("Aux Hash Stream Index: {0}",
+                 Stream.getTypeHashStreamAuxIndex());
+    P.formatLine("Hash Key Size: {0}", Stream.getHashKeySize());
+    P.formatLine("Num Hash Buckets: {0}", Stream.getNumHashBuckets());
+
     auto IndexOffsets = Stream.getTypeIndexOffsets();
     P.formatLine("Type Index Offsets:");
     for (const auto &IO : IndexOffsets) {
@@ -1523,6 +1607,34 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
   return Error::success();
 }
 
+Error DumpOutputStyle::dumpTypeRefStats() {
+  printHeader(P, "Type Reference Statistics");
+  AutoIndent Indent(P);
+
+  // Sum the byte size of all type records, and the size and count of all
+  // referenced records.
+  size_t TotalRecs = File.types().size();
+  size_t RefRecs = 0;
+  size_t TotalBytes = 0;
+  size_t RefBytes = 0;
+  auto &Types = File.types();
+  for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI)) {
+    CVType Type = File.types().getType(*TI);
+    TotalBytes += Type.length();
+    if (RefTracker->isTypeReferenced(*TI)) {
+      ++RefRecs;
+      RefBytes += Type.length();
+    }
+  }
+
+  P.formatLine("Records referenced: {0:N} / {1:N} {2:P}", RefRecs, TotalRecs,
+               (double)RefRecs / TotalRecs);
+  P.formatLine("Bytes referenced: {0:N} / {1:N} {2:P}", RefBytes, TotalBytes,
+               (double)RefBytes / TotalBytes);
+
+  return Error::success();
+}
+
 Error DumpOutputStyle::dumpGSIRecords() {
   printHeader(P, "GSI Records");
 
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.h b/tools/llvm-pdbutil/DumpOutputStyle.h
index 9b3a85587bde..796cd7a10c36 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -1,9 +1,8 @@
 //===- DumpOutputStyle.h -------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -35,6 +34,7 @@ class COFFObjectFile;
 namespace pdb {
 class GSIHashTable;
 class InputFile;
+class TypeReferenceTracker;
 
 struct StatCollection {
   struct Stat {
@@ -49,6 +49,8 @@ struct StatCollection {
     }
   };
 
+  using KindAndStat = std::pair<uint32_t, Stat>;
+
   void update(uint32_t Kind, uint32_t RecordSize) {
     Totals.update(RecordSize);
     auto Iter = Individual.try_emplace(Kind, 1, RecordSize);
@@ -57,12 +59,15 @@ struct StatCollection {
   }
   Stat Totals;
   DenseMap<uint32_t, Stat> Individual;
+
+  std::vector<KindAndStat> getStatsSortedBySize() const;
 };
 
 class DumpOutputStyle : public OutputStyle {
 
 public:
   DumpOutputStyle(InputFile &File);
+  ~DumpOutputStyle() override;
 
   Error dump() override;
 
@@ -77,6 +82,7 @@ private:
   Error dumpStreamSummary();
   Error dumpSymbolStats();
   Error dumpUdtStats();
+  Error dumpTypeStats();
   Error dumpNamedStreams();
   Error dumpStringTable();
   Error dumpStringTableFromPdb();
@@ -90,6 +96,7 @@ private:
   Error dumpNewFpo(PDBFile &File);
   Error dumpTpiStream(uint32_t StreamIdx);
   Error dumpTypesFromObjectFile();
+  Error dumpTypeRefStats();
   Error dumpModules();
   Error dumpModuleFiles();
   Error dumpModuleSymsForPdb();
@@ -105,6 +112,7 @@ private:
   void dumpSectionHeaders(StringRef Label, DbgHeaderType Type);
 
   InputFile &File;
+  std::unique_ptr<TypeReferenceTracker> RefTracker;
   LinePrinter P;
   SmallVector<StreamInfo, 32> StreamPurposes;
 };
diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index d16bfa480e1d..94faa0463981 100644
--- a/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -1,9 +1,8 @@
 //===- ExplainOutputStyle.cpp --------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.h b/tools/llvm-pdbutil/ExplainOutputStyle.h
index 9a497accb812..f405cf615e92 100644
--- a/tools/llvm-pdbutil/ExplainOutputStyle.h
+++ b/tools/llvm-pdbutil/ExplainOutputStyle.h
@@ -1,9 +1,8 @@
 //===- ExplainOutputStyle.h ----------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/FormatUtil.cpp b/tools/llvm-pdbutil/FormatUtil.cpp
index f55d478127d6..1a13f383e53c 100644
--- a/tools/llvm-pdbutil/FormatUtil.cpp
+++ b/tools/llvm-pdbutil/FormatUtil.cpp
@@ -1,9 +1,8 @@
 //===- FormatUtil.cpp ----------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/FormatUtil.h b/tools/llvm-pdbutil/FormatUtil.h
index 9a003c9285c9..19ce248f9a6f 100644
--- a/tools/llvm-pdbutil/FormatUtil.h
+++ b/tools/llvm-pdbutil/FormatUtil.h
@@ -1,9 +1,8 @@
 //===- FormatUtil.h ------------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/InputFile.cpp b/tools/llvm-pdbutil/InputFile.cpp
index 8eb116cf0d80..bd23bfdbe31a 100644
--- a/tools/llvm-pdbutil/InputFile.cpp
+++ b/tools/llvm-pdbutil/InputFile.cpp
@@ -1,9 +1,8 @@
 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -67,17 +66,20 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
                                              StringRef Name,
                                              BinaryStreamReader &Reader) {
-  StringRef SectionName, Contents;
+  StringRef SectionName;
   if (Section.getName(SectionName))
     return false;
 
   if (SectionName != Name)
     return false;
 
-  if (Section.getContents(Contents))
+  Expected<StringRef> ContentsOrErr = Section.getContents();
+  if (!ContentsOrErr) {
+    consumeError(ContentsOrErr.takeError());
     return false;
+  }
 
-  Reader = BinaryStreamReader(Contents, support::little);
+  Reader = BinaryStreamReader(*ContentsOrErr, support::little);
   uint32_t Magic;
   if (Reader.bytesRemaining() < sizeof(uint32_t))
     return false;
diff --git a/tools/llvm-pdbutil/InputFile.h b/tools/llvm-pdbutil/InputFile.h
index ee4e651c1e99..f25390c971d0 100644
--- a/tools/llvm-pdbutil/InputFile.h
+++ b/tools/llvm-pdbutil/InputFile.h
@@ -1,9 +1,8 @@
 //===- InputFile.h -------------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/LinePrinter.cpp b/tools/llvm-pdbutil/LinePrinter.cpp
index e80a1762450b..280c000bd65f 100644
--- a/tools/llvm-pdbutil/LinePrinter.cpp
+++ b/tools/llvm-pdbutil/LinePrinter.cpp
@@ -1,9 +1,8 @@
 //===- LinePrinter.cpp ------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -187,8 +186,7 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
     return;
   }
 
-  auto S = MappedBlockStream::createIndexedStream(
-      File.getMsfLayout(), File.getMsfBuffer(), StreamIdx, File.getAllocator());
+  auto S = File.createIndexedStream(StreamIdx);
   if (!S) {
     NewLine();
     formatLine("Stream {0}: Not present", StreamIdx);
diff --git a/tools/llvm-pdbutil/LinePrinter.h b/tools/llvm-pdbutil/LinePrinter.h
index 09bde28f516a..7ecfae17354f 100644
--- a/tools/llvm-pdbutil/LinePrinter.h
+++ b/tools/llvm-pdbutil/LinePrinter.h
@@ -1,9 +1,8 @@
 //===- LinePrinter.h ------------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -133,8 +132,7 @@ struct AutoIndent {
 
 template <class T>
 inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
-  Printer.getStream() << Item;
-  return Printer.getStream();
+  return Printer.getStream() << Item;
 }
 
 enum class PDB_ColorItem {
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 2c7b213b0a9f..e5ae47050678 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -1,9 +1,8 @@
 //===- MinimalSymbolDumper.cpp -------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -207,6 +206,7 @@ static std::string formatSourceLanguage(SourceLanguage Lang) {
     RETURN_CASE(SourceLanguage, MSIL, "msil");
     RETURN_CASE(SourceLanguage, HLSL, "hlsl");
     RETURN_CASE(SourceLanguage, D, "d");
+    RETURN_CASE(SourceLanguage, Swift, "swift");
   }
   return formatUnknownEnum(Lang);
 }
@@ -287,21 +287,39 @@ static std::string formatCookieKind(FrameCookieKind Kind) {
   return formatUnknownEnum(Kind);
 }
 
-static std::string formatRegisterId(RegisterId Id) {
-  switch (Id) {
+static std::string formatRegisterId(RegisterId Id, CPUType Cpu) {
+  if (Cpu == CPUType::ARM64) {
+    switch (Id) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+    default:
+      break;
+    }
+  } else {
+    switch (Id) {
+#define CV_REGISTERS_X86
 #define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+    default:
+      break;
+    }
   }
   return formatUnknownEnum(Id);
 }
 
-static std::string formatRegisterId(uint16_t Reg16) {
-  return formatRegisterId(RegisterId(Reg16));
+static std::string formatRegisterId(uint16_t Reg16, CPUType Cpu) {
+  return formatRegisterId(RegisterId(Reg16), Cpu);
 }
 
-static std::string formatRegisterId(ulittle16_t &Reg16) {
-  return formatRegisterId(uint16_t(Reg16));
+static std::string formatRegisterId(ulittle16_t &Reg16, CPUType Cpu) {
+  return formatRegisterId(uint16_t(Reg16), Cpu);
 }
 
 static std::string formatRange(LocalVariableAddrRange Range) {
@@ -331,7 +349,7 @@ Error MinimalSymbolDumper::visitSymbolBegin(codeview::CVSymbol &Record,
   // append to the existing line.
   P.formatLine("{0} | {1} [size = {2}]",
                fmt_align(Offset, AlignStyle::Right, 6),
-               formatSymbolKind(Record.Type), Record.length());
+               formatSymbolKind(Record.kind()), Record.length());
   P.Indent();
   return Error::success();
 }
@@ -562,7 +580,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, offset = {1}, offset in parent = {2}, has "
                "spilled udt = {3}",
-               formatRegisterId(Def.Hdr.Register),
+               formatRegisterId(Def.Hdr.Register, CompilationCPU),
                int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
                Def.hasSpilledUDTMember());
   P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
@@ -575,7 +593,7 @@ Error MinimalSymbolDumper::visitKnownRecord(
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, may have no name = {1}, range start = "
                "{2}, length = {3}",
-               formatRegisterId(DefRangeRegister.Hdr.Register),
+               formatRegisterId(DefRangeRegister.Hdr.Register, CompilationCPU),
                bool(DefRangeRegister.Hdr.MayHaveNoName),
                formatSegmentOffset(DefRangeRegister.Range.ISectStart,
                                    DefRangeRegister.Range.OffsetStart),
@@ -590,7 +608,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
   P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
-               formatRegisterId(Def.Hdr.Register), NoName,
+               formatRegisterId(Def.Hdr.Register, CompilationCPU), NoName,
                uint32_t(Def.Hdr.OffsetInParent));
   P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
                formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -617,7 +635,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
 Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
   AutoIndent Indent(P, 7);
   P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
-               FC.CodeOffset, formatRegisterId(FC.Register),
+               FC.CodeOffset, formatRegisterId(FC.Register, CompilationCPU),
                formatCookieKind(FC.CookieKind), FC.Flags);
   return Error::success();
 }
@@ -631,9 +649,10 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
                FP.BytesOfCalleeSavedRegisters,
                formatSegmentOffset(FP.SectionIdOfExceptionHandler,
                                    FP.OffsetOfExceptionHandler));
-  P.formatLine("local fp reg = {0}, param fp reg = {1}",
-               formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU)),
-               formatRegisterId(FP.getParamFramePtrReg(CompilationCPU)));
+  P.formatLine(
+      "local fp reg = {0}, param fp reg = {1}",
+      formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU), CompilationCPU),
+      formatRegisterId(FP.getParamFramePtrReg(CompilationCPU), CompilationCPU));
   P.formatLine("flags = {0}",
                formatFrameProcedureOptions(P.getIndentLevel() + 9, FP.Flags));
   return Error::success();
@@ -650,13 +669,89 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
 
 Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
   AutoIndent Indent(P, 7);
-  auto Bytes = makeArrayRef(IS.AnnotationData);
-  StringRef Annotations(reinterpret_cast<const char *>(Bytes.begin()),
-                        Bytes.size());
-
   P.formatLine("inlinee = {0}, parent = {1}, end = {2}", idIndex(IS.Inlinee),
                IS.Parent, IS.End);
-  P.formatLine("annotations = {0}", toHex(Annotations));
+
+  // Break down the annotation byte code and calculate code and line offsets.
+  // FIXME: It would be helpful if we could look up the initial file and inlinee
+  // lines offset using the inlinee index above.
+  uint32_t CodeOffset = 0;
+  int32_t LineOffset = 0;
+  for (auto &Annot : IS.annotations()) {
+    P.formatLine("  {0}", fmt_align(toHex(Annot.Bytes), AlignStyle::Left, 9));
+
+    auto formatCodeOffset = [&](uint32_t Delta) {
+      CodeOffset += Delta;
+      P.format(" code 0x{0} (+0x{1})", utohexstr(CodeOffset), utohexstr(Delta));
+    };
+    auto formatCodeLength = [&](uint32_t Length) {
+      // Notably, changing the code length does not affect the code offset.
+      P.format(" code end 0x{0} (+0x{1})", utohexstr(CodeOffset + Length),
+               utohexstr(Length));
+    };
+    auto formatLineOffset = [&](int32_t Delta) {
+      LineOffset += Delta;
+      char Sign = Delta > 0 ? '+' : '-';
+      P.format(" line {0} ({1}{2})", LineOffset, Sign, std::abs(Delta));
+    };
+
+    // Use the opcode to interpret the integer values.
+    switch (Annot.OpCode) {
+    case BinaryAnnotationsOpCode::Invalid:
+      break;
+    case BinaryAnnotationsOpCode::CodeOffset:
+    case BinaryAnnotationsOpCode::ChangeCodeOffset:
+      formatCodeOffset(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeLineOffset:
+      formatLineOffset(Annot.S1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeLength:
+      formatCodeLength(Annot.U1);
+      // Apparently this annotation updates the code offset. It's hard to make
+      // MSVC produce this opcode, but clang uses it, and debuggers seem to use
+      // this interpretation.
+      CodeOffset += Annot.U1;
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+      formatCodeOffset(Annot.U1);
+      formatLineOffset(Annot.S1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+      formatCodeOffset(Annot.U2);
+      formatCodeLength(Annot.U1);
+      break;
+
+    case BinaryAnnotationsOpCode::ChangeFile: {
+      // U1 is an offset into the string table; resolve it when we can.
+      uint32_t FileOffset = Annot.U1;
+      StringRef Filename = "<unknown>";
+      if (SymGroup) {
+        auto MaybeFile = SymGroup->getNameFromStringTable(FileOffset);
+        if (!MaybeFile)
+          return MaybeFile.takeError();
+        Filename = *MaybeFile;
+      }
+      P.format(" setfile {0} 0x{1}", Filename, utohexstr(FileOffset));
+      break;
+    }
+
+    // The rest of these are hard to convince MSVC to emit, so they are not as
+    // well understood.
+    case BinaryAnnotationsOpCode::ChangeCodeOffsetBase:
+      formatCodeOffset(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeLineEndDelta:
+    case BinaryAnnotationsOpCode::ChangeRangeKind:
+    case BinaryAnnotationsOpCode::ChangeColumnStart:
+    case BinaryAnnotationsOpCode::ChangeColumnEnd:
+      P.format(" {0} {1}", Annot.Name, Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeColumnEndDelta:
+      P.format(" {0} {1}", Annot.Name, Annot.S1);
+      break;
+    }
+  }
   return Error::success();
 }
 
@@ -665,7 +760,8 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   P.format(" `{0}`", Register.Name);
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, type = {1}",
-               formatRegisterId(Register.Register), typeIndex(Register.Index));
+               formatRegisterId(Register.Register, CompilationCPU),
+               typeIndex(Register.Index));
   return Error::success();
 }
 
@@ -753,9 +849,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
                                             RegRelativeSym &RegRel) {
   P.format(" `{0}`", RegRel.Name);
   AutoIndent Indent(P, 7);
-  P.formatLine("type = {0}, register = {1}, offset = {2}",
-               typeIndex(RegRel.Type), formatRegisterId(RegRel.Register),
-               RegRel.Offset);
+  P.formatLine(
+      "type = {0}, register = {1}, offset = {2}", typeIndex(RegRel.Type),
+      formatRegisterId(RegRel.Register, CompilationCPU), RegRel.Offset);
   return Error::success();
 }
 
@@ -780,3 +876,12 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   P.format(" `{0}`", UN.Name);
   return Error::success();
 }
+
+Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
+                                            AnnotationSym &Annot) {
+  AutoIndent Indent(P, 7);
+  P.formatLine("addr = {0}", formatSegmentOffset(Annot.Segment, Annot.CodeOffset));
+  P.formatLine("strings = {0}", typesetStringList(P.getIndentLevel() + 9 + 2,
+                                                   Annot.Strings));
+  return Error::success();
+}
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.h b/tools/llvm-pdbutil/MinimalSymbolDumper.h
index 033e193cee6c..cdc75c1cfba0 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.h
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.h
@@ -1,9 +1,8 @@
 //===- MinimalSymbolDumper.h ---------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 3f10e8ab8a1e..3fdef085f19e 100644
--- a/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -1,9 +1,8 @@
 //===- MinimalTypeDumper.cpp ---------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -11,6 +10,7 @@
 
 #include "FormatUtil.h"
 #include "LinePrinter.h"
+#include "TypeReferenceTracker.h"
 
 #include "llvm-pdbutil.h"
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
@@ -222,11 +222,10 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
   // formatLine puts the newline at the beginning, so we use formatLine here
   // to start a new line, and then individual visit methods use format to
   // append to the existing line.
-  if (!Hashes) {
-    P.formatLine("{0} | {1} [size = {2}]",
-                 fmt_align(Index, AlignStyle::Right, Width),
-                 formatTypeLeafKind(Record.Type), Record.length());
-  } else {
+  P.formatLine("{0} | {1} [size = {2}",
+               fmt_align(Index, AlignStyle::Right, Width),
+               formatTypeLeafKind(Record.kind()), Record.length());
+  if (Hashes) {
     std::string H;
     if (Index.toArrayIndex() >= HashValues.size()) {
       H = "(not present)";
@@ -242,13 +241,19 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
       else
         H = "0x" + utohexstr(Hash) + ", our hash = 0x" + utohexstr(OurHash);
     }
-    P.formatLine("{0} | {1} [size = {2}, hash = {3}]",
-                 fmt_align(Index, AlignStyle::Right, Width),
-                 formatTypeLeafKind(Record.Type), Record.length(), H);
+    P.format(", hash = {0}", H);
   }
+  if (RefTracker) {
+    if (RefTracker->isTypeReferenced(Index))
+      P.format(", referenced");
+    else
+      P.format(", unreferenced");
+  }
+  P.format("]");
   P.Indent(Width + 3);
   return Error::success();
 }
+
 Error MinimalTypeDumpVisitor::visitTypeEnd(CVType &Record) {
   P.Unindent(Width + 3);
   if (RecordBytes) {
diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.h b/tools/llvm-pdbutil/MinimalTypeDumper.h
index 8f6bdc6110ae..6bc456d47ac4 100644
--- a/tools/llvm-pdbutil/MinimalTypeDumper.h
+++ b/tools/llvm-pdbutil/MinimalTypeDumper.h
@@ -1,9 +1,8 @@
 //===- MinimalTypeDumper.h ------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -21,17 +20,19 @@ class LazyRandomTypeCollection;
 namespace pdb {
 class LinePrinter;
 class TpiStream;
+class TypeReferenceTracker;
 
 class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks {
 public:
   MinimalTypeDumpVisitor(LinePrinter &P, uint32_t Width, bool RecordBytes,
                          bool Hashes, codeview::LazyRandomTypeCollection &Types,
+                         TypeReferenceTracker *RefTracker,
                          uint32_t NumHashBuckets,
                          FixedStreamArray<support::ulittle32_t> HashValues,
                          pdb::TpiStream *Stream)
       : P(P), Width(Width), RecordBytes(RecordBytes), Hashes(Hashes),
-        Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues),
-        Stream(Stream) {}
+        Types(Types), RefTracker(RefTracker), NumHashBuckets(NumHashBuckets),
+        HashValues(HashValues), Stream(Stream) {}
 
   Error visitTypeBegin(codeview::CVType &Record,
                        codeview::TypeIndex Index) override;
@@ -57,6 +58,7 @@ private:
   bool RecordBytes = false;
   bool Hashes = false;
   codeview::LazyRandomTypeCollection &Types;
+  pdb::TypeReferenceTracker *RefTracker = nullptr;
   uint32_t NumHashBuckets;
   codeview::TypeIndex CurrentTypeIndex;
   FixedStreamArray<support::ulittle32_t> HashValues;
diff --git a/tools/llvm-pdbutil/OutputStyle.h b/tools/llvm-pdbutil/OutputStyle.h
index dfefc25a215e..40b0de8bdf72 100644
--- a/tools/llvm-pdbutil/OutputStyle.h
+++ b/tools/llvm-pdbutil/OutputStyle.h
@@ -1,9 +1,8 @@
 //===- OutputStyle.h ------------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PdbYaml.cpp b/tools/llvm-pdbutil/PdbYaml.cpp
index 3ea333608314..a26241967b5a 100644
--- a/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/tools/llvm-pdbutil/PdbYaml.cpp
@@ -1,9 +1,8 @@
-//===- PdbYAML.cpp -------------------------------------------- *- C++ --*-===//
+//===-- PdbYaml.cpp ------------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -51,6 +50,7 @@ template <> struct ScalarEnumerationTraits<llvm::pdb::PDB_Machine> {
     io.enumCase(Value, "SH3DSP", PDB_Machine::SH3DSP);
     io.enumCase(Value, "Thumb", PDB_Machine::Thumb);
     io.enumCase(Value, "WceMipsV2", PDB_Machine::WceMipsV2);
+    io.enumCase(Value, "Arm64", PDB_Machine::Arm64);
   }
 };
 
diff --git a/tools/llvm-pdbutil/PdbYaml.h b/tools/llvm-pdbutil/PdbYaml.h
index 97ba87266cc6..ed6346c2c4db 100644
--- a/tools/llvm-pdbutil/PdbYaml.h
+++ b/tools/llvm-pdbutil/PdbYaml.h
@@ -1,9 +1,8 @@
 //===- PdbYAML.h ---------------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
index bcdecca81aec..cd01a4004819 100644
--- a/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyBuiltinDumper.cpp ---------------------------------- *- C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyBuiltinDumper.h b/tools/llvm-pdbutil/PrettyBuiltinDumper.h
index fb6b0b172e6e..3bdef34c48f8 100644
--- a/tools/llvm-pdbutil/PrettyBuiltinDumper.h
+++ b/tools/llvm-pdbutil/PrettyBuiltinDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyBuiltinDumper.h ---------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index f009f53a3932..b7eccac5988c 100644
--- a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyClassDefinitionDumper.cpp --------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
index 6569a1d304f6..f43c5c11bdfd 100644
--- a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
+++ b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyClassDefinitionDumper.h ----------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
index a572522c8cd7..a522935e34f1 100644
--- a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyClassLayoutGraphicalDumper.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
index f83f1a6c1b34..8f78b3b503d0 100644
--- a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
+++ b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyClassLayoutGraphicalDumper.h -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 94a0b2d5e780..cf769ff66472 100644
--- a/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyCompilandDumper.cpp - llvm-pdbutil compiland dumper -*- C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyCompilandDumper.h b/tools/llvm-pdbutil/PrettyCompilandDumper.h
index 1a840e49607c..c83a58672d1a 100644
--- a/tools/llvm-pdbutil/PrettyCompilandDumper.h
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyCompilandDumper.h - llvm-pdbutil compiland dumper -*- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index f4cbd3f8fa14..9ed5893f252e 100644
--- a/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyEnumDumper.cpp -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyEnumDumper.h b/tools/llvm-pdbutil/PrettyEnumDumper.h
index c6e65a6d1772..e7c5c1aeb018 100644
--- a/tools/llvm-pdbutil/PrettyEnumDumper.h
+++ b/tools/llvm-pdbutil/PrettyEnumDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyEnumDumper.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
index 1270223b1c78..fede031ec0c0 100644
--- a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyExternalSymbolDumper.cpp -------------------------- *- C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
index 6a009862ddd4..58fafe943315 100644
--- a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
+++ b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyExternalSymbolDumper.h --------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index 836ede41054e..b820ca333965 100644
--- a/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyFunctionDumper.cpp --------------------------------- *- C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -139,7 +138,8 @@ void FunctionDumper::start(const PDBSymbolFunc &Symbol, PointerType Pointer) {
 
   if (Symbol.hasFramePointer()) {
     WithColor(Printer, PDB_ColorItem::Register).get()
-        << Symbol.getLocalBasePointerRegisterId();
+        << CPURegister{Symbol.getRawSymbol().getPlatform(),
+                       Symbol.getLocalBasePointerRegisterId()};
   } else {
     WithColor(Printer, PDB_ColorItem::Register).get() << "FPO";
   }
@@ -229,9 +229,9 @@ void FunctionDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
   uint32_t TypeId = Symbol.getTypeId();
   auto Type = Symbol.getSession().getSymbolById(TypeId);
   if (Type)
-    Printer << "<unknown-type>";
-  else
     Type->dump(*this);
+  else
+    Printer << "<unknown-type>";
 }
 
 void FunctionDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
diff --git a/tools/llvm-pdbutil/PrettyFunctionDumper.h b/tools/llvm-pdbutil/PrettyFunctionDumper.h
index 1a6f5430ec5a..df62604ac881 100644
--- a/tools/llvm-pdbutil/PrettyFunctionDumper.h
+++ b/tools/llvm-pdbutil/PrettyFunctionDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyFunctionDumper.h --------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index daf3cd45b327..e8f8e5aa62c9 100644
--- a/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyTypeDumper.cpp - PDBSymDumper type dumper *------------ C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.h b/tools/llvm-pdbutil/PrettyTypeDumper.h
index 36e586fea7e3..b6539d95bf31 100644
--- a/tools/llvm-pdbutil/PrettyTypeDumper.h
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyTypeDumper.h - PDBSymDumper implementation for types *- C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index 2b3f3691ed98..ef73a8cdf9c4 100644
--- a/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyTypedefDumper.cpp - PDBSymDumper impl for typedefs -- * C++ *-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyTypedefDumper.h b/tools/llvm-pdbutil/PrettyTypedefDumper.h
index 133bbfb7db0e..ad8b3f37dcfd 100644
--- a/tools/llvm-pdbutil/PrettyTypedefDumper.h
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyTypedefDumper.h - llvm-pdbutil typedef dumper ---*- C++ ----*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyVariableDumper.cpp b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
index ddac8cf0da4a..6dd7cc384cc9 100644
--- a/tools/llvm-pdbutil/PrettyVariableDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
@@ -1,9 +1,8 @@
 //===- PrettyVariableDumper.cpp ---------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/PrettyVariableDumper.h b/tools/llvm-pdbutil/PrettyVariableDumper.h
index cacf1ce9577b..65cf5cd2cf55 100644
--- a/tools/llvm-pdbutil/PrettyVariableDumper.h
+++ b/tools/llvm-pdbutil/PrettyVariableDumper.h
@@ -1,9 +1,8 @@
 //===- PrettyVariableDumper.h - PDBSymDumper variable dumper ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/StreamUtil.cpp b/tools/llvm-pdbutil/StreamUtil.cpp
index 367d947d25ee..7dfc2beefe78 100644
--- a/tools/llvm-pdbutil/StreamUtil.cpp
+++ b/tools/llvm-pdbutil/StreamUtil.cpp
@@ -1,9 +1,8 @@
 //===- StreamUtil.cpp - PDB stream utilities --------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/StreamUtil.h b/tools/llvm-pdbutil/StreamUtil.h
index 0e2e80707361..f810f7dc15b4 100644
--- a/tools/llvm-pdbutil/StreamUtil.h
+++ b/tools/llvm-pdbutil/StreamUtil.h
@@ -1,9 +1,8 @@
 //===- Streamutil.h - PDB stream utilities ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/TypeReferenceTracker.cpp b/tools/llvm-pdbutil/TypeReferenceTracker.cpp
new file mode 100644
index 000000000000..f184f02e01ee
--- /dev/null
+++ b/tools/llvm-pdbutil/TypeReferenceTracker.cpp
@@ -0,0 +1,160 @@
+//===- TypeReferenceTracker.cpp ------------------------------- *- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TypeReferenceTracker.h"
+
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+using namespace llvm::codeview;
+
+// LazyRandomTypeCollection doesn't appear to expose the number of records, so
+// count them by walking the collection from its first index to the end.
+static uint32_t getNumRecordsInCollection(LazyRandomTypeCollection &Types) {
+  uint32_t Count = 0;
+  for (auto Cur = Types.getFirst(); Cur.hasValue(); Cur = Types.getNext(*Cur))
+    Count += 1;
+  return Count;
+}
+
+TypeReferenceTracker::TypeReferenceTracker(InputFile &File)
+    : File(File), Types(File.types()),
+      Ids(File.isPdb() ? &File.ids() : nullptr) { // Ids stays null for non-PDBs
+  NumTypeRecords = getNumRecordsInCollection(Types);
+  TypeReferenced.resize(NumTypeRecords, false); // one bit per type record
+
+  // If this is a PDB, ids are stored separately, so make a separate bit vector.
+  if (Ids) {
+    NumIdRecords = getNumRecordsInCollection(*Ids);
+    IdReferenced.resize(NumIdRecords, false); // one bit per id record
+  }
+
+  // For PDB inputs, keep the TpiStream and build its hash map up front so that
+  // markReferencedTypes() can resolve forward decls to their full declarations.
+  if (File.isPdb()) {
+    Tpi = &cantFail(File.pdb().getPDBTpiStream());
+    Tpi->buildHashMap();
+  }
+}
+
+void TypeReferenceTracker::mark() {
+  // Walk the type roots and mark everything they transitively reference:
+  // - globals
+  // - modi symbols
+  // - LF_UDT_MOD_SRC_LINE? VC always links these in.
+  for (SymbolGroup SG : File.symbol_groups()) {
+    if (File.isObj()) { // object file: read symbols out of debug subsections
+      for (const auto &SS : SG.getDebugSubsections()) {
+        // FIXME: Are there other type-referencing subsections? Inlinees?
+        // Probably for IDs.
+        if (SS.kind() != DebugSubsectionKind::Symbols)
+          continue;
+
+        CVSymbolArray Symbols;
+        BinaryStreamReader Reader(SS.getRecordData());
+        cantFail(Reader.readArray(Symbols, Reader.getLength()));
+        for (const CVSymbol &S : Symbols)
+          addTypeRefsFromSymbol(S);
+      }
+    } else if (SG.hasDebugStream()) { // otherwise use the module stream, if any
+      for (const CVSymbol &S : SG.getPdbModuleStream().getSymbolArray())
+        addTypeRefsFromSymbol(S);
+    }
+  }
+
+  // PDB only: walk globals and mark types referenced from globals.
+  if (File.isPdb() && File.pdb().hasPDBGlobalsStream()) {
+    SymbolStream &SymStream = cantFail(File.pdb().getPDBSymbolStream());
+    GlobalsStream &GS = cantFail(File.pdb().getPDBGlobalsStream());
+    for (uint32_t PubSymOff : GS.getGlobalsTable()) {
+      CVSymbol Sym = SymStream.readRecord(PubSymOff); // offset into SymStream
+      addTypeRefsFromSymbol(Sym);
+    }
+  }
+
+  // FIXME: Should we walk Ids?
+}
+
+void TypeReferenceTracker::addOneTypeRef(TiRefKind RefKind, TypeIndex RefTI) {
+  // Skip simple, out-of-range (would be an OOB bit access), or seen indices.
+  BitVector &TypeOrIdReferenced =
+      (Ids && RefKind == TiRefKind::IndexRef) ? IdReferenced : TypeReferenced;
+  if (RefTI.isSimple() || RefTI.toArrayIndex() >= TypeOrIdReferenced.size() ||
+      TypeOrIdReferenced.test(RefTI.toArrayIndex()))
+    return;
+  // Otherwise, mark it seen and add it to the work list.
+  TypeOrIdReferenced.set(RefTI.toArrayIndex());
+  RefWorklist.push_back({RefKind, RefTI});
+}
+
+void TypeReferenceTracker::addTypeRefsFromSymbol(const CVSymbol &Sym) {
+  SmallVector<TiReference, 4> DepList;
+  // Collect the type index references in Sym. FIXME: Check for failure.
+  discoverTypeIndicesInSymbol(Sym, DepList);
+  addReferencedTypes(Sym.content(), DepList); // queue newly seen indices
+  markReferencedTypes(); // drain the worklist, following nested references
+}
+
+void TypeReferenceTracker::addReferencedTypes(ArrayRef<uint8_t> RecData,
+                                              ArrayRef<TiReference> DepList) {
+  for (const auto &Ref : DepList) {
+    // FIXME: Report OOB slice instead of truncating.
+    ArrayRef<uint8_t> ByteSlice =
+        RecData.drop_front(Ref.Offset).take_front(4 * Ref.Count);
+    ArrayRef<TypeIndex> TIs(
+        reinterpret_cast<const TypeIndex *>(ByteSlice.data()),
+        ByteSlice.size() / 4); // the slice is read as 4-byte type indices
+
+    // addOneTypeRef picks the bitvector: if this is a PDB and this is an item
+    // reference, the IPI one. Otherwise, it's a type ref, or only one stream.
+    for (TypeIndex RefTI : TIs)
+      addOneTypeRef(Ref.Kind, RefTI);
+  }
+}
+
+void TypeReferenceTracker::markReferencedTypes() {
+  while (!RefWorklist.empty()) {
+    TiRefKind RefKind;
+    TypeIndex RefTI;
+    std::tie(RefKind, RefTI) = RefWorklist.pop_back_val(); // take from back
+    Optional<CVType> Rec = (Ids && RefKind == TiRefKind::IndexRef)
+                               ? Ids->tryGetType(RefTI)
+                               : Types.tryGetType(RefTI);
+    if (!Rec)
+      continue; // FIXME: Report a reference to a non-existent type.
+
+    SmallVector<TiReference, 4> DepList;
+    // FIXME: Check for failure.
+    discoverTypeIndices(*Rec, DepList);
+    addReferencedTypes(Rec->content(), DepList); // may grow the worklist
+
+    // If this is a tag kind and this is a PDB input, mark the complete type as
+    // referenced.
+    // FIXME: This limitation makes this feature somewhat useless on object file
+    // inputs.
+    if (Tpi) { // set by the constructor only for PDB inputs
+      switch (Rec->kind()) {
+      default:
+        break;
+      case LF_CLASS:
+      case LF_INTERFACE:
+      case LF_STRUCTURE:
+      case LF_UNION:
+      case LF_ENUM:
+        addOneTypeRef(TiRefKind::TypeRef,
+                      cantFail(Tpi->findFullDeclForForwardRef(RefTI)));
+        break;
+      }
+    }
+  }
+}
diff --git a/tools/llvm-pdbutil/TypeReferenceTracker.h b/tools/llvm-pdbutil/TypeReferenceTracker.h
new file mode 100644
index 000000000000..8861731ab6ee
--- /dev/null
+++ b/tools/llvm-pdbutil/TypeReferenceTracker.h
@@ -0,0 +1,69 @@
+//===- TypeReferenceTracker.h --------------------------------- *- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
+#define LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
+
+#include "InputFile.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace pdb {
+
+class TpiStream;
+
+/// Maintains bitvectors tracking which type (and, for PDBs, id) records are
+/// transitively referenced by symbol records.
+class TypeReferenceTracker {
+public:
+  TypeReferenceTracker(InputFile &File);
+
+  // Do the work of marking referenced types; no bits are set before this.
+  void mark();
+
+  // Return true if a symbol record transitively references this type.
+  bool isTypeReferenced(codeview::TypeIndex TI) {
+    return TI.toArrayIndex() < NumTypeRecords && // index N itself is OOB
+           TypeReferenced.test(TI.toArrayIndex());
+  }
+
+private:
+  void addTypeRefsFromSymbol(const codeview::CVSymbol &Sym);
+
+  // Queue the type indices that Refs locates inside RecData.
+  void addReferencedTypes(ArrayRef<uint8_t> RecData,
+                          ArrayRef<codeview::TiReference> Refs);
+
+  // Consume all types on the worklist.
+  void markReferencedTypes();
+
+  void addOneTypeRef(codeview::TiRefKind RefKind, codeview::TypeIndex RefTI);
+
+  InputFile &File;
+  codeview::LazyRandomTypeCollection &Types;
+  codeview::LazyRandomTypeCollection *Ids = nullptr; // null unless a PDB
+  TpiStream *Tpi = nullptr;                          // null unless a PDB
+  BitVector TypeReferenced; // NumTypeRecords bits, one per record in Types
+  BitVector IdReferenced;   // NumIdRecords bits, one per record in Ids
+  SmallVector<std::pair<codeview::TiRefKind, codeview::TypeIndex>, 10>
+      RefWorklist;
+  uint32_t NumTypeRecords = 0;
+  uint32_t NumIdRecords = 0;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
diff --git a/tools/llvm-pdbutil/YAMLOutputStyle.cpp b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
index 62b5c428d410..80b76657facc 100644
--- a/tools/llvm-pdbutil/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
@@ -1,9 +1,8 @@
 //===- YAMLOutputStyle.cpp ------------------------------------ *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -232,10 +231,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
       if (ModiStream == kInvalidStreamIndex)
         continue;
 
-      auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
-          File.getMsfLayout(), File.getMsfBuffer(), ModiStream,
-          File.getAllocator());
-
+      auto ModStreamData = File.createIndexedStream(ModiStream);
       pdb::ModuleDebugStreamRef ModS(MI, std::move(ModStreamData));
       if (auto EC = ModS.reload())
         return EC;
diff --git a/tools/llvm-pdbutil/YAMLOutputStyle.h b/tools/llvm-pdbutil/YAMLOutputStyle.h
index a5ad3355d2ab..7a50af1abe3f 100644
--- a/tools/llvm-pdbutil/YAMLOutputStyle.h
+++ b/tools/llvm-pdbutil/YAMLOutputStyle.h
@@ -1,9 +1,8 @@
 //===- YAMLOutputStyle.h -------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 76f61a2a95a7..785a98086791 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -1,9 +1,8 @@
 //===- llvm-pdbutil.cpp - Dump debug info from a PDB file -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -463,7 +462,10 @@ cl::opt<bool> DumpSymbolStats(
     "sym-stats",
     cl::desc("Dump a detailed breakdown of symbol usage/size for each module"),
     cl::cat(MsfOptions), cl::sub(DumpSubcommand));
-
+cl::opt<bool> DumpTypeStats(
+    "type-stats",
+    cl::desc("Dump a detailed breakdown of type usage/size"),
+    cl::cat(MsfOptions), cl::sub(DumpSubcommand));
 cl::opt<bool> DumpUdtStats(
     "udt-stats",
     cl::desc("Dump a detailed breakdown of S_UDT record usage / stats"),
@@ -477,6 +479,11 @@ cl::opt<bool> DumpTypeData(
     "type-data",
     cl::desc("dump CodeView type record raw bytes from TPI stream"),
     cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+cl::opt<bool>
+    DumpTypeRefStats("type-ref-stats",
+                     cl::desc("dump statistics on the number and size of types "
+                              "transitively referenced by symbol records"),
+                     cl::cat(TypeOptions), cl::sub(DumpSubcommand));
 
 cl::opt<bool> DumpTypeExtras("type-extras",
                              cl::desc("dump type hashes and index offsets"),
@@ -927,7 +934,7 @@ static std::string stringOr(std::string Str, std::string IfEmpty) {
 
 static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
   auto Sources = Session.getInjectedSources();
-  if (0 == Sources->getChildCount()) {
+  if (!Sources || !Sources->getChildCount()) {
     Printer.printLine("There are no injected sources.");
     return;
   }
@@ -940,9 +947,6 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     std::string VFName = stringOr(IS->getVirtualFileName(), "<null>");
     uint32_t CRC = IS->getCrc32();
 
-    std::string CompressionStr;
-    llvm::raw_string_ostream Stream(CompressionStr);
-    Stream << IS->getCompression();
     WithColor(Printer, PDB_ColorItem::Path).get() << File;
     Printer << " (";
     WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Size;
@@ -961,7 +965,9 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     Printer << ", ";
     WithColor(Printer, PDB_ColorItem::Keyword).get() << "compression";
     Printer << "=";
-    WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Stream.str();
+    dumpPDBSourceCompression(
+        WithColor(Printer, PDB_ColorItem::LiteralValue).get(),
+        IS->getCompression());
 
     if (!opts::pretty::ShowInjectedSourceContent)
       continue;
@@ -970,7 +976,12 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     int Indent = Printer.getIndentLevel();
     Printer.Unindent(Indent);
 
-    Printer.printLine(IS->getCode());
+    if (IS->getCompression() == PDB_SourceCompression::None)
+      Printer.printLine(IS->getCode());
+    else
+      Printer.formatBinary("Compressed data",
+                           arrayRefFromStringRef(IS->getCode()),
+                           /*StartOffset=*/0);
 
     // Re-indent back to the original level.
     Printer.Indent(Indent);
@@ -1272,12 +1283,7 @@ static void dumpPretty(StringRef Path) {
     WithColor(Printer, PDB_ColorItem::SectionHeader).get()
         << "---INJECTED SOURCES---";
     AutoIndent Indent1(Printer);
-
-    if (ReaderType == PDB_ReaderType::Native)
-      Printer.printLine(
-          "Injected sources are not supported with the native reader.");
-    else
-      dumpInjectedSources(Printer, *Session);
+    dumpInjectedSources(Printer, *Session);
   }
 
   Printer.NewLine();
@@ -1377,8 +1383,7 @@ static void exportStream() {
            << "' (index " << Index << ") to file " << OutFileName << ".\n";
   }
 
-  SourceStream = MappedBlockStream::createIndexedStream(
-      File.getMsfLayout(), File.getMsfBuffer(), Index, File.getAllocator());
+  SourceStream = File.createIndexedStream(Index);
   auto OutFile = ExitOnErr(
       FileOutputBuffer::create(OutFileName, SourceStream->getLength()));
   FileBufferByteStream DestStream(std::move(OutFile), llvm::support::little);
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.h b/tools/llvm-pdbutil/llvm-pdbutil.h
index a57cc51d7fd7..321f41bba7f1 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -1,9 +1,8 @@
 //===- llvm-pdbutil.h ----------------------------------------- *- C++ --*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -141,6 +140,7 @@ extern llvm::cl::opt<bool> DumpSummary;
 extern llvm::cl::opt<bool> DumpFpm;
 extern llvm::cl::opt<bool> DumpStreams;
 extern llvm::cl::opt<bool> DumpSymbolStats;
+extern llvm::cl::opt<bool> DumpTypeStats;
 extern llvm::cl::opt<bool> DumpUdtStats;
 extern llvm::cl::opt<bool> DumpStreamBlocks;
 
@@ -156,6 +156,7 @@ extern llvm::cl::opt<bool> DumpTypeData;
 extern llvm::cl::opt<bool> DumpTypeExtras;
 extern llvm::cl::list<uint32_t> DumpTypeIndex;
 extern llvm::cl::opt<bool> DumpTypeDependents;
+extern llvm::cl::opt<bool> DumpTypeRefStats;
 extern llvm::cl::opt<bool> DumpSectionHeaders;
 
 extern llvm::cl::opt<bool> DumpIds;
diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
index c25cbc2b64df..16d3ebe3fcbc 100644
--- a/tools/llvm-profdata/llvm-profdata.cpp
+++ b/tools/llvm-profdata/llvm-profdata.cpp
@@ -1,9 +1,8 @@
 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,8 +26,8 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/WithColor.h"
 #include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -201,6 +200,32 @@ static bool isFatalError(instrprof_error IPE) {
   }
 }
 
+/// Compute the overlap between profile BaseFilename and TestFilename,
+/// and store the program-level result in Overlap.
+static void overlapInput(const std::string &BaseFilename,
+                         const std::string &TestFilename, WriterContext *WC,
+                         OverlapStats &Overlap,
+                         const OverlapFuncFilters &FuncFilter,
+                         raw_fd_ostream &OS, bool IsCS) {
+  auto ReaderOrErr = InstrProfReader::create(TestFilename);
+  if (Error E = ReaderOrErr.takeError()) {
+    // Skip the empty profiles by returning silently.
+    instrprof_error IPE = InstrProfError::take(std::move(E));
+    if (IPE != instrprof_error::empty_raw_profile)
+      WC->Err = make_error<InstrProfError>(IPE);
+    return;
+  }
+
+  auto Reader = std::move(ReaderOrErr.get());
+  for (auto &I : *Reader) {
+    OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
+    FuncOverlap.setFuncInfo(I.Name, I.Hash);
+
+    WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
+    FuncOverlap.dump(OS);
+  }
+}
+
 /// Load an input into a writer context.
 static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
                       WriterContext *WC) {
@@ -226,7 +251,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
 
   auto Reader = std::move(ReaderOrErr.get());
   bool IsIRProfile = Reader->isIRLevelProfile();
-  if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) {
+  bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
+  if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
     WC->Err = make_error<StringError>(
         "Merge IR generated profile with Clang generated profile.",
         std::error_code());
@@ -291,11 +317,6 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
       OutputFormat != PF_Text)
     exitWithError("Unknown format is specified.");
 
-  std::error_code EC;
-  raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
-  if (EC)
-    exitWithErrorCode(EC, OutputFilename);
-
   std::mutex ErrorLock;
   SmallSet<instrprof_error, 4> WriterErrorCodes;
 
@@ -358,6 +379,11 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
            WC->ErrWhence);
   }
 
+  std::error_code EC;
+  raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
   InstrProfWriter &Writer = Contexts[0]->Writer;
   if (OutputFormat == PF_Text) {
     if (Error E = Writer.writeText(Output))
@@ -407,12 +433,6 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
                                StringRef OutputFilename,
                                ProfileFormat OutputFormat) {
   using namespace sampleprof;
-  auto WriterOrErr =
-      SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
-  if (std::error_code EC = WriterOrErr.getError())
-    exitWithErrorCode(EC, OutputFilename);
-
-  auto Writer = std::move(WriterOrErr.get());
   StringMap<FunctionSamples> ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
   LLVMContext Context;
@@ -447,6 +467,12 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
       }
     }
   }
+  auto WriterOrErr =
+      SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
+  if (std::error_code EC = WriterOrErr.getError())
+    exitWithErrorCode(EC, OutputFilename);
+
+  auto Writer = std::move(WriterOrErr.get());
   Writer->write(ProfileMap);
 }
 
@@ -608,6 +634,65 @@ static int merge_main(int argc, const char *argv[]) {
   return 0;
 }
 
+/// Compute the overlap between profile BaseFilename and profile TestFilename.
+static void overlapInstrProfile(const std::string &BaseFilename,
+                                const std::string &TestFilename,
+                                const OverlapFuncFilters &FuncFilter,
+                                raw_fd_ostream &OS, bool IsCS) {
+  std::mutex ErrorLock;
+  SmallSet<instrprof_error, 4> WriterErrorCodes;
+  WriterContext Context(false, ErrorLock, WriterErrorCodes);
+  WeightedFile WeightedInput{BaseFilename, 1};
+  OverlapStats Overlap;
+  Error E = Overlap.accumuateCounts(BaseFilename, TestFilename, IsCS);
+  if (E)
+    exitWithError(std::move(E), "Error in getting profile count sums");
+  if (Overlap.Base.CountSum < 1.0f) {
+    OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
+    exit(0);
+  }
+  if (Overlap.Test.CountSum < 1.0f) {
+    OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
+    exit(0);
+  }
+  loadInput(WeightedInput, nullptr, &Context);
+  overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
+               IsCS);
+  Overlap.dump(OS);
+}
+
+static int overlap_main(int argc, const char *argv[]) {
+  cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
+                                    cl::desc("<base profile file>"));
+  cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
+                                    cl::desc("<test profile file>"));
+  cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
+                              cl::desc("Output file"));
+  cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
+  cl::opt<bool> IsCS("cs", cl::init(false),
+                     cl::desc("For context sensitive counts"));
+  cl::opt<unsigned long long> ValueCutoff(
+      "value-cutoff", cl::init(-1),
+      cl::desc(
+          "Function level overlap information for every function in test "
+          "profile with max count value greater then the parameter value"));
+  cl::opt<std::string> FuncNameFilter(
+      "function",
+      cl::desc("Function level overlap information for matching functions"));
+  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
+
+  std::error_code EC;
+  raw_fd_ostream OS(Output.data(), EC, sys::fs::F_Text);
+  if (EC)
+    exitWithErrorCode(EC, Output);
+
+  overlapInstrProfile(BaseFilename, TestFilename,
+                      OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
+                      IsCS);
+
+  return 0;
+}
+
 typedef struct ValueSitesStats {
   ValueSitesStats()
       : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0),
@@ -643,7 +728,7 @@ static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
     for (uint32_t V = 0; V < NV; V++) {
       OS << "\t[ " << format("%2u", I) << ", ";
       if (Symtab == nullptr)
-        OS << format("%4u", VD[V].Value);
+        OS << format("%4" PRIu64, VD[V].Value);
       else
         OS << Symtab->getFuncName(VD[V].Value);
       OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
@@ -670,9 +755,10 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
                             uint32_t TopN, bool ShowIndirectCallTargets,
                             bool ShowMemOPSizes, bool ShowDetailedSummary,
                             std::vector<uint32_t> DetailedSummaryCutoffs,
-                            bool ShowAllFunctions, uint64_t ValueCutoff,
-                            bool OnlyListBelow, const std::string &ShowFunction,
-                            bool TextFormat, raw_fd_ostream &OS) {
+                            bool ShowAllFunctions, bool ShowCS,
+                            uint64_t ValueCutoff, bool OnlyListBelow,
+                            const std::string &ShowFunction, bool TextFormat,
+                            raw_fd_ostream &OS) {
   auto ReaderOrErr = InstrProfReader::create(Filename);
   std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
   if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -709,6 +795,11 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
     OS << ":ir\n";
 
   for (const auto &Func : *Reader) {
+    if (Reader->isIRLevelProfile()) {
+      bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
+      if (FuncIsCS != ShowCS)
+        continue;
+    }
     bool Show =
         ShowAllFunctions || (!ShowFunction.empty() &&
                              Func.Name.find(ShowFunction) != Func.Name.npos);
@@ -900,6 +991,8 @@ static int show_main(int argc, const char *argv[]) {
       cl::value_desc("800000,901000,999999"));
   cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
                                  cl::desc("Details for every function"));
+  cl::opt<bool> ShowCS("showcs", cl::init(false),
+                       cl::desc("Show context sensitive counts"));
   cl::opt<std::string> ShowFunction("function",
                                     cl::desc("Details for matching functions"));
 
@@ -927,6 +1020,12 @@ static int show_main(int argc, const char *argv[]) {
   if (OutputFilename.empty())
     OutputFilename = "-";
 
+  if (!Filename.compare(OutputFilename)) {
+    errs() << sys::path::filename(argv[0])
+           << ": Input file name cannot be the same as the output file name!\n";
+    return 1;
+  }
+
   std::error_code EC;
   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
   if (EC)
@@ -935,14 +1034,12 @@ static int show_main(int argc, const char *argv[]) {
   if (ShowAllFunctions && !ShowFunction.empty())
     WithColor::warning() << "-function argument ignored: showing all functions\n";
 
-  std::vector<uint32_t> Cutoffs(DetailedSummaryCutoffs.begin(),
-                                DetailedSummaryCutoffs.end());
   if (ProfileKind == instr)
     return showInstrProfile(Filename, ShowCounts, TopNFunctions,
                             ShowIndirectCallTargets, ShowMemOPSizes,
                             ShowDetailedSummary, DetailedSummaryCutoffs,
-                            ShowAllFunctions, ValueCutoff, OnlyListBelow,
-                            ShowFunction, TextFormat, OS);
+                            ShowAllFunctions, ShowCS, ValueCutoff,
+                            OnlyListBelow, ShowFunction, TextFormat, OS);
   else
     return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
                              ShowFunction, OS);
@@ -959,6 +1056,8 @@ int main(int argc, const char *argv[]) {
       func = merge_main;
     else if (strcmp(argv[1], "show") == 0)
       func = show_main;
+    else if (strcmp(argv[1], "overlap") == 0)
+      func = overlap_main;
 
     if (func) {
       std::string Invocation(ProgName.str() + " " + argv[1]);
@@ -973,7 +1072,7 @@ int main(int argc, const char *argv[]) {
              << "USAGE: " << ProgName << " <command> [args...]\n"
              << "USAGE: " << ProgName << " <command> -help\n\n"
              << "See each individual command --help for more details.\n"
-             << "Available commands: merge, show\n";
+             << "Available commands: merge, show, overlap\n";
       return 0;
     }
   }
@@ -983,6 +1082,6 @@ int main(int argc, const char *argv[]) {
   else
     errs() << ProgName << ": Unknown command!\n";
 
-  errs() << "USAGE: " << ProgName << " <merge|show> [args...]\n";
+  errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n";
   return 1;
 }
diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h
index 51128f113c4c..11f9d6166a59 100644
--- a/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -1,9 +1,8 @@
 //===--- ARMEHABIPrinter.h - ARM EHABI Unwind Information Printer ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -366,6 +365,8 @@ template <typename ET>
 ErrorOr<StringRef>
 PrinterContext<ET>::FunctionAtAddress(unsigned Section,
                                       uint64_t Address) const {
+  if (!Symtab)
+    return readobj_error::unknown_symbol;
   auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
   if (!StrTableOrErr)
     error(StrTableOrErr.takeError());
@@ -551,13 +552,15 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
       const Elf_Shdr *EHT =
         FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
 
-      if (auto Name = ELF->getSectionName(EHT))
-        SW.printString("ExceptionHandlingTable", *Name);
+      if (EHT)
+        if (auto Name = ELF->getSectionName(EHT))
+          SW.printString("ExceptionHandlingTable", *Name);
 
       uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr);
       SW.printHex("TableEntryOffset", TableEntryOffset);
 
-      PrintExceptionTable(IT, EHT, TableEntryOffset);
+      if (EHT)
+        PrintExceptionTable(IT, EHT, TableEntryOffset);
     }
   }
 }
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 4b823b816c35..4de14e2e78d5 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -1,9 +1,8 @@
 //===-- ARMWinEHPrinter.cpp - Windows on ARM EH Data Printer ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -1095,17 +1094,17 @@ void Decoder::dumpProcedureData(const COFFObjectFile &COFF,
       break;
 }
 
-std::error_code Decoder::dumpProcedureData(const COFFObjectFile &COFF) {
+Error Decoder::dumpProcedureData(const COFFObjectFile &COFF) {
   for (const auto &Section : COFF.sections()) {
-    StringRef SectionName;
-    if (std::error_code EC =
-            COFF.getSectionName(COFF.getCOFFSection(Section), SectionName))
-      return EC;
+    Expected<StringRef> NameOrErr =
+        COFF.getSectionName(COFF.getCOFFSection(Section));
+    if (!NameOrErr)
+      return NameOrErr.takeError();
 
-    if (SectionName.startswith(".pdata"))
+    if (NameOrErr->startswith(".pdata"))
       dumpProcedureData(COFF, Section);
   }
-  return std::error_code();
+  return Error::success();
 }
 }
 }
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.h b/tools/llvm-readobj/ARMWinEHPrinter.h
index e271a1e6fe77..5de7062cb1d7 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -1,9 +1,8 @@
 //===--- ARMWinEHPrinter.h - Windows on ARM Unwind Information Printer ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License.  See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -157,7 +156,7 @@ public:
   Decoder(ScopedPrinter &SW, bool isAArch64) : SW(SW),
                                                OS(SW.getOStream()),
                                                isAArch64(isAArch64) {}
-  std::error_code dumpProcedureData(const object::COFFObjectFile &COFF);
+  Error dumpProcedureData(const object::COFFObjectFile &COFF);
 };
 }
 }
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 3e2626dad118..4c2e39dfa3cc 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -1,9 +1,8 @@
 //===-- COFFDumper.cpp - COFF-specific dumper -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -44,13 +43,14 @@
 #include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/WindowsResource.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/Win64EH.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -81,8 +81,6 @@ public:
   void printFileHeaders() override;
   void printSectionHeaders() override;
   void printRelocations() override;
-  void printSymbols() override;
-  void printDynamicSymbols() override;
   void printUnwindInfo() override;
 
   void printNeededLibraries() override;
@@ -95,12 +93,16 @@ public:
   void printCOFFResources() override;
   void printCOFFLoadConfig() override;
   void printCodeViewDebugInfo() override;
-  void
-  mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
-                     llvm::codeview::MergingTypeTableBuilder &CVTypes) override;
+  void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
+                          llvm::codeview::MergingTypeTableBuilder &CVTypes,
+                          llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+                          llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+                          bool GHash) override;
   void printStackMap() const override;
   void printAddrsig() override;
 private:
+  void printSymbols() override;
+  void printDynamicSymbols() override;
   void printSymbol(const SymbolRef &Sym);
   void printRelocation(const SectionRef &Section, const RelocationRef &Reloc,
                        uint64_t Bias = 0);
@@ -568,29 +570,6 @@ static const EnumEntry<uint8_t> FileChecksumKindNames[] = {
   LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, SHA256),
 };
 
-static const EnumEntry<COFF::ResourceTypeID> ResourceTypeNames[]{
-    {"kRT_CURSOR (ID 1)", COFF::RID_Cursor},
-    {"kRT_BITMAP (ID 2)", COFF::RID_Bitmap},
-    {"kRT_ICON (ID 3)", COFF::RID_Icon},
-    {"kRT_MENU (ID 4)", COFF::RID_Menu},
-    {"kRT_DIALOG (ID 5)", COFF::RID_Dialog},
-    {"kRT_STRING (ID 6)", COFF::RID_String},
-    {"kRT_FONTDIR (ID 7)", COFF::RID_FontDir},
-    {"kRT_FONT (ID 8)", COFF::RID_Font},
-    {"kRT_ACCELERATOR (ID 9)", COFF::RID_Accelerator},
-    {"kRT_RCDATA (ID 10)", COFF::RID_RCData},
-    {"kRT_MESSAGETABLE (ID 11)", COFF::RID_MessageTable},
-    {"kRT_GROUP_CURSOR (ID 12)", COFF::RID_Group_Cursor},
-    {"kRT_GROUP_ICON (ID 14)", COFF::RID_Group_Icon},
-    {"kRT_VERSION (ID 16)", COFF::RID_Version},
-    {"kRT_DLGINCLUDE (ID 17)", COFF::RID_DLGInclude},
-    {"kRT_PLUGPLAY (ID 19)", COFF::RID_PlugPlay},
-    {"kRT_VXD (ID 20)", COFF::RID_VXD},
-    {"kRT_ANICURSOR (ID 21)", COFF::RID_AniCursor},
-    {"kRT_ANIICON (ID 22)", COFF::RID_AniIcon},
-    {"kRT_HTML (ID 23)", COFF::RID_HTML},
-    {"kRT_MANIFEST (ID 24)", COFF::RID_Manifest}};
-
 template <typename T>
 static std::error_code getSymbolAuxData(const COFFObjectFile *Obj,
                                         COFFSymbolRef Symbol,
@@ -613,11 +592,14 @@ void COFFDumper::cacheRelocations() {
       RelocMap[Section].push_back(Reloc);
 
     // Sort relocations by address.
-    llvm::sort(RelocMap[Section], relocAddressLess);
+    llvm::sort(RelocMap[Section], [](RelocationRef L, RelocationRef R) {
+      return L.getOffset() < R.getOffset();
+    });
   }
 }
 
-void COFFDumper::printDataDirectory(uint32_t Index, const std::string &FieldName) {
+void COFFDumper::printDataDirectory(uint32_t Index,
+                                    const std::string &FieldName) {
   const data_directory *Data;
   if (Obj->getDataDirectory(Index, Data))
     return;
@@ -951,8 +933,7 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) {
 
 void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
                                             const SectionRef &Section) {
-  StringRef SectionContents;
-  error(Section.getContents(SectionContents));
+  StringRef SectionContents = unwrapOrError(Section.getContents());
   StringRef Data = SectionContents;
 
   SmallVector<StringRef, 10> FunctionNames;
@@ -980,6 +961,11 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
     error(consume(Data, SubSectionSize));
 
     ListScope S(W, "Subsection");
+    // Dump the subsection as normal even if the ignore bit is set.
+    if (SubType & SubsectionIgnoreFlag) {
+      W.printHex("IgnoredSubsectionKind", SubType);
+      SubType &= ~SubsectionIgnoreFlag;
+    }
     W.printEnum("SubSectionType", SubType, makeArrayRef(SubSectionTypes));
     W.printHex("SubSectionSize", SubSectionSize);
 
@@ -1228,13 +1214,15 @@ void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
 }
 
 void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
-                                    MergingTypeTableBuilder &CVTypes) {
+                                    MergingTypeTableBuilder &CVTypes,
+                                    GlobalTypeTableBuilder &GlobalCVIDs,
+                                    GlobalTypeTableBuilder &GlobalCVTypes,
+                                    bool GHash) {
   for (const SectionRef &S : Obj->sections()) {
     StringRef SectionName;
     error(S.getName(SectionName));
     if (SectionName == ".debug$T") {
-      StringRef Data;
-      error(S.getContents(Data));
+      StringRef Data = unwrapOrError(S.getContents());
       uint32_t Magic;
       error(consume(Data, Magic));
       if (Magic != 4)
@@ -1249,9 +1237,18 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
       }
       SmallVector<TypeIndex, 128> SourceToDest;
       Optional<uint32_t> PCHSignature;
-      if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
-                                          PCHSignature))
-        return error(std::move(EC));
+      if (GHash) {
+        std::vector<GloballyHashedType> Hashes =
+            GloballyHashedType::hashTypes(Types);
+        if (auto EC =
+                mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest,
+                                      Types, Hashes, PCHSignature))
+          return error(std::move(EC));
+      } else {
+        if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+                                            PCHSignature))
+          return error(std::move(EC));
+      }
     }
   }
 }
@@ -1261,8 +1258,7 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
   ListScope D(W, "CodeViewTypes");
   W.printNumber("Section", SectionName, Obj->getSectionID(Section));
 
-  StringRef Data;
-  error(Section.getContents(Data));
+  StringRef Data = unwrapOrError(Section.getContents());
   if (opts::CodeViewSubsectionBytes)
     W.printBinaryBlock("Data", Data);
 
@@ -1322,9 +1318,7 @@ void COFFDumper::printSectionHeaders() {
 
     if (opts::SectionData &&
         !(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) {
-      StringRef Data;
-      error(Sec.getContents(Data));
-
+      StringRef Data = unwrapOrError(Sec.getContents());
       W.printBinaryBlock("SectionData", Data);
     }
   }
@@ -1398,15 +1392,11 @@ void COFFDumper::printSymbols() {
 
 void COFFDumper::printDynamicSymbols() { ListScope Group(W, "DynamicSymbols"); }
 
-static ErrorOr<StringRef>
+static Expected<StringRef>
 getSectionName(const llvm::object::COFFObjectFile *Obj, int32_t SectionNumber,
                const coff_section *Section) {
-  if (Section) {
-    StringRef SectionName;
-    if (std::error_code EC = Obj->getSectionName(Section, SectionName))
-      return EC;
-    return SectionName;
-  }
+  if (Section)
+    return Obj->getSectionName(Section);
   if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
     return StringRef("IMAGE_SYM_DEBUG");
   if (SectionNumber == llvm::COFF::IMAGE_SYM_ABSOLUTE)
@@ -1431,11 +1421,10 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
   if (Obj->getSymbolName(Symbol, SymbolName))
     SymbolName = "";
 
-  StringRef SectionName = "";
-  ErrorOr<StringRef> Res =
-      getSectionName(Obj, Symbol.getSectionNumber(), Section);
-  if (Res)
-    SectionName = *Res;
+  StringRef SectionName;
+  if (Expected<StringRef> NameOrErr =
+          getSectionName(Obj, Symbol.getSectionNumber(), Section))
+    SectionName = *NameOrErr;
 
   W.printString("Name", SymbolName);
   W.printNumber("Value", Symbol.getValue());
@@ -1503,16 +1492,12 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
           && Aux->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
         const coff_section *Assoc;
         StringRef AssocName = "";
-        std::error_code EC = Obj->getSection(AuxNumber, Assoc);
-        ErrorOr<StringRef> Res = getSectionName(Obj, AuxNumber, Assoc);
-        if (Res)
-          AssocName = *Res;
-        if (!EC)
-          EC = Res.getError();
-        if (EC) {
-          AssocName = "";
+        if (std::error_code EC = Obj->getSection(AuxNumber, Assoc))
           error(EC);
-        }
+        Expected<StringRef> Res = getSectionName(Obj, AuxNumber, Assoc);
+        if (!Res)
+          error(Res.takeError());
+        AssocName = *Res;
 
         W.printNumber("AssocSection", AssocName, AuxNumber);
       }
@@ -1559,7 +1544,8 @@ void COFFDumper::printUnwindInfo() {
   case COFF::IMAGE_FILE_MACHINE_ARMNT: {
     ARM::WinEH::Decoder Decoder(W, Obj->getMachine() ==
                                        COFF::IMAGE_FILE_MACHINE_ARM64);
-    Decoder.dumpProcedureData(*Obj);
+    // TODO Propagate the error.
+    consumeError(Decoder.dumpProcedureData(*Obj));
     break;
   }
   default:
@@ -1581,10 +1567,10 @@ void COFFDumper::printNeededLibraries() {
       Libs.push_back(Name);
   }
 
-  std::stable_sort(Libs.begin(), Libs.end());
+  llvm::stable_sort(Libs);
 
   for (const auto &L : Libs) {
-    outs() << "  " << L << "\n";
+    W.startLine() << L << "\n";
   }
 }
 
@@ -1674,15 +1660,13 @@ void COFFDumper::printCOFFExports() {
 
 void COFFDumper::printCOFFDirectives() {
   for (const SectionRef &Section : Obj->sections()) {
-    StringRef Contents;
     StringRef Name;
 
     error(Section.getName(Name));
     if (Name != ".drectve")
       continue;
 
-    error(Section.getContents(Contents));
-
+    StringRef Contents = unwrapOrError(Section.getContents());
     W.printString("Directive(s)", Contents);
   }
 }
@@ -1721,8 +1705,7 @@ void COFFDumper::printCOFFResources() {
     if (!Name.startswith(".rsrc"))
       continue;
 
-    StringRef Ref;
-    error(S.getContents(Ref));
+    StringRef Ref = unwrapOrError(S.getContents());
 
     if ((Name == ".rsrc") || (Name == ".rsrc$01")) {
       ResourceSectionRef RSF(Ref);
@@ -1777,7 +1760,8 @@ void COFFDumper::printResourceDirectoryTable(
     SmallString<20> IDStr;
     raw_svector_ostream OS(IDStr);
     if (i < Table.NumberOfNameEntries) {
-      ArrayRef<UTF16> RawEntryNameString = unwrapOrError(RSF.getEntryNameString(Entry));
+      ArrayRef<UTF16> RawEntryNameString =
+          unwrapOrError(RSF.getEntryNameString(Entry));
       std::vector<UTF16> EndianCorrectedNameString;
       if (llvm::sys::IsBigEndianHost) {
         EndianCorrectedNameString.resize(RawEntryNameString.size() + 1);
@@ -1793,9 +1777,8 @@ void COFFDumper::printResourceDirectoryTable(
       OS << EntryNameString;
     } else {
       if (Level == "Type") {
-        ScopedPrinter Printer(OS);
-        Printer.printEnum("", Entry.Identifier.ID,
-                          makeArrayRef(ResourceTypeNames));
+        OS << ": ";
+        printResourceTypeName(Entry.Identifier.ID, OS);
         IDStr = IDStr.slice(0, IDStr.find_first_of(")", 0) + 1);
       } else {
         OS << ": (ID " << Entry.Identifier.ID << ")";
@@ -1848,18 +1831,16 @@ void COFFDumper::printStackMap() const {
   if (StackMapSection == object::SectionRef())
     return;
 
-  StringRef StackMapContents;
-  StackMapSection.getContents(StackMapContents);
-  ArrayRef<uint8_t> StackMapContentsArray(
-      reinterpret_cast<const uint8_t*>(StackMapContents.data()),
-      StackMapContents.size());
+  StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+  ArrayRef<uint8_t> StackMapContentsArray =
+      arrayRefFromStringRef(StackMapContents);
 
   if (Obj->isLittleEndian())
     prettyPrintStackMap(
-        W, StackMapV2Parser<support::little>(StackMapContentsArray));
+        W, StackMapParser<support::little>(StackMapContentsArray));
   else
-    prettyPrintStackMap(W,
-                        StackMapV2Parser<support::big>(StackMapContentsArray));
+    prettyPrintStackMap(
+        W, StackMapParser<support::big>(StackMapContentsArray));
 }
 
 void COFFDumper::printAddrsig() {
@@ -1876,15 +1857,13 @@ void COFFDumper::printAddrsig() {
   if (AddrsigSection == object::SectionRef())
     return;
 
-  StringRef AddrsigContents;
-  AddrsigSection.getContents(AddrsigContents);
-  ArrayRef<uint8_t> AddrsigContentsArray(
-      reinterpret_cast<const uint8_t*>(AddrsigContents.data()),
-      AddrsigContents.size());
+  StringRef AddrsigContents = unwrapOrError(AddrsigSection.getContents());
+  ArrayRef<uint8_t> AddrsigContentsArray(AddrsigContents.bytes_begin(),
+                                         AddrsigContents.size());
 
   ListScope L(W, "Addrsig");
-  auto *Cur = reinterpret_cast<const uint8_t *>(AddrsigContents.begin());
-  auto *End = reinterpret_cast<const uint8_t *>(AddrsigContents.end());
+  const uint8_t *Cur = AddrsigContents.bytes_begin();
+  const uint8_t *End = AddrsigContents.bytes_end();
   while (Cur != End) {
     unsigned Size;
     const char *Err;
@@ -1905,16 +1884,10 @@ void COFFDumper::printAddrsig() {
   }
 }
 
-void llvm::dumpCodeViewMergedTypes(
-    ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
-    llvm::codeview::MergingTypeTableBuilder &CVTypes) {
-  // Flatten it first, then run our dumper on it.
-  SmallString<0> TypeBuf;
-  CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) {
-    TypeBuf.append(Record.RecordData.begin(), Record.RecordData.end());
-  });
-
-  TypeTableCollection TpiTypes(CVTypes.records());
+void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+                                   ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+                                   ArrayRef<ArrayRef<uint8_t>> TpiRecords) {
+  TypeTableCollection TpiTypes(TpiRecords);
   {
     ListScope S(Writer, "MergedTypeStream");
     TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
@@ -1924,7 +1897,7 @@ void llvm::dumpCodeViewMergedTypes(
 
   // Flatten the id stream and print it next. The ID stream refers to names from
   // the type stream.
-  TypeTableCollection IpiTypes(IDTable.records());
+  TypeTableCollection IpiTypes(IpiRecords);
   {
     ListScope S(Writer, "MergedIDStream");
     TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
diff --git a/tools/llvm-readobj/COFFImportDumper.cpp b/tools/llvm-readobj/COFFImportDumper.cpp
index 18010c34f0f3..c9d5e82263db 100644
--- a/tools/llvm-readobj/COFFImportDumper.cpp
+++ b/tools/llvm-readobj/COFFImportDumper.cpp
@@ -1,9 +1,8 @@
 //===-- COFFImportDumper.cpp - COFF import library dumper -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -51,7 +50,7 @@ void dumpCOFFImportFile(const COFFImportFile *File, ScopedPrinter &Writer) {
   for (const object::BasicSymbolRef &Sym : File->symbols()) {
     raw_ostream &OS = Writer.startLine();
     OS << "Symbol: ";
-    Sym.printName(OS);
+    cantFail(Sym.printName(OS));
     OS << "\n";
   }
 }
diff --git a/tools/llvm-readobj/DwarfCFIEHPrinter.h b/tools/llvm-readobj/DwarfCFIEHPrinter.h
index d91d764c4d0a..7055510ef2f2 100644
--- a/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -1,9 +1,8 @@
 //===--- DwarfCFIEHPrinter.h - DWARF-based Unwind Information Printer -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 93254717e921..4e1cb7d544e7 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -1,9 +1,8 @@
 //===- ELFDumper.cpp - ELF-specific dumper --------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -20,6 +19,7 @@
 #include "llvm-readobj.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
@@ -30,6 +30,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ELFTypes.h"
@@ -66,13 +67,14 @@ using namespace llvm;
 using namespace llvm::object;
 using namespace ELF;
 
-#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
-  case ns::enum: return #enum;
+#define LLVM_READOBJ_ENUM_CASE(ns, enum)                                       \
+  case ns::enum:                                                               \
+    return #enum;
 
-#define ENUM_ENT(enum, altName) \
+#define ENUM_ENT(enum, altName)                                                \
   { #enum, altName, ELF::enum }
 
-#define ENUM_ENT_1(enum) \
+#define ENUM_ENT_1(enum)                                                       \
   { #enum, #enum, ELF::enum }
 
 #define LLVM_READOBJ_PHDR_ENUM(ns, enum)                                       \
@@ -132,14 +134,17 @@ struct DynRegionInfo {
     const Type *Start = reinterpret_cast<const Type *>(Addr);
     if (!Start)
       return {Start, Start};
-    if (EntSize != sizeof(Type) || Size % EntSize)
-      reportError("Invalid entity size");
+    if (EntSize != sizeof(Type) || Size % EntSize) {
+      // TODO: Add a section index to this warning.
+      reportWarning("invalid section size (" + Twine(Size) +
+                    ") or entity size (" + Twine(EntSize) + ")");
+      return {Start, Start};
+    }
     return {Start, Start + (Size / EntSize)};
   }
 };
 
-template<typename ELFT>
-class ELFDumper : public ObjDumper {
+template <typename ELFT> class ELFDumper : public ObjDumper {
 public:
   ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
 
@@ -147,13 +152,14 @@ public:
   void printSectionHeaders() override;
   void printRelocations() override;
   void printDynamicRelocations() override;
-  void printSymbols() override;
-  void printDynamicSymbols() override;
+  void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
+  void printHashSymbols() override;
   void printUnwindInfo() override;
 
   void printDynamicTable() override;
   void printNeededLibraries() override;
-  void printProgramHeaders() override;
+  void printProgramHeaders(bool PrintProgramHeaders,
+                           cl::boolOrDefault PrintSectionMapping) override;
   void printHashTable() override;
   void printGnuHashTable() override;
   void printLoadName() override;
@@ -177,6 +183,8 @@ public:
 
   void printELFLinkerOptions() override;
 
+  const object::ELFObjectFile<ELFT> *getElfObject() const { return ObjF; }
+
 private:
   std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
 
@@ -185,24 +193,25 @@ private:
   DynRegionInfo checkDRI(DynRegionInfo DRI) {
     const ELFFile<ELFT> *Obj = ObjF->getELFFile();
     if (DRI.Addr < Obj->base() ||
-        (const uint8_t *)DRI.Addr + DRI.Size > Obj->base() + Obj->getBufSize())
+        reinterpret_cast<const uint8_t *>(DRI.Addr) + DRI.Size >
+            Obj->base() + Obj->getBufSize())
       error(llvm::object::object_error::parse_failed);
     return DRI;
   }
 
   DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
-    return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
+    return checkDRI(
+        {ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
   }
 
   DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
-    return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
+    return checkDRI(
+        {ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
   }
 
-  void parseDynamicTable(ArrayRef<const Elf_Phdr *> LoadSegments);
-
-  void printValue(uint64_t Type, uint64_t Value);
+  void loadDynamicTable(const ELFFile<ELFT> *Obj);
+  void parseDynamicTable();
 
-  StringRef getDynamicString(uint64_t Offset) const;
   StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb,
                              bool &IsDefault) const;
   void LoadVersionMap() const;
@@ -217,7 +226,7 @@ private:
   DynRegionInfo DynSymRegion;
   DynRegionInfo DynamicTable;
   StringRef DynamicStringTable;
-  StringRef SOName;
+  StringRef SOName = "<Not found>";
   const Elf_Hash *HashTable = nullptr;
   const Elf_GnuHash *GnuHashTable = nullptr;
   const Elf_Shdr *DotSymtabSec = nullptr;
@@ -226,9 +235,9 @@ private:
   StringRef DynSymtabName;
   ArrayRef<Elf_Word> ShndxTable;
 
-  const Elf_Shdr *dot_gnu_version_sec = nullptr;   // .gnu.version
-  const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r
-  const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d
+  const Elf_Shdr *SymbolVersionSection = nullptr;     // .gnu.version
+  const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r
+  const Elf_Shdr *SymbolVersionDefSection = nullptr;  // .gnu.version_d
 
   // Records for each version index the corresponding Verdef or Vernaux entry.
   // This is filled the first time LoadVersionMap() is called.
@@ -256,7 +265,18 @@ private:
 
 public:
   Elf_Dyn_Range dynamic_table() const {
-    return DynamicTable.getAsArrayRef<Elf_Dyn>();
+    // A valid .dynamic section contains an array of entries terminated
+    // with a DT_NULL entry. However, sometimes the section content may
+    // continue past the DT_NULL entry, so to dump the section correctly,
+    // we first find the end of the entries by iterating over them.
+    Elf_Dyn_Range Table = DynamicTable.getAsArrayRef<Elf_Dyn>();
+
+    size_t Size = 0;
+    while (Size < Table.size())
+      if (Table[Size++].getTag() == DT_NULL)
+        break;
+
+    return Table.slice(0, Size);
   }
 
   Elf_Sym_Range dynamic_symbols() const {
@@ -271,9 +291,14 @@ public:
   void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym,
                            StringRef &SectionName,
                            unsigned &SectionIndex) const;
-  StringRef getStaticSymbolName(uint32_t Index) const;
+  std::string getStaticSymbolName(uint32_t Index) const;
+  StringRef getSymbolVersionByIndex(StringRef StrTab,
+                                    uint32_t SymbolVersionIndex,
+                                    bool &IsDefault) const;
 
   void printSymbolsHelper(bool IsDynamic) const;
+  void printDynamicEntry(raw_ostream &OS, uint64_t Type, uint64_t Value) const;
+
   const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
   const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; }
   const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; }
@@ -283,6 +308,7 @@ public:
   const DynRegionInfo &getDynRelaRegion() const { return DynRelaRegion; }
   const DynRegionInfo &getDynRelrRegion() const { return DynRelrRegion; }
   const DynRegionInfo &getDynPLTRelRegion() const { return DynPLTRelRegion; }
+  const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; }
   const Elf_Hash *getHashTable() const { return HashTable; }
   const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; }
 };
@@ -328,15 +354,25 @@ public:
   virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
   virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
   virtual void printSectionHeaders(const ELFFile<ELFT> *Obj) = 0;
-  virtual void printSymbols(const ELFFile<ELFT> *Obj) = 0;
-  virtual void printDynamicSymbols(const ELFFile<ELFT> *Obj) = 0;
+  virtual void printSymbols(const ELFFile<ELFT> *Obj, bool PrintSymbols,
+                            bool PrintDynamicSymbols) = 0;
+  virtual void printHashSymbols(const ELFFile<ELFT> *Obj) {}
+  virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
   virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
-  virtual void printSymtabMessage(const ELFFile<ELFT> *obj, StringRef Name,
+  virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
                                   size_t Offset) {}
   virtual void printSymbol(const ELFFile<ELFT> *Obj, const Elf_Sym *Symbol,
                            const Elf_Sym *FirstSym, StringRef StrTable,
                            bool IsDynamic) = 0;
-  virtual void printProgramHeaders(const ELFFile<ELFT> *Obj) = 0;
+  virtual void printProgramHeaders(const ELFFile<ELFT> *Obj,
+                                   bool PrintProgramHeaders,
+                                   cl::boolOrDefault PrintSectionMapping) = 0;
+  virtual void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                         const Elf_Shdr *Sec) = 0;
+  virtual void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                             const Elf_Shdr *Sec) = 0;
+  virtual void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+                                             const Elf_Shdr *Sec) = 0;
   virtual void printHashHistogram(const ELFFile<ELFT> *Obj) = 0;
   virtual void printCGProfile(const ELFFile<ELFT> *Obj) = 0;
   virtual void printAddrsig(const ELFFile<ELFT> *Obj) = 0;
@@ -351,24 +387,36 @@ private:
 };
 
 template <typename ELFT> class GNUStyle : public DumpStyle<ELFT> {
-  formatted_raw_ostream OS;
+  formatted_raw_ostream &OS;
 
 public:
   TYPEDEF_ELF_TYPES(ELFT)
 
   GNUStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
-      : DumpStyle<ELFT>(Dumper), OS(W.getOStream()) {}
+      : DumpStyle<ELFT>(Dumper),
+        OS(static_cast<formatted_raw_ostream &>(W.getOStream())) {
+    assert(&W.getOStream() == &llvm::fouts() && "GNUStyle requires fouts()");
+  }
 
   void printFileHeaders(const ELFO *Obj) override;
   void printGroupSections(const ELFFile<ELFT> *Obj) override;
   void printRelocations(const ELFO *Obj) override;
   void printSectionHeaders(const ELFO *Obj) override;
-  void printSymbols(const ELFO *Obj) override;
-  void printDynamicSymbols(const ELFO *Obj) override;
+  void printSymbols(const ELFO *Obj, bool PrintSymbols,
+                    bool PrintDynamicSymbols) override;
+  void printHashSymbols(const ELFO *Obj) override;
+  void printDynamic(const ELFFile<ELFT> *Obj) override;
   void printDynamicRelocations(const ELFO *Obj) override;
   void printSymtabMessage(const ELFO *Obj, StringRef Name,
                           size_t Offset) override;
-  void printProgramHeaders(const ELFO *Obj) override;
+  void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+                           cl::boolOrDefault PrintSectionMapping) override;
+  void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                 const Elf_Shdr *Sec) override;
+  void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                     const Elf_Shdr *Sec) override;
+  void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+                                     const Elf_Shdr *Sec) override;
   void printHashHistogram(const ELFFile<ELFT> *Obj) override;
   void printCGProfile(const ELFFile<ELFT> *Obj) override;
   void printAddrsig(const ELFFile<ELFT> *Obj) override;
@@ -379,11 +427,11 @@ public:
 
 private:
   struct Field {
-    StringRef Str;
+    std::string Str;
     unsigned Column;
 
     Field(StringRef S, unsigned Col) : Str(S), Column(Col) {}
-    Field(unsigned Col) : Str(""), Column(Col) {}
+    Field(unsigned Col) : Column(Col) {}
   };
 
   template <typename T, typename TEnum>
@@ -433,6 +481,8 @@ private:
   void printRelocHeader(unsigned SType);
   void printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
                        const Elf_Rela &R, bool IsRela);
+  void printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
+                       StringRef SymbolName, const Elf_Rela &R, bool IsRela);
   void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
                    StringRef StrTable, bool IsDynamic) override;
   std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol,
@@ -442,6 +492,8 @@ private:
   bool checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
   bool checkVMA(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
   bool checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
+  void printProgramHeaders(const ELFO *Obj);
+  void printSectionMapping(const ELFO *Obj);
 };
 
 template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
@@ -456,10 +508,18 @@ public:
   void printRelocations(const ELFO *Obj) override;
   void printRelocations(const Elf_Shdr *Sec, const ELFO *Obj);
   void printSectionHeaders(const ELFO *Obj) override;
-  void printSymbols(const ELFO *Obj) override;
-  void printDynamicSymbols(const ELFO *Obj) override;
+  void printSymbols(const ELFO *Obj, bool PrintSymbols,
+                    bool PrintDynamicSymbols) override;
+  void printDynamic(const ELFFile<ELFT> *Obj) override;
   void printDynamicRelocations(const ELFO *Obj) override;
-  void printProgramHeaders(const ELFO *Obj) override;
+  void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+                           cl::boolOrDefault PrintSectionMapping) override;
+  void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                 const Elf_Shdr *Sec) override;
+  void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                     const Elf_Shdr *Sec) override;
+  void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+                                     const Elf_Shdr *Sec) override;
   void printHashHistogram(const ELFFile<ELFT> *Obj) override;
   void printCGProfile(const ELFFile<ELFT> *Obj) override;
   void printAddrsig(const ELFFile<ELFT> *Obj) override;
@@ -471,8 +531,12 @@ public:
 private:
   void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab);
   void printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel);
+  void printSymbols(const ELFO *Obj);
+  void printDynamicSymbols(const ELFO *Obj);
   void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
                    StringRef StrTable, bool IsDynamic) override;
+  void printProgramHeaders(const ELFO *Obj);
+  void printSectionMapping(const ELFO *Obj) {}
 
   ScopedPrinter &W;
 };
@@ -516,65 +580,71 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
 // Iterate through the versions needed section, and place each Elf_Vernaux
 // in the VersionMap according to its index.
 template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
-  unsigned vn_size = sec->sh_size;  // Size of section in bytes
-  unsigned vn_count = sec->sh_info; // Number of Verneed entries
-  const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
-  const char *sec_end = sec_start + vn_size;
+void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *Sec) const {
+  unsigned VerneedSize = Sec->sh_size;    // Size of section in bytes
+  unsigned VerneedEntries = Sec->sh_info; // Number of Verneed entries
+  const uint8_t *VerneedStart = reinterpret_cast<const uint8_t *>(
+      ObjF->getELFFile()->base() + Sec->sh_offset);
+  const uint8_t *VerneedEnd = VerneedStart + VerneedSize;
   // The first Verneed entry is at the start of the section.
-  const char *p = sec_start;
-  for (unsigned i = 0; i < vn_count; i++) {
-    if (p + sizeof(Elf_Verneed) > sec_end)
+  const uint8_t *VerneedBuf = VerneedStart;
+  for (unsigned VerneedIndex = 0; VerneedIndex < VerneedEntries;
+       ++VerneedIndex) {
+    if (VerneedBuf + sizeof(Elf_Verneed) > VerneedEnd)
       report_fatal_error("Section ended unexpectedly while scanning "
                          "version needed records.");
-    const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
-    if (vn->vn_version != ELF::VER_NEED_CURRENT)
+    const Elf_Verneed *Verneed =
+        reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+    if (Verneed->vn_version != ELF::VER_NEED_CURRENT)
       report_fatal_error("Unexpected verneed version");
     // Iterate through the Vernaux entries
-    const char *paux = p + vn->vn_aux;
-    for (unsigned j = 0; j < vn->vn_cnt; j++) {
-      if (paux + sizeof(Elf_Vernaux) > sec_end)
+    const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+    for (unsigned VernauxIndex = 0; VernauxIndex < Verneed->vn_cnt;
+         ++VernauxIndex) {
+      if (VernauxBuf + sizeof(Elf_Vernaux) > VerneedEnd)
         report_fatal_error("Section ended unexpected while scanning auxiliary "
                            "version needed records.");
-      const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
-      size_t index = vna->vna_other & ELF::VERSYM_VERSION;
-      if (index >= VersionMap.size())
-        VersionMap.resize(index + 1);
-      VersionMap[index] = VersionMapEntry(vna);
-      paux += vna->vna_next;
+      const Elf_Vernaux *Vernaux =
+          reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+      size_t Index = Vernaux->vna_other & ELF::VERSYM_VERSION;
+      if (Index >= VersionMap.size())
+        VersionMap.resize(Index + 1);
+      VersionMap[Index] = VersionMapEntry(Vernaux);
+      VernauxBuf += Vernaux->vna_next;
     }
-    p += vn->vn_next;
+    VerneedBuf += Verneed->vn_next;
   }
 }
 
 // Iterate through the version definitions, and place each Elf_Verdef
 // in the VersionMap according to its index.
 template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
-  unsigned vd_size = sec->sh_size;  // Size of section in bytes
-  unsigned vd_count = sec->sh_info; // Number of Verdef entries
-  const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
-  const char *sec_end = sec_start + vd_size;
+void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *Sec) const {
+  unsigned VerdefSize = Sec->sh_size;    // Size of section in bytes
+  unsigned VerdefEntries = Sec->sh_info; // Number of Verdef entries
+  const uint8_t *VerdefStart = reinterpret_cast<const uint8_t *>(
+      ObjF->getELFFile()->base() + Sec->sh_offset);
+  const uint8_t *VerdefEnd = VerdefStart + VerdefSize;
   // The first Verdef entry is at the start of the section.
-  const char *p = sec_start;
-  for (unsigned i = 0; i < vd_count; i++) {
-    if (p + sizeof(Elf_Verdef) > sec_end)
+  const uint8_t *VerdefBuf = VerdefStart;
+  for (unsigned VerdefIndex = 0; VerdefIndex < VerdefEntries; ++VerdefIndex) {
+    if (VerdefBuf + sizeof(Elf_Verdef) > VerdefEnd)
       report_fatal_error("Section ended unexpectedly while scanning "
                          "version definitions.");
-    const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
-    if (vd->vd_version != ELF::VER_DEF_CURRENT)
+    const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+    if (Verdef->vd_version != ELF::VER_DEF_CURRENT)
       report_fatal_error("Unexpected verdef version");
-    size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
-    if (index >= VersionMap.size())
-      VersionMap.resize(index + 1);
-    VersionMap[index] = VersionMapEntry(vd);
-    p += vd->vd_next;
+    size_t Index = Verdef->vd_ndx & ELF::VERSYM_VERSION;
+    if (Index >= VersionMap.size())
+      VersionMap.resize(Index + 1);
+    VersionMap[Index] = VersionMapEntry(Verdef);
+    VerdefBuf += Verdef->vd_next;
   }
 }
 
 template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
   // If there is no dynamic symtab or version table, there is nothing to do.
-  if (!DynSymRegion.Addr || !dot_gnu_version_sec)
+  if (!DynSymRegion.Addr || !SymbolVersionSection)
     return;
 
   // Has the VersionMap already been loaded?
@@ -586,243 +656,111 @@ template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
   VersionMap.push_back(VersionMapEntry());
   VersionMap.push_back(VersionMapEntry());
 
-  if (dot_gnu_version_d_sec)
-    LoadVersionDefs(dot_gnu_version_d_sec);
-
-  if (dot_gnu_version_r_sec)
-    LoadVersionNeeds(dot_gnu_version_r_sec);
-}
-
-template <typename ELFO, class ELFT>
-static void printVersionSymbolSection(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
-                                      const typename ELFO::Elf_Shdr *Sec,
-                                      ScopedPrinter &W) {
-  DictScope SS(W, "Version symbols");
-  if (!Sec)
-    return;
-  StringRef Name = unwrapOrError(Obj->getSectionName(Sec));
-  W.printNumber("Section Name", Name, Sec->sh_name);
-  W.printHex("Address", Sec->sh_addr);
-  W.printHex("Offset", Sec->sh_offset);
-  W.printNumber("Link", Sec->sh_link);
-
-  const uint8_t *P = (const uint8_t *)Obj->base() + Sec->sh_offset;
-  StringRef StrTable = Dumper->getDynamicStringTable();
-
-  // Same number of entries in the dynamic symbol table (DT_SYMTAB).
-  ListScope Syms(W, "Symbols");
-  for (const typename ELFO::Elf_Sym &Sym : Dumper->dynamic_symbols()) {
-    DictScope S(W, "Symbol");
-    std::string FullSymbolName =
-        Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */);
-    W.printNumber("Version", *P);
-    W.printString("Name", FullSymbolName);
-    P += sizeof(typename ELFO::Elf_Half);
-  }
-}
-
-static const EnumEntry<unsigned> SymVersionFlags[] = {
-    {"Base", "BASE", VER_FLG_BASE},
-    {"Weak", "WEAK", VER_FLG_WEAK},
-    {"Info", "INFO", VER_FLG_INFO}};
-
-template <typename ELFO, class ELFT>
-static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
-                                          const ELFO *Obj,
-                                          const typename ELFO::Elf_Shdr *Sec,
-                                          ScopedPrinter &W) {
-  using VerDef = typename ELFO::Elf_Verdef;
-  using VerdAux = typename ELFO::Elf_Verdaux;
-
-  DictScope SD(W, "SHT_GNU_verdef");
-  if (!Sec)
-    return;
-
-  // The number of entries in the section SHT_GNU_verdef
-  // is determined by DT_VERDEFNUM tag.
-  unsigned VerDefsNum = 0;
-  for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
-    if (Dyn.d_tag == DT_VERDEFNUM) {
-      VerDefsNum = Dyn.d_un.d_val;
-      break;
-    }
-  }
-
-  const uint8_t *SecStartAddress =
-      (const uint8_t *)Obj->base() + Sec->sh_offset;
-  const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
-  const uint8_t *P = SecStartAddress;
-  const typename ELFO::Elf_Shdr *StrTab =
-      unwrapOrError(Obj->getSection(Sec->sh_link));
-
-  while (VerDefsNum--) {
-    if (P + sizeof(VerDef) > SecEndAddress)
-      report_fatal_error("invalid offset in the section");
-
-    auto *VD = reinterpret_cast<const VerDef *>(P);
-    DictScope Def(W, "Definition");
-    W.printNumber("Version", VD->vd_version);
-    W.printEnum("Flags", VD->vd_flags, makeArrayRef(SymVersionFlags));
-    W.printNumber("Index", VD->vd_ndx);
-    W.printNumber("Hash", VD->vd_hash);
-    W.printString("Name",
-                  StringRef((const char *)(Obj->base() + StrTab->sh_offset +
-                                           VD->getAux()->vda_name)));
-    if (!VD->vd_cnt)
-      report_fatal_error("at least one definition string must exist");
-    if (VD->vd_cnt > 2)
-      report_fatal_error("more than one predecessor is not expected");
-
-    if (VD->vd_cnt == 2) {
-      const uint8_t *PAux = P + VD->vd_aux + VD->getAux()->vda_next;
-      const VerdAux *Aux = reinterpret_cast<const VerdAux *>(PAux);
-      W.printString("Predecessor",
-                    StringRef((const char *)(Obj->base() + StrTab->sh_offset +
-                                             Aux->vda_name)));
-    }
+  if (SymbolVersionDefSection)
+    LoadVersionDefs(SymbolVersionDefSection);
 
-    P += VD->vd_next;
-  }
-}
-
-template <typename ELFO, class ELFT>
-static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
-                                          const ELFO *Obj,
-                                          const typename ELFO::Elf_Shdr *Sec,
-                                          ScopedPrinter &W) {
-  using VerNeed = typename ELFO::Elf_Verneed;
-  using VernAux = typename ELFO::Elf_Vernaux;
-
-  DictScope SD(W, "SHT_GNU_verneed");
-  if (!Sec)
-    return;
-
-  unsigned VerNeedNum = 0;
-  for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
-    if (Dyn.d_tag == DT_VERNEEDNUM) {
-      VerNeedNum = Dyn.d_un.d_val;
-      break;
-    }
-  }
-
-  const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset;
-  const typename ELFO::Elf_Shdr *StrTab =
-      unwrapOrError(Obj->getSection(Sec->sh_link));
-
-  const uint8_t *P = SecData;
-  for (unsigned I = 0; I < VerNeedNum; ++I) {
-    const VerNeed *Need = reinterpret_cast<const VerNeed *>(P);
-    DictScope Entry(W, "Dependency");
-    W.printNumber("Version", Need->vn_version);
-    W.printNumber("Count", Need->vn_cnt);
-    W.printString("FileName",
-                  StringRef((const char *)(Obj->base() + StrTab->sh_offset +
-                                           Need->vn_file)));
-
-    const uint8_t *PAux = P + Need->vn_aux;
-    for (unsigned J = 0; J < Need->vn_cnt; ++J) {
-      const VernAux *Aux = reinterpret_cast<const VernAux *>(PAux);
-      DictScope Entry(W, "Entry");
-      W.printNumber("Hash", Aux->vna_hash);
-      W.printEnum("Flags", Aux->vna_flags, makeArrayRef(SymVersionFlags));
-      W.printNumber("Index", Aux->vna_other);
-      W.printString("Name",
-                    StringRef((const char *)(Obj->base() + StrTab->sh_offset +
-                                             Aux->vna_name)));
-      PAux += Aux->vna_next;
-    }
-    P += Need->vn_next;
-  }
-}
-
-template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
-  // Dump version symbol section.
-  printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W);
-
-  // Dump version definition section.
-  printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W);
-
-  // Dump version dependency section.
-  printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W);
+  if (SymbolVersionNeedSection)
+    LoadVersionNeeds(SymbolVersionNeedSection);
 }
 
 template <typename ELFT>
 StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
-                                            const Elf_Sym *symb,
+                                            const Elf_Sym *Sym,
                                             bool &IsDefault) const {
   // This is a dynamic symbol. Look in the GNU symbol version table.
-  if (!dot_gnu_version_sec) {
+  if (!SymbolVersionSection) {
     // No version table.
     IsDefault = false;
-    return StringRef("");
+    return "";
   }
 
   // Determine the position in the symbol table of this entry.
-  size_t entry_index = (reinterpret_cast<uintptr_t>(symb) -
+  size_t EntryIndex = (reinterpret_cast<uintptr_t>(Sym) -
                         reinterpret_cast<uintptr_t>(DynSymRegion.Addr)) /
                        sizeof(Elf_Sym);
 
-  // Get the corresponding version index entry
-  const Elf_Versym *vs = unwrapOrError(
-      ObjF->getELFFile()->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
-  size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
+  // Get the corresponding version index entry.
+  const Elf_Versym *Versym =
+      unwrapOrError(ObjF->getELFFile()->template getEntry<Elf_Versym>(
+          SymbolVersionSection, EntryIndex));
+  return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault);
+}
+
+static std::string maybeDemangle(StringRef Name) {
+  return opts::Demangle ? demangle(Name) : Name.str();
+}
+
+template <typename ELFT>
+std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
+  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+  StringRef StrTable =
+      unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
+  Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
+  if (Index >= Syms.size())
+    reportError("Invalid symbol index");
+  const Elf_Sym *Sym = &Syms[Index];
+  return maybeDemangle(unwrapOrError(Sym->getName(StrTable)));
+}
+
+template <typename ELFT>
+StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
+                                                   uint32_t SymbolVersionIndex,
+                                                   bool &IsDefault) const {
+  size_t VersionIndex = SymbolVersionIndex & VERSYM_VERSION;
 
   // Special markers for unversioned symbols.
-  if (version_index == ELF::VER_NDX_LOCAL ||
-      version_index == ELF::VER_NDX_GLOBAL) {
+  if (VersionIndex == VER_NDX_LOCAL || VersionIndex == VER_NDX_GLOBAL) {
     IsDefault = false;
-    return StringRef("");
+    return "";
   }
 
-  // Lookup this symbol in the version table
+  // Lookup this symbol in the version table.
   LoadVersionMap();
-  if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
+  if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull())
     reportError("Invalid version entry");
-  const VersionMapEntry &entry = VersionMap[version_index];
+  const VersionMapEntry &Entry = VersionMap[VersionIndex];
 
-  // Get the version name string
-  size_t name_offset;
-  if (entry.isVerdef()) {
+  // Get the version name string.
+  size_t NameOffset;
+  if (Entry.isVerdef()) {
     // The first Verdaux entry holds the name.
-    name_offset = entry.getVerdef()->getAux()->vda_name;
-    IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
+    NameOffset = Entry.getVerdef()->getAux()->vda_name;
+    IsDefault = !(SymbolVersionIndex & VERSYM_HIDDEN);
   } else {
-    name_offset = entry.getVernaux()->vna_name;
+    NameOffset = Entry.getVernaux()->vna_name;
     IsDefault = false;
   }
-  if (name_offset >= StrTab.size())
+  if (NameOffset >= StrTab.size())
     reportError("Invalid string offset");
-  return StringRef(StrTab.data() + name_offset);
-}
-
-template <typename ELFT>
-StringRef ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
-  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
-  StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
-  Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
-  if (Index >= Syms.size())
-    reportError("Invalid symbol index");
-  const Elf_Sym *Sym = &Syms[Index];
-  return unwrapOrError(Sym->getName(StrTable));
+  return StrTab.data() + NameOffset;
 }
 
 template <typename ELFT>
 std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
                                                StringRef StrTable,
                                                bool IsDynamic) const {
-  StringRef SymbolName = unwrapOrError(Symbol->getName(StrTable));
+  std::string SymbolName =
+      maybeDemangle(unwrapOrError(Symbol->getName(StrTable)));
+
+  if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) {
+    unsigned SectionIndex;
+    StringRef SectionName;
+    Elf_Sym_Range Syms =
+        unwrapOrError(ObjF->getELFFile()->symbols(DotSymtabSec));
+    getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex);
+    return SectionName;
+  }
+
   if (!IsDynamic)
     return SymbolName;
 
-  std::string FullSymbolName(SymbolName);
-
   bool IsDefault;
   StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
   if (!Version.empty()) {
-    FullSymbolName += (IsDefault ? "@@" : "@");
-    FullSymbolName += Version;
+    SymbolName += (IsDefault ? "@@" : "@");
+    SymbolName += Version;
   }
-  return FullSymbolName;
+  return SymbolName;
 }
 
 template <typename ELFT>
@@ -914,6 +852,11 @@ static const EnumEntry<unsigned> ElfOSABI[] = {
   {"Standalone",   "Standalone App",       ELF::ELFOSABI_STANDALONE}
 };
 
+static const EnumEntry<unsigned> SymVersionFlags[] = {
+    {"Base", "BASE", VER_FLG_BASE},
+    {"Weak", "WEAK", VER_FLG_WEAK},
+    {"Info", "INFO", VER_FLG_INFO}};
+
 static const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
   {"AMDGPU_HSA",    "AMDGPU - HSA",    ELF::ELFOSABI_AMDGPU_HSA},
   {"AMDGPU_PAL",    "AMDGPU - PAL",    ELF::ELFOSABI_AMDGPU_PAL},
@@ -1103,16 +1046,6 @@ static const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
     {"HIDDEN",    "HIDDEN",    ELF::STV_HIDDEN},
     {"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}};
 
-static const EnumEntry<unsigned> ElfSymbolTypes[] = {
-    {"None",      "NOTYPE",  ELF::STT_NOTYPE},
-    {"Object",    "OBJECT",  ELF::STT_OBJECT},
-    {"Function",  "FUNC",    ELF::STT_FUNC},
-    {"Section",   "SECTION", ELF::STT_SECTION},
-    {"File",      "FILE",    ELF::STT_FILE},
-    {"Common",    "COMMON",  ELF::STT_COMMON},
-    {"TLS",       "TLS",     ELF::STT_TLS},
-    {"GNU_IFunc", "IFUNC",   ELF::STT_GNU_IFUNC}};
-
 static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
   { "AMDGPU_HSA_KERNEL",            ELF::STT_AMDGPU_HSA_KERNEL }
 };
@@ -1205,14 +1138,12 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
   // program header type.
   switch (Arch) {
   case ELF::EM_ARM:
-    switch (Type) {
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
-    }
+    switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX); }
     break;
   case ELF::EM_MIPS:
   case ELF::EM_MIPS_RS3_LE:
     switch (Type) {
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
+      LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
     LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
@@ -1233,14 +1164,15 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
   LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME);
   LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND);
 
-  LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
-  LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
 
-  LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
-  LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
-  LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
+    LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
 
-  default: return "";
+  default:
+    return "";
   }
 }
 
@@ -1368,7 +1300,11 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
+  LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
   LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
 };
@@ -1420,68 +1356,118 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
 }
 
 template <typename ELFT>
-ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
-    ScopedPrinter &Writer)
-    : ObjDumper(Writer), ObjF(ObjF) {
-  SmallVector<const Elf_Phdr *, 4> LoadSegments;
-  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+  // Try to locate the PT_DYNAMIC header.
+  const Elf_Phdr *DynamicPhdr = nullptr;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
-    if (Phdr.p_type == ELF::PT_DYNAMIC) {
-      DynamicTable = createDRIFrom(&Phdr, sizeof(Elf_Dyn));
+    if (Phdr.p_type != ELF::PT_DYNAMIC)
       continue;
-    }
-    if (Phdr.p_type != ELF::PT_LOAD || Phdr.p_filesz == 0)
+    DynamicPhdr = &Phdr;
+    break;
+  }
+
+  // Try to locate the .dynamic section in the sections header table.
+  const Elf_Shdr *DynamicSec = nullptr;
+  for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+    if (Sec.sh_type != ELF::SHT_DYNAMIC)
       continue;
-    LoadSegments.push_back(&Phdr);
+    DynamicSec = &Sec;
+    break;
   }
 
+  // Information in the section header has priority over the information
+  // in a PT_DYNAMIC header.
+  // Ignore sh_entsize and use the expected value for entry size explicitly.
+  // This allows us to dump the dynamic sections with a broken sh_entsize
+  // field.
+  if (DynamicSec) {
+    DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
+                             DynamicSec->sh_size, sizeof(Elf_Dyn)});
+    parseDynamicTable();
+  }
+
+  // If we have a PT_DYNAMIC header, we will either check the found dynamic
+  // section or take the dynamic table data directly from the header.
+  if (!DynamicPhdr)
+    return;
+
+  if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
+      ObjF->getMemoryBufferRef().getBufferSize())
+    reportError(
+        "PT_DYNAMIC segment offset + size exceeds the size of the file");
+
+  if (!DynamicSec) {
+    DynamicTable = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn));
+    parseDynamicTable();
+    return;
+  }
+
+  StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
+  if (DynamicSec->sh_addr + DynamicSec->sh_size >
+          DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
+      DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
+    reportWarning("The SHT_DYNAMIC section '" + Name +
+                  "' is not contained within the "
+                  "PT_DYNAMIC segment");
+
+  if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
+    reportWarning("The SHT_DYNAMIC section '" + Name +
+                  "' is not at the start of "
+                  "PT_DYNAMIC segment");
+}
+
+template <typename ELFT>
+ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
+    ScopedPrinter &Writer)
+    : ObjDumper(Writer), ObjF(ObjF) {
+  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
     switch (Sec.sh_type) {
     case ELF::SHT_SYMTAB:
-      if (DotSymtabSec != nullptr)
-        reportError("Multiple SHT_SYMTAB");
-      DotSymtabSec = &Sec;
+      if (!DotSymtabSec)
+        DotSymtabSec = &Sec;
       break;
     case ELF::SHT_DYNSYM:
-      if (DynSymRegion.Size)
-        reportError("Multiple SHT_DYNSYM");
-      DynSymRegion = createDRIFrom(&Sec);
-      // This is only used (if Elf_Shdr present)for naming section in GNU style
-      DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
-      DynamicStringTable = unwrapOrError(Obj->getStringTableForSymtab(Sec));
+      if (!DynSymRegion.Size) {
+        DynSymRegion = createDRIFrom(&Sec);
+        // This is only used (if Elf_Shdr present)for naming section in GNU
+        // style
+        DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
+
+        if (Expected<StringRef> E = Obj->getStringTableForSymtab(Sec))
+          DynamicStringTable = *E;
+        else
+          warn(E.takeError());
+      }
       break;
     case ELF::SHT_SYMTAB_SHNDX:
       ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec));
       break;
     case ELF::SHT_GNU_versym:
-      if (dot_gnu_version_sec != nullptr)
-        reportError("Multiple SHT_GNU_versym");
-      dot_gnu_version_sec = &Sec;
+      if (!SymbolVersionSection)
+        SymbolVersionSection = &Sec;
       break;
     case ELF::SHT_GNU_verdef:
-      if (dot_gnu_version_d_sec != nullptr)
-        reportError("Multiple SHT_GNU_verdef");
-      dot_gnu_version_d_sec = &Sec;
+      if (!SymbolVersionDefSection)
+        SymbolVersionDefSection = &Sec;
       break;
     case ELF::SHT_GNU_verneed:
-      if (dot_gnu_version_r_sec != nullptr)
-        reportError("Multiple SHT_GNU_verneed");
-      dot_gnu_version_r_sec = &Sec;
+      if (!SymbolVersionNeedSection)
+        SymbolVersionNeedSection = &Sec;
       break;
     case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
-      if (DotCGProfileSec != nullptr)
-        reportError("Multiple .llvm.call-graph-profile");
-      DotCGProfileSec = &Sec;
+      if (!DotCGProfileSec)
+        DotCGProfileSec = &Sec;
       break;
     case ELF::SHT_LLVM_ADDRSIG:
-      if (DotAddrsigSec != nullptr)
-        reportError("Multiple .llvm_addrsig");
-      DotAddrsigSec = &Sec;
+      if (!DotAddrsigSec)
+        DotAddrsigSec = &Sec;
       break;
     }
   }
 
-  parseDynamicTable(LoadSegments);
+  loadDynamicTable(Obj);
 
   if (opts::Output == opts::GNU)
     ELFDumperStyle.reset(new GNUStyle<ELFT>(Writer, this));
@@ -1489,13 +1475,84 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
     ELFDumperStyle.reset(new LLVMStyle<ELFT>(Writer, this));
 }
 
-template <typename ELFT>
-void ELFDumper<ELFT>::parseDynamicTable(
-    ArrayRef<const Elf_Phdr *> LoadSegments) {
-  auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
+static const char *getTypeString(unsigned Arch, uint64_t Type) {
+#define DYNAMIC_TAG(n, v)
+  switch (Arch) {
+
+  case EM_AARCH64:
+    switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value)                                       \
+    case DT_##name:                                                            \
+      return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+    }
+    break;
+
+  case EM_HEXAGON:
+    switch (Type) {
+#define HEXAGON_DYNAMIC_TAG(name, value)                                       \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef HEXAGON_DYNAMIC_TAG
+    }
+    break;
+
+  case EM_MIPS:
+    switch (Type) {
+#define MIPS_DYNAMIC_TAG(name, value)                                          \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef MIPS_DYNAMIC_TAG
+    }
+    break;
+
+  case EM_PPC64:
+    switch (Type) {
+#define PPC64_DYNAMIC_TAG(name, value)                                         \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC64_DYNAMIC_TAG
+    }
+    break;
+  }
+#undef DYNAMIC_TAG
+  switch (Type) {
+// Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
+#define MIPS_DYNAMIC_TAG(name, value)
+#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC64_DYNAMIC_TAG(name, value)
+// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
+#define DYNAMIC_TAG_MARKER(name, value)
+#define DYNAMIC_TAG(name, value)                                               \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
+#undef MIPS_DYNAMIC_TAG
+#undef HEXAGON_DYNAMIC_TAG
+#undef PPC64_DYNAMIC_TAG
+#undef DYNAMIC_TAG_MARKER
+  default:
+    return "unknown";
+  }
+}
+
+template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
+  auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
     auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
-    if (!MappedAddrOrError)
-      report_fatal_error(MappedAddrOrError.takeError());
+    if (!MappedAddrOrError) {
+      reportWarning("Unable to parse DT_" +
+                    Twine(getTypeString(
+                        ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
+                    ": " + llvm::toString(MappedAddrOrError.takeError()));
+      return nullptr;
+    }
     return MappedAddrOrError.get();
   };
 
@@ -1505,25 +1562,26 @@ void ELFDumper<ELFT>::parseDynamicTable(
   for (const Elf_Dyn &Dyn : dynamic_table()) {
     switch (Dyn.d_tag) {
     case ELF::DT_HASH:
-      HashTable =
-          reinterpret_cast<const Elf_Hash *>(toMappedAddr(Dyn.getPtr()));
+      HashTable = reinterpret_cast<const Elf_Hash *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_GNU_HASH:
-      GnuHashTable =
-          reinterpret_cast<const Elf_GnuHash *>(toMappedAddr(Dyn.getPtr()));
+      GnuHashTable = reinterpret_cast<const Elf_GnuHash *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_STRTAB:
-      StringTableBegin = (const char *)toMappedAddr(Dyn.getPtr());
+      StringTableBegin = reinterpret_cast<const char *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_STRSZ:
       StringTableSize = Dyn.getVal();
       break;
     case ELF::DT_SYMTAB:
-      DynSymRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       DynSymRegion.EntSize = sizeof(Elf_Sym);
       break;
     case ELF::DT_RELA:
-      DynRelaRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELASZ:
       DynRelaRegion.Size = Dyn.getVal();
@@ -1535,7 +1593,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
       SONameOffset = Dyn.getVal();
       break;
     case ELF::DT_REL:
-      DynRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELSZ:
       DynRelRegion.Size = Dyn.getVal();
@@ -1545,7 +1603,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
       break;
     case ELF::DT_RELR:
     case ELF::DT_ANDROID_RELR:
-      DynRelrRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelrRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELRSZ:
     case ELF::DT_ANDROID_RELRSZ:
@@ -1565,7 +1623,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
                     Twine((uint64_t)Dyn.getVal()));
       break;
     case ELF::DT_JMPREL:
-      DynPLTRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_PLTRELSZ:
       DynPLTRelRegion.Size = Dyn.getVal();
@@ -1574,8 +1632,8 @@ void ELFDumper<ELFT>::parseDynamicTable(
   }
   if (StringTableBegin)
     DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
-  if (SONameOffset)
-    SOName = getDynamicString(SONameOffset);
+  if (SONameOffset && SONameOffset < DynamicStringTable.size())
+    SOName = DynamicStringTable.data() + SONameOffset;
 }
 
 template <typename ELFT>
@@ -1593,37 +1651,52 @@ typename ELFDumper<ELFT>::Elf_Relr_Range ELFDumper<ELFT>::dyn_relrs() const {
   return DynRelrRegion.getAsArrayRef<Elf_Relr>();
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printFileHeaders() {
+template <class ELFT> void ELFDumper<ELFT>::printFileHeaders() {
   ELFDumperStyle->printFileHeaders(ObjF->getELFFile());
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printSectionHeaders() {
+template <class ELFT> void ELFDumper<ELFT>::printSectionHeaders() {
   ELFDumperStyle->printSectionHeaders(ObjF->getELFFile());
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printRelocations() {
+template <class ELFT> void ELFDumper<ELFT>::printRelocations() {
   ELFDumperStyle->printRelocations(ObjF->getELFFile());
 }
 
-template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
-  ELFDumperStyle->printProgramHeaders(ObjF->getELFFile());
+template <class ELFT>
+void ELFDumper<ELFT>::printProgramHeaders(
+    bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
+  ELFDumperStyle->printProgramHeaders(ObjF->getELFFile(), PrintProgramHeaders,
+                                      PrintSectionMapping);
+}
+
+template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
+  // Dump version symbol section.
+  ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(),
+                                            SymbolVersionSection);
+
+  // Dump version definition section.
+  ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(),
+                                                SymbolVersionDefSection);
+
+  // Dump version dependency section.
+  ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(),
+                                                SymbolVersionNeedSection);
 }
 
 template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
   ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printSymbols() {
-  ELFDumperStyle->printSymbols(ObjF->getELFFile());
+template <class ELFT>
+void ELFDumper<ELFT>::printSymbols(bool PrintSymbols,
+                                   bool PrintDynamicSymbols) {
+  ELFDumperStyle->printSymbols(ObjF->getELFFile(), PrintSymbols,
+                               PrintDynamicSymbols);
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printDynamicSymbols() {
-  ELFDumperStyle->printDynamicSymbols(ObjF->getELFFile());
+template <class ELFT> void ELFDumper<ELFT>::printHashSymbols() {
+  ELFDumperStyle->printHashSymbols(ObjF->getELFFile());
 }
 
 template <class ELFT> void ELFDumper<ELFT>::printHashHistogram() {
@@ -1642,61 +1715,7 @@ template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
   ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
 }
 
-static const char *getTypeString(unsigned Arch, uint64_t Type) {
-#define DYNAMIC_TAG(n, v)
-  switch (Arch) {
-  case EM_HEXAGON:
-    switch (Type) {
-#define HEXAGON_DYNAMIC_TAG(name, value)                                       \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef HEXAGON_DYNAMIC_TAG
-    }
-    break;
-
-  case EM_MIPS:
-    switch (Type) {
-#define MIPS_DYNAMIC_TAG(name, value)                                          \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef MIPS_DYNAMIC_TAG
-    }
-    break;
-
-  case EM_PPC64:
-    switch(Type) {
-#define PPC64_DYNAMIC_TAG(name, value)                                         \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef PPC64_DYNAMIC_TAG
-    }
-    break;
-  }
-#undef DYNAMIC_TAG
-  switch (Type) {
-// Now handle all dynamic tags except the architecture specific ones
-#define MIPS_DYNAMIC_TAG(name, value)
-#define HEXAGON_DYNAMIC_TAG(name, value)
-#define PPC64_DYNAMIC_TAG(name, value)
-// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
-#define DYNAMIC_TAG_MARKER(name, value)
-#define DYNAMIC_TAG(name, value)                                               \
-  case DT_##name:                                                              \
-    return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef DYNAMIC_TAG
-#undef MIPS_DYNAMIC_TAG
-#undef HEXAGON_DYNAMIC_TAG
-#undef PPC64_DYNAMIC_TAG
-#undef DYNAMIC_TAG_MARKER
-  default: return "unknown";
-  }
-}
-
-#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
+#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum)                                 \
   { #enum, prefix##_##enum }
 
 static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
@@ -1724,6 +1743,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, CONFALT),
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, ENDFILTEE),
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELDNE),
+  LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELPND),
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODIRECT),
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, IGNMULDEF),
   LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOKSYMS),
@@ -1776,20 +1796,97 @@ void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
 }
 
 template <class ELFT>
-StringRef ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
-  if (Value >= DynamicStringTable.size())
-    reportError("Invalid dynamic string table reference");
-  return StringRef(DynamicStringTable.data() + Value);
-}
-
-static void printLibrary(raw_ostream &OS, const Twine &Tag, const Twine &Name) {
-  OS << Tag << ": [" << Name << "]";
-}
+void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
+                                        uint64_t Value) const {
+  const char *ConvChar =
+      (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
+
+  // Handle custom printing of architecture specific tags
+  switch (ObjF->getELFFile()->getHeader()->e_machine) {
+  case EM_AARCH64:
+    switch (Type) {
+    case DT_AARCH64_BTI_PLT:
+    case DT_AARCH64_PAC_PLT:
+      OS << Value;
+      return;
+    default:
+      break;
+    }
+    break;
+  case EM_HEXAGON:
+    switch (Type) {
+    case DT_HEXAGON_VER:
+      OS << Value;
+      return;
+    case DT_HEXAGON_SYMSZ:
+    case DT_HEXAGON_PLT:
+      OS << format(ConvChar, Value);
+      return;
+    default:
+      break;
+    }
+    break;
+  case EM_MIPS:
+    switch (Type) {
+    case DT_MIPS_RLD_VERSION:
+    case DT_MIPS_LOCAL_GOTNO:
+    case DT_MIPS_SYMTABNO:
+    case DT_MIPS_UNREFEXTNO:
+      OS << Value;
+      return;
+    case DT_MIPS_TIME_STAMP:
+    case DT_MIPS_ICHECKSUM:
+    case DT_MIPS_IVERSION:
+    case DT_MIPS_BASE_ADDRESS:
+    case DT_MIPS_MSYM:
+    case DT_MIPS_CONFLICT:
+    case DT_MIPS_LIBLIST:
+    case DT_MIPS_CONFLICTNO:
+    case DT_MIPS_LIBLISTNO:
+    case DT_MIPS_GOTSYM:
+    case DT_MIPS_HIPAGENO:
+    case DT_MIPS_RLD_MAP:
+    case DT_MIPS_DELTA_CLASS:
+    case DT_MIPS_DELTA_CLASS_NO:
+    case DT_MIPS_DELTA_INSTANCE:
+    case DT_MIPS_DELTA_RELOC:
+    case DT_MIPS_DELTA_RELOC_NO:
+    case DT_MIPS_DELTA_SYM:
+    case DT_MIPS_DELTA_SYM_NO:
+    case DT_MIPS_DELTA_CLASSSYM:
+    case DT_MIPS_DELTA_CLASSSYM_NO:
+    case DT_MIPS_CXX_FLAGS:
+    case DT_MIPS_PIXIE_INIT:
+    case DT_MIPS_SYMBOL_LIB:
+    case DT_MIPS_LOCALPAGE_GOTIDX:
+    case DT_MIPS_LOCAL_GOTIDX:
+    case DT_MIPS_HIDDEN_GOTIDX:
+    case DT_MIPS_PROTECTED_GOTIDX:
+    case DT_MIPS_OPTIONS:
+    case DT_MIPS_INTERFACE:
+    case DT_MIPS_DYNSTR_ALIGN:
+    case DT_MIPS_INTERFACE_SIZE:
+    case DT_MIPS_RLD_TEXT_RESOLVE_ADDR:
+    case DT_MIPS_PERF_SUFFIX:
+    case DT_MIPS_COMPACT_SIZE:
+    case DT_MIPS_GP_VALUE:
+    case DT_MIPS_AUX_DYNAMIC:
+    case DT_MIPS_PLTGOT:
+    case DT_MIPS_RWPLT:
+    case DT_MIPS_RLD_MAP_REL:
+      OS << format(ConvChar, Value);
+      return;
+    case DT_MIPS_FLAGS:
+      printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
+      return;
+    default:
+      break;
+    }
+    break;
+  default:
+    break;
+  }
 
-template <class ELFT>
-void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
-  raw_ostream &OS = W.getOStream();
-  const char* ConvChar = (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
   switch (Type) {
   case DT_PLTREL:
     if (Value == DT_REL) {
@@ -1818,22 +1915,12 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
   case DT_VERSYM:
   case DT_GNU_HASH:
   case DT_NULL:
-  case DT_MIPS_BASE_ADDRESS:
-  case DT_MIPS_GOTSYM:
-  case DT_MIPS_RLD_MAP:
-  case DT_MIPS_RLD_MAP_REL:
-  case DT_MIPS_PLTGOT:
-  case DT_MIPS_OPTIONS:
     OS << format(ConvChar, Value);
     break;
   case DT_RELACOUNT:
   case DT_RELCOUNT:
   case DT_VERDEFNUM:
   case DT_VERNEEDNUM:
-  case DT_MIPS_RLD_VERSION:
-  case DT_MIPS_LOCAL_GOTNO:
-  case DT_MIPS_SYMTABNO:
-  case DT_MIPS_UNREFEXTNO:
     OS << Value;
     break;
   case DT_PLTRELSZ:
@@ -1851,24 +1938,30 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
     OS << Value << " (bytes)";
     break;
   case DT_NEEDED:
-    printLibrary(OS, "Shared library", getDynamicString(Value));
-    break;
   case DT_SONAME:
-    printLibrary(OS, "Library soname", getDynamicString(Value));
-    break;
   case DT_AUXILIARY:
-    printLibrary(OS, "Auxiliary library", getDynamicString(Value));
-    break;
+  case DT_USED:
   case DT_FILTER:
-    printLibrary(OS, "Filter library", getDynamicString(Value));
-    break;
   case DT_RPATH:
-  case DT_RUNPATH:
-    OS << getDynamicString(Value);
-    break;
-  case DT_MIPS_FLAGS:
-    printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
+  case DT_RUNPATH: {
+    const std::map<uint64_t, const char*> TagNames = {
+      {DT_NEEDED,    "Shared library"},
+      {DT_SONAME,    "Library soname"},
+      {DT_AUXILIARY, "Auxiliary library"},
+      {DT_USED,      "Not needed object"},
+      {DT_FILTER,    "Filter library"},
+      {DT_RPATH,     "Library rpath"},
+      {DT_RUNPATH,   "Library runpath"},
+    };
+    OS << TagNames.at(Type) << ": ";
+    if (DynamicStringTable.empty())
+      OS << "<String table is empty or was not found> ";
+    else if (Value < DynamicStringTable.size())
+      OS << "[" << StringRef(DynamicStringTable.data() + Value) << "]";
+    else
+      OS << "<Invalid offset 0x" << utohexstr(Value) << ">";
     break;
+  }
   case DT_FLAGS:
     printFlags(Value, makeArrayRef(ElfDynamicDTFlags), OS);
     break;
@@ -1881,14 +1974,9 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
   }
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printUnwindInfo() {
-  const unsigned Machine = ObjF->getELFFile()->getHeader()->e_machine;
-  if (Machine == EM_386 || Machine == EM_X86_64) {
-    DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
-    return Ctx.printUnwindInformation();
-  }
-  W.startLine() << "UnwindInfo not implemented.\n";
+template <class ELFT> void ELFDumper<ELFT>::printUnwindInfo() {
+  DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
+  Ctx.printUnwindInformation();
 }
 
 namespace {
@@ -1898,73 +1986,40 @@ template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
   const unsigned Machine = Obj->getHeader()->e_machine;
   if (Machine == EM_ARM) {
     ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, DotSymtabSec);
-    return Ctx.PrintUnwindInformation();
+    Ctx.PrintUnwindInformation();
   }
-  W.startLine() << "UnwindInfo not implemented.\n";
+  DwarfCFIEH::PrinterContext<ELF32LE> Ctx(W, ObjF);
+  Ctx.printUnwindInformation();
 }
 
 } // end anonymous namespace
 
-template<class ELFT>
-void ELFDumper<ELFT>::printDynamicTable() {
-  auto I = dynamic_table().begin();
-  auto E = dynamic_table().end();
-
-  if (I == E)
-    return;
-
-  --E;
-  while (I != E && E->getTag() == ELF::DT_NULL)
-    --E;
-  if (E->getTag() != ELF::DT_NULL)
-    ++E;
-  ++E;
-
-  ptrdiff_t Total = std::distance(I, E);
-  if (Total == 0)
-    return;
-
-  raw_ostream &OS = W.getOStream();
-  W.startLine() << "DynamicSection [ (" << Total << " entries)\n";
-
-  bool Is64 = ELFT::Is64Bits;
-
-  W.startLine()
-     << "  Tag" << (Is64 ? "                " : "        ") << "Type"
-     << "                 " << "Name/Value\n";
-  while (I != E) {
-    const Elf_Dyn &Entry = *I;
-    uintX_t Tag = Entry.getTag();
-    ++I;
-    W.startLine() << "  " << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU) << " "
-                  << format("%-21s", getTypeString(ObjF->getELFFile()->getHeader()->e_machine, Tag));
-    printValue(Tag, Entry.getVal());
-    OS << "\n";
-  }
-
-  W.startLine() << "]\n";
+template <class ELFT> void ELFDumper<ELFT>::printDynamicTable() {
+  ELFDumperStyle->printDynamic(ObjF->getELFFile());
 }
 
-template<class ELFT>
-void ELFDumper<ELFT>::printNeededLibraries() {
+template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
   ListScope D(W, "NeededLibraries");
 
   using LibsTy = std::vector<StringRef>;
   LibsTy Libs;
 
   for (const auto &Entry : dynamic_table())
-    if (Entry.d_tag == ELF::DT_NEEDED)
-      Libs.push_back(getDynamicString(Entry.d_un.d_val));
+    if (Entry.d_tag == ELF::DT_NEEDED) {
+      uint64_t Value = Entry.d_un.d_val;
+      if (Value < DynamicStringTable.size())
+        Libs.push_back(StringRef(DynamicStringTable.data() + Value));
+      else
+        Libs.push_back("<Library name index out of range>");
+    }
 
-  std::stable_sort(Libs.begin(), Libs.end());
+  llvm::stable_sort(Libs);
 
   for (const auto &L : Libs)
-     W.startLine() << L << "\n";
+    W.startLine() << L << "\n";
 }
 
-
-template <typename ELFT>
-void ELFDumper<ELFT>::printHashTable() {
+template <typename ELFT> void ELFDumper<ELFT>::printHashTable() {
   DictScope D(W, "HashTable");
   if (!HashTable)
     return;
@@ -1974,8 +2029,7 @@ void ELFDumper<ELFT>::printHashTable() {
   W.printList("Chains", HashTable->chains());
 }
 
-template <typename ELFT>
-void ELFDumper<ELFT>::printGnuHashTable() {
+template <typename ELFT> void ELFDumper<ELFT>::printGnuHashTable() {
   DictScope D(W, "GnuHashTable");
   if (!GnuHashTable)
     return;
@@ -1996,8 +2050,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
   W.printString("LoadName", SOName);
 }
 
-template <class ELFT>
-void ELFDumper<ELFT>::printAttributes() {
+template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
   W.startLine() << "Attributes not implemented.\n";
 }
 
@@ -2486,7 +2539,7 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
       unwrapOrError(Obj->getSectionContents(StackMapSection));
 
   prettyPrintStackMap(
-      W, StackMapV2Parser<ELFT::TargetEndianness>(StackMapContentsArray));
+      W, StackMapParser<ELFT::TargetEndianness>(StackMapContentsArray));
 }
 
 template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
@@ -2527,7 +2580,8 @@ static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> *Obj) {
   ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(Obj->sections());
   if (Arr.empty())
     return "65535 (corrupt: out of range)";
-  return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + ")";
+  return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) +
+         ")";
 }
 
 template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
@@ -2599,7 +2653,7 @@ struct GroupMember {
 
 struct GroupSection {
   StringRef Name;
-  StringRef Signature;
+  std::string Signature;
   uint64_t ShName;
   uint64_t Index;
   uint32_t Link;
@@ -2630,13 +2684,13 @@ std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj) {
 
     StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
     StringRef Signature = StrTable.data() + Sym->st_name;
-    Ret.push_back({Name, 
-                   Signature, 
-                   Sec.sh_name, 
+    Ret.push_back({Name,
+                   maybeDemangle(Signature),
+                   Sec.sh_name,
                    I - 1,
                    Sec.sh_link,
                    Sec.sh_info,
-                   Data[0], 
+                   Data[0],
                    {}});
 
     std::vector<GroupMember> &GM = Ret.back().Members;
@@ -2691,53 +2745,57 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
 template <class ELFT>
 void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
                                      const Elf_Rela &R, bool IsRela) {
-  std::string Offset, Info, Addend, Value;
-  SmallString<32> RelocName;
-  StringRef TargetName;
-  const Elf_Sym *Sym = nullptr;
-  unsigned Width = ELFT::Is64Bits ? 16 : 8;
-  unsigned Bias = ELFT::Is64Bits ? 8 : 0;
-
-  // First two fields are bit width dependent. The rest of them are after are
-  // fixed width.
-  Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
-  Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
-  Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
+  const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
+  std::string TargetName;
   if (Sym && Sym->getType() == ELF::STT_SECTION) {
     const Elf_Shdr *Sec = unwrapOrError(
         Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
     TargetName = unwrapOrError(Obj->getSectionName(Sec));
   } else if (Sym) {
     StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
-    TargetName = unwrapOrError(Sym->getName(StrTable));
+    TargetName = this->dumper()->getFullSymbolName(
+        Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
   }
+  printRelocation(Obj, Sym, TargetName, R, IsRela);
+}
 
-  if (Sym && IsRela) {
-    if (R.r_addend < 0)
-      Addend = " - ";
-    else
-      Addend = " + ";
-  }
+template <class ELFT>
+void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
+                                     StringRef SymbolName, const Elf_Rela &R,
+                                     bool IsRela) {
+  // First two fields are bit width dependent. The rest of them are fixed width.
+  unsigned Bias = ELFT::Is64Bits ? 8 : 0;
+  Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
+  unsigned Width = ELFT::Is64Bits ? 16 : 8;
+
+  Fields[0].Str = to_string(format_hex_no_prefix(R.r_offset, Width));
+  Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width));
 
-  Offset = to_string(format_hex_no_prefix(R.r_offset, Width));
-  Info = to_string(format_hex_no_prefix(R.r_info, Width));
+  SmallString<32> RelocName;
+  Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
+  Fields[2].Str = RelocName.c_str();
+
+  if (Sym && (!SymbolName.empty() || Sym->getValue() != 0))
+    Fields[3].Str = to_string(format_hex_no_prefix(Sym->getValue(), Width));
+
+  Fields[4].Str = SymbolName;
+  for (const Field &F : Fields)
+    printField(F);
+
+  std::string Addend;
+  if (IsRela) {
+    int64_t RelAddend = R.r_addend;
+    if (!SymbolName.empty()) {
+      if (R.r_addend < 0) {
+        Addend = " - ";
+        RelAddend = std::abs(RelAddend);
+      } else
+        Addend = " + ";
+    }
 
-  int64_t RelAddend = R.r_addend;
-  if (IsRela)
-    Addend += to_hexString(std::abs(RelAddend), false);
-
-  if (Sym)
-    Value = to_string(format_hex_no_prefix(Sym->getValue(), Width));
-
-  Fields[0].Str = Offset;
-  Fields[1].Str = Info;
-  Fields[2].Str = RelocName;
-  Fields[3].Str = Value;
-  Fields[4].Str = TargetName;
-  for (auto &field : Fields)
-    printField(field);
-  OS << Addend;
-  OS << "\n";
+    Addend += to_hexString(RelAddend, false);
+  }
+  OS << Addend << "\n";
 }
 
 template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
@@ -2764,10 +2822,8 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
 template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
   bool HasRelocSections = false;
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
-    if (Sec.sh_type != ELF::SHT_REL &&
-        Sec.sh_type != ELF::SHT_RELA &&
-        Sec.sh_type != ELF::SHT_RELR &&
-        Sec.sh_type != ELF::SHT_ANDROID_REL &&
+    if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
+        Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
         Sec.sh_type != ELF::SHT_ANDROID_RELA &&
         Sec.sh_type != ELF::SHT_ANDROID_RELR)
       continue;
@@ -2832,7 +2888,21 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
     OS << "\nThere are no relocations in this file.\n";
 }
 
-std::string getSectionTypeString(unsigned Arch, unsigned Type) {
+// Return the offset of a section type within any one of the ranges:
+// [SHT_LOOS, SHT_HIOS], [SHT_LOPROC, SHT_HIPROC], [SHT_LOUSER, SHT_HIUSER].
+// If 'Type' does not fall within any of those ranges, the returned string is
+// "0x", then to_hexString(Type), then ": <unknown>".
+static std::string getSectionTypeOffsetString(unsigned Type) {
+  if (Type >= SHT_LOOS && Type <= SHT_HIOS)
+    return "LOOS+0x" + to_hexString(Type - SHT_LOOS);
+  else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC)
+    return "LOPROC+0x" + to_hexString(Type - SHT_LOPROC);
+  else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER)
+    return "LOUSER+0x" + to_hexString(Type - SHT_LOUSER);
+  return "0x" + to_hexString(Type) + ": <unknown>";
+}
+
+static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
   using namespace ELF;
 
   switch (Arch) {
@@ -2863,10 +2933,10 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
       return "MIPS_REGINFO";
     case SHT_MIPS_OPTIONS:
       return "MIPS_OPTIONS";
+    case SHT_MIPS_DWARF:
+      return "MIPS_DWARF";
     case SHT_MIPS_ABIFLAGS:
       return "MIPS_ABIFLAGS";
-    case SHT_MIPS_DWARF:
-      return "SHT_MIPS_DWARF";
     }
     break;
   }
@@ -2905,6 +2975,10 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
     return "GROUP";
   case SHT_SYMTAB_SHNDX:
     return "SYMTAB SECTION INDICES";
+  case SHT_ANDROID_REL:
+    return "ANDROID_REL";
+  case SHT_ANDROID_RELA:
+    return "ANDROID_RELA";
   case SHT_RELR:
   case SHT_ANDROID_RELR:
     return "RELR";
@@ -2916,6 +2990,8 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
     return "LLVM_CALL_GRAPH_PROFILE";
   case SHT_LLVM_ADDRSIG:
     return "LLVM_ADDRSIG";
+  case SHT_LLVM_DEPENDENT_LIBRARIES:
+    return "LLVM_DEPENDENT_LIBRARIES";
   // FIXME: Parse processor specific GNU attributes
   case SHT_GNU_ATTRIBUTES:
     return "ATTRIBUTES";
@@ -2928,69 +3004,65 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
   case SHT_GNU_versym:
     return "VERSYM";
   default:
-    return "";
+    return getSectionTypeOffsetString(Type);
   }
   return "";
 }
 
 template <class ELFT>
-void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
-  size_t SectionIndex = 0;
-  std::string Number, Type, Size, Address, Offset, Flags, Link, Info, EntrySize,
-      Alignment;
-  unsigned Bias;
-  unsigned Width;
-
-  if (ELFT::Is64Bits) {
-    Bias = 0;
-    Width = 16;
-  } else {
-    Bias = 8;
-    Width = 8;
-  }
+static StringRef getSectionName(const typename ELFT::Shdr &Sec,
+                                const ELFObjectFile<ELFT> &ElfObj,
+                                ArrayRef<typename ELFT::Shdr> Sections) {
+  const ELFFile<ELFT> &Obj = *ElfObj.getELFFile();
+  uint32_t Index = Obj.getHeader()->e_shstrndx;
+  if (Index == ELF::SHN_XINDEX)
+    Index = Sections[0].sh_link;
+  if (!Index) // no section string table.
+    return "";
+  // TODO: Test a case when the sh_link of the section with index 0 is broken.
+  if (Index >= Sections.size())
+    reportError(ElfObj.getFileName(),
+                createError("section header string table index " +
+                            Twine(Index) + " does not exist"));
+  StringRef Data = toStringRef(unwrapOrError(
+      Obj.template getSectionContentsAsArray<uint8_t>(&Sections[Index])));
+  return unwrapOrError(Obj.getSectionName(&Sec, Data));
+}
 
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
+  unsigned Bias = ELFT::Is64Bits ? 0 : 8;
   ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
   OS << "There are " << to_string(Sections.size())
      << " section headers, starting at offset "
      << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n";
   OS << "Section Headers:\n";
-  Field Fields[11] = {{"[Nr]", 2},
-                      {"Name", 7},
-                      {"Type", 25},
-                      {"Address", 41},
-                      {"Off", 58 - Bias},
-                      {"Size", 65 - Bias},
-                      {"ES", 72 - Bias},
-                      {"Flg", 75 - Bias},
-                      {"Lk", 79 - Bias},
-                      {"Inf", 82 - Bias},
-                      {"Al", 86 - Bias}};
-  for (auto &f : Fields)
-    printField(f);
+  Field Fields[11] = {
+      {"[Nr]", 2},        {"Name", 7},        {"Type", 25},
+      {"Address", 41},    {"Off", 58 - Bias}, {"Size", 65 - Bias},
+      {"ES", 72 - Bias},  {"Flg", 75 - Bias}, {"Lk", 79 - Bias},
+      {"Inf", 82 - Bias}, {"Al", 86 - Bias}};
+  for (auto &F : Fields)
+    printField(F);
   OS << "\n";
 
+  const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
+  size_t SectionIndex = 0;
   for (const Elf_Shdr &Sec : Sections) {
-    Number = to_string(SectionIndex);
-    Fields[0].Str = Number;
-    Fields[1].Str = unwrapOrError(Obj->getSectionName(&Sec));
-    Type = getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
-    Fields[2].Str = Type;
-    Address = to_string(format_hex_no_prefix(Sec.sh_addr, Width));
-    Fields[3].Str = Address;
-    Offset = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
-    Fields[4].Str = Offset;
-    Size = to_string(format_hex_no_prefix(Sec.sh_size, 6));
-    Fields[5].Str = Size;
-    EntrySize = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
-    Fields[6].Str = EntrySize;
-    Flags = getGNUFlags(Sec.sh_flags);
-    Fields[7].Str = Flags;
-    Link = to_string(Sec.sh_link);
-    Fields[8].Str = Link;
-    Info = to_string(Sec.sh_info);
-    Fields[9].Str = Info;
-    Alignment = to_string(Sec.sh_addralign);
-    Fields[10].Str = Alignment;
+    Fields[0].Str = to_string(SectionIndex);
+    Fields[1].Str = getSectionName(Sec, *ElfObj, Sections);
+    Fields[2].Str =
+        getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
+    Fields[3].Str =
+        to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 16 : 8));
+    Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
+    Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
+    Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
+    Fields[7].Str = getGNUFlags(Sec.sh_flags);
+    Fields[8].Str = to_string(Sec.sh_link);
+    Fields[9].Str = to_string(Sec.sh_info);
+    Fields[10].Str = to_string(Sec.sh_addralign);
+
     OS.PadToColumn(Fields[0].Column);
     OS << "[" << right_justify(Fields[0].Str, 2) << "]";
     for (int i = 1; i < 7; i++)
@@ -3043,9 +3115,10 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
   case ELF::SHN_COMMON:
     return "COM";
   case ELF::SHN_XINDEX:
-    SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
-        Symbol, FirstSym, this->dumper()->getShndxTable()));
-    LLVM_FALLTHROUGH;
+    return to_string(
+        format_decimal(unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
+                           Symbol, FirstSym, this->dumper()->getShndxTable())),
+                       3));
   default:
     // Find if:
     // Processor specific
@@ -3072,7 +3145,6 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
                                  bool IsDynamic) {
   static int Idx = 0;
   static bool Dynamic = true;
-  size_t Width;
 
   // If this function was called with a different value from IsDynamic
   // from last call, happens when we move from dynamic to static symbol
@@ -3081,111 +3153,87 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
     Idx = 0;
     Dynamic = false;
   }
-  std::string Num, Name, Value, Size, Binding, Type, Visibility, Section;
-  unsigned Bias = 0;
-  if (ELFT::Is64Bits) {
-    Bias = 8;
-    Width = 16;
-  } else {
-    Bias = 0;
-    Width = 8;
-  }
+
+  unsigned Bias = ELFT::Is64Bits ? 8 : 0;
   Field Fields[8] = {0,         8,         17 + Bias, 23 + Bias,
                      31 + Bias, 38 + Bias, 47 + Bias, 51 + Bias};
-  Num = to_string(format_decimal(Idx++, 6)) + ":";
-  Value = to_string(format_hex_no_prefix(Symbol->st_value, Width));
-  Size = to_string(format_decimal(Symbol->st_size, 5));
+  Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":";
+  Fields[1].Str = to_string(
+      format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
+  Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5));
+
   unsigned char SymbolType = Symbol->getType();
   if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
       SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
-    Type = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+    Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
   else
-    Type = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
-  unsigned Vis = Symbol->getVisibility();
-  Binding = printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
-  Visibility = printEnum(Vis, makeArrayRef(ElfSymbolVisibilities));
-  Section = getSymbolSectionNdx(Obj, Symbol, FirstSym);
-  Name = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
-  Fields[0].Str = Num;
-  Fields[1].Str = Value;
-  Fields[2].Str = Size;
-  Fields[3].Str = Type;
-  Fields[4].Str = Binding;
-  Fields[5].Str = Visibility;
-  Fields[6].Str = Section;
-  Fields[7].Str = Name;
+    Fields[3].Str = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
+
+  Fields[4].Str =
+      printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+  Fields[5].Str =
+      printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+  Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+  Fields[7].Str =
+      this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
   for (auto &Entry : Fields)
     printField(Entry);
   OS << "\n";
 }
+
 template <class ELFT>
 void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
                                        uint32_t Sym, StringRef StrTable,
                                        uint32_t Bucket) {
-  std::string Num, Buc, Name, Value, Size, Binding, Type, Visibility, Section;
-  unsigned Width, Bias = 0;
-  if (ELFT::Is64Bits) {
-    Bias = 8;
-    Width = 16;
-  } else {
-    Bias = 0;
-    Width = 8;
-  }
+  unsigned Bias = ELFT::Is64Bits ? 8 : 0;
   Field Fields[9] = {0,         6,         11,        20 + Bias, 25 + Bias,
                      34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias};
-  Num = to_string(format_decimal(Sym, 5));
-  Buc = to_string(format_decimal(Bucket, 3)) + ":";
+  Fields[0].Str = to_string(format_decimal(Sym, 5));
+  Fields[1].Str = to_string(format_decimal(Bucket, 3)) + ":";
 
   const auto Symbol = FirstSym + Sym;
-  Value = to_string(format_hex_no_prefix(Symbol->st_value, Width));
-  Size = to_string(format_decimal(Symbol->st_size, 5));
+  Fields[2].Str = to_string(
+      format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
+  Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5));
+
   unsigned char SymbolType = Symbol->getType();
   if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
       SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
-    Type = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+    Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
   else
-    Type = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
-  unsigned Vis = Symbol->getVisibility();
-  Binding = printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
-  Visibility = printEnum(Vis, makeArrayRef(ElfSymbolVisibilities));
-  Section = getSymbolSectionNdx(Obj, Symbol, FirstSym);
-  Name = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
-  Fields[0].Str = Num;
-  Fields[1].Str = Buc;
-  Fields[2].Str = Value;
-  Fields[3].Str = Size;
-  Fields[4].Str = Type;
-  Fields[5].Str = Binding;
-  Fields[6].Str = Visibility;
-  Fields[7].Str = Section;
-  Fields[8].Str = Name;
+    Fields[4].Str = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
+
+  Fields[5].Str =
+      printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+  Fields[6].Str =
+      printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+  Fields[7].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+  Fields[8].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
+
   for (auto &Entry : Fields)
     printField(Entry);
   OS << "\n";
 }
 
-template <class ELFT> void GNUStyle<ELFT>::printSymbols(const ELFO *Obj) {
-  if (opts::DynamicSymbols)
+template <class ELFT>
+void GNUStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
+                                  bool PrintDynamicSymbols) {
+  if (!PrintSymbols && !PrintDynamicSymbols)
     return;
+  // GNU readelf prints both the .dynsym and .symtab with --symbols.
   this->dumper()->printSymbolsHelper(true);
-  this->dumper()->printSymbolsHelper(false);
+  if (PrintSymbols)
+    this->dumper()->printSymbolsHelper(false);
 }
 
-template <class ELFT>
-void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
+template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) {
   if (this->dumper()->getDynamicStringTable().empty())
     return;
   auto StringTable = this->dumper()->getDynamicStringTable();
   auto DynSyms = this->dumper()->dynamic_symbols();
-  auto GnuHash = this->dumper()->getGnuHashTable();
-  auto SysVHash = this->dumper()->getHashTable();
-
-  // If no hash or .gnu.hash found, try using symbol table
-  if (GnuHash == nullptr && SysVHash == nullptr)
-    this->dumper()->printSymbolsHelper(true);
 
   // Try printing .hash
-  if (this->dumper()->getHashTable()) {
+  if (auto SysVHash = this->dumper()->getHashTable()) {
     OS << "\n Symbol table of .hash for image:\n";
     if (ELFT::Is64Bits)
       OS << "  Num Buc:    Value          Size   Type   Bind Vis      Ndx Name";
@@ -3193,14 +3241,12 @@ void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
       OS << "  Num Buc:    Value  Size   Type   Bind Vis      Ndx Name";
     OS << "\n";
 
-    uint32_t NBuckets = SysVHash->nbucket;
-    uint32_t NChains = SysVHash->nchain;
     auto Buckets = SysVHash->buckets();
     auto Chains = SysVHash->chains();
-    for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+    for (uint32_t Buc = 0; Buc < SysVHash->nbucket; Buc++) {
       if (Buckets[Buc] == ELF::STN_UNDEF)
         continue;
-      for (uint32_t Ch = Buckets[Buc]; Ch < NChains; Ch = Chains[Ch]) {
+      for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash->nchain; Ch = Chains[Ch]) {
         if (Ch == ELF::STN_UNDEF)
           break;
         printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc);
@@ -3209,16 +3255,15 @@ void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
   }
 
   // Try printing .gnu.hash
-  if (GnuHash) {
+  if (auto GnuHash = this->dumper()->getGnuHashTable()) {
     OS << "\n Symbol table of .gnu.hash for image:\n";
     if (ELFT::Is64Bits)
       OS << "  Num Buc:    Value          Size   Type   Bind Vis      Ndx Name";
     else
       OS << "  Num Buc:    Value  Size   Type   Bind Vis      Ndx Name";
     OS << "\n";
-    uint32_t NBuckets = GnuHash->nbuckets;
     auto Buckets = GnuHash->buckets();
-    for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+    for (uint32_t Buc = 0; Buc < GnuHash->nbuckets; Buc++) {
       if (Buckets[Buc] == ELF::STN_UNDEF)
         continue;
       uint32_t Index = Buckets[Buc];
@@ -3266,8 +3311,8 @@ bool GNUStyle<ELFT>::checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) {
       (IsSpecial && Phdr.p_type != ELF::PT_TLS) ? 0 : Sec.sh_size;
   if (Sec.sh_offset >= Phdr.p_offset)
     return ((Sec.sh_offset + SectionSize <= Phdr.p_filesz + Phdr.p_offset)
-            /*only non-zero sized sections at end*/ &&
-            (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz));
+            /*only non-zero sized sections at end*/
+            && (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz));
   return false;
 }
 
@@ -3301,13 +3346,22 @@ bool GNUStyle<ELFT>::checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) {
           (Sec.sh_addr > Phdr.p_vaddr && Sec.sh_addr < Phdr.p_memsz));
 }
 
+template <class ELFT>
+void GNUStyle<ELFT>::printProgramHeaders(
+    const ELFO *Obj, bool PrintProgramHeaders,
+    cl::boolOrDefault PrintSectionMapping) {
+  if (PrintProgramHeaders)
+    printProgramHeaders(Obj);
+
+  // Display the section mapping along with the program headers, unless
+  // -section-mapping is explicitly set to false.
+  if (PrintSectionMapping != cl::BOU_FALSE)
+    printSectionMapping(Obj);
+}
+
 template <class ELFT>
 void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
   unsigned Bias = ELFT::Is64Bits ? 8 : 0;
-  unsigned Width = ELFT::Is64Bits ? 18 : 10;
-  unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
-  std::string Type, Offset, VMA, LMA, FileSz, MemSz, Flag, Align;
-
   const Elf_Ehdr *Header = Obj->getHeader();
   Field Fields[8] = {2,         17,        26,        37 + Bias,
                      48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias};
@@ -3323,23 +3377,18 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
   else
     OS << "  Type           Offset   VirtAddr   PhysAddr   FileSiz "
        << "MemSiz  Flg Align\n";
+
+  unsigned Width = ELFT::Is64Bits ? 18 : 10;
+  unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
   for (const auto &Phdr : unwrapOrError(Obj->program_headers())) {
-    Type = getElfPtType(Header->e_machine, Phdr.p_type);
-    Offset = to_string(format_hex(Phdr.p_offset, 8));
-    VMA = to_string(format_hex(Phdr.p_vaddr, Width));
-    LMA = to_string(format_hex(Phdr.p_paddr, Width));
-    FileSz = to_string(format_hex(Phdr.p_filesz, SizeWidth));
-    MemSz = to_string(format_hex(Phdr.p_memsz, SizeWidth));
-    Flag = printPhdrFlags(Phdr.p_flags);
-    Align = to_string(format_hex(Phdr.p_align, 1));
-    Fields[0].Str = Type;
-    Fields[1].Str = Offset;
-    Fields[2].Str = VMA;
-    Fields[3].Str = LMA;
-    Fields[4].Str = FileSz;
-    Fields[5].Str = MemSz;
-    Fields[6].Str = Flag;
-    Fields[7].Str = Align;
+    Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type);
+    Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8));
+    Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width));
+    Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width));
+    Fields[4].Str = to_string(format_hex(Phdr.p_filesz, SizeWidth));
+    Fields[5].Str = to_string(format_hex(Phdr.p_memsz, SizeWidth));
+    Fields[6].Str = printPhdrFlags(Phdr.p_flags);
+    Fields[7].Str = to_string(format_hex(Phdr.p_align, 1));
     for (auto Field : Fields)
       printField(Field);
     if (Phdr.p_type == ELF::PT_INTERP) {
@@ -3348,7 +3397,12 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
     }
     OS << "\n";
   }
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
   OS << "\n Section to Segment mapping:\n  Segment Sections...\n";
+  DenseSet<const Elf_Shdr *> BelongsToSegment;
   int Phnum = 0;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     std::string Sections;
@@ -3363,58 +3417,66 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
                           Phdr.p_type != ELF::PT_TLS;
       if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) &&
           checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) &&
-          checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL))
+          checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) {
         Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " ";
+        BelongsToSegment.insert(&Sec);
+      }
     }
     OS << Sections << "\n";
     OS.flush();
   }
+
+  // Display sections that do not belong to a segment.
+  std::string Sections;
+  for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+    if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
+      Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' ';
+  }
+  if (!Sections.empty()) {
+    OS << "   None  " << Sections << '\n';
+    OS.flush();
+  }
 }
 
 template <class ELFT>
 void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
                                             bool IsRela) {
-  SmallString<32> RelocName;
-  StringRef SymbolName;
-  unsigned Width = ELFT::Is64Bits ? 16 : 8;
-  unsigned Bias = ELFT::Is64Bits ? 8 : 0;
-  // First two fields are bit width dependent. The rest of them are after are
-  // fixed width.
-  Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
-
   uint32_t SymIndex = R.getSymbol(Obj->isMips64EL());
   const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
-  Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
-  SymbolName =
-      unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()));
-  std::string Addend, Info, Offset, Value;
-  Offset = to_string(format_hex_no_prefix(R.r_offset, Width));
-  Info = to_string(format_hex_no_prefix(R.r_info, Width));
-  Value = to_string(format_hex_no_prefix(Sym->getValue(), Width));
-  int64_t RelAddend = R.r_addend;
-  if (!SymbolName.empty() && IsRela) {
-    if (R.r_addend < 0)
-      Addend = " - ";
-    else
-      Addend = " + ";
-  }
+  std::string SymbolName = maybeDemangle(
+      unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
+  printRelocation(Obj, Sym, SymbolName, R, IsRela);
+}
 
-  if (SymbolName.empty() && Sym->getValue() == 0)
-    Value = "";
+template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
+  Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+  if (Table.empty())
+    return;
 
-  if (IsRela)
-    Addend += to_string(format_hex_no_prefix(std::abs(RelAddend), 1));
+  const DynRegionInfo &DynamicTableRegion =
+      this->dumper()->getDynamicTableRegion();
 
+  OS << "Dynamic section at offset "
+     << format_hex(reinterpret_cast<const uint8_t *>(DynamicTableRegion.Addr) -
+                       Obj->base(),
+                   1)
+     << " contains " << Table.size() << " entries:\n";
 
-  Fields[0].Str = Offset;
-  Fields[1].Str = Info;
-  Fields[2].Str = RelocName.c_str();
-  Fields[3].Str = Value;
-  Fields[4].Str = SymbolName;
-  for (auto &Field : Fields)
-    printField(Field);
-  OS << Addend;
-  OS << "\n";
+  bool Is64 = ELFT::Is64Bits;
+  if (Is64)
+    OS << "  Tag                Type                 Name/Value\n";
+  else
+    OS << "  Tag        Type                 Name/Value\n";
+  for (auto Entry : Table) {
+    uintX_t Tag = Entry.getTag();
+    std::string TypeString = std::string("(") +
+                             getTypeString(Obj->getHeader()->e_machine, Tag) +
+                             ")";
+    OS << "  " << format_hex(Tag, Is64 ? 18 : 10)
+       << format(" %-20s ", TypeString.c_str());
+    this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+    OS << "\n";
+  }
 }
 
 template <class ELFT>
@@ -3427,7 +3489,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
     OS << "\n'RELA' relocation section at offset "
        << format_hex(reinterpret_cast<const uint8_t *>(DynRelaRegion.Addr) -
                          Obj->base(),
-                     1) << " contains " << DynRelaRegion.Size << " bytes:\n";
+                     1)
+       << " contains " << DynRelaRegion.Size << " bytes:\n";
     printRelocHeader(ELF::SHT_RELA);
     for (const Elf_Rela &Rela : this->dumper()->dyn_relas())
       printDynamicRelocation(Obj, Rela, true);
@@ -3436,7 +3499,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
     OS << "\n'REL' relocation section at offset "
        << format_hex(reinterpret_cast<const uint8_t *>(DynRelRegion.Addr) -
                          Obj->base(),
-                     1) << " contains " << DynRelRegion.Size << " bytes:\n";
+                     1)
+       << " contains " << DynRelRegion.Size << " bytes:\n";
     printRelocHeader(ELF::SHT_REL);
     for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) {
       Elf_Rela Rela;
@@ -3450,7 +3514,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
     OS << "\n'RELR' relocation section at offset "
        << format_hex(reinterpret_cast<const uint8_t *>(DynRelrRegion.Addr) -
                          Obj->base(),
-                     1) << " contains " << DynRelrRegion.Size << " bytes:\n";
+                     1)
+       << " contains " << DynRelrRegion.Size << " bytes:\n";
     printRelocHeader(ELF::SHT_REL);
     Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
     std::vector<Elf_Rela> RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
@@ -3462,7 +3527,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
     OS << "\n'PLT' relocation section at offset "
        << format_hex(reinterpret_cast<const uint8_t *>(DynPLTRelRegion.Addr) -
                          Obj->base(),
-                     1) << " contains " << DynPLTRelRegion.Size << " bytes:\n";
+                     1)
+       << " contains " << DynPLTRelRegion.Size << " bytes:\n";
   }
   if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
     printRelocHeader(ELF::SHT_RELA);
@@ -3480,18 +3546,189 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
   }
 }
 
+/// Prints the GNU-style two-line banner (name/count, then addr/offset/link).
+template <class ELFT>
+static void printGNUVersionSectionProlog(formatted_raw_ostream &OS,
+                                         const Twine &Name, unsigned EntriesNum,
+                                         const ELFFile<ELFT> *Obj,
+                                         const typename ELFT::Shdr *Sec) {
+  StringRef ThisName = unwrapOrError(Obj->getSectionName(Sec));
+  OS << Name << " section '" << ThisName << "' contains " << EntriesNum
+     << " entries:\n";
+  // The sh_link target is looked up only after the first line is written.
+  const auto *Linked = unwrapOrError(Obj->getSection(Sec->sh_link));
+  StringRef LinkedName = unwrapOrError(Obj->getSectionName(Linked));
+  OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) << "  Offset: "
+     << format_hex(Sec->sh_offset, 8) << "  Link: " << Sec->sh_link << " ("
+     << LinkedName << ")\n";
+}
+
+/// GNU-style dump of the version symbol section \p Sec (no-op when it is
+/// null): a prolog followed by one version index per entry, four per row.
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                               const Elf_Shdr *Sec) {
+  if (!Sec)
+    return;
+
+  const unsigned Entries = Sec->sh_size / sizeof(Elf_Versym);
+  printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec);
+
+  // The entries are read straight from the file image at sh_offset.
+  const Elf_Versym *Table =
+      reinterpret_cast<const Elf_Versym *>(Obj->base() + Sec->sh_offset);
+  const ELFDumper<ELFT> *Dumper = this->dumper();
+  StringRef StrTable = Dumper->getDynamicStringTable();
+
+  // readelf prints 4 entries per line, each row labelled with its first index.
+  for (uint64_t Row = 0; Row < Entries; Row += 4) {
+    OS << "  " << format_hex_no_prefix(Row, 3) << ":";
+
+    for (uint64_t Col = 0; Col < 4 && Row + Col < Entries; ++Col) {
+      const Elf_Versym &Versym = Table[Row + Col];
+      if (Versym.vs_index == 0) {
+        OS << "   0 (*local*)    ";
+        continue;
+      }
+      if (Versym.vs_index == 1) {
+        OS << "   1 (*global*)   ";
+        continue;
+      }
+
+      // The VERSYM_VERSION bits go out in hex; VERSYM_HIDDEN adds an 'h'.
+      OS << format("%4x%c", Versym.vs_index & VERSYM_VERSION,
+                   Versym.vs_index & VERSYM_HIDDEN ? 'h' : ' ');
+
+      bool IsDefault = true;
+      std::string VersionName = Dumper->getSymbolVersionByIndex(
+          StrTable, Versym.vs_index, IsDefault);
+
+      if (VersionName.empty())
+        VersionName = "(*invalid*)";
+      else
+        VersionName = "(" + VersionName + ")";
+      OS << left_justify(VersionName, 13);
+    }
+    OS << '\n';
+  }
+  OS << '\n';
+}
+
+/// Returns "none", or the names of the set flags ("<unknown>" last) joined by |.
+static std::string versionFlagToString(unsigned Flags) {
+  if (Flags == 0)
+    return "none";
+  const std::pair<unsigned, const char *> Known[] = {
+      {VER_FLG_BASE, "BASE"}, {VER_FLG_WEAK, "WEAK"}, {VER_FLG_INFO, "INFO"}};
+  std::string Text;
+  auto Append = [&Text](const char *Label) {
+    Text += Text.empty() ? "" : " | ";
+    Text += Label;
+  };
+  for (const auto &Flag : Known)
+    if (Flags & Flag.first) {
+      Append(Flag.second);
+      Flags &= ~Flag.first;
+    }
+  if (Flags) // Bits without a known name collapse into one marker.
+    Append("<unknown>");
+  return Text;
+}
+
+/// GNU-style dump of the version definition section \p Sec (no-op if null):
+/// a prolog, then each definition with its name and any parent names.
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                                   const Elf_Shdr *Sec) {
+  if (!Sec)
+    return;
+
+  unsigned VerDefsNum = Sec->sh_info;
+  printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec);
+
+  const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+  StringRef StringTable(
+      reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+      (size_t)StrTabSec->sh_size);
+
+  const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data();
+  const uint8_t *Begin = VerdefBuf;
+  // format() is snprintf underneath: narrow ptrdiff_t offsets to match "%x".
+  while (VerDefsNum--) {
+    const auto *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+    OS << format("  0x%04x: Rev: %u  Flags: %s  Index: %u  Cnt: %u",
+                 unsigned(VerdefBuf - Begin), (unsigned)Verdef->vd_version,
+                 versionFlagToString(Verdef->vd_flags).c_str(),
+                 (unsigned)Verdef->vd_ndx, (unsigned)Verdef->vd_cnt);
+    // NOTE(review): vd_aux, vda_next, vd_next and vda_name are not validated.
+    const uint8_t *VerdauxBuf = VerdefBuf + Verdef->vd_aux;
+    const auto *Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+    OS << format("  Name: %s\n",
+                 StringTable.drop_front(Verdaux->vda_name).data());
+
+    for (unsigned I = 1; I < Verdef->vd_cnt; ++I) {
+      VerdauxBuf += Verdaux->vda_next;
+      Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+      OS << format("  0x%04x: Parent %u: %s\n",
+                   unsigned(VerdauxBuf - Begin), I,
+                   StringTable.drop_front(Verdaux->vda_name).data());
+    }
+    VerdefBuf += Verdef->vd_next;
+  }
+  OS << '\n';
+}
+
+/// GNU-style dump of the version dependency section \p Sec (no-op if null):
+/// a prolog, then each needed file followed by the versions required from it.
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
+                                                   const Elf_Shdr *Sec) {
+  if (!Sec)
+    return;
+
+  unsigned VerneedNum = Sec->sh_info;
+  printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec);
+
+  ArrayRef<uint8_t> SecData = unwrapOrError(Obj->getSectionContents(Sec));
+  const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+  StringRef StringTable = {
+      reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+      (size_t)StrTabSec->sh_size};
+
+  // format() is snprintf underneath: narrow ptrdiff_t offsets to match "%x".
+  const uint8_t *VerneedBuf = SecData.data();
+  for (unsigned I = 0; I < VerneedNum; ++I) {
+    const auto *Verneed = reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+
+    OS << format("  0x%04x: Version: %u  File: %s  Cnt: %u\n",
+                 unsigned(VerneedBuf - SecData.begin()),
+                 (unsigned)Verneed->vn_version,
+                 StringTable.drop_front(Verneed->vn_file).data(),
+                 (unsigned)Verneed->vn_cnt);
+
+    // NOTE(review): vn_aux, vna_next, vn_next and the names are not validated.
+    const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+    for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+      const auto *Vernaux = reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+      OS << format("  0x%04x:   Name: %s  Flags: %s  Version: %u\n",
+                   unsigned(VernauxBuf - SecData.begin()),
+                   StringTable.drop_front(Vernaux->vna_name).data(),
+                   versionFlagToString(Vernaux->vna_flags).c_str(),
+                   (unsigned)Vernaux->vna_other);
+      VernauxBuf += Vernaux->vna_next;
+    }
+    VerneedBuf += Verneed->vn_next;
+  }
+  OS << '\n';
+}
+
 // Hash histogram shows  statistics of how efficient the hash was for the
 // dynamic symbol table. The table shows number of hash buckets for different
 // lengths of chains as absolute number and percentage of the total buckets.
 // Additionally cumulative coverage of symbols for each set of buckets.
 template <class ELFT>
 void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
-
-  const Elf_Hash *HashTable = this->dumper()->getHashTable();
-  const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable();
-
   // Print histogram for .hash section
-  if (HashTable) {
+  if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) {
     size_t NBucket = HashTable->nbucket;
     size_t NChain = HashTable->nchain;
     ArrayRef<Elf_Word> Buckets = HashTable->buckets();
@@ -3535,7 +3772,7 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
   }
 
   // Print histogram for .gnu.hash section
-  if (GnuHashTable) {
+  if (const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable()) {
     size_t NBucket = GnuHashTable->nbuckets;
     ArrayRef<Elf_Word> Buckets = GnuHashTable->buckets();
     unsigned NumSyms = this->dumper()->dynamic_symbols().size();
@@ -3595,6 +3832,24 @@ void GNUStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
     OS << "GNUStyle::printAddrsig not implemented\n";
 }
 
+/// Returns the display name of the note type \p NT.
+///
+/// Yields an empty string when NT is not one of the four types listed here.
+static StringRef getGenericNoteTypeName(const uint32_t NT) {
+  switch (NT) {
+  case ELF::NT_VERSION:
+    return "NT_VERSION (version)";
+  case ELF::NT_ARCH:
+    return "NT_ARCH (architecture)";
+  case ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN:
+    return "OPEN";
+  case ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC:
+    return "func";
+  default:
+    return "";
+  }
+}
+
 static std::string getGNUNoteTypeName(const uint32_t NT) {
   static const struct {
     uint32_t ID;
@@ -3649,14 +3904,11 @@ static std::string getAMDNoteTypeName(const uint32_t NT) {
   static const struct {
     uint32_t ID;
     const char *Name;
-  } Notes[] = {
-    {ELF::NT_AMD_AMDGPU_HSA_METADATA,
-     "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
-    {ELF::NT_AMD_AMDGPU_ISA,
-     "NT_AMD_AMDGPU_ISA (ISA Version)"},
-    {ELF::NT_AMD_AMDGPU_PAL_METADATA,
-     "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}
-  };
+  } Notes[] = {{ELF::NT_AMD_AMDGPU_HSA_METADATA,
+                "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
+               {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
+               {ELF::NT_AMD_AMDGPU_PAL_METADATA,
+                "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}};
 
   for (const auto &Note : Notes)
     if (Note.ID == NT)
@@ -3683,6 +3935,16 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
                                   ArrayRef<uint8_t> Data) {
   std::string str;
   raw_string_ostream OS(str);
+  uint32_t PrData;
+  auto DumpBit = [&](uint32_t Flag, StringRef Name) {
+    if (PrData & Flag) {
+      PrData &= ~Flag;
+      OS << Name;
+      if (PrData)
+        OS << ", ";
+    }
+  };
+
   switch (Type) {
   default:
     OS << format("<application-specific type 0x%x>", Type);
@@ -3701,41 +3963,101 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
     if (DataSize)
       OS << format(" <corrupt length: 0x%x>", DataSize);
     return OS.str();
+  case GNU_PROPERTY_AARCH64_FEATURE_1_AND:
   case GNU_PROPERTY_X86_FEATURE_1_AND:
-    OS << "X86 features: ";
-    if (DataSize != 4 && DataSize != 8) {
+    OS << ((Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ? "aarch64 feature: "
+                                                        : "x86 feature: ");
+    if (DataSize != 4) {
       OS << format("<corrupt length: 0x%x>", DataSize);
       return OS.str();
     }
-    uint64_t CFProtection =
-        (DataSize == 4)
-            ? support::endian::read32<ELFT::TargetEndianness>(Data.data())
-            : support::endian::read64<ELFT::TargetEndianness>(Data.data());
-    if (CFProtection == 0) {
-      OS << "none";
+    PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+    if (PrData == 0) {
+      OS << "<None>";
       return OS.str();
     }
-    if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_IBT) {
-      OS << "IBT";
-      CFProtection &= ~GNU_PROPERTY_X86_FEATURE_1_IBT;
-      if (CFProtection)
-        OS << ", ";
+    if (Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+      DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI");
+      DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC");
+    } else {
+      DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT");
+      DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK");
     }
-    if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_SHSTK) {
-      OS << "SHSTK";
-      CFProtection &= ~GNU_PROPERTY_X86_FEATURE_1_SHSTK;
-      if (CFProtection)
-        OS << ", ";
+    if (PrData)
+      OS << format("<unknown flags: 0x%x>", PrData);
+    return OS.str();
+  case GNU_PROPERTY_X86_ISA_1_NEEDED:
+  case GNU_PROPERTY_X86_ISA_1_USED:
+    OS << "x86 ISA "
+       << (Type == GNU_PROPERTY_X86_ISA_1_NEEDED ? "needed: " : "used: ");
+    if (DataSize != 4) {
+      OS << format("<corrupt length: 0x%x>", DataSize);
+      return OS.str();
+    }
+    PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+    if (PrData == 0) {
+      OS << "<None>";
+      return OS.str();
+    }
+    DumpBit(GNU_PROPERTY_X86_ISA_1_CMOV, "CMOV");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSE, "SSE");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSE2, "SSE2");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSE3, "SSE3");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSSE3, "SSSE3");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSE4_1, "SSE4_1");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_SSE4_2, "SSE4_2");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX, "AVX");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX2, "AVX2");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_FMA, "FMA");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512F, "AVX512F");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512CD, "AVX512CD");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512ER, "AVX512ER");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512PF, "AVX512PF");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512VL, "AVX512VL");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512DQ, "AVX512DQ");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512BW, "AVX512BW");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS, "AVX512_4FMAPS");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW, "AVX512_4VNNIW");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_BITALG, "AVX512_BITALG");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_IFMA, "AVX512_IFMA");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VBMI, "AVX512_VBMI");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2, "AVX512_VBMI2");
+    DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VNNI, "AVX512_VNNI");
+    if (PrData)
+      OS << format("<unknown flags: 0x%x>", PrData);
+    return OS.str();
+    break;
+  case GNU_PROPERTY_X86_FEATURE_2_NEEDED:
+  case GNU_PROPERTY_X86_FEATURE_2_USED:
+    OS << "x86 feature "
+       << (Type == GNU_PROPERTY_X86_FEATURE_2_NEEDED ? "needed: " : "used: ");
+    if (DataSize != 4) {
+      OS << format("<corrupt length: 0x%x>", DataSize);
+      return OS.str();
     }
-    if (CFProtection)
-      OS << format("<unknown flags: 0x%llx>", CFProtection);
+    PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+    if (PrData == 0) {
+      OS << "<None>";
+      return OS.str();
+    }
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_X86, "x86");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_X87, "x87");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_MMX, "MMX");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_XMM, "XMM");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_YMM, "YMM");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_ZMM, "ZMM");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_FXSR, "FXSR");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVE, "XSAVE");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT, "XSAVEOPT");
+    DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEC, "XSAVEC");
+    if (PrData)
+      OS << format("<unknown flags: 0x%x>", PrData);
     return OS.str();
   }
 }
 
 template <typename ELFT>
-static SmallVector<std::string, 4>
-getGNUPropertyList(ArrayRef<uint8_t> Arr) {
+static SmallVector<std::string, 4> getGNUPropertyList(ArrayRef<uint8_t> Arr) {
   using Elf_Word = typename ELFT::Word;
 
   SmallVector<std::string, 4> Properties;
@@ -3770,12 +4092,11 @@ struct GNUAbiTag {
   bool IsValid;
 };
 
-template <typename ELFT>
-static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
+template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
   typedef typename ELFT::Word Elf_Word;
 
-  ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word*>(Desc.begin()),
-                           reinterpret_cast<const Elf_Word*>(Desc.end()));
+  ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word *>(Desc.begin()),
+                           reinterpret_cast<const Elf_Word *>(Desc.end()));
 
   if (Words.size() < 4)
     return {"", "", /*IsValid=*/false};
@@ -3846,24 +4167,13 @@ static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
   default:
     return {"", ""};
   case ELF::NT_AMD_AMDGPU_HSA_METADATA:
-    return {"HSA Metadata",
-            std::string(reinterpret_cast<const char *>(Desc.data()),
-                        Desc.size())};
+    return {
+        "HSA Metadata",
+        std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
   case ELF::NT_AMD_AMDGPU_ISA:
-    return {"ISA Version",
-            std::string(reinterpret_cast<const char *>(Desc.data()),
-                        Desc.size())};
-  case ELF::NT_AMD_AMDGPU_PAL_METADATA:
-    const uint32_t *PALMetadataBegin =
-        reinterpret_cast<const uint32_t *>(Desc.data());
-    const uint32_t *PALMetadataEnd = PALMetadataBegin + Desc.size();
-    std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
-    std::string PALMetadataString;
-    auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
-    if (Error) {
-      return {"PAL Metadata", "Invalid"};
-    }
-    return {"PAL Metadata", PALMetadataString};
+    return {
+        "ISA Version",
+        std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
   }
 }
 
@@ -3877,36 +4187,28 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
   switch (NoteType) {
   default:
     return {"", ""};
-  case ELF::NT_AMDGPU_METADATA:
+  case ELF::NT_AMDGPU_METADATA: {
     auto MsgPackString =
         StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
-    msgpack::Reader MsgPackReader(MsgPackString);
-    auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
-    if (errorToBool(OptMsgPackNodeOrErr.takeError()))
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
       return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
-    auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
-    if (!OptMsgPackNode)
-      return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
-    auto &MsgPackNode = *OptMsgPackNode;
 
     AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
-    if (!Verifier.verify(*MsgPackNode))
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
       return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
 
     std::string HSAMetadataString;
     raw_string_ostream StrOS(HSAMetadataString);
-    yaml::Output YOut(StrOS);
-    YOut << MsgPackNode;
+    MsgPackDoc.toYAML(StrOS);
 
     return {"AMDGPU Metadata", StrOS.str()};
   }
+  }
 }
 
 template <class ELFT>
 void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
-  const Elf_Ehdr *e = Obj->getHeader();
-  bool IsCore = e->e_type == ELF::ET_CORE;
-
   auto PrintHeader = [&](const typename ELFT::Off Offset,
                          const typename ELFT::Addr Size) {
     OS << "Displaying notes found at file offset " << format_hex(Offset, 10)
@@ -3938,12 +4240,16 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
       if (!N.Type.empty())
         OS << "    " << N.Type << ":\n        " << N.Value << '\n';
     } else {
-      OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
+      StringRef NoteType = getGenericNoteTypeName(Type);
+      if (!NoteType.empty())
+        OS << NoteType;
+      else
+        OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
     }
     OS << '\n';
   };
 
-  if (IsCore) {
+  if (Obj->getHeader()->e_type == ELF::ET_CORE) {
     for (const auto &P : unwrapOrError(Obj->program_headers())) {
       if (P.p_type != PT_NOTE)
         continue;
@@ -3992,7 +4298,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
      << format_hex_no_prefix(Parser.getGp(), 8 + Bias) << "\n\n";
 
   OS << " Reserved entries:\n";
-  OS << "   Address     Access  Initial Purpose\n";
+  if (ELFT::Is64Bits)
+    OS << "           Address     Access          Initial Purpose\n";
+  else
+    OS << "   Address     Access  Initial Purpose\n";
   PrintEntry(Parser.getGotLazyResolver(), "Lazy resolver");
   if (Parser.getGotModulePointer())
     PrintEntry(Parser.getGotModulePointer(), "Module pointer (GNU extension)");
@@ -4000,7 +4309,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   if (!Parser.getLocalEntries().empty()) {
     OS << "\n";
     OS << " Local entries:\n";
-    OS << "   Address     Access  Initial\n";
+    if (ELFT::Is64Bits)
+      OS << "           Address     Access          Initial\n";
+    else
+      OS << "   Address     Access  Initial\n";
     for (auto &E : Parser.getLocalEntries())
       PrintEntry(&E, "");
   }
@@ -4011,7 +4323,11 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   if (!Parser.getGlobalEntries().empty()) {
     OS << "\n";
     OS << " Global entries:\n";
-    OS << "   Address     Access  Initial Sym.Val. Type    Ndx Name\n";
+    if (ELFT::Is64Bits)
+      OS << "           Address     Access          Initial         Sym.Val."
+         << " Type    Ndx Name\n";
+    else
+      OS << "   Address     Access  Initial Sym.Val. Type    Ndx Name\n";
     for (auto &E : Parser.getGlobalEntries()) {
       const Elf_Sym *Sym = Parser.getGotSym(&E);
       std::string SymName = this->dumper()->getFullSymbolName(
@@ -4045,7 +4361,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   size_t Bias = ELFT::Is64Bits ? 8 : 0;
   auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
     OS.PadToColumn(2);
-    OS << format_hex_no_prefix(Parser.getGotAddress(E), 8 + Bias);
+    OS << format_hex_no_prefix(Parser.getPltAddress(E), 8 + Bias);
     OS.PadToColumn(11 + Bias);
     OS << format_hex_no_prefix(*E, 8 + Bias);
     OS.PadToColumn(20 + 2 * Bias);
@@ -4058,7 +4374,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   OS << "   Address  Initial Purpose\n";
   PrintEntry(Parser.getPltLazyResolver(), "PLT lazy resolver");
   if (Parser.getPltModulePointer())
-    PrintEntry(Parser.getGotModulePointer(), "Module pointer");
+    PrintEntry(Parser.getPltModulePointer(), "Module pointer");
 
   if (!Parser.getPltEntries().empty()) {
     OS << "\n";
@@ -4070,7 +4386,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
           Sym, this->dumper()->getDynamicStringTable(), false);
 
       OS.PadToColumn(2);
-      OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias));
+      OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias));
       OS.PadToColumn(11 + Bias);
       OS << to_string(format_hex_no_prefix(E, 8 + Bias));
       OS.PadToColumn(20 + 2 * Bias);
@@ -4087,21 +4403,21 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
 }
 
 template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
-  const Elf_Ehdr *e = Obj->getHeader();
+  const Elf_Ehdr *E = Obj->getHeader();
   {
     DictScope D(W, "ElfHeader");
     {
       DictScope D(W, "Ident");
-      W.printBinary("Magic", makeArrayRef(e->e_ident).slice(ELF::EI_MAG0, 4));
-      W.printEnum("Class", e->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
-      W.printEnum("DataEncoding", e->e_ident[ELF::EI_DATA],
+      W.printBinary("Magic", makeArrayRef(E->e_ident).slice(ELF::EI_MAG0, 4));
+      W.printEnum("Class", E->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
+      W.printEnum("DataEncoding", E->e_ident[ELF::EI_DATA],
                   makeArrayRef(ElfDataEncoding));
-      W.printNumber("FileVersion", e->e_ident[ELF::EI_VERSION]);
+      W.printNumber("FileVersion", E->e_ident[ELF::EI_VERSION]);
 
       auto OSABI = makeArrayRef(ElfOSABI);
-      if (e->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
-          e->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
-        switch (e->e_machine) {
+      if (E->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
+          E->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
+        switch (E->e_machine) {
         case ELF::EM_AMDGPU:
           OSABI = makeArrayRef(AMDGPUElfOSABI);
           break;
@@ -4113,34 +4429,35 @@ template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
           break;
         }
       }
-      W.printEnum("OS/ABI", e->e_ident[ELF::EI_OSABI], OSABI);
-      W.printNumber("ABIVersion", e->e_ident[ELF::EI_ABIVERSION]);
-      W.printBinary("Unused", makeArrayRef(e->e_ident).slice(ELF::EI_PAD));
+      W.printEnum("OS/ABI", E->e_ident[ELF::EI_OSABI], OSABI);
+      W.printNumber("ABIVersion", E->e_ident[ELF::EI_ABIVERSION]);
+      W.printBinary("Unused", makeArrayRef(E->e_ident).slice(ELF::EI_PAD));
     }
 
-    W.printEnum("Type", e->e_type, makeArrayRef(ElfObjectFileType));
-    W.printEnum("Machine", e->e_machine, makeArrayRef(ElfMachineType));
-    W.printNumber("Version", e->e_version);
-    W.printHex("Entry", e->e_entry);
-    W.printHex("ProgramHeaderOffset", e->e_phoff);
-    W.printHex("SectionHeaderOffset", e->e_shoff);
-    if (e->e_machine == EM_MIPS)
-      W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+    W.printEnum("Type", E->e_type, makeArrayRef(ElfObjectFileType));
+    W.printEnum("Machine", E->e_machine, makeArrayRef(ElfMachineType));
+    W.printNumber("Version", E->e_version);
+    W.printHex("Entry", E->e_entry);
+    W.printHex("ProgramHeaderOffset", E->e_phoff);
+    W.printHex("SectionHeaderOffset", E->e_shoff);
+    if (E->e_machine == EM_MIPS)
+      W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderMipsFlags),
                    unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
                    unsigned(ELF::EF_MIPS_MACH));
-    else if (e->e_machine == EM_AMDGPU)
-      W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
+    else if (E->e_machine == EM_AMDGPU)
+      W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
                    unsigned(ELF::EF_AMDGPU_MACH));
-    else if (e->e_machine == EM_RISCV)
-      W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
+    else if (E->e_machine == EM_RISCV)
+      W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
     else
-      W.printFlags("Flags", e->e_flags);
-    W.printNumber("HeaderSize", e->e_ehsize);
-    W.printNumber("ProgramHeaderEntrySize", e->e_phentsize);
-    W.printNumber("ProgramHeaderCount", e->e_phnum);
-    W.printNumber("SectionHeaderEntrySize", e->e_shentsize);
+      W.printFlags("Flags", E->e_flags);
+    W.printNumber("HeaderSize", E->e_ehsize);
+    W.printNumber("ProgramHeaderEntrySize", E->e_phentsize);
+    W.printNumber("ProgramHeaderCount", E->e_phnum);
+    W.printNumber("SectionHeaderEntrySize", E->e_shentsize);
     W.printString("SectionHeaderCount", getSectionHeadersNumString(Obj));
-    W.printString("StringTableSectionIndex", getSectionHeaderTableIndexString(Obj));
+    W.printString("StringTableSectionIndex",
+                  getSectionHeaderTableIndexString(Obj));
   }
 }
 
@@ -4185,10 +4502,8 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
     ++SectionNumber;
 
-    if (Sec.sh_type != ELF::SHT_REL &&
-        Sec.sh_type != ELF::SHT_RELA &&
-        Sec.sh_type != ELF::SHT_RELR &&
-        Sec.sh_type != ELF::SHT_ANDROID_REL &&
+    if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
+        Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
         Sec.sh_type != ELF::SHT_ANDROID_RELA &&
         Sec.sh_type != ELF::SHT_ANDROID_RELR)
       continue;
@@ -4249,7 +4564,7 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
                                       const Elf_Shdr *SymTab) {
   SmallString<32> RelocName;
   Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
-  StringRef TargetName;
+  std::string TargetName;
   const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab));
   if (Sym && Sym->getType() == ELF::STT_SECTION) {
     const Elf_Shdr *Sec = unwrapOrError(
@@ -4257,7 +4572,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
     TargetName = unwrapOrError(Obj->getSectionName(Sec));
   } else if (Sym) {
     StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
-    TargetName = unwrapOrError(Sym->getName(StrTable));
+    TargetName = this->dumper()->getFullSymbolName(
+        Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
   }
 
   if (opts::ExpandRelocs) {
@@ -4270,8 +4586,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
   } else {
     raw_ostream &OS = W.startLine();
     OS << W.hex(Rel.r_offset) << " " << RelocName << " "
-       << (!TargetName.empty() ? TargetName : "-") << " "
-       << W.hex(Rel.r_addend) << "\n";
+       << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Rel.r_addend)
+       << "\n";
   }
 }
 
@@ -4280,13 +4596,12 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
   ListScope SectionsD(W, "Sections");
 
   int SectionIndex = -1;
-  for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
-    ++SectionIndex;
-
-    StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
-
+  ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
+  const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
+  for (const Elf_Shdr &Sec : Sections) {
+    StringRef Name = getSectionName(Sec, *ElfObj, Sections);
     DictScope SectionD(W, "Section");
-    W.printNumber("Index", SectionIndex);
+    W.printNumber("Index", ++SectionIndex);
     W.printNumber("Name", Name, Sec.sh_name);
     W.printHex(
         "Type",
@@ -4350,8 +4665,9 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
 
     if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
       ArrayRef<uint8_t> Data = unwrapOrError(Obj->getSectionContents(&Sec));
-      W.printBinaryBlock("SectionData",
-                         StringRef((const char *)Data.data(), Data.size()));
+      W.printBinaryBlock(
+          "SectionData",
+          StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()));
     }
   }
 }
@@ -4402,6 +4718,15 @@ void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
   W.printHex("Section", SectionName, SectionIndex);
 }
 
+template <class ELFT>
+void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
+                                   bool PrintDynamicSymbols) {
+  if (PrintSymbols) // Independent of the dynamic-symbols flag below.
+    printSymbols(Obj);
+  if (PrintDynamicSymbols) // Comes after the regular list when both are set.
+    printDynamicSymbols(Obj);
+}
+
 template <class ELFT> void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj) {
   ListScope Group(W, "Symbols");
   this->dumper()->printSymbolsHelper(false);
@@ -4413,6 +4738,31 @@ void LLVMStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
   this->dumper()->printSymbolsHelper(true);
 }
 
+/// Prints the dynamic table as a "DynamicSection [ ... ]" list; no-op if empty.
+template <class ELFT>
+void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Obj) {
+  Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+  if (Table.empty())
+    return;
+
+  raw_ostream &OS = W.getOStream();
+  W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n";
+  const bool Is64 = ELFT::Is64Bits;
+  const char *Header = "  Tag        Type                 Name/Value\n";
+  if (Is64) // The Tag column is wider for 64-bit objects (18 vs 10 below).
+    Header = "  Tag                Type                 Name/Value\n";
+  W.startLine() << Header;
+  for (const auto &Entry : Table) {
+    const uintX_t Tag = Entry.getTag();
+    W.startLine() << "  " << format_hex(Tag, Is64 ? 18 : 10, true) << " "
+                  << format("%-21s",
+                            getTypeString(Obj->getHeader()->e_machine, Tag));
+    this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+    OS << "\n";
+  }
+  W.startLine() << "]\n";
+}
+
 template <class ELFT>
 void LLVMStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
   const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();
@@ -4459,11 +4809,11 @@ template <class ELFT>
 void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
   SmallString<32> RelocName;
   Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
-  StringRef SymbolName;
+  std::string SymbolName;
   uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL());
   const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
-  SymbolName =
-      unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()));
+  SymbolName = maybeDemangle(
+      unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
   if (opts::ExpandRelocs) {
     DictScope Group(W, "Relocation");
     W.printHex("Offset", Rel.r_offset);
@@ -4473,11 +4823,21 @@ void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
   } else {
     raw_ostream &OS = W.startLine();
     OS << W.hex(Rel.r_offset) << " " << RelocName << " "
-       << (!SymbolName.empty() ? SymbolName : "-") << " "
-       << W.hex(Rel.r_addend) << "\n";
+       << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Rel.r_addend)
+       << "\n";
   }
 }
 
+template <class ELFT>
+void LLVMStyle<ELFT>::printProgramHeaders(
+    const ELFO *Obj, bool PrintProgramHeaders,
+    cl::boolOrDefault PrintSectionMapping) {
+  if (PrintProgramHeaders)
+    printProgramHeaders(Obj);
+  if (PrintSectionMapping == cl::BOU_TRUE)
+    printSectionMapping(Obj);
+}
+
 template <class ELFT>
 void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
   ListScope L(W, "ProgramHeaders");
@@ -4497,6 +4857,125 @@ void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
   }
 }
 
+template <class ELFT>
+void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                                const Elf_Shdr *Sec) {
+  DictScope SS(W, "Version symbols");
+  if (!Sec)
+    return;
+
+  StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
+  W.printNumber("Section Name", SecName, Sec->sh_name);
+  W.printHex("Address", Sec->sh_addr);
+  W.printHex("Offset", Sec->sh_offset);
+  W.printNumber("Link", Sec->sh_link);
+
+  const uint8_t *VersymBuf =
+      reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+  const ELFDumper<ELFT> *Dumper = this->dumper();
+  StringRef StrTable = Dumper->getDynamicStringTable();
+
+  // Same number of entries in the dynamic symbol table (DT_SYMTAB).
+  ListScope Syms(W, "Symbols");
+  for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) {
+    DictScope S(W, "Symbol");
+    const Elf_Versym *Versym = reinterpret_cast<const Elf_Versym *>(VersymBuf);
+    std::string FullSymbolName =
+        Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */);
+    W.printNumber("Version", Versym->vs_index & VERSYM_VERSION);
+    W.printString("Name", FullSymbolName);
+    VersymBuf += sizeof(Elf_Versym);
+  }
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                                    const Elf_Shdr *Sec) {
+  DictScope SD(W, "SHT_GNU_verdef");
+  if (!Sec)
+    return;
+
+  const uint8_t *SecStartAddress =
+      reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+  const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
+  const uint8_t *VerdefBuf = SecStartAddress;
+  const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+
+  unsigned VerDefsNum = Sec->sh_info;
+  while (VerDefsNum--) {
+    if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress)
+      // FIXME: report_fatal_error is not a good way to report error. We should
+      // emit a parsing error here and below.
+      report_fatal_error("invalid offset in the section");
+
+    const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+    DictScope Def(W, "Definition");
+    W.printNumber("Version", Verdef->vd_version);
+    W.printEnum("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags));
+    W.printNumber("Index", Verdef->vd_ndx);
+    W.printNumber("Hash", Verdef->vd_hash);
+    W.printString("Name", StringRef(reinterpret_cast<const char *>(
+                              Obj->base() + StrTab->sh_offset +
+                              Verdef->getAux()->vda_name)));
+    if (!Verdef->vd_cnt)
+      report_fatal_error("at least one definition string must exist");
+    if (Verdef->vd_cnt > 2)
+      report_fatal_error("more than one predecessor is not expected");
+
+    if (Verdef->vd_cnt == 2) {
+      const uint8_t *VerdauxBuf =
+          VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next;
+      const Elf_Verdaux *Verdaux =
+          reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+      W.printString("Predecessor",
+                    StringRef(reinterpret_cast<const char *>(
+                        Obj->base() + StrTab->sh_offset + Verdaux->vda_name)));
+    }
+    VerdefBuf += Verdef->vd_next;
+  }
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
+                                                    const Elf_Shdr *Sec) {
+  DictScope SD(W, "SHT_GNU_verneed");
+  if (!Sec)
+    return;
+
+  const uint8_t *SecData =
+      reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+  const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+
+  const uint8_t *VerneedBuf = SecData;
+  unsigned VerneedNum = Sec->sh_info;
+  for (unsigned I = 0; I < VerneedNum; ++I) {
+    const Elf_Verneed *Verneed =
+        reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+    DictScope Entry(W, "Dependency");
+    W.printNumber("Version", Verneed->vn_version);
+    W.printNumber("Count", Verneed->vn_cnt);
+    W.printString("FileName",
+                  StringRef(reinterpret_cast<const char *>(
+                      Obj->base() + StrTab->sh_offset + Verneed->vn_file)));
+
+    const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+    ListScope L(W, "Entries");
+    for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+      const Elf_Vernaux *Vernaux =
+          reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+      DictScope Entry(W, "Entry");
+      W.printNumber("Hash", Vernaux->vna_hash);
+      W.printEnum("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags));
+      W.printNumber("Index", Vernaux->vna_other);
+      W.printString("Name",
+                    StringRef(reinterpret_cast<const char *>(
+                        Obj->base() + StrTab->sh_offset + Vernaux->vna_name)));
+      VernauxBuf += Vernaux->vna_next;
+    }
+    VerneedBuf += Verneed->vn_next;
+  }
+}
+
 template <class ELFT>
 void LLVMStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
   W.startLine() << "Hash Histogram not implemented!\n";
@@ -4542,8 +5021,7 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
 }
 
 template <typename ELFT>
-static void printGNUNoteLLVMStyle(uint32_t NoteType,
-                                  ArrayRef<uint8_t> Desc,
+static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
                                   ScopedPrinter &W) {
   switch (NoteType) {
   default:
@@ -4576,8 +5054,6 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType,
 template <class ELFT>
 void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
   ListScope L(W, "Notes");
-  const Elf_Ehdr *e = Obj->getHeader();
-  bool IsCore = e->e_type == ELF::ET_CORE;
 
   auto PrintHeader = [&](const typename ELFT::Off Offset,
                          const typename ELFT::Addr Size) {
@@ -4609,11 +5085,16 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
       if (!N.Type.empty())
         W.printString(N.Type, N.Value);
     } else {
-      W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
+      StringRef NoteType = getGenericNoteTypeName(Type);
+      if (!NoteType.empty())
+        W.printString("Type", NoteType);
+      else
+        W.printString("Type",
+                      "Unknown (" + to_string(format_hex(Type, 10)) + ")");
     }
   };
 
-  if (IsCore) {
+  if (Obj->getHeader()->e_type == ELF::ET_CORE) {
     for (const auto &P : unwrapOrError(Obj->program_headers())) {
       if (P.p_type != PT_NOTE)
         continue;
diff --git a/tools/llvm-readobj/Error.cpp b/tools/llvm-readobj/Error.cpp
index 03d349440e6b..1010f18a58c8 100644
--- a/tools/llvm-readobj/Error.cpp
+++ b/tools/llvm-readobj/Error.cpp
@@ -1,9 +1,8 @@
 //===- Error.cpp - system_error extensions for llvm-readobj -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-readobj/Error.h b/tools/llvm-readobj/Error.h
index f3e24bbe5dbf..f390e1b96f8a 100644
--- a/tools/llvm-readobj/Error.h
+++ b/tools/llvm-readobj/Error.h
@@ -1,9 +1,8 @@
 //===- Error.h - system_error extensions for llvm-readobj -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index 35e4cfcb6b10..32a3866eb2f2 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -1,9 +1,8 @@
-//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
+//===- MachODumper.cpp - Object file dumping utility for llvm -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,8 +33,6 @@ public:
   void printFileHeaders() override;
   void printSectionHeaders() override;
   void printRelocations() override;
-  void printSymbols() override;
-  void printDynamicSymbols() override;
   void printUnwindInfo() override;
   void printStackMap() const override;
 
@@ -53,6 +50,8 @@ private:
   template<class MachHeader>
   void printFileHeaders(const MachHeader &Header);
 
+  void printSymbols() override;
+  void printDynamicSymbols() override;
   void printSymbol(const SymbolRef &Symbol);
 
   void printRelocation(const RelocationRef &Reloc);
@@ -163,6 +162,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
 
 static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
   LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
+  LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
 };
 
 static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
@@ -483,15 +483,8 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
       }
     }
 
-    if (opts::SectionData) {
-      bool IsBSS = Section.isBSS();
-      if (!IsBSS) {
-        StringRef Data;
-        error(Section.getContents(Data));
-
-        W.printBinaryBlock("SectionData", Data);
-      }
-    }
+    if (opts::SectionData && !Section.isBSS())
+      W.printBinaryBlock("SectionData", unwrapOrError(Section.getContents()));
   }
 }
 
@@ -660,18 +653,16 @@ void MachODumper::printStackMap() const {
   if (StackMapSection == object::SectionRef())
     return;
 
-  StringRef StackMapContents;
-  StackMapSection.getContents(StackMapContents);
-  ArrayRef<uint8_t> StackMapContentsArray(
-      reinterpret_cast<const uint8_t*>(StackMapContents.data()),
-      StackMapContents.size());
+  StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+  ArrayRef<uint8_t> StackMapContentsArray =
+      arrayRefFromStringRef(StackMapContents);
 
   if (Obj->isLittleEndian())
     prettyPrintStackMap(
-        W, StackMapV2Parser<support::little>(StackMapContentsArray));
+        W, StackMapParser<support::little>(StackMapContentsArray));
   else
-    prettyPrintStackMap(W,
-                        StackMapV2Parser<support::big>(StackMapContentsArray));
+    prettyPrintStackMap(
+        W, StackMapParser<support::big>(StackMapContentsArray));
 }
 
 void MachODumper::printNeededLibraries() {
@@ -695,10 +686,10 @@ void MachODumper::printNeededLibraries() {
     }
   }
 
-  std::stable_sort(Libs.begin(), Libs.end());
+  llvm::stable_sort(Libs);
 
   for (const auto &L : Libs) {
-    outs() << "  " << L << "\n";
+    W.startLine() << L << "\n";
   }
 }
 
diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp
index a725140c9d33..0a9e22c8a71c 100644
--- a/tools/llvm-readobj/ObjDumper.cpp
+++ b/tools/llvm-readobj/ObjDumper.cpp
@@ -1,9 +1,8 @@
 //===-- ObjDumper.cpp - Base dumper class -----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -17,8 +16,10 @@
 #include "llvm-readobj.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/raw_ostream.h"
+#include <map>
 
 namespace llvm {
 
@@ -32,116 +33,127 @@ static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
     W << (isPrint(Start[i]) ? static_cast<char>(Start[i]) : '.');
 }
 
-static Expected<object::SectionRef>
-getSecNameOrIndexAsSecRef(const object::ObjectFile *Obj, StringRef SecName) {
-  char *StrPtr;
-  long SectionIndex = strtol(SecName.data(), &StrPtr, 10);
-  object::SectionRef Section;
-  long SecIndex;
-  if (Obj->isELF())
-    SecIndex = 0;
-  else
-    SecIndex = 1;
-  for (object::SectionRef SecRef : Obj->sections()) {
-    if (*StrPtr) {
-      StringRef SectionName;
-
-      if (std::error_code E = SecRef.getName(SectionName))
-        return errorCodeToError(E);
-
-      if (SectionName == SecName)
-        return SecRef;
-    } else if (SecIndex == SectionIndex)
-      return SecRef;
+static std::vector<object::SectionRef>
+getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
+                            ArrayRef<std::string> Sections) {
+  std::vector<object::SectionRef> Ret;
+  std::map<std::string, bool> SecNames;
+  std::map<unsigned, bool> SecIndices;
+  unsigned SecIndex;
+  for (StringRef Section : Sections) {
+    if (!Section.getAsInteger(0, SecIndex))
+      SecIndices.emplace(SecIndex, false);
+    else
+      SecNames.emplace(Section, false);
+  }
 
+  SecIndex = Obj->isELF() ? 0 : 1;
+  for (object::SectionRef SecRef : Obj->sections()) {
+    StringRef SecName;
+    error(SecRef.getName(SecName));
+    auto NameIt = SecNames.find(SecName);
+    if (NameIt != SecNames.end())
+      NameIt->second = true;
+    auto IndexIt = SecIndices.find(SecIndex);
+    if (IndexIt != SecIndices.end())
+      IndexIt->second = true;
+    if (NameIt != SecNames.end() || IndexIt != SecIndices.end())
+      Ret.push_back(SecRef);
     SecIndex++;
   }
-  return make_error<StringError>("invalid section reference",
-                                 object::object_error::parse_failed);
+
+  for (const std::pair<std::string, bool> &S : SecNames)
+    if (!S.second)
+      reportWarning(formatv("could not find section '{0}'", S.first).str());
+  for (std::pair<unsigned, bool> S : SecIndices)
+    if (!S.second)
+      reportWarning(formatv("could not find section {0}", S.first).str());
+
+  return Ret;
 }
 
-void ObjDumper::printSectionAsString(const object::ObjectFile *Obj,
-                                     StringRef SecName) {
-  Expected<object::SectionRef> SectionRefOrError =
-      getSecNameOrIndexAsSecRef(Obj, SecName);
-  if (!SectionRefOrError)
-    error(std::move(SectionRefOrError));
-  object::SectionRef Section = *SectionRefOrError;
-  StringRef SectionName;
-
-  if (std::error_code E = Section.getName(SectionName))
-    error(E);
-  W.startLine() << "String dump of section '" << SectionName << "':\n";
-
-  StringRef SectionContent;
-  Section.getContents(SectionContent);
-
-  const uint8_t *SecContent = SectionContent.bytes_begin();
-  const uint8_t *CurrentWord = SecContent;
-  const uint8_t *SecEnd = SectionContent.bytes_end();
-
-  while (CurrentWord <= SecEnd) {
-    size_t WordSize = strnlen(reinterpret_cast<const char *>(CurrentWord),
-                              SecEnd - CurrentWord);
-    if (!WordSize) {
-      CurrentWord++;
-      continue;
+void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
+                                      ArrayRef<std::string> Sections) {
+  bool First = true;
+  for (object::SectionRef Section :
+       getSectionRefsByNameOrIndex(Obj, Sections)) {
+    StringRef SectionName;
+    error(Section.getName(SectionName));
+    if (!First)
+      W.startLine() << '\n';
+    First = false;
+    W.startLine() << "String dump of section '" << SectionName << "':\n";
+
+    StringRef SectionContent = unwrapOrError(Section.getContents());
+
+    const uint8_t *SecContent = SectionContent.bytes_begin();
+    const uint8_t *CurrentWord = SecContent;
+    const uint8_t *SecEnd = SectionContent.bytes_end();
+
+    while (CurrentWord <= SecEnd) {
+      size_t WordSize = strnlen(reinterpret_cast<const char *>(CurrentWord),
+                                SecEnd - CurrentWord);
+      if (!WordSize) {
+        CurrentWord++;
+        continue;
+      }
+      W.startLine() << format("[%6tx] ", CurrentWord - SecContent);
+      printAsPrintable(W.startLine(), CurrentWord, WordSize);
+      W.startLine() << '\n';
+      CurrentWord += WordSize + 1;
     }
-    W.startLine() << format("[%6tx] ", CurrentWord - SecContent);
-    printAsPrintable(W.startLine(), CurrentWord, WordSize);
-    W.startLine() << '\n';
-    CurrentWord += WordSize + 1;
   }
 }
 
-void ObjDumper::printSectionAsHex(const object::ObjectFile *Obj,
-                                  StringRef SecName) {
-  Expected<object::SectionRef> SectionRefOrError =
-      getSecNameOrIndexAsSecRef(Obj, SecName);
-  if (!SectionRefOrError)
-    error(std::move(SectionRefOrError));
-  object::SectionRef Section = *SectionRefOrError;
-  StringRef SectionName;
-
-  if (std::error_code E = Section.getName(SectionName))
-    error(E);
-  W.startLine() << "Hex dump of section '" << SectionName << "':\n";
-
-  StringRef SectionContent;
-  Section.getContents(SectionContent);
-  const uint8_t *SecContent = SectionContent.bytes_begin();
-  const uint8_t *SecEnd = SecContent + SectionContent.size();
-
-  for (const uint8_t *SecPtr = SecContent; SecPtr < SecEnd; SecPtr += 16) {
-    const uint8_t *TmpSecPtr = SecPtr;
-    uint8_t i;
-    uint8_t k;
-
-    W.startLine() << format_hex(SecPtr - SecContent, 10);
-    W.startLine() << ' ';
-    for (i = 0; TmpSecPtr < SecEnd && i < 4; ++i) {
-      for (k = 0; TmpSecPtr < SecEnd && k < 4; k++, TmpSecPtr++) {
-        uint8_t Val = *(reinterpret_cast<const uint8_t *>(TmpSecPtr));
-        W.startLine() << format_hex_no_prefix(Val, 2);
-      }
+void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
+                                   ArrayRef<std::string> Sections) {
+  bool First = true;
+  for (object::SectionRef Section :
+       getSectionRefsByNameOrIndex(Obj, Sections)) {
+    StringRef SectionName;
+    error(Section.getName(SectionName));
+    if (!First)
+      W.startLine() << '\n';
+    First = false;
+    W.startLine() << "Hex dump of section '" << SectionName << "':\n";
+
+    StringRef SectionContent = unwrapOrError(Section.getContents());
+    const uint8_t *SecContent = SectionContent.bytes_begin();
+    const uint8_t *SecEnd = SecContent + SectionContent.size();
+
+    for (const uint8_t *SecPtr = SecContent; SecPtr < SecEnd; SecPtr += 16) {
+      const uint8_t *TmpSecPtr = SecPtr;
+      uint8_t i;
+      uint8_t k;
+
+      W.startLine() << format_hex(Section.getAddress() + (SecPtr - SecContent),
+                                  10);
       W.startLine() << ' ';
-    }
+      for (i = 0; TmpSecPtr < SecEnd && i < 4; ++i) {
+        for (k = 0; TmpSecPtr < SecEnd && k < 4; k++, TmpSecPtr++) {
+          uint8_t Val = *(reinterpret_cast<const uint8_t *>(TmpSecPtr));
+          W.startLine() << format_hex_no_prefix(Val, 2);
+        }
+        W.startLine() << ' ';
+      }
 
-    // We need to print the correct amount of spaces to match the format.
-    // We are adding the (4 - i) last rows that are 8 characters each.
-    // Then, the (4 - i) spaces that are in between the rows.
-    // Least, if we cut in a middle of a row, we add the remaining characters,
-    // which is (8 - (k * 2))
-    if (i < 4)
-      W.startLine() << format("%*c", (4 - i) * 8 + (4 - i) + (8 - (k * 2)),
-                              ' ');
-
-    TmpSecPtr = SecPtr;
-    for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
-      W.startLine() << (isPrint(TmpSecPtr[i]) ? static_cast<char>(TmpSecPtr[i])
-                                              : '.');
-
-    W.startLine() << '\n';
+      // We need to print the correct amount of spaces to match the format.
+      // We are adding the (4 - i) last rows that are 8 characters each.
+      // Then, the (4 - i) spaces that are in between the rows.
+      // Least, if we cut in a middle of a row, we add the remaining characters,
+      // which is (8 - (k * 2)).
+      if (i < 4)
+        W.startLine() << format("%*c", (4 - i) * 8 + (4 - i) + (8 - (k * 2)),
+                                ' ');
+
+      TmpSecPtr = SecPtr;
+      for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
+        W.startLine() << (isPrint(TmpSecPtr[i])
+                              ? static_cast<char>(TmpSecPtr[i])
+                              : '.');
+
+      W.startLine() << '\n';
+    }
   }
 }
 
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index 13de563469ab..aaabfa2ca2e8 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -1,9 +1,8 @@
 //===-- ObjDumper.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,6 +14,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
 
 namespace llvm {
 namespace object {
@@ -22,8 +22,9 @@ class COFFImportFile;
 class ObjectFile;
 }
 namespace codeview {
+class GlobalTypeTableBuilder;
 class MergingTypeTableBuilder;
-}
+} // namespace codeview
 
 class ScopedPrinter;
 
@@ -35,18 +36,30 @@ public:
   virtual void printFileHeaders() = 0;
   virtual void printSectionHeaders() = 0;
   virtual void printRelocations() = 0;
-  virtual void printSymbols() = 0;
-  virtual void printDynamicSymbols() = 0;
+  virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) {
+    if (PrintSymbols)
+      printSymbols();
+    if (PrintDynamicSymbols)
+      printDynamicSymbols();
+  }
+  virtual void printProgramHeaders(bool PrintProgramHeaders,
+                                   cl::boolOrDefault PrintSectionMapping) {
+    if (PrintProgramHeaders)
+      printProgramHeaders();
+    if (PrintSectionMapping == cl::BOU_TRUE)
+      printSectionMapping();
+  }
+
   virtual void printUnwindInfo() = 0;
 
   // Only implemented for ELF at this time.
   virtual void printDynamicRelocations() { }
   virtual void printDynamicTable() { }
   virtual void printNeededLibraries() { }
-  virtual void printProgramHeaders() { }
   virtual void printSectionAsHex(StringRef SectionName) {}
   virtual void printHashTable() { }
   virtual void printGnuHashTable() { }
+  virtual void printHashSymbols() {}
   virtual void printLoadName() {}
   virtual void printVersionInfo() {}
   virtual void printGroupSections() {}
@@ -76,7 +89,10 @@ public:
   virtual void printCodeViewDebugInfo() { }
   virtual void
   mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
-                     llvm::codeview::MergingTypeTableBuilder &CVTypes) {}
+                     llvm::codeview::MergingTypeTableBuilder &CVTypes,
+                     llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+                     llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+                     bool GHash) {}
 
   // Only implemented for MachO.
   virtual void printMachODataInCode() { }
@@ -88,11 +104,19 @@ public:
 
   virtual void printStackMap() const = 0;
 
-  void printSectionAsString(const object::ObjectFile *Obj, StringRef SecName);
-  void printSectionAsHex(const object::ObjectFile *Obj, StringRef SecName);
+  void printSectionsAsString(const object::ObjectFile *Obj,
+                             ArrayRef<std::string> Sections);
+  void printSectionsAsHex(const object::ObjectFile *Obj,
+                          ArrayRef<std::string> Sections);
 
 protected:
   ScopedPrinter &W;
+
+private:
+  virtual void printSymbols() {}
+  virtual void printDynamicSymbols() {}
+  virtual void printProgramHeaders() {}
+  virtual void printSectionMapping() {}
 };
 
 std::error_code createCOFFDumper(const object::ObjectFile *Obj,
@@ -111,12 +135,16 @@ std::error_code createWasmDumper(const object::ObjectFile *Obj,
                                  ScopedPrinter &Writer,
                                  std::unique_ptr<ObjDumper> &Result);
 
+std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
+                                  ScopedPrinter &Writer,
+                                  std::unique_ptr<ObjDumper> &Result);
+
 void dumpCOFFImportFile(const object::COFFImportFile *File,
                         ScopedPrinter &Writer);
 
-void dumpCodeViewMergedTypes(
-    ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
-    llvm::codeview::MergingTypeTableBuilder &TypeTable);
+void dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+                             ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+                             ArrayRef<ArrayRef<uint8_t>> TpiRecords);
 
 } // namespace llvm
 
diff --git a/tools/llvm-readobj/StackMapPrinter.h b/tools/llvm-readobj/StackMapPrinter.h
index 77a054b178a5..ef7575640268 100644
--- a/tools/llvm-readobj/StackMapPrinter.h
+++ b/tools/llvm-readobj/StackMapPrinter.h
@@ -1,9 +1,8 @@
 //===-------- StackMapPrinter.h - Pretty-print stackmaps --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -48,24 +47,24 @@ void prettyPrintStackMap(ScopedPrinter &W, const StackMapParserT &SMP) {
       OS << "      #" << ++LocationIndex << ": ";
       switch (Loc.getKind()) {
       case StackMapParserT::LocationKind::Register:
-        OS << "Register R#" << Loc.getDwarfRegNum() << "\n";
+        OS << "Register R#" << Loc.getDwarfRegNum();
         break;
       case StackMapParserT::LocationKind::Direct:
-        OS << "Direct R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset()
-           << "\n";
+        OS << "Direct R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset();
         break;
       case StackMapParserT::LocationKind::Indirect:
         OS << "Indirect [R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset()
-           << "]\n";
+           << "]";
         break;
       case StackMapParserT::LocationKind::Constant:
-        OS << "Constant " << Loc.getSmallConstant() << "\n";
+        OS << "Constant " << Loc.getSmallConstant();
         break;
       case StackMapParserT::LocationKind::ConstantIndex:
         OS << "ConstantIndex #" << Loc.getConstantIndex() << " ("
-           << SMP.getConstant(Loc.getConstantIndex()).getValue() << ")\n";
+           << SMP.getConstant(Loc.getConstantIndex()).getValue() << ")";
         break;
       }
+      OS << ", size: " << Loc.getSizeInBytes() << "\n";
     }
 
     raw_ostream &OS = W.startLine();
diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp
index 79d3db4e2d29..041a9a15bdb6 100644
--- a/tools/llvm-readobj/WasmDumper.cpp
+++ b/tools/llvm-readobj/WasmDumper.cpp
@@ -1,9 +1,8 @@
 //===-- WasmDumper.cpp - Wasm-specific object file dumper -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -33,11 +32,25 @@ static const EnumEntry<unsigned> WasmSymbolTypes[] = {
 static const EnumEntry<uint32_t> WasmSectionTypes[] = {
 #define ENUM_ENTRY(X)                                                          \
   { #X, wasm::WASM_SEC_##X }
-    ENUM_ENTRY(CUSTOM),   ENUM_ENTRY(TYPE),  ENUM_ENTRY(IMPORT),
-    ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE), ENUM_ENTRY(MEMORY),
-    ENUM_ENTRY(GLOBAL),   ENUM_ENTRY(EVENT), ENUM_ENTRY(EXPORT),
-    ENUM_ENTRY(START),    ENUM_ENTRY(ELEM),  ENUM_ENTRY(CODE),
-    ENUM_ENTRY(DATA),
+    ENUM_ENTRY(CUSTOM),   ENUM_ENTRY(TYPE),      ENUM_ENTRY(IMPORT),
+    ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE),     ENUM_ENTRY(MEMORY),
+    ENUM_ENTRY(GLOBAL),   ENUM_ENTRY(EVENT),     ENUM_ENTRY(EXPORT),
+    ENUM_ENTRY(START),    ENUM_ENTRY(ELEM),      ENUM_ENTRY(CODE),
+    ENUM_ENTRY(DATA),     ENUM_ENTRY(DATACOUNT),
+#undef ENUM_ENTRY
+};
+
+static const EnumEntry<unsigned> WasmSymbolFlags[] = {
+#define ENUM_ENTRY(X)                                                          \
+  { #X, wasm::WASM_SYMBOL_##X }
+  ENUM_ENTRY(BINDING_GLOBAL),
+  ENUM_ENTRY(BINDING_WEAK),
+  ENUM_ENTRY(BINDING_LOCAL),
+  ENUM_ENTRY(VISIBILITY_DEFAULT),
+  ENUM_ENTRY(VISIBILITY_HIDDEN),
+  ENUM_ENTRY(UNDEFINED),
+  ENUM_ENTRY(EXPORTED),
+  ENUM_ENTRY(EXPLICIT_NAME),
 #undef ENUM_ENTRY
 };
 
@@ -49,8 +62,6 @@ public:
   void printFileHeaders() override;
   void printSectionHeaders() override;
   void printRelocations() override;
-  void printSymbols() override;
-  void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
   void printUnwindInfo() override { llvm_unreachable("unimplemented"); }
   void printStackMap() const override { llvm_unreachable("unimplemented"); }
 
@@ -59,6 +70,9 @@ protected:
   void printRelocation(const SectionRef &Section, const RelocationRef &Reloc);
 
 private:
+  void printSymbols() override;
+  void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
+
   const WasmObjectFile *Obj;
 };
 
@@ -80,11 +94,11 @@ void WasmDumper::printRelocation(const SectionRef &Section,
 
   bool HasAddend = false;
   switch (RelocType) {
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
-  case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
-  case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
-  case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+  case wasm::R_WASM_MEMORY_ADDR_LEB:
+  case wasm::R_WASM_MEMORY_ADDR_SLEB:
+  case wasm::R_WASM_MEMORY_ADDR_I32:
+  case wasm::R_WASM_FUNCTION_OFFSET_I32:
+  case wasm::R_WASM_SECTION_OFFSET_I32:
     HasAddend = true;
     break;
   default:
@@ -209,7 +223,19 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
   WasmSymbol Symbol = Obj->getWasmSymbol(Sym.getRawDataRefImpl());
   W.printString("Name", Symbol.Info.Name);
   W.printEnum("Type", Symbol.Info.Kind, makeArrayRef(WasmSymbolTypes));
-  W.printHex("Flags", Symbol.Info.Flags);
+  W.printFlags("Flags", Symbol.Info.Flags, makeArrayRef(WasmSymbolFlags));
+
+  if (Symbol.Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) {
+    W.printString("ImportName", Symbol.Info.ImportName);
+    W.printString("ImportModule", Symbol.Info.ImportModule);
+  }
+  if (Symbol.Info.Kind != wasm::WASM_SYMBOL_TYPE_DATA) {
+    W.printHex("ElementIndex", Symbol.Info.ElementIndex);
+  } else if (!(Symbol.Info.Flags & wasm::WASM_SYMBOL_UNDEFINED)) {
+    W.printHex("Offset", Symbol.Info.DataRef.Offset);
+    W.printHex("Segment", Symbol.Info.DataRef.Segment);
+    W.printHex("Size", Symbol.Info.DataRef.Size);
+  }
 }
 
 } // namespace
@@ -219,7 +245,7 @@ namespace llvm {
 std::error_code createWasmDumper(const object::ObjectFile *Obj,
                                  ScopedPrinter &Writer,
                                  std::unique_ptr<ObjDumper> &Result) {
-  const WasmObjectFile *WasmObj = dyn_cast<WasmObjectFile>(Obj);
+  const auto *WasmObj = dyn_cast<WasmObjectFile>(Obj);
   assert(WasmObj && "createWasmDumper called with non-wasm object");
 
   Result.reset(new WasmDumper(WasmObj, Writer));
diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp
index f7e56b361542..e64b8f157180 100644
--- a/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/tools/llvm-readobj/Win64EHDumper.cpp
@@ -1,9 +1,8 @@
 //===- Win64EHDumper.cpp - Win64 EH Printer ---------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-readobj/Win64EHDumper.h b/tools/llvm-readobj/Win64EHDumper.h
index 772f68bf283f..97458c916bec 100644
--- a/tools/llvm-readobj/Win64EHDumper.h
+++ b/tools/llvm-readobj/Win64EHDumper.h
@@ -1,9 +1,8 @@
 //===- Win64EHDumper.h - Win64 EH Printing ----------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-readobj/WindowsResourceDumper.cpp b/tools/llvm-readobj/WindowsResourceDumper.cpp
index 1f568a963671..13989f696d9d 100644
--- a/tools/llvm-readobj/WindowsResourceDumper.cpp
+++ b/tools/llvm-readobj/WindowsResourceDumper.cpp
@@ -1,9 +1,8 @@
 //===-- WindowsResourceDumper.cpp - Windows Resource printer --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-readobj/WindowsResourceDumper.h b/tools/llvm-readobj/WindowsResourceDumper.h
index ca6da4046605..6a5878804eb1 100644
--- a/tools/llvm-readobj/WindowsResourceDumper.h
+++ b/tools/llvm-readobj/WindowsResourceDumper.h
@@ -1,9 +1,8 @@
 //===- WindowsResourceDumper.h - Windows Resource printer -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/tools/llvm-readobj/XCOFFDumper.cpp b/tools/llvm-readobj/XCOFFDumper.cpp
new file mode 100644
index 000000000000..6f260f91537f
--- /dev/null
+++ b/tools/llvm-readobj/XCOFFDumper.cpp
@@ -0,0 +1,190 @@
+//===-- XCOFFDumper.cpp - XCOFF dumping utility -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an XCOFF specific dumper for llvm-readobj.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "ObjDumper.h"
+#include "llvm-readobj.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+
+class XCOFFDumper : public ObjDumper {
+public:
+  XCOFFDumper(const XCOFFObjectFile &Obj, ScopedPrinter &Writer)
+      : ObjDumper(Writer), Obj(Obj) {}
+
+  void printFileHeaders() override;
+  void printSectionHeaders() override;
+  void printRelocations() override;
+  void printSymbols() override;
+  void printDynamicSymbols() override;
+  void printUnwindInfo() override;
+  void printStackMap() const override;
+  void printNeededLibraries() override;
+
+private:
+  template <typename T> void printSectionHeaders(ArrayRef<T> Sections);
+
+  const XCOFFObjectFile &Obj;
+
+  // Least significant 3 bits are reserved.
+  static constexpr unsigned SectionFlagsReservedMask = 0x7;
+};
+} // anonymous namespace
+
+void XCOFFDumper::printFileHeaders() {
+  DictScope DS(W, "FileHeader");
+  W.printHex("Magic", Obj.getMagic());
+  W.printNumber("NumberOfSections", Obj.getNumberOfSections());
+
+  // Negative timestamp values are reserved for future use.
+  int32_t TimeStamp = Obj.getTimeStamp();
+  if (TimeStamp > 0) {
+    // Assumes the host time_t is compatible with AIX time_t (the lit tests
+    // will flag incompatible platforms). If formatting fails, fall back to
+    // the raw value under the same "TimeStamp" key.
+    time_t TimeDate = TimeStamp;
+    // "YYYY-MM-DDTHH:MM:SSZ" is 20 characters, plus the terminating NUL.
+    char FormattedTime[21] = {};
+    size_t BytesWritten =
+        strftime(FormattedTime, 21, "%Y-%m-%dT%H:%M:%SZ", gmtime(&TimeDate));
+    if (BytesWritten)
+      W.printHex("TimeStamp", FormattedTime, TimeStamp);
+    else
+      W.printHex("TimeStamp", TimeStamp);
+  } else {
+    W.printHex("TimeStamp", TimeStamp == 0 ? "None" : "Reserved Value",
+               TimeStamp);
+  }
+
+  // The number of symbol table entries is an unsigned value in 64-bit objects
+  // and a signed value (with negative values being 'reserved') in 32-bit
+  // objects.
+  if (Obj.is64Bit()) {
+    W.printHex("SymbolTableOffset", Obj.getSymbolTableOffset64());
+    W.printNumber("SymbolTableEntries", Obj.getNumberOfSymbolTableEntries64());
+  } else {
+    W.printHex("SymbolTableOffset", Obj.getSymbolTableOffset32());
+    int32_t SymTabEntries = Obj.getRawNumberOfSymbolTableEntries32();
+    if (SymTabEntries >= 0)
+      W.printNumber("SymbolTableEntries", SymTabEntries);
+    else
+      W.printHex("SymbolTableEntries", "Reserved Value", SymTabEntries);
+  }
+
+  W.printHex("OptionalHeaderSize", Obj.getOptionalHeaderSize());
+  W.printHex("Flags", Obj.getFlags());
+
+  // TODO FIXME Add support for the auxiliary header (if any) once
+  // XCOFFObjectFile has the necessary support.
+}
+
+void XCOFFDumper::printSectionHeaders() {
+  if (Obj.is64Bit())
+    printSectionHeaders(Obj.sections64());
+  else
+    printSectionHeaders(Obj.sections32());
+}
+
+void XCOFFDumper::printRelocations() {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printSymbols() {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printDynamicSymbols() {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printUnwindInfo() {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printStackMap() const {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printNeededLibraries() {
+  llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+static const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
+#define ECase(X)                                                               \
+  { #X, XCOFF::X }
+    ECase(STYP_PAD),    ECase(STYP_DWARF), ECase(STYP_TEXT),
+    ECase(STYP_DATA),   ECase(STYP_BSS),   ECase(STYP_EXCEPT),
+    ECase(STYP_INFO),   ECase(STYP_TDATA), ECase(STYP_TBSS),
+    ECase(STYP_LOADER), ECase(STYP_DEBUG), ECase(STYP_TYPCHK),
+    ECase(STYP_OVRFLO)
+#undef ECase
+};
+
+template <typename T>
+void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
+  ListScope Group(W, "Sections");
+
+  uint16_t Index = 1;
+  for (const T &Sec : Sections) {
+    DictScope SecDS(W, "Section");
+
+    W.printNumber("Index", Index++);
+    W.printString("Name", Sec.getName());
+
+    W.printHex("PhysicalAddress", Sec.PhysicalAddress);
+    W.printHex("VirtualAddress", Sec.VirtualAddress);
+    W.printHex("Size", Sec.SectionSize);
+    W.printHex("RawDataOffset", Sec.FileOffsetToRawData);
+    W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo);
+    W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo);
+
+    // TODO Need to add overflow handling when NumberOfX == _OVERFLOW_MARKER
+    // in 32-bit object files.
+    W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations);
+    W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
+
+    // The most significant 16-bits represent the DWARF section subtype. For
+    // now we just dump the section type flags.
+    uint16_t Flags = Sec.Flags & 0xffffu;
+    if (Flags & SectionFlagsReservedMask)
+      W.printHex("Flags", "Reserved", Flags);
+    else
+      W.printEnum("Type", Flags, makeArrayRef(SectionTypeFlagsNames));
+  }
+
+  if (opts::SectionRelocations)
+    report_fatal_error("Dumping section relocations is unimplemented");
+
+  if (opts::SectionSymbols)
+    report_fatal_error("Dumping symbols is unimplemented");
+
+  if (opts::SectionData)
+    report_fatal_error("Dumping section data is unimplemented");
+}
+
+namespace llvm {
+std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
+                                  ScopedPrinter &Writer,
+                                  std::unique_ptr<ObjDumper> &Result) {
+  // Only XCOFF objects get a dumper; Result is left untouched otherwise.
+  if (const auto *XObj = dyn_cast<XCOFFObjectFile>(Obj)) {
+    Result.reset(new XCOFFDumper(*XObj, Writer));
+    return readobj_error::success;
+  }
+  return readobj_error::unsupported_obj_file_format;
+}
+} // namespace llvm
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 81ce7a590364..1bd5bb74bf29 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -1,9 +1,8 @@
 //===- llvm-readobj.cpp - Dump contents of an Object File -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "Error.h"
 #include "ObjDumper.h"
 #include "WindowsResourceDumper.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/COFFImportFile.h"
@@ -39,6 +39,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/WithColor.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -48,7 +49,7 @@ namespace opts {
     cl::desc("<input object files>"),
     cl::ZeroOrMore);
 
-  // -all, -a
+  // --all, -a
   cl::opt<bool>
       All("all",
           cl::desc("Equivalent to setting: --file-headers, --program-headers, "
@@ -65,7 +66,7 @@ namespace opts {
   cl::alias HeadersShort("e", cl::desc("Alias for --headers"),
      cl::aliasopt(Headers));
 
-  // -wide, -W
+  // --wide, -W
   cl::opt<bool>
       WideOutput("wide", cl::desc("Ignored for compatibility with GNU readelf"),
                  cl::Hidden);
@@ -73,7 +74,7 @@ namespace opts {
     cl::desc("Alias for --wide"),
     cl::aliasopt(WideOutput));
 
-  // -file-headers, -file-header, -h
+  // --file-headers, --file-header, -h
   cl::opt<bool> FileHeaders("file-headers",
     cl::desc("Display file headers "));
   cl::alias FileHeadersShort("h", cl::desc("Alias for --file-headers"),
@@ -82,7 +83,7 @@ namespace opts {
                                 cl::desc("Alias for --file-headers"),
                                 cl::aliasopt(FileHeaders));
 
-  // -section-headers, -sections, -S
+  // --section-headers, --sections, -S
   // Also -s in llvm-readobj mode.
   cl::opt<bool> SectionHeaders("section-headers",
                                cl::desc("Display all section headers."));
@@ -92,22 +93,27 @@ namespace opts {
                                 cl::desc("Alias for --section-headers"),
                                 cl::aliasopt(SectionHeaders), cl::NotHidden);
 
-  // -section-relocations
-  // Also -sr in llvm-readobj mode.
+  // --section-relocations
+  // Also --sr in llvm-readobj mode.
   cl::opt<bool> SectionRelocations("section-relocations",
     cl::desc("Display relocations for each section shown."));
 
-  // -section-symbols
-  // Also -st in llvm-readobj mode.
+  // --section-symbols
+  // Also --st in llvm-readobj mode.
   cl::opt<bool> SectionSymbols("section-symbols",
     cl::desc("Display symbols for each section shown."));
 
-  // -section-data
-  // Also -sd in llvm-readobj mode.
+  // --section-data
+  // Also --sd in llvm-readobj mode.
   cl::opt<bool> SectionData("section-data",
     cl::desc("Display section data for each section shown."));
 
-  // -relocations, -relocs, -r
+  // --section-mapping
+  cl::opt<cl::boolOrDefault>
+      SectionMapping("section-mapping",
+                     cl::desc("Display the section to segment mapping."));
+
+  // --relocations, --relocs, -r
   cl::opt<bool> Relocations("relocations",
     cl::desc("Display the relocation entries in the file"));
   cl::alias RelocationsShort("r", cl::desc("Alias for --relocations"),
@@ -115,36 +121,43 @@ namespace opts {
   cl::alias RelocationsGNU("relocs", cl::desc("Alias for --relocations"),
                            cl::aliasopt(Relocations));
 
-  // -notes, -n
+  // --notes, -n
   cl::opt<bool> Notes("notes", cl::desc("Display the ELF notes in the file"));
   cl::alias NotesShort("n", cl::desc("Alias for --notes"), cl::aliasopt(Notes));
 
-  // -dyn-relocations
+  // --dyn-relocations
   cl::opt<bool> DynRelocs("dyn-relocations",
     cl::desc("Display the dynamic relocation entries in the file"));
 
-  // -symbols
+  // --symbols
   // Also -s in llvm-readelf mode, or -t in llvm-readobj mode.
-  cl::opt<bool> Symbols("symbols",
-    cl::desc("Display the symbol table"));
+  cl::opt<bool>
+      Symbols("symbols",
+              cl::desc("Display the symbol table. Also display the dynamic "
+                       "symbol table when using GNU output style for ELF"));
   cl::alias SymbolsGNU("syms", cl::desc("Alias for --symbols"),
                        cl::aliasopt(Symbols));
 
-  // -dyn-symbols, -dyn-syms
-  // Also -dt in llvm-readobj mode.
+  // --dyn-symbols, --dyn-syms
+  // Also --dt in llvm-readobj mode.
   cl::opt<bool> DynamicSymbols("dyn-symbols",
     cl::desc("Display the dynamic symbol table"));
   cl::alias DynSymsGNU("dyn-syms", cl::desc("Alias for --dyn-symbols"),
                        cl::aliasopt(DynamicSymbols));
 
-  // -unwind, -u
+  // --hash-symbols
+  cl::opt<bool> HashSymbols(
+      "hash-symbols",
+      cl::desc("Display the dynamic symbols derived from the hash section"));
+
+  // --unwind, -u
   cl::opt<bool> UnwindInfo("unwind",
     cl::desc("Display unwind information"));
   cl::alias UnwindInfoShort("u",
     cl::desc("Alias for --unwind"),
     cl::aliasopt(UnwindInfo));
 
-  // -dynamic-table, -dynamic, -d
+  // --dynamic-table, --dynamic, -d
   cl::opt<bool> DynamicTable("dynamic-table",
     cl::desc("Display the ELF .dynamic section table"));
   cl::alias DynamicTableShort("d", cl::desc("Alias for --dynamic-table"),
@@ -152,11 +165,11 @@ namespace opts {
   cl::alias DynamicTableAlias("dynamic", cl::desc("Alias for --dynamic-table"),
                               cl::aliasopt(DynamicTable));
 
-  // -needed-libs
+  // --needed-libs
   cl::opt<bool> NeededLibraries("needed-libs",
     cl::desc("Display the needed libraries"));
 
-  // -program-headers, -segments, -l
+  // --program-headers, --segments, -l
   cl::opt<bool> ProgramHeaders("program-headers",
     cl::desc("Display ELF program headers"));
   cl::alias ProgramHeadersShort("l", cl::desc("Alias for --program-headers"),
@@ -164,149 +177,161 @@ namespace opts {
   cl::alias SegmentsAlias("segments", cl::desc("Alias for --program-headers"),
                           cl::aliasopt(ProgramHeaders));
 
-  // -string-dump, -p
+  // --string-dump, -p
   cl::list<std::string> StringDump("string-dump", cl::desc("<number|name>"),
                                    cl::ZeroOrMore);
   cl::alias StringDumpShort("p", cl::desc("Alias for --string-dump"),
-                            cl::aliasopt(StringDump));
+                            cl::aliasopt(StringDump), cl::Prefix);
 
-  // -hex-dump, -x
+  // --hex-dump, -x
   cl::list<std::string> HexDump("hex-dump", cl::desc("<number|name>"),
                                 cl::ZeroOrMore);
   cl::alias HexDumpShort("x", cl::desc("Alias for --hex-dump"),
-                         cl::aliasopt(HexDump));
+                         cl::aliasopt(HexDump), cl::Prefix);
 
-  // -hash-table
+  // --demangle, -C
+  cl::opt<bool> Demangle("demangle",
+                         cl::desc("Demangle symbol names in output"));
+  cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
+                          cl::aliasopt(Demangle), cl::NotHidden);
+
+  // --hash-table
   cl::opt<bool> HashTable("hash-table",
     cl::desc("Display ELF hash table"));
 
-  // -gnu-hash-table
+  // --gnu-hash-table
   cl::opt<bool> GnuHashTable("gnu-hash-table",
     cl::desc("Display ELF .gnu.hash section"));
 
-  // -expand-relocs
+  // --expand-relocs
   cl::opt<bool> ExpandRelocs("expand-relocs",
     cl::desc("Expand each shown relocation to multiple lines"));
 
-  // -raw-relr
+  // --raw-relr
   cl::opt<bool> RawRelr("raw-relr",
     cl::desc("Do not decode relocations in SHT_RELR section, display raw contents"));
 
-  // -codeview
+  // --codeview
   cl::opt<bool> CodeView("codeview",
                          cl::desc("Display CodeView debug information"));
 
-  // -codeview-merged-types
+  // --codeview-merged-types
   cl::opt<bool>
       CodeViewMergedTypes("codeview-merged-types",
                           cl::desc("Display the merged CodeView type stream"));
 
-  // -codeview-subsection-bytes
+  // --codeview-ghash
+  cl::opt<bool> CodeViewEnableGHash(
+      "codeview-ghash",
+      cl::desc(
+          "Enable global hashing for CodeView type stream de-duplication"));
+
+  // --codeview-subsection-bytes
   cl::opt<bool> CodeViewSubsectionBytes(
       "codeview-subsection-bytes",
       cl::desc("Dump raw contents of codeview debug sections and records"));
 
-  // -arm-attributes
+  // --arm-attributes
   cl::opt<bool> ARMAttributes("arm-attributes",
                               cl::desc("Display the ARM attributes section"));
 
-  // -mips-plt-got
+  // --mips-plt-got
   cl::opt<bool>
   MipsPLTGOT("mips-plt-got",
              cl::desc("Display the MIPS GOT and PLT GOT sections"));
 
-  // -mips-abi-flags
+  // --mips-abi-flags
   cl::opt<bool> MipsABIFlags("mips-abi-flags",
                              cl::desc("Display the MIPS.abiflags section"));
 
-  // -mips-reginfo
+  // --mips-reginfo
   cl::opt<bool> MipsReginfo("mips-reginfo",
                             cl::desc("Display the MIPS .reginfo section"));
 
-  // -mips-options
+  // --mips-options
   cl::opt<bool> MipsOptions("mips-options",
                             cl::desc("Display the MIPS .MIPS.options section"));
 
-  // -coff-imports
+  // --coff-imports
   cl::opt<bool>
   COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
 
-  // -coff-exports
+  // --coff-exports
   cl::opt<bool>
   COFFExports("coff-exports", cl::desc("Display the PE/COFF export table"));
 
-  // -coff-directives
+  // --coff-directives
   cl::opt<bool>
   COFFDirectives("coff-directives",
                  cl::desc("Display the PE/COFF .drectve section"));
 
-  // -coff-basereloc
+  // --coff-basereloc
   cl::opt<bool>
   COFFBaseRelocs("coff-basereloc",
                  cl::desc("Display the PE/COFF .reloc section"));
 
-  // -coff-debug-directory
+  // --coff-debug-directory
   cl::opt<bool>
   COFFDebugDirectory("coff-debug-directory",
                      cl::desc("Display the PE/COFF debug directory"));
 
-  // -coff-resources
+  // --coff-resources
   cl::opt<bool> COFFResources("coff-resources",
                               cl::desc("Display the PE/COFF .rsrc section"));
 
-  // -coff-load-config
+  // --coff-load-config
   cl::opt<bool>
   COFFLoadConfig("coff-load-config",
                  cl::desc("Display the PE/COFF load config"));
 
-  // -elf-linker-options
+  // --elf-linker-options
   cl::opt<bool>
   ELFLinkerOptions("elf-linker-options",
                    cl::desc("Display the ELF .linker-options section"));
 
-  // -macho-data-in-code
+  // --macho-data-in-code
   cl::opt<bool>
   MachODataInCode("macho-data-in-code",
                   cl::desc("Display MachO Data in Code command"));
 
-  // -macho-indirect-symbols
+  // --macho-indirect-symbols
   cl::opt<bool>
   MachOIndirectSymbols("macho-indirect-symbols",
                   cl::desc("Display MachO indirect symbols"));
 
-  // -macho-linker-options
+  // --macho-linker-options
   cl::opt<bool>
   MachOLinkerOptions("macho-linker-options",
                   cl::desc("Display MachO linker options"));
 
-  // -macho-segment
+  // --macho-segment
   cl::opt<bool>
   MachOSegment("macho-segment",
                   cl::desc("Display MachO Segment command"));
 
-  // -macho-version-min
+  // --macho-version-min
   cl::opt<bool>
   MachOVersionMin("macho-version-min",
                   cl::desc("Display MachO version min command"));
 
-  // -macho-dysymtab
+  // --macho-dysymtab
   cl::opt<bool>
   MachODysymtab("macho-dysymtab",
                   cl::desc("Display MachO Dysymtab command"));
 
-  // -stackmap
+  // --stackmap
   cl::opt<bool>
   PrintStackMap("stackmap",
                 cl::desc("Display contents of stackmap section"));
 
-  // -version-info, -V
+  // --version-info, -V
   cl::opt<bool>
       VersionInfo("version-info",
                   cl::desc("Display ELF version sections (if present)"));
   cl::alias VersionInfoShort("V", cl::desc("Alias for -version-info"),
                              cl::aliasopt(VersionInfo));
 
-  // -elf-section-groups, -section-groups, -g
+  // --elf-section-groups, --section-groups, -g
   cl::opt<bool> SectionGroups("elf-section-groups",
                               cl::desc("Display ELF section group contents"));
   cl::alias SectionGroupsAlias("section-groups",
@@ -315,7 +340,7 @@ namespace opts {
   cl::alias SectionGroupsShort("g", cl::desc("Alias for -elf-sections-groups"),
                                cl::aliasopt(SectionGroups));
 
-  // -elf-hash-histogram, -histogram, -I
+  // --elf-hash-histogram, --histogram, -I
   cl::opt<bool> HashHistogram(
       "elf-hash-histogram",
       cl::desc("Display bucket list histogram for hash sections"));
@@ -325,7 +350,7 @@ namespace opts {
                            cl::desc("Alias for --elf-hash-histogram"),
                            cl::aliasopt(HashHistogram));
 
-  // -elf-cg-profile
+  // --elf-cg-profile
   cl::opt<bool> CGProfile("elf-cg-profile", cl::desc("Display callgraph profile section"));
 
   // -addrsig
@@ -338,16 +363,38 @@ namespace opts {
              cl::values(clEnumVal(LLVM, "LLVM default style"),
                         clEnumVal(GNU, "GNU readelf style")),
              cl::init(LLVM));
+
+  cl::extrahelp
+      HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
 } // namespace opts
 
 namespace llvm {
 
 LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
-  errs() << "\nError reading file: " << Msg << ".\n";
-  errs().flush();
+  fouts().flush();
+  errs() << "\n";
+  WithColor::error(errs()) << Msg << "\n";
   exit(1);
 }
 
+void reportError(StringRef Input, Error Err) {
+  if (Input == "-")
+    Input = "<stdin>";
+  error(createFileError(Input, std::move(Err)));
+}
+
+void reportWarning(Twine Msg) {
+  fouts().flush();
+  errs() << "\n";
+  WithColor::warning(errs()) << Msg << "\n";
+}
+
+void warn(Error Err) {
+  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
+    reportWarning(EI.message());
+  });
+}
+
 void error(Error EC) {
   if (!EC)
     return;
@@ -361,28 +408,10 @@ void error(std::error_code EC) {
   reportError(EC.message());
 }
 
-bool relocAddressLess(RelocationRef a, RelocationRef b) {
-  return a.getOffset() < b.getOffset();
-}
-
 } // namespace llvm
 
 static void reportError(StringRef Input, std::error_code EC) {
-  if (Input == "-")
-    Input = "<stdin>";
-
-  reportError(Twine(Input) + ": " + EC.message());
-}
-
-static void reportError(StringRef Input, Error Err) {
-  if (Input == "-")
-    Input = "<stdin>";
-  std::string ErrMsg;
-  {
-    raw_string_ostream ErrStream(ErrMsg);
-    logAllUnhandledErrors(std::move(Err), ErrStream, Input + ": ");
-  }
-  reportError(ErrMsg);
+  reportError(Input, errorCodeToError(EC));
 }
 
 static bool isMipsArch(unsigned Arch) {
@@ -399,13 +428,17 @@ static bool isMipsArch(unsigned Arch) {
 namespace {
 struct ReadObjTypeTableBuilder {
   ReadObjTypeTableBuilder()
-      : Allocator(), IDTable(Allocator), TypeTable(Allocator) {}
+      : Allocator(), IDTable(Allocator), TypeTable(Allocator),
+        GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {}
 
   llvm::BumpPtrAllocator Allocator;
   llvm::codeview::MergingTypeTableBuilder IDTable;
   llvm::codeview::MergingTypeTableBuilder TypeTable;
+  llvm::codeview::GlobalTypeTableBuilder GlobalIDTable;
+  llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable;
+  std::vector<OwningBinary<Binary>> Binaries;
 };
-}
+} // namespace
 static ReadObjTypeTableBuilder CVTypes;
 
 /// Creates an format-specific object file dumper.
@@ -423,25 +456,34 @@ static std::error_code createDumper(const ObjectFile *Obj,
     return createMachODumper(Obj, Writer, Result);
   if (Obj->isWasm())
     return createWasmDumper(Obj, Writer, Result);
+  if (Obj->isXCOFF())
+    return createXCOFFDumper(Obj, Writer, Result);
 
   return readobj_error::unsupported_obj_file_format;
 }
 
 /// Dumps the specified object file.
-static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
+static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
+                       const Archive *A = nullptr) {
+  std::string FileStr =
+          A ? Twine(A->getFileName() + "(" + Obj->getFileName() + ")").str()
+            : Obj->getFileName().str();
+
   std::unique_ptr<ObjDumper> Dumper;
   if (std::error_code EC = createDumper(Obj, Writer, Dumper))
-    reportError(Obj->getFileName(), EC);
+    reportError(FileStr, EC);
 
+  Writer.startLine() << "\n";
   if (opts::Output == opts::LLVM) {
-    Writer.startLine() << "\n";
-    Writer.printString("File", Obj->getFileName());
+    Writer.printString("File", FileStr);
     Writer.printString("Format", Obj->getFileFormatName());
     Writer.printString("Arch", Triple::getArchTypeName(
                                    (llvm::Triple::ArchType)Obj->getArch()));
     Writer.printString("AddressSize",
                        formatv("{0}bit", 8 * Obj->getBytesInAddress()));
     Dumper->printLoadName();
+  } else if (opts::Output == opts::GNU && A) {
+    Writer.printString("File", FileStr);
   }
 
   if (opts::FileHeaders)
@@ -452,26 +494,22 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
     Dumper->printRelocations();
   if (opts::DynRelocs)
     Dumper->printDynamicRelocations();
-  if (opts::Symbols)
-    Dumper->printSymbols();
-  if (opts::DynamicSymbols)
-    Dumper->printDynamicSymbols();
+  if (opts::Symbols || opts::DynamicSymbols)
+    Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols);
+  if (opts::HashSymbols)
+    Dumper->printHashSymbols();
   if (opts::UnwindInfo)
     Dumper->printUnwindInfo();
   if (opts::DynamicTable)
     Dumper->printDynamicTable();
   if (opts::NeededLibraries)
     Dumper->printNeededLibraries();
-  if (opts::ProgramHeaders)
-    Dumper->printProgramHeaders();
+  if (opts::ProgramHeaders || opts::SectionMapping == cl::BOU_TRUE)
+    Dumper->printProgramHeaders(opts::ProgramHeaders, opts::SectionMapping);
   if (!opts::StringDump.empty())
-    llvm::for_each(opts::StringDump, [&Dumper, Obj](StringRef SectionName) {
-      Dumper->printSectionAsString(Obj, SectionName);
-    });
+    Dumper->printSectionsAsString(Obj, opts::StringDump);
   if (!opts::HexDump.empty())
-    llvm::for_each(opts::HexDump, [&Dumper, Obj](StringRef SectionName) {
-      Dumper->printSectionAsHex(Obj, SectionName);
-    });
+    Dumper->printSectionsAsHex(Obj, opts::HexDump);
   if (opts::HashTable)
     Dumper->printHashTable();
   if (opts::GnuHashTable)
@@ -525,7 +563,9 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
     if (opts::CodeView)
       Dumper->printCodeViewDebugInfo();
     if (opts::CodeViewMergedTypes)
-      Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable);
+      Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable,
+                                 CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable,
+                                 opts::CodeViewEnableGHash);
   }
   if (Obj->isMachO()) {
     if (opts::MachODataInCode)
@@ -552,12 +592,12 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
     Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
     if (!ChildOrErr) {
       if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
-        reportError(Arc->getFileName(), ChildOrErr.takeError());
+        reportError(Arc->getFileName(), std::move(E));
       }
       continue;
     }
     if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
-      dumpObject(Obj, Writer);
+      dumpObject(Obj, Writer, Arc);
     else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
       dumpCOFFImportFile(Imp, Writer);
     else
@@ -583,8 +623,8 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary,
 }
 
 /// Dumps \a WinRes, Windows Resource (.res) file;
-static void dumpWindowsResourceFile(WindowsResource *WinRes) {
-  ScopedPrinter Printer{outs()};
+static void dumpWindowsResourceFile(WindowsResource *WinRes,
+                                    ScopedPrinter &Printer) {
   WindowsRes::Dumper Dumper(WinRes, Printer);
   if (auto Err = Dumper.printData())
     reportError(WinRes->getFileName(), std::move(Err));
@@ -592,9 +632,7 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes) {
 
 
 /// Opens \a File and dumps it.
-static void dumpInput(StringRef File) {
-  ScopedPrinter Writer(outs());
-
+static void dumpInput(StringRef File, ScopedPrinter &Writer) {
   // Attempt to open the binary.
   Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
   if (!BinaryOrErr)
@@ -611,9 +649,11 @@ static void dumpInput(StringRef File) {
   else if (COFFImportFile *Import = dyn_cast<COFFImportFile>(&Binary))
     dumpCOFFImportFile(Import, Writer);
   else if (WindowsResource *WinRes = dyn_cast<WindowsResource>(&Binary))
-    dumpWindowsResourceFile(WinRes);
+    dumpWindowsResourceFile(WinRes, Writer);
   else
     reportError(File, readobj_error::unrecognized_file_format);
+
+  CVTypes.Binaries.push_back(std::move(*BinaryOrErr));
 }
 
 /// Registers aliases that should only be allowed by readobj.
@@ -656,7 +696,7 @@ static void registerReadelfAliases() {
     StringRef ArgName = OptEntry.getKey();
     cl::Option *Option = OptEntry.getValue();
     if (ArgName.size() == 1)
-      Option->setFormattingFlag(cl::Grouping);
+      apply(Option, cl::Grouping);
   }
 }
 
@@ -699,11 +739,17 @@ int main(int argc, const char *argv[]) {
   if (opts::InputFilenames.empty())
     opts::InputFilenames.push_back("-");
 
-  llvm::for_each(opts::InputFilenames, dumpInput);
+  ScopedPrinter Writer(fouts());
+  for (const std::string &I : opts::InputFilenames)
+    dumpInput(I, Writer);
 
   if (opts::CodeViewMergedTypes) {
-    ScopedPrinter W(outs());
-    dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable);
+    if (opts::CodeViewEnableGHash)
+      dumpCodeViewMergedTypes(Writer, CVTypes.GlobalIDTable.records(),
+                              CVTypes.GlobalTypeTable.records());
+    else
+      dumpCodeViewMergedTypes(Writer, CVTypes.IDTable.records(),
+                              CVTypes.TypeTable.records());
   }
 
   return 0;
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index 92ed098dc642..0e02da4cb847 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -1,9 +1,8 @@
 //===-- llvm-readobj.h ----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -23,6 +22,9 @@ namespace llvm {
 
   // Various helper functions.
   LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
+  void reportError(StringRef Input, Error Err); 
+  void reportWarning(Twine Msg);
+  void warn(llvm::Error Err);
   void error(std::error_code EC);
   void error(llvm::Error EC);
   template <typename T> T error(llvm::Expected<T> &&E) {
@@ -44,18 +46,16 @@ namespace llvm {
     OS.flush();
     reportError(Buf);
   }
-  bool relocAddressLess(object::RelocationRef A,
-                        object::RelocationRef B);
 } // namespace llvm
 
 namespace opts {
   extern llvm::cl::opt<bool> SectionRelocations;
   extern llvm::cl::opt<bool> SectionSymbols;
   extern llvm::cl::opt<bool> SectionData;
-  extern llvm::cl::opt<bool> DynamicSymbols;
   extern llvm::cl::opt<bool> ExpandRelocs;
   extern llvm::cl::opt<bool> RawRelr;
   extern llvm::cl::opt<bool> CodeViewSubsectionBytes;
+  extern llvm::cl::opt<bool> Demangle;
   enum OutputStyleTy { LLVM, GNU };
   extern llvm::cl::opt<OutputStyleTy> Output;
 } // namespace opts
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 975638ed82d1..a7cc1deb8cf6 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-rtdyld.cpp - MCJIT Testing Tool ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,9 +29,13 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
+
+#include <future>
 #include <list>
 
 using namespace llvm;
@@ -74,6 +77,10 @@ Dylibs("dylib",
        cl::desc("Add library."),
        cl::ZeroOrMore);
 
+static cl::list<std::string> InputArgv("args", cl::Positional,
+                                       cl::desc("<program arguments>..."),
+                                       cl::ZeroOrMore, cl::PositionalEatsArgs);
+
 static cl::opt<std::string>
 TripleName("triple", cl::desc("Target triple for disassembler"));
 
@@ -88,35 +95,28 @@ CheckFiles("check",
            cl::desc("File containing RuntimeDyld verifier checks."),
            cl::ZeroOrMore);
 
-// Tracking BUG: 19665
-// http://llvm.org/bugs/show_bug.cgi?id=19665
-//
-// Do not change these options to cl::opt<uint64_t> since this silently breaks
-// argument parsing.
-static cl::opt<unsigned long long>
-PreallocMemory("preallocate",
-              cl::desc("Allocate memory upfront rather than on-demand"),
-              cl::init(0));
-
-static cl::opt<unsigned long long>
-TargetAddrStart("target-addr-start",
-                cl::desc("For -verify only: start of phony target address "
-                         "range."),
-                cl::init(4096), // Start at "page 1" - no allocating at "null".
-                cl::Hidden);
-
-static cl::opt<unsigned long long>
-TargetAddrEnd("target-addr-end",
-              cl::desc("For -verify only: end of phony target address range."),
-              cl::init(~0ULL),
-              cl::Hidden);
-
-static cl::opt<unsigned long long>
-TargetSectionSep("target-section-sep",
-                 cl::desc("For -verify only: Separation between sections in "
-                          "phony target address space."),
-                 cl::init(0),
-                 cl::Hidden);
+static cl::opt<uint64_t>
+    PreallocMemory("preallocate",
+                   cl::desc("Allocate memory upfront rather than on-demand"),
+                   cl::init(0));
+
+static cl::opt<uint64_t> TargetAddrStart(
+    "target-addr-start",
+    cl::desc("For -verify only: start of phony target address "
+             "range."),
+    cl::init(4096), // Start at "page 1" - no allocating at "null".
+    cl::Hidden);
+
+static cl::opt<uint64_t> TargetAddrEnd(
+    "target-addr-end",
+    cl::desc("For -verify only: end of phony target address range."),
+    cl::init(~0ULL), cl::Hidden);
+
+static cl::opt<uint64_t> TargetSectionSep(
+    "target-section-sep",
+    cl::desc("For -verify only: Separation between sections in "
+             "phony target address space."),
+    cl::init(0), cl::Hidden);
 
 static cl::list<std::string>
 SpecificSectionMappings("map-section",
@@ -138,14 +138,50 @@ PrintAllocationRequests("print-alloc-requests",
                                  "manager by RuntimeDyld"),
                         cl::Hidden);
 
+ExitOnError ExitOnErr;
+
 /* *** */
 
+using SectionIDMap = StringMap<unsigned>;
+using FileToSectionIDMap = StringMap<SectionIDMap>;
+
+void dumpFileToSectionIDMap(const FileToSectionIDMap &FileToSecIDMap) {
+  for (const auto &KV : FileToSecIDMap) {
+    llvm::dbgs() << "In " << KV.first() << "\n";
+    for (auto &KV2 : KV.second)
+      llvm::dbgs() << "  \"" << KV2.first() << "\" -> " << KV2.second << "\n";
+  }
+}
+
+Expected<unsigned> getSectionId(const FileToSectionIDMap &FileToSecIDMap,
+                                StringRef FileName, StringRef SectionName) {
+  auto I = FileToSecIDMap.find(FileName);
+  if (I == FileToSecIDMap.end())
+    return make_error<StringError>("No file named " + FileName,
+                                   inconvertibleErrorCode());
+  auto &SectionIDs = I->second;
+  auto J = SectionIDs.find(SectionName);
+  if (J == SectionIDs.end())
+    return make_error<StringError>("No section named \"" + SectionName +
+                                   "\" in file " + FileName,
+                                   inconvertibleErrorCode());
+  return J->second;
+}
+
 // A trivial memory manager that doesn't do anything fancy, just uses the
 // support library allocation routines directly.
 class TrivialMemoryManager : public RTDyldMemoryManager {
 public:
-  SmallVector<sys::MemoryBlock, 16> FunctionMemory;
-  SmallVector<sys::MemoryBlock, 16> DataMemory;
+  struct SectionInfo {
+    SectionInfo(StringRef Name, sys::MemoryBlock MB, unsigned SectionID)
+      : Name(Name), MB(std::move(MB)), SectionID(SectionID) {}
+    std::string Name;
+    sys::MemoryBlock MB;
+    unsigned SectionID = ~0U;
+  };
+
+  SmallVector<SectionInfo, 16> FunctionMemory;
+  SmallVector<SectionInfo, 16> DataMemory;
 
   uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID,
@@ -154,6 +190,11 @@ public:
                                unsigned SectionID, StringRef SectionName,
                                bool IsReadOnly) override;
 
+  /// If non null, records subsequent Name -> SectionID mappings.
+  void setSectionIDsMap(SectionIDMap *SecIDMap) {
+    this->SecIDMap = SecIDMap;
+  }
+
   void *getPointerToNamedFunction(const std::string &Name,
                                   bool AbortOnFailure = true) override {
     return nullptr;
@@ -171,7 +212,15 @@ public:
     if (I != DummyExterns.end())
       return JITSymbol(I->second, JITSymbolFlags::Exported);
 
-    return RTDyldMemoryManager::findSymbol(Name);
+    if (auto Sym = RTDyldMemoryManager::findSymbol(Name))
+      return Sym;
+    else if (auto Err = Sym.takeError())
+      ExitOnErr(std::move(Err));
+    else
+      ExitOnErr(make_error<StringError>("Could not find definition for \"" +
+                                            Name + "\"",
+                                        inconvertibleErrorCode()));
+    llvm_unreachable("Should have returned or exited by now");
   }
 
   void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
@@ -193,7 +242,8 @@ public:
     SlabSize = Size;
   }
 
-  uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode) {
+  uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode,
+                            StringRef SectionName, unsigned SectionID) {
     Size = alignTo(Size, Alignment);
     if (CurrentSlabOffset + Size > SlabSize)
       report_fatal_error("Can't allocate enough memory. Tune --preallocate");
@@ -201,9 +251,9 @@ public:
     uintptr_t OldSlabOffset = CurrentSlabOffset;
     sys::MemoryBlock MB((void *)OldSlabOffset, Size);
     if (isCode)
-      FunctionMemory.push_back(MB);
+      FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
     else
-      DataMemory.push_back(MB);
+      DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
     CurrentSlabOffset += Size;
     return (uint8_t*)OldSlabOffset;
   }
@@ -214,6 +264,7 @@ private:
   bool UsePreallocation = false;
   uintptr_t SlabSize = 0;
   uintptr_t CurrentSlabOffset = 0;
+  SectionIDMap *SecIDMap = nullptr;
 };
 
 uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -224,8 +275,12 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
     outs() << "allocateCodeSection(Size = " << Size << ", Alignment = "
            << Alignment << ", SectionName = " << SectionName << ")\n";
 
+  if (SecIDMap)
+    (*SecIDMap)[SectionName] = SectionID;
+
   if (UsePreallocation)
-    return allocateFromSlab(Size, Alignment, true /* isCode */);
+    return allocateFromSlab(Size, Alignment, true /* isCode */,
+                            SectionName, SectionID);
 
   std::error_code EC;
   sys::MemoryBlock MB =
@@ -235,7 +290,7 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
                                       EC);
   if (!MB.base())
     report_fatal_error("MemoryManager allocation failed: " + EC.message());
-  FunctionMemory.push_back(MB);
+  FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
   return (uint8_t*)MB.base();
 }
 
@@ -248,8 +303,12 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
     outs() << "allocateDataSection(Size = " << Size << ", Alignment = "
            << Alignment << ", SectionName = " << SectionName << ")\n";
 
+  if (SecIDMap)
+    (*SecIDMap)[SectionName] = SectionID;
+
   if (UsePreallocation)
-    return allocateFromSlab(Size, Alignment, false /* isCode */);
+    return allocateFromSlab(Size, Alignment, false /* isCode */, SectionName,
+                            SectionID);
 
   std::error_code EC;
   sys::MemoryBlock MB =
@@ -259,7 +318,7 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
                                       EC);
   if (!MB.base())
     report_fatal_error("MemoryManager allocation failed: " + EC.message());
-  DataMemory.push_back(MB);
+  DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
   return (uint8_t*)MB.base();
 }
 
@@ -368,6 +427,8 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
         }
         uint64_t Addr = *AddrOrErr;
 
+        object::SectionedAddress Address;
+
         uint64_t Size = P.second;
         // If we're not using the debug object, compute the address of the
         // symbol in memory (rather than that in the unrelocated object file)
@@ -382,16 +443,20 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
           object::section_iterator Sec = *SecOrErr;
           StringRef SecName;
           Sec->getName(SecName);
+          Address.SectionIndex = Sec->getIndex();
           uint64_t SectionLoadAddress =
             LoadedObjInfo->getSectionLoadAddress(*Sec);
           if (SectionLoadAddress != 0)
             Addr += SectionLoadAddress - Sec->getAddress();
-        }
+        } else if (auto SecOrErr = Sym.getSection())
+          Address.SectionIndex = SecOrErr.get()->getIndex();
 
         outs() << "Function: " << *Name << ", Size = " << Size
                << ", Addr = " << Addr << "\n";
 
-        DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+        Address.Address = Addr;
+        DILineInfoTable Lines =
+            Context->getLineInfoForAddressRange(Address, Size);
         for (auto &D : Lines) {
           outs() << "  Line info @ " << D.first - Addr << ": "
                  << D.second.FileName << ", line:" << D.second.Line << "\n";
@@ -464,9 +529,11 @@ static int executeInput() {
   // Invalidate the instruction cache for each loaded function.
   for (auto &FM : MemMgr.FunctionMemory) {
 
+    auto &FM_MB = FM.MB;
+
     // Make sure the memory is executable.
     // setExecutable will call InvalidateInstructionCache.
-    if (auto EC = sys::Memory::protectMappedMemory(FM,
+    if (auto EC = sys::Memory::protectMappedMemory(FM_MB,
                                                    sys::Memory::MF_READ |
                                                    sys::Memory::MF_EXEC))
       ErrorAndExit("unable to mark function executable: '" + EC.message() +
@@ -478,11 +545,13 @@ static int executeInput() {
 
   int (*Main)(int, const char**) =
     (int(*)(int,const char**)) uintptr_t(MainAddress);
-  const char **Argv = new const char*[2];
+  std::vector<const char *> Argv;
   // Use the name of the first input object module as argv[0] for the target.
-  Argv[0] = InputFileList[0].c_str();
-  Argv[1] = nullptr;
-  return Main(1, Argv);
+  Argv.push_back(InputFileList[0].data());
+  for (auto &Arg : InputArgv)
+    Argv.push_back(Arg.data());
+  Argv.push_back(nullptr);
+  return Main(Argv.size() - 1, Argv.data());
 }
 
 static int checkAllExpressions(RuntimeDyldChecker &Checker) {
@@ -500,10 +569,10 @@ static int checkAllExpressions(RuntimeDyldChecker &Checker) {
   return 0;
 }
 
-void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
+void applySpecificSectionMappings(RuntimeDyld &Dyld,
+                                  const FileToSectionIDMap &FileToSecIDMap) {
 
   for (StringRef Mapping : SpecificSectionMappings) {
-
     size_t EqualsIdx = Mapping.find_first_of("=");
     std::string SectionIDStr = Mapping.substr(0, EqualsIdx);
     size_t ComaIdx = Mapping.find_first_of(",");
@@ -514,17 +583,10 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
 
     std::string FileName = SectionIDStr.substr(0, ComaIdx);
     std::string SectionName = SectionIDStr.substr(ComaIdx + 1);
+    unsigned SectionID =
+      ExitOnErr(getSectionId(FileToSecIDMap, FileName, SectionName));
 
-    uint64_t OldAddrInt;
-    std::string ErrorMsg;
-    std::tie(OldAddrInt, ErrorMsg) =
-      Checker.getSectionAddr(FileName, SectionName, true);
-
-    if (ErrorMsg != "")
-      report_fatal_error(ErrorMsg);
-
-    void* OldAddr = reinterpret_cast<void*>(static_cast<uintptr_t>(OldAddrInt));
-
+    auto* OldAddr = Dyld.getSectionContent(SectionID).data();
     std::string NewAddrStr = Mapping.substr(EqualsIdx + 1);
     uint64_t NewAddr;
 
@@ -532,7 +594,7 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
       report_fatal_error("Invalid section address in mapping '" + Mapping +
                          "'.");
 
-    Checker.getRTDyld().mapSectionAddress(OldAddr, NewAddr);
+    Dyld.mapSectionAddress(OldAddr, NewAddr);
   }
 }
 
@@ -548,21 +610,17 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
 //                            (e.g. 1 << 32) to stress-test stubs, GOTs, etc.
 //
 static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
-                                    TrivialMemoryManager &MemMgr,
-                                    RuntimeDyldChecker &Checker) {
+                                    RuntimeDyld &Dyld,
+                                    TrivialMemoryManager &MemMgr) {
 
   // Set up a work list (section addr/size pairs).
-  typedef std::list<std::pair<void*, uint64_t>> WorklistT;
+  typedef std::list<const TrivialMemoryManager::SectionInfo*> WorklistT;
   WorklistT Worklist;
 
   for (const auto& CodeSection : MemMgr.FunctionMemory)
-    Worklist.push_back(std::make_pair(CodeSection.base(), CodeSection.size()));
+    Worklist.push_back(&CodeSection);
   for (const auto& DataSection : MemMgr.DataMemory)
-    Worklist.push_back(std::make_pair(DataSection.base(), DataSection.size()));
-
-  // Apply any section-specific mappings that were requested on the command
-  // line.
-  applySpecificSectionMappings(Checker);
+    Worklist.push_back(&DataSection);
 
   // Keep an "already allocated" mapping of section target addresses to sizes.
   // Sections whose address mappings aren't specified on the command line will
@@ -577,16 +635,16 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
        I != E;) {
     WorklistT::iterator Tmp = I;
     ++I;
-    auto LoadAddr = Checker.getSectionLoadAddress(Tmp->first);
 
-    if (LoadAddr &&
-        *LoadAddr != static_cast<uint64_t>(
-                       reinterpret_cast<uintptr_t>(Tmp->first))) {
+    auto LoadAddr = Dyld.getSectionLoadAddress((*Tmp)->SectionID);
+
+    if (LoadAddr != static_cast<uint64_t>(
+          reinterpret_cast<uintptr_t>((*Tmp)->MB.base()))) {
       // A section will have a LoadAddr of 0 if it wasn't loaded for whatever
       // reason (e.g. zero byte COFF sections). Don't include those sections in
       // the allocation map.
-      if (*LoadAddr != 0)
-        AlreadyAllocated[*LoadAddr] = Tmp->second;
+      if (LoadAddr != 0)
+        AlreadyAllocated[LoadAddr] = (*Tmp)->MB.allocatedSize();
       Worklist.erase(Tmp);
     }
   }
@@ -604,19 +662,20 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
 
   // Process any elements remaining in the worklist.
   while (!Worklist.empty()) {
-    std::pair<void*, uint64_t> CurEntry = Worklist.front();
+    auto *CurEntry = Worklist.front();
     Worklist.pop_front();
 
     uint64_t NextSectionAddr = TargetAddrStart;
 
     for (const auto &Alloc : AlreadyAllocated)
-      if (NextSectionAddr + CurEntry.second + TargetSectionSep <= Alloc.first)
+      if (NextSectionAddr + CurEntry->MB.allocatedSize() + TargetSectionSep <=
+          Alloc.first)
         break;
       else
         NextSectionAddr = Alloc.first + Alloc.second + TargetSectionSep;
 
-    AlreadyAllocated[NextSectionAddr] = CurEntry.second;
-    Checker.getRTDyld().mapSectionAddress(CurEntry.first, NextSectionAddr);
+    Dyld.mapSectionAddress(CurEntry->MB.base(), NextSectionAddr);
+    AlreadyAllocated[NextSectionAddr] = CurEntry->MB.allocatedSize();
   }
 
   // Add dummy symbols to the memory manager.
@@ -688,18 +747,132 @@ static int linkAndVerify() {
   // Instantiate a dynamic linker.
   TrivialMemoryManager MemMgr;
   doPreallocation(MemMgr);
+
+  struct StubID {
+    unsigned SectionID;
+    uint32_t Offset;
+  };
+  using StubInfos = StringMap<StubID>;
+  using StubContainers = StringMap<StubInfos>;
+
+  StubContainers StubMap;
   RuntimeDyld Dyld(MemMgr, MemMgr);
   Dyld.setProcessAllSections(true);
-  RuntimeDyldChecker Checker(Dyld, Disassembler.get(), InstPrinter.get(),
-                             llvm::dbgs());
+
+  Dyld.setNotifyStubEmitted([&StubMap](StringRef FilePath,
+                                       StringRef SectionName,
+                                       StringRef SymbolName, unsigned SectionID,
+                                       uint32_t StubOffset) {
+    std::string ContainerName =
+        (sys::path::filename(FilePath) + "/" + SectionName).str();
+    StubMap[ContainerName][SymbolName] = {SectionID, StubOffset};
+  });
+
+  auto GetSymbolInfo =
+      [&Dyld, &MemMgr](
+          StringRef Symbol) -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+    RuntimeDyldChecker::MemoryRegionInfo SymInfo;
+
+    // First get the target address.
+    if (auto InternalSymbol = Dyld.getSymbol(Symbol))
+      SymInfo.setTargetAddress(InternalSymbol.getAddress());
+    else {
+      // Symbol not found in RuntimeDyld. Fall back to external lookup.
+#ifdef _MSC_VER
+      using ExpectedLookupResult =
+          MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+      using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+      auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
+      auto ResultF = ResultP->get_future();
+
+      MemMgr.lookup(JITSymbolResolver::LookupSet({Symbol}),
+                    [=](Expected<JITSymbolResolver::LookupResult> Result) {
+                      ResultP->set_value(std::move(Result));
+                    });
+
+      auto Result = ResultF.get();
+      if (!Result)
+        return Result.takeError();
+
+      auto I = Result->find(Symbol);
+      assert(I != Result->end() &&
+             "Expected symbol address if no error occurred");
+      SymInfo.setTargetAddress(I->second.getAddress());
+    }
+
+    // Now find the symbol content if possible (otherwise leave content as a
+    // default-constructed StringRef).
+    if (auto *SymAddr = Dyld.getSymbolLocalAddress(Symbol)) {
+      unsigned SectionID = Dyld.getSymbolSectionID(Symbol);
+      if (SectionID != ~0U) {
+        char *CSymAddr = static_cast<char *>(SymAddr);
+        StringRef SecContent = Dyld.getSectionContent(SectionID);
+        uint64_t SymSize = SecContent.size() - (CSymAddr - SecContent.data());
+        SymInfo.setContent(StringRef(CSymAddr, SymSize));
+      }
+    }
+    return SymInfo;
+  };
+
+  auto IsSymbolValid = [&Dyld, GetSymbolInfo](StringRef Symbol) {
+    if (Dyld.getSymbol(Symbol))
+      return true;
+    auto SymInfo = GetSymbolInfo(Symbol);
+    if (!SymInfo) {
+      logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+      return false;
+    }
+    return SymInfo->getTargetAddress() != 0;
+  };
+
+  FileToSectionIDMap FileToSecIDMap;
+
+  auto GetSectionInfo = [&Dyld, &FileToSecIDMap](StringRef FileName,
+                                                 StringRef SectionName)
+      -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+    auto SectionID = getSectionId(FileToSecIDMap, FileName, SectionName);
+    if (!SectionID)
+      return SectionID.takeError();
+    RuntimeDyldChecker::MemoryRegionInfo SecInfo;
+    SecInfo.setTargetAddress(Dyld.getSectionLoadAddress(*SectionID));
+    SecInfo.setContent(Dyld.getSectionContent(*SectionID));
+    return SecInfo;
+  };
+
+  auto GetStubInfo = [&Dyld, &StubMap](StringRef StubContainer,
+                                       StringRef SymbolName)
+      -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+    if (!StubMap.count(StubContainer))
+      return make_error<StringError>("Stub container not found: " +
+                                         StubContainer,
+                                     inconvertibleErrorCode());
+    if (!StubMap[StubContainer].count(SymbolName))
+      return make_error<StringError>("Symbol name " + SymbolName +
+                                         " in stub container " + StubContainer,
+                                     inconvertibleErrorCode());
+    auto &SI = StubMap[StubContainer][SymbolName];
+    RuntimeDyldChecker::MemoryRegionInfo StubMemInfo;
+    StubMemInfo.setTargetAddress(Dyld.getSectionLoadAddress(SI.SectionID) +
+                                 SI.Offset);
+    StubMemInfo.setContent(
+        Dyld.getSectionContent(SI.SectionID).substr(SI.Offset));
+    return StubMemInfo;
+  };
+
+  // We will initialize this below once we have the first object file and can
+  // know the endianness.
+  std::unique_ptr<RuntimeDyldChecker> Checker;
 
   // If we don't have any input files, read from stdin.
   if (!InputFileList.size())
     InputFileList.push_back("-");
-  for (auto &Filename : InputFileList) {
+  for (auto &InputFile : InputFileList) {
     // Load the input memory buffer.
     ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
-        MemoryBuffer::getFileOrSTDIN(Filename);
+        MemoryBuffer::getFileOrSTDIN(InputFile);
 
     if (std::error_code EC = InputBuffer.getError())
       ErrorAndExit("unable to read input: '" + EC.message() + "'");
@@ -717,6 +890,15 @@ static int linkAndVerify() {
 
     ObjectFile &Obj = **MaybeObj;
 
+    if (!Checker)
+      Checker = llvm::make_unique<RuntimeDyldChecker>(
+          IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo,
+          GetStubInfo, Obj.isLittleEndian() ? support::little : support::big,
+          Disassembler.get(), InstPrinter.get(), dbgs());
+
+    auto FileName = sys::path::filename(InputFile);
+    MemMgr.setSectionIDsMap(&FileToSecIDMap[FileName]);
+
     // Load the object file
     Dyld.loadObject(Obj);
     if (Dyld.hasError()) {
@@ -726,7 +908,8 @@ static int linkAndVerify() {
 
   // Re-map the section addresses into the phony target address space and add
   // dummy symbols.
-  remapSectionsAndSymbols(TheTriple, MemMgr, Checker);
+  applySpecificSectionMappings(Dyld, FileToSecIDMap);
+  remapSectionsAndSymbols(TheTriple, Dyld, MemMgr);
 
   // Resolve all the relocations we can.
   Dyld.resolveRelocations();
@@ -734,7 +917,7 @@ static int linkAndVerify() {
   // Register EH frames.
   Dyld.registerEHFrames();
 
-  int ErrorCode = checkAllExpressions(Checker);
+  int ErrorCode = checkAllExpressions(*Checker);
   if (Dyld.hasError())
     ErrorAndExit("RTDyld reported an error applying relocations:\n  " +
                  Dyld.getErrorString());
@@ -752,6 +935,8 @@ int main(int argc, char **argv) {
 
   cl::ParseCommandLineOptions(argc, argv, "llvm MC-JIT tool\n");
 
+  ExitOnErr.setBanner(std::string(argv[0]) + ": ");
+
   switch (Action) {
   case AC_Execute:
     return executeInput();
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index c29b7a7f7e46..a455bf13fe7b 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -1,9 +1,8 @@
 //===- llvm-stress.cpp - Generate random LL files to stress-test LLVM -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -277,7 +276,7 @@ protected:
 
   /// Pick a random type.
   Type *pickType() {
-    return (getRandom() & 1 ? pickVectorType() : pickScalarType());
+    return (getRandom() & 1) ? pickVectorType() : pickScalarType();
   }
 
   /// Pick a random pointer type.
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 9d19f994b739..ea94cf9b69a1 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -1,9 +1,8 @@
 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -39,12 +38,17 @@ ClUseSymbolTable("use-symbol-table", cl::init(true),
 
 static cl::opt<FunctionNameKind> ClPrintFunctions(
     "functions", cl::init(FunctionNameKind::LinkageName),
-    cl::desc("Print function name for a given address:"),
+    cl::desc("Print function name for a given address"), cl::ValueOptional,
     cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"),
                clEnumValN(FunctionNameKind::ShortName, "short",
                           "print short function name"),
                clEnumValN(FunctionNameKind::LinkageName, "linkage",
-                          "print function linkage name")));
+                          "print function linkage name"),
+               // Sentinel value for unspecified value.
+               clEnumValN(FunctionNameKind::LinkageName, "", "")));
+static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"),
+                                       cl::NotHidden, cl::Grouping,
+                                       cl::aliasopt(ClPrintFunctions));
 
 static cl::opt<bool>
     ClUseRelativeAddress("relative-address", cl::init(false),
@@ -54,13 +58,29 @@ static cl::opt<bool>
 static cl::opt<bool>
     ClPrintInlining("inlining", cl::init(true),
                     cl::desc("Print all inlined frames for a given address"));
+static cl::alias
+    ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"),
+                          cl::NotHidden, cl::aliasopt(ClPrintInlining),
+                          cl::Grouping);
+static cl::alias
+    ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"),
+                                cl::NotHidden, cl::aliasopt(ClPrintInlining));
 
-// -demangle, -C
+// -basenames, -s
+static cl::opt<bool> ClBasenames("basenames", cl::init(false),
+                                 cl::desc("Strip directory names from paths"));
+static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"),
+                                  cl::NotHidden, cl::aliasopt(ClBasenames));
+
+// -demangle, -C, -no-demangle
 static cl::opt<bool>
 ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
 static cl::alias
 ClDemangleShort("C", cl::desc("Alias for -demangle"),
-                cl::NotHidden, cl::aliasopt(ClDemangle));
+                cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping);
+static cl::opt<bool>
+ClNoDemangle("no-demangle", cl::init(false),
+             cl::desc("Don't demangle function names"));
 
 static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
                                           cl::desc("Default architecture "
@@ -74,10 +94,9 @@ ClBinaryName("obj", cl::init(""),
 static cl::alias
 ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
                      cl::NotHidden, cl::aliasopt(ClBinaryName));
-static cl::alias
-ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
-                   cl::NotHidden, cl::aliasopt(ClBinaryName));
-
+static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
+                                    cl::NotHidden, cl::Grouping, cl::Prefix,
+                                    cl::aliasopt(ClBinaryName));
 
 static cl::opt<std::string>
     ClDwpName("dwp", cl::init(""),
@@ -97,7 +116,7 @@ ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
                              cl::NotHidden, cl::aliasopt(ClPrintAddress));
 static cl::alias
 ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
-                     cl::NotHidden, cl::aliasopt(ClPrintAddress));
+                     cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping);
 
 // -pretty-print, -p
 static cl::opt<bool>
@@ -105,7 +124,7 @@ static cl::opt<bool>
                   cl::desc("Make the output more human friendly"));
 static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
                                     cl::NotHidden,
-                                    cl::aliasopt(ClPrettyPrint));
+                                    cl::aliasopt(ClPrettyPrint), cl::Grouping);
 
 static cl::opt<int> ClPrintSourceContextLines(
     "print-source-context-lines", cl::init(0),
@@ -114,10 +133,30 @@ static cl::opt<int> ClPrintSourceContextLines(
 static cl::opt<bool> ClVerbose("verbose", cl::init(false),
                                cl::desc("Print verbose line info"));
 
+// -adjust-vma
+static cl::opt<uint64_t>
+    ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"),
+                cl::desc("Add specified offset to object file addresses"));
+
 static cl::list<std::string> ClInputAddresses(cl::Positional,
                                               cl::desc("<input addresses>..."),
                                               cl::ZeroOrMore);
 
+static cl::opt<std::string>
+    ClFallbackDebugPath("fallback-debug-path", cl::init(""),
+                        cl::desc("Fallback path for debug binaries."));
+
+static cl::opt<DIPrinter::OutputStyle>
+    ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
+                  cl::desc("Specify print style"),
+                  cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM",
+                                        "LLVM default style"),
+                             clEnumValN(DIPrinter::OutputStyle::GNU, "GNU",
+                                        "GNU addr2line style")));
+
+static cl::extrahelp
+    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
 template<typename T>
 static bool error(Expected<T> &ResOrErr) {
   if (ResOrErr)
@@ -127,17 +166,25 @@ static bool error(Expected<T> &ResOrErr) {
   return true;
 }
 
-static bool parseCommand(StringRef InputString, bool &IsData,
+enum class Command {
+  Code,
+  Data,
+  Frame,
+};
+
+static bool parseCommand(StringRef InputString, Command &Cmd,
                          std::string &ModuleName, uint64_t &ModuleOffset) {
   const char kDelimiters[] = " \n\r";
   ModuleName = "";
   if (InputString.consume_front("CODE ")) {
-    IsData = false;
+    Cmd = Command::Code;
   } else if (InputString.consume_front("DATA ")) {
-    IsData = true;
+    Cmd = Command::Data;
+  } else if (InputString.consume_front("FRAME ")) {
+    Cmd = Command::Frame;
   } else {
     // If no cmd, assume it's CODE.
-    IsData = false;
+    Cmd = Command::Code;
   }
   const char *pos = InputString.data();
   // Skip delimiters and parse input filename (if needed).
@@ -167,44 +214,85 @@ static bool parseCommand(StringRef InputString, bool &IsData,
 
 static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
                            DIPrinter &Printer) {
-  bool IsData = false;
+  Command Cmd;
   std::string ModuleName;
-  uint64_t ModuleOffset = 0;
-  if (!parseCommand(StringRef(InputString), IsData, ModuleName, ModuleOffset)) {
+  uint64_t Offset = 0;
+  if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
     outs() << InputString;
     return;
   }
 
   if (ClPrintAddress) {
     outs() << "0x";
-    outs().write_hex(ModuleOffset);
+    outs().write_hex(Offset);
     StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
     outs() << Delimiter;
   }
-  if (IsData) {
-    auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
+  Offset -= ClAdjustVMA;
+  if (Cmd == Command::Data) {
+    auto ResOrErr = Symbolizer.symbolizeData(
+        ModuleName, {Offset, object::SectionedAddress::UndefSection});
     Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
+  } else if (Cmd == Command::Frame) {
+    auto ResOrErr = Symbolizer.symbolizeFrame(
+        ModuleName, {Offset, object::SectionedAddress::UndefSection});
+    if (!error(ResOrErr)) {
+      for (DILocal Local : *ResOrErr)
+        Printer << Local;
+      if (ResOrErr->empty())
+        outs() << "??\n";
+    }
   } else if (ClPrintInlining) {
-    auto ResOrErr =
-        Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
+    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
+        ModuleName, {Offset, object::SectionedAddress::UndefSection});
     Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
+  } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) {
+    // With ClPrintFunctions == FunctionNameKind::LinkageName (default)
+    // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode()
+    // may override the name of an inlined function with the name of the topmost
+    // caller function in the inlining chain. This contradicts the existing
+    // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
+    // the topmost function, which suits our needs better.
+    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
+        ModuleName, {Offset, object::SectionedAddress::UndefSection});
+    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
   } else {
-    auto ResOrErr =
-        Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
+    auto ResOrErr = Symbolizer.symbolizeCode(
+        ModuleName, {Offset, object::SectionedAddress::UndefSection});
     Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
   }
-  outs() << "\n";
-  outs().flush();
+  if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
+    outs() << "\n";
 }
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
 
+  bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
+
+  if (IsAddr2Line) {
+    ClDemangle.setInitialValue(false);
+    ClPrintFunctions.setInitialValue(FunctionNameKind::None);
+    ClPrintInlining.setInitialValue(false);
+    ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
+  }
+
   llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
+  cl::ParseCommandLineOptions(argc, argv, IsAddr2Line ? "llvm-addr2line\n"
+                                                      : "llvm-symbolizer\n");
 
-  cl::ParseCommandLineOptions(argc, argv, "llvm-symbolizer\n");
-  LLVMSymbolizer::Options Opts(ClPrintFunctions, ClUseSymbolTable, ClDemangle,
-                               ClUseRelativeAddress, ClDefaultArch);
+  // If both --demangle and --no-demangle are specified then pick the last one.
+  if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
+    ClDemangle = !ClNoDemangle;
+
+  LLVMSymbolizer::Options Opts;
+  Opts.PrintFunctions = ClPrintFunctions;
+  Opts.UseSymbolTable = ClUseSymbolTable;
+  Opts.Demangle = ClDemangle;
+  Opts.RelativeAddresses = ClUseRelativeAddress;
+  Opts.DefaultArch = ClDefaultArch;
+  Opts.FallbackDebugPath = ClFallbackDebugPath;
+  Opts.DWPName = ClDwpName;
 
   for (const auto &hint : ClDsymHint) {
     if (sys::path::extension(hint) == ".dSYM") {
@@ -217,14 +305,17 @@ int main(int argc, char **argv) {
   LLVMSymbolizer Symbolizer(Opts);
 
   DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
-                    ClPrettyPrint, ClPrintSourceContextLines, ClVerbose);
+                    ClPrettyPrint, ClPrintSourceContextLines, ClVerbose,
+                    ClBasenames, ClOutputStyle);
 
   if (ClInputAddresses.empty()) {
     const int kMaxInputStringLength = 1024;
     char InputString[kMaxInputStringLength];
 
-    while (fgets(InputString, sizeof(InputString), stdin))
+    while (fgets(InputString, sizeof(InputString), stdin)) {
       symbolizeInput(InputString, Symbolizer, Printer);
+      outs().flush();
+    }
   } else {
     for (StringRef Address : ClInputAddresses)
       symbolizeInput(Address, Symbolizer, Printer);
diff --git a/tools/llvm-xray/func-id-helper.cpp b/tools/llvm-xray/func-id-helper.cpp
index c2bef6ddfb39..dc821a420c67 100644
--- a/tools/llvm-xray/func-id-helper.cpp
+++ b/tools/llvm-xray/func-id-helper.cpp
@@ -1,9 +1,8 @@
 //===- xray-fc-account.cpp: XRay Function Call Accounting Tool ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -30,7 +29,12 @@ std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const {
     return F.str();
   }
 
-  if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second)) {
+  object::SectionedAddress ModuleAddress;
+  ModuleAddress.Address = It->second;
+  // TODO: set proper section index here.
+  // object::SectionedAddress::UndefSection works for only absolute addresses.
+  ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection;
+  if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress)) {
     auto &DI = *ResOrErr;
     if (DI.FunctionName == "<invalid>")
       F << "@(" << std::hex << It->second << ")";
@@ -52,7 +56,12 @@ std::string FuncIdConversionHelper::FileLineAndColumn(int32_t FuncId) const {
     return "(unknown)";
 
   std::ostringstream F;
-  auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second);
+  object::SectionedAddress ModuleAddress;
+  ModuleAddress.Address = It->second;
+  // TODO: set proper section index here.
+  // object::SectionedAddress::UndefSection works for only absolute addresses.
+  ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection;
+  auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress);
   if (!ResOrErr) {
     consumeError(ResOrErr.takeError());
     return "(unknown)";
diff --git a/tools/llvm-xray/func-id-helper.h b/tools/llvm-xray/func-id-helper.h
index 3e0780d54f90..c6ce198170d5 100644
--- a/tools/llvm-xray/func-id-helper.h
+++ b/tools/llvm-xray/func-id-helper.h
@@ -1,9 +1,8 @@
 //===- func-id-helper.h - XRay Function ID Conversion Helpers -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/llvm-xray.cpp b/tools/llvm-xray/llvm-xray.cpp
index e74628f5025f..9ee653e97b2d 100644
--- a/tools/llvm-xray/llvm-xray.cpp
+++ b/tools/llvm-xray/llvm-xray.cpp
@@ -1,9 +1,8 @@
 //===- llvm-xray.cpp: XRay Tool Main Program ------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/trie-node.h b/tools/llvm-xray/trie-node.h
index e6ba4e215b91..47d4b8f1e78c 100644
--- a/tools/llvm-xray/trie-node.h
+++ b/tools/llvm-xray/trie-node.h
@@ -1,9 +1,8 @@
 //===- trie-node.h - XRay Call Stack Data Structure -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-account.cpp b/tools/llvm-xray/xray-account.cpp
index 9985c9adcf6c..2b49a311d7e3 100644
--- a/tools/llvm-xray/xray-account.cpp
+++ b/tools/llvm-xray/xray-account.cpp
@@ -1,9 +1,8 @@
 //===- xray-account.h - XRay Function Call Accounting ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -428,9 +427,7 @@ static CommandRegistration Unused(&Account, []() -> Error {
         Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
 
   const auto &FunctionAddresses = Map.getFunctionAddresses();
-  symbolize::LLVMSymbolizer::Options Opts(
-      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
-  symbolize::LLVMSymbolizer Symbolizer(Opts);
+  symbolize::LLVMSymbolizer Symbolizer;
   llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
                                                   FunctionAddresses);
   xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls);
diff --git a/tools/llvm-xray/xray-account.h b/tools/llvm-xray/xray-account.h
index 5c457f178166..b63ecc59b71a 100644
--- a/tools/llvm-xray/xray-account.h
+++ b/tools/llvm-xray/xray-account.h
@@ -1,9 +1,8 @@
 //===- xray-account.h - XRay Function Call Accounting ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -29,12 +28,11 @@ namespace xray {
 class LatencyAccountant {
 public:
   typedef std::map<int32_t, std::vector<uint64_t>> FunctionLatencyMap;
-  typedef std::map<llvm::sys::procid_t, std::pair<uint64_t, uint64_t>>
+  typedef std::map<uint32_t, std::pair<uint64_t, uint64_t>>
       PerThreadMinMaxTSCMap;
   typedef std::map<uint8_t, std::pair<uint64_t, uint64_t>> PerCPUMinMaxTSCMap;
   typedef std::vector<std::pair<int32_t, uint64_t>> FunctionStack;
-  typedef std::map<llvm::sys::procid_t, FunctionStack>
-      PerThreadFunctionStackMap;
+  typedef std::map<uint32_t, FunctionStack> PerThreadFunctionStackMap;
 
 private:
   PerThreadFunctionStackMap PerThreadFunctionStack;
@@ -78,13 +76,6 @@ public:
   ///
   bool accountRecord(const XRayRecord &Record);
 
-  const FunctionStack *getThreadFunctionStack(llvm::sys::procid_t TId) const {
-    auto I = PerThreadFunctionStack.find(TId);
-    if (I == PerThreadFunctionStack.end())
-      return nullptr;
-    return &I->second;
-  }
-
   const PerThreadFunctionStackMap &getPerThreadFunctionStack() const {
     return PerThreadFunctionStack;
   }
diff --git a/tools/llvm-xray/xray-color-helper.cpp b/tools/llvm-xray/xray-color-helper.cpp
index 78a264b73d8f..c09cad3ba7d2 100644
--- a/tools/llvm-xray/xray-color-helper.cpp
+++ b/tools/llvm-xray/xray-color-helper.cpp
@@ -1,9 +1,8 @@
 //===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-color-helper.h b/tools/llvm-xray/xray-color-helper.h
index b2dcf626a65f..0940fc211343 100644
--- a/tools/llvm-xray/xray-color-helper.h
+++ b/tools/llvm-xray/xray-color-helper.h
@@ -1,9 +1,8 @@
 //===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp
index 3f153b99bc93..dfc757e0f276 100644
--- a/tools/llvm-xray/xray-converter.cpp
+++ b/tools/llvm-xray/xray-converter.cpp
@@ -1,9 +1,8 @@
 //===- xray-converter.cpp: XRay Trace Conversion --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -18,7 +17,6 @@
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
@@ -242,6 +240,31 @@ StackTrieNode *findOrCreateStackNode(
   return CurrentStack;
 }
 
+void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
+                            uint32_t TId, uint32_t PId, bool Symbolize,
+                            const FuncIdConversionHelper &FuncIdHelper,
+                            double EventTimestampUs,
+                            const StackTrieNode &StackCursor,
+                            StringRef FunctionPhenotype) {
+  OS << "    ";
+  if (Version >= 3) {
+    OS << llvm::formatv(
+        R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
+        R"("ts" : "{4:f4}", "sf" : "{5}" })",
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+                   : llvm::to_string(FuncId)),
+        FunctionPhenotype, TId, PId, EventTimestampUs,
+        StackCursor.ExtraData.id);
+  } else {
+    OS << llvm::formatv(
+        R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
+        R"("ts" : "{3:f3}", "sf" : "{4}" })",
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+                   : llvm::to_string(FuncId)),
+        FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
+  }
+}
+
 } // namespace
 
 void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@@ -252,14 +275,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
 
   unsigned id_counter = 0;
 
+  OS << "{\n  \"traceEvents\": [";
   DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
   DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
   DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
   std::forward_list<StackTrieNode> NodeStore{};
-
-  // Create a JSON Array which will hold all trace events.
-  json::Array TraceEvents;
+  int loop_count = 0;
   for (const auto &R : Records) {
+    if (loop_count++ == 0)
+      OS << "\n";
+    else
+      OS << ",\n";
+
     // Chrome trace event format always wants data in micros.
     // CyclesPerMicro = CycleHertz / 10^6
     // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@@ -284,15 +311,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
       // type of B for begin or E for end, thread id, process id,
       // timestamp in microseconds, and a stack frame id. The ids are logged
       // in an id dictionary after the events.
-      TraceEvents.push_back(json::Object({
-          {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
-                             : llvm::to_string(R.FuncId)},
-          {"ph", "B"},
-          {"tid", llvm::to_string(R.TId)},
-          {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
-          {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
-          {"sf", llvm::to_string(StackCursor->ExtraData.id)},
-      }));
+      writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
+                             FuncIdHelper, EventTimestampUs, *StackCursor, "B");
       break;
     case RecordTypes::EXIT:
     case RecordTypes::TAIL_EXIT:
@@ -303,51 +323,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
       // (And/Or in loop termination below)
       StackTrieNode *PreviousCursor = nullptr;
       do {
-        TraceEvents.push_back(json::Object({
-            {"name", Symbolize
-                         ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
-                         : llvm::to_string(StackCursor->FuncId)},
-            {"ph", "E"},
-            {"tid", llvm::to_string(R.TId)},
-            {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
-            {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
-            {"sf", llvm::to_string(StackCursor->ExtraData.id)},
-        }));
+        if (PreviousCursor != nullptr) {
+          OS << ",\n";
+        }
+        writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
+                               Symbolize, FuncIdHelper, EventTimestampUs,
+                               *StackCursor, "E");
         PreviousCursor = StackCursor;
         StackCursor = StackCursor->Parent;
       } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
       break;
     }
   }
+  OS << "\n  ],\n"; // Close the Trace Events array.
+  OS << "  "
+     << "\"displayTimeUnit\": \"ns\",\n";
 
   // The stackFrames dictionary substantially reduces size of the output file by
   // avoiding repeating the entire call stack of function names for each entry.
-  json::Object StackFrames;
-  for (const auto &Stack : StacksByStackId) {
-    const auto &StackId = Stack.first;
-    const auto &StackFunctionNode = Stack.second;
-    json::Object::iterator It;
-    std::tie(It, std::ignore) = StackFrames.insert({
-        llvm::to_string(StackId),
-        json::Object{
-            {"name",
-             Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
-                       : llvm::to_string(StackFunctionNode->FuncId)}},
-    });
-
-    if (StackFunctionNode->Parent != nullptr)
-      It->second.getAsObject()->insert(
-          {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
+  OS << R"(  "stackFrames": {)";
+  int stack_frame_count = 0;
+  for (auto map_iter : StacksByStackId) {
+    if (stack_frame_count++ == 0)
+      OS << "\n";
+    else
+      OS << ",\n";
+    OS << "    ";
+    OS << llvm::formatv(
+        R"("{0}" : { "name" : "{1}")", map_iter.first,
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
+                   : llvm::to_string(map_iter.second->FuncId)));
+    if (map_iter.second->Parent != nullptr)
+      OS << llvm::formatv(R"(, "parent": "{0}")",
+                          map_iter.second->Parent->ExtraData.id);
+    OS << " }";
   }
-
-  json::Object TraceJSON{
-      {"displayTimeUnit", "ns"},
-      {"traceEvents", std::move(TraceEvents)},
-      {"stackFrames", std::move(StackFrames)},
-  };
-
-  // Pretty-print the JSON using two spaces for indentations.
-  OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
+  OS << "\n  }\n"; // Close the stack frames map.
+  OS << "}\n";     // Close the JSON entry.
 }
 
 namespace llvm {
@@ -368,9 +380,7 @@ static CommandRegistration Unused(&Convert, []() -> Error {
   }
 
   const auto &FunctionAddresses = Map.getFunctionAddresses();
-  symbolize::LLVMSymbolizer::Options Opts(
-      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
-  symbolize::LLVMSymbolizer Symbolizer(Opts);
+  symbolize::LLVMSymbolizer Symbolizer;
   llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer,
                                                   FunctionAddresses);
   llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize);
diff --git a/tools/llvm-xray/xray-converter.h b/tools/llvm-xray/xray-converter.h
index 5f0a3ee298eb..db6d2b1614ee 100644
--- a/tools/llvm-xray/xray-converter.h
+++ b/tools/llvm-xray/xray-converter.h
@@ -1,9 +1,8 @@
 //===- xray-converter.h - XRay Trace Conversion ---------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-extract.cpp b/tools/llvm-xray/xray-extract.cpp
index 10fe7d8d6209..7c7d26b5a389 100644
--- a/tools/llvm-xray/xray-extract.cpp
+++ b/tools/llvm-xray/xray-extract.cpp
@@ -1,9 +1,8 @@
 //===- xray-extract.cpp: XRay Instrumentation Map Extraction --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -87,9 +86,7 @@ static CommandRegistration Unused(&Extract, []() -> Error {
         Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
   const auto &FunctionAddresses =
       InstrumentationMapOrError->getFunctionAddresses();
-  symbolize::LLVMSymbolizer::Options Opts(
-      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
-  symbolize::LLVMSymbolizer Symbolizer(Opts);
+  symbolize::LLVMSymbolizer Symbolizer;
   llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
                                                   FunctionAddresses);
   exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper);
diff --git a/tools/llvm-xray/xray-fdr-dump.cpp b/tools/llvm-xray/xray-fdr-dump.cpp
index 389825605b62..81a93cac57c4 100644
--- a/tools/llvm-xray/xray-fdr-dump.cpp
+++ b/tools/llvm-xray/xray-fdr-dump.cpp
@@ -1,9 +1,8 @@
 //===- xray-fdr-dump.cpp: XRay FDR Trace Dump Tool ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -36,10 +35,9 @@ static cl::opt<bool> DumpVerify("verify",
 
 static CommandRegistration Unused(&Dump, []() -> Error {
   // Open the file provided.
-  int Fd;
-  if (auto EC = sys::fs::openFileForRead(DumpInput, Fd))
-    return createStringError(EC, "Cannot open file '%s' for read.",
-                             DumpInput.c_str());
+  auto FDOrErr = sys::fs::openNativeFileForRead(DumpInput);
+  if (!FDOrErr)
+    return FDOrErr.takeError();
 
   uint64_t FileSize;
   if (auto EC = sys::fs::file_size(DumpInput, FileSize))
@@ -48,7 +46,9 @@ static CommandRegistration Unused(&Dump, []() -> Error {
 
   std::error_code EC;
   sys::fs::mapped_file_region MappedFile(
-      Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+      *FDOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+      EC);
+  sys::fs::closeFile(*FDOrErr);
 
   DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8);
   uint32_t OffsetPtr = 0;
diff --git a/tools/llvm-xray/xray-graph-diff.cpp b/tools/llvm-xray/xray-graph-diff.cpp
index a22f2a99811d..a514be97f40b 100644
--- a/tools/llvm-xray/xray-graph-diff.cpp
+++ b/tools/llvm-xray/xray-graph-diff.cpp
@@ -1,9 +1,8 @@
 //===-- xray-graph-diff.cpp: XRay Function Call Graph Renderer ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-graph-diff.h b/tools/llvm-xray/xray-graph-diff.h
index 5abec91d8582..5d12c563f47c 100644
--- a/tools/llvm-xray/xray-graph-diff.h
+++ b/tools/llvm-xray/xray-graph-diff.h
@@ -1,9 +1,8 @@
 //===-- xray-graph-diff.h - XRay Graph Diff Renderer ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-graph.cpp b/tools/llvm-xray/xray-graph.cpp
index fe49cca20d57..c09357fcb502 100644
--- a/tools/llvm-xray/xray-graph.cpp
+++ b/tools/llvm-xray/xray-graph.cpp
@@ -1,9 +1,8 @@
 //===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -437,9 +436,7 @@ Expected<GraphRenderer> GraphRenderer::Factory::getGraphRenderer() {
 
   const auto &FunctionAddresses = Map.getFunctionAddresses();
 
-  symbolize::LLVMSymbolizer::Options Opts(
-      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
-  symbolize::LLVMSymbolizer Symbolizer(Opts);
+  symbolize::LLVMSymbolizer Symbolizer;
   const auto &Header = Trace.getFileHeader();
 
   llvm::xray::FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer,
diff --git a/tools/llvm-xray/xray-graph.h b/tools/llvm-xray/xray-graph.h
index fc7f8bb470f2..23372d40f05e 100644
--- a/tools/llvm-xray/xray-graph.h
+++ b/tools/llvm-xray/xray-graph.h
@@ -1,9 +1,8 @@
 //===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -79,8 +78,7 @@ public:
 
   using FunctionStack = SmallVector<FunctionAttr, 4>;
 
-  using PerThreadFunctionStackMap =
-      DenseMap<llvm::sys::procid_t, FunctionStack>;
+  using PerThreadFunctionStackMap = DenseMap<uint32_t, FunctionStack>;
 
   class GraphT : public Graph<FunctionStats, CallStats, int32_t> {
   public:
diff --git a/tools/llvm-xray/xray-registry.cpp b/tools/llvm-xray/xray-registry.cpp
index fe58e4deaa1e..e5c253d2e8f1 100644
--- a/tools/llvm-xray/xray-registry.cpp
+++ b/tools/llvm-xray/xray-registry.cpp
@@ -1,9 +1,8 @@
 //===- xray-registry.cpp: Implement a command registry. -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-registry.h b/tools/llvm-xray/xray-registry.h
index 6eab016273f5..d6fae78ea53c 100644
--- a/tools/llvm-xray/xray-registry.h
+++ b/tools/llvm-xray/xray-registry.h
@@ -1,9 +1,8 @@
 //===- xray-registry.h - Define registry mechanism for commands. ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/llvm-xray/xray-stacks.cpp b/tools/llvm-xray/xray-stacks.cpp
index d3af9e25e6f2..bcfc5cb1f1be 100644
--- a/tools/llvm-xray/xray-stacks.cpp
+++ b/tools/llvm-xray/xray-stacks.cpp
@@ -1,9 +1,8 @@
 //===- xray-stacks.cpp: XRay Function Call Stack Accounting ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -634,10 +633,8 @@ public:
                               Top->ExtraData.TerminalDurations.end(), 0uLL);
           {
             auto E = std::make_pair(Top, TopSum);
-            TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
-                                                   TopStacksBySum.end(), E,
-                                                   greater_second),
-                                  E);
+            TopStacksBySum.insert(
+                llvm::lower_bound(TopStacksBySum, E, greater_second), E);
             if (TopStacksBySum.size() == 11)
               TopStacksBySum.pop_back();
           }
@@ -721,9 +718,7 @@ static CommandRegistration Unused(&Stack, []() -> Error {
               "-all-stacks."),
         std::make_error_code(std::errc::invalid_argument));
 
-  symbolize::LLVMSymbolizer::Options Opts(
-      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
-  symbolize::LLVMSymbolizer Symbolizer(Opts);
+  symbolize::LLVMSymbolizer Symbolizer;
   FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer,
                                       Map.getFunctionAddresses());
   // TODO: Someday, support output to files instead of just directly to
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index cfdd2cf1582b..b888605a516c 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -1,9 +1,8 @@
 //===- AnalysisWrappers.cpp - Wrappers around non-pass analyses -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/opt/BreakpointPrinter.cpp b/tools/opt/BreakpointPrinter.cpp
index d3f54c034f55..a57a8c43c264 100644
--- a/tools/opt/BreakpointPrinter.cpp
+++ b/tools/opt/BreakpointPrinter.cpp
@@ -1,9 +1,8 @@
 //===- BreakpointPrinter.cpp - Breakpoint location printer ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -36,7 +35,7 @@ struct BreakpointPrinter : public ModulePass {
       }
     } else if (auto *TY = dyn_cast<DIType>(Context)) {
       if (!TY->getName().empty()) {
-        getContextName(TY->getScope().resolve(), N);
+        getContextName(TY->getScope(), N);
         N = N + TY->getName().str() + "::";
       }
     }
@@ -50,7 +49,7 @@ struct BreakpointPrinter : public ModulePass {
         auto *SP = cast_or_null<DISubprogram>(NMD->getOperand(i));
         if (!SP)
           continue;
-        getContextName(SP->getScope().resolve(), Name);
+        getContextName(SP->getScope(), Name);
         Name = Name + SP->getName().str();
         if (!Name.empty() && Processed.insert(Name).second) {
           Out << Name << "\n";
diff --git a/tools/opt/BreakpointPrinter.h b/tools/opt/BreakpointPrinter.h
index 57670e5ee8d8..2877555f852c 100644
--- a/tools/opt/BreakpointPrinter.h
+++ b/tools/opt/BreakpointPrinter.h
@@ -1,9 +1,8 @@
 //===- BreakpointPrinter.h - Breakpoint location printer ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/opt/Debugify.cpp b/tools/opt/Debugify.cpp
index 3b1effba1592..222cc702bc1f 100644
--- a/tools/opt/Debugify.cpp
+++ b/tools/opt/Debugify.cpp
@@ -1,9 +1,8 @@
 //===- Debugify.cpp - Attach synthetic debug info to everything -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/opt/Debugify.h b/tools/opt/Debugify.h
index d1a60c73e723..266f577951ae 100644
--- a/tools/opt/Debugify.h
+++ b/tools/opt/Debugify.h
@@ -1,9 +1,8 @@
 //===- Debugify.h - Attach synthetic debug info to everything -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index a8bb12f3e018..611fb20513c9 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -1,9 +1,8 @@
 //===- GraphPrinters.cpp - DOT printers for various graph types -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/opt/NewPMDriver.cpp b/tools/opt/NewPMDriver.cpp
index 211a3b151fe1..efe0bec35d72 100644
--- a/tools/opt/NewPMDriver.cpp
+++ b/tools/opt/NewPMDriver.cpp
@@ -1,9 +1,8 @@
 //===- NewPMDriver.cpp - Driver for opt with new PM -----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -101,19 +100,11 @@ static cl::opt<std::string> OptimizerLastEPPipeline(
              "the OptimizerLast extension point into default pipelines"),
     cl::Hidden);
 
-enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
-static cl::opt<PGOKind> PGOKindFlag(
-    "pgo-kind", cl::init(NoPGO), cl::Hidden,
-    cl::desc("The kind of profile guided optimization"),
-    cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
-               clEnumValN(InstrGen, "new-pm-pgo-instr-gen-pipeline",
-                          "Instrument the IR to generate profile."),
-               clEnumValN(InstrUse, "new-pm-pgo-instr-use-pipeline",
-                          "Use instrumented profile to guide PGO."),
-               clEnumValN(SampleUse, "new-pm-pgo-sample-use-pipeline",
-                          "Use sampled profile to guide PGO.")));
-static cl::opt<std::string> ProfileFile(
-    "profile-file", cl::desc("Path to the profile."), cl::Hidden);
+extern cl::opt<PGOKind> PGOKindFlag;
+extern cl::opt<std::string> ProfileFile;
+extern cl::opt<CSPGOKind> CSPGOKindFlag;
+extern cl::opt<std::string> CSProfileGenFile;
+
 static cl::opt<std::string>
     ProfileRemappingFile("profile-remapping-file",
                          cl::desc("Path to the profile remapping file."),
@@ -231,25 +222,52 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
   Optional<PGOOptions> P;
   switch (PGOKindFlag) {
     case InstrGen:
-      P = PGOOptions(ProfileFile, "", "", "", true);
+      P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr);
       break;
     case InstrUse:
-      P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false);
+      P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse);
       break;
     case SampleUse:
-      P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false);
+      P = PGOOptions(ProfileFile, "", ProfileRemappingFile,
+                     PGOOptions::SampleUse);
       break;
     case NoPGO:
       if (DebugInfoForProfiling)
-        P = PGOOptions("", "", "", "", false, true);
+        P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
+                       true);
       else
         P = None;
-  }
+    }
+    // Fold the context-sensitive PGO request (-cspgo-kind) into P.
+    if (CSPGOKindFlag != NoCSPGO) {
+      if (P && (P->Action == PGOOptions::IRInstr ||
+                P->Action == PGOOptions::SampleUse))
+        errs() << "CSPGOKind cannot be used with IRInstr or SampleUse";
+      if (CSPGOKindFlag == CSInstrGen) {
+        if (CSProfileGenFile.empty())
+          errs() << "CSInstrGen needs to specify CSProfileGenFile";
+        if (P) {
+          P->CSAction = PGOOptions::CSIRInstr;
+          P->CSProfileGenFile = CSProfileGenFile;
+        } else
+          P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile,
+                         PGOOptions::NoAction, PGOOptions::CSIRInstr);
+      } else /* CSPGOKindFlag == CSInstrUse */ {
+        // CSInstrUse only refines an existing PGO configuration. With none
+        // set up, P is empty, so report the error and bail out rather than
+        // dereference an empty Optional.
+        if (!P) {
+          errs() << "CSInstrUse needs to be together with InstrUse";
+          return false;
+        }
+        P->CSAction = PGOOptions::CSIRUse;
+      }
+    }
   PassInstrumentationCallbacks PIC;
   StandardInstrumentations SI;
   SI.registerCallbacks(PIC);
 
-  PassBuilder PB(TM, P, &PIC);
+  PassBuilder PB(TM, PipelineTuningOptions(), P, &PIC);
   registerEPCallbacks(PB, VerifyEachPass, DebugPM);
 
   // Load requested pass plugins and let them register pass builder callbacks
diff --git a/tools/opt/NewPMDriver.h b/tools/opt/NewPMDriver.h
index 7d74a5777d11..b672c97c9aa3 100644
--- a/tools/opt/NewPMDriver.h
+++ b/tools/opt/NewPMDriver.h
@@ -1,9 +1,8 @@
 //===- NewPMDriver.h - Function to drive opt with the new PM ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -40,6 +39,13 @@ enum VerifierKind {
   VK_VerifyInAndOut,
   VK_VerifyEachPass
 };
+enum PGOKind { // Profile-guided optimization mode selected by -pgo-kind.
+  NoPGO, // Do not use PGO.
+  InstrGen, // Instrument the IR to generate a profile.
+  InstrUse, // Use an instrumented profile to guide PGO.
+  SampleUse // Use a sampled profile to guide PGO.
+};
+enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse }; // -cspgo-kind modes.
 }
 
 /// Driver function to run the new pass manager over a module.
diff --git a/tools/opt/PassPrinters.cpp b/tools/opt/PassPrinters.cpp
index 310d491c06a5..70da6a43f8d9 100644
--- a/tools/opt/PassPrinters.cpp
+++ b/tools/opt/PassPrinters.cpp
@@ -1,9 +1,8 @@
 //===- PassPrinters.cpp - Utilities to print analysis info for passes -----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/opt/PassPrinters.h b/tools/opt/PassPrinters.h
index e66f3f457b7a..d4e7a4a97f31 100644
--- a/tools/opt/PassPrinters.h
+++ b/tools/opt/PassPrinters.h
@@ -1,9 +1,8 @@
 //=- PassPrinters.h - Utilities to print analysis info for passes -*- C++ -*-=//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index 78ede2b72f84..419886d6cc60 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -1,9 +1,8 @@
 //===- PrintSCC.cpp - Enumerate SCCs in some key graphs -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index a4967a234d9c..ccf8b073b82b 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -1,9 +1,8 @@
 //===- opt.cpp - The LLVM Modular Optimizer -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,6 +33,7 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/LegacyPassNameParser.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/InitializePasses.h"
@@ -172,19 +172,10 @@ CodeGenOptLevel("codegen-opt-level",
 static cl::opt<std::string>
 TargetTriple("mtriple", cl::desc("Override target triple for module"));
 
-static cl::opt<bool>
-UnitAtATime("funit-at-a-time",
-            cl::desc("Enable IPO. This corresponds to gcc's -funit-at-a-time"),
-            cl::init(true));
-
 static cl::opt<bool>
 DisableLoopUnrolling("disable-loop-unrolling",
                      cl::desc("Disable loop unrolling in all relevant passes"),
                      cl::init(false));
-static cl::opt<bool>
-DisableLoopVectorization("disable-loop-vectorization",
-                     cl::desc("Disable the loop vectorization pass"),
-                     cl::init(false));
 
 static cl::opt<bool>
 DisableSLPVectorization("disable-slp-vectorization",
@@ -260,21 +251,62 @@ static cl::opt<bool> Coroutines(
   cl::desc("Enable coroutine passes."),
   cl::init(false), cl::Hidden);
 
-static cl::opt<bool> PassRemarksWithHotness(
+static cl::opt<bool> RemarksWithHotness(
     "pass-remarks-with-hotness",
     cl::desc("With PGO, include profile count in optimization remarks"),
     cl::Hidden);
 
-static cl::opt<unsigned> PassRemarksHotnessThreshold(
-    "pass-remarks-hotness-threshold",
-    cl::desc("Minimum profile count required for an optimization remark to be output"),
-    cl::Hidden);
+static cl::opt<unsigned>
+    RemarksHotnessThreshold("pass-remarks-hotness-threshold",
+                            cl::desc("Minimum profile count required for "
+                                     "an optimization remark to be output"),
+                            cl::Hidden);
 
 static cl::opt<std::string>
     RemarksFilename("pass-remarks-output",
-                    cl::desc("YAML output filename for pass remarks"),
+                    cl::desc("Output filename for pass remarks"),
                     cl::value_desc("filename"));
 
+static cl::opt<std::string>
+    RemarksPasses("pass-remarks-filter",
+                  cl::desc("Only record optimization remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+    "pass-remarks-format",
+    cl::desc("The format used for serializing remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
+
+cl::opt<PGOKind>
+    PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden,
+                cl::desc("The kind of profile guided optimization"),
+                cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
+                           clEnumValN(InstrGen, "pgo-instr-gen-pipeline",
+                                      "Instrument the IR to generate profile."),
+                           clEnumValN(InstrUse, "pgo-instr-use-pipeline",
+                                      "Use instrumented profile to guide PGO."),
+                           clEnumValN(SampleUse, "pgo-sample-use-pipeline",
+                                      "Use sampled profile to guide PGO.")));
+cl::opt<std::string> ProfileFile("profile-file",
+                                 cl::desc("Path to the profile."), cl::Hidden);
+
+cl::opt<CSPGOKind> CSPGOKindFlag(
+    "cspgo-kind", cl::init(NoCSPGO), cl::Hidden,
+    cl::desc("The kind of context sensitive profile guided optimization"),
+    cl::values(
+        clEnumValN(NoCSPGO, "nocspgo", "Do not use CSPGO."),
+        clEnumValN(
+            CSInstrGen, "cspgo-instr-gen-pipeline",
+            "Instrument (context sensitive) the IR to generate profile."),
+        clEnumValN(
+            CSInstrUse, "cspgo-instr-use-pipeline",
+            "Use instrumented (context sensitive) profile to guide PGO.")));
+cl::opt<std::string> CSProfileGenFile(
+    "cs-profilegen-file",
+    cl::desc("Path to the instrumented context sensitive profile."),
+    cl::Hidden);
+
 class OptCustomPassManager : public legacy::PassManager {
   DebugifyStatsMap DIStatsMap;
 
@@ -348,15 +380,16 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
   } else {
     Builder.Inliner = createAlwaysInlinerLegacyPass();
   }
-  Builder.DisableUnitAtATime = !UnitAtATime;
   Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
                                DisableLoopUnrolling : OptLevel == 0;
 
-  // This is final, unless there is a #pragma vectorize enable
-  if (DisableLoopVectorization)
-    Builder.LoopVectorize = false;
-  // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
-  else if (!Builder.LoopVectorize)
+  // Check whether vectorization was explicitly disabled via
+  // -vectorize-loops=false. That flag enables vectorization in the
+  // LoopVectorize pass and is on by default; if it was turned off, leave it
+  // off here. A separate flag, -loop-vectorize, controls whether the pass is
+  // added to the pass manager; if it is set the pass is added, and no
+  // additional check for it is made here.
+  if (Builder.LoopVectorize)
     Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
 
   // When #pragma vectorize is on for SLP, do the same as above
@@ -369,6 +402,32 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
   if (Coroutines)
     addCoroutinePassesToExtensionPoints(Builder);
 
+  switch (PGOKindFlag) {
+  case InstrGen:
+    Builder.EnablePGOInstrGen = true;
+    Builder.PGOInstrGen = ProfileFile;
+    break;
+  case InstrUse:
+    Builder.PGOInstrUse = ProfileFile;
+    break;
+  case SampleUse:
+    Builder.PGOSampleUse = ProfileFile;
+    break;
+  default:
+    break;
+  }
+
+  switch (CSPGOKindFlag) {
+  case CSInstrGen:
+    Builder.EnablePGOCSInstrGen = true;
+    break;
+  case CSInstrUse:
+    Builder.EnablePGOCSInstrUse = true;
+    break;
+  default:
+    break;
+  }
+
   Builder.populateFunctionPassManager(FPM);
   Builder.populateModulePassManager(MPM);
 }
@@ -464,6 +523,7 @@ int main(int argc, char **argv) {
   initializeDwarfEHPreparePass(Registry);
   initializeSafeStackLegacyPassPass(Registry);
   initializeSjLjEHPreparePass(Registry);
+  initializeStackProtectorPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
   initializeGlobalMergePass(Registry);
   initializeIndirectBrExpandPassPass(Registry);
@@ -475,6 +535,7 @@ int main(int argc, char **argv) {
   initializeExpandReductionsPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
+  initializeHardwareLoopsPass(Registry);
 
 #ifdef LINK_POLLY_INTO_TOOLS
   polly::initializePollyPasses(Registry);
@@ -494,24 +555,15 @@ int main(int argc, char **argv) {
   if (!DisableDITypeMap)
     Context.enableDebugTypeODRUniquing();
 
-  if (PassRemarksWithHotness)
-    Context.setDiagnosticsHotnessRequested(true);
-
-  if (PassRemarksHotnessThreshold)
-    Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
-
-  std::unique_ptr<ToolOutputFile> OptRemarkFile;
-  if (RemarksFilename != "") {
-    std::error_code EC;
-    OptRemarkFile =
-        llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
-    if (EC) {
-      errs() << EC.message() << '\n';
-      return 1;
-    }
-    Context.setDiagnosticsOutputFile(
-        llvm::make_unique<yaml::Output>(OptRemarkFile->os()));
+  Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
+      setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+                               RemarksFormat, RemarksWithHotness,
+                               RemarksHotnessThreshold);
+  if (Error E = RemarksFileOrErr.takeError()) {
+    errs() << toString(std::move(E)) << '\n';
+    return 1;
   }
+  std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
 
   // Load the input module...
   std::unique_ptr<Module> M =
@@ -585,6 +637,11 @@ int main(int argc, char **argv) {
     CPUStr = getCPUStr();
     FeaturesStr = getFeaturesStr();
     Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options);
+  } else if (ModuleTriple.getArchName() != "unknown" &&
+             ModuleTriple.getArchName() != "") {
+    errs() << argv[0] << ": unrecognized architecture '"
+           << ModuleTriple.getArchName() << "' provided.\n";
+    return 1;
   }
 
   std::unique_ptr<TargetMachine> TM(Machine);
@@ -620,7 +677,7 @@ int main(int argc, char **argv) {
     // string. Hand off the rest of the functionality to the new code for that
     // layer.
     return runPassPipeline(argv[0], *M, TM.get(), Out.get(), ThinLinkOut.get(),
-                           OptRemarkFile.get(), PassPipeline, OK, VK,
+                           RemarksFile.get(), PassPipeline, OK, VK,
                            PreserveAssemblyUseListOrder,
                            PreserveBitcodeUseListOrder, EmitSummaryIndex,
                            EmitModuleHash, EnableDebugify)
@@ -856,8 +913,8 @@ int main(int argc, char **argv) {
              "the compile-twice option\n";
       Out->os() << BOS->str();
       Out->keep();
-      if (OptRemarkFile)
-        OptRemarkFile->keep();
+      if (RemarksFile)
+        RemarksFile->keep();
       return 1;
     }
     Out->os() << BOS->str();
@@ -870,8 +927,8 @@ int main(int argc, char **argv) {
   if (!NoOutput || PrintBreakpoints)
     Out->keep();
 
-  if (OptRemarkFile)
-    OptRemarkFile->keep();
+  if (RemarksFile)
+    RemarksFile->keep();
 
   if (ThinLinkOut)
     ThinLinkOut->keep();
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 5b4229e64682..146d10835b8d 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1,9 +1,8 @@
 //===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -1073,8 +1072,9 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
   // handle, the target should be refactored to use operands instead of
   // modifiers.
   //
-  // Also, check for instructions which reference the operand multiple times;
-  // this implies a constraint we would not honor.
+  // Also, check for instructions which reference the operand multiple times,
+  // if they don't define a custom AsmMatcher: this implies a constraint that
+  // the built-in matching code would not honor.
   std::set<std::string> OperandNames;
   for (const AsmOperand &Op : AsmOperands) {
     StringRef Tok = Op.Token;
@@ -1084,7 +1084,8 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
                       "' not supported by asm matcher.  Mark isCodeGenOnly!");
     // Verify that any operand is only mentioned once.
     // We reject aliases and ignore instructions for now.
-    if (!IsAlias && Tok[0] == '$' && !OperandNames.insert(Tok).second) {
+    if (!IsAlias && TheDef->getValueAsString("AsmMatchConverter").empty() &&
+        Tok[0] == '$' && !OperandNames.insert(Tok).second) {
       LLVM_DEBUG({
         errs() << "warning: '" << TheDef->getName() << "': "
                << "ignoring instruction with tied operand '"
@@ -1160,8 +1161,9 @@ AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) {
     // use it, else just fall back to the underlying register class.
     const RecordVal *R = Rec->getValue("ParserMatchClass");
     if (!R || !R->getValue())
-      PrintFatalError("Record `" + Rec->getName() +
-        "' does not have a ParserMatchClass!\n");
+      PrintFatalError(Rec->getLoc(),
+                      "Record `" + Rec->getName() +
+                          "' does not have a ParserMatchClass!\n");
 
     if (DefInit *DI= dyn_cast<DefInit>(R->getValue())) {
       Record *MatchClass = DI->getDef();
@@ -1473,7 +1475,6 @@ void AsmMatcherInfo::buildInfo() {
   for (const auto &Pair : SubtargetFeatures)
     LLVM_DEBUG(Pair.second.dump());
 #endif // NDEBUG
-  assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
 
   bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
   bool ReportMultipleNearMisses =
@@ -1928,10 +1929,11 @@ getConverterOperandID(const std::string &Name,
   return ID;
 }
 
-static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
-                             std::vector<std::unique_ptr<MatchableInfo>> &Infos,
-                             bool HasMnemonicFirst, bool HasOptionalOperands,
-                             raw_ostream &OS) {
+static unsigned
+emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
+                 std::vector<std::unique_ptr<MatchableInfo>> &Infos,
+                 bool HasMnemonicFirst, bool HasOptionalOperands,
+                 raw_ostream &OS) {
   SmallSetVector<CachedHashString, 16> OperandConversionKinds;
   SmallSetVector<CachedHashString, 16> InstructionConversionKinds;
   std::vector<std::vector<uint8_t> > ConversionTable;
@@ -2337,6 +2339,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
 
   // Spit out the operand number lookup function.
   OS << OpOS.str();
+
+  return ConversionTable.size();
 }
 
 /// emitMatchClassEnumeration - Emit the enumeration for match class kinds.
@@ -2675,7 +2679,7 @@ static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) {
     for (const auto &SF : Info.SubtargetFeatures) {
       const SubtargetFeatureInfo &SFI = SF.second;
       // FIXME: Totally just a placeholder name to get the algorithm working.
-      OS << "  case " << SFI.getEnumName() << ": return \""
+      OS << "  case " << SFI.getEnumBitName() << ": return \""
          << SFI.TheDef->getValueAsString("PredicateName") << "\";\n";
     }
     OS << "  default: return \"(unknown)\";\n";
@@ -2691,7 +2695,10 @@ static std::string GetAliasRequiredFeatures(Record *R,
                                             const AsmMatcherInfo &Info) {
   std::vector<Record*> ReqFeatures = R->getValueAsListOfDefs("Predicates");
   std::string Result;
-  unsigned NumFeatures = 0;
+
+  if (ReqFeatures.empty())
+    return Result;
+
   for (unsigned i = 0, e = ReqFeatures.size(); i != e; ++i) {
     const SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]);
 
@@ -2699,15 +2706,12 @@ static std::string GetAliasRequiredFeatures(Record *R,
       PrintFatalError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() +
                     "' is not marked as an AssemblerPredicate!");
 
-    if (NumFeatures)
-      Result += '|';
+    if (i)
+      Result += " && ";
 
-    Result += F->getEnumName();
-    ++NumFeatures;
+    Result += "Features.test(" + F->getEnumBitName() + ')';
   }
 
-  if (NumFeatures > 1)
-    Result = '(' + Result + ')';
   return Result;
 }
 
@@ -2763,7 +2767,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
 
       if (!MatchCode.empty())
         MatchCode += "else ";
-      MatchCode += "if ((Features & " + FeatureMask + ") == "+FeatureMask+")\n";
+      MatchCode += "if (" + FeatureMask + ")\n";
       MatchCode += "  Mnemonic = \"";
       MatchCode += R->getValueAsString("ToMnemonic");
       MatchCode += "\";\n";
@@ -2798,7 +2802,7 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
   if (Aliases.empty()) return false;
 
   OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
-    "uint64_t Features, unsigned VariantID) {\n";
+    "const FeatureBitset &Features, unsigned VariantID) {\n";
   OS << "  switch (VariantID) {\n";
   unsigned VariantCount = Target.getAsmParserVariantCount();
   for (unsigned VC = 0; VC != VariantCount; ++VC) {
@@ -2823,7 +2827,9 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
 static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
                               const AsmMatcherInfo &Info, StringRef ClassName,
                               StringToOffsetTable &StringTable,
-                              unsigned MaxMnemonicIndex, bool HasMnemonicFirst) {
+                              unsigned MaxMnemonicIndex,
+                              unsigned MaxFeaturesIndex,
+                              bool HasMnemonicFirst) {
   unsigned MaxMask = 0;
   for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
     MaxMask |= OMI.OperandMask;
@@ -2832,14 +2838,14 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
   // Emit the static custom operand parsing table;
   OS << "namespace {\n";
   OS << "  struct OperandMatchEntry {\n";
-  OS << "    " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
-               << " RequiredFeatures;\n";
   OS << "    " << getMinimalTypeForRange(MaxMnemonicIndex)
                << " Mnemonic;\n";
+  OS << "    " << getMinimalTypeForRange(MaxMask)
+               << " OperandMask;\n";
   OS << "    " << getMinimalTypeForRange(std::distance(
                       Info.Classes.begin(), Info.Classes.end())) << " Class;\n";
-  OS << "    " << getMinimalTypeForRange(MaxMask)
-               << " OperandMask;\n\n";
+  OS << "    " << getMinimalTypeForRange(MaxFeaturesIndex)
+               << " RequiredFeaturesIdx;\n\n";
   OS << "    StringRef getMnemonic() const {\n";
   OS << "      return StringRef(MnemonicTable + Mnemonic + 1,\n";
   OS << "                       MnemonicTable[Mnemonic]);\n";
@@ -2865,29 +2871,18 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
   OS << "static const OperandMatchEntry OperandMatchTable["
      << Info.OperandMatchInfo.size() << "] = {\n";
 
-  OS << "  /* Operand List Mask, Mnemonic, Operand Class, Features */\n";
+  OS << "  /* Operand List Mnemonic, Mask, Operand Class, Features */\n";
   for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
     const MatchableInfo &II = *OMI.MI;
 
     OS << "  { ";
 
-    // Write the required features mask.
-    if (!II.RequiredFeatures.empty()) {
-      for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
-        if (i) OS << "|";
-        OS << II.RequiredFeatures[i]->getEnumName();
-      }
-    } else
-      OS << "0";
-
     // Store a pascal-style length byte in the mnemonic.
     std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.str();
-    OS << ", " << StringTable.GetOrAddStringOffset(LenMnemonic, false)
+    OS << StringTable.GetOrAddStringOffset(LenMnemonic, false)
        << " /* " << II.Mnemonic << " */, ";
 
-    OS << OMI.CI->Name;
-
-    OS << ", " << OMI.OperandMask;
+    OS << OMI.OperandMask;
     OS << " /* ";
     bool printComma = false;
     for (int i = 0, e = 31; i !=e; ++i)
@@ -2897,7 +2892,17 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
         OS << i;
         printComma = true;
       }
-    OS << " */";
+    OS << " */, ";
+
+    OS << OMI.CI->Name;
+
+    // Write the required features mask.
+    OS << ", AMFBS";
+    if (II.RequiredFeatures.empty())
+      OS << "_None";
+    else
+      for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i)
+        OS << '_' << II.RequiredFeatures[i]->TheDef->getName();
 
     OS << " },\n";
   }
@@ -2933,7 +2938,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit code to get the available features.
   OS << "  // Get the current feature set.\n";
-  OS << "  uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
+  OS << "  const FeatureBitset &AvailableFeatures = getAvailableFeatures();\n\n";
 
   OS << "  // Get the next operand index.\n";
   OS << "  unsigned NextOpNum = Operands.size()"
@@ -2967,8 +2972,10 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit check that the required features are available.
   OS << "    // check if the available features match\n";
+  OS << "    const FeatureBitset &RequiredFeatures = "
+        "FeatureBitsets[it->RequiredFeaturesIdx];\n";
   OS << "    if (!ParseForAllFeatures && (AvailableFeatures & "
-        "it->RequiredFeatures) != it->RequiredFeatures)\n";
+        "RequiredFeatures) != RequiredFeatures)\n";
   OS << "        continue;\n\n";
 
   // Emit check to ensure the operand number matches.
@@ -3034,7 +3041,8 @@ static void emitAsmTiedOperandConstraints(CodeGenTarget &Target,
 static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
                                      unsigned VariantCount) {
   OS << "static std::string " << Target.getName()
-     << "MnemonicSpellCheck(StringRef S, uint64_t FBS, unsigned VariantID) {\n";
+     << "MnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,"
+     << " unsigned VariantID) {\n";
   if (!VariantCount)
     OS <<  "  return \"\";";
   else {
@@ -3055,7 +3063,9 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
     OS << "  }\n\n";
     OS << "  for (auto I = Start; I < End; I++) {\n";
     OS << "    // Ignore unsupported instructions.\n";
-    OS << "    if ((FBS & I->RequiredFeatures) != I->RequiredFeatures)\n";
+    OS << "    const FeatureBitset &RequiredFeatures = "
+          "FeatureBitsets[I->RequiredFeaturesIdx];\n";
+    OS << "    if ((FBS & RequiredFeatures) != RequiredFeatures)\n";
     OS << "      continue;\n";
     OS << "\n";
     OS << "    StringRef T = I->getMnemonic();\n";
@@ -3103,6 +3113,14 @@ static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos,
   OS << "#endif // NDEBUG\n";
 }
 
+static std::string
+getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
+  std::string Name = "AMFBS";
+  for (const auto &Feature : FeatureBitset)
+    Name += ("_" + Feature->getName()).str();
+  return Name;
+}
+
 void AsmMatcherEmitter::run(raw_ostream &OS) {
   CodeGenTarget Target(Records);
   Record *AsmParser = Target.getAsmParser();
@@ -3115,10 +3133,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   // Sort the instruction table using the partial order on classes. We use
   // stable_sort to ensure that ambiguous instructions are still
   // deterministically ordered.
-  std::stable_sort(Info.Matchables.begin(), Info.Matchables.end(),
-                   [](const std::unique_ptr<MatchableInfo> &a,
-                      const std::unique_ptr<MatchableInfo> &b){
-                     return *a < *b;});
+  llvm::stable_sort(
+      Info.Matchables,
+      [](const std::unique_ptr<MatchableInfo> &a,
+         const std::unique_ptr<MatchableInfo> &b) { return *a < *b; });
 
 #ifdef EXPENSIVE_CHECKS
   // Verify that the table is sorted and operator < works transitively.
@@ -3174,7 +3192,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "#undef GET_ASSEMBLER_HEADER\n";
   OS << "  // This should be included into the middle of the declaration of\n";
   OS << "  // your subclasses implementation of MCTargetAsmParser.\n";
-  OS << "  uint64_t ComputeAvailableFeatures(const FeatureBitset& FB) const;\n";
+  OS << "  FeatureBitset ComputeAvailableFeatures(const FeatureBitset& FB) const;\n";
   if (HasOptionalOperands) {
     OS << "  void convertToMCInst(unsigned Kind, MCInst &Inst, "
        << "unsigned Opcode,\n"
@@ -3192,9 +3210,21 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   if (ReportMultipleNearMisses)
     OS << "                                SmallVectorImpl<NearMissInfo> *NearMisses,\n";
   else
-    OS << "                                uint64_t &ErrorInfo,\n";
+    OS << "                                uint64_t &ErrorInfo,\n"
+       << "                                FeatureBitset &MissingFeatures,\n";
   OS << "                                bool matchingInlineAsm,\n"
      << "                                unsigned VariantID = 0);\n";
+  if (!ReportMultipleNearMisses)
+    OS << "  unsigned MatchInstructionImpl(const OperandVector &Operands,\n"
+       << "                                MCInst &Inst,\n"
+       << "                                uint64_t &ErrorInfo,\n"
+       << "                                bool matchingInlineAsm,\n"
+       << "                                unsigned VariantID = 0) {\n"
+       << "    FeatureBitset MissingFeatures;\n"
+       << "    return MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,\n"
+       << "                                matchingInlineAsm, VariantID);\n"
+       << "  }\n\n";
+
 
   if (!Info.OperandMatchInfo.empty()) {
     OS << "  OperandMatchResultTy MatchOperandParserImpl(\n";
@@ -3219,7 +3249,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "#undef GET_REGISTER_MATCHER\n\n";
 
   // Emit the subtarget feature enumeration.
-  SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
+  SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
       Info.SubtargetFeatures, OS);
 
   // Emit the function to match a register name to number.
@@ -3249,8 +3279,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   // Generate the convertToMCInst function to convert operands into an MCInst.
   // Also, generate the convertToMapAndConstraints function for MS-style inline
   // assembly.  The latter doesn't actually generate a MCInst.
-  emitConvertFuncs(Target, ClassName, Info.Matchables, HasMnemonicFirst,
-                   HasOptionalOperands, OS);
+  unsigned NumConverters = emitConvertFuncs(Target, ClassName, Info.Matchables,
+                                            HasMnemonicFirst,
+                                            HasOptionalOperands, OS);
 
   // Emit the enumeration for classes which participate in matching.
   emitMatchClassEnumeration(Target, Info.Classes, OS);
@@ -3300,6 +3331,56 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   StringTable.EmitString(OS);
   OS << ";\n\n";
 
+  std::vector<std::vector<Record *>> FeatureBitsets;
+  for (const auto &MI : Info.Matchables) {
+    if (MI->RequiredFeatures.empty())
+      continue;
+    FeatureBitsets.emplace_back();
+    for (unsigned I = 0, E = MI->RequiredFeatures.size(); I != E; ++I)
+      FeatureBitsets.back().push_back(MI->RequiredFeatures[I]->TheDef);
+  }
+
+  llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+                                 const std::vector<Record *> &B) {
+    if (A.size() < B.size())
+      return true;
+    if (A.size() > B.size())
+      return false;
+    for (const auto &Pair : zip(A, B)) {
+      if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+        return true;
+      if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
+        return false;
+    }
+    return false;
+  });
+  FeatureBitsets.erase(
+      std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
+      FeatureBitsets.end());
+  OS << "// Feature bitsets.\n"
+     << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
+     << "  AMFBS_None,\n";
+  for (const auto &FeatureBitset : FeatureBitsets) {
+    if (FeatureBitset.empty())
+      continue;
+    OS << "  " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+  }
+  OS << "};\n\n"
+     << "const static FeatureBitset FeatureBitsets[] {\n"
+     << "  {}, // AMFBS_None\n";
+  for (const auto &FeatureBitset : FeatureBitsets) {
+    if (FeatureBitset.empty())
+      continue;
+    OS << "  {";
+    for (const auto &Feature : FeatureBitset) {
+      const auto &I = Info.SubtargetFeatures.find(Feature);
+      assert(I != Info.SubtargetFeatures.end() && "Didn't import predicate?");
+      OS << I->second.getEnumBitName() << ", ";
+    }
+    OS << "},\n";
+  }
+  OS << "};\n\n";
+
   // Emit the static match table; unused classes get initialized to 0 which is
   // guaranteed to be InvalidMatchClass.
   //
@@ -3315,10 +3396,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "    " << getMinimalTypeForRange(MaxMnemonicIndex)
                << " Mnemonic;\n";
   OS << "    uint16_t Opcode;\n";
-  OS << "    " << getMinimalTypeForRange(Info.Matchables.size())
+  OS << "    " << getMinimalTypeForRange(NumConverters)
                << " ConvertFn;\n";
-  OS << "    " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
-               << " RequiredFeatures;\n";
+  OS << "    " << getMinimalTypeForRange(FeatureBitsets.size())
+               << " RequiredFeaturesIdx;\n";
   OS << "    " << getMinimalTypeForRange(
                       std::distance(Info.Classes.begin(), Info.Classes.end()))
      << " Classes[" << MaxNumOperands << "];\n";
@@ -3363,13 +3444,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
          << MI->ConversionFnKind << ", ";
 
       // Write the required features mask.
-      if (!MI->RequiredFeatures.empty()) {
-        for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i) {
-          if (i) OS << "|";
-          OS << MI->RequiredFeatures[i]->getEnumName();
-        }
-      } else
-        OS << "0";
+      OS << "AMFBS";
+      if (MI->RequiredFeatures.empty())
+        OS << "_None";
+      else
+        for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i)
+          OS << '_' << MI->RequiredFeatures[i]->TheDef->getName();
 
       OS << ", { ";
       for (unsigned i = 0, e = MI->AsmOperands.size(); i != e; ++i) {
@@ -3394,7 +3474,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   if (ReportMultipleNearMisses)
     OS << "                     SmallVectorImpl<NearMissInfo> *NearMisses,\n";
   else
-    OS << "                     uint64_t &ErrorInfo,\n";
+    OS << "                     uint64_t &ErrorInfo,\n"
+       << "                     FeatureBitset &MissingFeatures,\n";
   OS << "                     bool matchingInlineAsm, unsigned VariantID) {\n";
 
   if (!ReportMultipleNearMisses) {
@@ -3409,7 +3490,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
   // Emit code to get the available features.
   OS << "  // Get the current feature set.\n";
-  OS << "  uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
+  OS << "  const FeatureBitset &AvailableFeatures = getAvailableFeatures();\n\n";
 
   OS << "  // Get the instruction mnemonic, which is the first token.\n";
   if (HasMnemonicFirst) {
@@ -3433,7 +3514,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     OS << "  bool HadMatchOtherThanFeatures = false;\n";
     OS << "  bool HadMatchOtherThanPredicate = false;\n";
     OS << "  unsigned RetCode = Match_InvalidOperand;\n";
-    OS << "  uint64_t MissingFeatures = ~0ULL;\n";
+    OS << "  MissingFeatures.set();\n";
     OS << "  // Set ErrorInfo to the operand that mismatches if it is\n";
     OS << "  // wrong for all instances of the instruction.\n";
     OS << "  ErrorInfo = ~0ULL;\n";
@@ -3479,9 +3560,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "  for (const MatchEntry *it = MnemonicRange.first, "
      << "*ie = MnemonicRange.second;\n";
   OS << "       it != ie; ++it) {\n";
+  OS << "    const FeatureBitset &RequiredFeatures = "
+        "FeatureBitsets[it->RequiredFeaturesIdx];\n";
   OS << "    bool HasRequiredFeatures =\n";
-  OS << "      (AvailableFeatures & it->RequiredFeatures) == "
-        "it->RequiredFeatures;\n";
+  OS << "      (AvailableFeatures & RequiredFeatures) == RequiredFeatures;\n";
   OS << "    DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Trying to match opcode \"\n";
   OS << "                                          << MII.getName(it->Opcode) << \"\\n\");\n";
 
@@ -3640,16 +3722,18 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "    if (!HasRequiredFeatures) {\n";
   if (!ReportMultipleNearMisses)
     OS << "      HadMatchOtherThanFeatures = true;\n";
-  OS << "      uint64_t NewMissingFeatures = it->RequiredFeatures & "
+  OS << "      FeatureBitset NewMissingFeatures = RequiredFeatures & "
         "~AvailableFeatures;\n";
-  OS << "      DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features: \"\n";
-  OS << "                                            << format_hex(NewMissingFeatures, 18)\n";
-  OS << "                                            << \"\\n\");\n";
+  OS << "      DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features:\";\n";
+  OS << "                       for (unsigned I = 0, E = NewMissingFeatures.size(); I != E; ++I)\n";
+  OS << "                         if (NewMissingFeatures[I])\n";
+  OS << "                           dbgs() << ' ' << I;\n";
+  OS << "                       dbgs() << \"\\n\");\n";
   if (ReportMultipleNearMisses) {
     OS << "      FeaturesNearMiss = NearMissInfo::getMissedFeature(NewMissingFeatures);\n";
   } else {
-    OS << "      if (countPopulation(NewMissingFeatures) <=\n"
-          "          countPopulation(MissingFeatures))\n";
+    OS << "      if (NewMissingFeatures.count() <=\n"
+          "          MissingFeatures.count())\n";
     OS << "        MissingFeatures = NewMissingFeatures;\n";
     OS << "      continue;\n";
   }
@@ -3804,15 +3888,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     OS << "  // Okay, we had no match.  Try to return a useful error code.\n";
     OS << "  if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n";
     OS << "    return RetCode;\n\n";
-    OS << "  // Missing feature matches return which features were missing\n";
-    OS << "  ErrorInfo = MissingFeatures;\n";
+    OS << "  ErrorInfo = 0;\n";
     OS << "  return Match_MissingFeature;\n";
   }
   OS << "}\n\n";
 
   if (!Info.OperandMatchInfo.empty())
     emitCustomOperandParsing(OS, Target, Info, ClassName, StringTable,
-                             MaxMnemonicIndex, HasMnemonicFirst);
+                             MaxMnemonicIndex, FeatureBitsets.size(),
+                             HasMnemonicFirst);
 
   OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n";
 
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index a8f191181766..05d81f133505 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -1,9 +1,8 @@
 //===- AsmWriterEmitter.cpp - Generate an assembly writer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -586,11 +585,20 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
       O << "  case ";
       if (!Namespace.empty())
         O << Namespace << "::";
-      O << AltName << ":\n"
-        << "    assert(*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
-        << "[RegNo-1]) &&\n"
-        << "           \"Invalid alt name index for register!\");\n"
-        << "    return AsmStrs" << AltName << "+RegAsmOffset" << AltName
+      O << AltName << ":\n";
+      if (R->isValueUnset("FallbackRegAltNameIndex"))
+        O << "    assert(*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
+          << "[RegNo-1]) &&\n"
+          << "           \"Invalid alt name index for register!\");\n";
+      else {
+        O << "    if (!*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
+          << "[RegNo-1]))\n"
+          << "      return getRegisterName(RegNo, ";
+        if (!Namespace.empty())
+          O << Namespace << "::";
+        O << R->getValueAsDef("FallbackRegAltNameIndex")->getName() << ");\n";
+      }
+      O << "    return AsmStrs" << AltName << "+RegAsmOffset" << AltName
         << "[RegNo-1];\n";
     }
     O << "  }\n";
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index 2c19e5d663d6..c26e0e421183 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -1,9 +1,8 @@
 //===- AsmWriterInst.h - Classes encapsulating a printable inst -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -94,8 +93,10 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
                    != std::string::npos) {
           AddLiteralString(std::string(1, AsmString[DollarPos+1]));
         } else {
-          PrintFatalError("Non-supported escaped character found in instruction '" +
-            CGI.TheDef->getName() + "'!");
+          PrintFatalError(
+              CGI.TheDef->getLoc(),
+              "Non-supported escaped character found in instruction '" +
+                  CGI.TheDef->getName() + "'!");
         }
         LastEmitted = DollarPos+2;
         continue;
@@ -132,15 +133,19 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
       // brace.
       if (hasCurlyBraces) {
         if (VarEnd >= AsmString.size())
-          PrintFatalError("Reached end of string before terminating curly brace in '"
-            + CGI.TheDef->getName() + "'");
+          PrintFatalError(
+              CGI.TheDef->getLoc(),
+              "Reached end of string before terminating curly brace in '" +
+                  CGI.TheDef->getName() + "'");
 
         // Look for a modifier string.
         if (AsmString[VarEnd] == ':') {
           ++VarEnd;
           if (VarEnd >= AsmString.size())
-            PrintFatalError("Reached end of string before terminating curly brace in '"
-              + CGI.TheDef->getName() + "'");
+            PrintFatalError(
+                CGI.TheDef->getLoc(),
+                "Reached end of string before terminating curly brace in '" +
+                    CGI.TheDef->getName() + "'");
 
           std::string::size_type ModifierStart = VarEnd;
           while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd]))
@@ -148,17 +153,22 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
           Modifier = std::string(AsmString.begin()+ModifierStart,
                                  AsmString.begin()+VarEnd);
           if (Modifier.empty())
-            PrintFatalError("Bad operand modifier name in '"+ CGI.TheDef->getName() + "'");
+            PrintFatalError(CGI.TheDef->getLoc(),
+                            "Bad operand modifier name in '" +
+                                CGI.TheDef->getName() + "'");
         }
 
         if (AsmString[VarEnd] != '}')
-          PrintFatalError("Variable name beginning with '{' did not end with '}' in '"
-            + CGI.TheDef->getName() + "'");
+          PrintFatalError(
+              CGI.TheDef->getLoc(),
+              "Variable name beginning with '{' did not end with '}' in '" +
+                  CGI.TheDef->getName() + "'");
         ++VarEnd;
       }
       if (VarName.empty() && Modifier.empty())
-        PrintFatalError("Stray '$' in '" + CGI.TheDef->getName() +
-          "' asm string, maybe you want $$?");
+        PrintFatalError(CGI.TheDef->getLoc(),
+                        "Stray '$' in '" + CGI.TheDef->getName() +
+                            "' asm string, maybe you want $$?");
 
       if (VarName.empty()) {
         // Just a modifier, pass this into PrintSpecial.
diff --git a/utils/TableGen/AsmWriterInst.h b/utils/TableGen/AsmWriterInst.h
index 708f23cb5b0e..7d88e5a9d037 100644
--- a/utils/TableGen/AsmWriterInst.h
+++ b/utils/TableGen/AsmWriterInst.h
@@ -1,9 +1,8 @@
 //===- AsmWriterInst.h - Classes encapsulating a printable inst -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/Attributes.cpp b/utils/TableGen/Attributes.cpp
index 6bfc0ab896f9..6fbc595d7300 100644
--- a/utils/TableGen/Attributes.cpp
+++ b/utils/TableGen/Attributes.cpp
@@ -1,9 +1,8 @@
 //===- Attributes.cpp - Generate attributes -------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
index bd596bcb47a8..ccb7f3300dde 100644
--- a/utils/TableGen/CTagsEmitter.cpp
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -1,9 +1,8 @@
 //===- CTagsEmitter.cpp - Generate ctags-compatible index ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index d452031f8850..de5044e24d49 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -1,9 +1,8 @@
 //===- CallingConvEmitter.cpp - Generate calling conventions --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -41,11 +40,17 @@ void CallingConvEmitter::run(raw_ostream &O) {
   // each other.
   for (Record *CC : CCs) {
     if (!CC->getValueAsBit("Custom")) {
-      O << "static bool " << CC->getName()
-        << "(unsigned ValNo, MVT ValVT,\n"
-        << std::string(CC->getName().size() + 13, ' ')
-        << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
-        << std::string(CC->getName().size() + 13, ' ')
+      unsigned Pad = CC->getName().size();
+      if (CC->getValueAsBit("Entry")) {
+        O << "bool llvm::";
+        Pad += 12;
+      } else {
+        O << "static bool ";
+        Pad += 13;
+      }
+      O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n"
+        << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+        << std::string(Pad, ' ')
         << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
     }
   }
@@ -62,12 +67,18 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
   ListInit *CCActions = CC->getValueAsListInit("Actions");
   Counter = 0;
 
-  O << "\n\nstatic bool " << CC->getName()
-    << "(unsigned ValNo, MVT ValVT,\n"
-    << std::string(CC->getName().size()+13, ' ')
-    << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
-    << std::string(CC->getName().size()+13, ' ')
-    << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
+  O << "\n\n";
+  unsigned Pad = CC->getName().size();
+  if (CC->getValueAsBit("Entry")) {
+    O << "bool llvm::";
+    Pad += 12;
+  } else {
+    O << "static bool ";
+    Pad += 13;
+  }
+  O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n"
+    << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+    << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
   // Emit all of the actions, in order.
   for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
     O << "\n";
@@ -97,7 +108,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
       O << Action->getValueAsString("Predicate");
     } else {
       errs() << *Action;
-      PrintFatalError("Unknown CCPredicateAction!");
+      PrintFatalError(Action->getLoc(), "Unknown CCPredicateAction!");
     }
     
     O << ") {\n";
@@ -134,7 +145,8 @@ void CallingConvEmitter::EmitAction(Record *Action,
       ListInit *RegList = Action->getValueAsListInit("RegList");
       ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
       if (!ShadowRegList->empty() && ShadowRegList->size() != RegList->size())
-        PrintFatalError("Invalid length of list of shadowed registers");
+        PrintFatalError(Action->getLoc(),
+                        "Invalid length of list of shadowed registers");
 
       if (RegList->size() == 1) {
         O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
@@ -237,7 +249,8 @@ void CallingConvEmitter::EmitAction(Record *Action,
       MVT::SimpleValueType DestVT = getValueType(DestTy);
       O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
       if (MVT(DestVT).isFloatingPoint()) {
-        PrintFatalError("CCPromoteToUpperBitsInType does not handle floating "
+        PrintFatalError(Action->getLoc(),
+                        "CCPromoteToUpperBitsInType does not handle floating "
                         "point");
       } else {
         O << IndentStr << "if (ArgFlags.isSExt())\n"
@@ -269,7 +282,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
       O << IndentStr << IndentStr << "return false;\n";
     } else {
       errs() << *Action;
-      PrintFatalError("Unknown CCAction!");
+      PrintFatalError(Action->getLoc(), "Unknown CCAction!");
     }
   }
 }
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 23751a2cbfba..da65763905a8 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -1,9 +1,8 @@
 //===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
 #include "SubtargetFeatureInfo.h"
+#include "Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Casting.h"
@@ -229,6 +229,14 @@ std::string CodeEmitterGen::getInstructionCase(Record *R,
   return Case;
 }
 
+static std::string
+getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
+  std::string Name = "CEFBS";
+  for (const auto &Feature : FeatureBitset)
+    Name += ("_" + Feature->getName()).str();
+  return Name;
+}
+
 void CodeEmitterGen::run(raw_ostream &o) {
   CodeGenTarget Target(Records);
   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
@@ -327,8 +335,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
     << "#include <sstream>\n\n";
 
   // Emit the subtarget feature enumeration.
-  SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(SubtargetFeatures,
-                                                            o);
+  SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
+                                                           o);
 
   // Emit the name table for error messages.
   o << "#ifndef NDEBUG\n";
@@ -340,35 +348,97 @@ void CodeEmitterGen::run(raw_ostream &o) {
       Target.getName(), "MCCodeEmitter", "computeAvailableFeatures",
       SubtargetFeatures, o);
 
+  std::vector<std::vector<Record *>> FeatureBitsets;
+  for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+    FeatureBitsets.emplace_back();
+    for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
+      const auto &I = SubtargetFeatures.find(Predicate);
+      if (I != SubtargetFeatures.end())
+        FeatureBitsets.back().push_back(I->second.TheDef);
+    }
+  }
+
+  llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+                                 const std::vector<Record *> &B) {
+    if (A.size() < B.size())
+      return true;
+    if (A.size() > B.size())
+      return false;
+    for (const auto &Pair : zip(A, B)) {
+      if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+        return true;
+      if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
+        return false;
+    }
+    return false;
+  });
+  FeatureBitsets.erase(
+      std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
+      FeatureBitsets.end());
+  o << "#ifndef NDEBUG\n"
+    << "// Feature bitsets.\n"
+    << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
+    << "  CEFBS_None,\n";
+  for (const auto &FeatureBitset : FeatureBitsets) {
+    if (FeatureBitset.empty())
+      continue;
+    o << "  " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+  }
+  o << "};\n\n"
+     << "const static FeatureBitset FeatureBitsets[] {\n"
+     << "  {}, // CEFBS_None\n";
+  for (const auto &FeatureBitset : FeatureBitsets) {
+    if (FeatureBitset.empty())
+      continue;
+    o << "  {";
+    for (const auto &Feature : FeatureBitset) {
+      const auto &I = SubtargetFeatures.find(Feature);
+      assert(I != SubtargetFeatures.end() && "Didn't import predicate?");
+      o << I->second.getEnumBitName() << ", ";
+    }
+    o << "},\n";
+  }
+  o << "};\n"
+    << "#endif // NDEBUG\n\n";
+
+
   // Emit the predicate verifier.
   o << "void " << Target.getName()
     << "MCCodeEmitter::verifyInstructionPredicates(\n"
-    << "    const MCInst &Inst, uint64_t AvailableFeatures) const {\n"
+    << "    const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n"
     << "#ifndef NDEBUG\n"
-    << "  static uint64_t RequiredFeatures[] = {\n";
+    << "  static " << getMinimalTypeForRange(FeatureBitsets.size())
+    << " RequiredFeaturesRefs[] = {\n";
   unsigned InstIdx = 0;
   for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
-    o << "    ";
+    o << "    CEFBS";
+    unsigned NumPredicates = 0;
     for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
       const auto &I = SubtargetFeatures.find(Predicate);
-      if (I != SubtargetFeatures.end())
-        o << I->second.getEnumName() << " | ";
+      if (I != SubtargetFeatures.end()) {
+        o << '_' << I->second.TheDef->getName();
+        NumPredicates++;
+      }
     }
-    o << "0, // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
+    if (!NumPredicates)
+      o << "_None";
+    o << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
     InstIdx++;
   }
   o << "  };\n\n";
   o << "  assert(Inst.getOpcode() < " << InstIdx << ");\n";
-  o << "  uint64_t MissingFeatures =\n"
-    << "      (AvailableFeatures & RequiredFeatures[Inst.getOpcode()]) ^\n"
-    << "      RequiredFeatures[Inst.getOpcode()];\n"
-    << "  if (MissingFeatures) {\n"
+  o << "  const FeatureBitset &RequiredFeatures = "
+       "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n";
+  o << "  FeatureBitset MissingFeatures =\n"
+    << "      (AvailableFeatures & RequiredFeatures) ^\n"
+    << "      RequiredFeatures;\n"
+    << "  if (MissingFeatures.any()) {\n"
     << "    std::ostringstream Msg;\n"
     << "    Msg << \"Attempting to emit \" << "
        "MCII.getName(Inst.getOpcode()).str()\n"
     << "        << \" instruction but the \";\n"
-    << "    for (unsigned i = 0; i < 8 * sizeof(MissingFeatures); ++i)\n"
-    << "      if (MissingFeatures & (1ULL << i))\n"
+    << "    for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n"
+    << "      if (MissingFeatures.test(i))\n"
     << "        Msg << SubtargetFeatureNames[i] << \" \";\n"
     << "    Msg << \"predicate(s) are not met\";\n"
     << "    report_fatal_error(Msg.str());\n"
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 96c90c9cf6bd..c8f710d66a03 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenDAGPatterns.cpp - Read DAG patterns from .td file -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -68,8 +67,10 @@ static bool berase_if(MachineValueTypeSet &S, Predicate P) {
 // inference will apply to each mode separately.
 
 TypeSetByHwMode::TypeSetByHwMode(ArrayRef<ValueTypeByHwMode> VTList) {
-  for (const ValueTypeByHwMode &VVT : VTList)
+  for (const ValueTypeByHwMode &VVT : VTList) {
     insert(VVT);
+    AddrSpaces.push_back(VVT.PtrAddrSpace);
+  }
 }
 
 bool TypeSetByHwMode::isValueTypeByHwMode(bool AllowEmpty) const {
@@ -86,9 +87,13 @@ ValueTypeByHwMode TypeSetByHwMode::getValueTypeByHwMode() const {
   assert(isValueTypeByHwMode(true) &&
          "The type set has multiple types for at least one HW mode");
   ValueTypeByHwMode VVT;
+  auto ASI = AddrSpaces.begin();
+
   for (const auto &I : *this) {
     MVT T = I.second.empty() ? MVT::Other : *I.second.begin();
     VVT.getOrCreateTypeForMode(I.first, T);
+    if (ASI != AddrSpaces.end())
+      VVT.PtrAddrSpace = *ASI++;
   }
   return VVT;
 }
@@ -502,22 +507,14 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
            (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
             A.getSizeInBits() < B.getSizeInBits());
   };
-  auto LE = [](MVT A, MVT B) -> bool {
+  auto LE = [&LT](MVT A, MVT B) -> bool {
     // This function is used when removing elements: when a vector is compared
     // to a non-vector, it should return false (to avoid removal).
     if (A.isVector() != B.isVector())
       return false;
 
-    // Note on the < comparison below:
-    // X86 has patterns like
-    //   (set VR128X:$dst, (v16i8 (X86vtrunc (v4i32 VR128X:$src1)))),
-    // where the truncated vector is given a type v16i8, while the source
-    // vector has type v4i32. They both have the same size in bits.
-    // The minimal type in the result is obviously v16i8, and when we remove
-    // all types from the source that are smaller-or-equal than v8i16, the
-    // only source type would also be removed (since it's equal in size).
-    return A.getScalarSizeInBits() <= B.getScalarSizeInBits() ||
-           A.getSizeInBits() < B.getSizeInBits();
+    return LT(A, B) || (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
+                        A.getSizeInBits() == B.getSizeInBits());
   };
 
   for (unsigned M : Modes) {
@@ -957,13 +954,33 @@ std::string TreePredicateFn::getPredCode() const {
   }
 
   if (isLoad() || isStore() || isAtomic()) {
-    StringRef SDNodeName =
-        isLoad() ? "LoadSDNode" : isStore() ? "StoreSDNode" : "AtomicSDNode";
+    if (ListInit *AddressSpaces = getAddressSpaces()) {
+      Code += "unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();\n"
+        " if (";
+
+      bool First = true;
+      for (Init *Val : AddressSpaces->getValues()) {
+        if (First)
+          First = false;
+        else
+          Code += " && ";
+
+        IntInit *IntVal = dyn_cast<IntInit>(Val);
+        if (!IntVal) {
+          PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+                          "AddressSpaces element must be integer");
+        }
+
+        Code += "AddrSpace != " + utostr(IntVal->getValue());
+      }
+
+      Code += ")\nreturn false;\n";
+    }
 
     Record *MemoryVT = getMemoryVT();
 
     if (MemoryVT)
-      Code += ("if (cast<" + SDNodeName + ">(N)->getMemoryVT() != MVT::" +
+      Code += ("if (cast<MemSDNode>(N)->getMemoryVT() != MVT::" +
                MemoryVT->getName() + ") return false;\n")
                   .str();
   }
@@ -1152,6 +1169,14 @@ Record *TreePredicateFn::getMemoryVT() const {
     return nullptr;
   return R->getValueAsDef("MemoryVT");
 }
+
+ListInit *TreePredicateFn::getAddressSpaces() const {
+  Record *R = getOrigPatFragRecord()->getRecord();
+  if (R->isValueUnset("AddressSpaces"))
+    return nullptr;
+  return R->getValueAsListInit("AddressSpaces");
+}
+
 Record *TreePredicateFn::getScalarMemoryVT() const {
   Record *R = getOrigPatFragRecord()->getRecord();
   if (R->isValueUnset("ScalarMemoryVT"))
@@ -1276,6 +1301,17 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
 // PatternToMatch implementation
 //
 
+static bool isImmAllOnesAllZerosMatch(const TreePatternNode *P) {
+  if (!P->isLeaf())
+    return false;
+  DefInit *DI = dyn_cast<DefInit>(P->getLeafValue());
+  if (!DI)
+    return false;
+
+  Record *R = DI->getDef();
+  return R->getName() == "immAllOnesV" || R->getName() == "immAllZerosV";
+}
+
 /// getPatternSize - Return the 'size' of this pattern.  We want to match large
 /// patterns before small ones.  This is used to determine the size of a
 /// pattern.
@@ -1315,6 +1351,8 @@ static unsigned getPatternSize(const TreePatternNode *P,
         Size += 5;  // Matches a ConstantSDNode (+3) and a specific value (+2).
       else if (Child->getComplexPatternInfo(CGP))
         Size += getPatternSize(Child, CGP);
+      else if (isImmAllOnesAllZerosMatch(Child))
+        Size += 4; // Matches a build_vector(+3) and a predicate (+1).
       else if (!Child->getPredicateCalls().empty())
         ++Size;
     }
@@ -1408,7 +1446,8 @@ SDTypeConstraint::SDTypeConstraint(Record *R, const CodeGenHwModes &CGH) {
     x.SDTCisSameSizeAs_Info.OtherOperandNum =
       R->getValueAsInt("OtherOperandNum");
   } else {
-    PrintFatalError("Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
+    PrintFatalError(R->getLoc(),
+                    "Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
   }
 }
 
@@ -2120,7 +2159,8 @@ static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo,
   }
 
   if (R->getName() == "node" || R->getName() == "srcvalue" ||
-      R->getName() == "zero_reg") {
+      R->getName() == "zero_reg" || R->getName() == "immAllOnesV" ||
+      R->getName() == "immAllZerosV" || R->getName() == "undef_tied_input") {
     // Placeholder.
     return TypeSetByHwMode(); // Unknown.
   }
@@ -2425,18 +2465,32 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       }
     }
 
+    // If one or more operands with a default value appear at the end of the
+    // formal operand list for an instruction, we allow them to be overridden
+    // by optional operands provided in the pattern.
+    //
+    // But if an operand B without a default appears at any point after an
+    // operand A with a default, then we don't allow A to be overridden,
+    // because there would be no way to specify whether the next operand in
+    // the pattern was intended to override A or skip it.
+    unsigned NonOverridableOperands = Inst.getNumOperands();
+    while (NonOverridableOperands > 0 &&
+           CDP.operandHasDefault(Inst.getOperand(NonOverridableOperands-1)))
+      --NonOverridableOperands;
+
     unsigned ChildNo = 0;
     for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
       Record *OperandNode = Inst.getOperand(i);
 
-      // If the instruction expects a predicate or optional def operand, we
-      // codegen this by setting the operand to it's default value if it has a
-      // non-empty DefaultOps field.
-      if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
-          !CDP.getDefaultOperand(OperandNode).DefaultOps.empty())
+      // If the operand has a default value, do we use it? We must use the
+      // default if we've run out of children of the pattern DAG to consume,
+      // or if the operand is followed by a non-defaulted one.
+      if (CDP.operandHasDefault(OperandNode) &&
+          (i < NonOverridableOperands || ChildNo >= getNumChildren()))
         continue;
 
-      // Verify that we didn't run out of provided operands.
+      // If we have run out of child nodes and there _isn't_ a default
+      // value we can use for the next operand, give an error.
       if (ChildNo >= getNumChildren()) {
         emitTooFewOperandsError(TP, getOperator()->getName(), getNumChildren());
         return false;
@@ -2753,7 +2807,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
     // chain.
     if (Int.IS.RetVTs.empty())
       Operator = getDAGPatterns().get_intrinsic_void_sdnode();
-    else if (Int.ModRef != CodeGenIntrinsic::NoMem)
+    else if (Int.ModRef != CodeGenIntrinsic::NoMem || Int.hasSideEffects)
       // Has side-effects, requires chain.
       Operator = getDAGPatterns().get_intrinsic_w_chain_sdnode();
     else // Otherwise, no chain.
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 4be9afdcacd2..2b49a64c3f1d 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -1,9 +1,8 @@
 //===- CodeGenDAGPatterns.h - Read DAG patterns from .td file ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -191,6 +190,7 @@ private:
 
 struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
   using SetType = MachineValueTypeSet;
+  std::vector<unsigned> AddrSpaces;
 
   TypeSetByHwMode() = default;
   TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
@@ -227,6 +227,15 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
     return Map.size() == 1 && Map.begin()->first == DefaultMode;
   }
 
+  bool isPointer() const {
+    return getValueTypeByHwMode().isPointer();
+  }
+
+  unsigned getPtrAddrSpace() const {
+    assert(isPointer());
+    return getValueTypeByHwMode().PtrAddrSpace;
+  }
+
   bool insert(const ValueTypeByHwMode &VVT);
   bool constrain(const TypeSetByHwMode &VTS);
   template <typename Predicate> bool constrain(Predicate P);
@@ -243,6 +252,7 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
   bool validate() const;
 
 private:
+  unsigned PtrAddrSpace = std::numeric_limits<unsigned>::max();
   /// Intersect two sets. Return true if anything has changed.
   bool intersect(SetType &Out, const SetType &In);
 };
@@ -583,6 +593,8 @@ public:
   /// ValueType record for the memory VT.
   Record *getScalarMemoryVT() const;
 
+  ListInit *getAddressSpaces() const;
+
   // If true, indicates that GlobalISel-based C++ code was supplied.
   bool hasGISelPredicateCode() const;
   std::string getGISelPredicateCode() const;
@@ -1272,6 +1284,11 @@ public:
 
   unsigned allocateScope() { return ++NumScopes; }
 
+  bool operandHasDefault(Record *Op) const {
+    return Op->isSubClassOf("OperandWithDefaultOps") &&
+      !getDefaultOperand(Op).DefaultOps.empty();
+  }
+
 private:
   void ParseNodeInfo();
   void ParseNodeTransforms();
diff --git a/utils/TableGen/CodeGenHwModes.cpp b/utils/TableGen/CodeGenHwModes.cpp
index 9f88d95275b4..9052cdd2bd3e 100644
--- a/utils/TableGen/CodeGenHwModes.cpp
+++ b/utils/TableGen/CodeGenHwModes.cpp
@@ -1,9 +1,8 @@
 //===--- CodeGenHwModes.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Classes to parse and store HW mode information for instruction selection
diff --git a/utils/TableGen/CodeGenHwModes.h b/utils/TableGen/CodeGenHwModes.h
index 36df835d1933..1ff2faaa0e52 100644
--- a/utils/TableGen/CodeGenHwModes.h
+++ b/utils/TableGen/CodeGenHwModes.h
@@ -1,9 +1,8 @@
 //===--- CodeGenHwModes.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Classes to parse and store HW mode information for instruction selection.
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 6d06ba2c8b67..2463824469ab 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenInstruction.cpp - CodeGen Instruction Class Wrapper ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -34,18 +33,24 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
 
   if (DefInit *Init = dyn_cast<DefInit>(OutDI->getOperator())) {
     if (Init->getDef()->getName() != "outs")
-      PrintFatalError(R->getName() + ": invalid def name for output list: use 'outs'");
+      PrintFatalError(R->getLoc(),
+                      R->getName() +
+                          ": invalid def name for output list: use 'outs'");
   } else
-    PrintFatalError(R->getName() + ": invalid output list: use 'outs'");
+    PrintFatalError(R->getLoc(),
+                    R->getName() + ": invalid output list: use 'outs'");
 
   NumDefs = OutDI->getNumArgs();
 
   DagInit *InDI = R->getValueAsDag("InOperandList");
   if (DefInit *Init = dyn_cast<DefInit>(InDI->getOperator())) {
     if (Init->getDef()->getName() != "ins")
-      PrintFatalError(R->getName() + ": invalid def name for input list: use 'ins'");
+      PrintFatalError(R->getLoc(),
+                      R->getName() +
+                          ": invalid def name for input list: use 'ins'");
   } else
-    PrintFatalError(R->getName() + ": invalid input list: use 'ins'");
+    PrintFatalError(R->getLoc(),
+                    R->getName() + ": invalid input list: use 'ins'");
 
   unsigned MIOperandNo = 0;
   std::set<std::string> OperandNames;
@@ -64,7 +69,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
 
     DefInit *Arg = dyn_cast<DefInit>(ArgInit);
     if (!Arg)
-      PrintFatalError("Illegal operand for the '" + R->getName() + "' instruction!");
+      PrintFatalError(R->getLoc(), "Illegal operand for the '" + R->getName() +
+                                       "' instruction!");
 
     Record *Rec = Arg->getDef();
     std::string PrintMethod = "printOperand";
@@ -89,8 +95,9 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
       // Verify that MIOpInfo has an 'ops' root value.
       if (!isa<DefInit>(MIOpInfo->getOperator()) ||
           cast<DefInit>(MIOpInfo->getOperator())->getDef()->getName() != "ops")
-        PrintFatalError("Bad value for MIOperandInfo in operand '" + Rec->getName() +
-          "'\n");
+        PrintFatalError(R->getLoc(),
+                        "Bad value for MIOperandInfo in operand '" +
+                            Rec->getName() + "'\n");
 
       // If we have MIOpInfo, then we have #operands equal to number of entries
       // in MIOperandInfo.
@@ -108,16 +115,20 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
       OperandType = "OPERAND_REGISTER";
     } else if (!Rec->isSubClassOf("PointerLikeRegClass") &&
                !Rec->isSubClassOf("unknown_class"))
-      PrintFatalError("Unknown operand class '" + Rec->getName() +
-        "' in '" + R->getName() + "' instruction!");
+      PrintFatalError(R->getLoc(), "Unknown operand class '" + Rec->getName() +
+                                       "' in '" + R->getName() +
+                                       "' instruction!");
 
     // Check that the operand has a name and that it's unique.
     if (ArgName.empty())
-      PrintFatalError("In instruction '" + R->getName() + "', operand #" +
-                      Twine(i) + " has no name!");
+      PrintFatalError(R->getLoc(), "In instruction '" + R->getName() +
+                                       "', operand #" + Twine(i) +
+                                       " has no name!");
     if (!OperandNames.insert(ArgName).second)
-      PrintFatalError("In instruction '" + R->getName() + "', operand #" +
-                      Twine(i) + " has the same name as a previous operand!");
+      PrintFatalError(R->getLoc(),
+                      "In instruction '" + R->getName() + "', operand #" +
+                          Twine(i) +
+                          " has the same name as a previous operand!");
 
     OperandList.emplace_back(Rec, ArgName, PrintMethod, EncoderMethod,
                              OperandNamespace + "::" + OperandType, MIOperandNo,
@@ -139,9 +150,11 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
 ///
 unsigned CGIOperandList::getOperandNamed(StringRef Name) const {
   unsigned OpIdx;
-  if (hasOperandNamed(Name, OpIdx)) return OpIdx;
-  PrintFatalError("'" + TheDef->getName() +
-                  "' does not have an operand named '$" + Name + "'!");
+  if (hasOperandNamed(Name, OpIdx))
+    return OpIdx;
+  PrintFatalError(TheDef->getLoc(), "'" + TheDef->getName() +
+                                        "' does not have an operand named '$" +
+                                        Name + "'!");
 }
 
 /// hasOperandNamed - Query whether the instruction has an operand of the
@@ -160,7 +173,8 @@ bool CGIOperandList::hasOperandNamed(StringRef Name, unsigned &OpIdx) const {
 std::pair<unsigned,unsigned>
 CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
   if (Op.empty() || Op[0] != '$')
-    PrintFatalError(TheDef->getName() + ": Illegal operand name: '" + Op + "'");
+    PrintFatalError(TheDef->getLoc(),
+                    TheDef->getName() + ": Illegal operand name: '" + Op + "'");
 
   std::string OpName = Op.substr(1);
   std::string SubOpName;
@@ -170,7 +184,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
   if (DotIdx != std::string::npos) {
     SubOpName = OpName.substr(DotIdx+1);
     if (SubOpName.empty())
-      PrintFatalError(TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'");
+      PrintFatalError(TheDef->getLoc(),
+                      TheDef->getName() +
+                          ": illegal empty suboperand name in '" + Op + "'");
     OpName = OpName.substr(0, DotIdx);
   }
 
@@ -180,8 +196,11 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
     // If one was needed, throw.
     if (OperandList[OpIdx].MINumOperands > 1 && !AllowWholeOp &&
         SubOpName.empty())
-      PrintFatalError(TheDef->getName() + ": Illegal to refer to"
-        " whole operand part of complex operand '" + Op + "'");
+      PrintFatalError(TheDef->getLoc(),
+                      TheDef->getName() +
+                          ": Illegal to refer to"
+                          " whole operand part of complex operand '" +
+                          Op + "'");
 
     // Otherwise, return the operand.
     return std::make_pair(OpIdx, 0U);
@@ -190,7 +209,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
   // Find the suboperand number involved.
   DagInit *MIOpInfo = OperandList[OpIdx].MIOperandInfo;
   if (!MIOpInfo)
-    PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
+    PrintFatalError(TheDef->getLoc(), TheDef->getName() +
+                                          ": unknown suboperand name in '" +
+                                          Op + "'");
 
   // Find the operand with the right name.
   for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i)
@@ -198,7 +219,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
       return std::make_pair(OpIdx, i);
 
   // Otherwise, didn't find it!
-  PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
+  PrintFatalError(TheDef->getLoc(), TheDef->getName() +
+                                        ": unknown suboperand name in '" + Op +
+                                        "'");
   return std::make_pair(0U, 0U);
 }
 
@@ -354,7 +377,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
   isAdd        = R->getValueAsBit("isAdd");
   isTrap       = R->getValueAsBit("isTrap");
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
-  isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable");
+  isPredicable = !R->getValueAsBit("isUnpredicable") && (
+      Operands.isPredicable || R->getValueAsBit("isPredicable"));
   isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
   isCommutable = R->getValueAsBit("isCommutable");
   isTerminator = R->getValueAsBit("isTerminator");
@@ -377,6 +401,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
   mayLoad_Unset = Unset;
   mayStore     = R->getValueAsBitOrUnset("mayStore", Unset);
   mayStore_Unset = Unset;
+  mayRaiseFPException = R->getValueAsBit("mayRaiseFPException");
   hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset);
   hasSideEffects_Unset = Unset;
 
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 2e3d2f48a928..bb5b1369649f 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -1,9 +1,8 @@
 //===- CodeGenInstruction.h - Instruction Class Wrapper ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -250,6 +249,7 @@ template <typename T> class ArrayRef;
     bool mayLoad_Unset : 1;
     bool mayStore : 1;
     bool mayStore_Unset : 1;
+    bool mayRaiseFPException : 1;
     bool isPredicable : 1;
     bool isConvertibleToThreeAddress : 1;
     bool isCommutable : 1;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 9487a79c1432..7b74bb07d6e0 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -1,9 +1,8 @@
 //===- CodeGenIntrinsic.h - Intrinsic Class Wrapper ------------*- C++ -*--===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -124,6 +123,9 @@ struct CodeGenIntrinsic {
   /// True if the intrinsic is no-return.
   bool isNoReturn;
 
+  /// True if the intrinsic is will-return.
+  bool isWillReturn;
+
   /// True if the intrinsic is cold.
   bool isCold;
 
@@ -137,7 +139,15 @@ struct CodeGenIntrinsic {
   // True if the intrinsic is marked as speculatable.
   bool isSpeculatable;
 
-  enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
+  enum ArgAttribute {
+    NoCapture,
+    Returned,
+    ReadOnly,
+    WriteOnly,
+    ReadNone,
+    ImmArg
+  };
+
   std::vector<std::pair<unsigned, ArgAttribute>> ArgumentAttributes;
 
   bool hasProperty(enum SDNP Prop) const {
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index e5b0426cdcc3..b1774b01ba8c 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenMapTable.cpp - Instruction Mapping Table Generator ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // CodeGenMapTable provides functionality for the TabelGen to create
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 74a2b078dfb3..f87c6d6c945a 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenRegisters.cpp - Register and RegisterClass Info -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -740,8 +739,9 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
   for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
     Record *Type = TypeList[i];
     if (!Type->isSubClassOf("ValueType"))
-      PrintFatalError("RegTypes list member '" + Type->getName() +
-        "' does not derive from the ValueType class!");
+      PrintFatalError(R->getLoc(),
+                      "RegTypes list member '" + Type->getName() +
+                          "' does not derive from the ValueType class!");
     VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes()));
   }
   assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
@@ -2101,8 +2101,7 @@ void CodeGenRegBank::computeDerivedInfo() {
   for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)
     RegUnitSetOrder.push_back(Idx);
 
-  std::stable_sort(RegUnitSetOrder.begin(), RegUnitSetOrder.end(),
-                   [this](unsigned ID1, unsigned ID2) {
+  llvm::stable_sort(RegUnitSetOrder, [this](unsigned ID1, unsigned ID2) {
     return getRegPressureSet(ID1).Units.size() <
            getRegPressureSet(ID2).Units.size();
   });
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 0f7a025ded10..f04a90f8fde5 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -1,9 +1,8 @@
 //===- CodeGenRegisters.h - Register and RegisterClass Info -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 6d259cbb33ee..fd007044a16e 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenSchedule.cpp - Scheduling MachineModels ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -369,24 +368,22 @@ processSTIPredicate(STIPredicateFunction &Fn,
              [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
                unsigned LhsIdx = Opcode2Index[Lhs.first];
                unsigned RhsIdx = Opcode2Index[Rhs.first];
-               std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
-               std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
-
-               if (LhsMasks.first != RhsMasks.first) {
-                 if (LhsMasks.first.countPopulation() <
-                     RhsMasks.first.countPopulation())
-                   return true;
-                 return LhsMasks.first.countLeadingZeros() >
-                        RhsMasks.first.countLeadingZeros();
-               }
-
-               if (LhsMasks.second != RhsMasks.second) {
-                 if (LhsMasks.second.countPopulation() <
-                     RhsMasks.second.countPopulation())
-                   return true;
-                 return LhsMasks.second.countLeadingZeros() >
-                        RhsMasks.second.countLeadingZeros();
-               }
+               const std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
+               const std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
+
+               auto LessThan = [](const APInt &Lhs, const APInt &Rhs) {
+                 unsigned LhsCountPopulation = Lhs.countPopulation();
+                 unsigned RhsCountPopulation = Rhs.countPopulation();
+                 return ((LhsCountPopulation < RhsCountPopulation) ||
+                         ((LhsCountPopulation == RhsCountPopulation) &&
+                          (Lhs.countLeadingZeros() > Rhs.countLeadingZeros())));
+               };
+
+               if (LhsMasks.first != RhsMasks.first)
+                 return LessThan(LhsMasks.first, RhsMasks.first);
+
+               if (LhsMasks.second != RhsMasks.second)
+                 return LessThan(LhsMasks.second, RhsMasks.second);
 
                return LhsIdx < RhsIdx;
              });
@@ -1936,8 +1933,10 @@ void CodeGenSchedModels::checkCompleteness() {
       unsigned SCIdx = getSchedClassIdx(*Inst);
       if (!SCIdx) {
         if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) {
-          PrintError("No schedule information for instruction '"
-                     + Inst->TheDef->getName() + "'");
+          PrintError(Inst->TheDef->getLoc(),
+                     "No schedule information for instruction '" +
+                         Inst->TheDef->getName() + "' in SchedMachineModel '" +
+                     ProcModel.ModelDef->getName() + "'");
           Complete = false;
         }
         continue;
@@ -1955,8 +1954,9 @@ void CodeGenSchedModels::checkCompleteness() {
         return R->getValueAsDef("SchedModel") == ProcModel.ModelDef;
       });
       if (I == InstRWs.end()) {
-        PrintError("'" + ProcModel.ModelName + "' lacks information for '" +
-                   Inst->TheDef->getName() + "'");
+        PrintError(Inst->TheDef->getLoc(), "'" + ProcModel.ModelName +
+                                               "' lacks information for '" +
+                                               Inst->TheDef->getName() + "'");
         Complete = false;
       }
     }
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index 87a051b0c05e..c26fb1f97807 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -1,9 +1,8 @@
 //===- CodeGenSchedule.h - Scheduling Machine Models ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index bcb653135551..b65e1b6af791 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -1,9 +1,8 @@
 //===- CodeGenTarget.cpp - CodeGen Target Class Wrapper -------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,8 +20,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -105,11 +106,18 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v128i16:  return "MVT::v128i16";
   case MVT::v1i32:    return "MVT::v1i32";
   case MVT::v2i32:    return "MVT::v2i32";
+  case MVT::v3i32:    return "MVT::v3i32";
   case MVT::v4i32:    return "MVT::v4i32";
+  case MVT::v5i32:    return "MVT::v5i32";
   case MVT::v8i32:    return "MVT::v8i32";
   case MVT::v16i32:   return "MVT::v16i32";
   case MVT::v32i32:   return "MVT::v32i32";
   case MVT::v64i32:   return "MVT::v64i32";
+  case MVT::v128i32:  return "MVT::v128i32";
+  case MVT::v256i32:  return "MVT::v256i32";
+  case MVT::v512i32:  return "MVT::v512i32";
+  case MVT::v1024i32: return "MVT::v1024i32";
+  case MVT::v2048i32: return "MVT::v2048i32";
   case MVT::v1i64:    return "MVT::v1i64";
   case MVT::v2i64:    return "MVT::v2i64";
   case MVT::v4i64:    return "MVT::v4i64";
@@ -122,9 +130,18 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v8f16:    return "MVT::v8f16";
   case MVT::v1f32:    return "MVT::v1f32";
   case MVT::v2f32:    return "MVT::v2f32";
+  case MVT::v3f32:    return "MVT::v3f32";
   case MVT::v4f32:    return "MVT::v4f32";
+  case MVT::v5f32:    return "MVT::v5f32";
   case MVT::v8f32:    return "MVT::v8f32";
   case MVT::v16f32:   return "MVT::v16f32";
+  case MVT::v32f32:   return "MVT::v32f32";
+  case MVT::v64f32:   return "MVT::v64f32";
+  case MVT::v128f32:  return "MVT::v128f32";
+  case MVT::v256f32:  return "MVT::v256f32";
+  case MVT::v512f32:  return "MVT::v512f32";
+  case MVT::v1024f32: return "MVT::v1024f32";
+  case MVT::v2048f32: return "MVT::v2048f32";
   case MVT::v1f64:    return "MVT::v1f64";
   case MVT::v2f64:    return "MVT::v2f64";
   case MVT::v4f64:    return "MVT::v4f64";
@@ -174,7 +191,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::iPTR:     return "MVT::iPTR";
   case MVT::iPTRAny:  return "MVT::iPTRAny";
   case MVT::Untyped:  return "MVT::Untyped";
-  case MVT::ExceptRef: return "MVT::ExceptRef";
+  case MVT::exnref:   return "MVT::exnref";
   default: llvm_unreachable("ILLEGAL VALUE TYPE!");
   }
 }
@@ -327,6 +344,8 @@ CodeGenSchedModels &CodeGenTarget::getSchedModels() const {
 }
 
 void CodeGenTarget::ReadInstructions() const {
+  NamedRegionTimer T("Read Instructions", "Time spent reading instructions",
+                     "CodeGenTarget", "CodeGenTarget", TimeRegions);
   std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
   if (Insts.size() <= 2)
     PrintFatalError("No 'Instruction' subclasses defined!");
@@ -492,9 +511,10 @@ ComplexPattern::ComplexPattern(Record *R) {
     } else if (PropList[i]->getName() == "SDNPWantParent") {
       Properties |= 1 << SDNPWantParent;
     } else {
-      PrintFatalError("Unsupported SD Node property '" +
-                      PropList[i]->getName() + "' on ComplexPattern '" +
-                      R->getName() + "'!");
+      PrintFatalError(R->getLoc(), "Unsupported SD Node property '" +
+                                       PropList[i]->getName() +
+                                       "' on ComplexPattern '" + R->getName() +
+                                       "'!");
     }
 }
 
@@ -530,12 +550,14 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
 CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   TheDef = R;
   std::string DefName = R->getName();
+  ArrayRef<SMLoc> DefLoc = R->getLoc();
   ModRef = ReadWriteMem;
   Properties = 0;
   isOverloaded = false;
   isCommutative = false;
   canThrow = false;
   isNoReturn = false;
+  isWillReturn = false;
   isCold = false;
   isNoDuplicate = false;
   isConvergent = false;
@@ -544,7 +566,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
 
   if (DefName.size() <= 4 ||
       std::string(DefName.begin(), DefName.begin() + 4) != "int_")
-    PrintFatalError("Intrinsic '" + DefName + "' does not start with 'int_'!");
+    PrintFatalError(DefLoc,
+                    "Intrinsic '" + DefName + "' does not start with 'int_'!");
 
   EnumName = std::string(DefName.begin()+4, DefName.end());
 
@@ -566,7 +589,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     // Verify it starts with "llvm.".
     if (Name.size() <= 5 ||
         std::string(Name.begin(), Name.begin() + 5) != "llvm.")
-      PrintFatalError("Intrinsic '" + DefName + "'s name does not start with 'llvm.'!");
+      PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+                                  "'s name does not start with 'llvm.'!");
   }
 
   // If TargetPrefix is specified, make sure that Name starts with
@@ -575,13 +599,34 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     if (Name.size() < 6+TargetPrefix.size() ||
         std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size())
         != (TargetPrefix + "."))
-      PrintFatalError("Intrinsic '" + DefName + "' does not start with 'llvm." +
-        TargetPrefix + ".'!");
+      PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+                                  "' does not start with 'llvm." +
+                                  TargetPrefix + ".'!");
   }
 
-  // Parse the list of return types.
+  ListInit *RetTypes = R->getValueAsListInit("RetTypes");
+  ListInit *ParamTypes = R->getValueAsListInit("ParamTypes");
+
+  // First collate a list of overloaded types.
   std::vector<MVT::SimpleValueType> OverloadedVTs;
-  ListInit *TypeList = R->getValueAsListInit("RetTypes");
+  for (ListInit *TypeList : {RetTypes, ParamTypes}) {
+    for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
+      Record *TyEl = TypeList->getElementAsRecord(i);
+      assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
+
+      if (TyEl->isSubClassOf("LLVMMatchType"))
+        continue;
+
+      MVT::SimpleValueType VT = getValueType(TyEl->getValueAsDef("VT"));
+      if (MVT(VT).isOverloaded()) {
+        OverloadedVTs.push_back(VT);
+        isOverloaded = true;
+      }
+    }
+  }
+
+  // Parse the list of return types.
+  ListInit *TypeList = RetTypes;
   for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
     Record *TyEl = TypeList->getElementAsRecord(i);
     assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
@@ -601,21 +646,18 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     } else {
       VT = getValueType(TyEl->getValueAsDef("VT"));
     }
-    if (MVT(VT).isOverloaded()) {
-      OverloadedVTs.push_back(VT);
-      isOverloaded = true;
-    }
 
     // Reject invalid types.
     if (VT == MVT::isVoid)
-      PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
+      PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+                                  " has void in result type list!");
 
     IS.RetVTs.push_back(VT);
     IS.RetTypeDefs.push_back(TyEl);
   }
 
   // Parse the list of parameter types.
-  TypeList = R->getValueAsListInit("ParamTypes");
+  TypeList = ParamTypes;
   for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
     Record *TyEl = TypeList->getElementAsRecord(i);
     assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
@@ -626,7 +668,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
         PrintError(R->getLoc(),
                    "Parameter #" + Twine(i) + " has out of bounds matching "
                    "number " + Twine(MatchTy));
-        PrintFatalError(Twine("ParamTypes is ") + TypeList->getAsString());
+        PrintFatalError(DefLoc,
+                        Twine("ParamTypes is ") + TypeList->getAsString());
       }
       VT = OverloadedVTs[MatchTy];
       // It only makes sense to use the extended and truncated vector element
@@ -634,20 +677,16 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       // overloaded, all the types can be specified directly.
       assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
                !TyEl->isSubClassOf("LLVMTruncatedType") &&
-               !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
+               !TyEl->isSubClassOf("LLVMScalarOrSameVectorWidth")) ||
               VT == MVT::iAny || VT == MVT::vAny) &&
              "Expected iAny or vAny type");
     } else
       VT = getValueType(TyEl->getValueAsDef("VT"));
 
-    if (MVT(VT).isOverloaded()) {
-      OverloadedVTs.push_back(VT);
-      isOverloaded = true;
-    }
-
     // Reject invalid types.
     if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/)
-      PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
+      PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+                                  " has void in result type list!");
 
     IS.ParamVTs.push_back(VT);
     IS.ParamTypeDefs.push_back(TyEl);
@@ -683,6 +722,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       isConvergent = true;
     else if (Property->getName() == "IntrNoReturn")
       isNoReturn = true;
+    else if (Property->getName() == "IntrWillReturn")
+      isWillReturn = true;
     else if (Property->getName() == "IntrCold")
       isCold = true;
     else if (Property->getName() == "IntrSpeculatable")
@@ -704,6 +745,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     } else if (Property->isSubClassOf("ReadNone")) {
       unsigned ArgNo = Property->getValueAsInt("ArgNo");
       ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadNone));
+    } else if (Property->isSubClassOf("ImmArg")) {
+      unsigned ArgNo = Property->getValueAsInt("ArgNo");
+      ArgumentAttributes.push_back(std::make_pair(ArgNo, ImmArg));
     } else
       llvm_unreachable("Unknown property!");
   }
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index d2833d5b6a92..1ab2de269c76 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -1,9 +1,8 @@
 //===- CodeGenTarget.h - Target Class Wrapper -------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 62a0ff700725..fb0c6faa5295 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -1,9 +1,8 @@
 //===- DAGISelEmitter.cpp - Generate an instruction selector --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index c8e005739460..bebd205ad58f 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -1,9 +1,8 @@
 //===- DAGISelMatcher.cpp - Representation of DAG pattern matcher ---------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -212,6 +211,11 @@ void CheckCondCodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
   OS.indent(indent) << "CheckCondCode ISD::" << CondCodeName << '\n';
 }
 
+void CheckChild2CondCodeMatcher::printImpl(raw_ostream &OS,
+                                           unsigned indent) const {
+  OS.indent(indent) << "CheckChild2CondCode ISD::" << CondCodeName << '\n';
+}
+
 void CheckValueTypeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
   OS.indent(indent) << "CheckValueType MVT::" << TypeName << '\n';
 }
@@ -233,6 +237,16 @@ void CheckFoldableChainNodeMatcher::printImpl(raw_ostream &OS,
   OS.indent(indent) << "CheckFoldableChainNode\n";
 }
 
+void CheckImmAllOnesVMatcher::printImpl(raw_ostream &OS,
+                                        unsigned indent) const {
+  OS.indent(indent) << "CheckAllOnesV\n";
+}
+
+void CheckImmAllZerosVMatcher::printImpl(raw_ostream &OS,
+                                         unsigned indent) const {
+  OS.indent(indent) << "CheckAllZerosV\n";
+}
+
 void EmitIntegerMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
   OS.indent(indent) << "EmitInteger " << Val << " VT=" << getEnumName(VT)
                     << '\n';
@@ -398,3 +412,12 @@ bool CheckValueTypeMatcher::isContradictoryImpl(const Matcher *M) const {
   return false;
 }
 
+bool CheckImmAllOnesVMatcher::isContradictoryImpl(const Matcher *M) const {
+  // AllZeros is contradictory.
+  return isa<CheckImmAllZerosVMatcher>(M);
+}
+
+bool CheckImmAllZerosVMatcher::isContradictoryImpl(const Matcher *M) const {
+  // AllOnes is contradictory.
+  return isa<CheckImmAllOnesVMatcher>(M);
+}
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 9be7295c67d4..0a782e84a372 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -1,9 +1,8 @@
 //===- DAGISelMatcher.h - Representation of DAG pattern matcher -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -67,10 +66,13 @@ public:
     CheckInteger,         // Fail if wrong val.
     CheckChildInteger,    // Fail if child is wrong val.
     CheckCondCode,        // Fail if not condcode.
+    CheckChild2CondCode,  // Fail if child is wrong condcode.
     CheckValueType,
     CheckComplexPat,
     CheckAndImm,
     CheckOrImm,
+    CheckImmAllOnesV,
+    CheckImmAllZerosV,
     CheckFoldableChainNode,
 
     // Node creation/emisssion.
@@ -122,9 +124,12 @@ public:
     case CheckInteger:
     case CheckChildInteger:
     case CheckCondCode:
+    case CheckChild2CondCode:
     case CheckValueType:
     case CheckAndImm:
     case CheckOrImm:
+    case CheckImmAllOnesV:
+    case CheckImmAllZerosV:
     case CheckFoldableChainNode:
       return true;
     }
@@ -626,6 +631,27 @@ private:
   }
 };
 
+/// CheckChild2CondCodeMatcher - This checks to see if child 2 node is a
+/// CondCodeSDNode with the specified condition, if not it fails to match.
+class CheckChild2CondCodeMatcher : public Matcher {
+  StringRef CondCodeName;
+public:
+  CheckChild2CondCodeMatcher(StringRef condcodename)
+    : Matcher(CheckChild2CondCode), CondCodeName(condcodename) {}
+
+  StringRef getCondCodeName() const { return CondCodeName; }
+
+  static bool classof(const Matcher *N) {
+    return N->getKind() == CheckChild2CondCode;
+  }
+
+private:
+  void printImpl(raw_ostream &OS, unsigned indent) const override;
+  bool isEqualImpl(const Matcher *M) const override {
+    return cast<CheckChild2CondCodeMatcher>(M)->CondCodeName == CondCodeName;
+  }
+};
+
 /// CheckValueTypeMatcher - This checks to see if the current node is a
 /// VTSDNode with the specified type, if not it fails to match.
 class CheckValueTypeMatcher : public Matcher {
@@ -731,6 +757,38 @@ private:
   }
 };
 
+/// CheckImmAllOnesVMatcher - This check if the current node is an build vector
+/// of all ones.
+class CheckImmAllOnesVMatcher : public Matcher {
+public:
+  CheckImmAllOnesVMatcher() : Matcher(CheckImmAllOnesV) {}
+
+  static bool classof(const Matcher *N) {
+    return N->getKind() == CheckImmAllOnesV;
+  }
+
+private:
+  void printImpl(raw_ostream &OS, unsigned indent) const override;
+  bool isEqualImpl(const Matcher *M) const override { return true; }
+  bool isContradictoryImpl(const Matcher *M) const override;
+};
+
+/// CheckImmAllZerosVMatcher - This check if the current node is an build vector
+/// of all zeros.
+class CheckImmAllZerosVMatcher : public Matcher {
+public:
+  CheckImmAllZerosVMatcher() : Matcher(CheckImmAllZerosV) {}
+
+  static bool classof(const Matcher *N) {
+    return N->getKind() == CheckImmAllZerosV;
+  }
+
+private:
+  void printImpl(raw_ostream &OS, unsigned indent) const override;
+  bool isEqualImpl(const Matcher *M) const override { return true; }
+  bool isContradictoryImpl(const Matcher *M) const override;
+};
+
 /// CheckFoldableChainNodeMatcher - This checks to see if the current node
 /// (which defines a chain operand) is safe to fold into a larger pattern.
 class CheckFoldableChainNodeMatcher : public Matcher {
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 90ca1bff5344..cecbc6cccdff 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -1,9 +1,8 @@
 //===- DAGISelMatcherEmitter.cpp - Matcher Emitter ------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -259,7 +258,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
 
   OS << "\n};";
   OS << "\nreturn StringRef(PATTERN_MATCH_TABLE[Index]);";
-  OS << "\n}";
+  OS << "\n}\n";
   EndEmitFunction(OS);
 
   BeginEmitFunction(OS, "StringRef", "getIncludePathForIndex(unsigned Index)",
@@ -273,7 +272,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
 
   OS << "\n};";
   OS << "\nreturn StringRef(INCLUDE_PATH_TABLE[Index]);";
-  OS << "\n}";
+  OS << "\n}\n";
   EndEmitFunction(OS);
 }
 
@@ -555,6 +554,11 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
        << cast<CheckCondCodeMatcher>(N)->getCondCodeName() << ",\n";
     return 2;
 
+  case Matcher::CheckChild2CondCode:
+    OS << "OPC_CheckChild2CondCode, ISD::"
+       << cast<CheckChild2CondCodeMatcher>(N)->getCondCodeName() << ",\n";
+    return 2;
+
   case Matcher::CheckValueType:
     OS << "OPC_CheckValueType, MVT::"
        << cast<CheckValueTypeMatcher>(N)->getTypeName() << ",\n";
@@ -597,6 +601,14 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
     OS << "OPC_CheckFoldableChainNode,\n";
     return 1;
 
+  case Matcher::CheckImmAllOnesV:
+    OS << "OPC_CheckImmAllOnesV,\n";
+    return 1;
+
+  case Matcher::CheckImmAllZerosV:
+    OS << "OPC_CheckImmAllZerosV,\n";
+    return 1;
+
   case Matcher::EmitInteger: {
     int64_t Val = cast<EmitIntegerMatcher>(N)->getValue();
     OS << "OPC_EmitInteger, "
@@ -996,12 +1008,15 @@ static StringRef getOpcodeString(Matcher::KindTy Kind) {
   case Matcher::CheckInteger: return "OPC_CheckInteger"; break;
   case Matcher::CheckChildInteger: return "OPC_CheckChildInteger"; break;
   case Matcher::CheckCondCode: return "OPC_CheckCondCode"; break;
+  case Matcher::CheckChild2CondCode: return "OPC_CheckChild2CondCode"; break;
   case Matcher::CheckValueType: return "OPC_CheckValueType"; break;
   case Matcher::CheckComplexPat: return "OPC_CheckComplexPat"; break;
   case Matcher::CheckAndImm: return "OPC_CheckAndImm"; break;
   case Matcher::CheckOrImm: return "OPC_CheckOrImm"; break;
   case Matcher::CheckFoldableChainNode:
     return "OPC_CheckFoldableChainNode"; break;
+  case Matcher::CheckImmAllOnesV: return "OPC_CheckImmAllOnesV"; break;
+  case Matcher::CheckImmAllZerosV: return "OPC_CheckImmAllZerosV"; break;
   case Matcher::EmitInteger: return "OPC_EmitInteger"; break;
   case Matcher::EmitStringInteger: return "OPC_EmitStringInteger"; break;
   case Matcher::EmitRegister: return "OPC_EmitRegister"; break;
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 612342ddcddf..8f54beeba65b 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -1,9 +1,8 @@
 //===- DAGISelMatcherGen.cpp - Matcher generator --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -278,6 +277,27 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
     return;
   }
 
+  if (LeafRec->getName() == "immAllOnesV") {
+    // If this is the root of the dag we're matching, we emit a redundant opcode
+    // check to ensure that this gets folded into the normal top-level
+    // OpcodeSwitch.
+    if (N == Pattern.getSrcPattern()) {
+      const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+      AddMatcher(new CheckOpcodeMatcher(NI));
+    }
+    return AddMatcher(new CheckImmAllOnesVMatcher());
+  }
+  if (LeafRec->getName() == "immAllZerosV") {
+    // If this is the root of the dag we're matching, we emit a redundant opcode
+    // check to ensure that this gets folded into the normal top-level
+    // OpcodeSwitch.
+    if (N == Pattern.getSrcPattern()) {
+      const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+      AddMatcher(new CheckOpcodeMatcher(NI));
+    }
+    return AddMatcher(new CheckImmAllZerosVMatcher());
+  }
+
   errs() << "Unknown leaf kind: " << *N << "\n";
   abort();
 }
@@ -671,6 +691,17 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
       return;
     }
 
+    if (Def->getName() == "undef_tied_input") {
+      std::array<MVT::SimpleValueType, 1> ResultVTs = {{ N->getSimpleType(0) }};
+      std::array<unsigned, 0> InstOps;
+      auto IDOperandNo = NextRecordedOperandNo++;
+      AddMatcher(new EmitNodeMatcher("TargetOpcode::IMPLICIT_DEF",
+                                     ResultVTs, InstOps, false, false, false,
+                                     false, -1, IDOperandNo));
+      ResultOps.push_back(IDOperandNo);
+      return;
+    }
+
     // Handle a reference to a register class. This is used
     // in COPY_TO_SUBREG instructions.
     if (Def->isSubClassOf("RegisterOperand"))
@@ -763,14 +794,27 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   // 'execute always' values. Match up the node operands to the instruction
   // operands to do this.
   unsigned ChildNo = 0;
+
+  // Similarly to the code in TreePatternNode::ApplyTypeConstraints, count the
+  // number of operands at the end of the list which have default values.
+  // Those can come from the pattern if it provides enough arguments, or be
+  // filled in with the default if the pattern hasn't provided them. But any
+  // operand with a default value _before_ the last mandatory one will be
+  // filled in with its default unconditionally.
+  unsigned NonOverridableOperands = NumFixedOperands;
+  while (NonOverridableOperands > NumResults &&
+         CGP.operandHasDefault(II.Operands[NonOverridableOperands-1].Rec))
+    --NonOverridableOperands;
+
   for (unsigned InstOpNo = NumResults, e = NumFixedOperands;
        InstOpNo != e; ++InstOpNo) {
     // Determine what to emit for this operand.
     Record *OperandNode = II.Operands[InstOpNo].Rec;
-    if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
-        !CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
-      // This is a predicate or optional def operand; emit the
-      // 'default ops' operands.
+    if (CGP.operandHasDefault(OperandNode) &&
+        (InstOpNo < NonOverridableOperands || ChildNo >= N->getNumChildren())) {
+      // This is a predicate or optional def operand which the pattern has not
+      // overridden, or which we aren't letting it override; emit the 'default
+      // ops' operands.
       const DAGDefaultOperand &DefaultOp
         = CGP.getDefaultOperand(OperandNode);
       for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index 554c7438ce3d..7d51b0769372 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -1,9 +1,8 @@
 //===- DAGISelMatcherOpt.cpp - Optimize a DAG Matcher ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,9 +55,13 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
       if (MC->getChildNo() < 4)  // Only have CheckChildSame0...3
         New = new CheckChildSameMatcher(MC->getChildNo(), CS->getMatchNumber());
 
-    if (CheckIntegerMatcher *CS = dyn_cast<CheckIntegerMatcher>(MC->getNext()))
+    if (CheckIntegerMatcher *CI = dyn_cast<CheckIntegerMatcher>(MC->getNext()))
       if (MC->getChildNo() < 5)  // Only have CheckChildInteger0...4
-        New = new CheckChildIntegerMatcher(MC->getChildNo(), CS->getValue());
+        New = new CheckChildIntegerMatcher(MC->getChildNo(), CI->getValue());
+
+    if (auto *CCC = dyn_cast<CheckCondCodeMatcher>(MC->getNext()))
+      if (MC->getChildNo() == 2)  // Only have CheckChild2CondCode
+        New = new CheckChild2CondCodeMatcher(CCC->getCondCodeName());
 
     if (New) {
       // Insert the new node.
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 0db0f55f5ed6..dabcc8f8ed55 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -1,9 +1,8 @@
 //===- DFAPacketizerEmitter.cpp - Packetization DFA for a VLIW machine ----===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index b99a0a973a2c..9e75c7fba77b 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -1,9 +1,8 @@
 //===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/ExegesisEmitter.cpp b/utils/TableGen/ExegesisEmitter.cpp
index 208237aca20c..976d5f51776f 100644
--- a/utils/TableGen/ExegesisEmitter.cpp
+++ b/utils/TableGen/ExegesisEmitter.cpp
@@ -1,9 +1,8 @@
 //===- ExegesisEmitter.cpp - Generate exegesis target data ----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 5134b684c6f9..b39956859fe8 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -1,9 +1,8 @@
 ///===- FastISelEmitter.cpp - Generate an instruction selector -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 5e621fc0efdd..f5e975d2e5ae 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1,9 +1,8 @@
 //===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -17,9 +16,10 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/CachedHashString.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
@@ -48,6 +48,12 @@ using namespace llvm;
 
 namespace {
 
+STATISTIC(NumEncodings, "Number of encodings considered");
+STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info");
+STATISTIC(NumInstructions, "Number of instructions considered");
+STATISTIC(NumEncodingsSupported, "Number of encodings supported");
+STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
+
 struct EncodingField {
   unsigned Base, Width, Offset;
   EncodingField(unsigned B, unsigned W, unsigned O)
@@ -95,6 +101,15 @@ struct EncodingAndInst {
       : EncodingDef(EncodingDef), Inst(Inst) {}
 };
 
+struct EncodingIDAndOpcode {
+  unsigned EncodingID;
+  unsigned Opcode;
+
+  EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
+  EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
+      : EncodingID(EncodingID), Opcode(Opcode) {}
+};
+
 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
   if (Value.EncodingDef != Value.Inst->TheDef)
     OS << Value.EncodingDef->getName() << ":";
@@ -103,6 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
 }
 
 class FixedLenDecoderEmitter {
+  RecordKeeper &RK;
   std::vector<EncodingAndInst> NumberedEncodings;
 
 public:
@@ -114,7 +130,7 @@ public:
                          std::string ROK = "MCDisassembler::Success",
                          std::string RFail = "MCDisassembler::Fail",
                          std::string L = "")
-      : Target(R), PredicateNamespace(std::move(PredicateNamespace)),
+      : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
         GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
         ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
         Locals(std::move(L)) {}
@@ -252,10 +268,11 @@ protected:
   bool Mixed; // a mixed region contains both set and unset bits
 
   // Map of well-known segment value to the set of uid's with that value.
-  std::map<uint64_t, std::vector<unsigned>> FilteredInstructions;
+  std::map<uint64_t, std::vector<EncodingIDAndOpcode>>
+      FilteredInstructions;
 
   // Set of uid's with non-constant segment values.
-  std::vector<unsigned> VariableInstructions;
+  std::vector<EncodingIDAndOpcode> VariableInstructions;
 
   // Map of well-known segment value to its delegate.
   std::map<unsigned, std::unique_ptr<const FilterChooser>> FilterChooserMap;
@@ -264,7 +281,7 @@ protected:
   unsigned NumFiltered;
 
   // Keeps track of the last opcode in the filtered bucket.
-  unsigned LastOpcFiltered;
+  EncodingIDAndOpcode LastOpcFiltered;
 
 public:
   Filter(Filter &&f);
@@ -274,7 +291,7 @@ public:
 
   unsigned getNumFiltered() const { return NumFiltered; }
 
-  unsigned getSingletonOpc() const {
+  EncodingIDAndOpcode getSingletonOpc() const {
     assert(NumFiltered == 1);
     return LastOpcFiltered;
   }
@@ -341,7 +358,9 @@ protected:
   ArrayRef<EncodingAndInst> AllInstructions;
 
   // Vector of uid's for this filter chooser to work on.
-  const std::vector<unsigned> &Opcodes;
+  // EncodingID is the id of the encoding being decoded; Opcode is the opcode id
+  // that should be emitted.
+  const std::vector<EncodingIDAndOpcode> &Opcodes;
 
   // Lookup table for the operand decoding of instructions.
   const std::map<unsigned, std::vector<OperandInfo>> &Operands;
@@ -367,7 +386,7 @@ protected:
 
 public:
   FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<unsigned> &IDs,
+                const std::vector<EncodingIDAndOpcode> &IDs,
                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                 unsigned BW, const FixedLenDecoderEmitter *E)
       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
@@ -377,7 +396,7 @@ public:
   }
 
   FilterChooser(ArrayRef<EncodingAndInst> Insts,
-                const std::vector<unsigned> &IDs,
+                const std::vector<EncodingIDAndOpcode> &IDs,
                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
                 const std::vector<bit_value_t> &ParentFilterBitValues,
                 const FilterChooser &parent)
@@ -413,6 +432,15 @@ protected:
     }
   }
 
+  // Emit the name of the encoding/instruction pair.
+  void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
+    const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
+    const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
+    if (EncodingDef != InstDef)
+      OS << EncodingDef->getName() << ":";
+    OS << InstDef->getName();
+  }
+
   // Populates the field of the insn given the start position and the number of
   // consecutive bits to scan for.
   //
@@ -463,7 +491,7 @@ protected:
 
   // Emits table entries to decode the singleton.
   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
-                               unsigned Opc) const;
+                               EncodingIDAndOpcode Opc) const;
 
   // Emits code to decode the singleton, and then to decode the rest.
   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
@@ -524,13 +552,13 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
   assert(StartBit + NumBits - 1 < Owner->BitWidth);
 
   NumFiltered = 0;
-  LastOpcFiltered = 0;
+  LastOpcFiltered = {0, 0};
 
   for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
     insn_t Insn;
 
     // Populates the insn given the uid.
-    Owner->insnWithID(Insn, Owner->Opcodes[i]);
+    Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);
 
     uint64_t Field;
     // Scans the segment for possibly well-specified encoding bits.
@@ -1026,7 +1054,7 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
   // 1: Water (the bit value does not affect decoding)
   // 2: Island (well-known bit value needed for decoding)
   int State = 0;
-  int Val = -1;
+  int64_t Val = -1;
 
   for (unsigned i = 0; i < BitWidth; ++i) {
     Val = Value(Insn[i]);
@@ -1314,12 +1342,12 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
 
 // Emits table entries to decode the singleton.
 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
-                                            unsigned Opc) const {
+                                            EncodingIDAndOpcode Opc) const {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
   std::vector<uint64_t> FieldVals;
   insn_t Insn;
-  insnWithID(Insn, Opc);
+  insnWithID(Insn, Opc.EncodingID);
 
   // Look for islands of undecoded bits of the singleton.
   getIslands(StartBits, EndBits, FieldVals, Insn);
@@ -1327,7 +1355,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
   unsigned Size = StartBits.size();
 
   // Emit the predicate table entry if one is needed.
-  emitPredicateTableEntry(TableInfo, Opc);
+  emitPredicateTableEntry(TableInfo, Opc.EncodingID);
 
   // Check any additional encoding fields needed.
   for (unsigned I = Size; I != 0; --I) {
@@ -1351,10 +1379,11 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
   }
 
   // Check for soft failure of the match.
-  emitSoftFailTableEntry(TableInfo, Opc);
+  emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
 
   bool HasCompleteDecoder;
-  unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc, HasCompleteDecoder);
+  unsigned DIdx =
+      getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
 
   // Produce OPC_Decode or OPC_TryDecode opcode based on the information
   // whether the instruction decoder is complete or not. If it is complete
@@ -1367,8 +1396,9 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
   // can decode it.
   TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
       MCD::OPC_TryDecode);
+  NumEncodingsSupported++;
   uint8_t Buffer[16], *p;
-  encodeULEB128(Opc, Buffer);
+  encodeULEB128(Opc.Opcode, Buffer);
   for (p = Buffer; *p >= 128 ; ++p)
     TableInfo.Table.push_back(*p);
   TableInfo.Table.push_back(*p);
@@ -1394,7 +1424,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
 // Emits table entries to decode the singleton, and then to decode the rest.
 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                             const Filter &Best) const {
-  unsigned Opc = Best.getSingletonOpc();
+  EncodingIDAndOpcode Opc = Best.getSingletonOpc();
 
   // complex singletons need predicate checks from the first singleton
   // to refer forward to the variable filterchooser that follows.
@@ -1454,7 +1484,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
       std::vector<uint64_t> FieldVals;
       insn_t Insn;
 
-      insnWithID(Insn, Opcodes[i]);
+      insnWithID(Insn, Opcodes[i].EncodingID);
 
       // Look for islands of undecoded bits of any instruction.
       if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
@@ -1498,7 +1528,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
     insn_t insn;
 
-    insnWithID(insn, Opcodes[InsnIndex]);
+    insnWithID(insn, Opcodes[InsnIndex].EncodingID);
 
     for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
       switch (bitAttrs[BitIndex]) {
@@ -1717,9 +1747,12 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
   dumpStack(errs(), "\t\t");
 
   for (unsigned i = 0; i < Opcodes.size(); ++i) {
-    errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
-    dumpBits(errs(),
-             getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
+    errs() << '\t';
+    emitNameWithID(errs(), Opcodes[i].EncodingID);
+    errs() << " ";
+    dumpBits(
+        errs(),
+        getBitsField(*AllInstructions[Opcodes[i].EncodingID].EncodingDef, "Inst"));
     errs() << '\n';
   }
 }
@@ -1751,24 +1784,25 @@ static std::string findOperandDecoderMethod(TypedInit *TI) {
   return Decoder;
 }
 
-static bool populateInstruction(CodeGenTarget &Target,
-                       const CodeGenInstruction &CGI, unsigned Opc,
-                       std::map<unsigned, std::vector<OperandInfo>> &Operands){
+static bool
+populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
+                    const CodeGenInstruction &CGI, unsigned Opc,
+                    std::map<unsigned, std::vector<OperandInfo>> &Operands) {
   const Record &Def = *CGI.TheDef;
   // If all the bit positions are not specified; do not decode this instruction.
   // We are bound to fail!  For proper disassembly, the well-known encoding bits
   // of the instruction must be fully specified.
 
-  BitsInit &Bits = getBitsField(Def, "Inst");
+  BitsInit &Bits = getBitsField(EncodingDef, "Inst");
   if (Bits.allInComplete()) return false;
 
   std::vector<OperandInfo> InsnOperands;
 
   // If the instruction has specified a custom decoding hook, use that instead
   // of trying to auto-generate the decoder.
-  StringRef InstDecoder = Def.getValueAsString("DecoderMethod");
+  StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
   if (InstDecoder != "") {
-    bool HasCompleteInstDecoder = Def.getValueAsBit("hasCompleteDecoder");
+    bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder");
     InsnOperands.push_back(OperandInfo(InstDecoder, HasCompleteInstDecoder));
     Operands[Opc] = InsnOperands;
     return true;
@@ -2144,7 +2178,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "  const FeatureBitset& Bits = STI.getFeatureBits();\n"
      << "\n"
      << "  const uint8_t *Ptr = DecodeTable;\n"
-     << "  uint32_t CurFieldValue = 0;\n"
+     << "  InsnType CurFieldValue = 0;\n"
      << "  DecodeStatus S = MCDisassembler::Success;\n"
      << "  while (true) {\n"
      << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
@@ -2189,7 +2223,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      unsigned Len = *++Ptr;\n"
      << "      InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
      << "      // Decode the field value.\n"
-     << "      uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
+     << "      InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
      << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
@@ -2336,37 +2370,52 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
   // Parameterize the decoders based on namespace and instruction width.
   const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
   NumberedEncodings.reserve(NumberedInstructions.size());
-  for (const auto &NumberedInstruction : NumberedInstructions)
+  DenseMap<Record *, unsigned> IndexOfInstruction;
+  for (const auto &NumberedInstruction : NumberedInstructions) {
+    IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
     NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+  }
+  for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
+    NumberedEncodings.emplace_back(
+        NumberedAlias,
+        &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
 
-  std::map<std::pair<std::string, unsigned>,
-           std::vector<unsigned>> OpcMap;
+  std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
+      OpcMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
 
   for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+    const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
     const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
     const Record *Def = Inst->TheDef;
-    unsigned Size = Def->getValueAsInt("Size");
+    unsigned Size = EncodingDef->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
         Def->getValueAsBit("isPseudo") ||
         Def->getValueAsBit("isAsmParserOnly") ||
-        Def->getValueAsBit("isCodeGenOnly"))
+        Def->getValueAsBit("isCodeGenOnly")) {
+      NumEncodingsLackingDisasm++;
       continue;
+    }
 
-    StringRef DecoderNamespace = Def->getValueAsString("DecoderNamespace");
+    if (i < NumberedInstructions.size())
+      NumInstructions++;
+    NumEncodings++;
+
+    StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
 
     if (Size) {
-      if (populateInstruction(Target, *Inst, i, Operands)) {
-        OpcMap[std::make_pair(DecoderNamespace, Size)].push_back(i);
-      }
+      if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+        OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second);
+      } else
+        NumEncodingsOmitted++;
     }
   }
 
   DecoderTableInfo TableInfo;
   for (const auto &Opc : OpcMap) {
     // Emit the decoder for this namespace+width combination.
-    ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
-                                                   NumberedEncodings.size());
+    ArrayRef<EncodingAndInst> NumberedEncodingsRef(
+        NumberedEncodings.data(), NumberedEncodings.size());
     FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
                      8 * Opc.first.second, this);
 
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index 997ceb12becd..f1c02134198b 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -1,9 +1,8 @@
 //===- GlobalISelEmitter.cpp - Generate an instruction selector -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -233,6 +232,23 @@ static std::string explainPredicates(const TreePatternNode *N) {
     if (Record *VT = P.getScalarMemoryVT())
       Explanation += (" ScalarVT(MemVT)=" + VT->getName()).str();
 
+    if (ListInit *AddrSpaces = P.getAddressSpaces()) {
+      raw_string_ostream OS(Explanation);
+      OS << " AddressSpaces=[";
+
+      StringRef AddrSpaceSeparator;
+      for (Init *Val : AddrSpaces->getValues()) {
+        IntInit *IntVal = dyn_cast<IntInit>(Val);
+        if (!IntVal)
+          continue;
+
+        OS << AddrSpaceSeparator << IntVal->getValue();
+        AddrSpaceSeparator = ", ";
+      }
+
+      OS << ']';
+    }
+
     if (P.isAtomicOrderingMonotonic())
       Explanation += " monotonic";
     if (P.isAtomicOrderingAcquire())
@@ -298,7 +314,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
         Predicate.isSignExtLoad() || Predicate.isZeroExtLoad())
       continue;
 
-    if (Predicate.isNonTruncStore())
+    if (Predicate.isNonTruncStore() || Predicate.isTruncStore())
       continue;
 
     if (Predicate.isLoad() && Predicate.getMemoryVT())
@@ -309,6 +325,12 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
         continue;
     }
 
+    if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+      const ListInit *AddrSpaces = Predicate.getAddressSpaces();
+      if (AddrSpaces && !AddrSpaces->empty())
+        continue;
+    }
+
     if (Predicate.isAtomic() && Predicate.getMemoryVT())
       continue;
 
@@ -882,12 +904,19 @@ public:
 
   void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
 
-  void defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
-                               unsigned RendererID, unsigned SubOperandID) {
-    assert(ComplexSubOperands.count(SymbolicName) == 0 && "Already defined");
+  Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
+                                unsigned RendererID, unsigned SubOperandID) {
+    if (ComplexSubOperands.count(SymbolicName))
+      return failedImport(
+          "Complex suboperand referenced more than once (Operand: " +
+          SymbolicName + ")");
+
     ComplexSubOperands[SymbolicName] =
         std::make_tuple(ComplexPattern, RendererID, SubOperandID);
+
+    return Error::success();
   }
+
   Optional<DefinedComplexPatternSubOperand>
   getComplexSubOperand(StringRef SymbolicName) const {
     const auto &I = ComplexSubOperands.find(SymbolicName);
@@ -1022,6 +1051,7 @@ public:
     IPM_AtomicOrderingMMO,
     IPM_MemoryLLTSize,
     IPM_MemoryVsLLTSize,
+    IPM_MemoryAddressSpace,
     IPM_GenericPredicate,
     OPM_SameOperand,
     OPM_ComplexPattern,
@@ -1507,6 +1537,9 @@ Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
 
   if (OperandIsAPointer)
     addPredicate<PointerToAnyOperandMatcher>(OpTyOrNone->get().getSizeInBits());
+  else if (VTy.isPointer())
+    addPredicate<LLTOperandMatcher>(LLT::pointer(VTy.getPtrAddrSpace(),
+                                                 OpTyOrNone->get().getSizeInBits()));
   else
     addPredicate<LLTOperandMatcher>(*OpTyOrNone);
   return Error::success();
@@ -1780,6 +1813,42 @@ public:
   }
 };
 
+class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+  unsigned MMOIdx;
+  SmallVector<unsigned, 4> AddrSpaces;
+
+public:
+  MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+                                     ArrayRef<unsigned> AddrSpaces)
+      : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID),
+        MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_MemoryAddressSpace;
+  }
+  bool isIdentical(const PredicateMatcher &B) const override {
+    if (!InstructionPredicateMatcher::isIdentical(B))
+      return false;
+    auto *Other = cast<MemoryAddressSpacePredicateMatcher>(&B);
+    return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+        // Encode number of address spaces to expect.
+          << MatchTable::Comment("NumAddrSpace")
+          << MatchTable::IntValue(AddrSpaces.size());
+    for (unsigned AS : AddrSpaces)
+      Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS);
+
+    Table << MatchTable::LineBreak;
+  }
+};
+
 /// Generates code to check that the size of an MMO is less-than, equal-to, or
 /// greater than a given LLT.
 class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
@@ -3028,7 +3097,8 @@ private:
   importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
                             BuildMIAction &DstMIBuilder,
                             TreePatternNode *DstChild);
-  Error importDefaultOperandRenderers(BuildMIAction &DstMIBuilder,
+  Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
+                                      BuildMIAction &DstMIBuilder,
                                       DagInit *DefaultOps) const;
   Error
   importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
@@ -3200,7 +3270,26 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       continue;
     }
 
-    // G_LOAD is used for both non-extending and any-extending loads. 
+    // An address space check is needed in all contexts if there is one.
+    if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+      if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) {
+        SmallVector<unsigned, 4> ParsedAddrSpaces;
+
+        for (Init *Val : AddrSpaces->getValues()) {
+          IntInit *IntVal = dyn_cast<IntInit>(Val);
+          if (!IntVal)
+            return failedImport("Address space is not an integer");
+          ParsedAddrSpaces.push_back(IntVal->getValue());
+        }
+
+        if (!ParsedAddrSpaces.empty()) {
+          InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
+            0, ParsedAddrSpaces);
+        }
+      }
+    }
+
+    // G_LOAD is used for both non-extending and any-extending loads.
     if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
       InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
           0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
@@ -3212,6 +3301,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       continue;
     }
 
+    if (Predicate.isStore() && Predicate.isTruncStore()) {
+      // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+        0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      continue;
+    }
+
     // No check required. We already did it by swapping the opcode.
     if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
         Predicate.isSignExtLoad())
@@ -3422,9 +3518,12 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
 
       for (unsigned i = 0, e = SrcChild->getNumChildren(); i != e; ++i) {
         auto *SubOperand = SrcChild->getChild(i);
-        if (!SubOperand->getName().empty())
-          Rule.defineComplexSubOperand(SubOperand->getName(),
-                                       SrcChild->getOperator(), RendererID, i);
+        if (!SubOperand->getName().empty()) {
+          if (auto Error = Rule.defineComplexSubOperand(SubOperand->getName(),
+                                                        SrcChild->getOperator(),
+                                                        RendererID, i))
+            return Error;
+        }
       }
 
       return Error::success();
@@ -3765,7 +3864,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
     // end up with too many rendered operands.
     if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
       DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
-      if (auto Error = importDefaultOperandRenderers(DstMIBuilder, DefaultOps))
+      if (auto Error = importDefaultOperandRenderers(
+            InsertPt, M, DstMIBuilder, DefaultOps))
         return std::move(Error);
       ++NumDefaultOps;
       continue;
@@ -3790,19 +3890,39 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
 }
 
 Error GlobalISelEmitter::importDefaultOperandRenderers(
-    BuildMIAction &DstMIBuilder, DagInit *DefaultOps) const {
+    action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+    DagInit *DefaultOps) const {
   for (const auto *DefaultOp : DefaultOps->getArgs()) {
+    Optional<LLTCodeGen> OpTyOrNone = None;
+
     // Look through ValueType operators.
     if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
       if (const DefInit *DefaultDagOperator =
               dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
-        if (DefaultDagOperator->getDef()->isSubClassOf("ValueType"))
+        if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) {
+          OpTyOrNone = MVTToLLT(getValueType(
+                                  DefaultDagOperator->getDef()));
           DefaultOp = DefaultDagOp->getArg(0);
+        }
       }
     }
 
     if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) {
-      DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef());
+      auto Def = DefaultDefOp->getDef();
+      if (Def->getName() == "undef_tied_input") {
+        unsigned TempRegID = M.allocateTempRegID();
+        M.insertAction<MakeTempRegisterAction>(
+          InsertPt, OpTyOrNone.getValue(), TempRegID);
+        InsertPt = M.insertAction<BuildMIAction>(
+          InsertPt, M.allocateOutputInsnID(),
+          &Target.getInstruction(RK.getDef("IMPLICIT_DEF")));
+        BuildMIAction &IDMIBuilder = *static_cast<BuildMIAction *>(
+          InsertPt->get());
+        IDMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+        DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+      } else {
+        DstMIBuilder.addRenderer<AddRegisterRenderer>(Def);
+      }
       continue;
     }
 
@@ -4489,8 +4609,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
        << ", // " << Record->getName() << "\n";
   OS << "};\n\n";
 
-  std::stable_sort(Rules.begin(), Rules.end(), [&](const RuleMatcher &A,
-                                                   const RuleMatcher &B) {
+  llvm::stable_sort(Rules, [&](const RuleMatcher &A, const RuleMatcher &B) {
     int ScoreA = RuleMatcherScores[A.getRuleID()];
     int ScoreB = RuleMatcherScores[B.getRuleID()];
     if (ScoreA > ScoreB)
diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp
index 086e12dafd74..d9662889a5db 100644
--- a/utils/TableGen/InfoByHwMode.cpp
+++ b/utils/TableGen/InfoByHwMode.cpp
@@ -1,9 +1,8 @@
 //===--- InfoByHwMode.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Classes that implement data parameterized by HW modes for instruction
@@ -39,6 +38,11 @@ ValueTypeByHwMode::ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH) {
   }
 }
 
+ValueTypeByHwMode::ValueTypeByHwMode(Record *R, MVT T) : ValueTypeByHwMode(T) {
+  if (R->isSubClassOf("PtrValueType"))
+    PtrAddrSpace = R->getValueAsInt("AddrSpace");
+}
+
 bool ValueTypeByHwMode::operator== (const ValueTypeByHwMode &T) const {
   assert(isValid() && T.isValid() && "Invalid type in assignment");
   bool Simple = isSimple();
@@ -112,7 +116,7 @@ ValueTypeByHwMode llvm::getValueTypeByHwMode(Record *Rec,
          "Record must be derived from ValueType");
   if (Rec->isSubClassOf("HwModeSelect"))
     return ValueTypeByHwMode(Rec, CGH);
-  return ValueTypeByHwMode(llvm::getValueType(Rec));
+  return ValueTypeByHwMode(Rec, llvm::getValueType(Rec));
 }
 
 RegSizeInfo::RegSizeInfo(Record *R, const CodeGenHwModes &CGH) {
diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h
index 7be4678f271b..9e5cc3d5f2a4 100644
--- a/utils/TableGen/InfoByHwMode.h
+++ b/utils/TableGen/InfoByHwMode.h
@@ -1,9 +1,8 @@
 //===--- InfoByHwMode.h -----------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 // Classes that implement data parameterized by HW modes for instruction
@@ -120,6 +119,7 @@ struct InfoByHwMode {
 
 struct ValueTypeByHwMode : public InfoByHwMode<MVT> {
   ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH);
+  ValueTypeByHwMode(Record *R, MVT T);
   ValueTypeByHwMode(MVT T) { Map.insert({DefaultMode,T}); }
   ValueTypeByHwMode() = default;
 
@@ -135,6 +135,11 @@ struct ValueTypeByHwMode : public InfoByHwMode<MVT> {
   static StringRef getMVTName(MVT T);
   void writeToStream(raw_ostream &OS) const;
   void dump() const;
+
+  unsigned PtrAddrSpace = std::numeric_limits<unsigned>::max();
+  bool isPointer() const {
+    return PtrAddrSpace != std::numeric_limits<unsigned>::max();
+  }
 };
 
 ValueTypeByHwMode getValueTypeByHwMode(Record *Rec,
diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp
index 9d50351854ec..91c457ba08fd 100644
--- a/utils/TableGen/InstrDocsEmitter.cpp
+++ b/utils/TableGen/InstrDocsEmitter.cpp
@@ -1,9 +1,8 @@
 //===- InstrDocsEmitter.cpp - Opcode Documentation Generator --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 39d9e8526386..2d367f538b71 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -1,9 +1,8 @@
 //===- InstrInfoEmitter.cpp - Generate a Instruction Set Desc. --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -77,7 +76,9 @@ private:
                   std::map<std::vector<Record*>, unsigned> &EL,
                   const OperandInfoMapTy &OpInfo,
                   raw_ostream &OS);
-  void emitOperandTypesEnum(raw_ostream &OS, const CodeGenTarget &Target);
+  void emitOperandTypeMappings(
+      raw_ostream &OS, const CodeGenTarget &Target,
+      ArrayRef<const CodeGenInstruction *> NumberedInstructions);
   void initOperandMapData(
             ArrayRef<const CodeGenInstruction *> NumberedInstructions,
             StringRef Namespace,
@@ -212,7 +213,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
 }
 
 /// Initialize data structures for generating operand name mappings.
-/// 
+///
 /// \param Operands [out] A map used to generate the OpName enum with operand
 ///        names as its keys and operand enum values as its values.
 /// \param OperandMap [out] A map for representing the operand name mappings for
@@ -325,8 +326,9 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
 /// Generate an enum for all the operand types for this target, under the
 /// llvm::TargetNamespace::OpTypes namespace.
 /// Operand types are all definitions derived of the Operand Target.td class.
-void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
-                                            const CodeGenTarget &Target) {
+void InstrInfoEmitter::emitOperandTypeMappings(
+    raw_ostream &OS, const CodeGenTarget &Target,
+    ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
 
   StringRef Namespace = Target.getInstNamespace();
   std::vector<Record *> Operands = Records.getAllDerivedDefinitions("Operand");
@@ -350,6 +352,69 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
   OS << "} // end namespace " << Namespace << "\n";
   OS << "} // end namespace llvm\n";
   OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
+
+  OS << "#ifdef GET_INSTRINFO_OPERAND_TYPE\n";
+  OS << "#undef GET_INSTRINFO_OPERAND_TYPE\n";
+  OS << "namespace llvm {\n";
+  OS << "namespace " << Namespace << " {\n";
+  OS << "LLVM_READONLY\n";
+  OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+  if (!NumberedInstructions.empty()) {
+    std::vector<int> OperandOffsets;
+    std::vector<Record *> OperandRecords;
+    int CurrentOffset = 0;
+    for (const CodeGenInstruction *Inst : NumberedInstructions) {
+      OperandOffsets.push_back(CurrentOffset);
+      for (const auto &Op : Inst->Operands) {
+        const DagInit *MIOI = Op.MIOperandInfo;
+        if (!MIOI || MIOI->getNumArgs() == 0) {
+          // Single, anonymous, operand.
+          OperandRecords.push_back(Op.Rec);
+          ++CurrentOffset;
+        } else {
+          for (Init *Arg : make_range(MIOI->arg_begin(), MIOI->arg_end())) {
+            OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
+            ++CurrentOffset;
+          }
+        }
+      }
+    }
+
+    // Emit the table of offsets for the opcode lookup.
+    OS << "  const int Offsets[] = {\n";
+    for (int I = 0, E = OperandOffsets.size(); I != E; ++I)
+      OS << "    " << OperandOffsets[I] << ",\n";
+    OS << "  };\n";
+
+    // Add an entry for the end so that we don't need to special case it below.
+    OperandOffsets.push_back(OperandRecords.size());
+    // Emit the actual operand types in a flat table.
+    OS << "  const int OpcodeOperandTypes[] = {\n    ";
+    for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) {
+      // We print each Opcode's operands in its own row.
+      if (I == OperandOffsets[CurOffset]) {
+        OS << "\n    ";
+        // If there are empty rows, mark them with an empty comment.
+        while (OperandOffsets[++CurOffset] == I)
+          OS << "/**/\n    ";
+      }
+      Record *OpR = OperandRecords[I];
+      if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
+        OS << "OpTypes::" << OpR->getName();
+      else
+        OS << -1;
+      OS << ", ";
+    }
+    OS << "\n  };\n";
+
+    OS << "  return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n";
+  } else {
+    OS << "  llvm_unreachable(\"No instructions defined\");\n";
+  }
+  OS << "}\n";
+  OS << "} // end namespace " << Namespace << "\n";
+  OS << "} // end namespace llvm\n";
+  OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n";
 }
 
 void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
@@ -561,7 +626,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
 
   emitOperandNameMappings(OS, Target, NumberedInstructions);
 
-  emitOperandTypesEnum(OS, Target);
+  emitOperandTypeMappings(OS, Target, NumberedInstructions);
 
   emitMCIIHelperMethods(OS, TargetName);
 }
@@ -604,6 +669,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.canFoldAsLoad)      OS << "|(1ULL<<MCID::FoldableAsLoad)";
   if (Inst.mayLoad)            OS << "|(1ULL<<MCID::MayLoad)";
   if (Inst.mayStore)           OS << "|(1ULL<<MCID::MayStore)";
+  if (Inst.mayRaiseFPException) OS << "|(1ULL<<MCID::MayRaiseFPException)";
   if (Inst.isPredicable)       OS << "|(1ULL<<MCID::Predicable)";
   if (Inst.isConvertibleToThreeAddress) OS << "|(1ULL<<MCID::ConvertibleTo3Addr)";
   if (Inst.isCommutable)       OS << "|(1ULL<<MCID::Commutable)";
@@ -629,13 +695,14 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   // Emit all of the target-specific flags...
   BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
   if (!TSF)
-    PrintFatalError("no TSFlags?");
+    PrintFatalError(Inst.TheDef->getLoc(), "no TSFlags?");
   uint64_t Value = 0;
   for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) {
     if (const auto *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
       Value |= uint64_t(Bit->getValue()) << i;
     else
-      PrintFatalError("Invalid TSFlags bit in " + Inst.TheDef->getName());
+      PrintFatalError(Inst.TheDef->getLoc(),
+                      "Invalid TSFlags bit in " + Inst.TheDef->getName());
   }
   OS << ", 0x";
   OS.write_hex(Value);
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 049282e5ebfe..979af98f6768 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -1,9 +1,8 @@
 //===- IntrinsicEmitter.cpp - Generate intrinsic information --------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -220,7 +219,8 @@ enum IIT_Info {
   IIT_STRUCT6 = 38,
   IIT_STRUCT7 = 39,
   IIT_STRUCT8 = 40,
-  IIT_F128 = 41
+  IIT_F128 = 41,
+  IIT_VEC_ELEMENT = 42
 };
 
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -259,10 +259,12 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
 #endif
 
 static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
-                            std::vector<unsigned char> &Sig) {
+                            unsigned &NextArgCode,
+                            std::vector<unsigned char> &Sig,
+                            ArrayRef<unsigned char> Mapping) {
 
   if (R->isSubClassOf("LLVMMatchType")) {
-    unsigned Number = R->getValueAsInt("Number");
+    unsigned Number = Mapping[R->getValueAsInt("Number")];
     assert(Number < ArgCodes.size() && "Invalid matching number!");
     if (R->isSubClassOf("LLVMExtendedType"))
       Sig.push_back(IIT_EXTEND_ARG);
@@ -270,7 +272,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
       Sig.push_back(IIT_TRUNC_ARG);
     else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
       Sig.push_back(IIT_HALF_VEC_ARG);
-    else if (R->isSubClassOf("LLVMVectorSameWidth")) {
+    else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
       Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
       Sig.push_back((Number << 3) | ArgCodes[Number]);
       MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
@@ -281,18 +283,18 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
       Sig.push_back(IIT_PTR_TO_ARG);
     else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
       Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
-      unsigned ArgNo = ArgCodes.size();
-      ArgCodes.push_back(3 /*vAny*/);
       // Encode overloaded ArgNo
-      Sig.push_back(ArgNo);
+      Sig.push_back(NextArgCode++);
       // Encode LLVMMatchType<Number> ArgNo
       Sig.push_back(Number);
       return;
     } else if (R->isSubClassOf("LLVMPointerToElt"))
       Sig.push_back(IIT_PTR_TO_ELT);
+    else if (R->isSubClassOf("LLVMVectorElementType"))
+      Sig.push_back(IIT_VEC_ELEMENT);
     else
       Sig.push_back(IIT_ARG);
-    return Sig.push_back((Number << 3) | ArgCodes[Number]);
+    return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
   }
 
   MVT::SimpleValueType VT = getValueType(R->getValueAsDef("VT"));
@@ -310,8 +312,9 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
     Sig.push_back(IIT_ARG);
 
     // Figure out what arg # this is consuming, and remember what kind it was.
-    unsigned ArgNo = ArgCodes.size();
-    ArgCodes.push_back(Tmp);
+    assert(NextArgCode < ArgCodes.size() && ArgCodes[NextArgCode] == Tmp &&
+           "Invalid or no ArgCode associated with overloaded VT!");
+    unsigned ArgNo = NextArgCode++;
 
     // Encode what sort of argument it must be in the low 3 bits of the ArgNo.
     return Sig.push_back((ArgNo << 3) | Tmp);
@@ -329,7 +332,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
     } else {
       Sig.push_back(IIT_PTR);
     }
-    return EncodeFixedType(R->getValueAsDef("ElTy"), ArgCodes, Sig);
+    return EncodeFixedType(R->getValueAsDef("ElTy"), ArgCodes, NextArgCode, Sig,
+                           Mapping);
   }
   }
 
@@ -354,6 +358,45 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
   EncodeFixedValueType(VT, Sig);
 }
 
+static void UpdateArgCodes(Record *R, std::vector<unsigned char> &ArgCodes,
+                           unsigned int &NumInserted,
+                           SmallVectorImpl<unsigned char> &Mapping) {
+  if (R->isSubClassOf("LLVMMatchType")) {
+    if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
+      ArgCodes.push_back(3 /*vAny*/);
+      ++NumInserted;
+    }
+    return;
+  }
+
+  unsigned Tmp = 0;
+  switch (getValueType(R->getValueAsDef("VT"))) {
+  default: break;
+  case MVT::iPTR:
+    UpdateArgCodes(R->getValueAsDef("ElTy"), ArgCodes, NumInserted, Mapping);
+    break;
+  case MVT::iPTRAny:
+    ++Tmp;
+    LLVM_FALLTHROUGH;
+  case MVT::vAny:
+    ++Tmp;
+    LLVM_FALLTHROUGH;
+  case MVT::fAny:
+    ++Tmp;
+    LLVM_FALLTHROUGH;
+  case MVT::iAny:
+    ++Tmp;
+    LLVM_FALLTHROUGH;
+  case MVT::Any:
+    unsigned OriginalIdx = ArgCodes.size() - NumInserted;
+    assert(OriginalIdx >= Mapping.size());
+    Mapping.resize(OriginalIdx+1);
+    Mapping[OriginalIdx] = ArgCodes.size();
+    ArgCodes.push_back(Tmp);
+    break;
+  }
+}
+
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma optimize("",on)
 #endif
@@ -364,6 +407,17 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
                                  std::vector<unsigned char> &TypeSig) {
   std::vector<unsigned char> ArgCodes;
 
+  // Add codes for any overloaded result VTs.
+  unsigned int NumInserted = 0;
+  SmallVector<unsigned char, 8> ArgMapping;
+  for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
+    UpdateArgCodes(Int.IS.RetTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
+
+  // Add codes for any overloaded operand VTs.
+  for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
+    UpdateArgCodes(Int.IS.ParamTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
+
+  unsigned NextArgCode = 0;
   if (Int.IS.RetVTs.empty())
     TypeSig.push_back(IIT_Done);
   else if (Int.IS.RetVTs.size() == 1 &&
@@ -383,11 +437,13 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
     }
 
     for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
-      EncodeFixedType(Int.IS.RetTypeDefs[i], ArgCodes, TypeSig);
+      EncodeFixedType(Int.IS.RetTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
+                      ArgMapping);
   }
 
   for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
-    EncodeFixedType(Int.IS.ParamTypeDefs[i], ArgCodes, TypeSig);
+    EncodeFixedType(Int.IS.ParamTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
+                    ArgMapping);
 }
 
 static void printIITEntry(raw_ostream &OS, unsigned char X) {
@@ -489,6 +545,9 @@ struct AttributeComparator {
     if (L->isNoReturn != R->isNoReturn)
       return R->isNoReturn;
 
+    if (L->isWillReturn != R->isWillReturn)
+      return R->isWillReturn;
+
     if (L->isCold != R->isCold)
       return R->isCold;
 
@@ -505,7 +564,6 @@ struct AttributeComparator {
     CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
     CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
     if (LK != RK) return (LK > RK);
-
     // Order by argument attributes.
     // This is reliable because each side is already sorted internally.
     return (L->ArgumentAttributes < R->ArgumentAttributes);
@@ -613,6 +671,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
             OS << "Attribute::ReadNone";
             addComma = true;
             break;
+          case CodeGenIntrinsic::ImmArg:
+            if (addComma)
+              OS << ',';
+            OS << "Attribute::ImmArg";
+            addComma = true;
+            break;
           }
 
           ++ai;
@@ -624,9 +688,10 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
     }
 
     if (!intrinsic.canThrow ||
-        intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
-        intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
-        intrinsic.isConvergent || intrinsic.isSpeculatable) {
+        (intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !intrinsic.hasSideEffects) ||
+        intrinsic.isNoReturn || intrinsic.isWillReturn || intrinsic.isCold ||
+        intrinsic.isNoDuplicate || intrinsic.isConvergent ||
+        intrinsic.isSpeculatable) {
       OS << "      const Attribute::AttrKind Atts[] = {";
       bool addComma = false;
       if (!intrinsic.canThrow) {
@@ -639,6 +704,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
         OS << "Attribute::NoReturn";
         addComma = true;
       }
+      if (intrinsic.isWillReturn) {
+        if (addComma)
+          OS << ",";
+        OS << "Attribute::WillReturn";
+        addComma = true;
+      }
       if (intrinsic.isCold) {
         if (addComma)
           OS << ",";
@@ -666,6 +737,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
 
       switch (intrinsic.ModRef) {
       case CodeGenIntrinsic::NoMem:
+        if (intrinsic.hasSideEffects)
+          break;
         if (addComma)
           OS << ",";
         OS << "Attribute::ReadNone";
@@ -771,8 +844,9 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
           BuiltinMap[Ints[i].TargetPrefix];
 
       if (!BIM.insert(std::make_pair(BuiltinName, Ints[i].EnumName)).second)
-        PrintFatalError("Intrinsic '" + Ints[i].TheDef->getName() +
-                        "': duplicate " + CompilerName + " builtin name!");
+        PrintFatalError(Ints[i].TheDef->getLoc(),
+                        "Intrinsic '" + Ints[i].TheDef->getName() +
+                            "': duplicate " + CompilerName + " builtin name!");
       Table.GetOrAddStringOffset(BuiltinName);
     }
   }
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
index 0358cf26509b..51b1cb093b21 100644
--- a/utils/TableGen/OptParserEmitter.cpp
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -1,9 +1,8 @@
 //===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/PredicateExpander.cpp b/utils/TableGen/PredicateExpander.cpp
index 2e01b7c3138e..9f7f40db2626 100644
--- a/utils/TableGen/PredicateExpander.cpp
+++ b/utils/TableGen/PredicateExpander.cpp
@@ -1,9 +1,8 @@
 //===--------------------- PredicateExpander.cpp --------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/utils/TableGen/PredicateExpander.h b/utils/TableGen/PredicateExpander.h
index 0f3ee6867e65..115a81cf123b 100644
--- a/utils/TableGen/PredicateExpander.h
+++ b/utils/TableGen/PredicateExpander.h
@@ -1,9 +1,8 @@
 //===--------------------- PredicateExpander.h ----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index a363015730f3..3a80d8e5d1c4 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -1,9 +1,8 @@
 //===- PseudoLoweringEmitter.cpp - PseudoLowering Generator -----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp
index e03663b40f8a..e62f528ebc2e 100644
--- a/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -1,9 +1,8 @@
 //===- RISCVCompressInstEmitter.cpp - Generator for RISCV Compression -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 // RISCVCompressInstEmitter implements a tablegen-driven CompressPat based
 // RISCV Instruction Compression mechanism.
@@ -65,6 +64,7 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
+#include <set>
 #include <vector>
 using namespace llvm;
 
@@ -253,12 +253,14 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
   // Source instructions are non compressed instructions and don't have tied
   // operands.
   if (IsSource)
-    PrintFatalError("Input operands for Inst '" + Inst.TheDef->getName() +
-                    "' and input Dag operand count mismatch");
+    PrintFatalError(Inst.TheDef->getLoc(),
+                    "Input operands for Inst '" + Inst.TheDef->getName() +
+                        "' and input Dag operand count mismatch");
   // The Dag can't have more arguments than the Instruction.
   if (Dag->getNumArgs() > Inst.Operands.size())
-    PrintFatalError("Inst '" + Inst.TheDef->getName() +
-                    "' and Dag operand count mismatch");
+    PrintFatalError(Inst.TheDef->getLoc(),
+                    "Inst '" + Inst.TheDef->getName() +
+                        "' and Dag operand count mismatch");
 
   // The Instruction might have tied operands so the Dag might have
   //  a fewer operand count.
@@ -268,8 +270,9 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
       --RealCount;
 
   if (Dag->getNumArgs() != RealCount)
-    PrintFatalError("Inst '" + Inst.TheDef->getName() +
-                    "' and Dag operand count mismatch");
+    PrintFatalError(Inst.TheDef->getLoc(),
+                    "Inst '" + Inst.TheDef->getName() +
+                        "' and Dag operand count mismatch");
   return true;
 }
 
@@ -472,7 +475,7 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
                                          SourceOperandMap, DestOperandMap));
 }
 
-static void getReqFeatures(std::map<StringRef, int> &FeaturesMap,
+static void getReqFeatures(std::set<StringRef> &FeaturesSet,
                            const std::vector<Record *> &ReqFeatures) {
   for (auto &R : ReqFeatures) {
     StringRef AsmCondString = R->getValueAsString("AssemblerCondString");
@@ -481,11 +484,9 @@ static void getReqFeatures(std::map<StringRef, int> &FeaturesMap,
     SmallVector<StringRef, 4> Ops;
     SplitString(AsmCondString, Ops, ",");
     assert(!Ops.empty() && "AssemblerCondString cannot be empty");
-
     for (auto &Op : Ops) {
       assert(!Op.empty() && "Empty operator");
-      if (FeaturesMap.find(Op) == FeaturesMap.end())
-        FeaturesMap[Op] = FeaturesMap.size();
+      FeaturesSet.insert(Op);
     }
   }
 }
@@ -530,7 +531,8 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
                                                        bool Compress) {
   Record *AsmWriter = Target.getAsmWriter();
   if (!AsmWriter->getValueAsInt("PassSubtarget"))
-    PrintFatalError("'PassSubtarget' is false. SubTargetInfo object is needed "
+    PrintFatalError(AsmWriter->getLoc(),
+                    "'PassSubtarget' is false. SubTargetInfo object is needed "
                     "for target features.\n");
 
   std::string Namespace = Target.getName();
@@ -540,15 +542,15 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
   // transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the
   // source and destination are flipped and the sort key needs to change
   // accordingly.
-  std::stable_sort(CompressPatterns.begin(), CompressPatterns.end(),
-                   [Compress](const CompressPat &LHS, const CompressPat &RHS) {
-                     if (Compress)
-                       return (LHS.Source.TheDef->getName().str() <
-                               RHS.Source.TheDef->getName().str());
-                     else
-                       return (LHS.Dest.TheDef->getName().str() <
-                               RHS.Dest.TheDef->getName().str());
-                   });
+  llvm::stable_sort(CompressPatterns,
+                    [Compress](const CompressPat &LHS, const CompressPat &RHS) {
+                      if (Compress)
+                        return (LHS.Source.TheDef->getName().str() <
+                                RHS.Source.TheDef->getName().str());
+                      else
+                        return (LHS.Dest.TheDef->getName().str() <
+                                RHS.Dest.TheDef->getName().str());
+                    });
 
   // A list of MCOperandPredicates for all operands in use, and the reverse map.
   std::vector<const Record *> MCOpPredicates;
@@ -617,9 +619,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
       CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
     }
 
-    std::map<StringRef, int> FeaturesMap;
+    std::set<StringRef> FeaturesSet;
     // Add CompressPat required features.
-    getReqFeatures(FeaturesMap, CompressPat.PatReqFeatures);
+    getReqFeatures(FeaturesSet, CompressPat.PatReqFeatures);
 
     // Add Dest instruction required features.
     std::vector<Record *> ReqFeatures;
@@ -627,11 +629,10 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
     copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
       return R->getValueAsBit("AssemblerMatcherPredicate");
     });
-    getReqFeatures(FeaturesMap, ReqFeatures);
+    getReqFeatures(FeaturesSet, ReqFeatures);
 
     // Emit checks for all required features.
-    for (auto &F : FeaturesMap) {
-      StringRef Op = F.first;
+    for (auto &Op : FeaturesSet) {
       if (Op[0] == '!')
         CondStream.indent(6) << ("!STI.getFeatureBits()[" + Namespace +
                                  "::" + Op.substr(1) + "]")
diff --git a/utils/TableGen/RegisterBankEmitter.cpp b/utils/TableGen/RegisterBankEmitter.cpp
index 879b4162d629..7f6b3931d3de 100644
--- a/utils/TableGen/RegisterBankEmitter.cpp
+++ b/utils/TableGen/RegisterBankEmitter.cpp
@@ -1,9 +1,8 @@
 //===- RegisterBankEmitter.cpp - Generate a Register Bank Desc. -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index ded54c828bcd..1b619072c814 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1,9 +1,8 @@
 //===- RegisterInfoEmitter.cpp - Generate a Register File Desc. -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/SDNodeProperties.cpp b/utils/TableGen/SDNodeProperties.cpp
index 343febc99d1e..1843a78aa3cf 100644
--- a/utils/TableGen/SDNodeProperties.cpp
+++ b/utils/TableGen/SDNodeProperties.cpp
@@ -1,9 +1,8 @@
 //===- SDNodeProperties.cpp -----------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -39,9 +38,9 @@ unsigned llvm::parseSDPatternOperatorProperties(Record *R) {
     } else if (Property->getName() == "SDNPVariadic") {
       Properties |= 1 << SDNPVariadic;
     } else {
-      PrintFatalError("Unknown SD Node property '" +
-                      Property->getName() + "' on node '" +
-                      R->getName() + "'!");
+      PrintFatalError(R->getLoc(), "Unknown SD Node property '" +
+                                       Property->getName() + "' on node '" +
+                                       R->getName() + "'!");
     }
   }
 
diff --git a/utils/TableGen/SDNodeProperties.h b/utils/TableGen/SDNodeProperties.h
index a8d4efb5dab0..66a04e63150c 100644
--- a/utils/TableGen/SDNodeProperties.h
+++ b/utils/TableGen/SDNodeProperties.h
@@ -1,9 +1,8 @@
 //===- SDNodeProperties.h ---------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp
index f98a7c74bf0c..954b63e7253c 100644
--- a/utils/TableGen/SearchableTableEmitter.cpp
+++ b/utils/TableGen/SearchableTableEmitter.cpp
@@ -1,9 +1,8 @@
 //===- SearchableTableEmitter.cpp - Generate efficiently searchable tables -==//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -142,7 +141,7 @@ private:
   bool compareBy(Record *LHS, Record *RHS, const SearchIndex &Index);
 
   bool isIntegral(Init *I) {
-    return isa<BitsInit>(I) || isIntrinsic(I);
+    return isa<BitsInit>(I) || isa<CodeInit>(I) || isIntrinsic(I);
   }
 
   std::string searchableFieldType(const GenericField &Field, TypeContext Ctx) {
@@ -600,9 +599,10 @@ void SearchableTableEmitter::collectTableEntries(
     for (auto &Field : Table.Fields) {
       auto TI = dyn_cast<TypedInit>(EntryRec->getValueInit(Field.Name));
       if (!TI) {
-        PrintFatalError(Twine("Record '") + EntryRec->getName() +
-                        "' in table '" + Table.Name + "' is missing field '" +
-                        Field.Name + "'");
+        PrintFatalError(EntryRec->getLoc(),
+                        Twine("Record '") + EntryRec->getName() +
+                            "' in table '" + Table.Name +
+                            "' is missing field '" + Field.Name + "'");
       }
       if (!Field.RecType) {
         Field.RecType = TI->getType();
@@ -611,7 +611,7 @@ void SearchableTableEmitter::collectTableEntries(
         if (!Ty)
           PrintFatalError(Twine("Field '") + Field.Name + "' of table '" +
                           Table.Name + "' has incompatible type: " +
-                          Ty->getAsString() + " vs. " +
+                          Field.RecType->getAsString() + " vs. " +
                           TI->getType()->getAsString());
         Field.RecType = Ty;
       }
@@ -654,8 +654,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
     StringRef FilterClass = EnumRec->getValueAsString("FilterClass");
     Enum->Class = Records.getClass(FilterClass);
     if (!Enum->Class)
-      PrintFatalError(Twine("Enum FilterClass '") + FilterClass +
-                      "' does not exist");
+      PrintFatalError(EnumRec->getLoc(), Twine("Enum FilterClass '") +
+                                             FilterClass + "' does not exist");
 
     collectEnumEntries(*Enum, NameField, ValueField,
                        Records.getAllDerivedDefinitions(FilterClass));
@@ -675,9 +675,10 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
 
       if (auto TypeOfVal = TableRec->getValue(("TypeOf_" + FieldName).str())) {
         if (!parseFieldType(Table->Fields.back(), TypeOfVal->getValue())) {
-          PrintFatalError(Twine("Table '") + Table->Name +
-                          "' has bad 'TypeOf_" + FieldName + "': " +
-                          TypeOfVal->getValue()->getAsString());
+          PrintFatalError(TableRec->getLoc(),
+                          Twine("Table '") + Table->Name +
+                              "' has bad 'TypeOf_" + FieldName +
+                              "': " + TypeOfVal->getValue()->getAsString());
         }
       }
     }
@@ -705,8 +706,10 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
     Record *TableRec = IndexRec->getValueAsDef("Table");
     auto It = TableMap.find(TableRec);
     if (It == TableMap.end())
-      PrintFatalError(Twine("SearchIndex '") + IndexRec->getName() +
-                      "' refers to non-existing table '" + TableRec->getName());
+      PrintFatalError(IndexRec->getLoc(),
+                      Twine("SearchIndex '") + IndexRec->getName() +
+                          "' refers to non-existing table '" +
+                          TableRec->getName());
 
     GenericTable &Table = *It->second;
     Table.Indices.push_back(parseSearchIndex(
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
index 2b8f66a3bf3e..8a826eff311d 100644
--- a/utils/TableGen/SequenceToOffsetTable.h
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -1,9 +1,8 @@
 //===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 731c14bdb9a0..9ce2b3b275c8 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -1,9 +1,8 @@
 //===- SubtargetEmitter.cpp - Generate subtarget enumerations -------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -74,9 +73,11 @@ class SubtargetEmitter {
   CodeGenSchedModels &SchedModels;
   std::string Target;
 
-  void Enumeration(raw_ostream &OS);
-  unsigned FeatureKeyValues(raw_ostream &OS);
-  unsigned CPUKeyValues(raw_ostream &OS);
+  void Enumeration(raw_ostream &OS, DenseMap<Record *, unsigned> &FeatureMap);
+  unsigned FeatureKeyValues(raw_ostream &OS,
+                            const DenseMap<Record *, unsigned> &FeatureMap);
+  unsigned CPUKeyValues(raw_ostream &OS,
+                        const DenseMap<Record *, unsigned> &FeatureMap);
   void FormItineraryStageString(const std::string &Names,
                                 Record *ItinData, std::string &ItinString,
                                 unsigned &NStages);
@@ -138,7 +139,8 @@ public:
 //
 // Enumeration - Emit the specified class as an enumeration.
 //
-void SubtargetEmitter::Enumeration(raw_ostream &OS) {
+void SubtargetEmitter::Enumeration(raw_ostream &OS,
+                                   DenseMap<Record *, unsigned> &FeatureMap) {
   // Get all records of class and sort
   std::vector<Record*> DefList =
     Records.getAllDerivedDefinitions("SubtargetFeature");
@@ -147,7 +149,7 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
   unsigned N = DefList.size();
   if (N == 0)
     return;
-  if (N > MAX_SUBTARGET_FEATURES)
+  if (N + 1 > MAX_SUBTARGET_FEATURES)
     PrintFatalError("Too many subtarget features! Bump MAX_SUBTARGET_FEATURES.");
 
   OS << "namespace " << Target << " {\n";
@@ -162,18 +164,42 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
 
     // Get and emit name
     OS << "  " << Def->getName() << " = " << i << ",\n";
+
+    // Save the index for this feature.
+    FeatureMap[Def] = i;
   }
 
+  OS << "  "
+     << "NumSubtargetFeatures = " << N << "\n";
+
   // Close enumeration and namespace
   OS << "};\n";
   OS << "} // end namespace " << Target << "\n";
 }
 
+static void printFeatureMask(raw_ostream &OS, RecVec &FeatureList, // writes FeatureList to OS as a braced bit-mask initializer
+                             const DenseMap<Record *, unsigned> &FeatureMap) {
+  std::array<uint64_t, MAX_SUBTARGET_WORDS> Mask = {}; // every 64-bit word starts at zero
+  for (unsigned j = 0, M = FeatureList.size(); j < M; ++j) {
+    unsigned Bit = FeatureMap.lookup(FeatureList[j]); // bit index held in FeatureMap; NOTE(review): confirm result for a record missing from the map
+    Mask[Bit / 64] |= 1ULL << (Bit % 64); // select word Bit/64, set bit Bit%64 inside it
+  }
+
+  OS << "{ { { "; // three opening braces, balanced by the "} } }" written after the loop
+  for (unsigned i = 0; i != Mask.size(); ++i) {
+    OS << "0x";
+    OS.write_hex(Mask[i]); // one mask word per iteration, in index order
+    OS << "ULL, "; // every word, the last one included, is followed by ", "
+  }
+  OS << "} } }";
+}
+
 //
 // FeatureKeyValues - Emit data of all the subtarget features.  Used by the
 // command line.
 //
-unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
+unsigned SubtargetEmitter::FeatureKeyValues(
+    raw_ostream &OS, const DenseMap<Record *, unsigned> &FeatureMap) {
   // Gather and sort all the features
   std::vector<Record*> FeatureList =
                            Records.getAllDerivedDefinitions("SubtargetFeature");
@@ -204,16 +230,13 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
     OS << "  { "
        << "\"" << CommandLineName << "\", "
        << "\"" << Desc << "\", "
-       << "{ " << Target << "::" << Name << " }, ";
+       << Target << "::" << Name << ", ";
 
     RecVec ImpliesList = Feature->getValueAsListOfDefs("Implies");
 
-    OS << "{";
-    for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
-      OS << " " << Target << "::" << ImpliesList[j]->getName();
-      if (++j < M) OS << ",";
-    }
-    OS << " } },\n";
+    printFeatureMask(OS, ImpliesList, FeatureMap);
+
+    OS << " },\n";
     ++NumFeatures;
   }
 
@@ -227,7 +250,9 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
 // CPUKeyValues - Emit data of all the subtarget processors.  Used by command
 // line.
 //
-unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
+unsigned
+SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
+                               const DenseMap<Record *, unsigned> &FeatureMap) {
   // Gather and sort processor information
   std::vector<Record*> ProcessorList =
                           Records.getAllDerivedDefinitions("Processor");
@@ -235,7 +260,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
 
   // Begin processor table
   OS << "// Sorted (by key) array of values for CPU subtype.\n"
-     << "extern const llvm::SubtargetFeatureKV " << Target
+     << "extern const llvm::SubtargetSubTypeKV " << Target
      << "SubTypeKV[] = {\n";
 
   // For each processor
@@ -243,18 +268,16 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
     StringRef Name = Processor->getValueAsString("Name");
     RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
 
+    // Emit as { "cpu", { feature mask words }, &sched_model },
-    OS << "  { "
-       << "\"" << Name << "\", "
-       << "\"Select the " << Name << " processor\", ";
+    // Emit as { "cpu", "description", 0, { f1 , f2 , ... fn } },
+    OS << " { "
+       << "\"" << Name << "\", ";
 
-    OS << "{";
-    for (unsigned j = 0, M = FeatureList.size(); j < M;) {
-      OS << " " << Target << "::" << FeatureList[j]->getName();
-      if (++j < M) OS << ",";
-    }
-    // The { } is for the "implies" section of this data structure.
-    OS << " }, { } },\n";
+    printFeatureMask(OS, FeatureList, FeatureMap);
+
+    // Emit the scheduler model pointer.
+    const std::string &ProcModelName =
+      SchedModels.getModelForProc(Processor).ModelName;
+    OS << ", &" << ProcModelName << " },\n";
   }
 
   // End processor table
@@ -1368,33 +1391,6 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
   }
 }
 
-//
-// EmitProcessorLookup - generate cpu name to sched model lookup tables.
-//
-void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
-  // Gather and sort processor information
-  std::vector<Record*> ProcessorList =
-                          Records.getAllDerivedDefinitions("Processor");
-  llvm::sort(ProcessorList, LessRecordFieldName());
-
-  // Begin processor->sched model table
-  OS << "\n";
-  OS << "// Sorted (by key) array of sched model for CPU subtype.\n"
-     << "extern const llvm::SubtargetInfoKV " << Target
-     << "ProcSchedKV[] = {\n";
-  // For each processor
-  for (Record *Processor : ProcessorList) {
-    StringRef Name = Processor->getValueAsString("Name");
-    const std::string &ProcModelName =
-      SchedModels.getModelForProc(Processor).ModelName;
-
-    // Emit as { "cpu", procinit },
-    OS << "  { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n";
-  }
-  // End processor->sched model table
-  OS << "};\n";
-}
-
 //
 // EmitSchedModel - Emits all scheduling model tables, folding common patterns.
 //
@@ -1423,12 +1419,10 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
   }
   EmitSchedClassTables(SchedTables, OS);
 
+  OS << "\n#undef DBGFIELD\n";
+
   // Emit the processor machine model
   EmitProcessorModels(OS);
-  // Emit the processor lookup data
-  EmitProcessorLookup(OS);
-
-  OS << "\n#undef DBGFIELD";
 }
 
 static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
@@ -1740,13 +1734,12 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
      << "GenMCSubtargetInfo : public MCSubtargetInfo {\n";
   OS << "  " << Target << "GenMCSubtargetInfo(const Triple &TT, \n"
      << "    StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF,\n"
-     << "    ArrayRef<SubtargetFeatureKV> PD,\n"
-     << "    const SubtargetInfoKV *ProcSched,\n"
+     << "    ArrayRef<SubtargetSubTypeKV> PD,\n"
      << "    const MCWriteProcResEntry *WPR,\n"
      << "    const MCWriteLatencyEntry *WL,\n"
      << "    const MCReadAdvanceEntry *RA, const InstrStage *IS,\n"
      << "    const unsigned *OC, const unsigned *FP) :\n"
-     << "      MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n"
+     << "      MCSubtargetInfo(TT, CPU, FS, PF, PD,\n"
      << "                      WPR, WL, RA, IS, OC, FP) { }\n\n"
      << "  unsigned resolveVariantSchedClass(unsigned SchedClass,\n"
      << "      const MCInst *MI, unsigned CPUID) const override {\n"
@@ -1790,8 +1783,10 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "\n#ifdef GET_SUBTARGETINFO_ENUM\n";
   OS << "#undef GET_SUBTARGETINFO_ENUM\n\n";
 
+  DenseMap<Record *, unsigned> FeatureMap;
+
   OS << "namespace llvm {\n";
-  Enumeration(OS);
+  Enumeration(OS, FeatureMap);
   OS << "} // end namespace llvm\n\n";
   OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
 
@@ -1802,12 +1797,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
 #if 0
   OS << "namespace {\n";
 #endif
-  unsigned NumFeatures = FeatureKeyValues(OS);
-  OS << "\n";
-  unsigned NumProcs = CPUKeyValues(OS);
+  unsigned NumFeatures = FeatureKeyValues(OS, FeatureMap);
   OS << "\n";
   EmitSchedModel(OS);
   OS << "\n";
+  unsigned NumProcs = CPUKeyValues(OS, FeatureMap);
+  OS << "\n";
 #if 0
   OS << "} // end anonymous namespace\n\n";
 #endif
@@ -1828,8 +1823,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   else
     OS << "None, ";
   OS << '\n'; OS.indent(22);
-  OS << Target << "ProcSchedKV, "
-     << Target << "WriteProcResTable, "
+  OS << Target << "WriteProcResTable, "
      << Target << "WriteLatencyTable, "
      << Target << "ReadAdvanceTable, ";
   OS << '\n'; OS.indent(22);
@@ -1895,8 +1889,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n\n";
   OS << "namespace llvm {\n";
   OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
-  OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";
-  OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcSchedKV[];\n";
+  OS << "extern const llvm::SubtargetSubTypeKV " << Target << "SubTypeKV[];\n";
   OS << "extern const llvm::MCWriteProcResEntry "
      << Target << "WriteProcResTable[];\n";
   OS << "extern const llvm::MCWriteLatencyEntry "
@@ -1922,8 +1915,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   else
     OS << "None, ";
   OS << '\n'; OS.indent(24);
-  OS << Target << "ProcSchedKV, "
-     << Target << "WriteProcResTable, "
+  OS << Target << "WriteProcResTable, "
      << Target << "WriteLatencyTable, "
      << Target << "ReadAdvanceTable, ";
   OS << '\n'; OS.indent(24);
diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp
index f9b8853cc117..edf0b4a01c6d 100644
--- a/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -1,9 +1,8 @@
 //===- SubtargetFeatureInfo.cpp - Helpers for subtarget features ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -45,20 +44,6 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) {
   return SubtargetFeatures;
 }
 
-void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
-    SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
-  OS << "// Flags for subtarget features that participate in "
-     << "instruction matching.\n";
-  OS << "enum SubtargetFeatureFlag : "
-     << getMinimalTypeForEnumBitfield(SubtargetFeatures.size()) << " {\n";
-  for (const auto &SF : SubtargetFeatures) {
-    const SubtargetFeatureInfo &SFI = SF.second;
-    OS << "  " << SFI.getEnumName() << " = (1ULL << " << SFI.Index << "),\n";
-  }
-  OS << "  Feature_None = 0\n";
-  OS << "};\n\n";
-}
-
 void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
     SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
   OS << "// Bits for subtarget features that participate in "
@@ -121,9 +106,9 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
 void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
     StringRef TargetName, StringRef ClassName, StringRef FuncName,
     SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
-  OS << "uint64_t " << TargetName << ClassName << "::\n"
+  OS << "FeatureBitset " << TargetName << ClassName << "::\n"
      << FuncName << "(const FeatureBitset& FB) const {\n";
-  OS << "  uint64_t Features = 0;\n";
+  OS << "  FeatureBitset Features;\n";
   for (const auto &SF : SubtargetFeatures) {
     const SubtargetFeatureInfo &SFI = SF.second;
 
@@ -157,7 +142,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
     } while (true);
 
     OS << ")\n";
-    OS << "    Features |= " << SFI.getEnumName() << ";\n";
+    OS << "    Features[" << SFI.getEnumBitName() << "] = 1;\n";
   }
   OS << "  return Features;\n";
   OS << "}\n\n";
diff --git a/utils/TableGen/SubtargetFeatureInfo.h b/utils/TableGen/SubtargetFeatureInfo.h
index 71e6748c863f..d72f8b93461f 100644
--- a/utils/TableGen/SubtargetFeatureInfo.h
+++ b/utils/TableGen/SubtargetFeatureInfo.h
@@ -1,9 +1,8 @@
-//===- SubtargetFeatureInfo.h - Helpers for subtarget features ------------===//
+//===- SubtargetFeatureInfo.h - Helpers for subtarget features --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,13 +53,6 @@ struct SubtargetFeatureInfo {
   static std::vector<std::pair<Record *, SubtargetFeatureInfo>>
   getAll(const RecordKeeper &Records);
 
-  /// Emit the subtarget feature flag definitions.
-  ///
-  /// This version emits the bit value for the feature and is therefore limited
-  /// to 64 feature bits.
-  static void emitSubtargetFeatureFlagEnumeration(
-      SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS);
-
   /// Emit the subtarget feature flag definitions.
   ///
   /// This version emits the bit index for the feature and can therefore support
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index d5b6a3c12647..c485ed2feb7a 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -1,9 +1,8 @@
 //===- TableGen.cpp - Top-Level TableGen implementation for LLVM ----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -56,6 +55,12 @@ enum ActionType {
   GenExegesis,
 };
 
+namespace llvm {
+/// Storage for TimeRegionsOpt as a global so that backends aren't required to
+/// include CommandLine.h
+bool TimeRegions = false;
+} // end namespace llvm
+
 namespace {
   cl::opt<ActionType>
   Action(cl::desc("Action to perform:"),
@@ -127,6 +132,11 @@ namespace {
   Class("class", cl::desc("Print Enum list for this class"),
         cl::value_desc("class name"), cl::cat(PrintEnumsCat));
 
+cl::opt<bool, true>
+    TimeRegionsOpt("time-regions",
+                   cl::desc("Time regions of tablegens execution"),
+                   cl::location(TimeRegions));
+
 bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   switch (Action) {
   case PrintRecords:
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index f4f2909f8e88..135ec65c0f95 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -1,9 +1,8 @@
 //===- TableGenBackends.h - Declarations for LLVM TableGen Backends -------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/Types.cpp b/utils/TableGen/Types.cpp
index 04d9e40f6743..a6682da90e6b 100644
--- a/utils/TableGen/Types.cpp
+++ b/utils/TableGen/Types.cpp
@@ -1,9 +1,8 @@
 //===- Types.cpp - Helper for the selection of C++ data types. ------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/Types.h b/utils/TableGen/Types.h
index d511b7eae6e8..17c7742ccaac 100644
--- a/utils/TableGen/Types.h
+++ b/utils/TableGen/Types.h
@@ -1,9 +1,8 @@
 //===- Types.h - Helper for the selection of C++ types. ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 788f142e125f..365cba5a60ca 100644
--- a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -1,9 +1,8 @@
 //===- WebAssemblyDisassemblerEmitter.cpp - Disassembler tables -*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -51,12 +50,33 @@ void emitWebAssemblyDisassemblerTables(
     auto IsStackBased =
         StackString &&
         reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
-    if (IsStackBased && !CGIP.second) {
-      // this picks the first of many typed variants, which is
-      // currently the except_ref one, though this shouldn't matter for
-      // disassembly purposes.
-      CGIP = std::make_pair(I, &CGI);
+    if (!IsStackBased)
+      continue;
+    if (CGIP.second) {
+      // We already have an instruction for this slot, so decide which one
+      // should be the canonical one. This determines which variant gets
+      // printed in a disassembly. We want e.g. "call" not "i32.call", and
+      // "end" when we don't know if it's "end_loop" or "end_block" etc.
+      auto IsCanonicalExisting = CGIP.second->TheDef->getValue("IsCanonical")
+                                     ->getValue()
+                                     ->getAsString() == "1";
+      // We already have one marked explicitly as canonical, so keep it.
+      if (IsCanonicalExisting)
+        continue;
+      auto IsCanonicalNew =
+          Def.getValue("IsCanonical")->getValue()->getAsString() == "1";
+      // If the new one is explicitly marked as canonical, take it.
+      if (!IsCanonicalNew) {
+        // Neither the existing nor the new instruction is canonical.
+        // Pick the one with the shortest name as heuristic.
+        // Though ideally IsCanonical is always defined for at least one
+        // variant so this never has to apply.
+        if (CGIP.second->AsmString.size() <= CGI.AsmString.size())
+          continue;
+      }
     }
+    // Set this instruction as the one to use.
+    CGIP = std::make_pair(I, &CGI);
   }
   OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n";
   OS << "\n";
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index 91f820f120a2..60d3d9433eca 100644
--- a/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -1,9 +1,8 @@
 //===- WebAssemblyDisassemblerEmitter.h - Disassembler tables ---*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index 220765f72410..093f220fda5e 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -1,9 +1,8 @@
 //===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 2b5cc1279605..8036aecc4f4b 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -1,9 +1,8 @@
 //===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -889,67 +888,44 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
 }
 
 void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
-  const unsigned int tableSize = 16384;
   o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR
-                     "[" << tableSize << "] = {\n";
+                     "[" << ATTR_max << "] = {\n";
   i++;
 
-  for (unsigned index = 0; index < tableSize; ++index) {
+  for (unsigned index = 0; index < ATTR_max; ++index) {
     o.indent(i * 2);
 
-    if (index & ATTR_EVEX) {
-      o << "IC_EVEX";
-      if (index & ATTR_EVEXL2)
+    if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
+      if (index & ATTR_EVEX)
+        o << "IC_EVEX";
+      else
+        o << "IC_VEX";
+
+      if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2))
         o << "_L2";
-      else if (index & ATTR_EVEXL)
+      else if (index & ATTR_VEXL)
         o << "_L";
+
       if (index & ATTR_REXW)
         o << "_W";
+
       if (index & ATTR_OPSIZE)
         o << "_OPSIZE";
       else if (index & ATTR_XD)
         o << "_XD";
       else if (index & ATTR_XS)
         o << "_XS";
-      if (index & ATTR_EVEXKZ)
-        o << "_KZ";
-      else if (index & ATTR_EVEXK)
-        o << "_K";
-      if (index & ATTR_EVEXB)
-        o << "_B";
+
+      if ((index & ATTR_EVEX)) {
+        if (index & ATTR_EVEXKZ)
+          o << "_KZ";
+        else if (index & ATTR_EVEXK)
+          o << "_K";
+
+        if (index & ATTR_EVEXB)
+          o << "_B";
+      }
     }
-    else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
-      o << "IC_VEX_L_W_OPSIZE";
-    else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XD))
-      o << "IC_VEX_L_W_XD";
-    else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XS))
-      o << "IC_VEX_L_W_XS";
-    else if ((index & ATTR_VEXL) && (index & ATTR_REXW))
-      o << "IC_VEX_L_W";
-    else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
-      o << "IC_VEX_L_OPSIZE";
-    else if ((index & ATTR_VEXL) && (index & ATTR_XD))
-      o << "IC_VEX_L_XD";
-    else if ((index & ATTR_VEXL) && (index & ATTR_XS))
-      o << "IC_VEX_L_XS";
-    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
-      o << "IC_VEX_W_OPSIZE";
-    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD))
-      o << "IC_VEX_W_XD";
-    else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS))
-      o << "IC_VEX_W_XS";
-    else if (index & ATTR_VEXL)
-      o << "IC_VEX_L";
-    else if ((index & ATTR_VEX) && (index & ATTR_REXW))
-      o << "IC_VEX_W";
-    else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE))
-      o << "IC_VEX_OPSIZE";
-    else if ((index & ATTR_VEX) && (index & ATTR_XD))
-      o << "IC_VEX_XD";
-    else if ((index & ATTR_VEX) && (index & ATTR_XS))
-      o << "IC_VEX_XS";
-    else if (index & ATTR_VEX)
-      o << "IC_VEX";
     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
       o << "IC_64BIT_REXW_XS";
     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
@@ -1004,12 +980,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
     else
       o << "IC";
 
-    if (index < tableSize - 1)
-      o << ",";
-    else
-      o << " ";
-
-    o << " /* " << index << " */";
+    o << ", /* " << index << " */";
 
     o << "\n";
   }
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index b0ea9c2e8625..63af68b6fbfa 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -1,9 +1,8 @@
 //===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index d5dc10ecad25..3df14f40e4a9 100644
--- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -1,9 +1,8 @@
 //===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 ///
@@ -69,23 +68,6 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
 }
 
 // Return true if the 2 BitsInits are equal
-static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) {
-  if (B1->getNumBits() != B2->getNumBits())
-    PrintFatalError("Comparing two BitsInits with different sizes!");
-
-  for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) {
-    if (BitInit *Bit1 = dyn_cast<BitInit>(B1->getBit(i))) {
-      if (BitInit *Bit2 = dyn_cast<BitInit>(B2->getBit(i))) {
-        if (Bit1->getValue() != Bit2->getValue())
-          return false;
-      } else
-        PrintFatalError("Invalid BitsInit bit");
-    } else
-      PrintFatalError("Invalid BitsInit bit");
-  }
-  return true;
-}
-
 // Calculates the integer value residing BitsInit object
 static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
   uint64_t Value = 0;
@@ -109,26 +91,25 @@ public:
   bool operator()(const CodeGenInstruction *VEXInst) {
     Record *RecE = EVEXInst->TheDef;
     Record *RecV = VEXInst->TheDef;
-    uint64_t EVEX_W =
-        getValueFromBitsInit(RecE->getValueAsBitsInit("VEX_WPrefix"));
-    uint64_t VEX_W =
-        getValueFromBitsInit(RecV->getValueAsBitsInit("VEX_WPrefix"));
+    bool EVEX_W = RecE->getValueAsBit("HasVEX_W");
+    bool VEX_W  = RecV->getValueAsBit("HasVEX_W");
+    bool VEX_WIG  = RecV->getValueAsBit("IgnoresVEX_W");
+    bool EVEX_WIG = RecE->getValueAsBit("IgnoresVEX_W");
+    bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0");
 
     if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
         // VEX/EVEX fields
         RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") ||
         RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") ||
         RecV->getValueAsBit("hasVEX_4V") != RecE->getValueAsBit("hasVEX_4V") ||
-        !equalBitsInits(RecV->getValueAsBitsInit("EVEX_LL"),
-                        RecE->getValueAsBitsInit("EVEX_LL")) ||
+        RecV->getValueAsBit("hasEVEX_L2") != RecE->getValueAsBit("hasEVEX_L2") ||
+        RecV->getValueAsBit("hasVEX_L") != RecE->getValueAsBit("hasVEX_L") ||
         // Match is allowed if either is VEX_WIG, or they match, or EVEX
         // is VEX_W1X and VEX is VEX_W0.
-        (!(EVEX_W == 2 || VEX_W == 2 || EVEX_W == VEX_W ||
-           (EVEX_W == 3 && VEX_W == 0))) ||
+        (!(VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) ||
+           (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W))) ||
         // Instruction's format
-        RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form") ||
-        RecV->getValueAsBit("isAsmParserOnly") !=
-            RecE->getValueAsBit("isAsmParserOnly"))
+        RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form"))
       return false;
 
     // This is needed for instructions with intrinsic version (_Int).
@@ -150,8 +131,9 @@ public:
       } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
         return false;
       } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
-        if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type"))
+        if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
           return false;
+        }
       } else
         return false;
     }
@@ -207,8 +189,7 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
     else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncEVEX" &&
              !Inst->TheDef->getValueAsBit("hasEVEX_K") &&
              !Inst->TheDef->getValueAsBit("hasEVEX_B") &&
-             getValueFromBitsInit(Inst->TheDef->
-                                        getValueAsBitsInit("EVEX_LL")) != 2 &&
+             !Inst->TheDef->getValueAsBit("hasEVEX_L2") &&
              !Inst->TheDef->getValueAsBit("notEVEX2VEXConvertible"))
       EVEXInsts.push_back(Inst);
   }
@@ -236,17 +217,10 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
       continue;
 
     // In case a match is found add new entry to the appropriate table
-    switch (getValueFromBitsInit(
-        EVEXInst->TheDef->getValueAsBitsInit("EVEX_LL"))) {
-    case 0:
-      EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
-      break;
-    case 1:
+    if (EVEXInst->TheDef->getValueAsBit("hasVEX_L"))
       EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1}
-      break;
-    default:
-      llvm_unreachable("Instruction's size not fit for the mapping!");
-    }
+    else
+      EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
   }
 
   // Print both tables
diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp
index 1ea668643575..2c15e35f234d 100644
--- a/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -1,9 +1,8 @@
 //===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -14,6 +13,7 @@
 
 #include "CodeGenTarget.h"
 #include "X86RecognizableInstr.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -62,9 +62,12 @@ const ManualMapEntry ManualMapSet[] = {
     { "ADD64ri32_DB",     "ADD64mi32",       NO_UNFOLD  },
     { "ADD64ri8_DB",      "ADD64mi8",        NO_UNFOLD  },
     { "ADD64rr_DB",       "ADD64mr",         NO_UNFOLD  },
+    { "ADD8ri_DB",        "ADD8mi",          NO_UNFOLD  },
+    { "ADD8rr_DB",        "ADD8mr",          NO_UNFOLD  },
     { "ADD16rr_DB",       "ADD16rm",         NO_UNFOLD  },
     { "ADD32rr_DB",       "ADD32rm",         NO_UNFOLD  },
     { "ADD64rr_DB",       "ADD64rm",         NO_UNFOLD  },
+    { "ADD8rr_DB",        "ADD8rm",          NO_UNFOLD  },
     { "PUSH16r",          "PUSH16rmm",       UNFOLD },
     { "PUSH32r",          "PUSH32rmm",       UNFOLD },
     { "PUSH64r",          "PUSH64rmm",       UNFOLD },
@@ -106,23 +109,23 @@ class X86FoldTablesEmitter {
                       const CodeGenInstruction *MemInst)
         : RegInst(RegInst), MemInst(MemInst) {}
 
-    friend raw_ostream &operator<<(raw_ostream &OS,
-                                   const X86FoldTableEntry &E) {
-      OS << "{ X86::" << E.RegInst->TheDef->getName()
-         << ", X86::" << E.MemInst->TheDef->getName() << ", ";
+    void print(formatted_raw_ostream &OS) const {
+      OS.indent(2);
+      OS << "{ X86::" << RegInst->TheDef->getName() << ",";
+      OS.PadToColumn(40);
+      OS  << "X86::" << MemInst->TheDef->getName() << ",";
+      OS.PadToColumn(75);
 
-      if (E.IsLoad)
+      if (IsLoad)
         OS << "TB_FOLDED_LOAD | ";
-      if (E.IsStore)
+      if (IsStore)
         OS << "TB_FOLDED_STORE | ";
-      if (E.CannotUnfold)
+      if (CannotUnfold)
         OS << "TB_NO_REVERSE | ";
-      if (E.IsAligned)
-        OS << "TB_ALIGN_" << E.Alignment << " | ";
+      if (IsAligned)
+        OS << "TB_ALIGN_" << Alignment << " | ";
 
       OS << "0 },\n";
-
-      return OS;
     }
   };
 
@@ -142,7 +145,7 @@ public:
   X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Generate the 6 X86 memory fold tables.
-  void run(raw_ostream &OS);
+  void run(formatted_raw_ostream &OS);
 
 private:
   // Decides to which table to add the entry with the given instructions.
@@ -160,21 +163,21 @@ private:
   // Print the given table as a static const C++ array of type
   // X86MemoryFoldTableEntry.
   void printTable(const FoldTable &Table, StringRef TableName,
-                  raw_ostream &OS) {
+                  formatted_raw_ostream &OS) {
     OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
        << "[] = {\n";
 
     for (const X86FoldTableEntry &E : Table)
-      OS << E;
+      E.print(OS);
 
-    OS << "};\n";
+    OS << "};\n\n";
   }
 };
 
 // Return true if one of the instruction's operands is a RST register class
 static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
-    return OpIn.Rec->getName() == "RST";
+    return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
   });
 }
 
@@ -345,10 +348,18 @@ public:
             MemRec->getValueAsBit("hasLockPrefix") ||
         RegRec->getValueAsBit("hasNoTrackPrefix") !=
             MemRec->getValueAsBit("hasNoTrackPrefix") ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"),
-                        MemRec->getValueAsBitsInit("EVEX_LL")) ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"),
-                        MemRec->getValueAsBitsInit("VEX_WPrefix")) ||
+        RegRec->getValueAsBit("hasVEX_L") !=
+            MemRec->getValueAsBit("hasVEX_L") ||
+        RegRec->getValueAsBit("hasEVEX_L2") !=
+            MemRec->getValueAsBit("hasEVEX_L2") ||
+        RegRec->getValueAsBit("ignoresVEX_L") !=
+            MemRec->getValueAsBit("ignoresVEX_L") ||
+        RegRec->getValueAsBit("HasVEX_W") !=
+            MemRec->getValueAsBit("HasVEX_W") ||
+        RegRec->getValueAsBit("IgnoresVEX_W") !=
+            MemRec->getValueAsBit("IgnoresVEX_W") ||
+        RegRec->getValueAsBit("EVEX_W1_VEX_W0") !=
+            MemRec->getValueAsBit("EVEX_W1_VEX_W0") ||
         // Instruction's format - The register form's "Form" field should be
         // the opposite of the memory form's "Form" field.
         !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"),
@@ -421,6 +432,7 @@ private:
         (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) ||
         (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) ||
         (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) ||
+        (MemFormNum == X86Local::MRMXmCC && RegFormNum == X86Local::MRMXrCC) ||
         (MemFormNum == X86Local::MRMDestMem &&
          RegFormNum == X86Local::MRMDestReg) ||
         (MemFormNum == X86Local::MRMSrcMem &&
@@ -428,7 +440,9 @@ private:
         (MemFormNum == X86Local::MRMSrcMem4VOp3 &&
          RegFormNum == X86Local::MRMSrcReg4VOp3) ||
         (MemFormNum == X86Local::MRMSrcMemOp4 &&
-         RegFormNum == X86Local::MRMSrcRegOp4))
+         RegFormNum == X86Local::MRMSrcRegOp4) ||
+        (MemFormNum == X86Local::MRMSrcMemCC &&
+         RegFormNum == X86Local::MRMSrcRegCC))
       return true;
 
     return false;
@@ -558,7 +572,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
   return;
 }
 
-void X86FoldTablesEmitter::run(raw_ostream &OS) {
+void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
   emitSourceFileHeader("X86 fold tables", OS);
 
   // Holds all memory instructions
@@ -639,7 +653,7 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
                  &(Target.getInstruction(MemInstIter)), Entry.Strategy);
   }
 
-  // Print all tables to raw_ostream OS.
+  // Print all tables.
   printTable(Table2Addr, "Table2Addr", OS);
   printTable(Table0, "Table0", OS);
   printTable(Table1, "Table1", OS);
@@ -650,7 +664,8 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
 
 namespace llvm {
 
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) {
+void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &o) {
+  formatted_raw_ostream OS(o);
   X86FoldTablesEmitter(RK).run(OS);
 }
 } // namespace llvm
diff --git a/utils/TableGen/X86ModRMFilters.cpp b/utils/TableGen/X86ModRMFilters.cpp
index 1641613aa32d..98e6fb6104d7 100644
--- a/utils/TableGen/X86ModRMFilters.cpp
+++ b/utils/TableGen/X86ModRMFilters.cpp
@@ -1,9 +1,8 @@
 //===- X86ModRMFilters.cpp - Disassembler ModR/M filterss -------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h
index b0248e878d07..c77b4c21aec4 100644
--- a/utils/TableGen/X86ModRMFilters.h
+++ b/utils/TableGen/X86ModRMFilters.h
@@ -1,9 +1,8 @@
 //===- X86ModRMFilters.h - Disassembler ModR/M filterss ---------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 2f9b428b8cfe..ab8a8855c478 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -1,9 +1,8 @@
 //===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -84,7 +83,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   AdSize             = byteFromRec(Rec, "AdSizeBits");
   HasREX_WPrefix     = Rec->getValueAsBit("hasREX_WPrefix");
   HasVEX_4V          = Rec->getValueAsBit("hasVEX_4V");
-  VEX_WPrefix        = byteFromRec(Rec,"VEX_WPrefix");
+  HasVEX_W           = Rec->getValueAsBit("HasVEX_W");
+  IgnoresVEX_W       = Rec->getValueAsBit("IgnoresVEX_W");
   IgnoresVEX_L       = Rec->getValueAsBit("ignoresVEX_L");
   HasEVEX_L2Prefix   = Rec->getValueAsBit("hasEVEX_L2");
   HasEVEX_K          = Rec->getValueAsBit("hasEVEX_K");
@@ -110,7 +110,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
   for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
     if (Predicates[i]->getName().find("Not64Bit") != Name.npos ||
-	Predicates[i]->getName().find("In32Bit") != Name.npos) {
+        Predicates[i]->getName().find("In32Bit") != Name.npos) {
       Is32Bit = true;
       break;
     }
@@ -164,8 +164,7 @@ InstructionContext RecognizableInstr::insnContext() const {
       llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
     }
     // VEX_L & VEX_W
-    if (!EncodeRC && HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
-                                        VEX_WPrefix == X86Local::VEX_W1X)) {
+    if (!EncodeRC && HasVEX_LPrefix && HasVEX_W) {
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
       else if (OpPrefix == X86Local::XS)
@@ -192,9 +191,7 @@ InstructionContext RecognizableInstr::insnContext() const {
         errs() << "Instruction does not use a prefix: " << Name << "\n";
         llvm_unreachable("Invalid prefix");
       }
-    } else if (!EncodeRC && HasEVEX_L2Prefix &&
-               (VEX_WPrefix == X86Local::VEX_W1 ||
-                VEX_WPrefix == X86Local::VEX_W1X)) {
+    } else if (!EncodeRC && HasEVEX_L2Prefix && HasVEX_W) {
       // EVEX_L2 & VEX_W
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE);
@@ -223,8 +220,7 @@ InstructionContext RecognizableInstr::insnContext() const {
         llvm_unreachable("Invalid prefix");
       }
     }
-    else if (VEX_WPrefix == X86Local::VEX_W1 ||
-             VEX_WPrefix == X86Local::VEX_W1X) {
+    else if (HasVEX_W) {
       // VEX_W
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_W_OPSIZE);
@@ -254,8 +250,7 @@ InstructionContext RecognizableInstr::insnContext() const {
     }
     /// eof EVEX
   } else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) {
-    if (HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
-                           VEX_WPrefix == X86Local::VEX_W1X)) {
+    if (HasVEX_LPrefix && HasVEX_W) {
       if (OpPrefix == X86Local::PD)
         insnContext = IC_VEX_L_W_OPSIZE;
       else if (OpPrefix == X86Local::XS)
@@ -270,8 +265,7 @@ InstructionContext RecognizableInstr::insnContext() const {
       }
     } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix)
       insnContext = IC_VEX_L_OPSIZE;
-    else if (OpPrefix == X86Local::PD && (VEX_WPrefix == X86Local::VEX_W1 ||
-                                          VEX_WPrefix == X86Local::VEX_W1X))
+    else if (OpPrefix == X86Local::PD && HasVEX_W)
       insnContext = IC_VEX_W_OPSIZE;
     else if (OpPrefix == X86Local::PD)
       insnContext = IC_VEX_OPSIZE;
@@ -279,14 +273,11 @@ InstructionContext RecognizableInstr::insnContext() const {
       insnContext = IC_VEX_L_XS;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::XD)
       insnContext = IC_VEX_L_XD;
-    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
-              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XS)
+    else if (HasVEX_W && OpPrefix == X86Local::XS)
       insnContext = IC_VEX_W_XS;
-    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
-              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XD)
+    else if (HasVEX_W && OpPrefix == X86Local::XD)
       insnContext = IC_VEX_W_XD;
-    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
-              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::PS)
+    else if (HasVEX_W && OpPrefix == X86Local::PS)
       insnContext = IC_VEX_W;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::PS)
       insnContext = IC_VEX_L;
@@ -496,6 +487,13 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(opcodeModifier)
     HANDLE_OPTIONAL(relocation)
     break;
+  case X86Local::AddCCFrm:
+    // Operand 1 is an address or immediate (required for this form).
+    assert(numPhysicalOperands == 2 &&
+           "Unexpected number of operands for AddCCFrm");
+    HANDLE_OPERAND(relocation)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
   case X86Local::MRMDestReg:
     // Operand 1 is a register operand in the R/M field.
     // - In AVX512 there may be a mask operand here -
@@ -581,6 +579,13 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(rmRegister)
     HANDLE_OPTIONAL(immediate)
     break;
+  case X86Local::MRMSrcRegCC:
+    assert(numPhysicalOperands == 3 &&
+           "Unexpected number of operands for MRMSrcRegCC");
+    HANDLE_OPERAND(roRegister)
+    HANDLE_OPERAND(rmRegister)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
   case X86Local::MRMSrcMem:
     // Operand 1 is a register operand in the Reg/Opcode field.
     // Operand 2 is a memory operand (possibly SIB-extended)
@@ -621,6 +626,19 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(memory)
     HANDLE_OPTIONAL(immediate)
     break;
+  case X86Local::MRMSrcMemCC:
+    assert(numPhysicalOperands == 3 &&
+           "Unexpected number of operands for MRMSrcMemCC");
+    HANDLE_OPERAND(roRegister)
+    HANDLE_OPERAND(memory)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
+  case X86Local::MRMXrCC:
+    assert(numPhysicalOperands == 2 &&
+           "Unexpected number of operands for MRMXrCC");
+    HANDLE_OPERAND(rmRegister)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
   case X86Local::MRMXr:
   case X86Local::MRM0r:
   case X86Local::MRM1r:
@@ -646,6 +664,12 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPTIONAL(relocation)
     HANDLE_OPTIONAL(immediate)
     break;
+  case X86Local::MRMXmCC:
+    assert(numPhysicalOperands == 2 &&
+           "Unexpected number of operands for MRMXm");
+    HANDLE_OPERAND(memory)
+    HANDLE_OPERAND(opcodeModifier)
+    break;
   case X86Local::MRMXm:
   case X86Local::MRM0m:
   case X86Local::MRM1m:
@@ -724,12 +748,15 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
   case X86Local::RawFrmDstSrc:
   case X86Local::RawFrmImm8:
   case X86Local::RawFrmImm16:
+  case X86Local::AddCCFrm:
     filter = llvm::make_unique<DumbFilter>();
     break;
   case X86Local::MRMDestReg:
   case X86Local::MRMSrcReg:
   case X86Local::MRMSrcReg4VOp3:
   case X86Local::MRMSrcRegOp4:
+  case X86Local::MRMSrcRegCC:
+  case X86Local::MRMXrCC:
   case X86Local::MRMXr:
     filter = llvm::make_unique<ModFilter>(true);
     break;
@@ -737,6 +764,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
   case X86Local::MRMSrcMem:
   case X86Local::MRMSrcMem4VOp3:
   case X86Local::MRMSrcMemOp4:
+  case X86Local::MRMSrcMemCC:
+  case X86Local::MRMXmCC:
   case X86Local::MRMXm:
     filter = llvm::make_unique<ModFilter>(false);
     break;
@@ -769,23 +798,24 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
   assert(opcodeType && "Opcode type not set");
   assert(filter && "Filter not set");
 
-  if (Form == X86Local::AddRegFrm) {
-    assert(((opcodeToSet & 7) == 0) &&
-           "ADDREG_FRM opcode not aligned");
+  if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC ||
+      Form == X86Local::MRMSrcMemCC || Form == X86Local::MRMXrCC ||
+      Form == X86Local::MRMXmCC || Form == X86Local::AddCCFrm) {
+    unsigned Count = Form == X86Local::AddRegFrm ? 8 : 16;
+    assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned");
 
     uint8_t currentOpcode;
 
-    for (currentOpcode = opcodeToSet;
-         currentOpcode < opcodeToSet + 8;
+    for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count;
          ++currentOpcode)
       tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter,
                             UID, Is32Bit, OpPrefix == 0,
                             IgnoresVEX_L || EncodeRC,
-                            VEX_WPrefix == X86Local::VEX_WIG, AddressSize);
+                            IgnoresVEX_W, AddressSize);
   } else {
     tables.setTableFields(*opcodeType, insnContext(), opcodeToSet, *filter, UID,
                           Is32Bit, OpPrefix == 0, IgnoresVEX_L || EncodeRC,
-                          VEX_WPrefix == X86Local::VEX_WIG, AddressSize);
+                          IgnoresVEX_W, AddressSize);
   }
 
 #undef MAP
@@ -825,7 +855,9 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("i8mem",               TYPE_M)
   TYPE("i8imm",               TYPE_IMM)
   TYPE("u8imm",               TYPE_UIMM8)
+  TYPE("i16u8imm",            TYPE_UIMM8)
   TYPE("i32u8imm",            TYPE_UIMM8)
+  TYPE("i64u8imm",            TYPE_UIMM8)
   TYPE("GR8",                 TYPE_R8)
   TYPE("VR128",               TYPE_XMM)
   TYPE("VR128X",              TYPE_XMM)
@@ -842,16 +874,14 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("f32mem",              TYPE_M)
   TYPE("ssmem",               TYPE_M)
   TYPE("RST",                 TYPE_ST)
+  TYPE("RSTi",                TYPE_ST)
   TYPE("i128mem",             TYPE_M)
   TYPE("i256mem",             TYPE_M)
   TYPE("i512mem",             TYPE_M)
   TYPE("i64i32imm_pcrel",     TYPE_REL)
   TYPE("i16imm_pcrel",        TYPE_REL)
   TYPE("i32imm_pcrel",        TYPE_REL)
-  TYPE("SSECC",               TYPE_IMM3)
-  TYPE("XOPCC",               TYPE_IMM3)
-  TYPE("AVXCC",               TYPE_IMM5)
-  TYPE("AVX512ICC",           TYPE_AVX512ICC)
+  TYPE("ccode",               TYPE_IMM)
   TYPE("AVX512RC",            TYPE_IMM)
   TYPE("brtarget32",          TYPE_REL)
   TYPE("brtarget16",          TYPE_REL)
@@ -902,6 +932,11 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("VK32WM",              TYPE_VK)
   TYPE("VK64",                TYPE_VK)
   TYPE("VK64WM",              TYPE_VK)
+  TYPE("VK1Pair",             TYPE_VK_PAIR)
+  TYPE("VK2Pair",             TYPE_VK_PAIR)
+  TYPE("VK4Pair",             TYPE_VK_PAIR)
+  TYPE("VK8Pair",             TYPE_VK_PAIR)
+  TYPE("VK16Pair",            TYPE_VK_PAIR)
   TYPE("vx64mem",             TYPE_MVSIBX)
   TYPE("vx128mem",            TYPE_MVSIBX)
   TYPE("vx256mem",            TYPE_MVSIBX)
@@ -931,10 +966,6 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
     ENCODING("i16imm",        ENCODING_IW)
   }
   ENCODING("i32i8imm",        ENCODING_IB)
-  ENCODING("SSECC",           ENCODING_IB)
-  ENCODING("XOPCC",           ENCODING_IB)
-  ENCODING("AVXCC",           ENCODING_IB)
-  ENCODING("AVX512ICC",       ENCODING_IB)
   ENCODING("AVX512RC",        ENCODING_IRC)
   ENCODING("i16imm",          ENCODING_Iv)
   ENCODING("i16i8imm",        ENCODING_IB)
@@ -943,7 +974,9 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
   ENCODING("i64i8imm",        ENCODING_IB)
   ENCODING("i8imm",           ENCODING_IB)
   ENCODING("u8imm",           ENCODING_IB)
+  ENCODING("i16u8imm",        ENCODING_IB)
   ENCODING("i32u8imm",        ENCODING_IB)
+  ENCODING("i64u8imm",        ENCODING_IB)
   // This is not a typo.  Instructions like BLENDVPD put
   // register IDs in 8-bit immediates nowadays.
   ENCODING("FR32",            ENCODING_IB)
@@ -964,6 +997,7 @@ OperandEncoding
 RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
                                                 uint8_t OpSize) {
   ENCODING("RST",             ENCODING_FP)
+  ENCODING("RSTi",            ENCODING_FP)
   ENCODING("GR16",            ENCODING_RM)
   ENCODING("GR32",            ENCODING_RM)
   ENCODING("GR32orGR64",      ENCODING_RM)
@@ -987,6 +1021,11 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_RM)
   ENCODING("VK32",            ENCODING_RM)
   ENCODING("VK64",            ENCODING_RM)
+  ENCODING("VK1PAIR",         ENCODING_RM)
+  ENCODING("VK2PAIR",         ENCODING_RM)
+  ENCODING("VK4PAIR",         ENCODING_RM)
+  ENCODING("VK8PAIR",         ENCODING_RM)
+  ENCODING("VK16PAIR",        ENCODING_RM)
   ENCODING("BNDR",            ENCODING_RM)
   errs() << "Unhandled R/M register encoding " << s << "\n";
   llvm_unreachable("Unhandled R/M register encoding");
@@ -1021,6 +1060,11 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_REG)
   ENCODING("VK32",            ENCODING_REG)
   ENCODING("VK64",            ENCODING_REG)
+  ENCODING("VK1Pair",         ENCODING_REG)
+  ENCODING("VK2Pair",         ENCODING_REG)
+  ENCODING("VK4Pair",         ENCODING_REG)
+  ENCODING("VK8Pair",         ENCODING_REG)
+  ENCODING("VK16Pair",        ENCODING_REG)
   ENCODING("VK1WM",           ENCODING_REG)
   ENCODING("VK2WM",           ENCODING_REG)
   ENCODING("VK4WM",           ENCODING_REG)
@@ -1055,6 +1099,11 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_VVVV)
   ENCODING("VK32",            ENCODING_VVVV)
   ENCODING("VK64",            ENCODING_VVVV)
+  ENCODING("VK1PAIR",         ENCODING_VVVV)
+  ENCODING("VK2PAIR",         ENCODING_VVVV)
+  ENCODING("VK4PAIR",         ENCODING_VVVV)
+  ENCODING("VK8PAIR",         ENCODING_VVVV)
+  ENCODING("VK16PAIR",        ENCODING_VVVV)
   errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
   llvm_unreachable("Unhandled VEX.vvvv register encoding");
 }
@@ -1128,7 +1177,9 @@ RecognizableInstr::relocationEncodingFromString(const std::string &s,
   ENCODING("i64i8imm",        ENCODING_IB)
   ENCODING("i8imm",           ENCODING_IB)
   ENCODING("u8imm",           ENCODING_IB)
+  ENCODING("i16u8imm",        ENCODING_IB)
   ENCODING("i32u8imm",        ENCODING_IB)
+  ENCODING("i64u8imm",        ENCODING_IB)
   ENCODING("i64i32imm_pcrel", ENCODING_ID)
   ENCODING("i16imm_pcrel",    ENCODING_IW)
   ENCODING("i32imm_pcrel",    ENCODING_ID)
@@ -1166,6 +1217,7 @@ RecognizableInstr::opcodeModifierEncodingFromString(const std::string &s,
   ENCODING("GR64",            ENCODING_RO)
   ENCODING("GR16",            ENCODING_Rv)
   ENCODING("GR8",             ENCODING_RB)
+  ENCODING("ccode",           ENCODING_CC)
   errs() << "Unhandled opcode modifier encoding " << s << "\n";
   llvm_unreachable("Unhandled opcode modifier encoding");
 }
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index c4d34ee6c80c..b15bef4e1931 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -1,9 +1,8 @@
 //===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -102,18 +101,21 @@ namespace X86Local {
     RawFrmDstSrc  = 6,
     RawFrmImm8    = 7,
     RawFrmImm16   = 8,
+    AddCCFrm      = 9,
     MRMDestMem     = 32,
     MRMSrcMem      = 33,
     MRMSrcMem4VOp3 = 34,
     MRMSrcMemOp4   = 35,
-    MRMXm = 39,
+    MRMSrcMemCC    = 36,
+    MRMXmCC = 38, MRMXm = 39,
     MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43,
     MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47,
     MRMDestReg     = 48,
     MRMSrcReg      = 49,
     MRMSrcReg4VOp3 = 50,
     MRMSrcRegOp4   = 51,
-    MRMXr = 55,
+    MRMSrcRegCC    = 52,
+    MRMXrCC = 54, MRMXr = 55,
     MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59,
     MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63,
 #define MAP(from, to) MRM_##from = to,
@@ -140,10 +142,6 @@ namespace X86Local {
   enum {
     AdSize16 = 1, AdSize32 = 2, AdSize64 = 3
   };
-
-  enum {
-    VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2, VEX_W1X = 3
-  };
 }
 
 namespace X86Disassembler {
@@ -177,8 +175,10 @@ private:
   bool HasREX_WPrefix;
   /// The hasVEX_4V field from the record
   bool HasVEX_4V;
-  /// The VEX_WPrefix field from the record
-  uint8_t VEX_WPrefix;
+  /// The HasVEX_W field from the record
+  bool HasVEX_W;
+  /// The IgnoresVEX_W field from the record
+  bool IgnoresVEX_W;
   /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
   bool HasVEX_LPrefix;
   /// The ignoreVEX_L field from the record
-- 
cgit v1.3